From a835393fa14fa6052c46abeae47a3dda95996de2 Mon Sep 17 00:00:00 2001 From: Gregory Burd Date: Tue, 13 Sep 2011 13:48:33 -0400 Subject: [PATCH] Merge over all the parts of Berkeley DB that libdb will include. --- .gitignore | 1 + LICENSE | 130 + README | 5 + build_unix/.IGNORE_ME | 3 + dist/ChangeLog | 426 + dist/Makefile.in | 2657 ++ dist/RELEASE | 16 + dist/aclocal/clock.m4 | 59 + dist/aclocal/config.m4 | 19 + dist/aclocal/cxx.m4 | 304 + dist/aclocal/gcc.m4 | 19 + dist/aclocal/libtool.m4 | 7831 +++++ dist/aclocal/ltoptions.m4 | 369 + dist/aclocal/ltsugar.m4 | 123 + dist/aclocal/ltversion.m4 | 23 + dist/aclocal/lt~obsolete.m4 | 98 + dist/aclocal/mmap.m4 | 109 + dist/aclocal/mutex.m4 | 932 + dist/aclocal/options.m4 | 467 + dist/aclocal/perfmon.m4 | 97 + dist/aclocal/programs.m4 | 59 + dist/aclocal/sequence.m4 | 91 + dist/aclocal/socklen.m4 | 55 + dist/aclocal/sosuffix.m4 | 75 + dist/aclocal/tcl.m4 | 138 + dist/aclocal/tls.m4 | 65 + dist/aclocal/types.m4 | 212 + dist/api_flags | 527 + dist/api_flags.c | 444 + dist/buildpkg | 244 + dist/bumprel | 59 + dist/clib_port.in | 279 + dist/config.guess | 1511 + dist/config.hin | 636 + dist/config.sub | 1760 + dist/configure | 27335 ++++++++++++++++ dist/configure.ac | 1119 + dist/db_provider.d | 361 + dist/errno.h | 186 + dist/events.in | 265 + dist/gen_inc.awk | 69 + dist/gen_msg.awk | 465 + dist/gen_provider.pl | 86 + dist/gen_rec.awk | 611 + dist/install-sh | 519 + dist/ltmain.sh | 9636 ++++++ dist/pubdef.in | 545 + dist/s_all | 39 + dist/s_apiflags | 24 + dist/s_config | 37 + dist/s_crypto | 69 + dist/s_cscope | 17 + dist/s_include | 175 + dist/s_message | 31 + dist/s_message_id | 187 + dist/s_perm | 49 + dist/s_php | 23 + dist/s_readme | 25 + dist/s_recover | 71 + dist/s_sig | 119 + dist/s_symlink | 47 + dist/s_tags | 64 + dist/s_test | 109 + dist/s_validate | 79 + dist/srcfiles.in | 339 + dist/template/rec_btree | 917 + dist/template/rec_crdel | 267 + dist/template/rec_ctemp | 65 + dist/template/rec_db | 1437 + 
dist/template/rec_dbreg | 72 + dist/template/rec_fileops | 527 + dist/template/rec_hash | 982 + dist/template/rec_heap | 267 + dist/template/rec_qam | 332 + dist/template/rec_repmgr | 72 + dist/template/rec_txn | 527 + dist/template/rec_utemp | 68 + dist/validate/README | 10 + dist/validate/s_chk_build_configs | 118 + dist/validate/s_chk_comma | 35 + dist/validate/s_chk_comma.c | 53 + dist/validate/s_chk_copyright | 53 + dist/validate/s_chk_defines | 124 + dist/validate/s_chk_err | 34 + dist/validate/s_chk_ext_method_calls | 89 + dist/validate/s_chk_flags | 186 + dist/validate/s_chk_flags.c | 79 + dist/validate/s_chk_inclconfig | 46 + dist/validate/s_chk_include | 42 + dist/validate/s_chk_logverify | 270 + dist/validate/s_chk_logverify.c | 267 + dist/validate/s_chk_message_id | 60 + dist/validate/s_chk_mutex_print | 36 + dist/validate/s_chk_newline | 116 + dist/validate/s_chk_offt | 66 + dist/validate/s_chk_osdir | 30 + dist/validate/s_chk_proto | 46 + dist/validate/s_chk_pubdef | 189 + dist/validate/s_chk_runrecovery | 44 + dist/validate/s_chk_spell | 43 + dist/validate/s_chk_spell.dict | 6500 ++++ dist/validate/s_chk_srcfiles | 52 + dist/validate/s_chk_stats | 152 + dist/validate/s_chk_tags | 50 + examples/c/README | 40 + examples/c/csv/DbRecord.c | 470 + examples/c/csv/Makefile | 75 + examples/c/csv/README | 408 + examples/c/csv/code.c | 406 + examples/c/csv/csv.h | 101 + examples/c/csv/csv_extern.h | 37 + examples/c/csv/db.c | 244 + examples/c/csv/load.c | 347 + examples/c/csv/load_main.c | 117 + examples/c/csv/query.c | 241 + examples/c/csv/query_main.c | 99 + examples/c/csv/sample.csv | 8 + examples/c/csv/sample.desc | 10 + examples/c/csv/util.c | 309 + examples/c/ex_access.c | 161 + examples/c/ex_apprec/auto_rebuild | 10 + examples/c/ex_apprec/ex_apprec.c | 277 + examples/c/ex_apprec/ex_apprec.h | 20 + examples/c/ex_apprec/ex_apprec.src | 33 + examples/c/ex_apprec/ex_apprec_auto.c | 13 + examples/c/ex_apprec/ex_apprec_auto.h | 32 + 
examples/c/ex_apprec/ex_apprec_autop.c | 43 + examples/c/ex_apprec/ex_apprec_rec.c | 109 + examples/c/ex_apprec/ex_apprec_template | 70 + examples/c/ex_btrec.c | 202 + examples/c/ex_bulk.c | 867 + examples/c/ex_env.c | 183 + examples/c/ex_heap.c | 640 + examples/c/ex_lock.c | 239 + examples/c/ex_mpool.c | 258 + examples/c/ex_rep/README | 19 + examples/c/ex_rep/base/rep_base.c | 247 + examples/c/ex_rep/base/rep_base.h | 117 + examples/c/ex_rep/base/rep_msg.c | 467 + examples/c/ex_rep/base/rep_net.c | 749 + examples/c/ex_rep/common/rep_common.c | 680 + examples/c/ex_rep/common/rep_common.h | 82 + examples/c/ex_rep/mgr/rep_mgr.c | 227 + examples/c/ex_rep_chan/rep_chan.c | 757 + examples/c/ex_rep_chan/rep_chan.h | 130 + examples/c/ex_rep_chan/rep_chan_util.c | 670 + examples/c/ex_sequence.c | 132 + examples/c/ex_stream.c | 222 + examples/c/ex_thread.c | 626 + examples/c/ex_tpcb.c | 718 + examples/cxx/AccessExample.cpp | 168 + examples/cxx/BtRecExample.cpp | 223 + examples/cxx/EnvExample.cpp | 143 + examples/cxx/LockExample.cpp | 245 + examples/cxx/MpoolExample.cpp | 217 + examples/cxx/SequenceExample.cpp | 133 + examples/cxx/TpcbExample.cpp | 646 + examples/cxx/excxx_repquote/RepConfigInfo.cpp | 58 + examples/cxx/excxx_repquote/RepConfigInfo.h | 37 + .../cxx/excxx_repquote/RepQuoteExample.cpp | 827 + examples/cxx/excxx_repquote/dbc_auto.h | 65 + .../cxx/excxx_repquote_gsg/RepConfigInfo.h | 79 + examples/cxx/excxx_repquote_gsg/RepMgrGSG.cpp | 418 + .../cxx/excxx_repquote_gsg/SimpleConfigInfo.h | 29 + examples/cxx/excxx_repquote_gsg/SimpleTxn.cpp | 279 + examples/cxx/getting_started/MyDb.cpp | 71 + examples/cxx/getting_started/MyDb.hpp | 46 + .../excxx_example_database_load.cpp | 229 + .../excxx_example_database_read.cpp | 236 + .../getting_started/gettingStartedCommon.hpp | 198 + examples/cxx/getting_started/inventory.txt | 800 + examples/cxx/getting_started/vendors.txt | 6 + examples/cxx/txn_guide/TxnGuide.cpp | 399 + examples/cxx/txn_guide/TxnGuideInMemory.cpp | 380 + 
examples/stl/README | 41 + examples/stl/StlAccessExample.cpp | 143 + examples/stl/StlAdvancedFeatures.cpp | 1088 + examples/stl/StlAdvancedFeatures.h | 718 + examples/stl/StlTpcbExample.cpp | 626 + examples/stl/StlTransactionGuideExample.cpp | 372 + examples/stl/repquote/README | 17 + examples/stl/repquote/StlRepConfigInfo.cpp | 58 + examples/stl/repquote/StlRepConfigInfo.h | 37 + examples/stl/repquote/StlRepQuoteExample.cpp | 769 + lang/cxx/cxx_channel.cpp | 104 + lang/cxx/cxx_db.cpp | 825 + lang/cxx/cxx_dbc.cpp | 123 + lang/cxx/cxx_dbt.cpp | 56 + lang/cxx/cxx_env.cpp | 1354 + lang/cxx/cxx_except.cpp | 356 + lang/cxx/cxx_lock.cpp | 41 + lang/cxx/cxx_logc.cpp | 78 + lang/cxx/cxx_mpool.cpp | 128 + lang/cxx/cxx_multi.cpp | 123 + lang/cxx/cxx_rid.cpp | 46 + lang/cxx/cxx_seq.cpp | 109 + lang/cxx/cxx_site.cpp | 57 + lang/cxx/cxx_txn.cpp | 117 + lang/cxx/stl/dbstl_base_iterator.h | 498 + lang/cxx/stl/dbstl_common.in | 458 + lang/cxx/stl/dbstl_container.cpp | 525 + lang/cxx/stl/dbstl_container.h | 582 + lang/cxx/stl/dbstl_dbc.h | 1328 + lang/cxx/stl/dbstl_dbt.h | 803 + lang/cxx/stl/dbstl_element_ref.h | 873 + lang/cxx/stl/dbstl_exception.h | 257 + lang/cxx/stl/dbstl_inner_utility.h | 59 + lang/cxx/stl/dbstl_map.h | 3395 ++ lang/cxx/stl/dbstl_resource_manager.cpp | 1069 + lang/cxx/stl/dbstl_resource_manager.h | 359 + lang/cxx/stl/dbstl_set.h | 1583 + lang/cxx/stl/dbstl_utility.h | 496 + lang/cxx/stl/dbstl_vector.h | 3332 ++ lang/tcl/docs/db.html | 267 + lang/tcl/docs/env.html | 344 + lang/tcl/docs/historic.html | 168 + lang/tcl/docs/index.html | 50 + lang/tcl/docs/library.html | 26 + lang/tcl/docs/lock.html | 206 + lang/tcl/docs/log.html | 123 + lang/tcl/docs/mpool.html | 189 + lang/tcl/docs/rep.html | 50 + lang/tcl/docs/sequence.html | 93 + lang/tcl/docs/test.html | 103 + lang/tcl/docs/txn.html | 69 + lang/tcl/tcl_compat.c | 737 + lang/tcl/tcl_db.c | 4050 +++ lang/tcl/tcl_db_pkg.c | 5056 +++ lang/tcl/tcl_dbcursor.c | 1332 + lang/tcl/tcl_env.c | 3252 ++ 
lang/tcl/tcl_internal.c | 879 + lang/tcl/tcl_lock.c | 862 + lang/tcl/tcl_log.c | 824 + lang/tcl/tcl_mp.c | 1020 + lang/tcl/tcl_mutex.c | 389 + lang/tcl/tcl_rep.c | 1691 + lang/tcl/tcl_seq.c | 569 + lang/tcl/tcl_txn.c | 886 + lang/tcl/tcl_util.c | 152 + src/btree/bt_compact.c | 2644 ++ src/btree/bt_compare.c | 213 + src/btree/bt_compress.c | 3023 ++ src/btree/bt_conv.c | 95 + src/btree/bt_curadj.c | 694 + src/btree/bt_cursor.c | 3076 ++ src/btree/bt_delete.c | 541 + src/btree/bt_method.c | 745 + src/btree/bt_open.c | 677 + src/btree/bt_put.c | 1087 + src/btree/bt_rec.c | 2036 ++ src/btree/bt_reclaim.c | 98 + src/btree/bt_recno.c | 1427 + src/btree/bt_rsearch.c | 513 + src/btree/bt_search.c | 1028 + src/btree/bt_split.c | 1332 + src/btree/bt_stat.c | 658 + src/btree/bt_upgrade.c | 153 + src/btree/bt_verify.c | 2772 ++ src/btree/btree.src | 290 + src/btree/btree_auto.c | 207 + src/btree/btree_autop.c | 291 + src/clib/atoi.c | 50 + src/clib/atol.c | 50 + src/clib/bsearch.c | 38 + src/clib/getcwd.c | 261 + src/clib/getopt.c | 153 + src/clib/isalpha.c | 28 + src/clib/isprint.c | 28 + src/clib/isspace.c | 26 + src/clib/memcmp.c | 62 + src/clib/memmove.c | 150 + src/clib/printf.c | 116 + src/clib/qsort.c | 181 + src/clib/raise.c | 26 + src/clib/rand.c | 25 + src/clib/snprintf.c | 149 + src/clib/strcasecmp.c | 97 + src/clib/strcat.c | 53 + src/clib/strchr.c | 57 + src/clib/strdup.c | 59 + src/clib/strerror.c | 225 + src/clib/strncat.c | 69 + src/clib/strncmp.c | 61 + src/clib/strrchr.c | 58 + src/clib/strsep.c | 80 + src/clib/strtol.c | 142 + src/clib/strtoul.c | 121 + src/clib/time.c | 34 + src/common/clock.c | 57 + src/common/crypto_stub.c | 44 + src/common/db_byteorder.c | 63 + src/common/db_compint.c | 555 + src/common/db_err.c | 1114 + src/common/db_getlong.c | 146 + src/common/db_idspace.c | 85 + src/common/db_log2.c | 57 + src/common/db_shash.c | 104 + src/common/dbt.c | 74 + src/common/mkpath.c | 68 + src/common/openflags.c | 51 + src/common/os_method.c | 270 + 
src/common/util_arg.c | 56 + src/common/util_cache.c | 47 + src/common/util_log.c | 45 + src/common/util_sig.c | 110 + src/common/zerofill.c | 129 + src/crypto/aes_method.c | 357 + src/crypto/crypto.c | 411 + src/crypto/crypto.html | 638 + src/crypto/mersenne/mt19937db.c | 187 + src/crypto/rijndael/rijndael-alg-fst.c | 1466 + src/crypto/rijndael/rijndael-alg-fst.h | 40 + src/crypto/rijndael/rijndael-api-fst.c | 491 + src/crypto/rijndael/rijndael-api-fst.h | 91 + src/db/crdel.src | 71 + src/db/crdel_auto.c | 59 + src/db/crdel_autop.c | 103 + src/db/crdel_rec.c | 301 + src/db/db.c | 1650 + src/db/db.src | 431 + src/db/db_am.c | 1098 + src/db/db_auto.c | 276 + src/db/db_autop.c | 441 + src/db/db_cam.c | 3496 ++ src/db/db_cds.c | 201 + src/db/db_compact.c | 1083 + src/db/db_conv.c | 890 + src/db/db_copy.c | 256 + src/db/db_dispatch.c | 977 + src/db/db_dup.c | 214 + src/db/db_iface.c | 2980 ++ src/db/db_join.c | 940 + src/db/db_meta.c | 1433 + src/db/db_method.c | 1089 + src/db/db_open.c | 800 + src/db/db_overflow.c | 705 + src/db/db_ovfl_vrfy.c | 410 + src/db/db_pr.c | 1921 ++ src/db/db_rec.c | 2779 ++ src/db/db_reclaim.c | 245 + src/db/db_remove.c | 515 + src/db/db_rename.c | 383 + src/db/db_ret.c | 169 + src/db/db_setid.c | 213 + src/db/db_setlsn.c | 137 + src/db/db_sort_multiple.c | 327 + src/db/db_stati.c | 502 + src/db/db_truncate.c | 233 + src/db/db_upg.c | 521 + src/db/db_upg_opd.c | 343 + src/db/db_vrfy.c | 2970 ++ src/db/db_vrfy_stub.c | 120 + src/db/db_vrfyutil.c | 932 + src/db/partition.c | 2058 ++ src/dbinc/atomic.h | 220 + src/dbinc/btree.h | 553 + src/dbinc/clock.h | 131 + src/dbinc/crypto.h | 93 + src/dbinc/cxx_int.h | 77 + src/dbinc/db.in | 2769 ++ src/dbinc/db_185.in | 176 + src/dbinc/db_am.h | 325 + src/dbinc/db_cxx.in | 1493 + src/dbinc/db_dispatch.h | 97 + src/dbinc/db_int.in | 1138 + src/dbinc/db_join.h | 37 + src/dbinc/db_page.h | 840 + src/dbinc/db_swap.h | 262 + src/dbinc/db_upgrade.h | 248 + src/dbinc/db_verify.h | 209 + src/dbinc/debug.h | 283 
+ src/dbinc/fop.h | 32 + src/dbinc/globals.h | 103 + src/dbinc/hash.h | 173 + src/dbinc/heap.h | 59 + src/dbinc/hmac.h | 39 + src/dbinc/lock.h | 326 + src/dbinc/log.h | 458 + src/dbinc/log_verify.h | 205 + src/dbinc/mp.h | 684 + src/dbinc/mutex.h | 304 + src/dbinc/mutex_int.h | 1070 + src/dbinc/os.h | 178 + src/dbinc/partition.h | 55 + src/dbinc/perfmon.h | 103 + src/dbinc/qam.h | 199 + src/dbinc/queue.h | 570 + src/dbinc/region.h | 327 + src/dbinc/rep.h | 1087 + src/dbinc/repmgr.h | 839 + src/dbinc/shqueue.h | 410 + src/dbinc/tcl_db.h | 316 + src/dbinc/txn.h | 287 + src/dbinc/win_db.h | 148 + src/dbinc/xa.h | 183 + src/dbinc_auto/api_flags.in | 220 + src/dbinc_auto/btree_auto.h | 456 + src/dbinc_auto/btree_ext.h | 147 + src/dbinc_auto/clib_ext.h | 113 + src/dbinc_auto/common_ext.h | 75 + src/dbinc_auto/crdel_auto.h | 127 + src/dbinc_auto/crypto_ext.h | 38 + src/dbinc_auto/db_auto.h | 666 + src/dbinc_auto/db_ext.h | 342 + src/dbinc_auto/dbreg_auto.h | 43 + src/dbinc_auto/dbreg_ext.h | 46 + src/dbinc_auto/env_ext.h | 152 + src/dbinc_auto/ext_185_def.in | 12 + src/dbinc_auto/ext_185_prot.in | 19 + src/dbinc_auto/ext_def.in | 66 + src/dbinc_auto/ext_prot.in | 73 + src/dbinc_auto/fileops_auto.h | 262 + src/dbinc_auto/fileops_ext.h | 44 + src/dbinc_auto/hash_auto.h | 484 + src/dbinc_auto/hash_ext.h | 129 + src/dbinc_auto/heap_auto.h | 146 + src/dbinc_auto/heap_ext.h | 55 + src/dbinc_auto/hmac_ext.h | 20 + src/dbinc_auto/int_def.in | 2239 ++ src/dbinc_auto/lock_ext.h | 78 + src/dbinc_auto/log_ext.h | 207 + src/dbinc_auto/mp_ext.h | 102 + src/dbinc_auto/mutex_ext.h | 91 + src/dbinc_auto/os_ext.h | 83 + src/dbinc_auto/qam_auto.h | 174 + src/dbinc_auto/qam_ext.h | 67 + src/dbinc_auto/rep_automsg.h | 106 + src/dbinc_auto/rep_ext.h | 149 + src/dbinc_auto/repmgr_auto.h | 41 + src/dbinc_auto/repmgr_automsg.h | 113 + src/dbinc_auto/repmgr_ext.h | 246 + src/dbinc_auto/sequence_ext.h | 17 + src/dbinc_auto/tcl_ext.h | 133 + src/dbinc_auto/txn_auto.h | 220 + src/dbinc_auto/txn_ext.h 
| 93 + src/dbinc_auto/xa_ext.h | 18 + src/dbreg/dbreg.c | 1004 + src/dbreg/dbreg.src | 37 + src/dbreg/dbreg_auto.c | 35 + src/dbreg/dbreg_autop.c | 43 + src/dbreg/dbreg_rec.c | 392 + src/dbreg/dbreg_stat.c | 140 + src/dbreg/dbreg_util.c | 838 + src/env/env_alloc.c | 757 + src/env/env_config.c | 744 + src/env/env_failchk.c | 551 + src/env/env_file.c | 128 + src/env/env_globals.c | 65 + src/env/env_method.c | 1846 ++ src/env/env_name.c | 281 + src/env/env_open.c | 1234 + src/env/env_recover.c | 1093 + src/env/env_region.c | 1466 + src/env/env_register.c | 730 + src/env/env_sig.c | 200 + src/env/env_stat.c | 878 + src/fileops/fileops.src | 137 + src/fileops/fileops_auto.c | 118 + src/fileops/fileops_autop.c | 177 + src/fileops/fop_basic.c | 318 + src/fileops/fop_rec.c | 697 + src/fileops/fop_util.c | 1770 + src/hash/hash.c | 2340 ++ src/hash/hash.src | 328 + src/hash/hash_auto.c | 209 + src/hash/hash_autop.c | 314 + src/hash/hash_compact.c | 535 + src/hash/hash_conv.c | 110 + src/hash/hash_dup.c | 943 + src/hash/hash_func.c | 240 + src/hash/hash_meta.c | 168 + src/hash/hash_method.c | 250 + src/hash/hash_open.c | 584 + src/hash/hash_page.c | 3182 ++ src/hash/hash_rec.c | 1895 ++ src/hash/hash_reclaim.c | 98 + src/hash/hash_stat.c | 518 + src/hash/hash_stub.c | 470 + src/hash/hash_upgrade.c | 323 + src/hash/hash_verify.c | 1148 + src/heap/heap.c | 2530 ++ src/heap/heap.src | 101 + src/heap/heap_auto.c | 73 + src/heap/heap_autop.c | 105 + src/heap/heap_conv.c | 92 + src/heap/heap_method.c | 118 + src/heap/heap_open.c | 427 + src/heap/heap_rec.c | 374 + src/heap/heap_reclaim.c | 151 + src/heap/heap_stat.c | 286 + src/heap/heap_stub.c | 311 + src/heap/heap_verify.c | 451 + src/hmac/hmac.c | 234 + src/hmac/sha1.c | 289 + src/lock/Design | 301 + src/lock/lock.c | 2019 ++ src/lock/lock_alloc.incl | 138 + src/lock/lock_deadlock.c | 1063 + src/lock/lock_failchk.c | 114 + src/lock/lock_id.c | 572 + src/lock/lock_list.c | 365 + src/lock/lock_method.c | 630 + 
src/lock/lock_region.c | 578 + src/lock/lock_stat.c | 770 + src/lock/lock_stub.c | 632 + src/lock/lock_timer.c | 128 + src/lock/lock_util.c | 98 + src/log/log.c | 1685 + src/log/log_archive.c | 643 + src/log/log_compare.c | 66 + src/log/log_debug.c | 146 + src/log/log_get.c | 1615 + src/log/log_method.c | 533 + src/log/log_print.c | 366 + src/log/log_put.c | 2054 ++ src/log/log_stat.c | 336 + src/log/log_verify.c | 437 + src/log/log_verify_auto.c | 318 + src/log/log_verify_int.c | 4343 +++ src/log/log_verify_stub.c | 79 + src/log/log_verify_util.c | 2233 ++ src/mp/mp_alloc.c | 699 + src/mp/mp_bh.c | 646 + src/mp/mp_fget.c | 1222 + src/mp/mp_fmethod.c | 587 + src/mp/mp_fopen.c | 1103 + src/mp/mp_fput.c | 374 + src/mp/mp_fset.c | 170 + src/mp/mp_method.c | 1091 + src/mp/mp_mvcc.c | 636 + src/mp/mp_region.c | 620 + src/mp/mp_register.c | 116 + src/mp/mp_resize.c | 605 + src/mp/mp_stat.c | 900 + src/mp/mp_sync.c | 956 + src/mp/mp_trickle.c | 112 + src/mutex/README | 110 + src/mutex/mut_alloc.c | 291 + src/mutex/mut_failchk.c | 73 + src/mutex/mut_fcntl.c | 248 + src/mutex/mut_method.c | 482 + src/mutex/mut_pthread.c | 769 + src/mutex/mut_region.c | 469 + src/mutex/mut_stat.c | 580 + src/mutex/mut_stub.c | 252 + src/mutex/mut_tas.c | 608 + src/mutex/mut_win32.c | 589 + src/mutex/test_mutex.c | 1051 + src/mutex/uts4_cc.s | 26 + src/os/os_abort.c | 33 + src/os/os_abs.c | 24 + src/os/os_addrinfo.c | 179 + src/os/os_alloc.c | 464 + src/os/os_clock.c | 73 + src/os/os_config.c | 70 + src/os/os_cpu.c | 47 + src/os/os_ctime.c | 47 + src/os/os_dir.c | 140 + src/os/os_errno.c | 129 + src/os/os_fid.c | 135 + src/os/os_flock.c | 64 + src/os/os_fsync.c | 104 + src/os/os_getenv.c | 58 + src/os/os_handle.c | 243 + src/os/os_map.c | 607 + src/os/os_mkdir.c | 52 + src/os/os_open.c | 162 + src/os/os_pid.c | 57 + src/os/os_rename.c | 53 + src/os/os_root.c | 27 + src/os/os_rpath.c | 36 + src/os/os_rw.c | 291 + src/os/os_seek.c | 66 + src/os/os_stack.c | 45 + src/os/os_stat.c | 108 + 
src/os/os_tmpdir.c | 141 + src/os/os_truncate.c | 63 + src/os/os_uid.c | 55 + src/os/os_unlink.c | 80 + src/os/os_yield.c | 95 + src/qam/qam.c | 1760 + src/qam/qam.src | 89 + src/qam/qam_auto.c | 83 + src/qam/qam_autop.c | 126 + src/qam/qam_conv.c | 79 + src/qam/qam_files.c | 893 + src/qam/qam_method.c | 399 + src/qam/qam_open.c | 346 + src/qam/qam_rec.c | 687 + src/qam/qam_stat.c | 255 + src/qam/qam_stub.c | 339 + src/qam/qam_upgrade.c | 101 + src/qam/qam_verify.c | 636 + src/rep/mlease.html | 1198 + src/rep/rep.msg | 144 + src/rep/rep_automsg.c | 827 + src/rep/rep_backup.c | 3373 ++ src/rep/rep_elect.c | 1481 + src/rep/rep_lease.c | 545 + src/rep/rep_log.c | 1044 + src/rep/rep_method.c | 3027 ++ src/rep/rep_record.c | 2582 ++ src/rep/rep_region.c | 608 + src/rep/rep_stat.c | 692 + src/rep/rep_stub.c | 425 + src/rep/rep_util.c | 2705 ++ src/rep/rep_verify.c | 751 + src/repmgr/repmgr.msg | 119 + src/repmgr/repmgr.src | 23 + src/repmgr/repmgr_auto.c | 32 + src/repmgr/repmgr_automsg.c | 748 + src/repmgr/repmgr_autop.c | 44 + src/repmgr/repmgr_elect.c | 550 + src/repmgr/repmgr_method.c | 3045 ++ src/repmgr/repmgr_msg.c | 1638 + src/repmgr/repmgr_net.c | 1882 ++ src/repmgr/repmgr_posix.c | 799 + src/repmgr/repmgr_queue.c | 180 + src/repmgr/repmgr_rec.c | 45 + src/repmgr/repmgr_sel.c | 1971 ++ src/repmgr/repmgr_stat.c | 355 + src/repmgr/repmgr_stub.c | 262 + src/repmgr/repmgr_util.c | 2026 ++ src/repmgr/repmgr_windows.c | 835 + src/sequence/seq_stat.c | 275 + src/sequence/sequence.c | 1011 + src/txn/txn.c | 2161 ++ src/txn/txn.src | 120 + src/txn/txn_auto.c | 93 + src/txn/txn_autop.c | 175 + src/txn/txn_chkpt.c | 410 + src/txn/txn_failchk.c | 101 + src/txn/txn_method.c | 124 + src/txn/txn_rec.c | 616 + src/txn/txn_recover.c | 317 + src/txn/txn_region.c | 518 + src/txn/txn_stat.c | 461 + src/txn/txn_util.c | 654 + src/xa/xa.c | 1063 + src/xa/xa_map.c | 152 + test/c/README | 91 + test/c/chk.ctests | 70 + test/c/common/test_util.h | 76 + test/c/cutest/CuTest.c | 460 + 
test/c/cutest/CuTest.h | 160 + test/c/cutest/CuTests.c | 252 + test/c/cutest/Runner.c | 127 + test/c/cutest/gen_tester.sh | 94 + test/c/cutest/license.txt | 38 + test/c/suites/TestChannel.c | 1265 + test/c/suites/TestDbTuner.c | 249 + test/c/suites/TestEncryption.c | 350 + test/c/suites/TestEnvConfig.c | 303 + test/c/suites/TestEnvMethod.c | 33 + test/c/suites/TestKeyExistErrorReturn.c | 273 + test/c/suites/TestPartial.c | 380 + test/c/suites/TestQueue.c | 845 + test/c/test_api_methods.c | 95 + test/c/test_log_verify.c | 265 + test/cxx/README | 36 + test/cxx/TestConstruct01.cpp | 321 + test/cxx/TestConstruct01.testerr | 0 test/cxx/TestConstruct01.testout | 27 + test/cxx/TestGetSetMethods.cpp | 88 + test/cxx/TestKeyRange.cpp | 169 + test/cxx/TestKeyRange.testin | 8 + test/cxx/TestKeyRange.testout | 19 + test/cxx/TestLogc.cpp | 108 + test/cxx/TestLogc.testout | 1 + test/cxx/TestMulti.cpp | 207 + test/cxx/TestSimpleAccess.cpp | 67 + test/cxx/TestSimpleAccess.testout | 3 + test/cxx/TestTruncate.cpp | 84 + test/cxx/TestTruncate.testout | 6 + test/cxx/chk.cxxtests | 71 + test/cxx/ignore | 4 + test/cxx/testall | 32 + test/cxx/testone | 122 + test/micro/README | 84 + test/micro/configs/run.small | 127 + test/micro/configs/run.std | 134 + test/micro/configs/run.workload | 38 + test/micro/report | 121 + test/micro/report.awk | 40 + test/micro/source/LIST | 101 + test/micro/source/b_curalloc.c | 69 + test/micro/source/b_curwalk.c | 241 + test/micro/source/b_del.c | 202 + test/micro/source/b_get.c | 188 + test/micro/source/b_inmem.c | 426 + test/micro/source/b_latch.c | 199 + test/micro/source/b_load.c | 197 + test/micro/source/b_open.c | 157 + test/micro/source/b_put.c | 250 + test/micro/source/b_recover.c | 141 + test/micro/source/b_txn.c | 93 + test/micro/source/b_txn_write.c | 172 + test/micro/source/b_uname.c | 147 + test/micro/source/b_util.c | 170 + test/micro/source/b_workload.c | 631 + test/micro/source/b_workload.h | 153 + test/micro/source/bench.h | 218 + 
test/micro/source/test_micro.c | 211 + test/micro/test_micro | 171 + test/tcl/README | 147 + test/tcl/TESTS | 3405 ++ test/tcl/archive.tcl | 255 + test/tcl/backup.tcl | 290 + test/tcl/bigfile001.tcl | 79 + test/tcl/bigfile002.tcl | 45 + test/tcl/byteorder.tcl | 33 + test/tcl/conscript.tcl | 123 + test/tcl/db_reptest.tcl | 1171 + test/tcl/dbm.tcl | 127 + test/tcl/dbscript.tcl | 358 + test/tcl/ddoyscript.tcl | 171 + test/tcl/ddscript.tcl | 43 + test/tcl/dead001.tcl | 133 + test/tcl/dead002.tcl | 126 + test/tcl/dead003.tcl | 167 + test/tcl/dead004.tcl | 113 + test/tcl/dead005.tcl | 143 + test/tcl/dead006.tcl | 15 + test/tcl/dead007.tcl | 35 + test/tcl/dead008.tcl | 13 + test/tcl/dead009.tcl | 13 + test/tcl/dead010.tcl | 16 + test/tcl/dead011.tcl | 15 + test/tcl/env001.tcl | 145 + test/tcl/env002.tcl | 155 + test/tcl/env003.tcl | 148 + test/tcl/env004.tcl | 94 + test/tcl/env005.tcl | 51 + test/tcl/env006.tcl | 90 + test/tcl/env007.tcl | 923 + test/tcl/env007script.tcl | 37 + test/tcl/env008.tcl | 72 + test/tcl/env009.tcl | 81 + test/tcl/env010.tcl | 49 + test/tcl/env011.tcl | 38 + test/tcl/env012.tcl | 393 + test/tcl/env013.tcl | 84 + test/tcl/env014.tcl | 117 + test/tcl/env015.tcl | 85 + test/tcl/env016.tcl | 243 + test/tcl/env017.tcl | 674 + test/tcl/env018.tcl | 57 + test/tcl/env019.tcl | 68 + test/tcl/env019script.tcl | 50 + test/tcl/env020.tcl | 1378 + test/tcl/env021.tcl | 79 + test/tcl/envscript.tcl | 100 + test/tcl/fop001.tcl | 332 + test/tcl/fop002.tcl | 135 + test/tcl/fop003.tcl | 127 + test/tcl/fop004.tcl | 260 + test/tcl/fop005.tcl | 147 + test/tcl/fop006.tcl | 241 + test/tcl/fop007.tcl | 21 + test/tcl/fop008.tcl | 15 + test/tcl/fop009.tcl | 22 + test/tcl/fop010.tcl | 22 + test/tcl/fop011.tcl | 23 + test/tcl/fop012.tcl | 22 + test/tcl/fopscript.tcl | 81 + test/tcl/foputils.tcl | 548 + test/tcl/hsearch.tcl | 50 + test/tcl/include.tcl | 29 + test/tcl/join.tcl | 454 + test/tcl/lock001.tcl | 121 + test/tcl/lock002.tcl | 154 + test/tcl/lock003.tcl | 100 + 
test/tcl/lock004.tcl | 28 + test/tcl/lock005.tcl | 176 + test/tcl/lock006.tcl | 186 + test/tcl/lockscript.tcl | 116 + test/tcl/log001.tcl | 143 + test/tcl/log002.tcl | 101 + test/tcl/log003.tcl | 143 + test/tcl/log004.tcl | 51 + test/tcl/log005.tcl | 117 + test/tcl/log006.tcl | 230 + test/tcl/log007.tcl | 110 + test/tcl/log008.tcl | 46 + test/tcl/log008script.tcl | 84 + test/tcl/log009.tcl | 122 + test/tcl/logtrack.list | 67 + test/tcl/logtrack.tcl | 142 + test/tcl/mdbscript.tcl | 402 + test/tcl/memp001.tcl | 209 + test/tcl/memp002.tcl | 74 + test/tcl/memp003.tcl | 160 + test/tcl/memp004.tcl | 82 + test/tcl/memp005.tcl | 49 + test/tcl/mpoolscript.tcl | 174 + test/tcl/mut001.tcl | 111 + test/tcl/mut002.tcl | 52 + test/tcl/mut002script.tcl | 39 + test/tcl/mut003.tcl | 38 + test/tcl/ndbm.tcl | 143 + test/tcl/parallel.tcl | 375 + test/tcl/plat001.tcl | 74 + test/tcl/portable.tcl | 351 + test/tcl/recd001.tcl | 258 + test/tcl/recd002.tcl | 108 + test/tcl/recd003.tcl | 125 + test/tcl/recd004.tcl | 103 + test/tcl/recd005.tcl | 271 + test/tcl/recd006.tcl | 268 + test/tcl/recd007.tcl | 1073 + test/tcl/recd008.tcl | 226 + test/tcl/recd009.tcl | 179 + test/tcl/recd010.tcl | 256 + test/tcl/recd011.tcl | 135 + test/tcl/recd012.tcl | 434 + test/tcl/recd013.tcl | 291 + test/tcl/recd014.tcl | 446 + test/tcl/recd015.tcl | 151 + test/tcl/recd016.tcl | 180 + test/tcl/recd017.tcl | 157 + test/tcl/recd018.tcl | 109 + test/tcl/recd019.tcl | 122 + test/tcl/recd020.tcl | 81 + test/tcl/recd021.tcl | 278 + test/tcl/recd022.tcl | 136 + test/tcl/recd023.tcl | 91 + test/tcl/recd024.tcl | 81 + test/tcl/recd025.tcl | 237 + test/tcl/recd15scr.tcl | 73 + test/tcl/recdscript.tcl | 37 + test/tcl/rep001.tcl | 238 + test/tcl/rep002.tcl | 332 + test/tcl/rep003.tcl | 299 + test/tcl/rep005.tcl | 358 + test/tcl/rep006.tcl | 224 + test/tcl/rep007.tcl | 272 + test/tcl/rep008.tcl | 153 + test/tcl/rep009.tcl | 206 + test/tcl/rep010.tcl | 274 + test/tcl/rep011.tcl | 203 + test/tcl/rep012.tcl | 299 + 
test/tcl/rep013.tcl | 307 + test/tcl/rep014.tcl | 209 + test/tcl/rep015.tcl | 329 + test/tcl/rep016.tcl | 289 + test/tcl/rep017.tcl | 263 + test/tcl/rep017script.tcl | 83 + test/tcl/rep018.tcl | 188 + test/tcl/rep018script.tcl | 98 + test/tcl/rep019.tcl | 191 + test/tcl/rep020.tcl | 322 + test/tcl/rep021.tcl | 337 + test/tcl/rep022.tcl | 309 + test/tcl/rep023.tcl | 213 + test/tcl/rep024.tcl | 245 + test/tcl/rep025.tcl | 229 + test/tcl/rep026.tcl | 277 + test/tcl/rep027.tcl | 196 + test/tcl/rep028.tcl | 255 + test/tcl/rep029.tcl | 274 + test/tcl/rep030.tcl | 384 + test/tcl/rep031.tcl | 337 + test/tcl/rep032.tcl | 207 + test/tcl/rep033.tcl | 269 + test/tcl/rep034.tcl | 398 + test/tcl/rep035.tcl | 290 + test/tcl/rep035script.tcl | 81 + test/tcl/rep036.tcl | 204 + test/tcl/rep036script.tcl | 125 + test/tcl/rep037.tcl | 283 + test/tcl/rep038.tcl | 268 + test/tcl/rep039.tcl | 463 + test/tcl/rep040.tcl | 244 + test/tcl/rep040script.tcl | 74 + test/tcl/rep041.tcl | 242 + test/tcl/rep042.tcl | 197 + test/tcl/rep042script.tcl | 78 + test/tcl/rep043.tcl | 241 + test/tcl/rep043script.tcl | 125 + test/tcl/rep044.tcl | 294 + test/tcl/rep045.tcl | 281 + test/tcl/rep045script.tcl | 164 + test/tcl/rep046.tcl | 354 + test/tcl/rep047.tcl | 273 + test/tcl/rep048.tcl | 193 + test/tcl/rep048script.tcl | 84 + test/tcl/rep049.tcl | 246 + test/tcl/rep050.tcl | 369 + test/tcl/rep051.tcl | 251 + test/tcl/rep052.tcl | 238 + test/tcl/rep053.tcl | 235 + test/tcl/rep054.tcl | 259 + test/tcl/rep055.tcl | 227 + test/tcl/rep058.tcl | 158 + test/tcl/rep060.tcl | 349 + test/tcl/rep061.tcl | 458 + test/tcl/rep062.tcl | 308 + test/tcl/rep063.tcl | 388 + test/tcl/rep064.tcl | 178 + test/tcl/rep065.tcl | 445 + test/tcl/rep065script.tcl | 416 + test/tcl/rep066.tcl | 276 + test/tcl/rep067.tcl | 323 + test/tcl/rep068.tcl | 222 + test/tcl/rep069.tcl | 265 + test/tcl/rep070.tcl | 196 + test/tcl/rep071.tcl | 173 + test/tcl/rep072.tcl | 199 + test/tcl/rep073.tcl | 201 + test/tcl/rep074.tcl | 204 + 
test/tcl/rep075.tcl | 558 + test/tcl/rep076.tcl | 199 + test/tcl/rep077.tcl | 165 + test/tcl/rep078.tcl | 361 + test/tcl/rep078script.tcl | 134 + test/tcl/rep079.tcl | 350 + test/tcl/rep080.tcl | 196 + test/tcl/rep081.tcl | 296 + test/tcl/rep082.tcl | 204 + test/tcl/rep083.tcl | 159 + test/tcl/rep084.tcl | 149 + test/tcl/rep085.tcl | 167 + test/tcl/rep086.tcl | 160 + test/tcl/rep087.tcl | 227 + test/tcl/rep088.tcl | 244 + test/tcl/rep089.tcl | 245 + test/tcl/rep090.tcl | 251 + test/tcl/rep091.tcl | 789 + test/tcl/rep092.tcl | 325 + test/tcl/rep092script.tcl | 74 + test/tcl/rep093.tcl | 252 + test/tcl/rep094.tcl | 240 + test/tcl/rep095.tcl | 235 + test/tcl/rep095script.tcl | 91 + test/tcl/rep096.tcl | 209 + test/tcl/rep097.tcl | 306 + test/tcl/rep097script.tcl | 152 + test/tcl/rep098.tcl | 273 + test/tcl/repmgr001.tcl | 198 + test/tcl/repmgr002.tcl | 241 + test/tcl/repmgr003.tcl | 202 + test/tcl/repmgr007.tcl | 147 + test/tcl/repmgr009.tcl | 186 + test/tcl/repmgr010.tcl | 167 + test/tcl/repmgr011.tcl | 116 + test/tcl/repmgr012.tcl | 123 + test/tcl/repmgr013.tcl | 127 + test/tcl/repmgr017.tcl | 156 + test/tcl/repmgr018.tcl | 142 + test/tcl/repmgr023.tcl | 183 + test/tcl/repmgr024.tcl | 218 + test/tcl/repmgr025.tcl | 179 + test/tcl/repmgr026.tcl | 243 + test/tcl/repmgr027.tcl | 125 + test/tcl/repmgr028.tcl | 286 + test/tcl/repmgr028script.tcl | 18 + test/tcl/repmgr029.tcl | 1747 + test/tcl/repmgr029script.tcl | 33 + test/tcl/repmgr029script2.tcl | 44 + test/tcl/repmgr030.tcl | 153 + test/tcl/repmgr031.tcl | 107 + test/tcl/repmgr032.tcl | 158 + test/tcl/repmgr100.tcl | 108 + test/tcl/repmgr101.tcl | 134 + test/tcl/repmgr102.tcl | 144 + test/tcl/repmgr105.tcl | 218 + test/tcl/repmgr106.tcl | 163 + test/tcl/repmgr107.tcl | 131 + test/tcl/repmgr108.tcl | 91 + test/tcl/repmgr109.tcl | 212 + test/tcl/repmgr110.tcl | 197 + test/tcl/repmgr111.tcl | 78 + test/tcl/repmgr112.tcl | 156 + test/tcl/reputils.tcl | 2924 ++ test/tcl/reputilsnoenv.tcl | 514 + test/tcl/rsrc001.tcl | 215 
+ test/tcl/rsrc002.tcl | 65 + test/tcl/rsrc003.tcl | 178 + test/tcl/rsrc004.tcl | 51 + test/tcl/sdb001.tcl | 146 + test/tcl/sdb002.tcl | 227 + test/tcl/sdb003.tcl | 179 + test/tcl/sdb004.tcl | 243 + test/tcl/sdb005.tcl | 161 + test/tcl/sdb006.tcl | 168 + test/tcl/sdb007.tcl | 108 + test/tcl/sdb008.tcl | 93 + test/tcl/sdb009.tcl | 107 + test/tcl/sdb010.tcl | 169 + test/tcl/sdb011.tcl | 141 + test/tcl/sdb012.tcl | 434 + test/tcl/sdb013.tcl | 179 + test/tcl/sdb014.tcl | 112 + test/tcl/sdb015.tcl | 117 + test/tcl/sdb016.tcl | 98 + test/tcl/sdb017.tcl | 99 + test/tcl/sdb018.tcl | 156 + test/tcl/sdb019.tcl | 139 + test/tcl/sdb020.tcl | 124 + test/tcl/sdbscript.tcl | 46 + test/tcl/sdbtest001.tcl | 149 + test/tcl/sdbtest002.tcl | 167 + test/tcl/sdbutils.tcl | 196 + test/tcl/sec001.tcl | 222 + test/tcl/sec002.tcl | 180 + test/tcl/shelltest.tcl | 80 + test/tcl/si001.tcl | 307 + test/tcl/si002.tcl | 243 + test/tcl/si003.tcl | 179 + test/tcl/si004.tcl | 237 + test/tcl/si005.tcl | 170 + test/tcl/si006.tcl | 186 + test/tcl/si007.tcl | 188 + test/tcl/si008.tcl | 274 + test/tcl/sijointest.tcl | 179 + test/tcl/siutils.tcl | 292 + test/tcl/sysscript.tcl | 282 + test/tcl/t106script.tcl | 331 + test/tcl/test.tcl | 2879 ++ test/tcl/test001.tcl | 221 + test/tcl/test002.tcl | 160 + test/tcl/test003.tcl | 204 + test/tcl/test004.tcl | 168 + test/tcl/test005.tcl | 18 + test/tcl/test006.tcl | 199 + test/tcl/test007.tcl | 18 + test/tcl/test008.tcl | 199 + test/tcl/test009.tcl | 17 + test/tcl/test010.tcl | 181 + test/tcl/test011.tcl | 475 + test/tcl/test012.tcl | 138 + test/tcl/test013.tcl | 239 + test/tcl/test014.tcl | 252 + test/tcl/test015.tcl | 284 + test/tcl/test016.tcl | 206 + test/tcl/test017.tcl | 321 + test/tcl/test018.tcl | 20 + test/tcl/test019.tcl | 135 + test/tcl/test020.tcl | 141 + test/tcl/test021.tcl | 161 + test/tcl/test022.tcl | 61 + test/tcl/test023.tcl | 225 + test/tcl/test024.tcl | 280 + test/tcl/test025.tcl | 145 + test/tcl/test026.tcl | 159 + test/tcl/test027.tcl | 16 + 
test/tcl/test028.tcl | 224 + test/tcl/test029.tcl | 255 + test/tcl/test030.tcl | 259 + test/tcl/test031.tcl | 234 + test/tcl/test032.tcl | 266 + test/tcl/test033.tcl | 226 + test/tcl/test034.tcl | 34 + test/tcl/test035.tcl | 30 + test/tcl/test036.tcl | 172 + test/tcl/test037.tcl | 199 + test/tcl/test038.tcl | 232 + test/tcl/test039.tcl | 217 + test/tcl/test040.tcl | 22 + test/tcl/test041.tcl | 17 + test/tcl/test042.tcl | 199 + test/tcl/test043.tcl | 196 + test/tcl/test044.tcl | 265 + test/tcl/test045.tcl | 125 + test/tcl/test046.tcl | 820 + test/tcl/test047.tcl | 261 + test/tcl/test048.tcl | 178 + test/tcl/test049.tcl | 186 + test/tcl/test050.tcl | 220 + test/tcl/test051.tcl | 225 + test/tcl/test052.tcl | 268 + test/tcl/test053.tcl | 241 + test/tcl/test054.tcl | 459 + test/tcl/test055.tcl | 140 + test/tcl/test056.tcl | 174 + test/tcl/test057.tcl | 207 + test/tcl/test058.tcl | 110 + test/tcl/test059.tcl | 149 + test/tcl/test060.tcl | 59 + test/tcl/test061.tcl | 231 + test/tcl/test062.tcl | 159 + test/tcl/test063.tcl | 173 + test/tcl/test064.tcl | 68 + test/tcl/test065.tcl | 207 + test/tcl/test066.tcl | 103 + test/tcl/test067.tcl | 163 + test/tcl/test068.tcl | 233 + test/tcl/test069.tcl | 13 + test/tcl/test070.tcl | 137 + test/tcl/test071.tcl | 15 + test/tcl/test072.tcl | 258 + test/tcl/test073.tcl | 296 + test/tcl/test074.tcl | 276 + test/tcl/test076.tcl | 90 + test/tcl/test077.tcl | 92 + test/tcl/test078.tcl | 252 + test/tcl/test079.tcl | 28 + test/tcl/test081.tcl | 14 + test/tcl/test082.tcl | 13 + test/tcl/test083.tcl | 171 + test/tcl/test084.tcl | 52 + test/tcl/test085.tcl | 340 + test/tcl/test086.tcl | 168 + test/tcl/test087.tcl | 293 + test/tcl/test088.tcl | 176 + test/tcl/test089.tcl | 275 + test/tcl/test090.tcl | 15 + test/tcl/test091.tcl | 19 + test/tcl/test092.tcl | 252 + test/tcl/test093.tcl | 445 + test/tcl/test094.tcl | 201 + test/tcl/test095.tcl | 369 + test/tcl/test096.tcl | 398 + test/tcl/test097.tcl | 199 + test/tcl/test098.tcl | 85 + 
test/tcl/test099.tcl | 275 + test/tcl/test100.tcl | 16 + test/tcl/test101.tcl | 16 + test/tcl/test102.tcl | 235 + test/tcl/test103.tcl | 222 + test/tcl/test106.tcl | 113 + test/tcl/test107.tcl | 168 + test/tcl/test109.tcl | 321 + test/tcl/test110.tcl | 168 + test/tcl/test111.tcl | 384 + test/tcl/test112.tcl | 291 + test/tcl/test113.tcl | 289 + test/tcl/test114.tcl | 378 + test/tcl/test115.tcl | 349 + test/tcl/test116.tcl | 343 + test/tcl/test117.tcl | 227 + test/tcl/test119.tcl | 258 + test/tcl/test120.tcl | 98 + test/tcl/test121.tcl | 125 + test/tcl/test122.tcl | 103 + test/tcl/test123.tcl | 81 + test/tcl/test124.tcl | 150 + test/tcl/test125.tcl | 205 + test/tcl/test126.tcl | 348 + test/tcl/test127.tcl | 312 + test/tcl/test128.tcl | 35 + test/tcl/test129.tcl | 19 + test/tcl/test130.tcl | 301 + test/tcl/test131.tcl | 479 + test/tcl/test132.tcl | 29 + test/tcl/test133.tcl | 183 + test/tcl/test134.tcl | 15 + test/tcl/testparams.tcl | 472 + test/tcl/testutils.tcl | 4045 +++ test/tcl/txn001.tcl | 114 + test/tcl/txn002.tcl | 89 + test/tcl/txn003.tcl | 230 + test/tcl/txn004.tcl | 60 + test/tcl/txn005.tcl | 73 + test/tcl/txn006.tcl | 45 + test/tcl/txn007.tcl | 56 + test/tcl/txn008.tcl | 30 + test/tcl/txn009.tcl | 30 + test/tcl/txn010.tcl | 143 + test/tcl/txn011.tcl | 224 + test/tcl/txn012.tcl | 61 + test/tcl/txn012script.tcl | 33 + test/tcl/txn013.tcl | 76 + test/tcl/txn014.tcl | 158 + test/tcl/txnscript.tcl | 66 + test/tcl/update.tcl | 92 + test/tcl/upgrade.tcl | 856 + test/tcl/wordlist | 10001 ++++++ test/tcl/wrap.tcl | 99 + test/tcl/wrap_reptest.tcl | 63 + test/xa/chk.xa | 133 + test/xa/src1/client.c | 359 + test/xa/src1/datafml.fml | 5 + test/xa/src1/datafml.h | 5 + test/xa/src1/hdbrec.h | 14 + test/xa/src1/htimestampxa.c | 14 + test/xa/src1/htimestampxa.h | 13 + test/xa/src1/run.sh | 130 + test/xa/src1/server.c | 244 + test/xa/src1/tuxconfig.sh | 47 + test/xa/src2/bdb1.c | 223 + test/xa/src2/bdb2.c | 158 + test/xa/src2/client.c | 149 + test/xa/src2/run.sh | 97 + 
test/xa/src2/tuxconfig.sh | 53 + test/xa/src3/client.c | 361 + test/xa/src3/run.sh | 122 + test/xa/src3/server.c | 174 + test/xa/src3/tuxconfig.sh | 56 + test/xa/src4/client.c | 244 + test/xa/src4/run.sh | 117 + test/xa/src4/server.c | 149 + test/xa/src4/tuxconfig.sh | 47 + util/db_archive.c | 185 + util/db_checkpoint.c | 240 + util/db_deadlock.c | 237 + util/db_dump.c | 528 + util/db_dump185.c | 358 + util/db_hotbackup.c | 1158 + util/db_load.c | 1502 + util/db_log_verify.c | 338 + util/db_printlog.c | 678 + util/db_printlog/README | 34 + util/db_printlog/commit.awk | 7 + util/db_printlog/count.awk | 9 + util/db_printlog/dbname.awk | 83 + util/db_printlog/fileid.awk | 38 + util/db_printlog/logstat.awk | 36 + util/db_printlog/pgno.awk | 56 + util/db_printlog/range.awk | 27 + util/db_printlog/rectype.awk | 27 + util/db_printlog/status.awk | 50 + util/db_printlog/txn.awk | 35 + util/db_recover.c | 316 + util/db_replicate.c | 419 + util/db_stat.c | 477 + util/db_stat/dd.sh | 82 + util/db_tuner.c | 1336 + util/db_upgrade.c | 199 + util/db_verify.c | 272 + util/dtrace/apicalls.d | 35 + util/dtrace/apitimes.d | 53 + util/dtrace/apitrace.d | 65 + util/dtrace/cache.d | 106 + util/dtrace/dbdefs.d | 135 + util/dtrace/locktimes.d | 160 + util/dtrace/locktimesid.d | 113 + util/dtrace/mutex.d | 102 + util/dtrace/showerror.d | 89 + util/systemtap/apicalls.stp | 51 + util/systemtap/apitimes.stp | 54 + util/systemtap/apitrace.stp | 45 + util/systemtap/cache.stp | 104 + util/systemtap/locktimes.stp | 154 + util/systemtap/locktimesid.stp | 123 + util/systemtap/mutex.stp | 112 + util/systemtap/showerror.stp | 62 + 1237 files changed, 496881 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 build_unix/.IGNORE_ME create mode 100644 dist/ChangeLog create mode 100644 dist/Makefile.in create mode 100644 dist/RELEASE create mode 100644 dist/aclocal/clock.m4 create mode 100644 dist/aclocal/config.m4 create mode 100644 dist/aclocal/cxx.m4 create mode 
100644 dist/aclocal/gcc.m4 create mode 100644 dist/aclocal/libtool.m4 create mode 100644 dist/aclocal/ltoptions.m4 create mode 100644 dist/aclocal/ltsugar.m4 create mode 100644 dist/aclocal/ltversion.m4 create mode 100644 dist/aclocal/lt~obsolete.m4 create mode 100644 dist/aclocal/mmap.m4 create mode 100644 dist/aclocal/mutex.m4 create mode 100644 dist/aclocal/options.m4 create mode 100644 dist/aclocal/perfmon.m4 create mode 100644 dist/aclocal/programs.m4 create mode 100644 dist/aclocal/sequence.m4 create mode 100644 dist/aclocal/socklen.m4 create mode 100644 dist/aclocal/sosuffix.m4 create mode 100644 dist/aclocal/tcl.m4 create mode 100644 dist/aclocal/tls.m4 create mode 100644 dist/aclocal/types.m4 create mode 100644 dist/api_flags create mode 100644 dist/api_flags.c create mode 100644 dist/buildpkg create mode 100644 dist/bumprel create mode 100644 dist/clib_port.in create mode 100755 dist/config.guess create mode 100644 dist/config.hin create mode 100755 dist/config.sub create mode 100755 dist/configure create mode 100644 dist/configure.ac create mode 100644 dist/db_provider.d create mode 100644 dist/errno.h create mode 100644 dist/events.in create mode 100644 dist/gen_inc.awk create mode 100644 dist/gen_msg.awk create mode 100644 dist/gen_provider.pl create mode 100644 dist/gen_rec.awk create mode 100755 dist/install-sh create mode 100644 dist/ltmain.sh create mode 100644 dist/pubdef.in create mode 100755 dist/s_all create mode 100755 dist/s_apiflags create mode 100755 dist/s_config create mode 100755 dist/s_crypto create mode 100755 dist/s_cscope create mode 100755 dist/s_include create mode 100755 dist/s_message create mode 100755 dist/s_message_id create mode 100755 dist/s_perm create mode 100755 dist/s_php create mode 100755 dist/s_readme create mode 100755 dist/s_recover create mode 100755 dist/s_sig create mode 100755 dist/s_symlink create mode 100755 dist/s_tags create mode 100755 dist/s_test create mode 100755 dist/s_validate create mode 100644 
dist/srcfiles.in create mode 100644 dist/template/rec_btree create mode 100644 dist/template/rec_crdel create mode 100644 dist/template/rec_ctemp create mode 100644 dist/template/rec_db create mode 100644 dist/template/rec_dbreg create mode 100644 dist/template/rec_fileops create mode 100644 dist/template/rec_hash create mode 100644 dist/template/rec_heap create mode 100644 dist/template/rec_qam create mode 100644 dist/template/rec_repmgr create mode 100644 dist/template/rec_txn create mode 100644 dist/template/rec_utemp create mode 100644 dist/validate/README create mode 100644 dist/validate/s_chk_build_configs create mode 100644 dist/validate/s_chk_comma create mode 100644 dist/validate/s_chk_comma.c create mode 100644 dist/validate/s_chk_copyright create mode 100644 dist/validate/s_chk_defines create mode 100644 dist/validate/s_chk_err create mode 100644 dist/validate/s_chk_ext_method_calls create mode 100644 dist/validate/s_chk_flags create mode 100644 dist/validate/s_chk_flags.c create mode 100644 dist/validate/s_chk_inclconfig create mode 100644 dist/validate/s_chk_include create mode 100644 dist/validate/s_chk_logverify create mode 100644 dist/validate/s_chk_logverify.c create mode 100644 dist/validate/s_chk_message_id create mode 100644 dist/validate/s_chk_mutex_print create mode 100644 dist/validate/s_chk_newline create mode 100644 dist/validate/s_chk_offt create mode 100644 dist/validate/s_chk_osdir create mode 100644 dist/validate/s_chk_proto create mode 100644 dist/validate/s_chk_pubdef create mode 100644 dist/validate/s_chk_runrecovery create mode 100644 dist/validate/s_chk_spell create mode 100644 dist/validate/s_chk_spell.dict create mode 100644 dist/validate/s_chk_srcfiles create mode 100644 dist/validate/s_chk_stats create mode 100644 dist/validate/s_chk_tags create mode 100644 examples/c/README create mode 100644 examples/c/csv/DbRecord.c create mode 100644 examples/c/csv/Makefile create mode 100644 examples/c/csv/README create mode 100644 
examples/c/csv/code.c create mode 100644 examples/c/csv/csv.h create mode 100644 examples/c/csv/csv_extern.h create mode 100644 examples/c/csv/db.c create mode 100644 examples/c/csv/load.c create mode 100644 examples/c/csv/load_main.c create mode 100644 examples/c/csv/query.c create mode 100644 examples/c/csv/query_main.c create mode 100644 examples/c/csv/sample.csv create mode 100644 examples/c/csv/sample.desc create mode 100644 examples/c/csv/util.c create mode 100644 examples/c/ex_access.c create mode 100644 examples/c/ex_apprec/auto_rebuild create mode 100644 examples/c/ex_apprec/ex_apprec.c create mode 100644 examples/c/ex_apprec/ex_apprec.h create mode 100644 examples/c/ex_apprec/ex_apprec.src create mode 100644 examples/c/ex_apprec/ex_apprec_auto.c create mode 100644 examples/c/ex_apprec/ex_apprec_auto.h create mode 100644 examples/c/ex_apprec/ex_apprec_autop.c create mode 100644 examples/c/ex_apprec/ex_apprec_rec.c create mode 100644 examples/c/ex_apprec/ex_apprec_template create mode 100644 examples/c/ex_btrec.c create mode 100644 examples/c/ex_bulk.c create mode 100644 examples/c/ex_env.c create mode 100644 examples/c/ex_heap.c create mode 100644 examples/c/ex_lock.c create mode 100644 examples/c/ex_mpool.c create mode 100644 examples/c/ex_rep/README create mode 100644 examples/c/ex_rep/base/rep_base.c create mode 100644 examples/c/ex_rep/base/rep_base.h create mode 100644 examples/c/ex_rep/base/rep_msg.c create mode 100644 examples/c/ex_rep/base/rep_net.c create mode 100644 examples/c/ex_rep/common/rep_common.c create mode 100644 examples/c/ex_rep/common/rep_common.h create mode 100644 examples/c/ex_rep/mgr/rep_mgr.c create mode 100644 examples/c/ex_rep_chan/rep_chan.c create mode 100644 examples/c/ex_rep_chan/rep_chan.h create mode 100644 examples/c/ex_rep_chan/rep_chan_util.c create mode 100644 examples/c/ex_sequence.c create mode 100644 examples/c/ex_stream.c create mode 100644 examples/c/ex_thread.c create mode 100644 examples/c/ex_tpcb.c create mode 
100644 examples/cxx/AccessExample.cpp create mode 100644 examples/cxx/BtRecExample.cpp create mode 100644 examples/cxx/EnvExample.cpp create mode 100644 examples/cxx/LockExample.cpp create mode 100644 examples/cxx/MpoolExample.cpp create mode 100644 examples/cxx/SequenceExample.cpp create mode 100644 examples/cxx/TpcbExample.cpp create mode 100644 examples/cxx/excxx_repquote/RepConfigInfo.cpp create mode 100644 examples/cxx/excxx_repquote/RepConfigInfo.h create mode 100644 examples/cxx/excxx_repquote/RepQuoteExample.cpp create mode 100644 examples/cxx/excxx_repquote/dbc_auto.h create mode 100644 examples/cxx/excxx_repquote_gsg/RepConfigInfo.h create mode 100644 examples/cxx/excxx_repquote_gsg/RepMgrGSG.cpp create mode 100644 examples/cxx/excxx_repquote_gsg/SimpleConfigInfo.h create mode 100644 examples/cxx/excxx_repquote_gsg/SimpleTxn.cpp create mode 100644 examples/cxx/getting_started/MyDb.cpp create mode 100644 examples/cxx/getting_started/MyDb.hpp create mode 100644 examples/cxx/getting_started/excxx_example_database_load.cpp create mode 100644 examples/cxx/getting_started/excxx_example_database_read.cpp create mode 100644 examples/cxx/getting_started/gettingStartedCommon.hpp create mode 100644 examples/cxx/getting_started/inventory.txt create mode 100644 examples/cxx/getting_started/vendors.txt create mode 100644 examples/cxx/txn_guide/TxnGuide.cpp create mode 100644 examples/cxx/txn_guide/TxnGuideInMemory.cpp create mode 100644 examples/stl/README create mode 100644 examples/stl/StlAccessExample.cpp create mode 100644 examples/stl/StlAdvancedFeatures.cpp create mode 100644 examples/stl/StlAdvancedFeatures.h create mode 100644 examples/stl/StlTpcbExample.cpp create mode 100644 examples/stl/StlTransactionGuideExample.cpp create mode 100644 examples/stl/repquote/README create mode 100644 examples/stl/repquote/StlRepConfigInfo.cpp create mode 100644 examples/stl/repquote/StlRepConfigInfo.h create mode 100644 examples/stl/repquote/StlRepQuoteExample.cpp create mode 
100644 lang/cxx/cxx_channel.cpp create mode 100644 lang/cxx/cxx_db.cpp create mode 100644 lang/cxx/cxx_dbc.cpp create mode 100644 lang/cxx/cxx_dbt.cpp create mode 100644 lang/cxx/cxx_env.cpp create mode 100644 lang/cxx/cxx_except.cpp create mode 100644 lang/cxx/cxx_lock.cpp create mode 100644 lang/cxx/cxx_logc.cpp create mode 100644 lang/cxx/cxx_mpool.cpp create mode 100644 lang/cxx/cxx_multi.cpp create mode 100644 lang/cxx/cxx_rid.cpp create mode 100644 lang/cxx/cxx_seq.cpp create mode 100644 lang/cxx/cxx_site.cpp create mode 100644 lang/cxx/cxx_txn.cpp create mode 100644 lang/cxx/stl/dbstl_base_iterator.h create mode 100644 lang/cxx/stl/dbstl_common.in create mode 100644 lang/cxx/stl/dbstl_container.cpp create mode 100644 lang/cxx/stl/dbstl_container.h create mode 100644 lang/cxx/stl/dbstl_dbc.h create mode 100644 lang/cxx/stl/dbstl_dbt.h create mode 100644 lang/cxx/stl/dbstl_element_ref.h create mode 100644 lang/cxx/stl/dbstl_exception.h create mode 100644 lang/cxx/stl/dbstl_inner_utility.h create mode 100644 lang/cxx/stl/dbstl_map.h create mode 100644 lang/cxx/stl/dbstl_resource_manager.cpp create mode 100644 lang/cxx/stl/dbstl_resource_manager.h create mode 100644 lang/cxx/stl/dbstl_set.h create mode 100644 lang/cxx/stl/dbstl_utility.h create mode 100644 lang/cxx/stl/dbstl_vector.h create mode 100644 lang/tcl/docs/db.html create mode 100644 lang/tcl/docs/env.html create mode 100644 lang/tcl/docs/historic.html create mode 100644 lang/tcl/docs/index.html create mode 100644 lang/tcl/docs/library.html create mode 100644 lang/tcl/docs/lock.html create mode 100644 lang/tcl/docs/log.html create mode 100644 lang/tcl/docs/mpool.html create mode 100644 lang/tcl/docs/rep.html create mode 100644 lang/tcl/docs/sequence.html create mode 100644 lang/tcl/docs/test.html create mode 100644 lang/tcl/docs/txn.html create mode 100644 lang/tcl/tcl_compat.c create mode 100644 lang/tcl/tcl_db.c create mode 100644 lang/tcl/tcl_db_pkg.c create mode 100644 lang/tcl/tcl_dbcursor.c create 
mode 100644 lang/tcl/tcl_env.c create mode 100644 lang/tcl/tcl_internal.c create mode 100644 lang/tcl/tcl_lock.c create mode 100644 lang/tcl/tcl_log.c create mode 100644 lang/tcl/tcl_mp.c create mode 100644 lang/tcl/tcl_mutex.c create mode 100644 lang/tcl/tcl_rep.c create mode 100644 lang/tcl/tcl_seq.c create mode 100644 lang/tcl/tcl_txn.c create mode 100644 lang/tcl/tcl_util.c create mode 100644 src/btree/bt_compact.c create mode 100644 src/btree/bt_compare.c create mode 100644 src/btree/bt_compress.c create mode 100644 src/btree/bt_conv.c create mode 100644 src/btree/bt_curadj.c create mode 100644 src/btree/bt_cursor.c create mode 100644 src/btree/bt_delete.c create mode 100644 src/btree/bt_method.c create mode 100644 src/btree/bt_open.c create mode 100644 src/btree/bt_put.c create mode 100644 src/btree/bt_rec.c create mode 100644 src/btree/bt_reclaim.c create mode 100644 src/btree/bt_recno.c create mode 100644 src/btree/bt_rsearch.c create mode 100644 src/btree/bt_search.c create mode 100644 src/btree/bt_split.c create mode 100644 src/btree/bt_stat.c create mode 100644 src/btree/bt_upgrade.c create mode 100644 src/btree/bt_verify.c create mode 100644 src/btree/btree.src create mode 100644 src/btree/btree_auto.c create mode 100644 src/btree/btree_autop.c create mode 100644 src/clib/atoi.c create mode 100644 src/clib/atol.c create mode 100644 src/clib/bsearch.c create mode 100644 src/clib/getcwd.c create mode 100644 src/clib/getopt.c create mode 100644 src/clib/isalpha.c create mode 100644 src/clib/isprint.c create mode 100644 src/clib/isspace.c create mode 100644 src/clib/memcmp.c create mode 100644 src/clib/memmove.c create mode 100644 src/clib/printf.c create mode 100644 src/clib/qsort.c create mode 100644 src/clib/raise.c create mode 100644 src/clib/rand.c create mode 100644 src/clib/snprintf.c create mode 100644 src/clib/strcasecmp.c create mode 100644 src/clib/strcat.c create mode 100644 src/clib/strchr.c create mode 100644 src/clib/strdup.c create mode 
100644 src/clib/strerror.c create mode 100644 src/clib/strncat.c create mode 100644 src/clib/strncmp.c create mode 100644 src/clib/strrchr.c create mode 100644 src/clib/strsep.c create mode 100644 src/clib/strtol.c create mode 100644 src/clib/strtoul.c create mode 100644 src/clib/time.c create mode 100644 src/common/clock.c create mode 100644 src/common/crypto_stub.c create mode 100644 src/common/db_byteorder.c create mode 100644 src/common/db_compint.c create mode 100644 src/common/db_err.c create mode 100644 src/common/db_getlong.c create mode 100644 src/common/db_idspace.c create mode 100644 src/common/db_log2.c create mode 100644 src/common/db_shash.c create mode 100644 src/common/dbt.c create mode 100644 src/common/mkpath.c create mode 100644 src/common/openflags.c create mode 100644 src/common/os_method.c create mode 100644 src/common/util_arg.c create mode 100644 src/common/util_cache.c create mode 100644 src/common/util_log.c create mode 100644 src/common/util_sig.c create mode 100644 src/common/zerofill.c create mode 100644 src/crypto/aes_method.c create mode 100644 src/crypto/crypto.c create mode 100644 src/crypto/crypto.html create mode 100644 src/crypto/mersenne/mt19937db.c create mode 100644 src/crypto/rijndael/rijndael-alg-fst.c create mode 100644 src/crypto/rijndael/rijndael-alg-fst.h create mode 100644 src/crypto/rijndael/rijndael-api-fst.c create mode 100644 src/crypto/rijndael/rijndael-api-fst.h create mode 100644 src/db/crdel.src create mode 100644 src/db/crdel_auto.c create mode 100644 src/db/crdel_autop.c create mode 100644 src/db/crdel_rec.c create mode 100644 src/db/db.c create mode 100644 src/db/db.src create mode 100644 src/db/db_am.c create mode 100644 src/db/db_auto.c create mode 100644 src/db/db_autop.c create mode 100644 src/db/db_cam.c create mode 100644 src/db/db_cds.c create mode 100644 src/db/db_compact.c create mode 100644 src/db/db_conv.c create mode 100644 src/db/db_copy.c create mode 100644 src/db/db_dispatch.c create mode 
100644 src/db/db_dup.c create mode 100644 src/db/db_iface.c create mode 100644 src/db/db_join.c create mode 100644 src/db/db_meta.c create mode 100644 src/db/db_method.c create mode 100644 src/db/db_open.c create mode 100644 src/db/db_overflow.c create mode 100644 src/db/db_ovfl_vrfy.c create mode 100644 src/db/db_pr.c create mode 100644 src/db/db_rec.c create mode 100644 src/db/db_reclaim.c create mode 100644 src/db/db_remove.c create mode 100644 src/db/db_rename.c create mode 100644 src/db/db_ret.c create mode 100644 src/db/db_setid.c create mode 100644 src/db/db_setlsn.c create mode 100644 src/db/db_sort_multiple.c create mode 100644 src/db/db_stati.c create mode 100644 src/db/db_truncate.c create mode 100644 src/db/db_upg.c create mode 100644 src/db/db_upg_opd.c create mode 100644 src/db/db_vrfy.c create mode 100644 src/db/db_vrfy_stub.c create mode 100644 src/db/db_vrfyutil.c create mode 100644 src/db/partition.c create mode 100644 src/dbinc/atomic.h create mode 100644 src/dbinc/btree.h create mode 100644 src/dbinc/clock.h create mode 100644 src/dbinc/crypto.h create mode 100644 src/dbinc/cxx_int.h create mode 100644 src/dbinc/db.in create mode 100644 src/dbinc/db_185.in create mode 100644 src/dbinc/db_am.h create mode 100644 src/dbinc/db_cxx.in create mode 100644 src/dbinc/db_dispatch.h create mode 100644 src/dbinc/db_int.in create mode 100644 src/dbinc/db_join.h create mode 100644 src/dbinc/db_page.h create mode 100644 src/dbinc/db_swap.h create mode 100644 src/dbinc/db_upgrade.h create mode 100644 src/dbinc/db_verify.h create mode 100644 src/dbinc/debug.h create mode 100644 src/dbinc/fop.h create mode 100644 src/dbinc/globals.h create mode 100644 src/dbinc/hash.h create mode 100644 src/dbinc/heap.h create mode 100644 src/dbinc/hmac.h create mode 100644 src/dbinc/lock.h create mode 100644 src/dbinc/log.h create mode 100644 src/dbinc/log_verify.h create mode 100644 src/dbinc/mp.h create mode 100644 src/dbinc/mutex.h create mode 100644 src/dbinc/mutex_int.h 
create mode 100644 src/dbinc/os.h create mode 100644 src/dbinc/partition.h create mode 100644 src/dbinc/perfmon.h create mode 100644 src/dbinc/qam.h create mode 100644 src/dbinc/queue.h create mode 100644 src/dbinc/region.h create mode 100644 src/dbinc/rep.h create mode 100644 src/dbinc/repmgr.h create mode 100644 src/dbinc/shqueue.h create mode 100644 src/dbinc/tcl_db.h create mode 100644 src/dbinc/txn.h create mode 100644 src/dbinc/win_db.h create mode 100644 src/dbinc/xa.h create mode 100644 src/dbinc_auto/api_flags.in create mode 100644 src/dbinc_auto/btree_auto.h create mode 100644 src/dbinc_auto/btree_ext.h create mode 100644 src/dbinc_auto/clib_ext.h create mode 100644 src/dbinc_auto/common_ext.h create mode 100644 src/dbinc_auto/crdel_auto.h create mode 100644 src/dbinc_auto/crypto_ext.h create mode 100644 src/dbinc_auto/db_auto.h create mode 100644 src/dbinc_auto/db_ext.h create mode 100644 src/dbinc_auto/dbreg_auto.h create mode 100644 src/dbinc_auto/dbreg_ext.h create mode 100644 src/dbinc_auto/env_ext.h create mode 100644 src/dbinc_auto/ext_185_def.in create mode 100644 src/dbinc_auto/ext_185_prot.in create mode 100644 src/dbinc_auto/ext_def.in create mode 100644 src/dbinc_auto/ext_prot.in create mode 100644 src/dbinc_auto/fileops_auto.h create mode 100644 src/dbinc_auto/fileops_ext.h create mode 100644 src/dbinc_auto/hash_auto.h create mode 100644 src/dbinc_auto/hash_ext.h create mode 100644 src/dbinc_auto/heap_auto.h create mode 100644 src/dbinc_auto/heap_ext.h create mode 100644 src/dbinc_auto/hmac_ext.h create mode 100644 src/dbinc_auto/int_def.in create mode 100644 src/dbinc_auto/lock_ext.h create mode 100644 src/dbinc_auto/log_ext.h create mode 100644 src/dbinc_auto/mp_ext.h create mode 100644 src/dbinc_auto/mutex_ext.h create mode 100644 src/dbinc_auto/os_ext.h create mode 100644 src/dbinc_auto/qam_auto.h create mode 100644 src/dbinc_auto/qam_ext.h create mode 100644 src/dbinc_auto/rep_automsg.h create mode 100644 src/dbinc_auto/rep_ext.h create 
mode 100644 src/dbinc_auto/repmgr_auto.h create mode 100644 src/dbinc_auto/repmgr_automsg.h create mode 100644 src/dbinc_auto/repmgr_ext.h create mode 100644 src/dbinc_auto/sequence_ext.h create mode 100644 src/dbinc_auto/tcl_ext.h create mode 100644 src/dbinc_auto/txn_auto.h create mode 100644 src/dbinc_auto/txn_ext.h create mode 100644 src/dbinc_auto/xa_ext.h create mode 100644 src/dbreg/dbreg.c create mode 100644 src/dbreg/dbreg.src create mode 100644 src/dbreg/dbreg_auto.c create mode 100644 src/dbreg/dbreg_autop.c create mode 100644 src/dbreg/dbreg_rec.c create mode 100644 src/dbreg/dbreg_stat.c create mode 100644 src/dbreg/dbreg_util.c create mode 100644 src/env/env_alloc.c create mode 100644 src/env/env_config.c create mode 100644 src/env/env_failchk.c create mode 100644 src/env/env_file.c create mode 100644 src/env/env_globals.c create mode 100644 src/env/env_method.c create mode 100644 src/env/env_name.c create mode 100644 src/env/env_open.c create mode 100644 src/env/env_recover.c create mode 100644 src/env/env_region.c create mode 100644 src/env/env_register.c create mode 100644 src/env/env_sig.c create mode 100644 src/env/env_stat.c create mode 100644 src/fileops/fileops.src create mode 100644 src/fileops/fileops_auto.c create mode 100644 src/fileops/fileops_autop.c create mode 100644 src/fileops/fop_basic.c create mode 100644 src/fileops/fop_rec.c create mode 100644 src/fileops/fop_util.c create mode 100644 src/hash/hash.c create mode 100644 src/hash/hash.src create mode 100644 src/hash/hash_auto.c create mode 100644 src/hash/hash_autop.c create mode 100644 src/hash/hash_compact.c create mode 100644 src/hash/hash_conv.c create mode 100644 src/hash/hash_dup.c create mode 100644 src/hash/hash_func.c create mode 100644 src/hash/hash_meta.c create mode 100644 src/hash/hash_method.c create mode 100644 src/hash/hash_open.c create mode 100644 src/hash/hash_page.c create mode 100644 src/hash/hash_rec.c create mode 100644 src/hash/hash_reclaim.c create mode 
100644 src/hash/hash_stat.c create mode 100644 src/hash/hash_stub.c create mode 100644 src/hash/hash_upgrade.c create mode 100644 src/hash/hash_verify.c create mode 100644 src/heap/heap.c create mode 100644 src/heap/heap.src create mode 100644 src/heap/heap_auto.c create mode 100644 src/heap/heap_autop.c create mode 100644 src/heap/heap_conv.c create mode 100644 src/heap/heap_method.c create mode 100644 src/heap/heap_open.c create mode 100644 src/heap/heap_rec.c create mode 100644 src/heap/heap_reclaim.c create mode 100644 src/heap/heap_stat.c create mode 100644 src/heap/heap_stub.c create mode 100644 src/heap/heap_verify.c create mode 100644 src/hmac/hmac.c create mode 100644 src/hmac/sha1.c create mode 100644 src/lock/Design create mode 100644 src/lock/lock.c create mode 100644 src/lock/lock_alloc.incl create mode 100644 src/lock/lock_deadlock.c create mode 100644 src/lock/lock_failchk.c create mode 100644 src/lock/lock_id.c create mode 100644 src/lock/lock_list.c create mode 100644 src/lock/lock_method.c create mode 100644 src/lock/lock_region.c create mode 100644 src/lock/lock_stat.c create mode 100644 src/lock/lock_stub.c create mode 100644 src/lock/lock_timer.c create mode 100644 src/lock/lock_util.c create mode 100644 src/log/log.c create mode 100644 src/log/log_archive.c create mode 100644 src/log/log_compare.c create mode 100644 src/log/log_debug.c create mode 100644 src/log/log_get.c create mode 100644 src/log/log_method.c create mode 100644 src/log/log_print.c create mode 100644 src/log/log_put.c create mode 100644 src/log/log_stat.c create mode 100644 src/log/log_verify.c create mode 100644 src/log/log_verify_auto.c create mode 100644 src/log/log_verify_int.c create mode 100644 src/log/log_verify_stub.c create mode 100644 src/log/log_verify_util.c create mode 100644 src/mp/mp_alloc.c create mode 100644 src/mp/mp_bh.c create mode 100644 src/mp/mp_fget.c create mode 100644 src/mp/mp_fmethod.c create mode 100644 src/mp/mp_fopen.c create mode 100644 
src/mp/mp_fput.c create mode 100644 src/mp/mp_fset.c create mode 100644 src/mp/mp_method.c create mode 100644 src/mp/mp_mvcc.c create mode 100644 src/mp/mp_region.c create mode 100644 src/mp/mp_register.c create mode 100644 src/mp/mp_resize.c create mode 100644 src/mp/mp_stat.c create mode 100644 src/mp/mp_sync.c create mode 100644 src/mp/mp_trickle.c create mode 100644 src/mutex/README create mode 100644 src/mutex/mut_alloc.c create mode 100644 src/mutex/mut_failchk.c create mode 100644 src/mutex/mut_fcntl.c create mode 100644 src/mutex/mut_method.c create mode 100644 src/mutex/mut_pthread.c create mode 100644 src/mutex/mut_region.c create mode 100644 src/mutex/mut_stat.c create mode 100644 src/mutex/mut_stub.c create mode 100644 src/mutex/mut_tas.c create mode 100644 src/mutex/mut_win32.c create mode 100644 src/mutex/test_mutex.c create mode 100644 src/mutex/uts4_cc.s create mode 100644 src/os/os_abort.c create mode 100644 src/os/os_abs.c create mode 100644 src/os/os_addrinfo.c create mode 100644 src/os/os_alloc.c create mode 100644 src/os/os_clock.c create mode 100644 src/os/os_config.c create mode 100644 src/os/os_cpu.c create mode 100644 src/os/os_ctime.c create mode 100644 src/os/os_dir.c create mode 100644 src/os/os_errno.c create mode 100644 src/os/os_fid.c create mode 100644 src/os/os_flock.c create mode 100644 src/os/os_fsync.c create mode 100644 src/os/os_getenv.c create mode 100644 src/os/os_handle.c create mode 100644 src/os/os_map.c create mode 100644 src/os/os_mkdir.c create mode 100644 src/os/os_open.c create mode 100644 src/os/os_pid.c create mode 100644 src/os/os_rename.c create mode 100644 src/os/os_root.c create mode 100644 src/os/os_rpath.c create mode 100644 src/os/os_rw.c create mode 100644 src/os/os_seek.c create mode 100644 src/os/os_stack.c create mode 100644 src/os/os_stat.c create mode 100644 src/os/os_tmpdir.c create mode 100644 src/os/os_truncate.c create mode 100644 src/os/os_uid.c create mode 100644 src/os/os_unlink.c create mode 
100644 src/os/os_yield.c create mode 100644 src/qam/qam.c create mode 100644 src/qam/qam.src create mode 100644 src/qam/qam_auto.c create mode 100644 src/qam/qam_autop.c create mode 100644 src/qam/qam_conv.c create mode 100644 src/qam/qam_files.c create mode 100644 src/qam/qam_method.c create mode 100644 src/qam/qam_open.c create mode 100644 src/qam/qam_rec.c create mode 100644 src/qam/qam_stat.c create mode 100644 src/qam/qam_stub.c create mode 100644 src/qam/qam_upgrade.c create mode 100644 src/qam/qam_verify.c create mode 100644 src/rep/mlease.html create mode 100644 src/rep/rep.msg create mode 100644 src/rep/rep_automsg.c create mode 100644 src/rep/rep_backup.c create mode 100644 src/rep/rep_elect.c create mode 100644 src/rep/rep_lease.c create mode 100644 src/rep/rep_log.c create mode 100644 src/rep/rep_method.c create mode 100644 src/rep/rep_record.c create mode 100644 src/rep/rep_region.c create mode 100644 src/rep/rep_stat.c create mode 100644 src/rep/rep_stub.c create mode 100644 src/rep/rep_util.c create mode 100644 src/rep/rep_verify.c create mode 100644 src/repmgr/repmgr.msg create mode 100644 src/repmgr/repmgr.src create mode 100644 src/repmgr/repmgr_auto.c create mode 100644 src/repmgr/repmgr_automsg.c create mode 100644 src/repmgr/repmgr_autop.c create mode 100644 src/repmgr/repmgr_elect.c create mode 100644 src/repmgr/repmgr_method.c create mode 100644 src/repmgr/repmgr_msg.c create mode 100644 src/repmgr/repmgr_net.c create mode 100644 src/repmgr/repmgr_posix.c create mode 100644 src/repmgr/repmgr_queue.c create mode 100644 src/repmgr/repmgr_rec.c create mode 100644 src/repmgr/repmgr_sel.c create mode 100644 src/repmgr/repmgr_stat.c create mode 100644 src/repmgr/repmgr_stub.c create mode 100644 src/repmgr/repmgr_util.c create mode 100644 src/repmgr/repmgr_windows.c create mode 100644 src/sequence/seq_stat.c create mode 100644 src/sequence/sequence.c create mode 100644 src/txn/txn.c create mode 100644 src/txn/txn.src create mode 100644 
src/txn/txn_auto.c create mode 100644 src/txn/txn_autop.c create mode 100644 src/txn/txn_chkpt.c create mode 100644 src/txn/txn_failchk.c create mode 100644 src/txn/txn_method.c create mode 100644 src/txn/txn_rec.c create mode 100644 src/txn/txn_recover.c create mode 100644 src/txn/txn_region.c create mode 100644 src/txn/txn_stat.c create mode 100644 src/txn/txn_util.c create mode 100644 src/xa/xa.c create mode 100644 src/xa/xa_map.c create mode 100644 test/c/README create mode 100644 test/c/chk.ctests create mode 100644 test/c/common/test_util.h create mode 100644 test/c/cutest/CuTest.c create mode 100644 test/c/cutest/CuTest.h create mode 100644 test/c/cutest/CuTests.c create mode 100644 test/c/cutest/Runner.c create mode 100644 test/c/cutest/gen_tester.sh create mode 100644 test/c/cutest/license.txt create mode 100644 test/c/suites/TestChannel.c create mode 100644 test/c/suites/TestDbTuner.c create mode 100644 test/c/suites/TestEncryption.c create mode 100644 test/c/suites/TestEnvConfig.c create mode 100644 test/c/suites/TestEnvMethod.c create mode 100644 test/c/suites/TestKeyExistErrorReturn.c create mode 100644 test/c/suites/TestPartial.c create mode 100644 test/c/suites/TestQueue.c create mode 100644 test/c/test_api_methods.c create mode 100644 test/c/test_log_verify.c create mode 100644 test/cxx/README create mode 100644 test/cxx/TestConstruct01.cpp create mode 100644 test/cxx/TestConstruct01.testerr create mode 100644 test/cxx/TestConstruct01.testout create mode 100644 test/cxx/TestGetSetMethods.cpp create mode 100644 test/cxx/TestKeyRange.cpp create mode 100644 test/cxx/TestKeyRange.testin create mode 100644 test/cxx/TestKeyRange.testout create mode 100644 test/cxx/TestLogc.cpp create mode 100644 test/cxx/TestLogc.testout create mode 100644 test/cxx/TestMulti.cpp create mode 100644 test/cxx/TestSimpleAccess.cpp create mode 100644 test/cxx/TestSimpleAccess.testout create mode 100644 test/cxx/TestTruncate.cpp create mode 100644 test/cxx/TestTruncate.testout 
create mode 100644 test/cxx/chk.cxxtests create mode 100644 test/cxx/ignore create mode 100644 test/cxx/testall create mode 100644 test/cxx/testone create mode 100644 test/micro/README create mode 100644 test/micro/configs/run.small create mode 100644 test/micro/configs/run.std create mode 100644 test/micro/configs/run.workload create mode 100644 test/micro/report create mode 100644 test/micro/report.awk create mode 100644 test/micro/source/LIST create mode 100644 test/micro/source/b_curalloc.c create mode 100644 test/micro/source/b_curwalk.c create mode 100644 test/micro/source/b_del.c create mode 100644 test/micro/source/b_get.c create mode 100644 test/micro/source/b_inmem.c create mode 100644 test/micro/source/b_latch.c create mode 100644 test/micro/source/b_load.c create mode 100644 test/micro/source/b_open.c create mode 100644 test/micro/source/b_put.c create mode 100644 test/micro/source/b_recover.c create mode 100644 test/micro/source/b_txn.c create mode 100644 test/micro/source/b_txn_write.c create mode 100644 test/micro/source/b_uname.c create mode 100644 test/micro/source/b_util.c create mode 100644 test/micro/source/b_workload.c create mode 100644 test/micro/source/b_workload.h create mode 100644 test/micro/source/bench.h create mode 100644 test/micro/source/test_micro.c create mode 100644 test/micro/test_micro create mode 100644 test/tcl/README create mode 100644 test/tcl/TESTS create mode 100644 test/tcl/archive.tcl create mode 100644 test/tcl/backup.tcl create mode 100644 test/tcl/bigfile001.tcl create mode 100644 test/tcl/bigfile002.tcl create mode 100644 test/tcl/byteorder.tcl create mode 100644 test/tcl/conscript.tcl create mode 100644 test/tcl/db_reptest.tcl create mode 100644 test/tcl/dbm.tcl create mode 100644 test/tcl/dbscript.tcl create mode 100644 test/tcl/ddoyscript.tcl create mode 100644 test/tcl/ddscript.tcl create mode 100644 test/tcl/dead001.tcl create mode 100644 test/tcl/dead002.tcl create mode 100644 test/tcl/dead003.tcl create mode 
100644 test/tcl/dead004.tcl create mode 100644 test/tcl/dead005.tcl create mode 100644 test/tcl/dead006.tcl create mode 100644 test/tcl/dead007.tcl create mode 100644 test/tcl/dead008.tcl create mode 100644 test/tcl/dead009.tcl create mode 100644 test/tcl/dead010.tcl create mode 100644 test/tcl/dead011.tcl create mode 100644 test/tcl/env001.tcl create mode 100644 test/tcl/env002.tcl create mode 100644 test/tcl/env003.tcl create mode 100644 test/tcl/env004.tcl create mode 100644 test/tcl/env005.tcl create mode 100644 test/tcl/env006.tcl create mode 100644 test/tcl/env007.tcl create mode 100644 test/tcl/env007script.tcl create mode 100644 test/tcl/env008.tcl create mode 100644 test/tcl/env009.tcl create mode 100644 test/tcl/env010.tcl create mode 100644 test/tcl/env011.tcl create mode 100644 test/tcl/env012.tcl create mode 100644 test/tcl/env013.tcl create mode 100644 test/tcl/env014.tcl create mode 100644 test/tcl/env015.tcl create mode 100644 test/tcl/env016.tcl create mode 100644 test/tcl/env017.tcl create mode 100644 test/tcl/env018.tcl create mode 100644 test/tcl/env019.tcl create mode 100644 test/tcl/env019script.tcl create mode 100644 test/tcl/env020.tcl create mode 100644 test/tcl/env021.tcl create mode 100644 test/tcl/envscript.tcl create mode 100644 test/tcl/fop001.tcl create mode 100644 test/tcl/fop002.tcl create mode 100644 test/tcl/fop003.tcl create mode 100644 test/tcl/fop004.tcl create mode 100644 test/tcl/fop005.tcl create mode 100644 test/tcl/fop006.tcl create mode 100644 test/tcl/fop007.tcl create mode 100644 test/tcl/fop008.tcl create mode 100644 test/tcl/fop009.tcl create mode 100644 test/tcl/fop010.tcl create mode 100644 test/tcl/fop011.tcl create mode 100644 test/tcl/fop012.tcl create mode 100644 test/tcl/fopscript.tcl create mode 100644 test/tcl/foputils.tcl create mode 100644 test/tcl/hsearch.tcl create mode 100644 test/tcl/include.tcl create mode 100644 test/tcl/join.tcl create mode 100644 test/tcl/lock001.tcl create mode 100644 
test/tcl/lock002.tcl create mode 100644 test/tcl/lock003.tcl create mode 100644 test/tcl/lock004.tcl create mode 100644 test/tcl/lock005.tcl create mode 100644 test/tcl/lock006.tcl create mode 100644 test/tcl/lockscript.tcl create mode 100644 test/tcl/log001.tcl create mode 100644 test/tcl/log002.tcl create mode 100644 test/tcl/log003.tcl create mode 100644 test/tcl/log004.tcl create mode 100644 test/tcl/log005.tcl create mode 100644 test/tcl/log006.tcl create mode 100644 test/tcl/log007.tcl create mode 100644 test/tcl/log008.tcl create mode 100644 test/tcl/log008script.tcl create mode 100644 test/tcl/log009.tcl create mode 100644 test/tcl/logtrack.list create mode 100644 test/tcl/logtrack.tcl create mode 100644 test/tcl/mdbscript.tcl create mode 100644 test/tcl/memp001.tcl create mode 100644 test/tcl/memp002.tcl create mode 100644 test/tcl/memp003.tcl create mode 100644 test/tcl/memp004.tcl create mode 100644 test/tcl/memp005.tcl create mode 100644 test/tcl/mpoolscript.tcl create mode 100644 test/tcl/mut001.tcl create mode 100644 test/tcl/mut002.tcl create mode 100644 test/tcl/mut002script.tcl create mode 100644 test/tcl/mut003.tcl create mode 100644 test/tcl/ndbm.tcl create mode 100644 test/tcl/parallel.tcl create mode 100644 test/tcl/plat001.tcl create mode 100644 test/tcl/portable.tcl create mode 100644 test/tcl/recd001.tcl create mode 100644 test/tcl/recd002.tcl create mode 100644 test/tcl/recd003.tcl create mode 100644 test/tcl/recd004.tcl create mode 100644 test/tcl/recd005.tcl create mode 100644 test/tcl/recd006.tcl create mode 100644 test/tcl/recd007.tcl create mode 100644 test/tcl/recd008.tcl create mode 100644 test/tcl/recd009.tcl create mode 100644 test/tcl/recd010.tcl create mode 100644 test/tcl/recd011.tcl create mode 100644 test/tcl/recd012.tcl create mode 100644 test/tcl/recd013.tcl create mode 100644 test/tcl/recd014.tcl create mode 100644 test/tcl/recd015.tcl create mode 100644 test/tcl/recd016.tcl create mode 100644 test/tcl/recd017.tcl create 
mode 100644 test/tcl/recd018.tcl create mode 100644 test/tcl/recd019.tcl create mode 100644 test/tcl/recd020.tcl create mode 100644 test/tcl/recd021.tcl create mode 100644 test/tcl/recd022.tcl create mode 100644 test/tcl/recd023.tcl create mode 100644 test/tcl/recd024.tcl create mode 100644 test/tcl/recd025.tcl create mode 100644 test/tcl/recd15scr.tcl create mode 100644 test/tcl/recdscript.tcl create mode 100644 test/tcl/rep001.tcl create mode 100644 test/tcl/rep002.tcl create mode 100644 test/tcl/rep003.tcl create mode 100644 test/tcl/rep005.tcl create mode 100644 test/tcl/rep006.tcl create mode 100644 test/tcl/rep007.tcl create mode 100644 test/tcl/rep008.tcl create mode 100644 test/tcl/rep009.tcl create mode 100644 test/tcl/rep010.tcl create mode 100644 test/tcl/rep011.tcl create mode 100644 test/tcl/rep012.tcl create mode 100644 test/tcl/rep013.tcl create mode 100644 test/tcl/rep014.tcl create mode 100644 test/tcl/rep015.tcl create mode 100644 test/tcl/rep016.tcl create mode 100644 test/tcl/rep017.tcl create mode 100644 test/tcl/rep017script.tcl create mode 100644 test/tcl/rep018.tcl create mode 100644 test/tcl/rep018script.tcl create mode 100644 test/tcl/rep019.tcl create mode 100644 test/tcl/rep020.tcl create mode 100644 test/tcl/rep021.tcl create mode 100644 test/tcl/rep022.tcl create mode 100644 test/tcl/rep023.tcl create mode 100644 test/tcl/rep024.tcl create mode 100644 test/tcl/rep025.tcl create mode 100644 test/tcl/rep026.tcl create mode 100644 test/tcl/rep027.tcl create mode 100644 test/tcl/rep028.tcl create mode 100644 test/tcl/rep029.tcl create mode 100644 test/tcl/rep030.tcl create mode 100644 test/tcl/rep031.tcl create mode 100644 test/tcl/rep032.tcl create mode 100644 test/tcl/rep033.tcl create mode 100644 test/tcl/rep034.tcl create mode 100644 test/tcl/rep035.tcl create mode 100644 test/tcl/rep035script.tcl create mode 100644 test/tcl/rep036.tcl create mode 100644 test/tcl/rep036script.tcl create mode 100644 test/tcl/rep037.tcl create mode 
100644 test/tcl/rep038.tcl create mode 100644 test/tcl/rep039.tcl create mode 100644 test/tcl/rep040.tcl create mode 100644 test/tcl/rep040script.tcl create mode 100644 test/tcl/rep041.tcl create mode 100644 test/tcl/rep042.tcl create mode 100644 test/tcl/rep042script.tcl create mode 100644 test/tcl/rep043.tcl create mode 100644 test/tcl/rep043script.tcl create mode 100644 test/tcl/rep044.tcl create mode 100644 test/tcl/rep045.tcl create mode 100644 test/tcl/rep045script.tcl create mode 100644 test/tcl/rep046.tcl create mode 100644 test/tcl/rep047.tcl create mode 100644 test/tcl/rep048.tcl create mode 100644 test/tcl/rep048script.tcl create mode 100644 test/tcl/rep049.tcl create mode 100644 test/tcl/rep050.tcl create mode 100644 test/tcl/rep051.tcl create mode 100644 test/tcl/rep052.tcl create mode 100644 test/tcl/rep053.tcl create mode 100644 test/tcl/rep054.tcl create mode 100644 test/tcl/rep055.tcl create mode 100644 test/tcl/rep058.tcl create mode 100644 test/tcl/rep060.tcl create mode 100644 test/tcl/rep061.tcl create mode 100644 test/tcl/rep062.tcl create mode 100644 test/tcl/rep063.tcl create mode 100644 test/tcl/rep064.tcl create mode 100644 test/tcl/rep065.tcl create mode 100644 test/tcl/rep065script.tcl create mode 100644 test/tcl/rep066.tcl create mode 100644 test/tcl/rep067.tcl create mode 100644 test/tcl/rep068.tcl create mode 100644 test/tcl/rep069.tcl create mode 100644 test/tcl/rep070.tcl create mode 100644 test/tcl/rep071.tcl create mode 100644 test/tcl/rep072.tcl create mode 100644 test/tcl/rep073.tcl create mode 100644 test/tcl/rep074.tcl create mode 100644 test/tcl/rep075.tcl create mode 100644 test/tcl/rep076.tcl create mode 100644 test/tcl/rep077.tcl create mode 100644 test/tcl/rep078.tcl create mode 100644 test/tcl/rep078script.tcl create mode 100644 test/tcl/rep079.tcl create mode 100644 test/tcl/rep080.tcl create mode 100644 test/tcl/rep081.tcl create mode 100644 test/tcl/rep082.tcl create mode 100644 test/tcl/rep083.tcl create mode 100644 
test/tcl/rep084.tcl create mode 100644 test/tcl/rep085.tcl create mode 100644 test/tcl/rep086.tcl create mode 100644 test/tcl/rep087.tcl create mode 100644 test/tcl/rep088.tcl create mode 100644 test/tcl/rep089.tcl create mode 100644 test/tcl/rep090.tcl create mode 100644 test/tcl/rep091.tcl create mode 100644 test/tcl/rep092.tcl create mode 100644 test/tcl/rep092script.tcl create mode 100644 test/tcl/rep093.tcl create mode 100644 test/tcl/rep094.tcl create mode 100644 test/tcl/rep095.tcl create mode 100644 test/tcl/rep095script.tcl create mode 100644 test/tcl/rep096.tcl create mode 100644 test/tcl/rep097.tcl create mode 100644 test/tcl/rep097script.tcl create mode 100644 test/tcl/rep098.tcl create mode 100644 test/tcl/repmgr001.tcl create mode 100644 test/tcl/repmgr002.tcl create mode 100644 test/tcl/repmgr003.tcl create mode 100644 test/tcl/repmgr007.tcl create mode 100644 test/tcl/repmgr009.tcl create mode 100644 test/tcl/repmgr010.tcl create mode 100644 test/tcl/repmgr011.tcl create mode 100644 test/tcl/repmgr012.tcl create mode 100644 test/tcl/repmgr013.tcl create mode 100644 test/tcl/repmgr017.tcl create mode 100644 test/tcl/repmgr018.tcl create mode 100644 test/tcl/repmgr023.tcl create mode 100644 test/tcl/repmgr024.tcl create mode 100644 test/tcl/repmgr025.tcl create mode 100644 test/tcl/repmgr026.tcl create mode 100644 test/tcl/repmgr027.tcl create mode 100644 test/tcl/repmgr028.tcl create mode 100644 test/tcl/repmgr028script.tcl create mode 100644 test/tcl/repmgr029.tcl create mode 100644 test/tcl/repmgr029script.tcl create mode 100644 test/tcl/repmgr029script2.tcl create mode 100644 test/tcl/repmgr030.tcl create mode 100644 test/tcl/repmgr031.tcl create mode 100644 test/tcl/repmgr032.tcl create mode 100644 test/tcl/repmgr100.tcl create mode 100644 test/tcl/repmgr101.tcl create mode 100644 test/tcl/repmgr102.tcl create mode 100644 test/tcl/repmgr105.tcl create mode 100644 test/tcl/repmgr106.tcl create mode 100644 test/tcl/repmgr107.tcl create mode 100644 
test/tcl/repmgr108.tcl create mode 100644 test/tcl/repmgr109.tcl create mode 100644 test/tcl/repmgr110.tcl create mode 100644 test/tcl/repmgr111.tcl create mode 100644 test/tcl/repmgr112.tcl create mode 100644 test/tcl/reputils.tcl create mode 100644 test/tcl/reputilsnoenv.tcl create mode 100644 test/tcl/rsrc001.tcl create mode 100644 test/tcl/rsrc002.tcl create mode 100644 test/tcl/rsrc003.tcl create mode 100644 test/tcl/rsrc004.tcl create mode 100644 test/tcl/sdb001.tcl create mode 100644 test/tcl/sdb002.tcl create mode 100644 test/tcl/sdb003.tcl create mode 100644 test/tcl/sdb004.tcl create mode 100644 test/tcl/sdb005.tcl create mode 100644 test/tcl/sdb006.tcl create mode 100644 test/tcl/sdb007.tcl create mode 100644 test/tcl/sdb008.tcl create mode 100644 test/tcl/sdb009.tcl create mode 100644 test/tcl/sdb010.tcl create mode 100644 test/tcl/sdb011.tcl create mode 100644 test/tcl/sdb012.tcl create mode 100644 test/tcl/sdb013.tcl create mode 100644 test/tcl/sdb014.tcl create mode 100644 test/tcl/sdb015.tcl create mode 100644 test/tcl/sdb016.tcl create mode 100644 test/tcl/sdb017.tcl create mode 100644 test/tcl/sdb018.tcl create mode 100644 test/tcl/sdb019.tcl create mode 100644 test/tcl/sdb020.tcl create mode 100644 test/tcl/sdbscript.tcl create mode 100644 test/tcl/sdbtest001.tcl create mode 100644 test/tcl/sdbtest002.tcl create mode 100644 test/tcl/sdbutils.tcl create mode 100644 test/tcl/sec001.tcl create mode 100644 test/tcl/sec002.tcl create mode 100644 test/tcl/shelltest.tcl create mode 100644 test/tcl/si001.tcl create mode 100644 test/tcl/si002.tcl create mode 100644 test/tcl/si003.tcl create mode 100644 test/tcl/si004.tcl create mode 100644 test/tcl/si005.tcl create mode 100644 test/tcl/si006.tcl create mode 100644 test/tcl/si007.tcl create mode 100644 test/tcl/si008.tcl create mode 100644 test/tcl/sijointest.tcl create mode 100644 test/tcl/siutils.tcl create mode 100644 test/tcl/sysscript.tcl create mode 100644 test/tcl/t106script.tcl create mode 100644 
test/tcl/test.tcl create mode 100644 test/tcl/test001.tcl create mode 100644 test/tcl/test002.tcl create mode 100644 test/tcl/test003.tcl create mode 100644 test/tcl/test004.tcl create mode 100644 test/tcl/test005.tcl create mode 100644 test/tcl/test006.tcl create mode 100644 test/tcl/test007.tcl create mode 100644 test/tcl/test008.tcl create mode 100644 test/tcl/test009.tcl create mode 100644 test/tcl/test010.tcl create mode 100644 test/tcl/test011.tcl create mode 100644 test/tcl/test012.tcl create mode 100644 test/tcl/test013.tcl create mode 100644 test/tcl/test014.tcl create mode 100644 test/tcl/test015.tcl create mode 100644 test/tcl/test016.tcl create mode 100644 test/tcl/test017.tcl create mode 100644 test/tcl/test018.tcl create mode 100644 test/tcl/test019.tcl create mode 100644 test/tcl/test020.tcl create mode 100644 test/tcl/test021.tcl create mode 100644 test/tcl/test022.tcl create mode 100644 test/tcl/test023.tcl create mode 100644 test/tcl/test024.tcl create mode 100644 test/tcl/test025.tcl create mode 100644 test/tcl/test026.tcl create mode 100644 test/tcl/test027.tcl create mode 100644 test/tcl/test028.tcl create mode 100644 test/tcl/test029.tcl create mode 100644 test/tcl/test030.tcl create mode 100644 test/tcl/test031.tcl create mode 100644 test/tcl/test032.tcl create mode 100644 test/tcl/test033.tcl create mode 100644 test/tcl/test034.tcl create mode 100644 test/tcl/test035.tcl create mode 100644 test/tcl/test036.tcl create mode 100644 test/tcl/test037.tcl create mode 100644 test/tcl/test038.tcl create mode 100644 test/tcl/test039.tcl create mode 100644 test/tcl/test040.tcl create mode 100644 test/tcl/test041.tcl create mode 100644 test/tcl/test042.tcl create mode 100644 test/tcl/test043.tcl create mode 100644 test/tcl/test044.tcl create mode 100644 test/tcl/test045.tcl create mode 100644 test/tcl/test046.tcl create mode 100644 test/tcl/test047.tcl create mode 100644 test/tcl/test048.tcl create mode 100644 test/tcl/test049.tcl create mode 100644 
test/tcl/test050.tcl create mode 100644 test/tcl/test051.tcl create mode 100644 test/tcl/test052.tcl create mode 100644 test/tcl/test053.tcl create mode 100644 test/tcl/test054.tcl create mode 100644 test/tcl/test055.tcl create mode 100644 test/tcl/test056.tcl create mode 100644 test/tcl/test057.tcl create mode 100644 test/tcl/test058.tcl create mode 100644 test/tcl/test059.tcl create mode 100644 test/tcl/test060.tcl create mode 100644 test/tcl/test061.tcl create mode 100644 test/tcl/test062.tcl create mode 100644 test/tcl/test063.tcl create mode 100644 test/tcl/test064.tcl create mode 100644 test/tcl/test065.tcl create mode 100644 test/tcl/test066.tcl create mode 100644 test/tcl/test067.tcl create mode 100644 test/tcl/test068.tcl create mode 100644 test/tcl/test069.tcl create mode 100644 test/tcl/test070.tcl create mode 100644 test/tcl/test071.tcl create mode 100644 test/tcl/test072.tcl create mode 100644 test/tcl/test073.tcl create mode 100644 test/tcl/test074.tcl create mode 100644 test/tcl/test076.tcl create mode 100644 test/tcl/test077.tcl create mode 100644 test/tcl/test078.tcl create mode 100644 test/tcl/test079.tcl create mode 100644 test/tcl/test081.tcl create mode 100644 test/tcl/test082.tcl create mode 100644 test/tcl/test083.tcl create mode 100644 test/tcl/test084.tcl create mode 100644 test/tcl/test085.tcl create mode 100644 test/tcl/test086.tcl create mode 100644 test/tcl/test087.tcl create mode 100644 test/tcl/test088.tcl create mode 100644 test/tcl/test089.tcl create mode 100644 test/tcl/test090.tcl create mode 100644 test/tcl/test091.tcl create mode 100644 test/tcl/test092.tcl create mode 100644 test/tcl/test093.tcl create mode 100644 test/tcl/test094.tcl create mode 100644 test/tcl/test095.tcl create mode 100644 test/tcl/test096.tcl create mode 100644 test/tcl/test097.tcl create mode 100644 test/tcl/test098.tcl create mode 100644 test/tcl/test099.tcl create mode 100644 test/tcl/test100.tcl create mode 100644 test/tcl/test101.tcl create mode 100644 
test/tcl/test102.tcl create mode 100644 test/tcl/test103.tcl create mode 100644 test/tcl/test106.tcl create mode 100644 test/tcl/test107.tcl create mode 100644 test/tcl/test109.tcl create mode 100644 test/tcl/test110.tcl create mode 100644 test/tcl/test111.tcl create mode 100644 test/tcl/test112.tcl create mode 100644 test/tcl/test113.tcl create mode 100644 test/tcl/test114.tcl create mode 100644 test/tcl/test115.tcl create mode 100644 test/tcl/test116.tcl create mode 100644 test/tcl/test117.tcl create mode 100644 test/tcl/test119.tcl create mode 100644 test/tcl/test120.tcl create mode 100644 test/tcl/test121.tcl create mode 100644 test/tcl/test122.tcl create mode 100644 test/tcl/test123.tcl create mode 100644 test/tcl/test124.tcl create mode 100644 test/tcl/test125.tcl create mode 100644 test/tcl/test126.tcl create mode 100644 test/tcl/test127.tcl create mode 100644 test/tcl/test128.tcl create mode 100644 test/tcl/test129.tcl create mode 100644 test/tcl/test130.tcl create mode 100644 test/tcl/test131.tcl create mode 100644 test/tcl/test132.tcl create mode 100644 test/tcl/test133.tcl create mode 100644 test/tcl/test134.tcl create mode 100644 test/tcl/testparams.tcl create mode 100644 test/tcl/testutils.tcl create mode 100644 test/tcl/txn001.tcl create mode 100644 test/tcl/txn002.tcl create mode 100644 test/tcl/txn003.tcl create mode 100644 test/tcl/txn004.tcl create mode 100644 test/tcl/txn005.tcl create mode 100644 test/tcl/txn006.tcl create mode 100644 test/tcl/txn007.tcl create mode 100644 test/tcl/txn008.tcl create mode 100644 test/tcl/txn009.tcl create mode 100644 test/tcl/txn010.tcl create mode 100644 test/tcl/txn011.tcl create mode 100644 test/tcl/txn012.tcl create mode 100644 test/tcl/txn012script.tcl create mode 100644 test/tcl/txn013.tcl create mode 100644 test/tcl/txn014.tcl create mode 100644 test/tcl/txnscript.tcl create mode 100644 test/tcl/update.tcl create mode 100644 test/tcl/upgrade.tcl create mode 100644 test/tcl/wordlist create mode 100644 
test/tcl/wrap.tcl create mode 100644 test/tcl/wrap_reptest.tcl create mode 100644 test/xa/chk.xa create mode 100644 test/xa/src1/client.c create mode 100644 test/xa/src1/datafml.fml create mode 100644 test/xa/src1/datafml.h create mode 100644 test/xa/src1/hdbrec.h create mode 100644 test/xa/src1/htimestampxa.c create mode 100644 test/xa/src1/htimestampxa.h create mode 100644 test/xa/src1/run.sh create mode 100644 test/xa/src1/server.c create mode 100644 test/xa/src1/tuxconfig.sh create mode 100644 test/xa/src2/bdb1.c create mode 100644 test/xa/src2/bdb2.c create mode 100644 test/xa/src2/client.c create mode 100644 test/xa/src2/run.sh create mode 100644 test/xa/src2/tuxconfig.sh create mode 100644 test/xa/src3/client.c create mode 100644 test/xa/src3/run.sh create mode 100644 test/xa/src3/server.c create mode 100644 test/xa/src3/tuxconfig.sh create mode 100644 test/xa/src4/client.c create mode 100644 test/xa/src4/run.sh create mode 100644 test/xa/src4/server.c create mode 100644 test/xa/src4/tuxconfig.sh create mode 100644 util/db_archive.c create mode 100644 util/db_checkpoint.c create mode 100644 util/db_deadlock.c create mode 100644 util/db_dump.c create mode 100644 util/db_dump185.c create mode 100644 util/db_hotbackup.c create mode 100644 util/db_load.c create mode 100644 util/db_log_verify.c create mode 100644 util/db_printlog.c create mode 100644 util/db_printlog/README create mode 100644 util/db_printlog/commit.awk create mode 100644 util/db_printlog/count.awk create mode 100644 util/db_printlog/dbname.awk create mode 100644 util/db_printlog/fileid.awk create mode 100644 util/db_printlog/logstat.awk create mode 100644 util/db_printlog/pgno.awk create mode 100644 util/db_printlog/range.awk create mode 100644 util/db_printlog/rectype.awk create mode 100644 util/db_printlog/status.awk create mode 100644 util/db_printlog/txn.awk create mode 100644 util/db_recover.c create mode 100644 util/db_replicate.c create mode 100644 util/db_stat.c create mode 100644 
util/db_stat/dd.sh create mode 100644 util/db_tuner.c create mode 100644 util/db_upgrade.c create mode 100644 util/db_verify.c create mode 100755 util/dtrace/apicalls.d create mode 100755 util/dtrace/apitimes.d create mode 100755 util/dtrace/apitrace.d create mode 100755 util/dtrace/cache.d create mode 100755 util/dtrace/dbdefs.d create mode 100755 util/dtrace/locktimes.d create mode 100755 util/dtrace/locktimesid.d create mode 100755 util/dtrace/mutex.d create mode 100755 util/dtrace/showerror.d create mode 100755 util/systemtap/apicalls.stp create mode 100755 util/systemtap/apitimes.stp create mode 100755 util/systemtap/apitrace.stp create mode 100755 util/systemtap/cache.stp create mode 100755 util/systemtap/locktimes.stp create mode 100755 util/systemtap/locktimesid.stp create mode 100755 util/systemtap/mutex.stp create mode 100755 util/systemtap/showerror.stp diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b782850d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build_unix/** diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..c3b51eda --- /dev/null +++ b/LICENSE @@ -0,0 +1,130 @@ +/*- + * $Id$ + */ + +The following is the license that applies to this copy of the Berkeley DB +software. For a license to use the Berkeley DB software under conditions +other than those described here, or to purchase support for this software, +please contact Oracle at berkeleydb-info_us@oracle.com. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/* + * Copyright (c) 1990, 2011 Oracle and/or its affiliates. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Redistributions in any form must be accompanied by information on + * how to obtain complete source code for the DB software and any + * accompanying software that uses the DB software. The source code + * must either be included in the distribution or be available for no + * more than the cost of distribution plus a nominal fee, and must be + * freely redistributable under reasonable conditions. For an + * executable file, complete source code means the source code for all + * modules it contains. It does not include source code for modules or + * files that typically accompany the major components of the operating + * system on which the executable file runs. + * + * THIS SOFTWARE IS PROVIDED BY ORACLE ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR + * NON-INFRINGEMENT, ARE DISCLAIMED. IN NO EVENT SHALL ORACLE BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY HARVARD AND ITS CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL HARVARD OR ITS CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*** + * ASM: a very small and fast Java bytecode manipulation framework + * Copyright (c) 2000-2005 INRIA, France Telecom + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ diff --git a/README b/README index e69de29b..6cd73919 100644 --- a/README +++ b/README @@ -0,0 +1,5 @@ +Berkeley DB 11g Release 2, library version 11.2.5.2.28: (June 10, 2011) + +This is Berkeley DB 11g Release 2 from Oracle. To view release and +installation documentation, load the distribution file docs/index.html +into your web browser. diff --git a/build_unix/.IGNORE_ME b/build_unix/.IGNORE_ME new file mode 100644 index 00000000..558fd496 --- /dev/null +++ b/build_unix/.IGNORE_ME @@ -0,0 +1,3 @@ +Some combinations of the gzip and tar archive exploders found +on Linux systems ignore directories that don't have any files +(other than symbolic links) in them. So, here's a file. diff --git a/dist/ChangeLog b/dist/ChangeLog new file mode 100644 index 00000000..23a44691 --- /dev/null +++ b/dist/ChangeLog @@ -0,0 +1,426 @@ += Berkeley DB 5.2 Changelog = + +== Database or Log File On-Disk Format Changes == + +Existing database file formats were unchanged in library version 11.2.5.2. +However, a new database file format, "heap", was introduced. + +The log file format changed in library version 11.2.5.2. + +== New Features == + +Replication Manager now manages Group Membership. 
This allows sites to be added to +and removed from the replication group dynamically. Replication Manager +also now automatically keeps track of the group size (nsites). [#14778] + +Initial allocations for various non-pagebuffer (mpool) system resources +may now be specified, as well as a total maximum of memory to use, rather than +specifying a maximum value for each resource. [#16334] + +Implemented Berkeley DB globalization support architecture to enable localized +and stripped error and output messages. [#16863] + +Added a new access method, DB_HEAP. Heap aims for efficient use (and re-use) +of disk space. Keys in a heap database are automatically generated by BDB, it +is recommended that one or more secondary indexes be used with a heap +database. For full details on DB_HEAP, see the Programmer's Reference +Guide. [#17627] + +Added a compatible mode for 32bit and 64bit Windows environment. [#18225] + +For the SQL API, concurrency between read and write transactions can now be +enabled using "PRAGMA multiversion". Added several pragmas that can be +used to configure the Berkeley DB datastore. [#18521] + +Add several new pragmas to provide in-process support for replication in +the SQL API. [#18528] + +The Berkeley DB X/open compliant XA resource manager has been restored, +including support for multi-threaded servers. [#18701] + +Improved the ability to recover from an application crash on connections +through the SQL API. Berkeley DB will try to automatically clean up locks, +mutexes and transactions from the failed process. [#18713] + +Add support for sequence usage in the SQL API using SQLite custom +functions. [#19007] + +Add a pragma in the SQL API to allow execution of a cache trickle +command. [#19202] + +Add a pragma in the SQL API to allow configuration of DB_SYSTEM_MEM +environments. 
[#19249] + +The new db_env_set_win_security(SECURITY_ATTRIBUTES *) function allows an +application to specify the particular Microsoft Windows security attributes +to be used by Berkeley DB. This helps support applications which reduce their +privileges after opening the environment. [#19529] + + +== Database Environment Changes == + +None + +== Concurrent Data Store Changes == + +None + +== Access Method Changes == + +Modified the queue access method so that it only uses latches on the metadata +page rather than a latch and a lock. This was done to improve +performance. [#18749] + +Fixed several bugs that could cause an update running with MVCC to get the +wrong version of a page or improperly update the metadata last page +number. [#19063] + +The database open code will no longer log the open and close of the master +database in a file when opening a sub database in that file. [#19071] + +Fixed a bug where an error during an update to a hash database with +DB_NOOVERWRITE set could return DB_KEYEXIST rather than the correct +error. [#19077] + +Fixed a bug that could cause the wrong page number to be on a root or metadata +page if DB->compact moved the page and the operation was later rolled +forward. [#19167] + +Fixed a bug that could cause the close of a secondary index database to fail +if the transaction doing the open aborted. [#19169] + +Fixed a bug that could prevent an update to a primary recno or queue database +with DB_NOOVERWITE set. [#19230] + +Fixed a bug when an update to a database with DB_NOOVERWRITE set could +incorrectly return DB_KEYEXIST rather than the correct error +(e.g., DB_LOCK_DEADLOCK). [#19345] + +Fixed a bug preventing the use of the set_re_len and set_re_pad methods with +a RECNO database when configuring with --disable-queue. [#19367] + +Fixed a bug in DB->compact on BTREE databases that did not check if the last +page in the database could be moved to a lower numbered page. 
[#19394] + +Fixed a bug that could cause a Log Sequence Error when recovering the +deallocation of a multiple page overflow chain. [#19474] + +Fixed a bug that could cause a diagnostic assertion if MVCC was in use +and multiple levels of a btree needed to be split. [#19481] + +Fixed a few error paths that could cause a Panic with an "unpinned page +returned" error. [#19493] + +Fixed a bug that closed a race condition that under heavy multi-threaded +appending to a queue database could cause some records to be lost. [#19498] + +Fixed a bug that might cause DB->compact to mis-estimate the size of an +overflow record when merging two pages. This may cause the page to have more +data than desired. [#19562] + +Fixed a bug in DB_ENV->fileid_reset that did not update the fileid's on the +metadata pages of subdatabases if the database file was not in native +byte order. [#19608] + +Fixed a bug that caused the first directory specified in the create of +a partitioned database to get too many partitions. [#20041] + +== SQL API Changes == + +Fixed a race condition that would cause a corruption error in one process when +two processes created the same SQL database. [#18929] + +Fixed a bug that would cause a constraint violation when updating the +primary key with the same value. [#18976] + +Overwriting an old backup with a new backup using the SQL online backup API +will no longer double the size of the database. [#19021] + +Implemented index optimizations for indexes on large values. [#19094] + +Fixed a bug that could cause an undetected deadlock between a +thread which moved a metadata or root page via a DB->compact operation and +another thread trying to open the database if the old page was being removed +from the file. [#19186] + +Fix a bug in the BDBSQL_FILE_PER_TABLE option, to allow absolute +path names. [#19190] + +Add a pragma to allow configuration of DB_SYSTEM_MEM environments. 
[#19249] + +Exclusive transactions will now block new transactions and will prevent +existing transactions from making forward progress. [#19256] + +Fixed a bug that would cause assert error when opening an in-memory hash +database with thread count configured when compiled with +--enable-diagnostic. [#19357] + +Upgrade the bundled version of SQLite to 3.7.6.2 [#19376] + +Fixed a performance bug with the cache victim selection algorithm when there +were multiple cache regions. [#19385] + +Fixed a bug which could cause two SQL threads to have an undetected deadlock +when opening or closing tables. [#19386] + +Fix a bug that could cause a hang when deleting a table if there are multiple +connections to a database from different processes. [#19419] + +Fixed a bug which could cause multiple threads performing DB->compact on +the same database file to overrun the in-memory freelist, which could +potentially lead to memory corruption. [#19571] + +Fixed a bug in DB->compact that could cause a loop if an attempt to move a +sub-database meta data page deadlocked. [#20028] + +== C API Changes == + +Fixed a bug where encryption could not be enabled for individual databases +in an encrypted environment. [#18891] + +Removed two unused error codes, DB_NOSERVER_HOME and DB_NOSERVER_ID. [#18978] + +Added a DB_DBT_READONLY flag so that users can pass in a non-usermem key +(DB_DBT_USERMEM) for get operations. [#19360] + +Fixed a bug in DB/DBC->get/pget that the partial flags are silently ignored +with positional flags and return inconsistent DBT. [#19540] + +Fixed a bug which prevented items from being deleted on a secondary +database. [#19573] + +Fixed a bug to correctly handle the DB_BUFFER_SMALL case on delete +operations when compression is enabled. [#19660] + +== Tcl-specific API Changes == + +None. + + +== C#-specific API Changes == + +Added support for partial put/get in the C# API. [#18795] + +Fixed a bug in compare delegate for secondary db. 
[#18935] + +== Replication Changes == + +Replication Manager now allows differing ack policies at different +sites throughout the group, and supports dynamic changes to the ack +policy. (The ack policy in force is determined by the current +master.) [#14993] + +Replication Manager "channels" feature allows applications to +share repmgr's communication facilities. [#17228] + +Add example program for RepMgr "channels" feature: ex_rep_chan. [#17387] + +Replication Manager now allows dynamic changes to a site's +"electability" (changes between zero and non-zero priority). This +feature should be used with care, because electability changes can in +boundary cases invalidate durability guarantees granted for previous +transactions. [#17497] + +Changed election criteria so that later group transactions +won't get overwritten by earlier generations with more log. [#17815] + +Added changes to master lease checks that result in improved +performance when using master leases. [#18960] + +A log write failure on a replication master will now cause +a panic since the transaction may be committed on some clients. [#19054] + +Fixed a few memory leak conditions on error paths. [#19131] + +Change lease code so that zero priority sites do not count +in lease guarantees since they cannot be elected. [#19154] + +Repmgr rerequest processing is moved from a dedicated thread to heartbeat +messages. Repmgr clients using heartbeats can now detect and rerequest +missing final master log records without master activity. [#19197] + +Repmgr statistics are now included in full statistics output for +an environment. [#19198] + +Fix an inefficiency in mixed version elections. We now check +if an election is won via the EID instead of priority. [#19254] + +Changed election LSNs to use the last txn commit LSN instead +of the end of the log. 
[#19278] + +Create replication internal database files in the environment +home directory rather than the data directory so that they are in the +same location as the other internal replication files. [#19403] + +Fix a bug that was preventing repmgr from calling an election +when starting a site with the DB_REP_ELECTION flag. [#19546] + +Fixed a bug which could cause a segfault at a replication master if a +named in-memory database was being created around the same time as a +client site was synchronizing (in "internal init") with the master. +[#19583] + +Adjust lease code to consider timeout length when retrying. [#19705] + +Fixed a bug that could cause a crash in replication groups of more +than 10 sites, with multiple processes sharing each DB environment +concurrently. [#19818] + +Fix a bug where an assertion failure could happen if pages in a database +were deallocated during a client internal initialization. [#19851] + +Fix a bug where an internal initialization of a queue database with +non-contiguous extent files could return an error. [#19925] + +The 2SITE_STRICT replication configuration parameter is now turned on +by default. It can be turned off via a call to +DB_ENV->rep_set_config(). [#19937] + +Repmgr heartbeats can now help detect a duplicate master without the +need for application activity. [#19950] + +== Locking Subsystem Changes == + +Fixed a bug where an updater supporting DB_READ_UNCOMMITTED might downgrade +its lock too soon if there was an error during the update. [#19155] + +Fixed a bug where transaction timeouts could have been specified in a +database environment where the locking subsystem was disabled. [#19582] + +Fixed a bug in a diagnostic assertion that was improperly triggered by the +removal of a sub-database. [#19683] + +Fixed a bug that would cause DB_ENV->failcheck to free locks for a locker +associated with a database handle after the thread that opened the handle +exited. 
[#19881] + +== Logging Subsystem Changes == + +Enhanced recovery so that it will not output extra checkpoint or transaction +id recycle log records if there was no activity since the last +checkpoint. [#15330] + +Log checksums can now be disabled using the compile argument +--disable-log-checksum. This will give a performance increase at the risk +of undetectable corruption in the log records, which would make recovery +impossible. [#19143] + +Fixed a bug that could cause a page that should have been removed from the +end of a file still be in the copy of the file in a hot backup. [#19996] + +== Memory Pool Subsystem Changes == + +Fixed a bug in MPOOLFILE->get that did not permit the DB_MPOOL_DIRTY flag +to be used with other flags. [#19421] + +== Mutex Subsystem Changes == + +Fixed a bug when the mutex region needs to be larger than 4GB, the region size +was incorrectly adjusted to be slightly too small to fit the mutexes. [#18968] + +Fixed a performance problem with hybrid shared latches in which a request for +exclusive access would busy-wait (rather than put itself to sleep) if the latch +were held by a shared reader. This also fixed the timeout handling of hybrid +mutexes. In some cases the timeout would not be honored, resulting in delays +for the replication "read your writes" feature which were longer than requested. +[#18982] + +Fixed the timeout handling of the pthreads mutexes used by the replication +"read your writes" feature. When a timeout occurred there was a race condition +which might result in a hang. [#19047] + + +== Transaction Subsystem Changes == + +Fixed a leak of log file ids when a database is closed before the end of a +transaction that references it. [#15957] + +Fixed a bug that would cause a panic if a child transaction performed a database +rename, then aborted, and then the parent transaction committed. [#18069] + +Fixed a bug where we released the metadata page lock too early if a +non-transactional update was being done. 
[#19036] + +Removed the possibility that checkpoints will overlap in the log, decreasing +the time to recover. [#19062] + + +== Test Suite Changes == + +Require Tcl 8.5 or greater. + +== Utility Changes == + +Added a new utility, db_tuner, which analyzes the data in a btree database, +and suggests a reasonable pagesize. [#18910] + +Fixed some bugs in log_verify when there are in-memory database logs and subdb +logs. [#19157] + +Modified db_hotbackup to not read from the file system as required on non-UNIX +systems. Also provided the db_copy function for this purpose. [#19863] + +Fixed db_hotbackup so that when -d/-l or -D is not specified, DB_CONFIG is +used to determine the locations of the databases and logs in the source +environment. [#19994] + +== Configuration, Documentation, Sample Apps, Portability and Build Changes == + +Changed SQL API library built on *nix to link with libpthreads when +necessary. [#19098] + +Added CPPFLAGS into our --enable-jdbc configuration. [#19234] + +Added encryption support into the Windows CE build project for SQL API. [#19632] + +Fixed a bug in the STAT_INC_VERB() dtrace probe that was causing compiler +warnings. [#19707] + +Fixed a bug that could cause a trap in db_dump using salvage mode if a +page was found that was not associated with any database in the file. [#19974] + +On Cygwin, circumvented a bug in libtool that is exposed when building the +BDB SQL API in a directory path containing whitespace characters. [#19812] + +== Example Changes == + +Update repmgr C, C#, C++, Java examples(ex_rep_mgr, ex_rep_gsg_repmgr, +ex_rep_chan, excs_repquote, excxx_repquote, excxx_epquote_gsg, repquote, +repquote_gsg) with their related API changes for group +membership. 
[#19586][#19622] + +Port ex_rep_chan, ex_rep_gsg_repmgr, ex_rep_gsg_simple, +excxx_repquote_gsg_repmgr, excxx_repquote_gsg_simple to Windows. [#19890] + +== Miscellaneous Bug Fixes == + +Fixed a bug where memory copied from the Java API could leak if flags were not +correctly configured. [#19152] + +== Deprecated Features == + +None + +== Known Bugs == + +The SQL API has a known issue when using a blob field with a lot of content +and multiple concurrent connections to the database. [#19945] + +Rollback of a dropped table in the SQL layer contains a mutex leak, which +can consume all mutex resources if enough rollbacks of table drops are +performed. [#20077] + +The DB_CONFIG configuration parameters which specify path names currently +do not support names containing any whitespace characters. [#20158] + +The BFile module has a known crash issue when using BFile handle for SQL +expressions interface on 64bit platforms. [#20193] + +On systems without FTRUNCATE, db_verify will return an error for truncated +heap databases. This is a bug in db_verify, the database has been truncated +correctly and can be used in the future. [#20195] + +An application using queue extents which is append mostly could see a +decrease in the buffer pool hit rate due to the failure to remove pages +from closed extents from the buffer pool. [#20217] + diff --git a/dist/Makefile.in b/dist/Makefile.in new file mode 100644 index 00000000..1ee35140 --- /dev/null +++ b/dist/Makefile.in @@ -0,0 +1,2657 @@ +# $Id$ + +topdir= @topdir@ +srcdir= $(topdir)/src +langdir= $(topdir)/lang +exampledir= $(topdir)/examples +testdir= $(topdir)/test +utildir= $(topdir)/util +distdir= $(topdir)/dist +builddir=. + +################################################## +# Installation directories and permissions. 
+################################################## +prefix= @prefix@ +exec_prefix=@exec_prefix@ +bindir= @bindir@ +includedir=@includedir@ +libdir= @libdir@ +docdir= $(prefix)/docs + +dmode= 755 +emode= 555 +fmode= 444 + +transform=@program_transform_name@ + +################################################## +# Paths for standard user-level commands. +################################################## +AR= @AR@ +CHMOD= @CHMOD@ +CP= @CP@ +LN= @LN@ +MKDIR= @MKDIR@ +MV= @MV@ +RANLIB= @RANLIB@ +RM= @RM@ +SHELL= @db_cv_path_sh@ +STRIP= @STRIP@ +SED= @SED@ +PERL= @PERL@ + +################################################## +# General library information. +################################################## +DEF_LIB= @DEFAULT_LIB@ +DEF_LIB_CXX= @DEFAULT_LIB_CXX@ +DEF_LIB_SQL= @DEFAULT_LIB_SQL@ +DEF_LIB_SQLITE= @DEFAULT_LIB_SQLITE@ +DEF_LIB_STL= @DEFAULT_LIB_STL@ +DEF_LIB_TCL= @DEFAULT_LIB_TCL@ +INSTALLER= @INSTALLER@ +LIBTOOL= @LIBTOOL@ + +POSTLINK= @POSTLINK@ +SOLINK= @MAKEFILE_SOLINK@ @CFLAGS@ +SOFLAGS= @SOFLAGS@ +LIBMAJOR= @DB_VERSION_MAJOR@ +LIBVERSION= @DB_VERSION_MAJOR@.@DB_VERSION_MINOR@ + +CPPFLAGS= -I$(builddir) -I$(srcdir) @CPPFLAGS@ + +################################################## +# C API. +################################################## +CFLAGS= -c $(CPPFLAGS) @CFLAGS@ +CC= @MAKEFILE_CC@ +CCLINK= @MAKEFILE_CCLINK@ @CFLAGS@ + +LDFLAGS= @LDFLAGS@ +LIBS= @LIBSO_LIBS@ +TEST_LIBS= @TEST_LIBS@ +LIBCSO_LIBS= @LIBCSO_LIBS@ @LIBSO_LIBS@ + +libdb_base= libdb +libdb= $(libdb_base).a +libdb_version= $(libdb_base)-$(LIBVERSION).a +libso= $(libdb_base)-$(LIBVERSION)@SOSUFFIX@ +libso_target= $(libdb_base)-$(LIBVERSION).la +libso_default= $(libdb_base)@SOSUFFIX@ +libso_major= $(libdb_base)-$(LIBMAJOR)@SOSUFFIX@ + +################################################## +# C++ API. +# +# C++ support is optional, and can be built with static or shared libraries. 
+################################################## +CXXFLAGS= -c $(CPPFLAGS) @CXXFLAGS@ +CXX= @MAKEFILE_CXX@ +CXXLINK= @MAKEFILE_CXXLINK@ @CXXFLAGS@ +XSOLINK= @MAKEFILE_XSOLINK@ @CXXFLAGS@ +LIBXSO_LIBS= @LIBXSO_LIBS@ @LIBSO_LIBS@ + +libcxx_base= libdb_cxx +libcxx= $(libcxx_base).a +libcxx_version= $(libcxx_base)-$(LIBVERSION).a +libxso= $(libcxx_base)-$(LIBVERSION)@SOSUFFIX@ +libxso_target= $(libcxx_base)-$(LIBVERSION).la +libxso_default= $(libcxx_base)@SOSUFFIX@ +libxso_major= $(libcxx_base)-$(LIBMAJOR)@SOSUFFIX@ + +################################################## +# SQL API. +# +# SQL support is optional, and can be built with static or shared libraries. +################################################## +SQLFLAGS= -I$(builddir) -I$(builddir)/sql -I$(langdir)/sql/generated \ + -D_HAVE_SQLITE_CONFIG_H @SQL_FLAGS@ + +libsql_base= libdb_sql +libsql= $(libsql_base).a +libsql_version= $(libsql_base)-$(LIBVERSION).a +libsqlso= $(libsql_base)-$(LIBVERSION)@SOSUFFIX@ +libsqlso_target=$(libsql_base)-$(LIBVERSION).la +libsqlso_default=$(libsql_base)@SOSUFFIX@ +libsqlso_major= $(libsql_base)-$(LIBMAJOR)@SOSUFFIX@ + +libsqlite_base= libsqlite3 +libsqlite= $(libsqlite_base).a +libsqliteso= $(libsqlite_base)@SOSUFFIX@ +libsqliteso_target=$(libsqlite_base).la + +################################################## +# STL API. +# +# STL support is optional, and can be built with static or shared libraries. +################################################## +STLFLAGS= $(CXXFLAGS) -I$(langdir)/cxx/stl +LIBSTLSO_LIBS= @LIBXSO_LIBS@ @LIBSO_LIBS@ + +libstl_base= libdb_stl +libstl= $(libstl_base).a +libstl_version= $(libstl_base)-$(LIBVERSION).a +libstlso= $(libstl_base)-$(LIBVERSION)@SOSUFFIX@ +libstlso_target=$(libstl_base)-$(LIBVERSION).la +libstlso_default=$(libstl_base)@SOSUFFIX@ +libstlso_major= $(libstl_base)-$(LIBMAJOR)@SOSUFFIX@ + +################################################## +# Java API. +# +# Java support is optional and requires shared librarires. 
+################################################## +CLASSPATH= $(JAVA_CLASSTOP) +LIBJSO_LIBS= @LIBJSO_LIBS@ @LIBSO_LIBS@ +SWIGCFLAGS= @SWIGCFLAGS@ + +JAR= @JAR@ +JAVA= env CLASSPATH="$(CLASSPATH)" @JAVA@ +JAVAC= env CLASSPATH="$(CLASSPATH)" @JAVAC@ +JAVACFLAGS= @JAVACFLAGS@ +JAVA_CLASSTOP= ./classes +JAVA_EXCLASSTOP=./classes.ex +JAVA_SRCDIR= $(langdir)/java/src +JAVA_EXDIR= $(exampledir)/java/src +JAVA_SLEEPYCAT= $(langdir)/java/src/com/sleepycat +JAVA_MANIFEST= $(langdir)/java/jarManifestEntries + +libj_jarfile= db.jar +libj_exjarfile= dbexamples.jar +libjso_base= libdb_java +libjso= $(libjso_base)-$(LIBVERSION)@JMODSUFFIX@ +libjso_static= $(libjso_base)-$(LIBVERSION).a +libjso_target= $(libjso_base)-$(LIBVERSION).la +libjso_default= $(libjso_base)@JMODSUFFIX@ +libjso_major= $(libjso_base)-$(LIBMAJOR)@JMODSUFFIX@ +libjso_g= $(libjso_base)-$(LIBVERSION)_g@JMODSUFFIX@ + +################################################## +# TCL API. +# +# Tcl support is optional. +################################################## +TCL_INCLUDE_SPEC= @TCL_INCLUDE_SPEC@ +LIBTSO_LIBS= @LIBTSO_LIBS@ @LIBSO_LIBS@ +TCL_SRCDIR= $(langdir)/tcl +libtcl_base= libdb_tcl +libtcl= $(libtcl_base).a +libtcl_version= $(libtcl_base)-$(LIBVERSION).a +libtso= $(libtcl_base)-$(LIBVERSION)@LIBTSO_MODSUFFIX@ +libtso_target= $(libtcl_base)-$(LIBVERSION).la +libtso_default= $(libtcl_base)@LIBTSO_MODSUFFIX@ +libtso_major= $(libtcl_base)-$(LIBMAJOR)@LIBTSO_MODSUFFIX@ + +################################################## +# db_dump185 UTILITY +# +# The db_dump185 application should be compiled using the system's db.h file +# (which should be a DB 1.85/1.86 include file), and the system's 1.85/1.86 +# object library. To include the right db.h, don't include -I$(builddir) on +# the compile line. You may also need to add a local include directory and +# local libraries, for example. Do that by adding -I options to the DB185INC +# line, and -l options to the DB185LIB line. 
+################################################## +DB185INC= -c @CFLAGS@ -I$(topdir) @CPPFLAGS@ +DB185LIB= + +################################################## +# Performance Event Monitoring definitions +################################################## +DTRACE= @DTRACE@ +DTRACE_PROVIDER= $(distdir)/db_provider.d + +################################################## +# NOTHING BELOW THIS LINE SHOULD EVER NEED TO BE MODIFIED. +################################################## + +################################################## +# Object and utility lists. +################################################## +BTREE_OBJS=\ + bt_compare@o@ bt_compress@o@ bt_conv@o@ bt_curadj@o@ bt_cursor@o@ \ + bt_delete@o@ bt_method@o@ bt_open@o@ bt_put@o@ bt_rec@o@ \ + bt_reclaim@o@ bt_recno@o@ bt_rsearch@o@ bt_search@o@ bt_split@o@ bt_stat@o@ \ + bt_compact@o@ bt_upgrade@o@ btree_auto@o@ +BTREE_VRFY_OBJS=\ + db_ovfl_vrfy@o@ db_vrfy@o@ db_vrfyutil@o@ bt_verify@o@ +HASH_OBJS=\ + hash@o@ hash_auto@o@ hash_compact@o@ hash_conv@o@ hash_dup@o@ \ + hash_meta@o@ hash_method@o@ hash_open@o@ hash_page@o@ hash_rec@o@ \ + hash_reclaim@o@ hash_stat@o@ hash_upgrade@o@ +HASH_VRFY_OBJS=\ + hash_verify@o@ +HEAP_OBJS=\ + heap@o@ heap_auto@o@ heap_conv@o@ heap_method@o@ heap_open@o@ \ + heap_rec@o@ heap_reclaim@o@ heap_stat@o@ +HEAP_VRFY_OBJS=\ + heap_verify@o@ +QUEUE_OBJS=\ + qam@o@ qam_auto@o@ qam_conv@o@ qam_files@o@ qam_method@o@ \ + qam_open@o@ qam_rec@o@ qam_stat@o@ qam_upgrade@o@ +QUEUE_VRFY_OBJS=\ + qam_verify@o@ +LOCK_OBJS=\ + lock@o@ lock_deadlock@o@ lock_failchk@o@ lock_id@o@ lock_list@o@ \ + lock_method@o@ lock_region@o@ lock_stat@o@ lock_timer@o@ lock_util@o@ + +LOG_VRFY_OBJS=\ + log_verify@o@ log_verify_util@o@ log_verify_int@o@ \ + log_verify_auto@o@ + +MUTEX_OBJS=\ + mut_alloc@o@ mut_failchk@o@ mut_method@o@ mut_region@o@ mut_stat@o@ +REP_OBJS=\ + rep_automsg@o@ rep_backup@o@ rep_elect@o@ rep_lease@o@ rep_log@o@ \ + rep_method@o@ rep_record@o@ rep_region@o@ rep_stat@o@ \ + 
rep_util@o@ rep_verify@o@ +REPMGR_OBJS=\ + os_addrinfo@o@\ + repmgr_auto@o@ repmgr_automsg@o@ repmgr_elect@o@ \ + repmgr_method@o@ repmgr_msg@o@ \ + repmgr_net@o@ repmgr_posix@o@ repmgr_queue@o@ repmgr_rec@o@ \ + repmgr_sel@o@ repmgr_stat@o@ repmgr_util@o@ +PRINT_OBJS=\ + btree_autop@o@ crdel_autop@o@ db_autop@o@ dbreg_autop@o@ \ + fileops_autop@o@ hash_autop@o@ heap_autop@o@ qam_autop@o@ \ + repmgr_autop@o@ txn_autop@o@ +XA_OBJS=\ + xa@o@ xa_map@o@ + +# When DTrace is enabled it may need to post-process (with -G) most of the +# object files in order to generate the additional objects in @FINAL_OBJS@. + +DTRACE_OBJS= @ADDITIONAL_OBJS@ @REPLACEMENT_OBJS@ @CRYPTO_OBJS@ \ + clock@o@ crdel_auto@o@ crdel_rec@o@ db@o@ db_am@o@ \ + db_auto@o@ db_byteorder@o@ db_cam@o@ db_cds@o@ db_compact@o@ \ + db_compint@o@ db_conv@o@ db_copy@o@ db_dispatch@o@ db_dup@o@ db_err@o@ \ + db_getlong@o@ db_idspace@o@ db_iface@o@ db_join@o@ db_log2@o@ \ + db_meta@o@ db_method@o@ db_open@o@ db_overflow@o@ db_pr@o@ db_rec@o@ \ + db_reclaim@o@ db_remove@o@ db_rename@o@ db_ret@o@ db_setid@o@ \ + db_setlsn@o@ db_shash@o@ db_sort_multiple@o@ db_stati@o@ \ + db_truncate@o@ db_upg@o@ db_upg_opd@o@ dbreg@o@ dbreg_stat@o@ \ + dbreg_auto@o@ dbreg_rec@o@ dbreg_util@o@ dbt@o@ env_alloc@o@ \ + env_config@o@ env_failchk@o@ env_file@o@ env_globals@o@ env_open@o@ \ + env_method@o@ env_name@o@ env_recover@o@ env_region@o@ env_register@o@ \ + env_sig@o@ env_stat@o@ fileops_auto@o@ fop_basic@o@ fop_rec@o@ \ + fop_util@o@ hash_func@o@ hmac@o@ log@o@ log_archive@o@ \ + log_compare@o@ log_debug@o@ log_get@o@ log_method@o@ log_print@o@ \ + log_put@o@ log_stat@o@ mkpath@o@ mp_alloc@o@ mp_bh@o@ mp_fget@o@ \ + mp_fmethod@o@ mp_fopen@o@ mp_fput@o@ mp_fset@o@ mp_method@o@ \ + mp_mvcc@o@ mp_region@o@ mp_register@o@ mp_resize@o@ mp_stat@o@ \ + mp_sync@o@ mp_trickle@o@ openflags@o@ os_abort@o@ os_abs@o@ \ + os_alloc@o@ os_clock@o@ os_cpu@o@ os_ctime@o@ os_config@o@ \ + os_dir@o@ os_errno@o@ os_fid@o@ os_flock@o@ os_fsync@o@ 
\ + os_getenv@o@ os_handle@o@ os_map@o@ os_method@o@ os_mkdir@o@ \ + os_open@o@ os_pid@o@ os_rename@o@ os_root@o@ os_rpath@o@ \ + os_rw@o@ os_seek@o@ os_stack@o@ os_stat@o@ os_tmpdir@o@ \ + os_truncate@o@ os_uid@o@ os_unlink@o@ os_yield@o@ partition@o@ \ + seq_stat@o@ sequence@o@ sha1@o@ snprintf@o@ txn@o@ txn_auto@o@ \ + txn_chkpt@o@ txn_failchk@o@ txn_method@o@ txn_rec@o@ txn_recover@o@ \ + txn_region@o@ txn_stat@o@ txn_util@o@ xa@o@ xa_map@o@ zerofill@o@ + +C_OBJS= $(DTRACE_OBJS) @FINAL_OBJS@ + +CUTEST_OBJS=\ + CuTest@o@ CuTests@o@ Runner@o@ TestChannel@o@ TestDbTuner@o@ \ + TestEncryption@o@ TestEnvConfig@o@ TestEnvMethod@o@ \ + TestKeyExistErrorReturn@o@ TestPartial@o@ TestQueue@o@ \ + +CXX_OBJS=\ + cxx_channel@o@ cxx_db@o@ cxx_dbc@o@ cxx_dbt@o@ cxx_env@o@ \ + cxx_except@o@ cxx_lock@o@ cxx_logc@o@ cxx_mpool@o@ cxx_multi@o@ \ + cxx_rid@o@ cxx_seq@o@ cxx_site@o@ cxx_txn@o@ + +CRYPTO_OBJS=\ + aes_method@o@ crypto@o@ mt19937db@o@ rijndael-alg-fst@o@ \ + rijndael-api-fst@o@ + +JAVA_OBJS=\ + db_java_wrap@o@ + +JAVA_DBSRCS=\ + $(JAVA_SLEEPYCAT)/asm/AnnotationVisitor.java \ + $(JAVA_SLEEPYCAT)/asm/AnnotationWriter.java \ + $(JAVA_SLEEPYCAT)/asm/Attribute.java \ + $(JAVA_SLEEPYCAT)/asm/ByteVector.java \ + $(JAVA_SLEEPYCAT)/asm/ClassAdapter.java \ + $(JAVA_SLEEPYCAT)/asm/ClassReader.java \ + $(JAVA_SLEEPYCAT)/asm/ClassVisitor.java \ + $(JAVA_SLEEPYCAT)/asm/ClassWriter.java \ + $(JAVA_SLEEPYCAT)/asm/Edge.java \ + $(JAVA_SLEEPYCAT)/asm/FieldVisitor.java \ + $(JAVA_SLEEPYCAT)/asm/FieldWriter.java \ + $(JAVA_SLEEPYCAT)/asm/Handler.java \ + $(JAVA_SLEEPYCAT)/asm/Item.java \ + $(JAVA_SLEEPYCAT)/asm/Label.java \ + $(JAVA_SLEEPYCAT)/asm/MethodVisitor.java \ + $(JAVA_SLEEPYCAT)/asm/MethodWriter.java \ + $(JAVA_SLEEPYCAT)/asm/Opcodes.java \ + $(JAVA_SLEEPYCAT)/asm/Type.java \ + $(JAVA_SLEEPYCAT)/bind/ByteArrayBinding.java \ + $(JAVA_SLEEPYCAT)/bind/EntityBinding.java \ + $(JAVA_SLEEPYCAT)/bind/EntryBinding.java \ + $(JAVA_SLEEPYCAT)/bind/RecordNumberBinding.java \ + 
$(JAVA_SLEEPYCAT)/bind/serial/ClassCatalog.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialBase.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialBinding.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialInput.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialOutput.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialSerialBinding.java \ + $(JAVA_SLEEPYCAT)/bind/serial/SerialSerialKeyCreator.java \ + $(JAVA_SLEEPYCAT)/bind/serial/StoredClassCatalog.java \ + $(JAVA_SLEEPYCAT)/bind/serial/TupleSerialBinding.java \ + $(JAVA_SLEEPYCAT)/bind/serial/TupleSerialKeyCreator.java \ + $(JAVA_SLEEPYCAT)/bind/serial/TupleSerialMarshalledBinding.java \ + $(JAVA_SLEEPYCAT)/bind/serial/TupleSerialMarshalledKeyCreator.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/BigDecimalBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/BigIntegerBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/BooleanBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/ByteBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/CharacterBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/DoubleBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/FloatBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/IntegerBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/LongBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/MarshalledTupleEntry.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/MarshalledTupleKeyEntity.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/PackedIntegerBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/PackedLongBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/ShortBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/SortedBigDecimalBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/SortedDoubleBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/SortedFloatBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/SortedPackedIntegerBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/SortedPackedLongBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/StringBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleBase.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleInput.java \ + 
$(JAVA_SLEEPYCAT)/bind/tuple/TupleInputBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleMarshalledBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleOutput.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleTupleBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleTupleKeyCreator.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleTupleMarshalledBinding.java \ + $(JAVA_SLEEPYCAT)/bind/tuple/TupleTupleMarshalledKeyCreator.java \ + $(JAVA_SLEEPYCAT)/collections/BaseIterator.java \ + $(JAVA_SLEEPYCAT)/collections/BlockIterator.java \ + $(JAVA_SLEEPYCAT)/collections/CurrentTransaction.java \ + $(JAVA_SLEEPYCAT)/collections/DataCursor.java \ + $(JAVA_SLEEPYCAT)/collections/DataView.java \ + $(JAVA_SLEEPYCAT)/collections/MapEntryParameter.java \ + $(JAVA_SLEEPYCAT)/collections/MyRangeCursor.java \ + $(JAVA_SLEEPYCAT)/collections/PrimaryKeyAssigner.java \ + $(JAVA_SLEEPYCAT)/collections/StoredCollection.java \ + $(JAVA_SLEEPYCAT)/collections/StoredCollections.java \ + $(JAVA_SLEEPYCAT)/collections/StoredContainer.java \ + $(JAVA_SLEEPYCAT)/collections/StoredEntrySet.java \ + $(JAVA_SLEEPYCAT)/collections/StoredIterator.java \ + $(JAVA_SLEEPYCAT)/collections/StoredKeySet.java \ + $(JAVA_SLEEPYCAT)/collections/StoredList.java \ + $(JAVA_SLEEPYCAT)/collections/StoredMap.java \ + $(JAVA_SLEEPYCAT)/collections/StoredMapEntry.java \ + $(JAVA_SLEEPYCAT)/collections/StoredSortedEntrySet.java \ + $(JAVA_SLEEPYCAT)/collections/StoredSortedKeySet.java \ + $(JAVA_SLEEPYCAT)/collections/StoredSortedMap.java \ + $(JAVA_SLEEPYCAT)/collections/StoredSortedValueSet.java \ + $(JAVA_SLEEPYCAT)/collections/StoredValueSet.java \ + $(JAVA_SLEEPYCAT)/collections/TransactionRunner.java \ + $(JAVA_SLEEPYCAT)/collections/TransactionWorker.java \ + $(JAVA_SLEEPYCAT)/collections/TupleSerialFactory.java \ + $(JAVA_SLEEPYCAT)/compat/DbCompat.java \ + $(JAVA_SLEEPYCAT)/db/BtreeCompressor.java \ + $(JAVA_SLEEPYCAT)/db/BtreePrefixCalculator.java \ + $(JAVA_SLEEPYCAT)/db/BtreeStats.java \ + 
$(JAVA_SLEEPYCAT)/db/CacheFile.java \ + $(JAVA_SLEEPYCAT)/db/CacheFilePriority.java \ + $(JAVA_SLEEPYCAT)/db/CacheFileStats.java \ + $(JAVA_SLEEPYCAT)/db/CacheStats.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationChannel.java \ + $(JAVA_SLEEPYCAT)/db/CheckpointConfig.java \ + $(JAVA_SLEEPYCAT)/db/CompactConfig.java \ + $(JAVA_SLEEPYCAT)/db/CompactStats.java \ + $(JAVA_SLEEPYCAT)/db/Cursor.java \ + $(JAVA_SLEEPYCAT)/db/CursorConfig.java \ + $(JAVA_SLEEPYCAT)/db/Database.java \ + $(JAVA_SLEEPYCAT)/db/DatabaseConfig.java \ + $(JAVA_SLEEPYCAT)/db/DatabaseEntry.java \ + $(JAVA_SLEEPYCAT)/db/DatabaseException.java \ + $(JAVA_SLEEPYCAT)/db/DatabaseStats.java \ + $(JAVA_SLEEPYCAT)/db/DatabaseType.java \ + $(JAVA_SLEEPYCAT)/db/DeadlockException.java \ + $(JAVA_SLEEPYCAT)/db/Environment.java \ + $(JAVA_SLEEPYCAT)/db/EnvironmentConfig.java \ + $(JAVA_SLEEPYCAT)/db/ErrorHandler.java \ + $(JAVA_SLEEPYCAT)/db/EventHandler.java \ + $(JAVA_SLEEPYCAT)/db/EventHandlerAdapter.java \ + $(JAVA_SLEEPYCAT)/db/FeedbackHandler.java \ + $(JAVA_SLEEPYCAT)/db/ForeignKeyDeleteAction.java \ + $(JAVA_SLEEPYCAT)/db/ForeignKeyNullifier.java \ + $(JAVA_SLEEPYCAT)/db/ForeignMultiKeyNullifier.java \ + $(JAVA_SLEEPYCAT)/db/HashStats.java \ + $(JAVA_SLEEPYCAT)/db/Hasher.java \ + $(JAVA_SLEEPYCAT)/db/HeapRecordId.java \ + $(JAVA_SLEEPYCAT)/db/HeapFullException.java \ + $(JAVA_SLEEPYCAT)/db/HeapStats.java \ + $(JAVA_SLEEPYCAT)/db/JoinConfig.java \ + $(JAVA_SLEEPYCAT)/db/JoinCursor.java \ + $(JAVA_SLEEPYCAT)/db/KeyRange.java \ + $(JAVA_SLEEPYCAT)/db/Lock.java \ + $(JAVA_SLEEPYCAT)/db/LockDetectMode.java \ + $(JAVA_SLEEPYCAT)/db/LockMode.java \ + $(JAVA_SLEEPYCAT)/db/LockNotGrantedException.java \ + $(JAVA_SLEEPYCAT)/db/LockOperation.java \ + $(JAVA_SLEEPYCAT)/db/LockRequest.java \ + $(JAVA_SLEEPYCAT)/db/LockRequestMode.java \ + $(JAVA_SLEEPYCAT)/db/LockStats.java \ + $(JAVA_SLEEPYCAT)/db/LogCursor.java \ + $(JAVA_SLEEPYCAT)/db/LogRecordHandler.java \ + $(JAVA_SLEEPYCAT)/db/LogSequenceNumber.java \ + 
$(JAVA_SLEEPYCAT)/db/LogStats.java \ + $(JAVA_SLEEPYCAT)/db/LogVerifyConfig.java \ + $(JAVA_SLEEPYCAT)/db/MemoryException.java \ + $(JAVA_SLEEPYCAT)/db/MessageHandler.java \ + $(JAVA_SLEEPYCAT)/db/MultipleNIODataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleKeyNIODataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleRecnoNIODataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleDataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleKeyDataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MultipleRecnoDataEntry.java \ + $(JAVA_SLEEPYCAT)/db/MutexStats.java \ + $(JAVA_SLEEPYCAT)/db/OperationStatus.java \ + $(JAVA_SLEEPYCAT)/db/PanicHandler.java \ + $(JAVA_SLEEPYCAT)/db/PartitionHandler.java \ + $(JAVA_SLEEPYCAT)/db/PreparedTransaction.java \ + $(JAVA_SLEEPYCAT)/db/QueueStats.java \ + $(JAVA_SLEEPYCAT)/db/RecordNumberAppender.java \ + $(JAVA_SLEEPYCAT)/db/RecoveryOperation.java \ + $(JAVA_SLEEPYCAT)/db/RegionResourceType.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationConfig.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationDuplicateMasterException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationHandleDeadException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationHoldElectionException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationHostAddress.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationJoinFailureException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationLeaseExpiredException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationLockoutException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerAckPolicy.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerMessageDispatch.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerSite.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerSiteConfig.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerSiteInfo.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerStats.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationManagerStartPolicy.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationSiteUnavailableException.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationStats.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationStatus.java \ + 
$(JAVA_SLEEPYCAT)/db/ReplicationTimeoutType.java \ + $(JAVA_SLEEPYCAT)/db/ReplicationTransport.java \ + $(JAVA_SLEEPYCAT)/db/RunRecoveryException.java \ + $(JAVA_SLEEPYCAT)/db/SecondaryConfig.java \ + $(JAVA_SLEEPYCAT)/db/SecondaryCursor.java \ + $(JAVA_SLEEPYCAT)/db/SecondaryDatabase.java \ + $(JAVA_SLEEPYCAT)/db/SecondaryKeyCreator.java \ + $(JAVA_SLEEPYCAT)/db/SecondaryMultiKeyCreator.java \ + $(JAVA_SLEEPYCAT)/db/Sequence.java \ + $(JAVA_SLEEPYCAT)/db/SequenceConfig.java \ + $(JAVA_SLEEPYCAT)/db/SequenceStats.java \ + $(JAVA_SLEEPYCAT)/db/StatsConfig.java \ + $(JAVA_SLEEPYCAT)/db/Transaction.java \ + $(JAVA_SLEEPYCAT)/db/TransactionConfig.java \ + $(JAVA_SLEEPYCAT)/db/TransactionStats.java \ + $(JAVA_SLEEPYCAT)/db/TransactionStatus.java \ + $(JAVA_SLEEPYCAT)/db/VerboseConfig.java \ + $(JAVA_SLEEPYCAT)/db/VerifyConfig.java \ + $(JAVA_SLEEPYCAT)/db/VersionMismatchException.java \ + $(JAVA_SLEEPYCAT)/db/internal/Db.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbConstants.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbChannel.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbEnv.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbLock.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbLogc.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbMpoolFile.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbSequence.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbSite.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbTxn.java \ + $(JAVA_SLEEPYCAT)/db/internal/DbUtil.java \ + $(JAVA_SLEEPYCAT)/db/internal/Dbc.java \ + $(JAVA_SLEEPYCAT)/db/internal/db_java.java \ + $(JAVA_SLEEPYCAT)/db/internal/db_javaJNI.java \ + $(JAVA_SLEEPYCAT)/persist/BasicCursor.java \ + $(JAVA_SLEEPYCAT)/persist/BasicIndex.java \ + $(JAVA_SLEEPYCAT)/persist/BasicIterator.java \ + $(JAVA_SLEEPYCAT)/persist/DataValueAdapter.java \ + $(JAVA_SLEEPYCAT)/persist/DatabaseNamer.java \ + $(JAVA_SLEEPYCAT)/persist/EntityCursor.java \ + $(JAVA_SLEEPYCAT)/persist/EntityIndex.java \ + $(JAVA_SLEEPYCAT)/persist/EntityJoin.java \ + $(JAVA_SLEEPYCAT)/persist/EntityStore.java \ + 
$(JAVA_SLEEPYCAT)/persist/EntityValueAdapter.java \ + $(JAVA_SLEEPYCAT)/persist/ForwardCursor.java \ + $(JAVA_SLEEPYCAT)/persist/IndexNotAvailableException.java \ + $(JAVA_SLEEPYCAT)/persist/KeySelector.java \ + $(JAVA_SLEEPYCAT)/persist/KeyValueAdapter.java \ + $(JAVA_SLEEPYCAT)/persist/KeysIndex.java \ + $(JAVA_SLEEPYCAT)/persist/PrimaryIndex.java \ + $(JAVA_SLEEPYCAT)/persist/PrimaryKeyValueAdapter.java \ + $(JAVA_SLEEPYCAT)/persist/SecondaryIndex.java \ + $(JAVA_SLEEPYCAT)/persist/StoreConfig.java \ + $(JAVA_SLEEPYCAT)/persist/StoreExistsException.java \ + $(JAVA_SLEEPYCAT)/persist/StoreNotFoundException.java \ + $(JAVA_SLEEPYCAT)/persist/SubIndex.java \ + $(JAVA_SLEEPYCAT)/persist/SubIndexCursor.java \ + $(JAVA_SLEEPYCAT)/persist/ValueAdapter.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Conversion.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Converter.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/DeletedClassException.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Deleter.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EntityConverter.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EvolveConfig.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EvolveEvent.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EvolveInternal.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EvolveListener.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/EvolveStats.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/IncompatibleClassException.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Mutation.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Mutations.java \ + $(JAVA_SLEEPYCAT)/persist/evolve/Renamer.java \ + $(JAVA_SLEEPYCAT)/persist/impl/AbstractInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Accessor.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Catalog.java \ + $(JAVA_SLEEPYCAT)/persist/impl/CollectionProxy.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ComplexFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ComparatorCatalog.java \ + $(JAVA_SLEEPYCAT)/persist/impl/CompositeKeyFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ConverterReader.java \ + 
$(JAVA_SLEEPYCAT)/persist/impl/Enhanced.java \ + $(JAVA_SLEEPYCAT)/persist/impl/EnhancedAccessor.java \ + $(JAVA_SLEEPYCAT)/persist/impl/EntityInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/EntityOutput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/EnumFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Evolver.java \ + $(JAVA_SLEEPYCAT)/persist/impl/FieldInfo.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Format.java \ + $(JAVA_SLEEPYCAT)/persist/impl/KeyLocation.java \ + $(JAVA_SLEEPYCAT)/persist/impl/MapProxy.java \ + $(JAVA_SLEEPYCAT)/persist/impl/NonPersistentFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ObjectArrayFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistCatalog.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistComparator.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistEntityBinding.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistKeyAssigner.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistKeyBinding.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PersistKeyCreator.java \ + $(JAVA_SLEEPYCAT)/persist/impl/PrimitiveArrayFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ProxiedFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RawAbstractInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RawAccessor.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RawArrayInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RawComplexInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RawSingleInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ReadOnlyCatalog.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Reader.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RecordInput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RecordOutput.java \ + $(JAVA_SLEEPYCAT)/persist/impl/ReflectionAccessor.java \ + $(JAVA_SLEEPYCAT)/persist/impl/RefreshException.java \ + $(JAVA_SLEEPYCAT)/persist/impl/SimpleCatalog.java \ + $(JAVA_SLEEPYCAT)/persist/impl/SimpleFormat.java \ + $(JAVA_SLEEPYCAT)/persist/impl/Store.java \ + $(JAVA_SLEEPYCAT)/persist/impl/StoredModel.java \ + $(JAVA_SLEEPYCAT)/persist/impl/VisitedObjects.java \ + $(JAVA_SLEEPYCAT)/persist/impl/WidenerInput.java \ 
+ $(JAVA_SLEEPYCAT)/persist/model/AnnotationModel.java \ + $(JAVA_SLEEPYCAT)/persist/model/BytecodeEnhancer.java \ + $(JAVA_SLEEPYCAT)/persist/model/ClassEnhancer.java \ + $(JAVA_SLEEPYCAT)/persist/model/ClassMetadata.java \ + $(JAVA_SLEEPYCAT)/persist/model/DeleteAction.java \ + $(JAVA_SLEEPYCAT)/persist/model/Entity.java \ + $(JAVA_SLEEPYCAT)/persist/model/EntityMetadata.java \ + $(JAVA_SLEEPYCAT)/persist/model/EntityModel.java \ + $(JAVA_SLEEPYCAT)/persist/model/FieldMetadata.java \ + $(JAVA_SLEEPYCAT)/persist/model/KeyField.java \ + $(JAVA_SLEEPYCAT)/persist/model/ModelInternal.java \ + $(JAVA_SLEEPYCAT)/persist/model/NotPersistent.java \ + $(JAVA_SLEEPYCAT)/persist/model/NotTransient.java \ + $(JAVA_SLEEPYCAT)/persist/model/Persistent.java \ + $(JAVA_SLEEPYCAT)/persist/model/PersistentProxy.java \ + $(JAVA_SLEEPYCAT)/persist/model/PrimaryKey.java \ + $(JAVA_SLEEPYCAT)/persist/model/PrimaryKeyMetadata.java \ + $(JAVA_SLEEPYCAT)/persist/model/Relationship.java \ + $(JAVA_SLEEPYCAT)/persist/model/SecondaryKey.java \ + $(JAVA_SLEEPYCAT)/persist/model/SecondaryKeyMetadata.java \ + $(JAVA_SLEEPYCAT)/persist/raw/RawField.java \ + $(JAVA_SLEEPYCAT)/persist/raw/RawObject.java \ + $(JAVA_SLEEPYCAT)/persist/raw/RawStore.java \ + $(JAVA_SLEEPYCAT)/persist/raw/RawType.java \ + $(JAVA_SLEEPYCAT)/util/ErrorBuffer.java \ + $(JAVA_SLEEPYCAT)/util/ExceptionUnwrapper.java \ + $(JAVA_SLEEPYCAT)/util/ExceptionWrapper.java \ + $(JAVA_SLEEPYCAT)/util/FastInputStream.java \ + $(JAVA_SLEEPYCAT)/util/FastOutputStream.java \ + $(JAVA_SLEEPYCAT)/util/IOExceptionWrapper.java \ + $(JAVA_SLEEPYCAT)/util/PackedInteger.java \ + $(JAVA_SLEEPYCAT)/util/RuntimeExceptionWrapper.java \ + $(JAVA_SLEEPYCAT)/util/UtfOps.java \ + $(JAVA_SLEEPYCAT)/util/keyrange/KeyRange.java \ + $(JAVA_SLEEPYCAT)/util/keyrange/KeyRangeException.java \ + $(JAVA_SLEEPYCAT)/util/keyrange/RangeCursor.java + +JAVA_EXSRCS=\ + $(JAVA_EXDIR)/collections/access/AccessExample.java \ + 
$(JAVA_EXDIR)/collections/hello/HelloDatabaseWorld.java \ + $(JAVA_EXDIR)/collections/ship/basic/PartData.java \ + $(JAVA_EXDIR)/collections/ship/basic/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/basic/Sample.java \ + $(JAVA_EXDIR)/collections/ship/basic/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/basic/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/basic/ShipmentData.java \ + $(JAVA_EXDIR)/collections/ship/basic/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/basic/SupplierData.java \ + $(JAVA_EXDIR)/collections/ship/basic/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/basic/Weight.java \ + $(JAVA_EXDIR)/collections/ship/entity/Part.java \ + $(JAVA_EXDIR)/collections/ship/entity/PartData.java \ + $(JAVA_EXDIR)/collections/ship/entity/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/entity/Sample.java \ + $(JAVA_EXDIR)/collections/ship/entity/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/entity/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/entity/Shipment.java \ + $(JAVA_EXDIR)/collections/ship/entity/ShipmentData.java \ + $(JAVA_EXDIR)/collections/ship/entity/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/entity/Supplier.java \ + $(JAVA_EXDIR)/collections/ship/entity/SupplierData.java \ + $(JAVA_EXDIR)/collections/ship/entity/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/entity/Weight.java \ + $(JAVA_EXDIR)/collections/ship/factory/Part.java \ + $(JAVA_EXDIR)/collections/ship/factory/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/factory/Sample.java \ + $(JAVA_EXDIR)/collections/ship/factory/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/factory/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/factory/Shipment.java \ + $(JAVA_EXDIR)/collections/ship/factory/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/factory/Supplier.java \ + $(JAVA_EXDIR)/collections/ship/factory/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/factory/Weight.java \ + $(JAVA_EXDIR)/collections/ship/index/PartData.java \ 
+ $(JAVA_EXDIR)/collections/ship/index/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/index/Sample.java \ + $(JAVA_EXDIR)/collections/ship/index/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/index/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/index/ShipmentData.java \ + $(JAVA_EXDIR)/collections/ship/index/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/index/SupplierData.java \ + $(JAVA_EXDIR)/collections/ship/index/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/index/Weight.java \ + $(JAVA_EXDIR)/collections/ship/marshal/MarshalledEnt.java \ + $(JAVA_EXDIR)/collections/ship/marshal/MarshalledKey.java \ + $(JAVA_EXDIR)/collections/ship/marshal/Part.java \ + $(JAVA_EXDIR)/collections/ship/marshal/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/marshal/Sample.java \ + $(JAVA_EXDIR)/collections/ship/marshal/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/marshal/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/marshal/Shipment.java \ + $(JAVA_EXDIR)/collections/ship/marshal/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/marshal/Supplier.java \ + $(JAVA_EXDIR)/collections/ship/marshal/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/marshal/Weight.java \ + $(JAVA_EXDIR)/collections/ship/sentity/Part.java \ + $(JAVA_EXDIR)/collections/ship/sentity/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/sentity/Sample.java \ + $(JAVA_EXDIR)/collections/ship/sentity/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/sentity/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/sentity/Shipment.java \ + $(JAVA_EXDIR)/collections/ship/sentity/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/sentity/Supplier.java \ + $(JAVA_EXDIR)/collections/ship/sentity/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/sentity/Weight.java \ + $(JAVA_EXDIR)/collections/ship/tuple/Part.java \ + $(JAVA_EXDIR)/collections/ship/tuple/PartData.java \ + $(JAVA_EXDIR)/collections/ship/tuple/PartKey.java \ + $(JAVA_EXDIR)/collections/ship/tuple/Sample.java 
\ + $(JAVA_EXDIR)/collections/ship/tuple/SampleDatabase.java \ + $(JAVA_EXDIR)/collections/ship/tuple/SampleViews.java \ + $(JAVA_EXDIR)/collections/ship/tuple/Shipment.java \ + $(JAVA_EXDIR)/collections/ship/tuple/ShipmentData.java \ + $(JAVA_EXDIR)/collections/ship/tuple/ShipmentKey.java \ + $(JAVA_EXDIR)/collections/ship/tuple/Supplier.java \ + $(JAVA_EXDIR)/collections/ship/tuple/SupplierData.java \ + $(JAVA_EXDIR)/collections/ship/tuple/SupplierKey.java \ + $(JAVA_EXDIR)/collections/ship/tuple/Weight.java \ + $(JAVA_EXDIR)/db/AccessExample.java \ + $(JAVA_EXDIR)/db/BtRecExample.java \ + $(JAVA_EXDIR)/db/BulkAccessExample.java \ + $(JAVA_EXDIR)/db/BulkAccessNIOExample.java \ + $(JAVA_EXDIR)/db/BulkExample.java \ + $(JAVA_EXDIR)/db/EnvExample.java \ + $(JAVA_EXDIR)/db/GettingStarted/ExampleDatabaseLoad.java \ + $(JAVA_EXDIR)/db/GettingStarted/ExampleDatabaseRead.java \ + $(JAVA_EXDIR)/db/GettingStarted/Inventory.java \ + $(JAVA_EXDIR)/db/GettingStarted/InventoryBinding.java \ + $(JAVA_EXDIR)/db/GettingStarted/ItemNameKeyCreator.java \ + $(JAVA_EXDIR)/db/GettingStarted/MyDbs.java \ + $(JAVA_EXDIR)/db/GettingStarted/Vendor.java \ + $(JAVA_EXDIR)/db/LockExample.java \ + $(JAVA_EXDIR)/db/SequenceExample.java \ + $(JAVA_EXDIR)/db/TpcbExample.java \ + $(JAVA_EXDIR)/db/repquote/RepConfig.java \ + $(JAVA_EXDIR)/db/repquote/RepQuoteEnvironment.java \ + $(JAVA_EXDIR)/db/repquote/RepQuoteExample.java \ + $(JAVA_EXDIR)/db/repquote/RepRemoteHost.java \ + $(JAVA_EXDIR)/db/repquote_gsg/RepConfig.java \ + $(JAVA_EXDIR)/db/repquote_gsg/RepQuoteEnvironment.java \ + $(JAVA_EXDIR)/db/repquote_gsg/RepQuoteExampleGSG.java \ + $(JAVA_EXDIR)/db/repquote_gsg/SimpleConfig.java \ + $(JAVA_EXDIR)/db/repquote_gsg/SimpleTxn.java \ + $(JAVA_EXDIR)/db/txn/DBWriter.java \ + $(JAVA_EXDIR)/db/txn/PayloadData.java \ + $(JAVA_EXDIR)/db/txn/TxnGuide.java \ + $(JAVA_EXDIR)/db/txn/TxnGuideInMemory.java \ + $(JAVA_EXDIR)/persist/CustomKeyOrderExample.java \ + $(JAVA_EXDIR)/persist/DplDump.java \ + 
$(JAVA_EXDIR)/persist/EventExample.java \ + $(JAVA_EXDIR)/persist/EventExampleDPL.java \ + $(JAVA_EXDIR)/persist/PersonExample.java \ + $(JAVA_EXDIR)/persist/gettingStarted/SimpleDA.java \ + $(JAVA_EXDIR)/persist/gettingStarted/SimpleEntityClass.java \ + $(JAVA_EXDIR)/persist/gettingStarted/SimpleStoreGet.java \ + $(JAVA_EXDIR)/persist/gettingStarted/SimpleStorePut.java \ + $(JAVA_EXDIR)/persist/txn/PayloadDataEntity.java \ + $(JAVA_EXDIR)/persist/txn/StoreWriter.java \ + $(JAVA_EXDIR)/persist/txn/TxnGuideDPL.java + +SQL_OBJS=\ + sqlite3@o@ $(C_OBJS) + +STL_OBJS=\ + dbstl_container@o@ dbstl_resource_manager@o@ + +TCL_OBJS=\ + tcl_compat@o@ tcl_db@o@ tcl_db_pkg@o@ tcl_dbcursor@o@ tcl_env@o@ \ + tcl_internal@o@ tcl_lock@o@ tcl_log@o@ tcl_mp@o@ tcl_mutex@o@ \ + tcl_rep@o@ tcl_seq@o@ tcl_txn@o@ tcl_util@o@ + +TEST_MICRO_OBJS=\ + b_curalloc@o@ b_curwalk@o@ b_del@o@ b_get@o@ b_inmem@o@ b_latch@o@ \ + b_load@o@ b_open@o@ b_put@o@ b_recover@o@ b_txn@o@ b_txn_write@o@ \ + b_uname@o@ b_util@o@ b_workload@o@ test_micro@o@ util_arg@o@ + +UTIL_PROGS=\ + @ADDITIONAL_PROGS@ \ + db_archive db_checkpoint db_deadlock db_dump \ + db_hotbackup db_load db_log_verify db_printlog db_recover \ + db_replicate db_stat db_tuner db_upgrade db_verify + +################################################## +# List of files installed into the library directory. 
+################################################## +LIB_INSTALL_FILE_LIST=\ + $(libdb) \ + $(libso) \ + $(libso_default) \ + $(libso_major) \ + $(libdb_version) \ + $(libcxx) \ + $(libxso) \ + $(libxso_default) \ + $(libxso_major) \ + $(libcxx_version) \ + $(libsql) \ + $(libsqlso) \ + $(libsqlso_default) \ + $(libsqlso_major) \ + $(libsql_version) \ + $(libstl) \ + $(libstlso) \ + $(libstlso_default) \ + $(libstlso_major) \ + $(libstl_version) \ + $(libtcl) \ + $(libtso) \ + $(libtso_default) \ + $(libtso_major) \ + $(libtcl_version) \ + $(libjso) \ + $(libjso_default) \ + $(libjso_g) \ + $(libjso_major) \ + $(libjso_static) \ + $(libj_exjarfile) \ + $(libj_jarfile) \ + @INSTALL_LIBS@ @INSTALL_LIBS_EXTRA@ + +################################################## +# Note: "all" must be the first target in the Makefile. +################################################## +all: @BUILD_TARGET@ + +install: all @INSTALL_TARGET@ + +################################################## +# Library and standard utilities build. +################################################## +library_build: @INSTALL_LIBS@ @ADDITIONAL_LANG@ $(UTIL_PROGS) + +# Static C library named libdb.a. +$(libdb): $(DEF_LIB) + +# Real static C library. +$(libdb_version): $(C_OBJS) + $(AR) cr $@ $(C_OBJS) + $(RANLIB) $@ + $(RM) $(libdb) + $(LN) -s $(libdb_version) $(libdb) + +# Shared C library. +$(libso_target): $(C_OBJS) + $(SOLINK) $(SOFLAGS) $(LDFLAGS) -o $@ $(C_OBJS) \ + $(LIBCSO_LIBS) + $(RM) $(libdb) + $(LN) -s .libs/$(libdb_version) $(libdb) + +# Static C++ library named libdb_cxx.a. +$(libcxx): $(DEF_LIB_CXX) + +# Real static C++ library. +$(libcxx_version): $(CXX_OBJS) $(C_OBJS) + $(AR) cr $@ $(CXX_OBJS) $(C_OBJS) + $(RANLIB) $@ + $(RM) $(libcxx) + $(LN) -s $(libcxx_version) $(libcxx) + +# Shared C++ library. 
+$(libxso_target): $(CXX_OBJS) $(C_OBJS) + $(XSOLINK) $(SOFLAGS) $(LDFLAGS) \ + -o $@ $(CXX_OBJS) $(C_OBJS) $(LIBXSO_LIBS) + $(RM) $(libcxx) + $(LN) -s .libs/$(libcxx_version) $(libcxx) + +# Static SQL library named libdb_sql.a. +$(libsql): $(DEF_LIB_SQL) + +# Static SQL library. +$(libsql_version): $(SQL_OBJS) + $(AR) cr $@ $(SQL_OBJS) + $(RANLIB) $@ + $(RM) $(libsql) + $(LN) -s $(libsql_version) $(libsql) + +# Shared SQL library. +$(libsqlso_target): $(SQL_OBJS) + $(SOLINK) $(SOFLAGS) $(LDFLAGS) -o $@ $(SQL_OBJS) $(LIBCSO_LIBS) + $(RM) $(libsql) + $(LN) -s .libs/$(libsql_version) $(libsql) + +dbsql: shell@o@ $(DEF_LIB_SQL) + $(CCLINK) -o $@ $(LDFLAGS) shell@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS) + $(POSTLINK) $@ + +jdbc: $(DEF_LIB) + @(test -d jdbc && cd jdbc && PWD='.' $(MAKE)) + +sql-test: $(DEF_LIB) + @(cd sql && $(MAKE) LTLINK_EXTRAS="../$(DEF_LIB) $(LIBS)" testfixture@EXEEXT@) + +# SQL API header file to be installed +# Some configurations of Solaris make don't handle $< as an explicit dependency +# so duplicate the name in the rule. +dbsql.h: $(langdir)/sql/generated/sqlite3.h + $(CP) $(langdir)/sql/generated/sqlite3.h $@ + +# SQLite drop-in replacements +$(libsqlite): $(SQL_OBJS) + $(AR) cr $@ $(SQL_OBJS) + $(RANLIB) $@ + +$(libsqliteso_target): $(SQL_OBJS) + $(SOLINK) $(SOFLAGS) $(LDFLAGS) -version-info "8:6:8" -o $@ \ + $(SQL_OBJS) $(LIBCSO_LIBS) + +sqlite3: shell@o@ $(DEF_LIB_SQLITE) + $(CCLINK) -o $@ $(LDFLAGS) shell@o@ $(DEF_LIB_SQLITE) @SQL_LIBS@ $(LIBS) + $(POSTLINK) $@ + +# Static STL library named libdb_stl.a. +$(libstl): $(DEF_LIB_STL) + +# Real static STL library. +$(libstl_version): $(STL_OBJS) $(CXX_OBJS) $(C_OBJS) + $(AR) cr $@ $(STL_OBJS) $(CXX_OBJS) $(C_OBJS) + $(RANLIB) $@ + $(RM) $(libstl) + $(LN) -s $(libstl_version) $(libstl) + +# Shared STL library. 
+$(libstlso_target): $(STL_OBJS) $(CXX_OBJS) $(C_OBJS) + $(XSOLINK) $(SOFLAGS) $(LDFLAGS) -o $@ $(STL_OBJS) \ + $(LIBSTLSO_LIBS) $(CXX_OBJS) $(C_OBJS) $(LIBXSO_LIBS) + $(RM) $(libstl) + $(LN) -s .libs/$(libstl_version) $(libstl) + +# Shared Java library. +$(libjso_target): $(JAVA_OBJS) $(C_OBJS) + $(SOLINK) -shrext @JMODSUFFIX@ $(SOFLAGS) $(LDFLAGS) \ + -o $@ $(JAVA_OBJS) $(C_OBJS) $(LIBJSO_LIBS) + +# Static Tcl library +$(libtcl): $(DEF_LIB_TCL) + +# Real static Tcl library. +$(libtcl_version): $(TCL_OBJS) $(C_OBJS) + $(AR) cr $@ $(TCL_OBJS) $(C_OBJS) + $(RANLIB) $@ + $(RM) $(libtcl) + $(LN) -s $(libtcl_version) $(libtcl) + +# Shared Tcl library. +$(libtso_target): $(TCL_OBJS) $(C_OBJS) + $(SOLINK) @LIBTSO_MODULE@ $(SOFLAGS) $(LDFLAGS) \ + -o $@ $(TCL_OBJS) $(C_OBJS) $(LIBTSO_LIBS) + $(RM) $(libtcl) + $(LN) -s .libs/$(libtcl_version) $(libtcl) + +################################################## +# Creating individual dependencies and actions for building class +# files is possible, but it is very messy and error prone. +################################################## +java: $(libj_jarfile) $(libj_exjarfile) + +$(libj_jarfile): $(JAVA_DBSRCS) + @test -d $(JAVA_CLASSTOP) || \ + ($(MKDIR) -p $(JAVA_CLASSTOP) && \ + $(CHMOD) $(dmode) $(JAVA_CLASSTOP)) + $(JAVAC) -d $(JAVA_CLASSTOP) $(JAVACFLAGS) $(JAVA_DBSRCS) + $(JAVA) -classpath $(JAVA_CLASSTOP) \ + com.sleepycat.persist.model.ClassEnhancer $(JAVA_CLASSTOP) + cd $(JAVA_CLASSTOP) && \ + $(JAR) cfm ../$(libj_jarfile) ../$(JAVA_MANIFEST) ./com/sleepycat + +$(libj_exjarfile): $(libj_jarfile) $(JAVA_EXSRCS) + @test -d $(JAVA_EXCLASSTOP) || \ + ($(MKDIR) -p $(JAVA_EXCLASSTOP) && \ + $(CHMOD) $(dmode) $(JAVA_EXCLASSTOP)) + $(JAVAC) -classpath $(libj_jarfile) -d $(JAVA_EXCLASSTOP) \ + $(JAVACFLAGS) $(JAVA_EXSRCS) + cd $(JAVA_EXCLASSTOP) && $(JAR) cf ../$(libj_exjarfile) . 
+
+##################################################
+# Utilities
+##################################################
+db_archive: db_archive@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_archive@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_checkpoint: db_checkpoint@o@ util_log@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_checkpoint@o@ util_log@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_deadlock: db_deadlock@o@ util_log@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_deadlock@o@ util_log@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_dump: db_dump@o@ util_cache@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_dump@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_dump185: db_dump185@o@ @REPLACEMENT_OBJS@
+	$(CCLINK) -o $@ $(LDFLAGS) db_dump185@o@ @REPLACEMENT_OBJS@ $(DB185LIB)
+	$(POSTLINK) $@
+
+db_hotbackup: db_hotbackup@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_hotbackup@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_load: db_load@o@ util_cache@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_load@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_printlog: db_printlog@o@ @PRINTLOG_OBJS@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_printlog@o@ @PRINTLOG_OBJS@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+db_recover: db_recover@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_recover@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+# The link line uses util_log@o@, so it must also be a prerequisite here
+# (matching db_checkpoint/db_deadlock); otherwise make may link before the
+# object exists and will not relink when it changes.
+db_replicate: db_replicate@o@ util_log@o@ util_sig@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    db_replicate@o@ util_log@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+DBSQL_CODEGEN_OBJS=\
+	db_sql_codegen@o@ parse@o@ preparser@o@ parsefuncs@o@ tokenize@o@ \
+	sqlprintf@o@ buildpt@o@ utils@o@ generate@o@ generate_test@o@ \
+	generation_utils@o@ generate_verification@o@ hint_comment@o@
+
+db_sql_codegen: $(DBSQL_CODEGEN_OBJS)
$(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) $(DBSQL_CODEGEN_OBJS) $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ + +db_stat: db_stat@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) \ + db_stat@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ + +db_tuner: db_tuner@o@ util_sig@o@ $(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) \ + db_tuner@o@ util_sig@o@ $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ + +db_upgrade: db_upgrade@o@ util_sig@o@ $(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) \ + db_upgrade@o@ util_sig@o@ $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ + +db_verify: db_verify@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) \ + db_verify@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ + +db_log_verify: db_log_verify@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) + $(CCLINK) -o $@ $(LDFLAGS) \ + db_log_verify@o@ util_cache@o@ util_sig@o@ $(DEF_LIB) $(LIBS) + $(POSTLINK) $@ +################################################## +# Library and standard utilities install. +################################################## +library_install: install_setup +library_install: install_include install_lib install_utilities install_docs + +uninstall: uninstall_include uninstall_lib uninstall_utilities uninstall_docs + +install_setup: + @test -d $(DESTDIR)$(prefix) || \ + ($(MKDIR) -p $(DESTDIR)$(prefix) && \ + $(CHMOD) $(dmode) $(DESTDIR)$(prefix)) + +INCDOT= db.h db_cxx.h @ADDITIONAL_INCS@ +install_include: $(INCDOT) + @echo "Installing DB include files: $(DESTDIR)$(includedir) ..." 
+ @test -d $(DESTDIR)$(includedir) || \ + ($(MKDIR) -p $(DESTDIR)$(includedir) && \ + $(CHMOD) $(dmode) $(DESTDIR)$(includedir)) + @for f in $(INCDOT); do \ + basef=`echo $$f | $(SED) 's,.*/,,'` ; \ + $(RM) $(DESTDIR)$(includedir)/$$basef ; \ + $(CP) -p $$f $(DESTDIR)$(includedir) ; \ + $(CHMOD) $(fmode) $(DESTDIR)$(includedir)/$$basef ; \ + done + +uninstall_include: + @for f in $(INCDOT); do \ + basef=`echo $$f | $(SED) 's,.*/,,'` ; \ + $(RM) $(DESTDIR)$(includedir)/$$basef ; \ + done + +install_lib: + @echo "Installing DB library: $(DESTDIR)$(libdir) ..." + @test -d $(DESTDIR)$(libdir) || \ + ($(MKDIR) -p $(DESTDIR)$(libdir) && \ + $(CHMOD) $(dmode) $(DESTDIR)$(libdir)) + @cd $(DESTDIR)$(libdir) && $(RM) $(LIB_INSTALL_FILE_LIST) + @$(INSTALLER) @INSTALL_LIBS@ $(DESTDIR)$(libdir) + @(cd $(DESTDIR)$(libdir) && \ + test -f $(libso) && $(LN) -s $(libso) $(libso_default); \ + test -f $(libso) && $(LN) -s $(libso) $(libso_major); \ + test -f $(libxso) && $(LN) -s $(libxso) $(libxso_default); \ + test -f $(libxso) && $(LN) -s $(libxso) $(libxso_major); \ + test -f $(libsqlso) && $(LN) -s $(libsqlso) $(libsqlso_default); \ + test -f $(libsqlso) && $(LN) -s $(libsqlso) $(libsqlso_major); \ + test -f $(libstlso) && $(LN) -s $(libstlso) $(libstlso_default); \ + test -f $(libstlso) && $(LN) -s $(libstlso) $(libstlso_major); \ + test -f $(libtso) && $(LN) -s $(libtso) $(libtso_default); \ + test -f $(libtso) && $(LN) -s $(libtso) $(libtso_major); \ + test -f $(libjso) && $(LN) -s $(libjso) $(libjso_default); \ + test -f $(libjso) && $(LN) -s $(libjso) $(libjso_major); \ + test -f $(libjso) && $(LN) -s $(libjso) $(libjso_g)) || exit 0 + @(test -f $(libj_jarfile) && \ + $(CP) $(libj_jarfile) $(DESTDIR)$(libdir) && \ + $(CHMOD) $(fmode) $(DESTDIR)$(libdir)/$(libj_jarfile)) || exit 0 + @(test -d jdbc && cd jdbc && make install && cd -) || exit 0 + +uninstall_lib: + @cd $(DESTDIR)$(libdir) && $(RM) $(LIB_INSTALL_FILE_LIST) + +install_utilities: + @echo "Installing DB utilities: 
$(DESTDIR)$(bindir) ..." + @test -d $(DESTDIR)$(bindir) || \ + ($(MKDIR) -p $(DESTDIR)$(bindir) && \ + $(CHMOD) $(dmode) $(DESTDIR)$(bindir)) + @for i in $(UTIL_PROGS); do \ + test -f $$i.exe && i=$$i.exe; \ + e=`echo $$i | $(SED) '$(transform)'`; \ + $(RM) $(DESTDIR)$(bindir)/$$e; \ + $(INSTALLER) $$i $(DESTDIR)$(bindir)/$$e; \ + $(STRIP) $(DESTDIR)$(bindir)/$$e; \ + $(CHMOD) $(emode) $(DESTDIR)$(bindir)/$$e; \ + done + +uninstall_utilities: + @(cd $(DESTDIR)$(bindir); for i in $(UTIL_PROGS); do \ + i=`echo $$i | $(SED) '$(transform)'`; \ + test -f $$i.exe && i=$$i.exe; \ + $(RM) $$i; \ + done) + +# We install csharp docs even on UNIX so we don't have a +# broken link on the landing page. +DOCLIST=api_reference articles bdb-sql collections csharp \ + gsg gsg_db_rep gsg_txn index.html installation java \ + license porting programmer_reference upgrading + +install_docs: + @echo "Installing documentation: $(DESTDIR)$(docdir) ..." + @test -d $(DESTDIR)$(docdir) || \ + ($(MKDIR) -p $(DESTDIR)$(docdir) && \ + $(CHMOD) $(dmode) $(DESTDIR)$(docdir)) + @cd $(DESTDIR)$(docdir) && $(RM) -r $(DOCLIST) + @cd $(topdir)/docs && $(CP) -pr $(DOCLIST) $(DESTDIR)$(docdir)/ + +uninstall_docs: + @cd $(DESTDIR)$(docdir) && $(RM) -r $(DOCLIST) + +################################################## +# Remaining standard Makefile targets. 
+##################################################
+# Names removed by "make clean" that are not object files or libraries:
+# example programs, test drivers, and scratch directories.
+CLEAN_LIST=\
+	StlTxnGuide TxnGuide TxnGuideInMemory berkeley_db_cxxsvc \
+	berkeley_db_svc cutest db_dump185 db_perf db_repsite db_reptest dbs \
+	ex_access ex_apprec ex_btrec ex_bulk ex_dbclient ex_env ex_heap ex_lock \
+	ex_mpool ex_rep_base ex_rep_chan ex_rep_gsg_repmgr ex_rep_gsg_simple \
+	ex_rep_mgr ex_sequence ex_stream ex_thread ex_tpcb \
+	example_database_load example_database_read excxx_access \
+	excxx_btrec excxx_env excxx_example_database_load \
+	excxx_example_database_read excxx_lock excxx_mpool \
+	excxx_repquote excxx_repquote_gsg_repmgr excxx_repquote_gsg_simple \
+	excxx_sequence excxx_tpcb ex_sql_binding ex_sql_fts3 ex_sql_index \
+	ex_sql_load ex_sql_multi_thread ex_sql_query ex_sql_rtree \
+	ex_sql_savepoint ex_sql_statement ex_sql_transaction exstl_access \
+	exstl_advancedfeatures exstl_repquote exstl_tpcb txn_guide test_dbstl \
+	test_dbstl_ms_examples test_dbstl_stlport txn_guide_inmemory
+
+mostly-clean clean:
+	$(RM) -r $(C_OBJS) $(CUTEST_OBJS)
+	$(RM) -r $(CXX_OBJS) $(JAVA_OBJS) $(SQL_OBJS) $(STL_OBJS)
+	$(RM) -r $(TCL_OBJS) $(UTIL_PROGS) *.exe $(CLEAN_LIST)
+	$(RM) -r $(JAVA_CLASSTOP) $(JAVA_EXCLASSTOP)
+	$(RM) -r $(DB_STL_TEST_OBJS) $(TEST_MICRO_OBJS)
+	$(RM) -r tags *@o@ *.o *.o.lock *.lo core *.core core.*
+	$(RM) -r ALL.OUT.* PARALLEL_TESTDIR.*
+	$(RM) -r RUN_LOG RUNQUEUE TESTDIR TESTDIR.A TEST.LIST
+	$(RM) -r logtrack_seen.db test_micro test_mutex .libs
+	$(RM) -r $(LIB_INSTALL_FILE_LIST)
+	@subdir_cmd@
+
+# Generated files removed only by distclean/realclean: configure output,
+# generated headers and sources, and generated sub-build directories.
+REALCLEAN_LIST=\
+	Makefile clib_port.h confdefs.h config.cache config.log config.status \
+	configure.lineno db.h db185_int.h db_185.h db_config.h db_cxx.h \
+	db_int.h db_int_def.h db_provider.c db_provider.h dbstl_common.h \
+	db_server.h db_server_clnt.c db_server_svc.c db_server_xdr.c \
+	gen_db_server.c include.tcl dbsql.h $(builddir)/sql $(builddir)/jdbc
+
+distclean maintainer-clean realclean: clean
+	$(RM) -r $(REALCLEAN_LIST)
+	$(RM) -r libtool
+
+check
depend dvi info obj TAGS: + @echo "make: $@ target not available" + +dist rpm rpmbuild: + @echo "make: $@ target not available" && exit 1 + +################################################## +# Testers, benchmarks. +################################################## +dbs@o@: $(testdir)/server/dbs.c + $(CC) $(CFLAGS) $? +dbs_am@o@: $(testdir)/server/dbs_am.c + $(CC) $(CFLAGS) $? +dbs_checkpoint@o@: $(testdir)/server/dbs_checkpoint.c + $(CC) $(CFLAGS) $? +dbs_debug@o@: $(testdir)/server/dbs_debug.c + $(CC) $(CFLAGS) $? +dbs_handles@o@: $(testdir)/server/dbs_handles.c + $(CC) $(CFLAGS) $? +dbs_log@o@: $(testdir)/server/dbs_log.c + $(CC) $(CFLAGS) $? +dbs_qam@o@: $(testdir)/server/dbs_qam.c + $(CC) $(CFLAGS) $? +dbs_spawn@o@: $(testdir)/server/dbs_spawn.c + $(CC) $(CFLAGS) $? +dbs_trickle@o@: $(testdir)/server/dbs_trickle.c + $(CC) $(CFLAGS) $? +dbs_util@o@: $(testdir)/server/dbs_util.c + $(CC) $(CFLAGS) $? +dbs_yield@o@: $(testdir)/server/dbs_yield.c + $(CC) $(CFLAGS) $? +DBS_OBJS=\ + dbs@o@ dbs_am@o@ dbs_checkpoint@o@ dbs_debug@o@ dbs_handles@o@ \ + dbs_log@o@ dbs_qam@o@ dbs_spawn@o@ dbs_trickle@o@ dbs_util@o@ \ + dbs_yield@o@ +dbs: $(DBS_OBJS) $(DEF_LIB) + $(CCLINK) -o $@ \ + $(LDFLAGS) $(DBS_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS) + $(POSTLINK) $@ + +db_perf@o@: $(testdir)/perf/db_perf.c + $(CC) $(CFLAGS) $? +perf_checkpoint@o@: $(testdir)/perf/perf_checkpoint.c + $(CC) $(CFLAGS) $? +perf_config@o@: $(testdir)/perf/perf_config.c + $(CC) $(CFLAGS) $? +perf_dbs@o@: $(testdir)/perf/perf_dbs.c + $(CC) $(CFLAGS) $? +perf_dead@o@: $(testdir)/perf/perf_dead.c + $(CC) $(CFLAGS) $? +perf_debug@o@: $(testdir)/perf/perf_debug.c + $(CC) $(CFLAGS) $? +perf_file@o@: $(testdir)/perf/perf_file.c + $(CC) $(CFLAGS) $? +perf_key@o@: $(testdir)/perf/perf_key.c + $(CC) $(CFLAGS) $? +perf_log@o@: $(testdir)/perf/perf_log.c + $(CC) $(CFLAGS) $? +perf_misc@o@: $(testdir)/perf/perf_misc.c + $(CC) $(CFLAGS) $? +perf_op@o@: $(testdir)/perf/perf_op.c + $(CC) $(CFLAGS) $? 
+perf_parse@o@: $(testdir)/perf/perf_parse.c + $(CC) $(CFLAGS) $? +perf_rand@o@: $(testdir)/perf/perf_rand.c + $(CC) $(CFLAGS) $? +perf_spawn@o@: $(testdir)/perf/perf_spawn.c + $(CC) $(CFLAGS) $? +perf_stat@o@: $(testdir)/perf/perf_stat.c + $(CC) $(CFLAGS) $? +perf_sync@o@: $(testdir)/perf/perf_sync.c + $(CC) $(CFLAGS) $? +perf_thread@o@: $(testdir)/perf/perf_thread.c + $(CC) $(CFLAGS) $? +perf_trickle@o@: $(testdir)/perf/perf_trickle.c + $(CC) $(CFLAGS) $? +perf_txn@o@: $(testdir)/perf/perf_txn.c + $(CC) $(CFLAGS) $? +perf_util@o@: $(testdir)/perf/perf_util.c + $(CC) $(CFLAGS) $? +perf_vx@o@: $(testdir)/perf/perf_vx.c + $(CC) $(CFLAGS) $? +DBPERF_OBJS=\ + db_perf@o@ perf_checkpoint@o@ perf_config@o@ perf_dbs@o@ \ + perf_dead@o@ perf_debug@o@ perf_file@o@ perf_key@o@ perf_log@o@ \ + perf_misc@o@ perf_op@o@ perf_parse@o@ perf_rand@o@ perf_spawn@o@ \ + perf_stat@o@ perf_sync@o@ perf_thread@o@ perf_trickle@o@ \ + perf_txn@o@ perf_util@o@ perf_vx@o@ util_sig@o@ +db_perf: $(DBPERF_OBJS) $(DEF_LIB) + $(CCLINK) -o $@ \ + $(LDFLAGS) $(DBPERF_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS) + $(POSTLINK) $@ + +# C unit test suite. +CUTEST_FLAGS= $(CFLAGS) -I$(testdir)/c/cutest -I$(testdir)/c/suites -I$(testdir)/c/common + +CuTest@o@: $(testdir)/c/cutest/CuTest.c + $(CC) $(CUTEST_FLAGS) $? +CuTests@o@: $(testdir)/c/cutest/CuTests.c + $(CC) $(CUTEST_FLAGS) $? +Runner@o@: $(testdir)/c/cutest/Runner.c + $(CC) $(CUTEST_FLAGS) $? +TestDbTuner@o@: $(testdir)/c/suites/TestDbTuner.c + $(CC) $(CUTEST_FLAGS) $? +TestChannel@o@: $(testdir)/c/suites/TestChannel.c + $(CC) $(CUTEST_FLAGS) $? +TestEncryption@o@: $(testdir)/c/suites/TestEncryption.c + $(CC) $(CUTEST_FLAGS) $? +TestEnvConfig@o@: $(testdir)/c/suites/TestEnvConfig.c + $(CC) $(CUTEST_FLAGS) $? +TestEnvMethod@o@: $(testdir)/c/suites/TestEnvMethod.c + $(CC) $(CUTEST_FLAGS) $? +TestKeyExistErrorReturn@o@: $(testdir)/c/suites/TestKeyExistErrorReturn.c + $(CC) $(CUTEST_FLAGS) $? 
+TestPartial@o@: $(testdir)/c/suites/TestPartial.c
+	$(CC) $(CUTEST_FLAGS) $?
+TestQueue@o@: $(testdir)/c/suites/TestQueue.c
+	$(CC) $(CUTEST_FLAGS) $?
+
+# NOTE(review): db_tuner.lo is a prerequisite here but does not appear on
+# the link line; presumably it is included via $(CUTEST_OBJS) -- confirm.
+cutest: $(CUTEST_OBJS) $(DEF_LIB) db_tuner.lo
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(CUTEST_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+# Replication-manager testers: db_repsite is C++, db_reptest is C.
+db_repsite@o@: $(testdir)/repmgr/db_repsite.cpp
+	$(CXX) $(CXXFLAGS) $?
+DBREPSITE_OBJS=db_repsite@o@
+db_repsite: $(DBREPSITE_OBJS) $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ \
+	    $(LDFLAGS) $(DBREPSITE_OBJS) $(DEF_LIB_CXX) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+db_reptest@o@: $(testdir)/repmgr/db_reptest.c
+	$(CC) $(CFLAGS) $?
+reptest_am@o@: $(testdir)/repmgr/reptest_am.c
+	$(CC) $(CFLAGS) $?
+reptest_handles@o@: $(testdir)/repmgr/reptest_handles.c
+	$(CC) $(CFLAGS) $?
+reptest_spawn@o@: $(testdir)/repmgr/reptest_spawn.c
+	$(CC) $(CFLAGS) $?
+reptest_util@o@: $(testdir)/repmgr/reptest_util.c
+	$(CC) $(CFLAGS) $?
+DBREPTEST_OBJS=\
+	db_reptest@o@ reptest_am@o@ reptest_handles@o@ \
+	reptest_spawn@o@ reptest_util@o@
+
+db_reptest: $(DBREPTEST_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(DBREPTEST_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+# STL API testers; compiled with $(STLFLAGS).
+test_dbstl@o@: $(testdir)/stl/base/test_dbstl.cpp
+	$(CXX) $(STLFLAGS) $?
+test_util@o@: $(testdir)/stl/base/test_util.cpp
+	$(CXX) $(STLFLAGS) $?
+
+stl_test@o@: $(testdir)/stl/stlport/stl_test.cpp
+	$(CXX) $(STLFLAGS) $?
+utility@o@: $(testdir)/stl/stlport/utility.cpp
+	$(CXX) $(STLFLAGS) $?
+stlport_test_builder@o@: $(testdir)/stl/stlport/stlport_test_builder.cpp
+	$(CXX) $(STLFLAGS) $?
+
+ms_stl_main@o@: $(testdir)/stl/ms_examples/ms_stl_main.cpp
+	$(CXX) $(STLFLAGS) -DGCC_BAD_AUTO_CONVERSION $?
+ms_stl_common@o@: $(testdir)/stl/ms_examples/ms_stl_common.cpp
+	$(CXX) $(STLFLAGS) -DGCC_BAD_AUTO_CONVERSION $?
+test_builder@o@: $(testdir)/stl/ms_examples/test_builder.cpp
+	$(CXX) $(STLFLAGS) -DGCC_BAD_AUTO_CONVERSION $?
+
+DB_STL_TEST_OBJS=test_dbstl@o@ test_util@o@
+DB_STL_STLPORT_TEST_OBJS=stl_test@o@ stlport_test_builder@o@ utility@o@
+DB_STL_MS_TEST_OBJS=ms_stl_main@o@ ms_stl_common@o@ \
+	test_builder@o@
+
+# STL testers link against both the C++ and STL libraries.
+test_dbstl: $(DB_STL_TEST_OBJS) $(DEF_LIB_CXX) $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) $(DB_STL_TEST_OBJS) \
+	    $(DEF_LIB_CXX) $(DEF_LIB_STL) $(LIBS) $(LIBSTLSO_LIBS)
+	$(POSTLINK) $@
+
+test_dbstl_ms_examples: $(DB_STL_MS_TEST_OBJS) $(DEF_LIB_CXX) $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) $(DB_STL_MS_TEST_OBJS) \
+	    $(DEF_LIB_CXX) $(DEF_LIB_STL) $(LIBS) $(LIBSTLSO_LIBS)
+	$(POSTLINK) $@
+test_dbstl_stlport: $(DB_STL_STLPORT_TEST_OBJS) $(DEF_LIB_CXX) $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) $(DB_STL_STLPORT_TEST_OBJS) \
+	    $(DEF_LIB_CXX) $(DEF_LIB_STL) $(LIBS) $(LIBSTLSO_LIBS)
+	$(POSTLINK) $@
+
+# Micro-benchmark objects; the extra -I picks up the benchmark headers.
+b_curalloc@o@: $(testdir)/micro/source/b_curalloc.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_curwalk@o@: $(testdir)/micro/source/b_curwalk.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_del@o@: $(testdir)/micro/source/b_del.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_get@o@: $(testdir)/micro/source/b_get.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_inmem@o@: $(testdir)/micro/source/b_inmem.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_latch@o@: $(testdir)/micro/source/b_latch.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_load@o@: $(testdir)/micro/source/b_load.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_open@o@: $(testdir)/micro/source/b_open.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_put@o@: $(testdir)/micro/source/b_put.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_recover@o@: $(testdir)/micro/source/b_recover.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_txn@o@: $(testdir)/micro/source/b_txn.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_txn_write@o@: $(testdir)/micro/source/b_txn_write.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_uname@o@: $(testdir)/micro/source/b_uname.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_util@o@: $(testdir)/micro/source/b_util.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+b_workload@o@: $(testdir)/micro/source/b_workload.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+test_micro@o@: $(testdir)/micro/source/test_micro.c
+	$(CC) $(CFLAGS) -I$(testdir)/micro/source $?
+test_micro: $(TEST_MICRO_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(TEST_MICRO_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+test_mutex@o@: $(srcdir)/mutex/test_mutex.c
+	$(CC) $(CFLAGS) $?
+test_mutex: test_mutex@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) test_mutex@o@ $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+##################################################
+# Targets for example programs.
+##################################################
+# NOTE(review): ex_dbclient, excxx_btrec and excxx_env have build rules
+# below but are not listed in these aggregate targets -- confirm whether
+# that omission is intentional.
+examples_c: ex_access ex_apprec ex_btrec ex_bulk ex_env ex_heap ex_lock \
+	ex_mpool ex_rep_base ex_rep_chan ex_rep_gsg_repmgr ex_rep_gsg_simple \
+	ex_rep_mgr ex_sequence ex_stream ex_thread ex_tpcb \
+	example_database_load example_database_read txn_guide txn_guide_inmemory
+
+examples_cxx: TxnGuide TxnGuideInMemory excxx_access \
+	excxx_example_database_load excxx_example_database_read \
+	excxx_lock excxx_mpool excxx_repquote excxx_repquote_gsg_repmgr \
+	excxx_repquote_gsg_simple excxx_sequence excxx_tpcb
+
+examples_stl: StlTxnGuide exstl_access exstl_advancedfeatures exstl_repquote \
+	exstl_tpcb
+
+examples_sql: ex_sql_binding ex_sql_fts3 ex_sql_index ex_sql_load \
+	ex_sql_multi_thread ex_sql_query ex_sql_rtree ex_sql_savepoint \
+	ex_sql_statement ex_sql_transaction
+
+examples: examples_c examples_cxx examples_stl examples_sql
+
+##################################################
+# Example programs for C.
+##################################################
+ex_access@o@: $(exampledir)/c/ex_access.c
+	$(CC) $(CFLAGS) $?
+ex_access: ex_access@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_access@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+# Application-specific-recovery example: the main program plus the
+# generated log/recovery sources (ex_apprec_auto*, ex_apprec_rec).
+ex_apprec@o@: $(exampledir)/c/ex_apprec/ex_apprec.c
+	$(CC) $(CFLAGS) $?
+ex_apprec_auto@o@: $(exampledir)/c/ex_apprec/ex_apprec_auto.c
+	$(CC) $(CFLAGS) $?
+ex_apprec_autop@o@: $(exampledir)/c/ex_apprec/ex_apprec_autop.c
+	$(CC) $(CFLAGS) $?
+ex_apprec_rec@o@: $(exampledir)/c/ex_apprec/ex_apprec_rec.c
+	$(CC) $(CFLAGS) $?
+EX_APPREC_OBJS=\
+	ex_apprec@o@ ex_apprec_auto@o@ ex_apprec_autop@o@ ex_apprec_rec@o@
+ex_apprec: $(EX_APPREC_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(EX_APPREC_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+ex_btrec@o@: $(exampledir)/c/ex_btrec.c
+	$(CC) $(CFLAGS) $?
+ex_btrec: ex_btrec@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_btrec@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_bulk@o@: $(exampledir)/c/ex_bulk.c
+	$(CC) $(CFLAGS) $?
+ex_bulk: ex_bulk@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_bulk@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_dbclient@o@: $(exampledir)/c/ex_dbclient.c
+	$(CC) $(CFLAGS) $?
+ex_dbclient: ex_dbclient@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_dbclient@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_env@o@: $(exampledir)/c/ex_env.c
+	$(CC) $(CFLAGS) $?
+ex_env: ex_env@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_env@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_heap@o@: $(exampledir)/c/ex_heap.c
+	$(CC) $(CFLAGS) $?
+ex_heap: ex_heap@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_heap@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_lock@o@: $(exampledir)/c/ex_lock.c
+	$(CC) $(CFLAGS) $?
+ex_lock: ex_lock@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_lock@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_mpool@o@: $(exampledir)/c/ex_mpool.c
+	$(CC) $(CFLAGS) $?
+ex_mpool: ex_mpool@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_mpool@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+rep_base@o@: $(exampledir)/c/ex_rep/base/rep_base.c
+	$(CC) $(CFLAGS) $?
+rep_common@o@: $(exampledir)/c/ex_rep/common/rep_common.c
+	$(CC) $(CFLAGS) $?
+rep_msg@o@: $(exampledir)/c/ex_rep/base/rep_msg.c
+	$(CC) $(CFLAGS) $?
+rep_net@o@: $(exampledir)/c/ex_rep/base/rep_net.c
+	$(CC) $(CFLAGS) $?
+EX_REP_BASE_OBJS=\
+	rep_base@o@ rep_common@o@ rep_msg@o@ rep_net@o@
+ex_rep_base: $(EX_REP_BASE_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(EX_REP_BASE_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+rep_chan@o@: $(exampledir)/c/ex_rep_chan/rep_chan.c
+	$(CC) $(CFLAGS) $?
+rep_chan_util@o@: $(exampledir)/c/ex_rep_chan/rep_chan_util.c
+	$(CC) $(CFLAGS) $?
+EX_REP_CHAN_OBJS=\
+	rep_chan@o@ rep_chan_util@o@
+ex_rep_chan: $(EX_REP_CHAN_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(EX_REP_CHAN_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+simple_txn@o@: $(exampledir)/c/ex_rep_gsg/simple_txn.c
+	$(CC) $(CFLAGS) $?
+ex_rep_gsg_simple: simple_txn@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) simple_txn@o@ $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+rep_mgr_gsg@o@: $(exampledir)/c/ex_rep_gsg/rep_mgr_gsg.c
+	$(CC) $(CFLAGS) $?
+ex_rep_gsg_repmgr: rep_mgr_gsg@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) rep_mgr_gsg@o@ $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+# ex_rep_mgr shares rep_common@o@ with ex_rep_base.
+rep_mgr@o@: $(exampledir)/c/ex_rep/mgr/rep_mgr.c
+	$(CC) $(CFLAGS) $?
+EX_REP_MGR_OBJS=\
+	rep_common@o@ rep_mgr@o@
+ex_rep_mgr: $(EX_REP_MGR_OBJS) $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) $(EX_REP_MGR_OBJS) $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+ex_sequence@o@: $(exampledir)/c/ex_sequence.c
+	$(CC) $(CFLAGS) $?
+ex_sequence: ex_sequence@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sequence@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_stream@o@: $(exampledir)/c/ex_stream.c
+	$(CC) $(CFLAGS) $?
+ex_stream: ex_stream@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_stream@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+ex_thread@o@: $(exampledir)/c/ex_thread.c
+	$(CC) $(CFLAGS) $?
+ex_thread: ex_thread@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ \
+	    $(LDFLAGS) ex_thread@o@ $(DEF_LIB) $(TEST_LIBS) $(LIBS)
+	$(POSTLINK) $@
+
+ex_tpcb@o@: $(exampledir)/c/ex_tpcb.c
+	$(CC) $(CFLAGS) $?
+ex_tpcb: ex_tpcb@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_tpcb@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+# Getting-started examples share gettingstarted_common@o@.
+gettingstarted_common@o@: \
+	$(exampledir)/c/getting_started/gettingstarted_common.c
+	$(CC) -I$(exampledir)/c/getting_started $(CFLAGS) $?
+example_database_load@o@: \
+	$(exampledir)/c/getting_started/example_database_load.c
+	$(CC) $(CFLAGS) $?
+example_database_read@o@: \
+	$(exampledir)/c/getting_started/example_database_read.c
+	$(CC) $(CFLAGS) $?
+example_database_load: example_database_load@o@ gettingstarted_common@o@ \
+	$(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    example_database_load@o@ gettingstarted_common@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+example_database_read: example_database_read@o@ gettingstarted_common@o@ \
+	$(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) \
+	    example_database_read@o@ gettingstarted_common@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+txn_guide_inmemory@o@: $(exampledir)/c/txn_guide/txn_guide_inmemory.c
+	$(CC) $(CFLAGS) $?
+txn_guide_inmemory: txn_guide_inmemory@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) txn_guide_inmemory@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+txn_guide@o@: $(exampledir)/c/txn_guide/txn_guide.c
+	$(CC) $(CFLAGS) $?
+txn_guide: txn_guide@o@ $(DEF_LIB)
+	$(CCLINK) -o $@ $(LDFLAGS) txn_guide@o@ $(DEF_LIB) $(LIBS)
+	$(POSTLINK) $@
+
+##################################################
+# Example programs for C++.
+##################################################
+AccessExample@o@: $(exampledir)/cxx/AccessExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_access: AccessExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) AccessExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+BtRecExample@o@: $(exampledir)/cxx/BtRecExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_btrec: BtRecExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) BtRecExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+EnvExample@o@: $(exampledir)/cxx/EnvExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_env: EnvExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) EnvExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+LockExample@o@: $(exampledir)/cxx/LockExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_lock: LockExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) LockExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+MpoolExample@o@: $(exampledir)/cxx/MpoolExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_mpool: MpoolExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) MpoolExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+# Repquote examples add their own directory to the include path.
+RepQuoteExample@o@: $(exampledir)/cxx/excxx_repquote/RepQuoteExample.cpp
+	$(CXX) -I$(exampledir)/cxx/excxx_repquote $(CXXFLAGS) $?
+RepConfigInfo@o@: $(exampledir)/cxx/excxx_repquote/RepConfigInfo.cpp
+	$(CXX) -I$(exampledir)/cxx/excxx_repquote $(CXXFLAGS) $?
+excxx_repquote: RepQuoteExample@o@ RepConfigInfo@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) \
+	    RepQuoteExample@o@ RepConfigInfo@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+RepMgrGSG@o@: $(exampledir)/cxx/excxx_repquote_gsg/RepMgrGSG.cpp
+	$(CXX) -I$(exampledir)/cxx/excxx_repquote_gsg $(CXXFLAGS) $?
+excxx_repquote_gsg_repmgr: RepMgrGSG@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) RepMgrGSG@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+SimpleTxn@o@: $(exampledir)/cxx/excxx_repquote_gsg/SimpleTxn.cpp
+	$(CXX) -I$(exampledir)/cxx/excxx_repquote_gsg $(CXXFLAGS) $?
+excxx_repquote_gsg_simple: SimpleTxn@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) SimpleTxn@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+SequenceExample@o@: $(exampledir)/cxx/SequenceExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_sequence: SequenceExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) SequenceExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+TpcbExample@o@: $(exampledir)/cxx/TpcbExample.cpp
+	$(CXX) $(CXXFLAGS) $?
+excxx_tpcb: TpcbExample@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) TpcbExample@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+# C++ getting-started examples share MyDb@o@.
+excxx_example_database_load@o@: \
+	$(exampledir)/cxx/getting_started/excxx_example_database_load.cpp
+	$(CXX) -I$(exampledir)/cxx/getting_started $(CXXFLAGS) $?
+excxx_example_database_read@o@: \
+	$(exampledir)/cxx/getting_started/excxx_example_database_read.cpp
+	$(CXX) -I$(exampledir)/cxx/getting_started $(CXXFLAGS) $?
+MyDb@o@: $(exampledir)/cxx/getting_started/MyDb.cpp
+	$(CXX) -I$(exampledir)/cxx/getting_started $(CXXFLAGS) $?
+excxx_example_database_load: \
+	excxx_example_database_load@o@ MyDb@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) \
+	    excxx_example_database_load@o@ MyDb@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+excxx_example_database_read: \
+	excxx_example_database_read@o@ MyDb@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) \
+	    excxx_example_database_read@o@ MyDb@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+TxnGuideInMemory@o@: $(exampledir)/cxx/txn_guide/TxnGuideInMemory.cpp
+	$(CXX) $(CXXFLAGS) $?
+TxnGuideInMemory: TxnGuideInMemory@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) TxnGuideInMemory@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+TxnGuide@o@: $(exampledir)/cxx/txn_guide/TxnGuide.cpp
+	$(CXX) $(CXXFLAGS) $?
+TxnGuide: TxnGuide@o@ $(DEF_LIB_CXX)
+	$(CXXLINK) -o $@ $(LDFLAGS) TxnGuide@o@ $(DEF_LIB_CXX) $(LIBS)
+	$(POSTLINK) $@
+
+##################################################
+# Example programs for STL.
+##################################################
+StlAccessExample@o@: $(exampledir)/stl/StlAccessExample.cpp
+	$(CXX) $(STLFLAGS) $?
+exstl_access: StlAccessExample@o@ $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) StlAccessExample@o@ $(DEF_LIB_STL) $(LIBS)
+	$(POSTLINK) $@
+
+StlAdvancedFeatures@o@: $(exampledir)/stl/StlAdvancedFeatures.cpp
+	$(CXX) -I$(exampledir)/stl $(STLFLAGS) $?
+exstl_advancedfeatures: StlAdvancedFeatures@o@ $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) StlAdvancedFeatures@o@ $(DEF_LIB_STL) $(LIBS)
+	$(POSTLINK) $@
+
+StlRepQuoteExample@o@: $(exampledir)/stl/repquote/StlRepQuoteExample.cpp
+	$(CXX) -I$(exampledir)/stl/repquote $(STLFLAGS) $?
+StlRepConfigInfo@o@: $(exampledir)/stl/repquote/StlRepConfigInfo.cpp
+	$(CXX) -I$(exampledir)/stl/repquote $(STLFLAGS) $?
+exstl_repquote: StlRepQuoteExample@o@ StlRepConfigInfo@o@ $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) \
+	    StlRepQuoteExample@o@ StlRepConfigInfo@o@ $(DEF_LIB_STL) $(LIBS)
+	$(POSTLINK) $@
+
+StlTpcbExample@o@: $(exampledir)/stl/StlTpcbExample.cpp
+	$(CXX) $(STLFLAGS) $?
+exstl_tpcb: StlTpcbExample@o@ $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) StlTpcbExample@o@ $(DEF_LIB_STL) $(LIBS)
+	$(POSTLINK) $@
+
+StlTransactionGuideExample@o@: $(exampledir)/stl/StlTransactionGuideExample.cpp
+	$(CXX) $(STLFLAGS) $?
+StlTxnGuide: StlTransactionGuideExample@o@ $(DEF_LIB_STL)
+	$(CXXLINK) -o $@ $(LDFLAGS) StlTransactionGuideExample@o@ $(DEF_LIB_STL) $(LIBS)
+	$(POSTLINK) $@
+
+##################################################
+# Example programs for SQL.
+##################################################
+# SQL examples all link against the SQL library plus @SQL_LIBS@
+# (substituted by configure) and share ex_sql_utils@o@.
+ex_sql_binding: ex_sql_binding@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_binding@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_fts3: ex_sql_fts3@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_fts3@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_index: ex_sql_index@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_index@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_load: ex_sql_load@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_load@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_multi_thread: ex_sql_multi_thread@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_multi_thread@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_query: ex_sql_query@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_query@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_rtree: ex_sql_rtree@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_rtree@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_savepoint: ex_sql_savepoint@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_savepoint@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_statement: ex_sql_statement@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_statement@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_transaction: ex_sql_transaction@o@ ex_sql_utils@o@ $(DEF_LIB_SQL)
+	$(CCLINK) -o $@ $(LDFLAGS) ex_sql_transaction@o@ ex_sql_utils@o@ $(DEF_LIB_SQL) @SQL_LIBS@ $(LIBS)
+	$(POSTLINK) $@
+
+ex_sql_binding@o@: $(exampledir)/sql/c/ex_sql_binding.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_fts3@o@: $(exampledir)/sql/c/ex_sql_fts3.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_index@o@: $(exampledir)/sql/c/ex_sql_index.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_load@o@: $(exampledir)/sql/c/ex_sql_load.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_multi_thread@o@: $(exampledir)/sql/c/ex_sql_multi_thread.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_utils@o@: $(exampledir)/sql/c/ex_sql_utils.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_query@o@: $(exampledir)/sql/c/ex_sql_query.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_rtree@o@: $(exampledir)/sql/c/ex_sql_rtree.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_savepoint@o@: $(exampledir)/sql/c/ex_sql_savepoint.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_statement@o@: $(exampledir)/sql/c/ex_sql_statement.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+ex_sql_transaction@o@: $(exampledir)/sql/c/ex_sql_transaction.c
+	$(CC) $(CFLAGS) $(SQLFLAGS) $?
+
+##################################################
+# C API build rules.
+##################################################
+# One compile rule per library source; @o@ is replaced by configure with
+# the platform object suffix.
+aes_method@o@: $(srcdir)/crypto/aes_method.c
+	$(CC) $(CFLAGS) $?
+bt_compare@o@: $(srcdir)/btree/bt_compare.c
+	$(CC) $(CFLAGS) $?
+bt_compress@o@: $(srcdir)/btree/bt_compress.c
+	$(CC) $(CFLAGS) $?
+bt_conv@o@: $(srcdir)/btree/bt_conv.c
+	$(CC) $(CFLAGS) $?
+bt_curadj@o@: $(srcdir)/btree/bt_curadj.c
+	$(CC) $(CFLAGS) $?
+bt_cursor@o@: $(srcdir)/btree/bt_cursor.c
+	$(CC) $(CFLAGS) $?
+bt_delete@o@: $(srcdir)/btree/bt_delete.c
+	$(CC) $(CFLAGS) $?
+bt_method@o@: $(srcdir)/btree/bt_method.c
+	$(CC) $(CFLAGS) $?
+bt_open@o@: $(srcdir)/btree/bt_open.c
+	$(CC) $(CFLAGS) $?
+bt_put@o@: $(srcdir)/btree/bt_put.c
+	$(CC) $(CFLAGS) $?
+bt_rec@o@: $(srcdir)/btree/bt_rec.c
+	$(CC) $(CFLAGS) $?
+bt_reclaim@o@: $(srcdir)/btree/bt_reclaim.c
+	$(CC) $(CFLAGS) $?
+bt_recno@o@: $(srcdir)/btree/bt_recno.c
+	$(CC) $(CFLAGS) $?
+bt_rsearch@o@: $(srcdir)/btree/bt_rsearch.c
+	$(CC) $(CFLAGS) $?
+bt_search@o@: $(srcdir)/btree/bt_search.c
+	$(CC) $(CFLAGS) $?
+bt_split@o@: $(srcdir)/btree/bt_split.c
+	$(CC) $(CFLAGS) $?
+bt_stat@o@: $(srcdir)/btree/bt_stat.c
+	$(CC) $(CFLAGS) $?
+bt_compact@o@: $(srcdir)/btree/bt_compact.c
+	$(CC) $(CFLAGS) $?
+bt_upgrade@o@: $(srcdir)/btree/bt_upgrade.c
+	$(CC) $(CFLAGS) $?
+bt_verify@o@: $(srcdir)/btree/bt_verify.c
+	$(CC) $(CFLAGS) $?
+btree_auto@o@: $(srcdir)/btree/btree_auto.c
+	$(CC) $(CFLAGS) $?
+btree_autop@o@: $(srcdir)/btree/btree_autop.c
+	$(CC) $(CFLAGS) $?
+clock@o@: $(srcdir)/common/clock.c
+	$(CC) $(CFLAGS) $?
+crdel_auto@o@: $(srcdir)/db/crdel_auto.c
+	$(CC) $(CFLAGS) $?
+crdel_autop@o@: $(srcdir)/db/crdel_autop.c
+	$(CC) $(CFLAGS) $?
+crdel_rec@o@: $(srcdir)/db/crdel_rec.c
+	$(CC) $(CFLAGS) $?
+crypto@o@: $(srcdir)/crypto/crypto.c
+	$(CC) $(CFLAGS) $?
+crypto_stub@o@: $(srcdir)/common/crypto_stub.c
+	$(CC) $(CFLAGS) $?
+db185@o@: $(langdir)/db185/db185.c
+	$(CC) $(CFLAGS) $?
+db@o@: $(srcdir)/db/db.c
+	$(CC) $(CFLAGS) $?
+db_am@o@: $(srcdir)/db/db_am.c
+	$(CC) $(CFLAGS) $?
+db_auto@o@: $(srcdir)/db/db_auto.c
+	$(CC) $(CFLAGS) $?
+db_autop@o@: $(srcdir)/db/db_autop.c
+	$(CC) $(CFLAGS) $?
+db_byteorder@o@: $(srcdir)/common/db_byteorder.c
+	$(CC) $(CFLAGS) $?
+db_cam@o@: $(srcdir)/db/db_cam.c
+	$(CC) $(CFLAGS) $?
+db_cds@o@: $(srcdir)/db/db_cds.c
+	$(CC) $(CFLAGS) $?
+db_compact@o@: $(srcdir)/db/db_compact.c
+	$(CC) $(CFLAGS) $?
+db_compint@o@: $(srcdir)/common/db_compint.c
+	$(CC) $(CFLAGS) $?
+db_conv@o@: $(srcdir)/db/db_conv.c
+	$(CC) $(CFLAGS) $?
+db_copy@o@: $(srcdir)/db/db_copy.c
+	$(CC) $(CFLAGS) $?
+db_dispatch@o@: $(srcdir)/db/db_dispatch.c
+	$(CC) $(CFLAGS) $?
+db_dup@o@: $(srcdir)/db/db_dup.c
+	$(CC) $(CFLAGS) $?
+db_err@o@: $(srcdir)/common/db_err.c
+	$(CC) $(CFLAGS) $?
+db_getlong@o@: $(srcdir)/common/db_getlong.c
+	$(CC) $(CFLAGS) $?
+db_idspace@o@: $(srcdir)/common/db_idspace.c
+	$(CC) $(CFLAGS) $?
+db_iface@o@: $(srcdir)/db/db_iface.c
+	$(CC) $(CFLAGS) $?
+db_join@o@: $(srcdir)/db/db_join.c
+	$(CC) $(CFLAGS) $?
+db_log2@o@: $(srcdir)/common/db_log2.c
+	$(CC) $(CFLAGS) $?
+db_meta@o@: $(srcdir)/db/db_meta.c
+	$(CC) $(CFLAGS) $?
+db_method@o@: $(srcdir)/db/db_method.c
+	$(CC) $(CFLAGS) $?
+db_open@o@: $(srcdir)/db/db_open.c
+	$(CC) $(CFLAGS) $?
+db_overflow@o@: $(srcdir)/db/db_overflow.c
+	$(CC) $(CFLAGS) $?
+db_ovfl_vrfy@o@: $(srcdir)/db/db_ovfl_vrfy.c
+	$(CC) $(CFLAGS) $?
+db_pr@o@: $(srcdir)/db/db_pr.c
+	$(CC) $(CFLAGS) $?
+db_rec@o@: $(srcdir)/db/db_rec.c
+	$(CC) $(CFLAGS) $?
+db_reclaim@o@: $(srcdir)/db/db_reclaim.c
+	$(CC) $(CFLAGS) $?
+db_rename@o@: $(srcdir)/db/db_rename.c
+	$(CC) $(CFLAGS) $?
+db_remove@o@: $(srcdir)/db/db_remove.c
+	$(CC) $(CFLAGS) $?
+db_ret@o@: $(srcdir)/db/db_ret.c
+	$(CC) $(CFLAGS) $?
+db_setid@o@: $(srcdir)/db/db_setid.c
+	$(CC) $(CFLAGS) $?
+db_setlsn@o@: $(srcdir)/db/db_setlsn.c
+	$(CC) $(CFLAGS) $?
+db_shash@o@: $(srcdir)/common/db_shash.c
+	$(CC) $(CFLAGS) $?
+db_sort_multiple@o@: $(srcdir)/db/db_sort_multiple.c
+	$(CC) $(CFLAGS) $?
+db_stati@o@: $(srcdir)/db/db_stati.c
+	$(CC) $(CFLAGS) $?
+db_truncate@o@: $(srcdir)/db/db_truncate.c
+	$(CC) $(CFLAGS) $?
+db_upg@o@: $(srcdir)/db/db_upg.c
+	$(CC) $(CFLAGS) $?
+db_upg_opd@o@: $(srcdir)/db/db_upg_opd.c
+	$(CC) $(CFLAGS) $?
+db_vrfy@o@: $(srcdir)/db/db_vrfy.c
+	$(CC) $(CFLAGS) $?
+db_vrfyutil@o@: $(srcdir)/db/db_vrfyutil.c
+	$(CC) $(CFLAGS) $?
+db_vrfy_stub@o@: $(srcdir)/db/db_vrfy_stub.c
+	$(CC) $(CFLAGS) $?
+dbm@o@: $(langdir)/dbm/dbm.c
+	$(CC) $(CFLAGS) $?
+dbreg@o@: $(srcdir)/dbreg/dbreg.c
+	$(CC) $(CFLAGS) $?
+dbreg_auto@o@: $(srcdir)/dbreg/dbreg_auto.c
+	$(CC) $(CFLAGS) $?
+dbreg_autop@o@: $(srcdir)/dbreg/dbreg_autop.c
+	$(CC) $(CFLAGS) $?
+dbreg_rec@o@: $(srcdir)/dbreg/dbreg_rec.c
+	$(CC) $(CFLAGS) $?
+dbreg_stat@o@: $(srcdir)/dbreg/dbreg_stat.c
+	$(CC) $(CFLAGS) $?
+dbreg_util@o@: $(srcdir)/dbreg/dbreg_util.c
+	$(CC) $(CFLAGS) $?
+dbt@o@: $(srcdir)/common/dbt.c
+	$(CC) $(CFLAGS) $?
+env_alloc@o@: $(srcdir)/env/env_alloc.c
+	$(CC) $(CFLAGS) $?
+env_config@o@: $(srcdir)/env/env_config.c
+	$(CC) $(CFLAGS) $?
+env_failchk@o@: $(srcdir)/env/env_failchk.c
+	$(CC) $(CFLAGS) $?
+env_file@o@: $(srcdir)/env/env_file.c
+	$(CC) $(CFLAGS) $?
+env_globals@o@: $(srcdir)/env/env_globals.c
+	$(CC) $(CFLAGS) $?
+env_method@o@: $(srcdir)/env/env_method.c
+	$(CC) $(CFLAGS) $?
+env_name@o@: $(srcdir)/env/env_name.c
+	$(CC) $(CFLAGS) $?
+env_open@o@: $(srcdir)/env/env_open.c
+	$(CC) $(CFLAGS) $?
+env_recover@o@: $(srcdir)/env/env_recover.c
+	$(CC) $(CFLAGS) $?
+env_region@o@: $(srcdir)/env/env_region.c
+	$(CC) $(CFLAGS) $?
+env_register@o@: $(srcdir)/env/env_register.c
+	$(CC) $(CFLAGS) $?
+env_sig@o@: $(srcdir)/env/env_sig.c
+	$(CC) $(CFLAGS) $?
+env_stat@o@: $(srcdir)/env/env_stat.c
+	$(CC) $(CFLAGS) $?
+fileops_auto@o@: $(srcdir)/fileops/fileops_auto.c
+	$(CC) $(CFLAGS) $?
+fileops_autop@o@: $(srcdir)/fileops/fileops_autop.c
+	$(CC) $(CFLAGS) $?
+fop_basic@o@: $(srcdir)/fileops/fop_basic.c
+	$(CC) $(CFLAGS) $?
+fop_rec@o@: $(srcdir)/fileops/fop_rec.c
+	$(CC) $(CFLAGS) $?
+fop_util@o@: $(srcdir)/fileops/fop_util.c
+	$(CC) $(CFLAGS) $?
+hash@o@: $(srcdir)/hash/hash.c
+	$(CC) $(CFLAGS) $?
+hash_auto@o@: $(srcdir)/hash/hash_auto.c
+	$(CC) $(CFLAGS) $?
+hash_autop@o@: $(srcdir)/hash/hash_autop.c
+	$(CC) $(CFLAGS) $?
+hash_compact@o@: $(srcdir)/hash/hash_compact.c
+	$(CC) $(CFLAGS) $?
+hash_conv@o@: $(srcdir)/hash/hash_conv.c
+	$(CC) $(CFLAGS) $?
+hash_dup@o@: $(srcdir)/hash/hash_dup.c
+	$(CC) $(CFLAGS) $?
+hash_func@o@: $(srcdir)/hash/hash_func.c
+	$(CC) $(CFLAGS) $?
+hash_meta@o@: $(srcdir)/hash/hash_meta.c
+	$(CC) $(CFLAGS) $?
+hash_method@o@: $(srcdir)/hash/hash_method.c
+	$(CC) $(CFLAGS) $?
+hash_open@o@: $(srcdir)/hash/hash_open.c
+	$(CC) $(CFLAGS) $?
+hash_page@o@: $(srcdir)/hash/hash_page.c
+	$(CC) $(CFLAGS) $?
+hash_rec@o@: $(srcdir)/hash/hash_rec.c
+	$(CC) $(CFLAGS) $?
+hash_reclaim@o@: $(srcdir)/hash/hash_reclaim.c
+	$(CC) $(CFLAGS) $?
+hash_stat@o@: $(srcdir)/hash/hash_stat.c
+	$(CC) $(CFLAGS) $?
+hash_stub@o@: $(srcdir)/hash/hash_stub.c
+	$(CC) $(CFLAGS) $?
+hash_upgrade@o@: $(srcdir)/hash/hash_upgrade.c
+	$(CC) $(CFLAGS) $?
+hash_verify@o@: $(srcdir)/hash/hash_verify.c
+	$(CC) $(CFLAGS) $?
+heap@o@: $(srcdir)/heap/heap.c
+	$(CC) $(CFLAGS) $?
+heap_auto@o@: $(srcdir)/heap/heap_auto.c
+	$(CC) $(CFLAGS) $?
+heap_autop@o@: $(srcdir)/heap/heap_autop.c
+	$(CC) $(CFLAGS) $?
+heap_conv@o@: $(srcdir)/heap/heap_conv.c
+	$(CC) $(CFLAGS) $?
+heap_method@o@: $(srcdir)/heap/heap_method.c
+	$(CC) $(CFLAGS) $?
+heap_open@o@: $(srcdir)/heap/heap_open.c
+	$(CC) $(CFLAGS) $?
+heap_rec@o@: $(srcdir)/heap/heap_rec.c
+	$(CC) $(CFLAGS) $?
+heap_reclaim@o@: $(srcdir)/heap/heap_reclaim.c
+	$(CC) $(CFLAGS) $?
+heap_stat@o@: $(srcdir)/heap/heap_stat.c
+	$(CC) $(CFLAGS) $?
+heap_stub@o@: $(srcdir)/heap/heap_stub.c
+	$(CC) $(CFLAGS) $?
+heap_verify@o@: $(srcdir)/heap/heap_verify.c
+	$(CC) $(CFLAGS) $?
+hmac@o@: $(srcdir)/hmac/hmac.c
+	$(CC) $(CFLAGS) $?
+hsearch@o@: $(langdir)/hsearch/hsearch.c
+	$(CC) $(CFLAGS) $?
+# lock.c also depends on lock_alloc.incl; the source file is named
+# explicitly here because $? would expand to both prerequisites.
+lock@o@: $(srcdir)/lock/lock.c $(srcdir)/lock/lock_alloc.incl
+	$(CC) $(CFLAGS) $(srcdir)/lock/lock.c
+lock_deadlock@o@:$(srcdir)/lock/lock_deadlock.c
+	$(CC) $(CFLAGS) $?
+lock_failchk@o@:$(srcdir)/lock/lock_failchk.c
+	$(CC) $(CFLAGS) $?
+lock_id@o@:$(srcdir)/lock/lock_id.c
+	$(CC) $(CFLAGS) $?
+lock_list@o@:$(srcdir)/lock/lock_list.c
+	$(CC) $(CFLAGS) $?
+lock_method@o@:$(srcdir)/lock/lock_method.c
+	$(CC) $(CFLAGS) $?
+lock_region@o@:$(srcdir)/lock/lock_region.c
+	$(CC) $(CFLAGS) $?
+lock_stat@o@:$(srcdir)/lock/lock_stat.c
+	$(CC) $(CFLAGS) $?
+lock_stub@o@: $(srcdir)/lock/lock_stub.c
+	$(CC) $(CFLAGS) $?
+lock_timer@o@:$(srcdir)/lock/lock_timer.c
+	$(CC) $(CFLAGS) $?
+lock_util@o@:$(srcdir)/lock/lock_util.c
+	$(CC) $(CFLAGS) $?
+log@o@: $(srcdir)/log/log.c
+	$(CC) $(CFLAGS) $?
+log_archive@o@: $(srcdir)/log/log_archive.c
+	$(CC) $(CFLAGS) $?
+log_compare@o@: $(srcdir)/log/log_compare.c
+	$(CC) $(CFLAGS) $?
+log_debug@o@: $(srcdir)/log/log_debug.c
+	$(CC) $(CFLAGS) $?
+log_get@o@: $(srcdir)/log/log_get.c
+	$(CC) $(CFLAGS) $?
+log_method@o@: $(srcdir)/log/log_method.c
+	$(CC) $(CFLAGS) $?
+log_print@o@: $(srcdir)/log/log_print.c
+	$(CC) $(CFLAGS) $?
+log_put@o@: $(srcdir)/log/log_put.c
+	$(CC) $(CFLAGS) $?
+log_stat@o@: $(srcdir)/log/log_stat.c
+	$(CC) $(CFLAGS) $?
+log_verify@o@: $(srcdir)/log/log_verify.c
+	$(CC) $(CFLAGS) $?
+log_verify_auto@o@: $(srcdir)/log/log_verify_auto.c
+	$(CC) $(CFLAGS) $?
+log_verify_int@o@: $(srcdir)/log/log_verify_int.c
+	$(CC) $(CFLAGS) $?
+log_verify_util@o@: $(srcdir)/log/log_verify_util.c
+	$(CC) $(CFLAGS) $?
+log_verify_stub@o@: $(srcdir)/log/log_verify_stub.c
+	$(CC) $(CFLAGS) $?
+db_log_verify@o@: $(utildir)/db_log_verify.c
+	$(CC) $(CFLAGS) $?
+mkpath@o@: $(srcdir)/common/mkpath.c
+	$(CC) $(CFLAGS) $?
+mp_alloc@o@: $(srcdir)/mp/mp_alloc.c
+	$(CC) $(CFLAGS) $?
+mp_bh@o@: $(srcdir)/mp/mp_bh.c
+	$(CC) $(CFLAGS) $?
+mp_fget@o@: $(srcdir)/mp/mp_fget.c
+	$(CC) $(CFLAGS) $?
+mp_fmethod@o@: $(srcdir)/mp/mp_fmethod.c
+	$(CC) $(CFLAGS) $?
+mp_fopen@o@: $(srcdir)/mp/mp_fopen.c
+	$(CC) $(CFLAGS) $?
+mp_fput@o@: $(srcdir)/mp/mp_fput.c
+	$(CC) $(CFLAGS) $?
+mp_fset@o@: $(srcdir)/mp/mp_fset.c
+	$(CC) $(CFLAGS) $?
+mp_method@o@: $(srcdir)/mp/mp_method.c
+	$(CC) $(CFLAGS) $?
+mp_mvcc@o@: $(srcdir)/mp/mp_mvcc.c
+	$(CC) $(CFLAGS) $?
+mp_region@o@: $(srcdir)/mp/mp_region.c
+	$(CC) $(CFLAGS) $?
+mp_register@o@: $(srcdir)/mp/mp_register.c
+	$(CC) $(CFLAGS) $?
+mp_resize@o@: $(srcdir)/mp/mp_resize.c
+	$(CC) $(CFLAGS) $?
+mp_stat@o@: $(srcdir)/mp/mp_stat.c
+	$(CC) $(CFLAGS) $?
+mp_sync@o@: $(srcdir)/mp/mp_sync.c
+	$(CC) $(CFLAGS) $?
+mp_trickle@o@: $(srcdir)/mp/mp_trickle.c
+	$(CC) $(CFLAGS) $?
+mt19937db@o@: $(srcdir)/crypto/mersenne/mt19937db.c
+	$(CC) $(CFLAGS) $?
+mut_alloc@o@: $(srcdir)/mutex/mut_alloc.c
+	$(CC) $(CFLAGS) $?
+mut_failchk@o@: $(srcdir)/mutex/mut_failchk.c
+	$(CC) $(CFLAGS) $?
+mut_fcntl@o@: $(srcdir)/mutex/mut_fcntl.c
+	$(CC) $(CFLAGS) $?
+mut_method@o@: $(srcdir)/mutex/mut_method.c
+	$(CC) $(CFLAGS) $?
+mut_pthread@o@: $(srcdir)/mutex/mut_pthread.c
+	$(CC) $(CFLAGS) $?
+mut_region@o@: $(srcdir)/mutex/mut_region.c
+	$(CC) $(CFLAGS) $?
+mut_stat@o@: $(srcdir)/mutex/mut_stat.c
+	$(CC) $(CFLAGS) $?
+mut_stub@o@: $(srcdir)/mutex/mut_stub.c
+	$(CC) $(CFLAGS) $?
+mut_tas@o@: $(srcdir)/mutex/mut_tas.c
+	$(CC) $(CFLAGS) $?
+mut_win32@o@: $(srcdir)/mutex/mut_win32.c
+	$(CC) $(CFLAGS) $?
+openflags@o@: $(srcdir)/common/openflags.c
+	$(CC) $(CFLAGS) $?
+# @OSDIR@ is substituted by configure with the platform OS directory.
+os_abs@o@: $(srcdir)/@OSDIR@/os_abs.c
+	$(CC) $(CFLAGS) $?
+os_abort@o@: $(srcdir)/os/os_abort.c
+	$(CC) $(CFLAGS) $?
+os_addrinfo@o@: $(srcdir)/os/os_addrinfo.c
+	$(CC) $(CFLAGS) $?
+os_alloc@o@: $(srcdir)/os/os_alloc.c
+	$(CC) $(CFLAGS) $?
+os_clock@o@: $(srcdir)/@OSDIR@/os_clock.c
+	$(CC) $(CFLAGS) $?
+os_config@o@: $(srcdir)/@OSDIR@/os_config.c
+	$(CC) $(CFLAGS) $?
+os_cpu@o@: $(srcdir)/@OSDIR@/os_cpu.c
+	$(CC) $(CFLAGS) $?
+os_ctime@o@: $(srcdir)/os/os_ctime.c
+	$(CC) $(CFLAGS) $?
+os_dir@o@: $(srcdir)/@OSDIR@/os_dir.c
+	$(CC) $(CFLAGS) $?
+os_errno@o@: $(srcdir)/@OSDIR@/os_errno.c
+	$(CC) $(CFLAGS) $?
+os_fid@o@: $(srcdir)/@OSDIR@/os_fid.c
+	$(CC) $(CFLAGS) $?
+os_flock@o@: $(srcdir)/@OSDIR@/os_flock.c
+	$(CC) $(CFLAGS) $?
+os_fsync@o@: $(srcdir)/@OSDIR@/os_fsync.c
+	$(CC) $(CFLAGS) $?
+os_getenv@o@: $(srcdir)/@OSDIR@/os_getenv.c
+	$(CC) $(CFLAGS) $?
+os_handle@o@: $(srcdir)/@OSDIR@/os_handle.c
+	$(CC) $(CFLAGS) $?
+os_map@o@: $(srcdir)/@OSDIR@/os_map.c
+	$(CC) $(CFLAGS) $?
+os_method@o@: $(srcdir)/common/os_method.c
+	$(CC) $(CFLAGS) $?
+os_mkdir@o@: $(srcdir)/@OSDIR@/os_mkdir.c
+	$(CC) $(CFLAGS) $?
+os_open@o@: $(srcdir)/@OSDIR@/os_open.c
+	$(CC) $(CFLAGS) $?
+os_pid@o@: $(srcdir)/os/os_pid.c
+	$(CC) $(CFLAGS) $?
+os_qnx_fsync@o@: $(srcdir)/os_qnx/os_qnx_fsync.c
+	$(CC) $(CFLAGS) $?
+os_qnx_open@o@: $(srcdir)/os_qnx/os_qnx_open.c
+	$(CC) $(CFLAGS) $?
+os_rename@o@: $(srcdir)/@OSDIR@/os_rename.c
+	$(CC) $(CFLAGS) $?
+os_root@o@: $(srcdir)/os/os_root.c + $(CC) $(CFLAGS) $? +os_rpath@o@: $(srcdir)/os/os_rpath.c + $(CC) $(CFLAGS) $? +os_rw@o@: $(srcdir)/@OSDIR@/os_rw.c + $(CC) $(CFLAGS) $? +os_seek@o@: $(srcdir)/@OSDIR@/os_seek.c + $(CC) $(CFLAGS) $? +os_stack@o@: $(srcdir)/os/os_stack.c + $(CC) $(CFLAGS) $? +os_stat@o@: $(srcdir)/@OSDIR@/os_stat.c + $(CC) $(CFLAGS) $? +os_tmpdir@o@: $(srcdir)/os/os_tmpdir.c + $(CC) $(CFLAGS) $? +os_truncate@o@: $(srcdir)/@OSDIR@/os_truncate.c + $(CC) $(CFLAGS) $? +os_uid@o@: $(srcdir)/os/os_uid.c + $(CC) $(CFLAGS) $? +os_unlink@o@: $(srcdir)/@OSDIR@/os_unlink.c + $(CC) $(CFLAGS) $? +os_yield@o@: $(srcdir)/@OSDIR@/os_yield.c + $(CC) $(CFLAGS) $? +partition@o@: $(srcdir)/db/partition.c + $(CC) $(CFLAGS) $? +qam@o@: $(srcdir)/qam/qam.c + $(CC) $(CFLAGS) $? +qam_auto@o@: $(srcdir)/qam/qam_auto.c + $(CC) $(CFLAGS) $? +qam_autop@o@: $(srcdir)/qam/qam_autop.c + $(CC) $(CFLAGS) $? +qam_conv@o@: $(srcdir)/qam/qam_conv.c + $(CC) $(CFLAGS) $? +qam_files@o@: $(srcdir)/qam/qam_files.c + $(CC) $(CFLAGS) $? +qam_method@o@: $(srcdir)/qam/qam_method.c + $(CC) $(CFLAGS) $? +qam_open@o@: $(srcdir)/qam/qam_open.c + $(CC) $(CFLAGS) $? +qam_rec@o@: $(srcdir)/qam/qam_rec.c + $(CC) $(CFLAGS) $? +qam_stat@o@: $(srcdir)/qam/qam_stat.c + $(CC) $(CFLAGS) $? +qam_stub@o@: $(srcdir)/qam/qam_stub.c + $(CC) $(CFLAGS) $? +qam_upgrade@o@: $(srcdir)/qam/qam_upgrade.c + $(CC) $(CFLAGS) $? +qam_verify@o@: $(srcdir)/qam/qam_verify.c + $(CC) $(CFLAGS) $? +rep_automsg@o@: $(srcdir)/rep/rep_automsg.c + $(CC) $(CFLAGS) $? +rep_backup@o@: $(srcdir)/rep/rep_backup.c + $(CC) $(CFLAGS) $? +rep_elect@o@: $(srcdir)/rep/rep_elect.c + $(CC) $(CFLAGS) $? +rep_lease@o@: $(srcdir)/rep/rep_lease.c + $(CC) $(CFLAGS) $? +rep_log@o@: $(srcdir)/rep/rep_log.c + $(CC) $(CFLAGS) $? +rep_method@o@: $(srcdir)/rep/rep_method.c + $(CC) $(CFLAGS) $? +rep_record@o@: $(srcdir)/rep/rep_record.c + $(CC) $(CFLAGS) $? +rep_region@o@: $(srcdir)/rep/rep_region.c + $(CC) $(CFLAGS) $? 
+rep_stub@o@: $(srcdir)/rep/rep_stub.c + $(CC) $(CFLAGS) $? +rep_stat@o@: $(srcdir)/rep/rep_stat.c + $(CC) $(CFLAGS) $? +rep_util@o@: $(srcdir)/rep/rep_util.c + $(CC) $(CFLAGS) $? +rep_verify@o@: $(srcdir)/rep/rep_verify.c + $(CC) $(CFLAGS) $? +repmgr_auto@o@: $(srcdir)/repmgr/repmgr_auto.c + $(CC) $(CFLAGS) $? +repmgr_automsg@o@: $(srcdir)/repmgr/repmgr_automsg.c + $(CC) $(CFLAGS) $? +repmgr_autop@o@: $(srcdir)/repmgr/repmgr_autop.c + $(CC) $(CFLAGS) $? +repmgr_elect@o@: $(srcdir)/repmgr/repmgr_elect.c + $(CC) $(CFLAGS) $? +repmgr_method@o@: $(srcdir)/repmgr/repmgr_method.c + $(CC) $(CFLAGS) $? +repmgr_msg@o@: $(srcdir)/repmgr/repmgr_msg.c + $(CC) $(CFLAGS) $? +repmgr_net@o@: $(srcdir)/repmgr/repmgr_net.c + $(CC) $(CFLAGS) $? +repmgr_posix@o@: $(srcdir)/repmgr/repmgr_posix.c + $(CC) $(CFLAGS) $? +repmgr_queue@o@: $(srcdir)/repmgr/repmgr_queue.c + $(CC) $(CFLAGS) $? +repmgr_rec@o@: $(srcdir)/repmgr/repmgr_rec.c + $(CC) $(CFLAGS) $? +repmgr_sel@o@: $(srcdir)/repmgr/repmgr_sel.c + $(CC) $(CFLAGS) $? +repmgr_stat@o@: $(srcdir)/repmgr/repmgr_stat.c + $(CC) $(CFLAGS) $? +repmgr_stub@o@: $(srcdir)/repmgr/repmgr_stub.c + $(CC) $(CFLAGS) $? +repmgr_util@o@: $(srcdir)/repmgr/repmgr_util.c + $(CC) $(CFLAGS) $? +rijndael-alg-fst@o@: $(srcdir)/crypto/rijndael/rijndael-alg-fst.c + $(CC) $(CFLAGS) $? +rijndael-api-fst@o@: $(srcdir)/crypto/rijndael/rijndael-api-fst.c + $(CC) $(CFLAGS) $? +seq_stat@o@: $(srcdir)/sequence/seq_stat.c + $(CC) $(CFLAGS) $? +sequence@o@: $(srcdir)/sequence/sequence.c + $(CC) $(CFLAGS) $? +sha1@o@: $(srcdir)/hmac/sha1.c + $(CC) $(CFLAGS) $? +stat_stub@o@: $(srcdir)/common/stat_stub.c + $(CC) $(CFLAGS) $? +txn@o@: $(srcdir)/txn/txn.c + $(CC) $(CFLAGS) $? +txn_auto@o@: $(srcdir)/txn/txn_auto.c + $(CC) $(CFLAGS) $? +txn_autop@o@: $(srcdir)/txn/txn_autop.c + $(CC) $(CFLAGS) $? +txn_chkpt@o@: $(srcdir)/txn/txn_chkpt.c + $(CC) $(CFLAGS) $? +txn_failchk@o@: $(srcdir)/txn/txn_failchk.c + $(CC) $(CFLAGS) $? 
+txn_method@o@: $(srcdir)/txn/txn_method.c + $(CC) $(CFLAGS) $? +txn_rec@o@: $(srcdir)/txn/txn_rec.c + $(CC) $(CFLAGS) $? +txn_recover@o@: $(srcdir)/txn/txn_recover.c + $(CC) $(CFLAGS) $? +txn_region@o@: $(srcdir)/txn/txn_region.c + $(CC) $(CFLAGS) $? +txn_stat@o@: $(srcdir)/txn/txn_stat.c + $(CC) $(CFLAGS) $? +txn_util@o@: $(srcdir)/txn/txn_util.c + $(CC) $(CFLAGS) $? +util_arg@o@: $(srcdir)/common/util_arg.c + $(CC) $(CFLAGS) $? +util_cache@o@: $(srcdir)/common/util_cache.c + $(CC) $(CFLAGS) $? +util_log@o@: $(srcdir)/common/util_log.c + $(CC) $(CFLAGS) $? +util_sig@o@: $(srcdir)/common/util_sig.c + $(CC) $(CFLAGS) $? +uts4_cc@o@: $(srcdir)/mutex/uts4_cc.s + $(AS) $(ASFLAGS) -o $@ $? +xa@o@: $(srcdir)/xa/xa.c + $(CC) $(CFLAGS) $? +xa_map@o@: $(srcdir)/xa/xa_map.c + $(CC) $(CFLAGS) $? +zerofill@o@: $(srcdir)/common/zerofill.c + $(CC) $(CFLAGS) $? + +################################################## +# C++ API build rules. +################################################## +cxx_channel@o@: $(langdir)/cxx/cxx_channel.cpp + $(CXX) $(CXXFLAGS) $? +cxx_db@o@: $(langdir)/cxx/cxx_db.cpp + $(CXX) $(CXXFLAGS) $? +cxx_dbc@o@: $(langdir)/cxx/cxx_dbc.cpp + $(CXX) $(CXXFLAGS) $? +cxx_dbt@o@: $(langdir)/cxx/cxx_dbt.cpp + $(CXX) $(CXXFLAGS) $? +cxx_env@o@: $(langdir)/cxx/cxx_env.cpp + $(CXX) $(CXXFLAGS) $? +cxx_except@o@: $(langdir)/cxx/cxx_except.cpp + $(CXX) $(CXXFLAGS) $? +cxx_lock@o@: $(langdir)/cxx/cxx_lock.cpp + $(CXX) $(CXXFLAGS) $? +cxx_logc@o@: $(langdir)/cxx/cxx_logc.cpp + $(CXX) $(CXXFLAGS) $? +cxx_mpool@o@: $(langdir)/cxx/cxx_mpool.cpp + $(CXX) $(CXXFLAGS) $? +cxx_multi@o@: $(langdir)/cxx/cxx_multi.cpp + $(CXX) $(CXXFLAGS) $? +cxx_rid@o@: $(langdir)/cxx/cxx_rid.cpp + $(CXX) $(CXXFLAGS) $? +cxx_seq@o@: $(langdir)/cxx/cxx_seq.cpp + $(CXX) $(CXXFLAGS) $? +cxx_site@o@: $(langdir)/cxx/cxx_site.cpp + $(CXX) $(CXXFLAGS) $? +cxx_txn@o@: $(langdir)/cxx/cxx_txn.cpp + $(CXX) $(CXXFLAGS) $? + +################################################## +# Java API build rules. 
+################################################## +db_java_wrap@o@: $(langdir)/java/libdb_java/db_java_wrap.c + $(CC) $(CFLAGS) $(SWIGCFLAGS) $? + +################################################## +# SQL API build rules. +################################################## +sqlite3@o@: $(langdir)/sql/generated/sqlite3.c + $(CC) $(CFLAGS) $(SQLFLAGS) $? +shell@o@: $(langdir)/sql/sqlite/src/shell.c + $(CC) $(CFLAGS) $(SQLFLAGS) $? + +################################################## +# STL API build rules. +################################################## +dbstl_container@o@: $(langdir)/cxx/stl/dbstl_container.cpp + $(CXX) $(STLFLAGS) $? +dbstl_resource_manager@o@: $(langdir)/cxx/stl/dbstl_resource_manager.cpp + $(CXX) $(STLFLAGS) $? + +################################################## +# Tcl API build rules. +################################################## +tcl_compat@o@: $(TCL_SRCDIR)/tcl_compat.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_db@o@: $(TCL_SRCDIR)/tcl_db.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_db_pkg@o@: $(TCL_SRCDIR)/tcl_db_pkg.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_dbcursor@o@: $(TCL_SRCDIR)/tcl_dbcursor.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_env@o@: $(TCL_SRCDIR)/tcl_env.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_internal@o@: $(TCL_SRCDIR)/tcl_internal.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_lock@o@: $(TCL_SRCDIR)/tcl_lock.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_log@o@: $(TCL_SRCDIR)/tcl_log.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_mp@o@: $(TCL_SRCDIR)/tcl_mp.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_mutex@o@: $(TCL_SRCDIR)/tcl_mutex.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_rep@o@: $(TCL_SRCDIR)/tcl_rep.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_seq@o@: $(TCL_SRCDIR)/tcl_seq.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? +tcl_txn@o@: $(TCL_SRCDIR)/tcl_txn.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? 
+tcl_util@o@: $(TCL_SRCDIR)/tcl_util.c + $(CC) $(CFLAGS) $(TCL_INCLUDE_SPEC) $? + +################################################## +# Utility build rules. +################################################## +db_archive@o@: $(utildir)/db_archive.c + $(CC) $(CFLAGS) $? +db_checkpoint@o@: $(utildir)/db_checkpoint.c + $(CC) $(CFLAGS) $? +db_deadlock@o@: $(utildir)/db_deadlock.c + $(CC) $(CFLAGS) $? +db_dump@o@: $(utildir)/db_dump.c + $(CC) $(CFLAGS) $? +db_dump185@o@: $(utildir)/db_dump185.c + $(CC) $(DB185INC) $? +db_hotbackup@o@: $(utildir)/db_hotbackup.c + $(CC) $(CFLAGS) $? +db_load@o@: $(utildir)/db_load.c + $(CC) $(CFLAGS) $? +db_printlog@o@: $(utildir)/db_printlog.c + $(CC) $(CFLAGS) $? +db_recover@o@: $(utildir)/db_recover.c + $(CC) $(CFLAGS) $? +db_replicate@o@: $(utildir)/db_replicate.c + $(CC) $(CFLAGS) $? +db_stat@o@: $(utildir)/db_stat.c + $(CC) $(CFLAGS) $? +db_tuner@o@: $(utildir)/db_tuner.c + $(CC) $(CFLAGS) $? +db_upgrade@o@: $(utildir)/db_upgrade.c + $(CC) $(CFLAGS) $? +db_verify@o@: $(utildir)/db_verify.c + $(CC) $(CFLAGS) $? + +db_sql_codegen@o@: $(utildir)/db_sql_codegen/db_sql_codegen.c + $(CC) $(CFLAGS) $? +preparser@o@: $(utildir)/db_sql_codegen/preparser.c + $(CC) $(CFLAGS) $? +parsefuncs@o@: $(utildir)/db_sql_codegen/parsefuncs.c + $(CC) $(CFLAGS) $? +tokenize@o@: $(utildir)/db_sql_codegen/tokenize.c + $(CC) $(CFLAGS) $? +buildpt@o@: $(utildir)/db_sql_codegen/buildpt.c + $(CC) $(CFLAGS) $? +utils@o@: $(utildir)/db_sql_codegen/utils.c + $(CC) $(CFLAGS) $? +generate@o@: $(utildir)/db_sql_codegen/generate.c + $(CC) $(CFLAGS) $? +generate_test@o@: $(utildir)/db_sql_codegen/generate_test.c + $(CC) $(CFLAGS) $? +generate_verification@o@: $(utildir)/db_sql_codegen/generate_verification.c + $(CC) $(CFLAGS) $? +generation_utils@o@: $(utildir)/db_sql_codegen/generation_utils.c + $(CC) $(CFLAGS) $? +hint_comment@o@: $(utildir)/db_sql_codegen/hint_comment.c + $(CC) $(CFLAGS) $? 
+sqlprintf@o@: $(utildir)/db_sql_codegen/sqlite/sqlprintf.c + $(CC) $(CFLAGS) $? +parse@o@: $(utildir)/db_sql_codegen/sqlite/parse.c + $(CC) $(CFLAGS) $? + +################################################## +# C library replacement files. +################################################## +atoi@o@: $(srcdir)/clib/atoi.c + $(CC) $(CFLAGS) $? +atol@o@: $(srcdir)/clib/atol.c + $(CC) $(CFLAGS) $? +bsearch@o@: $(srcdir)/clib/bsearch.c + $(CC) $(CFLAGS) $? +getcwd@o@: $(srcdir)/clib/getcwd.c + $(CC) $(CFLAGS) $? +getopt@o@: $(srcdir)/clib/getopt.c + $(CC) $(CFLAGS) $? +isalpha@o@: $(srcdir)/clib/isalpha.c + $(CC) $(CFLAGS) $? +isdigit@o@: $(srcdir)/clib/isdigit.c + $(CC) $(CFLAGS) $? +isprint@o@: $(srcdir)/clib/isprint.c + $(CC) $(CFLAGS) $? +isspace@o@: $(srcdir)/clib/isspace.c + $(CC) $(CFLAGS) $? +memcmp@o@: $(srcdir)/clib/memcmp.c + $(CC) $(CFLAGS) $? +memcpy@o@: $(srcdir)/clib/memmove.c + $(CC) -DMEMCOPY $(CFLAGS) $? -o $@ +memmove@o@: $(srcdir)/clib/memmove.c + $(CC) -DMEMMOVE $(CFLAGS) $? +printf@o@: $(srcdir)/clib/printf.c + $(CC) $(CFLAGS) $? +qsort@o@: $(srcdir)/clib/qsort.c + $(CC) $(CFLAGS) $? +raise@o@: $(srcdir)/clib/raise.c + $(CC) $(CFLAGS) $? +rand@o@: $(srcdir)/clib/rand.c + $(CC) $(CFLAGS) $? +strcasecmp@o@: $(srcdir)/clib/strcasecmp.c + $(CC) $(CFLAGS) $? +strdup@o@: $(srcdir)/clib/strdup.c + $(CC) $(CFLAGS) $? +snprintf@o@: $(srcdir)/clib/snprintf.c + $(CC) $(CFLAGS) $? +strcat@o@: $(srcdir)/clib/strcat.c + $(CC) $(CFLAGS) $? +strchr@o@: $(srcdir)/clib/strchr.c + $(CC) $(CFLAGS) $? +strerror@o@: $(srcdir)/clib/strerror.c + $(CC) $(CFLAGS) $? +strncat@o@: $(srcdir)/clib/strncat.c + $(CC) $(CFLAGS) $? +strncmp@o@: $(srcdir)/clib/strncmp.c + $(CC) $(CFLAGS) $? +strrchr@o@: $(srcdir)/clib/strrchr.c + $(CC) $(CFLAGS) $? +strsep@o@: $(srcdir)/clib/strsep.c + $(CC) $(CFLAGS) $? +strtol@o@: $(srcdir)/clib/strtol.c + $(CC) $(CFLAGS) $? +strtoul@o@: $(srcdir)/clib/strtoul.c + $(CC) $(CFLAGS) $? +time@o@: $(srcdir)/clib/time.c + $(CC) $(CFLAGS) $? 
+ +################################################## +# Performance Event Monitoring build rules +################################################## + +# DTrace rules: +# The DTrace DB provider description (db_provider.d) is included in the +# distribution. It needs to be rebuilt when adding events to +# $(distdir)/events.in. The C/C++ header file db_provider.h is +# created at configure time, and is rebuilt when db_provider.d changes. +# It has lower-case versions of the event class and type names. +# +# To list the DB static probes, try: +# [pfexec | sudo] dtrace -l -n 'bdb$target:::' -c " " +# [sudo] stap -l 'process(".libs/libdb-@DB_VERSION_MAJOR@.@DB_VERSION_MINOR@.so").mark("*")' +listevents listprobes: @LISTPROBES_DEPENDENCY@ + @LISTPROBES_COMMAND@ + +$(DTRACE_PROVIDER): $(distdir)/gen_provider.pl $(distdir)/events.in + -@$(RM) $@ + $(PERL) $(distdir)/gen_provider.pl $(distdir)/events.in > $@ + +SED_PROVIDER_PATTERN='/^\#define[ ]*BDB_[A-Z_]*(/y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/' +db_provider.h : $(DTRACE_PROVIDER) + -@$(RM) $@ + @if test -z "$(DTRACE)" ; then echo "$@ is used only with dtrace or stap"; exit 1; fi + ($(DTRACE) -h @DTRACE_CPP@ -I$(utildir)/dtrace -s $(DTRACE_PROVIDER) -o $@ && \ + $(MV) $@ $@.tmp && \ + $(SED) -e $(SED_PROVIDER_PATTERN) $@.tmp > $@) || $(RM) $@ + @$(RM) $@.tmp + +# DTrace on platforms which require dtrace -G (e.g., Solaris) need to include +# an extra object file when linking. This placeholder 'db_provider.lo' has a +# special update rule which knows that the PIC object files may be in .libs. +# If that directory exists, the object files located there are updated by a +# separate dtrace -G call. +# +# Should this be added to libtool's --mode=link step on Solaris? 
+# +DTRACE_OFILES=`echo $(DTRACE_OBJS) " " | $(SED) -e 's/\.lo /\.o /g'` + +db_provider@o@: db_provider.c $(DTRACE_OBJS) $(DTRACE_PROVIDER) + $(RM) db_provider.o .libs/db_provider.o + @# A compilation warning such as 'empty translation unit' is harmless. + $(CC) $(CFLAGS) db_provider.c + if test -f db_provider.o ; then \ + $(DTRACE) -G @DTRACE_CPP@ -I$(utildir)/dtrace -s $(DTRACE_PROVIDER) $(DTRACE_OFILES) ; \ + fi + if test -f .libs/db_provider.o ; then \ + (cd .libs && \ + $(DTRACE) -G @DTRACE_CPP@ -I../$(utildir)/dtrace -s ../$(DTRACE_PROVIDER) $(DTRACE_OFILES)) || \ + $(RM) $@ ; \ + fi + +# db_provider.c is created as an empty file at configure time with +# --enable-dtrace. If missing then configure should be re-run. +# So far only Solaris needs it. +db_provider.c: + @echo "The file db_provider@o@ is used only for configurations requiring dtrace -G" + @echo "Re-run configure with --enable-dtrace if needed" + @exit 1 diff --git a/dist/RELEASE b/dist/RELEASE new file mode 100644 index 00000000..61670208 --- /dev/null +++ b/dist/RELEASE @@ -0,0 +1,16 @@ +# $Id$ + +DB_VERSION_FAMILY=11 +DB_VERSION_LETTER="g" +DB_VERSION_RELEASE=2 +DB_VERSION_MAJOR=5 +DB_VERSION_MINOR=2 +DB_VERSION_PATCH=28 +DB_VERSION="$DB_VERSION_MAJOR.$DB_VERSION_MINOR.$DB_VERSION_PATCH" +DB_VERSION_FULL="$DB_VERSION_FAMILY.$DB_VERSION_RELEASE.$DB_VERSION_MAJOR.$DB_VERSION_MINOR.$DB_VERSION_PATCH" + +DB_VERSION_UNIQUE_NAME=`printf "_%d%03d" $DB_VERSION_MAJOR $DB_VERSION_MINOR` + +DB_RELEASE_DATE="June 10, 2011" +DB_VERSION_STRING="Berkeley DB $DB_VERSION: ($DB_RELEASE_DATE)" +DB_VERSION_FULL_STRING="Berkeley DB $DB_VERSION_FAMILY$DB_VERSION_LETTER Release $DB_VERSION_RELEASE, library version $DB_VERSION_FULL: ($DB_RELEASE_DATE)" diff --git a/dist/aclocal/clock.m4 b/dist/aclocal/clock.m4 new file mode 100644 index 00000000..a978ffa5 --- /dev/null +++ b/dist/aclocal/clock.m4 @@ -0,0 +1,59 @@ +# $Id$ + +# Configure clocks and timers. 
+AC_DEFUN(AC_TIMERS, [
+
+AC_CHECK_FUNCS(gettimeofday localtime time strftime)
+
+# AIX 4.3 will link applications with calls to clock_gettime, but the
+# calls always fail.
+case "$host_os" in
+aix4.3.*)
+	;;
+*)
+	AC_CHECK_FUNCS(clock_gettime);;
+esac
+
+# clock_gettime -- monotonic clocks.
+#	Check to see if we can get a monotonic clock.  We actually try and
+#	run the program if possible, because we don't trust the #define's
+#	existence to mean the clock really exists.
+AC_CACHE_CHECK([for clock_gettime monotonic clock], db_cv_clock_monotonic, [
+AC_TRY_RUN([
+#include <sys/time.h>
+main() {
+	struct timespec t;
+	return (clock_gettime(CLOCK_MONOTONIC, &t) != 0);
+}], db_cv_clock_monotonic=yes, db_cv_clock_monotonic=no,
+AC_TRY_LINK([
+#include <sys/time.h>], [
+struct timespec t;
+clock_gettime(CLOCK_MONOTONIC, &t);
+], db_cv_clock_monotonic=yes, db_cv_clock_monotonic=no))
+])
+if test "$db_cv_clock_monotonic" = "yes"; then
+	AC_DEFINE(HAVE_CLOCK_MONOTONIC)
+	AH_TEMPLATE(HAVE_CLOCK_MONOTONIC,
+	    [Define to 1 if clock_gettime supports CLOCK_MONOTONIC.])
+fi
+
+# ctime_r --
+#
+# There are two versions of ctime_r, one of which takes a buffer length as a
+# third argument, and one which only takes two arguments. (There is also a
+# difference in return values and the type of the 3rd argument, but we handle
+# those problems in the code itself.)
+AC_CHECK_FUNCS(ctime_r)
+if test "$ac_cv_func_ctime_r" = "yes"; then
+AC_CACHE_CHECK([for 2 or 3 argument version of ctime_r], db_cv_ctime_r_3arg, [
+AC_TRY_LINK([
+#include <time.h>], [
+	ctime_r(NULL, NULL, 100);
+], [db_cv_ctime_r_3arg="3-argument"], [db_cv_ctime_r_3arg="2-argument"])])
+fi
+if test "$db_cv_ctime_r_3arg" = "3-argument"; then
+	AC_DEFINE(HAVE_CTIME_R_3ARG)
+	AH_TEMPLATE(HAVE_CTIME_R_3ARG,
+	    [Define to 1 if ctime_r takes a buffer length as a third argument.])
+fi
+])
diff --git a/dist/aclocal/config.m4 b/dist/aclocal/config.m4
new file mode 100644
index 00000000..b776c7a0
--- /dev/null
+++ b/dist/aclocal/config.m4
@@ -0,0 +1,19 @@
+# Features we don't test for, but want the #defines to exist for
+# other ports.
+AH_TEMPLATE(DB_WIN32,
+    [We use DB_WIN32 much as one would use _WIN32 -- to specify that
+    we're using an operating system environment that supports Win32
+    calls and semantics.  We don't use _WIN32 because Cygwin/GCC also
+    defines _WIN32, even though Cygwin/GCC closely emulates the Unix
+    environment.])
+
+AH_TEMPLATE(HAVE_VXWORKS, [Define to 1 if building on VxWorks.])
+
+AH_TEMPLATE(HAVE_FILESYSTEM_NOTZERO,
+    [Define to 1 if allocated filesystem blocks are not zeroed.])
+
+AH_TEMPLATE(HAVE_UNLINK_WITH_OPEN_FAILURE,
+    [Define to 1 if unlink of file with open file descriptors will fail.])
+
+AH_TEMPLATE(HAVE_SYSTEM_INCLUDE_FILES,
+    [Define to 1 if port includes files in the Berkeley DB source code.])
diff --git a/dist/aclocal/cxx.m4 b/dist/aclocal/cxx.m4
new file mode 100644
index 00000000..07b076ad
--- /dev/null
+++ b/dist/aclocal/cxx.m4
@@ -0,0 +1,304 @@
+# C++ language checks
+
+AC_DEFUN(AC_CXX_STDHEADERS, [
+AC_SUBST(cxx_have_stdheaders)
+AC_MSG_CHECKING(whether C++ supports the ISO C++ standard includes)
+AC_LANG_SAVE
+AC_LANG_CPLUSPLUS
+AC_TRY_COMPILE([#include <iostream>],[std::ostream *o; return 0;],
+    db_cv_cxx_have_stdheaders=yes, db_cv_cxx_have_stdheaders=no)
+AC_LANG_RESTORE
+AC_MSG_RESULT($db_cv_cxx_have_stdheaders)
+if test
"$db_cv_cxx_have_stdheaders" = yes; then + cxx_have_stdheaders="#define HAVE_CXX_STDHEADERS 1" +fi]) + +AC_DEFUN(AC_CXX_WSTRING, [ +AC_MSG_CHECKING(whether C++ supports the wstring class) +AC_SUBST(WSTRING_decl) +AC_LANG_PUSH(C++) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]] + [[std::wstring ws; ws.find_first_of(ws);]]), + [WSTRING_decl="#define HAVE_WSTRING 1" ; AC_MSG_RESULT(yes)], + [WSTRING_decl="#undef HAVE_WSTRING" ; AC_MSG_RESULT(no)]]) +AC_LANG_POP(C++) +]) + +AC_DEFUN(AC_CXX_SUPPORTS_TEMPLATES, [ +AC_MSG_CHECKING(whether the C++ compiler supports templates for STL) +AC_LANG_SAVE +AC_LANG_CPLUSPLUS +AC_TRY_COMPILE([#include +#include +#include + +using std::string; +using std::vector; +namespace dbstl_configure_test { + +template +class MyClass +{ +public: + explicit MyClass(int i) { imem = i;} + + MyClass(const T1& t1, const T2& t2, int i) + { + mem1 = t1; + mem2 = t2; + imem = i; + } + + template + T2 templ_mem_func(T1 t1, T3 t3) + { + mem2 = t1; + T3 t32 = t3; + T2 t2; + return t2; + } + + double templ_mem_func(T1 t1, double t3) + { + mem1 = t1; + double t32 = t3; + return t3; + } + + template + ReturnType templ_mem_func(T7, T8); + + operator T1() const {return mem1;} +private: + T1 mem1; + T2 mem2; + int imem; +}; + +template +template +ReturnType MyClass::templ_mem_func(T7, T8) +{ + ReturnType rt; + return rt; +} + +template<> +class MyClass +{ +public: + explicit MyClass(int i) { imem = i;} + + MyClass(const double& t1, const float& t2, int i) + { + mem1 = t1; + mem2 = t2; + imem = i; + } + + template + float templ_mem_func(double t1, T3 t3) + { + mem2 = t1; + T3 t32 = t3; + float t2; + return t2; + } + + double templ_mem_func(double t1, double t3) + { + mem1 = t1; + double t32 = t3; + return t3; + } + + template + ReturnType templ_mem_func(T7, T8); + + operator double() const {return mem1;} +private: + double mem1; + float mem2; + int imem; +}; + +template +ReturnType MyClass::templ_mem_func(T7, T8) +{ + ReturnType rt; + return rt; +} + +template 
+class MyClass2 { +public: + MyClass2(const T1& t1, const T2&t2){} +}; + +// partial specialization: both template parameters have same type +template +class MyClass2 { +public: + MyClass2(const T& t1, const T&t2){} +}; + +// partial specialization: second type is int +template +class MyClass2 { +public: + MyClass2(const T& t1, const int&t2){} +}; + +// partial specialization: both template parameters are pointer types +template +class MyClass2 { +public: + MyClass2(const T1* t1, const T2*t2){} +}; + +template +class MyClass2 { +public: + MyClass2(const T* t1, const T*t2){} +}; + +template +int part_spec_func(T4 t4, T5 t5) +{ + // Zero Initialization should work. + T4 t44 = T4(); + T5 t55 = T5(); + + t44 = t4; + t55 = t5; +} + +template +int part_spec_func(T4 t4, std::vector t55) +{ + T4 t44 = t4; + std::vector abc = t55; +} + +// maximum of two int values +inline int const& max (int const& a, int const& b) +{ + return a +inline T2 const max (T1 const& a, T2 const& b) +{ + return a +inline T const& max (T const& a, T const& b) +{ + return a +inline T const& max (T const& a, T const& b, T const& c) +{ + return max (max(a,b), c); +} + +template +class Base { +public: + void exit2(){} + Base(){} +}; + +template +class Derived : public Base { +public: + // Call Base() explicitly here, otherwise can't access it. + // Kind of like this->. 
+ Derived() : Base(){} + + void foo() { + this->exit2(); + } +}; + +} // dbstl_configure_test + +using namespace dbstl_configure_test;], [ + char cc = 'a'; + int i = 4; + double pi = 3.14; + float gold = 0.618; + + MyClass2 mif(i, gold); // uses MyClass2 + MyClass2 mff(gold, gold); // uses MyClass2 + MyClass2 mfi(gold, i); // uses MyClass2 + MyClass2 mp(&i, &gold); // uses MyClass2 + MyClass2 m(&i, &i); // uses MyClass2 + + MyClass obj1(i); + obj1.templ_mem_func(cc, pi); + obj1.templ_mem_func(cc, gold); + obj1.templ_mem_func(i, pi); + obj1.templ_mem_func(cc, cc); + char ch = (char)obj1; + + string str1("abc"), str2("def"); + MyClass obj2(str1.c_str(), str2, i); + obj2.templ_mem_func("klm", str2); + obj2.templ_mem_func("hij", pi); + + // Use j to help distinguish, otherwise unable to use the one defined + // outside of class body. + int j = obj2.templ_mem_func(cc, cc); + // Call explicitly. + obj2.templ_mem_func(gold, pi); + const char *pch = (const char*)obj2; + + MyClass obj3(pi, gold, i); + obj3.templ_mem_func(pi, i); + obj3.templ_mem_func(pi, str1); + obj3.templ_mem_func(pi, pi); + obj3.templ_mem_func(cc, pi); + obj3.templ_mem_func(cc, cc); + double tmpd = (double)obj3; + + MyClass obj4(i); + obj4.templ_mem_func(pi, i); + obj4.templ_mem_func(pi, str1); + obj4.templ_mem_func(pi, pi); + obj4.templ_mem_func(gold, pi); + tmpd = (double)obj4; + + // Function template partial specialization. + part_spec_func(pi, gold); + part_spec_func(gold, i); + part_spec_func(str1, str2); + std::vector strv; + part_spec_func(str1, strv); + std::vector dblv; + part_spec_func(pi, dblv); + + // Function template overloads and explicit call and deduction. 
+ dbstl_configure_test::max(7, 42, 68); // calls the template for three arguments + dbstl_configure_test::max(7.0, 42.0); // calls max (by argument deduction) + dbstl_configure_test::max('a', 'b'); // calls max (by argument deduction) + dbstl_configure_test::max(7, 42.0); + dbstl_configure_test::max(4,4.2); // instantiate T as double + dbstl_configure_test::max(7, 42); // calls the nontemplate for two ints + dbstl_configure_test::max<>(7, 42); // calls max (by argument deduction) + dbstl_configure_test::max(7, 42); // calls max (no argument deduction) + dbstl_configure_test::max('a', 42.7); // calls the nontemplate for two ints + + Base bobj; + bobj.exit2(); + // Using this-> to access base class members. + Derived dobj; + dobj.foo(); + dobj.exit2(); +], AC_MSG_RESULT(yes), AC_MSG_ERROR(no)) +AC_LANG_RESTORE +]) diff --git a/dist/aclocal/gcc.m4 b/dist/aclocal/gcc.m4 new file mode 100644 index 00000000..156a0e7a --- /dev/null +++ b/dist/aclocal/gcc.m4 @@ -0,0 +1,19 @@ +# Version 2.96 of gcc (shipped with RedHat Linux 7.[01] and Mandrake) had +# serious problems. +AC_DEFUN(AC_GCC_CONFIG1, [ +AC_CACHE_CHECK([whether we are using gcc version 2.96], +db_cv_gcc_2_96, [ +db_cv_gcc_2_96=no +if test "$GCC" = "yes"; then + GCC_VERSION=`${MAKEFILE_CC} --version` + case ${GCC_VERSION} in + 2.96*) + db_cv_gcc_2_96=yes;; + esac +fi]) +if test "$db_cv_gcc_2_96" = "yes"; then + CFLAGS=`echo "$CFLAGS" | sed 's/-O2/-O/'` + CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-O2/-O/'` + AC_MSG_WARN([INSTALLED GCC COMPILER HAS SERIOUS BUGS; PLEASE UPGRADE.]) + AC_MSG_WARN([GCC OPTIMIZATION LEVEL SET TO -O.]) +fi]) diff --git a/dist/aclocal/libtool.m4 b/dist/aclocal/libtool.m4 new file mode 100644 index 00000000..d8125842 --- /dev/null +++ b/dist/aclocal/libtool.m4 @@ -0,0 +1,7831 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010 Free Software Foundation, +# Inc. 
+# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010 Free Software Foundation, +# Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +]) + +# serial 57 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less that VERSION. 
+m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl +m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_CC_BASENAME(CC) +# ------------------- +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+m4_defun([_LT_CC_BASENAME], +[for cc_temp in $1""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. +m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes 
INIT. +if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. 
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from `configure', and `config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# `config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain="$ac_aux_dir/ltmain.sh" +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the `libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. 
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
+m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to `config.status' so that its +# declaration there will have the same value as in `configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags="_LT_TAGS"dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. 
+# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the `libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into `config.status', and then the shell code to quote escape them in +# for loops in `config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
+m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# `#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test $lt_write_fail = 0 && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), incase it is used in configure for compilation +# tests. 
+AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +\`$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to ." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2010 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permision to copy, distribute and modify it." 
+ +while test $[#] != 0 +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try \`$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try \`$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test "$silent" = yes && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +_LT_COPYING +_LT_LIBTOOL_TAGS + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + _LT_PROG_REPLACE_SHELLFNS + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS="$save_LDFLAGS" + ]) + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -f conftest && test ! 
-s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? + case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[[012]]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES +# -------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in 
$convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + m4_if([$1], [CXX], +[ if test "$lt_cv_apple_cc_single_mod" != "yes"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name 
\$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib" + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. 
+# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script which will find a shell with a builtin +# printf (which we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +case "$ECHO" in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[ --with-sysroot[=DIR] Search for dependent libraries within DIR + (or the compiler's sysroot if 
not specified).], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. +lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([${with_sysroot}]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and in which our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. 
+ echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; + ppc64-*linux*|powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + ppc*-*linux*|powerpc*-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. 
+ SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +sparc*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) LD="${LD-ld} -m elf64_sparc" ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +: ${AR_FLAGS=cru} +_LT_DECL([], [AR], [1], [The archiver]) +_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. 
+ rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], 
[$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test x"[$]$2" = xyes; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" +]) + +if test x"[$]$2" = xyes; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). 
+ lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. 
+ # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +if test -n $lt_cv_sys_max_cmd_len ; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "$cross_compiling" = yes; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include 
"confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + 
lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) +_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. 
+m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. 
+ $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links="nottested" +if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test "$hard_links" = no; then + AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- +m4_defun([_LT_CHECK_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +_LT_DECL([], [objdir], [0], + [The name of the directory that contains temporary libtool files])dnl +m4_pattern_allow([LT_OBJDIR])dnl +AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/", + [Define to the sub-directory in which libtool stores uninstalled libraries.]) +])# _LT_CHECK_OBJDIR + + +# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) +# -------------------------------------- +# Check hardcoding attributes. 
+m4_defun([_LT_LINKER_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || + test -n "$_LT_TAGVAR(runpath_var, $1)" || + test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$_LT_TAGVAR(hardcode_direct, $1)" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no && + test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then + # Linking always hardcodes the temporary library directory. + _LT_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ _LT_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) + +if test "$_LT_TAGVAR(hardcode_action, $1)" = relink || + test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi +_LT_TAGDECL([], [hardcode_action], [0], + [How to hardcode a shared library path into an executable]) +])# _LT_LINKER_HARDCODE_LIBPATH + + +# _LT_CMD_STRIPLIB +# ---------------- +m4_defun([_LT_CMD_STRIPLIB], +[m4_require([_LT_DECL_EGREP]) +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac +fi +_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) +_LT_DECL([], [striplib], [1]) +])# _LT_CMD_STRIPLIB + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* 
| cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[[4-9]]*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[123]]*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other 
versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. 
Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if 
test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+ The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([], [sys_lib_dlsearch_path_spec], [2], + [Run-time system search path for libraries]) +])# _LT_SYS_DYNAMIC_LINKER + + +# _LT_PATH_TOOL_PREFIX(TOOL) +# -------------------------- +# find a file program which can recognize shared library +AC_DEFUN([_LT_PATH_TOOL_PREFIX], +[m4_require([_LT_DECL_EGREP])dnl +AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in +[[\\/*] | ?:[\\/]*]) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="m4_if([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$1; then + lt_cv_path_MAGIC_CMD="$ac_dir/$1" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac]) +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + AC_MSG_RESULT($MAGIC_CMD) +else + AC_MSG_RESULT(no) +fi +_LT_DECL([], [MAGIC_CMD], [0], + [Used to examine libraries when file_magic_cmd begins with "file"])dnl +])# _LT_PATH_TOOL_PREFIX + +# Old name: +AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) + + +# _LT_PATH_MAGIC +# -------------- +# find a file program which can recognize a shared library +m4_defun([_LT_PATH_MAGIC], +[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) + else + MAGIC_CMD=: + fi +fi +])# _LT_PATH_MAGIC + + +# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test "$withval" = no || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. 
+ case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. + lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? 
shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + 
lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# find the pathname to a BSD- or MS-compatible name lister +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the 
test. + lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi]) +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# locate the manifest tool +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# LT_LIB_M +# -------- +# check for math library +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM="-lm") + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_AWK])dnl 
+AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. 
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. 
+ eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. 
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. 
+ if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + if test "$host_cpu" != ia64; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. 
+ ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64 which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test "$GCC" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. 
+ _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Xcompiler -fPIC' + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. 
+ ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ F* | *Sun*Fortran*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, 
$1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. +# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. 
+m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global defined + # symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + 
*) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. 
Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. 
+ if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. 
+ # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' 
-i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. 
In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. + _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + freebsd1*) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + 
# but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS="$save_LDFLAGS"]) + if test "$lt_cv_irix_exported_symbol" = yes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry 
${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + else + case $host_os in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + ;; + esac + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs 
$deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + 
_LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? +# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. 
+ libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + [Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + 
[Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. + This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec_ld], [1], + [[If ld is used when linking, flag to hardcode $libdir into a binary + during linking. This must work even if $libdir does not exist]]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting ${shlibpath_var} if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) 
+_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify filename containing input files]) +dnl FIXME: Not yet implemented +dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], +dnl [Compiler flag to generate thread safe objects]) +])# _LT_LINKER_SHLIBS + + +# _LT_LANG_C_CONFIG([TAG]) +# ------------------------ +# Ensure that the configuration variables for a C compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. +m4_defun([_LT_LANG_C_CONFIG], +[m4_require([_LT_DECL_EGREP])dnl +lt_save_CC="$CC" +AC_LANG_PUSH(C) + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + +_LT_TAG_COMPILER +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... 
+if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + LT_SYS_DLOPEN_SELF + _LT_CMD_STRIPLIB + + # Report which library types will actually be built + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_CONFIG($1) +fi +AC_LANG_POP +CC="$lt_save_CC" +])# _LT_LANG_C_CONFIG + + +# _LT_LANG_CXX_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a C++ compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. 
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test "$_lt_caught_CXX_error" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test "$GXX" = yes; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test "$with_gnu_ld" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='${wl}' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GXX" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an empty + # executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared + # libraries. 
+ _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd[[12]]*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + gnu*) + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, 
$1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test $with_gnu_ld = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + if test $with_gnu_ld = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test "$GXX" = yes; then + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. 
+ _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects 
$compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; 
func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd2*) + # C++ shared libraries are fairly broken + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. 
This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require `-G' NOT `-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, 
which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + + _LT_TAGVAR(GCC, $1)="$GXX" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## 
There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test "$_lt_caught_CXX_error" != yes + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. 
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar (void) { + a = 0; + } +}; +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. + pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case ${prev}${p} in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. 
+ if test $p = "-L" || + test $p = "-R"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test "$pre_test_object_deps_done" = no; then + case ${prev} in + -L | -R) + # Internal compiler library paths should come after those + # provided the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}" + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)="${prev}${p}" + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. 
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test "$pre_test_object_deps_done" = no; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)="$p" + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)="$p" + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f confest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; + +linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; + +solaris*) + case $cc_basename in + CC* | sunCC*) + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. 
+ case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + # Adding this requires a known-good setup of shared libraries for + # Sun compiler versions before 5.6, else PIC objects from an old + # archive will be linked into the output, leading to subtle bugs. + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. 
+m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test "X$F77" = "Xno"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_F77" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$G77" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC="$lt_save_CC" + CFLAGS="$lt_save_CFLAGS" +fi # test "$_lt_disable_F77" != yes + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test "X$FC" = "Xno"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_FC" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test "$_lt_disable_FC" != yes + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler compiler +# are suitably defined. 
These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)="$LD" +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in. +_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. 
These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code="$lt_simple_compile_test_code" + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC="$CC" +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. 
+m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# -------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f $lt_ac_sed && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. 
+ if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test $lt_ac_count -gt 10 && break + lt_ac_count=`expr $lt_ac_count + 1` + if test $lt_ac_count -gt $lt_ac_max; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features. +m4_defun([_LT_CHECK_SHELL_FEATURES], +[AC_MSG_CHECKING([whether the shell understands some XSI constructs]) +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +AC_MSG_RESULT([$xsi_shell]) +_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) + +AC_MSG_CHECKING([whether the shell understands "+="]) +lt_shell_append=no +( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +AC_MSG_RESULT([$lt_shell_append]) +_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based 
system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) +# ------------------------------------------------------ +# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and +# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. +m4_defun([_LT_PROG_FUNCTION_REPLACE], +[dnl { +sed -e '/^$1 ()$/,/^} # $1 /c\ +$1 ()\ +{\ +m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) +} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: +]) + + +# _LT_PROG_REPLACE_SHELLFNS +# ------------------------- +# Replace existing portable implementations of several shell functions with +# equivalent extended shell implementations where those features are available.. +m4_defun([_LT_PROG_REPLACE_SHELLFNS], +[if test x"$xsi_shell" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl + # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are + # positional parameters, so assign one to ordinary parameter first. 
+ func_stripname_result=${3} + func_stripname_result=${func_stripname_result#"${1}"} + func_stripname_result=${func_stripname_result%"${2}"}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl + func_split_long_opt_name=${1%%=*} + func_split_long_opt_arg=${1#*=}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl + func_split_short_opt_arg=${1#??} + func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) + + _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl + case ${1} in + *.lo) func_lo2o_result=${1%.lo}.${objext} ;; + *) func_lo2o_result=${1} ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) + + _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) + + _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) +fi + +if test x"$lt_shell_append" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) + + _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl + func_quote_for_eval "${2}" +dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ + eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? 
|| _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) +fi +]) + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine which file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd +AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. 
+lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/dist/aclocal/ltoptions.m4 b/dist/aclocal/ltoptions.m4 new file mode 100644 index 00000000..17cfd51c --- /dev/null +++ b/dist/aclocal/ltoptions.m4 @@ -0,0 +1,369 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 7 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag. +m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option `$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 
+m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME +# are set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about +# the unknown option and exit. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair. For example, if neither + dnl `shared' nor `disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. 
## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's. +LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and supports the `shared' and +# `disable-shared' LT_INIT 
options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the `static' and +# `disable-static' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
+m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the `fast-install' +# and `disable-fast-install' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
+m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the `pic-only' and `no-pic' +# LT_INIT options. +# MODE is either `yes' or `no'. If omitted, it defaults to `both'. 
+m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [pic_mode="$withval"], + [pic_mode=default]) + +test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/dist/aclocal/ltsugar.m4 b/dist/aclocal/ltsugar.m4 new file mode 100644 index 00000000..9000a057 --- /dev/null +++ b/dist/aclocal/ltsugar.m4 @@ -0,0 +1,123 @@ +# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as will still need to support +# Autoconf-2.59 which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. +m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. 
Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. +m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + 
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff --git a/dist/aclocal/ltversion.m4 b/dist/aclocal/ltversion.m4 new file mode 100644 index 00000000..9c7b5d41 --- /dev/null +++ b/dist/aclocal/ltversion.m4 @@ -0,0 +1,23 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# @configure_input@ + +# serial 3293 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4]) +m4_define([LT_PACKAGE_REVISION], [1.3293]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4' +macro_revision='1.3293' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff --git a/dist/aclocal/lt~obsolete.m4 b/dist/aclocal/lt~obsolete.m4 new file mode 100644 index 00000000..c573da90 --- /dev/null +++ b/dist/aclocal/lt~obsolete.m4 @@ -0,0 +1,98 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. 
It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. +# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], 
[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], 
[AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) +m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/dist/aclocal/mmap.m4 b/dist/aclocal/mmap.m4 new file mode 100644 index 00000000..bb0256aa --- /dev/null +++ b/dist/aclocal/mmap.m4 @@ -0,0 +1,109 @@ +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + +# Detect mmap capability: If the file underlying an mmap is extended, +# does the addressable memory grow too? 
+AC_DEFUN(AM_MMAP_EXTEND, [ + + AH_TEMPLATE(HAVE_MMAP_EXTEND, [Define to 1 where mmap() incrementally extends the accessible mapping as the underlying file grows.]) + + +if test "$mmap_ok" = "yes" ; then + AC_MSG_CHECKING([for growing a file under an mmap region]) + + db_cv_mmap_extend="no" + + AC_TRY_RUN([ + /* + * Most mmap() implemenations allow you to map in a region which is much + * larger than the underlying file. Only the second less than the actual + * file size is accessible -- a SIGSEV typically results when attemping + * a memory reference between EOF and the end of the mapped region. + * One can extend the file to allow references into higher-addressed + * sections of the region. However this automatic extension of the + * addressible memory is beyond what POSIX requires. This function detects + * whether mmap supports this automatic extension. If not (e.g. cygwin) + * then the entire (hopefully sparse) file will need to be written before + * the first mmap. + */ + /* Not all these includes are needed, but the minimal set varies from + * system to system. 
+ */ + #include + #include + #include + #include + #include + #include + #include + + #define TEST_MMAP_BUFSIZE (16 * 1024) + #define TEST_MMAP_EXTENDSIZE (16 * 1024 * 1024) + #ifndef MAP_FAILED + #define MAP_FAILED (-1) + #endif + + int catch_sig(sig) + int sig; + { + exit(1); + } + + main() { + const char *underlying; + unsigned gapsize; + char *base; + int count, fd, i, mode, open_flags, ret, total_size; + char buf[TEST_MMAP_BUFSIZE]; + + gapsize = 1024; + underlying = ".mmap_config"; + (void) unlink(underlying); + + open_flags = O_CREAT | O_TRUNC | O_RDWR; + mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH; + + if ((fd = open(underlying, open_flags, mode)) < 0) { + perror("open"); + return (1); + } + + total_size = TEST_MMAP_EXTENDSIZE; + + memset(buf, 0, sizeof(buf)); + if ((count = write(fd, buf, sizeof(buf))) != sizeof(buf)) { + perror("initial write"); + return (2); + } + + if ((base = mmap(NULL, total_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { + perror("mmap"); + return (3); + } + + /* Extend the file with just 1 byte */ + if (lseek(fd, total_size - 1, SEEK_SET) < 0 || + (count = write(fd, buf, 1)) != 1) { + perror("extending write"); + return (4); + } + + (void) signal(SIGSEGV, catch_sig); + (void) signal(SIGBUS, catch_sig); + + for (i = sizeof(buf); i < total_size; i += gapsize) + base[i] = 'A'; + + close(fd); + (void) unlink(underlying); + return (0); + }], [db_cv_mmap_extend="yes"],[db_cv_mmap_extend="no"],[db_cv_mmap_extend="no"]) + + + if test "$db_cv_mmap_extend" = yes; then + AC_DEFINE(HAVE_MMAP_EXTEND) + fi + AC_MSG_RESULT($db_cv_mmap_extend) +fi +]) + diff --git a/dist/aclocal/mutex.m4 b/dist/aclocal/mutex.m4 new file mode 100644 index 00000000..3963796b --- /dev/null +++ b/dist/aclocal/mutex.m4 @@ -0,0 +1,932 @@ +# $Id$ + +# POSIX pthreads tests: inter-process safe and intra-process only. 
+AC_DEFUN(AM_PTHREADS_SHARED, [ +AC_TRY_RUN([ +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +}], [db_cv_mutex="$1"],, +AC_TRY_LINK([ +#include +#include ],[ + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +], [db_cv_mutex="$1"]))]) +AC_DEFUN(AM_PTHREADS_PRIVATE, [ +AC_TRY_RUN([ +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + 
pthread_mutexattr_destroy(&mutexattr)); +}], [db_cv_mutex="$1"],, +AC_TRY_LINK([ +#include +#include ],[ + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +], [db_cv_mutex="$1"]))]) +AC_DEFUN(AM_PTHREADS_CONDVAR_DUPINITCHK, [ +AC_TRY_RUN([ +#include +#include +main() { + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr) || + pthread_cond_init(&cond, &condattr)); +}], [db_cv_pthread_condinit_dupgood="yes"], +[db_cv_pthread_condinit_dupgood="no"], +AC_TRY_LINK([ +#include +#include ], [ + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr)); +], [db_cv_pthread_condinit_dupgood="yes"], +[db_cv_pthread_condinit_dupgood="no"]))]) +AC_DEFUN(AM_PTHREADS_RWLOCKVAR_DUPINITCHK, [ +AC_TRY_RUN([ +#include +#include +main() { + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +}], [db_cv_pthread_rwlockinit_dupgood="yes"], +[db_cv_pthread_rwlockinit_dupgood="no"], +AC_TRY_LINK([ +#include +#include ], [ + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +], [db_cv_pthread_rwlockinit_dupgood="yes"], +[db_cv_pthread_rwlockinit_dupgood="no"]))]) + +# Figure out mutexes for this compiler/architecture. 
+# +# There are 3 mutex groups in BDB: pthreads-style, test-and-set, or a hybrid +# combination of the two. We first test for the pthreads-style mutex, and +# then for a test-and-set mutex. +AC_DEFUN(AM_DEFINE_MUTEXES, [ + +# Mutexes we don't test for, but want the #defines to exist for other ports. +AH_TEMPLATE(HAVE_MUTEX_VMS, [Define to 1 to use VMS mutexes.]) +AH_TEMPLATE(HAVE_MUTEX_VXWORKS, [Define to 1 to use VxWorks mutexes.]) + +AC_CACHE_CHECK([for mutexes], db_cv_mutex, [ + +orig_libs=$LIBS + +db_cv_mutex=no + +# Mutexes can be disabled. +if test "$db_cv_build_mutexsupport" = no; then + db_cv_mutex=disabled; +fi + +# User-specified Win32 mutexes (MinGW build) +if test "$db_cv_mingw" = yes; then + db_cv_mutex=win32/gcc +fi + +if test "$db_cv_mutex" = no; then + # User-specified POSIX or UI mutexes. + # + # There are two different reasons to specify mutexes: First, the + # application is already using one type of mutex and doesn't want + # to mix-and-match (for example, on Solaris, which has POSIX, UI + # and LWP mutexes). Second, the application's POSIX pthreads + # mutexes don't support inter-process locking, but the application + # wants to use them anyway (for example, some Linux and *BSD systems). + # + # Test for POSIX threads before testing for UI/LWP threads, they are + # the Sun-recommended choice on Solaris. Also, there are Linux systems + # that support a UI compatibility mode, and applications are more + # likely to be written for POSIX threads than UI threads. + if test "$db_cv_posixmutexes" = yes; then + db_cv_mutex=posix_only; + fi + if test "$db_cv_uimutexes" = yes; then + db_cv_mutex=ui_only; + fi + + # POSIX.1 pthreads: pthread_XXX + # + # If the user specified we use POSIX pthreads mutexes, and we fail to + # find the full interface, try and configure for just intra-process + # support. 
+ if test "$db_cv_mutex" = no -o "$db_cv_mutex" = posix_only; then + LIBS="$LIBS -lpthread" + AM_PTHREADS_SHARED(POSIX/pthreads/library) + AM_PTHREADS_CONDVAR_DUPINITCHK + AM_PTHREADS_RWLOCKVAR_DUPINITCHK + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = posix_only; then + AM_PTHREADS_SHARED(POSIX/pthreads) + AM_PTHREADS_CONDVAR_DUPINITCHK + AM_PTHREADS_RWLOCKVAR_DUPINITCHK + fi + if test "$db_cv_mutex" = posix_only; then + AM_PTHREADS_PRIVATE(POSIX/pthreads/private) + AM_PTHREADS_CONDVAR_DUPINITCHK + AM_PTHREADS_RWLOCKVAR_DUPINITCHK + fi + if test "$db_cv_mutex" = posix_only; then + LIBS="$LIBS -lpthread" + AM_PTHREADS_PRIVATE(POSIX/pthreads/library/private) + AM_PTHREADS_CONDVAR_DUPINITCHK + AM_PTHREADS_RWLOCKVAR_DUPINITCHK + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = posix_only; then + AC_MSG_ERROR([unable to find POSIX 1003.1 mutex interfaces]) + fi + + # LWP threads: _lwp_XXX + if test "$db_cv_mutex" = no; then + AC_TRY_LINK([ + #include ],[ + static lwp_mutex_t mi = SHAREDMUTEX; + static lwp_cond_t ci = SHAREDCV; + lwp_mutex_t mutex = mi; + lwp_cond_t cond = ci; + exit ( + _lwp_mutex_lock(&mutex) || + _lwp_mutex_unlock(&mutex)); + ], [db_cv_mutex=Solaris/lwp]) + fi + + # UI threads: thr_XXX + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = ui_only; then + LIBS="$LIBS -lthread" + AC_TRY_LINK([ + #include + #include ],[ + mutex_t mutex; + cond_t cond; + int type = USYNC_PROCESS; + exit ( + mutex_init(&mutex, type, NULL) || + cond_init(&cond, type, NULL) || + mutex_lock(&mutex) || + mutex_unlock(&mutex)); + ], [db_cv_mutex=UI/threads/library]) + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = ui_only; then + AC_TRY_LINK([ + #include + #include ],[ + mutex_t mutex; + cond_t cond; + int type = USYNC_PROCESS; + exit ( + mutex_init(&mutex, type, NULL) || + cond_init(&cond, type, NULL) || + mutex_lock(&mutex) || + mutex_unlock(&mutex)); + ], [db_cv_mutex=UI/threads]) + fi + if test "$db_cv_mutex" = ui_only; 
then + AC_MSG_ERROR([unable to find UI mutex interfaces]) + fi + + # We're done testing for pthreads-style mutexes. Next, check for + # test-and-set mutexes. Check first for hybrid implementations, + # because we check for them even if we've already found a + # pthreads-style mutex and they're the most common architectures + # anyway. + # + # x86/gcc: FreeBSD, NetBSD, BSD/OS, Linux + AC_TRY_COMPILE(,[ + #if (defined(i386) || defined(__i386__)) && defined(__GNUC__) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + ], [db_cv_mutex="$db_cv_mutex/x86/gcc-assembly"]) + + # x86_64/gcc: FreeBSD, NetBSD, BSD/OS, Linux + AC_TRY_COMPILE(,[ + #if (defined(x86_64) || defined(__x86_64__)) && defined(__GNUC__) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + ], [db_cv_mutex="$db_cv_mutex/x86_64/gcc-assembly"]) + + # Solaris is one of the systems where we can configure hybrid mutexes. + # However, we require the membar_enter function for that, and only newer + # Solaris releases have it. Check to see if we can configure hybrids. + AC_TRY_LINK([ + #include + #include ],[ + typedef lock_t tsl_t; + lock_t x; + _lock_try(&x); + _lock_clear(&x); + membar_enter(); + ], [db_cv_mutex="$db_cv_mutex/Solaris/_lock_try/membar"]) + + # Sparc/gcc: SunOS, Solaris, ultrasparc assembler support + AC_TRY_COMPILE(,[ + #if defined(__sparc__) && defined(__GNUC__) + asm volatile ("membar #StoreStore|#StoreLoad|#LoadStore"); + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + ], [db_cv_mutex="$db_cv_mutex/Sparc/gcc-assembly"]) + + # We're done testing for any hybrid mutex implementations. If we did + # not find a pthreads-style mutex, but did find a test-and-set mutex, + # we set db_cv_mutex to "no/XXX" -- clean that up. + db_cv_mutex=`echo $db_cv_mutex | sed 's/^no\///'` +fi + +# If we still don't have a mutex implementation yet, continue testing for a +# test-and-set mutex implementation. 
+ +# _lock_try/_lock_clear: Solaris +# On Solaris systems without other mutex interfaces, DB uses the undocumented +# _lock_try _lock_clear function calls instead of either the sema_trywait(3T) +# or sema_wait(3T) function calls. This is because of problems in those +# interfaces in some releases of the Solaris C library. +if test "$db_cv_mutex" = no; then +AC_TRY_LINK([ +#include +#include ],[ + typedef lock_t tsl_t; + lock_t x; + _lock_try(&x); + _lock_clear(&x); +], [db_cv_mutex=Solaris/_lock_try]) +fi + +# msemaphore: HPPA only +# Try HPPA before general msem test, it needs special alignment. +if test "$db_cv_mutex" = no; then +AC_TRY_LINK([ +#include ],[ +#if defined(__hppa) + typedef msemaphore tsl_t; + msemaphore x; + msem_init(&x, 0); + msem_lock(&x, 0); + msem_unlock(&x, 0); + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=HP/msem_init]) +fi + +# msemaphore: AIX, OSF/1 +if test "$db_cv_mutex" = no; then +AC_TRY_LINK([ +#include +#include ],[ + typedef msemaphore tsl_t; + msemaphore x; + msem_init(&x, 0); + msem_lock(&x, 0); + msem_unlock(&x, 0); + exit(0); +], [db_cv_mutex=UNIX/msem_init]) +fi + +# ReliantUNIX +if test "$db_cv_mutex" = no; then +LIBS="$LIBS -lmproc" +AC_TRY_LINK([ +#include ],[ + typedef spinlock_t tsl_t; + spinlock_t x; + initspin(&x, 1); + cspinlock(&x); + spinunlock(&x); +], [db_cv_mutex=ReliantUNIX/initspin]) +LIBS="$orig_libs" +fi + +# SCO: UnixWare has threads in libthread, but OpenServer doesn't. +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__USLC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=SCO/x86/cc-assembly]) +fi + +# abilock_t: SGI +if test "$db_cv_mutex" = no; then +AC_TRY_LINK([ +#include ],[ + typedef abilock_t tsl_t; + abilock_t x; + init_lock(&x); + acquire_lock(&x); + release_lock(&x); +], [db_cv_mutex=SGI/init_lock]) +fi + +# sema_t: Solaris +# The sema_XXX calls do not work on Solaris 5.5. 
I see no reason to ever +# turn this test on, unless we find some other platform that uses the old +# POSIX.1 interfaces. +if test "$db_cv_mutex" = DOESNT_WORK; then +AC_TRY_LINK([ +#include ],[ + typedef sema_t tsl_t; + sema_t x; + sema_init(&x, 1, USYNC_PROCESS, NULL); + sema_wait(&x); + sema_post(&x); +], [db_cv_mutex=UNIX/sema_init]) +fi + +# _check_lock/_clear_lock: AIX +if test "$db_cv_mutex" = no; then +AC_TRY_LINK([ +#include ],[ + int x; + _check_lock(&x,0,1); + _clear_lock(&x,0); +], [db_cv_mutex=AIX/_check_lock]) +fi + +# _spin_lock_try/_spin_unlock: Apple/Darwin +if test "$db_cv_mutex" = no; then +AC_TRY_LINK(,[ + int x; + _spin_lock_try(&x); + _spin_unlock(&x); +], [db_cv_mutex=Darwin/_spin_lock_try]) +fi + +# Tru64/cc +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__alpha) && defined(__DECC) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=Tru64/cc-assembly]) +fi + +# Alpha/gcc +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__alpha) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=ALPHA/gcc-assembly]) +fi + +# ARM/gcc: Linux +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__arm__) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=ARM/gcc-assembly]) +fi + +# MIPS/gcc: Linux +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if (defined(__mips) || defined(__mips__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=MIPS/gcc-assembly]) +fi + +# PaRisc/gcc: HP/UX +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if (defined(__hppa) || defined(__hppa__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=HPPA/gcc-assembly]) +fi + +# PPC/gcc: +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if (defined(__powerpc__) || defined(__ppc__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], 
[db_cv_mutex=PPC/gcc-assembly]) +fi + +# 68K/gcc: SunOS +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if (defined(mc68020) || defined(sun3)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=68K/gcc-assembly]) +fi + +# S390/cc: IBM OS/390 Unix +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__MVS__) && defined(__IBMC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=S390/cc-assembly]) +fi + +# S390/gcc: Linux +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__s390__) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=S390/gcc-assembly]) +fi + +# ia64/gcc: Linux +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(__ia64) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=ia64/gcc-assembly]) +fi + +# uts/cc: UTS +if test "$db_cv_mutex" = no; then +AC_TRY_COMPILE(,[ +#if defined(_UTS) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif +], [db_cv_mutex=UTS/cc-assembly]) +fi + +# UNIX fcntl system call mutexes. +# Note that fcntl mutexes are no longer supported as of 4.8. This code has been +# left in place in case there is some system that we are not aware of that +# only uses fcntl mutexes. In that case, contact Oracle for support. +if test "$db_cv_mutex" = no; then + db_cv_mutex=UNIX/fcntl +AC_TRY_LINK([ +#include ],[ + struct flock l; + l.l_whence = SEEK_SET; + l.l_start = 10; + l.l_len = 1; + l.l_type = F_WRLCK; + fcntl(0, F_SETLK, &l); +], [db_cv_mutex=UNIX/fcntl]) +fi +]) + +# Configure a pthreads-style mutex implementation. 
+hybrid=pthread +case "$db_cv_mutex" in +POSIX/pthreads/private*)ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_PTHREADS) + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_COND_REINIT_OKAY) + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_RWLOCK_REINIT_OKAY) + fi + AC_DEFINE(HAVE_MUTEX_THREAD_ONLY) + AH_TEMPLATE(HAVE_MUTEX_THREAD_ONLY, + [Define to 1 to configure mutexes intra-process only.]);; +POSIX/pthreads/library/private*) + ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_PTHREADS) + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_COND_REINIT_OKAY) + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_RWLOCK_REINIT_OKAY) + fi + AC_DEFINE(HAVE_MUTEX_THREAD_ONLY);; +POSIX/pthreads/library*)ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_PTHREADS) + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_COND_REINIT_OKAY) + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_RWLOCK_REINIT_OKAY) + fi;; +POSIX/pthreads*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_PTHREADS) + AH_TEMPLATE(HAVE_MUTEX_PTHREADS, + [Define to 1 to use POSIX 1003.1 pthread_XXX mutexes.]) + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_COND_REINIT_OKAY) + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + AC_DEFINE(HAVE_PTHREAD_RWLOCK_REINIT_OKAY) + fi + AH_TEMPLATE(HAVE_PTHREAD_COND_REINIT_OKAY, + [Define to 1 if it is OK to initialize an already initialized pthread_cond_t.]) + AH_TEMPLATE(HAVE_PTHREAD_RWLOCK_REINIT_OKAY, + [Define to 1 if it is OK to initialize an already initialized pthread_rwlock_t.]);; +Solaris/lwp*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SOLARIS_LWP) + 
AH_TEMPLATE(HAVE_MUTEX_SOLARIS_LWP, + [Define to 1 to use the Solaris lwp threads mutexes.]);; +UI/threads/library*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_UI_THREADS);; +UI/threads*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_UI_THREADS) + AH_TEMPLATE(HAVE_MUTEX_UI_THREADS, + [Define to 1 to use the UNIX International mutexes.]);; +*) hybrid=no;; +esac + +# Configure a test-and-set mutex implementation. +case "$db_cv_mutex" in +68K/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_68K_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_68K_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and 68K assembly language mutexes.]);; +AIX/_check_lock) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_AIX_CHECK_LOCK) + AH_TEMPLATE(HAVE_MUTEX_AIX_CHECK_LOCK, + [Define to 1 to use the AIX _check_lock mutexes.]);; +Darwin/_spin_lock_try) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY) + AH_TEMPLATE(HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY, + [Define to 1 to use the Apple/Darwin _spin_lock_try mutexes.]);; +ALPHA/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_ALPHA_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_ALPHA_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and Alpha assembly language mutexes.]);; +ARM/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_ARM_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_ARM_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and ARM assembly language mutexes.]);; +HP/msem_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_HPPA_MSEM_INIT) + AH_TEMPLATE(HAVE_MUTEX_HPPA_MSEM_INIT, + [Define to 1 to use the msem_XXX mutexes on HP-UX.]);; +HPPA/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_HPPA_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_HPPA_GCC_ASSEMBLY, + [Define to 1 to use the
GCC compiler and PaRisc assembly language mutexes.]);; +ia64/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_IA64_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_IA64_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and IA64 assembly language mutexes.]);; +MIPS/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_MIPS_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_MIPS_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and MIPS assembly language mutexes.]);; +PPC/gcc-assembly) + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_PPC_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_PPC_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and PowerPC assembly language mutexes.]);; +ReliantUNIX/initspin) LIBSO_LIBS="$LIBSO_LIBS -lmproc" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_RELIANTUNIX_INITSPIN) + AH_TEMPLATE(HAVE_MUTEX_RELIANTUNIX_INITSPIN, + [Define to 1 to use Reliant UNIX initspin mutexes.]);; +S390/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_S390_CC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_S390_CC_ASSEMBLY, + [Define to 1 to use the IBM C compiler and S/390 assembly language mutexes.]);; +S390/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_S390_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_S390_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and S/390 assembly language mutexes.]);; +SCO/x86/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SCO_X86_CC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_SCO_X86_CC_ASSEMBLY, + [Define to 1 to use the SCO compiler and x86 assembly language mutexes.]);; +SGI/init_lock) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SGI_INIT_LOCK) + AH_TEMPLATE(HAVE_MUTEX_SGI_INIT_LOCK, + [Define to 1 to use the SGI XXX_lock mutexes.]);; +Solaris/_lock_try) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + 
AC_DEFINE(HAVE_MUTEX_SOLARIS_LOCK_TRY) + AH_TEMPLATE(HAVE_MUTEX_SOLARIS_LOCK_TRY, + [Define to 1 to use the Solaris _lock_XXX mutexes.]);; +*Solaris/_lock_try/membar) + hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SOLARIS_LOCK_TRY) + AH_TEMPLATE(HAVE_MUTEX_SOLARIS_LOCK_TRY, + [Define to 1 to use the Solaris _lock_XXX mutexes.]);; +*Sparc/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SPARC_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_SPARC_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and Sparc assembly language mutexes.]);; +Tru64/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_TRU64_CC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_TRU64_CC_ASSEMBLY, + [Define to 1 to use the CC compiler and Tru64 assembly language mutexes.]);; +UNIX/msem_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_MSEM_INIT) + AH_TEMPLATE(HAVE_MUTEX_MSEM_INIT, + [Define to 1 to use the msem_XXX mutexes on systems other than HP-UX.]);; +UNIX/sema_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_SEMA_INIT) + AH_TEMPLATE(HAVE_MUTEX_SEMA_INIT, + [Define to 1 to use the obsolete POSIX 1003.1 sema_XXX mutexes.]);; +UTS/cc-assembly) ADDITIONAL_OBJS="uts4.cc${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_UTS_CC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_UTS_CC_ASSEMBLY, + [Define to 1 to use the UTS compiler and assembly language mutexes.]);; +*x86/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_X86_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_X86_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and 32-bit x86 assembly language mutexes.]);; +*x86_64/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_X86_64_GCC_ASSEMBLY) + AH_TEMPLATE(HAVE_MUTEX_X86_64_GCC_ASSEMBLY, + [Define to 1 to use the GCC compiler and 64-bit 
x86 assembly language mutexes.]);; +esac + +# Configure the remaining special cases. +case "$db_cv_mutex" in +UNIX/fcntl) AC_MSG_ERROR( + [Support for FCNTL mutexes was removed in BDB 4.8.]) + ADDITIONAL_OBJS="mut_fcntl${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_FCNTL) + AH_TEMPLATE(HAVE_MUTEX_FCNTL, + [Define to 1 to use the UNIX fcntl system call mutexes.]);; +win32) ADDITIONAL_OBJS="mut_win32${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_WIN32) + AH_TEMPLATE(HAVE_MUTEX_WIN32, [Define to 1 to use the MSVC compiler and Windows mutexes.]);; +win32/gcc) ADDITIONAL_OBJS="mut_win32${o} $ADDITIONAL_OBJS" + AC_DEFINE(HAVE_MUTEX_WIN32_GCC) + AH_TEMPLATE(HAVE_MUTEX_WIN32_GCC, [Define to 1 to use the GCC compiler and Windows mutexes.]);; +esac + +# Mutexes may not have been found, or may have been disabled. +case "$db_cv_mutex" in +disabled) + ;; +*) + # Test to see if mutexes have been found by checking the list of + # additional objects for a mutex implementation. + case "$ADDITIONAL_OBJS" in + *mut_pthread*|*mut_tas*|*mut_win32*) + AC_DEFINE(HAVE_MUTEX_SUPPORT) + AH_TEMPLATE(HAVE_MUTEX_SUPPORT, + [Define to 1 if the Berkeley DB library should support mutexes.]) + + # Shared latches are required in 4.8, and are implemented using + # mutexes if we don't have a native implementation. + # This macro may be removed in a future release. + AH_TEMPLATE(HAVE_SHARED_LATCHES, + [Define to 1 to configure Berkeley DB to use read/write latches.]) + AC_DEFINE(HAVE_SHARED_LATCHES);; + *) + AC_MSG_ERROR([Unable to find a mutex implementation]);; + esac +esac + +# We may have found both a pthreads-style mutex implementation as well as a +# test-and-set, in which case configure for the hybrid. 
+if test "$hybrid" = pthread/tas; then + AC_DEFINE(HAVE_MUTEX_HYBRID) + AH_TEMPLATE(HAVE_MUTEX_HYBRID, + [Define to 1 to use test-and-set mutexes with blocking mutexes.]) +fi + +# The mutex selection may require specific declarations -- we fill in most of +# them above, but here are the common ones. +# +# The mutex selection may tell us what kind of thread package we're using, +# which we use to figure out the thread type. +# +# If we're configured for the POSIX pthread API, then force the thread ID type +# and include function, regardless of the mutex selection. Ditto for the +# (default) Solaris lwp mutexes, because they don't have a way to return the +# thread ID. +# +# Try and link with a threads library if possible. The problem is the Solaris +# C library has UI/POSIX interface stubs, but they're broken, configuring them +# for inter-process mutexes doesn't return an error, but it doesn't work either. +# For that reason always add -lpthread if we're using pthread calls or mutexes +# and there's a pthread library. +# +# We can't depend on any specific call existing (pthread_create, for example), +# as it may be #defined in an include file -- OSF/1 (Tru64) has this problem. + +AC_SUBST(thread_h_decl) +AC_SUBST(db_threadid_t_decl) +db_threadid_t_decl=notset + +case "$db_cv_mutex" in +UI/threads*) + thread_h_decl="#include <thread.h>" + db_threadid_t_decl="typedef thread_t db_threadid_t;" + AC_HAVE_LIBRARY(thread, LIBSO_LIBS="$LIBSO_LIBS -lthread");; +*) + AC_CHECK_HEADER(pthread.h, [ac_cv_header_pthread_h=yes]) + if test "$ac_cv_header_pthread_h" = "yes" ; then + thread_h_decl="#include <pthread.h>" + db_threadid_t_decl="typedef pthread_t db_threadid_t;" + fi + AC_HAVE_LIBRARY(pthread, LIBSO_LIBS="$LIBSO_LIBS -lpthread");; +esac + +# We need to know if the thread ID type will fit into an integral type and we +# can compare it for equality and generally treat it like an int, or if it's a +# non-integral type and we have to treat it like a structure or other untyped +# block of bytes.
For example, MVS typedef's pthread_t to a structure. +AH_TEMPLATE(HAVE_SIMPLE_THREAD_TYPE, + [Define to 1 if thread identifier type db_threadid_t is integral.]) +if test "$db_threadid_t_decl" = notset; then + db_threadid_t_decl="typedef uintmax_t db_threadid_t;" + AC_DEFINE(HAVE_SIMPLE_THREAD_TYPE) +else + AC_TRY_COMPILE( + #include <sys/types.h> + $thread_h_decl, [ + $db_threadid_t_decl + db_threadid_t a; + a = 0; + ], AC_DEFINE(HAVE_SIMPLE_THREAD_TYPE)) +fi + +# There are 3 classes of mutexes: +# +# 1: Mutexes requiring no cleanup, for example, test-and-set mutexes. +# 2: Mutexes that must be destroyed, but which don't hold permanent system +# resources, for example, pthread mutexes on MVS aka OS/390 aka z/OS. +# 3: Mutexes that must be destroyed, even after the process is gone, for +# example, pthread mutexes on QNX and binary semaphores on VxWorks. +# +# DB cannot currently distinguish between #2 and #3 because DB does not know +# if the application is running environment recovery as part of startup and +# does not need to do cleanup, or if the environment is being removed and/or +# recovered in a loop in the application, and so does need to clean up. If +# we get it wrong, we're going to call the mutex destroy routine on a random +# piece of memory, which usually works, but just might drop core. For now, +# we group #2 and #3 into the HAVE_MUTEX_SYSTEM_RESOURCES define, until we +# have a better solution or reason to solve this in a general way -- so far, +# the places we've needed to handle this are few.
+AH_TEMPLATE(HAVE_MUTEX_SYSTEM_RESOURCES, + [Define to 1 if mutexes hold system resources.]) + +case "$host_os$db_cv_mutex" in +*qnx*POSIX/pthread*|openedition*POSIX/pthread*) + AC_DEFINE(HAVE_MUTEX_SYSTEM_RESOURCES);; +esac]) + +AC_DEFUN(AM_DEFINE_ATOMIC, [ +# Probe for native atomic operations +# gcc/x86{,_64} inline asm +# solaris atomic_* library calls + +AH_TEMPLATE(HAVE_ATOMIC_SUPPORT, + [Define to 1 to use native atomic operations.]) +AH_TEMPLATE(HAVE_ATOMIC_X86_GCC_ASSEMBLY, + [Define to 1 to use GCC and x86 or x86_64 assembly language atomic operations.]) +AH_TEMPLATE(HAVE_ATOMIC_SOLARIS, + [Define to 1 to use Solaris library routines for atomic operations.]) + +AC_CACHE_CHECK([for atomic operations], db_cv_atomic, [ +db_cv_atomic=no +# atomic operations can be disabled via --disable-atomicsupport +if test "$db_cv_build_atomicsupport" = no; then + db_cv_atomic=disabled +fi + +# The MinGW build uses the Windows API for atomic operations +if test "$db_cv_mingw" = yes; then + db_cv_atomic=mingw +fi + +if test "$db_cv_atomic" = no; then + AC_TRY_COMPILE(,[ + #if ((defined(i386) || defined(__i386__)) && defined(__GNUC__)) + exit(0); + #elif ((defined(x86_64) || defined(__x86_64__)) && defined(__GNUC__)) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + ], [db_cv_atomic="x86/gcc-assembly"]) +fi + +if test "$db_cv_atomic" = no; then +AC_TRY_LINK([ +#include <atomic.h>],[ + volatile unsigned val = 1; + exit (atomic_inc_uint_nv(&val) != 2 || + atomic_dec_uint_nv(&val) != 1 || + atomic_cas_32(&val, 1, 3) != 3); +], [db_cv_atomic="solaris/atomic"]) +fi +]) + +case "$db_cv_atomic" in + x86/gcc-assembly) + AC_DEFINE(HAVE_ATOMIC_SUPPORT) + AC_DEFINE(HAVE_ATOMIC_X86_GCC_ASSEMBLY) + ;; + + solaris/atomic) + AC_DEFINE(HAVE_ATOMIC_SUPPORT) + AC_DEFINE(HAVE_ATOMIC_SOLARIS) + ;; + mingw) + AC_DEFINE(HAVE_ATOMIC_SUPPORT) + ;; +esac +]) diff --git a/dist/aclocal/options.m4 b/dist/aclocal/options.m4 new file mode 100644 index 00000000..1fd93342 --- /dev/null +++ b/dist/aclocal/options.m4
@@ -0,0 +1,467 @@ +# $Id$ + +# Process user-specified options. +AC_DEFUN(AM_OPTIONS_SET, [ + +AC_MSG_CHECKING(if --enable-smallbuild option specified) +AC_ARG_ENABLE(smallbuild, + [AC_HELP_STRING([--enable-smallbuild], + [Build small footprint version of the library.])], + [db_cv_smallbuild="$enable_smallbuild"], [db_cv_smallbuild="no"]) +case "$db_cv_smallbuild" in +yes) db_cv_build_full="no";; + *) db_cv_build_full="yes";; +esac +AC_MSG_RESULT($db_cv_smallbuild) + +AC_MSG_CHECKING(if --disable-atomicsupport option specified) +AC_ARG_ENABLE(atomicsupport, + AC_HELP_STRING([--disable-atomicsupport], + [Do not build any native atomic operation support.]),, enableval="yes") +db_cv_build_atomicsupport="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +# --enable-bigfile was the configuration option that Berkeley DB used before +# autoconf 2.50 was released (which had --enable-largefile integrated in). +AC_ARG_ENABLE(bigfile, + [AC_HELP_STRING([--disable-bigfile], + [Obsolete; use --disable-largefile instead.])], + [AC_MSG_ERROR( + [--enable-bigfile no longer supported, use --enable-largefile])]) + +AC_MSG_CHECKING(if --disable-compression option specified) +AC_ARG_ENABLE(compression, + AC_HELP_STRING([--disable-compression], + [Do not build compression support.]),, enableval=$db_cv_build_full) +db_cv_build_compression="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-hash option specified) +AC_ARG_ENABLE(hash, + AC_HELP_STRING([--disable-hash], + [Do not build Hash access method.]),, enableval=$db_cv_build_full) +db_cv_build_hash="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-heap option specified) +AC_ARG_ENABLE(heap, + [AC_HELP_STRING([--disable-heap], + [Do not build Heap access method.])],, enableval=$db_cv_build_full) +db_cv_build_heap="$enableval" +case 
"$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-mutexsupport option specified) +AC_ARG_ENABLE(mutexsupport, + AC_HELP_STRING([--disable-mutexsupport], + [Do not build any mutex support.]),, enableval="yes") +db_cv_build_mutexsupport="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-log_checksum option specified) +AC_ARG_ENABLE(log_checksum, + AC_HELP_STRING([--disable-log_checksum], + [Disable log checksums.]), + [case "$enableval" in + no | yes) db_cv_log_checksum="$enableval" ;; + *) db_cv_log_checksum="yes" ;; + esac], + db_cv_log_checksum="yes") +case "$db_cv_log_checksum" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + + +AC_MSG_CHECKING(if --disable-partition option specified) +AC_ARG_ENABLE(partition, + AC_HELP_STRING([--disable-partition], + [Do not build partitioned database support.]),, + enableval=$db_cv_build_full) +db_cv_build_partition="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-queue option specified) +AC_ARG_ENABLE(queue, + AC_HELP_STRING([--disable-queue], + [Do not build Queue access method.]),, enableval=$db_cv_build_full) +db_cv_build_queue="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-replication option specified) +AC_ARG_ENABLE(replication, + AC_HELP_STRING([--disable-replication], + [Do not build database replication support.]),, + enableval=$db_cv_build_full) +db_cv_build_replication="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-statistics option specified) +AC_ARG_ENABLE(statistics, + AC_HELP_STRING([--disable-statistics], + [Do not build statistics support.]),, enableval=$db_cv_build_full) +db_cv_build_statistics="$enableval" +case "$enableval" in + no) 
AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --disable-verify option specified) +AC_ARG_ENABLE(verify, + AC_HELP_STRING([--disable-verify], + [Do not build database verification support.]),, + enableval=$db_cv_build_full) +db_cv_build_verify="$enableval" +case "$enableval" in + no) AC_MSG_RESULT(yes);; +yes) AC_MSG_RESULT(no);; +esac + +AC_MSG_CHECKING(if --enable-compat185 option specified) +AC_ARG_ENABLE(compat185, + [AC_HELP_STRING([--enable-compat185], + [Build DB 1.85 compatibility API.])], + [db_cv_compat185="$enable_compat185"], [db_cv_compat185="no"]) +AC_MSG_RESULT($db_cv_compat185) + +AC_MSG_CHECKING(if --enable-cxx option specified) +AC_ARG_ENABLE(cxx, + [AC_HELP_STRING([--enable-cxx], + [Build C++ API.])], + [db_cv_cxx="$enable_cxx"], [db_cv_cxx="no"]) +AC_MSG_RESULT($db_cv_cxx) + +AC_MSG_CHECKING(if --enable-debug option specified) +AC_ARG_ENABLE(debug, + [AC_HELP_STRING([--enable-debug], + [Build a debugging version.])], + [db_cv_debug="$enable_debug"], [db_cv_debug="no"]) +AC_MSG_RESULT($db_cv_debug) + +AC_MSG_CHECKING(if --enable-debug_rop option specified) +AC_ARG_ENABLE(debug_rop, + [AC_HELP_STRING([--enable-debug_rop], + [Build a version that logs read operations.])], + [db_cv_debug_rop="$enable_debug_rop"], [db_cv_debug_rop="no"]) +AC_MSG_RESULT($db_cv_debug_rop) + +AC_MSG_CHECKING(if --enable-debug_wop option specified) +AC_ARG_ENABLE(debug_wop, + [AC_HELP_STRING([--enable-debug_wop], + [Build a version that logs write operations.])], + [db_cv_debug_wop="$enable_debug_wop"], [db_cv_debug_wop="no"]) +AC_MSG_RESULT($db_cv_debug_wop) + +AC_MSG_CHECKING(if --enable-diagnostic option specified) +AC_ARG_ENABLE(diagnostic, + [AC_HELP_STRING([--enable-diagnostic], + [Build a version with run-time diagnostics.])], + [db_cv_diagnostic="$enable_diagnostic"], [db_cv_diagnostic="no"]) +if test "$db_cv_diagnostic" = "yes"; then + AC_MSG_RESULT($db_cv_diagnostic) +fi +if test "$db_cv_diagnostic" = "no" -a "$db_cv_debug_rop" = 
"yes"; then + db_cv_diagnostic="yes" + AC_MSG_RESULT([by --enable-debug_rop]) +fi +if test "$db_cv_diagnostic" = "no" -a "$db_cv_debug_wop" = "yes"; then + db_cv_diagnostic="yes" + AC_MSG_RESULT([by --enable-debug_wop]) +fi +if test "$db_cv_diagnostic" = "no"; then + AC_MSG_RESULT($db_cv_diagnostic) +fi + +AC_MSG_CHECKING(if --enable-dump185 option specified) +AC_ARG_ENABLE(dump185, + [AC_HELP_STRING([--enable-dump185], + [Build db_dump185(1) to dump 1.85 databases.])], + [db_cv_dump185="$enable_dump185"], [db_cv_dump185="no"]) +AC_MSG_RESULT($db_cv_dump185) + +AC_MSG_CHECKING(if --enable-java option specified) +AC_ARG_ENABLE(java, + [AC_HELP_STRING([--enable-java], + [Build Java API.])], + [db_cv_java="$enable_java"], [db_cv_java="no"]) +AC_MSG_RESULT($db_cv_java) + +AC_MSG_CHECKING(if --enable-mingw option specified) +AC_ARG_ENABLE(mingw, + [AC_HELP_STRING([--enable-mingw], + [Build Berkeley DB for MinGW.])], + [db_cv_mingw="$enable_mingw"], [db_cv_mingw="no"]) +AC_MSG_RESULT($db_cv_mingw) + +AC_MSG_CHECKING(if --enable-o_direct option specified) +AC_ARG_ENABLE(o_direct, + [AC_HELP_STRING([--enable-o_direct], + [Enable the O_DIRECT flag for direct I/O.])], + [db_cv_o_direct="$enable_o_direct"], [db_cv_o_direct="no"]) +AC_MSG_RESULT($db_cv_o_direct) + +AC_MSG_CHECKING(if --enable-posixmutexes option specified) +AC_ARG_ENABLE(posixmutexes, + [AC_HELP_STRING([--enable-posixmutexes], + [Force use of POSIX standard mutexes.])], + [db_cv_posixmutexes="$enable_posixmutexes"], [db_cv_posixmutexes="no"]) +AC_MSG_RESULT($db_cv_posixmutexes) + +AC_ARG_ENABLE(pthread_self,, + [AC_MSG_WARN([--enable-pthread_self is now always enabled])]) + +AC_ARG_ENABLE(pthread_api,, + [AC_MSG_WARN([--enable-pthread_api is now always enabled])]) + +AC_MSG_CHECKING(if --enable-rpc option specified) +AC_ARG_ENABLE(rpc,, + [AC_MSG_ERROR([RPC support has been removed from Berkeley DB.])] + , [db_cv_rpc="no"]) +AC_MSG_RESULT($db_cv_rpc) + +AC_MSG_CHECKING(if --enable-sql option specified) 
+AC_ARG_ENABLE(sql, + [AC_HELP_STRING([--enable-sql], + [Build the SQL API.])], + [db_cv_sql="$enable_sql"], [db_cv_sql="no"]) +AC_MSG_RESULT($db_cv_sql) + +AC_MSG_CHECKING(if --enable-sql_compat option specified) +AC_ARG_ENABLE(sql_compat, + [AC_HELP_STRING([--enable-sql_compat], + [Build a drop-in replacement sqlite3 library.])], + [db_cv_sql_compat="$enable_sql_compat"], [db_cv_sql_compat="no"]) +AC_MSG_RESULT($db_cv_sql_compat) + +AC_MSG_CHECKING(if --enable-jdbc option specified) +AC_ARG_ENABLE(jdbc, + [AC_HELP_STRING([--enable-jdbc], + [Build BDB SQL JDBC library.])], + [db_cv_jdbc="$enable_jdbc"], [db_cv_jdbc="no"]) +AC_MSG_RESULT($db_cv_jdbc) + +AC_MSG_CHECKING([if --with-jdbc=DIR option specified]) +AC_ARG_WITH(jdbc, + [AC_HELP_STRING([--with-jdbc=DIR], + [Specify source directory of JDBC.])], + [with_jdbc="$withval"], [with_jdbc="no"]) +AC_MSG_RESULT($with_jdbc) +if test "$with_jdbc" != "no"; then + db_cv_jdbc="yes" +fi + +AC_MSG_CHECKING(if --enable-amalgamation option specified) +AC_ARG_ENABLE(amalgamation, + AC_HELP_STRING([--enable-amalgamation], + [Build a SQL amalgamation instead of building files separately.]), + [db_cv_sql_amalgamation="$enable_amalgamation"], + [db_cv_sql_amalgamation="no"]) +AC_MSG_RESULT($db_cv_sql_amalgamation) + +AC_MSG_CHECKING(if --enable-sql_codegen option specified) +AC_ARG_ENABLE(sql_codegen, + [AC_HELP_STRING([--enable-sql_codegen], + [Build the SQL-to-C code generation tool.])], + [db_cv_sql_codegen="$enable_sql_codegen"], [db_cv_sql_codegen="no"]) +AC_MSG_RESULT($db_cv_sql_codegen) + +AC_MSG_CHECKING(if --enable-stl option specified) +AC_ARG_ENABLE(stl, + [AC_HELP_STRING([--enable-stl], + [Build STL API.])], + [db_cv_stl="$enable_stl"], [db_cv_stl="no"]) +if test "$db_cv_stl" = "yes" -a "$db_cv_cxx" = "no"; then + db_cv_cxx="yes" +fi +AC_MSG_RESULT($db_cv_stl) + +AC_MSG_CHECKING(if --enable-tcl option specified) +AC_ARG_ENABLE(tcl, + [AC_HELP_STRING([--enable-tcl], + [Build Tcl API.])], + [db_cv_tcl="$enable_tcl"], 
[db_cv_tcl="no"]) +AC_MSG_RESULT($db_cv_tcl) + +AC_MSG_CHECKING(if --enable-test option specified) +AC_ARG_ENABLE(test, + [AC_HELP_STRING([--enable-test], + [Configure to run the test suite.])], + [db_cv_test="$enable_test"], [db_cv_test="no"]) +AC_MSG_RESULT($db_cv_test) + +AC_MSG_CHECKING(if --enable-localization option specified) +AC_ARG_ENABLE(localization, + [AC_HELP_STRING([--enable-localization], + [Configure to enable localization.])], + [db_cv_localization="$enable_localization"], [db_cv_localization="no"]) +AC_MSG_RESULT($db_cv_localization) + +AC_MSG_CHECKING(if --enable-stripped_messages option specified) +AC_ARG_ENABLE(stripped_messages, + [AC_HELP_STRING([--enable-stripped_messages], + [Configure to enable stripped messages.])], + [db_cv_stripped_messages="$enable_stripped_messages"], [db_cv_stripped_messages="no"]) +AC_MSG_RESULT($db_cv_stripped_messages) + +AC_MSG_CHECKING(if --enable-dbm option specified) +AC_ARG_ENABLE(dbm, + [AC_HELP_STRING([--enable-dbm], + [Configure to enable the historic dbm interface.])], + [db_cv_dbm="$enable_dbm"], [db_cv_dbm="$db_cv_test"]) +AC_MSG_RESULT($db_cv_dbm) + +AC_MSG_CHECKING(if --enable-dtrace option specified) +AC_ARG_ENABLE(dtrace, + [AC_HELP_STRING([--enable-dtrace], + [Configure to build in dtrace static probes])], + [db_cv_dtrace="$enable_dtrace"], [db_cv_dtrace="no"]) +AC_MSG_RESULT($db_cv_dtrace) + +AC_MSG_CHECKING(if --enable-systemtap option specified) +AC_ARG_ENABLE(systemtap, + [AC_HELP_STRING([--enable-systemtap], + [Configure to use systemtap to emulate dtrace static probes])], + [db_cv_systemtap="$enable_systemtap"], [db_cv_systemtap="no"]) +AC_MSG_RESULT($db_cv_systemtap) + +AC_MSG_CHECKING(if --enable-perfmon-statistics option specified) +AC_ARG_ENABLE(perfmon_statistics, + [AC_HELP_STRING([--enable-perfmon-statistics], + [Configure to build in performance monitoring of statistics values @<:@default=no@:>@.])], + [db_cv_perfmon_statistics="$enable_perfmon_statistics"], 
[db_cv_perfmon_statistics="no"]) +AC_MSG_RESULT($db_cv_perfmon_statistics) + +AC_MSG_CHECKING(if --enable-uimutexes option specified) +AC_ARG_ENABLE(uimutexes, + [AC_HELP_STRING([--enable-uimutexes], + [Force use of Unix International mutexes.])], + [db_cv_uimutexes="$enable_uimutexes"], [db_cv_uimutexes="no"]) +AC_MSG_RESULT($db_cv_uimutexes) + +AC_MSG_CHECKING(if --enable-umrw option specified) +AC_ARG_ENABLE(umrw, + [AC_HELP_STRING([--enable-umrw], + [Mask harmless uninitialized memory read/writes.])], + [db_cv_umrw="$enable_umrw"], [db_cv_umrw="no"]) +AC_MSG_RESULT($db_cv_umrw) + +# Cryptography support. +# Until Berkeley DB 5.0, this was a simple yes/no decision. +# With the addition of support for Intel Integrated Performance Primitives (ipp) +# things are more complex. There are now three options: +# 1) don't build cryptography (no) +# 2) build using the built-in software implementation (yes) +# 3) build using the Intel IPP implementation (ipp) +# We handle this by making the primary configuration method: +# --with-cryptography={yes|no|ipp} +# which defaults to yes. The old enable/disable-cryptography argument is still +# supported for backwards compatibility. 
+AC_MSG_CHECKING(if --with-cryptography option specified) +AC_ARG_ENABLE(cryptography, [], [], enableval=$db_cv_build_full) +enable_cryptography="$enableval" +AC_ARG_WITH([cryptography], + AC_HELP_STRING([--with-cryptography=yes|no|ipp], [Build database cryptography support @<:@default=yes@:>@.]), + [], [with_cryptography=$enable_cryptography]) +case "$with_cryptography" in +yes|no|ipp) ;; +*) AC_MSG_ERROR([unknown --with-cryptography argument \'$with_cryptography\']) ;; +esac +db_cv_build_cryptography="$with_cryptography" +AC_MSG_RESULT($db_cv_build_cryptography) + +AC_MSG_CHECKING(if --with-mutex=MUTEX option specified) +AC_ARG_WITH(mutex, + [AC_HELP_STRING([--with-mutex=MUTEX], + [Select non-default mutex implementation.])], + [with_mutex="$withval"], [with_mutex="no"]) +if test "$with_mutex" = "yes"; then + AC_MSG_ERROR([--with-mutex requires a mutex name argument]) +fi +if test "$with_mutex" != "no"; then + db_cv_mutex="$with_mutex" +fi +AC_MSG_RESULT($with_mutex) + +# --with-mutexalign=ALIGNMENT was the configuration option that Berkeley DB +# used before the DbEnv::mutex_set_align method was added. 
+AC_ARG_WITH(mutexalign, + [AC_HELP_STRING([--with-mutexalign=ALIGNMENT], + [Obsolete; use DbEnv::mutex_set_align instead.])], + [AC_MSG_ERROR( + [--with-mutexalign no longer supported, use DbEnv::mutex_set_align])]) + +AC_ARG_WITH(stacksize, + [AC_HELP_STRING([--with-stacksize=SIZE], + [Set the stack size for Berkeley DB threads.])], + [with_stacksize="$withval"], [with_stacksize="no"]) + +AC_MSG_CHECKING([if --with-tcl=DIR option specified]) +AC_ARG_WITH(tcl, + [AC_HELP_STRING([--with-tcl=DIR], + [Directory location of tclConfig.sh.])], + [with_tclconfig="$withval"], [with_tclconfig="no"]) +AC_MSG_RESULT($with_tclconfig) +if test "$with_tclconfig" != "no"; then + db_cv_tcl="yes" +fi + +AC_MSG_CHECKING([if --with-uniquename=NAME option specified]) +AC_ARG_WITH(uniquename, + [AC_HELP_STRING([--with-uniquename=NAME], + [Build a uniquely named library.])], + [with_uniquename="$withval"], [with_uniquename="no"]) +if test "$with_uniquename" = "no"; then + db_cv_uniquename="no" + DB_VERSION_UNIQUE_NAME="" + AC_MSG_RESULT($with_uniquename) +else + db_cv_uniquename="yes" + if test "$with_uniquename" = "yes"; then + DB_VERSION_UNIQUE_NAME="__EDIT_DB_VERSION_UNIQUE_NAME__" + else + DB_VERSION_UNIQUE_NAME="$with_uniquename" + fi + AC_MSG_RESULT($DB_VERSION_UNIQUE_NAME) +fi + +# Undocumented option used for the dbsql command line tool (to match SQLite). +AC_ARG_ENABLE(readline, [], [with_readline=$enableval], [with_readline=no]) + +# --enable-sql_compat implies --enable-sql +if test "$db_cv_sql_compat" = "yes" -a "$db_cv_sql" = "no"; then + db_cv_sql=$db_cv_sql_compat +fi + +# --enable-jdbc implies --enable-sql +if test "$db_cv_jdbc" = "yes" -a "$db_cv_sql" = "no"; then + db_cv_sql=$db_cv_jdbc +fi + +# Testing requires Tcl. 
+if test "$db_cv_test" = "yes" -a "$db_cv_tcl" = "no"; then + AC_MSG_ERROR([--enable-test requires --enable-tcl]) +fi]) + diff --git a/dist/aclocal/perfmon.m4 b/dist/aclocal/perfmon.m4 new file mode 100644 index 00000000..b85be291 --- /dev/null +++ b/dist/aclocal/perfmon.m4 @@ -0,0 +1,97 @@ +# $Id$ + +# Determine what kind of application-specified performance event monitoring +# support is available for this platform. The options are: +# --enable-dtrace is supported on +# Solaris +# Linux SystemTap 1.1 or better +# Mac OS X version 10.5 (Leopard) or better + +AC_DEFUN(AM_DEFINE_PERFMON, [ + +AH_TEMPLATE(HAVE_PERFMON, + [Define to 1 to enable some kind of performance event monitoring.]) +AH_TEMPLATE(HAVE_PERFMON_STATISTICS, + [Define to 1 to enable performance event monitoring of *_stat() statistics.]) +AH_TEMPLATE(HAVE_DTRACE, + [Define to 1 to use dtrace for performance monitoring.]) + +if test "$db_cv_systemtap" = "yes" ; then + if test "$DTRACE" != "dtrace"; then + AC_MSG_ERROR([The dtrace program is missing; is systemtap v1.1 or better installed?]) + fi + db_cv_dtrace="yes" +fi +if test "$db_cv_dtrace" = "yes" ; then + db_cv_perfmon="yes" +fi + +AC_SUBST(DTRACE_CPP) +DTRACE_CPP=-C +if test "$db_cv_perfmon" = "yes" ; then + if test "$DTRACE" = "dtrace" ; then + AC_CHECK_HEADERS(sys/sdt.h) + # Generate the DTrace provider header file. This is duplicated + # in Makefile.in, to allow custom events to be added. + if test "$STAP" = "stap"; then + # Linux DTrace support may have a bug with dtrace -C -h + # The preprocessing isn't needed for -h on Linux, + # so skip the unnecessary preprocessing. + DTRACE_CPP= + fi + # The OS X version of dtrace prints a spurious line here. + if ! dtrace -h $DTRACE_CPP -I../util/dtrace -s ../dist/db_provider.d; then + AC_MSG_ERROR([Could not build db_provider.d: dtrace -h failed]) + fi + $RM db_provider.h.tmp + if ! mv db_provider.h db_provider.h.tmp ; then + AC_MSG_ERROR([Could not build db_provider.d: mv failed]) + elif ! 
sed -e \ +'/^#define[ ]*BDB_[A-Z_]*(.*)/y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/' \ +db_provider.h.tmp > db_provider.h ; then + AC_MSG_ERROR([Could not build db_provider.d: sed failed]) + fi + + # DTrace on Solaris needs to post-process .o files to both + # generate an additional .o as well as resolving the + # __dtrace___bdb___ symbols before putting them into + # libraries; Mac OS X does not. Treat a failing dtrace -G + # command as the indicator sign that dtrace -G is unnecessary. + # If it is needed then create an empty .c file to be a + # placeholder for the PIC & non-PIC versions of the dtrace -G + # output file. The root of this .c file must be the same as + # the root of the .d file -- i.e. db_provider -- for the + # dtrace -G lines at the end of Makefile.in to work correctly. + $RM db_provider.o + if dtrace -G $DTRACE_CPP -I../util/dtrace -s ../dist/db_provider.d 2> /dev/null && \ + test -f db_provider.o ; then + FINAL_OBJS="$FINAL_OBJS db_provider${o}" + rm -f db_provider.c + echo "" > db_provider.c + fi + AC_DEFINE(HAVE_DTRACE) + else + AC_MSG_ERROR([No supported performance utility found.]) + fi + AC_DEFINE(HAVE_PERFMON) + if test "$db_cv_perfmon_statistics" != "no" ; then + AC_DEFINE(HAVE_PERFMON_STATISTICS) + fi + # The method by which probes are listed depends on the underlying + # implementation; Linux's emulation of DTrace still uses the stap + # command at runtime. + AC_SUBST(LISTPROBES_DEPENDENCY) + AC_SUBST(LISTPROBES_COMMAND) + if test "$STAP" = "stap"; then + LISTPROBES_DEPENDENCY=.libs/libdb-$DB_VERSION_MAJOR.$DB_VERSION_MINOR$SOSUFFIX + LISTPROBES_COMMAND="stap -l 'process(\"$LISTPROBES_DEPENDENCY\").mark(\"*\")'" + elif test "$DTRACE" = "dtrace" ; then + LISTPROBES_DEPENDENCY=db_load + LISTPROBES_COMMAND="dnl + # Run a simple command which uses the library without needing any setup. 
+ sleep 1 | dtrace -l -n 'bdb\$\$target:::' -c '.libs/db_load dummy.db'" + fi +elif test "$db_cv_perfmon_statistics" = "yes" ; then + AC_MSG_ERROR([Enabling perfmon statistics requires --enable-dtrace]) +fi +]) diff --git a/dist/aclocal/programs.m4 b/dist/aclocal/programs.m4 new file mode 100644 index 00000000..021d6605 --- /dev/null +++ b/dist/aclocal/programs.m4 @@ -0,0 +1,59 @@ +# $Id$ + +# Check for programs used in building/installation. +AC_DEFUN(AM_PROGRAMS_SET, [ + +AC_CHECK_TOOL(CHMOD, chmod, none) +test "$CHMOD" = "none" && AC_MSG_ERROR([No chmod utility found.]) + +AC_CHECK_TOOL(CP, cp, none) +test "$CP" = "none" && AC_MSG_ERROR([No cp utility found.]) + +# The Tcl test suite requires a kill utility. +if test "$db_cv_test" = "yes"; then + AC_CHECK_TOOL(KILL, kill, none) + test "$KILL" = "none" && AC_MSG_ERROR([No kill utility found.]) +fi + +AC_CHECK_TOOL(LN, ln, none) +test "$LN" = "none" && AC_MSG_ERROR([No ln utility found.]) + +AC_CHECK_TOOL(MKDIR, mkdir, none) +test "$MKDIR" = "none" && AC_MSG_ERROR([No mkdir utility found.]) + +AC_CHECK_TOOL(RM, rm, none) +test "$RM" = "none" && AC_MSG_ERROR([No rm utility found.]) + +# We always want to force removes, and libtool assumes the same. +RM="$RM -f" + +AC_CHECK_TOOL(MV, mv, none) +test "$MV" = "none" && AC_MSG_ERROR([No mv utility found.]) + +if test "$db_cv_systemtap" = "yes" -o "$db_cv_dtrace" = "yes"; then + AC_CHECK_TOOL(STAP, stap, none) + test "$STAP" = "none" -a "$db_cv_systemtap" = "yes" && \ + AC_MSG_ERROR([No stap utility found.]) + db_cv_dtrace=yes +fi + +if test "$db_cv_dtrace" = "yes"; then + AC_CHECK_TOOL(DTRACE, dtrace, none) + test "$DTRACE" = "none" && AC_MSG_ERROR([No dtrace utility found.]) + # Sed and perl are needed only if events are added after building + # the distribution; if either is missing it is not an error for now. 
+ AC_CHECK_TOOL(SED, sed, none) + AC_CHECK_TOOL(PERL, perl, none) +fi + +# We need a complete path for sh, because some make utility implementations get +# upset if SHELL is set to just the command name. Don't use the SHELL variable +# here because the user likely has the SHELL variable set to something other +# than the Bourne shell, which is what Make wants. +AC_PATH_TOOL(db_cv_path_sh, sh, none) +test "$db_cv_path_sh" = "none" && AC_MSG_ERROR([No sh utility found.]) + +# Don't strip the binaries if --enable-debug was specified. +if test "$db_cv_debug" = yes; then + STRIP=":" +fi]) diff --git a/dist/aclocal/sequence.m4 b/dist/aclocal/sequence.m4 new file mode 100644 index 00000000..6e99b936 --- /dev/null +++ b/dist/aclocal/sequence.m4 @@ -0,0 +1,91 @@ +# $Id$ + +# Try and configure sequence support. +AC_DEFUN(AM_SEQUENCE_CONFIGURE, [ + AC_MSG_CHECKING([for 64-bit integral type support for sequences]) + + db_cv_build_sequence="yes" + + # Have to have found 64-bit types to support sequences. If we don't + # find the native types, we try and create our own. + if test "$ac_cv_type_int64_t" = "no" -a -z "$int64_decl"; then + db_cv_build_sequence="no" + fi + if test "$ac_cv_type_uint64_t" = "no" -a -z "$u_int64_decl"; then + db_cv_build_sequence="no" + fi + + # Figure out what type is the right size, and set the format. 
+ AC_SUBST(INT64_FMT) + AC_SUBST(UINT64_FMT) + db_cv_seq_type="no" + if test "$db_cv_build_sequence" = "yes" -a\ + "$ac_cv_sizeof_long" -eq "8"; then + db_cv_seq_type="long" + db_cv_seq_fmt='"%ld"' + db_cv_seq_ufmt='"%lu"' + INT64_FMT='#define INT64_FMT "%ld"' + UINT64_FMT='#define UINT64_FMT "%lu"' + else if test "$db_cv_build_sequence" = "yes" -a\ + "$ac_cv_sizeof_long_long" -eq "8"; then + db_cv_seq_type="long long" + db_cv_seq_fmt='"%lld"' + db_cv_seq_ufmt='"%llu"' + INT64_FMT='#define INT64_FMT "%lld"' + UINT64_FMT='#define UINT64_FMT "%llu"' + else + db_cv_build_sequence="no" + fi + fi + + # Test to see if we can declare variables of the appropriate size + # and format them. If we're cross-compiling, all we get is a link + # test, which won't test for the appropriate printf format strings. + if test "$db_cv_build_sequence" = "yes"; then + AC_TRY_RUN([ + main() { + $db_cv_seq_type l; + unsigned $db_cv_seq_type u; + char buf@<:@100@:>@; + + buf@<:@0@:>@ = 'a'; + l = 9223372036854775807LL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l); + if (strcmp(buf, "9223372036854775807")) + return (1); + u = 18446744073709551615ULL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u); + if (strcmp(buf, "18446744073709551615")) + return (1); + return (0); + }],, [db_cv_build_sequence="no"], + AC_TRY_LINK(,[ + $db_cv_seq_type l; + unsigned $db_cv_seq_type u; + char buf@<:@100@:>@; + + buf@<:@0@:>@ = 'a'; + l = 9223372036854775807LL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l); + if (strcmp(buf, "9223372036854775807")) + return (1); + u = 18446744073709551615ULL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u); + if (strcmp(buf, "18446744073709551615")) + return (1); + return (0); + ],, [db_cv_build_sequence="no"])) + fi + if test "$db_cv_build_sequence" = "yes"; then + AC_SUBST(db_seq_decl) + db_seq_decl="typedef $db_cv_seq_type db_seq_t;"; + + AC_DEFINE(HAVE_64BIT_TYPES) + AH_TEMPLATE(HAVE_64BIT_TYPES, + [Define to 1 if 64-bit types are 
available.]) + else + # It still has to compile, but it won't run. + db_seq_decl="typedef int db_seq_t;"; + fi + AC_MSG_RESULT($db_cv_build_sequence) +]) diff --git a/dist/aclocal/socklen.m4 b/dist/aclocal/socklen.m4 new file mode 100644 index 00000000..b6c47944 --- /dev/null +++ b/dist/aclocal/socklen.m4 @@ -0,0 +1,55 @@ +dnl Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Albert Chin, Windows fixes from Simon Josefsson. + +dnl Check for socklen_t: historically on BSD it is an int, and in +dnl POSIX 1g it is a type of its own, but some platforms use different +dnl types for the argument to getsockopt, getpeername, etc. So we +dnl have to test to find something that will work. + +dnl On mingw32, socklen_t is in ws2tcpip.h ('int'), so we try to find +dnl it there first. That file is included by gnulib's socket_.h, which +dnl all users of this module should include. Cygwin must not include +dnl ws2tcpip.h. + +dnl Windows fixes removed for Berkeley DB. Functions renamed, basic check +dnl remains the same though. +dnl !!! +dnl The original version had fixes for MinGW -- if you need those, go back +dnl and look at the original code. 
+ +AC_DEFUN([AM_SOCKLEN_T],[ + AC_CHECK_TYPE([socklen_t], , + [AC_MSG_CHECKING([for socklen_t equivalent]) + AC_CACHE_VAL([db_cv_socklen_t_equiv], + [# Systems have either "struct sockaddr *" or + # "void *" as the second argument to getpeername + db_cv_socklen_t_equiv= + for arg2 in "struct sockaddr" void; do + for t in int size_t "unsigned int" "long int" "unsigned long int"; do + AC_TRY_COMPILE([$db_includes +#ifdef HAVE_SYS_SOCKET_H +#include +#endif + int getpeername (int, $arg2 *, $t *);], + [$t len; + getpeername (0, 0, &len);], + [db_cv_socklen_t_equiv="$t"]) + test "$db_cv_socklen_t_equiv" != "" && break + done + test "$db_cv_socklen_t_equiv" != "" && break + done + ]) + if test "$db_cv_socklen_t_equiv" = ""; then + AC_MSG_ERROR([Cannot find a type to use in place of socklen_t]) + fi + AC_MSG_RESULT([$db_cv_socklen_t_equiv]) + AC_DEFINE_UNQUOTED([socklen_t], [$db_cv_socklen_t_equiv], + [type to use in place of socklen_t if not defined])], + [$db_includes +#ifdef HAVE_SYS_SOCKET_H +#include +#endif])]) diff --git a/dist/aclocal/sosuffix.m4 b/dist/aclocal/sosuffix.m4 new file mode 100644 index 00000000..4498eee6 --- /dev/null +++ b/dist/aclocal/sosuffix.m4 @@ -0,0 +1,75 @@ +# $Id$ +# Determine shared object suffixes. +# +# Our method is to use the libtool variable $library_names_spec, +# set by using AC_PROG_LIBTOOL. This variable is a snippet of shell +# defined in terms of $versuffix, $release, $libname and $module +# We want to eval it and grab the suffix used for shared objects. +# By setting $module to yes/no, we obtain the suffixes +# used to create dlloadable, or java loadable modules. +# On many (*nix) systems, these all evaluate to .so, but there +# are some notable exceptions. +# Before calling this macro, libtool must have been configured. + +# This macro is used internally to discover the suffix for the current +# settings of $module. The result is stored in $_SOSUFFIX. 
+AC_DEFUN(_SOSUFFIX_INTERNAL, [ + versuffix="" + release="" + libname=libfoo + eval _SOSUFFIX=\"$shrext_cmds\" + if test "$_SOSUFFIX" = "" ; then + _SOSUFFIX=".so" + if test "$enable_shared" != "yes"; then + if test "$_SOSUFFIX_MESSAGE" = ""; then + _SOSUFFIX_MESSAGE=yes + AC_MSG_WARN([libtool may not know about this architecture.]) + AC_MSG_WARN([assuming $_SOSUFFIX suffix for dynamic libraries.]) + fi + fi + fi +]) + +# SOSUFFIX_CONFIG will set the variable SOSUFFIX to be the +# shared library extension used for general linking, not dlopen. +AC_DEFUN(SOSUFFIX_CONFIG, [ + AC_MSG_CHECKING([SOSUFFIX from libtool]) + module=no + _SOSUFFIX_INTERNAL + SOSUFFIX=$_SOSUFFIX + AC_MSG_RESULT($SOSUFFIX) + AC_SUBST(SOSUFFIX) +]) + +# MODSUFFIX_CONFIG will set the variable MODSUFFIX to be the +# shared library extension used for dlopen'ed modules. +# To discover this, we set $module, simulating libtool's -module option. +AC_DEFUN(MODSUFFIX_CONFIG, [ + AC_MSG_CHECKING([MODSUFFIX from libtool]) + module=yes + _SOSUFFIX_INTERNAL + MODSUFFIX=$_SOSUFFIX + AC_MSG_RESULT($MODSUFFIX) + AC_SUBST(MODSUFFIX) +]) + +# JMODSUFFIX_CONFIG will set the variable JMODSUFFIX to be the +# shared library extension used JNI modules opened by Java. +# To discover this, we set $jnimodule, simulating libtool's -shrext option. +########################################################################## +# Robert Boehne: Not much point in this macro any more because apparently +# Darwin is the only OS that wants or needs the .jnilib extension. 
+########################################################################## +AC_DEFUN(JMODSUFFIX_CONFIG, [ + AC_MSG_CHECKING([JMODSUFFIX from libtool]) + module=yes + _SOSUFFIX_INTERNAL + if test `uname` = "Darwin"; then + JMODSUFFIX=".jnilib" + else + JMODSUFFIX=$_SOSUFFIX + fi + AC_MSG_RESULT($JMODSUFFIX) + AC_SUBST(JMODSUFFIX) +]) + diff --git a/dist/aclocal/tcl.m4 b/dist/aclocal/tcl.m4 new file mode 100644 index 00000000..0aba9b5b --- /dev/null +++ b/dist/aclocal/tcl.m4 @@ -0,0 +1,138 @@ +# $Id$ + +# The SC_* macros in this file are from the unix/tcl.m4 files in the Tcl +# 8.3.0 distribution, with some minor changes. For this reason, license +# terms for the Berkeley DB distribution dist/aclocal/tcl.m4 file are as +# follows (copied from the license.terms file in the Tcl 8.3 distribution): +# +# This software is copyrighted by the Regents of the University of +# California, Sun Microsystems, Inc., Scriptics Corporation, +# and other parties. The following terms apply to all files associated +# with the software unless explicitly disclaimed in individual files. +# +# The authors hereby grant permission to use, copy, modify, distribute, +# and license this software and its documentation for any purpose, provided +# that existing copyright notices are retained in all copies and that this +# notice is included verbatim in any distributions. No written agreement, +# license, or royalty fee is required for any of the authorized uses. +# Modifications to this software may be copyrighted by their authors +# and need not follow the licensing terms described here, provided that +# the new terms are clearly indicated on the first page of each file where +# they apply. 
+# +# IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +# ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +# DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +# IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +# NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +# MODIFICATIONS. +# +# GOVERNMENT USE: If you are acquiring this software on behalf of the +# U.S. government, the Government shall have only "Restricted Rights" +# in the software and related documentation as defined in the Federal +# Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +# are acquiring the software on behalf of the Department of Defense, the +# software shall be classified as "Commercial Computer Software" and the +# Government shall have only "Restricted Rights" as defined in Clause +# 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +# authors grant the U.S. Government and others acting in its behalf +# permission to use and distribute the software in accordance with the +# terms specified in this license. + +AC_DEFUN(SC_PATH_TCLCONFIG, [ + AC_CACHE_VAL(ac_cv_c_tclconfig,[ + + # First check to see if --with-tclconfig was specified. 
+ if test "${with_tclconfig}" != no; then + if test -f "${with_tclconfig}/tclConfig.sh" ; then + ac_cv_c_tclconfig=`(cd ${with_tclconfig}; pwd)` + else + AC_MSG_ERROR([${with_tclconfig} directory doesn't contain tclConfig.sh]) + fi + fi + + # check in a few common install locations + if test x"${ac_cv_c_tclconfig}" = x ; then + for i in `ls -d /usr/local/lib 2>/dev/null` ; do + if test -f "$i/tclConfig.sh" ; then + ac_cv_c_tclconfig=`(cd $i; pwd)` + break + fi + done + fi + + ]) + + if test x"${ac_cv_c_tclconfig}" = x ; then + TCL_BIN_DIR="# no Tcl configs found" + AC_MSG_ERROR(can't find Tcl configuration definitions) + else + TCL_BIN_DIR=${ac_cv_c_tclconfig} + fi +]) + +AC_DEFUN(SC_LOAD_TCLCONFIG, [ + AC_MSG_CHECKING([for existence of $TCL_BIN_DIR/tclConfig.sh]) + + if test -f "$TCL_BIN_DIR/tclConfig.sh" ; then + AC_MSG_RESULT([loading]) + . $TCL_BIN_DIR/tclConfig.sh + else + AC_MSG_RESULT([file not found]) + fi + + # DB requires at least version 8.4. + if test ${TCL_MAJOR_VERSION} -lt 8 \ + -o ${TCL_MAJOR_VERSION} -eq 8 -a ${TCL_MINOR_VERSION} -lt 4; then + AC_MSG_ERROR([Berkeley DB requires Tcl version 8.4 or better.]) + fi + + # The eval is required to do substitution (for example, the TCL_DBGX + # substitution in the TCL_LIB_FILE variable. + eval "TCL_INCLUDE_SPEC=\"${TCL_INCLUDE_SPEC}\"" + eval "TCL_LIB_FILE=\"${TCL_LIB_FILE}\"" + eval "TCL_LIB_FLAG=\"${TCL_LIB_FLAG}\"" + eval "TCL_LIB_SPEC=\"${TCL_LIB_SPEC}\"" + + # + # If the DB Tcl library isn't loaded with the Tcl spec and library + # flags on AIX, the resulting libdb_tcl-X.Y.so.0 will drop core at + # load time. [#4843] Furthermore, with Tcl 8.3, the link flags + # given by the Tcl spec are insufficient for our use. 
[#5779],[#17109] + # + case "$host_os" in + aix*) + LIBTSO_LIBS="$LIBTSO_LIBS $TCL_LIB_SPEC $TCL_LIB_FLAG" + LIBTSO_LIBS="$LIBTSO_LIBS -L$TCL_EXEC_PREFIX/lib -ltcl$TCL_VERSION";; + esac + AC_SUBST(TCL_BIN_DIR) + AC_SUBST(TCL_INCLUDE_SPEC) + AC_SUBST(TCL_LIB_FILE) + AC_SUBST(TCL_SRC_DIR) + + AC_SUBST(TCL_TCLSH) + TCL_TCLSH="${TCL_PREFIX}/bin/tclsh${TCL_VERSION}" +]) + +# Optional Tcl API. +AC_DEFUN(AM_TCL_LOAD, [ + SC_PATH_TCLCONFIG + SC_LOAD_TCLCONFIG + + if test "$enable_shared" = "no"; then + DEFAULT_LIB_TCL="\$(libtcl_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_TCL="\$(libtso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_TCL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libtcl)" + fi +]) diff --git a/dist/aclocal/tls.m4 b/dist/aclocal/tls.m4 new file mode 100644 index 00000000..52d232be --- /dev/null +++ b/dist/aclocal/tls.m4 @@ -0,0 +1,65 @@ +# Check for thread local storage support. +# Required when building with DB STL support. 
+# Based in part on: http://autoconf-archive.cryp.to/ax_tls.html +# by Alan Woodland + +AC_DEFUN([AX_TLS], [ + AC_MSG_CHECKING(for thread local storage (TLS) class) + AC_SUBST(TLS_decl) + AC_SUBST(TLS_defn) + ac_cv_tls=none + AC_LANG_SAVE + AC_LANG_CPLUSPLUS + ax_tls_keywords="__thread __declspec(thread) __declspec(__thread)" + for ax_tls_decl_keyword in $ax_tls_keywords ""; do + for ax_tls_defn_keyword in $ax_tls_keywords ""; do + test -z "$ax_tls_decl_keyword" && + test -z "$ax_tls_defn_keyword" && continue + AC_TRY_COMPILE([template class TLSClass { + public: static ] $ax_tls_decl_keyword [ T *tlsvar; + }; + class TLSClass2 { + public: static ] $ax_tls_decl_keyword [int tlsvar; + }; + template ] $ax_tls_defn_keyword [ T* TLSClass::tlsvar = NULL;] + $ax_tls_defn_keyword [int TLSClass2::tlsvar = 1; + static $ax_tls_decl_keyword int x = 0;], + [TLSClass::tlsvar = NULL; TLSClass2::tlsvar = 1;], + [ac_cv_tls=modifier ; break]) + done + test "$ac_cv_tls" = none || break + done + AC_LANG_RESTORE + if test "$ac_cv_tls" = "none" ; then + AC_TRY_COMPILE( + [#include + #include + + static pthread_once_t once_control_ = PTHREAD_ONCE_INIT; + static pthread_key_t key; + + static void init_once(void) { + pthread_key_create(&key, NULL); + } + static void *get_tls() { + return (void *)pthread_getspecific(&key); + } + static void set_tls(void *p) { + pthread_setspecific(&key, p); + }], [], + [ac_cv_tls=pthread]) + fi + + case "$ac_cv_tls" in + none) break ;; + pthread) + TLS_decl="#define HAVE_PTHREAD_TLS" + TLS_defn="" ;; + modifier) + TLS_decl="#define TLS_DECL_MODIFIER $ax_tls_decl_keyword" + TLS_defn="#define TLS_DEFN_MODIFIER $ax_tls_defn_keyword" ;; + esac + + AC_MSG_RESULT([$ac_cv_tls]) +]) + diff --git a/dist/aclocal/types.m4 b/dist/aclocal/types.m4 new file mode 100644 index 00000000..c3a2b78d --- /dev/null +++ b/dist/aclocal/types.m4 @@ -0,0 +1,212 @@ +# $Id$ + +# Check the sizes we know about, and see if any of them match what's needed. 
+# +# Prefer ints to anything else, because read, write and others historically +# returned an int. +AC_DEFUN(AM_SEARCH_USIZES, [ + case "$3" in + "$ac_cv_sizeof_unsigned_int") + $1="typedef unsigned int $2;";; + "$ac_cv_sizeof_unsigned_char") + $1="typedef unsigned char $2;";; + "$ac_cv_sizeof_unsigned_short") + $1="typedef unsigned short $2;";; + "$ac_cv_sizeof_unsigned_long") + $1="typedef unsigned long $2;";; + "$ac_cv_sizeof_unsigned_long_long") + $1="typedef unsigned long long $2;";; + *) + if test "$4" != "notfatal"; then + AC_MSG_ERROR([No unsigned $3-byte integral type]) + fi;; + esac]) +AC_DEFUN(AM_SEARCH_SSIZES, [ + case "$3" in + "$ac_cv_sizeof_int") + $1="typedef int $2;";; + "$ac_cv_sizeof_char") + $1="typedef char $2;";; + "$ac_cv_sizeof_short") + $1="typedef short $2;";; + "$ac_cv_sizeof_long") + $1="typedef long $2;";; + "$ac_cv_sizeof_long_long") + $1="typedef long long $2;";; + *) + if test "$4" != "notfatal"; then + AC_MSG_ERROR([No signed $3-byte integral type]) + fi;; + esac]) + +# Check for the standard system types. +AC_DEFUN(AM_TYPES, [ + +# db.h includes and , not the other default includes +# autoconf usually includes. For that reason, we specify a set of includes +# for all type checking tests. [#5060] +# +# C99 says types should be in ; include if it exists. +# +# Some systems have types in ; include if it exists. +# +# IBM's OS/390 and z/OS releases have types in not also found +# in ; include if it exists. +db_includes="#include " +AC_SUBST(inttypes_h_decl) +AC_CHECK_HEADER(inttypes.h, [ + db_includes="$db_includes +#include " + inttypes_h_decl="#include "]) + +# IRIX has stdint.h that is only available when using c99 (i.e. __c99 +# is defined). Problem with having it in a public header is that a c++ +# compiler cannot #include if db.h #includes stdint.h, so we +# need to check that stdint.h is available for all cases. 
Also the +# IRIX compiler does not exit with a non-zero exit code when it sees +# #error, so we actually need to use the header for the compiler to fail. +AC_SUBST(stdint_h_decl) +AC_MSG_CHECKING(for stdint.h) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([ +#include + int main() { + uint_least8_t x=0; + return x; + }])],[AC_MSG_RESULT(yes) +if test "$db_cv_cxx" = "yes"; then + AC_MSG_CHECKING([if stdint.h can be used by C++]) + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([#include + int main() { + uint_least8_t x=0; + return x; + }])],[AC_MSG_RESULT(yes) + stdint_h_decl="#include " + db_includes="$db_includes +#include " +],[AC_MSG_RESULT(no) + stdint_h_decl="#ifndef __cplusplus +#include +#endif" + db_includes="$db_includes +#ifndef __cplusplus +#include +#endif" +]) + AC_LANG_POP +else + stdint_h_decl="#include " + db_includes="$db_includes +#include " +fi],[AC_MSG_RESULT(no)]) + +AC_SUBST(stddef_h_decl) +AC_CHECK_HEADER(stddef.h, [ + db_includes="$db_includes +#include " + stddef_h_decl="#include "]) +AC_SUBST(unistd_h_decl) +AC_CHECK_HEADER(unistd.h, [ + db_includes="$db_includes +#include " + unistd_h_decl="#include "]) +db_includes="$db_includes +#include " + +# We need to know the sizes of various objects on this system. +AC_CHECK_SIZEOF(char,, $db_includes) +AC_CHECK_SIZEOF(unsigned char,, $db_includes) +AC_CHECK_SIZEOF(short,, $db_includes) +AC_CHECK_SIZEOF(unsigned short,, $db_includes) +AC_CHECK_SIZEOF(int,, $db_includes) +AC_CHECK_SIZEOF(unsigned int,, $db_includes) +AC_CHECK_SIZEOF(long,, $db_includes) +AC_CHECK_SIZEOF(unsigned long,, $db_includes) +AC_CHECK_SIZEOF(long long,, $db_includes) +AC_CHECK_SIZEOF(unsigned long long,, $db_includes) +AC_CHECK_SIZEOF(char *,, $db_includes) + +# We look for u_char, u_short, u_int, u_long -- if we can't find them, +# we create our own. 
+AC_SUBST(u_char_decl) +AC_CHECK_TYPE(u_char,, + [u_char_decl="typedef unsigned char u_char;"], $db_includes) + +AC_SUBST(u_short_decl) +AC_CHECK_TYPE(u_short,, + [u_short_decl="typedef unsigned short u_short;"], $db_includes) + +AC_SUBST(u_int_decl) +AC_CHECK_TYPE(u_int,, + [u_int_decl="typedef unsigned int u_int;"], $db_includes) + +AC_SUBST(u_long_decl) +AC_CHECK_TYPE(u_long,, + [u_long_decl="typedef unsigned long u_long;"], $db_includes) + +# We look for fixed-size variants of u_char, u_short, u_int, u_long as well. +AC_SUBST(u_int8_decl) +AC_CHECK_TYPE(u_int8_t,, + [AM_SEARCH_USIZES(u_int8_decl, u_int8_t, 1)], $db_includes) + +AC_SUBST(u_int16_decl) +AC_CHECK_TYPE(u_int16_t,, + [AM_SEARCH_USIZES(u_int16_decl, u_int16_t, 2)], $db_includes) + +AC_SUBST(int16_decl) +AC_CHECK_TYPE(int16_t,, + [AM_SEARCH_SSIZES(int16_decl, int16_t, 2)], $db_includes) + +AC_SUBST(u_int32_decl) +AC_CHECK_TYPE(u_int32_t,, + [AM_SEARCH_USIZES(u_int32_decl, u_int32_t, 4)], $db_includes) + +AC_SUBST(int32_decl) +AC_CHECK_TYPE(int32_t,, + [AM_SEARCH_SSIZES(int32_decl, int32_t, 4)], $db_includes) + +AC_SUBST(u_int64_decl) +AC_CHECK_TYPE(u_int64_t,, + [AM_SEARCH_USIZES(u_int64_decl, u_int64_t, 8, notfatal)], $db_includes) + +AC_SUBST(int64_decl) +AC_CHECK_TYPE(int64_t,, + [AM_SEARCH_SSIZES(int64_decl, int64_t, 8, notfatal)], $db_includes) + +# No currently autoconf'd systems lack FILE, off_t pid_t, size_t, time_t. +# +# We require them, we don't try to substitute our own if we can't find them. 
+AC_SUBST(FILE_t_decl) +AC_CHECK_TYPE(FILE *,, AC_MSG_ERROR([No FILE type.]), $db_includes) +AC_SUBST(off_t_decl) +AC_CHECK_TYPE(off_t,, AC_MSG_ERROR([No off_t type.]), $db_includes) +AC_SUBST(pid_t_decl) +AC_CHECK_TYPE(pid_t,, AC_MSG_ERROR([No pid_t type.]), $db_includes) +AC_SUBST(size_t_decl) +AC_CHECK_TYPE(size_t,, AC_MSG_ERROR([No size_t type.]), $db_includes) +AC_SUBST(time_t_decl) +AC_CHECK_TYPE(time_t,, AC_MSG_ERROR([No time_t type.]), $db_includes) + +# Check for ssize_t -- if none exists, find a signed integral type that's +# the same size as a size_t. +AC_CHECK_SIZEOF(size_t,, $db_includes) +AC_SUBST(ssize_t_decl) +AC_CHECK_TYPE(ssize_t,, + [AM_SEARCH_SSIZES(ssize_t_decl, ssize_t, $ac_cv_sizeof_size_t)], + $db_includes) + +# Check for uintmax_t -- if none exists, find the largest unsigned integral +# type available. +AC_SUBST(uintmax_t_decl) +AC_CHECK_TYPE(uintmax_t,, [AC_CHECK_TYPE(unsigned long long, + [uintmax_t_decl="typedef unsigned long long uintmax_t;"], + [uintmax_t_decl="typedef unsigned long uintmax_t;"], $db_includes)]) + +# Check for uintptr_t -- if none exists, find an integral type which is +# the same size as a pointer. +AC_SUBST(uintptr_t_decl) +AC_CHECK_TYPE(uintptr_t,, + [AM_SEARCH_USIZES(uintptr_t_decl, uintptr_t, $ac_cv_sizeof_char_p)]) + +AM_SOCKLEN_T +]) diff --git a/dist/api_flags b/dist/api_flags new file mode 100644 index 00000000..cb6008c5 --- /dev/null +++ b/dist/api_flags @@ -0,0 +1,527 @@ +db_env_create + DB_CXX_NO_EXCEPTIONS # C++: return error values + +DbEnv.close + # Sync database when automatically closing its db handles. + DB_FORCESYNC + +DbEnv.dbremove + DB_AUTO_COMMIT # Implied transaction + DB_LOG_NO_DATA # UNDOC: Don't log the data. 
+ DB_NOSYNC # UNDOC: Don't sync for a subdb remove + DB_TXN_NOT_DURABLE # UNDOC: Don't log the remove + +DbEnv.dbrename + DB_AUTO_COMMIT # Implied transaction + DB_NOSYNC # Don't sync for a subdb rename + +DbEnv.fileid_reset + DB_ENCRYPT # File contains encrypted databases + +DbEnv.log_verify + DB_LOG_VERIFY_ERR # Verify failed. + DB_LOG_VERIFY_CAF # Continue after a failed check. + # Not the entire logs are verified, due to user specified log range, + # archiving, or other reasons. Only part of the entire logs verified. + DB_LOG_VERIFY_PARTIAL + DB_LOG_VERIFY_DBFILE # Verify only logs of one db file. + # Pre-scan log from last to first record to get aborts and timestamps. + DB_LOG_VERIFY_FORWARD + DB_LOG_VERIFY_INTERR # Got internal error during verify. + DB_LOG_VERIFY_WARNING # Got warnings during verify. + DB_LOG_VERIFY_VERBOSE # Output verbose information. + +DbEnv.open + DB_CREATE # Create as necessary + DB_FAILCHK # Run failchk on open + DB_FAILCHK_ISALIVE # UNDOC: failchk with isalive, for SQL + DB_INIT_CDB # Concurrent Access Methods + DB_INIT_LOCK # Initialize locking + DB_INIT_LOG # Initialize logging + DB_INIT_MPOOL # Initialize mpool + DB_INIT_MUTEX # Initialize mutex + DB_INIT_REP # Initialize replication + DB_INIT_TXN # Initialize transactions + DB_LOCKDOWN # Lock memory into physical core + DB_NO_CHECKPOINT # UNDOC: Recover but do not checkpoint + DB_PRIVATE # DB_ENV is process local + DB_RECOVER # Run normal recovery + DB_RECOVER_FATAL # Run catastrophic recovery + DB_REGISTER # Multi-process registry + DB_SYSTEM_MEM # Use system-backed memory + DB_THREAD # Handle is free-threaded + DB_USE_ENVIRON # Use the environment + DB_USE_ENVIRON_ROOT # Use the environment if root + +DbEnv.lock_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.lock_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_LOCK_CONF # Print lock conflict matrix + DB_STAT_LOCK_LOCKERS # Print 
lockers + DB_STAT_LOCK_OBJECTS # Print lock objects + DB_STAT_LOCK_PARAMS # Print lock parameters + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.lock_vec + DB_LOCK_CHECK # UNDOC: check for a lock + DB_LOCK_NOWAIT # Don't wait for an unavailable lock + DB_LOCK_RECORD # UNDOC: record lock + DB_LOCK_SET_TIMEOUT # UNDOC: set lock timeout + DB_LOCK_SWITCH # UNDOC: switch existing lock + DB_LOCK_UPGRADE # UNDOC: upgrade existing lock + +DbEnv.log_archive + DB_ARCH_ABS # Absolute pathnames + DB_ARCH_DATA # Data files + DB_ARCH_LOG # Log files + DB_ARCH_REMOVE # Remove log files + +DbEnv.log_put + DB_FLUSH # Flush data to disk + DB_LOG_CHKPNT # UNDOC: Flush supports a checkpoint + DB_LOG_COMMIT # UNDOC: Flush supports a commit + DB_LOG_NOCOPY # UNDOC: Don't copy data + DB_LOG_NOT_DURABLE # UNDOC: Do not log; keep in memory + DB_LOG_WRNOSYNC # UNDOC: Write, don't sync log_put + +DbEnv.log_set_config + DB_LOG_DIRECT # Don't buffer log files in the OS + DB_LOG_DSYNC # Set O_DSYNC on the log + DB_LOG_AUTO_REMOVE # Automatically remove log files + DB_LOG_IN_MEMORY # Store logs in buffers in memory + DB_LOG_ZERO # Zero log file on creation + +DbEnv.log_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.log_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.lsn_reset + DB_ENCRYPT # File contains encrypted databases + +DbEnv.memp_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.memp_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_MEMP_HASH # Print mpool hash buckets + DB_STAT_MEMP_NOERROR # UNDOC: continue on error + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.mutex_alloc + DB_MUTEX_ALLOCATED # UNDOC: Mutex currently allocated + DB_MUTEX_LOCKED # UNDOC: Mutex currently locked + DB_MUTEX_LOGICAL_LOCK # UNDOC: Mutex backs a database lock + 
DB_MUTEX_PROCESS_ONLY # Mutex private to a process + DB_MUTEX_SELF_BLOCK # Must be able to block self + DB_MUTEX_SHARED # Shared (read/write) mutex + +DbEnv.mutex_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.mutex_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.remove + DB_FORCE + DB_USE_ENVIRON # Use the environment + DB_USE_ENVIRON_ROOT # Use the environment if root + +DbEnv.rep_set_config + DB_REPMGR_CONF_2SITE_STRICT # Don't cheat on election votes + DB_REPMGR_CONF_ELECTIONS # Manage elections to choose master + DB_REP_CONF_AUTOINIT # automatic client init + DB_REP_CONF_AUTOROLLBACK # UNDOC: discard txns to sync w/ master + DB_REP_CONF_BULK # Bulk transfer + DB_REP_CONF_DELAYCLIENT # Delay client synchronization + DB_REP_CONF_INMEM # In-memory replication + DB_REP_CONF_LEASE # Master leases + DB_REP_CONF_NOWAIT # Don't wait, return error + +DbEnv.rep_set_transport + DB_REP_ANYWHERE # Message can be serviced anywhere + DB_REP_NOBUFFER # Do not buffer this message + DB_REP_PERMANENT # Important -- app may want to flush + DB_REP_REREQUEST # This msg already been requested + +DbEnv.rep_start + DB_REP_CLIENT # Client + DB_REP_MASTER # Master + +DbEnv.rep_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.rep_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + DB_STAT_SUMMARY # UNDOC: print summary + +# Covers both flags to the callback-setting method, and the callback itself +DbEnv.repmgr_msg_dispatch + DB_REPMGR_NEED_RESPONSE # Synchronous request message type + +DbEnv.repmgr_start + DB_REP_CLIENT # Client + DB_REP_ELECTION # Election + DB_REP_MASTER # Master + +DbEnv.repmgr_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.repmgr_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print 
allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.set_encrypt + DB_ENCRYPT_AES # AES, assumes SHA1 checksum + +DbEnv.set_feedback.op + DB_RECOVER # Running recovery. + +DbEnv.set_flags + DB_AUTO_COMMIT # Implied transaction + DB_CDB_ALLDB # Set CDB locking per environment + DB_DATABASE_LOCKING # UNDOC: try database-level locking + DB_DIRECT_DB # Don't buffer databases in the OS + DB_DSYNC_DB # Set O_DSYNC on the databases + DB_HOTBACKUP_IN_PROGRESS # Inhibit bulk loading optimization + DB_MULTIVERSION # Multiversion concurrency control + DB_NOLOCKING # Set locking/mutex behavior + DB_NOMMAP # Don't mmap the underlying file + DB_NOPANIC # Set panic state per environment + DB_NOFLUSH # UNDOC: don't flush cache on close + DB_OVERWRITE # Overwrite unlinked region files + DB_PANIC_ENVIRONMENT # Set panic state per environment + DB_REGION_INIT # Page-fault regions on open + DB_TIME_NOTGRANTED # Return NOTGRANTED on timeout + DB_TXN_NOSYNC # Do not sync log on commit + DB_TXN_NOWAIT # Do not wait for locks + DB_TXN_SNAPSHOT # Snapshot isolation + DB_TXN_WRITE_NOSYNC # Write the log bug don't sync + DB_YIELDCPU # Yield the CPU (a lot) + +DbEnv.set_isalive + DB_MUTEX_ALLOCATED # UNDOC: Mutex currently allocated + DB_MUTEX_LOCKED # UNDOC: Mutex currently locked + DB_MUTEX_LOGICAL_LOCK # UNDOC: Mutex backs a database lock + DB_MUTEX_PROCESS_ONLY # Mutex private to a process + DB_MUTEX_SELF_BLOCK # Must be able to block self + +DbEnv.set_timeout + DB_SET_LOCK_TIMEOUT # Set lock timeout + DB_SET_TXN_NOW # UNDOC: Timeout lock now + DB_SET_TXN_TIMEOUT # Set transaction timeout + DB_SET_REG_TIMEOUT # Set dbregister timeout + +DbEnv.set_verbose + DB_VERB_DEADLOCK # Deadlock detection information + DB_VERB_FILEOPS # Major file operations + DB_VERB_FILEOPS_ALL # All file operations + DB_VERB_RECOVERY # Recovery information + DB_VERB_REGISTER # Dump waits-for table + DB_VERB_REPLICATION # All replication and repmgr output 
+ DB_VERB_REPMGR_CONNFAIL # Repmgr connection failure output + DB_VERB_REPMGR_MISC # Miscellaneous repmgr output + DB_VERB_REP_ELECT # Replication election output + DB_VERB_REP_LEASE # Replication master lease output + DB_VERB_REP_MISC # Miscellaneous replication output + DB_VERB_REP_MSGS # Replication message output + DB_VERB_REP_SYNC # Replication client sync output + DB_VERB_REP_SYSTEM # Replication system messages + DB_VERB_REP_TEST # Replication temporary test output + DB_VERB_WAITSFOR # Dump waits-for table + +DbEnv.stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbEnv.txn_begin + DB_IGNORE_LEASE # UNDOC: Ignore leases + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_TXN_NOSYNC # Do not sync log on commit + DB_TXN_NOWAIT # Do not wait for locks + DB_TXN_FAMILY # Cursors and child txns are + # independent but lock-compatible + DB_TXN_SNAPSHOT # Snapshot isolation + DB_TXN_SYNC # Always sync log on commit + DB_TXN_WAIT # Always wait for locks in this txn + DB_TXN_WRITE_NOSYNC # Write the log but don't sync + DB_TXN_BULK # Enable transactional bulk loading + +DbEnv.txn_checkpoint + DB_CKP_INTERNAL # UNDOC: internally generated checkpoint + DB_FORCE # Force + +DbEnv.txn_recover + __MASK=0xff # Berkeley DB operation codes. + +DbEnv.txn_stat + DB_STAT_CLEAR # Clear stats after return + +DbEnv.txn_stat_print + DB_STAT_ALL # Everything + DB_STAT_ALLOC # Print allocation information + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbLogc.get + __MASK=0xff # Berkeley DB operation codes. 
+ +DbMpoolFile.close + DB_MPOOL_DISCARD # UNDOC: Discard file + DB_MPOOL_NOLOCK # UNDOC: Already have mpf locked + +DbMpoolFile.get + DB_MPOOL_CREATE # Create a page + DB_MPOOL_DIRTY # Get page for an update + DB_MPOOL_EDIT # Modify without copying + DB_MPOOL_FREE # UNDOC: Free page if present + DB_MPOOL_LAST # Return the last page + DB_MPOOL_NEW # Create a new page + DB_MPOOL_TRY # Try to read a page, but don't block + +DbMpoolFile.open + DB_CREATE # Create as necessary + DB_DIRECT # Don't buffer the file in the OS + DB_DURABLE_UNKNOWN # UNDOC: Durability on open + DB_EXTENT # UNDOC: dealing with an extent + DB_MULTIVERSION # Multiversion concurrency control + DB_NOMMAP # Don't mmap underlying file + DB_ODDFILESIZE # Truncate file to N * pgsize + DB_RDONLY # Read-only (O_RDONLY) + DB_TXN_NOT_DURABLE # UNDOC: Mark file not durable on open + +DbMpoolFile.set_flags + DB_MPOOL_NOFILE # Never open a backing file + DB_MPOOL_UNLINK # Unlink the file on last close + +DbSequence.get + DB_AUTO_COMMIT # UNDOC: compatibility only + DB_TXN_NOSYNC # Do not sync log on commit + +DbSequence.open + DB_AUTO_COMMIT # UNDOC: compatibility only + DB_CREATE # Create as necessary + DB_EXCL # Exclusive open (O_EXCL) + DB_THREAD # Handle is free-threaded + +DbSequence.remove + DB_TXN_NOSYNC # Do not sync log on commit + +DbSequence.set_flags + DB_SEQ_DEC # Decrement sequence + DB_SEQ_INC # Increment sequence + DB_SEQ_RANGE_SET # UNDOC: Range set + DB_SEQ_WRAP # Wrap sequence at min/max + DB_SEQ_WRAPPED # UNDOC: Just wrapped + +DbSequence.stat + DB_STAT_CLEAR # Clear stats after return + +DbSequence.stat_print + DB_STAT_CLEAR # Clear stats after return + DB_STAT_SUBSYSTEM # Print subsystems + +DbSite.set_config + DB_BOOTSTRAP_HELPER # Join target for new group member + DB_GROUP_CREATOR # Primordial membership DB creator + DB_LEGACY # Upgrading pre-5.2 group + DB_LOCAL_SITE # Identifies this as the local site + DB_REPMGR_PEER # C2C synchronization + +DbTxn.commit + DB_TXN_NOSYNC # Do not 
sync log on commit + DB_TXN_SYNC # Always sync log on commit + +DbTxn.set_timeout + DB_SET_LOCK_TIMEOUT # Set lock timeout + DB_SET_TXN_TIMEOUT # Set transaction timeout + +db_create + DB_CXX_NO_EXCEPTIONS # C++: return error values + DB_XA_CREATE # Create a DBP for an XA database + +Db.associate + DB_AUTO_COMMIT # UNDOC: compatibility only + DB_CREATE # Create as necessary + DB_IMMUTABLE_KEY # Secondary key is immutable + +Db.associate_foreign + DB_FOREIGN_ABORT # If foreign key exists, delete aborts + DB_FOREIGN_CASCADE # If foreign key exists, delete cascades + DB_FOREIGN_NULLIFY # If foreign key exists, nullify it + +Db.close + DB_NOSYNC # Berkeley DB operation codes. + +Db.compact + DB_FREELIST_ONLY # Just sort and truncate + DB_FREE_SPACE # Free space + +Db.cursor + DB_CURSOR_BULK # Optimize for bulk updates + DB_CURSOR_TRANSIENT # UNDOC: Single-use cursor + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_RECOVER # Called from recovery (internal) + DB_WRITECURSOR # Cursor can update (CDB) + DB_WRITELOCK # Cursor should get write locks + DB_TXN_SNAPSHOT # Snapshot isolation + +Db.del + DB_AUTO_COMMIT # UNDOC: compatibility only + DB_MULTIPLE # Delete multiple data values + DB_MULTIPLE_KEY # Delete multiple key/data pairs + +Db.exists + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_RMW # Acquire write lock immediately + +Db.get + __MASK=0xff # Berkeley DB operation codes. + DB_AUTO_COMMIT # UNDOC: compatibility only + DB_IGNORE_LEASE # Ignore leases + DB_MULTIPLE # Return multiple data values + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_RMW # Acquire write lock immediately + +Db.pget + __MASK=0xff # Berkeley DB operation codes. 
+ DB_IGNORE_LEASE # Ignore leases + DB_MULTIPLE # Return multiple data values + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_RMW # Acquire write lock immediately + +Db.join + DB_JOIN_NOSORT # Don't try to optmize join + +Db.open + DB_AUTO_COMMIT # Implied transaction + DB_CREATE # Create file as necessary + DB_EXCL # Exclusive open (O_EXCL) + DB_FCNTL_LOCKING # UNDOC: fcntl(2) locking + DB_MULTIVERSION # Multiversion concurrency control + DB_NOMMAP # Don't mmap underlying file + DB_NO_AUTO_COMMIT # UNDOC: override env's AUTO_COMMIT + DB_RDONLY # Read-only (O_RDONLY) + DB_RDWRMASTER # UNDOC: allow subdb master open R/W + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_THREAD # Handle is free-threaded + DB_TRUNCATE # Discard existing DB (O_TRUNC) + + # The following flags aren't actually part of the Db.open method + # API, but they are accepted by the underlying __db_open function. + DB_DURABLE_UNKNOWN # UNDOC: Durability on open + DB_INTERNAL_DB # UNDOC: Open db in env dir + DB_NOERROR # UNDOC: Don't raise errors. + DB_ODDFILESIZE # UNDOC: Truncate file to N * pgsize + DB_WRITEOPEN # UNDOC: open with write lock + + # The following flags are DB constructor flags. However, create and/or + # open of BDB XML containers is done in a single call (rather than the + # two-call "construct the object, then open it" paradigm used by DB), + # and they can't collide for that reason. + DB_CXX_NO_EXCEPTIONS # C++: return error values + +Db.put + __MASK=0xff # Berkeley DB operation codes. 
+ DB_AUTO_COMMIT # UNDOC: compatibility only + DB_MULTIPLE # Put multiple (from key and data DBTs) + DB_MULTIPLE_KEY # Put multiple (from key DBT) + +Db.set_encrypt + DB_ENCRYPT_AES # AES, assumes SHA1 checksum + +Db.set_feedback + DB_UPGRADE # Upgrading + DB_VERIFY # Verifying + +Db.set_flags + DB_CHKSUM # Checksums + DB_DUP # Btree, Hash: duplicate keys + DB_DUPSORT # Btree, Hash: sorted duplicated + DB_ENCRYPT # AES, assumes SHA1 checksum + DB_INORDER # Queue: strict ordering on consume + DB_RECNUM # Btree: record numbers + DB_RENUMBER # Recno: renumber on insert/delete + DB_REVSPLITOFF # Btree: turn off reverse splits + DB_SNAPSHOT # Recno: snapshot the input + DB_TXN_NOT_DURABLE # Do not log changes + +Db.stat + DB_FAST_STAT # Don't traverse the database + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + +Db.truncate + DB_AUTO_COMMIT # UNDOC: compatibility only + +Db.upgrade + DB_DUPSORT # Upgrade duplicate data items + +Db.verify + DB_AGGRESSIVE # Salvage whatever could be data + DB_NOORDERCHK # Skip sort order/hashing check + DB_ORDERCHKONLY # Only perform the order check + DB_PRINTABLE # Use printable format for salvage + DB_PR_PAGE # UNDOC: Show page contents (-da) + DB_PR_RECOVERYTEST # UNDOC: Recover test (-dr) + DB_SALVAGE # Salvage what looks like data + DB_UNREF # UNDOC: Report unreferenced pages + DB_VERIFY_PARTITION # Verifying a partition + + # Flags understood by the btree structure checks (__bam_vrfy_subtree). + # These share the same space as the global flags to Db.verify. 
+ DB_ST_DUPOK # UNDOC: Duplicates are acceptable + DB_ST_DUPSET # UNDOC: Subtree is in a duplicate tree + DB_ST_DUPSORT # UNDOC: Duplicates are sorted + DB_ST_IS_RECNO # UNDOC: Subtree is a recno + DB_ST_OVFL_LEAF # UNDOC: Overflow reffed from leaf page + DB_ST_RECNUM # UNDOC: Subtree has record numbering on + DB_ST_RELEN # UNDOC: Subtree has fixed-length recs + DB_ST_TOPLEVEL # UNDOC: Subtree == entire tree + + # Flags understood by __bam_salvage and __db_salvage. These need not + # share name space with the __bam_vrfy_subtree flags, but must share + # with Db.verify. + DB_SA_SKIPFIRSTKEY # UNDOC: I have no idea what this does. + DB_SA_UNKNOWNKEY # UNDOC: The salvage key is unknown + +DbCursor.dup + __MASK=0xff # Berkeley DB operation codes. + DB_SHALLOW_DUP # UNDOC: Don't duplicate compression info + +DbCursor.get + __MASK=0xff # Berkeley DB operation codes. + DB_IGNORE_LEASE # Ignore leases + DB_MULTIPLE # Return multiple data values + DB_MULTIPLE_KEY # Return multiple key/data pairs + DB_READ_COMMITTED # Degree 2 isolation + DB_READ_UNCOMMITTED # Degree 1 isolation + DB_RMW # Acquire write lock immediately + +DbCursor.put + __MASK=0xff # Berkeley DB operation codes. diff --git a/dist/api_flags.c b/dist/api_flags.c new file mode 100644 index 00000000..70e21602 --- /dev/null +++ b/dist/api_flags.c @@ -0,0 +1,444 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include +#include + +typedef struct { + char *name; /* API name */ + + u_int used_mask; /* Bits used. */ +} API; +API **api_list, **api_end; + +typedef struct { + char *name; /* Flag name */ + + int api_cnt; /* APIs that use this flag. 
*/ + API **api, **api_end; + + u_int value; /* Bit value */ +} FLAG; +FLAG **flag_list, **flag_end; + +int verbose; +char *progname; + +int add_entry(char *, char *); +void define_print(char *, u_int); +void dump_api(void); +void dump_flags(void); +int flag_cmp_alpha(const void *, const void *); +int flag_cmp_api_cnt(const void *, const void *); +int generate_flags(void); +int parse(void); +void print_api_mask(void); +void print_api_remainder(void); +void print_flag_value(void); +int syserr(void); +int usage(void); + +int +main(int argc, char *argv[]) +{ + enum { API_MASK, API_REMAINDER, FLAG_VALUE } output; + int ch; + + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + output = FLAG_VALUE; + while ((ch = getopt(argc, argv, "mrv")) != EOF) + switch (ch) { + case 'm': + output = API_MASK; + break; + case 'r': + output = API_REMAINDER; + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (parse() || generate_flags()) + return (EXIT_FAILURE); + + switch (output) { + case API_MASK: + print_api_mask(); + break; + case API_REMAINDER: + print_api_remainder(); + break; + case FLAG_VALUE: + print_flag_value(); + break; + } + + if (verbose) { + dump_api(); + dump_flags(); + } + + return (EXIT_SUCCESS); +} + +int +parse() +{ + int lc; + char *p, *api, buf[256]; + + api = NULL; + + /* + * Read the method name/flag pairs. + */ + for (lc = 1; fgets(buf, sizeof(buf), stdin) != NULL; ++lc) { + if ((p = strchr(buf, '\n')) != NULL) + *p = '\0'; + else { + fprintf( + stderr, "%s: %d: line too long\n", progname, lc); + return (1); + } + + /* Ignore any empty line or hash mark. */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + + /* + * A line without leading whitespace is an API name, a line + * with leading whitespace is a flag name. 
+ */ + if (isspace(buf[0])) { + if ((p = strtok(buf, " \t")) == NULL || *p == '#') + continue; + + /* A flag without an API makes no sense. */ + if (api == NULL) + goto format; + + /* Enter the pair into the array. */ + if (add_entry(api, p)) + return (1); + } else { + if ((p = strtok(buf, " \t")) == NULL) + continue; + if (api != NULL) + free(api); + if ((api = strdup(p)) == NULL) + return (syserr()); + } + if ((p = strtok(NULL, " \t")) != NULL && *p != '#') + goto format; + } + + return (0); + +format: fprintf(stderr, "%s: format error: line %d\n", progname, lc); + return (1); +} + +int +add_entry(char *api_name, char *flag_name) +{ + FLAG **fpp, *fp; + API **app, *ap, **p; + u_int cnt; + + /* Search for this api's API structure. */ + for (app = api_list; + app != NULL && *app != NULL && app < api_end; ++app) + if (strcmp(api_name, (*app)->name) == 0) + break; + + /* Allocate new space in the API array if necessary. */ + if (app == NULL || app == api_end) { + cnt = app == NULL ? 100 : (u_int)(api_end - api_list) + 100; + if ((api_list = realloc(api_list, sizeof(API *) * cnt)) == NULL) + return (syserr()); + api_end = api_list + cnt; + app = api_list + (cnt - 100); + memset(app, 0, (u_int)(api_end - app) * sizeof(API *)); + } + + /* Allocate a new API structure and fill in the name if necessary. */ + if (*app == NULL && + ((*app = calloc(sizeof(API), 1)) == NULL || + ((*app)->name = strdup(api_name)) == NULL)) + return (syserr()); + + ap = *app; + + /* + * There's a special keyword, "__MASK=" that sets the initial + * flags value for an API, and so prevents those flag bits from being + * chosen for that API's flags. + */ + if (strncmp(flag_name, "__MASK=", sizeof("__MASK=") - 1) == 0) { + ap->used_mask |= + strtoul(flag_name + sizeof("__MASK=") - 1, NULL, 0); + return (0); + } + + /* Search for this flag's FLAG structure. 
*/ + for (fpp = flag_list; + fpp != NULL && *fpp != NULL && fpp < flag_end; ++fpp) + if (strcmp(flag_name, (*fpp)->name) == 0) + break; + + /* Realloc space in the FLAG array if necessary. */ + if (fpp == NULL || fpp == flag_end) { + cnt = fpp == NULL ? 100 : (u_int)(flag_end - flag_list) + 100; + if ((flag_list = + realloc(flag_list, sizeof(FLAG *) * cnt)) == NULL) + return (syserr()); + flag_end = flag_list + cnt; + fpp = flag_list + (cnt - 100); + memset(fpp, 0, (u_int)(flag_end - fpp) * sizeof(FLAG *)); + } + + /* Allocate a new FLAG structure and fill in the name if necessary. */ + if (*fpp == NULL && + ((*fpp = calloc(sizeof(FLAG), 1)) == NULL || + ((*fpp)->name = strdup(flag_name)) == NULL)) + return (syserr()); + + fp = *fpp; + ++fp->api_cnt; + + /* Check to see if this API is already listed for this flag. */ + for (p = fp->api; p != NULL && *p != NULL && p < fp->api_end; ++p) + if (strcmp(api_name, (*p)->name) == 0) { + fprintf(stderr, + "duplicate entry: %s / %s\n", api_name, flag_name); + return (1); + } + + /* Realloc space in the FLAG's API array if necessary. */ + if (p == NULL || p == fp->api_end) { + cnt = p == NULL ? 
20 : (u_int)(fp->api_end - fp->api) + 20; + if ((fp->api = realloc(fp->api, sizeof(API *) * cnt)) == NULL) + return (syserr()); + fp->api_end = fp->api + cnt; + p = fp->api + (cnt - 20); + memset(p, 0, (u_int)(fp->api_end - fp->api) * sizeof(API *)); + } + *p = ap; + + return (0); +} + +void +dump_api() +{ + API **app; + + printf("=============================\nAPI:\n"); + for (app = api_list; *app != NULL; ++app) + printf("%s (%#x)\n", (*app)->name, (*app)->used_mask); +} + +void +dump_flags() +{ + FLAG **fpp; + API **api; + char *sep; + + printf("=============================\nFLAGS:\n"); + for (fpp = flag_list; *fpp != NULL; ++fpp) { + printf("%s (%#x, %d): ", + (*fpp)->name, (*fpp)->value, (*fpp)->api_cnt); + sep = ""; + for (api = (*fpp)->api; *api != NULL; ++api) { + printf("%s%s", sep, (*api)->name); + sep = ", "; + } + printf("\n"); + } +} + +int +flag_cmp_api_cnt(const void *a, const void *b) +{ + FLAG *af, *bf; + + af = *(FLAG **)a; + bf = *(FLAG **)b; + + if (af == NULL) { + if (bf == NULL) + return (0); + return (1); + } + if (bf == NULL) { + if (af == NULL) + return (0); + return (-1); + } + if (af->api_cnt > bf->api_cnt) + return (-1); + if (af->api_cnt < bf->api_cnt) + return (1); + return (strcmp(af->name, bf->name)); +} + +int +generate_flags() +{ + FLAG **fpp; + API **api; + u_int mask; + + /* Sort the FLAGS array by reference count, in reverse order. */ + qsort(flag_list, + (u_int)(flag_end - flag_list), sizeof(FLAG *), flag_cmp_api_cnt); + + /* + * Here's the plan: walk the list of flags, allocating bits. For + * each flag, we walk the list of APIs that use it and find a bit + * none of them are using. That bit becomes the flag's value. 
+ */ + for (fpp = flag_list; *fpp != NULL; ++fpp) { + mask = 0xffffffff; /* Set to all 1's */ + for (api = (*fpp)->api; *api != NULL; ++api) + mask &= ~(*api)->used_mask; /* Clear API's bits */ + if (mask == 0) { + fprintf(stderr, "%s: ran out of bits at flag %s\n", + progname, (*fpp)->name); + return (1); + } + (*fpp)->value = mask = 1 << (ffs(mask) - 1); + for (api = (*fpp)->api; *api != NULL; ++api) + (*api)->used_mask |= mask; /* Set bit for API */ + } + + return (0); +} + +int +flag_cmp_alpha(const void *a, const void *b) +{ + FLAG *af, *bf; + + af = *(FLAG **)a; + bf = *(FLAG **)b; + + if (af == NULL) { + if (bf == NULL) + return (0); + return (1); + } + if (bf == NULL) { + if (af == NULL) + return (0); + return (-1); + } + return (strcmp(af->name, bf->name)); +} + +void +print_api_mask() +{ + API **app; + char *p, buf[256]; + + /* Output a mask for the API. */ + for (app = api_list; *app != NULL; ++app) { + (void)snprintf( + buf, sizeof(buf), "_%s_API_MASK", (*app)->name); + for (p = buf; *p != '\0'; ++p) + if (islower(*p)) + *p = toupper(*p); + else if (!isalpha(*p)) + *p = '_'; + define_print(buf, (*app)->used_mask); + } +} + +void +print_api_remainder() +{ + API **app; + int unused, i; + + /* Output the bits remaining for the API. */ + for (app = api_list; *app != NULL; ++app) { + for (i = unused = 0; i < 32; ++i) + if (!((*app)->used_mask & (1 << i))) + ++unused; + printf("%s: %d bits unused\n", (*app)->name, unused); + } +} + +void +print_flag_value() +{ + FLAG **fpp; + + /* Sort the FLAGS array in alphabetical order. */ + qsort(flag_list, + (u_int)(flag_end - flag_list), sizeof(FLAG *), flag_cmp_alpha); + + /* Output each flag's value. 
*/ + for (fpp = flag_list; *fpp != NULL; ++fpp) + define_print((*fpp)->name, (*fpp)->value); +} + +void +define_print(char *name, u_int value) +{ + char *sep; + + switch (strlen(name) / 8) { + case 0: + sep = "\t\t\t\t\t"; + break; + case 1: + sep = "\t\t\t\t"; + break; + case 2: + sep = "\t\t\t"; + break; + case 3: + sep = "\t\t"; + break; + default: + sep = "\t"; + break; + } + printf("#define\t%s%s%#010x\n", name, sep, value); +} + +int +syserr(void) +{ + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: %s [-mrv]\n", progname); + return (EXIT_FAILURE); +} diff --git a/dist/buildpkg b/dist/buildpkg new file mode 100644 index 00000000..aa1b07cd --- /dev/null +++ b/dist/buildpkg @@ -0,0 +1,244 @@ +#!/bin/sh +# $Id: $ +# +die() +{ + echo >&2 "$@" + exit 1 +} + +# Build the distribution package. +. ./RELEASE || die "Can't read the RELEASE file" + +CSHARP_DOC_SRC="" +# 0 is none, 1 is local dir, 2 is remote dir +CSHARP_DOC_LOCATION=0 +test_run=0 + +while [ $# -gt 0 ] +do + case "$1" in + -n) + nodocs=true;; + -csharp_doc_src) + shift + if [ ! $# -gt 0 ]; then + die "csharp_doc_dir param requires argument." + fi + CSHARP_DOC_SRC=$1 + CSHARP_DOC_LOCATION=1 + if [ ! -f $CSHARP_DOC_SRC ]; then + die "CSharp doc archive must exist." + fi;; + -csharp_doc_url) + shift + if [ ! $# -gt 0 ]; then + die "csharp_doc_dir param requires argument." + fi + CSHARP_DOC_SRC=$1 + CSHARP_DOC_LOCATION=2;; + -test) + echo "Doing a test run - this may contain changes that aren't\ +reflected in a tag, so the package won't be reproducible." + test_run=1;; + esac + shift +done + +# A version string can be specified on the command line (e.g., "20080219"). +# Otherwise, use the standard X.X.X format. +VERSION=${1:-${DB_VERSION_MAJOR}.${DB_VERSION_MINOR}.${DB_VERSION_PATCH}} + +# Use "ustar" as the archiver +TAR=ustar + +# Set root directory where we do the work, can be anywhere. 
+START_DIR=`pwd` +D=`pwd`/../release +R="$D/db-${VERSION}" +RNC="$D/db-$VERSION.NC" +DOCS=`pwd`/../../docs_books +DB_ADDONS=`pwd`/../../db_addons + +if [ ! -d $DB_ADDONS ]; then + echo "buildpkg requires a db_addons repository at the same level as the db repository." + exit 1 +fi + +# Create directory, remove any previous release tree. +rm -rf $R $RNC +mkdir -p $R + +echo "Removed old release build from $R" + +# Copy the files in the current tip to $R +hg archive $R + +# If doing a test run, apply any local changes to the new tree. +if [ $test_run != 0 ]; then + hg diff | patch -p1 -d $R +fi + +echo "Created hg archive in $R" + +if [ "$nodocs" = true ] ; then + rm -rf $R/docs +else + [ -d $DOCS ] || die "buildpkg requires a docs_books repository at the same level as the db repository." + + # Check that the doc repo is up to date, and create a tag if necessary. + cd $DOCS + hg pull -u + if [ $? != 0 ]; then + rm -rf $R + die "Failed updating the docs_books repository." + fi + has_tag=`hg tags | grep "db-${VERSION}"` + if [ "$has_tag" = "" ]; then + hg tag "db-${VERSION}" + TAG_CREATED="true" + else + hg up -r "db-${VERSION}" + fi + + # Build a copy of the documentation in the release tree. + cd $R/dist + sh s_docs db-${VERSION} $DOCS + + if [ $? != 0 ]; then + rm -rf $R + die "Failed generating documentation." + fi + + # Copy in the C sharp doc. + if [ $CSHARP_DOC_LOCATION -eq 2 ]; then + scp $CSHARP_DOC_SRC . + CSHARP_DOC_SRC="csharp_docs.tgz" + if [ ! -f $CSHARP_DOC_SRC ]; then + echo "WARNING: Invalid csharp doc file - csharp_docs.tgz expected." + fi + fi + if [ $CSHARP_DOC_LOCATION -eq 0 -o ! -f $CSHARP_DOC_SRC ]; then + echo "WARNING: No csharp docs, skipping." + CSHARP_DOC_LOCATION=0 + fi + if [ $CSHARP_DOC_LOCATION != 0 ]; then + rm -rf $R/docs/csharp + mkdir -p $R/docs/csharp + $TAR zxf $CSHARP_DOC_SRC -C $R/docs/csharp + fi + + # Build the Java documentation. + cd $R/dist && sh s_javadoc +fi + +# Pull a copy of the JDBC and ODBC libraries into the package. 
+# Build the ADO.NET package, including moving the ADO.NET doc built above +# into that package. +# Tell the script where to look for packages. +cd $R/dist && sh s_sql_drivers ../../../.. +# Warn if s_sql_drivers didn't move its docs. +if [ -e "$R/docs/bdb-sql-ado" ]; then + echo "WARNING: ADO.NET doc is still in the non ADO.NET package." +fi + +cd $START_DIR + +# Pull a copy of the bfile directory into the package. +cd $DB_ADDONS +hg pull -u +if [ $? != 0 ]; then + echo "Failed updating the db_addons repository. Exiting." + rm -rf $R + exit 1 +fi + +cd $START_DIR +SQL_EXT_DIR=$R/lang/sql/sqlite/ext +if [ ! -d $SQL_EXT_DIR ]; then + mkdir -p $SQL_EXT_DIR +fi +if [ -d $SQL_EXT_DIR/bfile ]; then + rm -rf $SQL_EXT_DIR/bfile +fi +cp -rp $DB_ADDONS/bfile $SQL_EXT_DIR + +# Remove source directories we don't distribute. +cd $R && rm -rf test/tcl/TODO test/upgrade test/scr036 test/erlang +cd $R && rm -rf test/perf test/purify test/repmgr +cd $R && rm -rf test/server test/stl test/vxworks +cd $R && find . -name '.hg*' | xargs rm -f +cd $R && find . -name 'tags' | xargs rm -f + +# Create symbolic links and cscope output, fix permissions. +#cd $R/dist && sh s_perm +#cd $R/dist && sh s_cscope + +# Build a regular version and smoke test. +### cd $R && rm -rf build_run && mkdir build_run +### cd $R/build_run && ../dist/configure && make >& mklog +### cd $R/build_run && make ex_access && echo "test" | ./ex_access +# Check the install +### cd $R/build_run && make prefix=`pwd`/BDB install + +# Build a small-footprint version and smoke test. +### cd $R && rm -rf build_run && mkdir build_run +### cd $R/build_run && ../dist/configure --enable-smallbuild && make >& mklog +### cd $R/build_run && make ex_access && echo "test" | ./ex_access + +# Remove the build directory +### cd $R && rm -rf build_run + +(cd $R/dist && ./s_perm) + +# Check for file names differing only in case. +cd $R && find . | sort -f | uniq -ic | sed '/1 /d' + +# Create the crypto tar archive release. 
+T="$D/db-$VERSION.tar.gz" +rm -f $T +cd $D || die "Can't find $D" +# Move package files in db-$VERSION/release to current directory so that +# regular packages won't includes generated package twice. +if [ -d "db-$VERSION/release" ]; then + mv db-$VERSION/release/* . + rm -rf db-$VERSION/release +fi +$TAR czf $T -find db-$VERSION -chown 100 -chgrp 100 +chmod 444 $T + +# Create the non-crypto tree. +cd $D && mv -i db-$VERSION $RNC && $TAR xzf $T +cd $RNC/dist && sh s_crypto + +(cd $RNC/dist && ./s_perm) + +# Create the non-crypto tar archive release. +T="$D/db-$VERSION.NC.tar.gz" +rm -f $T +cd $RNC/.. && $TAR czf $T -find db-$VERSION.NC -chown 100 -chgrp 100 +chmod 444 $T + +t=__tmp +cd $R && awk '{print $0 "\r"}' < LICENSE > $t && rm -f LICENSE && cp $t LICENSE +cd $R && awk '{print $0 "\r"}' < README > $t && rm -f README && cp $t README && rm $t +cd $RNC && awk '{print $0 "\r"}' < LICENSE > $t && rm -f LICENSE && cp $t LICENSE +cd $RNC && awk '{print $0 "\r"}' < README > $t && rm -f README && cp $t README && rm $t + +# Create the crypto zip archive release. +T="$D/db-$VERSION.zip" +rm -f $T +cd $R/.. && rm -f $T && zip -q -r $T db-$VERSION +chmod 444 $T + +# Create the non-crypto zip archive release. +T="$D/db-$VERSION.NC.zip" +rm -f $T +cd $RNC/.. && rm -f $T && zip -q -r $T db-$VERSION.NC +chmod 444 $T + +rm -rf $R $RNC + +if [ "$TAG_CREATED" = "true" ]; then + echo "Created a tag in docs_books repository. Please push." +fi diff --git a/dist/bumprel b/dist/bumprel new file mode 100644 index 00000000..459dd3f1 --- /dev/null +++ b/dist/bumprel @@ -0,0 +1,59 @@ +#!/bin/sh +# +# $Id: $ +# +# Bump the Berkeley DB version + +P=`pwd` +R=`dirname $P` +progname="$0" +VERSION="$1" +assembly="../lang/csharp/src/Properties/AssemblyInfo.cs" +t=/tmp/__assembly + +# Sanity check +if [ ! -f $R/dist/RELEASE ] ; then + echo "$progname must be run in the dist directory of a Berkeley DB tree" + exit 1 +fi + +OIFS="$IFS" +IFS=. 
+set -- $VERSION + +if [ $# != 5 ] ; then + echo "Usage: $progname X.X.X.X.X -- sets the Berkeley DB version to X.X.X.X.X" + exit 1 +fi +FAMILY="$1" RELEASE="$2" MAJOR="$3" MINOR="$4" PATCH="$5" +DATE=`date "+%B %e, %Y"` + +IFS="$OFS" + +# Update the change log patch number -- there's 1 location to update in +# the change log "table of contents", and 2 in the Change Log itself. +#cd $R/docs_src/ref/changelog && vi toc.so ${MAJOR}.${MINOR}.html + +# Update the release number. +cd $R/dist &&\ + (echo "/^DB_VERSION_FAMILY/s/=.*/=$FAMILY/" &&\ + echo "/^DB_VERSION_RELEASE/s/=.*/=$RELEASE/" &&\ + echo "/^DB_VERSION_MAJOR/s/=.*/=$MAJOR/" &&\ + echo "/^DB_VERSION_MINOR/s/=.*/=$MINOR/" &&\ + echo "/^DB_VERSION_PATCH/s/=.*/=$PATCH/" &&\ + echo "/^DB_RELEASE_DATE/s/=.*/=\"$DATE\"/" &&\ + echo w &&\ + echo q) | ed RELEASE > /dev/null +VERSION=`sh -c '. ./RELEASE; echo $DB_VERSION'` +echo "Berkeley DB release $VERSION." + +# Build auto-generated files. +cd $R/dist && sh s_all + +# Update the CSharp assembly information +sed -e "s:AssemblyVersion(\"[0-9]*\.[0-9]*\.[0-9]*\"):AssemblyVersion(\"$MAJOR\.$MINOR\.$PATCH\"):" < $assembly > $t +cmp $t $assembly > /dev/null 2>&1 || + (rm -f $assembly && cp $t $assembly && rm -f $t && chmod 444 $assembly) + +# Commit all of the changes. +echo "Now run 'hg commit && hg tag db-$MAJOR.$MINOR.$PATCH && hg push'" diff --git a/dist/clib_port.in b/dist/clib_port.in new file mode 100644 index 00000000..b29978d4 --- /dev/null +++ b/dist/clib_port.in @@ -0,0 +1,279 @@ +/* DO NOT EDIT: automatically built from dist/clib_port.in. */ +/* + * Minimum/maximum values for various types. + */ +#ifndef UINT16_MAX /* Maximum 16-bit unsigned. */ +#define UINT16_MAX 65535 +#endif +#ifndef UINT32_MAX /* Maximum 32-bit unsigned. 
*/ +#define UINT32_MAX 4294967295U +#endif + +#ifndef INT_MAX +#if SIZEOF_INT == 4 +#define INT_MAX 2147483647 +#endif +#if SIZEOF_INT == 8 +#define INT_MAX 9223372036854775807 +#endif +#endif + +#ifndef INT_MIN /* minimum (signed) int value */ +#define INT_MIN (-INT_MAX-1) +#endif + +#ifndef UINT_MAX /* maximum (signed) int value */ +#if SIZEOF_INT == 4 +#define UINT_MAX 4294967295U +#endif +#if SIZEOF_INT == 8 +#define UINT_MAX 18446744073709551615U +#endif +#endif + +#ifndef LONG_MAX /* maximum (signed) long value */ +#if SIZEOF_LONG == 4 +#define LONG_MAX 2147483647 +#endif +#if SIZEOF_LONG == 8 +#define LONG_MAX 9223372036854775807L +#endif +#endif + +#ifndef LONG_MIN /* minimum (signed) long value */ +#define LONG_MIN (-LONG_MAX-1) +#endif + +#ifndef ULONG_MAX /* maximum (unsigned) long value */ +#if SIZEOF_LONG == 4 +#define ULONG_MAX 4294967295U +#endif +#if SIZEOF_LONG == 8 +#define ULONG_MAX 18446744073709551615UL +#endif +#endif + +#if defined(HAVE_64BIT_TYPES) +/* + * Override the system's 64-bit min/max constants. AIX's 32-bit compiler can + * handle 64-bit values, but the system's constants don't include the LL/ULL + * suffix, and so can't be compiled using the 32-bit compiler. + */ +#undef INT64_MAX +#undef INT64_MIN +#undef UINT64_MAX + +#ifdef DB_WIN32 +#define INT64_MAX _I64_MAX +#define INT64_MIN _I64_MIN +#define UINT64_MAX _UI64_MAX +#else +#define INT64_MAX 9223372036854775807LL +#define INT64_MIN (-INT64_MAX-1) +#define UINT64_MAX 18446744073709551615ULL +#endif /* DB_WIN32 */ + +@INT64_FMT@ +@UINT64_FMT@ +#endif /* HAVE_64BIT_TYPES */ + +/* + * Exit success/failure macros. + */ +#ifndef HAVE_EXIT_SUCCESS +#define EXIT_FAILURE 1 +#define EXIT_SUCCESS 0 +#endif + +/* + * File modes. + */ +#ifdef DB_WIN32 +#ifndef S_IREAD /* WinCE doesn't have S_IREAD. */ +#define S_IREAD 0 +#endif +#ifndef S_IWRITE /* WinCE doesn't have S_IWRITE. 
*/ +#define S_IWRITE 0 +#endif +#ifndef S_IRUSR +#define S_IRUSR S_IREAD /* R for owner */ +#endif +#ifndef S_IWUSR +#define S_IWUSR S_IWRITE /* W for owner */ +#endif +#ifndef S_IXUSR +#define S_IXUSR 0 /* X for owner */ +#endif +#ifndef S_IRGRP +#define S_IRGRP 0 /* R for group */ +#endif +#ifndef S_IWGRP +#define S_IWGRP 0 /* W for group */ +#endif +#ifndef S_IXGRP +#define S_IXGRP 0 /* X for group */ +#endif +#ifndef S_IROTH +#define S_IROTH 0 /* R for other */ +#endif +#ifndef S_IWOTH +#define S_IWOTH 0 /* W for other */ +#endif +#ifndef S_IXOTH +#define S_IXOTH 0 /* X for other */ +#endif +#else /* !DB_WIN32 */ +#ifndef S_IRUSR +#define S_IRUSR 0000400 /* R for owner */ +#endif +#ifndef S_IWUSR +#define S_IWUSR 0000200 /* W for owner */ +#endif +#ifndef S_IXUSR +#define S_IXUSR 0000100 /* X for owner */ +#endif +#ifndef S_IRGRP +#define S_IRGRP 0000040 /* R for group */ +#endif +#ifndef S_IWGRP +#define S_IWGRP 0000020 /* W for group */ +#endif +#ifndef S_IXGRP +#define S_IXGRP 0000010 /* X for group */ +#endif +#ifndef S_IROTH +#define S_IROTH 0000004 /* R for other */ +#endif +#ifndef S_IWOTH +#define S_IWOTH 0000002 /* W for other */ +#endif +#ifndef S_IXOTH +#define S_IXOTH 0000001 /* X for other */ +#endif +#endif /* !DB_WIN32 */ + +/* + * Don't step on the namespace. Other libraries may have their own + * implementations of these functions, we don't want to use their + * implementations or force them to use ours based on the load order. 
+ */ +#ifndef HAVE_ATOI +#define atoi __db_Catoi +#endif +#ifndef HAVE_ATOL +#define atol __db_Catol +#endif +#ifndef HAVE_BSEARCH +#define bsearch __db_Cbsearch +#endif +#ifndef HAVE_FCLOSE +#define fclose __db_Cfclose +#endif +#ifndef HAVE_FGETC +#define fgetc __db_Cfgetc +#endif +#ifndef HAVE_FGETS +#define fgets __db_Cfgets +#endif +#ifndef HAVE_FOPEN +#define fopen __db_Cfopen +#endif +#ifndef HAVE_FWRITE +#define fwrite __db_Cfwrite +#endif +#ifndef HAVE_GETADDRINFO +#define freeaddrinfo(a) __db_Cfreeaddrinfo(a) +#define getaddrinfo(a, b, c, d) __db_Cgetaddrinfo(a, b, c, d) +#endif +#ifndef HAVE_GETCWD +#define getcwd __db_Cgetcwd +#endif +#ifndef HAVE_GETOPT +#define getopt __db_Cgetopt +#define optarg __db_Coptarg +#define opterr __db_Copterr +#define optind __db_Coptind +#define optopt __db_Coptopt +#define optreset __db_Coptreset +#endif +#ifndef HAVE_ISALPHA +#define isalpha __db_Cisalpha +#endif +#ifndef HAVE_ISDIGIT +#define isdigit __db_Cisdigit +#endif +#ifndef HAVE_ISPRINT +#define isprint __db_Cisprint +#endif +#ifndef HAVE_ISSPACE +#define isspace __db_Cisspace +#endif +#ifndef HAVE_LOCALTIME +#define localtime __db_Clocaltime +#endif +#ifndef HAVE_MEMCMP +#define memcmp __db_Cmemcmp +#endif +#ifndef HAVE_MEMCPY +#define memcpy __db_Cmemcpy +#endif +#ifndef HAVE_MEMMOVE +#define memmove __db_Cmemmove +#endif +#ifndef HAVE_PRINTF +#define printf __db_Cprintf +#define fprintf __db_Cfprintf +#endif +#ifndef HAVE_QSORT +#define qsort __db_Cqsort +#endif +#ifndef HAVE_RAISE +#define raise __db_Craise +#endif +#ifndef HAVE_RAND +#define rand __db_Crand +#define srand __db_Csrand +#endif +#ifndef HAVE_SNPRINTF +#define snprintf __db_Csnprintf +#endif +#ifndef HAVE_STRCASECMP +#define strcasecmp __db_Cstrcasecmp +#define strncasecmp __db_Cstrncasecmp +#endif +#ifndef HAVE_STRCAT +#define strcat __db_Cstrcat +#endif +#ifndef HAVE_STRCHR +#define strchr __db_Cstrchr +#endif +#ifndef HAVE_STRDUP +#define strdup __db_Cstrdup +#endif +#ifndef HAVE_STRERROR 
+#define strerror __db_Cstrerror +#endif +#ifndef HAVE_STRNCAT +#define strncat __db_Cstrncat +#endif +#ifndef HAVE_STRNCMP +#define strncmp __db_Cstrncmp +#endif +#ifndef HAVE_STRRCHR +#define strrchr __db_Cstrrchr +#endif +#ifndef HAVE_STRSEP +#define strsep __db_Cstrsep +#endif +#ifndef HAVE_STRTOL +#define strtol __db_Cstrtol +#endif +#ifndef HAVE_STRTOUL +#define strtoul __db_Cstrtoul +#endif +#ifndef HAVE_TIME +#define time __db_Ctime +#endif +#ifndef HAVE_VSNPRINTF +#define vsnprintf __db_Cvsnprintf +#endif diff --git a/dist/config.guess b/dist/config.guess new file mode 100755 index 00000000..187cd54e --- /dev/null +++ b/dist/config.guess @@ -0,0 +1,1511 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011 Free Software Foundation, Inc. + +timestamp='2011-02-02' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner. 
Please send patches (context +# diff format) to and include a ChangeLog +# entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free +Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. 
+ +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. 
+ +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. 
+ case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? 
+ echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo 
powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} 
+ exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) 
echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-tilera-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configury will decide that + # this is a cross-build. 
+ echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + 
TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/dist/config.hin b/dist/config.hin new file mode 100644 index 00000000..7644e1a0 --- /dev/null +++ b/dist/config.hin @@ -0,0 +1,636 @@ +/* config.hin. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you want to build a version for running the test suite. */ +#undef CONFIG_TEST + +/* Defined to a size to limit the stack size of Berkeley DB threads. */ +#undef DB_STACKSIZE + +/* We use DB_WIN32 much as one would use _WIN32 -- to specify that we're using + an operating system environment that supports Win32 calls and semantics. We + don't use _WIN32 because Cygwin/GCC also defines _WIN32, even though + Cygwin/GCC closely emulates the Unix environment. */ +#undef DB_WIN32 + +/* Define to 1 if you want a debugging version. */ +#undef DEBUG + +/* Define to 1 if you want a version that logs read operations. */ +#undef DEBUG_ROP + +/* Define to 1 if you want a version that logs write operations. 
*/ +#undef DEBUG_WOP + +/* Define to 1 if you want a version with run-time diagnostic checking. */ +#undef DIAGNOSTIC + +/* Define to 1 if 64-bit types are available. */ +#undef HAVE_64BIT_TYPES + +/* Define to 1 if you have the `abort' function. */ +#undef HAVE_ABORT + +/* Define to 1 if you have the `atoi' function. */ +#undef HAVE_ATOI + +/* Define to 1 if you have the `atol' function. */ +#undef HAVE_ATOL + +/* Define to 1 to use Solaris library routes for atomic operations. */ +#undef HAVE_ATOMIC_SOLARIS + +/* Define to 1 to use native atomic operations. */ +#undef HAVE_ATOMIC_SUPPORT + +/* Define to 1 to use GCC and x86 or x86_64 assemlby language atomic + operations. */ +#undef HAVE_ATOMIC_X86_GCC_ASSEMBLY + +/* Define to 1 if you have the `backtrace' function. */ +#undef HAVE_BACKTRACE + +/* Define to 1 if you have the `backtrace_symbols' function. */ +#undef HAVE_BACKTRACE_SYMBOLS + +/* Define to 1 if you have the `bsearch' function. */ +#undef HAVE_BSEARCH + +/* Define to 1 if you have the `clock_gettime' function. */ +#undef HAVE_CLOCK_GETTIME + +/* Define to 1 if clock_gettime supports CLOCK_MONOTONIC. */ +#undef HAVE_CLOCK_MONOTONIC + +/* Define to 1 if building compression support. */ +#undef HAVE_COMPRESSION + +/* Define to 1 if building cryptography support. */ +#undef HAVE_CRYPTO + +/* Define to 1 if using Intel IPP for cryptography. */ +#undef HAVE_CRYPTO_IPP + +/* Define to 1 if you have the `ctime_r' function. */ +#undef HAVE_CTIME_R + +/* Define to 1 if ctime_r takes a buffer length as a third argument. */ +#undef HAVE_CTIME_R_3ARG + +/* Define to 1 if building the DBM API. */ +#undef HAVE_DBM + +/* Define to 1 if you have the `directio' function. */ +#undef HAVE_DIRECTIO + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 to use dtrace for performance monitoring. 
*/ +#undef HAVE_DTRACE + +/* Define to 1 if you have the header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if platform has EXIT_SUCCESS/EXIT_FAILURE #defines. */ +#undef HAVE_EXIT_SUCCESS + +/* Define to 1 if you have the `fchmod' function. */ +#undef HAVE_FCHMOD + +/* Define to 1 if you have the `fclose' function. */ +#undef HAVE_FCLOSE + +/* Define to 1 if you have the `fcntl' function. */ +#undef HAVE_FCNTL + +/* Define to 1 if fcntl/F_SETFD denies child access to file descriptors. */ +#undef HAVE_FCNTL_F_SETFD + +/* Define to 1 if you have the `fdatasync' function. */ +#undef HAVE_FDATASYNC + +/* Define to 1 if you have the `fgetc' function. */ +#undef HAVE_FGETC + +/* Define to 1 if you have the `fgets' function. */ +#undef HAVE_FGETS + +/* Define to 1 if allocated filesystem blocks are not zeroed. */ +#undef HAVE_FILESYSTEM_NOTZERO + +/* Define to 1 if you have the `fopen' function. */ +#undef HAVE_FOPEN + +/* Define to 1 if you have the `ftruncate' function. */ +#undef HAVE_FTRUNCATE + +/* Define to 1 if you have the `fwrite' function. */ +#undef HAVE_FWRITE + +/* Define to 1 if you have the `getaddrinfo' function. */ +#undef HAVE_GETADDRINFO + +/* Define to 1 if you have the `getcwd' function. */ +#undef HAVE_GETCWD + +/* Define to 1 if you have the `getenv' function. */ +#undef HAVE_GETENV + +/* Define to 1 if you have the `getgid' function. */ +#undef HAVE_GETGID + +/* Define to 1 if you have the `getopt' function. */ +#undef HAVE_GETOPT + +/* Define to 1 if getopt supports the optreset variable. */ +#undef HAVE_GETOPT_OPTRESET + +/* Define to 1 if you have the `getrusage' function. */ +#undef HAVE_GETRUSAGE + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* Define to 1 if you have the `getuid' function. */ +#undef HAVE_GETUID + +/* Define to 1 if building Hash access method. */ +#undef HAVE_HASH + +/* Define to 1 if building Heap access method. 
*/ +#undef HAVE_HEAP + +/* Define to 1 if you have the `hstrerror' function. */ +#undef HAVE_HSTRERROR + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `isalpha' function. */ +#undef HAVE_ISALPHA + +/* Define to 1 if you have the `isdigit' function. */ +#undef HAVE_ISDIGIT + +/* Define to 1 if you have the `isprint' function. */ +#undef HAVE_ISPRINT + +/* Define to 1 if you have the `isspace' function. */ +#undef HAVE_ISSPACE + +/* Define to 1 if you have localization function to support globalization. */ +#undef HAVE_LOCALIZATION + +/* Define to 1 if you have the `localtime' function. */ +#undef HAVE_LOCALTIME + +/* Define to 1 if enabling checksums in log records. */ +#undef HAVE_LOG_CHECKSUM + +/* Define to 1 if you have the `memcmp' function. */ +#undef HAVE_MEMCMP + +/* Define to 1 if you have the `memcpy' function. */ +#undef HAVE_MEMCPY + +/* Define to 1 if you have the `memmove' function. */ +#undef HAVE_MEMMOVE + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mlock' function. */ +#undef HAVE_MLOCK + +/* Define to 1 if you have the `mmap' function. */ +#undef HAVE_MMAP + +/* Define to 1 where mmap() incrementally extends the accessible mapping as + the underlying file grows. */ +#undef HAVE_MMAP_EXTEND + +/* Define to 1 if you have the `mprotect' function. */ +#undef HAVE_MPROTECT + +/* Define to 1 if you have the `munlock' function. */ +#undef HAVE_MUNLOCK + +/* Define to 1 if you have the `munmap' function. */ +#undef HAVE_MUNMAP + +/* Define to 1 to use the GCC compiler and 68K assembly language mutexes. */ +#undef HAVE_MUTEX_68K_GCC_ASSEMBLY + +/* Define to 1 to use the AIX _check_lock mutexes. */ +#undef HAVE_MUTEX_AIX_CHECK_LOCK + +/* Define to 1 to use the GCC compiler and Alpha assembly language mutexes. */ +#undef HAVE_MUTEX_ALPHA_GCC_ASSEMBLY + +/* Define to 1 to use the GCC compiler and ARM assembly language mutexes. 
*/ +#undef HAVE_MUTEX_ARM_GCC_ASSEMBLY + +/* Define to 1 to use the Apple/Darwin _spin_lock_try mutexes. */ +#undef HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY + +/* Define to 1 to use the UNIX fcntl system call mutexes. */ +#undef HAVE_MUTEX_FCNTL + +/* Define to 1 to use the GCC compiler and PaRisc assembly language mutexes. + */ +#undef HAVE_MUTEX_HPPA_GCC_ASSEMBLY + +/* Define to 1 to use the msem_XXX mutexes on HP-UX. */ +#undef HAVE_MUTEX_HPPA_MSEM_INIT + +/* Define to 1 to use test-and-set mutexes with blocking mutexes. */ +#undef HAVE_MUTEX_HYBRID + +/* Define to 1 to use the GCC compiler and IA64 assembly language mutexes. */ +#undef HAVE_MUTEX_IA64_GCC_ASSEMBLY + +/* Define to 1 to use the GCC compiler and MIPS assembly language mutexes. */ +#undef HAVE_MUTEX_MIPS_GCC_ASSEMBLY + +/* Define to 1 to use the msem_XXX mutexes on systems other than HP-UX. */ +#undef HAVE_MUTEX_MSEM_INIT + +/* Define to 1 to use the GCC compiler and PowerPC assembly language mutexes. + */ +#undef HAVE_MUTEX_PPC_GCC_ASSEMBLY + +/* Define to 1 to use POSIX 1003.1 pthread_XXX mutexes. */ +#undef HAVE_MUTEX_PTHREADS + +/* Define to 1 to use Reliant UNIX initspin mutexes. */ +#undef HAVE_MUTEX_RELIANTUNIX_INITSPIN + +/* Define to 1 to use the IBM C compiler and S/390 assembly language mutexes. + */ +#undef HAVE_MUTEX_S390_CC_ASSEMBLY + +/* Define to 1 to use the GCC compiler and S/390 assembly language mutexes. */ +#undef HAVE_MUTEX_S390_GCC_ASSEMBLY + +/* Define to 1 to use the SCO compiler and x86 assembly language mutexes. */ +#undef HAVE_MUTEX_SCO_X86_CC_ASSEMBLY + +/* Define to 1 to use the obsolete POSIX 1003.1 sema_XXX mutexes. */ +#undef HAVE_MUTEX_SEMA_INIT + +/* Define to 1 to use the SGI XXX_lock mutexes. */ +#undef HAVE_MUTEX_SGI_INIT_LOCK + +/* Define to 1 to use the Solaris _lock_XXX mutexes. */ +#undef HAVE_MUTEX_SOLARIS_LOCK_TRY + +/* Define to 1 to use the Solaris lwp threads mutexes. 
*/ +#undef HAVE_MUTEX_SOLARIS_LWP + +/* Define to 1 to use the GCC compiler and Sparc assembly language mutexes. */ +#undef HAVE_MUTEX_SPARC_GCC_ASSEMBLY + +/* Define to 1 if the Berkeley DB library should support mutexes. */ +#undef HAVE_MUTEX_SUPPORT + +/* Define to 1 if mutexes hold system resources. */ +#undef HAVE_MUTEX_SYSTEM_RESOURCES + +/* Define to 1 to configure mutexes intra-process only. */ +#undef HAVE_MUTEX_THREAD_ONLY + +/* Define to 1 to use the CC compiler and Tru64 assembly language mutexes. */ +#undef HAVE_MUTEX_TRU64_CC_ASSEMBLY + +/* Define to 1 to use the UNIX International mutexes. */ +#undef HAVE_MUTEX_UI_THREADS + +/* Define to 1 to use the UTS compiler and assembly language mutexes. */ +#undef HAVE_MUTEX_UTS_CC_ASSEMBLY + +/* Define to 1 to use VMS mutexes. */ +#undef HAVE_MUTEX_VMS + +/* Define to 1 to use VxWorks mutexes. */ +#undef HAVE_MUTEX_VXWORKS + +/* Define to 1 to use the MSVC compiler and Windows mutexes. */ +#undef HAVE_MUTEX_WIN32 + +/* Define to 1 to use the GCC compiler and Windows mutexes. */ +#undef HAVE_MUTEX_WIN32_GCC + +/* Define to 1 to use the GCC compiler and 64-bit x86 assembly language + mutexes. */ +#undef HAVE_MUTEX_X86_64_GCC_ASSEMBLY + +/* Define to 1 to use the GCC compiler and 32-bit x86 assembly language + mutexes. */ +#undef HAVE_MUTEX_X86_GCC_ASSEMBLY + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +#undef HAVE_NDIR_H + +/* Define to 1 if you have the O_DIRECT flag. */ +#undef HAVE_O_DIRECT + +/* Define to 1 if building partitioned database support. */ +#undef HAVE_PARTITION + +/* Define to 1 to enable some kind of performance event monitoring. */ +#undef HAVE_PERFMON + +/* Define to 1 to enable performance event monitoring of *_stat() statistics. + */ +#undef HAVE_PERFMON_STATISTICS + +/* Define to 1 if you have the `pread' function. */ +#undef HAVE_PREAD + +/* Define to 1 if you have the `printf' function. 
*/ +#undef HAVE_PRINTF + +/* Define to 1 if you have the `pstat_getdynamic' function. */ +#undef HAVE_PSTAT_GETDYNAMIC + +/* Define to 1 if it is OK to initialize an already initialized + pthread_cond_t. */ +#undef HAVE_PTHREAD_COND_REINIT_OKAY + +/* Define to 1 if it is OK to initialize an already initialized + pthread_rwlock_t. */ +#undef HAVE_PTHREAD_RWLOCK_REINIT_OKAY + +/* Define to 1 if you have the `pthread_self' function. */ +#undef HAVE_PTHREAD_SELF + +/* Define to 1 if you have the `pthread_yield' function. */ +#undef HAVE_PTHREAD_YIELD + +/* Define to 1 if you have the `pwrite' function. */ +#undef HAVE_PWRITE + +/* Define to 1 if building on QNX. */ +#undef HAVE_QNX + +/* Define to 1 if you have the `qsort' function. */ +#undef HAVE_QSORT + +/* Define to 1 if building Queue access method. */ +#undef HAVE_QUEUE + +/* Define to 1 if you have the `raise' function. */ +#undef HAVE_RAISE + +/* Define to 1 if you have the `rand' function. */ +#undef HAVE_RAND + +/* Define to 1 if you have the `random' function. */ +#undef HAVE_RANDOM + +/* Define to 1 if building replication support. */ +#undef HAVE_REPLICATION + +/* Define to 1 if building the Berkeley DB replication framework. */ +#undef HAVE_REPLICATION_THREADS + +/* Define to 1 if you have the `sched_yield' function. */ +#undef HAVE_SCHED_YIELD + +/* Define to 1 if you have the `select' function. */ +#undef HAVE_SELECT + +/* Define to 1 if you have the `setgid' function. */ +#undef HAVE_SETGID + +/* Define to 1 if you have the `setuid' function. */ +#undef HAVE_SETUID + +/* Define to 1 to configure Berkeley DB to use read/write latches. */ +#undef HAVE_SHARED_LATCHES + +/* Define to 1 if shmctl/SHM_LOCK locks down shared memory segments. */ +#undef HAVE_SHMCTL_SHM_LOCK + +/* Define to 1 if you have the `shmget' function. */ +#undef HAVE_SHMGET + +/* Define to 1 if you have the `sigaction' function. */ +#undef HAVE_SIGACTION + +/* Define to 1 if thread identifier type db_threadid_t is integral. 
*/ +#undef HAVE_SIMPLE_THREAD_TYPE + +/* Define to 1 if you have the `snprintf' function. */ +#undef HAVE_SNPRINTF + +/* Define to 1 if you have the `stat' function. */ +#undef HAVE_STAT + +/* Define to 1 if building statistics support. */ +#undef HAVE_STATISTICS + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strcasecmp' function. */ +#undef HAVE_STRCASECMP + +/* Define to 1 if you have the `strcat' function. */ +#undef HAVE_STRCAT + +/* Define to 1 if you have the `strchr' function. */ +#undef HAVE_STRCHR + +/* Define to 1 if you have the `strdup' function. */ +#undef HAVE_STRDUP + +/* Define to 1 if you have the `strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the `strftime' function. */ +#undef HAVE_STRFTIME + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if building without output message content. */ +#undef HAVE_STRIPPED_MESSAGES + +/* Define to 1 if you have the `strncat' function. */ +#undef HAVE_STRNCAT + +/* Define to 1 if you have the `strncmp' function. */ +#undef HAVE_STRNCMP + +/* Define to 1 if you have the `strrchr' function. */ +#undef HAVE_STRRCHR + +/* Define to 1 if you have the `strsep' function. */ +#undef HAVE_STRSEP + +/* Define to 1 if you have the `strtol' function. */ +#undef HAVE_STRTOL + +/* Define to 1 if you have the `strtoul' function. */ +#undef HAVE_STRTOUL + +/* Define to 1 if `st_blksize' is a member of `struct stat'. */ +#undef HAVE_STRUCT_STAT_ST_BLKSIZE + +/* Define to 1 if you have the `sysconf' function. */ +#undef HAVE_SYSCONF + +/* Define to 1 if port includes files in the Berkeley DB source code. */ +#undef HAVE_SYSTEM_INCLUDE_FILES + +/* Define to 1 if you have the header file, and it defines `DIR'. 
+ */ +#undef HAVE_SYS_DIR_H + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#undef HAVE_SYS_NDIR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SDT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SELECT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `time' function. */ +#undef HAVE_TIME + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if unlink of file with open file descriptors will fail. */ +#undef HAVE_UNLINK_WITH_OPEN_FAILURE + +/* Define to 1 if port includes historic database upgrade support. */ +#undef HAVE_UPGRADE_SUPPORT + +/* Define to 1 if building access method verification support. */ +#undef HAVE_VERIFY + +/* Define to 1 if you have the `vsnprintf' function. */ +#undef HAVE_VSNPRINTF + +/* Define to 1 if building on VxWorks. */ +#undef HAVE_VXWORKS + +/* Define to 1 if you have the `yield' function. */ +#undef HAVE_YIELD + +/* Define to 1 if you have the `_fstati64' function. */ +#undef HAVE__FSTATI64 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of `char', as computed by sizeof. 
*/ +#undef SIZEOF_CHAR + +/* The size of `char *', as computed by sizeof. */ +#undef SIZEOF_CHAR_P + +/* The size of `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of `long long', as computed by sizeof. */ +#undef SIZEOF_LONG_LONG + +/* The size of `short', as computed by sizeof. */ +#undef SIZEOF_SHORT + +/* The size of `size_t', as computed by sizeof. */ +#undef SIZEOF_SIZE_T + +/* The size of `unsigned char', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_CHAR + +/* The size of `unsigned int', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_INT + +/* The size of `unsigned long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG + +/* The size of `unsigned long long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG_LONG + +/* The size of `unsigned short', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_SHORT + +/* Define to 1 if the `S_IS*' macros in do not work properly. */ +#undef STAT_MACROS_BROKEN + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both and . */ +#undef TIME_WITH_SYS_TIME + +/* Define to 1 to mask harmless uninitialized memory read/writes. */ +#undef UMRW + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* type to use in place of socklen_t if not defined */ +#undef socklen_t diff --git a/dist/config.sub b/dist/config.sub new file mode 100755 index 00000000..30fdca81 --- /dev/null +++ b/dist/config.sub @@ -0,0 +1,1760 @@ +#! 
/bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011 Free Software Foundation, Inc. + +timestamp='2011-03-23' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to . Submit a context +# diff and a properly formatted GNU ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. 
+ +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free +Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. 
+ break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. 
+ ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | 
mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 \ + | ns16k | ns32k \ + | open8 \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pyramid \ + | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | picochip) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. 
+ 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pyramid-* \ + | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | 
sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile-* | tilegx-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + 
basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + 
os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
+ i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze) + basic_machine=microblaze-xilinx + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + 
news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo 
$basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + 
basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + # This must be matched before tile*. 
+ tilegx*) + basic_machine=tilegx-unknown + os=-linux-gnu + ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. 
+ # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/dist/configure b/dist/configure new file mode 100755 index 00000000..e28f2a38 --- /dev/null +++ b/dist/configure @@ -0,0 +1,27335 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.68 for Berkeley DB 5.2.28. +# +# Report bugs to . +# +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software +# Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR 
+as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + # We cannot yet assume a decent shell, so we have to provide a + # neutralization value for shells without unset; and this also + # works around shells that cannot unset nonexistent variables. + # Preserve -v and -x to the replacement shell. + BASH_ENV=/dev/null + ENV=/dev/null + (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV + export CONFIG_SHELL + case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; + esac + exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"} +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and Oracle Technology +$0: Network Berkeley DB forum about your system, including +$0: any error possibly output before this message. 
Then +$0: install a modern shell, or manually run the script +$0: under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. 
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in #( + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. 
+PACKAGE_NAME='Berkeley DB' +PACKAGE_TARNAME='db-5.2.28' +PACKAGE_VERSION='5.2.28' +PACKAGE_STRING='Berkeley DB 5.2.28' +PACKAGE_BUGREPORT='Oracle Technology Network Berkeley DB forum' +PACKAGE_URL='' + +ac_unique_file="../src/db/db.c" +enable_option_checking=no +ac_default_prefix=/usr/local/BerkeleyDB.5.2 +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +db_seq_decl +UINT64_FMT +INT64_FMT +TCL_TCLSH +TCL_SRC_DIR +TCL_LIB_FILE +TCL_INCLUDE_SPEC +TCL_BIN_DIR +LIBOBJS +LISTPROBES_COMMAND +LISTPROBES_DEPENDENCY +DTRACE_CPP +db_threadid_t_decl +thread_h_decl +uintptr_t_decl +uintmax_t_decl +ssize_t_decl +time_t_decl +size_t_decl +pid_t_decl +off_t_decl +FILE_t_decl +int64_decl +u_int64_decl +int32_decl +u_int32_decl +int16_decl +u_int16_decl +u_int8_decl +u_long_decl +u_int_decl +u_short_decl +u_char_decl +unistd_h_decl +stddef_h_decl +stdint_h_decl +inttypes_h_decl +TLS_defn +TLS_decl +WSTRING_decl +_ACJNI_JAVAC +uudecode +JAVA +JAVAC +JMODSUFFIX +MODSUFFIX +SOSUFFIX +CPP +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +AWK +RANLIB +STRIP +ac_ct_AR +AR +DLLTOOL +OBJDUMP +LN_S +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +EGREP +GREP +cxx_have_stdheaders +CXXCPP +ac_ct_CXX +CCC +OBJEXT +EXEEXT +ac_ct_CC +CC +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +db_cv_path_sh +PERL +SED +STAP +MV +RM +MKDIR +LN +KILL +CP +CHMOD +DB_VERSION_UNIQUE_NAME +DB_VERSION_FULL_STRING +DB_VERSION_STRING +DB_VERSION_PATCH +DB_VERSION_MINOR +DB_VERSION_MAJOR 
+DB_VERSION_RELEASE +DB_VERSION_FAMILY +platform_footer +platform_header +topdir +subdir_cmd +o +db_int_def +TEST_LIBS +SWIGCFLAGS +SQL_LIBS +SQL_FLAGS +SOFLAGS +REPLACEMENT_OBJS +PRINTLOG_OBJS +POSTLINK +OSDIR +MAKEFILE_XSOLINK +MAKEFILE_SOLINK +MAKEFILE_CXXLINK +MAKEFILE_CXX +MAKEFILE_CCLINK +MAKEFILE_CC +LIBXSO_LIBS +LIBTSO_MODULE +LIBTSO_MODSUFFIX +LIBTSO_LIBS +LIBTOOL +LIBSO_LIBS +LIBJSO_LIBS +LIBCSO_LIBS +LDFLAGS +JAVACFLAGS +JAR +INSTALL_TARGET +INSTALL_LIBS_EXTRA +INSTALL_LIBS +INSTALLER +FINAL_OBJS +DTRACE +DEFAULT_LIB_TCL +DEFAULT_LIB_STL +DEFAULT_LIB_SQLITE +DEFAULT_LIB_SQL +DEFAULT_LIB_CXX +DEFAULT_LIB +DB_STRUCT_ALIGN8 +DB_PROTO2 +DB_PROTO1 +DB_CONST +CXXFLAGS +CXX +CRYPTO_OBJS +CPPFLAGS +CONFIGURATION_PATH +CONFIGURATION_ARGS +CFLAGS +BUILD_TARGET +ADDITIONAL_PROGS +ADDITIONAL_OBJS +ADDITIONAL_LANG +ADDITIONAL_INCS +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_smallbuild +enable_atomicsupport +enable_bigfile +enable_compression +enable_hash +enable_heap +enable_mutexsupport +enable_log_checksum +enable_partition +enable_queue +enable_replication +enable_statistics +enable_verify +enable_compat185 +enable_cxx +enable_debug +enable_debug_rop +enable_debug_wop +enable_diagnostic +enable_dump185 +enable_java +enable_mingw +enable_o_direct +enable_posixmutexes +enable_pthread_self +enable_pthread_api +enable_rpc +enable_sql +enable_sql_compat +enable_jdbc +with_jdbc +enable_amalgamation +enable_sql_codegen +enable_stl 
+enable_tcl +enable_test +enable_localization +enable_stripped_messages +enable_dbm +enable_dtrace +enable_systemtap +enable_perfmon_statistics +enable_uimutexes +enable_umrw +enable_cryptography +with_cryptography +with_mutex +with_mutexalign +with_stacksize +with_tcl +with_uniquename +enable_readline +enable_shared +enable_static +with_pic +enable_fast_install +with_gnu_ld +with_sysroot +enable_libtool_lock +enable_largefile +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CXX +CXXFLAGS +CCC +CXXCPP +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; 
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir 
| --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | 
--program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* 
| --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used" >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. 
+ ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures Berkeley DB 5.2.28 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. 
+ +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root + [DATAROOTDIR/doc/db-5.2.28] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of Berkeley DB 5.2.28:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-smallbuild Build small footprint version of the library. 
+ --disable-atomicsupport Do not build any native atomic operation support. + --disable-bigfile Obsolete; use --disable-largefile instead. + --disable-compression Do not build compression support. + --disable-hash Do not build Hash access method. + --disable-heap Do not build Heap access method. + --disable-mutexsupport Do not build any mutex support. + --disable-log_checksum Disable log checksums. + --disable-partition Do not build partitioned database support. + --disable-queue Do not build Queue access method. + --disable-replication Do not build database replication support. + --disable-statistics Do not build statistics support. + --disable-verify Do not build database verification support. + --enable-compat185 Build DB 1.85 compatibility API. + --enable-cxx Build C++ API. + --enable-debug Build a debugging version. + --enable-debug_rop Build a version that logs read operations. + --enable-debug_wop Build a version that logs write operations. + --enable-diagnostic Build a version with run-time diagnostics. + --enable-dump185 Build db_dump185(1) to dump 1.85 databases. + --enable-java Build Java API. + --enable-mingw Build Berkeley DB for MinGW. + --enable-o_direct Enable the O_DIRECT flag for direct I/O. + --enable-posixmutexes Force use of POSIX standard mutexes. + + --enable-sql Build the SQL API. + --enable-sql_compat Build a drop-in replacement sqlite3 library. + --enable-jdbc Build BDB SQL JDBC library. + --enable-amalgamation Build a SQL amalgamation instead of building files + separately. + --enable-sql_codegen Build the SQL-to-C code generation tool. + --enable-stl Build STL API. + --enable-tcl Build Tcl API. + --enable-test Configure to run the test suite. + --enable-localization Configure to enable localization. + --enable-stripped_messages + Configure to enable stripped messages. + --enable-dbm Configure to enable the historic dbm interface. 
+ --enable-dtrace Configure to build in dtrace static probes + --enable-systemtap Configure to use systemtap to emulate dtrace static + probes + --enable-perfmon-statistics + Configure to build in performance monitoring of + statistics values [default=no]. + --enable-uimutexes Force use of Unix International mutexes. + --enable-umrw Mask harmless uninitialized memory read/writes. + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --disable-largefile omit support for large files + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-jdbc=DIR Specify source directory of JDBC. + --with-cryptography=yes|no|ipp + Build database cryptography support [default=yes]. + --with-mutex=MUTEX Select non-default mutex implementation. + --with-mutexalign=ALIGNMENT + Obsolete; use DbEnv::mutex_set_align instead. + --with-stacksize=SIZE Set the stack size for Berkeley DB threads. + --with-tcl=DIR Directory location of tclConfig.sh. + --with-uniquename=NAME Build a uniquely named library. + --with-pic try to use only PIC/non-PIC objects [default=use + both] + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot=DIR Search for dependent libraries within DIR + (or the compiler's sysroot if not specified). + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. 
-I if + you have headers in a nonstandard directory + CXX C++ compiler command + CXXFLAGS C++ compiler flags + CXXCPP C++ preprocessor + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +Berkeley DB configure 5.2.28 +generated by GNU Autoconf 2.68 + +Copyright (C) 2010 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_cxx_try_cpp LINENO +# ------------------------ +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_cpp + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_test_x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_cxx_try_link LINENO +# ------------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_test_x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_link + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? 
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ---------------------------------------------------------- ## +## Report this to Oracle Technology Network Berkeley DB forum ## +## ---------------------------------------------------------- ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES +# ---------------------------------------------------- +# Tries to find if the field MEMBER exists in type AGGR, after including +# INCLUDES, setting cache variable VAR accordingly. +ac_fn_c_check_member () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 +$as_echo_n "checking for $2.$3... " >&6; } +if eval \${$4+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + eval "$4=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$4 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_member + +# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES +# -------------------------------------------- +# Tries to find the compile-time value of EXPR in a program that includes +# INCLUDES, setting VAR accordingly. 
Returns whether the value could be +# computed +ac_fn_c_compute_int () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=0 ac_mid=0 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid; break +else + as_fn_arith $ac_mid + 1 && ac_lo=$as_val + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=-1 ac_mid=-1 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=$ac_mid; break +else + as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + ac_lo= ac_hi= +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. 
+while test "x$ac_lo" != "x$ac_hi"; do + as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid +else + as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in #(( +?*) eval "$3=\$ac_lo"; ac_retval=0 ;; +'') ac_retval=1 ;; +esac + else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +static long int longval () { return $2; } +static unsigned long int ulongval () { return $2; } +#include +#include +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + return 1; + if (($2) < 0) + { + long int i = longval (); + if (i != ($2)) + return 1; + fprintf (f, "%ld", i); + } + else + { + unsigned long int i = ulongval (); + if (i != ($2)) + return 1; + fprintf (f, "%lu", i); + } + /* Do not output a trailing newline, as this causes \r\n confusion + on some platforms. */ + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + echo >>conftest.val; read $3 &5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by Berkeley DB $as_me 5.2.28, which was +generated by GNU Autoconf 2.68. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. 
## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. 
+ { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers db_config.h:config.hin" + + + + +# We're going to pass options through to SQLite, don't check them first. + + + +# Configure setup. +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. 
+ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + + +# Don't build in the dist directory. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if building in the top-level or dist directories" >&5 +$as_echo_n "checking if building in the top-level or dist directories... " >&6; } +if test -f configure.ac ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + as_fn_error $? "\ +Berkeley DB should not be built in the \"dist\" directory. \ +Change directory to the build_unix directory and run ../dist/configure \ +from there." "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +topdir=`echo "$srcdir/.." | sed 's,/dist/\.\.,,'` +# Substitution variables. BDB additions need to be documented. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# The Windows public header has two extra symbols we need to remove. + + + +# Set the default installation location. + + +# Configure the version information. 
+ +DB_VERSION_FAMILY="11" + +DB_VERSION_RELEASE="2" + +DB_VERSION_MAJOR="5" + +DB_VERSION_MINOR="2" + +DB_VERSION_PATCH="28" + +DB_VERSION_STRING='"Berkeley DB 5.2.28: (June 10, 2011)"' + +DB_VERSION_FULL_STRING='"Berkeley DB 11g Release 2, library version 11.2.5.2.28: (June 10, 2011)"' + + +# Process all options before using them. + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-smallbuild option specified" >&5 +$as_echo_n "checking if --enable-smallbuild option specified... " >&6; } +# Check whether --enable-smallbuild was given. +if test "${enable_smallbuild+set}" = set; then : + enableval=$enable_smallbuild; db_cv_smallbuild="$enable_smallbuild" +else + db_cv_smallbuild="no" +fi + +case "$db_cv_smallbuild" in +yes) db_cv_build_full="no";; + *) db_cv_build_full="yes";; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_smallbuild" >&5 +$as_echo "$db_cv_smallbuild" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-atomicsupport option specified" >&5 +$as_echo_n "checking if --disable-atomicsupport option specified... " >&6; } +# Check whether --enable-atomicsupport was given. +if test "${enable_atomicsupport+set}" = set; then : + enableval=$enable_atomicsupport; +else + enableval="yes" +fi + +db_cv_build_atomicsupport="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +# --enable-bigfile was the configuration option that Berkeley DB used before +# autoconf 2.50 was released (which had --enable-largefile integrated in). +# Check whether --enable-bigfile was given. +if test "${enable_bigfile+set}" = set; then : + enableval=$enable_bigfile; as_fn_error $? 
"--enable-bigfile no longer supported, use --enable-largefile" "$LINENO" 5 +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-compression option specified" >&5 +$as_echo_n "checking if --disable-compression option specified... " >&6; } +# Check whether --enable-compression was given. +if test "${enable_compression+set}" = set; then : + enableval=$enable_compression; +else + enableval=$db_cv_build_full +fi + +db_cv_build_compression="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-hash option specified" >&5 +$as_echo_n "checking if --disable-hash option specified... " >&6; } +# Check whether --enable-hash was given. +if test "${enable_hash+set}" = set; then : + enableval=$enable_hash; +else + enableval=$db_cv_build_full +fi + +db_cv_build_hash="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-heap option specified" >&5 +$as_echo_n "checking if --disable-heap option specified... " >&6; } +# Check whether --enable-heap was given. +if test "${enable_heap+set}" = set; then : + enableval=$enable_heap; +else + enableval=$db_cv_build_full +fi + +db_cv_build_heap="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-mutexsupport option specified" >&5 +$as_echo_n "checking if --disable-mutexsupport option specified... 
" >&6; } +# Check whether --enable-mutexsupport was given. +if test "${enable_mutexsupport+set}" = set; then : + enableval=$enable_mutexsupport; +else + enableval="yes" +fi + +db_cv_build_mutexsupport="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-log_checksum option specified" >&5 +$as_echo_n "checking if --disable-log_checksum option specified... " >&6; } +# Check whether --enable-log_checksum was given. +if test "${enable_log_checksum+set}" = set; then : + enableval=$enable_log_checksum; case "$enableval" in + no | yes) db_cv_log_checksum="$enableval" ;; + *) db_cv_log_checksum="yes" ;; + esac +else + db_cv_log_checksum="yes" +fi + +case "$db_cv_log_checksum" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-partition option specified" >&5 +$as_echo_n "checking if --disable-partition option specified... " >&6; } +# Check whether --enable-partition was given. +if test "${enable_partition+set}" = set; then : + enableval=$enable_partition; +else + enableval=$db_cv_build_full +fi + +db_cv_build_partition="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-queue option specified" >&5 +$as_echo_n "checking if --disable-queue option specified... " >&6; } +# Check whether --enable-queue was given. 
+if test "${enable_queue+set}" = set; then : + enableval=$enable_queue; +else + enableval=$db_cv_build_full +fi + +db_cv_build_queue="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-replication option specified" >&5 +$as_echo_n "checking if --disable-replication option specified... " >&6; } +# Check whether --enable-replication was given. +if test "${enable_replication+set}" = set; then : + enableval=$enable_replication; +else + enableval=$db_cv_build_full +fi + +db_cv_build_replication="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-statistics option specified" >&5 +$as_echo_n "checking if --disable-statistics option specified... " >&6; } +# Check whether --enable-statistics was given. +if test "${enable_statistics+set}" = set; then : + enableval=$enable_statistics; +else + enableval=$db_cv_build_full +fi + +db_cv_build_statistics="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --disable-verify option specified" >&5 +$as_echo_n "checking if --disable-verify option specified... " >&6; } +# Check whether --enable-verify was given. 
+if test "${enable_verify+set}" = set; then : + enableval=$enable_verify; +else + enableval=$db_cv_build_full +fi + +db_cv_build_verify="$enableval" +case "$enableval" in + no) { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; };; +yes) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; };; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-compat185 option specified" >&5 +$as_echo_n "checking if --enable-compat185 option specified... " >&6; } +# Check whether --enable-compat185 was given. +if test "${enable_compat185+set}" = set; then : + enableval=$enable_compat185; db_cv_compat185="$enable_compat185" +else + db_cv_compat185="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_compat185" >&5 +$as_echo "$db_cv_compat185" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-cxx option specified" >&5 +$as_echo_n "checking if --enable-cxx option specified... " >&6; } +# Check whether --enable-cxx was given. +if test "${enable_cxx+set}" = set; then : + enableval=$enable_cxx; db_cv_cxx="$enable_cxx" +else + db_cv_cxx="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_cxx" >&5 +$as_echo "$db_cv_cxx" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-debug option specified" >&5 +$as_echo_n "checking if --enable-debug option specified... " >&6; } +# Check whether --enable-debug was given. +if test "${enable_debug+set}" = set; then : + enableval=$enable_debug; db_cv_debug="$enable_debug" +else + db_cv_debug="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_debug" >&5 +$as_echo "$db_cv_debug" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-debug_rop option specified" >&5 +$as_echo_n "checking if --enable-debug_rop option specified... " >&6; } +# Check whether --enable-debug_rop was given. 
+if test "${enable_debug_rop+set}" = set; then : + enableval=$enable_debug_rop; db_cv_debug_rop="$enable_debug_rop" +else + db_cv_debug_rop="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_debug_rop" >&5 +$as_echo "$db_cv_debug_rop" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-debug_wop option specified" >&5 +$as_echo_n "checking if --enable-debug_wop option specified... " >&6; } +# Check whether --enable-debug_wop was given. +if test "${enable_debug_wop+set}" = set; then : + enableval=$enable_debug_wop; db_cv_debug_wop="$enable_debug_wop" +else + db_cv_debug_wop="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_debug_wop" >&5 +$as_echo "$db_cv_debug_wop" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-diagnostic option specified" >&5 +$as_echo_n "checking if --enable-diagnostic option specified... " >&6; } +# Check whether --enable-diagnostic was given. +if test "${enable_diagnostic+set}" = set; then : + enableval=$enable_diagnostic; db_cv_diagnostic="$enable_diagnostic" +else + db_cv_diagnostic="no" +fi + +if test "$db_cv_diagnostic" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_diagnostic" >&5 +$as_echo "$db_cv_diagnostic" >&6; } +fi +if test "$db_cv_diagnostic" = "no" -a "$db_cv_debug_rop" = "yes"; then + db_cv_diagnostic="yes" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: by --enable-debug_rop" >&5 +$as_echo "by --enable-debug_rop" >&6; } +fi +if test "$db_cv_diagnostic" = "no" -a "$db_cv_debug_wop" = "yes"; then + db_cv_diagnostic="yes" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: by --enable-debug_wop" >&5 +$as_echo "by --enable-debug_wop" >&6; } +fi +if test "$db_cv_diagnostic" = "no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_diagnostic" >&5 +$as_echo "$db_cv_diagnostic" >&6; } +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-dump185 option specified" >&5 +$as_echo_n "checking if 
--enable-dump185 option specified... " >&6; } +# Check whether --enable-dump185 was given. +if test "${enable_dump185+set}" = set; then : + enableval=$enable_dump185; db_cv_dump185="$enable_dump185" +else + db_cv_dump185="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_dump185" >&5 +$as_echo "$db_cv_dump185" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-java option specified" >&5 +$as_echo_n "checking if --enable-java option specified... " >&6; } +# Check whether --enable-java was given. +if test "${enable_java+set}" = set; then : + enableval=$enable_java; db_cv_java="$enable_java" +else + db_cv_java="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_java" >&5 +$as_echo "$db_cv_java" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-mingw option specified" >&5 +$as_echo_n "checking if --enable-mingw option specified... " >&6; } +# Check whether --enable-mingw was given. +if test "${enable_mingw+set}" = set; then : + enableval=$enable_mingw; db_cv_mingw="$enable_mingw" +else + db_cv_mingw="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_mingw" >&5 +$as_echo "$db_cv_mingw" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-o_direct option specified" >&5 +$as_echo_n "checking if --enable-o_direct option specified... " >&6; } +# Check whether --enable-o_direct was given. +if test "${enable_o_direct+set}" = set; then : + enableval=$enable_o_direct; db_cv_o_direct="$enable_o_direct" +else + db_cv_o_direct="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_o_direct" >&5 +$as_echo "$db_cv_o_direct" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-posixmutexes option specified" >&5 +$as_echo_n "checking if --enable-posixmutexes option specified... " >&6; } +# Check whether --enable-posixmutexes was given. 
+if test "${enable_posixmutexes+set}" = set; then : + enableval=$enable_posixmutexes; db_cv_posixmutexes="$enable_posixmutexes" +else + db_cv_posixmutexes="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_posixmutexes" >&5 +$as_echo "$db_cv_posixmutexes" >&6; } + +# Check whether --enable-pthread_self was given. +if test "${enable_pthread_self+set}" = set; then : + enableval=$enable_pthread_self; { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --enable-pthread_self is now always enabled" >&5 +$as_echo "$as_me: WARNING: --enable-pthread_self is now always enabled" >&2;} +fi + + +# Check whether --enable-pthread_api was given. +if test "${enable_pthread_api+set}" = set; then : + enableval=$enable_pthread_api; { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --enable-pthread_api is now always enabled" >&5 +$as_echo "$as_me: WARNING: --enable-pthread_api is now always enabled" >&2;} +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-rpc option specified" >&5 +$as_echo_n "checking if --enable-rpc option specified... " >&6; } +# Check whether --enable-rpc was given. +if test "${enable_rpc+set}" = set; then : + enableval=$enable_rpc; as_fn_error $? "RPC support has been removed from Berkeley DB." "$LINENO" 5 + +else + db_cv_rpc="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_rpc" >&5 +$as_echo "$db_cv_rpc" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-sql option specified" >&5 +$as_echo_n "checking if --enable-sql option specified... " >&6; } +# Check whether --enable-sql was given. +if test "${enable_sql+set}" = set; then : + enableval=$enable_sql; db_cv_sql="$enable_sql" +else + db_cv_sql="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_sql" >&5 +$as_echo "$db_cv_sql" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-sql_compat option specified" >&5 +$as_echo_n "checking if --enable-sql_compat option specified... 
" >&6; } +# Check whether --enable-sql_compat was given. +if test "${enable_sql_compat+set}" = set; then : + enableval=$enable_sql_compat; db_cv_sql_compat="$enable_sql_compat" +else + db_cv_sql_compat="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_sql_compat" >&5 +$as_echo "$db_cv_sql_compat" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-jdbc option specified" >&5 +$as_echo_n "checking if --enable-jdbc option specified... " >&6; } +# Check whether --enable-jdbc was given. +if test "${enable_jdbc+set}" = set; then : + enableval=$enable_jdbc; db_cv_jdbc="$enable_jdbc" +else + db_cv_jdbc="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_jdbc" >&5 +$as_echo "$db_cv_jdbc" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --with-jdbc=DIR option specified" >&5 +$as_echo_n "checking if --with-jdbc=DIR option specified... " >&6; } + +# Check whether --with-jdbc was given. +if test "${with_jdbc+set}" = set; then : + withval=$with_jdbc; with_jdbc="$withval" +else + with_jdbc="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_jdbc" >&5 +$as_echo "$with_jdbc" >&6; } +if test "$with_jdbc" != "no"; then + db_cv_jdbc="yes" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-amalgamation option specified" >&5 +$as_echo_n "checking if --enable-amalgamation option specified... " >&6; } +# Check whether --enable-amalgamation was given. +if test "${enable_amalgamation+set}" = set; then : + enableval=$enable_amalgamation; db_cv_sql_amalgamation="$enable_amalgamation" +else + db_cv_sql_amalgamation="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_sql_amalgamation" >&5 +$as_echo "$db_cv_sql_amalgamation" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-sql_codegen option specified" >&5 +$as_echo_n "checking if --enable-sql_codegen option specified... " >&6; } +# Check whether --enable-sql_codegen was given. 
+if test "${enable_sql_codegen+set}" = set; then : + enableval=$enable_sql_codegen; db_cv_sql_codegen="$enable_sql_codegen" +else + db_cv_sql_codegen="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_sql_codegen" >&5 +$as_echo "$db_cv_sql_codegen" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-stl option specified" >&5 +$as_echo_n "checking if --enable-stl option specified... " >&6; } +# Check whether --enable-stl was given. +if test "${enable_stl+set}" = set; then : + enableval=$enable_stl; db_cv_stl="$enable_stl" +else + db_cv_stl="no" +fi + +if test "$db_cv_stl" = "yes" -a "$db_cv_cxx" = "no"; then + db_cv_cxx="yes" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_stl" >&5 +$as_echo "$db_cv_stl" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-tcl option specified" >&5 +$as_echo_n "checking if --enable-tcl option specified... " >&6; } +# Check whether --enable-tcl was given. +if test "${enable_tcl+set}" = set; then : + enableval=$enable_tcl; db_cv_tcl="$enable_tcl" +else + db_cv_tcl="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_tcl" >&5 +$as_echo "$db_cv_tcl" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-test option specified" >&5 +$as_echo_n "checking if --enable-test option specified... " >&6; } +# Check whether --enable-test was given. +if test "${enable_test+set}" = set; then : + enableval=$enable_test; db_cv_test="$enable_test" +else + db_cv_test="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_test" >&5 +$as_echo "$db_cv_test" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-localization option specified" >&5 +$as_echo_n "checking if --enable-localization option specified... " >&6; } +# Check whether --enable-localization was given. 
+if test "${enable_localization+set}" = set; then : + enableval=$enable_localization; db_cv_localization="$enable_localization" +else + db_cv_localization="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_localization" >&5 +$as_echo "$db_cv_localization" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-stripped_messages option specified" >&5 +$as_echo_n "checking if --enable-stripped_messages option specified... " >&6; } +# Check whether --enable-stripped_messages was given. +if test "${enable_stripped_messages+set}" = set; then : + enableval=$enable_stripped_messages; db_cv_stripped_messages="$enable_stripped_messages" +else + db_cv_stripped_messages="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_stripped_messages" >&5 +$as_echo "$db_cv_stripped_messages" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-dbm option specified" >&5 +$as_echo_n "checking if --enable-dbm option specified... " >&6; } +# Check whether --enable-dbm was given. +if test "${enable_dbm+set}" = set; then : + enableval=$enable_dbm; db_cv_dbm="$enable_dbm" +else + db_cv_dbm="$db_cv_test" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_dbm" >&5 +$as_echo "$db_cv_dbm" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-dtrace option specified" >&5 +$as_echo_n "checking if --enable-dtrace option specified... " >&6; } +# Check whether --enable-dtrace was given. +if test "${enable_dtrace+set}" = set; then : + enableval=$enable_dtrace; db_cv_dtrace="$enable_dtrace" +else + db_cv_dtrace="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_dtrace" >&5 +$as_echo "$db_cv_dtrace" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-systemtap option specified" >&5 +$as_echo_n "checking if --enable-systemtap option specified... " >&6; } +# Check whether --enable-systemtap was given. 
+if test "${enable_systemtap+set}" = set; then : + enableval=$enable_systemtap; db_cv_systemtap="$enable_systemtap" +else + db_cv_systemtap="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_systemtap" >&5 +$as_echo "$db_cv_systemtap" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-perfmon-statistics option specified" >&5 +$as_echo_n "checking if --enable-perfmon-statistics option specified... " >&6; } +# Check whether --enable-perfmon_statistics was given. +if test "${enable_perfmon_statistics+set}" = set; then : + enableval=$enable_perfmon_statistics; db_cv_perfmon_statistics="$enable_perfmon_statistics" +else + db_cv_perfmon_statistics="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_perfmon_statistics" >&5 +$as_echo "$db_cv_perfmon_statistics" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-uimutexes option specified" >&5 +$as_echo_n "checking if --enable-uimutexes option specified... " >&6; } +# Check whether --enable-uimutexes was given. +if test "${enable_uimutexes+set}" = set; then : + enableval=$enable_uimutexes; db_cv_uimutexes="$enable_uimutexes" +else + db_cv_uimutexes="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_uimutexes" >&5 +$as_echo "$db_cv_uimutexes" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --enable-umrw option specified" >&5 +$as_echo_n "checking if --enable-umrw option specified... " >&6; } +# Check whether --enable-umrw was given. +if test "${enable_umrw+set}" = set; then : + enableval=$enable_umrw; db_cv_umrw="$enable_umrw" +else + db_cv_umrw="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_umrw" >&5 +$as_echo "$db_cv_umrw" >&6; } + +# Cryptography support. +# Until Berkeley DB 5.0, this was a simple yes/no decision. +# With the addition of support for Intel Integrated Performance Primitives (ipp) +# things are more complex. 
There are now three options: +# 1) don't build cryptography (no) +# 2) build using the built-in software implementation (yes) +# 3) build using the Intel IPP implementation (ipp) +# We handle this by making the primary configuration method: +# --with-cryptography={yes|no|ipp} +# which defaults to yes. The old enable/disable-cryptography argument is still +# supported for backwards compatibility. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --with-cryptography option specified" >&5 +$as_echo_n "checking if --with-cryptography option specified... " >&6; } +# Check whether --enable-cryptography was given. +if test "${enable_cryptography+set}" = set; then : + enableval=$enable_cryptography; +else + enableval=$db_cv_build_full +fi + +enable_cryptography="$enableval" + +# Check whether --with-cryptography was given. +if test "${with_cryptography+set}" = set; then : + withval=$with_cryptography; +else + with_cryptography=$enable_cryptography +fi + +case "$with_cryptography" in +yes|no|ipp) ;; +*) as_fn_error $? "unknown --with-cryptography argument \'$with_cryptography\'" "$LINENO" 5 ;; +esac +db_cv_build_cryptography="$with_cryptography" +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_build_cryptography" >&5 +$as_echo "$db_cv_build_cryptography" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --with-mutex=MUTEX option specified" >&5 +$as_echo_n "checking if --with-mutex=MUTEX option specified... " >&6; } + +# Check whether --with-mutex was given. +if test "${with_mutex+set}" = set; then : + withval=$with_mutex; with_mutex="$withval" +else + with_mutex="no" +fi + +if test "$with_mutex" = "yes"; then + as_fn_error $? 
"--with-mutex requires a mutex name argument" "$LINENO" 5 +fi +if test "$with_mutex" != "no"; then + db_cv_mutex="$with_mutex" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_mutex" >&5 +$as_echo "$with_mutex" >&6; } + +# --with-mutexalign=ALIGNMENT was the configuration option that Berkeley DB +# used before the DbEnv::mutex_set_align method was added. + +# Check whether --with-mutexalign was given. +if test "${with_mutexalign+set}" = set; then : + withval=$with_mutexalign; as_fn_error $? "--with-mutexalign no longer supported, use DbEnv::mutex_set_align" "$LINENO" 5 +fi + + + +# Check whether --with-stacksize was given. +if test "${with_stacksize+set}" = set; then : + withval=$with_stacksize; with_stacksize="$withval" +else + with_stacksize="no" +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --with-tcl=DIR option specified" >&5 +$as_echo_n "checking if --with-tcl=DIR option specified... " >&6; } + +# Check whether --with-tcl was given. +if test "${with_tcl+set}" = set; then : + withval=$with_tcl; with_tclconfig="$withval" +else + with_tclconfig="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_tclconfig" >&5 +$as_echo "$with_tclconfig" >&6; } +if test "$with_tclconfig" != "no"; then + db_cv_tcl="yes" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if --with-uniquename=NAME option specified" >&5 +$as_echo_n "checking if --with-uniquename=NAME option specified... " >&6; } + +# Check whether --with-uniquename was given. 
+if test "${with_uniquename+set}" = set; then : + withval=$with_uniquename; with_uniquename="$withval" +else + with_uniquename="no" +fi + +if test "$with_uniquename" = "no"; then + db_cv_uniquename="no" + DB_VERSION_UNIQUE_NAME="" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_uniquename" >&5 +$as_echo "$with_uniquename" >&6; } +else + db_cv_uniquename="yes" + if test "$with_uniquename" = "yes"; then + DB_VERSION_UNIQUE_NAME="_5002" + else + DB_VERSION_UNIQUE_NAME="$with_uniquename" + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DB_VERSION_UNIQUE_NAME" >&5 +$as_echo "$DB_VERSION_UNIQUE_NAME" >&6; } +fi + +# Undocumented option used for the dbsql command line tool (to match SQLite). +# Check whether --enable-readline was given. +if test "${enable_readline+set}" = set; then : + enableval=$enable_readline; with_readline=$enableval +else + with_readline=no +fi + + +# --enable-sql_compat implies --enable-sql +if test "$db_cv_sql_compat" = "yes" -a "$db_cv_sql" = "no"; then + db_cv_sql=$db_cv_sql_compat +fi + +# --enable-jdbc implies --enable-sql +if test "$db_cv_jdbc" = "yes" -a "$db_cv_sql" = "no"; then + db_cv_sql=$db_cv_jdbc +fi + +# Testing requires Tcl. +if test "$db_cv_test" = "yes" -a "$db_cv_tcl" = "no"; then + as_fn_error $? "--enable-test requires --enable-tcl" "$LINENO" 5 +fi + +# Set some #defines based on configuration options. +if test "$db_cv_diagnostic" = "yes"; then + $as_echo "#define DIAGNOSTIC 1" >>confdefs.h + + +fi +if test "$db_cv_debug_rop" = "yes"; then + $as_echo "#define DEBUG_ROP 1" >>confdefs.h + + +fi +if test "$db_cv_debug_wop" = "yes"; then + $as_echo "#define DEBUG_WOP 1" >>confdefs.h + + +fi +if test "$db_cv_umrw" = "yes"; then + $as_echo "#define UMRW 1" >>confdefs.h + + + +fi +if test "$db_cv_test" = "yes"; then + $as_echo "#define CONFIG_TEST 1" >>confdefs.h + + +fi + + +$as_echo "#define HAVE_UPGRADE_SUPPORT 1" >>confdefs.h + + +# Check for programs used in building and installation. 
+ + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}chmod", so it can be a program name with args. +set dummy ${ac_tool_prefix}chmod; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CHMOD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CHMOD"; then + ac_cv_prog_CHMOD="$CHMOD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CHMOD="${ac_tool_prefix}chmod" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CHMOD=$ac_cv_prog_CHMOD +if test -n "$CHMOD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CHMOD" >&5 +$as_echo "$CHMOD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CHMOD"; then + ac_ct_CHMOD=$CHMOD + # Extract the first word of "chmod", so it can be a program name with args. +set dummy chmod; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CHMOD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CHMOD"; then + ac_cv_prog_ac_ct_CHMOD="$ac_ct_CHMOD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CHMOD="chmod" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CHMOD=$ac_cv_prog_ac_ct_CHMOD +if test -n "$ac_ct_CHMOD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CHMOD" >&5 +$as_echo "$ac_ct_CHMOD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CHMOD" = x; then + CHMOD="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CHMOD=$ac_ct_CHMOD + fi +else + CHMOD="$ac_cv_prog_CHMOD" +fi + +test "$CHMOD" = "none" && as_fn_error $? "No chmod utility found." "$LINENO" 5 + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cp", so it can be a program name with args. +set dummy ${ac_tool_prefix}cp; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CP"; then + ac_cv_prog_CP="$CP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CP="${ac_tool_prefix}cp" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CP=$ac_cv_prog_CP +if test -n "$CP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CP" >&5 +$as_echo "$CP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CP"; then + ac_ct_CP=$CP + # Extract the first word of "cp", so it can be a program name with args. +set dummy cp; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CP"; then + ac_cv_prog_ac_ct_CP="$ac_ct_CP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CP="cp" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CP=$ac_cv_prog_ac_ct_CP +if test -n "$ac_ct_CP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CP" >&5 +$as_echo "$ac_ct_CP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CP" = x; then + CP="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CP=$ac_ct_CP + fi +else + CP="$ac_cv_prog_CP" +fi + +test "$CP" = "none" && as_fn_error $? "No cp utility found." "$LINENO" 5 + +# The Tcl test suite requires a kill utility. +if test "$db_cv_test" = "yes"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}kill", so it can be a program name with args. +set dummy ${ac_tool_prefix}kill; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_KILL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$KILL"; then + ac_cv_prog_KILL="$KILL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_KILL="${ac_tool_prefix}kill" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +KILL=$ac_cv_prog_KILL +if test -n "$KILL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $KILL" >&5 +$as_echo "$KILL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_KILL"; then + ac_ct_KILL=$KILL + # Extract the first word of "kill", so it can be a program name with args. +set dummy kill; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_KILL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_KILL"; then + ac_cv_prog_ac_ct_KILL="$ac_ct_KILL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_KILL="kill" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_KILL=$ac_cv_prog_ac_ct_KILL +if test -n "$ac_ct_KILL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_KILL" >&5 +$as_echo "$ac_ct_KILL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_KILL" = x; then + KILL="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + KILL=$ac_ct_KILL + fi +else + KILL="$ac_cv_prog_KILL" +fi + + test "$KILL" = "none" && as_fn_error $? "No kill utility found." "$LINENO" 5 +fi + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ln", so it can be a program name with args. +set dummy ${ac_tool_prefix}ln; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_LN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$LN"; then + ac_cv_prog_LN="$LN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_LN="${ac_tool_prefix}ln" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LN=$ac_cv_prog_LN +if test -n "$LN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LN" >&5 +$as_echo "$LN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LN"; then + ac_ct_LN=$LN + # Extract the first word of "ln", so it can be a program name with args. +set dummy ln; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_LN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_LN"; then + ac_cv_prog_ac_ct_LN="$ac_ct_LN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_LN="ln" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LN=$ac_cv_prog_ac_ct_LN +if test -n "$ac_ct_LN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LN" >&5 +$as_echo "$ac_ct_LN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_LN" = x; then + LN="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LN=$ac_ct_LN + fi +else + LN="$ac_cv_prog_LN" +fi + +test "$LN" = "none" && as_fn_error $? "No ln utility found." "$LINENO" 5 + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mkdir", so it can be a program name with args. +set dummy ${ac_tool_prefix}mkdir; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MKDIR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MKDIR"; then + ac_cv_prog_MKDIR="$MKDIR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MKDIR="${ac_tool_prefix}mkdir" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MKDIR=$ac_cv_prog_MKDIR +if test -n "$MKDIR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR" >&5 +$as_echo "$MKDIR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MKDIR"; then + ac_ct_MKDIR=$MKDIR + # Extract the first word of "mkdir", so it can be a program name with args. +set dummy mkdir; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MKDIR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MKDIR"; then + ac_cv_prog_ac_ct_MKDIR="$ac_ct_MKDIR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_MKDIR="mkdir" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MKDIR=$ac_cv_prog_ac_ct_MKDIR +if test -n "$ac_ct_MKDIR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MKDIR" >&5 +$as_echo "$ac_ct_MKDIR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MKDIR" = x; then + MKDIR="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MKDIR=$ac_ct_MKDIR + fi +else + MKDIR="$ac_cv_prog_MKDIR" +fi + +test "$MKDIR" = "none" && as_fn_error $? "No mkdir utility found." "$LINENO" 5 + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}rm", so it can be a program name with args. +set dummy ${ac_tool_prefix}rm; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RM"; then + ac_cv_prog_RM="$RM" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_RM="${ac_tool_prefix}rm" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RM=$ac_cv_prog_RM +if test -n "$RM"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RM" >&5 +$as_echo "$RM" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RM"; then + ac_ct_RM=$RM + # Extract the first word of "rm", so it can be a program name with args. +set dummy rm; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RM"; then + ac_cv_prog_ac_ct_RM="$ac_ct_RM" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_RM="rm" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RM=$ac_cv_prog_ac_ct_RM +if test -n "$ac_ct_RM"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RM" >&5 +$as_echo "$ac_ct_RM" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RM" = x; then + RM="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RM=$ac_ct_RM + fi +else + RM="$ac_cv_prog_RM" +fi + +test "$RM" = "none" && as_fn_error $? "No rm utility found." "$LINENO" 5 + +# We always want to force removes, and libtool assumes the same. +RM="$RM -f" + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mv", so it can be a program name with args. +set dummy ${ac_tool_prefix}mv; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MV+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MV"; then + ac_cv_prog_MV="$MV" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MV="${ac_tool_prefix}mv" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MV=$ac_cv_prog_MV +if test -n "$MV"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MV" >&5 +$as_echo "$MV" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MV"; then + ac_ct_MV=$MV + # Extract the first word of "mv", so it can be a program name with args. +set dummy mv; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MV+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MV"; then + ac_cv_prog_ac_ct_MV="$ac_ct_MV" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_MV="mv" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MV=$ac_cv_prog_ac_ct_MV +if test -n "$ac_ct_MV"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MV" >&5 +$as_echo "$ac_ct_MV" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MV" = x; then + MV="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MV=$ac_ct_MV + fi +else + MV="$ac_cv_prog_MV" +fi + +test "$MV" = "none" && as_fn_error $? "No mv utility found." "$LINENO" 5 + +if test "$db_cv_systemtap" = "yes" -o "$db_cv_dtrace" = "yes"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}stap", so it can be a program name with args. +set dummy ${ac_tool_prefix}stap; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STAP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STAP"; then + ac_cv_prog_STAP="$STAP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_STAP="${ac_tool_prefix}stap" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STAP=$ac_cv_prog_STAP +if test -n "$STAP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STAP" >&5 +$as_echo "$STAP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STAP"; then + ac_ct_STAP=$STAP + # Extract the first word of "stap", so it can be a program name with args. +set dummy stap; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STAP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STAP"; then + ac_cv_prog_ac_ct_STAP="$ac_ct_STAP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_STAP="stap" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STAP=$ac_cv_prog_ac_ct_STAP +if test -n "$ac_ct_STAP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STAP" >&5 +$as_echo "$ac_ct_STAP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STAP" = x; then + STAP="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STAP=$ac_ct_STAP + fi +else + STAP="$ac_cv_prog_STAP" +fi + + test "$STAP" = "none" -a "$db_cv_systemtap" = "yes" && \ + as_fn_error $? "No stap utility found." "$LINENO" 5 + db_cv_dtrace=yes +fi + +if test "$db_cv_dtrace" = "yes"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dtrace", so it can be a program name with args. +set dummy ${ac_tool_prefix}dtrace; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DTRACE+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DTRACE"; then + ac_cv_prog_DTRACE="$DTRACE" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_DTRACE="${ac_tool_prefix}dtrace" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DTRACE=$ac_cv_prog_DTRACE +if test -n "$DTRACE"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DTRACE" >&5 +$as_echo "$DTRACE" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DTRACE"; then + ac_ct_DTRACE=$DTRACE + # Extract the first word of "dtrace", so it can be a program name with args. +set dummy dtrace; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DTRACE+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DTRACE"; then + ac_cv_prog_ac_ct_DTRACE="$ac_ct_DTRACE" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_DTRACE="dtrace" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DTRACE=$ac_cv_prog_ac_ct_DTRACE +if test -n "$ac_ct_DTRACE"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DTRACE" >&5 +$as_echo "$ac_ct_DTRACE" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DTRACE" = x; then + DTRACE="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DTRACE=$ac_ct_DTRACE + fi +else + DTRACE="$ac_cv_prog_DTRACE" +fi + + test "$DTRACE" = "none" && as_fn_error $? "No dtrace utility found." "$LINENO" 5 + # Sed and perl are needed only if events are added after building + # the distribution; if either is missing it is not an error for now. + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}sed", so it can be a program name with args. +set dummy ${ac_tool_prefix}sed; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$SED"; then + ac_cv_prog_SED="$SED" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_SED="${ac_tool_prefix}sed" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +SED=$ac_cv_prog_SED +if test -n "$SED"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SED" >&5 +$as_echo "$SED" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_SED"; then + ac_ct_SED=$SED + # Extract the first word of "sed", so it can be a program name with args. +set dummy sed; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_SED"; then + ac_cv_prog_ac_ct_SED="$ac_ct_SED" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_SED="sed" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_SED=$ac_cv_prog_ac_ct_SED +if test -n "$ac_ct_SED"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_SED" >&5 +$as_echo "$ac_ct_SED" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_SED" = x; then + SED="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + SED=$ac_ct_SED + fi +else + SED="$ac_cv_prog_SED" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}perl", so it can be a program name with args. +set dummy ${ac_tool_prefix}perl; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_PERL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$PERL"; then + ac_cv_prog_PERL="$PERL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_PERL="${ac_tool_prefix}perl" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PERL=$ac_cv_prog_PERL +if test -n "$PERL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PERL" >&5 +$as_echo "$PERL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_PERL"; then + ac_ct_PERL=$PERL + # Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_PERL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_PERL"; then + ac_cv_prog_ac_ct_PERL="$ac_ct_PERL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_PERL="perl" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_PERL=$ac_cv_prog_ac_ct_PERL +if test -n "$ac_ct_PERL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_PERL" >&5 +$as_echo "$ac_ct_PERL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_PERL" = x; then + PERL="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PERL=$ac_ct_PERL + fi +else + PERL="$ac_cv_prog_PERL" +fi + +fi + +# We need a complete path for sh, because some make utility implementations get +# upset if SHELL is set to just the command name. Don't use the SHELL variable +# here because the user likely has the SHELL variable set to something other +# than the Bourne shell, which is what Make wants. +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}sh", so it can be a program name with args. +set dummy ${ac_tool_prefix}sh; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_db_cv_path_sh+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $db_cv_path_sh in + [\\/]* | ?:[\\/]*) + ac_cv_path_db_cv_path_sh="$db_cv_path_sh" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_db_cv_path_sh="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +db_cv_path_sh=$ac_cv_path_db_cv_path_sh +if test -n "$db_cv_path_sh"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_path_sh" >&5 +$as_echo "$db_cv_path_sh" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_db_cv_path_sh"; then + ac_pt_db_cv_path_sh=$db_cv_path_sh + # Extract the first word of "sh", so it can be a program name with args. +set dummy sh; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_db_cv_path_sh+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_db_cv_path_sh in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_db_cv_path_sh="$ac_pt_db_cv_path_sh" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_ac_pt_db_cv_path_sh="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_db_cv_path_sh=$ac_cv_path_ac_pt_db_cv_path_sh +if test -n "$ac_pt_db_cv_path_sh"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_db_cv_path_sh" >&5 +$as_echo "$ac_pt_db_cv_path_sh" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_db_cv_path_sh" = x; then + db_cv_path_sh="none" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + db_cv_path_sh=$ac_pt_db_cv_path_sh + fi +else + db_cv_path_sh="$ac_cv_path_db_cv_path_sh" +fi + +test "$db_cv_path_sh" = "none" && as_fn_error $? "No sh utility found." "$LINENO" 5 + +# Don't strip the binaries if --enable-debug was specified. +if test "$db_cv_debug" = yes; then + STRIP=":" +fi +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. 
+# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. 
+ : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + + +BUILD_TARGET="library_build" +INSTALL_TARGET="library_install" + +# Respect the environment LIBS settings +LIBSO_LIBS="$LIBS" + +# This is where we handle stuff that autoconf can't handle: compiler, +# preprocessor and load flags, libraries that the standard tests don't +# look for. +# +# There are additional libraries we need for some compiler/architecture +# combinations. +# +# Some architectures require DB to be compiled with special flags and/or +# libraries for threaded applications +# +# The makefile CC may be different than the CC used in config testing, +# because the makefile CC may be set to use $(LIBTOOL). 
+# +# Don't override anything if it's already set from the environment. +optimize_flag="-O" +extra_cflags="" + +case "$host_os" in +aix4.3.*|aix[56]*) + case "$host_os" in + aix4.3.*) + CPPFLAGS="$CPPFLAGS -D_LINUX_SOURCE_COMPAT";; + esac + # IBM's XLC compilers (at least versions 7/8/9) generate incorrect code + # when ordinary optimization is enabled because they make strong + # assumptions about the types held at each memory location, and some + # Berkeley DB code violates those assumptions. [#16141] + extra_cflags=" -qalias=noansi" + optimize_flag="-O2" + CC=${CC-"xlc_r"} + CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" + LDFLAGS="$LDFLAGS -Wl,-brtl";; +bsdi3*) CC=${CC-"shlicc2"} + LIBSO_LIBS="$LIBSO_LIBS -lipc";; +cygwin*) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; +freebsd*) + CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" + LDFLAGS="$LDFLAGS -pthread";; +gnu*|k*bsd*-gnu|linux*) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; +hpux*) CPPFLAGS="$CPPFLAGS -D_REENTRANT";; +irix*) optimize_flag="-O2" + CPPFLAGS="$CPPFLAGS -D_SGI_MP_SOURCE";; +mpeix*) CPPFLAGS="$CPPFLAGS -D_POSIX_SOURCE -D_SOCKET_SOURCE" + LIBSO_LIBS="$LIBSO_LIBS -lsocket -lsvipc";; +osf*) CPPFLAGS="$CPPFLAGS -pthread";; +*qnx*) qnx_build="yes" + $as_echo "#define HAVE_QNX 1" >>confdefs.h + + ;; +solaris*) + CPPFLAGS="$CPPFLAGS -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS ";; +esac + +# Set CFLAGS/CXXFLAGS. We MUST set the flags before we call autoconf +# compiler configuration macros, because if we don't, they set CFLAGS +# to no optimization and -g, which isn't what we want. +# +# If the user wants a debugging environment, add -g the their compiler flags +# and don't automatically optimize. If you want to compile with a different +# set of flags, specify CFLAGS in the environment before configuring. 
+if test "$db_cv_debug" = "yes"; then + $as_echo "#define DEBUG 1" >>confdefs.h + + + + CFLAGS="-g $CFLAGS" +else + CFLAGS=${CFLAGS-$optimize_flag} +fi + +CFLAGS="$CFLAGS$extra_cflags" +CXXFLAGS=${CXXFLAGS-"$CFLAGS"} + +# The default compiler is cc (NOT gcc), the default CFLAGS is as specified +# above, NOT what is set by AC_PROG_CC, as it won't set optimization flags +# for any compiler other than gcc. +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. 
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... 
" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... 
" >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# We know what compiler we're going to use, now. Set per-compiler flags. +if test "$GCC" = "yes"; then + # Use -O3 if we're using gcc, unless we're doing a small build, in + # which case we use -Os alone. The code size for -O3 is quite a + # bit larger than -O2: a compromise is "-Os -finline-functions", + # it's smaller and explicitly inlining the functions helps Berkeley + # DB. 
+ CFLAGS="$CFLAGS " + if test "$db_cv_smallbuild" = "yes"; then + CFLAGS=`echo "$CFLAGS" | sed 's/-O /-Os /g'` + else + CFLAGS=`echo "$CFLAGS" | sed 's/-O /-O3 /g'` + fi +else + case "$host_os" in + hpux11.0*) ;; + hpux11*) CPPFLAGS="$CPPFLAGS -mt" + test "$host_cpu" = "ia64" && + CFLAGS="$CFLAGS +u1";; + esac +fi + +# Check for "const" and "inline" keywords. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 +$as_echo_n "checking for an ANSI C-conforming const... " >&6; } +if ${ac_cv_c_const+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +/* FIXME: Include the comments suggested by Paul. */ +#ifndef __cplusplus + /* Ultrix mips cc rejects this. */ + typedef int charset[2]; + const charset cs; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *pcpcc; + char **ppc; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + pcpcc = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++pcpcc; + ppc = (char**) pcpcc; + pcpcc = (char const *const *) ppc; + { /* SCO 3.2v4 cc rejects this. */ + char *t; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* AIX XL C 1.02.0.0 rejects this saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. 
*/ + struct s { int j; const int *ap[3]; }; + struct s *b; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !cs[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_const=yes +else + ac_cv_c_const=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 +$as_echo "$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +$as_echo "#define const /**/" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 +$as_echo_n "checking for inline... " >&6; } +if ${ac_cv_c_inline+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_inline=no +for ac_kw in inline __inline__ __inline; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifndef __cplusplus +typedef int foo_t; +static $ac_kw foo_t static_foo () {return 0; } +$ac_kw foo_t foo () {return 0; } +#endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_inline=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_inline" != no && break +done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 +$as_echo "$ac_cv_c_inline" >&6; } + +case $ac_cv_c_inline in + inline | yes) ;; + *) + case $ac_cv_c_inline in + no) ac_val=;; + *) ac_val=$ac_cv_c_inline;; + esac + cat >>confdefs.h <<_ACEOF +#ifndef __cplusplus +#define inline $ac_val +#endif +_ACEOF + ;; +esac + + +# We use prototypes and the keyword "const" in db.h which doesn't include +# db_config.h, so we have to figure out what to do there. +# +# There is an autoconf AC_C_PROTOTYPES macro, but as all it does is define +# db_config.h variables, it doesn't help us. +# +# We don't have much choice, we look at internal autoconf variables. 
+if test "$ac_cv_c_const" != "yes"; then + DB_CONST="#define const" +fi + +# We use alignment attributes in db.h - figure out if the compiler supports +# them. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GCC aligned attribute" >&5 +$as_echo_n "checking for GCC aligned attribute... " >&6; } +if ${db_cv_aligned_attribute+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +__attribute__ ((aligned (8))) int i; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_aligned_attribute=yes +else + db_cv_aligned_attribute=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_aligned_attribute" >&5 +$as_echo "$db_cv_aligned_attribute" >&6; } +if test "$db_cv_aligned_attribute" = "yes"; then + DB_STRUCT_ALIGN8="__attribute__ ((aligned (8)))" +fi + +# Clear __P, some other systems use it too. +DB_PROTO1="#undef __P" +if test "$ac_cv_prog_cc_c89" = "no"; then + DB_PROTO2="#define __P(protos) ()" +else + DB_PROTO2="#define __P(protos) protos" +fi + +# Because of shared library building, the ${CC} used for config tests +# may be different than the ${CC} we want to put in the Makefile. +# The latter is known as ${MAKEFILE_CC} in this script. +MAKEFILE_CC="${CC}" +MAKEFILE_CCLINK="${CC}" +MAKEFILE_CXX="nocxx" +MAKEFILE_CXXLINK="nocxx" + +# See if we need the C++ compiler at all. If so, we'd like to find one that +# interoperates with the C compiler we chose. Since we prefered cc over gcc, +# we'll also prefer the vendor's compiler over g++/gcc. If we're wrong, the +# user can set CC and CXX in their environment before running configure. +# +# AC_PROG_CXX sets CXX, but it uses $CXX and $CCC (in that order) as its +# first choices. 
+if test "$db_cv_cxx" = "yes"; then + if test "$GCC" != "yes"; then + case "$host_os" in + aix*) if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}xlC_r", so it can be a program name with args. +set dummy ${ac_tool_prefix}xlC_r; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CCC"; then + ac_cv_prog_CCC="$CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CCC="${ac_tool_prefix}xlC_r" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CCC=$ac_cv_prog_CCC +if test -n "$CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CCC" >&5 +$as_echo "$CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CCC"; then + ac_ct_CCC=$CCC + # Extract the first word of "xlC_r", so it can be a program name with args. +set dummy xlC_r; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CCC"; then + ac_cv_prog_ac_ct_CCC="$ac_ct_CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CCC="xlC_r" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CCC=$ac_cv_prog_ac_ct_CCC +if test -n "$ac_ct_CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CCC" >&5 +$as_echo "$ac_ct_CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CCC" = x; then + CCC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CCC=$ac_ct_CCC + fi +else + CCC="$ac_cv_prog_CCC" +fi + + LIBXSO_LIBS="-lC_r $LIBXSO_LIBS" + LIBSO_LIBS="-lC_r $LIBSO_LIBS";; + hpux*) if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}aCC", so it can be a program name with args. +set dummy ${ac_tool_prefix}aCC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CCC"; then + ac_cv_prog_CCC="$CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CCC="${ac_tool_prefix}aCC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CCC=$ac_cv_prog_CCC +if test -n "$CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CCC" >&5 +$as_echo "$CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CCC"; then + ac_ct_CCC=$CCC + # Extract the first word of "aCC", so it can be a program name with args. +set dummy aCC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CCC"; then + ac_cv_prog_ac_ct_CCC="$ac_ct_CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CCC="aCC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CCC=$ac_cv_prog_ac_ct_CCC +if test -n "$ac_ct_CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CCC" >&5 +$as_echo "$ac_ct_CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CCC" = x; then + CCC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CCC=$ac_ct_CCC + fi +else + CCC="$ac_cv_prog_CCC" +fi +;; + irix*) if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}CC", so it can be a program name with args. +set dummy ${ac_tool_prefix}CC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CCC"; then + ac_cv_prog_CCC="$CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CCC="${ac_tool_prefix}CC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CCC=$ac_cv_prog_CCC +if test -n "$CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CCC" >&5 +$as_echo "$CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CCC"; then + ac_ct_CCC=$CCC + # Extract the first word of "CC", so it can be a program name with args. +set dummy CC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CCC"; then + ac_cv_prog_ac_ct_CCC="$ac_ct_CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CCC="CC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CCC=$ac_cv_prog_ac_ct_CCC +if test -n "$ac_ct_CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CCC" >&5 +$as_echo "$ac_ct_CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CCC" = x; then + CCC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CCC=$ac_ct_CCC + fi +else + CCC="$ac_cv_prog_CCC" +fi +;; + osf*) if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cxx", so it can be a program name with args. +set dummy ${ac_tool_prefix}cxx; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CCC"; then + ac_cv_prog_CCC="$CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CCC="${ac_tool_prefix}cxx" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CCC=$ac_cv_prog_CCC +if test -n "$CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CCC" >&5 +$as_echo "$CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CCC"; then + ac_ct_CCC=$CCC + # Extract the first word of "cxx", so it can be a program name with args. +set dummy cxx; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CCC"; then + ac_cv_prog_ac_ct_CCC="$ac_ct_CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CCC="cxx" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CCC=$ac_cv_prog_ac_ct_CCC +if test -n "$ac_ct_CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CCC" >&5 +$as_echo "$ac_ct_CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CCC" = x; then + CCC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CCC=$ac_ct_CCC + fi +else + CCC="$ac_cv_prog_CCC" +fi + + CXXFLAGS="$CXXFLAGS -D__USE_STD_IOSTREAM" + test -d /usr/include.dtk && + CXXFLAGS="$CXXFLAGS -I/usr/include.dtk";; + solaris*) if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}CC", so it can be a program name with args. +set dummy ${ac_tool_prefix}CC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CCC"; then + ac_cv_prog_CCC="$CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CCC="${ac_tool_prefix}CC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CCC=$ac_cv_prog_CCC +if test -n "$CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CCC" >&5 +$as_echo "$CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CCC"; then + ac_ct_CCC=$CCC + # Extract the first word of "CC", so it can be a program name with args. +set dummy CC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CCC"; then + ac_cv_prog_ac_ct_CCC="$ac_ct_CCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CCC="CC" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CCC=$ac_cv_prog_ac_ct_CCC +if test -n "$ac_ct_CCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CCC" >&5 +$as_echo "$ac_ct_CCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CCC" = x; then + CCC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CCC=$ac_ct_CCC + fi +else + CCC="$ac_cv_prog_CCC" +fi +;; + esac + fi + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +$as_echo "$ac_ct_CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 +$as_echo_n "checking whether we are using the GNU C++ compiler... 
" >&6; } +if ${ac_cv_cxx_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +$as_echo_n "checking whether $CXX accepts -g... " >&6; } +if ${ac_cv_prog_cxx_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +else + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +$as_echo "$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ###### WORKAROUND: SEE SR #7938 + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 +$as_echo_n "checking how to run the C++ preprocessor... " >&6; } +if test -z "$CXXCPP"; then + if ${ac_cv_prog_CXXCPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CXXCPP needs to be expanded + for CXXCPP in "$CXX -E" "/lib/cpp" + do + ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. 
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CXXCPP=$CXXCPP + +fi + CXXCPP=$ac_cv_prog_CXXCPP +else + ac_cv_prog_CXXCPP=$CXXCPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 +$as_echo "$CXXCPP" >&6; } +ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. 
+continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ############################### + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ supports the ISO C++ standard includes" >&5 +$as_echo_n "checking whether C++ supports the ISO C++ standard includes... " >&6; } + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +std::ostream *o; return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + db_cv_cxx_have_stdheaders=yes +else + db_cv_cxx_have_stdheaders=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_cxx_have_stdheaders" >&5 +$as_echo "$db_cv_cxx_have_stdheaders" >&6; } +if test "$db_cv_cxx_have_stdheaders" = yes; then + cxx_have_stdheaders="#define HAVE_CXX_STDHEADERS 1" +fi + MAKEFILE_CXX="${CXX}" + MAKEFILE_CXXLINK="${CXX}" +fi + +# Do some gcc specific configuration. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using gcc version 2.96" >&5 +$as_echo_n "checking whether we are using gcc version 2.96... " >&6; } +if ${db_cv_gcc_2_96+:} false; then : + $as_echo_n "(cached) " >&6 +else + +db_cv_gcc_2_96=no +if test "$GCC" = "yes"; then + GCC_VERSION=`${MAKEFILE_CC} --version` + case ${GCC_VERSION} in + 2.96*) + db_cv_gcc_2_96=yes;; + esac +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_gcc_2_96" >&5 +$as_echo "$db_cv_gcc_2_96" >&6; } +if test "$db_cv_gcc_2_96" = "yes"; then + CFLAGS=`echo "$CFLAGS" | sed 's/-O2/-O/'` + CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-O2/-O/'` + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: INSTALLED GCC COMPILER HAS SERIOUS BUGS; PLEASE UPGRADE." >&5 +$as_echo "$as_me: WARNING: INSTALLED GCC COMPILER HAS SERIOUS BUGS; PLEASE UPGRADE." >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GCC OPTIMIZATION LEVEL SET TO -O." >&5 +$as_echo "$as_me: WARNING: GCC OPTIMIZATION LEVEL SET TO -O." >&2;} +fi + +# We need the -Kthread/-pthread flag when compiling on SCO/Caldera's UnixWare +# and OpenUNIX releases. 
We can't make the test until we know which compiler +# we're using. +case "$host_os" in +sysv5UnixWare*|sysv5OpenUNIX8*) + if test "$GCC" == "yes"; then + CPPFLAGS="$CPPFLAGS -pthread" + LDFLAGS="$LDFLAGS -pthread" + else + CPPFLAGS="$CPPFLAGS -Kthread" + LDFLAGS="$LDFLAGS -Kthread" + fi;; +esac + +# Export our compiler preferences for the libtool configuration. +export CC CCC +CCC=$CXX + +# Libtool configuration. +case `pwd` in + *\ * | *\ *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4' +macro_revision='1.3293' + + + + + + + + + + + + + +ltmain="$ac_aux_dir/ltmain.sh" + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +$as_echo_n "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. 
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case "$ECHO" in + printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +$as_echo "printf" >&6; } ;; + print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +$as_echo "print -r" >&6; } ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +$as_echo "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +$as_echo_n "checking for a sed that does not truncate output... " >&6; } +if ${ac_cv_path_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue +# Check for GNU ac_path_SED and select it if it is found. 
+ # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +$as_echo "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +$as_echo_n "checking for fgrep... " >&6; } +if ${ac_cv_path_FGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in fgrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue +# Check for GNU ac_path_FGREP and select it if it is found. 
+ # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +$as_echo "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... 
" >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... 
" >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if ${lt_cv_path_NM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +$as_echo "$lt_cv_path_NM" >&6; } +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +$as_echo "$DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +$as_echo "$ac_ct_DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +$as_echo_n "checking the name lister ($NM) interface... 
" >&6; } +if ${lt_cv_nm_interface+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +$as_echo "$lt_cv_nm_interface" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +$as_echo_n "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +$as_echo "no, using $LN_S" >&6; } +fi + +# find the maximum length of command line arguments +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +$as_echo_n "checking the maximum length of command line arguments... " >&6; } +if ${lt_cv_sys_max_cmd_len+:} false; then : + $as_echo_n "(cached) " >&6 +else + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. 
+ # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n $lt_cv_sys_max_cmd_len ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +$as_echo "$lt_cv_sys_max_cmd_len" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 +$as_echo "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5 +$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; } +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5 +$as_echo "$xsi_shell" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5 +$as_echo_n "checking whether the shell understands \"+=\"... 
" >&6; } +lt_shell_append=no +( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5 +$as_echo "$lt_shell_append" >&6; } + + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +$as_echo_n "checking how to convert $build file names to $host format... " >&6; } +if ${lt_cv_to_host_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +$as_echo "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +$as_echo_n "checking 
how to convert $build file names to toolchain format... " >&6; } +if ${lt_cv_to_tool_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + #assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +$as_echo "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +$as_echo_n "checking for $LD option to reload object files... " >&6; } +if ${lt_cv_ld_reload_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_reload_flag='-r' +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +$as_echo "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test "$GCC" != yes; then + reload_cmds=false + fi + ;; + darwin*) + if test "$GCC" = yes; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +$as_echo "$OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +$as_echo "$ac_ct_OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +$as_echo_n "checking how to recognize dependent libraries... " >&6; } +if ${lt_cv_deplibs_check_method+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [[regex]]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given extended regex. 
+# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. + if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac + +fi +{ 
$as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +$as_echo "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +$as_echo "$DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. 
+set dummy dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +$as_echo "$ac_ct_DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +$as_echo_n "checking how to associate runtime and link libraries... 
" >&6; } +if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +$as_echo "$AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_AR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +$as_echo "$ac_ct_AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} +: ${AR_FLAGS=cru} + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +$as_echo_n "checking for archiver @FILE support... " >&6; } +if ${lt_cv_ar_at_file+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +$as_echo "$lt_cv_ar_at_file" >&6; } + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; } +if ${lt_cv_sys_global_symbol_pipe+:} false; then : + $as_echo_n "(cached) " >&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. 
+case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. 
+ # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 + (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. 
+ if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +$as_echo "failed" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +$as_echo "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +$as_echo_n "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test "${with_sysroot+set}" = set; then : + withval=$with_sysroot; +else + with_sysroot=no +fi + + +lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5 +$as_echo "${with_sysroot}" >&6; } + as_fn_error $? "The sysroot must be an absolute path." 
"$LINENO" 5 + ;; +esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +$as_echo "${lt_sysroot:-no}" >&6; } + + + + + +# Check whether --enable-libtool-lock was given. +if test "${enable_libtool_lock+set}" = set; then : + enableval=$enable_libtool_lock; +fi + +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; + ppc64-*linux*|powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + ppc*-*linux*|powerpc*-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +$as_echo_n "checking whether the C compiler needs -belf... " >&6; } +if ${lt_cv_cc_needs_belf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_cc_needs_belf=yes +else + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +$as_echo "$lt_cv_cc_needs_belf" >&6; } + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +sparc*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) LD="${LD-ld} -m elf64_sparc" ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +$as_echo "$MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +$as_echo "$ac_ct_MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if ${lt_cv_path_mainfest_tool+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +$as_echo "$lt_cv_path_mainfest_tool" >&6; } +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. +set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +$as_echo "$DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +$as_echo "$ac_ct_DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +$as_echo "$NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +$as_echo "$ac_ct_NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +$as_echo "$LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_LIPO="lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +$as_echo "$ac_ct_LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_OTOOL="otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +$as_echo "$ac_ct_OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +$as_echo "$OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +$as_echo "$ac_ct_OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +$as_echo_n "checking for -single_module linker flag... " >&6; } +if ${lt_cv_apple_cc_single_mod+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + if test -f libconftest.dylib && test ! 
-s conftest.err && test $_lt_result = 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +$as_echo "$lt_cv_apple_cc_single_mod" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } +if ${lt_cv_ld_exported_symbols_list+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_ld_exported_symbols_list=yes +else + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +$as_echo "$lt_cv_ld_exported_symbols_list" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +$as_echo_n "checking for -force_load linker flag... 
" >&6; } +if ${lt_cv_ld_force_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR cru libconftest.a conftest.o" >&5 + $AR cru libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +$as_echo "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? 
+ case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[91]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[012]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. 
+ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. 
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in dlfcn.h +do : + ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DLFCN_H 1 +_ACEOF + +fi + +done + + + +func_stripname_cnf () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname_cnf + + + + + +# Set options + + + + enable_dlopen=no + + + enable_win32_dll=no + + + # Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_shared=yes +fi + + + + + + + + + + # Check whether --enable-static was given. +if test "${enable_static+set}" = set; then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test "${with_pic+set}" = set; then : + withval=$with_pic; pic_mode="$withval" +else + pic_mode=default +fi + + +test -z "$pic_mode" && pic_mode=default + + + + + + + + # Check whether --enable-fast-install was given. +if test "${enable_fast_install+set}" = set; then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_fast_install=yes +fi + + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +$as_echo_n "checking for objdir... " >&6; } +if ${lt_cv_objdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +$as_echo "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +cat >>confdefs.h <<_ACEOF +#define LT_OBJDIR "$lt_cv_objdir/" +_ACEOF + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +for cc_temp in $compiler""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. 
+ ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/${ac_tool_prefix}file; then + lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +$as_echo_n "checking for file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. 
+ ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/file; then + lt_cv_path_MAGIC_CMD="$ac_dir/file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC="$CC" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. 
+objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } +if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test "$GCC" = yes; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. 
+ lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + lt_prog_compiler_pic='-Xcompiler -fPIC' + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? 
+ lt_prog_compiler_static='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. 
+ lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ F* | *Sun*Fortran*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + 
lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... " >&6; } +if ${lt_cv_prog_compiler_pic+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +$as_echo "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if ${lt_cv_prog_compiler_pic_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? 
+ cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works" >&6; } + +if test x"$lt_cv_prog_compiler_pic_works" = xyes; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +$as_echo "$lt_cv_prog_compiler_static_works" >&6; } + +if test x"$lt_cv_prog_compiler_static_works" = xyes; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links="nottested" +if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... " >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test "$hard_links" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... 
" >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_flag_spec_ld= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... 
+ *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='${wl}--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. 
Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec= + hardcode_libdir_flag_spec_ld='-rpath $libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script 
$output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = no; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + export_dynamic_flag_spec='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' ${wl}-bernotok' + allow_undefined_flag=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. 
+ old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + freebsd1*) + ld_shlibs=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # 
support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test "$GCC" = yes; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_flag_spec_ld='+b $libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +$as_echo_n "checking if $CC understands -b... 
" >&6; } +if ${lt_cv_prog_compiler__b+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler__b=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +$as_echo "$lt_cv_prog_compiler__b" >&6; } + +if test x"$lt_cv_prog_compiler__b" = xyes; then + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if ${lt_cv_irix_exported_symbol+:} false; then : + $as_echo_n "(cached) " >&6 +else + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_irix_exported_symbol=yes +else + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +$as_echo "$lt_cv_irix_exported_symbol" >&6; } + if test "$lt_cv_irix_exported_symbol" = yes; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + 
hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case $host_os in + openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags 
-soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M 
${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='${wl}-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | 
sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag='${wl}-z,text' + allow_undefined_flag='${wl}-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='${wl}-Blargedynsym' + ;; + esac + fi + fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +$as_echo "$ld_shlibs" >&6; } +test "$ld_shlibs" = no && can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. 
+ ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... " >&6; } + +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. 
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. + case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([A-Za-z]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[4-9]*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. 
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) 
so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... + sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[123]*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. 
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. 
Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test 
"$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test "X$hardcode_automatic" = "Xyes" ; then + + # We can hardcode non-existent directories. 
+ if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +$as_echo "$hardcode_action" >&6; } + +if test "$hardcode_action" = relink || + test "$inherit_rpath" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +$as_echo_n "checking for shl_load in -ldld... " >&6; } +if ${ac_cv_lib_dld_shl_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_shl_load=yes +else + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +$as_echo "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" +else + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +$as_echo_n "checking for dlopen in -lsvld... 
" >&6; } +if ${ac_cv_lib_svld_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_svld_dlopen=yes +else + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +$as_echo "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +$as_echo_n "checking for dld_link in -ldld... " >&6; } +if ${ac_cv_lib_dld_dld_link+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (); +int +main () +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_dld_link=yes +else + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +$as_echo "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes; then : + lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +$as_echo_n "checking whether a program can dlopen itself... " >&6; } +if ${lt_cv_dlopen_self+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +$as_echo "$lt_cv_dlopen_self" >&6; } + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +$as_echo_n "checking whether a statically linked program can dlopen itself... 
" >&6; } +if ${lt_cv_dlopen_self_static+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +$as_echo "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +$as_echo_n "checking whether stripping libraries is possible... " >&6; } +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + ;; + esac +fi + + + + + + + + + + + + + # Report which library types will actually be built + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +$as_echo_n "checking if libtool supports shared libraries... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +$as_echo "$can_build_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +$as_echo_n "checking whether to build shared libraries... " >&6; } + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +$as_echo "$enable_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +$as_echo_n "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. 
+ test "$enable_shared" = yes || enable_static=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +$as_echo "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC="$lt_save_CC" + + if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 +$as_echo_n "checking how to run the C++ preprocessor... " >&6; } +if test -z "$CXXCPP"; then + if ${ac_cv_prog_CXXCPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CXXCPP needs to be expanded + for CXXCPP in "$CXX -E" "/lib/cpp" + do + ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. 
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CXXCPP=$CXXCPP + +fi + CXXCPP=$ac_cv_prog_CXXCPP +else + ac_cv_prog_CXXCPP=$CXXCPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 +$as_echo "$CXXCPP" >&6; } +ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
+rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +else + _lt_caught_CXX_error=yes +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +archive_cmds_need_lc_CXX=no +allow_undefined_flag_CXX= +always_export_symbols_CXX=no +archive_expsym_cmds_CXX= +compiler_needs_object_CXX=no +export_dynamic_flag_spec_CXX= +hardcode_direct_CXX=no +hardcode_direct_absolute_CXX=no +hardcode_libdir_flag_spec_CXX= +hardcode_libdir_flag_spec_ld_CXX= +hardcode_libdir_separator_CXX= +hardcode_minus_L_CXX=no +hardcode_shlibpath_var_CXX=unsupported +hardcode_automatic_CXX=no +inherit_rpath_CXX=no +module_cmds_CXX= +module_expsym_cmds_CXX= +link_all_deplibs_CXX=unknown +old_archive_cmds_CXX=$old_archive_cmds +reload_flag_CXX=$reload_flag +reload_cmds_CXX=$reload_cmds +no_undefined_flag_CXX= +whole_archive_flag_spec_CXX= +enable_shared_with_static_runtimes_CXX=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +objext_CXX=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test "$_lt_caught_CXX_error" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. 
+ lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + compiler_CXX=$CC + for cc_temp in $compiler""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` + + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test "$GXX" = yes; then + lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' + else + lt_prog_compiler_no_builtin_flag_CXX= + fi + + if test "$GXX" = yes; then + # Set up default GNU C++ configuration + + + +# Check whether --with-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. 
+ [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. 
+case `$LD -v 2>&1 &5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. + if test "$with_gnu_ld" = yes; then + archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + + hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='${wl}' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec_CXX= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + ld_shlibs_CXX=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aix[4-9]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + archive_cmds_CXX='' + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + file_list_spec_CXX='${wl}-f,' + + if test "$GXX" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_CXX=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_CXX=yes + hardcode_libdir_flag_spec_CXX='-L$libdir' + hardcode_libdir_separator_CXX= + fi + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + export_dynamic_flag_spec_CXX='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + always_export_symbols_CXX=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + allow_undefined_flag_CXX='-berok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" + + archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag_CXX="-z nodefs" + archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_CXX=' ${wl}-bernotok' + allow_undefined_flag_CXX=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_CXX='$convenience' + fi + archive_cmds_need_lc_CXX=yes + # This is similar to how AIX traditionally builds its shared + # libraries. + archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_CXX=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. 
FIXME + archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs_CXX=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec_CXX=' ' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=yes + file_list_spec_CXX='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true' + enable_shared_with_static_runtimes_CXX=yes + # Don't use ranlib + old_postinstall_cmds_CXX='chmod 644 $oldlib' + postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, + # as there is no search path for DLLs. + hardcode_libdir_flag_spec_CXX='-L$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-all-symbols' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=no + enable_shared_with_static_runtimes_CXX=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... 
+ archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_CXX=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + + + archive_cmds_need_lc_CXX=no + hardcode_direct_CXX=no + hardcode_automatic_CXX=yes + hardcode_shlibpath_var_CXX=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + else + whole_archive_flag_spec_CXX='' + fi + link_all_deplibs_CXX=yes + allow_undefined_flag_CXX="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + module_expsym_cmds_CXX="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs 
\$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + if test "$lt_cv_apple_cc_single_mod" != "yes"; then + archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" + archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" + fi + + else + ld_shlibs_CXX=no + fi + + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + freebsd[12]*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + ld_shlibs_CXX=no + ;; + + freebsd-elf*) + archive_cmds_need_lc_CXX=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + ld_shlibs_CXX=yes + ;; + + gnu*) + ;; + + haiku*) + archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + link_all_deplibs_CXX=yes + ;; + + hpux9*) + hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' + hardcode_libdir_separator_CXX=: + export_dynamic_flag_spec_CXX='${wl}-E' + hardcode_direct_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test $with_gnu_ld = no; then + hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' + hardcode_libdir_separator_CXX=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + export_dynamic_flag_spec_CXX='${wl}-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_CXX=no + hardcode_shlibpath_var_CXX=no + ;; + *) + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + if test $with_gnu_ld = no; then + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + interix[3-9]*) + 
hardcode_direct_CXX=no + hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' + export_dynamic_flag_spec_CXX='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test "$GXX" = yes; then + if test "$with_gnu_ld" = no; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' + fi + fi + link_all_deplibs_CXX=yes + ;; + esac + hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator_CXX=: + inherit_rpath_CXX=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + archive_cmds_need_lc_CXX=no + hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-dynamic' + whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [1-5].* | *pgcpp\ [1-5].*) + prelink_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + old_archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + archive_expsym_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects 
--instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + esac + + hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-dynamic' + whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + ;; + cxx*) + # Compaq C++ + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec_CXX='${wl}--export-dynamic' + archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' + hardcode_libdir_flag_spec_CXX='-R$libdir' + whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object_CXX=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. 
+ output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + ld_shlibs_CXX=yes + ;; + + openbsd2*) + # C++ shared libraries are fairly broken + ld_shlibs_CXX=no + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + hardcode_direct_absolute_CXX=yes + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + export_dynamic_flag_spec_CXX='${wl}-E' + 
whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + ld_shlibs_CXX=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' + hardcode_libdir_separator_CXX=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. + case $host in + osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + cxx*) + case $host in + osf3*) + allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' + ;; + *) + allow_undefined_flag_CXX=' -expect_unresolved \*' + archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s 
%s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ + $RM $lib.exp' + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + ;; + esac + + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' + case $host in + osf3*) + archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + *) + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + esac + + 
hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + archive_cmds_need_lc_CXX=yes + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_shlibpath_var_CXX=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' + ;; + esac + link_all_deplibs_CXX=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + no_undefined_flag_CXX=' ${wl}-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require `-G' NOT `-shared' on this + # platform. 
+ archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir' + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_CXX='${wl}-z,text' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. 
If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag_CXX='${wl}-z,text' + allow_undefined_flag_CXX='${wl}-z,nodefs' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='${wl}-R,$libdir' + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + export_dynamic_flag_spec_CXX='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~ + '"$old_archive_cmds_CXX" + reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~ + '"$reload_cmds_CXX" + ;; + *) + archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +$as_echo "$ld_shlibs_CXX" >&6; } + test "$ld_shlibs_CXX" = no && can_build_shared=no + + GCC_CXX="$GXX" + LD_CXX="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... 
+ # Dependencies to place before and after the object being linked: +predep_objects_CXX= +postdep_objects_CXX= +predeps_CXX= +postdeps_CXX= +compiler_lib_search_path_CXX= + +cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF + + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +esac + +if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. + pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case ${prev}${p} in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. + if test $p = "-L" || + test $p = "-R"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test "$pre_test_object_deps_done" = no; then + case ${prev} in + -L | -R) + # Internal compiler library paths should come after those + # provided the user. The postdeps already come after the + # user supplied libs so there is no need to process them. 
+ if test -z "$compiler_lib_search_path_CXX"; then + compiler_lib_search_path_CXX="${prev}${p}" + else + compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$postdeps_CXX"; then + postdeps_CXX="${prev}${p}" + else + postdeps_CXX="${postdeps_CXX} ${prev}${p}" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. + if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test "$pre_test_object_deps_done" = no; then + if test -z "$predep_objects_CXX"; then + predep_objects_CXX="$p" + else + predep_objects_CXX="$predep_objects_CXX $p" + fi + else + if test -z "$postdep_objects_CXX"; then + postdep_objects_CXX="$p" + else + postdep_objects_CXX="$postdep_objects_CXX $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling CXX test program" +fi + +$RM -f confest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +case $host_os in +interix[3-9]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + predep_objects_CXX= + postdep_objects_CXX= + postdeps_CXX= + ;; + +linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. 
+ case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + if test "$solaris_use_stlport4" != yes; then + postdeps_CXX='-library=Cstd -library=Crun' + fi + ;; + esac + ;; + +solaris*) + case $cc_basename in + CC* | sunCC*) + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + # Adding this requires a known-good setup of shared libraries for + # Sun compiler versions before 5.6, else PIC objects from an old + # archive will be linked into the output, leading to subtle bugs. + if test "$solaris_use_stlport4" != yes; then + postdeps_CXX='-library=Cstd -library=Crun' + fi + ;; + esac + ;; +esac + + +case " $postdeps_CXX " in +*" -lc "*) archive_cmds_need_lc_CXX=no ;; +esac + compiler_lib_search_dirs_CXX= +if test -n "${compiler_lib_search_path_CXX}"; then + compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lt_prog_compiler_wl_CXX= +lt_prog_compiler_pic_CXX= +lt_prog_compiler_static_CXX= + + + # C++ specific cases for pic, static, wl, etc. + if test "$GXX" = yes; then + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_CXX='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. 
+ lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_CXX='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + lt_prog_compiler_pic_CXX= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_CXX= + ;; + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_CXX=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + else + case $host_os in + aix[4-9]*) + # All AIX code is PIC. 
+ if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + else + lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + ;; + dgux*) + case $cc_basename in + ec++*) + lt_prog_compiler_pic_CXX='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' + if test "$host_cpu" != ia64; then + lt_prog_compiler_pic_CXX='+Z' + fi + ;; + aCC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_CXX='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # KAI C++ Compiler + lt_prog_compiler_wl_CXX='--backend -Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64 which still supported -KPIC. 
+ lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + lt_prog_compiler_static_CXX='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fpic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + xlc* | xlC* | bgxl[cC]* | mpixl[cC]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-qpic' + lt_prog_compiler_static_CXX='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + lt_prog_compiler_pic_CXX='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + lt_prog_compiler_wl_CXX='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + lt_prog_compiler_pic_CXX='-pic' + ;; + cxx*) + # Digital/Compaq C++ + lt_prog_compiler_wl_CXX='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + lt_prog_compiler_pic_CXX='-pic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + lcc*) + # Lucid + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + lt_prog_compiler_pic_CXX='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + lt_prog_compiler_can_build_shared_CXX=no + ;; + esac + fi + +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_CXX= + ;; + *) + lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... " >&6; } +if ${lt_cv_prog_compiler_pic_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; } +lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX + +# +# Check to make sure the PIC flag actually works. 
+# +if test -n "$lt_prog_compiler_pic_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; } +if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works_CXX=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_CXX=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; } + +if test x"$lt_cv_prog_compiler_pic_works_CXX" = xyes; then + case $lt_prog_compiler_pic_CXX in + "" | " "*) ;; + *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;; + esac +else + lt_prog_compiler_pic_CXX= + lt_prog_compiler_can_build_shared_CXX=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works_CXX=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_CXX=yes + fi + else + lt_cv_prog_compiler_static_works_CXX=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; } + +if test x"$lt_cv_prog_compiler_static_works_CXX" = xyes; then + : +else + lt_prog_compiler_static_CXX= +fi + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + +hard_links="nottested" +if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test "$hard_links" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + case $host_os in + aix[4-9]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global defined + # symbols, whereas GNU nm marks them as "W". 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + export_symbols_cmds_CXX="$ltdll_cmds" + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + ;; + esac + ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +$as_echo "$ld_shlibs_CXX" >&6; } +test "$ld_shlibs_CXX" = no && can_build_shared=no + +with_gnu_ld_CXX=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc_CXX" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_CXX=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds_CXX in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_CXX + pic_flag=$lt_prog_compiler_pic_CXX + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_CXX + allow_undefined_flag_CXX= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_CXX=no + else + lt_cv_archive_cmds_need_lc_CXX=yes + fi + allow_undefined_flag_CXX=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; } + archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... 
" >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[4-9]*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[123]*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + 
hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 
"|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. 
+ # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. + case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + 
shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + 
esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... " >&6; } +hardcode_action_CXX= +if test -n "$hardcode_libdir_flag_spec_CXX" || + test -n "$runpath_var_CXX" || + test "X$hardcode_automatic_CXX" = "Xyes" ; then + + # We can hardcode non-existent directories. 
+ if test "$hardcode_direct_CXX" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" != no && + test "$hardcode_minus_L_CXX" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action_CXX=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_CXX=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_CXX=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5 +$as_echo "$hardcode_action_CXX" >&6; } + +if test "$hardcode_action_CXX" = relink || + test "$inherit_rpath_CXX" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test "$_lt_caught_CXX_error" != yes + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + + +SOFLAGS="-rpath \$(libdir)" + +# Set SOSUFFIX and friends + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking SOSUFFIX from libtool" >&5 +$as_echo_n "checking SOSUFFIX from libtool... 
" >&6; } + module=no + + versuffix="" + release="" + libname=libfoo + eval _SOSUFFIX=\"$shrext_cmds\" + if test "$_SOSUFFIX" = "" ; then + _SOSUFFIX=".so" + if test "$enable_shared" != "yes"; then + if test "$_SOSUFFIX_MESSAGE" = ""; then + _SOSUFFIX_MESSAGE=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libtool may not know about this architecture." >&5 +$as_echo "$as_me: WARNING: libtool may not know about this architecture." >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&5 +$as_echo "$as_me: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&2;} + fi + fi + fi + + SOSUFFIX=$_SOSUFFIX + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SOSUFFIX" >&5 +$as_echo "$SOSUFFIX" >&6; } + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking MODSUFFIX from libtool" >&5 +$as_echo_n "checking MODSUFFIX from libtool... " >&6; } + module=yes + + versuffix="" + release="" + libname=libfoo + eval _SOSUFFIX=\"$shrext_cmds\" + if test "$_SOSUFFIX" = "" ; then + _SOSUFFIX=".so" + if test "$enable_shared" != "yes"; then + if test "$_SOSUFFIX_MESSAGE" = ""; then + _SOSUFFIX_MESSAGE=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libtool may not know about this architecture." >&5 +$as_echo "$as_me: WARNING: libtool may not know about this architecture." >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&5 +$as_echo "$as_me: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&2;} + fi + fi + fi + + MODSUFFIX=$_SOSUFFIX + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MODSUFFIX" >&5 +$as_echo "$MODSUFFIX" >&6; } + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking JMODSUFFIX from libtool" >&5 +$as_echo_n "checking JMODSUFFIX from libtool... 
" >&6; } + module=yes + + versuffix="" + release="" + libname=libfoo + eval _SOSUFFIX=\"$shrext_cmds\" + if test "$_SOSUFFIX" = "" ; then + _SOSUFFIX=".so" + if test "$enable_shared" != "yes"; then + if test "$_SOSUFFIX_MESSAGE" = ""; then + _SOSUFFIX_MESSAGE=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libtool may not know about this architecture." >&5 +$as_echo "$as_me: WARNING: libtool may not know about this architecture." >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&5 +$as_echo "$as_me: WARNING: assuming $_SOSUFFIX suffix for dynamic libraries." >&2;} + fi + fi + fi + + if test `uname` = "Darwin"; then + JMODSUFFIX=".jnilib" + else + JMODSUFFIX=$_SOSUFFIX + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JMODSUFFIX" >&5 +$as_echo "$JMODSUFFIX" >&6; } + + + +LIBTOOL="./libtool" + +INSTALLER="\$(LIBTOOL) --mode=install cp -p" + +MAKEFILE_CC="\$(LIBTOOL) --mode=compile ${MAKEFILE_CC}" +MAKEFILE_SOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK} -avoid-version" +MAKEFILE_CCLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK}" +MAKEFILE_CXX="\$(LIBTOOL) --mode=compile ${MAKEFILE_CXX}" +MAKEFILE_XSOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK} -avoid-version" +MAKEFILE_CXXLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK}" + + +case "$host_os" in +cygwin* | mingw*) + MAKEFILE_SOLINK="$MAKEFILE_SOLINK -no-undefined" + MAKEFILE_XSOLINK="$MAKEFILE_XSOLINK -no-undefined";; +esac + +case "$host_os" in + darwin*) + LIBTSO_MODULE="" + LIBTSO_MODSUFFIX=".dylib";; + *qnx*) + LIBTSO_MODULE="" + LIBTSO_MODSUFFIX=$MODSUFFIX;; + *) + LIBTSO_MODULE="-module" + LIBTSO_MODSUFFIX=$MODSUFFIX;; +esac + +if test "$enable_static" = "yes"; then + test "$AR" = "false" && as_fn_error $? "No ar utility found." "$LINENO" 5 +fi + +# C API. 
+if test "$enable_shared" = "no"; then + DEFAULT_LIB="\$(libdb_version)" + POSTLINK=": " + o=".o" +else + DEFAULT_LIB="\$(libso_target)" + POSTLINK="\$(LIBTOOL) --mode=execute true" + o=".lo" +fi +INSTALL_LIBS="$DEFAULT_LIB" +if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libdb)" +fi + +# Optional C++ API. +if test "$db_cv_cxx" = "yes"; then + if test "$enable_shared" = "no"; then + DEFAULT_LIB_CXX="\$(libcxx_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_CXX="\$(libxso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_CXX" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libcxx)" + fi +fi + +# Optional Java API / JDBC. +if test "$db_cv_java" = "yes" -o "$db_cv_jdbc" = "yes"; then + # BDB Java API requires shared libraries. + if test "$db_cv_java" = "yes" -a "$enable_shared" = "no"; then + as_fn_error $? "Java requires shared libraries" "$LINENO" 5 + fi + + # A classpath that includes . is needed to check for Java + # Since Cygwin uses Windows' javac, we need Windows path separators + case "$host_os" in + cygwin*) CLASSPATH=".;$CLASSPATH";; + *) CLASSPATH=".:$CLASSPATH";; + esac + export CLASSPATH + + +if test "x$JAVAPREFIX" = x; then + test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVAC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVAC"; then + ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVAC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVAC=$ac_cv_prog_JAVAC +if test -n "$JAVAC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVAC" >&5 +$as_echo "$JAVAC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVAC" && break +done + +else + test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVAC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVAC"; then + ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVAC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVAC=$ac_cv_prog_JAVAC +if test -n "$JAVAC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVAC" >&5 +$as_echo "$JAVAC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVAC" && break +done +test -n "$JAVAC" || JAVAC="$JAVAPREFIX" + +fi +test "x$JAVAC" = x && as_fn_error $? 
"no acceptable Java compiler found in \$PATH" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $JAVAC works" >&5 +$as_echo_n "checking if $JAVAC works... " >&6; } +if ${ac_cv_prog_javac_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + +JAVA_TEST=Test.java +CLASS_TEST=Test.class +cat << \EOF > $JAVA_TEST +/* #line 17860 "configure" */ +public class Test { +} +EOF +if { ac_try='$JAVAC $JAVACFLAGS $JAVA_TEST' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } >/dev/null 2>&1; then + ac_cv_prog_javac_works=yes +else + as_fn_error $? "The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?)" "$LINENO" 5 + echo "configure: failed program was:" >&5 + cat $JAVA_TEST >&5 +fi +rm -f $JAVA_TEST $CLASS_TEST + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_javac_works" >&5 +$as_echo "$ac_cv_prog_javac_works" >&6; } + + + +if test "x$JAVAPREFIX" = x; then + test "x$JAR" = x && for ac_prog in jar$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAR"; then + ac_cv_prog_JAR="$JAR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAR=$ac_cv_prog_JAR +if test -n "$JAR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAR" >&5 +$as_echo "$JAR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAR" && break +done + +else + test "x$JAR" = x && for ac_prog in jar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAR"; then + ac_cv_prog_JAR="$JAR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAR=$ac_cv_prog_JAR +if test -n "$JAR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAR" >&5 +$as_echo "$JAR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAR" && break +done +test -n "$JAR" || JAR="$JAVAPREFIX" + +fi +test "x$JAR" = x && as_fn_error $? 
"no acceptable jar program found in \$PATH" "$LINENO" 5 + + +if test x$JAVAPREFIX = x; then + test x$JAVA = x && for ac_prog in java$EXEEXT kaffe$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVA+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVA"; then + ac_cv_prog_JAVA="$JAVA" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVA="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVA=$ac_cv_prog_JAVA +if test -n "$JAVA"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVA" >&5 +$as_echo "$JAVA" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVA" && break +done + +else + test x$JAVA = x && for ac_prog in java$EXEEXT kaffe$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVA+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVA"; then + ac_cv_prog_JAVA="$JAVA" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVA="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVA=$ac_cv_prog_JAVA +if test -n "$JAVA"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVA" >&5 +$as_echo "$JAVA" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVA" && break +done +test -n "$JAVA" || JAVA="$JAVAPREFIX" + +fi +test x$JAVA = x && as_fn_error $? "no acceptable Java virtual machine found in \$PATH" "$LINENO" 5 + +# Extract the first word of "uudecode$EXEEXT", so it can be a program name with args. +set dummy uudecode$EXEEXT; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_uudecode+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$uudecode"; then + ac_cv_prog_uudecode="$uudecode" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_uudecode="yes" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +uudecode=$ac_cv_prog_uudecode +if test -n "$uudecode"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $uudecode" >&5 +$as_echo "$uudecode" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +if test x$uudecode = xyes; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if uudecode can decode base 64 file" >&5 +$as_echo_n "checking if uudecode can decode base 64 file... " >&6; } +if ${ac_cv_prog_uudecode_base64+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat << \EOF > Test.uue +begin-base64 644 Test.class +yv66vgADAC0AFQcAAgEABFRlc3QHAAQBABBqYXZhL2xhbmcvT2JqZWN0AQAE +bWFpbgEAFihbTGphdmEvbGFuZy9TdHJpbmc7KVYBAARDb2RlAQAPTGluZU51 +bWJlclRhYmxlDAAKAAsBAARleGl0AQAEKEkpVgoADQAJBwAOAQAQamF2YS9s +YW5nL1N5c3RlbQEABjxpbml0PgEAAygpVgwADwAQCgADABEBAApTb3VyY2VG +aWxlAQAJVGVzdC5qYXZhACEAAQADAAAAAAACAAkABQAGAAEABwAAACEAAQAB +AAAABQO4AAyxAAAAAQAIAAAACgACAAAACgAEAAsAAQAPABAAAQAHAAAAIQAB +AAEAAAAFKrcAErEAAAABAAgAAAAKAAIAAAAEAAQABAABABMAAAACABQ= +==== +EOF +if uudecode$EXEEXT Test.uue; then + ac_cv_prog_uudecode_base64=yes +else + echo "configure: 18123: uudecode had trouble decoding base 64 file 'Test.uue'" >&5 + echo "configure: failed file was:" >&5 + cat Test.uue >&5 + ac_cv_prog_uudecode_base64=no +fi +rm -f Test.uue +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_uudecode_base64" >&5 +$as_echo "$ac_cv_prog_uudecode_base64" >&6; } +fi +if test x$ac_cv_prog_uudecode_base64 != xyes; then + rm -f Test.class + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: I have to compile Test.class from scratch" >&5 +$as_echo "$as_me: WARNING: I have to compile Test.class from scratch" 
>&2;} + if test x$ac_cv_prog_javac_works = xno; then + as_fn_error $? "Cannot compile java source. $JAVAC does not work properly" "$LINENO" 5 + fi + if test x$ac_cv_prog_javac_works = x; then + +if test "x$JAVAPREFIX" = x; then + test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVAC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVAC"; then + ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVAC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVAC=$ac_cv_prog_JAVAC +if test -n "$JAVAC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVAC" >&5 +$as_echo "$JAVAC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVAC" && break +done + +else + test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_JAVAC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$JAVAC"; then + ac_cv_prog_JAVAC="$JAVAC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_JAVAC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +JAVAC=$ac_cv_prog_JAVAC +if test -n "$JAVAC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVAC" >&5 +$as_echo "$JAVAC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$JAVAC" && break +done +test -n "$JAVAC" || JAVAC="$JAVAPREFIX" + +fi +test "x$JAVAC" = x && as_fn_error $? "no acceptable Java compiler found in \$PATH" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $JAVAC works" >&5 +$as_echo_n "checking if $JAVAC works... " >&6; } +if ${ac_cv_prog_javac_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + +JAVA_TEST=Test.java +CLASS_TEST=Test.class +cat << \EOF > $JAVA_TEST +/* #line 18241 "configure" */ +public class Test { +} +EOF +if { ac_try='$JAVAC $JAVACFLAGS $JAVA_TEST' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } >/dev/null 2>&1; then + ac_cv_prog_javac_works=yes +else + as_fn_error $? "The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?)" "$LINENO" 5 + echo "configure: failed program was:" >&5 + cat $JAVA_TEST >&5 +fi +rm -f $JAVA_TEST $CLASS_TEST + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_javac_works" >&5 +$as_echo "$ac_cv_prog_javac_works" >&6; } + + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $JAVA works" >&5 +$as_echo_n "checking if $JAVA works... 
" >&6; } +if ${ac_cv_prog_java_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + +JAVA_TEST=Test.java +CLASS_TEST=Test.class +TEST=Test +cat << \EOF > $JAVA_TEST +/* [#]line 18276 "configure" */ +public class Test { +public static void main (String args[]) { + System.exit (0); +} } +EOF +if test x$ac_cv_prog_uudecode_base64 != xyes; then + if { ac_try='$JAVAC $JAVACFLAGS $JAVA_TEST' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } && test -s $CLASS_TEST; then + : + else + echo "configure: failed program was:" >&5 + cat $JAVA_TEST >&5 + as_fn_error $? "The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?)" "$LINENO" 5 + fi +fi +if { ac_try='$JAVA $JAVAFLAGS $TEST' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } >/dev/null 2>&1; then + ac_cv_prog_java_works=yes +else + echo "configure: failed program was:" >&5 + cat $JAVA_TEST >&5 + as_fn_error $? "The Java VM $JAVA failed (see config.log, check the CLASSPATH?)" "$LINENO" 5 +fi +rm -fr $JAVA_TEST $CLASS_TEST Test.uue + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_java_works" >&5 +$as_echo "$ac_cv_prog_java_works" >&6; } + + + + + +JNI_INCLUDE_DIRS="" + +test "x$JAVAC" = x && as_fn_error $? "'$JAVAC' undefined" "$LINENO" 5 +# Extract the first word of "$JAVAC", so it can be a program name with args. +set dummy $JAVAC; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path__ACJNI_JAVAC+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $_ACJNI_JAVAC in + [\\/]* | ?:[\\/]*) + ac_cv_path__ACJNI_JAVAC="$_ACJNI_JAVAC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path__ACJNI_JAVAC="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path__ACJNI_JAVAC" && ac_cv_path__ACJNI_JAVAC="$JAVAC" + ;; +esac +fi +_ACJNI_JAVAC=$ac_cv_path__ACJNI_JAVAC +if test -n "$_ACJNI_JAVAC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ACJNI_JAVAC" >&5 +$as_echo "$_ACJNI_JAVAC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +test ! -x "$_ACJNI_JAVAC" && as_fn_error $? "$JAVAC could not be found in path" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking absolute path of $JAVAC" >&5 +$as_echo_n "checking absolute path of $JAVAC... " >&6; } +case "$_ACJNI_JAVAC" in +/*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ACJNI_JAVAC" >&5 +$as_echo "$_ACJNI_JAVAC" >&6; };; +*) as_fn_error $? "$_ACJNI_JAVAC is not an absolute path name" "$LINENO" 5;; +esac + + +# find the include directory relative to the javac executable +_cur=""$_ACJNI_JAVAC"" +while ls -ld "$_cur" 2>/dev/null | grep " -> " >/dev/null; do + { $as_echo "$as_me:${as_lineno-$LINENO}: checking symlink for $_cur" >&5 +$as_echo_n "checking symlink for $_cur... " >&6; } + _slink=`ls -ld "$_cur" | sed 's/.* -> //'` + case "$_slink" in + /*) _cur="$_slink";; + # 'X' avoids triggering unwanted echo options. 
+ *) _cur=`echo "X$_cur" | sed -e 's/^X//' -e 's:[^/]*$::'`"$_slink";; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_cur" >&5 +$as_echo "$_cur" >&6; } +done +_ACJNI_FOLLOWED="$_cur" + +_JTOPDIR=`echo "$_ACJNI_FOLLOWED" | sed -e 's://*:/:g' -e 's:/[^/]*$::'` +case "$host_os" in + darwin*) _JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[^/]*$::'` + _JINC="$_JTOPDIR/Headers";; + *) _JINC="$_JTOPDIR/include";; +esac + +# If we find jni.h in /usr/include, then it's not a java-only tree, so +# don't add /usr/include or subdirectories to the list of includes. +# An extra -I/usr/include can foul things up with newer gcc's. +# +# If we don't find jni.h, just keep going. Hopefully javac knows where +# to find its include files, even if we can't. +if test -r "$_JINC/jni.h"; then + if test "$_JINC" != "/usr/include"; then + JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JINC" + fi +else + _JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[^/]*$::'` + if test -r "$_JTOPDIR/include/jni.h"; then + if test "$_JTOPDIR" != "/usr"; then + JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include" + fi + fi +fi + +# get the likely subdirectories for system specific java includes +if test "$_JTOPDIR" != "/usr"; then + case "$host_os" in + aix*) _JNI_INC_SUBDIRS="aix";; + bsdi*) _JNI_INC_SUBDIRS="bsdos";; + cygwin*) _JNI_INC_SUBDIRS="win32";; + freebsd*) _JNI_INC_SUBDIRS="freebsd";; + hp*) _JNI_INC_SUBDIRS="hp-ux";; + linux*) _JNI_INC_SUBDIRS="linux genunix";; + osf*) _JNI_INC_SUBDIRS="alpha";; + solaris*) _JNI_INC_SUBDIRS="solaris";; + *) _JNI_INC_SUBDIRS="genunix";; + esac +fi + +# add any subdirectories that are present +for _JINCSUBDIR in $_JNI_INC_SUBDIRS +do + if test -d "$_JTOPDIR/include/$_JINCSUBDIR"; then + JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include/$_JINCSUBDIR" + fi +done + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking java version" >&5 +$as_echo_n "checking java version... 
" >&6; } + case "$JAVA" in + *kaffe* ) + JAVA_VERSION=`$JAVA -version 2>&1 | + sed -e '/Java Version:/!d' -e 's/.*Java Version: \([^ ]*\)[ ]*/\1/'` ;; + * ) JAVA_VERSION=`$JAVA -version 2>&1 | + sed -e '/ version /!d' -e 's/.*"\(.*\)".*/\1/'` ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVA_VERSION" >&5 +$as_echo "$JAVA_VERSION" >&6; } + case "$JAVA_VERSION" in + 1.[3456789]* | 1.[1-9][0-9]* | [23456789]* ) ;; + * ) + as_fn_error $? "Java version 1.3 or higher required, got $JAVA_VERSION" "$LINENO" 5 ;; + esac + + # Because of the code that SWIG generates to cast between pointers and + # integers, we need to add the flag "-fno-strict-aliasing" to the gcc + # command line when compiling the JNI code. This is documented in + # [#14953] and at http://www.swig.org/Doc1.3/Java.html + if test "${GCC}" = "yes"; then + SWIGCFLAGS="-fno-strict-aliasing" + fi + + for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS + do + CPPFLAGS="$CPPFLAGS -I$JNI_INCLUDE_DIR" + done + + if test "$db_cv_java" = "yes"; then + ADDITIONAL_LANG="$ADDITIONAL_LANG java" + INSTALL_LIBS="$INSTALL_LIBS \$(libjso_target)" + fi +else + JAVAC=nojavac +fi + +# MinGW support. +if test "$db_cv_mingw" = "yes"; then + OSDIR=os_windows + PATH_SEPARATOR="\\\\/:" + + $as_echo "#define DB_WIN32 1" >>confdefs.h + + $as_echo "#define STDC_HEADERS 1" >>confdefs.h + +else + OSDIR=os + PATH_SEPARATOR="/" + $as_echo "#define HAVE_SYSTEM_INCLUDE_FILES 1" >>confdefs.h + +fi + +# Optional SQL API. +if test "$db_cv_sql" = "yes"; then + ADDITIONAL_INCS="$ADDITIONAL_INCS dbsql.h" + ADDITIONAL_PROGS="$ADDITIONAL_PROGS dbsql" + + # Link against libdl, if found. It is only needed for the load + # extension, but shouldn't hurt. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -ldl" >&5 +$as_echo_n "checking for main in -ldl... 
" >&6; } +if ${ac_cv_lib_dl_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_main=yes +else + ac_cv_lib_dl_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_main" >&5 +$as_echo "$ac_cv_lib_dl_main" >&6; } +if test "x$ac_cv_lib_dl_main" = xyes; then : + SQL_LIBS="$SQL_LIBS -ldl" +fi +ac_cv_lib_dl=ac_cv_lib_dl_main + + + # Link against libedit or readline for command-line editing. + if test x"$with_readline" != xno; then + header=readline.h + for rl_lib in edit readline; do + found="yes" + save_LIBS="" + LIBS="" + { ac_cv_search_tgetent=; unset ac_cv_search_tgetent;} + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing tgetent" >&5 +$as_echo_n "checking for library containing tgetent... " >&6; } +if ${ac_cv_search_tgetent+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char tgetent (); +int +main () +{ +return tgetent (); + ; + return 0; +} +_ACEOF +for ac_lib in '' $rl_lib ncurses curses termcap; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_tgetent=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_tgetent+:} false; then : + break +fi +done +if ${ac_cv_search_tgetent+:} false; then : + +else + ac_cv_search_tgetent=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_tgetent" >&5 +$as_echo "$ac_cv_search_tgetent" >&6; } +ac_res=$ac_cv_search_tgetent +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + term_LIBS="$LIBS" +else + term_LIBS="" +fi + + as_ac_Lib=`$as_echo "ac_cv_lib_$rl_lib''_readline" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for readline in -l$rl_lib" >&5 +$as_echo_n "checking for readline in -l$rl_lib... " >&6; } +if eval \${$as_ac_Lib+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-l$rl_lib $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (); +int +main () +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$as_ac_Lib=yes" +else + eval "$as_ac_Lib=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +eval ac_res=\$$as_ac_Lib + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_ac_Lib"\" = x"yes"; then : + SQL_LIBS="$SQL_LIBS -l$rl_lib $term_LIBS" +else + found="no" +fi + + LIBS="$save_LIBS" + test "$found" = "yes" && break + done + + if test x"$rl_lib" = xedit; then + header="editline/readline.h" + fi + + if test "$found" = "yes"; then + as_ac_Header=`$as_echo "ac_cv_header_$header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + found="yes" +else + + found="no" + if test "$cross_compiling" != yes; then + for dir in /usr /usr/local /usr/local/readline /usr/contrib /mingw; do + for subdir in include include/readline; do + as_ac_File=`$as_echo "ac_cv_file_$dir/$subdir/$header" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $dir/$subdir/$header" >&5 +$as_echo_n "checking for $dir/$subdir/$header... " >&6; } +if eval \${$as_ac_File+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? 
"cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "$dir/$subdir/$header"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi +fi +eval ac_res=\$$as_ac_File + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes"; then : + found=yes +fi + + if test "$found" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -I$dir/$subdir" + break + fi + done + test "$found" = "yes" && break + done + fi +fi + + + fi + fi + + if test "$enable_shared" = "no"; then + DEFAULT_LIB_SQL="\$(libsql_version)" + else + DEFAULT_LIB_SQL="\$(libsqlso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_SQL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libsql)" + fi + + if test "$db_cv_test" = "yes"; then + subdirs="$subdirs sql" + ADDITIONAL_LANG="$ADDITIONAL_LANG sql-test" + fi + + if test "$db_cv_jdbc" = "yes"; then + subdirs="$subdirs jdbc" + ADDITIONAL_LANG="$ADDITIONAL_LANG jdbc" + fi + + if test "$db_cv_debug" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -DSQLITE_DEBUG=1" + fi + + if test "$db_cv_build_cryptography" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -DSQLITE_HAS_CODEC=1" + fi +fi + +if test "$db_cv_sql_compat" = "yes"; then + if test "$enable_shared" = "no"; then + DEFAULT_LIB_SQLITE="\$(libsqlite)" + else + DEFAULT_LIB_SQLITE="\$(libsqliteso_target)" + fi + + ADDITIONAL_INCS="$ADDITIONAL_INCS \$(langdir)/sql/generated/sqlite3.h" + ADDITIONAL_PROGS="$ADDITIONAL_PROGS sqlite3" + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_SQLITE" + + # This is different to the other libraries: we need to be very + # careful not to delete an existing installation of SQLite unless + # we are installing over it. + if test "$enable_shared" = "yes"; then + INSTALL_LIBS_EXTRA="$INSTALL_LIBS_EXTRA \$(libsqliteso)" + fi + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libsqlite)" + fi +fi + +# Optional SQL code generation tool. 
+if test "$db_cv_sql_codegen" = "yes"; then + ADDITIONAL_PROGS="$ADDITIONAL_PROGS db_sql_codegen" +fi + +# Optional STL API. +if test "$db_cv_stl" = "yes"; then + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler supports templates for STL" >&5 +$as_echo_n "checking whether the C++ compiler supports templates for STL... " >&6; } + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include + +using std::string; +using std::vector; +namespace dbstl_configure_test { + +template +class MyClass +{ +public: + explicit MyClass(int i) { imem = i;} + + MyClass(const T1& t1, const T2& t2, int i) + { + mem1 = t1; + mem2 = t2; + imem = i; + } + + template + T2 templ_mem_func(T1 t1, T3 t3) + { + mem2 = t1; + T3 t32 = t3; + T2 t2; + return t2; + } + + double templ_mem_func(T1 t1, double t3) + { + mem1 = t1; + double t32 = t3; + return t3; + } + + template + ReturnType templ_mem_func(T7, T8); + + operator T1() const {return mem1;} +private: + T1 mem1; + T2 mem2; + int imem; +}; + +template +template +ReturnType MyClass::templ_mem_func(T7, T8) +{ + ReturnType rt; + return rt; +} + +template<> +class MyClass +{ +public: + explicit MyClass(int i) { imem = i;} + + MyClass(const double& t1, const float& t2, int i) + { + mem1 = t1; + mem2 = t2; + imem = i; + } + + template + float templ_mem_func(double t1, T3 t3) + { + mem2 = t1; + T3 t32 = t3; + float t2; + return t2; + } + + double templ_mem_func(double t1, double t3) + { + mem1 = t1; + double t32 = t3; + return t3; + } + + template + ReturnType templ_mem_func(T7, T8); + + operator double() const {return mem1;} +private: + double mem1; + float mem2; + int imem; +}; + +template +ReturnType MyClass::templ_mem_func(T7, T8) +{ + 
ReturnType rt; + return rt; +} + +template +class MyClass2 { +public: + MyClass2(const T1& t1, const T2&t2){} +}; + +// partial specialization: both template parameters have same type +template +class MyClass2 { +public: + MyClass2(const T& t1, const T&t2){} +}; + +// partial specialization: second type is int +template +class MyClass2 { +public: + MyClass2(const T& t1, const int&t2){} +}; + +// partial specialization: both template parameters are pointer types +template +class MyClass2 { +public: + MyClass2(const T1* t1, const T2*t2){} +}; + +template +class MyClass2 { +public: + MyClass2(const T* t1, const T*t2){} +}; + +template +int part_spec_func(T4 t4, T5 t5) +{ + // Zero Initialization should work. + T4 t44 = T4(); + T5 t55 = T5(); + + t44 = t4; + t55 = t5; +} + +template +int part_spec_func(T4 t4, std::vector t55) +{ + T4 t44 = t4; + std::vector abc = t55; +} + +// maximum of two int values +inline int const& max (int const& a, int const& b) +{ + return a +inline T2 const max (T1 const& a, T2 const& b) +{ + return a +inline T const& max (T const& a, T const& b) +{ + return a +inline T const& max (T const& a, T const& b, T const& c) +{ + return max (max(a,b), c); +} + +template +class Base { +public: + void exit2(){} + Base(){} +}; + +template +class Derived : public Base { +public: + // Call Base() explicitly here, otherwise can't access it. + // Kind of like this->. 
+ Derived() : Base(){} + + void foo() { + this->exit2(); + } +}; + +} // dbstl_configure_test + +using namespace dbstl_configure_test; +int +main () +{ + + char cc = 'a'; + int i = 4; + double pi = 3.14; + float gold = 0.618; + + MyClass2 mif(i, gold); // uses MyClass2 + MyClass2 mff(gold, gold); // uses MyClass2 + MyClass2 mfi(gold, i); // uses MyClass2 + MyClass2 mp(&i, &gold); // uses MyClass2 + MyClass2 m(&i, &i); // uses MyClass2 + + MyClass obj1(i); + obj1.templ_mem_func(cc, pi); + obj1.templ_mem_func(cc, gold); + obj1.templ_mem_func(i, pi); + obj1.templ_mem_func(cc, cc); + char ch = (char)obj1; + + string str1("abc"), str2("def"); + MyClass obj2(str1.c_str(), str2, i); + obj2.templ_mem_func("klm", str2); + obj2.templ_mem_func("hij", pi); + + // Use j to help distinguish, otherwise unable to use the one defined + // outside of class body. + int j = obj2.templ_mem_func(cc, cc); + // Call explicitly. + obj2.templ_mem_func(gold, pi); + const char *pch = (const char*)obj2; + + MyClass obj3(pi, gold, i); + obj3.templ_mem_func(pi, i); + obj3.templ_mem_func(pi, str1); + obj3.templ_mem_func(pi, pi); + obj3.templ_mem_func(cc, pi); + obj3.templ_mem_func(cc, cc); + double tmpd = (double)obj3; + + MyClass obj4(i); + obj4.templ_mem_func(pi, i); + obj4.templ_mem_func(pi, str1); + obj4.templ_mem_func(pi, pi); + obj4.templ_mem_func(gold, pi); + tmpd = (double)obj4; + + // Function template partial specialization. + part_spec_func(pi, gold); + part_spec_func(gold, i); + part_spec_func(str1, str2); + std::vector strv; + part_spec_func(str1, strv); + std::vector dblv; + part_spec_func(pi, dblv); + + // Function template overloads and explicit call and deduction. 
+ dbstl_configure_test::max(7, 42, 68); // calls the template for three arguments + dbstl_configure_test::max(7.0, 42.0); // calls max (by argument deduction) + dbstl_configure_test::max('a', 'b'); // calls max (by argument deduction) + dbstl_configure_test::max(7, 42.0); + dbstl_configure_test::max(4,4.2); // instantiate T as double + dbstl_configure_test::max(7, 42); // calls the nontemplate for two ints + dbstl_configure_test::max<>(7, 42); // calls max (by argument deduction) + dbstl_configure_test::max(7, 42); // calls max (no argument deduction) + dbstl_configure_test::max('a', 42.7); // calls the nontemplate for two ints + + Base bobj; + bobj.exit2(); + // Using this-> to access base class members. + Derived dobj; + dobj.foo(); + dobj.exit2(); + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + as_fn_error $? "no" "$LINENO" 5 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ supports the wstring class" >&5 +$as_echo_n "checking whether C++ supports the wstring class... " >&6; } + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + std::wstring ws; ws.find_first_of(ws); +int +main () +{ + + ; + return 0; +}, + WSTRING_decl="#define HAVE_WSTRING 1" ; AC_MSG_RESULT(yes), + WSTRING_decl="#undef HAVE_WSTRING" ; AC_MSG_RESULT(no) +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for thread local storage (TLS) class" >&5 +$as_echo_n "checking for thread local storage (TLS) class... " >&6; } + + + ac_cv_tls=none + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + ax_tls_keywords="__thread __declspec(thread) __declspec(__thread)" + for ax_tls_decl_keyword in $ax_tls_keywords ""; do + for ax_tls_defn_keyword in $ax_tls_keywords ""; do + test -z "$ax_tls_decl_keyword" && + test -z "$ax_tls_defn_keyword" && continue + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +template class TLSClass { + public: static $ax_tls_decl_keyword T *tlsvar; + }; + class TLSClass2 { + public: static $ax_tls_decl_keyword int tlsvar; + }; + template $ax_tls_defn_keyword T* TLSClass::tlsvar = NULL; + $ax_tls_defn_keyword int TLSClass2::tlsvar = 1; + static $ax_tls_decl_keyword int x = 0; +int +main () +{ +TLSClass::tlsvar = NULL; TLSClass2::tlsvar = 1; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_tls=modifier ; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done + test "$ac_cv_tls" = none || break + done + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "$ac_cv_tls" = "none" ; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + + static pthread_once_t once_control_ = PTHREAD_ONCE_INIT; + static pthread_key_t key; + + static void init_once(void) { + pthread_key_create(&key, NULL); + } + static void *get_tls() { + return (void *)pthread_getspecific(&key); + } + static void set_tls(void *p) { + pthread_setspecific(&key, p); + } +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_tls=pthread +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi + + case "$ac_cv_tls" in + none) break ;; + pthread) + TLS_decl="#define HAVE_PTHREAD_TLS" + TLS_defn="" ;; + modifier) + TLS_decl="#define TLS_DECL_MODIFIER $ax_tls_decl_keyword" + TLS_defn="#define TLS_DEFN_MODIFIER $ax_tls_defn_keyword" ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_tls" >&5 +$as_echo "$ac_cv_tls" >&6; } + + if test "$enable_shared" = "no"; then + DEFAULT_LIB_STL="\$(libstl_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_STL="\$(libstlso_target)" + fi + ADDITIONAL_INCS="$ADDITIONAL_INCS 
dbstl_common.h" + for f in dbstl_set.h dbstl_vector.h dbstl_exception.h dbstl_map.h dbstl_utility.h dbstl_dbc.h dbstl_dbt.h dbstl_base_iterator.h dbstl_container.h dbstl_element_ref.h dbstl_inner_utility.h dbstl_resource_manager.h ; do + ADDITIONAL_INCS="$ADDITIONAL_INCS \$(topdir)/lang/cxx/stl/$f" + done + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_STL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libstl)" + fi +fi + +# Checks for include files, structures, C types. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stat file-mode macros are broken" >&5 +$as_echo_n "checking whether stat file-mode macros are broken... " >&6; } +if ${ac_cv_header_stat_broken+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include + +#if defined S_ISBLK && defined S_IFDIR +extern char c1[S_ISBLK (S_IFDIR) ? -1 : 1]; +#endif + +#if defined S_ISBLK && defined S_IFCHR +extern char c2[S_ISBLK (S_IFCHR) ? -1 : 1]; +#endif + +#if defined S_ISLNK && defined S_IFREG +extern char c3[S_ISLNK (S_IFREG) ? -1 : 1]; +#endif + +#if defined S_ISSOCK && defined S_IFREG +extern char c4[S_ISSOCK (S_IFREG) ? -1 : 1]; +#endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stat_broken=no +else + ac_cv_header_stat_broken=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stat_broken" >&5 +$as_echo "$ac_cv_header_stat_broken" >&6; } +if test $ac_cv_header_stat_broken = yes; then + +$as_echo "#define STAT_MACROS_BROKEN 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5 +$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; } +if ${ac_cv_header_time+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include + +int +main () +{ +if ((struct tm *) 0) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_time=yes +else + ac_cv_header_time=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5 +$as_echo "$ac_cv_header_time" >&6; } +if test $ac_cv_header_time = yes; then + +$as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h + +fi + +ac_header_dirent=no +for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do + as_ac_Header=`$as_echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_hdr that defines DIR" >&5 +$as_echo_n "checking for $ac_hdr that defines DIR... " >&6; } +if eval \${$as_ac_Header+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include <$ac_hdr> + +int +main () +{ +if ((DIR *) 0) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$as_ac_Header=yes" +else + eval "$as_ac_Header=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$as_ac_Header + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_hdr" | $as_tr_cpp` 1 +_ACEOF + +ac_header_dirent=$ac_hdr; break +fi + +done +# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. +if test $ac_header_dirent = dirent.h; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 +$as_echo_n "checking for library containing opendir... " >&6; } +if ${ac_cv_search_opendir+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' dir; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_opendir=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_opendir+:} false; then : + break +fi +done +if ${ac_cv_search_opendir+:} false; then : + +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 +$as_echo "$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing opendir" >&5 +$as_echo_n "checking for library containing opendir... " >&6; } +if ${ac_cv_search_opendir+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' x; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_opendir=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_opendir+:} false; then : + break +fi +done +if ${ac_cv_search_opendir+:} false; then : + +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_opendir" >&5 +$as_echo "$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +fi + +for ac_header in execinfo.h sys/select.h sys/socket.h sys/time.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +ac_fn_c_check_member "$LINENO" "struct stat" "st_blksize" "ac_cv_member_struct_stat_st_blksize" "$ac_includes_default" +if test "x$ac_cv_member_struct_stat_st_blksize" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 +_ACEOF + + +fi + + + +# db.h includes and , not the other default includes +# autoconf usually includes. For that reason, we specify a set of includes +# for all type checking tests. [#5060] +# +# C99 says types should be in ; include if it exists. +# +# Some systems have types in ; include if it exists. +# +# IBM's OS/390 and z/OS releases have types in not also found +# in ; include if it exists. 
+db_includes="#include " + +ac_fn_c_check_header_mongrel "$LINENO" "inttypes.h" "ac_cv_header_inttypes_h" "$ac_includes_default" +if test "x$ac_cv_header_inttypes_h" = xyes; then : + + db_includes="$db_includes +#include " + inttypes_h_decl="#include " +fi + + + +# IRIX has stdint.h that is only available when using c99 (i.e. __c99 +# is defined). Problem with having it in a public header is that a c++ +# compiler cannot #include if db.h #includes stdint.h, so we +# need to check that stdint.h is available for all cases. Also the +# IRIX compiler does not exit with a non-zero exit code when it sees +# #error, so we actually need to use the header for the compiler to fail. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdint.h" >&5 +$as_echo_n "checking for stdint.h... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include + int main() { + uint_least8_t x=0; + return x; + } +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +if test "$db_cv_cxx" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if stdint.h can be used by C++" >&5 +$as_echo_n "checking if stdint.h can be used by C++... " >&6; } + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + int main() { + uint_least8_t x=0; + return x; + } +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + stdint_h_decl="#include " + db_includes="$db_includes +#include " + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + stdint_h_decl="#ifndef __cplusplus +#include +#endif" + db_includes="$db_includes +#ifndef __cplusplus +#include +#endif" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +else + stdint_h_decl="#include " + db_includes="$db_includes +#include " +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +ac_fn_c_check_header_mongrel "$LINENO" "stddef.h" "ac_cv_header_stddef_h" "$ac_includes_default" +if test "x$ac_cv_header_stddef_h" = xyes; then : + + db_includes="$db_includes +#include " + stddef_h_decl="#include " +fi + + + +ac_fn_c_check_header_mongrel "$LINENO" "unistd.h" "ac_cv_header_unistd_h" "$ac_includes_default" +if test "x$ac_cv_header_unistd_h" = xyes; then : + + db_includes="$db_includes +#include " + unistd_h_decl="#include " +fi + + +db_includes="$db_includes +#include " + +# We need to know the sizes of various objects on this system. +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of char" >&5 +$as_echo_n "checking size of char... 
" >&6; } +if ${ac_cv_sizeof_char+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (char))" "ac_cv_sizeof_char" "$db_includes +"; then : + +else + if test "$ac_cv_type_char" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (char) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_char=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_char" >&5 +$as_echo "$ac_cv_sizeof_char" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_CHAR $ac_cv_sizeof_char +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned char" >&5 +$as_echo_n "checking size of unsigned char... 
" >&6; } +if ${ac_cv_sizeof_unsigned_char+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned char))" "ac_cv_sizeof_unsigned_char" "$db_includes +"; then : + +else + if test "$ac_cv_type_unsigned_char" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned char) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_char=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_char" >&5 +$as_echo "$ac_cv_sizeof_unsigned_char" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_CHAR $ac_cv_sizeof_unsigned_char +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of short" >&5 +$as_echo_n "checking size of short... 
" >&6; } +if ${ac_cv_sizeof_short+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (short))" "ac_cv_sizeof_short" "$db_includes +"; then : + +else + if test "$ac_cv_type_short" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (short) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_short=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_short" >&5 +$as_echo "$ac_cv_sizeof_short" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_SHORT $ac_cv_sizeof_short +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" >&5 +$as_echo_n "checking size of unsigned short... 
" >&6; } +if ${ac_cv_sizeof_unsigned_short+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" "ac_cv_sizeof_unsigned_short" "$db_includes +"; then : + +else + if test "$ac_cv_type_unsigned_short" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned short) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_short=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_short" >&5 +$as_echo "$ac_cv_sizeof_unsigned_short" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_SHORT $ac_cv_sizeof_unsigned_short +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5 +$as_echo_n "checking size of int... 
" >&6; } +if ${ac_cv_sizeof_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$db_includes +"; then : + +else + if test "$ac_cv_type_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5 +$as_echo "$ac_cv_sizeof_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INT $ac_cv_sizeof_int +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned int" >&5 +$as_echo_n "checking size of unsigned int... 
" >&6; } +if ${ac_cv_sizeof_unsigned_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned int))" "ac_cv_sizeof_unsigned_int" "$db_includes +"; then : + +else + if test "$ac_cv_type_unsigned_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_int" >&5 +$as_echo "$ac_cv_sizeof_unsigned_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_INT $ac_cv_sizeof_unsigned_int +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long" >&5 +$as_echo_n "checking size of long... 
" >&6; } +if ${ac_cv_sizeof_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long))" "ac_cv_sizeof_long" "$db_includes +"; then : + +else + if test "$ac_cv_type_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long" >&5 +$as_echo "$ac_cv_sizeof_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG $ac_cv_sizeof_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5 +$as_echo_n "checking size of unsigned long... 
" >&6; } +if ${ac_cv_sizeof_unsigned_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long" "$db_includes +"; then : + +else + if test "$ac_cv_type_unsigned_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long" >&5 +$as_echo "$ac_cv_sizeof_unsigned_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5 +$as_echo_n "checking size of long long... 
" >&6; } +if ${ac_cv_sizeof_long_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$db_includes +"; then : + +else + if test "$ac_cv_type_long_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (long long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_long_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5 +$as_echo "$ac_cv_sizeof_long_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long long" >&5 +$as_echo_n "checking size of unsigned long long... 
" >&6; } +if ${ac_cv_sizeof_unsigned_long_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long long))" "ac_cv_sizeof_unsigned_long_long" "$db_includes +"; then : + +else + if test "$ac_cv_type_unsigned_long_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned long long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_long_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long_long" >&5 +$as_echo "$ac_cv_sizeof_unsigned_long_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_LONG_LONG $ac_cv_sizeof_unsigned_long_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of char *" >&5 +$as_echo_n "checking size of char *... 
" >&6; } +if ${ac_cv_sizeof_char_p+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (char *))" "ac_cv_sizeof_char_p" "$db_includes +"; then : + +else + if test "$ac_cv_type_char_p" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (char *) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_char_p=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_char_p" >&5 +$as_echo "$ac_cv_sizeof_char_p" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_CHAR_P $ac_cv_sizeof_char_p +_ACEOF + + + +# We look for u_char, u_short, u_int, u_long -- if we can't find them, +# we create our own. + +ac_fn_c_check_type "$LINENO" "u_char" "ac_cv_type_u_char" "$db_includes +" +if test "x$ac_cv_type_u_char" = xyes; then : + +else + u_char_decl="typedef unsigned char u_char;" +fi + + + +ac_fn_c_check_type "$LINENO" "u_short" "ac_cv_type_u_short" "$db_includes +" +if test "x$ac_cv_type_u_short" = xyes; then : + +else + u_short_decl="typedef unsigned short u_short;" +fi + + + +ac_fn_c_check_type "$LINENO" "u_int" "ac_cv_type_u_int" "$db_includes +" +if test "x$ac_cv_type_u_int" = xyes; then : + +else + u_int_decl="typedef unsigned int u_int;" +fi + + + +ac_fn_c_check_type "$LINENO" "u_long" "ac_cv_type_u_long" "$db_includes +" +if test "x$ac_cv_type_u_long" = xyes; then : + +else + u_long_decl="typedef unsigned long u_long;" +fi + + +# We look for fixed-size variants of u_char, u_short, u_int, u_long as well. 
+ +ac_fn_c_check_type "$LINENO" "u_int8_t" "ac_cv_type_u_int8_t" "$db_includes +" +if test "x$ac_cv_type_u_int8_t" = xyes; then : + +else + + case "1" in + "$ac_cv_sizeof_unsigned_int") + u_int8_decl="typedef unsigned int u_int8_t;";; + "$ac_cv_sizeof_unsigned_char") + u_int8_decl="typedef unsigned char u_int8_t;";; + "$ac_cv_sizeof_unsigned_short") + u_int8_decl="typedef unsigned short u_int8_t;";; + "$ac_cv_sizeof_unsigned_long") + u_int8_decl="typedef unsigned long u_int8_t;";; + "$ac_cv_sizeof_unsigned_long_long") + u_int8_decl="typedef unsigned long long u_int8_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? "No unsigned 1-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "u_int16_t" "ac_cv_type_u_int16_t" "$db_includes +" +if test "x$ac_cv_type_u_int16_t" = xyes; then : + +else + + case "2" in + "$ac_cv_sizeof_unsigned_int") + u_int16_decl="typedef unsigned int u_int16_t;";; + "$ac_cv_sizeof_unsigned_char") + u_int16_decl="typedef unsigned char u_int16_t;";; + "$ac_cv_sizeof_unsigned_short") + u_int16_decl="typedef unsigned short u_int16_t;";; + "$ac_cv_sizeof_unsigned_long") + u_int16_decl="typedef unsigned long u_int16_t;";; + "$ac_cv_sizeof_unsigned_long_long") + u_int16_decl="typedef unsigned long long u_int16_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? "No unsigned 2-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "int16_t" "ac_cv_type_int16_t" "$db_includes +" +if test "x$ac_cv_type_int16_t" = xyes; then : + +else + + case "2" in + "$ac_cv_sizeof_int") + int16_decl="typedef int int16_t;";; + "$ac_cv_sizeof_char") + int16_decl="typedef char int16_t;";; + "$ac_cv_sizeof_short") + int16_decl="typedef short int16_t;";; + "$ac_cv_sizeof_long") + int16_decl="typedef long int16_t;";; + "$ac_cv_sizeof_long_long") + int16_decl="typedef long long int16_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? 
"No signed 2-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "u_int32_t" "ac_cv_type_u_int32_t" "$db_includes +" +if test "x$ac_cv_type_u_int32_t" = xyes; then : + +else + + case "4" in + "$ac_cv_sizeof_unsigned_int") + u_int32_decl="typedef unsigned int u_int32_t;";; + "$ac_cv_sizeof_unsigned_char") + u_int32_decl="typedef unsigned char u_int32_t;";; + "$ac_cv_sizeof_unsigned_short") + u_int32_decl="typedef unsigned short u_int32_t;";; + "$ac_cv_sizeof_unsigned_long") + u_int32_decl="typedef unsigned long u_int32_t;";; + "$ac_cv_sizeof_unsigned_long_long") + u_int32_decl="typedef unsigned long long u_int32_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? "No unsigned 4-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "int32_t" "ac_cv_type_int32_t" "$db_includes +" +if test "x$ac_cv_type_int32_t" = xyes; then : + +else + + case "4" in + "$ac_cv_sizeof_int") + int32_decl="typedef int int32_t;";; + "$ac_cv_sizeof_char") + int32_decl="typedef char int32_t;";; + "$ac_cv_sizeof_short") + int32_decl="typedef short int32_t;";; + "$ac_cv_sizeof_long") + int32_decl="typedef long int32_t;";; + "$ac_cv_sizeof_long_long") + int32_decl="typedef long long int32_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? 
"No signed 4-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "u_int64_t" "ac_cv_type_u_int64_t" "$db_includes +" +if test "x$ac_cv_type_u_int64_t" = xyes; then : + +else + + case "8" in + "$ac_cv_sizeof_unsigned_int") + u_int64_decl="typedef unsigned int u_int64_t;";; + "$ac_cv_sizeof_unsigned_char") + u_int64_decl="typedef unsigned char u_int64_t;";; + "$ac_cv_sizeof_unsigned_short") + u_int64_decl="typedef unsigned short u_int64_t;";; + "$ac_cv_sizeof_unsigned_long") + u_int64_decl="typedef unsigned long u_int64_t;";; + "$ac_cv_sizeof_unsigned_long_long") + u_int64_decl="typedef unsigned long long u_int64_t;";; + *) + if test "notfatal" != "notfatal"; then + as_fn_error $? "No unsigned 8-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + +ac_fn_c_check_type "$LINENO" "int64_t" "ac_cv_type_int64_t" "$db_includes +" +if test "x$ac_cv_type_int64_t" = xyes; then : + +else + + case "8" in + "$ac_cv_sizeof_int") + int64_decl="typedef int int64_t;";; + "$ac_cv_sizeof_char") + int64_decl="typedef char int64_t;";; + "$ac_cv_sizeof_short") + int64_decl="typedef short int64_t;";; + "$ac_cv_sizeof_long") + int64_decl="typedef long int64_t;";; + "$ac_cv_sizeof_long_long") + int64_decl="typedef long long int64_t;";; + *) + if test "notfatal" != "notfatal"; then + as_fn_error $? "No signed 8-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + +# No currently autoconf'd systems lack FILE, off_t pid_t, size_t, time_t. +# +# We require them, we don't try to substitute our own if we can't find them. + +as_ac_Type=`$as_echo "ac_cv_type_FILE *" | $as_tr_sh` +ac_fn_c_check_type "$LINENO" "FILE *" "$as_ac_Type" "$db_includes +" +if eval test \"x\$"$as_ac_Type"\" = x"yes"; then : + +else + as_fn_error $? "No FILE type." "$LINENO" 5 +fi + + +ac_fn_c_check_type "$LINENO" "off_t" "ac_cv_type_off_t" "$db_includes +" +if test "x$ac_cv_type_off_t" = xyes; then : + +else + as_fn_error $? "No off_t type." 
"$LINENO" 5 +fi + + +ac_fn_c_check_type "$LINENO" "pid_t" "ac_cv_type_pid_t" "$db_includes +" +if test "x$ac_cv_type_pid_t" = xyes; then : + +else + as_fn_error $? "No pid_t type." "$LINENO" 5 +fi + + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$db_includes +" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + as_fn_error $? "No size_t type." "$LINENO" 5 +fi + + +ac_fn_c_check_type "$LINENO" "time_t" "ac_cv_type_time_t" "$db_includes +" +if test "x$ac_cv_type_time_t" = xyes; then : + +else + as_fn_error $? "No time_t type." "$LINENO" 5 +fi + + +# Check for ssize_t -- if none exists, find a signed integral type that's +# the same size as a size_t. +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of size_t" >&5 +$as_echo_n "checking size of size_t... 
" >&6; } +if ${ac_cv_sizeof_size_t+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (size_t))" "ac_cv_sizeof_size_t" "$db_includes +"; then : + +else + if test "$ac_cv_type_size_t" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (size_t) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_size_t=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_size_t" >&5 +$as_echo "$ac_cv_sizeof_size_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_SIZE_T $ac_cv_sizeof_size_t +_ACEOF + + + +ac_fn_c_check_type "$LINENO" "ssize_t" "ac_cv_type_ssize_t" "$db_includes +" +if test "x$ac_cv_type_ssize_t" = xyes; then : + +else + + case "$ac_cv_sizeof_size_t" in + "$ac_cv_sizeof_int") + ssize_t_decl="typedef int ssize_t;";; + "$ac_cv_sizeof_char") + ssize_t_decl="typedef char ssize_t;";; + "$ac_cv_sizeof_short") + ssize_t_decl="typedef short ssize_t;";; + "$ac_cv_sizeof_long") + ssize_t_decl="typedef long ssize_t;";; + "$ac_cv_sizeof_long_long") + ssize_t_decl="typedef long long ssize_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? "No signed $ac_cv_sizeof_size_t-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + +# Check for uintmax_t -- if none exists, find the largest unsigned integral +# type available. 
+ +ac_fn_c_check_type "$LINENO" "uintmax_t" "ac_cv_type_uintmax_t" "$ac_includes_default" +if test "x$ac_cv_type_uintmax_t" = xyes; then : + +else + ac_fn_c_check_type "$LINENO" "unsigned long long" "ac_cv_type_unsigned_long_long" "$db_includes +" +if test "x$ac_cv_type_unsigned_long_long" = xyes; then : + uintmax_t_decl="typedef unsigned long long uintmax_t;" +else + uintmax_t_decl="typedef unsigned long uintmax_t;" +fi + +fi + + +# Check for uintptr_t -- if none exists, find an integral type which is +# the same size as a pointer. + +ac_fn_c_check_type "$LINENO" "uintptr_t" "ac_cv_type_uintptr_t" "$ac_includes_default" +if test "x$ac_cv_type_uintptr_t" = xyes; then : + +else + + case "$ac_cv_sizeof_char_p" in + "$ac_cv_sizeof_unsigned_int") + uintptr_t_decl="typedef unsigned int uintptr_t;";; + "$ac_cv_sizeof_unsigned_char") + uintptr_t_decl="typedef unsigned char uintptr_t;";; + "$ac_cv_sizeof_unsigned_short") + uintptr_t_decl="typedef unsigned short uintptr_t;";; + "$ac_cv_sizeof_unsigned_long") + uintptr_t_decl="typedef unsigned long uintptr_t;";; + "$ac_cv_sizeof_unsigned_long_long") + uintptr_t_decl="typedef unsigned long long uintptr_t;";; + *) + if test "" != "notfatal"; then + as_fn_error $? "No unsigned $ac_cv_sizeof_char_p-byte integral type" "$LINENO" 5 + fi;; + esac +fi + + + + ac_fn_c_check_type "$LINENO" "socklen_t" "ac_cv_type_socklen_t" "$db_includes +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +" +if test "x$ac_cv_type_socklen_t" = xyes; then : + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for socklen_t equivalent" >&5 +$as_echo_n "checking for socklen_t equivalent... 
" >&6; } + if ${db_cv_socklen_t_equiv+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Systems have either "struct sockaddr *" or + # "void *" as the second argument to getpeername + db_cv_socklen_t_equiv= + for arg2 in "struct sockaddr" void; do + for t in int size_t "unsigned int" "long int" "unsigned long int"; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$db_includes +#ifdef HAVE_SYS_SOCKET_H +#include +#endif + int getpeername (int, $arg2 *, $t *); +int +main () +{ +$t len; + getpeername (0, 0, &len); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_socklen_t_equiv="$t" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$db_cv_socklen_t_equiv" != "" && break + done + test "$db_cv_socklen_t_equiv" != "" && break + done + +fi + + if test "$db_cv_socklen_t_equiv" = ""; then + as_fn_error $? "Cannot find a type to use in place of socklen_t" "$LINENO" 5 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_socklen_t_equiv" >&5 +$as_echo "$db_cv_socklen_t_equiv" >&6; } + +cat >>confdefs.h <<_ACEOF +#define socklen_t $db_cv_socklen_t_equiv +_ACEOF + +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C exit success/failure values" >&5 +$as_echo_n "checking for ANSI C exit success/failure values... " >&6; } +if ${db_cv_exit_defines+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +return (EXIT_SUCCESS); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_exit_defines=yes +else + db_cv_exit_defines=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_exit_defines" >&5 +$as_echo "$db_cv_exit_defines" >&6; } +if test "$db_cv_exit_defines" = "yes"; then + $as_echo "#define HAVE_EXIT_SUCCESS 1" >>confdefs.h + + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for getopt optreset variable" >&5 +$as_echo_n "checking for getopt optreset variable... " >&6; } +if ${db_cv_optreset+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +extern int optreset; optreset = 1; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_optreset=yes +else + db_cv_optreset=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_optreset" >&5 +$as_echo "$db_cv_optreset" >&6; } +if test "$db_cv_optreset" = "yes"; then + $as_echo "#define HAVE_GETOPT_OPTRESET 1" >>confdefs.h + + +fi + +# Check for mutexes. +# We do this first because it changes $LIBSO_LIBS. + + +# Mutexes we don't test for, but want the #defines to exist for other ports. + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mutexes" >&5 +$as_echo_n "checking for mutexes... " >&6; } +if ${db_cv_mutex+:} false; then : + $as_echo_n "(cached) " >&6 +else + + +orig_libs=$LIBS + +db_cv_mutex=no + +# Mutexes can be disabled. +if test "$db_cv_build_mutexsupport" = no; then + db_cv_mutex=disabled; +fi + +# User-specified Win32 mutexes (MinGW build) +if test "$db_cv_mingw" = yes; then + db_cv_mutex=win32/gcc +fi + +if test "$db_cv_mutex" = no; then + # User-specified POSIX or UI mutexes. 
+ # + # There are two different reasons to specify mutexes: First, the + # application is already using one type of mutex and doesn't want + # to mix-and-match (for example, on Solaris, which has POSIX, UI + # and LWP mutexes). Second, the application's POSIX pthreads + # mutexes don't support inter-process locking, but the application + # wants to use them anyway (for example, some Linux and *BSD systems). + # + # Test for POSIX threads before testing for UI/LWP threads, they are + # the Sun-recommended choice on Solaris. Also, there are Linux systems + # that support a UI compatibility mode, and applications are more + # likely to be written for POSIX threads than UI threads. + if test "$db_cv_posixmutexes" = yes; then + db_cv_mutex=posix_only; + fi + if test "$db_cv_uimutexes" = yes; then + db_cv_mutex=ui_only; + fi + + # POSIX.1 pthreads: pthread_XXX + # + # If the user specified we use POSIX pthreads mutexes, and we fail to + # find the full interface, try and configure for just intra-process + # support. + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = posix_only; then + LIBS="$LIBS -lpthread" + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/library" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/library" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr) || + pthread_cond_init(&cond, &condattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +main() { + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = posix_only; then + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex="POSIX/pthreads" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_init(&mutexattr) || + pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_mutex="POSIX/pthreads" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +main() { + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr) || + pthread_cond_init(&cond, &condattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + fi + if test "$db_cv_mutex" = posix_only; then + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/private" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/private" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr) || + pthread_cond_init(&cond, &condattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +main() { + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + fi + if test "$db_cv_mutex" = posix_only; then + LIBS="$LIBS -lpthread" + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/library/private" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +main() { + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; + exit ( + pthread_condattr_init(&condattr) || + pthread_mutexattr_init(&mutexattr) || + pthread_cond_init(&cond, &condattr) || + pthread_mutex_init(&mutex, &mutexattr) || + pthread_mutex_lock(&mutex) || + pthread_mutex_unlock(&mutex) || + pthread_mutex_destroy(&mutex) || + pthread_cond_destroy(&cond) || + pthread_condattr_destroy(&condattr) || + pthread_mutexattr_destroy(&mutexattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_mutex="POSIX/pthreads/library/private" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_cond_t cond; + pthread_condattr_t condattr; + exit(pthread_condattr_init(&condattr) || + pthread_cond_init(&cond, &condattr) || + pthread_cond_init(&cond, &condattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_condinit_dupgood="yes" +else + db_cv_pthread_condinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +main() { + pthread_rwlock_t rwlock; + pthread_rwlockattr_t rwlockattr; + exit(pthread_rwlockattr_init(&rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr) || + pthread_rwlock_init(&rwlock, &rwlockattr)); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_pthread_rwlockinit_dupgood="yes" +else + db_cv_pthread_rwlockinit_dupgood="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = posix_only; then + as_fn_error $? "unable to find POSIX 1003.1 mutex interfaces" "$LINENO" 5 + fi + + # LWP threads: _lwp_XXX + if test "$db_cv_mutex" = no; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include +int +main () +{ + + static lwp_mutex_t mi = SHAREDMUTEX; + static lwp_cond_t ci = SHAREDCV; + lwp_mutex_t mutex = mi; + lwp_cond_t cond = ci; + exit ( + _lwp_mutex_lock(&mutex) || + _lwp_mutex_unlock(&mutex)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=Solaris/lwp +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi + + # UI threads: thr_XXX + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = ui_only; then + LIBS="$LIBS -lthread" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + #include +int +main () +{ + + mutex_t mutex; + cond_t cond; + int type = USYNC_PROCESS; + exit ( + mutex_init(&mutex, type, NULL) || + cond_init(&cond, type, NULL) || + mutex_lock(&mutex) || + mutex_unlock(&mutex)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=UI/threads/library +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$orig_libs" + fi + if test "$db_cv_mutex" = no -o "$db_cv_mutex" = ui_only; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + #include +int +main () +{ + + mutex_t mutex; + cond_t cond; + int type = USYNC_PROCESS; + exit ( + mutex_init(&mutex, type, NULL) || + cond_init(&cond, type, NULL) || + mutex_lock(&mutex) || + mutex_unlock(&mutex)); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=UI/threads +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi + if test "$db_cv_mutex" = ui_only; then + as_fn_error $? "unable to find UI mutex interfaces" "$LINENO" 5 + fi + + # We're done testing for pthreads-style mutexes. Next, check for + # test-and-set mutexes. Check first for hybrid implementations, + # because we check for them even if we've already found a + # pthreads-style mutex and they're the most common architectures + # anyway. + # + # x86/gcc: FreeBSD, NetBSD, BSD/OS, Linux + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + #if (defined(i386) || defined(__i386__)) && defined(__GNUC__) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex="$db_cv_mutex/x86/gcc-assembly" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + # x86_64/gcc: FreeBSD, NetBSD, BSD/OS, Linux + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + #if (defined(x86_64) || defined(__x86_64__)) && defined(__GNUC__) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex="$db_cv_mutex/x86_64/gcc-assembly" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + # Solaris is one of the systems where we can configure hybrid mutexes. + # However, we require the membar_enter function for that, and only newer + # Solaris releases have it. Check to see if we can configure hybrids. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + #include +int +main () +{ + + typedef lock_t tsl_t; + lock_t x; + _lock_try(&x); + _lock_clear(&x); + membar_enter(); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex="$db_cv_mutex/Solaris/_lock_try/membar" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + # Sparc/gcc: SunOS, Solaris, ultrasparc assembler support + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + #if defined(__sparc__) && defined(__GNUC__) + asm volatile ("membar #StoreStore|#StoreLoad|#LoadStore"); + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex="$db_cv_mutex/Sparc/gcc-assembly" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + # We're done testing for any hybrid mutex implementations. If we did + # not find a pthreads-style mutex, but did find a test-and-set mutex, + # we set db_cv_mutex to "no/XXX" -- clean that up. + db_cv_mutex=`echo $db_cv_mutex | sed 's/^no\///'` +fi + +# If we still don't have a mutex implementation yet, continue testing for a +# test-and-set mutex implementation. 
+ +# _lock_try/_lock_clear: Solaris +# On Solaris systems without other mutex interfaces, DB uses the undocumented +# _lock_try _lock_clear function calls instead of either the sema_trywait(3T) +# or sema_wait(3T) function calls. This is because of problems in those +# interfaces in some releases of the Solaris C library. +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + typedef lock_t tsl_t; + lock_t x; + _lock_try(&x); + _lock_clear(&x); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=Solaris/_lock_try +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# msemaphore: HPPA only +# Try HPPA before general msem test, it needs special alignment. +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + +#if defined(__hppa) + typedef msemaphore tsl_t; + msemaphore x; + msem_init(&x, 0); + msem_lock(&x, 0); + msem_unlock(&x, 0); + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=HP/msem_init +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# msemaphore: AIX, OSF/1 +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + typedef msemaphore tsl_t; + msemaphore x; + msem_init(&x, 0); + msem_lock(&x, 0); + msem_unlock(&x, 0); + exit(0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=UNIX/msem_init +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# ReliantUNIX +if test "$db_cv_mutex" = no; then +LIBS="$LIBS -lmproc" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +int +main () +{ + + typedef spinlock_t tsl_t; + spinlock_t x; + initspin(&x, 1); + cspinlock(&x); + spinunlock(&x); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=ReliantUNIX/initspin +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS="$orig_libs" +fi + +# SCO: UnixWare has threads in libthread, but OpenServer doesn't. +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__USLC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=SCO/x86/cc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# abilock_t: SGI +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + + typedef abilock_t tsl_t; + abilock_t x; + init_lock(&x); + acquire_lock(&x); + release_lock(&x); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=SGI/init_lock +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# sema_t: Solaris +# The sema_XXX calls do not work on Solaris 5.5. I see no reason to ever +# turn this test on, unless we find some other platform that uses the old +# POSIX.1 interfaces. +if test "$db_cv_mutex" = DOESNT_WORK; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + + typedef sema_t tsl_t; + sema_t x; + sema_init(&x, 1, USYNC_PROCESS, NULL); + sema_wait(&x); + sema_post(&x); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=UNIX/sema_init +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# _check_lock/_clear_lock: AIX +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +int +main () +{ + + int x; + _check_lock(&x,0,1); + _clear_lock(&x,0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=AIX/_check_lock +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# _spin_lock_try/_spin_unlock: Apple/Darwin +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + int x; + _spin_lock_try(&x); + _spin_unlock(&x); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=Darwin/_spin_lock_try +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +# Tru64/cc +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__alpha) && defined(__DECC) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=Tru64/cc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# Alpha/gcc +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__alpha) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=ALPHA/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# ARM/gcc: Linux +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + +#if defined(__arm__) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=ARM/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# MIPS/gcc: Linux +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if (defined(__mips) || defined(__mips__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=MIPS/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# PaRisc/gcc: HP/UX +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if (defined(__hppa) || defined(__hppa__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=HPPA/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# PPC/gcc: +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if (defined(__powerpc__) || defined(__ppc__)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=PPC/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# 68K/gcc: SunOS +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + +#if (defined(mc68020) || defined(sun3)) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=68K/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# S390/cc: IBM OS/390 Unix +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__MVS__) && defined(__IBMC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=S390/cc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# S390/gcc: Linux +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__s390__) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=S390/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# ia64/gcc: Linux +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(__ia64) && defined(__GNUC__) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=ia64/gcc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# uts/cc: UTS +if test "$db_cv_mutex" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#if defined(_UTS) + exit(0); +#else + FAIL TO COMPILE/LINK +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_mutex=UTS/cc-assembly +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# UNIX fcntl system call mutexes. 
+# Note that fcntl mutexes are no longer supported as of 4.8. This code has been +# left in place in case there is some system that we are not aware of that +# only uses fcntl mutexes. In that case, contact Oracle for support. +if test "$db_cv_mutex" = no; then + db_cv_mutex=UNIX/fcntl +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + + struct flock l; + l.l_whence = SEEK_SET; + l.l_start = 10; + l.l_len = 1; + l.l_type = F_WRLCK; + fcntl(0, F_SETLK, &l); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_mutex=UNIX/fcntl +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_mutex" >&5 +$as_echo "$db_cv_mutex" >&6; } + +# Configure a pthreads-style mutex implementation. +hybrid=pthread +case "$db_cv_mutex" in +POSIX/pthreads/private*)ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_PTHREADS 1" >>confdefs.h + + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_COND_REINIT_OKAY 1" >>confdefs.h + + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_RWLOCK_REINIT_OKAY 1" >>confdefs.h + + fi + $as_echo "#define HAVE_MUTEX_THREAD_ONLY 1" >>confdefs.h + + ;; +POSIX/pthreads/library/private*) + ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_PTHREADS 1" >>confdefs.h + + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_COND_REINIT_OKAY 1" >>confdefs.h + + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_RWLOCK_REINIT_OKAY 1" >>confdefs.h + + fi + $as_echo "#define HAVE_MUTEX_THREAD_ONLY 1" >>confdefs.h +;; +POSIX/pthreads/library*)ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_PTHREADS 1" >>confdefs.h + + if test 
"$db_cv_pthread_condinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_COND_REINIT_OKAY 1" >>confdefs.h + + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_RWLOCK_REINIT_OKAY 1" >>confdefs.h + + fi;; +POSIX/pthreads*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_PTHREADS 1" >>confdefs.h + + + if test "$db_cv_pthread_condinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_COND_REINIT_OKAY 1" >>confdefs.h + + fi + if test "$db_cv_pthread_rwlockinit_dupgood" = "yes"; then + $as_echo "#define HAVE_PTHREAD_RWLOCK_REINIT_OKAY 1" >>confdefs.h + + fi + + ;; +Solaris/lwp*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SOLARIS_LWP 1" >>confdefs.h + + ;; +UI/threads/library*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_UI_THREADS 1" >>confdefs.h +;; +*) hybrid=no;; +UI/threads*) ADDITIONAL_OBJS="mut_pthread${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_UI_THREADS 1" >>confdefs.h + + ;; +esac + +# Configure a test-and-set mutex implementation. 
+case "$db_cv_mutex" in +68K/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_68K_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +AIX/_check_lock) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_AIX_CHECK_LOCK 1" >>confdefs.h + + ;; +Darwin/_spin_lock_try) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY 1" >>confdefs.h + + ;; +ALPHA/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_ALPHA_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +ARM/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_ARM_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +HP/msem_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_HPPA_MSEM_INIT 1" >>confdefs.h + + ;; +HPPA/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_HPPA_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +ia64/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_IA64_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +MIPS/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_MIPS_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +PPC/gcc-assembly) + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_PPC_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +ReliantUNIX/initspin) LIBSO_LIBS="$LIBSO_LIBS -lmproc" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_RELIANTUNIX_INITSPIN 1" >>confdefs.h + + ;; +S390/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_S390_CC_ASSEMBLY 1" >>confdefs.h + + ;; +S390/gcc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_S390_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +SCO/x86/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SCO_X86_CC_ASSEMBLY 1" >>confdefs.h + 
+ ;; +SGI/init_lock) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SGI_INIT_LOCK 1" >>confdefs.h + + ;; +Solaris/_lock_try) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SOLARIS_LOCK_TRY 1" >>confdefs.h + + ;; +*Solaris/_lock_try/membar) + hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SOLARIS_LOCK_TRY 1" >>confdefs.h + + ;; +*Sparc/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SPARC_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +Tru64/cc-assembly) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_TRU64_CC_ASSEMBLY 1" >>confdefs.h + + ;; +UNIX/msem_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_MSEM_INIT 1" >>confdefs.h + + ;; +UNIX/sema_init) ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_SEMA_INIT 1" >>confdefs.h + + ;; +UTS/cc-assembly) ADDITIONAL_OBJS="uts4.cc${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_UTS_CC_ASSEMBLY 1" >>confdefs.h + + ;; +*x86/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_X86_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +*x86_64/gcc-assembly) hybrid="$hybrid/tas" + ADDITIONAL_OBJS="mut_tas${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_X86_64_GCC_ASSEMBLY 1" >>confdefs.h + + ;; +esac + +# Configure the remaining special cases. +case "$db_cv_mutex" in +UNIX/fcntl) as_fn_error $? "Support for FCNTL mutexes was removed in BDB 4.8." 
"$LINENO" 5 + ADDITIONAL_OBJS="mut_fcntl${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_FCNTL 1" >>confdefs.h + + ;; +win32) ADDITIONAL_OBJS="mut_win32${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_WIN32 1" >>confdefs.h + + ;; +win32/gcc) ADDITIONAL_OBJS="mut_win32${o} $ADDITIONAL_OBJS" + $as_echo "#define HAVE_MUTEX_WIN32_GCC 1" >>confdefs.h + + ;; +esac + +# Mutexes may not have been found, or may have been disabled. +case "$db_cv_mutex" in +disabled) + ;; +*) + # Test to see if mutexes have been found by checking the list of + # additional objects for a mutex implementation. + case "$ADDITIONAL_OBJS" in + *mut_pthread*|*mut_tas*|*mut_win32*) + $as_echo "#define HAVE_MUTEX_SUPPORT 1" >>confdefs.h + + + + # Shared latches are required in 4.8, and are implemented using + # mutexes if we don't have a native implementation. + # This macro may be removed in a future release. + + $as_echo "#define HAVE_SHARED_LATCHES 1" >>confdefs.h +;; + *) + as_fn_error $? "Unable to find a mutex implementation" "$LINENO" 5;; + esac +esac + +# We may have found both a pthreads-style mutex implementation as well as a +# test-and-set, in which case configure for the hybrid. +if test "$hybrid" = pthread/tas; then + $as_echo "#define HAVE_MUTEX_HYBRID 1" >>confdefs.h + + +fi + +# The mutex selection may require specific declarations -- we fill in most of +# them above, but here are the common ones. +# +# The mutex selection may tell us what kind of thread package we're using, +# which we use to figure out the thread type. +# +# If we're configured for the POSIX pthread API, then force the thread ID type +# and include function, regardless of the mutex selection. Ditto for the +# (default) Solaris lwp mutexes, because they don't have a way to return the +# thread ID. +# +# Try and link with a threads library if possible. 
The problem is the Solaris +# C library has UI/POSIX interface stubs, but they're broken, configuring them +# for inter-process mutexes doesn't return an error, but it doesn't work either. +# For that reason always add -lpthread if we're using pthread calls or mutexes +# and there's a pthread library. +# +# We can't depend on any specific call existing (pthread_create, for example), +# as it may be #defined in an include file -- OSF/1 (Tru64) has this problem. + + + +db_threadid_t_decl=notset + +case "$db_cv_mutex" in +UI/threads*) + thread_h_decl="#include " + db_threadid_t_decl="typedef thread_t db_threadid_t;" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lthread" >&5 +$as_echo_n "checking for main in -lthread... " >&6; } +if ${ac_cv_lib_thread_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_thread_main=yes +else + ac_cv_lib_thread_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_thread_main" >&5 +$as_echo "$ac_cv_lib_thread_main" >&6; } +if test "x$ac_cv_lib_thread_main" = xyes; then : + LIBSO_LIBS="$LIBSO_LIBS -lthread" +fi +ac_cv_lib_thread=ac_cv_lib_thread_main +;; +*) + ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default" +if test "x$ac_cv_header_pthread_h" = xyes; then : + ac_cv_header_pthread_h=yes +fi + + + if test "$ac_cv_header_pthread_h" = "yes" ; then + thread_h_decl="#include " + db_threadid_t_decl="typedef pthread_t db_threadid_t;" + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lpthread" >&5 +$as_echo_n "checking for main in -lpthread... 
" >&6; } +if ${ac_cv_lib_pthread_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_main=yes +else + ac_cv_lib_pthread_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_main" >&5 +$as_echo "$ac_cv_lib_pthread_main" >&6; } +if test "x$ac_cv_lib_pthread_main" = xyes; then : + LIBSO_LIBS="$LIBSO_LIBS -lpthread" +fi +ac_cv_lib_pthread=ac_cv_lib_pthread_main +;; +esac + +# We need to know if the thread ID type will fit into an integral type and we +# can compare it for equality and generally treat it like an int, or if it's a +# non-integral type and we have to treat it like a structure or other untyped +# block of bytes. For example, MVS typedef's pthread_t to a structure. + +if test "$db_threadid_t_decl" = notset; then + db_threadid_t_decl="typedef uintmax_t db_threadid_t;" + $as_echo "#define HAVE_SIMPLE_THREAD_TYPE 1" >>confdefs.h + +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + $thread_h_decl +int +main () +{ + + $db_threadid_t_decl + db_threadid_t a; + a = 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + $as_echo "#define HAVE_SIMPLE_THREAD_TYPE 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +# There are 3 classes of mutexes: +# +# 1: Mutexes requiring no cleanup, for example, test-and-set mutexes. +# 2: Mutexes that must be destroyed, but which don't hold permanent system +# resources, for example, pthread mutexes on MVS aka OS/390 aka z/OS. 
+# 3: Mutexes that must be destroyed, even after the process is gone, for +# example, pthread mutexes on QNX and binary semaphores on VxWorks. +# +# DB cannot currently distinguish between #2 and #3 because DB does not know +# if the application is running environment recovery as part of startup and +# does not need to do cleanup, or if the environment is being removed and/or +# recovered in a loop in the application, and so does need to clean up. If +# we get it wrong, we're going to call the mutex destroy routine on a random +# piece of memory, which usually works, but just might drop core. For now, +# we group #2 and #3 into the HAVE_MUTEX_SYSTEM_RESOURCES define, until we +# have a better solution or reason to solve this in a general way -- so far, +# the places we've needed to handle this are few. + + +case "$host_os$db_cv_mutex" in +*qnx*POSIX/pthread*|openedition*POSIX/pthread*) + $as_echo "#define HAVE_MUTEX_SYSTEM_RESOURCES 1" >>confdefs.h +;; +esac + +# Check for native (system call or instruction set) support for +# atomic increment, decrement, and compare & exchange. + +# Probe for native atomic operations +# gcc/x86{,_64} inline asm +# solaris atomic_* library calls + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for atomic operations" >&5 +$as_echo_n "checking for atomic operations... " >&6; } +if ${db_cv_atomic+:} false; then : + $as_echo_n "(cached) " >&6 +else + +db_cv_atomic=no +# atomic operations can be disabled via --disable-atomicsupport +if test "$db_cv_build_atomicsupport" = no; then + db_cv_atomic=disabled +fi + +# The MinGW build uses the Windows API for atomic operations +if test "$db_cv_mingw" = yes; then + db_cv_atomic=mingw +fi + +if test "$db_cv_atomic" = no; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + #if ((defined(i386) || defined(__i386__)) && defined(__GNUC__)) + exit(0); + #elif ((defined(x86_64) || defined(__x86_64__)) && defined(__GNUC__)) + exit(0); + #else + FAIL TO COMPILE/LINK + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + db_cv_atomic="x86/gcc-assembly" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +if test "$db_cv_atomic" = no; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + + volatile unsigned val = 1; + exit (atomic_inc_uint_nv(&val) != 2 || + atomic_dec_uint_nv(&val) != 1 || + atomic_cas_32(&val, 1, 3) != 3); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_atomic="solaris/atomic" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_atomic" >&5 +$as_echo "$db_cv_atomic" >&6; } + +case "$db_cv_atomic" in + x86/gcc-assembly) + $as_echo "#define HAVE_ATOMIC_SUPPORT 1" >>confdefs.h + + $as_echo "#define HAVE_ATOMIC_X86_GCC_ASSEMBLY 1" >>confdefs.h + + ;; + + solaris/atomic) + $as_echo "#define HAVE_ATOMIC_SUPPORT 1" >>confdefs.h + + $as_echo "#define HAVE_ATOMIC_SOLARIS 1" >>confdefs.h + + ;; + mingw) + $as_echo "#define HAVE_ATOMIC_SUPPORT 1" >>confdefs.h + + ;; +esac + + +# Check for os-specific event support for performance monitoring such as +# DTrace or SystemTap. + + + + + + +if test "$db_cv_systemtap" = "yes" ; then + if test "$DTRACE" != "dtrace"; then + as_fn_error $? "The dtrace program is missing; is systemtap v1.1 or better installed?" 
"$LINENO" 5 + fi + db_cv_dtrace="yes" +fi +if test "$db_cv_dtrace" = "yes" ; then + db_cv_perfmon="yes" +fi + + +DTRACE_CPP=-C +if test "$db_cv_perfmon" = "yes" ; then + if test "$DTRACE" = "dtrace" ; then + for ac_header in sys/sdt.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/sdt.h" "ac_cv_header_sys_sdt_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_sdt_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_SDT_H 1 +_ACEOF + +fi + +done + + # Generate the DTrace provider header file. This is duplicated + # in Makefile.in, to allow custom events to be added. + if test "$STAP" = "stap"; then + # Linux DTrace support may have a bug with dtrace -C -h + # The preprocessing isn't needed for -h on Linux, + # so skip the unnecessary preprocessing. + DTRACE_CPP= + fi + # The OS X version of dtrace prints a spurious line here. + if ! dtrace -h $DTRACE_CPP -I../util/dtrace -s ../dist/db_provider.d; then + as_fn_error $? "Could not build db_provider.d: dtrace -h failed" "$LINENO" 5 + fi + $RM db_provider.h.tmp + if ! mv db_provider.h db_provider.h.tmp ; then + as_fn_error $? "Could not build db_provider.d: mv failed" "$LINENO" 5 + elif ! sed -e \ +'/^#define[ ]*BDB_[A-Z_]*(.*)/y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/' \ +db_provider.h.tmp > db_provider.h ; then + as_fn_error $? "Could not build db_provider.d: sed failed" "$LINENO" 5 + fi + + # DTrace on Solaris needs to post-process .o files to both + # generate an additional .o as well as resolving the + # __dtrace___bdb___ symbols before putting them into + # libraries; Mac OS X does not. Treat a failing dtrace -G + # command as the indicator sign that dtrace -G is unnecessary. + # If it is needed then create an empty .c file to be a + # placeholder for the PIC & non-PIC versions of the dtrace -G + # output file. The root of this .c file must be the same as + # the root of the .d file -- i.e. db_provider -- for the + # dtrace -G lines at the end of Makefile.in to work correctly. 
+ $RM db_provider.o + if dtrace -G $DTRACE_CPP -I../util/dtrace -s ../dist/db_provider.d 2> /dev/null && \ + test -f db_provider.o ; then + FINAL_OBJS="$FINAL_OBJS db_provider${o}" + rm -f db_provider.c + echo "" > db_provider.c + fi + $as_echo "#define HAVE_DTRACE 1" >>confdefs.h + + else + as_fn_error $? "No supported performance utility found." "$LINENO" 5 + fi + $as_echo "#define HAVE_PERFMON 1" >>confdefs.h + + if test "$db_cv_perfmon_statistics" != "no" ; then + $as_echo "#define HAVE_PERFMON_STATISTICS 1" >>confdefs.h + + fi + # The method by which probes are listed depends on the underlying + # implementation; Linux's emulation of DTrace still uses the stap + # command at runtime. + + + if test "$STAP" = "stap"; then + LISTPROBES_DEPENDENCY=.libs/libdb-$DB_VERSION_MAJOR.$DB_VERSION_MINOR$SOSUFFIX + LISTPROBES_COMMAND="stap -l 'process(\"$LISTPROBES_DEPENDENCY\").mark(\"*\")'" + elif test "$DTRACE" = "dtrace" ; then + LISTPROBES_DEPENDENCY=db_load + LISTPROBES_COMMAND=" # Run a simple command which uses the library without needing any setup. + sleep 1 | dtrace -l -n 'bdb\$\$target:::' -c '.libs/db_load dummy.db'" + fi +elif test "$db_cv_perfmon_statistics" = "yes" ; then + as_fn_error $? "Enabling perfmon statistics requires --enable-dtrace" "$LINENO" 5 +fi + + +# Test for various functions/libraries -- do tests that change library values +# first. +# +# Update LIBS, so we're testing against the current list of libraries. +LIBS="$LIBSO_LIBS" + +# The yield function on Solaris is almost certainly pthread_yield (LWP threads +# or POSIX pthreads), or thr_yield (UI threads). There's an outside chance it +# is sched_yield() though, only available in -lrt on Solaris. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing sched_yield" >&5 +$as_echo_n "checking for library containing sched_yield... 
" >&6; } +if ${ac_cv_search_sched_yield+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sched_yield (); +int +main () +{ +return sched_yield (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_sched_yield=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_sched_yield+:} false; then : + break +fi +done +if ${ac_cv_search_sched_yield+:} false; then : + +else + ac_cv_search_sched_yield=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sched_yield" >&5 +$as_echo "$ac_cv_search_sched_yield" >&6; } +ac_res=$ac_cv_search_sched_yield +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + +# The Berkeley DB library calls fdatasync, only available in -lrt on Solaris. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing fdatasync" >&5 +$as_echo_n "checking for library containing fdatasync... " >&6; } +if ${ac_cv_search_fdatasync+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char fdatasync (); +int +main () +{ +return fdatasync (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_fdatasync=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_fdatasync+:} false; then : + break +fi +done +if ${ac_cv_search_fdatasync+:} false; then : + +else + ac_cv_search_fdatasync=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_fdatasync" >&5 +$as_echo "$ac_cv_search_fdatasync" >&6; } +ac_res=$ac_cv_search_fdatasync +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing getaddrinfo" >&5 +$as_echo_n "checking for library containing getaddrinfo... " >&6; } +if ${ac_cv_search_getaddrinfo+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char getaddrinfo (); +int +main () +{ +return getaddrinfo (); + ; + return 0; +} +_ACEOF +for ac_lib in '' nsl socket; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_getaddrinfo=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_getaddrinfo+:} false; then : + break +fi +done +if ${ac_cv_search_getaddrinfo+:} false; then : + +else + ac_cv_search_getaddrinfo=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_getaddrinfo" >&5 +$as_echo "$ac_cv_search_getaddrinfo" >&6; } +ac_res=$ac_cv_search_getaddrinfo +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing hstrerror" >&5 +$as_echo_n "checking for library containing hstrerror... " >&6; } +if ${ac_cv_search_hstrerror+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char hstrerror (); +int +main () +{ +return hstrerror (); + ; + return 0; +} +_ACEOF +for ac_lib in '' resolv; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_hstrerror=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_hstrerror+:} false; then : + break +fi +done +if ${ac_cv_search_hstrerror+:} false; then : + +else + ac_cv_search_hstrerror=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_hstrerror" >&5 +$as_echo "$ac_cv_search_hstrerror" >&6; } +ac_res=$ac_cv_search_hstrerror +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + +# Those tests updated LIBS, update our internal list. +LIBSO_LIBS="$LIBS" + +# !!! +# We could be more exact about whether these libraries are needed, but don't +# bother -- if they exist, we load them, it's only the test programs anyway. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lm" >&5 +$as_echo_n "checking for main in -lm... " >&6; } +if ${ac_cv_lib_m_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_main=yes +else + ac_cv_lib_m_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_main" >&5 +$as_echo "$ac_cv_lib_m_main" >&6; } +if test "x$ac_cv_lib_m_main" = xyes; then : + TEST_LIBS="$TEST_LIBS -lm" +fi +ac_cv_lib_m=ac_cv_lib_m_main + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lnsl" >&5 +$as_echo_n "checking for main in -lnsl... " >&6; } +if ${ac_cv_lib_nsl_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnsl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_nsl_main=yes +else + ac_cv_lib_nsl_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_nsl_main" >&5 +$as_echo "$ac_cv_lib_nsl_main" >&6; } +if test "x$ac_cv_lib_nsl_main" = xyes; then : + TEST_LIBS="$TEST_LIBS -lnsl" +fi +ac_cv_lib_nsl=ac_cv_lib_nsl_main + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lsocket" >&5 +$as_echo_n "checking for main in -lsocket... " >&6; } +if ${ac_cv_lib_socket_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsocket $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_socket_main=yes +else + ac_cv_lib_socket_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_socket_main" >&5 +$as_echo "$ac_cv_lib_socket_main" >&6; } +if test "x$ac_cv_lib_socket_main" = xyes; then : + TEST_LIBS="$TEST_LIBS -lsocket" +fi +ac_cv_lib_socket=ac_cv_lib_socket_main + + +# Checks for system functions for which we have replacements. +# +# The only portable getcwd call is getcwd(char *, size_t), where the +# buffer is non-NULL -- Solaris can't handle a NULL buffer, and they +# deleted getwd(). +ac_fn_c_check_func "$LINENO" "abort" "ac_cv_func_abort" +if test "x$ac_cv_func_abort" = xyes; then : + $as_echo "#define HAVE_ABORT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" abort.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS abort.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "atoi" "ac_cv_func_atoi" +if test "x$ac_cv_func_atoi" = xyes; then : + $as_echo "#define HAVE_ATOI 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" atoi.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS atoi.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "atol" "ac_cv_func_atol" +if test "x$ac_cv_func_atol" = xyes; then : + $as_echo "#define HAVE_ATOL 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" atol.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS atol.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "bsearch" "ac_cv_func_bsearch" +if test "x$ac_cv_func_bsearch" = xyes; then : + $as_echo "#define HAVE_BSEARCH 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" bsearch.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS bsearch.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "getcwd" "ac_cv_func_getcwd" +if test "x$ac_cv_func_getcwd" = xyes; then : + $as_echo "#define HAVE_GETCWD 1" 
>>confdefs.h + +else + case " $LIBOBJS " in + *" getcwd.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS getcwd.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "getenv" "ac_cv_func_getenv" +if test "x$ac_cv_func_getenv" = xyes; then : + $as_echo "#define HAVE_GETENV 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" getenv.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS getenv.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "getopt" "ac_cv_func_getopt" +if test "x$ac_cv_func_getopt" = xyes; then : + $as_echo "#define HAVE_GETOPT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" getopt.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS getopt.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "isalpha" "ac_cv_func_isalpha" +if test "x$ac_cv_func_isalpha" = xyes; then : + $as_echo "#define HAVE_ISALPHA 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" isalpha.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS isalpha.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "isdigit" "ac_cv_func_isdigit" +if test "x$ac_cv_func_isdigit" = xyes; then : + $as_echo "#define HAVE_ISDIGIT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" isdigit.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS isdigit.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "isprint" "ac_cv_func_isprint" +if test "x$ac_cv_func_isprint" = xyes; then : + $as_echo "#define HAVE_ISPRINT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" isprint.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS isprint.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "isspace" "ac_cv_func_isspace" +if test "x$ac_cv_func_isspace" = xyes; then : + $as_echo "#define HAVE_ISSPACE 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" isspace.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS isspace.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "memcmp" "ac_cv_func_memcmp" +if test "x$ac_cv_func_memcmp" = xyes; then : + $as_echo "#define HAVE_MEMCMP 1" >>confdefs.h + +else + case " $LIBOBJS " in 
+ *" memcmp.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS memcmp.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "memcpy" "ac_cv_func_memcpy" +if test "x$ac_cv_func_memcpy" = xyes; then : + $as_echo "#define HAVE_MEMCPY 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" memcpy.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS memcpy.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "memmove" "ac_cv_func_memmove" +if test "x$ac_cv_func_memmove" = xyes; then : + $as_echo "#define HAVE_MEMMOVE 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" memmove.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS memmove.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "printf" "ac_cv_func_printf" +if test "x$ac_cv_func_printf" = xyes; then : + $as_echo "#define HAVE_PRINTF 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" printf.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS printf.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "qsort" "ac_cv_func_qsort" +if test "x$ac_cv_func_qsort" = xyes; then : + $as_echo "#define HAVE_QSORT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" qsort.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS qsort.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "raise" "ac_cv_func_raise" +if test "x$ac_cv_func_raise" = xyes; then : + $as_echo "#define HAVE_RAISE 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" raise.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS raise.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "rand" "ac_cv_func_rand" +if test "x$ac_cv_func_rand" = xyes; then : + $as_echo "#define HAVE_RAND 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" rand.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS rand.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strcasecmp" "ac_cv_func_strcasecmp" +if test "x$ac_cv_func_strcasecmp" = xyes; then : + $as_echo "#define HAVE_STRCASECMP 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strcasecmp.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS 
strcasecmp.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strcat" "ac_cv_func_strcat" +if test "x$ac_cv_func_strcat" = xyes; then : + $as_echo "#define HAVE_STRCAT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strcat.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strcat.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strchr" "ac_cv_func_strchr" +if test "x$ac_cv_func_strchr" = xyes; then : + $as_echo "#define HAVE_STRCHR 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strchr.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strchr.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strdup" "ac_cv_func_strdup" +if test "x$ac_cv_func_strdup" = xyes; then : + $as_echo "#define HAVE_STRDUP 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strdup.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strdup.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strerror" "ac_cv_func_strerror" +if test "x$ac_cv_func_strerror" = xyes; then : + $as_echo "#define HAVE_STRERROR 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strerror.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strerror.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strncat" "ac_cv_func_strncat" +if test "x$ac_cv_func_strncat" = xyes; then : + $as_echo "#define HAVE_STRNCAT 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strncat.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strncat.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strncmp" "ac_cv_func_strncmp" +if test "x$ac_cv_func_strncmp" = xyes; then : + $as_echo "#define HAVE_STRNCMP 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strncmp.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strncmp.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strrchr" "ac_cv_func_strrchr" +if test "x$ac_cv_func_strrchr" = xyes; then : + $as_echo "#define HAVE_STRRCHR 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strrchr.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strrchr.$ac_objext" + ;; +esac + 
+fi + +ac_fn_c_check_func "$LINENO" "strsep" "ac_cv_func_strsep" +if test "x$ac_cv_func_strsep" = xyes; then : + $as_echo "#define HAVE_STRSEP 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strsep.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strsep.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strtol" "ac_cv_func_strtol" +if test "x$ac_cv_func_strtol" = xyes; then : + $as_echo "#define HAVE_STRTOL 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strtol.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strtol.$ac_objext" + ;; +esac + +fi + +ac_fn_c_check_func "$LINENO" "strtoul" "ac_cv_func_strtoul" +if test "x$ac_cv_func_strtoul" = xyes; then : + $as_echo "#define HAVE_STRTOUL 1" >>confdefs.h + +else + case " $LIBOBJS " in + *" strtoul.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strtoul.$ac_objext" + ;; +esac + +fi + + + +# Check for system functions we optionally use. +for ac_func in \ + _fstati64 backtrace backtrace_symbols directio fchmod fclose\ + fcntl fdatasync fgetc fgets fopen fwrite getgid\ + getrusage getuid hstrerror mprotect pstat_getdynamic\ + pthread_self pthread_yield random sched_yield select setgid setuid\ + sigaction snprintf stat sysconf vsnprintf yield +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + +for ac_func in gettimeofday localtime time strftime +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +# AIX 4.3 will link applications with calls to clock_gettime, but the +# calls always fail. 
+case "$host_os" in +aix4.3.*) + ;; +*) + for ac_func in clock_gettime +do : + ac_fn_c_check_func "$LINENO" "clock_gettime" "ac_cv_func_clock_gettime" +if test "x$ac_cv_func_clock_gettime" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_CLOCK_GETTIME 1 +_ACEOF + +fi +done +;; +esac + +# clock_gettime -- monotonic clocks. +# Check to see if we can get a monotonic clock. We actually try and +# run the program if possible, because we don't trust the #define's +# existence to mean the clock really exists. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime monotonic clock" >&5 +$as_echo_n "checking for clock_gettime monotonic clock... " >&6; } +if ${db_cv_clock_monotonic+:} false; then : + $as_echo_n "(cached) " >&6 +else + +if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + +struct timespec t; +clock_gettime(CLOCK_MONOTONIC, &t); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_clock_monotonic=yes +else + db_cv_clock_monotonic=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +main() { + struct timespec t; + return (clock_gettime(CLOCK_MONOTONIC, &t) != 0); +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_clock_monotonic=yes +else + db_cv_clock_monotonic=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_clock_monotonic" >&5 +$as_echo "$db_cv_clock_monotonic" >&6; } +if test "$db_cv_clock_monotonic" = "yes"; then + $as_echo "#define HAVE_CLOCK_MONOTONIC 1" >>confdefs.h + + +fi + +# ctime_r -- +# +# There are two versions of ctime_r, one of which takes a buffer length as a +# third argument, and one which only takes two arguments. 
(There is also a +# difference in return values and the type of the 3rd argument, but we handle +# those problems in the code itself.) +for ac_func in ctime_r +do : + ac_fn_c_check_func "$LINENO" "ctime_r" "ac_cv_func_ctime_r" +if test "x$ac_cv_func_ctime_r" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_CTIME_R 1 +_ACEOF + +fi +done + +if test "$ac_cv_func_ctime_r" = "yes"; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for 2 or 3 argument version of ctime_r" >&5 +$as_echo_n "checking for 2 or 3 argument version of ctime_r... " >&6; } +if ${db_cv_ctime_r_3arg+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +int +main () +{ + + ctime_r(NULL, NULL, 100); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_ctime_r_3arg="3-argument" +else + db_cv_ctime_r_3arg="2-argument" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_ctime_r_3arg" >&5 +$as_echo "$db_cv_ctime_r_3arg" >&6; } +fi +if test "$db_cv_ctime_r_3arg" = "3-argument"; then + $as_echo "#define HAVE_CTIME_R_3ARG 1" >>confdefs.h + + +fi + + +# Ftruncate. +# We've run into a problem with ftruncate on Alpha/Tru64, the issue is that +# after a truncate the last page of the file mmaps as all zeros. So just don't +# use ftruncate. +case "$host_os" in +osf*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ftruncate ignored on $host_os-$host_vendor." >&5 +$as_echo "$as_me: WARNING: ftruncate ignored on $host_os-$host_vendor." >&2;};; +*) + for ac_func in ftruncate +do : + ac_fn_c_check_func "$LINENO" "ftruncate" "ac_cv_func_ftruncate" +if test "x$ac_cv_func_ftruncate" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_FTRUNCATE 1 +_ACEOF + +fi +done +;; +esac + +# Pread/pwrite. +# HP-UX has pread/pwrite, but it doesn't work with largefile support. 
+# NCR's version of System V R 4.3 has pread/pwrite symbols, but no support. +case "$host_os-$host_vendor" in +hpux*|sysv4.3*-ncr) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: pread/pwrite interfaces ignored on $host_os-$host_vendor." >&5 +$as_echo "$as_me: WARNING: pread/pwrite interfaces ignored on $host_os-$host_vendor." >&2;};; +*) + for ac_func in pread pwrite +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done +;; +esac + +# Check for getaddrinfo; do the test explicitly instead of using AC_CHECK_FUNCS +# because isn't a standard include file. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for getaddrinfo" >&5 +$as_echo_n "checking for getaddrinfo... " >&6; } +if ${db_cv_getaddrinfo+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + getaddrinfo(0, 0, 0, 0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_getaddrinfo=yes +else + db_cv_getaddrinfo=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_getaddrinfo" >&5 +$as_echo "$db_cv_getaddrinfo" >&6; } +if test "$db_cv_getaddrinfo" = "yes"; then + $as_echo "#define HAVE_GETADDRINFO 1" >>confdefs.h + + +fi + +# Check for the fcntl F_SETFD flag to deny child process access to file +# descriptors. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fcntl/F_SETFD" >&5 +$as_echo_n "checking for fcntl/F_SETFD... " >&6; } +if ${db_cv_fcntl_f_setfd+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +int +main () +{ + + fcntl(1, F_SETFD, 1); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_fcntl_f_setfd=yes +else + db_cv_fcntl_f_setfd=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_fcntl_f_setfd" >&5 +$as_echo "$db_cv_fcntl_f_setfd" >&6; } +if test "$db_cv_fcntl_f_setfd" = "yes"; then + $as_echo "#define HAVE_FCNTL_F_SETFD 1" >>confdefs.h + + +fi + +# A/UX has a broken getopt(3). +case "$host_os" in +aux*) case " $LIBOBJS " in + *" getopt.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS getopt.$ac_objext" + ;; +esac +;; +esac + +# Linux has a broken O_DIRECT flag, but you can't detect it at configure time. +# Linux and SGI require buffer alignment we may not match, otherwise writes +# will fail. Default to not using the O_DIRECT flag. +if test "$db_cv_o_direct" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for open/O_DIRECT" >&5 +$as_echo_n "checking for open/O_DIRECT... " >&6; } +if ${db_cv_open_o_direct+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + #include +int +main () +{ + + open("a", O_RDONLY | O_DIRECT, 0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_open_o_direct=yes +else + db_cv_open_o_direct=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_open_o_direct" >&5 +$as_echo "$db_cv_open_o_direct" >&6; } + if test \ + "$db_cv_o_direct" = "yes" -a "$db_cv_open_o_direct" = "yes"; then + $as_echo "#define HAVE_O_DIRECT 1" >>confdefs.h + + + fi +fi + +# Check for largefile support. +# Check whether --enable-largefile was given. 
+if test "${enable_largefile+set}" = set; then : + enableval=$enable_largefile; +fi + +if test "$enable_largefile" != no; then + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5 +$as_echo_n "checking for special C compiler options needed for large files... " >&6; } +if ${ac_cv_sys_largefile_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_sys_largefile_CC=no + if test "$GCC" != yes; then + ac_save_CC=$CC + while :; do + # IRIX 6.2 and later do not support large files by default, + # so use the C compiler's -n32 option if that helps. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF + if ac_fn_c_try_compile "$LINENO"; then : + break +fi +rm -f core conftest.err conftest.$ac_objext + CC="$CC -n32" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_largefile_CC=' -n32'; break +fi +rm -f core conftest.err conftest.$ac_objext + break + done + CC=$ac_save_CC + rm -f conftest.$ac_ext + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5 +$as_echo "$ac_cv_sys_largefile_CC" >&6; } + if test "$ac_cv_sys_largefile_CC" != no; then + CC=$CC$ac_cv_sys_largefile_CC + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5 +$as_echo_n "checking for _FILE_OFFSET_BITS value needed for large files... 
" >&6; } +if ${ac_cv_sys_file_offset_bits+:} false; then : + $as_echo_n "(cached) " >&6 +else + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_file_offset_bits=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _FILE_OFFSET_BITS 64 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 
1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_file_offset_bits=64; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cv_sys_file_offset_bits=unknown + break +done +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5 +$as_echo "$ac_cv_sys_file_offset_bits" >&6; } +case $ac_cv_sys_file_offset_bits in #( + no | unknown) ;; + *) +cat >>confdefs.h <<_ACEOF +#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits +_ACEOF +;; +esac +rm -rf conftest* + if test $ac_cv_sys_file_offset_bits = unknown; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5 +$as_echo_n "checking for _LARGE_FILES value needed for large files... " >&6; } +if ${ac_cv_sys_large_files+:} false; then : + $as_echo_n "(cached) " >&6 +else + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_large_files=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _LARGE_FILES 1 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. 
*/ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_large_files=1; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cv_sys_large_files=unknown + break +done +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5 +$as_echo "$ac_cv_sys_large_files" >&6; } +case $ac_cv_sys_large_files in #( + no | unknown) ;; + *) +cat >>confdefs.h <<_ACEOF +#define _LARGE_FILES $ac_cv_sys_large_files +_ACEOF +;; +esac +rm -rf conftest* + fi +fi + + +# Figure out how to create shared regions. +# +# First, we look for mmap. +# +# BSD/OS has mlock(2), but it doesn't work until the 4.1 release. +# +# Nextstep (version 3.3) apparently supports mmap(2) (the mmap symbol +# is defined in the C library) but does not support munmap(2). Don't +# try to use mmap if we can't find munmap. +# +# Ultrix has mmap(2), but it doesn't work. +mmap_ok=no +case "$host_os" in +bsdi3*|bsdi4.0) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: mlock(2) interface ignored on $host_os-$host_vendor." >&5 +$as_echo "$as_me: WARNING: mlock(2) interface ignored on $host_os-$host_vendor." >&2;} + mmap_ok=yes + for ac_func in mmap munmap +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +else + mmap_ok=no +fi +done +;; +ultrix*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: mmap(2) interface ignored on $host_os-$host_vendor." >&5 +$as_echo "$as_me: WARNING: mmap(2) interface ignored on $host_os-$host_vendor." 
>&2;};; +*) + mmap_ok=yes + for ac_func in mlock munlock +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + for ac_func in mmap munmap +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +else + mmap_ok=no +fi +done +;; +esac + +# Second, we look for shmget. +# +# SunOS has the shmget(2) interfaces, but there appears to be a missing +# #include file, so we ignore them. +shmget_ok=no +case "$host_os" in +sunos*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: shmget(2) interface ignored on $host_os-$host_vendor." >&5 +$as_echo "$as_me: WARNING: shmget(2) interface ignored on $host_os-$host_vendor." >&2;};; +*) + shmget_ok=yes + for ac_func in shmget +do : + ac_fn_c_check_func "$LINENO" "shmget" "ac_cv_func_shmget" +if test "x$ac_cv_func_shmget" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SHMGET 1 +_ACEOF + +else + shmget_ok=no +fi +done + + + # Check for shmctl to lock down shared memory segments. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shmctl" >&5 +$as_echo_n "checking for shmctl... " >&6; } +if ${db_cv_shmctl_shm_lock+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +#include +#include +int +main () +{ + + shmctl(0, SHM_LOCK, NULL); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_shmctl_shm_lock=yes +else + db_cv_shmctl_shm_lock=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_shmctl_shm_lock" >&5 +$as_echo "$db_cv_shmctl_shm_lock" >&6; } + if test "$db_cv_shmctl_shm_lock" = "yes"; then + $as_echo "#define HAVE_SHMCTL_SHM_LOCK 1" >>confdefs.h + + + fi;; +esac + +# We require either mmap/munmap(2) or shmget(2). +if test "$mmap_ok" = "no" -a "$shmget_ok" = "no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Neither mmap/munmap(2) or shmget(2) library functions." >&5 +$as_echo "$as_me: WARNING: Neither mmap/munmap(2) or shmget(2) library functions." >&2;} +fi + +# Optional Tcl support. +if test "$db_cv_tcl" = "yes"; then + + + if ${ac_cv_c_tclconfig+:} false; then : + $as_echo_n "(cached) " >&6 +else + + + # First check to see if --with-tclconfig was specified. + if test "${with_tclconfig}" != no; then + if test -f "${with_tclconfig}/tclConfig.sh" ; then + ac_cv_c_tclconfig=`(cd ${with_tclconfig}; pwd)` + else + as_fn_error $? "${with_tclconfig} directory doesn't contain tclConfig.sh" "$LINENO" 5 + fi + fi + + # check in a few common install locations + if test x"${ac_cv_c_tclconfig}" = x ; then + for i in `ls -d /usr/local/lib 2>/dev/null` ; do + if test -f "$i/tclConfig.sh" ; then + ac_cv_c_tclconfig=`(cd $i; pwd)` + break + fi + done + fi + + +fi + + + if test x"${ac_cv_c_tclconfig}" = x ; then + TCL_BIN_DIR="# no Tcl configs found" + as_fn_error $? "can't find Tcl configuration definitions" "$LINENO" 5 + else + TCL_BIN_DIR=${ac_cv_c_tclconfig} + fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for existence of $TCL_BIN_DIR/tclConfig.sh" >&5 +$as_echo_n "checking for existence of $TCL_BIN_DIR/tclConfig.sh... 
" >&6; } + + if test -f "$TCL_BIN_DIR/tclConfig.sh" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: loading" >&5 +$as_echo "loading" >&6; } + . $TCL_BIN_DIR/tclConfig.sh + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: file not found" >&5 +$as_echo "file not found" >&6; } + fi + + # DB requires at least version 8.4. + if test ${TCL_MAJOR_VERSION} -lt 8 \ + -o ${TCL_MAJOR_VERSION} -eq 8 -a ${TCL_MINOR_VERSION} -lt 4; then + as_fn_error $? "Berkeley DB requires Tcl version 8.4 or better." "$LINENO" 5 + fi + + # The eval is required to do substitution (for example, the TCL_DBGX + # substitution in the TCL_LIB_FILE variable. + eval "TCL_INCLUDE_SPEC=\"${TCL_INCLUDE_SPEC}\"" + eval "TCL_LIB_FILE=\"${TCL_LIB_FILE}\"" + eval "TCL_LIB_FLAG=\"${TCL_LIB_FLAG}\"" + eval "TCL_LIB_SPEC=\"${TCL_LIB_SPEC}\"" + + # + # If the DB Tcl library isn't loaded with the Tcl spec and library + # flags on AIX, the resulting libdb_tcl-X.Y.so.0 will drop core at + # load time. [#4843] Furthermore, with Tcl 8.3, the link flags + # given by the Tcl spec are insufficient for our use. [#5779],[#17109] + # + case "$host_os" in + aix*) + LIBTSO_LIBS="$LIBTSO_LIBS $TCL_LIB_SPEC $TCL_LIB_FLAG" + LIBTSO_LIBS="$LIBTSO_LIBS -L$TCL_EXEC_PREFIX/lib -ltcl$TCL_VERSION";; + esac + + + + + + + TCL_TCLSH="${TCL_PREFIX}/bin/tclsh${TCL_VERSION}" + + + if test "$enable_shared" = "no"; then + DEFAULT_LIB_TCL="\$(libtcl_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_TCL="\$(libtso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_TCL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libtcl)" + fi + +fi + +# Optional sequence code. + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for 64-bit integral type support for sequences" >&5 +$as_echo_n "checking for 64-bit integral type support for sequences... " >&6; } + + db_cv_build_sequence="yes" + + # Have to have found 64-bit types to support sequences. 
If we don't + # find the native types, we try and create our own. + if test "$ac_cv_type_int64_t" = "no" -a -z "$int64_decl"; then + db_cv_build_sequence="no" + fi + if test "$ac_cv_type_uint64_t" = "no" -a -z "$u_int64_decl"; then + db_cv_build_sequence="no" + fi + + # Figure out what type is the right size, and set the format. + + + db_cv_seq_type="no" + if test "$db_cv_build_sequence" = "yes" -a\ + "$ac_cv_sizeof_long" -eq "8"; then + db_cv_seq_type="long" + db_cv_seq_fmt='"%ld"' + db_cv_seq_ufmt='"%lu"' + INT64_FMT='#define INT64_FMT "%ld"' + UINT64_FMT='#define UINT64_FMT "%lu"' + else if test "$db_cv_build_sequence" = "yes" -a\ + "$ac_cv_sizeof_long_long" -eq "8"; then + db_cv_seq_type="long long" + db_cv_seq_fmt='"%lld"' + db_cv_seq_ufmt='"%llu"' + INT64_FMT='#define INT64_FMT "%lld"' + UINT64_FMT='#define UINT64_FMT "%llu"' + else + db_cv_build_sequence="no" + fi + fi + + # Test to see if we can declare variables of the appropriate size + # and format them. If we're cross-compiling, all we get is a link + # test, which won't test for the appropriate printf format strings. + if test "$db_cv_build_sequence" = "yes"; then + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + $db_cv_seq_type l; + unsigned $db_cv_seq_type u; + char buf[100]; + + buf[0] = 'a'; + l = 9223372036854775807LL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l); + if (strcmp(buf, "9223372036854775807")) + return (1); + u = 18446744073709551615ULL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u); + if (strcmp(buf, "18446744073709551615")) + return (1); + return (0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + +else + db_cv_build_sequence="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + main() { + $db_cv_seq_type l; + unsigned $db_cv_seq_type u; + char buf[100]; + + buf[0] = 'a'; + l = 9223372036854775807LL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l); + if (strcmp(buf, "9223372036854775807")) + return (1); + u = 18446744073709551615ULL; + (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u); + if (strcmp(buf, "18446744073709551615")) + return (1); + return (0); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + db_cv_build_sequence="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + fi + if test "$db_cv_build_sequence" = "yes"; then + + db_seq_decl="typedef $db_cv_seq_type db_seq_t;"; + + $as_echo "#define HAVE_64BIT_TYPES 1" >>confdefs.h + + + else + # It still has to compile, but it won't run. + db_seq_decl="typedef int db_seq_t;"; + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_build_sequence" >&5 +$as_echo "$db_cv_build_sequence" >&6; } + + +# Detect whether a large mmap() supports automatically extending the accessible +# region after growing the underlying file. + + + + + +if test "$mmap_ok" = "yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for growing a file under an mmap region" >&5 +$as_echo_n "checking for growing a file under an mmap region... " >&6; } + + db_cv_mmap_extend="no" + + if test "$cross_compiling" = yes; then : + db_cv_mmap_extend="no" +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + /* + * Most mmap() implemenations allow you to map in a region which is much + * larger than the underlying file. Only the second less than the actual + * file size is accessible -- a SIGSEV typically results when attemping + * a memory reference between EOF and the end of the mapped region. + * One can extend the file to allow references into higher-addressed + * sections of the region. 
However this automatic extension of the + * addressible memory is beyond what POSIX requires. This function detects + * whether mmap supports this automatic extension. If not (e.g. cygwin) + * then the entire (hopefully sparse) file will need to be written before + * the first mmap. + */ + /* Not all these includes are needed, but the minimal set varies from + * system to system. + */ + #include + #include + #include + #include + #include + #include + #include + + #define TEST_MMAP_BUFSIZE (16 * 1024) + #define TEST_MMAP_EXTENDSIZE (16 * 1024 * 1024) + #ifndef MAP_FAILED + #define MAP_FAILED (-1) + #endif + + int catch_sig(sig) + int sig; + { + exit(1); + } + + main() { + const char *underlying; + unsigned gapsize; + char *base; + int count, fd, i, mode, open_flags, ret, total_size; + char buf[TEST_MMAP_BUFSIZE]; + + gapsize = 1024; + underlying = ".mmap_config"; + (void) unlink(underlying); + + open_flags = O_CREAT | O_TRUNC | O_RDWR; + mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH; + + if ((fd = open(underlying, open_flags, mode)) < 0) { + perror("open"); + return (1); + } + + total_size = TEST_MMAP_EXTENDSIZE; + + memset(buf, 0, sizeof(buf)); + if ((count = write(fd, buf, sizeof(buf))) != sizeof(buf)) { + perror("initial write"); + return (2); + } + + if ((base = mmap(NULL, total_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { + perror("mmap"); + return (3); + } + + /* Extend the file with just 1 byte */ + if (lseek(fd, total_size - 1, SEEK_SET) < 0 || + (count = write(fd, buf, 1)) != 1) { + perror("extending write"); + return (4); + } + + (void) signal(SIGSEGV, catch_sig); + (void) signal(SIGBUS, catch_sig); + + for (i = sizeof(buf); i < total_size; i += gapsize) + base[i] = 'A'; + + close(fd); + (void) unlink(underlying); + return (0); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + db_cv_mmap_extend="yes" +else + db_cv_mmap_extend="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext 
conftest.beam conftest.$ac_ext +fi + + + + if test "$db_cv_mmap_extend" = yes; then + $as_echo "#define HAVE_MMAP_EXTEND 1" >>confdefs.h + + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_mmap_extend" >&5 +$as_echo "$db_cv_mmap_extend" >&6; } +fi + + +# Optional DB 1.85 compatibility API. +if test "$db_cv_compat185" = "yes"; then + ADDITIONAL_INCS="db_185.h $ADDITIONAL_INCS" + + ADDITIONAL_OBJS="db185${o} $ADDITIONAL_OBJS" +fi + +# Optional utilities. +if test "$db_cv_dump185" = "yes"; then + ADDITIONAL_PROGS="db_dump185 $ADDITIONAL_PROGS" +fi + +# Log checksums can be disabled to increase performance +if test "$db_cv_log_checksum" = "yes"; then + $as_echo "#define HAVE_LOG_CHECKSUM 1" >>confdefs.h + + +fi + +# You can disable pieces of functionality to save space. +# +# Btree is always configured: it is the standard method, and Hash off-page +# duplicates require it. +ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(BTREE_OBJS)" + +# Compression can be disabled. +if test "$db_cv_build_compression" = "yes"; then + $as_echo "#define HAVE_COMPRESSION 1" >>confdefs.h + + +fi + +# Partitioning can be disabled. +if test "$db_cv_build_partition" = "yes"; then + $as_echo "#define HAVE_PARTITION 1" >>confdefs.h + + +fi + +# Hash can be disabled. +if test "$db_cv_build_hash" = "yes"; then + $as_echo "#define HAVE_HASH 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HASH_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HASH_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS hash_stub${o}" +fi + +# Heap can be disabled. +if test "$db_cv_build_heap" = "yes"; then + $as_echo "#define HAVE_HEAP 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HEAP_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HEAP_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS heap_stub${o}" +fi + +# Queue can be disabled. 
+if test "$db_cv_build_queue" = "yes"; then + $as_echo "#define HAVE_QUEUE 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(QUEUE_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(QUEUE_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS qam_stub${o}" +fi + +# Replication can be disabled. +if test "$db_cv_build_replication" = "yes"; then + $as_echo "#define HAVE_REPLICATION 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(REP_OBJS)" + + # If we're building replication and detected POSIX threads, build the + # replication manager. + + + if test "$ac_cv_header_pthread_h" = yes; then + $as_echo "#define HAVE_REPLICATION_THREADS 1" >>confdefs.h + + + if test "$with_stacksize" != "no"; then + +cat >>confdefs.h <<_ACEOF +#define DB_STACKSIZE $with_stacksize +_ACEOF + + fi + + # Solaris requires the socket and nsl libraries to build the + # replication manager. Don't add nsl regardless of the OS, + # it causes RPC to fail on AIX 4.3.3. + case "$host_os" in + solaris*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lnsl" >&5 +$as_echo_n "checking for main in -lnsl... " >&6; } +if ${ac_cv_lib_nsl_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnsl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_nsl_main=yes +else + ac_cv_lib_nsl_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_nsl_main" >&5 +$as_echo "$ac_cv_lib_nsl_main" >&6; } +if test "x$ac_cv_lib_nsl_main" = xyes; then : + LIBSO_LIBS="$LIBSO_LIBS -lnsl" +fi +ac_cv_lib_nsl=ac_cv_lib_nsl_main + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lsocket" >&5 +$as_echo_n "checking for main in -lsocket... " >&6; } +if ${ac_cv_lib_socket_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsocket $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_socket_main=yes +else + ac_cv_lib_socket_main=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_socket_main" >&5 +$as_echo "$ac_cv_lib_socket_main" >&6; } +if test "x$ac_cv_lib_socket_main" = xyes; then : + LIBSO_LIBS="$LIBSO_LIBS -lsocket" +fi +ac_cv_lib_socket=ac_cv_lib_socket_main +;; + esac + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(REPMGR_OBJS)" + else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS repmgr_stub${o}" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS rep_stub${o} repmgr_stub${o}" +fi + +# The statistics code can be disabled. +if test "$db_cv_build_statistics" = "yes"; then + $as_echo "#define HAVE_STATISTICS 1" >>confdefs.h + + +fi + +# The verification code can be disabled. 
+if test "$db_cv_build_verify" = "yes"; then + $as_echo "#define HAVE_VERIFY 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(BTREE_VRFY_OBJS) \$(LOG_VRFY_OBJS)" +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS db_vrfy_stub${o} log_verify_stub${o}" +fi + +# The crypto code can be disabled. +if test -d "$topdir/src/crypto" -a "$db_cv_build_cryptography" != "no"; then + $as_echo "#define HAVE_CRYPTO 1" >>confdefs.h + + + + CRYPTO_OBJS="\$(CRYPTO_OBJS)" + + if test "$db_cv_build_cryptography" = "ipp"; then + ac_fn_c_check_header_mongrel "$LINENO" "ippcp.h" "ac_cv_header_ippcp_h" "$ac_includes_default" +if test "x$ac_cv_header_ippcp_h" = xyes; then : + +else + as_fn_error $? "\ +The 'ippcp.h' header file required for IPP cryptography support was not found \ +in the configured include path." "$LINENO" 5 +fi + + + $as_echo "#define HAVE_CRYPTO_IPP 1" >>confdefs.h + + + fi +else + CRYPTO_OBJS="crypto_stub${o}" +fi + +# The mutex code can be disabled, and if there aren't any mutexes, then there's +# no reason to include the locking code. +if test "$db_cv_build_mutexsupport" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(LOCK_OBJS) \$(MUTEX_OBJS)" +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS lock_stub${o} mut_stub${o}" +fi + +# If DIAGNOSTIC is defined, include the log print routines in the library +# itself, various diagnostic modes use them. +if test "$db_cv_diagnostic" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(PRINT_OBJS)" + PRINTLOG_OBJS="" +else + PRINTLOG_OBJS="\$(PRINT_OBJS)" +fi + +# If building for QNX, we need additional OS files. +if test "$qnx_build" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS os_qnx_fsync${o} os_qnx_open${o}" +fi + +# The DBM API can be disabled. +if test "$db_cv_dbm" = "yes"; then + $as_echo "#define HAVE_DBM 1" >>confdefs.h + + + ADDITIONAL_OBJS="$ADDITIONAL_OBJS dbm${o} hsearch${o}" +fi + +# The output and error messages can be stripped. 
+if test "$db_cv_stripped_messages" = "yes"; then + $as_echo "#define HAVE_STRIPPED_MESSAGES 1" >>confdefs.h + + +fi + +# The output and error messages can be localized. +if test "$db_cv_localization" = "yes"; then + $as_echo "#define HAVE_LOCALIZATION 1" >>confdefs.h + + +fi + +# We need to add the additional object files into the Makefile with the correct +# suffix. We can't use $LTLIBOBJS itself, because that variable has $U encoded +# in it for automake, and that's not what we want. See SR #7227 for additional +# information. +# +# XXX: I'm not sure this is correct. +REPLACEMENT_OBJS=`echo "$LIBOBJS" | + sed "s,\.[^.]* ,$o ,g;s,\.[^.]*$,$o,"` + +# This is necessary so that .o files in LIBOBJS are also built via +# the ANSI2KNR-filtering rules. +LIBOBJS=`echo "$LIBOBJS" | + sed 's,\.[^.]* ,$U&,g;s,\.[^.]*$,$U&,'` +LTLIBOBJS=`echo "$LIBOBJS" | + sed 's,\.[^.]* ,.lo ,g;s,\.[^.]*$,.lo,'` + + +# Initial output file list. +CREATE_LIST="Makefile + db_cxx.h:$topdir/src/dbinc/db_cxx.in + db_int.h:$topdir/src/dbinc/db_int.in + clib_port.h:$topdir/dist/clib_port.in + include.tcl:$topdir/test/tcl/include.tcl" + +# Create the db.h file from a source file, a list of global function +# prototypes, and, if configured for unique names, a list of #defines +# to do DB_VERSION_UNIQUE_NAME substitution. +if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db.h:$topdir/src/dbinc/db.in:$topdir/src/dbinc_auto/api_flags.in:$topdir/src/dbinc_auto/ext_def.in:$topdir/src/dbinc_auto/ext_prot.in" +else + CREATE_LIST="$CREATE_LIST + db.h:$topdir/src/dbinc/db.in:$topdir/src/dbinc_auto/api_flags.in:$topdir/src/dbinc_auto/ext_prot.in" +fi + +# If configured for unique names, create the db_int_uext.h file (which +# does the DB_VERSION_UNIQUE_NAME substitution), which is included by +# the db_int.h file. 
+if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db_int_def.h:$topdir/src/dbinc_auto/int_def.in" + db_int_def='#include "db_int_def.h"' +fi + +# Create the db_185.h and db185_int.h files from source files, a list of +# global function prototypes, and, if configured for unique names, a list +# of #defines to do DB_VERSION_UNIQUE_NAME substitution. +if test "$db_cv_compat185" = "yes"; then + if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db_185.h:$topdir/src/dbinc/db_185.in:$topdir/src/dbinc_auto/ext_185_def.in:$topdir/src/dbinc_auto/ext_185_prot.in + db185_int.h:$topdir/lang/db185/db185_int.in:$topdir/src/dbinc_auto/ext_185_def.in:$topdir/src/dbinc_auto/ext_185_prot.in" + else + CREATE_LIST="$CREATE_LIST + db_185.h:$topdir/src/dbinc/db_185.in:$topdir/src/dbinc_auto/ext_185_prot.in + db185_int.h:$topdir/lang/db185/db185_int.in:$topdir/src/dbinc_auto/ext_185_prot.in" + fi +fi + +if test "$db_cv_stl" = "yes"; then + CREATE_LIST="$CREATE_LIST + dbstl_common.h:$topdir/lang/cxx/stl/dbstl_common.in" +fi + +if test "x$subdirs" != "x"; then + subdir_cmd="@for d in ${subdirs}; do (cd \$\$d && \${MAKE} \$@) ; done" +fi + +ac_config_files="$ac_config_files $CREATE_LIST" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. 
+ +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. 
Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in #( + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. 
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by Berkeley DB $as_me 5.2.28, which was +generated by GNU Autoconf 2.68. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... 
+ + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +Berkeley DB config.status 5.2.28 +configured by $0, generated by GNU Autoconf 2.68, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2010 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' +max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO 
"$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO 
"$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' +DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' 
+extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_ld='`$ECHO "$hardcode_libdir_flag_spec_ld" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO 
"$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO 
"$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`' +predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`' +postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`' +predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`' +postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`' +LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`' +reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`' +reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`' +GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`' 
+lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_ld_CXX='`$ECHO "$hardcode_libdir_flag_spec_ld_CXX" | $SED 
"$delay_single_quote_subst"`' +hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`' +inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`' +always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`' +predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`' +postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`' +predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`' +postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path_CXX='`$ECHO 
"$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in SHELL \ +ECHO \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +OBJDUMP \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +DLLTOOL \ +sharedlib_from_linklib_cmd \ +AR \ +AR_FLAGS \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +nm_file_list_spec \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_flag_spec_ld \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib \ +compiler_lib_search_dirs \ +predep_objects \ +postdep_objects \ +predeps \ +postdeps \ +compiler_lib_search_path \ +LD_CXX \ +reload_flag_CXX \ +compiler_CXX \ +lt_prog_compiler_no_builtin_flag_CXX \ +lt_prog_compiler_pic_CXX \ +lt_prog_compiler_wl_CXX \ +lt_prog_compiler_static_CXX \ +lt_cv_prog_compiler_c_o_CXX \ +export_dynamic_flag_spec_CXX \ +whole_archive_flag_spec_CXX \ +compiler_needs_object_CXX \ +with_gnu_ld_CXX \ +allow_undefined_flag_CXX \ +no_undefined_flag_CXX \ 
+hardcode_libdir_flag_spec_CXX \ +hardcode_libdir_flag_spec_ld_CXX \ +hardcode_libdir_separator_CXX \ +exclude_expsyms_CXX \ +include_expsyms_CXX \ +file_list_spec_CXX \ +compiler_lib_search_dirs_CXX \ +predep_objects_CXX \ +postdep_objects_CXX \ +predeps_CXX \ +postdeps_CXX \ +compiler_lib_search_path_CXX; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +sys_lib_dlsearch_path_spec \ +reload_cmds_CXX \ +old_archive_cmds_CXX \ +old_archive_from_new_cmds_CXX \ +old_archive_from_expsyms_cmds_CXX \ +archive_cmds_CXX \ +archive_expsym_cmds_CXX \ +module_cmds_CXX \ +module_expsym_cmds_CXX \ +export_symbols_cmds_CXX \ +prelink_cmds_CXX \ +postlink_cmds_CXX; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' +xsi_shell='$xsi_shell' +lt_shell_append='$lt_shell_append' + +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes INIT. 
+if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile' + + + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "db_config.h") CONFIG_HEADERS="$CONFIG_HEADERS db_config.h:config.hin" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "$CREATE_LIST") CONFIG_FILES="$CONFIG_FILES $CREATE_LIST" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." 
"$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "libtool":C) + + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010 Free Software Foundation, +# Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# The names of the tagged configurations supported by this script. +available_tags="CXX " + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? 
+macro_version=$macro_version +macro_revision=$macro_revision + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. +NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# An object symbol dumper. +OBJDUMP=$lt_OBJDUMP + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". 
+file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive. +AR_FLAGS=$lt_AR_FLAGS + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm in a C name address pair. +global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and in which our libraries should be installed. +lt_sysroot=$lt_sysroot + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. 
+MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. +version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. 
+postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? +with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. 
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# If ld is used when linking, flag to hardcode \$libdir into a binary +# during linking. This must work even if \$libdir does not exist. +hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary. 
+hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \${shlibpath_var} if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# The directories searched by this compiler when creating a shared library. 
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects +postdep_objects=$lt_postdep_objects +predeps=$lt_predeps +postdeps=$lt_postdeps + +# The library search path used internally by the compiler when linking +# a shared library. +compiler_lib_search_path=$lt_compiler_lib_search_path + +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + +ltmain="$ac_aux_dir/ltmain.sh" + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + if test x"$xsi_shell" = xyes; then + sed -e '/^func_dirname ()$/,/^} # func_dirname /c\ +func_dirname ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_basename ()$/,/^} # func_basename /c\ +func_basename ()\ +{\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\ +func_dirname_and_basename ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_stripname ()$/,/^} # func_stripname /c\ +func_stripname ()\ +{\ +\ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\ +\ # positional parameters, so assign one to ordinary parameter first.\ +\ func_stripname_result=${3}\ +\ func_stripname_result=${func_stripname_result#"${1}"}\ +\ func_stripname_result=${func_stripname_result%"${2}"}\ +} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\ +func_split_long_opt ()\ +{\ +\ func_split_long_opt_name=${1%%=*}\ +\ func_split_long_opt_arg=${1#*=}\ +} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\ +func_split_short_opt ()\ +{\ +\ func_split_short_opt_arg=${1#??}\ +\ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\ +} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\ +func_lo2o ()\ +{\ +\ case ${1} in\ +\ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\ +\ *) func_lo2o_result=${1} ;;\ +\ esac\ +} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_xform ()$/,/^} # func_xform /c\ +func_xform ()\ +{\ + func_xform_result=${1%.*}.lo\ +} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_arith ()$/,/^} # func_arith /c\ +func_arith ()\ +{\ + func_arith_result=$(( $* ))\ +} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_len ()$/,/^} # func_len /c\ +func_len ()\ +{\ + func_len_result=${#1}\ +} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + +fi + +if test x"$lt_shell_append" = xyes; then + sed -e '/^func_append ()$/,/^} # func_append /c\ +func_append ()\ +{\ + eval "${1}+=\\${2}"\ +} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\ +func_append_quoted ()\ +{\ +\ func_quote_for_eval "${2}"\ +\ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\ +} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5 +$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;} +fi + + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: CXX + +# The linker used to build libraries. 
+LD=$lt_LD_CXX + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_CXX +reload_cmds=$lt_reload_cmds_CXX + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_CXX + +# A language specific compiler. +CC=$lt_compiler_CXX + +# Is the compiler the GNU compiler? +with_gcc=$GCC_CXX + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_CXX + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_CXX + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static_CXX + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_CXX + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_CXX + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_CXX +archive_expsym_cmds=$lt_archive_expsym_cmds_CXX + +# Commands used to build a loadable module if different from building +# a shared archive. 
+module_cmds=$lt_module_cmds_CXX +module_expsym_cmds=$lt_module_expsym_cmds_CXX + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_CXX + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_CXX + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_CXX + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX + +# If ld is used when linking, flag to hardcode \$libdir into a binary +# during linking. This must work even if \$libdir does not exist. +hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_CXX + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct_CXX + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \${shlibpath_var} if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute_CXX + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L_CXX + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_CXX + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. 
+inherit_rpath=$inherit_rpath_CXX + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_CXX + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_CXX + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_CXX + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_CXX + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_CXX + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_CXX + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_CXX + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec_CXX + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action_CXX + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects_CXX +postdep_objects=$lt_postdep_objects_CXX +predeps=$lt_predeps_CXX +postdeps=$lt_postdeps_CXX + +# The library search path used internally by the compiler when linking +# a shared library. +compiler_lib_search_path=$lt_compiler_lib_search_path_CXX + +# ### END LIBTOOL TAG CONFIG: CXX +_LT_EOF + + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + +if test "$db_cv_sql" = "yes"; then + # This command runs the configure script from the SQL tree. + { $as_echo "$as_me:${as_lineno-$LINENO}: Configuring the SQL API" >&5 +$as_echo "$as_me: Configuring the SQL API" >&6;} + +# Setup the SQLite debug build. +mkdir -p sql +if test "$with_tclconfig" != "no"; then + db_cv_sql_config_tclconfig="--with-tcl=$with_tclconfig" +else + db_cv_sql_config_tclconfig= +fi + +# Whitespace in path names causes libtool to generate an invalid +# dependency_libs line in sql/libsqlite3.la. +# Work around this on cygwin, which commonly has spaces in path names. 
+case `pwd` in + *\ * | *\ *) + if cygpath -d "$PWD" > /dev/null 2>&1 ; then + cd `cygpath -d "$PWD"` + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Changing current directory to $PWD to hide whitespace from libtool" >&5 +$as_echo "$as_me: WARNING: Changing current directory to $PWD to hide whitespace from libtool" >&2;} + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Current bugs in libtool may prevent building the SQL API in \"$PWD\"; please use another working directory" >&5 +$as_echo "$as_me: WARNING: Current bugs in libtool may prevent building the SQL API in \"$PWD\"; please use another working directory" >&2;} + fi + ;; +esac + +# It would be nice to use AC_CONFIG_SUBDIRS here, but it does not allow for +# tweaking of command line options, so hard code things instead. +# +# !!! BEGIN COPIED from autoconf distribution +# Modified to not repeat CPPFLAGS or readline settings + + # Remove --cache-file, --srcdir, and --disable-option-checking arguments + # so they do not pile up. 
+ ac_sub_configure_args= + ac_prev= + eval "set x $ac_configure_args" + shift + for ac_arg + do + if test -n "$ac_prev"; then + ac_prev= + continue + fi + case $ac_arg in + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* \ + | --c=*) + ;; + --config-cache | -C) + ;; + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + ;; + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + ;; + --disable-option-checking) + ;; + CPPFLAGS=* | *readline*) + ;; + *) + case $ac_arg in + *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + ac_sub_configure_args="$ac_sub_configure_args '$ac_arg'" ;; + esac + done + + # Always prepend --prefix to ensure using the same prefix + # in subdir configurations. + ac_arg="--prefix=$prefix" + case $ac_arg in + *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + ac_sub_configure_args="'$ac_arg' $ac_sub_configure_args" + + # Pass --silent + if test "$silent" = yes; then + ac_sub_configure_args="--silent $ac_sub_configure_args" + fi + + # Always prepend --disable-option-checking to silence warnings, since + # different subdirs can have different --enable and --with options. + ac_sub_configure_args="--disable-option-checking $ac_sub_configure_args" + +# !!! END COPIED from autoconf distribution + +sqlite_dir=$srcdir/../lang/sql/sqlite +(cd sql && eval "\$SHELL ../$sqlite_dir/configure --disable-option-checking $ac_sub_configure_args CPPFLAGS=\"-I.. 
$CPPFLAGS\" --enable-amalgamation=$db_cv_sql_amalgamation --enable-readline=$with_readline" && cat build_config.h >> config.h) || exit 1 + +# Configure JDBC if --enable-jdbc +if test "$db_cv_jdbc" != "no"; then + + # Deal with user-defined jdbc source path + if test "$with_jdbc" != "no"; then + jdbc_path="$with_jdbc" + else + jdbc_path="$srcdir/../lang/sql/jdbc" + fi + + if test ! -d $jdbc_path; then + echo "Cannot find jdbc source in $jdbc_path." + echo "Please check that path or use --with-jdbc to specify the source directory" + exit 1 + fi + jdbc_dir=`cd $jdbc_path && /bin/pwd` + + # Transfer following setting to jdbc configure: + # . --prefix + # . --enable-shared/--disable-shared + # . --enable-static/--disable-static + # . CFLAGS, CPPFLAGS and LDFLAGS + jdbc_args="" + jdbc_flags="" + + test "$prefix" != "" && jdbc_args="--prefix=$prefix --with-jardir=$prefix/jar" + test "$enable_shared" != "" && jdbc_args="$jdbc_args --enable-shared=$enable_shared" + test "$enable_static" != "" && jdbc_args="$jdbc_args --enable-static=$enable_static" + + # 1. The build directory is build_unix/jdbc, so the include paths are relative + # to that. + # 2. The JDBC driver does not accept CPPFLAGS. So we move the CPPFLAGS options + # into CFLAGS for the JDBC driver. + jdbc_flags="$jdbc_flags CFLAGS=\"-I.. -I../../src/dbinc -I../sql \ + -DHAVE_ERRNO_H -D_HAVE_SQLITE_CONFIG_H -DHAVE_SQLITE3_MALLOC \ + $CFLAGS $CPPFLAGS\"" + # Set LDFLAGS for JDBC driver + test "$LDFLAGS" != "" && jdbc_flags="$jdbc_flags LDFLAGS=\"$LDFLAGS\"" + + # Copy ../lang/sql/jdbc to build_unix/ + test ! -d jdbc && cp -r $jdbc_dir . + + # Set DBSQL LIB for Makefile.in + BDB_LIB="..\/libdb-$DB_VERSION_MAJOR.$DB_VERSION_MINOR.la" + test $enable_shared != "yes" && BDB_LIB='..\/libdb.a' + + # Run the jdbc/configure + cd jdbc + test ! 
-e Makefile.in.tmp && mv Makefile.in Makefile.in.tmp + sed "s/@BDB_LIB@/$BDB_LIB/g" Makefile.in.tmp > Makefile.in + eval "\$SHELL ./configure --with-sqlite3=../../lang/sql/generated $jdbc_args $jdbc_flags" +fi + +fi diff --git a/dist/configure.ac b/dist/configure.ac new file mode 100644 index 00000000..1608c3b2 --- /dev/null +++ b/dist/configure.ac @@ -0,0 +1,1119 @@ +# $Id$ +# Process this file with autoconf to produce a configure script. + +PACKAGE=db +AC_INIT(Berkeley DB, __EDIT_DB_VERSION__, + [Oracle Technology Network Berkeley DB forum], + db-__EDIT_DB_VERSION__) +AC_CONFIG_SRCDIR([../src/db/db.c]) +AC_CONFIG_HEADERS([db_config.h:config.hin]) + +AC_CONFIG_MACRO_DIR([aclocal]) + +# We're going to pass options through to SQLite, don't check them first. +AC_DISABLE_OPTION_CHECKING() + +# Configure setup. +AC_CANONICAL_HOST() +AC_ARG_PROGRAM() + +# Don't build in the dist directory. +AC_MSG_CHECKING(if building in the top-level or dist directories) +if [ test -f configure.ac ] ; then + AC_MSG_RESULT(yes) + AC_MSG_ERROR([\ +Berkeley DB should not be built in the "dist" directory. \ +Change directory to the build_unix directory and run ../dist/configure \ +from there.]) +fi +AC_MSG_RESULT(no) + +topdir=`echo "$srcdir/.." | sed 's,/dist/\.\.,,'` +# Substitution variables. BDB additions need to be documented. 
+AC_SUBST(ADDITIONAL_INCS) +AC_SUBST(ADDITIONAL_LANG) +AC_SUBST(ADDITIONAL_OBJS) +AC_SUBST(ADDITIONAL_PROGS) +AC_SUBST(BUILD_TARGET) +AC_SUBST(CFLAGS) +AC_SUBST(CONFIGURATION_ARGS) +AC_SUBST(CONFIGURATION_PATH) +AC_SUBST(CPPFLAGS) +AC_SUBST(CRYPTO_OBJS) +AC_SUBST(CXX) +AC_SUBST(CXXFLAGS) +AC_SUBST(DB_CONST) +AC_SUBST(DB_PROTO1) +AC_SUBST(DB_PROTO2) +AC_SUBST(DB_STRUCT_ALIGN8) +AC_SUBST(DEFAULT_LIB) +AC_SUBST(DEFAULT_LIB_CXX) +AC_SUBST(DEFAULT_LIB_SQL) +AC_SUBST(DEFAULT_LIB_SQLITE) +AC_SUBST(DEFAULT_LIB_STL) +AC_SUBST(DEFAULT_LIB_TCL) +AC_SUBST(DTRACE) +AC_SUBST(FINAL_OBJS) +AC_SUBST(INSTALLER) +AC_SUBST(INSTALL_LIBS) +AC_SUBST(INSTALL_LIBS_EXTRA) +AC_SUBST(INSTALL_TARGET) +AC_SUBST(JAR) +AC_SUBST(JAVACFLAGS) +AC_SUBST(LDFLAGS) +AC_SUBST(LIBCSO_LIBS) +AC_SUBST(LIBJSO_LIBS) +AC_SUBST(LIBS) +AC_SUBST(LIBSO_LIBS) +AC_SUBST(LIBTOOL) +AC_SUBST(LIBTSO_LIBS) +AC_SUBST(LIBTSO_MODSUFFIX) +AC_SUBST(LIBTSO_MODULE) +AC_SUBST(LIBXSO_LIBS) +AC_SUBST(MAKEFILE_CC) +AC_SUBST(MAKEFILE_CCLINK) +AC_SUBST(MAKEFILE_CXX) +AC_SUBST(MAKEFILE_CXXLINK) +AC_SUBST(MAKEFILE_SOLINK) +AC_SUBST(MAKEFILE_XSOLINK) +AC_SUBST(OSDIR) +AC_SUBST(PATH_SEPARATOR) +AC_SUBST(POSTLINK) +AC_SUBST(PRINTLOG_OBJS) +AC_SUBST(REPLACEMENT_OBJS) +AC_SUBST(SOFLAGS) +AC_SUBST(SQL_FLAGS) +AC_SUBST(SQL_LIBS) +AC_SUBST(SWIGCFLAGS) +AC_SUBST(TEST_LIBS) +AC_SUBST(db_int_def) +AC_SUBST(o) +AC_SUBST(subdir_cmd) +AC_SUBST(topdir) + +# The Windows public header has two extra symbols we need to remove. +AC_SUBST(platform_header) +AC_SUBST(platform_footer) + +# Set the default installation location. +AC_PREFIX_DEFAULT(/usr/local/BerkeleyDB.__EDIT_DB_VERSION_MAJOR__.__EDIT_DB_VERSION_MINOR__) + +# Configure the version information. 
+AC_SUBST(DB_VERSION_FAMILY) +DB_VERSION_FAMILY="__EDIT_DB_VERSION_FAMILY__" +AC_SUBST(DB_VERSION_RELEASE) +DB_VERSION_RELEASE="__EDIT_DB_VERSION_RELEASE__" +AC_SUBST(DB_VERSION_MAJOR) +DB_VERSION_MAJOR="__EDIT_DB_VERSION_MAJOR__" +AC_SUBST(DB_VERSION_MINOR) +DB_VERSION_MINOR="__EDIT_DB_VERSION_MINOR__" +AC_SUBST(DB_VERSION_PATCH) +DB_VERSION_PATCH="__EDIT_DB_VERSION_PATCH__" +AC_SUBST(DB_VERSION_STRING) +DB_VERSION_STRING='"__EDIT_DB_VERSION_STRING__"' +AC_SUBST(DB_VERSION_FULL_STRING) +DB_VERSION_FULL_STRING='"__EDIT_DB_VERSION_FULL_STRING__"' +AC_SUBST(DB_VERSION_UNIQUE_NAME) + +# Process all options before using them. +AM_OPTIONS_SET + +# Set some #defines based on configuration options. +if test "$db_cv_diagnostic" = "yes"; then + AC_DEFINE(DIAGNOSTIC) + AH_TEMPLATE(DIAGNOSTIC, + [Define to 1 if you want a version with run-time diagnostic checking.]) +fi +if test "$db_cv_debug_rop" = "yes"; then + AC_DEFINE(DEBUG_ROP) + AH_TEMPLATE(DEBUG_ROP, + [Define to 1 if you want a version that logs read operations.]) +fi +if test "$db_cv_debug_wop" = "yes"; then + AC_DEFINE(DEBUG_WOP) + AH_TEMPLATE(DEBUG_WOP, + [Define to 1 if you want a version that logs write operations.]) +fi +if test "$db_cv_umrw" = "yes"; then + AC_DEFINE(UMRW) + AH_TEMPLATE(UMRW, + [Define to 1 to mask harmless uninitialized memory read/writes.]) + +fi +if test "$db_cv_test" = "yes"; then + AC_DEFINE(CONFIG_TEST) + AH_TEMPLATE(CONFIG_TEST, + [Define to 1 if you want to build a version for running the test suite.]) +fi + +AH_TEMPLATE(HAVE_UPGRADE_SUPPORT, + [Define to 1 if port includes historic database upgrade support.]) +AC_DEFINE(HAVE_UPGRADE_SUPPORT) + +# Check for programs used in building and installation. 
+AM_PROGRAMS_SET +AC_PROG_INSTALL + +BUILD_TARGET="library_build" +INSTALL_TARGET="library_install" + +# Respect the environment LIBS settings +LIBSO_LIBS="$LIBS" + +# This is where we handle stuff that autoconf can't handle: compiler, +# preprocessor and load flags, libraries that the standard tests don't +# look for. +# +# There are additional libraries we need for some compiler/architecture +# combinations. +# +# Some architectures require DB to be compiled with special flags and/or +# libraries for threaded applications +# +# The makefile CC may be different than the CC used in config testing, +# because the makefile CC may be set to use $(LIBTOOL). +# +# Don't override anything if it's already set from the environment. +optimize_flag="-O" +extra_cflags="" + +case "$host_os" in +aix4.3.*|aix[[56]]*) + case "$host_os" in + aix4.3.*) + CPPFLAGS="$CPPFLAGS -D_LINUX_SOURCE_COMPAT";; + esac + # IBM's XLC compilers (at least versions 7/8/9) generate incorrect code + # when ordinary optimization is enabled because they make strong + # assumptions about the types held at each memory location, and some + # Berkeley DB code violates those assumptions. 
[#16141]
+	extra_cflags=" -qalias=noansi"
+	optimize_flag="-O2"
+	CC=${CC-"xlc_r"}
+	CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE"
+	LDFLAGS="$LDFLAGS -Wl,-brtl";;
+bsdi3*)	CC=${CC-"shlicc2"}
+	LIBSO_LIBS="$LIBSO_LIBS -lipc";;
+cygwin*)
+	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";;
+freebsd*)
+	CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE"
+	LDFLAGS="$LDFLAGS -pthread";;
+gnu*|k*bsd*-gnu|linux*)
+	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";;
+hpux*)	CPPFLAGS="$CPPFLAGS -D_REENTRANT";;
+irix*)	optimize_flag="-O2"
+	CPPFLAGS="$CPPFLAGS -D_SGI_MP_SOURCE";;
+mpeix*)	CPPFLAGS="$CPPFLAGS -D_POSIX_SOURCE -D_SOCKET_SOURCE"
+	LIBSO_LIBS="$LIBSO_LIBS -lsocket -lsvipc";;
+osf*)	CPPFLAGS="$CPPFLAGS -pthread";;
+*qnx*)	qnx_build="yes"
+	AC_DEFINE(HAVE_QNX)
+	AH_TEMPLATE(HAVE_QNX, [Define to 1 if building on QNX.]);;
+solaris*)
+	CPPFLAGS="$CPPFLAGS -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS ";;
+esac
+
+# Set CFLAGS/CXXFLAGS. We MUST set the flags before we call autoconf
+# compiler configuration macros, because if we don't, they set CFLAGS
+# to no optimization and -g, which isn't what we want.
+#
+# If the user wants a debugging environment, add -g to their compiler flags
+# and don't automatically optimize. If you want to compile with a different
+# set of flags, specify CFLAGS in the environment before configuring.
+if test "$db_cv_debug" = "yes"; then
+	AC_DEFINE(DEBUG)
+	AH_TEMPLATE(DEBUG, [Define to 1 if you want a debugging version.])
+
+	CFLAGS="-g $CFLAGS"
+else
+	CFLAGS=${CFLAGS-$optimize_flag}
+fi
+
+CFLAGS="$CFLAGS$extra_cflags"
+CXXFLAGS=${CXXFLAGS-"$CFLAGS"}
+
+# The default compiler is cc (NOT gcc), the default CFLAGS is as specified
+# above, NOT what is set by AC_PROG_CC, as it won't set optimization flags
+# for any compiler other than gcc.
+AC_PROG_CC(cc gcc)
+
+# We know what compiler we're going to use, now. Set per-compiler flags.
+if test "$GCC" = "yes"; then
+	# Use -O3 if we're using gcc, unless we're doing a small build, in
+	# which case we use -Os alone.
The code size for -O3 is quite a + # bit larger than -O2: a compromise is "-Os -finline-functions", + # it's smaller and explicitly inlining the functions helps Berkeley + # DB. + CFLAGS="$CFLAGS " + if test "$db_cv_smallbuild" = "yes"; then + CFLAGS=`echo "$CFLAGS" | sed 's/-O /-Os /g'` + else + CFLAGS=`echo "$CFLAGS" | sed 's/-O /-O3 /g'` + fi +else + case "$host_os" in + hpux11.0*) ;; + hpux11*) CPPFLAGS="$CPPFLAGS -mt" + test "$host_cpu" = "ia64" && + CFLAGS="$CFLAGS +u1";; + esac +fi + +# Check for "const" and "inline" keywords. +AC_C_CONST +AC_C_INLINE + +# We use prototypes and the keyword "const" in db.h which doesn't include +# db_config.h, so we have to figure out what to do there. +# +# There is an autoconf AC_C_PROTOTYPES macro, but as all it does is define +# db_config.h variables, it doesn't help us. +# +# We don't have much choice, we look at internal autoconf variables. +if test "$ac_cv_c_const" != "yes"; then + DB_CONST="#define const" +fi + +# We use alignment attributes in db.h - figure out if the compiler supports +# them. +AC_CACHE_CHECK([for GCC aligned attribute], db_cv_aligned_attribute, [ +AC_TRY_COMPILE(, __attribute__ ((aligned (8))) int i;, + [db_cv_aligned_attribute=yes], [db_cv_aligned_attribute=no])]) +if test "$db_cv_aligned_attribute" = "yes"; then + DB_STRUCT_ALIGN8="__attribute__ ((aligned (8)))" +fi + +# Clear __P, some other systems use it too. +DB_PROTO1="#undef __P" +if test "$ac_cv_prog_cc_c89" = "no"; then + DB_PROTO2="#define __P(protos) ()" +else + DB_PROTO2="#define __P(protos) protos" +fi + +# Because of shared library building, the ${CC} used for config tests +# may be different than the ${CC} we want to put in the Makefile. +# The latter is known as ${MAKEFILE_CC} in this script. +MAKEFILE_CC="${CC}" +MAKEFILE_CCLINK="${CC}" +MAKEFILE_CXX="nocxx" +MAKEFILE_CXXLINK="nocxx" + +# See if we need the C++ compiler at all. If so, we'd like to find one that +# interoperates with the C compiler we chose. 
Since we preferred cc over gcc,
+# we'll also prefer the vendor's compiler over g++/gcc. If we're wrong, the
+# user can set CC and CXX in their environment before running configure.
+#
+# AC_PROG_CXX sets CXX, but it uses $CXX and $CCC (in that order) as its
+# first choices.
+if test "$db_cv_cxx" = "yes"; then
+	if test "$GCC" != "yes"; then
+		case "$host_os" in
+		aix*)	AC_CHECK_TOOL(CCC, xlC_r)
+			LIBXSO_LIBS="-lC_r $LIBXSO_LIBS"
+			LIBSO_LIBS="-lC_r $LIBSO_LIBS";;
+		hpux*)	AC_CHECK_TOOL(CCC, aCC);;
+		irix*)	AC_CHECK_TOOL(CCC, CC);;
+		osf*)	AC_CHECK_TOOL(CCC, cxx)
+			CXXFLAGS="$CXXFLAGS -D__USE_STD_IOSTREAM"
+			test -d /usr/include.dtk &&
+			    CXXFLAGS="$CXXFLAGS -I/usr/include.dtk";;
+		solaris*) AC_CHECK_TOOL(CCC, CC);;
+		esac
+	fi
+	AC_PROG_CXX
+	###### WORKAROUND: SEE SR #7938
+	AC_PROG_CXXCPP
+	###############################
+	AC_CXX_STDHEADERS
+	MAKEFILE_CXX="${CXX}"
+	MAKEFILE_CXXLINK="${CXX}"
+fi
+
+# Do some gcc specific configuration.
+AC_GCC_CONFIG1
+
+# We need the -Kthread/-pthread flag when compiling on SCO/Caldera's UnixWare
+# and OpenUNIX releases. We can't make the test until we know which compiler
+# we're using.
+case "$host_os" in
+sysv5UnixWare*|sysv5OpenUNIX8*)
+	# Use the POSIX-portable "=" here: "==" is a bashism that fails
+	# under strict /bin/sh implementations (dash, Solaris sh), which
+	# configure scripts must support.
+	if test "$GCC" = "yes"; then
+		CPPFLAGS="$CPPFLAGS -pthread"
+		LDFLAGS="$LDFLAGS -pthread"
+	else
+		CPPFLAGS="$CPPFLAGS -Kthread"
+		LDFLAGS="$LDFLAGS -Kthread"
+	fi;;
+esac
+
+# Export our compiler preferences for the libtool configuration.
+export CC CCC
+CCC=$CXX
+
+# Libtool configuration.
+AC_PROG_LIBTOOL + +SOFLAGS="-rpath \$(libdir)" + +# Set SOSUFFIX and friends +SOSUFFIX_CONFIG +MODSUFFIX_CONFIG +JMODSUFFIX_CONFIG + +LIBTOOL="./libtool" + +INSTALLER="\$(LIBTOOL) --mode=install cp -p" + +MAKEFILE_CC="\$(LIBTOOL) --mode=compile ${MAKEFILE_CC}" +MAKEFILE_SOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK} -avoid-version" +MAKEFILE_CCLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK}" +MAKEFILE_CXX="\$(LIBTOOL) --mode=compile ${MAKEFILE_CXX}" +MAKEFILE_XSOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK} -avoid-version" +MAKEFILE_CXXLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK}" + + +case "$host_os" in +cygwin* | mingw*) + MAKEFILE_SOLINK="$MAKEFILE_SOLINK -no-undefined" + MAKEFILE_XSOLINK="$MAKEFILE_XSOLINK -no-undefined";; +esac + +case "$host_os" in + darwin*) + LIBTSO_MODULE="" + LIBTSO_MODSUFFIX=".dylib";; + *qnx*) + LIBTSO_MODULE="" + LIBTSO_MODSUFFIX=$MODSUFFIX;; + *) + LIBTSO_MODULE="-module" + LIBTSO_MODSUFFIX=$MODSUFFIX;; +esac + +if test "$enable_static" = "yes"; then + test "$AR" = "false" && AC_MSG_ERROR([No ar utility found.]) +fi + +# C API. +if test "$enable_shared" = "no"; then + DEFAULT_LIB="\$(libdb_version)" + POSTLINK=": " + o=".o" +else + DEFAULT_LIB="\$(libso_target)" + POSTLINK="\$(LIBTOOL) --mode=execute true" + o=".lo" +fi +INSTALL_LIBS="$DEFAULT_LIB" +if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libdb)" +fi + +# Optional C++ API. +if test "$db_cv_cxx" = "yes"; then + if test "$enable_shared" = "no"; then + DEFAULT_LIB_CXX="\$(libcxx_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_CXX="\$(libxso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_CXX" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libcxx)" + fi +fi + +# Optional Java API / JDBC. +if test "$db_cv_java" = "yes" -o "$db_cv_jdbc" = "yes"; then + # BDB Java API requires shared libraries. 
+ if test "$db_cv_java" = "yes" -a "$enable_shared" = "no"; then + AC_MSG_ERROR([Java requires shared libraries]) + fi + + # A classpath that includes . is needed to check for Java + # Since Cygwin uses Windows' javac, we need Windows path separators + case "$host_os" in + cygwin*) CLASSPATH=".;$CLASSPATH";; + *) CLASSPATH=".:$CLASSPATH";; + esac + export CLASSPATH + AC_PROG_JAVAC + AC_PROG_JAR + AC_PROG_JAVA + AC_JNI_INCLUDE_DIR + + AC_MSG_CHECKING(java version) + case "$JAVA" in + *kaffe* ) + JAVA_VERSION=`$JAVA -version 2>&1 | + sed -e '/Java Version:/!d' -e 's/.*Java Version: \([[^ ]]*\)[[ ]]*/\1/'` ;; + * ) JAVA_VERSION=`$JAVA -version 2>&1 | + sed -e '/ version /!d' -e 's/.*"\(.*\)".*/\1/'` ;; + esac + AC_MSG_RESULT($JAVA_VERSION) + case "$JAVA_VERSION" in + 1.[[3456789]]* | 1.[[1-9]][[0-9]]* | [[23456789]]* ) ;; + * ) + AC_MSG_ERROR([Java version 1.3 or higher required, got $JAVA_VERSION]) ;; + esac + + # Because of the code that SWIG generates to cast between pointers and + # integers, we need to add the flag "-fno-strict-aliasing" to the gcc + # command line when compiling the JNI code. This is documented in + # [#14953] and at http://www.swig.org/Doc1.3/Java.html + if test "${GCC}" = "yes"; then + SWIGCFLAGS="-fno-strict-aliasing" + fi + + for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS + do + CPPFLAGS="$CPPFLAGS -I$JNI_INCLUDE_DIR" + done + + if test "$db_cv_java" = "yes"; then + ADDITIONAL_LANG="$ADDITIONAL_LANG java" + INSTALL_LIBS="$INSTALL_LIBS \$(libjso_target)" + fi +else + JAVAC=nojavac +fi + +# MinGW support. +if test "$db_cv_mingw" = "yes"; then + OSDIR=os_windows + PATH_SEPARATOR="\\\\/:" + + AC_DEFINE(DB_WIN32) + AC_DEFINE(STDC_HEADERS) +else + OSDIR=os + PATH_SEPARATOR="/" + AC_DEFINE(HAVE_SYSTEM_INCLUDE_FILES) +fi + +# Optional SQL API. +if test "$db_cv_sql" = "yes"; then + ADDITIONAL_INCS="$ADDITIONAL_INCS dbsql.h" + ADDITIONAL_PROGS="$ADDITIONAL_PROGS dbsql" + + # Link against libdl, if found. 
It is only needed for the load + # extension, but shouldn't hurt. + AC_HAVE_LIBRARY(dl, SQL_LIBS="$SQL_LIBS -ldl") + + # Link against libedit or readline for command-line editing. + if test x"$with_readline" != xno; then + header=readline.h + for rl_lib in edit readline; do + found="yes" + save_LIBS="" + LIBS="" + AS_UNSET(ac_cv_search_tgetent) + AC_SEARCH_LIBS(tgetent, + [$rl_lib ncurses curses termcap], + [term_LIBS="$LIBS"], [term_LIBS=""]) + AC_CHECK_LIB([$rl_lib], [readline], + [SQL_LIBS="$SQL_LIBS -l$rl_lib $term_LIBS"], + [found="no"]) + LIBS="$save_LIBS" + test "$found" = "yes" && break + done + + if test x"$rl_lib" = xedit; then + header="editline/readline.h" + fi + + if test "$found" = "yes"; then + AC_CHECK_HEADER($header, [found="yes"], [ + found="no" + if test "$cross_compiling" != yes; then + for dir in /usr /usr/local /usr/local/readline /usr/contrib /mingw; do + for subdir in include include/readline; do + AC_CHECK_FILE($dir/$subdir/$header, found=yes) + if test "$found" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -I$dir/$subdir" + break + fi + done + test "$found" = "yes" && break + done + fi]) + fi + fi + + if test "$enable_shared" = "no"; then + DEFAULT_LIB_SQL="\$(libsql_version)" + else + DEFAULT_LIB_SQL="\$(libsqlso_target)" + fi + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_SQL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libsql)" + fi + + if test "$db_cv_test" = "yes"; then + subdirs="$subdirs sql" + ADDITIONAL_LANG="$ADDITIONAL_LANG sql-test" + fi + + if test "$db_cv_jdbc" = "yes"; then + subdirs="$subdirs jdbc" + ADDITIONAL_LANG="$ADDITIONAL_LANG jdbc" + fi + + if test "$db_cv_debug" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -DSQLITE_DEBUG=1" + fi + + if test "$db_cv_build_cryptography" = "yes"; then + SQL_FLAGS="$SQL_FLAGS -DSQLITE_HAS_CODEC=1" + fi +fi + +if test "$db_cv_sql_compat" = "yes"; then + if test "$enable_shared" = "no"; then + DEFAULT_LIB_SQLITE="\$(libsqlite)" + else + 
DEFAULT_LIB_SQLITE="\$(libsqliteso_target)" + fi + + ADDITIONAL_INCS="$ADDITIONAL_INCS \$(langdir)/sql/generated/sqlite3.h" + ADDITIONAL_PROGS="$ADDITIONAL_PROGS sqlite3" + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_SQLITE" + + # This is different to the other libraries: we need to be very + # careful not to delete an existing installation of SQLite unless + # we are installing over it. + if test "$enable_shared" = "yes"; then + INSTALL_LIBS_EXTRA="$INSTALL_LIBS_EXTRA \$(libsqliteso)" + fi + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libsqlite)" + fi +fi + +# Optional SQL code generation tool. +if test "$db_cv_sql_codegen" = "yes"; then + ADDITIONAL_PROGS="$ADDITIONAL_PROGS db_sql_codegen" +fi + +# Optional STL API. +if test "$db_cv_stl" = "yes"; then + AC_CXX_SUPPORTS_TEMPLATES + AC_CXX_WSTRING + AX_TLS + if test "$enable_shared" = "no"; then + DEFAULT_LIB_STL="\$(libstl_version)" + fi + if test "$enable_shared" = "yes"; then + DEFAULT_LIB_STL="\$(libstlso_target)" + fi + ADDITIONAL_INCS="$ADDITIONAL_INCS dbstl_common.h" + for f in dbstl_set.h dbstl_vector.h dbstl_exception.h dbstl_map.h dbstl_utility.h dbstl_dbc.h dbstl_dbt.h dbstl_base_iterator.h dbstl_container.h dbstl_element_ref.h dbstl_inner_utility.h dbstl_resource_manager.h ; do + ADDITIONAL_INCS="$ADDITIONAL_INCS \$(topdir)/lang/cxx/stl/$f" + done + INSTALL_LIBS="$INSTALL_LIBS $DEFAULT_LIB_STL" + if test "$enable_static" = "yes"; then + INSTALL_LIBS="$INSTALL_LIBS \$(libstl)" + fi +fi + +# Checks for include files, structures, C types. 
+AC_HEADER_STAT +AC_HEADER_TIME +AC_HEADER_DIRENT +AC_CHECK_HEADERS(execinfo.h sys/select.h sys/socket.h sys/time.h) +AC_CHECK_MEMBERS([struct stat.st_blksize]) +AM_TYPES + +AC_CACHE_CHECK([for ANSI C exit success/failure values], db_cv_exit_defines, [ +AC_TRY_COMPILE([#include ], return (EXIT_SUCCESS);, + [db_cv_exit_defines=yes], [db_cv_exit_defines=no])]) +if test "$db_cv_exit_defines" = "yes"; then + AC_DEFINE(HAVE_EXIT_SUCCESS) + AH_TEMPLATE(HAVE_EXIT_SUCCESS, + [Define to 1 if platform has EXIT_SUCCESS/EXIT_FAILURE #defines.]) +fi + +AC_CACHE_CHECK([for getopt optreset variable], db_cv_optreset, [ +AC_TRY_LINK([#include ], extern int optreset; optreset = 1;, + [db_cv_optreset=yes], [db_cv_optreset=no])]) +if test "$db_cv_optreset" = "yes"; then + AC_DEFINE(HAVE_GETOPT_OPTRESET) + AH_TEMPLATE(HAVE_GETOPT_OPTRESET, + [Define to 1 if getopt supports the optreset variable.]) +fi + +# Check for mutexes. +# We do this first because it changes $LIBSO_LIBS. +AM_DEFINE_MUTEXES + +# Check for native (system call or instruction set) support for +# atomic increment, decrement, and compare & exchange. +AM_DEFINE_ATOMIC + +# Check for os-specific event support for performance monitoring such as +# DTrace or SystemTap. +AM_DEFINE_PERFMON + +# Test for various functions/libraries -- do tests that change library values +# first. +# +# Update LIBS, so we're testing against the current list of libraries. +LIBS="$LIBSO_LIBS" + +# The yield function on Solaris is almost certainly pthread_yield (LWP threads +# or POSIX pthreads), or thr_yield (UI threads). There's an outside chance it +# is sched_yield() though, only available in -lrt on Solaris. +AC_SEARCH_LIBS(sched_yield, rt) + +# The Berkeley DB library calls fdatasync, only available in -lrt on Solaris. +AC_SEARCH_LIBS(fdatasync, rt) + +AC_SEARCH_LIBS(getaddrinfo, nsl socket) +AC_SEARCH_LIBS(hstrerror, resolv) + +# Those tests updated LIBS, update our internal list. +LIBSO_LIBS="$LIBS" + +# !!! 
+# We could be more exact about whether these libraries are needed, but don't +# bother -- if they exist, we load them, it's only the test programs anyway. +AC_HAVE_LIBRARY(m, TEST_LIBS="$TEST_LIBS -lm") +AC_HAVE_LIBRARY(nsl, TEST_LIBS="$TEST_LIBS -lnsl") +AC_HAVE_LIBRARY(socket, TEST_LIBS="$TEST_LIBS -lsocket") + +# Checks for system functions for which we have replacements. +# +# The only portable getcwd call is getcwd(char *, size_t), where the +# buffer is non-NULL -- Solaris can't handle a NULL buffer, and they +# deleted getwd(). +AC_REPLACE_FUNCS(\ + abort atoi atol bsearch getcwd getenv getopt isalpha isdigit isprint\ + isspace memcmp memcpy memmove printf qsort raise rand strcasecmp\ + strcat strchr strdup strerror strncat strncmp strrchr strsep\ + strtol strtoul) + +# Check for system functions we optionally use. +AC_CHECK_FUNCS(\ + _fstati64 backtrace backtrace_symbols directio fchmod fclose\ + fcntl fdatasync fgetc fgets fopen fwrite getgid\ + getrusage getuid hstrerror mprotect pstat_getdynamic\ + pthread_self pthread_yield random sched_yield select setgid setuid\ + sigaction snprintf stat sysconf vsnprintf yield) + +AC_TIMERS + +# Ftruncate. +# We've run into a problem with ftruncate on Alpha/Tru64, the issue is that +# after a truncate the last page of the file mmaps as all zeros. So just don't +# use ftruncate. +case "$host_os" in +osf*) + AC_MSG_WARN( + [ftruncate ignored on $host_os-$host_vendor.]);; +*) + AC_CHECK_FUNCS(ftruncate);; +esac + +# Pread/pwrite. +# HP-UX has pread/pwrite, but it doesn't work with largefile support. +# NCR's version of System V R 4.3 has pread/pwrite symbols, but no support. +case "$host_os-$host_vendor" in +hpux*|sysv4.3*-ncr) + AC_MSG_WARN( + [pread/pwrite interfaces ignored on $host_os-$host_vendor.]);; +*) + AC_CHECK_FUNCS(pread pwrite);; +esac + +# Check for getaddrinfo; do the test explicitly instead of using AC_CHECK_FUNCS +# because isn't a standard include file. 
+AC_CACHE_CHECK([for getaddrinfo], db_cv_getaddrinfo, [ +AC_TRY_LINK([ +#include +#include ], [ + getaddrinfo(0, 0, 0, 0); +], [db_cv_getaddrinfo=yes], [db_cv_getaddrinfo=no])]) +if test "$db_cv_getaddrinfo" = "yes"; then + AC_DEFINE(HAVE_GETADDRINFO) + AH_TEMPLATE(HAVE_GETADDRINFO, + [Define to 1 if you have the `getaddrinfo' function.]) +fi + +# Check for the fcntl F_SETFD flag to deny child process access to file +# descriptors. +AC_CACHE_CHECK([for fcntl/F_SETFD], db_cv_fcntl_f_setfd, [ +AC_TRY_LINK([ +#include +#include ], [ + fcntl(1, F_SETFD, 1); +], [db_cv_fcntl_f_setfd=yes], [db_cv_fcntl_f_setfd=no])]) +if test "$db_cv_fcntl_f_setfd" = "yes"; then + AC_DEFINE(HAVE_FCNTL_F_SETFD) + AH_TEMPLATE(HAVE_FCNTL_F_SETFD, + [Define to 1 if fcntl/F_SETFD denies child access to file descriptors.]) +fi + +# A/UX has a broken getopt(3). +case "$host_os" in +aux*) AC_LIBOBJ([getopt]);; +esac + +# Linux has a broken O_DIRECT flag, but you can't detect it at configure time. +# Linux and SGI require buffer alignment we may not match, otherwise writes +# will fail. Default to not using the O_DIRECT flag. +if test "$db_cv_o_direct" = "yes"; then + AC_CACHE_CHECK([for open/O_DIRECT], db_cv_open_o_direct, [ + AC_TRY_LINK([ + #include + #include ], [ + open("a", O_RDONLY | O_DIRECT, 0); + ], [db_cv_open_o_direct=yes], [db_cv_open_o_direct=no])]) + if test \ + "$db_cv_o_direct" = "yes" -a "$db_cv_open_o_direct" = "yes"; then + AC_DEFINE(HAVE_O_DIRECT) + AH_TEMPLATE(HAVE_O_DIRECT, + [Define to 1 if you have the O_DIRECT flag.]) + fi +fi + +# Check for largefile support. +AC_SYS_LARGEFILE + +# Figure out how to create shared regions. +# +# First, we look for mmap. +# +# BSD/OS has mlock(2), but it doesn't work until the 4.1 release. +# +# Nextstep (version 3.3) apparently supports mmap(2) (the mmap symbol +# is defined in the C library) but does not support munmap(2). Don't +# try to use mmap if we can't find munmap. +# +# Ultrix has mmap(2), but it doesn't work. 
+mmap_ok=no +case "$host_os" in +bsdi3*|bsdi4.0) + AC_MSG_WARN([mlock(2) interface ignored on $host_os-$host_vendor.]) + mmap_ok=yes + AC_CHECK_FUNCS(mmap munmap, , mmap_ok=no);; +ultrix*) + AC_MSG_WARN([mmap(2) interface ignored on $host_os-$host_vendor.]);; +*) + mmap_ok=yes + AC_CHECK_FUNCS(mlock munlock) + AC_CHECK_FUNCS(mmap munmap, , mmap_ok=no);; +esac + +# Second, we look for shmget. +# +# SunOS has the shmget(2) interfaces, but there appears to be a missing +# #include file, so we ignore them. +shmget_ok=no +case "$host_os" in +sunos*) + AC_MSG_WARN([shmget(2) interface ignored on $host_os-$host_vendor.]);; +*) + shmget_ok=yes + AC_CHECK_FUNCS(shmget, , shmget_ok=no) + + # Check for shmctl to lock down shared memory segments. + AC_CACHE_CHECK([for shmctl], db_cv_shmctl_shm_lock, [ + AC_TRY_LINK([ +#include +#include +#include +#include ], [ + shmctl(0, SHM_LOCK, NULL); + ], [db_cv_shmctl_shm_lock=yes], [db_cv_shmctl_shm_lock=no])]) + if test "$db_cv_shmctl_shm_lock" = "yes"; then + AC_DEFINE(HAVE_SHMCTL_SHM_LOCK) + AH_TEMPLATE(HAVE_SHMCTL_SHM_LOCK, + [Define to 1 if shmctl/SHM_LOCK locks down shared memory segments.]) + fi;; +esac + +# We require either mmap/munmap(2) or shmget(2). +if test "$mmap_ok" = "no" -a "$shmget_ok" = "no"; then + AC_MSG_WARN([Neither mmap/munmap(2) or shmget(2) library functions.]) +fi + +# Optional Tcl support. +if test "$db_cv_tcl" = "yes"; then + AM_TCL_LOAD +fi + +# Optional sequence code. +AM_SEQUENCE_CONFIGURE + +# Detect whether a large mmap() supports automatically extending the accessible +# region after growing the underlying file. +AM_MMAP_EXTEND + +# Optional DB 1.85 compatibility API. +if test "$db_cv_compat185" = "yes"; then + ADDITIONAL_INCS="db_185.h $ADDITIONAL_INCS" + + ADDITIONAL_OBJS="db185${o} $ADDITIONAL_OBJS" +fi + +# Optional utilities. 
+if test "$db_cv_dump185" = "yes"; then + ADDITIONAL_PROGS="db_dump185 $ADDITIONAL_PROGS" +fi + +# Log checksums can be disabled to increase performance +if test "$db_cv_log_checksum" = "yes"; then + AC_DEFINE(HAVE_LOG_CHECKSUM) + AH_TEMPLATE(HAVE_LOG_CHECKSUM, [Define to 1 if enabling checksums in log records.]) +fi + +# You can disable pieces of functionality to save space. +# +# Btree is always configured: it is the standard method, and Hash off-page +# duplicates require it. +ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(BTREE_OBJS)" + +# Compression can be disabled. +if test "$db_cv_build_compression" = "yes"; then + AC_DEFINE(HAVE_COMPRESSION) + AH_TEMPLATE(HAVE_COMPRESSION, [Define to 1 if building compression support.]) +fi + +# Partitioning can be disabled. +if test "$db_cv_build_partition" = "yes"; then + AC_DEFINE(HAVE_PARTITION) + AH_TEMPLATE(HAVE_PARTITION, [Define to 1 if building partitioned database support.]) +fi + +# Hash can be disabled. +if test "$db_cv_build_hash" = "yes"; then + AC_DEFINE(HAVE_HASH) + AH_TEMPLATE(HAVE_HASH, [Define to 1 if building Hash access method.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HASH_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HASH_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS hash_stub${o}" +fi + +# Heap can be disabled. +if test "$db_cv_build_heap" = "yes"; then + AC_DEFINE(HAVE_HEAP) + AH_TEMPLATE(HAVE_HEAP, [Define to 1 if building Heap access method.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HEAP_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(HEAP_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS heap_stub${o}" +fi + +# Queue can be disabled. 
+if test "$db_cv_build_queue" = "yes"; then + AC_DEFINE(HAVE_QUEUE) + AH_TEMPLATE(HAVE_QUEUE, [Define to 1 if building Queue access method.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(QUEUE_OBJS)" + if test "$db_cv_build_verify" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(QUEUE_VRFY_OBJS)" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS qam_stub${o}" +fi + +# Replication can be disabled. +if test "$db_cv_build_replication" = "yes"; then + AC_DEFINE(HAVE_REPLICATION) + AH_TEMPLATE(HAVE_REPLICATION, + [Define to 1 if building replication support.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(REP_OBJS)" + + # If we're building replication and detected POSIX threads, build the + # replication manager. + AH_TEMPLATE(HAVE_REPLICATION_THREADS, + [Define to 1 if building the Berkeley DB replication framework.]) + + if test "$ac_cv_header_pthread_h" = yes; then + AC_DEFINE(HAVE_REPLICATION_THREADS) + + if test "$with_stacksize" != "no"; then + AC_DEFINE_UNQUOTED(DB_STACKSIZE, $with_stacksize, + [Defined to a size to limit the stack size of Berkeley DB threads.]) + fi + + # Solaris requires the socket and nsl libraries to build the + # replication manager. Don't add nsl regardless of the OS, + # it causes RPC to fail on AIX 4.3.3. + case "$host_os" in + solaris*) + AC_HAVE_LIBRARY(nsl, LIBSO_LIBS="$LIBSO_LIBS -lnsl") + AC_HAVE_LIBRARY(socket, + LIBSO_LIBS="$LIBSO_LIBS -lsocket");; + esac + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(REPMGR_OBJS)" + else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS repmgr_stub${o}" + fi +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS rep_stub${o} repmgr_stub${o}" +fi + +# The statistics code can be disabled. +if test "$db_cv_build_statistics" = "yes"; then + AC_DEFINE(HAVE_STATISTICS) + AH_TEMPLATE(HAVE_STATISTICS, + [Define to 1 if building statistics support.]) +fi + +# The verification code can be disabled. 
+if test "$db_cv_build_verify" = "yes"; then + AC_DEFINE(HAVE_VERIFY) + AH_TEMPLATE(HAVE_VERIFY, + [Define to 1 if building access method verification support.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(BTREE_VRFY_OBJS) \$(LOG_VRFY_OBJS)" +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS db_vrfy_stub${o} log_verify_stub${o}" +fi + +# The crypto code can be disabled. +if test -d "$topdir/src/crypto" -a "$db_cv_build_cryptography" != "no"; then + AC_DEFINE(HAVE_CRYPTO) + AH_TEMPLATE(HAVE_CRYPTO, + [Define to 1 if building cryptography support.]) + + CRYPTO_OBJS="\$(CRYPTO_OBJS)" + + if test "$db_cv_build_cryptography" = "ipp"; then + AC_CHECK_HEADER([ippcp.h], [], AC_MSG_ERROR([\ +The 'ippcp.h' header file required for IPP cryptography support was not found \ +in the configured include path.])) + AC_DEFINE(HAVE_CRYPTO_IPP) + AH_TEMPLATE(HAVE_CRYPTO_IPP, + [Define to 1 if using Intel IPP for cryptography.]) + fi +else + CRYPTO_OBJS="crypto_stub${o}" +fi + +# The mutex code can be disabled, and if there aren't any mutexes, then there's +# no reason to include the locking code. +if test "$db_cv_build_mutexsupport" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(LOCK_OBJS) \$(MUTEX_OBJS)" +else + ADDITIONAL_OBJS="$ADDITIONAL_OBJS lock_stub${o} mut_stub${o}" +fi + +# If DIAGNOSTIC is defined, include the log print routines in the library +# itself, various diagnostic modes use them. +if test "$db_cv_diagnostic" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS \$(PRINT_OBJS)" + PRINTLOG_OBJS="" +else + PRINTLOG_OBJS="\$(PRINT_OBJS)" +fi + +# If building for QNX, we need additional OS files. +if test "$qnx_build" = "yes"; then + ADDITIONAL_OBJS="$ADDITIONAL_OBJS os_qnx_fsync${o} os_qnx_open${o}" +fi + +# The DBM API can be disabled. +if test "$db_cv_dbm" = "yes"; then + AC_DEFINE(HAVE_DBM) + AH_TEMPLATE(HAVE_DBM, [Define to 1 if building the DBM API.]) + ADDITIONAL_OBJS="$ADDITIONAL_OBJS dbm${o} hsearch${o}" +fi + +# The output and error messages can be stripped. 
+if test "$db_cv_stripped_messages" = "yes"; then + AC_DEFINE(HAVE_STRIPPED_MESSAGES) + AH_TEMPLATE(HAVE_STRIPPED_MESSAGES, + [Define to 1 if building without output message content.]) +fi + +# The output and error messages can be localized. +if test "$db_cv_localization" = "yes"; then + AC_DEFINE(HAVE_LOCALIZATION) + AH_TEMPLATE(HAVE_LOCALIZATION, +[Define to 1 if you have localization function to support globalization.]) +fi + +# We need to add the additional object files into the Makefile with the correct +# suffix. We can't use $LTLIBOBJS itself, because that variable has $U encoded +# in it for automake, and that's not what we want. See SR #7227 for additional +# information. +# +# XXX: I'm not sure this is correct. +REPLACEMENT_OBJS=`echo "$LIB@&t@OBJS" | + sed "s,\.[[^.]]* ,$o ,g;s,\.[[^.]]*$,$o,"` + +# This is necessary so that .o files in LIBOBJS are also built via +# the ANSI2KNR-filtering rules. +LIB@&t@OBJS=`echo "$LIB@&t@OBJS" | + sed 's,\.[[^.]]* ,$U&,g;s,\.[[^.]]*$,$U&,'` +LTLIBOBJS=`echo "$LIB@&t@OBJS" | + sed 's,\.[[^.]]* ,.lo ,g;s,\.[[^.]]*$,.lo,'` +AC_SUBST(LTLIBOBJS) + +# Initial output file list. +CREATE_LIST="Makefile + db_cxx.h:$topdir/src/dbinc/db_cxx.in + db_int.h:$topdir/src/dbinc/db_int.in + clib_port.h:$topdir/dist/clib_port.in + include.tcl:$topdir/test/tcl/include.tcl" + +# Create the db.h file from a source file, a list of global function +# prototypes, and, if configured for unique names, a list of #defines +# to do DB_VERSION_UNIQUE_NAME substitution. 
+if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db.h:$topdir/src/dbinc/db.in:$topdir/src/dbinc_auto/api_flags.in:$topdir/src/dbinc_auto/ext_def.in:$topdir/src/dbinc_auto/ext_prot.in" +else + CREATE_LIST="$CREATE_LIST + db.h:$topdir/src/dbinc/db.in:$topdir/src/dbinc_auto/api_flags.in:$topdir/src/dbinc_auto/ext_prot.in" +fi + +# If configured for unique names, create the db_int_uext.h file (which +# does the DB_VERSION_UNIQUE_NAME substitution), which is included by +# the db_int.h file. +if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db_int_def.h:$topdir/src/dbinc_auto/int_def.in" + db_int_def='#include "db_int_def.h"' +fi + +# Create the db_185.h and db185_int.h files from source files, a list of +# global function prototypes, and, if configured for unique names, a list +# of #defines to do DB_VERSION_UNIQUE_NAME substitution. +if test "$db_cv_compat185" = "yes"; then + if test "$db_cv_uniquename" = "yes"; then + CREATE_LIST="$CREATE_LIST + db_185.h:$topdir/src/dbinc/db_185.in:$topdir/src/dbinc_auto/ext_185_def.in:$topdir/src/dbinc_auto/ext_185_prot.in + db185_int.h:$topdir/lang/db185/db185_int.in:$topdir/src/dbinc_auto/ext_185_def.in:$topdir/src/dbinc_auto/ext_185_prot.in" + else + CREATE_LIST="$CREATE_LIST + db_185.h:$topdir/src/dbinc/db_185.in:$topdir/src/dbinc_auto/ext_185_prot.in + db185_int.h:$topdir/lang/db185/db185_int.in:$topdir/src/dbinc_auto/ext_185_prot.in" + fi +fi + +if test "$db_cv_stl" = "yes"; then + CREATE_LIST="$CREATE_LIST + dbstl_common.h:$topdir/lang/cxx/stl/dbstl_common.in" +fi + +if test "x$subdirs" != "x"; then + subdir_cmd="@for d in ${subdirs}; do (cd \$\$d && \${MAKE} \$@) ; done" +fi + +AC_CONFIG_FILES($CREATE_LIST) +AC_OUTPUT + +if test "$db_cv_sql" = "yes"; then + # This command runs the configure script from the SQL tree. 
+ AC_MSG_NOTICE([Configuring the SQL API]) + AC_SQL_CONFIG +fi diff --git a/dist/db_provider.d b/dist/db_provider.d new file mode 100644 index 00000000..3d1ba3a6 --- /dev/null +++ b/dist/db_provider.d @@ -0,0 +1,361 @@ +/* + * DO NOT EDIT: automatically built by dist/s_include. + * Oracle Berkeley DB DTrace Provider + */ +#include "dbdefs.d" + +provider bdb { +/* + * + * dist/events.in - This description of Oracle Berkeley DB's internal + * events hierarchy is processed by dist/s_perfmon to generate the + * platform-specific files needed by the configured operating system. + * + * The entries starting in the first column are event class names, and consist + * of a single word. The class's individual function-like events follow. + * + * Some of these are included to enhance consistency; these calls could be + * supported by pid$target:::entry (DTrace) or + * probe process("$LIB").function("").call (SystemTap) probes. + * + * For DTrace + * dist/bdb_provider.d + * util/dtrace/dbdefs.d + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + */ + +/* The alloc class covers the allocation of "on disk" database pages. */ + /* + * An attempt to allocate a database page of type 'type' for database 'db' + * returned 'ret'. If the allocation succeeded then ret is 0, pgno is the + * location of the new page, and pg is the address of the new page. + * Details of the page can be extracted from the pg pointer. + */ + probe alloc__new(char *file, char *db, unsigned pgno, unsigned type, + struct _db_page *pg, int ret); + /* + * An attempt to free the page 'pgno' of 'db' returned 'ret'. + * When successful the page is returned to the free list + * or the file is truncated. + */ + probe alloc__free(char *file, char *db, unsigned pgno, unsigned ret); + /* + * A btree split of pgno in db is being attempted. The parent page number + * and the level in the btree are also provided. 
+ */ + probe alloc__btree_split(char *file, char *db, unsigned pgno, unsigned parent, + unsigned level); + +/* + * These DB API calls provide the name of the file and database being accessed. + * In-memory databases will have a NULL (0) file name address. The db name will + * be null unless subdatabases are in use. + */ + /* + * The database or file name was opened. The 20 byte unique fileid can be + * used to keep track of databases as they are created and destroyed. + */ + probe db__open(char *file, char *db, unsigned flags, uint8_t *fileid); + /* The database or file name was closed. */ + probe db__close(char *file, char *db, unsigned flags, uint8_t *fileid); + /* An attempt is being made to open a cursor on the database or file. */ + probe db__cursor(char *file, char *db, unsigned txnid, unsigned flags, + uint8_t *fileid); + /* An attempt is being made to get data from a db. */ + probe db__get(char *file, char *db, unsigned txnid, DBT *key, DBT *data, + unsigned flags); + /* An attempt is being made to put data to a db. */ + probe db__put(char *file, char *db, unsigned txnid, DBT *key, DBT *data, + unsigned flags); + /* An attempt is being made to delete data from a db. */ + probe db__del(char *file, char *db, unsigned txnid, DBT *key, unsigned flags); + +/* + * The lock class monitors the transactional consistency locks: page, record, + * and database. It also monitors the non-transactional file handle locks. + */ + /* + * The thread is about to suspend itself because another locker already has + * a conflicting lock on object 'lock'. The lock DBT's data points to + * a __db_ilock structure, except for the atypical program which uses + * application specific locking. + */ + probe lock__suspend(DBT *lock, db_lockmode_t lock_mode); + /* The thread is awakening from a suspend. */ + probe lock__resume(DBT *lock, db_lockmode_t lock_mode); + /* The lock is being freed. 
*/ + probe lock__put(struct __sh_dbt *lock, unsigned flags); + /* + * The lock would have been freed except that its refcount was greater + * than 1. + */ + probe lock__put_reduce_count(struct __sh_dbt *lock, unsigned flags); + + /* + * These lock counters are included by --enable-perfmon-statistics. + */ + + /* + * The locker_id's lock request in lock_obj is about to be aborted in + * order to resolve a deadlock. The lock region's st_ndeadlocks has + * been incremented. + */ + probe lock__deadlock(unsigned st_ndeadlocks, unsigned locker_id, + struct __sh_dbt *lock_obj); + /* + * A DB_LOCK_NOWAIT lock request by locker_id would have had to wait. + * The lock regions's st_lock_nowait has been incremented and + * the request returns DB_LOCK_NOTGRANTED. + */ + probe lock__nowait_notgranted(unsigned count, DBT *lock, unsigned locker_id); + probe lock__request(unsigned request_count, DBT *lock, unsigned locker_id); + probe lock__upgrade(unsigned upgrade_count, DBT *lock, unsigned locker_id); + /* + * A lock is being stolen from one partition for another one + * The 'from' lock partition's st_locksteals has been incremented. + */ + probe lock__steal(unsigned st_locksteals, unsigned from, unsigned to); + /* + * A lock object is being stolen from one partition for another one. + * The 'from' lock partition's st_objectsteals has been incremented. + */ + probe lock__object_steal(unsigned st_objectsteals, unsigned from, unsigned to); + /* A lock wait expired due to the lock request timeout. */ + probe lock__locktimeout(unsigned st_nlocktimeouts, const DBT *lock); + /* A lock wait expired due to the transaction's timeout. */ + probe lock__txntimeout(unsigned st_ntxntimeouts, const DBT *lock); + /* + * The allocation or deallocation of the locker id changed the number + * of active locker identifiers. + */ + probe lock__nlockers(unsigned active, unsigned locker_id); + /* + * The allocation of the locker id set a new maximum + * number of active locker identifiers. 
+ */ + probe lock__maxnlockers(unsigned new_max_active, unsigned locker_id); + +/* Log - Transaction log */ + probe log__read(unsigned read_count, unsigned logfile); + +/* + * The mpool class monitors the allocation and management of memory, + * including the cache. + */ + /* Read a page from file into buf. */ + probe mpool__read(char *file, unsigned pgno, struct __bh *buf); + /* Write a page from buf to file. */ + probe mpool__write(char *file, unsigned pgno, struct __bh *buf); + /* + * This is an attempt to allocate size bytes from region_id. + * The reg_type is one of the reg_type_t enum values. + */ + probe mpool__env_alloc(unsigned size, unsigned region_id, unsigned reg_type); + /* The page is about to be removed from the cache. */ + probe mpool__evict(char *file, unsigned pgno, struct __bh *buf); + /* + * The memory allocator has incremented wrap_count after searching through + * the entire region without being able to fulfill the request for + * alloc_len bytes. As wrap_count increases the library makes more effort + * to allocate space. + */ + probe mpool__alloc_wrap(unsigned alloc_len, int region_id, int wrap_count, + int put_counter); + + /* + * These mpool counters are included by --enable-perfmon-statistics. + */ + + /* The eviction of a clean page from a cache incremented st_ro_evict. */ + probe mpool__clean_eviction(unsigned st_ro_evict, unsigned region_id); + /* + * The eviction of a dirty page from a cache incremented st_rw_evict. + * The page has already been written out. + */ + probe mpool__dirty_eviction(unsigned st_rw_evict, unsigned region_id); + /* An attempt to allocate memory from region_id failed. 
*/ + probe mpool__fail(unsigned failure_count, unsigned alloc_len, unsigned region_id); + probe mpool__free(unsigned freed_count, unsigned alloc_len, unsigned region_id); + probe mpool__longest_search(unsigned count, unsigned alloc_len, unsigned region_id); + probe mpool__free_frozen(unsigned count, char *file, unsigned pgno); + probe mpool__freeze(unsigned hash_frozen, unsigned pgno); + /* A search for pgno of file incremented st_hash_searches. */ + probe mpool__hash_search(unsigned st_hash_searches, char *file, unsigned pgno); + /* + * A search for pgno of file increased st_hash_examined by the number + * of hash buckets examined. + */ + probe mpool__hash_examined(unsigned st_hash_examined, char *file, unsigned pgno); + /* A search for pgno of file set a new maximum st_hash_longest value. */ + probe mpool__hash_longest(unsigned st_hash_longest, char *file, unsigned pgno); + /* + * A file's st_map count was incremented after a page was mapped into + * memory. The mapping might have caused disk I/O. + */ + probe mpool__map(unsigned st_map, char *file, unsigned pgno); + /* + * The hit count was incremented because pgno from file was found + * in the cache. + */ + probe mpool__hit(unsigned st_cache_hit, char *file, unsigned pgno); + /* + * The miss count was incremented because pgno from file was + * not already present in the cache. + */ + probe mpool__miss(unsigned st_cache_miss, char *file, unsigned pgno); + /* + * The st_page_create field was incremented because + * the pgno of file was created in the cache. + */ + probe mpool__page_create(unsigned st_page_create, char *file, unsigned pgno); + /* + * The st_page_in field was incremented because + * the pgno from file was read into the cache. + */ + probe mpool__page_in(unsigned st_page_in, char *file, unsigned pgno); + /* + * The st_page_out field was incremented because + * the pgno from file was written out. 
+ */ + probe mpool__page_out(unsigned st_page_out, char *file, unsigned pgno); + probe mpool__thaw(unsigned count, char *file, unsigned pgno); + probe mpool__alloc(unsigned success_count, unsigned len, unsigned region_id); + probe mpool__nallocs(unsigned st_alloc, unsigned alloc_len); + probe mpool__alloc_buckets(unsigned st_alloc_buckets, unsigned region_id); + probe mpool__alloc_max_buckets(unsigned max, unsigned region_id); + probe mpool__alloc_max_pages(unsigned max, unsigned region_id); + probe mpool__alloc_pages(unsigned count, unsigned region_id); + +/* + * The mutex category monitors includes shared latches. The alloc_id value + * is one of the MTX_XXX definitions from dbinc/mutex.h + */ + /* + * This thread is about to suspend itself because a thread has the + * mutex or shared latch locked in a mode which conflicts with the + * this request. + */ + probe mutex__suspend(unsigned mutex, unsigned excl, unsigned alloc_id, + struct __db_mutex_t *mutexp); + /* + * The thread is returning from a suspend and will attempt to obtain + * the mutex or shared latch again. It might need to suspend again. + */ + probe mutex__resume(unsigned mutex, unsigned excl, unsigned alloc_id, + struct __db_mutex_t *mutexp); + + /* + * These mutex counters are included by --enable-perfmon-statistics. + */ + + /* + * Increment the count of times that the mutex was free when trying + * to lock it. + */ + probe mutex__set_nowait(unsigned mutex_set_nowait, unsigned mutex); + /* + * Increment the count of times that the mutex was busy when trying + * to lock it. + */ + probe mutex__set_wait(unsigned mutex_set_wait, unsigned mutex); + /* + * Increment the count of times that the shared latch was free + * when trying to get a shared lock on it. + */ + probe mutex__set_rd_nowait(unsigned mutex_set_rd_nowait, unsigned mutex); + /* + * Increment the count of times that the shared latch was already + * exclusively latched when trying to get a shared lock on it. 
+ */ + probe mutex__set_rd_wait(unsigned mutex_set_rd_wait, unsigned mutex); + /* + * Increment the count of times that a hybrid mutex had to block + * on its condition variable. On a busy system this might happen + * several times before the corresponding hybrid_wakeup. + */ + probe mutex__hybrid_wait(unsigned hybrid_wait, unsigned mutex); + /* + * Increment the count of times that a hybrid mutex finished + * one or more waits for its condition variable. + */ + probe mutex__hybrid_wakeup(unsigned hybrid_wakeup, unsigned mutex); + +/* + * The race events are triggered when the interactions between two threads + * causes a rarely executed code path to be taken. They are used primarily + * to help test and diagnose race conditions, though if they are being + * triggered too frequently it could result in performance degradation. + * They are intended for use by Berkeley DB engineers. + */ + /* A Btree search needs to wait for a page lock and retry from the top. */ + probe race__bam_search(char *file, char *db, int errcode, struct _db_page *pg, + struct _db_page *parent, unsigned flags); + /* + * A record was not found searching an off-page duplicate tree. + * Retry the search. + */ + probe race__dbc_get(char *file, char *db, int errcode, unsigned flags, DBT *key); + /* + * The thread could not immediately open and lock the file handle + * without waiting. The thread will close, wait, and retry. + */ + probe race__fop_file_setup(char *file, int errcode, unsigned flags); + /* A get next or previous in a recno db had to retry. */ + probe race__ramc_get(char *file, char *db, struct _db_page *pg, unsigned flags); + +/* The statistics counters for replication are for internal use. 
*/ + probe rep__log_queued(unsigned count, DB_LSN *lsn); + probe rep__pg_duplicated(unsigned eid, unsigned pgno, unsigned file, unsigned count); + probe rep__pg_record(unsigned count, unsigned eid); + probe rep__pg_request(unsigned count, unsigned eid); + probe rep__election_won(unsigned count, unsigned generation); + probe rep__election(unsigned count, unsigned generation); + probe rep__log_request(unsigned count, unsigned eid); + probe rep__master_change(unsigned count, unsigned eid); + +/* The txn category covers the basic transaction operations. */ + /* A transaction was successfully begun. */ + probe txn__begin(unsigned txnid, unsigned flags); + /* A transaction is starting to commit. */ + probe txn__commit(unsigned txnid, unsigned flags); + /* + * The transaction is starting to prepare, flushing the log + * so that a future commit can be guaranteed to succeed. + * The global identifier field is 128 bytes long. + */ + probe txn__prepare(unsigned txnid, uint8_t *gid); + /* The transaction is about to abort. */ + probe txn__abort(unsigned txnid); + + /* + * These txn counters are included by --enable-perfmon-statistics. + */ + + /* Beginning the transaction incremented st_nbegins. */ + probe txn__nbegins(unsigned st_nbegins, unsigned txnid); + /* Aborting the transaction incremented st_naborts. */ + probe txn__naborts(unsigned st_naborts, unsigned txnid); + /* Committing the transaction incremented st_ncommits. */ + probe txn__ncommits(unsigned st_ncommits, unsigned txnid); + /* + * Beginning or ending the transaction updated the number of active + * transactions. + */ + probe txn__nactive(unsigned st_nactive, unsigned txnid); + /* + * The creation of the transaction set a new maximum number + * of active transactions. 
+ */ + probe txn__maxnactive(unsigned st_maxnactive, unsigned txnid); + probe txn__nsnapshot(unsigned st_nsnapshot, unsigned txnid); + probe txn__maxnsnapshot(unsigned st_maxnsnapshot, unsigned txnid); +}; + +#pragma D attributes Evolving/Evolving/Common provider bdb provider +#pragma D attributes Private/Private/Common provider bdb module +#pragma D attributes Private/Private/Common provider bdb function +#pragma D attributes Evolving/Evolving/Common provider bdb name +#pragma D attributes Evolving/Evolving/Common provider bdb args + diff --git a/dist/errno.h b/dist/errno.h new file mode 100644 index 00000000..24106609 --- /dev/null +++ b/dist/errno.h @@ -0,0 +1,186 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)errno.h 8.5 (Berkeley) 1/21/94 + * FreeBSD: /repoman/r/ncvs/src/sys/sys/errno.h,v 1.28 2005/04/02 12:33:28 das Exp $ + * + * $Id$ + */ + +#ifndef _SYS_ERRNO_H_ +#define _SYS_ERRNO_H_ + +#undef errno +#define errno DB_GLOBAL(db_errno) + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* Input/output error */ +#define ENXIO 6 /* Device not configured */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file descriptor */ +#define ECHILD 10 /* No child processes */ +#define EDEADLK 11 /* Resource deadlock avoided */ + /* 11 was EAGAIN */ +#define ENOMEM 12 /* Cannot allocate memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#ifndef _POSIX_SOURCE +#define ENOTBLK 15 /* Block device required */ +#endif +#define EBUSY 16 /* Device busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* Operation not supported by device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* 
Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* Too many open files in system */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Inappropriate ioctl for device */ +#ifndef _POSIX_SOURCE +#define ETXTBSY 26 /* Text file busy */ +#endif +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only filesystem */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ + +/* math software */ +#define EDOM 33 /* Numerical argument out of domain */ +#define ERANGE 34 /* Result too large */ + +/* non-blocking and interrupt i/o */ +#define EAGAIN 35 /* Resource temporarily unavailable */ +#ifndef _POSIX_SOURCE +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ + +#define EALREADY 37 /* Operation already in progress */ + +/* ipc/network software -- argument errors */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported */ +#define ENOTSUP EOPNOTSUPP /* Operation not supported */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Can't assign requested address */ + +/* ipc/network software -- operational errors */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection on reset */ +#define ECONNABORTED 53 /* 
Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Socket is already connected */ +#define ENOTCONN 57 /* Socket is not connected */ +#define ESHUTDOWN 58 /* Can't send after socket shutdown */ +#define ETOOMANYREFS 59 /* Too many references: can't splice */ +#define ETIMEDOUT 60 /* Operation timed out */ +#define ECONNREFUSED 61 /* Connection refused */ + +#define ELOOP 62 /* Too many levels of symbolic links */ +#endif /* _POSIX_SOURCE */ +#define ENAMETOOLONG 63 /* File name too long */ + +/* should be rearranged */ +#ifndef _POSIX_SOURCE +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#endif /* _POSIX_SOURCE */ +#define ENOTEMPTY 66 /* Directory not empty */ + +/* quotas & mush */ +#ifndef _POSIX_SOURCE +#define EPROCLIM 67 /* Too many processes */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Disc quota exceeded */ + +/* Network File System */ +#define ESTALE 70 /* Stale NFS file handle */ +#define EREMOTE 71 /* Too many levels of remote in path */ +#define EBADRPC 72 /* RPC struct is bad */ +#define ERPCMISMATCH 73 /* RPC version wrong */ +#define EPROGUNAVAIL 74 /* RPC prog. 
not avail */ +#define EPROGMISMATCH 75 /* Program version wrong */ +#define EPROCUNAVAIL 76 /* Bad procedure for program */ +#endif /* _POSIX_SOURCE */ + +#define ENOLCK 77 /* No locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#ifndef _POSIX_SOURCE +#define EFTYPE 79 /* Inappropriate file type or format */ +#define EAUTH 80 /* Authentication error */ +#define ENEEDAUTH 81 /* Need authenticator */ +#define EIDRM 82 /* Identifier removed */ +#define ENOMSG 83 /* No message of desired type */ +#define EOVERFLOW 84 /* Value too large to be stored in data type */ +#define ECANCELED 85 /* Operation canceled */ +#define EILSEQ 86 /* Illegal byte sequence */ +#define ENOATTR 87 /* Attribute not found */ + +#define EDOOFUS 88 /* Programming error */ +#endif /* _POSIX_SOURCE */ + +#define EBADMSG 89 /* Bad message */ +#define EMULTIHOP 90 /* Multihop attempted */ +#define ENOLINK 91 /* Link has been severed */ +#define EPROTO 92 /* Protocol error */ + +#ifndef _POSIX_SOURCE +#define ELAST 92 /* Must be equal largest errno */ +#endif /* _POSIX_SOURCE */ + +#ifdef _KERNEL +/* pseudo-errors returned inside kernel to modify return to process */ +#define ERESTART (-1) /* restart syscall */ +#define EJUSTRETURN (-2) /* don't modify regs, just return */ +#define ENOIOCTL (-3) /* ioctl not handled by this layer */ +#define EDIRIOCTL (-4) /* do direct ioctl in GEOM */ +#endif + +#endif diff --git a/dist/events.in b/dist/events.in new file mode 100644 index 00000000..6953fa2f --- /dev/null +++ b/dist/events.in @@ -0,0 +1,265 @@ +# +# dist/events.in - This description of Oracle Berkeley DB's internal +# events hierarchy is processed by dist/s_perfmon to generate the +# platform-specific files needed by the configured operating system. +# +# The entries starting in the first column are event class names, and consist +# of a single word. The class's individual function-like events follow. 
+# +# Some of these are included to enhance consistency; these calls could be +# supported by pid$target:::entry (DTrace) or +# probe process("$LIB").function("").call (SystemTap) probes. +# +# For DTrace +# dist/bdb_provider.d +# util/dtrace/dbdefs.d +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# + +# The alloc class covers the allocation of "on disk" database pages. +alloc + # An attempt to allocate a database page of type 'type' for database 'db' + # returned 'ret'. If the allocation succeeded then ret is 0, pgno is the + # location of the new page, and pg is the address of the new page. + # Details of the page can be extracted from the pg pointer. + new(char *file, char *db, unsigned pgno, unsigned type, + struct _db_page *pg, int ret); + # An attempt to free the page 'pgno' of 'db' returned 'ret'. + # When successful the page is returned to the free list + # or the file is truncated. + free(char *file, char *db, unsigned pgno, unsigned ret); + # A btree split of pgno in db is being attempted. The parent page number + # and the level in the btree are also provided. + btree_split(char *file, char *db, unsigned pgno, unsigned parent, + unsigned level); + +# These DB API calls provide the name of the file and database being accessed. +# In-memory databases will have a NULL (0) file name address. The db name will +# be null unless subdatabases are in use. +db + # The database or file name was opened. The 20 byte unique fileid can be + # used to keep track of databases as they are created and destroyed. + open(char *file, char *db, unsigned flags, uint8_t *fileid); + # The database or file name was closed. + close(char *file, char *db, unsigned flags, uint8_t *fileid); + # An attempt is being made to open a cursor on the database or file. + cursor(char *file, char *db, unsigned txnid, unsigned flags, + uint8_t *fileid); + # An attempt is being made to get data from a db. 
+ get(char *file, char *db, unsigned txnid, DBT *key, DBT *data, + unsigned flags); + # An attempt is being made to put data to a db. + put(char *file, char *db, unsigned txnid, DBT *key, DBT *data, + unsigned flags); + # An attempt is being made to delete data from a db. + del(char *file, char *db, unsigned txnid, DBT *key, unsigned flags); + +# The lock class monitors the transactional consistency locks: page, record, +# and database. It also monitors the non-transactional file handle locks. +lock + # The thread is about to suspend itself because another locker already has + # a conflicting lock on object 'lock'. The lock DBT's data points to + # a __db_ilock structure, except for the atypical program which uses + # application specific locking. + suspend(DBT *lock, db_lockmode_t lock_mode); + # The thread is awakening from a suspend. + resume(DBT *lock, db_lockmode_t lock_mode); + # The lock is being freed. + put(struct __sh_dbt *lock, unsigned flags); + # The lock would have been freed except that its refcount was greater + # than 1. + put_reduce_count(struct __sh_dbt *lock, unsigned flags); + + # These lock counters are included by --enable-perfmon-statistics. + + # The locker_id's lock request in lock_obj is about to be aborted in + # order to resolve a deadlock. The lock region's st_ndeadlocks has + # been incremented. + deadlock(unsigned st_ndeadlocks, unsigned locker_id, + struct __sh_dbt *lock_obj); + # A DB_LOCK_NOWAIT lock request by locker_id would have had to wait. + # The lock regions's st_lock_nowait has been incremented and + # the request returns DB_LOCK_NOTGRANTED. + nowait_notgranted(unsigned count, DBT *lock, unsigned locker_id); + request(unsigned request_count, DBT *lock, unsigned locker_id); + upgrade(unsigned upgrade_count, DBT *lock, unsigned locker_id); + # A lock is being stolen from one partition for another one + # The 'from' lock partition's st_locksteals has been incremented. 
+ steal(unsigned st_locksteals, unsigned from, unsigned to); + # A lock object is being stolen from one partition for another one. + # The 'from' lock partition's st_objectsteals has been incremented. + object_steal(unsigned st_objectsteals, unsigned from, unsigned to); + # A lock wait expired due to the lock request timeout. + locktimeout(unsigned st_nlocktimeouts, const DBT *lock); + # A lock wait expired due to the transaction's timeout. + txntimeout(unsigned st_ntxntimeouts, const DBT *lock); + # The allocation or deallocation of the locker id changed the number + # of active locker identifiers. + nlockers(unsigned active, unsigned locker_id); + # The allocation of the locker id set a new maximum + # number of active locker identifiers. + maxnlockers(unsigned new_max_active, unsigned locker_id); + +# Log - Transaction log +log + read(unsigned read_count, unsigned logfile); + +# The mpool class monitors the allocation and management of memory, +# including the cache. +mpool + # Read a page from file into buf. + read(char *file, unsigned pgno, struct __bh *buf); + # Write a page from buf to file. + write(char *file, unsigned pgno, struct __bh *buf); + # This is an attempt to allocate size bytes from region_id. + # The reg_type is one of the reg_type_t enum values. + env_alloc(unsigned size, unsigned region_id, unsigned reg_type); + # The page is about to be removed from the cache. + evict(char *file, unsigned pgno, struct __bh *buf); + # The memory allocator has incremented wrap_count after searching through + # the entire region without being able to fulfill the request for + # alloc_len bytes. As wrap_count increases the library makes more effort + # to allocate space. + alloc_wrap(unsigned alloc_len, int region_id, int wrap_count, + int put_counter); + + # These mpool counters are included by --enable-perfmon-statistics. + + # The eviction of a clean page from a cache incremented st_ro_evict. 
+ clean_eviction(unsigned st_ro_evict, unsigned region_id); + # The eviction of a dirty page from a cache incremented st_rw_evict. + # The page has already been written out. + dirty_eviction(unsigned st_rw_evict, unsigned region_id); + # An attempt to allocate memory from region_id failed. + fail(unsigned failure_count, unsigned alloc_len, unsigned region_id); + free(unsigned freed_count, unsigned alloc_len, unsigned region_id); + longest_search(unsigned count, unsigned alloc_len, unsigned region_id); + free_frozen(unsigned count, char *file, unsigned pgno); + freeze(unsigned hash_frozen, unsigned pgno); + # A search for pgno of file incremented st_hash_searches. + hash_search(unsigned st_hash_searches, char *file, unsigned pgno); + # A search for pgno of file increased st_hash_examined by the number + # of hash buckets examined. + hash_examined(unsigned st_hash_examined, char *file, unsigned pgno); + # A search for pgno of file set a new maximum st_hash_longest value. + hash_longest(unsigned st_hash_longest, char *file, unsigned pgno); + # A file's st_map count was incremented after a page was mapped into + # memory. The mapping might have caused disk I/O. + map(unsigned st_map, char *file, unsigned pgno); + # The hit count was incremented because pgno from file was found + # in the cache. + hit(unsigned st_cache_hit, char *file, unsigned pgno); + # The miss count was incremented because pgno from file was + # not already present in the cache. + miss(unsigned st_cache_miss, char *file, unsigned pgno); + # The st_page_create field was incremented because + # the pgno of file was created in the cache. + page_create(unsigned st_page_create, char *file, unsigned pgno); + # The st_page_in field was incremented because + # the pgno from file was read into the cache. + page_in(unsigned st_page_in, char *file, unsigned pgno); + # The st_page_out field was incremented because + # the pgno from file was written out. 
+ page_out(unsigned st_page_out, char *file, unsigned pgno); + thaw(unsigned count, char *file, unsigned pgno); + alloc(unsigned success_count, unsigned len, unsigned region_id); + nallocs(unsigned st_alloc, unsigned alloc_len); + alloc_buckets(unsigned st_alloc_buckets, unsigned region_id); + alloc_max_buckets(unsigned max, unsigned region_id); + alloc_max_pages(unsigned max, unsigned region_id); + alloc_pages(unsigned count, unsigned region_id); + +# The mutex category monitoring includes shared latches. The alloc_id value +# is one of the MTX_XXX definitions from dbinc/mutex.h +mutex + # This thread is about to suspend itself because a thread has the + # mutex or shared latch locked in a mode which conflicts with + # this request. + suspend(unsigned mutex, unsigned excl, unsigned alloc_id, + struct __db_mutex_t *mutexp); + # The thread is returning from a suspend and will attempt to obtain + # the mutex or shared latch again. It might need to suspend again. + resume(unsigned mutex, unsigned excl, unsigned alloc_id, + struct __db_mutex_t *mutexp); + + # These mutex counters are included by --enable-perfmon-statistics. + + # Increment the count of times that the mutex was free when trying + # to lock it. + set_nowait(unsigned mutex_set_nowait, unsigned mutex); + # Increment the count of times that the mutex was busy when trying + # to lock it. + set_wait(unsigned mutex_set_wait, unsigned mutex); + # Increment the count of times that the shared latch was free + # when trying to get a shared lock on it. + set_rd_nowait(unsigned mutex_set_rd_nowait, unsigned mutex); + # Increment the count of times that the shared latch was already + # exclusively latched when trying to get a shared lock on it. + set_rd_wait(unsigned mutex_set_rd_wait, unsigned mutex); + # Increment the count of times that a hybrid mutex had to block + # on its condition variable. On a busy system this might happen + # several times before the corresponding hybrid_wakeup. 
+ hybrid_wait(unsigned hybrid_wait, unsigned mutex); + # Increment the count of times that a hybrid mutex finished + # one or more waits for its condition variable. + hybrid_wakeup(unsigned hybrid_wakeup, unsigned mutex); + +# The race events are triggered when the interactions between two threads +# causes a rarely executed code path to be taken. They are used primarily +# to help test and diagnose race conditions, though if they are being +# triggered too frequently it could result in performance degradation. +# They are intended for use by Berkeley DB engineers. +race + # A Btree search needs to wait for a page lock and retry from the top. + bam_search(char *file, char *db, int errcode, struct _db_page *pg, + struct _db_page *parent, unsigned flags); + # A record was not found searching an off-page duplicate tree. + # Retry the search. + dbc_get(char *file, char *db, int errcode, unsigned flags, DBT *key); + # The thread could not immediately open and lock the file handle + # without waiting. The thread will close, wait, and retry. + fop_file_setup(char *file, int errcode, unsigned flags); + # A get next or previous in a recno db had to retry. + ramc_get(char *file, char *db, struct _db_page *pg, unsigned flags); + +# The statistics counters for replication are for internal use. +rep + log_queued(unsigned count, DB_LSN *lsn); + pg_duplicated(unsigned eid, unsigned pgno, unsigned file, unsigned count); + pg_record(unsigned count, unsigned eid); + pg_request(unsigned count, unsigned eid); + election_won(unsigned count, unsigned generation); + election(unsigned count, unsigned generation); + log_request(unsigned count, unsigned eid); + master_change(unsigned count, unsigned eid); + +# The txn category covers the basic transaction operations. +txn + # A transaction was successfully begun. + begin(unsigned txnid, unsigned flags); + # A transaction is starting to commit. 
+ commit(unsigned txnid, unsigned flags); + # The transaction is starting to prepare, flushing the log + # so that a future commit can be guaranteed to succeed. + # The global identifier field is 128 bytes long. + prepare(unsigned txnid, uint8_t *gid); + # The transaction is about to abort. + abort(unsigned txnid); + + # These txn counters are included by --enable-perfmon-statistics. + + # Beginning the transaction incremented st_nbegins. + nbegins(unsigned st_nbegins, unsigned txnid); + # Aborting the transaction incremented st_naborts. + naborts(unsigned st_naborts, unsigned txnid); + # Committing the transaction incremented st_ncommits. + ncommits(unsigned st_ncommits, unsigned txnid); + # Beginning or ending the transaction updated the number of active + # transactions. + nactive(unsigned st_nactive, unsigned txnid); + # The creation of the transaction set a new maximum number + # of active transactions. + maxnactive(unsigned st_maxnactive, unsigned txnid); + nsnapshot(unsigned st_nsnapshot, unsigned txnid); + maxnsnapshot(unsigned st_maxnsnapshot, unsigned txnid); diff --git a/dist/gen_inc.awk b/dist/gen_inc.awk new file mode 100644 index 00000000..6084f27f --- /dev/null +++ b/dist/gen_inc.awk @@ -0,0 +1,69 @@ +# This awk script parses C input files looking for lines marked "PUBLIC:" +# "EXTERN:", and "DB_LOG_RECSPEC". (PUBLIC lines are DB internal function +# prototypes and #defines, EXTERN lines are DB external function prototypes +# and #defines, and DB_LOG_RECSPEC lines are the definition of log record +# templates.) +# +# PUBLIC lines are put into two versions of per-directory include files: +# one file that contains the prototypes, and one file that contains a +# #define for the name to be processed during configuration when creating +# unique names for every global C-language symbol in the DB library. 
+# +# The EXTERN lines are put into two files: one of which contains prototypes +# which are always appended to the db.h file, and one of which contains a +# #define list for use when creating unique symbol names. +# +# DB_LOG_RECSPEC lines are put into PUBLIC's internal #define file. +# +# Four arguments: +# e_dfile list of EXTERN #defines +# e_pfile include file that contains EXTERN prototypes +# i_dfile list of internal (PUBLIC) #defines +# i_pfile include file that contains internal (PUBLIC) prototypes +/PUBLIC:/ { + sub(/^.*PUBLIC:[ ][ ]*/, "") + if ($0 ~ /^#if|^#ifdef|^#ifndef|^#else|^#endif/) { + print $0 >> i_pfile + print $0 >> i_dfile + next + } + pline = sprintf("%s %s", pline, $0) + if (pline ~ /\)\);/) { + sub(/^[ ]*/, "", pline) + print pline >> i_pfile + if (pline !~ db_version_unique_name) { + gsub(/[ ][ ]*__P.*/, "", pline) + sub(/^.*[ ][*]*/, "", pline) + printf("#define %s %s@DB_VERSION_UNIQUE_NAME@\n", + pline, pline) >> i_dfile + } + pline = "" + } +} + +/EXTERN:/ { + sub(/^.*EXTERN:[ ][ ]*/, "") + if ($0 ~ /^#if|^#ifdef|^#ifndef|^#else|^#endif/) { + print $0 >> e_pfile + print $0 >> e_dfile + next + } + eline = sprintf("%s %s", eline, $0) + if (eline ~ /\)\);/) { + sub(/^[ ]*/, "", eline) + print eline >> e_pfile + if (eline !~ db_version_unique_name) { + gsub(/[ ][ ]*__P.*/, "", eline) + sub(/^.*[ ][*]*/, "", eline) + printf("#define %s %s@DB_VERSION_UNIQUE_NAME@\n", + eline, eline) >> e_dfile + } + eline = "" + } +} + +/^DB_LOG_RECSPEC.*_desc\[\]/ { + sub(/DB_LOG_RECSPEC[ ]*/, ""); + sub(/\[][ ]*=[ ]*{.*$/, ""); + printf("#define\t%s %s@DB_VERSION_UNIQUE_NAME@\n", $0, $0) >> i_dfile +} diff --git a/dist/gen_msg.awk b/dist/gen_msg.awk new file mode 100644 index 00000000..0f08d3d0 --- /dev/null +++ b/dist/gen_msg.awk @@ -0,0 +1,465 @@ +# +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# + +BEGIN { + if (source_file == "" || header_file == "") { + print "Usage: gen_msg.awk requires these variables to be set:"; + print "\theader_file\t-- the message #include file being created"; + print "\tsource_file\t-- the message source file being created"; + exit; + } + CFILE=source_file; + HFILE=header_file; + maxmsg = 0; +} +/^[ ]*PREFIX/ { + prefix = $2; + + # Start .c files. + printf("/* Do not edit: automatically built by gen_msg.awk. */\n\n") \ + > CFILE + printf("#include \"db_config.h\"\n\n") >> CFILE + + # Start .h file, make the entire file conditional. + printf("/* Do not edit: automatically built by gen_msg.awk. */\n\n") \ + > HFILE + printf("#ifndef\t%s_AUTOMSG_H\n#define\t%s_AUTOMSG_H\n\n", prefix, prefix) \ + >> HFILE; + printf("/*\n") >> HFILE; + printf(" * Message sizes are simply the sum of field sizes (not\n") \ + >> HFILE; + printf(" * counting variable size parts, when DBTs are present),\n") \ + >> HFILE; + printf(" * and may be different from struct sizes due to padding.\n") \ + >> HFILE; + printf(" */\n") >> HFILE; +} +/^[ ]*INCLUDE/ { + for (i = 2; i < NF; i++) + printf("%s ", $i) >> CFILE; + printf("%s\n", $i) >> CFILE; +} +/^[ ]*BEGIN_MSG/ { + if (in_begin) { + print "Invalid format: missing END statement"; + exit; + } + in_begin = 1; + nvars = 0; + thismsg = $2; + for (i = 2; i<= NF; i++) { + if ($i == "alloc") + alloc = 1; + else if ($i == "check_length") + check_length = 1; + else if ($i == "version") + version = 1; + } + + base_name = sprintf("%s_%s", prefix, thismsg); + typedef_name = sprintf("%s_args", base_name); + msg_size_name = toupper(sprintf("%s_SIZE", base_name)); + max_name = toupper(sprintf("%s_MAXMSG_SIZE", prefix)); +} +/^[ ]*ARG/ { + vars[nvars] = $2; + types[nvars] = $3; + if (types[nvars] == "DBT") + has_dbt = 1; + nvars++; +} +/^[ ]*END/ { + if (!in_begin) { + print "Invalid format: missing BEGIN statement"; + exit; + } + if (nvars == 0) { + printf("%s needs at least one field\n", thismsg); + exit; + 
} + + sum = 0; + for (i = 0; i < nvars; i++) + sum += type_length(types[i]); + printf("#define\t%s\t%d\n", msg_size_name, sum) >> HFILE; + if (sum > maxmsg) + maxmsg = sum; + + printf("typedef struct _%s {\n", typedef_name) >> HFILE; + for (i = 0; i < nvars; i++) { + if (types[i] == "DB_LSN" || types[i] == "DBT") + printf("\t%s\t\t%s;\n", types[i], vars[i]) >> HFILE; + else + printf("\t%s\t%s;\n", types[i], vars[i]) >> HFILE; + } + printf("} %s;\n\n", typedef_name) >> HFILE; + + emit_marshal(); + emit_unmarshal(); + + # Reinitialize variables for next time. + in_begin = 0; + alloc = 0; + check_length = 0; + version = 0; + has_dbt = 0; +} +END { + # End the conditional for the HFILE + printf("#define\t%s\t%d\n", max_name, maxmsg) >> HFILE; + printf("#endif\n") >> HFILE; +} + +# Length of fixed part of message. Does not count variable-length data portion +# of DBT. +# +function type_length(type) +{ + if (type == "DB_LSN") + return (8); + if (type == "DBT" || type == "u_int32_t" || type == "db_pgno_t") + return (4); + if (type == "u_int16_t") + return (2); + if (type == "u_int8_t") + return (1); + printf("unknown field type: %s", type); + exit(1); +} + +function emit_marshal() +{ + pi = 1; + if (check_length) + p[pi++] = "int "; + else + p[pi++] = "void "; + function_name = sprintf("%s_marshal", base_name); + p[pi++] = function_name; + p[pi++] = " __P((ENV *, "; + if (version) + p[pi++] = "u_int32_t, "; + p[pi++] = sprintf("%s *, u_int8_t *", typedef_name); + if (check_length) + p[pi++] = ", size_t, size_t *"; + p[pi++] = "));"; + proto_format(p, CFILE); + + if (check_length) + printf("int\n") >> CFILE; + else + printf("void\n") >> CFILE; + printf("%s(env", function_name) >> CFILE; + if (version) + printf(", version") >> CFILE; + printf(", argp, bp") >> CFILE; + if (check_length) + printf(", max, lenp") >> CFILE; + printf(")\n") >> CFILE; + + printf("\tENV *env;\n") >> CFILE; + if (version) + printf("\tu_int32_t version;\n") >> CFILE; + printf("\t%s *argp;\n", 
typedef_name) >> CFILE; + printf("\tu_int8_t *bp;\n") >> CFILE; + if (check_length) + printf("\tsize_t *lenp, max;\n") >> CFILE; + printf("{\n") >> CFILE; + + if (version) + printf("\tint copy_only;\n") >> CFILE; + if (check_length) { + printf("\tu_int8_t *start;\n\n") >> CFILE; + printf("\tif (max < %s", msg_size_name) >> CFILE; + for (i = 0; i < nvars; i++) + if (types[i] == "DBT") + printf("\n\t + (size_t)argp->%s.size", \ + vars[i]) >> CFILE; + # add in dbt sizes + printf(")\n") >> CFILE; + printf("\t\treturn (ENOMEM);\n") >> CFILE; + printf("\tstart = bp;\n\n") >> CFILE; + } + + if (version) { + printf("\tcopy_only = 0;\n") >> CFILE; + printf("\tif (version < DB_REPVERSION_47)\n") >> CFILE; + printf("\t\tcopy_only = 1;\n") >> CFILE; + } + for (i = 0; i < nvars; i++) { + if (types[i] == "u_int32_t" || types[i] == "db_pgno_t") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(bp, &argp->%s, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_HTONL_COPYOUT(env, bp, argp->%s);\n", \ + vars[i]) >> CFILE; + } else if (types[i] == "u_int16_t") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(bp, &argp->%s, sizeof(u_int16_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int16_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_HTONS_COPYOUT(env, bp, argp->%s);\n", \ + vars[i]) >> CFILE; + } else if (types[i] == "u_int8_t") { + printf(\ + "\t*bp++ = argp->%s;\n", vars[i]) >> CFILE; + } else if (types[i] == "DB_LSN") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(bp, &argp->%s.file, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf(\ + "\t\tmemcpy(bp, &argp->%s.offset, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; 
+ printf("\t} else {\n\t") >> CFILE; + } + printf("\tDB_HTONL_COPYOUT(env, bp, argp->%s.file);\n",\ + vars[i]) >> CFILE; + if (version) + printf("\t") >> CFILE; + printf( \ + "\tDB_HTONL_COPYOUT(env, bp, argp->%s.offset);\n", \ + vars[i]) >> CFILE; + if (version) + printf("\t}\n") >> CFILE; + } else if (types[i] == "DBT") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(bp, &argp->%s.size, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_HTONL_COPYOUT(env, bp, argp->%s.size);\n",\ + vars[i]) >> CFILE; + printf("\tif (argp->%s.size > 0) {\n", vars[i]) \ + >> CFILE; + printf( \ + "\t\tmemcpy(bp, argp->%s.data, argp->%s.size);\n", \ + vars[i], vars[i]) >> CFILE; + printf("\t\tbp += argp->%s.size;\n", vars[i]) >> CFILE; + printf("\t}\n") >> CFILE; + } else { + printf("unknown field type: %s", types[i]); + exit(1); + } + } + + if (check_length) { + printf("\n\t*lenp = (size_t)(bp - start);\n") >> CFILE; + printf("\treturn (0);\n") >> CFILE; + } + printf("}\n\n") >> CFILE; +} + +function emit_unmarshal() +{ + pi = 1; + p[pi++] = "int "; + function_name = sprintf("%s_unmarshal", base_name); + p[pi++] = function_name; + p[pi++] = " __P((ENV *, "; + if (version) + p[pi++] = sprintf("u_int32_t, "); + if (alloc) + p[pi++] = sprintf("%s **, u_int8_t *, ", typedef_name); + else + p[pi++] = sprintf("%s *, u_int8_t *, ", typedef_name); + p[pi++] = sprintf("size_t, u_int8_t **));"); + proto_format(p, CFILE); + + printf("int\n") >> CFILE; + if (alloc) + arg_name = "argpp"; + else + arg_name = "argp"; + printf("%s(env, ", function_name) >> CFILE; + if (version) + printf("version, ") >> CFILE; + printf("%s, bp, ", arg_name) >> CFILE; + printf("max, nextp)\n") >> CFILE; + printf("\tENV *env;\n") >> CFILE; + if (version) + printf("\tu_int32_t version;\n") >> CFILE; + if (alloc) + printf("\t%s **argpp;\n", typedef_name) >> CFILE; + else + 
printf("\t%s *argp;\n", typedef_name) >> CFILE; + printf("\tu_int8_t *bp;\n") >> CFILE; + printf("\tsize_t max;\n") >> CFILE; + printf("\tu_int8_t **nextp;\n") >> CFILE; + printf("{\n") >> CFILE; + has_locals = 0; + if (has_dbt) { + printf("\tsize_t needed;\n") >> CFILE; + has_locals = 1; + } + if (alloc) { + printf("\t%s *argp;\n", typedef_name) >> CFILE; + printf("\tint ret;\n") >> CFILE; + has_locals = 1; + } + if (version) { + printf("\tint copy_only;\n") >> CFILE; + has_locals = 1; + } + if (has_locals) + printf("\n") >> CFILE; + + # Check that input byte buffer is long enough. + # + if (has_dbt) { + printf("\tneeded = %s;\n", msg_size_name) >> CFILE; + printf("\tif (max < needed)\n") >> CFILE; + } else + printf("\tif (max < %s)\n", msg_size_name) >> CFILE; + printf("\t\tgoto too_few;\n") >> CFILE; + + if (alloc) { + printf( \ + "\tif ((ret = __os_malloc(env, sizeof(*argp), &argp)) != 0)\n") \ + >> CFILE; + printf("\t\treturn (ret);\n\n") >> CFILE; + } + if (version) { + printf("\tcopy_only = 0;\n") >> CFILE; + printf("\tif (version < DB_REPVERSION_47)\n") >> CFILE; + printf("\t\tcopy_only = 1;\n") >> CFILE; + } + + for (i = 0; i < nvars; i++) { + if (types[i] == "u_int32_t" || types[i] == "db_pgno_t") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(&argp->%s, bp, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_NTOHL_COPYIN(env, argp->%s, bp);\n", \ + vars[i]) >> CFILE; + } else if (types[i] == "u_int16_t") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(&argp->%s, bp, sizeof(u_int16_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int16_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_NTOHS_COPYIN(env, argp->%s, bp);\n", \ + vars[i]) >> CFILE; + } else if (types[i] == "u_int8_t") { + printf(\ + "\targp->%s = *bp++;\n", vars[i]) >> CFILE; + } 
else if (types[i] == "DB_LSN") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(&argp->%s.file, bp, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf(\ + "\t\tmemcpy(&argp->%s.offset, bp, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf("\t} else {\n\t") >> CFILE; + } + printf("\tDB_NTOHL_COPYIN(env, argp->%s.file, bp);\n", \ + vars[i]) >> CFILE; + if (version) + printf("\t") >> CFILE; + printf( \ + "\tDB_NTOHL_COPYIN(env, argp->%s.offset, bp);\n", \ + vars[i]) >> CFILE; + if (version) + printf("\t}\n") >> CFILE; + } else if (types[i] == "DBT") { + if (version) { + printf("\tif (copy_only) {\n") >> CFILE; + printf(\ + "\t\tmemcpy(&argp->%s.size, bp, sizeof(u_int32_t));\n", vars[i]) >> CFILE; + printf(\ + "\t\tbp += sizeof(u_int32_t);\n") >> CFILE; + printf("\t} else\n\t") >> CFILE; + } + printf("\tDB_NTOHL_COPYIN(env, argp->%s.size, bp);\n", \ + vars[i]) >> CFILE; + printf("\targp->%s.data = bp;\n", vars[i]) >> CFILE; + printf("\tneeded += (size_t)argp->%s.size;\n", \ + vars[i]) >> CFILE; + printf("\tif (max < needed)\n") >> CFILE; + printf("\t\tgoto too_few;\n") >> CFILE; + printf("\tbp += argp->%s.size;\n", vars[i]) >> CFILE; + } else { + printf("unknown field type: %s", types[i]); + exit(1); + } + } + + printf("\n\tif (nextp != NULL)\n") >> CFILE; + printf("\t\t*nextp = bp;\n") >> CFILE; + if (alloc) { + printf("\t*argpp = argp;\n") >> CFILE; + } + printf("\treturn (0);\n\n") >> CFILE; + + printf("too_few:\n") >> CFILE; + printf("\t__db_errx(env, DB_STR(\"3675\",\n") >> CFILE; + printf("\t \"Not enough input bytes to fill a %s message\"));\n", \ + base_name) >> CFILE; + printf("\treturn (EINVAL);\n") >> CFILE; + printf("}\n\n") >> CFILE; +} + +# proto_format -- +# Pretty-print a function prototype. 
+function proto_format(p, fp) +{ + printf("/*\n") >> fp; + + s = ""; + for (i = 1; i in p; ++i) + s = s p[i]; + + t = " * PUBLIC: " + if (length(s) + length(t) < 80) + printf("%s%s", t, s) >> fp; + else { + split(s, p, "__P"); + len = length(t) + length(p[1]); + printf("%s%s", t, p[1]) >> fp + + n = split(p[2], comma, ","); + comma[1] = "__P" comma[1]; + for (i = 1; i <= n; i++) { + if (len + length(comma[i]) > 70) { + printf("\n * PUBLIC: ") >> fp; + len = 0; + } + printf("%s%s", comma[i], i == n ? "" : ",") >> fp; + len += length(comma[i]) + 2; + } + } + printf("\n */\n") >> fp; + delete p; +} diff --git a/dist/gen_provider.pl b/dist/gen_provider.pl new file mode 100644 index 00000000..3e9c7266 --- /dev/null +++ b/dist/gen_provider.pl @@ -0,0 +1,86 @@ +# Perl script to generate a DTrace provider specification from dist/events.in +$class = undef; +$continuing = 0; +$header_comment = 1; +$single_line = undef; +$multi_line = undef; +print "#include \"dbdefs.d\"\n\nprovider bdb {\n"; + +while (<>) { + $lineno++; + if (/^$/) { + # Always put the (required multi-line) header comment + # by itself. It ends at the first empty line. + if ($header_comment && defined($multi_line)) { + printf("%s*/\n", $multi_line); + $multi_line = undef; + } + printf("\n"); + next; + } + chop; + + # Translate single-line # comments to DTrace compatible /* ... */. + # Generate both single and multi-line version to match KNF standards. + if (/^([ ]*)#([ ]*)(.*)$/) { + if (defined($multi_line)) { + $single_line = undef; + $multi_line = "$multi_line* $3\n$1 "; + } else { + $single_line = "$1/* $3 */"; + $multi_line = "$1/*\n$1 * $3\n$1 "; + } + next; + } + # It is not a comment line, see whether to output any pending comment + # that was saved up over the previous line(s). 
+ if (defined($multi_line)) { + if (defined($single_line)) { + printf("%s\n", $single_line); + } else { + printf("%s*/\n", $multi_line); + } + $single_line = undef; + $multi_line = undef; + } + # A line starting with a letter is an event class name. + if (/^[a-z]/) { + $class = $_; + next; + } + if ($continuing) { + # End of a continued probe signature? + if (/([a-z0-9-_ ,)]*;)$/) { + printf("%s\n", $_); + $continuing = 0; + next; + } elsif (/([a-z0-9-_ ,]*,)$/) { + printf("%s\n", $_); + next; + } + } + if (/([ ]*)([a-z-_]*)[ ]*(\([^)]*,)$/) { + # printf("\tprobe %s__%s%s\n", $class, $1, $2); + printf("%sprobe %s__%s%s\n", $1, $class, $2, $3); + $continuing = 1; + } elsif (/([ ]*)([a-z-_]*)[ ]*(\([^)]*\);)/) { + printf("%sprobe %s__%s%s\n", $1, $class, $2, $3); + # printf("\tprobe %s__%s%s\n", $class, $1, $2); + } else { + printf("** Error in line %d: %s\n", $lineno, $_); + printf("** Missing or unrecognized parameter list under class %s\n", $class); + exit(1); + } +} +print "};\n\n"; +if ($continuing) { + printf("** Unfinished probe under class %s\n", $class); + exit(1); +} + + +print "#pragma D attributes Evolving/Evolving/Common provider bdb provider\n"; +print "#pragma D attributes Private/Private/Common provider bdb module\n"; +print "#pragma D attributes Private/Private/Common provider bdb function\n"; +print "#pragma D attributes Evolving/Evolving/Common provider bdb name\n"; +print "#pragma D attributes Evolving/Evolving/Common provider bdb args\n\n"; diff --git a/dist/gen_rec.awk b/dist/gen_rec.awk new file mode 100644 index 00000000..5c788361 --- /dev/null +++ b/dist/gen_rec.awk @@ -0,0 +1,611 @@ +#!/bin/sh - +# +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# This awk script generates all the log, print, and read routines for the DB +# logging. 
It also generates a template for the recovery functions (these +# functions must still be edited, but are highly stylized and the initial +# template gets you a fair way along the path). +# +# For a given file prefix.src, we generate a file prefix_auto.c, and a file +# prefix_auto.h that contains: +# +# external declarations for the file's functions +# defines for the physical record types +# (logical types are defined in each subsystem manually) +# structures to contain the data unmarshalled from the log. +# +# This awk script requires that four variables be set when it is called: +# +# source_file -- the C source file being created +# header_file -- the C #include file being created +# template_file -- the template file being created +# +# And stdin must be the input file that defines the recovery setup. +# +# Within each file prefix.src, we use a number of public keywords (documented +# in the reference guide) as well as the following ones which are private to +# DB: +# DBPRIVATE Indicates that a file will be built as part of DB, +# rather than compiled independently, and so can use +# DB-private interfaces (such as DB_LOG_NOCOPY). +# DB A DB handle. Logs the dbreg fileid for that handle, +# and makes the *_log interface take a DB * instead of a +# DB_ENV *. +# OP The low byte contains the page type of the data +# that needs byte swapping. The rest is log record +# specific. +# PGDBT,PGDDBT Just like DBT, only we know it stores a page or page +# header, so we can byte-swap it (once we write the +# byte-swapping code, which doesn't exist yet). +# HDR,DATA Just like DBT, but we know that these contain database +# records that may need byte-swapping. +# LOCKS,PGLIST Just like DBT, but uses a print function for locks or +# page lists. 
+ +BEGIN { + if (source_file == "" || + header_file == "" || template_file == "") { + print "Usage: gen_rec.awk requires three variables to be set:" + print "\theader_file\t-- the recover #include file being created" + print "\tprint_file\t-- the print source file being created" + print "\tsource_file\t-- the recover source file being created" + print "\ttemplate_file\t-- the template file being created" + exit + } + FS="[\t ][\t ]*" + CFILE=source_file + HFILE=header_file + PFILE=print_file + TFILE=template_file + + # These are the variables we use to create code that calls into + # db routines and/or uses an environment. + dbprivate = 0 + env_type = "DB_ENV" + env_var = "dbenv" + log_call = "dbenv->log_put_record" + +} +/^[ ]*DBPRIVATE/ { + dbprivate = 1 + env_type = "ENV" + env_var = "env" + log_call = "__log_put_record" +} +/^[ ]*PREFIX/ { + prefix = $2 + num_funcs = 0; + + # Start .c files. + printf("/* Do not edit: automatically built by gen_rec.awk. */\n\n")\ + > CFILE + printf("#include \"db_config.h\"\n") >> CFILE + if (!dbprivate) { + printf("#include \n") >> CFILE + printf("#include \n") >> CFILE + printf("#include \n") >> CFILE + printf("#include \"db_int.h\"\n") >> CFILE + printf("#include \"dbinc/db_swap.h\"\n") >> CFILE + } + + printf("/* Do not edit: automatically built by gen_rec.awk. */\n\n")\ + > PFILE + printf("#include \"db_config.h\"\n\n") >> PFILE + if (!dbprivate) { + printf("#include \n") >> PFILE + printf("#include \n") >> PFILE + printf("#include \n") >> PFILE + printf("#include \"db_int.h\"\n") >> PFILE + printf("#include \"dbinc/log.h\"\n") >> PFILE + } + + if (prefix == "__ham") + printf("#ifdef HAVE_HASH\n") >> PFILE + if (prefix == "__heap") + printf("#ifdef HAVE_HEAP\n") >> PFILE + if (prefix == "__qam") + printf("#ifdef HAVE_QUEUE\n") >> PFILE + if (prefix == "__repmgr") + printf("#ifdef HAVE_REPLICATION_THREADS\n") >> PFILE + + # Start .h file, make the entire file conditional. 
+ printf("/* Do not edit: automatically built by gen_rec.awk. */\n\n")\ + > HFILE + printf("#ifndef\t%s_AUTO_H\n#define\t%s_AUTO_H\n", prefix, prefix)\ + >> HFILE + if (prefix == "__ham") + printf("#ifdef HAVE_HASH\n") >> HFILE + if (prefix == "__heap") + printf("#ifdef HAVE_HEAP\n") >> HFILE + if (prefix == "__qam") + printf("#ifdef HAVE_QUEUE\n") >> HFILE + if (prefix == "__repmgr") + printf("#ifdef HAVE_REPLICATION_THREADS\n") >> HFILE + if (dbprivate) + printf("#include \"dbinc/log.h\"\n") >> HFILE + + # Write recovery template file headers + if (dbprivate) { + # This assumes we're doing DB recovery. + printf("#include \"db_config.h\"\n\n") > TFILE + printf("#include \"db_int.h\"\n") >> TFILE + printf("#include \"dbinc/db_page.h\"\n") >> TFILE + printf("#include \"dbinc/%s.h\"\n", prefix) >> TFILE + printf("#include \"dbinc/log.h\"\n\n") >> TFILE + } else { + printf("#include \"db.h\"\n\n") > TFILE + } +} +/^[ ]*INCLUDE/ { + for (i = 2; i < NF; i++) + printf("%s ", $i) >> CFILE + printf("%s\n", $i) >> CFILE + for (i = 2; i < NF; i++) + printf("%s ", $i) >> PFILE + printf("%s\n", $i) >> PFILE +} +/^[ ]*(BEGIN|BEGIN_COMPAT)/ { + if (in_begin) { + print "Invalid format: missing END statement" + exit + } + in_begin = 1; + is_duplicate = 0; + is_dbt = 0; + has_dbp = 0; + has_data = 0; + is_uint = 0; + hdrdbt = "NULL"; + ddbt = "NULL"; + # + # BEGIN_COMPAT does not need logging function or rec table entry. 
+ # + need_log_function = ($1 == "BEGIN"); + is_compat = ($1 == "BEGIN_COMPAT"); + nvars = 0; + + thisfunc = $2; + dup_thisfunc = $2; + version = $3; + + rectype = $4; + + make_name(thisfunc, thisfunc, version); +} +/^[ ]*(DB|ARG|DBOP|DBT|DATA|HDR|LOCKS|OP|PGDBT|PGDDBT|PGLIST|POINTER|TIME)/ { + vars[nvars] = $2; + types[nvars] = $3; + modes[nvars] = $1; + formats[nvars] = $NF; + for (i = 4; i < NF; i++) + types[nvars] = sprintf("%s %s", types[nvars], $i); + + if ($1 == "DB") { + has_dbp = 1; + } + if ($1 == "DATA" || $1 == "PGDDBT") { + if (has_data == 1) { + print "Invalid format: multiple data fields" + exit + } + has_data = 1; + } + + if ($1 == "DB" || $1 == "DBOP" || $1 == "ARG" || \ + $1 == "OP" || $1 == "TIME") { + sizes[nvars] = sprintf("sizeof(u_int32_t)"); + if ($3 != "u_int32_t") + is_uint = 1; + } else if ($1 == "POINTER") + sizes[nvars] = sprintf("sizeof(*%s)", $2); + else { # DBT, PGDBT, PGDDBT + sizes[nvars] = sprintf("LOG_DBT_SIZE(%s)", $2); + is_dbt = 1; + if ($1 == "PGDBT") { + if (hdrdbt != "NULL") { + print "Multiple PGDBTs in record" + exit; + } + if (ddbt != "NULL") { + print "PGDDBT must follow PGDBT" + exit; + } + hdrdbt = vars[nvars]; + } else if ($1 == "PGDDBT") { + if (ddbt != "NULL") { + print "Multiple PGDDBTs in record" + exit; + } + if (hdrdbt == "NULL") { + print "PGDDBT must follow PGDBT" + exit; + } + ddbt = vars[nvars]; + } + } + nvars++; +} +/^[ ]*DUPLICATE/ { + is_duplicate = 1; + dup_rectype = $4; + old_logfunc = logfunc; + old_funcname = funcname; + make_name($2, funcname, $3); + internal_name = sprintf("%s_%s_int", prefix, thisfunc); + dup_logfunc = logfunc; + dup_funcname = funcname; + dup_thisfunc = $2; + logfunc = old_logfunc; + funcname = old_funcname; +} +/^[ ]*END/ { + if (!in_begin) { + print "Invalid format: missing BEGIN statement" + exit; + } + + # Declare the record type. 
+ printf("#define\tDB_%s\t%d\n", funcname, rectype) >> HFILE + if (is_duplicate) + printf("#define\tDB_%s\t%d\n",\ + dup_funcname, dup_rectype) >> HFILE + + # Structure declaration. + printf("typedef struct _%s_args {\n", funcname) >> HFILE + + # Here are the required fields for every structure + printf("\tu_int32_t type;\n\tDB_TXN *txnp;\n") >> HFILE + printf("\tDB_LSN prev_lsn;\n") >>HFILE + + # Here are the specified fields. + for (i = 0; i < nvars; i++) { + t = types[i]; + if (modes[i] == "POINTER") { + ndx = index(t, "*"); + t = substr(types[i], 1, ndx - 2); + } + printf("\t%s\t%s;\n", t, vars[i]) >> HFILE + } + printf("} %s_args;\n\n", funcname) >> HFILE + + # Output the read, log, and print functions (note that we must + # generate the required read function first, because we use its + # prototype in the print function). + + log_function(funcname, funcname); + read_function(funcname, funcname); + if (is_duplicate) { + log_function(dup_funcname, funcname); + read_function(dup_funcname, funcname); + } + print_function(); + + # Recovery template + if (dbprivate) + f = "template/rec_ctemp" + else + f = "template/rec_utemp" + + cmd = sprintf(\ + "sed -e s/PREF/%s/ -e s/FUNC/%s/ -e s/DUP/%s/ < template/rec_%s >> %s", + prefix, thisfunc, dup_thisfunc, + dbprivate ? "ctemp" : "utemp", TFILE) + system(cmd); + + # Done writing stuff, reset and continue. 
+ in_begin = 0; +} + +END { + # End the conditional for the HFILE + if (prefix == "__ham") + printf("#endif /* HAVE_HASH */\n") >> HFILE + if (prefix == "__heap") + printf("#endif /* HAVE_HEAP */\n") >> HFILE + if (prefix == "__qam") + printf("#endif /* HAVE_QUEUE */\n") >> HFILE + if (prefix == "__repmgr") + printf("#endif /* HAVE_REPLICATION_THREADS */\n") >> HFILE + printf("#endif\n") >> HFILE + + # Print initialization routine; function prototype + p[1] = sprintf("int %s_init_print %s%s%s", prefix, + "__P((", env_type, " *, DB_DISTAB *));"); + p[2] = ""; + proto_format(p, PFILE); + + # Create the routine to call __db_add_recovery(print_fn, id) + printf("int\n%s_init_print(%s, dtabp)\n",\ + prefix, env_var) >> PFILE + printf("\t%s *%s;\n", env_type, env_var) >> PFILE + printf("\tDB_DISTAB *dtabp;\n{\n") >> PFILE + # If application-specific, the user will need a prototype for + # __db_add_recovery, since they won't have DB's. + if (!dbprivate) { + printf(\ + "\tint __db_add_recovery __P((%s *, DB_DISTAB *,\n",\ + env_type) >> PFILE + printf(\ + "\t int (*)(%s *, DBT *, DB_LSN *, db_recops), u_int32_t));\n",\ + env_type) >> PFILE + } + + printf("\tint ret;\n\n") >> PFILE + for (i = 0; i < num_funcs; i++) { + if (functable[i] == 1) + continue; + printf("\tif ((ret = __db_add_recovery%s(%s, ",\ + dbprivate ? 
"_int" : "", env_var) >> PFILE + printf("dtabp,\n") >> PFILE + printf("\t %s_print, DB_%s)) != 0)\n",\ + dupfuncs[i], funcs[i]) >> PFILE + printf("\t\treturn (ret);\n") >> PFILE + } + printf("\treturn (0);\n}\n") >> PFILE + if (prefix == "__ham") + printf("#endif /* HAVE_HASH */\n") >> PFILE + if (prefix == "__heap") + printf("#endif /* HAVE_HEAP */\n") >> PFILE + if (prefix == "__qam") + printf("#endif /* HAVE_QUEUE */\n") >> PFILE + if (prefix == "__repmgr") + printf("#endif /* HAVE_REPLICATION_THREADS */\n") >> PFILE + + # We only want to generate *_init_recover functions if this is a + # DB-private, rather than application-specific, set of recovery + # functions. Application-specific recovery functions should be + # dispatched using the DB_ENV->set_app_dispatch callback rather than + # a DB dispatch table ("dtab"). + if (!dbprivate) + exit + # Everything below here is dbprivate, so it uses ENV instead of DB_ENV + # Recover initialization routine + p[1] = sprintf("int %s_init_recover %s", prefix,\ + "__P((ENV *, DB_DISTAB *));"); + p[2] = ""; + proto_format(p, CFILE); + + # Create the routine to call db_add_recovery(func, id) + printf("int\n%s_init_recover(env, dtabp)\n", prefix) >> CFILE + printf("\tENV *env;\n") >> CFILE + printf("\tDB_DISTAB *dtabp;\n{\n") >> CFILE + printf("\tint ret;\n\n") >> CFILE + for (i = 0; i < num_funcs; i++) { + if (functable[i] == 1) + continue; + printf("\tif ((ret = __db_add_recovery_int(env, ") >> CFILE + printf("dtabp,\n") >> CFILE + printf("\t %s_recover, DB_%s)) != 0)\n",\ + funcs[i], funcs[i]) >> CFILE + printf("\t\treturn (ret);\n") >> CFILE + } + printf("\treturn (0);\n}\n") >> CFILE +} + +function log_function(logfunc, arg) +{ + # Descriptor array + printf("extern __DB_IMPORT DB_LOG_RECSPEC %s_desc[];\n", \ + logfunc) >> HFILE; + printf("DB_LOG_RECSPEC %s_desc[] = {\n", logfunc) >> CFILE + + # Function declaration + if (need_log_function) { + printf("static inline int\n%s_log(", logfunc) >> HFILE + # Now print the 
parameters + if (has_dbp) { + printf("DB *dbp, ") >> HFILE + } else { + printf("%s *%s, ", env_type, env_var) >> HFILE + } + printf("DB_TXN *txnp, DB_LSN *ret_lsnp, ") >> HFILE + printf("u_int32_t flags") >> HFILE + } + + for (i = 0; i < nvars; i++) { + # Descriptor element + if (modes[i] == "ARG" || modes[i] == "OP") + printf(\ + "\t{LOGREC_%s, SSZ(%s_args, %s), \"%s\", \"%%%s\"},\n",\ + modes[i], arg, vars[i], \ + vars[i], formats[i]) >> CFILE + else + printf( \ + "\t{LOGREC_%s, SSZ(%s_args, %s), \"%s\", \"\"},\n",\ + modes[i], arg, vars[i], vars[i]) >> CFILE + + # Function argument + # We just skip for modes == DB. + if (!need_log_function || modes[i] == "DB") + continue; + printf(",") >> HFILE + if ((i % 5) == 0) + printf("\n ") >> HFILE + else + printf(" ") >> HFILE + if (modes[i] == "DBT" || modes[i] == "HDR" || + modes[i] == "DATA" || modes[i] == "LOCKS" || + modes[i] == "PGDBT" || modes[i] == "PGDDBT"|| + modes[i] == "PGLIST") + printf("const %s *%s", types[i], vars[i]) >> HFILE + else + printf("%s %s", types[i], vars[i]) >> HFILE + } + + # Descriptor termination + printf("\t{LOGREC_Done, 0, \"\", \"\"}\n};\n") >> CFILE + if (!need_log_function) + return; + + # Function call + printf(")\n{\n\treturn (%s(", log_call) >> HFILE + if (dbprivate) { + if (has_dbp) + printf("(dbp)->env, dbp, ") >> HFILE + else + printf("env, NULL, ") >> HFILE + } else { + if (has_dbp) + printf("dbenv, dbp, ") >> HFILE + else + printf("dbenv, NULL, ") >> HFILE + } + printf("txnp, ret_lsnp,\n\t flags, DB_%s, %d,\n\t", + logfunc, has_data) >> HFILE + printf(" sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN)")\ + >> HFILE + for (i = 0; i < nvars; i++) { + if (i % 3 == 0) + printf(" +\n\t %s", sizes[i]) >> HFILE + else + printf(" + %s", sizes[i]) >> HFILE + } + printf(",\n\t %s_desc", logfunc) >> HFILE + for (i = 0; i < nvars; i++) { + # We just skip for modes == DB. 
+ if (modes[i] == "DB") + continue; + printf(",") >> HFILE + if ((i % 8) == 0) + printf("\n\t ") >> HFILE + else + printf(" ") >> HFILE + printf("%s", vars[i]) >> HFILE + } + printf("));\n}\n\n") >> HFILE +} + +function read_function(logfunc, arg) +{ + # Read function + printf("static inline int %s_read(%s *%s, \n ", \ + logfunc, env_type, env_var) >> HFILE + if (has_dbp) + printf("DB **dbpp, void *td, ") >> HFILE + printf("void *data, ") >> HFILE + printf("%s_args **arg)\n{\n", arg) >> HFILE + if (dbprivate) { + printf("\t*arg = NULL;\n") >> HFILE + printf("\treturn (__log_read_record(env, \n") >> HFILE + if (has_dbp) + printf("\t dbpp, td, data, ") >> HFILE + else + printf("\t NULL, NULL, data, ") >> HFILE + } else { + printf("\treturn (dbenv->log_read_record(dbenv, \n") >> HFILE + if (has_dbp) + printf("\t dbpp, td, data, ") >> HFILE + else + printf("\t NULL, NULL, data, ") >> HFILE + } + + printf("%s_desc, sizeof(%s_args), (void**)arg));\n}\n", logfunc, arg)\ + >> HFILE; +} + +function print_function() +{ + # Write the print function; function prototype + p[1] = sprintf("int %s_print", funcname); + p[2] = " "; + if (dbprivate) + p[3] = "__P((ENV *, DBT *, DB_LSN *, db_recops, void *));"; + else + p[3] = "__P((DB_ENV *, DBT *, DB_LSN *, db_recops));"; + p[4] = ""; + proto_format(p, PFILE); + + # Function declaration + printf("int\n%s_print(%s, ", funcname, env_var) >> PFILE + printf("dbtp, lsnp, notused2") >> PFILE + if (dbprivate) + printf(", info") >> PFILE + printf(")\n") >> PFILE + printf("\t%s *%s;\n", env_type, env_var) >> PFILE + printf("\tDBT *dbtp;\n") >> PFILE + printf("\tDB_LSN *lsnp;\n") >> PFILE + printf("\tdb_recops notused2;\n") >> PFILE + if (dbprivate) + printf("\tvoid *info;\n") >> PFILE + printf("{\n") >> PFILE + + # Get rid of complaints about unused parameters. 
+ if (dbprivate) { + printf("\tCOMPQUIET(notused2, DB_TXN_PRINT);\n") >> PFILE + } else { + printf("\tnotused2 = DB_TXN_PRINT;\n") >> PFILE + } + printf("\n") >> PFILE + + printf(\ + "\treturn (__log_print_record(%senv, dbtp, lsnp, \"%s\", %s_desc", + dbprivate ? "" : "dbenv->", funcname, funcname) >> PFILE + + if (dbprivate) + printf(", info));\n") >> PFILE + else + printf(", NULL));\n") >> PFILE + + printf("}\n\n") >> PFILE +} + + +# proto_format -- +# Pretty-print a function prototype. +function proto_format(p, fp) +{ + printf("/*\n") >> fp; + + s = ""; + for (i = 1; i in p; ++i) + s = s p[i]; + + t = " * PUBLIC: " + if (length(s) + length(t) < 80) + printf("%s%s", t, s) >> fp; + else { + split(s, p, "__P"); + len = length(t) + length(p[1]); + printf("%s%s", t, p[1]) >> fp + + n = split(p[2], comma, ","); + comma[1] = "__P" comma[1]; + for (i = 1; i <= n; i++) { + if (len + length(comma[i]) > 70) { + printf("\n * PUBLIC: ") >> fp; + len = 0; + } + printf("%s%s", comma[i], i == n ? "" : ",") >> fp; + len += length(comma[i]) + 2; + } + } + printf("\n */\n") >> fp; + delete p; +} + +function make_name(unique_name, dup_name, p_version) +{ + logfunc = sprintf("%s_%s", prefix, unique_name); + logname[num_funcs] = logfunc; + if (is_compat) { + funcname = sprintf("%s_%s_%s", prefix, unique_name, p_version); + } else { + funcname = logfunc; + } + + if (is_duplicate) + dupfuncs[num_funcs] = dup_name; + else + dupfuncs[num_funcs] = funcname; + + funcs[num_funcs] = funcname; + functable[num_funcs] = is_compat; + ++num_funcs; +} + diff --git a/dist/install-sh b/dist/install-sh new file mode 100755 index 00000000..a5897de6 --- /dev/null +++ b/dist/install-sh @@ -0,0 +1,519 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2006-12-25.00 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit=${DOITPROG-} +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_glob='?' +initialize_posix_glob=' + test "$posix_glob" != "?" || { + if (set -f) 2>/dev/null; then + posix_glob= + else + posix_glob=: + fi + } +' + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +no_target_directory= + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. 
+ +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) dst_arg=$2 + shift;; + + -T) no_target_directory=true;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + trap '(exit $?); exit' 1 2 13 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. 
+ *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dst_arg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. 
+ # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writeable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. 
Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + -*) prefix='./';; + *) prefix='';; + esac + + eval "$initialize_posix_glob" + + oIFS=$IFS + IFS=/ + $posix_glob set -f + set fnord $dstdir + shift + $posix_glob set +f + IFS=$oIFS + + prefixes= + + for d + do + test -z "$d" && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. 
+ # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + + eval "$initialize_posix_glob" && + $posix_glob set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + $posix_glob set +f && + + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/dist/ltmain.sh b/dist/ltmain.sh new file mode 100644 index 00000000..7798cedf --- /dev/null +++ b/dist/ltmain.sh @@ -0,0 +1,9636 @@ + +# libtool (GNU libtool) 2.4 +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, +# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, +# or obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# Usage: $progname [OPTION]... [MODE-ARG]... +# +# Provide generalized library-building support services. 
+# +# --config show all configuration variables +# --debug enable verbose shell tracing +# -n, --dry-run display commands without modifying any files +# --features display basic configuration information and exit +# --mode=MODE use operation mode MODE +# --preserve-dup-deps don't remove duplicate dependency libraries +# --quiet, --silent don't print informational messages +# --no-quiet, --no-silent +# print informational messages (default) +# --tag=TAG use configuration variables from tag TAG +# -v, --verbose print more informational messages than default +# --no-verbose don't print the extra informational messages +# --version print version information +# -h, --help, --help-all print short, long, or detailed help message +# +# MODE must be one of the following: +# +# clean remove files from the build directory +# compile compile a source file into a libtool object +# execute automatically set library path, then run a program +# finish complete the installation of libtool libraries +# install install libraries or executables +# link create a library or an executable +# uninstall remove libraries from an installed directory +# +# MODE-ARGS vary depending on the MODE. When passed as first option, +# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that. +# Try `$progname --help --mode=MODE' for a more detailed description of MODE. +# +# When reporting a bug, please describe a test case to reproduce it and +# include the following information: +# +# host-triplet: $host +# shell: $SHELL +# compiler: $LTCC +# compiler flags: $LTCFLAGS +# linker: $LD (gnu? $with_gnu_ld) +# $progname: (GNU libtool) 2.4 +# automake: $automake_version +# autoconf: $autoconf_version +# +# Report bugs to . +# GNU libtool home page: . +# General help using GNU software: . 
+ +PROGRAM=libtool +PACKAGE=libtool +VERSION=2.4 +TIMESTAMP="" +package_revision=1.3293 + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + +# NLS nuisances: We save the old values to restore during execute mode. +lt_user_locale= +lt_safe_locale= +for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test \"\${$lt_var+set}\" = set; then + save_$lt_var=\$$lt_var + $lt_var=C + export $lt_var + lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\" + lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\" + fi" +done +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL + +$lt_unset CDPATH + + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath="$0" + + + +: ${CP="cp -f"} +test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'} +: ${EGREP="grep -E"} +: ${FGREP="grep -F"} +: ${GREP="grep"} +: ${LN_S="ln -s"} +: ${MAKE="make"} +: ${MKDIR="mkdir"} +: ${MV="mv -f"} +: ${RM="rm -f"} +: ${SED="sed"} +: ${SHELL="${CONFIG_SHELL-/bin/sh}"} +: ${Xsed="$SED -e 1s/^X//"} + +# Global variables: +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. +EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. 
+ +exit_status=$EXIT_SUCCESS + +# Make sure IFS has a sensible default +lt_nl=' +' +IFS=" $lt_nl" + +dirname="s,/[^/]*$,," +basename="s,^.*/,," + +# func_dirname file append nondir_replacement +# Compute the dirname of FILE. If nonempty, add APPEND to the result, +# otherwise set result to NONDIR_REPLACEMENT. +func_dirname () +{ + func_dirname_result=`$ECHO "${1}" | $SED "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi +} # func_dirname may be replaced by extended shell implementation + + +# func_basename file +func_basename () +{ + func_basename_result=`$ECHO "${1}" | $SED "$basename"` +} # func_basename may be replaced by extended shell implementation + + +# func_dirname_and_basename file append nondir_replacement +# perform func_basename and func_dirname in a single function +# call: +# dirname: Compute the dirname of FILE. If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value retuned in "$func_basename_result" +# Implementation must be kept synchronized with func_dirname +# and func_basename. For efficiency, we do not delegate to +# those functions but instead duplicate the functionality here. +func_dirname_and_basename () +{ + # Extract subdirectory from the argument. + func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi + func_basename_result=`$ECHO "${1}" | $SED -e "$basename"` +} # func_dirname_and_basename may be replaced by extended shell implementation + + +# func_stripname prefix suffix name +# strip PREFIX and SUFFIX off of NAME. 
+# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# func_strip_suffix prefix name +func_stripname () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname may be replaced by extended shell implementation + + +# These SED scripts presuppose an absolute path with a trailing slash. +pathcar='s,^/\([^/]*\).*$,\1,' +pathcdr='s,^/[^/]*,,' +removedotparts=':dotsl + s@/\./@/@g + t dotsl + s,/\.$,/,' +collapseslashes='s@/\{1,\}@/@g' +finalslash='s,/*$,/,' + +# func_normal_abspath PATH +# Remove doubled-up and trailing slashes, "." path components, +# and cancel out any ".." path components in PATH after making +# it an absolute path. +# value returned in "$func_normal_abspath_result" +func_normal_abspath () +{ + # Start from root dir and reassemble the path. + func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. + func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd. 
+ func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"` + while :; do + # Processed it all yet? + if test "$func_normal_abspath_tpath" = / ; then + # If we ascended to the root using ".." the result may be empty now. + if test -z "$func_normal_abspath_result" ; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcdr"` + # Figure out what to do with it + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. + func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. + func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + +# func_relative_path SRCDIR DSTDIR +# generates a relative path from SRCDIR to DSTDIR, with a trailing +# slash if non-empty, suitable for immediately appending a filename +# without needing to append a separator. 
+# value returned in "$func_relative_path_result" +func_relative_path () +{ + func_relative_path_result= + func_normal_abspath "$1" + func_relative_path_tlibdir=$func_normal_abspath_result + func_normal_abspath "$2" + func_relative_path_tbindir=$func_normal_abspath_result + + # Ascend the tree starting from libdir + while :; do + # check if we have found a prefix of bindir + case $func_relative_path_tbindir in + $func_relative_path_tlibdir) + # found an exact match + func_relative_path_tcancelled= + break + ;; + $func_relative_path_tlibdir*) + # found a matching prefix + func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" + func_relative_path_tcancelled=$func_stripname_result + if test -z "$func_relative_path_result"; then + func_relative_path_result=. + fi + break + ;; + *) + func_dirname $func_relative_path_tlibdir + func_relative_path_tlibdir=${func_dirname_result} + if test "x$func_relative_path_tlibdir" = x ; then + # Have to descend all the way to the root! + func_relative_path_result=../$func_relative_path_result + func_relative_path_tcancelled=$func_relative_path_tbindir + break + fi + func_relative_path_result=../$func_relative_path_result + ;; + esac + done + + # Now calculate path; take care to avoid doubling-up slashes. + func_stripname '' '/' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + func_stripname '/' '/' "$func_relative_path_tcancelled" + if test "x$func_stripname_result" != x ; then + func_relative_path_result=${func_relative_path_result}/${func_stripname_result} + fi + + # Normalisation. If bindir is libdir, return empty string, + # else relative path ending with a slash; either way, target + # file name can be directly appended. + if test ! 
-z "$func_relative_path_result"; then + func_stripname './' '' "$func_relative_path_result/" + func_relative_path_result=$func_stripname_result + fi +} + +# The name of this program: +func_dirname_and_basename "$progpath" +progname=$func_basename_result + +# Make sure we have an absolute path for reexecution: +case $progpath in + [\\/]*|[A-Za-z]:\\*) ;; + *[\\/]*) + progdir=$func_dirname_result + progdir=`cd "$progdir" && pwd` + progpath="$progdir/$progname" + ;; + *) + save_IFS="$IFS" + IFS=: + for progdir in $PATH; do + IFS="$save_IFS" + test -x "$progdir/$progname" && break + done + IFS="$save_IFS" + test -n "$progdir" || progdir=`pwd` + progpath="$progdir/$progname" + ;; +esac + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed="${SED}"' -e 1s/^X//' +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution that turns a string into a regex matching for the +# string literally. +sed_make_literal_regex='s,[].[^$\\*\/],\\&,g' + +# Sed substitution that converts a w32 file name or path +# which contains forward slashes, into one that contains +# (escaped) backslashes. A very naive implementation. +lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' + +# Re-`\' parameter expansions in output of double_quote_subst that were +# `\'-ed in input to the same. If an odd number of `\' preceded a '$' +# in input to double_quote_subst, that '$' was protected from expansion. +# Since each input `\' is now two `\'s, look for any number of runs of +# four `\'s followed by two `\'s and then a '$'. `\' that '$'. 
+bs='\\' +bs2='\\\\' +bs4='\\\\\\\\' +dollar='\$' +sed_double_backslash="\ + s/$bs4/&\\ +/g + s/^$bs2$dollar/$bs&/ + s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g + s/\n//g" + +# Standard options: +opt_dry_run=false +opt_help=false +opt_quiet=false +opt_verbose=false +opt_warning=: + +# func_echo arg... +# Echo program name prefixed message, along with the current mode +# name if it has been set yet. +func_echo () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }$*" +} + +# func_verbose arg... +# Echo program name prefixed message in verbose mode only. +func_verbose () +{ + $opt_verbose && func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +# func_error arg... +# Echo program name prefixed message to standard error. +func_error () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2 +} + +# func_warning arg... +# Echo program name prefixed warning message to standard error. +func_warning () +{ + $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2 + + # bash bug again: + : +} + +# func_fatal_error arg... +# Echo program name prefixed message to standard error, and exit. +func_fatal_error () +{ + func_error ${1+"$@"} + exit $EXIT_FAILURE +} + +# func_fatal_help arg... +# Echo program name prefixed message to standard error, followed by +# a help hint, and exit. +func_fatal_help () +{ + func_error ${1+"$@"} + func_fatal_error "$help" +} +help="Try \`$progname --help' for more information." ## default + + +# func_grep expression filename +# Check whether EXPRESSION matches any line of FILENAME, without output. +func_grep () +{ + $GREP "$1" "$2" >/dev/null 2>&1 +} + + +# func_mkdir_p directory-path +# Make sure the entire path to DIRECTORY-PATH is available. 
+func_mkdir_p () +{ + my_directory_path="$1" + my_dir_list= + + if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then + + # Protect directory names starting with `-' + case $my_directory_path in + -*) my_directory_path="./$my_directory_path" ;; + esac + + # While some portion of DIR does not yet exist... + while test ! -d "$my_directory_path"; do + # ...make a list in topmost first order. Use a colon delimited + # list incase some portion of path contains whitespace. + my_dir_list="$my_directory_path:$my_dir_list" + + # If the last portion added has no slash in it, the list is done + case $my_directory_path in */*) ;; *) break ;; esac + + # ...otherwise throw away the child directory and loop + my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"` + done + my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'` + + save_mkdir_p_IFS="$IFS"; IFS=':' + for my_dir in $my_dir_list; do + IFS="$save_mkdir_p_IFS" + # mkdir can fail with a `File exist' error if two processes + # try to create one of the directories concurrently. Don't + # stop in that case! + $MKDIR "$my_dir" 2>/dev/null || : + done + IFS="$save_mkdir_p_IFS" + + # Bail out if we (or some other process) failed to create a directory. + test -d "$my_directory_path" || \ + func_fatal_error "Failed to create \`$1'" + fi +} + + +# func_mktempdir [string] +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, STRING is the basename for that directory. +func_mktempdir () +{ + my_template="${TMPDIR-/tmp}/${1-$progname}" + + if test "$opt_dry_run" = ":"; then + # Return a directory name, but don't create it in dry-run mode + my_tmpdir="${my_template}-$$" + else + + # If mktemp works, use that first and foremost + my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null` + + if test ! 
-d "$my_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + my_tmpdir="${my_template}-${RANDOM-0}$$" + + save_mktempdir_umask=`umask` + umask 0077 + $MKDIR "$my_tmpdir" + umask $save_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$my_tmpdir" || \ + func_fatal_error "cannot create temporary directory \`$my_tmpdir'" + fi + + $ECHO "$my_tmpdir" +} + + +# func_quote_for_eval arg +# Aesthetically quote ARG to be evaled later. +# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT +# is double-quoted, suitable for a subsequent eval, whereas +# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters +# which are still active within double quotes backslashified. +func_quote_for_eval () +{ + case $1 in + *[\\\`\"\$]*) + func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;; + *) + func_quote_for_eval_unquoted_result="$1" ;; + esac + + case $func_quote_for_eval_unquoted_result in + # Double-quote args containing shell metacharacters to delay + # word splitting, command substitution and and variable + # expansion for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\"" + ;; + *) + func_quote_for_eval_result="$func_quote_for_eval_unquoted_result" + esac +} + + +# func_quote_for_expand arg +# Aesthetically quote ARG to be evaled later; same as above, +# but do not quote variable references. +func_quote_for_expand () +{ + case $1 in + *[\\\`\"]*) + my_arg=`$ECHO "$1" | $SED \ + -e "$double_quote_subst" -e "$sed_double_backslash"` ;; + *) + my_arg="$1" ;; + esac + + case $my_arg in + # Double-quote args containing shell metacharacters to delay + # word splitting and command substitution for a subsequent eval. 
+ # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + my_arg="\"$my_arg\"" + ;; + esac + + func_quote_for_expand_result="$my_arg" +} + + +# func_show_eval cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. +func_show_eval () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$my_cmd" + my_status=$? + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + + +# func_show_eval_locale cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. Use the saved locale for evaluation. +func_show_eval_locale () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$lt_user_locale + $my_cmd" + my_status=$? + eval "$lt_safe_locale" + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + +# func_tr_sh +# Turn $1 into a string suitable for a shell variable name. +# Result is stored in $func_tr_sh_result. All characters +# not in the set a-zA-Z0-9_ are replaced with '_'. Further, +# if $1 begins with a digit, a '_' is prepended as well. +func_tr_sh () +{ + case $1 in + [0-9]* | *[!a-zA-Z0-9_]*) + func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'` + ;; + * ) + func_tr_sh_result=$1 + ;; + esac +} + + +# func_version +# Echo version message to standard output and exit. 
+func_version () +{ + $opt_debug + + $SED -n '/(C)/!b go + :more + /\./!{ + N + s/\n# / / + b more + } + :go + /^# '$PROGRAM' (GNU /,/# warranty; / { + s/^# // + s/^# *$// + s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/ + p + }' < "$progpath" + exit $? +} + +# func_usage +# Echo short help message to standard output and exit. +func_usage () +{ + $opt_debug + + $SED -n '/^# Usage:/,/^# *.*--help/ { + s/^# // + s/^# *$// + s/\$progname/'$progname'/ + p + }' < "$progpath" + echo + $ECHO "run \`$progname --help | more' for full usage" + exit $? +} + +# func_help [NOEXIT] +# Echo long help message to standard output and exit, +# unless 'noexit' is passed as argument. +func_help () +{ + $opt_debug + + $SED -n '/^# Usage:/,/# Report bugs to/ { + :print + s/^# // + s/^# *$// + s*\$progname*'$progname'* + s*\$host*'"$host"'* + s*\$SHELL*'"$SHELL"'* + s*\$LTCC*'"$LTCC"'* + s*\$LTCFLAGS*'"$LTCFLAGS"'* + s*\$LD*'"$LD"'* + s/\$with_gnu_ld/'"$with_gnu_ld"'/ + s/\$automake_version/'"`(automake --version) 2>/dev/null |$SED 1q`"'/ + s/\$autoconf_version/'"`(autoconf --version) 2>/dev/null |$SED 1q`"'/ + p + d + } + /^# .* home page:/b print + /^# General help using/b print + ' < "$progpath" + ret=$? + if test -z "$1"; then + exit $ret + fi +} + +# func_missing_arg argname +# Echo program name prefixed message to standard error and set global +# exit_cmd. +func_missing_arg () +{ + $opt_debug + + func_error "missing argument for $1." + exit_cmd=exit +} + + +# func_split_short_opt shortopt +# Set func_split_short_opt_name and func_split_short_opt_arg shell +# variables after splitting SHORTOPT after the 2nd character. 
+func_split_short_opt () +{ + my_sed_short_opt='1s/^\(..\).*$/\1/;q' + my_sed_short_rest='1s/^..\(.*\)$/\1/;q' + + func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"` + func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"` +} # func_split_short_opt may be replaced by extended shell implementation + + +# func_split_long_opt longopt +# Set func_split_long_opt_name and func_split_long_opt_arg shell +# variables after splitting LONGOPT at the `=' sign. +func_split_long_opt () +{ + my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q' + my_sed_long_arg='1s/^--[^=]*=//' + + func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"` + func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"` +} # func_split_long_opt may be replaced by extended shell implementation + +exit_cmd=: + + + + + +magic="%%%MAGIC variable%%%" +magic_exe="%%%MAGIC EXE variable%%%" + +# Global variables. +nonopt= +preserve_args= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" +extracted_archives= +extracted_serial=0 + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + +# func_append var value +# Append VALUE to the end of shell variable VAR. +func_append () +{ + eval "${1}=\$${1}\${2}" +} # func_append may be replaced by extended shell implementation + +# func_append_quoted var value +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +func_append_quoted () +{ + func_quote_for_eval "${2}" + eval "${1}=\$${1}\\ \$func_quote_for_eval_result" +} # func_append_quoted may be replaced by extended shell implementation + + +# func_arith arithmetic-term... +func_arith () +{ + func_arith_result=`expr "${@}"` +} # func_arith may be replaced by extended shell implementation + + +# func_len string +# STRING may not start with a hyphen. 
+func_len () +{ + func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len` +} # func_len may be replaced by extended shell implementation + + +# func_lo2o object +func_lo2o () +{ + func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"` +} # func_lo2o may be replaced by extended shell implementation + + +# func_xform libobj-or-source +func_xform () +{ + func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'` +} # func_xform may be replaced by extended shell implementation + + +# func_fatal_configuration arg... +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_error ${1+"$@"} + func_error "See the $PACKAGE documentation for more information." + func_fatal_error "Fatal configuration error." +} + + +# func_config +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + +# func_features +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test "$build_libtool_libs" = yes; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + +# func_enable_tag tagname +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. 
+func_enable_tag () +{ + # Global variable: + tagname="$1" + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf="/$re_begincf/,/$re_endcf/p" + + # Validate tagname. + case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + +# func_check_version_match +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. 
This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. +_LT_EOF + fi + + exit $EXIT_MISMATCH + fi +} + + +# Shorthand for --mode=foo, only valid as the first argument +case $1 in +clean|clea|cle|cl) + shift; set dummy --mode clean ${1+"$@"}; shift + ;; +compile|compil|compi|comp|com|co|c) + shift; set dummy --mode compile ${1+"$@"}; shift + ;; +execute|execut|execu|exec|exe|ex|e) + shift; set dummy --mode execute ${1+"$@"}; shift + ;; +finish|finis|fini|fin|fi|f) + shift; set dummy --mode finish ${1+"$@"}; shift + ;; +install|instal|insta|inst|ins|in|i) + shift; set dummy --mode install ${1+"$@"}; shift + ;; +link|lin|li|l) + shift; set dummy --mode link ${1+"$@"}; shift + ;; +uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) + shift; set dummy --mode uninstall ${1+"$@"}; shift + ;; +esac + + + +# Option defaults: +opt_debug=: +opt_dry_run=false +opt_config=false +opt_preserve_dup_deps=false +opt_features=false +opt_finish=false +opt_help=false +opt_help_all=false +opt_silent=: +opt_verbose=: +opt_silent=false +opt_verbose=false + + +# Parse options once, thoroughly. This comes as soon as possible in the +# script to make things like `--version' happen as quickly as we can. 
+{ + # this just eases exit handling + while test $# -gt 0; do + opt="$1" + shift + case $opt in + --debug|-x) opt_debug='set -x' + func_echo "enabling shell trace mode" + $opt_debug + ;; + --dry-run|--dryrun|-n) + opt_dry_run=: + ;; + --config) + opt_config=: +func_config + ;; + --dlopen|-dlopen) + optarg="$1" + opt_dlopen="${opt_dlopen+$opt_dlopen +}$optarg" + shift + ;; + --preserve-dup-deps) + opt_preserve_dup_deps=: + ;; + --features) + opt_features=: +func_features + ;; + --finish) + opt_finish=: +set dummy --mode finish ${1+"$@"}; shift + ;; + --help) + opt_help=: + ;; + --help-all) + opt_help_all=: +opt_help=': help-all' + ;; + --mode) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_mode="$optarg" +case $optarg in + # Valid mode arguments: + clean|compile|execute|finish|install|link|relink|uninstall) ;; + + # Catch anything else as an error + *) func_error "invalid argument for $opt" + exit_cmd=exit + break + ;; +esac + shift + ;; + --no-silent|--no-quiet) + opt_silent=false +func_append preserve_args " $opt" + ;; + --no-verbose) + opt_verbose=false +func_append preserve_args " $opt" + ;; + --silent|--quiet) + opt_silent=: +func_append preserve_args " $opt" + opt_verbose=false + ;; + --verbose|-v) + opt_verbose=: +func_append preserve_args " $opt" +opt_silent=false + ;; + --tag) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_tag="$optarg" +func_append preserve_args " $opt $optarg" +func_enable_tag "$optarg" + shift + ;; + + -\?|-h) func_usage ;; + --help) func_help ;; + --version) func_version ;; + + # Separate optargs to long options: + --*=*) + func_split_long_opt "$opt" + set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"} + shift + ;; + + # Separate non-argument short options: + -\?*|-h*|-n*|-v*) + func_split_short_opt "$opt" + set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"} + shift + ;; + + --) break ;; + -*) func_fatal_help "unrecognized option \`$opt'" ;; + 
*) set dummy "$opt" ${1+"$@"}; shift; break ;; + esac + done + + # Validate options: + + # save first non-option argument + if test "$#" -gt 0; then + nonopt="$opt" + shift + fi + + # preserve --debug + test "$opt_debug" = : || func_append preserve_args " --debug" + + case $host in + *cygwin* | *mingw* | *pw32* | *cegcc*) + # don't eliminate duplications in $postdeps and $predeps + opt_duplicate_compiler_generated_deps=: + ;; + *) + opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps + ;; + esac + + $opt_help || { + # Sanity checks first: + func_check_version_match + + if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then + func_fatal_configuration "not configured to build any kind of library" + fi + + # Darwin sucks + eval std_shrext=\"$shrext_cmds\" + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$opt_dlopen" && test "$opt_mode" != execute; then + func_error "unrecognized option \`-dlopen'" + $ECHO "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$progname --help --mode=$opt_mode' for more information." + } + + + # Bail if the options were screwed + $exit_cmd $EXIT_FAILURE +} + + + + +## ----------- ## +## Main. ## +## ----------- ## + +# func_lalib_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_lalib_p () +{ + test -f "$1" && + $SED -e 4q "$1" 2>/dev/null \ + | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 +} + +# func_lalib_unsafe_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. +# This function implements the same check as func_lalib_p without +# resorting to external programs. To this end, it redirects stdin and +# closes it afterwards, without saving the original file descriptor. +# As a safety measure, use it only where a negative result would be +# fatal anyway. 
Works if `file' does not exist. +func_lalib_unsafe_p () +{ + lalib_p=no + if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then + for lalib_p_l in 1 2 3 4 + do + read lalib_p_line + case "$lalib_p_line" in + \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; + esac + done + exec 0<&5 5<&- + fi + test "$lalib_p" = yes +} + +# func_ltwrapper_script_p file +# True iff FILE is a libtool wrapper script +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_script_p () +{ + func_lalib_p "$1" +} + +# func_ltwrapper_executable_p file +# True iff FILE is a libtool wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_executable_p () +{ + func_ltwrapper_exec_suffix= + case $1 in + *.exe) ;; + *) func_ltwrapper_exec_suffix=.exe ;; + esac + $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 +} + +# func_ltwrapper_scriptname file +# Assumes file is an ltwrapper_executable +# uses $file to determine the appropriate filename for a +# temporary ltwrapper_script. +func_ltwrapper_scriptname () +{ + func_dirname_and_basename "$1" "" "." + func_stripname '' '.exe' "$func_basename_result" + func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper" +} + +# func_ltwrapper_p file +# True iff FILE is a libtool wrapper script or wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_p () +{ + func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" +} + + +# func_execute_cmds commands fail_cmd +# Execute tilde-delimited COMMANDS. +# If FAIL_CMD is given, eval that upon failure. +# FAIL_CMD may read-access the current command in variable CMD! 
+func_execute_cmds () +{ + $opt_debug + save_ifs=$IFS; IFS='~' + for cmd in $1; do + IFS=$save_ifs + eval cmd=\"$cmd\" + func_show_eval "$cmd" "${2-:}" + done + IFS=$save_ifs +} + + +# func_source file +# Source FILE, adding directory component if necessary. +# Note that it is not necessary on cygwin/mingw to append a dot to +# FILE even if both FILE and FILE.exe exist: automatic-append-.exe +# behavior happens only for exec(3), not for open(2)! Also, sourcing +# `FILE.' does not work on cygwin managed mounts. +func_source () +{ + $opt_debug + case $1 in + */* | *\\*) . "$1" ;; + *) . "./$1" ;; + esac +} + + +# func_resolve_sysroot PATH +# Replace a leading = in PATH with a sysroot. Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case "$lt_sysroot:$1" in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result="=$func_stripname_result" + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' 
+func_infer_tag () +{ + $opt_debug + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. + eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. 
+ if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with \`--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=${1} + if test "$build_libtool_libs" = yes; then + write_lobj=\'${2}\' + else + write_lobj=none + fi + + if test "$build_old_libs" = yes; then + write_oldobj=\'${3}\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$lt_sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. 
+func_convert_core_path_wine_to_w32 () +{ + $opt_debug + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result="" + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result" ; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result" + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $opt_debug + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" 
-ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $opt_debug + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $opt_debug + if test -z "$2" && test -n "$1" ; then + func_error "Could not determine host file name corresponding to" + func_error " \`$1'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback: + func_to_host_file_result="$1" + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_file_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $opt_debug + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " \`$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. 
+ if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result="$3" + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. +func_convert_path_front_back_pathsep () +{ + $opt_debug + case $4 in + $1 ) func_to_host_path_result="$3$func_to_host_path_result" + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via `$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result. +func_to_host_file () +{ + $opt_debug + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $opt_debug + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. 
+func_convert_file_noop () +{ + func_to_host_file_result="$1" +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result="$func_convert_core_msys_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. +func_convert_file_nix_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_file_wine_to_w32 "$1" + func_to_host_file_result="$func_convert_core_file_wine_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_w32 + + +# func_convert_file_msys_to_cygwin ARG +# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. 
+func_convert_file_msys_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_cygpath -u "$func_convert_core_msys_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_cygwin + + +# func_convert_file_nix_to_cygwin ARG +# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed +# in a wine environment, working winepath, and LT_CYGPATH set. Returns result +# in func_to_host_file_result. +func_convert_file_nix_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. + func_convert_core_file_wine_to_w32 "$1" + func_cygpath -u "$func_convert_core_file_wine_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_cygwin + + +############################################# +# $build to $host PATH CONVERSION FUNCTIONS # +############################################# +# invoked via `$to_host_path_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# The result will be available in $func_to_host_path_result. +# +# Path separators are also converted from $build format to $host format. If +# ARG begins or ends with a path separator character, it is preserved (but +# converted to $host format) on output. +# +# All path conversion functions are named using the following convention: +# file name conversion function : func_convert_file_X_to_Y () +# path conversion function : func_convert_path_X_to_Y () +# where, for any given $build/$host combination the 'X_to_Y' value is the +# same. 
If conversion functions are added for new $build/$host combinations, +# the two new functions must follow this pattern, or func_init_to_host_path_cmd +# will break. + + +# func_init_to_host_path_cmd +# Ensures that function "pointer" variable $to_host_path_cmd is set to the +# appropriate value, based on the value of $to_host_file_cmd. +to_host_path_cmd= +func_init_to_host_path_cmd () +{ + $opt_debug + if test -z "$to_host_path_cmd"; then + func_stripname 'func_convert_file_' '' "$to_host_file_cmd" + to_host_path_cmd="func_convert_path_${func_stripname_result}" + fi +} + + +# func_to_host_path ARG +# Converts the path ARG from $build format to $host format. Return result +# in func_to_host_path_result. +func_to_host_path () +{ + $opt_debug + func_init_to_host_path_cmd + $to_host_path_cmd "$1" +} +# end func_to_host_path + + +# func_convert_path_noop ARG +# Copy ARG to func_to_host_path_result. +func_convert_path_noop () +{ + func_to_host_path_result="$1" +} +# end func_convert_path_noop + + +# func_convert_path_msys_to_w32 ARG +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_path_result. +func_convert_path_msys_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from ARG. MSYS + # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; + # and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_msys_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_msys_to_w32 + + +# func_convert_path_cygwin_to_w32 ARG +# Convert path ARG from Cygwin to w32 format. 
Returns result in +# func_to_host_file_result. +func_convert_path_cygwin_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_cygwin_to_w32 + + +# func_convert_path_nix_to_w32 ARG +# Convert path ARG from *nix to w32 format. Requires a wine environment and +# a working winepath. Returns result in func_to_host_file_result. +func_convert_path_nix_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_path_wine_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_nix_to_w32 + + +# func_convert_path_msys_to_cygwin ARG +# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. 
+func_convert_path_msys_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_msys_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_msys_to_cygwin + + +# func_convert_path_nix_to_cygwin ARG +# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a +# a wine environment, working winepath, and LT_CYGPATH set. Returns result in +# func_to_host_file_result. +func_convert_path_nix_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from + # ARG. msys behavior is inconsistent here, cygpath turns them + # into '.;' and ';.', and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_nix_to_cygwin + + +# func_mode_compile arg... +func_mode_compile () +{ + $opt_debug + # Get the compilation command and the source file. + base_compile= + srcfile="$nonopt" # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". 
Instead, add this to base_compile + lastarg="$arg" + arg_mode=normal + ;; + + target ) + libobj="$arg" + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify \`-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + func_append_quoted lastarg "$arg" + done + IFS="$save_ifs" + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg="$srcfile" + srcfile="$arg" + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + func_fatal_error "you must specify an argument for -Xcompile" + ;; + target) + func_fatal_error "you must specify a target with \`-o'" + ;; + *) + # Get the name of the library object. + test -z "$libobj" && { + func_basename "$srcfile" + libobj="$func_basename_result" + } + ;; + esac + + # Recognize several different file suffixes. 
+ # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from \`$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_for_eval "$libobj" + test "X$libobj" != "X$func_quote_for_eval_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name \`$libobj' may not contain shell special characters." + func_dirname_and_basename "$obj" "/" "" + objname="$func_basename_result" + xdir="$func_dirname_result" + lobj=${xdir}$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. 
+ if test "$build_old_libs" = yes; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_for_eval "$srcfile" + qsrcfile=$func_quote_for_eval_result + + # Only build a PIC object if we are building libtool libraries. + if test "$build_libtool_libs" = yes; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test "$pic_mode" != no; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. 
+ if test "$suppress_opt" = yes; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test "$compiler_c_o" = yes; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test "$opt_mode" = compile && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. 
+ func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a \`.o' file suitable for static linking + -static only build a \`.o' file suitable for static linking + -Wc,FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." 
+ ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE Use a list of object files found in FILE to specify objects + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass linker-specific FLAG directly to the compiler + -Wl,FLAG + -Xlinker FLAG pass 
linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. + +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode \`$opt_mode'" + ;; + esac + + echo + $ECHO "Try \`$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test "$opt_help" = :; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | sed -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + sed '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $opt_debug + # The first argument is the command name. + cmd="$nonopt" + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "\`$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$lib' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "\`$file' was not linked with \`-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir="$func_dirname_result" + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "." 
+ dir="$func_dirname_result" + ;; + + *) + func_warning "\`-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir="$absdir" + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file="$progdir/$program" + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if test "X$opt_dry_run" = Xfalse; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. + exec_cmd="\$cmd$args" + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = execute && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... 
+func_mode_finish () +{ + $opt_debug + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "\`$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument \`$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and \`=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ $opt_silent && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the \`$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the \`$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the \`$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + case $host in + solaris2.[6789]|solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test "$opt_mode" = finish && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... +func_mode_install () +{ + $opt_debug + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). 
+ if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac; then + # Aesthetically quote it. + func_quote_for_eval "$nonopt" + install_prog="$func_quote_for_eval_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_for_eval "$arg" + func_append install_prog "$func_quote_for_eval_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=yes ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test "x$prev" = x-m && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_for_eval "$arg" + func_append install_prog " $func_quote_for_eval_result" + if test -n "$arg2"; then + func_quote_for_eval "$arg2" + fi + func_append install_shared_prog " $func_quote_for_eval_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the \`$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_for_eval "$install_override_mode" + func_append install_shared_prog " -m $func_quote_for_eval_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir="$func_dirname_result" + destname="$func_basename_result" + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "\`$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "\`$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir="$func_dirname_result" + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking \`$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname="$1" + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme="$stripme" + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme="" + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try `ln -sf' first, because the `ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name="$func_basename_result" + instname="$dir/$name"i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to \`$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext="" + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=".exe" + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script \`$wrapper'" + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "\`$lib' has not been installed in \`$libdir'" + finalize=no + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + $opt_dry_run || { + if test "$finalize" = yes; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file="$func_basename_result" + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_silent || { + func_quote_for_expand "$relink_command" + eval "func_echo $func_quote_for_expand_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink \`$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file="$outputname" + else + func_warning "cannot relink \`$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name="$func_basename_result" + + # Set up the ranlib parameters. + oldlib="$destdir/$name" + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' 
+ + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run \`$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = install && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $opt_debug + my_outputname="$1" + my_originator="$2" + my_pic_p="${3-no}" + my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms="${my_outputname}S.c" + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${my_outputname}.nm" + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. 
*/ +/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +/* External symbol declarations for the compiler. */\ +" + + if test "$dlself" = yes; then + func_verbose "generating symbol list for \`$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from \`$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$outputname.exp" + $opt_dry_run || { + $RM $export_symbols + eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from \`$dlprefile'" + func_basename "$dlprefile" + name="$func_basename_result" + case $host in + *cygwin* | *mingw* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval "curr_lafile=\$libfile_$func_tr_sh_result" + 
dlprefile_dlbasename="" + if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname" ; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename="$func_basename_result" + else + # no lafile. user explicitly requested -dlpreopen . + $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename" ; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if $GREP -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[]; +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{\ + { \"$my_originator\", (void *) 0 }," + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case "$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. 
+ *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + if test "X$my_pic_p" != Xno; then + pic_flag_for_symtable=" $pic_flag" + fi + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' + + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"' + + # Transform the symbol file into the correct name. + symfileobj="$output_objdir/${my_outputname}S.$objext" + case $host in + *cygwin* | *mingw* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for \`$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. 
+ compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +# Despite the name, also deal with 64 bit binaries. +func_win32_libid () +{ + $opt_debug + win32_libid_type="unknown" + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | + $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then + func_to_tool_file "$1" func_convert_file_msys_to_w32 + win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | + $SED -n -e ' + 1,100{ + / I /{ + s,.*,import, + p + q + } + }'` + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $ECHO "$win32_libid_type" +} + +# func_cygming_dll_for_implib ARG +# +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $opt_debug + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# The is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. +# +# Echos the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $opt_debug + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive which possess that section. Heuristic: eliminate + # all those which have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.) 
This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_ms_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` + test -n "$func_cygming_ms_implib_tmp" +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $opt_debug + if func_cygming_gnu_implib_p "$1" ; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1" ; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result="" + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $opt_debug + f_ex_an_ar_dir="$1"; shift + f_ex_an_ar_oldlib="$1" + if test "$lock_old_archive_extraction" = yes; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test "$lock_old_archive_extraction" = yes; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... +func_extract_archives () +{ + $opt_debug + my_gentop="$1"; shift + my_oldlibs=${1+"$@"} + my_oldobjs="" + my_xlib="" + my_xabs="" + my_xdir="" + + for my_xlib in $my_oldlibs; do + # Extract the objects. 
+ case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib="$func_basename_result" + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir="$my_gentop/$my_xlib_u" + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? + darwin_archive=$my_xabs + darwin_curdir=`pwd` + darwin_base_archive=`basename "$darwin_archive"` + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches ; do + func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}" + $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" + cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" + func_extract_an_archive "`pwd`" "${darwin_base_archive}" + cd "$darwin_curdir" + $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u` + darwin_file= + darwin_files= + for darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ 
+ cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result="$my_oldobjs" +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory in which it is stored is +# the $objdir directory. This is a cygwin/mingw-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=\"$qECHO\" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. 
+ lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. 
+ thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#ifdef _MSC_VER +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +/* declarations of non-ANSI functions */ +#if defined(__MINGW32__) +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined(__CYGWIN__) +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined (other platforms) ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined(_MSC_VER) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +# ifndef _INTPTR_T_DEFINED +# define _INTPTR_T_DEFINED +# define intptr_t int +# endif +#elif defined(__MINGW32__) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined(__CYGWIN__) +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined (other platforms) ... 
*/ +#endif + +#if defined(PATH_MAX) +# define LT_PATHMAX PATH_MAX +#elif defined(MAXPATHLEN) +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ + defined (__OS2__) +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free ((void *) stale); stale = 0; } \ +} while (0) + +#if defined(LT_DEBUGWRAPPER) +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char 
*file, int line, const char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + int tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = q - p; + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if (check_executable 
(concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (strcmp (str, pat) == 0) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + int len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + int orig_value_len = strlen (orig_value); + int add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, const 
char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + int len = strlen (new_value); + while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[len-1] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. 
+ - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +char ** +prepare_spawn (char **argv) +{ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; + + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; + + length = 0; + backslashes = 0; + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; + } + 
new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -e 's/\([\\"]\)/\\\1/g' \ + -e 's/^/ fputs ("/' -e 's/$/\\n", f);/' + + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $opt_debug + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_mode_link arg... +func_mode_link () +{ + $opt_debug + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. 
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=no + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module="${wl}-single_module" + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg="$1" + shift + func_quote_for_eval "$arg" + qarg=$func_quote_for_eval_unquoted_result + func_append libtool_args " $func_quote_for_eval_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir="$arg" + prev= + continue + ;; + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + test -f "$arg" \ + || func_fatal_error "symbol file \`$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir="$arg" + prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. 
+ + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file \`$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + precious_regex) + precious_files_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds="$arg" + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg="$arg" + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. 
+ func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "\`-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between \`-L' and \`$1'" + else + func_fatal_error "need path for \`-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of \`$dir'" + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. 
+ test "X$arg" = "X-lc" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test "X$arg" = "X-lc" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module="${wl}-multi_module" + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. 
+ func_warning "\`-no-install' is ignored for $host" + func_warning "assuming \`-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. + continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. 
+ continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $func_quote_for_eval_result" + func_append compiler_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $wl$func_quote_for_eval_result" + func_append compiler_flags " $wl$func_quote_for_eval_result" + func_append linker_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time 
optimization + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-flto*|-fwhopr*|-fuse-linker-plugin) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + # Some other compiler flag. + -* | +*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. 
+ non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the \`$prevarg' option requires an argument" + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname="$func_basename_result" + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + func_dirname "$output" "/" "" + output_objdir="$func_dirname_result$objdir" + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps ; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test "$linkmode" = lib; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test "$linkmode,$pass" = "lib,link"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs="$tmp_deplibs" + fi + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan"; then + libs="$deplibs" + deplibs= + fi + if test "$linkmode" = prog; then + case $pass in + dlopen) libs="$dlfiles" ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test "$linkmode,$pass" = "lib,dlpreopen"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs="$dlprefiles" + fi + if test "$pass" = dlopen; then + # Collect dlpreopened libraries + save_deplibs="$deplibs" + deplibs= + fi + + for deplib in $libs; do + lib= + found=no + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test "$linkmode" != lib && test "$linkmode" != prog; then + func_warning "\`-l' is ignored for archives/objects" + continue + fi + func_stripname '-l' '' "$deplib" + 
name=$func_stripname_result + if test "$linkmode" = lib; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib="$searchdir/lib${name}${search_ext}" + if test -f "$lib"; then + if test "$search_ext" = ".la"; then + found=yes + else + found=no + fi + break 2 + fi + done + done + if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + else # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll="$l" + done + if test "X$ll" = "X$old_library" ; then # only static version available + found=no + func_dirname "$lib" "" "." 
+ ladir="$func_dirname_result" + lib=$ladir/$old_library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + fi + ;; # -l + *.ltframework) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test "$pass" = conv && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test "$pass" = scan; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "\`-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test "$pass" = link; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. 
+ case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=no + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=yes + fi + ;; + pass_all) + valid_a_lib=yes + ;; + esac + if test "$valid_a_lib" != yes; then + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." + else + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" 
+ deplibs="$deplib $deplibs" + fi + ;; + esac + continue + ;; + prog) + if test "$pass" != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + elif test "$linkmode" = prog; then + if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + + if test "$found" = yes || test -f "$lib"; then : + else + func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'" + fi + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "\`$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir="$func_dirname_result" + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test "$linkmode" != prog && test "$linkmode" != lib; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test "$pass" = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + elif test "$linkmode" != prog && test "$linkmode" != lib; then + func_fatal_error "\`$lib' is not a convenience library" + fi + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test "$prefer_static_libs" = yes || + test "$prefer_static_libs,$installed" = "built,no"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib="$l" + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + + # This library was specified with -dlopen. + if test "$pass" = dlopen; then + if test -z "$libdir"; then + func_fatal_error "cannot -dlopen a convenience library: \`$lib'" + fi + if test -z "$dlname" || + test "$dlopen_support" != yes || + test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of \`$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir="$ladir" + fi + ;; + esac + func_basename "$lib" + laname="$func_basename_result" + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library \`$lib' was moved." + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$lt_sysroot$libdir" + absdir="$lt_sysroot$libdir" + fi + test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir="$ladir" + absdir="$abs_ladir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test "$pass" = dlpreopen; then + if test -z "$libdir" && test "$linkmode" = prog; then + func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'" + fi + case "$host" in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test "$linkmode" = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test "$linkmode" = prog && test "$pass" != link; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if test "$linkalldeplibs" = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test "$linkmode,$pass" = "prog,link"; then + if test -n "$library_names" && + { { test "$prefer_static_libs" = no || + test "$prefer_static_libs,$installed" = "built,yes"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then + # Make sure the rpath contains only unique directories. 
+ case "$temp_rpath:" in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test "$use_static_libs" = built && test "$installed" = yes; then + use_static_libs=no + fi + if test -n "$library_names" && + { test "$use_static_libs" = no || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test "$installed" = no; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule="" + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule="$dlpremoduletest" + break + fi + done + if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then + echo + if test "$linkmode" = prog; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test "$linkmode" = lib && + test "$hardcode_into_libs" = yes; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname="$1" + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc*) + func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + func_basename "$soroot" + soname="$func_basename_result" + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from \`$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for \`$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test "$linkmode" = prog || test "$opt_mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + case $host in + *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; + *-*-sysv4*uw2*) add_dir="-L$dir" ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir="-L$dir" ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we can not + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null ; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library" ; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo 
"*** The link will probably fail, sorry" + else + add="$dir/$old_library" + fi + elif test -n "$old_library"; then + add="$dir/$old_library" + fi + fi + esac + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$dir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test "$linkmode" = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && + test "$hardcode_minus_L" != yes && + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test "$linkmode" = prog || test "$opt_mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add="-l$name" + elif test "$hardcode_automatic" = yes; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib" ; then + add="$inst_prefix_dir$libdir/$linklib" + else + add="$libdir/$linklib" + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + fi + + if test "$linkmode" = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test "$linkmode" = prog; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system can not link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test "$module" = yes; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test "$linkmode" = lib; then + if test -n "$dependency_libs" && + { test "$hardcode_into_libs" != yes || + test "$build_old_libs" = yes || + test "$link_static" = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test "$link_all_deplibs" != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path="$deplib" ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of \`$dir'" + absdir="$dir" + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names" ; then + for tmp in $deplibrary_names ; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl" ; then + depdepl="$absdir/$objdir/$depdepl" + darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}" + func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}" + path= + fi + fi + ;; + *) + path="-L$absdir/$objdir" + ;; + esac + else + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "\`$deplib' seems to be moved" + + path="-L$absdir" + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test "$pass" = link; then + if test "$linkmode" = "prog"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs="$newdependency_libs" + if test "$pass" = dlpreopen; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + 
deplibs="$deplib $deplibs" + done + fi + if test "$pass" != dlopen; then + if test "$pass" != conv; then + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs ; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i="" + ;; + esac + if test -n "$i" ; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test "$linkmode" = prog; then + dlfiles="$newdlfiles" + fi + if test "$linkmode" = prog || test "$linkmode" = lib; then + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for archives" + + test -n "$release" && \ + func_warning "\`-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "\`-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs="$output" + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. 
+ case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test "$module" = no && \ + func_fatal_help "libtool library \`$output' must begin with \`lib'" + + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test "$dlself" != no && \ + func_warning "\`-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test "$#" -gt 1 && \ + func_warning "ignoring multiple \`-rpath's for a libtool library" + + install_libdir="$1" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + # Some compilers have problems with a `.al' extension so + # convenience libraries should have the same extension an + # archive normally would. + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "\`-release' is ignored for convenience libraries" + else + + # Parse the version information argument. 
+ save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + shift + IFS="$save_ifs" + + test -n "$7" && \ + func_fatal_help "too many parameters to \`-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major="$1" + number_minor="$2" + number_revision="$3" + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # which has an extra 1 added just for fun + # + case $version_type in + darwin|linux|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_revision" + ;; + freebsd-aout|freebsd-elf|qnx|sunos) + current="$number_major" + revision="$number_minor" + age="0" + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_minor" + lt_irix_increment=no + ;; + esac + ;; + no) + current="$1" + revision="$2" + age="$3" + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT \`$current' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION \`$revision' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE \`$age' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE \`$age' is greater than the current interface number \`$current'" + func_fatal_error "\`$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current" + ;; + + irix | nonstopux) + if test "X$lt_irix_increment" = "Xno"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test "$loop" -ne 0; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test "$loop" -ne 0; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + func_append verstring ":${current}.0" + ;; + + qnx) + major=".$current" + versuffix=".$current" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. 
+ func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + + *) + func_fatal_configuration "unknown library version type \`$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + func_warning "undefined symbols not allowed in $host shared libraries" + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + + fi + + func_generate_dlsyms "$libname" "$libname" "yes" + func_append libobjs " $symfileobj" + test "X$libobjs" = "X " && libobjs= + + if test "$opt_mode" != relink; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) + if test "X$precious_files_regex" != "X"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. 
+ if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. + temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. 
+ ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test "$build_libtool_need_lc" = "yes"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. + name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c </dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. 
+ # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib="" + ;; + esac + fi + if test -n "$a_deplib" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs="" + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + for i in $predeps $postdeps ; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." 
+ fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test "$allow_undefined" = no; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs="$new_libs" + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + if test "$hardcode_into_libs" = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$opt_mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_apped perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + if test -n "$hardcode_libdir_flag_spec_ld"; then + eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\" + else + eval dep_rpath=\"$hardcode_libdir_flag_spec\" + fi + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname="$1" + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib="$output_objdir/$realname" + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols="$output_objdir/$libname.uexp" + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + if test "x`$SED 1q $export_symbols`" != xEXPORTS; then + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols="$export_symbols" + export_symbols= + always_export_symbols=yes + fi + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs="$IFS"; IFS='~' + for cmd1 in $cmds; do + IFS="$save_ifs" + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test "$try_normal_branch" = yes \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=${output_objdir}/${output_la}.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS="$save_ifs" + if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs="$tmp_deplibs" + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test "$compiler_needs_object" = yes && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test "X$skipped_export" != "X:" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then + output=${output_objdir}/${output_la}.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then + output=${output_objdir}/${output_la}.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test "$compiler_needs_object" = yes; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-${k}.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test "X$objlist" = X || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test "$k" -eq 1 ; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-${k}.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-${k}.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\${concat_cmds}$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + if ${skipped_export-false}; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + fi + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs="$IFS"; IFS='~' + for cmd in $concat_cmds; do + IFS="$save_ifs" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + if ${skipped_export-false}; then + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + fi + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "\`-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object \`$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec and hope we can get by with + # turning comma into space.. 
+ wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + else + gentop="$output_objdir/${obj}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test "$build_libtool_libs" != yes && libobjs="$non_pic_objects" + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + if test "$build_libtool_libs" != yes; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "\`-release' is ignored for programs" + + test "$preload" = yes \ + && test "$dlopen_support" = unknown \ + && test "$dlopen_self" = unknown \ + && test "$dlopen_self_static" = unknown && \ + func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test "$tagname" = CXX ; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " ${wl}-bind_at_load" + func_append finalize_command " ${wl}-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs="$new_libs" + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" "no" + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=yes + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=no + ;; + *cygwin* | *mingw* ) + if test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + *) + if test "$need_relink" = no || test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + esac + if test "$wrappers_required" = no; then + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.${objext}"; then + func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"' + fi + + exit $exit_status + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "\`$output' will be relinked during installation" + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource="$output_path/$objdir/lt-$output_name.c" + cwrapper="$output_path/$output_name.exe" + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host" ; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save $symfileobj" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$old_deplibs $non_pic_objects" + if test "$preload" = yes && test -f "$symfileobj"; then + func_append oldobjs " $symfileobj" + fi + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase="$func_basename_result" + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." + for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." 
+ save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj" ; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test "X$oldobjs" = "X" ; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. + case $output in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. 
+ relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + if test "$hardcode_automatic" = yes ; then + relink_command= + fi + + # Only create the output if not a dry run. + $opt_dry_run || { + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. 
that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles="$newdlprefiles" + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles="$newdlprefiles" + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test "x$bindir" != x ; + then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# Please DO NOT delete this file! 
+# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that can not go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no && test "$need_relink" = yes; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' + ;; + esac + exit $EXIT_SUCCESS +} + +{ test "$opt_mode" = link || test "$opt_mode" = relink; } && + func_mode_link ${1+"$@"} + + +# func_mode_uninstall arg... +func_mode_uninstall () +{ + $opt_debug + RM="$nonopt" + files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=yes ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." 
+ dir="$func_dirname_result" + if test "X$dir" = X.; then + odir="$objdir" + else + odir="$dir/$objdir" + fi + func_basename "$file" + name="$func_basename_result" + test "$opt_mode" = uninstall && odir="$dir" + + # Remember odir for removal later, being careful to avoid duplicates + if test "$opt_mode" = clean; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi + + rmfiles="$file" + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case "$opt_mode" in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. 
+ if test -n "$pic_object" && + test "$pic_object" != none; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && + test "$non_pic_object" != none; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test "$opt_mode" = clean ; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. + if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name" ; then + func_append rmfiles " $odir/lt-${noexename}.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +{ test "$opt_mode" = uninstall || test "$opt_mode" = clean; } && + func_mode_uninstall ${1+"$@"} + +test -z "$opt_mode" && { + help="$generic_help" + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode \`$opt_mode'" + +if test -n 
"$exec_cmd"; then + eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# in which we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: +# vi:sw=2 + diff --git a/dist/pubdef.in b/dist/pubdef.in new file mode 100644 index 00000000..aef8b298 --- /dev/null +++ b/dist/pubdef.in @@ -0,0 +1,545 @@ +# $Id: pubdef.in,v 4d4a04145f28 2010/07/28 15:20:45 ben $ +# +# Name +# D == documentation +# I == include file +# J == Java constant +# N == wrapped by the Java native layer +# C == C# constant +DB_AFTER D I J C +DB_AGGRESSIVE D I J C +DB_ALIGN8 * I * * +DB_ALREADY_ABORTED * I * * +DB_AM_CHKSUM * I * * +DB_AM_COMPENSATE * I * * +DB_AM_COMPRESS * I * * +DB_AM_CREATED * I * * +DB_AM_CREATED_MSTR * I * * +DB_AM_DBM_ERROR * I * * +DB_AM_DELIMITER * I * * +DB_AM_DISCARD * I * * +DB_AM_DUP * I * * +DB_AM_DUPSORT * I * * +DB_AM_ENCRYPT * I * * +DB_AM_FIXEDLEN * I * * +DB_AM_INMEM * I * * +DB_AM_INORDER * I * * +DB_AM_IN_RENAME * I * * +DB_AM_NOT_DURABLE * I * * 
+DB_AM_OPEN_CALLED * I * * +DB_AM_PAD * I * * +DB_AM_PGDEF * I * * +DB_AM_RDONLY * I * * +DB_AM_READ_UNCOMMITTED * I * * +DB_AM_RECNUM * I * * +DB_AM_RECOVER * I * * +DB_AM_RENUMBER * I * * +DB_AM_REVSPLITOFF * I * * +DB_AM_SECONDARY * I * * +DB_AM_SNAPSHOT * I * * +DB_AM_SUBDB * I * * +DB_AM_SWAP * I * * +DB_AM_TXN * I * * +DB_AM_VERIFYING * I * * +DB_APPEND D I J C +DB_ARCH_ABS D I J C +DB_ARCH_DATA D I J C +DB_ARCH_LOG D I J C +DB_ARCH_REMOVE D I J C +DB_ASSOC_IMMUTABLE_KEY * I J C +DB_ASSOC_CREATE * I J C +DB_AUTO_COMMIT D I J C +DB_BEFORE D I J C +DB_BOOTSTRAP_HELPER D I J C +DB_BTREE D I J C +DB_BTREEMAGIC * I * * +DB_BTREEOLDVER * I * * +DB_BTREEVERSION * I * * +DB_BUFFER_SMALL D I J C +DB_CDB_ALLDB D I J C +DB_CHKSUM D I J C +DB_CKP_INTERNAL * I * * +DB_CONFIG D * * * +DB_CONSUME D I J C +DB_CONSUME_WAIT D I J C +DB_CREATE D I J C +DB_CURRENT D I J C +DB_CURSOR_BULK D I J C +DB_CURSOR_TRANSIENT * I * * +DB_CXX_NO_EXCEPTIONS D I * * +DB_DATABASE_LOCK * I * * +DB_DATABASE_LOCKING * I * * +DB_DBM_HSEARCH * I * * +DB_DBT_APPMALLOC D I N C +DB_DBT_BULK D I J C +DB_DBT_DUPOK * I * * +DB_DBT_ISSET * I * * +DB_DBT_MALLOC D I J C +DB_DBT_MULTIPLE D I N C +DB_DBT_PARTIAL D I J C +DB_DBT_READONLY D I J C +DB_DBT_REALLOC D I N C +DB_DBT_STREAMING D I * * +DB_DBT_USERCOPY * I N C +DB_DBT_USERMEM D I J C +DB_DEGREE_2 * I * * +DB_DELETED * I * * +DB_DIRECT D I * * +DB_DIRECT_DB D I J C +DB_DIRTY_READ * I * * +DB_DONOTINDEX D I N C +DB_DSYNC_DB D I J C +DB_DUP D I J C +DB_DUPSORT D I J C +DB_DURABLE_UNKNOWN * I * * +DB_EID_BROADCAST D I J C +DB_EID_INVALID D I J C +DB_EID_MASTER D I J C +DB_ENCRYPT D I J C +DB_ENCRYPT_AES D I J C +DB_ENV_AUTO_COMMIT * I * * +DB_ENV_CDB_ALLDB * I * * +DB_ENV_DATABASE_LOCKING * I * * +DB_ENV_DIRECT_DB * I * * +DB_ENV_DSYNC_DB * I * * +DB_ENV_FAILCHK * I * * +DB_ENV_MULTIVERSION * I * * +DB_ENV_NOLOCKING * I * * +DB_ENV_NOMMAP * I * * +DB_ENV_NOPANIC * I * * +DB_ENV_OVERWRITE * I * * +DB_ENV_REGION_INIT * I * * +DB_ENV_TIME_NOTGRANTED * I * * 
+DB_ENV_TXN_NOSYNC * I * * +DB_ENV_TXN_NOWAIT * I * * +DB_ENV_TXN_SNAPSHOT * I * * +DB_ENV_TXN_WRITE_NOSYNC * I * * +DB_ENV_YIELDCPU * I * * +DB_EVENT_NOT_HANDLED * I * * +DB_EVENT_NO_SUCH_EVENT * I * * +DB_EVENT_PANIC D I N C +DB_EVENT_REG_ALIVE D I * * +DB_EVENT_REG_PANIC D I * * +DB_EVENT_REP_CLIENT D I N C +DB_EVENT_REP_CONNECT_BROKEN D I N C +DB_EVENT_REP_CONNECT_ESTD D I N C +DB_EVENT_REP_CONNECT_TRY_FAILED D I N C +DB_EVENT_REP_DUPMASTER D I N C +DB_EVENT_REP_ELECTED D I N C +DB_EVENT_REP_ELECTION_FAILED D I N C +DB_EVENT_REP_INIT_DONE D I N C +DB_EVENT_REP_JOIN_FAILURE D I N C +DB_EVENT_REP_LOCAL_SITE_REMOVED D I N C +DB_EVENT_REP_MASTER D I N C +DB_EVENT_REP_MASTER_FAILURE D I N C +DB_EVENT_REP_NEWMASTER D I N C +DB_EVENT_REP_PERM_FAILED D I N C +DB_EVENT_REP_SITE_ADDED D I N C +DB_EVENT_REP_SITE_REMOVED D I N C +DB_EVENT_REP_STARTUPDONE D I N C +DB_EVENT_REP_WOULD_ROLLBACK * I * * +DB_EVENT_WRITE_FAILED D I N C +DB_EXCL D I J C +DB_EXTENT * I * * +DB_FAILCHK D I * * +DB_FAILCHK_ISALIVE * I * * +DB_FAST_STAT D I J C +DB_FCNTL_LOCKING * I * * +DB_FILE_ID_LEN * I * * +DB_FIRST D I J C +DB_FLUSH D I J C +DB_FORCE D I J C +DB_FORCESYNC D I J C +DB_FOREIGN_ABORT * I J C +DB_FOREIGN_CASCADE * I J C +DB_FOREIGN_CONFLICT * I * C +DB_FOREIGN_NULLIFY * I J C +DB_FREELIST_ONLY D I J C +DB_FREE_SPACE D I J C +DB_GET_BOTH D I J C +DB_GET_BOTHC * I * * +DB_GET_BOTH_LTE D I * * +DB_GET_BOTH_RANGE D I J C +DB_GET_RECNO D I J C +DB_GID_SIZE * I N C +DB_GROUP_CREATOR D I J C +DB_HANDLE_LOCK * I * * +DB_HASH D I J C +DB_HASHMAGIC * I * * +DB_HASHOLDVER * I * * +DB_HASHVERSION * I * * +DB_HEAP D I J C +DB_HEAPMAGIC * I * * +DB_HEAPOLDVER * I * * +DB_HEAPVERSION * I * * +DB_HEAP_FULL D I J C +DB_HEAP_RID_SZ D I * * +DB_HOME D * * * +DB_HOTBACKUP_IN_PROGRESS D I J C +DB_IGNORE_LEASE D I J C +DB_IMMUTABLE_KEY D I J C +DB_INIT_CDB D I J C +DB_INIT_LOCK D I J C +DB_INIT_LOG D I J C +DB_INIT_MPOOL D I J C +DB_INIT_MUTEX * I * * +DB_INIT_REP D I J C +DB_INIT_TXN D I J C +DB_INORDER 
D I J C +DB_INTERNAL_DB * I * * +DB_JOINENV * I J C +DB_JOIN_ITEM D I J C +DB_JOIN_NOSORT D I J C +DB_KEYEMPTY D I J C +DB_KEYEXIST D I J C +DB_KEYFIRST D I J C +DB_KEYLAST D I J C +DB_LAST D I J C +DB_LEGACY D I J C +DB_LOCAL_SITE D I J C +DB_LOCKDOWN D I J C +DB_LOCKVERSION * I * * +DB_LOCK_CHECK * I * * +DB_LOCK_DEADLOCK D I N C +DB_LOCK_DEFAULT D I J C +DB_LOCK_DUMP * I * * +DB_LOCK_EXPIRE D I J C +DB_LOCK_GET D I J C +DB_LOCK_GET_TIMEOUT D I J C +DB_LOCK_INHERIT * I * * +DB_LOCK_IREAD D I J C +DB_LOCK_IWR D I J C +DB_LOCK_IWRITE D I J C +DB_LOCK_MAXLOCKS D I J C +DB_LOCK_MAXWRITE D I J C +DB_LOCK_MINLOCKS D I J C +DB_LOCK_MINWRITE D I J C +DB_LOCK_NG * I * * +DB_LOCK_NORUN * I * * +DB_LOCK_NOTGRANTED D I J C +DB_LOCK_NOWAIT D I J C +DB_LOCK_OLDEST D I J C +DB_LOCK_PUT D I J C +DB_LOCK_PUT_ALL D I J C +DB_LOCK_PUT_OBJ D I J C +DB_LOCK_PUT_READ * I * * +DB_LOCK_RANDOM D I J C +DB_LOCK_READ D I J C +DB_LOCK_READ_UNCOMMITTED * I * * +DB_LOCK_RECORD * I * * +DB_LOCK_SET_TIMEOUT * I * * +DB_LOCK_SWITCH * I * * +DB_LOCK_TIMEOUT D I J C +DB_LOCK_TRADE * I * * +DB_LOCK_UPGRADE * I * * +DB_LOCK_UPGRADE_WRITE * I * * +DB_LOCK_WAIT * I * * +DB_LOCK_WRITE D I J C +DB_LOCK_WWRITE * I * * +DB_LOCK_YOUNGEST D I J C +DB_LOGCHKSUM D I * * +DB_LOGFILEID_INVALID * I * * +DB_LOGMAGIC * I * * +DB_LOGOLDVER * I * * +DB_LOGVERSION * I * * +DB_LOGVERSION_LATCHING D I * * +DB_LOG_AUTO_REMOVE D I J C +DB_LOG_BUFFER_FULL D I * C +DB_LOG_CHKPNT * I * * +DB_LOG_COMMIT * I * * +DB_LOG_DIRECT D I J C +DB_LOG_DISK * I * * +DB_LOG_DSYNC D I J C +DB_LOG_IN_MEMORY D I J C +DB_LOG_LOCKED * I * * +DB_LOG_NOCOPY * I * * +DB_LOG_NO_DATA * I * * +DB_LOG_NOT_DURABLE * I * * +DB_LOG_SILENT_ERR * I * * +DB_LOG_WRNOSYNC * I * * +DB_LOG_VERIFY_BAD D I * * +DB_LOG_VERIFY_CAF D I * * +DB_LOG_VERIFY_ERR D I * * +DB_LOG_VERIFY_PARTIAL * I * * +DB_LOG_VERIFY_DBFILE * I * * +DB_LOG_VERIFY_FORWARD * I * * +DB_LOG_VERIFY_INTERR D I * * +DB_LOG_VERIFY_WARNING D I * * +DB_LOG_VERIFY_VERBOSE D I * * +DB_LOG_ZERO D I 
J C +DB_LSTAT_ABORTED * I * * +DB_LSTAT_EXPIRED * I * * +DB_LSTAT_FREE * I * * +DB_LSTAT_HELD * I * * +DB_LSTAT_PENDING * I * * +DB_LSTAT_WAITING * I * * +DB_MAX_PAGES * I * * +DB_MAX_RECORDS * I * * +DB_MEM_LOCK D I J C +DB_MEM_LOCKOBJECT D I J C +DB_MEM_LOCKER D I J C +DB_MEM_LOGID D I J C +DB_MEM_TRANSACTION D I J C +DB_MEM_THREAD D I J C +DB_MPOOL_CREATE D I * * +DB_MPOOL_DIRTY D I * * +DB_MPOOL_TRY D I * * +DB_MPOOL_DISCARD * I * * +DB_MPOOL_EDIT D I * * +DB_MPOOL_FREE * I * * +DB_MPOOL_LAST D I * * +DB_MPOOL_NEW D I * * +DB_MPOOL_NOFILE D I J C +DB_MPOOL_NOLOCK * I * * +DB_MPOOL_UNLINK D I J C +DB_MULTIPLE D I J C +DB_MULTIPLE_INIT D I * * +DB_MULTIPLE_KEY D I J C +DB_MULTIPLE_KEY_NEXT D I * * +DB_MULTIPLE_KEY_RESERVE_NEXT D I * * +DB_MULTIPLE_KEY_WRITE_NEXT D I * * +DB_MULTIPLE_NEXT D I * * +DB_MULTIPLE_RECNO_NEXT D I * * +DB_MULTIPLE_RECNO_RESERVE_NEXT D I * * +DB_MULTIPLE_RECNO_WRITE_INIT D I * * +DB_MULTIPLE_RECNO_WRITE_NEXT D I * * +DB_MULTIPLE_RESERVE_NEXT D I * * +DB_MULTIPLE_WRITE_INIT D I * * +DB_MULTIPLE_WRITE_NEXT D I * * +DB_MULTIVERSION D I J C +DB_MUTEX_ALLOCATED * I * * +DB_MUTEX_LOCKED * I * * +DB_MUTEX_LOGICAL_LOCK * I * * +DB_MUTEX_PROCESS_ONLY D I * C +DB_MUTEX_SELF_BLOCK D I * C +DB_MUTEX_SHARED D I * * +DB_NEEDSPLIT * I * * +DB_NEXT D I J C +DB_NEXT_DUP D I J C +DB_NEXT_NODUP D I J C +DB_NODUPDATA D I J C +DB_NOERROR * I * * +DB_NOFLUSH * I * * +DB_NOLOCKING D I J C +DB_NOMMAP D I J C +DB_NOORDERCHK D I J C +DB_NOOVERWRITE D I J C +DB_NOPANIC D I J C +DB_NOSERVER D I * C +DB_NOSYNC D I J C +DB_NOTFOUND D I J C +DB_NO_AUTO_COMMIT * I * * +DB_NO_CHECKPOINT * I * * +DB_ODDFILESIZE D I * * +DB_OK_BTREE * I * * +DB_OK_HASH * I * * +DB_OK_HEAP * I * * +DB_OK_QUEUE * I * * +DB_OK_RECNO * I * * +DB_OLD_VERSION D I * C +DB_OPFLAGS_MASK * I * * +DB_ORDERCHKONLY D I J C +DB_OVERWRITE D I J C +DB_OVERWRITE_DUP D I * * +DB_PAGE_LOCK * I * * +DB_PAGE_NOTFOUND D I * C +DB_PANIC_ENVIRONMENT D I J C +DB_POSITION D I J C +DB_PREV D I J C +DB_PREV_DUP D I J 
C +DB_PREV_NODUP D I J C +DB_PRINTABLE D I J C +DB_PRIORITY_DEFAULT D I J C +DB_PRIORITY_HIGH D I J C +DB_PRIORITY_LOW D I J C +DB_PRIORITY_UNCHANGED D I * * +DB_PRIORITY_VERY_HIGH D I J C +DB_PRIORITY_VERY_LOW D I J C +DB_PRIVATE D I J C +DB_PR_PAGE * I * * +DB_PR_RECOVERYTEST * I * * +DB_QAMMAGIC * I * * +DB_QAMOLDVER * I * * +DB_QAMVERSION * I * * +DB_QUEUE D I J C +DB_RDONLY D I J C +DB_RDWRMASTER * I * * +DB_READ_COMMITTED D I J C +DB_READ_UNCOMMITTED D I J C +DB_RECNO D I J C +DB_RECNUM D I J C +DB_RECORD_LOCK * I * * +DB_RECOVER D I J C +DB_RECOVER_FATAL D I J C +DB_REDO * I * * +DB_REGION_INIT D I J C +DB_REGION_MAGIC * I * * +DB_REGISTER D I J C +DB_RENAMEMAGIC * I * * +DB_RENUMBER D I J C +DB_REP_CONF_AUTOROLLBACK * I * * +DB_REP_WOULDROLLBACK * I * * +DB_REPMGR_ACKS_ALL D I J C +DB_REPMGR_ACKS_ALL_AVAILABLE D I J C +DB_REPMGR_ACKS_ALL_PEERS D I J C +DB_REPMGR_ACKS_NONE D I J C +DB_REPMGR_ACKS_ONE D I J C +DB_REPMGR_ACKS_ONE_PEER D I J C +DB_REPMGR_ACKS_QUORUM D I J C +DB_REPMGR_CONF_2SITE_STRICT D I J C +DB_REPMGR_CONF_ELECTIONS D I J C +DB_REPMGR_CONNECTED D I J C +DB_REPMGR_DISCONNECTED D I J C +DB_REPMGR_NEED_RESPONSE D I J C +DB_REPMGR_ISPEER D I J * +DB_REPMGR_PEER D I J C +DB_REP_ACK_TIMEOUT D I J C +DB_REP_ANYWHERE D I J C +DB_REP_BULKOVF * I * * +DB_REP_CHECKPOINT_DELAY D I J C +DB_REP_CLIENT D I J C +DB_REP_CONF_AUTOINIT D I J C +DB_REP_CONF_BULK D I J C +DB_REP_CONF_DELAYCLIENT D I J C +DB_REP_CONF_INMEM D I J C +DB_REP_CONF_LEASE D I J C +DB_REP_CONF_NOWAIT D I J C +DB_REP_CONNECTION_RETRY D I J C +DB_REP_DEFAULT_PRIORITY * I J C +DB_REP_DUPMASTER D I N C +DB_REP_ELECTION D I J C +DB_REP_ELECTION_RETRY D I J C +DB_REP_ELECTION_TIMEOUT D I J C +DB_REP_FULL_ELECTION_TIMEOUT D I J C +DB_REP_HANDLE_DEAD D I N C +DB_REP_HEARTBEAT_MONITOR D I J C +DB_REP_HEARTBEAT_SEND D I J C +DB_REP_HOLDELECTION D I N C +DB_REP_IGNORE D I J C +DB_REP_ISPERM D I J C +DB_REP_JOIN_FAILURE D I N C +DB_REP_LEASE_EXPIRED D I N C +DB_REP_LEASE_TIMEOUT D I J C 
+DB_REP_LOCKOUT D I N C +DB_REP_LOGREADY * I * * +DB_REP_MASTER D I J C +DB_REP_NEWMASTER * I * * +DB_REP_NEWSITE D I J C +DB_REP_NOBUFFER D I J C +DB_REP_NOTPERM D I J C +DB_REP_PAGEDONE * I * * +DB_REP_PERMANENT D I J C +DB_REP_REREQUEST D I J C +DB_REP_UNAVAIL D I N C +DB_REVSPLITOFF D I J C +DB_RMW D I J C +DB_RUNRECOVERY D I N C +DB_SALVAGE D I J C +DB_SA_SKIPFIRSTKEY * I * * +DB_SA_UNKNOWNKEY * I * * +DB_SECONDARY_BAD D I * C +DB_SEQUENCE_OLDVER * I * * +DB_SEQUENCE_VERSION * I * * +DB_SEQ_DEC D I J C +DB_SEQ_INC D I J C +DB_SEQ_RANGE_SET * I * * +DB_SEQ_WRAP D I J C +DB_SEQ_WRAPPED * I * * +DB_SET D I J C +DB_SET_LTE D I * * +DB_SET_LOCK_TIMEOUT D I J C +DB_SET_RANGE D I J C +DB_SET_REG_TIMEOUT D I * * +DB_SET_RECNO D I J C +DB_SET_TXN_LSNP * I * * +DB_SET_TXN_NOW * I * * +DB_SET_TXN_TIMEOUT D I J C +DB_SHALLOW_DUP * I * * +DB_SNAPSHOT D I J C +DB_STAT_ALL D I * C +DB_STAT_ALLOC * I * * +DB_STAT_CLEAR D I J C +DB_STAT_LOCK_CONF D I * C +DB_STAT_LOCK_LOCKERS D I * C +DB_STAT_LOCK_OBJECTS D I * C +DB_STAT_LOCK_PARAMS D I * C +DB_STAT_MEMP_HASH D I * C +DB_STAT_MEMP_NOERROR * I * * +DB_STAT_SUBSYSTEM D I * C +DB_STAT_SUMMARY * I * * +DB_ST_DUPOK * I * * +DB_ST_DUPSET * I * * +DB_ST_DUPSORT * I * * +DB_ST_IS_RECNO * I * * +DB_ST_OVFL_LEAF * I * * +DB_ST_RECNUM * I * * +DB_ST_RELEN * I * * +DB_ST_TOPLEVEL * I * * +DB_SURPRISE_KID * I * * +DB_SWAPBYTES * I * * +DB_SYSTEM_MEM D I J C +DB_THREAD D I J C +DB_THREADID_STRLEN D I * * +DB_TIMEOUT D I J C +DB_TIME_NOTGRANTED D I J C +DB_TRUNCATE D I J C +DB_TXNVERSION * I * * +DB_TXN_ABORT D I J C +DB_TXN_APPLY D I J C +DB_TXN_BACKWARD_ROLL D I J C +DB_TXN_BULK D I J C +DB_TXN_CKP * I * * +DB_TXN_FAMILY * I * * +DB_TXN_FORWARD_ROLL D I J C +DB_TXN_LOG_VERIFY D I * * +DB_TXN_NOSYNC D I J C +DB_TXN_NOT_DURABLE D I J C +DB_TXN_NOWAIT D I J C +DB_TXN_OPENFILES * I * * +DB_TXN_POPENFILES * I * * +DB_TXN_PRINT D I J C +DB_TXN_SNAPSHOT D I J C +DB_TXN_SYNC D I J C +DB_TXN_TOKEN_SIZE * I J C +DB_TXN_WAIT D I J C 
+DB_TXN_WRITE_NOSYNC D I J C +DB_UNDO * I * * +DB_UNKNOWN D I J C +DB_UNREF * I * * +DB_UPDATE_SECONDARY * I * * +DB_UPGRADE D I J C +DB_USE_ENVIRON D I J C +DB_USE_ENVIRON_ROOT D I J C +DB_VERB_DEADLOCK D I J C +DB_VERB_FILEOPS D I J C +DB_VERB_FILEOPS_ALL D I J C +DB_VERB_RECOVERY D I J C +DB_VERB_REGISTER D I J C +DB_VERB_REPLICATION D I J C +DB_VERB_REPMGR_CONNFAIL D I J C +DB_VERB_REPMGR_MISC D I J C +DB_VERB_REP_ELECT D I J C +DB_VERB_REP_LEASE D I J C +DB_VERB_REP_MISC D I J C +DB_VERB_REP_MSGS D I J C +DB_VERB_REP_SYNC D I J C +DB_VERB_REP_SYSTEM D I J C +DB_VERB_REP_TEST D I J C +DB_VERB_WAITSFOR D I J C +DB_VERIFY D I J C +DB_VERIFY_BAD D I N C +DB_VERIFY_FATAL * I * * +DB_VERIFY_PARTITION D I * * +DB_VERSION_FAMILY * I N C +DB_VERSION_RELEASE * I N C +DB_VERSION_MAJOR * I J C +DB_VERSION_MINOR * I J C +DB_VERSION_MISMATCH D I N C +DB_VERSION_PATCH * I J C +DB_VERSION_STRING * I N C +DB_VERSION_FULL_STRING * I N C +DB_WRITECURSOR D I J C +DB_WRITELOCK * I * * +DB_WRITEOPEN * I * * +DB_XA_CREATE D I * * +DB_YIELDCPU D I J C diff --git a/dist/s_all b/dist/s_all new file mode 100755 index 00000000..ab254b05 --- /dev/null +++ b/dist/s_all @@ -0,0 +1,39 @@ +#!/bin/sh - +# $Id$ + +sh s_readme # distribution README file. + +sh s_config # autoconf. +sh s_apiflags # API flags. +sh s_sig # Structure signature. +sh s_recover # logging/recovery files. +sh s_message # replication and repmgr message files. +sh s_message_id # generate message id. +sh s_sql # regenerate sqlite3.c +sh s_php # PHP config files + +############################################################# +# The following order is important, s_include must run after +# the other source files have been created. +############################################################# +sh s_include # standard include files. + +sh s_java # Java support. +sh s_csharp # Csharp support. +sh s_test # Test suite support. +#sh s_tags # Tags files. 
+ +############################################################# +# We only build the Cscope file for releases, it's too big to +# commit into the CVS tree. +############################################################# +#sh s_cscope # Cscope files. + +############################################################# +# Create the build environments last, they use files created +# by previous steps. +############################################################# +sh s_vxworks # VxWorks support. +sh s_windows # Windows support. +sh s_windows_dsp # Windows support: build environment. +sh s_android # Android support: drop-in build environment. diff --git a/dist/s_apiflags b/dist/s_apiflags new file mode 100755 index 00000000..ffe7e8a3 --- /dev/null +++ b/dist/s_apiflags @@ -0,0 +1,24 @@ +#!/bin/sh - +# $Id$ +# +# Build the automatically generated API flag #defines. + +msgc="/* DO NOT EDIT: automatically built by dist/s_apiflags. */" + +b=/tmp/api_flags_binary +t=/tmp/__db_a + +trap 'rm -f $b $t; exit 0' 0 +trap 'rm -f $b $t; exit 1' 1 2 3 13 15 + +cc api_flags.c -o $b || { + echo 's_apiflags: unable to compile api_flags.c' + exit 1 +} + +(echo "$msgc" + $b < api_flags) > $t + +f=../src/dbinc_auto/api_flags.in +cmp $f $t > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) diff --git a/dist/s_config b/dist/s_config new file mode 100755 index 00000000..a16e758a --- /dev/null +++ b/dist/s_config @@ -0,0 +1,37 @@ +#!/bin/sh - +# $Id$ +# +# Build the autoconfiguration files. + +trap 'rm -f aclocal.m4 ; exit 0' 0 1 2 3 13 15 + +. ./RELEASE + +echo "autoconf: building aclocal.m4..." +cat aclocal/*.m4 aclocal_java/*.m4 > aclocal.m4 + +echo "autoconf: running autoheader to build config.hin..." +rm -f config.hin +autoheader +chmod 644 config.hin + +echo "autoconf: running autoconf to build configure" +rm -f configure +autoconf + +# Edit version information we couldn't pre-compute. 
+sed -e "s/__EDIT_DB_VERSION_FAMILY__/$DB_VERSION_FAMILY/g" \ + -e "s/__EDIT_DB_VERSION_RELEASE__/$DB_VERSION_RELEASE/g" \ + -e "s/__EDIT_DB_VERSION_MAJOR__/$DB_VERSION_MAJOR/g" \ + -e "s/__EDIT_DB_VERSION_MINOR__/$DB_VERSION_MINOR/g" \ + -e "s/__EDIT_DB_VERSION_PATCH__/$DB_VERSION_PATCH/g" \ + -e "s/__EDIT_DB_VERSION_STRING__/$DB_VERSION_STRING/g" \ + -e "s/__EDIT_DB_VERSION_FULL_STRING__/$DB_VERSION_FULL_STRING/g" \ + -e "s/__EDIT_DB_VERSION_UNIQUE_NAME__/$DB_VERSION_UNIQUE_NAME/g" \ + -e "s/__EDIT_DB_VERSION__/$DB_VERSION/g" configure > configure.version +mv configure.version configure + +rm -rf autom4te.cache +chmod 755 configure + +chmod 755 config.guess config.sub install-sh diff --git a/dist/s_crypto b/dist/s_crypto new file mode 100755 index 00000000..d98ea343 --- /dev/null +++ b/dist/s_crypto @@ -0,0 +1,69 @@ +#!/bin/sh - +# $Id$ + +# Remove crypto from the DB source tree. + +d=.. + +t=/tmp/__db_a +trap 'rm -f $t ; exit 0' 0 +trap 'rm -f $t ; exit 1' 1 2 3 13 15 + +if ! test -d $d/src/crypto; then + echo "s_crypto: no crypto sources found in the source tree." + exit 1 +fi + +# Remove the crypto. +rm -rf $d/src/crypto + +# Update the release splash page. +f=$d/docs/index.html +(echo '/DOES/' && + echo 's/DOES/DOES NOT/' && + echo 'w' && + echo 'q') | ed $f + +# Win/32. +f=win_config.in +(echo '/#define.HAVE_CRYPTO/' && + echo 'c' && + echo '/* #undef HAVE_CRYPTO */' + echo '.' && + echo 'w' && + echo 'q') | ed $f + +f=srcfiles.in +r=`egrep crypto/crypto.c $f | awk 'BEGIN { FS="\t*" } { print $2 }'` +(echo 'g/^src\/crypto\//d' && + echo '/crypto_stub\.c/' && + echo "s/\$/ $r/" && + echo 'w' && + echo 'q') | ed $f + +# Change out crypto/crypto.c for common/crypto_stub.c, remove all other +# references to crypto files. +f=win_projects/projects.template.xml +(echo '/crypto\/crypto\.c/' && + echo 'c' && + echo ' ' && + echo '.' 
&& + echo 'g/"src\/crypto\//d' && + echo "s/\$/ $r/" && + echo ',' && + echo 'w' && + echo 'q') | ed $f + + sh ./s_windows + sh ./s_windows_dsp + +# VxWorks +f=vx_config.in +(echo '/#define.HAVE_CRYPTO/' && + echo 'c' && + echo '/* #undef HAVE_CRYPTO */' + echo '.' && + echo 'w' && + echo 'q') | ed $f + + sh ./s_vxworks diff --git a/dist/s_cscope b/dist/s_cscope new file mode 100755 index 00000000..00966b5d --- /dev/null +++ b/dist/s_cscope @@ -0,0 +1,17 @@ +#!/bin/sh - +# $Id$ +# +# Build cscope files. + +( +echo ../dbinc/*.in ../dbinc/*.h ../dbinc_auto/*.h; +cat srcfiles.in | +sed -e '/^#/d' \ + -e '/^$/d' \ + -e 's/^/..\//' \ + -e '/\/csv_local.c/d' \ + -e '/\/db_server_clnt.c/d' \ + -e '/\/db_server_svc.c/d' \ + -e '/\/db_server_xdr.c/d' \ + -e '/\/gen_db_server.c/d' | +awk '{print $1}') | cscope -bkUu -i- 2>&1 | sed '/cannot find file/d' diff --git a/dist/s_include b/dist/s_include new file mode 100755 index 00000000..1d3c6aac --- /dev/null +++ b/dist/s_include @@ -0,0 +1,175 @@ +#!/bin/sh - +# $Id$ +# +# Build the automatically generated function prototype files. + +msgc="/* DO NOT EDIT: automatically built by dist/s_include. */" + +. ./RELEASE + +AWK=${AWK:-awk} +head() +{ + defonly=0 + while : + do case "$1" in + space) + echo ""; shift;; + defonly) + defonly=1; shift;; + *) + name="$1"; break;; + esac + done + + echo "$msgc" + echo "#ifndef $name" + echo "#define $name" + echo "" + if [ $defonly -eq 0 ]; then + echo "#if defined(__cplusplus)" + echo "extern \"C\" {" + echo "#endif" + echo "" + fi +} + +tail() +{ + defonly=0 + while : + do case "$1" in + defonly) + defonly=1; shift;; + *) + name="$1"; break;; + esac + done + + echo "" + if [ $defonly -eq 0 ]; then + echo "#if defined(__cplusplus)" + echo "}" + echo "#endif" + fi + echo "#endif /* !$name */" +} + +# This script is run on a variety of systems. To avoid spurious changes, fix +# some variables that affect sort order of ls(1). 
+unset LANG +export LANG +LC_ALL="C" +export LC_ALL + +# We are building several files: +# 1 external #define file +# 1 external prototype file +# 1 internal #define file +# N internal prototype files +e_dfile=/tmp/__db_c.$$ +e_pfile=/tmp/__db_a.$$ +i_dfile=/tmp/__db_d.$$ +i_pfile=/tmp/__db_b.$$ +trap 'rm -f $e_dfile $e_pfile $i_dfile $i_pfile; exit 0' 0 1 2 3 13 15 + +head defonly space _DB_EXT_DEF_IN_ > $e_dfile +head space _DB_EXT_PROT_IN_ > $e_pfile +head defonly _DB_INT_DEF_IN_ > $i_dfile + +# Process the standard directories, creating per-directory prototype +# files and adding to the external prototype and #define files. +for i in db btree clib common crypto dbreg env fileops hash heap \ + hmac lock log mp mutex os qam rep repmgr sequence tcl txn xa; do + head "_${i}_ext_h_" > $i_pfile + + if [ $i = os ] ; then + f=`ls ../src/$i/*.c \ + ../src/os_qnx/*.c ../src/os_vxworks/*.c ../src/os_windows/*.c` + elif [ $i = crypto ] ; then + f=`ls ../src/$i/*.c ../src/$i/*/*.c` + elif [ $i = env ] ; then + f=`ls ../src/$i/*.c ../src/repmgr/repmgr_stub.c` + elif [ $i = tcl ] ; then + f=`ls ../lang/$i/*.c` + else + f=`ls ../src/$i/*.c` + fi + $AWK -f gen_inc.awk \ + -v db_version_unique_name=$DB_VERSION_UNIQUE_NAME \ + -v e_dfile=$e_dfile \ + -v e_pfile=$e_pfile \ + -v i_dfile=$i_dfile \ + -v i_pfile=$i_pfile $f + + tail "_${i}_ext_h_" >> $i_pfile + + f=../src/dbinc_auto/${i}_ext.h + cmp $i_pfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $i_pfile $f) +done + +# Process directories which only add to the external prototype and #define +# files. +for i in dbm hsearch; do + f=`ls ../lang/$i/*.c` + $AWK -f gen_inc.awk \ + -v db_version_unique_name=$DB_VERSION_UNIQUE_NAME \ + -v e_dfile=$e_dfile \ + -v e_pfile=$e_pfile \ + -v i_dfile="" \ + -v i_pfile="" $f +done + +# There are a few global variables in DB -- add them to the external/internal +# #define files. 
+(echo "#define __db_global_values __db_global_values@DB_VERSION_UNIQUE_NAME@"; + echo "#define __repmgr_guesstimated_max __repmgr_guesstimated_max@DB_VERSION_UNIQUE_NAME@"; + echo "#define db_xa_switch db_xa_switch@DB_VERSION_UNIQUE_NAME@" + ) >> $i_dfile + +# Wrap up the external #defines/prototypes, and internal #defines. +tail defonly _DB_EXT_DEF_IN_ >> $e_dfile +f=../src/dbinc_auto/ext_def.in +cmp $e_dfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $e_dfile $f) + +tail _DB_EXT_PROT_IN_ >> $e_pfile +f=../src/dbinc_auto/ext_prot.in +cmp $e_pfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $e_pfile $f) + +tail defonly _DB_INT_DEF_IN_ >> $i_dfile +f=../src/dbinc_auto/int_def.in +cmp $i_dfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $i_dfile $f) + +# DB185 compatibility support. +head space defonly _DB_EXT_185_DEF_IN_ > $e_dfile +head space _DB_EXT_185_PROT_IN_ > $e_pfile + +f=`ls ../lang/db185/*.c` +$AWK -f gen_inc.awk \ + -v db_version_unique_name=$DB_VERSION_UNIQUE_NAME \ + -v e_dfile=$e_dfile \ + -v e_pfile=$e_pfile \ + -v i_dfile="" \ + -v i_pfile="" $f + +tail defonly _DB_EXT_185_DEF_IN_ >> $e_dfile +f=../src/dbinc_auto/ext_185_def.in +cmp $e_dfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $e_dfile $f) + +tail _DB_EXT_185_PROT_IN_ >> $e_pfile +f=../src/dbinc_auto/ext_185_prot.in +cmp $e_pfile $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $e_pfile $f) + +# Make the DTrace provider description file from events.in +rm -f db_provider.d +(echo "/*"; \ + echo " * DO NOT EDIT: automatically built by dist/s_include."; \ + echo " * Oracle Berkeley DB DTrace Provider"; \ + echo " */"; \ +perl gen_provider.pl events.in) > db_provider.d diff --git a/dist/s_message b/dist/s_message new file mode 100755 index 00000000..5ff273c6 --- /dev/null +++ b/dist/s_message @@ -0,0 +1,31 @@ +#!/bin/sh - +# $Id$ +# +# Build the automatically generated rep & repmgr message files. 
+ +header=/tmp/__db_a +source=/tmp/__db_b + +trap 'rm -f /tmp/__db_[ab]; exit 1' 1 2 3 13 15 +trap 'rm -f /tmp/__db_[ab]; exit 0' 0 + +DIR="rep repmgr" + +# Build DB's message marshaling/unmarshaling routines. +for i in $DIR; do + for f in ../src/$i/*.msg; do + subsystem=`basename $f .msg` + awk -f gen_msg.awk \ + -v header_file=$header \ + -v source_file=$source < $f + + f=../src/dbinc_auto/${subsystem}_automsg.h + cmp $header $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $header $f) + f=../src/$i/${subsystem}_automsg.c + cmp $source $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $source $f) + done +done diff --git a/dist/s_message_id b/dist/s_message_id new file mode 100755 index 00000000..4159cc60 --- /dev/null +++ b/dist/s_message_id @@ -0,0 +1,187 @@ +#!/bin/sh - +# $Id$ +# +# Automatically generate message id. + +# Usage: A BDB#### would be replaced with an unused message id in: +# 1) DB_STR(BDB#### +# 2) DB_STR_A(BDB#### +# NOTE: Do not add whitespace between DB_STR( or DB_STR_A( and the message ID. + +# NOTE: Please update the MSG_DIRS if there is any new source dir. 
+MSG_DIRS="../src/ ../util/ ../lang/dbm/ ../dist/gen_msg.awk" + +# Environment Configuration +GREP_CMDS="grep ggrep" +for i in $GREP_CMDS; do + if [ "`which $i`" = "" ]; then + continue + fi + echo "s_message_id test" > s_message_id.tmp + $i "s_message_id test" s_message_id.tmp -o --include=*.tmp | \ + $i -v "s_message_id.txt" > s_message_id.tmp.out 2>&1 + if [ `$i "unrecognized option" s_message_id.tmp.out | wc -l` \ + -eq 0 ] && [ `$i "invalid option" s_message_id.tmp.out | \ + wc -l` -eq 0 ] && [ `$i "can't open" s_message_id.tmp.out | \ + wc -l` -eq 0 ]; then + GREP_CMD=$i + break + fi + rm s_message_id.tmp s_message_id.tmp.out +done +if [ "$GREP_CMD" = "" ]; then + echo "UNSUPPORTED COMMAND: (g)grep -o --include" + echo "Please try other platform" + exit +fi + +pref=MSG_INDX + +get_value() { + value=${1}${2} + eval "echo $`echo $value`" +} + +get_max() { + if [ ${1} -gt ${2} ] ; then + eval "echo ${1}" + else + eval "echo ${2}" + fi +} + +# Iterate source files and replace "BDB####" with the real message id. +for i in $MSG_DIRS; do + for f in `$GREP_CMD BDB#### -r $i -wl --include=*.c --include=*.h \ + --include=*.in --include=*.awk | \ + $GREP_CMD -v "../util/db_dump185.c" | \ + $GREP_CMD -v "../util/db_sql_codegen"`; do + IFS='/' + set $f + # There are 11 categories in the MSG_DIRS: + # 1) COMMON; 2) DB; 3) AM; 4) ENV; 5) LOCK; 6) LOG; + # 7) MPOOL; 8) REP; 9) SEQUENCE; 10) TXN; 11) UTIL. + # + # NOTE: Please add a new block (see below) and assign the values + # of cat_indx and cat_dirs if there is new group, or update the + # existing block if there is new source tree under the existing + # group. 
+ if [ "$i" = "../util/" ] || [ "$i" = "../lang/dbm/" ]; then + cat_indx=10 + cat_dirs="../util/ ../lang/dbm/" + elif [ "$3" = "common" ] || [ "$3" = "crypto" ] || \ + [ "$3" = "fileops" ] || [ "$3" = "hmac" ] || \ + [ "$3" = "os" ] || [ "$3" = "os_qnx" ] || \ + [ "$3" = "os_vxworks" ] || [ "$3" = "os_windows" ]; then + cat_indx=0 + cat_dirs=$i"common "$i"crypto "$i"fileops " + cat_dirs=$cat_dirs$i"hmac "$i"os "$i"os_qnx " + cat_dirs=$cat_dirs$i"os_vxworks "$i"os_windows" + elif [ "$3" = "db" ] || [ "$3" = "dbinc" ] || \ + [ "$3" = "dbinc_auto" ]; then + cat_indx=1 + cat_dirs=$i"db "$i"dbinc "$i"dbinc_auto" + elif [ "$3" = "btree" ] || [ "$3" = "hash" ] || \ + [ "$3" = "heap" ] || [ "$3" = "qam" ]; then + cat_indx=2 + cat_dirs=$i"btree "$i"hash "$i"heap "$i"qam" + elif [ "$3" = "dbreg" ] || [ "$3" = "env" ] ; then + cat_indx=3 + cat_dirs=$i"dbreg "$i"env" + elif [ "$3" = "lock" ] || [ "$3" = "mutex" ]; then + cat_indx=4 + cat_dirs=$i"lock "$i"mutex" + elif [ "$3" = "log" ]; then + cat_indx=5 + cat_dirs=$i"log" + elif [ "$3" = "mp" ]; then + cat_indx=6 + cat_dirs=$i"mp" + elif [ "$3" = "rep" ] || [ "$3" = "repmgr" ] || \ + [ "$3" = "gen_msg.awk" ] ; then + cat_indx=7 + cat_dirs="../src/rep ../src/repmgr ../dist/gen_msg.awk" + elif [ "$3" = "sequence" ]; then + cat_indx=8 + cat_dirs=$i"sequence" + elif [ "$3" = "txn" ] || [ "$3" = "xa" ]; then + cat_indx=9 + cat_dirs=$i"txn "$i"xa" + else + echo "ERROR UNKNOWN PATH:" "$i""$3" + exit + fi + + unset IFS + + # Initialize MSG_INDX for each group if it is never initialized. + if [ "`get_value $pref $cat_indx`" = "" ]; then + # Get the start index, that is the next available number + # for the current group. If there is no existing message + # marked by DB_STR or DB_STR_A, the start index is the + # first available integer in its range. Unless, it is + # the next available integer in its range. 
+ MSG_START_NUM=`expr $cat_indx \* 500 + 1` + DB_STR_NUM=`$GREP_CMD -E "DB_STR\(\"[0-9]{4}\"" -r \ + $cat_dirs | wc -l` + if [ $DB_STR_NUM -eq 0 ]; then + DB_STR_NUM=`expr 0 + $MSG_START_NUM` + else + DB_STR_NUM=`$GREP_CMD -E "DB_STR\(\"[0-9]{4}\"" -oh \ + -r $cat_dirs | sort | tail -n 1 | \ + sed -e "s/DB_STR(//g" | sed -e "s/\"//g"` + DB_STR_NUM=`expr 1 + $DB_STR_NUM` + fi + + DB_STR_A_NUM=`$GREP_CMD -E "DB_STR_A\(\"[0-9]{4}\"" -r \ + $cat_dirs | wc -l` + if [ $DB_STR_A_NUM -eq 0 ]; then + DB_STR_A_NUM=`expr 0 + $MSG_START_NUM` + else + DB_STR_A_NUM=`$GREP_CMD -E \ + "DB_STR_A\(\"[0-9]{4}\"" -oh -r $cat_dirs | \ + sort | tail -n 1 | \ + sed -e "s/DB_STR_A(//g" | sed -e "s/\"//g"` + DB_STR_A_NUM=`expr 1 + $DB_STR_A_NUM` + fi + + MSG_START_INDX=`get_max $DB_STR_NUM $DB_STR_A_NUM` + eval "$pref$cat_indx=$MSG_START_INDX" + fi + + cat $f | awk '{ + if(num"tmp.tmp" + }END{ + printf("%s", num)>"tmp.num"} + function fill(i) { + s="" + j=4-length(i) + while(j) { + s=0""s + j-- + } + return "\""s""i"\"" + }' num=`get_value $pref $cat_indx` \ + max=`expr \( $cat_indx + 1 \) \* 500` + cp tmp.tmp $f + eval "$pref$cat_indx=`sed 'q' tmp.num`" + if [ "$cat_indx" -lt 11 ]; then + if [ `get_value $pref $cat_indx` -ge \ + `expr \( $cat_indx + 1 \) \* 500` ]; then + echo "RANGE FULL" + fi + else + echo "ERROR CATEGORY NUMBER: " $cat_indx + fi + rm tmp.tmp tmp.num + done +done + +# Check if there is any remaining "BDB####". +# NOTE: If "BDB####" is not .c, .h, .in files, they won't be updated with the +# real message id. +if [ `$GREP_CMD "BDB####" -r $MSG_DIRS | wc -l` -gt 0 ]; then + echo "WARNING: There is remaining BDB####. Please check:" + $GREP_CMD "BDB####" -r $MSG_DIRS +fi diff --git a/dist/s_perm b/dist/s_perm new file mode 100755 index 00000000..e52360fe --- /dev/null +++ b/dist/s_perm @@ -0,0 +1,49 @@ +#!/bin/sh - +# $Id$ +# +# This script is designed to be run while packaging up releases, not during +# other s_all processing. + +d=.. 
+echo 'Updating Berkeley DB source tree permissions for a release package' + +run() +{ + if [ -f "$d/$1" ]; then + chmod "$2" "$d/$1" + else + echo "$d/$1: no such file or directory" + exit 1 + fi +} + +# Ensure all files in the package have consistent permissions. +find $d -type d | xargs chmod 775 +find $d -type f | xargs chmod 644 + +# Assign execute permissions where necessary. +# ODBC and JDBC are only present in release packages. +if [ -d $d/lang/sql/odbc ]; then + chmod 755 $d/lang/sql/odbc/conf* +fi +if [ -d $d/lang/sql/jdbc ]; then + chmod 755 $d/lang/sql/jdbc/conf* +fi +chmod 755 $d/dist/s_* + +run dist/config.guess 755 +run dist/config.sub 755 +run dist/configure 755 +run dist/install-sh 755 +run dist/vx_buildcd 755 +run lang/perl/BerkeleyDB/dbinfo 755 +run lang/perl/BerkeleyDB/mkpod 755 +run lang/sql/sqlite/configure 755 + +for i in `cd $d && find build_vxworks \ + -name '*.wsp' -o -name '*.cdf' -o -name '*.wpj'`; do + chmod 775 $d/$i +done + +chmod 555 $d/util/dtrace/*.d +chmod 555 $d/util/systemtap/*.stp diff --git a/dist/s_php b/dist/s_php new file mode 100755 index 00000000..80375b13 --- /dev/null +++ b/dist/s_php @@ -0,0 +1,23 @@ +#!/bin/sh - +# $Id$ +# +# Build Windows include files. + +. ./RELEASE + +process() +{ + src=$1.in + tdest=$1.out + dest=$1 + + sed -e "s/@DB_VERSION_MAJOR@/$DB_VERSION_MAJOR/g" \ + -e "s/@DB_VERSION_MINOR@/$DB_VERSION_MINOR/g" < $src > $tdest + + cmp $tdest $dest > /dev/null 2>&1 || + (echo "Building $dest" && rm -f $dest && cp $tdest $dest) + rm -f $tdest +} + +process ../lang/php_db4/config.m4 +process ../lang/php_db4/config.w32 diff --git a/dist/s_readme b/dist/s_readme new file mode 100755 index 00000000..38d32998 --- /dev/null +++ b/dist/s_readme @@ -0,0 +1,25 @@ +#!/bin/sh - +# $Id$ +# +# Build the README. + +echo 'Updating Berkeley DB README file...' + +d=.. + +t=/tmp/__t +trap 'rm -f $t; exit 0' 0 1 2 3 13 15 + +. 
./RELEASE + +cat << END_OF_README>$t +$DB_VERSION_FULL_STRING + +This is Berkeley DB $DB_VERSION_FAMILY$DB_VERSION_LETTER Release $DB_VERSION_RELEASE from Oracle. To view release and +installation documentation, load the distribution file docs/index.html +into your web browser. +END_OF_README + +f=../README +cmp $t $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) diff --git a/dist/s_recover b/dist/s_recover new file mode 100755 index 00000000..b0ba3431 --- /dev/null +++ b/dist/s_recover @@ -0,0 +1,71 @@ +#!/bin/sh - +# $Id$ +# +# Build the automatically generated logging/recovery files. + +header=/tmp/__db_a +loglist=/tmp/__db_b +print=/tmp/__db_c +source=/tmp/__db_d +template=/tmp/__db_e +tmp=/tmp/__db_f + +trap 'rm -f /tmp/__db_[abcdef]; exit 1' 1 2 3 13 15 +trap 'rm -f /tmp/__db_[abcdef]; exit 0' 0 + +DIR="db dbreg btree fileops hash heap qam repmgr txn" + +# Check to make sure we haven't duplicated a log record entry, and build +# the list of log record types that the test suite uses. +for i in $DIR; do + for f in ../src/$i/*.src; do + # Grab the PREFIX; there should only be one per file, and + # so it's okay to just take the first. + grep '^PREFIX' $f | sed q + egrep '^BEGIN[ ]|^IGNORED[ ]|^DEPRECATED[ ]' $f | + awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' + done +done > $loglist +grep -v '^PREFIX' $loglist | + awk '{print $2 "\t" $3 "\t" $4}' | sort -n -k 3 | uniq -d -f 1 > $tmp +[ -s $tmp ] && { + echo "DUPLICATE LOG VALUES:" + cat $tmp + rm -f $tmp + exit 1 +} +f=../test/logtrack.list +cmp $loglist $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $loglist $f) + +# Build DB's recovery routines. 
+for i in $DIR; do + for f in ../src/$i/*.src; do + subsystem=`basename $f .src` + awk -f gen_rec.awk \ + -v header_file=$header \ + -v print_file=$print\ + -v source_file=$source \ + -v template_file=$template < $f + + f=../src/dbinc_auto/${subsystem}_auto.h + cmp $header $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $header $f) + f=../src/$i/${subsystem}_auto.c + cmp $source $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $source $f) + f=../src/$i/${subsystem}_autop.c + cmp $print $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $print $f) + f=template/rec_${subsystem} + cmp $template $f > /dev/null 2>&1 || + (echo "Building $f" && + rm -f $f && cp $template $f) + done +done + +# Build the example application's recovery routines. +(cd ../examples/c/ex_apprec && sh auto_rebuild) diff --git a/dist/s_sig b/dist/s_sig new file mode 100755 index 00000000..fdbbf9b6 --- /dev/null +++ b/dist/s_sig @@ -0,0 +1,119 @@ +#!/bin/sh - +# $Id$ +# +# Build structure signature code. 
+ +a=/tmp/__db_a.$$ +b=/tmp/__db_b.$$ +c=/tmp/__db_c.$$ +trap 'rm -f $a $b $c; exit 0' 0 1 2 3 13 15 + +cat ../src/dbinc/db.in ../src/dbinc/db_int.in ../src/dbinc/*.h | +sed -e '/.*struct.*mutex.*{[ ]*\/\* SHARED \*\/.*/i\ +#ifdef HAVE_MUTEX_SUPPORT' \ + -e '/.*struct.*mutex.*{[ ]*\/\* SHARED \*\/.*/a\ +#endif' \ + -e 's/.*[ ]*\(__[a-z_]*\)[ ]*{[ ]*\/\* SHARED \*\/.*/ __ADD(\1);/p' \ + -e d > $a + +cat ../src/dbinc/db.in ../src/dbinc/db_int.in ../src/dbinc/*.h | +sed -e '/__addrinfo/d' \ + -e '/__aes_cipher/d' \ + -e '/__cipher/d' \ + -e '/__channel/d' \ + -e '/__queued_output/d' \ + -e '/__repmgr_connection/d' \ + -e '/__repmgr_message/d' \ + -e '/__repmgr_response/d' \ + -e '/__repmgr_retry/d' \ + -e '/__repmgr_runnable/d' \ + -e '/__repmgr_site/d' \ + -e '/.*[ ]*\(__[a-z_]*\)[ ]*{[ ]*\/\* SHARED \*\/.*/d' \ + -e '/struct.*mutex.*{/i\ +#ifdef HAVE_MUTEX_SUPPORT' \ + -e '/struct.*mutex.*{/a\ +#endif' \ + -e 's/.*[ ]*struct[ ]*\(__[a-z_]*\)[ ]*{.*/ __ADD(\1);/p' \ + -e d > $c + +cnt1=`sed -e '$=' -e d $a` +cnt2=`sed -e '$=' -e d $c` + +cat << END_OF_TEXT > $b +/*- + * DO NOT EDIT: automatically built by dist/s_sig. + * + * \$Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/crypto.h" +#include "dbinc/db_join.h" +#include "dbinc/db_verify.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/log_verify.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +END_OF_TEXT +cat << END_OF_TEXT >> $b +/* + * For a pure 32bit/64bit environment, we check all structures and calculate a + * signature. For compatible environment, we only check the structures in + * shared memory. 
+ */ +END_OF_TEXT +echo "#ifdef HAVE_MIXED_SIZE_ADDRESSING" >> $b +echo "#define __STRUCTURE_COUNT $cnt1" >> $b +echo "#else" >> $b +echo "#define __STRUCTURE_COUNT ($cnt1 + $cnt2)" >> $b +echo "#endif" >> $b + +cat << END_OF_TEXT >> $b + +/* + * __env_struct_sig -- + * Compute signature of structures. + * + * PUBLIC: u_int32_t __env_struct_sig __P((void)); + */ +u_int32_t +__env_struct_sig() +{ + u_short t[__STRUCTURE_COUNT + 5]; + u_int i; + + i = 0; +#define __ADD(s) (t[i++] = sizeof(struct s)) + +END_OF_TEXT + +cat $a >> $b + +cat << END_OF_TEXT >> $b + +#ifndef HAVE_MIXED_SIZE_ADDRESSING +END_OF_TEXT + +cat $c >> $b + +echo "#endif" >> $b + +cat << END_OF_TEXT >> $b + + return (__ham_func5(NULL, t, i * sizeof(t[0]))); +} +END_OF_TEXT + +f=../src/env/env_sig.c +cmp $b $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $b $f) diff --git a/dist/s_symlink b/dist/s_symlink new file mode 100755 index 00000000..eae12232 --- /dev/null +++ b/dist/s_symlink @@ -0,0 +1,47 @@ +#!/bin/sh - +# $Id$ + +echo 'Creating Berkeley DB source tree symbolic links...' 
+ +build() +{ + #echo " $1 -> $2" + (cd ../`dirname $1` && rm -f `basename $1` && ln -s $2 `basename $1`) +} + +build src/btree/tags ../../dist/tagdir/tags +build build_unix/tags ../dist/tagdir/tags +build src/clib/tags ../../dist/tagdir/tags +build src/common/tags ../../dist/tagdir/tags +build src/crypto/tags ../../dist/tagdir/tags +build lang/cxx/tags ../../dist/tagdir/tags +build src/db/tags ../../dist/tagdir/tags +build lang/db185/tags ../../dist/tagdir/tags +build util/tags ../dist/tagdir/tags +build src/dbinc/tags ../../dist/tagdir/tags +build src/dbinc_auto/tags ../../dist/tagdir/tags +build lang/dbm/tags ../../dist/tagdir/tags +build src/dbreg/tags ../../dist/tagdir/tags +build src/env/tags ../../dist/tagdir/tags +build examples/c/tags ../../dist/tagdir/tags +build examples/cxx/tags ../../dist/tagdir/tags +build src/fileops/tags ../../dist/tagdir/tags +build src/hash/tags ../../dist/tagdir/tags +build src/hmac/tags ../../dist/tagdir/tags +build lang/hsearch/tags ../../dist/tagdir/tags +build lang/java/libdb_java/tags ../../../dist/tagdir/tags +build src/lock/tags ../../dist/tagdir/tags +build src/log/tags ../../dist/tagdir/tags +build src/mp/tags ../../dist/tagdir/tags +build src/mutex/tags ../../dist/tagdir/tags +build src/os/tags ../../dist/tagdir/tags +build src/os_qnx/tags ../../dist/tagdir/tags +build src/os_vxworks/tags ../../dist/tagdir/tags +build src/os_windows/tags ../../dist/tagdir/tags +build src/qam/tags ../../dist/tagdir/tags +build src/rep/tags ../../dist/tagdir/tags +build src/repmgr/tags ../../dist/tagdir/tags +build src/sequence/tags ../../dist/tagdir/tags +build lang/tcl/tags ../../dist/tagdir/tags +build src/txn/tags ../../dist/tagdir/tags +build src/xa/tags ../../dist/tagdir/tags diff --git a/dist/s_tags b/dist/s_tags new file mode 100755 index 00000000..7e72be7f --- /dev/null +++ b/dist/s_tags @@ -0,0 +1,64 @@ +#!/bin/sh - +# $Id$ +# +# Build tags files. 
+ +cd tagdir +files=`echo ../../src/dbinc/*.h \ + ../../src/dbinc/*.in \ + ../../src/btree/*.[ch] \ + ../../src/clib/*.[ch] \ + ../../src/common/*.[ch] \ + ../../src/crypto/*.[ch] \ + ../../src/crypto/mersenne/*.[ch] \ + ../../src/crypto/rijndael/*.[ch] \ + ../../src/db/*.[ch] \ + ../../lang/db185/*.[ch] \ + ../../lang/dbm/*.[ch] \ + ../../src/dbreg/*.[ch] \ + ../../src/env/*.[ch] \ + ../../src/fileops/*.[ch] \ + ../../src/hash/*.[ch] \ + ../../src/hmac/*.[ch] \ + ../../src/hsearch/*.[ch] \ + ../../src/lock/*.[ch] \ + ../../src/log/*.[ch] \ + ../../src/mp/*.[ch] \ + ../../src/mutex/*.[ch] \ + ../../src/os/*.[ch] \ + ../../src/os_qnx/*.[ch] \ + ../../src/qam/*.[ch] \ + ../../src/rep/*.[ch] \ + ../../src/repmgr/*.[ch] \ + ../../src/sequence/*.[ch] \ + ../../lang/tcl/*.[ch] \ + ../../src/txn/*.[ch] \ + ../../src/xa/*.[ch] \ + ../../lang/cxx/*.cpp \ + ../../lang/java/libdb_java/*.[ch] | sed 's/[^ ]*stub.c//g'` + +f=tags +echo "Building $f" +rm -f $f + +# Figure out what flags this ctags accepts. +flags="" +if ctags -d ../../src/db/db.c 2>/dev/null; then + flags="-d $flags" +fi +if ctags -t ../../src/db/db.c 2>/dev/null; then + flags="-t $flags" +fi +if ctags -w ../../src/db/db.c 2>/dev/null; then + flags="-w $flags" +fi + +ctags $flags $files 2>/dev/null + +for i in test/perf testrrepmgr testrserver; do + test -d ../../$i || continue + + f=../../$i/tags + echo "Building $f" + (cd ../../$i && ctags $flags *.[ch] 2>/dev/null) +done diff --git a/dist/s_test b/dist/s_test new file mode 100755 index 00000000..d0abc1b6 --- /dev/null +++ b/dist/s_test @@ -0,0 +1,109 @@ +#!/bin/sh - +# $Id$ +# +# Build the Tcl test files. + +msg1="# Automatically built by dist/s_test; may require local editing." +msg2="# Automatically built by dist/s_test; may require local editing." + +t=/tmp/__t +trap 'rm -f $t; exit 0' 0 1 2 3 13 15 + +. 
./RELEASE + +(echo "$msg1" && \ + echo "" && \ + echo "set tclsh_path @TCL_TCLSH@" && \ + echo "set tcllib .libs/libdb_tcl-@DB_VERSION_MAJOR@.@DB_VERSION_MINOR@@LIBTSO_MODSUFFIX@" && \ + echo "" && \ + echo "set src_root @srcdir@/.." && \ + echo "set test_path @srcdir@/../test/tcl" && \ + echo "set je_root @srcdir@/../../je" && \ + echo "" && \ + echo "global testdir" && \ + echo "set testdir ./TESTDIR" && \ + echo "" && \ + echo "global dict" && \ + echo "global util_path" && \ + echo "" && \ + echo "global is_freebsd_test" && \ + echo "global is_hp_test" && \ + echo "global is_linux_test" && \ + echo "global is_osx_test" && \ + echo "global is_qnx_test" && \ + echo "global is_sunos_test" && \ + echo "global is_windows_test" && \ + echo "global is_windows9x_test" && \ + echo "" && \ + echo "global valid_methods" && \ + echo "global checking_valid_methods" && \ + echo "global test_recopts" && \ + echo "" && \ + echo "set KILL \"@KILL@\"") > $t + +f=../test/tcl/include.tcl +cmp $t $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) + +(echo "$msg1" && \ + echo "" && \ + echo "set tclsh_path SET_YOUR_TCLSH_PATH" && \ + echo "set buildpath Win32/Debug" && \ + echo "set tcllib libdb_tcl${DB_VERSION_MAJOR}${DB_VERSION_MINOR}d.dll" && \ + echo "" && \ + echo "set src_root .." 
&& \ + echo "set test_path ../test/tcl" && \ + echo "set je_root ../../je" && \ + echo "" && \ + echo "global testdir" && \ + echo "set testdir ./TESTDIR" && \ + echo "" && \ + echo "global dict" && \ + echo "global util_path" && \ + echo "" && \ + echo "global is_freebsd_test" && \ + echo "global is_hp_test" && \ + echo "global is_linux_test" && \ + echo "global is_osx_test" && \ + echo "global is_qnx_test" && \ + echo "global is_sunos_test" && \ + echo "global is_windows_test" && \ + echo "global is_windows9x_test" && \ + echo "" && \ + echo "global valid_methods" && \ + echo "global checking_valid_methods" && \ + echo "global test_recopts" && \ + echo "" && \ + echo "set KILL dbkill.exe") > $t + +f=../build_windows/include.tcl +cmp $t $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) + +# Build the test directory TESTS file. +(echo $msg2; +cat `egrep -l '^#[ ][ ]*TEST' ../test/tcl/*.tcl` | +sed -e '/^#[ ][ ]*TEST/!{' \ + -e 's/.*//' \ + -e '}' | +cat -s | +sed -e '/TEST/{' \ + -e 's/^#[ ][ ]*TEST[ ]*//' \ + -e 's/^ //' \ + -e 'H' \ + -e 'd' \ + -e '}' \ + -e 's/.*//' \ + -e x \ + -e 's/\n/__LINEBREAK__/g' | +LANG=C sort | +sed -e 's/__LINEBREAK__/\ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\ +/' \ + -e 's/__LINEBREAK__/\ + /g' | +sed -e 's/^[ ][ ]*$//') > $t + +f=../test/tcl/TESTS +cmp $t $f > /dev/null 2>&1 || + (echo "Building $f" && rm -f $f && cp $t $f) diff --git a/dist/s_validate b/dist/s_validate new file mode 100755 index 00000000..32e8fe52 --- /dev/null +++ b/dist/s_validate @@ -0,0 +1,79 @@ +#!/bin/sh - +# $Id$ +# +# This script runs the various validation tests in the validate directory. + +# Run everything, even those known to be invalid or useless. +all_tests=1 +# Run all tests, even those that require odd env setup or a long time. 
+full=0 +ignore_failures=0 +nocleanup=0 +verbose=1 +while [ $# -gt 0 ] +do + case "$1" in + -a*) # all + all_tests=1; full=1; shift;; + -c*) # continue + ignore_failures=1; shift;; + -f*) # full + full=1; shift;; + -nocleanup) + nocleanup=1; shift;; + -q*) + verbose=0; shift;; + *) + echo "Unrecognized option: $1, ignoring" + shift;; + esac +done + +# The set of full tests are those that have special env setup requirements +# or take a long time to run. They should be run at release time. +FULL_TESTS="s_chk_build_configs s_chk_vxworks" +EXCLUDE_TESTS="s_chk_logverify s_chk_srcfiles s_chk_java_samples" + +# Run all s_chk scripts, files with extensions are used by the script with +# the shorter name, they shouldn't be run directly. +for t in `(cd validate && ls s_chk_* | grep -v "\.")` +do + excluded=0 + for skip in $FULL_TESTS; do + if [ $full = 0 -a "$t" = "$skip" ]; then + echo "===!! Skipping $t ===" + echo "=== Add -full to the command line to enable ===" + excluded=1 + break; + fi + done + for skip in $EXCLUDE_TESTS; do + if [ $all_tests != 0 -a "$t" = "$skip" ]; then + echo "===!! Skipping $t ===" + echo "=== Add -all to the command line to enable ===" + excluded=1 + break; + fi + done + if [ $excluded != 0 ]; then + continue + fi + + echo "=== Running $t ===" + if [ "$verbose" = 1 ]; then + (cd validate && sh $t) + else + (cd validate && sh $t > /dev/null) + fi + ret_val=$? + if [ "$ret_val" != 0 ]; then + echo "=== Test $t reported a failure $ret_val." >&2 + if [ $ignore_failures = 0 ]; then + exit $ret_val + fi + else + echo "=== Test $t passed, $ret_val" + fi + rm -f validate/__? +done +echo "Finished running validate tests." diff --git a/dist/srcfiles.in b/dist/srcfiles.in new file mode 100644 index 00000000..b9d8a37a --- /dev/null +++ b/dist/srcfiles.in @@ -0,0 +1,339 @@ +# $Id$ +# +# This is an input file for the s_vxworks script. It lists the source files in +# the Berkeley DB tree and notes which are used to build the Android and +# VxWorks libraries. 
+# +# If you are adding a new file, putting the filename here in srcfiles.in and +# indicate which modules require the file. Also make sure to add the file to +# dist/win_projects/projects.template.xml for Windows, and +# dist/win_projects/projects_wince.template.xml for Windows CE. +# +# Please keep this list sorted alphabetically! +# +# Each non-blank, non-comment line is of the form +# filename module [ module ...] + +src/btree/bt_compact.c android vx vxsmall +src/btree/bt_compare.c android vx vxsmall +src/btree/bt_compress.c android vx vxsmall +src/btree/bt_conv.c android vx vxsmall +src/btree/bt_curadj.c android vx vxsmall +src/btree/bt_cursor.c android vx vxsmall +src/btree/bt_delete.c android vx vxsmall +src/btree/bt_method.c android vx vxsmall +src/btree/bt_open.c android vx vxsmall +src/btree/bt_put.c android vx vxsmall +src/btree/bt_rec.c android vx vxsmall +src/btree/bt_reclaim.c android vx vxsmall +src/btree/bt_recno.c android vx vxsmall +src/btree/bt_rsearch.c android vx vxsmall +src/btree/bt_search.c android vx vxsmall +src/btree/bt_split.c android vx vxsmall +src/btree/bt_stat.c android vx vxsmall +src/btree/bt_upgrade.c android vx vxsmall +src/btree/bt_verify.c vx +src/btree/btree_auto.c android vx vxsmall +src/btree/btree_autop.c vx6 +build_vxworks/util/db_archive.c vx6 +build_vxworks/util/db_checkpoint.c vx6 +build_vxworks/util/db_deadlock.c vx6 +build_vxworks/util/db_dump.c vx6 +build_vxworks/util/db_hotbackup.c vx6 +build_vxworks/util/db_load.c vx6 +build_vxworks/util/db_log_verify.c vx6 +build_vxworks/util/db_printlog.c vx6 +build_vxworks/util/db_recover.c vx6 +build_vxworks/util/db_stat.c vx6 +build_vxworks/util/db_tuner.c vx6 +build_vxworks/util/db_upgrade.c vx6 +build_vxworks/util/db_verify.c vx6 +build_vxworks/dbdemo/dbdemo.c vx6 +build_vxworks/test/micro/b_curalloc.c vx6 +build_vxworks/test/micro/b_curwalk.c vx6 +build_vxworks/test/micro/b_del.c vx6 +build_vxworks/test/micro/b_get.c vx6 +build_vxworks/test/micro/b_inmem.c vx6 
+build_vxworks/test/micro/b_latch.c vx6 +build_vxworks/test/micro/b_load.c vx6 +build_vxworks/test/micro/b_open.c vx6 +build_vxworks/test/micro/b_put.c vx6 +build_vxworks/test/micro/b_recover.c vx6 +build_vxworks/test/micro/b_txn.c vx6 +build_vxworks/test/micro/b_txn_write.c vx6 +build_vxworks/test/micro/b_uname.c vx6 +build_vxworks/test/micro/b_util.c vx6 +build_vxworks/test/micro/b_workload.c vx6 +build_vxworks/test/micro/test_micro.c vx6 +src/clib/getopt.c vx vxsmall +src/clib/rand.c android +src/clib/snprintf.c android vx vxsmall +src/clib/strcasecmp.c vx vxsmall +src/clib/strdup.c vx vxsmall +src/clib/strsep.c vx vxsmall +src/common/clock.c android vx vxsmall +src/common/crypto_stub.c android vxsmall +src/common/db_byteorder.c android vx vxsmall +src/common/db_compint.c android vx vxsmall +src/common/db_err.c android vx vxsmall +src/common/db_getlong.c android vx vxsmall +src/common/db_idspace.c android vx vxsmall +src/common/db_log2.c android vx vxsmall +src/common/db_shash.c android vx vxsmall +src/common/dbt.c android vx vxsmall +src/common/mkpath.c android vx vxsmall +src/common/os_method.c android vx vxsmall +src/common/util_arg.c vx vxsmall +src/common/util_cache.c vx vxsmall +src/common/util_log.c vx vxsmall +src/common/util_sig.c vx vxsmall +src/common/zerofill.c android vx vxsmall +src/crypto/aes_method.c vx +src/crypto/crypto.c vx +src/crypto/mersenne/mt19937db.c vx +src/crypto/rijndael/rijndael-alg-fst.c vx +src/crypto/rijndael/rijndael-api-fst.c vx +lang/cxx/cxx_channel.cpp vx6 +lang/cxx/cxx_db.cpp vx6 +lang/cxx/cxx_dbc.cpp vx6 +lang/cxx/cxx_dbt.cpp vx6 +lang/cxx/cxx_env.cpp vx6 +lang/cxx/cxx_except.cpp vx6 +lang/cxx/cxx_lock.cpp vx6 +lang/cxx/cxx_logc.cpp vx6 +lang/cxx/cxx_mpool.cpp vx6 +lang/cxx/cxx_multi.cpp vx6 +lang/cxx/cxx_seq.cpp vx6 +lang/cxx/cxx_site.cpp vx6 +lang/cxx/cxx_txn.cpp vx6 +src/db/crdel_auto.c android vx vxsmall +src/db/crdel_autop.c vx6 +src/db/crdel_rec.c android vx vxsmall +src/db/db.c android vx vxsmall +src/db/db_am.c 
android vx vxsmall +src/db/db_auto.c android vx vxsmall +src/db/db_autop.c vx6 +src/db/db_cam.c android vx vxsmall +src/db/db_cds.c android vx vxsmall +src/db/db_compact.c android vx vxsmall +src/db/db_conv.c android vx vxsmall +src/db/db_copy.c android vx vxsmall +src/db/db_dispatch.c android vx vxsmall +src/db/db_dup.c android vx vxsmall +src/db/db_iface.c android vx vxsmall +src/db/db_join.c android vx vxsmall +src/db/db_meta.c android vx vxsmall +src/db/db_method.c android vx vxsmall +src/db/db_open.c android vx vxsmall +src/db/db_overflow.c android vx vxsmall +src/db/db_ovfl_vrfy.c vx +src/db/db_pr.c android vx vxsmall +src/db/db_rec.c android vx vxsmall +src/db/db_reclaim.c android vx vxsmall +src/db/db_remove.c android vx vxsmall +src/db/db_rename.c android vx vxsmall +src/db/db_ret.c android vx vxsmall +src/db/db_setid.c android vx vxsmall +src/db/db_setlsn.c android vx vxsmall +src/db/db_sort_multiple.c android vx vxsmall +src/db/db_stati.c android vx vxsmall +src/db/db_truncate.c android vx vxsmall +src/db/db_upg.c android vx vxsmall +src/db/db_upg_opd.c android vx vxsmall +src/db/db_vrfy.c vx +src/db/db_vrfy_stub.c android vxsmall +src/db/db_vrfyutil.c vx +src/db/partition.c android vx +lang/db185/db185.c +src/dbreg/dbreg.c android vx vxsmall +src/dbreg/dbreg_auto.c android vx vxsmall +src/dbreg/dbreg_autop.c vx6 +src/dbreg/dbreg_rec.c android vx vxsmall +src/dbreg/dbreg_stat.c android vx vxsmall +src/dbreg/dbreg_util.c android vx vxsmall +src/env/env_alloc.c android vx vxsmall +src/env/env_config.c android vx vxsmall +src/env/env_failchk.c android vx vxsmall +src/env/env_file.c android vx vxsmall +src/env/env_globals.c android vx vxsmall +src/env/env_method.c android vx vxsmall +src/env/env_name.c android vx vxsmall +src/env/env_open.c android vx vxsmall +src/env/env_recover.c android vx vxsmall +src/env/env_region.c android vx vxsmall +src/env/env_register.c android vx vxsmall +src/env/env_sig.c android vx vxsmall +src/env/env_stat.c android vx vxsmall 
+src/fileops/fileops_auto.c android vx vxsmall +src/fileops/fileops_autop.c vx6 +src/fileops/fop_basic.c android vx vxsmall +src/fileops/fop_rec.c android vx vxsmall +src/fileops/fop_util.c android vx vxsmall +src/hash/hash.c vx +src/hash/hash_auto.c vx +src/hash/hash_autop.c vx6 +src/hash/hash_compact.c vx +src/hash/hash_conv.c vx +src/hash/hash_dup.c vx +src/hash/hash_func.c android vx vxsmall +src/hash/hash_meta.c vx +src/hash/hash_method.c vx +src/hash/hash_open.c vx +src/hash/hash_page.c vx +src/hash/hash_rec.c vx +src/hash/hash_reclaim.c vx +src/hash/hash_stat.c vx +src/hash/hash_stub.c android vxsmall +src/hash/hash_upgrade.c vx +src/hash/hash_verify.c vx +src/heap/heap_auto.c vx +src/heap/heap_autop.c vx +src/heap/heap.c vx +src/heap/heap_conv.c vx +src/heap/heap_method.c vx +src/heap/heap_open.c vx +src/heap/heap_rec.c vx +src/heap/heap_reclaim.c vx +src/heap/heap_stat.c vx +src/heap/heap_stub.c android vxsmall +src/heap/heap_verify.c vx +src/hmac/hmac.c android vx vxsmall +src/hmac/sha1.c android vx vxsmall +src/lock/lock.c android vx vxsmall +src/lock/lock_deadlock.c android vx vxsmall +src/lock/lock_failchk.c android vx vxsmall +src/lock/lock_id.c android vx vxsmall +src/lock/lock_list.c android vx vxsmall +src/lock/lock_method.c android vx vxsmall +src/lock/lock_region.c android vx vxsmall +src/lock/lock_stat.c android vx vxsmall +src/lock/lock_timer.c android vx vxsmall +src/lock/lock_util.c android vx vxsmall +src/log/log.c android vx vxsmall +src/log/log_archive.c android vx vxsmall +src/log/log_compare.c android vx vxsmall +src/log/log_debug.c android vx vxsmall +src/log/log_get.c android vx vxsmall +src/log/log_method.c android vx vxsmall +src/log/log_print.c android vx vxsmall +src/log/log_put.c android vx vxsmall +src/log/log_stat.c android vx vxsmall +src/log/log_verify.c vx +src/log/log_verify_util.c vx +src/log/log_verify_auto.c vx +src/log/log_verify_int.c vx +src/log/log_verify_stub.c android vxsmall +src/mp/mp_alloc.c android vx vxsmall 
+src/mp/mp_bh.c android vx vxsmall +src/mp/mp_fget.c android vx vxsmall +src/mp/mp_fmethod.c android vx vxsmall +src/mp/mp_fopen.c android vx vxsmall +src/mp/mp_fput.c android vx vxsmall +src/mp/mp_fset.c android vx vxsmall +src/mp/mp_method.c android vx vxsmall +src/mp/mp_mvcc.c android vx vxsmall +src/mp/mp_region.c android vx vxsmall +src/mp/mp_register.c android vx vxsmall +src/mp/mp_resize.c android vx vxsmall +src/mp/mp_stat.c android vx vxsmall +src/mp/mp_sync.c android vx vxsmall +src/mp/mp_trickle.c android vx vxsmall +src/mutex/mut_alloc.c android vx vxsmall +src/mutex/mut_failchk.c android vx vxsmall +src/mutex/mut_fcntl.c +src/mutex/mut_method.c android vx vxsmall +src/mutex/mut_region.c android vx vxsmall +src/mutex/mut_stat.c android vx vxsmall +src/mutex/mut_tas.c android vx vxsmall +src/mutex/mut_win32.c ce_small +src/os/os_abort.c android vx vxsmall +src/os/os_abs.c android +src/os/os_addrinfo.c vx +src/os/os_alloc.c android vx vxsmall +src/os/os_clock.c android vx vxsmall +src/os/os_config.c android +src/os/os_cpu.c android vx vxsmall +src/os/os_ctime.c android vx vxsmall +src/os/os_dir.c android vx vxsmall +src/os/os_errno.c android vx vxsmall +src/os/os_fid.c android vx vxsmall +src/os/os_flock.c android vx vxsmall +src/os/os_fsync.c android vx vxsmall +src/os/os_getenv.c android vx vxsmall +src/os/os_handle.c android vx vxsmall +src/os/os_map.c android +src/os/os_mkdir.c android vx vxsmall +src/os/os_open.c android vx vxsmall +src/os/os_pid.c android vx vxsmall +src/os/os_rename.c android vx vxsmall +src/os/os_root.c android vx vxsmall +src/os/os_rpath.c android +src/os/os_rw.c android vx vxsmall +src/os/os_seek.c android vx vxsmall +src/os/os_stack.c android vx vxsmall +src/os/os_stat.c android vx vxsmall +src/os/os_tmpdir.c android vx vxsmall +src/os/os_truncate.c android vx vxsmall +src/os/os_uid.c android vx vxsmall +src/os/os_unlink.c android vx vxsmall +src/os/os_yield.c android +src/os_qnx/os_qnx_fsync.c +src/os_qnx/os_qnx_open.c 
+src/os_vxworks/os_vx_abs.c vx vxsmall +src/os_vxworks/os_vx_config.c vx vxsmall +src/os_vxworks/os_vx_map.c vx vxsmall +src/os_vxworks/os_vx_rpath.c vx vxsmall +src/os_vxworks/os_vx_yield.c vx vxsmall +src/qam/qam.c vx +src/qam/qam_auto.c vx +src/qam/qam_autop.c vx6 +src/qam/qam_conv.c vx +src/qam/qam_files.c vx +src/qam/qam_method.c vx +src/qam/qam_open.c vx +src/qam/qam_rec.c vx +src/qam/qam_stat.c vx +src/qam/qam_stub.c android vxsmall +src/qam/qam_upgrade.c vx +src/qam/qam_verify.c vx +src/rep/rep_automsg.c vx +src/rep/rep_backup.c vx +src/rep/rep_elect.c vx +src/rep/rep_lease.c vx +src/rep/rep_log.c vx +src/rep/rep_method.c vx +src/rep/rep_record.c vx +src/rep/rep_region.c vx +src/rep/rep_stat.c vx +src/rep/rep_stub.c android vxsmall +src/rep/rep_util.c vx +src/rep/rep_verify.c vx +src/repmgr/repmgr_auto.c vx +src/repmgr/repmgr_autop.c vx6 +src/repmgr/repmgr_automsg.c vx +src/repmgr/repmgr_elect.c vx +src/repmgr/repmgr_method.c vx +src/repmgr/repmgr_msg.c vx +src/repmgr/repmgr_net.c vx +src/repmgr/repmgr_posix.c vx +src/repmgr/repmgr_queue.c vx +src/repmgr/repmgr_rec.c vx +src/repmgr/repmgr_sel.c vx +src/repmgr/repmgr_stat.c vx +src/repmgr/repmgr_stub.c android vxsmall +src/repmgr/repmgr_util.c vx +src/sequence/seq_stat.c android vx6 +src/sequence/sequence.c android vx6 +src/txn/txn.c android vx vxsmall +src/txn/txn_auto.c android vx vxsmall +src/txn/txn_autop.c vx6 +src/txn/txn_chkpt.c android vx vxsmall +src/txn/txn_failchk.c android vx vxsmall +src/txn/txn_method.c android vx vxsmall +src/txn/txn_rec.c android vx vxsmall +src/txn/txn_recover.c android vx vxsmall +src/txn/txn_region.c android vx vxsmall +src/txn/txn_stat.c android vx vxsmall +src/txn/txn_util.c android vx vxsmall +src/xa/xa.c vx +src/xa/xa_map.c vx diff --git a/dist/template/rec_btree b/dist/template/rec_btree new file mode 100644 index 00000000..c16219cc --- /dev/null +++ b/dist/template/rec_btree @@ -0,0 +1,917 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" 
+#include "dbinc/__bam.h" +#include "dbinc/log.h" + +/* + * __bam_split_recover -- + * Recovery function for split. + * + * PUBLIC: int __bam_split_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_split_print); + REC_INTRO(__bam_split_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_split_recover -- + * Recovery function for split. 
+ * + * PUBLIC: int __bam_split_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_split_print); + REC_INTRO(__bam_split_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_split_recover -- + * Recovery function for split. 
+ * + * PUBLIC: int __bam_split_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_split_print); + REC_INTRO(__bam_split_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_rsplit_recover -- + * Recovery function for rsplit. 
+ * + * PUBLIC: int __bam_rsplit_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_rsplit_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_rsplit_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_rsplit_print); + REC_INTRO(__bam_rsplit_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_adj_recover -- + * Recovery function for adj. 
+ * + * PUBLIC: int __bam_adj_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_adj_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_adj_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_adj_print); + REC_INTRO(__bam_adj_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_cadjust_recover -- + * Recovery function for cadjust. 
+ * + * PUBLIC: int __bam_cadjust_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_cadjust_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_cadjust_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_cadjust_print); + REC_INTRO(__bam_cadjust_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_cdel_recover -- + * Recovery function for cdel. 
+ * + * PUBLIC: int __bam_cdel_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_cdel_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_cdel_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_cdel_print); + REC_INTRO(__bam_cdel_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_repl_recover -- + * Recovery function for repl. 
+ * + * PUBLIC: int __bam_repl_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_repl_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_repl_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_repl_print); + REC_INTRO(__bam_repl_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_irep_recover -- + * Recovery function for irep. 
+ * + * PUBLIC: int __bam_irep_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_irep_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_irep_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_irep_print); + REC_INTRO(__bam_irep_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_root_recover -- + * Recovery function for root. 
+ * + * PUBLIC: int __bam_root_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_root_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_root_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_root_print); + REC_INTRO(__bam_root_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_curadj_recover -- + * Recovery function for curadj. 
+ * + * PUBLIC: int __bam_curadj_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_curadj_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_curadj_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_curadj_print); + REC_INTRO(__bam_curadj_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_rcuradj_recover -- + * Recovery function for rcuradj. 
+ * + * PUBLIC: int __bam_rcuradj_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_rcuradj_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_rcuradj_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_rcuradj_print); + REC_INTRO(__bam_rcuradj_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_relink_recover -- + * Recovery function for relink. 
+ * + * PUBLIC: int __bam_relink_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_relink_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_relink_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_relink_print); + REC_INTRO(__bam_relink_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __bam_merge_recover -- + * Recovery function for merge. 
+ * + * PUBLIC: int __bam_merge_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_merge_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_merge_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__bam_merge_print); + REC_INTRO(__bam_merge_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_crdel b/dist/template/rec_crdel new file mode 100644 index 00000000..fabdc3ee --- /dev/null +++ b/dist/template/rec_crdel @@ -0,0 +1,267 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__crdel.h" +#include "dbinc/log.h" + +/* + * __crdel_metasub_recover -- + * Recovery function for metasub. 
+ * + * PUBLIC: int __crdel_metasub_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_metasub_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_metasub_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__crdel_metasub_print); + REC_INTRO(__crdel_metasub_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __crdel_inmem_create_recover -- + * Recovery function for inmem_create. 
+ * + * PUBLIC: int __crdel_inmem_create_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_create_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_create_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__crdel_inmem_create_print); + REC_INTRO(__crdel_inmem_create_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __crdel_inmem_rename_recover -- + * Recovery function for inmem_rename. 
+ * + * PUBLIC: int __crdel_inmem_rename_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_rename_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_rename_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__crdel_inmem_rename_print); + REC_INTRO(__crdel_inmem_rename_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __crdel_inmem_remove_recover -- + * Recovery function for inmem_remove. 
+ * + * PUBLIC: int __crdel_inmem_remove_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_remove_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_remove_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__crdel_inmem_remove_print); + REC_INTRO(__crdel_inmem_remove_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_ctemp b/dist/template/rec_ctemp new file mode 100644 index 00000000..2d90331b --- /dev/null +++ b/dist/template/rec_ctemp @@ -0,0 +1,65 @@ +/* + * PREF_FUNC_recover -- + * Recovery function for FUNC. 
+ * + * PUBLIC: int PREF_FUNC_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +PREF_FUNC_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + PREF_DUP_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(PREF_DUP_print); + REC_INTRO(PREF_DUP_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_db b/dist/template/rec_db new file mode 100644 index 00000000..0325f702 --- /dev/null +++ b/dist/template/rec_db @@ -0,0 +1,1437 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__db.h" +#include "dbinc/log.h" + +/* + * __db_addrem_recover -- + * Recovery function for addrem. 
+ * + * PUBLIC: int __db_addrem_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_addrem_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_addrem_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_addrem_print); + REC_INTRO(__db_addrem_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_addrem_recover -- + * Recovery function for addrem. 
+ * + * PUBLIC: int __db_addrem_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_addrem_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_addrem_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_addrem_print); + REC_INTRO(__db_addrem_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_big_recover -- + * Recovery function for big. 
+ * + * PUBLIC: int __db_big_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_big_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_big_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_big_print); + REC_INTRO(__db_big_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_big_recover -- + * Recovery function for big. 
+ * + * PUBLIC: int __db_big_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_big_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_big_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_big_print); + REC_INTRO(__db_big_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_ovref_recover -- + * Recovery function for ovref. 
+ * + * PUBLIC: int __db_ovref_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_ovref_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_ovref_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_ovref_print); + REC_INTRO(__db_ovref_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_relink_recover -- + * Recovery function for relink. 
+ * + * PUBLIC: int __db_relink_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_relink_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_relink_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_relink_print); + REC_INTRO(__db_relink_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_debug_recover -- + * Recovery function for debug. 
+ * + * PUBLIC: int __db_debug_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_debug_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_debug_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_debug_print); + REC_INTRO(__db_debug_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_noop_recover -- + * Recovery function for noop. 
+ * + * PUBLIC: int __db_noop_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_noop_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_noop_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_noop_print); + REC_INTRO(__db_noop_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_alloc_recover -- + * Recovery function for pg_alloc. 
+ * + * PUBLIC: int __db_pg_alloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_alloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_alloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_alloc_print); + REC_INTRO(__db_pg_alloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_alloc_recover -- + * Recovery function for pg_alloc. 
+ * + * PUBLIC: int __db_pg_alloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_alloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_alloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_alloc_print); + REC_INTRO(__db_pg_alloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_free_recover -- + * Recovery function for pg_free. 
+ * + * PUBLIC: int __db_pg_free_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_free_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_free_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_free_print); + REC_INTRO(__db_pg_free_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_free_recover -- + * Recovery function for pg_free. 
+ * + * PUBLIC: int __db_pg_free_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_free_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_free_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_free_print); + REC_INTRO(__db_pg_free_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_cksum_recover -- + * Recovery function for cksum. 
+ * + * PUBLIC: int __db_cksum_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_cksum_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_cksum_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_cksum_print); + REC_INTRO(__db_cksum_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_freedata_recover -- + * Recovery function for pg_freedata. 
+ * + * PUBLIC: int __db_pg_freedata_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_freedata_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_freedata_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_freedata_print); + REC_INTRO(__db_pg_freedata_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_freedata_recover -- + * Recovery function for pg_freedata. 
+ * + * PUBLIC: int __db_pg_freedata_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_freedata_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_freedata_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_freedata_print); + REC_INTRO(__db_pg_freedata_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_init_recover -- + * Recovery function for pg_init. 
+ * + * PUBLIC: int __db_pg_init_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_init_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_init_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_init_print); + REC_INTRO(__db_pg_init_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_sort_recover -- + * Recovery function for pg_sort. 
+ * + * PUBLIC: int __db_pg_sort_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_sort_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_sort_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_sort_print); + REC_INTRO(__db_pg_sort_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pg_trunc_recover -- + * Recovery function for pg_trunc. 
+ * + * PUBLIC: int __db_pg_trunc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_trunc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_trunc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pg_trunc_print); + REC_INTRO(__db_pg_trunc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_realloc_recover -- + * Recovery function for realloc. 
+ * + * PUBLIC: int __db_realloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_realloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_realloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_realloc_print); + REC_INTRO(__db_realloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_relink_recover -- + * Recovery function for relink. 
+ * + * PUBLIC: int __db_relink_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_relink_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_relink_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_relink_print); + REC_INTRO(__db_relink_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_merge_recover -- + * Recovery function for merge. 
+ * + * PUBLIC: int __db_merge_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_merge_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_merge_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_merge_print); + REC_INTRO(__db_merge_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pgno_recover -- + * Recovery function for pgno. 
+ * + * PUBLIC: int __db_pgno_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pgno_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pgno_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_pgno_print); + REC_INTRO(__db_pgno_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_dbreg b/dist/template/rec_dbreg new file mode 100644 index 00000000..f5657967 --- /dev/null +++ b/dist/template/rec_dbreg @@ -0,0 +1,72 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__dbreg.h" +#include "dbinc/log.h" + +/* + * __dbreg_register_recover -- + * Recovery function for register. 
+ * + * PUBLIC: int __dbreg_register_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__dbreg_register_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __dbreg_register_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__dbreg_register_print); + REC_INTRO(__dbreg_register_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_fileops b/dist/template/rec_fileops new file mode 100644 index 00000000..c2c770d8 --- /dev/null +++ b/dist/template/rec_fileops @@ -0,0 +1,527 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__fop.h" +#include "dbinc/log.h" + +/* + * __fop_create_recover -- + * Recovery function for create. 
+ * + * PUBLIC: int __fop_create_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_create_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_create_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_create_print); + REC_INTRO(__fop_create_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_create_recover -- + * Recovery function for create. 
+ * + * PUBLIC: int __fop_create_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_create_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_create_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_create_print); + REC_INTRO(__fop_create_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_remove_recover -- + * Recovery function for remove. 
+ * + * PUBLIC: int __fop_remove_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_remove_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_remove_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_remove_print); + REC_INTRO(__fop_remove_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_write_recover -- + * Recovery function for write. 
+ * + * PUBLIC: int __fop_write_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_write_print); + REC_INTRO(__fop_write_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_write_recover -- + * Recovery function for write. 
+ * + * PUBLIC: int __fop_write_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_write_print); + REC_INTRO(__fop_write_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_rename_recover -- + * Recovery function for rename. 
+ * + * PUBLIC: int __fop_rename_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_rename_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_rename_noundo_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_rename_noundo_print); + REC_INTRO(__fop_rename_noundo_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_rename_recover -- + * Recovery function for rename. 
+ * + * PUBLIC: int __fop_rename_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_rename_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_rename_noundo_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_rename_noundo_print); + REC_INTRO(__fop_rename_noundo_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __fop_file_remove_recover -- + * Recovery function for file_remove. 
+ * + * PUBLIC: int __fop_file_remove_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_file_remove_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_file_remove_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__fop_file_remove_print); + REC_INTRO(__fop_file_remove_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_hash b/dist/template/rec_hash new file mode 100644 index 00000000..7db84b3e --- /dev/null +++ b/dist/template/rec_hash @@ -0,0 +1,982 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__ham.h" +#include "dbinc/log.h" + +/* + * __ham_insdel_recover -- + * Recovery function for insdel. 
+ * + * PUBLIC: int __ham_insdel_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_insdel_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_insdel_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_insdel_recover -- + * Recovery function for insdel. 
+ * + * PUBLIC: int __ham_insdel_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_insdel_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_insdel_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_newpage_recover -- + * Recovery function for newpage. 
+ * + * PUBLIC: int __ham_newpage_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_newpage_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_newpage_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_newpage_print); + REC_INTRO(__ham_newpage_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_splitdata_recover -- + * Recovery function for splitdata. 
+ * + * PUBLIC: int __ham_splitdata_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_splitdata_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_splitdata_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_splitdata_print); + REC_INTRO(__ham_splitdata_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_replace_recover -- + * Recovery function for replace. 
+ * + * PUBLIC: int __ham_replace_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_replace_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_replace_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_replace_recover -- + * Recovery function for replace. 
+ * + * PUBLIC: int __ham_replace_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_replace_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_replace_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_copypage_recover -- + * Recovery function for copypage. 
+ * + * PUBLIC: int __ham_copypage_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_copypage_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_copypage_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_copypage_print); + REC_INTRO(__ham_copypage_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_metagroup_recover -- + * Recovery function for metagroup. 
+ * + * PUBLIC: int __ham_metagroup_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_metagroup_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_metagroup_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_metagroup_print); + REC_INTRO(__ham_metagroup_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_metagroup_recover -- + * Recovery function for metagroup. 
+ * + * PUBLIC: int __ham_metagroup_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_metagroup_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_metagroup_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_metagroup_print); + REC_INTRO(__ham_metagroup_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_groupalloc_recover -- + * Recovery function for groupalloc. 
+ * + * PUBLIC: int __ham_groupalloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_groupalloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_groupalloc_print); + REC_INTRO(__ham_groupalloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_groupalloc_recover -- + * Recovery function for groupalloc. 
+ * + * PUBLIC: int __ham_groupalloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_groupalloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_groupalloc_print); + REC_INTRO(__ham_groupalloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_changeslot_recover -- + * Recovery function for changeslot. 
+ * + * PUBLIC: int __ham_changeslot_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_changeslot_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_changeslot_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_changeslot_print); + REC_INTRO(__ham_changeslot_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_contract_recover -- + * Recovery function for contract. 
+ * + * PUBLIC: int __ham_contract_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_contract_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_contract_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_contract_print); + REC_INTRO(__ham_contract_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_curadj_recover -- + * Recovery function for curadj. 
+ * + * PUBLIC: int __ham_curadj_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_curadj_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_curadj_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_curadj_print); + REC_INTRO(__ham_curadj_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __ham_chgpg_recover -- + * Recovery function for chgpg. 
+ * + * PUBLIC: int __ham_chgpg_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_chgpg_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_chgpg_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_chgpg_print); + REC_INTRO(__ham_chgpg_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_heap b/dist/template/rec_heap new file mode 100644 index 00000000..b6dc7a3b --- /dev/null +++ b/dist/template/rec_heap @@ -0,0 +1,267 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__heap.h" +#include "dbinc/log.h" + +/* + * __heap_addrem_recover -- + * Recovery function for addrem. 
+ * + * PUBLIC: int __heap_addrem_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_addrem_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__heap_addrem_print); + REC_INTRO(__heap_addrem_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __heap_pg_alloc_recover -- + * Recovery function for pg_alloc. 
+ * + * PUBLIC: int __heap_pg_alloc_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_pg_alloc_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_pg_alloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__heap_pg_alloc_print); + REC_INTRO(__heap_pg_alloc_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __heap_trunc_meta_recover -- + * Recovery function for trunc_meta. 
+ * + * PUBLIC: int __heap_trunc_meta_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_meta_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_trunc_meta_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__heap_trunc_meta_print); + REC_INTRO(__heap_trunc_meta_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __heap_trunc_page_recover -- + * Recovery function for trunc_page. 
+ * + * PUBLIC: int __heap_trunc_page_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_page_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_trunc_page_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__heap_trunc_page_print); + REC_INTRO(__heap_trunc_page_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_qam b/dist/template/rec_qam new file mode 100644 index 00000000..502b555f --- /dev/null +++ b/dist/template/rec_qam @@ -0,0 +1,332 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__qam.h" +#include "dbinc/log.h" + +/* + * __qam_incfirst_recover -- + * Recovery function for incfirst. 
+ * + * PUBLIC: int __qam_incfirst_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_incfirst_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_incfirst_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__qam_incfirst_print); + REC_INTRO(__qam_incfirst_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __qam_mvptr_recover -- + * Recovery function for mvptr. 
+ * + * PUBLIC: int __qam_mvptr_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_mvptr_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_mvptr_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__qam_mvptr_print); + REC_INTRO(__qam_mvptr_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __qam_del_recover -- + * Recovery function for del. 
+ * + * PUBLIC: int __qam_del_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_del_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_del_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__qam_del_print); + REC_INTRO(__qam_del_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __qam_add_recover -- + * Recovery function for add. 
+ * + * PUBLIC: int __qam_add_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_add_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_add_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__qam_add_print); + REC_INTRO(__qam_add_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __qam_delext_recover -- + * Recovery function for delext. 
+ * + * PUBLIC: int __qam_delext_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_delext_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_delext_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__qam_delext_print); + REC_INTRO(__qam_delext_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_repmgr b/dist/template/rec_repmgr new file mode 100644 index 00000000..9f9b2e40 --- /dev/null +++ b/dist/template/rec_repmgr @@ -0,0 +1,72 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__repmgr.h" +#include "dbinc/log.h" + +/* + * __repmgr_member_recover -- + * Recovery function for member. 
+ * + * PUBLIC: int __repmgr_member_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__repmgr_member_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __repmgr_member_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__repmgr_member_print); + REC_INTRO(__repmgr_member_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_txn b/dist/template/rec_txn new file mode 100644 index 00000000..89f05387 --- /dev/null +++ b/dist/template/rec_txn @@ -0,0 +1,527 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/__txn.h" +#include "dbinc/log.h" + +/* + * __txn_regop_recover -- + * Recovery function for regop. 
+ * + * PUBLIC: int __txn_regop_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_regop_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_regop_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_regop_print); + REC_INTRO(__txn_regop_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_regop_recover -- + * Recovery function for regop. 
+ * + * PUBLIC: int __txn_regop_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_regop_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_regop_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_regop_print); + REC_INTRO(__txn_regop_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_ckp_recover -- + * Recovery function for ckp. 
+ * + * PUBLIC: int __txn_ckp_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_ckp_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_ckp_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_ckp_print); + REC_INTRO(__txn_ckp_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_ckp_recover -- + * Recovery function for ckp. 
+ * + * PUBLIC: int __txn_ckp_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_ckp_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_ckp_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_ckp_print); + REC_INTRO(__txn_ckp_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_child_recover -- + * Recovery function for child. 
+ * + * PUBLIC: int __txn_child_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_child_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_child_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_child_print); + REC_INTRO(__txn_child_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_xa_regop_recover -- + * Recovery function for xa_regop. 
+ * + * PUBLIC: int __txn_xa_regop_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_xa_regop_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_xa_regop_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_xa_regop_print); + REC_INTRO(__txn_xa_regop_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_prepare_recover -- + * Recovery function for prepare. 
+ * + * PUBLIC: int __txn_prepare_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_prepare_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_prepare_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_prepare_print); + REC_INTRO(__txn_prepare_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __txn_recycle_recover -- + * Recovery function for recycle. 
+ * + * PUBLIC: int __txn_recycle_recover + * PUBLIC: __P((env *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_recycle_recover(env, dbtp, lsnp, op, info) + env *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_recycle_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__txn_recycle_print); + REC_INTRO(__txn_recycle_read, ip, 0); + + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) + if (DB_REDO(op)) { + if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + + /* + * Use this when there is something like "pagelsn" in the argp + * structure. Sometimes, you might need to compare meta-data + * lsn's instead. + * + * cmp_p = log_compare(&LSN(pagep), argp->pagelsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + if (ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) + goto out; + + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + diff --git a/dist/template/rec_utemp b/dist/template/rec_utemp new file mode 100644 index 00000000..dc3431b1 --- /dev/null +++ b/dist/template/rec_utemp @@ -0,0 +1,68 @@ +/* + * PREF_FUNC_recover -- + * Recovery function for FUNC. 
+ * + * PUBLIC: int PREF_FUNC_recover + * PUBLIC: __P((dbenv *, DBT *, DB_LSN *, db_recops)); + */ +int +PREF_FUNC_recover(dbenv, dbtp, lsnp, op) + dbenv *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; +{ + PREF_DUP_args *argp; + int cmp_n, cmp_p, modified, ret; + +#ifdef DEBUG_RECOVER + (void)PREF_DUP_print(dbenv, dbtp, lsnp, op); +#endif + argp = NULL; + if ((ret = PREF_DUP_read(dbenv, dbtp->data, &argp)) != 0) + goto out; + + modified = 0; + cmp_n = 0; + cmp_p = 0; + + /* + * The function now needs to calculate cmp_n and cmp_p based + * on whatever is in argp (usually an LSN representing the state + * of an object BEFORE the operation described in this record was + * applied) and whatever other information the function needs, + * e.g., the LSN of the object as it exists now. + * + * cmp_p should be set to 0 if the current state of the object + * is believed to be same as the state of the object BEFORE the + * described operation was applied. For example, if you had an + * LSN in the log record (argp->prevlsn) and a current LSN of the + * object (curlsn), you might want to do: + * + * cmp_p = log_compare(curlsn, argp->prevlsn); + * + * Similarly, cmp_n should be set to 0 if the current state + * of the object reflects the object AFTER this operation has + * been applied. Thus, if you can figure out an object's current + * LSN, yo might set cmp_n as: + * + * cmp_n = log_compare(lsnp, curlsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + + /* Allow for following LSN pointers through a transaction. 
*/ + *lsnp = argp->prev_lsn; + ret = 0; + +out: if (argp != NULL) + free(argp); + + return (ret); +} + diff --git a/dist/validate/README b/dist/validate/README new file mode 100644 index 00000000..65afafe6 --- /dev/null +++ b/dist/validate/README @@ -0,0 +1,10 @@ +All the files in this folder implement test cases that validate some component +of the Berkeley DB system. + +They are driven by script files, that are shell script based. They are not +generally designed to be run outside of the shell. + +There is a "driver" script called s_validate in the dist directory of the +repository that can be used to run all the scripts in this directory. + +Any new script will be automatically run by the s_validate driver. diff --git a/dist/validate/s_chk_build_configs b/dist/validate/s_chk_build_configs new file mode 100644 index 00000000..56579a7e --- /dev/null +++ b/dist/validate/s_chk_build_configs @@ -0,0 +1,118 @@ +#!/bin/sh - +# +# $Id$ +# +# Build a program that calls the run-time API configuration functions. +d=../.. +b=tmp_build + +trap 'rm -rf $b ; exit 0' 0 +trap 'rm -rf $b ; exit 1' 1 2 3 13 15 + +[ -f $d/LICENSE ] || { + echo 'FAIL: Test must be run from scr directory.' + exit 1 +} + +# Flags to build Java. +JAVA_INC=/usr/local/diablo-jdk1.5.0/include +JAVA_FLAGS="-I$JAVA_INC -I$JAVA_INC/linux -I$JAVA_INC/freebsd" + +# Configure and build. +# $1: config flags +config() +{ + (echo `date`; echo "run: $1: $dir") | tee CONFIGURATION + + $d/../dist/configure $1 > config.OUT 2>&1 + if test $? -ne 0; then + echo "$i: FAILED in configure" + return 1 + fi + + if `echo "$1" | grep disable-statistics > /dev/null`; then + echo '#define __TEST_DB_NO_STATISTICS 1' >> db_config.h + fi + + (echo /^CFLAGS=/ && + # Configure gcc to complain about everything, and make warnings fatal + # errors. 
+ # TODO: This used to -Werror, but that fails regularly now :( + echo \ + 's/-c /-c -W -Wall -Wpointer-arith -Wmissing-prototypes /' && + # Warnings are fatal errors, so don't set gcc warning flags for files + # where we can't avoid warnings. + echo '/^db_java_wrap.*: .*db_java_wrap.c$/' && + echo '+1s/\$(CFLAGS)/-c \$(CPPFLAGS)/' && + echo '/^tcl_db_pkg.*: .*tcl_db_pkg.c$/' && + echo '+1s/\$(CFLAGS)/-c \$(CPPFLAGS)/' && + echo w && + echo q) | ed Makefile > /dev/null + + # If we're compiling Java, we'll need to set up the path. + echo "$1" | grep enable-java > /dev/null + if test $? -eq 0; then + (echo /^CPPFLAGS=/ && + echo "s;\$; $JAVA_FLAGS;" && + echo w && + echo q) | ed Makefile > /dev/null + fi + + make > mklog 2>&1 && make ex_access >> mklog 2>&1 + if test $? -ne 0; then + echo "$i: FAILED in make" + return 1 + fi + + (echo a; echo b; echo c) | ./ex_access > /dev/null 2>&1 + return $? +} + +# Run a test. +# $1: config flags +count=0 +r() +{ + count=$(expr $count + 1) + dir="$b.$count" + (rm -rf $dir && mkdir $dir && cd $dir && config "$1") + if test $? -eq 0; then + rm -rf $dir + else + echo "$1: FAILED to build" + fi +} + +# Run through all of the standard single options. +s="\ +--disable-cryptography \ +--disable-hash \ +--disable-largefile \ +--disable-mutexsupport \ +--disable-queue \ +--disable-replication \ +--disable-statistics \ +--disable-verify \ +--enable-compat185 \ +--enable-debug \ +--enable-debug_rop \ +--enable-debug_wop \ +--enable-diagnostic \ +--enable-dump185 \ +--enable-posixmutexes \ +--enable-smallbuild \ +--enable-umrw \ +--with-mutex=x86/gcc-assembly \ +--with-uniquename=__KEITH__" +for i in $s; do + r "$i --disable-shared" +done + +# Build specific runs of interest. 
+r +r "--disable-static" +r "--enable-cxx" +r "--enable-java" +r "--with-tcl=/usr/local/lib/tcl8.4" +r "--enable-test --with-tcl=/usr/local/lib/tcl8.4" +r "--enable-cxx --enable-java --with-tcl=/usr/local/lib/tcl8.4" diff --git a/dist/validate/s_chk_comma b/dist/validate/s_chk_comma new file mode 100644 index 00000000..86cc25f4 --- /dev/null +++ b/dist/validate/s_chk_comma @@ -0,0 +1,35 @@ +#!/bin/sh - +# +# $Id$ +# +# Look for trailing commas in declarations. Some compilers can't handle: +# enum { +# foo, +# bar, +# }; +# mar 2010 added -D_GNU_SOURCE to compile on linux blade server + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +s=$d/src +if cc -g -Wall -D_GNU_SOURCE $0.c -o t; then + : +else + echo "FAIL: unable to compile test program $0.c" + exit 1 +fi + +if ./t $s/*/*.[ch] $s/*/*.in; then + : +else + echo "FAIL: test program failed" + exit 1 +fi + +rm -rf t + +exit 0 diff --git a/dist/validate/s_chk_comma.c b/dist/validate/s_chk_comma.c new file mode 100644 index 00000000..4a41bddc --- /dev/null +++ b/dist/validate/s_chk_comma.c @@ -0,0 +1,53 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include + +#include +#include +#include +#include +#include + +int +chk(f) + char *f; +{ + int ch, l, r; + + if (freopen(f, "r", stdin) == NULL) { + fprintf(stderr, "%s: %s\n", f, strerror(errno)); + exit(EXIT_FAILURE); + } + for (l = 1, r = 0; (ch = getchar()) != EOF;) { + if (ch != ',') + goto next; + do { ch = getchar(); } while (isblank(ch)); + if (ch != '\n') + goto next; + ++l; + do { ch = getchar(); } while (isblank(ch)); + if (ch != '}') + goto next; + r = 1; + printf("%s: line %d\n", f, l); + +next: if (ch == '\n') + ++l; + } + return (r); +} + +int +main(int argc, char *argv[]) +{ + int r; + + for (r = 0; *++argv != NULL;) + if (chk(*argv)) + r = 1; + return (r); +} diff --git a/dist/validate/s_chk_copyright b/dist/validate/s_chk_copyright new file mode 100644 index 00000000..bdd16231 --- /dev/null +++ b/dist/validate/s_chk_copyright @@ -0,0 +1,53 @@ +#!/bin/sh - +# +# $Id$ +# +# Check all source files for proper copyright notices. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +t1=__1 +t2=__2 + +# create regex for Copyright notice using current year +COPYEXP='Copyright.*'`date +%C%y`'.*affiliates' +echo $COPYEXP + +(cd $d && find . -name '*.[chys]' -o -name '*.cpp' -o -name '*.tcl' \ + -o -name '*.java' -o -name '*.cs' -o -name '*.hpp' \ + -o -name '*.src' | xargs egrep -l $COPYEXP) > $t1 + +# use sed to remove the files we do not care about, these are the ones +# from 3rd parties that are included in our distribution + +(cd $d && find . 
-name '*.[chys]' -o -name '*.cpp' -o -name '*.tcl' \ + -o -name '*.java' -o -name '*.cs' -o -name '*.hpp') | tee /tmp/o | + sed -e '/crypto\//d' \ + -e '/dbinc_auto\//d' \ + -e '/lang\/csharp\/src\/Internal\//d'\ + -e '/lang\/java\/src\/com\/sleepycat\/db\/internal\//d' \ + -e '/lang\/sql\/generated\//d' \ + -e '/db_csharp_wrap.c$/d' \ + -e '/db_java_wrap.c$/d' \ + -e '/perl\//d' \ + -e '/sha1.c$/d' \ + -e '/sleepycat\/asm\//d' \ + -e '/sqlite\//d' \ + -e '/test\/stl\//d' > $t2 + + +if diff $t1 $t2 > /dev/null; then + exit 0 +else + echo "<<< source tree >>> missing copyright notices" + diff $t1 $t2 | grep '>' | awk '{print $2}' + exit 1 +fi + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_defines b/dist/validate/s_chk_defines new file mode 100644 index 00000000..824c3ae2 --- /dev/null +++ b/dist/validate/s_chk_defines @@ -0,0 +1,124 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure that all #defines are actually used. +# Check to make sure that all #defines start in column 1. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +exitv=0 +t1=__1 +t2=__2 +t3=__3 + +find $d -name '*.c' -o -name '*.cpp' | + sed -e '/\/php_db4\//d' \ + -e '/\/sql\//d'| + xargs egrep '^[ ][ ]*#' > $t1 +test -s $t1 && { + echo "FAIL: found #defines with leading white space:" + cat $t1 + exit 1 +} + +egrep '^#define' $d/src/dbinc/*.h $d/src/dbinc/*.in | + sed -e '/db_185.in/d' -e '/queue.h/d' | + awk '{print $2}' | + sed -e '/^AI_CANONNAME/d' \ + -e '/^AI_NUMERICHOST/d' \ + -e '/^B_DELETE/d' \ + -e '/^B_MAX/d' \ + -e '/^CHECK_THREAD/d' \ + -e '/^DB_BTREEOLDVER/d' \ + -e '/^DB_DEGREE_2/d' \ + -e '/^DB_HASHOLDVER/d' \ + -e '/^DB_LOCKVERSION/d' \ + -e '/^DB_LOG_PERM_42_44/d' \ + -e '/^DB_LOG_RESEND_42_44/d' \ + -e '/^DB_MAX_PAGES/d' \ + -e '/^DB_PAGE_QUEUE_LEN/d' \ + -e '/^DB_QAMOLDVER/d' \ + -e '/^DB_RETURNS_A_KEY_HASH/d' \ + -e '/^DB_SPARE_FLAG/d' \ + -e '/^DB_TXNVERSION/d' \ + -e '/^DB_UNUSED/d' \ + -e '/^DEFINE_DB_CLASS/d' \ + -e '/^FHASH/d' \ + -e '/^HASH_UNUSED/d' \ + -e '/^HPUX_MUTEX_PAD/d' \ + -e '/^LOG_OP/d' \ + -e '/^MAX_ID/d' \ + -e '/^MAXHOSTNAMELEN/d' \ + -e '/^MINFILL/d' \ + -e '/^MUTEX_FIELDS/d' \ + -e '/^MUTEX_LOCK_PARTITION/d' \ + -e '/^MUTEX_UNLOCK_PARTITION/d' \ + -e '/^NAME_TO_SEQUENCE/d' \ + -e '/^NCACHED2X/d' \ + -e '/^NCACHED30/d' \ + -e '/^PAIR_MASK/d' \ + -e '/^P_16_COPY/d' \ + -e '/^P_32_COPY/d' \ + -e '/^P_32_SWAP/d' \ + -e '/^P_64_COPY/d' \ + -e '/^P_64_SWAP/d' \ + -e '/^P_TO_UINT16/d' \ + -e '/^QPAGE_CHKSUM/d' \ + -e '/^QPAGE_NORMAL/d' \ + -e '/^QPAGE_SEC/d' \ + -e '/^SIZEOF_PAGE/d' \ + -e '/^TAILQ_/d' \ + -e '/^UINT64_FMT/d' \ + -e '/^UINT64_MAX/d' \ + -e '/^VM_PAGESIZE/d' \ + -e '/^WRAPPED_CLASS/d' \ + -e '/^_WIN32_WINNT/d' \ + -e '/^__BIT_TYPES_DEFINED__/d' \ + -e '/^__DBC_INTERNAL/d' \ + -e '/^__STDC__/d' \ + -e '/^__lock_locker_hash/d' \ + -e '/^i_/d' \ + -e '/_H_/d' \ + -e '/__DB_IMPORT/d' \ + -e '/ERR_ORIGIN/d' \ + -e '/LOG_DBT_SIZE/d' \ + -e '/__mutex_timedlock/d' \ + -e '/__mutex_tryrdlock/d' \ + -e '/OP_MODE_SHIFT/d' \ + -e '/OP_PAGE_MASK/d' \ + -e '/PAIR_/d' \ + 
-e '/PERFMON/d' \ + -e '/WARNING_ORIGIN/d' \ + -e '/STAT_DEC/d' \ + -e '/STAT_SET/d' \ + -e '/MUTEX_SET/d' \ + -e '/XA.*/d' \ + -e '/TM[EM_].*/d' \ + -e '/MAX[BGI].*SIZE/d' \ + -e 's/(.*//' | sort > $t1 + +find $d -name '*.c' -o -name '*.cpp' > $t2 +for i in `cat $t1`; do + if egrep -w $i `cat $t2` > /dev/null; then + :; + else + f=`egrep -l "#define.*$i" $d/src/dbinc/*.h $d/src/dbinc/*.in | + sed 's;\.\.\/\.\.\/src\/dbinc/;;' | tr -s "[:space:]" " "` + echo "FAIL: $i: $f" + fi +done | sort -k 2 > $t3 + +test -s $t3 && { + cat $t3 + echo "FAIL: found unused #defines" + exit 1 +} + +rm -f $t1 $t2 $t3 + +exit $exitv diff --git a/dist/validate/s_chk_err b/dist/validate/s_chk_err new file mode 100644 index 00000000..d79eb20c --- /dev/null +++ b/dist/validate/s_chk_err @@ -0,0 +1,34 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure all of the error values have corresponding error +# message strings in db_strerror(). + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +s=$d/src +t1=__1 +t2=__2 + +egrep -- "define.*DB_.*-309" $s/dbinc/db.in | awk '{print $2}' > $t1 +sed -e '/^db_strerror/,/^}/{' \ + -e '/ case DB_/{' \ + -e 's/:.*//' \ + -e 's/.* //' \ + -e p \ + -e '}' \ + -e '}' \ + -e d \ + < $s/common/db_err.c > $t2 + +cmp $t1 $t2 > /dev/null || +(echo "<<< db.h >>> db_strerror" && diff $t1 $t2 && exit 1) + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_ext_method_calls b/dist/validate/s_chk_ext_method_calls new file mode 100644 index 00000000..9ac4f695 --- /dev/null +++ b/dist/validate/s_chk_ext_method_calls @@ -0,0 +1,89 @@ +#!/bin/sh - +# +# $Id: s_chk_ext_method_calls,v 0f73af5ae3da 2010/05/10 05:38:40 alexander $ +# +# Check that DB doesn't call DB or DB_ENV methods internally. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +s=$d/src + +t=__1 + +echo ======================================================== +echo "Check that DB doesn't call DB or DB_ENV methods internally." +echo ======================================================== +m=`grep '(\*[a-z][_a-z]*)' $s/dbinc/db.in | + sed -e 's/^[^(]*(\*//' \ + -e 's/).*//' \ + -e '/alt_close/d' \ + -e '/am_bulk/d' \ + -e '/am_close/d' \ + -e '/am_del/d' \ + -e '/am_destroy/d' \ + -e '/am_get/d' \ + -e '/am_put/d' \ + -e '/am_remove/d' \ + -e '/am_rename/d' \ + -e '/am_writelock/d' \ + -e '/app_dispatch/d' \ + -e '/db_append_recno/d' \ + -e '/db_errcall/d' \ + -e '/db_event_func/d' \ + -e '/db_feedback/d' \ + -e '/db_free/d' \ + -e '/db_malloc/d' \ + -e '/db_paniccall/d' \ + -e '/db_realloc/d' \ + -e '/dbt_usercopy/d' \ + -e '/dup_compare/d' \ + -e '/s_callback/d' | + sort -u` + +(cd $s && for i in $m; do + #echo "$i..." > /dev/stderr + egrep -- "->$i\(" */*.[ch] +done) | +sed \ + -e '/Wrapper function for/d' \ + -e '/\/db.h:/d' \ + -e '/^[^:]*:[ ]*\*[ ]/d' \ + -e '/^common\/db_getlong.c:/d' \ + -e '/^common\/util_cache.c:/d' \ + -e '/^common\/util_log.c:/d' \ + -e '/^common\/util_sig.c:/d' \ + -e '/^dbm\//d' \ + -e '/^hsearch\//d' \ + -e '/^mutex\/tm.c:/d' \ + -e '/closeme->close() is a wrapper;/d' \ + -e '/crypto.c.*db_cipher->close/d' \ + -e '/db_err.c:.*dbenv->db_msgcall(dbenv, buf);/d' \ + -e '/db_iface.c:.*(txn->commit(txn, nosync ? 
DB_TXN_NOSYNC : 0));/d' \ + -e '/db_iface.c:.*if ((t_ret = txn->abort(txn)) != 0)/d' \ + -e '/db_iface.c:.*return (dbenv->txn_begin(dbenv, NULL, txnidp, 0));/d' \ + -e '/db_iface.c:.*return (dbp->get(dbp, txn, key, &data, flags));/d' \ + -e '/dbenv->is_alive(/d' \ + -e '/dbenv->thread_id(/d' \ + -e '/dbenv->thread_id_string(/d' \ + -e '/rep_util.c:.*ret = dbenv->rep_send(/d' \ + -e '/test_mutex.c:/d' \ + -e '/xa.c:/d' \ + -e '/repmgr\/repmgr_net.c.*env->close()/d'\ + -e '/repmgr\/repmgr_method.c.*DB_CHANNEL->send_request()/d' \ + -e '/dbinc\/repmgr.h.*set_config()/d' \ + -e '/db_vrfyutil.c:.*vdp->txn->commit/d' \ +>$t + +test -s $t && { + cat $t + echo "FAIL: found DB/DB_ENV method calls DB library." + exit 1 +} + +rm -f $t + +exit 0 diff --git a/dist/validate/s_chk_flags b/dist/validate/s_chk_flags new file mode 100644 index 00000000..f7f65cbc --- /dev/null +++ b/dist/validate/s_chk_flags @@ -0,0 +1,186 @@ +#!/bin/sh - +# +# $Id$ +# +# Check flag name-spaces. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +t1=__1 +t2=__2 + +if cc -g -Wall $0.c -o t; then + : +else + echo "FAIL: unable to compile test program $0.c" + exit 1 +fi + +if ./t $d/*/*.[ch] $d/*/*.in > $t1; then + : +else + echo "FAIL: test program failed" + exit 1 +fi + +echo 'Checking "dbenv" variables with flags other than DB_ENV_XXX' +grep 'dbenv,' $t1 | +sed -e '/DB_ENV_/d' \ + -e '/env_method.c.*, mapped_flags*)/d' \ + -e '/env_region.c.*, flags_orig*)/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking DB_ENV_XXX flags with variables other than "dbenv"' +grep 'DB_ENV_' $t1 | +sed -e '/dbenv,/d' \ + -e '/(dbenv),/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking "env" variables with flags other than ENV_XXX' +grep '[^b]env,' $t1 | +sed -e '/[^B]ENV_/d' \ + -e '/env_method.c.*, mapped_flags*)/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking ENV_XXX flags with variables other than "env"' +grep '[^A-Z_]ENV_' $t1 | +sed -e '/[^b]env,/d' \ + -e '/(env),/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking dbenv "verbose" field with flags other than DB_VERB_XXX' +grep -- 'dbenv->verbose,' $t1 | +sed -e '/DB_VERB_/d' \ + -e '/env_method.c.*, which)/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +# there are a few exceptions in rep_util.c +echo 'Checking DB_VER_XXX flags with other than dbenv "verbose" field' +grep -- 'DB_VERB_' $t1 | +sed -e '/dbenv->verbose,/d' \ + -e '/rep_util.c.*(verbflag/d' \ + -e '/rep_util.c.*(tmp_verbose/d' \ + -e '/rep_util.c.*(verbose/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking "db" variables with flags other than DB_AM_XXX' +cp $t1 /tmp/_f +grep 'dbp,' $t1 | +sed -e '/DB_AM_/d' \ + -e '/dbp, mapped_flag)/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking DB_AM_XXX flags with variables other than "db"' +grep 'DB_AM_' $t1 | +sed \ + -e '/(&db,/d' \ + -e '/(db,/d' \ + -e '/log_get.c:.*dbpp,/d' \ + -e '/_method.c:.*outflagsp,/d' \ + -e 
'/partition.c:.*pdbp,/d' \ + -e '/rep_backup.c:.*->flags,/d' \ + -e /'rep_backup.c:.*->db_flags,/d' \ + -e '/db.c:.*save_flags,/d' \ + -e '/((*[ ]*db_rep->rep_db)*,/d' \ + -e '/((*[ ]*dbc)*->dbp,/d' \ + -e '/((*[ ]*dbc_arg->dbp)*,/d' \ + -e '/((*[ ]*dbp)*,/d' \ + -e '/((*[ ]*dbp)*->s_primary,/d' \ + -e '/((D),/d' \ + -e '/((sdbp),/d' \ + -e '/(fdbp,/d' \ + -e '/(file_dbp,/d' \ + -e '/(ldbp,/d' \ + -e '/(msgfp->db_flags,/d' \ + -e '/(mdbp,/d' \ + -e '/(pdbp,/d' \ + -e '/(pginfo, /d' \ + -e '/(rfp->db_flags,/d' \ + -e '/(sdbp,/d' \ + -e '/(subdbp,/d' \ + -e '/fop_util.c:.*(t2dbp,/d' \ + -e '/fop_util.c:.*(tmpdbp,/d' \ + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking "dbc" variables flags with flags other than DBC_XXX' +echo Checking DBC flags... +cat $t1 | +grep 'dbc,' | +sed -e '/DBC_/d' \ + -e '/db_cam.c:.*tmp_read_locking)/d' + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo 'Checking DBC_XXX flags with variables other than "dbc"' +grep 'DBC_' $t1 | +sed -e '/((*dbc)*,/d' \ + -e '/(dbc_arg,/d' \ + -e '/(dbc_c,/d' \ + -e '/(dbc_n,/d' \ + -e '/(dbc_orig,/d' \ + -e '/(opd,/d' \ + -e '/(pdbc,/d' \ + -e '/[(*]put_state[p]*,/d' \ + -e '/(sdbc,/d' \ + -e '/partition.c:.*_dbc,/d' \ + -e '/partition.c:.*_cursor,/d' + > $t2 +[ -s $t2 ] && { + cat $t2 + exit 1 +} + +echo Checking for bad use of macros... +egrep 'case .*F_SET\(|case .*F_CLR\(' $d/*/*.c > $t1 +egrep 'for .*F_SET\(|for .*F_CLR\(' $d/*/*.c >> $t1 +egrep 'if .*F_SET\(|if .*F_CLR\(' $d/*/*.c >> $t1 +egrep 'switch .*F_SET\(|switch .*F_CLR\(' $d/*/*.c >> $t1 +egrep 'while .*F_SET\(|while .*F_CLR\(' $d/*/*.c >> $t1 +[ -s $t1 ] && { + echo 'if statement followed by non-test macro' + cat $t1 + exit 1 +} + +rm -f $t1 $t2 ./t + +exit 0 diff --git a/dist/validate/s_chk_flags.c b/dist/validate/s_chk_flags.c new file mode 100644 index 00000000..bcc3c8b0 --- /dev/null +++ b/dist/validate/s_chk_flags.c @@ -0,0 +1,79 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include + +void +chk(f, str) + char *f, *str; +{ + char *s; + int ch, l, ok, pc; + + if (freopen(f, "r", stdin) == NULL) { + fprintf(stderr, "%s: %s\n", f, strerror(errno)); + exit(EXIT_FAILURE); + } + + for (l = 1, ok = 1, s = str; (ch = getchar()) != EOF;) { + if (ch == '\n') + ++l; + if (!ok || ch != *s) { + s = str; + ok = !isalpha(ch) && !isdigit(ch) && ch != '_'; + continue; + } + if (*++s != '\0') + continue; + + /* Match. */ + printf("%s: %d: %s", f, l, str); + for (pc = 1; (ch = getchar()) != EOF;) { + switch (ch) { + case '(': + ++pc; + break; + case ')': + --pc; + break; + case '\n': + ++l; + break; + } + if (ch == '\n') + putchar(' '); + else + putchar(ch); + if (pc <= 0) { + putchar('\n'); + break; + } + } + s = str; + } +} + +int +main(int argc, char *argv[]) +{ + int r; + + for (r = 0; *++argv != NULL;) { + chk(*argv, "FLD_CLR("); + chk(*argv, "FLD_ISSET("); + chk(*argv, "FLD_SET("); + chk(*argv, "F_CLR("); + chk(*argv, "F_ISSET("); + chk(*argv, "F_SET("); + } + return (0); +} diff --git a/dist/validate/s_chk_inclconfig b/dist/validate/s_chk_inclconfig new file mode 100644 index 00000000..e48e825f --- /dev/null +++ b/dist/validate/s_chk_inclconfig @@ -0,0 +1,46 @@ +#!/bin/sh - +# +# $Id$ +# +# Check for inclusion of db_config.h after "const" or other includes. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +t1=__1 +t2=__2 + +(cd $d && find . 
-name '*.[chys]' -o -name '*.cpp' | + xargs egrep -l '#include.*db_config.h') | tee /tmp/o | + sed -e '/dbdemo.c$/d' \ + -e '/db_java_wrap.c$/d' \ + -e '/test\/stl\/base\/test.h$/d' \ + -e '/ex_apprec.c$/d' > $t1 + +(for i in `cat $t1`; do + egrep -w 'db_config.h|const' /dev/null $d/$i | head -1 +done) > $t2 + +if egrep const $t2 > /dev/null; then + echo 'FAIL: found const before include of db_config.h' + egrep const $t2 + exit 1 +fi + +:> $t2 +for i in `cat $t1`; do + egrep -w '#include' /dev/null $d/$i | head -1 >> $t2 +done + +if egrep -v db_config.h $t2 > /dev/null; then + echo 'FAIL: found includes before include of db_config.h' + egrep -v db_config.h $t2 + exit 1 +fi + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_include b/dist/validate/s_chk_include new file mode 100644 index 00000000..d733d011 --- /dev/null +++ b/dist/validate/s_chk_include @@ -0,0 +1,42 @@ +#!/bin/sh - +# +# $Id$ +# +# Check for inclusion of files already included in db_int.h. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +s=$d/src +t1=__1 +t2=__2 + +egrep -- '#include[ ]' $s/dbinc/db_int.in | +sed -e '/[ ]db\.h'/d \ + -e 's/^#include.//' \ + -e 's/[<>"]//g' \ + -e 's/[ ].*//' > $t1 + +for i in `cat $t1`; do + (cd $s && egrep "^#include[ ].*[<\"]$i[>\"]" */*.[ch]) +done | +sed -e '/^build/d' \ + -e '/^clib\/getopt.c/d' \ + -e '/^dbinc\/mutex_int.h.*/d' \ + -e '/^dbinc\/win_db.h:/d' \ + -e '/^mutex\/tm.c/d' \ + -e '/^os\/os_map.c.*/d' \ + > $t2 + +[ -s $t2 ] && { + echo 'FAIL: found extraneous includes in the source' + cat $t2 + exit 1 +} + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_logverify b/dist/validate/s_chk_logverify new file mode 100644 index 00000000..69de0f16 --- /dev/null +++ b/dist/validate/s_chk_logverify @@ -0,0 +1,270 @@ +#!/bin/sh - +# +# $Id: s_chk_logverify,v 0f73af5ae3da 2010/05/10 05:38:40 alexander $ +# +# Verify that the log verify code works as expected. 
+# Run from build_unix dir on linux blade server + +d=.. +[ -f $d/LICENSE ] || { + echo 'FAIL: Test must be run from scr directory.' + exit 1 +} + +b=./tmp_build/ +s=$d/src + +mkdir -p $b + +opts="--disable-shared --enable-stl" +echo "Building DB library, this can take a while." +(cd $b && ../../dist/configure $opts > /dev/null && make > /dev/null) || { + echo 'FAIL: unable to build libdb.a' + exit 1 +} + +# if compile on linux blade server, add -D_GNU_SOURCE -pthread on cc +CINC="-I$b -I$s" +[ `uname` = "Linux" ] && CINC=" -D_GNU_SOURCE $CINC -pthread" + +if cc -g -Wall $CINC $0.c $b/libdb.a -o t; then + : +else + echo "FAIL: unable to compile test program $0.c" + exit 1 +fi + +if ./t; then + : +else + echo "FAIL: test program failed" + exit 1 +fi + +rm -fr TESTDIR +mkdir -p TESTDIR +resdir=`pwd`/TESTDIR + +# Running log verify internal functions test. +./t + +cd $b +lv=./db_log_verify + +# Running db_perf tests. +make db_perf +cmd=./db_perf +configs=../$d/test/perf/configs + +echo Sanity Tests + +echo "**** db_perf -c $configs/sanity_btree.conf ****" +rm -rf TESTDIR +mkdir TESTDIR +$cmd -I -hTESTDIR -c $configs/sanity_btree.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -c $configs/sanity_btree.conf 2>&1 >> TESTDIR/OUTPUT +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.1 + +echo "**** db_perf -c $configs/sanity_hash.conf ****" +rm -rf TESTDIR +mkdir TESTDIR +$cmd -I -hTESTDIR -c $configs/sanity_hash.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -c $configs/sanity_hash.conf 2>&1 >> TESTDIR/OUTPUT +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.2 + +echo Archive Tests + +echo "**** db_perf -c $configs/archive_fatal_hash.conf -C ****" +rm -rf TESTDIR TESTDIR.A +rm -f logverify.ERR +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/archive_fatal_hash.conf 2>&1 > TESTDIR/OUTPUT +$cmd -C -hTESTDIR -c $configs/archive_fatal_hash.conf 2>&1 >> TESTDIR/OUTPUT +$lv -hTESTDIR 2>logverify.ERR >> TESTDIR/OUTPUT || { +grep 
DB_NOTFOUND logverify.ERR >/dev/null 2>/dev/null || \ + echo "ERROR: expect DB_NOTFOUND in the error output" +} +cp TESTDIR/OUTPUT $resdir/OUTPUT.3 + +echo "**** db_perf -c $configs/archive_fatal_btree.conf -C ****" +rm -rf TESTDIR TESTDIR.A +rm -f logverify.ERR +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/archive_fatal_btree.conf 2>&1 > TESTDIR/OUTPUT +$cmd -C -hTESTDIR -c $configs/archive_fatal_btree.conf 2>&1 >> TESTDIR/OUTPUT +$lv -hTESTDIR 2>logverify.ERR >> TESTDIR/OUTPUT || { +grep DB_NOTFOUND logverify.ERR >/dev/null 2>/dev/null || \ + echo "ERROR: expect DB_NOTFOUND in the error output" +} +cp TESTDIR/OUTPUT $resdir/OUTPUT.4 + +echo "**** db_perf -c $configs/archive_btree.conf -C ****" +rm -rf TESTDIR TESTDIR.A +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/archive_btree.conf 2>&1 > TESTDIR/OUTPUT +$cmd -C -hTESTDIR -c $configs/archive_btree.conf 2>&1 >> TESTDIR/OUTPUT +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.5 + + +echo Crash Tests +echo "**** db_perf -c $configs/crash_test_1.conf ****" +rm -rf TESTDIR TESTDIR.A +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/crash_test_1.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -c $configs/crash_test_1.conf 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.6 + +echo "**** db_perf -c $configs/crash_test_2.conf ****" +rm -rf TESTDIR TESTDIR.A +rm -f logverify.ERR +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/crash_test_2.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -c $configs/crash_test_2.conf 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. 
+$lv -hTESTDIR 2>logverify.ERR >> TESTDIR/OUTPUT || { +grep DB_NOTFOUND logverify.ERR >/dev/null 2>/dev/null || \ + echo "ERROR: expect DB_NOTFOUND in the error output" +} +cp TESTDIR/OUTPUT $resdir/OUTPUT.7 + +echo "**** db_perf -c $configs/crash_test_3.conf ****" +rm -rf TESTDIR TESTDIR.A +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -c $configs/crash_test_3.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -c $configs/crash_test_3.conf 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.8 + +echo "**** db_perf -S 2 -c $configs/crash_test_1.conf ****" +rm -rf TESTDIR TESTDIR.A +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -S 2 -c $configs/crash_test_1.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -S 2 -c $configs/crash_test_1.conf 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.9 + +echo "**** db_perf -S 1 -c $configs/crash_test_4.conf ****" +rm -rf TESTDIR TESTDIR.A +mkdir TESTDIR TESTDIR.A +$cmd -I -hTESTDIR -S 1 -c $configs/crash_test_4.conf 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR -S 1 -c $configs/crash_test_4.conf 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.10 + + + +# Running dbs tests. 
+cmd=./dbs +make dbs + +echo Sanity Tests +echo "************* dbs *******************" +rm -rf TESTDIR +mkdir TESTDIR +$cmd -I -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.11 + +echo "************* dbs -t btree *******************" +rm -rf TESTDIR +mkdir TESTDIR +$cmd -I -tbtree -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -tbtree -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.12 + +echo Archive Tests +echo "************* dbs -CA *******************" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -CA T2 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.13 + +echo "************* dbs -CA -tbtree *******************" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -tbtree -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -CA T2 -tbtree -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.14 + +echo "************* dbs -Ca -tbtree *******************" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -tbtree -c 10 2>&1 > TESTDIR/OUTPUT +$cmd -Ca T2 -tbtree -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.15 + +echo Crash Tests + +echo "************ dbs -K30 -a T2 -tbtree -c 10 -k 4 -V" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -tbtree -c 10 -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -K30 -a T2 -tbtree -c 10 -k 4 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. 
+$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.16 + +echo "************ dbs -K60 -A T2 -tbtree -c 10 -k 4 -V" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -tbtree -c 10 -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -K60 -A T2 -tbtree -c 10 -k 4 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.17 + +echo "************ dbs -K180 -a T2 -tbtree -c 10 -k 4 -V" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -I -tbtree -c 10 -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -K180 -a T2 -tbtree -c 10 -k 4 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.18 + +echo "************ dbs -s 2 -K30 -a T2 -tbtree -c 10 -k 4 -V" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -s 2 -I -tbtree -c 10 -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -s 2 -K30 -A T2 -tbtree -c 10 -k 4 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.19 + +echo "************ dbs -s 1 -K60 -a T2 -tbtree -c 10 -k 4 -V" +rm -rf TESTDIR T2 +mkdir TESTDIR T2 +$cmd -s 1 -I -tbtree -c 10 -hTESTDIR 2>&1 > TESTDIR/OUTPUT +$cmd -s 1 -K60 -a T2 -tbtree -c 10 -k 4 -hTESTDIR 2>&1 >> TESTDIR/OUTPUT +rm TESTDIR/__db.0* # Remove region files because the environment is corrupted. +$lv -h TESTDIR 2>&1 >> TESTDIR/OUTPUT +cp TESTDIR/OUTPUT $resdir/OUTPUT.20 + + + +# Running test_dbstl tests. 
+ +make test_dbstl + +./test_dbstl -I -s b -m t -t a -T 200 -k 50 -l 100 -c 33554432 +$lv -h dbenv +./test_dbstl -I -s h -m t -t e -T 200 -k 50 -l 100 -c 33554432 +$lv -h dbenv +./test_dbstl -I -s b -m t -t e -T 200 -k 50 -l 100 -c 33554432 -M +$lv -h dbenv +./test_dbstl -I -s h -m t -t a -T 200 -k 50 -l 100 -c 33554432 -M +$lv -h dbenv diff --git a/dist/validate/s_chk_logverify.c b/dist/validate/s_chk_logverify.c new file mode 100644 index 00000000..dc6faf69 --- /dev/null +++ b/dist/validate/s_chk_logverify.c @@ -0,0 +1,267 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "db_config.h" +#include "db_int.h" +#include "db.h" + +#include "dbinc/log.h" +#include "dbinc/log_verify.h" + +static int put_get_cmp_ckp __P((DB_LOG_VRFY_INFO *, VRFY_CKP_INFO *, + DB_LSN)); + +static int put_get_cmp_ts __P((DB_LOG_VRFY_INFO *, VRFY_TIMESTAMP_INFO *, + DB_LSN)); +static int put_get_cmp_freg __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *, + const DBT *)); +static int put_get_cmp __P((DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, u_int32_t)); +static int dbt_cmp __P((const DBT *, const DBT *)); +static int dbtarr_cmp __P((const DBT *, const DBT *, u_int32_t)); +/* + * __rem_last_recycle_lsn , clear_fileups __put_filelife, __del_filelife + * __get_filelife __get_filereg_by_dbregid __add_dbregid __get_last_ckp_info + * __get_latest_timestamp_info _find_lsnrg_by_timerg __add_txnrange + * __get_aborttxn __txn_started __add_page_to_txn __del_txn_pages + */ +int +main(argc, argv) + int argc; + char **argv; +{ + int i, ret; + DB_LOG_VERIFY_CONFIG cfg; + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TXN_INFO txninfo; + VRFY_FILEREG_INFO freginfo; + VRFY_CKP_INFO ckpinfo; + VRFY_TIMESTAMP_INFO tsinfo; + DB_LSN rlsn; + char *buf; + u_int32_t bufsz; + DBT fid; + DB_THREAD_INFO *ip; + DB_ENV *dbenv; + + memset(&cfg, 0, sizeof(cfg)); + buf = malloc(bufsz = 2048);// trash bytes to make DBT 
fileids. + cfg.temp_envhome = NULL; + cfg.cachesize = 8 * 1024 * 1024; + + lvinfop = NULL; + memset(&txninfo, 0, sizeof(txninfo)); + memset(&freginfo, 0, sizeof(freginfo)); + memset(&ckpinfo, 0, sizeof(ckpinfo)); + memset(&tsinfo, 0, sizeof(tsinfo)); + memset(&fid, 0, sizeof(fid)); + db_env_create(&dbenv, 0); + dbenv->open(dbenv, NULL, DB_CREATE | DB_INIT_MPOOL, 0644); + + ENV_ENTER(dbenv->env, ip); + if (__create_log_vrfy_info(&cfg, &lvinfop, ip)) + return -1; + + + txninfo.txnid = 80000001; + rlsn.file = 1; + put_get_cmp(lvinfop, &txninfo, txninfo.txnid); + for (i = 1000; i <= 2000; i += 100) { + rlsn.offset = i; + if ((ret = __add_recycle_lsn_range(lvinfop, &rlsn, 80000000, 80000300))) + goto err; + if ((ret = put_get_cmp(lvinfop, &txninfo, txninfo.txnid))) + goto err; + if (i % 200) { + fid.data = buf + abs(rand()) % (bufsz / 2); + fid.size = (char *)fid.data - buf; + if ((ret = __add_file_updated(&txninfo, &fid, i))) + goto err; + } + if ((i % 200 == 0) && ((ret = __del_file_updated(&txninfo, &fid)))) + goto err; + if ((ret = put_get_cmp(lvinfop, &txninfo, txninfo.txnid))) + goto err; + } + freginfo.fileid = fid; + freginfo.fname = "mydb.db"; + if ((ret = put_get_cmp_freg(lvinfop, &freginfo, &freginfo.fileid))) + goto err; + + ckpinfo.lsn.file = 2; + ckpinfo.lsn.offset = 3201; + ckpinfo.ckplsn.file = 2; + ckpinfo.ckplsn.offset = 2824; + if ((ret = put_get_cmp_ckp(lvinfop, &ckpinfo, ckpinfo.lsn))) + goto err; + + tsinfo.lsn.file = 1; + tsinfo.lsn.offset = 829013; + tsinfo.timestamp = time(NULL); + tsinfo.logtype = 123; + if ((ret = put_get_cmp_ts(lvinfop, &tsinfo, tsinfo.lsn))) + goto err; + +err: + __destroy_log_vrfy_info(lvinfop); + ENV_LEAVE(dbenv->env, ip); + dbenv->close(dbenv, 0); + return ret; +} + +static int +put_get_cmp_ckp(lvinfop, ckp, lsn) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_CKP_INFO *ckp; + DB_LSN lsn; +{ + int ret; + VRFY_CKP_INFO *ckppp; + + ckppp = NULL; + if ((ret = __put_ckp_info(lvinfop, ckp))) + goto err; + + if ((ret = 
__get_ckp_info(lvinfop, lsn, &ckppp))) + goto err; + if (memcmp(ckp, ckppp, sizeof(VRFY_CKP_INFO))) { + fprintf(stderr, +"\n__get_ckp_info got different ckp info than the one put by __put_ckp_info"); + goto err; + } +err: + if (ckppp) + __os_free(NULL, ckppp); + if (ret) + printf("\nError in put_get_cmp_ckp"); + return ret; +} + +static int +put_get_cmp_ts(lvinfop, ts, lsn) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TIMESTAMP_INFO *ts; + DB_LSN lsn; +{ + int ret; + VRFY_TIMESTAMP_INFO *tsp; + + tsp = NULL; + if ((ret = __put_timestamp_info(lvinfop, ts))) + goto err; + + if ((ret = __get_timestamp_info(lvinfop, lsn, &tsp))) + goto err; + if (memcmp(ts, tsp, sizeof(VRFY_TIMESTAMP_INFO))) { + fprintf(stderr, +"\n__get_timestamp_info got different timestamp info than the one put by __put_timestamp_info"); + goto err; + } +err: + if (tsp) + __os_free(NULL, tsp); + if (ret) + printf("\nError in put_get_cmp_ts"); + return ret; +} + +static int +put_get_cmp_freg(lvinfop, freg, fid) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_FILEREG_INFO *freg; + const DBT *fid; +{ + int ret; + VRFY_FILEREG_INFO *freginfop; + + freginfop = NULL; + if ((ret = __put_filereg_info(lvinfop, freg))) + goto err; + + if ((ret = __get_filereg_info(lvinfop, fid, &freginfop))) + goto err; + if (memcmp(freg, freginfop, FILE_REG_INFO_FIXSIZE) || + dbt_cmp(&(freg->fileid), &(freginfop->fileid)) || + strcmp(freg->fname, freginfop->fname)) { + fprintf(stderr, +"\n__get_filereg_info got different filereg info than the one put by __put_filereg_info"); + goto err; + } +err: + + if (freginfop) + __free_filereg_info(freginfop); + if (ret) + printf("\nError in put_get_cmp_freg"); + return ret; +} + + +static int +dbt_cmp(d1, d2) + const DBT *d1; + const DBT *d2; +{ + int ret; + + if ((ret = d1->size - d2->size)) + return ret; + + if ((ret = memcmp(d1->data, d2->data, d1->size))) + return ret; + + return 0; +} + +static int +dbtarr_cmp(a1, a2, len) + const DBT *a1; + const DBT *a2; + u_int32_t len; +{ + int i, ret; + + for (i 
= 0; i < len; i++) { + if ((ret = a1[i].size - a2[i].size)) + return ret; + if ((ret = memcmp(a1[i].data, a2[i].data, a1[i].size))) + return ret; + } + + return 0; +} + +static int +put_get_cmp(lvinfop, txninfo, tid) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TXN_INFO *txninfo; + u_int32_t tid; +{ + int ret; + VRFY_TXN_INFO *txninfop; + + txninfop = NULL; + if ((ret = __put_txn_vrfy_info(lvinfop, txninfo))) + goto err; + + if ((ret = __get_txn_vrfy_info(lvinfop, tid, &txninfop))) + goto err; + if (memcmp(txninfo, txninfop, TXN_VERIFY_INFO_FIXSIZE) || + memcmp(txninfo->recycle_lsns, txninfop->recycle_lsns, + sizeof(DB_LSN) * txninfo->num_recycle) || + dbtarr_cmp(txninfo->fileups, txninfop->fileups, + txninfop->filenum)) { + fprintf(stderr, +"\n__get_txn_vrfy_info got different txinfo than the one put by __put_txn_vrfy_info"); + goto err; + } +err: + if (txninfop) + __free_txninfo(txninfop); + if (ret) + printf("\nError in put_get_cmp"); + return ret; +} diff --git a/dist/validate/s_chk_message_id b/dist/validate/s_chk_message_id new file mode 100644 index 00000000..9550d7dd --- /dev/null +++ b/dist/validate/s_chk_message_id @@ -0,0 +1,60 @@ +#!/bin/sh - +# +# $Id$ +# +# Check ID generated for globalization support. +# TODO: Verify the positional parameters (like "%d") in the two strings +# to the macro. 
+ +# Get the list of all message ID + +MSG_DIR="../../src/ ../../util/ ../../lang/dbm/" + +grep -E "DB_STR\([^ ,]*" $MSG_DIR -r -h -o | sed "s/\"//g" | \ + sed "s/DB_STR(//g" > msg_id_unsorted +grep -E "DB_STR_A\([^ ,]*" $MSG_DIR -r -h -o | sed "s/\"//g" | \ + sed "s/DB_STR_A(//g" >> msg_id_unsorted +sort msg_id_unsorted > msg_id_list +rm msg_id_unsorted + +maxline=`cat msg_id_list | wc -l` + +# The exception for duplicate messages +rep_dup_msg_id=`grep -E "\"[0-9]{4}" -r ../../dist/gen_msg.awk -o | \ + sed "s/\"//g"` +lastnum="" +for (( i = 1; i <= $maxline ; i++ )) +do + curnum=`sed -n -e $i"p" msg_id_list` + + # "DB_STR(id" and "DB_STR_A(id" are from macros defination, + # should not be regarded as invalid message id. + if [ "$curnum" == "id" ]; then + continue + fi + + # The message id should be 4 digit number. + if [ `echo $curnum | wc -c` -ne 5 ]; then + echo "[ERROR] size of message id should be 4: $curnum" + exit 1 + fi + if [[ ! -z $(echo $curnum | sed 's/[0-9]//g') ]]; then + echo "[ERROR] message id should be digit number only: $curnum" + exit 1 + fi + + # There shouldn't be duplicate message id. + if [ "$curnum" == "$lastnum" ]; then + if [ "$curnum" == "$rep_dup_msg_id" ]; then + echo "[EXPECTED] duplicate message id: $curnum" + else + echo "[ERROR] duplicate message id: $curnum" + exit 1 + fi + fi + lastnum=$curnum +done + +rm -f msg_id_list + +exit 0 diff --git a/dist/validate/s_chk_mutex_print b/dist/validate/s_chk_mutex_print new file mode 100644 index 00000000..971976b2 --- /dev/null +++ b/dist/validate/s_chk_mutex_print @@ -0,0 +1,36 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure __mutex_print_id knows about all of the mutex types. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +s=$d/src +t1=__1 +t2=__2 + +egrep 'case MTX_.*return' $s/mutex/mut_stat.c | +sed -e 's/.*case //' \ + -e 's/:.*//' | +sort > $t1 + +egrep '#define.MTX_' $s/dbinc/mutex.h | +sed -e 's/#define.//' \ + -e 's/ .*//' \ + -e '/MTX_MAX_ENTRY/d' | +sort > $t2 + +cmp $t1 $t2 > /dev/null || { + echo "<<< mutex/mut_stat.c >>> dbinc/mutex.h" + diff $t1 $t2 + exit 1 +} + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_newline b/dist/validate/s_chk_newline new file mode 100644 index 00000000..aaa02193 --- /dev/null +++ b/dist/validate/s_chk_newline @@ -0,0 +1,116 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure that there are no trailing newlines in __db_err calls. + +d=../.. +[ -f $d/LICENSE ] || { + echo "FAIL: chk.nl can't find the source directory." + exit 1 +} + +cat << END_OF_CODE > t.c +#include + +#include +#include +#include + +int chk(FILE *, char *); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + FILE *fp; + int exitv; + + for (exitv = 0; *++argv != NULL;) { + if ((fp = fopen(*argv, "r")) == NULL) { + fprintf(stderr, "%s: %s\n", *argv, strerror(errno)); + return (1); + } + if (chk(fp, *argv)) + exitv = 1; + (void)fclose(fp); + } + return (exitv); +} + +int +chk(fp, name) + FILE *fp; + char *name; +{ + int ch, exitv, line, q; + + exitv = 0; + for (ch = 'a', line = 1;;) { + if ((ch = getc(fp)) == EOF) + return (exitv); + if (ch == '\n') { + ++line; + continue; + } + if (!isspace(ch)) continue; + if ((ch = getc(fp)) != '_') continue; + if ((ch = getc(fp)) != '_') continue; + if ((ch = getc(fp)) != 'd') continue; + if ((ch = getc(fp)) != 'b') continue; + if ((ch = getc(fp)) != '_') continue; + if ((ch = getc(fp)) != 'e') continue; + if ((ch = getc(fp)) != 'r') continue; + if ((ch = getc(fp)) != 'r') continue; + if ((ch = getc(fp)) != '(') continue; + while ((ch = getc(fp)) != '"') { + if (ch == EOF) + return (exitv); + if (ch == '\n') + ++line; + } + while ((ch = getc(fp)) != '"') + switch (ch) { + case EOF: + return (exitv); + case 
'\\n': + ++line; + break; + case '.': + if ((ch = getc(fp)) != '"') + ungetc(ch, fp); + else { + fprintf(stderr, + "%s: at line %d\n", name, line); + exitv = 1; + } + break; + case '\\\\': + if ((ch = getc(fp)) != 'n') + ungetc(ch, fp); + else if ((ch = getc(fp)) != '"') + ungetc(ch, fp); + else { + fprintf(stderr, + "%s: at line %d\n", name, line); + exitv = 1; + } + break; + } + } + return (exitv); +} +END_OF_CODE + +cc t.c -o t +if ./t $d/*/*/*.[ch] $d/*/*/*.cpp $d/*/*/*.in ; then + : +else + echo "FAIL: found __db_err calls ending with periods/newlines." + exit 1 +fi + +rm t t.c + +exit 0 diff --git a/dist/validate/s_chk_offt b/dist/validate/s_chk_offt new file mode 100644 index 00000000..81610b9b --- /dev/null +++ b/dist/validate/s_chk_offt @@ -0,0 +1,66 @@ +#!/bin/sh - +# +# $Id: s_chk_offt,v 0f73af5ae3da 2010/05/10 05:38:40 alexander $ +# +# Make sure that no off_t's have snuck into the release. +# off_t check is done at runtime in env_sig.c We add a declaration for +# off_t to db.h, so it is always guaranteed to be available. +# MJC:We add a declaration for off_t to db.h, so it is always guaranteed to be +# available. There are already many many cases where data structures are +# different sizes on 64-bit systems than 32-bit systems (we have a runtime +# check for that in env_sig.c). Test is really obsolete. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +t=__1 + +egrep -w off_t $d/src/*/*.[ch] $d/util/*.[ch] $d/src/*/*.in | +sed -e "/#undef off_t/d" \ + -e "/build_wince\//d" \ + -e "/build_windows\//d" \ + -e "/db_env_set_func_ftruncate/d" \ + -e "/db_env_set_func_pread/d" \ + -e "/db_env_set_func_pwrite/d" \ + -e "/db_env_set_func_seek/d" \ + -e "/env_register.c/d" \ + -e "/j_ftruncate/d" \ + -e "/j_pread/d" \ + -e "/j_pwrite/d" \ + -e "/j_seek/d" \ + -e "/mp_fopen.c:.*can't use off_t's here/d" \ + -e "/mp_fopen.c:.*size or type off_t's or/d" \ + -e "/mp_fopen.c:.*where an off_t is 32-bits/d" \ + -e "/mutex\/tm.c:/d" \ + -e "/off_t because its size depends on/d" \ + -e "/os_ext.h/d" \ + -e "/os.h/d" \ + -e "/os_flock.c/d" \ + -e "/rep.h/d" \ + -e "/zerofill.c:.*stat_offset/d" \ + -e "/zerofill.c:.*write_offset/d" \ + -e "/os_map.c:.*(off_t)0))/d" \ + -e "/os_method.c/d" \ + -e "/os_rw.c:/d" \ + -e "/os_seek.c:.*off_t offset;/d" \ + -e "/os_seek.c:.*offset = /d" \ + -e "/os_seek.c:.*off_t/d" \ + -e "/os_truncate.c:.*off_t offset;/d" \ + -e "/os_truncate.c:.*off_t stat_offset;/d" \ + -e "/os_truncate.c:.*offset = /d" \ + -e "/test_perf\/perf_misc.c:/d" \ + -e "/test_server\/dbs.c:/d" \ + -e "/test_vxworks\/vx_mutex.c:/d" > $t + +test -s $t && { + cat $t + echo "FAIL: found questionable off_t usage" + exit 1 +} + +rm -f $t + +exit 0 diff --git a/dist/validate/s_chk_osdir b/dist/validate/s_chk_osdir new file mode 100644 index 00000000..2ba86991 --- /dev/null +++ b/dist/validate/s_chk_osdir @@ -0,0 +1,30 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure the @OSDIR@ entries in the Makefile are correct. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +s=$d/src +t1=__1 +t2=__2 + +egrep '/@OSDIR@/' $d/dist/Makefile.in | sed -e 's/@.*/.c/' > $t1 + +(cd $s/os_windows && ls os_*.c) > $t2 + +cmp $t1 $t2 || { + echo "Makefile @OSDIR@ mismatch with os_windows files" + echo "<<< Makefile >>> os_windows" + diff $t1 $t2 + exit 1 +} + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_proto b/dist/validate/s_chk_proto new file mode 100644 index 00000000..061ac86c --- /dev/null +++ b/dist/validate/s_chk_proto @@ -0,0 +1,46 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure that prototypes are actually needed. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +t1=__1 +t2=__2 +t3=__3 + +egrep '__P' $d/src/dbinc_auto/*.h | + sed -e 's/[ ][ ]*__P.*//' \ + -e 's/^.*[ *]//' \ + -e 's/HANDLER)/HANDLER/' \ + -e '/__db_cprint/d' \ + -e '/__db_lprint/d' \ + -e '/__db_noop_log/d' \ + -e '/__db_prnpage/d' \ + -e '/__db_txnlist_print/d' \ + -e '/__db_util_arg/d' \ + -e '/__ham_func2/d' \ + -e '/__ham_func3/d' \ + -e '/_print$/d' \ + -e '/_read$/d' > $t1 + +find $d -name '*.in' -o -name '*.[ch]' -o -name '*.cpp' > $t2 +for i in `cat $t1`; do + c=$(egrep -Hlw $i $(cat $t2) | wc -l) + echo "$i: $c" +done | egrep ' 1$' > $t3 + +test -s $t3 && { + cat $t3 + echo "FAIL: found unnecessary prototypes." + exit 1 +} + +rm -f $t1 $t2 $t3 + +exit 0 diff --git a/dist/validate/s_chk_pubdef b/dist/validate/s_chk_pubdef new file mode 100644 index 00000000..db767bec --- /dev/null +++ b/dist/validate/s_chk_pubdef @@ -0,0 +1,189 @@ +#!/bin/sh - +# +# Reconcile the list of public defines with the man pages and the Java files. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +docs=$d/docs_src +p=$d/dist/pubdef.in + +exitv=0 + +# remove m4 doc tests, m4 has been removed for 4.8 +# TODO: add test for csharp const +#cat < /dev/null`; then +# : +# else +# echo "$f: $name is missing from $p" +# exitv=1 +# fi +#done + +#cat < /dev/null`; then +# [ "X$isdoc" != "XD" ] && { +# echo "$name should not appear in $f" +# exitv=1 +# } +# else +# [ "X$isdoc" = "XD" ] && { +# echo "$name does not appear in $f" +# exitv=1; +# } +# fi +#done + +cat < /dev/null`; then + : + else + echo "db.in/api_flags.in: $name is missing from $p" + exitv=1 + fi +done + +cat < /dev/null`; then + [ "X$isinc" != "XI" ] && { + echo "$name should not appear in db.in/api_flags.in" + exitv=1 + } + else + [ "X$isinc" = "XI" ] && { + echo "$name does not appear in db.in/api_flags.in" + exitv=1 + } + fi +done + +cat < /dev/null`; then + : + else + echo "$f: $name is missing from $p" + exitv=1 + fi +done + +cat < $t + +sed '/^#/d' $p | +while read name isdoc isinc isjava iscsharp; do + if `egrep -w "$name" $t > /dev/null`; then + [ "X$isjava" != "XJ" ] && { + echo "$name should not appear in the Java API" + exitv=1 + } + else + [ "X$isjava" = "XJ" ] && { + echo "$name does not appear in the Java API" + exitv=1 + } + fi +done + +cat < /dev/null`; then + [ "X$isjava" != "XN" ] && [ "X$isjava" != "XJ" ] && { + echo "$name should not appear in the Java native layer" + exitv=1 + } + else + [ "X$isjava" = "XN" ] && { + echo "$name does not appear in the Java native layer" + exitv=1 + } + fi +done + +rm -f $t + +exit $exitv diff --git a/dist/validate/s_chk_runrecovery b/dist/validate/s_chk_runrecovery new file mode 100644 index 00000000..384666fd --- /dev/null +++ b/dist/validate/s_chk_runrecovery @@ -0,0 +1,44 @@ +#!/bin/sh - +# +# $Id$ +# Check for DB_RUNRECOVERY being specified instead of a call to env_panic. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +s=$d/src + +t1=__1 + +egrep DB_RUNRECOVERY $s/*/*.c | + sed -e '/__env_panic(.*, DB_RUNRECOVERY)/d' \ + -e '/\/php_db4\//d' \ + -e '/case DB_RUNRECOVERY:/d' \ + -e '/db_dispatch.c:.*if (ret == DB_RUNRECOVERY/d' \ + -e '/db_err.c:/d' \ + -e '/env_open.c:.*ret = DB_RUNRECOVERY;/d' \ + -e '/os_errno.c:.*evalue == DB_RUNRECOVERY/d' \ + -e '/mut_fcntl.c:.*return (DB_RUNRECOVERY);/d' \ + -e '/mut_pthread.c:.*return (DB_RUNRECOVERY);/d' \ + -e '/mut_tas.c:.*return (DB_RUNRECOVERY);/d' \ + -e '/mut_tas.c:.*Possibly DB_RUNRECOVERY if/d' \ + -e '/mut_win32.c:.*return (DB_RUNRECOVERY);/d' \ + -e '/mut_win32.c:.*ret = DB_RUNRECOVERY;/d' \ + -e '/rep_backup.c:.*Panic the env and return DB_RUNRECOVERY/d' \ + -e '/rep_backup.c:.*DB_RUNRECOVERY?/d' \ + -e '/txn.c:.* \* DB_RUNRECOVERY and we need to/d' \ + -e '/txn.c:.*returned DB_RUNRECOVERY and we need to/d' \ + -e '/xa.c:.*(ret == DB_RUNRECOVERY)/d' \ + > $t1 +[ -s $t1 ] && { + echo "DB_RUNRECOVERY used; should be a call to env_panic." + cat $t1 + exit 1 +} + +rm -f $t1 + +exit 0 diff --git a/dist/validate/s_chk_spell b/dist/validate/s_chk_spell new file mode 100644 index 00000000..35583c88 --- /dev/null +++ b/dist/validate/s_chk_spell @@ -0,0 +1,43 @@ +#!/bin/sh - +# +# $Id$ +# +# Check spelling in quoted strings. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +t1=__1 + +sed -e '/^#include/d' \ + -e '/"/!d' \ + -e 's/^[^"]*//' \ + -e 's/%s/ /g' \ + -e 's/[^"]*$//' \ + -e 's/\\[nt]/ /g' \ + `find $d -name '*.[ch]' -o -name '*.cpp' -o -name '*.java' | + sed -e '/\/perl\//d' -e '/\/test_erlang\//d'` | +spell | sort -u | comm -23 /dev/stdin s_chk_spell.dict > $t1 + +test -s $t1 && { + cat $t1 + echo "FAIL: found questionable spelling in strings." 
+ exit 1 +} + +egrep -h '/\* | \* ' \ + `find $d -name '*.[ch]' -o -name '*.cpp' | sed -e '/\/perl\//d' -e '/\/test_erlang\//d'` | +spell | sort -u | comm -23 /dev/stdin s_chk_spell.dict > $t1 + +test -s $t1 && { + cat $t1 + echo "FAIL: found questionable spelling in comments." + exit 1 +} + +rm -f $t1 + +exit 0 diff --git a/dist/validate/s_chk_spell.dict b/dist/validate/s_chk_spell.dict new file mode 100644 index 00000000..3d21e5c0 --- /dev/null +++ b/dist/validate/s_chk_spell.dict @@ -0,0 +1,6500 @@ +aa +aaa +aaA +AAAA +aaaaa +aaaaaa +aaaaaaaaaaaaaaaaaaaaaa +AAAAAAAAAAAAAAAAAMAAAAAAAAAAAAAAAAAAAAIIIIIIIIIIIIIIIIDNOAA +AAAAAAAAAAAAAAAABCKLLDDDDDEEEEEEEEEEEEEEEEEEEEAAAAAAAADD +aaaaab +aaaaac +AAAAGGGGGGGHAFBFAAFFAAQPIIJJIIIIIIIIIIIIIIIIII +aaab +aab +aaB +aac +aaC +aad +aaD +aagen +ab +abc +ABc +abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq +abcde +abcdef +ABCDEF +ABCDEFabcdef +abcdefg +abcdefghijklmnopqrstuvwxuz +abcdefghijklmnopqrstuvwxyz +ABCDEFGHIJKLMNOPQRSTUVWXYZ +abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ +abd +abs +ABS +abshome +ABSMODE +absname +abspath +abX +ac +aC +aca +AccessExample +accessor +AccesssExample +accum +Acflmo +ack +ACK +ack'ed +acknowledgement +Acknowledgement +acknowledgements +Acknowledgements +acks +ack's +Acks +ACKS +Aclmop +Aclop +aCode +acos +acplt +activekids +activep +activepips +acttab +ACTTAB +actualData +actualKey +acurs +ada +Adata +addAll +addAnnotation +addbasis +AddCheckConstraint +AddCollateType +AddDefaultValue +AddEnumConstant +addfamilylocker +addField +AddImm +addl +add'l +AddNotNull +addop +addpage +addr +Addr +ADDR +addrBrk +addrem +addressValues +addrinfo +ADDRINFO +addrlen +addrNxt +addrp +addrq +adfind +adh +adj +adjdiff +adjfind +adjindx +adlsVv +admin +adv +AEE +AEEApplet +AES +af +afp +AFP +afpfs +AFPSETLOCK +afterop +afterwards +agg +AggFinal +AggStep +aHash +Ahlm +Ahm +ahr +AIX's +AJVX +al +alanb +aLen +alexg +alfred +alg +ALG +algsetup +alignp +aliveproc +allavailable +ALLBACK +alldb 
+ALLDB +AllFields +alloc +ALLOC +ALLOCA +alloced +alloc'ed +alloc'ing +allocs +AllowAddSecondary +AllowChangeKeyMetadata +AllowChangeKeyMetadataInSubclass +AllowCreate +AllowFieldAddDelete +AllowFieldTypeChanges +AllowPopulate +AllowPriKeyField +allpeers +allreq +ALLZEROES +alsn +alsVv +AlterBeginAddColumn +AlterFinishAddColumn +AlterRenameTable +altertab +altertable +ALTERTABLE +amanda +amd +AMs +amx +ance +andi +aNext +AnnotationDefault +AnnotRemoved +antoon +Antoon +anum +anylist +aNZ +aOffset +ap +aparts +api +Api +API +APIs +apologise +app +App +APP +APPDEF +AppDispatch +appendrecnotest +AppendRecnoTest +appendRecordNumber +appexit +appl +Applock +APPMALLOC +appname +APPNAME +appnotes +apprec +APPREC +apps +app's +AppWizard +aq +arbitary +Arbitary +archivable +archivedir +areDuplicatesAllowed +areDuplicatesOrdered +areFormatsChanged +areKeysRenumbered +arg +ARG +argc +argcount +argp +args +Args +ARGS +argtype +argv +argvIdx +ArithmeticException +Arntzen +arr +arraycopy +ArrayIndex +ArrayList +arrayname +ArrayNameChange +arraysz +ArrayType +arw +asap +asc +ASC +asin +asites +asprintf +assertEquals +AssertionError +ASSOC +AssociateCallbacks +associatetest +AssociateTest +astubble +async +ASYNC +AsyncWrite +ata +atan +atoi +ATOI +atol +ATOL +atomicwrite +att +AttachCurrentThread +attr +ATTR +attrFile +Aug +auth +AUTH +authorSeq +autocheckpoint +autocommit +autoCommit +AutoCommit +AUTOCOMMIT +autoconf +autoconfig +autodetect +autoext +Autoext +autoinc +AUTOINCR +AUTOINCREMENT +autoindex +Autoindex +AUTOINDEX +autoinit +AUTOINIT +autoinstall +autolockIoFinder +autoremove +AUTOREMOVE +autoreset +AUTORESET +autorollback +AUTOROLLBACK +autovacuum +AUTOVACUUM +auxdata +AV +aValues +avg +awk +aygshell +az +azArg +azCol +azResult +baaaaa +backoff +Backoff +backpointer +backtrace +badend +badgen +badkey +badnum +bak +bam +bamc +barkVolume +barreto +Barreto +BASEAPI +baseClass +BaseClass +baseCur +BASETEST +basisflag +bb +bba +BBBB +bbbbb +Bc +BC +BCc +bcd +BCFILprRsvVxX 
+bcfp +bCh +bcompos +bcopy +bcurs +bd +Bdata +bdb +BDB +bdbcmds +bdbExistingStaticBlock +bdbGetField +bdbIsPriKeyFieldNullOrZero +bdbNewArray +bdbNewInstance +bdbReadCompositeKeyFields +bdbReadNonKeyFields +bdbReadPriKeyField +bdbReadSecKeyFields +bdbSetField +bdbsql +bdbtmp +bdbWriteCompositeKeyFields +bdbWriteNonKeyFields +bdbWritePriKeyField +bdbWriteSecKeyFields +BDBXXXXX +BDBXXXXXX +bde +beav +BEFOREIGNOREGEXPLAINSTEADDESCAPEACHECKEYCONSTRAINTERSECTABLEFT +BEGID +beginInclusive +beginKey +BeginTransaction +BeginTrigger +benigncnt +beq +beql +beqzl +berkdb +berkdbcmds +berkeley +BerkeleyDB +BestIndex +bExt +bfname +bfree +bh +BH +bharray +bhfree +bhp +BH's +bhwrite +bi +BI +BigDecimal +bigDecimalFormats +bigending +bigint +BigInt +BigInteger +bigpages +bigpair +BII +binarize +binding's +BindingSpeedTest +binsert +BINTERNAL +BitAnd +BITAND +bitmask +BitMask +bitmasks +BitNot +BITNOT +BitOr +BITOR +bitset +BITSPERBLOCK +bitvec +BitvecBuiltinTest +bk +BKEYDATA +blbs +ble +blk +blkname +blksize +BLKSIZE +bloblit +blobsize +blockable +blockDecrypt +blockEncrypt +blockid +blocknum +blocksize +blockSize +bLookup +bmeta +bmp +BMS +bndx +bne +BNF +bnum +bookId +bookSeq +BOOL +booleanValue +bosselaers +Bosselaers +bostic +BOTHC +boundPV +BOVERFLOW +bp +bqual +br +BR +BREW's +broot +bs +bshift +bsize +BSIZE +bsizep +bSort +bstart +bt +btcompare +BTCOMPARE +BTMETA +btrec +BtRecExample +btree +Btree +BTREE +BtreeBeginStmt +BtreeBeginTrans +BtreeCacheOverflow +btreeCleanupCachedHandles +btreeCleanupEnv +BtreeClearCursor +BtreeClearTable +BtreeClose +btreeCloseAllCursors +BtreeCloseCursor +BtreeCommit +BtreeCommitPhaseOne +BtreeCommitPhaseTwo +BtreeComparator +btreeCompare +BTREECOUNT +BtreeCreateDataTable +BtreeCreateTable +BtreeCursor +BtreeData +BtreeDataFetch +BtreeDataSize +BtreeDelete +BtreeDropTable +BtreeEof +BtreeFirst +BtreeGetFilename +BtreeGetJournalname +BtreeGetMeta +BtreeGetPageSize +BtreeGetReserve +BtreeHoldsAllMutexes +BtreeHoldsMutex +BtreeInsert 
+BtreeIntegrityCheck +BtreeIsInReadTrans +BtreeKey +BtreeKeyFetch +BtreeKeySize +BtreeLast +BtreeLastPage +BtreeLockTable +BTREEMAGIC +BtreeMaxPageCount +btreemeta +BTREEMETA +BtreeMoveto +BtreeMovetoUnpacked +BtreeNext +BTREEOLDVER +BtreeOpen +BtreePrevious +BtreePutData +btreeReopenPrivateEnvironment +BtreeRollback +btrees +Btrees +BtreeSavepoint +BtreeSchema +BtreeSchemaLocked +BtreeSetCacheSize +BtreeSetPageSize +BtreeSetSafetyLevel +BtreeStat +BtreeStats +BtreeSyncDisabled +BtreeTripAllCursors +BtreeUpdateMeta +BTREEVERSION +bucketsp +buf +BUF +BUFID +bufp +bufpt +bufs +bufsize +buildpartial +builtin +BUILTIN +BulkAccessExample +BulkAccessNIOExample +bulkbufsize +bulkbufsz +BulkExample +BULKOVF +bumpSize +BusyHandler +bval +bvec +bylsn +bypage +bytearray +ByteArray +ByteArrayBinding +ByteArrayFormat +ByteArrayInputStream +ByteArrayOutputStream +ByteBuffer +bytecode +byteLen +byteOffset +byteorder +bytesExpected +bytesize +bytesp +bytesToChars +byteswap +byteswapped +bytevalue +byteValue +caaaaa +CacheFilePriority +CacheFileStats +cachemax +cachep +cachesize +Cachesize +CACHESIZE +CacheStats +cachesz +cadjust +callback +Callback +CALLBACK +callbacks +callback's +Callbacks +callbacktest +callbk +calloc +callocs +callpgin +CALLPGIN +call's +canonname +CANONNAME +CANTOPEN +cantopenerr +capi +carg +carglist +carray +carrie +CartoonDeque +CartoonVector +casesensitivelike +catalogDb +catalogtest +cb +CBC +cbts +cc +cC +Cc +CC +CCCC +ccclose +ccnext +ccons +ccrtrtti +ccset +ccurs +CCYYMMDDhhmm +cd +CD +cdata +CDATA +cdb +CDB +CDCDEEEEEEEEEEEEEEEEEEEEBABABBBBDCFFFGGGEDCDCDCDCDCDCDCDCD +CDCEEEEDDDDDDDCDCDCEFEFDDEEFFDEDEEEBDDBBDDDDDDCCCCCCCCEFED +cDd +CDdFILTVvX +cdel +CdFILTvVX +CdFILTvX +cds +CDS +cdsgroup +CDSGroup +CDSGROUP +cDuVv +cEelmNrtVxZ +cEelmNrtVZ +cEelmrtVx +cefh +cefVv +ceh +cellno +celmNrtVZ +celmNtV +celmNtVZ +cerod +CEROD +C'est +ceVv +Cf +CFB +CFf +CFh +cfile +CFILpRsv +CFLprsvVxX +cfp +cFrom +cget +cgetchk +cgt +ch +Ch +chan +changeslot +charkey 
+CHARKEY +charLen +charLength +charMap +charOffset +char's +charset +charValue +checkgen +checklocker +checkpointed +CHECKRESERVEDLOCK +chgpg +CHGPG +CHILDCOMMIT +childcursor +childinc +CHILDINFO +CHILDINFOs +childproof +childput +childs +chk +CHK +CHKPNT +chkpoint +CHKPOINT +chkpt +chkspace +chksum +CHKSUM +chmod +chongo +chunksize +ci +cid +ciitr +cip +cipherInit +cipherInstance +cipherUpdateRounds +ckp +CKP +ckplsn +CKPLSN +ckps +cksum +cl +CL +ClassCastException +classCatalog +ClassCatalog +ClassCatalogDB +ClassEnhancerTask +classID +ClassInfo +className +ClassNotFoundException +classpath +CLASSPATH +classpaths +ClassRemoved +cleandir +clearerr +clearIndexKey +CLEARLEN +CLI +clib +clientData +ClientData +clientdb +clientrun +clientThread +clinit +clist +clnt +clockskew +closeddbtest +closeEnv +closeevent +closefiles +CLOSEFP +closehandle +CloseHandle +closeme +CLpsvxX +CLR +CLRDBC +cls +cmap +cmd +Cmd +CMD +cmdargs +cmdlist +CMDNAME +cmds +cmdx +cmp +Cmp +CMP +cmpfnc +cmpwi +cmpxchgl +cn +cnt +CNT +Co +codec +CODEC +CODECKEY +codegen +CollectionTest +collseq +CollSeq +colname +columnid +COLUMNKW +columnlist +columnmetadata +com +comm +commiting +commitToken +CommitTransaction +comositeFieldOrder +CompactStat +CompactStats +CompanyD +compareDuplicates +compareproc +comparitors +compat +COMPAT +compileoption +COMPILEOPTION +ComplexType +componentClass +componentId +compositeKey +compositeKeyField +CompositeKeyField +COMPQUIET +compresssion +Concat +CONCAT +concatdatakey +CONCATDATAKEY +concatkeydata +CONCATKEYDATA +cond +condwait +conf +CONF +config +Config +CONFIG +ConfigInfo +configlist +Configlist +configname +CONNECTIONBROKEN +ConnectScript +connfail +CONNFAIL +connnect +conslist +const +CONST +ConstantValue +containsAll +containsKey +containsValue +conv +ConvertExample +ConvertFieldContent +convprintable +copyb +copyFrom +copyin +copyKey +copyleft +copymap +CopyObjBytes +copyout +copypage +copypair +copyPV +CorruptionFollowingBusyError +cosh +countElements 
+countif +countp +countRecords +cp +cpage +cpp +cputchk +cq +cr +CRASHFILE +crashparams +crashtest +crdel +creat +CreateEvent +CreateFile +CreateFileForMapping +CreateFileMapping +createflag +CreateHashEntry +CreateIndex +CreateInstance +createkw +CREATELOCKPATH +CreateTable +CreateThread +CreateView +CRTL +crypto +Crypto +CRYPTO +cs +csearch +csp +csr +cstr +csv +CSV +ct +CT +ctime +CTIME +ctlflags +cTo +ctoken +ctp +ctps +ctp's +ctx +CTX +ctxn +curadj +curalloc +curdata +curfile +curinfo +curinval +curlist +curlsn +curregion +currentCount +CurrentImpl +currentOut +CurrentTransaction +currentVersionId +curroffset +currval +curslen +curslist +cursorGet +cursp +cursq +curtime +curwalk +customerName +CuStrCopy +CuTest +cutlim +cuVv +cuz +cvstrac +cxx +CXX +cxxproc +cxxthis +cxxutil +cygwin +Cygwin +das +dat +DatabaseEntry +DatabaseException +databasename +databasetest +DatabaseTest +DatabaseType +dataBinding +DataBinding +databuf +DataBuffer +DataCursor +DataDb +DataEnvironment +DataFormat +datagen +DATAHOME +DataIndex +dataInput +DataInput +DataInputStream +datalen +dataonly +DATAONLY +dataOutput +DataOutput +DataOutputStream +datap +datapage +datasize +DataStore +datastr +datastring +DataThang +dataToObject +datatype +DataType +DataView +DATEDELETEDETACH +datetime +DATETIME +db +Db +DBa +DBaa +DbAppDispatch +DbAppendRecno +DbAssociate +DbAttachImpl +DBaz +DBba +DbBtreeCompare +DbBtreePrefix +DbBtreeStat +dbc +Dbc +DBC +dbca +dbcb +DbcDup +DbcGet +dbcl +dbclear +dbclient +DbClient +dbclose +dbcmds +DbCompact +DbCompactStat +DbCount +dbcp +DbcPut +dbc's +dbcursor +DbCursor +DBcursor +DBCursor +dbdata +DbDeadlockException +dbdel +DbDelete +dbdemo +DBDIR +DbDispatcher +dbdp +DbDupCompare +dbe +dbentry +dbenv +DbEnv +DBENV +DbEnvFeedback +DbEnvFeedbackHandler +DbErrcall +DbErrorHandler +DbException +DbFeedback +DbFeedbackHandler +DbField +dbfile +dbFileName +dbfuid +DbGet +DbGetFlags +DbGetjoin +DbGetOpenFlags +DBGPRINTF +dbh +DbHash +DbHashStat +dbheader +DBHOME +dbinc 
+dbinfo +DBINFO +DbInfoDelete +dbinit +DBINIT +dbip +dbj +dbjoin +DbKeyRange +dbkill +dblist +dblistlinks +DBLOCAL +DbLock +DbLockNotGrantedException +DbLockRequest +DbLockStat +DBLOG +DbLogc +DbLogStat +dblp +DbLsn +dbm +Dbm +DBM +dbmclose +DbmCommand +DbMemoryException +dbmeta +DBMETA +DBMETASIZE +dbmfp +dbminit +dbmp +DbMpoolFile +DbMpoolFileStat +DbMpoolFStat +DbMpoolStat +DBMSG +DbMultiple +DbMultipleDataIterator +DbMultipleIterator +DbMultipleKeyDataIterator +DbMultipleRecnoDataIterator +dbname +DBNAME +dbnamep +dbnm +dbo +dbobj +dbopen +DbOpen +dbOpenFlags +DbOutputStreamErrcall +dbp +Dbp +DBP +DbPanicHandler +dbpp +dbpquedb +dbprep +DbPreplist +dbprim +dbps +dbp's +DBP's +DbPut +dbq +dbquedb +DbQueueStat +dbrdonly +DbRecord +DbRecoveryInit +dbreg +DBREG +dbregid +dbregids +dbregister +dbremove +DbRemove +dbrename +DbRename +DBREP +DbRepStat +DbRepTransport +DbRunRecoveryException +dbs +Dbs +DBs +DB's +DBS +DBSDIR +DbSecondaryKeyCreate +DbSequence +DbServer +dbsite +DbSite +dbsizes +dbsql +dbsrv +dbstat +DbStat +DBSTATUS +dbstl +DbstlMalloc +DbstlReAlloc +dbstr +dbstrmap +dbt +Dbt +DBT +dbta +dbtb +DBTCL +DbTestUtil +DBTList +dbtp +dbtree +dbtruncate +DbTruncate +dbts +Dbts +Dbt's +DBTs +DBT's +dbtxn +DbTxn +DbTxnRecover +DbTxnStat +dbtype +DBTYPE +dbuf +DbUpgrade +DbUtil +dbverify +DbVerify +dbx +DbXA +DbXAResource +DbXid +DBz +dcursor +dd +dda +ddbc +ddbt +DDCDCDEEEEEEEEEEFEEEEEEDDEEDDEE +DDDEEEEEEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +Dde +ddt +de +deadfile +Deadfile +DEADFILE +deadlocker +DeadlockException +DeadLockException +deadmap +dealloc +Debian +dec +decl +declaringClassName +DECLS +decltype +DECLTYPE +decrpyting +dee +def +dEf +DEF +defaultAnswer +defaultvfs +defcd +defcmp +DeferredWrite +defg +DEFMINKEYPAGE +defpfx +defto +del +delayclient +DELAYCLIENT +deletable +deleteAction +DeletedEntity +DeletedKeys +DeletedPersist +DeleteEnumConstant +DeleteFile +DeleteFrom +DeleteInfo +DeleteSuperclass +DeleteTable +DeleteTrigger 
+DeleteTriggerStep +delext +delflag +delFlag +delim +delimp +DELNO +delpg +Demitrius +demovfs +denom +deque +dereffing +Deref'ing +des +desc +DESC +descendent +descheduled +deserialize +deserialized +deserializing +dest +DESTHANDLE +DESTNAME +detectp +dev +devchar +devsym +df +dflt +DFLT +dft +DGREE +dh +diag +DIAG +diags +DIAGS +diff +difflen +difftime +dir +Dir +DIR +DirectBuffer +directio +DIRECTIO +dirent +DIRENT +dirf +dirfno +dirfree +dirlist +dirname +dirp +dirs +dir's +dirsync +DIRSYNC +dirtyRead +DisallowChangeKeyRelate +DisallowChangeProxyFor +DisallowCompositeKeyField +DisallowDeleteProxyFor +DisallowNonKeyField +DisallowPriKeyField +DisallowSecKeyField +DISCARDCHANCE +diskio +DISKIO +disp +dist +DIST +dists +ditem +DJ +dKB +dl +DL +dlbytes +dlen +dlfcn +DLFCN +dll +DLL +DLLs +dlopen +dm +dmms +dms +dname +dndx +DNS +DOALL +dobj +docid +doclist +docRoot +docsize +doesnt +doevents +doGet +donext +DONOTINDEX +dont +DONTEXTEND +DONTLOCK +dontwritehere +doreq +dotfile +doTimeouts +dotlock +doubleToLongBits +doubleValue +doWork +dp +dpagep +dpages +dpair +dpgno +DPL +dpn +dqeque +dqueue +dr +dref +dremoved +dropAnnotation +dropField +DropIndex +DropTable +DropTrigger +ds +DS +dsearch +dsize +DSQLITE +dst +DST +dsvi +dsync +DSYNC +dt +dtab +dtabsize +dth +DTL +dtnum +dtr +dumpDatabase +dumptree +dup +Dup +DUP +dupcnt +dupcompare +dupdata +dup'ed +duperr +DUPFIRST +DUPID +DUPLAST +DuplicateComparator +duplicate's +dupmaster +DUPMASTER +dupmasters +DUPOK +DUPONLY +dupped +dups +Dups +DUPS +dupset +DUPSET +dupsort +DUPSORT +duptree +duptype +dv +dvi +dvz +dwNumberOfProcessors +DWORD +dx +EACCES +EADDRINUSE +EADDRNOTAVAIL +EAFNOSUPPORT +eag +EAGAIN +EALREADY +EAUTH +eax +eb +EBADF +EBADFILENAME +EBADMSG +EBADRPC +EBADSEEKPOS +ebuf +EBUSY +ECANCELED +ECB +ECHILD +echoOn +ecmd +ECONNABORTED +ECONNREFUSED +ECONNRESET +edc +EDEADLK +EDESTADDRREQ +edggsefcd +EDIRIOCTL +EDIRNOEXISTS +EDIRNOTEMPTY +EDOM +EDOOFUS +EDQUOT +edu +ee +Eefgh +Eefh +EEXIST +EEXISTS +EFAULT 
+EFBIG +EFFFFFFFFGGFFFGGFFFEEFGFGFEEEEEEEEEEEEEEEEEEEEDEDEDDDDD +efg +efh +EFILEEOF +EFILEEXISTS +EFILENOEXISTS +EFILEOPEN +EFSFULL +EFTYPE +egen +Egen +EGENCHG +EGENUPDATE +EHOSTDOWN +EHOSTUNREACH +eid +EID +eidp +EIDRM +eids +eid's +EIDS +EIi +EILSEQ +EINPROGRESS +EINTR +EINVAL +EINVALIDOPERATION +EIO +EIRT +EISCONN +EISDIR +EJUSTRETURN +ek +ELAST +ele +electinit +ELECTINIT +electsend +electtally +electvote +ELECTVOTE +electwait +elem +elementPV +elif +ELOOP +elp +Elp +elseif +Elymas +emailAddresses +emap +EMFILE +EMLINK +employerIds +EMSG +EMSGSIZE +emt +EMULTIHOP +ENAMETOOLONG +ence +EnclosingMethod +encrpyting +encryptaes +encryptany +encrypttest +EncryptTest +endian +Endian +ENDIAN +endianness +endif +endInclusive +endKey +endl +endname +endodata +endofile +endpath +endsWith +endtime +endTime +ENEEDAUTH +ENETDOWN +ENETRESET +ENETUNREACH +english +EnhancedAccessor +eNNN +ENOATTR +ENOBUFS +ENODEV +ENOENT +ENOERROR +ENOEXEC +ENOIOCTL +ENOLCK +ENOLINK +ENOMEDIA +ENOMEM +ENOMEMORY +ENOMSG +ENOPROTOOPT +ENOSPC +ENOSYS +ENOTBLK +ENOTCONN +ENOTDIR +ENOTEMPTY +ENOTSOCK +ENOTSUP +ENOTTY +enqueuing +ent +ENT +entityBinding +EntityBinding +entityClassName +EntityConverter +EntityInput +EntityKeys +EntityModel +EntityOutput +EntityStore +EntityToPersist +entryp +entrySet +enum +Enum +EnumInit +EnumNetworkEvents +enums +enumType +EnumType +env +Env +ENV +EnvAttr +envcl +envdata +envdir +envdp +EnvExample +envFlags +EnvGetEncryptFlags +envHome +envid +EnvIdReset +EnvInfoDelete +envip +ENVLINK +envlock +EnvLsnReset +EnvOpen +envp +envpanic +envparent +envpp +envreg +envregionsizetest +envremove +EnvRemove +envrpcserver +envs +env's +ENV's +EnvSetErrfile +EnvSetErrpfx +EnvSetFlags +EnvTest +ENVVAR +EnvVerbose +ENXIO +eobj +eof +EOF +EOFException +EOPNOTSUPP +eor +EOUTOFNODES +EOVERFLOW +ep +EPERM +EPFNOSUPPORT +EPG +EPGNO +EPHASE +EPIPE +EPRINT +EPRINTed +EPROCLIM +EPROCUNAVAIL +EPROGMISMATCH +EPROGUNAVAIL +EPROTO +EPROTONOSUPPORT +EPROTOTYPE +eq +Eq +EQ +eqlrnge 
+Equidistributed +ERANGE +EREMOTE +ERESTART +erl +erlangen +EROFS +ERPCMISMATCH +errbuf +errcall +Errcall +errcode +ERRCODE +errfile +Errfile +errlock +errlst +errmsg +errno +Errno +ERRNO +errnum +errorcode +errorFormatId +ErrorFunc +errorInfo +erroring +errorret +ErrorSetup +errpfx +Errpfx +ERRROR +errstr +errstream +errunlock +errx +esat +esec +ESHUTDOWN +ESOCKTNOSUPPORT +esp +ESPIPE +ESRCH +ESTALE +estBound +ESTD +estimatedCost +etaci +etByte +etilqs +etime +ETIME +ETIMEDOUT +ETOOMANYREFS +ETXTBSY +eusec +EUSERS +eval +EvalObjv +eventDb +EventFunc +EVENTITEMS +EventNotify +eventproc +EventType +evita +EvnExample +EvolveClasses +EWOULDBLOCK +exactp +ExampleDatabaseLoad +ExampleDatabaseRead +exampleStore +ExampleStore +ExceptionUnwrapper +ExceptionWrapper +ExceptionWrapperTest +excl +EXCL +ExclusiveCreate +excxx +EXDEV +exe +exenv +exFAT +exid +exnum +Exp +expandtab +expectEnhanced +expectNoClassChanges +explainPrev +expr +EXPR +ExprAssignVarNumber +ExprDelete +ExprFunction +exprlist +ExprListDelete +ExprSetColl +ExprSetHeight +ExprSpan +exprtest +exstl +ext +EXT +extentsize +extentsizep +externalizable +Externalizable +externs +extid +extractIndexKey +ezila +fabs +failcheck +failchk +FAILCHK +fallback +FALLBACK +fallocate +FALLTHROUGH +faq +FastInputStream +FastOutputStream +FastOutputStreamTest +faststat +FatalRecover +faultmem +faultsim +favoriteColors +FBSDID +fc +fcchk +Fchg +fchk +fchmod +FCHMOD +fclose +FCLOSE +fcn +fcntl +FCNTL +FCONTROL +fcreate +fd +Fd +FD +fdatasync +FDATASYNC +fdlist +fdlock +fdm +fdp +fd's +fdupcurs +feedback's +ferr +ff +Ff +FF +ffactor +ffactorp +fff +FFF +fget +fgetc +FGETC +fgets +FGETS +fh +Fh +FH +fhp +FH's +fid +Fid +fids +FieldAddAndConvert +fieldFormatData +fieldlist +FieldMetadata +fieldName +fieldno +filedone +FILEDONE +filehandle +fileid +fileID +FILEID +fileids +fileIDs +FileIndexHigh +FileIndexLow +fileinfo +Fileinfo +fileinit +filelist +FILELIST +filemode +filenamep +FILENO +FileNotFound +FileNotFoundException +filenum 
+fileopen +FILEOPEN +fileops +FILEOPS +filePath +filereg +fileregs +fileset +filesize +filestart +FILEWRITTEN +fillf +fillfactor +filln +fillpercent +FImpt +finalcount +findData +finddatum +findFirst +FindHashEntry +findif +findlastckp +finfo +finickyLevel +FinishTrigger +finsert +firstkey +firstKey +fiv +FIXEDLEN +FIXLEN +FIXME +fixup +fixups +fk +FkCounter +fkey +FkIfZero +flagN +flagsp +FLD +FLDID +floatFormats +floatingpoint +floatToIntBits +floatValue +flushcommit +FlushFileBuffers +FMAP +fmax +fmethod +FML +fmsg +fmt +FMT +fn +fN +FN +fnam +fname +FNAME +FNAMES +fnameuid +Fneeded +FNFE +fnl +fnp +fns +fnum +foo +FooImp +fopen +FOPEN +foreach +Foreach +FOREACH +foreignkey +ForeignKeyDatabase +ForeignKeyDeleteAction +ForeignKeyIndex +ForeignKeyNullifer +ForeignKeyTest +ForeignMultiKeyNullifier +foreignStore +formatId +formatID +FormatMessageA +format's +form's +forName +fp +FP +fprintf +fprobe +fptr +fput +fq +freakin +frecno +freeable +freeaddrinfo +freeblock +FreeBSD +FreeBSD's +free'd +freedata +freefamilylocker +FreeFunc +free'ing +freelist +FreeList +FREELIST +freelock +freelocker +freelst +freeonly +freep +freespace +FREESPACE +freg +fremove +freq +friedl +Friedl +fromInclusive +fromIndex +fromKey +fromMapEntry +fromValue +frontback +frotzed +fs +FS +fsctl +fset +fst +FST +fstat +FSTAT +fstati +FSTATI +fsts +fsync +FSYNC +ftab +ftruncate +FTRUNCATE +fts +FTS +ftsHashFunction +ftype +FTYPE +ful +fullerr +fullfsync +fullhome +fullmutex +fullname +fullName +fullnew +Fullscan +FULLSCAN +fullsync +fullSync +FULLSYNC +fulltext +FULLTEXT +func +FUNC +funcs +FUNCS +funcxx +fuzzer +fv +fw +fwd +fwrite +FWRITE +fzero +gapflags +Gb +gbytes +gbytesp +gc +gcc +Gcc +GCC +gc'ed +gcount +gdb +gen +gener +genern +genrand +Gentles +geo +GEOM +george +getactive +getaddr +getaddrinfo +GETADDRINFO +GETALL +getboth +getbothc +getBranchQualifier +GetByteArray +GetByteArrayFromObj +getBytes +getCity +getckp +getClass +getClassFormat +getCollection +getconfig +GetConfig 
+getCurrentKey +getcwd +GETCWD +getdata +getData +getDbEnv +getDBFileName +getDbt +getDbtString +getDetail +GetDiskFreeSpace +GetDiskFreeSpaceEx +getdynamic +GETDYNAMIC +getenv +GETENV +getEnvironment +GetEnvironmentVariable +getErrno +getField +GetFileInformationByHandle +getFlags +GetFlags +GetFlagsList +getFormatId +getGlobalTransactionId +GetGlobPrefix +GetHashValue +gethostbyname +getindex +getIndex +GetIndexFromObj +getinfo +GetInfo +getInstance +GetIntFromObj +getitrs +GetJavaVM +getjoin +GetJoin +GetLastError +getline +GETLK +getLock +GetLockDetect +getlocker +GETLOCKPATH +getlong +GetLongFromObj +GetLsn +getMode +getname +getName +GETNAME +getnext +getNext +getno +getobj +getObj +getObject +getOffset +getOp +GetOpenFlag +getopt +GETOPT +getpageinfo +getPartialLength +getPartialOffset +getpid +getPrimaryKeyFormat +getPrimitiveBinding +GetProcessTimes +getProperty +getReadLock +GETREADLOCK +getRecno +GETRECNO +getRecordNumber +getrusage +GETRUSAGE +Get's +getSize +getstack +getString +getSubclassIndex +getsubopt +GetSystemTime +GetSystemTimeAsFileTime +gettime +getTime +GETTIME +gettimeofday +GETTIMEOFDAY +getTimeout +GetTimeout +gettingstarted +gettype +getuid +GETUID +GetUInt +getulong +GetUnsignedIntFromObj +getUserBufferLength +getval +getValue +getValueFormat +GetVerbose +GetVersion +getzip +GETZIP +Gh +ghi +gi +gid +GID +GIGA +Gizmo +GlobalRefs +gm +gmail +GMDB +gmtoff +gni +GoDone +golive +gonna +Gosub +gotkey +GotRange +gotta +gp +grep +grey +groupalloc +groupby +Groupby +groupgrow +gsf +gsg +gsp +gtrid +guesspgsize +guesstimated +HaltIfNull +hamc +handleException +handlePanicEvent +handle's +HANDSOFF +happend +Harbison +hardcode +hardcoding +HASHC +hashCode +hashcompare +hashcomparetest +HashCompareTest +HASHC's +hashFunction +hashhdr +HASHHDR +hashinit +HASHINSERT +HASHLOOKUP +HASHMAGIC +hashmeta +HASHMETA +HASHOLDVER +hashp +hashproc +HASHREMOVE +HashStats +hashtab +HASHTAB +Hashtable +HASHVERSION +hasNext +hasPrevious +hasVowel +hc +HCommand +hcp 
+hcreate +hdestroy +hdr +HDR +hdrbuf +hdrchk +hdrpages +hdrs +HDRs +header's +headMap +headp +headSet +heapsize +Hedin +heldby +HelloDatabaseWorld +helloworld +HENDATABASELECTRANSACTIONATURALTERAISELSEXCEPTRIGGEREFERENCES +HEURCOM +HEURHAZ +HEURMIX +HEURRB +hEvent +HEXDATA +hexdouble +hexio +hexkey +hexrekey +hf +hfp +hfs +HFS +hH +HIGHSTRESS +highwater +hij +hijkl +himark +histdbt +hk +HKEYDATA +hlock +hm +hmac +HMAC +hmap +hmeta +HMETA +hmm +Hmm +hmmap +hmset +HOFFDUP +HOFFPAGE +HOFFSET +HOLDELECTION +Holder's +holdl +HOMEDRIVE +homep +HOMEPATH +homeroot +hostaddr +hostid +hostname +hotbackup +HOTBACKUP +hotcopy +hotupdate +hp +HPPA +HPUX +hq +href +hs +hsearch +Hsearch +HSEARCH +hset +HSTAT +hstrerror +HSTRERROR +htab +html +htonl +http +httpd +hu +ia +IA +IAFTER +IBEFORE +iBlob +ibtree +IBTREE +ibulk +ic +Ick +icna +icne +icu +iCur +ICURRENT +icursor +iDb +idbase +IDL +idletimeout +IDLETIMEOUT +idleto +IdList +IdListAppend +IdListDelete +idmap +idnum +idp +ids +Ids +IDs +ID's +idShadow +idspace +idup +idup'ed +idx +IDX +IdxDelete +IdxGE +IdxInsert +idxitem +idxlist +IdxLT +idxNum +IdxRowid +idxStr +iface +ifdef +ifdef's +ifexists +iff +IFF +IFile +IFILE +IFILEMGR +IFJDCS +ifmt +ifndef +IfNeg +IfNot +ifnotexists +ifnull +ifp +IfPos +IfZero +iget +igol +ihold +IIL +iitem +iitr +IIZ +IIZLjava +ik +Ik +IL +ilb +ile +illa +IllegalArgumentException +IllegalStateException +ILo +ilock +ILOCK +ilocks +ILprR +ilsuo +iltne +IMMEDIATEJOINSERTMATCHPLANALYZEPRAGMABORTVALUESVIRTUALIMITWHEN +immidiately +impl +IMT +INADDR +inc +incfirst +includesP +includesPV +incomp +IncompatibleClassException +incr +incrblob +INCRBLOB +IncrRefCount +IncrVacuum +incursor +ind +indexCursor +indexKey +indexKeyData +indexKeyFormat +indexKeyOutput +indexKeys +indexlist +indexOf +IndexOutOfBoundsException +Index's +indexViews +indices +indx +INDX +indxp +iNextWrite +infop +informatik +info's +INFOTYPE +ini +INI +init +Init +INIT +inited +INITED +INITENV +initfail +Initialise +initialSize +init'ing 
+initlockers +initlocks +initobjects +inits +initspin +INITSPIN +inittxns +inlen +inline +Inline +inmem +INMEM +inmemdbflags +inmemlist +inmemory +INMEMORY +InnerClasses +ino +inode +inorder +INORDER +inp +inpitem +inplace +inplmrg +inputOctets +inregion +INREPELECT +inrprod +inscollist +insdel +InsertEnumConstant +InsertInt +insertpair +InsertSuperclass +INSTALLFLAG +int +INT +intarray +IntArray +INTARRAY +intBitsToFloat +integerFormats +integrityck +IntegrityCk +IntegrityConstraintException +intel +interp +Interp +intial +ints +inttypes +INTTYPES +intValue +inuse +inval +INVAL +INVALIDID +inventorydb +inventoryDB +InventoryDB +io +ioerr +IOERR +IOException +IOExceptionWrapper +ioinfo +iopsize +iosDevFind +IOSIZE +iotrace +IoTrace +IOTRACE +iovec +iovecs +IOVECS +ip +IP +ipc +IPC +ipcs +ipk +IPP +IPPROTO +iput +IPv +iq +iqueue +IR +iread +IREAD +IRECNO +iRegion +irep +iRgn +IRGRP +IRIX +IROTH +iRow +IRUSR +isalive +IsAlive +isalpha +ISALPHA +isAutoCommit +isbad +ISBIG +isbigendian +isByteLen +isCatNotDog +isCrash +ISDEFAULT +isdeleted +isdigit +ISDIGIT +isDirtyReadAllowed +isDirtyReadEnabled +isdone +isdst +isdup +ISDUP +isEmpty +ISHELL +isIndexed +iskeyword +islease +ismemdb +ISNAN +IsNull +ISNULL +isolder +isopd +isOptimal +isOrdered +ISPERM +ispget +isprint +ISPRINT +isroot +isSearch +ISSET +isspace +ISSPACE +istmp +Istmp +isTransactional +iSub +iSum +IsUnique +isvalid +isView +isWrite +isWriteAllowed +isync +itemcount +itemlist +itemname +Itemname +itemnameDB +ItemNameIndexDB +itemorder +iter +IterDeadlockTest +iterswp +ith +iti +itici +iTid +itila +itilib +itivi +itr +itrc +ive +iversion +IV's +IW +IWGRP +IWOTH +iwr +IWR +iwrite +IWRITE +IWUSR +iX +IXGRP +IXOTH +IXUSR +iY +ize +JanFebMarAprMayJunJulAugSepOctNovDec +japanese +java +javaagent +JavaIO +JavaRPCServer +java's +javax +jbyte +jc +JDB +je +JE +JEDB +jennifer +jenv +jEnvExample +JHB +jhi +JJ +jkl +JKL +jl +jlong +jmsjdbc +jndi +JNI +JNIEnv +JNIs +JOINCUR +JOINENV +joinop +JOINs +JoinTest +JoinType 
+journaling +JournalMode +jp +jq +jrpcgen +jt +jta +JVM +JZ +kb +kboolean +kBoolean +kbyte +kByte +kbytes +kchar +kCharacter +kcomposite +kComposite +kdouble +kDouble +KEEPALIVE +keio +Kerberos +keyAssigner +keyBinding +keybuf +keyClass +keyClassName +KeyCreator +KEYDATA +KEYEMPTY +KEYEXIST +keyexists +keyExtractor +KeyExtractor +KeyField +keyfirst +KEYFIRST +keyfive +keyflag +keyFormat +keyfour +keygroup +KEYGROUP +keygroups +keygrp +KEYGRP +keyinfo +keyInput +keyInstance +keylast +KEYLAST +keyLen +KEYLEN +keyMaterial +keyName +keynum +keyone +keyOutput +keyp +keyrange +KeyRange +KeyRangeException +KeyRangeTest +key's +keySet +keysize +keysSize +keystr +keystring +keythree +keytwo +keytype +keyTypes +keywordCode +KeywordCode +kfloat +kFloat +kgnum +ki +kidsp +killid +killinterval +killiteration +killtest +kint +kInteger +KL +klen +klinks +klL +klm +klNpP +klNprRs +klNprRV +klong +kLong +kMm +kNpV +kow +kp +kpv +krinsky +Krinsky +ks +kshort +kShort +kstring +kuleuven +kvpair +kwcolumn +laci +Landon +lang +LANGLVL +lanoit +lanoita +LASTCKP +lasterrno +lastError +lastfile +lastid +lastIndexOf +lastKey +Lastp +lastpgno +last's +later's +laurie +lbtree +LBTREE +lbucket +lbuf +lc +lck +LCK +lcnt +Lcom +ld +ldata +ldbp +ldbs +ldbt +ldbtsize +ldcws +LDF +ldl +ldstub +LDUP +le +LEAFCHAIN +leafcnt +LEAFLEVEL +LEAFSEEN +Leapin +legacyformat +leisa +lemp +lempar +len +lenp +les +lessthan +LessThan +levelp +lf +LF +lfhp +lfname +LFNAME +LFPREFIX +lfs +LFS +lg +LG +lget +LGPL +lh +lhash +lhi +lhs +LHS +li +lib +libc +libcd +libcmtx +libdb +libfile +libname +LIBNSL +libpthread +libresolv +libsqlite +libthread +libversion +likeop +lineno +LineNumberTable +lintInt +linux +listIterator +ListIterator +listobj +ListObjAppendElement +listp +listsize +Ljava +lk +lkrs +ll +Ll +LL +lld +llsn +llu +llx +lm +lM +ln +lnP +lnsl +LoadAnalysis +loadEnvVars +loadme +LoadStore +loc +LOC +localhost +LocalIterator +localtime +LOCALTIME +LocalVariableTable +LocalVariableTypeTable +lockcount 
+LockDetect +LockDetectMode +lockdown +LOCKDOWN +lockerid +locker's +lockevent +LockExample +lockfhp +Lockfhp +Lockfile +LockFileEx +lockfiles +lockForWrite +lockGet +LockGet +lockid +lockinfo +lockmgr +lockmode +LockMode +LockNotGrantedException +lockobj +LOCKOBJ +LOCKOBJECT +lockop +LockOperation +lockproxy +LOCKPROXYFILE +LOCKREGION +lockreq +LOCKREQ +LockRequest +LockRequestMode +LockStat +LOCKSTATE +LockStats +locksteals +lockstep +LOCKTAB +locktimeout +LockTimeout +LOCKTIMEOUT +LockVec +lockVector +LOCKVERSION +LogArchive +logbuf +logbufsize +logbufsz +logc +Logc +LOGC +LogcGet +logclean +LogCompare +logcursortest +logdata +logdir +logend +logfile +LogFile +LOGFILE +LOGFILEID +logfiles +logflush +LogFlush +loggap +LogGet +LOGID +LOGMAGIC +logmaxset +logmsg +LOGOLDVER +LOGP +LogPut +logready +LOGREADY +logrec +LogRegister +logreq +LogSequenceNumber +logset +logsonly +LOGSONLY +LogStat +LogStats +logv +LOGVERSION +logvrfy +longBitsToDouble +longtest +longValue +lookaside +Lookaside +LOOKASIDE +lorder +LORDER +lorderp +lowlsn +lp +lP +LP +lpBuffer +lpgno +lprint +LprRsS +LpRsS +lput +lrand +LRECNO +LRECNODUP +lref +lrp +lru +LRU +LRUness +lseek +LSHIFT +lsn +lSN +Lsn +LSN +lsnadd +LSNfile +lsnget +lsninit +lsnoff +LSNoffset +lsnp +LSNs +LSN's +lsntime +LSTAT +lsVv +lsynch +lt +ltm +LtoR +lu +luB +luf +luGB +luK +luKb +luKB +luM +luMb +luMB +lv +LV +lvalue +lwarx +LWARX +lwp +LWP +lwrbnd +LWZ +lx +lx's +machid +machlock +machtab +maddr +magicno +maintinit +maj +majver +makecopy +MAKEDEFAULT +makedup +makeheaders +makeKey +MakeRecord +Makoto +malloc +Malloc +MALLOC +malloc'd +malloc'ed +mallocing +mallocs +malloc's +MAMAMIA +manyToMany +manyToOne +mapEntry +MapEntry +mapfile +MapViewOfFile +margo +Margo +MARGO +markdone +markneeded +markus +marshalIndexKey +marshalled +MarshalledEnt +MarshalledEntityBinding +MarshalledKey +MarshalledKeyBinding +MarshalledObject +MarshalledTupleData +MarshalledTupleEntry +MarshalledTupleKeyEntity +marshalling +MASTERELECT 
+matchinfo +mathfunc +MathOps +Matsumoto +matumoto +MAXARGS +maxb +MAXBINSZ +maxblock +MAXBQUALSIZE +MAXBTREELEVEL +maxcache +maxcommitperflush +maxCount +MAXFIELD +MAXGTRIDSIZE +maxhlocks +maxhobjects +maxid +Maxid +MaxID +MAXID +MAXINFOSIZE +maxkey +maxKey +Maxkey +maxkeypage +maxlen +maxlockers +maxlocks +MAXLOCKS +maxlsn +maxLSN +maxlsteals +MAXMMAPSIZE +maxn +maxnactive +maxnfileid +maxnlockers +maxnlocks +maxnobjects +MAXNR +maxnsnapshot +maxobjects +maxopenfd +maxops +maxosteals +maxOut +maxp +MAXPATHLEN +maxpending +maxperm +maxpg +MaxPgcnt +maxpgno +maxrec +maxRequest +maxRetries +maxsites +maxsize +MAXSIZE +MAXSIZEONPAGE +maxtimeout +MAXTIMEOUT +maxto +maxtxn +maxtxns +maxvalue +maxwrite +MAXWRITE +maxwrites +maxX +maxY +mb +Mb +mbp +mbucket +mbuf +mbytes +Mbytes +mbytesp +MC +McIlroy's +md +mday +mdays +MDups +mem +Mem +MEM +membar +membr +memcmp +MEMCMP +memcmps +memcpy +MEMCPY +memdebug +MEMDEBUG +memget +MEMMAPPED +MemMax +memmove +MEMMOVE +MemoryCheck +memorydb +MEMORYDB +MemoryException +memorymanage +memp +MEMP +Mempool +memset +memstatus +memsys +MEMSYS +mergePV +Mersenne +messgae +metachk +metadata +Metadata +METADATA +metadb +METADIRTY +metaflags +metagroup +metalsn +metapage +Metapage +metasub +metaswap +methodID +mf +mfence +mfp +MFT +mgr +mgrelections +mgrp +mhflag +midpage +millitm +MINCACHE +mincommitperflush +MINFO +MinGW +MINIT +minkey +Minkey +minkeyp +minkeypage +minlocks +MINLOCKS +minp +MINPAGECACHE +minval +minvalue +minver +minwrite +MINWRITE +minwrites +minX +minY +mip +mips +MIPS +mis +misc +Misc +mismtch +MixColumn +mj +mjc +mkdir +MkDir +mkdir's +mkheap +mkkeywordhash +mkpath +MKS +mlock +MLOCK +mmap +MMAP +mmap'd +mmap'ing +mmapped +mmapsize +mmapsizep +MMDDhhmm +mmetalsn +MMM +mmpgno +mno +MNO +mNP +mNs +Mobilus +modeFlag +MonTueWedThuFriSatSun +moremiddle +mortem +moshen +motorboy +mov +movb +MoveFile +MoveFileEx +movl +mp +Mp +MP +MPE +mpf +mpfarray +MPFARRAY +mpfq +MpGet +mpgno +MpInfoDelete +mpip +mpool +Mpool +MPOOL 
+MpoolExample +mpoolfile +Mpoolfile +MPOOLFILE +MPOOLFILEs +MPOOLFILE's +mpools +mpreg +MPREG +MPREGs +mprintf +mprotect +MPROTECT +mps +MpStat +MpSync +MpTrickle +mrv +ms +MSB +MSC +MSDN +msdos +msem +MSEM +msemaphore +mset +msg +Msg +MSG +msgadd +msgbuf +MSGBUF +msgcall +Msgcall +msgdbt +msgdir +msgfile +Msgfile +msgfp +msgs +msg's +MSGS +msgth +msgtype +MsgType +msgv +MSHUTDOWN +msila +msize +MSLEEP +msort +MSTR +MSVC +mswap +mt +MT +mti +mtoken +mtx +mtxcount +mtxmgr +mtxp +mtxregion +muation +Multihop +MultiKeyCreator +multimap +multiplecursortest +MultipleCursorTest +multiselect +multiset +multithread +multiversion +Multiversion +MULTIVERSION +munlock +MUNLOCK +munmap +MUNMAP +MustBeInt +mut +mutex +Mutex +MUTEX +mutexes +Mutexes +MUTEXes +mutexid +mutexlocks +MUTEXMGR +mutexp +MUTEXREGION +MutexStats +muxfile +mv +Mv +mvcc +MVCC +mvptr +MVS +MVv +mx +mxpathname +mxUsed +MyClass +myclassdb +myClassDb +mydatabase +myDatabaseName +mydb +myDb +MyDbs +MYDIRECTORY +mydrive +mydrivexxx +MyEntity +myfree +mykey +mylock +MyMap +myobjc +myobjv +MYQUE +myStore +MySubtype +mytime +MyType +myval +myvalue +naborts +nactive +nad +nalloc +nAlloc +namedsem +namelistp +nameop +namep +namesp +NamesVect +NameToInfo +NameToPtr +NaN +nano +nArg +NARG +nargc +nargv +naturaleftouterightfullinnercross +NB +nbegins +nbucket +NBUCKET +nbuckets +nbuf +nBusy +nbyte +nByte +NBYTE +nbytes +NBYTES +ncache +NCACHE +NCACHED +ncachep +ncaches +ncell +ncommit +ncommits +nconflicts +ncp +ncurs +ncvs +NcvV +ndary +ndata +NDATA +ndbc +Ndbc +ndbm +Ndbm +NDBM +NdbmOpen +ndeadalloc +ndeadlocks +ndir +NDIR +nDoc +ndowngrade +ndx +NDX +NEEDSPLIT +needswap +NEEDSWAP +neg +negFlag +nelem +nelemp +nelems +nentries +nentry +nEq +netaddr +neterr +nevict +newalloc +newclient +NEWCLIENT +newdata +newdatabase +NewEntityName +newfh +newfile +Newfile +NEWFILE +newFormats +NewInfo +newitem +newlist +newmaster +NEWMASTER +newname +NewName +newopd +newpage +newpgno +NewRowid +newsite +NEWSITE +newsitep +newsites 
+newsize +newstp +NewStringObj +newtype +newval +nexprlist +nextdup +nextents +NEXTFILE +nextIndex +nextinfo +NEXTINFO +nextkey +nextlsn +nextnodup +nextpgno +nextprm +next's +nextval +nf +nfid +nfileid +nfiles +nFiles +nFormats +nfs +ng +NG +Nikunj +NIMDB +NIMDBs +NIMDBS +nInMul +nio +Nishimura +nitems +nkeys +nl +NL +nlist +nlockers +nlocks +nlocktimeouts +nlog +nlsn +NMINALLOC +nmnum +nmodes +nnext +nnextlsn +nnn +NNN +NNNb +NNNbc +NNNbm +NNNN +NNNt +NNNtd +NNNtr +NnO +nNotFound +nnowaits +nO +noarchive +NOARCHIVE +NOAUTO +nobjects +nobuf +nobuffer +NOBUFFER +NOCASE +NOCOPY +nodefaultlib +nodeMaxEntries +nodename +NODENAME +nodeno +nodesize +nodup +NODUP +nodupdata +NODUPDATA +NODUPS +NOERROR +noet +NoFields +NOFILE +nogrant +nogroup +nohasham +noheader +NOHEADER +noi +noita +noitazi +NOKEY +nolease +NOLFS +nolinenosflag +nolock +NOLOCK +nolocking +NOLOCKING +nolonger +NOLSN +nomacro +nomem +NOMEM +NOMIGRATE +nommap +NOMMAP +NOMORE +NoMutation +nomutex +nonassoc +nonblock +nonewline +NonKeyField +NonKeys +NONTRANSACTIONAL +noone +noop +Noop +NOOP +noorderchk +NOORDERCHK +nooverwrite +NOOVERWRITE +nop +NoP +nopanic +NOPANIC +nopenp +NOPROMOTE +NoqV +NORECNUM +noreorder +norep +norepmgr +noResort +NORUN +NOSERVER +noshell +noshm +nosort +NOSORT +nosuchtokenizer +nosync +NOSYNC +nosystemmem +NOTA +notadatabase +NOTADB +notdurable +NOTEMPDIR +NOTEXIST +NotExists +notfound +NotFound +NOTFOUND +notgranted +NOTGRANTED +NOTLOGGED +notnull +NotNull +NOTNULL +NOTPERM +NotPersistent +NOTREACHED +notReady +NOTSET +notsup +NotTransient +notused +NotUsed +NOTUSED +NOTYPE +notzero +NOTZERO +noundo +novfl +novrfy +nowait +noWait +NOWAIT +nowaits +nowUsed +np +nP +NP +npage +npages +nparts +npeers +npg +npgno +npg's +NpP +nprocs +nptr +NqV +nr +Nr +nread +nreaders +nrec +nrecords +nrecs +NRECS +nRef +nreg +NREGION +nregions +nreleases +nrepeat +nrequests +nrestores +nrow +nRow +NrV +NS +nsec +nSingle +nsites +Nsites +nsize +nskeys +nsl +nsleep +nsleepp +NSLOTS +nsnapshot +nSpan 
+NsV +NsVv +NT +ntasks +NTFS +n'th +nthelem +nthreads +nthrottles +ntohl +nTt +nTV +ntxns +ntxntimeouts +Nuff +NullClassCatalog +NULLINDICATOR +NULLing +NULL'ing +NullPointerException +NullRow +NULLs +NullTransactionRunner +nullvalue +NULLVALUE +nullx +NULLXID +nuls +NULs +num +Num +NUM +numberOfKeysRead +NumbersDeque +NumbersList +NumbersVector +numdup +numdups +NUMERICHOST +numext +numlocks +numparts +nums +NUMWRITES +nupgrade +nval +nvotes +nw +nwrite +nwritep +nwriters +nwrites +nwrotep +nxt +Nxt +obj +Obj +OBJ +objc +objectArrayToString +ObjectInputStream +ObjectOutputStream +objectsteals +ObjectStreamClass +objectToData +objectToKey +ObjectToPrimitive +ObjectToSubtype +ObjectToUnrelatedOther +ObjectToUnrelatedSimple +objectToValue +objp +objproc +objProc +objs +Objs +objv +occurence +octets +ODDFILESIZE +oe +offdup +OFFDUP +offp +offpage +Offpage +OFFPAGE +offsetp +oflags +OFM +ofst +ohash +oid +OID +ok +Ok +Ol +oldConfig +olddata +olditem +oldname +oldrec +oldsize +oldskey +oldtype +oldValue +oldValues +oldver +OLDVER +oldversion +OLDVERSION +oli +Omiting +omniti +omode +ONC +onconf +ondisk +onecolumn +onefile +onepeer +oneselect +oneToMany +oneToOne +onint +onoff +ONOFF +onoffalseyestruefull +onoffp +onpage +onRelatedEntityDelete +onstack +OOB +oom +OOM +Ooops +op +OP +opbuf +opcnt +opd +OPD +OpenAutoindex +openCursors +openDb +OPENDIR +openEnv +OpenEphemeral +OPENEX +openfd +OpenFile +OpenFileMapping +openfiles +OPENFILES +openFlags +openhandle +OpenPseudo +OpenRead +OpenServer +OpenSharedMemory +opensub +opentemp +OpenWrite +OPENX +opequal +Opequal +OperationStatus +opflags +OPFLAGS +opgno +opless +Opless +opmods +ops +Ops +OPS +optarg +opterr +Optimised +optind +optopt +optreset +ORacle +orconf +OR'd +orderby +orderByConsumed +orderchkonly +ORDERCHKONLY +orderedkeys +org +ori +orig +originfo +origline +origmap +origp +os +OsError +OSF +oslog +OSO +OSS +ostrace +osynch +OTNULLIKECASCADEFERRABLECASECOLLATECREATECURRENT +ous +outbuf +outBuffer +outdatedp 
+outfd +outfile +OUTFILE +outFlags +outfp +outlen +OutOfMemoryError +OutputStream +outstr +ov +OVERREAD +oversized +overwritedup +ovfl +OVFL +ovflcnt +ovflok +ovflpage +ovflpoint +ovflsize +ovput +ovref +PackedIntegerTest +padDecrypt +padEncrypt +padp +pagecache +PAGECACHE +pagecnt +pagecount +Pagecount +PAGECOUNT +PAGEDONE +pageimage +pageinfo +PAGEINFO +PAGEINFOs +pagelayout +pagelist +PAGELIST +pagelsn +pageno +pagep +pagereq +PAGEs +pagesize +Pagesize +pagesizep +pagesizes +Pagesizes +pagespace +pagesz +pagetype +pagezero +pagf +pagfno +paniccall +panic'd +PANIC'd +panicing +panic'ing +panicstate +Param +params +Params +PARAMS +paren +parentid +parentnode +PARENT's +parentSsn +PaRisc +paritions +parm +ParseAlloc +parseLong +parseonetoken +ParserAlloc +ParseSchema +parsrt +parsrtc +PartBinding +partc +PartData +partialgettest +PartialGetTest +PartKey +PartKeyBinding +Part's +partsize +partsum +PartValue +passwd +passwds +patchver +paula +paulo +Paulo +pb +PBNYC +pbuf +pc +pcache +Pcache +PCH +pChanges +pcount +pct +pcursor +pcx +pd +pdata +pdb +pdbp +PDECL +pdf +pentry +penv +perf +perfdb +perftool +Perl +permlsn +perms +permutationPV +PersistToEntity +PersonStore +personValues +pExpr +PExpr +PF +pFile +pflag +pfx +pg +Pg +PG +pgaddr +pgcnt +pgcookie +pgdbp +pgdbt +PGDEF +pgerr +pget +pgfmt +pgfree +pggap +pghdrsz +pgheader +pgin +Pgin +pginfo +PGINFO +PgInit +pgip +PgIsset +pglist +pgmax +pgmin +pgno +Pgno +PGNO +pgnoadd +pgnoaddr +pgnop +pgnos +pgnum +pgout +pgp +pgread +pgrec +pgs +pgset +pgsize +PGSIZE +pgtxn +pgwrite +ph +pheap +Phong +php +PHP +physdel +physwrite +pid +Pid +PID +pids +piLimit +pindx +pinref +pinsert +pitem +pk +pkey +pkeyint +pkeyInteger +pkeys +pkey's +pkg +PKG +pl +placeholder +PlatformSDK +plist +PLIST +plsn +pm +pmap +pmem +PMerge +pn +pName +pNew +pNext +poff +pollLimit +POPENFILES +popfront +portmapper +pos +posix +Posix +POSIX +postdestroy +POSTDESTROY +postlog +POSTLOG +postlogmeta +POSTLOGMETA +postopen +POSTOPEN +postsync 
+POSTSYNC +pOutFlags +PowerPC +pp +ppc +PPC +ppgno +pq +pqr +pqrs +pqrst +pqueue +pr +PR +pragma +Pragma +PRAGMA +pragmas +PRAGMAS +prdb +prdbt +pre +Pre +pread +PREAD +prec +pred +predestroy +preopen +PREOPEN +PreparedTransaction +preparse +preplist +PREPLIST +preprocess +preprocessed +preread +prerelase +prereq +presorted +prev +Prev +PREV +prevdup +prevfile +previousIndex +previousVersionId +prevlsn +prevnodup +prevprm +prev's +prflags +prfooter +prheader +pri +PRI +priceDb +pridata +priKey +PriKey +PrimaryIndex +primaryKey +PrimaryKey +PrimaryKeyAssigner +primaryKeyData +primaryKeyFormat +primaryKeyInput +primaryKeyThang +primget +PrimitiveToObject +printf +PRINTF +PRINTFOOTER +PRINTHEADER +printlock +printlog +priorityp +prioritytest +prng +Prng +PRNGSEEED +prnpage +proactively +proc +Proc +PROC +proccess +procs +proff +prog +progname +progpath +PROT +protos +proxied +ProxiedClass +proxiedTypeClass +proxiedTypeId +proxyTypes +prpage +prqueue +prR +prtree +pseudorandom +psize +PSIZE +psplit +pSs +pSst +pstat +PSTAT +pstatus +pTab +ptail +pthread +Pthread +PTHREAD +pthreads +PTHREADS +ptint +ptition +ptr +Ptr +ptrdiff +pTrigger +pTriggerStack +PtrToInfo +ptype +pupdate +pushfront +pushpop +Pushpop +putall +putAll +putc +putchar +putitem +putobj +putop +putpageinfo +putr +pv +pvfunctions +PWD +pwrite +PWRITE +qam +QAM +qamc +QAMDATA +QAMMAGIC +qammeta +QAMMETA +QAMOLDVER +QAMVERSION +qmeta +QMETA +qmpf +qnx +QNX +qp +QPAGE +Qq +qr +qrs +qs +qsort +Qsort +QSORT +QSPACE +qtest +queueext +queuestart +QueueStats +QUICKBALANCE +quicksort +QUIESCED +quotedStr +QUOTESERVER +quV +qV +ra +raisetype +raison +ramc +rand +RandCommand +RandDbCursor +randomblob +randstr +randtbl +RangeExceeded +RangeKeyNotEqual +RangeNotFound +RawObject +RawStore +RawType +rb +RB +RBBASE +RBCOMMFAIL +RBDEADLOCK +rbegin +RBEND +RBINTEGRITY +RBOTHER +RBPROTO +RBROLLBACK +RBTIMEOUT +RBTRANSIENT +rbtree +rc +RCLOSE +rcon +rcount +rcrd +rcsv +rcuradj +rcursor +rcvd +rdata +rdbc +rdbenv +RDLCK 
+RDLOCK +rdonly +RDONLY +rdtsc +rdump +RDWRMASTER +reacquired +reacquires +readback +readBigInteger +readBoolean +readByte +readBytes +readChar +readChars +ReadCookie +readd +readdb +readdir +readDouble +readFloat +readInt +readkey +readKeyObject +readline +readlock +READLOCK +readLong +readn +readObject +readonly +ReadOnly +READONLY +readratio +readShort +readsocket +readSortedDouble +readSortedFloat +readString +readUnsignedByte +readUnsignedInt +readUnsignedShort +readv +READWRITE +RealAffinity +realloc +Realloc +REALLOC +realloc'd +realloc'ed +REALLOC'ed +reallocing +realPorts +realpri +rec +Rec +REC +recfill +recip +reclen +RECLEN +reclength +recno +Recno +RECNO +recnop +recnos +Recnos +RECNOSYNC +recnum +RECNUM +recnums +recognised +recops +RECORDCOUNT +recordlen +recordNumber +RecordNumberBinding +RecordNumberFormat +recordp +record's +recoveryFeedback +RecoveryOperation +RECOVERYTEST +recs +rectype +rectypes +Recurisvely +Recv +recvd +refact +refactor +refarg +refargs +refcnt +Refcnt +refcount +Refcount +refcounting +refcounts +reffed +reflectField +reflectMethod +ReflectPermission +refs +regcomp +regenv +REGENV +regid +regids +reginfo +REGINFO +regionmax +RegionResourceType +REGIONs +registerClass +RegisterExpr +registerPriKeyObject +reglocks +REGMAINT +regmax +regmutex +regmutexes +regop +regs +regsize +reindex +Reindex +REINDEX +reindexed +Reinit +rekey +relatedEntity +RELEASECONCH +relen +RELEN +RELIANTUNIX +relink +rem +REM +remainingfiles +remax +Remax +remcpif +remevent +remfile +remfirst +remif +remlock +removeall +removeAll +remrem +RenamedEntity +renamedField +RENAMEMAGIC +Renamer +rentry +renum +renv +reorg +RepConfig +RepConfigInfo +repcpif +REPCTL +repdb +RepElect +RepElectResult +repflag +REPFLAGS +RepFlush +RepGetConfig +repl +RepLease +ReplicationConfig +ReplicationDuplicateMasterException +ReplicationHandleDeadException +ReplicationHoldElectionException +ReplicationHostAddress +ReplicationJoinFailureException 
+ReplicationLeaseExpiredException +ReplicationLeaseTimeoutException +ReplicationLockoutException +ReplicationManagerAckPolicy +ReplicationManagerSiteInfo +replicationManagerStart +ReplicationManagerStartPolicy +ReplicationManagerStats +REPLICATIONMGR +replication's +ReplicationSiteUnavailableException +ReplicationStats +ReplicationStatus +ReplicationTimeoutType +replif +RepLimit +REPLOCKED +replpair +replyp +repmgr +Repmgr +RepMgr +REPMGR +RepmgrConfigTest +RepmgrElectionTest +repmgr's +Repmgr's +RepmgrStartupTest +RepMgrStat +repmgrtests +RepNoarchiveTimeout +repoman +reppg +RepProcessMessage +repquote +REPQUOTE +RepQuoteExample +RepQuoteExampleGSG +RepRequest +rep's +repsite +repstart +RepStart +REPSTART +RepStat +RepSync +reptest +REPTEST +repth +RepTransport +reput +reputpair +RepVersion +REPVERSION +req +Req +REQ +REQs +requried +reregister +rereq +rerequest +Rerequest +REREQUEST +rerequests +ResetCount +RESETFLAG +ResetResult +resizep +resolvepath +ResolvePath +resolvetype +ResultRow +resync +ret +retbuf +retcopy +retcount +rethrown +reties +retp +retPrimaryKey +retsp +ReturnSetup +retval +retValue +REUSEADDR +reverseconcat +REVERSECONCAT +reversedata +REVERSEDATA +reviter +revsplitoff +REVSPLITOFF +rezi +rf +rfd +rfp +rget +rheader +rhs +RHS +ri +Rieffel +rijmen +Rijmen +rijndael +Rijndael +RIJNDAEL +rijndaelDecrypt +rijndaelDecryptRound +rijndaelEncrypt +rijndaelEncryptRound +rijndaelKeySetupDec +rijndaelKeySetupEnc +RINTERNAL +ritem +ritr +riw +RIW +rk +rkey +rlen +rlimit +RLOCK +rlsn +rlsnp +rm +RM +rmdir +RMDIR +rmdir's +RMERR +RMFAIL +rmid +RMID +RMNAMESZ +RMs +rmw +RMW +rndshuf +ro +Roeber +roff +RollbackTransaction +rollforward +rootent +rootlsn +rootpage +rop +ROP +rotcopy +RowData +rowid +Rowid +ROWID +RowKey +rowset +RowSet +RowSetAdd +RowSetRead +RowSetTest +rp +Rp +rpath +rpc +RPC +RPCCLIENT +RpcDb +RpcDbc +RpcDbEnv +RpcDbTxn +RPCExample +rpcgen +RPCGEN +rpcid +rpcserver +rpflag +rprint +RPRINT +rp's +rptr +rq +rr +rrecno +rrecovery +rRs +rRV +rs 
+rsearch +RSHIFT +rskey +rsplit +rsT +RT +RtoL +rtree +RTREE +rtreedepth +rtreenode +RTRIM +RTTarget +rtxn +rtype +rundb +runlog +RUNLOG +RUNRECOVERY +RunRecoveryException +RunTest +RuntimeException +RuntimeExceptionWrapper +RuntimeInvisibleAnnotations +RuntimeInvisibleParameterAnnotations +RuntimeVisibleAnnotations +RuntimeVisibleParameterAnnotations +rusage +Rusage +RUSAGE +rV +rw +rwrw +rwrwrw +rwx +salloc +salvager's +SampleDatabase +sampleno +SampleViews +saskia +savekey +savepoint +Savepoint +SAVEPOINT +savetime +sbtree +sc +SC +scanf +sccsid +sce +sched +SCHED +Schlossnagle +sclp +scm +SCO +SCopy +SCO's +scount +scratchmalloc +scursor +sdb +sdbp +SDECL +SDK +SDups +secdata +seckey +secKey +SecKeys +secon +SecondaryDeadlockTest +SecondaryIndex +secondaryKey +SecondaryKey +secondaryKeyCreate +secondary's +secs +SECS +secsp +sectorsize +Sedgewick +SeekGe +SeekGt +SeekLe +SeekLt +seekMode +seg +segdata +SEGDATA +segdir +segid +SEGID +sei +sel +selcollist +SelectDelete +selectid +SELECTs +selftest +seltablist +sem +SEM +sema +SEMA +semid +seminfo +semun +SENDEVENT +sendpages +sendproc +sep +SEP +seq +Seq +SEQ +SeqGet +SeqGetFlags +seqno +seqNo +SeqNo +seqnum +SeqOpen +seqp +SequenceExample +SequenceStats +sequentialness +SerialBinding +SerialBindingTest +SerialFormat +SerialInput +serialobj +SerialOutput +SerialSerialBinding +SerialSerialKeyExtractor +SERVERPROG +SERVERVERS +servlet +sess +SETALL +setAppDispatch +setAppendRecno +setboundrange +SetBoundRange +setBtreeCompare +setBtreePrefix +setCacheSize +setcomp +SetComp +SetCookie +setcount +SetCount +SETCURSOR +setData +setdiff +setDuplicatelicateCompare +setemptyclear +SetEmptyClear +setEncrypted +SetEndOfFile +setErrorHandler +setErrorPrefix +SETFD +setFeedback +setFeedbackHandler +setField +SetFilePointer +SetFilePointerEx +setfind +SetFind +setflags +setFlags +setHash +setid +SetInfoData +setintr +setlist +SetListElem +SetListElemInt +SetListElemWideInt +SetListRecnoElem +SETLK +setLock +setlsn +setmax 
+SetMax +setMode +SetMultiList +setname +setObj +setObject +SetObjResult +setOffset +setOp +setPanicHandler +setPartialLength +setPartialOffset +setrbeginrend +SetRbeginRend +setRecno +setRecordNumber +setReplicationLimit +setReplicationTransport +setRepRequest +SET's +setsid +setsize +setSize +SetSize +setstacksize +setswapbeginend +SetSwapBeginEnd +setsymd +setText +settimeout +setTimeout +settitle +setto +settxn +setunon +setUp +setUserBufferLength +setval +SETVAL +setValue +sexing +sgenrand +SGI +sh +SHA +shalloc +SHALLOC +shalloced +shalloc'ed +sharedb +SHAREDCACHE +sharedValue +shareenv +SHAREMEM +shash +SHASH +shellstatic +shere +ShiftLeft +ShiftRight +ShipmentBinding +ShipmentByPart +ShipmentBySupplier +ShipmentData +ShipmentKey +ShipmentKeyBinding +ShipmentValue +shm +Shm +SHM +shmat +shmctl +shmdt +shmem +SHMEM +shmget +SHMGET +shmid +ShmLock +SHMLOCK +ShmMap +shmname +SHMOPEN +SHMSIZE +ShmSystemLock +ShmUnmap +shortread +shortValue +showHeader +shownull +shqueue +SHQUEUE +shr +shreg +shufflePV +si +SibDup +sig +SIGABRT +sigaction +SIGACTION +SIGALRM +SIGCHLD +siginit +signalled +signalling +Signalling +signo +SIGPIPE +sigresend +Simapp +Simpapp +SIMPAPP +SimpleBuffer +SimpleConfig +SimpleStoreGet +SimpleStorePut +SimpleTxn +SimpleType +simpleTypes +singleKey +singlethread +sinh +sitelist +sitestrict +sizeAdded +sizefix +sizehint +sizeNeeded +sizeof +Sizeof +SIZEOF +sizeq +sj +sk +skey +sKey +skeyfour +skeyone +skeys +skeyset +skeythree +skeytwo +SKIPFIRSTKEY +Skiping +skiplist +Skiplist +skiplists +skipput +Skodon +skodonj +SKU +sl +sle +sleepycat +Sleepycat +slh +slist +slumber'd +smallint +smap +smax +smbfs +SMerge +sms +SNaN +snapshotting +sniglet +snprintf +SNPRINTF +sockaddr +socklen +sockopt +socktype +Something's +sortdups +SortedDuplicates +SortedMap +SortedSet +sortitem +sortlist +sortorder +sortOrder +SORTPAGE +sortPV +SOUNDEX +SourceDebugExtension +SourceFile +sourcep +sp +SPACEONLY +spanp +sparc +Sparc +spawnl +spawnvp +spcified +spcnt +spgno 
+spinlock +spinlocks +spinsp +SPL +splitdata +splitmeta +SPLITOLD +splitp +Splitp +spoofPorts +sprintf +SPRINTF +sql +SQL +sqlite +SQLite +SQLITE +sqliteicu +sqliteInt +sqliteplocks +sqliterc +sqlthread +sqr +sqrtpow +srand +srandom +src +SRCHANDLE +srclen +SrcListAppendFromTerm +SrcListDelete +SrcListShiftJoinType +SRCNAME +sread +SRT +ss +sS +SS +sscanf +sse +sSELECT +ssen +ssenevi +ssenluf +ssensuo +sshift +ssize +SSLeay +sslll +ssn +sss +sstring +SSZ +stacksize +StackSize +stacktopempty +StackTopEmpty +STAILQ +standalone +Standalone +startn +startsWith +startsync +STARTSYNC +startTime +startupdone +STARTUPDONE +stat +Stat +STAT +stati +stats +Stats +STATS +statsOn +stbar +stblptn +STCWX +std +Std +STD +stdafx +STDC +stddev +stderr +STDERR +stdfd +stdin +stdint +STDINT +stdio's +stdlib +STDLIB +stdmode +stdout +Stdout +Steele +STK +stkgrow +stkrel +stl +STL +StlTpcbExample +stmax +stmt +STMT +stmts +STMTSTATUS +storeConversion +StoredClassCatalog +StoredClassCatalogTest +StoredClassCatalogTestInit +storedCollection +StoredCollection +StoredCollections +StoredContainer +StoredEntrySet +storedIter +storedIterator +StoredIterator +StoredKeySet +storedList +StoredList +storedMap +StoredMap +StoredMapEntry +storedSet +StoredSortedEntrySet +StoredSortedKeySet +storedSortedMap +StoredSortedMap +storedSortedSet +StoredSortedValueSet +StoredValueSet +StoreLoad +storeName +StoreStore +stp +stqe +stqh +str +STR +strace +strcasecmp +STRCASECMP +strcat +STRCAT +strchr +STRCHR +strcmp +strcoll +strcpy +strdup +STRDUP +strdup'ed +strerror +STRERROR +strftime +STRFTIME +StringBuffer +StringBuilder +stringdb +StringDbt +stringFormats +stringify +STRINGIZE +stringp +stringToBytes +strlen +STRLIST +strmap +strncasecmp +strncat +STRNCAT +strncmp +STRNCMP +strncpy +StrNICmp +STROFFSET +stronly +strrchr +STRRCHR +strsep +STRSEP +strtod +strtol +STRTOL +strToLower +strtoul +STRTOUL +strToUpper +struct +STRUCT +structs +structure's +sts +stu +stwcx +STWCX +stype +subcases +subclassed 
+subclause +subdata +subdatabase +Subdatabase +subdatabases +subdatabase's +Subdatabases +subdb +Subdb +SUBDB +subdbname +subdbpg +subdbs +subdistribution +subdistributions +subList +submap +subMap +subname +Subname +subpackages +subqueries +SUBQUERIES +subquery +Subquery +SUBQUERY +subSet +subshell +substr +subtransaction +subtransactions +SUCCEDED +sullivan +SunOS +superclasses +superlock +superTypeClass +superTypeId +SupplierBinding +SupplierByCity +SupplierData +SupplierKey +SupplierKeyBinding +SupplierValue +suppressAccessChecks +sv +sV +svc +sVv +sw +SWAPBYTES +swigCPtr +SWIG's +SWITCHes +swpb +SWR +symbian +Symbian +SYMBIAN +sync'd +syncDir +synced +sync'ed +syncs +sysattach +sysbuf +syscall +SYSCALL +sysconf +SYSCONF +sysdetach +syserr +SystemInfo +SYSTEMTIME +sz +szosfile +szRegion +szRgn +ta +tableent +TableLock +tablename +TABLENUM +tablesize +tailMap +tailq +TAILQ +tailSet +tailvar +TAKECONCH +Takuji +tanh +tas +TAS +taskLock +tbhp +tbl +tblname +tbuf +Tbuf +tc +tchar +TCHAR +tcl +Tcl +TCL +TCLDB +Tcl's +tclvar +tcons +tcp +td +tdata +tdkids +TDS +tearDown +Teardown +tempdb +TEMPDB +TempFolder +termid +terra +testAddr +TestAppendRecno +TestAssociate +TestCallback +testcase +TestChannel +TestClassCatalog +TestClosedDb +TestConstruct +testcopy +testctrl +testdata +TestDataBinding +testdb +TestDbtFlags +TestDbTuner +testdestdir +testdigits +TESTDIR +testdocopy +TestEntity +TestEntityBinding +TestEnv +TestEnvConfig +TestEnvMethod +testevolvedir +testfixture +TestGetSetMethods +TestKeyAssigner +TestKeyExistErrorReturn +TestKeyExtractor +TestKeyRange +TestLockVec +TestLogc +testname +testName +TestOpenEmpty +testprefix +TestQueue +TestReplication +TestRpcServer +TestSameDbt +TestSerial +TestSetThreadCount +TestSimpleAccess +TestSR +TestStat +TestStore +TESTTESTEST +TestThread +TestTruncate +TestTxn +TestUtil +TestUtils +testval +testvfs +TESTXADIR +TestXAServlet +tffsp +tfsp +thang +Thang +theVector +theVector's +theVendor +thies +Thies +thr +THR 
+threadedness +threadid +threadID +ThreadId +ThreadIdString +threadNumber +ThreadOne +thread's +threadsafe +THREADSAFE +threadtest +ThreadTwo +Threshhold +Throwable +thrp +thru +thstndrd +tid +TID +tids +tiebreaker +tiebreaking +timelsn +timeoutp +timerange +timespec +TIMESPEC +timespecp +timespecs +timestamp +TIMESTAMP +TIMESTAMPRIMARYDEFERREDISTINCTDROPFAILFROMFULLGLOB +TimeUnits +timeval +timout +timouts +tinyint +tion +TK +tkt +tlen +TLPUT +TLS +tm +TM +tmap +TMASYNC +tmax +TMENDRSCAN +TMER +TMERR +TMFAIL +TMJOIN +TMMIGRATE +TMMULTIPLE +TMNOFLAGGS +TMNOFLAGS +TMNOMIGRATE +TMNOWAIT +TMONEPHASE +tmp +Tmp +TMP +tmpath +tmpBackup +tmpdb +tmpdir +TMPDIR +tmpmap +tmpname +TMREGISTER +TMRESUME +TMs +TMSTARTRSCAN +TMSUCCESS +TMSUSPEND +TMUSEASYNC +tmutex +tne +tnem +tneme +tNULL +tnum +toArray +toBeRenamedField +ToBlob +toBuf +toched +todo +TODO +toHexString +toInclusive +toIndex +ToInt +tokenize +tokenizer +TOKENTYPE +toKey +toList +tolower +toMapEntry +ToNumeric +toobig +TOOBIG +TOOSMALL +Topher +TOPLEVEL +ToReal +Torek +toString +totalAlloc +totalExcess +ToText +toValue +tp +tpabort +tpacall +tpalloc +tpbegin +TPC +tpcb +TPCB +TpcbExample +tpcommit +tpinit +Tpinit +TPINIT +TPS +tpsvrdone +tpsvrinit +tpterm +tput +tqe +tqh +tr +TransactionRunner +TransactionStats +TransactionStatus +TransactionTest +TransactionTests +TransactionWorker +transapp +transport's +TRANSPROXY +transtype +treeorder +tregion +tridxby +TriggerDeleteStep +TriggerInsertStep +TriggerSelectStep +TriggerUpdateStep +trigHash +trinomials +trnm +Tru +TRU +trunc +TRUNC +truncdata +TRUNCDATA +tryable +ts +t's +TS +tsinghua +Tsinghua +tsize +tsl +tstart +TSTRING +tT +Tt +ttmax +ttpcbddlk +ttpcbi +ttpcbr +ttype +TupleBinding +TupleBindingTest +TupleFormat +TupleFormatTest +TupleInput +TupleInputBinding +TupleMarshalledBinding +TupleOrderingTest +TupleOutput +TupleSerialBinding +TupleSerialDbFactory +TupleSerialDbFactoryTest +TupleSerialEntityBinding +TupleSerialFactoryTest +TupleSerialKeyExtractor 
+TupleSerialMarshalledBinding +TupleSerialMarshalledKeyExtractor +TupleTupleBinding +TupleTupleKeyExtractor +TupleTupleMarshalledBinding +TupleTupleMarshalledKeyExtractor +tuv +tv +tV +tvoid +tVxX +tVZ +tx +tX +TX +txinfo +txn +Txn +TXN +txnaborts +txnal +txnapp +TXNAPP +txnarray +TxnCheckpoint +TxnGuide +TxnGuideDPL +TxnGuideInMemory +TxnGuideStl +TXNHEAD +txnid +Txnid +TXNID +txnidcl +txnids +txninfo +TxnInfoDelete +txnip +txnlist +TXNLIST +TXNLOGREC +TXNMGR +txnnosync +txnp +txnpg +TxnPriorityTest +TxnRecover +TXNREGION +txns +Txns +TXNs +TXNS +TxnStat +txntimeout +TxnTimeout +TXNVERSION +txnwait +txt +typeClass +typeId +typemore +typename +Typename +typetoken +ua +ubell +uber +ucol +ucompos +ud +udbt +ufid +ufree +ufs +UI +uid +UID +uint +UINT +uintarg +uintmax +uintptr +uK +ul +ulAction +ulen +ulens +ulFlags +ulinks +ulMode +ulong +ULONG +umalloc +umask +UMINUS +UMRW +un +uname +UNAVAIL +uncorrect +undef +UNDEF +undeletable +undeleting +UNDOC +undodup +undo'ing +undosplit +Unencrypted +unenhanced +unfinalised +Unhandled +uni +unicode +Unicode +UNICODE +unindexed +uninstall +uniq +uniqued +uniqueflag +UNIQUERYATTACHAVINGROUPDATEMPORARYBEGINNEREINDEXCLUSIVEXISTSBETWEEN +unistd +UNISTD +unix +unixepoch +UnixLib +Unixware +UnixWare +UnknownError +UNLCK +UnlockFile +UnlockFileEx +unmap +unmapfile +UnmapViewOfFile +unmark +unmarshal +unmarshalData +Unmarshall +unmarshalled +unpinned +unpinning +unprintables +UNPv +unreadlock +unref +UNREF +unregistry +UnsupportedOperationException +UOC +upd +updateckp +UpdateConflictException +updateDatabaseEntry +updateDbt +UPDATEROOT +upg +upgradeFeedback +upi +uplevel +UPLUS +UPREFIX +urandom +urealloc +uregex +usabled +UseBaseclass +usec +USEC +useconds +usecs +usecsp +useCurrentKey +usePrimaryKey +usercopy +USERCOPY +userfree +usermem +USERMEM +USERPROFILE +UseSubclass +useValue +usr +usrAppInit +utc +utf +UTF +UTFDataFormatException +UtfOps +UtfTest +util +Util +UTS +uVv +UVW +UX +va +vac +val +valarray +valn +valueBinding 
+valueData +valueEntityBinding +valueFormat +valueInput +valueInputOutput +valueOf +valueOutput +value's +var +Var +Varargs +varbin +varchar +variadic +varint +varints +VARNAME +vars +VBegin +vbuf +vc +Vc +VC +VColumn +VCreate +vdbe +Vdbe +VDBE +vdbeaux +VDestroy +vDIRSU +vdp +vdp's +vec +vendordb +vendorDB +VendorDB +ver +VER +verbage +VerboseConfig +verboseconfigtest +VerifyCookie +verifyFeedback +vers +versioned +versioning +VersionMismatchException +VFilter +vflag +vfprintf +vfs +VFS +vfslog +VFSNAME +vfstrace +vget +vica +VIEWINITIALLY +view's +VIRTUALTABLE +vk +VM +VMPAGESIZE +VNext +Vo +VOpen +Voter's +VOTEs +VRename +vrfy +VRFY +vrfyutil +vsnprintf +VSNPRINTF +vsprintf +vtab +vtabarg +VtabArgExtend +VtabArgInit +vtabarglist +vtabargtoken +VtabBeginParse +VtabFinishParse +vtable +VTABLE +VTALLY +vtruncate +VUpdate +vV +Vv +VVA +vvp +Vvs +VVV +Vvw +VvW +vVxXyZ +vVxXZ +VvXxZ +vw +vwx +vx +Vx +VX +vxmutex +vxtmp +vxtpcb +vxworks +VxWorks +VXWORKS +vz +vZ +waitl +waitlist +waitpid +waitsfor +Waitsfor +WAITSFOR +waittime +wakeme +WAKEME +wakep +wakeups +wal +WAL +walkcnt +walkdupint +walkpages +walkqueue +walthread +WATCOM +wb +wc +wce +wchar +wcount +wday +wDay +webdav +weblogic +WebLogic +weblogic's +webquill +WHERENAMEAFTEREPLACEANDEFAULTAUTOINCREMENTCASTCOLUMNCOMMITCONFLICT +wholenumber +wHour +wideInt +wiki +Wikieup +wildcard +WILDCARD +wildcards +WinCE +windsh +winnt +WinNT +winuser +WinXP +WithConverter +WithDeleter +WithRenamer +WLInitialContextFactory +wmask +wMinute +wMonth +wnt +Woloschuk +wordlist +WORDLIST +workcurs +WOULDROLLBACK +WR +writeable +WRITEABLE +writeAllowed +writeback +writeBigInteger +writeBoolean +writeByte +writeBytes +writeChar +writeChars +writeCursor +WRITECURSOR +writedb +writeDouble +WriteFile +writeFloat +writeInt +writej +writeKeyObject +writelock +WRITELOCK +writelocks +writeLong +writeObject +WRITEOPEN +writeShort +writeSortedDouble +writeSortedFloat +writeString +writeUnsignedByte +writeUnsignedInt +writeUnsignedShort 
+writev +WRLCK +wrlock +wrnosync +WRNOSYNC +WRONLY +wsa +WSA +WSACleanup +WSAStartup +WSAWaitForMultipleEvents +wsd +WSD +wsdAutoext +wsdHooks +wsdPrng +wsdStat +wSecond +wsFlags +wsize +wt +WT +wthread +WW +wWeekDay +WWRITE +www +wxy +wxyz +wYear +xa +XA +xAccess +xact +xadd +XAER +XAException +xalinks +XAResource +Xbcdef +xBegin +xBestIndex +xCheckReservedLock +xCheckResLock +xchg +xchgb +xClose +xColumn +xCommit +xConnect +xCreate +xCurrentTime +xDelete +xDestroy +xDeviceChar +xDeviceCharacteristics +xDisconnect +xDlError +xDlOpen +xDlSym +xdr +XFER +xferopt +xFFFF +xFileControl +xFilesize +xFileSize +xFilter +xFullPathname +x'hhhhhh +xid +Xid +XID +XIDDATASIZE +xids +xLock +XMIT +xml +xNext +xOpen +xor +XOR'd +xori +xorl +XP +XPG +xRandomness +xRead +xRename +xRollback +xRowid +X's +xSectorSize +xShmBarrier +xShmLock +xShmMap +xShmOpen +xShmUnmap +xSleep +xSync +xTruncate +xunlock +xUnlock +xUpdate +xWrite +xxx +XXX +xxxx +xxxxx +XxZ +xy +xyz +yieldcpu +YIELDCPU +YIFINTOFFSETISNULLORDERESTRICTOUTERIGHTROLLBACKROWUNIONUSINGVACUUM +Yongmin +yy +YY +YYACTIONTYPE +YYCODETYPE +YYERRORSYMBOL +YYERRSYMDT +YYFALLBACK +yygotominor +yyinit +yymajor +YYMINORTYPE +YYMMDDhhmm +yymsp +YYNOCODE +YYNRULE +YYNSTATE +yypminor +yypParser +yyruleno +YYSTACKDEPTH +yytestcase +yytrackmaxstackdepth +YYWILDCARD +yyy +yza +zAND +zArg +zCrashFile +ZED +zend +zeroblob +zero'd +Zero'd +zeroeth +zerofill +ZF +zFilename +zFromCol +zi +zIndex +zipcode +zipCode +zl +zName +zNEAR +zNOT +zOpt +zOR +zOutput +zParent +zPHRASE +zSp +zSql +zText +zToCol +zToken +zulu +zValues diff --git a/dist/validate/s_chk_srcfiles b/dist/validate/s_chk_srcfiles new file mode 100644 index 00000000..7ccca64e --- /dev/null +++ b/dist/validate/s_chk_srcfiles @@ -0,0 +1,52 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure we haven't forgotten to add any files to the list +# of source files Windows uses to build its dsp files. 
+# +# ALEXG: This is no longer a reasonable test, Windows uses a different +# mechanism to include files now, and the platforms still using srcfiles.in +# don't support all of the source code. Not sure if there is a reasonable +# alternative check here? + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +f=$d/dist/srcfiles.in +t1=__1 +t2=__2 + +sed -e '/^[ #]/d' \ + -e '/^examples\/c\/csv\/csv_local.c/d' \ + -e '/^$/d' < $f | + awk '{print $1}' > $t1 +find $d -type f | + sed -e 's/^\.\.\/\.\.\/\.\.\///' \ + -e '/^build[^_]/d' \ + -e '/^dist\//d' \ + -e '/^lang\/java\/libdb_java\/java_stat_auto.c/d' \ + -e '/^lang\/perl\//d' \ + -e '/^lang\/php_db4\//d' \ + -e '/^test\//d' \ + -e '/^test\/erlang/d' \ + -e '/^test\/repmgr/d' \ + -e '/^test\/stl/d' \ + -e '/^test\/server/d' \ + -e '/^test_thread/d' \ + -e '/^test\/vxworks/d' | + egrep '\.c$|\.cpp$|\.def$|\.rc$' | + sort > $t2 + +cmp $t1 $t2 > /dev/null || { + echo "<<< srcfiles.in >>> existing files" + diff $t1 $t2 + exit 1 +} + +rm -f $t1 $t2 + +exit 0 diff --git a/dist/validate/s_chk_stats b/dist/validate/s_chk_stats new file mode 100644 index 00000000..d7c5c5f3 --- /dev/null +++ b/dist/validate/s_chk_stats @@ -0,0 +1,152 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure all of the stat structure members are included in +# all of the possible formats. + +# Top-level directory. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' + exit 1 +} + +docs=$d/docs/api_reference +s=$d/src +l=$d/lang + +# Docs aren't in the source tree anymore, so we can't check the completeness. +check_docs="no" +exitv=0 +t1=__tmp + +# Extract the field names for a structure from the db.h file. 
+inc_fields() +{ + sed -e "/struct $1 {/,/^};$/p" \ + -e d < $s/dbinc/db.in | + sed -e 1d \ + -e '$d' \ + -e '/;/!d' \ + -e 's/;.*//' \ + -e 's/^[ ].*[ \*]//' +} + +cat << END_OF_IGNORE > IGNORE +bt_maxkey +bt_metaflags +hash_metaflags +qs_metaflags +qs_ndata +st_hash_max_nowait +st_filefail_cleanups +END_OF_IGNORE + +# Check to make sure the elements of a structure from db.h appear in +# the other files. +inc() +{ + for i in `inc_fields $1`; do + if egrep -w $i IGNORE > /dev/null; then + echo " $1: ignoring $i" + continue + fi + for j in $2; do + # Docs aren't in the tree any more, skip the checks. + if [ ! -e $j ]; then + ext=`echo $j | awk -F . '{print $NF}'` + if [ "$ext" != "html" \ + -o "$check_docs" = "yes" ]; then + echo "$j file not found, skipping." + fi + continue + fi + if egrep -w $i $j > /dev/null; then + :; + else + echo " $1: $i not found in $j." + exitv=1 + fi + done + done +} + +inc "__db_bt_stat" "$l/tcl/tcl_db.c $s/btree/bt_stat.c $docs/C/dbstat.html" +inc "__db_h_stat" "$l/tcl/tcl_db.c $s/hash/hash_stat.c $docs/C/dbstat.html" +inc __db_lock_stat \ + "$l/tcl/tcl_lock.c $s/lock/lock_stat.c $docs/C/lockstat.html" +inc __db_log_stat "$l/tcl/tcl_log.c $s/log/log_stat.c $docs/C/logstat.html" +inc __db_mpool_fstat \ + "$l/tcl/tcl_mp.c $s/mp/mp_stat.c $docs/C/mempstat.html" +inc __db_mpool_stat \ + "$l/tcl/tcl_mp.c $s/mp/mp_stat.c $docs/C/mempstat.html" +inc __db_mutex_stat \ + "$s/mutex/mut_stat.c $docs/C/mutexstat.html" +inc "__db_qam_stat" \ + "$l/tcl/tcl_db.c $s/qam/qam_stat.c $docs/C/dbstat.html" +inc __db_rep_stat \ + "$l/tcl/tcl_rep.c $s/rep/rep_stat.c $docs/C/repstat.html" +inc __db_seq_stat \ + "$l/tcl/tcl_seq.c $s/sequence/seq_stat.c $docs/C/seqstat.html" +inc __db_txn_stat \ + "$l/tcl/tcl_txn.c $s/txn/txn_stat.c $docs/C/txnstat.html" + +# Check to make sure the elements from a man page appears in db.in. 
+man() +{ + for i in `cat $t`; do + if egrep -w $i IGNORE > /dev/null; then + echo " $1: ignoring $i" + continue + fi + if egrep -w $i $s/dbinc/db.in > /dev/null; then + :; + else + echo " $1: $i not found in db.h." + exitv=1 + fi + done +} + +if [ "$check_docs" = "yes" ]; then + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/dbstat.html > $t + man "checking db_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/lockstat.html > $t + man "checking lock_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/logstat.html > $t + man "checking log_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/mempstat.html > $t + man "checking memp_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/repstat.html > $t + man "checking rep_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' < \ + $docs/C/seqstat.html > $t + man "checking seq_stat.so against db.h" + + sed -e '/m4_field(/!d' \ + -e 's/.*m4_field[^,]*,[ ]*\([^,]*\).*/\1/' \ + -e 's/__LB__.*//' < $docs/C/txnstat.html > $t +man "checking txn_stat.so against db.h" +fi + +rm -f IGNORE + +exit $exitv diff --git a/dist/validate/s_chk_tags b/dist/validate/s_chk_tags new file mode 100644 index 00000000..98f0e2ec --- /dev/null +++ b/dist/validate/s_chk_tags @@ -0,0 +1,50 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure we don't need any more symbolic links to tags files. + +d=../.. +[ -f $d/LICENSE ] || { + echo 'FAIL: cannot find source distribution directory.' 
+ exit 1 +} + +# We are dropping the tags file for 4.8 - test is invalid +# for now exit out +echo "<<< test is no longer valid >>>" +exit 0 + + +t1=__1 +t2=__2 + +(cd $d && ls -F | egrep / | sort | + sed -e 's/\///' \ + -e '/^CVS$/d' \ + -e '/^build_vxworks$/d' \ + -e '/^build_wince$/d' \ + -e '/^build_windows$/d' \ + -e '/^docs$/d' \ + -e '/^docs_book$/d' \ + -e '/^docs_src$/d' \ + -e '/^examples_java$/d' \ + -e '/^java$/d' \ + -e '/^mod_db4$/d' \ + -e '/^perl$/d' \ + -e '/^php_db4$/d' \ + -e '/^test$/d' \ + -e '/^test_cxx$/d' \ + -e '/^test_micro$/d' \ + -e '/^test_purify$/d' \ + -e '/^test_thread$/d' \ + -e '/^test_vxworks$/d') > $t1 + +(cd $d && ls */tags | sed 's/\/tags$//' | sort) > $t2 +if diff $t1 $t2 > /dev/null; then + exit 0 +else + echo "<<< source tree >>> tags files" + diff $t1 $t2 + exit 1 +fi diff --git a/examples/c/README b/examples/c/README new file mode 100644 index 00000000..407c32cb --- /dev/null +++ b/examples/c/README @@ -0,0 +1,40 @@ +# $Id$ + +getting_started/ + Examples from the Getting Started Guide + +ex_access.c Using just the DB access methods. + +ex_apprec/ Application-specific recovery. + +ex_btrec.c Using the BTREE access method with record numbers. + +ex_bulk.c Bulk interfaces. + +ex_dbclient.c Using DB from an RPC client. + +ex_env.c Setting up the DB environment. + +ex_lock.c Locking. + +ex_mpool.c Shared memory buffer pools. + +ex_rep/ Replication. This creates a toy stock quote server + with DB's single-master, multiple-client replication, + with communication over TCP. See ex_rep/README. + +ex_sequence.c Sequences. + +ex_thread.c Threaded application with multiple readers and writers. + +ex_tpcb.c TPC/B. + Ex_tpcb sets up a framework in which to run a TPC/B test. + Database initialization (the -i flag) and running the + benchmark (-n flag) must take place separately (i.e., + first create the database, then run 1 or more copies of + the benchmark). 
Furthermore, when running more than one + TPCB process, it is necessary to run the deadlock detector + (db_deadlock), since it is possible for concurrent tpcb + processes to deadlock. For performance measurement, it + will also be beneficial to run the db_checkpoint process + as well. diff --git a/examples/c/csv/DbRecord.c b/examples/c/csv/DbRecord.c new file mode 100644 index 00000000..cbfbd5e9 --- /dev/null +++ b/examples/c/csv/DbRecord.c @@ -0,0 +1,470 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +static int DbRecord_field(DbRecord *, u_int, void *, datatype); +static int DbRecord_search_field(DbField *, char *, OPERATOR); +static int DbRecord_search_recno(char *, OPERATOR); + +/* + * DbRecord_print -- + * Display a DbRecord structure. + */ +void +DbRecord_print(DbRecord *recordp, FILE *fp) +{ + DbField *f; + void *faddr; + + if (fp == NULL) + fp = stdout; + + fprintf(fp, "Record: %lu:\n", (u_long)recordp->recno); + for (f = fieldlist; f->name != NULL; ++f) { + faddr = (u_int8_t *)recordp + f->offset; + fprintf(fp, "\t%s: ", f->name); + switch (f->type) { + case NOTSET: + /* NOTREACHED */ + abort(); + break; + case DOUBLE: + fprintf(fp, "%f\n", *(double *)faddr); + break; + case STRING: + fprintf(fp, "%s\n", *(char **)faddr); + break; + case UNSIGNED_LONG: + fprintf(fp, "%lu\n", *(u_long *)faddr); + break; + } + } +} + +/* + * DbRecord_read -- + * Read a specific record from the database. + */ +int +DbRecord_read(u_long recno_ulong, DbRecord *recordp) +{ + DBT key, data; + u_int32_t recno; + int ret; + + /* + * XXX + * This code assumes a record number (typed as u_int32_t) is the same + * size as an unsigned long, and there's no reason to believe that. + */ + recno = recno_ulong; + + /* + * Retrieve the requested record from the primary database. 
Have + * Berkeley DB allocate memory for us, keeps the DB handle thread + * safe. + * + * We have the Berkeley DB library allocate memory for the record, + * which we own and must eventually free. The reason is so we can + * have the string fields in the structure point into the actual + * record, rather than allocating structure local memory to hold them + * and copying them out of the record. + */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &recno; + key.size = sizeof(recno); + data.flags = DB_DBT_MALLOC; + if ((ret = db->get(db, NULL, &key, &data, 0)) != 0) + return (ret); + + if ((ret = DbRecord_init(&key, &data, recordp)) != 0) + return (ret); + + return (0); +} + +/* + * DbRecord_discard -- + * Discard a DbRecord structure. + */ +int +DbRecord_discard(DbRecord *recordp) +{ + /* Free the allocated memory. */ + free(recordp->raw); + recordp->raw = NULL; + + return (0); +} + +/* + * DbRecord_init -- + * Fill in a DbRecord from the database key/data pair. + */ +int +DbRecord_init(const DBT *key, const DBT *data, DbRecord *recordp) +{ + DbField *f; + u_int32_t skip; + void *faddr; + + /* Initialize the structure (get the pre-set index values). */ + *recordp = DbRecord_base; + + /* Fill in the ID and version. */ + memcpy(&recordp->recno, key->data, sizeof(u_int32_t)); + memcpy(&recordp->version, + (u_int32_t *)data->data + 1, sizeof(u_int32_t)); + + /* Set up the record references. */ + recordp->raw = data->data; + recordp->offset = (u_int32_t *)data->data + 1; + skip = (recordp->field_count + 2) * sizeof(u_int32_t); + recordp->record = (u_int8_t *)data->data + skip; + recordp->record_len = data->size - skip; + + for (f = fieldlist; f->name != NULL; ++f) { + faddr = (u_int8_t *)recordp + f->offset; + if (DbRecord_field( + recordp, f->fieldno, faddr, f->type) != 0) + return (1); + } + return (0); +} + +/* + * DbRecord_field -- + * Fill in an individual field of the DbRecord. 
+ */ +static int +DbRecord_field( + DbRecord *recordp, u_int field, void *addr, datatype type) +{ + size_t len; + char number_buf[20]; + + /* + * The offset table is 0-based, the field numbers are 1-based. + * Correct. + */ + --field; + + switch (type) { + case NOTSET: + /* NOTREACHED */ + abort(); + break; + case STRING: + *((u_char **)addr) = recordp->record + recordp->offset[field]; + recordp->record[recordp->offset[field] + + OFFSET_LEN(recordp->offset, field)] = '\0'; + break; + case DOUBLE: + case UNSIGNED_LONG: + /* This shouldn't be possible -- 2^32 is only 10 digits. */ + len = OFFSET_LEN(recordp->offset, field); + if (len > sizeof(number_buf) - 1) { + dbenv->errx(dbenv, + "record %lu field %lu: numeric field is %lu bytes and too large to copy", + recordp->recno, field, (u_long)len); + return (1); + } + memcpy(number_buf, + recordp->record + recordp->offset[field], len); + number_buf[len] = '\0'; + + if (type == DOUBLE) { + if (len == 0) + *(double *)addr = 0; + else if (strtod_err(number_buf, (double *)addr) != 0) + goto fmt_err; + } else + if (len == 0) + *(u_long *)addr = 0; + else if (strtoul_err(number_buf, (u_long *)addr) != 0) { +fmt_err: dbenv->errx(dbenv, + "record %lu: numeric field %u error: %s", + recordp->recno, field, number_buf); + return (1); + } + break; + } + return (0); +} + +/* + * DbRecord_search_field_name -- + * Search, looking for a record by field name. + */ +int +DbRecord_search_field_name(char *field, char *value, OPERATOR op) +{ + DbField *f; + + for (f = fieldlist; f->name != NULL; ++f) + if (strcasecmp(field, f->name) == 0) + return (DbRecord_search_field(f, value, op)); + + /* Record numbers aren't handled as fields. */ + if (strcasecmp(field, "id") == 0) + return (DbRecord_search_recno(value, op)); + + dbenv->errx(dbenv, "unknown field name: %s", field); + return (1); +} + +/* + * DbRecord_search_field_number -- + * Search, looking for a record by field number. 
+ */ +int +DbRecord_search_field_number(u_int32_t fieldno, char *value, OPERATOR op) +{ + DbField *f; + + for (f = fieldlist; f->name != NULL; ++f) + if (fieldno == f->fieldno) + return (DbRecord_search_field(f, value, op)); + + dbenv->errx(dbenv, "field number %lu not configured", (u_long)fieldno); + return (1); +} + +/* + * DbRecord_search_recno -- + * Search, looking for a record by record number. + */ +static int +DbRecord_search_recno(char *value, OPERATOR op) +{ + DBC *dbc; + DbRecord record; + DBT key, data; + u_int32_t recno; + u_long recno_ulong; + int ret; + + /* + * XXX + * This code assumes a record number (typed as u_int32_t) is the same + * size as an unsigned long, and there's no reason to believe that. + */ + if (strtoul_err(value, &recno_ulong) != 0) + return (1); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &recno; + key.size = sizeof(recno); + + if ((ret = db->cursor(db, NULL, &dbc, 0)) != 0) + return (ret); + + /* + * Retrieve the first record that interests us. The range depends on + * the operator: + * + * ~ error + * != beginning to end + * < beginning to first match + * <= beginning to last match + * = first match to last match + * > record after last match to end + * >= first match to end + */ + if (op == LT || op == LTEQ || op == NEQ || op == WC || op == NWC) + recno = 1; + else if (op == WC || op == NWC) { + dbenv->errx(dbenv, + "wildcard operator only supported for string fields"); + return (1); + } else { + recno = recno_ulong; + if (op == GT) + ++recno; + } + if ((ret = dbc->c_get(dbc, &key, &data, DB_SET)) != 0) + goto err; + + for (;;) { + if ((ret = DbRecord_init(&key, &data, &record)) != 0) + break; + if (field_cmp_ulong(&record.recno, &recno_ulong, op)) + DbRecord_print(&record, NULL); + else + if (op == LT || op == LTEQ || op == EQ) + break; + if ((ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) != 0) + break; + } + +err: return (ret == DB_NOTFOUND ? 
0 : ret); +} + +/* + * DbRecord_search_field -- + * Search, looking for a record by field. + */ +static int +DbRecord_search_field(DbField *f, char *value, OPERATOR op) +{ +#ifdef HAVE_WILDCARD_SUPPORT + regex_t preq; +#endif + DBC *dbc; + DbRecord record; + DBT key, data, pkey; + double value_double; + u_long value_ulong; + u_int32_t cursor_flags; + int ret, t_ret; + int (*cmp)(void *, void *, OPERATOR); + void *faddr, *valuep; + + dbc = NULL; + memset(&key, 0, sizeof(key)); + memset(&pkey, 0, sizeof(pkey)); + memset(&data, 0, sizeof(data)); + + /* + * Initialize the comparison function, crack the value. Wild cards + * are always strings, otherwise we follow the field type. + */ + if (op == WC || op == NWC) { +#ifdef HAVE_WILDCARD_SUPPORT + if (f->type != STRING) { + dbenv->errx(dbenv, + "wildcard operator only supported for string fields"); + return (1); + } + if (regcomp(&preq, value, 0) != 0) { + dbenv->errx(dbenv, "regcomp of pattern failed"); + return (1); + } + valuep = &preq; + cmp = field_cmp_re; +#else + dbenv->errx(dbenv, + "wildcard operators not supported in this build"); + return (1); +#endif + } else + switch (f->type) { + case DOUBLE: + if (strtod_err(value, &value_double) != 0) + return (1); + cmp = field_cmp_double; + valuep = &value_double; + key.size = sizeof(double); + break; + case STRING: + valuep = value; + cmp = field_cmp_string; + key.size = (u_int32_t)strlen(value); + break; + case UNSIGNED_LONG: + if (strtoul_err(value, &value_ulong) != 0) + return (1); + cmp = field_cmp_ulong; + valuep = &value_ulong; + key.size = sizeof(u_long); + break; + default: + case NOTSET: + abort(); + /* NOTREACHED */ + } + + /* + * Retrieve the first record that interests us. 
The range depends on + * the operator: + * + * ~ beginning to end + * != beginning to end + * < beginning to first match + * <= beginning to last match + * = first match to last match + * > record after last match to end + * >= first match to end + * + * If we have a secondary, set a cursor in the secondary, else set the + * cursor to the beginning of the primary. + * + * XXX + * If the wildcard string has a leading non-magic character we should + * be able to do a range search instead of a full-database search. + * + * Step through records to the first non-match or to the end of the + * database, depending on the operation. If the comparison function + * returns success for a key/data pair, print the pair. + */ + if (f->secondary == NULL || op == NEQ || op == WC || op == NWC) { + if ((ret = db->cursor(db, NULL, &dbc, 0)) != 0) + goto err; + while ((ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0) { + if ((ret = DbRecord_init(&key, &data, &record)) != 0) + break; + faddr = (u_int8_t *)&record + f->offset; + if (cmp(faddr, valuep, op)) + DbRecord_print(&record, NULL); + else + if (op == EQ || op == LT || op == LTEQ) + break; + } + } else { + if ((ret = + f->secondary->cursor(f->secondary, NULL, &dbc, 0)) != 0) + goto err; + key.data = valuep; + cursor_flags = op == LT || op == LTEQ ? 
DB_FIRST : DB_SET_RANGE; + if ((ret = + dbc->c_pget(dbc, &key, &pkey, &data, cursor_flags)) != 0) + goto done; + if (op == GT) { + while ((ret = dbc->c_pget( + dbc, &key, &pkey, &data, DB_NEXT)) == 0) { + if ((ret = + DbRecord_init(&pkey, &data, &record)) != 0) + break; + faddr = (u_int8_t *)&record + f->offset; + if (cmp(faddr, valuep, op) != 0) + break; + } + if (ret != 0) + goto done; + } + do { + if ((ret = DbRecord_init(&pkey, &data, &record)) != 0) + break; + faddr = (u_int8_t *)&record + f->offset; + if (cmp(faddr, valuep, op)) + DbRecord_print(&record, NULL); + else + if (op == EQ || op == LT || op == LTEQ) + break; + } while ((ret = + dbc->c_pget(dbc, &key, &pkey, &data, DB_NEXT)) == 0); + } + +done: if (ret == DB_NOTFOUND) + ret = 0; + +err: if (dbc != NULL && (t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_WILDCARD_SUPPORT + if (op == WC || op == NWC) + regfree(&preq); +#endif + + return (ret); +} diff --git a/examples/c/csv/Makefile b/examples/c/csv/Makefile new file mode 100644 index 00000000..9d46cc7e --- /dev/null +++ b/examples/c/csv/Makefile @@ -0,0 +1,75 @@ +# $Id$ + +# Berkeley DB installation. +DB_INCLUDE=../../build_unix +LIBS= -L../../build_unix -L../../build_unix/.libs/ -ldb + +INC= -I. -I$(DB_INCLUDE) +CFLAGS= $(INC) -g -W -Wall -Wpointer-arith -Wmissing-prototypes + +PROGS= csv_code csv_load csv_query +SRCS= DbRecord.c code.c csv_local.c db.c load.c load_main.c query.c \ + query_main.c util.c + +all: csv_load csv_query + +csv_code: code.o + $(CC) -o $@ $? 
$(LIBS) + +LOAD_OBJS=DbRecord.o csv_local.o db.o load.o load_main.o util.o +csv_load: $(LOAD_OBJS) + $(CC) -o $@ $(LOAD_OBJS) $(LIBS) + +QUERY_OBJS=DbRecord.o csv_local.o db.o query.o query_main.o util.o +csv_query: $(QUERY_OBJS) + $(CC) -o $@ $(QUERY_OBJS) $(LIBS) + +clean distclean realclean: + rm -rf $(PROGS) TESTDIR eBay tags *.o *.core csv_local.[ch] + +tags: + rm -f tags + ctags $(SRCS) code.c + +DbRecord.o csv_local.o db.o load.o load_main.o query.o: csv_local.h csv.h +query_main.o util.o: csv_local.h csv.h + +csv_local.c csv_local.h: csv_code + ./csv_code -c csv_local.c -h csv_local.h -f sample.desc + +lint_code: + flexelint +fll \ + "-e801" \ + "-e818" \ + "-esym(534,fprintf)" \ + "-esym(534,memcpy)" \ + "-esym(534,memmove)" \ + "-esym(534,memset)" \ + "-esym(534,printf)" \ + "-wlib(1)" \ + -i$(DB_INCLUDE) "-i/usr/include" \ + code.c +lint_load: + flexelint +fll \ + "-e801" \ + "-e818" \ + "-esym(534,fprintf)" \ + "-esym(534,memcpy)" \ + "-esym(534,memmove)" \ + "-esym(534,memset)" \ + "-esym(534,printf)" \ + "-wlib(1)" \ + -i$(DB_INCLUDE) "-i/usr/include" \ + DbRecord.c csv_local.c db.c load.c load_main.c util.c +lint_query: + flexelint +fll \ + "-e801" \ + "-e818" \ + "-esym(534,fprintf)" \ + "-esym(534,memcpy)" \ + "-esym(534,memmove)" \ + "-esym(534,memset)" \ + "-esym(534,printf)" \ + "-wlib(1)" \ + -i$(DB_INCLUDE) "-i/usr/include" \ + DbRecord.c csv_local.c db.c query.c query_main.c util.c diff --git a/examples/c/csv/README b/examples/c/csv/README new file mode 100644 index 00000000..00e9e1f7 --- /dev/null +++ b/examples/c/csv/README @@ -0,0 +1,408 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved. 
+ *
+ * $Id$
+ */
+
+The "comma-separated value" (csv) directory is a suite of three programs:
+
+	csv_code:  write "helper" code on which to build applications,
+	csv_load:  import csv files into a Berkeley DB database,
+	csv_query: query databases created by csv_load.
+
+The goal is to allow programmers to easily build applications for using
+csv databases.
+
+You can build the three programs, and run a sample application in this
+directory.
+
+First, there's the sample.csv file:
+
+	Adams,Bob,01/02/03,green,apple,37
+	Carter,Denise Ann,04/05/06,blue,banana,38
+	Eidel,Frank,07/08/09,red,cherry,38
+	Grabel,Harriet,10/11/12,purple,date,40
+	Indals,Jason,01/03/05,pink,orange,32
+	Kilt,Laura,07/09/11,yellow,grape,38
+	Moreno,Nancy,02/04/06,black,strawberry,38
+	Octon,Patrick,08/10/12,magenta,kiwi,15
+
+The fields are:
+	Last name,
+	First name,
+	Birthdate,
+	Favorite color,
+	Favorite fruit,
+	Age
+
+Second, there's a "description" of that csv file in sample.desc:
+
+	version 1 {
+		LastName string
+		FirstName string
+		BirthDate
+		Color string index
+		Fruit string index
+		Age unsigned_long index
+	}
+
+The DESCRIPTION file maps one-to-one to the fields in the csv file, and
+provides a data type for any field the application wants to use.  (If
+the application doesn't care about a field, don't specify a data type
+and the csv code will ignore it.)  The string "index" specifies there
+should be a secondary index based on the field.
+
+The "field" names in the DESCRIPTION file don't have to be the same as
+the ones in the csv file (and, as they may not have embedded spaces,
+probably won't be).
+
+To build in the sample directory, on POSIX-like systems, type "make".
+This first builds the program csv_code, which it then runs, with the file
+DESCRIPTION as an input.  Running csv_code creates two additional files:
+csv_local.c and csv_local.h.  Those two files are then used as part of
+the build process for two more programs: csv_load and csv_query.
+
+You can now load the csv file into a Berkeley DB database with the
+following command:
+
+	% ./csv_load -h TESTDIR < sample.csv
+
+The csv_load command will create a directory and four databases:
+
+	primary		primary database
+	Age		secondary index on Age field
+	Color		secondary index on Color field
+	Fruit		secondary index on Fruit field
+
+You can then query the database:
+
+	% ./csv_query -h TESTDIR
+	Query: id=2
+	Record: 2:
+		LastName: Carter
+		FirstName: Denise
+		Color: blue
+		Fruit: banana
+		Age: 38
+	Query: color==green
+	Record: 1:
+		LastName: Adams
+		FirstName: Bob
+		Color: green
+		Fruit: apple
+		Age: 37
+
+and so on.
+
+The csv_code process also creates source code modules that support
+building your own applications based on this database.  First, there
+is the local csv_local.h include file:
+
+	/*
+	 * DO NOT EDIT: automatically built by csv_code.
+	 *
+	 * Record structure.
+	 */
+	typedef struct __DbRecord {
+		u_int32_t	 recno;		/* Record number */
+
+		/*
+		 * Management fields
+		 */
+		void	*raw;			/* Memory returned by DB */
+		char	*record;		/* Raw record */
+		size_t	 record_len;		/* Raw record length */
+
+		u_int32_t	 field_count;	/* Field count */
+		u_int32_t	 version;	/* Record version */
+
+		u_int32_t	*offset;	/* Offset table */
+
+		/*
+		 * Indexed fields
+		 */
+	#define	CSV_INDX_LASTNAME	1
+		char	*LastName;
+
+	#define	CSV_INDX_FIRSTNAME	2
+		char	*FirstName;
+
+	#define	CSV_INDX_COLOR		4
+		char	*Color;
+
+	#define	CSV_INDX_FRUIT		5
+		char	*Fruit;
+
+	#define	CSV_INDX_AGE		6
+		u_long	 Age;
+	} DbRecord;
+
+This defines the DbRecord structure that is the primary object for this
+csv file.  As you can see, the interesting fields in the csv file have
+mappings in this structure.
+
+Also, there are routines in the DbRecord.c file your application can use
+to handle DbRecord structures.  When you retrieve a record from the
+database the DbRecord structure will be filled in based on that record.
+ +Here are the helper routines: + + int + DbRecord_print(DbRecord *recordp, FILE *fp) + Display the contents of a DbRecord structure to the specified + output stream. + + int + DbRecord_init(const DBT *key, DBT *data, DbRecord *recordp) + Fill in a DbRecord from a returned database key/data pair. + + int + DbRecord_read(u_long key, DbRecord *recordp) + Read the specified record (DbRecord_init will be called + to fill in the DbRecord). + + int + DbRecord_discard(DbRecord *recordp) + Discard the DbRecord structure (must be called after the + DbRecord_read function), when the application no longer + needs the returned DbRecord. + + int + DbRecord_search_field_name(char *field, char *value, OPERATOR op) + Display the DbRecords where the field (named by field) has + the specified relationship to the value. For example: + + DbRecord_search_field_name("Age", "35", GT) + + would search for records with a "Age" field greater than + 35. + + int + DbRecord_search_field_number( + u_int32_t fieldno, char *value, OPERATOR op) + Display the DbRecords where the field (named by field) + has the specified relationship to the value. The field + number used as an argument comes from the csv_local.h + file, for example, CSV_INDX_AGE is the field index for + the "Age" field in this csv file. For example: + + DbRecord_search_field_number(CSV_INDX_AGE, 35, GT) + + would search for records with a "Age" field greater than + 35. + + Currently, the csv code only supports three types of data: + strings, unsigned longs and doubles. Others can easily be + added. + +The usage of the csv_code program is as follows: + + usage: csv_code [-v] [-c source-file] [-f input] [-h header-file] + -c output C source code file + -h output C header file + -f input file + -v verbose (defaults to off) + + -c A file to which to write the C language code. By default, + the file "csv_local.c" is used. + + -f A file to read for a description of the fields in the + csv file. By default, csv_code reads from stdin. 
+ + -h A file to which to write the C language header structures. + By default, the file "csv_local.h" is used. + + -v The -v verbose flag outputs potentially useful debugging + information. + +There are two applications built on top of the code produced by +csv_code, csv_load and csv_query. + +The usage of the csv_load program is as follows: + + usage: csv_load [-v] [-F format] [-f csv-file] [-h home] [-V version] + -F format (currently supports "excel") + -f input file + -h database environment home directory + -v verbose (defaults to off) + + -F See "Input format" below. + + -f If an input file is specified using the -f flag, the file + is read and the records in the file are stored into the + database. By default, csv_load reads from stdin. + + -h If a database environment home directory is specified + using the -h flag, that directory is used as the + Berkeley DB directory. The default for -h is the + current working directory or the value of the DB_HOME + environment variable. + + -V Specify a version number for the input (the default is 1). + + -v The -v verbose flag outputs potentially useful debugging + information. It can be specified twice for additional + information. + +The usage of csv_query program is as follows: + + usage: csv_query [-v] [-c cmd] [-h home] + + -c A command to run, otherwise csv_query will enter + interactive mode and prompt for user input. + + -h If a database environment home directory is specified + using the -h flag, that directory is used as the + Berkeley DB directory. The default for -h is the + current working directory or the value of the DB_HOME + environment variable. + + -v The -v verbose flag outputs potentially useful debugging + information. It can be specified twice for additional + information. + +The query program currently supports the following commands: + + ? 
Display help screen + exit Exit program + fields Display list of field names + help Display help screen + quit Exit program + version Display database format version + field[op]value Display fields by value (=, !=, <, <=, >, >=, ~, !~) + +The "field[op]value" command allows you to specify a field and a +relationship to a value. For example, you could run the query: + + csv_query -c "price < 5" + +to list all of the records with a "price" field less than "5". + +Field names and all string comparisons are case-insensitive. + +The operators ~ and !~ do match/no-match based on the IEEE Std 1003.2 +(POSIX.2) Basic Regular Expression standard. + +As a special case, every database has the field "Id", which matches the +record number of the primary key. + +Input format: + The input to the csv_load utility is a text file, containing + lines of comma-separated fields. + + Blank lines are ignored. All non-blank lines must be comma-separated + lists of fields. + + By default: + (\000) bytes and unprintable characters are stripped, + input lines are (\012) separated, + commas cannot be escaped. + + If "-F excel" is specified: + (\000) bytes and unprintable characters are stripped, + input lines are (\015) separated, + bytes (\012) characters are stripped from the input, + commas surrounded by double-quote character (") are not + treated as field separators. + +Storage format: + Records in the primary database are stored with a 32-bit unsigned + record number as the key. + + Key/Data pair 0 is of the format: + [version] 32-bit unsigned int + [field count] 32-bit unsigned int + [raw record] byte array + + For example: + [1] + [5] + [field1,field2,field3,field4,field5] + + All other Key/Data pairs are of the format: + [version] 32-bit unsigned int + [offset to field 1] 32-bit unsigned int + [offset to field 2] 32-bit unsigned int + [offset to field 3] 32-bit unsigned int + ... 
32-bit unsigned int + [offset to field N] 32-bit unsigned int + [offset past field N] 32-bit unsigned int + [raw record] byte array + + For example: + [1] + [0] + [2] + [5] + [9] + [14] + [19] + [a,ab,abc,abcd,abcde] + 012345678901234567890 << byte offsets + 0 1 2 + + So, field 3 of the data can be directly accessed by using + the "offset to field 3", and the length of the field is + the "((offset to field 4) - (offset to field 3)) - 1". + +Limits: + The csv program stores the primary key in a 32-bit unsigned + value, limiting the number of records in the database. New + records are inserted after the last existing record, that is, + new records are not inserted into gaps left by any deleted + records. This will limit the total number of records stored in + any database. + +Versioning: + Versioning is when a database supports multiple versions of the + records. This is likely to be necessary when dealing with large + applications and databases, as record fields change over time. + + The csv application suite does not currently support versions, + although all of the necessary hooks are there. + + The way versioning will work is as follows: + + The XXX.desc file needs to support multiple version layouts. + + The generated C language structure defined should be a superset + of all of the interesting fields from all of the version + layouts, regardless of which versions of the csv records those + fields exist in. + + When the csv layer is asked for a record, the record's version + will provide a lookup into a separate database of field lists. + That is, there will be another database which has key/data pairs + where the key is a version number, and the data is the field + list. At that point, it's relatively easy to map the fields + to the structure as is currently done, except that some of the + fields may not be filled in. 
+ + To determine if a field is filled in, in the structure, the + application has to have an out-of-band value to put in that + field during DbRecord initialization. If that's a problem, the + alternative would be to add an additional field for each listed + field -- if the additional field is set to 1, the listed field + has been filled in, otherwise it hasn't. The csv code will + support the notion of required fields, so in most cases the + application won't need to check before simply using the field, + it's only if a field isn't required and may be filled in that + the check will be necessary. + +TODO: + Csv databases are not portable between machines of different + byte orders. To make them portable, all of the 32-bit unsigned + int fields currently written into the database should be + converted to a standard byte order. This would include the + version number and field count in the column-map record, and the + version and field offsets in the other records. + + Add Extended RE string matches. + + Add APIs to replace the reading of a schema file, allow users to + fill in a DbRecord structure and do a put on it. (Hard problem: + how to flag fields that aren't filled in.) + + Add a second sample file, and write the actual versioning code. diff --git a/examples/c/csv/code.c b/examples/c/csv/code.c new file mode 100644 index 00000000..54cef6c1 --- /dev/null +++ b/examples/c/csv/code.c @@ -0,0 +1,406 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "csv.h" + +typedef struct { + char *name; /* Field name */ + char *upper; /* Field name in upper-case */ + datatype type; /* Data type */ + int indx; /* Index */ +} FIELD; + +int code_source(void); +int code_header(void); +int desc_dump(void); +int desc_load(void); +char *type_to_string(datatype); +int usage(void); + +/* + * Globals + */ +FILE *cfp; /* C source file */ +FILE *hfp; /* C source file */ +char *progname; /* Program name */ +int verbose; /* Verbose flag */ + +u_int field_cnt; /* Count of fields */ +FIELD *fields; /* Field list */ + +int +main(int argc, char *argv[]) +{ + int ch; + char *cfile, *hfile; + + /* Initialize globals. */ + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + /* Initialize arguments. */ + cfile = "csv_local.c"; /* Default header/source files */ + hfile = "csv_local.h"; + + /* Process arguments. */ + while ((ch = getopt(argc, argv, "c:f:h:v")) != EOF) + switch (ch) { + case 'c': + cfile = optarg; + break; + case 'f': + if (freopen(optarg, "r", stdin) == NULL) { + fprintf(stderr, + "%s: %s\n", optarg, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'h': + hfile = optarg; + break; + case 'v': + ++verbose; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (*argv != NULL) + return (usage()); + + /* Load records from the input file. */ + if (desc_load()) + return (EXIT_FAILURE); + + /* Dump records for debugging. */ + if (verbose && desc_dump()) + return (EXIT_FAILURE); + + /* Open output files. */ + if ((cfp = fopen(cfile, "w")) == NULL) { + fprintf(stderr, + "%s: %s: %s\n", progname, cfile, strerror(errno)); + return (EXIT_FAILURE); + } + if ((hfp = fopen(hfile, "w")) == NULL) { + fprintf(stderr, + "%s: %s: %s\n", progname, hfile, strerror(errno)); + return (EXIT_FAILURE); + } + + /* Build the source and header files. 
*/ + if (code_header()) + return (EXIT_FAILURE); + if (code_source()) + return (EXIT_FAILURE); + + return (EXIT_SUCCESS); +} + +/* + * desc_load -- + * Load a description file. + */ +int +desc_load() +{ + u_int field_alloc; + int version; + char *p, *t, save_ch, buf[256]; + + field_alloc = version = 0; + while (fgets(buf, sizeof(buf), stdin) != NULL) { + if ((p = strchr(buf, '\n')) == NULL) { + fprintf(stderr, "%s: input line too long\n", progname); + return (1); + } + *p = '\0'; + + /* Skip leading whitespace. */ + for (p = buf; isspace(*p); ++p) + ; + + /* Skip empty lines or lines beginning with '#'. */ + if (*p == '\0' || *p == '#') + continue; + + /* Get a version. */ + if (!version) { + if (strncasecmp( + p, "version", sizeof("version") - 1) == 0) { + version = 1; + continue; + } + fprintf(stderr, + "%s: expected \"version\" line\n", progname); + return (1); + } + + /* + * Skip block close -- not currently useful, but when this + * code supports versioned descriptions, it will matter. + */ + if (*p == '}') { + version = 0; + continue; + } + + /* Allocate a new field structure as necessary. */ + if (field_cnt == field_alloc && + (fields = realloc(fields, + (field_alloc += 10) * sizeof(FIELD))) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + + /* Find the end of the field name. */ + for (t = p; *t != '\0' && !isspace(*t); ++t) + ; + save_ch = *t; + *t = '\0'; + if ((fields[field_cnt].name = strdup(p)) == NULL || + (fields[field_cnt].upper = strdup(p)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + *t = save_ch; + p = t; + + fields[field_cnt].indx = 0; + fields[field_cnt].type = NOTSET; + for (;;) { + /* Skip to the next field, if any. */ + for (; *p != '\0' && isspace(*p); ++p) + ; + if (*p == '\0') + break; + + /* Find the end of the field. 
*/ + for (t = p; *t != '\0' && !isspace(*t); ++t) + ; + save_ch = *t; + *t = '\0'; + if (strcasecmp(p, "double") == 0) + fields[field_cnt].type = DOUBLE; + else if (strcasecmp(p, "index") == 0) + fields[field_cnt].indx = 1; + else if (strcasecmp(p, "string") == 0) + fields[field_cnt].type = STRING; + else if (strcasecmp(p, "unsigned_long") == 0) + fields[field_cnt].type = UNSIGNED_LONG; + else { + fprintf(stderr, + "%s: unknown keyword: %s\n", progname, p); + return (1); + } + *t = save_ch; + p = t; + } + + /* Create a copy of the field name that's upper-case. */ + for (p = fields[field_cnt].upper; *p != '\0'; ++p) + if (islower(*p)) + *p = (char)toupper(*p); + ++field_cnt; + } + if (ferror(stdin)) { + fprintf(stderr, "%s: stdin: %s\n", progname, strerror(errno)); + return (1); + } + return (0); +} + +/* + * desc_dump -- + * Dump a set of FIELD structures. + */ +int +desc_dump() +{ + FIELD *f; + u_int i; + + for (f = fields, i = 0; i < field_cnt; ++i, ++f) { + fprintf(stderr, "field {%s}: (", f->name); + switch (f->type) { + case NOTSET: + fprintf(stderr, "ignored"); + break; + case DOUBLE: + fprintf(stderr, "double"); + break; + case STRING: + fprintf(stderr, "string"); + break; + case UNSIGNED_LONG: + fprintf(stderr, "unsigned_long"); + break; + } + if (f->indx) + fprintf(stderr, ", indexed"); + fprintf(stderr, ")\n"); + } + return (0); +} + +/* + * code_header -- + * Print out the C #include file. 
+ */ +int +code_header() +{ + FIELD *f; + u_int i; + + fprintf(hfp, "/*\n"); + fprintf(hfp, " * DO NOT EDIT: automatically built by %s.\n", progname); + fprintf(hfp, " *\n"); + fprintf(hfp, " * Record structure.\n"); + fprintf(hfp, " */\n"); + fprintf(hfp, "typedef struct __DbRecord {\n"); + fprintf(hfp, "\tu_int32_t\t recno;\t\t/* Record number */\n"); + fprintf(hfp, "\n"); + fprintf(hfp, "\t/*\n"); + fprintf(hfp, "\t * Management fields\n"); + fprintf(hfp, "\t */\n"); + fprintf(hfp, "\tvoid\t\t*raw;\t\t/* Memory returned by DB */\n"); + fprintf(hfp, "\tu_char\t\t*record;\t/* Raw record */\n"); + fprintf(hfp, "\tsize_t\t\t record_len;\t/* Raw record length */\n\n"); + fprintf(hfp, "\tu_int32_t\t field_count;\t/* Field count */\n"); + fprintf(hfp, "\tu_int32_t\t version;\t/* Record version */\n\n"); + fprintf(hfp, "\tu_int32_t\t*offset;\t/* Offset table */\n"); + fprintf(hfp, "\n"); + + fprintf(hfp, "\t/*\n"); + fprintf(hfp, "\t * Indexed fields\n"); + fprintf(hfp, "\t */\n"); + for (f = fields, i = 0; i < field_cnt; ++i, ++f) { + if (f->type == NOTSET) + continue; + if (i != 0) + fprintf(hfp, "\n"); + fprintf(hfp, "#define CSV_INDX_%s\t%d\n", f->upper, i + 1); + switch (f->type) { + case NOTSET: + /* NOTREACHED */ + abort(); + break; + case DOUBLE: + fprintf(hfp, "\tdouble\t\t %s;\n", f->name); + break; + case STRING: + fprintf(hfp, "\tchar\t\t*%s;\n", f->name); + break; + case UNSIGNED_LONG: + fprintf(hfp, "\tu_long\t\t %s;\n", f->name); + break; + } + } + fprintf(hfp, "} DbRecord;\n"); + + return (0); +} + +/* + * code_source -- + * Print out the C structure initialization. 
+ */ +int +code_source() +{ + FIELD *f; + u_int i; + + fprintf(cfp, "/*\n"); + fprintf(cfp, + " * DO NOT EDIT: automatically built by %s.\n", progname); + fprintf(cfp, " *\n"); + fprintf(cfp, " * Initialized record structure.\n"); + fprintf(cfp, " */\n"); + fprintf(cfp, "\n"); + fprintf(cfp, "#include \"csv.h\"\n"); + fprintf(cfp, "#include \"csv_local.h\"\n"); + fprintf(cfp, "\n"); + fprintf(cfp, "DbRecord DbRecord_base = {\n"); + fprintf(cfp, "\t0,\t\t/* Record number */\n"); + fprintf(cfp, "\tNULL,\t\t/* Memory returned by DB */\n"); + fprintf(cfp, "\tNULL,\t\t/* Raw record */\n"); + fprintf(cfp, "\t0,\t\t/* Raw record length */\n"); + fprintf(cfp, "\t%d,\t\t/* Field count */\n", field_cnt); + fprintf(cfp, "\t0,\t\t/* Record version */\n"); + fprintf(cfp, "\tNULL,\t\t/* Offset table */\n"); + fprintf(cfp, "\n"); + for (f = fields, i = 0; i < field_cnt; ++i, ++f) { + if (f->type == NOTSET) + continue; + switch (f->type) { + case NOTSET: + abort(); + /* NOTREACHED */ + break; + case DOUBLE: + case UNSIGNED_LONG: + fprintf(cfp, "\t0,\t\t/* %s */\n", f->name); + break; + case STRING: + fprintf(cfp, "\tNULL,\t\t/* %s */\n", f->name); + break; + } + } + fprintf(cfp, "};\n"); + + fprintf(cfp, "\n"); + fprintf(cfp, "DbField fieldlist[] = {\n"); + for (f = fields, i = 0; i < field_cnt; ++i, ++f) { + if (f->type == NOTSET) + continue; + fprintf(cfp, "\t{ \"%s\",", f->name); + fprintf(cfp, " CSV_INDX_%s,", f->upper); + fprintf(cfp, "\n\t %s,", type_to_string(f->type)); + fprintf(cfp, " %d,", f->indx ? 
1 : 0); + fprintf(cfp, " NULL,"); + fprintf(cfp, " FIELD_OFFSET(%s)},\n", f->name); + } + fprintf(cfp, "\t{NULL, 0, STRING, 0, NULL, 0}\n};\n"); + + return (0); +} + +char * +type_to_string(type) + datatype type; +{ + switch (type) { + case NOTSET: + return ("NOTSET"); + case DOUBLE: + return ("DOUBLE"); + case STRING: + return ("STRING"); + case UNSIGNED_LONG: + return ("UNSIGNED_LONG"); + } + + abort(); + /* NOTREACHED */ +} + +int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-v] [-c source-file] [-f input] [-h header-file]\n", + progname); + exit(1); +} diff --git a/examples/c/csv/csv.h b/examples/c/csv/csv.h new file mode 100644 index 00000000..5f5eab5c --- /dev/null +++ b/examples/c/csv/csv.h @@ -0,0 +1,101 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN 1 + +#include +#include + +extern int getopt(int, char * const *, const char *); +extern char *optarg; +extern int optind; +#else +#define HAVE_WILDCARD_SUPPORT 1 + +#include +#include +#endif + +#include "db.h" + +/* + * MAP_VERSION + * This code has hooks for versioning, but does not directly support it. + * See the README file for details. + */ +#define MAP_VERSION 1 + +/* + * Supported formats. + * + * FORMAT_NL: separated + * FORMAT_EXCEL: Excel dumped flat text. + */ +typedef enum { FORMAT_EXCEL, FORMAT_NL } input_fmt; + +/* + * OFFSET_LEN + * The length of any item can be calculated from the two offset fields. + * OFFSET_OOB + * An offset that's illegal, used to detect unavailable fields. + */ +#define OFFSET_LEN(offset, indx) \ + (((offset)[(indx) + 1] - (offset)[(indx)]) - 1) + +#define OFFSET_OOB 0 + +/* + * Field comparison operators. + */ +typedef enum { EQ=1, NEQ, GT, GTEQ, LT, LTEQ, WC, NWC } OPERATOR; + +/* + * Supported data types. 
+ */ +typedef enum { NOTSET=1, DOUBLE, STRING, UNSIGNED_LONG } datatype; + +/* + * C structure that describes the csv fields. + */ +typedef struct { + char *name; /* Field name */ + u_int32_t fieldno; /* Field index */ + datatype type; /* Data type */ + + int indx; /* Indexed */ + DB *secondary; /* Secondary index handle */ + +#define FIELD_OFFSET(field) ((size_t)(&(((DbRecord *)0)->field))) + size_t offset; /* DbRecord field offset */ +} DbField; + +/* + * Globals + */ +extern DB *db; /* Primary database */ +extern DbField fieldlist[]; /* Field list */ +extern DB_ENV *dbenv; /* Database environment */ +extern char *progname; /* Program name */ +extern int verbose; /* Program verbosity */ +#ifdef _WIN32 +#undef strcasecmp +#define strcasecmp _stricmp +#undef strncasecmp +#define strncasecmp _strnicmp +#define mkdir(d, perm) _mkdir(d) +#endif diff --git a/examples/c/csv/csv_extern.h b/examples/c/csv/csv_extern.h new file mode 100644 index 00000000..36581708 --- /dev/null +++ b/examples/c/csv/csv_extern.h @@ -0,0 +1,37 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +extern DbRecord DbRecord_base; /* Initialized structure. 
*/ + +/* + * Prototypes + */ +extern int DbRecord_discard(DbRecord *); +extern int DbRecord_init(const DBT *, const DBT *, DbRecord *); +extern void DbRecord_print(DbRecord *, FILE *); +extern int DbRecord_read(u_long, DbRecord *); +extern int DbRecord_search_field_name(char *, char *, OPERATOR); +extern int DbRecord_search_field_number(u_int, char *, OPERATOR); +extern int compare_double(DB *, const DBT *, const DBT *); +extern int compare_string(DB *, const DBT *, const DBT *); +extern int compare_ulong(DB *, const DBT *, const DBT *); +extern int csv_env_close(void); +extern int csv_env_open(const char *, int); +extern int csv_secondary_close(void); +extern int csv_secondary_open(void); +extern int entry_print(void *, size_t, u_int32_t); +extern int field_cmp_double(void *, void *, OPERATOR); +extern int field_cmp_re(void *, void *, OPERATOR); +extern int field_cmp_string(void *, void *, OPERATOR); +extern int field_cmp_ulong(void *, void *, OPERATOR); +extern int input_load(input_fmt, u_long); +extern int query(char *, int *); +extern int query_interactive(void); +extern int secondary_callback(DB *, const DBT *, const DBT *, DBT *); +extern int strtod_err(char *, double *); +extern int strtoul_err(char *, u_long *); diff --git a/examples/c/csv/db.c b/examples/c/csv/db.c new file mode 100644 index 00000000..71086c25 --- /dev/null +++ b/examples/c/csv/db.c @@ -0,0 +1,244 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +static int compare_uint32(DB *, const DBT *, const DBT *); + +/* + * csv_env_init -- + * Initialize the database environment. + */ +int +csv_env_open(const char *home, int is_rdonly) +{ + int ret; + + dbenv = NULL; + db = NULL; + + /* Create a database environment handle. 
*/ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (1); + } + + /* + * Configure Berkeley DB error reporting to stderr, with our program + * name as the prefix. + */ + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + /* + * The default Berkeley DB cache size is fairly small; configure a + * 1MB cache for now. This value will require tuning in the future. + */ + if ((ret = dbenv->set_cachesize(dbenv, 0, 1048576, 1)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_cachesize"); + return (1); + } + + /* + * We may be working with an existing environment -- try and join it. + * If that fails, create a new database environment; for now, we only + * need a cache, no logging, locking, or transactions. + */ + if ((ret = dbenv->open(dbenv, home, + DB_JOINENV | DB_USE_ENVIRON, 0)) != 0 && + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_USE_ENVIRON, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + return (1); + } + + /* Create the primary database handle. */ + if ((ret = db_create(&db, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + return (1); + } + + /* + * Records may be relatively large -- use a large page size. + */ + if ((ret = db->set_pagesize(db, 32 * 1024)) != 0) { + dbenv->err(dbenv, ret, "DB->set_pagesize"); + return (1); + } + + /* + * The primary database uses an integer as its key; on little-endian + * machines, integers sort badly using the default Berkeley DB sort + * function (which is lexicographic). Specify a comparison function + * for the database. + */ + if ((ret = db->set_bt_compare(db, compare_uint32)) != 0) { + dbenv->err(dbenv, ret, "DB->set_bt_compare"); + return (1); + } + + /* Open the primary database. */ + if ((ret = db->open(db, NULL, + "primary", NULL, DB_BTREE, is_rdonly ? 
0 : DB_CREATE, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: primary"); + return (1); + } + + /* Open the secondaries. */ + if ((ret = csv_secondary_open()) != 0) + return (1); + + return (0); +} + +/* + * csv_env_close -- + * Discard the database environment. + */ +int +csv_env_close() +{ + int ret, t_ret; + + ret = 0; + + /* Close the secondaries. */ + ret = csv_secondary_close(); + + /* Close the primary handle. */ + if (db != NULL && (t_ret = db->close(db, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->close"); + if (ret == 0) + ret = t_ret; + } + if ((t_ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, + "%s: DB_ENV->close: %s\n", progname, db_strerror(ret)); + if (ret == 0) + ret = t_ret; + } + + return (ret); +} + +/* + * csv_secondary_open -- + * Open any secondary indices. + */ +int +csv_secondary_open() +{ + DB *sdb; + DbField *f; + int ret, (*fcmp)(DB *, const DBT *, const DBT *); + + /* + * Create secondary database handles. + */ + for (f = fieldlist; f->name != NULL; ++f) { + if (f->indx == 0) + continue; + + if ((ret = db_create(&sdb, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + return (1); + } + sdb->app_private = f; + + /* Keys are small, use a relatively small page size. */ + if ((ret = sdb->set_pagesize(sdb, 8 * 1024)) != 0) { + dbenv->err(dbenv, ret, "DB->set_pagesize"); + return (1); + } + + /* + * Sort the database based on the underlying type. Skip + * strings, Berkeley DB defaults to lexicographic sort. + */ + switch (f->type) { + case DOUBLE: + fcmp = compare_double; + break; + case UNSIGNED_LONG: + fcmp = compare_ulong; + break; + case NOTSET: + case STRING: + default: + fcmp = NULL; + break; + } + if (fcmp != NULL && + (ret = sdb->set_bt_compare(sdb, fcmp)) != 0) { + dbenv->err(dbenv, ret, "DB->set_bt_compare"); + return (1); + } + + /* Always configure secondaries for sorted duplicates. 
*/ + if ((ret = sdb->set_flags(sdb, DB_DUPSORT)) != 0) { + dbenv->err(dbenv, ret, "DB->set_flags"); + return (1); + } + if ((ret = sdb->set_dup_compare(sdb, compare_ulong)) != 0) { + dbenv->err(dbenv, ret, "DB->set_dup_compare"); + return (1); + } + + if ((ret = sdb->open( + sdb, NULL, f->name, NULL, DB_BTREE, DB_CREATE, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: %s", f->name); + return (1); + } + if ((ret = sdb->associate( + db, NULL, sdb, secondary_callback, DB_CREATE)) != 0) { + dbenv->err(dbenv, ret, "DB->set_associate"); + return (1); + } + f->secondary = sdb; + } + + return (0); +} + +/* + * csv_secondary_close -- + * Close any secondary indices. + */ +int +csv_secondary_close() +{ + DbField *f; + int ret, t_ret; + + ret = 0; + for (f = fieldlist; f->name != NULL; ++f) + if (f->secondary != NULL && (t_ret = + f->secondary->close(f->secondary, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * compare_uint32 -- + * Compare two keys. + */ +static int +compare_uint32(DB *db_arg, const DBT *a_arg, const DBT *b_arg) +{ + u_int32_t a, b; + + db_arg = db_arg; /* Quiet compiler. */ + + memcpy(&a, a_arg->data, sizeof(u_int32_t)); + memcpy(&b, b_arg->data, sizeof(u_int32_t)); + return (a > b ? 1 : ((a < b) ? -1 : 0)); +} diff --git a/examples/c/csv/load.c b/examples/c/csv/load.c new file mode 100644 index 00000000..5f7212b3 --- /dev/null +++ b/examples/c/csv/load.c @@ -0,0 +1,347 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +typedef enum { GL_OK, GL_EOF, GL_FAIL } getline_status; + +static int input_field_count(const char *, size_t, u_int32_t *); +static getline_status + input_getline(char **, size_t *, size_t *); +static int input_put_alloc(u_int32_t **, size_t *, size_t, u_int32_t); +static int input_set_offset(u_int32_t *, char *, size_t, u_int32_t); + +static input_fmt ifmt; /* Input format. */ +static u_long record_count = 0; /* Input record count for errors. */ +static u_long version; /* Version we're loading. */ + +/* + * input_load -- + * Read the input file and load new records into the database. + */ +int +input_load(input_fmt ifmt_arg, u_long version_arg) +{ + getline_status gtl_status; + DBT key, data; + DBC *cursor; + u_int32_t field_count, primary_key, *put_line; + size_t input_len, len, put_len; + int is_first, ret; + char *input_line; + + field_count = 0; /* Shut the compiler up. */ + + /* ifmt and version are global to this file. */ + ifmt = ifmt_arg; + version = version_arg; + + /* + * The primary key for the database is a unique number. Find out the + * last unique number allocated in this database by opening a cursor + * and fetching the last record. 
+ */ + if ((ret = db->cursor(db, NULL, &cursor, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->cursor"); + return (1); + } + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + if ((ret = cursor->c_get(cursor, &key, &data, DB_LAST)) != 0) + if (ret == DB_NOTFOUND) + primary_key = 0; + else { + dbenv->err(dbenv, ret, "DB->cursor: DB_LAST"); + return (1); + } + else + memcpy(&primary_key, key.data, sizeof(primary_key)); + if ((ret = cursor->c_close(cursor)) != 0) { + dbenv->err(dbenv, ret, "DBC->close"); + return (1); + } + if (verbose) + dbenv->errx(dbenv, + "maximum existing record in the database is %lu", + (u_long)primary_key); + + key.data = &primary_key; + key.size = sizeof(primary_key); + input_line = NULL; + put_line = NULL; + input_len = put_len = 0; + + /* + * See the README file for a description of the file input format. + */ + for (is_first = 1; (gtl_status = + input_getline(&input_line, &input_len, &len)) == GL_OK;) { + ++record_count; + if (verbose > 1) + dbenv->errx(dbenv, "reading %lu", (u_long)record_count); + + /* The first non-blank line of the input is a column map. */ + if (is_first) { + is_first = 0; + + /* Count the fields we're expecting in the input. */ + if (input_field_count( + input_line, len, &field_count) != 0) + return (1); + + } + + /* Allocate room for the table of offsets. */ + if (input_put_alloc( + &put_line, &put_len, len, field_count) != 0) + return (1); + + /* + * Build the offset table and create the record we're + * going to store. + */ + if (input_set_offset(put_line, + input_line, len, field_count) != 0) + return (1); + + ++primary_key; + + memcpy(put_line + (field_count + 2), input_line, len); + data.data = put_line; + data.size = (u_int32_t) + ((field_count + 2) * sizeof(u_int32_t) + len); + + if (verbose > 1) + (void)entry_print( + data.data, data.size, field_count); + + /* Load the key/data pair into the database. 
*/ + if ((ret = db->put(db, NULL, &key, &data, 0)) != 0) { + dbenv->err(dbenv, ret, + "DB->put: %lu", (u_long)primary_key); + return (1); + } + } + + if (gtl_status != GL_EOF) + return (1); + + if (verbose) + dbenv->errx(dbenv, + "%lu records read from the input file into the database", + record_count); + + /* + * This program isn't transactional, limit the window for corruption. + */ + if ((ret = db->sync(db, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->sync"); + return (1); + } + + return (0); +} + +/* + * input_getline -- + * Read in a line of input into a buffer. + */ +static getline_status +input_getline(char **input_linep, size_t *input_lenp, size_t *lenp) +{ + size_t input_len, len; + int ch; + char *input_line, *p, *endp; + + input_line = *input_linep; + input_len = *input_lenp; + + p = input_line; + endp = input_line + input_len; + + for (len = 0; (ch = getchar()) != EOF;) { + if (ch == '\0') /* Strip (\000) bytes. */ + continue; + switch (ifmt) { + case FORMAT_NL: + if (ch == '\n') + goto end; + break; + case FORMAT_EXCEL: + /* Strip (\012) bytes. */ + if (ch == '\n') + continue; + /* + * (\015) bytes terminate lines. + * Skip blank lines. + */ + if (ch == '\015') { + if (len == 0) + continue; + goto end; + } + } + if (input_line == endp) { + input_len += 256; + input_len *= 2; + if ((input_line = + realloc(input_line, input_len)) == NULL) { + dbenv->err(dbenv, errno, + "unable to allocate %lu bytes for record", + (u_long)input_len); + return (GL_FAIL); + } + p = input_line; + endp = p + input_len; + } + + if (isprint(ch)) { /* Strip unprintables. */ + *p++ = (char)ch; + ++len; + } + } + +end: if (len == 0) + return (GL_EOF); + + *lenp = len; + *input_linep = input_line; + *input_lenp = input_len; + + return (GL_OK); +} + +/* + * input_field_count -- + * Count the fields in the line. 
+ */ +static int +input_field_count(const char *line, size_t len, u_int32_t *field_countp) +{ + u_int32_t field_count; + int quoted; + + field_count = 1; + + /* + * There are N-1 separators for N fields, that is, "a,b,c" is three + * fields, with two comma separators. + */ + switch (ifmt) { + case FORMAT_EXCEL: + quoted = 0; + for (field_count = 1; len > 0; ++line, --len) + if (*line == '"') + quoted = !quoted; + else if (*line == ',' && !quoted) + ++field_count; + break; + case FORMAT_NL: + for (field_count = 1; len > 0; ++line, --len) + if (*line == ',') + ++field_count; + break; + } + *field_countp = field_count; + + if (verbose) + dbenv->errx(dbenv, + "input file made up of %lu fields", (u_int)field_count); + + return (0); +} + +/* + * input_put_alloc -- + * Allocate room for the offset table plus the input. + */ +static int +input_put_alloc(u_int32_t **put_linep, + size_t *put_lenp, size_t len, u_int32_t field_count) +{ + size_t total; + + total = (field_count + 2) * sizeof(u_int32_t) + len; + if (total > *put_lenp && + (*put_linep = realloc(*put_linep, *put_lenp += total)) == NULL) { + dbenv->err(dbenv, errno, + "unable to allocate %lu bytes for record", + (u_long)*put_lenp); + return (1); + } + return (0); +} + +/* + * input_set_offset -- + * Build an offset table and record combination. + */ +static int +input_set_offset(u_int32_t *put_line, + char *input_line, size_t len, u_int32_t field_count) +{ + u_int32_t *op; + int quoted; + char *p, *endp; + + op = put_line; + + /* The first field is the version number. */ + *op++ = version; + + /* + * Walk the input line, looking for comma separators. It's an error + * to have too many or too few fields. 
+ */ + *op++ = 0; + quoted = 0; + for (p = input_line, endp = input_line + len;; ++p) { + if (ifmt == FORMAT_EXCEL && p < endp) { + if (*p == '"') + quoted = !quoted; + if (quoted) + continue; + } + if (*p == ',' || p == endp) { + if (field_count == 0) { + dbenv->errx(dbenv, + "record %lu: too many fields in the record", + record_count); + return (1); + } + --field_count; + + *op++ = (u_int32_t)(p - input_line) + 1; + + if (verbose > 1) + dbenv->errx(dbenv, + "offset %lu: {%.*s}", op[-1], + OFFSET_LEN(op, -2), input_line + op[-2]); + + /* + * Don't insert a new field if the input lines ends + * in a comma. + */ + if (p == endp || p + 1 == endp) + break; + } + } + *op++ = (u_int32_t)(p - input_line); + + if (field_count != 0) { + dbenv->errx(dbenv, + "record %lu: not enough fields in the record", + record_count); + return (1); + } + memcpy(op, input_line, len); + + return (0); +} diff --git a/examples/c/csv/load_main.c b/examples/c/csv/load_main.c new file mode 100644 index 00000000..0353bfee --- /dev/null +++ b/examples/c/csv/load_main.c @@ -0,0 +1,117 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +static int usage(void); + +/* + * Globals + */ +DB_ENV *dbenv; /* Database environment */ +DB *db; /* Primary database */ +DB **secondary; /* Secondaries */ +int verbose; /* Program verbosity */ +char *progname; /* Program name */ + +int +main(int argc, char *argv[]) +{ + input_fmt ifmt; + u_long version; + int ch, ret, t_ret; + char *home; + + /* Initialize globals. */ + dbenv = NULL; + db = NULL; + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + verbose = 0; + + /* Initialize arguments. */ + home = NULL; + ifmt = FORMAT_NL; + version = 1; + + /* Process arguments. 
*/ + while ((ch = getopt(argc, argv, "F:f:h:V:v")) != EOF) + switch (ch) { + case 'f': + if (freopen(optarg, "r", stdin) == NULL) { + fprintf(stderr, + "%s: %s\n", optarg, db_strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'F': + if (strcasecmp(optarg, "excel") == 0) { + ifmt = FORMAT_EXCEL; + break; + } + return (usage()); + case 'h': + home = optarg; + break; + case 'V': + if (strtoul_err(optarg, &version)) + return (EXIT_FAILURE); + break; + case 'v': + ++verbose; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (*argv != NULL) + return (usage()); + + /* + * The home directory may not exist -- try and create it. We don't + * bother to distinguish between failure to create it and it already + * existing, as the database environment open will fail if we aren't + * successful. + */ + if (home == NULL) + home = getenv("DB_HOME"); + if (home != NULL) + (void)mkdir(home, S_IRWXU); + + /* Create or join the database environment. */ + if (csv_env_open(home, 0) != 0) + return (EXIT_FAILURE); + + /* Load records into the database. */ + ret = input_load(ifmt, version); + + /* Close the database environment. */ + if ((t_ret = csv_env_close()) != 0 && ret == 0) + ret = t_ret; + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * usage -- + * Program usage message. + */ +static int +usage(void) +{ + (void)fprintf(stderr, + "usage: %s [-v] [-F excel] [-f csv-file] [-h home]\n", progname); + return (EXIT_FAILURE); +} diff --git a/examples/c/csv/query.c b/examples/c/csv/query.c new file mode 100644 index 00000000..e08c023f --- /dev/null +++ b/examples/c/csv/query.c @@ -0,0 +1,241 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +static int query_by_field(char *); +static int query_fieldlist(char *); +static int query_help(char *); +static int query_usage(void); + +typedef struct _cmdtab { + char *cmd; /* Command name */ + int (*f)(char *); /* Underlying function. */ + char *help; /* Help message. */ +} CMDTAB; + +static CMDTAB cmdtab[] = { + { "?", + query_help, + "?\t\tDisplay help screen" }, + { "exit", + NULL, + "exit\t\tExit program" }, + { "fields", + query_fieldlist, + "fields\t\tDisplay list of field names" }, + { "help", + query_help, + "help\t\tDisplay help screen" }, + { "quit", + NULL, + "quit\t\tExit program" }, + { NULL, + query_by_field, + "field[op]value\tDisplay fields by value (=, !=, <, <=, >, >=, ~, !~)" }, + { NULL, NULL, NULL } +}; + +/* + * query_interactive -- + * Allow the user to interactively query the database. + */ +int +query_interactive() +{ + int done; + char *p, input[256]; + + for (;;) { + printf("Query: "); + (void)fflush(stdout); + if (fgets(input, sizeof(input), stdin) == NULL) { + printf("\n"); + if (ferror(stdin)) { + dbenv->err(dbenv, errno, + "error occurred reading from stdin"); + return (1); + } + break; + } + if ((p = strchr(input, '\n')) == NULL) { + dbenv->errx(dbenv, "input buffer too small"); + return (1); + } + *p = '\0'; + if (query(input, &done) != 0) + return (1); + if (done != 0) + break; + } + return (0); +} + +/* + * query -- + * Process a query. + */ +int +query(char *cmd, int *donep) +{ + CMDTAB *p; + + if (donep != NULL) + *donep = 0; + + for (p = cmdtab; p->cmd != NULL; ++p) + if (p->cmd != NULL && + strncasecmp(cmd, p->cmd, strlen(p->cmd)) == 0) + break; + + if (p->cmd == NULL) + return (query_by_field(cmd)); + + if (p->f == NULL) { + if (donep != NULL) + *donep = 1; + return (0); + } + + return (p->f(cmd)); +} + +/* + * query_by_field -- + * Query the primary database by field. 
+ */ +static int +query_by_field(char *input) +{ + OPERATOR operator; + size_t len; + char *field, *op, *value; + + /* + * We expect to see "field [op] value" -- figure it out. + * + * Skip leading whitespace. + */ + while (isspace(*input)) + ++input; + + /* + * Find an operator, and it better not start the string. + */ + if ((len = strcspn(field = input, "<>!=~")) == 0) + return (query_usage()); + op = field + len; + + /* Figure out the operator, and find the start of the value. */ + switch (op[0]) { + case '~': + operator = WC; + value = op + 1; + break; + case '!': + if (op[1] == '=') { + operator = NEQ; + value = op + 2; + break; + } + if (op[1] == '~') { + operator = NWC; + value = op + 2; + break; + } + return (query_usage()); + case '<': + if (op[1] == '=') { + operator = LTEQ; + value = op + 2; + } else { + operator = LT; + value = op + 1; + } + break; + case '=': + operator = EQ; + if (op[1] == '=') + value = op + 2; + else + value = op + 1; + break; + case '>': + if (op[1] == '=') { + operator = GTEQ; + value = op + 2; + } else { + operator = GT; + value = op + 1; + } + break; + default: + return (query_usage()); + } + + /* Terminate the field name, and there better be a field name. */ + while (--op > input && isspace(*op)) + ; + if (op == input) + return (query_usage()); + op[1] = '\0'; + + /* Make sure there is a value field. */ + while (isspace(*value)) + ++value; + if (*value == '\0') + return (query_usage()); + + return (DbRecord_search_field_name(field, value, operator)); +} + +/* + * query_fieldlist -- + * Display list of field names. + */ +static int +query_fieldlist(char *input) +{ + DbField *f; + + input = input; /* Quiet compiler. */ + + for (f = fieldlist; f->name != NULL; ++f) + printf("field %3d: %s\n", f->fieldno, f->name); + return (0); +} + +/* + * query_help -- + * Query command list. + */ +static int +query_help(char *input) +{ + CMDTAB *p; + + input = input; /* Quiet compiler. 
*/ + + printf("Query commands:\n"); + for (p = cmdtab; p->help != NULL; ++p) + printf("\t%s\n", p->help); + return (0); +} + +/* + * query_usage -- + * Query usage message. + */ +static int +query_usage(void) +{ + fprintf(stderr, "%s: query syntax error\n", progname); + return (query_help(NULL)); +} diff --git a/examples/c/csv/query_main.c b/examples/c/csv/query_main.c new file mode 100644 index 00000000..0e223115 --- /dev/null +++ b/examples/c/csv/query_main.c @@ -0,0 +1,99 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +static int usage(void); + +/* + * Globals + */ +DB_ENV *dbenv; /* Database environment */ +DB *db; /* Primary database */ +int verbose; /* Program verbosity */ +char *progname; /* Program name */ + +int +main(int argc, char *argv[]) +{ + int ch, done, ret, t_ret; + char **clist, **clp, *home; + + /* Initialize globals. */ + dbenv = NULL; + db = NULL; + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + verbose = 0; + + /* Initialize arguments. */ + home = NULL; + ret = 0; + + /* Allocate enough room for command-list arguments. */ + if ((clp = clist = + (char **)calloc((size_t)argc + 1, sizeof(char *))) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM)); + return (EXIT_FAILURE); + } + + /* Process arguments. */ + while ((ch = getopt(argc, argv, "c:h:v")) != EOF) + switch (ch) { + case 'c': + *clp++ = optarg; + break; + case 'h': + home = optarg; + break; + case 'v': + ++verbose; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (*argv != NULL) + return (usage()); + + /* Create or join the database environment. */ + if (csv_env_open(home, 1) != 0) + return (EXIT_FAILURE); + + /* Handle the queries. 
*/ + if (clp == clist) + ret = query_interactive(); + else + for (clp = clist, done = 0; *clp != NULL && !done; ++clp) + if ((ret = query(*clp, &done)) != 0) + break; + + /* Close the database environment. */ + if ((t_ret = csv_env_close()) != 0 && ret == 0) + ret = t_ret; + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * usage -- + * Program usage message. + */ +static int +usage(void) +{ + (void)fprintf(stderr, "usage: %s [-v] [-c cmd] [-h home]\n", progname); + return (EXIT_FAILURE); +} diff --git a/examples/c/csv/sample.csv b/examples/c/csv/sample.csv new file mode 100644 index 00000000..b3f09706 --- /dev/null +++ b/examples/c/csv/sample.csv @@ -0,0 +1,8 @@ +Adams,Bob,01/02/03,green,apple,37 +Carter,Denise Ann,04/05/06,blue,banana,38 +Eidel,Frank,07/08/09,red,cherry,38 +Grabel,Harriet,10/11/12,purple,date,40 +Indals,Jason,01/03/05,pink,orange,32 +Kilt,Laura,07/09/11,yellow,grape,38 +Moreno,Nancy,02/04/06,black,strawberry,38 +Octon,Patrick,08/10/12,magenta,kiwi,15 diff --git a/examples/c/csv/sample.desc b/examples/c/csv/sample.desc new file mode 100644 index 00000000..65aa9394 --- /dev/null +++ b/examples/c/csv/sample.desc @@ -0,0 +1,10 @@ +# $Id$ + +version 1 { + LastName string + FirstName string + BirthDate + Color string index + Fruit string index + Age unsigned_long index +} diff --git a/examples/c/csv/util.c b/examples/c/csv/util.c new file mode 100644 index 00000000..81196475 --- /dev/null +++ b/examples/c/csv/util.c @@ -0,0 +1,309 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "csv.h" +#include "csv_local.h" +#include "csv_extern.h" + +/* + * entry_print -- + * Display the primary database's data item. 
+ */ +int +entry_print(void *data, size_t len, u_int32_t field_count) +{ + u_int32_t a, *offset; + u_int i; + char *raw; + + memcpy(&a, data, sizeof(u_int32_t)); + printf("\tversion: %lu\n", (u_long)a); + + offset = (u_int32_t *)data + 1; + if (field_count == 0) { + memcpy(&a, offset++, sizeof(u_int32_t)); + printf("\tcolumn map: %lu fields: {%.*s}\n", (u_long)a, + (int)(len - 2 * sizeof(u_int32_t)), + (u_int8_t *)data + 2 * sizeof(u_int32_t)); + } else { + raw = (char *)(offset + (field_count + 1)); + for (i = 0; i < field_count; ++i) { + memcpy(&a, &offset[i], sizeof(u_int32_t)); + len = OFFSET_LEN(offset, i); + printf("\toffset %4lu: len %4lu: {%.*s}\n", + (u_long)offset[i], + (u_long)len, (int)len, raw + a); + } + } + + return (0); +} + +/* + * strtod_err -- + * strtod(3) with error checking. + */ +int +strtod_err(char *input, double *valp) +{ + double val; + char *end; + + /* + * strtoul requires setting errno to detect errors. + */ + errno = 0; + val = strtod(input, &end); + if (errno == ERANGE) { + dbenv->err(dbenv, ERANGE, "%s", input); + return (1); + } + if (input[0] == '\0' || + (end[0] != '\0' && end[0] != '\n' && !isspace(end[0]))) { + dbenv->errx(dbenv, + "%s: invalid floating point argument", input); + return (1); + } + + *valp = val; + return (0); +} + +/* + * strtoul_err -- + * strtoul(3) with error checking. + */ +int +strtoul_err(char *input, u_long *valp) +{ + u_long val; + char *end; + + /* + * strtoul requires setting errno to detect errors. + */ + errno = 0; + val = strtoul(input, &end, 10); + if (errno == ERANGE) { + dbenv->err(dbenv, ERANGE, "%s", input); + return (1); + } + if (input[0] == '\0' || + (end[0] != '\0' && end[0] != '\n' && !isspace(end[0]))) { + dbenv->errx(dbenv, "%s: invalid unsigned long argument", input); + return (1); + } + + *valp = val; + return (0); +} + +int +secondary_callback(DB *db_arg, const DBT *key, const DBT *data, DBT *result) +{ + DbField *f; + DbRecord record; + void *faddr, *addr; + + /* Populate the field. 
*/ + if (DbRecord_init(key, data, &record) != 0) + return (-1); + + f = db_arg->app_private; + faddr = (u_int8_t *)&record + f->offset; + + /* + * If necessary, copy the field into separate memory. + * Set up the result DBT. + */ + switch (f->type) { + case STRING: + result->data = *(char **)faddr; + result->size = (u_int32_t)strlen(*(char **)faddr) + 1; + break; + case DOUBLE: + if ((addr = malloc(sizeof(double))) == NULL) + return (-1); + result->data = addr; + result->size = sizeof(double); + result->flags = DB_DBT_APPMALLOC; + memcpy(addr, faddr, sizeof(double)); + break; + case UNSIGNED_LONG: + if ((addr = malloc(sizeof(u_long))) == NULL) + return (-1); + result->data = addr; + result->size = sizeof(u_long); + result->flags = DB_DBT_APPMALLOC; + memcpy(addr, faddr, sizeof(u_long)); + break; + default: + case NOTSET: + abort(); + /* NOTREACHED */ + } + + return (0); +} + +/* + * compare_double -- + * Compare two keys. + */ +int +compare_double(DB *db_arg, const DBT *a_arg, const DBT *b_arg) +{ + double a, b; + + db_arg = db_arg; /* Quiet compiler. */ + + memcpy(&a, a_arg->data, sizeof(double)); + memcpy(&b, b_arg->data, sizeof(double)); + return (a > b ? 1 : ((a < b) ? -1 : 0)); +} + +/* + * compare_ulong -- + * Compare two keys. + */ +int +compare_ulong(DB *db_arg, const DBT *a_arg, const DBT *b_arg) +{ + u_long a, b; + + db_arg = db_arg; /* Quiet compiler. */ + + memcpy(&a, a_arg->data, sizeof(u_long)); + memcpy(&b, b_arg->data, sizeof(u_long)); + return (a > b ? 1 : ((a < b) ? -1 : 0)); +} + +/* + * field_cmp_double -- + * Compare two double. 
+ */ +int +field_cmp_double(void *a, void *b, OPERATOR op) +{ + switch (op) { + case GT: + return (*(double *)a > *(double *)b); + case GTEQ: + return (*(double *)a >= *(double *)b); + case LT: + return (*(double *)a < *(double *)b); + case LTEQ: + return (*(double *)a <= *(double *)b); + case NEQ: + return (*(double *)a != *(double *)b); + case EQ: + return (*(double *)a == *(double *)b); + case WC: + case NWC: + break; + } + + abort(); + /* NOTREACHED */ +} + +/* + * field_cmp_re -- + * Compare against regular expression. + */ +int +field_cmp_re(void *a, void *b, OPERATOR op) +{ + op = op; /* Quiet compiler. */ + + switch (op) { +#ifdef HAVE_WILDCARD_SUPPORT + case WC: + return (regexec(b, *(char **)a, 0, NULL, 0) == 0); + case NWC: + return (regexec(b, *(char **)a, 0, NULL, 0) != 0); +#else + case WC: + case NWC: + a = a; + b = b; /* Quiet compiler. */ + /* FALLTHROUGH */ +#endif + case GT: + case GTEQ: + case LT: + case LTEQ: + case NEQ: + case EQ: + break; + } + + abort(); + /* NOTREACHED */ +} + +/* + * field_cmp_string -- + * Compare two strings. + */ +int +field_cmp_string(void *a, void *b, OPERATOR op) +{ + int v; + + v = strcasecmp(*(char **)a, b); + switch (op) { + case GT: + return (v > 0 ? 1 : 0); + case GTEQ: + return (v >= 0 ? 1 : 0); + case LT: + return (v < 0 ? 1 : 0); + case LTEQ: + return (v <= 0 ? 1 : 0); + case NEQ: + return (v ? 1 : 0); + case EQ: + return (v ? 0 : 1); + case WC: + case NWC: + break; + } + + abort(); + /* NOTREACHED */ +} + +/* + * field_cmp_ulong -- + * Compare two ulongs. 
+ */ +int +field_cmp_ulong(void *a, void *b, OPERATOR op) +{ + switch (op) { + case GT: + return (*(u_long *)a > *(u_long *)b); + case GTEQ: + return (*(u_long *)a >= *(u_long *)b); + case LT: + return (*(u_long *)a < *(u_long *)b); + case LTEQ: + return (*(u_long *)a <= *(u_long *)b); + case NEQ: + return (*(u_long *)a != *(u_long *)b); + case EQ: + return (*(u_long *)a == *(u_long *)b); + case WC: + case NWC: + break; + } + + abort(); + /* NOTREACHED */ +} diff --git a/examples/c/ex_access.c b/examples/c/ex_access.c new file mode 100644 index 00000000..e4aca821 --- /dev/null +++ b/examples/c/ex_access.c @@ -0,0 +1,161 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +#define DATABASE "access.db" +int main __P((int, char *[])); +int usage __P((void)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern int optind; + DB *dbp; + DBC *dbcp; + DBT key, data; + size_t len; + int ch, ret, rflag; + char *database, *p, *t, buf[1024], rbuf[1024]; + const char *progname = "ex_access"; /* Program name. */ + + rflag = 0; + while ((ch = getopt(argc, argv, "r")) != EOF) + switch (ch) { + case 'r': + rflag = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Accept optional database name. */ + database = *argv == NULL ? DATABASE : argv[0]; + + /* Optionally discard the database. */ + if (rflag) + (void)remove(database); + + /* Create and initialize database object, open the database. 
*/ + if ((ret = db_create(&dbp, NULL, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); + if ((ret = dbp->set_pagesize(dbp, 1024)) != 0) { + dbp->err(dbp, ret, "set_pagesize"); + goto err1; + } + if ((ret = dbp->set_cachesize(dbp, 0, 32 * 1024, 0)) != 0) { + dbp->err(dbp, ret, "set_cachesize"); + goto err1; + } + if ((ret = dbp->open(dbp, + NULL, database, NULL, DB_BTREE, DB_CREATE, 0664)) != 0) { + dbp->err(dbp, ret, "%s: open", database); + goto err1; + } + + /* + * Insert records into the database, where the key is the user + * input and the data is the user input in reverse order. + */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + for (;;) { + printf("input> "); + fflush(stdout); + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + if (strcmp(buf, "exit\n") == 0 || strcmp(buf, "quit\n") == 0) + break; + if ((len = strlen(buf)) <= 1) + continue; + for (t = rbuf, p = buf + (len - 2); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + key.data = buf; + data.data = rbuf; + data.size = key.size = (u_int32_t)len - 1; + + switch (ret = + dbp->put(dbp, NULL, &key, &data, DB_NOOVERWRITE)) { + case 0: + break; + default: + dbp->err(dbp, ret, "DB->put"); + if (ret != DB_KEYEXIST) + goto err1; + break; + } + } + printf("\n"); + + /* Acquire a cursor for the database. */ + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbp->err(dbp, ret, "DB->cursor"); + goto err1; + } + + /* Initialize the key/data pair so the flags aren't set. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + /* Walk through the database and print out the key/data pairs. 
*/ + while ((ret = dbcp->get(dbcp, &key, &data, DB_NEXT)) == 0) + printf("%.*s : %.*s\n", + (int)key.size, (char *)key.data, + (int)data.size, (char *)data.data); + if (ret != DB_NOTFOUND) { + dbp->err(dbp, ret, "DBcursor->get"); + goto err2; + } + + /* Close everything down. */ + if ((ret = dbcp->close(dbcp)) != 0) { + dbp->err(dbp, ret, "DBcursor->close"); + goto err1; + } + if ((ret = dbp->close(dbp, 0)) != 0) { + fprintf(stderr, + "%s: DB->close: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); + +err2: (void)dbcp->close(dbcp); +err1: (void)dbp->close(dbp, 0); + return (EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: ex_access [-r] [database]\n"); + return (EXIT_FAILURE); +} diff --git a/examples/c/ex_apprec/auto_rebuild b/examples/c/ex_apprec/auto_rebuild new file mode 100644 index 00000000..bbd52ee2 --- /dev/null +++ b/examples/c/ex_apprec/auto_rebuild @@ -0,0 +1,10 @@ +# Script to rebuild automatically generated files for ex_apprec. + +E=../examples/c/ex_apprec + +cd ../../../dist +awk -f gen_rec.awk \ + -v source_file=$E/ex_apprec_auto.c \ + -v header_file=$E/ex_apprec_auto.h \ + -v print_file=$E/ex_apprec_autop.c \ + -v template_file=$E/ex_apprec_template < $E/ex_apprec.src diff --git a/examples/c/ex_apprec/ex_apprec.c b/examples/c/ex_apprec/ex_apprec.c new file mode 100644 index 00000000..5d3b4ac8 --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec.c @@ -0,0 +1,277 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "db_int.h" +#include "dbinc/db_swap.h" + +#include "ex_apprec.h" + +int apprec_dispatch __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +int open_env __P((const char *, FILE *, const char *, DB_ENV **)); +int verify_absence __P((DB_ENV *, const char *)); +int verify_presence __P((DB_ENV *, const char *)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *txn; + DBT dirnamedbt; + int ch, ret; + const char *home; + char dirname[256]; + const char *progname = "ex_apprec"; /* Program name. */ + + /* Default home. */ + home = "TESTDIR"; + + while ((ch = getopt(argc, argv, "h:")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + default: + fprintf(stderr, "usage: %s [-h home]", progname); + exit(EXIT_FAILURE); + } + + printf("Set up environment.\n"); + if ((ret = open_env(home, stderr, progname, &dbenv)) != 0) + return (EXIT_FAILURE); + + printf("Create a directory in a transaction.\n"); + /* + * This application's convention is to log the full directory name, + * including trailing nul. + */ + memset(&dirnamedbt, 0, sizeof(dirnamedbt)); + sprintf(dirname, "%s/MYDIRECTORY", home); + dirnamedbt.data = dirname; + dirnamedbt.size = strlen(dirname) + 1; + + if ((ret = dbenv->txn_begin(dbenv, NULL, &txn, 0)) != 0) { + dbenv->err(dbenv, ret, "txn_begin"); + return (EXIT_FAILURE); + } + + /* + * Remember, always log actions before you execute them! + * Since this log record is describing a file system operation and + * we have no control over when file system operations go to disk, + * we need to flush the log record immediately to ensure that the + * log record is on disk before the operation it describes. The + * flush would not be necessary were we doing an operation into the + * BDB mpool and using LSNs that mpool knew about. 
+ */ + memset(&lsn, 0, sizeof(lsn)); + if ((ret = + ex_apprec_mkdir_log(dbenv, + txn, &lsn, DB_FLUSH, &dirnamedbt)) != 0) { + dbenv->err(dbenv, ret, "mkdir_log"); + return (EXIT_FAILURE); + } + if (mkdir(dirname, 0755) != 0) { + dbenv->err(dbenv, errno, "mkdir"); + return (EXIT_FAILURE); + } + + printf("Verify the directory's presence: "); + verify_presence(dbenv, dirname); + printf("check.\n"); + + /* Now abort the transaction and verify that the directory goes away. */ + printf("Abort the transaction.\n"); + if ((ret = txn->abort(txn)) != 0) { + dbenv->err(dbenv, ret, "txn_abort"); + return (EXIT_FAILURE); + } + + printf("Verify the directory's absence: "); + verify_absence(dbenv, dirname); + printf("check.\n"); + + /* Now do the same thing over again, only with a commit this time. */ + printf("Create a directory in a transaction.\n"); + memset(&dirnamedbt, 0, sizeof(dirnamedbt)); + sprintf(dirname, "%s/MYDIRECTORY", home); + dirnamedbt.data = dirname; + dirnamedbt.size = strlen(dirname) + 1; + if ((ret = dbenv->txn_begin(dbenv, NULL, &txn, 0)) != 0) { + dbenv->err(dbenv, ret, "txn_begin"); + return (EXIT_FAILURE); + } + + memset(&lsn, 0, sizeof(lsn)); + if ((ret = + ex_apprec_mkdir_log(dbenv, txn, &lsn, 0, &dirnamedbt)) != 0) { + dbenv->err(dbenv, ret, "mkdir_log"); + return (EXIT_FAILURE); + } + if (mkdir(dirname, 0755) != 0) { + dbenv->err(dbenv, errno, "mkdir"); + return (EXIT_FAILURE); + } + + printf("Verify the directory's presence: "); + verify_presence(dbenv, dirname); + printf("check.\n"); + + /* Now abort the transaction and verify that the directory goes away. 
*/ + printf("Commit the transaction.\n"); + if ((ret = txn->commit(txn, 0)) != 0) { + dbenv->err(dbenv, ret, "txn_commit"); + return (EXIT_FAILURE); + } + + printf("Verify the directory's presence: "); + verify_presence(dbenv, dirname); + printf("check.\n"); + + printf("Now remove the directory, then run recovery.\n"); + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "DB_ENV->close: %s\n", db_strerror(ret)); + return (EXIT_FAILURE); + } + if (rmdir(dirname) != 0) { + fprintf(stderr, + "%s: rmdir failed with error %s", progname, + strerror(errno)); + } + verify_absence(dbenv, dirname); + + /* Opening with DB_RECOVER runs recovery. */ + if ((ret = open_env(home, stderr, progname, &dbenv)) != 0) + return (EXIT_FAILURE); + + printf("Verify the directory's presence: "); + verify_presence(dbenv, dirname); + printf("check.\n"); + + /* Close the handle. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "DB_ENV->close: %s\n", db_strerror(ret)); + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + +int +open_env(home, errfp, progname, dbenvp) + const char *home, *progname; + FILE *errfp; + DB_ENV **dbenvp; +{ + DB_ENV *dbenv; + int ret; + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(errfp, "%s: %s\n", progname, db_strerror(ret)); + return (ret); + } + dbenv->set_errfile(dbenv, errfp); + dbenv->set_errpfx(dbenv, progname); + + /* Set up our custom recovery dispatch function. */ + if ((ret = dbenv->set_app_dispatch(dbenv, apprec_dispatch)) != 0) { + dbenv->err(dbenv, ret, "set_app_dispatch"); + return (ret); + } + + /* + * Open the environment with full transactional support, running + * recovery. 
+ */ + if ((ret = + dbenv->open(dbenv, home, DB_CREATE | DB_RECOVER | DB_INIT_LOCK | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN, 0)) != 0) { + dbenv->err(dbenv, ret, "environment open: %s", home); + dbenv->close(dbenv, 0); + return (ret); + } + + *dbenvp = dbenv; + return (0); +} + +/* + * Sample application dispatch function to handle user-specified log record + * types. + */ +int +apprec_dispatch(dbenv, dbt, lsn, op) + DB_ENV *dbenv; + DBT *dbt; + DB_LSN *lsn; + db_recops op; +{ + u_int32_t rectype; + + /* Pull the record type out of the log record. */ + LOGCOPY_32(dbenv->env, &rectype, dbt->data); + + switch (rectype) { + case DB_ex_apprec_mkdir: + return (ex_apprec_mkdir_recover(dbenv, dbt, lsn, op)); + default: + /* + * We've hit an unexpected, allegedly user-defined record + * type. + */ + dbenv->errx(dbenv, "Unexpected log record type encountered"); + return (EINVAL); + } +} + +int +verify_absence(dbenv, dirname) + DB_ENV *dbenv; + const char *dirname; +{ + + if (access(dirname, F_OK) == 0) { + dbenv->errx(dbenv, "Error--directory present!"); + exit(EXIT_FAILURE); + } + + return (0); +} + +int +verify_presence(dbenv, dirname) + DB_ENV *dbenv; + const char *dirname; +{ + + if (access(dirname, F_OK) != 0) { + dbenv->errx(dbenv, "Error--directory not present!"); + exit(EXIT_FAILURE); + } + + return (0); +} diff --git a/examples/c/ex_apprec/ex_apprec.h b/examples/c/ex_apprec/ex_apprec.h new file mode 100644 index 00000000..e0702426 --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec.h @@ -0,0 +1,20 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _EX_APPREC_H_ +#define _EX_APPREC_H_ + +#include "ex_apprec_auto.h" + +int ex_apprec_mkdir_print + __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +int ex_apprec_mkdir_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +int ex_apprec_init_print __P((DB_ENV *, DB_DISTAB *)); + +#endif /* !_EX_APPREC_H_ */ diff --git a/examples/c/ex_apprec/ex_apprec.src b/examples/c/ex_apprec/ex_apprec.src new file mode 100644 index 00000000..d332133a --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec.src @@ -0,0 +1,33 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2010 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +PREFIX ex_apprec +INCLUDE #include "ex_apprec.h" + +/* + * This is the source file used to generate the application-specific recovery + * functions used by the ex_apprec example. It should be turned into usable + * source code (including a template for the recovery function itself) by + * invoking changing to the dist directory of the DB distribution and + * running the gen_rec.awk script there as follows: + * + * awk -f ./gen_rec.awk \ + * -v source_file=../examples/c/ex_apprec/ex_apprec_auto.c \ + * -v header_file=../examples/c/ex_apprec/ex_apprec_auto.h \ + * -v template_file=../examples/c/ex_apprec/ex_apprec_template \ + * < ../examples/c/ex_apprec/ex_apprec.src + */ + +/* + * mkdir: used to create a directory + * + * dirname: relative or absolute pathname of the directory to be created + */ +BEGIN mkdir 42 10000 +DBT dirname DBT s +END diff --git a/examples/c/ex_apprec/ex_apprec_auto.c b/examples/c/ex_apprec/ex_apprec_auto.c new file mode 100644 index 00000000..0781c44d --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec_auto.c @@ -0,0 +1,13 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include +#include +#include +#include "db_int.h" +#include "dbinc/db_swap.h" +#include "ex_apprec.h" +DB_LOG_RECSPEC ex_apprec_mkdir_desc[] = { + {LOGREC_DBT, SSZ(ex_apprec_mkdir_args, dirname), "dirname", ""}, + {LOGREC_Done, 0, "", ""} +}; diff --git a/examples/c/ex_apprec/ex_apprec_auto.h b/examples/c/ex_apprec/ex_apprec_auto.h new file mode 100644 index 00000000..b734135a --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec_auto.h @@ -0,0 +1,32 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef ex_apprec_AUTO_H +#define ex_apprec_AUTO_H +#define DB_ex_apprec_mkdir 10000 +typedef struct _ex_apprec_mkdir_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT dirname; +} ex_apprec_mkdir_args; + +extern __DB_IMPORT DB_LOG_RECSPEC ex_apprec_mkdir_desc[]; +static inline int +ex_apprec_mkdir_log(DB_ENV *dbenv, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *dirname) +{ + return (dbenv->log_put_record(dbenv, NULL, txnp, ret_lsnp, + flags, DB_ex_apprec_mkdir, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(dirname), + ex_apprec_mkdir_desc, + dirname)); +} + +static inline int ex_apprec_mkdir_read(DB_ENV *dbenv, + void *data, ex_apprec_mkdir_args **arg) +{ + return (dbenv->log_read_record(dbenv, + NULL, NULL, data, ex_apprec_mkdir_desc, sizeof(ex_apprec_mkdir_args), (void**)arg)); +} +#endif diff --git a/examples/c/ex_apprec/ex_apprec_autop.c b/examples/c/ex_apprec/ex_apprec_autop.c new file mode 100644 index 00000000..eac393c0 --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec_autop.c @@ -0,0 +1,43 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include +#include +#include +#include "db_int.h" +#include "dbinc/log.h" +#include "ex_apprec.h" +/* + * PUBLIC: int ex_apprec_mkdir_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops)); + */ +int +ex_apprec_mkdir_print(dbenv, dbtp, lsnp, notused2) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; +{ + notused2 = DB_TXN_PRINT; + + return (__log_print_record(dbenv->env, dbtp, lsnp, "ex_apprec_mkdir", ex_apprec_mkdir_desc, NULL)); +} + +/* + * PUBLIC: int ex_apprec_init_print __P((DB_ENV *, DB_DISTAB *)); + */ +int +ex_apprec_init_print(dbenv, dtabp) + DB_ENV *dbenv; + DB_DISTAB *dtabp; +{ + int __db_add_recovery __P((DB_ENV *, DB_DISTAB *, + int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t)); + int ret; + + if ((ret = __db_add_recovery(dbenv, dtabp, + ex_apprec_mkdir_print, DB_ex_apprec_mkdir)) != 0) + return (ret); + return (0); +} diff --git a/examples/c/ex_apprec/ex_apprec_rec.c b/examples/c/ex_apprec/ex_apprec_rec.c new file mode 100644 index 00000000..43f3fed6 --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec_rec.c @@ -0,0 +1,109 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This file is based on the template file ex_apprec_template. Note that + * because ex_apprec_mkdir, like most application-specific recovery functions, + * does not make use of DB-private structures, it has actually been simplified + * significantly. + */ + +#include "db_config.h" +#include +#include +#include +#include + +#include "db_int.h" + +#include "ex_apprec.h" + +/* + * ex_apprec_mkdir_recover -- + * Recovery function for mkdir. 
+ * + * PUBLIC: int ex_apprec_mkdir_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + */ +int +ex_apprec_mkdir_recover(dbenv, dbtp, lsnp, op) + DB_ENV *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; +{ + ex_apprec_mkdir_args *argp; + int ret; + + argp = NULL; + +#ifdef DEBUG_RECOVER + ex_apprec_mkdir_print(dbenv, dbtp, lsnp, op); +#endif + if ((ret = ex_apprec_mkdir_read(dbenv, dbtp->data, &argp)) != 0) + goto out; + + switch (op) { + case DB_TXN_ABORT: + case DB_TXN_BACKWARD_ROLL: + /* + * If we're aborting, we need to remove the directory if it + * exists. We log the trailing zero in pathnames, so we can + * simply pass the data part of the DBT into rmdir as a string. + * (Note that we don't have any alignment guarantees, but for + * a char * this doesn't matter.) + * + * Ignore all errors other than ENOENT; DB may attempt to undo + * or redo operations without knowing whether they have already + * been done or undone, so we should never assume in a recovery + * function that the task definitely needs doing or undoing. + */ + ret = rmdir(argp->dirname.data); + if (ret != 0 && errno != ENOENT) + dbenv->err(dbenv, ret, "Error in abort of mkdir"); + else + ret = 0; + break; + case DB_TXN_FORWARD_ROLL: + /* + * The forward direction is just the opposite; here, we ignore + * EEXIST, because the directory may already exist. + */ + ret = mkdir(argp->dirname.data, 0755); + if (ret != 0 && errno != EEXIST) + dbenv->err(dbenv, + ret, "Error in roll-forward of mkdir"); + else + ret = 0; + break; + default: + /* + * We might want to handle DB_TXN_PRINT or DB_TXN_APPLY here, + * too, but we don't try to print the log records and aren't + * using replication, so there's no need to in this example. 
+ */ + dbenv->errx(dbenv, "Unexpected operation type\n"); + return (EINVAL); + } + + /* + * The recovery function is responsible for returning the LSN of the + * previous log record in this transaction, so that transaction aborts + * can follow the chain backwards. + * + * (If we'd wanted the LSN of this record earlier, we could have + * read it from lsnp, as well--but because we weren't working with + * pages or other objects that store their LSN and base recovery + * decisions on it, we didn't need to.) + */ + *lsnp = argp->prev_lsn; + +out: if (argp != NULL) + free(argp); + return (ret); +} diff --git a/examples/c/ex_apprec/ex_apprec_template b/examples/c/ex_apprec/ex_apprec_template new file mode 100644 index 00000000..55182f65 --- /dev/null +++ b/examples/c/ex_apprec/ex_apprec_template @@ -0,0 +1,70 @@ +#include "db.h" + +/* + * ex_apprec_mkdir_recover -- + * Recovery function for mkdir. + * + * PUBLIC: int ex_apprec_mkdir_recover + * PUBLIC: __P((dbenv *, DBT *, DB_LSN *, db_recops)); + */ +int +ex_apprec_mkdir_recover(dbenv, dbtp, lsnp, op) + dbenv *dbenv; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; +{ + ex_apprec_mkdir_args *argp; + int cmp_n, cmp_p, modified, ret; + +#ifdef DEBUG_RECOVER + (void)ex_apprec_mkdir_print(dbenv, dbtp, lsnp, op); +#endif + argp = NULL; + if ((ret = ex_apprec_mkdir_read(dbenv, dbtp->data, &argp)) != 0) + goto out; + + modified = 0; + cmp_n = 0; + cmp_p = 0; + + /* + * The function now needs to calculate cmp_n and cmp_p based + * on whatever is in argp (usually an LSN representing the state + * of an object BEFORE the operation described in this record was + * applied) and whatever other information the function needs, + * e.g., the LSN of the object as it exists now. + * + * cmp_p should be set to 0 if the current state of the object + * is believed to be same as the state of the object BEFORE the + * described operation was applied. 
For example, if you had an + * LSN in the log record (argp->prevlsn) and a current LSN of the + * object (curlsn), you might want to do: + * + * cmp_p = log_compare(curlsn, argp->prevlsn); + * + * Similarly, cmp_n should be set to 0 if the current state + * of the object reflects the object AFTER this operation has + * been applied. Thus, if you can figure out an object's current + * LSN, yo might set cmp_n as: + * + * cmp_n = log_compare(lsnp, curlsn); + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + modified = 1; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to undo update described. */ + modified = 1; + } + + /* Allow for following LSN pointers through a transaction. */ + *lsnp = argp->prev_lsn; + ret = 0; + +out: if (argp != NULL) + free(argp); + + return (ret); +} + diff --git a/examples/c/ex_btrec.c b/examples/c/ex_btrec.c new file mode 100644 index 00000000..42540eaf --- /dev/null +++ b/examples/c/ex_btrec.c @@ -0,0 +1,202 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id: ex_btrec.c,v 0f73af5ae3da 2010/05/10 05:38:40 alexander $ + */ + +#include + +#include +#include +#include + +#include + +#define DATABASE "access.db" +#define WORDLIST "../test/tcl/wordlist" +int main __P((void)); + +int ex_btrec __P((void)); +void show __P((const char *, DBT *, DBT *)); + +int +main() +{ + return (ex_btrec() == 1 ? EXIT_FAILURE : EXIT_SUCCESS); +} + +int +ex_btrec() +{ + DB *dbp; + DBC *dbcp; + DBT key, data; + DB_BTREE_STAT *statp; + FILE *fp; + db_recno_t recno; + size_t len; + int cnt, ret; + char *p, *t, buf[1024], rbuf[1024]; + const char *progname = "ex_btrec"; /* Program name. */ + + /* Open the word database. */ + if ((fp = fopen(WORDLIST, "r")) == NULL) { + fprintf(stderr, "%s: open %s: %s\n", + progname, WORDLIST, db_strerror(errno)); + return (1); + } + + /* Remove the previous database. 
*/ + (void)remove(DATABASE); + + /* Create and initialize database object, open the database. */ + if ((ret = db_create(&dbp, NULL, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + return (1); + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); /* 1K page sizes. */ + if ((ret = dbp->set_pagesize(dbp, 1024)) != 0) { + dbp->err(dbp, ret, "set_pagesize"); + return (1); + } /* Record numbers. */ + if ((ret = dbp->set_flags(dbp, DB_RECNUM)) != 0) { + dbp->err(dbp, ret, "set_flags: DB_RECNUM"); + return (1); + } + if ((ret = dbp->open(dbp, + NULL, DATABASE, NULL, DB_BTREE, DB_CREATE, 0664)) != 0) { + dbp->err(dbp, ret, "open: %s", DATABASE); + return (1); + } + + /* + * Insert records into the database, where the key is the word + * preceded by its record number, and the data is the same, but + * in reverse order. + */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + for (cnt = 1; cnt <= 1000; ++cnt) { + (void)sprintf(buf, "%04d_", cnt); + if (fgets(buf + 4, sizeof(buf) - 4, fp) == NULL) + break; + len = strlen(buf); + for (t = rbuf, p = buf + (len - 2); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + key.data = buf; + data.data = rbuf; + data.size = key.size = (u_int32_t)len - 1; + + if ((ret = + dbp->put(dbp, NULL, &key, &data, DB_NOOVERWRITE)) != 0) { + dbp->err(dbp, ret, "DB->put"); + if (ret != DB_KEYEXIST) + goto err1; + } + } + + /* Close the word database. */ + (void)fclose(fp); + + /* Print out the number of records in the database. */ + if ((ret = dbp->stat(dbp, NULL, &statp, 0)) != 0) { + dbp->err(dbp, ret, "DB->stat"); + goto err1; + } + printf("%s: database contains %lu records\n", + progname, (u_long)statp->bt_ndata); + free(statp); + + /* Acquire a cursor for the database. */ + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbp->err(dbp, ret, "DB->cursor"); + goto err1; + } + + /* + * Prompt the user for a record number, then retrieve and display + * that record. 
+ */ + for (;;) { + /* Get a record number. */ + printf("recno #> "); + fflush(stdout); + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + recno = atoi(buf); + + /* + * Reset the key each time, the dbp->get() routine returns + * the key and data pair, not just the key! + */ + key.data = &recno; + key.size = sizeof(recno); + if ((ret = dbcp->get(dbcp, &key, &data, DB_SET_RECNO)) != 0) + goto get_err; + + /* Display the key and data. */ + show("k/d\t", &key, &data); + + /* Move the cursor a record forward. */ + if ((ret = dbcp->get(dbcp, &key, &data, DB_NEXT)) != 0) + goto get_err; + + /* Display the key and data. */ + show("next\t", &key, &data); + + /* + * Retrieve the record number for the following record into + * local memory. + */ + data.data = &recno; + data.size = sizeof(recno); + data.ulen = sizeof(recno); + data.flags |= DB_DBT_USERMEM; + if ((ret = dbcp->get(dbcp, &key, &data, DB_GET_RECNO)) != 0) { +get_err: dbp->err(dbp, ret, "DBcursor->get"); + if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) + goto err2; + } else + printf("retrieved recno: %lu\n", (u_long)recno); + + /* Reset the data DBT. */ + memset(&data, 0, sizeof(data)); + } + + if ((ret = dbcp->close(dbcp)) != 0) { + dbp->err(dbp, ret, "DBcursor->close"); + goto err1; + } + if ((ret = dbp->close(dbp, 0)) != 0) { + fprintf(stderr, + "%s: DB->close: %s\n", progname, db_strerror(ret)); + return (1); + } + + return (0); + +err2: (void)dbcp->close(dbcp); +err1: (void)dbp->close(dbp, 0); + return (ret); + +} + +/* + * show -- + * Display a key/data pair. + */ +void +show(msg, key, data) + const char *msg; + DBT *key, *data; +{ + printf("%s%.*s : %.*s\n", msg, + (int)key->size, (char *)key->data, + (int)data->size, (char *)data->data); +} diff --git a/examples/c/ex_bulk.c b/examples/c/ex_bulk.c new file mode 100644 index 00000000..3212d4df --- /dev/null +++ b/examples/c/ex_bulk.c @@ -0,0 +1,867 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * ex_bulk - Demonstrate usage of all bulk APIs available in Berkeley DB. + * NOTE: Though this example code generates timing information, it's important + * to note that it is written as code to demonstrate functionality, and is not + * optimized as a benchmark. + */ +#include +#include +#include +#include + +#define DATABASE "ex_bulk.db" /* Database name */ +#define DATALEN 20 /* The length of data */ +#define NS_PER_MS 1000000 /* Nanoseconds in a millisecond */ +#define NS_PER_US 1000 /* Nanoseconds in a microsecond */ +#define PUTS_PER_TXN 10 +#define STRLEN DATALEN - sizeof(int) /* The length of string */ +#define UPDATES_PER_BULK_PUT 100 + +#ifdef _WIN32 +#include +extern int getopt(int, char * const *, const char *); +/* Implement a basic high res timer with a POSIX interface for Windows. */ +struct timeval { + time_t tv_sec; + long tv_usec; +}; +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct _timeb now; + _ftime(&now); + tv->tv_sec = now.time; + tv->tv_usec = now.millitm * NS_PER_US; + return (0); +} +#define CLEANUP_CMD "rmdir EX_BULK /q/s" +#else +#include +#include +#define CLEANUP_CMD "rm -rf EX_BULK" +#endif + +#include + +int bulk_delete(DB_ENV *, DB *, int, int, int *, int *, int); +int bulk_delete_sec(DB_ENV *, DB *, int, int, int *, int *, int); +int bulk_fill(DB_ENV *, DB *, int, int, int *, int *, int); +int bulk_get(DB_ENV *, DB *, int, int, int, int *, int); +int compare_int(DB *, const DBT *, const DBT *); +DB_ENV *db_init(char *, char *, u_int); +int get_first_str(DB *, const DBT *, const DBT *, DBT *); +int get_string(const char *, char *, int); +int main(int, char *[]); +void usage(void); + +const char *progname = "ex_bulk"; /* Program name */ +const char tstring[STRLEN] = "0123456789abcde"; /* Const string */ + +struct data { + int id; + char str[STRLEN]; +}; + +int +main(argc, argv) + int argc; + char 
*argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp, *sdbp; + DB_ENV *dbenv; + DB_TXN *txnp; + struct timeval start_time, end_time; + double secs; + u_int cache, pagesize; + int biter, ch, count, dups, init, iter, num; + int ret, rflag, sflag, bulk, delete, pair, verbose; + + dbp = sdbp = NULL; + dbenv = NULL; + txnp = NULL; + iter = num = 1000000; + dups = init = rflag = sflag = bulk = delete = verbose = 0; + + pagesize = 65536; + cache = 1000 * pagesize; + + while ((ch = getopt(argc, argv, "c:d:i:n:p:vDIRSU")) != EOF) + switch (ch) { + case 'c': + cache = (u_int)atoi(optarg); + break; + case 'd': + dups = atoi(optarg); + break; + case 'i': + iter = atoi(optarg); + break; + case 'n': + num = atoi(optarg); + break; + case 'p': + pagesize = (u_int)atoi(optarg); + break; + case 'v': + verbose = 1; + break; + case 'D': + delete = 1; + break; + case 'I': + init = 1; + break; + case 'R': + rflag = 1; + break; + case 'S': + sflag = 1; + break; + case 'U': + bulk = 1; + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + /* Remove the previous environment and database(s). */ + if (!rflag) { + system(CLEANUP_CMD); + system("mkdir EX_BULK"); + } + + if ((dbenv = db_init("EX_BULK", "ex_bulk", cache)) == NULL) + return (-1); + if (init) + exit(0); + + /* Create and initialize database object, open the database. 
*/ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + exit(EXIT_FAILURE); + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); + if ((ret = dbp->set_bt_compare(dbp, compare_int)) != 0) { + dbp->err(dbp, ret, "set_bt_compare"); + goto err; + } + if ((ret = dbp->set_pagesize(dbp, pagesize)) != 0) { + dbp->err(dbp, ret, "set_pagesize"); + goto err; + } + if (dups && (ret = dbp->set_flags(dbp, DB_DUP)) != 0) { + dbp->err(dbp, ret, "set_flags"); + goto err; + } + + if ((ret = dbenv->txn_begin(dbenv, NULL, &txnp, 0)) != 0) + goto err; + + if ((ret = dbp->open(dbp, txnp, DATABASE, "primary", DB_BTREE, + DB_CREATE , 0664)) != 0) { + dbp->err(dbp, ret, "%s: open", DATABASE); + if (txnp != NULL) + (void)txnp->abort(txnp); + goto err; + } + + if (sflag) { + /* + * Open secondary database. The keys in secondary database + * are the first charactor in str of struct data in data + * field of primary database. + */ + if ((ret = db_create(&sdbp, dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_create: %s\n", + progname, db_strerror(ret)); + exit(EXIT_FAILURE); + } + if ((ret = sdbp->set_flags(sdbp, DB_DUPSORT)) != 0) { + sdbp->err(sdbp, ret, "set_flags"); + goto err; + } + if ((ret = sdbp->open(sdbp, txnp, DATABASE, "secondary", + DB_BTREE, DB_CREATE, 0664)) != 0) { + sdbp->err(sdbp, ret, "%s: secondary open", DATABASE); + if (txnp != NULL) + (void)txnp->abort(txnp); + goto err; + } + if ((ret = dbp->associate(dbp, txnp, sdbp, get_first_str, + 0)) != 0) { + dbp->err(dbp, ret, "%s: associate", DATABASE); + if (txnp != NULL) + (void)txnp->abort(txnp); + goto err; + } + } + + if ((ret = txnp->commit(txnp, 0)) != 0) + goto err; + txnp = NULL; + + if (rflag) { + /* Time the get loop. 
*/ + (void)gettimeofday(&start_time, NULL); + if ((ret = bulk_get( + dbenv, dbp, num, dups, iter, &count, verbose)) != 0) + goto err; + (void)gettimeofday(&end_time, NULL); + secs = + (((double)end_time.tv_sec * 1000000 + + end_time.tv_usec) - + ((double)start_time.tv_sec * 1000000 + + start_time.tv_usec)) / 1000000; + printf("[STAT] Read %d records read using %d batches", + count, iter); + printf(" in %.2f seconds: ", secs); + printf("%.0f records/second\n", (double)count / secs); + } else { + /* Time the fill loop. */ + (void)gettimeofday(&start_time, NULL); + if ((ret = bulk_fill(dbenv, dbp, num, dups, &count, + &biter, verbose)) != 0) + goto err; + (void)gettimeofday(&end_time, NULL); + secs = (((double)end_time.tv_sec * 1000000 + + end_time.tv_usec) - + ((double)start_time.tv_sec * 1000000 + + start_time.tv_usec)) / 1000000; + printf("[STAT] Insert %d records using %d batches", + count, biter); + printf(" in %.2f seconds: ", secs); + printf("%.0f records/second\n", (double)count / secs); + + if (delete) { + if (sflag) { + pair = rand() % 2; + /* Time the delete loop in secondary db */ + (void)gettimeofday(&start_time, NULL); + if ((ret = bulk_delete_sec(dbenv, sdbp, num, + pair, &count, &iter, verbose)) != 0) + goto err; + (void)gettimeofday(&end_time, NULL); + secs = (((double)end_time.tv_sec * 1000000 + + end_time.tv_usec) - + ((double)start_time.tv_sec * 1000000 + + start_time.tv_usec)) / 1000000; + printf("[STAT] Delete %d %s using %d batches", + count, (pair) ? 
"records" : "keys", iter); + printf(" in %.2f seconds: ", secs); + printf("%.0f records/second\n", + (double)count / secs); + } else { + /* Time the delete loop in primary db */ + (void)gettimeofday(&start_time, NULL); + if ((ret = bulk_delete(dbenv, dbp, num, dups, + &count, &iter, verbose)) != 0) + goto err; + (void)gettimeofday(&end_time, NULL); + secs = (((double)end_time.tv_sec * 1000000 + + end_time.tv_usec) - + ((double)start_time.tv_sec * 1000000 + + start_time.tv_usec)) / 1000000; + printf( +"[STAT] Delete %d records using %d batches", + count, iter); + printf(" in %.2f seconds: ", secs); + printf("%.0f records/second\n", + (double)count / secs); + } + } + } + + /* Close everything down. */ + if (sflag) + if ((ret = sdbp->close(sdbp, rflag ? DB_NOSYNC : 0)) != 0) { + fprintf(stderr, "%s: DB->close: %s\n", + progname, db_strerror(ret)); + return (1); + } + + if ((ret = dbp->close(dbp, rflag ? DB_NOSYNC : 0)) != 0) { + fprintf(stderr, + "%s: DB->close: %s\n", progname, db_strerror(ret)); + return (1); + } + return (ret); + +err: + if (sflag) + (void)sdbp->close(sdbp, 0); + (void)dbp->close(dbp, 0); + return (1); +} + +/* + * bulk_delete - bulk_delete from a db + * Since we open/created the db with transactions, we need to delete + * from it with transactions. We'll bundle the deletes UPDATES_PER_BULK_PUT + * to a transaction. + */ +int +bulk_delete(dbenv, dbp, num, dups, countp, iterp, verbose) + DB_ENV *dbenv; + DB *dbp; + int num, dups, verbose; + int *countp, *iterp; +{ + DBT key; + DB_TXN *txnp; + struct data *data_val; + u_int32_t flag; + int count, i, j, iter, ret; + void *ptrk; + + txnp = NULL; + count = flag = iter = ret = 0; + memset(&key, 0, sizeof(DBT)); + + j = rand() % num; + + /* + * Need to account for proper buffer size, the buffer must be at + * least as large as the page size of the underlying database, + * aligned for unsigned integer access, and be a multiple of 1024 + * bytes in size. 
+ */ + key.ulen = (u_int32_t)UPDATES_PER_BULK_PUT * + (sizeof(u_int32_t) + DATALEN) * 1024; + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + key.data = malloc(key.ulen); + memset(key.data, 0, key.ulen); + data_val = malloc(DATALEN); + memset(data_val, 0, DATALEN); + + /* + * If DB_MULTIPLE, delete all records with a specified set of keys + * in a DBT. The DBT is constructed by DB_MULTIPLE_WRITE_NEXT. If + * DB_MULTIPLE_KEY, delete a specific set of key/data pairs in the + * DBT constructed by DB_MULTIPLE_KEY_WRITE_NEXT. Here, delete + * keys before the random key, if there are duplicate records, + * delete duplicate pairs with DB_MULTIPLE_KEY, unless, delete keys + * with DB_MULTIPLE. + */ + flag |= (dups) ? DB_MULTIPLE : DB_MULTIPLE_KEY; + DB_MULTIPLE_WRITE_INIT(ptrk, &key); + for (i = 0; i < j; i++) { + if (i % UPDATES_PER_BULK_PUT == 0) { + if (txnp != NULL) { + if ((ret = txnp->commit(txnp, 0)) != 0) + goto err; + txnp = NULL; + } + if ((ret = + dbenv->txn_begin(dbenv, NULL, &txnp, 0)) != 0) + goto err; + } + + if (dups) { + DB_MULTIPLE_WRITE_NEXT(ptrk, &key, &i, sizeof(i)); + assert(ptrk != NULL); + count++; + if (verbose) + printf("Delete key: %d\n", i); + } else { + data_val->id = 0; + do { + get_string(tstring, data_val->str, i); + DB_MULTIPLE_KEY_WRITE_NEXT(ptrk, &key, &i, + sizeof(i), data_val, DATALEN); + assert(ptrk != NULL); + count++; + if (verbose) + printf( +"Delete key: %d, \tdata: (id %d, str %s)\n", + i, data_val->id, data_val->str); + } while (++data_val->id < dups); + } + + if ((i + 1) % UPDATES_PER_BULK_PUT == 0) { + switch (ret = dbp->del(dbp, txnp, &key, flag)) { + case 0: + iter++; + DB_MULTIPLE_WRITE_INIT(ptrk, &key); + break; + default: + dbp->err(dbp, ret, "Bulk DB->del"); + goto err; + } + } + } + if ((j % UPDATES_PER_BULK_PUT) != 0) { + switch (ret = dbp->del(dbp, txnp, &key, flag)) { + case 0: + iter++; + break; + default: + dbp->err(dbp, ret, "Bulk DB->del"); + goto err; + } + } + + if (txnp != NULL) { + ret = txnp->commit(txnp, 0); + 
txnp = NULL; + } + + *countp = count; + *iterp = iter; + +err: if (txnp != NULL) + (void)txnp->abort(txnp); + free(data_val); + free(key.data); + return (ret); +} + +/* + * bulk_delete_sec - bulk_delete_sec from a secondary db + */ +int +bulk_delete_sec(dbenv, dbp, num, pair, countp, iterp, verbose) + DB_ENV *dbenv; + DB *dbp; + int num, verbose, pair; + int *countp, *iterp; +{ + DBT key; + DB_TXN *txnp; + u_int32_t flag; + int count, i, iter, j, k, rc, ret; + void *ptrk; + char ch; + + txnp = NULL; + count = flag = iter = ret = 0; + memset(&key, 0, sizeof(DBT)); + rc = rand() % (STRLEN - 1); + + /* + * Need to account for proper buffer size, the buffer must be at + * least as large as the page size of the underlying database, + * aligned for unsigned integer access, and be a multiple of + * 1024 bytes in size. + */ + key.ulen = (u_int32_t)UPDATES_PER_BULK_PUT * + (sizeof(u_int32_t) + DATALEN) * 1024; + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + key.data = malloc(key.ulen); + memset(key.data, 0, key.ulen); + + /* + * Bulk delete records from a random key, which is one of charaters + * in tstring. If DB_MULTIPLE, delete all characters before the random + * key in the tstring. If DB_MULTIPLE_KEY, get duplicate data to the + * specified keys and delete key/data pairs. + */ + flag |= (pair) ? 
DB_MULTIPLE_KEY : DB_MULTIPLE; + DB_MULTIPLE_WRITE_INIT(ptrk, &key); + for (i = 0; i <= rc; i++) { + if (i % UPDATES_PER_BULK_PUT == 0) { + if (txnp != NULL) { + if ((ret = txnp->commit(txnp, 0)) != 0) + goto err; + txnp = NULL; + } + if ((ret = dbenv->txn_begin( + dbenv, NULL, &txnp, 0)) != 0) + goto err; + } + + ch = tstring[i]; + if (!pair) { + DB_MULTIPLE_WRITE_NEXT(ptrk, &key, &ch, sizeof(ch)); + assert(ptrk != NULL); + count++; + if (verbose) + printf("Delete key: %c\n", ch); + } else { + j = 0; + do { + k = j * (STRLEN - 1) + i; + DB_MULTIPLE_KEY_WRITE_NEXT(ptrk, &key, &ch, + sizeof(ch), &k, sizeof(k)); + assert(ptrk != NULL); + count++; + if (verbose) + printf( +"Delete secondary key: %c, \tdata: %d\n", + ch, k); + } while (++j < (int)(num / (STRLEN - 1))); + } + + if ((i + 1) % UPDATES_PER_BULK_PUT == 0) { + switch (ret = dbp->del(dbp, txnp, &key, flag)) { + case 0: + iter++; + DB_MULTIPLE_WRITE_INIT(ptrk, &key); + break; + default: + dbp->err(dbp, ret, "Bulk DB->del"); + goto err; + } + } + } + if ((rc % UPDATES_PER_BULK_PUT) != 0) { + switch (ret = dbp->del(dbp, txnp, &key, flag)) { + case 0: + iter++; + break; + default: + dbp->err(dbp, ret, "Bulk DB->del"); + goto err; + } + } + + if (txnp != NULL) { + ret = txnp->commit(txnp, 0); + txnp = NULL; + } + + *countp = count; + *iterp = iter; + +err: if (txnp != NULL) + (void)txnp->abort(txnp); + free(key.data); + return (ret); +} + +/* + * bulk_fill - bulk_fill a db + * Since we open/created the db with transactions, we need to populate + * it with transactions. We'll bundle the puts UPDATES_PER_BULK_PUT to a + * transaction. 
 */
int
bulk_fill(dbenv, dbp, num, dups, countp, iterp, verbose)
	DB_ENV *dbenv;
	DB *dbp;
	int num, dups, verbose;
	int *countp, *iterp;
{
	DBT key, data;
	u_int32_t flag;
	DB_TXN *txnp;
	struct data *data_val;
	int count, i, iter, ret;
	void *ptrk, *ptrd;

	txnp = NULL;
	count = flag = iter = ret = 0;
	ptrk = ptrd = NULL;
	/* NOTE(review): malloc results are not checked here or below. */
	data_val = malloc(DATALEN);
	memset(data_val, 0, DATALEN);
	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/*
	 * Need to account for proper buffer size,
	 * The buffer must be at least as large as the page size of
	 * the underlying database, aligned for unsigned integer
	 * access, and be a multiple of 1024 bytes in size.
	 */
	key.ulen = (u_int32_t) UPDATES_PER_BULK_PUT *
	    (sizeof(u_int32_t) + DATALEN) * 1024;
	key.flags = DB_DBT_USERMEM | DB_DBT_BULK;
	key.data = malloc(key.ulen);
	memset(key.data, 0, key.ulen);

	data.ulen = (u_int32_t)UPDATES_PER_BULK_PUT *
	    (u_int32_t)DATALEN * 1024;
	data.flags = DB_DBT_USERMEM | DB_DBT_BULK;
	data.data = malloc(data.ulen);
	memset(data.data, 0, data.ulen);

	/*
	 * We could bulk insert with either DB_MULTIPLE in two buffers or
	 * DB_MULTIPLE_KEY in one buffer.  With DB_MULTIPLE, all keys are
	 * constructed in the key DBT, all data is constructed in the data
	 * DBT.  With DB_MULTIPLE_KEY, all key/data pairs are constructed
	 * in the key DBT.  Here, we use DB_MULTIPLE mode when there are
	 * duplicate records.
	 */
	flag |= (dups) ? DB_MULTIPLE : DB_MULTIPLE_KEY;
	DB_MULTIPLE_WRITE_INIT(ptrk, &key);
	if (dups)
		DB_MULTIPLE_WRITE_INIT(ptrd, &data);
	for (i = 0; i < num; i++) {
		/*
		 * Commit the running transaction and start a new one every
		 * UPDATES_PER_BULK_PUT iterations.
		 */
		if (i % UPDATES_PER_BULK_PUT == 0) {
			if (txnp != NULL) {
				if ((ret = txnp->commit(txnp, 0)) != 0)
					goto err;
				txnp = NULL;
			}
			if ((ret =
			    dbenv->txn_begin(dbenv, NULL, &txnp, 0)) != 0)
				goto err;
		}
		/* One pass per duplicate; runs once when dups == 0. */
		data_val->id = 0;
		do {
			get_string(tstring, data_val->str, i);
			if (dups) {
				DB_MULTIPLE_WRITE_NEXT(ptrk, &key,
				    &i, sizeof(i));
				assert(ptrk != NULL);
				DB_MULTIPLE_WRITE_NEXT(ptrd, &data,
				    data_val, DATALEN);
				assert(ptrd != NULL);
			} else {
				DB_MULTIPLE_KEY_WRITE_NEXT(ptrk,
				    &key, &i, sizeof(i), data_val, DATALEN);
				assert(ptrk != NULL);
			}
			if (verbose)
				printf(
"Insert key: %d, \t data: (id %d, str %s)\n",
				    i, data_val->id, data_val->str);
			count++;
		} while (++data_val->id < dups);
		/* Flush the accumulated batch with one bulk put. */
		if ((i + 1) % UPDATES_PER_BULK_PUT == 0) {
			switch (ret = dbp->put(dbp, txnp, &key, &data, flag)) {
			case 0:
				iter++;
				DB_MULTIPLE_WRITE_INIT(ptrk, &key);
				if (dups)
					DB_MULTIPLE_WRITE_INIT(ptrd, &data);
				break;
			default:
				dbp->err(dbp, ret, "Bulk DB->put");
				goto err;
			}
		}
	}
	/* Put any final partial batch. */
	if ((num % UPDATES_PER_BULK_PUT) != 0) {
		switch (ret = dbp->put(dbp, txnp, &key, &data, flag)) {
		case 0:
			iter++;
			break;
		default:
			dbp->err(dbp, ret, "Bulk DB->put");
			goto err;
		}
	}

	if (txnp != NULL) {
		ret = txnp->commit(txnp, 0);
		txnp = NULL;
	}

	*countp = count;
	*iterp = iter;

err:
	if (txnp != NULL)
		(void)txnp->abort(txnp);
	free(key.data);
	free(data.data);
	free(data_val);
	return (ret);
}

/*
 * bulk_get -- loop getting batches of records.
 */
int
bulk_get(dbenv, dbp, num, dups, iter, countp, verbose)
	DB_ENV *dbenv;
	DB *dbp;
	int num, dups, iter, *countp, verbose;
{
	DBC *dbcp;
	DBT key, data;
	DB_TXN *txnp;
	u_int32_t flags, len, klen;
	int count, i, j, ret;
	void *pointer, *dp, *kp;

	klen = count = ret = 0;
	dbcp = NULL;
	dp = kp = pointer = NULL;
	txnp = NULL;

	/* Initialize key DBT and data DBT, malloc bulk buffer. */
	memset(&key, 0, sizeof(key));
	memset(&data, 0, sizeof(data));
	/* The key DBT always points at j, the randomly chosen record key. */
	key.data = &j;
	key.size = sizeof(j);
	data.flags = DB_DBT_USERMEM;
	/* NOTE(review): malloc result unchecked; buffer must also be at
	 * least the database page size for bulk retrieval. */
	data.data = malloc(DATALEN * 16 * 1024);
	memset(data.data, 0, DATALEN * 16 * 1024);
	data.ulen = data.size = DATALEN * 16 * 1024;

	/* Position on an exact key, then fill the buffer in one call. */
	flags = DB_SET;
	flags |= (dups) ? DB_MULTIPLE : DB_MULTIPLE_KEY;
	for (i = 0; i < iter; i++) {
		if ((ret =
		    dbenv->txn_begin(dbenv, NULL, &txnp, 0)) != 0)
			goto err;
		if ((ret = dbp->cursor(dbp, txnp, &dbcp, 0)) != 0)
			goto err;

		/*
		 * Bulk retrieve from a random key.  Use DB_MULTIPLE_INIT to
		 * initialize variables for bulk retrieval.  If there are
		 * duplicate records, use DB_MULTIPLE_NEXT to iterate them
		 * in the buffer.  Otherwise, use DB_MULTIPLE_KEY_NEXT to
		 * iterate records after the random key.
+ */ + j = rand() % num; + if ((ret = dbcp->get(dbcp, &key, &data, flags)) != 0) + goto err; + DB_MULTIPLE_INIT(pointer, &data); + if (dups) + while (pointer) { + DB_MULTIPLE_NEXT(pointer, &data, dp, len); + if (dp) { + count++; + if (verbose) + printf( +"Retrieve key: %d, \tdata: (id %d, str %s)\n", + j, ((struct data *)(dp))->id, + (char *)((struct data *)(dp))->str); + } + } + else + while (pointer) { + DB_MULTIPLE_KEY_NEXT(pointer, + &data, kp, klen, dp, len); + if (kp) { + count++; + if (verbose) + printf( +"Retrieve key: %d, \tdata: (id %d, str %s)\n", + *((int *)kp), ((struct data *)(dp))->id, + (char *)((struct data *)(dp))->str); + } + } + + if ((ret = dbcp->close(dbcp)) != 0) + goto err; + if ((ret = txnp->commit(txnp, 0)) != 0) + goto err; + } + + *countp = count; + +err: + if (ret != 0) + dbp->err(dbp, ret, "get"); + free(data.data); + return (ret); +} + +int +compare_int(dbp, a, b) + DB *dbp; + const DBT *a, *b; +{ + int ai, bi; + + dbp = NULL; + + /* + * Returns: + * < 0 if a < b + * = 0 if a = b + * > 0 if a > b + */ + memcpy(&ai, a->data, sizeof(int)); + memcpy(&bi, b->data, sizeof(int)); + return (ai - bi); +} + +/* + * db_init -- + * Initialize the environment. + */ +DB_ENV * +db_init(home, prefix, cachesize) + char *home, *prefix; + u_int cachesize; +{ + DB_ENV *dbenv; + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_env_create"); + return (NULL); + } + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, prefix); + (void)dbenv->set_cachesize(dbenv, 0, + cachesize == 0 ? 
50 * 1024 * 1024 : (u_int32_t)cachesize, 0); + + if ((ret = dbenv->open(dbenv, home, DB_CREATE | DB_INIT_MPOOL | + DB_INIT_TXN | DB_INIT_LOCK, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open: %s", home); + (void)dbenv->close(dbenv, 0); + return (NULL); + } + return (dbenv); +} + +int +get_first_str(sdbp, key, data, skey) + DB *sdbp; + const DBT *key; + const DBT *data; + DBT *skey; +{ + sdbp = NULL; + key = NULL; + + memset(skey, 0, sizeof(DBT)); + skey->data = ((struct data *)(data->data))->str; + skey->size = sizeof(char); + return (0); +} + +int +get_string(src, des, off) + const char *src; + char *des; + int off; +{ + int i; + + for (i = 0; i < (int)(STRLEN - 1); i++) + des[i] = src[(off + i) % (STRLEN - 1)]; + des[STRLEN - 1] = '\0'; + return (0); +} + +void +usage() +{ + (void)fprintf(stderr, +"Usage: %s \n\ + -c cachesize [1000 * pagesize] \n\ + -d number of duplicates [none] \n\ + -i number of read iterations [1000000] \n\ + -n number of keys [1000000] \n\ + -p pagesize [65536] \n\ + -v verbose output \n\ + -D perform bulk delete \n\ + -I just initialize the environment \n\ + -R perform bulk read \n\ + -S perform bulk read in secondary database \n\ + -U perform bulk update \n", + progname); + exit(EXIT_FAILURE); +} diff --git a/examples/c/ex_env.c b/examples/c/ex_env.c new file mode 100644 index 00000000..2bdc79c5 --- /dev/null +++ b/examples/c/ex_env.c @@ -0,0 +1,183 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +int db_setup __P((const char *, const char *, FILE *, const char *)); +int db_teardown __P((const char *, const char *, FILE *, const char *)); +static int usage __P((void)); + +const char *progname = "ex_env"; /* Program name. 
*/ + +/* + * An example of a program creating/configuring a Berkeley DB environment. + */ +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + const char *data_dir, *home; + + int ch; + /* + * All of the shared database files live in home, but + * data files will live in data_dir. + */ + home = "TESTDIR"; + data_dir = "data"; + while ((ch = getopt(argc, argv, "h:d:")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'd': + data_dir = optarg; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + printf("Setup env\n"); + if (db_setup(home, data_dir, stderr, progname) != 0) + return (EXIT_FAILURE); + + printf("Teardown env\n"); + if (db_teardown(home, data_dir, stderr, progname) != 0) + return (EXIT_FAILURE); + + return (EXIT_SUCCESS); +} + +int +db_setup(home, data_dir, errfp, progname) + const char *home, *data_dir, *progname; + FILE *errfp; +{ + DB_ENV *dbenv; + DB *dbp; + int ret; + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(errfp, "%s: %s\n", progname, db_strerror(ret)); + return (1); + } + dbenv->set_errfile(dbenv, errfp); + dbenv->set_errpfx(dbenv, progname); + + /* + * We want to specify the shared memory buffer pool cachesize, + * but everything else is the default. + */ + if ((ret = dbenv->set_cachesize(dbenv, 0, 64 * 1024, 0)) != 0) { + dbenv->err(dbenv, ret, "set_cachesize"); + dbenv->close(dbenv, 0); + return (1); + } + + /* Databases are in a subdirectory. */ + (void)dbenv->set_data_dir(dbenv, data_dir); + + /* Open the environment with full transactional support. 
*/ + if ((ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | + DB_INIT_TXN, 0644)) != 0) { + dbenv->err(dbenv, ret, "environment open: %s", home); + dbenv->close(dbenv, 0); + return (1); + } + + /* + * Open a database in the environment to verify the data_dir + * has been set correctly. + * Create a database object and initialize it for error + * reporting. + */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + fprintf(errfp, "%s: %s\n", progname, db_strerror(ret)); + return (1); + } + + /* Open a database with DB_BTREE access method. */ + if ((ret = dbp->open(dbp, NULL, "exenv_db1.db", NULL, + DB_BTREE, DB_CREATE,0644)) != 0) { + fprintf(stderr, "database open: %s\n", db_strerror(ret)); + return (1); + } + + /* Close the database handle. */ + if ((ret = dbp->close(dbp, 0)) != 0) { + fprintf(stderr, "database close: %s\n", db_strerror(ret)); + return (1); + } + + /* Close the environment handle. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "DB_ENV->close: %s\n", db_strerror(ret)); + return (1); + } + return (0); +} + +int +db_teardown(home, data_dir, errfp, progname) + const char *home, *data_dir, *progname; + FILE *errfp; +{ + DB_ENV *dbenv; + int ret; + + /* Remove the shared database regions. */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(errfp, "%s: %s\n", progname, db_strerror(ret)); + return (1); + } + dbenv->set_errfile(dbenv, errfp); + dbenv->set_errpfx(dbenv, progname); + + (void)dbenv->set_data_dir(dbenv, data_dir); + + /* Remove the environment. 
 */
	if ((ret = dbenv->remove(dbenv, home, 0)) != 0) {
		fprintf(stderr, "DB_ENV->remove: %s\n", db_strerror(ret));
		return (1);
	}
	return (0);
}

/* Print a usage message and return failure. */
static int
usage()
{
	(void)fprintf(stderr,
	    "usage: %s [-h home] [-d data_dir]\n", progname);
	return (EXIT_FAILURE);
}
diff --git a/examples/c/ex_heap.c b/examples/c/ex_heap.c
new file mode 100644
index 00000000..a6711408
--- /dev/null
+++ b/examples/c/ex_heap.c
@@ -0,0 +1,640 @@
/*
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
 *
 * $Id$
 *
 * This program demonstrates:
 * 1. Usage of heap access method.
 * 2. Differences between the heap and btree access methods.
 *
 * The application initially populates a database, and then proceeds to
 * move into a process of adding and removing data, keeping a fairly
 * constant amount of data in the database.  The heap access method will
 * maintain a constant database size if the heap size is configured properly,
 * while the btree database will continue to grow.
 */

/*
 * NOTE(review): the header names on the following #include lines were
 * lost in extraction (presumably <stdio.h>, <stdlib.h>, <string.h>) —
 * restore before building.
 */
#include
#include
#include

#include "db.h"

#ifndef lint
static const char copyright[] =
    "Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.\n";
#endif

#define BUFFER_LEN 30	/* Buffer size to hold data */
#define NS_PER_MS 1000000	/* Nanoseconds in a millisecond */
#define NS_PER_US 1000	/* Nanoseconds in a microsecond */
#define DEF_INIT_RECS 10000	/* Default initial records */
#define DEF_RECS_PER_REP 10000	/* Default records per repeat. */
#define DEF_REPEATS 1	/* Default repetition value */

/*
 * Average space each record needs, based on the data generated.
+ * The ideal heap size for this example should be set as (bytes): + * AVG_SPACE_PER_RECORD * (DEF_INIT_RECS + records inserted each repetition) + */ +#define AVG_SPACE_PER_RECORD 36 + +#ifdef _WIN32 +#include +#include + +extern int getopt(int, char * const *, const char *); + +/* Implement a basic high resource timer with a POSIX interface for Windows.*/ +struct timeval { + time_t tv_sec; + long tv_usec; +}; + +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct _timeb now; + _ftime(&now); + tv->tv_sec = now.time; + tv->tv_usec = now.millitm * NS_PER_US; + return (0); +} +#else +#include +#include +#endif + +int compare_int(DB *, const DBT *, const DBT *); +int delete_recs __P((DB *, DB_ENV *, int)); +int file_size __P((DB *, DBTYPE, int *)); +int generate_data __P((char [], int, int)); +int insert_btree __P((DB *, DB_ENV *, int, int, int)); +int insert_heap __P((DB *, DB_ENV *, int, int, int)); +int open_db __P(( + DB **, DB_ENV *, DBTYPE, char *, u_int32_t, u_int32_t, u_int32_t)); +int open_env __P((DB_ENV **, char *, u_int32_t)); +int run_workload __P((DB *, int, int, int)); +void usage __P((void)); + +const char *progname = "ex_heap"; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + DB_ENV *dbenv; + DB *dbp; + u_int32_t cachesize, ghpsize, hpsize, pgsize; + char *home; + int ch, ret, set_ghpsize, set_hpsize, test_btree, test_var_data; + int recs_per_rep, repeats; + + dbenv = NULL; + dbp = NULL; + cachesize = 0; + ret = set_ghpsize = set_hpsize = test_btree = 0; + home = NULL; + + recs_per_rep = DEF_RECS_PER_REP; + ghpsize = hpsize = pgsize = 0; + repeats = DEF_REPEATS; + test_var_data = 0; /* Default as fix-length data. 
*/ + + while ((ch = getopt(argc, argv, "bc:dh:n:p:r:S:s:")) != EOF) + switch (ch) { + case 'b': + test_btree = 1; + break; + case 'c': + cachesize = atoi(optarg); + break; + case 'd': + test_var_data = 1; + break; + case 'h': + home = optarg; + break; + case 'n': + recs_per_rep = atoi(optarg); + break; + case 'p': + pgsize = atoi(optarg); + break; + case 'r': + repeats = atoi(optarg); + break; + case 's': + set_hpsize = 1; + hpsize = atoi(optarg); + break; + case 'S': + set_ghpsize = 1; + ghpsize = atoi(optarg); + break; + default: + usage(); + } + + if (!home) + usage(); + + srand((int)time(NULL)); + + /* + * If heap size is not specified, then use our default configuration + * as follows. + */ + if (!set_hpsize && !set_ghpsize) + hpsize = AVG_SPACE_PER_RECORD * (DEF_INIT_RECS + recs_per_rep); + + if ((ret = open_env(&dbenv, home, cachesize)) != 0) { + fprintf(stderr, "%s: open_env: %s", progname, db_strerror(ret)); + goto err; + } + + if ((ret = open_db(&dbp, dbenv, DB_HEAP, home, + ghpsize, hpsize, pgsize)) != 0) { + dbenv->err(dbenv, ret, "Failed to open heap database."); + goto err; + } + + /* + * Perform requested rounds of insert/delete operations + * using heap database. + */ + if ((ret = + run_workload(dbp, repeats, recs_per_rep, test_var_data)) != 0) { + dbenv->err(dbenv, ret, + "Failed to perform operations on heap database."); + goto err; + } + + if (test_btree) { + /* Close the DB handle for heap. */ + if ((ret = dbp->close(dbp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->close"); + goto err; + } + dbp = NULL; + + if ((ret = + open_db(&dbp, dbenv, DB_BTREE, home, 0, 0, pgsize)) != 0) { + dbenv->err(dbenv, ret, + "Failed to open btree database."); + goto err; + } + + /* + * Perform requested rounds of insert/delete operations + * using btree database. 
+ */ + if ((ret = run_workload(dbp, + repeats, recs_per_rep, test_var_data)) != 0) { + dbenv->err(dbenv, ret, + "Failed to perform operations on btree database."); + goto err; + } + } +err: + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) + dbenv->err(dbenv, ret, "DB->close"); + dbp = NULL; + + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "%s: dbenv->close: %s", progname, + db_strerror(ret)); + } + + return (ret); +} + +int +run_workload(dbp, repeats, recs_per_rep, test_var) + DB *dbp; + int repeats, recs_per_rep, test_var; +{ + DB_ENV *dbenv; + DBTYPE dbtype; + u_int32_t ghpsize, hpsize; + struct timeval end_time, start_time; + double *time_secs; + int *db_file_sizes, fsize, i, ret; + + dbenv = dbp->dbenv; + fsize = 0; + time_secs = NULL; + db_file_sizes = NULL; + + if ((ret = dbp->get_type(dbp, &dbtype)) != 0) { + dbenv->err(dbenv, ret, "DB->get_type"); + goto err; + } + + if (dbtype == DB_HEAP && + (ret = dbp->get_heapsize(dbp, &ghpsize, &hpsize)) != 0) { + dbenv->err(dbenv, ret, "DB->get_heapsize"); + goto err; + } + + /* An array to record the physical database file size. */ + if ((db_file_sizes = + (int *)malloc((repeats + 1) * sizeof(int))) == NULL) { + fprintf(stderr, + "%s: Unable to allocate space for array db_file_sizes.\n", + progname); + goto err; + } + memset(db_file_sizes, 0, (repeats + 1) * sizeof(int)); + + /* An array to record the running time for each repetition. */ + if ((time_secs = + (double *)malloc((repeats + 1) * sizeof(double))) == NULL) { + fprintf(stderr, + "%s: Unable to allocate space for array time_secs.\n", + progname); + goto err; + } + memset(time_secs, 0, (repeats + 1) * sizeof(double)); + + printf("\n\n======================================================"); + printf("\nAbout to enter the insert phase."); + printf("\n\tDatabase type: %s \t", + dbtype == DB_HEAP ? 
"Heap" : "Btree"); + if (dbtype == DB_HEAP) + printf("with configured heapsize = %d gbytes and %d bytes.", + ghpsize, hpsize); + printf("\n\tPagesize: %d", dbp->pgsize); + printf("\n\tInitial records number: %d", DEF_INIT_RECS); + printf("\n\tNumber of repetitions: %d", repeats); + printf("\n\tNumber of inserts per repetition: %d\n", recs_per_rep); + + /* + * Insert records to the database and delete the same number from + * the database, then check the change of the physical database file. + * + * Don't delete after the first insertion to leave some data + * in the tables for subsequent iterations. + */ + for (i = 0; i <= repeats; i++) { + /* Time for each loop. */ + (void)gettimeofday(&start_time, NULL); + + if ((dbtype == DB_HEAP) && (ret = insert_heap(dbp, dbenv, + i == 0 ? DEF_INIT_RECS : recs_per_rep, + i == 0 ? 0 : (DEF_INIT_RECS + (i - 1) * recs_per_rep), + test_var)) != 0) { + dbenv->err(dbenv, ret, + "Failed to insert records to heap database."); + goto err; + } + + if ((dbtype == DB_BTREE) && (ret = insert_btree(dbp, dbenv, + i == 0 ? DEF_INIT_RECS : recs_per_rep, + i == 0 ? 0 : (DEF_INIT_RECS + (i - 1) * recs_per_rep), + test_var)) != 0) { + dbenv->err(dbenv, ret, + "Failed to insert records to btree database."); + goto err; + } + + if (i > 0 && + (ret = delete_recs(dbp, dbenv, recs_per_rep)) != 0) { + dbenv->err(dbenv, ret, "Failed to delete records."); + goto err; + } + + (void)gettimeofday(&end_time, NULL); + time_secs[i] = + (((double)end_time.tv_sec * NS_PER_MS + + end_time.tv_usec) - + ((double)start_time.tv_sec * NS_PER_MS + + start_time.tv_usec)) / NS_PER_MS; + + /* Calculate the physical file size for each repetition. 
*/ + if ((ret = file_size(dbp, dbtype, &fsize)) != 0) { + dbenv->err(dbenv, ret, "Failed to calculate " + "the file size on repeat %d.\n", i); + goto err; + } + db_file_sizes[i] = fsize; + } + printf("\n------------------------------------------------------\n"); + printf("%5s \t %10s \t %10s\n", "repetition", "physical file size", + "running time"); + for (i = 0; i <= repeats; i++) + printf("%5d \t\t %10d \t\t %.2f seconds\n", + i, db_file_sizes[i], time_secs[i]); + +err: + if (db_file_sizes != NULL) + free(db_file_sizes); + if (time_secs != NULL) + free(time_secs); + + return (ret); +} + +/* Calculate the size of the given database. */ +int +file_size(dbp, dbtype, fsize) + DB *dbp; + DBTYPE dbtype; + int *fsize; +{ + DB_ENV *dbenv; + u_int32_t pgcnt, pgsize; + int ret, size; + void *statp; + + dbenv = dbp->dbenv; + pgsize = dbp->pgsize; + ret = size = 0; + + if ((ret = dbp->stat(dbp, NULL, &statp, DB_FAST_STAT)) != 0) { + dbenv->err(dbenv, ret, "DB->stat"); + return (ret); + } + + pgcnt = (dbtype == DB_HEAP ? ((DB_HEAP_STAT *)statp)->heap_pagecnt : + ((DB_BTREE_STAT *)statp)->bt_pagecnt); + + size = pgcnt * pgsize; + *fsize = size; + + free(statp); + + return (ret); +} + +/* + * Insert an certain number of records to heap database, + * with the key beginning with a specified value. 
+ */ +int +insert_heap(dbp, dbenv, numrecs, start, test_var) + DB *dbp; + DB_ENV *dbenv; + int numrecs, start, test_var; +{ + DB_HEAP_RID rid; + DBT key, data; + char buf[BUFFER_LEN]; + int cnt, ret; + + memset(&rid, 0, sizeof(DB_HEAP_RID)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + ret = 0; + + key.data = &rid; + key.size = key.ulen = sizeof(DB_HEAP_RID); + key.flags = DB_DBT_USERMEM; + data.data = buf; + data.flags = DB_DBT_USERMEM; + + for (cnt = start; cnt < (numrecs + start) && + (ret = generate_data(buf, cnt, test_var)) == 0; ++cnt) { + data.size = data.ulen = (u_int32_t)strlen(buf) + 1; + + /* Require DB_APPEND flag to add new data to the database.*/ + if ((ret = dbp->put(dbp, NULL, &key, &data, DB_APPEND)) != 0) { + dbenv->err(dbenv, ret, "insert_heap:DB->put"); + break; + } + } + + return (ret); +} + +/* + * Insert an certain number of records to btree database, + * with the key beginning with a specified value. + */ +int +insert_btree(dbp, dbenv, numrecs, start, test_var) + DB *dbp; + DB_ENV *dbenv; + int numrecs, start, test_var; +{ + DBT key, data; + char buf[BUFFER_LEN]; + int cnt, ret; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + ret = 0; + + key.data = &cnt; + key.size = key.ulen = sizeof(int); + key.flags = DB_DBT_USERMEM; + data.data = buf; + data.flags = DB_DBT_USERMEM; + + for (cnt = start; cnt < (numrecs + start) && + (ret = generate_data(buf, cnt, test_var)) == 0; ++cnt) { + data.size = data.ulen = (u_int32_t)strlen(buf) + 1; + + if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) != 0) { + dbenv->err(dbenv, ret, "insert_btree:DB->put"); + break; + } + } + + return (ret); +} + +/* Generate the data for the specified record. */ +int +generate_data(buf, rec_no, test_var) + char *buf; + int rec_no, test_var; +{ + const char *str = "abcdefghijklmnopqrst"; + size_t len = strlen(str); + + /* + * Default use the fix-length data, + * if required then use variable-length data. 
+ */ + if (test_var == 1) + len = rand() % (len - 2) + 1; + + (void)sprintf(buf, "%04d_%*s", rec_no, len, str); + + return (0); +} + +/* Delete an certain number of records. */ +int +delete_recs(dbp, dbenv, numrecs) + DB *dbp; + DB_ENV *dbenv; + int numrecs; +{ + DBC *dbcp; + DBT key, data; + int cnt, ret; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + dbcp = NULL; + cnt = ret = 0; + + /* + * Delete from the first entry, get the first entry using + * the DBcursor, then delete it using the DB handle. + */ + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbenv->err(dbenv, ret, "delete_recs:DB->cursor"); + goto err; + } + + while ((ret = dbcp->get(dbcp, &key, &data, DB_NEXT)) == 0 && + cnt < numrecs) { + if ((ret = dbcp->del(dbcp, 0)) != 0) { + dbenv->err(dbenv, ret, "delete_recs:DBCursor->del"); + break; + } else + ++cnt; + } + +err: + if (dbcp != NULL && (ret = dbcp->close(dbcp)) != 0) + dbenv->err(dbenv, ret, "delete_recs:DBCursor->close"); + + return (ret); +} + +void +usage() +{ + fprintf(stderr, "usage: %s:\n%s \n %s\n", progname, + "\t[-b][-c cachesize][-d] -h home [-n recs_per_rep]", + "\t[-p pgsize][-r repeats][-S ghpsize][-s hpsize]"); + + fprintf(stderr, "-b: run sample application using a btree database.\n"); + fprintf(stderr, "-c: specify the cache size for the environment.\n"); + fprintf(stderr, "-d: test on variable-length data " + "(default: fix-length).\n"); + fprintf(stderr, "-h: specify the home directory for " + "the environment (required).\n"); + fprintf(stderr, "-n: specify the num. 
of records " + "per repetition (default: %d).\n", DEF_RECS_PER_REP); + fprintf(stderr, "-p: specify the pgsize of database.\n"); + fprintf(stderr, "-r: number of repetition (a pair of " + "insertion and deletion (default: %d)).\n", DEF_REPEATS); + fprintf(stderr, + "-S: specify the heap size (gbytes) for the heap database.\n"); + fprintf(stderr, + "-s: specify the heap size (bytes) for the heap database.\n"); + + exit(EXIT_FAILURE); +} + +int +open_env(dbenvp, home, cachesize) + DB_ENV **dbenvp; + char *home; + u_int32_t cachesize; +{ + DB_ENV *dbenv; + int ret = 0; + + /* Create an environment handle and open an environment. */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + progname, db_strerror(ret)); + return (ret); + } + + *dbenvp = dbenv; + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if ((cachesize > 0) && (ret = + dbenv->set_cachesize(dbenv, (u_int32_t)0, cachesize, 1)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_cachesize"); + return (ret); + } + + if ((ret = dbenv->open(dbenv, home, DB_CREATE | DB_INIT_MPOOL, 0)) != 0) + dbenv->err(dbenv, ret, "DB_ENV->open"); + + return (ret); +} + +int +open_db(dbpp, dbenv, dbtype, home, ghpsize, hpsize, pgsize) + DB **dbpp; + DB_ENV *dbenv; + DBTYPE dbtype; + char *home; + u_int32_t ghpsize, hpsize, pgsize; +{ + DB *dbp; + u_int32_t dbflags = 0; + char *dbname; + int ret = 0; + + dbname = (dbtype == DB_HEAP) ? "heap.db" : "btree.db"; + + /* Create a database handle and open a database. 
*/ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create : %s", dbname); + goto err; + } + + *dbpp = dbp; + + if ((dbtype == DB_BTREE) && + (ret = dbp->set_bt_compare(dbp, compare_int)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compare"); + goto err; + } + + if ((dbtype == DB_HEAP) && (ghpsize > 0 || hpsize > 0) && + (ret = dbp->set_heapsize(dbp, ghpsize, hpsize, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->set_heapsize"); + return (ret); + } + + if ((pgsize > 0) && (ret = dbp->set_pagesize(dbp, pgsize)) != 0) { + dbenv->err(dbenv, ret, "DB->set_pagesize"); + return (ret); + } + + if ((ret = + dbp->open(dbp, NULL, dbname, NULL, dbtype, DB_CREATE, 0)) != 0) + dbenv->err(dbenv, ret, "DB->open"); +err: + + return (ret); +} + +int +compare_int(dbp, a, b) + DB *dbp; + const DBT *a, *b; +{ + int ai, bi; + + dbp = NULL; + + /* + * Returns: + * < 0 if a < b + * = 0 if a = b + * > 0 if a > b + */ + memcpy(&ai, a->data, sizeof(int)); + memcpy(&bi, b->data, sizeof(int)); + return (ai - bi); +} diff --git a/examples/c/ex_lock.c b/examples/c/ex_lock.c new file mode 100644 index 00000000..83665398 --- /dev/null +++ b/examples/c/ex_lock.c @@ -0,0 +1,239 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +int db_init __P((const char *, u_int32_t, int)); +int main __P((int, char *[])); +int usage __P((void)); + +DB_ENV *dbenv; +const char + *progname = "ex_lock"; /* Program name. 
*/ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DBT lock_dbt; + DB_LOCK lock; + DB_LOCK *locks; + db_lockmode_t lock_type; + long held; + size_t len; + u_int32_t locker, maxlocks; + int ch, do_unlink, did_get, i, lockid, lockcount, ret; + const char *home; + char opbuf[16], objbuf[1024], lockbuf[16]; + + home = "TESTDIR"; + maxlocks = 0; + do_unlink = 0; + while ((ch = getopt(argc, argv, "h:m:u")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'm': + if ((i = atoi(optarg)) <= 0) + return (usage()); + maxlocks = (u_int32_t)i; /* XXX: possible overflow. */ + break; + case 'u': + do_unlink = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Initialize the database environment. */ + if ((ret = db_init(home, maxlocks, do_unlink)) != 0) + return (ret); + + locks = 0; + lockcount = 0; + + /* + * Accept lock requests. + */ + if ((ret = dbenv->lock_id(dbenv, &locker)) != 0) { + dbenv->err(dbenv, ret, "unable to get locker id"); + (void)dbenv->close(dbenv, 0); + return (EXIT_FAILURE); + } + lockid = -1; + + memset(&lock_dbt, 0, sizeof(lock_dbt)); + for (held = 0, did_get = 0;;) { + printf("Operation get/release [get]> "); + fflush(stdout); + if (fgets(opbuf, sizeof(opbuf), stdin) == NULL) + break; + if ((len = strlen(opbuf)) <= 1 || strcmp(opbuf, "get\n") == 0) { + /* Acquire a lock. 
*/ + printf("input object (text string) to lock> "); + fflush(stdout); + if (fgets(objbuf, sizeof(objbuf), stdin) == NULL) + break; + if ((len = strlen(objbuf)) <= 1) + continue; + + do { + printf("lock type read/write [read]> "); + fflush(stdout); + if (fgets(lockbuf, + sizeof(lockbuf), stdin) == NULL) + break; + len = strlen(lockbuf); + } while (len > 1 && + strcmp(lockbuf, "read\n") != 0 && + strcmp(lockbuf, "write\n") != 0); + if (len == 1 || strcmp(lockbuf, "read\n") == 0) + lock_type = DB_LOCK_READ; + else + lock_type = DB_LOCK_WRITE; + + lock_dbt.data = objbuf; + lock_dbt.size = (u_int32_t)strlen(objbuf); + ret = dbenv->lock_get(dbenv, locker, + DB_LOCK_NOWAIT, &lock_dbt, lock_type, &lock); + if (ret == 0) { + did_get = 1; + lockid = lockcount++; + if (locks == NULL) + locks = + (DB_LOCK *)malloc(sizeof(DB_LOCK)); + else + locks = (DB_LOCK *)realloc(locks, + lockcount * sizeof(DB_LOCK)); + locks[lockid] = lock; + } + } else { + /* Release a lock. */ + do { + printf("input lock to release> "); + fflush(stdout); + if (fgets(objbuf, + sizeof(objbuf), stdin) == NULL) + break; + } while ((len = strlen(objbuf)) <= 1); + lockid = strtol(objbuf, NULL, 16); + if (lockid < 0 || lockid >= lockcount) { + printf("Lock #%d out of range\n", lockid); + continue; + } + lock = locks[lockid]; + ret = dbenv->lock_put(dbenv, &lock); + did_get = 0; + } + switch (ret) { + case 0: + printf("Lock #%d %s\n", lockid, + did_get ? "granted" : "released"); + held += did_get ? 1 : -1; + break; + case DB_LOCK_NOTGRANTED: + dbenv->err(dbenv, ret, NULL); + break; + case DB_LOCK_DEADLOCK: + dbenv->err(dbenv, ret, + "lock_%s", did_get ? "get" : "put"); + break; + default: + dbenv->err(dbenv, ret, + "lock_%s", did_get ? 
"get" : "put"); + (void)dbenv->close(dbenv, 0); + return (EXIT_FAILURE); + } + } + + printf("\nClosing lock region %ld locks held\n", held); + + if (locks != NULL) + free(locks); + + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); +} + +/* + * db_init -- + * Initialize the environment. + */ +int +db_init(home, maxlocks, do_unlink) + const char *home; + u_int32_t maxlocks; + int do_unlink; +{ + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + + if (do_unlink) { + if ((ret = dbenv->remove(dbenv, home, DB_FORCE)) != 0) { + fprintf(stderr, "%s: dbenv->remove: %s\n", + progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + if (maxlocks != 0) + dbenv->set_lk_max_locks(dbenv, maxlocks); + + if ((ret = + dbenv->open(dbenv, home, DB_CREATE | DB_INIT_LOCK, 0)) != 0) { + dbenv->err(dbenv, ret, NULL); + (void)dbenv->close(dbenv, 0); + return (EXIT_FAILURE); + } + return (0); +} + +int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-u] [-h home] [-m maxlocks]\n", progname); + return (EXIT_FAILURE); +} diff --git a/examples/c/ex_mpool.c b/examples/c/ex_mpool.c new file mode 100644 index 00000000..18befdf7 --- /dev/null +++ b/examples/c/ex_mpool.c @@ -0,0 +1,258 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +int init __P((const char *, int, int, const char *)); +int run __P((int, int, int, int, const char *)); +int run_mpool __P((int, int, int, int, const char *)); +int main __P((int, char *[])); +int usage __P((const char *)); +#define MPOOL "mpool" /* File. */ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + int cachesize, ch, hits, npages, pagesize; + char *progname; + + cachesize = 20 * 1024; + hits = 1000; + npages = 50; + pagesize = 1024; + progname = argv[0]; + while ((ch = getopt(argc, argv, "c:h:n:p:")) != EOF) + switch (ch) { + case 'c': + if ((cachesize = atoi(optarg)) < 20 * 1024) + return (usage(progname)); + break; + case 'h': + if ((hits = atoi(optarg)) <= 0) + return (usage(progname)); + break; + case 'n': + if ((npages = atoi(optarg)) <= 0) + return (usage(progname)); + break; + case 'p': + if ((pagesize = atoi(optarg)) <= 0) + return (usage(progname)); + break; + case '?': + default: + return (usage(progname)); + } + argc -= optind; + argv += optind; + + return (run_mpool(pagesize, cachesize, + hits, npages, progname) == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage(progname) + const char *progname; +{ + (void)fprintf(stderr, + "usage: %s [-c cachesize] [-h hits] [-n npages] [-p pagesize]\n", + progname); + return (EXIT_FAILURE); +} + +int +run_mpool(pagesize, cachesize, hits, npages, progname) + int pagesize, cachesize, hits, npages; + const char *progname; +{ + int ret; + + /* Initialize the file. */ + if ((ret = init(MPOOL, pagesize, npages, progname)) != 0) + return (ret); + + /* Get the pages. */ + if ((ret = run(hits, cachesize, pagesize, npages, progname)) != 0) + return (ret); + + return (0); +} + +/* + * init -- + * Create a backing file. 
+ */ +int +init(file, pagesize, npages, progname) + const char *file, *progname; + int pagesize, npages; +{ + FILE *fp; + int cnt; + char *p; + + /* + * Create a file with the right number of pages, and store a page + * number on each page. + */ + (void)remove(file); + if ((fp = fopen(file, "wb")) == NULL) { + fprintf(stderr, + "%s: %s: %s\n", progname, file, strerror(errno)); + return (1); + } + if ((p = (char *)malloc(pagesize)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM)); + return (1); + } + + /* + * The pages are numbered from 0, not 1. + * + * Write the index of the page at the beginning of the page in order + * to verify the retrieved page (see run()). + */ + for (cnt = 0; cnt < npages; ++cnt) { + *(db_pgno_t *)p = cnt; + if (fwrite(p, pagesize, 1, fp) != 1) { + fprintf(stderr, + "%s: %s: %s\n", progname, file, strerror(errno)); + return (1); + } + } + + (void)fclose(fp); + free(p); + return (0); +} + +/* + * run -- + * Get a set of pages. + */ +int +run(hits, cachesize, pagesize, npages, progname) + int hits, cachesize, pagesize, npages; + const char *progname; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mfp; + db_pgno_t pageno; + int cnt, ret; + void *p; + + dbenv = NULL; + mfp = NULL; + + printf("%s: cachesize: %d; pagesize: %d; N pages: %d\n", + progname, cachesize, pagesize, npages); + + /* + * Open a memory pool, specify a cachesize, output error messages + * to stderr. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (1); + } + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); +#ifdef HAVE_VXWORKS + if ((ret = dbenv->set_shm_key(dbenv, VXSHM_KEY)) != 0) { + dbenv->err(dbenv, ret, "set_shm_key"); + return (1); + } +#endif + + /* Set the cachesize. */ + if ((ret = dbenv->set_cachesize(dbenv, 0, cachesize, 0)) != 0) { + dbenv->err(dbenv, ret, "set_cachesize"); + goto err; + } + + /* Open the environment. 
*/ + if ((ret = dbenv->open( + dbenv, NULL, DB_CREATE | DB_INIT_MPOOL, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* Open the file in the environment. */ + if ((ret = dbenv->memp_fcreate(dbenv, &mfp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->memp_fcreate: %s", MPOOL); + goto err; + } + if ((ret = mfp->open(mfp, MPOOL, 0, 0, pagesize)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->open: %s", MPOOL); + goto err; + } + + printf("retrieve %d random pages... ", hits); + + srand((u_int)time(NULL)); + for (cnt = 0; cnt < hits; ++cnt) { + pageno = rand() % npages; + if ((ret = mfp->get(mfp, &pageno, NULL, 0, &p)) != 0) { + dbenv->err(dbenv, ret, + "unable to retrieve page %lu", (u_long)pageno); + goto err; + } + /* Verify the page's number that was written in init(). */ + if (*(db_pgno_t *)p != pageno) { + dbenv->errx(dbenv, + "wrong page retrieved (%lu != %d)", + (u_long)pageno, *(int *)p); + goto err; + } + if ((ret = mfp->put(mfp, p, DB_PRIORITY_UNCHANGED, 0)) != 0) { + dbenv->err(dbenv, ret, + "unable to return page %lu", (u_long)pageno); + goto err; + } + } + + printf("successful.\n"); + + /* Close the file. */ + if ((ret = mfp->close(mfp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->close"); + goto err; + } + + /* Close the pool. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (1); + } + return (0); + +err: if (mfp != NULL) + (void)mfp->close(mfp, 0); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); + return (1); +} diff --git a/examples/c/ex_rep/README b/examples/c/ex_rep/README new file mode 100644 index 00000000..7f20e13e --- /dev/null +++ b/examples/c/ex_rep/README @@ -0,0 +1,19 @@ +# $Id$ + +This is the parent directory for the replication example programs. + +The example is a toy stock quote server. 
There are two versions of +the program: one version uses Berkeley DB's Replication Manager +support, and the other uses the base replication API. + +common/ Contains code to implement the basic functions of the + application, to demonstrate that these are largely + independent of which replication API is used. + +mgr/ Contains the small amount of code necessary to + configure the application to use Replication Manager. + +base/ Contains the sample communications infrastructure, and + other replication support code, to demonstrate some of + the kinds of things that are necessary when using the + base replication API. diff --git a/examples/c/ex_rep/base/rep_base.c b/examples/c/ex_rep/base/rep_base.c new file mode 100644 index 00000000..93556ad0 --- /dev/null +++ b/examples/c/ex_rep/base/rep_base.c @@ -0,0 +1,247 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include +#include +#include +#include + +#include + +#include "rep_base.h" + +/* + * Process globals (we could put these in the machtab I suppose). 
+ */ +int master_eid; +char *myaddr; +unsigned short myport; + +const char *progname = "ex_rep_base"; + +static void event_callback __P((DB_ENV *, u_int32_t, void *)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + DB_ENV *dbenv; + SETUP_DATA setup_info; + DBT local; + all_args aa; + connect_args ca; + supthr_args supa; + machtab_t *machtab; + thread_t all_thr, ckp_thr, conn_thr, lga_thr; + void *astatus, *cstatus; +#ifdef _WIN32 + WSADATA wsaData; +#else + struct sigaction sigact; +#endif + APP_DATA my_app_data; + int ret; + + memset(&setup_info, 0, sizeof(SETUP_DATA)); + setup_info.progname = progname; + master_eid = DB_EID_INVALID; + memset(&my_app_data, 0, sizeof(APP_DATA)); + dbenv = NULL; + machtab = NULL; + ret = 0; + + if ((ret = create_env(progname, &dbenv)) != 0) + goto err; + dbenv->app_private = &my_app_data; + (void)dbenv->set_event_notify(dbenv, event_callback); + + /* Parse command line and perform common replication setup. */ + if ((ret = common_rep_setup(dbenv, argc, argv, &setup_info)) != 0) + goto err; + + if (setup_info.role == MASTER) + master_eid = SELF_EID; + + myaddr = strdup(setup_info.self.host); + myport = setup_info.self.port; + +#ifdef _WIN32 + /* Initialize the Windows sockets DLL. */ + if ((ret = WSAStartup(MAKEWORD(2, 2), &wsaData)) != 0) { + fprintf(stderr, + "Unable to initialize Windows sockets: %d\n", ret); + goto err; + } +#else + /* + * Turn off SIGPIPE so that we don't kill processes when they + * happen to lose a connection at the wrong time. + */ + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = SIG_IGN; + if ((ret = sigaction(SIGPIPE, &sigact, NULL)) != 0) { + fprintf(stderr, + "Unable to turn off SIGPIPE: %s\n", strerror(ret)); + goto err; + } +#endif + + /* + * We are hardcoding priorities here that all clients have the + * same priority except for a designated master who gets a higher + * priority. 
+ */ + if ((ret = + machtab_init(&machtab, setup_info.nsites)) != 0) + goto err; + my_app_data.comm_infrastructure = machtab; + + if ((ret = env_init(dbenv, setup_info.home)) != 0) + goto err; + + /* + * Now sets up comm infrastructure. There are two phases. First, + * we open our port for listening for incoming connections. Then + * we attempt to connect to every host we know about. + */ + + (void)dbenv->rep_set_transport(dbenv, SELF_EID, quote_send); + + ca.dbenv = dbenv; + ca.home = setup_info.home; + ca.progname = progname; + ca.machtab = machtab; + ca.port = setup_info.self.port; + if ((ret = thread_create(&conn_thr, NULL, connect_thread, &ca)) != 0) { + dbenv->errx(dbenv, "can't create connect thread"); + goto err; + } + + aa.dbenv = dbenv; + aa.progname = progname; + aa.home = setup_info.home; + aa.machtab = machtab; + aa.sites = setup_info.site_list; + aa.nsites = setup_info.remotesites; + if ((ret = thread_create(&all_thr, NULL, connect_all, &aa)) != 0) { + dbenv->errx(dbenv, "can't create connect-all thread"); + goto err; + } + + /* Start checkpoint and log archive threads. */ + supa.dbenv = dbenv; + supa.shared = &my_app_data.shared_data; + if ((ret = start_support_threads(dbenv, &supa, &ckp_thr, &lga_thr)) + != 0) + goto err; + + /* + * We have now got the entire communication infrastructure set up. + * It's time to declare ourselves to be a client or master. + */ + if (setup_info.role == MASTER) { + if ((ret = dbenv->rep_start(dbenv, NULL, DB_REP_MASTER)) != 0) { + dbenv->err(dbenv, ret, "dbenv->rep_start failed"); + goto err; + } + } else { + memset(&local, 0, sizeof(local)); + local.data = myaddr; + local.size = (u_int32_t)strlen(myaddr) + 1; + if ((ret = + dbenv->rep_start(dbenv, &local, DB_REP_CLIENT)) != 0) { + dbenv->err(dbenv, ret, "dbenv->rep_start failed"); + goto err; + } + /* Sleep to give ourselves time to find a master. 
*/ + sleep(5); + } + + if ((ret = doloop(dbenv, &my_app_data.shared_data)) != 0) { + dbenv->err(dbenv, ret, "Main loop failed"); + goto err; + } + + /* Finish checkpoint and log archive threads. */ + if ((ret = finish_support_threads(&ckp_thr, &lga_thr)) != 0) + goto err; + + /* Wait on the connection threads. */ + if (thread_join(all_thr, &astatus) || thread_join(conn_thr, &cstatus)) { + ret = -1; + goto err; + } + if ((uintptr_t)astatus != EXIT_SUCCESS || + (uintptr_t)cstatus != EXIT_SUCCESS) { + ret = -1; + goto err; + } + + /* + * We have used the DB_TXN_NOSYNC environment flag for improved + * performance without the usual sacrifice of transactional durability, + * as discussed in the "Transactional guarantees" page of the Reference + * Guide: if one replication site crashes, we can expect the data to + * exist at another site. However, in case we shut down all sites + * gracefully, we push out the end of the log here so that the most + * recent transactions don't mysteriously disappear. + */ + if ((ret = dbenv->log_flush(dbenv, NULL)) != 0) + dbenv->err(dbenv, ret, "log_flush"); + +err: if (machtab != NULL) + free(machtab); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); +#ifdef _WIN32 + /* Shut down the Windows sockets DLL. 
*/ + (void)WSACleanup(); +#endif + return (ret); +} + +static void +event_callback(dbenv, which, info) + DB_ENV *dbenv; + u_int32_t which; + void *info; +{ + APP_DATA *app = dbenv->app_private; + SHARED_DATA *shared = &app->shared_data; + + switch (which) { + case DB_EVENT_REP_CLIENT: + shared->is_master = 0; + shared->in_client_sync = 1; + break; + + case DB_EVENT_REP_ELECTED: + app->elected = 1; + master_eid = SELF_EID; + break; + + case DB_EVENT_REP_MASTER: + shared->is_master = 1; + shared->in_client_sync = 0; + break; + + case DB_EVENT_REP_NEWMASTER: + master_eid = *(int*)info; + shared->in_client_sync = 1; + break; + + case DB_EVENT_REP_STARTUPDONE: + shared->in_client_sync = 0; + break; + + default: + dbenv->errx(dbenv, "ignoring event %d", which); + } +} diff --git a/examples/c/ex_rep/base/rep_base.h b/examples/c/ex_rep/base/rep_base.h new file mode 100644 index 00000000..fbc45bcd --- /dev/null +++ b/examples/c/ex_rep/base/rep_base.h @@ -0,0 +1,117 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _EX_REPQUOTE_H_ +#define _EX_REPQUOTE_H_ + +#include "../common/rep_common.h" + +#define SELF_EID 1 + +/* Globals */ +typedef struct { + SHARED_DATA shared_data; + int elected; + void *comm_infrastructure; +} APP_DATA; + +extern int master_eid; +extern char *myaddr; +extern unsigned short myport; + +struct __member; typedef struct __member member_t; +struct __machtab; typedef struct __machtab machtab_t; + +/* Arguments for the connect_all thread. */ +typedef struct { + DB_ENV *dbenv; + const char *progname; + const char *home; + machtab_t *machtab; + repsite_t *sites; + int nsites; +} all_args; + +/* Arguments for the connect_loop thread. 
*/ +typedef struct { + DB_ENV *dbenv; + const char * home; + const char * progname; + machtab_t *machtab; + int port; +} connect_args; + +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" +#define MAX_THREADS 25 +#define SLEEPTIME 3 + +#ifndef COMPQUIET +#define COMPQUIET(x,y) x = (y) +#endif + +/* Portability macros for basic threading and networking */ +#ifdef _WIN32 + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 0 : -1) + +typedef int socklen_t; +typedef SOCKET socket_t; +#define SOCKET_CREATION_FAILURE INVALID_SOCKET +#define readsocket(s, buf, sz) recv((s), (buf), (int)(sz), 0) +#define writesocket(s, buf, sz) send((s), (const char *)(buf), (int)(sz), 0) +#define net_errno WSAGetLastError() + +#else /* !_WIN32 */ + +#include +#include +#include +#include +#include +#include +#include + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) + +typedef int socket_t; +#define SOCKET_CREATION_FAILURE -1 +#define closesocket(fd) close(fd) +#define net_errno errno +#define readsocket(s, buf, sz) read((s), (buf), (sz)) +#define writesocket(s, buf, sz) write((s), (buf), (sz)) + +#endif + +void *connect_all __P((void *)); +void *connect_thread __P((void *)); +int doclient __P((DB_ENV *, const char *, machtab_t *)); +int domaster __P((DB_ENV *, const char *)); +socket_t get_accepted_socket __P((const char *, int)); +socket_t get_connected_socket + __P((machtab_t *, const char *, const char *, int, int *, int *)); +int get_next_message __P((socket_t, DBT *, DBT *)); +socket_t listen_socket_init __P((const char *, int)); +socket_t listen_socket_accept + __P((machtab_t *, const char *, socket_t, int *)); +int 
machtab_getinfo __P((machtab_t *, int, u_int32_t *, int *)); +int machtab_init __P((machtab_t **, int)); +void machtab_parm __P((machtab_t *, int *, u_int32_t *)); +int machtab_rem __P((machtab_t *, int, int)); +int quote_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, + int, u_int32_t)); + +#endif /* !_EX_REPQUOTE_H_ */ diff --git a/examples/c/ex_rep/base/rep_msg.c b/examples/c/ex_rep/base/rep_msg.c new file mode 100644 index 00000000..51826e84 --- /dev/null +++ b/examples/c/ex_rep/base/rep_msg.c @@ -0,0 +1,467 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include +#include +#include +#include + +#include + +#include "rep_base.h" + +static int connect_site __P((DB_ENV *, machtab_t *, + const char *, repsite_t *, int *, thread_t *)); +static void *elect_thread __P((void *)); +static void *hm_loop __P((void *)); + +typedef struct { + DB_ENV *dbenv; + machtab_t *machtab; +} elect_args; + +typedef struct { + DB_ENV *dbenv; + const char *progname; + const char *home; + socket_t fd; + u_int32_t eid; + machtab_t *tab; +} hm_loop_args; + +/* + * This is a generic message handling loop that is used both by the + * master to accept messages from a client as well as by clients + * to communicate with other clients. 
+ */ +static void * +hm_loop(args) + void *args; +{ + DB_ENV *dbenv; + DB_LSN permlsn; + DBT rec, control; + APP_DATA *app; + const char *c, *home, *progname; + elect_args *ea; + hm_loop_args *ha; + machtab_t *tab; + thread_t elect_thr, *site_thrs, *tmp, tid; + repsite_t self; + u_int32_t timeout; + int eid, n, nsites, nsites_allocd; + int already_open, r, ret, t_ret; + socket_t fd; + void *status; + + ea = NULL; + site_thrs = NULL; + nsites_allocd = 0; + nsites = 0; + + ha = (hm_loop_args *)args; + dbenv = ha->dbenv; + fd = ha->fd; + home = ha->home; + eid = ha->eid; + progname = ha->progname; + tab = ha->tab; + free(ha); + app = dbenv->app_private; + + memset(&rec, 0, sizeof(DBT)); + memset(&control, 0, sizeof(DBT)); + + for (ret = 0; ret == 0;) { + if ((ret = get_next_message(fd, &rec, &control)) != 0) { + /* + * Close this connection; if it's the master call + * for an election. + */ + closesocket(fd); + if ((ret = machtab_rem(tab, eid, 1)) != 0) + break; + + /* + * If I'm the master, I just lost a client and this + * thread is done. + */ + if (master_eid == SELF_EID) + break; + + /* + * If I was talking with the master and the master + * went away, I need to call an election; else I'm + * done. + */ + if (master_eid != eid) + break; + + master_eid = DB_EID_INVALID; + machtab_parm(tab, &n, &timeout); + (void)dbenv->rep_set_timeout(dbenv, + DB_REP_ELECTION_TIMEOUT, timeout); + if ((ret = dbenv->rep_elect(dbenv, + n, (n/2+1), 0)) != 0) + continue; + + /* + * Regardless of the results, the site I was talking + * to is gone, so I have nothing to do but exit. + */ + if (app->elected) { + app->elected = 0; + ret = dbenv->rep_start(dbenv, + NULL, DB_REP_MASTER); + } + break; + } + + switch (r = dbenv->rep_process_message(dbenv, + &control, &rec, eid, &permlsn)) { + case DB_REP_NEWSITE: + /* + * Check if we got sent connect information and if we + * did, if this is me or if we already have a + * connection to this new site. If we don't, + * establish a new one. 
+ */ + + /* No connect info. */ + if (rec.size == 0) + break; + + /* It's me, do nothing. */ + if (strncmp(myaddr, rec.data, rec.size) == 0) + break; + + self.host = (char *)rec.data; + self.host = strtok(self.host, ":"); + if ((c = strtok(NULL, ":")) == NULL) { + dbenv->errx(dbenv, "Bad host specification"); + goto out; + } + self.port = atoi(c); + + /* + * We try to connect to the new site. If we can't, + * we treat it as an error since we know that the site + * should be up if we got a message from it (even + * indirectly). + */ + if (nsites == nsites_allocd) { + /* Need to allocate more space. */ + if ((tmp = realloc( + site_thrs, (10 + nsites) * + sizeof(thread_t))) == NULL) { + ret = errno; + goto out; + } + site_thrs = tmp; + nsites_allocd += 10; + } + if ((ret = connect_site(dbenv, tab, progname, + &self, &already_open, &tid)) != 0) + goto out; + if (!already_open) + memcpy(&site_thrs + [nsites++], &tid, sizeof(thread_t)); + break; + case DB_REP_HOLDELECTION: + if (master_eid == SELF_EID) + break; + /* Make sure that previous election has finished. 
*/ + if (ea != NULL) { + if (thread_join(elect_thr, &status) != 0) { + dbenv->errx(dbenv, + "thread join failure"); + goto out; + } + ea = NULL; + } + if ((ea = calloc(sizeof(elect_args), 1)) == NULL) { + dbenv->errx(dbenv, "can't allocate memory"); + ret = errno; + goto out; + } + ea->dbenv = dbenv; + ea->machtab = tab; + if ((ret = thread_create(&elect_thr, + NULL, elect_thread, (void *)ea)) != 0) { + dbenv->errx(dbenv, + "can't create election thread"); + } + break; + case DB_REP_ISPERM: + break; + case 0: + if (app->elected) { + app->elected = 0; + if ((ret = dbenv->rep_start(dbenv, + NULL, DB_REP_MASTER)) != 0) { + dbenv->err(dbenv, ret, + "can't start as master"); + goto out; + } + } + break; + default: + dbenv->err(dbenv, r, "DB_ENV->rep_process_message"); + break; + } + } + +out: if ((t_ret = machtab_rem(tab, eid, 1)) != 0 && ret == 0) + ret = t_ret; + + /* Don't close the environment before any children exit. */ + if (ea != NULL && thread_join(elect_thr, &status) != 0) + dbenv->errx(dbenv, "can't join election thread"); + + if (site_thrs != NULL) + while (--nsites >= 0) + if (thread_join(site_thrs[nsites], &status) != 0) + dbenv->errx(dbenv, "can't join site thread"); + + return ((void *)(uintptr_t)ret); +} + +/* + * This is a generic thread that spawns a thread to listen for connections + * on a socket and then spawns off child threads to handle each new + * connection. + */ +void * +connect_thread(args) + void *args; +{ + DB_ENV *dbenv; + const char *home, *progname; + hm_loop_args *ha; + connect_args *cargs; + machtab_t *machtab; + thread_t hm_thrs[MAX_THREADS]; + void *status; + int i, eid, port, ret; + socket_t fd, ns; + + ha = NULL; + cargs = (connect_args *)args; + dbenv = cargs->dbenv; + home = cargs->home; + progname = cargs->progname; + machtab = cargs->machtab; + port = cargs->port; + + /* + * Loop forever, accepting connections from new machines, + * and forking off a thread to handle each. 
+ */ + if ((fd = listen_socket_init(progname, port)) < 0) { + ret = errno; + goto err; + } + + for (i = 0; i < MAX_THREADS; i++) { + if ((ns = listen_socket_accept(machtab, + progname, fd, &eid)) == SOCKET_CREATION_FAILURE) { + ret = errno; + goto err; + } + if ((ha = calloc(sizeof(hm_loop_args), 1)) == NULL) { + dbenv->errx(dbenv, "can't allocate memory"); + ret = errno; + goto err; + } + ha->progname = progname; + ha->home = home; + ha->fd = ns; + ha->eid = eid; + ha->tab = machtab; + ha->dbenv = dbenv; + if ((ret = thread_create(&hm_thrs[i++], NULL, + hm_loop, (void *)ha)) != 0) { + dbenv->errx(dbenv, "can't create thread for site"); + goto err; + } + ha = NULL; + } + + /* If we fell out, we ended up with too many threads. */ + dbenv->errx(dbenv, "Too many threads"); + ret = ENOMEM; + + /* Do not return until all threads have exited. */ + while (--i >= 0) + if (thread_join(hm_thrs[i], &status) != 0) + dbenv->errx(dbenv, "can't join site thread"); + +err: return (ret == 0 ? (void *)EXIT_SUCCESS : (void *)EXIT_FAILURE); +} + +/* + * Open a connection to everyone that we've been told about. If we + * cannot open some connections, keep trying. + */ +void * +connect_all(args) + void *args; +{ + DB_ENV *dbenv; + all_args *aa; + const char *home, *progname; + hm_loop_args *ha; + int failed, i, nsites, open, ret, *success; + machtab_t *machtab; + thread_t *hm_thr; + repsite_t *sites; + + ha = NULL; + aa = (all_args *)args; + dbenv = aa->dbenv; + progname = aa->progname; + home = aa->home; + machtab = aa->machtab; + nsites = aa->nsites; + sites = aa->sites; + + ret = 0; + hm_thr = NULL; + success = NULL; + + /* Some implementations of calloc are sad about allocating 0 things. */ + if ((success = calloc(nsites > 0 ? 
nsites : 1, sizeof(int))) == NULL) { + dbenv->err(dbenv, errno, "connect_all"); + ret = 1; + goto err; + } + + if (nsites > 0 && (hm_thr = calloc(nsites, sizeof(int))) == NULL) { + dbenv->err(dbenv, errno, "connect_all"); + ret = 1; + goto err; + } + + for (failed = nsites; failed > 0;) { + for (i = 0; i < nsites; i++) { + if (success[i]) + continue; + + ret = connect_site(dbenv, machtab, + progname, &sites[i], &open, &hm_thr[i]); + + /* + * If we couldn't make the connection, this isn't + * fatal to the loop, but we have nothing further + * to do on this machine at the moment. + */ + if (ret == DB_REP_UNAVAIL) + continue; + + if (ret != 0) + goto err; + + failed--; + success[i] = 1; + + /* If the connection is already open, we're done. */ + if (ret == 0 && open == 1) + continue; + + } + sleep(1); + } + +err: if (success != NULL) + free(success); + if (hm_thr != NULL) + free(hm_thr); + return (ret ? (void *)EXIT_FAILURE : (void *)EXIT_SUCCESS); +} + +static int +connect_site(dbenv, machtab, progname, site, is_open, hm_thrp) + DB_ENV *dbenv; + machtab_t *machtab; + const char *progname; + repsite_t *site; + int *is_open; + thread_t *hm_thrp; +{ + int eid, ret; + socket_t s; + hm_loop_args *ha; + + if ((s = get_connected_socket(machtab, progname, + site->host, site->port, is_open, &eid)) < 0) + return (DB_REP_UNAVAIL); + + if (*is_open) + return (0); + + if ((ha = calloc(sizeof(hm_loop_args), 1)) == NULL) { + dbenv->errx(dbenv, "can't allocate memory"); + ret = errno; + goto err; + } + + ha->progname = progname; + ha->fd = s; + ha->eid = eid; + ha->tab = machtab; + ha->dbenv = dbenv; + + if ((ret = thread_create(hm_thrp, NULL, + hm_loop, (void *)ha)) != 0) { + dbenv->errx(dbenv, "can't create thread for connected site"); + goto err1; + } + + return (0); + +err1: free(ha); +err: + return (ret); +} + +/* + * We need to spawn off a new thread in which to hold an election in + * case we are the only thread listening on for messages. 
+ */ +static void * +elect_thread(args) + void *args; +{ + DB_ENV *dbenv; + elect_args *eargs; + machtab_t *machtab; + u_int32_t timeout; + int n, ret; + APP_DATA *app; + + eargs = (elect_args *)args; + dbenv = eargs->dbenv; + machtab = eargs->machtab; + free(eargs); + app = dbenv->app_private; + + machtab_parm(machtab, &n, &timeout); + (void)dbenv->rep_set_timeout(dbenv, DB_REP_ELECTION_TIMEOUT, timeout); + while ((ret = dbenv->rep_elect(dbenv, n, (n/2+1), 0)) != 0) + sleep(2); + + if (app->elected) { + app->elected = 0; + if ((ret = dbenv->rep_start(dbenv, NULL, DB_REP_MASTER)) != 0) + dbenv->err(dbenv, ret, + "can't start as master in election thread"); + } + + return (NULL); +} diff --git a/examples/c/ex_rep/base/rep_net.c b/examples/c/ex_rep/base/rep_net.c new file mode 100644 index 00000000..08ac5da7 --- /dev/null +++ b/examples/c/ex_rep/base/rep_net.c @@ -0,0 +1,749 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include +#include +#include +#include + +#include +#include "rep_base.h" +#ifndef _SYS_QUEUE_H +/* + * Some *BSD Unix variants include the Queue macros in their libraries and + * these might already have been included. In that case, it would be bad + * to include them again. + */ +#include "dbinc/queue.h" /* !!!: for the LIST_XXX macros. */ +#endif + +int machtab_add __P((machtab_t *, socket_t, u_int32_t, int, int *)); +#ifdef DIAGNOSTIC +void machtab_print __P((machtab_t *)); +#endif +ssize_t readn __P((socket_t, void *, size_t)); + +/* + * This file defines the communication infrastructure for the ex_repquote + * sample application. + * + * This application uses TCP/IP for its communication. In an N-site + * replication group, this means that there are N * N communication + * channels so that every site can communicate with every other site + * (this allows elections to be held when the master fails). 
We do + * not require that anyone know about all sites when the application + * starts up. In order to communicate, the application should know + * about someone, else it has no idea how to ever get in the game. + * + * Communication is handled via a number of different threads. These + * thread functions are implemented in rep_util.c In this file, we + * define the data structures that maintain the state that describes + * the comm infrastructure, the functions that manipulates this state + * and the routines used to actually send and receive data over the + * sockets. + */ + +/* + * The communication infrastructure is represented by a machine table, + * machtab_t, which is essentially a mutex-protected linked list of members + * of the group. The machtab also contains the parameters that are needed + * to call for an election. We hardwire values for these parameters in the + * init function, but these could be set via some configuration setup in a + * real application. We reserve the machine-id 1 to refer to ourselves and + * make the machine-id 0 be invalid. + */ + +#define MACHID_INVALID 0 +#define MACHID_SELF 1 + +struct __machtab { + LIST_HEAD(__machlist, __member) machlist; + int nextid; + mutex_t mtmutex; + u_int32_t timeout_time; + int current; + int max; + int nsites; +}; + +/* Data structure that describes each entry in the machtab. */ +struct __member { + u_int32_t hostaddr; /* Host IP address. */ + int port; /* Port number. */ + int eid; /* Application-specific machine id. */ + socket_t fd; /* File descriptor for the socket. */ + LIST_ENTRY(__member) links; + /* For linked list of all members we know of. */ +}; + +static int quote_send_broadcast __P((machtab_t *, + const DBT *, const DBT *, u_int32_t)); +static int quote_send_one __P((const DBT *, const DBT *, socket_t, u_int32_t)); + +/* + * machtab_init -- + * Initialize the machine ID table. 
+ * XXX Right now we treat the number of sites as the maximum + * number we've ever had on the list at one time. We probably + * want to make that smarter. + */ +int +machtab_init(machtabp, nsites) + machtab_t **machtabp; + int nsites; +{ + int ret; + machtab_t *machtab; + + if ((machtab = malloc(sizeof(machtab_t))) == NULL) { + fprintf(stderr, "can't allocate memory\n"); + return (ENOMEM); + } + + LIST_INIT(&machtab->machlist); + + /* Reserve eid's 0 and 1. */ + machtab->nextid = 2; + machtab->timeout_time = 2 * 1000000; /* 2 seconds. */ + machtab->current = machtab->max = 0; + machtab->nsites = nsites; + + ret = mutex_init(&machtab->mtmutex, NULL); + *machtabp = machtab; + + return (ret); +} + +/* + * machtab_add -- + * Add a file descriptor to the table of machines, returning + * a new machine ID. + */ +int +machtab_add(machtab, fd, hostaddr, port, idp) + machtab_t *machtab; + socket_t fd; + u_int32_t hostaddr; + int port, *idp; +{ + int ret; + member_t *m, *member; + + ret = 0; + if ((member = malloc(sizeof(member_t))) == NULL) { + fprintf(stderr, "can't allocate memory\n"); + return (ENOMEM); + } + + member->fd = fd; + member->hostaddr = hostaddr; + member->port = port; + + if ((ret = mutex_lock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't lock mutex"); + return (ret); + } + + for (m = LIST_FIRST(&machtab->machlist); + m != NULL; m = LIST_NEXT(m, links)) + if (m->hostaddr == hostaddr && m->port == port) + break; + + if (m == NULL) { + member->eid = machtab->nextid++; + LIST_INSERT_HEAD(&machtab->machlist, member, links); + } else + member->eid = m->eid; + + if ((ret = mutex_unlock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + return (ret); + } + + if (idp != NULL) + *idp = member->eid; + + if (m == NULL) { + if (++machtab->current > machtab->max) + machtab->max = machtab->current; + } else { + free(member); + ret = EEXIST; + } +#ifdef DIAGNOSTIC + printf("Exiting machtab_add\n"); + machtab_print(machtab); +#endif + return 
(ret); +} + +/* + * machtab_getinfo -- + * Return host and port information for a particular machine id. + */ +int +machtab_getinfo(machtab, eid, hostp, portp) + machtab_t *machtab; + int eid; + u_int32_t *hostp; + int *portp; +{ + int ret; + member_t *member; + + if ((ret = mutex_lock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't lock mutex\n"); + return (ret); + } + + for (member = LIST_FIRST(&machtab->machlist); + member != NULL; + member = LIST_NEXT(member, links)) + if (member->eid == eid) { + *hostp = member->hostaddr; + *portp = member->port; + break; + } + + if ((ret = mutex_unlock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + return (ret); + } + + return (member != NULL ? 0 : EINVAL); +} + +/* + * machtab_rem -- + * Remove a mapping from the table of machines. Lock indicates + * whether we need to lock the machtab or not (0 indicates we do not + * need to lock; non-zero indicates that we do need to lock). + */ +int +machtab_rem(machtab, eid, lock) + machtab_t *machtab; + int eid; + int lock; +{ + int found, ret; + member_t *member; + + ret = 0; + if (lock && (ret = mutex_lock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't lock mutex\n"); + return (ret); + } + + for (found = 0, member = LIST_FIRST(&machtab->machlist); + member != NULL; + member = LIST_NEXT(member, links)) + if (member->eid == eid) { + found = 1; + LIST_REMOVE(member, links); + (void)closesocket(member->fd); + free(member); + machtab->current--; + break; + } + + if (LIST_FIRST(&machtab->machlist) == NULL) + machtab->nextid = 2; + + if (lock && (ret = mutex_unlock(&machtab->mtmutex)) != 0) + fprintf(stderr, "can't unlock mutex\n"); + +#ifdef DIAGNOSTIC + printf("Exiting machtab_rem\n"); + machtab_print(machtab); +#endif + return (ret); +} + +void +machtab_parm(machtab, nump, timeoutp) + machtab_t *machtab; + int *nump; + u_int32_t *timeoutp; +{ + if (machtab->nsites == 0) + *nump = machtab->max; + else + *nump = machtab->nsites; + *timeoutp = 
machtab->timeout_time; +} + +#ifdef DIAGNOSTIC +void +machtab_print(machtab) + machtab_t *machtab; +{ + member_t *m; + + if (mutex_lock(&machtab->mtmutex) != 0) { + fprintf(stderr, "can't lock mutex\n"); + abort(); + } + + for (m = LIST_FIRST(&machtab->machlist); + m != NULL; m = LIST_NEXT(m, links)) { + + printf("IP: %lx Port: %6d EID: %2d FD: %3d\n", + (long)m->hostaddr, m->port, m->eid, m->fd); + } + + if (mutex_unlock(&machtab->mtmutex) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + abort(); + } +} +#endif +/* + * listen_socket_init -- + * Initialize a socket for listening on the specified port. Returns + * a file descriptor for the socket, ready for an accept() call + * in a thread that we're happy to let block. + */ +socket_t +listen_socket_init(progname, port) + const char *progname; + int port; +{ + socket_t s; + int sockopt; + struct sockaddr_in si; + + COMPQUIET(progname, NULL); + + if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + perror("can't create listen socket"); + return (-1); + } + + memset(&si, 0, sizeof(si)); + si.sin_family = AF_INET; + si.sin_addr.s_addr = htonl(INADDR_ANY); + si.sin_port = htons((unsigned short)port); + + /* + * When using this example for testing, it's common to kill and restart + * regularly. On some systems, this causes bind to fail with "address + * in use" errors unless this option is set. + */ + sockopt = 1; + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, + (const char *)&sockopt, sizeof(sockopt)); + + if (bind(s, (struct sockaddr *)&si, sizeof(si)) != 0) { + perror("can't bind listen socket"); + goto err; + } + + if (listen(s, 5) != 0) { + perror("can't establish listen queue"); + goto err; + } + + return (s); + +err: closesocket(s); + return (-1); +} + +/* + * listen_socket_accept -- + * Accept a connection on a socket. This is essentially just a wrapper + * for accept(3). 
+ */ +socket_t +listen_socket_accept(machtab, progname, s, eidp) + machtab_t *machtab; + const char *progname; + socket_t s; + int *eidp; +{ + struct sockaddr_in si; + socklen_t si_len; + int host, ret; + socket_t ns; + u_int16_t port; + + COMPQUIET(progname, NULL); + +accept_wait: + memset(&si, 0, sizeof(si)); + si_len = sizeof(si); + ns = accept(s, (struct sockaddr *)&si, &si_len); + if (ns == SOCKET_CREATION_FAILURE) { + fprintf(stderr, "can't accept incoming connection\n"); + return ns; + } + host = ntohl(si.sin_addr.s_addr); + + /* + * Sites send their listening port when connections are first + * established, as it will be different from the outgoing port + * for this connection. + */ + if (readn(ns, &port, 2) != 2) + goto err; + port = ntohs(port); + + ret = machtab_add(machtab, ns, host, port, eidp); + if (ret == EEXIST) { + closesocket(ns); + goto accept_wait; + } else if (ret != 0) + goto err; + printf("Connected to host %x port %d, eid = %d\n", host, port, *eidp); + return (ns); + +err: closesocket(ns); + return SOCKET_CREATION_FAILURE; +} + +/* + * get_connected_socket -- + * Connect to the specified port of the specified remote machine, + * and return a file descriptor when we have accepted a connection on it. + * Add this connection to the machtab. If we already have a connection + * open to this machine, then don't create another one, return the eid + * of the connection (in *eidp) and set is_open to 1. Return 0. 
+ */ +socket_t +get_connected_socket(machtab, progname, remotehost, port, is_open, eidp) + machtab_t *machtab; + const char *progname, *remotehost; + int port, *is_open, *eidp; +{ + int ret; + socket_t s; + struct hostent *hp; + struct sockaddr_in si; + u_int32_t addr; + u_int16_t nport; + + *is_open = 0; + + if ((hp = gethostbyname(remotehost)) == NULL) { + fprintf(stderr, "%s: host not found: %s\n", progname, + strerror(net_errno)); + return (-1); + } + + if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + perror("can't create outgoing socket"); + return (-1); + } + memset(&si, 0, sizeof(si)); + memcpy((char *)&si.sin_addr, hp->h_addr, hp->h_length); + addr = ntohl(si.sin_addr.s_addr); + ret = machtab_add(machtab, s, addr, port, eidp); + if (ret == EEXIST) { + *is_open = 1; + closesocket(s); + return (0); + } else if (ret != 0) { + closesocket(s); + return (-1); + } + + si.sin_family = AF_INET; + si.sin_port = htons((unsigned short)port); + if (connect(s, (struct sockaddr *)&si, sizeof(si)) < 0) { + fprintf(stderr, "%s: connection failed: %s\n", + progname, strerror(net_errno)); + (void)machtab_rem(machtab, *eidp, 1); + return (-1); + } + + /* + * The first thing we send on the socket is our (listening) port + * so the site we are connecting to can register us correctly in + * its machtab. + */ + nport = htons(myport); + writesocket(s, &nport, 2); + + return (s); +} + +/* + * get_next_message -- + * Read a single message from the specified file descriptor, and + * return it in the format used by rep functions (two DBTs and a type). + * + * This function is called in a loop by both clients and masters, and + * the resulting DBTs are manually dispatched to DB_ENV->rep_process_message(). 
+ */ +int +get_next_message(fd, rec, control) + socket_t fd; + DBT *rec, *control; +{ + size_t nr; + u_int32_t rsize, csize; + u_int8_t *recbuf, *controlbuf; + + /* + * The protocol we use on the wire is dead simple: + * + * 4 bytes - rec->size + * (# read above) - rec->data + * 4 bytes - control->size + * (# read above) - control->data + */ + + /* Read rec->size. */ + nr = readn(fd, &rsize, 4); + if (nr != 4) + return (1); + + /* Read the record itself. */ + if (rsize > 0) { + if (rec->size < rsize) + rec->data = realloc(rec->data, rsize); + if ((recbuf = rec->data) == NULL) + return (1); + nr = readn(fd, recbuf, rsize); + } else { + if (rec->data != NULL) + free(rec->data); + rec->data = NULL; + } + rec->size = rsize; + + /* Read control->size. */ + nr = readn(fd, &csize, 4); + if (nr != 4) + return (1); + + /* Read the control struct itself. */ + if (csize > 0) { + controlbuf = control->data; + if (control->size < csize) + controlbuf = realloc(controlbuf, csize); + if (controlbuf == NULL) + return (1); + nr = readn(fd, controlbuf, csize); + if (nr != csize) + return (1); + } else { + if (control->data != NULL) + free(control->data); + controlbuf = NULL; + } + control->data = controlbuf; + control->size = csize; + + return (0); +} + +/* + * readn -- + * Read a full n characters from a file descriptor, unless we get an error + * or EOF. + */ +ssize_t +readn(fd, vptr, n) + socket_t fd; + void *vptr; + size_t n; +{ + size_t nleft; + ssize_t nread; + char *ptr; + + ptr = vptr; + nleft = n; + while (nleft > 0) { + if ((nread = readsocket(fd, ptr, nleft)) < 0) { + /* + * Call read() again on interrupted system call; + * on other errors, bail. + */ + if (net_errno == EINTR) + nread = 0; + else { + perror("can't read from socket"); + return (-1); + } + } else if (nread == 0) + break; /* EOF */ + + nleft -= nread; + ptr += nread; + } + + return (n - nleft); +} + +/* + * quote_send -- + * The f_send function for DB_ENV->rep_set_transport. 
+ */ +int +quote_send(dbenv, control, rec, lsnp, eid, flags) + DB_ENV *dbenv; + const DBT *control, *rec; + const DB_LSN *lsnp; + int eid; + u_int32_t flags; +{ + int n, ret, t_ret; + socket_t fd; + machtab_t *machtab; + member_t *m; + + COMPQUIET(lsnp, NULL); + machtab = + (machtab_t *)((APP_DATA*)dbenv->app_private)->comm_infrastructure; + + if (eid == DB_EID_BROADCAST) { + /* + * Right now, we do not require successful transmission. + * I'd like to move this requiring at least one successful + * transmission on PERMANENT requests. + */ + n = quote_send_broadcast(machtab, rec, control, flags); + if (n < 0 /*|| (n == 0 && LF_ISSET(DB_REP_PERMANENT))*/) + return (DB_REP_UNAVAIL); + return (0); + } + + if ((ret = mutex_lock(&machtab->mtmutex)) != 0) { + dbenv->errx(dbenv, "can't lock mutex"); + return (ret); + } + + fd = 0; + for (m = LIST_FIRST(&machtab->machlist); m != NULL; + m = LIST_NEXT(m, links)) { + if (m->eid == eid) { + fd = m->fd; + break; + } + } + + if (fd == 0) { + dbenv->err(dbenv, DB_REP_UNAVAIL, + "quote_send: cannot find machine ID %d", eid); + return (DB_REP_UNAVAIL); + } + + if ((ret = quote_send_one(rec, control, fd, flags)) != 0) + fprintf(stderr, "socket write error in send() function\n"); + + if ((t_ret = mutex_unlock(&machtab->mtmutex)) != 0) { + dbenv->errx(dbenv, "can't unlock mutex"); + if (ret == 0) + ret = t_ret; + } + + return (ret); +} + +/* + * quote_send_broadcast -- + * Send a message to everybody. + * Returns the number of sites to which this message was successfully + * communicated. A -1 indicates a fatal error. 
+ */ +static int +quote_send_broadcast(machtab, rec, control, flags) + machtab_t *machtab; + const DBT *rec, *control; + u_int32_t flags; +{ + int ret, sent; + member_t *m, *next; + + if ((ret = mutex_lock(&machtab->mtmutex)) != 0) { + fprintf(stderr, "can't lock mutex\n"); + return (ret); + } + + sent = 0; + for (m = LIST_FIRST(&machtab->machlist); m != NULL; m = next) { + next = LIST_NEXT(m, links); + if ((ret = quote_send_one(rec, control, m->fd, flags)) != 0) { + fprintf(stderr, "socket write error in broadcast\n"); + (void)machtab_rem(machtab, m->eid, 0); + } else + sent++; + } + + if (mutex_unlock(&machtab->mtmutex) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + return (-1); + } + + return (sent); +} + +/* + * quote_send_one -- + * Send a message to a single machine, given that machine's file + * descriptor. + * + * !!! + * Note that the machtab mutex should be held through this call. + * It doubles as a synchronizer to make sure that two threads don't + * intersperse writes that are part of two single messages. + */ +static int +quote_send_one(rec, control, fd, flags) + const DBT *rec, *control; + socket_t fd; + u_int32_t flags; + +{ + int retry; + ssize_t bytes_left, nw; + u_int8_t *wp; + + COMPQUIET(flags, 0); + + /* + * The protocol is simply: write rec->size, write rec->data, + * write control->size, write control->data. + */ + nw = writesocket(fd, (const char *)&rec->size, 4); + if (nw != 4) + return (DB_REP_UNAVAIL); + + if (rec->size > 0) { + nw = writesocket(fd, rec->data, rec->size); + if (nw < 0) + return (DB_REP_UNAVAIL); + if (nw != (ssize_t)rec->size) { + /* Try a couple of times to finish the write. 
*/ + wp = (u_int8_t *)rec->data + nw; + bytes_left = rec->size - nw; + for (retry = 0; bytes_left > 0 && retry < 3; retry++) { + nw = writesocket(fd, wp, bytes_left); + if (nw < 0) + return (DB_REP_UNAVAIL); + bytes_left -= nw; + wp += nw; + } + if (bytes_left > 0) + return (DB_REP_UNAVAIL); + } + } + + nw = writesocket(fd, (const char *)&control->size, 4); + if (nw != 4) + return (DB_REP_UNAVAIL); + if (control->size > 0) { + nw = writesocket(fd, control->data, control->size); + if (nw != (ssize_t)control->size) + return (DB_REP_UNAVAIL); + } + return (0); +} diff --git a/examples/c/ex_rep/common/rep_common.c b/examples/c/ex_rep/common/rep_common.c new file mode 100644 index 00000000..0ecca3ed --- /dev/null +++ b/examples/c/ex_rep/common/rep_common.c @@ -0,0 +1,680 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include +#include +#include + +#include + +#include "rep_common.h" + +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" +#define SLEEPTIME 3 + +static int print_stocks __P((DB *)); + +/* + * Perform command line parsing and common replication setup for the repmgr + * and base replication example programs. + */ +int +common_rep_setup(dbenv, argc, argv, setup_info) + DB_ENV *dbenv; + int argc; + char *argv[]; + SETUP_DATA *setup_info; +{ + repsite_t site; + extern char *optarg; + char ch, *portstr; + int ack_policy, got_self, is_repmgr, maxsites, priority, ret; + + got_self = is_repmgr = maxsites = ret = 0; + site.peer = site.creator = 0; + + priority = 100; + ack_policy = DB_REPMGR_ACKS_QUORUM; + setup_info->role = UNKNOWN; + if (strncmp(setup_info->progname, "ex_rep_mgr", 10) == 0) + is_repmgr = 1; + + /* + * Replication setup calls that are only needed if a command + * line option is specified are made within this while/switch + * statement. 
Replication setup calls that should be made + * whether or not a command line option is specified are after + * this while/switch statement. + */ + while ((ch = getopt(argc, argv, "a:bCh:L:l:Mn:p:R:r:v")) != EOF) { + switch (ch) { + case 'a': + if (!is_repmgr) + usage(is_repmgr, setup_info->progname); + if (strncmp(optarg, "all", 3) == 0) + ack_policy = DB_REPMGR_ACKS_ALL; + else if (strncmp(optarg, "quorum", 6) != 0) + usage(is_repmgr, setup_info->progname); + break; + case 'b': + /* + * Configure bulk transfer to send groups of records + * to clients in a single network transfer. This is + * useful for master sites and clients participating + * in client-to-client synchronization. + */ + if ((ret = dbenv->rep_set_config(dbenv, + DB_REP_CONF_BULK, 1)) != 0) { + dbenv->err(dbenv, ret, + "Could not configure bulk transfer.\n"); + goto err; + } + break; + case 'C': + setup_info->role = CLIENT; + break; + case 'h': + setup_info->home = optarg; + break; + case 'L': + if (!is_repmgr) + usage(is_repmgr, setup_info->progname); + setup_info->self.creator = 1; /* FALLTHROUGH */ + case 'l': + setup_info->self.host = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + fprintf(stderr, "Bad host specification.\n"); + goto err; + } + setup_info->self.port = (unsigned short)atoi(portstr); + setup_info->self.peer = 0; + got_self = 1; + break; + case 'M': + setup_info->role = MASTER; + break; + case 'n': + if (is_repmgr) + usage(is_repmgr, setup_info->progname); + setup_info->nsites = atoi(optarg); + /* + * For repmgr, using group membership, we cannot + * set this any more. For base replication, nsites + * is simply passed back to main for use in its + * communications and election processing. 
+ */ + if (setup_info->nsites > 0 && + (ret = dbenv->rep_set_nsites(dbenv, + setup_info->nsites)) != 0) { + dbenv->err(dbenv, ret, + "Could not set nsites.\n"); + goto err; + } + break; + case 'p': + priority = atoi(optarg); + break; + case 'R': + if (!is_repmgr) + usage(is_repmgr, setup_info->progname); + site.peer = 1; /* FALLTHROUGH */ + case 'r': + site.host = optarg; + site.host = strtok(site.host, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + fprintf(stderr, "Bad host specification.\n"); + goto err; + } + site.port = (unsigned short)atoi(portstr); + if (setup_info->site_list == NULL || + setup_info->remotesites >= maxsites) { + maxsites = maxsites == 0 ? 10 : 2 * maxsites; + if ((setup_info->site_list = + realloc(setup_info->site_list, + maxsites * sizeof(repsite_t))) == NULL) { + fprintf(stderr, "System error %s\n", + strerror(errno)); + goto err; + } + } + (setup_info->site_list)[(setup_info->remotesites)++] = + site; + site.peer = 0; + break; + case 'v': + if ((ret = dbenv->set_verbose(dbenv, + DB_VERB_REPLICATION, 1)) != 0) + goto err; + break; + case '?': + default: + usage(is_repmgr, setup_info->progname); + } + } + + /* Error check command line. */ + if (!got_self || setup_info->home == NULL) + usage(is_repmgr, setup_info->progname); + if (!is_repmgr && setup_info->role == UNKNOWN) { + fprintf(stderr, "Must specify -M or -C.\n"); + goto err; + } + + /* + * Set replication group election priority for this environment. + * An election first selects the site with the most recent log + * records as the new master. If multiple sites have the most + * recent log records, the site with the highest priority value + * is selected as master. + */ + if ((ret = dbenv->rep_set_priority(dbenv, priority)) != 0) { + dbenv->err(dbenv, ret, "Could not set priority.\n"); + goto err; + } + + /* + * For repmgr, set the policy that determines how master and client + * sites handle acknowledgement of replication messages needed for + * permanent records. 
The default policy of "quorum" requires only + * a quorum of electable peers sufficient to ensure a permanent + * record remains durable if an election is held. The "all" option + * requires all clients to acknowledge a permanent replication + * message instead. + */ + if (is_repmgr && + (ret = dbenv->repmgr_set_ack_policy(dbenv, ack_policy)) != 0) { + dbenv->err(dbenv, ret, "Could not set ack policy.\n"); + goto err; + } + + /* + * Set the threshold for the minimum and maximum time the client + * waits before requesting retransmission of a missing message. + * Base these values on the performance and load characteristics + * of the master and client host platforms as well as the round + * trip message time. + */ + if ((ret = dbenv->rep_set_request(dbenv, 20000, 500000)) != 0) { + dbenv->err(dbenv, ret, + "Could not set client_retransmission defaults.\n"); + goto err; + } + + /* + * Configure deadlock detection to ensure that any deadlocks + * are broken by having one of the conflicting lock requests + * rejected. DB_LOCK_DEFAULT uses the lock policy specified + * at environment creation time or DB_LOCK_RANDOM if none was + * specified. + */ + if ((ret = dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT)) != 0) { + dbenv->err(dbenv, ret, + "Could not configure deadlock detection.\n"); + goto err; + } + + /* The following base replication features may also be useful to your + * application. See Berkeley DB documentation for more details. + * - Master leases: Provide stricter consistency for data reads + * on a master site. + * - Timeouts: Customize the amount of time Berkeley DB waits + * for such things as an election to be concluded or a master + * lease to be granted. + * - Delayed client synchronization: Manage the master site's + * resources by spreading out resource-intensive client + * synchronizations. 
+ * - Blocked client operations: Return immediately with an error + * instead of waiting indefinitely if a client operation is + * blocked by an ongoing client synchronization. + */ + +err: + return (ret); +} + +static int +print_stocks(dbp) + DB *dbp; +{ + DBC *dbc; + DBT key, data; +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + char keybuf[MAXKEYSIZE + 1], databuf[MAXDATASIZE + 1]; + int ret, t_ret; + u_int32_t keysize, datasize; + + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) { + dbp->err(dbp, ret, "can't open cursor"); + return (ret); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + printf("\tSymbol\tPrice\n"); + printf("\t======\t=====\n"); + + for (ret = dbc->get(dbc, &key, &data, DB_FIRST); + ret == 0; + ret = dbc->get(dbc, &key, &data, DB_NEXT)) { + keysize = key.size > MAXKEYSIZE ? MAXKEYSIZE : key.size; + memcpy(keybuf, key.data, keysize); + keybuf[keysize] = '\0'; + + datasize = data.size >= MAXDATASIZE ? MAXDATASIZE : data.size; + memcpy(databuf, data.data, datasize); + databuf[datasize] = '\0'; + + printf("\t%s\t%s\n", keybuf, databuf); + } + printf("\n"); + fflush(stdout); + + if ((t_ret = dbc->close(dbc)) != 0 && ret == 0) + ret = t_ret; + + switch (ret) { + case 0: + case DB_NOTFOUND: + case DB_LOCK_DEADLOCK: + return (0); + default: + return (ret); + } +} + +/* Start checkpoint and log archive support threads. */ +int +start_support_threads(dbenv, sup_args, ckp_thr, lga_thr) + DB_ENV *dbenv; + supthr_args *sup_args; + thread_t *ckp_thr; + thread_t *lga_thr; +{ + int ret; + + ret = 0; + if ((ret = thread_create(ckp_thr, NULL, checkpoint_thread, + sup_args)) != 0) { + dbenv->errx(dbenv, "can't create checkpoint thread"); + goto err; + } + if ((ret = thread_create(lga_thr, NULL, log_archive_thread, + sup_args)) != 0) + dbenv->errx(dbenv, "can't create log archive thread"); +err: + return (ret); + +} + +/* Wait for checkpoint and log archive support threads to finish. 
*/ +int +finish_support_threads(ckp_thr, lga_thr) + thread_t *ckp_thr; + thread_t *lga_thr; +{ + void *ctstatus, *ltstatus; + int ret; + + ret = 0; + if (thread_join(*lga_thr, <status) || + thread_join(*ckp_thr, &ctstatus)) { + ret = -1; + goto err; + } + if ((uintptr_t)ltstatus != EXIT_SUCCESS || + (uintptr_t)ctstatus != EXIT_SUCCESS) + ret = -1; +err: + return (ret); +} + +#define BUFSIZE 1024 + +int +doloop(dbenv, shared_data) + DB_ENV *dbenv; + SHARED_DATA *shared_data; +{ + DB *dbp; + DBT key, data; + char buf[BUFSIZE], *first, *price; + u_int32_t flags; + int ret; + + dbp = NULL; + ret = 0; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + for (;;) { + printf("QUOTESERVER%s> ", + shared_data->is_master ? "" : " (read-only)"); + fflush(stdout); + + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + +#define DELIM " \t\n" + if ((first = strtok(&buf[0], DELIM)) == NULL) { + /* Blank input line. */ + price = NULL; + } else if ((price = strtok(NULL, DELIM)) == NULL) { + /* Just one input token. */ + if (strncmp(buf, "exit", 4) == 0 || + strncmp(buf, "quit", 4) == 0) { + /* + * This makes the checkpoint and log + * archive threads stop. + */ + shared_data->app_finished = 1; + break; + } + dbenv->errx(dbenv, "Format: TICKER VALUE"); + continue; + } else { + /* Normal two-token input line. */ + if (first != NULL && !shared_data->is_master) { + dbenv->errx(dbenv, "Can't update at client"); + continue; + } + } + + if (dbp == NULL) { + if ((ret = db_create(&dbp, dbenv, 0)) != 0) + return (ret); + + flags = DB_AUTO_COMMIT; + /* + * Open database with DB_CREATE only if this is + * a master database. A client database uses + * polling to attempt to open the database without + * DB_CREATE until it is successful. + * + * This DB_CREATE polling logic can be simplified + * under some circumstances. For example, if the + * application can be sure a database is already + * there, it would never need to open it with + * DB_CREATE. 
+ */ + if (shared_data->is_master) + flags |= DB_CREATE; + if ((ret = dbp->open(dbp, + NULL, DATABASE, NULL, DB_BTREE, flags, 0)) != 0) { + if (ret == ENOENT) { + printf( + "No stock database yet available.\n"); + if ((ret = dbp->close(dbp, 0)) != 0) { + dbenv->err(dbenv, ret, + "DB->close"); + goto err; + } + dbp = NULL; + continue; + } + if (ret == DB_REP_HANDLE_DEAD || + ret == DB_LOCK_DEADLOCK) { + dbenv->err(dbenv, ret, + "please retry the operation"); + dbp->close(dbp, DB_NOSYNC); + dbp = NULL; + continue; + } + dbenv->err(dbenv, ret, "DB->open"); + goto err; + } + } + + if (first == NULL) { + /* + * If this is a client in the middle of + * synchronizing with the master, the client data + * is possibly stale and won't be displayed until + * client synchronization is finished. It is also + * possible to display the stale data if this is + * acceptable to the application. + */ + if (shared_data->in_client_sync) + printf( +"Cannot read data during client synchronization - please try again.\n"); + else + switch ((ret = print_stocks(dbp))) { + case 0: + break; + case DB_REP_HANDLE_DEAD: + (void)dbp->close(dbp, DB_NOSYNC); + dbp = NULL; + break; + default: + dbp->err(dbp, ret, + "Error traversing data"); + goto err; + } + } else { + key.data = first; + key.size = (u_int32_t)strlen(first); + + data.data = price; + data.size = (u_int32_t)strlen(price); + + if ((ret = dbp->put(dbp, + NULL, &key, &data, DB_AUTO_COMMIT)) != 0) { + dbp->err(dbp, ret, "DB->put"); + goto err; + } + } + } + +err: if (dbp != NULL) + (void)dbp->close(dbp, DB_NOSYNC); + return (ret); +} + +int +create_env(progname, dbenvp) + const char *progname; + DB_ENV **dbenvp; +{ + DB_ENV *dbenv; + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "can't create env handle: %s\n", + db_strerror(ret)); + return (ret); + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + *dbenvp = dbenv; + return (0); +} + +/* Open and configure an environment. 
*/ +int +env_init(dbenv, home) + DB_ENV *dbenv; + const char *home; +{ + u_int32_t flags; + int ret; + + (void)dbenv->set_cachesize(dbenv, 0, CACHESIZE, 0); + (void)dbenv->set_flags(dbenv, DB_TXN_NOSYNC, 1); + + flags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | + DB_INIT_REP | DB_INIT_TXN | DB_RECOVER | DB_THREAD; + if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) + dbenv->err(dbenv, ret, "can't open environment"); + return (ret); +} + +/* + * In this application, we specify all communication via the command line. In + * a real application, we would expect that information about the other sites + * in the system would be maintained in some sort of configuration file. The + * critical part of this interface is that we assume at startup that we can + * find out + * 1) what host/port we wish to listen on for connections, + * 2) a (possibly empty) list of other sites we should attempt to connect + * to; and + * 3) what our Berkeley DB home environment is. + * + * These pieces of information are expressed by the following flags. 
+ * -a all|quorum (optional; repmgr only, a stands for ack policy) + * -b (optional, b stands for bulk) + * -C or -M start up as client or master (optional for repmgr, required + * for base example) + * -h home directory (required) + * -l host:port (required for base example; + * required for repmgr unless -L is specified; l stands for local) + * -L host:port (optional; repmgr only, local site will be the group creator) + * -n nsites (optional; base example only, number of sites in replication group; + * defaults to 0 in which case we try to dynamically compute the + * number of sites in the replication group) + * -p priority (optional: defaults to 100) + * -r host:port (optional; r stands for remote; any number of these may be + * specified) + * -R host:port (optional; repmgr only, remote peer) + * -v (optional; v stands for verbose) + */ +void +usage(is_repmgr, progname) + const int is_repmgr; + const char *progname; +{ + fprintf(stderr, "usage: %s ", progname); + if (is_repmgr) + fprintf(stderr, "[-CM]-h home -l|-L host:port %s%s\n", + "[-r host:port][-R host:port][-a all|quorum]", + "[-b][-p priority][-v]"); + else + fprintf(stderr, "-CM -h home -l host:port[-r host:port]%s", + "[-b][-n nsites][-p priority][-v]\n"); + exit(EXIT_FAILURE); +} + +/* + * This is a very simple thread that performs checkpoints at a fixed + * time interval. For a master site, the time interval is one minute + * plus the duration of the checkpoint_delay timeout (30 seconds by + * default.) For a client site, the time interval is one minute. + */ +void * +checkpoint_thread(args) + void *args; +{ + DB_ENV *dbenv; + SHARED_DATA *shared; + supthr_args *ca; + int i, ret; + + ca = (supthr_args *)args; + dbenv = ca->dbenv; + shared = ca->shared; + + for (;;) { + /* + * Wait for one minute, polling once per second to see if + * application has finished. When application has finished, + * terminate this thread. 
+ */ + for (i = 0; i < 60; i++) { + sleep(1); + if (shared->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + /* Perform a checkpoint. */ + if ((ret = dbenv->txn_checkpoint(dbenv, 0, 0, 0)) != 0) { + dbenv->err(dbenv, ret, + "Could not perform checkpoint.\n"); + return ((void *)EXIT_FAILURE); + } + } +} + +/* + * This is a simple log archive thread. Once per minute, it removes all but + * the most recent 3 logs that are safe to remove according to a call to + * DB_ENV->log_archive(). + * + * Log cleanup is needed to conserve disk space, but aggressive log cleanup + * can cause more frequent client initializations if a client lags too far + * behind the current master. This can happen in the event of a slow client, + * a network partition, or a new master that has not kept as many logs as the + * previous master. + * + * The approach in this routine balances the need to mitigate against a + * lagging client by keeping a few more of the most recent unneeded logs + * with the need to conserve disk space by regularly cleaning up log files. + * Use of automatic log removal (DB_ENV->log_set_config() DB_LOG_AUTO_REMOVE + * flag) is not recommended for replication due to the risk of frequent + * client initializations. + */ +void * +log_archive_thread(args) + void *args; +{ + DB_ENV *dbenv; + SHARED_DATA *shared; + char **begin, **list; + supthr_args *la; + int i, listlen, logs_to_keep, minlog, ret; + + la = (supthr_args *)args; + dbenv = la->dbenv; + shared = la->shared; + logs_to_keep = 3; + + for (;;) { + /* + * Wait for one minute, polling once per second to see if + * application has finished. When application has finished, + * terminate this thread. + */ + for (i = 0; i < 60; i++) { + sleep(1); + if (shared->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + /* Get the list of unneeded log files. 
*/ + if ((ret = dbenv->log_archive(dbenv, &list, DB_ARCH_ABS)) + != 0) { + dbenv->err(dbenv, ret, + "Could not get log archive list."); + return ((void *)EXIT_FAILURE); + } + if (list != NULL) { + listlen = 0; + /* Get the number of logs in the list. */ + for (begin = list; *begin != NULL; begin++, listlen++); + /* + * Remove all but the logs_to_keep most recent + * unneeded log files. + */ + minlog = listlen - logs_to_keep; + for (begin = list, i= 0; i < minlog; list++, i++) { + if ((ret = unlink(*list)) != 0) { + dbenv->err(dbenv, ret, + "logclean: remove %s", *list); + dbenv->errx(dbenv, + "logclean: Error remove %s", *list); + free(begin); + return ((void *)EXIT_FAILURE); + } + } + free(begin); + } + } +} diff --git a/examples/c/ex_rep/common/rep_common.h b/examples/c/ex_rep/common/rep_common.h new file mode 100644 index 00000000..a8d9881c --- /dev/null +++ b/examples/c/ex_rep/common/rep_common.h @@ -0,0 +1,82 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* User-specified role an environment should play in the replication group. */ +typedef enum { MASTER, CLIENT, UNKNOWN } ENV_ROLE; + +/* User-specified information about a replication site. */ +typedef struct { + char *host; /* Host name. */ + u_int32_t port; /* Port on which to connect to this site. */ + int peer; /* Whether remote site is repmgr peer. */ + int creator; /* Whether local site is group creator. */ +} repsite_t; + +/* Data used for common replication setup. */ +typedef struct { + const char *progname; + char *home; + int nsites; + int remotesites; + ENV_ROLE role; + repsite_t self; + repsite_t *site_list; +} SETUP_DATA; + +/* Data shared by both repmgr and base versions of this program. */ +typedef struct { + int is_master; + int app_finished; + int in_client_sync; +} SHARED_DATA; + +/* Arguments for support threads. 
*/ +typedef struct { + DB_ENV *dbenv; + SHARED_DATA *shared; +} supthr_args; + +/* Portability macros for basic threading & timing */ +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#define snprintf _snprintf +#define sleep(s) Sleep(1000 * (s)) + +extern int getopt(int, char * const *, const char *); + +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1) + +#else /* !_WIN32 */ +#include +#include + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +#endif + +void *checkpoint_thread __P((void *)); +int common_rep_setup __P((DB_ENV *, int, char *[], SETUP_DATA *)); +int create_env __P((const char *, DB_ENV **)); +int doloop __P((DB_ENV *, SHARED_DATA *)); +int env_init __P((DB_ENV *, const char *)); +int finish_support_threads __P((thread_t *, thread_t *)); +void *log_archive_thread __P((void *)); +int start_support_threads __P((DB_ENV *, supthr_args *, thread_t *, + thread_t *)); +void usage __P((const int, const char *)); diff --git a/examples/c/ex_rep/mgr/rep_mgr.c b/examples/c/ex_rep/mgr/rep_mgr.c new file mode 100644 index 00000000..62ebf745 --- /dev/null +++ b/examples/c/ex_rep/mgr/rep_mgr.c @@ -0,0 +1,227 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _WIN32 +#include <unistd.h> +#endif + +#include <db.h> + +#include "../common/rep_common.h" + +typedef struct { + SHARED_DATA shared_data; +} APP_DATA; + +const char *progname = "ex_rep_mgr"; + +static void event_callback __P((DB_ENV *, u_int32_t, void *)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + DB_ENV *dbenv; + DB_SITE *dbsite; + SETUP_DATA setup_info; + repsite_t *site_list; + APP_DATA my_app_data; + thread_t ckp_thr, lga_thr; + supthr_args sup_args; + u_int32_t start_policy; + int i, ret, t_ret; + + memset(&setup_info, 0, sizeof(SETUP_DATA)); + setup_info.progname = progname; + memset(&my_app_data, 0, sizeof(APP_DATA)); + dbenv = NULL; + ret = 0; + + start_policy = DB_REP_ELECTION; + + if ((ret = create_env(progname, &dbenv)) != 0) + goto err; + dbenv->app_private = &my_app_data; + (void)dbenv->set_event_notify(dbenv, event_callback); + + /* Parse command line and perform common replication setup. */ + if ((ret = common_rep_setup(dbenv, argc, argv, &setup_info)) != 0) + goto err; + + /* Perform repmgr-specific setup based on command line options. */ + if (setup_info.role == MASTER) + start_policy = DB_REP_MASTER; + else if (setup_info.role == CLIENT) + start_policy = DB_REP_CLIENT; + + /* Configure the local site. */ + if ((ret = dbenv->repmgr_site(dbenv, setup_info.self.host, + setup_info.self.port, &dbsite, 0)) != 0) { + dbenv->err(dbenv, ret, "Could not set local site."); + goto err; + } + dbsite->set_config(dbsite, DB_LOCAL_SITE, 1); + if (setup_info.self.creator) + dbsite->set_config(dbsite, DB_GROUP_CREATOR, 1); + + if ((ret = dbsite->close(dbsite)) != 0) { + dbenv->err(dbenv, ret, "DB_SITE->close"); + goto err; + } + + /* Configure the remote site list. 
*/ + site_list = setup_info.site_list; + for (i = 0; i < setup_info.remotesites; i++) { + if ((ret = dbenv->repmgr_site(dbenv, site_list[i].host, + site_list[i].port, &dbsite, 0)) != 0) { + dbenv->err(dbenv, ret, "Could not add site %s:%d", + site_list[i].host, (int)site_list[i].port); + goto err; + } + dbsite->set_config(dbsite, DB_BOOTSTRAP_HELPER, 1); + if (site_list[i].peer) + dbsite->set_config(dbsite, DB_REPMGR_PEER, 1); + if ((ret = dbsite->close(dbsite)) != 0) { + dbenv->err(dbenv, ret, "DB_SITE->close"); + goto err; + } + } + + /* + * Configure heartbeat timeouts so that repmgr monitors the + * health of the TCP connection. Master sites broadcast a heartbeat + * at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout. + * Client sites wait for message activity the length of the + * DB_REP_HEARTBEAT_MONITOR timeout before concluding that the + * connection to the master is lost. The DB_REP_HEARTBEAT_MONITOR + * timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout. + */ + if ((ret = dbenv->rep_set_timeout(dbenv, DB_REP_HEARTBEAT_SEND, + 5000000)) != 0) + dbenv->err(dbenv, ret, + "Could not set heartbeat send timeout.\n"); + if ((ret = dbenv->rep_set_timeout(dbenv, DB_REP_HEARTBEAT_MONITOR, + 10000000)) != 0) + dbenv->err(dbenv, ret, + "Could not set heartbeat monitor timeout.\n"); + + /* + * The following repmgr features may also be useful to your + * application. See Berkeley DB documentation for more details. + * - Two-site strict majority rule - In a two-site replication + * group, require both sites to be available to elect a new + * master. + * - Timeouts - Customize the amount of time repmgr waits + * for such things as waiting for acknowledgements or attempting + * to reconnect to other sites. + * - Site list - return a list of sites currently known to repmgr. + */ + + if ((ret = env_init(dbenv, setup_info.home)) != 0) + goto err; + + /* Start checkpoint and log archive threads. 
*/ + sup_args.dbenv = dbenv; + sup_args.shared = &my_app_data.shared_data; + if ((ret = start_support_threads(dbenv, &sup_args, &ckp_thr, + &lga_thr)) != 0) + goto err; + + if ((ret = dbenv->repmgr_start(dbenv, 3, start_policy)) != 0) + goto err; + + if ((ret = doloop(dbenv, &my_app_data.shared_data)) != 0) { + dbenv->err(dbenv, ret, "Client failed"); + goto err; + } + + /* Finish checkpoint and log archive threads. */ + if ((ret = finish_support_threads(&ckp_thr, &lga_thr)) != 0) + goto err; + + /* + * We have used the DB_TXN_NOSYNC environment flag for improved + * performance without the usual sacrifice of transactional durability, + * as discussed in the "Transactional guarantees" page of the Reference + * Guide: if one replication site crashes, we can expect the data to + * exist at another site. However, in case we shut down all sites + * gracefully, we push out the end of the log here so that the most + * recent transactions don't mysteriously disappear. + */ + if ((ret = dbenv->log_flush(dbenv, NULL)) != 0) { + dbenv->err(dbenv, ret, "log_flush"); + goto err; + } + +err: + if (dbenv != NULL && + (t_ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "failure closing env: %s (%d)\n", + db_strerror(t_ret), t_ret); + if (ret == 0) + ret = t_ret; + } + + return (ret); +} + +static void +event_callback(dbenv, which, info) + DB_ENV *dbenv; + u_int32_t which; + void *info; +{ + APP_DATA *app = dbenv->app_private; + SHARED_DATA *shared = &app->shared_data; + int err; + + + switch (which) { + case DB_EVENT_PANIC: + err = *(int*)info; + printf("Got a panic: %s (%d)\n", db_strerror(err), err); + abort(); + + case DB_EVENT_REP_CLIENT: + shared->is_master = 0; + shared->in_client_sync = 1; + break; + + case DB_EVENT_REP_MASTER: + shared->is_master = 1; + shared->in_client_sync = 0; + break; + + case DB_EVENT_REP_NEWMASTER: + shared->in_client_sync = 1; + break; + + case DB_EVENT_REP_PERM_FAILED: + /* + * Did not get enough acks to guarantee transaction + * 
durability based on the configured ack policy. This + * transaction will be flushed to the master site's + * local disk storage for durability. + */ + printf( + "Insufficient acknowledgements to guarantee transaction durability.\n"); + break; + + case DB_EVENT_REP_STARTUPDONE: + shared->in_client_sync = 0; + break; + + default: + dbenv->errx(dbenv, "ignoring event %d", which); + } +} diff --git a/examples/c/ex_rep_chan/rep_chan.c b/examples/c/ex_rep_chan/rep_chan.c new file mode 100644 index 00000000..1742ed38 --- /dev/null +++ b/examples/c/ex_rep_chan/rep_chan.c @@ -0,0 +1,757 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif +#include + +#include "rep_chan.h" + +static int doloop __P((DB_ENV *, GLOBAL *)); +static void get_op __P((DB_ENV *, DB_CHANNEL *, GLOBAL *, DBT *, + u_int32_t)); +static int master_op __P((DB_ENV *, GLOBAL *, int, DBT *, int)); +static void operation_dispatch __P((DB_ENV *, DB_CHANNEL *, DBT *, + u_int32_t, u_int32_t)); +static int parse_input __P((char *, int *, DBT *, int *)); +static int process_reply __P((DB_ENV *, int, DBT *)); +static void put_op __P((DB_ENV *, DB_CHANNEL *, int, GLOBAL *, DBT *, + u_int32_t)); +static void send_error_reply __P((DB_CHANNEL *, int)); +static void send_reply __P((DB_CHANNEL *, DBT *, int)); + +const char *progname = "ex_rep_chan"; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + DB_ENV *dbenv; + SETUP_DATA setup_info; + GLOBAL global; + thread_t ckp_thr, lga_thr; + u_int32_t start_policy; + int ret, t_ret; + + memset(&setup_info, 0, sizeof(SETUP_DATA)); + setup_info.progname = progname; + memset(&global, 0, sizeof(GLOBAL)); + dbenv = NULL; + + if ((ret = mutex_init(&global.mutex, NULL)) != 0) + return (ret); + start_policy = DB_REP_ELECTION; + + if ((ret = create_env(progname, &dbenv)) 
!= 0) + goto err; + dbenv->app_private = &global; + (void)dbenv->set_event_notify(dbenv, event_callback); + + /* + * Set up the callback function for the channel. + */ + (void)dbenv->repmgr_msg_dispatch(dbenv, operation_dispatch, 0); + + /* Parse command line and perform common replication setup. */ + if ((ret = rep_setup(dbenv, argc, argv, &setup_info)) != 0) + goto err; + + /* Perform repmgr-specific setup based on command line options. */ + if (setup_info.role == MASTER) + start_policy = DB_REP_MASTER; + else if (setup_info.role == CLIENT) + start_policy = DB_REP_CLIENT; + + if ((ret = env_init(dbenv, setup_info.home)) != 0) + goto err; + + /* Start checkpoint and log archive threads. */ + global.dbenv = dbenv; + if ((ret = start_support_threads(dbenv, &global, &ckp_thr, + &lga_thr)) != 0) + goto err; + + if ((ret = dbenv->repmgr_start(dbenv, 3, start_policy)) != 0) + goto err; + + if ((ret = doloop(dbenv, &global)) != 0) { + dbenv->err(dbenv, ret, "Site failed"); + goto err; + } + + /* Finish checkpoint and log archive threads. */ + if ((ret = finish_support_threads(&ckp_thr, &lga_thr)) != 0) + goto err; + + /* + * We have used the DB_TXN_NOSYNC environment flag for improved + * performance without the usual sacrifice of transactional durability, + * as discussed in the "Transactional guarantees" page of the Reference + * Guide: if one replication site crashes, we can expect the data to + * exist at another site. However, in case we shut down all sites + * gracefully, we push out the end of the log here so that the most + * recent transactions don't mysteriously disappear. 
+ */ + if ((ret = dbenv->log_flush(dbenv, NULL)) != 0) { + dbenv->err(dbenv, ret, "log_flush"); + goto err; + } + +err: + if (global.channel != NULL && + (t_ret = global.channel->close(global.channel, 0)) != 0) { + fprintf(stderr, "failure closing channel: %s (%d)\n", + db_strerror(t_ret), t_ret); + if (ret == 0) + ret = t_ret; + } + if (dbenv != NULL && + (t_ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "failure closing env: %s (%d)\n", + db_strerror(t_ret), t_ret); + if (ret == 0) + ret = t_ret; + } + + return (ret); +} + +/* + * This is the main user-driven loop of the program. Read a command + * from the user and process it, sending it to the master if needed. + */ +static int +doloop(dbenv, global) + DB_ENV *dbenv; + GLOBAL *global; +{ + DB *dbp; + DBT cmdargs[REPCMD_MAX_DBT]; + char buf[BUFSIZE]; + int cmd, ndbt, ret; + + dbp = NULL; + ret = 0; + + memset(cmdargs, 0, sizeof(DBT) * REPCMD_MAX_DBT); + for (;;) { + printf("QUOTESERVER> "); + fflush(stdout); + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + if ((ret = parse_input(buf, &cmd, cmdargs, &ndbt)) != 0) + return (ret); + if (cmd == REPCMD_INVALID) + continue; + if (cmd == REPCMD_HELP) { + print_cmdhelp(); + continue; + } + if (cmd == REPCMD_EXIT) { + global->app_finished = 1; + break; + } + + /* + * All other commands deal with the database. Open it + * now if it isn't already open. + */ + if (dbp == NULL) { + ret = open_dbp(dbenv, global->is_master, &dbp); + if (ret == ENOENT || ret == DB_REP_HANDLE_DEAD || + ret == DB_LOCK_DEADLOCK) + continue; + if (ret != 0) + goto err; + } + if (cmd == REPCMD_PRINT) { + /* + * If this is a client in the middle of + * synchronizing with the master, the client data + * is possibly stale and won't be displayed until + * client synchronization is finished. It is also + * possible to display the stale data if this is + * acceptable to the application. 
+ */ + if (global->in_client_sync) + printf( +"Cannot read data during client synchronization - please try again.\n"); + else + switch ((ret = print_stocks(dbp))) { + case 0: + break; + case DB_REP_HANDLE_DEAD: + (void)dbp->close(dbp, DB_NOSYNC); + dbp = NULL; + break; + default: + dbp->err(dbp, ret, + "Error traversing data"); + goto err; + } + } else { + /* + * This is a command that should be forwarded + * to the master only if we actually have something + * to send. + * + * Even if this site is the master, the channel + * does the right thing and executes locally. + */ + if (ndbt > 1 && (ret = master_op(dbenv, global, + cmd, cmdargs, ndbt)) != 0) + goto err; + } + } + +err: + if (dbp != NULL) + (void)dbp->close(dbp, DB_NOSYNC); + return (ret); +} + +/* + * Take the user input and break it up into its list of DBTs. + */ +static int +parse_input(buf, cmdp, argsdbt, ndbtp) + char *buf; + int *cmdp, *ndbtp; + DBT *argsdbt; +{ + DBT *dbtp; + int i, max, ret; + char *arg, *cmd; + + *ndbtp = 0; + ret = 0; + + /* + * Commands are: + * . | print - Print out current contents. + * . ? | help - Print out commands. + * . exit | quit | q - Stop program. + * . get key key ... - Read the given keys at the master site. + * . put key value key value ... - Write given pairs in one txn. + * . put_sync key value key value ... - Write given pairs in one txn + * and don't return until the data is replicated to the local site. + */ +#define DELIM " \t\n" + cmd = strtok(&buf[0], DELIM); + *cmdp = parse_cmd(cmd); + + /* + * These commands take no args. Return (ignoring any args + * the user may have actually given us). + */ + if (*cmdp == REPCMD_INVALID || *cmdp == REPCMD_HELP || + *cmdp == REPCMD_PRINT || *cmdp == REPCMD_EXIT) + return (0); + + /* + * All other commands require at least one arg. Print a message + * if there are none. Don't return an error because we don't + * want the program to exit, just go back to the user. 
+ */ + if ((arg = strtok(NULL, DELIM)) == NULL) { + printf("%s command expects at least one arg\n",cmd); + return (0); + } + + /* + * The 0'th DBT is the command we send to the master. + */ + dbtp = &argsdbt[0]; + dbtp->data = cmdp; + dbtp->size = sizeof(int); + /* + * For a get, the master returns key/data pairs so we can only ask for + * half the maximum of keys. + */ + if (*cmdp == REPCMD_GET) + max = REPCMD_MAX_DBT / 2; + else + max = REPCMD_MAX_DBT; + for (i = 1; i < max && arg != NULL; + i++, arg = strtok(NULL, DELIM)) { + dbtp = &argsdbt[i]; + dbtp->data = arg; + dbtp->size = (u_int32_t)strlen(arg); + } + + /* + * See if we reached the maximum size of input. + */ + *ndbtp = i; + if (arg != NULL) + printf("Reached maximum %d input tokens. Ignoring remainder\n", + max); + /* + * See if we have a mismatched number of key/data pairs for put. + * We check for % 2 == 0 for the straggler case because key/data + * pairs plus a command gives us an odd number. + */ + if ((*cmdp == REPCMD_PUT || *cmdp == REPCMD_PUTSYNC) && + (i % 2) == 0) { + *ndbtp = i - 1; + printf("Mismatched key/data pairs. Ignoring straggler.\n"); + } + return (0); +} + +/* + * Operations that get forwarded to the master are an array of DBTs. + * This is what each operation looks like on the request and reply side. + * For every request, DBT[0] is a header DBT that contains the command. + * For every reply, DBT[0] is the status; if DBT[0] + * is an error indicator, DBT[1] is always the error value. + * + * NOTE: This example assumes a homogeneous set of environments. If you + * are running with mixed endian hardware you will need to be aware of + * byte order issues between sender and recipient. + * + * This function is run on the requesting side of the channel. + * + * Each operation is performed at the master as a single transaction. + * For example, when you write 3 key/data pairs in a single 'put' command + * all 3 will be done under one transaction and will be applied all-or-none. 
+ * + * get key1 key2 key3... + * DBT[0] GET command + * DBT[1] key1 + * DBT[2] key2... + * Reply: + * DBT[0] Success + * DBT[1] key1 + * DBT[2] data1 + * DBT[3] key2 + * DBT[4] data2... + * + * put key1 data1 key2 data2... + * DBT[0] PUT command + * DBT[1] key1 + * DBT[2] data1 + * DBT[3] key2 + * DBT[4] data2... + * Reply: + * DBT[0] Success + * + * put_sync key1 data1 key2 data2... + * DBT[0] PUT_NOWAIT command + * DBT[1] key1 + * DBT[2] data1 + * DBT[3] key2 + * DBT[4] data2... + * Reply: + * DBT[0] Success + * DBT[1] Read-your-write txn token + */ +static int +master_op(dbenv, global, cmd, cmdargs, ndbt) + DB_ENV *dbenv; + GLOBAL *global; + int cmd, ndbt; + DBT *cmdargs; +{ + DB_CHANNEL *chan; + DBT resp; + u_int32_t flags; + int ret; + + memset(&resp, 0, sizeof(DBT)); + resp.flags = DB_DBT_MALLOC; + if (global->channel == NULL && + (ret = dbenv->repmgr_channel(dbenv, DB_EID_MASTER, + &global->channel, 0)) != 0) { + return (ret); + /* + * If you want to change the default channel timeout + * call global->channel->set_timeout here. + */ + } + + chan = global->channel; + flags = 0; + /* + * Send message. Wait for reply if needed. All replies can + * potentially get multiple DBTs, even a put operation. + * The reason is that if an error occurs, we send back an error + * indicator header DBT followed by a DBT with the error value. + * For get operations, we send back a DBT for each key given. + */ + flags = DB_MULTIPLE; + if ((ret = chan->send_request(chan, + cmdargs, ndbt, &resp, 0, flags)) != 0) + goto out; + ret = process_reply(dbenv, cmd, &resp); + free(resp.data); +out: + return (ret); +} + +/* + * This is the recipient's (master's) callback function. Functions called + * by this function are going to be performed at the master and then the + * results packaged up and sent back to the request originator. + * + * This function runs on the serving side of the channel. 
+ */ +static void +operation_dispatch(dbenv, chan, reqdbts, ndbt, flags) + DB_ENV *dbenv; + DB_CHANNEL *chan; + DBT *reqdbts; + u_int32_t ndbt, flags; +{ + DBT *d; + GLOBAL *global = dbenv->app_private; + int cmd, ret; + + d = &reqdbts[0]; + cmd = *(int *)d->data; + if (cmd != REPCMD_PUT && cmd != REPCMD_PUTSYNC && + cmd != REPCMD_GET) { + fprintf(stderr, "Received unknown operation %d\n", cmd); + abort(); + } + if (mutex_lock(&global->mutex) != 0) { + fprintf(stderr, "can't lock mutex\n"); + abort(); + } + if (global->dbp == NULL) { +retry: + ret = open_dbp(dbenv, global->is_master, &global->dbp); + if (ret == ENOENT || ret == DB_REP_HANDLE_DEAD || + ret == DB_LOCK_DEADLOCK) { + dbenv->err(dbenv, ret, "Retry opening database file."); + goto retry; + } + /* + * If the sender is expecting a reply, send the error value. + */ + if (ret != 0 && flags != DB_REPMGR_NEED_RESPONSE) { + send_error_reply(chan, ret); + return; + } + } + if (mutex_unlock(&global->mutex) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + abort(); + } + switch (cmd) { + case REPCMD_GET: + get_op(dbenv, chan, global, reqdbts, ndbt); + break; + case REPCMD_PUT: + case REPCMD_PUTSYNC: + put_op(dbenv, chan, cmd, global, reqdbts, ndbt); + break; + } + return; +} + +/* + * Receive a list of keys and send back the list of key/data pairs. + * + * This function runs on the serving side of the channel. + */ +static void +get_op(dbenv, chan, global, reqdbts, ndbt) + DB_ENV *dbenv; + DB_CHANNEL *chan; + GLOBAL *global; + DBT *reqdbts; + u_int32_t ndbt; +{ + DB *dbp; + DBT *key, reply[REPCMD_MAX_DBT]; + DB_TXN *txn; + u_int32_t i, resp_ndbt; + int ret; + + /* + * We have a valid dbp in the global structure at this point. + */ + memset(&reply, 0, sizeof(DBT) * REPCMD_MAX_DBT); + if ((ret = dbenv->txn_begin(dbenv, NULL, &txn, 0)) != 0) { + txn = NULL; + goto err; + } + + dbp = global->dbp; + /* + * Start at 1. The 0'th DBT is reserved for the header. 
+ * For the request DBTs, it contains the request header and for + * the reply DBTs it will contain the reply header. + * That will be filled in when we send the reply. + * + * On the reply, copy in the key given and then the data, so that + * we send back key/data pairs. + */ + for (resp_ndbt = 1, i = 1; i < ndbt; i++) { + key = &reqdbts[i]; + /* + * Remove setting the DB_DBT_MALLOC flag for the key DBT + * that we're sending back in the reply. + */ + reply[resp_ndbt].data = reqdbts[i].data; + reply[resp_ndbt].size = reqdbts[i].size; + reply[resp_ndbt++].flags = 0; + reply[resp_ndbt].flags = DB_DBT_MALLOC; + if ((ret = dbp->get(dbp, + txn, key, &reply[resp_ndbt++], 0)) != 0) + goto err; + } + if ((ret = txn->commit(txn, 0)) != 0) { + txn = NULL; + goto err; + } + send_reply(chan, &reply[0], resp_ndbt); + /* + * On success, free any DBT that used DB_DBT_MALLOC. + */ + for (i = 1; i < resp_ndbt; i++) { + if (reply[i].flags == DB_DBT_MALLOC) + free(reply[i].data); + } + return; + +err: + if (txn != NULL) + (void)txn->abort(txn); + send_error_reply(chan, ret); + for (i = 1; i < resp_ndbt; i++) { + if (reply[i].flags == DB_DBT_MALLOC && reply[i].data != NULL) + free(reply[i].data); + } + return; + +} + +/* + * Receive a list of key/data pairs to write into the database. + * If the request originator asked, send back the txn read token. + * + * This function runs on the serving side of the channel. + */ +static void +put_op(dbenv, chan, cmd, global, reqdbts, ndbt) + DB_ENV *dbenv; + DB_CHANNEL *chan; + int cmd; + GLOBAL *global; + DBT *reqdbts; + u_int32_t ndbt; +{ + DB *dbp; + DBT *data, *key; + DBT reply[REPCMD_MAX_DBT]; + DB_TXN *txn; + DB_TXN_TOKEN t; + u_int32_t i, reply_ndbt; + int ret; + + /* + * We have a valid dbp in the global structure at this point. + */ + memset(reply, 0, sizeof(DBT) * REPCMD_MAX_DBT); + if ((ret = dbenv->txn_begin(dbenv, NULL, &txn, 0)) != 0) { + txn = NULL; + goto err; + } + + dbp = global->dbp; + /* + * Start at 1. 
The 0'th DBT is reserved for the header. + * For the request DBTs, it contains the request header and for + * the reply DBTs it will contain the reply header. + * That will be filled in when we send the reply. + */ + if (ndbt % 2 == 0) { + fprintf(stderr, "ERROR: Unpaired key/data items.\n"); + ret = EINVAL; + goto err; + } + for (i = 1; i < ndbt; i+=2) { + key = &reqdbts[i]; + data = &reqdbts[i + 1]; + if ((ret = dbp->put(dbp, txn, key, data, 0)) != 0) + goto err; + } + if (cmd == REPCMD_PUTSYNC && + (ret = txn->set_commit_token(txn, &t)) != 0) + goto err; + if ((ret = txn->commit(txn, 0)) != 0) { + txn = NULL; + goto err; + } + if (cmd == REPCMD_PUTSYNC) { + reply[1].data = &t; + reply[1].size = sizeof(DB_TXN_TOKEN); + reply_ndbt = 2; + } else + reply_ndbt = 1; + + send_reply(chan, &reply[0], reply_ndbt); + return; + +err: + if (txn != NULL) + (void)txn->abort(txn); + send_error_reply(chan, ret); + return; +} + +/* + * Send an error reply. DBT[0] is the error status and + * DBT[1] is the error return value. + * + * This function runs on the serving side of the channel. + */ +static void +send_error_reply(chan, ret) + DB_CHANNEL *chan; + int ret; +{ + DBT d[REPLY_ERROR_NDBT]; + int r; + + r = REPREPLY_ERROR; + d[0].data = &r; + d[0].size = sizeof(int); + d[1].data = &ret; + d[1].size = sizeof(int); + (void)chan->send_msg(chan, d, REPLY_ERROR_NDBT, 0); + return; +} + +/* + * Send back a success reply. DBT[0] is the success status. + * The rest of the DBTs were already filled in by the caller. + * + * This function runs on the serving side of the channel. + */ +static void +send_reply(chan, reply, ndbt) + DB_CHANNEL *chan; + DBT *reply; + int ndbt; +{ + int r; + + /* + * Fill in the reply header now. The rest is already + * filled in for us. + */ + r = REPREPLY_OK; + reply[0].data = &r; + reply[0].size = sizeof(int); + (void)chan->send_msg(chan, reply, ndbt, 0); + return; +} + +/* + * Process the reply sent by the master. 
All operations use DB_MULTIPLE + * because all have the potential to return multiple DBTs. + * + * This function runs on the requesting side of the channel. + */ +static int +process_reply(dbenv, cmd, resp) + DB_ENV *dbenv; + int cmd; + DBT *resp; +{ + u_int32_t dlen, hlen, klen; + int first, ret; + char *cmdstr; + void *dp, *hdr, *kp, *p; + + dlen = 0; + kp = NULL; + ret = 0; + + if (cmd == REPCMD_GET) + cmdstr = "DB->get"; + else + cmdstr = "DB->put"; + + DB_MULTIPLE_INIT(p, resp); + /* + * First part of all replies is the header. + * If it is an error reply, there is only 1 more expected data DBT, + * which is the error value. If it is success, then what we do + * depends on the command operations. + */ + DB_MULTIPLE_NEXT(p, resp, hdr, hlen); + if (*(int *)hdr == REPREPLY_ERROR) { + DB_MULTIPLE_NEXT(p, resp, dp, dlen); + ret = *(int *)dp; + dbenv->err(dbenv, *(int *)dp, cmdstr); + if (cmd == REPCMD_GET && ret == DB_NOTFOUND) + ret = 0; + } else if (cmd != REPCMD_PUT) { + if (cmd == REPCMD_PUTSYNC) { + /* + * The only expected successful response from this is + * the token. Get it and wait for it to be applied + * locally. + */ + DB_MULTIPLE_NEXT(p, resp, dp, dlen); + ret = dbenv->txn_applied(dbenv, + (DB_TXN_TOKEN *)dp, REPLY_TOKEN_TIMEOUT, 0); + if (ret == DB_NOTFOUND) + fprintf(stderr, + "%s: Token never expected to arrive.\n", + cmdstr); + if (ret == DB_TIMEOUT) + fprintf(stderr, + "%s: Token arrival timed out.\n", + cmdstr); + goto out; + } else { + /* + * We have a get with an arbitrary number of key/data + * pairs as responses. But they should come in pairs. + */ + first = 1; + do { + DB_MULTIPLE_NEXT(p, resp, kp, klen); + /* + * If p is NULL here, we processed our last + * set of key/data pairs. + */ + if (p != NULL) { + DB_MULTIPLE_NEXT(p, resp, dp, dlen); + /* + * If p is NULL here, we got a key + * but no data. That is an error. 
+ */ + if (p == NULL) { + fprintf(stderr, + "%s: Unexpected pair mismatch\n", cmdstr); + ret = EINVAL; + goto out; + } + print_one(kp, klen, dp, dlen, first); + } + first = 0; + } while (p != NULL); + } + } + +out: + return (ret); +} diff --git a/examples/c/ex_rep_chan/rep_chan.h b/examples/c/ex_rep_chan/rep_chan.h new file mode 100644 index 00000000..96647cab --- /dev/null +++ b/examples/c/ex_rep_chan/rep_chan.h @@ -0,0 +1,130 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * $Id$ + */ + +/* User-specified role an environment should play in the replication group. */ +typedef enum { MASTER, CLIENT, UNKNOWN } ENV_ROLE; + +/* User-specified information about a replication site. */ +typedef struct { + char *host; /* Host name. */ + u_int32_t port; /* Port on which to connect to this site. */ + int peer; /* Whether remote site is repmgr peer. */ + int creator; /* Whether local site is group creator. */ +} repsite_t; + +/* Data used for common replication setup. */ +typedef struct { + const char *progname; + char *home; + int nsites; + int remotesites; + ENV_ROLE role; + repsite_t self; + repsite_t *site_list; +} SETUP_DATA; + +/* Operational commands for the program. */ +#define REPCMD_INVALID 0 +#define REPCMD_EXIT 1 +#define REPCMD_GET 2 +#define REPCMD_HELP 3 +#define REPCMD_PRINT 4 +#define REPCMD_PUT 5 /* Put on master and return. */ +#define REPCMD_PUTSYNC 6 /* Put on master and wait for it + to be applied locally. */ + +/* + * Max args for a command. Although the API itself does not have + * a maximum, it is simpler for the example code to just have space + * allocated ahead of time. We use an odd number so that we can + * have the header DBT plus some number of key/data pairs. + */ +#define REPCMD_MAX_DBT 33 + +/* Reply commands. 
*/ +#define REPREPLY_ERROR 1 +#define REPREPLY_OK 0 + +#define REPLY_ERROR_NDBT 2 +#define REPLY_TOKEN_TIMEOUT 5000000 /* 5 seconds, arbitrary */ + +/* + * General program defines + */ +#define BUFSIZE 1024 +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" + +extern const char *progname; + +/* Portability macros for basic threading & timing */ +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#define snprintf _snprintf +#define sleep(s) Sleep(1000 * (s)) + +extern int getopt(int, char * const *, const char *); + +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1) + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 0 : -1) + +#else /* !_WIN32 */ +#include +#include + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) + +#endif + +/* Global data. 
*/ +typedef struct { + DB_ENV *dbenv; + mutex_t mutex; + int is_master; + int app_finished; + int in_client_sync; + DB_CHANNEL *channel; + DB *dbp; +} GLOBAL; + +void *checkpoint_thread __P((void *)); +int create_env __P((const char *, DB_ENV **)); +int env_init __P((DB_ENV *, const char *)); +void event_callback __P((DB_ENV *, u_int32_t, void *)); +int finish_support_threads __P((thread_t *, thread_t *)); +void *log_archive_thread __P((void *)); +int open_dbp __P((DB_ENV *, int, DB **)); +int parse_cmd __P((char *)); +void print_cmdhelp __P(()); +void print_one __P((void *, u_int32_t, void *, u_int32_t, int)); +int print_stocks __P((DB *)); +int rep_setup __P((DB_ENV *, int, char **, SETUP_DATA *)); +int start_support_threads __P((DB_ENV *, GLOBAL *, thread_t *, + thread_t *)); +void usage __P((const char *)); diff --git a/examples/c/ex_rep_chan/rep_chan_util.c b/examples/c/ex_rep_chan/rep_chan_util.c new file mode 100644 index 00000000..ade6bec4 --- /dev/null +++ b/examples/c/ex_rep_chan/rep_chan_util.c @@ -0,0 +1,670 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif +#include + +#include "rep_chan.h" + +/* + * Perform command line parsing and replication setup. + */ +int +rep_setup(dbenv, argc, argv, setup_info) + DB_ENV *dbenv; + int argc; + char *argv[]; + SETUP_DATA *setup_info; +{ + DB_SITE *dbsite; + repsite_t site, *site_list; + extern char *optarg; + char ch, *portstr; + int ack_policy, got_self, i, maxsites, priority, ret; + + got_self = maxsites = ret = site.peer = site.creator = 0; + + priority = 100; + ack_policy = DB_REPMGR_ACKS_QUORUM; + setup_info->role = UNKNOWN; + + /* + * Replication setup calls that are only needed if a command + * line option is specified are made within this while/switch + * statement. 
Replication setup calls that should be made + * whether or not a command line option is specified are after + * this while/switch statement. + */ + while ((ch = getopt(argc, argv, "Ch:L:l:Mp:R:r:v")) != EOF) { + switch (ch) { + case 'C': + setup_info->role = CLIENT; + break; + case 'h': + setup_info->home = optarg; + break; + case 'L': + setup_info->self.creator = 1; /* FALLTHROUGH */ + case 'l': + setup_info->self.host = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + fprintf(stderr, "Bad host specification.\n"); + goto err; + } + setup_info->self.port = (unsigned short)atoi(portstr); + setup_info->self.peer = 0; + got_self = 1; + break; + case 'M': + setup_info->role = MASTER; + break; + case 'p': + priority = atoi(optarg); + break; + case 'R': + site.peer = 1; /* FALLTHROUGH */ + case 'r': + site.host = optarg; + site.host = strtok(site.host, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + fprintf(stderr, "Bad host specification.\n"); + goto err; + } + site.port = (unsigned short)atoi(portstr); + if (setup_info->site_list == NULL || + setup_info->remotesites >= maxsites) { + maxsites = maxsites == 0 ? 10 : 2 * maxsites; + if ((setup_info->site_list = + realloc(setup_info->site_list, + maxsites * sizeof(repsite_t))) == NULL) { + fprintf(stderr, "System error %s\n", + strerror(errno)); + goto err; + } + } + (setup_info->site_list)[(setup_info->remotesites)++] = + site; + site.peer = 0; + site.creator = 0; + break; + case 'v': + if ((ret = dbenv->set_verbose(dbenv, + DB_VERB_REPLICATION, 1)) != 0) + goto err; + break; + case '?': + default: + usage(setup_info->progname); + } + } + + /* Error check command line. */ + if (!got_self || setup_info->home == NULL) + usage(setup_info->progname); + + /* + * Set replication group election priority for this environment. + * An election first selects the site with the most recent log + * records as the new master. 
If multiple sites have the most + * recent log records, the site with the highest priority value + * is selected as master. + */ + if ((ret = dbenv->rep_set_priority(dbenv, priority)) != 0) { + dbenv->err(dbenv, ret, "Could not set priority.\n"); + goto err; + } + + /* + * Set the policy that determines how master and client + * sites handle acknowledgement of replication messages needed for + * permanent records. The default policy of "quorum" requires only + * a quorum of electable peers sufficient to ensure a permanent + * record remains durable if an election is held. + */ + if ((ret = dbenv->repmgr_set_ack_policy(dbenv, ack_policy)) != 0) { + dbenv->err(dbenv, ret, "Could not set ack policy.\n"); + goto err; + } + + /* + * Set the threshold for the minimum and maximum time the client + * waits before requesting retransmission of a missing message. + * Base these values on the performance and load characteristics + * of the master and client host platforms as well as the round + * trip message time. + */ + if ((ret = dbenv->rep_set_request(dbenv, 20000, 500000)) != 0) { + dbenv->err(dbenv, ret, + "Could not set client_retransmission defaults.\n"); + goto err; + } + + /* + * Configure deadlock detection to ensure that any deadlocks + * are broken by having one of the conflicting lock requests + * rejected. DB_LOCK_DEFAULT uses the lock policy specified + * at environment creation time or DB_LOCK_RANDOM if none was + * specified. + */ + if ((ret = dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT)) != 0) { + dbenv->err(dbenv, ret, + "Could not configure deadlock detection.\n"); + goto err; + } + + /* Configure the local site. 
*/ + if ((ret = dbenv->repmgr_site(dbenv, setup_info->self.host, + setup_info->self.port, &dbsite, 0)) != 0) { + dbenv->err(dbenv, ret, "Could not set local site."); + goto err; + } + dbsite->set_config(dbsite, DB_LOCAL_SITE, 1); + if (setup_info->self.creator) + dbsite->set_config(dbsite, DB_GROUP_CREATOR, 1); + + if ((ret = dbsite->close(dbsite)) != 0) { + dbenv->err(dbenv, ret, "DB_SITE->close"); + goto err; + } + + site_list = setup_info->site_list; + for (i = 0; i < setup_info->remotesites; i++) { + if ((ret = dbenv->repmgr_site(dbenv, site_list[i].host, + site_list[i].port, &dbsite, 0)) != 0) { + dbenv->err(dbenv, ret, "Could not add site %s:%d", + site_list[i].host, (int)site_list[i].port); + goto err; + } + dbsite->set_config(dbsite, DB_BOOTSTRAP_HELPER, 1); + if (site_list[i].peer) + dbsite->set_config(dbsite, DB_REPMGR_PEER, 1); + if ((ret = dbsite->close(dbsite)) != 0) { + dbenv->err(dbenv, ret, "DB_SITE->close"); + } + } + + /* + * Configure heartbeat timeouts so that repmgr monitors the + * health of the TCP connection. Master sites broadcast a heartbeat + * at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout. + * Client sites wait for message activity the length of the + * DB_REP_HEARTBEAT_MONITOR timeout before concluding that the + * connection to the master is lost. The DB_REP_HEARTBEAT_MONITOR + * timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout. 
+ */ + if ((ret = dbenv->rep_set_timeout(dbenv, DB_REP_HEARTBEAT_SEND, + 5000000)) != 0) + dbenv->err(dbenv, ret, + "Could not set heartbeat send timeout.\n"); + if ((ret = dbenv->rep_set_timeout(dbenv, DB_REP_HEARTBEAT_MONITOR, + 10000000)) != 0) + dbenv->err(dbenv, ret, + "Could not set heartbeat monitor timeout.\n"); + +err: + return (ret); +} + +void +event_callback(dbenv, which, info) + DB_ENV *dbenv; + u_int32_t which; + void *info; +{ + GLOBAL *global = dbenv->app_private; + int err; + + switch (which) { + case DB_EVENT_PANIC: + err = *(int*)info; + printf("Got a panic: %s (%d)\n", db_strerror(err), err); + abort(); + case DB_EVENT_REP_CLIENT: + if ((err = mutex_lock(&global->mutex)) != 0) { + fprintf(stderr, "can't lock mutex %d\n", err); + abort(); + } + global->is_master = 0; + global->in_client_sync = 1; + if (mutex_unlock(&global->mutex) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + abort(); + } + break; + case DB_EVENT_REP_MASTER: + if ((err = mutex_lock(&global->mutex)) != 0) { + fprintf(stderr, "can't lock mutex %d\n", err); + abort(); + } + global->is_master = 1; + global->in_client_sync = 0; + if (mutex_unlock(&global->mutex) != 0) { + fprintf(stderr, "can't unlock mutex\n"); + abort(); + } + break; + case DB_EVENT_REP_NEWMASTER: + global->in_client_sync = 1; + break; + case DB_EVENT_REP_PERM_FAILED: + /* + * Did not get enough acks to guarantee transaction + * durability based on the configured ack policy. This + * transaction will be flushed to the master site's + * local disk storage for durability. 
+ */ + printf( + "Insufficient acknowledgements to guarantee transaction durability.\n"); + break; + case DB_EVENT_REP_STARTUPDONE: + global->in_client_sync = 0; + break; + default: + dbenv->errx(dbenv, "ignoring event %d", which); + break; + } +} + +int +print_stocks(dbp) + DB *dbp; +{ + DBC *dbc; + DBT key, data; + int first, ret, t_ret; + + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) { + dbp->err(dbp, ret, "can't open cursor"); + return (ret); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + for (first = 1, ret = dbc->get(dbc, &key, &data, DB_FIRST); + ret == 0; + ret = dbc->get(dbc, &key, &data, DB_NEXT)) { + print_one(key.data, key.size, data.data, data.size, first); + first = 0; + } + if ((t_ret = dbc->close(dbc)) != 0 && ret == 0) + ret = t_ret; + + switch (ret) { + case 0: + case DB_NOTFOUND: + case DB_LOCK_DEADLOCK: + return (0); + default: + return (ret); + } +} + +void +print_one(key, klen, data, dlen, print_hdr) + void *key, *data; + u_int32_t klen, dlen; + int print_hdr; +{ +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + char keybuf[MAXKEYSIZE + 1], databuf[MAXDATASIZE + 1]; + u_int32_t keysize, datasize; + + if (print_hdr) { + printf("\tSymbol\tPrice\n"); + printf("\t======\t=====\n"); + } + + keysize = klen > MAXKEYSIZE ? MAXKEYSIZE : klen; + memcpy(keybuf, key, keysize); + keybuf[keysize] = '\0'; + + datasize = dlen >= MAXDATASIZE ? MAXDATASIZE : dlen; + memcpy(databuf, data, datasize); + databuf[datasize] = '\0'; + printf("\t%s\t%s\n", keybuf, databuf); + printf("\n"); + fflush(stdout); +} + +/* Start checkpoint and log archive support threads. 
*/ +int +start_support_threads(dbenv, sup_args, ckp_thr, lga_thr) + DB_ENV *dbenv; + GLOBAL *sup_args; + thread_t *ckp_thr; + thread_t *lga_thr; +{ + int ret; + + ret = 0; + if ((ret = thread_create(ckp_thr, NULL, checkpoint_thread, + sup_args)) != 0) { + dbenv->errx(dbenv, "can't create checkpoint thread"); + goto err; + } + if ((ret = thread_create(lga_thr, NULL, log_archive_thread, + sup_args)) != 0) + dbenv->errx(dbenv, "can't create log archive thread"); +err: + return (ret); + +} + +/* Wait for checkpoint and log archive support threads to finish. */ +int +finish_support_threads(ckp_thr, lga_thr) + thread_t *ckp_thr; + thread_t *lga_thr; +{ + void *ctstatus, *ltstatus; + int ret; + + ret = 0; + if (thread_join(*lga_thr, <status) || + thread_join(*ckp_thr, &ctstatus)) { + ret = -1; + goto err; + } + if ((uintptr_t)ltstatus != EXIT_SUCCESS || + (uintptr_t)ctstatus != EXIT_SUCCESS) + ret = -1; +err: + return (ret); +} + +int +create_env(progname, dbenvp) + const char *progname; + DB_ENV **dbenvp; +{ + DB_ENV *dbenv; + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "can't create env handle: %s\n", + db_strerror(ret)); + return (ret); + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + *dbenvp = dbenv; + return (0); +} + +/* Open and configure an environment. */ +int +env_init(dbenv, home) + DB_ENV *dbenv; + const char *home; +{ + u_int32_t flags; + int ret; + + (void)dbenv->set_cachesize(dbenv, 0, CACHESIZE, 0); + (void)dbenv->set_flags(dbenv, DB_TXN_NOSYNC, 1); + + flags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | + DB_INIT_REP | DB_INIT_TXN | DB_RECOVER | DB_THREAD; + if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) + dbenv->err(dbenv, ret, "can't open environment"); + return (ret); +} + +/* + * In this application, we specify all communication via the command line. 
In + * a real application, we would expect that information about the other sites + * in the system would be maintained in some sort of configuration file. The + * critical part of this interface is that we assume at startup that we can + * find out + * 1) what host/port we wish to listen on for connections, + * 2) a (possibly empty) list of other sites we should attempt to connect + * to; and + * 3) what our Berkeley DB home environment is. + * + * These pieces of information are expressed by the following flags. + * -C or -M start up as client or master + * -h home directory (required) + * -l host:port (required unless -L is specified; l stands for local) + * -L host:port (optional; L means the group creator) + * -p priority (optional: defaults to 100) + * -r host:port (optional; r stands for remote; any number of these may be + * specified) + * -R host:port (optional; remote peer) + * -v (optional; v stands for verbose) + */ +void +usage(progname) + const char *progname; +{ + fprintf(stderr, "usage: %s ", progname); + fprintf(stderr, "[-CM]-h home -l|-L host:port %s\n", + "[-r host:port][-R host:port][-p priority][-v]"); + exit(EXIT_FAILURE); +} + +/* + * This is a very simple thread that performs checkpoints at a fixed + * time interval. For a master site, the time interval is one minute + * plus the duration of the checkpoint_delay timeout (30 seconds by + * default.) For a client site, the time interval is one minute. + */ +void * +checkpoint_thread(args) + void *args; +{ + DB_ENV *dbenv; + GLOBAL *global; + int i, ret; + + global = (GLOBAL *)args; + dbenv = global->dbenv; + + for (;;) { + /* + * Wait for one minute, polling once per second to see if + * application has finished. When application has finished, + * terminate this thread. + */ + for (i = 0; i < 60; i++) { + sleep(1); + if (global->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + /* Perform a checkpoint. 
*/ + if ((ret = dbenv->txn_checkpoint(dbenv, 0, 0, 0)) != 0) { + dbenv->err(dbenv, ret, + "Could not perform checkpoint.\n"); + return ((void *)EXIT_FAILURE); + } + } +} + +/* + * This is a simple log archive thread. Once per minute, it removes all but + * the most recent 3 logs that are safe to remove according to a call to + * DB_ENV->log_archive(). + * + * Log cleanup is needed to conserve disk space, but aggressive log cleanup + * can cause more frequent client initializations if a client lags too far + * behind the current master. This can happen in the event of a slow client, + * a network partition, or a new master that has not kept as many logs as the + * previous master. + * + * The approach in this routine balances the need to mitigate against a + * lagging client by keeping a few more of the most recent unneeded logs + * with the need to conserve disk space by regularly cleaning up log files. + * Use of automatic log removal (DB_ENV->log_set_config() DB_LOG_AUTO_REMOVE + * flag) is not recommended for replication due to the risk of frequent + * client initializations. + */ +void * +log_archive_thread(args) + void *args; +{ + DB_ENV *dbenv; + GLOBAL *global; + char **begin, **list; + int i, listlen, logs_to_keep, minlog, ret; + + global = (GLOBAL *)args; + dbenv = global->dbenv; + logs_to_keep = 3; + + for (;;) { + /* + * Wait for one minute, polling once per second to see if + * application has finished. When application has finished, + * terminate this thread. + */ + for (i = 0; i < 60; i++) { + sleep(1); + if (global->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + /* Get the list of unneeded log files. */ + if ((ret = dbenv->log_archive(dbenv, &list, DB_ARCH_ABS)) + != 0) { + dbenv->err(dbenv, ret, + "Could not get log archive list."); + return ((void *)EXIT_FAILURE); + } + if (list != NULL) { + listlen = 0; + /* Get the number of logs in the list. 
*/ + for (begin = list; *begin != NULL; begin++, listlen++); + /* + * Remove all but the logs_to_keep most recent + * unneeded log files. + */ + minlog = listlen - logs_to_keep; + for (begin = list, i= 0; i < minlog; list++, i++) { + if ((ret = unlink(*list)) != 0) { + dbenv->err(dbenv, ret, + "logclean: remove %s", *list); + dbenv->errx(dbenv, + "logclean: Error remove %s", *list); + free(begin); + return ((void *)EXIT_FAILURE); + } + } + free(begin); + } + } +} + +int +parse_cmd(cmd) + char *cmd; +{ + /* + * Commands are: + * | print + * exit | quit | q + * get + * put + * put_sync + */ + if (cmd == NULL || strcmp(cmd, "print") == 0) + return (REPCMD_PRINT); + if (strcmp(cmd, "exit") == 0 || + strcmp(cmd, "quit") == 0 || + strcmp(cmd, "q") == 0) + return (REPCMD_EXIT); + if (strcmp(cmd, "get") == 0) + return (REPCMD_GET); + if (strcmp(cmd, "?") == 0 || + strcmp(cmd, "help") == 0) + return (REPCMD_HELP); + if (strcmp(cmd, "put_sync") == 0) + return (REPCMD_PUTSYNC); + if (strcmp(cmd, "put") == 0) + return (REPCMD_PUT); + printf("Unknown invalid command %s\n", cmd); + return (REPCMD_INVALID); +} + +void +print_cmdhelp() +{ + printf(" | print - Print out current contents.\n"); + printf("? | help - Print out commands.\n"); + printf("exit | quit - Stop program.\n"); + printf("get key key ... - Read the given keys at the master site.\n"); + printf("put key value key value ... - Write given pairs in one txn.\n"); + printf("put_sync key value key value ... "); + printf("- Write given pairs in one\n"); + printf(" txn and don't return until locally available.\n"); + return; +} + +int +open_dbp(dbenv, is_master, dbpp) + DB_ENV *dbenv; + int is_master; + DB **dbpp; +{ + DB *dbp; + u_int32_t flags; + int ret; + + if ((ret = db_create(dbpp, dbenv, 0)) != 0) + return (ret); + + dbp = *dbpp; + flags = DB_AUTO_COMMIT; + /* + * Open database with DB_CREATE only if this is + * a master database. 
A client database uses + * polling to attempt to open the database without + * DB_CREATE until it is successful. + * + * This DB_CREATE polling logic can be simplified + * under some circumstances. For example, if the + * application can be sure a database is already + * there, it would never need to open it with + * DB_CREATE. + */ + if (is_master) + flags |= DB_CREATE; + + if ((ret = dbp->open(dbp, + NULL, DATABASE, NULL, DB_BTREE, flags, 0)) != 0) { + if (ret == ENOENT) { + printf( "No stock database yet available.\n"); + if ((ret = dbp->close(dbp, 0)) != 0) { + dbenv->err(dbenv, ret, + "DB->close"); + goto err; + } + *dbpp = NULL; + } + if (ret == DB_REP_HANDLE_DEAD || + ret == DB_LOCK_DEADLOCK) { + dbenv->err(dbenv, ret, + "please retry the operation"); + dbp->close(dbp, DB_NOSYNC); + *dbpp = NULL; + } + dbenv->err(dbenv, ret, "DB->open"); + goto err; + } +err: + return (ret); +} diff --git a/examples/c/ex_sequence.c b/examples/c/ex_sequence.c new file mode 100644 index 00000000..3d3fb136 --- /dev/null +++ b/examples/c/ex_sequence.c @@ -0,0 +1,132 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +#define DATABASE "sequence.db" +#define SEQUENCE "my_sequence" +int main __P((int, char *[])); +int usage __P((void)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern int optind; + DB *dbp; + DB_SEQUENCE *seq; + DBT key; + int ch, i, ret, rflag; + db_seq_t seqnum; + const char *database, *progname = "ex_sequence"; + + dbp = NULL; + seq = NULL; + + rflag = 0; + while ((ch = getopt(argc, argv, "r")) != EOF) + switch (ch) { + case 'r': + rflag = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Accept optional database name. 
*/ + database = *argv == NULL ? DATABASE : argv[0]; + + /* Optionally discard the database. */ + if (rflag) + (void)remove(database); + + /* Create and initialize database object, open the database. */ + if ((ret = db_create(&dbp, NULL, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); + if ((ret = dbp->open(dbp, + NULL, database, NULL, DB_BTREE, DB_CREATE, 0664)) != 0) { + dbp->err(dbp, ret, "%s: open", database); + goto err; + } + + if ((ret = db_sequence_create(&seq, dbp, 0)) != 0) { + dbp->err(dbp, ret, "db_sequence_create"); + goto err; + } + + memset(&key, 0, sizeof(DBT)); + key.data = SEQUENCE; + key.size = (u_int32_t)strlen(SEQUENCE); + + if ((ret = seq->open(seq, NULL, &key, DB_CREATE)) != 0) { + dbp->err(dbp, ret, "%s: DB_SEQUENCE->open", SEQUENCE); + goto err; + } + + for (i = 0; i < 10; i++) { + if ((ret = seq->get(seq, NULL, 1, &seqnum, 0)) != 0) { + dbp->err(dbp, ret, "DB_SEQUENCE->get"); + goto err; + } + + /* There's no portable way to print 64-bit numbers. */ +#ifdef _WIN32 + printf("Got sequence number %I64d\n", (int64_t)seqnum); +#else + printf( + "Got sequence number %llu\n", (unsigned long long)seqnum); +#endif + } + + /* Close everything down. 
*/ + if ((ret = seq->close(seq, 0)) != 0) { + seq = NULL; + dbp->err(dbp, ret, "DB_SEQUENCE->close"); + goto err; + } + if ((ret = dbp->close(dbp, 0)) != 0) { + fprintf(stderr, + "%s: DB->close: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); + +err: if (seq != NULL) + (void)seq->close(seq, 0); + if (dbp != NULL) + (void)dbp->close(dbp, 0); + return (EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: ex_sequence [-r] [database]\n"); + return (EXIT_FAILURE); +} diff --git a/examples/c/ex_stream.c b/examples/c/ex_stream.c new file mode 100644 index 00000000..2e30404e --- /dev/null +++ b/examples/c/ex_stream.c @@ -0,0 +1,222 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +#define DATABASE "stream.db" +#define CHUNK_SIZE 500 +#define DATA_SIZE CHUNK_SIZE * 100 + +int main __P((int, char *[])); +int usage __P((void)); +int invarg __P((const char *, int, const char *)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBC *dbcp; + DBT key, data; + DBTYPE db_type; + int ch, chunk_sz, chunk_off, data_sz, i, ret, rflag; + int page_sz; + char *database, *buf; + const char *progname = "ex_stream"; /* Program name. 
*/ + + chunk_sz = CHUNK_SIZE; + data_sz = DATA_SIZE; + chunk_off = page_sz = rflag = 0; + db_type = DB_BTREE; + while ((ch = getopt(argc, argv, "c:d:p:t:")) != EOF) + switch (ch) { + case 'c': + if ((chunk_sz = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'd': + if ((data_sz = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'p': + if ((page_sz = atoi(optarg)) <= 0 || + page_sz % 2 != 0 || page_sz < 512 || + page_sz > 64 * 1024) + return (invarg(progname, ch, optarg)); + break; + case 't': + switch (optarg[0]) { + case 'b': + db_type = DB_BTREE; + break; + case 'h': + db_type = DB_HASH; + break; + case 'r': + db_type = DB_RECNO; + break; + default: + return (invarg(progname, ch, optarg)); + break; + } + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Accept optional database name. */ + database = *argv == NULL ? DATABASE : argv[0]; + + if (chunk_sz > data_sz) { + fprintf(stderr, +"Chunk size must be less than and a factor of the data size\n"); + + return (usage()); + } + + /* Discard any existing database. */ + (void)remove(database); + + /* Create and initialize database object, open the database. */ + if ((ret = db_create(&dbp, NULL, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); + if (page_sz != 0 && (ret = dbp->set_pagesize(dbp, page_sz)) != 0) { + dbp->err(dbp, ret, "set_pagesize"); + goto err1; + } + if ((ret = dbp->set_cachesize(dbp, 0, 32 * 1024, 0)) != 0) { + dbp->err(dbp, ret, "set_cachesize"); + goto err1; + } + if ((ret = dbp->open(dbp, + NULL, database, NULL, db_type, DB_CREATE, 0664)) != 0) { + dbp->err(dbp, ret, "%s: open", database); + goto err1; + } + + /* Ensure the data size is a multiple of the chunk size. 
*/ + data_sz = data_sz - (data_sz % chunk_sz); + + /* Initialize the key/data pair for a streaming insert. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &chunk_sz; /* Our key value does not really matter. */ + key.size = sizeof(int); + data.ulen = data_sz; + data.size = chunk_sz; + data.data = buf = malloc(data_sz); + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + /* Populate the data with something. */ + for (i = 0; i < data_sz; ++i) + buf[i] = (char)('a' + i % ('z' - 'a')); + + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbp->err(dbp, ret, "DB->cursor"); + goto err1; + } + for (chunk_off = 0; chunk_off < data_sz; chunk_off += chunk_sz) { + data.size = chunk_sz; + if ((ret = dbcp->put(dbcp, &key, &data, + (chunk_off == 0 ? DB_KEYFIRST : DB_CURRENT)) != 0)) { + dbp->err(dbp, ret, "DBCursor->put"); + goto err2; + } + data.doff += chunk_sz; + } + if ((ret = dbcp->close(dbcp)) != 0) { + dbp->err(dbp, ret, "DBcursor->close"); + goto err1; + } + + memset(data.data, 0, data.ulen); + /* Retrieve the data item in chunks. */ + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbp->err(dbp, ret, "DB->cursor"); + goto err1; + } + data.doff = 0; + data.dlen = chunk_sz; + memset(data.data, 0, data.ulen); + + /* + * Loop over the item, retrieving a chunk at a time. + * The requested chunk will be stored at the start of data.data. + */ + for (chunk_off = 0; chunk_off < data_sz; chunk_off += chunk_sz) { + if ((ret = dbcp->get(dbcp, &key, &data, + (chunk_off == 0 ? 
DB_SET : DB_CURRENT)) != 0)) { + dbp->err(dbp, ret, "DBCursor->get"); + goto err2; + } + data.doff += chunk_sz; + } + + if ((ret = dbcp->close(dbcp)) != 0) { + dbp->err(dbp, ret, "DBcursor->close"); + goto err1; + } + if ((ret = dbp->close(dbp, 0)) != 0) { + fprintf(stderr, + "%s: DB->close: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); + +err2: (void)dbcp->close(dbcp); +err1: (void)dbp->close(dbp, 0); + return (EXIT_FAILURE); +} + +int +invarg(progname, arg, str) + const char *progname; + int arg; + const char *str; +{ + (void)fprintf(stderr, + "%s: invalid argument for -%c: %s\n", progname, arg, str); + return (EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, +"usage: ex_stream [-c int] [-d int] [-p int] [-t char] [database]\n"); + (void)fprintf(stderr, "Where options are:\n"); + (void)fprintf(stderr, "\t-c set the chunk size.\n"); + (void)fprintf(stderr, "\t-d set the total record size.\n"); + (void)fprintf(stderr, + "\t-t choose a database type btree (b), hash (h) or recno (r)\n"); + + return (EXIT_FAILURE); +} diff --git a/examples/c/ex_thread.c b/examples/c/ex_thread.c new file mode 100644 index 00000000..8d851915 --- /dev/null +++ b/examples/c/ex_thread.c @@ -0,0 +1,626 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#else +#include +#endif + +#include + +/* + * NB: This application is written using POSIX 1003.1b-1993 pthreads + * interfaces, which may not be portable to your system. + */ +extern int sched_yield __P((void)); /* Pthread yield function. 
*/ + +int db_init __P((const char *)); +void *deadlock __P((void *)); +void fatal __P((const char *, int, int)); +void onint __P((int)); +int main __P((int, char *[])); +int reader __P((int)); +void stats __P((void)); +void *trickle __P((void *)); +void *tstart __P((void *)); +int usage __P((void)); +void word __P((void)); +int writer __P((int)); + +int quit; /* Interrupt handling flag. */ + +struct _statistics { + int aborted; /* Write. */ + int aborts; /* Read/write. */ + int adds; /* Write. */ + int deletes; /* Write. */ + int txns; /* Write. */ + int found; /* Read. */ + int notfound; /* Read. */ +} *perf; + +const char + *progname = "ex_thread"; /* Program name. */ + +#define DATABASE "access.db" /* Database name. */ +#define WORDLIST "../test/tcl/wordlist" /* Dictionary. */ + +/* + * We can seriously increase the number of collisions and transaction + * aborts by yielding the scheduler after every DB call. Specify the + * -p option to do this. + */ +int punish; /* -p */ +int nlist; /* -n */ +int nreaders; /* -r */ +int verbose; /* -v */ +int nwriters; /* -w */ + +DB *dbp; /* Database handle. */ +DB_ENV *dbenv; /* Database environment. */ +int nthreads; /* Total threads. */ +char **list; /* Word list. */ + +/* + * ex_thread -- + * Run a simple threaded application of some numbers of readers and + * writers competing for a set of words. 
+ * + * Example UNIX shell script to run this program: + * % rm -rf TESTDIR + * % mkdir TESTDIR + * % ex_thread -h TESTDIR + */ +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int errno, optind; + DB_TXN *txnp; + pthread_t *tids; + int ch, i, ret; + const char *home; + void *retp; + + txnp = NULL; + nlist = 1000; + nreaders = nwriters = 4; + home = "TESTDIR"; + while ((ch = getopt(argc, argv, "h:pn:r:vw:")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'p': + punish = 1; + break; + case 'n': + nlist = atoi(optarg); + break; + case 'r': + nreaders = atoi(optarg); + break; + case 'v': + verbose = 1; + break; + case 'w': + nwriters = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Initialize the random number generator. */ + srand(getpid() | time(NULL)); + + /* Register the signal handler. */ + (void)signal(SIGINT, onint); + + /* Build the key list. */ + word(); + + /* Remove the previous database. */ + (void)remove(DATABASE); + + /* Initialize the database environment. */ + if ((ret = db_init(home)) != 0) + return (ret); + + /* Initialize the database. */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + (void)dbenv->close(dbenv, 0); + return (EXIT_FAILURE); + } + if ((ret = dbp->set_pagesize(dbp, 1024)) != 0) { + dbp->err(dbp, ret, "set_pagesize"); + goto err; + } + + if ((ret = dbenv->txn_begin(dbenv, NULL, &txnp, 0)) != 0) + fatal("txn_begin", ret, 1); + if ((ret = dbp->open(dbp, txnp, + DATABASE, NULL, DB_BTREE, DB_CREATE | DB_THREAD, 0664)) != 0) { + dbp->err(dbp, ret, "%s: open", DATABASE); + goto err; + } else { + ret = txnp->commit(txnp, 0); + txnp = NULL; + if (ret != 0) + goto err; + } + + nthreads = nreaders + nwriters + 2; + printf("Running: readers %d, writers %d\n", nreaders, nwriters); + fflush(stdout); + + /* Create statistics structures, offset by 1. 
*/ + if ((perf = calloc(nreaders + nwriters + 1, sizeof(*perf))) == NULL) + fatal(NULL, errno, 1); + + /* Create thread ID structures. */ + if ((tids = malloc(nthreads * sizeof(pthread_t))) == NULL) + fatal(NULL, errno, 1); + + /* Create reader/writer threads. */ + for (i = 0; i < nreaders + nwriters; ++i) + if ((ret = pthread_create( + &tids[i], NULL, tstart, (void *)(uintptr_t)i)) != 0) + fatal("pthread_create", ret > 0 ? ret : errno, 1); + + /* Create buffer pool trickle thread. */ + if (pthread_create(&tids[i], NULL, trickle, &i)) + fatal("pthread_create", errno, 1); + ++i; + + /* Create deadlock detector thread. */ + if (pthread_create(&tids[i], NULL, deadlock, &i)) + fatal("pthread_create", errno, 1); + + /* Wait for the threads. */ + for (i = 0; i < nthreads; ++i) + (void)pthread_join(tids[i], &retp); + + printf("Exiting\n"); + stats(); + +err: if (txnp != NULL) + (void)txnp->abort(txnp); + (void)dbp->close(dbp, 0); + (void)dbenv->close(dbenv, 0); + + return (EXIT_SUCCESS); +} + +int +reader(id) + int id; +{ + DBT key, data; + int n, ret; + char buf[64]; + + /* + * DBT's must use local memory or malloc'd memory if the DB handle + * is accessed in a threaded fashion. + */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + data.flags = DB_DBT_MALLOC; + + /* + * Read-only threads do not require transaction protection, unless + * there's a need for repeatable reads. + */ + while (!quit) { + /* Pick a key at random, and look it up. */ + n = rand() % nlist; + key.data = list[n]; + key.size = strlen(key.data); + + if (verbose) { + sprintf(buf, "reader: %d: list entry %d\n", id, n); + write(STDOUT_FILENO, buf, strlen(buf)); + } + + switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) { + case DB_LOCK_DEADLOCK: /* Deadlock. */ + ++perf[id].aborts; + break; + case 0: /* Success. */ + ++perf[id].found; + free(data.data); + break; + case DB_NOTFOUND: /* Not found. 
*/ + ++perf[id].notfound; + break; + default: + sprintf(buf, + "reader %d: dbp->get: %s", id, (char *)key.data); + fatal(buf, ret, 0); + } + } + return (0); +} + +int +writer(id) + int id; +{ + DBT key, data; + DB_TXN *tid; + time_t now, then; + int n, ret; + char buf[256], dbuf[10000]; + + time(&now); + then = now; + + /* + * DBT's must use local memory or malloc'd memory if the DB handle + * is accessed in a threaded fashion. + */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + data.data = dbuf; + data.ulen = sizeof(dbuf); + data.flags = DB_DBT_USERMEM; + + while (!quit) { + /* Pick a random key. */ + n = rand() % nlist; + key.data = list[n]; + key.size = strlen(key.data); + + if (verbose) { + sprintf(buf, "writer: %d: list entry %d\n", id, n); + write(STDOUT_FILENO, buf, strlen(buf)); + } + + /* Abort and retry. */ + if (0) { +retry: if ((ret = tid->abort(tid)) != 0) + fatal("DB_TXN->abort", ret, 1); + ++perf[id].aborts; + ++perf[id].aborted; + } + + /* Thread #1 prints out the stats every 20 seconds. */ + if (id == 1) { + time(&now); + if (now - then >= 20) { + stats(); + then = now; + } + } + + /* Begin the transaction. */ + if ((ret = dbenv->txn_begin(dbenv, NULL, &tid, 0)) != 0) + fatal("txn_begin", ret, 1); + + /* + * Get the key. If it doesn't exist, add it. If it does + * exist, delete it. + */ + switch (ret = dbp->get(dbp, tid, &key, &data, 0)) { + case DB_LOCK_DEADLOCK: + goto retry; + case 0: + goto delete; + case DB_NOTFOUND: + goto add; + } + + sprintf(buf, "writer: %d: dbp->get", id); + fatal(buf, ret, 1); + /* NOTREACHED */ + +delete: /* Delete the key. */ + switch (ret = dbp->del(dbp, tid, &key, 0)) { + case DB_LOCK_DEADLOCK: + goto retry; + case 0: + ++perf[id].deletes; + goto commit; + } + + sprintf(buf, "writer: %d: dbp->del", id); + fatal(buf, ret, 1); + /* NOTREACHED */ + +add: /* Add the key. 1 data item in 30 is an overflow item. 
*/ + data.size = 20 + rand() % 128; + if (rand() % 30 == 0) + data.size += 8192; + + switch (ret = dbp->put(dbp, tid, &key, &data, 0)) { + case DB_LOCK_DEADLOCK: + goto retry; + case 0: + ++perf[id].adds; + goto commit; + default: + sprintf(buf, "writer: %d: dbp->put", id); + fatal(buf, ret, 1); + } + +commit: /* The transaction finished, commit it. */ + if ((ret = tid->commit(tid, 0)) != 0) + fatal("DB_TXN->commit", ret, 1); + + /* + * Every time the thread completes 20 transactions, show + * our progress. + */ + if (++perf[id].txns % 20 == 0) { + sprintf(buf, +"writer: %2d: adds: %4d: deletes: %4d: aborts: %4d: txns: %4d\n", + id, perf[id].adds, perf[id].deletes, + perf[id].aborts, perf[id].txns); + write(STDOUT_FILENO, buf, strlen(buf)); + } + + /* + * If this thread was aborted more than 5 times before + * the transaction finished, complain. + */ + if (perf[id].aborted > 5) { + sprintf(buf, +"writer: %2d: adds: %4d: deletes: %4d: aborts: %4d: txns: %4d: ABORTED: %2d\n", + id, perf[id].adds, perf[id].deletes, + perf[id].aborts, perf[id].txns, perf[id].aborted); + write(STDOUT_FILENO, buf, strlen(buf)); + } + perf[id].aborted = 0; + } + return (0); +} + +/* + * stats -- + * Display reader/writer thread statistics. To display the statistics + * for the mpool trickle or deadlock threads, use db_stat(1). + */ +void +stats() +{ + int id; + char *p, buf[8192]; + + p = buf + sprintf(buf, "-------------\n"); + for (id = 0; id < nreaders + nwriters;) + if (id++ < nwriters) + p += sprintf(p, + "writer: %2d: adds: %4d: deletes: %4d: aborts: %4d: txns: %4d\n", + id, perf[id].adds, + perf[id].deletes, perf[id].aborts, perf[id].txns); + else + p += sprintf(p, + "reader: %2d: found: %5d: notfound: %5d: aborts: %4d\n", + id, perf[id].found, + perf[id].notfound, perf[id].aborts); + p += sprintf(p, "-------------\n"); + + write(STDOUT_FILENO, buf, p - buf); +} + +/* + * db_init -- + * Initialize the environment. 
+ */ +int +db_init(home) + const char *home; +{ + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + if (punish) + (void)dbenv->set_flags(dbenv, DB_YIELDCPU, 1); + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + (void)dbenv->set_cachesize(dbenv, 0, 100 * 1024, 0); + (void)dbenv->set_lg_max(dbenv, 200000); + + if ((ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_THREAD, 0)) != 0) { + dbenv->err(dbenv, ret, NULL); + (void)dbenv->close(dbenv, 0); + return (EXIT_FAILURE); + } + + return (0); +} + +/* + * tstart -- + * Thread start function for readers and writers. + */ +void * +tstart(arg) + void *arg; +{ + pthread_t tid; + u_int id; + + id = (uintptr_t)arg + 1; + + tid = pthread_self(); + + if (id <= (u_int)nwriters) { + printf("write thread %d starting: tid: %lu\n", id, (u_long)tid); + fflush(stdout); + writer(id); + } else { + printf("read thread %d starting: tid: %lu\n", id, (u_long)tid); + fflush(stdout); + reader(id); + } + + /* NOTREACHED */ + return (NULL); +} + +/* + * deadlock -- + * Thread start function for DB_ENV->lock_detect. + */ +void * +deadlock(arg) + void *arg; +{ + struct timeval t; + pthread_t tid; + + arg = arg; /* XXX: shut the compiler up. */ + tid = pthread_self(); + + printf("deadlock thread starting: tid: %lu\n", (u_long)tid); + fflush(stdout); + + t.tv_sec = 0; + t.tv_usec = 100000; + while (!quit) { + (void)dbenv->lock_detect(dbenv, 0, DB_LOCK_YOUNGEST, NULL); + + /* Check every 100ms. */ + (void)select(0, NULL, NULL, NULL, &t); + } + + return (NULL); +} + +/* + * trickle -- + * Thread start function for memp_trickle. + */ +void * +trickle(arg) + void *arg; +{ + pthread_t tid; + int wrote; + char buf[64]; + + arg = arg; /* XXX: shut the compiler up. 
*/ + tid = pthread_self(); + + printf("trickle thread starting: tid: %lu\n", (u_long)tid); + fflush(stdout); + + while (!quit) { + (void)dbenv->memp_trickle(dbenv, 10, &wrote); + if (verbose) { + sprintf(buf, "trickle: wrote %d\n", wrote); + write(STDOUT_FILENO, buf, strlen(buf)); + } + if (wrote == 0) { + sleep(1); + sched_yield(); + } + } + + return (NULL); +} + +/* + * word -- + * Build the dictionary word list. + */ +void +word() +{ + FILE *fp; + int cnt; + char buf[256]; + + if ((fp = fopen(WORDLIST, "r")) == NULL) + fatal(WORDLIST, errno, 1); + + if ((list = malloc(nlist * sizeof(char *))) == NULL) + fatal(NULL, errno, 1); + + for (cnt = 0; cnt < nlist; ++cnt) { + if (fgets(buf, sizeof(buf), fp) == NULL) + break; + if ((list[cnt] = strdup(buf)) == NULL) + fatal(NULL, errno, 1); + } + nlist = cnt; /* In case nlist was larger than possible. */ +} + +/* + * fatal -- + * Report a fatal error and quit. + */ +void +fatal(msg, err, syserr) + const char *msg; + int err, syserr; +{ + fprintf(stderr, "%s: ", progname); + if (msg != NULL) { + fprintf(stderr, "%s", msg); + if (syserr) + fprintf(stderr, ": "); + } + if (syserr) + fprintf(stderr, "%s", strerror(err)); + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); + + /* NOTREACHED */ +} + +/* + * usage -- + * Usage message. + */ +int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-pv] [-h home] [-n words] [-r readers] [-w writers]\n", + progname); + return (EXIT_FAILURE); +} + +/* + * onint -- + * Interrupt signal handler. + */ +void +onint(signo) + int signo; +{ + signo = 0; /* Quiet compiler. */ + quit = 1; +} diff --git a/examples/c/ex_tpcb.c b/examples/c/ex_tpcb.c new file mode 100644 index 00000000..041a3ef4 --- /dev/null +++ b/examples/c/ex_tpcb.c @@ -0,0 +1,718 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#define NS_PER_MS 1000000 /* Nanoseconds in a millisecond */ +#define NS_PER_US 1000 /* Nanoseconds in a microsecond */ +#ifdef _WIN32 +#include +extern int getopt(int, char * const *, const char *); +/* Implement a basic high res timer with a POSIX interface for Windows. */ +struct timeval { + time_t tv_sec; + long tv_usec; +}; +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct _timeb now; + _ftime(&now); + tv->tv_sec = now.time; + tv->tv_usec = now.millitm * NS_PER_US; + return (0); +} +#else +#include +#include +#endif + +#include + +typedef enum { ACCOUNT, BRANCH, TELLER } FTYPE; + +DB_ENV *db_init __P((const char *, const char *, int, u_int32_t)); +int hpopulate __P((DB *, int, int, int, int)); +int populate __P((DB *, u_int32_t, u_int32_t, int, const char *)); +u_int32_t random_id __P((FTYPE, int, int, int)); +u_int32_t random_int __P((u_int32_t, u_int32_t)); +int tp_populate __P((DB_ENV *, int, int, int, int, int)); +int tp_run __P((DB_ENV *, int, int, int, int, int)); +int tp_txn __P((DB_ENV *, DB *, DB *, DB *, DB *, int, int, int, int)); + +int invarg __P((const char *, int, const char *)); +int main __P((int, char *[])); +int usage __P((const char *)); + +/* + * This program implements a basic TPC/B driver program. To create the + * TPC/B database, run with the -i (init) flag. The number of records + * with which to populate the account, history, branch, and teller tables + * is specified by the a, s, b, and t flags respectively. To run a TPC/B + * test, use the n flag to indicate a number of transactions to run (note + * that you can run many of these processes in parallel to simulate a + * multiuser test run). + */ +#define TELLERS_PER_BRANCH 10 +#define ACCOUNTS_PER_TELLER 10000 +#define HISTORY_PER_BRANCH 2592000 + +/* + * The default configuration that adheres to TPCB scaling rules requires + * nearly 3 GB of space. 
To avoid requiring that much space for testing, + * we set the parameters much lower. If you want to run a valid 10 TPS + * configuration, define VALID_SCALING. + */ +#ifdef VALID_SCALING +#define ACCOUNTS 1000000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 25920000 +#endif + +#ifdef TINY +#define ACCOUNTS 1000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 10000 +#endif + +#ifdef VERY_TINY +#define ACCOUNTS 500 +#define BRANCHES 10 +#define TELLERS 50 +#define HISTORY 5000 +#endif + +#if !defined(VALID_SCALING) && !defined(TINY) && !defined(VERY_TINY) +#define ACCOUNTS 100000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 259200 +#endif + +#define HISTORY_LEN 100 +#define RECLEN 100 +#define BEGID 1000000 + +typedef struct _defrec { + u_int32_t id; + u_int32_t balance; + u_int8_t pad[RECLEN - sizeof(u_int32_t) - sizeof(u_int32_t)]; +} defrec; + +typedef struct _histrec { + u_int32_t aid; + u_int32_t bid; + u_int32_t tid; + u_int32_t amount; + u_int8_t pad[RECLEN - 4 * sizeof(u_int32_t)]; +} histrec; + +char *progname = "ex_tpcb"; /* Program name. 
*/ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + int accounts, branches, seed, tellers, history; + int ch, iflag, mpool, ntxns, ret, txn_no_sync, verbose; + const char *home; + + home = "TESTDIR"; + accounts = branches = history = tellers = 0; + iflag = mpool = ntxns = txn_no_sync = verbose = 0; + seed = (int)time(NULL); + + while ((ch = getopt(argc, argv, "a:b:c:fh:in:S:s:t:v")) != EOF) + switch (ch) { + case 'a': /* Number of account records */ + if ((accounts = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'b': /* Number of branch records */ + if ((branches = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'c': /* Cachesize in bytes */ + if ((mpool = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'f': /* Fast mode: no txn sync. */ + txn_no_sync = 1; + break; + case 'h': /* DB home. */ + home = optarg; + break; + case 'i': /* Initialize the test. */ + iflag = 1; + break; + case 'n': /* Number of transactions */ + if ((ntxns = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'S': /* Random number seed. */ + if ((seed = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 's': /* Number of history records */ + if ((history = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 't': /* Number of teller records */ + if ((tellers = atoi(optarg)) <= 0) + return (invarg(progname, ch, optarg)); + break; + case 'v': /* Verbose option. */ + verbose = 1; + break; + case '?': + default: + return (usage(progname)); + } + argc -= optind; + argv += optind; + + srand((u_int)seed); + + /* Initialize the database environment. */ + if ((dbenv = db_init(home, + progname, mpool, txn_no_sync ? DB_TXN_NOSYNC : 0)) == NULL) + return (EXIT_FAILURE); + + accounts = accounts == 0 ? ACCOUNTS : accounts; + branches = branches == 0 ? 
BRANCHES : branches; + tellers = tellers == 0 ? TELLERS : tellers; + history = history == 0 ? HISTORY : history; + + if (verbose) + printf("%ld Accounts, %ld Branches, %ld Tellers, %ld History\n", + (long)accounts, (long)branches, + (long)tellers, (long)history); + + if (iflag) { + if (ntxns != 0) + return (usage(progname)); + tp_populate(dbenv, + accounts, branches, history, tellers, verbose); + } else { + if (ntxns == 0) + return (usage(progname)); + tp_run(dbenv, ntxns, accounts, branches, tellers, verbose); + } + + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, "%s: dbenv->close failed: %s\n", + progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + +int +invarg(progname, arg, str) + const char *progname; + int arg; + const char *str; +{ + (void)fprintf(stderr, + "%s: invalid argument for -%c: %s\n", progname, arg, str); + return (EXIT_FAILURE); +} + +int +usage(progname) + const char *progname; +{ + const char *a1, *a2; + + a1 = "[-fv] [-a accounts] [-b branches]\n"; + a2 = "\t[-c cache_size] [-h home] [-S seed] [-s history] [-t tellers]"; + (void)fprintf(stderr, "usage: %s -i %s %s\n", progname, a1, a2); + (void)fprintf(stderr, + " %s -n transactions %s %s\n", progname, a1, a2); + return (EXIT_FAILURE); +} + +/* + * db_init -- + * Initialize the environment. + */ +DB_ENV * +db_init(home, prefix, cachesize, flags) + const char *home, *prefix; + int cachesize; + u_int32_t flags; +{ + DB_ENV *dbenv; + u_int32_t local_flags; + int ret; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (NULL); + } + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, prefix); + (void)dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT); + (void)dbenv->set_cachesize(dbenv, 0, + cachesize == 0 ? 
4 * 1024 * 1024 : (u_int32_t)cachesize, 0); + + if (flags & (DB_TXN_NOSYNC)) + (void)dbenv->set_flags(dbenv, DB_TXN_NOSYNC, 1); + flags &= ~(DB_TXN_NOSYNC); + + local_flags = flags | DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN; + if ((ret = dbenv->open(dbenv, home, local_flags, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open: %s", home); + (void)dbenv->close(dbenv, 0); + return (NULL); + } + return (dbenv); +} + +/* + * Initialize the database to the specified number of accounts, branches, + * history records, and tellers. + */ +int +tp_populate(env, accounts, branches, history, tellers, verbose) + DB_ENV *env; + int accounts, branches, history, tellers, verbose; +{ + DB *dbp; + u_int32_t balance, idnum, oflags; + u_int32_t end_anum, end_bnum, end_tnum; + u_int32_t start_anum, start_bnum, start_tnum; + int ret; + + idnum = BEGID; + balance = 500000; + oflags = DB_CREATE; + + if ((ret = db_create(&dbp, env, 0)) != 0) { + env->err(env, ret, "db_create"); + return (1); + } + (void)dbp->set_h_nelem(dbp, (u_int32_t)accounts); + + if ((ret = dbp->open(dbp, NULL, "account", NULL, + DB_HASH, oflags, 0644)) != 0) { + env->err(env, ret, "DB->open: account"); + return (1); + } + + start_anum = idnum; + populate(dbp, idnum, balance, accounts, "account"); + idnum += accounts; + end_anum = idnum - 1; + if ((ret = dbp->close(dbp, 0)) != 0) { + env->err(env, ret, "DB->close: account"); + return (1); + } + if (verbose) + printf("Populated accounts: %ld - %ld\n", + (long)start_anum, (long)end_anum); + + /* + * Since the number of branches is very small, we want to use very + * small pages and only 1 key per page, i.e., key-locking instead + * of page locking. 
+ */ + if ((ret = db_create(&dbp, env, 0)) != 0) { + env->err(env, ret, "db_create"); + return (1); + } + (void)dbp->set_h_ffactor(dbp, 1); + (void)dbp->set_h_nelem(dbp, (u_int32_t)branches); + (void)dbp->set_pagesize(dbp, 512); + if ((ret = dbp->open(dbp, NULL, "branch", NULL, + DB_HASH, oflags, 0644)) != 0) { + env->err(env, ret, "DB->open: branch"); + return (1); + } + start_bnum = idnum; + populate(dbp, idnum, balance, branches, "branch"); + idnum += branches; + end_bnum = idnum - 1; + if ((ret = dbp->close(dbp, 0)) != 0) { + env->err(env, ret, "DB->close: branch"); + return (1); + } + if (verbose) + printf("Populated branches: %ld - %ld\n", + (long)start_bnum, (long)end_bnum); + + /* + * In the case of tellers, we also want small pages, but we'll let + * the fill factor dynamically adjust itself. + */ + if ((ret = db_create(&dbp, env, 0)) != 0) { + env->err(env, ret, "db_create"); + return (1); + } + (void)dbp->set_h_ffactor(dbp, 0); + (void)dbp->set_h_nelem(dbp, (u_int32_t)tellers); + (void)dbp->set_pagesize(dbp, 512); + if ((ret = dbp->open(dbp, NULL, "teller", NULL, + DB_HASH, oflags, 0644)) != 0) { + env->err(env, ret, "DB->open: teller"); + return (1); + } + + start_tnum = idnum; + populate(dbp, idnum, balance, tellers, "teller"); + idnum += tellers; + end_tnum = idnum - 1; + if ((ret = dbp->close(dbp, 0)) != 0) { + env->err(env, ret, "DB->close: teller"); + return (1); + } + if (verbose) + printf("Populated tellers: %ld - %ld\n", + (long)start_tnum, (long)end_tnum); + + if ((ret = db_create(&dbp, env, 0)) != 0) { + env->err(env, ret, "db_create"); + return (1); + } + (void)dbp->set_re_len(dbp, HISTORY_LEN); + if ((ret = dbp->open(dbp, NULL, "history", NULL, + DB_RECNO, oflags, 0644)) != 0) { + env->err(env, ret, "DB->open: history"); + return (1); + } + + hpopulate(dbp, history, accounts, branches, tellers); + if ((ret = dbp->close(dbp, 0)) != 0) { + env->err(env, ret, "DB->close: history"); + return (1); + } + return (0); +} + +int +populate(dbp, 
start_id, balance, nrecs, msg) + DB *dbp; + u_int32_t start_id, balance; + int nrecs; + const char *msg; +{ + DBT kdbt, ddbt; + defrec drec; + int i, ret; + + kdbt.flags = 0; + kdbt.data = &drec.id; + kdbt.size = sizeof(u_int32_t); + ddbt.flags = 0; + ddbt.data = &drec; + ddbt.size = sizeof(drec); + memset(&drec.pad[0], 1, sizeof(drec.pad)); + + for (i = 0; i < nrecs; i++) { + drec.id = start_id + (u_int32_t)i; + drec.balance = balance; + if ((ret = + (dbp->put)(dbp, NULL, &kdbt, &ddbt, DB_NOOVERWRITE)) != 0) { + dbp->err(dbp, + ret, "Failure initializing %s file\n", msg); + return (1); + } + } + return (0); +} + +int +hpopulate(dbp, history, accounts, branches, tellers) + DB *dbp; + int history, accounts, branches, tellers; +{ + DBT kdbt, ddbt; + histrec hrec; + db_recno_t key; + int i, ret; + + memset(&kdbt, 0, sizeof(kdbt)); + memset(&ddbt, 0, sizeof(ddbt)); + ddbt.data = &hrec; + ddbt.size = sizeof(hrec); + kdbt.data = &key; + kdbt.size = sizeof(key); + memset(&hrec.pad[0], 1, sizeof(hrec.pad)); + hrec.amount = 10; + + for (i = 1; i <= history; i++) { + hrec.aid = random_id(ACCOUNT, accounts, branches, tellers); + hrec.bid = random_id(BRANCH, accounts, branches, tellers); + hrec.tid = random_id(TELLER, accounts, branches, tellers); + if ((ret = dbp->put(dbp, NULL, &kdbt, &ddbt, DB_APPEND)) != 0) { + dbp->err(dbp, ret, "dbp->put"); + return (1); + } + } + return (0); +} + +u_int32_t +random_int(lo, hi) + u_int32_t lo, hi; +{ + u_int32_t ret; + int t; + +#ifndef RAND_MAX +#define RAND_MAX 0x7fffffff +#endif + t = rand(); + ret = (u_int32_t)(((double)t / ((double)(RAND_MAX) + 1)) * + (hi - lo + 1)); + ret += lo; + return (ret); +} + +u_int32_t +random_id(type, accounts, branches, tellers) + FTYPE type; + int accounts, branches, tellers; +{ + u_int32_t min, max, num; + + max = min = BEGID; + num = accounts; + switch (type) { + case TELLER: + min += branches; + num = tellers; + /* FALLTHROUGH */ + case BRANCH: + if (type == BRANCH) + num = branches; + min += 
accounts; + /* FALLTHROUGH */ + case ACCOUNT: + max = min + num - 1; + } + return (random_int(min, max)); +} + +int +tp_run(dbenv, n, accounts, branches, tellers, verbose) + DB_ENV *dbenv; + int n, accounts, branches, tellers, verbose; +{ + DB *adb, *bdb, *hdb, *tdb; + int failed, ret, txns; + struct timeval start_tv, end_tv; + double start_time, end_time; + + adb = bdb = hdb = tdb = NULL; + + /* + * Open the database files. + */ + if ((ret = db_create(&adb, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + if ((ret = adb->open(adb, NULL, "account", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: account"); + goto err; + } + if ((ret = db_create(&bdb, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + if ((ret = bdb->open(bdb, NULL, "branch", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: branch"); + goto err; + } + if ((ret = db_create(&hdb, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + if ((ret = hdb->open(hdb, NULL, "history", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: history"); + goto err; + } + if ((ret = db_create(&tdb, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + if ((ret = tdb->open(tdb, NULL, "teller", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: teller"); + goto err; + } + + (void)gettimeofday(&start_tv, NULL); + + for (txns = n, failed = 0; n-- > 0;) + if ((ret = tp_txn(dbenv, adb, bdb, tdb, hdb, + accounts, branches, tellers, verbose)) != 0) + ++failed; + + (void)gettimeofday(&end_tv, NULL); + + start_time = start_tv.tv_sec + ((start_tv.tv_usec + 0.0)/NS_PER_MS); + end_time = end_tv.tv_sec + ((end_tv.tv_usec + 0.0)/NS_PER_MS); + if (end_time == start_time) + end_time += 1/NS_PER_MS; + + printf("%s: %d txns: %d failed, %.3f sec, %.2f TPS\n", progname, + txns, failed, (end_time - start_time), + 
(txns - failed) / (double)(end_time - start_time)); + +err: if (adb != NULL) + (void)adb->close(adb, 0); + if (bdb != NULL) + (void)bdb->close(bdb, 0); + if (tdb != NULL) + (void)tdb->close(tdb, 0); + if (hdb != NULL) + (void)hdb->close(hdb, 0); + return (ret == 0 ? 0 : 1); +} + +/* + * XXX Figure out the appropriate way to pick out IDs. + */ +int +tp_txn(dbenv, adb, bdb, tdb, hdb, accounts, branches, tellers, verbose) + DB_ENV *dbenv; + DB *adb, *bdb, *tdb, *hdb; + int accounts, branches, tellers, verbose; +{ + DBC *acurs, *bcurs, *tcurs; + DBT d_dbt, d_histdbt, k_dbt, k_histdbt; + DB_TXN *t; + db_recno_t key; + defrec rec; + histrec hrec; + int account, branch, teller, ret; + + t = NULL; + acurs = bcurs = tcurs = NULL; + + /* + * !!! + * This is sample code -- we could move a lot of this into the driver + * to make it faster. + */ + account = random_id(ACCOUNT, accounts, branches, tellers); + branch = random_id(BRANCH, accounts, branches, tellers); + teller = random_id(TELLER, accounts, branches, tellers); + + memset(&d_histdbt, 0, sizeof(d_histdbt)); + + memset(&k_histdbt, 0, sizeof(k_histdbt)); + k_histdbt.data = &key; + k_histdbt.size = sizeof(key); + + memset(&k_dbt, 0, sizeof(k_dbt)); + k_dbt.size = sizeof(int); + + memset(&d_dbt, 0, sizeof(d_dbt)); + d_dbt.flags = DB_DBT_USERMEM; + d_dbt.data = &rec; + d_dbt.ulen = sizeof(rec); + + hrec.aid = account; + hrec.bid = branch; + hrec.tid = teller; + hrec.amount = 10; + /* Request 0 bytes since we're just positioning. */ + d_histdbt.flags = DB_DBT_PARTIAL; + + /* + * START PER-TRANSACTION TIMING. + * + * Technically, TPCB requires a limit on response time, you only get + * to count transactions that complete within 2 seconds. That's not + * an issue for this sample application -- regardless, here's where + * the transaction begins. 
+ */ + if (dbenv->txn_begin(dbenv, NULL, &t, 0) != 0) + goto err; + + if (adb->cursor(adb, t, &acurs, 0) != 0 || + bdb->cursor(bdb, t, &bcurs, 0) != 0 || + tdb->cursor(tdb, t, &tcurs, 0) != 0) + goto err; + + /* Account record */ + k_dbt.data = &account; + if (acurs->get(acurs, &k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (acurs->put(acurs, &k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + /* Branch record */ + k_dbt.data = &branch; + if (bcurs->get(bcurs, &k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (bcurs->put(bcurs, &k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + /* Teller record */ + k_dbt.data = &teller; + if (tcurs->get(tcurs, &k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (tcurs->put(tcurs, &k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + /* History record */ + d_histdbt.flags = 0; + d_histdbt.data = &hrec; + d_histdbt.ulen = sizeof(hrec); + if (hdb->put(hdb, t, &k_histdbt, &d_histdbt, DB_APPEND) != 0) + goto err; + + if (acurs->close(acurs) != 0 || bcurs->close(bcurs) != 0 || + tcurs->close(tcurs) != 0) + goto err; + + ret = t->commit(t, 0); + t = NULL; + if (ret != 0) + goto err; + /* END PER-TRANSACTION TIMING. */ + + return (0); + +err: if (acurs != NULL) + (void)acurs->close(acurs); + if (bcurs != NULL) + (void)bcurs->close(bcurs); + if (tcurs != NULL) + (void)tcurs->close(tcurs); + if (t != NULL) + (void)t->abort(t); + + if (verbose) + printf("Transaction A=%ld B=%ld T=%ld failed\n", + (long)account, (long)branch, (long)teller); + return (-1); +} diff --git a/examples/cxx/AccessExample.cpp b/examples/cxx/AccessExample.cpp new file mode 100644 index 00000000..2cf38d2e --- /dev/null +++ b/examples/cxx/AccessExample.cpp @@ -0,0 +1,168 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +extern "C" { + extern int getopt(int, char * const *, const char *); + extern int optind; +} +#else +#include +#endif + +#include + +#define DATABASE "access.db" + +using std::cin; +using std::cout; +using std::cerr; + +class AccessExample +{ +public: + AccessExample(); + void run(bool removeExistingDatabase, const char *fileName); + +private: + // no need for copy and assignment + AccessExample(const AccessExample &); + void operator = (const AccessExample &); +}; + +int +usage() +{ + (void)fprintf(stderr, "usage: AccessExample [-r] [database]\n"); + return (EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + int ch, rflag; + const char *database; + + rflag = 0; + while ((ch = getopt(argc, argv, "r")) != EOF) + switch (ch) { + case 'r': + rflag = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Accept optional database name. */ + database = *argv == NULL ? DATABASE : argv[0]; + + // Use a try block just to report any errors. + // An alternate approach to using exceptions is to + // use error models (see DbEnv::set_error_model()) so + // that error codes are returned for all Berkeley DB methods. + // + try { + AccessExample app; + app.run((bool)(rflag == 1 ? true : false), database); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "AccessExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +AccessExample::AccessExample() +{ +} + +void AccessExample::run(bool removeExistingDatabase, const char *fileName) +{ + // Remove the previous database. + if (removeExistingDatabase) + (void)remove(fileName); + + // Create the database object. + // There is no environment for this simple example. + Db db(0, 0); + + db.set_error_stream(&cerr); + db.set_errpfx("AccessExample"); + db.set_pagesize(1024); /* Page size: 1K. 
*/ + db.set_cachesize(0, 32 * 1024, 0); + db.open(NULL, fileName, NULL, DB_BTREE, DB_CREATE, 0664); + + // + // Insert records into the database, where the key is the user + // input and the data is the user input in reverse order. + // + char buf[1024], rbuf[1024]; + char *p, *t; + int ret; + u_int32_t len; + + for (;;) { + cout << "input> "; + cout.flush(); + + cin.getline(buf, sizeof(buf)); + if (cin.eof()) + break; + + if ((len = (u_int32_t)strlen(buf)) <= 0) + continue; + for (t = rbuf, p = buf + (len - 1); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + Dbt key(buf, len + 1); + Dbt data(rbuf, len + 1); + + ret = db.put(0, &key, &data, DB_NOOVERWRITE); + if (ret == DB_KEYEXIST) { + cout << "Key " << buf << " already exists.\n"; + } + } + cout << "\n"; + + // We put a try block around this section of code + // to ensure that our database is properly closed + // in the event of an error. + // + try { + // Acquire a cursor for the table. + Dbc *dbcp; + db.cursor(NULL, &dbcp, 0); + + // Walk through the table, printing the key/data pairs. + Dbt key; + Dbt data; + while (dbcp->get(&key, &data, DB_NEXT) == 0) { + char *key_string = (char *)key.get_data(); + char *data_string = (char *)data.get_data(); + cout << key_string << " : " << data_string << "\n"; + } + dbcp->close(); + } + catch (DbException &dbe) { + cerr << "AccessExample: " << dbe.what() << "\n"; + } + + db.close(0); +} diff --git a/examples/cxx/BtRecExample.cpp b/examples/cxx/BtRecExample.cpp new file mode 100644 index 00000000..5cf5fb79 --- /dev/null +++ b/examples/cxx/BtRecExample.cpp @@ -0,0 +1,223 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +using std::cout; +using std::cerr; + +#define DATABASE "access.db" +#define WORDLIST "../test/tcl/wordlist" + +const char *progname = "BtRecExample"; // Program name. + +class BtRecExample +{ +public: + BtRecExample(FILE *fp); + ~BtRecExample(); + void run(); + void stats(); + void show(const char *msg, Dbt *key, Dbt *data); + +private: + Db *dbp; + Dbc *dbcp; +}; + +BtRecExample::BtRecExample(FILE *fp) +{ + char *p, *t, buf[1024], rbuf[1024]; + int ret; + + // Remove the previous database. + (void)remove(DATABASE); + + dbp = new Db(NULL, 0); + + dbp->set_error_stream(&cerr); + dbp->set_errpfx(progname); + dbp->set_pagesize(1024); // 1K page sizes. + + dbp->set_flags(DB_RECNUM); // Record numbers. + dbp->open(NULL, DATABASE, NULL, DB_BTREE, DB_CREATE, 0664); + + // + // Insert records into the database, where the key is the word + // preceded by its record number, and the data is the same, but + // in reverse order. + // + + for (int cnt = 1; cnt <= 1000; ++cnt) { + (void)sprintf(buf, "%04d_", cnt); + if (fgets(buf + 4, sizeof(buf) - 4, fp) == NULL) + break; + u_int32_t len = (u_int32_t)strlen(buf); + buf[len - 1] = '\0'; + for (t = rbuf, p = buf + (len - 2); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + // As a convenience for printing, we include the null terminator + // in the stored data. + // + Dbt key(buf, len); + Dbt data(rbuf, len); + + if ((ret = dbp->put(NULL, &key, &data, DB_NOOVERWRITE)) != 0) { + dbp->err(ret, "Db::put"); + if (ret != DB_KEYEXIST) + throw DbException(ret); + } + } +} + +BtRecExample::~BtRecExample() +{ + if (dbcp != 0) + dbcp->close(); + dbp->close(0); + delete dbp; +} + +// +// Print out the number of records in the database. 
+// +void BtRecExample::stats() +{ + DB_BTREE_STAT *statp; + + dbp->stat(NULL, &statp, 0); + cout << progname << ": database contains " + << (u_long)statp->bt_ndata << " records\n"; + + // Note: must use free, not delete. + // This struct is allocated by C. + // + free(statp); +} + +void BtRecExample::run() +{ + db_recno_t recno; + int ret; + char buf[1024]; + + // Acquire a cursor for the database. + dbp->cursor(NULL, &dbcp, 0); + + // + // Prompt the user for a record number, then retrieve and display + // that record. + // + for (;;) { + // Get a record number. + cout << "recno #> "; + cout.flush(); + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + recno = atoi(buf); + + // + // Start with a fresh key each time, + // the dbp->get() routine returns + // the key and data pair, not just the key! + // + Dbt key(&recno, sizeof(recno)); + Dbt data; + + if ((ret = dbcp->get(&key, &data, DB_SET_RECNO)) != 0) { + dbp->err(ret, "DBcursor->get"); + throw DbException(ret); + } + + // Display the key and data. + show("k/d\t", &key, &data); + + // Move the cursor a record forward. + if ((ret = dbcp->get(&key, &data, DB_NEXT)) != 0) { + dbp->err(ret, "DBcursor->get"); + throw DbException(ret); + } + + // Display the key and data. + show("next\t", &key, &data); + + // + // Retrieve the record number for the following record into + // local memory. + // + data.set_data(&recno); + data.set_size(sizeof(recno)); + data.set_ulen(sizeof(recno)); + data.set_flags(data.get_flags() | DB_DBT_USERMEM); + + if ((ret = dbcp->get(&key, &data, DB_GET_RECNO)) != 0) { + if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) { + dbp->err(ret, "DBcursor->get"); + throw DbException(ret); + } + } + else { + cout << "retrieved recno: " << (u_long)recno << "\n"; + } + } + + dbcp->close(); + dbcp = NULL; +} + +// +// show -- +// Display a key/data pair. 
+// +void BtRecExample::show(const char *msg, Dbt *key, Dbt *data) +{ + cout << msg << (char *)key->get_data() + << " : " << (char *)data->get_data() << "\n"; +} + +int +main() +{ + FILE *fp; + + // Open the word database. + if ((fp = fopen(WORDLIST, "r")) == NULL) { + fprintf(stderr, "%s: open %s: %s\n", + progname, WORDLIST, db_strerror(errno)); + return (EXIT_FAILURE); + } + + try { + BtRecExample app(fp); + + // Close the word database. + (void)fclose(fp); + fp = NULL; + + app.stats(); + app.run(); + } + catch (DbException &dbe) { + cerr << "Exception: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} diff --git a/examples/cxx/EnvExample.cpp b/examples/cxx/EnvExample.cpp new file mode 100644 index 00000000..b83d07e0 --- /dev/null +++ b/examples/cxx/EnvExample.cpp @@ -0,0 +1,143 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +using std::ostream; +using std::cout; +using std::cerr; + +void db_setup(const char *, const char *, ostream&); +void db_teardown(const char *, const char *, ostream&); +static int usage(); + +const char *progname = "EnvExample"; /* Program name. */ + +// +// An example of a program creating/configuring a Berkeley DB environment. +// +int +main(int argc, char *argv[]) +{ + // + // Note: it may be easiest to put all Berkeley DB operations in a + // try block, as seen here. Alternatively, you can change the + // ErrorModel in the DbEnv so that exceptions are never thrown + // and check error returns from all methods. + // + try { + const char *data_dir, *home; + + // + // All of the shared database files live in home, + // but data files live in data_dir. 
+ // + home = "TESTDIR"; + data_dir = "data"; + + for (int argnum = 1; argnum < argc; ++argnum) { + if (strcmp(argv[argnum], "-h") == 0) { + if (++argnum >= argc) + return (usage()); + home = argv[argnum]; + } + else if (strcmp(argv[argnum], "-d") == 0) { + if (++argnum >= argc) + return (usage()); + data_dir = argv[argnum]; + } + else { + return (usage()); + } + } + + cout << "Setup env\n"; + db_setup(home, data_dir, cerr); + + cout << "Teardown env\n"; + db_teardown(home, data_dir, cerr); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "EnvExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +// Note that any of the db calls can throw DbException +void +db_setup(const char *home, const char *data_dir, ostream& err_stream) +{ + // + // Create an environment object and initialize it for error + // reporting. + // + DbEnv *dbenv = new DbEnv(0); + dbenv->set_error_stream(&err_stream); + dbenv->set_errpfx(progname); + + // + // We want to specify the shared memory buffer pool cachesize, + // but everything else is the default. + // + dbenv->set_cachesize(0, 64 * 1024, 0); + + // Databases are in a subdirectory. + (void)dbenv->set_data_dir(data_dir); + + // Open the environment with full transactional support. + dbenv->open(home, + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | + DB_INIT_TXN, 0); + + // Open a database in the environment to verify the data_dir + // has been set correctly. + // Create a database handle, using the environment. + Db *db = new Db(dbenv, 0) ; + + // Open the database. + db->open(NULL, "EvnExample_db1.db", NULL, DB_BTREE, DB_CREATE, 0644); + + // Close the database handle. + db->close(0) ; + delete db; + + // Close the handle. + dbenv->close(0); + delete dbenv; +} + +void +db_teardown(const char *home, const char *data_dir, ostream& err_stream) +{ + // Remove the shared database regions. 
+ DbEnv *dbenv = new DbEnv(0); + + dbenv->set_error_stream(&err_stream); + dbenv->set_errpfx(progname); + + (void)dbenv->set_data_dir(data_dir); + dbenv->remove(home, 0); + delete dbenv; +} + +static int +usage() +{ + cerr << "usage: excxx_env [-h home] [-d data_dir]\n"; + return (EXIT_FAILURE); +} diff --git a/examples/cxx/LockExample.cpp b/examples/cxx/LockExample.cpp new file mode 100644 index 00000000..11d5adbf --- /dev/null +++ b/examples/cxx/LockExample.cpp @@ -0,0 +1,245 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#include + +using std::cin; +using std::cout; +using std::cerr; + +const char *progname = "LockExample"; // Program name. + +// +// An example of a program using DBLock and related classes. +// +class LockExample : public DbEnv +{ +public: + void run(); + int error_code() { return (ecode); } + + LockExample(const char *home, u_int32_t maxlocks, int do_unlink); + +private: + static const char FileName[]; + int ecode; + + // no need for copy and assignment + LockExample(const LockExample &); + void operator = (const LockExample &); +}; + +static int usage(); // forward + +int +main(int argc, char *argv[]) +{ + const char *home; + int do_unlink; + u_int32_t maxlocks; + int i; + + home = "TESTDIR"; + maxlocks = 0; + do_unlink = 0; + for (int argnum = 1; argnum < argc; ++argnum) { + if (strcmp(argv[argnum], "-h") == 0) { + if (++argnum >= argc) + return (usage()); + home = argv[argnum]; + } + else if (strcmp(argv[argnum], "-m") == 0) { + if (++argnum >= argc) + return (usage()); + if ((i = atoi(argv[argnum])) <= 0) + return (usage()); + maxlocks = (u_int32_t)i; /* XXX: possible overflow. 
*/ + } + else if (strcmp(argv[argnum], "-u") == 0) { + do_unlink = 1; + } + else { + return (usage()); + } + } + + try { + int ecode; + + if (do_unlink) { + // Create an environment that immediately + // removes all files. + LockExample tmp(home, maxlocks, do_unlink); + if ((ecode = tmp.error_code()) != 0) + return (ecode); + } + + LockExample app(home, maxlocks, do_unlink); + if ((ecode = app.error_code()) != 0) + return (ecode); + app.run(); + app.close(0); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "LockExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +LockExample::LockExample(const char *home, u_int32_t maxlocks, int do_unlink) +: DbEnv(0) +, ecode(0) +{ + int ret; + + if (do_unlink) { + if ((ret = remove(home, DB_FORCE)) != 0) { + cerr << progname << ": DbEnv::remove: " + << strerror(errno) << "\n"; + ecode = EXIT_FAILURE; + } + } + else { + set_error_stream(&cerr); + set_errpfx("LockExample"); + if (maxlocks != 0) + set_lk_max_locks(maxlocks); + open(home, DB_CREATE | DB_INIT_LOCK, 0); + } +} + +void LockExample::run() +{ + long held; + size_t len; + u_int32_t locker; + int did_get, ret; + DbLock *locks = 0; + int lockcount = 0; + int lockid = 0; + char objbuf[1024]; + + // + // Accept lock requests. + // + lock_id(&locker); + for (held = 0;;) { + cout << "Operation get/release [get]> "; + cout.flush(); + + char opbuf[16]; + cin.getline(opbuf, sizeof(opbuf)); + if (cin.eof()) + break; + if ((len = strlen(opbuf)) <= 1 || strcmp(opbuf, "get") == 0) { + // Acquire a lock. 
+ cout << "input object (text string) to lock> "; + cout.flush(); + cin.getline(objbuf, sizeof(objbuf)); + if (cin.eof()) + break; + if ((len = strlen(objbuf)) <= 0) + continue; + + char lockbuf[16]; + do { + cout << "lock type read/write [read]> "; + cout.flush(); + cin.getline(lockbuf, sizeof(lockbuf)); + if (cin.eof()) + break; + len = strlen(lockbuf); + } while (len >= 1 && + strcmp(lockbuf, "read") != 0 && + strcmp(lockbuf, "write") != 0); + + db_lockmode_t lock_type; + if (len <= 1 || strcmp(lockbuf, "read") == 0) + lock_type = DB_LOCK_READ; + else + lock_type = DB_LOCK_WRITE; + + Dbt dbt(objbuf, (u_int32_t)strlen(objbuf)); + + DbLock lock; + ret = lock_get(locker, DB_LOCK_NOWAIT, &dbt, + lock_type, &lock); + did_get = 1; + lockid = lockcount++; + if (locks == NULL) { + locks = new DbLock[1]; + } + else { + DbLock *newlocks = new DbLock[lockcount]; + for (int lockno = 0; + lockno < lockid; lockno++) { + newlocks[lockno] = locks[lockno]; + } + delete locks; + locks = newlocks; + } + locks[lockid] = lock; + } else { + // Release a lock. + do { + cout << "input lock to release> "; + cout.flush(); + cin.getline(objbuf, sizeof(objbuf)); + if (cin.eof()) + break; + } while ((len = strlen(objbuf)) <= 0); + lockid = strtol(objbuf, NULL, 16); + if (lockid < 0 || lockid >= lockcount) { + cout << "Lock #" << lockid << " out of range\n"; + continue; + } + DbLock lock = locks[lockid]; + ret = lock_put(&lock); + did_get = 0; + } + + switch (ret) { + case 0: + cout << "Lock #" << lockid << " " + << (did_get ? "granted" : "released") + << "\n"; + held += did_get ? 1 : -1; + break; + case DB_LOCK_NOTGRANTED: + cout << "Lock not granted\n"; + break; + case DB_LOCK_DEADLOCK: + cerr << "LockExample: lock_" + << (did_get ? 
"get" : "put") + << ": " << "returned DEADLOCK"; + break; + default: + cerr << "LockExample: lock_get: %s", + strerror(errno); + } + } + cout << "\n"; + cout << "Closing lock region " << held << " locks held\n"; + if (locks != 0) + delete locks; +} + +static int +usage() +{ + cerr << "usage: LockExample [-u] [-h home] [-m maxlocks]\n"; + return (EXIT_FAILURE); +} diff --git a/examples/cxx/MpoolExample.cpp b/examples/cxx/MpoolExample.cpp new file mode 100644 index 00000000..5cf35b4d --- /dev/null +++ b/examples/cxx/MpoolExample.cpp @@ -0,0 +1,217 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +using std::cout; +using std::cerr; +using std::ios; +using std::ofstream; + +#define MPOOL "mpool" + +int init(const char *, int, int); +int run(DB_ENV *, int, int, int); + +static int usage(); + +const char *progname = "MpoolExample"; // Program name. 
+ +class MpoolExample : public DbEnv +{ +public: + MpoolExample(); + int initdb(const char *home, int cachesize); + int run(int hits, int pagesize, int npages); + +private: + static const char FileName[]; + + // no need for copy and assignment + MpoolExample(const MpoolExample &); + void operator = (const MpoolExample &); +}; + +int main(int argc, char *argv[]) +{ + int ret; + int cachesize = 20 * 1024; + int hits = 1000; + int npages = 50; + int pagesize = 1024; + + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-c") == 0) { + if ((cachesize = atoi(argv[++i])) < 20 * 1024) + usage(); + } + else if (strcmp(argv[i], "-h") == 0) { + if ((hits = atoi(argv[++i])) <= 0) + usage(); + } + else if (strcmp(argv[i], "-n") == 0) { + if ((npages = atoi(argv[++i])) <= 0) + usage(); + } + else if (strcmp(argv[i], "-p") == 0) { + if ((pagesize = atoi(argv[++i])) <= 0) + usage(); + } + else { + usage(); + } + } + + // Initialize the file. + if ((ret = init(MPOOL, pagesize, npages)) != 0) + return (ret); + + try { + MpoolExample app; + + cout << progname + << ": cachesize: " << cachesize + << "; pagesize: " << pagesize + << "; N pages: " << npages << "\n"; + + if ((ret = app.initdb(NULL, cachesize)) != 0) + return (ret); + if ((ret = app.run(hits, pagesize, npages)) != 0) + return (ret); + cout << "MpoolExample: completed\n"; + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "MpoolExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +// +// init -- +// Create a backing file. +// +int +init(const char *file, int pagesize, int npages) +{ + // Create a file with the right number of pages, and store a page + // number on each page. + ofstream of(file, ios::out | ios::binary); + + if (of.fail()) { + cerr << "MpoolExample: " << file << ": open failed\n"; + return (EXIT_FAILURE); + } + char *p = new char[pagesize]; + memset(p, 0, pagesize); + + // The pages are numbered from 0. 
+ for (int cnt = 0; cnt <= npages; ++cnt) { + *(db_pgno_t *)p = cnt; + of.write(p, pagesize); + if (of.fail()) { + cerr << "MpoolExample: " << file << ": write failed\n"; + return (EXIT_FAILURE); + } + } + delete [] p; + return (EXIT_SUCCESS); +} + +static int +usage() +{ + cerr << "usage: MpoolExample [-c cachesize] " + << "[-h hits] [-n npages] [-p pagesize]\n"; + return (EXIT_FAILURE); +} + +// Note: by using DB_CXX_NO_EXCEPTIONS, we get explicit error returns +// from various methods rather than exceptions so we can report more +// information with each error. +// +MpoolExample::MpoolExample() +: DbEnv(DB_CXX_NO_EXCEPTIONS) +{ +} + +int MpoolExample::initdb(const char *home, int cachesize) +{ + set_error_stream(&cerr); + set_errpfx("MpoolExample"); + set_cachesize(0, cachesize, 0); + + open(home, DB_CREATE | DB_INIT_MPOOL, 0); + return (EXIT_SUCCESS); +} + +// +// run -- +// Get a set of pages. +// +int +MpoolExample::run(int hits, int pagesize, int npages) +{ + db_pgno_t pageno; + int cnt, ret; + void *p; + + // Open the file in the environment. + DbMpoolFile *mfp; + + if ((ret = memp_fcreate(&mfp, 0)) != 0) { + cerr << "MpoolExample: memp_fcreate failed: " + << strerror(ret) << "\n"; + return (EXIT_FAILURE); + } + mfp->open(MPOOL, 0, 0, pagesize); + + cout << "retrieve " << hits << " random pages... 
"; + + srand((unsigned int)time(NULL)); + for (cnt = 0; cnt < hits; ++cnt) { + pageno = (rand() % npages) + 1; + if ((ret = mfp->get(&pageno, NULL, 0, &p)) != 0) { + cerr << "MpoolExample: unable to retrieve page " + << (unsigned long)pageno << ": " + << strerror(ret) << "\n"; + return (EXIT_FAILURE); + } + if (*(db_pgno_t *)p != pageno) { + cerr << "MpoolExample: wrong page retrieved (" + << (unsigned long)pageno << " != " + << *(int *)p << ")\n"; + return (EXIT_FAILURE); + } + if ((ret = mfp->put(p, DB_PRIORITY_UNCHANGED, 0)) != 0) { + cerr << "MpoolExample: unable to return page " + << (unsigned long)pageno << ": " + << strerror(ret) << "\n"; + return (EXIT_FAILURE); + } + } + + cout << "successful.\n"; + + // Close the pool. + if ((ret = close(0)) != 0) { + cerr << "MpoolExample: " << strerror(ret) << "\n"; + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); +} diff --git a/examples/cxx/SequenceExample.cpp b/examples/cxx/SequenceExample.cpp new file mode 100644 index 00000000..52984340 --- /dev/null +++ b/examples/cxx/SequenceExample.cpp @@ -0,0 +1,133 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#ifdef _WIN32 +extern "C" { + extern int getopt(int, char * const *, const char *); + extern int optind; +} +#else +#include +#endif + +#include + +#define DATABASE "sequence.db" +#define SEQUENCE "my_sequence" + +using std::cout; +using std::cerr; + +class SequenceExample +{ +public: + SequenceExample(); + void run(bool removeExistingDatabase, const char *fileName); + +private: + // no need for copy and assignment + SequenceExample(const SequenceExample &); + void operator = (const SequenceExample &); +}; + +int +usage() +{ + (void)fprintf(stderr, "usage: SequenceExample [-r] [database]\n"); + return (EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + int ch, rflag; + const char *database; + + rflag = 0; + while ((ch = getopt(argc, argv, "r")) != EOF) + switch (ch) { + case 'r': + rflag = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Accept optional database name. */ + database = *argv == NULL ? DATABASE : argv[0]; + + // Use a try block just to report any errors. + // An alternate approach to using exceptions is to + // use error models (see DbEnv::set_error_model()) so + // that error codes are returned for all Berkeley DB methods. + // + try { + SequenceExample app; + app.run((bool)(rflag == 1 ? true : false), database); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "SequenceExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +SequenceExample::SequenceExample() +{ +} + +void SequenceExample::run(bool removeExistingDatabase, const char *fileName) +{ + // Remove the previous database. + if (removeExistingDatabase) + (void)remove(fileName); + + // Create the database object. + // There is no environment for this simple example. 
+ Db db(0, 0); + + db.set_error_stream(&cerr); + db.set_errpfx("SequenceExample"); + db.open(NULL, fileName, NULL, DB_BTREE, DB_CREATE, 0664); + + // We put a try block around this section of code + // to ensure that our database is properly closed + // in the event of an error. + // + try { + Dbt key((void *)SEQUENCE, (u_int32_t)strlen(SEQUENCE)); + DbSequence seq(&db, 0); + seq.open(0, &key, DB_CREATE); + + for (int i = 0; i < 10; i++) { + db_seq_t seqnum; + seq.get(0, 1, &seqnum, 0); + + // We don't have a portable way to print 64-bit numbers. + cout << "Got sequence number (" << + (int)(seqnum >> 32) << ", " << (unsigned)seqnum << + ")\n"; + } + + seq.close(0); + } catch (DbException &dbe) { + cerr << "SequenceExample: " << dbe.what() << "\n"; + } + + db.close(0); +} diff --git a/examples/cxx/TpcbExample.cpp b/examples/cxx/TpcbExample.cpp new file mode 100644 index 00000000..28fa4691 --- /dev/null +++ b/examples/cxx/TpcbExample.cpp @@ -0,0 +1,646 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +using std::cout; +using std::cerr; + +typedef enum { ACCOUNT, BRANCH, TELLER } FTYPE; + +static int invarg(int, char *); +u_int32_t random_id(FTYPE, u_int32_t, u_int32_t, u_int32_t); +u_int32_t random_int(u_int32_t, u_int32_t); +static int usage(void); + +int verbose; +const char *progname = "TpcbExample"; // Program name. + +class TpcbExample : public DbEnv +{ +public: + void populate(int, int, int, int); + void run(int, int, int, int); + int txn(Db *, Db *, Db *, Db *, + int, int, int); + void populateHistory(Db *, int, u_int32_t, u_int32_t, u_int32_t); + void populateTable(Db *, u_int32_t, u_int32_t, int, const char *); + + // Note: the constructor creates a DbEnv(), which is + // not fully initialized until the DbEnv::open() method + // is called. 
+ // + TpcbExample(const char *home, int cachesize, int flags); + +private: + static const char FileName[]; + + // no need for copy and assignment + TpcbExample(const TpcbExample &); + void operator = (const TpcbExample &); +}; + +// +// This program implements a basic TPC/B driver program. To create the +// TPC/B database, run with the -i (init) flag. The number of records +// with which to populate the account, history, branch, and teller tables +// is specified by the a, s, b, and t flags respectively. To run a TPC/B +// test, use the n flag to indicate a number of transactions to run (note +// that you can run many of these processes in parallel to simulate a +// multiuser test run). +// +#define TELLERS_PER_BRANCH 100 +#define ACCOUNTS_PER_TELLER 1000 +#define HISTORY_PER_BRANCH 2592000 + +/* + * The default configuration that adheres to TPCB scaling rules requires + * nearly 3 GB of space. To avoid requiring that much space for testing, + * we set the parameters much lower. If you want to run a valid 10 TPS + * configuration, define VALID_SCALING. 
+ */ +#ifdef VALID_SCALING +#define ACCOUNTS 1000000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 25920000 +#endif + +#ifdef TINY +#define ACCOUNTS 1000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 10000 +#endif + +#if !defined(VALID_SCALING) && !defined(TINY) +#define ACCOUNTS 100000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 259200 +#endif + +#define HISTORY_LEN 100 +#define RECLEN 100 +#define BEGID 1000000 + +struct Defrec { + u_int32_t id; + u_int32_t balance; + u_int8_t pad[RECLEN - sizeof(u_int32_t) - sizeof(u_int32_t)]; +}; + +struct Histrec { + u_int32_t aid; + u_int32_t bid; + u_int32_t tid; + u_int32_t amount; + u_int8_t pad[RECLEN - 4 * sizeof(u_int32_t)]; +}; + +int +main(int argc, char *argv[]) +{ + unsigned long seed; + int accounts, branches, tellers, history; + int iflag, mpool, ntxns, txn_no_sync; + const char *home; + char *endarg; + + home = "TESTDIR"; + accounts = branches = history = tellers = 0; + txn_no_sync = 0; + mpool = ntxns = 0; + verbose = 0; + iflag = 0; + seed = (unsigned long)time(NULL); + + for (int i = 1; i < argc; ++i) { + + if (strcmp(argv[i], "-a") == 0) { + // Number of account records + if ((accounts = atoi(argv[++i])) <= 0) + return (invarg('a', argv[i])); + } + else if (strcmp(argv[i], "-b") == 0) { + // Number of branch records + if ((branches = atoi(argv[++i])) <= 0) + return (invarg('b', argv[i])); + } + else if (strcmp(argv[i], "-c") == 0) { + // Cachesize in bytes + if ((mpool = atoi(argv[++i])) <= 0) + return (invarg('c', argv[i])); + } + else if (strcmp(argv[i], "-f") == 0) { + // Fast mode: no txn sync. + txn_no_sync = 1; + } + else if (strcmp(argv[i], "-h") == 0) { + // DB home. + home = argv[++i]; + } + else if (strcmp(argv[i], "-i") == 0) { + // Initialize the test. 
+ iflag = 1; + } + else if (strcmp(argv[i], "-n") == 0) { + // Number of transactions + if ((ntxns = atoi(argv[++i])) <= 0) + return (invarg('n', argv[i])); + } + else if (strcmp(argv[i], "-S") == 0) { + // Random number seed. + seed = strtoul(argv[++i], &endarg, 0); + if (*endarg != '\0') + return (invarg('S', argv[i])); + } + else if (strcmp(argv[i], "-s") == 0) { + // Number of history records + if ((history = atoi(argv[++i])) <= 0) + return (invarg('s', argv[i])); + } + else if (strcmp(argv[i], "-t") == 0) { + // Number of teller records + if ((tellers = atoi(argv[++i])) <= 0) + return (invarg('t', argv[i])); + } + else if (strcmp(argv[i], "-v") == 0) { + // Verbose option. + verbose = 1; + } + else { + return (usage()); + } + } + + srand((unsigned int)seed); + + accounts = accounts == 0 ? ACCOUNTS : accounts; + branches = branches == 0 ? BRANCHES : branches; + tellers = tellers == 0 ? TELLERS : tellers; + history = history == 0 ? HISTORY : history; + + if (verbose) + cout << (long)accounts << " Accounts, " + << (long)branches << " Branches, " + << (long)tellers << " Tellers, " + << (long)history << " History\n"; + + try { + // Initialize the database environment. + // Must be done in within a try block, unless you + // change the error model in the environment options. + // + TpcbExample app(home, mpool, txn_no_sync ? 
DB_TXN_NOSYNC : 0); + + if (iflag) { + if (ntxns != 0) + return (usage()); + app.populate(accounts, branches, history, tellers); + } + else { + if (ntxns == 0) + return (usage()); + app.run(ntxns, accounts, branches, tellers); + } + + app.close(0); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "TpcbExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +static int +invarg(int arg, char *str) +{ + cerr << "TpcbExample: invalid argument for -" + << (char)arg << ": " << str << "\n"; + return (EXIT_FAILURE); +} + +static int +usage() +{ + cerr << "usage: TpcbExample [-fiv] [-a accounts] [-b branches]\n" + << " [-c cachesize] [-h home] [-n transactions ]\n" + << " [-S seed] [-s history] [-t tellers]\n"; + return (EXIT_FAILURE); +} + +TpcbExample::TpcbExample(const char *home, int cachesize, int flags) +: DbEnv(0) +{ + u_int32_t local_flags; + + set_error_stream(&cerr); + set_errpfx("TpcbExample"); + (void)set_lk_detect(DB_LOCK_DEFAULT); + (void)set_cachesize(0, cachesize == 0 ? + 4 * 1024 * 1024 : (u_int32_t)cachesize, 0); + + if (flags & (DB_TXN_NOSYNC)) + set_flags(DB_TXN_NOSYNC, 1); + flags &= ~(DB_TXN_NOSYNC); + + local_flags = flags | DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN; + open(home, local_flags, 0); +} + +// +// Initialize the database to the specified number of accounts, branches, +// history records, and tellers. 
+// +void +TpcbExample::populate(int accounts, int branches, int history, int tellers) +{ + Db *dbp; + + int err; + u_int32_t balance, idnum; + u_int32_t end_anum, end_bnum, end_tnum; + u_int32_t start_anum, start_bnum, start_tnum; + + idnum = BEGID; + balance = 500000; + + dbp = new Db(this, 0); + dbp->set_h_nelem((unsigned int)accounts); + + if ((err = dbp->open(NULL, "account", NULL, DB_HASH, + DB_CREATE, 0644)) != 0) { + DbException except("Account file create failed", err); + throw except; + } + + start_anum = idnum; + populateTable(dbp, idnum, balance, accounts, "account"); + idnum += accounts; + end_anum = idnum - 1; + if ((err = dbp->close(0)) != 0) { + DbException except("Account file close failed", err); + throw except; + } + delete dbp; + if (verbose) + cout << "Populated accounts: " + << (long)start_anum << " - " << (long)end_anum << "\n"; + + dbp = new Db(this, 0); + // + // Since the number of branches is very small, we want to use very + // small pages and only 1 key per page. This is the poor-man's way + // of getting key locking instead of page locking. + // + dbp->set_h_ffactor(1); + dbp->set_h_nelem((unsigned int)branches); + dbp->set_pagesize(512); + + if ((err = dbp->open(NULL, "branch", NULL, DB_HASH, + DB_CREATE, 0644)) != 0) { + DbException except("Branch file create failed", err); + throw except; + } + start_bnum = idnum; + populateTable(dbp, idnum, balance, branches, "branch"); + idnum += branches; + end_bnum = idnum - 1; + if ((err = dbp->close(0)) != 0) { + DbException except("Close of branch file failed", err); + throw except; + } + delete dbp; + + if (verbose) + cout << "Populated branches: " + << (long)start_bnum << " - " << (long)end_bnum << "\n"; + + dbp = new Db(this, 0); + // + // In the case of tellers, we also want small pages, but we'll let + // the fill factor dynamically adjust itself. 
+ // + dbp->set_h_ffactor(0); + dbp->set_h_nelem((unsigned int)tellers); + dbp->set_pagesize(512); + + if ((err = dbp->open(NULL, "teller", NULL, DB_HASH, + DB_CREATE, 0644)) != 0) { + DbException except("Teller file create failed", err); + throw except; + } + + start_tnum = idnum; + populateTable(dbp, idnum, balance, tellers, "teller"); + idnum += tellers; + end_tnum = idnum - 1; + if ((err = dbp->close(0)) != 0) { + DbException except("Close of teller file failed", err); + throw except; + } + delete dbp; + if (verbose) + cout << "Populated tellers: " + << (long)start_tnum << " - " << (long)end_tnum << "\n"; + + dbp = new Db(this, 0); + dbp->set_re_len(HISTORY_LEN); + if ((err = dbp->open(NULL, "history", NULL, DB_RECNO, + DB_CREATE, 0644)) != 0) { + DbException except("Create of history file failed", err); + throw except; + } + + populateHistory(dbp, history, accounts, branches, tellers); + if ((err = dbp->close(0)) != 0) { + DbException except("Close of history file failed", err); + throw except; + } + delete dbp; +} + +void +TpcbExample::populateTable(Db *dbp, + u_int32_t start_id, u_int32_t balance, + int nrecs, const char *msg) +{ + Defrec drec; + memset(&drec.pad[0], 1, sizeof(drec.pad)); + + Dbt kdbt(&drec.id, sizeof(u_int32_t)); + Dbt ddbt(&drec, sizeof(drec)); + + for (int i = 0; i < nrecs; i++) { + drec.id = start_id + (u_int32_t)i; + drec.balance = balance; + int err; + if ((err = + dbp->put(NULL, &kdbt, &ddbt, DB_NOOVERWRITE)) != 0) { + cerr << "Failure initializing " << msg << " file: " + << strerror(err) << "\n"; + DbException except("failure initializing file", err); + throw except; + } + } +} + +void +TpcbExample::populateHistory(Db *dbp, int nrecs, u_int32_t accounts, + u_int32_t branches, u_int32_t tellers) +{ + Histrec hrec; + memset(&hrec.pad[0], 1, sizeof(hrec.pad)); + hrec.amount = 10; + db_recno_t key; + + Dbt kdbt(&key, sizeof(u_int32_t)); + Dbt ddbt(&hrec, sizeof(hrec)); + + for (int i = 1; i <= nrecs; i++) { + hrec.aid = 
random_id(ACCOUNT, accounts, branches, tellers); + hrec.bid = random_id(BRANCH, accounts, branches, tellers); + hrec.tid = random_id(TELLER, accounts, branches, tellers); + + int err; + key = (db_recno_t)i; + if ((err = dbp->put(NULL, &kdbt, &ddbt, DB_APPEND)) != 0) { + DbException except("failure initializing history file", + err); + throw except; + } + } +} + +u_int32_t +random_int(u_int32_t lo, u_int32_t hi) +{ + u_int32_t ret; + int t; + + t = rand(); + ret = (u_int32_t)(((double)t / ((double)(RAND_MAX) + 1)) * + (hi - lo + 1)); + ret += lo; + return (ret); +} + +u_int32_t +random_id(FTYPE type, u_int32_t accounts, u_int32_t branches, u_int32_t tellers) +{ + u_int32_t min, max, num; + + max = min = BEGID; + num = accounts; + switch (type) { + case TELLER: + min += branches; + num = tellers; + // Fallthrough + case BRANCH: + if (type == BRANCH) + num = branches; + min += accounts; + // Fallthrough + case ACCOUNT: + max = min + num - 1; + } + return (random_int(min, max)); +} + +void +TpcbExample::run(int n, int accounts, int branches, int tellers) +{ + Db *adb, *bdb, *hdb, *tdb; + int failed, ret, txns; + time_t start_time, end_time; + + // + // Open the database files. 
+ // + + int err; + adb = new Db(this, 0); + if ((err = adb->open(NULL, "account", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + DbException except("Open of account file failed", err); + throw except; + } + + bdb = new Db(this, 0); + if ((err = bdb->open(NULL, "branch", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + DbException except("Open of branch file failed", err); + throw except; + } + + tdb = new Db(this, 0); + if ((err = tdb->open(NULL, "teller", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + DbException except("Open of teller file failed", err); + throw except; + } + + hdb = new Db(this, 0); + if ((err = hdb->open(NULL, "history", NULL, DB_UNKNOWN, + DB_AUTO_COMMIT, 0)) != 0) { + DbException except("Open of history file failed", err); + throw except; + } + + (void)time(&start_time); + for (txns = n, failed = 0; n-- > 0;) + if ((ret = txn(adb, bdb, tdb, hdb, + accounts, branches, tellers)) != 0) + ++failed; + (void)time(&end_time); + if (end_time == start_time) + ++end_time; + // We use printf because it provides much simpler + // formatting than iostreams. + // + printf("%s: %d txns: %d failed, %.2f TPS\n", progname, txns, failed, + (txns - failed) / (double)(end_time - start_time)); + + (void)adb->close(0); + (void)bdb->close(0); + (void)tdb->close(0); + (void)hdb->close(0); +} + +// +// XXX Figure out the appropriate way to pick out IDs. +// +int +TpcbExample::txn(Db *adb, Db *bdb, Db *tdb, Db *hdb, + int accounts, int branches, int tellers) +{ + Dbc *acurs = NULL; + Dbc *bcurs = NULL; + Dbc *tcurs = NULL; + DbTxn *t = NULL; + + db_recno_t key; + Defrec rec; + Histrec hrec; + int account, branch, teller, ret; + + Dbt d_dbt; + Dbt d_histdbt; + Dbt k_dbt; + Dbt k_histdbt(&key, sizeof(key)); + + // !!! + // This is sample code -- we could move a lot of this into the driver + // to make it faster. 
+ // + account = random_id(ACCOUNT, accounts, branches, tellers); + branch = random_id(BRANCH, accounts, branches, tellers); + teller = random_id(TELLER, accounts, branches, tellers); + + k_dbt.set_size(sizeof(int)); + + d_dbt.set_flags(DB_DBT_USERMEM); + d_dbt.set_data(&rec); + d_dbt.set_ulen(sizeof(rec)); + + hrec.aid = account; + hrec.bid = branch; + hrec.tid = teller; + hrec.amount = 10; + // Request 0 bytes since we're just positioning. + d_histdbt.set_flags(DB_DBT_PARTIAL); + + // START PER-TRANSACTION TIMING. + // + // Technically, TPCB requires a limit on response time, you only get + // to count transactions that complete within 2 seconds. That's not + // an issue for this sample application -- regardless, here's where + // the transaction begins. + if (txn_begin(NULL, &t, 0) != 0) + goto err; + + if (adb->cursor(t, &acurs, 0) != 0 || + bdb->cursor(t, &bcurs, 0) != 0 || + tdb->cursor(t, &tcurs, 0) != 0) + goto err; + + try { + // Account record + k_dbt.set_data(&account); + if (acurs->get(&k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (acurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + // Branch record + k_dbt.set_data(&branch); + if (bcurs->get(&k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (bcurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + // Teller record + k_dbt.set_data(&teller); + if (tcurs->get(&k_dbt, &d_dbt, DB_SET) != 0) + goto err; + rec.balance += 10; + if (tcurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) + goto err; + + // History record + d_histdbt.set_flags(0); + d_histdbt.set_data(&hrec); + d_histdbt.set_ulen(sizeof(hrec)); + if (hdb->put(t, &k_histdbt, &d_histdbt, DB_APPEND) != 0) + goto err; + } catch (DbDeadlockException e) { + goto err; + } + + if (acurs->close() != 0 || bcurs->close() != 0 || tcurs->close() != 0) + goto err; + + ret = t->commit(0); + t = NULL; + if (ret != 0) + goto err; + + // END PER-TRANSACTION TIMING. 
+ return (0); + +err: + if (acurs != NULL) + (void)acurs->close(); + if (bcurs != NULL) + (void)bcurs->close(); + if (tcurs != NULL) + (void)tcurs->close(); + if (t != NULL) + (void)t->abort(); + + if (verbose) + cout << "Transaction A=" << (long)account + << " B=" << (long)branch + << " T=" << (long)teller << " failed\n"; + return (-1); +} diff --git a/examples/cxx/excxx_repquote/RepConfigInfo.cpp b/examples/cxx/excxx_repquote/RepConfigInfo.cpp new file mode 100644 index 00000000..45195d57 --- /dev/null +++ b/examples/cxx/excxx_repquote/RepConfigInfo.cpp @@ -0,0 +1,58 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include "RepConfigInfo.h" + +RepConfigInfo::RepConfigInfo() +{ + start_policy = DB_REP_ELECTION; + home = NULL; + got_listen_address = false; + nrsites = 0; + priority = 100; + verbose = false; + this_host.creator = false; + other_hosts = NULL; + ack_policy = DB_REPMGR_ACKS_QUORUM; + bulk = false; +} + +RepConfigInfo::~RepConfigInfo() +{ + // release any other_hosts structs. 
+ if (other_hosts != NULL) { + REP_HOST_INFO *CurItem = other_hosts; + while (CurItem->next != NULL) { + REP_HOST_INFO *TmpItem = CurItem->next; + free(CurItem); + CurItem = TmpItem; + } + free(CurItem); + } + other_hosts = NULL; +} + +void RepConfigInfo::addOtherHost(char* host, int port, bool peer) +{ + REP_HOST_INFO *newinfo; + newinfo = (REP_HOST_INFO*)malloc(sizeof(REP_HOST_INFO)); + newinfo->host = host; + newinfo->port = port; + newinfo->peer = peer; + if (other_hosts == NULL) { + other_hosts = newinfo; + newinfo->next = NULL; + } else { + newinfo->next = other_hosts; + other_hosts = newinfo; + } + nrsites++; +} diff --git a/examples/cxx/excxx_repquote/RepConfigInfo.h b/examples/cxx/excxx_repquote/RepConfigInfo.h new file mode 100644 index 00000000..ba12fd94 --- /dev/null +++ b/examples/cxx/excxx_repquote/RepConfigInfo.h @@ -0,0 +1,37 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include + +// Chainable struct used to store host information. +typedef struct RepHostInfoObj{ + bool creator; + char* host; + int port; + bool peer; // only relevant for "other" hosts + RepHostInfoObj* next; // used for chaining multiple "other" hosts. +} REP_HOST_INFO; + +class RepConfigInfo { +public: + RepConfigInfo(); + virtual ~RepConfigInfo(); + + void addOtherHost(char* host, int port, bool peer); +public: + u_int32_t start_policy; + char* home; + bool got_listen_address; + REP_HOST_INFO this_host; + int nrsites; + int priority; + bool verbose; + // used to store a set of optional other hosts. 
+ REP_HOST_INFO *other_hosts; + int ack_policy; + bool bulk; +}; diff --git a/examples/cxx/excxx_repquote/RepQuoteExample.cpp b/examples/cxx/excxx_repquote/RepQuoteExample.cpp new file mode 100644 index 00000000..f06f1c90 --- /dev/null +++ b/examples/cxx/excxx_repquote/RepQuoteExample.cpp @@ -0,0 +1,827 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * In this application, we specify all communication via the command line. In + * a real application, we would expect that information about the other sites + * in the system would be maintained in some sort of configuration file. The + * critical part of this interface is that we assume at startup that we can + * find out + * 1) what our Berkeley DB home environment is, + * 2) what host/port we wish to listen on for connections; and + * 3) an optional list of other sites we should attempt to connect to. + * + * These pieces of information are expressed by the following flags. 
+ * -h home (required; h stands for home directory) + * -l host:port (required unless -L is specified; l stands for local) + * -L host:port (optional, L means group creator) + * -C or -M (optional; start up as client or master) + * -r host:port (optional; r stands for remote; any number of these may be + * specified) + * -R host:port (optional; R stands for remote peer; only one of these may + * be specified) + * -a all|quorum (optional; a stands for ack policy) + * -b (optional; b stands for bulk) + * -p priority (optional; defaults to 100) + * -v (optional; v stands for verbose) + */ + +#include +#include + +#include +#include +#include + +#include +#include "RepConfigInfo.h" +#include "dbc_auto.h" + +using std::cout; +using std::cin; +using std::cerr; +using std::endl; +using std::flush; +using std::istream; +using std::istringstream; +using std::string; +using std::getline; + +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" + +const char *progname = "excxx_repquote"; + +#include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#define snprintf _snprintf +#define sleep(s) Sleep(1000 * (s)) + +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} + +typedef HANDLE thread_t; +typedef DWORD thread_exit_status_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1) +#else /* !_WIN32 */ +#include + +typedef pthread_t thread_t; +typedef void* thread_exit_status_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) +#endif + +// Struct used to store information in Db app_private field. 
+typedef struct { + bool app_finished; + bool in_client_sync; + bool is_master; + bool verbose; +} APP_DATA; + +static void log(const char *); +void *checkpoint_thread (void *); +void *log_archive_thread (void *); + +class RepQuoteExample { +public: + RepQuoteExample(); + void init(RepConfigInfo* config); + void doloop(); + int terminate(); + + static void event_callback(DbEnv* dbenv, u_int32_t which, void *info); + +private: + // disable copy constructor. + RepQuoteExample(const RepQuoteExample &); + void operator = (const RepQuoteExample &); + + // internal data members. + APP_DATA app_data; + RepConfigInfo *app_config; + DbEnv cur_env; + thread_t ckp_thr; + thread_t lga_thr; + + // private methods. + void print_stocks(Db *dbp); + void prompt(); +}; + +class DbHolder { +public: + DbHolder(DbEnv *env) : env(env) { + dbp = 0; + } + + ~DbHolder() { + try { + close(); + } catch (...) { + // Ignore: this may mean another exception is pending + } + } + + bool ensure_open(bool creating) { + if (dbp) + return (true); + dbp = new Db(env, 0); + + u_int32_t flags = DB_AUTO_COMMIT; + if (creating) + flags |= DB_CREATE; + try { + dbp->open(NULL, DATABASE, NULL, DB_BTREE, flags, 0); + return (true); + } catch (DbDeadlockException e) { + } catch (DbRepHandleDeadException e) { + } catch (DbException e) { + if (e.get_errno() == DB_REP_LOCKOUT) { + // Just fall through. + } else if (e.get_errno() == ENOENT && !creating) { + // Provide a bit of extra explanation. + log("Stock DB does not yet exist"); + } else + throw; + } + + // (All retryable errors fall through to here.) + log("please retry the operation"); + close(); + return (false); + } + + void close() { + if (dbp) { + try { + dbp->close(0); + delete dbp; + dbp = 0; + } catch (...) 
{ + delete dbp; + dbp = 0; + throw; + } + } + } + + operator Db *() { + return dbp; + } + + Db *operator->() { + return dbp; + } + +private: + Db *dbp; + DbEnv *env; +}; + +class StringDbt : public Dbt { +public: +#define GET_STRING_OK 0 +#define GET_STRING_INVALID_PARAM 1 +#define GET_STRING_SMALL_BUFFER 2 +#define GET_STRING_EMPTY_DATA 3 + int get_string(char **buf, size_t buf_len) + { + size_t copy_len; + int ret = GET_STRING_OK; + if (buf == NULL) { + cerr << "Invalid input buffer to get_string" << endl; + return GET_STRING_INVALID_PARAM; + } + + // make sure the string is null terminated. + memset(*buf, 0, buf_len); + + // if there is no string, just return. + if (get_data() == NULL || get_size() == 0) + return GET_STRING_OK; + + if (get_size() >= buf_len) { + ret = GET_STRING_SMALL_BUFFER; + copy_len = buf_len - 1; // save room for a terminator. + } else + copy_len = get_size(); + memcpy(*buf, get_data(), copy_len); + + return ret; + } + size_t get_string_length() + { + if (get_size() == 0) + return 0; + return strlen((char *)get_data()); + } + void set_string(char *string) + { + set_data(string); + set_size((u_int32_t)strlen(string)); + } + + StringDbt(char *string) : + Dbt(string, (u_int32_t)strlen(string)) {}; + StringDbt() : Dbt() {}; + ~StringDbt() {}; + + // Don't add extra data to this sub-class since we want it to remain + // compatible with Dbt objects created internally by Berkeley DB. +}; + +RepQuoteExample::RepQuoteExample() : app_config(0), cur_env(0) { + app_data.app_finished = 0; + app_data.in_client_sync = 0; + app_data.is_master = 0; // assume I start out as client + app_data.verbose = 0; +} + +void RepQuoteExample::init(RepConfigInfo *config) { + DbSite *dbsite; + int i; + + app_config = config; + + cur_env.set_app_private(&app_data); + cur_env.set_errfile(stderr); + cur_env.set_errpfx(progname); + cur_env.set_event_notify(event_callback); + + // Configure bulk transfer to send groups of records to clients + // in a single network transfer. 
This is useful for master sites + // and clients participating in client-to-client synchronization. + // + if (app_config->bulk) + cur_env.rep_set_config(DB_REP_CONF_BULK, 1); + + // Turn on debugging and informational output if requested. + if (app_config->verbose) { + cur_env.set_verbose(DB_VERB_REPLICATION, 1); + app_data.verbose = 1; + } + + // Set replication group election priority for this environment. + // An election first selects the site with the most recent log + // records as the new master. If multiple sites have the most + // recent log records, the site with the highest priority value + // is selected as master. + // + cur_env.rep_set_priority(app_config->priority); + + // Set the policy that determines how master and client sites + // handle acknowledgement of replication messages needed for + // permanent records. The default policy of "quorum" requires only + // a quorum of electable peers sufficient to ensure a permanent + // record remains durable if an election is held. The "all" option + // requires all clients to acknowledge a permanent replication + // message instead. + // + cur_env.repmgr_set_ack_policy(app_config->ack_policy); + + // Set the threshold for the minimum and maximum time the client + // waits before requesting retransmission of a missing message. + // Base these values on the performance and load characteristics + // of the master and client host platforms as well as the round + // trip message time. + // + cur_env.rep_set_request(20000, 500000); + + // Configure deadlock detection to ensure that any deadlocks + // are broken by having one of the conflicting lock requests + // rejected. DB_LOCK_DEFAULT uses the lock policy specified + // at environment creation time or DB_LOCK_RANDOM if none was + // specified. + // + cur_env.set_lk_detect(DB_LOCK_DEFAULT); + + // The following base replication features may also be useful to your + // application. See Berkeley DB documentation for more details. 
+ // - Master leases: Provide stricter consistency for data reads + // on a master site. + // - Timeouts: Customize the amount of time Berkeley DB waits + // for such things as an election to be concluded or a master + // lease to be granted. + // - Delayed client synchronization: Manage the master site's + // resources by spreading out resource-intensive client + // synchronizations. + // - Blocked client operations: Return immediately with an error + // instead of waiting indefinitely if a client operation is + // blocked by an ongoing client synchronization. + cur_env.repmgr_site(app_config->this_host.host, + app_config->this_host.port, &dbsite, 0); + dbsite->set_config(DB_LOCAL_SITE, 1); + if (app_config->this_host.creator) + dbsite->set_config(DB_GROUP_CREATOR, 1); + dbsite->close(); + + i = 1; + for ( REP_HOST_INFO *cur = app_config->other_hosts; + cur != NULL && i <= app_config->nrsites; + cur = cur->next, i++) { + cur_env.repmgr_site(cur->host, cur->port, &dbsite, 0); + dbsite->set_config(DB_BOOTSTRAP_HELPER, 1); + if (cur->peer) + dbsite->set_config(DB_REPMGR_PEER, 1); + dbsite->close(); + } + + // Configure heartbeat timeouts so that repmgr monitors the + // health of the TCP connection. Master sites broadcast a heartbeat + // at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout. + // Client sites wait for message activity the length of the + // DB_REP_HEARTBEAT_MONITOR timeout before concluding that the + // connection to the master is lost. The DB_REP_HEARTBEAT_MONITOR + // timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout. + // + cur_env.rep_set_timeout(DB_REP_HEARTBEAT_SEND, 5000000); + cur_env.rep_set_timeout(DB_REP_HEARTBEAT_MONITOR, 10000000); + + // The following repmgr features may also be useful to your + // application. See Berkeley DB documentation for more details. + // - Two-site strict majority rule - In a two-site replication + // group, require both sites to be available to elect a new + // master. 
+ // - Timeouts - Customize the amount of time repmgr waits + // for such things as waiting for acknowledgements or attempting + // to reconnect to other sites. + // - Site list - return a list of sites currently known to repmgr. + + // We can now open our environment, although we're not ready to + // begin replicating. However, we want to have a dbenv around + // so that we can send it into any of our message handlers. + // + cur_env.set_cachesize(0, CACHESIZE, 0); + cur_env.set_flags(DB_TXN_NOSYNC, 1); + + cur_env.open(app_config->home, DB_CREATE | DB_RECOVER | + DB_THREAD | DB_INIT_REP | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN, 0); + + // Start checkpoint and log archive support threads. + (void)thread_create(&ckp_thr, NULL, checkpoint_thread, &cur_env); + (void)thread_create(&lga_thr, NULL, log_archive_thread, &cur_env); + + cur_env.repmgr_start(3, app_config->start_policy); +} + +int RepQuoteExample::terminate() { + try { + // Wait for checkpoint and log archive threads to finish. + // Windows does not allow NULL pointer for exit code variable. + thread_exit_status_t exstat; + + (void)thread_join(lga_thr, &exstat); + (void)thread_join(ckp_thr, &exstat); + + // We have used the DB_TXN_NOSYNC environment flag for + // improved performance without the usual sacrifice of + // transactional durability, as discussed in the + // "Transactional guarantees" page of the Reference + // Guide: if one replication site crashes, we can + // expect the data to exist at another site. However, + // in case we shut down all sites gracefully, we push + // out the end of the log here so that the most + // recent transactions don't mysteriously disappear. 
+ // + cur_env.log_flush(NULL); + + cur_env.close(0); + } catch (DbException dbe) { + cout << "error closing environment: " << dbe.what() << endl; + } + return 0; +} + +void RepQuoteExample::prompt() { + cout << "QUOTESERVER"; + if (!app_data.is_master) + cout << "(read-only)"; + cout << "> " << flush; +} + +void log(const char *msg) { + cerr << msg << endl; +} + +// Simple command-line user interface: +// - enter " " to insert or update a record in the +// database; +// - just press Return (i.e., blank input line) to print out the contents of +// the database; +// - enter "quit" or "exit" to quit. +// +void RepQuoteExample::doloop() { + DbHolder dbh(&cur_env); + + string input; + while (prompt(), getline(cin, input)) { + istringstream is(input); + string token1, token2; + + // Read 0, 1 or 2 tokens from the input. + // + int count = 0; + if (is >> token1) { + count++; + if (is >> token2) + count++; + } + + if (count == 1) { + if (token1 == "exit" || token1 == "quit") { + app_data.app_finished = 1; + break; + } else { + log("Format: "); + continue; + } + } + + // Here we know count is either 0 or 2, so we're about to try a + // DB operation. + // + // Open database with DB_CREATE only if this is a master + // database. A client database uses polling to attempt + // to open the database without DB_CREATE until it is + // successful. + // + // This DB_CREATE polling logic can be simplified under + // some circumstances. For example, if the application can + // be sure a database is already there, it would never need + // to open it with DB_CREATE. 
+ // + if (!dbh.ensure_open(app_data.is_master)) + continue; + + try { + if (count == 0) + if (app_data.in_client_sync) + log( + "Cannot read data during client initialization - please try again."); + else + print_stocks(dbh); + else if (!app_data.is_master) + log("Can't update at client"); + else { + const char *symbol = token1.c_str(); + StringDbt key(const_cast(symbol)); + + const char *price = token2.c_str(); + StringDbt data(const_cast(price)); + + dbh->put(NULL, &key, &data, 0); + } + } catch (DbDeadlockException e) { + log("please retry the operation"); + dbh.close(); + } catch (DbRepHandleDeadException e) { + log("please retry the operation"); + dbh.close(); + } catch (DbException e) { + if (e.get_errno() == DB_REP_LOCKOUT) { + log("please retry the operation"); + dbh.close(); + } else + throw; + } + } + + dbh.close(); +} + +void RepQuoteExample::event_callback(DbEnv* dbenv, u_int32_t which, void *info) +{ + APP_DATA *app = (APP_DATA*)dbenv->get_app_private(); + + info = NULL; /* Currently unused. */ + + switch (which) { + case DB_EVENT_REP_CLIENT: + app->is_master = 0; + app->in_client_sync = 1; + break; + case DB_EVENT_REP_MASTER: + app->is_master = 1; + app->in_client_sync = 0; + break; + case DB_EVENT_REP_NEWMASTER: + app->in_client_sync = 1; + break; + case DB_EVENT_REP_PERM_FAILED: + // Did not get enough acks to guarantee transaction + // durability based on the configured ack policy. This + // transaction will be flushed to the master site's + // local disk storage for durability. 
+ // + if (app->verbose) + log( +"EVENT: Insufficient acknowledgements to guarantee transaction durability."); + break; + + case DB_EVENT_REP_STARTUPDONE: + app->in_client_sync = 0; + break; + + case DB_EVENT_PANIC: + if (app->verbose) + log("EVENT: receive panic event"); + break; + + case DB_EVENT_REP_CONNECT_BROKEN: + if (app->verbose) + log("EVENT: connection is broken"); + break; + + case DB_EVENT_REP_DUPMASTER: + if (app->verbose) + log("EVENT: duplicate master"); + break; + + case DB_EVENT_REP_ELECTED: + if (app->verbose) + log("EVENT: election in replication group"); + break; + + case DB_EVENT_REP_CONNECT_ESTD: + if (app->verbose) + log("EVENT: establish connection"); + break; + + case DB_EVENT_REP_CONNECT_TRY_FAILED: + if (app->verbose) + log("EVENT: fail to try connection"); + break; + + case DB_EVENT_REP_INIT_DONE: + if (app->verbose) + log("EVENT: finish initialization"); + break; + + case DB_EVENT_REP_LOCAL_SITE_REMOVED: + if (app->verbose) + log("EVENT: remove local site"); + break; + + case DB_EVENT_REP_SITE_ADDED: + if (app->verbose) + log("EVENT: add site"); + break; + + case DB_EVENT_REP_SITE_REMOVED: + if (app->verbose) + log("EVENT: remote site"); + break; + + default: + dbenv->errx("ignoring event %d", which); + } +} + +void RepQuoteExample::print_stocks(Db *dbp) { + StringDbt key, data; +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + char keybuf[MAXKEYSIZE + 1], databuf[MAXDATASIZE + 1]; + char *kbuf, *dbuf; + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + kbuf = keybuf; + dbuf = databuf; + + DbcAuto dbc(dbp, 0, 0); + cout << "\tSymbol\tPrice" << endl + << "\t======\t=====" << endl; + + for (int ret = dbc->get(&key, &data, DB_FIRST); + ret == 0; + ret = dbc->get(&key, &data, DB_NEXT)) { + key.get_string(&kbuf, MAXKEYSIZE); + data.get_string(&dbuf, MAXDATASIZE); + + cout << "\t" << keybuf << "\t" << databuf << endl; + } + cout << endl << flush; + dbc.close(); +} + +static void usage() { + cerr << "usage: " << progname << 
endl << " -h home -l|-L host:port" + << " [-C|M] [-r host:port] [-R host:port]" << endl + << " [-a all|quorum] [-b] [-p priority] [-v]" << endl; + + cerr << "\t -h home (required; h stands for home directory)" << endl + << "\t -l host:port (required unless -L is specified;" + << " l stands for local)" << endl + << "\t -L host:port (optional, L means group creator)" << endl + << "\t -C or -M (optional; start up as client or master)" << endl + << "\t -r host:port (optional; r stands for remote; any " + << "number of these" << endl + << "\t may be specified)" << endl + << "\t -R host:port (optional; R stands for remote peer; only " + << "one of" << endl + << "\t these may be specified)" << endl + << "\t -a all|quorum (optional; a stands for ack policy)" << endl + << "\t -b (optional; b stands for bulk)" << endl + << "\t -p priority (optional; defaults to 100)" << endl + << "\t -v (optional; v stands for verbose)" << endl; + + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + RepConfigInfo config; + char ch, *portstr, *tmphost; + int tmpport; + bool tmppeer; + + // Extract the command line parameters + while ((ch = getopt(argc, argv, "a:bCh:L:l:Mp:R:r:v")) != EOF) { + tmppeer = false; + switch (ch) { + case 'a': + if (strncmp(optarg, "all", 3) == 0) + config.ack_policy = DB_REPMGR_ACKS_ALL; + else if (strncmp(optarg, "quorum", 6) != 0) + usage(); + break; + case 'b': + config.bulk = true; + break; + case 'C': + config.start_policy = DB_REP_CLIENT; + break; + case 'h': + config.home = optarg; + break; + case 'L': + config.this_host.creator = true; // FALLTHROUGH + case 'l': + config.this_host.host = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + cerr << "Bad host specification." 
<< endl; + usage(); + } + config.this_host.port = (unsigned short)atoi(portstr); + config.got_listen_address = true; + break; + case 'M': + config.start_policy = DB_REP_MASTER; + break; + case 'p': + config.priority = atoi(optarg); + break; + case 'R': + tmppeer = true; // FALLTHROUGH + case 'r': + tmphost = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + cerr << "Bad host specification." << endl; + usage(); + } + tmpport = (unsigned short)atoi(portstr); + + config.addOtherHost(tmphost, tmpport, tmppeer); + break; + case 'v': + config.verbose = true; + break; + case '?': + default: + usage(); + } + } + + // Error check command line. + if ((!config.got_listen_address) || config.home == NULL) + usage(); + + RepQuoteExample runner; + try { + runner.init(&config); + runner.doloop(); + } catch (DbException dbe) { + cerr << "Caught an exception during initialization or" + << " processing: " << dbe.what() << endl; + } + runner.terminate(); + return 0; +} + +// This is a very simple thread that performs checkpoints at a fixed +// time interval. For a master site, the time interval is one minute +// plus the duration of the checkpoint_delay timeout (30 seconds by +// default.) For a client site, the time interval is one minute. +// +void *checkpoint_thread(void *args) +{ + DbEnv *env; + APP_DATA *app; + int i, ret; + + env = (DbEnv *)args; + app = (APP_DATA *)env->get_app_private(); + + for (;;) { + // Wait for one minute, polling once per second to see if + // application has finished. When application has finished, + // terminate this thread. + // + for (i = 0; i < 60; i++) { + sleep(1); + if (app->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + // Perform a checkpoint. + if ((ret = env->txn_checkpoint(0, 0, 0)) != 0) { + env->err(ret, "Could not perform checkpoint.\n"); + return ((void *)EXIT_FAILURE); + } + } +} + +// This is a simple log archive thread. 
Once per minute, it removes all but +// the most recent 3 logs that are safe to remove according to a call to +// DBENV->log_archive(). +// +// Log cleanup is needed to conserve disk space, but aggressive log cleanup +// can cause more frequent client initializations if a client lags too far +// behind the current master. This can happen in the event of a slow client, +// a network partition, or a new master that has not kept as many logs as the +// previous master. +// +// The approach in this routine balances the need to mitigate against a +// lagging client by keeping a few more of the most recent unneeded logs +// with the need to conserve disk space by regularly cleaning up log files. +// Use of automatic log removal (DBENV->log_set_config() DB_LOG_AUTO_REMOVE +// flag) is not recommended for replication due to the risk of frequent +// client initializations. +// +void *log_archive_thread(void *args) +{ + DbEnv *env; + APP_DATA *app; + char **begin, **list; + int i, listlen, logs_to_keep, minlog, ret; + + env = (DbEnv *)args; + app = (APP_DATA *)env->get_app_private(); + logs_to_keep = 3; + + for (;;) { + // Wait for one minute, polling once per second to see if + // application has finished. When application has finished, + // terminate this thread. + // + for (i = 0; i < 60; i++) { + sleep(1); + if (app->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + // Get the list of unneeded log files. + if ((ret = env->log_archive(&list, DB_ARCH_ABS)) != 0) { + env->err(ret, "Could not get log archive list."); + return ((void *)EXIT_FAILURE); + } + if (list != NULL) { + listlen = 0; + // Get the number of logs in the list. + for (begin = list; *begin != NULL; begin++, listlen++); + // Remove all but the logs_to_keep most recent + // unneeded log files. 
+ // + minlog = listlen - logs_to_keep; + for (begin = list, i= 0; i < minlog; list++, i++) { + if ((ret = unlink(*list)) != 0) { + env->err(ret, + "logclean: remove %s", *list); + env->errx( + "logclean: Error remove %s", *list); + free(begin); + return ((void *)EXIT_FAILURE); + } + } + free(begin); + } + } +} diff --git a/examples/cxx/excxx_repquote/dbc_auto.h b/examples/cxx/excxx_repquote/dbc_auto.h new file mode 100644 index 00000000..fc8518d5 --- /dev/null +++ b/examples/cxx/excxx_repquote/dbc_auto.h @@ -0,0 +1,65 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2008, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +/* + * Resource-acquisition-as-initialization pattern for Berkeley DB's cursors. + * + * Use DbcAuto instead of Berkeley DB's builtin Dbc class. The constructor + * allocates a new cursor, and it is freed automatically when it goes out of + * scope. + * + * Note that some care is required with the order in which Berkeley DB handles + * are closed. In particular, the cursor handle must be closed before any + * database or transaction handles the cursor references. In addition, the + * cursor close method can throw exceptions, which are masked by the destructor. + * + * For these reasons, you are strongly advised to call the DbcAuto::close + * method in the non-exceptional case. This class exists to ensure that + * cursors are closed if an exception occurs. + */ +class DbcAuto { +public: + DbcAuto(Db *db, DbTxn *txn, u_int32_t flags) { + db->cursor(txn, &dbc_, flags); + } + + ~DbcAuto() { + try { + close(); + } catch(...) { + // Ignore it, another exception is pending + } + } + + void close() { + if (dbc_) { + // Set the member to 0 before making the call in + // case an exception is thrown. 
+ Dbc *tdbc = dbc_; + dbc_ = 0; + tdbc->close(); + } + } + + operator Dbc *() { + return dbc_; + } + + operator Dbc **() { + return &dbc_; + } + + Dbc *operator->() { + return dbc_; + } + +private: + Dbc *dbc_; +}; diff --git a/examples/cxx/excxx_repquote_gsg/RepConfigInfo.h b/examples/cxx/excxx_repquote_gsg/RepConfigInfo.h new file mode 100644 index 00000000..e9bdcc3c --- /dev/null +++ b/examples/cxx/excxx_repquote_gsg/RepConfigInfo.h @@ -0,0 +1,79 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include + +// Chainable struct used to store host information. +typedef struct RepHostInfoObj{ + char* host; + u_int16_t port; + RepHostInfoObj* next; // used for chaining multiple "other" hosts. + bool creator; +} REP_HOST_INFO; + +class RepConfigInfo { +public: + RepConfigInfo(); + virtual ~RepConfigInfo(); + + void addOtherHost(char* host, int port); +public: + u_int32_t start_policy; + char* home; + bool got_listen_address; + REP_HOST_INFO this_host; + int nrsites; + int priority; + // used to store a set of optional other hosts. + REP_HOST_INFO *other_hosts; +}; + + +RepConfigInfo::RepConfigInfo() +{ + start_policy = DB_REP_ELECTION; + home = NULL; + got_listen_address = false; + nrsites = 0; + priority = 100; + other_hosts = NULL; +} + +RepConfigInfo::~RepConfigInfo() +{ + // release any other_hosts structs. 
+ if (other_hosts != NULL) { + REP_HOST_INFO *CurItem = other_hosts; + while (CurItem->next != NULL) { + REP_HOST_INFO *TmpItem = CurItem->next; + free(CurItem); + CurItem = TmpItem; + } + free(CurItem); + } + other_hosts = NULL; +} + +void RepConfigInfo::addOtherHost(char* host, int port) +{ + REP_HOST_INFO *newinfo; + newinfo = (REP_HOST_INFO*)malloc(sizeof(REP_HOST_INFO)); + newinfo->host = host; + newinfo->port = port; + if (other_hosts == NULL) { + other_hosts = newinfo; + newinfo->next = NULL; + } else { + newinfo->next = other_hosts; + other_hosts = newinfo; + } + nrsites++; +} diff --git a/examples/cxx/excxx_repquote_gsg/RepMgrGSG.cpp b/examples/cxx/excxx_repquote_gsg/RepMgrGSG.cpp new file mode 100644 index 00000000..a13eea6d --- /dev/null +++ b/examples/cxx/excxx_repquote_gsg/RepMgrGSG.cpp @@ -0,0 +1,418 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// NOTE: This example is a simplified version of the RepQuoteExample.cxx +// example that can be found in the db/examples/cxx/excxx_repquote directory. +// +// This example is intended only as an aid in learning Replication Manager +// concepts. It is not complete in that many features are not exercised +// in it, nor are many error conditions properly handled. + +#include +#include + +#include +#include "RepConfigInfo.h" + +using std::cout; +using std::cin; +using std::cerr; +using std::endl; +using std::flush; + +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" +#define SLEEPTIME 3 + +const char *progname = "excxx_repquote_gsg_repmgr"; + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#define sleep(s) Sleep(1000 * (s)) + +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} +#endif + +// Struct used to store information in Db app_private field. 
+typedef struct { + int is_master; +} APP_DATA; + +class RepMgrGSG +{ +public: + RepMgrGSG(); + int init(RepConfigInfo* config); + int doloop(); + int terminate(); + + static void event_callback(DbEnv * dbenv, u_int32_t which, void *info); + +private: + // Disable copy constructor. + RepMgrGSG(const RepMgrGSG &); + void operator = (const RepMgrGSG &); + + // Internal data members. + APP_DATA app_data; + RepConfigInfo *app_config; + DbEnv dbenv; + + // Private methods. + static int print_stocks(Db *dbp); +}; + +static void usage() +{ + cerr << "usage: " << progname << endl + << "-h home -l|-L host:port [-r host:port] [-p priority]" << endl; + + cerr + << "\t -h home directory (required)" << endl + << "\t -l host:port (required unless -L is specified;" + << "\t l stands for local)" << endl + << "\t -L host:port (optional, L means group creator)" << endl + << "\t -r host:port (optional; r stands for remote; any " + << "number of these" << endl + << "\t may be specified)" << endl + << "\t -p priority (optional; defaults to 100)" << endl; + + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + RepConfigInfo config; + char ch, *portstr, *tmphost; + int tmpport; + int ret; + + // Extract the command line parameters. + while ((ch = getopt(argc, argv, "h:l:L:p:r:")) != EOF) { + switch (ch) { + case 'h': + config.home = optarg; + break; + case 'L': + config.this_host.creator = true; // FALLTHROUGH + case 'l': + config.this_host.host = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + cerr << "Bad host specification." << endl; + usage(); + } + config.this_host.port = (unsigned short)atoi(portstr); + config.got_listen_address = true; + break; + case 'p': + config.priority = atoi(optarg); + break; + case 'r': + tmphost = strtok(optarg, ":"); + if ((portstr = strtok(NULL, ":")) == NULL) { + cerr << "Bad host specification." 
<< endl; + usage(); + } + tmpport = (unsigned short)atoi(portstr); + config.addOtherHost(tmphost, tmpport); + break; + case '?': + default: + usage(); + } + } + + // Error check command line. + if ((!config.got_listen_address) || config.home == NULL) + usage(); + + RepMgrGSG runner; + try { + if((ret = runner.init(&config)) != 0) + goto err; + if((ret = runner.doloop()) != 0) + goto err; + } catch (DbException dbe) { + cerr << "Caught an exception during initialization or" + << " processing: " << dbe.what() << endl; + } +err: + runner.terminate(); + return 0; +} + +RepMgrGSG::RepMgrGSG() : app_config(0), dbenv(0) +{ + app_data.is_master = 0; // By default, assume this site is not a master. +} + +int RepMgrGSG::init(RepConfigInfo *config) +{ + int ret = 0; + + app_config = config; + + dbenv.set_errfile(stderr); + dbenv.set_errpfx(progname); + dbenv.set_app_private(&app_data); + dbenv.set_event_notify(event_callback); + dbenv.repmgr_set_ack_policy(DB_REPMGR_ACKS_ALL); + + DbSite *dbsite; + dbenv.repmgr_site(app_config->this_host.host, + app_config->this_host.port, &dbsite, 0); + dbsite->set_config(DB_LOCAL_SITE, 1); + if (app_config->this_host.creator) + dbsite->set_config(DB_GROUP_CREATOR, 1); + + dbsite->close(); + + int i = 1; + for ( REP_HOST_INFO *cur = app_config->other_hosts; + cur != NULL && i <= app_config->nrsites; + cur = cur->next, i++) { + + dbenv.repmgr_site(cur->host, cur->port, &dbsite, 0); + dbsite->set_config(DB_BOOTSTRAP_HELPER, 1); + + dbsite->close(); + } + + dbenv.rep_set_priority(app_config->priority); + + // Permanent messages require at least one ack. + dbenv.repmgr_set_ack_policy(DB_REPMGR_ACKS_ONE); + // Give 500 microseconds to receive the ack. + dbenv.rep_set_timeout(DB_REP_ACK_TIMEOUT, 5); + + // We can now open our environment, although we're not ready to + // begin replicating. However, we want to have a dbenv around + // so that we can send it into any of our message handlers. 
+ dbenv.set_cachesize(0, CACHESIZE, 0); + dbenv.set_flags(DB_TXN_NOSYNC, 1); + + try { + dbenv.open(app_config->home, DB_CREATE | DB_RECOVER | + DB_THREAD | DB_INIT_REP | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN, 0); + } catch(DbException dbe) { + cerr << "Caught an exception during DB environment open." << endl + << "Ensure that the home directory is created prior to starting" + << " the application." << endl; + ret = ENOENT; + goto err; + } + + if ((ret = dbenv.repmgr_start(3, app_config->start_policy)) != 0) + goto err; + +err: + return ret; +} + +int RepMgrGSG::terminate() +{ + try { + dbenv.close(0); + } catch (DbException dbe) { + cerr << "error closing environment: " << dbe.what() << endl; + } + return 0; +} + +// Provides the main data processing function for our application. +// This function provides a command line prompt to which the user +// can provide a ticker string and a stock price. Once a value is +// entered to the application, the application writes the value to +// the database and then displays the entire database. +#define BUFSIZE 1024 +int RepMgrGSG::doloop() +{ + Dbt key, data; + Db *dbp; + char buf[BUFSIZE], *rbuf; + int ret; + + dbp = 0; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ret = 0; + + for (;;) { + if (dbp == 0) { + dbp = new Db(&dbenv, 0); + + try { + dbp->open(NULL, DATABASE, NULL, DB_BTREE, + app_data.is_master ? DB_CREATE | DB_AUTO_COMMIT : + DB_AUTO_COMMIT, 0); + } catch(DbException dbe) { + // It is expected that this condition will be triggered + // when client sites start up. It can take a while for + // the master site to be found and synced, and no DB will + // be available until then. + if (dbe.get_errno() == ENOENT) { + cout << "No stock db available yet - retrying." 
<< endl; + try { + dbp->close(0); + } catch (DbException dbe2) { + cout << "Unexpected error closing after failed" << + " open, message: " << dbe2.what() << endl; + dbp = NULL; + goto err; + } + dbp = NULL; + sleep(SLEEPTIME); + continue; + } else { + dbenv.err(ret, "DB->open"); + throw dbe; + } + } + } + + cout << "QUOTESERVER" ; + if (!app_data.is_master) + cout << "(read-only)"; + cout << "> " << flush; + + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + if (strtok(&buf[0], " \t\n") == NULL) { + switch ((ret = print_stocks(dbp))) { + case 0: + continue; + case DB_REP_HANDLE_DEAD: + (void)dbp->close(DB_NOSYNC); + cout << "closing db handle due to rep handle dead" << endl; + dbp = NULL; + continue; + default: + dbp->err(ret, "Error traversing data"); + goto err; + } + } + rbuf = strtok(NULL, " \t\n"); + if (rbuf == NULL || rbuf[0] == '\0') { + if (strncmp(buf, "exit", 4) == 0 || + strncmp(buf, "quit", 4) == 0) + break; + dbenv.errx("Format: TICKER VALUE"); + continue; + } + + if (!app_data.is_master) { + dbenv.errx("Can't update at client"); + continue; + } + + key.set_data(buf); + key.set_size((u_int32_t)strlen(buf)); + + data.set_data(rbuf); + data.set_size((u_int32_t)strlen(rbuf)); + + if ((ret = dbp->put(NULL, &key, &data, 0)) != 0) + { + dbp->err(ret, "DB->put"); + if (ret != DB_KEYEXIST) + goto err; + } + } + +err: if (dbp != 0) { + (void)dbp->close(DB_NOSYNC); + } + + return (ret); +} + +// Handle replication events of interest to this application. +void RepMgrGSG::event_callback(DbEnv* dbenv, u_int32_t which, void *info) +{ + APP_DATA *app = (APP_DATA*)dbenv->get_app_private(); + + info = 0; // Currently unused. + + switch (which) { + case DB_EVENT_REP_MASTER: + app->is_master = 1; + break; + + case DB_EVENT_REP_CLIENT: + app->is_master = 0; + break; + + case DB_EVENT_REP_STARTUPDONE: // FALLTHROUGH + case DB_EVENT_REP_NEWMASTER: + // Ignore. 
+ break; + + default: + dbenv->errx("ignoring event %d", which); + } +} + +// Display all the stock quote information in the database. +int RepMgrGSG::print_stocks(Db *dbp) +{ + Dbc *dbc; + Dbt key, data; +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + char keybuf[MAXKEYSIZE + 1], databuf[MAXDATASIZE + 1]; + int ret, t_ret; + u_int32_t keysize, datasize; + + if ((ret = dbp->cursor(NULL, &dbc, 0)) != 0) { + dbp->err(ret, "can't open cursor"); + return (ret); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + cout << "\tSymbol\tPrice" << endl + << "\t======\t=====" << endl; + + for (ret = dbc->get(&key, &data, DB_FIRST); + ret == 0; + ret = dbc->get(&key, &data, DB_NEXT)) { + keysize = key.get_size() > MAXKEYSIZE ? MAXKEYSIZE : key.get_size(); + memcpy(keybuf, key.get_data(), keysize); + keybuf[keysize] = '\0'; + + datasize = data.get_size() >= + MAXDATASIZE ? MAXDATASIZE : data.get_size(); + memcpy(databuf, data.get_data(), datasize); + databuf[datasize] = '\0'; + + cout << "\t" << keybuf << "\t" << databuf << endl; + } + cout << endl << flush; + + if ((t_ret = dbc->close()) != 0 && ret == 0) { + cout << "closed cursor" << endl; + ret = t_ret; + } + + switch (ret) { + case 0: + case DB_NOTFOUND: + case DB_LOCK_DEADLOCK: + return (0); + default: + return (ret); + } +} + diff --git a/examples/cxx/excxx_repquote_gsg/SimpleConfigInfo.h b/examples/cxx/excxx_repquote_gsg/SimpleConfigInfo.h new file mode 100644 index 00000000..7198b526 --- /dev/null +++ b/examples/cxx/excxx_repquote_gsg/SimpleConfigInfo.h @@ -0,0 +1,29 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include + +class SimpleConfigInfo { +public: + SimpleConfigInfo(); + virtual ~SimpleConfigInfo(); + +public: + char* home; +}; + + +SimpleConfigInfo::SimpleConfigInfo() +{ + home = NULL; +} + +SimpleConfigInfo::~SimpleConfigInfo() +{ +} + diff --git a/examples/cxx/excxx_repquote_gsg/SimpleTxn.cpp b/examples/cxx/excxx_repquote_gsg/SimpleTxn.cpp new file mode 100644 index 00000000..1c219ffb --- /dev/null +++ b/examples/cxx/excxx_repquote_gsg/SimpleTxn.cpp @@ -0,0 +1,279 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include +#include + +#include +#include "SimpleConfigInfo.h" + +using std::cout; +using std::cin; +using std::cerr; +using std::endl; +using std::flush; + +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" + +const char *progname = "excxx_repquote_gsg_simple"; + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include + +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} +#endif + +class RepMgr +{ +public: + // Constructor. + RepMgr(); + // Initialization method. Creates and opens our environment handle. + int init(SimpleConfigInfo* config); + // The doloop is where all the work is performed. + int doloop(); + // terminate() provides our shutdown code. + int terminate(); + +private: + // Disable copy constructor. + RepMgr(const RepMgr &); + void operator = (const RepMgr &); + + // Internal data members. + SimpleConfigInfo *app_config; + DbEnv dbenv; + + // Private methods. + // print_stocks() is used to display the contents of our database. 
+ static int print_stocks(Db *dbp); +}; + +static void usage() +{ + cerr << "usage: " << progname << " -h home" << endl; + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + SimpleConfigInfo config; + char ch; + int ret; + + // Extract the command line parameters + while ((ch = getopt(argc, argv, "h:")) != EOF) { + switch (ch) { + case 'h': + config.home = optarg; + break; + case '?': + default: + usage(); + } + } + + // Error check command line. + if (config.home == NULL) + usage(); + + RepMgr runner; + try { + if((ret = runner.init(&config)) != 0) + goto err; + if((ret = runner.doloop()) != 0) + goto err; + } catch (DbException dbe) { + cerr << "Caught an exception during initialization or" + << " processing: " << dbe.what() << endl; + } +err: + runner.terminate(); + return 0; +} + +RepMgr::RepMgr() : app_config(0), dbenv(0) +{ +} + +int RepMgr::init(SimpleConfigInfo *config) +{ + int ret = 0; + + app_config = config; + + dbenv.set_errfile(stderr); + dbenv.set_errpfx(progname); + + + // We can now open our environment. + dbenv.set_cachesize(0, CACHESIZE, 0); + dbenv.set_flags(DB_TXN_NOSYNC, 1); + + try { + dbenv.open(app_config->home, DB_CREATE | DB_RECOVER | + DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN, 0); + } catch(DbException dbe) { + cerr << "Caught an exception during DB environment open." << endl + << "Ensure that the home directory is created prior to starting" + << " the application." << endl; + ret = ENOENT; + goto err; + } + +err: + return ret; +} + +int RepMgr::terminate() +{ + try { + dbenv.close(0); + } catch (DbException dbe) { + cerr << "error closing environment: " << dbe.what() << endl; + } + return 0; +} + +// Provides the main data processing function for our application. +// This function provides a command line prompt to which the user +// can provide a ticker string and a stock price. 
Once a value is +// entered to the application, the application writes the value to +// the database and then displays the entire database. +#define BUFSIZE 1024 +int RepMgr::doloop() +{ + Db *dbp; + Dbt key, data; + char buf[BUFSIZE], *rbuf; + int ret; + + dbp = NULL; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ret = 0; + + for (;;) { + if (dbp == NULL) { + dbp = new Db(&dbenv, 0); + + try { + dbp->open(NULL, DATABASE, NULL, DB_BTREE, + DB_CREATE | DB_AUTO_COMMIT, 0); + } catch(DbException dbe) { + dbenv.err(ret, "DB->open"); + throw dbe; + } + } + + cout << "QUOTESERVER" ; + cout << "> " << flush; + + if (fgets(buf, sizeof(buf), stdin) == NULL) + break; + if (strtok(&buf[0], " \t\n") == NULL) { + switch ((ret = print_stocks(dbp))) { + case 0: + continue; + default: + dbp->err(ret, "Error traversing data"); + goto err; + } + } + rbuf = strtok(NULL, " \t\n"); + if (rbuf == NULL || rbuf[0] == '\0') { + if (strncmp(buf, "exit", 4) == 0 || + strncmp(buf, "quit", 4) == 0) + break; + dbenv.errx("Format: TICKER VALUE"); + continue; + } + + key.set_data(buf); + key.set_size((u_int32_t)strlen(buf)); + + data.set_data(rbuf); + data.set_size((u_int32_t)strlen(rbuf)); + + if ((ret = dbp->put(NULL, &key, &data, 0)) != 0) + { + dbp->err(ret, "DB->put"); + if (ret != DB_KEYEXIST) + goto err; + } + } + +err: + if (dbp != NULL) + (void)dbp->close(DB_NOSYNC); + + return (ret); +} + +// Display all the stock quote information in the database. 
+int RepMgr::print_stocks(Db *dbp) +{ + Dbc *dbc; + Dbt key, data; +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + char keybuf[MAXKEYSIZE + 1], databuf[MAXDATASIZE + 1]; + int ret, t_ret; + u_int32_t keysize, datasize; + + if ((ret = dbp->cursor(NULL, &dbc, 0)) != 0) { + dbp->err(ret, "can't open cursor"); + return (ret); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + cout << "\tSymbol\tPrice" << endl + << "\t======\t=====" << endl; + + for (ret = dbc->get(&key, &data, DB_FIRST); + ret == 0; + ret = dbc->get(&key, &data, DB_NEXT)) { + keysize = key.get_size() > MAXKEYSIZE ? MAXKEYSIZE : key.get_size(); + memcpy(keybuf, key.get_data(), keysize); + keybuf[keysize] = '\0'; + + datasize = data.get_size() >= + MAXDATASIZE ? MAXDATASIZE : data.get_size(); + memcpy(databuf, data.get_data(), datasize); + databuf[datasize] = '\0'; + + cout << "\t" << keybuf << "\t" << databuf << endl; + } + cout << endl << flush; + + if ((t_ret = dbc->close()) != 0 && ret == 0) { + cout << "closed cursor" << endl; + ret = t_ret; + } + + switch (ret) { + case 0: + case DB_NOTFOUND: + return (0); + default: + return (ret); + } +} + diff --git a/examples/cxx/getting_started/MyDb.cpp b/examples/cxx/getting_started/MyDb.cpp new file mode 100644 index 00000000..4a04023f --- /dev/null +++ b/examples/cxx/getting_started/MyDb.cpp @@ -0,0 +1,71 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "MyDb.hpp" + +// File: MyDb.cpp + +// Class constructor. Requires a path to the location +// where the database is located, and a database name +MyDb::MyDb(std::string &path, std::string &dbName, + bool isSecondary) + : db_(NULL, 0), // Instantiate Db object + dbFileName_(path + dbName), // Database file name + cFlags_(DB_CREATE) // If the database doesn't yet exist, + // allow it to be created. 
+{ + try + { + // Redirect debugging information to std::cerr + db_.set_error_stream(&std::cerr); + + // If this is a secondary database, support + // sorted duplicates + if (isSecondary) + db_.set_flags(DB_DUPSORT); + + // Open the database + db_.open(NULL, dbFileName_.c_str(), NULL, DB_BTREE, cFlags_, 0); + } + // DbException is not a subclass of std::exception, so we + // need to catch them both. + catch(DbException &e) + { + std::cerr << "Error opening database: " << dbFileName_ << "\n"; + std::cerr << e.what() << std::endl; + } + catch(std::exception &e) + { + std::cerr << "Error opening database: " << dbFileName_ << "\n"; + std::cerr << e.what() << std::endl; + } +} + +// Private member used to close a database. Called from the class +// destructor. +void +MyDb::close() +{ + // Close the db + try + { + db_.close(0); + std::cout << "Database " << dbFileName_ + << " is closed." << std::endl; + } + catch(DbException &e) + { + std::cerr << "Error closing database: " << dbFileName_ << "\n"; + std::cerr << e.what() << std::endl; + } + catch(std::exception &e) + { + std::cerr << "Error closing database: " << dbFileName_ << "\n"; + std::cerr << e.what() << std::endl; + } +} diff --git a/examples/cxx/getting_started/MyDb.hpp b/examples/cxx/getting_started/MyDb.hpp new file mode 100644 index 00000000..d1f54c24 --- /dev/null +++ b/examples/cxx/getting_started/MyDb.hpp @@ -0,0 +1,46 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// File: MyDb.hpp + +#ifndef MYDB_H +#define MYDB_H + +#include +#include + +class MyDb +{ +public: + // Constructor requires a path to the database, + // and a database name. + MyDb(std::string &path, std::string &dbName, + bool isSecondary = false); + + // Our destructor just calls our private close method. 
+ ~MyDb() { close(); } + + inline Db &getDb() {return db_;} + +private: + Db db_; + std::string dbFileName_; + u_int32_t cFlags_; + + // Make sure the default constructor is private + // We don't want it used. + MyDb() : db_(NULL, 0) {} + + // We put our database close activity here. + // This is called from our destructor. In + // a more complicated example, we might want + // to make this method public, but a private + // method is more appropriate for this example. + void close(); +}; +#endif diff --git a/examples/cxx/getting_started/excxx_example_database_load.cpp b/examples/cxx/getting_started/excxx_example_database_load.cpp new file mode 100644 index 00000000..446e686e --- /dev/null +++ b/examples/cxx/getting_started/excxx_example_database_load.cpp @@ -0,0 +1,229 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// File: excxx_example_database_load.cpp +#include +#include +#include + +#include "MyDb.hpp" +#include "gettingStartedCommon.hpp" + +#ifdef _WIN32 +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} +#else +#include +#endif + +// Forward declarations +void loadInventoryDB(MyDb &, std::string &); +void loadVendorDB(MyDb &, std::string &); + +using namespace std; + +int +usage() +{ + std::cout << "example_database_load [-b ]" + << " [-h ]" << std::endl; + + std::cout << "Note: If -b -h is specified, then the path must end" + << " with your system's path delimiter (/ or \\)" + << std::endl; + return (-1); +} + +// Loads the contents of vendors.txt and inventory.txt into +// Berkeley DB databases. 
+int +main (int argc, char *argv[]) +{ + + int ch; + char lastChar; + + // Initialize the path to the database files + std::string basename("./"); + std::string databaseHome("./"); + + // Database names + std::string vDbName("vendordb.db"); + std::string iDbName("inventorydb.db"); + std::string itemSDbName("itemname.sdb"); + + // Parse the command line arguments + while ((ch = getopt(argc, argv, "b:h:")) != EOF) + switch (ch) { + case 'h': + databaseHome = optarg; + lastChar = databaseHome[databaseHome.size() -1]; + if (lastChar != '/' && lastChar != '\\') + return (usage()); + break; + case 'b': + basename = optarg; + lastChar = basename[basename.size() -1]; + if (lastChar != '/' && lastChar != '\\') + return (usage()); + break; + case '?': + default: + return (usage()); + break; + } + + // Identify the full name for our input files, which should + // also include some path information. + std::string inventoryFile = basename + "inventory.txt"; + std::string vendorFile = basename + "vendors.txt"; + + try + { + // Open all databases. + MyDb inventoryDB(databaseHome, iDbName); + MyDb vendorDB(databaseHome, vDbName); + MyDb itemnameSDB(databaseHome, itemSDbName, true); + + // Associate the primary and the secondary + inventoryDB.getDb().associate(NULL, + &(itemnameSDB.getDb()), + get_item_name, + 0); + + // Load the inventory database + loadInventoryDB(inventoryDB, inventoryFile); + + // Load the vendor database + loadVendorDB(vendorDB, vendorFile); + } catch(DbException &e) { + std::cerr << "Error loading databases. " << std::endl; + std::cerr << e.what() << std::endl; + return (e.get_errno()); + } catch(std::exception &e) { + std::cerr << "Error loading databases. " << std::endl; + std::cerr << e.what() << std::endl; + return (-1); + } + + // MyDb class constructors will close the databases when they + // go out of scope. + return (0); +} // End main + +// Used to locate the first pound sign (a field delimiter) +// in the input string. 
+size_t +getNextPound(std::string &theString, std::string &substring) +{ + size_t pos = theString.find("#"); + substring.assign(theString, 0, pos); + theString.assign(theString, pos + 1, theString.size()); + return (pos); +} + +// Loads the contents of the inventory.txt file into a database +void +loadInventoryDB(MyDb &inventoryDB, std::string &inventoryFile) +{ + InventoryData inventoryData; + std::string substring; + size_t nextPound; + + std::ifstream inFile(inventoryFile.c_str(), std::ios::in); + if ( !inFile ) + { + std::cerr << "Could not open file '" << inventoryFile + << "'. Giving up." << std::endl; + throw std::exception(); + } + + while (!inFile.eof()) + { + inventoryData.clear(); + std::string stringBuf; + std::getline(inFile, stringBuf); + + // Now parse the line + if (!stringBuf.empty()) + { + nextPound = getNextPound(stringBuf, substring); + inventoryData.setName(substring); + + nextPound = getNextPound(stringBuf, substring); + inventoryData.setSKU(substring); + + nextPound = getNextPound(stringBuf, substring); + inventoryData.setPrice(strtod(substring.c_str(), 0)); + + nextPound = getNextPound(stringBuf, substring); + inventoryData.setQuantity(strtol(substring.c_str(), 0, 10)); + + nextPound = getNextPound(stringBuf, substring); + inventoryData.setCategory(substring); + + nextPound = getNextPound(stringBuf, substring); + inventoryData.setVendor(substring); + + void *buff = (void *)inventoryData.getSKU().c_str(); + size_t size = inventoryData.getSKU().size()+1; + Dbt key(buff, (u_int32_t)size); + + buff = inventoryData.getBuffer(); + size = inventoryData.getBufferSize(); + Dbt data(buff, (u_int32_t)size); + + inventoryDB.getDb().put(NULL, &key, &data, 0); + } + + } + + inFile.close(); + +} + +// Loads the contents of the vendors.txt file into a database +void +loadVendorDB(MyDb &vendorDB, std::string &vendorFile) +{ + std::ifstream inFile(vendorFile.c_str(), std::ios::in); + if ( !inFile ) + { + std::cerr << "Could not open file '" << vendorFile + << 
"'. Giving up." << std::endl; + throw std::exception(); + } + + VENDOR my_vendor; + while (!inFile.eof()) + { + std::string stringBuf; + std::getline(inFile, stringBuf); + memset(&my_vendor, 0, sizeof(VENDOR)); + + // Scan the line into the structure. + // Convenient, but not particularly safe. + // In a real program, there would be a lot more + // defensive code here. + sscanf(stringBuf.c_str(), + "%20[^#]#%20[^#]#%20[^#]#%3[^#]#%6[^#]#%13[^#]#%20[^#]#%20[^\n]", + my_vendor.name, my_vendor.street, + my_vendor.city, my_vendor.state, + my_vendor.zipcode, my_vendor.phone_number, + my_vendor.sales_rep, my_vendor.sales_rep_phone); + + Dbt key(my_vendor.name, (u_int32_t)strlen(my_vendor.name) + 1); + Dbt data(&my_vendor, sizeof(VENDOR)); + + vendorDB.getDb().put(NULL, &key, &data, 0); + } + + inFile.close(); +} diff --git a/examples/cxx/getting_started/excxx_example_database_read.cpp b/examples/cxx/getting_started/excxx_example_database_read.cpp new file mode 100644 index 00000000..c00d65d9 --- /dev/null +++ b/examples/cxx/getting_started/excxx_example_database_read.cpp @@ -0,0 +1,236 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +// File: excxx_example_database_read.cpp + +#include +#include +#include + +#include "MyDb.hpp" +#include "gettingStartedCommon.hpp" + +#ifdef _WIN32 +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} +#else +#include +#endif + +// Forward declarations +int show_item(MyDb &itemnameSDB, MyDb &vendorDB, std::string &itemName); +int show_all_records(MyDb &inventoryDB, MyDb &vendorDB); +int show_vendor(MyDb &vendorDB, const char *vendor); + +int +usage() +{ + std::cout << "example_database_read [-i ]" + << " [-h ]" << std::endl; + + std::cout << "Note: Any path specified to the -h parameter must end" + << " with your system's path delimiter (/ or \\)" + << std::endl; + return (-1); +} + +int +main (int argc, char *argv[]) +{ + + int ch; + char lastChar; + + // Initialize the path to the database files + std::string databaseHome("./"); + std::string itemName; + + // Database names + std::string vDbName("vendordb.db"); + std::string iDbName("inventorydb.db"); + std::string itemSDbName("itemname.sdb"); + + // Parse the command line arguments + while ((ch = getopt(argc, argv, "h:i:")) != EOF) + switch (ch) { + case 'h': + databaseHome = optarg; + lastChar = databaseHome[databaseHome.size() -1]; + if (lastChar != '/' && lastChar != '\\') + return (usage()); + break; + case 'i': + itemName = optarg; + break; + case '?': + default: + return (usage()); + break; + } + + try + { + // Open all databases. + MyDb inventoryDB(databaseHome, iDbName); + MyDb vendorDB(databaseHome, vDbName); + MyDb itemnameSDB(databaseHome, itemSDbName, true); + + // Associate the secondary to the primary + inventoryDB.getDb().associate(NULL, + &(itemnameSDB.getDb()), + get_item_name, + 0); + + if (itemName.empty()) + { + show_all_records(inventoryDB, vendorDB); + } else { + show_item(itemnameSDB, vendorDB, itemName); + } + } catch(DbException &e) { + std::cerr << "Error reading databases. 
" << std::endl; + return (e.get_errno()); + } catch(std::exception &e) { + std::cerr << "Error reading databases. " << std::endl; + std::cerr << e.what() << std::endl; + return (-1); + } + + return (0); +} // End main + +// Shows the records in the inventory database that +// have a specific item name. For each inventory record +// shown, the appropriate vendor record is also displayed. +int +show_item(MyDb &itemnameSDB, MyDb &vendorDB, std::string &itemName) +{ + + // Get a cursor to the itemname secondary db + Dbc *cursorp; + + try { + itemnameSDB.getDb().cursor(NULL, &cursorp, 0); + + // Get the search key. This is the name on the inventory + // record that we want to examine. + std::cout << "Looking for " << itemName << std::endl; + Dbt key((void *)itemName.c_str(), (u_int32_t)itemName.length() + 1); + Dbt data; + + // Position the cursor to the first record in the secondary + // database that has the appropriate key. + int ret = cursorp->get(&key, &data, DB_SET); + if (!ret) { + do { + InventoryData inventoryItem(data.get_data()); + inventoryItem.show(); + + show_vendor(vendorDB, inventoryItem.getVendor().c_str()); + + } while (cursorp->get(&key, &data, DB_NEXT_DUP) == 0); + } else { + std::cerr << "No records found for '" << itemName + << "'" << std::endl; + } + } catch(DbException &e) { + itemnameSDB.getDb().err(e.get_errno(), "Error in show_item"); + cursorp->close(); + throw e; + } catch(std::exception &e) { + itemnameSDB.getDb().errx("Error in show_item: %s", e.what()); + cursorp->close(); + throw e; + } + + cursorp->close(); + return (0); +} + +// Shows all the records in the inventory database. +// For each inventory record shown, the appropriate +// vendor record is also displayed. 
+int +show_all_records(MyDb &inventoryDB, MyDb &vendorDB) +{ + + // Get a cursor to the inventory db + Dbc *cursorp; + try { + inventoryDB.getDb().cursor(NULL, &cursorp, 0); + + // Iterate over the inventory database, from the first record + // to the last, displaying each in turn + Dbt key, data; + int ret; + while ((ret = cursorp->get(&key, &data, DB_NEXT)) == 0 ) + { + InventoryData inventoryItem(data.get_data()); + inventoryItem.show(); + + show_vendor(vendorDB, inventoryItem.getVendor().c_str()); + } + } catch(DbException &e) { + inventoryDB.getDb().err(e.get_errno(), "Error in show_all_records"); + cursorp->close(); + throw e; + } catch(std::exception &e) { + cursorp->close(); + throw e; + } + + cursorp->close(); + return (0); +} + +// Shows a vendor record. Each vendor record is an instance of +// a vendor structure. See loadVendorDB() in +// example_database_load for how this structure was originally +// put into the database. +int +show_vendor(MyDb &vendorDB, const char *vendor) +{ + Dbt data; + VENDOR my_vendor; + + try { + // Set the search key to the vendor's name + // vendor is explicitly cast to char * to stop a compiler + // complaint. + Dbt key((char *)vendor, (u_int32_t)strlen(vendor) + 1); + + // Make sure we use the memory we set aside for the VENDOR + // structure rather than the memory that DB allocates. + // Some systems may require structures to be aligned in memory + // in a specific way, and DB may not get it right. 
+ + data.set_data(&my_vendor); + data.set_ulen(sizeof(VENDOR)); + data.set_flags(DB_DBT_USERMEM); + + // Get the record + vendorDB.getDb().get(NULL, &key, &data, 0); + std::cout << " " << my_vendor.street << "\n" + << " " << my_vendor.city << ", " + << my_vendor.state << "\n" + << " " << my_vendor.zipcode << "\n" + << " " << my_vendor.phone_number << "\n" + << " Contact: " << my_vendor.sales_rep << "\n" + << " " << my_vendor.sales_rep_phone + << std::endl; + + } catch(DbException &e) { + vendorDB.getDb().err(e.get_errno(), "Error in show_vendor"); + throw e; + } catch(std::exception &e) { + throw e; + } + return (0); +} diff --git a/examples/cxx/getting_started/gettingStartedCommon.hpp b/examples/cxx/getting_started/gettingStartedCommon.hpp new file mode 100644 index 00000000..cbf7a6d8 --- /dev/null +++ b/examples/cxx/getting_started/gettingStartedCommon.hpp @@ -0,0 +1,198 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +// File: gettingStartedCommon.hpp + +#ifndef GETTINGSTARTEDCOMMON_H +#define GETTINGSTARTEDCOMMON_H + +#include +#include + +class InventoryData +{ +public: + inline void setPrice(double price) {price_ = price;} + inline void setQuantity(long quantity) {quantity_ = quantity;} + inline void setCategory(std::string &category) {category_ = category;} + inline void setName(std::string &name) {name_ = name;} + inline void setVendor(std::string &vendor) {vendor_ = vendor;} + inline void setSKU(std::string &sku) {sku_ = sku;} + + inline double& getPrice() {return(price_);} + inline long& getQuantity() {return(quantity_);} + inline std::string& getCategory() {return(category_);} + inline std::string& getName() {return(name_);} + inline std::string& getVendor() {return(vendor_);} + inline std::string& getSKU() {return(sku_);} + + /* Initialize our data members */ + void clear() + { + price_ = 0.0; + quantity_ = 0; + category_ = ""; + name_ = ""; + vendor_ = ""; + sku_ = ""; + } + + // Default constructor + InventoryData() { clear(); } + + // Constructor from a void * + // For use with the data returned from a bdb get + InventoryData(void *buffer) + { + char *buf = (char *)buffer; + + price_ = *((double *)buf); + bufLen_ = sizeof(double); + + quantity_ = *((long *)(buf + bufLen_)); + bufLen_ += sizeof(long); + + name_ = buf + bufLen_; + bufLen_ += name_.size() + 1; + + sku_ = buf + bufLen_; + bufLen_ += sku_.size() + 1; + + category_ = buf + bufLen_; + bufLen_ += category_.size() + 1; + + vendor_ = buf + bufLen_; + bufLen_ += vendor_.size() + 1; + } + + /* + * Marshalls this classes data members into a single + * contiguous memory location for the purpose of storing + * the data in a database. + */ + char * + getBuffer() + { + // Zero out the buffer + memset(databuf_, 0, 500); + /* + * Now pack the data into a single contiguous memory location for + * storage. 
+ */ + bufLen_ = 0; + int dataLen = 0; + + dataLen = sizeof(double); + memcpy(databuf_, &price_, dataLen); + bufLen_ += dataLen; + + dataLen = sizeof(long); + memcpy(databuf_ + bufLen_, &quantity_, dataLen); + bufLen_ += dataLen; + + packString(databuf_, name_); + packString(databuf_, sku_); + packString(databuf_, category_); + packString(databuf_, vendor_); + + return (databuf_); + } + + /* + * Returns the size of the buffer. Used for storing + * the buffer in a database. + */ + inline size_t getBufferSize() { return (bufLen_); } + + /* Utility function used to show the contents of this class */ + void + show() { + std::cout << "\nName: " << name_ << std::endl; + std::cout << " SKU: " << sku_ << std::endl; + std::cout << " Price: " << price_ << std::endl; + std::cout << " Quantity: " << quantity_ << std::endl; + std::cout << " Category: " << category_ << std::endl; + std::cout << " Vendor: " << vendor_ << std::endl; + } + +private: + + /* + * Utility function that appends a char * to the end of + * the buffer. + */ + void + packString(char *buffer, std::string &theString) + { + size_t string_size = theString.size() + 1; + memcpy(buffer+bufLen_, theString.c_str(), string_size); + bufLen_ += string_size; + } + + /* Data members */ + std::string category_, name_, vendor_, sku_; + double price_; + long quantity_; + size_t bufLen_; + char databuf_[500]; + +}; + +#define MAXFIELD 20 + +typedef struct vendor { + char name[MAXFIELD]; /* Vendor name */ + char street[MAXFIELD]; /* Street name and number */ + char city[MAXFIELD]; /* City */ + char state[3]; /* Two-digit US state code */ + char zipcode[6]; /* US zipcode */ + char phone_number[13]; /* Vendor phone number */ + char sales_rep[MAXFIELD]; /* Name of sales representative */ + char sales_rep_phone[MAXFIELD]; /* Sales rep's phone number */ +} VENDOR; + +// Forward declarations +class Db; +class Dbt; + +// Used to extract an inventory item's name from an +// inventory database record. 
This function is used to create +// keys for secondary database records. +int +get_item_name(Db *dbp, const Dbt *pkey, const Dbt *pdata, Dbt *skey) +{ + /* + * First, obtain the buffer location where we placed the item's name. In + * this example, the item's name is located in the primary data. It is the + * first string in the buffer after the price (a double) and the quantity + * (a long). + */ + u_int32_t offset = sizeof(double) + sizeof(long); + char *itemname = (char *)pdata->get_data() + offset; + + // unused + (void)pkey; + + /* + * If the offset is beyond the end of the data, then there was a problem + * with the buffer contained in pdata, or there's a programming error in + * how the buffer is marshalled/unmarshalled. This should never happen! + */ + if (offset > pdata->get_size()) { + dbp->errx("get_item_name: buffer sizes do not match!"); + // When we return non-zero, the index record is not added/updated. + return (-1); + } + + /* Now set the secondary key's data to be the item name */ + skey->set_data(itemname); + skey->set_size((u_int32_t)strlen(itemname) + 1); + + return (0); +}; +#endif diff --git a/examples/cxx/getting_started/inventory.txt b/examples/cxx/getting_started/inventory.txt new file mode 100644 index 00000000..d6b68762 --- /dev/null +++ b/examples/cxx/getting_started/inventory.txt @@ -0,0 +1,800 @@ +Oranges#OranfruiRu6Ghr#0.71#451#fruits#TriCounty Produce +Oranges#OranfruiXRPFn1#0.73#263#fruits#Simply Fresh +Oranges#OranfruiLEuzQj#0.69#261#fruits#Off the Vine +Apples#ApplfruiZls4Du#1.20#472#fruits#TriCounty Produce +Apples#Applfrui8fewZe#1.21#402#fruits#Simply Fresh +Apples#ApplfruiXoT6xG#1.20#728#fruits#Off the Vine +Bananas#BanafruipIlluX#0.50#207#fruits#TriCounty Produce +Bananas#BanafruiEQhWuj#0.50#518#fruits#Simply Fresh +Bananas#BanafruimpRgPO#0.50#741#fruits#Off the Vine +Almonds#AlmofruiPPCLz8#0.55#600#fruits#TriCounty Produce +Almonds#AlmofruidMyKmp#0.54#745#fruits#Simply Fresh +Almonds#Almofrui7K0xzH#0.53#405#fruits#Off the 
Vine +Allspice#AllsfruibJGK4R#0.94#669#fruits#TriCounty Produce +Allspice#Allsfruilfvoeg#0.94#244#fruits#Simply Fresh +Allspice#Allsfruio12BOS#0.95#739#fruits#Off the Vine +Apricot#AprifruijphEpM#0.89#560#fruits#TriCounty Produce +Apricot#AprifruiU1zIDn#0.91#980#fruits#Simply Fresh +Apricot#AprifruichcwYS#0.95#668#fruits#Off the Vine +Avocado#AvocfruiwYYomu#0.99#379#fruits#TriCounty Produce +Avocado#AvocfruiT6IwWE#1.02#711#fruits#Simply Fresh +Avocado#AvocfruisbK1h5#0.97#856#fruits#Off the Vine +Bael Fruit#BaelfruilAU7Hj#0.41#833#fruits#TriCounty Produce +Bael Fruit#BaelfruiX2KvqV#0.40#770#fruits#Simply Fresh +Bael Fruit#Baelfruidjne4e#0.39#778#fruits#Off the Vine +Betel Nut#BetefruiQYdHqQ#0.34#926#fruits#TriCounty Produce +Betel Nut#Betefrui32BKAz#0.37#523#fruits#Simply Fresh +Betel Nut#BetefruisaWzY4#0.34#510#fruits#Off the Vine +Black Walnut#BlacfruiXxIuMU#0.57#923#fruits#TriCounty Produce +Black Walnut#BlacfruiZXgY9t#0.59#312#fruits#Simply Fresh +Black Walnut#BlacfruikWO0vz#0.60#877#fruits#Off the Vine +Blueberry#BluefruiCbxb4t#1.02#276#fruits#TriCounty Produce +Blueberry#BluefruiBuCfgO#1.03#522#fruits#Simply Fresh +Blueberry#Bluefruixz8MkE#1.01#278#fruits#Off the Vine +Boysenberry#BoysfruizxyMuz#1.05#239#fruits#TriCounty Produce +Boysenberry#Boysfrui3hTRQu#1.09#628#fruits#Simply Fresh +Boysenberry#BoysfruinpLvr3#1.02#349#fruits#Off the Vine +Breadnut#Breafrui0kDPs6#0.31#558#fruits#TriCounty Produce +Breadnut#Breafrui44s3og#0.32#879#fruits#Simply Fresh +Breadnut#BreafruiwyLKhJ#0.30#407#fruits#Off the Vine +Cactus#Cactfruiyo2ddH#0.56#601#fruits#TriCounty Produce +Cactus#CactfruixTOLv5#0.54#477#fruits#Simply Fresh +Cactus#Cactfrui4ioUav#0.55#896#fruits#Off the Vine +California Wild Grape#CalifruiZsWAa6#0.78#693#fruits#TriCounty Produce +California Wild Grape#Califruid84xyt#0.83#293#fruits#Simply Fresh +California Wild Grape#CalifruiLSJFoJ#0.81#543#fruits#Off the Vine +Cashew#CashfruihaOFVP#0.37#221#fruits#TriCounty Produce 
+Cashew#Cashfruizzcw1E#0.38#825#fruits#Simply Fresh +Cashew#CashfruiqtMe2Q#0.38#515#fruits#Off the Vine +Chico Sapote#ChicfruiY534SX#0.47#216#fruits#TriCounty Produce +Chico Sapote#ChicfruiSqL3Lc#0.45#476#fruits#Simply Fresh +Chico Sapote#ChicfruiurzIp4#0.47#200#fruits#Off the Vine +Chinese Jello#ChinfruiyRg75u#0.64#772#fruits#TriCounty Produce +Chinese Jello#ChinfruiuIUj0X#0.65#624#fruits#Simply Fresh +Chinese Jello#ChinfruiwXbRrL#0.67#719#fruits#Off the Vine +Common Guava#Commfruib6znSI#0.80#483#fruits#TriCounty Produce +Common Guava#Commfrui6eUivL#0.81#688#fruits#Simply Fresh +Common Guava#CommfruibWKnz3#0.84#581#fruits#Off the Vine +Crabapple#CrabfruioY2L63#0.94#582#fruits#TriCounty Produce +Crabapple#Crabfruijxcxyt#0.94#278#fruits#Simply Fresh +Crabapple#CrabfruibvWd8K#0.95#213#fruits#Off the Vine +Cranberry#CranfruiJxmKr5#0.83#923#fruits#TriCounty Produce +Cranberry#CranfruiPlklAF#0.84#434#fruits#Simply Fresh +Cranberry#Cranfrui3G5XL9#0.84#880#fruits#Off the Vine +Damson Plum#DamsfruibMRMwe#0.98#782#fruits#TriCounty Produce +Damson Plum#DamsfruiV6wFLk#1.03#400#fruits#Simply Fresh +Damson Plum#DamsfruiLhqFrQ#0.98#489#fruits#Off the Vine +Date Palm#DatefruigS31GU#1.14#315#fruits#TriCounty Produce +Date Palm#DatefruipKPaJK#1.09#588#fruits#Simply Fresh +Date Palm#Datefrui5fTyNS#1.14#539#fruits#Off the Vine +Dragon's Eye#DragfruirGJ3aI#0.28#315#fruits#TriCounty Produce +Dragon's Eye#DragfruiBotxqt#0.27#705#fruits#Simply Fresh +Dragon's Eye#DragfruiPsSnV9#0.29#482#fruits#Off the Vine +East Indian Wine Palm#EastfruiNXFJuG#0.43#992#fruits#TriCounty Produce +East Indian Wine Palm#Eastfruiq06fRr#0.40#990#fruits#Simply Fresh +East Indian Wine Palm#Eastfrui4QUwl2#0.43#351#fruits#Off the Vine +English Walnut#EnglfruiBMtHtW#1.04#787#fruits#TriCounty Produce +English Walnut#EnglfruiHmVzxV#1.03#779#fruits#Simply Fresh +English Walnut#Englfrui18Tc9n#1.06#339#fruits#Off the Vine +False Mangosteen#FalsfruibkmYqH#0.66#971#fruits#TriCounty Produce +False 
Mangosteen#FalsfruipBsbcX#0.68#250#fruits#Simply Fresh +False Mangosteen#FalsfruiPrFfhe#0.70#386#fruits#Off the Vine +Fried Egg Tree#FriefruiihHUdc#0.29#649#fruits#TriCounty Produce +Fried Egg Tree#FriefruimdD1rf#0.28#527#fruits#Simply Fresh +Fried Egg Tree#FriefruivyAzYq#0.29#332#fruits#Off the Vine +Genipap#GenifruiDtKusQ#0.62#986#fruits#TriCounty Produce +Genipap#GenifruiXq32eP#0.61#326#fruits#Simply Fresh +Genipap#Genifruiphwwyq#0.61#794#fruits#Off the Vine +Ginger#GingfruiQLbRZI#0.28#841#fruits#TriCounty Produce +Ginger#GingfruiS8kK4p#0.29#432#fruits#Simply Fresh +Ginger#GingfruioL3Y4S#0.27#928#fruits#Off the Vine +Grapefruit#Grapfruih86Zxh#1.07#473#fruits#TriCounty Produce +Grapefruit#GrapfruiwL1v0N#1.08#878#fruits#Simply Fresh +Grapefruit#GrapfruihmJzWm#1.02#466#fruits#Off the Vine +Hackberry#HackfruiQjomN7#0.22#938#fruits#TriCounty Produce +Hackberry#HackfruiWS0eKp#0.20#780#fruits#Simply Fresh +Hackberry#Hackfrui0MIv6J#0.21#345#fruits#Off the Vine +Honey Locust#HonefruiebXGRc#1.08#298#fruits#TriCounty Produce +Honey Locust#HonefruiPSqILB#1.00#427#fruits#Simply Fresh +Honey Locust#Honefrui6UXtvW#1.03#422#fruits#Off the Vine +Japanese Plum#JapafruihTmoYR#0.40#658#fruits#TriCounty Produce +Japanese Plum#JapafruifGqz0l#0.40#700#fruits#Simply Fresh +Japanese Plum#JapafruiufWkLx#0.39#790#fruits#Off the Vine +Jojoba#JojofruisE0wTh#0.97#553#fruits#TriCounty Produce +Jojoba#JojofruiwiYLp2#1.02#969#fruits#Simply Fresh +Jojoba#JojofruigMD1ej#0.96#899#fruits#Off the Vine +Jostaberry#JostfruiglsEGV#0.50#300#fruits#TriCounty Produce +Jostaberry#JostfruiV3oo1h#0.52#423#fruits#Simply Fresh +Jostaberry#JostfruiUBerur#0.53#562#fruits#Off the Vine +Kangaroo Apple#KangfruiEQknz8#0.60#661#fruits#TriCounty Produce +Kangaroo Apple#KangfruiNabdFq#0.60#377#fruits#Simply Fresh +Kangaroo Apple#Kangfrui7hky1i#0.60#326#fruits#Off the Vine +Ken's Red#Ken'fruinPUSIm#0.21#337#fruits#TriCounty Produce +Ken's Red#Ken'fruiAoZlpl#0.21#902#fruits#Simply Fresh +Ken's 
Red#Ken'frui5rmbd4#0.22#972#fruits#Off the Vine +Ketembilla#Ketefrui3yAKxQ#0.31#303#fruits#TriCounty Produce +Ketembilla#KetefruiROn6F5#0.34#283#fruits#Simply Fresh +Ketembilla#Ketefrui16Rsts#0.33#887#fruits#Off the Vine +King Orange#KingfruisOFzWk#0.74#429#fruits#TriCounty Produce +King Orange#KingfruiBmzRJT#0.74#500#fruits#Simply Fresh +King Orange#KingfruiGsrgRX#0.78#994#fruits#Off the Vine +Kola Nut#KolafruiBbtAuw#0.58#991#fruits#TriCounty Produce +Kola Nut#KolafruirbnLVS#0.62#733#fruits#Simply Fresh +Kola Nut#Kolafrui1ItXJx#0.58#273#fruits#Off the Vine +Kuko#Kukofrui6YH5Ds#0.41#647#fruits#TriCounty Produce +Kuko#Kukofrui7WZaZK#0.39#241#fruits#Simply Fresh +Kuko#Kukofruig9MQFT#0.40#204#fruits#Off the Vine +Kumquat#KumqfruiT6WKQL#0.73#388#fruits#TriCounty Produce +Kumquat#KumqfruidLiFLU#0.70#393#fruits#Simply Fresh +Kumquat#KumqfruiL6zhQX#0.71#994#fruits#Off the Vine +Kwai Muk#KwaifruiQK1zOE#1.10#249#fruits#TriCounty Produce +Kwai Muk#KwaifruifbCRlT#1.14#657#fruits#Simply Fresh +Kwai Muk#Kwaifruipe7T2m#1.09#617#fruits#Off the Vine +Lanzone#LanzfruijsPf1v#0.34#835#fruits#TriCounty Produce +Lanzone#LanzfruibU3QoL#0.34#404#fruits#Simply Fresh +Lanzone#LanzfruiYgHwv6#0.34#237#fruits#Off the Vine +Lemon#Lemofrui4Tgsg2#0.46#843#fruits#TriCounty Produce +Lemon#LemofruivK6qvj#0.43#207#fruits#Simply Fresh +Lemon#LemofruiXSXqJ0#0.44#910#fruits#Off the Vine +Lemon Grass#LemofruiVFgVh5#0.40#575#fruits#TriCounty Produce +Lemon Grass#LemofruiWIelvi#0.41#386#fruits#Simply Fresh +Lemon Grass#LemofruiGVAow0#0.39#918#fruits#Off the Vine +Lilly-pilly#LillfruiEQnW1m#1.21#974#fruits#TriCounty Produce +Lilly-pilly#LillfruiMqVuR5#1.23#303#fruits#Simply Fresh +Lilly-pilly#LillfruiVGH9p4#1.17#512#fruits#Off the Vine +Ling Nut#LingfruiGtOf8X#0.85#540#fruits#TriCounty Produce +Ling Nut#LingfruiuP0Jf9#0.83#200#fruits#Simply Fresh +Ling Nut#LingfruiuO5qf5#0.81#319#fruits#Off the Vine +Lipote#LipofruisxD2Qc#0.85#249#fruits#TriCounty Produce +Lipote#LipofruiHNdIqL#0.85#579#fruits#Simply Fresh 
+Lipote#LipofruiSQ2pKK#0.83#472#fruits#Off the Vine +Litchee#Litcfrui1R6Ydz#0.99#806#fruits#TriCounty Produce +Litchee#LitcfruiwtDM79#1.01#219#fruits#Simply Fresh +Litchee#LitcfruilpPZbC#1.05#419#fruits#Off the Vine +Longan#LongfruiEI0lWF#1.02#573#fruits#TriCounty Produce +Longan#LongfruiPQxxSF#1.04#227#fruits#Simply Fresh +Longan#LongfruisdI812#0.99#993#fruits#Off the Vine +Love-in-a-mist#LovefruiKYPW70#0.69#388#fruits#TriCounty Produce +Love-in-a-mist#LovefruiHrgjDa#0.67#478#fruits#Simply Fresh +Love-in-a-mist#LovefruipSOWVz#0.71#748#fruits#Off the Vine +Lychee#LychfruiicVLnY#0.38#276#fruits#TriCounty Produce +Lychee#LychfruiGY6yJr#0.38#602#fruits#Simply Fresh +Lychee#LychfruiTzDCq2#0.40#572#fruits#Off the Vine +Mabolo#MabofruiSY8RQS#0.97#263#fruits#TriCounty Produce +Mabolo#MabofruiOWWk0n#0.98#729#fruits#Simply Fresh +Mabolo#MabofruixQLOTF#0.98#771#fruits#Off the Vine +Macadamia Nut#MacafruiZppJPw#1.22#888#fruits#TriCounty Produce +Macadamia Nut#MacafruiI7XFMV#1.24#484#fruits#Simply Fresh +Macadamia Nut#Macafrui4x8bxV#1.20#536#fruits#Off the Vine +Madagascar Plum#MadafruiVj5fDf#1.14#596#fruits#TriCounty Produce +Madagascar Plum#MadafruivJhAFI#1.15#807#fruits#Simply Fresh +Madagascar Plum#Madafrui7MTe1x#1.17#355#fruits#Off the Vine +Magnolia Vine#MagnfruiigN4Y1#1.17#321#fruits#TriCounty Produce +Magnolia Vine#MagnfruicKtiHd#1.15#353#fruits#Simply Fresh +Magnolia Vine#MagnfruiLPDSCp#1.23#324#fruits#Off the Vine +Mamey#Mamefrui5rjLF6#0.36#683#fruits#TriCounty Produce +Mamey#MamefruiM6ndnR#0.38#404#fruits#Simply Fresh +Mamey#Mamefruiq9KntD#0.36#527#fruits#Off the Vine +Mandarin Orange#MandfruiRKpmKL#0.42#352#fruits#TriCounty Produce +Mandarin Orange#Mandfrui1V0KLG#0.42#548#fruits#Simply Fresh +Mandarin Orange#Mandfruig2o9Fg#0.41#686#fruits#Off the Vine +Marany Nut#MarafruiqkrwoJ#1.14#273#fruits#TriCounty Produce +Marany Nut#MarafruiCGKpke#1.12#482#fruits#Simply Fresh +Marany Nut#MarafruiB1YE5x#1.09#412#fruits#Off the Vine 
+Marula#MarufruiXF4biH#0.22#403#fruits#TriCounty Produce +Marula#MarufruidZiVKZ#0.23#317#fruits#Simply Fresh +Marula#MarufruiIS8BEp#0.21#454#fruits#Off the Vine +Mayhaw#MayhfruiCSrm7k#0.24#220#fruits#TriCounty Produce +Mayhaw#MayhfruiNRDzWs#0.25#710#fruits#Simply Fresh +Mayhaw#MayhfruiIUCyEg#0.24#818#fruits#Off the Vine +Meiwa Kumquat#MeiwfruiYhv3AY#0.21#997#fruits#TriCounty Produce +Meiwa Kumquat#MeiwfruiyzQFNR#0.22#347#fruits#Simply Fresh +Meiwa Kumquat#Meiwfruict4OUp#0.21#923#fruits#Off the Vine +Mexican Barberry#Mexifrui2P2dXi#0.28#914#fruits#TriCounty Produce +Mexican Barberry#MexifruiywUTMI#0.29#782#fruits#Simply Fresh +Mexican Barberry#MexifruijPHu5X#0.29#367#fruits#Off the Vine +Meyer Lemon#Meyefruin9901J#0.38#824#fruits#TriCounty Produce +Meyer Lemon#MeyefruiNeQpjO#0.37#617#fruits#Simply Fresh +Meyer Lemon#MeyefruiYEVznZ#0.37#741#fruits#Off the Vine +Mississippi Honeyberry#Missfruipb5iW3#0.95#595#fruits#TriCounty Produce +Mississippi Honeyberry#MissfruiINiDbB#0.96#551#fruits#Simply Fresh +Mississippi Honeyberry#MissfruiNUQ82a#0.93#396#fruits#Off the Vine +Monkey Pot#MonkfruiXlTW4j#0.90#896#fruits#TriCounty Produce +Monkey Pot#Monkfrui1p7a4h#0.88#344#fruits#Simply Fresh +Monkey Pot#Monkfrui4eKggb#0.92#917#fruits#Off the Vine +Monos Plum#Monofrui0Mv9aV#1.11#842#fruits#TriCounty Produce +Monos Plum#Monofrui6iTGQY#1.14#570#fruits#Simply Fresh +Monos Plum#MonofruiNu2uGH#1.13#978#fruits#Off the Vine +Moosewood#MoosfruiMXEGex#0.86#969#fruits#TriCounty Produce +Moosewood#Moosfrui8805mB#0.86#963#fruits#Simply Fresh +Moosewood#MoosfruiOsnDFL#0.88#594#fruits#Off the Vine +Natal Orange#NatafruitB8Kh2#0.42#332#fruits#TriCounty Produce +Natal Orange#NatafruiOhqRrd#0.42#982#fruits#Simply Fresh +Natal Orange#NatafruiRObMf6#0.41#268#fruits#Off the Vine +Nectarine#NectfruilNfeD8#0.36#601#fruits#TriCounty Produce +Nectarine#NectfruiQfjt6b#0.35#818#fruits#Simply Fresh +Nectarine#Nectfrui5U7U96#0.37#930#fruits#Off the Vine +Neem Tree#NeemfruiCruEMF#0.24#222#fruits#TriCounty 
Produce +Neem Tree#NeemfruiGv0pv5#0.24#645#fruits#Simply Fresh +Neem Tree#NeemfruiUFPVfk#0.25#601#fruits#Off the Vine +New Zealand Spinach#New fruihDIgec#0.87#428#fruits#TriCounty Produce +New Zealand Spinach#New fruiaoR9TP#0.87#630#fruits#Simply Fresh +New Zealand Spinach#New fruiy8LBul#0.94#570#fruits#Off the Vine +Olosapo#OlosfruiGXvaMm#0.76#388#fruits#TriCounty Produce +Olosapo#OlosfruiESlpB3#0.76#560#fruits#Simply Fresh +Olosapo#OlosfruiFNEkER#0.76#962#fruits#Off the Vine +Oregon Grape#OregfruiWxhzrf#1.14#892#fruits#TriCounty Produce +Oregon Grape#OregfruiMgjHUn#1.20#959#fruits#Simply Fresh +Oregon Grape#OregfruiC5UCxX#1.17#419#fruits#Off the Vine +Otaheite Apple#OtahfruilT0iFj#0.21#579#fruits#TriCounty Produce +Otaheite Apple#Otahfrui92PyMY#0.22#857#fruits#Simply Fresh +Otaheite Apple#OtahfruiLGD1EH#0.20#807#fruits#Off the Vine +Oyster Plant#OystfruimGxOsj#0.77#835#fruits#TriCounty Produce +Oyster Plant#Oystfrui1kudBX#0.81#989#fruits#Simply Fresh +Oyster Plant#OystfruiaX3uO2#0.80#505#fruits#Off the Vine +Panama Berry#PanafruiZG0Vp4#1.19#288#fruits#TriCounty Produce +Panama Berry#PanafruiobvXPE#1.21#541#fruits#Simply Fresh +Panama Berry#PanafruipaW8F3#1.16#471#fruits#Off the Vine +Peach Tomato#PeacfruiQpovYH#1.20#475#fruits#TriCounty Produce +Peach Tomato#PeacfruixYXLTN#1.18#655#fruits#Simply Fresh +Peach Tomato#PeacfruiILDYAp#1.23#876#fruits#Off the Vine +Peanut#Peanfruiy8M7pt#0.69#275#fruits#TriCounty Produce +Peanut#PeanfruiEimbED#0.65#307#fruits#Simply Fresh +Peanut#Peanfruic452Vc#0.68#937#fruits#Off the Vine +Peanut Butter Fruit#PeanfruixEDt9Y#0.27#628#fruits#TriCounty Produce +Peanut Butter Fruit#PeanfruiST0T0R#0.27#910#fruits#Simply Fresh +Peanut Butter Fruit#Peanfrui7jeRN2#0.27#938#fruits#Off the Vine +Pear#PearfruiB5YmSJ#0.20#945#fruits#TriCounty Produce +Pear#PearfruiA93XZx#0.21#333#fruits#Simply Fresh +Pear#PearfruioNKiIf#0.21#715#fruits#Off the Vine +Pecan#PecafruiiTIv1Z#0.26#471#fruits#TriCounty Produce +Pecan#PecafruiMGkqla#0.26#889#fruits#Simply 
Fresh +Pecan#Pecafrui1szYz2#0.25#929#fruits#Off the Vine +Purple Passion Fruit#Purpfrui4mMGkD#1.04#914#fruits#TriCounty Produce +Purple Passion Fruit#Purpfrui5XOW3K#1.06#423#fruits#Simply Fresh +Purple Passion Fruit#PurpfruifDTAgW#1.05#549#fruits#Off the Vine +Red Mulberry#Red fruiVLOXIW#1.24#270#fruits#TriCounty Produce +Red Mulberry#Red fruiXNXt4a#1.21#836#fruits#Simply Fresh +Red Mulberry#Red fruiUseWLG#1.21#795#fruits#Off the Vine +Red Princess#Red fruigJLR4V#0.23#829#fruits#TriCounty Produce +Red Princess#Red fruinVKps5#0.23#558#fruits#Simply Fresh +Red Princess#Red frui0jl9mg#0.24#252#fruits#Off the Vine +Striped Screw Pine#StrifruiUKzjoU#0.60#226#fruits#TriCounty Produce +Striped Screw Pine#StrifruivWLDzH#0.64#685#fruits#Simply Fresh +Striped Screw Pine#StrifruiiF7CGH#0.60#983#fruits#Off the Vine +Tapioca#Tapifruib4LCqt#0.40#955#fruits#TriCounty Produce +Tapioca#TapifruiwgQLj9#0.41#889#fruits#Simply Fresh +Tapioca#TapifruiZ6Igg3#0.41#655#fruits#Off the Vine +Tavola#Tavofrui0k9XOt#1.16#938#fruits#TriCounty Produce +Tavola#Tavofrui8DuRxL#1.08#979#fruits#Simply Fresh +Tavola#TavofruiNZEuJZ#1.16#215#fruits#Off the Vine +Tea#TeafruiL0357s#1.11#516#fruits#TriCounty Produce +Tea#TeafruiD5soTf#1.13#970#fruits#Simply Fresh +Tea#TeafruiOWq4oO#1.19#357#fruits#Off the Vine +Ugli Fruit#UglifruipKNCpf#0.24#501#fruits#TriCounty Produce +Ugli Fruit#UglifruifbDrzc#0.24#642#fruits#Simply Fresh +Ugli Fruit#Uglifruiwx8or4#0.24#280#fruits#Off the Vine +Vegetable Brain#VegefruieXLBoc#0.73#355#fruits#TriCounty Produce +Vegetable Brain#Vegefruik5FSdl#0.71#498#fruits#Simply Fresh +Vegetable Brain#VegefruiKBfzN0#0.72#453#fruits#Off the Vine +White Walnut#Whitfruit3oVHL#0.30#501#fruits#TriCounty Produce +White Walnut#WhitfruiHygydw#0.30#913#fruits#Simply Fresh +White Walnut#WhitfruieNtplo#0.30#401#fruits#Off the Vine +Wood Apple#WoodfruijVPRqA#0.68#501#fruits#TriCounty Produce +Wood Apple#Woodfrui4Zk69T#0.68#616#fruits#Simply Fresh +Wood Apple#WoodfruiuSLHZK#0.70#474#fruits#Off the 
Vine +Yellow Horn#Yellfrui5igjjf#1.18#729#fruits#TriCounty Produce +Yellow Horn#Yellfrui0DiPqa#1.13#517#fruits#Simply Fresh +Yellow Horn#Yellfrui0ljvqC#1.14#853#fruits#Off the Vine +Yellow Sapote#YellfruilGmCfq#0.93#204#fruits#TriCounty Produce +Yellow Sapote#Yellfrui4J2mke#0.88#269#fruits#Simply Fresh +Yellow Sapote#Yellfrui6PuXaL#0.86#575#fruits#Off the Vine +Ylang-ylang#Ylanfrui3rmByO#0.76#429#fruits#TriCounty Produce +Ylang-ylang#YlanfruiA80Nkq#0.76#886#fruits#Simply Fresh +Ylang-ylang#YlanfruinUEm5d#0.72#747#fruits#Off the Vine +Zapote Blanco#ZapofruisZ5sMA#0.67#428#fruits#TriCounty Produce +Zapote Blanco#ZapofruilKxl7N#0.65#924#fruits#Simply Fresh +Zapote Blanco#ZapofruiAe6Eu1#0.68#255#fruits#Off the Vine +Zulu Nut#Zulufrui469K4k#0.71#445#fruits#TriCounty Produce +Zulu Nut#ZulufruiWbz6vU#0.71#653#fruits#Simply Fresh +Zulu Nut#Zulufrui0LJnWK#0.71#858#fruits#Off the Vine +Artichoke#ArtivegeIuqmS4#0.71#282#vegetables#The Pantry +Artichoke#Artivegebljjnf#0.69#66#vegetables#TriCounty Produce +Artichoke#ArtivegeTa2lcF#0.70#618#vegetables#Off the Vine +Asparagus#AspavegezC0cDl#0.23#70#vegetables#The Pantry +Asparagus#AspavegeM1q5Kt#0.24#546#vegetables#TriCounty Produce +Asparagus#AspavegeXWbCb8#0.24#117#vegetables#Off the Vine +Basil#Basivegev08fzf#0.31#213#vegetables#The Pantry +Basil#BasivegeF3Uha7#0.29#651#vegetables#TriCounty Produce +Basil#BasivegeqR8SHC#0.31#606#vegetables#Off the Vine +Bean#BeanvegegCFUOp#0.27#794#vegetables#The Pantry +Bean#BeanvegeqMSEVq#0.27#468#vegetables#TriCounty Produce +Bean#Beanvege4IGUwX#0.27#463#vegetables#Off the Vine +Beet#BeetvegedEv4Ic#0.35#120#vegetables#The Pantry +Beet#Beetvegegi1bz1#0.35#540#vegetables#TriCounty Produce +Beet#BeetvegemztZcN#0.36#386#vegetables#Off the Vine +Blackeyed Pea#Blacvege3TPldr#0.86#133#vegetables#The Pantry +Blackeyed Pea#Blacvege3Zqnep#0.88#67#vegetables#TriCounty Produce +Blackeyed Pea#Blacvege3khffZ#0.90#790#vegetables#Off the Vine +Cabbage#CabbvegeY0c4Fw#0.82#726#vegetables#The Pantry 
+Cabbage#CabbvegeoaK7Co#0.85#439#vegetables#TriCounty Produce +Cabbage#CabbvegeVvO646#0.82#490#vegetables#Off the Vine +Carrot#CarrvegeEbI0sw#0.45#717#vegetables#The Pantry +Carrot#CarrvegeEZndWL#0.49#284#vegetables#TriCounty Produce +Carrot#CarrvegewUkHao#0.47#122#vegetables#Off the Vine +Cauliflower#Caulvege1CPeNG#0.68#756#vegetables#The Pantry +Cauliflower#CaulvegedrPqib#0.66#269#vegetables#TriCounty Produce +Cauliflower#CaulvegeT6cka8#0.65#728#vegetables#Off the Vine +Chayote#ChayvegePRReGE#0.14#233#vegetables#The Pantry +Chayote#Chayvegep058f7#0.14#88#vegetables#TriCounty Produce +Chayote#ChayvegeoxO40S#0.14#611#vegetables#Off the Vine +Corn#CornvegeukXkv6#0.72#632#vegetables#The Pantry +Corn#CornvegePnPREC#0.72#609#vegetables#TriCounty Produce +Corn#CornvegeO0GwoQ#0.70#664#vegetables#Off the Vine +Cucumber#CucuvegeEqQeA7#0.94#499#vegetables#The Pantry +Cucumber#CucuvegewmKbJ1#0.94#738#vegetables#TriCounty Produce +Cucumber#CucuvegeUW6JaA#0.94#565#vegetables#Off the Vine +Cantaloupe#CantvegeIHs9vJ#0.66#411#vegetables#The Pantry +Cantaloupe#CantvegeEaDdST#0.66#638#vegetables#TriCounty Produce +Cantaloupe#CantvegewWQEa0#0.64#682#vegetables#Off the Vine +Carraway#CarrvegewuL4Ma#0.32#740#vegetables#The Pantry +Carraway#CarrvegeyiWfBj#0.32#265#vegetables#TriCounty Produce +Carraway#CarrvegeMjb1i9#0.31#732#vegetables#Off the Vine +Celeriac#CelevegeoTBicd#0.74#350#vegetables#The Pantry +Celeriac#CelevegeCNABoZ#0.70#261#vegetables#TriCounty Produce +Celeriac#Celevege9LUeww#0.70#298#vegetables#Off the Vine +Celery#Celevegej40ZCc#0.59#740#vegetables#The Pantry +Celery#CelevegerYlVRy#0.58#734#vegetables#TriCounty Produce +Celery#Celevege67eimC#0.58#619#vegetables#Off the Vine +Chervil#ChervegeuH4Dge#0.09#502#vegetables#The Pantry +Chervil#Chervegea1OyKO#0.09#299#vegetables#TriCounty Produce +Chervil#Chervegeq56gMO#0.09#474#vegetables#Off the Vine +Chicory#Chicvege79qoQ8#0.09#709#vegetables#The Pantry +Chicory#ChicvegeTSVBQq#0.10#477#vegetables#TriCounty Produce 
+Chicory#Chicvege6qpcyi#0.10#282#vegetables#Off the Vine +Chinese Cabbage#ChinvegeFNsSRn#0.78#408#vegetables#The Pantry +Chinese Cabbage#Chinvege2ldNr3#0.80#799#vegetables#TriCounty Produce +Chinese Cabbage#ChinvegeK3R2Td#0.80#180#vegetables#Off the Vine +Chinese Beans#ChinvegebxbyPy#0.45#654#vegetables#The Pantry +Chinese Beans#ChinvegewKGwgx#0.45#206#vegetables#TriCounty Produce +Chinese Beans#ChinvegevVjzC0#0.47#643#vegetables#Off the Vine +Chines Kale#ChinvegeCfdkss#0.70#239#vegetables#The Pantry +Chines Kale#Chinvege6V6Dne#0.65#548#vegetables#TriCounty Produce +Chines Kale#ChinvegeB7vE3x#0.66#380#vegetables#Off the Vine +Chinese Radish#ChinvegeXcM4eq#0.22#190#vegetables#The Pantry +Chinese Radish#ChinvegeTdUBqN#0.22#257#vegetables#TriCounty Produce +Chinese Radish#ChinvegeMXMms8#0.22#402#vegetables#Off the Vine +Chinese Mustard#ChinvegeRDdpdl#0.33#149#vegetables#The Pantry +Chinese Mustard#ChinvegeABDhNd#0.32#320#vegetables#TriCounty Produce +Chinese Mustard#Chinvege8NPwa2#0.34#389#vegetables#Off the Vine +Cilantro#CilavegeQXBEsW#0.60#674#vegetables#The Pantry +Cilantro#CilavegeRgjkUG#0.60#355#vegetables#TriCounty Produce +Cilantro#CilavegelT2msu#0.59#464#vegetables#Off the Vine +Collard#CollvegesTGGNw#0.32#745#vegetables#The Pantry +Collard#CollvegeAwdor5#0.32#124#vegetables#TriCounty Produce +Collard#CollvegeQe900L#0.30#796#vegetables#Off the Vine +Coriander#CorivegeXxp4xY#0.26#560#vegetables#The Pantry +Coriander#Corivege9xBAT0#0.27#321#vegetables#TriCounty Produce +Coriander#CorivegeCfNjBx#0.27#709#vegetables#Off the Vine +Dandelion#DandvegeJNcnbr#0.11#285#vegetables#The Pantry +Dandelion#DandvegeGwBkHZ#0.11#733#vegetables#TriCounty Produce +Dandelion#DandvegeZfwVqn#0.11#57#vegetables#Off the Vine +Daikon Radish#DaikvegeHHsd7M#0.61#743#vegetables#The Pantry +Daikon Radish#DaikvegeIu17yC#0.62#459#vegetables#TriCounty Produce +Daikon Radish#DaikvegePzFjqf#0.63#296#vegetables#Off the Vine +Eggplant#EggpvegeKJtydN#0.55#200#vegetables#The Pantry 
+Eggplant#EggpvegeQMKrNs#0.53#208#vegetables#TriCounty Produce +Eggplant#EggpvegeN0WnSo#0.51#761#vegetables#Off the Vine +English Pea#Englvegea1ytIn#0.40#457#vegetables#The Pantry +English Pea#EnglvegerU9Vty#0.37#263#vegetables#TriCounty Produce +English Pea#EnglvegeCmkd3y#0.39#430#vegetables#Off the Vine +Fennel#Fennvegebz2UM7#0.76#545#vegetables#The Pantry +Fennel#FennvegeQzjtZ3#0.78#795#vegetables#TriCounty Produce +Fennel#FennvegeXSrW61#0.75#79#vegetables#Off the Vine +Garlic#GarlvegesR2yel#0.76#478#vegetables#The Pantry +Garlic#GarlvegeEQvt8W#0.77#349#vegetables#TriCounty Produce +Garlic#GarlvegedljBdK#0.80#708#vegetables#Off the Vine +Ginger#GingvegeMNiTc2#0.88#563#vegetables#The Pantry +Ginger#Gingvegeq366Sn#0.89#738#vegetables#TriCounty Produce +Ginger#GingvegeznyyVj#0.89#598#vegetables#Off the Vine +Horseradish#HorsvegemSwISt#0.12#622#vegetables#The Pantry +Horseradish#HorsvegetCOS0x#0.11#279#vegetables#TriCounty Produce +Horseradish#Horsvegew6XXaS#0.12#478#vegetables#Off the Vine +Japanese Eggplant#JapavegeTdKDCL#0.57#539#vegetables#The Pantry +Japanese Eggplant#JapavegevsJfGa#0.58#782#vegetables#TriCounty Produce +Japanese Eggplant#JapavegeCIrIxd#0.57#777#vegetables#Off the Vine +Jerusalem Artichoke#Jeruvege928cr0#0.13#231#vegetables#The Pantry +Jerusalem Artichoke#JeruvegeC2v086#0.14#123#vegetables#TriCounty Produce +Jerusalem Artichoke#JeruvegeehCYzi#0.14#196#vegetables#Off the Vine +Jicama#JicavegeRWYj9n#0.75#79#vegetables#The Pantry +Jicama#JicavegeGk5LKH#0.71#292#vegetables#TriCounty Produce +Jicama#JicavegeUjpaX1#0.70#308#vegetables#Off the Vine +Kale#Kalevegext6RNT#0.55#765#vegetables#The Pantry +Kale#KalevegeFsp17B#0.53#107#vegetables#TriCounty Produce +Kale#KalevegeAffBTS#0.57#573#vegetables#Off the Vine +Kiwifruit#KiwivegeloZBKJ#0.60#769#vegetables#The Pantry +Kiwifruit#KiwivegenCQAHw#0.59#307#vegetables#TriCounty Produce +Kiwifruit#Kiwivege0Gi3P2#0.59#235#vegetables#Off the Vine +Kohlrabi#KohlvegeJFKZDl#0.26#406#vegetables#The Pantry 
+Kohlrabi#Kohlvege32UTAj#0.28#613#vegetables#TriCounty Produce +Kohlrabi#KohlvegejNQC1M#0.28#326#vegetables#Off the Vine +Leek#Leekvege5iaFtg#0.70#580#vegetables#The Pantry +Leek#Leekvegei9Wxbz#0.68#188#vegetables#TriCounty Produce +Leek#LeekvegewY4mAc#0.70#473#vegetables#Off the Vine +Lettuce#LettvegesK9wDR#0.55#716#vegetables#The Pantry +Lettuce#LettvegeWzMyCM#0.57#83#vegetables#TriCounty Produce +Lettuce#LettvegeHgfGG8#0.56#268#vegetables#Off the Vine +Melons#Melovege6t93WF#0.11#252#vegetables#The Pantry +Melons#Melovegeq9kz7T#0.12#558#vegetables#TriCounty Produce +Melons#Melovege9kLTXN#0.12#382#vegetables#Off the Vine +Mushroom#MushvegeSq53h8#0.59#365#vegetables#The Pantry +Mushroom#Mushvegedq6lYP#0.59#444#vegetables#TriCounty Produce +Mushroom#Mushvege8o27D2#0.55#467#vegetables#Off the Vine +Okra#OkravegeTszQSL#0.55#62#vegetables#The Pantry +Okra#OkravegeJBWmfh#0.58#165#vegetables#TriCounty Produce +Okra#OkravegeD6tF9n#0.55#77#vegetables#Off the Vine +Onion#OniovegejwimQo#0.80#186#vegetables#The Pantry +Onion#OniovegeUOwwks#0.80#417#vegetables#TriCounty Produce +Onion#OniovegezcRDrc#0.80#435#vegetables#Off the Vine +Oregano#OregvegetlU7Ez#0.71#119#vegetables#The Pantry +Oregano#Oregvege9h9ZKy#0.70#173#vegetables#TriCounty Produce +Oregano#OregvegebXr0PJ#0.70#773#vegetables#Off the Vine +Parsley#ParsvegeXFEjjN#0.83#502#vegetables#The Pantry +Parsley#ParsvegejAg5C4#0.80#454#vegetables#TriCounty Produce +Parsley#ParsvegehAtH2H#0.84#523#vegetables#Off the Vine +Parsnip#Parsvegee9Lp6D#0.46#626#vegetables#The Pantry +Parsnip#ParsvegeSxXHSA#0.47#411#vegetables#TriCounty Produce +Parsnip#Parsvegea0stPf#0.44#403#vegetables#Off the Vine +Pea#Peavegecq4SxR#0.18#342#vegetables#The Pantry +Pea#Peavege46Gdp9#0.18#255#vegetables#TriCounty Produce +Pea#Peavegeov1gc5#0.18#251#vegetables#Off the Vine +Pepper#PeppvegeUcBYRp#0.33#52#vegetables#The Pantry +Pepper#PeppvegeB60btP#0.35#107#vegetables#TriCounty Produce +Pepper#PeppvegeG4tP3e#0.34#481#vegetables#Off the Vine +Pigeon 
Pea#Pigevegec5bAtm#0.94#391#vegetables#The Pantry +Pigeon Pea#Pigevegeb93eLi#0.91#447#vegetables#TriCounty Produce +Pigeon Pea#PigevegejEBDRa#0.89#259#vegetables#Off the Vine +Irish Potato#IrisvegeJNQqby#0.72#355#vegetables#The Pantry +Irish Potato#Irisvegewq1PLd#0.72#601#vegetables#TriCounty Produce +Irish Potato#IrisvegeAfFLdO#0.68#740#vegetables#Off the Vine +Pumpkin#PumpvegeiYsPR8#0.25#776#vegetables#The Pantry +Pumpkin#PumpvegelqP1Kh#0.25#189#vegetables#TriCounty Produce +Pumpkin#Pumpvegeb3nQU5#0.26#207#vegetables#Off the Vine +Radish#RadivegeNwwSBJ#0.16#613#vegetables#The Pantry +Radish#Radivege0tIBnL#0.16#779#vegetables#TriCounty Produce +Radish#RadivegeNLqJCf#0.16#731#vegetables#Off the Vine +Rhubarb#RhubvegeREfOti#0.12#301#vegetables#The Pantry +Rhubarb#Rhubvege4Jc3b7#0.12#557#vegetables#TriCounty Produce +Rhubarb#RhubvegeaXqF7H#0.12#378#vegetables#Off the Vine +Rosemary#Rosevege16QStc#0.73#380#vegetables#The Pantry +Rosemary#RosevegeNf6Oem#0.75#622#vegetables#TriCounty Produce +Rosemary#RosevegeFgsOyN#0.74#631#vegetables#Off the Vine +Rutabaga#RutavegecUYfQ3#0.55#676#vegetables#The Pantry +Rutabaga#RutavegejOG5DF#0.55#273#vegetables#TriCounty Produce +Rutabaga#RutavegewEVjzV#0.53#452#vegetables#Off the Vine +Salsify#SalsvegeViS9HF#0.11#537#vegetables#The Pantry +Salsify#Salsvegemd3HAL#0.11#753#vegetables#TriCounty Produce +Salsify#SalsvegeuRCnmq#0.10#787#vegetables#Off the Vine +Savory#Savovegee4DRWl#0.21#456#vegetables#The Pantry +Savory#SavovegerZ90Xm#0.21#642#vegetables#TriCounty Produce +Savory#Savovegeje7yy7#0.22#328#vegetables#Off the Vine +Sesame#Sesavege4NAWZE#0.84#54#vegetables#The Pantry +Sesame#SesavegeMTc9IN#0.84#458#vegetables#TriCounty Produce +Sesame#SesavegegOwAjo#0.83#125#vegetables#Off the Vine +Shallots#ShalvegeUO2pDO#0.26#599#vegetables#The Pantry +Shallots#ShalvegeY1sekb#0.27#647#vegetables#TriCounty Produce +Shallots#ShalvegeSDC8VY#0.27#369#vegetables#Off the Vine +Sugar Snap Peas#SugavegepUZDTl#0.47#308#vegetables#The Pantry +Sugar 
Snap Peas#Sugavege1XyzNH#0.48#205#vegetables#TriCounty Produce +Sugar Snap Peas#SugavegeJuaG7f#0.46#348#vegetables#Off the Vine +Soybean#SoybvegeqxSVRL#0.70#639#vegetables#The Pantry +Soybean#SoybvegezEMjOG#0.68#423#vegetables#TriCounty Produce +Soybean#SoybvegebanSFq#0.67#268#vegetables#Off the Vine +Spaghetti Squash#SpagvegeMNO1yC#0.12#753#vegetables#The Pantry +Spaghetti Squash#SpagvegeilpUaD#0.13#604#vegetables#TriCounty Produce +Spaghetti Squash#SpagvegeAOoZNX#0.13#431#vegetables#Off the Vine +Spinach#SpinvegeegXXou#0.10#742#vegetables#The Pantry +Spinach#SpinvegeVcqXL6#0.11#708#vegetables#TriCounty Produce +Spinach#SpinvegetZ26DN#0.11#625#vegetables#Off the Vine +Sweet Potato#SweevegepNDQWb#0.94#720#vegetables#The Pantry +Sweet Potato#Sweevegepnw7Tm#0.90#377#vegetables#TriCounty Produce +Sweet Potato#Sweevegeyk0C82#0.89#242#vegetables#Off the Vine +Swiss Chard#SwisvegeksalTA#0.54#545#vegetables#The Pantry +Swiss Chard#SwisvegeKm2Kze#0.54#472#vegetables#TriCounty Produce +Swiss Chard#SwisvegehteuMk#0.56#142#vegetables#Off the Vine +Taro#Tarovege3fpGV6#0.87#155#vegetables#The Pantry +Taro#TarovegerZkmof#0.86#371#vegetables#TriCounty Produce +Taro#TarovegeXKPuzc#0.89#443#vegetables#Off the Vine +Tarragon#TarrvegeCzVC6U#0.18#491#vegetables#The Pantry +Tarragon#TarrvegesIkEfS#0.17#65#vegetables#TriCounty Produce +Tarragon#TarrvegerZsKFP#0.18#180#vegetables#Off the Vine +Thyme#Thymvege8Rv72c#0.41#442#vegetables#The Pantry +Thyme#ThymvegeJoUdQS#0.42#237#vegetables#TriCounty Produce +Thyme#ThymvegeRck5uO#0.43#491#vegetables#Off the Vine +Tomato#Tomavegey0NHGK#0.31#60#vegetables#The Pantry +Tomato#TomavegeKAjRUn#0.30#630#vegetables#TriCounty Produce +Tomato#TomavegePZOHlH#0.30#70#vegetables#Off the Vine +Turnip#TurnvegeRVQiV5#0.44#580#vegetables#The Pantry +Turnip#TurnvegeVjIX9D#0.45#743#vegetables#TriCounty Produce +Turnip#TurnvegelFhvuJ#0.44#219#vegetables#Off the Vine +Watercress#WatevegelwzPLQ#0.54#230#vegetables#The Pantry 
+Watercress#Watevege8oeDCT#0.54#774#vegetables#TriCounty Produce +Watercress#Watevegexr8L1t#0.55#185#vegetables#Off the Vine +Watermelon#WatevegeL83MRH#0.19#698#vegetables#The Pantry +Watermelon#WatevegeR2S4Dq#0.21#488#vegetables#TriCounty Produce +Watermelon#WatevegepFPXQu#0.21#439#vegetables#Off the Vine +Kamote#KamovegegdON75#0.13#218#vegetables#The Pantry +Kamote#KamovegevupDBf#0.13#98#vegetables#TriCounty Produce +Kamote#KamovegeSQX7IA#0.14#703#vegetables#Off the Vine +Alogbati#AlogvegeB1WaJU#0.41#775#vegetables#The Pantry +Alogbati#AlogvegeVr5cPP#0.40#789#vegetables#TriCounty Produce +Alogbati#AlogvegeyTUQzy#0.40#416#vegetables#Off the Vine +Ampalaya#AmpavegemR9fSd#0.85#107#vegetables#The Pantry +Ampalaya#AmpavegeJDu9Im#0.90#676#vegetables#TriCounty Produce +Ampalaya#AmpavegepL8GH5#0.86#728#vegetables#Off the Vine +Dahon ng sili#Dahovege6X9grk#0.11#369#vegetables#The Pantry +Dahon ng sili#DahovegeiHZjQT#0.11#141#vegetables#TriCounty Produce +Dahon ng sili#DahovegeoCDAH8#0.12#517#vegetables#Off the Vine +Gabi#GabivegeVm4Xk3#0.44#396#vegetables#The Pantry +Gabi#Gabivegeu6woqK#0.42#722#vegetables#TriCounty Produce +Gabi#GabivegezcA7q1#0.42#394#vegetables#Off the Vine +Kabute#Kabuvege6Tqrif#0.16#123#vegetables#The Pantry +Kabute#KabuvegeA3uYdG#0.15#183#vegetables#TriCounty Produce +Kabute#KabuvegeXW6ZiI#0.16#624#vegetables#Off the Vine +Kamoteng Kahoy#KamovegeAdW37X#0.42#782#vegetables#The Pantry +Kamoteng Kahoy#KamovegetFlqpC#0.42#515#vegetables#TriCounty Produce +Kamoteng Kahoy#KamovegeMvxoLn#0.40#166#vegetables#Off the Vine +Kangkong#KangvegeSFTvEz#0.35#759#vegetables#The Pantry +Kangkong#KangvegeRLR6gL#0.34#695#vegetables#TriCounty Produce +Kangkong#Kangvege9BFo14#0.35#783#vegetables#Off the Vine +Labanos#Labavege3qrWJL#0.94#514#vegetables#The Pantry +Labanos#LabavegekgVWDH#0.89#210#vegetables#TriCounty Produce +Labanos#LabavegeiVPgMx#0.89#207#vegetables#Off the Vine +Labong#LabovegeX3O8yz#0.85#722#vegetables#The Pantry 
+Labong#LabovegeI1wSEs#0.87#472#vegetables#TriCounty Produce +Labong#LabovegeOPiQht#0.85#740#vegetables#Off the Vine +Malunggay#MaluvegeHkwAFm#0.30#252#vegetables#The Pantry +Malunggay#Maluvegez6TiSY#0.30#245#vegetables#TriCounty Produce +Malunggay#MaluvegewzY37D#0.31#405#vegetables#Off the Vine +Munggo#MungvegeqeuwGw#0.25#362#vegetables#The Pantry +Munggo#MungvegeNhqWvL#0.26#360#vegetables#TriCounty Produce +Munggo#MungvegeGxNxQC#0.25#555#vegetables#Off the Vine +Pechay#PechvegezDeHFZ#0.36#401#vegetables#The Pantry +Pechay#Pechvegehi4Fcx#0.35#723#vegetables#TriCounty Produce +Pechay#Pechvege8Pq8Eo#0.36#141#vegetables#Off the Vine +Sigarilyas#SigavegeMJrtlV#0.88#335#vegetables#The Pantry +Sigarilyas#SigavegeLhsoOB#0.87#768#vegetables#TriCounty Produce +Sigarilyas#SigavegeS6RJcA#0.93#356#vegetables#Off the Vine +Sitaw#Sitavege0hMi9z#0.65#153#vegetables#The Pantry +Sitaw#Sitavegeez1g6N#0.67#561#vegetables#TriCounty Produce +Sitaw#Sitavege0BCNeF#0.66#674#vegetables#Off the Vine +Talong#TalovegevZjVK6#0.10#530#vegetables#The Pantry +Talong#TalovegexX4MRw#0.09#305#vegetables#TriCounty Produce +Talong#TalovegeO3U2ze#0.10#126#vegetables#Off the Vine +Toge#TogevegeYelJUw#0.54#449#vegetables#The Pantry +Toge#Togevegeilr1xK#0.54#274#vegetables#TriCounty Produce +Toge#Togevegesvjnyn#0.51#316#vegetables#Off the Vine +Ube#UbevegeoPnxvb#0.56#397#vegetables#The Pantry +Ube#Ubevege2CNyve#0.55#450#vegetables#TriCounty Produce +Ube#UbevegeC43sVj#0.55#263#vegetables#Off the Vine +Upo#UpovegecOGRqC#0.22#404#vegetables#The Pantry +Upo#Upovegekjl2wl#0.22#541#vegetables#TriCounty Produce +Upo#UpovegemTTTwI#0.23#459#vegetables#Off the Vine +Edamame#EdamvegeVYtk8z#0.79#296#vegetables#The Pantry +Edamame#Edamvege608vXi#0.78#700#vegetables#TriCounty Produce +Edamame#Edamvege1jiqGY#0.75#115#vegetables#Off the Vine +Hairy melon#HairvegeFYFHIw#0.71#789#vegetables#The Pantry +Hairy melon#HairvegeS7AAqI#0.72#302#vegetables#TriCounty Produce +Hairy melon#HairvegeO6WJHL#0.72#444#vegetables#Off the 
Vine +Burdock#BurdvegeyLstLV#0.56#761#vegetables#The Pantry +Burdock#BurdvegeZsqAjT#0.56#582#vegetables#TriCounty Produce +Burdock#BurdvegeycF7mo#0.55#566#vegetables#Off the Vine +Snake gourd#SnakvegesfHGvt#0.92#626#vegetables#The Pantry +Snake gourd#SnakvegedlNiBk#0.92#669#vegetables#TriCounty Produce +Snake gourd#Snakvegec5n1UM#0.92#143#vegetables#Off the Vine +Wasabi#Wasavege5P5pZp#0.67#751#vegetables#The Pantry +Wasabi#Wasavege6EEE9r#0.68#559#vegetables#TriCounty Produce +Wasabi#Wasavege1ve7TY#0.65#61#vegetables#Off the Vine +Yam#YamvegeRN9ONH#0.57#438#vegetables#The Pantry +Yam#YamvegeWjdzeA#0.56#564#vegetables#TriCounty Produce +Yam#YamvegeI1AnyI#0.56#456#vegetables#Off the Vine +Apple Fritters#AppldessDj96hw#6.12#16#desserts#Mom's Kitchen +Apple Fritters#AppldessrN1kvM#6.06#7#desserts#The Baking Pan +Banana Split#Banadess7tpjkJ#10.86#10#desserts#Mom's Kitchen +Banana Split#Banadessfif758#11.07#14#desserts#The Baking Pan +Blueberry Boy Bait#BluedesseX2LVU#3.72#16#desserts#Mom's Kitchen +Blueberry Boy Bait#Bluedess9zLhaH#3.93#9#desserts#The Baking Pan +Candied Cranberries#CanddessjW92p3#1.77#9#desserts#Mom's Kitchen +Candied Cranberries#CanddesskhtVoQ#1.72#0#desserts#The Baking Pan +Daiquiri Souffle#DaiqdessebnYcy#9.54#15#desserts#Mom's Kitchen +Daiquiri Souffle#DaiqdessfM1DnX#9.72#6#desserts#The Baking Pan +Bananas Flambe#BanadesscczumD#6.94#12#desserts#Mom's Kitchen +Bananas Flambe#Banadess8qNfxd#7.07#16#desserts#The Baking Pan +Pie, Apple#Pie,desshcSHhT#7.88#11#desserts#Mom's Kitchen +Pie, Apple#Pie,dessTbiwDp#7.88#15#desserts#The Baking Pan +Pie, Pumpkin#Pie,desswhPBPB#6.00#20#desserts#Mom's Kitchen +Pie, Pumpkin#Pie,dessDg3NWl#6.24#19#desserts#The Baking Pan +Pie, Blueberry#Pie,dessw9VdgD#2.14#3#desserts#Mom's Kitchen +Pie, Blueberry#Pie,dessiSjZKD#2.12#1#desserts#The Baking Pan +Pie, Pecan#Pie,dess2NqhNR#12.70#20#desserts#Mom's Kitchen +Pie, Pecan#Pie,dessB1LfcE#12.33#12#desserts#The Baking Pan +Pie, Cranberry Apple#Pie,dess1mL7IS#10.16#7#desserts#Mom's 
Kitchen +Pie, Cranberry Apple#Pie,dessmDhkUA#10.16#11#desserts#The Baking Pan +Pie, Banana Cream#Pie,dessH80DuG#7.35#6#desserts#Mom's Kitchen +Pie, Banana Cream#Pie,dessf1YvFb#7.08#11#desserts#The Baking Pan +Pie, Key Lime#Pie,desshtli5N#4.85#2#desserts#Mom's Kitchen +Pie, Key Lime#Pie,dessMwQkKm#5.13#1#desserts#The Baking Pan +Pie, Lemon Meringue#Pie,dess9naVkX#3.74#7#desserts#Mom's Kitchen +Pie, Lemon Meringue#Pie,dessKYcNML#3.67#5#desserts#The Baking Pan +Pie, Caramel#Pie,dessSUuiIU#2.27#9#desserts#Mom's Kitchen +Pie, Caramel#Pie,dessvo8uHh#2.33#4#desserts#The Baking Pan +Pie, Raspberry#Pie,dessUHhMlS#2.36#0#desserts#Mom's Kitchen +Pie, Raspberry#Pie,dessJflbf5#2.36#2#desserts#The Baking Pan +Ice Cream, Chocolate#Ice desseXuyxx#1.44#9#desserts#Mom's Kitchen +Ice Cream, Chocolate#Ice dessASBohf#1.41#13#desserts#The Baking Pan +Ice Cream, Vanilla#Ice dessYnzbbt#11.92#19#desserts#Mom's Kitchen +Ice Cream, Vanilla#Ice dessUBBKp8#11.58#10#desserts#The Baking Pan +Ice Cream, Strawberry#Ice dessfTwKhD#1.90#14#desserts#Mom's Kitchen +Ice Cream, Strawberry#Ice dessaO9Fxf#1.99#6#desserts#The Baking Pan +Ice Cream, Rocky Road#Ice dessyIri3P#13.10#20#desserts#Mom's Kitchen +Ice Cream, Rocky Road#Ice dessZuLr8F#13.48#13#desserts#The Baking Pan +Ice Cream, Mint Chocolate Chip#Ice dessV1IGG7#5.75#4#desserts#Mom's Kitchen +Ice Cream, Mint Chocolate Chip#Ice dessX1gEQ4#5.64#1#desserts#The Baking Pan +Ice Cream Sundae#Ice dessbhlAXt#5.62#11#desserts#Mom's Kitchen +Ice Cream Sundae#Ice dessByapxl#5.72#16#desserts#The Baking Pan +Cobbler, Peach#CobbdessYUGeOB#10.14#20#desserts#Mom's Kitchen +Cobbler, Peach#CobbdessXfEtUK#10.43#16#desserts#The Baking Pan +Cobbler, Berry-Pecan#Cobbdessx3htak#5.36#12#desserts#Mom's Kitchen +Cobbler, Berry-Pecan#Cobbdesse4FUVI#5.41#8#desserts#The Baking Pan +Cobbler, Blueberry#CobbdessbiI0oF#3.78#11#desserts#Mom's Kitchen +Cobbler, Blueberry#CobbdessMXxbBN#3.57#2#desserts#The Baking Pan +Cobbler, Cherry#CobbdessNSa8QW#12.58#0#desserts#Mom's Kitchen 
+Cobbler, Cherry#CobbdessA1dADa#12.10#10#desserts#The Baking Pan +Cobbler, Huckleberry#Cobbdess3t6O8d#3.99#18#desserts#Mom's Kitchen +Cobbler, Huckleberry#CobbdessGI9euK#3.88#0#desserts#The Baking Pan +Cobbler, Rhubarb#Cobbdess22X40Z#9.54#0#desserts#Mom's Kitchen +Cobbler, Rhubarb#CobbdessPfnCT0#9.27#18#desserts#The Baking Pan +Cobbler, Strawberry#CobbdessI78188#12.43#0#desserts#Mom's Kitchen +Cobbler, Strawberry#CobbdessH3LdgQ#12.20#3#desserts#The Baking Pan +Cobbler, Zucchini#Cobbdess5rK4dP#11.24#3#desserts#Mom's Kitchen +Cobbler, Zucchini#Cobbdess4Ez8kS#10.51#10#desserts#The Baking Pan +Brownies#BrowdessmogdTl#7.62#9#desserts#Mom's Kitchen +Brownies#Browdess84Qc1z#7.55#9#desserts#The Baking Pan +Fudge Bar#Fudgdess8iXSyf#11.72#6#desserts#Mom's Kitchen +Fudge Bar#FudgdessakU1Id#12.29#5#desserts#The Baking Pan +Cookies, Oatmeal#Cookdessnq9Oya#2.84#15#desserts#Mom's Kitchen +Cookies, Oatmeal#CookdessBhgp7p#2.68#10#desserts#The Baking Pan +Cookies, Chocolate Chip#CookdessRVszsZ#12.73#17#desserts#Mom's Kitchen +Cookies, Chocolate Chip#CookdessSOoHmT#12.26#19#desserts#The Baking Pan +Cookies, Peanut Butter#Cookdess2UcMI2#7.82#5#desserts#Mom's Kitchen +Cookies, Peanut Butter#Cookdess1cILme#7.46#20#desserts#The Baking Pan +Mousse, Chocolate#MousdessDpN4sQ#6.25#20#desserts#Mom's Kitchen +Mousse, Chocolate#Mousdess8FyFT8#5.96#1#desserts#The Baking Pan +Mousse, Blueberry Maple#MousdessacwrkO#7.28#7#desserts#Mom's Kitchen +Mousse, Blueberry Maple#MousdessbiCMFg#7.21#12#desserts#The Baking Pan +Mousse, Chocolate Banana#MousdessIeW4qz#5.13#2#desserts#Mom's Kitchen +Mousse, Chocolate Banana#Mousdess1De9oL#5.08#19#desserts#The Baking Pan +Mousse, Cherry#Mousdesss1bF8H#13.05#20#desserts#Mom's Kitchen +Mousse, Cherry#Mousdess0ujevx#12.43#1#desserts#The Baking Pan +Mousse, Eggnog#MousdessZ38hXj#9.07#10#desserts#Mom's Kitchen +Mousse, Eggnog#Mousdesshs05ST#8.81#8#desserts#The Baking Pan +Mousse, Strawberry#MousdessHCDlBK#5.58#3#desserts#Mom's Kitchen +Mousse, 
Strawberry#MousdessSZ4PyW#5.36#6#desserts#The Baking Pan +Sherbet, Cantaloupe#Sherdess3DCxUg#3.11#9#desserts#Mom's Kitchen +Sherbet, Cantaloupe#Sherdesscp2VIz#2.99#7#desserts#The Baking Pan +Sherbet, Lemon Milk#Sherdess1JVFOS#7.57#9#desserts#Mom's Kitchen +Sherbet, Lemon Milk#SherdessC865vu#7.57#0#desserts#The Baking Pan +Sherbet, Orange Crush#Sherdess8W8Mb9#4.32#18#desserts#Mom's Kitchen +Sherbet, Orange Crush#SherdessxmVJBF#4.16#10#desserts#The Baking Pan +Sherbet, Blueberry#SherdessFAgxqp#3.46#9#desserts#Mom's Kitchen +Sherbet, Blueberry#SherdessMPL87u#3.60#6#desserts#The Baking Pan +Sherbet, Raspberry#Sherdesse86ugA#6.08#1#desserts#Mom's Kitchen +Sherbet, Raspberry#Sherdesslc1etR#5.85#12#desserts#The Baking Pan +Sherbet, Strawberry#SherdessFwv09m#4.63#17#desserts#Mom's Kitchen +Sherbet, Strawberry#SherdessKB0H7q#4.81#20#desserts#The Baking Pan +Tart, Apple#TartdessrsTyXA#3.35#18#desserts#Mom's Kitchen +Tart, Apple#Tartdessp7pyiy#3.13#11#desserts#The Baking Pan +Tart, Almond#TartdessC7FARL#6.62#10#desserts#Mom's Kitchen +Tart, Almond#Tartdess1V1A1c#6.68#13#desserts#The Baking Pan +Tart, Blueberry#TartdesssQZRXX#10.28#10#desserts#Mom's Kitchen +Tart, Blueberry#TartdessUSJSuc#10.28#9#desserts#The Baking Pan +Tart, Chocolate-Pear#Tartdess2pdOE4#5.67#17#desserts#Mom's Kitchen +Tart, Chocolate-Pear#TartdessL3aEDd#5.51#9#desserts#The Baking Pan +Tart, Lemon Fudge#Tartdess9DhZUT#3.88#3#desserts#Mom's Kitchen +Tart, Lemon Fudge#TartdesshzLOWt#3.96#13#desserts#The Baking Pan +Tart, Pecan#TartdessvSbXzd#11.80#3#desserts#Mom's Kitchen +Tart, Pecan#Tartdess6YXJec#11.04#13#desserts#The Baking Pan +Tart, Pineapple#TartdesseMfJFe#9.01#18#desserts#Mom's Kitchen +Tart, Pineapple#TartdessA2Wftr#8.44#13#desserts#The Baking Pan +Tart, Pear#Tartdess4a1BUc#10.09#2#desserts#Mom's Kitchen +Tart, Pear#TartdessNw8YPG#10.68#5#desserts#The Baking Pan +Tart, Raspberry#TartdessAVnpP6#6.18#7#desserts#Mom's Kitchen +Tart, Raspberry#TartdessfVxZFf#5.95#9#desserts#The Baking Pan +Tart, 
Strawberry#Tartdess4IUcZW#4.75#8#desserts#Mom's Kitchen +Tart, Strawberry#Tartdess2BeEDb#4.61#17#desserts#The Baking Pan +Tart, Raspberry#TartdesshyBd24#1.85#5#desserts#Mom's Kitchen +Tart, Raspberry#Tartdess5fqxgy#1.94#20#desserts#The Baking Pan +Trifle, Berry#TrifdessmEkbU2#12.48#19#desserts#Mom's Kitchen +Trifle, Berry#TrifdessAV9Ix8#12.60#18#desserts#The Baking Pan +Trifle, American#TrifdesscsdSCd#4.70#17#desserts#Mom's Kitchen +Trifle, American#TrifdessTArskm#4.35#11#desserts#The Baking Pan +Trifle, English#TrifdessX87q8T#8.20#9#desserts#Mom's Kitchen +Trifle, English#Trifdess52l955#8.12#11#desserts#The Baking Pan +Trifle, Orange#TrifdesslUwxwe#9.74#15#desserts#Mom's Kitchen +Trifle, Orange#TrifdessFrfCHP#10.22#1#desserts#The Baking Pan +Trifle, Pumpkin#TrifdessJKFN96#4.72#7#desserts#Mom's Kitchen +Trifle, Pumpkin#TrifdessMNw4EV#4.95#16#desserts#The Baking Pan +Trifle, Scottish#TrifdessFa0JdK#13.63#0#desserts#Mom's Kitchen +Trifle, Scottish#TrifdessAAUQCN#14.03#6#desserts#The Baking Pan +Trifle, Sherry#TrifdesscuttJg#4.42#5#desserts#Mom's Kitchen +Trifle, Sherry#TrifdesspRGpfP#4.21#19#desserts#The Baking Pan +Trifle, Strawberry#TrifdessAd5TpV#3.58#11#desserts#Mom's Kitchen +Trifle, Strawberry#Trifdess1rtW0A#3.58#3#desserts#The Baking Pan +Trifle, Scotch Whiskey#Trifdess2zJsGi#5.44#5#desserts#Mom's Kitchen +Trifle, Scotch Whiskey#TrifdessL8nuI6#5.18#5#desserts#The Baking Pan +Cheesecake, Amaretto#CheedessOJBqfD#11.89#5#desserts#Mom's Kitchen +Cheesecake, Amaretto#CheedessVnDf14#11.89#9#desserts#The Baking Pan +Cheesecake, Apple#Cheedessuks1YK#11.22#15#desserts#Mom's Kitchen +Cheesecake, Apple#CheedessMYKaKK#11.01#14#desserts#The Baking Pan +Cheesecake, Apricot#CheedessKUxTYY#12.34#16#desserts#Mom's Kitchen +Cheesecake, Apricot#CheedessMvB1pr#11.88#18#desserts#The Baking Pan +Cheesecake, Australian#CheedessQ9WAIn#2.70#9#desserts#Mom's Kitchen +Cheesecake, Australian#CheedessE6Jyjc#2.53#14#desserts#The Baking Pan +Cheesecake, 
Arkansas#CheedessTbqzmw#6.98#9#desserts#Mom's Kitchen +Cheesecake, Arkansas#CheedesstWJZfC#6.66#5#desserts#The Baking Pan +Cheesecake, Blueberry#Cheedessyo51KL#8.07#11#desserts#Mom's Kitchen +Cheesecake, Blueberry#Cheedess4Hz7P4#8.62#5#desserts#The Baking Pan +Cheesecake, Cherry#CheedessEahRkC#4.40#14#desserts#Mom's Kitchen +Cheesecake, Cherry#Cheedess3Nx4jZ#4.65#3#desserts#The Baking Pan +Cheesecake, Cran-Raspberry#CheedessrJsr9i#13.47#20#desserts#Mom's Kitchen +Cheesecake, Cran-Raspberry#CheedesshcuXCy#14.00#6#desserts#The Baking Pan +Cheesecake, German Chocolate#CheedesswayvJL#12.03#16#desserts#Mom's Kitchen +Cheesecake, German Chocolate#CheedessebTAeB#11.58#0#desserts#The Baking Pan +Cheesecake, Turtle#CheedessLqgeIA#12.19#6#desserts#Mom's Kitchen +Cheesecake, Turtle#CheedessvyNohA#12.07#19#desserts#The Baking Pan +Brownies, Apple#BrowdessIDW1Cc#5.44#12#desserts#Mom's Kitchen +Brownies, Apple#BrowdessyRMrAH#5.14#12#desserts#The Baking Pan +Brownies, Fudge#BrowdessmIHIFJ#5.19#8#desserts#Mom's Kitchen +Brownies, Fudge#BrowdessqewJ38#5.10#17#desserts#The Baking Pan +Brownies, Almond Macaroon#BrowdessniK7QI#10.57#3#desserts#Mom's Kitchen +Brownies, Almond Macaroon#BrowdessgkXURH#10.36#17#desserts#The Baking Pan +Brownies, Butterscotch#BrowdesslpA06E#7.16#13#desserts#Mom's Kitchen +Brownies, Butterscotch#BrowdessK5hofE#7.30#6#desserts#The Baking Pan +Brownies, Caramel#BrowdessVGfoA8#3.07#3#desserts#Mom's Kitchen +Brownies, Caramel#Browdess5jvVMM#3.13#11#desserts#The Baking Pan +Brownies, Cherry#Browdessyoa66A#3.39#17#desserts#Mom's Kitchen +Brownies, Cherry#BrowdessIg2JuF#3.39#11#desserts#The Baking Pan +Brownies, Chocolate Chip#Browdessb9dc59#6.18#10#desserts#Mom's Kitchen +Brownies, Chocolate Chip#BrowdessvW4nOx#6.43#14#desserts#The Baking Pan +Brownies, Coconut#BrowdessWPHrVR#3.06#15#desserts#Mom's Kitchen +Brownies, Coconut#BrowdessGVBlML#2.86#11#desserts#The Baking Pan +Brownies, Cream Cheese#Browdess1OyRay#12.74#4#desserts#Mom's Kitchen +Brownies, Cream 
Cheese#Browdess2fRsNv#12.61#19#desserts#The Baking Pan +Brownies, Fudge Mint#Browdessl7DP7k#11.45#14#desserts#Mom's Kitchen +Brownies, Fudge Mint#Browdessv70VKQ#11.34#16#desserts#The Baking Pan +Brownies, Mint Chip#BrowdessDDMvF7#1.81#15#desserts#Mom's Kitchen +Brownies, Mint Chip#Browdess0j9PBD#1.84#9#desserts#The Baking Pan +Cake, Angel Food#CakedessEaqGaE#11.18#3#desserts#Mom's Kitchen +Cake, Angel Food#CakedessJyAyFe#11.18#14#desserts#The Baking Pan +Cake, Chocolate#CakedessKLXFbn#10.11#7#desserts#Mom's Kitchen +Cake, Chocolate#CakedessfNP5Hg#9.91#14#desserts#The Baking Pan +Cake, Carrot#CakedessUTgMoV#4.20#13#desserts#Mom's Kitchen +Cake, Carrot#CakedessQdkaYg#4.00#3#desserts#The Baking Pan +Cake, Lemon Blueberry#CakedessepkeEW#11.73#16#desserts#Mom's Kitchen +Cake, Lemon Blueberry#CakedessHTKyQs#12.42#16#desserts#The Baking Pan +Cake Triple Fudge#CakedessiZ75lR#7.92#7#desserts#Mom's Kitchen +Cake Triple Fudge#CakedessWRrSXP#8.00#15#desserts#The Baking Pan +Cake, Walnut#CakedessveYVXZ#10.83#17#desserts#Mom's Kitchen +Cake, Walnut#Cakedesse22rT5#11.04#7#desserts#The Baking Pan +Cake, French Apple#CakedessjA2Kxv#1.95#0#desserts#Mom's Kitchen +Cake, French Apple#CakedessNBHCk0#1.86#20#desserts#The Baking Pan +Cake, Fig#CakedessOncX4y#6.82#3#desserts#Mom's Kitchen +Cake, Fig#CakedessTJtffn#7.08#10#desserts#The Baking Pan +Cake, Maple#CakedessnoGPRF#3.04#11#desserts#Mom's Kitchen +Cake, Maple#CakedessfVattM#3.22#4#desserts#The Baking Pan +Cake, Devil's Food#CakedessiXcDCt#4.73#7#desserts#Mom's Kitchen +Cake, Devil's Food#CakedessnBZk45#4.82#6#desserts#The Baking Pan +Cake, Double-Lemon#CakedesskeS0Vd#3.46#9#desserts#Mom's Kitchen +Cake, Double-Lemon#Cakedess50vx53#3.60#6#desserts#The Baking Pan +Sorbet, Blackberry#SorbdessQoa0CE#9.88#15#desserts#Mom's Kitchen +Sorbet, Blackberry#SorbdessqoOYzv#9.78#9#desserts#The Baking Pan diff --git a/examples/cxx/getting_started/vendors.txt b/examples/cxx/getting_started/vendors.txt new file mode 100644 index 00000000..528e1b11 
--- /dev/null +++ b/examples/cxx/getting_started/vendors.txt @@ -0,0 +1,6 @@ +TriCounty Produce#309 S. Main Street#Middle Town#MN#55432#763 555 5761#Mort Dufresne#763 555 5765 +Simply Fresh#15612 Bogart Lane#Harrigan#WI#53704#420 333 3912#Cheryl Swedberg#420 333 3952 +Off the Vine#133 American Ct.#Centennial#IA#52002#563 121 3800#Bob King#563 121 3800 x54 +The Pantry#1206 N. Creek Way#Middle Town#MN#55432#763 555 3391#Sully Beckstrom#763 555 3391 +Mom's Kitchen#53 Yerman Ct.#Middle Town#MN#55432#763 554 9200#Maggie Kultgen#763 554 9200 x12 +The Baking Pan#1415 53rd Ave.#Dutchin#MN#56304#320 442 2277#Mike Roan#320 442 6879 diff --git a/examples/cxx/txn_guide/TxnGuide.cpp b/examples/cxx/txn_guide/TxnGuide.cpp new file mode 100644 index 00000000..89bee1fb --- /dev/null +++ b/examples/cxx/txn_guide/TxnGuide.cpp @@ -0,0 +1,399 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// File TxnGuide.cpp + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} +#define PATHD '\\' + +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + ((statusp == NULL) ? 0 : \ + (GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1))) + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 
0 : -1) +#else +#include +#include +#define PATHD '/' + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) +#endif + +// Run 5 writers threads at a time. +#define NUMWRITERS 5 + +// Printing of thread_t is implementation-specific, so we +// create our own thread IDs for reporting purposes. +int global_thread_num; +mutex_t thread_num_lock; + +// Forward declarations +int countRecords(Db *, DbTxn *); +int openDb(Db **, const char *, const char *, DbEnv *, u_int32_t); +int usage(void); +void *writerThread(void *); + +// Usage function +int +usage() +{ + std::cerr << " [-h ]" << std::endl; + return (EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + // Initialize our handles + Db *dbp = NULL; + DbEnv *envp = NULL; + + thread_t writerThreads[NUMWRITERS]; + int ch, i; + u_int32_t envFlags; + char *dbHomeDir; + extern char *optarg; + + // Application name + const char *progName = "TxnGuide"; + + // Database file name + const char *fileName = "mydb.db"; + + // Parse the command line arguments +#ifdef _WIN32 + dbHomeDir = ".\\"; +#else + dbHomeDir = (char *)"./"; +#endif + while ((ch = getopt(argc, argv, "h:")) != EOF) + switch (ch) { + case 'h': + dbHomeDir = optarg; + break; + case '?': + default: + return (usage()); + } + + + // Env open flags + envFlags = + DB_CREATE | // Create the environment if it does not exist + DB_RECOVER | // Run normal recovery. + DB_INIT_LOCK | // Initialize the locking subsystem + DB_INIT_LOG | // Initialize the logging subsystem + DB_INIT_TXN | // Initialize the transactional subsystem. This + // also turns on logging. 
+ DB_INIT_MPOOL | // Initialize the memory pool (in-memory cache) + DB_THREAD; // Cause the environment to be free-threaded + + try { + // Create and open the environment + envp = new DbEnv(0); + + // Indicate that we want db to internally perform deadlock + // detection. Also indicate that the transaction with + // the fewest number of write locks will receive the + // deadlock notification in the event of a deadlock. + envp->set_lk_detect(DB_LOCK_MINWRITE); + + envp->open(dbHomeDir, envFlags, 0); + + + // If we had utility threads (for running checkpoints or + // deadlock detection, for example) we would spawn those + // here. However, for a simple example such as this, + // that is not required. + + // Open the database + openDb(&dbp, progName, fileName, + envp, DB_DUPSORT); + + // Initialize a mutex. Used to help provide thread ids. + (void)mutex_init(&thread_num_lock, NULL); + + // Start the writer threads. + for (i = 0; i < NUMWRITERS; i++) + (void)thread_create( + &writerThreads[i], NULL, + writerThread, (void *)dbp); + + // Join the writers + for (i = 0; i < NUMWRITERS; i++) + (void)thread_join(writerThreads[i], NULL); + + } catch(DbException &e) { + std::cerr << "Error opening database environment: " + << dbHomeDir << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + try { + // Close our database handle if it was opened. + if (dbp != NULL) + dbp->close(0); + + // Close our environment if it was opened. + if (envp != NULL) + envp->close(0); + } catch(DbException &e) { + std::cerr << "Error closing database and environment." + << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + // Final status message and return. + + std::cout << "I'm all done." << std::endl; + return (EXIT_SUCCESS); +} + +// A function that performs a series of writes to a +// Berkeley DB database. 
The information written +// to the database is largely nonsensical, but the +// mechanism of transactional commit/abort and +// deadlock detection is illustrated here. +void * +writerThread(void *args) +{ + int j, thread_num; + int max_retries = 20; // Max retry on a deadlock + const char *key_strings[] = {"key 1", "key 2", "key 3", "key 4", + "key 5", "key 6", "key 7", "key 8", + "key 9", "key 10"}; + + Db *dbp = (Db *)args; + DbEnv *envp = dbp->get_env(); + + // Get the thread number + (void)mutex_lock(&thread_num_lock); + global_thread_num++; + thread_num = global_thread_num; + (void)mutex_unlock(&thread_num_lock); + + // Initialize the random number generator + srand(thread_num); + + // Perform 50 transactions + for (int i=0; i<50; i++) { + DbTxn *txn; + bool retry = true; + int retry_count = 0; + // while loop is used for deadlock retries + while (retry) { + // try block used for deadlock detection and + // general db exception handling + try { + + // Begin our transaction. We group multiple writes in + // this thread under a single transaction so as to + // (1) show that you can atomically perform multiple + // writes at a time, and (2) to increase the chances + // of a deadlock occurring so that we can observe our + // deadlock detection at work. + + // Normally we would want to avoid the potential for + // deadlocks, so for this workload the correct thing + // would be to perform our puts with autocommit. But + // that would excessively simplify our example, so we + // do the "wrong" thing here instead. + txn = NULL; + envp->txn_begin(NULL, &txn, 0); + + // Perform the database write for this transaction. 
+ for (j = 0; j < 10; j++) { + Dbt key, value; + key.set_data((void *)key_strings[j]); + key.set_size((u_int32_t)strlen(key_strings[j]) + 1); + + int payload = rand() + i; + value.set_data(&payload); + value.set_size(sizeof(int)); + + // Perform the database put + dbp->put(txn, &key, &value, 0); + } + + // countRecords runs a cursor over the entire database. + // We do this to illustrate issues of deadlocking + std::cout << thread_num << " : Found " + << countRecords(dbp, NULL) + << " records in the database." << std::endl; + + std::cout << thread_num << " : committing txn : " << i + << std::endl; + + // commit + try { + txn->commit(0); + retry = false; + txn = NULL; + } catch (DbException &e) { + std::cout << "Error on txn commit: " + << e.what() << std::endl; + } + } catch (DbDeadlockException &) { + // First thing that we MUST do is abort the transaction. + if (txn != NULL) + (void)txn->abort(); + + // Now we decide if we want to retry the operation. + // If we have retried less than max_retries, + // increment the retry count and goto retry. + if (retry_count < max_retries) { + std::cout << "############### Writer " << thread_num + << ": Got DB_LOCK_DEADLOCK.\n" + << "Retrying write operation." + << std::endl; + retry_count++; + retry = true; + } else { + // Otherwise, just give up. + std::cerr << "Writer " << thread_num + << ": Got DeadLockException and out of " + << "retries. Giving up." << std::endl; + retry = false; + } + } catch (DbException &e) { + std::cerr << "db put failed" << std::endl; + std::cerr << e.what() << std::endl; + if (txn != NULL) + txn->abort(); + retry = false; + } catch (std::exception &ee) { + std::cerr << "Unknown exception: " << ee.what() << std::endl; + return (0); + } + } + } + return (0); +} + + +// This simply counts the number of records contained in the +// database and returns the result. You can use this method +// in three ways: +// +// First call it with an active txn handle. 
+// Secondly, configure the cursor for uncommitted reads +// +// Third, call countRecords AFTER the writer has committed +// its transaction. +// +// If you do none of these things, the writer thread will +// self-deadlock. +// +// Note that this method exists only for illustrative purposes. +// A more straight-forward way to count the number of records in +// a database is to use the Database.getStats() method. +int +countRecords(Db *dbp, DbTxn *txn) +{ + + Dbc *cursorp = NULL; + int count = 0; + + try { + // Get the cursor + dbp->cursor(txn, &cursorp, DB_READ_UNCOMMITTED); + + Dbt key, value; + while (cursorp->get(&key, &value, DB_NEXT) == 0) { + count++; + } + } catch (DbDeadlockException &de) { + std::cerr << "countRecords: got deadlock" << std::endl; + cursorp->close(); + throw de; + } catch (DbException &e) { + std::cerr << "countRecords error:" << std::endl; + std::cerr << e.what() << std::endl; + } + + if (cursorp != NULL) { + try { + cursorp->close(); + } catch (DbException &e) { + std::cerr << "countRecords: cursor close failed:" << std::endl; + std::cerr << e.what() << std::endl; + } + } + + return (count); +} + + +// Open a Berkeley DB database +int +openDb(Db **dbpp, const char *progname, const char *fileName, + DbEnv *envp, u_int32_t extraFlags) +{ + int ret; + u_int32_t openFlags; + + try { + Db *dbp = new Db(envp, 0); + + // Point to the new'd Db + *dbpp = dbp; + + if (extraFlags != 0) + ret = dbp->set_flags(extraFlags); + + // Now open the database */ + openFlags = DB_CREATE | // Allow database creation + DB_READ_UNCOMMITTED | // Allow uncommitted reads + DB_AUTO_COMMIT | // Allow autocommit + DB_THREAD; // Cause the database to be free-threaded + + dbp->open(NULL, // Txn pointer + fileName, // File name + NULL, // Logical db name + DB_BTREE, // Database type (using btree) + openFlags, // Open flags + 0); // File mode. 
Using defaults + } catch (DbException &e) { + std::cerr << progname << ": openDb: db open failed:" << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + diff --git a/examples/cxx/txn_guide/TxnGuideInMemory.cpp b/examples/cxx/txn_guide/TxnGuideInMemory.cpp new file mode 100644 index 00000000..ed684535 --- /dev/null +++ b/examples/cxx/txn_guide/TxnGuideInMemory.cpp @@ -0,0 +1,380 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// File TxnGuideInMemory.cpp + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#define PATHD '\\' +extern "C" { + extern int getopt(int, char * const *, const char *); + extern char *optarg; +} + +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + ((statusp == NULL) ? 0 : \ + (GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1))) + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 0 : -1) +#else +#include +#include +#define PATHD '/' + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) +#endif + +// Run 5 writers threads at a time. 
+#define NUMWRITERS 5 + +// Printing of pthread_t is implementation-specific, so we +// create our own thread IDs for reporting purposes. +int global_thread_num; +mutex_t thread_num_lock; + +// Forward declarations +int countRecords(Db *, DbTxn *); +int openDb(Db **, const char *, const char *, DbEnv *, u_int32_t); +int usage(void); +void *writerThread(void *); + +int +main(void) +{ + // Initialize our handles + Db *dbp = NULL; + DbEnv *envp = NULL; + + thread_t writerThreads[NUMWRITERS]; + int i; + u_int32_t envFlags; + + // Application name + const char *progName = "TxnGuideInMemory"; + + // Env open flags + envFlags = + DB_CREATE | // Create the environment if it does not exist + DB_RECOVER | // Run normal recovery. + DB_INIT_LOCK | // Initialize the locking subsystem + DB_INIT_LOG | // Initialize the logging subsystem + DB_INIT_TXN | // Initialize the transactional subsystem. This + // also turns on logging. + DB_INIT_MPOOL | // Initialize the memory pool (in-memory cache) + DB_PRIVATE | // Region files are not backed by the filesystem. + // Instead, they are backed by heap memory. + DB_THREAD; // Cause the environment to be free-threaded + + try { + // Create the environment + envp = new DbEnv(0); + + // Specify in-memory logging + envp->log_set_config(DB_LOG_IN_MEMORY, 1); + + // Specify the size of the in-memory log buffer. + envp->set_lg_bsize(10 * 1024 * 1024); + + // Specify the size of the in-memory cache + envp->set_cachesize(0, 10 * 1024 * 1024, 1); + + // Indicate that we want db to internally perform deadlock + // detection. Also indicate that the transaction with + // the fewest number of write locks will receive the + // deadlock notification in the event of a deadlock. + envp->set_lk_detect(DB_LOCK_MINWRITE); + + // Open the environment + envp->open(NULL, envFlags, 0); + + // If we had utility threads (for running checkpoints or + // deadlock detection, for example) we would spawn those + // here. 
However, for a simple example such as this, + // that is not required. + + // Open the database + openDb(&dbp, progName, NULL, + envp, DB_DUPSORT); + + // Initialize a mutex. Used to help provide thread ids. + (void)mutex_init(&thread_num_lock, NULL); + + // Start the writer threads. + for (i = 0; i < NUMWRITERS; i++) + (void)thread_create( + &writerThreads[i], NULL, + writerThread, + (void *)dbp); + + // Join the writers + for (i = 0; i < NUMWRITERS; i++) + (void)thread_join(writerThreads[i], NULL); + + } catch(DbException &e) { + std::cerr << "Error opening database environment: " + << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + try { + // Close our database handle if it was opened. + if (dbp != NULL) + dbp->close(0); + + // Close our environment if it was opened. + if (envp != NULL) + envp->close(0); + } catch(DbException &e) { + std::cerr << "Error closing database and environment." + << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + // Final status message and return. + + std::cout << "I'm all done." << std::endl; + return (EXIT_SUCCESS); +} + +// A function that performs a series of writes to a +// Berkeley DB database. The information written +// to the database is largely nonsensical, but the +// mechanism of transactional commit/abort and +// deadlock detection is illustrated here. 
+void * +writerThread(void *args) +{ + int j, thread_num; + int max_retries = 20; // Max retry on a deadlock + const char *key_strings[] = {"key 1", "key 2", "key 3", "key 4", + "key 5", "key 6", "key 7", "key 8", + "key 9", "key 10"}; + + Db *dbp = (Db *)args; + DbEnv *envp = dbp->get_env(); + + // Get the thread number + (void)mutex_lock(&thread_num_lock); + global_thread_num++; + thread_num = global_thread_num; + (void)mutex_unlock(&thread_num_lock); + + // Initialize the random number generator + srand(thread_num); + + // Perform 50 transactions + for (int i=0; i<50; i++) { + DbTxn *txn; + bool retry = true; + int retry_count = 0; + // while loop is used for deadlock retries + while (retry) { + // try block used for deadlock detection and + // general db exception handling + try { + + // Begin our transaction. We group multiple writes in + // this thread under a single transaction so as to + // (1) show that you can atomically perform multiple + // writes at a time, and (2) to increase the chances + // of a deadlock occurring so that we can observe our + // deadlock detection at work. + + // Normally we would want to avoid the potential for + // deadlocks, so for this workload the correct thing + // would be to perform our puts with autocommit. But + // that would excessively simplify our example, so we + // do the "wrong" thing here instead. + txn = NULL; + envp->txn_begin(NULL, &txn, 0); + + // Perform the database write for this transaction. + for (j = 0; j < 10; j++) { + Dbt key, value; + key.set_data((void *)key_strings[j]); + key.set_size((u_int32_t)strlen(key_strings[j]) + 1); + + int payload = rand() + i; + value.set_data(&payload); + value.set_size(sizeof(int)); + + // Perform the database put + dbp->put(txn, &key, &value, 0); + } + + // countRecords runs a cursor over the entire database. + // We do this to illustrate issues of deadlocking + std::cout << thread_num << " : Found " + << countRecords(dbp, txn) + << " records in the database." 
<< std::endl; + + std::cout << thread_num << " : committing txn : " << i + << std::endl; + + // commit + try { + txn->commit(0); + retry = false; + txn = NULL; + } catch (DbException &e) { + std::cout << "Error on txn commit: " + << e.what() << std::endl; + } + } catch (DbDeadlockException &) { + // First thing that we MUST do is abort the transaction. + if (txn != NULL) + (void)txn->abort(); + + // Now we decide if we want to retry the operation. + // If we have retried less than max_retries, + // increment the retry count and goto retry. + if (retry_count < max_retries) { + std::cerr << "############### Writer " << thread_num + << ": Got DB_LOCK_DEADLOCK.\n" + << "Retrying write operation." << std::endl; + retry_count++; + retry = true; + } else { + // Otherwise, just give up. + std::cerr << "Writer " << thread_num + << ": Got DeadLockException and out of " + << "retries. Giving up." << std::endl; + retry = false; + } + } catch (DbException &e) { + std::cerr << "db put failed" << std::endl; + std::cerr << e.what() << std::endl; + if (txn != NULL) + txn->abort(); + retry = false; + } catch (std::exception &ee) { + std::cerr << "Unknown exception: " << ee.what() << std::endl; + return (0); + } + } + } + return (0); +} + + +// This simply counts the number of records contained in the +// database and returns the result. You can use this method +// in three ways: +// +// First call it with an active txn handle. +// +// Secondly, configure the cursor for uncommitted reads +// +// Third, call countRecords AFTER the writer has committed +// its transaction. +// +// If you do none of these things, the writer thread will +// self-deadlock. +// +// Note that this method exists only for illustrative purposes. +// A more straight-forward way to count the number of records in +// a database is to use the Database.getStats() method. 
+int +countRecords(Db *dbp, DbTxn *txn) +{ + + Dbc *cursorp = NULL; + int count = 0; + + try { + // Get the cursor + dbp->cursor(txn, &cursorp, 0); + + Dbt key, value; + while (cursorp->get(&key, &value, DB_NEXT) == 0) { + count++; + } + } catch (DbDeadlockException &de) { + std::cerr << "countRecords: got deadlock" << std::endl; + cursorp->close(); + throw de; + } catch (DbException &e) { + std::cerr << "countRecords error:" << std::endl; + std::cerr << e.what() << std::endl; + } + + if (cursorp != NULL) { + try { + cursorp->close(); + } catch (DbException &e) { + std::cerr << "countRecords: cursor close failed:" << std::endl; + std::cerr << e.what() << std::endl; + } + } + + return (count); +} + + +// Open a Berkeley DB database +int +openDb(Db **dbpp, const char *progname, const char *fileName, + DbEnv *envp, u_int32_t extraFlags) +{ + int ret; + u_int32_t openFlags; + + try { + Db *dbp = new Db(envp, 0); + + // Point to the new'd Db + *dbpp = dbp; + + if (extraFlags != 0) + ret = dbp->set_flags(extraFlags); + + // Now open the database */ + openFlags = DB_CREATE | // Allow database creation + DB_THREAD | + DB_AUTO_COMMIT; // Allow autocommit + + dbp->open(NULL, // Txn pointer + fileName, // File name + NULL, // Logical db name + DB_BTREE, // Database type (using btree) + openFlags, // Open flags + 0); // File mode. Using defaults + } catch (DbException &e) { + std::cerr << progname << ": openDb: db open failed:" << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + diff --git a/examples/stl/README b/examples/stl/README new file mode 100644 index 00000000..43f568b5 --- /dev/null +++ b/examples/stl/README @@ -0,0 +1,41 @@ +# $Id$ + +StlAccessExample.cpp Simple Database Access. + + Exstl_access uses STL simple features based on the DB access methods. + + Build: make exstl_access + + +repquote/ Replication. 
+ + Exstl_repquote creates a toy stock quote server + with DB's single-master, multiple-client replication + with communication over TCP, via STL API. See repquote/README. + + Build: make exstl_repquote + + +StlTransactionGuideExample.cpp Multithreaded DB Access. + + StlTxnGuide runs multiple threads to access databases via STL API. + + Build: make StlTxnGuide + + +StlTpcbExample.cpp TPC/B. + + Exstl_tpcb sets up a framework in which to run a TPC/B test + via the STL API. + + Database initialization (the -i flag) and running the + benchmark (-n flag) must take place separately (i.e., + first create the database, then run 1 or more copies of + the benchmark). Furthermore, when running more than one + TPCB process, it is necessary to run the deadlock detector + (db_deadlock), since it is possible for concurrent tpcb + processes to deadlock. For performance measurement, it + will also be beneficial to run the db_checkpoint process + as well. + + Build: make exstl_tpcb diff --git a/examples/stl/StlAccessExample.cpp b/examples/stl/StlAccessExample.cpp new file mode 100644 index 00000000..97d2b5fa --- /dev/null +++ b/examples/stl/StlAccessExample.cpp @@ -0,0 +1,143 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2008, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#include + +#include +#include +#include +#include +#include +#include + +#include "dbstl_map.h" +#include "dbstl_vector.h" + +using namespace std; +using namespace dbstl; + +#ifdef _WIN32 +extern "C" { + extern int getopt(int, char * const *, const char *); + extern int optind; +} +#else +#include +#endif + +#include + +using std::cin; +using std::cout; +using std::cerr; + +class AccessExample +{ +public: + AccessExample(); + void run(); + +private: + // no need for copy and assignment + AccessExample(const AccessExample &); + void operator = (const AccessExample &); +}; + +int +usage() +{ + (void)fprintf(stderr, "usage: AccessExample"); + return (EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + // Use a try block just to report any errors. + // An alternate approach to using exceptions is to + // use error models (see DbEnv::set_error_model()) so + // that error codes are returned for all Berkeley DB methods. + // + try { + AccessExample app; + app.run(); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "AccessExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +AccessExample::AccessExample() +{ +} + +void AccessExample::run() +{ + typedef db_map > strmap_t; + // Create a map container with inmemory anonymous database. + strmap_t dbmap; + + // Insert records into dbmap, where the key is the user + // input and the data is the user input in reverse order. + // + char buf[1024], rbuf[1024]; + char *p, *t; + u_int32_t len; + + for (;;) { + // Acquire user input string as key. + cout << "input> "; + cout.flush(); + + cin.getline(buf, sizeof(buf)); + if (cin.eof()) + break; + + if ((len = (u_int32_t)strlen(buf)) <= 0) + continue; + if (strcmp(buf, "quit") == 0) + break; + + // Reverse input string as data. + for (t = rbuf, p = buf + (len - 1); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + + // Insert key/data pair. 
+ try { + dbmap.insert(make_pair(buf, rbuf)); + } catch (DbException ex) { + if (ex.get_errno() == DB_KEYEXIST) { + cout << "Key " << buf << " already exists.\n"; + } else + throw; + } catch (...) { + throw; + } + } + cout << "\n"; + + // We put a try block around this section of code + // to ensure that our database is properly closed + // in the event of an error. + // + try { + strmap_t::iterator itr; + for (itr = dbmap.begin(); + itr != dbmap.end(); ++itr) + cout<first<<" : "<second< m_int_t; + typedef ptint tpint; + ~StlAdvancedFeaturesExample(); + StlAdvancedFeaturesExample(void *param1); + void run() + { + arbitrary_object_storage(); + storing_std_strings(); + secondary_containers(); + char_star_string_storage(); + arbitray_sequence_storage(); + bulk_retrieval_read(); + primitive(); + queue_stack(); + } +private: + + // Store primitive types of data into dbstl containers. + void primitive(); + + // Use std::queue and std::stack as adapters, and dbstl::db_vector + // as container, to make a queue and a stack. + void queue_stack(); + + // Use two ways to store an object of arbitrary length. The object + // contains some varying length members, char* string for example. + void arbitrary_object_storage(); + + // Store std::string types of strings. + void storing_std_strings(); + + // Open a secondary db H and associate it with an exisitng db handle + // which is being used by a container C1, then use H to create another + // container C2, verify we can get C1's data via C2. + // This technique works for all types of db and containers. + void secondary_containers(); + + // Direct storage of char* strings. + void char_star_string_storage(); + + // Storage of arbitrary element type of sequence. + void arbitray_sequence_storage(); + + // Reading with bulk retrieval flag. 
+ void bulk_retrieval_read(); + + + int flags, setflags, explicit_txn, test_autocommit, n; + DBTYPE dbtype; + dm_int_t::difference_type oddcnt; + Db *db3, *dmdb1, *dmdb2, *dmmdb1, *dmmdb2, *dmsdb1, + *dmsdb2, *dmmsdb1, *dmmsdb2, *dbstrv, *pquedb, *quedb; + Db *dbp3; + Db *dmdb6; + Db *dbp3sec; + Db *dmmdb4, *dbstrmap; + Db *dmstringdb; + Db *dbprim; + DbEnv *penv; + u_int32_t dboflags; + + map m1; + multimap mm1; +}; + +StlAdvancedFeaturesExample::~StlAdvancedFeaturesExample() +{ +} + +StlAdvancedFeaturesExample::StlAdvancedFeaturesExample(void *param1) +{ + check_expr(param1 != NULL); + TestParam *param = (TestParam*)param1; + TestParam *ptp = param; + + flags = 0, setflags = 0, explicit_txn = 1, test_autocommit = 0; + dbtype = DB_BTREE; + penv = param->dbenv; + dmdb1 = dmdb2 = dmmdb1 = dmmdb2 = dmsdb1 = dmsdb2 = dmmsdb1 = + dmmsdb2 = dbstrv = NULL; + flags = param->flags; + dbtype = param->dbtype; + setflags = param->setflags; + test_autocommit = param->test_autocommit; + explicit_txn = param->explicit_txn; + dboflags = ptp->dboflags; + n = 10; + + dmdb1 = dbstl::open_db(penv, "db_map.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + dmdb2 = dbstl::open_db(penv, "db_map2.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + dmdb6 = dbstl::open_db(penv, "db_map6.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + + dmmdb1 = dbstl::open_db(penv, + "db_multimap.db", dbtype, DB_CREATE | ptp->dboflags, DB_DUP); + dmmdb2 = dbstl::open_db(penv, + "db_multimap2.db", dbtype, DB_CREATE | ptp->dboflags, DB_DUP); + + dmsdb1 = dbstl::open_db(penv, "db_set.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + dmsdb2 = dbstl::open_db(penv, "db_set2.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + + dmmsdb1 = dbstl::open_db(penv, + "db_multiset.db", dbtype, DB_CREATE | ptp->dboflags, DB_DUP); + dmmsdb2 = dbstl::open_db(penv, + "db_multiset2.db", dbtype, DB_CREATE | ptp->dboflags, DB_DUP); + + dbstrv = dbstl::open_db(penv, "dbstr.db", + DB_RECNO, DB_CREATE | ptp->dboflags, DB_RENUMBER); + dbp3sec = 
dbstl::open_db(penv, "db_map_sec.db", + dbtype, DB_CREATE | ptp->dboflags, DB_DUP); + + dmmdb4 = dbstl::open_db(penv, + "db_multimap4.db", dbtype, DB_CREATE | dboflags, DB_DUPSORT); + dbstrmap = dbstl::open_db(penv, "dbstrmap.db", + DB_BTREE, DB_CREATE, 0); + + dmstringdb = dbstl::open_db(penv, "db_map_stringdb.db", + dbtype, DB_CREATE | dboflags, 0); + db3 = dbstl::open_db(penv, "db3.db", + DB_RECNO, DB_CREATE | ptp->dboflags, DB_RENUMBER); + + // NO DB_RENUMBER needed + quedb = dbstl::open_db(penv, "dbquedb.db", + DB_RECNO, DB_CREATE | ptp->dboflags | DB_THREAD, 0); + + pquedb = dbstl::open_db(penv, "dbpquedb.db", + DB_RECNO, DB_CREATE | ptp->dboflags | DB_THREAD, DB_RENUMBER); + dbprim = dbstl::open_db(penv, "dbprim.db", + DB_RECNO, DB_CREATE | ptp->dboflags | DB_THREAD, DB_RENUMBER); + + dbp3 = dbstl::open_db(penv, "dbp3.db", + dbtype, DB_CREATE | ptp->dboflags, 0); + +} + +void StlAdvancedFeaturesExample::arbitrary_object_storage() +{ + int i; + + if (explicit_txn) + begin_txn(0, penv); + // varying length objects test + cout<<"\nArbitary object storage using Dbt..\n"; + + rand_str_dbt smsdbt; + DbstlDbt dbt, dbtmsg; + string msgstr; + SMSMsg *smsmsgs[10]; + + dbtmsg.set_flags(DB_DBT_USERMEM); + dbt.set_data(DbstlMalloc(256)); + dbt.set_flags(DB_DBT_USERMEM); + dbt.set_ulen(256); + db_map msgmap(dbp3, penv); + for (i = 0; i < 10; i++) { + smsdbt(dbt, msgstr, 10, 200); + SMSMsg *pmsg = SMSMsg::make_sms_msg(time(NULL), + (char *)dbt.get_data(), i); + smsmsgs[i] = SMSMsg::make_sms_msg(time(NULL), + (char *)dbt.get_data(), i); + dbtmsg.set_data(pmsg); + dbtmsg.set_ulen((u_int32_t)(pmsg->mysize)); + dbtmsg.set_size((u_int32_t)(pmsg->mysize)); + dbtmsg.set_flags(DB_DBT_USERMEM); + msgmap.insert(make_pair(i, dbtmsg)); + free(pmsg); + memset(&dbtmsg, 0, sizeof(dbtmsg)); + } + dbtmsg.set_data(NULL); + + SMSMsg *psmsmsg; + for (i = 0; i < 10; i++) { + db_map::data_type_wrap msgref = msgmap[i]; + psmsmsg = (SMSMsg *)msgref.get_data(); + check_expr(memcmp(smsmsgs[i], 
psmsmsg, + smsmsgs[i]->mysize) == 0); + } + + i = 0; + for (db_map::iterator msgitr = + msgmap.begin(ReadModifyWriteOption:: + read_modify_write()); msgitr != msgmap.end(); ++msgitr, i++) { + db_map::reference smsmsg = *msgitr; + (((SMSMsg*)(smsmsg.second.get_data())))->when = time(NULL); + smsmsg.second._DB_STL_StoreElement(); + + } + + for (i = 0; i < 10; i++) + free(smsmsgs[i]); + + msgmap.clear(); + + + cout<<"\nArbitary object(sparse, varying length) storage support using registered callbacks.\n"; + db_map msgmap2(dbp3, penv); + SMSMsg2 smsmsgs2[10]; + DbstlElemTraits::instance()->set_copy_function(SMSMsgCopy); + DbstlElemTraits::instance()->set_size_function(SMSMsgSize); + DbstlElemTraits::instance()->set_restore_function(SMSMsgRestore); + // use new technique to store varying length and inconsecutive objs + for (i = 0; i < 10; i++) { + smsdbt(dbt, msgstr, 10, 200); + SMSMsg2 msg2(time(NULL), msgstr.c_str(), i); + smsmsgs2[i] = msg2; + + msgmap2.insert(make_pair(i, msg2)); + + } + + // check that retrieved data is identical to stored data + SMSMsg2 tmpmsg2; + for (i = 0; i < 10; i++) { + tmpmsg2 = msgmap2[i]; + check_expr(smsmsgs2[i] == tmpmsg2); + } + for (db_map::iterator msgitr = + msgmap2.begin(ReadModifyWriteOption:: + read_modify_write()); msgitr != msgmap2.end(); msgitr++) { + db_map::reference smsmsg = *msgitr; + smsmsg.second.when = time(NULL); + smsmsg.second._DB_STL_StoreElement(); + + } + msgmap2.clear(); + if (explicit_txn) + commit_txn(penv); +} // arbitrary_object_storage + +// std::string persistent test. +void StlAdvancedFeaturesExample::storing_std_strings() +{ + string kstring = "hello world", *sstring = new string("hi there"); + if (explicit_txn) + begin_txn(0, penv); + + db_map pmap(dmstringdb, NULL); + + pmap[kstring] = *sstring + "!"; + *sstring = pmap[kstring]; + map spmap; + spmap.insert(make_pair(kstring, *sstring)); + cout<<"sstring append ! 
is : "<::iterator ii = pmap.begin(); + ii != pmap.end(); + ++ii) { + cout << (*ii).first << ": " << (*ii).second << endl; + } + close_db(dmstringdb); + + dmstringdb = dbstl::open_db(penv, "db_map_stringdb.db", + dbtype, DB_CREATE | dboflags, 0); + db_map pmap2(dmstringdb, NULL); + for (db_map::iterator ii = pmap2.begin(); + ii != pmap2.end(); ++ii) { + cout << (*ii).first << ": " << (*ii).second << endl; + // assert key/data pair set equal + check_expr((spmap.count(ii->first) == 1) && + (spmap[ii->first] == ii->second)); + } + if (explicit_txn) + commit_txn(penv); + + db_vector strvctor(10); + vector sstrvctor(10); + for (int i = 0; i < 10; i++) { + strvctor[i] = "abc"; + sstrvctor[i] = strvctor[i]; + } + check_expr(is_equal(strvctor, sstrvctor)); +} + +void StlAdvancedFeaturesExample::secondary_containers() +{ + int i; + + if (explicit_txn) + begin_txn(0, penv); + // test secondary db + cout<<"\ndb container backed by secondary database."; + + dbp3->associate(dbstl::current_txn(penv), dbp3sec, + get_dest_secdb_callback, DB_CREATE); + typedef db_multimap sec_mmap_t; + sec_mmap_t secmmap(dbp3sec, penv);// index "to" field + db_map basemsgs(dbp3, penv); + basemsgs.clear(); + BaseMsg tmpmsg; + multiset bsmsgs, bsmsgs2; + multiset::iterator bsitr1, bsitr2; + // populate primary and sec db + for (i = 0; i < 10; i++) { + tmpmsg.when = time(NULL); + tmpmsg.to = 100 - i % 3;// sec index multiple + tmpmsg.from = i + 20; + bsmsgs.insert( tmpmsg); + basemsgs.insert(make_pair(i, tmpmsg)); + + } + check_expr(basemsgs.size() == 10); + // check retrieved data is identical to those fed in + sec_mmap_t::iterator itrsec; + for (itrsec = secmmap.begin( + ReadModifyWriteOption::no_read_modify_write(), true); + itrsec != secmmap.end(); itrsec++) { + bsmsgs2.insert(itrsec->second); + } + for (bsitr1 = bsmsgs.begin(), bsitr2 = bsmsgs2.begin(); + bsitr1 != bsmsgs.end() && bsitr2 != bsmsgs2.end(); + bsitr1++, bsitr2++) { + check_expr(*bsitr1 == *bsitr2); + } + check_expr(bsitr1 == 
bsmsgs.end() && bsitr2 == bsmsgs2.end()); + + // search using sec index, check the retrieved data is expected + // and exists in bsmsgs + check_expr(secmmap.size() == 10); + pair secrg = + secmmap.equal_range(98); + + for (itrsec = secrg.first; itrsec != secrg.second; itrsec++) { + check_expr(itrsec->second.to == 98 && + bsmsgs.count(itrsec->second) > 0); + } + // delete via sec db + size_t nersd = secmmap.erase(98); + check_expr(10 - nersd == basemsgs.size()); + secrg = secmmap.equal_range(98); + check_expr(secrg.first == secrg.second); + + if (explicit_txn) + dbstl::commit_txn(penv); + +} // secondary_containers + +void StlAdvancedFeaturesExample::char_star_string_storage() +{ + int i; + // Varying length data element storage/retrieval + cout<<"\nchar*/wchar_t* string storage support...\n"; + + if (explicit_txn) + dbstl::begin_txn(0, penv); + // Use Dbt to wrap any object and store them. This is rarely needed, + // so this piece of code is only for test purpose. + db_vector strv(dbstrv, penv); + vector strsv; + vector strvdbts; + strv.clear(); + + int strlenmax = 256, strlenmin = 64; + string str; + DbstlDbt dbt; + rand_str_dbt rand_str_maker; + dbt.set_flags(DB_DBT_USERMEM); + dbt.set_data(DbstlMalloc(strlenmax + 10)); + dbt.set_ulen(strlenmax + 10); + + for (int jj = 0; jj < 10; jj++) { + rand_str_maker(dbt, str, strlenmin, strlenmax); + strsv.push_back(str); + strv.push_back(dbt); + } + + cout<<"\nstrings:\n"; + for (i = 0; i < 10; i++) { + db_vector::value_type_wrap elemref = strv[i]; + strvdbts.push_back(elemref); + printf("\n%s\n%s", (char*)(strvdbts[i].get_data()), + strsv[i].c_str()); + check_expr(strcmp((char*)(elemref.get_data()), + strsv[i].c_str()) == 0); + check_expr(strcmp((char*)(strvdbts[i].get_data()), + strsv[i].c_str()) == 0); + } + strv.clear(); + + if (explicit_txn) { + dbstl::commit_txn(penv); + dbstl::begin_txn(0, penv); + } + + // Use ordinary way to store strings. 
+ TCHAR cstr1[32], cstr2[32], cstr3[32]; + strcpy(cstr1, "abc"); + strcpy(cstr2, "defcd"); + strcpy(cstr3, "edggsefcd"); + + typedef db_map > strmap_t; + strmap_t strmap(dmdb6, penv); + strmap.clear(); + strmap.insert(make_pair(1, cstr1)); + strmap.insert(make_pair(2, cstr2)); + strmap.insert(make_pair(3, cstr3)); + cout<<"\n strings in strmap:\n"; + for (strmap_t::const_iterator citr = strmap.begin(); + citr != strmap.end(); citr++) + cout<<(*citr).second<<'\t'; + cout< > cstrpairs_t; + cstrpairs_t strpairs(dmdb6, penv); + strpairs["abc"] = "def"; + strpairs["ghi"] = "jkl"; + strpairs["mno"] = "pqrs"; + strpairs["tuv"] = "wxyz"; + cstrpairs_t::const_iterator ciitr; + cstrpairs_t::iterator iitr; + for (ciitr = strpairs.begin(), iitr = strpairs.begin(); + iitr != strpairs.end(); ++iitr, ++ciitr) { + cout<<"\n"<first<<"\t"<second; + cout<<"\n"<first<<"\t"<second; + check_expr(strcmp(ciitr->first, iitr->first) == 0 && + strcmp(ciitr->second, iitr->second) == 0); + } + + typedef db_map > strpairs_t; + typedef std::map sstrpairs_t; + sstrpairs_t sstrpairs2; + strpairs_t strpairs2; + rand_str_dbt randstr; + + for (i = 0; i < 100; i++) { + string rdstr, rdstr2; + randstr(dbt, rdstr); + randstr(dbt, rdstr2); + strpairs2[(char *)rdstr.c_str()] = (char *)rdstr2.c_str(); + sstrpairs2[rdstr] = rdstr2; + } + strpairs_t::iterator itr; + strpairs_t::const_iterator citr; + + for (itr = strpairs2.begin(); itr != strpairs2.end(); ++itr) { + check_expr(strcmp(strpairs2[itr->first], itr->second) == 0); + check_expr(string(itr->second) == + sstrpairs2[string(itr->first)]); + strpairs_t::value_type_wrap::second_type&secref = itr->second; + std::reverse((char *)secref, (char *)secref + strlen(secref)); + secref._DB_STL_StoreElement(); + std::reverse(sstrpairs2[itr->first].begin(), + sstrpairs2[itr->first].end()); + } + + check_expr(strpairs2.size() == sstrpairs2.size()); + for (citr = strpairs2.begin( + ReadModifyWriteOption::no_read_modify_write(), + true, 
BulkRetrievalOption::bulk_retrieval()); + citr != strpairs2.end(); ++citr) { + check_expr(strcmp(strpairs2[citr->first], citr->second) == 0); + check_expr(string(citr->second) == + sstrpairs2[string(citr->first)]); + } + + + if (explicit_txn) + dbstl::commit_txn(penv); + + db_vector > csvct(10); + vector scsvct(10); + const char *pconststr = "abc"; + for (i = 0; i < 10; i++) { + scsvct[i] = pconststr; + csvct[i] = pconststr; + csvct[i] = scsvct[i]; + // scsvct[i] = csvct[i]; assignment won't work because scsvct + // only stores pointer but do not copy the sequence, thus it + // will refer to an invalid pointer when i changes. + } + for (i = 0; i < 10; i++) { + check_expr(strcmp(csvct[i], scsvct[i]) == 0); + cout< > wcsvct(10); + vector wscsvct(10); + const wchar_t *pconstwstr = L"abc"; + for (i = 0; i < 10; i++) { + wscsvct[i] = pconstwstr; + wcsvct[i] = pconstwstr; + wcsvct[i] = wscsvct[i]; + // scsvct[i] = csvct[i]; assignment won't work because scsvct + // only stores pointer but do not copy the sequence, thus it + // will refer to an invalid pointer when i changes. + } + for (i = 0; i < 10; i++) { + check_expr(wcscmp(wcsvct[i], wscsvct[i]) == 0); + + } + +} // char_star_string_storage + +void StlAdvancedFeaturesExample::arbitray_sequence_storage() +{ + int i, j; + + if (explicit_txn) + dbstl::begin_txn(0, penv); + // storing arbitary sequence test . 
+ cout< > rgbmap_t; + rgbmap_t rgbsmap(dmdb6, penv); + + map srgbsmap; + + DbstlElemTraits::instance()->set_sequence_len_function(rgblen); + DbstlElemTraits::instance()->set_sequence_copy_function(rgbcpy); + // populate srgbsmap and rgbsmap + for (i = 0; i < 10; i++) { + n = abs(rand()) % 10 + 2; + rgbs[i] = new RGBB[n]; + memset(&rgbs[i][n - 1], 0, sizeof(RGBB));//make last element 0 + for (j = 0; j < n - 1; j++) { + rgbs[i][j].r_ = i + 128; + rgbs[i][j].g_ = 256 - i; + rgbs[i][j].b_ = 128 - i; + rgbs[i][j].bright_ = 256 / (i + 1); + + } + rgbsmap.insert(make_pair(i, rgbs[i])); + srgbsmap.insert(make_pair(i, rgbs[i])); + } + + // retrieve and assert equal, then modify and store + for (i = 0; i < 10; i++) { + rgbmap_t::value_type_wrap::second_type rgbelem = rgbsmap[i]; + prgb1 = rgbelem; + check_expr(memcmp(prgb1, prgb2 = srgbsmap[i], + (n = (int)rgblen(srgbsmap[i])) * sizeof(RGBB)) == 0); + for (j = 0; j < n - 1; j++) { + prgb1[j].r_ = 256 - prgb1[j].r_; + prgb1[j].g_ = 256 - prgb1[j].g_; + prgb1[j].b_ = 256 - prgb1[j].b_; + prgb2[j].r_ = 256 - prgb2[j].r_; + prgb2[j].g_ = 256 - prgb2[j].g_; + prgb2[j].b_ = 256 - prgb2[j].b_; + } + rgbelem._DB_STL_StoreElement(); + } + + // retrieve again and assert equal + for (i = 0; i < 10; i++) { + rgbmap_t::value_type_wrap::second_type rgbelem = rgbsmap[i]; + // Can't use rgbsmap[i] here because container::operator[] is + // an temporary value.; + prgb1 = rgbelem; + check_expr(memcmp(prgb1, prgb2 = srgbsmap[i], + sizeof(RGBB) * rgblen(srgbsmap[i])) == 0); + } + + rgbmap_t::iterator rmitr; + map::iterator srmitr; + + for (rmitr = rgbsmap.begin(); + rmitr != rgbsmap.end(); ++rmitr) { + rgbmap_t::value_type_wrap::second_type + rgbelem2 = (*rmitr).second; + prgb1 = (*rmitr).second; + srmitr = srgbsmap.find(rmitr->first); + rmitr.refresh(); + } + + for (i = 0; i < 10; i++) + delete []rgbs[i]; + if (explicit_txn) + dbstl::commit_txn(penv); +} // arbitray_sequence_storage + +void StlAdvancedFeaturesExample::bulk_retrieval_read() +{ 
+ + int i; + + typedef db_map smsmap_t; + smsmap_t smsmap(dmdb6, penv); + map ssmsmap; + if (explicit_txn) + dbstl::begin_txn(0, penv); + + cout<<"\nBulk retrieval support:\n"; + sms_t smsmsg; + time_t now; + smsmap.clear(); + for (i = 0; i < 2008; i++) { + smsmsg.from = 1000 + i; + smsmsg.to = 10000 - i; + smsmsg.sz = sizeof(smsmsg); + time(&now); + smsmsg.when = now; + ssmsmap.insert(make_pair(i, smsmsg)); + smsmap.insert(make_pair(i, smsmsg)); + } + + // bulk retrieval test. + map::iterator ssmsitr = ssmsmap.begin(); + i = 0; + const smsmap_t &rosmsmap = smsmap; + smsmap_t::const_iterator smsitr; + for (smsitr = rosmsmap.begin( + BulkRetrievalOption::bulk_retrieval()); + smsitr != smsmap.end(); i++) { + // The order may be different, so if the two key set are + // identical, it is right. + check_expr((ssmsmap.count(smsitr->first) == 1)); + check_expr((smsitr->second == ssmsmap[smsitr->first])); + if (i % 2) + smsitr++; + else + ++smsitr; // Exercise both pre/post increment. + if (i % 100 == 0) + smsitr.set_bulk_buffer( + (u_int32_t)(smsitr.get_bulk_bufsize() * 1.1)); + } + + smsmap.clear(); + ssmsmap.clear(); + + // Using db_vector. when moving its iterator sequentially to end(), + // bulk retrieval works, if moving randomly, it dose not function + // for db_vector iterators. Also, note that we can create a read only + // iterator when using db_vector<>::iterator rather than + // db_vector<>::const_iterator. 
+ db_vector vctsms; + db_vector::iterator itrv; + vector::iterator sitrv; + vector svctsms; + for (i = 0; i < 2008; i++) { + smsmsg.from = 1000 + i; + smsmsg.to = 10000 - i; + smsmsg.sz = sizeof(smsmsg); + time(&now); + smsmsg.when = now; + vctsms.push_back(smsmsg); + svctsms.push_back(smsmsg); + } + + for (itrv = vctsms.begin(ReadModifyWriteOption::no_read_modify_write(), + true, BulkRetrievalOption::bulk_retrieval(64 * 1024)), + sitrv = svctsms.begin(), i = 0; itrv != vctsms.end(); + ++itrv, ++sitrv, ++i) { + check_expr(*itrv == *sitrv); + if (i % 100 == 0) + itrv.set_bulk_buffer( + (u_int32_t)(itrv.get_bulk_bufsize() * 1.1)); + } + + if (explicit_txn) + dbstl::commit_txn(penv); + + +} //bulk_retrieval_read + +void StlAdvancedFeaturesExample::primitive() +{ + + int i; + + if ( explicit_txn) + dbstl::begin_txn(0, penv); + + + db_vector > ivi(dbprim, penv); + vector spvi4; + fill(ivi, spvi4); + check_expr(is_equal(ivi, spvi4)); + ivi.clear(false); + + db_vector > ivi2(dbprim, penv); + vector spvi5; + fill(ivi2, spvi5); + check_expr(is_equal(ivi2, spvi5)); + size_t vsz = ivi2.size(); + for (i = 0; i < (int)vsz - 1; i++) { + ivi2[i] += 3; + ivi2[i]--; + ivi2[i] <<= 2; + ivi2[i] = (~ivi2[i] | ivi2[i] & ivi2[i + 1] ^ + (2 * (-ivi2[i + 1]) + ivi2[i]) * 3) / + (ivi2[i] * ivi2[i + 1] + 1); + + spvi5[i] += 3; + spvi5[i]--; + spvi5[i] <<= 2; + spvi5[i] = (~spvi5[i] | spvi5[i] & spvi5[i + 1] ^ + (2 * (-spvi5[i + 1]) + spvi5[i]) * 3) / + (spvi5[i] * spvi5[i + 1] + 1); + } + check_expr(is_equal(ivi2, spvi5)); + ivi2.clear(false); + + typedef db_vector > dbl_vct_t; + dbl_vct_t dvi(dbprim, penv); + vector dsvi; + for (i = 0; i < 10; i++) { + dvi.push_back(i * 3.14); + dsvi.push_back(i * 3.14); + } + check_expr(is_equal(dvi, dsvi)); + + dbl_vct_t::iterator ditr; + vector::iterator sditr; + for (ditr = dvi.begin(), sditr = dsvi.begin(); + ditr != dvi.end(); ++ditr, ++sditr){ + *ditr *= 2; + *sditr *= 2; + } + + check_expr(is_equal(dvi, dsvi)); + + for (i = 0; i < 9; i++) { + 
dvi[i] /= (-dvi[i] / 3 + 2 * dvi[i + 1]) / (1 + dvi[i]) + 1; + dsvi[i] /= (-dsvi[i] / 3 + 2 * dsvi[i + 1]) / (1 + dsvi[i]) + 1; + } + + cout<<"\ndsvi after math operations: \n"; + for (i = 0; i <= 9; i++) + cout< intq(quev); + std::queue sintq; + check_expr(intq.empty()); + check_expr(intq.size() == 0); + for (i = 0; i < 100; i++) { + intq.push(ptint(i)); + sintq.push(i); + check_expr(intq.front() == 0); + check_expr(intq.back() == i); + } + check_expr(intq.size() == 100); + for (i = 0; i < 100; i++) { + check_expr(intq.front() == i); + check_expr(intq.back() == 99); + check_expr(intq.front() == sintq.front()); + check_expr(sintq.back() == intq.back()); + sintq.pop(); + intq.pop(); + } + check_expr(intq.size() == 0); + check_expr(intq.empty()); + quev.clear(); + + // std::priority_queue test + cout<<"\ndb_vector working with std::priority_queue\n"; + + std::vector squev; + intvec_t pquev(pquedb, penv); + pquev.clear(); + std::priority_queue + intpq(ptintless, pquev); + std::priority_queue, ptintless_ft> + sintpq(ptintless, squev); + + check_expr(intpq.empty()); + check_expr(intpq.size() == 0); + ptint tmppq, tmppq1; + set ptintset; + for (i = 0; i < 100; i++) { + for (;;) {// avoid duplicate + tmppq = rand(); + if (ptintset.count(tmppq) == 0) { + intpq.push(tmppq); + sintpq.push(tmppq); + ptintset.insert(tmppq); + break; + } + } + + } + check_expr(intpq.empty() == false); + check_expr(intpq.size() == 100); + for (i = 0; i < 100; i++) { + tmppq = intpq.top(); + tmppq1 = sintpq.top(); + if (i == 98 && tmppq != tmppq1) { + tmppq = intpq.top(); + } + if (i < 98) + check_expr(tmppq == tmppq1); + if (i == 97) + intpq.pop(); + else + intpq.pop(); + sintpq.pop(); + } + check_expr(intpq.empty()); + check_expr(intpq.size() == 0); + + + // std::stack test + cout<<"\ndb_vector working with std::stack\n"; + std::stack intstk(quev); + std::stack sintstk; + check_expr(intstk.size() == 0); + check_expr(intstk.empty()); + for (i = 0; i < 100; i++) { + intstk.push(ptint(i)); + 
sintstk.push(ptint(i)); + check_expr(intstk.top() == i); + check_expr(intstk.size() == (size_t)i + 1); + } + + for (i = 99; i >= 0; i--) { + check_expr(intstk.top() == ptint(i)); + check_expr(intstk.top() == sintstk.top()); + sintstk.pop(); + intstk.pop(); + check_expr(intstk.size() == (size_t)i); + } + check_expr(intstk.size() == 0); + check_expr(intstk.empty()); + + // Vector with no handles specified. + ptint_vector simple_vct(10); + vector ssvct(10); + for (i = 0; i < 10; i++) { + simple_vct[i] = ptint(i); + ssvct[i] = ptint(i); + } + check_expr(is_equal(simple_vct, ssvct)); + + if ( explicit_txn) + dbstl::commit_txn(penv); + + return; +} // queue_stack + +DbEnv *g_env; +int g_test_start_txn; + +int main(int argc, char *argv[]) +{ + int c, ret; + char *envhome = NULL, *mode = NULL; + int flags = DB_THREAD, setflags = 0, explicit_txn = 0; + int test_autocommit = 1, totalinsert = 100, verbose = 0; + DBTYPE dbtype = DB_BTREE; + int shortest = 50, longest = 200; + u_int32_t cachesize = 8 * 1024 * 1024; + DbEnv *penv = NULL; + + TestParam *ptp = new TestParam; + + while ((c = getopt(argc, argv, "T:c:hH:k:l:m:n:r:s:t:v")) != EOF) { + switch (c) { + case 'T': + totalinsert = atoi(optarg); + break; + case 'c': + cachesize = atoi(optarg); + break; + case 'h': + usage(); + return 0; + break; + case 'H': + envhome = strdup(optarg); + break; + case 'k': + shortest = atoi(optarg); + break; + case 'l': + longest = atoi(optarg); + break; + case 'm': + mode = optarg; + break; + case 's': // db type for associative containers + if (*optarg == 'h') // hash db type + dbtype = DB_HASH; + else if (*optarg == 'b') + dbtype = DB_BTREE; + else + usage(); + break; + case 't': + explicit_txn = 1; + if (*optarg == 'a') + setflags = DB_AUTO_COMMIT; + else if (*optarg == 'e') // explicit txn + test_autocommit = 0; + else + usage(); + break; + case 'v': + verbose = 1; + break; + default: + usage(); + break; + } + } + + if (mode == NULL) + flags |= 0; + else if (*mode == 'c') //cds + flags 
|= DB_INIT_CDB; + else if (*mode == 't') + flags |= DB_INIT_TXN | DB_RECOVER + | DB_INIT_LOG | DB_INIT_LOCK; + else + flags |= 0; + + ptp->explicit_txn = explicit_txn; + ptp->flags = flags; + ptp->dbtype = dbtype; + ptp->setflags = setflags; + ptp->test_autocommit = test_autocommit; + ptp->dboflags = DB_THREAD; + + // Call this method before any use of dbstl. + dbstl_startup(); + penv = new DbEnv(DB_CXX_NO_EXCEPTIONS); + BDBOP(penv->set_flags(setflags, 1), ret); + BDBOP(penv->set_cachesize(0, cachesize, 1), ret); + + // Methods of containers returning a reference costs + // locker/object/lock slots. + penv->set_lk_max_lockers(10000); + penv->set_lk_max_objects(10000); + penv->set_lk_max_locks(10000); + + penv->set_flags(DB_TXN_NOSYNC, 1); + + BDBOP(penv->open(envhome, + flags | DB_CREATE | DB_INIT_MPOOL, 0777), ret); + register_db_env(penv); + ptp->dbenv = penv; + g_env = penv; + g_test_start_txn = test_autocommit * explicit_txn; + + // Create the example and run. + StlAdvancedFeaturesExample example(ptp); + example.run(); + + // Clean up and exit. + delete ptp; + dbstl_exit();// Call this method before the process exits. 
+ if (penv) + delete penv; +} + +void usage() +{ + cout<< + "\nUsage: StlAdvancedFeaturesExample\n\ +[-c number cache size]\n\ +[-h print this help message then exit]\n\ +[-H dir Specify dir as environment home]\n\ +[-k number shortest string inserted]\n\ +[-l number longest string inserted]\n\ +[-m ds/cds/tds use ds/cds/tds product]\n\ +[-s b/h use btree/hash type of DB for assocative \ +containers] \n\ +[-t a/e for tds, use autocommit/explicit transaction in \ +the test] \n\ +[-v verbose mode, output more info in multithread \ +test]\n\ +"; +} + +void using_charstr(TCHAR*str) +{ + cout<<_T("using str read only with non-const parameter type:")<get_data(); + + result->set_data(&(p->to)); + result->set_size(sizeof(p->to)); + return 0; +} + +void SMSMsgRestore(SMSMsg2& dest, const void *srcdata) +{ + char *p = dest.msg; + + memcpy(&dest, srcdata, sizeof(dest)); + + dest.msg = (char *)DbstlReAlloc(p, dest.szmsg); + strcpy(dest.msg, (char*)srcdata + sizeof(dest)); +} + +u_int32_t SMSMsgSize(const SMSMsg2& elem) +{ + return (u_int32_t)(sizeof(elem) + strlen(elem.msg) + 1); +} + +void SMSMsgCopy(void *dest, const SMSMsg2&elem) +{ + memcpy(dest, &elem, sizeof(elem)); + strcpy((char*)dest + sizeof(elem), elem.msg); +} + +u_int32_t rgblen(const RGBB *seq) +{ + size_t s = 0; + + const RGBB *p = seq, rgb0; + for (s = 0, p = seq; memcmp(p, &rgb0, sizeof(rgb0)) != 0; p++, s++); + // this size includes the all-0 last element used like '\0' + // for char* strings + return (u_int32_t)(s + 1); +} + +// The seqs sequence of RGBB objects may not reside in a consecutive chunk of +// memory but the seqd points to a consecutive chunk of mem large enough to +// hold all objects from seqs. 
+void rgbcpy(RGBB *seqd, const RGBB *seqs, size_t num) +{ + const RGBB *p = seqs; + RGBB rgb0; + RGBB *q = seqd; + + memset((void *)&rgb0, 0, sizeof(rgb0)); + for (p = seqs, q = seqd; memcmp(p, &rgb0, sizeof(rgb0)) != 0 && + num > 0; num--, p++, q++) + memcpy((void *)q, p, sizeof(RGBB)); + memcpy((void *)q, p, sizeof(RGBB));// append trailing end token. +} diff --git a/examples/stl/StlAdvancedFeatures.h b/examples/stl/StlAdvancedFeatures.h new file mode 100644 index 00000000..ae9c312b --- /dev/null +++ b/examples/stl/StlAdvancedFeatures.h @@ -0,0 +1,718 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_AFE_H__ +#define _DB_STL_AFE_H__ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#define DB_STL_HAVE_DB_TIMESPEC 1 +#include "dbstl_map.h" +#include "dbstl_set.h" +#include "dbstl_vector.h" + +using namespace std; +using namespace dbstl; + +///////////////////////////////////////////////////////////////////////// +///////////////////////// Macro and typedef definitions /////////// + +#define check_expr(expression) do { \ + if (!(expression)) { \ + FailedAssertionException ex(__FILE__, __LINE__, #expression);\ + throw ex; } } while (0) + +#define ptint int +#define TOINT (int) +typedef db_vector > intvec_t; +typedef db_vector > ptint_vector; +typedef db_map > dm_int_t; +typedef db_multimap > dmm_int_t; +typedef db_set > dms_int_t; +typedef db_multiset > dmms_int_t; +typedef bool (*ptintless_ft)(const ptint& a, const ptint& b); + +extern "C" { +extern int getopt(int, char * const *, const char *); +extern char *optarg; +extern int optind; +} +extern int g_test_start_txn; +extern DbEnv *g_env; +/////////////////////////////////////////////////////////////////////// +//////////////////////// Function Declarations //////////////////// +// XXX!!! 
Function templates can't be declared here otherwise the declarations +// here will be deemed as the definition, so at link time these symbols are +// not resolved. So like class templates, function templates can't be separated +// as declarations and definitions, only definitions and only be built into one +// object file otherwise there will be "multiple symbol definitions". OTOH, compilers +// can avoid the multiple definitions if we are building a class template instantiation +// in multiple object files, so class tempaltes are recommended to use rather than +// function templates. Only use function templates if it is a simple one and used +// only in one code file. +// + +int get_dest_secdb_callback(Db *secondary, const Dbt *key, + const Dbt *data, Dbt *result); +void using_charstr(TCHAR*str); + +class RGBB; +class SMSMsg2; +void SMSMsgRestore(SMSMsg2& dest, const void *srcdata); +u_int32_t SMSMsgSize(const SMSMsg2& elem); +void SMSMsgCopy(void *dest, const SMSMsg2&elem); +u_int32_t rgblen(const RGBB * seq); +void rgbcpy(RGBB *seq, const RGBB *, size_t); + + +///////////////////////////////////////////////////////////////////////////////// +////////////////////////// Utility class definitions /////////////////////////// + +class BaseMsg +{ +public: + time_t when; + int to; + int from; + + BaseMsg() + { + to = from = 0; + when = 0; + } + + BaseMsg(const BaseMsg& msg) + { + to = msg.to; + from = msg.from; + when = msg.when; + } + + bool operator==(const BaseMsg& msg2) const + { + return when == msg2.when && to == msg2.to && from == msg2.from; + } + + bool operator<(const BaseMsg& msg2) const + { + return to < msg2.to; + } +}; + +// used to test arbitary obj storage(not in one chunk) +class SMSMsg2 : public BaseMsg +{ +public: + typedef SMSMsg2 self; + SMSMsg2(time_t tm, const char *msgp, int t) + { + memset(this, 0, sizeof(*this)); + when = tm; + szmsg = strlen(msgp) + 1; + msg = (char *)DbstlMalloc(szmsg); + strncpy(msg, msgp, szmsg); + to = t; + + mysize = 
sizeof(*this); //+ szmsg; + } + + SMSMsg2() + { + memset(this, 0, sizeof(SMSMsg2)); + } + + SMSMsg2(const self& obj) : BaseMsg(obj) + { + mysize = obj.mysize; + szmsg = obj.szmsg; + if (szmsg > 0 && obj.msg != NULL) { + msg = (char *)DbstlMalloc(szmsg); + strncpy(msg, obj.msg, szmsg); + } else + msg = NULL; + } + + ~SMSMsg2() + { + if (msg) + free(msg); + } + + const self& operator = (const self &obj) + { + + this->from = obj.from; + to = obj.to; + when = obj.when; + mysize = obj.mysize; + szmsg = obj.szmsg; + if (szmsg > 0 && obj.msg != NULL) { + msg = (char *)DbstlReAlloc(msg, szmsg); + strncpy(msg, obj.msg, szmsg); + } + return obj; + } + + bool operator == (const self&obj) const + { + return BaseMsg::operator==(obj) && strcmp(obj.msg, msg) == 0; + } + + const static size_t BUFLEN = 256; + size_t mysize; + size_t szmsg; + char *msg; + +};//SMSMsg2 + +// SMS message class +class SMSMsg : public BaseMsg +{ + + +public: + + size_t mysize; + size_t szmsg; + char msg[1]; + static SMSMsg* make_sms_msg(time_t t, const char*msg, int dest) + { + size_t mlen = 0, totalsz = 0; + + SMSMsg *p = (SMSMsg *)DbstlMalloc(totalsz = (sizeof(SMSMsg) + (mlen = strlen(msg) + 4))); + memset(p, 0, totalsz); + // adding sizeof(p->to) to avoid memory alignment issues + p->mysize = sizeof(SMSMsg) + mlen; + p->when = t; + p->szmsg = mlen - 3; + p->to = dest; + p->from = 0; + strcpy(&(p->msg[0]), msg); + + return p; + } + + SMSMsg() + { + + } +protected: + SMSMsg(time_t t, const char*msg1, int dest) + { + size_t mlen = 0; + + when = t; + szmsg = strlen(msg1) + 1; + mlen = strlen(msg1); + strncpy((char*)&(this->msg[0]), msg1, mlen); + *(int*)(((char*)&(this->msg[0])) + mlen + 1) = dest; + } + +};// SMSMsg + +class RGBB +{ +public: + typedef unsigned char color_t; + + color_t r_, g_, b_, bright_; + + RGBB() + { + memset(this, 0, sizeof(RGBB));// complete 0 means invalid + } + + RGBB(color_t r, color_t g, color_t b, color_t brightness) + { + r_ = r; + g_ = g; + b_ = b; + bright_ = brightness; + 
} + +};// RGBB + +class rand_str_dbt +{ +public: + static const size_t BUFLEN = 2048; + static bool init; + static char buf[BUFLEN]; + + rand_str_dbt() + { + int len = BUFLEN, i; + + if (!init) { + init = true; + + for (i = 0; i < len - 1; i++) { + buf[i] = 'a' + rand() % 26; + } + buf[i] = '\0'; + } + } + // dbt is of DB_DBT_USERMEM, mem allocated by DbstlMalloc + void operator()(Dbt&dbt, string&str, + size_t shortest = 30, size_t longest = 150) + { + int rd = rand(); + + if (rd < 0) + rd = -rd; + str.clear(); + + check_expr(shortest > 0 && longest < BUFLEN); + check_expr(dbt.get_flags() & DB_DBT_USERMEM);// USER PROVIDE MEM + size_t len = (u_int32_t)(rd % longest); + if (len < shortest) + len = shortest; + else if (len >= BUFLEN) + len = BUFLEN - 1; + // start must be less than BUFLEN - len, otherwise we have no + // len bytes to offer + size_t start = rand() % (BUFLEN - len); + + char c = buf[start + len]; + + buf[start + len] = '\0'; + str = buf + start; + if (dbt.get_ulen() < (len + 1)) { + free(dbt.get_data()); + dbt.set_data(DbstlMalloc(len + 1)); + check_expr(dbt.get_data() != NULL); + } + memcpy(dbt.get_data(), (void*)(buf + start), len + 1); + dbt.set_size(u_int32_t(len + 1));// store the '\0' at the end + buf[start + len] = c; + } +}; // rand_str_dbt +bool rand_str_dbt::init = false; +char rand_str_dbt::buf[BUFLEN]; + +struct TestParam{ + int flags, setflags, test_autocommit, dboflags, explicit_txn; + DBTYPE dbtype; + DbEnv *dbenv; +}; + +// a mobile phone SMS structure for test. 
will add more members in future +class sms_t +{ +public: + size_t sz; + time_t when; + int from; + int to; + char msg[512]; + + const sms_t& operator=(const sms_t&me) + { + memcpy(this, &me, sizeof(*this)); + return me; + } + + bool operator==(const sms_t& me) const + { + return memcmp(this, &me, sizeof(me)) == 0; + } + bool operator!=(const sms_t& me) + { + return memcmp(this, &me, sizeof(me)) != 0; + } +}; + +bool ptintless(const ptint& a, const ptint& b) +{ + return a < b; +} + +template +void fill(db_vector >&v, + vector&sv, T start = 0, int n = 5 ) +{ + int i; + + v.clear(); + sv.clear(); + for (i = 0; i < n; i++) { + v.push_back(i + start); + sv.push_back(i + start); + } +} + + + +template +void fill(db_map >&m, map&sm, + T start = 0, int n = 5) +{ + int i; + T pi; + + m.clear(); + sm.clear(); + for (i = 0; i < n; i++) { + pi = i + start; + m.insert(make_pair(pi, pi)); + sm.insert(make_pair(pi, pi)); + } + +} + + + + +template +void fill(db_set >&m, set&sm, + T start = 0, int n = 5) +{ + int i; + T pi; + + m.clear(); + sm.clear(); + for (i = 0; i < n; i++) { + pi = i + start; + m.insert(pi); + sm.insert(pi); + } + +} + + + + +template +void fill(db_multimap >&m, multimap&sm, + T start = 0, int n = 5, size_t randn = 5) +{ + int i; + size_t j, cnt = 0; + + if (randn < 5) + randn = 5; + + m.clear(); + sm.clear(); + for (i = 0; i < n; i++) { + cnt = abs(rand()) % randn; + if (cnt == 0) + cnt = randn; + i += start; + for (j = 0; j < cnt; j++) {// insert duplicates + m.insert(make_pair(i, i)); + sm.insert(make_pair(i, i)); + } + i -= start; + } + +} + + + + +template +void fill(db_multiset >&m, multiset&sm, + T start = 0, int n = 5 , size_t randn = 5) +{ + int i; + size_t j, cnt; + + if (randn < 5) + randn = 5; + + m.clear(); + sm.clear(); + for (i = 0; i < n; i++) { + cnt = abs(rand()) % randn; + if (cnt == 0) + cnt = randn; + i += start; + for (j = 0; j < cnt; j++) {// insert duplicates + m.insert(i); + sm.insert(i); + } + i -= start; + } +} + +template +bool 
is_equal(db_map >& dv, map&v) +{ + size_t s1, s2; + bool ret; + typename db_map >::iterator itr1; + typename map::iterator itr2; + + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())){ + ret = false; + goto done; + } + + for (itr1 = dv.begin(), itr2 = v.begin(); + itr1 != dv.end(); ++itr1, ++itr2) { + if (itr1->first != itr2->first || itr1->second != itr2->second){ + ret = false; + goto done; + } + + } + + ret = true; +done: + if (g_test_start_txn) + commit_txn(g_env); + return ret; + +} + +template +bool is_equal(db_map >& dv, map&v) +{ + size_t s1, s2; + bool ret; + typename db_map >::iterator itr1; + typename map::iterator itr2; + + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())){ + + ret = false; + goto done; + } + + + for (itr1 = dv.begin(), itr2 = v.begin(); + itr1 != dv.end(); ++itr1, ++itr2) { + if (itr1->first != itr2->first || itr1->second != itr2->second){ + ret = false; + goto done; + } + } + + ret = true; +done: + if (g_test_start_txn) + commit_txn(g_env); + return ret; + +} + + +template +bool is_equal(const db_set >&s1, const set&s2) +{ + bool ret; + typename db_set >::iterator itr; + + if (g_test_start_txn) + begin_txn(0, g_env); + if (s1.size() != s2.size()){ + ret = false; + goto done; + } + + for (itr = s1.begin(); itr != s1.end(); itr++) { + if (s2.count(*itr) == 0) { + ret = false; + goto done; + } + } + ret = true; +done: + if (g_test_start_txn) + commit_txn(g_env); + return ret; + +} + + +template +bool is_equal(const db_vector& dv, const vector&v) +{ + size_t s1, s2; + bool ret; + T1 t1; + size_t i, sz = v.size(); + + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())) { + ret = false; + goto done; + } + + + for (i = 0; i < sz; i++) { + t1 = T1(dv[(index_type)i] ); + if (t1 != v[i]){ + ret = false; + goto done; + } + } + ret = true; +done: + + if (g_test_start_txn) + commit_txn(g_env); + return ret; +} + +// The following four 
functions are designed to work with is_equal to compare +// char*/wchar_t* strings properly, unforturnately they can not override +// the default pointer value comparison behavior. +bool operator==(ElementHolders1, const char *s2); +bool operator==(ElementHolders1, const wchar_t *s2); +bool operator!=(ElementHolders1, const char *s2); +bool operator!=(ElementHolders1, const wchar_t *s2); + +template +bool is_equal(const db_vector >& dv, const vector&v) +{ + size_t s1, s2; + bool ret; + size_t i, sz = v.size(); + + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())) { + ret = false; + goto done; + } + + for (i = 0; i < sz; i++) { + if (dv[(index_type)i] != v[i]) { + ret = false; + goto done; + } + } + ret = true; +done: + + if (g_test_start_txn) + commit_txn(g_env); + return ret; +} + + +template +bool is_equal(db_vector& dv, list&v) +{ + size_t s1, s2; + bool ret; + typename db_vector::iterator itr; + typename list::iterator itr2; + + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())) { + ret = false; + goto done; + } + + + for (itr = dv.begin(), itr2 = v.begin(); + itr2 != v.end(); ++itr, ++itr2) { + if (*itr != *itr2) { + ret = false; + goto done; + } + } + ret = true; +done: + + if (g_test_start_txn) + commit_txn(g_env); + return ret; +} + +bool is_equal(db_vector >&v1, + std::list &v2) +{ + db_vector >::iterator itr; + std::list::iterator itr2; + + if (v1.size() != v2.size()) + return false; + + for (itr = v1.begin(), itr2 = v2.begin(); + itr2 != v2.end(); ++itr, ++itr2) + if (strcmp(*itr, (*itr2).c_str()) != 0) + return false; + + return true; +} + +template +class atom_equal { +public: + bool operator()(T1 a, T1 b) + { + return a == b; + } +}; +template<> +class atom_equal { +public: + bool operator()(const char *s1, const char *s2) + { + return strcmp(s1, s2) == 0; + } +}; + +template +bool is_equal(const db_vector >& dv, const list&v) +{ + size_t s1, s2; + bool ret; + typename 
db_vector >::const_iterator itr; + typename list::const_iterator itr2; + atom_equal eqcmp; + if (g_test_start_txn) + begin_txn(0, g_env); + if ((s1 = dv.size()) != (s2 = v.size())) { + ret = false; + goto done; + } + + + for (itr = dv.begin(), itr2 = v.begin(); + itr2 != v.end(); ++itr, ++itr2) { + if (!eqcmp(*itr, *itr2)) { + ret = false; + goto done; + } + } + ret = true; +done: + + if (g_test_start_txn) + commit_txn(g_env); + return ret; +} + +#endif // ! _DB_STL_AFE_H__ diff --git a/examples/stl/StlTpcbExample.cpp b/examples/stl/StlTpcbExample.cpp new file mode 100644 index 00000000..30b1afca --- /dev/null +++ b/examples/stl/StlTpcbExample.cpp @@ -0,0 +1,626 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include +#include +#include +#include + +#include + +#include "dbstl_vector.h" +#include "dbstl_map.h" + +using std::cout; +using std::cerr; + +typedef enum { ACCOUNT, BRANCH, TELLER } FTYPE; + +static int invarg(int, char *); +u_int32_t random_id(FTYPE, u_int32_t, u_int32_t, u_int32_t); +u_int32_t random_int(u_int32_t, u_int32_t); +static int usage(void); + +int verbose; +const char *progname = "StlTpcbExample"; // Program name. + +// Forward declared data classes +class Defrec; +class Histrec; + +typedef dbstl::db_map DefrecMap; +typedef dbstl::db_vector HistrecVector; + +class StlTpcbExample : public DbEnv +{ +public: + void populate(int, int, int, int); + void run(int, int, int, int); + int txn(DefrecMap *, DefrecMap *, DefrecMap *, HistrecVector *, + int accounts, int branches, int tellers); + void populateHistory( + HistrecVector *, int, u_int32_t, u_int32_t, u_int32_t); + void populateTable( + DefrecMap *, u_int32_t, u_int32_t, int, const char *); + + // Note: the constructor creates a DbEnv(), which is + // not fully initialized until the DbEnv::open() method + // is called. 
+ // + StlTpcbExample(const char *home, int cachesize, int flags); + +private: + static const char FileName[]; + + // no need for copy and assignment + StlTpcbExample(const StlTpcbExample &); + void operator = (const StlTpcbExample &); +}; + +// +// This program implements a basic TPC/B driver program. To create the +// TPC/B database, run with the -i (init) flag. The number of records +// with which to populate the account, history, branch, and teller tables +// is specified by the a, s, b, and t flags respectively. To run a TPC/B +// test, use the n flag to indicate a number of transactions to run (note +// that you can run many of these processes in parallel to simulate a +// multiuser test run). +// +#define TELLERS_PER_BRANCH 100 +#define ACCOUNTS_PER_TELLER 1000 +#define HISTORY_PER_BRANCH 2592000 + +/* + * The default configuration that adheres to TPCB scaling rules requires + * nearly 3 GB of space. To avoid requiring that much space for testing, + * we set the parameters much lower. If you want to run a valid 10 TPS + * configuration, define VALID_SCALING. 
+ */ +#ifdef VALID_SCALING +#define ACCOUNTS 1000000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 25920000 +#endif + +#ifdef TINY +#define ACCOUNTS 1000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 10000 +#endif + +#if !defined(VALID_SCALING) && !defined(TINY) +#define ACCOUNTS 100000 +#define BRANCHES 10 +#define TELLERS 100 +#define HISTORY 259200 +#endif + +#define HISTORY_LEN 100 +#define RECLEN 100 +#define BEGID 1000000 + +class Defrec { +public: + u_int32_t id; + u_int32_t balance; + u_int8_t pad[RECLEN - sizeof(u_int32_t) - sizeof(u_int32_t)]; +}; + +class Histrec { +public: + u_int32_t aid; + u_int32_t bid; + u_int32_t tid; + u_int32_t amount; + u_int8_t pad[RECLEN - 4 * sizeof(u_int32_t)]; +}; + +int +main(int argc, char *argv[]) +{ + unsigned long seed; + int accounts, branches, tellers, history; + int iflag, mpool, ntxns, txn_no_sync; + const char *home; + char *endarg; + + home = "TESTDIR"; + accounts = branches = history = tellers = 0; + txn_no_sync = 0; + mpool = ntxns = 0; + verbose = 0; + iflag = 0; + seed = (unsigned long)time(NULL); + + for (int i = 1; i < argc; ++i) { + + if (strcmp(argv[i], "-a") == 0) { + // Number of account records + if ((accounts = atoi(argv[++i])) <= 0) + return (invarg('a', argv[i])); + } + else if (strcmp(argv[i], "-b") == 0) { + // Number of branch records + if ((branches = atoi(argv[++i])) <= 0) + return (invarg('b', argv[i])); + } + else if (strcmp(argv[i], "-c") == 0) { + // Cachesize in bytes + if ((mpool = atoi(argv[++i])) <= 0) + return (invarg('c', argv[i])); + } + else if (strcmp(argv[i], "-f") == 0) { + // Fast mode: no txn sync. + txn_no_sync = 1; + } + else if (strcmp(argv[i], "-h") == 0) { + // DB home. + home = argv[++i]; + } + else if (strcmp(argv[i], "-i") == 0) { + // Initialize the test. 
+ iflag = 1; + } + else if (strcmp(argv[i], "-n") == 0) { + // Number of transactions + if ((ntxns = atoi(argv[++i])) <= 0) + return (invarg('n', argv[i])); + } + else if (strcmp(argv[i], "-S") == 0) { + // Random number seed. + seed = strtoul(argv[++i], &endarg, 0); + if (*endarg != '\0') + return (invarg('S', argv[i])); + } + else if (strcmp(argv[i], "-s") == 0) { + // Number of history records + if ((history = atoi(argv[++i])) <= 0) + return (invarg('s', argv[i])); + } + else if (strcmp(argv[i], "-t") == 0) { + // Number of teller records + if ((tellers = atoi(argv[++i])) <= 0) + return (invarg('t', argv[i])); + } + else if (strcmp(argv[i], "-v") == 0) { + // Verbose option. + verbose = 1; + } + else { + return (usage()); + } + } + + srand((unsigned int)seed); + + accounts = accounts == 0 ? ACCOUNTS : accounts; + branches = branches == 0 ? BRANCHES : branches; + tellers = tellers == 0 ? TELLERS : tellers; + history = history == 0 ? HISTORY : history; + + if (verbose) + cout << (long)accounts << " Accounts, " + << (long)branches << " Branches, " + << (long)tellers << " Tellers, " + << (long)history << " History\n"; + + try { + // Initialize the database environment. + // Must be done in within a try block, unless you + // change the error model in the environment options. + // + StlTpcbExample app( + home, mpool, txn_no_sync ? 
DB_TXN_NOSYNC : 0); + + if (iflag) { + if (ntxns != 0) + return (usage()); + app.populate(accounts, branches, history, tellers); + } + else { + if (ntxns == 0) + return (usage()); + app.run(ntxns, accounts, branches, tellers); + } + + dbstl::dbstl_exit(); + return (EXIT_SUCCESS); + } + catch (DbException &dbe) { + cerr << "StlTpcbExample: " << dbe.what() << "\n"; + return (EXIT_FAILURE); + } +} + +static int +invarg(int arg, char *str) +{ + cerr << "StlTpcbExample: invalid argument for -" + << (char)arg << ": " << str << "\n"; + return (EXIT_FAILURE); +} + +static int +usage() +{ + cerr << "usage: StlTpcbExample [-fiv] [-a accounts] [-b branches]\n" + << " [-c cachesize] [-h home] [-n transactions]\n" + << " [-S seed] [-s history] [-t tellers]\n"; + return (EXIT_FAILURE); +} + +StlTpcbExample::StlTpcbExample(const char *home, int cachesize, int flags) +: DbEnv(DB_CXX_NO_EXCEPTIONS) +{ + u_int32_t local_flags; + + set_error_stream(&cerr); + set_errpfx("StlTpcbExample"); + (void)set_lk_detect(DB_LOCK_DEFAULT); + (void)set_cachesize(0, cachesize == 0 ? + 4 * 1024 * 1024 : (u_int32_t)cachesize, 0); + + set_lk_max_lockers(1024 * 128); + set_lk_max_locks(1024 * 128); + set_lk_max_objects(1024 * 128); + if (flags & (DB_TXN_NOSYNC)) + set_flags(DB_TXN_NOSYNC, 1); + flags &= ~(DB_TXN_NOSYNC); + + local_flags = flags | DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN; + open(home, local_flags, 0); + dbstl::register_db_env(this); +} + +// +// Initialize the database to the specified number of accounts, branches, +// history records, and tellers. 
+// +void +StlTpcbExample::populate(int accounts, int branches, int history, int tellers) +{ + Db *dbp; + DefrecMap *accounts_map, *branches_map, *tellers_map; + HistrecVector *history_vector; + + int err, oflags; + u_int32_t balance, idnum; + u_int32_t end_anum, end_bnum, end_tnum; + u_int32_t start_anum, start_bnum, start_tnum; + + idnum = BEGID; + balance = 500000; + oflags = DB_CREATE; + + dbp = new Db(this, DB_CXX_NO_EXCEPTIONS); + dbp->set_h_nelem((unsigned int)accounts); + + if ((err = dbp->open(NULL, "account", NULL, + DB_HASH, oflags, 0644)) != 0) { + DbException except("Account file create failed", err); + throw except; + } + + dbstl::register_db(dbp); + accounts_map = new DefrecMap(dbp, this); + start_anum = idnum; + populateTable(accounts_map, idnum, balance, accounts, "account"); + idnum += accounts; + end_anum = idnum - 1; + // Automatically closes the underlying database. + delete accounts_map; + dbstl::close_db(dbp); + delete dbp; + if (verbose) + cout << "Populated accounts: " + << (long)start_anum << " - " << (long)end_anum << "\n"; + + dbp = new Db(this, DB_CXX_NO_EXCEPTIONS); + // + // Since the number of branches is very small, we want to use very + // small pages and only 1 key per page. This is the poor-man's way + // of getting key locking instead of page locking. 
+ // + dbp->set_h_ffactor(1); + dbp->set_h_nelem((unsigned int)branches); + dbp->set_pagesize(512); + + if ((err = dbp->open(NULL, + "branch", NULL, DB_HASH, oflags, 0644)) != 0) { + DbException except("Branch file create failed", err); + throw except; + } + dbstl::register_db(dbp); + branches_map = new DefrecMap(dbp, this); + start_bnum = idnum; + populateTable(branches_map, idnum, balance, branches, "branch"); + idnum += branches; + end_bnum = idnum - 1; + delete branches_map; + dbstl::close_db(dbp); + delete dbp; + + if (verbose) + cout << "Populated branches: " + << (long)start_bnum << " - " << (long)end_bnum << "\n"; + + dbp = new Db(this, DB_CXX_NO_EXCEPTIONS); + // + // In the case of tellers, we also want small pages, but we'll let + // the fill factor dynamically adjust itself. + // + dbp->set_h_ffactor(0); + dbp->set_h_nelem((unsigned int)tellers); + dbp->set_pagesize(512); + + if ((err = dbp->open(NULL, + "teller", NULL, DB_HASH, oflags, 0644)) != 0) { + DbException except("Teller file create failed", err); + throw except; + } + + dbstl::register_db(dbp); + tellers_map = new DefrecMap(dbp, this); + start_tnum = idnum; + populateTable(tellers_map, idnum, balance, tellers, "teller"); + idnum += tellers; + end_tnum = idnum - 1; + delete tellers_map; + dbstl::close_db(dbp); + delete dbp; + if (verbose) + cout << "Populated tellers: " + << (long)start_tnum << " - " << (long)end_tnum << "\n"; + + dbp = new Db(this, DB_CXX_NO_EXCEPTIONS); + dbp->set_re_len(HISTORY_LEN); + if ((err = dbp->open(NULL, + "history", NULL, DB_RECNO, oflags, 0644)) != 0) { + DbException except("Create of history file failed", err); + throw except; + } + + dbstl::register_db(dbp); + history_vector = new HistrecVector(dbp, this); + populateHistory(history_vector, history, accounts, branches, tellers); + delete history_vector; + dbstl::close_db(dbp); + delete dbp; +} + +void +StlTpcbExample::populateTable(DefrecMap *drm, u_int32_t start_id, + u_int32_t balance, int nrecs, const char 
*msg) +{ + Defrec drec; + int i; + dbstl::pair::iterator, bool > ib; + + memset(&drec.pad[0], 1, sizeof(drec.pad)); + try { + for (i = 0; i < nrecs; i++) { + drec.id = start_id + (u_int32_t)i; + drec.balance = balance; + ib = drm->insert(dbstl::make_pair(drec.id, drec)); + if (ib.second == false) + throw "failed to insert record"; + } + } catch (...) { + throw; + } +} + +void +StlTpcbExample::populateHistory(HistrecVector *hrm, int nrecs, + u_int32_t accounts, u_int32_t branches, + u_int32_t tellers) +{ + Histrec hrec; + int i; + + memset(&hrec.pad[0], 1, sizeof(hrec.pad)); + hrec.amount = 10; + try { + for (i = 1; i <= nrecs; i++) { + hrec.aid = random_id( + ACCOUNT, accounts, branches, tellers); + hrec.bid = random_id( + BRANCH, accounts, branches, tellers); + hrec.tid = random_id( + TELLER, accounts, branches, tellers); + hrm->push_back(hrec); + } + } catch (...) { + throw; + } +} + +u_int32_t +random_int(u_int32_t lo, u_int32_t hi) +{ + u_int32_t ret; + int t; + + t = rand(); + ret = (u_int32_t)(((double)t / ((double)(RAND_MAX) + 1)) * + (hi - lo + 1)); + ret += lo; + return (ret); +} + +u_int32_t +random_id(FTYPE type, u_int32_t accounts, u_int32_t branches, u_int32_t tellers) +{ + u_int32_t min, max, num; + + max = min = BEGID; + num = accounts; + switch (type) { + case TELLER: + min += branches; + num = tellers; + // Fallthrough + case BRANCH: + if (type == BRANCH) + num = branches; + min += accounts; + // Fallthrough + case ACCOUNT: + max = min + num - 1; + } + return (random_int(min, max)); +} + +void +StlTpcbExample::run(int n, int accounts, int branches, int tellers) +{ + Db *adb, *bdb, *hdb, *tdb; + DefrecMap *accounts_map, *branches_map, *tellers_map; + HistrecVector *history_vector; + int failed, oflags, ret, txns; + time_t start_time, end_time; + + // + // Open the database files. 
+ // + oflags = DB_AUTO_COMMIT; + + int err; + adb = new Db(this, DB_CXX_NO_EXCEPTIONS); + if ((err = adb->open(NULL, + "account", NULL, DB_UNKNOWN, oflags, 0)) != 0) { + DbException except("Open of account file failed", err); + throw except; + } + dbstl::register_db(adb); + accounts_map = new DefrecMap(adb); + + bdb = new Db(this, DB_CXX_NO_EXCEPTIONS); + if ((err = bdb->open(NULL, + "branch", NULL, DB_UNKNOWN, oflags, 0)) != 0) { + DbException except("Open of branch file failed", err); + throw except; + } + dbstl::register_db(bdb); + branches_map = new DefrecMap(bdb); + + tdb = new Db(this, DB_CXX_NO_EXCEPTIONS); + if ((err = tdb->open(NULL, + "teller", NULL, DB_UNKNOWN, oflags, 0)) != 0) { + DbException except("Open of teller file failed", err); + throw except; + } + dbstl::register_db(tdb); + tellers_map = new DefrecMap(tdb); + + hdb = new Db(this, DB_CXX_NO_EXCEPTIONS); + if ((err = hdb->open(NULL, + "history", NULL, DB_UNKNOWN, oflags, 0)) != 0) { + DbException except("Open of history file failed", err); + throw except; + } + dbstl::register_db(hdb); + history_vector = new HistrecVector(hdb); + + (void)time(&start_time); + for (txns = n, failed = 0; n-- > 0;) + if ((ret = txn(accounts_map, branches_map, tellers_map, + history_vector, accounts, branches, tellers)) != 0) + ++failed; + (void)time(&end_time); + if (end_time == start_time) + ++end_time; + // We use printf because it provides much simpler + // formatting than iostreams. + // + printf("%s: %d txns: %d failed, %d sec, %.2f TPS\n", progname, + txns, failed, (int)(end_time - start_time), + (txns - failed) / (double)(end_time - start_time)); + + delete accounts_map; + delete branches_map; + delete tellers_map; + delete history_vector; + dbstl::close_all_dbs(); +} + +// +// XXX Figure out the appropriate way to pick out IDs. 
+// +int +StlTpcbExample::txn(DefrecMap *accounts_map, DefrecMap *branches_map, + DefrecMap *tellers_map, HistrecVector *history_vector, + int accounts, int branches, int tellers) +{ + Histrec hrec; + DefrecMap::value_type_wrap::second_type recref, recref2, recref3; + int account, branch, teller; + + /* + * !!! + * This is sample code -- we could move a lot of this into the driver + * to make it faster. + */ + account = random_id(ACCOUNT, accounts, branches, tellers); + branch = random_id(BRANCH, accounts, branches, tellers); + teller = random_id(TELLER, accounts, branches, tellers); + + hrec.aid = account; + hrec.bid = branch; + hrec.tid = teller; + hrec.amount = 10; + + /* + * START PER-TRANSACTION TIMING. + * + * Technically, TPCB requires a limit on response time, you only get + * to count transactions that complete within 2 seconds. That's not + * an issue for this sample application -- regardless, here's where + * the transaction begins. + */ + try { + dbstl::begin_txn(0, this); + + /* Account record */ + recref = (*accounts_map)[account]; + recref.balance += 10; + recref._DB_STL_StoreElement(); + + /* Branch record */ + recref2 = (*branches_map)[branch]; + recref2.balance += 10; + recref2._DB_STL_StoreElement(); + + /* Teller record */ + recref3 = (*tellers_map)[teller]; + recref3.balance += 10; + recref3._DB_STL_StoreElement(); + + /* History record */ + history_vector->push_back(hrec); + dbstl::commit_txn(this); + /* END PER-TRANSACTION TIMING. */ + return (0); + + } catch (DbDeadlockException) { + dbstl::abort_txn(this); + if (verbose) + cout << "Transaction A=" << (long)account + << " B=" << (long)branch + << " T=" << (long)teller << " failed\n"; + return (DB_LOCK_DEADLOCK); + } catch(...) 
{ + dbstl::abort_txn(this); + throw; + } +} diff --git a/examples/stl/StlTransactionGuideExample.cpp b/examples/stl/StlTransactionGuideExample.cpp new file mode 100644 index 00000000..08e180ce --- /dev/null +++ b/examples/stl/StlTransactionGuideExample.cpp @@ -0,0 +1,372 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2008, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +// File txn_guide_stl.cpp +#include +#include + +#include "dbstl_map.h" + +#ifdef _WIN32 +#include +extern "C" { + extern int _tgetopt(int nargc, TCHAR* const* nargv, const TCHAR * ostr); + extern TCHAR *optarg; +} +#define PATHD '\\' + +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + ((statusp == NULL) ? 0 : \ + (GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1))) + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 0 : -1) +#else +#include +#include +#define PATHD '/' + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) +#endif + +// Run 5 writers threads at a time. +#define NUMWRITERS 5 +using namespace dbstl; +typedef db_multimap > strmap_t; +// Printing of thread_t is implementation-specific, so we +// create our own thread IDs for reporting purposes. 
+int global_thread_num; +mutex_t thread_num_lock; + +// Forward declarations +int countRecords(strmap_t *); +int openDb(Db **, const char *, const char *, DbEnv *, u_int32_t); +int usage(void); +void *writerThread(void *); + +// Usage function +int +usage() +{ + std::cerr << " [-h ] [-m (in memory use)]" + << std::endl; + return (EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + // Initialize our handles + Db *dbp = NULL; + DbEnv *envp = NULL; + + thread_t writerThreads[NUMWRITERS]; + int i, inmem; + u_int32_t envFlags; + const char *dbHomeDir; + + inmem = 0; + // Application name + const char *progName = "TxnGuideStl"; + + // Database file name + const char *fileName = "mydb.db"; + + // Parse the command line arguments +#ifdef _WIN32 + dbHomeDir = ".\\TESTDIR"; +#else + dbHomeDir = "./TESTDIR"; +#endif + + // Env open flags + envFlags = + DB_CREATE | // Create the environment if it does not exist + DB_RECOVER | // Run normal recovery. + DB_INIT_LOCK | // Initialize the locking subsystem + DB_INIT_LOG | // Initialize the logging subsystem + DB_INIT_TXN | // Initialize the transactional subsystem. This + // also turns on logging. + DB_INIT_MPOOL | // Initialize the memory pool (in-memory cache) + DB_THREAD; // Cause the environment to be free-threaded + + try { + // Create and open the environment + envp = new DbEnv(DB_CXX_NO_EXCEPTIONS); + + // Indicate that we want db to internally perform deadlock + // detection. Also indicate that the transaction with + // the fewest number of write locks will receive the + // deadlock notification in the event of a deadlock. + envp->set_lk_detect(DB_LOCK_MINWRITE); + + if (inmem) { + envp->set_lg_bsize(64 * 1024 * 1024); + envp->open(NULL, envFlags, 0644); + fileName = NULL; + } else + envp->open(dbHomeDir, envFlags, 0644); + + // If we had utility threads (for running checkpoints or + // deadlock detection, for example) we would spawn those + // here. 
However, for a simple example such as this, + // that is not required. + + // Open the database + openDb(&dbp, progName, fileName, + envp, DB_DUP); + + // Call this function before any use of dbstl in a single thread + // if multiple threads are using dbstl. + dbstl::dbstl_startup(); + + // We created the dbp and envp handles not via dbstl::open_db/open_env + // functions, so we must register the handles in each thread using the + // container. + dbstl::register_db(dbp); + dbstl::register_db_env(envp); + + strmap_t *strmap = new strmap_t(dbp, envp); + // Initialize a mutex. Used to help provide thread ids. + (void)mutex_init(&thread_num_lock, NULL); + + // Start the writer threads. + for (i = 0; i < NUMWRITERS; i++) + (void)thread_create(&writerThreads[i], NULL, + writerThread, (void *)strmap); + + + // Join the writers + for (i = 0; i < NUMWRITERS; i++) + (void)thread_join(writerThreads[i], NULL); + + delete strmap; + + } catch(DbException &e) { + std::cerr << "Error opening database environment: " + << (inmem ? "NULL" : dbHomeDir) << std::endl; + std::cerr << e.what() << std::endl; + dbstl_exit(); + return (EXIT_FAILURE); + } + + // Environment and database will be automatically closed by dbstl. + + // Final status message and return. + + std::cout << "I'm all done." << std::endl; + + dbstl_exit(); + delete envp; + return (EXIT_SUCCESS); +} + +// A function that performs a series of writes to a +// Berkeley DB database. The information written +// to the database is largely nonsensical, but the +// mechanism of transactional commit/abort and +// deadlock detection is illustrated here. 
+void * +writerThread(void *args) +{ + int j, thread_num; + int max_retries = 1; // Max retry on a deadlock + const char *key_strings[] = {"key 1", "key 2", "key 3", "key 4", + "key 5", "key 6", "key 7", "key 8", + "key 9", "key 10"}; + + strmap_t *strmap = (strmap_t *)args; + DbEnv *envp = strmap->get_db_env_handle(); + + // We created the dbp and envp handles not via dbstl::open_db/open_env + // functions, so we must register the handles in each thread using the + // container. + dbstl::register_db(strmap->get_db_handle()); + dbstl::register_db_env(envp); + + // Get the thread number + (void)mutex_lock(&thread_num_lock); + global_thread_num++; + thread_num = global_thread_num; + (void)mutex_unlock(&thread_num_lock); + + // Initialize the random number generator + srand(thread_num); + + // Perform 50 transactions + for (int i = 0; i < 1; i++) { + DbTxn *txn; + int retry = 100; + int retry_count = 0, payload; + // while loop is used for deadlock retries + while (retry--) { + // try block used for deadlock detection and + // general db exception handling + try { + + // Begin our transaction. We group multiple writes in + // this thread under a single transaction so as to + // (1) show that you can atomically perform multiple + // writes at a time, and (2) to increase the chances + // of a deadlock occurring so that we can observe our + // deadlock detection at work. + + // Normally we would want to avoid the potential for + // deadlocks, so for this workload the correct thing + // would be to perform our puts with autocommit. But + // that would excessively simplify our example, so we + // do the "wrong" thing here instead. + txn = dbstl::begin_txn(0, envp); + + // Perform the database write for this transaction. + for (j = 0; j < 10; j++) { + payload = rand() + i; + strmap->insert(make_pair(key_strings[j], payload)); + } + + // countRecords runs a cursor over the entire database. 
+ // We do this to illustrate issues of deadlocking + std::cout << thread_num << " : Found " + << countRecords(strmap) + << " records in the database." << std::endl; + + std::cout << thread_num << " : committing txn : " << i + << std::endl; + + // commit + try { + dbstl::commit_txn(envp); + } catch (DbException &e) { + std::cout << "Error on txn commit: " + << e.what() << std::endl; + } + } catch (DbDeadlockException &) { + // First thing that we MUST do is abort the transaction. + try { + dbstl::abort_txn(envp); + } catch (DbException ex1) { + std::cout< storage failed" << std::endl; + std::cerr << e.what() << std::endl; + dbstl::abort_txn(envp); + retry = 0; + } catch (std::exception &ee) { + std::cerr << "Unknown exception: " << ee.what() << std::endl; + return (0); + } + } + } + return (0); +} + + +// This simply counts the number of records contained in the +// database and returns the result. +// +// Note that this method exists only for illustrative purposes. +// A more straight-forward way to count the number of records in +// a database is to use the db_map<>::size() method. +int +countRecords(strmap_t *strmap) +{ + + int count = 0; + strmap_t::iterator itr; + try { + // Set the flag used by Db::cursor. + for (itr = strmap->begin(); itr != strmap->end(); ++itr) + count++; + } catch (DbDeadlockException &de) { + std::cerr << "countRecords: got deadlock" << std::endl; + // itr's cursor will be automatically closed when it is destructed. + throw de; + } catch (DbException &e) { + std::cerr << "countRecords error:" << std::endl; + std::cerr << e.what() << std::endl; + } + + // itr's cursor will be automatically closed when it is destructed. + + return (count); +} + + +// Open a Berkeley DB database +int +openDb(Db **dbpp, const char *progname, const char *fileName, + DbEnv *envp, u_int32_t extraFlags) +{ + int ret; + u_int32_t openFlags; + + try { + Db *dbp = new Db(envp, DB_CXX_NO_EXCEPTIONS); + + // Point to the new'd Db. 
+ *dbpp = dbp; + + if (extraFlags != 0) + ret = dbp->set_flags(extraFlags); + + // Now open the database. + openFlags = DB_CREATE | // Allow database creation + DB_READ_UNCOMMITTED | // Allow uncommitted reads + DB_AUTO_COMMIT; // Allow autocommit + + dbp->open(NULL, // Txn pointer + fileName, // File name + NULL, // Logical db name + DB_BTREE, // Database type (using btree) + openFlags, // Open flags + 0); // File mode. Using defaults + } catch (DbException &e) { + std::cerr << progname << ": openDb: db open failed:" << std::endl; + std::cerr << e.what() << std::endl; + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + diff --git a/examples/stl/repquote/README b/examples/stl/repquote/README new file mode 100644 index 00000000..86d07591 --- /dev/null +++ b/examples/stl/repquote/README @@ -0,0 +1,17 @@ +# $Id$ + +This is the directory for the replication example program. + +The example is a toy stock quote server. It uses the replication manager to +make use of DB replication, and uses STL API to acces the database. + + +StlRepQuoteExample.cpp + Contains code to implement the basic functions of the + application, and code necessary to configure the + application to use Replication Manager. + +StlRepConfigInfo.h +StlRepConfigInfo.cpp + Contains code to manage user configurations to this program, + including those to the DB replication manager. diff --git a/examples/stl/repquote/StlRepConfigInfo.cpp b/examples/stl/repquote/StlRepConfigInfo.cpp new file mode 100644 index 00000000..35d268e5 --- /dev/null +++ b/examples/stl/repquote/StlRepConfigInfo.cpp @@ -0,0 +1,58 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "StlRepConfigInfo.h" + +#include + +RepConfigInfo::RepConfigInfo() +{ + start_policy = DB_REP_ELECTION; + home = "TESTDIR"; + got_listen_address = false; + nrsites = 0; + priority = 100; + verbose = false; + this_host.creator = false; + other_hosts = NULL; + ack_policy = DB_REPMGR_ACKS_QUORUM; + bulk = false; +} + +RepConfigInfo::~RepConfigInfo() +{ + // release any other_hosts structs. + if (other_hosts != NULL) { + REP_HOST_INFO *CurItem = other_hosts; + while (CurItem->next != NULL) + { + REP_HOST_INFO *TmpItem = CurItem->next; + free(CurItem); + CurItem = TmpItem; + } + free(CurItem); + } + other_hosts = NULL; +} + +void RepConfigInfo::addOtherHost(char* host, int port, bool peer) +{ + REP_HOST_INFO *newinfo; + newinfo = (REP_HOST_INFO*)malloc(sizeof(REP_HOST_INFO)); + newinfo->host = host; + newinfo->port = port; + newinfo->peer = peer; + if (other_hosts == NULL) { + other_hosts = newinfo; + newinfo->next = NULL; + } else { + newinfo->next = other_hosts; + other_hosts = newinfo; + } + nrsites++; +} diff --git a/examples/stl/repquote/StlRepConfigInfo.h b/examples/stl/repquote/StlRepConfigInfo.h new file mode 100644 index 00000000..cec996d2 --- /dev/null +++ b/examples/stl/repquote/StlRepConfigInfo.h @@ -0,0 +1,37 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include + +// Chainable struct used to store host information. +typedef struct RepHostInfoObj{ + bool creator; + char* host; + int port; + bool peer; // only relevant for "other" hosts + RepHostInfoObj* next; // used for chaining multiple "other" hosts. 
+} REP_HOST_INFO; + +class RepConfigInfo { +public: + RepConfigInfo(); + virtual ~RepConfigInfo(); + + void addOtherHost(char* host, int port, bool peer); +public: + u_int32_t start_policy; + const char* home; + bool got_listen_address; + REP_HOST_INFO this_host; + int nrsites; + int priority; + bool verbose; + // used to store a set of optional other hosts. + REP_HOST_INFO *other_hosts; + int ack_policy; + bool bulk; +}; diff --git a/examples/stl/repquote/StlRepQuoteExample.cpp b/examples/stl/repquote/StlRepQuoteExample.cpp new file mode 100644 index 00000000..80ead3fd --- /dev/null +++ b/examples/stl/repquote/StlRepQuoteExample.cpp @@ -0,0 +1,769 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * In this application, we specify all communication via the command line. In + * a real application, we would expect that information about the other sites + * in the system would be maintained in some sort of configuration file. The + * critical part of this interface is that we assume at startup that we can + * find out + * 1) what our Berkeley DB home environment is, + * 2) what host/port we wish to listen on for connections; and + * 3) an optional list of other sites we should attempt to connect to. + * + * These pieces of information are expressed by the following flags. 
+ * -h home (required; h stands for home directory) + * -l host:port (required unless -L is specified; l stands for local) + * -L host:port (optional, L means group creator) + * -C or -M (optional; start up as client or master) + * -r host:port (optional; r stands for remote; any number of these may be + * specified) + * -R host:port (optional; R stands for remote peer; only one of these may + * be specified) + * -a all|quorum (optional; a stands for ack policy) + * -b (optional; b stands for bulk) + * -p priority (optional; defaults to 100) + * -v (optional; v stands for verbose) + */ + +#include +#include +#include + +#include +#include "StlRepConfigInfo.h" +#include "dbstl_map.h" + +using std::cout; +using std::cin; +using std::cerr; +using std::endl; +using std::flush; +using std::istream; +using std::istringstream; +using std::string; +using std::getline; +using namespace dbstl; +#define CACHESIZE (10 * 1024 * 1024) +#define DATABASE "quote.db" + +const char *progname = "exstl_repquote"; + +#include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#define snprintf _snprintf +#define sleep(s) Sleep(1000 * (s)) + +extern "C" { +extern int getopt(int, char * const *, const char *); +extern char *optarg; +extern int optind; +} + +typedef HANDLE thread_t; +typedef DWORD thread_exit_status_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1) +#else /* !_WIN32 */ +#include + +typedef pthread_t thread_t; +typedef void* thread_exit_status_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) +#endif + +// Struct used to store information in Db app_private field. 
+typedef struct { + bool app_finished; + bool in_client_sync; + bool is_master; + bool verbose; +} APP_DATA; + +static void log(const char *); +void *checkpoint_thread (void *); +void *log_archive_thread (void *); + +class RepQuoteExample +{ +public: + typedef db_map > str_map_t; + RepQuoteExample(); + void init(RepConfigInfo* config); + void doloop(); + int terminate(); + + static void event_callback(DbEnv * dbenv, u_int32_t which, void *info); + +private: + // disable copy constructor. + RepQuoteExample(const RepQuoteExample &); + void operator = (const RepQuoteExample &); + + // internal data members. + APP_DATA app_data; + RepConfigInfo *app_config; + DbEnv *cur_env; + Db *dbp; + str_map_t *strmap; + thread_t ckp_thr; + thread_t lga_thr; + + // private methods. + void print_stocks(); + void prompt(); + bool open_db(bool creating); + void close_db(){ + delete strmap; + strmap = NULL; + dbstl::close_db(dbp); + dbp = NULL; + } + static void close_db(Db *&);// Close an unregistered Db handle. +}; + +bool RepQuoteExample::open_db(bool creating) +{ + int ret; + + if (dbp) + return true; + + dbp = new Db(cur_env, DB_CXX_NO_EXCEPTIONS); + + u_int32_t flags = DB_AUTO_COMMIT | DB_THREAD; + if (creating) + flags |= DB_CREATE; + + ret = dbp->open(NULL, DATABASE, NULL, DB_BTREE, flags, 0); + switch (ret) { + case 0: + register_db(dbp); + if (strmap) + delete strmap; + strmap = new str_map_t(dbp, cur_env); + return (true); + case DB_LOCK_DEADLOCK: // Fall through + case DB_REP_HANDLE_DEAD: + log("\nFailed to open stock db."); + break; + default: + if (ret == DB_REP_LOCKOUT) + break; // Fall through + else if (ret == ENOENT && !creating) + log("\nStock DB does not yet exist\n"); + else { + DbException ex(ret); + throw ex; + } + } // switch + + // (All retryable errors fall through to here.) 
+ // + log("\nPlease retry the operation"); + close_db(dbp); + return (false); +} + +void RepQuoteExample::close_db(Db *&dbp) +{ + if (dbp) { + try { + dbp->close(0); + delete dbp; + dbp = 0; + } catch (...) { + delete dbp; + dbp = 0; + throw; + } + } + +} + +RepQuoteExample::RepQuoteExample() : app_config(0), cur_env(NULL) { + app_data.app_finished = 0; + app_data.in_client_sync = 0; + app_data.is_master = 0; // assume I start out as client + cur_env = new DbEnv(DB_CXX_NO_EXCEPTIONS); + strmap = NULL; + dbp = NULL; +} + +void RepQuoteExample::init(RepConfigInfo *config) { + app_config = config; + DbSite *dbsite; + int i; + + cur_env->set_app_private(&app_data); + cur_env->set_errfile(stderr); + cur_env->set_errpfx(progname); + cur_env->set_event_notify(event_callback); + + // Configure bulk transfer to send groups of records to clients + // in a single network transfer. This is useful for master sites + // and clients participating in client-to-client synchronization. + // + if (app_config->bulk) + cur_env->rep_set_config(DB_REP_CONF_BULK, 1); + + // Turn on debugging and informational output if requested. + if (app_config->verbose) { + cur_env->set_verbose(DB_VERB_REPLICATION, 1); + app_data.verbose = 1; + } + + // Set replication group election priority for this environment. + // An election first selects the site with the most recent log + // records as the new master. If multiple sites have the most + // recent log records, the site with the highest priority value + // is selected as master. + // + cur_env->rep_set_priority(app_config->priority); + + // Set the policy that determines how master and client sites + // handle acknowledgement of replication messages needed for + // permanent records. The default policy of "quorum" requires only + // a quorum of electable peers sufficient to ensure a permanent + // record remains durable if an election is held. The "all" option + // requires all clients to acknowledge a permanent replication + // message instead. 
+ // + cur_env->repmgr_set_ack_policy(app_config->ack_policy); + + // Set the threshold for the minimum and maximum time the client + // waits before requesting retransmission of a missing message. + // Base these values on the performance and load characteristics + // of the master and client host platforms as well as the round + // trip message time. + // + cur_env->rep_set_request(20000, 500000); + + // Configure deadlock detection to ensure that any deadlocks + // are broken by having one of the conflicting lock requests + // rejected. DB_LOCK_DEFAULT uses the lock policy specified + // at environment creation time or DB_LOCK_RANDOM if none was + // specified. + // + cur_env->set_lk_detect(DB_LOCK_DEFAULT); + + // The following base replication features may also be useful to your + // application. See Berkeley DB documentation for more details. + // - Master leases: Provide stricter consistency for data reads + // on a master site. + // - Timeouts: Customize the amount of time Berkeley DB waits + // for such things as an election to be concluded or a master + // lease to be granted. + // - Delayed client synchronization: Manage the master site's + // resources by spreading out resource-intensive client + // synchronizations. + // - Blocked client operations: Return immediately with an error + // instead of waiting indefinitely if a client operation is + // blocked by an ongoing client synchronization. 
+ + cur_env->repmgr_site(app_config->this_host.host, + app_config->this_host.port, &dbsite, 0); + dbsite->set_config(DB_LOCAL_SITE, 1); + if(app_config->this_host.creator) + dbsite->set_config(DB_GROUP_CREATOR, 1); + dbsite->close(); + + i = 1; + for ( REP_HOST_INFO *cur = app_config->other_hosts; + cur != NULL && i <= app_config->nrsites; + cur = cur->next, i++) { + cur_env->repmgr_site(cur->host, cur->port, &dbsite, 0); + dbsite->set_config(DB_BOOTSTRAP_HELPER, 1); + if(cur->peer) + dbsite->set_config(DB_REPMGR_PEER, 1); + dbsite->close(); + } + + // Configure heartbeat timeouts so that repmgr monitors the + // health of the TCP connection. Master sites broadcast a heartbeat + // at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout. + // Client sites wait for message activity the length of the + // DB_REP_HEARTBEAT_MONITOR timeout before concluding that the + // connection to the master is lost. The DB_REP_HEARTBEAT_MONITOR + // timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout. + // + cur_env->rep_set_timeout(DB_REP_HEARTBEAT_SEND, 5000000); + cur_env->rep_set_timeout(DB_REP_HEARTBEAT_MONITOR, 10000000); + + // The following repmgr features may also be useful to your + // application. See Berkeley DB documentation for more details. + // - Two-site strict majority rule - In a two-site replication + // group, require both sites to be available to elect a new + // master. + // - Timeouts - Customize the amount of time repmgr waits + // for such things as waiting for acknowledgements or attempting + // to reconnect to other sites. + // - Site list - return a list of sites currently known to repmgr. + + // We can now open our environment, although we're not ready to + // begin replicating. However, we want to have a dbenv around + // so that we can send it into any of our message handlers. 
+ cur_env->set_cachesize(0, CACHESIZE, 0); + cur_env->set_flags(DB_TXN_NOSYNC, 1); + + cur_env->open(app_config->home, DB_CREATE | DB_RECOVER | + DB_THREAD | DB_INIT_REP | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN, 0); + + // Start checkpoint and log archive support threads. + (void)thread_create(&ckp_thr, NULL, checkpoint_thread, cur_env); + (void)thread_create(&lga_thr, NULL, log_archive_thread, cur_env); + + dbstl::register_db_env(cur_env); + cur_env->repmgr_start(3, app_config->start_policy); +} + +int RepQuoteExample::terminate() { + try { + // Wait for checkpoint and log archive threads to finish. + // Windows does not allow NULL pointer for exit code variable. + thread_exit_status_t exstat; + + (void)thread_join(lga_thr, &exstat); + (void)thread_join(ckp_thr, &exstat); + + // We have used the DB_TXN_NOSYNC environment flag for + // improved performance without the usual sacrifice of + // transactional durability, as discussed in the + // "Transactional guarantees" page of the Reference + // Guide: if one replication site crashes, we can + // expect the data to exist at another site. However, + // in case we shut down all sites gracefully, we push + // out the end of the log here so that the most + // recent transactions don't mysteriously disappear. + cur_env->log_flush(NULL); + } catch (DbException dbe) { + cout << "\nerror closing environment: " << dbe.what() << endl; + } + return 0; +} + +void RepQuoteExample::prompt() { + cout << "QUOTESERVER"; + if (!app_data.is_master) + cout << "(read-only)"; + cout << "> " << flush; +} + +void log(const char *msg) { + cerr << msg << endl; +} + +// Simple command-line user interface: +// - enter " " to insert or update a record in the +// database; +// - just press Return (i.e., blank input line) to print out the contents of +// the database; +// - enter "quit" or "exit" to quit. 
+// +void RepQuoteExample::doloop() { + string input; + + while (prompt(), getline(cin, input)) { + istringstream is(input); + string token1, token2; + + // Read 0, 1 or 2 tokens from the input. + // + int count = 0; + if (is >> token1) { + count++; + if (is >> token2) + count++; + } + + if (count == 1) { + if (token1 == "exit" || token1 == "quit") { + app_data.app_finished = 1; + break; + } else { + log("\nFormat: \n"); + continue; + } + } + + // Here we know count is either 0 or 2, so we're about to try a + // DB operation. + // + // Open database with DB_CREATE only if this is a master + // database. A client database uses polling to attempt + // to open the database without DB_CREATE until it is + // successful. + // + // This DB_CREATE polling logic can be simplified under + // some circumstances. For example, if the application can + // be sure a database is already there, it would never need + // to open it with DB_CREATE. + // + if (!open_db(app_data.is_master)) + continue; + + try { + if (count == 0) + if (app_data.in_client_sync) + log( + "Cannot read data during client initialization - please try again."); + else + print_stocks(); + else if (!app_data.is_master) + log("\nCan't update at client\n"); + else { + char *symbol = new char[token1.length() + 1]; + strcpy(symbol, token1.c_str()); + char *price = new char[token2.length() + 1]; + strcpy(price, token2.c_str()); + begin_txn(0, cur_env); + strmap->insert(make_pair(symbol, price)); + commit_txn(cur_env); + delete symbol; + delete price; + } + } catch (DbDeadlockException e) { + log("\nplease retry the operation\n"); + close_db(); + } catch (DbRepHandleDeadException e) { + log("\nplease retry the operation\n"); + close_db(); + } catch (DbException e) { + if (e.get_errno() == DB_REP_LOCKOUT) { + log("\nplease retry the operation\n"); + close_db(); + } else + throw; + } + + } + + close_db(); +} + +void RepQuoteExample::event_callback(DbEnv* dbenv, u_int32_t which, void *info) +{ + APP_DATA *app = 
(APP_DATA*)dbenv->get_app_private(); + + info = NULL; /* Currently unused. */ + + switch (which) { + case DB_EVENT_REP_MASTER: + app->in_client_sync = 0; + app->is_master = 1; + app->verbose = 0; + break; + + case DB_EVENT_REP_CLIENT: + app->is_master = 0; + app->in_client_sync = 1; + break; + + case DB_EVENT_REP_STARTUPDONE: + app->in_client_sync = 0; + break; + + case DB_EVENT_REP_NEWMASTER: + app->in_client_sync = 1; + break; + + case DB_EVENT_REP_PERM_FAILED: + // Did not get enough acks to guarantee transaction + // durability based on the configured ack policy. This + // transaction will be flushed to the master site's + // local disk storage for durability. + // + if(app->verbose) + log( + "EVENT: Insufficient acknowledgements to guarantee transaction durability."); + break; + + case DB_EVENT_PANIC: + if(app->verbose) + log("EVENT: receive panic event"); + break; + + case DB_EVENT_REP_CONNECT_BROKEN: + if(app->verbose) + log("EVENT: connection is broken"); + break; + + case DB_EVENT_REP_DUPMASTER: + if(app->verbose) + log("EVENT: duplicate master"); + break; + + case DB_EVENT_REP_ELECTED: + if(app->verbose) + log("EVENT: election in replication group"); + break; + + case DB_EVENT_REP_CONNECT_ESTD: + if(app->verbose) + log("EVENT: establish connection"); + break; + + case DB_EVENT_REP_CONNECT_TRY_FAILED: + if(app->verbose) + log("EVENT: fail to try connection"); + break; + + case DB_EVENT_REP_INIT_DONE: + if(app->verbose) + log("EVENT: finish initialization"); + break; + + case DB_EVENT_REP_LOCAL_SITE_REMOVED: + if(app->verbose) + log("EVENT: remove local site"); + break; + + case DB_EVENT_REP_SITE_ADDED: + if(app->verbose) + log("EVENT: add site"); + break; + + case DB_EVENT_REP_SITE_REMOVED: + if(app->verbose) + log("EVENT: remove site removed"); + break; + + default: + dbenv->errx("\nignoring event %d", which); + } +} + +void RepQuoteExample::print_stocks() { +#define MAXKEYSIZE 10 +#define MAXDATASIZE 20 + + cout << "\tSymbol\tPrice" << endl + << 
"\t======\t=====" << endl; + str_map_t::iterator itr; + if (strmap == NULL) + strmap = new str_map_t(dbp, cur_env); + begin_txn(0, cur_env); + for (itr = strmap->begin(); itr != strmap->end(); ++itr) + cout<<"\t"<first<<"\t"<second<get_app_private(); + + for (;;) { + // Wait for one minute, polling once per second to see if + // application has finished. When application has finished, + // terminate this thread. + // + for (i = 0; i < 60; i++) { + sleep(1); + if (app->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + // Perform a checkpoint. + if ((ret = env->txn_checkpoint(0, 0, 0)) != 0) { + env->err(ret, "Could not perform checkpoint.\n"); + return ((void *)EXIT_FAILURE); + } + } +} + +// This is a simple log archive thread. Once per minute, it removes all but +// the most recent 3 logs that are safe to remove according to a call to +// DBENV->log_archive(). +// +// Log cleanup is needed to conserve disk space, but aggressive log cleanup +// can cause more frequent client initializations if a client lags too far +// behind the current master. This can happen in the event of a slow client, +// a network partition, or a new master that has not kept as many logs as the +// previous master. +// +// The approach in this routine balances the need to mitigate against a +// lagging client by keeping a few more of the most recent unneeded logs +// with the need to conserve disk space by regularly cleaning up log files. +// Use of automatic log removal (DBENV->log_set_config() DB_LOG_AUTO_REMOVE +// flag) is not recommended for replication due to the risk of frequent +// client initializations. +// +void *log_archive_thread(void *args) +{ + DbEnv *env; + APP_DATA *app; + char **begin, **list; + int i, listlen, logs_to_keep, minlog, ret; + + env = (DbEnv *)args; + app = (APP_DATA *)env->get_app_private(); + logs_to_keep = 3; + + for (;;) { + // Wait for one minute, polling once per second to see if + // application has finished. 
When application has finished, + // terminate this thread. + // + for (i = 0; i < 60; i++) { + sleep(1); + if (app->app_finished == 1) + return ((void *)EXIT_SUCCESS); + } + + // Get the list of unneeded log files. + if ((ret = env->log_archive(&list, DB_ARCH_ABS)) != 0) { + env->err(ret, "Could not get log archive list."); + return ((void *)EXIT_FAILURE); + } + if (list != NULL) { + listlen = 0; + // Get the number of logs in the list. + for (begin = list; *begin != NULL; begin++, listlen++); + // Remove all but the logs_to_keep most recent + // unneeded log files. + // + minlog = listlen - logs_to_keep; + for (begin = list, i= 0; i < minlog; list++, i++) { + if ((ret = unlink(*list)) != 0) { + env->err(ret, + "logclean: remove %s", *list); + env->errx( + "logclean: Error remove %s", *list); + free(begin); + return ((void *)EXIT_FAILURE); + } + } + free(begin); + } + } +} + diff --git a/lang/cxx/cxx_channel.cpp b/lang/cxx/cxx_channel.cpp new file mode 100644 index 00000000..280ed4c4 --- /dev/null +++ b/lang/cxx/cxx_channel.cpp @@ -0,0 +1,104 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +// Helper macro for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. 
// Note this macro expects that input _argspec is an argument
// list element (e.g., "char *arg") and that _arglist is the arguments
// that should be passed through to the C method (e.g., "(dbchannel, arg)")
//
// If _delete is non-zero the wrapper object deletes itself after the
// underlying C call (used by close()).
//
#define DB_CHANNEL_METHOD(_name, _delete, _argspec, _arglist, _retok)	\
int DbChannel::_name _argspec						\
{									\
	int ret;							\
	DB_CHANNEL *dbchannel = unwrap(this);				\
									\
	if (dbchannel == NULL)						\
		ret = EINVAL;						\
	else								\
		ret = dbchannel->_name _arglist;			\
	if (_delete)							\
		delete this;						\
	if (!_retok(ret))						\
		DB_ERROR(dbenv_, "DbChannel::"#_name, ret,		\
		    ON_ERROR_UNKNOWN);					\
	return (ret);							\
}

DbChannel::DbChannel()
:	imp_(0)
{
}

DbChannel::~DbChannel()
{
}

// close() passes _delete == 1, so the DbChannel wrapper is destroyed
// along with the underlying DB_CHANNEL handle.
DB_CHANNEL_METHOD(close, 1, (), (dbchannel, 0), DB_RETOK_STD)

// Send a one-way message on the channel.  The Dbt array is flattened
// into a temporary C DBT array (allocated via __os_malloc) before
// calling the underlying C send_msg, then freed again.
int DbChannel::send_msg(Dbt *msg, u_int32_t nmsg, u_int32_t flags)
{
	DB_CHANNEL *dbchannel = unwrap(this);
	DB_ENV *dbenv = unwrap(dbenv_);
	DBT *dbtlist;
	int i, ret;

	ret = __os_malloc(dbenv->env, sizeof(DBT) * nmsg, &dbtlist);
	if (ret != 0) {
		DB_ERROR(dbenv_, "DbChannel::send_msg", ret, ON_ERROR_UNKNOWN);
		return (ret);
	}

	// Shallow-copy each wrapped DBT; the C layer only reads them.
	for (i = 0; i < (int)nmsg; i++)
		memcpy(&dbtlist[i], msg[i].get_DBT(), sizeof(DBT));

	if ((ret = dbchannel->send_msg(dbchannel, dbtlist, nmsg, flags)) != 0)
		DB_ERROR(dbenv_, "DbChannel::send_msg", ret, ON_ERROR_UNKNOWN);

	__os_free(dbenv->env, dbtlist);

	return (ret);
}

// Send a request and wait up to `timeout` for the response.  Same
// flatten-call-free pattern as send_msg above.
int DbChannel::send_request(Dbt *request, u_int32_t nrequest,
    Dbt *response, db_timeout_t timeout, u_int32_t flags)
{
	DB_CHANNEL *dbchannel = unwrap(this);
	DB_ENV *dbenv = unwrap(dbenv_);
	DBT *dbtlist;
	int i, ret;

	ret = __os_malloc(dbenv->env, sizeof(DBT) * nrequest, &dbtlist);
	if (ret != 0) {
		DB_ERROR(dbenv_, "DbChannel::send_request", ret,
		    ON_ERROR_UNKNOWN);
		return (ret);
	}

	for (i = 0; i < (int)nrequest; i++)
		memcpy(&dbtlist[i], request[i].get_DBT(), sizeof(DBT));

	if ((ret = dbchannel->send_request(dbchannel, dbtlist, nrequest,
	    response->get_DBT(), timeout, flags)) != 0)
		DB_ERROR(dbenv_, "DbChannel::send_request", ret,
		    ON_ERROR_UNKNOWN);

	__os_free(dbenv->env, dbtlist);

	return (ret);
}

DB_CHANNEL_METHOD(set_timeout, 0, (db_timeout_t timeout),
    (dbchannel, timeout), DB_RETOK_STD);
diff --git a/lang/cxx/cxx_db.cpp b/lang/cxx/cxx_db.cpp
new file mode 100644
index 00000000..6d3674dc
--- /dev/null
+++ b/lang/cxx/cxx_db.cpp
@@ -0,0 +1,825 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1997, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"

#include "db_cxx.h"
#include "dbinc/cxx_int.h"

#include "dbinc/db_page.h"
#include "dbinc_auto/db_auto.h"
#include "dbinc_auto/crdel_auto.h"
#include "dbinc/db_dispatch.h"
#include "dbinc_auto/db_ext.h"
#include "dbinc_auto/common_ext.h"

// Helper macros for simple methods that pass through to the
// underlying C method. It may return an error or raise an exception.
// Note this macro expects that input _argspec is an argument
// list element (e.g., "char *arg") and that _arglist is the arguments
// that should be passed through to the C method (e.g., "(db, arg)")
//
// DB_METHOD: plain pass-through to the C DB method; reports failures
// via DB_ERROR using this Db's error policy.
//
#define DB_METHOD(_name, _argspec, _arglist, _retok)			\
int Db::_name _argspec							\
{									\
	int ret;							\
	DB *db = unwrap(this);						\
									\
	ret = db->_name _arglist;					\
	if (!_retok(ret))						\
		DB_ERROR(dbenv_, "Db::" # _name, ret, error_policy());	\
	return (ret);							\
}

// DB_DESTRUCTOR: like DB_METHOD, but the underlying C call consumes the
// DB handle (close/rename/remove), so cleanup() is run to invalidate
// this wrapper afterwards.  A NULL handle (already closed) is EINVAL.
//
#define DB_DESTRUCTOR(_name, _argspec, _arglist, _retok)		\
int Db::_name _argspec							\
{									\
	int ret;							\
	DB *db = unwrap(this);						\
									\
	if (!db) {							\
		DB_ERROR(dbenv_, "Db::" # _name, EINVAL, error_policy()); \
		return (EINVAL);					\
	}								\
	ret = db->_name _arglist;					\
	cleanup();							\
	if (!_retok(ret))						\
		DB_ERROR(dbenv_, "Db::" # _name, ret, error_policy());	\
	return (ret);							\
}

// DB_METHOD_QUIET: pass-through that never raises; the raw return value
// goes straight back to the caller.
//
#define DB_METHOD_QUIET(_name, _argspec, _arglist)			\
int Db::_name _argspec							\
{									\
	DB *db = unwrap(this);						\
									\
	return (db->_name _arglist);					\
}

// DB_METHOD_VOID: pass-through for C methods with no return value.
//
#define DB_METHOD_VOID(_name, _argspec, _arglist)			\
void Db::_name _argspec							\
{									\
	DB *db = unwrap(this);						\
									\
	db->_name _arglist;						\
}

// A truism for the Db object is that there is a valid
// DB handle from the constructor until close().
// After the close, the DB handle is invalid and
// no operations are permitted on the Db (other than
// destructor).  Leaving the Db handle open and not
// doing a close is generally considered an error.
//
// We used to allow Db objects to be closed and reopened.
// This implied always keeping a valid DB object, and
// coordinating the open objects between Db/DbEnv turned
// out to be overly complicated.  Now we do not allow this.

// Construct the C++ wrapper.  If no DbEnv is supplied, a private
// environment is created for this handle (DB_CXX_PRIVATE_ENV) and owned
// by it.  Any failure in initialize() is recorded in construct_error_
// (and reported again later from Db::open()).
Db::Db(DbEnv *dbenv, u_int32_t flags)
:	imp_(0)
,	dbenv_(dbenv)
,	mpf_(0)
,	construct_error_(0)
,	flags_(0)
,	construct_flags_(flags)
,	append_recno_callback_(0)
,	associate_callback_(0)
,	associate_foreign_callback_(0)
,	bt_compare_callback_(0)
,	bt_compress_callback_(0)
,	bt_decompress_callback_(0)
,	bt_prefix_callback_(0)
,	db_partition_callback_(0)
,	dup_compare_callback_(0)
,	feedback_callback_(0)
,	h_compare_callback_(0)
,	h_hash_callback_(0)
{
	if (dbenv_ == 0)
		flags_ |= DB_CXX_PRIVATE_ENV;

	if ((construct_error_ = initialize()) != 0)
		DB_ERROR(dbenv_, "Db::Db", construct_error_, error_policy());
}

// If the DB handle is still open, we close it.  This is to make stack
// allocation of Db objects easier so that they are cleaned up in the error
// path.  If the environment was closed prior to this, it may cause a trap, but
// an error message is generated during the environment close.  Applications
// should call close explicitly in normal (non-exceptional) cases to check the
// return value.
//
Db::~Db()
{
	DB *db;

	db = unwrap(this);
	if (db != NULL) {
		(void)db->close(db, 0);
		cleanup();
	}
}

// private method to initialize during constructor.
// initialize must create a backing DB object,
// and if that creates a new DB_ENV, it must be tied to a new DbEnv.
//
int Db::initialize()
{
	DB *db;
	DB_ENV *cenv = unwrap(dbenv_);
	int ret;
	u_int32_t cxx_flags;

	// DB_CXX_NO_EXCEPTIONS is C++-layer only; strip it before
	// handing the flags to the C db_create().
	cxx_flags = construct_flags_ & DB_CXX_NO_EXCEPTIONS;

	// Create a new underlying DB object.
	// We rely on the fact that if a NULL DB_ENV* is given,
	// one is allocated by DB.
	//
	if ((ret = db_create(&db, cenv,
	    construct_flags_ & ~cxx_flags)) != 0)
		return (ret);

	// Associate the DB with this object, and install the alt_close
	// hook so a C-level close also invalidates this wrapper.
	imp_ = db;
	db->api_internal = this;
	db->alt_close = this->alt_close;

	// Create a new DbEnv from a DB_ENV* if it was created locally.
	// It is deleted in Db::close().
	//
	if ((flags_ & DB_CXX_PRIVATE_ENV) != 0)
		dbenv_ = new DbEnv(db->dbenv, cxx_flags);

	// Create a DbMpoolFile from the DB_MPOOLFILE* in the DB handle.
	mpf_ = new DbMpoolFile();
	mpf_->imp_ = db->mpf;

	return (0);
}

// private method to cleanup after destructor or during close.
// If the environment was created by this Db object, we need to delete it.
//
void Db::cleanup()
{
	if (imp_ != 0) {
		imp_ = 0;

		// we must dispose of the DbEnv object if
		// we created it.  This will be the case
		// if a NULL DbEnv was passed into the constructor.
		// The underlying DB_ENV object will be inaccessible
		// after the close, so we must clean it up now.
		//
		if ((flags_ & DB_CXX_PRIVATE_ENV) != 0) {
			dbenv_->cleanup();
			delete dbenv_;
			dbenv_ = 0;
		}

		delete mpf_;
	}
}

// Return a tristate value corresponding to whether we should
// throw exceptions on errors:
//   ON_ERROR_RETURN
//   ON_ERROR_THROW
//   ON_ERROR_UNKNOWN
//
int Db::error_policy()
{
	if (dbenv_ != NULL)
		return (dbenv_->error_policy());
	else {
		// If the dbenv_ is null, that means that the user
		// did not attach an environment, so the correct error
		// policy can be deduced from constructor flags
		// for this Db.
		//
		if ((construct_flags_ & DB_CXX_NO_EXCEPTIONS) != 0) {
			return (ON_ERROR_RETURN);
		}
		else {
			return (ON_ERROR_THROW);
		}
	}
}

DB_DESTRUCTOR(close, (u_int32_t flags), (db, flags), DB_RETOK_STD)
DB_METHOD(compact, (DbTxn *txnid, Dbt *start, Dbt *stop,
    DB_COMPACT *c_data, u_int32_t flags, Dbt *end),
    (db, unwrap(txnid), start, stop, c_data, flags, end), DB_RETOK_STD)

// The following cast implies that Dbc can be no larger than DBC
DB_METHOD(cursor, (DbTxn *txnid, Dbc **cursorp, u_int32_t flags),
    (db, unwrap(txnid), (DBC **)cursorp, flags),
    DB_RETOK_STD)

DB_METHOD(del, (DbTxn *txnid, Dbt *key, u_int32_t flags),
    (db, unwrap(txnid), key, flags),
    DB_RETOK_DBDEL)

void Db::err(int error, const char *format, ...)
+{ + DB *db = unwrap(this); + + DB_REAL_ERR(db->dbenv, error, DB_ERROR_SET, 1, format); +} + +void Db::errx(const char *format, ...) +{ + DB *db = unwrap(this); + + DB_REAL_ERR(db->dbenv, 0, DB_ERROR_NOT_SET, 1, format); +} + +DB_METHOD(exists, (DbTxn *txnid, Dbt *key, u_int32_t flags), + (db, unwrap(txnid), key, flags), DB_RETOK_EXISTS) + +DB_METHOD(fd, (int *fdp), (db, fdp), DB_RETOK_STD) + +int Db::get(DbTxn *txnid, Dbt *key, Dbt *value, u_int32_t flags) +{ + DB *db = unwrap(this); + int ret; + + ret = db->get(db, unwrap(txnid), key, value, flags); + + if (!DB_RETOK_DBGET(ret)) { + if (ret == DB_BUFFER_SMALL) + DB_ERROR_DBT(dbenv_, "Db::get", value, error_policy()); + else + DB_ERROR(dbenv_, "Db::get", ret, error_policy()); + } + + return (ret); +} + +int Db::get_byteswapped(int *isswapped) +{ + DB *db = (DB *)unwrapConst(this); + return (db->get_byteswapped(db, isswapped)); +} + +DbEnv *Db::get_env() +{ + DB *db = (DB *)unwrapConst(this); + DB_ENV *dbenv = db->get_env(db); + return (dbenv != NULL ? DbEnv::get_DbEnv(dbenv) : NULL); +} + +DbMpoolFile *Db::get_mpf() +{ + return (mpf_); +} + +DB_METHOD(get_dbname, (const char **filenamep, const char **dbnamep), + (db, filenamep, dbnamep), DB_RETOK_STD) + +DB_METHOD(get_open_flags, (u_int32_t *flagsp), (db, flagsp), DB_RETOK_STD) + +int Db::get_type(DBTYPE *dbtype) +{ + DB *db = (DB *)unwrapConst(this); + return (db->get_type(db, dbtype)); +} + +// Dbc is a "compatible" subclass of DBC - that is, no virtual functions +// or even extra data members, so these casts, although technically +// non-portable, "should" always be okay. +DB_METHOD(join, (Dbc **curslist, Dbc **cursorp, u_int32_t flags), + (db, (DBC **)curslist, (DBC **)cursorp, flags), DB_RETOK_STD) + +DB_METHOD(key_range, + (DbTxn *txnid, Dbt *key, DB_KEY_RANGE *results, u_int32_t flags), + (db, unwrap(txnid), key, results, flags), DB_RETOK_STD) + +// If an error occurred during the constructor, report it now. 
+// Otherwise, call the underlying DB->open method. +// +int Db::open(DbTxn *txnid, const char *file, const char *database, + DBTYPE type, u_int32_t flags, int mode) +{ + int ret; + DB *db = unwrap(this); + + if (construct_error_ != 0) + ret = construct_error_; + else + ret = db->open(db, unwrap(txnid), file, database, type, flags, + mode); + + if (!DB_RETOK_STD(ret)) + DB_ERROR(dbenv_, "Db::open", ret, error_policy()); + + return (ret); +} + +int Db::pget(DbTxn *txnid, Dbt *key, Dbt *pkey, Dbt *value, u_int32_t flags) +{ + DB *db = unwrap(this); + int ret; + + ret = db->pget(db, unwrap(txnid), key, pkey, value, flags); + + /* The logic here is identical to Db::get - reuse the macro. */ + if (!DB_RETOK_DBGET(ret)) { + if (ret == DB_BUFFER_SMALL && DB_OVERFLOWED_DBT(value)) + DB_ERROR_DBT(dbenv_, "Db::pget", value, error_policy()); + else + DB_ERROR(dbenv_, "Db::pget", ret, error_policy()); + } + + return (ret); +} + +DB_METHOD(put, (DbTxn *txnid, Dbt *key, Dbt *value, u_int32_t flags), + (db, unwrap(txnid), key, value, flags), DB_RETOK_DBPUT) + +DB_DESTRUCTOR(rename, + (const char *file, const char *database, const char *newname, + u_int32_t flags), + (db, file, database, newname, flags), DB_RETOK_STD) + +DB_DESTRUCTOR(remove, (const char *file, const char *database, u_int32_t flags), + (db, file, database, flags), DB_RETOK_STD) + +DB_METHOD(truncate, (DbTxn *txnid, u_int32_t *countp, u_int32_t flags), + (db, unwrap(txnid), countp, flags), DB_RETOK_STD) + +DB_METHOD(stat, (DbTxn *txnid, void *sp, u_int32_t flags), + (db, unwrap(txnid), sp, flags), DB_RETOK_STD) + +DB_METHOD(stat_print, (u_int32_t flags), (db, flags), DB_RETOK_STD) + +DB_METHOD(sync, (u_int32_t flags), (db, flags), DB_RETOK_STD) + +DB_METHOD(upgrade, + (const char *name, u_int32_t flags), (db, name, flags), DB_RETOK_STD) + +//////////////////////////////////////////////////////////////////////// +// +// callbacks +// +// *_intercept_c are 'glue' functions that must be declared +// as extern "C" so 
to be typesafe. Using a C++ method, even +// a static class method with 'correct' arguments, will not pass +// the test; some picky compilers do not allow mixing of function +// pointers to 'C' functions with function pointers to C++ functions. +// +// One wart with this scheme is that the *_callback_ method pointer +// must be declared public to be accessible by the C intercept. +// It's possible to accomplish the goal without this, and with +// another public transfer method, but it's just too much overhead. +// These callbacks are supposed to be *fast*. +// +// The DBTs we receive in these callbacks from the C layer may be +// manufactured there, but we want to treat them as a Dbts. +// Technically speaking, these DBTs were not constructed as a Dbts, +// but it should be safe to cast them as such given that Dbt is a +// *very* thin extension of the DBT. That is, Dbt has no additional +// data elements, does not use virtual functions, virtual inheritance, +// multiple inheritance, RTI, or any other language feature that +// causes the structure to grow or be displaced. Although this may +// sound risky, a design goal of C++ is complete structure +// compatibility with C, and has the philosophy 'if you don't use it, +// you shouldn't incur the overhead'. If the C/C++ compilers you're +// using on a given machine do not have matching struct layouts, then +// a lot more things will be broken than just this. +// +// The alternative, creating a Dbt here in the callback, and populating +// it from the DBT, is just too slow and cumbersome to be very useful. + +// These macros avoid a lot of boilerplate code for callbacks + +#define DB_CALLBACK_C_INTERCEPT(_name, _rettype, _cargspec, \ + _return, _cxxargs) \ +extern "C" _rettype _db_##_name##_intercept_c _cargspec \ +{ \ + Db *cxxthis; \ + \ + /* We don't have a dbenv handle at this point. 
*/ \ + DB_ASSERT(NULL, cthis != NULL); \ + cxxthis = Db::get_Db(cthis); \ + DB_ASSERT(cthis->dbenv->env, cxxthis != NULL); \ + DB_ASSERT(cthis->dbenv->env, cxxthis->_name##_callback_ != 0); \ + \ + _return (*cxxthis->_name##_callback_) _cxxargs; \ +} + +#define DB_SET_CALLBACK(_cxxname, _name, _cxxargspec, _cb) \ +int Db::_cxxname _cxxargspec \ +{ \ + DB *cthis = unwrap(this); \ + \ + _name##_callback_ = _cb; \ + return ((*(cthis->_cxxname))(cthis, \ + (_cb) ? _db_##_name##_intercept_c : NULL)); \ +} + +#define DB_GET_CALLBACK(_cxxname, _name, _cxxargspec, _cbp) \ +int Db::_cxxname _cxxargspec \ +{ \ + if (_cbp != NULL) \ + *(_cbp) = _name##_callback_; \ + return 0; \ +} + +/* associate callback - doesn't quite fit the pattern because of the flags */ +DB_CALLBACK_C_INTERCEPT(associate, + int, (DB *cthis, const DBT *key, const DBT *data, DBT *retval), + return, (cxxthis, Dbt::get_const_Dbt(key), Dbt::get_const_Dbt(data), + Dbt::get_Dbt(retval))) + +int Db::associate(DbTxn *txn, Db *secondary, int (*callback)(Db *, const Dbt *, + const Dbt *, Dbt *), u_int32_t flags) +{ + DB *cthis = unwrap(this); + + /* Since the secondary Db is used as the first argument + * to the callback, we store the C++ callback on it + * rather than on 'this'. + */ + secondary->associate_callback_ = callback; + return ((*(cthis->associate))(cthis, unwrap(txn), unwrap(secondary), + (callback) ? 
_db_associate_intercept_c : NULL, flags)); +} + +/* associate callback - doesn't quite fit the pattern because of the flags */ +DB_CALLBACK_C_INTERCEPT(associate_foreign, int, + (DB *cthis, const DBT *key, DBT *data, const DBT *fkey, int *changed), + return, (cxxthis, Dbt::get_const_Dbt(key), + Dbt::get_Dbt(data), Dbt::get_const_Dbt(fkey), changed)) + +int Db::associate_foreign(Db *secondary, int (*callback)(Db *, + const Dbt *, Dbt *, const Dbt *, int *), u_int32_t flags) +{ + DB *cthis = unwrap(this); + + secondary->associate_foreign_callback_ = callback; + return ((*(cthis->associate_foreign))(cthis, unwrap(secondary), + (callback) ? _db_associate_foreign_intercept_c : NULL, flags)); +} + +DB_CALLBACK_C_INTERCEPT(feedback, + void, (DB *cthis, int opcode, int pct), + /* no return */ (void), (cxxthis, opcode, pct)) + +DB_GET_CALLBACK(get_feedback, feedback, + (void (**argp)(Db *cxxthis, int opcode, int pct)), argp) + +DB_SET_CALLBACK(set_feedback, feedback, + (void (*arg)(Db *cxxthis, int opcode, int pct)), arg) + +DB_CALLBACK_C_INTERCEPT(append_recno, + int, (DB *cthis, DBT *data, db_recno_t recno), + return, (cxxthis, Dbt::get_Dbt(data), recno)) + +DB_GET_CALLBACK(get_append_recno, append_recno, + (int (**argp)(Db *cxxthis, Dbt *data, db_recno_t recno)), argp) + +DB_SET_CALLBACK(set_append_recno, append_recno, + (int (*arg)(Db *cxxthis, Dbt *data, db_recno_t recno)), arg) + +DB_CALLBACK_C_INTERCEPT(bt_compare, + int, (DB *cthis, const DBT *data1, const DBT *data2), + return, + (cxxthis, Dbt::get_const_Dbt(data1), Dbt::get_const_Dbt(data2))) + +DB_GET_CALLBACK(get_bt_compare, bt_compare, + (int (**argp)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), argp) + +DB_SET_CALLBACK(set_bt_compare, bt_compare, + (int (*arg)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), arg) + +DB_CALLBACK_C_INTERCEPT(bt_compress, + int, (DB *cthis, const DBT *data1, const DBT *data2, const DBT *data3, + const DBT *data4, DBT *data5), return, + (cxxthis, Dbt::get_const_Dbt(data1), 
Dbt::get_const_Dbt(data2), + Dbt::get_const_Dbt(data3), Dbt::get_const_Dbt(data4), Dbt::get_Dbt(data5))) + +DB_CALLBACK_C_INTERCEPT(bt_decompress, + int, (DB *cthis, const DBT *data1, const DBT *data2, DBT *data3, + DBT *data4, DBT *data5), return, + (cxxthis, Dbt::get_const_Dbt(data1), Dbt::get_const_Dbt(data2), + Dbt::get_Dbt(data3), Dbt::get_Dbt(data4), Dbt::get_Dbt(data5))) + +// The {g|s}et_bt_compress methods don't fit into the standard macro templates +// since they take two callback functions. +int Db::get_bt_compress( + int (**bt_compress) + (Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *), + int (**bt_decompress) + (Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *)) +{ + if (bt_compress != NULL) + *(bt_compress) = bt_compress_callback_; + if (bt_decompress != NULL) + *(bt_decompress) = bt_decompress_callback_; + return 0; +} + +int Db::set_bt_compress( + int (*bt_compress) + (Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *), + int (*bt_decompress)(Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *)) +{ + DB *cthis = unwrap(this); + + bt_compress_callback_ = bt_compress; + bt_decompress_callback_ = bt_decompress; + return ((*(cthis->set_bt_compress))(cthis, + (bt_compress ? _db_bt_compress_intercept_c : NULL), + (bt_decompress ? 
_db_bt_decompress_intercept_c : NULL))); + +} + +DB_CALLBACK_C_INTERCEPT(bt_prefix, + size_t, (DB *cthis, const DBT *data1, const DBT *data2), + return, + (cxxthis, Dbt::get_const_Dbt(data1), Dbt::get_const_Dbt(data2))) + +DB_GET_CALLBACK(get_bt_prefix, bt_prefix, + (size_t (**argp)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), argp) + +DB_SET_CALLBACK(set_bt_prefix, bt_prefix, + (size_t (*arg)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), arg) + +DB_CALLBACK_C_INTERCEPT(dup_compare, + int, (DB *cthis, const DBT *data1, const DBT *data2), + return, + (cxxthis, Dbt::get_const_Dbt(data1), Dbt::get_const_Dbt(data2))) + +DB_GET_CALLBACK(get_dup_compare, dup_compare, + (int (**argp)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), argp) + +DB_SET_CALLBACK(set_dup_compare, dup_compare, + (int (*arg)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), arg) + +DB_CALLBACK_C_INTERCEPT(h_compare, + int, (DB *cthis, const DBT *data1, const DBT *data2), + return, + (cxxthis, Dbt::get_const_Dbt(data1), Dbt::get_const_Dbt(data2))) + +DB_GET_CALLBACK(get_h_compare, h_compare, + (int (**argp)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), argp) + +DB_SET_CALLBACK(set_h_compare, h_compare, + (int (*arg)(Db *cxxthis, const Dbt *data1, const Dbt *data2)), arg) + +DB_CALLBACK_C_INTERCEPT(h_hash, + u_int32_t, (DB *cthis, const void *data, u_int32_t len), + return, (cxxthis, data, len)) + +DB_GET_CALLBACK(get_h_hash, h_hash, + (u_int32_t (**argp)(Db *cxxthis, const void *data, u_int32_t len)), argp) + +DB_SET_CALLBACK(set_h_hash, h_hash, + (u_int32_t (*arg)(Db *cxxthis, const void *data, u_int32_t len)), arg) + +// This is a 'glue' function declared as extern "C" so it will +// be compatible with picky compilers that do not allow mixing +// of function pointers to 'C' functions with function pointers +// to C++ functions. 
+// +extern "C" +int _verify_callback_c(void *handle, const void *str_arg) +{ + char *str; + __DB_STD(ostream) *out; + + str = (char *)str_arg; + out = (__DB_STD(ostream) *)handle; + + (*out) << str; + if (out->fail()) + return (EIO); + + return (0); +} + +int Db::verify(const char *name, const char *subdb, + __DB_STD(ostream) *ostr, u_int32_t flags) +{ + DB *db = unwrap(this); + int ret; + + if (!db) + ret = EINVAL; + else { + ret = __db_verify_internal(db, name, subdb, ostr, + _verify_callback_c, flags); + + // After a DB->verify (no matter if success or failure), + // the underlying DB object must not be accessed. + // + cleanup(); + } + + if (!DB_RETOK_STD(ret)) + DB_ERROR(dbenv_, "Db::verify", ret, error_policy()); + + return (ret); +} + +DB_METHOD(set_bt_compare, (bt_compare_fcn_type func), + (db, func), DB_RETOK_STD) +DB_METHOD(get_bt_minkey, (u_int32_t *bt_minkeyp), + (db, bt_minkeyp), DB_RETOK_STD) +DB_METHOD(set_bt_minkey, (u_int32_t bt_minkey), + (db, bt_minkey), DB_RETOK_STD) +DB_METHOD(set_bt_prefix, (bt_prefix_fcn_type func), + (db, func), DB_RETOK_STD) +DB_METHOD(set_dup_compare, (dup_compare_fcn_type func), + (db, func), DB_RETOK_STD) +DB_METHOD(get_encrypt_flags, (u_int32_t *flagsp), + (db, flagsp), DB_RETOK_STD) +DB_METHOD(set_encrypt, (const char *passwd, u_int32_t flags), + (db, passwd, flags), DB_RETOK_STD) +DB_METHOD_VOID(get_errfile, (FILE **errfilep), (db, errfilep)) +DB_METHOD_VOID(set_errfile, (FILE *errfile), (db, errfile)) +DB_METHOD_VOID(get_errpfx, (const char **errpfx), (db, errpfx)) +DB_METHOD_VOID(set_errpfx, (const char *errpfx), (db, errpfx)) +DB_METHOD(get_flags, (u_int32_t *flagsp), (db, flagsp), + DB_RETOK_STD) +DB_METHOD(set_flags, (u_int32_t flags), (db, flags), + DB_RETOK_STD) +DB_METHOD(get_heapsize, (u_int32_t *gbytesp, u_int32_t *bytesp), + (db, gbytesp, bytesp), DB_RETOK_STD) +DB_METHOD(set_heapsize, (u_int32_t gbytes, u_int32_t bytes), + (db, gbytes, bytes, 0), DB_RETOK_STD) +DB_METHOD(set_h_compare, (h_compare_fcn_type 
func), + (db, func), DB_RETOK_STD) +DB_METHOD(get_h_ffactor, (u_int32_t *h_ffactorp), + (db, h_ffactorp), DB_RETOK_STD) +DB_METHOD(set_h_ffactor, (u_int32_t h_ffactor), + (db, h_ffactor), DB_RETOK_STD) +DB_METHOD(set_h_hash, (h_hash_fcn_type func), + (db, func), DB_RETOK_STD) +DB_METHOD(get_h_nelem, (u_int32_t *h_nelemp), + (db, h_nelemp), DB_RETOK_STD) +DB_METHOD(set_h_nelem, (u_int32_t h_nelem), + (db, h_nelem), DB_RETOK_STD) +DB_METHOD(get_lorder, (int *db_lorderp), (db, db_lorderp), + DB_RETOK_STD) +DB_METHOD(set_lorder, (int db_lorder), (db, db_lorder), + DB_RETOK_STD) +DB_METHOD_VOID(get_msgfile, (FILE **msgfilep), (db, msgfilep)) +DB_METHOD_VOID(set_msgfile, (FILE *msgfile), (db, msgfile)) +DB_METHOD_QUIET(get_multiple, (), (db)) +DB_METHOD(get_pagesize, (u_int32_t *db_pagesizep), + (db, db_pagesizep), DB_RETOK_STD) +DB_METHOD(set_pagesize, (u_int32_t db_pagesize), + (db, db_pagesize), DB_RETOK_STD) + +DB_CALLBACK_C_INTERCEPT(db_partition, + u_int32_t, (DB *cthis, DBT *key), + return, (cxxthis, Dbt::get_Dbt(key))) + +// set_partition and get_partition_callback do not fit into the macro +// templates, since there is an additional argument in the API calls. +int Db::set_partition(u_int32_t parts, Dbt *keys, + u_int32_t (*arg)(Db *cxxthis, Dbt *key)) +{ + DB *cthis = unwrap(this); + + db_partition_callback_ = arg; + return ((*(cthis->set_partition))(cthis, parts, keys, + arg ? 
_db_db_partition_intercept_c : NULL)); +} + +int Db::get_partition_callback(u_int32_t *parts, + u_int32_t (**argp)(Db *cxxthis, Dbt *key)) +{ + DB *cthis = unwrap(this); + if (argp != NULL) + *(argp) = db_partition_callback_; + if (parts != NULL) + (cthis->get_partition_callback)(cthis, parts, NULL); + return 0; +} + +DB_METHOD(set_partition_dirs, (const char **dirp), (db, dirp), DB_RETOK_STD) +DB_METHOD(get_partition_dirs, (const char ***dirpp), (db, dirpp), DB_RETOK_STD) +DB_METHOD(get_partition_keys, (u_int32_t *parts, Dbt **keys), + (db, parts, (DBT **)keys), DB_RETOK_STD) +DB_METHOD(get_priority, (DB_CACHE_PRIORITY *priorityp), + (db, priorityp), DB_RETOK_STD) +DB_METHOD(set_priority, (DB_CACHE_PRIORITY priority), + (db, priority), DB_RETOK_STD) +DB_METHOD(get_re_delim, (int *re_delimp), + (db, re_delimp), DB_RETOK_STD) +DB_METHOD(set_re_delim, (int re_delim), + (db, re_delim), DB_RETOK_STD) +DB_METHOD(get_re_len, (u_int32_t *re_lenp), + (db, re_lenp), DB_RETOK_STD) +DB_METHOD(set_re_len, (u_int32_t re_len), + (db, re_len), DB_RETOK_STD) +DB_METHOD(get_re_pad, (int *re_padp), + (db, re_padp), DB_RETOK_STD) +DB_METHOD(set_re_pad, (int re_pad), + (db, re_pad), DB_RETOK_STD) +DB_METHOD(get_re_source, (const char **re_source), + (db, re_source), DB_RETOK_STD) +DB_METHOD(set_re_source, (const char *re_source), + (db, re_source), DB_RETOK_STD) +DB_METHOD(sort_multiple, (Dbt *key, Dbt *data, u_int32_t flags), + (db, key, data, flags), DB_RETOK_STD) +DB_METHOD(get_q_extentsize, (u_int32_t *extentsizep), + (db, extentsizep), DB_RETOK_STD) +DB_METHOD(set_q_extentsize, (u_int32_t extentsize), + (db, extentsize), DB_RETOK_STD) + +DB_METHOD_QUIET(get_alloc, (db_malloc_fcn_type *malloc_fcnp, + db_realloc_fcn_type *realloc_fcnp, db_free_fcn_type *free_fcnp), + (db, malloc_fcnp, realloc_fcnp, free_fcnp)) + +DB_METHOD_QUIET(set_alloc, (db_malloc_fcn_type malloc_fcn, + db_realloc_fcn_type realloc_fcn, db_free_fcn_type free_fcn), + (db, malloc_fcn, realloc_fcn, free_fcn)) + 
+void Db::get_errcall(void (**argp)(const DbEnv *, const char *, const char *)) +{ + dbenv_->get_errcall(argp); +} + +void Db::set_errcall(void (*arg)(const DbEnv *, const char *, const char *)) +{ + dbenv_->set_errcall(arg); +} + +void Db::get_msgcall(void (**argp)(const DbEnv *, const char *)) +{ + dbenv_->get_msgcall(argp); +} + +void Db::set_msgcall(void (*arg)(const DbEnv *, const char *)) +{ + dbenv_->set_msgcall(arg); +} + +void *Db::get_app_private() const +{ + return unwrapConst(this)->app_private; +} + +void Db::set_app_private(void *value) +{ + unwrap(this)->app_private = value; +} + +DB_METHOD(get_cachesize, (u_int32_t *gbytesp, u_int32_t *bytesp, int *ncachep), + (db, gbytesp, bytesp, ncachep), DB_RETOK_STD) +DB_METHOD(set_cachesize, (u_int32_t gbytes, u_int32_t bytes, int ncache), + (db, gbytes, bytes, ncache), DB_RETOK_STD) + +DB_METHOD(get_create_dir, (const char **dirp), (db, dirp), DB_RETOK_STD) +DB_METHOD(set_create_dir, (const char *dir), (db, dir), DB_RETOK_STD) + +int Db::set_paniccall(void (*callback)(DbEnv *, int)) +{ + return (dbenv_->set_paniccall(callback)); +} + +__DB_STD(ostream) *Db::get_error_stream() +{ + return dbenv_->get_error_stream(); +} + +void Db::set_error_stream(__DB_STD(ostream) *error_stream) +{ + dbenv_->set_error_stream(error_stream); +} + +__DB_STD(ostream) *Db::get_message_stream() +{ + return dbenv_->get_message_stream(); +} + +void Db::set_message_stream(__DB_STD(ostream) *message_stream) +{ + dbenv_->set_message_stream(message_stream); +} + +DB_METHOD_QUIET(get_transactional, (), (db)) + +int Db::alt_close(DB *pdb, u_int32_t flags) +{ + int ret; + + ((Db *)(pdb->api_internal))->imp_ = NULL; + ret = pdb->close(pdb, flags); + + return ret; +} diff --git a/lang/cxx/cxx_dbc.cpp b/lang/cxx/cxx_dbc.cpp new file mode 100644 index 00000000..ae36eea9 --- /dev/null +++ b/lang/cxx/cxx_dbc.cpp @@ -0,0 +1,123 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +#include "dbinc/db_page.h" +#include "dbinc_auto/db_auto.h" +#include "dbinc_auto/crdel_auto.h" +#include "dbinc/db_dispatch.h" +#include "dbinc_auto/db_ext.h" +#include "dbinc_auto/common_ext.h" + +// Helper macro for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. +// Note this macro expects that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(db, arg)") +// +#define DBC_METHOD(_name, _argspec, _arglist, _retok) \ +int Dbc::_name _argspec \ +{ \ + int ret; \ + DBC *dbc = this; \ + \ + ret = dbc->_name _arglist; \ + if (!_retok(ret)) \ + DB_ERROR(DbEnv::get_DbEnv(dbc->dbenv), \ + "Dbc::" # _name, ret, ON_ERROR_UNKNOWN); \ + return (ret); \ +} + +// It's private, and should never be called, but VC4.0 needs it resolved +// +Dbc::~Dbc() +{ +} + +DBC_METHOD(close, (void), (dbc), DB_RETOK_STD) +DBC_METHOD(cmp, (Dbc *other_cursor, int *result, u_int32_t _flags), + (dbc, other_cursor, result, _flags), DB_RETOK_STD) +DBC_METHOD(count, (db_recno_t *countp, u_int32_t _flags), + (dbc, countp, _flags), DB_RETOK_STD) +DBC_METHOD(del, (u_int32_t _flags), + (dbc, _flags), DB_RETOK_DBCDEL) + +int Dbc::dup(Dbc** cursorp, u_int32_t _flags) +{ + int ret; + DBC *dbc = this; + DBC *new_cursor = 0; + + ret = dbc->dup(dbc, &new_cursor, _flags); + + if (DB_RETOK_STD(ret)) + // The following cast implies that Dbc can be no larger than DBC + *cursorp = (Dbc*)new_cursor; + else + DB_ERROR(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::dup", ret, ON_ERROR_UNKNOWN); + + return (ret); +} + +int Dbc::get(Dbt* key, Dbt *data, u_int32_t _flags) +{ + int ret; + DBC *dbc = this; + + ret = dbc->get(dbc, key, data, _flags); + + if 
(!DB_RETOK_DBCGET(ret)) { + if (ret == DB_BUFFER_SMALL && DB_OVERFLOWED_DBT(key)) + DB_ERROR_DBT(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::get", key, ON_ERROR_UNKNOWN); + else if (ret == DB_BUFFER_SMALL && DB_OVERFLOWED_DBT(data)) + DB_ERROR_DBT(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::get", data, ON_ERROR_UNKNOWN); + else + DB_ERROR(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::get", ret, ON_ERROR_UNKNOWN); + } + + return (ret); +} + +int Dbc::pget(Dbt* key, Dbt *pkey, Dbt *data, u_int32_t _flags) +{ + int ret; + DBC *dbc = this; + + ret = dbc->pget(dbc, key, pkey, data, _flags); + + /* Logic is the same as for Dbc::get - reusing macro. */ + if (!DB_RETOK_DBCGET(ret)) { + if (ret == DB_BUFFER_SMALL && DB_OVERFLOWED_DBT(key)) + DB_ERROR_DBT(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::pget", key, ON_ERROR_UNKNOWN); + else if (ret == DB_BUFFER_SMALL && DB_OVERFLOWED_DBT(data)) + DB_ERROR_DBT(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::pget", data, ON_ERROR_UNKNOWN); + else + DB_ERROR(DbEnv::get_DbEnv(dbc->dbenv), + "Dbc::pget", ret, ON_ERROR_UNKNOWN); + } + + return (ret); +} + +DBC_METHOD(put, (Dbt* key, Dbt *data, u_int32_t _flags), + (dbc, key, data, _flags), DB_RETOK_DBCPUT) +DBC_METHOD(get_priority, (DB_CACHE_PRIORITY *priorityp), + (dbc, priorityp), DB_RETOK_STD) +DBC_METHOD(set_priority, (DB_CACHE_PRIORITY pri), (dbc, pri), DB_RETOK_STD) diff --git a/lang/cxx/cxx_dbt.cpp b/lang/cxx/cxx_dbt.cpp new file mode 100644 index 00000000..82820234 --- /dev/null +++ b/lang/cxx/cxx_dbt.cpp @@ -0,0 +1,56 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +#include "dbinc/db_page.h" +#include "dbinc_auto/db_auto.h" +#include "dbinc_auto/crdel_auto.h" +#include "dbinc/db_dispatch.h" +#include "dbinc_auto/db_ext.h" +#include "dbinc_auto/common_ext.h" + +Dbt::Dbt() +{ + DBT *dbt = this; + memset(dbt, 0, sizeof(DBT)); +} + +Dbt::Dbt(void *data_arg, u_int32_t size_arg) +{ + DBT *dbt = this; + memset(dbt, 0, sizeof(DBT)); + set_data(data_arg); + set_size(size_arg); +} + +Dbt::~Dbt() +{ +} + +Dbt::Dbt(const Dbt &that) +{ + const DBT *from = &that; + DBT *to = this; + memcpy(to, from, sizeof(DBT)); +} + +Dbt &Dbt::operator = (const Dbt &that) +{ + if (this != &that) { + const DBT *from = &that; + DBT *to = this; + memcpy(to, from, sizeof(DBT)); + } + return (*this); +} diff --git a/lang/cxx/cxx_env.cpp b/lang/cxx/cxx_env.cpp new file mode 100644 index 00000000..5f0d85d8 --- /dev/null +++ b/lang/cxx/cxx_env.cpp @@ -0,0 +1,1354 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc_auto/common_ext.h" +#include "dbinc_auto/log_ext.h" + +#ifdef HAVE_CXX_STDHEADERS +using std::cerr; +#endif + +// Helper macros for simple methods that pass through to the +// underlying C method. They may return an error or raise an exception. 
+// These macros expect that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(dbenv, arg)") +// +#define DBENV_METHOD_ERR(_name, _argspec, _arglist, _on_err) \ +int DbEnv::_name _argspec \ +{ \ + DB_ENV *dbenv = unwrap(this); \ + int ret; \ + \ + if ((ret = dbenv->_name _arglist) != 0) { \ + _on_err; \ + } \ + return (ret); \ +} + +#define DBENV_METHOD(_name, _argspec, _arglist) \ + DBENV_METHOD_ERR(_name, _argspec, _arglist, \ + DB_ERROR(this, "DbEnv::" # _name, ret, error_policy())) + +#define DBENV_METHOD_QUIET(_name, _argspec, _arglist) \ +int DbEnv::_name _argspec \ +{ \ + DB_ENV *dbenv = unwrap(this); \ + \ + return (dbenv->_name _arglist); \ +} + +#define DBENV_METHOD_VOID(_name, _argspec, _arglist) \ +void DbEnv::_name _argspec \ +{ \ + DB_ENV *dbenv = unwrap(this); \ + \ + dbenv->_name _arglist; \ +} + +// The reason for a static variable is that some structures +// (like Dbts) have no connection to any Db or DbEnv, so when +// errors occur in their methods, we must have some reasonable +// way to determine whether to throw or return errors. +// +// This variable is taken from flags whenever a DbEnv is constructed. +// Normally there is only one DbEnv per program, and even if not, +// there is typically a single policy of throwing or returning. +// +static int last_known_error_policy = ON_ERROR_UNKNOWN; + +// These 'glue' function are declared as extern "C" so they will +// be compatible with picky compilers that do not allow mixing +// of function pointers to 'C' functions with function pointers +// to C++ functions. 
+// +extern "C" +void _feedback_intercept_c(DB_ENV *dbenv, int opcode, int pct) +{ + DbEnv::_feedback_intercept(dbenv, opcode, pct); +} + +extern "C" +void _paniccall_intercept_c(DB_ENV *dbenv, int errval) +{ + DbEnv::_paniccall_intercept(dbenv, errval); +} + +extern "C" +void _event_func_intercept_c(DB_ENV *dbenv, u_int32_t event, void *event_info) +{ + DbEnv::_event_func_intercept(dbenv, event, event_info); +} + +extern "C" +void _stream_error_function_c(const DB_ENV *dbenv, + const char *prefix, const char *message) +{ + DbEnv::_stream_error_function(dbenv, prefix, message); +} + +extern "C" +void _stream_message_function_c(const DB_ENV *dbenv, const char *message) +{ + DbEnv::_stream_message_function(dbenv, message); +} + +extern "C" +int _app_dispatch_intercept_c(DB_ENV *dbenv, DBT *dbt, DB_LSN *lsn, db_recops op) +{ + return (DbEnv::_app_dispatch_intercept(dbenv, dbt, lsn, op)); +} + +extern "C" +int _rep_send_intercept_c(DB_ENV *dbenv, const DBT *cntrl, const DBT *data, + const DB_LSN *lsn, int id, u_int32_t flags) +{ + return (DbEnv::_rep_send_intercept(dbenv, + cntrl, data, lsn, id, flags)); +} + +extern "C" +void _message_dispatch_intercept_c(DB_ENV *dbenv, DB_CHANNEL *channel, + DBT *request, u_int32_t nrequest, u_int32_t cb_flags) +{ + DbEnv::_message_dispatch_intercept(dbenv, channel, request, nrequest, + cb_flags); +} + +extern "C" +int _isalive_intercept_c( + DB_ENV *dbenv, pid_t pid, db_threadid_t thrid, u_int32_t flags) +{ + return (DbEnv::_isalive_intercept(dbenv, pid, thrid, flags)); +} + +extern "C" +void _thread_id_intercept_c(DB_ENV *dbenv, pid_t *pidp, db_threadid_t *thridp) +{ + DbEnv::_thread_id_intercept(dbenv, pidp, thridp); +} + +extern "C" +char *_thread_id_string_intercept_c(DB_ENV *dbenv, pid_t pid, + db_threadid_t thrid, char *buf) +{ + return (DbEnv::_thread_id_string_intercept(dbenv, pid, thrid, buf)); +} + +void DbEnv::_feedback_intercept(DB_ENV *dbenv, int opcode, int pct) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if 
(cxxenv == 0) { + DB_ERROR(0, + "DbEnv::feedback_callback", EINVAL, ON_ERROR_UNKNOWN); + return; + } + if (cxxenv->feedback_callback_ == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::feedback_callback", EINVAL, cxxenv->error_policy()); + return; + } + (*cxxenv->feedback_callback_)(cxxenv, opcode, pct); +} + +void DbEnv::_paniccall_intercept(DB_ENV *dbenv, int errval) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(0, + "DbEnv::paniccall_callback", EINVAL, ON_ERROR_UNKNOWN); + return; + } + if (cxxenv->paniccall_callback_ == 0) { + DB_ERROR(cxxenv, "DbEnv::paniccall_callback", EINVAL, + cxxenv->error_policy()); + return; + } + (*cxxenv->paniccall_callback_)(cxxenv, errval); +} + +void DbEnv::_event_func_intercept( + DB_ENV *dbenv, u_int32_t event, void *event_info) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(0, + "DbEnv::event_func_callback", EINVAL, ON_ERROR_UNKNOWN); + return; + } + if (cxxenv->event_func_callback_ == 0) { + DB_ERROR(cxxenv, "DbEnv::event_func_callback", EINVAL, + cxxenv->error_policy()); + return; + } + (*cxxenv->event_func_callback_)(cxxenv, event, event_info); +} + +int DbEnv::_app_dispatch_intercept(DB_ENV *dbenv, DBT *dbt, DB_LSN *lsn, + db_recops op) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::app_dispatch_callback", EINVAL, ON_ERROR_UNKNOWN); + return (EINVAL); + } + if (cxxenv->app_dispatch_callback_ == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::app_dispatch_callback", EINVAL, + cxxenv->error_policy()); + return (EINVAL); + } + Dbt *cxxdbt = (Dbt *)dbt; + DbLsn *cxxlsn = (DbLsn *)lsn; + return ((*cxxenv->app_dispatch_callback_)(cxxenv, cxxdbt, cxxlsn, op)); +} + +int DbEnv::_isalive_intercept( + DB_ENV *dbenv, pid_t pid, db_threadid_t thrid, u_int32_t flags) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::isalive_callback", EINVAL, 
ON_ERROR_UNKNOWN); + return (0); + } + return ((*cxxenv->isalive_callback_)(cxxenv, pid, thrid, flags)); +} + +void DbEnv::_message_dispatch_intercept(DB_ENV *dbenv, DB_CHANNEL *dbchannel, + DBT *request, u_int32_t nrequest, u_int32_t cb_flags) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::message_dispatch_callback", EINVAL, + ON_ERROR_UNKNOWN); + else { + DbChannel *cxxchannel = (DbChannel *)dbchannel; + Dbt *cxxdbt = (Dbt *)request; + ((*cxxenv->message_dispatch_callback_)(cxxenv, cxxchannel, + cxxdbt, nrequest, cb_flags)); + } +} + +int DbEnv::_rep_send_intercept(DB_ENV *dbenv, const DBT *cntrl, const DBT *data, + const DB_LSN *lsn, int id, u_int32_t flags) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::rep_send_callback", EINVAL, ON_ERROR_UNKNOWN); + return (EINVAL); + } + const Dbt *cxxcntrl = (const Dbt *)cntrl; + const DbLsn *cxxlsn = (const DbLsn *)lsn; + Dbt *cxxdata = (Dbt *)data; + return ((*cxxenv->rep_send_callback_)(cxxenv, + cxxcntrl, cxxdata, cxxlsn, id, flags)); +} + +void DbEnv::_thread_id_intercept(DB_ENV *dbenv, + pid_t *pidp, db_threadid_t *thridp) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::thread_id_callback", EINVAL, ON_ERROR_UNKNOWN); + } else + cxxenv->thread_id_callback_(cxxenv, pidp, thridp); +} + +char *DbEnv::_thread_id_string_intercept(DB_ENV *dbenv, + pid_t pid, db_threadid_t thrid, char *buf) +{ + DbEnv *cxxenv = DbEnv::get_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(DbEnv::get_DbEnv(dbenv), + "DbEnv::thread_id_string_callback", EINVAL, + ON_ERROR_UNKNOWN); + return (NULL); + } + return (cxxenv->thread_id_string_callback_(cxxenv, pid, thrid, buf)); +} + +// A truism for the DbEnv object is that there is a valid +// DB_ENV handle from the constructor until close(). 
+// After the close, the DB_ENV handle is invalid and +// no operations are permitted on the DbEnv (other than +// destructor). Leaving the DbEnv handle open and not +// doing a close is generally considered an error. +// +// We used to allow DbEnv objects to be closed and reopened. +// This implied always keeping a valid DB_ENV object, and +// coordinating the open objects between Db/DbEnv turned +// out to be overly complicated. Now we do not allow this. + +DbEnv::DbEnv(u_int32_t flags) +: imp_(0) +, construct_error_(0) +, construct_flags_(flags) +, error_stream_(0) +, message_stream_(0) +, app_dispatch_callback_(0) +, feedback_callback_(0) +, paniccall_callback_(0) +, event_func_callback_(0) +, rep_send_callback_(0) +, message_dispatch_callback_(0) +{ + if ((construct_error_ = initialize(0)) != 0) + DB_ERROR(this, "DbEnv::DbEnv", construct_error_, + error_policy()); +} + +DbEnv::DbEnv(DB_ENV *dbenv, u_int32_t flags) +: imp_(0) +, construct_error_(0) +, construct_flags_(flags) +, error_stream_(0) +, message_stream_(0) +, app_dispatch_callback_(0) +, feedback_callback_(0) +, paniccall_callback_(0) +, event_func_callback_(0) +, rep_send_callback_(0) +, message_dispatch_callback_(0) +{ + if ((construct_error_ = initialize(dbenv)) != 0) + DB_ERROR(this, "DbEnv::DbEnv", construct_error_, + error_policy()); +} + +// If the DB_ENV handle is still open, we close it. This is to make stack +// allocation of DbEnv objects easier so that they are cleaned up in the error +// path. Note that the C layer catches cases where handles are open in the +// environment at close time and reports an error. Applications should call +// close explicitly in normal (non-exceptional) cases to check the return +// value. +// +DbEnv::~DbEnv() +{ + DB_ENV *dbenv = unwrap(this); + + /* + * Specify DB_FORCESYNC to make sure databases are sync'ed to disk. + * Users can call DbEnv::close with 0 as real parameter to close all + * but the last environment object/handle. 
Doing so can avoid + * unnecessary database syncs. The last environment object/handle + * must be closed with DB_FORCESYNC parameter, or be closed via this + * function. + */ + if (dbenv != NULL) { + (void)dbenv->close(dbenv, DB_FORCESYNC); + cleanup(); + } +} + +// called by destructors before the DB_ENV is destroyed. +void DbEnv::cleanup() +{ + imp_ = 0; +} + +int DbEnv::close(u_int32_t flags) +{ + int ret; + DB_ENV *dbenv = unwrap(this); + + ret = dbenv->close(dbenv, flags); + + // after a close (no matter if success or failure), + // the underlying DB_ENV object must not be accessed. + cleanup(); + + // It's safe to throw an error after the close, + // since our error mechanism does not peer into + // the DB* structures. + // + if (ret != 0) + DB_ERROR(this, "DbEnv::close", ret, error_policy()); + + return (ret); +} + +DBENV_METHOD(dbremove, + (DbTxn *txn, const char *name, const char *subdb, u_int32_t flags), + (dbenv, unwrap(txn), name, subdb, flags)) +DBENV_METHOD(dbrename, (DbTxn *txn, const char *name, const char *subdb, + const char *newname, u_int32_t flags), + (dbenv, unwrap(txn), name, subdb, newname, flags)) + +void DbEnv::err(int error, const char *format, ...) +{ + DB_ENV *dbenv = unwrap(this); + + DB_REAL_ERR(dbenv, error, DB_ERROR_SET, 1, format); +} + +// Return a tristate value corresponding to whether we should +// throw exceptions on errors: +// ON_ERROR_RETURN +// ON_ERROR_THROW +// ON_ERROR_UNKNOWN +// +int DbEnv::error_policy() +{ + if ((construct_flags_ & DB_CXX_NO_EXCEPTIONS) != 0) { + return (ON_ERROR_RETURN); + } + else { + return (ON_ERROR_THROW); + } +} + +void DbEnv::errx(const char *format, ...) 
+{ + DB_ENV *dbenv = unwrap(this); + + DB_REAL_ERR(dbenv, 0, DB_ERROR_NOT_SET, 1, format); +} + +void *DbEnv::get_app_private() const +{ + return unwrapConst(this)->app_private; +} + +DBENV_METHOD(failchk, (u_int32_t flags), (dbenv, flags)) +DBENV_METHOD(fileid_reset, (const char *file, u_int32_t flags), + (dbenv, file, flags)) +DBENV_METHOD(get_home, (const char **homep), (dbenv, homep)) +DBENV_METHOD(get_open_flags, (u_int32_t *flagsp), (dbenv, flagsp)) +DBENV_METHOD(get_data_dirs, (const char ***dirspp), (dbenv, dirspp)) + +bool DbEnv::is_bigendian() +{ + return unwrap(this)->is_bigendian() ? true : false; +} + +DBENV_METHOD(get_thread_count, (u_int32_t *count), (dbenv, count)) +DBENV_METHOD(set_thread_count, (u_int32_t count), (dbenv, count)) + +// used internally during constructor +// to associate an existing DB_ENV with this DbEnv, +// or create a new one. +// +int DbEnv::initialize(DB_ENV *dbenv) +{ + int ret; + + last_known_error_policy = error_policy(); + + if (dbenv == 0) { + // Create a new DB_ENV environment. 
+ if ((ret = ::db_env_create(&dbenv, + construct_flags_ & ~DB_CXX_NO_EXCEPTIONS)) != 0) + return (ret); + } + imp_ = dbenv; + dbenv->api1_internal = this; // for DB_ENV* to DbEnv* conversion + return (0); +} + +// lock methods +DBENV_METHOD(lock_detect, (u_int32_t flags, u_int32_t atype, int *aborted), + (dbenv, flags, atype, aborted)) +DBENV_METHOD_ERR(lock_get, + (u_int32_t locker, u_int32_t flags, Dbt *obj, + db_lockmode_t lock_mode, DbLock *lock), + (dbenv, locker, flags, obj, lock_mode, &lock->lock_), + DbEnv::runtime_error_lock_get(this, "DbEnv::lock_get", ret, + DB_LOCK_GET, lock_mode, obj, *lock, + -1, error_policy())) +DBENV_METHOD(lock_id, (u_int32_t *idp), (dbenv, idp)) +DBENV_METHOD(lock_id_free, (u_int32_t id), (dbenv, id)) +DBENV_METHOD(lock_put, (DbLock *lock), (dbenv, &lock->lock_)) +DBENV_METHOD(lock_stat, (DB_LOCK_STAT **statp, u_int32_t flags), + (dbenv, statp, flags)) +DBENV_METHOD(lock_stat_print, (u_int32_t flags), (dbenv, flags)) +DBENV_METHOD_ERR(lock_vec, + (u_int32_t locker, u_int32_t flags, DB_LOCKREQ list[], + int nlist, DB_LOCKREQ **elist_returned), + (dbenv, locker, flags, list, nlist, elist_returned), + DbEnv::runtime_error_lock_get(this, "DbEnv::lock_vec", ret, + (*elist_returned)->op, (*elist_returned)->mode, + Dbt::get_Dbt((*elist_returned)->obj), DbLock((*elist_returned)->lock), + (int)((*elist_returned) - list), error_policy())) +// log methods +DBENV_METHOD(log_archive, (char **list[], u_int32_t flags), + (dbenv, list, flags)) + +int DbEnv::log_compare(const DbLsn *lsn0, const DbLsn *lsn1) +{ + return (::log_compare(lsn0, lsn1)); +} + +// The following cast implies that DbLogc can be no larger than DB_LOGC +DBENV_METHOD(log_cursor, (DbLogc **cursorp, u_int32_t flags), + (dbenv, (DB_LOGC **)cursorp, flags)) +DBENV_METHOD(log_file, (DbLsn *lsn, char *namep, size_t len), + (dbenv, lsn, namep, len)) +DBENV_METHOD(log_flush, (const DbLsn *lsn), (dbenv, lsn)) +DBENV_METHOD(log_get_config, (u_int32_t which, int *onoffp), + (dbenv, 
which, onoffp)) +DBENV_METHOD(log_put, (DbLsn *lsn, const Dbt *data, u_int32_t flags), + (dbenv, lsn, data, flags)) + +int DbEnv::log_printf(DbTxn *txn, const char *fmt, ...) +{ + DB_ENV *dbenv = unwrap(this); + va_list ap; + int ret; + + va_start(ap, fmt); + ret = __log_printf_pp(dbenv, unwrap(txn), fmt, ap); + va_end(ap); + + return (ret); +} + +DBENV_METHOD(log_set_config, (u_int32_t which, int onoff), + (dbenv, which, onoff)) +DBENV_METHOD(log_stat, (DB_LOG_STAT **spp, u_int32_t flags), + (dbenv, spp, flags)) +DBENV_METHOD(log_stat_print, (u_int32_t flags), (dbenv, flags)) + +int DbEnv::log_verify(DB_LOG_VERIFY_CONFIG *config) +{ + DB_ENV *dbenv = unwrap(this); + + // Simply return the error, don't throw exceptions. + return dbenv->log_verify(dbenv, config); +} + +DBENV_METHOD(lsn_reset, (const char *file, u_int32_t flags), + (dbenv, file, flags)) + +int DbEnv::memp_fcreate(DbMpoolFile **dbmfp, u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + DB_MPOOLFILE *mpf; + + if (dbenv == NULL) + ret = EINVAL; + else + ret = dbenv->memp_fcreate(dbenv, &mpf, flags); + + if (DB_RETOK_STD(ret)) { + *dbmfp = new DbMpoolFile(); + (*dbmfp)->imp_ = mpf; + } else + DB_ERROR(this, "DbMpoolFile::f_create", ret, ON_ERROR_UNKNOWN); + + return (ret); +} + +DBENV_METHOD(memp_register, + (int ftype, pgin_fcn_type pgin_fcn, pgout_fcn_type pgout_fcn), + (dbenv, ftype, pgin_fcn, pgout_fcn)) + +// memory pool methods +DBENV_METHOD(memp_stat, + (DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp, u_int32_t flags), + (dbenv, gsp, fsp, flags)) +DBENV_METHOD(memp_stat_print, (u_int32_t flags), (dbenv, flags)) +DBENV_METHOD(memp_sync, (DbLsn *sn), (dbenv, sn)) +DBENV_METHOD(memp_trickle, (int pct, int *nwrotep), (dbenv, pct, nwrotep)) + +// If an error occurred during the constructor, report it now. +// Otherwise, call the underlying DB->open method. 
+// +int DbEnv::open(const char *db_home, u_int32_t flags, int mode) +{ + int ret; + DB_ENV *dbenv = unwrap(this); + + if (construct_error_ != 0) + ret = construct_error_; + else + ret = dbenv->open(dbenv, db_home, flags, mode); + + if (!DB_RETOK_STD(ret)) + DB_ERROR(this, "DbEnv::open", ret, error_policy()); + + return (ret); +} + +int DbEnv::remove(const char *db_home, u_int32_t flags) +{ + int ret; + DB_ENV *dbenv = unwrap(this); + + ret = dbenv->remove(dbenv, db_home, flags); + + // after a remove (no matter if success or failure), + // the underlying DB_ENV object must not be accessed, + // so we clean up in advance. + // + cleanup(); + + if (ret != 0) + DB_ERROR(this, "DbEnv::remove", ret, error_policy()); + + return (ret); +} + +// Report an error associated with the DbEnv. +// error_policy is one of: +// ON_ERROR_THROW throw an error +// ON_ERROR_RETURN do nothing here, the caller will return an error +// ON_ERROR_UNKNOWN defer the policy to policy saved in DbEnv::DbEnv +// +void DbEnv::runtime_error(DbEnv *dbenv, + const char *caller, int error, int error_policy) +{ + if (error_policy == ON_ERROR_UNKNOWN) + error_policy = last_known_error_policy; + if (error_policy == ON_ERROR_THROW) { + // Creating and throwing the object in two separate + // statements seems to be necessary for HP compilers. 
+ switch (error) { + case DB_LOCK_DEADLOCK: + { + DbDeadlockException dl_except(caller); + dl_except.set_env(dbenv); + throw dl_except; + } + case DB_LOCK_NOTGRANTED: + { + DbLockNotGrantedException lng_except(caller); + lng_except.set_env(dbenv); + throw lng_except; + } + case DB_REP_HANDLE_DEAD: + { + DbRepHandleDeadException hd_except(caller); + hd_except.set_env(dbenv); + throw hd_except; + } + case DB_RUNRECOVERY: + { + DbRunRecoveryException rr_except(caller); + rr_except.set_env(dbenv); + throw rr_except; + } + default: + { + DbException except(caller, error); + except.set_env(dbenv); + throw except; + } + } + } +} + +// Like DbEnv::runtime_error, but issue a DbMemoryException +// based on the fact that this Dbt is not large enough. +void DbEnv::runtime_error_dbt(DbEnv *dbenv, + const char *caller, Dbt *dbt, int error_policy) +{ + if (error_policy == ON_ERROR_UNKNOWN) + error_policy = last_known_error_policy; + if (error_policy == ON_ERROR_THROW) { + // Creating and throwing the object in two separate + // statements seems to be necessary for HP compilers. + DbMemoryException except(caller, dbt); + except.set_env(dbenv); + throw except; + } +} + +// Like DbEnv::runtime_error, but issue a DbLockNotGrantedException, +// or a regular runtime error. +// call regular runtime_error if it +void DbEnv::runtime_error_lock_get(DbEnv *dbenv, + const char *caller, int error, + db_lockop_t op, db_lockmode_t mode, Dbt *obj, + DbLock lock, int index, int error_policy) +{ + if (error != DB_LOCK_NOTGRANTED) { + runtime_error(dbenv, caller, error, error_policy); + return; + } + + if (error_policy == ON_ERROR_UNKNOWN) + error_policy = last_known_error_policy; + if (error_policy == ON_ERROR_THROW) { + // Creating and throwing the object in two separate + // statements seems to be necessary for HP compilers. 
+ DbLockNotGrantedException except(caller, op, mode, + obj, lock, index); + except.set_env(dbenv); + throw except; + } +} + +void DbEnv::_stream_error_function( + const DB_ENV *dbenv, const char *prefix, const char *message) +{ + const DbEnv *cxxenv = DbEnv::get_const_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(0, + "DbEnv::stream_error", EINVAL, ON_ERROR_UNKNOWN); + return; + } + + if (cxxenv->error_callback_) + cxxenv->error_callback_(cxxenv, prefix, message); + else if (cxxenv->error_stream_) { + // HP compilers need the extra casts, we don't know why. + if (prefix) { + (*cxxenv->error_stream_) << prefix; + (*cxxenv->error_stream_) << (const char *)": "; + } + if (message) + (*cxxenv->error_stream_) << (const char *)message; + (*cxxenv->error_stream_) << (const char *)"\n"; + } +} + +void DbEnv::_stream_message_function(const DB_ENV *dbenv, const char *message) +{ + const DbEnv *cxxenv = DbEnv::get_const_DbEnv(dbenv); + if (cxxenv == 0) { + DB_ERROR(0, + "DbEnv::stream_message", EINVAL, ON_ERROR_UNKNOWN); + return; + } + + if (cxxenv->message_callback_) + cxxenv->message_callback_(cxxenv, message); + else if (cxxenv->message_stream_) { + // HP compilers need the extra casts, we don't know why. + (*cxxenv->message_stream_) << (const char *)message; + (*cxxenv->message_stream_) << (const char *)"\n"; + } +} + +// static method +char *DbEnv::strerror(int error) +{ + return (db_strerror(error)); +} + +// We keep these alphabetical by field name, +// for comparison with Java's list. 
+// +DBENV_METHOD(set_data_dir, (const char *dir), (dbenv, dir)) +DBENV_METHOD(get_encrypt_flags, (u_int32_t *flagsp), + (dbenv, flagsp)) +DBENV_METHOD(set_encrypt, (const char *passwd, u_int32_t flags), + (dbenv, passwd, flags)) +DBENV_METHOD_VOID(get_errfile, (FILE **errfilep), (dbenv, errfilep)) +DBENV_METHOD_VOID(set_errfile, (FILE *errfile), (dbenv, errfile)) +DBENV_METHOD_VOID(get_errpfx, (const char **errpfxp), (dbenv, errpfxp)) +DBENV_METHOD_VOID(set_errpfx, (const char *errpfx), (dbenv, errpfx)) +DBENV_METHOD(get_intermediate_dir_mode, (const char **modep), (dbenv, modep)) +DBENV_METHOD(set_intermediate_dir_mode, (const char *mode), (dbenv, mode)) +DBENV_METHOD(get_lg_bsize, (u_int32_t *bsizep), (dbenv, bsizep)) +DBENV_METHOD(set_lg_bsize, (u_int32_t bsize), (dbenv, bsize)) +DBENV_METHOD(get_lg_dir, (const char **dirp), (dbenv, dirp)) +DBENV_METHOD(set_lg_dir, (const char *dir), (dbenv, dir)) +DBENV_METHOD(get_lg_filemode, (int *modep), (dbenv, modep)) +DBENV_METHOD(set_lg_filemode, (int mode), (dbenv, mode)) +DBENV_METHOD(get_lg_max, (u_int32_t *maxp), (dbenv, maxp)) +DBENV_METHOD(set_lg_max, (u_int32_t max), (dbenv, max)) +DBENV_METHOD(get_lg_regionmax, (u_int32_t *regionmaxp), (dbenv, regionmaxp)) +DBENV_METHOD(set_lg_regionmax, (u_int32_t regionmax), (dbenv, regionmax)) +DBENV_METHOD(get_lk_conflicts, (const u_int8_t **lk_conflictsp, int *lk_maxp), + (dbenv, lk_conflictsp, lk_maxp)) +DBENV_METHOD(set_lk_conflicts, (u_int8_t *lk_conflicts, int lk_max), + (dbenv, lk_conflicts, lk_max)) +DBENV_METHOD(get_lk_detect, (u_int32_t *detectp), (dbenv, detectp)) +DBENV_METHOD(set_lk_detect, (u_int32_t detect), (dbenv, detect)) +DBENV_METHOD(get_lk_max_lockers, (u_int32_t *max_lockersp), + (dbenv, max_lockersp)) +DBENV_METHOD(set_lk_max_lockers, (u_int32_t max_lockers), (dbenv, max_lockers)) +DBENV_METHOD(get_lk_max_locks, (u_int32_t *max_locksp), (dbenv, max_locksp)) +DBENV_METHOD(set_lk_max_locks, (u_int32_t max_locks), (dbenv, max_locks)) 
+DBENV_METHOD(get_lk_max_objects, (u_int32_t *max_objectsp), + (dbenv, max_objectsp)) +DBENV_METHOD(set_lk_max_objects, (u_int32_t max_objects), (dbenv, max_objects)) +DBENV_METHOD(get_lk_partitions, (u_int32_t *partitionsp), (dbenv, partitionsp)) +DBENV_METHOD(set_lk_partitions, (u_int32_t partitions), (dbenv, partitions)) +DBENV_METHOD(get_lk_priority, (u_int32_t lockerid, u_int32_t *priorityp), (dbenv, lockerid, priorityp)) +DBENV_METHOD(set_lk_priority, (u_int32_t lockerid, u_int32_t priority), (dbenv, lockerid, priority)) +DBENV_METHOD(get_lk_tablesize, (u_int32_t *tablesize), (dbenv, tablesize)) +DBENV_METHOD(set_lk_tablesize, (u_int32_t tablesize), (dbenv, tablesize)) +DBENV_METHOD(get_memory_init, (DB_MEM_CONFIG type, u_int32_t *count), (dbenv, type, count)) +DBENV_METHOD(set_memory_init, (DB_MEM_CONFIG type, u_int32_t count), (dbenv, type, count)) +DBENV_METHOD(get_memory_max, (u_int32_t *gbytes, u_int32_t *bytes), (dbenv, gbytes, bytes)) +DBENV_METHOD(set_memory_max, (u_int32_t gbytes, u_int32_t bytes), (dbenv, gbytes, bytes)) +DBENV_METHOD(get_mp_max_openfd, (int *maxopenfdp), (dbenv, maxopenfdp)) +DBENV_METHOD(set_mp_max_openfd, (int maxopenfd), (dbenv, maxopenfd)) +DBENV_METHOD(get_mp_max_write, (int *maxwritep, db_timeout_t *maxwrite_sleepp), + (dbenv, maxwritep, maxwrite_sleepp)) +DBENV_METHOD(set_mp_max_write, (int maxwrite, db_timeout_t maxwrite_sleep), + (dbenv, maxwrite, maxwrite_sleep)) +DBENV_METHOD(get_mp_mmapsize, (size_t *mmapsizep), (dbenv, mmapsizep)) +DBENV_METHOD(set_mp_mmapsize, (size_t mmapsize), (dbenv, mmapsize)) +DBENV_METHOD(get_mp_pagesize, (u_int32_t *pagesizep), (dbenv, pagesizep)) +DBENV_METHOD(set_mp_pagesize, (u_int32_t pagesize), (dbenv, pagesize)) +DBENV_METHOD(get_mp_tablesize, (u_int32_t *tablesizep), (dbenv, tablesizep)) +DBENV_METHOD(set_mp_tablesize, (u_int32_t tablesize), (dbenv, tablesize)) +DBENV_METHOD_VOID(get_msgfile, (FILE **msgfilep), (dbenv, msgfilep)) +DBENV_METHOD_VOID(set_msgfile, (FILE *msgfile), (dbenv, 
msgfile)) +DBENV_METHOD(get_tmp_dir, (const char **tmp_dirp), (dbenv, tmp_dirp)) +DBENV_METHOD(set_tmp_dir, (const char *tmp_dir), (dbenv, tmp_dir)) +DBENV_METHOD(get_tx_max, (u_int32_t *tx_maxp), (dbenv, tx_maxp)) +DBENV_METHOD(set_tx_max, (u_int32_t tx_max), (dbenv, tx_max)) + +DBENV_METHOD(stat_print, (u_int32_t flags), (dbenv, flags)) + +DBENV_METHOD_QUIET(get_alloc, + (db_malloc_fcn_type *malloc_fcnp, db_realloc_fcn_type *realloc_fcnp, + db_free_fcn_type *free_fcnp), + (dbenv, malloc_fcnp, realloc_fcnp, free_fcnp)) + +DBENV_METHOD_QUIET(set_alloc, + (db_malloc_fcn_type malloc_fcn, db_realloc_fcn_type realloc_fcn, + db_free_fcn_type free_fcn), + (dbenv, malloc_fcn, realloc_fcn, free_fcn)) + +void DbEnv::set_app_private(void *value) +{ + unwrap(this)->app_private = value; +} + +DBENV_METHOD(get_cachesize, + (u_int32_t *gbytesp, u_int32_t *bytesp, int *ncachep), + (dbenv, gbytesp, bytesp, ncachep)) +DBENV_METHOD(set_cachesize, + (u_int32_t gbytes, u_int32_t bytes, int ncache), + (dbenv, gbytes, bytes, ncache)) +DBENV_METHOD(get_cache_max, (u_int32_t *gbytesp, u_int32_t *bytesp), + (dbenv, gbytesp, bytesp)) +DBENV_METHOD(set_cache_max, (u_int32_t gbytes, u_int32_t bytes), + (dbenv, gbytes, bytes)) +DBENV_METHOD(get_create_dir, (const char **dirp), (dbenv, dirp)) +DBENV_METHOD(set_create_dir, (const char *dir), (dbenv, dir)) + +void DbEnv::get_errcall(void (**argp)(const DbEnv *, const char *, const char *)) +{ + if (argp != NULL) + *argp = error_callback_; + return; +} + +void DbEnv::set_errcall(void (*arg)(const DbEnv *, const char *, const char *)) +{ + DB_ENV *dbenv = unwrap(this); + + error_callback_ = arg; + error_stream_ = 0; + + dbenv->set_errcall(dbenv, (arg == 0) ? 
0 : + _stream_error_function_c); +} + +__DB_STD(ostream) *DbEnv::get_error_stream() +{ + return (error_stream_); +} + +void DbEnv::set_error_stream(__DB_STD(ostream) *stream) +{ + DB_ENV *dbenv = unwrap(this); + + error_stream_ = stream; + error_callback_ = 0; + + dbenv->set_errcall(dbenv, (stream == 0) ? 0 : + _stream_error_function_c); +} + +int DbEnv::get_feedback(void (**argp)(DbEnv *, int, int)) +{ + if (argp != NULL) + *argp = feedback_callback_; + return 0; +} + +int DbEnv::set_feedback(void (*arg)(DbEnv *, int, int)) +{ + DB_ENV *dbenv = unwrap(this); + + feedback_callback_ = arg; + + return (dbenv->set_feedback(dbenv, + arg == 0 ? 0 : _feedback_intercept_c)); +} + +DBENV_METHOD(get_flags, (u_int32_t *flagsp), (dbenv, flagsp)) +DBENV_METHOD(set_flags, (u_int32_t flags, int onoff), (dbenv, flags, onoff)) + +void DbEnv::get_msgcall(void (**argp)(const DbEnv *, const char *)) +{ + if (argp != NULL) + *argp = message_callback_; +} + +void DbEnv::set_msgcall(void (*arg)(const DbEnv *, const char *)) +{ + DB_ENV *dbenv = unwrap(this); + + message_callback_ = arg; + message_stream_ = 0; + + dbenv->set_msgcall(dbenv, (arg == 0) ? 0 : + _stream_message_function_c); +} + +__DB_STD(ostream) *DbEnv::get_message_stream() +{ + return (message_stream_); +} + +void DbEnv::set_message_stream(__DB_STD(ostream) *stream) +{ + DB_ENV *dbenv = unwrap(this); + + message_stream_ = stream; + message_callback_ = 0; + + dbenv->set_msgcall(dbenv, (stream == 0) ? 0 : + _stream_message_function_c); +} + +int DbEnv::set_paniccall(void (*arg)(DbEnv *, int)) +{ + DB_ENV *dbenv = unwrap(this); + + paniccall_callback_ = arg; + + return (dbenv->set_paniccall(dbenv, + arg == 0 ? 0 : _paniccall_intercept_c)); +} + +int DbEnv::set_event_notify(void (*arg)(DbEnv *, u_int32_t, void *)) +{ + DB_ENV *dbenv = unwrap(this); + + event_func_callback_ = arg; + + return (dbenv->set_event_notify(dbenv, + arg == 0 ? 
0 : _event_func_intercept_c)); +} + +DBENV_METHOD(get_shm_key, (long *shm_keyp), (dbenv, shm_keyp)) +DBENV_METHOD(set_shm_key, (long shm_key), (dbenv, shm_key)) + +int DbEnv::get_app_dispatch + (int (**argp)(DbEnv *, Dbt *, DbLsn *, db_recops)) +{ + if (argp != NULL) + *argp = app_dispatch_callback_; + return 0; +} + +int DbEnv::set_app_dispatch + (int (*arg)(DbEnv *, Dbt *, DbLsn *, db_recops)) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + app_dispatch_callback_ = arg; + if ((ret = dbenv->set_app_dispatch(dbenv, + arg == 0 ? 0 : _app_dispatch_intercept_c)) != 0) + DB_ERROR(this, "DbEnv::set_app_dispatch", ret, error_policy()); + + return (ret); +} + +int DbEnv::get_isalive + (int (**argp)(DbEnv *, pid_t, db_threadid_t, u_int32_t)) +{ + if (argp != NULL) + *argp = isalive_callback_; + return 0; +} + +int DbEnv::set_isalive + (int (*arg)(DbEnv *, pid_t, db_threadid_t, u_int32_t)) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + isalive_callback_ = arg; + if ((ret = dbenv->set_isalive(dbenv, + arg == 0 ? 
0 : _isalive_intercept_c)) != 0) + DB_ERROR(this, "DbEnv::set_isalive", ret, error_policy()); + + return (ret); +} + +DBENV_METHOD(get_tx_timestamp, (time_t *timestamp), (dbenv, timestamp)) +DBENV_METHOD(set_tx_timestamp, (time_t *timestamp), (dbenv, timestamp)) +DBENV_METHOD(get_verbose, (u_int32_t which, int *onoffp), + (dbenv, which, onoffp)) +DBENV_METHOD(set_verbose, (u_int32_t which, int onoff), (dbenv, which, onoff)) + +DBENV_METHOD(mutex_alloc, + (u_int32_t flags, db_mutex_t *mutexp), (dbenv, flags, mutexp)) +DBENV_METHOD(mutex_free, (db_mutex_t mutex), (dbenv, mutex)) +DBENV_METHOD(mutex_get_align, (u_int32_t *argp), (dbenv, argp)) +DBENV_METHOD(mutex_get_increment, (u_int32_t *argp), (dbenv, argp)) +DBENV_METHOD(mutex_get_init, (u_int32_t *argp), (dbenv, argp)) +DBENV_METHOD(mutex_get_max, (u_int32_t *argp), (dbenv, argp)) +DBENV_METHOD(mutex_get_tas_spins, (u_int32_t *argp), (dbenv, argp)) +DBENV_METHOD(mutex_lock, (db_mutex_t mutex), (dbenv, mutex)) +DBENV_METHOD(mutex_set_align, (u_int32_t arg), (dbenv, arg)) +DBENV_METHOD(mutex_set_increment, (u_int32_t arg), (dbenv, arg)) +DBENV_METHOD(mutex_set_init, (u_int32_t arg), (dbenv, arg)) +DBENV_METHOD(mutex_set_max, (u_int32_t arg), (dbenv, arg)) +DBENV_METHOD(mutex_set_tas_spins, (u_int32_t arg), (dbenv, arg)) +DBENV_METHOD(mutex_stat, + (DB_MUTEX_STAT **statp, u_int32_t flags), (dbenv, statp, flags)) +DBENV_METHOD(mutex_stat_print, (u_int32_t flags), (dbenv, flags)) +DBENV_METHOD(mutex_unlock, (db_mutex_t mutex), (dbenv, mutex)) + +int DbEnv::get_thread_id_fn(void (**argp)(DbEnv *, pid_t *, db_threadid_t *)) +{ + if (argp != NULL) + *argp = thread_id_callback_; + return 0; +} + +int DbEnv::set_thread_id(void (*arg)(DbEnv *, pid_t *, db_threadid_t *)) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + thread_id_callback_ = arg; + if ((ret = dbenv->set_thread_id(dbenv, + arg == 0 ? 
0 : _thread_id_intercept_c)) != 0) + DB_ERROR(this, "DbEnv::set_thread_id", ret, error_policy()); + + return (ret); +} + +int DbEnv::get_thread_id_string_fn( + char *(**argp)(DbEnv *, pid_t, db_threadid_t, char *)) +{ + if (argp != NULL) + *argp = thread_id_string_callback_; + return 0; +} + +int DbEnv::set_thread_id_string( + char *(*arg)(DbEnv *, pid_t, db_threadid_t, char *)) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + thread_id_string_callback_ = arg; + if ((ret = dbenv->set_thread_id_string(dbenv, + arg == 0 ? 0 : _thread_id_string_intercept_c)) != 0) + DB_ERROR(this, "DbEnv::set_thread_id_string", ret, + error_policy()); + + return (ret); +} + +DBENV_METHOD(add_data_dir, (const char *dir), (dbenv, dir)) + +int DbEnv::cdsgroup_begin(DbTxn **tid) +{ + DB_ENV *dbenv = unwrap(this); + DB_TXN *txn; + int ret; + + ret = dbenv->cdsgroup_begin(dbenv, &txn); + if (DB_RETOK_STD(ret)) + *tid = new DbTxn(txn, NULL); + else + DB_ERROR(this, "DbEnv::cdsgroup_begin", ret, error_policy()); + + return (ret); +} + +int DbEnv::txn_begin(DbTxn *pid, DbTxn **tid, u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + DB_TXN *txn; + int ret; + + ret = dbenv->txn_begin(dbenv, unwrap(pid), &txn, flags); + if (DB_RETOK_STD(ret)) + *tid = new DbTxn(txn, pid); + else + DB_ERROR(this, "DbEnv::txn_begin", ret, error_policy()); + + return (ret); +} + +DBENV_METHOD(txn_checkpoint, (u_int32_t kbyte, u_int32_t min, u_int32_t flags), + (dbenv, kbyte, min, flags)) + +int DbEnv::txn_recover(DbPreplist *preplist, long count, + long *retp, u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + DB_PREPLIST *c_preplist; + long i; + int ret; + + /* + * We need to allocate some local storage for the + * returned preplist, and that requires us to do + * our own argument validation. 
+ */ + if (count <= 0) + ret = EINVAL; + else + ret = __os_malloc(dbenv->env, sizeof(DB_PREPLIST) * count, + &c_preplist); + + if (ret != 0) { + DB_ERROR(this, "DbEnv::txn_recover", ret, error_policy()); + return (ret); + } + + if ((ret = + dbenv->txn_recover(dbenv, c_preplist, count, retp, flags)) != 0) { + __os_free(dbenv->env, c_preplist); + DB_ERROR(this, "DbEnv::txn_recover", ret, error_policy()); + return (ret); + } + + for (i = 0; i < *retp; i++) { + preplist[i].txn = new DbTxn(NULL); + preplist[i].txn->imp_ = c_preplist[i].txn; + memcpy(preplist[i].gid, c_preplist[i].gid, + sizeof(preplist[i].gid)); + } + + __os_free(dbenv->env, c_preplist); + + return (0); +} + +DBENV_METHOD(txn_stat, (DB_TXN_STAT **statp, u_int32_t flags), + (dbenv, statp, flags)) +DBENV_METHOD(txn_stat_print, (u_int32_t flags), (dbenv, flags)) + +int DbEnv::rep_set_transport(int myid, int (*arg)(DbEnv *, + const Dbt *, const Dbt *, const DbLsn *, int, u_int32_t)) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + rep_send_callback_ = arg; + if ((ret = dbenv->rep_set_transport(dbenv, myid, + arg == 0 ? 
0 : _rep_send_intercept_c)) != 0) + DB_ERROR(this, "DbEnv::rep_set_transport", ret, error_policy()); + + return (ret); +} + +DBENV_METHOD(rep_elect, (u_int32_t nsites, u_int32_t nvotes, u_int32_t flags), + (dbenv, nsites, nvotes, flags)) +DBENV_METHOD(rep_flush, (), (dbenv)) +DBENV_METHOD(rep_get_config, (u_int32_t which, int *onoffp), + (dbenv, which, onoffp)) +DBENV_METHOD(rep_get_request, (u_int32_t *min, u_int32_t *max), + (dbenv, min, max)) +DBENV_METHOD(rep_set_request, (u_int32_t min, u_int32_t max), (dbenv, min, max)) + +int DbEnv::rep_process_message(Dbt *control, + Dbt *rec, int id, DbLsn *ret_lsnp) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + ret = dbenv->rep_process_message(dbenv, control, rec, id, ret_lsnp); + if (!DB_RETOK_REPPMSG(ret)) + DB_ERROR(this, "DbEnv::rep_process_message", ret, + error_policy()); + + return (ret); +} + +DBENV_METHOD(rep_set_config, + (u_int32_t which, int onoff), (dbenv, which, onoff)) +DBENV_METHOD(rep_start, + (Dbt *cookie, u_int32_t flags), + (dbenv, (DBT *)cookie, flags)) + +DBENV_METHOD(rep_stat, (DB_REP_STAT **statp, u_int32_t flags), + (dbenv, statp, flags)) +DBENV_METHOD(rep_stat_print, (u_int32_t flags), (dbenv, flags)) +DBENV_METHOD(rep_sync, (u_int32_t flags), (dbenv, flags)) + +DBENV_METHOD(rep_get_clockskew, (u_int32_t *fast_clockp, u_int32_t *slow_clockp), + (dbenv, fast_clockp, slow_clockp)) +DBENV_METHOD(rep_set_clockskew, (u_int32_t fast_clock, u_int32_t slow_clock), + (dbenv, fast_clock, slow_clock)) +DBENV_METHOD(rep_get_limit, (u_int32_t *gbytesp, u_int32_t *bytesp), + (dbenv, gbytesp, bytesp)) +DBENV_METHOD(rep_set_limit, (u_int32_t gbytes, u_int32_t bytes), + (dbenv, gbytes, bytes)) + +// +// Begin advanced replication API method implementations +DBENV_METHOD(rep_get_nsites, (u_int32_t *n), (dbenv, n)) +DBENV_METHOD(rep_set_nsites, (u_int32_t n), (dbenv, n)) +DBENV_METHOD(rep_get_priority, (u_int32_t *priority), + (dbenv, priority)) +DBENV_METHOD(rep_set_priority, (u_int32_t priority), + (dbenv, 
priority)) +DBENV_METHOD(rep_get_timeout, (int which, db_timeout_t * timeout), + (dbenv, which, timeout)) +DBENV_METHOD(rep_set_timeout, (int which, db_timeout_t timeout), + (dbenv, which, timeout)) +DBENV_METHOD(repmgr_get_ack_policy, (int *policy), (dbenv, policy)) +DBENV_METHOD(repmgr_set_ack_policy, (int policy), (dbenv, policy)) + +int DbEnv::repmgr_channel(int eid, DbChannel **dbchannel, u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + DB_CHANNEL *channel; + int ret; + + ret = dbenv->repmgr_channel(dbenv, eid, &channel, flags); + if (DB_RETOK_STD(ret)) { + *dbchannel = new DbChannel(); + (*dbchannel)->imp_ = channel; + (*dbchannel)->dbenv_ = this; + } else + DB_ERROR(this, "DbEnv::repmgr_channel", ret, error_policy()); + + return (ret); +} + +int DbEnv::repmgr_msg_dispatch( + void (*arg)(DbEnv *, DbChannel *, Dbt *, u_int32_t, u_int32_t), + u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + message_dispatch_callback_ = arg; + if ((ret = dbenv->repmgr_msg_dispatch(dbenv, + arg == 0 ? 
0 : _message_dispatch_intercept_c, flags)) != 0) + DB_ERROR(this, "DbEnv::repmgr_msg_dispatch", ret, error_policy()); + + return (ret); +} + +int DbEnv::repmgr_local_site(DbSite **dbsite) +{ + DB_ENV *dbenv = unwrap(this); + DB_SITE *site; + int ret; + + ret = dbenv->repmgr_local_site(dbenv, &site); + if (DB_RETOK_REPMGR_LOCALSITE(ret)) { + if (ret == 0) { + *dbsite = new DbSite(); + (*dbsite)->imp_ = site; + } + } else + DB_ERROR(this, "DbEnv::repmgr_local_site", ret, error_policy()); + + return (ret); +} + +int DbEnv::repmgr_site(const char *host, u_int port, DbSite **dbsite, + u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + DB_SITE *site; + int ret; + + ret = dbenv->repmgr_site(dbenv, host, port, &site, flags); + if (DB_RETOK_STD(ret)) { + *dbsite = new DbSite(); + (*dbsite)->imp_ = site; + } else + DB_ERROR(this, "DbEnv::repmgr_site", ret, error_policy()); + + return (ret); +} + +int DbEnv::repmgr_site_by_eid(int eid, DbSite **dbsite) +{ + DB_ENV *dbenv = unwrap(this); + DB_SITE *site; + int ret; + + ret = dbenv->repmgr_site_by_eid(dbenv, eid, &site); + if (DB_RETOK_STD(ret)) { + *dbsite = new DbSite(); + (*dbsite)->imp_ = site; + } else + DB_ERROR(this, "DbEnv::repmgr_site_by_eid", ret, + error_policy()); + + return (ret); +} + +DBENV_METHOD(repmgr_site_list, (u_int *countp, DB_REPMGR_SITE **listp), + (dbenv, countp, listp)) + +int DbEnv::repmgr_start(int nthreads, u_int32_t flags) +{ + DB_ENV *dbenv = unwrap(this); + int ret; + + ret = dbenv->repmgr_start(dbenv, nthreads, flags); + if (!DB_RETOK_REPMGR_START(ret)) + DB_ERROR(this, "DbEnv::repmgr_start", ret, + error_policy()); + + return (ret); +} + +DBENV_METHOD(repmgr_stat, (DB_REPMGR_STAT **statp, u_int32_t flags), + (dbenv, statp, flags)) +DBENV_METHOD(repmgr_stat_print, (u_int32_t flags), (dbenv, flags)) + +// End advanced replication API method implementations. 
+ +DBENV_METHOD(get_timeout, + (db_timeout_t *timeoutp, u_int32_t flags), + (dbenv, timeoutp, flags)) +DBENV_METHOD(set_timeout, + (db_timeout_t timeout, u_int32_t flags), + (dbenv, timeout, flags)) + +// static method +char *DbEnv::version(int *major, int *minor, int *patch) +{ + return (db_version(major, minor, patch)); +} + +// static method +char *DbEnv::full_version(int *family, int *release, + int *major, int *minor, int *patch) +{ + return (db_full_version(family, release, major, minor, patch)); +} + +// static method +DbEnv *DbEnv::wrap_DB_ENV(DB_ENV *dbenv) +{ + DbEnv *wrapped_env = get_DbEnv(dbenv); + return (wrapped_env != NULL) ? wrapped_env : new DbEnv(dbenv, 0); +} diff --git a/lang/cxx/cxx_except.cpp b/lang/cxx/cxx_except.cpp new file mode 100644 index 00000000..63c8377c --- /dev/null +++ b/lang/cxx/cxx_except.cpp @@ -0,0 +1,356 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +static const int MAX_DESCRIPTION_LENGTH = 1024; + +// Note: would not be needed if we can inherit from exception +// It does not appear to be possible to inherit from exception +// with the current Microsoft library (VC5.0). 
+// +static char *dupString(const char *s) +{ + char *r = new char[strlen(s)+1]; + strcpy(r, s); + return (r); +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbException // +// // +//////////////////////////////////////////////////////////////////////// + +DbException::~DbException() throw() +{ + delete [] what_; +} + +DbException::DbException(int err) +: err_(err) +, dbenv_(0) +{ + describe(0, 0); +} + +DbException::DbException(const char *description) +: err_(0) +, dbenv_(0) +{ + describe(0, description); +} + +DbException::DbException(const char *description, int err) +: err_(err) +, dbenv_(0) +{ + describe(0, description); +} + +DbException::DbException(const char *prefix, const char *description, int err) +: err_(err) +, dbenv_(0) +{ + describe(prefix, description); +} + +DbException::DbException(const DbException &that) +: __DB_STD(exception)() +, what_(dupString(that.what_)) +, err_(that.err_) +, dbenv_(0) +{ +} + +DbException &DbException::operator = (const DbException &that) +{ + if (this != &that) { + err_ = that.err_; + delete [] what_; + what_ = dupString(that.what_); + } + return (*this); +} + +void DbException::describe(const char *prefix, const char *description) +{ + char *msgbuf, *p, *end; + + msgbuf = new char[MAX_DESCRIPTION_LENGTH]; + p = msgbuf; + end = msgbuf + MAX_DESCRIPTION_LENGTH - 1; + + if (prefix != NULL) { + strncpy(p, prefix, (p < end) ? end - p: 0); + p += strlen(prefix); + strncpy(p, ": ", (p < end) ? end - p: 0); + p += 2; + } + if (description != NULL) { + strncpy(p, description, (p < end) ? end - p: 0); + p += strlen(description); + if (err_ != 0) { + strncpy(p, ": ", (p < end) ? end - p: 0); + p += 2; + } + } + if (err_ != 0) { + strncpy(p, db_strerror(err_), (p < end) ? end - p: 0); + p += strlen(db_strerror(err_)); + } + + /* + * If the result was too long, the buffer will not be null-terminated, + * so we need to fix that here before duplicating it. 
+ */ + if (p >= end) + *end = '\0'; + + what_ = dupString(msgbuf); + delete [] msgbuf; +} + +int DbException::get_errno() const +{ + return (err_); +} + +const char *DbException::what() const throw() +{ + return (what_); +} + +DbEnv *DbException::get_env() const +{ + return dbenv_; +} + +void DbException::set_env(DbEnv *dbenv) +{ + dbenv_= dbenv; +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbMemoryException // +// // +//////////////////////////////////////////////////////////////////////// + +static const char *memory_err_desc = "Dbt not large enough for available data"; +DbMemoryException::~DbMemoryException() throw() +{ +} + +DbMemoryException::DbMemoryException(Dbt *dbt) +: DbException(memory_err_desc, DB_BUFFER_SMALL) +, dbt_(dbt) +{ +} + +DbMemoryException::DbMemoryException(const char *prefix, Dbt *dbt) +: DbException(prefix, memory_err_desc, DB_BUFFER_SMALL) +, dbt_(dbt) +{ +} + +DbMemoryException::DbMemoryException(const DbMemoryException &that) +: DbException(that) +, dbt_(that.dbt_) +{ +} + +DbMemoryException +&DbMemoryException::operator =(const DbMemoryException &that) +{ + if (this != &that) { + DbException::operator=(that); + dbt_ = that.dbt_; + } + return (*this); +} + +Dbt *DbMemoryException::get_dbt() const +{ + return (dbt_); +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbDeadlockException // +// // +//////////////////////////////////////////////////////////////////////// + +DbDeadlockException::~DbDeadlockException() throw() +{ +} + +DbDeadlockException::DbDeadlockException(const char *description) +: DbException(description, DB_LOCK_DEADLOCK) +{ +} + +DbDeadlockException::DbDeadlockException(const DbDeadlockException &that) +: DbException(that) +{ +} + +DbDeadlockException +&DbDeadlockException::operator =(const DbDeadlockException &that) +{ + if (this != &that) + DbException::operator=(that); + return (*this); +} + 
+//////////////////////////////////////////////////////////////////////// +// // +// DbLockNotGrantedException // +// // +//////////////////////////////////////////////////////////////////////// + +DbLockNotGrantedException::~DbLockNotGrantedException() throw() +{ + delete lock_; +} + +DbLockNotGrantedException::DbLockNotGrantedException(const char *prefix, + db_lockop_t op, db_lockmode_t mode, const Dbt *obj, const DbLock lock, + int index) +: DbException(prefix, DbEnv::strerror(DB_LOCK_NOTGRANTED), + DB_LOCK_NOTGRANTED) +, op_(op) +, mode_(mode) +, obj_(obj) +, lock_(new DbLock(lock)) +, index_(index) +{ +} + +DbLockNotGrantedException::DbLockNotGrantedException(const char *description) +: DbException(description, DB_LOCK_NOTGRANTED) +, op_(DB_LOCK_GET) +, mode_(DB_LOCK_NG) +, obj_(NULL) +, lock_(NULL) +, index_(0) +{ +} + +DbLockNotGrantedException::DbLockNotGrantedException + (const DbLockNotGrantedException &that) +: DbException(that) +{ + op_ = that.op_; + mode_ = that.mode_; + obj_ = that.obj_; + lock_ = (that.lock_ != NULL) ? new DbLock(*that.lock_) : NULL; + index_ = that.index_; +} + +DbLockNotGrantedException +&DbLockNotGrantedException::operator =(const DbLockNotGrantedException &that) +{ + if (this != &that) { + DbException::operator=(that); + op_ = that.op_; + mode_ = that.mode_; + obj_ = that.obj_; + lock_ = (that.lock_ != NULL) ? 
new DbLock(*that.lock_) : NULL; + index_ = that.index_; + } + return (*this); +} + +db_lockop_t DbLockNotGrantedException::get_op() const +{ + return op_; +} + +db_lockmode_t DbLockNotGrantedException::get_mode() const +{ + return mode_; +} + +const Dbt* DbLockNotGrantedException::get_obj() const +{ + return obj_; +} + +DbLock* DbLockNotGrantedException::get_lock() const +{ + return lock_; +} + +int DbLockNotGrantedException::get_index() const +{ + return index_; +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbRepHandleDeadException // +// // +//////////////////////////////////////////////////////////////////////// + +DbRepHandleDeadException::~DbRepHandleDeadException() throw() +{ +} + +DbRepHandleDeadException::DbRepHandleDeadException(const char *description) +: DbException(description, DB_REP_HANDLE_DEAD) +{ +} + +DbRepHandleDeadException::DbRepHandleDeadException + (const DbRepHandleDeadException &that) +: DbException(that) +{ +} + +DbRepHandleDeadException +&DbRepHandleDeadException::operator =(const DbRepHandleDeadException &that) +{ + if (this != &that) + DbException::operator=(that); + return (*this); +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbRunRecoveryException // +// // +//////////////////////////////////////////////////////////////////////// + +DbRunRecoveryException::~DbRunRecoveryException() throw() +{ +} + +DbRunRecoveryException::DbRunRecoveryException(const char *description) +: DbException(description, DB_RUNRECOVERY) +{ +} + +DbRunRecoveryException::DbRunRecoveryException + (const DbRunRecoveryException &that) +: DbException(that) +{ +} + +DbRunRecoveryException +&DbRunRecoveryException::operator =(const DbRunRecoveryException &that) +{ + if (this != &that) + DbException::operator=(that); + return (*this); +} diff --git a/lang/cxx/cxx_lock.cpp b/lang/cxx/cxx_lock.cpp new file mode 100644 index 00000000..4ee11fdd --- /dev/null +++ b/lang/cxx/cxx_lock.cpp 
@@ -0,0 +1,41 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +//////////////////////////////////////////////////////////////////////// +// // +// DbLock // +// // +//////////////////////////////////////////////////////////////////////// + +DbLock::DbLock(DB_LOCK value) +: lock_(value) +{ +} + +DbLock::DbLock() +{ + memset(&lock_, 0, sizeof(DB_LOCK)); +} + +DbLock::DbLock(const DbLock &that) +: lock_(that.lock_) +{ +} + +DbLock &DbLock::operator = (const DbLock &that) +{ + lock_ = that.lock_; + return (*this); +} diff --git a/lang/cxx/cxx_logc.cpp b/lang/cxx/cxx_logc.cpp new file mode 100644 index 00000000..95b1766b --- /dev/null +++ b/lang/cxx/cxx_logc.cpp @@ -0,0 +1,78 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +#include "dbinc/db_page.h" +#include "dbinc_auto/db_auto.h" +#include "dbinc_auto/crdel_auto.h" +#include "dbinc/db_dispatch.h" +#include "dbinc_auto/db_ext.h" +#include "dbinc_auto/common_ext.h" + +// It's private, and should never be called, +// but some compilers need it resolved +// +DbLogc::~DbLogc() +{ +} + +// The name _flags prevents a name clash with __db_log_cursor::flags +int DbLogc::close(u_int32_t _flags) +{ + DB_LOGC *logc = this; + int ret; + DbEnv *dbenv2 = DbEnv::get_DbEnv(logc->env->dbenv); + + ret = logc->close(logc, _flags); + + if (!DB_RETOK_STD(ret)) + DB_ERROR(dbenv2, "DbLogc::close", ret, ON_ERROR_UNKNOWN); + + return (ret); +} + +// The name _flags prevents a name clash with __db_log_cursor::flags +int DbLogc::get(DbLsn *get_lsn, Dbt *data, u_int32_t _flags) +{ + DB_LOGC *logc = this; + int ret; + + ret = logc->get(logc, get_lsn, data, _flags); + + if (!DB_RETOK_LGGET(ret)) { + if (ret == DB_BUFFER_SMALL) + DB_ERROR_DBT(DbEnv::get_DbEnv(logc->env->dbenv), + "DbLogc::get", data, ON_ERROR_UNKNOWN); + else + DB_ERROR(DbEnv::get_DbEnv(logc->env->dbenv), + "DbLogc::get", ret, ON_ERROR_UNKNOWN); + } + + return (ret); +} + +// The name _flags prevents a name clash with __db_log_cursor::flags +int DbLogc::version(u_int32_t *versionp, u_int32_t _flags) +{ + DB_LOGC *logc = this; + int ret; + + ret = logc->version(logc, versionp, _flags); + + if (!DB_RETOK_LGGET(ret)) + DB_ERROR(DbEnv::get_DbEnv(logc->env->dbenv), + "DbLogc::version", ret, ON_ERROR_UNKNOWN); + + return (ret); +} diff --git a/lang/cxx/cxx_mpool.cpp b/lang/cxx/cxx_mpool.cpp new file mode 100644 index 00000000..ec9ebcd0 --- /dev/null +++ b/lang/cxx/cxx_mpool.cpp @@ -0,0 +1,128 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +// Helper macros for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. +// Note this macro expects that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(mpf, arg)") +// +#define DB_MPOOLFILE_METHOD(_name, _argspec, _arglist, _retok) \ +int DbMpoolFile::_name _argspec \ +{ \ + int ret; \ + DB_MPOOLFILE *mpf = unwrap(this); \ + \ + if (mpf == NULL) \ + ret = EINVAL; \ + else \ + ret = mpf->_name _arglist; \ + if (!_retok(ret)) \ + DB_ERROR(DbEnv::get_DbEnv(mpf->env->dbenv), \ + "DbMpoolFile::"#_name, ret, ON_ERROR_UNKNOWN); \ + return (ret); \ +} + +#define DB_MPOOLFILE_METHOD_VOID(_name, _argspec, _arglist) \ +void DbMpoolFile::_name _argspec \ +{ \ + DB_MPOOLFILE *mpf = unwrap(this); \ + \ + mpf->_name _arglist; \ +} + +//////////////////////////////////////////////////////////////////////// +// // +// DbMpoolFile // +// // +//////////////////////////////////////////////////////////////////////// + +DbMpoolFile::DbMpoolFile() +: imp_(0) +{ +} + +DbMpoolFile::~DbMpoolFile() +{ +} + +int DbMpoolFile::close(u_int32_t flags) +{ + DB_MPOOLFILE *mpf = unwrap(this); + int ret; + DbEnv *dbenv = DbEnv::get_DbEnv(mpf->env->dbenv); + + if (mpf == NULL) + ret = EINVAL; + else + ret = mpf->close(mpf, flags); + + imp_ = 0; // extra safety + + // This may seem weird, but is legal as long as we don't access + // any data before returning. 
+ delete this; + + if (!DB_RETOK_STD(ret)) + DB_ERROR(dbenv, "DbMpoolFile::close", ret, ON_ERROR_UNKNOWN); + + return (ret); +} + +DB_MPOOLFILE_METHOD(get, + (db_pgno_t *pgnoaddr, DbTxn *txn, u_int32_t flags, void *pagep), + (mpf, pgnoaddr, unwrap(txn), flags, pagep), DB_RETOK_MPGET) +DB_MPOOLFILE_METHOD(open, + (const char *file, u_int32_t flags, int mode, size_t pagesize), + (mpf, file, flags, mode, pagesize), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(put, + (void *pgaddr, DB_CACHE_PRIORITY priority, u_int32_t flags), + (mpf, pgaddr, priority, flags), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_clear_len, (u_int32_t *lenp), + (mpf, lenp), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_clear_len, (u_int32_t len), + (mpf, len), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_fileid, (u_int8_t *fileid), + (mpf, fileid), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_fileid, (u_int8_t *fileid), + (mpf, fileid), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_flags, (u_int32_t *flagsp), + (mpf, flagsp), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_flags, (u_int32_t flags, int onoff), + (mpf, flags, onoff), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_ftype, (int *ftypep), + (mpf, ftypep), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_ftype, (int ftype), + (mpf, ftype), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_last_pgno, (db_pgno_t *pgnop), + (mpf, pgnop), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_lsn_offset, (int32_t *offsetp), + (mpf, offsetp), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_lsn_offset, (int32_t offset), + (mpf, offset), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_maxsize, (u_int32_t *gbytesp, u_int32_t *bytesp), + (mpf, gbytesp, bytesp), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_maxsize, (u_int32_t gbytes, u_int32_t bytes), + (mpf, gbytes, bytes), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_pgcookie, (DBT *dbt), + (mpf, dbt), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(set_pgcookie, (DBT *dbt), + (mpf, dbt), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(get_priority, (DB_CACHE_PRIORITY *priorityp), + (mpf, priorityp), DB_RETOK_STD) 
+DB_MPOOLFILE_METHOD(set_priority, (DB_CACHE_PRIORITY priority), + (mpf, priority), DB_RETOK_STD) +DB_MPOOLFILE_METHOD(sync, (), + (mpf), DB_RETOK_STD) diff --git a/lang/cxx/cxx_multi.cpp b/lang/cxx/cxx_multi.cpp new file mode 100644 index 00000000..11a197c2 --- /dev/null +++ b/lang/cxx/cxx_multi.cpp @@ -0,0 +1,123 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" + +DbMultipleIterator::DbMultipleIterator(const Dbt &dbt) + : data_((u_int8_t*)dbt.get_data()), + p_((u_int32_t*)(data_ + dbt.get_ulen() - sizeof(u_int32_t))) +{ +} + +bool DbMultipleDataIterator::next(Dbt &data) +{ + if (*p_ == (u_int32_t)-1) { + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + data.set_data(data_ + *p_--); + data.set_size(*p_--); + if (data.get_size() == 0 && data.get_data() == data_) + data.set_data(0); + } + return (p_ != 0); +} + +bool DbMultipleKeyDataIterator::next(Dbt &key, Dbt &data) +{ + if (*p_ == (u_int32_t)-1) { + key.set_data(0); + key.set_size(0); + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + key.set_data(data_ + *p_--); + key.set_size(*p_--); + data.set_data(data_ + *p_--); + data.set_size(*p_--); + } + return (p_ != 0); +} + +bool DbMultipleRecnoDataIterator::next(db_recno_t &recno, Dbt &data) +{ + if (*p_ == (u_int32_t)0) { + recno = 0; + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + recno = *p_--; + data.set_data(data_ + *p_--); + data.set_size(*p_--); + } + return (p_ != 0); +} + + +DbMultipleBuilder::DbMultipleBuilder(Dbt &dbt) : dbt_(dbt) +{ + DB_MULTIPLE_WRITE_INIT(p_, dbt_.get_DBT()); +} + + +bool DbMultipleDataBuilder::append(void *dbuf, size_t dlen) +{ + DB_MULTIPLE_WRITE_NEXT(p_, dbt_.get_DBT(), dbuf, dlen); + return (p_ != 0); +} + +bool DbMultipleDataBuilder::reserve(void *&ddest, size_t dlen) +{ + DB_MULTIPLE_RESERVE_NEXT(p_, 
dbt_.get_DBT(), ddest, dlen); + return (ddest != 0); +} + +bool DbMultipleKeyDataBuilder::append( + void *kbuf, size_t klen, void *dbuf, size_t dlen) +{ + DB_MULTIPLE_KEY_WRITE_NEXT(p_, dbt_.get_DBT(), + kbuf, klen, dbuf, dlen); + return (p_ != 0); +} + +bool DbMultipleKeyDataBuilder::reserve( + void *&kdest, size_t klen, void *&ddest, size_t dlen) +{ + DB_MULTIPLE_KEY_RESERVE_NEXT(p_, dbt_.get_DBT(), + kdest, klen, ddest, dlen); + return (kdest != 0 && ddest != 0); +} + + +DbMultipleRecnoDataBuilder::DbMultipleRecnoDataBuilder(Dbt &dbt) : dbt_(dbt) +{ + DB_MULTIPLE_RECNO_WRITE_INIT(p_, dbt_.get_DBT()); +} + +bool DbMultipleRecnoDataBuilder::append( + db_recno_t recno, void *dbuf, size_t dlen) +{ + DB_MULTIPLE_RECNO_WRITE_NEXT(p_, dbt_.get_DBT(), + recno, dbuf, dlen); + return (p_ != 0); +} + +bool DbMultipleRecnoDataBuilder::reserve( + db_recno_t recno, void *&ddest, size_t dlen) +{ + DB_MULTIPLE_RECNO_RESERVE_NEXT(p_, dbt_.get_DBT(), + recno, ddest, dlen); + return (ddest != 0); +} diff --git a/lang/cxx/cxx_rid.cpp b/lang/cxx/cxx_rid.cpp new file mode 100644 index 00000000..23ad8ca8 --- /dev/null +++ b/lang/cxx/cxx_rid.cpp @@ -0,0 +1,46 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" + +DbHeapRecordId::DbHeapRecordId() +{ + DB_HEAP_RID *rid = this; + memset(rid, 0, sizeof(DB_HEAP_RID)); +} + +DbHeapRecordId::DbHeapRecordId(db_pgno_t pgno_arg, db_indx_t indx_arg) +{ + DB_HEAP_RID *rid = this; + memset(rid, 0, sizeof(DB_HEAP_RID)); + set_pgno(pgno_arg); + set_indx(indx_arg); +} + +DbHeapRecordId::~DbHeapRecordId() +{ +} + +DbHeapRecordId::DbHeapRecordId(const DbHeapRecordId &that) +{ + const DB_HEAP_RID *from = &that; + memcpy((DB_HEAP_RID *)this, from, sizeof(DB_HEAP_RID)); +} + +DbHeapRecordId &DbHeapRecordId::operator = (const DbHeapRecordId &that) +{ + if (this != &that) { + const DB_HEAP_RID *from = &that; + memcpy((DB_HEAP_RID *)this, from, sizeof(DB_HEAP_RID)); + } + return (*this); +} diff --git a/lang/cxx/cxx_seq.cpp b/lang/cxx/cxx_seq.cpp new file mode 100644 index 00000000..e6569572 --- /dev/null +++ b/lang/cxx/cxx_seq.cpp @@ -0,0 +1,109 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +// Helper macro for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. 
+// Note this macro expects that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(db, arg)") +// +#define DBSEQ_METHOD(_name, _argspec, _arglist, _destructor) \ +int DbSequence::_name _argspec \ +{ \ + int ret; \ + DB_SEQUENCE *seq = unwrap(this); \ + DbEnv *dbenv = DbEnv::get_DbEnv(seq->seq_dbp->dbenv); \ + \ + ret = seq->_name _arglist; \ + if (_destructor) \ + imp_ = 0; \ + if (!DB_RETOK_STD(ret)) \ + DB_ERROR(dbenv, \ + "DbSequence::" # _name, ret, ON_ERROR_UNKNOWN); \ + return (ret); \ +} + +DbSequence::DbSequence(Db *db, u_int32_t flags) +: imp_(0) +{ + DB_SEQUENCE *seq; + int ret; + + if ((ret = db_sequence_create(&seq, unwrap(db), flags)) != 0) + DB_ERROR(db->get_env(), "DbSequence::DbSequence", ret, + ON_ERROR_UNKNOWN); + else { + imp_ = seq; + seq->api_internal = this; + } +} + +DbSequence::DbSequence(DB_SEQUENCE *seq) +: imp_(seq) +{ + seq->api_internal = this; +} + +DbSequence::~DbSequence() +{ + DB_SEQUENCE *seq; + + seq = unwrap(this); + if (seq != NULL) + (void)seq->close(seq, 0); +} + +DBSEQ_METHOD(open, (DbTxn *txnid, Dbt *key, u_int32_t flags), + (seq, unwrap(txnid), key, flags), 0) +DBSEQ_METHOD(initial_value, (db_seq_t value), (seq, value), 0) +DBSEQ_METHOD(close, (u_int32_t flags), (seq, flags), 1) +DBSEQ_METHOD(remove, (DbTxn *txnid, u_int32_t flags), + (seq, unwrap(txnid), flags), 1) +DBSEQ_METHOD(stat, (DB_SEQUENCE_STAT **sp, u_int32_t flags), + (seq, sp, flags), 0) +DBSEQ_METHOD(stat_print, (u_int32_t flags), (seq, flags), 0) + +DBSEQ_METHOD(get, + (DbTxn *txnid, int32_t delta, db_seq_t *retp, u_int32_t flags), + (seq, unwrap(txnid), delta, retp, flags), 0) +DBSEQ_METHOD(get_cachesize, (int32_t *sizep), (seq, sizep), 0) +DBSEQ_METHOD(set_cachesize, (int32_t size), (seq, size), 0) +DBSEQ_METHOD(get_flags, (u_int32_t *flagsp), (seq, flagsp), 0) +DBSEQ_METHOD(set_flags, (u_int32_t flags), (seq, flags), 0) +DBSEQ_METHOD(get_range, 
(db_seq_t *minp, db_seq_t *maxp), (seq, minp, maxp), 0) +DBSEQ_METHOD(set_range, (db_seq_t min, db_seq_t max), (seq, min, max), 0) + +Db *DbSequence::get_db() +{ + DB_SEQUENCE *seq = unwrap(this); + DB *db; + (void)seq->get_db(seq, &db); + return Db::get_Db(db); +} + +Dbt *DbSequence::get_key() +{ + DB_SEQUENCE *seq = unwrap(this); + memset(&key_, 0, sizeof(DBT)); + (void)seq->get_key(seq, &key_); + return Dbt::get_Dbt(&key_); +} + +// static method +DbSequence *DbSequence::wrap_DB_SEQUENCE(DB_SEQUENCE *seq) +{ + DbSequence *wrapped_seq = get_DbSequence(seq); + return (wrapped_seq != NULL) ? wrapped_seq : new DbSequence(seq); +} diff --git a/lang/cxx/cxx_site.cpp b/lang/cxx/cxx_site.cpp new file mode 100644 index 00000000..6c8d602d --- /dev/null +++ b/lang/cxx/cxx_site.cpp @@ -0,0 +1,57 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +// Helper macro for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. 
+// Note this macro expects that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(dbsite, arg)") +// +#define DB_SITE_METHOD(_name, _delete, _argspec, _arglist, _retok) \ +int DbSite::_name _argspec \ +{ \ + int ret; \ + DB_SITE *dbsite = unwrap(this); \ + \ + if (dbsite == NULL) \ + ret = EINVAL; \ + else \ + ret = dbsite->_name _arglist; \ + if (_delete) \ + delete this; \ + if (!_retok(ret)) \ + DB_ERROR(DbEnv::get_DbEnv(dbsite->env->dbenv), \ + "DbSite::"#_name, ret, ON_ERROR_UNKNOWN); \ + return (ret); \ +} + +DbSite::DbSite() +: imp_(0) +{ +} + +DbSite::~DbSite() +{ +} + +DB_SITE_METHOD(close, 1, (), (dbsite), DB_RETOK_STD) +DB_SITE_METHOD(get_address, 0, (const char **hostp, u_int *port), + (dbsite, hostp, port), DB_RETOK_STD) +DB_SITE_METHOD(get_config, 0, (u_int32_t which, u_int32_t *valuep), + (dbsite, which, valuep), DB_RETOK_STD) +DB_SITE_METHOD(get_eid, 0, (int *eidp), (dbsite, eidp), DB_RETOK_STD) +DB_SITE_METHOD(remove, 1, (), (dbsite), DB_RETOK_STD) +DB_SITE_METHOD(set_config, 0, (u_int32_t which, u_int32_t value), + (dbsite, which, value), DB_RETOK_STD) diff --git a/lang/cxx/cxx_txn.cpp b/lang/cxx/cxx_txn.cpp new file mode 100644 index 00000000..2a95cf24 --- /dev/null +++ b/lang/cxx/cxx_txn.cpp @@ -0,0 +1,117 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "db_cxx.h" +#include "dbinc/cxx_int.h" + +#include "dbinc/txn.h" + +// Helper macro for simple methods that pass through to the +// underlying C method. It may return an error or raise an exception. 
+// Note this macro expects that input _argspec is an argument +// list element (e.g., "char *arg") and that _arglist is the arguments +// that should be passed through to the C method (e.g., "(db, arg)") +// +#define DBTXN_METHOD(_name, _delete, _argspec, _arglist) \ +int DbTxn::_name _argspec \ +{ \ + int ret; \ + DB_TXN *txn = unwrap(this); \ + DbEnv *dbenv = DbEnv::get_DbEnv(txn->mgrp->env->dbenv); \ + \ + ret = txn->_name _arglist; \ + /* Weird, but safe if we don't access this again. */ \ + if (_delete) { \ + /* Can't do this in the destructor. */ \ + if (parent_txn_ != NULL) \ + parent_txn_->remove_child_txn(this); \ + delete this; \ + } \ + if (!DB_RETOK_STD(ret)) \ + DB_ERROR(dbenv, "DbTxn::" # _name, ret, ON_ERROR_UNKNOWN); \ + return (ret); \ +} + +// private constructor, never called but needed by some C++ linkers +DbTxn::DbTxn(DbTxn *ptxn) +: imp_(0) +{ + TAILQ_INIT(&children); + memset(&child_entry, 0, sizeof(child_entry)); + parent_txn_ = ptxn; + if (parent_txn_ != NULL) + parent_txn_->add_child_txn(this); +} + +DbTxn::DbTxn(DB_TXN *txn, DbTxn *ptxn) +: imp_(txn) +{ + txn->api_internal = this; + TAILQ_INIT(&children); + memset(&child_entry, 0, sizeof(child_entry)); + parent_txn_ = ptxn; + if (parent_txn_ != NULL) + parent_txn_->add_child_txn(this); +} + +DbTxn::~DbTxn() +{ + DbTxn *txn, *pnext; + + for(txn = TAILQ_FIRST(&children); txn != NULL;) { + pnext = TAILQ_NEXT(txn, child_entry); + delete txn; + txn = pnext; + } +} + +DBTXN_METHOD(abort, 1, (), (txn)) +DBTXN_METHOD(commit, 1, (u_int32_t flags), (txn, flags)) +DBTXN_METHOD(discard, 1, (u_int32_t flags), (txn, flags)) + +void DbTxn::remove_child_txn(DbTxn *kid) +{ + TAILQ_REMOVE(&children, kid, child_entry); + kid->set_parent(NULL); +} + +void DbTxn::add_child_txn(DbTxn *kid) +{ + TAILQ_INSERT_HEAD(&children, kid, child_entry); + kid->set_parent(this); +} + +u_int32_t DbTxn::id() +{ + DB_TXN *txn; + + txn = unwrap(this); + return (txn->id(txn)); // no error +} + +DBTXN_METHOD(get_name, 0, (const 
char **namep), (txn, namep)) +DBTXN_METHOD(get_priority, 0, (u_int32_t *priorityp), (txn, priorityp)) +DBTXN_METHOD(prepare, 0, (u_int8_t *gid), (txn, gid)) +DBTXN_METHOD(set_name, 0, (const char *name), (txn, name)) +DBTXN_METHOD(set_priority, 0, (u_int32_t priority), (txn, priority)) +DBTXN_METHOD(set_timeout, 0, (db_timeout_t timeout, u_int32_t flags), + (txn, timeout, flags)) + +// static method +DbTxn *DbTxn::wrap_DB_TXN(DB_TXN *txn) +{ + DbTxn *wrapped_txn = get_DbTxn(txn); + // txn may have a valid parent transaction, but here we don't care. + // We maintain parent-kid relationship in DbTxn only to make sure + // unresolved kids of DbTxn objects are deleted. + return (wrapped_txn != NULL) ? wrapped_txn : new DbTxn(txn, NULL); +} diff --git a/lang/cxx/stl/dbstl_base_iterator.h b/lang/cxx/stl/dbstl_base_iterator.h new file mode 100644 index 00000000..e2cce74f --- /dev/null +++ b/lang/cxx/stl/dbstl_base_iterator.h @@ -0,0 +1,498 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_DB_BASE_ITERATOR_H +#define _DB_STL_DB_BASE_ITERATOR_H + +#include "dbstl_common.h" + +START_NS(dbstl) + +template +class ElementRef; +template +class ElementHolder; + +/** \defgroup dbstl_iterators dbstl iterator classes +Common information for all dbstl iterators: + +1. Each instance of a dbstl iterator uniquely owns a Berkeley DB cursor, +so that the key/data pair it currently sits on is always valid before it moves +elsewhere. 
It also caches the current key/data pair values in order for member +functions like operator* /operator-> to work properly, but caching is not +compatible with standard C++ Stl behavior --- the C++ standard requires the +iterator refer to a shared piece of memory where the data is stored, +thus two iterators of the same container sitting on the same element should +point to the same memory location, which is false for dbstl iterators. + +2. There are some functions common to each child class of this class which have +identical behaviors, so we will document them here. + +@{ +This class is the base class for all dbstl iterators, there is not much to say +about this class itself, and users are not supposed to directly use this class +at all. So we will talk about some common functions of dbstl iterators in +this section. + +\sa db_vector_base_iterator db_vector_iterator db_map_base_iterator +db_map_iterator db_set_base_iterator db_set_iterator +*/ +template +class db_base_iterator +{ +protected: + typedef db_base_iterator self; + friend class ElementHolder; + friend class ElementRef; + + // The container from which this iterator is created. + mutable db_container *owner_; + + bool dead_; // Internally used to prevent recursive destruction calls. + + // Whether or not to always get current key/data pair directly from db; + // If true, always poll db, slower but safe for concurrent use; + // otherwise faster, but when multiple iterators point to the same + // key/data pair (they definitely have same locker id) and use some + // iterators to update current key/data pair, then other iterators' + // key/data pairs are obsolete, need to call iterator::refresh(). + // + bool directdb_get_; + + // Whether to do bulk retrieval for a read only iterator. If non-zero, + // do bulk retrieval and use this member as the bulk buffer size, + // otherwise, do not use bulk retrieval. While doing bulk retrieval, + // no longer read from database even if directdb_get is true. 
+ u_int32_t bulk_retrieval_; + + // Whether to use DB_RMW flag in Dbc::get. Users can set this flag in + // db_container::begin(). The flag may be ignored when there is no + // locking subsystem initialized. + bool rmw_csr_; + + // Whether this iterator is a db_set/db_multiset iterator. + bool is_set_; + + // Whether this iterator is read only. Default value is false. + // The const version of begin(), or passing an explicit "readonly" + // parameter to non-const begin() will create a read only + // iterator; + // + bool read_only_; + + // Iteration status. If in valid range, it is 0, otherwise it is + // INVALID_ITERATOR_POSITION. + mutable int itr_status_; + + // Distinguish the invalid iterator of end() and rend(). If an + // iterator was invalidated in operator++, inval_pos_type_ is + // IPT_AFTER_LAST, else if in operator-- it is IPT_BEFORE_FIRST. + // For random iterator, inval_pos_type_ is also updated in random + // movement functions. This member is only valid when itr_status_ is + // set to INVALID_ITERATOR_POSITION. + mutable char inval_pos_type_; + // Values to denote the invalid positions. + const static char IPT_BEFORE_FIRST = -1; + const static char IPT_AFTER_LAST = 1; + const static char IPT_UNSET = 0; + + virtual void delete_me() const { delete this;} + + virtual db_base_iterator* dup_itr() const + { + THROW(InvalidFunctionCall, ( + "\ndb_base_iterator<>::dup_itr can't be called.\n")); + + } + + inline bool is_set_iterator() const + { + return is_set_; + } + + // The following two functions can't be abstract virtual for + // compiler errors. 
+ virtual int replace_current(const ddt&) + { + THROW(InvalidFunctionCall, ( + "\ndb_base_iterator<>::replace_current can't be called\n")); + } + + virtual int replace_current_key(const ddt&) + { + THROW(InvalidFunctionCall, ( + "\ndb_base_iterator<>::replace_current_key can't be called\n")); + } + +public: +#if NEVER_DEFINE_THIS_MACRO_IT_IS_FOR_DOXYGEN_ONLY + /// Read data from underlying database via its cursor, and update + /// its cached value. + /// \param from_db Whether retrieve data from database rather than + /// using the cached data in this iterator. + /// \return 0 if succeeded. Otherwise an DbstlException exception + /// will be thrown. + int refresh(bool from_db = true); + + /** Close its cursor. If you are sure the iterator is no longer + used, call this function so that its underlying cursor is closed + before this iterator is destructed, potentially increase performance + and concurrency. Note that the cursor is definitely + closed at iterator destruction if you don't close it explicitly. + */ + void close_cursor() const; + + /** Call this function to modify bulk buffer size. Bulk retrieval is + enabled when creating an iterator, so users later can only modify + the bulk buffer size to another value, but can't enable/disable + bulk read while an iterator is already alive. + \param sz The new buffer size in bytes. + \return true if succeeded, false otherwise. + */ + bool set_bulk_buffer(u_int32_t sz); + + /// Return current bulk buffer size. Returns 0 if bulk retrieval is + /// not enabled. + u_int32_t get_bulk_bufsize(); +#endif // NEVER_DEFINE_THIS_MACRO_IT_IS_FOR_DOXYGEN_ONLY + + //////////////////////////////////////////////////////////////////// + // + // Begin public constructors and destructor. + // + /// Default constructor. 
+ db_base_iterator() + { + owner_ = NULL; + directdb_get_ = true; + dead_ = false; + itr_status_ = INVALID_ITERATOR_POSITION; + read_only_ = false; + is_set_ = false; + bulk_retrieval_ = 0; + rmw_csr_ = false; + inval_pos_type_ = IPT_UNSET; + } + + /// Constructor. + db_base_iterator(db_container *powner, bool directdbget, + bool b_read_only, u_int32_t bulk, bool rmw) + { + owner_ = powner; + directdb_get_ = directdbget; + dead_ = false; + itr_status_ = INVALID_ITERATOR_POSITION; + read_only_ = b_read_only; + is_set_ = false; + bulk_retrieval_ = bulk; + rmw_csr_ = rmw; + inval_pos_type_ = IPT_UNSET; + } + + /// Copy constructor. Copy all members of this class. + db_base_iterator(const db_base_iterator &bi) + { + owner_ = bi.owner_; + directdb_get_ = bi.directdb_get_; + dead_ = false; + itr_status_ = bi.itr_status_; + read_only_ = bi.read_only_; + is_set_ = bi.is_set_; + bulk_retrieval_ = bi.bulk_retrieval_; + rmw_csr_ = bi.rmw_csr_; + inval_pos_type_ = bi.inval_pos_type_; + } + + /** + Iterator assignment operator. + Iterator assignment will cause the underlying cursor of the right iterator + to be duplicated to the left iterator after its previous cursor is closed, + to make sure each iterator owns one unique cursor. The key/data cached + in the right iterator is copied to the left iterator. Consequently, + the left iterator points to the same key/data pair in the database + as the right value after the assignment, and have identical cached + key/data pair. + \param bi The other iterator to assign with. + \return The iterator bi's reference. + */ + inline const self& operator=(const self& bi) + { + ASSIGNMENT_PREDCOND(bi) + owner_ = bi.owner_; + directdb_get_ = bi.directdb_get_; + dead_ = false; + itr_status_ = bi.itr_status_; + read_only_ = bi.read_only_; + is_set_ = bi.is_set_; + bulk_retrieval_ = bi.bulk_retrieval_; + rmw_csr_ = bi.rmw_csr_; + inval_pos_type_ = bi.inval_pos_type_; + return bi; + } + + /// Destructor. 
+ virtual ~db_base_iterator() {} + + //////////////////////////////////////////////////////////////////// + + /// \brief Get bulk buffer size. + /// + /// Return bulk buffer size. If the size is 0, bulk retrieval is not + /// enabled. + u_int32_t get_bulk_retrieval() const { return bulk_retrieval_; } + + /// \brief Get DB_RMW setting. + /// + /// Return true if the iterator's cursor has DB_RMW flag set, false + /// otherwise. DB_RMW flag causes + /// a write lock to be acquired when reading a key/data pair, so + /// that the transaction won't block later when writing back the + /// updated value in a read-modify-write operation cycle. + bool is_rmw() const { return rmw_csr_; } + + /// \brief Get direct database get setting. + /// + /// Return true if every operation to retrieve the key/data pair the + /// iterator points to will read from database rather than using + /// the cached value, false otherwise. + bool is_directdb_get() const {return directdb_get_; } +}; // db_base_iterator + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// +// db_reverse_iterator class template definition. +// +// db_reverse_iterator is the reverse iterator adapter for all iterator +// classes of dbstl. It makes an original iterator its reverse iterator. +// We don't want to use std::reverse_iterator<> adapter because it is +// more expensive. Here we move the original iterator one position back to +// avoid unnecessary movement. +// +/// This class is the reverse class adaptor for all dbstl iterator classes. +/// It inherits from real iterator classes like db_vector_iterator, +/// db_map_iterator or db_set_iterator. When you call container::rbegin(), +/// you will get an instance of this class. 
+/// \sa db_vector_base_iterator db_vector_iterator db_map_base_iterator +/// db_map_iterator db_set_base_iterator db_set_iterator +template +class _exported db_reverse_iterator : public iterator +{ +public: + // typedefs are not inherited, so re-define them here. + // + typedef db_reverse_iterator self; + typedef typename iterator::value_type T; + typedef iterator iterator_type; + typedef typename iterator::value_type value_type; + typedef typename iterator::reference reference; + typedef typename iterator::pointer pointer; + typedef typename iterator::iterator_category iterator_category; + typedef typename iterator::key_type key_type; + typedef typename iterator::data_type data_type; + typedef typename iterator::difference_type difference_type; + typedef typename iterator::value_type_wrap value_type_wrap; + // Construct a reverse iterator from iterator vi. We don't duplicate + // iterator vi here because vi is not supposed to be used again. + // This function is supposed to be used by dbstl users, internally + // self::set_iterator method is used. + /// Constructor. Construct from an iterator of wrapped type. + db_reverse_iterator(const iterator& vi) : iterator(vi) + { + iterator::operator--(); + } + + /// Copy constructor. + db_reverse_iterator(const self& ritr) + : iterator(ritr) + { + } + + /// Copy constructor. + db_reverse_iterator(const + db_reverse_iterator& ritr) + : iterator(ritr) + { + } + + /// Default constructor. + db_reverse_iterator() : iterator() + { + } + + // Do not throw exceptions here because it is normal to iterate to + // "end()". + // + /// \name Reverse iterator movement functions + /// When we talk about reverse iterator movement, we think the + /// container is a uni-directional range, represented by [begin, end), + /// and this is true no matter we are using iterators or reverse + /// iterators. When an iterator is moved closer to "begin", we say it + /// is moved forward, otherwise we say it is moved backward. 
+ //@{ + /// Move this iterator forward by one element. + /// \return The moved iterator at new position. + self& operator++() + { + iterator::operator--(); + return *this; + } + + /// Move this iterator forward by one element. + /// \return The original iterator at old position. + self operator++(int) + { + // XXX!!! This conversion relies on this class or + // db_set_iterator<> having no data members at all, which + // is currently the case. + return (self)iterator::operator--(1); + } + + /// Move this iterator backward by one element. + /// \return The moved iterator at new position. + self& operator--() + { + iterator::operator++(); + return *this; + } + + /// Move this iterator backward by one element. + /// \return The original iterator at old position. + self operator--(int) + { + // XXX!!! This conversion relies on this class or + // db_set_iterator<> having no data members at all, which + // is currently the case. + return (self)iterator::operator++(1); + } + //@} // Movement operators. + + /// Assignment operator. + /// \param ri The iterator to assign with. + /// \return The iterator ri. + /// \sa db_base_iterator::operator=(const self&) + const self& operator=(const self& ri) + { + ASSIGNMENT_PREDCOND(ri) + iterator::operator=(ri); + return ri; + } + + ////////////// Methods below only applies to random iterators. ///// + /// \name Operators for random reverse iterators + /// Return a new iterator by moving this iterator backward or forward + /// by n elements. + //@{ + /// Iterator shuffle operator. + /// Return a new iterator by moving this iterator forward + /// by n elements. + /// \param n The amount and direction of movement. If negative, + /// will move towards reverse direction. + /// \return A new iterator at new position. + self operator+(difference_type n) const + { + self ritr(*this); + ritr.iterator::operator-=(n); + return ritr; + } + + /// Iterator shuffle operator. 
+ /// Return a new iterator by moving this iterator backward + /// by n elements. + /// \param n The amount and direction of movement. If negative, + /// will move towards reverse direction. + /// \return A new iterator at new position. + self operator-(difference_type n) const + { + self ritr(*this); + ritr.iterator::operator+=(n); + return ritr; + } + //@} + /// \name Operators for random reverse iterators + /// Move this iterator backward or forward by n elements and then + /// return it. + //@{ + /// Iterator shuffle operator. + /// Move this iterator forward by n elements and then + /// return it. + /// \param n The amount and direction of movement. If negative, + /// will move towards reverse direction. + /// \return This iterator at new position. + const self& operator+=(difference_type n) + { + iterator::operator-=(n); + return *this; + } + + /// Iterator shuffle operator. + /// Move this iterator backward by n elements and then + /// return it. + /// \param n The amount and direction of movement. If negative, + /// will move towards reverse direction. + /// \return This iterator at new position. + const self& operator-=(difference_type n) + { + iterator::operator+=(n); + return *this; + } + //@} + /// \name Operators for random reverse iterators + /// Reverse iterator comparison against reverse iterator itr, the one + /// sitting on elements with less index is returned to be greater. + //@{ + /// Less compare operator. + bool operator<(const self& itr) const + { + return (iterator::operator>(itr)); + } + + /// Greater compare operator. + bool operator>(const self& itr) const + { + return (iterator::operator<(itr)); + } + + /// Less equal compare operator. + bool operator<=(const self& itr) const + { + return (iterator::operator>=(itr)); + } + + /// Greater equal compare operator. + bool operator>=(const self& itr) const + { + return (iterator::operator<=(itr)); + } + //@} + // Because v.rend() - v.rbegin() < 0, we should negate base version. 
+ /// Return the negative value of the difference of indices of elements + /// this iterator and itr are sitting on. + /// \return itr.index - this->index. + /// \param itr The other reverse iterator. + difference_type operator-(const self&itr) const + { + difference_type d = iterator::operator-(itr); + return -d; + } + + /// Return the reference of the element which can be reached by moving + /// this reverse iterator by Off times backward. If Off is negative, + /// the movement will be forward. + inline value_type_wrap operator[](difference_type Off) const + { + self ritr(*this); + value_type_wrap res = ritr.iterator::operator[](-Off); + return res; + } + +}; // reverse_iterator +//@} // dbstl_iterators +END_NS + +#endif // !_DB_STL_DB_BASE_ITERATOR_H diff --git a/lang/cxx/stl/dbstl_common.in b/lang/cxx/stl/dbstl_common.in new file mode 100644 index 00000000..7fd7bf5a --- /dev/null +++ b/lang/cxx/stl/dbstl_common.in @@ -0,0 +1,458 @@ +#ifndef _DB_STL_COMMON_H +#define _DB_STL_COMMON_H + +#ifdef DBSTL_DEBUG_LEAK +#include "vld.h" +#endif + +#include + +#include "db_cxx.h" + +// In release builds, the native assert will be disabled so we +// can't use it in dbstl in cases where we rely on the expression being +// evaluated to change the state of the application. 
+// +#if !defined(DEBUG) && !defined(_DEBUG) +#undef dbstl_assert +#define dbstl_assert(expression) +#else +#undef dbstl_assert +#define dbstl_assert(expression) do { \ + if (!(expression)) { \ + FailedAssertionException ex(__FILE__, __LINE__, #expression);\ + throw ex; } } while (0) +#endif + +#if defined( DB_WIN32) || defined(_WIN32) +#include +#include +#else +#define TCHAR char +#define _T(e) (e) +#define _ftprintf fprintf +#define _snprintf snprintf +#define _tcschr strchr +#define _tcscmp strcmp +#define _tcscpy strcpy +#define _tcslen strlen +#define _tgetopt getopt +#define _tmain main +#define _tprintf printf +#define _ttoi atoi +#endif + +#undef SIZE_T_MAX +// The max value for size_t variables, one fourth of 2 powers 32. +#define SIZE_T_MAX 1073741824 + +// Macro for HAVE_WSTRING (detected by configure) +@WSTRING_decl@ + +// Thread local storage modifier declaration. +@TLS_decl@ +@TLS_defn@ + +#if !defined(TLS_DECL_MODIFIER) && !defined(HAVE_PTHREAD_TLS) +#error "No appropriate TLS modifier defined." +#endif + +////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// C++ compiler portability control macro definitions. +// If a C++ compiler does not support the following capabilities, disabling +// these flags will remove usage of the feature from DB STL. +// Where possible a DB STL has implemented work-arounds for the missing +// functionality. +// +#define HAVE_EXPLICIT_KEYWORD 1 +#define HAVE_NAMESPACE 1 +#define HAVE_TYPENAME 1 + +// Platform specific compiler capability configuration. +#ifdef WIN32 +#define CLS_SCOPE(clstmpl_name) +#else + +// C++ standard: It is not possible to define a full specialized version of +// a member function of a class template inside the class body. It needs to +// be defined outside the class template, and must be defined in the namespace +// scope. 
+#define CLS_SCOPE(clstmpl_name) clstmpl_name:: +#define NO_IN_CLASS_FULL_SPECIALIZATION 1 +#define NO_MEMBER_FUNCTION_PARTIAL_SPECIALIZATION 1 +#endif + +#if HAVE_NAMESPACE +#define START_NS(nsname) namespace nsname { +#define END_NS } +#else +#define START_NS(nsname) struct nsname { +#define END_NS }; +#endif + +#if HAVE_EXPLICIT_KEYWORD +#define EXPLICIT explicit +#else +#define EXPLICIT +#endif + +#if HAVE_TYPENAME +#define Typename typename +#else +#define Typename class +#endif + +////////////////////////////////////////////////////////////////////////// +// End of compiler portability control macro definitions. +//////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// Iterator status macro definitions. +// +#define INVALID_ITERATOR_POSITION -1 // Iterator goes out of valid range. +#define INVALID_ITERATOR_CURSOR -2 // The iterator's dbc cursor is invalid. +#define ITERATOR_DUP_ERROR -3 // Failed to duplicate a cursor. + +// Current cursor's key or data dbt has no data. +#define INVALID_KEY_DATA -4 +#define EMPTY_DBT_DATA -5 // Current cursor's pointed data dbt has no data. +#define ITERATOR_AT_END -6 +#define CURSOR_NOT_OPEN -7 + +/////////////////////////////////////////////////////////////////////// +// End of iterator status macro definitions. +////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////// +// +// Helper macros definitions. +// +// Use BDBOP and BDBOP2 to wrap Berkeley DB calls. The macros validate the +// return value. On failure, the wrappers clean up, and generate the +// expected exception. 
+// +#define BDBOP(bdb_call, ret) do { \ + if ((ret = (bdb_call)) != 0) throw_bdb_exception(#bdb_call, ret);\ + } while(0) +#define BDBOP2(bdb_call, ret, cleanup) do { \ + if ((ret = (bdb_call)) != 0) { (cleanup); \ + throw_bdb_exception(#bdb_call, ret);} \ + } while (0) +// Do not throw the exception if bdb_call returned a specified error number. +#define BDBOP3(bdb_call, ret, exception, cleanup) do { \ + if (((ret = (bdb_call)) != 0) && (ret & exception) == 0) { \ + (cleanup); throw_bdb_exception(#bdb_call, ret);} \ + } while (0) + +#define THROW(exception_type, arg_list) do { \ + exception_type ex arg_list; throw ex; } while (0) + +#define THROW0(exception_type) do { \ + exception_type ex; throw ex; } while (0) + +#define INVALID_INDEX ((index_type)-1) +#define INVALID_DLEN ((u_int32_t)-1) + +#define DBSTL_MAX_DATA_BUF_LEN 1024 * 4096 +#define DBSTL_MAX_KEY_BUF_LEN 1024 * 4096 +#define DBSTL_MAX_MTX_ENV_MUTEX 4096 * 4 +#define DBSTL_BULK_BUF_SIZE 256 * 1024 + +#define COMPARE_CHECK(obj) if (this == &obj) return true; +#define ASSIGNMENT_PREDCOND(obj) if (this == &obj) return obj; +////////////////////////////////////////////////////////////////// +// End of helper macro definitions. +////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////// +// +// Public global function declarations. +// These functions are open/public functionalities of dbstl for +// dbstl users to call. +// +START_NS(dbstl) +// _exported is a macro we employ from db_cxx.h of Berkeley DB C++ +// API. If we want to export the symbols it decorates on Windows, +// we must define the macro "DB_CREATE_DLL", as is defined in dbstl +// project property. +/// \defgroup dbstl_global_functions dbstl global public functions +//@{ + +/// \name Functions to close database/environments. 
+/// Normally you don't have to close any database +/// or environment handles, they will be closed automatically. +/// Though you still have the following API to close them. +//@{ +/// Close pdb regardless of reference count. You must make sure pdb +/// is not used by others before calling this method. +/// You can close the underlying database of a container and assign +/// another database with right configurations to it, if the configuration +/// is not suitable for the container, there will be an +/// InvalidArgumentException type of exception thrown. +/// You can't use the container after you called close_db and before setting +/// another valid database handle to the container via +/// db_container::set_db_handle() function. +/// \param pdb The database handle to close. +_exported void close_db(Db *pdb); + +/// Close all open database handles regardless of reference count. +/// You can't use any container after you called close_all_dbs and +/// before setting another valid database handle to the +/// container via db_container::set_db_handle() function. +/// \sa close_db(Db *); +_exported void close_all_dbs(); + +/// \brief Close specified database environment handle regardless of reference +/// count. +/// +/// Make sure the environment is not used by any other databases. +/// \param pdbenv The database environment handle to close. +_exported void close_db_env(DbEnv *pdbenv); + +/// \brief Close all open database environment handles regardless of +/// reference count. +/// +/// You can't use the container after you called close_db and before setting +/// another valid database handle to the container via +/// db_container::set_db_handle() function. \sa close_db_env(DbEnv *); +_exported void close_all_db_envs(); +//@} + +/// \name Transaction control global functions. +/// dbstl transaction API. You should call these API rather than DB C/C++ +/// API to use Berkeley DB transaction features. 
+//@{
+/// Begin a new transaction from the specified environment "env".
+/// This function is called by dbstl user to begin an external transaction.
+/// The "flags" parameter is passed to DbEnv::txn_begin().
+/// If a transaction created from
+/// the same database environment already exists and is unresolved,
+/// the new transaction is started as a child transaction of that transaction,
+/// and thus you can't specify the parent transaction.
+/// \param env The environment to start a transaction from.
+/// \param flags It is set to DbEnv::txn_begin() function.
+/// \return The newly created transaction.
+///
+_exported DbTxn* begin_txn(u_int32_t flags, DbEnv *env);
+
+/// Commit current transaction opened in the environment "env".
+/// This function is called by user to commit an external explicit transaction.
+/// \param env The environment whose current transaction is to be committed.
+/// \param flags It is set to DbTxn::commit() function.
+/// \sa commit_txn(DbEnv *, DbTxn *, u_int32_t);
+///
+_exported void commit_txn(DbEnv *env, u_int32_t flags = 0);
+
+/// Commit a specified transaction and all its child transactions.
+/// \param env The environment where txn is started from.
+/// \param txn The transaction to commit, can be a parent transaction of a
+/// nested transaction group, all un-aborted child transactions of
+/// it will be committed.
+/// \param flags It is passed to each DbTxn::commit() call.
+/// \sa commit_txn(DbEnv *, u_int32_t);
+_exported void commit_txn(DbEnv *env, DbTxn *txn, u_int32_t flags = 0);
+
+/// Abort current transaction of environment "env". This function is called by
+/// dbstl user to abort an outside explicit transaction.
+/// \param env The environment whose current transaction is to be aborted.
+/// \sa abort_txn(DbEnv *, DbTxn *);
+_exported void abort_txn(DbEnv *env);
+
+/// Abort specified transaction "txn" and all its child transactions.
+/// That is, "txn" can be a parent transaction of a nested transaction group. 
+/// \param env The environment where txn is started from. +/// \param txn The transaction to abort, can be a parent transaction of a +/// nested transaction group, all child transactions of it will be aborted. +/// \sa abort_txn(DbEnv *); +/// +_exported void abort_txn(DbEnv *env, DbTxn *txn); + +/// Get current transaction of environment "env". +/// \param env The environment whose current transaction we want to get. +/// \return Current transaction of env. +_exported DbTxn* current_txn(DbEnv *env); + +/// Set environment env's current transaction handle to be newtxn. The original +/// transaction handle returned without aborting or commiting. This function +/// is used for users to use one transaction among multiple threads. +/// \param env The environment whose current transaction to replace. +/// \param newtxn The new transaction to be as the current transaction of env. +/// \return The old current transaction of env. It is not resolved. +_exported DbTxn* set_current_txn_handle(DbEnv *env, DbTxn *newtxn); +//@} + +/// \name Functions to open and register database/environment handles. +//@{ +/// Register a Db handle "pdb1". This handle and handles opened in it will be +/// closed by ResourceManager, so application code must not try to close or +/// delete it. Users can do enough configuration before opening the Db then +/// register it via this function. +/// All database handles should be registered via this function in each +/// thread using the handle. The only exception is the database handle opened +/// by dbstl::open_db should not be registered in the thread of the +/// dbstl::open_db call. +/// \param pdb1 The database handle to register into dbstl for current thread. +/// +_exported void register_db(Db *pdb1); + +/// Register a DbEnv handle env1, this handle and handles opened in it will be +/// closed by ResourceManager. Application code must not try to close or delete +/// it. 
Users can do enough config before opening the DbEnv and then register +/// it via this function. +/// All environment handles should be registered via this function in each +/// thread using the handle. The only exception is the environment handle +/// opened by dbstl::open_db_env should not be registered in the thread of +/// the dbstl::open_db_env call. +/// \param env1 The environment to register into dbstl for current thread. +/// +_exported void register_db_env(DbEnv *env1); + +/// Helper function to open a database and register it into dbstl for the +/// calling thread. +/// Users still need to register it in any other thread using it if it +/// is shared by multiple threads, via register_db() function. +/// Users don't need to delete or free the memory of the returned object, +/// dbstl will take care of that. +/// When you don't use dbstl::open_db() but explicitly call DB C++ API to +/// open a database, you must new the Db object, rather than create it +/// on stack, and you must delete the Db object by yourself. +/// \param penv The environment to open the database from. +/// \param cflags The create flags passed to Db class constructor. +/// \param filename The database file name, passed to Db::open. +/// \param dbname The database name, passed to Db::open. +/// \param dbtype The database type, passed to Db::open. +/// \param oflags The database open flags, passed to Db::open. +/// \param mode The database open mode, passed to Db::open. +/// \param txn The transaction to open the database from, passed to Db::open. +/// \param set_flags The flags to be set to the created database handle. +/// \return The opened database handle. 
+/// \sa register_db(Db *); +/// \sa open_db_env; +/// +_exported Db* open_db (DbEnv *penv, const char *filename, DBTYPE dbtype, + u_int32_t oflags, u_int32_t set_flags, int mode = 0644, DbTxn *txn = NULL, + u_int32_t cflags = 0, const char* dbname = NULL); + +/// Helper function to open an environment and register it into dbstl for the +/// calling thread. Users still need to register it in any other thread if it +/// is shared by multiple threads, via register_db_env() function above. +/// Users don't need to delete or free the memory of the returned object, +/// dbstl will take care of that. +/// +/// When you don't use dbstl::open_env() but explicitly call DB C++ API to +/// open an environment, you must new the DbEnv object, rather than create it +/// on stack, and you must delete the DbEnv object by yourself. +/// \param env_home Environment home directory, it must exist. Passed to +/// DbEnv::open. +/// \param cflags DbEnv constructor creation flags, passed to DbEnv::DbEnv. +/// \param set_flags Flags to set to the created environment before opening it. +/// \param oflags Environment open flags, passed to DbEnv::open. +/// \param mode Environment region files mode, passed to DbEnv::open. +/// \param cachesize Environment cache size, by default 4M bytes. +/// \return The opened database environment handle. +/// \sa register_db_env(DbEnv *); +/// \sa open_db; +/// +_exported DbEnv* open_env(const char *env_home, u_int32_t set_flags, + u_int32_t oflags = DB_CREATE | DB_INIT_MPOOL, + u_int32_t cachesize = 4 * 1024 * 1024, + int mode = 0644, + u_int32_t cflags = 0/* Flags for DbEnv constructor. */); +//@} + +/// @name Mutex API based on Berkeley DB mutex. +/// These functions are in-process mutex support which uses Berkeley DB +/// mutex mechanisms. You can call these functions to do portable +/// synchronization for your code. +//@{ +/// Allocate a Berkeley DB mutex. +/// \return Berkeley DB mutex handle. 
+_exported db_mutex_t alloc_mutex();
+/// Lock a mutex, wait if it is held by another thread.
+/// \param mtx The mutex handle to lock.
+/// \return 0 if succeed, non-zero otherwise, call db_strerror to get message.
+_exported int lock_mutex(db_mutex_t mtx);
+/// Unlock a mutex, and return immediately.
+/// \param mtx The mutex handle to unlock.
+/// \return 0 if succeed, non-zero otherwise, call db_strerror to get message.
+_exported int unlock_mutex(db_mutex_t mtx);
+/// Free a mutex, and return immediately.
+/// \param mtx The mutex handle to free.
+/// \return 0 if succeed, non-zero otherwise, call db_strerror to get message.
+_exported void free_mutex(db_mutex_t mtx);
+//@}
+
+/// Close cursors opened in dbp1.
+/// \param dbp1 The database handle whose active cursors to close.
+/// \return The number of cursors closed by this call.
+_exported size_t close_db_cursors(Db* dbp1);
+
+/// \name Other global functions.
+//@{
+/// If there are multiple threads within a process that make use of dbstl, then
+/// this function should be called in a single thread mutual exclusively before
+/// any use of dbstl in a process; Otherwise, you don't need to call it, but
+/// are allowed to call it anyway.
+_exported void dbstl_startup();
+
+/// This function releases any memory allocated in the heap by code of dbstl,
+/// and close all DB handles in the right order.
+/// So you can only call dbstl_exit() right before the entire process exits.
+/// It will release any memory allocated by dbstl that have to live during
+/// the entire process lifetime.
+_exported void dbstl_exit();
+
+/// This function releases all DB handles in the right order. The environment
+/// and database handles are only closed when they are not used by other
+/// threads, otherwise the reference count is decremented.
+_exported void dbstl_thread_exit();
+
+/// Operators to compare two Dbt objects.
+/// \param d1 Dbt object to compare.
+/// \param d2 Dbt object to compare. 
+_exported bool operator==(const Dbt&d1, const Dbt&d2);
+/// Operators to compare two DBT objects.
+/// \param d1 DBT object to compare.
+/// \param d2 DBT object to compare.
+_exported bool operator==(const DBT&d1, const DBT&d2);
+
+/// If the existing random temporary database name generation mechanism is
+/// still causing name clashes, users can set this global suffix number which
+/// will be appended to each temporary database file name and incremented after
+/// each append, and by default it is 0.
+/// \param num Starting number to append to each temporary db file name.
+_exported void set_global_dbfile_suffix_number(u_int32_t num);
+//@}
+
+//@} // dbstl_global_functions
+
+// Internally used memory allocation functions, they will throw an exception
+// of NotEnoughMemoryException if can't allocate memory.
+_exported void * DbstlReAlloc(void *ptr, size_t size);
+_exported void * DbstlMalloc(size_t size);
+
+_exported u_int32_t hash_default(Db * /*dbp*/, const void *key, u_int32_t len);
+
+// Default string manipulation callbacks.
+_exported u_int32_t dbstl_strlen(const char *str);
+_exported void dbstl_strcpy(char *dest, const char *src, size_t num);
+_exported int dbstl_strncmp(const char *s1, const char *s2, size_t num);
+_exported int dbstl_strcmp(const char *s1, const char *s2);
+_exported int dbstl_wcscmp(const wchar_t *s1, const wchar_t *s2);
+_exported int dbstl_wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t num);
+_exported u_int32_t dbstl_wcslen(const wchar_t *str);
+_exported void dbstl_wcscpy(wchar_t *dest, const wchar_t *src, size_t num);
+
+END_NS
+
+//////////////////////////////////////////////////////////////////
+// End of public global function declarations. 
+////////////////////////////////////////////////////////////////// + +#endif /* !_DB_STL_COMMON_H */ diff --git a/lang/cxx/stl/dbstl_container.cpp b/lang/cxx/stl/dbstl_container.cpp new file mode 100644 index 00000000..b187b26a --- /dev/null +++ b/lang/cxx/stl/dbstl_container.cpp @@ -0,0 +1,525 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include + +#include "dbstl_container.h" +#include "dbstl_resource_manager.h" +#include "dbstl_exception.h" +#include "dbstl_utility.h" +#include "dbstl_inner_utility.h" + +typedef struct { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +} db_timespec; + +extern "C"{ +void __os_id (DB_ENV *, pid_t *, db_threadid_t*); +void __os_gettime(ENV *env, db_timespec *tp, int monotonic); +} + +START_NS(dbstl) + +using std::string; +u_int32_t db_container::g_db_file_suffix_ = 0; + +void set_global_dbfile_suffix_number(u_int32_t num) +{ + db_container::g_db_file_suffix_ = num; +} + +// Internally used memory allocation functions, they will throw an exception +// of NotEnoughMemoryException if can't allocate memory. 
+void *DbstlReAlloc(void *ptr, size_t size) +{ + void *p; + + assert(size != 0); + if ((p = realloc(ptr, size)) == NULL) + THROW(NotEnoughMemoryException, + ("DbstlReAlloc failed to allocate memory", size)); + + return p; +} + +void *DbstlMalloc(size_t size) +{ + void *p; + + assert(size != 0); + if ((p = malloc(size)) == NULL) + THROW(NotEnoughMemoryException, + ("DbstlMalloc failed to allocate memory", size)); + return p; +} + +void db_container::init_members() +{ + txn_begin_flags_ = 0; + commit_flags_ = 0; + cursor_oflags_ = 0; + pdb_ = NULL; + is_set_ = false; + auto_commit_ = false; + dbenv_ = NULL; +} + +void db_container::init_members(Db *dbp, DbEnv *envp) +{ + txn_begin_flags_ = 0; + commit_flags_ = 0; + is_set_ = false; + cursor_oflags_ = 0; + pdb_ = dbp; + dbenv_ = envp; + set_auto_commit(pdb_); +} + +db_container::db_container() +{ + init_members(); +} + +db_container::db_container(const db_container& dbctnr) +{ + init_members(dbctnr); +} + +db_container::db_container(Db *dbp, DbEnv *envp) +{ + init_members(dbp, envp); +} + +void db_container::init_members(const db_container&dbctnr) +{ + txn_begin_flags_ = dbctnr.txn_begin_flags_; + commit_flags_ = dbctnr.commit_flags_; + cursor_oflags_ = dbctnr.cursor_oflags_; + // We don't copy database handles because we will clone another + // database from dbctnr's db, and get its handle. We will + // copy the following database properties because they will be + // definitely identical. 
+ // + pdb_ = NULL; + is_set_ = dbctnr.is_set_; + auto_commit_ = dbctnr.auto_commit_; + dbenv_ = dbctnr.dbenv_; +} +void db_container::open_db_handles(Db *&pdb, DbEnv *&penv, DBTYPE dbtype, + u_int32_t oflags, u_int32_t sflags) +{ + if (pdb == NULL) { + pdb = open_db(penv, NULL, dbtype, oflags, sflags); + this->pdb_ = pdb; + } + + if (penv == NULL) { + penv = pdb->get_env(); + this->dbenv_ = penv; + set_auto_commit(pdb_); + } +} + +Db* db_container::clone_db_config(Db *dbp) +{ + string str; + + return clone_db_config(dbp, str); +} + +// Open a new db with identical configuration to dbp. The dbfname brings +// back the generated db file name. +Db* db_container::clone_db_config(Db *dbp, string &dbfname) +{ + Db *tdb = NULL; + int ret; + DBTYPE dbtype; + u_int32_t oflags, sflags; + const char *dbfilename, *dbname, *tdbname; + + BDBOP2(dbp->get_type(&dbtype), ret, dbp->close(0)); + BDBOP2(dbp->get_open_flags(&oflags), ret, dbp->close(0)); + BDBOP2(dbp->get_flags(&sflags), ret, dbp->close(0)); + + BDBOP (dbp->get_dbname(&dbfilename, &dbname), ret); + if (dbfilename == NULL) { + tdb = open_db(dbp->get_env(), + dbfilename, dbtype, oflags, sflags, 0420, NULL, 0, dbname); + dbfname.assign(""); + + } else { + construct_db_file_name(dbfname); + tdbname = dbfname.c_str(); + tdb = open_db(dbp->get_env(), tdbname, dbtype, oflags, sflags); + } + + return tdb; +} + +int db_container::construct_db_file_name(string &filename) const +{ + db_threadid_t tid; + db_timespec ts; + int len; + char name[64]; + + __os_gettime(NULL, &ts, 1); + __os_id(NULL, NULL, &tid); + + // avoid name clash + len = _snprintf(name, 64, "tmpdb_db_map_%lu_%d_%u.db", + (u_long)((uintptr_t)tid + ts.tv_nsec), rand(), g_db_file_suffix_++); + filename = name; + + return 0; +} + +void db_container::set_auto_commit(Db *db) +{ + u_int32_t envof, envf, dbf; + + if (db == NULL || dbenv_ == NULL) { + auto_commit_ = false; + return; + } + + dbenv_->get_open_flags(&envof); + if ((envof & DB_INIT_TXN) == 0) { + 
this->auto_commit_ = false; + } else { + dbenv_->get_flags(&envf); + db->get_flags(&dbf); + if (((envf & DB_AUTO_COMMIT) != 0) || + ((dbf & DB_AUTO_COMMIT) != 0)) + this->auto_commit_ = true; + else + this->auto_commit_ = false; + } +} + +void db_container::set_db_handle(Db *dbp, DbEnv *newenv) +{ + const char *errmsg; + + if ((errmsg = verify_config(dbp, newenv)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + + } + + pdb_ = dbp; + if (newenv) + dbenv_ = newenv; + +} + +void db_container::verify_db_handles(const db_container &cntnr) const +{ + Db *pdb2 = cntnr.get_db_handle(); + const char *home = NULL, *home2 = NULL, *dbf = NULL, *dbn = NULL, + *dbf2 = NULL, *dbn2 = NULL; + int ret = 0; + u_int32_t flags = 0, flags2 = 0; + bool same_dbfile, same_dbname, anonymous_inmemdbs; + // Check the two database handles do not refer to the same database. + // If they don't point to two anonymous databases at the same time, + // then two identical file names and two identical database names + // mean the two databases are the same. + assert(this->pdb_ != pdb2); + if (pdb_ == NULL) + return; + + BDBOP(pdb_->get_dbname(&dbf, &dbn), ret); + BDBOP(pdb2->get_dbname(&dbf2, &dbn2), ret); + + anonymous_inmemdbs = (dbf == NULL && dbf2 == NULL && + dbn == NULL && dbn2 == NULL); + + same_dbfile = (dbf != NULL && dbf2 != NULL && (strcmp(dbf, dbf2) == 0)) + || (dbf == NULL && dbf2 == NULL); + + same_dbname = (dbn == NULL && dbn2 == NULL) || + (dbn != NULL && dbn2 != NULL && strcmp(dbn, dbn2) == 0); + + assert((!(anonymous_inmemdbs) && same_dbfile && same_dbname) == false); + + // If any one of the two environments are transactional, both of them + // should be opened in the same transactional environment. 
+	DbEnv *penv2 = cntnr.get_db_env_handle();
+	if (dbenv_ != penv2 ){
+		BDBOP(this->dbenv_->get_open_flags(&flags), ret);
+		BDBOP(penv2->get_open_flags(&flags2), ret);
+
+		if ((flags & DB_INIT_TXN) || (flags2 & DB_INIT_TXN)) {
+			BDBOP(dbenv_->get_home(&home), ret);
+			BDBOP(penv2->get_home(&home2), ret);
+			assert(home != NULL && home2 != NULL &&
+			    strcmp(home, home2) == 0);
+		}
+	}
+}
+
+bool operator==(const Dbt&d1, const Dbt&d2)
+{
+	if (d1.get_size() != d2.get_size())
+		return false;
+
+	return memcmp(d1.get_data(), d2.get_data(),
+	    d2.get_size()) == 0;
+}
+
+bool operator==(const DBT&d1, const DBT&d2)
+{
+	if (d1.size != d2.size)
+		return false;
+	return memcmp(d1.data, d2.data, d2.size) == 0;
+}
+
+void close_all_dbs()
+{
+	ResourceManager::instance()->close_all_dbs();
+}
+
+void close_db(Db *pdb)
+{
+	ResourceManager::instance()->close_db(pdb);
+}
+
+DbTxn* begin_txn(u_int32_t flags, DbEnv*env)
+{
+	return ResourceManager::instance()->begin_txn(flags, env, 1);
+}
+
+void commit_txn(DbEnv *env, u_int32_t flags)
+{
+	ResourceManager::instance()->commit_txn(env, flags);
+}
+
+void commit_txn(DbEnv *env, DbTxn *txn, u_int32_t flags)
+{
+	ResourceManager::instance()->commit_txn(env, txn, flags);
+}
+
+void abort_txn(DbEnv *env)
+{
+	ResourceManager::instance()->abort_txn(env);
+}
+
+void abort_txn(DbEnv *env, DbTxn *txn)
+{
+	ResourceManager::instance()->abort_txn(env, txn);
+}
+
+DbTxn* current_txn(DbEnv *env)
+{
+	return ResourceManager::instance()->current_txn(env);
+}
+
+DbTxn* set_current_txn_handle(DbEnv *env, DbTxn *newtxn)
+{
+	return ResourceManager::instance()->
+	    set_current_txn_handle(env, newtxn);
+}
+
+void register_db(Db *pdb1)
+{
+	ResourceManager::instance()->register_db(pdb1);
+}
+
+void register_db_env(DbEnv *env1)
+{
+	ResourceManager::instance()->register_db_env(env1);
+}
+
+Db* open_db (DbEnv *penv, const char *filename, DBTYPE dbtype,
+    u_int32_t oflags, u_int32_t set_flags, int mode,
+    DbTxn *txn, u_int32_t cflags, const char *dbname)
+{
+	
return ResourceManager::instance()->open_db( + penv, filename, dbtype, oflags, set_flags, mode, txn, + cflags, dbname); +} + +DbEnv* open_env(const char *env_home, u_int32_t set_flags, + u_int32_t oflags, u_int32_t cachesize, int mode, u_int32_t cflags) +{ + return ResourceManager::instance()->open_env( + env_home, set_flags, oflags, cachesize, mode, cflags); +} + +void close_db_env(DbEnv *pdbenv) +{ + + ResourceManager::instance()->close_db_env(pdbenv); +} + +void close_all_db_envs() +{ + ResourceManager::instance()->close_all_db_envs(); +} + +size_t close_db_cursors(Db *dbp1) +{ + return ResourceManager::instance()->close_db_cursors(dbp1); +} + +db_mutex_t alloc_mutex() +{ + int ret; + db_mutex_t mtx; + + BDBOP2(ResourceManager::instance()->get_mutex_env()->mutex_alloc( + DB_MUTEX_PROCESS_ONLY, &mtx), ret, ResourceManager:: + instance()->get_mutex_env()->mutex_free(mtx)); + return mtx; +} + +int lock_mutex(db_mutex_t mtx) +{ + int ret; + + BDBOP2(ResourceManager::instance()->global_lock(mtx), ret, + ResourceManager:: + instance()->get_mutex_env()->mutex_free(mtx)); + return 0; +} + +int unlock_mutex(db_mutex_t mtx) +{ + int ret; + + BDBOP2(ResourceManager::instance()->global_unlock(mtx), ret, + ResourceManager:: + instance()->get_mutex_env()->mutex_free(mtx)); + return 0; +} + +void free_mutex(db_mutex_t mtx) +{ + ResourceManager::instance()->get_mutex_env()->mutex_free(mtx); +} + +void dbstl_startup() +{ + ResourceManager::instance()->global_startup(); +} + +void dbstl_exit() +{ + ResourceManager::instance()->global_exit(); +} + +void dbstl_thread_exit() +{ + ResourceManager::thread_exit(); +} + +// Internally used only. 
+void throw_bdb_exception(const char *caller, int error) +{ + switch (error) { + case DB_LOCK_DEADLOCK: + { + DbDeadlockException dl_except(caller); + throw dl_except; + } + case DB_LOCK_NOTGRANTED: + { + DbLockNotGrantedException lng_except(caller); + throw lng_except; + } + case DB_REP_HANDLE_DEAD: + { + DbRepHandleDeadException hd_except(caller); + throw hd_except; + } + case DB_RUNRECOVERY: + { + DbRunRecoveryException rr_except(caller); + throw rr_except; + } + default: + { + DbException except(caller, error); + throw except; + } + } +} + +void register_global_object(DbstlGlobalInnerObject *gio) +{ + ResourceManager::instance()->register_global_object(gio); +} + +u_int32_t hash_default(Db * /* dbp */, const void *key, u_int32_t len) +{ + const u_int8_t *k, *e; + u_int32_t h; + + k = (const u_int8_t *)key; + e = k + len; + for (h = 0; k < e; ++k) { + h *= 16777619; + h ^= *k; + } + return (h); +} + +bool DbstlMultipleDataIterator::next(Dbt &data) +{ + if (*p_ == (u_int32_t)-1) { + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + data.set_data(data_ + *p_--); + data.set_size(*p_--); + if (data.get_size() == 0 && data.get_data() == data_) + data.set_data(0); + } + return (p_ != 0); +} + +bool DbstlMultipleKeyDataIterator::next(Dbt &key, Dbt &data) +{ + if (*p_ == (u_int32_t)-1) { + key.set_data(0); + key.set_size(0); + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + key.set_data(data_ + *p_); + p_--; + key.set_size(*p_); + p_--; + data.set_data(data_ + *p_); + p_--; + data.set_size(*p_); + p_--; + } + return (p_ != 0); +} + +bool DbstlMultipleRecnoDataIterator::next(db_recno_t &recno, Dbt &data) +{ + if (*p_ == (u_int32_t)0) { + recno = 0; + data.set_data(0); + data.set_size(0); + p_ = 0; + } else { + recno = *p_--; + data.set_data(data_ + *p_--); + data.set_size(*p_--); + } + return (p_ != 0); +} + +END_NS + diff --git a/lang/cxx/stl/dbstl_container.h b/lang/cxx/stl/dbstl_container.h new file mode 100644 index 00000000..3690862a --- 
/dev/null +++ b/lang/cxx/stl/dbstl_container.h @@ -0,0 +1,582 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_CONTAINER_H__ +#define _DB_STL_CONTAINER_H__ + +#include "dbstl_common.h" +#include "dbstl_resource_manager.h" +#include + +START_NS(dbstl) + +class ResourceManager; + +////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +// +// db_container class definition +// +// This class's begin_txn, commit/abort_txn is used to wrap each DB related +// operation inside dbstl. When auto commit is enabled, each operation will +// be auto committed before returning, and aborted when an exception is thrown. +// +// It also contains members to hold all needed parameters and flags for +// transaction and cursor related calls. +// +// Container classes will inherit from this class. Each container will enclose +// every db related operation with db_container::begin_txn and +// db_container::commit_txn, and if exception is not caught, abort_txn() +// should be called. + +/** \defgroup dbstl_containers dbstl container classes +A dbstl container is very much like a C++ STL container. It stores a +collection of data items, or key/data pairs. +Each container is backed by a Berkeley DB database created in an explicit +database environment or an internal private environment; And the database +itself can be created explicitly with all kinds of configurations, or +by dbstl internally. For each type of container, some specific type of +database and/or configurations must be used or specified to the database +and its environment. dbstl will check the database and environment conform +to the requirement. 
When users don't have a chance to specify a container's +backing database and environment, like in copy constructors, dbstl will +create proper databases and/or environment for it. There are two helper +functions to make it easier to create/open an environment or database, they +are dbstl::open_db() and dbstl::open_env(); +\sa dbstl::open_db() dbstl::open_env() db_vector db_map db_multimap db_set +db_multiset +*/ + +/** \ingroup dbstl_containers +@{ +This class is the base class for all db container classes, you don't directly +use this class, but all container classes inherit from this class, so you need +to know the methods that can be accessed via concrete container classes. +This class is also used to support auto commit transactions. Autocommit is +enabled when DB_AUTO_COMMIT is set to the database or database environment +handle and the environment is transactional. + +Inside dbstl, there are transactions begun and committed/aborted if the backing +database and/or environment requires auto commit, and there are cursors +opened internally, and you can set the flags used by the transaction and cursor +functions via set functions of this class. + +All dbstl containers are fully multi-threaded, you should not need any +synchronization to use them in the correct way, but this class is not thread +safe, access to its members are not proctected by any mutex because the data +members of this class are supposed to be set before they are used, +and remain read only afterwards. If this is not the case, you must synchronize +the access. +*/ +class _exported db_container +{ +private: + // By default these flags are 0, users should configure these values + // on container initialization. + // + u_int32_t txn_begin_flags_, commit_flags_; + mutable u_int32_t cursor_oflags_; + + // Berkeley DB database handle for each container. Subclasses do not + // need to define it. + // + Db *pdb_; + + // Berkeley DB environment handle in which the db handle is opened. 
+ DbEnv *dbenv_; + + // db_map_iterator<> needs to know whether the container is a + // db_(multi)set or not + // + bool is_set_; + + // Determined automatically, by inspecting users the Berkeley DB + // database and environment configuration options. + // + bool auto_commit_; + + // If exisiting random temporary database name generation mechanism is + // still causing name clashes, users can set this global suffix number + // which will be append to each temporary database file name and by + // default it is 0. there is a dbstl::set_global_dbfile_suffix_number + // to do so. + static u_int32_t g_db_file_suffix_; + friend void set_global_dbfile_suffix_number(u_int32_t); +protected: + + // Does not clone or copy data from database parameter. + // Return the db file name back in the dbfname parameter. + // We construct a default database name the user can rename it using + // either DbEnv::dbrename or Db::rename. + // + Db* clone_db_config(Db *dbp, std::string &dbfname); + Db* clone_db_config(Db *dbp); + + // Store the name into name parameter whose length is n. Return -1 if + // not enough space. + int construct_db_file_name(std::string &filename) const; + + // Check that this container and cntnr are backed by different databases + // and if any one of them is using transactions, both should be in the + // same transactional environment. + // Called by all deriving classes' methods + // which have a container parameter. + void verify_db_handles(const db_container &cntnr) const; + + void open_db_handles(Db *&pdb, DbEnv *&penv, DBTYPE dbtype, + u_int32_t oflags, u_int32_t sflags); + + inline void set_is_set(bool b) + { + is_set_ = b; + } + + inline bool is_set() const + { + return is_set_; + } + + // Database and environment handles and autocommit and is_set_ are + // not assigned, because they are the nature of my own db, not + // that of dbctnr. 
+ inline const db_container &operator=(const db_container & dbctnr) + { + ASSIGNMENT_PREDCOND(dbctnr) + txn_begin_flags_ = dbctnr.txn_begin_flags_; + commit_flags_ = dbctnr.commit_flags_; + cursor_oflags_ = dbctnr.cursor_oflags_; + return dbctnr; + } + +public: + /// Default constructor. + db_container(); + + /// Copy constructor. + /// The new container will be backed by another database within the + /// same environment unless dbctnr's backing database is in its own + /// internal private environment. The name of the database is coined + /// based on current time and thread id and some random number. If + /// this is still causing naming clashes, you can set a suffix number + /// via "set_global_dbfile_suffix_number" function; And following db + /// file will suffix this number in the file name for additional + /// randomness. And the suffix will be incremented after each such use. + /// You can change the file name via DbEnv::rename. + /// If dbctnr is using an anonymous database, the newly constructed + /// container will also use an anonymous one. + /// \param dbctnr The container to initialize this container. + db_container(const db_container &dbctnr); + + /** + This constructor is not directly called by the user, but invoked by + constructors of concrete container classes. The statement about the + parameters applies to constructors of all container classes. + \param dbp Database handle. dbp is supposed to be opened inside envp. + Each dbstl container is backed by a Berkeley DB database, so dbstl + will create an internal anonymous database if dbp is NULL. + \param envp Environment handle. And envp can also be NULL, meaning the + dbp handle may be created in its internal private environment. + */ + db_container(Db *dbp, DbEnv *envp); + + /// The backing database is not closed in this function. It is closed + /// when current thread exits and the database is no longer referenced + /// by any other container instances in this process. 
+ /// In order to make the reference counting work alright, you must call + /// register_db(Db*) and register_db_env(DbEnv*) correctly. + /// \sa register_db(Db*) register_db_env(DbEnv*) + virtual ~db_container(){} + + /// \name Get and set functions for data members. + /// Note that these functions are not thread safe, because all data + /// members of db_container are supposed to be set on container + /// construction and initialization, and remain read only afterwards. + //@{ + /// Get the backing database's open flags. + /// \return The backing database's open flags. + inline u_int32_t get_db_open_flags() const + { + u_int32_t oflags; + pdb_->get_open_flags(&oflags); + return oflags; + } + + /// Get the backing database's flags that are set via Db::set_flags() + /// function. + /// \return Flags set to this container's database handle. + inline u_int32_t get_db_set_flags() const + { + u_int32_t oflags; + pdb_->get_flags(&oflags); + return oflags; + } + + /// Get the backing database's handle. + /// \return The backing database handle of this container. + inline Db* get_db_handle() const + { + return pdb_; + } + + /// Get the backing database environment's handle. + /// \return The backing database environment handle of this container. + inline DbEnv* get_db_env_handle() const + { + return dbenv_; + } + + /** + Set the underlying database's handle, and optionally environment + handle if the environment has also changed. That is, users can change + the container object's underlying database while the object is alive. + dbstl will verify that the handles set conforms to the concrete + container's requirement to Berkeley DB database/environment handles. + \param dbp The database handle to set. + \param newenv The database environment handle to set. + */ + void set_db_handle(Db *dbp, DbEnv *newenv = NULL); + + /** Set the flags required by the Berkeley DB functions + DbEnv::txn_begin(), DbTxn::commit() and DbEnv::cursor(). 
These flags + will be set to this container's auto commit member functions when + auto commit transaction is used, except that cursor_oflags is set to + the Dbc::cursor when creating an iterator for this container. + By default the three flags are all zero. + You can also set the values of the flags individually by using the + appropriate set functions in this class. The corresponding get + functions return the flags actually used. + \param txn_begin_flags Flags to be set to DbEnv::txn_begin(). + \param commit_flags Flags to be set to DbTxn::commit(). + \param cursor_open_flags Flags to be set to Db::cursor(). + */ + inline void set_all_flags(u_int32_t txn_begin_flags, + u_int32_t commit_flags, u_int32_t cursor_open_flags) + { + this->txn_begin_flags_ = txn_begin_flags; + this->commit_flags_ = commit_flags; + this->cursor_oflags_ = cursor_open_flags; + } + + /// Set flag of DbEnv::txn_begin() call. + /// \param flag Flags to be set to DbEnv::txn_begin(). + inline void set_txn_begin_flags(u_int32_t flag ) + { + txn_begin_flags_ = flag; + } + + /// Get flag of DbEnv::txn_begin() call. + /// \return Flags to be set to DbEnv::txn_begin(). + inline u_int32_t get_txn_begin_flags() const + { + return txn_begin_flags_; + } + + /// Set flag of DbTxn::commit() call. + /// \param flag Flags to be set to DbTxn::commit(). + inline void set_commit_flags(u_int32_t flag) + { + commit_flags_ = flag; + } + + /// Get flag of DbTxn::commit() call. + /// \return Flags to be set to DbTxn::commit(). + inline u_int32_t get_commit_flags() const + { + return commit_flags_; + } + + /// Get flag of Db::cursor() call. + /// \return Flags to be set to Db::cursor(). + inline u_int32_t get_cursor_open_flags() const + { + return cursor_oflags_; + } + + /// Set flag of Db::cursor() call. + /// \param flag Flags to be set to Db::cursor(). 
+ inline void set_cursor_open_flags(u_int32_t flag) + { + cursor_oflags_ = flag; + } + //@} // getter_setters + +protected: + void init_members(); + void init_members(Db *pdb, DbEnv *env); + void init_members(const db_container&cnt); + // Called internally by concrete container constructors. Verification + // is done by the specialized constructors + void set_db_handle_int(Db *dbp, DbEnv *envp) + { + this->pdb_ = dbp; + this->dbenv_ = envp; + } + + // Child classes override this function to check the db and environment + // handles are correctly configured. + virtual const char* verify_config(Db* pdb, DbEnv* penv) const + { + if (pdb != NULL && ((pdb->get_create_flags() & + DB_CXX_NO_EXCEPTIONS) == 0)) + return +"Db and DbEnv object must be constructed with DB_CXX_NO_EXCEPTIONS flag set."; + + if (penv != NULL && ((penv->get_create_flags() & + DB_CXX_NO_EXCEPTIONS) == 0)) + return +"Db and DbEnv object must be constructed with DB_CXX_NO_EXCEPTIONS flag set."; + return NULL; + } + + // Use db and dbenv_ to determine whether to enable autocommit. If + // DB_AUTOCOMMIT is set on dbenv_ or db and dbenv_ is transactional, + // that db should be autocommit. + // + void set_auto_commit(Db* db); + + // Begin a transaction. Used to make a container's db related + // operations auto commit when the operation completes and abort + // when the operation fails. If there is already a transaction for this + // container's environment, then that transaction is used. If the + // transaction was created by DB STL, its reference count is + // incremented (user created and external transactions are not + // reference counted because they can be nested.). 
+ inline DbTxn* begin_txn() const + { + DbTxn *txn = NULL; + + if (this->auto_commit_) { + txn = ResourceManager::instance()->begin_txn( + this->txn_begin_flags_, dbenv_, 0); + } + return txn; + } + + inline void commit_txn() const + { + if (this->auto_commit_) { + ResourceManager::instance()-> + commit_txn(pdb_->get_env(), this->commit_flags_); + } + } + + inline void abort_txn() const + { + if (this->auto_commit_) + ResourceManager::instance()-> + abort_txn(pdb_->get_env()); + } + + inline DbTxn *current_txn() const + { + return ResourceManager::instance()-> + current_txn(pdb_->get_env()); + } +}; // db_container +/** @} */ // dbstl_containers + +/// \addtogroup dbstl_helper_classes +//@{ +/// Bulk retrieval configuration helper class. Used by the begin() function of +/// a container. +class _exported BulkRetrievalOption +{ +public: + enum Option {BulkRetrieval, NoBulkRetrieval}; + +protected: + Option bulk_retrieve; + u_int32_t bulk_buf_sz_; + + inline BulkRetrievalOption() + { + bulk_retrieve = NoBulkRetrieval; + bulk_buf_sz_ = DBSTL_BULK_BUF_SIZE; + } + +public: + inline BulkRetrievalOption(Option bulk_retrieve1, + u_int32_t bulk_buf_sz = DBSTL_BULK_BUF_SIZE) + { + this->bulk_retrieve = bulk_retrieve1; + this->bulk_buf_sz_ = bulk_buf_sz; + } + + // The following two static members should best be const, but this is + // impossible because in C++ this definition is a function declaration: + // const static BulkRetrievalOption BulkRetrieval(BulkRetrieval); + // i.e. const static members can only be inited with default copy + // constructor. + // + // Static data members can't be compiled into a .lib for a dll on + // Windows, code using the static members will have link error--- + // unresolved symbols, so we have to use a static function here. + /// This function indicates that you need a bulk retrieval iterator, + /// and it can be also used to optionally set the bulk read buffer size. 
+ inline static BulkRetrievalOption bulk_retrieval( + u_int32_t bulk_buf_sz = DBSTL_BULK_BUF_SIZE) + { + return BulkRetrievalOption(BulkRetrieval, bulk_buf_sz); + } + + /// This function indicates that you do not need a bulk retrieval + /// iterator. + inline static BulkRetrievalOption no_bulk_retrieval() + { + return BulkRetrievalOption(NoBulkRetrieval, 0); + } + + /// Equality comparison. + inline bool operator==(const BulkRetrievalOption& bro) const + { + return bulk_retrieve == bro.bulk_retrieve; + } + + /// Assignment operator. + inline void operator=(BulkRetrievalOption::Option opt) + { + bulk_retrieve = opt; + } + + /// Return the buffer size set to this object. + inline u_int32_t bulk_buf_size() + { + return bulk_buf_sz_; + } +}; +//@} + +/// \addtogroup dbstl_helper_classes +//@{ +/// Read-modify-write cursor configuration helper class. Used by each begin() +/// function of all containers. +class _exported ReadModifyWriteOption +{ +protected: + enum Option{ReadModifyWrite, NoReadModifyWrite}; + Option rmw; + + inline ReadModifyWriteOption(Option rmw1) + { + this->rmw = rmw1; + } + + inline ReadModifyWriteOption() + { + rmw = NoReadModifyWrite; + } + +public: + /// Assignment operator. + inline void operator=(ReadModifyWriteOption::Option rmw1) + { + this->rmw = rmw1; + } + + /// Equality comparison. + inline bool operator==(const ReadModifyWriteOption &rmw1) const + { + return this->rmw == rmw1.rmw; + } + + /// Call this function to tell the container's begin() function that + /// you need a read-modify-write iterator. + inline static ReadModifyWriteOption read_modify_write() + { + return ReadModifyWriteOption(ReadModifyWrite); + } + + /// Call this function to tell the container's begin() function that + /// you do not need a read-modify-write iterator. This is the default + /// value for the parameter of any container's begin() function. 
+ inline static ReadModifyWriteOption no_read_modify_write() + { + return ReadModifyWriteOption(NoReadModifyWrite); + } + +}; +//@} // dbstl_helper_classes + +// The classes in the Berkeley DB C++ API do not expose data and p_ member. +// Extend the class to provide this functionality, rather than altering the +// internal implementations. +// +class _exported DbstlMultipleIterator : protected DbMultipleIterator +{ +protected: + DbstlMultipleIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {} +public: + u_int32_t get_pointer() + { + u_int32_t off; + off = (u_int32_t)((u_int8_t*)p_ - data_); + return off; + } + + inline void set_pointer(u_int32_t offset) + { + p_ = (u_int32_t*)(data_ + offset); + } + +}; + +class _exported DbstlMultipleKeyDataIterator : public DbstlMultipleIterator +{ +public: + DbstlMultipleKeyDataIterator(const Dbt &dbt) + : DbstlMultipleIterator(dbt) {} + bool next(Dbt &key, Dbt &data); +}; + +class _exported DbstlMultipleRecnoDataIterator : public DbstlMultipleIterator +{ +public: + DbstlMultipleRecnoDataIterator(const Dbt &dbt) + : DbstlMultipleIterator(dbt) {} + bool next(db_recno_t &recno, Dbt &data); +}; + +class _exported DbstlMultipleDataIterator : public DbstlMultipleIterator +{ +public: + DbstlMultipleDataIterator(const Dbt &dbt) + : DbstlMultipleIterator(dbt) {} + bool next(Dbt &data); +}; + +// These classes are used to give data values meaningful default +// initializations. They are only necessary for types that do not have a +// reasonable default constructor - explicitly char *, wchar_t * and T*. +// The string types don't have a reasonable default initializer since we store +// the underlying content, not a pointer. +// So we fully instantiate types for T* and const T* and set the pointers to +// NULL and leave other types intact. 
+template +class DbstlInitializeDefault +{ +public: + DbstlInitializeDefault(T&){} +}; + +template +class DbstlInitializeDefault +{ +public: + DbstlInitializeDefault(T *& t){t = NULL;} +}; + +template +class DbstlInitializeDefault +{ +public: + DbstlInitializeDefault(const T *& t){t = NULL;} +}; + +END_NS + +#endif //_DB_STL_CONTAINER_H__ diff --git a/lang/cxx/stl/dbstl_dbc.h b/lang/cxx/stl/dbstl_dbc.h new file mode 100644 index 00000000..7304af77 --- /dev/null +++ b/lang/cxx/stl/dbstl_dbc.h @@ -0,0 +1,1328 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_DBC_H +#define _DB_STL_DBC_H + +#include + +#include + +#include "dbstl_common.h" +#include "dbstl_dbt.h" +#include "dbstl_exception.h" +#include "dbstl_container.h" +#include "dbstl_resource_manager.h" + +START_NS(dbstl) + +// Forward declarations. +class db_container; +class DbCursorBase; +template +class RandDbCursor; +class DbstlMultipleKeyDataIterator; +class DbstlMultipleRecnoDataIterator; +using std::set; + +///////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////// +// +// LazyDupCursor class template definition. +// +// This class allows us to make a shallow copy on construction. When the +// cursor pointer is first dereferenced a deep copy is made. +// +// The allowed type for BaseType is DbCursor<> and RandDbCursor<> +// The expected usage of this class is: +// 1. Create an iterator in container::begin(), the iterator::pcsr.csr_ptr_ +// points to an object, thus no need to duplicate. +// 2. The iterator is created with default argument, thus the +// iterator::pcsr.csr_ptr_ and dup_src_ is NULL, and this iterator is +// copied using copy constructor for may be many times, but until the +// cursor is really used, no cursor is duplicated. 
+// +// There is an informing mechanism between an instance of this class and +// its dup_src_ cursor: when that cursor is about to change state, it will +// inform all registered LazyDupCursor "listeners" of the change, so that +// they will duplicate from the cursor before the change, because that +// is the expected cursor state for the listeners. + +template +class LazyDupCursor +{ + // dup_src_ is used by this class internally to duplicate another + // cursor and set to csr_ptr_, and it is assigned in the copy + // constructor from another LazyDupCursor object's csr_ptr_; csr_ptr_ + // is the acutual pointer that is used to perform cursor operations. + // + BaseType *csr_ptr_, *dup_src_; + typedef LazyDupCursor self; + +public: + //////////////////////////////////////////////////////////////////// + // + // Begin public constructors and destructor. + // + inline LazyDupCursor() + { + csr_ptr_ = NULL; + dup_src_ = NULL; + } + + // Used in all iterator types' constructors, dbcptr is created + // solely for this object, and the cursor is not yet opened, so we + // simply assign it to csr_ptr_. + explicit inline LazyDupCursor(BaseType *dbcptr) + { + csr_ptr_ = dbcptr; + // Already have pointer, do not need to duplicate. + dup_src_ = NULL; + } + + // Do not copy to csr_ptr_, shallow copy from dp2.csr_ptr_. + LazyDupCursor(const self& dp2) + { + csr_ptr_ = NULL; + if (dp2.csr_ptr_) + dup_src_ = dp2.csr_ptr_; + else + dup_src_ = dp2.dup_src_; + if (dup_src_) + dup_src_->add_dupper(this); + } + + ~LazyDupCursor() + { + // Not duplicated yet, remove from dup_src_. + if (csr_ptr_ == NULL && dup_src_ != NULL) + dup_src_->erase_dupper(this); + if (csr_ptr_) + delete csr_ptr_;// Delete the cursor. + } + + //////////////////////////////////////////////////////////////////// + + // Deep copy. + inline const self& operator=(const self &dp2) + { + BaseType *dcb; + + dcb = dp2.csr_ptr_ ? dp2.csr_ptr_ : dp2.dup_src_; + this->operator=(dcb); + + return dp2; + } + + // Deep copy. 
+ inline BaseType *operator=(BaseType *dcb) + { + + if (csr_ptr_) { + // Only dup_src_ will inform this, not csr_ptr_. + delete csr_ptr_; + csr_ptr_ = NULL; + } + + if (dcb) + csr_ptr_ = new BaseType(*dcb); + if (dup_src_ != NULL) { + dup_src_->erase_dupper(this); + dup_src_ = NULL; + } + + return dcb; + } + + void set_cursor(BaseType *dbc) + { + assert(dbc != NULL); + if (csr_ptr_) { + // Only dup_src_ will inform this, not csr_ptr_. + delete csr_ptr_; + csr_ptr_ = NULL; + } + + csr_ptr_ = dbc; + if (dup_src_ != NULL) { + dup_src_->erase_dupper(this); + dup_src_ = NULL; + } + } + + // If dup_src_ is informing this object, pass false parameter. + inline BaseType* duplicate(bool erase_dupper = true) + { + assert(dup_src_ != NULL); + if (csr_ptr_) { + // Only dup_src_ will inform this, not csr_ptr_. + delete csr_ptr_; + csr_ptr_ = NULL; + } + csr_ptr_ = new BaseType(*dup_src_); + if (erase_dupper) + dup_src_->erase_dupper(this); + dup_src_ = NULL; + return csr_ptr_; + } + + inline BaseType* operator->() + { + if (csr_ptr_) + return csr_ptr_; + + return duplicate(); + } + + inline operator bool() + { + return csr_ptr_ != NULL; + } + + inline bool operator!() + { + return !csr_ptr_; + } + + inline bool operator==(void *p) + { + return csr_ptr_ == p; + } + + inline BaseType* base_ptr(){ + if (csr_ptr_) + return csr_ptr_; + return duplicate(); + } +}; + + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// DbCursorBase class definition. +// +// DbCursorBase is the base class for DbCursor<> class template, this class +// wraps the Berkeley DB cursor, in order for the ResourceManager to close +// the Berkeley DB cursor and set the pointer to null. +// If we don't set the cursor to NULL, the handle could become valid again, +// since Berkeley DB recycles handles. DB STL would then try to use the same +// handle across different instances, which is not supported. 
+// +// In ResourceManager, whenver a cursor is opened, it stores the +// DbCursorBase* pointer, so that when need to close the cursor, it calls +// DbCursorBase::close() function. +// +class DbCursorBase +{ +protected: + Dbc *csr_; + DbTxn *owner_txn_; + Db *owner_db_; + int csr_status_; + +public: + enum DbcGetSkipOptions{SKIP_KEY, SKIP_DATA, SKIP_NONE}; + inline DbTxn *get_owner_txn() const { return owner_txn_;} + inline void set_owner_txn(DbTxn *otxn) { owner_txn_ = otxn;} + + inline Db *get_owner_db() const { return owner_db_;} + inline void set_owner_db(Db *odb) { owner_db_ = odb;} + + inline Dbc *get_cursor() const { return csr_;} + inline Dbc *&get_cursor_reference() { return csr_;} + inline void set_cursor(Dbc*csr1) + { + if (csr_) + ResourceManager::instance()->remove_cursor(this); + csr_ = csr1; + } + + inline int close() + { + int ret = 0; + + if (csr_ != NULL && (((DBC *)csr_)->flags & DBC_ACTIVE) != 0) { + ret = csr_->close(); + csr_ = NULL; + } + return ret; + } + + DbCursorBase(){ + owner_txn_ = NULL; + owner_db_ = NULL; + csr_ = NULL; + csr_status_ = 0; + } + + DbCursorBase(const DbCursorBase &csrbase) + { + this->operator=(csrbase); + } + + const DbCursorBase &operator=(const DbCursorBase &csrbase) + { + owner_txn_ = csrbase.owner_txn_; + owner_db_ = csrbase.owner_db_; + csr_ = NULL; // Need to call DbCursor<>::dup to duplicate. + csr_status_ = 0; + return csrbase; + } + + virtual ~DbCursorBase() + { + close(); + } +}; // DbCursorBase + +//////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +// +// DbCursor class template definition +// +// DbCursor is the connection between Berkeley DB and dbstl container classes +// it is the wrapper class for Dbc* cursor of Berkeley Db, to be used for +// iterator classes of Berkeley DB backed STL container classes. +// Requirement: +// 1. Deep copy using Dbc->dup. +// 2. Dbc*cursor management via ResourceManager class. 
+// 3. Provide methods to do increment, decrement and advance operations, +// advance is only available for random access iterator from DB_RECNO +// containers. +// + +template +class DbCursor : public DbCursorBase{ +protected: + // Lazy duplication support: store the LazyDupCursor objects which + // will duplicate from this cursor. + typedef LazyDupCursor > dupper_t; + typedef LazyDupCursor > dupperr_t; + typedef set >* > dupset_t; + typedef set >* > dupsetr_t; + + set >* > sduppers1_; + set >* > sduppers2_; + + // We must use DB_DBT_USERMEM for Dbc::get and Db::get if they are + // used in multi-threaded application, so we use key_buf_ and + // data_buf_ data members for get operations, and initialize them + // to use user memory. + Dbt key_buf_, data_buf_; + + // Similar to Berkeley DB C++ API's classes, used to iterate through + // bulk retrieved key/data pairs. + DbstlMultipleKeyDataIterator *multi_itr_; + DbstlMultipleRecnoDataIterator *recno_itr_; + + // Whether to use bulk retrieval. If non-zero, do bulk retrieval, + // bulk buffer size is this member, otherwise not bulk read. + // By default this member is 0. + u_int32_t bulk_retrieval_; + // Whether to use DB_RMW flag in Dbc::get, by default false. + bool rmw_get_; + + // Whether to poll data from cursor's current position on every + // get_current_key/data call. + // Note that curr_key_/curr_data_ members are always maintained + // to contain current k/d value of the pair pointed to by csr_. + // If doing bulk retrieval, this flag is ignored, we will always + // read data from bulk buffer. + bool directdb_get_; + + // Inform LazyDupCursor objects registered in this object to do + // duplication because this cursor is to be changed. + // This function should be called in any function of + // DbCursor and RandDbCursor whenever the cursor is about to change + // state(move/close, etc). 
+ inline void inform_duppers() + { + typename dupset_t::iterator i1; + typename dupsetr_t::iterator i2; + for (i1 = sduppers1_.begin(); i1 != sduppers1_.end(); i1++) + (*i1)->duplicate(false); + for (i2 = sduppers2_.begin(); i2 != sduppers2_.end(); i2++) + (*i2)->duplicate(false); + sduppers1_.clear(); + sduppers2_.clear(); + } + +public: + friend class DataItem; + + // Current key/data pair pointed by "csr_" Dbc*cursor. They are both + // maintained on cursor movement. If directdb_get_ is true, + // they are both refreshed on every get_current{[_key][_data]} call and + // the retrieved key/data pair is returned to user. + DataItem curr_key_; + DataItem curr_data_; + + typedef DbCursor self; + + // This function is used by all iterators to do equals comparison. + // Random iterators will also use it to do less than/greater than + // comparisons. + // Internally, the page number difference or index difference is + // returned, so for btree and hash databases, if two cursors point to + // the same key/data pair, we will get 0 returned, meaning they are + // equal; if return value is not 0, it means no more than that they + // they are not equal. We can't assume any order information between + // the two cursors. For recno databases, we use the recno to do less + // than and greater than comparison. So we can get a reliable knowledge + // of the relative position of two iterators from the return value. + int compare(const self *csr2) const{ + int res, ret; + + BDBOP(((DBC *)csr_)->cmp((DBC *)csr_, (DBC *)csr2->csr_, + &res, 0), ret); + return res; + } + + //////////////////////////////////////////////////////////////////// + // + // Add and remove cursor change event listeners. 
+ // + inline void add_dupper(dupper_t *dupper) + { + sduppers1_.insert(dupper); + } + + inline void add_dupper(dupperr_t *dupper) + { + sduppers2_.insert(dupper); + } + + inline void erase_dupper(dupper_t *dup1) + { + sduppers1_.erase(dup1); + } + + inline void erase_dupper(dupperr_t *dup1) + { + sduppers2_.erase(dup1); + } + + //////////////////////////////////////////////////////////////////// + +public: + + inline bool get_rmw() + { + return rmw_get_; + } + + bool set_rmw(bool rmw, DB_ENV *env = NULL ) + { + u_int32_t flag = 0; + DB_ENV *dbenv = NULL; + int ret; + + if (env) + dbenv = env; + else + dbenv = ((DBC*)csr_)->dbenv; + BDBOP(dbenv->get_open_flags(dbenv, &flag), ret); + + // DB_RMW flag requires locking subsystem started. + if (rmw && ((flag & DB_INIT_LOCK) || (flag & DB_INIT_CDB) || + (flag & DB_INIT_TXN))) + rmw_get_ = true; + else + rmw_get_ = false; + return rmw_get_; + } + + // Modify bulk buffer size. Bulk read is enabled when creating an + // iterator, so users later can only modify the bulk buffer size + // to another value, but can't enable/disable bulk read while an + // iterator is already alive. + // Returns true if succeeded, false otherwise. + inline bool set_bulk_buffer(u_int32_t sz) + { + if (bulk_retrieval_ && sz) { + normalize_bulk_bufsize(sz); + bulk_retrieval_ = sz; + return true; + } + + return false; + + } + + inline u_int32_t get_bulk_bufsize() + { + return bulk_retrieval_; + } + + inline void enlarge_dbt(Dbt &d, u_int32_t sz) + { + void *p; + + p = DbstlReAlloc(d.get_data(), sz); + dbstl_assert(p != NULL); + d.set_ulen(sz); + d.set_data(p); + d.set_size(sz); + } + // Move forward or backward, often by 1 key/data pair, we can use + // different flags for Dbc::get function. Then update the key/data + // pair and csr_status_ members. 
+ // + int increment(int flag) + { + int ret = 0; + Dbt &k = key_buf_, &d = data_buf_; + u_int32_t sz, getflags = 0, bulk_bufsz; + + if (csr_ == NULL) + return INVALID_ITERATOR_CURSOR; + curr_key_.reset(); + curr_data_.reset(); + inform_duppers(); + + // Berkeley DB cursor flags are not bitwise set, so we can't + // use bit operations here. + // + if (this->bulk_retrieval_ != 0) + switch (flag) { + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + case DB_LAST: + case DB_JOIN_ITEM: + case DB_GET_RECNO: + case DB_SET_RECNO: + break; + default: + getflags |= DB_MULTIPLE_KEY; + if (data_buf_.get_ulen() != bulk_retrieval_) + enlarge_dbt(data_buf_, bulk_retrieval_); + break; + } + + if (this->rmw_get_) + getflags |= DB_RMW; + + // Do not use BDBOP or BDBOP2 here because it is likely + // that an iteration will step onto end() position. +retry: ret = csr_->get(&k, &d, flag | getflags); + if (ret == 0) { + if (bulk_retrieval_ && (getflags & DB_MULTIPLE_KEY)) { + // A new retrieval, so both multi_itr_ and + // recno_itr_ must be NULL. + if (((DBC*)csr_)->dbtype == DB_RECNO) { + if (recno_itr_) { + delete recno_itr_; + recno_itr_ = NULL; + } + recno_itr_ = + new DbstlMultipleRecnoDataIterator(d); + } else { + if (multi_itr_) { + delete multi_itr_; + multi_itr_ = NULL; + } + multi_itr_ = new + DbstlMultipleKeyDataIterator(d); + } + } else { + // Non bulk retrieval succeeded. + curr_key_.set_dbt(k, false); + curr_data_.set_dbt(d, false); + limit_buf_size_after_use(); + } + } else if (ret == DB_BUFFER_SMALL) { + // Either the key or data DBTs might trigger a + // DB_KEYSMALL return. Only enlarge the DBT if it + // is actually too small. 
+ if (((sz = d.get_size()) > 0) && (sz > d.get_ulen())) + enlarge_dbt(d, sz); + + if (((sz = k.get_size()) > 0) && (sz > k.get_ulen())) + enlarge_dbt(k, sz); + + goto retry; + } else { + if (ret == DB_NOTFOUND) { + ret = INVALID_ITERATOR_POSITION; + this->curr_key_.reset(); + this->curr_data_.reset(); + } else if (bulk_retrieval_ && + (getflags & DB_MULTIPLE_KEY)){ + BDBOP(((DBC*)csr_)->dbp-> + get_pagesize(((DBC*)csr_)-> + dbp, &bulk_bufsz), ret); + if (bulk_bufsz > d.get_ulen()) {// buf size error + normalize_bulk_bufsize(bulk_bufsz); + bulk_retrieval_ = bulk_bufsz; + enlarge_dbt(d, bulk_bufsz); + goto retry; + } else + throw_bdb_exception( + "DbCursor<>::increment", ret); + } else + throw_bdb_exception( + "DbCursor<>::increment", ret); + } + + csr_status_ = ret; + return ret; + } + + // After each use of key_buf_ and data_buf_, limit their buffer size to + // a reasonable size so that they don't waste a big memory space. + inline void limit_buf_size_after_use() + { + if (bulk_retrieval_) + // Bulk buffer has to be huge, so don't check it. + return; + + if (key_buf_.get_ulen() > DBSTL_MAX_KEY_BUF_LEN) { + key_buf_.set_data(DbstlReAlloc(key_buf_.get_data(), + DBSTL_MAX_KEY_BUF_LEN)); + key_buf_.set_ulen(DBSTL_MAX_KEY_BUF_LEN); + } + if (data_buf_.get_ulen() > DBSTL_MAX_DATA_BUF_LEN) { + data_buf_.set_data(DbstlReAlloc(data_buf_.get_data(), + DBSTL_MAX_DATA_BUF_LEN)); + data_buf_.set_ulen(DBSTL_MAX_DATA_BUF_LEN); + } + } + + // Duplicate this object's cursor and set it to dbc1. + // + inline int dup(DbCursor& dbc1) const + { + Dbc* pcsr = 0; + int ret; + + if (csr_ != 0 && csr_->dup(&pcsr, DB_POSITION) == 0) { + dbc1.set_cursor(pcsr); + dbc1.set_owner_db(this->get_owner_db()); + dbc1.set_owner_txn(this->get_owner_txn()); + ResourceManager::instance()->add_cursor( + this->get_owner_db(), &dbc1); + ret = 0; + } else + ret = ITERATOR_DUP_ERROR; + + return ret; + } + +public: + // Open a cursor, do not move it, it is at an invalid position. 
+ // All cursors should be opened using this method. + // + inline int open(db_container *pdbc, int flags) + { + int ret; + + Db *pdb = pdbc->get_db_handle(); + if (pdb == NULL) + return 0; + if (csr_) // Close before open. + return 0; + ret = ResourceManager::instance()-> + open_cursor(this, pdb, flags); + set_rmw(rmw_get_); + this->csr_status_ = ret; + return ret; + } + + // Move Berkeley DB cursor to specified key k, by default use DB_SET, + // but DB_SET_RANGE can and may also be used. + // + int move_to(const key_dt&k, u_int32_t flag = DB_SET) + { + Dbt &d1 = data_buf_; + int ret; + u_int32_t sz; + DataItem k1(k, true); + + if (csr_ == NULL) + return INVALID_ITERATOR_CURSOR; + + curr_key_.reset(); + curr_data_.reset(); + inform_duppers(); + + // It is likely that k is not in db, causing get(DB_SET) to + // fail, we should not throw an exception because of this. + // + if (rmw_get_) + flag |= DB_RMW; +retry: ret = csr_->get(&k1.get_dbt(), &d1, flag); + if (ret == 0) { + curr_key_ = k1; + curr_data_.set_dbt(d1, false); + limit_buf_size_after_use(); + } else if (ret == DB_BUFFER_SMALL) { + sz = d1.get_size(); + assert(sz > 0); + enlarge_dbt(d1, sz); + goto retry; + } else { + if (ret == DB_NOTFOUND) { + ret = INVALID_ITERATOR_POSITION; + // Invalidate current values because it is + // at an invalid position. + this->curr_key_.reset(); + this->curr_data_.reset(); + } else + throw_bdb_exception("DbCursor<>::move_to", ret); + } + + csr_status_ = ret; + return ret; + } + + // Returns the number of keys equal to the current one. + inline size_t count() + { + int ret; + db_recno_t cnt; + + BDBOP2(csr_->count(&cnt, 0), ret, close()); + return (size_t)cnt; + } + + int insert(const key_dt&k, const data_dt& d, int pos = DB_BEFORE) + { + // !!!XXX: + // We do a deep copy of the input data into a local + // variable. Apparently not doing so causes issues + // when using gcc. Even though the put completes prior + // to returning from this function call. 
+ // It would be best to avoid this additional copy. + int ret; + // (k, d) pair may be a temporary pair, so we must copy them. + DataItem k1(k, false), d1(d, false); + + inform_duppers(); + if (pos == DB_AFTER) { + ret = this->csr_->put(&k1.get_dbt(), &d1.get_dbt(), + pos); + // May be using this flag for an empty database, + // because begin() an iterator of an empty db_vector + // equals its end() iterator, so use DB_KEYLAST to + // retry. + // + if (ret == EINVAL || ret == 0) + return ret; + else if (ret) + throw_bdb_exception("DbCursor<>::insert", ret); + } + if (pos == DB_NODUPDATA) + BDBOP3(this->csr_->put(&k1.get_dbt(), &d1.get_dbt(), + pos), ret, DB_KEYEXIST, close()); + else + BDBOP2(this->csr_->put(&k1.get_dbt(), &d1.get_dbt(), + pos), ret, close()); + this->csr_status_ = ret; + if (ret == 0) { + curr_key_ = k1; + curr_data_ = d1; + } + // This cursor points to the new key/data pair now. + return ret; + } + + // Replace current cursor-pointed data item with d. + inline int replace(const data_dt& d) + { + Dbt k1; + int ret; + // !!!XXX: + // We do a deep copy of the input data into a local + // variable. Apparently not doing so causes issues + // when using gcc. Even though the put completes prior + // to returning from this function call. + // It would be best to avoid this additional copy. + // d may be a temporary object, so we must copy it. + DataItem d1(d, false); + + + BDBOP2(this->csr_->put(&k1, &d1.get_dbt(), DB_CURRENT), + ret, close()); + curr_data_ = d1; // Update current data. + + this->csr_status_ = ret; + return ret; + } + + // Remove old key and insert new key-psuodo_data. First insert then + // move to old key and remove it so that the cursor remains at the + // old key's position, according to DB documentation. + // But from practice I can see + // the cursor after delete seems not at old position because a for + // loop iteration exits prematurelly, not all elements are passed. 
+	//
+	inline int replace_key(const key_dt&k)
+	{
+		data_dt d;
+		key_dt k0;
+		int ret;
+
+		this->get_current_key_data(k0, d);
+		if (k0 == k)
+			return 0;
+
+		DbCursor csr2;
+		this->dup(csr2);
+		// Delete current; only insert the new pair if the delete
+		// succeeded, otherwise report the delete failure and do not
+		ret = ((ret = csr2.del()) == 0) ?
+		    csr2.insert(k, d, DB_KEYLAST) : ret;
+		this->csr_status_ = ret;
+
+		// Now this->csr_ is sitting on an invalid position, its
+		// iterator is invalidated. Must first move it to the next
+		// position before using it.
+		return ret;
+	}
+
+	inline int del()
+	{
+		int ret;
+
+		inform_duppers();
+		BDBOP2(csr_->del(0), ret, close());
+
+		// By default pos.csr_ will stay at where it was after delete,
+		// which now is an invalid position. So we need to move to
+		// next to conform to stl specifications, but we don't move it
+		// here, iterator::erase should move the iterator itself
+		// forward.
+		//
+		this->csr_status_ = ret;
+		return ret;
+	}
+
+	// Make sure the bulk buffer is large enough, and a multiple of 1KB.
+	// This function may be called prior to cursor initialization, it is
+	// not possible to verify that the buffer size is a multiple of the
+	// page size here.
+	u_int32_t normalize_bulk_bufsize(u_int32_t &bulksz)
+	{
+		if (bulksz == 0)
+			return 0;
+
+		while (bulksz < 16 * sizeof(data_dt))
+			bulksz *= 2;
+
+		bulksz = bulksz + 1024 - bulksz % 1024;
+
+		return bulksz;
+	}
+
+	////////////////////////////////////////////////////////////////////
+	//
+	// Begin public constructors and destructor.
+ // + explicit DbCursor(u_int32_t b_bulk_retrieval = 0, bool brmw1 = false, + bool directdbget = true) : DbCursorBase(), + curr_key_(sizeof(key_dt)), curr_data_(sizeof(data_dt)) + { + u_int32_t bulksz = sizeof(data_dt); // non-bulk + rmw_get_ = brmw1; + this->bulk_retrieval_ = + normalize_bulk_bufsize(b_bulk_retrieval); + recno_itr_ = NULL; + multi_itr_ = NULL; + + if (bulk_retrieval_) { + if (bulksz <= bulk_retrieval_) + bulksz = bulk_retrieval_; + else { + normalize_bulk_bufsize(bulksz); + bulk_retrieval_ = bulksz; + } + } + key_buf_.set_data(DbstlMalloc(sizeof(key_dt))); + key_buf_.set_ulen(sizeof(key_dt)); + key_buf_.set_flags(DB_DBT_USERMEM); + data_buf_.set_data(DbstlMalloc(bulksz)); + data_buf_.set_ulen(bulksz); + data_buf_.set_flags(DB_DBT_USERMEM); + directdb_get_ = directdbget; + } + + // Copy constructor, duplicate cursor here. + DbCursor(const DbCursor& dbc) : + DbCursorBase(dbc), + curr_key_(dbc.curr_key_), curr_data_(dbc.curr_data_) + { + void *pk, *pd; + + dbc.dup(*this); + csr_status_ = dbc.csr_status_; + if (csr_ || dbc.csr_) + this->rmw_get_ = set_rmw(dbc.rmw_get_, + ((DBC*)dbc.csr_)->dbenv); + else + rmw_get_ = dbc.rmw_get_; + + bulk_retrieval_ = dbc.bulk_retrieval_; + + // Now we have to copy key_buf_ and data_buf_ to support + // multiple retrieval. 
+ key_buf_.set_data(pk = DbstlMalloc(dbc.key_buf_.get_ulen())); + key_buf_.set_ulen(dbc.key_buf_.get_ulen()); + key_buf_.set_size(dbc.key_buf_.get_size()); + key_buf_.set_flags(DB_DBT_USERMEM); + memcpy(pk, dbc.key_buf_.get_data(), key_buf_.get_ulen()); + + data_buf_.set_data(pd = DbstlMalloc(dbc.data_buf_.get_ulen())); + data_buf_.set_ulen(dbc.data_buf_.get_ulen()); + data_buf_.set_size(dbc.data_buf_.get_size()); + data_buf_.set_flags(DB_DBT_USERMEM); + memcpy(pd, dbc.data_buf_.get_data(), data_buf_.get_ulen()); + if (dbc.recno_itr_) { + recno_itr_ = new DbstlMultipleRecnoDataIterator( + data_buf_); + recno_itr_->set_pointer(dbc.recno_itr_->get_pointer()); + } else + recno_itr_ = NULL; + if (dbc.multi_itr_) { + multi_itr_ = new DbstlMultipleKeyDataIterator( + data_buf_); + multi_itr_->set_pointer(dbc.multi_itr_->get_pointer()); + + } else + multi_itr_ = NULL; + + directdb_get_ = dbc.directdb_get_; + + // Do not copy sduppers, they are private to each DbCursor<> + // object. + } + + virtual ~DbCursor() + { + close(); // Call close() ahead of freeing following buffers. + free(key_buf_.get_data()); + free(data_buf_.get_data()); + if (multi_itr_) + delete multi_itr_; + if (recno_itr_) + delete recno_itr_; + } + + //////////////////////////////////////////////////////////////////// + + const DbCursor& operator= + (const DbCursor& dbc) + { + void *pk; + u_int32_t ulen; + + DbCursorBase::operator =(dbc); + dbc.dup(*this); + curr_key_ = dbc.curr_key_; + curr_data_ = dbc.curr_data_; + rmw_get_ = dbc.rmw_get_; + this->bulk_retrieval_ = dbc.bulk_retrieval_; + this->directdb_get_ = dbc.directdb_get_; + // Now we have to copy key_buf_ and data_buf_ to support + // bulk retrieval. 
+ key_buf_.set_data(pk = DbstlReAlloc(key_buf_.get_data(), + ulen = dbc.key_buf_.get_ulen())); + key_buf_.set_ulen(ulen); + key_buf_.set_size(dbc.key_buf_.get_size()); + key_buf_.set_flags(DB_DBT_USERMEM); + memcpy(pk, dbc.key_buf_.get_data(), ulen); + + data_buf_.set_data(pk = DbstlReAlloc(key_buf_.get_data(), + ulen = dbc.key_buf_.get_ulen())); + data_buf_.set_ulen(ulen); + data_buf_.set_size(dbc.data_buf_.get_size()); + data_buf_.set_flags(DB_DBT_USERMEM); + memcpy(pk, dbc.key_buf_.get_data(), ulen); + + if (dbc.recno_itr_) { + if (recno_itr_) { + delete recno_itr_; + recno_itr_ = NULL; + } + recno_itr_ = new DbstlMultipleRecnoDataIterator( + data_buf_); + recno_itr_->set_pointer(dbc.recno_itr_->get_pointer()); + } else if (recno_itr_) { + delete recno_itr_; + recno_itr_ = NULL; + } + + if (dbc.multi_itr_) { + if (multi_itr_) { + delete multi_itr_; + multi_itr_ = NULL; + } + multi_itr_ = new DbstlMultipleKeyDataIterator( + data_buf_); + multi_itr_->set_pointer(dbc.multi_itr_->get_pointer()); + + } else if (multi_itr_) { + delete multi_itr_; + multi_itr_ = NULL; + } + + return dbc; + // Do not copy sduppers, they are private to each DbCursor<> + // object. + + } + + // Move Dbc*cursor to next position. If doing bulk read, read from + // the bulk buffer. If bulk buffer exhausted, do another bulk read + // from database, and then read from the bulk buffer. Quit if no + // more data in database. 
+ // + int next(int flag = DB_NEXT) + { + Dbt k, d; + db_recno_t recno; + int ret; + +retry: if (bulk_retrieval_) { + if (multi_itr_) { + if (multi_itr_->next(k, d)) { + curr_key_.set_dbt(k, false); + curr_data_.set_dbt(d, false); + return 0; + } else { + delete multi_itr_; + multi_itr_ = NULL; + } + } + if (recno_itr_) { + if (recno_itr_->next(recno, d)) { + curr_key_.set_dbt(k, false); + curr_data_.set_dbt(d, false); + return 0; + } else { + delete recno_itr_; + recno_itr_ = NULL; + } + } + } + ret = increment(flag); + if (bulk_retrieval_ && ret == 0) + goto retry; + return ret; + } + + inline int prev(int flag = DB_PREV) + { + return increment(flag); + } + + // Move Dbc*cursor to first element. If doing bulk read, read data + // from bulk buffer. + int first() + { + Dbt k, d; + db_recno_t recno; + int ret; + + ret = increment(DB_FIRST); + if (bulk_retrieval_) { + if (multi_itr_) { + if (multi_itr_->next(k, d)) { + curr_key_.set_dbt(k, false); + curr_data_.set_dbt(d, false); + return 0; + } else { + delete multi_itr_; + multi_itr_ = NULL; + } + } + if (recno_itr_) { + if (recno_itr_->next(recno, d)) { + curr_key_.set_dbt(k, false); + curr_data_.set_dbt(d, false); + return 0; + } else { + delete recno_itr_; + recno_itr_ = NULL; + } + } + } + + return ret; + } + + inline int last() + { + return increment(DB_LAST); + } + + // Get current key/data pair, shallow copy. Return 0 on success, + // -1 if no data. + inline int get_current_key_data(key_dt&k, data_dt&d) + { + if (directdb_get_) + update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + if (curr_key_.get_data(k) == 0 && curr_data_.get_data(d) == 0) + return 0; + else + return INVALID_KEY_DATA; + } + + // Get current data, shallow copy. Return 0 on success, -1 if no data. 
+ inline int get_current_data(data_dt&d) + { + if (directdb_get_) + update_current_key_data_from_db(DbCursorBase::SKIP_KEY); + if (curr_data_.get_data(d) == 0) + return 0; + else + return INVALID_KEY_DATA; + } + + // Get current key, shallow copy. Return 0 on success, -1 if no data. + inline int get_current_key(key_dt&k) + { + if (directdb_get_) + update_current_key_data_from_db( + DbCursorBase::SKIP_DATA); + if (curr_key_.get_data(k) == 0) + return 0; + else + return INVALID_KEY_DATA; + } + + inline void close() + { + if (csr_) { + inform_duppers(); + ResourceManager::instance()->remove_cursor(this); + } + csr_ = NULL; + } + + // Parameter skipkd specifies skip retrieving key or data: + // If 0, don't skip, retrieve both; + // If 1, skip retrieving key; + // If 2, skip retrieving data. + // Do not poll from db again if doing bulk retrieval. + void update_current_key_data_from_db(DbcGetSkipOptions skipkd) { + int ret; + u_int32_t sz, sz1, kflags = DB_DBT_USERMEM, + dflags = DB_DBT_USERMEM; + // Do not poll from db again if doing bulk retrieval. + if (this->bulk_retrieval_) + return; + if (this->csr_status_ != 0) { + curr_key_.reset(); + curr_data_.reset(); + return; + } + + // We will modify flags if skip key or data, so cache old + // value and set it after get calls. 
+ if (skipkd != DbCursorBase::SKIP_NONE) { + kflags = key_buf_.get_flags(); + dflags = data_buf_.get_flags(); + } + if (skipkd == DbCursorBase::SKIP_KEY) { + key_buf_.set_dlen(0); + key_buf_.set_flags(DB_DBT_PARTIAL | DB_DBT_USERMEM); + } + + if (skipkd == DbCursorBase::SKIP_DATA) { + data_buf_.set_dlen(0); + data_buf_.set_flags(DB_DBT_PARTIAL | DB_DBT_USERMEM); + } +retry: ret = csr_->get(&key_buf_, &data_buf_, DB_CURRENT); + if (ret == 0) { + if (skipkd != DbCursorBase::SKIP_KEY) + curr_key_ = key_buf_; + if (skipkd != DbCursorBase::SKIP_DATA) + curr_data_ = data_buf_; + limit_buf_size_after_use(); + } else if (ret == DB_BUFFER_SMALL) { + if ((sz = key_buf_.get_size()) > 0) + enlarge_dbt(key_buf_, sz); + if ((sz1 = data_buf_.get_size()) > 0) + enlarge_dbt(data_buf_, sz1); + if (sz == 0 && sz1 == 0) + THROW0(InvalidDbtException); + goto retry; + } else { + if (skipkd != DbCursorBase::SKIP_NONE) { + key_buf_.set_flags(kflags); + data_buf_.set_flags(dflags); + } + throw_bdb_exception( + "DbCursor<>::update_current_key_data_from_db", ret); + } + + if (skipkd != DbCursorBase::SKIP_NONE) { + key_buf_.set_flags(kflags); + data_buf_.set_flags(dflags); + } + } +}; // DbCursor<> + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// +// RandDbCursor class template definition +// +// RandDbCursor is a random accessible cursor wrapper for use by +// db_vector_iterator, it derives from DbCursor<> class. It has a fixed key +// data type, which is index_type. +// +typedef db_recno_t index_type; +template +class RandDbCursor : public DbCursor +{ +protected: + friend class DataItem; + typedef ssize_t difference_type; +public: + typedef RandDbCursor self; + typedef DbCursor base; + + // Return current csr_ pointed element's index in recno database + // (i.e. the index starting from 1). csr_ must be open and + // point to an existing key/data pair. 
+ // + inline index_type get_current_index() const + { + index_type ndx; + + if (this->directdb_get_) + ((self *)this)->update_current_key_data_from_db( + DbCursorBase::SKIP_DATA); + this->curr_key_.get_data(ndx); + return ndx; + } + + inline int compare(const self *csr2) const{ + index_type i1, i2; + + i1 = this->get_current_index(); + i2 = csr2->get_current_index(); + return i1 - i2; + } + + // Insert data d before/after current position. + int insert(const data_dt& d, int pos = DB_BEFORE){ + int k = 1, ret; + //data_dt dta; + + // Inserting into empty db, must set key to 1. + if (pos == DB_KEYLAST) + k = 1; + + ret = base::insert(k, d, pos); + + // Inserting into a empty db using begin() itr, so flag is + // DB_AFTER and surely failed, so change to use DB_KEYLAST + // and try again. + if (ret == EINVAL) { + k = 1; + pos = DB_KEYLAST; + ret = base::insert(k, d, pos); + } + this->csr_status_ = ret; + return ret; + } + + /* + * Move the cursor n positions, if reaches the beginning or end, + * returns DB_NOTFOUND. + */ + int advance(difference_type n) + { + int ret = 0; + index_type indx; + u_int32_t sz, flags = 0; + + indx = this->get_current_index(); + if (n == 0) + return 0; + + index_type i = (index_type)n; + indx += i; + + if (n < 0 && indx < 1) { // Index in recno db starts from 1. + + ret = INVALID_ITERATOR_POSITION; + return ret; + } + this->inform_duppers(); + + // Do a search to determine whether new position is valid. 
+ Dbt k, &d = this->data_buf_; + + + k.set_data(&indx); + k.set_size(sizeof(indx)); + if (this->rmw_get_) + flags |= DB_RMW; + +retry: if (this->csr_ && + ((ret = this->csr_->get(&k, &d, DB_SET)) == DB_NOTFOUND)) { + this->csr_status_ = ret = INVALID_ITERATOR_POSITION; + this->curr_key_.reset(); + this->curr_data_.reset(); + } else if (ret == DB_BUFFER_SMALL) { + sz = d.get_size(); + assert(sz > 0); + this->enlarge_dbt(d, sz); + goto retry; + } else if (ret == 0) { + this->curr_key_.set_dbt(k, false); + this->curr_data_.set_dbt(d, false); + this->limit_buf_size_after_use(); + } else + throw_bdb_exception("RandDbCursor<>::advance", ret); + this->csr_status_ = ret; + return ret; + } + + // Return the last index of recno db (index starting from 1), + // it will also move the underlying cursor to last key/data pair. + // + inline index_type last_index() + { + int ret; + + ret = this->last(); + if (ret) + return 0;// Invalid position. + else + return get_current_index(); + } + + explicit RandDbCursor(u_int32_t b_bulk_retrieval = 0, + bool b_rmw1 = false, bool directdbget = true) + : base(b_bulk_retrieval, b_rmw1, directdbget) + { + } + + RandDbCursor(const RandDbCursor& rdbc) : base(rdbc) + { + } + + explicit RandDbCursor(Dbc* csr1, int posidx = 0) : base(csr1) + { + } + + virtual ~RandDbCursor() + { + } + +}; // RandDbCursor<> + +END_NS //ns dbstl + +#endif // !_DB_STL_DBC_H diff --git a/lang/cxx/stl/dbstl_dbt.h b/lang/cxx/stl/dbstl_dbt.h new file mode 100644 index 00000000..43c5a190 --- /dev/null +++ b/lang/cxx/stl/dbstl_dbt.h @@ -0,0 +1,803 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_STL_DBT_H +#define _DB_STL_DBT_H + +#include +#include + +#include "dbstl_common.h" +#include "dbstl_exception.h" +#include "dbstl_utility.h" + +////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +// +// DataItem class template definition +// +// 1. DataItem is a Dbt wrapper, it provides both typed data to/from memory +// chunk mapping as well as iostream support. Note that iostream functionality +// is not yet implemented. +// 2. DataItem is used inside dbstl to provide consistent Dbt object memory +// management. +// 3. DataItem is not only capable of mapping fixed size objects, but also +// varying length objects and objects not located in a consecutive chunk of +// memory, with the condition that user configures the required methods in +// DbstlElemTraits. +// 4. DataItem can not be a class template because inside it, the "member +// function template override" support is needed. +// +START_NS(dbstl) + +using std::string; +#ifdef HAVE_WSTRING +using std::wstring; +#endif + +class DataItem +{ +private: + typedef DataItem self; + + //////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////// + // + // DataItem memory management + // + // The dbt_ member is the current dbt, data is stored in the dbt's + // referenced memory, it may + // deep copy from constructor and from other Dbt, depending on + // the constructors "onstack" parameter --- if true, this object + // is only used as a stack object inside a function, + // so do shallow copy; otherwise do deep copy. 
+ // There is always a DB_DBT_USERMEM flag set to the dbt, + // its ulen data member stores the length of referenced memory, + // its size data member stores the actual size of data; + // If onstack is true, its dlen is INVALID_DLEN, and freemem() + // will not free such memory because this object only reference + // other object's memory, its the referenced object's responsibility + // to free their memory. + // + // A DataItem object is not used everywhere, so it is impossible for + // such an object to have two kinds of usages as above at the same + // time, so we are safe doing so. + Dbt dbt_; + + // Free dbt_'s referenced memory if that memory is allocated in heap + // and owned by dbt_. + inline void freemem() + { + void *buf = dbt_.get_data(); + + if (buf != NULL && (dbt_.get_flags() & DB_DBT_USERMEM) != 0 + && dbt_.get_dlen() != INVALID_DLEN) + free(buf); + memset(&dbt_, 0, sizeof(dbt_)); + } + +public: + + // Deep copy, because dbt2.data pointed memory may be short lived. + inline void set_dbt(const DbstlDbt&dbt2, bool onstack) + { + void *buf; + u_int32_t s1, s2; + DBT *pdbt2, *pdbt; + + pdbt2 = (DBT *)&dbt2; + pdbt = (DBT *)&dbt_; + + if (!onstack) { + buf = pdbt->data; + s1 = pdbt->ulen; + s2 = pdbt2->size; + if(s2 > s1) { + buf = DbstlReAlloc(buf, s2); + pdbt->size = s2; + pdbt->data = buf; + pdbt->ulen = s2; + pdbt->flags |= DB_DBT_USERMEM; + } else + pdbt->size = s2; + memcpy(buf, pdbt2->data, s2); + } else { + freemem(); + dbt_ = (const Dbt)dbt2; + pdbt->dlen = (INVALID_DLEN); + } + } + + // Deep copy, because dbt2.data pointed memory may be short lived. 
+ inline void set_dbt(const Dbt&dbt2, bool onstack) + { + void *buf; + u_int32_t s1, s2; + DBT *pdbt2, *pdbt; + + pdbt2 = (DBT *)&dbt2; + pdbt = (DBT *)&dbt_; + + if (!onstack) { + buf = pdbt->data; + s1 = pdbt->ulen; + s2 = pdbt2->size; + if(s2 > s1) { + buf = DbstlReAlloc(buf, s2); + pdbt->size = s2; + pdbt->data = buf; + pdbt->ulen = s2; + pdbt->flags |= DB_DBT_USERMEM; + } else + pdbt->size = s2; + memcpy(buf, pdbt2->data, s2); + } else { + freemem(); + dbt_ = dbt2; + pdbt->dlen = (INVALID_DLEN); + } + } + + inline void set_dbt(const DBT&dbt2, bool onstack) + { + void *buf; + u_int32_t s1, s2; + DBT *pdbt = (DBT *)&dbt_; + + if (!onstack) { + buf = pdbt->data; + s1 = pdbt->ulen; + s2 = dbt2.size; + if(s2 > s1) { + buf = DbstlReAlloc(buf, s2); + pdbt->size = s2; + pdbt->data = buf; + pdbt->ulen = s2; + pdbt->flags |= DB_DBT_USERMEM; + } else + pdbt->size = s2; + memcpy(buf, dbt2.data, s2); + } else { + freemem(); + // The following is right because Dbt derives from + // DBT with no extra members or any virtual functions. + memcpy(&dbt_, &dbt2, sizeof(dbt2)); + pdbt->dlen = INVALID_DLEN; + } + } + + // Return to the initial state. + inline void reset() + { + void *buf = dbt_.get_data(); + if (buf) { + memset(buf, 0, dbt_.get_ulen()); + dbt_.set_size(0); + } + } + + inline Dbt& get_dbt() + { + return dbt_; + } + + // Return data of this object. If no data return -1, if it has data + // return 0. + // + // !!!XXX Note that the type parameter T can only be in this function + // because "template type parameter overload" applies only to a + // functions template argument list, rather than that of classes. + // If you put the "template" to this class's declaration, + // making it a class template, then when T is any of Dbt, DBT, or + // DataItem, there will be two copies of this function. One will be + // this function's instantiated version, the other one is one of the + // three functions defined below. 
+ // + template + inline int get_data(T& data) const + { + int ret; + typedef DbstlElemTraits EM; + typename EM::ElemRstoreFunct restore; + void *pdata = NULL; + + if ((pdata = dbt_.get_data()) != NULL) { + if ((restore = EM::instance()-> + get_restore_function()) != NULL) + restore(data, pdata); + else + data = *((T*)pdata); + ret = 0; + } else + ret = -1; + return ret; + } + + //////////////////////////////////////////////////////////////////// + // + // Begin functions supporting direct naked string storage. + // + // Always store the data, rather than the container object. + // + // The returned string lives no longer than the next iterator + // movement call. + // + inline int get_data(char*& data) const + { + data = (char*)dbt_.get_data(); + return 0; + } + + inline int get_data(string &data) const + { + data = (string::pointer) dbt_.get_data(); + return 0; + } + + inline int get_data(wchar_t*& data) const + { + data = (wchar_t*)dbt_.get_data(); + return 0; + } + +#ifdef HAVE_WSTRING + inline int get_data(wstring &data) const + { + data = (wstring::pointer) dbt_.get_data(); + return 0; + } +#endif + + //////////////////////////////////////////////////////////////////// + + // Supporting storing arbitrary type of sequence. + template + inline int get_data(T*& data) const + { + data = (T*)dbt_.get_data(); + return 0; + } + + inline int get_data(DataItem& data) const + { + int ret; + + if (dbt_.get_data()) { + data.set_dbt(dbt_, false); + ret = 0; + } else + ret = -1; + return ret; + } + + //////////////////////////////////////////////////////////////////// + // + // Begin functions supporting Dbt storage. + // + // This member function allows storing a Dbt type, so that user can + // store the varying length data into Dbt. + // + // This method is required to copy a data element's bytes to another + // Dbt object, used inside by dbstl. + // If there is no data return -1, if it has data return 0. 
+ // + inline int get_data(Dbt& data) const + { + int ret; + void *addr; + u_int32_t sz; + DBT *pdbt = (DBT *)&dbt_, *pdata = (DBT *)&data; + + if (pdbt->data) { + addr = pdata->data; + sz = pdbt->size; + if (pdata->ulen < sz) { + pdata->data = DbstlReAlloc(addr, sz); + pdata->size = sz; + pdata->ulen = sz; + pdata->flags |= DB_DBT_USERMEM; + } else + pdata->size = sz; + memcpy(pdata->data, pdbt->data, sz); + ret = 0; + } else + ret = -1; + return ret; + } + + inline int get_data(DBT& data) const + { + int ret; + void*addr; + u_int32_t sz; + + if (dbt_.get_data()) { + addr = data.data; + if (data.ulen < (sz = dbt_.get_size())) { + data.data = DbstlReAlloc(addr, sz); + // User need to free this memory + data.flags = data.flags | DB_DBT_USERMEM; + data.size = sz; + data.ulen = sz; + } else + data.size = sz; + memcpy(data.data, dbt_.get_data(), sz); + ret = 0; + } else + ret = -1; + return ret; + } + + inline int get_data(DbstlDbt& data) const + { + int ret; + void *addr; + u_int32_t sz; + DBT *pdbt = (DBT *)&dbt_, *pdata = (DBT *)&data; + + if (pdbt->data) { + addr = pdata->data; + sz = pdbt->size; + if (pdata->ulen < sz) { + pdata->data = DbstlReAlloc(addr, sz); + pdata->size = sz; + pdata->ulen = sz; + pdata->flags |= DB_DBT_USERMEM; + } else + pdata->size = sz; + memcpy(pdata->data, pdbt->data, sz); + ret = 0; + } else + ret = -1; + return ret; + } + + //////////////////////////////////////////////////////////////////// + + // Deep copy in assignment and copy constructor. + inline const DbstlDbt& operator=(const DbstlDbt& t2) + { + set_dbt(t2, false); + return t2; + } + + // Deep copy in assignment and copy constructor. + inline const Dbt& operator=(const Dbt& t2) + { + set_dbt(t2, false); + return t2; + } + + // Deep copy in assignment and copy constructor. + inline const DBT& operator=(const DBT& t2) + { + set_dbt(t2, false); + return t2; + } + + // Deep copy in assignment and copy constructor. 
+ template + inline const T& operator = (const T&dt) + { + + make_dbt(dt, false); + return dt; + } + + // Generic way of storing an object or variable. Note that DataItem + // is not a class template but a class with function templates. + // Variable t locates on a consecutive chunk of memory, and objects + // of T have the same size. + // + template + void make_dbt(const T& dt, bool onstack) + { + typedef DbstlElemTraits EM; + u_int32_t sz; + typename EM::ElemSizeFunct sizef; + typename EM::ElemCopyFunct copyf; + DBT *pdbt = (DBT *)&dbt_; + + if ((sizef = EM::instance()->get_size_function()) != NULL) + sz = sizef(dt); + else + sz = sizeof(dt); + + if (onstack) { + freemem(); + pdbt->data = ((void*)&dt); + // We have to set DB_DBT_USERMEM for DB_THREAD to work. + pdbt->flags = (DB_DBT_USERMEM); + pdbt->size = (sz); + pdbt->ulen = (sz); + // This is a flag that this memory can't be freed + // because it is on stack. + pdbt->dlen = (INVALID_DLEN); + return; + } + + // Not on stack, allocate enough space and "copy" the object + // using shall copy or customized copy. 
+ if (pdbt->ulen < sz) { + pdbt->data = (DbstlReAlloc(pdbt->data, sz)); + assert(pdbt->data != NULL); + pdbt->size = (sz); + pdbt->ulen = (sz); + pdbt->flags = (DB_DBT_USERMEM); + } else + pdbt->size = (sz); + + if ((copyf = EM::instance()->get_copy_function()) != NULL) + copyf(pdbt->data, dt); + else + memcpy(pdbt->data, &dt, sz); + } + + inline const char*&operator = (const char*&dt) + { + make_dbt(dt, false); + return dt; + } + + inline const wchar_t*&operator = (const wchar_t*&dt) + { + make_dbt(dt, false); + return dt; + } + + inline const string &operator=(const string &dt) + { + make_dbt(dt, false); + return dt; + } + +#ifdef HAVE_WSTRING + inline const wstring &operator=(const wstring &dt) + { + make_dbt(dt, false); + return dt; + } +#endif + + template + inline const T*&operator = (const T*&dt) + { + make_dbt(dt, false); + return dt; + } + + inline const self& operator=(const self&dbt1) + { + ASSIGNMENT_PREDCOND(dbt1) + this->set_dbt(dbt1.dbt_, false); + return dbt1; + } + + // Deep copy. + inline DataItem(const self&dbt1) + { + set_dbt(dbt1.dbt_, false); + } + + + inline DataItem(u_int32_t sz) + { + void *buf; + DBT *pdbt = (DBT *)&dbt_; + + buf = NULL; + buf = DbstlMalloc(sz); + memset(buf, 0, sz); + pdbt->size = sz; + pdbt->ulen = sz; + pdbt->data = buf; + pdbt->flags = DB_DBT_USERMEM; + } + + // Deep copy. The onstack parameter means whether the object referenced + // by this DataItem is on used with a function call where this DataItem + // object is used. If so, we don't deep copy the object, simply refer + // to its memory location. The meaining is the same for this parameter + // in constructors that follow. + inline DataItem(const Dbt&dbt2, bool onstack) + { + set_dbt(dbt2, onstack); + } + + inline DataItem(const DbstlDbt&dbt2, bool onstack) + { + set_dbt(dbt2, onstack); + } + + inline DataItem(const DBT&dbt2, bool onstack) + { + set_dbt(dbt2, onstack); + } + + // Deep copy. There is a partial specialization for char*/wchar_t*/ + // string/wstring. 
+ template + inline DataItem(const T& dt, bool onstack) + { + make_dbt(dt, onstack); + } + + inline ~DataItem(void) + { + freemem(); + } + +protected: + + // Store a char*/wchar_t* string. Need four versions for char* + // and wchar_t* respectively to catch all + // possibilities otherwise the most generic one will be called. + // Note that the two const decorator matters when doing type + // matching. + inline void make_dbt_chars(const char *t, bool onstack) + { + DBT *d = (DBT *)&dbt_; + u_int32_t sz; + sz = ((t == NULL) ? + sizeof(char) : + (u_int32_t)((strlen(t) + 1) * sizeof(char))); + if (!onstack) { + if (d->ulen < sz) { + d->flags |= DB_DBT_USERMEM; + d->data = DbstlReAlloc(d->data, sz); + d->ulen = sz; + } + d->size = sz; + if (t != NULL) + strcpy((char*)d->data, t); + else + memset(d->data, '\0', sizeof(char)); + } else { + freemem(); + d->data = ((t == NULL) ? (void *)"" : (void *)t); + d->size = sz; + d->ulen = sz; + d->flags = (DB_DBT_USERMEM); + d->dlen = (INVALID_DLEN); + } + } + + inline void make_dbt_wchars(const wchar_t *t, bool onstack) + { + DBT *d = (DBT *)&dbt_; + u_int32_t sz; + sz = ((t == NULL) ? + sizeof(wchar_t) : + (u_int32_t)((wcslen(t) + 1) * sizeof(wchar_t))); + if (!onstack) { + if (d->ulen < sz) { + d->flags |= DB_DBT_USERMEM; + d->data = DbstlReAlloc(d->data, sz); + d->ulen = sz; + } + d->size = sz; + if (t != NULL) + wcscpy((wchar_t*)d->data, t); + else + memset(d->data, L'\0', sizeof(wchar_t)); + } else { + freemem(); + d->data = ((t == NULL) ? 
(void *)L"" : (void *)t); + d->size = sz; + d->ulen = sz; + d->flags = (DB_DBT_USERMEM); + d->dlen = (INVALID_DLEN); + } + } + + inline void make_dbt(const char*& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline void make_dbt(const char* const& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline void make_dbt(char*& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline void make_dbt(char* const& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline void make_dbt(const string& t, bool onstack) + { + make_dbt_chars(t.c_str(), onstack); + } + + inline void make_dbt(const wchar_t*& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline void make_dbt(const wchar_t* const& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline void make_dbt(wchar_t*& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline void make_dbt(wchar_t* const& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + +#ifdef HAVE_WSTRING + inline void make_dbt(const wstring& t, bool onstack) + { + make_dbt_wchars(t.c_str(), onstack); + } +#endif + + template + void make_dbt_internal(const T*t, bool onstack) + { + u_int32_t i, sz, totalsz, sql; + DBT *pdbt = (DBT *)&dbt_; + typename DbstlElemTraits::ElemSizeFunct szf = NULL; + typename DbstlElemTraits::SequenceLenFunct + seqlenf = NULL; + + szf = DbstlElemTraits::instance()-> + get_size_function(); + seqlenf = DbstlElemTraits::instance()-> + get_sequence_len_function(); + + assert(seqlenf != NULL); + sql = sz = (u_int32_t)seqlenf(t); + if (szf) + for (i = 0, totalsz = 0; i < sz; i++) + totalsz += szf(t[i]); + else + totalsz = sz * sizeof(T); + + sz = totalsz; + + if (onstack) { + freemem(); + pdbt->data = (void *)t; + pdbt->size = sz; + pdbt->ulen = sz; + pdbt->flags = DB_DBT_USERMEM; + pdbt->dlen = INVALID_DLEN; // onstack flag; + } else { + // ulen stores the real length of the pointed memory. 
+ if (pdbt->ulen < sz) { + pdbt->data = DbstlReAlloc(pdbt->data, sz); + pdbt->ulen = sz; + pdbt->flags |= DB_DBT_USERMEM; + } + pdbt->size = sz; + + DbstlElemTraits::instance()-> + get_sequence_copy_function() + ((T *)pdbt->data, t, sql); + } + } + + // Store a sequence of base type T. Need four versions to catch all + // possibilities otherwise the most generic one will be called. + template + inline void make_dbt(const T*const&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + template + inline void make_dbt(T*const&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + template + inline void make_dbt(T*&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + template + inline void make_dbt(const T*&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + + +public: + inline DataItem(const char*& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline DataItem(const char* const& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline DataItem(char*& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline DataItem(char* const& t, bool onstack) + { + make_dbt_chars(t, onstack); + } + + inline DataItem(const string& t, bool onstack) + { + make_dbt_chars(t.c_str(), onstack); + } + + inline DataItem(const wchar_t*& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline DataItem(const wchar_t* const& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline DataItem(wchar_t*& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + + inline DataItem(wchar_t* const& t, bool onstack) + { + make_dbt_wchars(t, onstack); + } + +#ifdef HAVE_WSTRING + inline DataItem(const wstring& t, bool onstack) + { + make_dbt_wchars(t.c_str(), onstack); + } +#endif + template + inline DataItem(T*&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + + template + inline DataItem(const T*&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); 
+ } + + template + inline DataItem(T*const&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + + template + inline DataItem(const T*const&tt, bool onstack) + { + make_dbt_internal((const T*)tt, onstack); + } + + +}; // DataItem<> + +bool operator==(const Dbt&d1, const Dbt&d2); +bool operator==(const DBT&d1, const DBT&d2); +END_NS + +#endif // !_DB_STL_DBT_H diff --git a/lang/cxx/stl/dbstl_element_ref.h b/lang/cxx/stl/dbstl_element_ref.h new file mode 100644 index 00000000..992d0052 --- /dev/null +++ b/lang/cxx/stl/dbstl_element_ref.h @@ -0,0 +1,873 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_KDPAIR_H +#define _DB_STL_KDPAIR_H + +#include + +#include "dbstl_common.h" +#include "dbstl_dbt.h" +#include "dbstl_exception.h" +#include "dbstl_base_iterator.h" +#include "dbstl_utility.h" + +START_NS(dbstl) + +using std::istream; +using std::ostream; +using std::basic_ostream; +using std::basic_istream; + +template +class db_base_iterator; +template +class ElementHolder; + +/** \ingroup dbstl_helper_classes +\defgroup Element_wrappers ElementRef and ElementHolder wrappers. +An ElementRef and ElementHolder object represents the reference to the +data element referenced by an iterator. Each iterator +object has an ElementRef or ElementHolder object that +stores the data element that the iterator points to. + +The ElementHolder class is used to store primitive types into STL containers. + +The ElementRef class is used to store other types into STL containers. + +The ElementRef and ElementHolder classes have identical interfaces, and are +treated the same by other STL classes. Since the ElementRef class inherits +from the template data class, all methods have a _DB_STL_ prefix to avoid name +clashes. + +An ElementRef or ElementHolder class corresponds to a single iterator instance. 
+An Element object is generally owned by an iterator object. The ownership +relationship is swapped in some specific situations, specifically for the +dereference and array index operator. +@{ +*/ +/// ElementRef element wrapper for classes and structures. +/// \sa ElementHolder +template +class _exported ElementRef : public ddt +{ +public: + typedef ElementRef self; + typedef ddt base; + typedef db_base_iterator iterator_type; + typedef ddt content_type; // Used by assoc classes. + +private: + // The iterator pointing the data element stored in this object. + iterator_type *_DB_STL_itr_; + + // Whether or not to delete itr on destruction, by default it is + // false because this object is supposed to live in the lifetime of + // its _DB_STL_itr_ owner. But there is one exception: in + // db_vector<>::operator[]/front/back and db_map<>::operator[] + // functions, an ElementRef object has to be + // returned instead of its reference, thus the + // returned ElementRef<> has to live longer than its _DB_STL_itr_, + // thus we new an iterator, and call _DB_STL_SetDelItr() method, + // setting this member to true, + // to tell this object that it should delete the + // _DB_STL_itr_ iterator on destruction, and duplicate the _DB_STL_itr_ + // iterator on copy construction. Although + // std::vector<> returns reference rather than value, this is not a + // problem because the returned ElementRef<> will duplicate cursor and + // still points to the same key/data pair. + // + mutable bool _DB_STL_delete_itr_; + +public: + //////////////////////////////////////////////////////////////////// + // + // Begin constructors and destructor. + // + /// \name Constructors and destructor. + //@{ + /// Destructor. + ~ElementRef() { + if (_DB_STL_delete_itr_) { + // Prevent recursive destruction. + _DB_STL_delete_itr_ = false; + _DB_STL_itr_->delete_me(); + } + } + + /// Constructor. 
+ /// If the pitr parameter is NULL or the default value is used, the + /// object created is a simple wrapper and not connected to a container. + /// If a valid iterator parameter is passed in, the wrapped element will + /// be associated with the matching key/data pair in the underlying + /// container. + /// \param pitr The iterator owning this object. + explicit ElementRef(iterator_type *pitr = NULL) + { + _DB_STL_delete_itr_ = false; + _DB_STL_itr_ = pitr; + } + + /// Constructor. + /// Initializes an ElementRef wrapper without an iterator. It can only + /// be used to wrap a data element in memory, it can't access an + /// unerlying database. + /// \param dt The base class object to initialize this object. + ElementRef(const ddt &dt) : ddt(dt) + { + _DB_STL_delete_itr_ = false; + _DB_STL_itr_ = NULL; + } + + /// Copy constructor. + /// The constructor takes a "deep" copy. The created object will be + /// identical to, but independent from the original object. + /// \param other The object to clone from. + ElementRef(const self &other) : ddt(other) + { + // Duplicate iterator if this object lives longer than + // _DB_STL_itr_. + _DB_STL_delete_itr_ = other._DB_STL_delete_itr_; + if (_DB_STL_delete_itr_) { + // Avoid recursive duplicate iterator calls. + other._DB_STL_delete_itr_ = false; + _DB_STL_itr_ = other._DB_STL_itr_->dup_itr(); + other._DB_STL_delete_itr_ = true; + } else + _DB_STL_itr_ = other._DB_STL_itr_; + } + //@} + //////////////////////////////////////////////////////////////////// + + /// \name Assignment operators. + /// The assignment operators are used to store right-values into the + /// wrapped object, and also to store values into an underlying + /// container. + //@{ + /// Assignment Operator. + /// \param dt2 The data value to assign with. + /// \return The object dt2's reference. 
+ inline const ddt& operator=(const ddt& dt2) + { + *((ddt*)this) = dt2; + if (_DB_STL_itr_ != NULL) { + if (!_DB_STL_itr_->is_set_iterator()) + _DB_STL_itr_->replace_current(dt2); + else + _DB_STL_itr_->replace_current_key(dt2); + } + return dt2; + } + + /// Assignment Operator. + /// \param me The object to assign with. + /// \return The object me's reference. + inline const self& operator=(const self& me) + { + ASSIGNMENT_PREDCOND(me) + *((ddt*)this) = (ddt)me; + if (_DB_STL_itr_ != NULL) { + // This object is the reference of an valid data + // element, so we must keep it that way, we don't + // use me's iterator here. + if (!_DB_STL_itr_->is_set_iterator()) + _DB_STL_itr_->replace_current( + me._DB_STL_value()); + else + _DB_STL_itr_->replace_current_key( + me._DB_STL_value()); + } else if (me._DB_STL_delete_itr_) { + // Duplicate an iterator from me. + _DB_STL_delete_itr_ = true; + me._DB_STL_delete_itr_ = false; + _DB_STL_itr_ = me._DB_STL_itr_->dup_itr(); + me._DB_STL_delete_itr_ = true; + } + + return me; + } + //@} + + /// Function to store the data element. + /// The user needs to call this method after modifying the underlying + /// object, so that the version stored in the container can be updated. + /// + /// When db_base_iterator's directdb_get_ member is true, this function + /// must be called after modifying the data member and before any + /// subsequent container iterator dereference operations. If this step + /// is not carried out any changes will be lost. + /// + /// If the data element is changed via ElementHolder<>::operator=(), + /// you don't need to call this function. + inline void _DB_STL_StoreElement() + { + assert(_DB_STL_itr_ != NULL); + _DB_STL_itr_->replace_current(*this); + } + + /// Returns the data element this wrapper object wraps. + inline const ddt& _DB_STL_value() const + { + return *((ddt*)this); + } + + /// Returns the data element this wrapper object wraps. 
+ inline ddt& _DB_STL_value() + { + return *((ddt*)this); + } + +#ifndef DOXYGEN_CANNOT_SEE_THIS + //////////////////////////////////////////////////////////////////// + // + // The following methods are not part of the official public API, + // but can't be declared as protected, since it is not possible + // to declare template-specialised classes as friends. + // + + // Call this function to tell this object that it should delete the + // _DB_STL_itr_ iterator because that iterator was allocated in + // the heap. Methods like db_vector/db_map<>::operator[] should call + // this function. + inline void _DB_STL_SetDelItr() + { + _DB_STL_delete_itr_ = true; + } + + // Only copy data into this object, do not store into database. + inline void _DB_STL_CopyData(const self&dt2) + { + *((ddt*)this) = (ddt)dt2; + } + + inline void _DB_STL_CopyData(const ddt&dt2) + { + *((ddt*)this) = dt2; + } + + // Following functions are prefixed with _DB_STL_ to avoid + // potential name clash with ddt members. + // + inline iterator_type* _DB_STL_GetIterator() const + { + return _DB_STL_itr_; + } + + inline int _DB_STL_GetData(ddt& d) const + { + d = *((ddt*)this); + return 0; + } + + inline void _DB_STL_SetIterator(iterator_type*pitr) + { + _DB_STL_itr_ = pitr; + } + + inline void _DB_STL_SetData(const ddt&d) + { + *(ddt*)this = d; + } + //////////////////////////////////////////////////////////////////// + +}; // ElementRef<> +template +class DbstlSeqWriter; +#else +}; +#endif // DOXYGEN_CANNOT_SEE_THIS + + +// The ElementHolder class must have an identical public interface to +// the ElementRef class. +/// A wrapper class for primitive types. It has identical usage and public +/// interface to the ElementRef class. +/// \sa ElementRef. 
+template +class _exported ElementHolder +{ +protected: + typedef ElementHolder self; + + + inline void _DB_STL_put_new_value_to_db() + { + if (_DB_STL_itr_ != NULL) { + if (!_DB_STL_itr_->is_set_iterator()) + _DB_STL_itr_->replace_current(dbstl_my_value_); + else + _DB_STL_itr_->replace_current_key( + dbstl_my_value_); + } + } + + inline void _DB_STL_put_new_value_to_db(const self &me) + { + if (_DB_STL_itr_ != NULL) { + if (!_DB_STL_itr_->is_set_iterator()) + _DB_STL_itr_->replace_current(dbstl_my_value_); + else + _DB_STL_itr_->replace_current_key( + dbstl_my_value_); + } else if (me._DB_STL_delete_itr_) { + // Duplicate an iterator from me. + _DB_STL_delete_itr_ = true; + me._DB_STL_delete_itr_ = false; + _DB_STL_itr_ = me._DB_STL_itr_->dup_itr(); + me._DB_STL_delete_itr_ = true; + } + } + + + +public: + typedef ptype type1; + typedef db_base_iterator iterator_type; + typedef ptype content_type; + + //////////////////////////////////////////////////////////////////// + // + // Begin constructors and destructor. + // + /// \name Constructors and destructor. + //@{ + /// Constructor. + /// If the pitr parameter is NULL or the default value is used, the + /// object created is a simple wrapper and not connected to a container. + /// If a valid iterator parameter is passed in, the wrapped element will + /// be associated with the matching key/data pair in the underlying + /// container. + /// \param pitr The iterator owning this object. + explicit inline ElementHolder(iterator_type* pitr = NULL) + { + _DB_STL_delete_itr_ = false; + _DB_STL_itr_ = pitr; + dbstl_str_buf_ = NULL; + dbstl_str_buf_len_ = 0; + memset(&dbstl_my_value_, 0, sizeof(dbstl_my_value_)); + } + + /// Constructor. + /// Initializes an ElementRef wrapper without an iterator. It can only + /// be used to wrap a data element in memory, it can't access an + /// unerlying database. + /// \param dt The base class object to initialize this object. 
+ inline ElementHolder(const ptype&dt) + { + dbstl_str_buf_ = NULL; + dbstl_str_buf_len_ = 0; + _DB_STL_delete_itr_ = false; + _DB_STL_itr_ = NULL; + _DB_STL_CopyData_int(dt); + } + + /// Copy constructor. + /// The constructor takes a "deep" copy. The created object will be + /// identical to, but independent from the original object. + /// \param other The object to clone from. + inline ElementHolder(const self& other) + { + dbstl_str_buf_ = NULL; + dbstl_str_buf_len_ = 0; + _DB_STL_delete_itr_ = other._DB_STL_delete_itr_; + _DB_STL_CopyData(other); + + // Duplicate iterator if this object lives longer than + // _DB_STL_itr_. + _DB_STL_delete_itr_ = other._DB_STL_delete_itr_; + if (_DB_STL_delete_itr_) { + // Avoid recursive duplicate iterator calls. + other._DB_STL_delete_itr_ = false; + _DB_STL_itr_ = other._DB_STL_itr_->dup_itr(); + other._DB_STL_delete_itr_ = true; + } else + _DB_STL_itr_ = other._DB_STL_itr_; + } + + /// Destructor. + ~ElementHolder() { + if (_DB_STL_delete_itr_) { + _DB_STL_delete_itr_ = false; + _DB_STL_itr_->delete_me(); + } + if (dbstl_str_buf_) { + free(dbstl_str_buf_); + dbstl_str_buf_ = NULL; + } + } + //@} + //////////////////////////////////////////////////////////////////// + + /// This operator is a type converter. Where an automatic type + /// conversion is needed, this function is called to convert this + /// object into the primitive type it wraps. + operator ptype () const + { + return dbstl_my_value_; + } + + // ElementHolder is a wrapper for primitive types, and backed by db, + // so we need to override all assignment operations to store updated + // value to database. We don't need to implement other operators for + // primitive types because we have a convert operator which can + // automatically convert to primitive type and use its C++ built in + // operator. + // + /** \name Math operators. 
+ ElementHolder class templates also have all C/C++ self mutating + operators for numeric primitive types, including: + +=, -=, *=, /=, %=, <<=, >>=, &=, |=, ^=, ++, -- + These operators should not be used when ddt is a sequence pointer type + like char* or wchar_t* or T*, otherwise the behavior is undefined. + These methods exist only to override default bahavior to store the + new updated value, otherwise, the type convert operator could have + done all the job. + As you know, some of them are not applicable to float or double types + or ElementHolder wrapper types for float/double types. + These operators not only modifies the cached data element, but also + stores new value to database if it associates a database key/data pair. + @{ + */ + template + const self& operator +=(const ElementHolder &p2) + { + dbstl_my_value_ += p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + + template + const self& operator -=(const ElementHolder &p2) + { + dbstl_my_value_ -= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + template + const self& operator *=(const ElementHolder &p2) + { + dbstl_my_value_ *= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + template + const self& operator /=(const ElementHolder &p2) + { + dbstl_my_value_ /= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + template + const self& operator %=(const ElementHolder &p2) + { + dbstl_my_value_ %= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + + template + const self& operator &=(const ElementHolder &p2) + { + dbstl_my_value_ &= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + template + const self& operator |=(const ElementHolder &p2) + { + dbstl_my_value_ |= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(p2); + return *this; + } + template + const self& operator ^=(const ElementHolder &p2) + { + dbstl_my_value_ ^= p2.dbstl_my_value_; + 
_DB_STL_put_new_value_to_db(p2); + return *this; + } + + const self& operator >>=(size_t n) + { + dbstl_my_value_ >>= n; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator <<=(size_t n) + { + dbstl_my_value_ <<= n; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator ^=(const self &p2) + { + dbstl_my_value_ ^= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator &=(const self &p2) + { + dbstl_my_value_ &= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator |=(const self &p2) + { + dbstl_my_value_ |= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator %=(const self &p2) + { + dbstl_my_value_ %= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + + const self& operator +=(const self &p2) + { + dbstl_my_value_ += p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + const self& operator -=(const self &p2) + { + dbstl_my_value_ -= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + const self& operator /=(const self &p2) + { + dbstl_my_value_ /= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + const self& operator *=(const self &p2) + { + dbstl_my_value_ *= p2.dbstl_my_value_; + _DB_STL_put_new_value_to_db(); + return *this; + } + + self& operator++() + { + dbstl_my_value_++; + _DB_STL_put_new_value_to_db(); + return *this; + } + + self operator++(int) + { + self obj(*this); + dbstl_my_value_++; + _DB_STL_put_new_value_to_db(); + return obj; + } + + self& operator--() + { + dbstl_my_value_--; + _DB_STL_put_new_value_to_db(); + return *this; + } + + self operator--(int) + { + self obj(*this); + dbstl_my_value_--; + _DB_STL_put_new_value_to_db(); + return obj; + } + + inline const ptype& operator=(const ptype& dt2) + { + _DB_STL_CopyData_int(dt2); + _DB_STL_put_new_value_to_db(); + return dt2; 
+ } + + inline const self& operator=(const self& dt2) + { + ASSIGNMENT_PREDCOND(dt2) + _DB_STL_CopyData(dt2); + _DB_STL_put_new_value_to_db(dt2); + return dt2; + } + //@} + + /// Returns the data element this wrapper object wraps; + inline const ptype& _DB_STL_value() const + { + return dbstl_my_value_; + } + + /// Returns the data element this wrapper object wraps; + inline ptype&_DB_STL_value() + { + return dbstl_my_value_; + } + + /// Function to store the data element. + /// The user needs to call this method after modifying the underlying + /// object, so that the version stored in the container can be updated. + /// + /// When db_base_iterator's directdb_get_ member is true, this function + /// must be called after modifying the data member and before any + /// subsequent container iterator dereference operations. If this step + /// is not carried out any changes will be lost. + /// + /// If the data element is changed via ElementHolder<>::operator=(), + /// you don't need to call this function. + inline void _DB_STL_StoreElement() + { + assert(_DB_STL_itr_ != NULL); + _DB_STL_itr_->replace_current(dbstl_my_value_); + } + +#ifndef DOXYGEN_CANNOT_SEE_THIS + //////////////////////////////////////////////////////////////////// + // + // The following methods are not part of the official public API, + // but can't be declared as protected, since it is not possible + // to declare template-specialised classes as friends. + // + inline void _DB_STL_CopyData(const self&dt2) + { + _DB_STL_CopyData_int(dt2.dbstl_my_value_); + } + + template + inline void _DB_STL_CopyData_int(const T&src) + { + dbstl_my_value_ = src; + } + + // Try to catch all types of pointers. 
+ template + inline void _DB_STL_CopyData_int(T* const &src) + { + DbstlSeqWriter::copy_to_holder((ElementHolder *)this, + (T *)src); + } + + template + inline void _DB_STL_CopyData_int(const T* const &src) + { + DbstlSeqWriter::copy_to_holder((ElementHolder *)this, + (T *)src); + } + + template + inline void _DB_STL_CopyData_int(T* &src) + { + DbstlSeqWriter::copy_to_holder((ElementHolder *)this, + (T *)src); + } + + template + inline void _DB_STL_CopyData_int(const T*&src) + { + DbstlSeqWriter::copy_to_holder((ElementHolder *)this, + (T *)src); + } + + inline iterator_type* _DB_STL_GetIterator() const + { + return _DB_STL_itr_; + } + + inline int _DB_STL_GetData(ptype& d) const + { + d = dbstl_my_value_; + return 0; + } + + inline void _DB_STL_SetIterator(iterator_type*pitr) + { + _DB_STL_itr_ = pitr; + } + + inline void _DB_STL_SetData(const ptype&d) + { + _DB_STL_CopyData_int(d); + } + + inline void _DB_STL_SetDelItr() + { + _DB_STL_delete_itr_ = true; + } + + // The two member has to be public for DbstlSeqWriter to access, + // but can't be accessed by user. + size_t dbstl_str_buf_len_; + void *dbstl_str_buf_; // Stores a sequence, used when ptype is T* + + iterator_type *_DB_STL_itr_; + ptype dbstl_my_value_; + mutable bool _DB_STL_delete_itr_; +}; +#else +}; +#endif // DOXYGEN_CANNOT_SEE_THIS +//@} // Element_wrappers +//@} //dbstl_helper_classes + +// These operators help reading from and writing to iostreams, if the wrapped +// data type has iostream operators. 
+template +basic_istream<_CharT,_Traits>& +operator>>( basic_istream<_CharT,_Traits> & in, ElementRef&p) +{ + in>>(ddt)p; + return in; +} + +template +basic_ostream<_CharT,_Traits>& +operator<<( basic_ostream<_CharT,_Traits> & out, + const ElementRef&p) +{ + out<<(ddt)p; + return out; +} + +template +basic_istream<_CharT,_Traits>& +operator>>( basic_istream<_CharT,_Traits> & in, ElementHolder&p) +{ + in>>p._DB_STL_value(); + return in; +} + +template +basic_ostream<_CharT,_Traits>& +operator<<( basic_ostream<_CharT,_Traits> & out, + const ElementHolder&p) +{ + out< +class _exported DbstlSeqWriter +{ +public: + typedef ElementHolder HolderType; + static void copy_to_holder(HolderType *holder, T *src) + { + size_t i, slen, sql; + + if (src == NULL) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + holder->dbstl_my_value_ = NULL; + return; + } + if (holder->dbstl_str_buf_len_ > DBSTL_MAX_DATA_BUF_LEN) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + } + + typedef DbstlElemTraits DM; + typename DM::SequenceCopyFunct seqcpy = + DM::instance()->get_sequence_copy_function(); + typename DM::SequenceLenFunct seqlen = + DM::instance()->get_sequence_len_function(); + typename DM::ElemSizeFunct elemszf = + DM::instance()->get_size_function(); + + assert(seqcpy != NULL && seqlen != NULL); + sql = seqlen(src); + if (elemszf == NULL) + slen = sizeof(T) * (sql + 1); + else + // We don't add the terminating object if it has one. + // So the registered functions should take care of it. 
+ for (slen = 0, i = 0; i < sql; i++) + slen += elemszf(src[i]); + + if (slen > holder->dbstl_str_buf_len_) + holder->dbstl_str_buf_ = DbstlReAlloc( + holder->dbstl_str_buf_, + holder->dbstl_str_buf_len_ = slen); + + seqcpy((T*)holder->dbstl_str_buf_, src, sql); + holder->dbstl_my_value_ = (T*)holder->dbstl_str_buf_; + } +}; + +template<> +class _exported DbstlSeqWriter +{ +public: + typedef ElementHolder HolderType; + static void copy_to_holder(HolderType *holder, char *src) + { + size_t slen; + + if (src == NULL) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + holder->dbstl_my_value_ = NULL; + return; + } + if (holder->dbstl_str_buf_len_ > DBSTL_MAX_DATA_BUF_LEN) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + } + + slen = sizeof(char) * (strlen(src) + 1); + if (slen > holder->dbstl_str_buf_len_) + holder->dbstl_str_buf_ = DbstlReAlloc( + holder->dbstl_str_buf_, + (u_int32_t)(holder->dbstl_str_buf_len_ = slen)); + + strcpy((char*)holder->dbstl_str_buf_, src); + holder->dbstl_my_value_ = (char*)holder->dbstl_str_buf_; + + } +}; + +template<> +class _exported DbstlSeqWriter +{ +public: + typedef ElementHolder HolderType; + static void copy_to_holder(HolderType *holder, wchar_t *src) + { + size_t slen; + + if (src == NULL) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + holder->dbstl_my_value_ = NULL; + return; + } + if (holder->dbstl_str_buf_len_ > DBSTL_MAX_DATA_BUF_LEN) { + free(holder->dbstl_str_buf_); + holder->dbstl_str_buf_ = NULL; + } + + slen = sizeof(wchar_t) * (wcslen(src) + 1); + if (slen > holder->dbstl_str_buf_len_) + holder->dbstl_str_buf_ = DbstlReAlloc( + holder->dbstl_str_buf_, + holder->dbstl_str_buf_len_ = slen); + + wcscpy((wchar_t*)holder->dbstl_str_buf_, src); + holder->dbstl_my_value_ = (wchar_t*)holder->dbstl_str_buf_; + } +}; +END_NS + +#endif// !_DB_STL_KDPAIR_H diff --git a/lang/cxx/stl/dbstl_exception.h b/lang/cxx/stl/dbstl_exception.h new file mode 100644 index 
00000000..a0ae4acb --- /dev/null +++ b/lang/cxx/stl/dbstl_exception.h @@ -0,0 +1,257 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_EXCEPTION_H +#define _DB_STL_EXCEPTION_H + +#include +#include +#include + +#include +#include + +#include "dbstl_common.h" + +START_NS(dbstl) + +using std::cerr; + +// Internally used only. +void _exported throw_bdb_exception(const char *caller, int err_ret); +#define COPY_CONSTRUCTOR(type) type(const type& t) : DbstlException(t){} + +/** \defgroup Exception_classes_group dbstl exception classes +dbstl throws several types of exceptions on several kinds of errors, the +exception classes form a class hiarachy. First, there is the DbstlException, +which is the base class for all types of dbstl specific concrete exception +classes. +DbstlException inherits from the class DbException of Berkeley DB C++ API. Since +DbException class inherits from C++ STL exception base class std::exception, +you can make use of all Berkeley DB C++ and dbstl API exceptions in the same +way you use the C++ std::exception class. + +Besides exceptions of DbstlException and its subclasses, dbstl may also +throw exceptions of DbException and its subclasses, which happens when a +Berkeley DB call failed. So you should use the same way you catch Berkeley DB +C++ API exceptions when you want to catch exceptions throw by Berkeley DB +operations. + +When an exception occurs, dbstl initialize an local exception object on the +stack and throws the exception object, so you should catch an exception like +this: + +try { + // dbstl operations +} +catch(DbstlException ex){ + // Exception handling + throw ex; // Optionally throw ex again +} + +@{ +*/ + +/// Base class of all dbstl exception classes. It is derived from Berkeley +/// DB C++ API DbException class to maintain consistency with all +/// Berkeley DB exceptions. 
+/// +class _exported DbstlException : public DbException +{ +public: + explicit DbstlException(const char *msg) : DbException(msg) {} + DbstlException(const char *msg, int err) : DbException(msg, err) {} + DbstlException(const DbstlException&ex) : DbException(ex) {} + explicit DbstlException(int err) : DbException(err) {} + DbstlException(const char *prefix, const char *msg, int err) : + DbException(prefix, msg, err) {} + + const DbstlException& operator=(const DbstlException&exobj) + { + ASSIGNMENT_PREDCOND(exobj) + DbException::operator = + (dynamic_cast(exobj)); + return exobj; + } + + virtual ~DbstlException() throw(){} +}; + +/// Failed to allocate memory because memory is not enough. +class _exported NotEnoughMemoryException : public DbstlException +{ + size_t failed_size; // The size of the failed allocation. +public: + NotEnoughMemoryException(const char *msg, size_t sz) + : DbstlException(msg) + { + failed_size = sz; + } + + + NotEnoughMemoryException(const NotEnoughMemoryException &ex) + : DbstlException(ex) + { + this->failed_size = ex.failed_size; + } +}; + +/// The iterator has inconsistent status, it is unable to be used any more. +class _exported InvalidIteratorException : public DbstlException +{ +public: + InvalidIteratorException() : DbstlException("Invalid Iterator") + { + } + + explicit InvalidIteratorException(int error_code) : + DbstlException("Invalid Iterator", error_code) + { + } + COPY_CONSTRUCTOR(InvalidIteratorException) +}; + +/// The cursor has inconsistent status, it is unable to be used any more. +class _exported InvalidCursorException : public DbstlException +{ +public: + InvalidCursorException() : DbstlException("Invalid cursor") + { + } + + explicit InvalidCursorException(int error_code) : + DbstlException("Invalid cursor", error_code) + { + } + COPY_CONSTRUCTOR(InvalidCursorException) +}; + +/// The Dbt object has inconsistent status or has no valid data, it is unable +/// to be used any more. 
+class _exported InvalidDbtException : public DbstlException +{ +public: + InvalidDbtException() : DbstlException("Invalid Dbt object") + { + } + + explicit InvalidDbtException(int error_code) : + DbstlException("Invalid Dbt object", error_code) + { + } + COPY_CONSTRUCTOR(InvalidDbtException) +}; + +/// The assertions inside dbstl failed. The code file name and line number +/// will be passed to the exception object of this class. +class _exported FailedAssertionException : public DbstlException +{ +private: + char *err_msg_; +public: + virtual const char *what() const throw() + { + return err_msg_; + } + + FailedAssertionException(const char *fname, size_t lineno, + const char *msg) : DbstlException(0) + { + u_int32_t sz; + char *str; + + str = (char *)DbstlMalloc(sz = (u_int32_t)(strlen(msg) + + strlen(fname) + 128)); + _snprintf(str, sz, + "In file %s at line %u, %s expression failed", + fname, (unsigned int)lineno, msg); + err_msg_ = str; +#ifdef DEBUG + fprintf(stderr, "%s", str); +#endif + } + + FailedAssertionException(const FailedAssertionException&ex) : + DbstlException(ex) + { + err_msg_ = (char *)DbstlMalloc((u_int32_t) + strlen(ex.err_msg_) + 1); + strcpy(err_msg_, ex.err_msg_); + } + virtual ~FailedAssertionException() throw() + { + free(err_msg_); + } +}; + +/// There is no such key in the database. The key can't not be passed into +/// the exception instance because this class has to be a class template for +/// that to work. +class _exported NoSuchKeyException : public DbstlException +{ +public: + NoSuchKeyException() + : DbstlException("\nNo such key in the container.") + { + } + + COPY_CONSTRUCTOR(NoSuchKeyException) +}; + +/// Some argument of a function is invalid. 
+class _exported InvalidArgumentException : public DbstlException +{ +public: + explicit InvalidArgumentException(const char *errmsg) : + DbstlException(errmsg) + { +#ifdef DEBUG + cerr< +class _exported DbstlHeapObject : public DbstlGlobalInnerObject +{ +private: + typedef DbstlHeapObject self; + T *obj; + + // Only allow creating to heap. + DbstlHeapObject(T *obj1) { obj = obj1; } +public: + static self *instance(T *obj1) { return new self(obj1); } + virtual ~DbstlHeapObject() { delete obj; } +}; // DbstlHeapObject + +END_NS +#endif // !_DB_STL_GLOBAL_INNER_OBJECT_ + diff --git a/lang/cxx/stl/dbstl_map.h b/lang/cxx/stl/dbstl_map.h new file mode 100644 index 00000000..b118403e --- /dev/null +++ b/lang/cxx/stl/dbstl_map.h @@ -0,0 +1,3395 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_DB_MAP_H_ +#define _DB_STL_DB_MAP_H_ + +#include + +#include "dbstl_common.h" +#include "dbstl_dbc.h" +#include "dbstl_container.h" +#include "dbstl_resource_manager.h" +#include "dbstl_element_ref.h" +#include "dbstl_base_iterator.h" + +START_NS(dbstl) + +using std::pair; +using std::make_pair; +using std::string; +// Forward declarations, DO NOT delete the default argument values +// because class templates defintions need them. No need for _export here. +// +template +class _DB_STL_set_value; + +template > +class db_map_iterator; + +template , Typename iterator_t = + db_map_iterator > +class db_map; + +template, Typename iterator_t = + db_map_iterator > +class db_multimap; + +template +class db_set_base_iterator; + +template > +class db_set_iterator; + +template > +class db_set; + +template > +class db_multiset; + +/** \ingroup dbstl_iterators +@{ +\defgroup db_map_iterators Iterator classes for db_map and db_multimap. +db_map has two iterator class templates -- db_map_base_iterator and +db_map_iterator. 
They are the const iterator class and iterator class for +db_map and db_multimap. db_map_iterator inherits from db_map_base_iterator. + +The two classes have identical behaviors to std::map::const_iterator and +std::map::iterator respectively. Note that the common public member function +behaviors are described in the db_base_iterator section. + +The differences between the two classes are that the db_map_base_iterator can +only be used to read its referenced value, while db_map_iterator allows both +read and write access. If your access pattern is readonly, it is strongly +recommended that you use the const iterator because it is faster and more +efficient. +@{ +*/ +////////////////////////////////////////////////////////////////////// +// db_map_base_iterator class definition +// +// This class is a const iterator class for db_map and db_multimap, it can +// be used only to read data under the iterator, can't be used to write. +// +// Iterator const-ness implementation: +// +// const iterators can not update key/data pairs, other than this, +// they can do anything else like non-const iterators, so we define +// db_map_base_iterator to be the const iterator which can only be used +// to read its underlying key/data pair, but not updating them; We +// derive the db_map_iterator from the base iterator to be the +// read-write iterator. We also maintain a "readonly" property in all +// iterators so that users can specify a db_map_iterator to be +// read only. db_map_base_iterator is more efficient to read data then +// db_map_iterator, so read only accesses are strongly recommended to be +// done using a const iterator. 
+// NOTE(review): this hunk's text appears mangled by extraction: template
+// parameter/argument lists (the contents of "<...>") have been stripped
+// (e.g. "template" with no parameter list, "pair" with no type arguments)
+// and the original line breaks were collapsed. The code below is kept
+// verbatim; restore it from the upstream Berkeley DB dbstl header before
+// attempting to compile -- TODO confirm against the original dbstl_map.h.
+// +template +class _exported db_map_base_iterator : public + db_base_iterator +{ +protected: + typedef db_map_base_iterator self; + typedef db_base_iterator base; + using base::replace_current_key; +public: + typedef kdt key_type; + typedef ddt data_type; + typedef pair value_type; + // Not used in this class, but required to satisfy + // db_reverse_iterator type extraction. + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + typedef value_type& reference; + typedef value_type* pointer; + typedef value_type value_type_wrap; + // We have to use standard iterator tags to match the parameter + // list of stl internal functions, we can't use our own tag + // classes, so we don't write tag classes in dbstl. + // + typedef std::bidirectional_iterator_tag iterator_category; + + //////////////////////////////////////////////////////////////////// + // + // Begin public constructors and destructor. + /// @name Constructors and destructor + /// Do not create iterators directly using these constructors, but + /// call db_map::begin or db_multimap_begin to get instances of + /// this class. + /// \sa db_map::begin() db_multimap::begin() + //@{ + /// Copy constructor. + /// \param vi The other iterator of the same type to initialize this. + db_map_base_iterator(const self& vi) + : db_base_iterator(vi) + { + // Lazy-dup another cursor, cursor to iterator mapping + // is 1 to 1. + pcsr_ = vi.pcsr_; + curpair_base_.first = vi.curpair_base_.first; + curpair_base_.second = vi.curpair_base_.second; + } + + /// Base copy constructor. + /// \param vi Initialize from a base class iterator. + db_map_base_iterator(const base& vi) : base(vi), + pcsr_(new cursor_type(vi.get_bulk_retrieval(), + vi.is_rmw(), vi.is_directdb_get())) + { + + } + + /// Constructor. + /// \param powner The container which creates this iterator. + /// \param b_bulk_retrieval The bulk read buffer size. 0 means + /// bulk read disabled. 
+ /// \param rmw Whether set DB_RMW flag in underlying cursor. + /// \param directdbget Whether do direct database get rather than + /// using key/data values cached in the iterator whenever read. + /// \param readonly Whether open a read only cursor. Only effective + /// when using Berkeley DB Concurrent Data Store. + explicit db_map_base_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool rmw = false, + bool directdbget = true, bool readonly = false) + : db_base_iterator( + powner, directdbget, readonly, b_bulk_retrieval, rmw), + pcsr_(new cursor_type(b_bulk_retrieval, rmw, directdbget)) + { + } + + /// Default constructor, does not create the cursor for now. + db_map_base_iterator() + { + } + + // Use virtual because ElementRef<> uses a db_base_iterator* pointer + // to refer to the iterator, and also use "dead_" flag to avoid + // multiple calls to the same destructor by ~ElementRef<>(). + /// Destructor. + virtual ~db_map_base_iterator() + { + this->dead_ = true; + if (pcsr_) + pcsr_->close(); + } + //@} + + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin functions that shift iterator position. + // + // Do not throw exceptions here because it is likely and normal + // to iterate to the "end iterator". + // + /// @name Iterator increment movement functions. + /// The two functions move the iterator one element forward, so that + /// the element it sits on has a bigger key. The btree/hash key + /// comparison routine determines which key is greater. + /// Use ++iter rather than iter++ where possible to avoid two useless + /// iterator copy constructions. + //@{ + /// Pre-increment. + /// \return This iterator after incremented. + inline self& operator++() + { + next(); + + return *this; + } + + /// Post-increment. + /// \return Another iterator having the old value of this iterator. 
+ inline self operator++(int) + { + self itr = *this; + + next(); + + return itr; + } + //@} + + /// @name Iterator decrement movement functions. + /// The two functions move the iterator one element backward, so that + /// the element it sits on has a smaller key. The btree/hash key + /// comparison routine determines which key is greater. + /// Use --iter rather than iter-- where possible to avoid two useless + /// iterator copy constructions. + //@{ + /// Pre-decrement. + /// \return This iterator after decremented. + inline self& operator--() + { + prev(); + + return *this; + } + + /// Post-decrement. + /// \return Another iterator having the old value of this iterator. + self operator--(int) + { + self itr = *this; + prev(); + + return itr; + } + //@} + + /// Assignment operator. This iterator will point to the same key/data + /// pair as itr, and have the same configurations as itr. + /// \param itr The right value of assignment. + /// \return The reference of itr. + /// \sa db_base_iterator::operator=(const self&) + // We will duplicate the Dbc cursor here. + inline const self& operator=(const self&itr) + { + ASSIGNMENT_PREDCOND(itr) + base::operator=(itr); + + curpair_base_.first = itr.curpair_base_.first; + curpair_base_.second = itr.curpair_base_.second; + if (pcsr_) + pcsr_->close(); + pcsr_ = itr.pcsr_; + + return itr; + } + + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin iterator comparison functions. + // + /// \name Compare operators. + /// Only equal comparison is supported. + //@{ + /// Compare two iterators. + /// Two iterators compare equal when they are both invalid or + /// both valid and underlying cursor compare equal(i.e. sitting on the + /// same key/data pair). + // + // Note that the iterator itr or this iterator may be an invalid + // one, i.e. its this->itr_status_ is INVALID_ITERATOR_POSITION. 
+ // We do not distinguish between end and rend iterators although + // we are able to do so, because they are never compared together. + /// Equal comparison operator. + /// \param itr The iterator to compare against. + /// \return Returns true if equal, false otherwise. + inline bool operator==(const self&itr) const + { + COMPARE_CHECK(itr) + if (((itr.itr_status_ == this->itr_status_) && + (this->itr_status_ == INVALID_ITERATOR_POSITION)) || + ((itr.itr_status_ == this->itr_status_) && + (pcsr_->compare((itr.pcsr_.base_ptr())) == 0))) + return true; + return false; + + } + + /// Unequal comparison operator. + /// \param itr The iterator to compare against. + /// \return Returns false if equal, true otherwise. + /// \sa bool operator==(const self&itr) const + inline bool operator!=(const self&itr) const + { + return !(*this == itr) ; + } + //@} + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin functions that retrieve values from the iterator. + // + // curpair_base_ is always kept updated on iterator movement, but if + // directdb_get_ is true, curpair_base_ is also updated here before + // making use of the value it references. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + // Note that the returned reference can only be used to read data, + // can't be used to update data. + /// \name Functions that retrieve values from the iterator. + //@{ + /// Dereference operator. + /// Return the reference to the cached data element, which is a + /// pair. You can only read its referenced data via + /// this iterator but can not update it. + /// \return Current data element reference object, i.e. ElementHolder + /// or ElementRef object. 
+ inline reference operator*() const + { + + if (this->directdb_get_) { + csrddt d; + pcsr_->get_current_key_data(curpair_base_.first, d); + assign_second0(curpair_base_, d); + } + // Returning reference, no copy construction. + return curpair_base_; + } + + // curpair_base_ is always kept updated on iterator movement, but if + // directdb_get_ is true, curpair_base_ is also updated here before + // making use of the value it references. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + // Note that the returned reference can only be used to read data, + // can't be used to update data. + /// Arrow operator. + /// Return the pointer to the cached data element, which is a + /// pair. You can only read its referenced data via + /// this iterator but can not update it. + /// \return Current data element reference object's address, i.e. + /// address of ElementHolder or ElementRef object. + inline pointer operator->() const + { + + if (this->directdb_get_) { + csrddt d; + pcsr_->get_current_key_data(curpair_base_.first, d); + assign_second0(curpair_base_, d); + } + + return &curpair_base_; + } + //@} + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin dbstl specific functions. + // + // Refresh the underlying cursor's current data and this object's + // curpair_base_. It needs to be called only if directdb_get is + // disabled, and other iterators updated + // the key/data pair this iterator points to and we are about to use + // this iterator to access that key/data pair. + // If direct db get is enabled, this method never needs to be called. + /// @name dbstl specific functions + //@{ + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. 
+ /// \sa db_base_iterator::refresh(bool) + virtual int refresh(bool from_db = true) const + { + csrddt d; + + if (from_db && !this->directdb_get_) + pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + pcsr_->get_current_key_data(curpair_base_.first, d); + assign_second0(curpair_base_, d); + + return 0; + } + + // By calling this function, users can choose to close the underlying + // cursor before iterator destruction to get better performance + // potentially. + /// \brief Close underlying Berkeley DB cursor of this iterator. + /// \sa db_base_iterator::close_cursor() const + inline void close_cursor() const + { + if (pcsr_) + pcsr_->close(); + } + + /// Iterator movement function. + /// Move this iterator to the specified key k, by default moves + /// exactly to k, and update cached data element, you can + /// also specify DB_SET_RANGE, to move to the smallest key no less + /// than k. The btree/hash key comparison routine determines which + /// key is bigger. When the iterator is on a multiple container, + /// move_to will move itself to the first key/data pair of the + /// identical keys. + /// \param k The target key value to move to. + /// \param flag Flags available: DB_SET(default) or DB_SET_RANGE. + /// DB_SET will move this iterator exactly at k; DB_SET_RANGE moves + /// this iterator to k or the smallest key greater than k. If fail + /// to find such a key, this iterator will become invalid. + /// \return 0 if succeed; non-0 otherwise, and this iterator becomes + /// invalid. Call db_strerror with the return value to get the error + /// message. + inline int move_to(const kdt& k, int flag = DB_SET) const + { + int ret; + // Use tmpk2 to avoid k being modified. + kdt tmpk2 = k; + + this->itr_status_ = (ret = pcsr_->move_to(tmpk2, flag)); + if (ret != 0) { + this->inval_pos_type_ = base::IPT_UNSET; + return ret; + } + + refresh(); + + return ret; + } + + /// Modify bulk buffer size. 
+ /// Bulk read is enabled when creating an + /// iterator, so users later can only modify the bulk buffer size + /// to another value, but can't enable/disable bulk read while an + /// iterator is already alive. + /// \param sz The new size of the bulk read buffer of this iterator. + /// \return Returns true if succeeded, false otherwise. + /// \sa db_base_iterator::set_bulk_buffer(u_int32_t ) + bool set_bulk_buffer(u_int32_t sz) + { + bool ret = this->pcsr_->set_bulk_buffer(sz); + if (ret) + this->bulk_retrieval_ = + this->pcsr_->get_bulk_bufsize(); + return ret; + } + + /// \brief Get bulk retrieval buffer size in bytes. + /// \return Return current bulk buffer size or 0 if bulk retrieval is + /// not enabled. + /// \sa db_base_iterator::get_bulk_bufsize() + u_int32_t get_bulk_bufsize() + { + this->bulk_retrieval_ = pcsr_->get_bulk_bufsize(); + return this->bulk_retrieval_; + } + //@} + //////////////////////////////////////////////////////////////////// + +protected: + + // The cursor_type is used to directly return the pair object, + // rather than a reference to it + typedef DbCursor cursor_type; + + friend class db_map_iterator >; + friend class db_map_iterator >; + // Use friend classes to hide internal members from users. + friend class db_map >; + friend class db_map, + db_set_iterator > >; + friend class db_map, ElementHolder, + db_set_iterator > >; + + friend class db_set >; + friend class db_set_iterator >; + friend class db_multiset >; + friend class db_multimap >; + friend class db_multimap, + ElementHolder, db_set_iterator > >; + + friend class db_map >; + friend class db_map, ElementRef, + db_set_iterator > >; + + friend class db_set >; + friend class db_set_iterator >; + friend class db_multiset >; + friend class db_multimap >; + friend class db_multimap, ElementRef, + db_set_iterator > >; + + + //////////////////////////////////////////////////////////////////// + // Begin db_map_base_iterator data members. 
+ // + // Cursor of this iterator, note that each db_map_base_iterator has a + // unique DbCursor, not shared with any other iterator, and when copy + // constructing or assigning, the cursor is duplicated + // when it is actually used to access db. + // + mutable LazyDupCursor > pcsr_; + + // In order for std::map style iterator to work, we need a pair + // here to store the key-value pair this iterator currently points + // to in the db_map. + // + // curpair_base_ is always kept updated on every cursor/iterator + // movement and initialized to point to the first key-value pair when + // db_map<>::begin() is called. + // + mutable value_type curpair_base_; + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin internal helper functions. + // + // Open the iterator and its cursor. + // + void open() const + { + u_int32_t oflags = 0, coflags = 0; + int ret; + Db *pdb = this->owner_->get_db_handle(); + DbEnv *penv = pdb->get_env(); + + coflags = this->owner_->get_cursor_open_flags(); + assert(this->owner_ != NULL); + if (!this->read_only_ && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)), ret); + if ((oflags & DB_INIT_CDB) != 0) + this->owner_->set_cursor_open_flags(coflags |= + DB_WRITECURSOR); + } + if (!pcsr_) + pcsr_.set_cursor(new DbCursor( + this->bulk_retrieval_, + this->rmw_csr_, this->directdb_get_)); + this->itr_status_ = pcsr_->open((db_container*)this->owner_, + coflags); + + } + + // Move this iterator as well as the underlying Dbc* cursor to + // first element and update cur_pair_. + // + int first() const + { + + assert(this->owner_ != NULL); + this->itr_status_ = pcsr_->first(); + if (this->itr_status_ == 0) + refresh(); + else + this->inval_pos_type_ = base::IPT_UNSET; + + return this->itr_status_; + + } + + // Move this iterator as well as the underlying Dbc* cursor + // to last effective(valid) element and update cur_pair_. 
+ // + int last() const + { + + assert(this->owner_ != NULL); + this->itr_status_ = pcsr_->last(); + if (this->itr_status_ == 0) + refresh(); + else + this->inval_pos_type_ = base::IPT_UNSET; + + return this->itr_status_; + } + + // Move this iterator as well as the underlying Dbc* cursor + // to next element, then update its position flags and cur_pair_. + // + int next(int flags = DB_NEXT) const + { + + assert(this->owner_ != NULL); + + if (this->itr_status_ == INVALID_ITERATOR_POSITION) { + if (this->inval_pos_type_ == base::IPT_BEFORE_FIRST) { + // This rend itr must have a non-NULL owner. + open(); + // rend itr can go back to first element. + this->itr_status_ = first(); + } else if (this->inval_pos_type_ == base::IPT_UNSET) { + THROW0(InvalidIteratorException); + } + // Else, return itr_status_ in last line. + } else { + + this->itr_status_ = pcsr_->next(flags); + if (this->itr_status_ == 0) + refresh(); + else + this->inval_pos_type_ = base::IPT_AFTER_LAST; + + } + + return this->itr_status_; + } + + // Move this iterator as well as the underlying Dbc* cursor + // to previous element. + // + int prev(int flags = DB_PREV) const + { + + assert(this->owner_ != NULL); + if (this->itr_status_ == INVALID_ITERATOR_POSITION) { + if (this->inval_pos_type_ == base::IPT_AFTER_LAST) { + // This end itr must have a non-NULL owner. + open(); + // end itr can go back to last element. + this->itr_status_ = last(); + } else if (this->inval_pos_type_ == base::IPT_UNSET) { + THROW0(InvalidIteratorException); + } + // Else, return itr stat in last line. 
+ } else { + this->itr_status_ = pcsr_->prev(flags); + if (this->itr_status_ == 0) + refresh(); + else + this->inval_pos_type_ = base::IPT_BEFORE_FIRST; + + } + + return this->itr_status_; + } + + void set_curpair_base(const kdt& k, const csrddt &d) const + { + curpair_base_.first = k; + assign_second0(curpair_base_, d); + } + + //////////////////////////////////////////////////////////////////// + +protected: // Do not remove this line, otherwise assign_second0 may be public. +#ifndef DOXYGEN_CANNOT_SEE_THIS +#if NO_MEMBER_FUNCTION_PARTIAL_SPECIALIZATION +};// end of db_map_base_iterator<> +template +void assign_second0(pair& v, const datadt& d) +{ + v.second = d; +} + +template +void assign_second0(pair &v, + const _DB_STL_set_value& + /* d unused, use v.first to assign v.second */) +{ + v.second = v.first; +} +#else + +template +inline void assign_second0(value_type& v, const datadt& d) const +{ + v.second = d; +} + +template<> +inline void +assign_second0(value_type &v, const _DB_STL_set_value& + /* d unused, use v.first to assign v.second */) const +{ + v.second = v.first; +} + +};// end of db_map_base_iterator<> + +#endif + +#else +}; +#endif // DOXYGEN_CANNOT_SEE_THIS +//@} // db_map_iterators +//@} // dbstl_iterators + + +#if NO_MEMBER_FUNCTION_PARTIAL_SPECIALIZATION +template +void assign_second0(pair& v, const datadt& d) ; +#endif + +////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +// +// db_map_iterator class template definition +// +// db_map_iterator is the iterator class template for db_map and +// db_multimap, it is also the base class for db_set_iterator. It can be +// used to both read and write the database. 
+// NOTE(review): as with the preceding hunk, this text appears mangled by
+// extraction: the "<...>" template parameter/argument lists are stripped
+// and line breaks were collapsed. Code is preserved verbatim; restore it
+// from the upstream Berkeley DB dbstl header before attempting to compile
+// -- TODO confirm against the original dbstl_map.h.
+// +// Template parameters info: +// kdt is "key data type", ddt is "data data type", value_type_sub is +// either ElementRef (by default, in this case ElementRef inherits +// from ddt, so ddt must not be a primitive type) or ElementHolder, +// in this case ElementHolder has a data member of type ddt, so suitable for +// primitive types, but don't apply it to classes otherwise you can't access +// members like this : *iterator.member = value. +// +/// \ingroup dbstl_iterators +//@{ +/// \ingroup db_map_iterators +//@{ +template +class _exported db_map_iterator : public + db_map_base_iterator +{ +protected: + typedef db_map_iterator self; + typedef typename value_type_sub::content_type realddt; + using db_base_iterator:: + replace_current_key; +public: + typedef kdt key_type; + typedef ddt data_type; + typedef pair value_type; + typedef pair value_type_wrap; + // Not used in this class, but required to satisfy + // db_reverse_iterator type extraction. + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + typedef value_type_wrap& reference; + typedef value_type_wrap* pointer; + + // We have to use standard iterator tags to match the parameter + // list of stl internal functions, we can't use our own tag + // classes, so we don't write tag classes in dbstl. + // + typedef std::bidirectional_iterator_tag iterator_category; + + // Refresh the underlying cursor's current data and this object's + // curpair_. It needs to be called only if other iterators updated the + // key/data pair this iterator points to and we are about to use + // this iterator to access that key/data pair. If direct db get is + // enabled, this method never needs to be called. + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. 
+ /// \sa db_base_iterator::refresh(bool ) + virtual int refresh(bool from_db = true) const + { + kdt k; + ddt d; + + if (from_db && !this->directdb_get_) + this->pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + this->pcsr_->get_current_key_data(k, d); + curpair_.first = k; + assign_second(curpair_, d); + this->set_curpair_base(k, d); + + return 0; + } + + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor definitions. + /// \name Constructors and destructor + /// Do not create iterators directly using these constructors, but + /// call db_map::begin or db_multimap_begin to get instances of + /// this class. + /// \sa db_map::begin() db_multimap::begin() + //@{ + /// Copy constructor. + /// \param vi The other iterator of the same type to initialize this. + db_map_iterator(const db_map_iterator& vi) + : db_map_base_iterator(vi) + { + // Lazy-dup another cursor, cursor to iterator mapping + // is 1 to 1. + curpair_.first = vi.curpair_.first; + curpair_.second._DB_STL_CopyData(vi.curpair_.second); + curpair_.second._DB_STL_SetIterator(this); + } + + /// Base copy constructor. + /// \param vi Initialize from a base class iterator. + db_map_iterator(const db_map_base_iterator& vi) : + db_map_base_iterator(vi) + + { + curpair_.second._DB_STL_SetIterator(this); + curpair_.first = vi->first; + curpair_.second._DB_STL_CopyData(vi->second); + } + + /// Constructor. + /// \param powner The container which creates this iterator. + /// \param b_bulk_retrieval The bulk read buffer size. 0 means + /// bulk read disabled. + /// \param brmw Whether set DB_RMW flag in underlying cursor. + /// \param directdbget Whether do direct database get rather than + /// using key/data values cached in the iterator whenever read. + /// \param b_read_only Whether open a read only cursor. Only effective + /// when using Berkeley DB Concurrent Data Store. 
+ explicit db_map_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool brmw = false, + bool directdbget = true, bool b_read_only = false) + : db_map_base_iterator + (powner, b_bulk_retrieval, brmw, directdbget, b_read_only) + { + curpair_.second._DB_STL_SetIterator(this); + } + + /// Default constructor, does not create the cursor for now. + db_map_iterator() : db_map_base_iterator() + { + curpair_.second._DB_STL_SetIterator(this); + } + + // Use virtual because ElementRef<> uses a db_base_iterator* pointer + // to refer to the iterator, and also use "dead_" flag to avoid + // multiple calls to the same destructor by ~ElementRef<>(). + /// Destructor. + virtual ~db_map_iterator() + { + // Required here though set in base destructor too. + this->dead_ = true; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin functions that shift iterator position. + // + // Do not throw exceptions here because it is likely and normal + // to iterate to the "end iterator". + /// \name Iterator movement operators. + //@{ + /// Pre-increment \sa db_map_base_iterator::operator++() + /// \return This iterator after incremented. + inline self& operator++() + { + this->next(); + + return *this; + } + + /// Post-increment \sa db_map_base_iterator::operator++(int) + /// \return Another iterator having the old value of this iterator. + inline self operator++(int) + { + self itr = *this; + + this->next(); + + return itr; + } + + /// Pre-decrement \sa db_map_base_iterator::operator--() + /// \return This iterator after decremented. + inline self& operator--() + { + this->prev(); + + return *this; + } + + /// Post-decrement \sa db_map_base_iterator::operator--(int) + /// \return Another iterator having the old value of this iterator. 
+ self operator--(int) + { + self itr = *this; + this->prev(); + + return itr; + } + //@} + // Assignment operator, we will duplicate the Dbc cursor here. + /// Assignment operator. This iterator will point to the same key/data + /// pair as itr, and have the same configurations as itr. + /// \param itr The right value of assignment. + /// \return The reference of itr. + /// \sa db_base_iterator::operator=(const self&) + inline const self& operator=(const self&itr) + { + ASSIGNMENT_PREDCOND(itr) + base::operator=(itr); + + curpair_.first = itr.curpair_.first; + + // Only copy data from itr.curpair_ into curpair_, + // don't store into db. Note that we can not assign + // itr.curpair_ to curpair_ simply by curpair_ = itr.curpair_, + // otherwise, ElementRef<>::operator= is called, which will + // update the data element referenced by this iterator. + // + curpair_.second._DB_STL_CopyData(itr.curpair_.second); + + return itr; + } + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin functions that retrieve values from the iterator. + // + // curpair_base_ is always kept updated on iterator movement, but if + // directdb_get_ is true, curpair_base_ is also updated here before + // making use of the value it references. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + /// \name Functions that retrieve values from the iterator. + //@{ + /// Dereference operator. + /// Return the reference to the cached data element, which is a + /// pair > object if T is a class type or a + /// pair > object if T is a C++ primitive + /// data type. + /// \return Current data element reference object, i.e. ElementHolder + /// or ElementRef object. 
+ inline reference operator*() const + { + + if (this->directdb_get_) { + ddt d; + this->pcsr_->get_current_key_data(curpair_.first, d); + assign_second(curpair_, d); + } + + return curpair_;// returning reference, no copy construction + } + + // curpair_base_ is always kept updated on iterator movement, but if + // directdb_get_ is true, curpair_base_ is also updated here before + // making use of the value it references. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + /// Arrow operator. + /// Return the pointer to the cached data element, which is a + /// pair > object if T is a class type or a + /// pair > object if T is a C++ primitive + /// data type. + /// \return Current data element reference object's address, i.e. + /// address of ElementHolder or ElementRef object. + inline pointer operator->() const + { + + if (this->directdb_get_) { + ddt d; + this->pcsr_->get_current_key_data(curpair_.first, d); + assign_second(curpair_, d); + } + return &curpair_; + } + //@} + //////////////////////////////////////////////////////////////////// + +//@} // db_map_iterators +//@} // dbstl_iterators +protected: + // The cursor_type is used to directly return the pair object, + // rather than a reference to it. + typedef DbCursor cursor_type; + typedef db_map_base_iterator base; + typedef db_map_base_iterator const_version; + + // Use friend classes to hide internal members from users. + friend class db_map; + friend class db_map >; + friend class db_set; + friend class db_set_iterator; + friend class db_multiset; + friend class db_multimap; + friend class db_multimap, value_type_sub, + db_set_iterator >; + + //////////////////////////////////////////////////////////////// + // Begin db_map_iterator data members. + // + // In order for std::map style iterator to work, we need a pair + // here to store the key-value pair this iterator currently points + // to in the db_map. 
+ // + // curpair_ is always kept updated on every cursor/iterator movement + // and initialized to point to the first key-value pair when + // db_map<>::begin() is called. + // + mutable value_type_wrap curpair_; + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin internal helper functions. + // + // Called by ElementRef<> object when this iterator belongs to the + // object---The only situation is in db_container::operator[] which + // has to return an ElementRef/Holder object A, and its iterator has + // to survive until A is destructed. + virtual void delete_me() const + { + if (!this->dead_) + delete this; + } + + // Duplicate this iterator. + virtual self* dup_itr() const + { + self *itr = new self(*this); + // The curpair_ of itr does not delete itr, the independent + // one does. + //itr->curpair_.second._DB_STL_SetDelItr(); + return itr; + } + + // Replace the current key/data pair's data pointed to by this + // iterator's underlying Dbc* cursor with the parameter d. + // + virtual int replace_current( + const typename value_type_sub::content_type& d) + { + int ret; + + if (this->read_only_) { + THROW(InvalidFunctionCall, ( +"db_map_iterator<>::replace_current can't be called via a read only iterator")); + } + ret = this->pcsr_->replace(d); + + return ret; + } + + // Used by set iterator to store another different key : + // remove the previous one then insert the new one. + // It has to be defined in this class because db_set_iterator + // inherits from db_map_iterator but we have no polymorphism when + // using stl because the objects are always used rather than the + // pointer/reference. 
+ // + virtual int replace_current_key(const kdt& k) + { + int ret; + + if (this->read_only_) { + THROW(InvalidFunctionCall, ( +"db_map_iterator<>::replace_current_key can't be called via a read only iterator")); + } + ret = this->pcsr_->replace_key(k); + + return ret; + } + + //////////////////////////////////////////////////////////////// + + +protected: // Do not remove this line, otherwise assign_second may be public. +#ifndef DOXYGEN_CANNOT_SEE_THIS +#if NO_MEMBER_FUNCTION_PARTIAL_SPECIALIZATION +};// end of db_map_iterator<> + +template +void assign_second(pair& v, const datadt& d) +{ + v.second._DB_STL_CopyData(d); +} + +template +void assign_second(pair &v, + const _DB_STL_set_value& + /* d unused, use v.first to assign v.second */) +{ + v.second._DB_STL_CopyData(v.first); +} +#else + +template +inline void assign_second(value_type_wrap& v, const datadt& d) const +{ + v.second._DB_STL_CopyData(d); +} + +template<> +inline void +assign_second(value_type_wrap &v, const _DB_STL_set_value& + /* d unused, use v.first to assign v.second */) const +{ + v.second._DB_STL_CopyData(v.first); +} + +};// end of db_map_iterator<> + +#endif +#else +}; +#endif // DOXYGEN_CANNOT_SEE_THIS +u_int32_t hash_default(Db *dbp, const void *key, u_int32_t len); + +////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// db_map container class definition +/// \ingroup dbstl_containers +//@{ +/// db_map has identical methods to std::map and the semantics for each +/// method is identical to its std::map counterpart, except that it stores data +/// into underlying Berkeley DB btree or hash database. Passing a database +/// handle of btree or hash type creates a db_map equivalent to std::map and +/// std::hashmap respectively. +/// Database(dbp) and environment(penv) handle requirement(applies to all +/// constructors in this class template): +/// 0. 
The dbp is opened inside the penv environment. Either one of the two +/// handles can be NULL. If dbp is NULL, an anonymous database is created +/// by dbstl. +/// 1. Database type of dbp should be DB_BTREE or DB_HASH. +/// 2. No DB_DUP or DB_DUPSORT flag set in dbp. +/// 3. No DB_RECNUM flag set in dbp. +/// 4. No DB_TRUNCATE specified in dbp's database open flags. +/// 5. DB_THREAD must be set if you are sharing the dbp across +/// multiple threads directly, or indirectly by sharing the container object +/// across multiple threads. +/// \param kdt The key data type. +/// \param ddt The data data type. db_map stores key/data pairs. +/// \param value_type_sub Do not specify anything if ddt type is a +/// class/struct type; Otherwise, specify ElementHolder to it. +/// \param iterator_t Never specify anything to this type parameter. It is +/// only used internally. +/// \sa db_container db_container(Db*, DbEnv*) db_container(const db_container&) +template +class _exported db_map : public db_container +{ + +public: + // iterator_t is default argument, see forward declaration at the + // head of this file + typedef iterator_t iterator; + typedef typename iterator::const_version const_iterator; + typedef db_reverse_iterator reverse_iterator; + typedef db_reverse_iterator + const_reverse_iterator; + typedef kdt key_type; + typedef ddt data_type; + typedef value_type_sub data_type_wrap; + typedef pair value_type; + typedef pair value_type_wrap; + typedef const value_type const_value_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + // The following three types are not used in db_map, but we define + // them to conform to stl specifications. 
+ typedef value_type_wrap& reference; + typedef const value_type& const_reference; + typedef value_type_wrap* pointer; +protected: + typedef db_map self; + typedef typename value_type_sub::content_type realddt; + + // This constructor is for db_multimap's constructors to call, + // because other constructors of this class will verify db handles + // and create one if needed. We need a special one that don't do + // anything. The BulkRetrievalOption is randomly picked, no special + // implications at all. + db_map(BulkRetrievalOption& arg){ delete &arg; } +public: + //////////////////////////////////////////////////////////////// + // Begin inner class definitions. + // + // key_compare class definition, it is defined as an inner class, + // using underlying btree/hash db's compare function + // + class key_compare + { + private: + Db*pdb; + public: + key_compare(Db*pdb1) + { + pdb = pdb1; + } + bool operator()(const kdt& k1, const kdt& k2) const + { + return compare_keys(pdb, k1, k2); + } + + }; // key_compare class definition + + // value_compare class definition, it is defined as an inner class, + // using key_compare class to do comparison. + // + // The difference between key_compare and value_compare is the + // parameter its operator() function accepts, see the function + // signature. 
+ // + class value_compare + { + key_compare kc; + public: + value_compare(Db*pdb) : kc(pdb) + { + + } + + bool operator()(const value_type& v1, + const value_type& v2) const + { + + return kc(v1.first, v2.first); + } + + }; // value_compare class definition + + class hasher + { + private: + Db*pdb; + public: + hasher(Db*db){pdb = db;} + size_t operator()(const kdt&k) const + { + DBTYPE dbtype; + int ret; + + assert(pdb != NULL); + ret = pdb->get_type(&dbtype); + assert(ret == 0); + if (dbtype != DB_HASH) { + THROW(InvalidFunctionCall, ( + "db_map<>::hasher")); + } + h_hash_fcn_t hash = NULL; + BDBOP(pdb->get_h_hash(&hash), ret); + if (hash == NULL) + hash = hash_default; + return hash(pdb, &k, sizeof(k)); + } + }; // hasher + + class key_equal + { + private: + Db*pdb; + public: + key_equal(Db*db){pdb = db;} + bool operator()(const kdt& kk1, const kdt&kk2) const + { + DBTYPE dbtype; + kdt k1 = kk1, k2 = kk2; + int ret; + + dbstl_assert(pdb != NULL); + ret = pdb->get_type(&dbtype); + dbstl_assert(ret == 0); + if (dbtype != DB_HASH) { + THROW(InvalidFunctionCall, ( + "db_map<>::key_equal")); + } + + db_compare_fcn_t comp = NULL; + BDBOP(pdb->get_h_compare(&comp), ret); + if (comp == NULL) + return memcmp(&kk1, &kk2, sizeof(kdt)) == 0; + Dbt kd1(&k1, sizeof(k1)), kd2(&k2, sizeof(k2)); + + return comp(pdb, &kd1, &kd2) == 0; + + + } + + };// key_equal + //////////////////////////////////////////////////////////////// + + /// Function to get key compare functor. + /// Used when this container is a hash_map, hash_multimap, + /// hash_set or hash_multiset equivalent. + /// \return key_equal type of compare functor. + /// \sa http://www.sgi.com/tech/stl/hash_map.html + inline key_equal key_eq() const + { + key_equal ke(this->get_db_handle()); + return ke; + } + + /// Function to get hash key generating functor. + /// Used when this container is a hash_map, hash_multimap, + /// hash_set or hash_multiset equivalent. + /// \return The hash key generating functor. 
+    /// \sa http://www.sgi.com/tech/stl/hash_map.html
+    inline hasher hash_funct() const
+    {
+        hasher h(this->get_db_handle());
+        return h;
+
+    }
+
+    /// Function to get value compare functor. Used when this container
+    /// is a std::map, std::multimap, std::set or std::multiset equivalent.
+    /// \return The value compare functor.
+    /// \sa http://www.cplusplus.com/reference/stl/map/value_comp/
+    inline value_compare value_comp() const
+    {
+        value_compare vc(this->get_db_handle());
+        return vc;
+    }
+
+    /// Function to get key compare functor. Used when this container
+    /// is a std::map, std::multimap, std::set or std::multiset equivalent.
+    /// \return The key compare functor.
+    /// \sa http://www.cplusplus.com/reference/stl/map/key_comp/
+    inline key_compare key_comp() const
+    {
+        key_compare kc(this->get_db_handle());
+        return kc;
+    }
+
+    ////////////////////////////////////////////////////////////////
+    // Begin constructors and destructor definitions.
+    /// \name Constructors and destructor
+    //@{
+    // We don't need the equal compare or allocator here, user need to
+    // call Db::set_bt_compare or Db::set_h_compare to set comparison
+    // function.
+    /// Create a std::map/hash_map equivalent associative container.
+    /// See the handle requirement in class details to pass correct
+    /// database/environment handles.
+    /// \param dbp The database handle.
+    /// \param envp The database environment handle.
+    /// \sa db_container(Db*, DbEnv*)
+    explicit db_map(Db *dbp = NULL, DbEnv* envp = NULL) :
+        db_container(dbp, envp)
+    {
+        const char *errmsg;
+
+        this->open_db_handles(dbp, envp, DB_BTREE,
+            DB_CREATE | DB_THREAD, 0);
+
+        if ((errmsg = verify_config(dbp, envp)) != NULL) {
+            THROW(InvalidArgumentException, ("Db*", errmsg));
+        }
+        this->set_db_handle_int(dbp, envp);
+    }
+
+    /// Iteration constructor. Iterates between first and last,
+    /// setting a copy of each of the sequence of elements as the
+    /// content of the container object.
+    /// Create a std::map/hash_map equivalent associative container.
+    /// Insert a range of elements into the database. The range is
+    /// [first, last), which contains elements that can
+    /// be converted to type ddt automatically.
+    /// See the handle requirement in class details to pass correct
+    /// database/environment handles.
+    /// This function supports auto-commit.
+    /// \param dbp The database handle.
+    /// \param envp The database environment handle.
+    /// \param first The closed boundary of the range.
+    /// \param last The open boundary of the range.
+    /// \sa db_container(Db*, DbEnv*)
+    template <Typename InputIterator>
+    db_map(Db *dbp, DbEnv* envp, InputIterator first,
+        InputIterator last) : db_container(dbp, envp)
+    {
+        const char *errmsg;
+
+        this->open_db_handles(dbp, envp, DB_BTREE,
+            DB_CREATE | DB_THREAD, 0);
+        if ((errmsg = verify_config(dbp, envp)) != NULL) {
+            THROW(InvalidArgumentException, ("Db*", errmsg));
+        }
+        this->set_db_handle_int(dbp, envp);
+
+        this->begin_txn();
+        try {
+            insert(first, last);
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+        this->commit_txn();
+    }
+
+    // Copy constructor. The object is initialized to have the same
+    // contents as the x map object, do not copy properties because
+    // if we copy things like pdb, we are storing to the same db, so we
+    // create a new database, use it as the backing db, and store data
+    // into it.
+    /// Copy constructor.
+    /// Create an database and insert all key/data pairs in x into this
+    /// container. x's data members are not copied.
+    /// This function supports auto-commit.
+    /// \param x The other container to initialize this container.
+    /// \sa db_container(const db_container&)
+    db_map(const db_map& x) :
+        db_container(x)
+    {
+        verify_db_handles(x);
+        this->set_db_handle_int(this->clone_db_config(
+            x.get_db_handle()), x.get_db_env_handle());
+        assert(this->get_db_handle() != NULL);
+
+        this->begin_txn();
+        try {
+            copy_db((db_map&)x);
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+        this->commit_txn();
+    }
+
+    virtual ~db_map(){}
+    //@}
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    // Begin insert function definitions.
+    /// Container content assignment operator.
+    /// This function supports auto-commit.
+    /// \param x The other container whose key/data pairs will be inserted
+    /// into this container. Old content in this containers are discarded.
+    /// \sa http://www.cplusplus.com/reference/stl/map/operator=/
+    inline const self& operator=(const self& x)
+    {
+        ASSIGNMENT_PREDCOND(x)
+        db_container::operator =(x);
+        verify_db_handles(x);
+        assert(this->get_db_handle() != NULL);
+        this->begin_txn();
+        try {
+            copy_db((self &)x);
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+        this->commit_txn();
+        return x;
+    }
+
+    /// \name Insert Functions
+    /// They have similiar usage as their C++ STL equivalents.
+    /// Note that when secondary index is enabled, each
+    /// db_container can create a db_multimap secondary container,
+    /// but the insert function is not functional for secondary containers.
+    /// \sa http://www.cplusplus.com/reference/stl/map/insert/
+    //@{
+    //
+    // Insert functions. Note that stl requires if the entry with x.key
+    // already exists, insert should not overwrite that entry and the
+    // insert should fail; but bdb Dbc::cursor(DB_KEYLAST) will replace
+    // existing data with new one, so we will first find whether we
+    // have this data, if have, return false;
+    //
+    // Can not internally use begin/commit_txn to wrap this call because
+    // it returns an iterator, which is closed after commit_txn(), and
+    // reopening it is wrong in multithreaded access.
+    /// Insert a single key/data pair if the key is not in the container.
+    /// \param x The key/data pair to insert.
+    /// \return A pair P, if insert OK, i.e.
the inserted key wasn't in the + /// container, P.first will be the iterator sitting on the inserted + /// key/data pair, and P.second is true; otherwise P.first is an + /// invalid iterator and P.second is false. + pair insert (const value_type& x ) + { + pair ib; + iterator witr; + + init_itr(witr); + open_itr(witr); + + if (witr.move_to(x.first) == 0) {// has it + ib.first = witr; + ib.second = false; + // Cursor movements are not logged, no need to + // use transaction here. + return ib; + } + + witr.itr_status_ = witr.pcsr_->insert(x.first, x.second, + DB_KEYLAST); + assert(witr.itr_status_ == 0); + witr.refresh(false); + ib.first = witr; + ib.second = true; + + return ib; + } + + /// Insert with hint position. We ignore the hint position because + /// Berkeley DB knows better where to insert. + /// \param position The hint position. + /// \param x The key/data pair to insert. + /// \return The iterator sitting on the inserted key/data pair, or an + /// invalid iterator if the key was already in the container. + inline iterator insert (iterator position, const value_type& x ) + { + pair ib = insert(x); + return ib.first; + } + + // Member function template overload. + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + void insert (const db_map_base_iterator& first, + const db_map_base_iterator& last) + { + db_map_base_iterator ii; + iterator witr; + + init_itr(witr); + open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(ii->first, ii->second, + DB_KEYLAST); + } + + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. 
+    template <Typename InputIterator>
+    void insert (InputIterator first, InputIterator last)
+    {
+        InputIterator ii;
+        iterator witr;
+
+        init_itr(witr);
+        open_itr(witr);
+
+        for (ii = first; ii != last; ++ii)
+            witr.pcsr_->insert(ii->first, ii->second,
+                DB_KEYLAST);
+    }
+    //@}
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    // Begin functions that create iterators.
+    /// \name Iterator Functions
+    /// The parameters in begin functions of this group have identical
+    /// meaning to thoes in db_vector::begin, refer to those functions
+    /// for details.
+    /// \sa db_vector::begin()
+    //@{
+    /// Begin a read-write or readonly iterator which sits on the first
+    /// key/data pair of the database.
+    /// \param rmw Same as that of
+    /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \param bulkretrieval Same as that of
+    /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \param directdb_get Same as that of
+    /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \param readonly Same as that of
+    /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \return The created iterator.
+    /// \sa db_vector::begin(ReadModifyWriteOption, bool,
+    /// BulkRetrievalOption, bool)
+    //
+    iterator begin(ReadModifyWriteOption rmw =
+        ReadModifyWriteOption::no_read_modify_write(),
+        bool readonly = false, BulkRetrievalOption bulkretrieval =
+        BulkRetrievalOption::no_bulk_retrieval(),
+        bool directdb_get = true)
+    {
+        bool b_rmw;
+        u_int32_t bulk_retrieval = 0;
+
+        b_rmw = (rmw == ReadModifyWriteOption::read_modify_write());
+        // Read only cursor don't need acquire write lock.
+        if (readonly && b_rmw)
+            b_rmw = false;
+        if (readonly && bulkretrieval == BulkRetrievalOption::
+            BulkRetrieval)
+            bulk_retrieval = bulkretrieval.bulk_buf_size();
+
+        iterator itr(dynamic_cast<db_container*>(this),
+            bulk_retrieval, b_rmw, directdb_get, readonly);
+
+        open_itr(itr, readonly);
+        itr.first();
+        return itr;
+    }
+
+    /// Begin a read-only iterator.
+    /// \param bulkretrieval Same as that of
+    /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \param directdb_get Same as that of
+    /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \return The created const iterator.
+    /// \sa db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption,
+    /// bool);
+    const_iterator begin(BulkRetrievalOption bulkretrieval =
+        BulkRetrievalOption::no_bulk_retrieval(),
+        bool directdb_get = true) const
+    {
+        u_int32_t b_bulk_retrieval = (bulkretrieval ==
+            BulkRetrievalOption::BulkRetrieval ?
+            bulkretrieval.bulk_buf_size() : 0);
+
+        const_iterator itr((db_container*)this,
+            b_bulk_retrieval, false, directdb_get, true);
+
+        open_itr(itr, true);
+        itr.first();
+        return itr;
+    }
+
+    /// \brief Create an open boundary iterator.
+    /// \return Returns an invalid iterator denoting the position after
+    /// the last valid element of the container.
+    /// \sa db_vector::end()
+    inline iterator end()
+    {
+        iterator itr;
+
+        // end() is at an invalid position. We don't know what key it
+        // refers, so itr_status_ and inval_pos_type are the only
+        // data members to identify an iterator's position.
+        //
+        itr.itr_status_ = INVALID_ITERATOR_POSITION;
+        itr.inval_pos_type_ = iterator::IPT_AFTER_LAST;
+        itr.owner_ = (db_container*)this;
+        return itr;
+    }
+
+    /// \brief Create an open boundary iterator.
+    /// \return Returns an invalid const iterator denoting the position
+    /// after the last valid element of the container.
+    /// \sa db_vector::end() const
+    inline const_iterator end() const
+    {
+        const_iterator itr;
+
+        // end() is at an invalid position.
We don't know what key it + // refers, so itr_status_ and inval_pos_type are the only + // data members to identify an iterator's position. + // + itr.itr_status_ = INVALID_ITERATOR_POSITION; + itr.inval_pos_type_ = iterator::IPT_AFTER_LAST; + itr.owner_ = (db_container*)this; + return itr; + } + + /// Begin a read-write or readonly reverse iterator which sits on the + /// first key/data pair of the database. + /// \param rmw Same as that of + /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param bulkretrieval Same as that of + /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param directdb_get Same as that of + /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param read_only Same as that of + /// db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \return The created iterator. + /// \sa db_vector::begin(ReadModifyWriteOption, bool, + /// BulkRetrievalOption, bool) + /// \sa db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption, + /// bool); + reverse_iterator rbegin(ReadModifyWriteOption rmw = + ReadModifyWriteOption::no_read_modify_write(), + bool read_only = false, BulkRetrievalOption bulkretrieval = + BulkRetrievalOption::no_bulk_retrieval(), + bool directdb_get = true) + { + u_int32_t bulk_retrieval = 0; + + iterator itr = end(); + itr.rmw_csr_ = (rmw == ( + ReadModifyWriteOption::read_modify_write())) && !read_only; + itr.directdb_get_ = directdb_get; + itr.read_only_ = read_only; + if (read_only && bulkretrieval == BulkRetrievalOption:: + BulkRetrieval) + bulk_retrieval = bulkretrieval.bulk_buf_size(); + itr.bulk_retrieval_ = bulk_retrieval; + reverse_iterator ritr(itr); + + return ritr; + } + + /// Begin a read-only reverse iterator. 
+    /// \param bulkretrieval Same as that of
+    /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \param directdb_get Same as that of
+    /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool);
+    /// \return The created const iterator.
+    /// \sa db_vector::begin(ReadModifyWrite, bool, BulkRetrievalOption,
+    /// bool);
+    const_reverse_iterator rbegin(BulkRetrievalOption bulkretrieval =
+        BulkRetrievalOption::no_bulk_retrieval(),
+        bool directdb_get = true) const
+    {
+        const_iterator itr = end();
+        itr.bulk_retrieval_ = (bulkretrieval ==
+            BulkRetrievalOption::BulkRetrieval ?
+            bulkretrieval.bulk_buf_size() : 0);
+        itr.directdb_get_ = directdb_get;
+        itr.read_only_ = true;
+        const_reverse_iterator ritr(itr);
+
+        return ritr;
+    }
+
+    /// \brief Create an open boundary iterator.
+    /// \return Returns an invalid iterator denoting the position
+    /// before the first valid element of the container.
+    /// \sa db_vector::rend()
+    inline reverse_iterator rend()
+    {
+        reverse_iterator ritr;
+        ritr.inval_pos_type_ = iterator::IPT_BEFORE_FIRST;
+        return ritr;
+    }
+
+    /// \brief Create an open boundary iterator.
+    /// \return Returns an invalid const iterator denoting the position
+    /// before the first valid element of the container.
+    /// \sa db_vector::rend() const
+    inline const_reverse_iterator rend() const
+    {
+        const_reverse_iterator ritr;
+        ritr.inval_pos_type_ = iterator::IPT_BEFORE_FIRST;
+        return ritr;
+    }
+    //@} // iterator functions
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    //
+    // Begin functions that return container metadata.
+    /// \name Metadata Functions
+    /// These functions return metadata about the container.
+    //@{
+    /// Get container category.
+    /// Determines whether this container object is a std::map<>
+    /// equivalent(when returns false) or that of hash_map<>
+    /// class(when returns true).
This method is not in stl, but it + /// may be called by users because some operations are not supported + /// by both type(map/hash_map) of containers, you need to call this + /// function to distinguish the two types. dbstl will not stop you + /// from calling the wrong methods of this class. + /// \return Returns true if this container is a hash container based + /// on a Berkeley DB hash database; returns false if it is based on a + /// Berkeley DB btree database. + // + inline bool is_hash() const + { + DBTYPE dbtype = DB_UNKNOWN; + int ret; + + assert(this->get_db_handle() != NULL); + ret = this->get_db_handle()->get_type(&dbtype); + assert(ret == 0); + return dbtype == DB_HASH; + } + + /// Only for std::hash_map, return number of hash bucket in use. + /// This function supports auto-commit. + /// \return The number of hash buckets of the database. + size_type bucket_count() const + { + DBTYPE dbtype; + u_int32_t flags; + void *sp; + size_type sz; + int ret; + DbTxn*txn; + + assert(this->get_db_handle() != NULL); + ret = this->get_db_handle()->get_type(&dbtype); + assert(ret == 0); + if (dbtype != DB_HASH) { + THROW(InvalidFunctionCall, ("db_map<>::bucket_count")); + + } + flags = DB_FAST_STAT; + + // Here we use current_txn(), so we will get a valid + // transaction handle if we are using explicit transactions; + // and NULL if we are using autocommit, in which case bdb + // internal auto commit will be enabled automatically. + // + txn = ResourceManager::instance()-> + current_txn(this->get_db_handle()->get_env()); + BDBOP(this->get_db_handle()->stat(txn, &sp, flags), ret); + + sz = (size_type)(((DB_HASH_STAT*)sp)->hash_buckets); + free(sp); + return sz; + } + + /// Get container size. + // Return size of the map, can control whether compute + // accurately(slower if db is huge) or not. + /// This function supports auto-commit. + /// \return Return the number of key/data pairs in the container. 
+    /// \param accurate This function uses database's statistics to get
+    /// the number of key/data pairs. The statistics mechanism will either
+    /// scan the whole database to find the accurate number or use the
+    /// number of last accurate scanning, and thus much faster. If there
+    /// are millions of key/data pairs, the scanning can take some while,
+    /// so in that case you may want to set the "accurate" parameter to
+    /// false.
+    size_type size(bool accurate = true) const
+    {
+        u_int32_t flags;
+        void *sp;
+        DBTYPE dbtype;
+        size_t sz;
+        int ret;
+        DbTxn*txn;
+
+        flags = accurate ? 0 : DB_FAST_STAT;
+        BDBOP(this->get_db_handle()->get_type(&dbtype), ret);
+
+        // Here we use current_txn(), so we will get a valid
+        // transaction handle if we are using explicit transactions;
+        // and NULL if we are using autocommit, in which case bdb
+        // internal auto commit will be enabled automatically.
+        //
+        txn = ResourceManager::instance()->
+            current_txn(this->get_db_handle()->get_env());
+        BDBOP(this->get_db_handle()->stat(txn, &sp, flags), ret);
+
+        assert((dbtype == DB_BTREE) || (dbtype == DB_HASH));
+        // dbtype is BTREE OR HASH, no others.
+        sz = dbtype == DB_BTREE ? ((DB_BTREE_STAT*)sp)->
+            bt_ndata : ((DB_HASH_STAT*)sp)->hash_ndata;
+        free(sp);
+        return sz;
+    }
+
+    /// Get max size.
+    /// The returned size is not the actual limit of database. See the
+    /// Berkeley DB limits to get real max size.
+    /// \return A meaningless huge number.
+    /// \sa db_vector::max_size()
+    inline size_type max_size() const
+    {
+        return SIZE_T_MAX;
+    }
+
+    /// Returns whether this container is empty.
+    /// This function supports auto-commit.
+    /// \return True if empty, false otherwise.
+    bool empty() const
+    {
+        // If we fail to move to the first record, the db is
+        // supposed to be empty.
+        const_iterator witr;
+        bool ret;
+
+        try {
+            this->begin_txn();
+            init_itr(witr);
+            open_itr(witr, true);
+            ret = witr.first() != 0;
+            this->commit_txn();
+            return ret;
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+    }
+    //@}
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    // Begin element accessors.
+    //
+    // Don't use transaction wrapper(begin/commit_txn) here because
+    // even insert(the only logged operation) is only part of the
+    // whole expression---the use case is dmmap[key] = value;
+    // So insert and another put call should
+    // be atomic, so there must be an outside transaction.
+    //
+    // As stated in STL specification, this method can't have "const"
+    // modifier because it is likely to insert a new record.
+    /// Retrieve data element by key.
+    /// This function returns an reference to the underlying data element
+    /// of the specified key x. The returned object can be used to read or
+    /// write the data element of the key/data pair.
+    /// Do use a data_type_wrap of db_map or value_type::second_type(they
+    /// are the same) type of variable to hold the return value of this
+    /// function.
+    /// \param x The target key to get value from.
+    /// \return Data element reference.
+    //
+    data_type_wrap operator[] (const key_type& x)
+    {
+        iterator witr, *pitr;
+        int ret;
+
+        init_itr(witr);
+        open_itr(witr, false);
+
+        if (witr.move_to(x) != 0) {
+            ddt d;//default value
+            DbstlInitializeDefault<ddt> initdef(d);
+            // Insert (x, d) as place holder.
+            witr.pcsr_->insert(x, d, DB_KEYLAST);
+            // Should be OK this time.
+            ret = witr.move_to(x);
+            assert(ret == 0);
+            // Return the reference to the data item of x.
+        }
+
+        //witr->curpair_.second._DB_STL_SetDelItr();
+        pitr = new iterator(witr);
+        data_type_wrap ref(pitr->curpair_.second);
+        ref._DB_STL_SetDelItr();
+        return ref;
+    }
+
+    // Only returns a right-value, no left value for assignment, so
+    // directly return the value rather than the ElementRef/ElementHolder
+    // wrapper. Must use a const reference to this container to call this
+    // const function.
+    //
+    /// Retrieve data element by key.
+    /// This function returns the value of the underlying data element of
+    /// specified key x. You can only read the element, but unable to
+    /// update the element via the return value of this function. And you
+    /// need to use the container's const reference to call this method.
+    /// \param x The target key to get value from.
+    /// \return Data element, read only, can't be used to modify it.
+    const ddt operator[] (const key_type& x) const
+    {
+        iterator witr;
+
+        init_itr(witr);
+        open_itr(witr);
+
+        // x is supposed to be in this map.
+        if (witr.move_to(x) != 0) {
+            THROW0(NoSuchKeyException);
+
+        }
+        return witr.curpair_.second._DB_STL_value();
+    }
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    // Begin functions that erase elements from the container.
+    //
+    // Can not reopen external/outside iterator's cursor, pos must
+    // already be in a transactional context.
+    // There is identical function in db_multimap<> and db_multiset
+    // for this function, we MUST keep the code consistent on update!
+    // Go to db_multimap<>::erase(const key_type&) to see why.
+    /// \name Erase Functions
+    /// \sa http://www.cplusplus.com/reference/stl/map/erase/
+    //@{
+    /// Erase a key/data pair at specified position.
+    /// \param pos An valid iterator of this container to erase.
+    inline void erase (iterator pos)
+    {
+        if (pos == end())
+            return;
+        pos.pcsr_->del();
+    }
+
+    /// Erase elements by key.
+    /// All key/data pairs with specified key x will be removed from
+    /// underlying database.
+    /// This function supports auto-commit.
+    /// \param x The key to remove from the container.
+    /// \return The number of key/data pairs removed.
+    // There is identical function in db_multimap<> and db_multiset
+    // for this function, we MUST keep the code consistent on update!
+    // Go to db_multimap<>::erase(const key_type&) to see why.
+    //
+    size_type erase (const key_type& x)
+    {
+        size_type cnt;
+        iterator itr;
+
+        this->begin_txn();
+        try {
+            pair<iterator, iterator> rg = equal_range(x);
+            for (itr = rg.first, cnt = 0; itr != rg.second; ++itr) {
+                cnt++;
+                itr.pcsr_->del();
+            }
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+        this->commit_txn();
+        return cnt;
+    }
+
+    // Can not be auto commit because first and last are already open.
+    // There is identical function in db_multimap<> and db_multiset
+    // for this function, we MUST keep the code consistent on update!
+    // Go to db_multimap<>::erase(const key_type&) to see why.
+    /// Range erase. Erase all key/data pairs within the valid range
+    /// [first, last).
+    /// \param first The closed boundary of the range.
+    /// \param last The open boundary of the range.
+    inline void erase (iterator first, iterator last)
+    {
+        iterator i;
+
+        for (i = first; i != last; ++i)
+            i.pcsr_->del();
+    }
+    //@}
+
+    /// Swap content with container mp.
+    /// This function supports auto-commit.
+    /// \param mp The container to swap content with.
+    /// \param b_truncate: See db_vector::swap() for details.
+    /// \sa http://www.cplusplus.com/reference/stl/map/swap/
+    /// db_vector::clear()
+    void swap (db_map& mp, bool b_truncate = true)
+    {
+        Db *swapdb = NULL;
+        std::string dbfname(64, '\0');
+
+        verify_db_handles(mp);
+        this->begin_txn();
+        try {
+            swapdb = this->clone_db_config(this->get_db_handle(),
+                dbfname);
+            db_map tmap(swapdb,
+                swapdb->get_env(), begin(), end());
+            clear(b_truncate);// Clear this db_map<> object.
+            typename db_map::
+                iterator itr1, itr2;
+            itr1 = mp.begin();
+            itr2 = mp.end();
+            insert(itr1, itr2);
+            mp.clear(b_truncate);
+            itr1 = tmap.begin();
+            itr2 = tmap.end();
+            mp.insert(itr1, itr2);
+            tmap.clear();
+
+            swapdb->close(0);
+            if (dbfname[0] != '\0') {
+                swapdb = new Db(NULL, DB_CXX_NO_EXCEPTIONS);
+                swapdb->remove(dbfname.c_str(), NULL, 0);
+                swapdb->close(0);
+                delete swapdb;
+            }
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+        this->commit_txn();
+    }
+
+    /// Clear contents in this container.
+    /// This function supports auto-commit.
+    /// \param b_truncate See db_vector::clear(bool) for details.
+    /// \sa db_vector::clear(bool)
+    void clear (bool b_truncate = true)
+    {
+        int ret;
+        u_int32_t flag;
+        DbEnv *penv = this->get_db_handle()->get_env();
+
+        if (b_truncate) {
+            ResourceManager::instance()->close_db_cursors(
+                this->get_db_handle());
+            BDBOP2(this->get_db_handle()->truncate(
+                ResourceManager::instance()->current_txn(penv),
+                NULL, 0), ret, this->abort_txn());
+        } else {
+            ReadModifyWriteOption brmw(
+                ReadModifyWriteOption::no_read_modify_write());
+
+            BDBOP(penv->get_open_flags(&flag), ret);
+
+            // DB_RMW flag requires locking subsystem started.
+            if ((flag & DB_INIT_LOCK) || (flag & DB_INIT_CDB) ||
+                (flag & DB_INIT_TXN))
+                brmw =
+                    ReadModifyWriteOption::read_modify_write();
+            try {
+                // In if branch, truncate is capable of
+                // autocommit internally.
+                this->begin_txn();
+                erase(begin(brmw, false), end());
+                this->commit_txn();
+            } catch (...) {
+                this->abort_txn();
+                throw;
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////
+    // Begin functions that searches a key in the map.
+    /// \name Searching Functions
+    /// The following functions are returning iterators, and they by
+    /// default return read-write iterators. If you intend to use the
+    /// returned iterator only to read, you should call the const version
+    /// of each function using a const reference to this container.
+    /// Using const iterators can potentially promote concurrency a lot.
+    /// You can also set the readonly parameter to each non-const version
+    /// of the functions to true if you don't use the returned iterator
+    /// to write, which also promotes concurrency and overall performance.
+    //@{
+    /// Find the key/data pair with specified key x.
+    /// \param x The target key to find.
+    /// \return The valid const iterator sitting on the key x, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/find/
+    const_iterator find (const key_type& x) const
+    {
+        const_iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, true);
+        if (witr.move_to(x))
+            return ((self *)this)->end();
+
+        return witr;
+    }
+
+    /// Find the greatest key less than or equal to x.
+    /// \param x The target key to find.
+    /// \return The valid const iterator sitting on the key, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/lower_bound/
+    const_iterator lower_bound (const key_type& x) const
+    {
+        const_iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, true);
+        if (witr.move_to(x, DB_SET_RANGE))
+            return ((self *)this)->end();
+
+        return witr;
+    }
+
+    /// Find the range within which all keys equal to specified key x.
+    /// \param x The target key to find.
+    /// \return The range [first, last).
+    /// \sa http://www.cplusplus.com/reference/stl/map/equal_range/
+    pair<const_iterator, const_iterator>
+    equal_range (const key_type& x) const
+    {
+        pair<const_iterator, const_iterator> pr;
+        const_iterator witr;
+        kdt k;
+
+        init_itr(witr);
+        open_itr(witr, true);
+        if (witr.move_to(x, DB_SET_RANGE)) {
+            pr.first = ((self *)this)->end();
+            pr.second = ((self *)this)->end();
+        } else {
+            pr.first = witr;
+            // If no duplicate keys, move one next is sufficient.
+            if (witr.pcsr_->get_current_key(k) == 0 && k == x)
+                ++witr;
+            pr.second = witr;
+        }
+
+
+        return pr;
+    }
+
+    /// Find the key/data pair with specified key x.
+    /// \param x The target key to find.
+    /// \param readonly Whether the returned iterator is readonly.
+    /// \return The valid iterator sitting on the key x, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/find/
+    iterator find (const key_type& x, bool readonly = false)
+    {
+        iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, readonly);
+        if (witr.move_to(x))
+            return ((self *)this)->end();
+
+        return witr;
+    }
+
+    /// Find the greatest key less than or equal to x.
+    /// \param x The target key to find.
+    /// \param readonly Whether the returned iterator is readonly.
+    /// \return The valid iterator sitting on the key, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/lower_bound/
+    iterator lower_bound (const key_type& x, bool readonly = false)
+    {
+        iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, readonly);
+        if (witr.move_to(x, DB_SET_RANGE))
+            return ((self *)this)->end();
+
+        return witr;
+    }
+
+    /// Find the range within which all keys equal to specified key x.
+    /// \param x The target key to find.
+    /// \param readonly Whether the returned iterator is readonly.
+    /// \return The range [first, last).
+    /// \sa http://www.cplusplus.com/reference/stl/map/equal_range/
+    pair<iterator, iterator>
+    equal_range (const key_type& x, bool readonly = false)
+    {
+        pair<iterator, iterator> pr;
+        iterator witr;
+        kdt k;
+
+        init_itr(witr);
+        open_itr(witr, readonly);
+        if (witr.move_to(x, DB_SET_RANGE)) {
+            pr.first = ((self *)this)->end();
+            pr.second = ((self *)this)->end();
+        } else {
+            pr.first = witr;
+            // If no dup, move one next is sufficient.
+            if (witr.pcsr_->get_current_key(k) == 0 && k == x)
+                ++witr;
+            pr.second = witr;
+        }
+
+
+        return pr;
+    }
+
+    /// Count the number of key/data pairs having specified key x.
+    /// \param x The key to count.
+    /// \return The number of key/data pairs having x as key within the
+    /// container.
+    /// \sa http://www.cplusplus.com/reference/stl/map/count/
+    size_type count (const key_type& x) const
+    {
+        int ret;
+        const_iterator witr;
+        try {
+            this->begin_txn();
+            init_itr(witr);
+            open_itr(witr, true);
+            ret = witr.move_to(x);
+            this->commit_txn();
+            if (ret != 0)
+                return 0;// No such key/data pair.
+            // No duplicates, so it must be one, we don't call
+            // Dbc::count because we don't have to.
+            //
+            else
+                return 1;
+        } catch (...) {
+            this->abort_txn();
+            throw;
+        }
+
+    }
+
+    /// Find the least key greater than x.
+    /// \param x The target key to find.
+    /// \return The valid iterator sitting on the key, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/upper_bound/
+    const_iterator upper_bound (const key_type& x) const
+    {
+        const_iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, true);
+
+        if (witr.move_to(x, DB_SET_RANGE))
+            return ((self *)this)->end();
+
+        kdt k;
+
+        // x exists in db, and witr.pcsr_ points to x in db.
+        if (witr.pcsr_->get_current_key(k) == 0 && k == x)
+            ++witr;// No dup, so move one next is sufficient.
+
+        return witr;
+    }
+
+    /// Find the least key greater than x.
+    /// \param x The target key to find.
+    /// \param readonly Whether the returned iterator is readonly.
+    /// \return The valid iterator sitting on the key, or an
+    /// invalid one.
+    /// \sa http://www.cplusplus.com/reference/stl/map/upper_bound/
+    iterator upper_bound (const key_type& x, bool readonly = false)
+    {
+        iterator witr;
+
+        init_itr(witr);
+        open_itr(witr, readonly);
+
+        if (witr.move_to(x, DB_SET_RANGE))
+            return ((self *)this)->end();
+
+        kdt k;
+
+        // x exists in db, and witr.pcsr_ points to x in db.
+        if (witr.pcsr_->get_current_key(k) == 0 && k == x)
+            ++witr;// No dup, so move one next is sufficient.
+
+        return witr;
+    }
+    //@}
+    ////////////////////////////////////////////////////////////////
+
+    // Compare function, return true if contents in m1 and m2 are
+    // identical otherwise return false.
+ // Note that we don't require the key-data pairs' order be identical + // Put into db_map<> rather than global to utilize transactional + // support. + /// Map content equality comparison operator. + /// This function does not rely on key order. For a set of keys S1 in + /// this container and another set of keys S2 of container m2, if + /// set S1 contains S2 and S2 contains S1 (S1 equals to S2) and each + /// data element of a key K in S1 from this container equals the data + /// element of K in m2, the two db_map<> containers equal. Otherwise + /// they are not equal. + /// \param m2 The other container to compare against. + /// \return Returns true if they have equal content, false otherwise. + bool operator==(const db_map& m2) const + { + bool ret; + const db_map& m1 = *this; + COMPARE_CHECK(m2) + verify_db_handles(m2); + try { + this->begin_txn(); + if (m1.size() != m2.size()) + ret = false; + else { + typename db_map:: + const_iterator i1, i2; + + for (i1 = m1.begin(); i1 != m1.end(); ++i1) { + if (m2.count(i1->first) == 0) { + ret = false; + goto exit; + } + i2 = m2.find(i1->first); + if ((i2->second == i1->second) == + false) { + ret = false; + goto exit; + } + } // for + + ret = true; + } +exit: + this->commit_txn(); + return ret; + + } catch (...) { + this->abort_txn(); + throw; + } + // Now that m1 and m2 has the same number of unique elements and all + // elements of m1 are in m2, thus there can be no element of m2 + // that dose not belong to m1, so we won't verify each element of + // m2 are in m1. + // + } + + /// Container unequality comparison operator. + /// \param m2 The container to compare against. + /// \return Returns false if equal, true otherwise. 
+ bool operator!=(const db_map& m2) const + { + return !this->operator ==(m2); + } + + +protected: + + virtual const char* verify_config(Db*dbp, DbEnv* envp) const + { + DBTYPE dbtype; + u_int32_t oflags, sflags; + int ret; + const char *err = NULL; + + err = db_container::verify_config(dbp, envp); + if (err) + return err; + + BDBOP(dbp->get_type(&dbtype), ret); + BDBOP(dbp->get_open_flags(&oflags), ret); + BDBOP(dbp->get_flags(&sflags), ret); + + if (dbtype != DB_BTREE && dbtype != DB_HASH) + err = +"wrong database type, only DB_BTREE and DB_HASH allowed for db_map<> class"; + + if (oflags & DB_TRUNCATE) + err = +"do not specify DB_TRUNCATE flag to create a db_map<> object"; + if ((sflags & DB_DUP) || (sflags & DB_DUPSORT)) + err = +"db_map<> can not be backed by database permitting duplicate keys"; + if (sflags & DB_RECNUM) + err = "no DB_RECNUM flag allowed in db_map<>"; + + + return err; + + } + + + typedef ddt mapped_type; + typedef int (*db_compare_fcn_t)(Db *db, const Dbt *dbt1, + const Dbt *dbt2); + typedef u_int32_t (*h_hash_fcn_t) + (Db *, const void *bytes, u_int32_t length); + typedef db_set_iterator db_multiset_iterator_t; + + static bool compare_keys(Db *pdb, const kdt& k1, const kdt& k2) + { + DBTYPE dbtype; + int ret; + bool bret; + u_int32_t sz1, sz2; + + assert(pdb != NULL); + ret = pdb->get_type(&dbtype); + assert(ret == 0); + db_compare_fcn_t comp = NULL; + + if (dbtype == DB_BTREE) + BDBOP(pdb->get_bt_compare(&comp), ret); + else // hash + BDBOP(pdb->get_h_compare(&comp), ret); + + DataItem key1(k1, true), key2(k2, true); + Dbt &kdbt1 = key1.get_dbt(); + Dbt &kdbt2 = key2.get_dbt(); + sz1 = kdbt1.get_size(); + sz2 = kdbt2.get_size(); + + if (comp == NULL) { + ret = memcmp(&k1, &k2, sz1 > sz2 ? sz2 : sz1); + return (ret == 0) ? (sz1 < sz2) : (ret < 0); + } + // Return strict weak ordering. 
+ bret = (comp(pdb, &kdbt1, &kdbt2) < 0); + return bret; + } + + void open_itr(db_map_base_iterator&itr, + bool readonly = false) const + { + u_int32_t oflags = 0; + int ret; + DbEnv *penv = this->get_db_handle()->get_env(); + + if (!readonly && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)) , ret); + if ((oflags & DB_INIT_CDB) != 0) + ((self *)this)->set_cursor_open_flags( + this->get_cursor_open_flags() | + DB_WRITECURSOR); + } + + itr.itr_status_ = itr.pcsr_->open((db_container*)this, + this->get_cursor_open_flags()); + itr.owner_ = (db_container*)this; + } + + void open_itr(const_reverse_iterator + &itr, bool readonly = false) const + { + u_int32_t oflags = 0; + int ret; + DbEnv *penv = this->get_db_handle()->get_env(); + + if (!readonly && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)) , ret); + if ((oflags & DB_INIT_CDB) != 0) + ((self *)this)->set_cursor_open_flags( + this->get_cursor_open_flags() | + DB_WRITECURSOR); + } + itr.itr_status_ = itr.pcsr_->open((db_container*)this, + this->get_cursor_open_flags()); + itr.owner_ = (db_container*)this; + } + + inline void init_itr(db_map_base_iterator & + witr) const { + typedef DbCursor cursor_type; + witr.pcsr_.set_cursor(new cursor_type()); + witr.owner_ = (db_container*)this; + } + + // Do not use begin_txn/commit_txn in non-public(internal) methods, + // only wrap in public methods. + // + inline void copy_db(db_map &x) + { + + // Make sure clear can succeed if there are cursors + // open in other threads. + clear(false); + insert(x.begin(), x.end()); + + } + +};//db_map +//@} + + +////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +// +// db_multimap class template definition +// +// This class derives from db_map<>, using many of its methods, +// it also hides some functions that should not be used in +// this class, such as operator[]. +// +// The underlying db must allow duplicate. 
+// iterator_t is default argument, see forward declaration at the +// head of this file. +// iterator_t is default argument, see forward declaration at the +// head of this file +// +/// \ingroup dbstl_containers +//@{ +/// This class is the combination of std::multimap and hash_multimap. By +/// setting database handles as DB_BTREE or DB_HASH type respectively, you +/// will be using an equivalent of std::multimap or hash_multimap respectively. +/// Database(dbp) and environment(penv) handle requirement: +/// The dbp handle must meet the following requirement: +/// 1. Database type should be DB_BTREE or DB_HASH. +/// 2. Either DB_DUP or DB_DUPSORT flag must be set. Note that so far +/// Berkeley DB does not allow DB_DUPSORT be set and the database is storing +/// identical key/data pairs, i.e. we can't store two (1, 2), (1, 2) pairs +/// into a database D with DB_DUPSORT flag set, but only can do so with DB_DUP +/// flag set; But we can store a (1, 2) pair and a (1, 3) pair into D with +/// DB_DUPSORT flag set. So if your data set allows DB_DUPSORT flag, you +/// should set it to gain a lot of performance promotion. +/// 3. No DB_RECNUM flag set. +/// 4. No DB_TRUNCATE specified in database open flags. +/// 5. DB_THREAD must be set if you are sharing the database handle across +/// multiple threads directly, or indirectly by sharing the container object +/// across multiple threads. +/// \param kdt The key data type. +/// \param ddt The data data type. db_multimap stores key/data pairs. +/// \param value_type_sub Do not specify anything if ddt type is a +/// class/struct type; Otherwise, specify ElementHolder to it. +/// \param iterator_t Never specify anything to this type parameter. It is +/// only used internally. 
+/// \sa db_container db_map +template +class _exported db_multimap : public db_map +{ +protected: + typedef db_multimap self; + typedef db_map base; +public: + typedef iterator_t iterator; + typedef typename iterator::const_version const_iterator; + typedef db_reverse_iterator reverse_iterator; + typedef db_reverse_iterator + const_reverse_iterator; + typedef kdt key_type; + typedef ddt data_type; + typedef value_type_sub data_type_wrap; + typedef pair value_type_wrap; + typedef pair value_type; + typedef value_type_wrap* pointer; + typedef value_type_wrap& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor + /// Constructor. + /// See class detail for handle requirement. + /// \param dbp The database handle. + /// \param envp The database environment handle. + /// \sa db_map::db_map(Db*, DbEnv*) db_vector::db_vector(Db*, DbEnv*) + explicit db_multimap (Db *dbp = NULL, DbEnv* envp = NULL) : + base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + const char *errmsg; + + this->init_members(dbp, envp); + this->open_db_handles(dbp, envp, DB_BTREE, DB_CREATE | + DB_THREAD, DB_DUP); + // We can't call base(dbp, envp) here because it will verify + // failed and we can't call db_container directly, it is + // illegal to do so. + if ((errmsg = verify_config(dbp, envp)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + + } + this->set_db_handle_int(dbp, envp); + this->set_auto_commit(dbp); + } + + /// Iteration constructor. + /// Iterates between first and last, setting + /// a copy of each of the sequence of elements as the content of + /// the container object. + /// This function supports auto-commit. + /// See class detail for handle requirement. + /// \param dbp The database handle. + /// \param envp The database environment handle. 
+ /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \sa db_map::db_map(Db*, DbEnv*, InputIterator, InputIterator) + /// db_vector::db_vector(Db*, DbEnv*) + // + template + db_multimap (Db *dbp, DbEnv* envp, InputIterator first, + InputIterator last) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + const char *errmsg; + + this->init_members(dbp, envp); + this->open_db_handles(dbp, envp, DB_BTREE, DB_CREATE | + DB_THREAD, DB_DUP); + // Note that we can't call base(dbp, envp) here because it + // will verify failed; And we can't call db_container + // directly because it is illegal to do so. + if ((errmsg = verify_config(dbp, envp)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + + } + this->set_db_handle_int(dbp, envp); + this->set_auto_commit(dbp); + + + this->begin_txn(); + try { + insert(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + /// Copy constructor. + /// Create an database and insert all key/data pairs in x into this + /// container. x's data members are not copied. + /// This function supports auto-commit. + /// \param x The other container to initialize this container. + /// \sa db_container(const db_container&) db_map(const db_map&) + db_multimap (const self& x) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + this->init_members(x); + verify_db_handles(x); + this->set_db_handle_int(this->clone_db_config( + x.get_db_handle()), x.get_db_env_handle()); + assert(this->get_db_handle() != NULL); + + this->begin_txn(); + try { + copy_db((self&)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + virtual ~db_multimap(){} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that modify multimap content, e.g. 
insert, + // erase, assignment and swap. + // + /// Container content assignment operator. + /// This function supports auto-commit. + /// \param x The other container whose key/data pairs will be inserted + /// into this container. Old content in this containers are discarded. + /// \sa http://www.cplusplus.com/reference/stl/multimap/operator=/ + inline const self& operator=(const self&x) + { + ASSIGNMENT_PREDCOND(x) + db_container::operator =(x); + verify_db_handles(x); + assert(this->get_db_handle() != NULL); + this->begin_txn(); + try { + this->copy_db((self &)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + return x; + + } + + /// \name Insert Functions + /// \sa http://www.cplusplus.com/reference/stl/multimap/insert/ + //@{ + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + template + void insert (InputIterator first, InputIterator last) + { + InputIterator ii; + iterator witr; + + init_itr(witr); + open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(ii->first, ii->second, + DB_KEYLAST); + } + + // Compiler can't see the inherited version, unknown why. + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + inline void insert (const_iterator& first, const_iterator& last) { + base::insert(first, last); + } + + // Insert x into this container, the other two versions are + // inherited from db_map<> class. + // Methods returning an iterator or using an iterator as parameter + // can not be internally wrapped by + // begin/commit_txn because a cursor is inside its transaction, it + // must have been closed after transaction commit, and reopen is + // unsafe in multithreaded access. 
+ // + /// Insert a single key/data pair if the key is not in the container. + /// \param x The key/data pair to insert. + /// \return A pair P, if insert OK, i.e. the inserted key wasn't in the + /// container, P.first will be the iterator sitting on the inserted + /// key/data pair, and P.second is true; otherwise P.first is an + /// invalid iterator and P.second is false. + inline iterator insert (const value_type& x) + { + iterator witr; + + this->init_itr(witr); + this->open_itr(witr); + witr.itr_status_ = witr.pcsr_->insert(x.first, x.second, + DB_KEYLAST); + witr.refresh(false); + return witr; + } + //@} + + /// Swap content with another multimap container. + /// This function supports auto-commit. + /// \param mp The other container to swap content with. + /// \param b_truncate See db_map::swap() for details. + /// \sa db_vector::clear() + void swap (db_multimap& mp, + bool b_truncate = true) + { + Db *swapdb = NULL; + std::string dbfname(64, '\0'); + + verify_db_handles(mp); + this->begin_txn(); + try { + swapdb = this->clone_db_config(this->get_db_handle(), + dbfname); + + db_multimap tmap( + swapdb, swapdb->get_env(), + this->begin(), this->end()); + // Clear this db_multimap<> object. + this->clear(b_truncate); + typename db_multimap:: + iterator mpbitr, mpeitr; + + mpbitr = mp.begin(); + mpeitr = mp.end(); + insert(mpbitr, mpeitr); + mp.clear(b_truncate); + mpbitr = tmap.begin(); + mpeitr = tmap.end(); + mp.insert(mpbitr, mpeitr); + tmap.clear(); + + swapdb->close(0); + if (dbfname[0] != '\0') { + swapdb = new Db(NULL, DB_CXX_NO_EXCEPTIONS); + swapdb->remove(dbfname.c_str(), NULL, 0); + swapdb->close(0); + delete swapdb; + } + this->commit_txn(); + } catch (...) { + this->abort_txn(); + throw; + } + + } + + // This method has identical code to db_map<>::erase(const key_type&), + // but we can NOT simply inherit and use + // that version because: + // 1. 
The db_map<>::erase called equal_range which is overloaded in + // db_multimap, so if we want the inherited erase to call the right + // version of equal_range, we have to make equal_range virtual + // 2. Making equal_range virtual will make the code not build--- The + // default template parameter can't be replaced by real parameter, + // unknow why. + // So we have to copy the code from db_map<> to here, and keep the + // code consistent on each update. + // Also, when I copy only this function, I found other erase overloaded + // functions also have to be copied from db_map<> to db_multimap and + // db_multiset, otherwise the code don't build, so I + // finally have to copy all versions of erase functions into db_multiset + // and db_multimap. When updating an erase function, do update all + // three versions. + /// \name Erase Functions + /// \sa http://www.cplusplus.com/reference/stl/multimap/erase/ + //@{ + /// Erase elements by key. + /// All key/data pairs with specified key x will be removed from + /// underlying database. + /// This function supports auto-commit. + /// \param x The key to remove from the container. + /// \return The number of key/data pairs removed. + size_type erase (const key_type& x) + { + size_type cnt; + iterator itr; + + this->begin_txn(); + try { + pair rg = equal_range(x); + for (itr = rg.first, cnt = 0; itr != rg.second; ++itr) { + cnt++; + itr.pcsr_->del(); + } + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + return cnt; + } + + // Can not reopen external/outside iterator's cursor, pos must + // already be in a transactional context. + // There is identical function in db_multimap<> and db_multiset + // for this function, we MUST keep the code consistent on update! + // Go to db_multimap<>::erase(const key_type&) to see why. + // + /// Erase a key/data pair at specified position. + /// \param pos An valid iterator of this container to erase. 
+ inline void erase (iterator pos) + { + if (pos == this->end()) + return; + pos.pcsr_->del(); + } + + // Can not be auto commit because first and last are already open. + // There is identical function in db_multimap<> and db_multiset + // for this function, we MUST keep the code consistent on update! + // Go to db_multimap<>::erase(const key_type&) to see why. + // + /// Range erase. Erase all key/data pairs within the valid range + /// [first, last). + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + inline void erase (iterator first, iterator last) + { + + for (iterator i = first; i != last; ++i) + i.pcsr_->del(); + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that searches a key in the multimap. + /// \name Searching Functions + /// See of db_map's searching functions group for details about + /// iterator, function version and parameters. + /// \sa db_map + //@{ + /// Find the range within which all keys equal to specified key x. + /// \param x The target key to find. + /// \return The range [first, last). + /// \sa http://www.cplusplus.com/reference/stl/multimap/equal_range/ + pair + equal_range (const key_type& x) const + { + pair pr; + const_iterator witr; + kdt k; + + this->init_itr(witr); + this->open_itr(witr, true); + // Move witr to x if this contains x and return the itr, or if + // no x, position witr to the least key greater than x. + // + if (witr.move_to(x, DB_SET_RANGE)) { + pr.first = ((self *)this)->end(); + pr.second = ((self *)this)->end(); + } else { + pr.first = witr; + + // No dup, so move one next is sufficient. + if (witr.pcsr_->get_current_key(k) == 0 && k == x) + witr.next(DB_NEXT_NODUP); + pr.second = witr; + } + + return pr; + } + + /// Find the range within which all keys equal to specified key x. + /// \param x The target key to find. 
+ /// \param readonly Whether the returned iterator is readonly. + /// \return The range [first, last). + /// \sa http://www.cplusplus.com/reference/stl/multimap/equal_range/ + pair + equal_range (const key_type& x, bool readonly = false) + { + pair pr; + iterator witr; + kdt k; + + this->init_itr(witr); + this->open_itr(witr, readonly); + // Move witr to x if this contains x and return the itr, or if + // no x, position witr to the least key greater than x. + // + if (witr.move_to(x, DB_SET_RANGE)) { + pr.first = ((self *)this)->end(); + pr.second = ((self *)this)->end(); + } else { + pr.first = witr; + + // No dup, so move one next is sufficient. + if (witr.pcsr_->get_current_key(k) == 0 && k == x) + witr.next(DB_NEXT_NODUP); + pr.second = witr; + } + + return pr; + } + + /// Find equal range and number of key/data pairs in the range. + /// This function also returns the number of elements within the + /// returned range via the out parameter nelem. + /// \param x The target key to find. + /// \param nelem The output parameter to take back the number of + /// key/data pair in the returned range. + /// \sa http://www.cplusplus.com/reference/stl/multimap/equal_range/ + pair + equal_range_N (const key_type& x, size_t& nelem) const + { + int ret; + pair pr; + size_t stepped; + const_iterator witr; + kdt k; + + this->init_itr(witr); + this->open_itr(witr, true); + // Move witr to x if this contains x and return the itr, or if + // no x, position witr to the least key greater than x. 
+ // + if (witr.move_to(x, DB_SET_RANGE)) { + pr.first = ((self *)this)->end(); + pr.second = ((self *)this)->end(); + nelem = 0; + } else { + pr.first = witr; + if (witr.pcsr_->get_current_key(k) == 0 && k == x) { + for (stepped = 1, ret = + witr.pcsr_->next(DB_NEXT_DUP); ret == 0; + ret = witr.pcsr_->next(DB_NEXT_DUP), + stepped += 1) + ; + pr.second = ++witr; + nelem = stepped; + } else { + pr.second = witr; + nelem = 0; + } + } + return pr; + } + + /// Find equal range and number of key/data pairs in the range. + /// This function also returns the number of elements within the + /// returned range via the out parameter nelem. + /// \param x The target key to find. + /// \param nelem The output parameter to take back the number of + /// key/data pair in the returned range. + /// \param readonly Whether the returned iterator is readonly. + /// \sa http://www.cplusplus.com/reference/stl/multimap/equal_range/ + // + pair + equal_range_N (const key_type& x, size_t& nelem, + bool readonly = false) + { + int ret; + pair pr; + size_t stepped; + iterator witr; + kdt k; + + this->init_itr(witr); + this->open_itr(witr, readonly); + // Move witr to x if this contains x and return the itr, or if + // no x, position witr to the least key greater than x. + // + if (witr.move_to(x, DB_SET_RANGE)) { + pr.first = ((self *)this)->end(); + pr.second = ((self *)this)->end(); + nelem = 0; + } else { + pr.first = witr; + if (witr.pcsr_->get_current_key(k) == 0 && k == x) { + for (stepped = 1, ret = + witr.pcsr_->next(DB_NEXT_DUP); ret == 0; + ret = witr.pcsr_->next(DB_NEXT_DUP), + stepped += 1) + ; + pr.second = ++witr; + nelem = stepped; + } else { + pr.second = witr; + nelem = 0; + } + } + return pr; + } + + /// Count the number of key/data pairs having specified key x. + /// \param x The key to count. + /// \return The number of key/data pairs having x as key within the + /// container. 
+ /// \sa http://www.cplusplus.com/reference/stl/multimap/count/ + size_type count (const key_type& x) const + { + int ret; + size_type cnt; + iterator witr; + + try { + this->begin_txn(); + this->init_itr(witr); + this->open_itr(witr, true); + ret = witr.move_to(x); + if (ret) + cnt = 0; + else + cnt = witr.pcsr_->count(); + this->commit_txn(); + } catch (...) { + this->abort_txn(); + throw; + } + + return cnt; + } + + /// Find the least key greater than x. + /// \param x The target key to find. + /// \return The valid iterator sitting on the key, or an + /// invalid one. + /// \sa http://www.cplusplus.com/reference/stl/multimap/upper_bound/ + const_iterator upper_bound ( + const key_type& x) const + { + int ret; + const_iterator witr; + + this->init_itr(witr); + this->open_itr(witr, true); + + // No key equal to or greater than x. + if (witr.move_to(x, DB_SET_RANGE)) + return ((self *)this)->end(); + + kdt k; + // x exists in db, and witr.pcsr_ points to x in db, + // need to move cursor to next different key. + // + if (witr.pcsr_->get_current_key(k) == 0 && k == x) + ret = witr.next(DB_NEXT_NODUP); + + return witr; + } + + /// Find the least key greater than x. + /// \param x The target key to find. + /// \param readonly Whether the returned iterator is readonly. + /// \return The valid iterator sitting on the key, or an + /// invalid one. + /// \sa http://www.cplusplus.com/reference/stl/multimap/upper_bound/ + iterator upper_bound (const key_type& x, bool readonly = false) + { + int ret; + iterator witr; + + this->init_itr(witr); + this->open_itr(witr, readonly); + + // No key equal to or greater than x. + if (witr.move_to(x, DB_SET_RANGE)) + return ((self *)this)->end(); + + kdt k; + // x exists in db, and witr.pcsr_ points to x in db, + // need to move cursor to next different key. 
+ // + if (witr.pcsr_->get_current_key(k) == 0 && k == x) + ret = witr.next(DB_NEXT_NODUP); + + return witr; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that compare container content. + // + // In hash_multimap this function is a global compare function, + // return true if contents in m1 and m2 are identical + // otherwise return false. But we have multiple reasons to make + // it a member of db_multimap<>: + // 1. There need to be a temporary set to store values of a range, and + // db_multimap<> is quite likely to store huge amount of data, + // not suitable to store into std::set, let alone std::set is not + // completely multithread-safe, thus we store them into db_set<>, + // thus we need a temporary db handle, and call + // db_container::clone_db_handle() function to open the db handle. + // 2. We need the transactional support. Making this function + // autocommit is good to eliminate phantom issues. + // Note that we don't require the key-data pairs' order be identical, + // but we assume identical records of keys are adjacent, so that + // iteration will go through them all one by one; Also, the records' + // order of identical keys are unpridictable and irrelivent, so we + // should treat values of a equal range a set, and compare two value + // sets for equality when comparing a equal range of key X. + // + /** + Returns whether the two containers have identical content. + This function does not rely on key order. For a set of keys S1 in this + container and another set of keys S2 of container m2, if set S1 + contains S2 and S2 contains S1 (S1 equals to S2) and each set of data + elements of any key K in S1 from this container equals the set of data + elements of K in m2, the two db_multimap<> containers equal. Otherwise + they are not equal. Data element set comparison does not rely on order + either. 
+ \param m2 The other container to compare against. + \return Returns true if they are equal, false otherwise. + */ + bool operator==(const db_multimap& m2) const + { + + typedef typename self::const_iterator mm_itr_t; + + COMPARE_CHECK(m2) + + bool ret = false, retset = false; + size_t n1, n2; + int ret2; + const self &m1 = *this; + DbTxn *ptxn = NULL; + DbEnv *penv; + Db *pdb; + const char *dbfilename, *dbname; + const char *pname1, *pname2; + string name1, name2; + u_int32_t oflags; + + verify_db_handles(m2); + pdb = this->get_db_handle(); + penv = pdb->get_env(); + try { + this->begin_txn(); + if (m1.size() != m2.size()) { + ret = false; + this->commit_txn(); + return ret; + } + BDBOP(pdb->get_dbname(&dbfilename, &dbname), ret2); + if (dbfilename == NULL) + pname1 = pname2 = NULL; + else { + + this->construct_db_file_name(name1); + this->construct_db_file_name(name2); + // Make name2 different from name1. + name2.push_back('2'); + pname1 = name1.c_str(); + pname2 = name2.c_str(); + } + + Db *value_set_db = open_db(penv, + pname1, DB_BTREE, DB_CREATE, 0); + + Db *value_set_db2 = open_db(penv, + pname2, DB_BTREE, DB_CREATE, 0); + + db_set s1(value_set_db, penv), + s2(value_set_db2, penv); + + mm_itr_t i1, i11; + pair resrg1, resrg2; + for (i1 = m1.begin(); + i1 != m1.end(); + i1 = resrg1.second) { + + resrg1 = m1.equal_range_N(i1->first, n1); + resrg2 = m2.equal_range_N(i1->first, n2); + if (n1 != n2) { + ret = false; + retset = true; + break; + } + + if (n2 == 1 && !(resrg2.first->second == + resrg1.first->second)) { + ret = false; + retset = true; + break; + } + + for (i11 = resrg1.first; i11 != resrg1.second; + ++i11) + s1.insert(i11->second); + + for (i11 = resrg2.first; i11 != resrg2.second; + ++i11) + s2.insert(i11->second); + if (!(s1 == s2)) { + ret = false; + retset = true; + break; + } + s1.clear(); + s2.clear(); + + // Skip all equal keys in the range. + + + } // for + + if (!retset) // Care: there are breaks in the for loop. 
+ ret = true; + + close_db(value_set_db); + close_db(value_set_db2); + + ptxn = this->current_txn(); + BDBOP(penv->get_open_flags(&oflags), ret2); + // The transaction handle in CDS is not a real + // transaction. + if (oflags & DB_INIT_CDB) + ptxn = NULL; + if (name1.length() > 0) + BDBOP2(penv->dbremove(ptxn, name1.c_str(), + NULL, 0), ret2, this->abort_txn()); + if (name2.length() > 0) + BDBOP2(penv->dbremove(ptxn, name2.c_str(), + NULL, 0), ret2, this->abort_txn()); + + this->commit_txn(); + return ret; + + } catch (...) { + this->abort_txn(); + throw; + } + // Now that m1 and m2 has the same number of unique elements and all + // elements of m1 are in m2, thus there can be no element of m2 that + // dose not belong to m1, so we won't verify each element of m2 are + // in m1. + // + + } // operator== + + /// Container unequality comparison operator. + /// \param m2 The container to compare against. + /// \return Returns false if equal, true otherwise. + bool operator!=(const db_multimap& m2) const + { + return !this->operator==(m2); + } + //////////////////////////////////////////////////////////////// + +protected: + typedef ddt mapped_type; + typedef value_type_sub tkpair; + typedef int (*bt_compare_fcn_t)(Db *db, const Dbt *dbt1, + const Dbt *dbt2); + + friend class db_map_iterator, + value_type_sub>; + friend class db_map_iterator; + + db_multimap(BulkRetrievalOption &opt) : base(opt){} +private: + value_type_sub operator[] (const key_type& x) + { + THROW(NotSupportedException, ("db_multimap<>::operator[]")); + } + + value_type_sub operator[] (const key_type& x) const + { + THROW(NotSupportedException, ("db_multimap<>::operator[]")); + + } + + virtual const char* verify_config(Db*dbp, DbEnv* envp) const + { + DBTYPE dbtype; + u_int32_t oflags, sflags; + int ret; + const char *err = NULL; + + err = db_container::verify_config(dbp, envp); + if (err) + return err; + + BDBOP(dbp->get_type(&dbtype), ret); + BDBOP(dbp->get_open_flags(&oflags), ret); + 
BDBOP(dbp->get_flags(&sflags), ret); + + if (dbtype != DB_BTREE && dbtype != DB_HASH) + err = +"wrong database type, only DB_BTREE and DB_HASH allowed for db_map<> class"; + if (oflags & DB_TRUNCATE) + err = +"do not specify DB_TRUNCATE flag to create a db_map<> object"; + + // Can't go without no dup or dupsort flag set. + if (!((sflags & DB_DUP) || (sflags & DB_DUPSORT))) + err = +"db_multimap<> can not be backed by database not permitting duplicate keys"; + + if (sflags & DB_RECNUM) + err = "no DB_RECNUM flag allowed in db_map<>"; + + return err; + + } + + inline void copy_db(db_multimap &x) + { + + // Make sure clear can succeed if there are cursors + // open in other threads. + this->clear(false); + insert(x.begin(), x.end()); + + } + +};// db_multimap<> +//@} //dbstl_containers +END_NS + +#endif // !_DB_STL_DB_MAP_H_ diff --git a/lang/cxx/stl/dbstl_resource_manager.cpp b/lang/cxx/stl/dbstl_resource_manager.cpp new file mode 100644 index 00000000..cd664d3d --- /dev/null +++ b/lang/cxx/stl/dbstl_resource_manager.cpp @@ -0,0 +1,1069 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include + +#include "dbstl_resource_manager.h" +#include "dbstl_exception.h" +#include "dbstl_dbc.h" + +START_NS(dbstl) + +typedef struct { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +} db_timespec; + +extern "C"{ +void __os_id (DB_ENV *, pid_t *, db_threadid_t*); +void __os_gettime(ENV *env, db_timespec *tp, int monotonic); +} + +using std::pair; +using std::make_pair; + +// Static data member definitions. 
+map ResourceManager::open_dbs_; +map ResourceManager::open_envs_; +set ResourceManager::glob_objs_; +set ResourceManager::deldbs; +set ResourceManager::delenvs; + +DbEnv * ResourceManager::mtx_env_ = NULL; +db_mutex_t ResourceManager::mtx_handle_ = 0; +db_mutex_t ResourceManager::mtx_globj_ = 0; + +#ifdef TLS_DEFN_MODIFIER +template TLS_DEFN_MODIFIER T *TlsWrapper::tinst_ = NULL; +#elif defined(HAVE_PTHREAD_TLS) +static pthread_once_t once_control_ = PTHREAD_ONCE_INIT; +template +pthread_key_t TlsWrapper::tls_key_; + +template +void tls_init_once(void) { + pthread_key_create(&TlsWrapper::tls_key_, NULL); +} + +template +TlsWrapper::TlsWrapper() +{ + pthread_once(&once_control_, tls_init_once); +} +#else +#error "No suitable thread-local storage model configured" +#endif + +int ResourceManager::global_lock(db_mutex_t dbcontainer_mtx) +{ + int ret; + + ret = mtx_env_->mutex_lock(dbcontainer_mtx); + dbstl_assert(ret == 0); + return 0; +} + +int ResourceManager::global_unlock(db_mutex_t dbcontainer_mtx) +{ + int ret; + + ret = mtx_env_->mutex_unlock(dbcontainer_mtx); + dbstl_assert(ret == 0); + return 0; +} + +u_int32_t dbstl_strlen(const char *str) +{ + return (u_int32_t)strlen(str); +} + +void dbstl_strcpy(char *dest, const char *src, size_t num) +{ + strncpy(dest, src, num); +} + +int dbstl_strncmp(const char *s1, const char *s2, size_t num) +{ + return strncmp(s1, s2, num); +} + +int dbstl_wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t num) +{ + return wcsncmp(s1, s2, num); +} + +int dbstl_strcmp(const char *s1, const char *s2) +{ + return strcmp(s1, s2); +} + +int dbstl_wcscmp(const wchar_t *s1, const wchar_t *s2) +{ + return wcscmp(s1, s2); +} + +u_int32_t dbstl_wcslen(const wchar_t *str) +{ + return (u_int32_t)wcslen(str); +} + +void dbstl_wcscpy(wchar_t *dest, const wchar_t *src, size_t num) +{ + wcsncpy(dest, src, num); +} + +// This function should be called in a single thread inside a process, before +// any use of dbstl. 
We don't want to rely on platform dependent mutex API, +// so we defer the synchronization to users. +void ResourceManager::global_startup() +{ + int ret; + db_timespec tnow; + + if (mtx_env_ == NULL) { + mtx_env_ = new DbEnv(DB_CXX_NO_EXCEPTIONS); + // Set cache size to 32k, to save space. + BDBOP(mtx_env_->set_cachesize(0, 32 * 1024, 1), ret); + BDBOP(mtx_env_->mutex_set_max(DBSTL_MAX_MTX_ENV_MUTEX), ret); + BDBOP2(mtx_env_->open(NULL, DB_PRIVATE | DB_CREATE, 0777), + ret, mtx_env_->close(0)); + BDBOP2(mtx_env_->mutex_alloc(DB_MUTEX_PROCESS_ONLY, + &mtx_handle_), ret, mtx_env_->mutex_free(mtx_handle_)); + BDBOP2(mtx_env_->mutex_alloc(DB_MUTEX_PROCESS_ONLY, + &mtx_globj_), ret, mtx_env_->mutex_free(mtx_globj_)); + __os_gettime(NULL, &tnow, 0); + srand((unsigned int)tnow.tv_sec); + } + +} + +ResourceManager::ResourceManager(void) +{ + + // Initialize process wide dbstl settings. If there are multiple + // threads, the global_startup should be called in a single thread + // before any use of dbstl. + global_startup(); +} + +void ResourceManager::close_db(Db *pdb) +{ + bool berase = false; + + if (pdb == NULL) + return; + db_csr_map_t::iterator itr = all_csrs_.find(pdb); + if (itr == all_csrs_.end()) + return; + + this->close_db_cursors(pdb); + + delete all_csrs_[pdb]; + all_csrs_.erase(itr); + pdb->close(0); + set::iterator itrdb = deldbs.find(pdb); + // If new'ed by open_db, delete it. 
+ if (itrdb != deldbs.end()) { + delete *itrdb; + berase = true; + } + + global_lock(mtx_handle_); + open_dbs_.erase(pdb); + if (berase) + deldbs.erase(itrdb); + + global_unlock(mtx_handle_); +} + +void ResourceManager::close_all_db_envs() +{ + u_int32_t oflags; + int ret; + size_t txnstk_sz; + + global_lock(mtx_handle_); + for (map::iterator i = open_envs_.begin(); + i != open_envs_.end(); ++i) { + BDBOP(i->first->get_open_flags(&oflags), ret); + txnstk_sz = env_txns_[i->first].size(); + if (oflags & DB_INIT_CDB) { + assert(txnstk_sz == 1); + BDBOP(env_txns_[i->first].top()->commit(0), ret); + } else + assert(txnstk_sz == 0); + + i->first->close(0); + } + + // Delete DbEnv objects new'ed by dbstl. + set::iterator itr2 = delenvs.begin(); + for (; itr2 != delenvs.end(); ++itr2) + delete *itr2; + + delenvs.clear(); + env_txns_.clear(); + open_envs_.clear(); + global_unlock(mtx_handle_); +} + +void ResourceManager::close_db_env(DbEnv *penv) +{ + u_int32_t oflags; + int ret; + size_t txnstk_sz; + bool berase = false; + + if (penv == NULL) + return; + map::iterator itr = env_txns_.find(penv); + if (itr == env_txns_.end()) + return; + BDBOP(penv->get_open_flags(&oflags), ret); + txnstk_sz = itr->second.size(); + if (oflags & DB_INIT_CDB) { + assert(txnstk_sz == 1); + BDBOP(itr->second.top()->commit(0), ret); + } else + assert(txnstk_sz == 0); + env_txns_.erase(itr); + penv->close(0); + + set::iterator itrdb = delenvs.find(penv); + // If new'ed by open_db, delete it. 
+ if (itrdb != delenvs.end()) { + delete penv; + berase = true; + } + + global_lock(mtx_handle_); + open_envs_.erase(penv); + if (berase) + delenvs.erase(itrdb); + global_unlock(mtx_handle_); +} + +void ResourceManager::close_all_dbs() +{ + map::iterator itr; + set::iterator itr2; + Db *pdb; + + global_lock(mtx_handle_); + for (itr = open_dbs_.begin(); itr != open_dbs_.end(); ++itr) { + pdb = itr->first; + this->close_db_cursors(pdb); + + delete all_csrs_[pdb]; + all_csrs_.erase(pdb); + pdb->close(0); + } + + // Delete Db objects new'ed by dbstl. + for (itr2 = deldbs.begin(); itr2 != deldbs.end(); ++itr2) + delete *itr2; + + deldbs.clear(); + open_dbs_.clear(); + + global_unlock(mtx_handle_); +} + +ResourceManager::~ResourceManager(void) +{ + u_int32_t oflags; + int ret; + set dbs2del; + set envs2del; + + global_lock(mtx_handle_); + + for (map::iterator i = open_dbs_.begin(); + i != open_dbs_.end(); ++i) { + this->close_db_cursors(i->first); + (i->second)--; + if (i->second == 0) { + + delete all_csrs_[i->first]; // Delete the cursor set. + all_csrs_.erase(i->first); + i->first->close(0); + + set::iterator itrdb = deldbs.find(i->first); + // If new'ed by open_db, delete it. + if (itrdb != deldbs.end()) { + delete *itrdb; + deldbs.erase(itrdb); + } + dbs2del.insert(i->first); + } + } + + for (map::iterator i = open_envs_.begin(); + i != open_envs_.end(); ++i) { + BDBOP(i->first->get_open_flags(&oflags), ret); + if (oflags & DB_INIT_CDB) { + assert(env_txns_[i->first].size() == 1); + BDBOP(env_txns_[i->first].top()->commit(0), ret); + env_txns_[i->first].pop(); + } + + (i->second)--; + if (i->second == 0) { + assert(env_txns_[i->first].size() == 0); + i->first->close(0); + set::iterator itrdb = delenvs.find(i->first); + // If new'ed by open_db, delete it. + if (itrdb != delenvs.end()) { + delete *itrdb; + delenvs.erase(itrdb); + } + envs2del.insert(i->first); + } + } + + // Erase db/env entries that are just closed. 
+ for (set::iterator i = dbs2del.begin(); i != dbs2del.end(); ++i) + open_dbs_.erase(*i); + for (set::iterator i = envs2del.begin(); + i != envs2del.end(); ++i) + open_envs_.erase(*i); + + global_unlock(mtx_handle_); + + for (db_csr_map_t::iterator itr3 = all_csrs_.begin(); + itr3 != all_csrs_.end(); ++itr3) + { + // Delete the cursor set. Above code may not have a chance to + // delete this set because the db(itr3->first) was already + // closed by another thread. + delete itr3->second; + } + // Don't bother to clear all_csrs_ since it is being destructed. + +// Don't handle transactions, leave them alone, because autocommit +// transactions must have been committed/aborted, and outside transactions +// should be handled by user code, and if they are not handled yet, +// the DbEnv::close will fail. +// Only handle the transaction for CDS mode---an DB_TXN* handle is opened +// at environment registration/creation by cdsgroup_begin, so we need to commit +// that transaction. +// +} + +Db* ResourceManager::open_db ( + DbEnv*penv, const char* filename, DBTYPE dbtype, + u_int32_t oflags, u_int32_t set_flags1, int mode, DbTxn* txn, + u_int32_t cflags, const char* dbname) +{ + int ret, ci = 0; + u_int32_t envf = 0, envoflags = 0; + DbTxn *ptxn = NULL; + Db *pdb = new Db(penv, cflags | DB_CXX_NO_EXCEPTIONS); + + if (penv) { + BDBOP(penv->get_open_flags(&envoflags), ret); + BDBOP(penv->get_flags(&envf), ret); + } + if (set_flags1) + BDBOP(pdb->set_flags(set_flags1), ret); + // If no transaction is specified and we really need one, begin a + // transaction and commit it before return, we don't commit + // passed-in transaction. 
+ // + if (penv && ((envf & DB_AUTO_COMMIT) || + (envoflags & DB_INIT_TXN)) && txn == 0){ + ptxn = current_txn(penv); + BDBOP2(penv->txn_begin(ptxn, &txn, 0), ret, txn->abort()); + ci = 1; + } + if (txn == NULL) + BDBOP2(pdb->open(txn, filename, dbname, + dbtype, oflags, mode), + ret, (pdb->close(0))); + else + BDBOP2(pdb->open(txn, filename, dbname, + dbtype, oflags, mode), + ret, (pdb->close(0), txn->abort())); + if (ci && txn) + BDBOP(txn->commit(0), ret); + global_lock(mtx_handle_); + open_dbs_.insert(make_pair(pdb, 1u)); + pair::iterator, bool> delinsret = deldbs.insert(pdb); + assert(delinsret.second); + global_unlock(mtx_handle_); + csrset_t *mycsrs = new csrset_t(); + all_csrs_.insert(make_pair(pdb, mycsrs)); + + return pdb; +} + +// Only called if the user does not supply an environment handle. +DbEnv* ResourceManager::open_env(const char* env_home, u_int32_t set_flags1, + u_int32_t oflags, u_int32_t cachesize, int mode, u_int32_t cflags) +{ + int ret; + + DbEnv *penv = new DbEnv(cflags | DB_CXX_NO_EXCEPTIONS); + if (set_flags1) + BDBOP(penv->set_flags(set_flags1, 1), ret); + BDBOP(penv->set_cachesize(0, cachesize, 1), ret); + BDBOP(penv->set_lk_max_lockers(2000), ret); + BDBOP(penv->set_lk_max_locks(2000), ret); + BDBOP(penv->set_lk_max_objects(2000), ret); + BDBOP2(penv->open(env_home, oflags, mode), ret, penv->close(0)); + + stack stk; + DbTxn *ptxn = NULL; + if (oflags & DB_INIT_CDB) { + BDBOP2(penv->cdsgroup_begin(&ptxn), ret, ptxn->commit(0)); + stk.push(ptxn); + } + + env_txns_.insert(make_pair(penv, stk)); + global_lock(mtx_handle_); + open_envs_.insert(make_pair(penv, 1u)); + delenvs.insert(penv); + global_unlock(mtx_handle_); + return penv; +} + +DbTxn* ResourceManager::current_txn(DbEnv*env) +{ + if (env_txns_.count(env) <= 0) + return NULL; + + stack &pstk = env_txns_[env]; + return pstk.size() != 0 ? 
pstk.top() : NULL; +} + +void ResourceManager::thread_exit() +{ + ResourceManager *pinst; + + pinst = ResourceManager::instance(); + if (pinst == NULL) // Already deleted. + return; + + global_lock(mtx_globj_); + glob_objs_.erase(pinst); + global_unlock(mtx_globj_); + TlsWrapper::set_tls_obj(NULL); + + // Can't put this line between the lock and unlock pair above, + // because the destructor also locks the same mutex, there + // would be a self lock. + delete pinst; +} + +void ResourceManager::set_global_callbacks() +{ + DbstlElemTraits * cstarinst = + DbstlElemTraits::instance(); + cstarinst->set_sequence_len_function(dbstl_strlen); + cstarinst->set_sequence_copy_function(dbstl_strcpy); + cstarinst->set_sequence_compare_function(dbstl_strcmp); + cstarinst->set_sequence_n_compare_function(dbstl_strncmp); + + DbstlElemTraits *wcstarinst = + DbstlElemTraits::instance(); + wcstarinst->set_sequence_copy_function(dbstl_wcscpy); + wcstarinst->set_sequence_len_function(dbstl_wcslen); + wcstarinst->set_sequence_compare_function(dbstl_wcscmp); + wcstarinst->set_sequence_n_compare_function(dbstl_wcsncmp); +} + +ResourceManager* ResourceManager::instance() +{ + ResourceManager *pinst = NULL; +#ifdef HAVE_PTHREAD_TLS + // Initialize the tls key. 
+ pthread_once(&once_control_, tls_init_once); +#endif + + if ((pinst = TlsWrapper::get_tls_obj()) == NULL){ + TlsWrapper::set_tls_obj( + pinst = new ResourceManager()); + register_global_object(pinst); + set_global_callbacks(); + } + return pinst; +} + +int ResourceManager::open_cursor(DbCursorBase *dcbcsr, + Db *pdb, int flags) +{ + u_int32_t oflags = 0; + int ret; + + if (!pdb || !dcbcsr) + return 0; + + csrset_t::iterator csitr; + Dbc* csr = NULL; + dcbcsr->set_owner_db(pdb); + + DbTxn *ptxn = NULL; + DbTxn *ptxn2 = this->current_txn(pdb->get_env()); + if (ptxn2) { + ptxn = ptxn2; + dcbcsr->set_owner_txn(ptxn); + } + + if (pdb->get_env() != NULL){ + ret = pdb->get_env()->get_open_flags(&oflags); + dbstl_assert(ret == 0); + } + + // Call Dbc->cursor only if there is no active open cursor in the + // current thread, otherwise duplicate one from the existing cursor + // and use the locks already held in this thread. + // + csrset_t *pcsrset = NULL; + db_csr_map_t::iterator itrpcsrset = all_csrs_.find(pdb); + if (itrpcsrset == all_csrs_.end()) { // No such pair in current thread. + pcsrset = new csrset_t; + pair insret0 = + all_csrs_.insert(make_pair(pdb, pcsrset)); + assert(insret0.second); + } else + pcsrset = itrpcsrset->second; + + assert(pcsrset != NULL); + if (pcsrset->size() == 0) { +newcursor: + BDBOP2(pdb->cursor(ptxn, &csr, flags), ret, + ((csr != NULL ? csr->close() : 1), + this->abort_txn(pdb->get_env()))); + } else { + // We have some open cursors, so try to dup from one. If we are + // in CDS mode, and trying to open a write cursor, we should + // duplicate from a write cursor. + csitr = pcsrset->begin(); + Dbc *csr22 = (*csitr)->get_cursor(); + assert(csr22 != NULL); + assert(!((oflags & DB_INIT_TXN) && (flags & DB_WRITECURSOR))); + // If opening a CDS write cursor, must find a write cursor + // to duplicate from. 
+ if (((flags & DB_WRITECURSOR) != 0)) { + for (;csitr != pcsrset->end(); ++csitr) { + csr22 = (*csitr)->get_cursor(); + if (((DBC*)csr22)->flags & DBC_WRITECURSOR) { + // No need to abortTxn on fail in CDS. + BDBOP2(csr22->dup(&csr, DB_POSITION), + ret, csr->close()); + goto done; + } + } + goto newcursor; // No write cursor, create a new one. + + } else if (((oflags & DB_INIT_TXN) == 0) || + pdb->get_transactional() == 0) { + // We are opening a DS or CDS read cursor, or + // opening a cursor in + // a transactional environment from a database not + // transactionally created. + BDBOP2(csr22->dup(&csr, DB_POSITION), ret, + (csr->close(), this->abort_txn(pdb->get_env()))); + goto done; + } else { + // We are opening a transactional cursor, duplicate + // from a transactional one. + // We don't remove (close) the non-transactional ones, + // they are in use. + // Hold the locks already held in this thread, + // so need DB_POSITION flag. + // + DbTxn *ptxn3 = NULL; + DbCursorBase *dcbcursor = NULL; + csrset_t::iterator itr3, itr4; + int got_rg = 0; + + // Opening a cursor in a transactional environment + // with no transaction specified. This should not + // happen in the first place. + if (ptxn == NULL) + THROW(InvalidArgumentException, ("DbTxn*", +"Opening a cursor in a transactional environment but no transaction \ +is started specified")); + // When we check that there must be a valid transaction + // handle ptxn when opening a cursor in a + // transactional environment, the following code + // to delete cursors with no transaction + // is not required and never reached, + // but we will leave it there. 
+ for (;csitr != pcsrset->end();) { + dcbcursor = *csitr; + ptxn3 = dcbcursor->get_owner_txn(); + if (ptxn3 == NULL) { + BDBOP(dcbcursor->close(), ret); + if (!got_rg){ + got_rg++; + itr3 = csitr; + } + } else if (got_rg) { + got_rg = 0; + itr4 = csitr; + pcsrset->erase(itr3, itr4); + csitr = pcsrset->begin(); + continue; + } + + if (ptxn3 == ptxn) { + csr22 = dcbcursor->get_cursor(); + BDBOP2(csr22->dup(&csr, DB_POSITION), + ret, (csr->close(), this-> + abort_txn(pdb->get_env()))); + goto done; + } + ++csitr; + + } + if (got_rg) { + pcsrset->erase(itr3, pcsrset->end()); + got_rg = 0; + } + + goto newcursor; + + } // else oflags & DB_INIT_TXN + } // else pcsrset->size() +done: + // Insert into current thread's db-cursor map and txn_csrs_ map, + // for later duplication. + // + dcbcsr->set_cursor(csr); + this->add_cursor(pdb, dcbcsr); + return 0; +} + +void ResourceManager::add_cursor(Db* dbp, DbCursorBase* dcbcsr) +{ + if (!dbp || !dcbcsr) + return; + assert(dcbcsr->get_cursor() != NULL); + + (all_csrs_[dbp])->insert(dcbcsr); + // Register to txncsrs_, we suppose current transaction is the context + // of this operation. + // + this->add_txn_cursor(dcbcsr, dbp->get_env()); +} + +// Close dbp's all open cursors opened in current thread, do not close +// those of dbp opened in other threads, Db::truncate requires dbp's +// all cursors of all threads should be closed, and it is user's duty +// to make sure other threads all close dbp's cursor, because if we +// close them here, it is also an error---multi-threaded access to the same +// Dbc* cursor should be serialized, we can't serialize with user code +// anyway. 
+// +size_t ResourceManager::close_db_cursors(Db* dbp1) +{ + int ret; + Db* dbp; + DbTxn *ptxn, *ptxn2; + csrset_t *pcset_txn; + + if (dbp1 == NULL) + return 0; + + dbp = dbp1; + db_csr_map_t::iterator itr0; + csrset_t::iterator itr; + + itr0 = all_csrs_.find(dbp1); + if (itr0 == all_csrs_.end()) + return 0; + + csrset_t *pcset = itr0->second; + + pcset_txn = NULL; + ptxn2 = ptxn = NULL; + size_t txncsr_sz = txn_csrs_.size(); + + for (itr = pcset->begin(), ret = 0; itr != pcset->end(); + ++itr, ret++) { + + BDBOP((*itr)->close(), ret); + if (txncsr_sz > 0) { + if (pcset_txn == NULL || ptxn != + (ptxn2 = (*itr)->get_owner_txn())) { + ptxn = ptxn2 ? ptxn2 : (*itr)->get_owner_txn(); + if (ptxn != NULL) + pcset_txn = txn_csrs_[ptxn]; + } + if (pcset_txn) + pcset_txn->erase(*itr); + } + + } + + // Don't delete the pcset or itr0 because this dbp1 may be used + // by other containers in this thread. + pcset->clear(); + // We don't delete the DbCursorBase object, it is still + // referenced by others. + return ret; +} + +// Close the cursor of csr and remove the entry containing csr from +// txn_csrs_ and all_csrs_. +int ResourceManager::remove_cursor(DbCursorBase*csr, + bool remove_from_txncsrs) +{ + int ret; + + if (csr == NULL) + return 0; + BDBOP(csr->close(), ret); + + if (remove_from_txncsrs) { + DbTxn *ptxn = csr->get_owner_txn(); + if (ptxn != NULL) { + txncsr_t::iterator itr = txn_csrs_.find(ptxn); + if (itr != txn_csrs_.end()) + itr->second->erase(csr); + } + } + + Db *pdb = csr->get_owner_db(); + if (pdb != NULL) + all_csrs_[pdb]->erase(csr); + + return ret; +} + +/* + * Remove cursors opened in transaction txn's context, should be called before + * commiting/aborting a transaction. + * Note that here we should remove the cursor from all_csrs_ too, + * by calling remove_cursor() function. 
+ */ +void ResourceManager::remove_txn_cursor(DbTxn* txn) +{ + int ret; + + if (!txn) + return; + + txncsr_t::iterator itr0; + csrset_t::iterator itr; + itr0 = txn_csrs_.find(txn); + if (itr0 == txn_csrs_.end()) + return; // No cursor opened in this txn. + + csrset_t *pcsrs = itr0->second; + DbCursorBase *csr; + + // Remove(close and remove from csr registry) cursors + // opened in the transaction txn's context. + for (itr = pcsrs->begin(); itr != pcsrs->end(); ++itr) { + // This cursor should be closed now and removed + // from csr registry. + csr = *itr; + BDBOP(csr->close(), ret); + all_csrs_[csr->get_owner_db()]->erase(csr); + } + + delete pcsrs; + // Erase csrs belonging to txn. + txn_csrs_.erase(itr0); +} + +// Begin a new transaction from the specified environment env. +// When outtxn is non-zero, it supports nested txn, +// so the new transaction is started as a child transaction of the +// current one, and we push it into env1's transaction stack; +// Otherwise, we are starting an internal transaction for autocommit, +// no new transaction will be started, but current transaction's reference +// count will be incremented. +DbTxn* ResourceManager::begin_txn(u_int32_t flags, DbEnv*env1, int outtxn) +{ + DbEnv *env = env1; + DbTxn *ptxn, *txn = NULL; + int ret; + + if (!env1) + return NULL; + + assert(env_txns_.count(env1) > 0); + + stack&stk = env_txns_[env1]; + + // Not an outside transaction, so if there is transaction in stack, + // use it and increment its reference count. + if (outtxn == 0) { + // We have a transaction in stack, increment its reference + // count. + if (stk.size() > 0) { + txn = stk.top(); + // The txn was created externally, now we internally + // use it, so the reference count is 2. + map::iterator itr12; + if ((itr12 = txn_count_.find(txn)) == txn_count_.end()) + txn_count_.insert(make_pair(txn, 2u)); + else + txn_count_[txn]++; + } else { + // Empty stack, create a transaction and set reference count to 1. 
+ BDBOP(env->txn_begin(NULL, &txn, flags), ret); + stk.push(txn); + txn_count_[txn] = 1;// the first to use it + txn_csrs_.insert(make_pair(txn, new csrset_t())); + } + + } else { // Creating a transaction by user, used outside of dbstl. + ptxn = stk.size() > 0 ? stk.top() : NULL; + + BDBOP(env->txn_begin(ptxn, &txn, flags), ret); + + // txn now is the current txn + stk.push(txn); + txn_csrs_.insert(make_pair(txn, new csrset_t())); + } + + return txn; +} + +void ResourceManager::commit_txn(DbEnv *env, u_int32_t flags) +{ + int ret; + DbTxn *ptxn; + + if (!env) + return; + + assert(env_txns_.count(env) > 0); + stack &stk = env_txns_[env]; + ptxn = stk.top(); + assert(ptxn != NULL); + size_t txncnt = txn_count_[ptxn]; + + if (txncnt > 1) // used internally + txn_count_[ptxn]--; + else { + txn_count_.erase(ptxn); + this->remove_txn_cursor(ptxn); + stk.pop(); + BDBOP(ptxn->commit(flags), ret); + + } + +} + +void ResourceManager::commit_txn(DbEnv *env, DbTxn *txn, u_int32_t flags) +{ + DbTxn *ptxn = NULL; + int ret; + + if (env == NULL || txn == NULL) + return; + + stack &stk = env_txns_[env]; + while (stk.size() > 0 && (ptxn = stk.top()) != txn) { + stk.pop(); + txn_count_.erase(ptxn);// may be in the txn_count_ map + this->remove_txn_cursor(ptxn); + // Child txns could be committed by parent txn, but c++ API + // can't delete the new'ed child txns when committing the + // parent txn, so have to commit them explicitly. 
+ ptxn->commit(flags); + } + if (stk.size() == 0) + THROW(InvalidArgumentException, ( + "No such transaction created by dbstl")); + else { + stk.pop(); + txn_count_.erase(txn);// may be in the txn_count_ map + this->remove_txn_cursor(txn); + if (ptxn){ + BDBOP(ptxn->commit(flags), ret); + } else // could never happen + THROW(InvalidArgumentException, ( + "No such transaction created by dbstl")); + + } +} + +void ResourceManager::abort_txn(DbEnv *env, DbTxn *txn) +{ + int ret; + DbTxn *ptxn = NULL; + u_int32_t oflags; + + if (env == NULL || txn == NULL) + return; + + BDBOP (env->get_open_flags(&oflags), ret); + stack &stk = env_txns_[env]; + while (stk.size() > 0 && (ptxn = stk.top()) != txn) { + txn_count_.erase(ptxn);// may be in the txn_count_ map + this->remove_txn_cursor(ptxn); + stk.pop(); + // Child txns could be aborted by parent txn, but c++ API + // can't delete the new'ed child txns when aborting the + // parent txn, so have to abort them explicitly. + ptxn->abort(); + } + if (stk.size() == 0) + THROW(InvalidArgumentException, ( + "No such transaction created by dbstl")); + else { + stk.pop(); + txn_count_.erase(txn);// may be in the txn_count_ map + this->remove_txn_cursor(txn); + if (ptxn){ + if ((oflags & DB_INIT_CDB) == 0) + BDBOP(ptxn->abort(), ret); + } else // could never happen + THROW(InvalidArgumentException, ( + "No such transaction created by dbstl")); + + } +} + +// Abort current txn, close/remove its cursors and reference count. 
+void ResourceManager::abort_txn(DbEnv*env) +{ + int ret; + DbTxn *ptxn; + u_int32_t oflags; + + if (!env) + return; + env_txns_t::iterator itr(env_txns_.find(env)); + if (itr == env_txns_.end()) + return; + + stack &stk = itr->second; + if (stk.size() == 0) + return; + ptxn = stk.top(); + if (ptxn == NULL) + return; + this->remove_txn_cursor(ptxn); + BDBOP (env->get_open_flags(&oflags), ret); + + // Transactions handles created via cdsgroup_begin can not be aborted + // because they are not really transactions, they just borrow the + // DB_TXN structure to store a locker id. + if ((oflags & DB_INIT_CDB) == 0) + BDBOP(ptxn->abort(), ret); + txn_count_.erase(ptxn); + stk.pop(); +} + +DbTxn* ResourceManager::set_current_txn_handle(DbEnv *env, DbTxn *newtxn) +{ + assert(env_txns_.count(env) > 0); + stack &stk = env_txns_[env]; + DbTxn *ptxn = stk.top(); + stk.pop(); + stk.push(newtxn); + return ptxn; +} + +void ResourceManager::add_txn_cursor(DbCursorBase *dcbcsr, DbEnv *env) +{ + if (!env || !dcbcsr) + return; + + DbTxn *ptxn = this->current_txn(env); + if (ptxn == NULL) + return; + + u_int32_t oflags; + int ret; + + BDBOP(env->get_open_flags(&oflags), ret); + if ((oflags & DB_INIT_TXN) == 0) + return; + + txncsr_t::iterator itr; + csrset_t *pset; + + itr = txn_csrs_.find(ptxn); + pair insret; + + if (itr == txn_csrs_.end()) { + insret = txn_csrs_.insert(make_pair(ptxn, new csrset_t())); + assert(insret.second); + itr = insret.first; + } + pset = itr->second; + pset->insert(dcbcsr); +} + +void ResourceManager::register_db(Db*pdb1) +{ + if (!pdb1) + return; + global_lock(mtx_handle_); + if (open_dbs_.count(pdb1) == 0) + open_dbs_.insert(make_pair(pdb1, 1u)); + else + open_dbs_[pdb1]++; + global_unlock(mtx_handle_); + csrset_t *pcsrset = new csrset_t(); + pair insret = all_csrs_.insert( + make_pair(pdb1, pcsrset)); + if (!insret.second) + delete pcsrset; + +} + +void ResourceManager::register_db_env(DbEnv*env1) +{ + u_int32_t oflags = 0; + DbTxn *ptxn = NULL; + int 
ret; + + if (!env1) + return; + + stack stk; + BDBOP(env1->get_open_flags(&oflags), ret); + + if (oflags & DB_INIT_CDB) { + env1->cdsgroup_begin(&ptxn); + stk.push(ptxn); + } + + env_txns_.insert(make_pair(env1, stk)); + + global_lock(mtx_handle_); + if (open_envs_.count(env1) == 0) + open_envs_.insert(make_pair(env1, 1u)); + else + open_envs_[env1]++; + global_unlock(mtx_handle_); +} + +// Delete registered DbstlGlobalInnerObject objects. +void ResourceManager::global_exit() +{ + set::iterator itr; + global_lock(mtx_globj_); + for (itr = glob_objs_.begin(); itr != glob_objs_.end(); ++itr) + delete *itr; + global_unlock(mtx_globj_); + + mtx_env_->mutex_free(mtx_globj_); + mtx_env_->mutex_free(mtx_handle_); + delete mtx_env_; +} + +void ResourceManager::register_global_object(DbstlGlobalInnerObject *gio) +{ + global_lock(mtx_globj_); + glob_objs_.insert(gio); + global_unlock(mtx_globj_); +} + +END_NS diff --git a/lang/cxx/stl/dbstl_resource_manager.h b/lang/cxx/stl/dbstl_resource_manager.h new file mode 100644 index 00000000..16869b99 --- /dev/null +++ b/lang/cxx/stl/dbstl_resource_manager.h @@ -0,0 +1,359 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_RESOURCE_MANAGER_H__ +#define _DB_STL_RESOURCE_MANAGER_H__ + +#include +#include +#include +#include + +#include "dbstl_common.h" +#include "dbstl_inner_utility.h" + +START_NS(dbstl) + +class DbCursorBase; +using std::map; +using std::multimap; +using std::set; +using std::stack; + +/////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// ResourceManager class definition +// +// This class manages all the Berkeley DB handles and their mapping +// relationship. 
When it's only thread-specific instance is destructed, +// these handles are automatically closed in the correct order (the db and +// dbenv handles will be closed when the last thread using the handles in +// the process is destructed). +// +// The Db* and DbEnv* handles are process-wide global, they can be shared +// among multithreads, so they are stored into a static stl map, and access +// to the two map objects (open_dbs_ & open_envs_) is protected by a process +// wide mutex because few stl implementations support multithreaded access. +// We use reference counting in the two map objects to make sure each handle +// is closed when the last thread using it exits. Each thread sharing the +// handle should call ResourceManager::register_db/dbenv to tell DBSTL that +// it will use the handle, otherwise the handle may be closed prematurely. +// +// The transaction and cursor handles are thread specific. They are stored +// into stl containers and each instance of the ResourceManager is stored +// into thread local storage(TLS) of each thread. Thus the DB STL containers +// are thread safe. +// + +// This map contains cursors of all open databases opened in the same thread. +// We can only duplicate a cursor of the same database; We don't allow sharing +// Berkeley DB cursor handles across multiple threads, each thread needs to +// open their own cursor handle; +// +typedef map *> db_csr_map_t; + +// The set of cursors that belong to a db handle or a transaction. +typedef set csrset_t; +// The cursors opened in each transaction. +typedef map *> txncsr_t; + +// This stack contains the transactions started in current thread. Each +// transaction is the child transaction of the one under it in the stack. 
+// +// We support nested transactions for those created by the dbstl +// users, but still keep reference counting for dbstl internally +// created transactions so that the autocommit methods are really +// autocommit with least overhead (nested transactions are overheads). The +// way we keep both nested transaction and reference counting to internal +// transactions in the same stack is: +// 1. When creating an external transaction, look at the stack top, if there +// is a transaction, it must be an external one too, so use it as the parent +// transaction to create the external transaction. +// 2. When creating an internal transaction, look at the stack top, if there +// is one, call it T, look for its the reference counting, if there is a +// reference count for it, T is an internal one, so we increment its +// reference count; Otherwise, T is an external one, and according to the DB +// autocommit semantics, this function should be in T's context, so we +// simply use T and add it to the reference counting structure, and set its +// reference count to 2. +// +// We don't support expanding a transaction across multiple threads, +// because there are many restrictions to doing so, making it meaningless. +// +typedef stack txnstk_t; +typedef map env_txns_t; + +#ifdef WIN32 +#pragma warning( push ) +#pragma warning( disable:4251 ) +#endif +// This class is used to wrap a ResourceManager instance pointer, so that +// each thread has its own ResourceManager singleton. + +#ifdef TLS_DECL_MODIFIER +template +class TlsWrapper +{ +public: + static T *get_tls_obj() + { + return tinst_; + } + + static void set_tls_obj(T *objp) + { + tinst_ = objp; + } + +private: + TlsWrapper(){} + + // Thread local pointer to the instance of type T. 
+ static TLS_DECL_MODIFIER T *tinst_; +}; // TlsWrapper<> + +#elif defined(HAVE_PTHREAD_TLS) +template +class TlsWrapper +{ +public: + static T *get_tls_obj() + { + return static_cast(pthread_getspecific(tls_key_)); + } + + static void set_tls_obj(T *objp) + { + pthread_setspecific(tls_key_, objp); + } + + // Friend declarations don't work portably, so we have to declare + // tls_key_ public. + static pthread_key_t tls_key_; +private: + TlsWrapper(); + +}; // TlsWrapper<> + +#else +#error "A multi-threaded build of STL for Berkeley DB requires thread local storage. None is configured." +#endif + +class _exported ResourceManager : public DbstlGlobalInnerObject +{ +private: + + ResourceManager(void); + // open_dbs_ & open_envs_ are shared among threads, protected by + // ghdl_mtx; + static map open_dbs_; + static mapopen_envs_; + + // Transaction stack of all environments. Use a stack to allow nested + // transactions. The transaction at the top of the stack is the + // current active transaction. + // + env_txns_t env_txns_; + + // Cursors opened in a corresponding transaction context. When + // committing or aborting a transaction, first close all open cursors. + // + txncsr_t txn_csrs_; + + // If in container X, its methods X::A and X::B both call begin and + // commit transaction. X::A calls X::B after it's begin transaction + // call, then X::B will commit the transaction prematurally. To avoid + // will commit the only transaction prematurally, to avoid this, we use + // this, we use this map to record each transaction's reference count. + // Each begin/commit_txn() will increment/decrement the reference + // count, when reference count goes to 0, the transaction is committed. + // Abort_txn will unconditionally abort the transaction. + // + map txn_count_; + + // Contains the cursors opened in the current thread for each database, + // So that we can close them in the right way, freeing any Berkeley DB + // handles before exiting. 
+ // + db_csr_map_t all_csrs_; + + // Remove cursors opened in the transaction txn's context, should be + // called before commiting or aborting a transaction. + // + void remove_txn_cursor(DbTxn *txn); + + // Add a cursor to the current transaction's set of open cursors. + void add_txn_cursor(DbCursorBase *dcbcsr, DbEnv *env); + + // The environment mtx_env_ and mtx_handle_ are used for synchronizing + // multiple threads' access to open_dbs_ and open_envs_ and glob_objs_. + // They are discarded when program exits, no deallocation/release + // is done. + static DbEnv *mtx_env_; + static db_mutex_t mtx_handle_; + static db_mutex_t mtx_globj_; + static set glob_objs_; + + // This set stores db handles that are new'ed by open_db, and thus + // should be deleted after this db is closed automatically by dbstl. + // If a db is new'ed and created by user without using open_db, users + // should delete it. + static set deldbs; + static set delenvs; // Similar to deldbs, works with open_envs. + static void set_global_callbacks(); +public: + + // This function should be called in a single thread inside a process, + // before any use of dbstl. + static void global_startup(); + // Delete registered DbstlGlobalInnerObject objects. + static void global_exit(); + + // Delete the ResourceManager singleton of current thread, and remove it + // from glob_objs. + static void thread_exit(); + static void register_global_object(DbstlGlobalInnerObject *gio); + static DbEnv *get_mutex_env() { return mtx_env_; } + // Lock mtx_handle_, if it is 0, allocate it first. + static int global_lock(db_mutex_t dbcontainer_mtx); + static int global_unlock(db_mutex_t dbcontainer_mtx); + // Close pdb regardless of its reference count, users must make sure + // pdb is not used by others before calling this method. We can't + // close by reference count in this method, otherwise when the thread + // exits pdb's reference count is decremented twice. 
+ void close_db(Db *pdb); + + // Close all db handles regardless of reference count, used to clean up + // the calling thread's ResourceManager singleton. + void close_all_dbs(); + + // Close specified db env handle and remove it from resource manager. + void close_db_env(DbEnv *penv); + + // Close and remove all db env registered in the resource manager. + // Used to clean up the calling thread's ResourceManager singleton. + void close_all_db_envs(); + + // Begin a new transaction in the specified environment. When outtxn + // is non-zero support nested transactions - the new transaction will + // be started as a child of the current transaction. If outtxn is 0 + // we are starting an internal transaction for autocommit, no new + // transaction will be started, but the current transaction's + // reference count will be incremented if it already has a reference + // count; otherwise, it was created by user, and we simply use this + // transaction, set its reference count to 2. + // + // This function is called by both containers to begin an internal + // transaction for autocommit, and by db stl users to begin an + // external transaction. + // + DbTxn *begin_txn(u_int32_t flags/* flags for DbEnv::txn_begin */, + DbEnv *env, int outtxn); + + // Decrement reference count if it exists and if it goes to 0, commit + // current transaction T opened in env; + // If T has no reference count(outside transaction), simply find + // it by popping the stack and commit it. + // + // This function is called by db_container to commit a transaction + // for auto commit, also can be called by db stl user to commit + // an external explicit transaction. + // + void commit_txn(DbEnv *env, u_int32_t flags = 0); + + // Commit specified transaction txn: find it by popping the stack, + // discard all its child transactions and commit it. + // + // This function is called by dbstl user to commit an external + // explicit transaction. 
+ // + void commit_txn(DbEnv *env, DbTxn *txn, u_int32_t flags = 0); + + // Abort current transaction of the environment. + // + void abort_txn(DbEnv *env); + + // Abort specified transaction: find it by popping the stack, discard + // all its child transactions and abort it. + // + // This function is called by dbstl user to abort an external + // explicit transaction. + // + void abort_txn(DbEnv *env, DbTxn *txn); + + // Set env's current transaction handle. The original transaction + // handle is returned without aborting or commiting. This API can be + // used to share a transaction handle among multiple threads. + DbTxn* set_current_txn_handle(DbEnv *env, DbTxn *newtxn); + + // Register a Db handle. This handle and handles opened in it + // will be closed by ResourceManager, so application code must not + // try to close or delete it. Users can configure the handle before + // opening the Db and then register it via this function. + // + void register_db(Db *pdb1); + + // Register a DbEnv handle. This handle and handles opened in it + // will be closed by ResourceManager, so application code must not try + // to close or delete it. Users can configure the handle before + // opening the Environment and then register it via this function. + // + void register_db_env(DbEnv *env1); + + // Helper function to open a database and register it into + // ResourceManager. + // Users can set the create flags, open flags, db type, and flags + // needing to be set before open. + // + Db* open_db (DbEnv *penv, const char *filename, DBTYPE dbtype, + u_int32_t oflags, u_int32_t set_flags, int mode = 0644, + DbTxn* txn = NULL, u_int32_t cflags = 0, + const char *dbname = NULL); + + // Helper function to open a dbenv and register it into + // ResourceManager. + // Users can set the create flags, open flags, db type, and flags + // needing to be set before open. 
+ // + DbEnv *open_env(const char *env_home, u_int32_t set_flags, + u_int32_t oflags = DB_CREATE | DB_INIT_MPOOL, + u_int32_t cachesize = 4 * 1024 * 1024, int mode = 0644, + u_int32_t cflags = 0/* Flags for DbEnv constructor. */); + + static ResourceManager *instance(); + + // Release all registered resource in the right order. + virtual ~ResourceManager(void); + + // Return current transaction of environment env, that is, the one on + // the transaction stack top, the active one. + // + DbTxn *current_txn(DbEnv *env); + + // Open a Berkeley DB cursor. + // + int open_cursor(DbCursorBase *dcbcsr, Db *pdb, int flags = 0); + + // Add a db-cursor mapping. + void add_cursor(Db *dbp, DbCursorBase *csr); + + // Close all cursors opened in the database. + size_t close_db_cursors(Db *dbp1); + + // Close and remove a cursor from ResourceManager. + // + int remove_cursor(DbCursorBase *csr, bool remove_from_txncsrs = true); + +}; // ResourceManager + +END_NS +#ifdef WIN32 +#pragma warning( pop ) +#endif +#endif // !_DB_STL_RESOURCE_MANAGER_H__ diff --git a/lang/cxx/stl/dbstl_set.h b/lang/cxx/stl/dbstl_set.h new file mode 100644 index 00000000..4832afbc --- /dev/null +++ b/lang/cxx/stl/dbstl_set.h @@ -0,0 +1,1583 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_STL_DB_SET_H_ +#define _DB_STL_DB_SET_H_ + + +#include "dbstl_common.h" +#include "dbstl_map.h" +#include "dbstl_dbc.h" +#include "dbstl_container.h" +#include "dbstl_resource_manager.h" +#include "dbstl_element_ref.h" +#include "dbstl_base_iterator.h" + +START_NS(dbstl) + +using std::pair; +using std::make_pair; + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// _DB_STL_set_value class template definition +// This class is used for db_set as value type because it inherits from +// db_map . It dose not need a byte for DB_HASH to work, hash db can store +// duplicated keys with empty data. +// +template +class _DB_STL_set_value +{ +public: + + inline _DB_STL_set_value(const ElementRef&){ } + inline _DB_STL_set_value(const T&){} + inline _DB_STL_set_value(){} +}; + +/** \ingroup dbstl_iterators +@{ +\defgroup dbset_iterators Iterator classes for db_set and db_multiset. +db_set_base_iterator and db_set_iterator are the const iterator and iterator +class for db_set and db_multiset. They have identical behaviors to +std::set::const_iterator and std::set::iterator respectively. + +The difference between the two classes is that the db_set_base_iterator +can only be used to read its referenced value, while db_set_iterator allows +both read and write access. If the access pattern is readonly, it is strongly +recommended that you use the const iterator because it is faster and more +efficient. + +The two classes inherit several functions from db_map_base_iterator and +db_map_iterator respectively. 
+\sa db_map_base_iterator db_map_iterator + +@{ +*/ + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// db_set_base_iterator class template definition +// +// db_set_base_iterator class template is the const iterator class for db_set +// and db_multiset, it can only be used to read data. +// This class need to override operator* and operator-> because the +// db_set<>::value_type is different from db_map<>::value_type. We also has +// to copy the iterator movement operators into this class because the "self" +// type is different in db_map_base_iterator and this class. +// In db_set_base_iterator's inherited curpair_base_ pair, we store key to +// its first and second member, to work with db_map and db_multimap. +// Besides this, there is no further difference, so we can still safely make +// use of its inherited methods. +// +template +class _exported db_set_base_iterator : public + db_map_base_iterator > +{ +protected: + typedef db_set_base_iterator self; + typedef db_map_base_iterator > base; + using db_base_iterator::replace_current_key; +public: + + typedef kdt key_type; + typedef _DB_STL_set_value ddt; + typedef kdt value_type; + typedef value_type& reference; + typedef value_type* pointer; + typedef value_type value_type_wrap; + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + // We have to use std iterator tags to match the parameter list of + // stl internal functions, so we don't need to write our own tag + // classes + // + typedef std::bidirectional_iterator_tag iterator_category; + + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor definitions. + // + /// \name Constructors and destructor + /// Do not use these constructors to create iterators, but call + /// db_set::begin() const or db_multiset::begin() const to create + /// valid iterators. 
+ //@{ + /// Destructor. + virtual ~db_set_base_iterator() + { + + } + + /// Constructor. + /// \param powner The container which creates this iterator. + /// \param b_bulk_retrieval The bulk read buffer size. 0 means + /// bulk read disabled. + /// \param brmw Whether set DB_RMW flag in underlying cursor. + /// \param directdbget Whether do direct database get rather than + /// using key/data values cached in the iterator whenever read. + /// \param b_read_only Whether open a read only cursor. Only effective + /// when using Berkeley DB Concurrent Data Store. + explicit db_set_base_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool brmw = false, + bool directdbget = true, bool b_read_only = false) + : base(powner, b_bulk_retrieval, brmw, directdbget, b_read_only) + { + this->is_set_ = true; + } + + /// Default constructor, dose not create the cursor for now. + db_set_base_iterator() : base() + { + this->is_set_ = true; + } + + /// Copy constructor. + /// \param s The other iterator of the same type to initialize this. + db_set_base_iterator(const db_set_base_iterator&s) : base(s) + { + this->is_set_ = true; + } + + /// Base copy constructor. + /// \param bo Initialize from a base class iterator. + db_set_base_iterator(const base& bo) : base(bo) + { + this->is_set_ = true; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that shift iterator positions. + /// \name Iterator movement operators. + /// These functions are identical to those of db_map_base_iterator + /// and db_map_iterator and db_set_iterator. Actually the iterator + /// movement functions in the four classes are the same. + //@{ + // But we have to copy them into the four classes because the + // return type, namely "self" is different in each class. + /// Post-increment. + /// \return This iterator after incremented. 
+ /// \sa db_map_base_iterator::operator++() + inline self& operator++() + { + this->next(); + + return *this; + } + + /// Pre-increment. + /// \return Another iterator having the old value of this iterator. + /// \sa db_map_base_iterator::operator++(int) + inline self operator++(int) + { + self itr = *this; + + this->next(); + + return itr; + } + + /// Post-decrement. + /// \return This iterator after decremented. + /// \sa db_map_base_iterator::operator--() + inline self& operator--() + { + this->prev(); + + return *this; + } + + /// Pre-decrement. + /// \return Another iterator having the old value of this iterator. + /// \sa db_map_base_iterator::operator--(int) + self operator--(int) + { + self itr = *this; + this->prev(); + + return itr; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that retrieve values from iterator. + // + // This function returns a read only reference, you can only read + // its referenced data, not update it. + // curpair_base_ is always kept updated on iterator movement, but it + // is also updated here before making use of the value it points + // to, which is stored in curpair_base_ . + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + /// \name Functions that retrieve values from iterator. + //@{ + /// Dereference operator. + /// Return the reference to the cached data element, which is an + /// object of type T. You can only use the return value to read + /// its referenced data element, can not update it. + /// \return Current data element reference object, i.e. ElementHolder + /// or ElementRef object. + inline reference operator*() + { + int ret; + + if (this->directdb_get_) { + ret = this->pcsr_->get_current_key( + this->curpair_base_.first); + dbstl_assert(ret == 0); + // curpair_base_.second is a _DB_STL_set_value object, + // not used at all. 
Since const iterators can't be used to + // write, so this is not a problem. + } + // Returning reference, no copy construction. + return this->curpair_base_.first; + } + + // curpair_base_ is always kept updated on iterator movement, but it + // is also updated here before making use of the value it points + // to, which is stored in curpair_base_ . + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + /// Arrow operator. + /// Return the pointer to the cached data element, which is an + /// object of type T. You can only use the return value to read + /// its referenced data element, can not update it. + /// \return Current data element reference object's address, i.e. + /// address of ElementHolder or ElementRef object. + inline pointer operator->() const + { + int ret; + + if (this->directdb_get_) { + ret = this->pcsr_->get_current_key( + this->curpair_base_.first); + dbstl_assert(ret == 0); + } + + return &(this->curpair_base_.first); + } + //@} + //////////////////////////////////////////////////////////////// + + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. + /// \sa db_base_iterator::refresh(bool) + virtual int refresh(bool from_db = true) const + { + + if (from_db && !this->directdb_get_) + this->pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + this->pcsr_->get_current_key(this->curpair_base_.first); + this->curpair_base_.second = this->curpair_base_.first; + return 0; + } + +protected: + + // Declare friend classes to access protected/private members, + // while expose to outside only methods in stl specifications. 
+ // + friend class db_set; + + friend class db_map, + ElementHolder, self>; + friend class db_map, + ElementRef, self>; + + typedef pair curpair_type; + +}; // db_set_base_iterator + + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// db_set_iterator class template definition +// +// db_set_iterator class template is the iterator class for db_set and +// db_multiset, it can be used to update its referenced key/data pair. +// This class need to override operator* and operator-> because the +// db_set<>::value_type is different from db_map<>::value_type. besides +// this, there is no further difference, so we can still safely make +// use of db_set inherited methods. +// +template +class _exported db_set_iterator : + public db_map_iterator, value_type_sub> +{ +protected: + typedef db_set_iterator self; + typedef db_map_iterator, + value_type_sub> base; + +public: + + typedef kdt key_type; + typedef _DB_STL_set_value ddt; + typedef kdt value_type; + typedef value_type_sub value_type_wrap; + typedef value_type_sub& reference; + typedef value_type_sub* pointer; + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + // We have to use std iterator tags to match the parameter list of + // stl internal functions, so we don't need to write our own tag + // classes + // + typedef std::bidirectional_iterator_tag iterator_category; + + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor definitions. + /// \name Constructors and destructor + /// Do not use these constructors to create iterators, but call + /// db_set::begin() or db_multiset::begin() to create valid ones. + //@{ + /// Destructor. + virtual ~db_set_iterator() + { + + } + + /// Constructor. + /// \param powner The container which creates this iterator. + /// \param b_bulk_retrieval The bulk read buffer size. 
0 means + /// bulk read disabled. + /// \param brmw Whether set DB_RMW flag in underlying cursor. + /// \param directdbget Whether do direct database get rather than + /// using key/data values cached in the iterator whenever read. + /// \param b_read_only Whether open a read only cursor. Only effective + /// when using Berkeley DB Concurrent Data Store. + explicit db_set_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool brmw = false, + bool directdbget = true, bool b_read_only = false) + : base(powner, b_bulk_retrieval, brmw, directdbget, b_read_only) + { + this->is_set_ = true; + } + + /// Default constructor, dose not create the cursor for now. + db_set_iterator() : base() + { + this->is_set_ = true; + } + + /// Copy constructor. + /// \param s The other iterator of the same type to initialize this. + db_set_iterator(const db_set_iterator&s) : base(s) + { + this->is_set_ = true; + } + + /// Base copy constructor. + /// \param bo Initialize from a base class iterator. + db_set_iterator(const base& bo) : base(bo) + { + this->is_set_ = true; + } + + /// Sibling copy constructor. + /// Note that this class does not derive from db_set_base_iterator + /// but from db_map_iterator. + /// \param bs Initialize from a base class iterator. + db_set_iterator(const db_set_base_iterator&bs) : base(bs) + { + this->is_set_ = true; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that shift iterator positions. + /// \name Iterator movement operators. + //@{ + /// Pre-increment. + /// Identical to those of db_map_iterator. + /// \return This iterator after incremented. + /// \sa db_map_iterator::operator++() + inline self& operator++() + { + this->next(); + + return *this; + } + + /// Post-increment. + /// \return Another iterator having the old value of this iterator. 
+ /// \sa db_map_iterator::operator++(int) + inline self operator++(int) + { + self itr = *this; + + this->next(); + + return itr; + } + + /// Pre-decrement. + /// \return This iterator after decremented. + /// \sa db_map_iterator::operator--() + inline self& operator--() + { + this->prev(); + + return *this; + } + + /// Post-decrement + /// \return Another iterator having the old value of this iterator. + /// \sa db_map_iterator::operator--(int) + self operator--(int) + { + self itr = *this; + this->prev(); + + return itr; + } + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that retrieve values from iterator. + // + // This functions returns a read-write reference to its data element. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + /// \name Functions that retrieve values from iterator. + //@{ + /// Dereference operator. + /// Return the reference to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. + /// \return Current data element reference object, i.e. ElementHolder + /// or ElementRef object. + inline reference operator*() + { + + if (this->directdb_get_) + refresh(true); + // returning reference, no copy construction + return this->curpair_.second; + } + + // curpair_ is always kept updated on iterator movement, but it + // is also updated here before making use of the value it points + // to, which is stored in curpair_ . + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + /// Arrow operator. + /// Return the pointer to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. 
+ /// \return Current data element reference object's address, i.e. + /// address of ElementHolder or ElementRef object. + inline pointer operator->() const + { + + if (this->directdb_get_) + refresh(true); + + return &(this->curpair_.second); + } + //@} + //////////////////////////////////////////////////////////////// + + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. + /// \sa db_base_iterator::refresh(bool) + virtual int refresh(bool from_db = true) const + { + + if (from_db && !this->directdb_get_) + this->pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + this->pcsr_->get_current_key(this->curpair_.first); + this->curpair_.second._DB_STL_CopyData(this->curpair_.first); + this->set_curpair_base(this->curpair_.first, this->curpair_.first); + return 0; + } + +protected: + typedef pair curpair_type; + typedef db_set_base_iterator const_version; + // Declare friend classes to access protected/private members, + // while expose to outside only methods in stl specifications. + // + friend class db_set >; + friend class db_set >; + friend class db_multiset >; + friend class db_multiset >; + friend class db_map, + ElementHolder, self>; + friend class db_multimap, + ElementHolder, self>; + friend class db_map, + ElementRef, self>; + friend class db_multimap, + ElementRef, self>; + + virtual void delete_me() const + { + delete this; + } + +}; // db_set_iterator +//@} // dbset_iterators +//@} // dbstl_iterators + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// db_set class template definition +// +// db_set<> inherits from db_map<>, storing nothing data, because +// we only need the key. It uses db_set_iterator<>/db_set_base_iterator +// as its iterator and const iterator respectively. 
+// The only difference between db_set<> and db_map<> classes +// is the value_type, so we redefine the insert function by directly copying +// the db_map<>::insert, in order to make use of the newly defined +// value_type of db_set. If typedef could be dynamically bound, we won't +// have to make this duplicated effort. +/// \ingroup dbstl_containers +/// This class is the combination of std::set and hash_set. By setting +/// database handles of DB_BTREE or DB_HASH type, you will be using the +/// equivalent of std::set or hash_set. This container stores the key in the +/// key element of a key/data pair in the underlying database, +/// but doesn't store anything in the data element. +/// Database and environment handle requirement: +/// The same as that of db_map. +/// \param kdt The key data type. +/// \param value_type_sub If kdt is a class/struct type, do not specify +/// anything in this parameter; Otherwise specify ElementHolder. +/// \sa db_map db_container +// +template +class _exported db_set : public db_map, + value_type_sub, db_set_iterator > +{ +protected: + typedef db_set self; +public: + typedef db_set_iterator iterator; + typedef typename iterator::const_version const_iterator; + typedef db_reverse_iterator reverse_iterator; + typedef db_reverse_iterator + const_reverse_iterator; + typedef kdt key_type; + typedef ptrdiff_t difference_type; + // the ElementRef should store key value because key is also data, + // and *itr is key and data value + // + typedef kdt value_type; + typedef value_type_sub value_type_wrap; + typedef value_type_sub* pointer; + typedef value_type_sub& reference; + typedef value_type& const_reference; + typedef size_t size_type; + + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor. + /// \name Constructors and destructor + //@{ + /// Create a std::set/hash_set equivalent associative container. 
+ /// See the handle requirement in class details to pass correct + /// database/environment handles. + /// \param dbp The database handle. + /// \param envp The database environment handle. + /// \sa db_map(Db*, DbEnv*) db_container(Db*, DbEnv*) + explicit db_set (Db *dbp = NULL, DbEnv* envp = NULL) : base(dbp, envp){ + // There are some special handling in db_map_iterator<> + // if owner is set. + this->set_is_set(true); + } + + /// Iteration constructor. Iterates between first and last, + /// copying each of the elements in the range into + /// this container. + /// Create a std::set/hash_set equivalent associative container. + /// Insert a range of elements into the database. The range is + /// [first, last), which contains elements that can + /// be converted to type ddt automatically. + /// This function supports auto-commit. + /// See the handle requirement in class details to pass correct + /// database/environment handles. + /// \param dbp The database handle. + /// \param envp The database environment handle. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \sa db_map(Db*, DbEnv*, InputIterator, InputIterator) + template + db_set (Db *dbp, DbEnv* envp, InputIterator first, + InputIterator last) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + + const char *errmsg; + + this->init_members(dbp, envp); + this->open_db_handles(dbp, envp, DB_BTREE, + DB_CREATE | DB_THREAD, 0); + if ((errmsg = this->verify_config(dbp, envp)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + } + this->set_db_handle_int(dbp, envp); + this->set_auto_commit(dbp); + this->begin_txn(); + try { + insert(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + // There are some special handling in db_map_iterator<> + // if owner is set. + this->set_is_set(true); + + } + + /// Copy constructor. 
+ /// Create a database and insert all key/data pairs in x into this + /// container. x's data members are not copied. + /// This function supports auto-commit. + /// \param x The source container to initialize this container. + /// \sa db_map(const db_map&) db_container(const db_container&) + db_set(const self& x) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + this->init_members(x); + verify_db_handles(x); + this->set_db_handle_int(this->clone_db_config( + x.get_db_handle()), x.get_db_env_handle()); + assert(this->get_db_handle() != NULL); + + this->begin_txn(); + try { + copy_db((self&)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + // There are some special handling in db_map_iterator<> + // if owner is set. + this->set_is_set(true); + } + + virtual ~db_set(){} + //@} + //////////////////////////////////////////////////////////////// + + /// Container content assignment operator. + /// This function supports auto-commit. + /// \param x The source container whose key/data pairs will be + /// inserted into the target container. Old content in the target + /// container is discarded. + /// \return The container x's reference. + /// \sa http://www.cplusplus.com/reference/stl/set/operator=/ + const self& operator=(const self& x) + { + ASSIGNMENT_PREDCOND(x) + db_container::operator =(x); + verify_db_handles(x); + assert(this->get_db_handle() != NULL); + this->begin_txn(); + try { + this->copy_db((self &)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + return x; + } + + //////////////////////////////////////////////////////////////// + // Begin key/value compare classes/functions. + // + // key_compare class definition, it is defined as an inner class, + // using underlying btree/hash db's compare function. It is the same + // as that of db_map<>, but because we have to redefine value_compare + // in this class, gcc forces us to also define key_compare again. 
+ class key_compare + { + private: + Db*pdb; + public: + key_compare(Db*pdb1) + { + pdb = pdb1; + } + bool operator()(const kdt& k1, const kdt& k2) const + { + return compare_keys(pdb, k1, k2); + } + + }; // key_compare class definition + + class value_compare + { + key_compare kc; + public: + value_compare(Db*pdb) : kc(pdb) + { + + } + + bool operator()(const value_type& v1, + const value_type& v2) const + { + + return kc(v1, v2); + } + + }; // value_compare class definition + + /// Get value comparison functor. + /// \return The value comparison functor. + /// \sa http://www.cplusplus.com/reference/stl/set/value_comp/ + inline value_compare value_comp() const + { + return value_compare(this->get_db_handle()); + } + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin insert functions and swap function. + // + // Note that when secondary index is enabled, each db_container + // can create a db_multimap secondary container, but the insert + // function is not functional. + // + // Insert functions. Note that stl requires if the entry with x.key + // already exists, insert should not overwrite that entry and the + // insert should fail; but bdb Dbc::cursor(DB_KEYLAST) will replace + // existing data with new one, so we will first find whether we + // have this data, if have, return false; + /// \name Insert Functions + /// \sa http://www.cplusplus.com/reference/stl/set/insert/ + //@{ + /// Insert a single key/data pair if the key is not in the container. + /// \param x The key/data pair to insert. + /// \return A pair P, if insert OK, i.e. the inserted key wasn't in the + /// container, P.first will be the iterator positioned on the inserted + /// key/data pair, and P.second is true; otherwise P.first is an invalid + /// iterator equal to that returned by end() and P.second is false. 
+ pair insert (const value_type& x ) + { + pair ib; + + _DB_STL_set_value sv; + iterator witr; + + this->init_itr(witr); + this->open_itr(witr); + + if (witr.move_to(x) == 0) {// has it + ib.first = witr; + ib.second = false; + + return ib; + } + witr.itr_status_ = witr.pcsr_->insert(x, sv, DB_KEYLAST); + witr.refresh(false); + ib.first = witr; + ib.second = true; + + return ib; + } + + // NOT_AUTOCOMMIT_TAG + // Note that if first and last are on same db as this container, + // then insert may deadlock if there is no common transaction context + // for first and witr (when they are on the same page). + // The insert function in base class can not be directly used, + // got compile errors. + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + inline void insert (const_iterator& first, const_iterator& last) + { + const_iterator ii; + _DB_STL_set_value v; + iterator witr; + + this->init_itr(witr); + + this->open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, v, DB_KEYLAST); + } + + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + void insert (iterator& first, iterator& last) + { + iterator ii, witr; + _DB_STL_set_value d; + + init_itr(witr); + open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, d, DB_KEYLAST); + + } + + // Ignore the position parameter because bdb knows better + // where to insert. + /// Insert with hint position. We ignore the hint position because + /// Berkeley DB knows better where to insert. + /// \param position The hint position. + /// \param x The key/data pair to insert. 
+ /// \return The iterator positioned on the inserted key/data pair, + /// or an invalid iterator if the key was already in the container. + inline iterator insert ( iterator position, const value_type& x ) + { + pair ib = insert(x); + return ib.first; + } + + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + template + void insert (InputIterator first, InputIterator last) + { + InputIterator ii; + _DB_STL_set_value v; + iterator witr; + + this->init_itr(witr); + + this->open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, v, DB_KEYLAST); + + } + //@} + + /// Swap content with another container. + /// This function supports auto-commit. + /// \param mp The container to swap content with. + /// \param b_truncate See db_vector::swap's b_truncate parameter + /// for details. + /// \sa db_map::swap() db_vector::clear() + void swap (db_set& mp, bool b_truncate = true) + { + Db *swapdb = NULL; + std::string dbfname(64, '\0'); + + verify_db_handles(mp); + this->begin_txn(); + try { + swapdb = this->clone_db_config(this->get_db_handle(), + dbfname); + + db_set tmap(swapdb, + swapdb->get_env(), this->begin(), this->end()); + typename db_set:: + iterator itr1, itr2; + + this->clear(b_truncate);// Clear this db_map<> object. + itr1 = mp.begin(); + itr2 = mp.end(); + this->insert(itr1, itr2); + mp.clear(b_truncate); + itr1 = tmap.begin(); + itr2 = tmap.end(); + mp.insert(itr1, itr2); + // tmap has no opened cursor, so simply truncate. + tmap.clear(); + + swapdb->close(0); + if (dbfname[0] != '\0') { + swapdb = new Db(NULL, DB_CXX_NO_EXCEPTIONS); + swapdb->remove(dbfname.c_str(), NULL, 0); + swapdb->close(0); + delete swapdb; + } + this->commit_txn(); + } catch (...) 
{ + this->abort_txn(); + throw; + } + + } + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin container comparison functions. + // + // Return true if contents in m1 and m2 are identical otherwise + // return false. + // + // Note that we don't require the keys' order be identical, we are + // doing mathmatical set comparisons. + /// Set content equality comparison operator. + /// Return if the two containers have identical content. This function + /// does not rely on key order. + /// Two sets A and B are equal if and only if A contains B and B + /// contains A. + /// \param m2 The container to compare against. + /// \return Returns true if the two containers are equal, + /// false otherwise. + // + bool operator==(const db_set& m2) const + { + bool ret; + + COMPARE_CHECK(m2) + verify_db_handles(m2); + const db_set& m1 = *this; + + try { + this->begin_txn(); + if (m1.size() != m2.size()) + ret = false; + else { + typename db_set:: + const_iterator i1; + + for (i1 = m1.begin(); i1 != m1.end(); ++i1) { + if (m2.count(*i1) == 0) { + ret = false; + goto exit; + } + + } // for + ret = true; + } // else +exit: + this->commit_txn(); + // Now that m1 and m2 has the same number of unique elements and + // all elements of m1 are in m2, thus there can be no element of m2 + // that dose not belong to m1, so we won't verify each element of + // m2 are in m1. + return ret; + } catch (...) { + this->abort_txn(); + throw; + } + }// operator== + + /// Inequality comparison operator. 
+ bool operator!=(const db_set& m2) const + { + return !this->operator==(m2); + } + //////////////////////////////////////////////////////////////// + +protected: + typedef int (*db_compare_fcn_t)(Db *db, const Dbt *dbt1, + const Dbt *dbt2); + + + typedef db_map, value_type_sub, + db_set_iterator > base; +private: + + value_type_sub operator[] (const key_type& x) + { + THROW(NotSupportedException, ("db_set<>::operator[]")); + + } + + value_type_sub operator[] (const key_type& x) const + { + THROW(NotSupportedException, ("db_set<>::operator[]")); + + } + + inline void copy_db(db_set &x) + { + + // Make sure clear can succeed if there are cursors + // open in other threads. + this->clear(false); + insert(x.begin(), x.end()); + + } + +}; // db_set<> + + +///////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////// +// +// db_multiset class template definition +// +// db_multiset<> inherits from db_multimap, storing nothing as data, because +// we only need the key. It uses db_set_iterator<> and db_set_base_iterator<> +// as its iterator and const iterator respectively, so that +// we can safely make use of the inherited methods. The only difference +// is the value_type, so we redefine the insert functions by directly copying +// the db_map<>::insert versions, in order to make use of the newly defined +// value_type of db_multiset. If typedef could be dynamically bound, we won't +// have to make this duplicated effort. +/// \ingroup dbstl_containers +/// This class is the combination of std::multiset and hash_multiset. By +/// setting database handles of DB_BTREE or DB_HASH type respectively, you +/// will be using the equivalent of std::multiset or hash_multiset respectively. +/// This container stores the key in the key element of a key/data pair in +/// the underlying database, but doesn't store anything in the data element. 
+/// Database and environment handle requirement: +/// The requirement to these handles is the same as that to db_multimap. +/// \param kdt The key data type. +/// \param value_type_sub If kdt is a class/struct type, do not specify +/// anything in this parameter; Otherwise specify ElementHolder. +/// \sa db_multimap db_map db_container db_set +// +template +class _exported db_multiset : public db_multimap, + value_type_sub, db_set_iterator > +{ +protected: + typedef db_multiset self; +public: + typedef db_set_iterator iterator; + typedef typename iterator::const_version const_iterator; + typedef db_reverse_iterator reverse_iterator; + typedef db_reverse_iterator + const_reverse_iterator; + typedef kdt key_type; + typedef ptrdiff_t difference_type; + // The ElementRef should store key value because key is also data, + // and *itr is key and data value. + // + typedef kdt value_type; + typedef value_type_sub value_type_wrap; + typedef value_type_sub& reference; + typedef const value_type& const_reference; + typedef value_type_sub* pointer; + typedef size_t size_type; + +public: + //////////////////////////////////////////////////////////////// + // Begin constructors and destructor. + /// \name Constructors and destructor + //@{ + /// Create a std::multiset/hash_multiset equivalent associative + /// container. + /// See the handle requirement in class details to pass correct + /// database/environment handles. + /// \param dbp The database handle. + /// \param envp The database environment handle. + /// \sa db_multimap(Db*, DbEnv*) + explicit db_multiset (Db *dbp = NULL, DbEnv* envp = NULL) : + base(dbp, envp) { + // There are special handling in db_map_iterator<> if owner + // is a set. + this->set_is_set(true); + } + + /// Iteration constructor. Iterates between first and last, + /// copying each of the elements in the range into + /// this container. + /// Create a std::multi/hash_multiset equivalent associative container. 
+ /// Insert a range of elements into the database. The range is + /// [first, last), which contains elements that can + /// be converted to type ddt automatically. + /// This function supports auto-commit. + /// See the handle requirement in class details to pass correct + /// database/environment handles. + /// \param dbp The database handle. + /// \param envp The database environment handle. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \sa db_multimap(Db*, DbEnv*, InputIterator, InputIterator) + template + db_multiset (Db *dbp, DbEnv* envp, InputIterator first, + InputIterator last) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + + const char *errmsg; + + this->init_members(dbp, envp); + this->open_db_handles(dbp, envp, DB_BTREE, DB_CREATE | + DB_THREAD, DB_DUP); + // Note that we can't call base(dbp, envp) here because it + // will verify failed; And we can't call db_container + // directly because it is illegal to do so. + if ((errmsg = verify_config(dbp, envp)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + + } + this->set_db_handle_int(dbp, envp); + this->set_auto_commit(dbp); + + this->begin_txn(); + try { + insert(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + + // There are special handling in db_map_iterator<> if owner + // is a set. + this->set_is_set(true); + + } + + /// Copy constructor. + /// Create a database and insert all key/data pairs in x into this + /// container. x's data members are not copied. + /// This function supports auto-commit. + /// \param x The source container to initialize this container. 
+ /// \sa db_multimap(const db_multimap&) db_container(const db_container&) + db_multiset(const self& x) : base(*(new BulkRetrievalOption( + BulkRetrievalOption::BulkRetrieval))) + { + this->init_members(x); + verify_db_handles(x); + this->set_db_handle_int(this->clone_db_config( + x.get_db_handle()), x.get_db_env_handle()); + assert(this->get_db_handle() != NULL); + + this->begin_txn(); + try { + copy_db((self&)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + + // There are special handling in db_map_iterator<> if owner + // is a set. + this->set_is_set(true); + } + + virtual ~db_multiset(){} + //@} + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin functions that modify the container's content, i.e. insert, + // erase, assignment and swap functions. + // + /// Container content assignment operator. + /// This function supports auto-commit. + /// \param x The source container whose key/data pairs will be + /// inserted into the target container. Old content in the target + /// container is discarded. + /// \return The container x's reference. + /// \sa http://www.cplusplus.com/reference/stl/multiset/operator=/ + inline const self& operator=(const self& x) + { + ASSIGNMENT_PREDCOND(x) + db_container::operator =(x); + verify_db_handles(x); + assert(this->get_db_handle() != NULL); + this->begin_txn(); + try { + this->copy_db((self &)x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + return x; + } + + // Note that when secondary index is enabled, each db_container + // can create a db_multimap secondary container, but the insert + // function is not functional. + /// \name Insert Functions + /// \sa http://www.cplusplus.com/reference/stl/multiset/insert/ + //@{ + /// Insert a single key if the key is not in the container. + /// \param x The key to insert. 
+ /// \return An iterator positioned on the newly inserted key. If the + /// key x already exists, an invalid iterator equal to that returned + /// by end() function is returned. + inline iterator insert(const value_type& x ) + { + pair ib; + _DB_STL_set_value sv; + iterator witr; + + this->init_itr(witr); + this->open_itr(witr); + + witr.itr_status_ = witr.pcsr_->insert(x, sv, DB_KEYLAST); + witr.refresh(false); + return witr; + } + + // Ignore the position parameter because bdb knows better + // where to insert. + /// Insert a single key with hint if the key is not in the container. + /// The hint position is ignored because Berkeley DB controls where + /// to insert the key. + /// \param x The key to insert. + /// \param position The hint insert position, ignored. + /// \return An iterator positioned on the newly inserted key. If the + /// key x already exists, an invalid iterator equal to that returned + /// by end() function is returned. + inline iterator insert ( iterator position, const value_type& x ) + { + return insert(x); + } + + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + template + void insert (InputIterator first, InputIterator last) + { + InputIterator ii; + _DB_STL_set_value v; + + iterator witr; + + this->init_itr(witr); + this->open_itr(witr); + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, v, DB_KEYLAST); + + } + + // Member function template overload. + // This function is a specialization for the member template version + // dedicated to db_set<>. + // + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. 
+ void insert (db_set_iterator& first, + db_set_iterator& last) + { + db_set_iterator ii; + iterator witr; + _DB_STL_set_value d; + + init_itr(witr); + open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, d, DB_KEYLAST); + + + } + + // Member function template overload. + // This function is a specialization for the member template version + // dedicated to db_set<>. + // + /// Range insertion. Insert a range [first, last) of key/data pairs + /// into this container. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + void insert (db_set_base_iterator& first, + db_set_base_iterator& last) + { + db_set_base_iterator ii; + iterator witr; + _DB_STL_set_value d; + + init_itr(witr); + open_itr(witr); + + for (ii = first; ii != last; ++ii) + witr.pcsr_->insert(*ii, d, DB_KEYLAST); + } + //@} + + // There is identical function in db_multimap<> and db_multiset + // for this function, we MUST keep the code consistent on update! + // Go to db_multimap<>::erase(const key_type&) to see why. + /// \name Erase Functions + /// \sa http://www.cplusplus.com/reference/stl/multiset/erase/ + //@{ + /// Erase elements by key. + /// All key/data pairs with specified key x will be removed from + /// the underlying database. + /// This function supports auto-commit. + /// \param x The key to remove from the container. + /// \return The number of key/data pairs removed. + size_type erase (const key_type& x) + { + size_type cnt; + iterator itr; + + this->begin_txn(); + try { + pair rg = equal_range(x); + for (itr = rg.first, cnt = 0; + itr != rg.second; ++itr) { + cnt++; + itr.pcsr_->del(); + } + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + return cnt; + } + + // Can not reopen external/outside iterator's cursor, pos must + // already have been in a transactional context. 
+ // There is identical function in db_multimap<> and db_multiset + // for this function, we MUST keep the code consistent on update! + // Go to db_multimap<>::erase(const key_type&) to see why. + // + /// Erase a key/data pair at specified position. + /// \param pos A valid iterator of this container to erase. + inline void erase (iterator pos) + { + if (pos == this->end()) + return; + pos.pcsr_->del(); + } + + // Can not be auto commit because first and last are already open. + // There is identical function in db_multimap<> and db_multiset + // for this function, we MUST keep the code consistent on update! + // Go to db_multimap<>::erase(const key_type&) to see why. + // + /// Range erase. Erase all key/data pairs within the valid range + /// [first, last). + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + inline void erase (iterator first, iterator last) + { + iterator i; + + for (i = first; i != last; ++i) + i.pcsr_->del(); + } + //@} + + /// Swap content with another container. + /// This function supports auto-commit. + /// \param mp The container to swap content with. + /// \param b_truncate See db_multimap::swap() for details. + /// \sa db_map::swap() db_vector::clear() + void swap (db_multiset& mp, bool b_truncate = true) + { + Db *swapdb = NULL; + std::string dbfname(64, '\0'); + + verify_db_handles(mp); + this->begin_txn(); + try { + swapdb = this->clone_db_config(this->get_db_handle(), + dbfname); + + db_multiset tmap(swapdb, + swapdb->get_env(), this->begin(), this->end()); + this->clear(b_truncate);// Clear this db_map<> object. 
+ typename db_multiset:: + iterator itr1, itr2; + itr1 = mp.begin(); + itr2 = mp.end(); + this->insert(itr1, itr2); + mp.clear(b_truncate); + itr1 = tmap.begin(); + itr2 = tmap.end(); + mp.insert(itr1, itr2); + + tmap.clear(); + + swapdb->close(0); + if (dbfname[0] != '\0') { + swapdb = new Db(NULL, DB_CXX_NO_EXCEPTIONS); + swapdb->remove(dbfname.c_str(), NULL, 0); + swapdb->close(0); + delete swapdb; + } + this->commit_txn(); + + } catch (...) { + swapdb->close(0); + this->abort_txn(); + throw; + } + + } + //////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////// + // Begin container comparison functions. + // + // Compare two db_multiset containers for equality, containers + // containing identical set of keys are considered equal, keys' + // order are not presumed or relied upon by this comparison. + /// Container content equality compare operator. + /// This function does not rely on key order. + /// Two sets A and B are equal if and only if + /// for each and every key K having n occurrences in A, K has n + /// occurrences in B, and for each and every key K` having N + /// occurrences in B, K` has n occurrences in A. + /// \param m2 The container to compare against. + /// \return Returns true if the two containers are equal, + /// false otherwise. + bool operator==(const self& m2) const + { + COMPARE_CHECK(m2) + verify_db_handles(m2); + + const db_multiset &m1 = *this; + const_iterator i1, i11; + pair resrg1, resrg2; + size_t n1, n2; + bool ret = false; + + try { + this->begin_txn(); + if (m1.size() != m2.size()) + ret = false; + else { + for (i1 = m1.begin(); i1 != m1.end(); ) { + resrg1 = m1.equal_range_N(*i1, n1); + resrg2 = m2.equal_range_N(*i1, n2); + if (n1 != n2) { + ret = false; + goto exit; + } + + // If both is 1, resrg2 may contain no i1->first. 
+ if (n2 == 1 && !(*(resrg2.first) == + *(resrg1.first))) { + ret = false; + goto exit; + } + // m1 and m2 contains identical set of i1->first key, + // so move on, skip all equal keys in the range. + // + i1 = resrg1.second; + + } // for + ret = true; + + }// else +exit: + this->commit_txn(); + return ret; + } catch (...) { + this->abort_txn(); + throw; + } + + // Now that m1 and m2 has the same number of unique elements and all + // elements of m1 are in m2, thus there can be no element of m2 that + // dose not belong to m1, so we won't verify each element of m2 are + // in m1. + + } // operator== + + /// Inequality comparison operator. + bool operator!=(const self& m2) const + { + return !this->operator==(m2); + } + //////////////////////////////////////////////////////////////// + +protected: + + typedef int (*db_compare_fcn_t)(Db *db, const Dbt *dbt1, + const Dbt *dbt2); + typedef db_multimap, + value_type_sub, db_set_iterator > base; + + // Declare base our friend, we can't use 'friend class base' to do + // this declaration on gcc. + friend class db_multimap, + value_type_sub, db_set_iterator >; + // Declare iterator our friend, we can't use 'friend class iterator' + // to do this declaration on gcc. + friend class db_set_iterator; + friend class db_map_iterator, + value_type_sub>; + friend class db_map_iterator >; + +private: + + inline void copy_db(db_multiset &x) + { + + // Make sure clear can succeed if there are cursors + // open in other threads. + this->clear(false); + insert(x.begin(), x.end()); + + } + + // Prevent user calling the inherited version. 
+ value_type operator[] (const key_type& x) + { + THROW(NotSupportedException, ("db_multiset<>::operator[]")); + + } + + value_type operator[] (const key_type& x) const + { + THROW(NotSupportedException, ("db_multiset<>::operator[]")); + + } + + virtual const char* verify_config(Db*dbp, DbEnv* envp) const + { + DBTYPE dbtype; + u_int32_t oflags, sflags; + int ret; + const char *err = NULL; + + err = db_container::verify_config(dbp, envp); + if (err) + return err; + + BDBOP(dbp->get_type(&dbtype), ret); + BDBOP(dbp->get_open_flags(&oflags), ret); + BDBOP(dbp->get_flags(&sflags), ret); + + if (dbtype != DB_BTREE && dbtype != DB_HASH) + err = +"wrong database type, only DB_BTREE and DB_HASH allowed for db_map<> class"; + if (oflags & DB_TRUNCATE) + err = +"do not specify DB_TRUNCATE flag to create a db_map<> object"; + + // Must set DB_DUP and NOT DB_DUPSORT. + if (!(sflags & DB_DUP) || (sflags & DB_DUPSORT)) + err = +"db_multiset<> requires a database with DB_DUP set and without DB_DUPSORT set."; + + if (sflags & DB_RECNUM) + err = "no DB_RECNUM flag allowed in db_map<>"; + + return err; + + } + +}; // db_multiset<> + + +END_NS + +#endif// !_DB_STL_DB_SET_H_ diff --git a/lang/cxx/stl/dbstl_utility.h b/lang/cxx/stl/dbstl_utility.h new file mode 100644 index 00000000..fe8f8b78 --- /dev/null +++ b/lang/cxx/stl/dbstl_utility.h @@ -0,0 +1,496 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_UTILITY_H__ +#define _DB_STL_UTILITY_H__ + +#include "dbstl_inner_utility.h" + +START_NS(dbstl) + +// This class allows users to configure dynamically how a specific type of +// object is copied, stored, restored, and how to measure the type's +// instance size. +/** \defgroup dbstl_helper_classes dbstl helper classes +Classes of this module help to achieve various features of dbstl. 
+*/ +/** \ingroup dbstl_helper_classes +@{ +This class is used to register callbacks to manipulate an object of a +complex type. These callbacks are used by dbstl at runtime to +manipulate the object. + +A complex type is a type whose members are not located in a contiguous +chunk of memory. For example, the following class A is a complex type +because for any instance a of class A, a.b_ points to another object +of type B, and dbstl treats the object that a.b_ points to as part of +the data of the instance a. Hence, if the user needs to store a.b_ into +a dbstl container, the user needs to register an appropriate callback to +de-reference and store the object referenced by a.b. Similarly, the +user also needs to register callbacks to marshall an array as well as +to count the number of elements in such an array. + +class A { int m; B *p_; }; +class B { int n; }; + +The user also needs to register callbacks for +i). returning an object¡¯s size in bytes; +ii). Marshalling and unmarshalling an object; +iii). Copying a complex object and and assigning an object to another +object of the same type; +iv). Element comparison. +v). Compare two sequences of any type of objects; Measuring the length +of an object sequence and copy an object sequence. + +Several elements located in a contiguous chunk of memory form a sequence. +An element of a sequence may be a simple object located at a contigous +memory chunk, or a complex object, i.e. some of its members may contain +references (pointers) to another region of memory. It is not necessary +to store a special object to denote the end of the sequence. The callback +to traverse the constituent elements of the sequence needs to able to +determine the end of the sequence. + +Marshalling means packing the object's data members into a contiguous +chunk of memory; unmarshalling is the opposite of marshalling. 
In other +words, when you unmarshall an object, its data members are populated +with values from a previously marshalled version of the object. + +The callbacks need not be set to every type explicitly. . dbstl will +check if a needed callback function of this type is provided. +If one is available, dbstl will use the registered callback. If the +appropriate callback is not provided, dbstl will use reasonable defaults +to do the job. + +For returning the size of an object, the default behavior is to use the +sizeof() operator; For marshalling and unmarshalling, dbstl uses memcpy, +so the default behavior is sufficient for simple types whose data reside +in a contiguous chunk of memory; Dbstl uses uses >, == and < for +comparison operations; For char* and wchar_t * strings, dbstl already +provides the appropriate callbacks, so you do not need to register them. +In general, if the default behavior is adequate, you don't need to +register the corresponding callback. + +If you have registered proper callbacks, the DbstlElemTraits can also be +used as the char_traits class for std::basic_string >, +and you can enable your class T to form a basic_string>, +and use basic_string's functionality and the algorithms to manipulate it. +*/ +template +class _exported DbstlElemTraits : public DbstlGlobalInnerObject +{ +public: + /// \name Callback_typedefs Function callback type definitions. + /// Following are the callback function types, there is one function + /// pointer data member for each of the type, and a pair of set/get + /// functions for each function callback data member, and this is + /// the structure of this class. + //@{ + /// Assign object src to object dest. Most often assignment callback + /// is not needed - the class copy constructor is sufficient. + /// This description talks about the function of this type, rather + /// than the type itself, this is true to all the types in the group. 
+ typedef void (*ElemAssignFunct)(T& dest, const T&src); + /// Read data from the chunk of memory pointed by srcdata, and assign + /// to the object dest. This is also called unmashall. + typedef void (*ElemRstoreFunct)(T& dest, const void *srcdata); + /// Return object elem's size in bytes. + typedef u_int32_t (*ElemSizeFunct)(const T& elem); + /// Copy object elem's data to be persisted into a memory chunk + /// referenced by dest. The dest memory is large enough. + /// elem may not reside on a + /// consecutive chunk of memory. This is also called marshal. + typedef void (*ElemCopyFunct)(void *dest, const T&elem); + typedef int (*ElemCompareFunct)(const T&a, const T&b); + /// Compares first num number of elements of sequence a and b, returns + /// negative/0/positive value if a is less/equal/greater than b. + typedef int (*SequenceNCompareFunct)(const T *a, const T *b, + size_t num); + /// Compares sequence a and b, returns negative/0/positive + /// value if a is less/equal/greater than b. + typedef int (*SequenceCompareFunct)(const T *a, const T *b); + /// Return the sequence's number of elements. + typedef u_int32_t (*SequenceLenFunct)(const T *seqs); + + /// Copy the sequence seqs's first num elements to seqd. + /// The sequence seqs of type T objects may not reside in a continuous + /// chunk of memory, but the seqd sequence points to a consecutive + /// chunk of memory large enough to hold all objects from seqs. + /// And if T is a complex type, you should register your ElemCopyFunct + /// object marshalling manipulator + typedef void (*SequenceCopyFunct)(T *seqd, const T *seqs, size_t num); + //@} + + typedef T char_type; + typedef long int_type; + + /// \name Interface compatible with std::string's char_traits. + /// Following are char_traits funcitons, which make this class + /// char_traits compatiable, so that it can be used in + /// std::basic_string template, and be manipulated by the c++ stl + /// algorithms. 
+ //@{ + /// Assignone object to another. + static void assign(T& left, const T& right) + { + if (inst_->assign_) + inst_->assign_(left, right); + else + left = right; + } + + /// Check for equality of two objects. + static bool eq(const T& left, const T& right) + { + if (inst_->elemcmp_) + return inst_->elemcmp_(left, right) == 0; + else + return left == right; + } + + /// \brief Less than comparison. + /// + /// Returns if object left is less than object right. + static bool lt(const T& left, const T& right) + { + if (inst_->elemcmp_) + return inst_->elemcmp_(left, right) < 0; + else + return left < right; + } + + /// \brief Sequence comparison. + /// + /// Compares the first cnt number of elements in the two + /// sequences seq1 and seq2, returns negative/0/positive if seq1 is + /// less/equal/greater than seq2. + static int compare(const T *seq1, const T *seq2, size_t cnt) + { + if (inst_->seqncmp_) + return inst_->seqncmp_(seq1, seq2, cnt); + else { + for (; 0 < cnt; --cnt, ++seq1, ++seq2) + if (!eq(*seq1, *seq2)) + return (lt(*seq1, *seq2) ? -1 : +1); + } + return (0); + } + + /// Returns the number of elements in sequence seq1. Note that seq1 + /// may or may not end with a trailing '\0', it is completely user's + /// responsibility for this decision, though seq[0], seq[1],... + /// seq[length - 1] are all sequence seq's memory. + static size_t length(const T *seq) + { + assert(inst_->seqlen_ != NULL); + return (size_t)inst_->seqlen_(seq); + } + + /// Copy first cnt number of elements from seq2 to seq1. + static T * copy(T *seq1, const T *seq2, size_t cnt) + { + if (inst_->seqcpy_) + inst_->seqcpy_(seq1, seq2, cnt); + else { + T *pnext = seq1; + for (; 0 < cnt; --cnt, ++pnext, ++seq2) + assign(*pnext, *seq2); + } + return (seq1); + } + + /// Find within the first cnt elements of sequence seq the position + /// of element equal to elem. 
+ static const T * find(const T *seq, size_t cnt, const T& elem) + { + for (; 0 < cnt; --cnt, ++seq) + if (eq(*seq, elem)) + return (seq); + return (0); + } + + /// \brief Sequence movement. + /// + /// Move first cnt number of elements from seq2 to seq1, seq1 and seq2 + /// may or may not overlap. + static T * move(T *seq1, const T *seq2, size_t cnt) + { + T *pnext = seq1; + if (seq2 < pnext && pnext < seq2 + cnt) + for (pnext += cnt, seq2 += cnt; 0 < cnt; --cnt) + assign(*--pnext, *--seq2); + else + for (; 0 < cnt; --cnt, ++pnext, ++seq2) + assign(*pnext, *seq2); + return (seq1); + } + + /// Assign first cnt number of elements of sequence seq with the + /// value of elem. + static T * assign(T *seq, size_t cnt, T elem) + { + T *pnext = seq; + for (; 0 < cnt; --cnt, ++pnext) + assign(*pnext, elem); + return (seq); + } + + static T to_char_type(const int_type& meta_elem) + { // convert metacharacter to character + return ((T)meta_elem); + } + + static int_type to_int_type(const T& elem) + { // convert character to metacharacter + return ((int_type)elem); + } + + static bool eq_int_type(const int_type& left, + const int_type& right) + { // test for metacharacter equality + return (left == right); + } + + static int_type eof() + { // return end-of-file metacharacter + return ((int_type)EOF); + } + + static int_type not_eof(const int_type& meta_elem) + { // return anything but EOF + return (meta_elem != eof() ? (int_type)meta_elem : + (int_type)!eof()); + } + + //@} + + /// Factory method to create a singeleton instance of this class. + /// The created object will be deleted by dbstl upon process exit. + inline static DbstlElemTraits *instance() + { + if (!inst_) { + inst_ = new DbstlElemTraits(); + register_global_object(inst_); + } + return inst_; + } + + /// \name Set/get functions for callback function pointers. + /// These are the setters and getters for each callback function + /// pointers. 
+ //@{ + inline void set_restore_function(ElemRstoreFunct f) + { + restore_ = f; + } + + inline ElemRstoreFunct get_restore_function() { return restore_; } + + inline void set_assign_function(ElemAssignFunct f) + { + assign_ = f; + } + + inline ElemAssignFunct get_assign_function() { return assign_; } + + inline ElemSizeFunct get_size_function() { return size_; } + + inline void set_size_function(ElemSizeFunct f) { size_ = f; } + + inline ElemCopyFunct get_copy_function() { return copy_; } + + inline void set_copy_function(ElemCopyFunct f) { copy_ = f; } + + inline void set_sequence_len_function(SequenceLenFunct f) + { + seqlen_ = f; + } + + inline SequenceLenFunct get_sequence_len_function() { return seqlen_; } + + inline SequenceCopyFunct get_sequence_copy_function() + { + return seqcpy_; + } + + inline void set_sequence_copy_function(SequenceCopyFunct f) + { + seqcpy_ = f; + } + + inline void set_compare_function(ElemCompareFunct f) + { + elemcmp_ = f; + } + + inline ElemCompareFunct get_compare_function() + { + return elemcmp_; + } + + inline void set_sequence_compare_function(SequenceCompareFunct f) + { + seqcmp_ = f; + } + + inline SequenceCompareFunct get_sequence_compare_function() + { + return seqcmp_; + } + + inline void set_sequence_n_compare_function(SequenceNCompareFunct f) + { + seqncmp_ = f; + } + + inline SequenceNCompareFunct get_sequence_n_compare_function() + { + return seqncmp_; + } + //@} + + ~DbstlElemTraits(){} +protected: + inline DbstlElemTraits() + { + assign_ = NULL; + restore_ = NULL; + size_ = NULL; + copy_ = NULL; + seqlen_ = NULL; + seqcpy_ = NULL; + seqcmp_ = NULL; + seqncmp_ = NULL; + elemcmp_ = NULL; + } + + static DbstlElemTraits *inst_; + + // Data members to hold registered function pointers. 
+ ElemAssignFunct assign_; + ElemRstoreFunct restore_; + ElemSizeFunct size_; + ElemCopyFunct copy_; + ElemCompareFunct elemcmp_; + SequenceCompareFunct seqcmp_; + SequenceNCompareFunct seqncmp_; + SequenceLenFunct seqlen_; + SequenceCopyFunct seqcpy_; +}; //DbstlElemTraits +//@} // dbstl_helper_classes + +template +DbstlElemTraits *DbstlElemTraits::inst_ = NULL; + +/** +\ingroup dbstl_helper_classes +@{ +You can persist all bytes in a chunk of contiguous memory by constructing +an DbstlDbt object A(use malloc to allocate the required number of bytes for +A.data and copy the bytes to be stored into A.data, set other +fields as necessary) and store A into a container, e.g. db_vector, +this stores the bytes rather than the object A into the underlying database. +The DbstlDbt class can help you avoid memory leaks, +so it is strongly recommended that you use DbstlDbt rather than Dbt class. + +DbstlDbt derives from Dbt class, and it does an deep copy on copy construction +and assignment --by calling malloc to allocate its own memory and then +copying the bytes to it; Conversely the destructor will free the memory on +destruction if the data pointer is non-NULL. The destructor assumes the +memory is allocated via malloc, hence why you are required to call +malloc to allocate memory in order to use DbstlDbt. + +DbstlDbt simply inherits all methods from Dbt with no extra +new methods except the constructors/destructor and assignment operator, so it +is easy to use. + +In practice you rarely need to use DbstlDbt +or Dbt because dbstl enables you to store any complex +objects or primitive data. Only when you need to store raw bytes, +e.g. a bitmap, do you need to use DbstlDbt. + +Hence, DbstlDbt is the right class to +use to store any object into Berkeley DB via dbstl without memory leaks. + +Don't free the memory referenced by DbstlDbt objects, it will be freed when the +DbstlDbt object is destructed. 
+ +Please refer to the two examples using DbstlDbt in +TestAssoc::test_arbitrary_object_storage and +TestAssoc::test_char_star_string_storage member functions, +which illustrate how to correctly use DbstlDbt in order to store raw bytes. + +This class handles the task of allocating and de-allocating memory internally. +Although it can be used to store data which cannot be handled by the +DbstlElemTraits +class, in practice, it is usually more convenient to register callbacks in the +DbstlElemTraits class for the type you are storing/retrieving using dbstl. +*/ +class DbstlDbt : public Dbt +{ + inline void deep_copy(const DbstlDbt &d) + { + u_int32_t len; + if (d.get_data() != NULL && d.get_size() > 0) { + if (d.get_flags() & DB_DBT_USERMEM) + len = d.get_ulen(); + else + len = d.get_size(); + + set_data(DbstlMalloc(len)); + memcpy(get_data(), d.get_data(), len); + } + } + +public: + /// Construct an object with an existing chunk of memory of size1 + /// bytes, refered by data1, + DbstlDbt(void *data1, u_int32_t size1) : Dbt(data1, size1){} + DbstlDbt() : Dbt(){} + /// The memory will be free'ed by the destructor. + ~DbstlDbt() + { + free_mem(); + } + + /// This copy constructor does a deep copy. + DbstlDbt(const DbstlDbt &d) : Dbt(d) + { + deep_copy(d); + } + + /// The memory will be reallocated if neccessary. + inline const DbstlDbt &operator = (const DbstlDbt &d) + { + ASSIGNMENT_PREDCOND(d) + + if (d.get_data() != NULL && d.get_size() > 0) { + free_mem(); + memcpy(this, &d, sizeof(d)); + } + + deep_copy(d); + return d; + } + +protected: + /// Users don't need to call this function. + inline void free_mem() + { + if (get_data()) { + free(get_data()); + memset(this, 0, sizeof(*this)); + } + } + +}; +//@} // dbstl_help_classes +END_NS + +#endif // ! 
_DB_STL_UTILITY_H__ + + diff --git a/lang/cxx/stl/dbstl_vector.h b/lang/cxx/stl/dbstl_vector.h new file mode 100644 index 00000000..6e28ee10 --- /dev/null +++ b/lang/cxx/stl/dbstl_vector.h @@ -0,0 +1,3332 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_STL_DB_VECTOR_H +#define _DB_STL_DB_VECTOR_H + +#include "dbstl_common.h" +#include "dbstl_dbc.h" +#include "dbstl_element_ref.h" +#include "dbstl_resource_manager.h" +#include "dbstl_container.h" +#include "dbstl_base_iterator.h" +#include +#include + +START_NS(dbstl) + +using std::list; +using std::istream; +using std::ostream; +using std::sort; + +#define TRandDbCursor RandDbCursor + +// Forward Declarations +// The default parameter is needed for the following code to work. +template > +class db_vector; +template > +class db_vector_iterator; +template class DbCursor; +template class RandDbCursor; +template class ElementRef; +template +class DbstlListSpecialOps; + +/// \ingroup dbstl_iterators +/// @{ +/// \defgroup db_vector_iterators Iterator classes for db_vector. +/// db_vector has two iterator classes --- db_vector_base_iterator and +/// db_vector_iterator. The differences +/// between the two classes are that the db_vector_base_iterator +/// can only be used to read its referenced value, so it is intended as +/// db_vector's const iterator; While the other class allows both read and +/// write access. If your access pattern is readonly, it is strongly +/// recommended that you use the const iterator because it is faster +/// and more efficient. +/// The two classes have identical behaviors to std::vector::const_iterator and +/// std::vector::iterator respectively. Note that the common public member +/// function behaviors are described in the db_base_iterator section. 
+/// \sa db_base_iterator +//@{ +/////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +// +// db_vector_base_iterator class template definition +// +// db_vector_base_iterator is the const_iterator class for db_vector, and +// base class of db_vector_iterator -- iterator class for db_vector. Each +// db_vector_base_iterator object owns one RandDbCursor<> cursor (1:1 map). +// On copy construction, the cursor is not duplicated, it is only created +// when it is really used (ie: lazily). +// The value referred by this iterator is read only, can't be used to mutate +// its referenced data element. +// +/// This class is the const iterator class for db_vector, and it is +/// inheirted by the db_vector_iterator class, which is the iterator +/// class for db_vector. +template +class _exported db_vector_base_iterator : public db_base_iterator +{ +protected: + // typedef's can't be put after where it is used. + typedef db_vector_base_iterator self; + typedef db_recno_t index_type; + using db_base_iterator::replace_current_key; +public: + //////////////////////////////////////////////////////////////////// + // + // Begin public type definitions. + // + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + + /// This is the return type for operator[]. + /// \sa value_type_wrap operator[](difference_type _Off) const + typedef value_type value_type_wrap; + typedef value_type& reference; + typedef value_type* pointer; + // Use the STL tag, to ensure compatability with interal STL functions. + // + typedef std::random_access_iterator_tag iterator_category; + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // Begin public constructors and destructor. 
+ /// \name Constructors and destroctor + /// Do not construct iterators explictily using these constructors, + /// but call db_vector::begin() const to get an valid iterator. + /// \sa db_vector::begin() const + //@{ + db_vector_base_iterator(const db_vector_base_iterator& vi) + : base(vi), pcsr_(vi.pcsr_), curpair_base_(vi.curpair_base_) + { + + } + + explicit db_vector_base_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool rmw = false, + bool directdbget = true, bool readonly = false) + : base(powner, directdbget, readonly, b_bulk_retrieval, rmw), + pcsr_(new TRandDbCursor(b_bulk_retrieval, rmw, directdbget)) + { + + } + + db_vector_base_iterator() : pcsr_(NULL) + { + + } + + virtual ~db_vector_base_iterator() + { + this->dead_ = true; + if (pcsr_) + pcsr_->close(); + } + //@} + + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that compare iterator positions. + // + // Use itr_status_ to do comparison if it is non-zero because end + // iterator does not have an underlying key/data pair, and call + // underlying cursor comparison otherwise. + /// \name Iterator comparison operators + /// The way to compare two iterators is to compare the index values + /// of the two elements they point to. The iterator sitting on an + /// element with less index is regarded to be smaller. And the invalid + /// iterator sitting after last element is greater than any other + /// iterators, because it is assumed to have an index equal to last + /// element's index plus one; The invalid iterator sitting before first + /// element is less than any other iterators because it is assumed to + /// have an index -1. + //@{ + /// \brief Equality comparison operator. + /// + /// Invalid iterators are equal; Valid iterators + /// sitting on the same key/data pair equal; Otherwise not equal. + /// \param itr The iterator to compare against. 
+ /// \return True if this iterator equals to itr; False otherwise. + inline bool operator==(const self&itr) const + { + COMPARE_CHECK(itr) + if ((itr.itr_status_ == this->itr_status_) && + (this->itr_status_ == INVALID_ITERATOR_POSITION)) + return true; + + if (this->itr_status_ != INVALID_ITERATOR_POSITION && + itr.itr_status_ != INVALID_ITERATOR_POSITION) { + return (pcsr_->compare((itr.pcsr_.base_ptr())) == 0); + } + return false; + } + + /// \brief Unequal compare, identical to !operator(==itr) + /// \param itr The iterator to compare against. + /// \return False if this iterator equals to itr; True otherwise. + inline bool operator!=(const self&itr) const + { + return !(*this == itr) ; + } + + // end() iterator is largest. If both are end() iterator return false. + /// \brief Less than comparison operator. + /// \param itr The iterator to compare against. + /// \return True if this iterator is less than itr. + inline bool operator < (const self& itr) const + { + bool ret; + + if (this == &itr) + return false; + + char after_last = base::IPT_AFTER_LAST, + bef_fst = base::IPT_BEFORE_FIRST; + + if (this->itr_status_ == INVALID_ITERATOR_POSITION && + this->inval_pos_type_ == after_last) + ret = false; + else if (this->itr_status_ == INVALID_ITERATOR_POSITION && + this->inval_pos_type_ == bef_fst) + ret = itr.inval_pos_type_ != bef_fst; + else { // This iterator is on an ordinary position. + if (itr.itr_status_ == INVALID_ITERATOR_POSITION && + itr.inval_pos_type_ == after_last) + ret = true; + else if (itr.itr_status_ == INVALID_ITERATOR_POSITION && + itr.inval_pos_type_ == bef_fst) + ret = false; + else { // Both iterators are on an ordinary position. + // By this stage all valid cases using + // INVALID_ITERATOR_POSITION have been dealt + // with. 
+ assert((this->itr_status_ != + INVALID_ITERATOR_POSITION) && + (itr.itr_status_ != + INVALID_ITERATOR_POSITION)); + ret = pcsr_->compare( + (itr.pcsr_.base_ptr())) < 0; + } + } + return ret; + } + + /// \brief Less equal comparison operator. + /// \param itr The iterator to compare against. + /// \return True if this iterator is less than or equal to itr. + inline bool operator <= (const self& itr) const + { + return !(this->operator>(itr)); + } + + /// \brief Greater equal comparison operator. + /// \param itr The iterator to compare against. + /// \return True if this iterator is greater than or equal to itr. + inline bool operator >= (const self& itr) const + { + return !(this->operator<(itr)); + } + + // end() iterator is largest. If both are end() iterator return false. + /// \brief Greater comparison operator. + /// \param itr The iterator to compare against. + /// \return True if this iterator is greater than itr. + inline bool operator > (const self& itr) const + { + bool ret; + + if (this == &itr) + return false; + + char after_last = base::IPT_AFTER_LAST, + bef_fst = base::IPT_BEFORE_FIRST; + + if (this->itr_status_ == INVALID_ITERATOR_POSITION && + this->inval_pos_type_ == after_last) + ret = itr.inval_pos_type_ != after_last; + else if (this->itr_status_ == INVALID_ITERATOR_POSITION && + this->inval_pos_type_ == bef_fst) + ret = false; + else { // This iterator is on an ordinary position. + if (itr.itr_status_ == INVALID_ITERATOR_POSITION && + itr.inval_pos_type_ == after_last) + ret = false; + else if (itr.itr_status_ == INVALID_ITERATOR_POSITION && + itr.inval_pos_type_ == bef_fst) + ret = true; + else { // Both iterators are on an ordinary position. + // By this stage all valid cases using + // INVALID_ITERATOR_POSITION have been dealt + // with. 
+ assert((this->itr_status_ != + INVALID_ITERATOR_POSITION) && + (itr.itr_status_ != + INVALID_ITERATOR_POSITION)); + ret = pcsr_->compare( + (itr.pcsr_.base_ptr())) > 0; + } + } + return ret; + } + //@} // vctitr_cmp + //////////////////////////////////////////////////////////////////// + + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that shift the iterator position. + // + /// \name Iterator movement operators. + /// When we talk about iterator movement, we think the + /// container is a uni-directional range, represented by [begin, end), + /// and this is true no matter we are using iterators or reverse + /// iterators. When an iterator is moved closer to "begin", we say it + /// is moved forward, otherwise we say it is moved backward. + //@{ + /// \brief Pre-increment. + /// + /// Move the iterator one element backward, so that + /// the element it sits on has a bigger index. + /// Use ++iter rather than iter++ where possible to avoid two useless + /// iterator copy constructions. + /// \return This iterator after incremented. + inline self& operator++() + { + move_by(*this, 1, false); + return *this; + } + + /// \brief Post-increment. + /// Move the iterator one element backward, so that + /// the element it sits on has a bigger index. + /// Use ++iter rather than iter++ where possible to avoid two useless + /// iterator copy constructions. + /// \return A new iterator not incremented. + inline self operator++(int) + { + self itr(*this); + move_by(*this, 1, false); + + return itr; + } + + /// \brief Pre-decrement. + /// Move the iterator one element backward, so + /// that the element it sits on has a smaller index. + /// Use --iter rather than iter-- where possible to avoid two useless + /// iterator copy constructions. + /// \return This iterator after decremented. + inline self& operator--() + { + move_by(*this, 1, true); + return *this; + } + + /// \brief Post-decrement. 
+ /// + /// Move the iterator one element backward, so + /// that the element it sits on has a smaller index. + /// Use --iter rather than iter-- where possible to avoid two useless + /// iterator copy constructions. + /// \return A new iterator not decremented. + inline self operator--(int) + { + self itr = *this; + move_by(*this, 1, true); + return itr; + } + + /// \brief Assignment operator. + /// + /// This iterator will point to the same key/data + /// pair as itr, and have the same configurations as itr. + /// \sa db_base_iterator::operator= + /// \param itr The right value of the assignment. + /// \return This iterator's reference. + inline const self& operator=(const self&itr) + { + ASSIGNMENT_PREDCOND(itr) + base::operator=(itr); + + if (pcsr_) + pcsr_->close(); + pcsr_ = itr.pcsr_; + curpair_base_ = itr.curpair_base_; + return itr; + } + + // Always move both iterator and cursor synchronously, keep iterators + // data synchronized. + /// Iterator movement operator. + /// Return another iterator by moving this iterator forward by n + /// elements. + /// \param n The amount and direction of movement. If negative, will + /// move forward by |n| element. + /// \return The new iterator at new position. + inline self operator+(difference_type n) const + { + self itr(*this); + move_by(itr, n, false); + return itr; + } + + /// \brief Move this iterator backward by n elements. + /// \param n The amount and direction of movement. If negative, will + /// move forward by |n| element. + /// \return Reference to this iterator at new position. + inline const self& operator+=(difference_type n) + { + move_by(*this, n, false); + return *this; + } + + /// \brief Iterator movement operator. + /// + /// Return another iterator by moving this iterator backward by n + /// elements. + /// \param n The amount and direction of movement. If negative, will + /// move backward by |n| element. + /// \return The new iterator at new position. 
+ inline self operator-(difference_type n) const + { + self itr(*this); + move_by(itr, n); + + return itr; + } + + /// \brief Move this iterator forward by n elements. + /// \param n The amount and direction of movement. If negative, will + /// move backward by |n| element. + /// \return Reference to this iterator at new position. + inline const self& operator-=(difference_type n) + { + move_by(*this, n); + return *this; + } + //@} //itr_movement + + /// \brief Iterator distance operator. + /// + /// Return the index difference of this iterator and itr, so if this + /// iterator sits on an element with a smaller index, this call will + /// return a negative number. + /// \param itr The other iterator to substract. itr can be the invalid + /// iterator after last element or before first element, their index + /// will be regarded as last element's index + 1 and -1 respectively. + /// \return The index difference. + difference_type operator-(const self&itr) const + { + difference_type p1, p2; + + if (itr == end_itr_) { + if (itr.inval_pos_type_ == base::IPT_BEFORE_FIRST) + p2 = -1; + else if ( + this->inval_pos_type_ == base::IPT_AFTER_LAST) { + self pitr2(itr); + pitr2.open(); + pitr2.last(); + p2 = pitr2.get_current_index() + 1; + } else { + THROW0(InvalidIteratorException); + } + } else + p2 = itr.get_current_index(); + + if (*this == end_itr_) { + if (this->inval_pos_type_ == base::IPT_BEFORE_FIRST) + p1 = -1; + else if ( + this->inval_pos_type_ == base::IPT_AFTER_LAST) { + self pitr1(*this); + pitr1.open(); + pitr1.last(); + p1 = pitr1.get_current_index() + 1; + } else { + THROW0(InvalidIteratorException); + } + } else + p1 = this->get_current_index(); + + return (difference_type)(p1 - p2); + } + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that retrieve values from the iterator. + // + // Each iterator has a dedicated cursor, and we keep the iterator + // and cursor synchronized all the time. 
The returned value is read + // only, can't be used to mutate its underlying data element. + // + /// \name Functions that retrieve values from the iterator. + //@{ + /// \brief Dereference operator. + /// + /// Return the reference to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. + /// The returned value can only be used to read its referenced + /// element. + /// \return The reference to the element this iterator points to. + inline reference operator*() const + { + if (this->directdb_get_) + update_cur_pair(); + return curpair_base_; // Return reference, no copy construction. + } + + /// \brief Arrow operator. + /// + /// Return the pointer to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. + /// The returned value can only be used to read its referenced + /// element. + /// \return The address of the referenced object. + inline pointer operator->() const + { + if (this->directdb_get_) + update_cur_pair(); + return &(curpair_base_); + } + + /// \brief Iterator index operator. + /// + /// If _Off not in a valid range, the returned value will be invalid. + /// Note that you should use a value_type_wrap type to hold the + /// returned value. + /// \param _Off The valid index relative to this iterator. + /// \return Return the element which is at position *this + _Off. + /// The returned value can only be used to read its referenced + /// element. + inline value_type_wrap operator[](difference_type _Off) const + { + self itr(*this + _Off); + return itr.curpair_base_; + } + //@} + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that are dbstl specific. + // + /// \name Functions that are dbstl specific. 
+ //@{ + /// \brief Get current index of within the vector. + /// + /// Return the iterators current element's index (0 based). Requires + /// this iterator to be a valid iterator, not end_itr_. + /// \return current index of the iterator. + inline index_type get_current_index() const + { + return pcsr_->get_current_index() - 1; + } + + /// \brief Iterator movement function. + /// + /// Move this iterator to the index "n". If n is not in the valid + /// range, this iterator will be an invalid iterator equal to end() + /// iterator. + /// \param n target element's index. + /// \sa db_vector::end(); + inline void move_to(index_type n) const + { + T d; + int ret; + + this->itr_status_ = pcsr_->move_to(n + 1); + ret = pcsr_->get_current_data(d); + dbstl_assert(ret == 0); + if (this->itr_status_ == 0) + update_cur_pair(); + } + + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. + /// \sa db_base_iterator::refresh(bool). + virtual int refresh(bool from_db = true) + { + + if (from_db && !this->directdb_get_) + this->pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + this->pcsr_->get_current_data(curpair_base_); + + return 0; + } + + /// \brief Close underlying Berkeley DB cursor of this iterator. + /// \sa db_base_iterator::close_cursor() const + inline void close_cursor() const + { + this->pcsr_->close(); + } + + /// \brief Modify bulk buffer size. + /// + /// Bulk read is enabled when creating an + /// iterator, so you later can only modify the bulk buffer size + /// to another value, but can't enable/disable bulk read while an + /// iterator is already alive. + /// \param sz The new size of the bulk read buffer of this iterator. + /// \return Returns true if succeeded, false otherwise. 
+ /// \sa db_base_iterator::set_bulk_buffer(u_int32_t sz) + bool set_bulk_buffer(u_int32_t sz) + { + bool ret = this->pcsr_->set_bulk_buffer(sz); + if (ret) + this->bulk_retrieval_ = sz; + return ret; + } + + /// \brief Get bulk retrieval buffer size in bytes. + /// \return Return current bulk buffer size, or 0 if bulk retrieval is + /// not enabled. + /// \sa db_base_iterator::get_bulk_bufsize() + u_int32_t get_bulk_bufsize() + { + this->bulk_retrieval_ = pcsr_->get_bulk_bufsize(); + return this->bulk_retrieval_; + } + //@} + //////////////////////////////////////////////////////////////////// + +protected: + typedef T value_type_base; + typedef db_base_iterator base; + typedef index_type size_type; + typedef index_type key_type; + typedef T data_type; + + // end_itr_ is always the same, so share it across iterator instances. + static self end_itr_; + + ////////////////// iterator data members ////////////////////////// + // + mutable LazyDupCursor pcsr_; + + void open() const + { + u_int32_t oflags = 0, oflags2 = 0; + int ret; + DbEnv *penv = this->owner_->get_db_env_handle(); + + oflags2 = this->owner_->get_cursor_open_flags(); + if (!this->read_only_ && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)), ret); + // Open a writable cursor when in CDS mode and not + // requesting a read only iterator. + if ((oflags & DB_INIT_CDB) != 0) + oflags2 |= DB_WRITECURSOR; + } + + if (!this->pcsr_) { + + this->pcsr_.set_cursor(new TRandDbCursor( + this->bulk_retrieval_, + this->rmw_csr_, this->directdb_get_)); + } + this->itr_status_ = this->pcsr_->open(this->owner_, oflags2); + } + + // Move the underlying Dbc* cursor to the first element. + // + inline int first() const + { + int ret = 0; + + if ((ret = pcsr_->first()) == 0) + update_cur_pair(); + else + this->itr_status_ = ret; + + return ret; + } + + // Move the underlying Dbc* cursor to the last element. 
+ // + inline index_type last() const + { + index_type pos_index, lsz; + + // last_index() will move the underlying cursor to last record. + lsz = pcsr_->last_index(); + if (lsz > 0) { + pos_index = lsz - 1; + this->itr_status_ = 0; + update_cur_pair(); + } else { + this->itr_status_ = INVALID_ITERATOR_POSITION; + pos_index = INVALID_INDEX; + } + + return pos_index; + } + + void set_curpair_base(const T& d) + { + curpair_base_ = d; + } + + // Update curpair_base_'s data using current underlying key/data pair's + // value. Called on every iterator movement. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + virtual void update_cur_pair() const + { + this->pcsr_->get_current_data(curpair_base_); + } + + // The 'back' parameter indicates whether to decrease or + // increase the index when moving. The default is to decrease. + // + // Do not throw exceptions here because it is normal to iterate to + // "end()". + // Always move both iterator and cursor synchronously, keep iterators + // data synchronized. + void move_by(const self&itr, difference_type n, bool back = true) const + { + if (n == 0) + return; + if (!back) + n = -n; + if (itr == end_itr_) { + if (n > 0) { // Go back from end() + itr.open(); + itr.last(); + // Moving from end to the last position is + // considered one place. + if (n > 1) { + itr.itr_status_ = + itr.pcsr_->advance(-n + 1); + if (itr.itr_status_ == + INVALID_ITERATOR_POSITION) + itr.inval_pos_type_ = + base::IPT_BEFORE_FIRST; + } + } else + // Can't go further forward from the end. 
+ return; + + } else { + if (n == 1) + itr.itr_status_ = itr.pcsr_->prev(); + else if (n == -1) + itr.itr_status_ = itr.pcsr_->next(); + else + itr.itr_status_ = itr.pcsr_->advance(-n); + } + + itr.update_cur_pair(); + if (itr.itr_status_ != 0) { + if (n > 0) + itr.inval_pos_type_ = base::IPT_BEFORE_FIRST; + else + itr.inval_pos_type_ = base::IPT_AFTER_LAST; + } + } + +private: + // Current data element cached here. + mutable T curpair_base_; + + + friend class db_vector >; + friend class db_vector >; + friend class DbCursor; + friend class RandDbCursor; + friend class ElementRef; + friend class ElementHolder; +}; // db_vector_base_iterator<> + +template + db_vector_base_iterator db_vector_base_iterator::end_itr_; + +/////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +// +// db_vector_iterator class template definition +// This class is the iterator class for db_vector, its instances can +// be used to mutate their referenced data element. +// +template +class _exported db_vector_iterator : + public db_vector_base_iterator +{ +protected: + typedef db_vector_iterator self; + typedef db_recno_t index_type; + using db_base_iterator::replace_current_key; +public: + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef difference_type distance_type; + typedef value_type_sub& reference; + + /// This is the return type for operator[]. + /// \sa value_type_wrap operator[](difference_type _Off) const + typedef value_type_sub value_type_wrap; + typedef value_type_sub* pointer; + // Use the STL tag, to ensure compatability with interal STL functions. + // + typedef std::random_access_iterator_tag iterator_category; + + //////////////////////////////////////////////////////////////////// + // Begin public constructors and destructor. 
+ // + /// \name Constructors and destructor + /// Do not construct iterators explictily using these constructors, + /// but call db_vector::begin to get an valid iterator. + /// \sa db_vector::begin + //@{ + db_vector_iterator(const db_vector_iterator& vi) + : db_vector_base_iterator(vi), + curpair_(vi.curpair_) + { + curpair_._DB_STL_SetIterator(this); + } + + explicit db_vector_iterator(db_container*powner, + u_int32_t b_bulk_retrieval = 0, bool brmw = false, + bool directdbget = true, bool b_read_only = false) + : db_vector_base_iterator(powner, + b_bulk_retrieval, brmw, directdbget, b_read_only) + { + curpair_._DB_STL_SetIterator(this); + this->read_only_ = b_read_only; + this->rmw_csr_ = brmw; + } + + db_vector_iterator() : db_vector_base_iterator() + { + curpair_._DB_STL_SetIterator(this); + } + + db_vector_iterator(const db_vector_base_iterator&obj) + : db_vector_base_iterator(obj) + { + curpair_._DB_STL_CopyData(*obj); + } + + virtual ~db_vector_iterator() + { + this->dead_ = true; + } + //@} + + //////////////////////////////////////////////////////////////////// + + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that shift the iterator position. + // + // These functions are identical to those defined in + // db_vector_base_iterator, but we have to redefine them here because + // the "self" have different definitions. + // + // Do not throw exceptions here because it is normal to iterate to + // "end()". + // Always move both iterator and cursor synchronously, keep iterators + // data synchronized. + // + /// \name Iterator movement operators. + /// These functions have identical behaviors and semantics as those of + /// db_vector_base_iterator, so please refer to equivalent in that + /// class. + //@{ + /// \brief Pre-increment. + /// \return This iterator after incremented. 
+ /// \sa db_vector_base_iterator::operator++() + inline self& operator++() + { + move_by(*this, 1, false); + return *this; + } + + /// \brief Post-increment. + /// \return A new iterator not incremented. + /// \sa db_vector_base_iterator::operator++(int) + inline self operator++(int) + { + self itr(*this); + move_by(*this, 1, false); + + return itr; + } + + /// \brief Pre-decrement. + /// \return This iterator after decremented. + /// \sa db_vector_base_iterator::operator--() + inline self& operator--() + { + move_by(*this, 1, true); + return *this; + } + + /// \brief Post-decrement. + /// \return A new iterator not decremented. + /// \sa db_vector_base_iterator::operator--(int) + inline self operator--(int) + { + self itr = *this; + move_by(*this, 1, true); + return itr; + } + + /// \brief Assignment operator. + /// + /// This iterator will point to the same key/data + /// pair as itr, and have the same configurations as itr. + /// \param itr The right value of the assignment. + /// \return This iterator's reference. + /// \sa db_base_iterator::operator=(const self&) + inline const self& operator=(const self&itr) + { + ASSIGNMENT_PREDCOND(itr) + base::operator=(itr); + + curpair_._DB_STL_CopyData(itr.curpair_); + return itr; + } + + // Always move both iterator and cursor synchronously, keep iterators + // data synchronized. + /// \brief Iterator movement operator. + /// + /// Return another iterator by moving this iterator backward by n + /// elements. + /// \param n The amount and direction of movement. If negative, will + /// move forward by |n| element. + /// \return The new iterator at new position. + /// \sa db_vector_base_iterator::operator+(difference_type n) const + inline self operator+(difference_type n) const + { + self itr(*this); + move_by(itr, n, false); + return itr; + } + + /// \brief Move this iterator backward by n elements. + /// \param n The amount and direction of movement. If negative, will + /// move forward by |n| element. 
+ /// \return Reference to this iterator at new position. + /// \sa db_vector_base_iterator::operator+=(difference_type n) + inline const self& operator+=(difference_type n) + { + move_by(*this, n, false); + return *this; + } + + /// \brief Iterator movement operator. + /// + /// Return another iterator by moving this iterator forward by n + /// elements. + /// \param n The amount and direction of movement. If negative, will + /// move backward by |n| element. + /// \return The new iterator at new position. + /// \sa db_vector_base_iterator::operator-(difference_type n) const + inline self operator-(difference_type n) const + { + self itr(*this); + move_by(itr, n); + + return itr; + } + + /// \brief Move this iterator forward by n elements. + /// \param n The amount and direction of movement. If negative, will + /// move backward by |n| element. + /// \return Reference to this iterator at new position. + /// \sa db_vector_base_iterator::operator-=(difference_type n) + inline const self& operator-=(difference_type n) + { + move_by(*this, n); + return *this; + } + //@} // itr_movement + + /// \brief Iterator distance operator. + /// + /// Return the index difference of this iterator and itr, so if this + /// iterator sits on an element with a smaller index, this call will + /// return a negative number. + /// \param itr The other iterator to substract. itr can be the invalid + /// iterator after last element or before first element, their index + /// will be regarded as last element's index + 1 and -1 respectively. + /// \return The index difference. + /// \sa db_vector_base_iterator::operator-(const self& itr) const + difference_type operator-(const self&itr) const + { + return base::operator-(itr); + } + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin functions that retrieve values from the iterator. 
+ // + // Each iterator has a dedicated cursor, and we keep the iterator + // and cursor synchronized all the time. The return value can be used + // to mutate its referenced data element. If directdb_get_ is true(the + // default value), users must call element._DB_STL_SaveElement() to + // store the changes they made to the data element before a next call + // of this function, otherwise the change is lost. + // + /// \name Functions that retrieve values from the iterator. + //@{ + /// \brief Dereference operator. + /// + /// Return the reference to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. + /// The returned value can be used to read or update its referenced + /// element. + /// \return The reference to the element this iterator points to. + inline reference operator*() const + { + if (this->directdb_get_) + update_cur_pair(); + return curpair_; // Return reference, no copy construction. + } + + /// \brief Arrow operator. + /// + /// Return the pointer to the cached data element, which is an + /// ElementRef object if T is a class type or an ElementHolder + /// object if T is a C++ primitive data type. + /// The returned value can be used to read or update its referenced + /// element. + /// \return The address of the referenced object. + inline pointer operator->() const + { + if (this->directdb_get_) + update_cur_pair(); + return &curpair_; + } + + // We can't return reference here otherwise we are returning an + // reference to an local object. + /// \brief Iterator index operator. + /// + /// If _Off not in a valid range, the returned value will be invalid. + /// Note that you should use a value_type_wrap type to hold the + /// returned value. + /// \param _Off The valid index relative to this iterator. 
+ /// \return Return the element which is at position *this + _Off, + /// which is an ElementRef object if T is a class type or + /// an ElementHolder object if T is a C++ primitive data type. + /// The returned value can be used to read or update its referenced + /// element. + inline value_type_wrap operator[](difference_type _Off) const + { + self *itr = new self(*this + _Off); + + value_type_sub ref(itr->curpair_); + ref._DB_STL_SetDelItr(); + return ref; + } + //@} // funcs_val + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin dbstl specific functions. + // + /// \brief Refresh iterator cached value. + /// \param from_db If not doing direct database get and this parameter + /// is true, we will retrieve data directly from db. + /// \sa db_base_iterator::refresh(bool) + virtual int refresh(bool from_db = true) + { + T d; + + if (from_db && !this->directdb_get_) + this->pcsr_->update_current_key_data_from_db( + DbCursorBase::SKIP_NONE); + this->pcsr_->get_current_data(d); + curpair_._DB_STL_SetData(d); + this->set_curpair_base(d); + return 0; + } + //////////////////////////////////////////////////////////////////// + +protected: + typedef T value_type_base; + typedef db_vector_base_iterator base; + typedef index_type size_type; + typedef index_type key_type; + typedef T data_type; + typedef db_vector OwnerType; + + friend class db_vector; + friend class DbCursor; + friend class RandDbCursor; + friend class ElementRef; + friend class ElementHolder; + + // The data item this iterator currently points to. It is updated + // on every iterator movement. By default directdb_get_ member is true + // (can be set to false via container's begin() method), so whenever + // operator */-> is called, it is retrieved from db, to support + // concurrency. 
+ mutable value_type_sub curpair_; + + virtual void delete_me() const + { + if (!this->dead_) + delete this; + } + + virtual self* dup_itr() const + { + self *itr = new self(*this); + // The curpair_ of itr does not delete itr, the independent + // one does. + // itr->curpair_._DB_STL_SetDelItr(); + return itr; + } + + // Replace the current key/data pair's data with d. Can only be called + // by non-const iterator. Normally internal functions do not wrap + // transactions, but replace_current is used in assignment by the user + // so it needs to be wrapped. + virtual int replace_current(const T& d) + { + int ret = 0; + + if (this->read_only_) { + THROW(InvalidFunctionCall, ( +"replace_current can't be called via a readonly iterator.")); + } + ret = this->pcsr_->replace(d); + + return ret; + } + + // This function is part of the db_base_iterator interface, but + // is not valid for db_vector_iterator. + virtual int replace_current_key(const T&) { + THROW(InvalidFunctionCall, ( +"replace_current_key not supported by db_vector_iterator<>")); + } + + // Update curpair_'s data using current underlying key/data pair's + // value. Called on every iterator movement. + // Even if this iterator is invalid, this call is allowed, the + // default value of type T is returned. + // + virtual void update_cur_pair() const + { + T t; + + this->pcsr_->get_current_data(t); + curpair_._DB_STL_CopyData(t); + base::update_cur_pair(); + } + + +}; // db_vector_iterator +//@} // db_vector_iterators +//@} // dbstl_iterators + +// These operators make "n + itr" expressions valid. 
Without it, you can only +// use "itr + n" +template +db_vector_base_iterator operator+(typename db_vector_base_iterator:: + difference_type n, db_vector_base_iterator itr) +{ + db_vector_base_iterator i2 = itr; + + i2 += n; + return i2; +} + +template +db_vector_iterator operator+( + typename db_vector_iterator:: + difference_type n, db_vector_iterator itr) +{ + db_vector_iterator i2 = itr; + + i2 += n; + return i2; +} + +template +db_reverse_iterator operator+(typename + db_reverse_iterator::difference_type n, + db_reverse_iterator itr) +{ + db_reverse_iterator i2 = itr; + // The db_reverse_iterator::operator+ will subtract + // n in it, we pass the + here. + // + i2 += n; + return i2; +} + +/// \ingroup dbstl_containers +//@{ +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// +// db_vector class template definition +// +/// The db_vector class has the union set of public member functions as +/// std::vector, std::deque and std::list, and each method has identical +/// default semantics to that in the std equivalent containers. +/// The difference is that the data is maintained using a Berkeley DB database +/// as well as some Berkeley DB related extensions. +/// \param T The type of data to store. +/// \param value_type_sub If T is a class/struct type, do not specify anything +/// for this parameter; Otherwise, specify ElementHolder to it. +/// Database(dbp) and environment(penv) handle requirement(applies for all +/// constructors of this class template): +/// dbp must meet the following requirement: +/// 1. dbp must be a DB_RECNO type of database handle. +/// 2. DB_THREAD must be set to dbp's open flags. +/// 3. An optional flag DB_RENUMBER is required if the container object +/// is supposed to be a std::vector or std::deque equivalent; Not +/// required if it is a std::list equivalent. 
But dbstl will not check +/// whether DB_RENUMBER is set to this database handle. +/// Setting DB_RENUMBER will cause the index values of all elements in +/// the underlying databse to be maintained consecutive and in order, +/// which involves potentially a lot of work because many indices may +/// be updated. +/// See the db_container(Db*, DbEnv*) for more information about the two +/// parameters. +/// \sa db_container db_container(Db*, DbEnv*) db_container(const db_container&) +// +template +class _exported db_vector: public db_container +{ +private: + typedef db_vector self; + typedef db_recno_t index_type; +public: + typedef T value_type; + typedef value_type_sub value_type_wrap; + typedef value_type_sub* pointer; + typedef db_vector_iterator iterator; + typedef db_vector_base_iterator const_iterator; + typedef index_type size_type; + typedef db_reverse_iterator reverse_iterator; + typedef const value_type_sub* const_pointer; + typedef db_reverse_iterator + const_reverse_iterator; + + typedef typename value_type_sub::content_type const_value_type; + + // We don't use value_type_sub& as a reference type here because + // we created a local iterator object in operator[], and we must keep + // an active cursor on the key/data pair. Thus operator[] needs to + // return an object rather than a reference to a local object. + typedef value_type_sub reference; + + // We can't return reference here because we are using a local iterator. + typedef const_value_type const_reference; + typedef ptrdiff_t difference_type; + + ///////////////////////////////////////////////////////////////////// + // Begin functions that create iterators. + /// \name Iterator functions. + //@{ + /// \brief Create a read-write or read-only iterator. + /// + /// We allow users to create a readonly + /// iterator here so that they don't have to use a const container + /// to create a const_iterator. But using const_iterator + /// is faster. 
The flags set via db_container::set_cursor_oflags() is + /// used as the cursor open flags. + /// \param rmw Whether this iterator will open a Berkeley DB + /// cursor with DB_RMW flag set. If the iterator is used to read a + /// key/data pair, then update it and store back to db, it is good + /// to set the DB_RMW flag, by specifying RMWItrOpt::read_modify_write() + /// If you don't want to set the DB_RMW flag, specify + /// RMWItrOpt::no_read_modify_write(), which is the default behavior. + /// \param readonly Whether the iterator is created as a readonly + /// iterator. Read only iterators can not update its underlying + /// key/data pair. + /// \param bulk_read Whether read database key/data pairs in bulk, by + /// specifying DB_MULTIPLE_KEY flag to underlying cursor's Dbc::get + /// function. Only readonly iterators can do bulk retrieval, if + /// iterator is not read only, this parameter is ignored. Bulk + /// retrieval can accelerate reading speed because each database read + /// operation will read many key/data pairs, thus saved many database + /// read operations. The default bulk buffer size is 32KB, you can + /// set your desired bulk buffer size by specifying + /// BulkRetrievalOpt::bulk_retrieval(your_bulk_buffer_size); + /// If you don't want bulk retrieval, set + /// BulkRetrievalItrOpt::no_bulk_retrieval() as the real parameter. + /// \param directdb_get Whether always read key/data pair from backing + /// db rather than using the value cached in the iterator. The current + /// key/data pair is cached in the iterator and always kept updated on + /// iterator movement, but in some extreme conditions, errors can + /// happen if you use cached key/data pairs without always refreshing + /// them from database. By default we are always reading from database + /// when we are accessing the data the iterator sits on, except when + /// we are doing bulk retrievals. 
But your application can gain extra + /// performance promotion if you can set this flag to false. + /// \return The created iterator. + /// \sa db_container::set_cursor_oflags(); + iterator begin(ReadModifyWriteOption rmw = + ReadModifyWriteOption::no_read_modify_write(), + bool readonly = false, BulkRetrievalOption bulk_read = + BulkRetrievalOption::no_bulk_retrieval(), + bool directdb_get = true) + { + bool b_rmw; + u_int32_t bulk_retrieval; + + bulk_retrieval = 0; + b_rmw = (rmw == ReadModifyWriteOption::read_modify_write()); + if (readonly && b_rmw) + b_rmw = false; + // Bulk only available to readonly iterators. + if (readonly && bulk_read == + BulkRetrievalOption::BulkRetrieval) + bulk_retrieval = bulk_read.bulk_buf_size(); + + iterator itr((db_container*)this, bulk_retrieval, b_rmw, + directdb_get, readonly); + + open_itr(itr, readonly); + itr.first(); + + return itr; + } + + /// \brief Create a const iterator. + /// + /// The created iterator can only be used to read its referenced + /// data element. Can only be called when using a const reference to + /// the contaienr object. The parameters have identical meanings and + /// usage to those of the other non-const begin function. + /// \param bulkretrieval Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param directdb_get Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \return The created const iterator. + /// \sa begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + const_iterator begin(BulkRetrievalOption bulkretrieval = + (BulkRetrievalOption::no_bulk_retrieval()), + bool directdb_get = true) const + { + u_int32_t b_bulk_retrieval = (bulkretrieval == + BulkRetrievalOption::BulkRetrieval) ? + bulkretrieval.bulk_buf_size() : 0; + + const_iterator itr((db_container*)this, b_bulk_retrieval, + false, directdb_get, true); + + open_itr(itr, true); + itr.first(); + + return itr; + } + + /// \brief Create an open boundary iterator. 
+ /// \return Returns an invalid iterator denoting the position after + /// the last valid element of the container. + inline iterator end() + { + end_itr_.owner_ = (db_container*)this; + end_itr_.inval_pos_type_ = db_base_iterator::IPT_AFTER_LAST; + return end_itr_; + } + + /// \brief Create an open boundary iterator. + /// \return Returns an invalid const iterator denoting the position + /// after the last valid element of the container. + inline const_iterator end() const + { + end_itr_.owner_ = (db_container*)this; + end_itr_.inval_pos_type_ = db_base_iterator::IPT_AFTER_LAST; + return end_itr_; + } + + /// \brief Create a reverse iterator. + /// + /// This function creates a reverse iterator initialized to sit on the + /// last element in the underlying database, and can be used to + /// read/write. The meaning and usage of + /// its parameters are identical to the above begin function. + /// \param rmw Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param bulk_read Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param directdb_get Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param readonly Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \return The created iterator. + /// \sa begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + reverse_iterator rbegin( + ReadModifyWriteOption rmw = + ReadModifyWriteOption::no_read_modify_write(), + bool readonly = false, BulkRetrievalOption bulk_read = + BulkRetrievalOption::no_bulk_retrieval(), + bool directdb_get = true) + { + u_int32_t bulk_retrieval = 0; + + reverse_iterator itr(end()); + itr.rmw_csr_ = + (rmw == (ReadModifyWriteOption::read_modify_write())); + itr.directdb_get_ = directdb_get; + itr.read_only_ = readonly; + itr.owner_ = (db_container*)this; + + // Bulk only available to readonly iterators. 
+ if (readonly && + bulk_read == BulkRetrievalOption::BulkRetrieval) + bulk_retrieval = bulk_read.bulk_buf_size(); + itr.bulk_retrieval_ = bulk_retrieval; + + return itr; + } + + /// \brief Create a const reverse iterator. + /// + /// This function creates a const reverse iterator initialized to sit + /// on the last element in the backing database, and can only read the + /// element, it is only available to const db_vector containers. + /// The meaning and usage of its parameters are identical as above. + /// \param bulkretrieval Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \param directdb_get Same as that of + /// begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + /// \return The created iterator. + /// \sa begin(ReadModifyWrite, bool, BulkRetrievalOption, bool); + const_reverse_iterator rbegin(BulkRetrievalOption bulkretrieval = + BulkRetrievalOption(BulkRetrievalOption::no_bulk_retrieval()), + bool directdb_get = true) const + { + const_reverse_iterator itr(end()); + itr.bulk_retrieval_ = + (bulkretrieval == (BulkRetrievalOption::BulkRetrieval) ? + bulkretrieval.bulk_buf_size() : 0); + + itr.directdb_get_ = directdb_get; + itr.read_only_ = true; + itr.owner_ = (db_container*)this; + + return itr; + } + + /// \brief Create an open boundary iterator. + /// \return Returns an invalid iterator denoting the position + /// before the first valid element of the container. + inline reverse_iterator rend() + { + reverse_iterator itr; // No cursor created. + + itr.itr_status_ = INVALID_ITERATOR_POSITION; + itr.owner_ = (db_container*)this; + itr.inval_pos_type_ = db_base_iterator::IPT_BEFORE_FIRST; + return itr; + } + + /// \brief Create an open boundary iterator. + /// \return Returns an invalid const iterator denoting the position + /// before the first valid element of the container. + inline const_reverse_iterator rend() const + { + const_reverse_iterator itr; // No cursor created. 
+ + itr.itr_status_ = INVALID_ITERATOR_POSITION; + itr.owner_ = (db_container*)this; + itr.inval_pos_type_ = db_base_iterator::IPT_BEFORE_FIRST; + return itr; + } + //@} // iterator_funcs + ///////////////////////////////////////////////////////////////////// + + /// \brief Get container size. + /// This function supports auto-commit. + // All container methods using internal working iterators can be used + // to implement autocommit if no DB operations can be directly used, + // because the work iterator is not supposed to point to a specific + // record before reopening it. + /// \return Return the number of elements in this container. + /// \sa http://www.cplusplus.com/reference/stl/vector/size.html + size_type size() const + { + index_type sz; + + try { + this->begin_txn(); + const_iterator derefitr; + init_itr(derefitr); + open_itr(derefitr, true); + sz = derefitr.last() + 1; + this->commit_txn(); + return (size_type)sz; // Largest index is the size. + } catch (...) { + this->abort_txn(); + throw; + } + } + + /// \name Huge return + /// These two functions return 2^30, denoting a huge number that does + /// not overflow, because dbstl does not have to manage memory space. + /// But the return value is not the real limit, see the Berkeley DB + /// database limits for the limits. + //@{ + /// Get max size. + /// The returned size is not the actual limit of database. See the + /// Berkeley DB limits to get real max size. + /// \return A meaningless huge number. + inline size_type max_size() const + { + return SIZE_T_MAX; + } + + /// Get capacity. + inline size_type capacity() const + { + return SIZE_T_MAX; + } + //@} + + /// Returns whether this container is empty. + /// \return True if empty, false otherwise. + inline bool empty() const + { + const_iterator itr = begin(); + return itr.itr_status_ == INVALID_ITERATOR_POSITION; + } + + ///////////////////////////////////////////////////////////////////// + // Begin element access functions. 
+ // + /// \name Element access functions. + /// The operator[] and at() only come from std::vector and std::deque, + /// If you are using db_vector as std::list, you don't have + /// to set DB_RENUMBER flag to the backing database handle, and you get + /// better performance, but at the same time you can't use these + /// functions. Otherwise if you have set the DB_RENUMBER flag to the + /// backing database handle, you can use this function though it is an + /// std::list equivalent. + //@{ + /// Index operator, can act as both a left value and a right value. + /// \param n The valid index of the vector. + /// \return The reference to the element at specified position. + inline reference operator[](index_type n) + { + iterator derefitr, *pitr; + + init_itr(derefitr); + open_itr(derefitr); + if (n == INVALID_INDEX) + n = derefitr.last(); + derefitr.move_to(n); + + pitr = new iterator(derefitr); + reference ref(pitr->curpair_); + ref._DB_STL_SetDelItr(); + return ref; + } + + /// \brief Read only index operator. + /// + /// Only used as a right value, no need for assignment capability. + /// The return value can't be used to update the element. + /// \param n The valid index of the vector. + /// \return The const reference to the element at specified position. + inline const_reference operator[](index_type n) const + { + const_iterator derefitr; + + init_itr(derefitr); + open_itr(derefitr); + if (n == INVALID_INDEX) + n = derefitr.last(); + derefitr.move_to(n); + + // _DB_STL_value returns a reference + return (*derefitr); + } + + /// \brief Index function. + /// \param n The valid index of the vector. + /// \return The reference to the element at specified position, can + /// act as both a left value and a right value. + /// \sa http://www.cplusplus.com/reference/stl/vector/at.html + inline reference at(index_type n) + { + return (*this)[n]; + } + + /// \brief Read only index function. + /// + /// Only used as a right value, no need for assignment capability. 
+ /// The return value can't be used to update the element. + /// \param n The valid index of the vector. + /// \return The const reference to the element at specified position. + /// \sa http://www.cplusplus.com/reference/stl/vector/at.html + inline const_reference at(index_type n) const + { + return (*this)[n]; + } + + /// Return a reference to the first element. + /// \return Return a reference to the first element. + /// \sa http://www.cplusplus.com/reference/stl/vector/front.html + inline reference front() + { + iterator itr, *pitr; + + init_itr(itr); + open_itr(itr); + itr.first(); + + pitr = new iterator(itr); + reference ref(pitr->curpair_); + ref._DB_STL_SetDelItr(); + return ref; + } + + /// \brief Return a const reference to the first element. + /// + /// The return value can't be used to update the element. + /// \return Return a const reference to the first element. + /// \sa http://www.cplusplus.com/reference/stl/vector/front.html + inline const_reference front() const + { + const_iterator itr; + + init_itr(itr); + open_itr(itr); + itr.first(); + return (*itr); + } + + /// Return a reference to the last element. + /// \return Return a reference to the last element. + /// \sa http://www.cplusplus.com/reference/stl/vector/back.html + inline reference back() + { + iterator itr, *pitr; + + init_itr(itr); + open_itr(itr); + itr.last(); + + pitr = new iterator(itr); + reference ref(pitr->curpair_); + ref._DB_STL_SetDelItr(); + return ref; + } + + /// \brief Return a reference to the last element. + /// + /// The return value can't be used to update the element. + /// \return Return a reference to the last element. 
+ /// \sa http://www.cplusplus.com/reference/stl/vector/back.html + inline const_reference back() const + { + const_iterator itr; + + init_itr(itr); + open_itr(itr); + itr.last(); + return (*itr); + } + //@} + + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin db_vector constructors and destructor. + /// \brief Constructor. + /// + /// Note that we do not need an allocator in db-stl containser, but + /// we need backing up Db* and DbEnv*, and we have to verify that the + /// passed in bdb handles are valid for use by the container class. + /// See class detail for handle requirement. + /// \param dbp The same as that of db_container(Db*, DbEnv*); + /// \param penv The same as that of db_container(Db*, DbEnv*); + /// \sa db_container(Db*, DbEnv*); + // + explicit db_vector(Db* dbp = NULL, DbEnv *penv = NULL) + : base(dbp, penv) + { + const char *errmsg; + + this->open_db_handles(dbp, penv, DB_RECNO, DB_CREATE | + DB_THREAD, DB_RENUMBER); + if ((errmsg = verify_config(dbp, penv)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + } + this->set_db_handle_int(dbp, penv); + } + + /// \brief Constructor. + /// + /// This function supports auto-commit. + /// Insert n elements of T type into the database, the value of the + /// elements is the default value or user set value. + /// See class detail for handle requirement. + /// \param n The number of elements to insert. + /// \param val The value of elements to insert. 
+ /// \param dbp The same as that of db_container(Db*, DbEnv*); + /// \param penv The same as that of db_container(Db*, DbEnv*); + /// \sa db_vector(Db*, DbEnv*); db_container(Db*, DbEnv*); + explicit db_vector(size_type n, const T& val = T(), Db* dbp = NULL, + DbEnv *penv = NULL) : base(dbp, penv) + { + size_type i; + const char *errmsg; + + this->open_db_handles(dbp, penv, DB_RECNO, DB_CREATE | + DB_THREAD, DB_RENUMBER); + if ((errmsg = verify_config(dbp, penv)) != NULL) { + THROW(InvalidArgumentException, ("Db*", errmsg)); + } + this->set_db_handle_int(dbp, penv); + + this->begin_txn(); + // This transaction will prevent push_back to autocommit, + // as expected, because a single push_back should not be + // automatic in this function + // + try { + for (i = 0; i < n; i++) + push_back(val); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + /// \brief Copy constructor. + /// + /// This function supports auto-commit. + /// Insert all elements in x into this container. + /// \sa db_container(const db_container&) + db_vector(const self& x) : db_container(x) + { + // This objects underlying db should have been opened already, + // only copy db contents. + // + verify_db_handles(x); + this->set_db_handle_int(this->clone_db_config( + x.get_db_handle()), x.get_db_env_handle()); + + this->begin_txn(); + + try { + copydb(x); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + /// Range constructor. + /// This function supports auto-commit. + // The order of parameters has to be altered in order to avoid + // clashing with the other constuctor above (the one with size_type + // as first parameter). + /// Insert a range of elements into this container. The range is + /// [first, last), which contains elements that can be converted to + /// type T automatically. + /// See class detail for handle requirement. 
+ /// \param dbp The same as that of db_container(Db*, DbEnv*); + /// \param penv The same as that of db_container(Db*, DbEnv*); + /// \param first Range closed boundary. + /// \param last Range open boundary. + /// \sa db_vector(Db*, DbEnv*); + template + db_vector(Db*dbp, DbEnv *penv, + InputIterator first, InputIterator last) : base(dbp, penv) + { + const char *errmsg; + + this->open_db_handles(dbp, penv, DB_RECNO, DB_CREATE | + DB_THREAD, DB_RENUMBER); + if ((errmsg = verify_config(dbp, penv)) != NULL) + THROW(InvalidArgumentException, ("Db*", errmsg)); + + this->set_db_handle_int(dbp, penv); + + this->begin_txn(); + try { + push_range(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + + this->commit_txn(); + } + + /// Range constructor. + /// This function supports auto-commit. + /// Insert the range of elements in [first, last) into this container. + /// See class detail for handle requirement. + /// \param dbp The same as that of db_container(Db*, DbEnv*); + /// \param penv The same as that of db_container(Db*, DbEnv*); + /// \param first Range closed boundary. + /// \param last Range open boundary. + /// \sa db_vector(Db*, DbEnv*); + db_vector(const_iterator first, const_iterator last, + Db*dbp = NULL, DbEnv *penv = NULL) : base(dbp, penv) + { + const char *errmsg; + + this->open_db_handles(dbp, penv, DB_RECNO, DB_CREATE | + DB_THREAD, DB_RENUMBER); + if ((errmsg = verify_config(dbp, penv)) != NULL) + THROW(InvalidArgumentException, ("Db*", errmsg)); + this->set_db_handle_int(dbp, penv); + + this->begin_txn(); + try { + push_range(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + + this->commit_txn(); + } + + // We don't have to close Berkeley DB database or environment handles + // because all related handles are managed by ResourceManager and + // closed in the right order when the program exits. 
+ // + virtual ~db_vector() { + } + + //////////////////////////////////////////////////////////////////// + + /// \brief Container assignment operator. + /// + /// This function supports auto-commit. + /// This db_vector is assumed to be valid for use, only copy + /// content of x into this container. + /// \param x The right value container. + /// \return The container x's reference. + const self& operator=(const self& x) + { + ASSIGNMENT_PREDCOND(x) + // TODO: rename verify_db_handles to validate_db_handle + db_container::operator =(x); + verify_db_handles(x); + this->begin_txn(); + try { + copydb(x); + } catch (...) { + this->abort_txn(); + throw; + } + + this->commit_txn(); + return x; + } + + //////////////////////////////////////////////////////////////////// + // + // Begin db_vector comparison functions. + /// \name Compare functions. + /// \sa http://www.sgi.com/tech/stl/Vector.html + //@{ + /// \brief Container equality comparison operator. + /// + /// This function supports auto-commit. + /// \return Compare two vectors, return true if they have identical + /// sequences of elements, otherwise return false. + /// \param v2 The vector to compare against. + template + bool operator==(const db_vector& v2) const + { + bool ret; + size_t sz; + + verify_db_handles(v2); + typename self::iterator i1; + typename db_vector::iterator i2; + + try { + this->begin_txn(); + if ((sz = this->size()) != v2.size()) { + ret = false; + this->commit_txn(); + return ret; + } + if (sz == 0) { + ret = true; + this->commit_txn(); + return ret; + } + + // Identical size, compare each element. + for (i1 = begin(), i2 = v2.begin(); i1 != end() && + i2 != v2.end(); ++i1, ++i2) + if (!((T)(*i1) == (T2)(*i2))) { + ret = false; + this->commit_txn(); + return ret; + } + + // All elements equal, the two vectors are equal. + ret = true; + this->commit_txn(); + return ret; + } catch (...) { + this->abort_txn(); + throw; + } + } + + /// \brief Container in-equality comparison operator. 
+ /// + /// This function supports auto-commit. + /// \param v2 The vector to compare against. + /// \return Returns false if elements in each slot of both + /// containers equal; Returns true otherwise. + template + bool operator!=(const db_vector& v2) const + { + return !this->operator==(v2); + } + + /// \brief Container equality comparison operator. + /// + /// This function supports auto-commit. + /// \return Compare two vectors, return true if they have identical + /// elements, otherwise return false. + bool operator==(const self& v2) const + { + bool ret; + + COMPARE_CHECK(v2) + verify_db_handles(v2); + + try { + this->begin_txn(); + if (this->size() != v2.size()) { + ret = false; + this->commit_txn(); + return ret; + } + typename self::const_iterator i1; + typename self::const_iterator i2; + // Identical size, compare each element. + for (i1 = begin(), i2 = v2.begin(); i1 != end() && + i2 != v2.end(); ++i1, ++i2) + if (!(*i1 == *i2)) { + ret = false; + this->commit_txn(); + return ret; + } + + // All elements are equal, the two vectors are equal. + ret = true; + this->commit_txn(); + return ret; + } catch (...) { + this->abort_txn(); + throw; + } + } + + /// \brief Container in-equality comparison operator. + /// + /// This function supports auto-commit. + /// \param v2 The vector to compare against. + /// \return Returns false if elements in each slot of both + /// containers equal; Returns true otherwise. + bool operator!=(const self& v2) const + { + return !this->operator==(v2); + } + + /// \brief Container less than comparison operator. + /// + /// This function supports auto-commit. + /// \param v2 The container to compare against. + /// \return Compare two vectors, return true if this is less + /// than v2, otherwise return false. 
+ bool operator<(const self& v2) const + { + bool ret; + + if (this == &v2) + return false; + + verify_db_handles(v2); + typename self::const_iterator i1; + typename self::const_iterator i2; + size_t s1, s2, sz, i; + + try { + this->begin_txn(); + s1 = this->size(); + s2 = v2.size(); + sz = s1 < s2 ? s1 : s2; + + // Compare each element. + for (i1 = begin(), i = 0, i2 = v2.begin(); + i < sz; + ++i1, ++i2, ++i) { + if (*i1 == *i2) + continue; + else { + if (*i1 < *i2) + ret = true; + else + ret = false; + this->commit_txn(); + return ret; + } + } + + ret = s1 < s2; + this->commit_txn(); + return ret; + } catch (...) { + this->abort_txn(); + throw; + } + } + //@} // cmp_funcs + //////////////////////////////////////////////////////////////////// + + /// \brief Resize this container to specified size n, insert values t + /// if need to enlarge the container. + /// + /// This function supports auto-commit. + /// \param n The number of elements in this container after the call. + /// \param t The value to insert when enlarging the container. + /// \sa http://www.cplusplus.com/reference/stl/vector/resize.html + inline void resize(size_type n, T t = T()) + { + size_t i, sz; + + try { + begin_txn(); + if (n == (sz = size())) { + commit_txn(); + return; + } + + if (n < sz) // Remove sz - n elements at tail. + erase(begin() + n, end()); + else + for (i = sz; i < n; i++) + push_back(t); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Reserve space. + /// + /// The vector is backed by Berkeley DB, we always have enough space. + /// This function does nothing, because dbstl does not have to manage + /// memory space. + inline void reserve(size_type /* n */) + { + } + + /** \name Assign functions + See the function documentation for the correct usage of b_truncate + parameter. 
+ @{ + The following four member functions have default parameter b_truncate, + because they require all key/data pairs in the database be deleted + before the real operation, and by default we use Db::truncate to + truncate the database rather than delete the key/data pairs one by + one, but Db::truncate requirs no open cursors on the database handle, + and the four member functions will close any open cursors of backing + database handle in current thread, but can do nothing to cursors of + other threads opened from the same database handle. + So you must make sure there are no open cursors of the database handle + in any other threads. On the other hand, users can specify "false" to + the b_truncate parameter and thus the key/data pairs will be deleted + one by one. Other than that, they have identical behaviors as their + counterparts in std::vector. + \sa http://www.cplusplus.com/reference/stl/vector/assign.html + */ + /// Assign a range [first, last) to this container. + /// \param first The range closed boundary. + /// \param last The range open boundary. + /// \param b_truncate See its member group doc for details. + template + void assign ( InputIterator first, InputIterator last, + bool b_truncate = true) + { + if (this->get_db_handle() == NULL) + return; + + this->begin_txn(); + try { + clear(b_truncate); + push_range(first, last); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + /// Assign a range [first, last) to this container. + /// \param first The range closed boundary. + /// \param last The range open boundary. + /// \param b_truncate See its member group doc for details. + void assign(const_iterator first, const_iterator last, + bool b_truncate = true) + { + if (this->get_db_handle() == NULL) + return; + + this->begin_txn(); + try { + clear(b_truncate); + push_range(first, last); + + } catch (...) 
{ + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + /// Assign n number of elements of value u into this container. + /// \param n The number of elements in this container after the call. + /// \param u The value of elements to insert. + /// \param b_truncate See its member group doc for details. + /// This function supports auto-commit. + void assign ( size_type n, const T& u, bool b_truncate = true) + { + if (this->get_db_handle() == NULL) + return; + + this->begin_txn(); + try { + clear(b_truncate); + size_t i; + for (i = 0; i < n; i++) + push_back(u); + } catch (...) { + this->abort_txn(); + throw; + } + + this->commit_txn(); + } + //@} // assign_funcs + + // Directly use DB->put, so that when there is no explicit transaction, + // it is autocommit. This + // method is often called by other db_vector methods, in that case + // those methods will begin/commit_txn internally, causing push_back + // to not autocommit, as expected. + /// \brief Push back an element into the vector. + /// + /// This function supports auto-commit. + /// \param x The value of element to push into this vector. + /// \sa http://www.cplusplus.com/reference/stl/vector/push_back.html + inline void push_back ( const T& x ) + { + index_type k0 = 0; // This value is ignored. + int ret; + + // x may be an temporary object, so must copy it. + DataItem dt(x, false), k(k0, true); + // In CDS mode, the current transaction is the DB_TXN created + // by cds_group_begin. + BDBOP(this->get_db_handle()->put(ResourceManager::instance()-> + current_txn(this->get_db_env_handle()), + &(k.get_dbt()), &(dt.get_dbt()), DB_APPEND), ret); + } + + /// \brief Pop out last element from the vector. + /// + /// This function supports auto-commit. 
+ /// \sa http://www.cplusplus.com/reference/stl/vector/pop_back.html + void pop_back () + { + try { + iterator derefitr; + + this->begin_txn(); + init_itr(derefitr); + open_itr(derefitr); + derefitr.last(); + derefitr.pcsr_->del(); + this->commit_txn(); + } catch(...) { + this->abort_txn(); + throw; + } + } + + //////////////////////////////////////////////////////////////////// + // + // Begin std::deque and std::list specific public functions. + // + // These methods are not in std::vector, but are in std::list and + // std::deque. They are defined here so that db_vector can be used to + // replace std::list and std::deque. + /// \name Functions specific to deque and list + /// These functions come from std::list and std::deque, and have + /// identical behaviors to their counterparts in std::list/std::deque. + /// \sa http://www.cplusplus.com/reference/stl/deque/pop_front.html + /// http://www.cplusplus.com/reference/stl/deque/push_front.html + //@{ + /// \brief Push an element x into the vector from front. + /// \param x The element to push into this vector. + /// This function supports auto-commit. + void push_front (const T& x) + { + int flag, ret; + + try { + this->begin_txn(); + iterator derefitr; + init_itr(derefitr); + open_itr(derefitr); + // MOVE iterator and cursor to 1st element. + ret = derefitr.first(); + if (ret < 0) + flag = DB_KEYLAST; // empty + else + flag = DB_BEFORE; + derefitr.pcsr_->insert(x, flag); + this->commit_txn(); + } catch(...) { + this->abort_txn(); + throw; + } + } + + /// \brief Pop out the front element from the vector. + /// + /// This function supports auto-commit. + void pop_front () + { + try { + this->begin_txn(); + iterator derefitr; + init_itr(derefitr); + open_itr(derefitr); + derefitr.first(); + derefitr.pcsr_->del(); + this->commit_txn(); + } catch(...) 
{ + this->abort_txn(); + throw; + } + + } + //@} + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // + // Begin insert and erase functions. + // + // This method can not be autocommit because pos can not be reopened + // while it already points to a dest position. In order to gain + // transaction it should have already been opened in a transactional + // context, so it is meaningless to wrap the insert operation with + // begin/commit transaction in this method. + /// \name Insert functions + /// The iterator pos in the functions must be a read-write iterator, + /// can't be read only. + /// \sa http://www.cplusplus.com/reference/stl/vector/insert.html + //@{ + /// \brief Insert x before position pos. + /// \param x The element to insert. + /// \param pos The position before which to insert. + iterator insert (iterator pos, const T& x) + { + u_int32_t flag; + bool isempty; + + make_insert_args(pos, flag, isempty); + + pos.pcsr_->insert(x, flag); + pos.update_cur_pair(); // Sync with underlying cursor. + + return pos; // Returns the new position's iterator. + } + + /// \brief Insert n number of elements x before position pos. + /// \param x The element to insert. + /// \param pos The position before which to insert. + /// \param n The number of elements to insert. + void insert (iterator pos, size_type n, const T& x) + { + u_int32_t flag; + size_t i; + bool isempty; + + make_insert_args(pos, flag, isempty); + + for (i = 0; i < n; i++) { + pos.pcsr_->insert(x, flag); + pos.update_cur_pair(); + // Move the underlying Dbc*cursor to next record + // (i.e. the orig record it pointed to before + // the insertion). So it will point to + // the new record after insertion. + // + if (flag == DB_BEFORE) + ++pos; + + // If using DB_AFTER flag, no need to move because + // cursor already points to the newly inserted record + // after the orig record it pointed to. 
+ // + + // There is already data in the underlying database + // so use DB_BEFORE unless pos is begin() and the + // vector was empty before this insert call. + // + if (flag == DB_KEYLAST) { + if(isempty) + flag = DB_AFTER; + else + // This branch can never be reached because any + // iterator of a empty container can only have its + // cursor at the begin() position. + // + flag = DB_BEFORE; + } + } + } + + /// \brief Range insertion. + /// + /// Insert elements in range [first, last) into this vector before + /// position pos. + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \param pos The position before which to insert. + template + void insert (iterator pos, InputIterator first, InputIterator last) + { + u_int32_t flag; + InputIterator itr; + bool isempty; + + make_insert_args(pos, flag, isempty); + // !!!XXX + // The cursor will point to the newly inserted record, so we + // need to move the cursor to the original one. + // + + for (itr = first; itr != last; ++itr) { + pos.pcsr_->insert(*itr, flag); + pos.update_cur_pair(); + // Move the underlying Dbc*cursor to next record + // (i.e. the orig record it pointed to before + // the insertion). So it will point to + // the new record after insertion. + // + if (flag == DB_BEFORE) + ++pos; + // There is already data in the underlying database + // so use DB_BEFORE unless pos is begin() and the + // vector was empty before this insert call. + // + if (flag == DB_KEYLAST) { + // pos == begin() && this was empty + if(isempty) + flag = DB_AFTER; + else + // This branch can never be reached because any + // iterator of a empty container can only have its + // cursor at the begin() position. + // + flag = DB_BEFORE; + } + } + } + + // this method can not be autocommitted, reason as above + /// \brief Range insertion. + /// + /// Insert elements in range [first, last) into this vector before + /// position pos. 
+ /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \param pos The position before which to insert. + void insert (iterator pos, const_iterator first, const_iterator last) + { + u_int32_t flag; + iterator itr; + bool isempty; + + make_insert_args(pos, flag, isempty); + + for (itr = first; itr != last; ++itr) { + pos.pcsr_->insert(*itr, flag); + pos.update_cur_pair(); + // Move the underlying Dbc*cursor to next record + // (i.e. the orig record it pointed to before + // the insertion). So it will point to + // the new record after insertion. + // + if (flag == DB_BEFORE) + ++pos; + // There is already data in the underlying database + // so use DB_BEFORE unless pos is begin() and the + // vector was empty before this insert call. + // + if (flag == DB_KEYLAST) { + // pos == begin() && this was empty. + if(isempty) + flag = DB_AFTER; + else + flag = DB_BEFORE; + } + } + } + //@} + + /// \name Erase functions + /// The iterator pos in the functions must be a read-write iterator, + /// can't be read only. + /// \sa http://www.cplusplus.com/reference/stl/vector/erase.html + //@{ + /// \brief Erase element at position pos. + /// \param pos The valid position in the container's range to erase. + /// \return The next position after the erased element. + inline iterator erase (iterator pos) + { + if (pos == end_itr_) + return pos; + pos.pcsr_->del(); + ++pos; + // Synchronized with underlying cursor. + return pos; + } + + /// \brief Erase elements in range [first, last) + /// \param first The closed boundary of the range. + /// \param last The open boundary of the range. + /// \return The next position after the erased elements. + iterator erase (iterator first, iterator last) + { + iterator itr; + int ret = 0; + Dbt k, d; + + // If ret is non-zero, it is because there is no element in + // this container any more. 
+ // + for (itr = first; itr != last && ret == 0; ++itr) { + if (itr == end_itr_) + return itr; + ret = itr.pcsr_->del(); + } + return itr; + } + //@} + //////////////////////////////////////////////////////////////////// + + /// \brief Swap content with another vector vec. + /// \param vec The other vector to swap content with. + /// This function supports auto-commit. + /// \sa http://www.cplusplus.com/reference/stl/vector/swap.html + void swap (self& vec) + { + T tmp; + size_t sz, vsz, i, j, m; + self& me = *this; + self *longer, *shorter; + + verify_db_handles(vec); + this->begin_txn(); + try { + sz = this->size(); + vsz = vec.size(); + // do swap + for (i = 0; (i < sz) && (i < vsz); i++) { + tmp = me[(index_type)i]; + me[(index_type)i] = vec[(index_type)i]; + vec[(index_type)i] = tmp; + } + + // Move the longer vector's remaining part to the + // shorter one. + // + if (sz == vsz) + return; + else if (sz < vsz) { + longer = &vec; + shorter = &me; + j = vsz; + } else { + longer = &me; + shorter = &vec; + j = sz; + } + + self &lv = *longer; + self &sv = *shorter; + m = i; + for (; i < j; i++) + sv.push_back(lv[(index_type)i]); + + typename self::iterator itr1 = + lv.begin() + (int)m, itr2 = lv.end(); + + lv.erase(itr1, itr2); + } catch (...) { + this->abort_txn(); + throw; + } + this->commit_txn(); + } + + // When DB_AUTO_COMMIT is set, no transaction needs to be begun to + // support autocommit because DB->truncate internally supports it. + /// Remove all elements of the vector, make it an empty vector. + /// This function supports auto-commit. + /// \param b_truncate Same as that of db_vector::assign(). 
+ /// \sa http://www.cplusplus.com/reference/stl/vector/clear.html + void clear(bool b_truncate = true) + { + int ret; + u_int32_t flag; + DbEnv *penv = this->get_db_handle()->get_env(); + + if (b_truncate) { + ResourceManager::instance()->close_db_cursors( + this->get_db_handle()); + + BDBOP2(this->get_db_handle()->truncate( + ResourceManager::instance()->current_txn(penv), + NULL, 0), ret, this->abort_txn()); + } else { + ReadModifyWriteOption brmw( + ReadModifyWriteOption::no_read_modify_write()); + + BDBOP(penv->get_open_flags(&flag), ret); + + // DB_RMW flag requires locking subsystem. + if ((flag & DB_INIT_LOCK) || (flag & DB_INIT_CDB) || + (flag & DB_INIT_TXN)) + brmw = + ReadModifyWriteOption::read_modify_write(); + + try { // Truncate is capable of autocommit internally. + this->begin_txn(); + erase(begin(brmw, false), end()); + this->commit_txn(); + } catch (...) { + this->abort_txn(); + throw; + } + } + } + + //////////////////////////////////////////////////////////////////// + // + // Begin methods only defined in std::list class. + /// \name std::list specific functions + /// \sa http://www.cplusplus.com/reference/stl/list/ + //@{ + /// \brief Remove all elements whose values are "value" from the list. + /// + /// This function supports auto-commit. + /// \param value The target value to remove. + /// \sa http://www.cplusplus.com/reference/stl/list/remove/ + void remove(const T& value) + { + iterator i; + + try { + begin_txn(); + for (i = begin(); i != end(); ++i) + if (*i == value) + erase(i); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Remove all elements making "pred" return true. + /// + /// This function supports auto-commit. + /// \param pred The binary predicate judging elements in this list. 
+ /// \sa http://www.cplusplus.com/reference/stl/list/remove_if/ + template + void remove_if(Predicate pred) + { + iterator i; + + try { + begin_txn(); + for (i = begin(); i != end(); ++i) + if (pred(*i)) + erase(i); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Merge content with another container. + /// + /// This function supports auto-commit. + /// \param x The other list to merge with. + /// \sa http://www.cplusplus.com/reference/stl/list/merge/ + void merge(self& x) + { + DbstlListSpecialOps obj(this); + obj.merge(x); + } + + + /// \brief Merge content with another container. + /// + /// This function supports auto-commit. + /// \param x The other list to merge with. + /// \param comp The compare function to determine insertion position. + /// \sa http://www.cplusplus.com/reference/stl/list/merge/ + template + void merge(self& x, Compare comp) + { + verify_db_handles(x); + iterator itr, itrx; + + try { + begin_txn(); + for (itr = begin(), itrx = x.begin(); + itr != end_itr_ && itrx != x.end();) { + if (!comp(*itr, *itrx)) { + insert(itr, *itrx); + ++itrx; + } else + ++itr; + } + if (itr == end_itr_ && itrx != x.end()) + insert(itr, itrx, x.end()); + x.clear(); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Remove consecutive duplicate values from this list. + /// + /// This function supports auto-commit. + /// \sa http://www.cplusplus.com/reference/stl/list/unique/ + void unique() + { + DbstlListSpecialOps obj(this); + obj.unique(); + } + + /// \brief Remove consecutive duplicate values from this list. + /// + /// This function supports auto-commit. + /// \param binary_pred The compare predicate to dertermine uniqueness. + /// \sa http://www.cplusplus.com/reference/stl/list/unique/ + template + void unique(BinaryPredicate binary_pred) + { + DbstlListSpecialOps obj(this); + obj.unique(binary_pred); + } + + /// \brief Sort this list. + /// + /// This function supports auto-commit. 
+ /// \sa http://www.cplusplus.com/reference/stl/list/sort/ + void sort() + { + DbstlListSpecialOps obj(this); + obj.sort(); + } + + /// \brief Sort this list. + /// + /// This function supports auto-commit. + /// \param comp The compare operator to determine element order. + /// \sa http://www.cplusplus.com/reference/stl/list/sort/ + template + void sort(Compare comp) + { + DbstlListSpecialOps obj(this); + obj.sort(comp); + } + + /// \brief Reverse this list. + /// + /// This function supports auto-commit. + /// \sa http://www.cplusplus.com/reference/stl/list/reverse/ + void reverse() + { + try { + self tmp; + const self &cthis = *this; + const_reverse_iterator ri; + + begin_txn(); + for (ri = cthis.rbegin(BulkRetrievalOption:: + bulk_retrieval()); ri != rend(); ++ri) + tmp.push_back(*ri); + assign(tmp.begin(), tmp.end()); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Moves elements from list x into this list. + /// + /// Moves all elements in list x into this list + /// container at the + /// specified position, effectively inserting the specified + /// elements into the container and removing them from x. + /// This function supports auto-commit. + /// \param position Position within the container where the elements + /// of x are inserted. + /// \param x The other list container to splice from. + /// \sa http://www.cplusplus.com/reference/stl/list/splice/ + void splice (iterator position, self& x) + { + verify_db_handles(x); + try { + begin_txn(); + insert(position, x.begin(), x.end()); + x.clear(); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Moves elements from list x into this list. + /// + /// Moves elements at position i of list x into this list + /// container at the + /// specified position, effectively inserting the specified + /// elements into the container and removing them from x. + /// This function supports auto-commit. 
+ /// \param position Position within the container where the elements + /// of x are inserted. + /// \param x The other list container to splice from. + /// \param i The position of element in x to move into this list. + /// \sa http://www.cplusplus.com/reference/stl/list/splice/ + void splice (iterator position, self& x, iterator i) + { + verify_db_handles(x); + try { + begin_txn(); + assert(!(i == x.end())); + insert(position, *i); + x.erase(i); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + + /// \brief Moves elements from list x into this list. + /// + /// Moves elements in range [first, last) of list x into this list + /// container at the + /// specified position, effectively inserting the specified + /// elements into the container and removing them from x. + /// This function supports auto-commit. + /// \param position Position within the container where the elements + /// of x are inserted. + /// \param x The other list container to splice from. + /// \param first The range's closed boundary. + /// \param last The range's open boundary. + /// \sa http://www.cplusplus.com/reference/stl/list/splice/ + void splice (iterator position, self& x, iterator first, iterator last) + { + verify_db_handles(x); + try { + begin_txn(); + insert(position, first, last); + x.erase(first, last); + commit_txn(); + } catch (...) { + abort_txn(); + throw; + } + } + //@} + //////////////////////////////////////////////////////////////////// + +private: + typedef db_vector_iterator iterator_type; + typedef db_container base; + friend class db_vector_iterator; + friend class db_vector_base_iterator; + friend class db_reverse_iterator; + friend class db_reverse_iterator; + friend class DbstlListSpecialOps; + + // Replace current contents with those in 'x'. + inline void copydb(const self&x) + { + const_iterator itr; + + // TODO: Make sure clear can succeed, it fails if there are + // cursors open in other threads. 
+ clear(false); + for (itr = x.begin(); itr != x.end(); ++itr) + push_back(*itr); + } + + static iterator end_itr_; + + template + inline void push_range(InputIterator& first, InputIterator& last) + { + InputIterator itr; + + for (itr = first; itr != last; ++itr) + push_back(*itr); + } + + inline void push_range(const_iterator& first, const_iterator& last) + { + const_iterator itr; + + for (itr = first; itr != last; ++itr) + push_back(*itr); + + } + + // Move pos to last, pos must initially be the end() iterator. + inline void end_to_last(const const_iterator& pos) const + { + if (pos != end_itr_) + return; + pos.pcsr_.set_cursor(new TRandDbCursor()); + open_itr(pos); + pos.last(); + } + + // This function generate appropriate flags for cursor insert calls. + void make_insert_args(iterator& pos, u_int32_t& flag, bool &isempty) + { + isempty = false; + if (pos.itr_status_ == INVALID_ITERATOR_POSITION) { + ((self*)pos.owner_)->end_to_last(pos); + /* Empty db, iterator at "begin()". */ + if (((self*)pos.owner_)->empty()) { + flag = DB_KEYLAST; /* Empty */ + isempty = true; + } else + /* Move pos to last element. */ + flag = DB_AFTER; + } else + flag = DB_BEFORE; + } + + // Open iterator and move it to point the 1st key/data pair. + // + void open_itr(const const_iterator&itr, bool readonly = false) const + { + u_int32_t oflags = 0; + int ret; + DbEnv *penv = this->get_db_handle()->get_env(); + + itr.owner_ = (db_container*)this; + if (!readonly && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)), ret); + // Open a writable cursor when in CDS mode and not + // requesting a read only iterator. 
+ if ((oflags & DB_INIT_CDB) != 0) + ((self *)this)->set_cursor_open_flags( + this->get_cursor_open_flags() | + DB_WRITECURSOR); + } + if (!itr.pcsr_) + itr.pcsr_.set_cursor(new TRandDbCursor( + itr.bulk_retrieval_, + itr.rmw_csr_, itr.directdb_get_)); + itr.itr_status_ = itr.pcsr_->open((db_container*)this, + this->get_cursor_open_flags()); + } + + void open_itr(const reverse_iterator &itr, bool readonly = false) const + { + u_int32_t oflags = 0; + int ret; + DbEnv *penv = this->get_db_handle()->get_env(); + + itr.owner_ = (db_container*)this; + if (!readonly && penv != NULL) { + BDBOP((penv->get_open_flags(&oflags)) , ret); + // Open a writable cursor when in CDS mode and not + // requesting a read only iterator. + if ((oflags & DB_INIT_CDB) != 0) + ((self *)this)->set_cursor_open_flags( + this->get_cursor_open_flags() | + DB_WRITECURSOR); + } + if (!itr.pcsr_) + itr.pcsr_.set_cursor(new TRandDbCursor( + itr.bulk_retrieval_, + itr.rmw_csr_, itr.directdb_get_)); + itr.itr_status_ = itr.pcsr_->open((db_container*)this, + this->get_cursor_open_flags()); + itr.update_cur_pair(); + } + + inline void init_itr(const_iterator &itr) const + { + itr.owner_ = (db_container*)this; + } + + // Certain flags and parameters need to be set to the database and + // environment handle for them to back-up a certain type of container. + // This function verifies that db and env handles are well configured + // to be suitable for this type of containers. 
+ virtual const char* verify_config(Db*db, DbEnv*env) const + { + u_int32_t oflags, sflags, oflags2; + const char *errmsg = NULL; + int ret; + DBTYPE dbtype; + + errmsg = db_container::verify_config(db, env); + if (errmsg) + return errmsg; + + oflags = sflags = oflags2 = 0; + BDBOP((db->get_type(&dbtype)) || (db->get_open_flags(&oflags)) + || (db->get_flags(&sflags)) || + (env->get_open_flags(&oflags2)), ret); + + if (dbtype != DB_RECNO) + errmsg = "Must use DB_RECNO type of database."; + // DB_THREAD is not always required, only required if the db + // handle is shared among multiple threads, which is not a + // case we can detect here. + + return errmsg; + } + + +}; // db_vector + +template +typename db_vector::iterator + db_vector::end_itr_; + +// Partial spececialization version of std::swap for db_vector. +template +void swap(db_vector&v1, db_vector&v2) +{ + v1.swap(v2); +} + + + +template +class _exported DbstlListSpecialOps +{ + typedef db_vector partner; + typedef typename partner::iterator iterator; + partner *that; +public: + DbstlListSpecialOps(partner *that1) + { + that = that1; + } + + template + void unique(BinaryPredicate binary_pred) + { + T t, t2; + + try { + that->begin_txn(); + iterator i = that->begin(); + t2 = *i; + ++i; + for (; i != that->end_itr_; ++i) { + if (binary_pred((t = *i), t2)) + that->erase(i); + else + t2 = t; + } + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + void unique() + { + T t, t2; + + try { + that->begin_txn(); + iterator i = that->begin(); + t2 = *i; + ++i; + for (; i != that->end_itr_; ++i) { + if ((t = *i) == t2) + that->erase(i); + else + t2 = t; + } + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + /// This function supports auto-commit. 
+ void merge(partner& x) + { + that->verify_db_handles(x); + T b; + iterator itr, itrx; + + try { + that->begin_txn(); + for (itr = that->begin(), itrx = x.begin(); + itr != that->end_itr_ && itrx != x.end();) { + if (*itr > (b = *itrx)) { + that->insert(itr, b); + ++itrx; + } else + ++itr; + } + if (itr == that->end_itr_ && itrx != x.end()) + that->insert(itr, itrx, x.end()); + x.clear(); + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + /// This function supports auto-commit. + void sort() + { + try { + that->begin_txn(); + std::sort(that->begin(), that->end()); + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + /// This function supports auto-commit. + template + void sort(Compare comp) + { + try { + that->begin_txn(); + std::sort(that->begin(), that->end(), comp); + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + + +}; + + +template +class _exported DbstlListSpecialOps +{ + typedef db_vector partner; + typedef typename partner::iterator iterator; + typedef typename partner::const_iterator const_iterator; + partner *that; + DbstlElemTraits *inst; + typename DbstlElemTraits::ElemSizeFunct sizef; + typename DbstlElemTraits::SequenceLenFunct seqlenf; + typename DbstlElemTraits::SequenceCopyFunct seqcopyf; + typename DbstlElemTraits::SequenceCompareFunct seqcmpf; + + void seq_assign(T *&dest, const T*src) + { + size_t sz = 0; + size_t seql = seqlenf(src); + + + if (sizef == NULL) + sz = sizeof(T) * (seql + 1); + else { + + for (size_t i = 0; i < seql; i++) + sz += sizef(src[i]); + // Add space for terminating object, like '\0' + // for char *string. 
+ T tmp; + sz += sizef(tmp); + } + dest = (T *)DbstlReAlloc(dest, sz); + memset(dest, 0, sz); + seqcopyf(dest, src, seql); + } + + template + class CompareInt + { + typename DbstlElemTraits::SequenceCompareFunct cmpf; + public: + CompareInt(typename DbstlElemTraits:: + SequenceCompareFunct cmpf1) + { + cmpf = cmpf1; + } + + bool operator()(const std::basic_string > + &a, const std::basic_string > &b) + { + return cmpf(a.c_str(), b.c_str()); + } + + }; + + template + class CompareInt2 + { + public: + Compare comp_; + + CompareInt2(Compare comp) + { + comp_ = comp; + } + + bool operator()(const std::basic_string > + &s1, const std::basic_string >& s2) + { + return comp_(s1.c_str(), s2.c_str()); + } + + }; +public: + DbstlListSpecialOps(partner *that1) + { + that = that1; + + // Though he following settings are called in ResourceManager + // singleton initialization, we still have to call them here + // because the global variable in the dll is not the same one + // as the one in this application. 
+ DbstlElemTraits * cstarinst = + DbstlElemTraits::instance(); + cstarinst->set_sequence_len_function(dbstl_strlen); + cstarinst->set_sequence_copy_function(dbstl_strcpy); + cstarinst->set_sequence_compare_function(dbstl_strcmp); + cstarinst->set_sequence_n_compare_function(dbstl_strncmp); + + DbstlElemTraits *wcstarinst = + DbstlElemTraits::instance(); + wcstarinst->set_sequence_copy_function(dbstl_wcscpy); + wcstarinst->set_sequence_len_function(dbstl_wcslen); + wcstarinst->set_sequence_compare_function(dbstl_wcscmp); + wcstarinst->set_sequence_n_compare_function(dbstl_wcsncmp); + + inst = DbstlElemTraits::instance(); + sizef = inst->get_size_function(); + seqlenf = inst->get_sequence_len_function(); + seqcopyf = inst->get_sequence_copy_function(); + seqcmpf = inst->get_sequence_compare_function(); + } + + template + void unique(BinaryPredicate binary_pred) + { + T *t2 = NULL; + + try { + that->begin_txn(); + iterator i = that->begin(); + + seq_assign(t2, *i); + ++i; + for (; i != that->end(); ++i) { + if (binary_pred(*i, t2)) + that->erase(i); + else + seq_assign(t2, *i); + } + that->commit_txn(); + free(t2); + } catch (...) { + that->abort_txn(); + free(t2); + throw; + } + } + + void unique() + { + T *t2 = NULL; + + try { + that->begin_txn(); + iterator i = that->begin(); + seq_assign(t2, *i); + ++i; + for (; i != that->end(); ++i) { + if (seqcmpf(*i, t2) == 0) + that->erase(i); + else + seq_assign(t2, *i); + } + that->commit_txn(); + free(t2); + } catch (...) { + that->abort_txn(); + free(t2); + throw; + } + } + + /// This function supports auto-commit. 
+ void merge(partner& x) + { + that->verify_db_handles(x); + iterator itr, itrx; + + try { + that->begin_txn(); + for (itr = that->begin(), itrx = x.begin(); + itr != that->end() && itrx != x.end();) { + if (seqcmpf(*itr, *itrx) > 0) { + that->insert(itr, *itrx); + ++itrx; + } else + ++itr; + } + if (itr == that->end() && itrx != x.end()) + that->insert(itr, itrx, x.end()); + x.clear(); + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + void sort() + { + try { + typedef std::basic_string > + string_t; + CompareInt comp(DbstlElemTraits::instance()-> + get_sequence_compare_function()); + std::list tmplist(that->size()); + + that->begin_txn(); + const_iterator itr; + const partner&cthat = *that; + typename std::list::iterator itr1; + + for (itr = cthat.begin(BulkRetrievalOption:: + bulk_retrieval()), itr1 = tmplist.begin(); + itr1 != tmplist.end(); ++itr, ++itr1) + *itr1 = string_t(*itr); + + tmplist.sort(); + that->clear(false); + for (typename std::list::iterator + it = tmplist.begin(); + it != tmplist.end(); ++it) + that->push_back((T*)(it->c_str())); + + that->commit_txn(); + } catch (...) { + that->abort_txn(); + throw; + } + } + + /// This function supports auto-commit. + template + void sort(Compare comp) + { + try { + typedef std::basic_string > + string_t; + CompareInt2 comp2(comp); + + std::list tmplist(that->size()); + that->begin_txn(); + const_iterator itr; + const partner&cthat = *that; + typename std::list::iterator itr1; + + for (itr = cthat.begin(BulkRetrievalOption:: + bulk_retrieval()), itr1 = tmplist.begin(); + itr1 != tmplist.end(); ++itr, ++itr1) + *itr1 = string_t(*itr); + + tmplist.sort(comp2); + that->clear(false); + for (typename std::list::iterator it = + tmplist.begin(); + it != tmplist.end(); ++it) + that->push_back((T*)(it->c_str())); + + that->commit_txn(); + } catch (...) 
{ + that->abort_txn(); + throw; + } + } + + +}; +//@} //dbstl_containers +END_NS +#endif //_DB_STL_DB_VECTOR_H + diff --git a/lang/tcl/docs/db.html b/lang/tcl/docs/db.html new file mode 100644 index 00000000..cbcbbda4 --- /dev/null +++ b/lang/tcl/docs/db.html @@ -0,0 +1,267 @@ + + + + + + + + +

+Database Commands

+The database commands provide a fairly straightforward mapping to the +DB method functions. + +

+> berkdb open +

+ +
[-btcompare proc]
+Sets the Btree comparison function to the Tcl procedure named +proc using the +DB->set_bt_compare +method. + +
[-btree|-hash|-recno|-queue|-unknown]
+ +Select the database type:
+DB_BTREE, DB_HASH, DB_RECNO, DB_QUEUE or DB_UNKNOWN. + + +
[-cachesize {gbytes bytes ncaches}]
+Sets the size of the database cache to the size specified by +gbytes and bytes, broken up into ncaches number of +caches using the +DB->set_cachesize +method. + +
[-create]
+Selects the DB_CREATE flag to create underlying files. + +
[-delim delim]
+Sets the delimiting byte for variable length records to delim +using the +DB->set_re_delim +method. + +
[-compress]
+Enables default compression using the +DB->set_bt_compress +method. + +
[-dup]
+Selects the DB_DUP flag to permit duplicates in the database. + +
[-dupcompare proc]
+Sets the duplicate data comparison function to the Tcl procedure named +proc using the +DB->set_dup_compare +method. + +
[-dupsort]
+Selects the DB_DUPSORT flag to support sorted duplicates. + +
[-env env]
+The database environment. + +
[-errfile filename]
+Specifies the error file to use for this environment to filename +by calling +DB->set_errfile. +If the file already exists then we will append to the end of the file. + +
[-excl]
+Selects the DB_EXCL flag to exclusively create underlying files. + +
[-extent size]
+Sets the size of a Queue database extent to the given size using +the +DB->set_q_extentsize +method. + +
[-ffactor density]
+Sets the hash table key density to the given density using the +DB->set_h_ffactor +method. + +
[-hashproc proc]
+Sets a user-defined hash function to the Tcl procedure named proc +using the +DB->set_h_hash method. + +
[-len len]
+Sets the length of fixed-length records to len using the +DB->set_re_len +method. + +
[-lorder order]
+Sets the byte order for integers stored in the database meta-data to +the given order using the +DB->set_lorder +method. + +
[-minkey minkey]
+Sets the minimum number of keys per Btree page to minkey using +the +DB->set_bt_minkey +method. + +
[-mode mode]
+Specifies the mode for created files. + +
[-nelem size]
+Sets the hash table size estimate to the given size using the +DB->set_h_nelem +method. + +
[-nommap]
+Selects the DB_NOMMAP flag to forbid mmaping of files. + +
[-pad pad]
+Sets the pad character used for fixed length records to pad using +the +DB->set_re_pad method. + +
[-pagesize pagesize]
+Sets the size of the database page to pagesize using the +DB->set_pagesize +method. + +
[-rdonly]
+Selects the DB_RDONLY flag for opening in read-only mode. + +
[-recnum]
+Selects the DB_RECNUM flag to support record numbers in Btrees. + +
[-renumber]
+Selects the DB_RENUMBER flag to support mutable record numbers. + +
[-revsplitoff]
+Selects the DB_REVSPLITOFF flag to suppress reverse splitting of pages +on deletion. + +
[-snapshot]
+Selects the DB_SNAPSHOT flag to support database snapshots. + +
[-source file]
+Sets the backing source file name to file using the +DB->set_re_source +method. + +
[-truncate]
+Selects the DB_TRUNCATE flag to truncate the database. + +
[--]
+Terminate the list of options and use remaining arguments as the file +or subdb names (thus allowing the use of filenames beginning with a dash +'-'). + +
[filename [subdbname]]
+The names of the database and sub-database. +
+ +
+> berkdb upgrade [-dupsort] [-env env] [--] [filename] +

This command will invoke the DB->upgrade +function.  If the command is given the -env option, then we +will accordingly upgrade the database filename within the context of that +environment. The -dupsort option selects the DB_DUPSORT flag for +upgrading. The use of -- terminates the list of options, thus allowing +filenames beginning with a dash. +

+ +


+> berkdb verify [-env env] [--] [filename] +

This command will invoke the DB->verify +function.  If the command is given the -env option, then we +will accordingly verify the database filename within the context of that +environment.  The use of -- terminates the list of options, +thus allowing filenames beginning with a dash. +

+ +


> db del +

There are no undocumented options. + +


+> db join [-nosort] db0.c0 db1.c0 ... +

This command will invoke the db_join +function.  After it successfully joins a database, we bind it to a +new Tcl command of the form dbN.cX, where X is an integer +starting at 0 (e.g. db2.c0, db3.c0, etc).  We use the Tcl_CreateObjCommand()  +to create the top level database function.  It is through this cursor +handle that the user can access the joined data items. +

The options are: +

    +
  • +-nosort - This flag causes DB not to sort the cursors based on the +number of data items they reference.  It results in the DB_JOIN_NOSORT +flag being set.
  • +
+ +

+This command will invoke the +db_create function. If +the command is given the -env option, then we will accordingly +create the database within the context of that environment. After it +successfully gets a handle to a database, we bind it to a new Tcl +command of the form dbX, where X is an integer starting +at 0 (e.g. db0, db1, etc). + +

+We use the Tcl_CreateObjCommand() to create the top level +database function. It is through this handle that the user can access +all of the commands described in the +Database Commands section. Internally, the database handle +is sent as the ClientData portion of the new command set so that +all future database calls access the appropriate handle. + +

+After parsing all of the optional arguments affecting the setup of the +database and making the appropriate calls to DB to manipulate those +values, we open the database for the user. It translates to the +DB->open method call after +parsing all of the various optional arguments. We automatically set the +DB_THREAD flag. The arguments are: + +


+> db get_join [-nosort] {db key} {db key} ... +

This command performs a join operation on the keys specified and returns +a list of the joined {key data} pairs. +

The options are: +

    +
  • +-nosort This flag causes DB not to sort the cursors based on the +number of data items they reference.  It results in the DB_JOIN_NOSORT +flag being set.
  • +
+ +
+> db keyrange [-txn id] key +

This command returns the range for the given key.  It returns +a list of 3 double elements of the form {less equal greater} +where less is the percentage of keys less than the given +key, equal is the percentage equal to the given key and greater +is the percentage greater than the given key.  If the -txn option +is specified it performs this operation under transaction protection. + +


> db put +

The undocumented options are: +

+
-nodupdata
+This flag causes DB not to insert the key/data pair if it already +exists, that is, both the key and data items are already in the +database. The -nodupdata flag may only be specified if the underlying +database has been configured to support sorted duplicates. +
+ +
> dbc put +

The undocumented options are: +

+
-nodupdata
+This flag causes DB not to insert the key/data pair if it already +exists, that is, both the key and data items are already in the +database. The -nodupdata flag may only be specified if the underlying +database has been configured to support sorted duplicates. +
+ + + diff --git a/lang/tcl/docs/env.html b/lang/tcl/docs/env.html new file mode 100644 index 00000000..9a1f34bf --- /dev/null +++ b/lang/tcl/docs/env.html @@ -0,0 +1,344 @@ + + + + + + + + +

+Environment Commands

+Environments provide a structure for creating a consistent environment +for processes using one or more of the features of Berkeley DB.  Unlike +some of the database commands, the environment commands are very low level. +
+
+

The user may create and open a new DB environment  by invoking: +

> berkdb env +
    [-cdb] [-cdb_alldb] [-lock] [-log] [-txn [nosync]] +
    [-create] [-home directory] [-mode mode] +
    [-data_dir directory] [-log_dir directory] +[-tmp_dir directory] +
    [-nommap] [-private] [-recover] [-recover_fatal] +[-system_mem] [-errfile filename] +
    [-use_environ] [-use_environ_root] [-verbose +{which on|off}] +
    [-region_init] +
    [-cachesize {gbytes bytes ncaches}] +
    [-mmapsize size] +
    [-log_max max] +
    [-log_buffer size] +
    [-lock_conflict {nmodes {matrix}}] +
    [-lock_detect default|oldest|random|youngest] +
    [-lock_max max] +
    [-lock_max_locks max] +
    [-lock_max_lockers max] +
    [-lock_max_objects max] +
    [-lock_timeout timeout] +
    [-overwrite] +
    [-txn_max max] +
    [-txn_timeout timeout] +
    [-client_timeout seconds] +
    [-server_timeout seconds] +
    [-server hostname] +
    [-rep_master] [-rep_client] +
    [-rep_transport { machineid sendproc }] +
  +

This command opens up an environment.   We automatically set +the DB_THREAD and the DB_INIT_MPOOL flags.  The arguments are: +

    +
  • +-cdb selects the DB_INIT_CDB flag for Concurrent Data Store
  • + +
  • +-cdb_alldb selects the DB_CDB_ALLDB flag for Concurrent Data Store
  • + +
  • +-lock selects the DB_INIT_LOCK flag for the locking subsystem
  • + +
  • +-log selects the DB_INIT_LOG flag for the logging subsystem
  • + +
  • +-txn selects the DB_INIT_TXN, DB_INIT_LOCK and DB_INIT_LOG flags +for the transaction subsystem.  If nosync is specified, then +it will also select DB_TXN_NOSYNC to indicate no flushes of log on commits
  • + +
  • +-create selects the DB_CREATE flag to create underlying files
  • + +
  • +-home directory selects the home directory of the environment
  • + +
  • +-data_dir directory selects the data file directory of the +environment by calling DBENV->set_data_dir.
  • + +
  • +-log_dir directory selects the log file directory of the +environment  by calling DBENV->set_lg_dir.
  • + +
  • +-tmp_dir directory selects the temporary file directory of +the environment  by calling DBENV->set_tmp_dir.
  • + +
  • +-mode mode sets the permissions of created files to mode
  • + +
  • +-nommap selects the DB_NOMMAP flag to disallow using mmap'ed files
  • + +
  • +-private selects the DB_PRIVATE flag for a private environment
  • + +
  • +-recover selects the DB_RECOVER flag for recovery
  • + +
  • +-recover_fatal selects the DB_RECOVER_FATAL flag for catastrophic +recovery
  • + +
  • +-system_mem selects the DB_SYSTEM_MEM flag to use system memory
  • + +
  • +-errfile specifies the error file to use for this environment to +filename +by calling DBENV->set_errfile. +If +the file already exists then we will append to the end of the file
  • + +
  • +-use_environ selects the DB_USE_ENVIRON flag to affect file naming
  • + +
  • +-use_environ_root selects the DB_USE_ENVIRON_ROOT flag to have the +root environment affect file naming
  • + +
  • +-verbose produces verbose error output for the given which subsystem, +using the DBENV->set_verbose +method.   See the description of verbose +below for valid which values
  • + +
  • +-region_init specifies that the user wants to page fault the region +in on startup using the DBENV->set_region_init +method call
  • + +
  • +-cachesize sets the size of the database cache to the size  +specified by gbytes and bytes, broken up into +ncaches +number of caches using the DBENV->set_cachesize +method
  • + +
  • +-mmapsize sets the size of the database page to size using +the DBENV->set_mp_mmapsize +method
  • + +
  • +-log_max sets the maximum size of the log file to max +using the DBENV->set_lg_max +call
  • + +
  • +-log_regionmax sets the size of the log region to max +using the DBENV->set_lg_regionmax +call
  • + +
  • +-log_buffer sets the size of the log file in bytes to size +using the DBENV->set_lg_bsize +call
  • + +
  • +-lock_conflict sets the number of lock modes to nmodes +and sets the locking policy for those modes to the conflict_matrix +given using the DBENV->set_lk_conflict +method call
  • + +
  • +-lock_detect sets the deadlock detection policy to the given policy +using the DBENV->set_lk_detect +method call.  The policy choices are:
  • + +
      +
    • +default selects the DB_LOCK_DEFAULT policy for default detection
    • + +
    • +oldest selects DB_LOCK_OLDEST to abort the oldest locker on a deadlock
    • + +
    • +random selects DB_LOCK_RANDOM to abort a random locker on a deadlock
    • + +
    • +youngest selects DB_LOCK_YOUNGEST to abort the youngest locker on +a deadlock
    • +
    + +
  • +-lock_max_locks sets the maximum number of locks to max using +the DBENV->set_lk_max_locks +method call
  • + +
  • +-lock_max_lockers sets the maximum number of locking entities to +max +using the DBENV->set_lk_max_lockers +method call
  • + +
  • +-lock_max_objects sets the maximum number of simultaneously locked +objects to max using the DBENV->set_lk_max_objects +method call
  • + +
  • +-lock_timeout sets the timeout for locks in the environment
  • + +
  • +-overwrite sets DB_OVERWRITE flag
  • + +
  • +-txn_max sets the maximum size of the transaction table to max +using the DBENV->set_txn_max +method call
  • + +
  • +-txn_timeout sets the timeout for transactions in the environment
  • + +
  • +-client_timeout sets the timeout value for the client waiting for +a reply from the server for RPC operations to seconds.
  • + +
  • +-server_timeout sets the timeout value for the server to determine +an idle client is gone to seconds.
  • + +
  • +-server specifies the hostname of the server +to connect to in the DBENV->set_server +call.
  • + +
  • +-rep_client sets the newly created environment to be a +replication client, using the +DBENV->rep_client call.
  • + +
  • +-rep_master sets the newly created environment to be a +replication master, using the +DBENV->rep_master call.
  • + +
  • +-rep_transport specifies the replication transport function, +using the +DBENV->rep_set_transport +call. This site's machine ID is set to machineid and +the send function, a Tcl proc, is set to sendproc.
  • + +
+ +This command will invoke the db_env_create +function.  After it successfully gets a handle to an environment, +we bind it to a new Tcl command of the form envX, where X +is an integer starting at  0 (e.g. env0, env1, etc).  +We use the Tcl_CreateObjCommand() to create the top level environment +command function.  It is through this handle that the user can access +all the commands described in the Environment +Commands section.  Internally, the handle we get back from DB +will be stored as the ClientData portion of the new command set +so that all future environment calls will have that handle readily available.  +Then we call the DBENV->open +method call and possibly some number of setup calls as described above. +

+


+
> <env> verbose which +on|off +

This command controls the use of debugging output for the environment.  +This command directly translates to a call to the DBENV->set_verbose +method call.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message.  The user specifies +which +subsystem to control, and indicates whether debug messages should be turned +on +or off for that subsystem.  The value of which +must be one of the following: +

    +
  • +deadlock - Chooses the deadlocking code by using the DB_VERB_DEADLOCK +value
  • + +
  • +recovery - Chooses the recovery code by using the DB_VERB_RECOVERY +value
  • + +
  • +wait - Chooses the waitsfor code by using the DB_VERB_WAITSFOR value
  • +
+ +
+

> <env> close +

This command closes an environment and deletes the handle.  This +command directly translates to a call to the DBENV->close +method call.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. +

Additionally, since the handle is no longer valid, we will call Tcl_DeleteCommand() +so +that further uses of the handle will be dealt with properly by Tcl itself. +

Also, the close command will automatically abort any transactions +and close any mpool memory files.  As such +we must maintain a list of open transaction and mpool handles so that we +can call Tcl_DeleteCommand on those as well. +

+


+ +> berkdb envremove
+[-data_dir directory]
+[-force]
+[-home directory]
+[-log_dir directory]
+[-overwrite]
+[-tmp_dir directory]
+[-use_environ]
+[-use_environ_root]
+ +

This command removes the environment if it is not in use and deletes +the handle.  This command directly translates to a call to the DBENV->remove +method call.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message.  The arguments are: +

    +
  • +-force selects the DB_FORCE flag to remove even if other processes +have the environment open
  • + +
  • +-home directory specifies the home directory of the environment
  • + +
  • +-data_dir directory selects the data file directory of the +environment by calling DBENV->set_data_dir.
  • + +
  • +-log_dir directory selects the log file directory of the +environment  by calling DBENV->set_lg_dir.
  • + +
  • +-overwrite sets DB_OVERWRITE flag
  • + +
  • +-tmp_dir directory selects the temporary file directory of +the environment  by calling DBENV->set_tmp_dir.
  • + +
  • +-use_environ selects the DB_USE_ENVIRON flag to affect file naming
  • + +
  • +-use_environ_root selects the DB_USE_ENVIRON_ROOT flag to affect +file naming
  • +
+ + + diff --git a/lang/tcl/docs/historic.html b/lang/tcl/docs/historic.html new file mode 100644 index 00000000..9904a830 --- /dev/null +++ b/lang/tcl/docs/historic.html @@ -0,0 +1,168 @@ + + + + + + + + +

+Compatibility Commands

+The compatibility commands for old Dbm and Ndbm are described in the dbm +manpage. +

> berkdb dbminit filename +

This command will invoke the dbminit function.   Filename +is used as the name of the database. +

+


> berkdb dbmclose +

This command will invoke the dbmclose function. +

+


> berkdb fetch key +

This command will invoke the fetch function.   It will return +the data associated with the given key or a Tcl error. +

+


> berkdb store key data +

This command will invoke the store function.   It will store +the key/data pair.  It will return a 0 on success or +throw a Tcl error. +

+


> berkdb delete key +

This command will invoke the delete function.   It will delete +the key from the database.  It will return a 0 on success +or throw a Tcl error. +

+


> berkdb firstkey +

This command will invoke the firstkey function.   It will +return the first key in the database or a Tcl error. +

+


> berkdb nextkey key +

This command will invoke the nextkey function.   It will return +the next key after the given key or a Tcl error. +

+


> berkdb hcreate nelem +

This command will invoke the hcreate function with nelem +elements.  It will return a 0 on success or a Tcl error. +

+


> berkdb hsearch key data action +

This command will invoke the hsearch function with key +and data.  The action must be either find +or enter.  If it is find, it will return the resultant +data.  If it is enter, it will return a 0 on success or a Tcl +error. +

+


> berkdb hdestroy +

This command will invoke the hdestroy function.  It will return +a 0. +


> berkdb ndbm_open [-create] [-rdonly] [-truncate] +[-mode +mode] [--] filename +

This command will invoke the dbm_open function.    After +it successfully gets a handle to a database, we bind it to a new Tcl command +of the form ndbmX, where X is an integer starting at 0 (e.g. +ndbm0, +ndbm1, etc).  We use the Tcl_CreateObjCommand()  to +create the top level database function.  It is through this handle +that the user can access all of the commands described below.  Internally, +the database handle is sent as the ClientData portion of the new +command set so that all future database calls access the appropriate handle. +

The arguments are: +

    +
  • +-- - Terminate the list of options and use remaining arguments as +the file or subdb names (thus allowing the use of filenames beginning with +a dash '-')
  • + +
  • +-create selects the O_CREAT flag  to create underlying files
  • + +
  • +-rdonly selects the O_RDONLY flag for opening in read-only mode
  • + +
  • +-truncate selects the O_TRUNC flag to truncate the database
  • + +
  • +-mode mode specifies the mode for created files
  • + +
  • +filename indicates the name of the database
  • +
+ +


+


+
> <ndbm> close +

This command closes the database and renders the handle invalid.   +This command directly translates to the dbm_close function call.  +It returns either a 0 (for success),  or it throws a Tcl error with +a system message. +

Additionally, since the handle is no longer valid, we will call Tcl_DeleteCommand() +so +that further uses of the handle will be dealt with properly by Tcl itself.  +


+
> <ndbm> clearerr +

This command clears errors in the database.   This command +directly translates to the dbm_clearerr function call.  It returns +either a 0 (for success),  or it throws a Tcl error with a system +message. +

+


+
> <ndbm> delete key +

This command deletes the key from the database.   +This command directly translates to the dbm_delete function call.  +It returns either a 0 (for success),  or it throws a Tcl error with +a system message. +

+


+
> <ndbm> dirfno +

This command directly translates to the dbm_dirfno function call.  +It returns either results,  or it throws a Tcl error with a system +message. +

+


+
> <ndbm> error +

This command returns the last error.   This command directly +translates to the dbm_error function call.  It returns an error string. +

+


+
> <ndbm> fetch key +

This command gets the given key from the database.   +This command directly translates to the dbm_fetch function call.  +It returns either the data,  or it throws a Tcl error with a system +message. +

+


+
> <ndbm> firstkey +

This command returns the first key in the database.   This +command directly translates to the dbm_firstkey function call.  It +returns either the key,  or it throws a Tcl error with a system message. +

+


+
> <ndbm> nextkey +

This command returns the next key in the database.   This +command directly translates to the dbm_nextkey function call.  It +returns either the key,  or it throws a Tcl error with a system message. +

+


+
> <ndbm> pagfno +

This command directly translates to the dbm_pagfno function call.  +It returns either results,  or it throws a Tcl error with a system +message. +
+


+
> <ndbm> rdonly +

This command changes the database to readonly.   This command +directly translates to the dbm_rdonly function call.  It returns either +a 0 (for success),  or it throws a Tcl error with a system message. +

+


+
> <ndbm> store key data insert|replace +

This command puts the given key and data +pair into the database.   This command directly translates to +the dbm_store function call.  It will either insert or replace +the data based on the action given in the third argument.  It returns +either a 0 (for success),  or it throws a Tcl error with a system +message. +
+


+ + diff --git a/lang/tcl/docs/index.html b/lang/tcl/docs/index.html new file mode 100644 index 00000000..f7d62d7d --- /dev/null +++ b/lang/tcl/docs/index.html @@ -0,0 +1,50 @@ + + + + + + + + +
+

+Complete Tcl Interface for Berkeley DB

+ + + + + + diff --git a/lang/tcl/docs/library.html b/lang/tcl/docs/library.html new file mode 100644 index 00000000..91cae9c0 --- /dev/null +++ b/lang/tcl/docs/library.html @@ -0,0 +1,26 @@ + + + + + + + +
+

+Convenience Commands

+The convenience commands are provided for ease of use with the DB test +suite. +

> berkdb rand +

This command will invoke the rand function and return the random number. +

+


> berkdb random_int low high +

This command will invoke the rand function and return a number between +low +and high. +

+


+

> berkdb srand seed +

This command will invoke the srand function with the given seed +and return 0. +

+


diff --git a/lang/tcl/docs/lock.html b/lang/tcl/docs/lock.html new file mode 100644 index 00000000..ada94dc7 --- /dev/null +++ b/lang/tcl/docs/lock.html @@ -0,0 +1,206 @@ + + + + + + + + +

+Locking Commands

+Most locking commands work with the environment handle.  However, +when a user gets a lock we create a new lock handle that they then use +with in a similar manner to all the other handles to release the lock.  +We present the general locking functions first, and then those that manipulate +locks. +

> <env> lock_detect [default|oldest|youngest|random] +

This command runs the deadlock detector.  It directly translates +to the lock_detect DB call.  +It returns either a 0 (for success), a DB error message or it throws a +Tcl error with a system message.  The first argument sets the policy +for deadlock as follows: +

    +
  • +default selects the DB_LOCK_DEFAULT policy for default detection +(default if not specified)
  • + +
  • +oldest selects DB_LOCK_OLDEST to abort the oldest locker on a deadlock
  • + +
  • +random selects DB_LOCK_RANDOM to abort a random locker on a deadlock
  • + +
  • +youngest selects DB_LOCK_YOUNGEST to abort the youngest locker on +a deadlock
  • +
+ +
+
> <env> lock_stat +

This command returns a list of name/value pairs where the names correspond +to the C-structure field names of DB_LOCK_STAT and the values are the data +returned.  This command is a direct translation of the lock_stat +DB call. +


+
> <env> lock_id +

This command returns a unique locker ID value.  It directly translates +to the lock_id DB call. +
+


+
> <env> lock_id_free  locker +

This command frees the locker allocated by the lock_id call. It directly +translates to the  lock_id_free +DB +call. +


+
> <env> lock_id_set  current +max +

This  is a diagnostic command to set the locker id that will get +allocated next and the maximum id that +
will trigger the id reclaim algorithm. +


+
> <env> lock_get [-nowait]lockmode +locker obj +

This command gets a lock. It will invoke the lock_get +function.  After it successfully gets a handle to a lock, we bind +it to a new Tcl command of the form $env.lockX, where X is +an integer starting at  0 (e.g. $env.lock0, $env.lock1, etc).  +We use the Tcl_CreateObjCommand() to create the top level locking +command function.  It is through this handle that the user can release +the lock.  Internally, the handle we get back from DB will be stored +as the ClientData portion of the new command set so that future +locking calls will have that handle readily available. +

The arguments are: +

    +
  • +locker specifies the locker ID returned from the lock_id +command
  • + +
  • +obj specifies an object to lock
  • + +
  • +the lock mode is specified as one of the following:
  • + +
      +
    • +ng specifies DB_LOCK_NG for not granted (always 0)
    • + +
    • +read specifies DB_LOCK_READ for a read (shared) lock
    • + +
    • +write specifies DB_LOCK_WRITE for an exclusive write lock
    • + +
    • +iwrite specifies DB_LOCK_IWRITE for intent for exclusive write lock
    • + +
    • +iread specifies DB_LOCK_IREAD for intent for shared read lock
    • + +
  • +iwr specifies DB_LOCK_IWR for intent for read and write lock
    • +
    + +
  • +-nowait selects the DB_LOCK_NOWAIT to indicate that we do not want +to wait on the lock
  • +
+ +
+
> <lock> put +

This command releases the lock referenced by the command.  It is +a direct translation of the lock_put +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message.  Additionally, since +the handle is no longer valid, we will call +Tcl_DeleteCommand() +so +that further uses of the handle will be dealt with properly by Tcl itself. +
+


+
> <env> lock_vec [-nowait] locker +{get|put|put_all|put_obj +[obj] [lockmode] [lock]} ... +

This command performs a series of lock calls.  It is a direct translation +of the lock_vec function.  +This command will return a list of the return values from each operation +specified in the argument list.  For the 'put' operations the entry +in the return value list is either a 0 (for success) or an error.  +For the 'get' operation, the entry is the lock widget handle, $env.lockN +(as described above in <env> lock_get) +or an error.  If an error occurs, the return list will contain the +return values for all the successful operations up the erroneous one and +the error code for that operation.  Subsequent operations will be +ignored. +

As for the other operations, if we are doing a 'get' we will create +the commands and if we are doing a 'put' we will have to delete the commands.  +Additionally, we will have to do this after the call to the DB lock_vec +and iterate over the results, creating and/or deleting Tcl commands.  +It is possible that we may return a lock widget from a get operation that +is considered invalid, if, for instance, there was a put_all operation +performed later in the vector of operations.  The arguments are: +

    +
  • +locker specifies the locker ID returned from the lock_id +command
  • + +
  • +-nowait selects the DB_LOCK_NOWAIT to indicate that we do not want +to wait on the lock
  • + +
  • +the lock vectors are tuple consisting of {an operation, lock object, lock +mode, lock handle} where what is required is based on the operation desired:
  • + +
      +
    • +get specifies DB_LOCK_GET to get a lock.  Requires a tuple {get +obj mode} +where +mode +is:
    • + +
        +
      • +ng specifies DB_LOCK_NG for not granted (always 0)
      • + +
      • +read specifies DB_LOCK_READ for a read (shared) lock
      • + +
      • +write specifies DB_LOCK_WRITE for an exclusive write lock
      • + +
      • +iwrite specifies DB_LOCK_IWRITE for intent for exclusive write lock
      • + +
      • +iread specifies DB_LOCK_IREAD for intent for shared read lock
      • + +
      • +iwr specifies DB_LOCK_IWR for intent for read and write lock
      • +
      + +
    • +put specifies DB_LOCK_PUT to release a lock.  +Requires a tuple {put lock}
    • + +
    • +put_all specifies DB_LOCK_PUT_ALL to release all locks held by locker.  +Requires a tuple {put_all}
    • + +
    • +put_obj specifies DB_LOCK_PUT_OBJ to release all locks held by locker +associated with the given obj.  Requires a tuple {put_obj +obj}
    • +
    +
+ +
+
> <env> lock_timeout timeout +

This command sets the lock timeout for all future locks in this environment.  +The timeout is in microseconds. +
  +
  + + diff --git a/lang/tcl/docs/log.html b/lang/tcl/docs/log.html new file mode 100644 index 00000000..af14df1c --- /dev/null +++ b/lang/tcl/docs/log.html @@ -0,0 +1,123 @@ + + + + + + + + +

+Logging Commands

+Logging commands work from the environment handle to control the use of +the log files.  Log files are opened when the environment is opened +and closed when the environment is closed.  In all of the commands +in the logging subsystem that take or return a log sequence number, it +is of the form: +
{fileid offset} +
where the fileid is an identifier of the log file, as +returned from the log_get call. +

> <env> log_archive [-arch_abs] [-arch_data] [-arch_log] +

This command returns  a list of log files that are no longer in +use.  It is a direct call to the log_archive +function. The arguments are: +

    +
  • +-arch_abs selects DB_ARCH_ABS to return all pathnames as absolute +pathnames
  • + +
  • +-arch_data selects DB_ARCH_DATA to return a list of database files
  • + +
  • +-arch_log selects DB_ARCH_LOG to return a list of log files
  • +
+ +
+
> <env> log_compare lsn1 lsn2 +

This command compares two log sequence numbers, given as lsn1 +and lsn2.  It is a direct call to the log_compare +function.  It will return a -1, 0, 1 to indicate if lsn1 +is less than, equal to or greater than lsn2 respectively. +
+


+
> <env> log_file lsn +

This command returns  the file name associated with the given lsn.  +It is a direct call to the log_file +function. +
+


+
> <env> log_flush [lsn] +

This command  flushes the log up to the specified lsn +or flushes all records if none is given.  It is a direct call to the +log_flush +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. +
+


+
> <env> log_get [-checkpoint] +[-current] [-first] [-last] [-next] [-prev] [-set lsn] +

This command retrieves a record from the log according to the lsn +given and returns it and the data.  It is a direct call to the log_get +function.  It is a way of implementing a manner of log iteration similar +to cursors.   +The information we return is similar to database information.  We +return a list where the first item is the LSN (which is a list itself) +and the second item is the data.  So it looks like, fully expanded, +{{fileid +offset} +data}.  +In the case where DB_NOTFOUND is returned, we return an empty list {}.  +All other errors return a Tcl error.  The arguments are: +

    +
  • +-checkpoint selects the DB_CHECKPOINT flag to return the LSN/data +pair of the last record written through log_put +with DB_CHECKPOINT specified
  • + +
  • +-current selects the DB_CURRENT flag to return the current record
  • + +
  • +-first selects the DB_FIRST flag to return the first record in the +log.
  • + +
  • +-last selects the DB_LAST flag to return the last record in the +log.
  • + +
  • +-next selects the DB_NEXT flag to return the next record in the +log.
  • + +
  • +-prev selects the DB_PREV flag to return the  previous record +in the log.
  • + +
  • +-set selects the DB_SET flag to return the record specified by the +given lsn
  • +
+ +
+
> <env> log_put [-checkpoint] +[-flush] record +

This command stores a record into the log and returns +the LSN of the log record.  It is a direct call to the log_put +function.  It returns either an LSN or it throws a Tcl error with +a system message.  The arguments are: +

    +
  • +-checkpoint selects the DB_CHECKPOINT flag
  • + +
  • +-flush selects the DB_FLUSH flag to flush the log to disk.
  • +
+ +
+
> <env> log_stat +

This command returns  the statistics associated with the logging +subsystem.  It is a direct call to the log_stat +function.  It returns a list of name/value pairs of the DB_LOG_STAT +structure. + + diff --git a/lang/tcl/docs/mpool.html b/lang/tcl/docs/mpool.html new file mode 100644 index 00000000..1f75abd3 --- /dev/null +++ b/lang/tcl/docs/mpool.html @@ -0,0 +1,189 @@ + + + + + + + + +

+Memory Pool Commands

+Memory pools are used in a manner similar to the other subsystems.  +We create a handle to the pool and  then use it for a variety of operations.  +Some of the memory pool commands use the environment instead. Those are +presented first. +

> <env> mpool_stat +

This command returns  the statistics associated with the memory +pool subsystem.  It is a direct call to the memp_stat +function.  It returns a list of name/value pairs of the DB_MPOOL_STAT +structure. +
+


+
> <env> mpool_sync lsn +

This command flushes the memory pool for all pages with a log sequence +number less than lsn.  It is a direct call to the memp_sync  +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. +
+


+
> <env> mpool_trickle percent +

This command tells DB to ensure that at least percent +percent of the pages are clean by writing out enough dirty pages to +achieve that percentage.  It is a direct call to the memp_trickle +function.  The command will return the number of pages actually written.  +It returns either the number of pages on success, or it throws a Tcl error +with a system message. +
+


+

> <env> mpool [-create] [-nommap] [-rdonly] [-mode mode] +-pagesize size [file] +

This command creates a new memory pool.  It invokes the memp_fopen +function.  After it successfully gets a handle to a memory pool, we +bind it to a new Tcl command of the form $env.mpX, where +X is an integer starting at  0 (e.g. $env.mp0, $env.mp1, etc).  +We use the Tcl_CreateObjCommand() to create the top level memory +pool functions.  It is through this handle that the user can manipulate +the pool.  Internally, the handle we get back from DB will be stored +as the ClientData portion of the new command set so that future +memory pool calls will have that handle readily available.  Additionally, +we need to maintain this handle in relation to the environment so that +if the user calls <env> close without closing +the memory pool we can properly clean up.  The arguments are: +

    +
  • +file is the name of the file to open
  • + +
  • +-create selects the DB_CREATE flag to create underlying file
  • + +
  • +-mode mode sets the permissions of created file to mode
  • + +
  • +-nommap selects the DB_NOMMAP flag to disallow using mmap'ed files
  • + +
  • +-pagesize sets the underlying file page size to size
  • + +
  • +-rdonly selects the DB_RDONLY flag for read only access
  • +
+ +
+
> <mp> close +

This command closes the memory pool.  It is a direct call to the +memp_close +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. +

Additionally, since the handle is no longer valid, we will call +Tcl_DeleteCommand() +so +that further uses of the handle will be dealt with properly by Tcl itself.  +We must also remove the reference to this handle from the environment.  +We will go through the list of pinned pages that were acquired by the get +command and +put them back. +


+
> <mp> fsync +

This command flushes all of the file's dirty pages to disk.  It +is a direct call to the memp_fsync +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. +


+
> <mp> get [-create] [-last] [-new] +[pgno] +

This command gets the  pgno page from the memory +pool.  It invokes the memp_fget +function and possibly the memp_fset +function if any options are chosen to set the page characteristics.  +After it successfully gets a handle to a page,  we bind it to and +return a new Tcl command of the form $env.mpN.pX, where X +is an integer starting at  0 (e.g. $env.mp0.p0, $env.mp1.p0, etc).  +We use the Tcl_CreateObjCommand() to create the top level page functions.  +It is through this handle that the user can manipulate the page.  +Internally, the handle we get back from DB will be stored as the ClientData +portion of the new command set.  We need to store this handle in  +relation to the memory pool handle so that if the memory pool is closed, +we will put back the pages (setting the discard +flag) and delete that set of commands. +

The arguments are: +

    +
  • +-create selects the DB_MPOOL_CREATE flag  to create the page +if it does not exist.
  • + +
  • +-last selects the DB_MPOOL_LAST flag to return the last page in +the file
  • + +
  • +-new selects the DB_MPOOL_NEW flag to create a new page
  • +
+ +
+
> <pg> pgnum +

This command returns the page number associated with this memory pool +page.  Primarily it will be used after an <mp> +get call. +
+


> <pg> pgsize +

This command returns the page size associated with this memory pool +page.  Primarily it will be used after an <mp> +get call. +
+


> <pg> set [-clean] [-dirty] [-discard] +

This command sets the characteristics of the page.  It is a direct +call to the memp_fset function.  +It returns either a 0 (for success), a DB error message or it throws a +Tcl error with a system message.  The arguments are: +

    +
  • +-clean selects the DB_MPOOL_CLEAN flag to indicate this is a clean +page
  • + +
  • +-dirty selects the DB_MPOOL_DIRTY flag to indicate this page should +be flushed before eviction
  • + +
  • +-discard selects the DB_MPOOL_DISCARD flag to indicate this page +is unimportant
  • +
+ +
+
> <pg> put [-clean] [-dirty] [-discard] +

This command will put back the page to the memory pool.  It is +a direct call to the memp_fput +function.  It returns either a 0 (for success), a DB error message +or it throws a Tcl error with a system message. Additionally, since the +handle is no longer valid, we will call +Tcl_DeleteCommand() +so that +further uses of the handle will be dealt with properly by Tcl itself.  +We must also remove the reference to this handle from the memory pool. +

The arguments are: +

    +
  • +-clean selects the DB_MPOOL_CLEAN flag to indicate this is a clean +page
  • + +
  • +-dirty selects the DB_MPOOL_DIRTY flag to indicate this page should +be flushed before eviction
  • + +
  • +-discard selects the DB_MPOOL_DISCARD flag to indicate this page +is unimportant
  • +
+ +
+
> <pg> init val|string +

This command initializes the page to the val given or +places the string given at the beginning of the page.  +It returns a 0 for success or it throws a Tcl error with an error message. +

+


+
> <pg> is_setto val|string +

This command verifies the page contains the val given +or checks that the string given is at the beginning of the page.  +It returns a 1 if the page is correctly set to the value and a 0 otherwise. diff --git a/lang/tcl/docs/rep.html b/lang/tcl/docs/rep.html new file mode 100644 index 00000000..83ca4f38 --- /dev/null +++ b/lang/tcl/docs/rep.html @@ -0,0 +1,50 @@ + + + + + Replication commands + + + +

+Replication Commands

+Replication commands are invoked from the environment handle, after +it has been opened with the appropriate flags defined +here.
+
+

> <env> rep_process_message machid control +rec +

This command processes a single incoming replication message.  It +is a direct translation of the rep_process_message +function.  +It returns either a 0 (for success), a DB error message or it throws a +Tcl error with a system message.  The arguments are: +

    +
  • +machid is the machine ID of the machine that sent this +message.
  • + +
  • +control is a binary string containing the exact contents of the +control argument to the sendproc function +that was passed this message on another site.
  • + +
  • +rec is a binary string containing the exact contents of the +rec argument to the sendproc function +that was passed this message on another site.
  • +
+ +
+
> <env> rep_elect nsites pri wait +sleep +

This command causes a replication election.  It is a direct translation +of the rep_elect function.  +Its arguments, all integers, correspond exactly to that C function's +parameters. +It will return a list containing two integers, which contain, +respectively, the integer values returned in the C function's +midp and selfp parameters. + + diff --git a/lang/tcl/docs/sequence.html b/lang/tcl/docs/sequence.html new file mode 100644 index 00000000..ab669514 --- /dev/null +++ b/lang/tcl/docs/sequence.html @@ -0,0 +1,93 @@ + + + + + + Sequence Commands + + +

Sequence Commands

+> berkdb sequence [-auto_commit] [-txn txnid] [-create]
+
 Implements DBENV->sequence +function. The above options have the usual meanings.
+
+[-cachesize]
+
Set the size of the cache in this +handle.
+
+[-inc]
+
+
Sequence increments..
+
+[-dec]
+
+
Sequence decrements.
+
+[-init integer]
+
+
Set the initial value for sequence.
+
+[-max integer]
+
Set the maximum value for the sequence.
+
+[-min integer]
+
+
Set the minimum value for the sequence.
+
+[-wrap]
+
Wrap around at max or min.
+
+db +key
+
+
Database handle and key of sequence.
+
+
> seq get [-txn txn] +[-auto_commit] [-nosync] delta
+
+
Get the next sequence value and +increment the sequence by delta.
+
+
> seq close
+
Close the sequence
+
+
+
> seq remove [-auto_commit] [-nosync] +[-txn]
+
+
Remove the sequence.
+
+
> seq get_cachesize
+
+
Return the size of the cache.
+
+
> seq get_db
+
+
Return the underlying db handle.
+
+
> seq get_flags
+
Return the flags set on create.
+
+
> seq get_range
+
+
Return the min and max set at create.
+
+
> seq stat
+
+
Implements the SEQUENCE->stat function.
+
+
+ + diff --git a/lang/tcl/docs/test.html b/lang/tcl/docs/test.html new file mode 100644 index 00000000..551d4710 --- /dev/null +++ b/lang/tcl/docs/test.html @@ -0,0 +1,103 @@ + + + + + + + + +

+Debugging and Testing

+We have imported the debugging system from the old test suite into the +new interface to aid in debugging problems.  There are several variables +that are available both in gdb as globals to the C code, and variables +in Tcl that the user can set.  These variables are linked together +so that changes in one venue are reflected in the other.  The names +of the variables have been modified a bit to reduce the likelihood +
of namespace trampling.  We have added a double underscore to +all the names. +

The variables are all initialized to zero (0) thus resulting in debugging +being turned off.  The purpose of the debugging, fundamentally, is +to allow the user to set a breakpoint prior to making a DB call.  +This breakpoint is set in the __db_loadme() function.  The +user may selectively turn on various debugging areas each controlled by +a separate variable (note they all have two (2) underscores prepended to +the name): +

    +
  • +__debug_on - Turns on the debugging system.  This must be on +for any debugging to occur
  • + +
  • +__debug_print - Turns on printing a debug count statement on each +call
  • + +
  • +__debug_test - Hits the breakpoint in __db_loadme on the +specific iteration
  • + +
  • +__debug_stop - Hits the breakpoint in __db_loadme on every +(or the next) iteration
  • +
+Note to developers:  Anyone extending this interface must place +a call to _debug_check() (no arguments) before every call into the +DB library. +

There is also a command available that will force a call to the _debug_check +function. +

> berkdb debug_check +

+


+
For testing purposes we have added several hooks into the DB library +and a small interface into the environment and/or database commands to +manipulate the hooks.  This command interface and the hooks and everything +that goes with it is only enabled when the test option is configured into +DB. +

> <env> test copy location +
> <db> test copy location +
> <env> test abort location +
> <db> test abort location +

In order to test recovery we need to be able to abort the creation or +deletion process at various points.  Also we want to invoke a copy +function to copy the database file(s)  at various points as well so +that we can obtain before/after snapshots of the databases.  The interface +provides the test command to specify a location where we +wish to invoke a copy or an abort.  The command is available +from either the environment or the database for convenience.  The +location +can be one of the following: +

    +
  • +none - Clears the location
  • + +
  • +preopen - Sets the location prior to the __os_open call in the creation +process
  • + +
  • +postopen - Sets the location to immediately following the __os_open +call in creation
  • + +
  • +postlogmeta - Sets the location to immediately following the __db_log_page +call to log the meta data in creation.  Only valid for Btree.
  • + +
  • +postlog - Sets the location to immediately following the last (or +only) __db_log_page call in creation.
  • + +
  • +postsync - Sets the location to immediately following the sync of +the log page in creation.
  • + +
  • +prerename - Sets the location prior to the __os_rename call in the +deletion process.
  • + +
  • +postrename - Sets the location to immediately following the __os_rename +call in deletion
  • +
+ + + diff --git a/lang/tcl/docs/txn.html b/lang/tcl/docs/txn.html new file mode 100644 index 00000000..a79062c6 --- /dev/null +++ b/lang/tcl/docs/txn.html @@ -0,0 +1,69 @@ + + + + + + + + +

+Transaction Commands

+Transactions are used in a manner similar to the other subsystems.  +We create a handle to the transaction and  then use it for a variety +of operations.  Some of the transaction commands use the environment +instead.  Those are presented first.  The transaction command +handle returned is the handle used by the various commands that can be +transaction protected, such as cursors. +
+
+

> <env> txn_checkpoint [-kbyte kb] [-min min] +

This command causes a checkpoint of the transaction region.  It +is a direct translation of the txn_checkpoint +function.  +It returns either a 0 (for success), a DB error message or it throws a +Tcl error with a system message.  The arguments are: +

    +
  • +-force causes the checkpoint to occur regardless of inactivity + +
  • +-kbyte causes the checkpoint to occur only if kb kilobytes +of log data has been written since the last checkpoint + +
  • +-min causes the checkpoint to occur only if min minutes +have passed since the last checkpoint +
+ +
+
> <env> txn_stat +

This command returns transaction statistics.  It is a direct translation +of the txn_stat function.  +It will return a list of name/value pairs that correspond to the DB_TXN_STAT +structure. +


+
> <env> txn_id_set  current max +

This is a diagnostic command that sets the next transaction id to be +allocated and the maximum transaction +
id, which is the point at which the reclamation algorithm is triggered. +


+
>  <txn> id +

This command returns the transaction id.  It is a direct call to +the txn_id function.  The +typical use of this identifier is as the locker value for +the lock_get and lock_vec +calls. +


+
> <txn> prepare +

This command initiates a two-phase commit.  It is a direct call +to the txn_prepare function.  +It returns either a 0 (for success), a DB error message or it throws a +Tcl error with a system message. +


> <env> txn_timeout +timeout +

This command sets the transaction timeout for transactions started in +the future in this environment.  The timeout is in microseconds. +
  +
  + + diff --git a/lang/tcl/tcl_compat.c b/lang/tcl/tcl_compat.c new file mode 100644 index 00000000..36e26566 --- /dev/null +++ b/lang/tcl/tcl_compat.c @@ -0,0 +1,737 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" +#ifdef CONFIG_TEST + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +/* + * bdb_HCommand -- + * Implements h* functions. + * + * PUBLIC: int bdb_HCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); + */ +int +bdb_HCommand(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *hcmds[] = { + "hcreate", + "hdestroy", + "hsearch", + NULL + }; + enum hcmds { + HHCREATE, + HHDESTROY, + HHSEARCH + }; + static const char *srchacts[] = { + "enter", + "find", + NULL + }; + enum srchacts { + ACT_ENTER, + ACT_FIND + }; + ENTRY item, *hres; + ACTION action; + int actindex, cmdindex, nelem, result, ret; + Tcl_Obj *res; + + result = TCL_OK; + /* + * Get the command name index from the object based on the cmds + * defined above. This SHOULD NOT fail because we already checked + * in the 'berkdb' command. + */ + if (Tcl_GetIndexFromObj(interp, + objv[1], hcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum hcmds)cmdindex) { + case HHCREATE: + /* + * Must be 1 arg, nelem. Error if not. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "nelem"); + return (TCL_ERROR); + } + result = Tcl_GetIntFromObj(interp, objv[2], &nelem); + if (result == TCL_OK) { + _debug_check(); + ret = hcreate((size_t)nelem) == 0 ? 1: 0; + (void)_ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "hcreate"); + } + break; + case HHSEARCH: + /* + * 3 args for this. Error if different. 
+ */ + if (objc != 5) { + Tcl_WrongNumArgs(interp, 2, objv, "key data action"); + return (TCL_ERROR); + } + item.key = Tcl_GetStringFromObj(objv[2], NULL); + item.data = Tcl_GetStringFromObj(objv[3], NULL); + if (Tcl_GetIndexFromObj(interp, objv[4], srchacts, + "action", TCL_EXACT, &actindex) != TCL_OK) + return (IS_HELP(objv[4])); + switch ((enum srchacts)actindex) { + case ACT_ENTER: + action = ENTER; + break; + default: + case ACT_FIND: + action = FIND; + break; + } + _debug_check(); + hres = hsearch(item, action); + if (hres == NULL) + Tcl_SetResult(interp, "-1", TCL_STATIC); + else if (action == FIND) + Tcl_SetResult(interp, (char *)hres->data, TCL_STATIC); + else + /* action is ENTER */ + Tcl_SetResult(interp, "0", TCL_STATIC); + + break; + case HHDESTROY: + /* + * No args for this. Error if there are some. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + hdestroy(); + res = Tcl_NewIntObj(0); + break; + } + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * + * bdb_NdbmOpen -- + * Opens an ndbm database. + * + * PUBLIC: #if DB_DBM_HSEARCH != 0 + * PUBLIC: int bdb_NdbmOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBM **)); + * PUBLIC: #endif + */ +int +bdb_NdbmOpen(interp, objc, objv, dbpp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBM **dbpp; /* Dbm pointer */ +{ + static const char *ndbopen[] = { + "-create", + "-mode", + "-rdonly", + "-truncate", + "--", + NULL + }; + enum ndbopen { + NDB_CREATE, + NDB_MODE, + NDB_RDONLY, + NDB_TRUNC, + NDB_ENDARG + }; + + int endarg, i, mode, open_flags, optindex, read_only, result, ret; + char *arg, *db; + + result = TCL_OK; + endarg = mode = open_flags = read_only = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + + /* + * Get the option name index from the object based on the args + * defined above. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], ndbopen, "option", + TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum ndbopen)optindex) { + case NDB_CREATE: + open_flags |= O_CREAT; + break; + case NDB_RDONLY: + read_only = 1; + break; + case NDB_TRUNC: + open_flags |= O_TRUNC; + break; + case NDB_MODE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mode mode?"); + result = TCL_ERROR; + break; + } + /* + * Don't need to check result here because + * if TCL_ERROR, the error message is already + * set up, and we'll bail out below. If ok, + * the mode is set and we go on. + */ + result = Tcl_GetIntFromObj(interp, objv[i++], &mode); + break; + case NDB_ENDARG: + endarg = 1; + break; + } + + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + + /* + * Any args we have left, (better be 0, or 1 left) is a + * file name. If we have 0, then an in-memory db. If + * there is 1, a db name. + */ + db = NULL; + if (i != objc && i != objc - 1) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? 
?file?"); + result = TCL_ERROR; + goto error; + } + if (i != objc) + db = Tcl_GetStringFromObj(objv[objc - 1], NULL); + + /* + * When we get here, we have already parsed all of our args + * and made all our calls to set up the database. Everything + * is okay so far, no errors, if we get here. + * + * Now open the database. + */ + if (read_only) + open_flags |= O_RDONLY; + else + open_flags |= O_RDWR; + _debug_check(); + if ((*dbpp = dbm_open(db, open_flags, mode)) == NULL) { + ret = Tcl_GetErrno(); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db open"); + goto error; + } + return (TCL_OK); + +error: + *dbpp = NULL; + return (result); +} + +/* + * bdb_DbmCommand -- + * Implements "dbm" commands. + * + * PUBLIC: #if DB_DBM_HSEARCH != 0 + * PUBLIC: int bdb_DbmCommand + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST*, int, DBM *)); + * PUBLIC: #endif + */ +int +bdb_DbmCommand(interp, objc, objv, flag, dbm) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + int flag; /* Which db interface */ + DBM *dbm; /* DBM pointer */ +{ + static const char *dbmcmds[] = { + "dbmclose", + "dbminit", + "delete", + "fetch", + "firstkey", + "nextkey", + "store", + NULL + }; + enum dbmcmds { + DBMCLOSE, + DBMINIT, + DBMDELETE, + DBMFETCH, + DBMFIRST, + DBMNEXT, + DBMSTORE + }; + static const char *stflag[] = { + "insert", "replace", + NULL + }; + enum stflag { + STINSERT, STREPLACE + }; + datum key, data; + void *dtmp, *ktmp; + u_int32_t size; + int cmdindex, freedata, freekey, stindex, result, ret; + char *name, *t; + + result = TCL_OK; + freekey = freedata = 0; + dtmp = ktmp = NULL; + + /* + * Get the command name index from the object based on the cmds + * defined above. This SHOULD NOT fail because we already checked + * in the 'berkdb' command. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], dbmcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + switch ((enum dbmcmds)cmdindex) { + case DBMCLOSE: + /* + * No arg for this. Error if different. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + if (flag == DBTCL_DBM) + ret = dbmclose(); + else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + return (TCL_ERROR); + } + (void)_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "dbmclose"); + break; + case DBMINIT: + /* + * Must be 1 arg - file. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "file"); + return (TCL_ERROR); + } + name = Tcl_GetStringFromObj(objv[2], NULL); + if (flag == DBTCL_DBM) + ret = dbminit(name); + else { + Tcl_SetResult(interp, "Bad interface flag for command", + TCL_STATIC); + return (TCL_ERROR); + } + (void)_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "dbminit"); + break; + case DBMFETCH: + /* + * 1 arg for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "key"); + return (TCL_ERROR); + } + if ((ret = _CopyObjBytes( + interp, objv[2], &ktmp, &size, &freekey)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbm fetch"); + goto out; + } + key.dsize = (int)size; + key.dptr = (char *)ktmp; + _debug_check(); + if (flag == DBTCL_DBM) + data = fetch(key); + else if (flag == DBTCL_NDBM) + data = dbm_fetch(dbm, key); + else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + if (data.dptr == NULL || + (ret = __os_malloc(NULL, (size_t)data.dsize + 1, &t)) != 0) + Tcl_SetResult(interp, "-1", TCL_STATIC); + else { + memcpy(t, data.dptr, (size_t)data.dsize); + t[data.dsize] = '\0'; + Tcl_SetResult(interp, t, TCL_VOLATILE); + __os_free(NULL, t); + } + break; + case DBMSTORE: + /* + * 2 args for this. Error if different. 
+ */ + if (objc != 4 && flag == DBTCL_DBM) { + Tcl_WrongNumArgs(interp, 2, objv, "key data"); + return (TCL_ERROR); + } + if (objc != 5 && flag == DBTCL_NDBM) { + Tcl_WrongNumArgs(interp, 2, objv, "key data action"); + return (TCL_ERROR); + } + if ((ret = _CopyObjBytes( + interp, objv[2], &ktmp, &size, &freekey)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbm fetch"); + goto out; + } + key.dsize = (int)size; + key.dptr = (char *)ktmp; + if ((ret = _CopyObjBytes( + interp, objv[3], &dtmp, &size, &freedata)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbm fetch"); + goto out; + } + data.dsize = (int)size; + data.dptr = (char *)dtmp; + _debug_check(); + if (flag == DBTCL_DBM) + ret = store(key, data); + else if (flag == DBTCL_NDBM) { + if (Tcl_GetIndexFromObj(interp, objv[4], stflag, + "flag", TCL_EXACT, &stindex) != TCL_OK) + return (IS_HELP(objv[4])); + switch ((enum stflag)stindex) { + case STINSERT: + flag = DBM_INSERT; + break; + case STREPLACE: + flag = DBM_REPLACE; + break; + } + ret = dbm_store(dbm, key, data, flag); + } else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + return (TCL_ERROR); + } + (void)_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "store"); + break; + case DBMDELETE: + /* + * 1 arg for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "key"); + return (TCL_ERROR); + } + if ((ret = _CopyObjBytes( + interp, objv[2], &ktmp, &size, &freekey)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbm fetch"); + goto out; + } + key.dsize = (int)size; + key.dptr = (char *)ktmp; + _debug_check(); + if (flag == DBTCL_DBM) + ret = delete(key); + else if (flag == DBTCL_NDBM) + ret = dbm_delete(dbm, key); + else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + return (TCL_ERROR); + } + (void)_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "delete"); + break; + case DBMFIRST: + /* + * No arg for this. 
Error if different. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + if (flag == DBTCL_DBM) + key = firstkey(); + else if (flag == DBTCL_NDBM) + key = dbm_firstkey(dbm); + else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + return (TCL_ERROR); + } + if (key.dptr == NULL || + (ret = __os_malloc(NULL, (size_t)key.dsize + 1, &t)) != 0) + Tcl_SetResult(interp, "-1", TCL_STATIC); + else { + memcpy(t, key.dptr, (size_t)key.dsize); + t[key.dsize] = '\0'; + Tcl_SetResult(interp, t, TCL_VOLATILE); + __os_free(NULL, t); + } + break; + case DBMNEXT: + /* + * 0 or 1 arg for this. Error if different. + */ + _debug_check(); + if (flag == DBTCL_DBM) { + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + if ((ret = _CopyObjBytes( + interp, objv[2], &ktmp, &size, &freekey)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbm fetch"); + goto out; + } + key.dsize = (int)size; + key.dptr = (char *)ktmp; + data = nextkey(key); + } else if (flag == DBTCL_NDBM) { + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + data = dbm_nextkey(dbm); + } else { + Tcl_SetResult(interp, + "Bad interface flag for command", TCL_STATIC); + return (TCL_ERROR); + } + if (data.dptr == NULL || + (ret = __os_malloc(NULL, (size_t)data.dsize + 1, &t)) != 0) + Tcl_SetResult(interp, "-1", TCL_STATIC); + else { + memcpy(t, data.dptr, (size_t)data.dsize); + t[data.dsize] = '\0'; + Tcl_SetResult(interp, t, TCL_VOLATILE); + __os_free(NULL, t); + } + break; + } + +out: if (dtmp != NULL && freedata) + __os_free(NULL, dtmp); + if (ktmp != NULL && freekey) + __os_free(NULL, ktmp); + return (result); +} + +/* + * ndbm_Cmd -- + * Implements the "ndbm" widget. 
+ * + * PUBLIC: int ndbm_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + */ +int +ndbm_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* DB handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *ndbcmds[] = { + "clearerr", + "close", + "delete", + "dirfno", + "error", + "fetch", + "firstkey", + "nextkey", + "pagfno", + "rdonly", + "store", + NULL + }; + enum ndbcmds { + NDBCLRERR, + NDBCLOSE, + NDBDELETE, + NDBDIRFNO, + NDBERR, + NDBFETCH, + NDBFIRST, + NDBNEXT, + NDBPAGFNO, + NDBRDONLY, + NDBSTORE + }; + DBM *dbp; + DBTCL_INFO *dbip; + Tcl_Obj *res; + int cmdindex, result, ret; + + Tcl_ResetResult(interp); + dbp = (DBM *)clientData; + dbip = _PtrToInfo((void *)dbp); + result = TCL_OK; + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (dbp == NULL) { + Tcl_SetResult(interp, "NULL db pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (dbip == NULL) { + Tcl_SetResult(interp, "NULL db info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. + */ + if (Tcl_GetIndexFromObj(interp, + objv[1], ndbcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum ndbcmds)cmdindex) { + case NDBCLOSE: + _debug_check(); + dbm_close(dbp); + (void)Tcl_DeleteCommand(interp, dbip->i_name); + _DeleteInfo(dbip); + res = Tcl_NewIntObj(0); + break; + case NDBDELETE: + case NDBFETCH: + case NDBFIRST: + case NDBNEXT: + case NDBSTORE: + result = bdb_DbmCommand(interp, objc, objv, DBTCL_NDBM, dbp); + break; + case NDBCLRERR: + /* + * No args for this. Error if there are some. 
+ */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbm_clearerr(dbp); + if (ret) + (void)_ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "clearerr"); + else + res = Tcl_NewIntObj(ret); + break; + case NDBDIRFNO: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbm_dirfno(dbp); + res = Tcl_NewIntObj(ret); + break; + case NDBPAGFNO: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbm_pagfno(dbp); + res = Tcl_NewIntObj(ret); + break; + case NDBERR: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbm_error(dbp); + Tcl_SetErrno(ret); + Tcl_SetResult(interp, + (char *)Tcl_PosixError(interp), TCL_STATIC); + break; + case NDBRDONLY: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbm_rdonly(dbp); + if (ret) + (void)_ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "rdonly"); + else + res = Tcl_NewIntObj(ret); + break; + } + + /* + * Only set result if we have a res. Otherwise, lower functions have + * already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} +#endif /* CONFIG_TEST */ diff --git a/lang/tcl/tcl_db.c b/lang/tcl/tcl_db.c new file mode 100644 index 00000000..6276f996 --- /dev/null +++ b/lang/tcl/tcl_db.c @@ -0,0 +1,4050 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/tcl_db.h" + +/* + * Prototypes for procedures defined later in this file: + */ +static int tcl_DbAssociate __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbAssociateForeign __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbClose __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *, DBTCL_INFO *)); +static int tcl_DbDelete __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *, int)); +#ifdef CONFIG_TEST +static int tcl_DbKeyRange __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +#endif +static int tcl_DbPut __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbStatPrint __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbTruncate __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +#ifdef CONFIG_TEST +static int tcl_DbCompact __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbCompactStat __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *)); +#endif +static int tcl_DbCursor __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *, DBC **)); +static int tcl_DbJoin __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *, DBC **)); +static int tcl_DbGetFlags __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbGetOpenFlags __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbGetjoin __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_DbCount __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB *)); +static int tcl_second_call __P((DB *, const DBT *, const DBT *, DBT *)); +static int tcl_foreign_call __P((DB *, const DBT *, DBT *, + const DBT *, int *)); + +/* + * _DbInfoDelete -- + * + * PUBLIC: void _DbInfoDelete __P((Tcl_Interp *, DBTCL_INFO 
*)); + */ +void +_DbInfoDelete(interp, dbip) + Tcl_Interp *interp; + DBTCL_INFO *dbip; +{ + DBTCL_INFO *nextp, *p; + /* + * First we have to close any open cursors. Then we close + * our db. + */ + for (p = LIST_FIRST(&__db_infohead); p != NULL; p = nextp) { + nextp = LIST_NEXT(p, entries); + /* + * Check if this is a cursor info structure and if + * it is, if it belongs to this DB. If so, remove + * its commands and info structure. + */ + if (p->i_parent == dbip && p->i_type == I_DBC) { + (void)Tcl_DeleteCommand(interp, p->i_name); + _DeleteInfo(p); + } + } + (void)Tcl_DeleteCommand(interp, dbip->i_name); + _DeleteInfo(dbip); +} + +/* + * + * PUBLIC: int db_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + * + * db_Cmd -- + * Implements the "db" widget. + */ +int +db_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* DB handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *dbcmds[] = { +#ifdef CONFIG_TEST + "keyrange", + "pget", + "test", + "compact", + "compact_stat", +#endif + "associate", + "associate_foreign", + "close", + "count", + "cursor", + "del", + "get", + "get_bt_minkey", + "get_cachesize", + "get_dbname", + "get_encrypt_flags", + "get_env", + "get_errpfx", + "get_flags", + "get_h_ffactor", + "get_h_nelem", + "get_join", + "get_lorder", + "get_open_flags", + "get_pagesize", + "get_q_extentsize", + "get_re_delim", + "get_re_len", + "get_re_pad", + "get_re_source", + "get_type", + "is_byteswapped", + "join", + "put", + "stat", + "stat_print", + "sync", + "truncate", + NULL + }; + enum dbcmds { +#ifdef CONFIG_TEST + DBKEYRANGE, + DBPGET, + DBTEST, + DBCOMPACT, + DBCOMPACT_STAT, +#endif + DBASSOCIATE, + DBASSOCFOREIGN, + DBCLOSE, + DBCOUNT, + DBCURSOR, + DBDELETE, + DBGET, + DBGETBTMINKEY, + DBGETCACHESIZE, + DBGETDBNAME, + DBGETENCRYPTFLAGS, + DBGETENV, + DBGETERRPFX, + DBGETFLAGS, + DBGETHFFACTOR, + DBGETHNELEM, + 
DBGETJOIN, + DBGETLORDER, + DBGETOPENFLAGS, + DBGETPAGESIZE, + DBGETQEXTENTSIZE, + DBGETREDELIM, + DBGETRELEN, + DBGETREPAD, + DBGETRESOURCE, + DBGETTYPE, + DBSWAPPED, + DBJOIN, + DBPUT, + DBSTAT, + DBSTATPRINT, + DBSYNC, + DBTRUNCATE + }; + DB *dbp, *hrdbp, *hsdbp; + DB_ENV *dbenv; + DBC *dbc; + DBTCL_INFO *dbip, *ip; + DBTYPE type; + Tcl_Obj *res, *myobjv[3]; + int cmdindex, intval, ncache, result, ret; + char newname[MSG_SIZE]; + u_int32_t bytes, gbytes, value; + const char *strval, *filename, *dbname, *envid; + + Tcl_ResetResult(interp); + dbp = (DB *)clientData; + dbip = _PtrToInfo((void *)dbp); + memset(newname, 0, MSG_SIZE); + result = TCL_OK; + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (dbp == NULL) { + Tcl_SetResult(interp, "NULL db pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (dbip == NULL) { + Tcl_SetResult(interp, "NULL db info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], dbcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum dbcmds)cmdindex) { +#ifdef CONFIG_TEST + case DBKEYRANGE: + result = tcl_DbKeyRange(interp, objc, objv, dbp); + break; + case DBPGET: + result = tcl_DbGet(interp, objc, objv, dbp, 1); + break; + case DBTEST: + result = tcl_EnvTest(interp, objc, objv, dbp->dbenv); + break; + + case DBCOMPACT: + result = tcl_DbCompact(interp, objc, objv, dbp); + break; + + case DBCOMPACT_STAT: + result = tcl_DbCompactStat(interp, objc, objv, dbp); + break; + +#endif + case DBASSOCIATE: + result = tcl_DbAssociate(interp, objc, objv, dbp); + break; + case DBASSOCFOREIGN: + result = tcl_DbAssociateForeign(interp, objc, objv, dbp); + break; + case DBCLOSE: + result = tcl_DbClose(interp, objc, objv, dbp, dbip); + break; + case DBDELETE: + result = tcl_DbDelete(interp, objc, objv, dbp); + break; + case DBGET: + result = tcl_DbGet(interp, objc, objv, dbp, 0); + break; + case DBPUT: + result = tcl_DbPut(interp, objc, objv, dbp); + break; + case DBCOUNT: + result = tcl_DbCount(interp, objc, objv, dbp); + break; + case DBSWAPPED: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbp->get_byteswapped(dbp, &intval); + res = Tcl_NewIntObj(intval); + break; + case DBGETTYPE: + /* + * No args for this. Error if there are some. 
+ */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbp->get_type(dbp, &type); + if (type == DB_BTREE) + res = NewStringObj("btree", strlen("btree")); + else if (type == DB_HASH) + res = NewStringObj("hash", strlen("hash")); + else if (type == DB_RECNO) + res = NewStringObj("recno", strlen("recno")); + else if (type == DB_QUEUE) + res = NewStringObj("queue", strlen("queue")); + else if (type == DB_HEAP) + res = NewStringObj("heap", strlen("heap")); + else { + Tcl_SetResult(interp, + "db gettype: Returned unknown type\n", TCL_STATIC); + result = TCL_ERROR; + } + break; + case DBSTAT: + result = tcl_DbStat(interp, objc, objv, dbp); + break; + case DBSTATPRINT: + result = tcl_DbStatPrint(interp, objc, objv, dbp); + break; + case DBSYNC: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbp->sync(dbp, 0); + res = Tcl_NewIntObj(ret); + if (ret != 0) { + Tcl_SetObjResult(interp, res); + result = TCL_ERROR; + } + + /* If we are heap, we have more work to do. 
*/ + ret = dbp->get_type(dbp, &type); + if (type == DB_HEAP) { + hrdbp = dbip->hrdbp; + hsdbp = dbip->hsdbp; + + /* sync the associated dbs also */ + ret = dbp->sync(hrdbp, 0); + res = Tcl_NewIntObj(ret); + if (ret != 0) { + Tcl_SetObjResult(interp, res); + result = TCL_ERROR; + } + + ret = dbp->sync(hsdbp, 0); + res = Tcl_NewIntObj(ret); + if (ret != 0) { + Tcl_SetObjResult(interp, res); + result = TCL_ERROR; + } + } + break; + case DBCURSOR: + snprintf(newname, sizeof(newname), + "%s.c%d", dbip->i_name, dbip->i_dbdbcid); + ip = _NewInfo(interp, NULL, newname, I_DBC); + if (ip != NULL) { + result = tcl_DbCursor(interp, objc, objv, dbp, &dbc); + if (result == TCL_OK) { + dbip->i_dbdbcid++; + ip->i_parent = dbip; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)dbc_Cmd, + (ClientData)dbc, NULL); + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(ip, dbc); + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, + "Could not set up info", TCL_STATIC); + result = TCL_ERROR; + } + break; + case DBJOIN: + snprintf(newname, sizeof(newname), + "%s.c%d", dbip->i_name, dbip->i_dbdbcid); + ip = _NewInfo(interp, NULL, newname, I_DBC); + if (ip != NULL) { + result = tcl_DbJoin(interp, objc, objv, dbp, &dbc); + if (result == TCL_OK) { + dbip->i_dbdbcid++; + ip->i_parent = dbip; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)dbc_Cmd, + (ClientData)dbc, NULL); + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(ip, dbc); + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, + "Could not set up info", TCL_STATIC); + result = TCL_ERROR; + } + break; + case DBGETBTMINKEY: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_bt_minkey(dbp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_bt_minkey")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETCACHESIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, 
objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_cachesize(dbp, &gbytes, &bytes, &ncache); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_cachesize")) == TCL_OK) { + myobjv[0] = Tcl_NewIntObj((int)gbytes); + myobjv[1] = Tcl_NewIntObj((int)bytes); + myobjv[2] = Tcl_NewIntObj((int)ncache); + res = Tcl_NewListObj(3, myobjv); + } + break; + case DBGETDBNAME: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_dbname(dbp, &filename, &dbname); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_dbname")) == TCL_OK) { + myobjv[0] = filename == NULL ? NewStringObj("", 0) : + NewStringObj(filename, strlen(filename)); + myobjv[1] = dbname == NULL ? NewStringObj("", 0) : + NewStringObj(dbname, strlen(dbname)); + res = Tcl_NewListObj(2, myobjv); + } + break; + case DBGETENCRYPTFLAGS: + result = tcl_EnvGetEncryptFlags(interp, objc, objv, dbp->dbenv); + break; + case DBGETENV: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + dbenv = dbp->get_env(dbp); + if (dbenv != NULL && (ip = _PtrToInfo(dbenv)) != NULL) { + envid = ip->i_name; + res = NewStringObj(envid, strlen(envid)); + } else + Tcl_ResetResult(interp); + break; + case DBGETERRPFX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + dbp->get_errpfx(dbp, &strval); + res = NewStringObj(strval, strlen(strval)); + break; + case DBGETFLAGS: + result = tcl_DbGetFlags(interp, objc, objv, dbp); + break; + case DBGETHFFACTOR: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_h_ffactor(dbp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_h_ffactor")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETHNELEM: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_h_nelem(dbp, &value); + if ((result = 
_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_h_nelem")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETJOIN: + result = tcl_DbGetjoin(interp, objc, objv, dbp); + break; + case DBGETLORDER: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbp->get_lorder(dbp, &intval); + res = Tcl_NewIntObj(intval); + break; + case DBGETOPENFLAGS: + result = tcl_DbGetOpenFlags(interp, objc, objv, dbp); + break; + case DBGETPAGESIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_pagesize(dbp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_pagesize")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETQEXTENTSIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_q_extentsize(dbp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_q_extentsize")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETREDELIM: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_re_delim(dbp, &intval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_re_delim")) == TCL_OK) + res = Tcl_NewIntObj(intval); + break; + case DBGETRELEN: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_re_len(dbp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_re_len")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case DBGETREPAD: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_re_pad(dbp, &intval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_re_pad")) == TCL_OK) + res = Tcl_NewIntObj((int)intval); + break; + case 
DBGETRESOURCE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbp->get_re_source(dbp, &strval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_re_source")) == TCL_OK) + res = NewStringObj(strval, strlen(strval)); + break; + case DBTRUNCATE: + result = tcl_DbTruncate(interp, objc, objv, dbp); + break; + } + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * tcl_db_stat -- + */ +static int +tcl_DbStat(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbstatopts[] = { +#ifdef CONFIG_TEST + "-read_committed", + "-read_uncommitted", +#endif + "-faststat", + "-txn", + NULL + }; + enum dbstatopts { +#ifdef CONFIG_TEST + DBCUR_READ_COMMITTED, + DBCUR_READ_UNCOMMITTED, +#endif + DBCUR_FASTSTAT, + DBCUR_TXN + }; + DBTYPE type; + DB_BTREE_STAT *bsp; + DB_HASH_STAT *hsp; + DB_HEAP_STAT *hpsp; + DB_QUEUE_STAT *qsp; + DB_TXN *txn; + Tcl_Obj *res, *flaglist, *myobjv[2]; + u_int32_t flag; + int i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + void *sp; + + result = TCL_OK; + flag = 0; + txn = NULL; + sp = NULL; + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbstatopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum dbstatopts)optindex) { +#ifdef CONFIG_TEST + case DBCUR_READ_COMMITTED: + flag |= DB_READ_COMMITTED; + break; + case DBCUR_READ_UNCOMMITTED: + flag |= DB_READ_UNCOMMITTED; + break; +#endif + case DBCUR_FASTSTAT: + flag |= DB_FAST_STAT; + break; + case DBCUR_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = 
Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Stat: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbp->stat(dbp, txn, &sp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db stat"); + if (result == TCL_ERROR) + return (result); + + (void)dbp->get_type(dbp, &type); + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); + + /* + * MAKE_STAT_LIST assumes 'res' and 'error' label. + */ + if (type == DB_HASH) { + hsp = (DB_HASH_STAT *)sp; + MAKE_STAT_LIST("Magic", hsp->hash_magic); + MAKE_STAT_LIST("Version", hsp->hash_version); + MAKE_STAT_LIST("Page size", hsp->hash_pagesize); + MAKE_STAT_LIST("Page count", hsp->hash_pagecnt); + MAKE_STAT_LIST("Number of keys", hsp->hash_nkeys); + MAKE_STAT_LIST("Number of records", hsp->hash_ndata); + MAKE_STAT_LIST("Fill factor", hsp->hash_ffactor); + MAKE_STAT_LIST("Buckets", hsp->hash_buckets); + if (flag != DB_FAST_STAT) { + MAKE_STAT_LIST("Free pages", hsp->hash_free); + MAKE_WSTAT_LIST("Bytes free", hsp->hash_bfree); + MAKE_STAT_LIST("Number of big pages", + hsp->hash_bigpages); + MAKE_STAT_LIST("Big pages bytes free", + hsp->hash_big_bfree); + MAKE_STAT_LIST("Overflow pages", hsp->hash_overflows); + MAKE_STAT_LIST("Overflow bytes free", + hsp->hash_ovfl_free); + MAKE_STAT_LIST("Duplicate pages", hsp->hash_dup); + MAKE_STAT_LIST("Duplicate pages bytes free", + hsp->hash_dup_free); + } + } else if (type == DB_HEAP) { + hpsp = (DB_HEAP_STAT *)sp; + MAKE_STAT_LIST("Magic", hpsp->heap_magic); + MAKE_STAT_LIST("Version", hpsp->heap_version); + MAKE_STAT_LIST("Page size", hpsp->heap_pagesize); + MAKE_STAT_LIST("Page count", hpsp->heap_pagecnt); + MAKE_STAT_LIST("Number of records", hpsp->heap_nrecs); + } else if (type == 
DB_QUEUE) { + qsp = (DB_QUEUE_STAT *)sp; + MAKE_STAT_LIST("Magic", qsp->qs_magic); + MAKE_STAT_LIST("Version", qsp->qs_version); + MAKE_STAT_LIST("Page size", qsp->qs_pagesize); + MAKE_STAT_LIST("Extent size", qsp->qs_extentsize); + MAKE_STAT_LIST("Number of keys", qsp->qs_nkeys); + MAKE_STAT_LIST("Number of records", qsp->qs_ndata); + MAKE_STAT_LIST("Record length", qsp->qs_re_len); + MAKE_STAT_LIST("Record pad", qsp->qs_re_pad); + MAKE_STAT_LIST("First record number", qsp->qs_first_recno); + MAKE_STAT_LIST("Last record number", qsp->qs_cur_recno); + if (flag != DB_FAST_STAT) { + MAKE_STAT_LIST("Number of pages", qsp->qs_pages); + MAKE_WSTAT_LIST("Bytes free", qsp->qs_pgfree); + } + } else { /* BTREE and RECNO are same stats */ + bsp = (DB_BTREE_STAT *)sp; + MAKE_STAT_LIST("Magic", bsp->bt_magic); + MAKE_STAT_LIST("Version", bsp->bt_version); + MAKE_STAT_LIST("Number of keys", bsp->bt_nkeys); + MAKE_STAT_LIST("Number of records", bsp->bt_ndata); + MAKE_STAT_LIST("Minimum keys per page", bsp->bt_minkey); + MAKE_STAT_LIST("Fixed record length", bsp->bt_re_len); + MAKE_STAT_LIST("Record pad", bsp->bt_re_pad); + MAKE_STAT_LIST("Page size", bsp->bt_pagesize); + MAKE_STAT_LIST("Page count", bsp->bt_pagecnt); + if (flag != DB_FAST_STAT) { + MAKE_STAT_LIST("Levels", bsp->bt_levels); + MAKE_STAT_LIST("Internal pages", bsp->bt_int_pg); + MAKE_STAT_LIST("Leaf pages", bsp->bt_leaf_pg); + MAKE_STAT_LIST("Duplicate pages", bsp->bt_dup_pg); + MAKE_STAT_LIST("Overflow pages", bsp->bt_over_pg); + MAKE_STAT_LIST("Empty pages", bsp->bt_empty_pg); + MAKE_STAT_LIST("Pages on freelist", bsp->bt_free); + MAKE_STAT_LIST("Internal pages bytes free", + bsp->bt_int_pgfree); + MAKE_STAT_LIST("Leaf pages bytes free", + bsp->bt_leaf_pgfree); + MAKE_STAT_LIST("Duplicate pages bytes free", + bsp->bt_dup_pgfree); + MAKE_STAT_LIST("Bytes free in overflow pages", + bsp->bt_over_pgfree); + } + } + + /* + * Construct a {name {flag1 flag2 ... flagN}} list for the + * dbp flags. 
These aren't access-method dependent, but they + * include all the interesting flags, and the integer value + * isn't useful from Tcl--return the strings instead. + */ + myobjv[0] = NewStringObj("Flags", strlen("Flags")); + myobjv[1] = _GetFlagsList(interp, dbp->flags, __db_get_flags_fn()); + flaglist = Tcl_NewListObj(2, myobjv); + if (flaglist == NULL) { + result = TCL_ERROR; + goto error; + } + if ((result = + Tcl_ListObjAppendElement(interp, res, flaglist)) != TCL_OK) + goto error; + + Tcl_SetObjResult(interp, res); +error: + if (sp != NULL) + __os_ufree(dbp->env, sp); + return (result); +} + +/* + * tcl_db_stat_print -- + */ +static int +tcl_DbStatPrint(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbstatprtopts[] = { + "-fast", + "-all", + NULL + }; + enum dbstatprtopts { + DBSTATPRTFAST, + DBSTATPRTALL + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum dbstatprtopts)optindex) { + case DBSTATPRTFAST: + flag |= DB_FAST_STAT; + break; + case DBSTATPRTALL: + flag |= DB_STAT_ALL; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbp->stat_print(dbp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db stat_print"); +error: + return (result); +} + +/* + * tcl_db_close -- + */ +static int +tcl_DbClose(interp, objc, objv, dbp, dbip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ + DBTCL_INFO *dbip; /* Info pointer */ +{ + static const char *dbclose[] = { + "-nosync", "--", NULL + }; + enum dbclose { + TCL_DBCLOSE_NOSYNC, + TCL_DBCLOSE_ENDARG + }; + DB *recdbp, *secdbp; + DBTCL_INFO *rdbip, *sdbip; + u_int32_t flag; + int endarg, i, optindex, result, ret; + char *arg; + + result = TCL_OK; + endarg = 0; + flag = 0; + if (objc > 4) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nosync?"); + return (TCL_ERROR); + } + + for (i = 2; i < objc; ++i) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbclose, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') + return (IS_HELP(objv[i])); + else + Tcl_ResetResult(interp); + break; + } + switch ((enum dbclose)optindex) { + case TCL_DBCLOSE_NOSYNC: + flag = DB_NOSYNC; + break; + case TCL_DBCLOSE_ENDARG: + endarg = 1; + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + return (result); + if (endarg) + break; + } + + /* If it looks like there might be aux dbs, try to close them. */ + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + secdbp = ((DBTCL_INFO *)dbp->api_internal)->hsdbp; + if (recdbp != NULL && secdbp != NULL) { + rdbip = recdbp->api_internal; + _DbInfoDelete(interp, rdbip); + recdbp->api_internal = NULL; + ret = recdbp->close(recdbp, flag); + + sdbip = secdbp->api_internal; + _DbInfoDelete(interp, sdbip); + secdbp->api_internal = NULL; + ret = secdbp->close(secdbp, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "db close"); + } + + if (dbip->i_cdata != NULL) + __os_free(dbp->env, dbip->i_cdata); + _DbInfoDelete(interp, dbip); + _debug_check(); + + /* Paranoia. */ + dbp->api_internal = NULL; + + ret = (dbp)->close(dbp, flag); + + /* As long as the 1st close above was ok, then we check this one. 
*/ + if (result == TCL_OK) + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "db close"); + + return (result); +} + +/* + * tcl_db_put -- + */ +static int +tcl_DbPut(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbputopts[] = { +#ifdef CONFIG_TEST + "-nodupdata", +#endif + "-append", + "-multiple", + "-multiple_key", + "-nooverwrite", + "-overwritedup", + "-partial", + "-txn", + NULL + }; + enum dbputopts { +#ifdef CONFIG_TEST + DBGET_NODUPDATA, +#endif + DBPUT_APPEND, + DBPUT_MULTIPLE, + DBPUT_MULTIPLE_KEY, + DBPUT_NOOVER, + DBPUT_OVER, + DBPUT_PART, + DBPUT_TXN + }; + static const char *dbputapp[] = { + "-append", + "-multiple_key", + NULL + }; + enum dbputapp { DBPUT_APPEND0, DBPUT_MULTIPLE_KEY0 }; + DBT data, hkey, key; + DBTYPE type; + DB *recdbp; + DB_HEAP_RID rid; + DB_TXN *txn; + Tcl_Obj **delemv, **elemv, *res; + void *dtmp, *ktmp, *ptr; + db_recno_t recno; + u_int32_t flag, hflag, multiflag; + int delemc, elemc, end, freekey, freedata, skiprecno; + int dlen, klen, i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + txn = NULL; + result = TCL_OK; + flag = hflag = multiflag = 0; + if (objc <= 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? ?key? data"); + return (TCL_ERROR); + } + + dtmp = ktmp = NULL; + freekey = freedata = skiprecno = 0; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + COMPQUIET(recno, 0); + COMPQUIET(recdbp, NULL); + + /* + * If it is a QUEUE or RECNO database, the key is a record number + * and must be setup up to contain a db_recno_t. Otherwise the + * key is a "string". + */ + (void)dbp->get_type(dbp, &type); + + /* + * We need to determine where the end of required args are. If we are + * using a QUEUE/RECNO/HEAP db and -append, or -multiple_key + * is specified, then there is just one req arg (data). 
Otherwise + * there are two (key data). + * + * We preparse the list to determine this since we need to know + * to properly check # of args for other options below. + */ + end = objc - 2; + i = 2; + while (i < objc - 1) { + if (Tcl_GetIndexFromObj(interp, objv[i++], dbputapp, + "option", TCL_EXACT, &optindex) != TCL_OK) + continue; + switch ((enum dbputapp)optindex) { + case DBPUT_APPEND0: + case DBPUT_MULTIPLE_KEY0: + end = objc - 1; + break; + } + } + Tcl_ResetResult(interp); + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + while (i < end) { + if (Tcl_GetIndexFromObj(interp, objv[i], + dbputopts, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum dbputopts)optindex) { +#ifdef CONFIG_TEST + case DBGET_NODUPDATA: + FLAG_CHECK(flag); + flag = DB_NODUPDATA; + break; +#endif + case DBPUT_TXN: + if (i > (end - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Put: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + case DBPUT_APPEND: + FLAG_CHECK(flag); + flag = DB_APPEND; + break; + case DBPUT_MULTIPLE: + FLAG_CHECK(multiflag); + multiflag = DB_MULTIPLE; + if (type == DB_HEAP) { + Tcl_SetResult(interp, + "-multiple not supported", TCL_STATIC); + result = TCL_ERROR; + } + break; + case DBPUT_MULTIPLE_KEY: + FLAG_CHECK(multiflag); + multiflag = DB_MULTIPLE_KEY; + if (type == DB_HEAP) { + Tcl_SetResult(interp, + "-multiple_key not supported", TCL_STATIC); + result = TCL_ERROR; + } + break; + case DBPUT_NOOVER: + FLAG_CHECK(flag); + flag = DB_NOOVERWRITE; + break; + case DBPUT_OVER: + FLAG_CHECK(flag); + flag = DB_OVERWRITE_DUP; + break; + case DBPUT_PART: + if (i > (end - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-partial {offset length}?"); + 
result = TCL_ERROR; + break; + } + /* + * Get sublist as {offset length} + */ + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (elemc != 2) { + Tcl_SetResult(interp, + "List must be {offset length}", TCL_STATIC); + result = TCL_ERROR; + break; + } + data.flags = DB_DBT_PARTIAL; + result = _GetUInt32(interp, elemv[0], &data.doff); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, elemv[1], &data.dlen); + /* + * NOTE: We don't check result here because all we'd + * do is break anyway, and we are doing that. If you + * add code here, you WILL need to add the check + * for result. (See the check for save.doff, a few + * lines above and copy that.) + */ + break; + } + if (result != TCL_OK) + break; + } + + if (result == TCL_ERROR) + return (result); + + if (multiflag == DB_MULTIPLE) { + /* + * To work out how big a buffer is needed, we first need to + * find out the total length of the data and the number of data + * items (elemc). + */ + ktmp = Tcl_GetByteArrayFromObj(objv[objc - 2], &klen); + result = Tcl_ListObjGetElements(interp, objv[objc - 2], + &elemc, &elemv); + if (result != TCL_OK) + return (result); + + dtmp = Tcl_GetByteArrayFromObj(objv[objc - 1], &dlen); + result = Tcl_ListObjGetElements(interp, objv[objc - 1], + &delemc, &delemv); + if (result != TCL_OK) + return (result); + + if (elemc < delemc) + delemc = elemc; + else + elemc = delemc; + + memset(&key, 0, sizeof(key)); + key.ulen = DB_ALIGN((u_int32_t)klen + + (u_int32_t)elemc * sizeof(u_int32_t) * 2, 1024UL); + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + if ((ret = __os_malloc(dbp->env, key.ulen, &key.data)) != 0) + return (ret); + freekey = 1; + + memset(&data, 0, sizeof(data)); + data.ulen = DB_ALIGN((u_int32_t)dlen + + (u_int32_t)delemc * sizeof(u_int32_t) * 2, 1024UL); + data.flags = DB_DBT_USERMEM | DB_DBT_BULK; + if ((ret = __os_malloc(dbp->env, data.ulen, &data.data)) != 0) + return (ret); + freedata = 1; + + if (type == DB_QUEUE || type == DB_RECNO) { + 
DB_MULTIPLE_RECNO_WRITE_INIT(ptr, &key); + for (i = 0; i < elemc; i++) { + result = _GetUInt32(interp, elemv[i], &recno); + DB_MULTIPLE_RECNO_WRITE_NEXT(ptr, &key, recno, + dtmp, 0); + DB_ASSERT(dbp->env, ptr != NULL); + } + } else { + DB_MULTIPLE_WRITE_INIT(ptr, &key); + for (i = 0; i < elemc; i++) { + ktmp = Tcl_GetByteArrayFromObj(elemv[i], &klen); + DB_MULTIPLE_WRITE_NEXT(ptr, + &key, ktmp, (u_int32_t)klen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } + DB_MULTIPLE_WRITE_INIT(ptr, &data); + for (i = 0; i < elemc; i++) { + dtmp = Tcl_GetByteArrayFromObj(delemv[i], &dlen); + DB_MULTIPLE_WRITE_NEXT(ptr, + &data, dtmp, (u_int32_t)dlen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } else if (multiflag == DB_MULTIPLE_KEY) { + /* + * To work out how big a buffer is needed, we first need to + * find out the total length of the data (len) and the number + * of data items (elemc). + */ + ktmp = Tcl_GetByteArrayFromObj(objv[objc - 1], &klen); + result = Tcl_ListObjGetElements(interp, objv[objc - 1], + &elemc, &elemv); + if (result != TCL_OK) + return (result); + + memset(&key, 0, sizeof(key)); + key.ulen = DB_ALIGN((u_int32_t)klen + + (u_int32_t)elemc * sizeof(u_int32_t) * 2, 1024UL); + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + if ((ret = __os_malloc(dbp->env, key.ulen, &key.data)) != 0) + return (ret); + freekey = 1; + + if (type == DB_QUEUE || type == DB_RECNO) { + DB_MULTIPLE_RECNO_WRITE_INIT(ptr, &key); + for (i = 0; i + 1 < elemc; i += 2) { + result = _GetUInt32(interp, elemv[i], &recno); + dtmp = Tcl_GetByteArrayFromObj(elemv[i + 1], + &dlen); + DB_MULTIPLE_RECNO_WRITE_NEXT(ptr, &key, + recno, dtmp, (u_int32_t)dlen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } else { + DB_MULTIPLE_WRITE_INIT(ptr, &key); + for (i = 0; i + 1 < elemc; i += 2) { + ktmp = Tcl_GetByteArrayFromObj(elemv[i], &klen); + dtmp = Tcl_GetByteArrayFromObj(elemv[i + 1], + &dlen); + DB_MULTIPLE_KEY_WRITE_NEXT(ptr, + &key, ktmp, (u_int32_t)klen, + dtmp, (u_int32_t)dlen); + DB_ASSERT(dbp->env, ptr != 
NULL); + } + } + } else if (type == DB_QUEUE || type == DB_RECNO) { + /* + * If we are a recno db and we are NOT using append, then the + * 2nd last arg is the key. + */ + key.data = &recno; + key.ulen = key.size = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + if (flag == DB_APPEND) + recno = 0; + else { + result = _GetUInt32(interp, objv[objc-2], &recno); + if (result != TCL_OK) + return (result); + } + } else if (type == DB_HEAP) { + /* + * With heap we have 2 puts, one to the heap db and one + * to the recno db. Use the key passed in completely + * for the recno, and hkey for heap. The hkey will + * become the data for the recno db put. + */ + memset(&hkey, 0, sizeof(hkey)); + hkey.data = &rid; + hkey.ulen = hkey.size = sizeof(DB_HEAP_RID); + hkey.flags = DB_DBT_USERMEM; + rid.pgno = 0; + rid.indx = 0; + + /* set up key for recno auxiliary db */ + key.data = &recno; + key.ulen = key.size = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + + /* Set up the DB ptr for the recno db */ + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + + /* + * If DB_APPEND is set, we want to append to the heap. If + * there's no flag set and the recno exists, we want to update + * the existing record (which means we need to find the heap rid + * in the recno db and we can skip the recno put.) If the + * recno does not exist, then we want to append to the heap. 
+ */ + if (flag == DB_APPEND) { + recno = 0; + hflag = DB_APPEND; + } else { + result = _GetUInt32(interp, objv[objc-2], &recno); + if (result != TCL_OK) + return (result); + + ret = recdbp->get(recdbp, txn, &key, &hkey, 0); + if (ret == 0) { + hflag = flag; + skiprecno = 1; + } + else if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) + hflag = DB_APPEND; + else { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBPUT(ret), "db put heap"); + goto out; + } + } + + } else { + ret = _CopyObjBytes(interp, objv[objc-2], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBPUT(ret), "db put"); + return (result); + } + key.data = ktmp; + } + + if (multiflag == 0) { + ret = _CopyObjBytes(interp, + objv[objc-1], &dtmp, &data.size, &freedata); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBPUT(ret), "db put"); + goto out; + } + data.data = dtmp; + } + _debug_check(); + if (type != DB_HEAP) { + ret = dbp->put(dbp, txn, &key, &data, flag | multiflag); + result = _ReturnSetup(interp, ret, DB_RETOK_DBPUT(ret), + "db put"); + } else { + /* Do put into heap db first, then recno db. Varieties of + * -multiple are not supported in heap (checked above), so + * multiflag has been removed from the code. + */ + ret = dbp->put(dbp, txn, &hkey, &data, hflag); + result = _ReturnSetup(interp, ret, + DB_RETOK_DBPUT(ret), "db put heap"); + if (ret) + goto out; + + /* set up data for recno put (the heaps key) and do put + * if we have not already gotten the recno record + */ + hkey.flags = DB_DBT_USERMEM; + if (!skiprecno) { + ret = recdbp->put(recdbp, txn, &key, &hkey, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_DBPUT(ret), + "db put"); + } + /* + * If for some reason the recno put does not work and the + * heap one does, lets try and delete the heap record if + * we are not in a txn -- just for consistency. Ignore the + * return value, because we want to preserve ret. 
+ */ + if (!skiprecno && txn == NULL && ret != 0) + (void)dbp->del(dbp, txn, &hkey, 0); + } + + /* We may have a returned record number. */ + if (ret == 0 && + (type == DB_QUEUE || type == DB_RECNO || type == DB_HEAP) && + flag == DB_APPEND) { + res = Tcl_NewWideIntObj((Tcl_WideInt)recno); + Tcl_SetObjResult(interp, res); + } + + +out: if (freedata && data.data != NULL) + __os_free(dbp->env, data.data); + if (freekey && key.data != NULL) + __os_free(dbp->env, key.data); + return (result); +} + +/* + * tcl_db_get -- + */ +static int +tcl_DbGet(interp, objc, objv, dbp, ispget) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ + int ispget; /* 1 for pget, 0 for get */ +{ + static const char *dbgetopts[] = { +#ifdef CONFIG_TEST + "-data_buf_size", + "-multi", + "-nolease", + "-read_committed", + "-read_uncommitted", +#endif + "-consume", + "-consume_wait", + "-get_both", + "-glob", + "-partial", + "-recno", + "-rmw", + "-txn", + "--", + NULL + }; + enum dbgetopts { +#ifdef CONFIG_TEST + DBGET_DATA_BUF_SIZE, + DBGET_MULTI, + DBGET_NOLEASE, + DBGET_READ_COMMITTED, + DBGET_READ_UNCOMMITTED, +#endif + DBGET_CONSUME, + DBGET_CONSUME_WAIT, + DBGET_BOTH, + DBGET_GLOB, + DBGET_PART, + DBGET_RECNO, + DBGET_RMW, + DBGET_TXN, + DBGET_ENDARG + }; + DB *heapdbp, *recdbp; + DBC *dbc; + DBT key, hkey, pkey, data, rdata, save; + DBTYPE ptype, type; + DB_HEAP_RID rid; + DB_TXN *txn; + Tcl_Obj **elemv, *retlist; + db_recno_t precno, recno; + u_int32_t cflag, flag, hflag, isdup, mflag, rmw; + int elemc, end, endarg, freekey, freedata, i; + int optindex, result, ret, useglob, useprecno, userecno; + char *arg, *pattern, *prefix, msg[MSG_SIZE]; + void *dtmp, *ktmp; +#ifdef CONFIG_TEST + int bufsize, data_buf_size; +#endif + + heapdbp = recdbp = NULL; + result = TCL_OK; + freekey = freedata = 0; + cflag = endarg = flag = hflag = mflag = rmw = 0; + useglob = userecno = 0; + txn = 
NULL; + pattern = prefix = NULL; + dtmp = ktmp = NULL; + ptype = DB_UNKNOWN; +#ifdef CONFIG_TEST + COMPQUIET(bufsize, 0); + data_buf_size = 0; +#endif + + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? key"); + return (TCL_ERROR); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + memset(&save, 0, sizeof(save)); + + /* For the primary key in a pget call. */ + memset(&pkey, 0, sizeof(pkey)); + + /* For the key/data used in heap am. */ + memset(&hkey, 0, sizeof(hkey)); + memset(&rdata, 0, sizeof(rdata)); + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + (void)dbp->get_type(dbp, &type); + end = objc; + while (i < end) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbgetopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto out; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbgetopts)optindex) { +#ifdef CONFIG_TEST + case DBGET_DATA_BUF_SIZE: + result = + Tcl_GetIntFromObj(interp, objv[i], &data_buf_size); + if (result != TCL_OK) + goto out; + i++; + break; + case DBGET_MULTI: + mflag |= DB_MULTIPLE; + result = + Tcl_GetIntFromObj(interp, objv[i], &bufsize); + if (result != TCL_OK) + goto out; + i++; + /* heap doesnt support multi yet, just catch here */ + if (type == DB_HEAP) { + Tcl_SetResult(interp, + "Heap doesnt support -multi", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + break; + case DBGET_NOLEASE: + rmw |= DB_IGNORE_LEASE; + break; + case DBGET_READ_COMMITTED: + rmw |= DB_READ_COMMITTED; + break; + case DBGET_READ_UNCOMMITTED: + rmw |= DB_READ_UNCOMMITTED; + break; +#endif + case DBGET_BOTH: + /* + * Change 'end' and make sure we aren't already past + * the new end. 
+ */ + if (i > objc - 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-get_both key data?"); + result = TCL_ERROR; + break; + } + end = objc - 2; + FLAG_CHECK(flag); + flag = DB_GET_BOTH; + break; + case DBGET_TXN: + if (i >= end) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Get: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + case DBGET_GLOB: + useglob = 1; + end = objc - 1; + break; + case DBGET_CONSUME: + FLAG_CHECK(flag); + flag = DB_CONSUME; + break; + case DBGET_CONSUME_WAIT: + FLAG_CHECK(flag); + flag = DB_CONSUME_WAIT; + break; + case DBGET_RECNO: + end = objc - 1; + userecno = 1; + if (type != DB_RECNO && + type != DB_QUEUE && type != DB_HEAP) { + FLAG_CHECK(flag); + flag = DB_SET_RECNO; + key.flags |= DB_DBT_MALLOC; + } + break; + case DBGET_RMW: + rmw |= DB_RMW; + break; + case DBGET_PART: + end = objc - 1; + if (i == end) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-partial {offset length}?"); + result = TCL_ERROR; + break; + } + /* + * Get sublist as {offset length} + */ + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (elemc != 2) { + Tcl_SetResult(interp, + "List must be {offset length}", TCL_STATIC); + result = TCL_ERROR; + break; + } + save.flags = DB_DBT_PARTIAL; + result = _GetUInt32(interp, elemv[0], &save.doff); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, elemv[1], &save.dlen); + /* + * NOTE: We don't check result here because all we'd + * do is break anyway, and we are doing that. If you + * add code here, you WILL need to add the check + * for result. (See the check for save.doff, a few + * lines above and copy that.) 
+ */ + break; + case DBGET_ENDARG: + endarg = 1; + break; + } + if (result != TCL_OK) + break; + if (endarg) + break; + } + if (result != TCL_OK) + goto out; + + /* We treat heap on the tcl interface just like we do recno */ + if (type == DB_RECNO || type == DB_QUEUE || type == DB_HEAP) + userecno = 1; + + /* + * Check args we have left versus the flags we were given. + * We might have 0, 1 or 2 left. If we have 0, it must + * be DB_CONSUME*, if 2, then DB_GET_BOTH, all others should + * be 1. + */ + if (((flag == DB_CONSUME || flag == DB_CONSUME_WAIT) && i != objc) || + (flag == DB_GET_BOTH && i != objc - 2)) { + Tcl_SetResult(interp, + "Wrong number of key/data given based on flags specified\n", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } else if (flag == 0 && i != objc - 1) { + Tcl_SetResult(interp, + "Wrong number of key/data given\n", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + + /* Find out whether the primary key should also be a recno. */ + if (ispget && dbp->s_primary != NULL) { + (void)dbp->s_primary->get_type(dbp->s_primary, &ptype); + useprecno = ptype == DB_RECNO || + ptype == DB_QUEUE || ptype == DB_HEAP; + } else + useprecno = 0; + + /* + * Check for illegal combos of options. 
+ */ + if (useglob && (userecno || flag == DB_SET_RECNO || + type == DB_RECNO || type == DB_QUEUE || type == DB_HEAP)) { + Tcl_SetResult(interp, + "Cannot use -glob and record numbers.\n", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } +#ifdef CONFIG_TEST + if (data_buf_size != 0 && flag == DB_GET_BOTH) { + Tcl_SetResult(interp, + "Only one of -data_buf_size or -get_both can be specified.\n", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } + if (data_buf_size != 0 && mflag != 0) { + Tcl_SetResult(interp, + "Only one of -data_buf_size or -multi can be specified.\n", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } +#endif + if (useglob && flag == DB_GET_BOTH) { + Tcl_SetResult(interp, + "Only one of -glob or -get_both can be specified.\n", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } + + if (useglob) + pattern = Tcl_GetStringFromObj(objv[objc - 1], NULL); + + /* + * This is the list we return + */ + retlist = Tcl_NewListObj(0, NULL); + save.flags |= DB_DBT_MALLOC; + + /* + *If we are using heap, either as a primary db or through a + * secondary relationship, then set up rdata. The data will + * contain a key for heap. + */ + if (type == DB_HEAP || ptype == DB_HEAP) { + hflag = flag; + rdata.ulen = sizeof(DB_HEAP_RID); + rdata.flags = DB_DBT_USERMEM; + ret = __os_malloc(dbp->env, rdata.ulen, &rdata.data); + if (ret != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return(TCL_ERROR); + } + } + + /* + * isdup is used to know if we support duplicates. If not, we + * can just do a db->get call and avoid using cursors. + */ + if ((ret = dbp->get_flags(dbp, &isdup)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db get"); + goto out; + } + isdup &= DB_DUP; + + /* + * If the database doesn't support duplicates or we're performing + * ops that don't require returning multiple items, use DB->get + * instead of a cursor operation. 
+ */ + /* MJB: need to verify - do we have a case where heap could have + * a secondary of BTREE, in which case isdup could be 1. + */ + if (pattern == NULL && (isdup == 0 || mflag != 0 || +#ifdef CONFIG_TEST + data_buf_size != 0 || +#endif + flag == DB_SET_RECNO || flag == DB_GET_BOTH || + flag == DB_CONSUME || flag == DB_CONSUME_WAIT)) { +#ifdef CONFIG_TEST + if (data_buf_size == 0) { + F_CLR(&save, DB_DBT_USERMEM); + F_SET(&save, DB_DBT_MALLOC); + } else { + (void)__os_malloc( + NULL, (size_t)data_buf_size, &save.data); + save.ulen = (u_int32_t)data_buf_size; + F_CLR(&save, DB_DBT_MALLOC); + F_SET(&save, DB_DBT_USERMEM); + } +#endif + if (flag == DB_GET_BOTH) { + if (userecno) { + result = _GetUInt32(interp, + objv[(objc - 2)], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + goto out; + } else { + /* + * Some get calls (SET_*) can change the + * key pointers. So, we need to store + * the allocated key space in a tmp. + */ + ret = _CopyObjBytes(interp, objv[objc-2], + &key.data, &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBGET(ret), "db get"); + goto out; + } + } + ktmp = key.data; + /* + * Already checked args above. Fill in key and save. + * Save is used in the dbp->get call below to fill in + * data. + * + * If the "data" here is really a primary key--that + * is, if we're in a pget--and that primary key + * is a recno, treat it appropriately as an int. 
+ */ + if (useprecno) { + result = _GetUInt32(interp, + objv[objc - 1], &precno); + if (result == TCL_OK) { + save.data = &precno; + save.size = sizeof(db_recno_t); + } else + goto out; + } else { + ret = _CopyObjBytes(interp, objv[objc-1], + &dtmp, &save.size, &freedata); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBGET(ret), "db get"); + goto out; + } + save.data = dtmp; + } + } else if (flag != DB_CONSUME && flag != DB_CONSUME_WAIT) { + if (userecno) { + result = _GetUInt32( + interp, objv[(objc - 1)], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + goto out; + } else { + /* + * Some get calls (SET_*) can change the + * key pointers. So, we need to store + * the allocated key space in a tmp. + */ + ret = _CopyObjBytes(interp, objv[objc-1], + &key.data, &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBGET(ret), "db get"); + goto out; + } + } + ktmp = key.data; +#ifdef CONFIG_TEST + if (mflag & DB_MULTIPLE) { + if ((ret = __os_malloc(dbp->env, + (size_t)bufsize, &save.data)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + goto out; + } + save.ulen = (u_int32_t)bufsize; + F_CLR(&save, DB_DBT_MALLOC); + F_SET(&save, DB_DBT_USERMEM); + } +#endif + } + + data = save; + + if (ispget) { + if (flag == DB_GET_BOTH) { + pkey.data = save.data; + pkey.size = save.size; + data.data = NULL; + data.size = 0; + /* + * If the primary is heap, we need to translate + * the given recno to the RID stored in the db. 
+ */ + if (ptype == DB_HEAP) { + /* clear all flags for heap access */ + hflag = 0; + recdbp = ((DBTCL_INFO *) + dbp->s_primary->api_internal)->hrdbp; + ret = recdbp->get(recdbp, txn, + &pkey, &rdata, hflag | rmw | mflag); + if (ret != 0) { + result = _ReturnSetup(interp, + ret, DB_RETOK_DBGET(ret), + "db get"); + goto out; + } + } + } + F_SET(&pkey, DB_DBT_MALLOC); + _debug_check(); + /* + * In case data has DB_DBT_PARTIAL set, we want + * to use rdata if we are heap. + */ + ret = dbp->pget(dbp, txn, &key, + ptype == DB_HEAP ? &rdata : &pkey, &data, flag | rmw); + /* + * If the primary database is a heap, we need to + * translate the RID returned in rdata into a recno using + * the auxiliary database. + */ + if (ptype == DB_HEAP && flag != DB_GET_BOTH) { + rid.pgno = ((DB_HEAP_RID *)rdata.data)->pgno; + rid.indx = ((DB_HEAP_RID *)rdata.data)->indx; + hkey.data = &rid; + hkey.ulen = hkey.size = rdata.size; + hkey.flags = DB_DBT_USERMEM; + heapdbp = ((DBTCL_INFO *) + dbp->s_primary->api_internal)->hsdbp; + ret = heapdbp->get(heapdbp, + txn, &hkey, &pkey, hflag | rmw | mflag); + } + + } else if (type != DB_HEAP) { + _debug_check(); + ret = dbp->get(dbp, + txn, &key, &data, flag | rmw | mflag); + } else { + _debug_check(); + /* + * On the recno access we want to get the entire + * data as this is the heap key. Substitute + * rdata to get this data, and ignore DB_GET_BOTH + * if set at this point by using hflag. Other + * incorrect flags will get flagged within get code. + */ + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + FLD_CLR(hflag, DB_GET_BOTH); + ret = recdbp->get(recdbp, + txn, &key, &rdata, hflag | rmw | mflag); + if (ret == 0) { + /* The rdata will be the key for the heap get. 
*/ + rid.pgno = ((DB_HEAP_RID *)rdata.data)->pgno; + rid.indx = ((DB_HEAP_RID *)rdata.data)->indx; + hkey.data = &rid; + hkey.ulen = hkey.size = sizeof(DB_HEAP_RID); + hkey.flags = DB_DBT_USERMEM; + ret = dbp->get(dbp, + txn, &hkey, &data, flag | rmw | mflag); + } + } + result = _ReturnSetup(interp, ret, DB_RETOK_DBGET(ret), + "db get"); + if (ret == 0) { + /* + * Success. Return a list of the form {name value} + * If it was a recno in key.data, we need to convert + * into a string/object representation of that recno. + */ + if (mflag & DB_MULTIPLE) + result = _SetMultiList(interp, + retlist, &key, &data, type, flag); + else if (type == DB_RECNO || type == DB_QUEUE) + if (ispget) + result = _Set3DBTList(interp, + retlist, &key, 1, &pkey, + useprecno, &data); + else + result = _SetListRecnoElem(interp, + retlist, *(db_recno_t *)key.data, + data.data, data.size); + else if (type == DB_HEAP) + /* We return the key from the recno, and + * data from the heap. + */ + if (ispget) + result = _Set3DBTList(interp, + retlist, &key, 1, &pkey, + useprecno, &data); + else + result = _SetListRecnoElem(interp, + retlist, *(db_recno_t *)key.data, + data.data, data.size); + else { + if (ispget) + result = _Set3DBTList(interp, + retlist, &key, 0, &pkey, + useprecno, &data); + else + result = _SetListElem(interp, retlist, + key.data, key.size, + data.data, data.size); + } + } + /* + * Free space from DBT. + * + * If we set DB_DBT_MALLOC, we need to free the space if and + * only if we succeeded and if DB allocated anything (the + * pointer has changed from what we passed in). If + * DB_DBT_MALLOC is not set, this is a bulk get buffer, and + * needs to be freed no matter what. 
+ */ + if (F_ISSET(&key, DB_DBT_MALLOC) && ret == 0 && + key.data != ktmp) + __os_ufree(dbp->env, key.data); + if (F_ISSET(&data, DB_DBT_MALLOC) && ret == 0 && + data.data != dtmp) + __os_ufree(dbp->env, data.data); + else if (!F_ISSET(&data, DB_DBT_MALLOC)) + __os_free(dbp->env, data.data); + if (ispget && ret == 0 && pkey.data != save.data) + __os_ufree(dbp->env, pkey.data); + if (F_ISSET(&rdata, DB_DBT_USERMEM)) + __os_free(dbp->env, rdata.data); + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); + goto out; + } + + if (userecno) { + result = _GetUInt32(interp, objv[(objc - 1)], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + goto out; + } else { + /* + * Some get calls (SET_*) can change the + * key pointers. So, we need to store + * the allocated key space in a tmp. + */ + ret = _CopyObjBytes(interp, objv[objc-1], &key.data, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBGET(ret), "db get"); + return (result); + } + } + ktmp = key.data; + ret = dbp->cursor(dbp, txn, &dbc, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db cursor"); + if (result == TCL_ERROR) + goto out; + + /* + * At this point, we have a cursor, if we have a pattern, + * we go to the nearest one and step forward until we don't + * have any more that match the pattern prefix. If we have + * an exact key, we go to that key position, and step through + * all the duplicates. In either case we build up a list of + * the form {{key data} {key data}...} along the way. + */ + memset(&data, 0, sizeof(data)); + /* + * Restore any "partial" info we have saved. + */ + data = save; + if (pattern) { + /* + * Note, prefix is returned in new space. Must free it. 
+ */ + ret = _GetGlobPrefix(pattern, &prefix); + if (ret) { + result = TCL_ERROR; + Tcl_SetResult(interp, + "Unable to allocate pattern space", TCL_STATIC); + goto out1; + } + key.data = prefix; + key.size = (u_int32_t)strlen(prefix); + /* + * If they give us an empty pattern string + * (i.e. -glob *), go through entire DB. + */ + if (strlen(prefix) == 0) + cflag = DB_FIRST; + else + cflag = DB_SET_RANGE; + } else + cflag = DB_SET; + if (ispget) { + _debug_check(); + F_SET(&pkey, DB_DBT_MALLOC); + ret = dbc->pget(dbc, &key, &pkey, &data, cflag | rmw); + } else { + _debug_check(); + ret = dbc->get(dbc, &key, &data, cflag | rmw); + } + result = _ReturnSetup(interp, ret, DB_RETOK_DBCGET(ret), + "db get (cursor)"); + if (result == TCL_ERROR) + goto out1; + if (pattern) { + if (ret == 0 && prefix != NULL && + memcmp(key.data, prefix, strlen(prefix)) != 0) { + /* + * Free space from DB_DBT_MALLOC + */ + __os_ufree(dbp->env, data.data); + goto out1; + } + cflag = DB_NEXT; + } else + cflag = DB_NEXT_DUP; + + while (ret == 0 && result == TCL_OK) { + /* + * Build up our {name value} sublist + */ + if (ispget) + result = _Set3DBTList(interp, retlist, &key, 0, + &pkey, useprecno, &data); + else + result = _SetListElem(interp, retlist, + key.data, key.size, data.data, data.size); + /* + * Free space from DB_DBT_MALLOC + */ + if (ispget) + __os_ufree(dbp->env, pkey.data); + __os_ufree(dbp->env, data.data); + if (result != TCL_OK) + break; + /* + * Append {name value} to return list + */ + memset(&key, 0, sizeof(key)); + memset(&pkey, 0, sizeof(pkey)); + memset(&data, 0, sizeof(data)); + /* + * Restore any "partial" info we have saved. 
+ */ + data = save; + if (ispget) { + F_SET(&pkey, DB_DBT_MALLOC); + ret = dbc->pget(dbc, &key, &pkey, &data, cflag | rmw); + } else + ret = dbc->get(dbc, &key, &data, cflag | rmw); + if (ret == 0 && prefix != NULL && + memcmp(key.data, prefix, strlen(prefix)) != 0) { + /* + * Free space from DB_DBT_MALLOC + */ + __os_ufree(dbp->env, data.data); + break; + } + } +out1: + (void)dbc->close(dbc); + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); +out: + /* + * _GetGlobPrefix(), the function which allocates prefix, works + * by copying and condensing another string. Thus prefix may + * have multiple nuls at the end, so we free using __os_free(). + */ + if (prefix != NULL) + __os_free(dbp->env, prefix); + if (dtmp != NULL && freedata) + __os_free(dbp->env, dtmp); + if (ktmp != NULL && freekey) + __os_free(dbp->env, ktmp); + return (result); +} + +/* + * tcl_db_delete -- + */ +static int +tcl_DbDelete(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbdelopts[] = { + "-consume", + "-glob", + "-multiple", + "-multiple_key", + "-txn", + NULL + }; + enum dbdelopts { + DBDEL_CONSUME, + DBDEL_GLOB, + DBDEL_MULTIPLE, + DBDEL_MULTIPLE_KEY, + DBDEL_TXN + }; + DB *recdbp; + DBC *dbc; + DBT key, data, hkey, rdata; + DBTYPE type; + DB_HEAP_RID rid; + DB_TXN *txn; + Tcl_Obj **elemv; + void *dtmp, *ktmp, *ptr; + db_recno_t recno; + int dlen, elemc, freekey, i, j, klen, optindex, result, ret; + u_int32_t dflag, flag, multiflag; + char *arg, *pattern, *prefix, msg[MSG_SIZE]; + + COMPQUIET(recdbp, NULL); + result = TCL_OK; + freekey = 0; + dflag = 0; + multiflag = 0; + pattern = prefix = NULL; + txn = NULL; + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? 
key"); + return (TCL_ERROR); + } + + dtmp = ktmp = NULL; + memset(&key, 0, sizeof(key)); + memset(&hkey, 0, sizeof(hkey)); + memset(&rdata, 0, sizeof(rdata)); + + /* + * The first arg must be -glob, -txn or a list of keys. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbdelopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + /* + * If we don't have a -glob or -txn, then the remaining + * args must be exact keys. Reset the result so we + * don't get an errant error message if there is another + * error. + */ + if (IS_HELP(objv[i]) == TCL_OK) + return (TCL_OK); + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbdelopts)optindex) { + case DBDEL_TXN: + if (i == objc) { + /* + * Someone could conceivably have a key of + * the same name. So just break and use it. + */ + i--; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Delete: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + case DBDEL_GLOB: + /* + * Get the pattern. Get the prefix and use cursors to + * get all the data items. + */ + if (i == objc) { + /* + * Someone could conceivably have a key of + * the same name. So just break and use it. + */ + i--; + break; + } + pattern = Tcl_GetStringFromObj(objv[i++], NULL); + break; + case DBDEL_CONSUME: + FLAG_CHECK(dflag); + dflag = DB_CONSUME; + break; + case DBDEL_MULTIPLE: + FLAG_CHECK(multiflag); + multiflag |= DB_MULTIPLE; + break; + case DBDEL_MULTIPLE_KEY: + FLAG_CHECK(multiflag); + multiflag |= DB_MULTIPLE_KEY; + break; + } + if (result != TCL_OK) + break; + } + + if (result != TCL_OK) + goto out; + /* + * XXX + * For consistency with get, we have decided for the moment, to + * allow -glob, or one key, not many. 
The code was originally + * written to take many keys and we'll leave it that way, because + * tcl_DbGet may one day accept many disjoint keys to get, rather + * than one, and at that time we'd make delete be consistent. In + * any case, the code is already here and there is no need to remove, + * just check that we only have one arg left. + * + * If we have a pattern AND more keys to process, there is an error. + * Either we have some number of exact keys, or we have a pattern. + */ + if (pattern == NULL) { + if (i != (objc - 1)) { + Tcl_WrongNumArgs( + interp, 2, objv, "?args? -glob pattern | key"); + result = TCL_ERROR; + goto out; + } + } else { + if (i != objc) { + Tcl_WrongNumArgs( + interp, 2, objv, "?args? -glob pattern | key"); + result = TCL_ERROR; + goto out; + } + } + + /* + * If we have remaining args, they are all exact keys. Call + * DB->del on each of those keys. + * + * If it is a RECNO database, the key is a record number and must be + * setup up to contain a db_recno_t. Otherwise the key is a "string". + */ + (void)dbp->get_type(dbp, &type); + ret = 0; + while (i < objc && ret == 0) { + memset(&key, 0, sizeof(key)); + if (multiflag == DB_MULTIPLE) { + /* + * Heap is not supporting bulk on initially, so + * check if type is heap and if so throw an error. + */ + if (type == DB_HEAP) { + Tcl_SetResult(interp, + "Heap doesnt support -multiple", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + + /* + * To work out how big a buffer is needed, we first + * need to find out the total length of the data and + * the number of data items (elemc). 
+ */ + ktmp = Tcl_GetByteArrayFromObj(objv[i], &klen); + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (result != TCL_OK) + return (result); + + memset(&key, 0, sizeof(key)); + key.ulen = DB_ALIGN((u_int32_t)klen + (u_int32_t)elemc + * sizeof(u_int32_t) * 2, 1024UL); + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + if ((ret = + __os_malloc(dbp->env, key.ulen, &key.data)) != 0) + return (ret); + freekey = 1; + + if (type == DB_RECNO || type == DB_QUEUE) { + DB_MULTIPLE_RECNO_WRITE_INIT(ptr, &key); + for (j = 0; j < elemc; j++) { + result = + _GetUInt32(interp, + elemv[j], &recno); + if (result != TCL_OK) + return (result); + DB_MULTIPLE_RECNO_WRITE_NEXT(ptr, + &key, recno, dtmp, 0); + DB_ASSERT(dbp->env, ptr != NULL); + } + } else { + DB_MULTIPLE_WRITE_INIT(ptr, &key); + for (j = 0; j < elemc; j++) { + ktmp = Tcl_GetByteArrayFromObj(elemv[j], + &klen); + DB_MULTIPLE_WRITE_NEXT(ptr, + &key, ktmp, (u_int32_t)klen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } + } else if (multiflag == DB_MULTIPLE_KEY) { + /* + * Heap is not supporting bulk on initially, so + * check if type is heap and if so throw an error. + */ + if (type == DB_HEAP) { + Tcl_SetResult(interp, + "Heap doesnt support -multiple_key", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + + /* + * To work out how big a buffer is needed, we first + * need to find out the total length of the data (len) + * and the number of data items (elemc). 
+ */ + ktmp = Tcl_GetByteArrayFromObj(objv[i], &klen); + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (result != TCL_OK) + return (result); + + memset(&key, 0, sizeof(key)); + key.ulen = DB_ALIGN((u_int32_t)klen + + (u_int32_t)elemc * sizeof(u_int32_t) * 2, 1024UL); + key.flags = DB_DBT_USERMEM | DB_DBT_BULK; + if ((ret = + __os_malloc(dbp->env, key.ulen, &key.data)) != 0) + return (ret); + freekey = 1; + + if (type == DB_RECNO || type == DB_QUEUE) { + DB_MULTIPLE_RECNO_WRITE_INIT(ptr, &key); + for (j = 0; j + 1 < elemc; j += 2) { + result = + _GetUInt32(interp, + elemv[j], &recno); + if (result != TCL_OK) + return (result); + dtmp = Tcl_GetByteArrayFromObj( + elemv[j + 1], &dlen); + DB_MULTIPLE_RECNO_WRITE_NEXT(ptr, + &key, recno, dtmp, (u_int32_t)dlen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } else { + DB_MULTIPLE_WRITE_INIT(ptr, &key); + for (j = 0; j + 1 < elemc; j += 2) { + ktmp = Tcl_GetByteArrayFromObj( + elemv[j], &klen); + dtmp = Tcl_GetByteArrayFromObj( + elemv[j + 1], &dlen); + DB_MULTIPLE_KEY_WRITE_NEXT(ptr, + &key, ktmp, (u_int32_t)klen, + dtmp, (u_int32_t)dlen); + DB_ASSERT(dbp->env, ptr != NULL); + } + } + } else if (type == DB_RECNO || type == DB_QUEUE) { + result = _GetUInt32(interp, objv[i++], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + return (result); + } else if (type == DB_HEAP) { + /* For heap, the incoming key is to a recno db */ + result = _GetUInt32(interp, objv[i++], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + return (result); + } else { + ret = _CopyObjBytes(interp, objv[i++], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBDEL(ret), "db del"); + return (result); + } + key.data = ktmp; + } + _debug_check(); + if (type != DB_HEAP) + ret = dbp->del(dbp, txn, &key, dflag | multiflag); + else { + /* set up the recno data, which will be heap key */ + 
rdata.ulen = sizeof(DB_HEAP_RID); + rdata.flags = DB_DBT_USERMEM; + if (rdata.data == NULL) { + ret = __os_malloc(dbp->env, + rdata.ulen, &rdata.data); + if (ret != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return(TCL_ERROR); + } + } + /* + * 3 steps for delete, first get the key from the + * recno db and create heap key, second delete + * recno record, third delete heap record. + */ + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + ret = recdbp->get(recdbp, txn, &key, &rdata, 0); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db del heap db"); + goto out; + } + + /* Set up the key for heap */ + rid.pgno = ((DB_HEAP_RID *)rdata.data)->pgno; + rid.indx = ((DB_HEAP_RID *)rdata.data)->indx; + hkey.data = &rid; + hkey.ulen = hkey.size = sizeof(DB_HEAP_RID); + hkey.flags = DB_DBT_USERMEM; + /* 2nd: Delete from recno db */ + _debug_check(); + ret = recdbp->del(recdbp, txn, &key, dflag | multiflag); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db del heap db"); + goto out; + } + + /* 3rd: Delete from heap db */ + _debug_check(); + ret = dbp->del(dbp, txn, &hkey, dflag | multiflag); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db del heap db"); + goto out; + } + } + /* + * If we have any error, set up return result and stop + * processing keys. + */ + if (freekey && key.data != NULL) + __os_free(dbp->env, key.data); + if (ret != 0) + break; + } + result = _ReturnSetup(interp, ret, DB_RETOK_DBDEL(ret), "db del"); + + /* + * At this point we've either finished or, if we have a pattern, + * we go to the nearest one and step forward until we don't + * have any more that match the pattern prefix. + */ + if (pattern) { + ret = dbp->cursor(dbp, txn, &dbc, 0); + if (ret != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db cursor"); + goto out; + } + /* + * Note, prefix is returned in new space. Must free it. 
+ */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ret = _GetGlobPrefix(pattern, &prefix); + if (ret) { + result = TCL_ERROR; + Tcl_SetResult(interp, + "Unable to allocate pattern space", TCL_STATIC); + goto out; + } + key.data = prefix; + key.size = (u_int32_t)strlen(prefix); + if (strlen(prefix) == 0) + flag = DB_FIRST; + else + flag = DB_SET_RANGE; + ret = dbc->get(dbc, &key, &data, flag); + while (ret == 0 && + memcmp(key.data, prefix, strlen(prefix)) == 0) { + /* + * Each time through here the cursor is pointing + * at the current valid item. Delete it and + * move ahead. + */ + _debug_check(); + ret = dbc->del(dbc, dflag); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCDEL(ret), "db c_del"); + break; + } + /* + * Deleted the current, now move to the next item + * in the list, check if it matches the prefix pattern. + */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ret = dbc->get(dbc, &key, &data, DB_NEXT); + } + if (ret == DB_NOTFOUND) + ret = 0; + /* + * _GetGlobPrefix(), the function which allocates prefix, works + * by copying and condensing another string. Thus prefix may + * have multiple nuls at the end, so we free using __os_free(). + */ + __os_free(dbp->env, prefix); + (void)dbc->close(dbc); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db del"); + } +out: + /* If we allocated memory for a heap delete, release it. */ + if (rdata.data != NULL) + __os_free(dbp->env, rdata.data); + + return (result); +} + +/* + * tcl_db_cursor -- + */ +static int +tcl_DbCursor(interp, objc, objv, dbp, dbcp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ + DBC **dbcp; /* Return cursor pointer */ +{ + static const char *dbcuropts[] = { +#ifdef CONFIG_TEST + "-read_committed", + "-read_uncommitted", + "-update", +#endif + "-bulk", + "-txn", + NULL + }; + enum dbcuropts { +#ifdef CONFIG_TEST + DBCUR_READ_COMMITTED, + DBCUR_READ_UNCOMMITTED, + DBCUR_UPDATE, +#endif + DBCUR_BULK, + DBCUR_TXN + }; + DB_TXN *txn; + u_int32_t flag; + int i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + flag = 0; + txn = NULL; + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcuropts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto out; + } + i++; + switch ((enum dbcuropts)optindex) { +#ifdef CONFIG_TEST + case DBCUR_READ_COMMITTED: + flag |= DB_READ_COMMITTED; + break; + case DBCUR_READ_UNCOMMITTED: + flag |= DB_READ_UNCOMMITTED; + break; + case DBCUR_UPDATE: + flag |= DB_WRITECURSOR; + break; +#endif + case DBCUR_BULK: + flag |= DB_CURSOR_BULK; + break; + case DBCUR_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Cursor: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + _debug_check(); + ret = dbp->cursor(dbp, txn, dbcp, flag); + if (ret != 0) + result = _ErrorSetup(interp, ret, "db cursor"); +out: + return (result); +} + +/* + * tcl_DbAssociate -- + * Call DB->associate(). 
+ */ +static int +tcl_DbAssociate(interp, objc, objv, dbp) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB *dbp; +{ + static const char *dbaopts[] = { + "-create", + "-immutable_key", + "-txn", + NULL + }; + enum dbaopts { + DBA_CREATE, + DBA_IMMUTABLE_KEY, + DBA_TXN + }; + DB *sdbp; + DB_TXN *txn; + DBTCL_INFO *sdbip; + int i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + u_int32_t flag; + + txn = NULL; + result = TCL_OK; + flag = 0; + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "[callback] secondary"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbaopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + if (result == TCL_OK) + return (result); + result = TCL_OK; + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbaopts)optindex) { + case DBA_CREATE: + flag |= DB_CREATE; + break; + case DBA_IMMUTABLE_KEY: + flag |= DB_IMMUTABLE_KEY; + break; + case DBA_TXN: + if (i > (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Associate: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + } + if (result != TCL_OK) + return (result); + + /* + * Better be 1 or 2 args left. The last arg must be the sdb + * handle. If 2 args then objc-2 is the callback proc, else + * we have a NULL callback. + */ + /* Get the secondary DB handle. */ + arg = Tcl_GetStringFromObj(objv[objc - 1], NULL); + sdbp = NAME_TO_DB(arg); + if (sdbp == NULL) { + snprintf(msg, MSG_SIZE, + "Associate: Invalid database handle: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + + /* + * The callback is simply a Tcl object containing the name + * of the callback proc, which is the second-to-last argument. 
+ * + * Note that the callback needs to go in the *secondary* DB handle's + * info struct; we may have multiple secondaries with different + * callbacks. + */ + sdbip = (DBTCL_INFO *)sdbp->api_internal; + +#ifdef CONFIG_TEST + if (i != objc - 1) { +#else + if (i != objc - 1) { +#endif + /* + * We have 2 args, get the callback. + */ + sdbip->i_second_call = objv[objc - 2]; + Tcl_IncrRefCount(sdbip->i_second_call); + + /* Now call associate. */ + _debug_check(); + + ret = dbp->associate(dbp, + txn, sdbp, tcl_second_call, flag); + } else { + /* + * We have a NULL callback. + */ + sdbip->i_second_call = NULL; + ret = dbp->associate(dbp, txn, sdbp, NULL, flag); + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "associate"); + + return (result); +} + +/* + * tcl_DbAssociateForeign -- + * Call DB->associate_foreign(). + */ +static int +tcl_DbAssociateForeign(interp, objc, objv, dbp) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB *dbp; +{ + static const char *dbafopts[] = { + "-abort", + "-cascade", + "-nullify", + NULL + }; + enum dbafopts { + DBAF_ABORT, + DBAF_CASCADE, + DBAF_NULLIFY + }; + DB *sdbp; + DBTCL_INFO *sdbip; + int i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + u_int32_t flag; + int (*callback)(DB *, const DBT *, DBT *, const DBT*, int*); + + result = TCL_OK; + flag = 0; + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?args? ?callback? secondary"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbafopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + if (result == TCL_OK) + return (result); + result = TCL_OK; + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbafopts)optindex) { + case DBAF_ABORT: + flag |= DB_FOREIGN_ABORT; + break; + case DBAF_CASCADE: + flag |= DB_FOREIGN_CASCADE; + break; + case DBAF_NULLIFY: + if (i > (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, + objv, "?-nullify ?callback?? 
secondary"); + result = TCL_ERROR; + break; + } + flag |= DB_FOREIGN_NULLIFY; + break; + } + } + if (result != TCL_OK) + return (result); + + /* + * Better be 1 or 2 args left. The last arg must be the sdb + * handle. If 2 args then objc-2 is the callback proc, else + * we have a NULL callback. + */ + /* Get the secondary DB handle. */ + arg = Tcl_GetStringFromObj(objv[objc - 1], NULL); + sdbp = NAME_TO_DB(arg); + if (sdbp == NULL) { + snprintf(msg, MSG_SIZE, + "Associate_foreign: Invalid database handle: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + + /* + * The callback is simply a Tcl object containing the name + * of the callback proc, which is the second-to-last argument. + * + * Note that the callback needs to go in the *secondary* DB handle's + * info struct; we may have multiple secondaries with different + * callbacks. + */ + sdbip = (DBTCL_INFO *)sdbp->api_internal; + + callback = NULL; + sdbip->i_foreign_call = NULL; + + if (i != objc -1) { + /* Set the callback */ + callback = tcl_foreign_call; + sdbip->i_foreign_call = objv[objc - 2]; + Tcl_IncrRefCount(sdbip->i_foreign_call); + } else { + /* + * We have a NULL callback. + */ + callback = NULL; + } + + _debug_check(); + /* Now call associate_foreign. */ + ret = dbp->associate_foreign(dbp, sdbp, callback, flag); + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "associate_foreign"); + + return (result); +} + +/* + * tcl_second_call -- + * Callback function for secondary indices. Get the callback + * out of ip->i_second_call and call it. 
+ */ +static int +tcl_second_call(dbp, pkey, data, skey) + DB *dbp; + const DBT *pkey, *data; + DBT *skey; +{ + DBT *tskey; + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *pobj, *dobj, *objv[3], *robj, **skeylist; + size_t len; + int ilen, result, ret; + u_int32_t i, nskeys; + void *retbuf, *databuf; + + ip = (DBTCL_INFO *)dbp->api_internal; + interp = ip->i_interp; + objv[0] = ip->i_second_call; + + /* + * Create two ByteArray objects, with the contents of the pkey + * and data DBTs that are our inputs. + */ + pobj = Tcl_NewByteArrayObj(pkey->data, (int)pkey->size); + Tcl_IncrRefCount(pobj); + dobj = Tcl_NewByteArrayObj(data->data, (int)data->size); + Tcl_IncrRefCount(dobj); + + objv[1] = pobj; + objv[2] = dobj; + + result = Tcl_EvalObjv(interp, 3, objv, 0); + + Tcl_DecrRefCount(pobj); + Tcl_DecrRefCount(dobj); + + if (result != TCL_OK) { + __db_errx(dbp->env, + "Tcl callback function failed with code %d", result); + return (EINVAL); + } + + robj = Tcl_GetObjResult(interp); + if (robj->typePtr == NULL || strcmp(robj->typePtr->name, "list") != 0) { + nskeys = 1; + skeylist = &robj; + tskey = skey; + } else { + if ((result = Tcl_ListObjGetElements(interp, + robj, &ilen, &skeylist)) != TCL_OK) { + __db_errx(dbp->env, + "Could not get list elements from Tcl callback"); + return (EINVAL); + } + nskeys = (u_int32_t)ilen; + + /* + * It would be nice to check for nskeys == 0 and return + * DB_DONOTINDEX, but Tcl does not distinguish between an empty + * string and an empty list, so that would disallow empty + * secondary keys. 
+ */ + if (nskeys == 0) { + nskeys = 1; + skeylist = &robj; + } + if (nskeys == 1) + tskey = skey; + else { + memset(skey, 0, sizeof(DBT)); + if ((ret = __os_umalloc(dbp->env, + nskeys * sizeof(DBT), &skey->data)) != 0) + return (ret); + skey->size = nskeys; + F_SET(skey, DB_DBT_MULTIPLE | DB_DBT_APPMALLOC); + tskey = (DBT *)skey->data; + } + } + + for (i = 0; i < nskeys; i++, tskey++) { + retbuf = Tcl_GetByteArrayFromObj(skeylist[i], &ilen); + len = (size_t)ilen; + + /* + * retbuf is owned by Tcl; copy it into malloc'ed memory. + * We need to use __os_umalloc rather than ufree because this + * will be freed by DB using __os_ufree--the DB_DBT_APPMALLOC + * flag tells DB to free application-allocated memory. + */ + if ((ret = __os_umalloc(dbp->env, len, &databuf)) != 0) + return (ret); + memcpy(databuf, retbuf, len); + + memset(tskey, 0, sizeof(DBT)); + tskey->data = databuf; + tskey->size = (u_int32_t)len; + F_SET(tskey, DB_DBT_APPMALLOC); + } + + return (0); +} + +/* + * tcl_foreign_call -- + * Foreign callback function for secondary indices. Get the callback + * out of ip->i_foreign_call and call it. + */ +static int tcl_foreign_call(sdbp, pkey, data, fkey, changed) + DB *sdbp; + const DBT *pkey, *fkey; + DBT *data; + int *changed; +{ + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *pobj, *dobj, *fobj, *objv[4], *robj; + size_t len, orig_len; + int ilen, result, ret; + void *retbuf; + + ip = (DBTCL_INFO *)sdbp->api_internal; + interp = ip->i_interp; + objv[0] = ip->i_foreign_call; + + /* + * Create two ByteArray objects, with the contents of the pkey + * and data DBTs that are our inputs. 
+ */ + pobj = Tcl_NewByteArrayObj(pkey->data, (int)pkey->size); + Tcl_IncrRefCount(pobj); + dobj = Tcl_NewByteArrayObj(data->data, (int)data->size); + Tcl_IncrRefCount(dobj); + fobj = Tcl_NewByteArrayObj(fkey->data, (int)fkey->size); + Tcl_IncrRefCount(fobj); + + objv[1] = pobj; + objv[2] = dobj; + objv[3] = fobj; + + result = Tcl_EvalObjv(interp, 4, objv, 0); + + Tcl_DecrRefCount(pobj); + Tcl_DecrRefCount(dobj); + Tcl_DecrRefCount(fobj); + + if (result != TCL_OK) { + __db_errx(sdbp->env, + "Tcl foreign callback function failed with code %d", + result); + return (EINVAL); + } + + robj = Tcl_GetObjResult(interp); + retbuf = Tcl_GetByteArrayFromObj(robj, &ilen); + len = (size_t)ilen; + orig_len = (size_t)data->size; + if((len == orig_len) && (memcmp(retbuf, data->data, len) == 0)) { + *changed = 0; + return 0; + } else { + *changed = 1; + if(orig_len >= len) { + memcpy(data->data, retbuf, len); + data->size = (u_int32_t)len; + } else { + if((ret = __os_malloc( + sdbp->env, len, &data->data)) != 0) + return ret; + memcpy(data->data, retbuf, len); + data->size = (u_int32_t)len; + F_SET(data, DB_DBT_APPMALLOC); + } + } + + return (0); +} + +/* + * tcl_db_join -- + */ +static int +tcl_DbJoin(interp, objc, objv, dbp, dbcp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ + DBC **dbcp; /* Cursor pointer */ +{ + static const char *dbjopts[] = { + "-nosort", + NULL + }; + enum dbjopts { + DBJ_NOSORT + }; + DBC **listp; + size_t size; + u_int32_t flag; + int adj, i, j, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + flag = 0; + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "curs1 curs2 ..."); + return (TCL_ERROR); + } + + for (adj = i = 2; i < objc; i++) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbjopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + if (result == TCL_OK) + return (result); + result = TCL_OK; + Tcl_ResetResult(interp); + break; + } + switch ((enum dbjopts)optindex) { + case DBJ_NOSORT: + flag |= DB_JOIN_NOSORT; + adj++; + break; + } + } + if (result != TCL_OK) + return (result); + /* + * Allocate one more for NULL ptr at end of list. + */ + size = sizeof(DBC *) * (size_t)((objc - adj) + 1); + ret = __os_malloc(dbp->env, size, &listp); + if (ret != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (TCL_ERROR); + } + + memset(listp, 0, size); + for (j = 0, i = adj; i < objc; i++, j++) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + listp[j] = NAME_TO_DBC(arg); + if (listp[j] == NULL) { + snprintf(msg, MSG_SIZE, + "Join: Invalid cursor: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + goto out; + } + } + listp[j] = NULL; + _debug_check(); + ret = dbp->join(dbp, listp, dbcp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db join"); + +out: + __os_free(dbp->env, listp); + return (result); +} + +/* + * tcl_db_getjoin -- + */ +static int +tcl_DbGetjoin(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbgetjopts[] = { +#ifdef CONFIG_TEST + "-nosort", +#endif + "-txn", + NULL + }; + enum dbgetjopts { +#ifdef CONFIG_TEST + DBGETJ_NOSORT, +#endif + DBGETJ_TXN + }; + DB_TXN *txn; + DB *elemdbp; + DBC **listp; + DBC *dbc; + DBT key, data; + Tcl_Obj **elemv, *retlist; + void *ktmp; + size_t size; + u_int32_t flag; + int adj, elemc, freekey, i, j, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + flag = 0; + ktmp = NULL; + freekey = 0; + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "{db1 key1} {db2 key2} ..."); + return (TCL_ERROR); + } + + txn = NULL; + i = 2; + adj = i; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbgetjopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + if (result == TCL_OK) + return (result); + result = TCL_OK; + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbgetjopts)optindex) { +#ifdef CONFIG_TEST + case DBGETJ_NOSORT: + flag |= DB_JOIN_NOSORT; + adj++; + break; +#endif + case DBGETJ_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + adj += 2; + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "GetJoin: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + } + if (result != TCL_OK) + return (result); + size = sizeof(DBC *) * (size_t)((objc - adj) + 1); + ret = __os_malloc(NULL, size, &listp); + if (ret != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (TCL_ERROR); + } + + memset(listp, 0, size); + for (j = 0, i = adj; i < objc; i++, j++) { + /* + * Get each sublist as {db key} + */ + result = Tcl_ListObjGetElements(interp, objv[i], + &elemc, &elemv); + if (elemc != 2) { + Tcl_SetResult(interp, "Lists must be {db key}", + TCL_STATIC); 
+ result = TCL_ERROR; + goto out; + } + /* + * Get a pointer to that open db. Then, open a cursor in + * that db, and go to the "key" place. + */ + elemdbp = NAME_TO_DB(Tcl_GetStringFromObj(elemv[0], NULL)); + if (elemdbp == NULL) { + snprintf(msg, MSG_SIZE, "Get_join: Invalid db: %s\n", + Tcl_GetStringFromObj(elemv[0], NULL)); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + goto out; + } + ret = elemdbp->cursor(elemdbp, txn, &listp[j], 0); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db cursor")) == TCL_ERROR) + goto out; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ret = _CopyObjBytes(interp, elemv[elemc-1], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db join"); + goto out; + } + key.data = ktmp; + ret = (listp[j])->get(listp[j], &key, &data, DB_SET); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_DBCGET(ret), + "db cget")) == TCL_ERROR) + goto out; + } + listp[j] = NULL; + _debug_check(); + ret = dbp->join(dbp, listp, &dbc, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db join"); + if (result == TCL_ERROR) + goto out; + + retlist = Tcl_NewListObj(0, NULL); + while (ret == 0 && result == TCL_OK) { + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.flags |= DB_DBT_MALLOC; + data.flags |= DB_DBT_MALLOC; + ret = dbc->get(dbc, &key, &data, 0); + /* + * Build up our {name value} sublist + */ + if (ret == 0) { + result = _SetListElem(interp, retlist, + key.data, key.size, + data.data, data.size); + __os_ufree(dbp->env, key.data); + __os_ufree(dbp->env, data.data); + } + } + (void)dbc->close(dbc); + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); +out: + if (ktmp != NULL && freekey) + __os_free(dbp->env, ktmp); + while (j) { + if (listp[j]) + (void)(listp[j])->close(listp[j]); + j--; + } + __os_free(dbp->env, listp); + return (result); +} + +/* + * tcl_DbGetFlags -- + */ +static int 
+tcl_DbGetFlags(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } db_flags[] = { + { DB_CHKSUM, "-chksum" }, + { DB_DUP, "-dup" }, + { DB_DUPSORT, "-dupsort" }, + { DB_ENCRYPT, "-encrypt" }, + { DB_INORDER, "-inorder" }, + { DB_TXN_NOT_DURABLE, "-notdurable" }, + { DB_RECNUM, "-recnum" }, + { DB_RENUMBER, "-renumber" }, + { DB_REVSPLITOFF, "-revsplitoff" }, + { DB_SNAPSHOT, "-snapshot" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = dbp->get_flags(dbp, &flags); + if ((result = _ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "db get_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; db_flags[i].flag != 0; i++) + if (LF_ISSET(db_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, db_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} + +/* + * tcl_DbGetOpenFlags -- + */ +static int +tcl_DbGetOpenFlags(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } open_flags[] = { + { DB_AUTO_COMMIT, "-auto_commit" }, + { DB_CREATE, "-create" }, + { DB_EXCL, "-excl" }, + { DB_MULTIVERSION, "-multiversion" }, + { DB_NOMMAP, "-nommap" }, + { DB_RDONLY, "-rdonly" }, + { DB_READ_UNCOMMITTED, "-read_uncommitted" }, + { DB_THREAD, "-thread" }, + { DB_TRUNCATE, "-truncate" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = dbp->get_open_flags(dbp, &flags); + if ((result = _ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "db get_open_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; open_flags[i].flag != 0; i++) + if (LF_ISSET(open_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, open_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} + +/* + * tcl_DbCount -- + */ +static int +tcl_DbCount(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + DB *recdbp; + DBC *dbc; + DBT key, data, hkey; + Tcl_Obj *res; + void *ktmp; + DB_HEAP_RID rid; + db_recno_t count, recno; + int freekey, result, ret; + + res = NULL; + count = 0; + freekey = ret = 0; + ktmp = NULL; + result = TCL_OK; + dbc = NULL; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "key"); + return (TCL_ERROR); + } + + /* + * Get the count for our key. + * We do this by getting a cursor for this DB. Moving the cursor + * to the set location, and getting a count on that cursor. 
+ */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + memset(&hkey, 0, sizeof(hkey)); + + /* + * If it's a heap, queue or recno database, we must make sure to + * treat the key as a recno rather than as a byte string. + */ + if (dbp->type == DB_HEAP || + dbp->type == DB_RECNO || dbp->type == DB_QUEUE) { + result = _GetUInt32(interp, objv[2], &recno); + if (result == TCL_OK) { + if (dbp->type == DB_HEAP) { + hkey.data = &recno; + hkey.size = sizeof(db_recno_t); + + key.data = &rid; + key.ulen = key.size = sizeof(DB_HEAP_RID); + key.flags = DB_DBT_USERMEM; + } else { + key.data = &recno; + key.size = sizeof(db_recno_t); + } + } else + return (result); + } else { + ret = _CopyObjBytes(interp, objv[2], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db count"); + return (result); + } + key.data = ktmp; + } + _debug_check(); + + /* If it's a heap, translate recno to rid. */ + if (dbp->type == DB_HEAP) { + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + + ret = recdbp->get(recdbp, NULL, &hkey, &key, 0); + if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) { + count = 0; + goto out1; + } else if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBGET(ret), "db get heap"); + return (result); + } + } + + ret = dbp->cursor(dbp, NULL, &dbc, 0); + if (ret != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db cursor"); + goto out; + } + /* + * Move our cursor to the key. 
+ */ + ret = dbc->get(dbc, &key, &data, DB_SET); + if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) + count = 0; + else { + ret = dbc->count(dbc, &count, 0); + if (ret != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db c count"); + goto out; + } + } +out1: res = Tcl_NewWideIntObj((Tcl_WideInt)count); + Tcl_SetObjResult(interp, res); + +out: if (ktmp != NULL && freekey) + __os_free(dbp->env, ktmp); + if (dbc != NULL) + (void)dbc->close(dbc); + return (result); +} + +#ifdef CONFIG_TEST +/* + * tcl_DbKeyRange -- + */ +static int +tcl_DbKeyRange(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbkeyropts[] = { + "-txn", + NULL + }; + enum dbkeyropts { + DBKEYR_TXN + }; + DB_TXN *txn; + DB_KEY_RANGE range; + DBT key; + DBTYPE type; + Tcl_Obj *myobjv[3], *retlist; + void *ktmp; + db_recno_t recno; + u_int32_t flag; + int freekey, i, myobjc, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + ktmp = NULL; + flag = 0; + freekey = 0; + result = TCL_OK; + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id? 
key"); + return (TCL_ERROR); + } + + txn = NULL; + for (i = 2; i < objc;) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbkeyropts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + if (result == TCL_OK) + return (result); + result = TCL_OK; + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbkeyropts)optindex) { + case DBKEYR_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "KeyRange: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + } + if (result != TCL_OK) + return (result); + (void)dbp->get_type(dbp, &type); + ret = 0; + /* + * Make sure we have a key. + */ + if (i != (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? key"); + result = TCL_ERROR; + goto out; + } + memset(&key, 0, sizeof(key)); + if (type == DB_RECNO || type == DB_QUEUE) { + result = _GetUInt32(interp, objv[i], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + return (result); + } else { + ret = _CopyObjBytes(interp, objv[i++], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db keyrange"); + return (result); + } + key.data = ktmp; + } + _debug_check(); + ret = dbp->key_range(dbp, txn, &key, &range, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db keyrange"); + if (result == TCL_ERROR) + goto out; + + /* + * If we succeeded, set up return list. 
+ */ + myobjc = 3; + myobjv[0] = Tcl_NewDoubleObj(range.less); + myobjv[1] = Tcl_NewDoubleObj(range.equal); + myobjv[2] = Tcl_NewDoubleObj(range.greater); + retlist = Tcl_NewListObj(myobjc, myobjv); + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); + +out: if (ktmp != NULL && freekey) + __os_free(dbp->env, ktmp); + return (result); +} +#endif + +/* + * tcl_DbTruncate -- + */ +static int +tcl_DbTruncate(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbcuropts[] = { + "-txn", + NULL + }; + enum dbcuropts { + DBTRUNC_TXN + }; + DB *recdbp; + DB_TXN *txn; + Tcl_Obj *res; + u_int32_t count; + int i, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + txn = NULL; + result = TCL_OK; + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcuropts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto out; + } + i++; + switch ((enum dbcuropts)optindex) { + case DBTRUNC_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Truncate: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + _debug_check(); + ret = dbp->truncate(dbp, txn, &count, 0); + if (dbp->type == DB_HEAP && ret == 0) { + /* The truncation of hrdbp will automatically truncate hsdbp. 
*/ + recdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + ret = recdbp->truncate(recdbp, txn, NULL, 0); + } + if (ret != 0) + result = _ErrorSetup(interp, ret, "db truncate"); + + else { + res = Tcl_NewWideIntObj((Tcl_WideInt)count); + Tcl_SetObjResult(interp, res); + } +out: + return (result); +} + +#ifdef CONFIG_TEST +/* + * tcl_DbCompact -- + */ +static int +tcl_DbCompact(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + static const char *dbcuropts[] = { + "-fillpercent", + "-freespace", + "-freeonly", + "-pages", + "-start", + "-stop", + "-timeout", + "-txn", + NULL + }; + enum dbcuropts { + DBREORG_FILLFACTOR, + DBREORG_FREESPACE, + DBREORG_FREEONLY, + DBREORG_PAGES, + DBREORG_START, + DBREORG_STOP, + DBREORG_TIMEOUT, + DBREORG_TXN + }; + DBTCL_INFO *ip; + DBT *key, end, start, stop; + DBTYPE type; + DB_TXN *txn; + Tcl_Obj *myobj, *retlist; + db_recno_t recno, srecno; + u_int32_t arg, fillfactor, flags, pages, timeout; + char *carg, msg[MSG_SIZE]; + int freekey, i, optindex, result, ret; + void *kp; + + flags = 0; + result = TCL_OK; + txn = NULL; + (void)dbp->get_type(dbp, &type); + memset(&start, 0, sizeof(start)); + memset(&stop, 0, sizeof(stop)); + memset(&end, 0, sizeof(end)); + ip = (DBTCL_INFO *)dbp->api_internal; + fillfactor = pages = timeout = 0; + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcuropts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto out; + } + i++; + switch ((enum dbcuropts)optindex) { + case DBREORG_FILLFACTOR: + if (i == objc) { + Tcl_WrongNumArgs(interp, + 2, objv, "?-fillfactor number?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &arg); + if (result != TCL_OK) + goto out; + i++; + fillfactor = arg; + break; + case DBREORG_FREESPACE: + LF_SET(DB_FREE_SPACE); + break; + + case DBREORG_FREEONLY: 
+ LF_SET(DB_FREELIST_ONLY); + break; + + case DBREORG_PAGES: + if (i == objc) { + Tcl_WrongNumArgs(interp, + 2, objv, "?-pages number?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &arg); + if (result != TCL_OK) + goto out; + i++; + pages = arg; + break; + case DBREORG_TIMEOUT: + if (i == objc) { + Tcl_WrongNumArgs(interp, + 2, objv, "?-timeout number?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &arg); + if (result != TCL_OK) + goto out; + i++; + timeout = arg; + break; + + case DBREORG_START: + case DBREORG_STOP: + if (i == objc) { + Tcl_WrongNumArgs(interp, 1, objv, + "?-args? -start/stop key"); + result = TCL_ERROR; + goto out; + } + if ((enum dbcuropts)optindex == DBREORG_START) { + key = &start; + key->data = &recno; + } else { + key = &stop; + key->data = &srecno; + } + if (type == DB_RECNO || type == DB_QUEUE) { + result = _GetUInt32( + interp, objv[i], key->data); + if (result == TCL_OK) { + key->size = sizeof(db_recno_t); + } else + goto out; + } else { + ret = _CopyObjBytes(interp, objv[i], + &key->data, &key->size, &freekey); + if (ret != 0) + goto err; + if (freekey == 0) { + if ((ret = __os_malloc(NULL, + key->size, &kp)) != 0) + goto err; + + memcpy(kp, key->data, key->size); + key->data = kp; + key->ulen = key->size; + } + } + i++; + break; + case DBREORG_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + carg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(carg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Compact: Invalid txn: %s\n", carg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + if (ip->i_cdata == NULL) + if ((ret = __os_calloc(dbp->env, + 1, sizeof(DB_COMPACT), &ip->i_cdata)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + goto out; + } + + ip->i_cdata->compact_fillpercent = 
fillfactor; + ip->i_cdata->compact_timeout = timeout; + ip->i_cdata->compact_pages = pages; + + _debug_check(); + ret = dbp->compact(dbp, txn, &start, &stop, ip->i_cdata, flags, &end); + result = _ReturnSetup(interp, ret, DB_RETOK_DBCGET(ret), "dbp compact"); + if (result == TCL_ERROR) + goto out; + + retlist = Tcl_NewListObj(0, NULL); + if (ret != 0) + goto out; + if (type == DB_RECNO || type == DB_QUEUE) { + if (end.size == 0) + recno = 0; + else + recno = *((db_recno_t *)end.data); + myobj = Tcl_NewWideIntObj((Tcl_WideInt)recno); + } else + myobj = Tcl_NewByteArrayObj(end.data, (int)end.size); + result = Tcl_ListObjAppendElement(interp, retlist, myobj); + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); + + if (0) { +err: result = _ReturnSetup(interp, + ret, DB_RETOK_DBCGET(ret), "dbc compact"); + } +out: + if (start.data != NULL && start.data != &recno) + __os_free(NULL, start.data); + if (stop.data != NULL && stop.data != &srecno) + __os_free(NULL, stop.data); + if (end.data != NULL) + __os_free(NULL, end.data); + + return (result); +} + +/* + * tcl_DbCompactStat + */ +static int +tcl_DbCompactStat(interp, objc, objv, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB *dbp; /* Database pointer */ +{ + DBTCL_INFO *ip; + + COMPQUIET(objc, 0); + COMPQUIET(objv, NULL); + + ip = (DBTCL_INFO *)dbp->api_internal; + + return (tcl_CompactStat(interp, ip)); +} + +/* + * PUBLIC: int tcl_CompactStat __P((Tcl_Interp *, DBTCL_INFO *)); + */ +int +tcl_CompactStat(interp, ip) + Tcl_Interp *interp; /* Interpreter */ + DBTCL_INFO *ip; +{ + DB_COMPACT *rp; + Tcl_Obj *res; + int result; + char msg[MSG_SIZE]; + + result = TCL_OK; + rp = NULL; + + _debug_check(); + if ((rp = ip->i_cdata) == NULL) { + snprintf(msg, MSG_SIZE, + "Compact stat: No stats available\n"); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + goto error; + } + + res = Tcl_NewObj(); + + MAKE_STAT_LIST("Pages freed", rp->compact_pages_free); + MAKE_STAT_LIST("Pages truncated", rp->compact_pages_truncated); + MAKE_STAT_LIST("Pages examined", rp->compact_pages_examine); + MAKE_STAT_LIST("Levels removed", rp->compact_levels); + MAKE_STAT_LIST("Deadlocks encountered", rp->compact_deadlock); + MAKE_STAT_LIST("Empty buckets", rp->compact_empty_buckets); + + Tcl_SetObjResult(interp, res); +error: + return (result); +} +#endif diff --git a/lang/tcl/tcl_db_pkg.c b/lang/tcl/tcl_db_pkg.c new file mode 100644 index 00000000..0dbe9ef1 --- /dev/null +++ b/lang/tcl/tcl_db_pkg.c @@ -0,0 +1,5056 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/tcl_db.h" + +/* XXX we must declare global data in just one place */ +DBTCL_GLOBAL __dbtcl_global; + +/* + * Prototypes for procedures defined later in this file: + */ +static int berkdb_Cmd __P((ClientData, Tcl_Interp *, int, + Tcl_Obj * CONST*)); +static int bdb_EnvOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DBTCL_INFO *, DB_ENV **)); +static int bdb_DbOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DBTCL_INFO *, DB **)); +static int bdb_DbRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int bdb_DbRename __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int bdb_Version __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); + +#ifdef HAVE_64BIT_TYPES +static int bdb_SeqOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DBTCL_INFO *, DB_SEQUENCE **)); +#endif + +static int heap_callback __P((DB *dbp, const DBT *, const DBT *, DBT *)); + +#ifdef CONFIG_TEST +static int bdb_DbUpgrade __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int bdb_DbVerify __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DBTCL_INFO *)); +static int bdb_GetConfig __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int bdb_Handles __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int bdb_MsgType __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); + +static int tcl_bt_compare __P((DB *, const DBT *, const DBT *)); +static int tcl_compare_callback __P((DB *, const DBT *, const DBT *, + Tcl_Obj *, char *)); +static void tcl_db_free __P((void *)); +static void * tcl_db_malloc __P((size_t)); +static void * tcl_db_realloc __P((void *, size_t)); +static int tcl_dup_compare __P((DB *, const DBT *, const DBT *)); +static u_int32_t tcl_h_hash __P((DB *, const void *, u_int32_t)); +static int tcl_isalive __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); +static u_int32_t tcl_part_callback __P((DB *, DBT *)); +static int 
tcl_set_partition_dirs + __P((Tcl_Interp *, DB *, Tcl_Obj *)); +static int tcl_set_partition_keys + __P((Tcl_Interp *, DB *, Tcl_Obj *, DBT **)); +#endif + +int Db_tcl_Init __P((Tcl_Interp *)); + +/* + * Db_tcl_Init -- + * + * This is a package initialization procedure, which is called by Tcl when + * this package is to be added to an interpreter. The name is based on the + * name of the shared library, currently libdb_tcl-X.Y.so, which Tcl uses + * to determine the name of this function. + */ +int +Db_tcl_Init(interp) + Tcl_Interp *interp; /* Interpreter in which the package is + * to be made available. */ +{ + int code; + char pkg[12]; + + snprintf(pkg, sizeof(pkg), "%d.%d", DB_VERSION_MAJOR, DB_VERSION_MINOR); + code = Tcl_PkgProvide(interp, "Db_tcl", pkg); + if (code != TCL_OK) + return (code); + + /* + * Don't allow setuid/setgid scripts for the Tcl API because some Tcl + * functions evaluate the arguments and could otherwise allow a user + * to inject Tcl commands. + */ +#if defined(HAVE_SETUID) && defined(HAVE_GETUID) + (void)setuid(getuid()); +#endif +#if defined(HAVE_SETGID) && defined(HAVE_GETGID) + (void)setgid(getgid()); +#endif + + (void)Tcl_CreateObjCommand(interp, + "berkdb", (Tcl_ObjCmdProc *)berkdb_Cmd, (ClientData)0, NULL); + /* + * Create shared global debugging variables + */ + (void)Tcl_LinkVar( + interp, "__debug_on", (char *)&__debug_on, TCL_LINK_INT); + (void)Tcl_LinkVar( + interp, "__debug_print", (char *)&__debug_print, TCL_LINK_INT); + (void)Tcl_LinkVar( + interp, "__debug_stop", (char *)&__debug_stop, TCL_LINK_INT); + (void)Tcl_LinkVar( + interp, "__debug_test", (char *)&__debug_test, + TCL_LINK_INT); + LIST_INIT(&__db_infohead); + return (TCL_OK); +} + +/* + * berkdb_cmd -- + * Implements the "berkdb" command. 
+ * This command supports three sub commands: + * berkdb version - Returns a list {major minor patch} + * berkdb env - Creates a new DB_ENV and returns a binding + * to a new command of the form dbenvX, where X is an + * integer starting at 0 (dbenv0, dbenv1, ...) + * berkdb open - Creates a new DB (optionally within + * the given environment. Returns a binding to a new + * command of the form dbX, where X is an integer + * starting at 0 (db0, db1, ...) + */ +static int +berkdb_Cmd(notused, interp, objc, objv) + ClientData notused; /* Not used. */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *berkdbcmds[] = { +#ifdef CONFIG_TEST + "dbverify", + "getconfig", + "handles", + "msgtype", + "upgrade", +#endif + "dbremove", + "dbrename", + "env", + "envremove", + "open", +#ifdef HAVE_64BIT_TYPES + "sequence", +#endif + "version", +#ifdef CONFIG_TEST + /* All below are compatibility functions */ + "hcreate", "hsearch", "hdestroy", + "dbminit", "fetch", "store", + "delete", "firstkey", "nextkey", + "ndbm_open", "dbmclose", +#endif + /* All below are convenience functions */ + "rand", "random_int", "srand", + "debug_check", + NULL + }; + /* + * All commands enums below ending in X are compatibility + */ + enum berkdbcmds { +#ifdef CONFIG_TEST + BDB_DBVERIFY, + BDB_GETCONFIG, + BDB_HANDLES, + BDB_MSGTYPE, + BDB_UPGRADE, +#endif + BDB_DBREMOVE, + BDB_DBRENAME, + BDB_ENV, + BDB_ENVREMOVE, + BDB_OPEN, +#ifdef HAVE_64BIT_TYPES + BDB_SEQUENCE, +#endif + BDB_VERSION, +#ifdef CONFIG_TEST + BDB_HCREATEX, BDB_HSEARCHX, BDB_HDESTROYX, + BDB_DBMINITX, BDB_FETCHX, BDB_STOREX, + BDB_DELETEX, BDB_FIRSTKEYX, BDB_NEXTKEYX, + BDB_NDBMOPENX, BDB_DBMCLOSEX, +#endif + BDB_RANDX, BDB_RAND_INTX, BDB_SRANDX, + BDB_DBGCKX + }; + static int env_id = 0; + static int db_id = 0; +#ifdef HAVE_64BIT_TYPES + static int seq_id = 0; +#endif + + DB *dbp, *hrdbp, *hsdbp; +#ifdef HAVE_64BIT_TYPES + 
DB_SEQUENCE *seq; +#endif +#ifdef CONFIG_TEST + DBM *ndbmp; + static int ndbm_id = 0; +#endif + DBTCL_INFO *ip, *hrip, *hsip; + DB_ENV *dbenv; + Tcl_Obj *res; + int cmdindex, result; + char newname[MSG_SIZE]; + + COMPQUIET(notused, NULL); + COMPQUIET(hrdbp, NULL); + COMPQUIET(hrip, NULL); + COMPQUIET(hsdbp, NULL); + COMPQUIET(hsip, NULL); + + Tcl_ResetResult(interp); + memset(newname, 0, MSG_SIZE); + result = TCL_OK; + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the berkdbcmds + * defined above. + */ + if (Tcl_GetIndexFromObj(interp, + objv[1], berkdbcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + res = NULL; + switch ((enum berkdbcmds)cmdindex) { +#ifdef CONFIG_TEST + case BDB_DBVERIFY: + snprintf(newname, sizeof(newname), "db%d", db_id); + ip = _NewInfo(interp, NULL, newname, I_DB); + if (ip != NULL) { + result = bdb_DbVerify(interp, objc, objv, ip); + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + break; + case BDB_GETCONFIG: + result = bdb_GetConfig(interp, objc, objv); + break; + case BDB_HANDLES: + result = bdb_Handles(interp, objc, objv); + break; + case BDB_MSGTYPE: + result = bdb_MsgType(interp, objc, objv); + break; + case BDB_UPGRADE: + result = bdb_DbUpgrade(interp, objc, objv); + break; +#endif + case BDB_VERSION: + _debug_check(); + result = bdb_Version(interp, objc, objv); + break; + case BDB_ENV: + snprintf(newname, sizeof(newname), "env%d", env_id); + ip = _NewInfo(interp, NULL, newname, I_ENV); + if (ip != NULL) { + result = bdb_EnvOpen(interp, objc, objv, ip, &dbenv); + if (result == TCL_OK && dbenv != NULL) { + env_id++; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)env_Cmd, + (ClientData)dbenv, NULL); + /* Use ip->i_name - newname is overwritten */ + res = NewStringObj(newname, strlen(newname)); + 
_SetInfoData(ip, dbenv); + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + break; + case BDB_DBREMOVE: + result = bdb_DbRemove(interp, objc, objv); + break; + case BDB_DBRENAME: + result = bdb_DbRename(interp, objc, objv); + break; + case BDB_ENVREMOVE: + result = tcl_EnvRemove(interp, objc, objv); + break; + case BDB_OPEN: + snprintf(newname, sizeof(newname), "db%d", db_id); + ip = _NewInfo(interp, NULL, newname, I_DB); + if (ip != NULL) { + result = bdb_DbOpen(interp, objc, objv, ip, &dbp); + if (result == TCL_OK && dbp != NULL) { + db_id++; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)db_Cmd, + (ClientData)dbp, NULL); + /* Use ip->i_name - newname is overwritten */ + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(ip, dbp); + /* + * If we are a DB_HEAP, we need to finish + * setting up the DB_RECNO db which was + * started in bdb_DbOpen call. + */ + if (dbp->type == DB_HEAP) { + snprintf(newname, + sizeof(newname), "db%d", db_id); + hrip = _NewInfo(interp, + NULL, newname, I_AUX); + if (hrip != NULL) { + db_id++; + hrdbp = ip->hrdbp; + _SetInfoData(hrip, hrdbp); + hrdbp->api_internal = hrip; + hrip->hrdbp = dbp; + if (hrdbp->dbenv->db_errpfx + == NULL) + hrdbp->set_errpfx( + hrdbp, hrip->i_name); + } else { + Tcl_SetResult(interp, + "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + snprintf(newname, + sizeof(newname), "db%d", db_id); + hsip = _NewInfo(interp, + NULL, newname, I_AUX); + if (hsip != NULL) { + db_id++; + hsdbp = ip->hsdbp; + _SetInfoData(hsip, hsdbp); + hsdbp->api_internal = hsip; + hsip->hsdbp = dbp; + if (hsdbp->dbenv->db_errpfx + == NULL) + hsdbp->set_errpfx( + hsdbp, hsip->i_name); + } else { + Tcl_SetResult(interp, + "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + } + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } 
+ break; +#ifdef HAVE_64BIT_TYPES + case BDB_SEQUENCE: + snprintf(newname, sizeof(newname), "seq%d", seq_id); + ip = _NewInfo(interp, NULL, newname, I_SEQ); + if (ip != NULL) { + result = bdb_SeqOpen(interp, objc, objv, ip, &seq); + if (result == TCL_OK && seq != NULL) { + seq_id++; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)seq_Cmd, + (ClientData)seq, NULL); + /* Use ip->i_name - newname is overwritten */ + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(ip, seq); + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + break; +#endif +#ifdef CONFIG_TEST + case BDB_HCREATEX: + case BDB_HSEARCHX: + case BDB_HDESTROYX: + result = bdb_HCommand(interp, objc, objv); + break; + case BDB_DBMINITX: + case BDB_DBMCLOSEX: + case BDB_FETCHX: + case BDB_STOREX: + case BDB_DELETEX: + case BDB_FIRSTKEYX: + case BDB_NEXTKEYX: + result = bdb_DbmCommand(interp, objc, objv, DBTCL_DBM, NULL); + break; + case BDB_NDBMOPENX: + snprintf(newname, sizeof(newname), "ndbm%d", ndbm_id); + ip = _NewInfo(interp, NULL, newname, I_NDBM); + if (ip != NULL) { + result = bdb_NdbmOpen(interp, objc, objv, &ndbmp); + if (result == TCL_OK) { + ndbm_id++; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)ndbm_Cmd, + (ClientData)ndbmp, NULL); + /* Use ip->i_name - newname is overwritten */ + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(ip, ndbmp); + } else + _DeleteInfo(ip); + } else { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + result = TCL_ERROR; + } + break; +#endif + case BDB_RANDX: + case BDB_RAND_INTX: + case BDB_SRANDX: + result = bdb_RandCommand(interp, objc, objv); + break; + case BDB_DBGCKX: + _debug_check(); + res = Tcl_NewIntObj(0); + break; + } + /* + * For each different arg call different function to create + * new commands (or if version, get/return it). 
+ */ + if (result == TCL_OK && res != NULL) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * bdb_EnvOpen - + * Implements the environment open command. + * There are many, many options to the open command. + * Here is the general flow: + * + * 1. Call db_env_create to create the env handle. + * 2. Parse args tracking options. + * 3. Make any pre-open setup calls necessary. + * 4. Call DB_ENV->open to open the env. + * 5. Return env widget handle to user. + */ +static int +bdb_EnvOpen(interp, objc, objv, ip, dbenvp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBTCL_INFO *ip; /* Our internal info */ + DB_ENV **dbenvp; /* Environment pointer */ +{ + static const char *envopen[] = { +#ifdef CONFIG_TEST + "-alloc", + "-auto_commit", + "-cdb", + "-cdb_alldb", + "-event", + "-failchk", + "-isalive", + "-lock", + "-lock_conflict", + "-lock_detect", + "-lock_locks", + "-lock_lockers", + "-lock_objects", + "-lock_max_locks", + "-lock_max_lockers", + "-lock_max_objects", + "-lock_partitions", + "-lock_tablesize", + "-lock_timeout", + "-log", + "-log_filemode", + "-log_buffer", + "-log_inmemory", + "-log_max", + "-log_regionmax", + "-log_remove", + "-memory_max", + "-mpool_max_openfd", + "-mpool_max_write", + "-mpool_mmap_size", + "-mpool_mutex_count", + "-mpool_nommap", + "-multiversion", + "-mutex_set_align", + "-mutex_set_incr", + "-mutex_set_init", + "-mutex_set_max", + "-mutex_set_tas_spins", + "-overwrite", + "-pagesize", + "-register", + "-reg_timeout", + "-region_init", + "-rep", + "-rep_client", + "-rep_inmem_files", + "-rep_lease", + "-rep_master", + "-rep_nsites", + "-rep_transport", + "-set_intermediate_dir_mode", + "-snapshot", + "-tablesize", + "-thread", + "-thread_count", + "-time_notgranted", + "-txn_nowait", + "-txn_timeout", + "-txn_timestamp", + "-verbose", + "-wrnosync", + "-zero_log", +#endif + "-add_dir", + "-cachesize", + "-cache_max", + "-create", + 
"-create_dir", + "-data_dir", + "-encryptaes", + "-encryptany", + "-errfile", + "-errpfx", + "-home", + "-log_dir", + "-mode", + "-msgfile", + "-private", + "-recover", + "-recover_fatal", + "-shm_key", + "-system_mem", + "-tmp_dir", + "-txn", + "-txn_init", + "-txn_max", + "-use_environ", + "-use_environ_root", + NULL + }; + /* + * !!! + * These have to be in the same order as the above, + * which is close to but not quite alphabetical. + */ + enum envopen { +#ifdef CONFIG_TEST + TCL_ENV_ALLOC, + TCL_ENV_AUTO_COMMIT, + TCL_ENV_CDB, + TCL_ENV_CDB_ALLDB, + TCL_ENV_EVENT, + TCL_ENV_FAILCHK, + TCL_ENV_ISALIVE, + TCL_ENV_LOCK, + TCL_ENV_CONFLICT, + TCL_ENV_DETECT, + TCL_ENV_LOCK_LOCKS, + TCL_ENV_LOCK_LOCKERS, + TCL_ENV_LOCK_OBJECTS, + TCL_ENV_LOCK_MAX_LOCKS, + TCL_ENV_LOCK_MAX_LOCKERS, + TCL_ENV_LOCK_MAX_OBJECTS, + TCL_ENV_LOCK_PARTITIONS, + TCL_ENV_LOCK_TABLESIZE, + TCL_ENV_LOCK_TIMEOUT, + TCL_ENV_LOG, + TCL_ENV_LOG_FILEMODE, + TCL_ENV_LOG_BUFFER, + TCL_ENV_LOG_INMEMORY, + TCL_ENV_LOG_MAX, + TCL_ENV_LOG_REGIONMAX, + TCL_ENV_LOG_REMOVE, + TCL_ENV_MEMORY_MAX, + TCL_ENV_MPOOL_MAX_OPENFD, + TCL_ENV_MPOOL_MAX_WRITE, + TCL_ENV_MPOOL_MMAP_SIZE, + TCL_ENV_MUTEXCOUNT, + TCL_ENV_MPOOL_NOMMAP, + TCL_ENV_MULTIVERSION, + TCL_ENV_MUTSETALIGN, + TCL_ENV_MUTSETINCR, + TCL_ENV_MUTSETINIT, + TCL_ENV_MUTSETMAX, + TCL_ENV_MUTSETTAS, + TCL_ENV_OVERWRITE, + TCL_ENV_PAGESIZE, + TCL_ENV_REGISTER, + TCL_ENV_REG_TIMEOUT, + TCL_ENV_REGION_INIT, + TCL_ENV_REP, + TCL_ENV_REP_CLIENT, + TCL_ENV_REP_INMEM_FILES, + TCL_ENV_REP_LEASE, + TCL_ENV_REP_MASTER, + TCL_ENV_REP_NSITES, + TCL_ENV_REP_TRANSPORT, + TCL_ENV_SET_INTERMEDIATE_DIR, + TCL_ENV_SNAPSHOT, + TCL_ENV_TABLESIZE, + TCL_ENV_THREAD, + TCL_ENV_THREAD_COUNT, + TCL_ENV_TIME_NOTGRANTED, + TCL_ENV_TXN_NOWAIT, + TCL_ENV_TXN_TIMEOUT, + TCL_ENV_TXN_TIME, + TCL_ENV_VERBOSE, + TCL_ENV_WRNOSYNC, + TCL_ENV_ZEROLOG, +#endif + TCL_ENV_ADD_DIR, + TCL_ENV_CACHESIZE, + TCL_ENV_CACHE_MAX, + TCL_ENV_CREATE, + TCL_ENV_CREATE_DIR, + TCL_ENV_DATA_DIR, + 
TCL_ENV_ENCRYPT_AES, + TCL_ENV_ENCRYPT_ANY, + TCL_ENV_ERRFILE, + TCL_ENV_ERRPFX, + TCL_ENV_HOME, + TCL_ENV_LOG_DIR, + TCL_ENV_MODE, + TCL_ENV_MSGFILE, + TCL_ENV_PRIVATE, + TCL_ENV_RECOVER, + TCL_ENV_RECOVER_FATAL, + TCL_ENV_SHM_KEY, + TCL_ENV_SYSTEM_MEM, + TCL_ENV_TMP_DIR, + TCL_ENV_TXN, + TCL_ENV_TXN_INIT, + TCL_ENV_TXN_MAX, + TCL_ENV_USE_ENVIRON, + TCL_ENV_USE_ENVIRON_ROOT + }; + DB_ENV *dbenv; + Tcl_Obj **myobjv; + u_int32_t cr_flags, gbytes, bytes, logbufset, logmaxset; + u_int32_t open_flags, rep_flags, set_flags, uintarg; + int i, mode, myobjc, ncaches, optindex, result, ret; + long shm; + char *arg, *home, *passwd; +#ifdef CONFIG_TEST + Tcl_Obj **myobjv1; + time_t timestamp; + long v; + u_int32_t detect, time_flag; + u_int8_t *conflicts; + int intarg, intarg2, j, nmodes, temp; +#endif + + result = TCL_OK; + mode = 0; + rep_flags = set_flags = cr_flags = 0; + home = NULL; + + /* + * XXX + * If/when our Tcl interface becomes thread-safe, we should enable + * DB_THREAD here in all cases. For now, we turn it on later in this + * function, and only when we're in testing and we specify the + * -thread flag, so that we can exercise MUTEX_THREAD_LOCK cases. + * + * In order to become truly thread-safe, we need to look at making sure + * DBTCL_INFO structs are safe to share across threads (they're not + * mutex-protected) before we declare the Tcl interface thread-safe. + * Meanwhile, there's no strong reason to enable DB_THREAD when not + * testing. + */ + open_flags = 0; + logmaxset = logbufset = 0; + + if (objc <= 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + + i = 2; + if (result != TCL_OK) + return (TCL_ERROR); + if ((ret = db_env_create(&dbenv, cr_flags)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_env_create")); + *dbenvp = dbenv; + + /* + * From here on we must 'goto error' in order to clean up the + * dbenv from db_env_create. 
+ */ + dbenv->set_errpfx(dbenv, ip->i_name); + dbenv->set_errcall(dbenv, _ErrorFunc); + + /* Hang our info pointer on the dbenv handle, so we can do callbacks. */ + dbenv->app_private = ip; + + /* + * Get the command name index from the object based on the bdbcmds + * defined above. + */ + i = 2; + while (i < objc) { + Tcl_ResetResult(interp); + if (Tcl_GetIndexFromObj(interp, objv[i], envopen, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum envopen)optindex) { +#ifdef CONFIG_TEST + case TCL_ENV_ALLOC: + /* + * Use a Tcl-local alloc and free function so that + * we're sure to test whether we use umalloc/ufree in + * the right places. + */ + (void)dbenv->set_alloc(dbenv, + tcl_db_malloc, tcl_db_realloc, tcl_db_free); + break; + case TCL_ENV_AUTO_COMMIT: + FLD_SET(set_flags, DB_AUTO_COMMIT); + break; + case TCL_ENV_CDB: + FLD_SET(open_flags, DB_INIT_CDB | DB_INIT_MPOOL); + break; + case TCL_ENV_CDB_ALLDB: + FLD_SET(set_flags, DB_CDB_ALLDB); + break; + case TCL_ENV_EVENT: + if ((ret = __os_calloc(dbenv->env, 1, + sizeof(DBTCL_EVENT_INFO), + &ip->i_event_info)) == 0) { + _debug_check(); + ret = dbenv->set_event_notify(dbenv, + _EventFunc); + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_event_notify"); + break; + case TCL_ENV_FAILCHK: + FLD_SET(open_flags, DB_FAILCHK); + break; + case TCL_ENV_ISALIVE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-isalive aliveproc"); + result = TCL_ERROR; + break; + } + + ip->i_isalive = objv[i++]; + Tcl_IncrRefCount(ip->i_isalive); + _debug_check(); + /* Choose an arbitrary thread count, for testing. 
*/ + ret = dbenv->get_thread_count(dbenv, &uintarg); + if ((ret == 0) && (uintarg == 0)) + ret = dbenv->set_thread_count(dbenv, 5); + if (ret == 0) + ret = dbenv->set_isalive(dbenv, tcl_isalive); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_isalive"); + break; + case TCL_ENV_LOCK: + FLD_SET(open_flags, DB_INIT_LOCK | DB_INIT_MPOOL); + break; + case TCL_ENV_CONFLICT: + /* + * Get conflict list. List is: + * {nmodes {matrix}} + * + * Where matrix must be nmodes*nmodes big. + * Set up conflicts array to pass. + */ + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-lock_conflict {nmodes {matrix}}?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, myobjv[0], &nmodes); + if (result != TCL_OK) + break; + result = Tcl_ListObjGetElements(interp, myobjv[1], + &myobjc, &myobjv1); + if (myobjc != (nmodes * nmodes)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-lock_conflict {nmodes {matrix}}?"); + result = TCL_ERROR; + break; + } + + ret = __os_malloc(dbenv->env, sizeof(u_int8_t) * + (size_t)nmodes * (size_t)nmodes, &conflicts); + if (ret != 0) { + result = TCL_ERROR; + break; + } + for (j = 0; j < myobjc; j++) { + result = Tcl_GetIntFromObj(interp, myobjv1[j], + &temp); + conflicts[j] = temp; + if (result != TCL_OK) { + __os_free(NULL, conflicts); + break; + } + } + _debug_check(); + ret = dbenv->set_lk_conflicts(dbenv, + (u_int8_t *)conflicts, nmodes); + __os_free(NULL, conflicts); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_lk_conflicts"); + break; + case TCL_ENV_DETECT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-lock_detect policy?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(arg, "default") == 0) + detect = DB_LOCK_DEFAULT; + else if (strcmp(arg, "expire") == 0) + detect = DB_LOCK_EXPIRE; + else if (strcmp(arg, 
"maxlocks") == 0) + detect = DB_LOCK_MAXLOCKS; + else if (strcmp(arg, "maxwrites") == 0) + detect = DB_LOCK_MAXWRITE; + else if (strcmp(arg, "minlocks") == 0) + detect = DB_LOCK_MINLOCKS; + else if (strcmp(arg, "minwrites") == 0) + detect = DB_LOCK_MINWRITE; + else if (strcmp(arg, "oldest") == 0) + detect = DB_LOCK_OLDEST; + else if (strcmp(arg, "youngest") == 0) + detect = DB_LOCK_YOUNGEST; + else if (strcmp(arg, "random") == 0) + detect = DB_LOCK_RANDOM; + else { + Tcl_AddErrorInfo(interp, + "lock_detect: illegal policy"); + result = TCL_ERROR; + break; + } + _debug_check(); + ret = dbenv->set_lk_detect(dbenv, detect); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock_detect"); + break; + case TCL_ENV_LOCK_LOCKS: + case TCL_ENV_LOCK_LOCKERS: + case TCL_ENV_LOCK_OBJECTS: + case TCL_ENV_LOCK_MAX_LOCKS: + case TCL_ENV_LOCK_MAX_LOCKERS: + case TCL_ENV_LOCK_MAX_OBJECTS: + case TCL_ENV_LOCK_PARTITIONS: + case TCL_ENV_LOCK_TABLESIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-lock_max max?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + switch ((enum envopen)optindex) { + case TCL_ENV_LOCK_LOCKS: + ret = dbenv->set_memory_init(dbenv, + DB_MEM_LOCK, uintarg); + break; + case TCL_ENV_LOCK_LOCKERS: + ret = dbenv->set_memory_init(dbenv, + DB_MEM_LOCKER, uintarg); + break; + case TCL_ENV_LOCK_OBJECTS: + ret = dbenv->set_memory_init(dbenv, + DB_MEM_LOCKOBJECT, uintarg); + break; + case TCL_ENV_LOCK_MAX_LOCKS: + ret = dbenv->set_lk_max_locks(dbenv, + uintarg); + break; + case TCL_ENV_LOCK_MAX_LOCKERS: + ret = dbenv->set_lk_max_lockers(dbenv, + uintarg); + break; + case TCL_ENV_LOCK_MAX_OBJECTS: + ret = dbenv->set_lk_max_objects(dbenv, + uintarg); + break; + case TCL_ENV_LOCK_PARTITIONS: + ret = dbenv->set_lk_partitions(dbenv, + uintarg); + break; + case TCL_ENV_LOCK_TABLESIZE: + ret = dbenv->set_lk_tablesize(dbenv, + uintarg); + break; + default: + break; + } 
+ result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock_max"); + } + break; + case TCL_ENV_MUTSETALIGN: + case TCL_ENV_MUTSETINCR: + case TCL_ENV_MUTSETINIT: + case TCL_ENV_MUTSETMAX: + case TCL_ENV_MUTSETTAS: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mutex_set val"); + result = TCL_ERROR; + break; + } + intarg = 0; + switch ((enum envopen)optindex) { + case TCL_ENV_MUTSETALIGN: + intarg = DBTCL_MUT_ALIGN; + break; + case TCL_ENV_MUTSETINCR: + intarg = DBTCL_MUT_INCR; + break; + case TCL_ENV_MUTSETINIT: + intarg = DBTCL_MUT_INIT; + break; + case TCL_ENV_MUTSETMAX: + intarg = DBTCL_MUT_MAX; + break; + case TCL_ENV_MUTSETTAS: + intarg = DBTCL_MUT_TAS; + break; + default: + break; + } + result = tcl_MutSet(interp, objv[i++], dbenv, intarg); + break; + case TCL_ENV_TXN_NOWAIT: + FLD_SET(set_flags, DB_TXN_NOWAIT); + break; + case TCL_ENV_TXN_TIME: + case TCL_ENV_TXN_TIMEOUT: + case TCL_ENV_LOCK_TIMEOUT: + case TCL_ENV_REG_TIMEOUT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-xxx_timeout time?"); + result = TCL_ERROR; + break; + } + + if ((result = Tcl_GetLongFromObj( + interp, objv[i++], &v)) != TCL_OK) + break; + timestamp = (time_t)v; + + _debug_check(); + if ((enum envopen)optindex == TCL_ENV_TXN_TIME) + ret = + dbenv->set_tx_timestamp(dbenv, &timestamp); + else { + if ((enum envopen)optindex == + TCL_ENV_LOCK_TIMEOUT) + time_flag = DB_SET_LOCK_TIMEOUT; + else if ((enum envopen)optindex == + TCL_ENV_REG_TIMEOUT) + time_flag = DB_SET_REG_TIMEOUT; + else + time_flag = DB_SET_TXN_TIMEOUT; + + ret = dbenv->set_timeout(dbenv, + (db_timeout_t)timestamp, time_flag); + } + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "txn_timestamp"); + break; + case TCL_ENV_LOG: + FLD_SET(open_flags, DB_INIT_LOG | DB_INIT_MPOOL); + break; + case TCL_ENV_LOG_BUFFER: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-log_buffer size?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == 
TCL_OK) { + _debug_check(); + ret = dbenv->set_lg_bsize(dbenv, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_bsize"); + logbufset = 1; + if (logmaxset) { + _debug_check(); + ret = dbenv->set_lg_max(dbenv, + logmaxset); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_max"); + logmaxset = 0; + logbufset = 0; + } + } + break; + case TCL_ENV_LOG_FILEMODE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-log_filemode mode?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_lg_filemode(dbenv, + (int)uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_filemode"); + } + break; + case TCL_ENV_LOG_INMEMORY: + ret = + dbenv->log_set_config(dbenv, DB_LOG_IN_MEMORY, 1); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_inmemory"); + break; + case TCL_ENV_LOG_MAX: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-log_max max?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK && logbufset) { + _debug_check(); + ret = dbenv->set_lg_max(dbenv, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_max"); + logbufset = 0; + } else + logmaxset = uintarg; + break; + case TCL_ENV_LOG_REGIONMAX: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-log_regionmax size?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_lg_regionmax(dbenv, uintarg); + result = + _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "log_regionmax"); + } + break; + case TCL_ENV_LOG_REMOVE: + ret = + dbenv->log_set_config(dbenv, DB_LOG_AUTO_REMOVE, 1); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log_remove"); + break; + case TCL_ENV_MEMORY_MAX: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + 
if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-memory_max {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, myobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, myobjv[1], &bytes); + if (result != TCL_OK) + break; + _debug_check(); + ret = dbenv->set_memory_max(dbenv, gbytes, bytes); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "memory_max"); + break; + case TCL_ENV_MPOOL_MAX_OPENFD: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mpool_max_openfd fd_count?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_mp_max_openfd(dbenv, intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "mpool_max_openfd"); + } + break; + case TCL_ENV_MPOOL_MAX_WRITE: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mpool_max_write {nwrite nsleep}?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, myobjv[0], &intarg); + if (result != TCL_OK) + break; + result = Tcl_GetIntFromObj(interp, myobjv[1], &intarg2); + if (result != TCL_OK) + break; + _debug_check(); + ret = dbenv->set_mp_max_write( + dbenv, intarg, (db_timeout_t)intarg2); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_mp_max_write"); + break; + case TCL_ENV_MPOOL_MMAP_SIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mpool_mmap_size size?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_mp_mmapsize(dbenv, + (size_t)intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "mpool_mmap_size"); + } + break; + case TCL_ENV_MPOOL_NOMMAP: + FLD_SET(set_flags, 
DB_NOMMAP); + break; + case TCL_ENV_MULTIVERSION: + FLD_SET(set_flags, DB_MULTIVERSION); + break; + case TCL_ENV_OVERWRITE: + FLD_SET(set_flags, DB_OVERWRITE); + break; + case TCL_ENV_MUTEXCOUNT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mp_mutex_count count?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_mp_mtxcount(dbenv, + (u_int32_t)intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "mutex count"); + } + break; + case TCL_ENV_PAGESIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-pagesize size?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_mp_pagesize(dbenv, + (u_int32_t)intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "pagesize"); + } + break; + case TCL_ENV_TABLESIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-tablesize size?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_mp_tablesize(dbenv, + (u_int32_t)intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "tablesize"); + } + break; + case TCL_ENV_REGISTER: + FLD_SET(open_flags, DB_REGISTER); + break; + case TCL_ENV_REGION_INIT: + _debug_check(); + ret = dbenv->set_flags(dbenv, DB_REGION_INIT, 1); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "region_init"); + break; + case TCL_ENV_SET_INTERMEDIATE_DIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-set_intermediate_dir_mode mode?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = dbenv->set_intermediate_dir_mode(dbenv, arg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_intermediate_dir_mode"); + break; + case TCL_ENV_REP: + 
FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_CLIENT: + rep_flags = DB_REP_CLIENT; + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_MASTER: + rep_flags = DB_REP_MASTER; + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_NSITES: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-rep_nsites nsites?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->rep_set_nsites(dbenv, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "rep_set_nsites"); + } + if (result == TCL_OK) + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_INMEM_FILES: + result = tcl_RepInmemFiles(interp,dbenv); + if (result == TCL_OK) + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_LEASE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-rep_lease {timeout clockskew}"); + result = TCL_ERROR; + break; + } + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + result = tcl_RepLease(interp, myobjc, myobjv, dbenv); + if (result == TCL_OK) + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_REP_TRANSPORT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-rep_transport {envid sendproc}"); + result = TCL_ERROR; + break; + } + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + result = tcl_RepTransport( + interp, myobjc, myobjv, dbenv, ip); + if (result == TCL_OK) + FLD_SET(open_flags, DB_INIT_REP); + break; + case TCL_ENV_SNAPSHOT: + FLD_SET(set_flags, DB_TXN_SNAPSHOT); + break; + case TCL_ENV_THREAD: + /* Enable DB_THREAD when specified in testing. 
*/ + FLD_SET(open_flags, DB_THREAD); + break; + case TCL_ENV_THREAD_COUNT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-thread_count count?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_thread_count(dbenv, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_thread_count"); + } + break; + case TCL_ENV_TIME_NOTGRANTED: + FLD_SET(set_flags, DB_TIME_NOTGRANTED); + break; + case TCL_ENV_VERBOSE: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-verbose {which on|off}?"); + result = TCL_ERROR; + break; + } + result = tcl_EnvVerbose( + interp, dbenv, myobjv[0], myobjv[1]); + break; + case TCL_ENV_WRNOSYNC: + FLD_SET(set_flags, DB_TXN_WRITE_NOSYNC); + break; + case TCL_ENV_ZEROLOG: + if ((ret = + dbenv->log_set_config(dbenv, DB_LOG_ZERO, 1)) != 0) + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_log_config"); + break; +#endif + case TCL_ENV_TXN: + FLD_SET(open_flags, DB_INIT_LOCK | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN); + /* Make sure we have an arg to check against! */ + while (i < objc) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (strcmp(arg, "nosync") == 0) { + FLD_SET(set_flags, DB_TXN_NOSYNC); + i++; + } else if (strcmp(arg, "snapshot") == 0) { + FLD_SET(set_flags, DB_TXN_SNAPSHOT); + i++; + } else if (strcmp(arg, "wrnosync") == 0) { + FLD_SET(set_flags, DB_TXN_WRITE_NOSYNC); + i++; + } else + break; + } + break; + case TCL_ENV_CREATE: + FLD_SET(open_flags, DB_CREATE | DB_INIT_MPOOL); + break; + case TCL_ENV_ENCRYPT_AES: + /* Make sure we have an arg to check against! 
*/ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptaes passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + break; + case TCL_ENV_ENCRYPT_ANY: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptany passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = dbenv->set_encrypt(dbenv, passwd, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + break; + case TCL_ENV_HOME: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-home dir?"); + result = TCL_ERROR; + break; + } + home = Tcl_GetStringFromObj(objv[i++], NULL); + break; + case TCL_ENV_MODE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mode mode?"); + result = TCL_ERROR; + break; + } + /* + * Don't need to check result here because + * if TCL_ERROR, the error message is already + * set up, and we'll bail out below. If ok, + * the mode is set and we go on. 
+ */ + result = Tcl_GetIntFromObj(interp, objv[i++], &mode); + break; + case TCL_ENV_PRIVATE: + FLD_SET(open_flags, DB_PRIVATE | DB_INIT_MPOOL); + break; + case TCL_ENV_RECOVER: + FLD_SET(open_flags, DB_RECOVER); + break; + case TCL_ENV_RECOVER_FATAL: + FLD_SET(open_flags, DB_RECOVER_FATAL); + break; + case TCL_ENV_SYSTEM_MEM: + FLD_SET(open_flags, DB_SYSTEM_MEM); + break; + case TCL_ENV_USE_ENVIRON_ROOT: + FLD_SET(open_flags, DB_USE_ENVIRON_ROOT); + break; + case TCL_ENV_USE_ENVIRON: + FLD_SET(open_flags, DB_USE_ENVIRON); + break; + case TCL_ENV_CACHESIZE: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-cachesize {gbytes bytes ncaches}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, myobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, myobjv[1], &bytes); + if (result != TCL_OK) + break; + result = Tcl_GetIntFromObj(interp, myobjv[2], &ncaches); + if (result != TCL_OK) + break; + _debug_check(); + ret = dbenv->set_cachesize(dbenv, gbytes, bytes, + ncaches); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_cachesize"); + break; + case TCL_ENV_CACHE_MAX: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-cache_max {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, myobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, myobjv[1], &bytes); + if (result != TCL_OK) + break; + _debug_check(); + ret = dbenv->set_cache_max(dbenv, gbytes, bytes); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_cache_max"); + break; + case TCL_ENV_SHM_KEY: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-shm_key key?"); + result = TCL_ERROR; + break; + } + result = 
Tcl_GetLongFromObj(interp, objv[i++], &shm); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_shm_key(dbenv, shm); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "shm_key"); + } + break; + case TCL_ENV_TXN_INIT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-txn_init init?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_memory_init(dbenv, + DB_MEM_TRANSACTION, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "txn_init"); + } + break; + case TCL_ENV_TXN_MAX: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-txn_max max?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv->set_tx_max(dbenv, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "txn_max"); + } + break; + case TCL_ENV_ERRFILE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-errfile file"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + tcl_EnvSetErrfile(interp, dbenv, ip, arg); + break; + case TCL_ENV_MSGFILE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-msgfile file"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + tcl_EnvSetMsgfile(interp, dbenv, ip, arg); + break; + case TCL_ENV_ERRPFX: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-errpfx prefix"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + result = tcl_EnvSetErrpfx(interp, dbenv, ip, arg); + break; + case TCL_ENV_DATA_DIR: + case TCL_ENV_ADD_DIR: + case TCL_ENV_CREATE_DIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-xxx_dir dir"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + switch ((enum envopen)optindex) { + case TCL_ENV_DATA_DIR: 
+ ret = dbenv->set_data_dir(dbenv, arg); + break; + case TCL_ENV_ADD_DIR: + ret = dbenv->add_data_dir(dbenv, arg); + break; + case TCL_ENV_CREATE_DIR: + ret = dbenv->set_create_dir(dbenv, arg); + break; + default: + break; + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "xxx_dir"); + break; + case TCL_ENV_LOG_DIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-log_dir dir"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = dbenv->set_lg_dir(dbenv, arg); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_lg_dir"); + break; + case TCL_ENV_TMP_DIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-tmp_dir dir"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = dbenv->set_tmp_dir(dbenv, arg); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_tmp_dir"); + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + } + + /* + * We have to check this here. We want to set the log buffer + * size first, if it is specified. So if the user did so, + * then we took care of it above. But, if we get out here and + * logmaxset is non-zero, then they set the log_max without + * resetting the log buffer size, so we now have to do the + * call to set_lg_max, since we didn't do it above. + */ + if (logmaxset) { + _debug_check(); + ret = dbenv->set_lg_max(dbenv, (u_int32_t)logmaxset); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "log_max"); + } + + if (result != TCL_OK) + goto error; + + if (set_flags) { + ret = dbenv->set_flags(dbenv, set_flags, 1); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + if (result == TCL_ERROR) + goto error; + /* + * If we are successful, clear the result so that the + * return from set_flags isn't part of the result. 
+ */ + Tcl_ResetResult(interp); + } + /* + * When we get here, we have already parsed all of our args + * and made all our calls to set up the environment. Everything + * is okay so far, no errors, if we get here. + * + * Now open the environment. + */ + _debug_check(); + ret = dbenv->open(dbenv, home, open_flags, mode); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "dbenv open"); + + if (ip->i_event_info != NULL && result == TCL_OK) { + _debug_check(); + ret = __mutex_alloc(dbenv->env, MTX_TCL_EVENTS, + DB_MUTEX_PROCESS_ONLY, &ip->i_mutex); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "__mutex_alloc"); + } + + if (rep_flags != 0 && result == TCL_OK) { + _debug_check(); + ret = dbenv->rep_start(dbenv, NULL, rep_flags); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "rep_start"); + } + +error: if (result == TCL_ERROR) { + if (ip->i_err && ip->i_err != stdout && ip->i_err != stderr) { + (void)fclose(ip->i_err); + ip->i_err = NULL; + } + if (ip->i_msg && ip->i_msg != stdout && ip->i_msg != stderr) { + (void)fclose(ip->i_msg); + ip->i_msg = NULL; + } + (void)__mutex_free(dbenv->env, &ip->i_mutex); + (void)dbenv->close(dbenv, 0); + } + return (result); +} + +/* + * bdb_DbOpen -- + * Implements the "db_create/db_open" command. + * There are many, many options to the open command. + * Here is the general flow: + * + * 0. Preparse args to determine if we have -env. + * 1. Call db_create to create the db handle. + * 2. Parse args tracking options. + * 3. Make any pre-open setup calls necessary. + * 4. Call DB->open to open the database. + * 5. Return db widget handle to user. + */ +static int +bdb_DbOpen(interp, objc, objv, ip, dbp) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBTCL_INFO *ip; /* Our internal info */ + DB **dbp; /* DB handle */ +{ + static const char *bdbenvopen[] = { + "-env", NULL + }; + enum bdbenvopen { + TCL_DB_ENV0 + }; + static const char *bdbopen[] = { +#ifdef CONFIG_TEST + "-btcompare", + "-dupcompare", + "-hashcompare", + "-hashproc", + "-lorder", + "-minkey", + "-nommap", + "-notdurable", + "-partition", + "-partition_dirs", + "-partition_callback", + "-read_uncommitted", + "-revsplitoff", + "-test", + "-thread", +#endif + "-auto_commit", + "-btree", + "-cachesize", + "-chksum", + "-compress", + "-create", + "-create_dir", + "-delim", + "-dup", + "-dupsort", + "-encrypt", + "-encryptaes", + "-encryptany", + "-env", + "-errfile", + "-errpfx", + "-excl", + "-extent", + "-ffactor", + "-hash", + "-heap", + "-inorder", + "-len", + "-maxsize", + "-mode", + "-msgfile", + "-multiversion", + "-nelem", + "-pad", + "-pagesize", + "-queue", + "-rdonly", + "-recno", + "-recnum", + "-renumber", + "-snapshot", + "-source", + "-truncate", + "-txn", + "-unknown", + "--", + NULL + }; + enum bdbopen { +#ifdef CONFIG_TEST + TCL_DB_BTCOMPARE, + TCL_DB_DUPCOMPARE, + TCL_DB_HASHCOMPARE, + TCL_DB_HASHPROC, + TCL_DB_LORDER, + TCL_DB_MINKEY, + TCL_DB_NOMMAP, + TCL_DB_NOTDURABLE, + TCL_DB_PARTITION, + TCL_DB_PART_DIRS, + TCL_DB_PART_CALLBACK, + TCL_DB_READ_UNCOMMITTED, + TCL_DB_REVSPLIT, + TCL_DB_TEST, + TCL_DB_THREAD, +#endif + TCL_DB_AUTO_COMMIT, + TCL_DB_BTREE, + TCL_DB_CACHESIZE, + TCL_DB_CHKSUM, + TCL_DB_COMPRESS, + TCL_DB_CREATE, + TCL_DB_CREATE_DIR, + TCL_DB_DELIM, + TCL_DB_DUP, + TCL_DB_DUPSORT, + TCL_DB_ENCRYPT, + TCL_DB_ENCRYPT_AES, + TCL_DB_ENCRYPT_ANY, + TCL_DB_ENV, + TCL_DB_ERRFILE, + TCL_DB_ERRPFX, + TCL_DB_EXCL, + TCL_DB_EXTENT, + TCL_DB_FFACTOR, + TCL_DB_HASH, + TCL_DB_HEAP, + TCL_DB_INORDER, + TCL_DB_LEN, + TCL_DB_MAXSIZE, + TCL_DB_MODE, + TCL_DB_MSGFILE, + TCL_DB_MULTIVERSION, + TCL_DB_NELEM, + TCL_DB_PAD, + TCL_DB_PAGESIZE, + TCL_DB_QUEUE, + TCL_DB_RDONLY, + 
TCL_DB_RECNO, + TCL_DB_RECNUM, + TCL_DB_RENUMBER, + TCL_DB_SNAPSHOT, + TCL_DB_SOURCE, + TCL_DB_TRUNCATE, + TCL_DB_TXN, + TCL_DB_UNKNOWN, + TCL_DB_ENDARG + }; + DBT *keys; + DBTCL_INFO *envip, *errip; + DBTYPE type; + DB_ENV *dbenv; + DB_TXN *txn; + DB *hrdbp, *hsdbp; + ENV *env; + + Tcl_Obj **myobjv; + u_int32_t gbytes, bytes, open_flags, set_flags, uintarg; + int endarg, encenble, i, intarg, mode, myobjc, ncaches; + int optindex, result, ret, set_err, set_msg, set_pfx, subdblen; + u_char *subdbtmp; + char *arg, *db, *dbr, *passwd, *subdb, *subdbr, msg[MSG_SIZE]; + size_t nlen; + + type = DB_UNKNOWN; + endarg = encenble = mode = nlen = set_err = set_msg = set_flags = 0; + set_pfx = 0; + result = TCL_OK; + subdbtmp = NULL; + keys = NULL; + db = dbr = passwd = subdb = subdbr = NULL; + hrdbp = hsdbp = NULL; + + /* + * XXX + * If/when our Tcl interface becomes thread-safe, we should enable + * DB_THREAD here in all cases. For now, we turn it on later in this + * function, and only when we're in testing and we specify the + * -thread flag, so that we can exercise MUTEX_THREAD_LOCK cases. + * + * In order to become truly thread-safe, we need to look at making sure + * DBTCL_INFO structs are safe to share across threads (they're not + * mutex-protected) before we declare the Tcl interface thread-safe. + * Meanwhile, there's no strong reason to enable DB_THREAD when not + * testing. + */ + open_flags = 0; + + dbenv = NULL; + txn = NULL; + env = NULL; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i++], bdbenvopen, + "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get + * an errant error message if there is another error. 
+ */ + Tcl_ResetResult(interp); + continue; + } + switch ((enum bdbenvopen)optindex) { + case TCL_DB_ENV0: + arg = Tcl_GetStringFromObj(objv[i], NULL); + dbenv = NAME_TO_ENV(arg); + if (dbenv == NULL) { + Tcl_SetResult(interp, + "db open: illegal environment", TCL_STATIC); + return (TCL_ERROR); + } + } + break; + } + + /* + * Create the db handle before parsing the args + * since we'll be modifying the database options as we parse. + */ + ret = db_create(dbp, dbenv, 0); + if (ret) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create")); + + /* Hang our info pointer on the DB handle, so we can do callbacks. */ + (*dbp)->api_internal = ip; + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the environment. So, if + * we are using an env, don't overwrite it; if not using an env, + * then configure error handling. + */ + if (dbenv == NULL) { + env = NULL; + (*dbp)->set_errpfx((*dbp), ip->i_name); + (*dbp)->set_errcall((*dbp), _ErrorFunc); + } else + env = dbenv->env; + + /* + * If we are using an env, we keep track of err info in the env's ip. + * Otherwise use the DB's ip. + */ + envip = _PtrToInfo(dbenv); /* XXX */ + if (envip) + errip = envip; + else + errip = ip; + + /* + * Get the option name index from the object based on the args + * defined above. + */ + i = 2; + while (i < objc) { + Tcl_ResetResult(interp); + if (Tcl_GetIndexFromObj(interp, objv[i], bdbopen, "option", + TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum bdbopen)optindex) { +#ifdef CONFIG_TEST + case TCL_DB_BTCOMPARE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-btcompare compareproc"); + result = TCL_ERROR; + break; + } + + /* + * Store the object containing the procedure name. 
+ * We don't need to crack it out now--we'll want + * to bundle it up to pass into Tcl_EvalObjv anyway. + * Tcl's object refcounting will--I hope--take care + * of the memory management here. + */ + ip->i_compare = objv[i++]; + Tcl_IncrRefCount(ip->i_compare); + _debug_check(); + ret = (*dbp)->set_bt_compare(*dbp, tcl_bt_compare); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_bt_compare"); + break; + case TCL_DB_DUPCOMPARE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-dupcompare compareproc"); + result = TCL_ERROR; + break; + } + + /* + * Store the object containing the procedure name. + * See TCL_DB_BTCOMPARE. + */ + ip->i_dupcompare = objv[i++]; + Tcl_IncrRefCount(ip->i_dupcompare); + _debug_check(); + ret = (*dbp)->set_dup_compare(*dbp, tcl_dup_compare); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_dup_compare"); + break; + case TCL_DB_HASHCOMPARE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-hashcompare compareproc"); + result = TCL_ERROR; + break; + } + + /* + * Store the object containing the procedure name. + * We don't need to crack it out now--we'll want + * to bundle it up to pass into Tcl_EvalObjv anyway. + * Tcl's object refcounting will--I hope--take care + * of the memory management here. + */ + ip->i_compare = objv[i++]; + Tcl_IncrRefCount(ip->i_compare); + _debug_check(); + ret = (*dbp)->set_h_compare(*dbp, tcl_bt_compare); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_h_compare"); + break; + case TCL_DB_HASHPROC: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-hashproc hashproc"); + result = TCL_ERROR; + break; + } + + /* + * Store the object containing the procedure name. + * See TCL_DB_BTCOMPARE. 
+ */ + ip->i_hashproc = objv[i++]; + Tcl_IncrRefCount(ip->i_hashproc); + _debug_check(); + ret = (*dbp)->set_h_hash(*dbp, tcl_h_hash); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_h_hash"); + break; + case TCL_DB_LORDER: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-lorder 1234|4321"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_lorder(*dbp, intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_lorder"); + } + break; + case TCL_DB_MINKEY: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-minkey minkey"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_bt_minkey(*dbp, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_bt_minkey"); + } + break; + case TCL_DB_NOMMAP: + open_flags |= DB_NOMMAP; + break; + case TCL_DB_NOTDURABLE: + set_flags |= DB_TXN_NOT_DURABLE; + break; + case TCL_DB_PART_CALLBACK: + if (i + 1 >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-partition_callback numparts callback"); + result = TCL_ERROR; + break; + } + + /* + * Store the object containing the procedure name. + * See TCL_DB_BTCOMPARE. 
+ */ + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result != TCL_OK) + break; + ip->i_part_callback = objv[i++]; + Tcl_IncrRefCount(ip->i_part_callback); + _debug_check(); + ret = (*dbp)->set_partition( + *dbp, uintarg, NULL, tcl_part_callback); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_partition_callback"); + break; + case TCL_DB_PART_DIRS: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-partition {dir list}"); + result = TCL_ERROR; + break; + } + ret = tcl_set_partition_dirs(interp, *dbp, objv[i++]); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_partition_dirs"); + break; + case TCL_DB_PARTITION: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-partition {key list}"); + result = TCL_ERROR; + break; + } + _debug_check(); + ret = tcl_set_partition_keys(interp, + *dbp, objv[i++], &keys); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_partition_keys"); + break; + case TCL_DB_READ_UNCOMMITTED: + open_flags |= DB_READ_UNCOMMITTED; + break; + case TCL_DB_REVSPLIT: + set_flags |= DB_REVSPLITOFF; + break; + case TCL_DB_TEST: + ret = (*dbp)->set_h_hash(*dbp, __ham_test); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_h_hash"); + break; + case TCL_DB_THREAD: + /* Enable DB_THREAD when specified in testing. */ + open_flags |= DB_THREAD; + break; +#endif + case TCL_DB_AUTO_COMMIT: + open_flags |= DB_AUTO_COMMIT; + break; + case TCL_DB_ENV: + /* + * Already parsed this, skip it and the env pointer. 
+ */ + i++; + continue; + case TCL_DB_TXN: + if (i > (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Open: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + case TCL_DB_BTREE: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + type = DB_BTREE; + break; + case TCL_DB_HASH: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + type = DB_HASH; + break; + case TCL_DB_HEAP: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + type = DB_HEAP; + break; + case TCL_DB_RECNO: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + type = DB_RECNO; + break; + case TCL_DB_QUEUE: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + type = DB_QUEUE; + break; + case TCL_DB_UNKNOWN: + if (type != DB_UNKNOWN) { + Tcl_SetResult(interp, + "Too many DB types specified", TCL_STATIC); + result = TCL_ERROR; + goto error; + } + break; + case TCL_DB_CREATE: + open_flags |= DB_CREATE; + break; + case TCL_DB_CREATE_DIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-create_dir dir"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = (*dbp)->set_create_dir(*dbp, arg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_create_dir"); + break; + case TCL_DB_EXCL: + open_flags |= DB_EXCL; + break; + case TCL_DB_RDONLY: + open_flags |= DB_RDONLY; + break; + case 
TCL_DB_TRUNCATE: + open_flags |= DB_TRUNCATE; + break; + case TCL_DB_MODE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mode mode?"); + result = TCL_ERROR; + break; + } + /* + * Don't need to check result here because + * if TCL_ERROR, the error message is already + * set up, and we'll bail out below. If ok, + * the mode is set and we go on. + */ + result = Tcl_GetIntFromObj(interp, objv[i++], &mode); + break; + case TCL_DB_DUP: + set_flags |= DB_DUP; + break; + case TCL_DB_DUPSORT: + set_flags |= DB_DUPSORT; + break; + case TCL_DB_INORDER: + set_flags |= DB_INORDER; + break; + case TCL_DB_RECNUM: + set_flags |= DB_RECNUM; + break; + case TCL_DB_RENUMBER: + set_flags |= DB_RENUMBER; + break; + case TCL_DB_SNAPSHOT: + set_flags |= DB_SNAPSHOT; + break; + case TCL_DB_CHKSUM: + set_flags |= DB_CHKSUM; + break; + case TCL_DB_ENCRYPT: + set_flags |= DB_ENCRYPT; + break; + case TCL_DB_ENCRYPT_AES: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptaes passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = (*dbp)->set_encrypt(*dbp, passwd, DB_ENCRYPT_AES); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + encenble = 1; + break; + case TCL_DB_ENCRYPT_ANY: + /* Make sure we have an arg to check against! 
*/ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptany passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = (*dbp)->set_encrypt(*dbp, passwd, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + encenble = 2; + break; + case TCL_DB_COMPRESS: + ret = (*dbp)->set_bt_compress(*dbp, 0, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_bt_compress"); + break; + case TCL_DB_FFACTOR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-ffactor density"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_h_ffactor(*dbp, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_h_ffactor"); + } + break; + case TCL_DB_MULTIVERSION: + open_flags |= DB_MULTIVERSION; + break; + case TCL_DB_NELEM: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-nelem nelem"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_h_nelem(*dbp, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_h_nelem"); + } + break; + case TCL_DB_DELIM: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-delim delim"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_re_delim(*dbp, intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_re_delim"); + } + break; + case TCL_DB_LEN: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-len length"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_re_len(*dbp, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_re_len"); + } + break; 
+ case TCL_DB_MAXSIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-len length"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->mpf->set_maxsize( + (*dbp)->mpf, 0, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_maxsize"); + } + break; + case TCL_DB_PAD: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-pad pad"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_re_pad(*dbp, intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_re_pad"); + } + break; + case TCL_DB_SOURCE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-source file"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + _debug_check(); + ret = (*dbp)->set_re_source(*dbp, arg); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_re_source"); + break; + case TCL_DB_EXTENT: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-extent size"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_q_extentsize(*dbp, uintarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_q_extentsize"); + } + break; + case TCL_DB_CACHESIZE: + result = Tcl_ListObjGetElements(interp, objv[i++], + &myobjc, &myobjv); + if (result != TCL_OK) + break; + if (myobjc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-cachesize {gbytes bytes ncaches}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, myobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, myobjv[1], &bytes); + if (result != TCL_OK) + break; + result = Tcl_GetIntFromObj(interp, myobjv[2], &ncaches); + if (result != TCL_OK) + break; + _debug_check(); + ret = (*dbp)->set_cachesize(*dbp, 
gbytes, bytes, + ncaches); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_cachesize"); + break; + case TCL_DB_PAGESIZE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-pagesize size?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &intarg); + if (result == TCL_OK) { + _debug_check(); + ret = (*dbp)->set_pagesize(*dbp, + (size_t)intarg); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set pagesize"); + } + break; + case TCL_DB_ERRFILE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-errfile file"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + /* + * If the user already set one, close it. + */ + if (errip->i_err != NULL && + errip->i_err != stdout && errip->i_err != stderr) + (void)fclose(errip->i_err); + if (strcmp(arg, "/dev/stdout") == 0) + errip->i_err = stdout; + else if (strcmp(arg, "/dev/stderr") == 0) + errip->i_err = stderr; + else + errip->i_err = fopen(arg, "a"); + if (errip->i_err != NULL) { + _debug_check(); + (*dbp)->set_errfile(*dbp, errip->i_err); + set_err = 1; + } + break; + case TCL_DB_MSGFILE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-msgfile file"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + /* + * If the user already set one, close it. + */ + if (errip->i_msg != NULL && + errip->i_msg != stdout && errip->i_msg != stderr) + (void)fclose(errip->i_msg); + if (strcmp(arg, "/dev/stdout") == 0) + errip->i_msg = stdout; + else if (strcmp(arg, "/dev/stderr") == 0) + errip->i_msg = stderr; + else + errip->i_msg = fopen(arg, "a"); + if (errip->i_msg != NULL) { + _debug_check(); + (*dbp)->set_msgfile(*dbp, errip->i_msg); + set_msg = 1; + } + break; + case TCL_DB_ERRPFX: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-errpfx prefix"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + /* + * If the user already set one, free it. 
+ */ + if (errip->i_errpfx != NULL) { + (*dbp)->set_errpfx(*dbp, NULL); + __os_free(NULL, errip->i_errpfx); + } + if ((ret = __os_strdup((*dbp)->env, + arg, &errip->i_errpfx)) != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "__os_strdup"); + break; + } + if (errip->i_errpfx != NULL) { + _debug_check(); + (*dbp)->set_errpfx(*dbp, errip->i_errpfx); + set_pfx = 1; + } + break; + case TCL_DB_ENDARG: + endarg = 1; + break; + } /* switch */ + + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + + /* + * Any args we have left, (better be 0, 1 or 2 left) are + * file names. If we have 0, then an in-memory db. If + * there is 1, a db name, if 2 a db and subdb name. + */ + if (i != objc) { + /* + * Dbs must be NULL terminated file names, but subdbs can + * be anything. Use Strings for the db name and byte + * arrays for the subdb. + */ + db = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(db, "") == 0) + db = NULL; + if (i != objc) { + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &subdblen); + if ((ret = __os_malloc(env, + (size_t)subdblen + 1, &subdb)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), + TCL_STATIC); + return (0); + } + memcpy(subdb, subdbtmp, (size_t)subdblen); + subdb[subdblen] = '\0'; + } + } + if (set_flags) { + ret = (*dbp)->set_flags(*dbp, set_flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + if (result == TCL_ERROR) + goto error; + /* + * If we are successful, clear the result so that the + * return from set_flags isn't part of the result. + */ + Tcl_ResetResult(interp); + } + + /* + * When we get here, we have already parsed all of our args and made + * all our calls to set up the database. Everything is okay so far, + * no errors, if we get here. + */ + _debug_check(); + + /* Open the database. 
*/ + ret = (*dbp)->open(*dbp, txn, db, subdb, type, open_flags, mode); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db open"); + /* + * We may have been cleanly opened with DB_UNKNOWN, and we need to know + * if we opened a heap. + */ + if (ret == 0) + (void)(*dbp)->get_type(*dbp, &type); + + /* + * If we cleanly created a heap database, then we need to initiate the + * db handle for the auxiliary recno db. To have heap work like + * the other record based am, we pass along record numbers. The + * recno db maps the record number to the actual record id used + * by the heap am. This is all done underneath the covers. + */ + if (ret == 0 && type == DB_HEAP) { + /* Setup recno db mapping recno to RID. */ + ret = db_create(&hrdbp, dbenv, 0); + if (ret) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create heap recno db")); + /* Set up file name by appending a 1 and open db */ + if (db != NULL) { + nlen = strlen(db); + if ((ret = __os_malloc(env, nlen + 2, &dbr)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), + TCL_STATIC); + return (0); + } + memcpy(dbr, db, nlen); + dbr[nlen] = '1'; + dbr[nlen+1] = '\0'; + } + if (subdb != NULL) { + nlen = strlen(subdb); + if ((ret = __os_malloc(env, nlen + 2, &subdbr)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), + TCL_STATIC); + return (0); + } + memcpy(subdbr, subdb, nlen); + subdbr[nlen] = '1'; + subdbr[nlen+1] = '\0'; + } + + /* + * Use same flags as heap, note: heap does not use of + * DB_AFTER/DB_BEFORE on cursor puts, but recno can. + * Since we use the same set flags, use of DB_RENUMBER gets + * caught above when heap db is created and heap create + * fails. So we never get this far. + */ + if (set_flags) { + ret = hrdbp->set_flags(hrdbp, set_flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + if (result == TCL_ERROR) + goto error; + /* + * If we are successful, clear the result so that the + * return from set_flags isn't part of the result. 
+ */ + Tcl_ResetResult(interp); + } + /* set up encryption if needed */ + if (encenble && passwd != NULL) { + if (encenble == 1) + ret = hrdbp->set_encrypt(hrdbp, + passwd, DB_ENCRYPT_AES); + else + ret = hrdbp->set_encrypt(hrdbp, passwd, 0); + } + ret = hrdbp->open(hrdbp, + txn, dbr, subdbr, DB_RECNO, open_flags, mode); + if (ret) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db open"); + goto error; + } + + /* Point heap's ip to recno db and do errcall (see above) */ + ip->hrdbp = hrdbp; + if (dbenv == NULL) + hrdbp->set_errcall(hrdbp, _ErrorFunc); + + /* Set up secondary db mapping RID to recno. */ + ret = db_create(&hsdbp, dbenv, 0); + if (ret) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create heap secondary db")); + /* Set up file name by appending a 2 and open db */ + if (dbr != NULL) + dbr[nlen] = '2'; + if (subdbr != NULL) + subdbr[nlen] = '2'; + + /* + * Use same flags as heap, note: heap does not use of + * DB_AFTER/DB_BEFORE on cursor puts, but recno can. + * Since we use the same set flags, use of DB_RENUMBER gets + * caught above when heap db is created and heap create + * fails. So we never get this far. + */ + if (set_flags) { + ret = hsdbp->set_flags(hsdbp, set_flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + if (result == TCL_ERROR) + goto error; + /* + * If we are successful, clear the result so that the + * return from set_flags isn't part of the result. 
+ */ + Tcl_ResetResult(interp); + } + /* set up encryption if needed */ + if (encenble) { + if (encenble == 1) + ret = hsdbp->set_encrypt(hsdbp, + passwd, DB_ENCRYPT_AES); + else + ret = hsdbp->set_encrypt(hsdbp, passwd, 0); + } + ret = hsdbp->open(hsdbp, + txn, dbr, subdbr, DB_BTREE, open_flags, mode); + if (ret) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db open"); + goto error; + } + + ret = hrdbp->associate(hrdbp, txn, hsdbp, heap_callback, 0); + if (ret) { + result = _ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "db associate"); + goto error; + } + + /* Point heap's ip to recno db and do errcall (see above) */ + ip->hsdbp = hsdbp; + if (dbenv == NULL) + hrdbp->set_errcall(hsdbp, _ErrorFunc); + } + +error: + if (keys != NULL) + __os_free(NULL, keys); + if (subdb) + __os_free(env, subdb); + if (subdbr != NULL) + __os_free(env, subdbr); + if (dbr) + __os_free(env, dbr); + if (result == TCL_ERROR) { + if (set_pfx && errip && errip->i_errpfx != NULL) { + (*dbp)->set_errpfx(*dbp, NULL); + __os_free(env, errip->i_errpfx); + errip->i_errpfx = NULL; + } + (void)(*dbp)->close(*dbp, 0); + if (type == DB_HEAP) { + if (hrdbp != NULL) { + (void)hrdbp->close(hrdbp, 0); + hrdbp = NULL; + } + if (hsdbp != NULL) { + (void)hsdbp->close(hsdbp, 0); + hsdbp = NULL; + } + } + /* + * If we opened and set up the error file in the environment + * on this open, but we failed for some other reason, clean + * up and close the file. + * + * XXX when err stuff isn't tied to env, change to use ip, + * instead of envip. Also, set_err is irrelevant when that + * happens. 
It will just read:
	 *	if (ip->i_err)
	 *		fclose(ip->i_err);
	 */
	if (set_err && errip && errip->i_err != NULL &&
	    errip->i_err != stdout && errip->i_err != stderr) {
		(void)fclose(errip->i_err);
		errip->i_err = NULL;
	}
	if (set_msg && errip && errip->i_msg != NULL &&
	    errip->i_msg != stdout && errip->i_msg != stderr) {
		(void)fclose(errip->i_msg);
		errip->i_msg = NULL;
	}
	*dbp = NULL;
	}
	return (result);
}

#ifdef HAVE_64BIT_TYPES
/*
 * bdb_SeqOpen --
 *	Implements the "Seq_create/Seq_open" command.
 *
 *	Parses the option arguments, then expects exactly two trailing
 *	arguments: the name of an already-open database handle and the
 *	key under which the sequence is stored.  On success a configured,
 *	opened DB_SEQUENCE handle is returned through seqp; on failure the
 *	partially set-up handle is closed and *seqp is set to NULL, with
 *	the Tcl result describing the error.
 */
static int
bdb_SeqOpen(interp, objc, objv, ip, seqp)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments? */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DBTCL_INFO *ip;			/* Our internal info */
	DB_SEQUENCE **seqp;		/* DB_SEQUENCE handle */
{
	static const char *seqopen[] = {
		"-cachesize",
		"-create",
		"-inc",
		"-init",
		"-dec",
		"-max",
		"-min",
		"-thread",
		"-txn",
		"-wrap",
		"--",
		NULL
	} ;
	/* Must stay in the same order as the seqopen[] strings above. */
	enum seqopen {
		TCL_SEQ_CACHESIZE,
		TCL_SEQ_CREATE,
		TCL_SEQ_INC,
		TCL_SEQ_INIT,
		TCL_SEQ_DEC,
		TCL_SEQ_MAX,
		TCL_SEQ_MIN,
		TCL_SEQ_THREAD,
		TCL_SEQ_TXN,
		TCL_SEQ_WRAP,
		TCL_SEQ_ENDARG
	};
	DB *dbp;
	DBT key;
	DBTYPE type;
	DB_TXN *txn;
	db_recno_t recno;
	db_seq_t min, max, value;
	Tcl_WideInt tcl_value;
	u_int32_t flags, oflags;
	int cache, endarg, i, optindex, result, ret, setrange, setvalue, v;
	char *arg, *db, msg[MSG_SIZE];

	COMPQUIET(ip, NULL);
	COMPQUIET(value, 0);
	*seqp = NULL;

	if (objc < 2) {
		Tcl_WrongNumArgs(interp, 2, objv, "?args?");
		return (TCL_ERROR);
	}

	/* Defaults: full signed 64-bit range, no cache, no txn. */
	txn = NULL;
	endarg = 0;
	flags = oflags = 0;
	setrange = setvalue = 0;
	min = INT64_MIN;
	max = INT64_MAX;
	cache = 0;

	for (i = 2; i < objc;) {
		Tcl_ResetResult(interp);
		if (Tcl_GetIndexFromObj(interp, objv[i], seqopen, "option",
		    TCL_EXACT, &optindex) != TCL_OK) {
			arg = Tcl_GetStringFromObj(objv[i], NULL);
			/*
			 * An unrecognized "-" word is either a help request
			 * or a user error; anything else ends option
			 * parsing and is treated as a positional arg.
			 */
			if (arg[0] == '-') {
				result = IS_HELP(objv[i]);
				goto error;
			} else
				Tcl_ResetResult(interp);
			break;
		}
		i++;
		result = TCL_OK;
		switch ((enum seqopen)optindex) {
		case TCL_SEQ_CREATE:
			oflags |= DB_CREATE;
			break;
		case TCL_SEQ_INC:
			LF_SET(DB_SEQ_INC);
			break;
		case TCL_SEQ_CACHESIZE:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-cachesize value?");
				result = TCL_ERROR;
				break;
			}
			result = Tcl_GetIntFromObj(interp, objv[i++], &cache);
			break;
		case TCL_SEQ_INIT:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-init value?");
				result = TCL_ERROR;
				break;
			}
			result =
			    Tcl_GetWideIntFromObj(
			    interp, objv[i++], &tcl_value);
			value = tcl_value;
			setvalue = 1;
			break;
		case TCL_SEQ_DEC:
			LF_SET(DB_SEQ_DEC);
			break;
		case TCL_SEQ_MAX:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-max value?");
				result = TCL_ERROR;
				break;
			}
			if ((result =
			    Tcl_GetWideIntFromObj(interp,
			    objv[i++], &tcl_value)) != TCL_OK)
				goto error;
			max = tcl_value;
			setrange = 1;
			break;
		case TCL_SEQ_MIN:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-min value?");
				result = TCL_ERROR;
				break;
			}
			if ((result =
			    Tcl_GetWideIntFromObj(interp,
			    objv[i++], &tcl_value)) != TCL_OK)
				goto error;
			min = tcl_value;
			setrange = 1;
			break;
		case TCL_SEQ_THREAD:
			oflags |= DB_THREAD;
			break;
		case TCL_SEQ_TXN:
			if (i > (objc - 1)) {
				Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?");
				result = TCL_ERROR;
				break;
			}
			arg = Tcl_GetStringFromObj(objv[i++], NULL);
			txn = NAME_TO_TXN(arg);
			if (txn == NULL) {
				snprintf(msg, MSG_SIZE,
				    "Sequence: Invalid txn: %s\n", arg);
				Tcl_SetResult(interp, msg, TCL_VOLATILE);
				result = TCL_ERROR;
			}
			break;
		case TCL_SEQ_WRAP:
			LF_SET(DB_SEQ_WRAP);
			break;
		case TCL_SEQ_ENDARG:
			endarg = 1;
			break;
		}
		/*
		 * If, at any time, parsing the args we get an error,
		 * bail out and return.
		 */
		if (result != TCL_OK)
			goto error;
		if (endarg)
			break;
	}

	/* Exactly two args must remain: the db handle name and the key. */
	if (objc - i != 2) {
		Tcl_WrongNumArgs(interp, 2, objv, "?args?");
		return (TCL_ERROR);
	}
	/*
	 * The db must be a string but the sequence key may
	 * be anything.
	 */
	db = Tcl_GetStringFromObj(objv[i++], NULL);
	if ((dbp = NAME_TO_DB(db)) == NULL) {
		Tcl_SetResult(interp, "No such dbp", TCL_STATIC);
		return (TCL_ERROR);
	}
	(void)dbp->get_type(dbp, &type);

	/* Record-number access methods key the sequence by recno. */
	if (type == DB_QUEUE || type == DB_RECNO) {
		result = _GetUInt32(interp, objv[i++], &recno);
		if (result != TCL_OK)
			return (result);
		DB_INIT_DBT(key, &recno, sizeof(recno));
	} else
		DB_INIT_DBT(key, Tcl_GetByteArrayFromObj(objv[i++], &v), v);
	ret = db_sequence_create(seqp, dbp, 0);
	if ((result = _ReturnSetup(interp,
	    ret, DB_RETOK_STD(ret), "sequence create")) != TCL_OK) {
		*seqp = NULL;
		return (result);
	}

	/* Configure the handle before the open, per the sequence API. */
	ret = (*seqp)->set_flags(*seqp, flags);
	if ((result = _ReturnSetup(interp,
	    ret, DB_RETOK_STD(ret), "sequence set_flags")) != TCL_OK)
		goto error;
	if (setrange) {
		ret = (*seqp)->set_range(*seqp, min, max);
		if ((result = _ReturnSetup(interp,
		    ret, DB_RETOK_STD(ret), "sequence set_range")) != TCL_OK)
			goto error;
	}
	if (cache) {
		ret = (*seqp)->set_cachesize(*seqp, cache);
		if ((result = _ReturnSetup(interp,
		    ret, DB_RETOK_STD(ret), "sequence cachesize")) != TCL_OK)
			goto error;
	}
	if (setvalue) {
		ret = (*seqp)->initial_value(*seqp, value);
		if ((result = _ReturnSetup(interp,
		    ret, DB_RETOK_STD(ret), "sequence init")) != TCL_OK)
			goto error;
	}
	ret = (*seqp)->open(*seqp, txn, &key, oflags);
	if ((result = _ReturnSetup(interp,
	    ret, DB_RETOK_STD(ret), "sequence open")) != TCL_OK)
		goto error;

	/* Reached only via "goto error": discard the half-built handle. */
	if (0) {
error:		if (*seqp != NULL)
			(void)(*seqp)->close(*seqp, 0);
		*seqp = NULL;
	}
	return (result);
}
#endif

/*
 * bdb_DbRemove --
 *	Implements the DB_ENV->remove and DB->remove command.
+ */ +static int +bdb_DbRemove(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *bdbrem[] = { + "-auto_commit", + "-encrypt", + "-encryptaes", + "-encryptany", + "-env", + "-fast", + "-txn", + "--", + NULL + }; + enum bdbrem { + TCL_DBREM_AUTOCOMMIT, + TCL_DBREM_ENCRYPT, + TCL_DBREM_ENCRYPT_AES, + TCL_DBREM_ENCRYPT_ANY, + TCL_DBREM_ENV, + TCL_DBREM_FAST, + TCL_DBREM_TXN, + TCL_DBREM_ENDARG + }; + DB *dbp; + DB_ENV *dbenv; + DB_TXN *txn; + ENV *env; + u_int32_t enc_flag, iflags, set_flags; + int endarg, i, optindex, result, ret, subdblen; + u_char *subdbtmp; + char *arg, *db, *dbr, msg[MSG_SIZE], *passwd, *subdb, *subdbr; + size_t nlen; + + dbp = NULL; + dbenv = NULL; + txn = NULL; + env = NULL; + enc_flag = iflags = set_flags = subdblen = 0; + endarg = nlen = 0; + result = TCL_OK; + subdbtmp = NULL; + db = dbr = passwd = subdb = subdbr = NULL; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename ?database?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], bdbrem, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum bdbrem)optindex) { + case TCL_DBREM_AUTOCOMMIT: + iflags |= DB_AUTO_COMMIT; + _debug_check(); + break; + case TCL_DBREM_ENCRYPT: + set_flags |= DB_ENCRYPT; + _debug_check(); + break; + case TCL_DBREM_ENCRYPT_AES: + /* Make sure we have an arg to check against! 
*/ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptaes passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = DB_ENCRYPT_AES; + break; + case TCL_DBREM_ENCRYPT_ANY: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptany passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = 0; + break; + case TCL_DBREM_ENV: + arg = Tcl_GetStringFromObj(objv[i++], NULL); + dbenv = NAME_TO_ENV(arg); + if (dbenv == NULL) { + Tcl_SetResult(interp, + "db remove: illegal environment", + TCL_STATIC); + return (TCL_ERROR); + } + env = dbenv->env; + break; + case TCL_DBREM_ENDARG: + endarg = 1; + break; + case TCL_DBREM_FAST: + iflags |= DB_LOG_NO_DATA; + _debug_check(); + break; + case TCL_DBREM_TXN: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Put: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + /* + * Any args we have left, (better be 1 or 2 left) are + * file names. If there is 1, a db name, if 2 a db and subdb name. + */ + if ((i != (objc - 1)) || (i != (objc - 2))) { + /* + * Dbs must be NULL terminated file names, but subdbs can + * be anything. Use Strings for the db name and byte + * arrays for the subdb. 
+ */ + db = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(db, "") == 0) + db = NULL; + if (i != objc) { + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &subdblen); + if ((ret = __os_malloc(env, (size_t)subdblen + 1, + &subdb)) != 0) { Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdb, subdbtmp, (size_t)subdblen); + subdb[subdblen] = '\0'; + } + } else { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename ?database?"); + result = TCL_ERROR; + goto error; + } + + + if (dbenv == NULL) { + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the environment. So, if + * we are using an env, don't overwrite it; if not using an env, + * then configure error handling. + */ + dbp->set_errpfx(dbp, "DbRemove"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + } + } + + /* + * The dbremove method is a destructor, NULL out the dbp. + */ + _debug_check(); + if (dbenv != NULL) + ret = dbenv->dbremove(dbenv, txn, db, subdb, iflags); + else + ret = dbp->remove(dbp, db, subdb, 0); + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db remove"); + + /* + * If the heap auxiliary databases exist, remove them too. If we have an + * environment, just try the dbremove and ignore ENOENT, instead of + * trying to create a path to the file. Without an environment, we can + * easily create the path. 
+ */ + + if ((db != NULL || subdb != NULL) && ret == 0) { + /* set up file name for associated recno db */ + if (db != NULL) { + nlen = strlen(db); + if ((ret = __os_malloc(env, nlen + 2, &dbr)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(dbr, db, nlen); + dbr[nlen] = '1'; + dbr[nlen+1] = '\0'; + } + if (subdb != NULL) { + if ((ret = __os_malloc( + env, (size_t)subdblen + 2, &subdbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdbr, subdb, (size_t)subdblen); + subdbr[subdblen] = '1'; + subdbr[subdblen+1] = '\0'; + } + + if (dbenv != NULL) { + ret = dbenv->dbremove(dbenv, txn, dbr, subdbr, iflags); + if (ret == ENOENT) + ret = 0; + } else if (__os_exists(NULL, dbr, NULL) == 0) { + /* additional files not set up as subdbs */ + subdb = NULL; + + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the + * environment. So, if we are using an env, don't + * overwrite it; if not using an env, then configure + * error handling. 
+ */ + dbp->set_errpfx(dbp, "DbRemove"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_flags"); + } + + ret = dbp->remove(dbp, dbr, subdbr, 0); + /* if file doesnt exist, ignore error */ + if (ret == ENOENT) + ret = 0; + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db remove associated recno"); + } + + /* remove the secondary db mapping RID to recno */ + if (dbr != NULL) + dbr[nlen] = '2'; + if (subdbr != NULL) + subdbr[subdblen] = '2'; + if (dbenv != NULL) { + ret = dbenv->dbremove(dbenv, txn, dbr, subdbr, iflags); + if (ret == ENOENT) + ret = 0; + } else if (__os_exists(NULL, dbr, NULL) == 0) { + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the + * environment. So, if we are using an env, don't + * overwrite it; if not using an env, then configure + * error handling. 
+ */ + dbp->set_errpfx(dbp, "DbRemove"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_flags"); + } + + ret = dbp->remove(dbp, dbr, subdbr, 0); + /* if file doesnt exist, ignore error */ + if (ret == ENOENT) + ret = 0; + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db remove associated btree"); + } + } + dbp = NULL; +error: + if (subdb) + __os_free(env, subdb); + if (dbr) + __os_free(env, dbr); + if (subdbr) + __os_free(env, subdbr); + if (result == TCL_ERROR && dbp != NULL) + (void)dbp->close(dbp, 0); + return (result); +} + +/* + * bdb_DbRename -- + * Implements the DB_ENV->dbrename and DB->rename commands. + */ +static int +bdb_DbRename(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *bdbmv[] = { + "-auto_commit", + "-encrypt", + "-encryptaes", + "-encryptany", + "-env", + "-txn", + "--", + NULL + }; + enum bdbmv { + TCL_DBMV_AUTOCOMMIT, + TCL_DBMV_ENCRYPT, + TCL_DBMV_ENCRYPT_AES, + TCL_DBMV_ENCRYPT_ANY, + TCL_DBMV_ENV, + TCL_DBMV_TXN, + TCL_DBMV_ENDARG + }; + DB *dbp; + DB_ENV *dbenv; + DB_TXN *txn; + ENV *env; + u_int32_t enc_flag, iflags, set_flags; + int endarg, i, newlen, optindex, result, ret, subdblen; + u_char *subdbtmp; + char *arg, *db, *dbr, msg[MSG_SIZE], *newname, *newnamer, *passwd; + char *subdb, *subdbr; + size_t nlen; + + dbp = NULL; + dbenv = NULL; + txn = NULL; + env = NULL; + enc_flag = iflags = set_flags = subdblen = 0; + nlen = 0; + result = TCL_OK; + endarg = 0; + db = dbr = newname = newnamer = passwd = subdb = subdbr = NULL; + subdbtmp = NULL; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, + 3, objv, "?args? filename ?database? 
?newname?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], bdbmv, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum bdbmv)optindex) { + case TCL_DBMV_AUTOCOMMIT: + iflags |= DB_AUTO_COMMIT; + _debug_check(); + break; + case TCL_DBMV_ENCRYPT: + set_flags |= DB_ENCRYPT; + _debug_check(); + break; + case TCL_DBMV_ENCRYPT_AES: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptaes passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = DB_ENCRYPT_AES; + break; + case TCL_DBMV_ENCRYPT_ANY: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptany passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = 0; + break; + case TCL_DBMV_ENV: + arg = Tcl_GetStringFromObj(objv[i++], NULL); + dbenv = NAME_TO_ENV(arg); + if (dbenv == NULL) { + Tcl_SetResult(interp, + "db rename: illegal environment", + TCL_STATIC); + return (TCL_ERROR); + } + env = dbenv->env; + break; + case TCL_DBMV_ENDARG: + endarg = 1; + break; + case TCL_DBMV_TXN: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Put: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. 
+ */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + /* + * Any args we have left, (better be 2 or 3 left) are + * file names. If there is 2, a file name, if 3 a file and db name. + */ + if ((i != (objc - 2)) || (i != (objc - 3))) { + /* + * Dbs must be NULL terminated file names, but subdbs can + * be anything. Use Strings for the db name and byte + * arrays for the subdb. + */ + db = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(db, "") == 0) + db = NULL; + if (i == objc - 2) { + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &subdblen); + if ((ret = __os_malloc(env, + (size_t)subdblen + 1, &subdb)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdb, subdbtmp, (size_t)subdblen); + subdb[subdblen] = '\0'; + } + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &newlen); + if ((ret = __os_malloc( + env, (size_t)newlen + 1, &newname)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(newname, subdbtmp, (size_t)newlen); + newname[newlen] = '\0'; + } else { + Tcl_WrongNumArgs( + interp, 3, objv, "?args? filename ?database? ?newname?"); + result = TCL_ERROR; + goto error; + } + if (dbenv == NULL) { + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create"); + goto error; + } + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the environment. So, if + * we are using an env, don't overwrite it; if not using an env, + * then configure error handling. 
+ */ + dbp->set_errpfx(dbp, "DbRename"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + } + } + + /* + * The dbrename method is a destructor, NULL out the dbp. + */ + _debug_check(); + if (dbp == NULL) + ret = dbenv->dbrename(dbenv, txn, db, subdb, newname, iflags); + else + ret = dbp->rename(dbp, db, subdb, newname, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db rename"); + /* + * If the heap auxiliary databases exist, rename them too. If we have an + * environment, just try the dbrename and ignore ENOENT, instead of + * trying to create a path to the file. Without an environment, we can + * easily create the path. + */ + + if ((db != NULL || subdb != NULL) && ret == 0) { + /* set up file name for associated recno db */ + if (db != NULL) { + nlen = strlen(db); + if ((ret = __os_malloc(env, nlen + 2, &dbr)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(dbr, db, nlen); + dbr[nlen] = '1'; + dbr[nlen+1] = '\0'; + } + if (subdb != NULL) { + if ((ret = __os_malloc( + env, (size_t)subdblen + 2, &subdbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdbr, subdb, (size_t)subdblen); + subdbr[subdblen] = '1'; + subdbr[subdblen+1] = '\0'; + } + + if ((ret = __os_malloc(env, + (size_t)newlen + 2, &newnamer)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(newnamer, newname, (size_t)newlen); + newnamer[newlen] = '1'; + newnamer[newlen+1] = '\0'; + + if (dbenv != NULL) { + ret = dbenv->dbrename(dbenv, + txn, dbr, subdbr, newnamer, iflags); + if (ret == ENOENT) + ret = 0; + } else if (__os_exists(NULL, dbr, NULL) == 0) { + /* additional files not set 
up as subdbs */ + subdb = NULL; + + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the + * environment. So, if we are using an env, don't + * overwrite it; if not using an env, then configure + * error handling. + */ + dbp->set_errpfx(dbp, "DbRename"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_flags"); + } + + ret = dbp->rename(dbp, dbr, subdbr, newnamer, 0); + /* if file doesnt exist, ignore error */ + if (ret == ENOENT) + ret = 0; + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db rename associated recno"); + } + + /* remove the secondary db mapping RID to recno */ + if (dbr != NULL) + dbr[nlen] = '2'; + if (subdbr != NULL) + subdbr[subdblen] = '2'; + newnamer[newlen] = '2'; + if (dbenv != NULL) { + ret = dbenv->dbrename(dbenv, + txn, dbr, subdbr, newnamer, iflags); + if (ret == ENOENT) + ret = 0; + } else if (__os_exists(NULL, dbr, NULL) == 0) { + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the + * environment. So, if we are using an env, don't + * overwrite it; if not using an env, then configure + * error handling. 
+ */ + dbp->set_errpfx(dbp, "DbRename"); + dbp->set_errcall(dbp, _ErrorFunc); + + if (passwd != NULL) { + ret = dbp->set_encrypt(dbp, passwd, enc_flag); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_encrypt"); + } + if (set_flags != 0) { + ret = dbp->set_flags(dbp, set_flags); + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "set_flags"); + } + + ret = dbp->rename(dbp, dbr, subdbr, newnamer, 0); + /* if file doesnt exist, ignore error */ + if (ret == ENOENT) + ret = 0; + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db rename associated btree"); + } + } + dbp = NULL; +error: + if (subdb) + __os_free(env, subdb); + if (newname) + __os_free(env, newname); + if (dbr) + __os_free(env, dbr); + if (subdbr) + __os_free(env, subdbr); + if (newnamer) + __os_free(env, newnamer); + if (result == TCL_ERROR && dbp != NULL) + (void)dbp->close(dbp, 0); + return (result); +} + +#ifdef CONFIG_TEST +/* + * bdb_DbVerify -- + * Implements the DB->verify command. + */ +static int +bdb_DbVerify(interp, objc, objv, ip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
 */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DBTCL_INFO *ip;			/* Our internal info */
{
	static const char *bdbverify[] = {
		"-btcompare",
		"-dupcompare",
		"-hashcompare",
		"-hashproc",

		"-encrypt",
		"-encryptaes",
		"-encryptany",
		"-env",
		"-errfile",
		"-errpfx",
		"-noorderchk",
		"-orderchkonly",
		"-unref",
		"--",
		NULL
	};
	/* Must stay in the same order as the bdbverify[] strings above. */
	enum bdbvrfy {
		TCL_DBVRFY_BTCOMPARE,
		TCL_DBVRFY_DUPCOMPARE,
		TCL_DBVRFY_HASHCOMPARE,
		TCL_DBVRFY_HASHPROC,

		TCL_DBVRFY_ENCRYPT,
		TCL_DBVRFY_ENCRYPT_AES,
		TCL_DBVRFY_ENCRYPT_ANY,
		TCL_DBVRFY_ENV,
		TCL_DBVRFY_ERRFILE,
		TCL_DBVRFY_ERRPFX,
		TCL_DBVRFY_NOORDERCHK,
		TCL_DBVRFY_ORDERCHKONLY,
		TCL_DBVRFY_UNREF,
		TCL_DBVRFY_ENDARG
	};
	DB_ENV *dbenv;
	DB *dbp;
	FILE *errf;
	int (*bt_compare) __P((DB *, const DBT *, const DBT *));
	int (*dup_compare) __P((DB *, const DBT *, const DBT *));
	int (*h_compare) __P((DB *, const DBT *, const DBT *));
	u_int32_t (*h_hash)__P((DB *, const void *, u_int32_t));
	u_int32_t enc_flag, flags, set_flags;
	int endarg, i, optindex, result, ret, subdblen;
	char *arg, *db, *errpfx, *passwd, *subdb;
	u_char *subdbtmp;

	dbenv = NULL;
	dbp = NULL;
	passwd = NULL;
	result = TCL_OK;
	db = errpfx = subdb = NULL;
	errf = NULL;
	bt_compare = NULL;
	dup_compare = NULL;
	h_compare = NULL;
	h_hash = NULL;
	flags = endarg = 0;
	enc_flag = set_flags = 0;

	if (objc < 2) {
		Tcl_WrongNumArgs(interp, 2, objv, "?args? filename");
		return (TCL_ERROR);
	}

	/*
	 * We must first parse for the environment flag, since that
	 * is needed for db_create.  Then create the db handle.
	 */
	i = 2;
	while (i < objc) {
		if (Tcl_GetIndexFromObj(interp, objv[i], bdbverify,
		    "option", TCL_EXACT, &optindex) != TCL_OK) {
			arg = Tcl_GetStringFromObj(objv[i], NULL);
			if (arg[0] == '-') {
				result = IS_HELP(objv[i]);
				goto error;
			} else
				Tcl_ResetResult(interp);
			break;
		}
		i++;
		switch ((enum bdbvrfy)optindex) {
		case TCL_DBVRFY_BTCOMPARE:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-btcompare compareproc");
				result = TCL_ERROR;
				break;
			}

			/*
			 * Store the object containing the procedure name.
			 * We don't need to crack it out now--we'll want
			 * to bundle it up to pass into Tcl_EvalObjv anyway.
			 * Tcl's object refcounting will--I hope--take care
			 * of the memory management here.
			 */
			ip->i_compare = objv[i++];
			Tcl_IncrRefCount(ip->i_compare);
			_debug_check();
			bt_compare = tcl_bt_compare;
			break;
		case TCL_DBVRFY_DUPCOMPARE:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-dupcompare compareproc");
				result = TCL_ERROR;
				break;
			}

			/*
			 * Store the object containing the procedure name.
			 * See TCL_DBVRFY_BTCOMPARE.
			 */
			ip->i_dupcompare = objv[i++];
			Tcl_IncrRefCount(ip->i_dupcompare);
			_debug_check();
			dup_compare = tcl_dup_compare;
			break;
		case TCL_DBVRFY_HASHCOMPARE:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-hashcompare compareproc");
				result = TCL_ERROR;
				break;
			}

			/*
			 * Store the object containing the procedure name.
			 * We don't need to crack it out now--we'll want
			 * to bundle it up to pass into Tcl_EvalObjv anyway.
			 * Tcl's object refcounting will--I hope--take care
			 * of the memory management here.
			 *
			 * NOTE(review): this reuses ip->i_compare (shared
			 * with -btcompare) and tcl_bt_compare rather than a
			 * hash-specific slot/callback -- looks intentional
			 * (same comparison signature), but confirm.
			 */
			ip->i_compare = objv[i++];
			Tcl_IncrRefCount(ip->i_compare);
			_debug_check();
			h_compare = tcl_bt_compare;
			break;
		case TCL_DBVRFY_HASHPROC:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-hashproc hashproc");
				result = TCL_ERROR;
				break;
			}

			/*
			 * Store the object containing the procedure name.
			 * See TCL_DBVRFY_BTCOMPARE.
			 */
			ip->i_hashproc = objv[i++];
			Tcl_IncrRefCount(ip->i_hashproc);
			_debug_check();
			h_hash = tcl_h_hash;
			break;
		case TCL_DBVRFY_ENCRYPT:
			set_flags |= DB_ENCRYPT;
			_debug_check();
			break;
		case TCL_DBVRFY_ENCRYPT_AES:
			/* Make sure we have an arg to check against! */
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-encryptaes passwd?");
				result = TCL_ERROR;
				break;
			}
			passwd = Tcl_GetStringFromObj(objv[i++], NULL);
			enc_flag = DB_ENCRYPT_AES;
			break;
		case TCL_DBVRFY_ENCRYPT_ANY:
			/* Make sure we have an arg to check against! */
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "?-encryptany passwd?");
				result = TCL_ERROR;
				break;
			}
			passwd = Tcl_GetStringFromObj(objv[i++], NULL);
			enc_flag = 0;
			break;
		case TCL_DBVRFY_ENV:
			arg = Tcl_GetStringFromObj(objv[i++], NULL);
			dbenv = NAME_TO_ENV(arg);
			if (dbenv == NULL) {
				Tcl_SetResult(interp,
				    "db verify: illegal environment",
				    TCL_STATIC);
				result = TCL_ERROR;
				break;
			}
			break;
		case TCL_DBVRFY_ERRFILE:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-errfile file");
				result = TCL_ERROR;
				break;
			}
			arg = Tcl_GetStringFromObj(objv[i++], NULL);
			/*
			 * If the user already set one, close it.
			 */
			if (errf != NULL && errf != stdout && errf != stderr)
				(void)fclose(errf);
			if (strcmp(arg, "/dev/stdout") == 0)
				errf = stdout;
			else if (strcmp(arg, "/dev/stderr") == 0)
				errf = stderr;
			else
				/*
				 * NOTE(review): fopen may fail and return
				 * NULL; later code guards on errf != NULL so
				 * a failure is silently ignored -- confirm
				 * that is the intended behavior.
				 */
				errf = fopen(arg, "a");
			break;
		case TCL_DBVRFY_ERRPFX:
			if (i >= objc) {
				Tcl_WrongNumArgs(interp, 2, objv,
				    "-errpfx prefix");
				result = TCL_ERROR;
				break;
			}
			arg = Tcl_GetStringFromObj(objv[i++], NULL);
			/*
			 * If the user already set one, free it.
			 *
			 * NOTE(review): dbenv is NULL here unless -env was
			 * given earlier on the command line, so
			 * dbenv->env would fault; the strdup below passes
			 * NULL instead.  Confirm and make these consistent.
			 */
			if (errpfx != NULL)
				__os_free(dbenv->env, errpfx);
			if ((ret = __os_strdup(NULL, arg, &errpfx)) != 0) {
				result = _ReturnSetup(interp, ret,
				    DB_RETOK_STD(ret), "__os_strdup");
				break;
			}
			break;
		case TCL_DBVRFY_NOORDERCHK:
			flags |= DB_NOORDERCHK;
			break;
		case TCL_DBVRFY_ORDERCHKONLY:
			flags |= DB_ORDERCHKONLY;
			break;
		case TCL_DBVRFY_UNREF:
			flags |= DB_UNREF;
			break;
		case TCL_DBVRFY_ENDARG:
			endarg = 1;
			break;
		}
		/*
		 * If, at any time, parsing the args we get an error,
		 * bail out and return.
		 */
		if (result != TCL_OK)
			goto error;
		if (endarg)
			break;
	}
	if (result != TCL_OK)
		goto error;
	/*
	 * The remaining arg is the db filename.
	 */
	/*
	 * Any args we have left, (better be 1 or 2 left) are
	 * file names. If there is 1, a db name, if 2 a db and subdb name.
	 */
	if (i != objc) {
		/*
		 * Dbs must be NULL terminated file names, but subdbs can
		 * be anything.  Use Strings for the db name and byte
		 * arrays for the subdb.
		 */
		db = Tcl_GetStringFromObj(objv[i++], NULL);
		if (strcmp(db, "") == 0)
			db = NULL;
		if (i != objc) {
			subdbtmp =
			    Tcl_GetByteArrayFromObj(objv[i++], &subdblen);
			/*
			 * NOTE(review): dbenv may be NULL here (no -env
			 * option), in which case dbenv->env faults --
			 * confirm -env is mandatory when a subdb is named,
			 * or pass NULL when dbenv is NULL.
			 */
			if ((ret = __os_malloc(dbenv->env,
			    (size_t)subdblen + 1, &subdb)) != 0) {
				Tcl_SetResult(interp, db_strerror(ret),
				    TCL_STATIC);
				return (0);
			}
			memcpy(subdb, subdbtmp, (size_t)subdblen);
			subdb[subdblen] = '\0';
		}
	} else {
		Tcl_WrongNumArgs(interp, 2, objv, "?args? filename");
		result = TCL_ERROR;
		goto error;
	}

	ret = db_create(&dbp, dbenv, 0);
	if (ret) {
		result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret),
		    "db_create");
		goto error;
	}

	/* Hang our info pointer on the DB handle, so we can do callbacks.
*/ + dbp->api_internal = ip; + + if (errf != NULL) + dbp->set_errfile(dbp, errf); + if (errpfx != NULL) + dbp->set_errpfx(dbp, errpfx); + + if (passwd != NULL && + (ret = dbp->set_encrypt(dbp, passwd, enc_flag)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + goto error; + } + + if (set_flags != 0 && + (ret = dbp->set_flags(dbp, set_flags)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + goto error; + } + if (bt_compare != NULL && + (ret = dbp->set_bt_compare(dbp, bt_compare)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_bt_compare"); + goto error; + } + if (dup_compare != NULL && + (ret = dbp->set_dup_compare(dbp, dup_compare)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_dup_compare"); + goto error; + } + if (h_compare != NULL && + (ret = dbp->set_h_compare(dbp, h_compare)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_h_compare"); + goto error; + } + if (h_hash != NULL && + (ret = dbp->set_h_hash(dbp, h_hash)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_h_hash"); + goto error; + } + + /* + * The verify method is a destructor, NULL out the dbp. + */ + ret = dbp->verify(dbp, db, subdb, NULL, flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db verify"); + dbp = NULL; +error: + if (errf != NULL && errf != stdout && errf != stderr) + (void)fclose(errf); + if (errpfx != NULL) + __os_free(dbenv->env, errpfx); + if (subdb != NULL) + __os_free(dbenv->env, subdb); + if (dbp) + (void)dbp->close(dbp, 0); + return (result); +} +#endif + +/* + * bdb_Version -- + * Implements the version command. + */ +static int +bdb_Version(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *bdbver[] = { + "-string", NULL + }; + enum bdbver { + TCL_VERSTRING + }; + int i, optindex, maj, min, patch, result, string, verobjc; + char *arg, *v; + Tcl_Obj *res, *verobjv[3]; + + result = TCL_OK; + string = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], bdbver, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum bdbver)optindex) { + case TCL_VERSTRING: + string = 1; + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + } + if (result != TCL_OK) + goto error; + + v = db_version(&maj, &min, &patch); + if (string) + res = NewStringObj(v, strlen(v)); + else { + verobjc = 3; + verobjv[0] = Tcl_NewIntObj(maj); + verobjv[1] = Tcl_NewIntObj(min); + verobjv[2] = Tcl_NewIntObj(patch); + res = Tcl_NewListObj(verobjc, verobjv); + } + Tcl_SetObjResult(interp, res); +error: + return (result); +} + +#ifdef CONFIG_TEST +/* + * bdb_GetConfig -- + * Implements the getconfig command. + */ +#define ADD_CONFIG_NAME(name) \ + conf = NewStringObj(name, strlen(name)); \ + if (Tcl_ListObjAppendElement(interp, res, conf) != TCL_OK) \ + return (TCL_ERROR); + +static int +bdb_GetConfig(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + Tcl_Obj *res, *conf; + + /* + * No args. 
Error if we have some + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return (TCL_ERROR); + } + res = Tcl_NewListObj(0, NULL); + conf = NULL; + + /* + * This command conditionally adds strings in based on + * how DB is configured so that the test suite can make + * decisions based on that. For now only implement the + * configuration pieces we need. + */ +#ifdef DEBUG + ADD_CONFIG_NAME("debug"); +#endif +#ifdef DEBUG_ROP + ADD_CONFIG_NAME("debug_rop"); +#endif +#ifdef DEBUG_WOP + ADD_CONFIG_NAME("debug_wop"); +#endif +#ifdef DIAGNOSTIC + ADD_CONFIG_NAME("diagnostic"); +#endif +#ifdef HAVE_PARTITION + ADD_CONFIG_NAME("partition"); +#endif +#ifdef HAVE_HASH + ADD_CONFIG_NAME("hash"); +#endif +#ifdef HAVE_HEAP + ADD_CONFIG_NAME("heap"); +#endif +#ifdef HAVE_QUEUE + ADD_CONFIG_NAME("queue"); +#endif +#ifdef HAVE_REPLICATION + ADD_CONFIG_NAME("rep"); +#endif +#ifdef HAVE_REPLICATION_THREADS + ADD_CONFIG_NAME("repmgr"); +#endif +#ifdef HAVE_VERIFY + ADD_CONFIG_NAME("verify"); +#endif + Tcl_SetObjResult(interp, res); + return (TCL_OK); +} + +/* + * bdb_Handles -- + * Implements the handles command. + */ +static int +bdb_Handles(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + DBTCL_INFO *p; + Tcl_Obj *res, *handle; + + /* + * No args. Error if we have some + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return (TCL_ERROR); + } + res = Tcl_NewListObj(0, NULL); + + LIST_FOREACH(p, &__db_infohead, entries) { + if (p->i_type == I_AUX) + continue; + handle = NewStringObj(p->i_name, strlen(p->i_name)); + if (Tcl_ListObjAppendElement(interp, res, handle) != TCL_OK) + return (TCL_ERROR); + } + Tcl_SetObjResult(interp, res); + return (TCL_OK); +} + +/* + * bdb_MsgType - + * Implements the msgtype command. + * Given a replication message return its message type name. 
+ */ +static int +bdb_MsgType(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + __rep_control_args *rp; + Tcl_Obj *msgname; + u_int32_t len, msgtype, swaptype; + int freerp, ret; + + /* + * If the messages in rep.h change, this must change too! + * Add "no_type" for 0 so that we directly index. + */ + static const char *msgnames[] = { + "no_type", "alive", "alive_req", "all_req", + "bulk_log", "bulk_page", + "dupmaster", "file", "file_fail", "file_req", "lease_grant", + "log", "log_more", "log_req", "master_req", "newclient", + "newfile", "newmaster", "newsite", "page", + "page_fail", "page_more", "page_req", + "rerequest", "startsync", "update", "update_req", + "verify", "verify_fail", "verify_req", + "vote1", "vote2", NULL + }; + + /* + * 1 arg, the message. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, "msgtype msg"); + return (TCL_ERROR); + } + + ret = _CopyObjBytes(interp, objv[2], &rp, &len, &freerp); + if (ret != TCL_OK) { + Tcl_SetResult(interp, + "msgtype: bad control message", TCL_STATIC); + return (TCL_ERROR); + } + swaptype = msgtype = rp->rectype; + /* + * We have no DB_ENV or ENV here. The message type may be + * swapped. Get both and use the one that is in the message range. + */ + M_32_SWAP(swaptype); + if (msgtype > REP_MAX_MSG && swaptype <= REP_MAX_MSG) + msgtype = swaptype; + msgname = NewStringObj(msgnames[msgtype], strlen(msgnames[msgtype])); + Tcl_SetObjResult(interp, msgname); + if (rp != NULL && freerp) + __os_free(NULL, rp); + return (TCL_OK); +} + +/* + * bdb_DbUpgrade -- + * Implements the DB->upgrade command. + */ +static int +bdb_DbUpgrade(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *bdbupg[] = { + "-dupsort", "-env", "--", NULL + }; + enum bdbupg { + TCL_DBUPG_DUPSORT, + TCL_DBUPG_ENV, + TCL_DBUPG_ENDARG + }; + DB_ENV *dbenv; + DB *dbp; + u_int32_t flags; + int endarg, i, optindex, result, ret; + char *arg, *db; + + dbenv = NULL; + dbp = NULL; + result = TCL_OK; + db = NULL; + flags = endarg = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], bdbupg, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum bdbupg)optindex) { + case TCL_DBUPG_DUPSORT: + flags |= DB_DUPSORT; + break; + case TCL_DBUPG_ENV: + arg = Tcl_GetStringFromObj(objv[i++], NULL); + dbenv = NAME_TO_ENV(arg); + if (dbenv == NULL) { + Tcl_SetResult(interp, + "db upgrade: illegal environment", + TCL_STATIC); + return (TCL_ERROR); + } + break; + case TCL_DBUPG_ENDARG: + endarg = 1; + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + /* + * The remaining arg is the db filename. + */ + if (i == (objc - 1)) + db = Tcl_GetStringFromObj(objv[i++], NULL); + else { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename"); + result = TCL_ERROR; + goto error; + } + ret = db_create(&dbp, dbenv, 0); + if (ret) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_create"); + goto error; + } + + /* + * XXX + * Remove restriction if error handling not tied to env. + * + * The DB->set_err* functions overwrite the environment. So, if + * we are using an env, don't overwrite it; if not using an env, + * then configure error handling. 
+ */ + if (dbenv == NULL) { + dbp->set_errpfx(dbp, "DbUpgrade"); + dbp->set_errcall(dbp, _ErrorFunc); + } + ret = dbp->upgrade(dbp, db, flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db upgrade"); +error: + if (dbp) + (void)dbp->close(dbp, 0); + return (result); +} + +/* + * tcl_bt_compare and tcl_dup_compare -- + * These two are basically identical internally, so may as well + * share code. The only differences are the name used in error + * reporting and the Tcl_Obj representing their respective procs. + */ +static int +tcl_bt_compare(dbp, dbta, dbtb) + DB *dbp; + const DBT *dbta, *dbtb; +{ + return (tcl_compare_callback(dbp, dbta, dbtb, + ((DBTCL_INFO *)dbp->api_internal)->i_compare, "bt_compare")); +} + +static int +tcl_dup_compare(dbp, dbta, dbtb) + DB *dbp; + const DBT *dbta, *dbtb; +{ + return (tcl_compare_callback(dbp, dbta, dbtb, + ((DBTCL_INFO *)dbp->api_internal)->i_dupcompare, "dup_compare")); +} + +/* + * tcl_compare_callback -- + * Tcl callback for set_bt_compare and set_dup_compare. What this + * function does is stuff the data fields of the two DBTs into Tcl ByteArray + * objects, then call the procedure stored in ip->i_compare on the two + * objects. Then we return that procedure's result as the comparison. + */ +static int +tcl_compare_callback(dbp, dbta, dbtb, procobj, errname) + DB *dbp; + const DBT *dbta, *dbtb; + Tcl_Obj *procobj; + char *errname; +{ + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *a, *b, *resobj, *objv[3]; + int result, cmp; + + ip = (DBTCL_INFO *)dbp->api_internal; + interp = ip->i_interp; + objv[0] = procobj; + + /* + * Create two ByteArray objects, with the two data we've been passed. + * This will involve a copy, which is unpleasantly slow, but there's + * little we can do to avoid this (I think). 
+ */ + a = Tcl_NewByteArrayObj(dbta->data, (int)dbta->size); + Tcl_IncrRefCount(a); + b = Tcl_NewByteArrayObj(dbtb->data, (int)dbtb->size); + Tcl_IncrRefCount(b); + + objv[1] = a; + objv[2] = b; + + result = Tcl_EvalObjv(interp, 3, objv, 0); + if (result != TCL_OK) { + /* + * XXX + * If this or the next Tcl call fails, we're doomed. + * There's no way to return an error from comparison functions, + * no way to determine what the correct sort order is, and + * so no way to avoid corrupting the database if we proceed. + * We could play some games stashing return values on the + * DB handle, but it's not worth the trouble--no one with + * any sense is going to be using this other than for testing, + * and failure typically means that the bt_compare proc + * had a syntax error in it or something similarly dumb. + * + * So, drop core. If we're not running with diagnostic + * mode, panic--and always return a negative number. :-) + */ +err: __db_errx(dbp->env, "Tcl %s callback failed", errname); + return (__env_panic(dbp->env, DB_RUNRECOVERY)); + } + + resobj = Tcl_GetObjResult(interp); + result = Tcl_GetIntFromObj(interp, resobj, &cmp); + if (result != TCL_OK) + goto err; + + Tcl_DecrRefCount(a); + Tcl_DecrRefCount(b); + return (cmp); +} + +/* + * tcl_h_hash -- + * Tcl callback for the hashing function. See tcl_compare_callback-- + * this works much the same way, only we're given a buffer and a length + * instead of two DBTs. + */ +static u_int32_t +tcl_h_hash(dbp, buf, len) + DB *dbp; + const void *buf; + u_int32_t len; +{ + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *objv[2]; + int result, hval; + + ip = (DBTCL_INFO *)dbp->api_internal; + interp = ip->i_interp; + objv[0] = ip->i_hashproc; + + /* + * Create a ByteArray for the buffer. 
+ */ + objv[1] = Tcl_NewByteArrayObj((void *)buf, (int)len); + Tcl_IncrRefCount(objv[1]); + result = Tcl_EvalObjv(interp, 2, objv, 0); + if (result != TCL_OK) + goto err; + + result = Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &hval); + if (result != TCL_OK) + goto err; + + Tcl_DecrRefCount(objv[1]); + return ((u_int32_t)hval); + +err: __db_errx(dbp->env, "Tcl h_hash callback failed"); + (void)__env_panic(dbp->env, DB_RUNRECOVERY); + return (0); +} + +static int +tcl_isalive(dbenv, pid, tid, flags) + DB_ENV *dbenv; + pid_t pid; + db_threadid_t tid; + u_int32_t flags; +{ + ENV *env; + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *objv[2]; + pid_t mypid; + db_threadid_t mytid; + int answer, result; + + __os_id(dbenv, &mypid, &mytid); + if (mypid == pid && (LF_ISSET(DB_MUTEX_PROCESS_ONLY) || + mytid == tid)) + return (1); + /* + * We only support the PROCESS_ONLY case for now, because that seems + * easiest, and that's all we need for our tests for the moment. + */ + if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) + return (1); + + ip = (DBTCL_INFO *)dbenv->app_private; + interp = ip->i_interp; + objv[0] = ip->i_isalive; + + objv[1] = Tcl_NewLongObj((long)pid); + Tcl_IncrRefCount(objv[1]); + + result = Tcl_EvalObjv(interp, 2, objv, 0); + if (result != TCL_OK) + goto err; + Tcl_DecrRefCount(objv[1]); + result = Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &answer); + if (result != TCL_OK) + goto err; + + return (answer); + +err: env = dbenv->env; + __db_errx(env, "Tcl isalive callback failed: %s", + Tcl_GetStringResult(interp)); + + (void)__env_panic(env, DB_RUNRECOVERY); + return (0); +} + +/* + * tcl_part_callback -- + */ +static u_int32_t +tcl_part_callback(dbp, data) + DB *dbp; + DBT *data; +{ + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *objv[2]; + int result, hval; + + ip = (DBTCL_INFO *)dbp->api_internal; + interp = ip->i_interp; + objv[0] = ip->i_part_callback; + + objv[1] = Tcl_NewByteArrayObj(data->data, (int)data->size); + 
Tcl_IncrRefCount(objv[1]); + + result = Tcl_EvalObjv(interp, 2, objv, 0); + if (result != TCL_OK) + goto err; + + result = Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &hval); + if (result != TCL_OK) + goto err; + + Tcl_DecrRefCount(objv[1]); + return ((u_int32_t)hval); + +err: __db_errx(dbp->env, "Tcl part_callback callback failed"); + (void)__env_panic(dbp->env, DB_RUNRECOVERY); + return (0); +} + +/* + * tcl_rep_send -- + * Replication send callback. + * + * PUBLIC: int tcl_rep_send __P((DB_ENV *, + * PUBLIC: const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); + */ +int +tcl_rep_send(dbenv, control, rec, lsnp, eid, flags) + DB_ENV *dbenv; + const DBT *control, *rec; + const DB_LSN *lsnp; + int eid; + u_int32_t flags; +{ +#define TCLDB_SENDITEMS 7 +#define TCLDB_MAXREPFLAGS 32 + DBTCL_INFO *ip; + Tcl_Interp *interp; + Tcl_Obj *control_o, *eid_o, *flags_o, *lsn_o, *origobj, *rec_o; + Tcl_Obj *lsnobj[2], *myobjv[TCLDB_MAXREPFLAGS], *objv[TCLDB_SENDITEMS]; + Tcl_Obj *resobj; + int i, myobjc, result, ret; + + ip = (DBTCL_INFO *)dbenv->app_private; + interp = ip->i_interp; + objv[0] = ip->i_rep_send; + + control_o = Tcl_NewByteArrayObj(control->data, (int)control->size); + Tcl_IncrRefCount(control_o); + + rec_o = Tcl_NewByteArrayObj(rec->data, (int)rec->size); + Tcl_IncrRefCount(rec_o); + + eid_o = Tcl_NewIntObj(eid); + Tcl_IncrRefCount(eid_o); + + myobjv[myobjc = 0] = NULL; + if (flags == 0) + myobjv[myobjc++] = NewStringObj("none", strlen("none")); + if (LF_ISSET(DB_REP_ANYWHERE)) + myobjv[myobjc++] = NewStringObj("any", strlen("any")); + if (LF_ISSET(DB_REP_NOBUFFER)) + myobjv[myobjc++] = NewStringObj("nobuffer", strlen("nobuffer")); + if (LF_ISSET(DB_REP_PERMANENT)) + myobjv[myobjc++] = NewStringObj("perm", strlen("perm")); + if (LF_ISSET(DB_REP_REREQUEST)) + myobjv[myobjc++] = + NewStringObj("rerequest", strlen("rerequest")); + /* + * If we're given an unrecognized flag send "unknown". 
+ */ + if (myobjc == 0) + myobjv[myobjc++] = NewStringObj("unknown", strlen("unknown")); + for (i = 0; i < myobjc; i++) + Tcl_IncrRefCount(myobjv[i]); + flags_o = Tcl_NewListObj(myobjc, myobjv); + Tcl_IncrRefCount(flags_o); + + lsnobj[0] = Tcl_NewLongObj((long)lsnp->file); + Tcl_IncrRefCount(lsnobj[0]); + lsnobj[1] = Tcl_NewLongObj((long)lsnp->offset); + Tcl_IncrRefCount(lsnobj[1]); + lsn_o = Tcl_NewListObj(2, lsnobj); + Tcl_IncrRefCount(lsn_o); + + objv[1] = control_o; + objv[2] = rec_o; + objv[3] = ip->i_rep_eid; /* From ID */ + objv[4] = eid_o; /* To ID */ + objv[5] = flags_o; /* Flags */ + objv[6] = lsn_o; /* LSN */ + + /* + * We really want to return the original result to the + * user. So, save the result obj here, and then after + * we've taken care of the Tcl_EvalObjv, set the result + * back to this original result. + */ + origobj = Tcl_GetObjResult(interp); + Tcl_IncrRefCount(origobj); + result = Tcl_EvalObjv(interp, TCLDB_SENDITEMS, objv, 0); + if (result != TCL_OK) { + /* + * XXX + * This probably isn't the right error behavior, but + * this error should only happen if the Tcl callback is + * somehow invalid, which is a fatal scripting bug. + */ +err: __db_errx(dbenv->env, + "Tcl rep_send failure: %s", Tcl_GetStringResult(interp)); + return (EINVAL); + } + + resobj = Tcl_GetObjResult(interp); + result = Tcl_GetIntFromObj(interp, resobj, &ret); + if (result != TCL_OK) + goto err; + + Tcl_SetObjResult(interp, origobj); + Tcl_DecrRefCount(origobj); + Tcl_DecrRefCount(control_o); + Tcl_DecrRefCount(rec_o); + Tcl_DecrRefCount(eid_o); + for (i = 0; i < myobjc; i++) + Tcl_DecrRefCount(myobjv[i]); + Tcl_DecrRefCount(flags_o); + Tcl_DecrRefCount(lsnobj[0]); + Tcl_DecrRefCount(lsnobj[1]); + Tcl_DecrRefCount(lsn_o); + + return (ret); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_db_malloc, tcl_db_realloc, tcl_db_free -- + * Tcl-local malloc, realloc, and free functions to use for user data + * to exercise umalloc/urealloc/ufree. 
Allocate the memory as a Tcl object + * so we're sure to exacerbate and catch any shared-library issues. + */ +static void * +tcl_db_malloc(size) + size_t size; +{ + Tcl_Obj *obj; + void *buf; + + obj = Tcl_NewObj(); + if (obj == NULL) + return (NULL); + Tcl_IncrRefCount(obj); + + Tcl_SetObjLength(obj, (int)(size + sizeof(Tcl_Obj *))); + buf = Tcl_GetString(obj); + memcpy(buf, &obj, sizeof(&obj)); + + buf = (Tcl_Obj **)buf + 1; + return (buf); +} + +static void * +tcl_db_realloc(ptr, size) + void *ptr; + size_t size; +{ + Tcl_Obj *obj; + + if (ptr == NULL) + return (tcl_db_malloc(size)); + + obj = *(Tcl_Obj **)((Tcl_Obj **)ptr - 1); + Tcl_SetObjLength(obj, (int)(size + sizeof(Tcl_Obj *))); + + ptr = Tcl_GetString(obj); + memcpy(ptr, &obj, sizeof(&obj)); + + ptr = (Tcl_Obj **)ptr + 1; + return (ptr); +} + +static void +tcl_db_free(ptr) + void *ptr; +{ + Tcl_Obj *obj; + + obj = *(Tcl_Obj **)((Tcl_Obj **)ptr - 1); + Tcl_DecrRefCount(obj); +} + +static int +tcl_set_partition_keys(interp, dbp, obj, keyp) + Tcl_Interp *interp; + DB *dbp; + Tcl_Obj *obj; + DBT **keyp; +{ + DBT *keys, *kp; + Tcl_Obj **obj_list; + u_int32_t i, count; + int ret; + + *keyp = NULL; + if ((ret = Tcl_ListObjGetElements(interp, + obj, (int *)&count, &obj_list)) != TCL_OK) + return (EINVAL); + + if ((ret = __os_calloc(NULL, count, sizeof(DBT), &keys)) != 0) + return (ret); + + *keyp = keys; + + kp = keys; + for (i = 0; i < count; i++, kp++) + kp->data = Tcl_GetStringFromObj(obj_list[i], (int*)&kp->size); + + if ((ret = dbp->set_partition(dbp, + (u_int32_t)count + 1, keys, NULL)) != 0) + return (ret); + + return (0); +} + +static int +tcl_set_partition_dirs(interp, dbp, obj) + Tcl_Interp *interp; + DB *dbp; + Tcl_Obj *obj; +{ + char **dp, **dirs; + Tcl_Obj **obj_list; + u_int32_t i, count; + int ret; + + if ((ret = Tcl_ListObjGetElements(interp, + obj, (int*)&count, &obj_list)) != TCL_OK) + return (EINVAL); + + if ((ret = __os_calloc(NULL, count + 1, sizeof(char *), &dirs)) != 0) + return (ret); + 
+ dp = dirs; + for (i = 0; i < count; i++, dp++) + *dp = Tcl_GetStringFromObj(obj_list[i], NULL); + + if ((ret = dbp->set_partition_dirs(dbp, (const char **)dirs)) != 0) + return (ret); + + __os_free(NULL, dirs); + + return (0); +} +#endif + +static int +heap_callback(dbp, key, data, seckey) + DB *dbp; + const DBT *key, *data; + DBT *seckey; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(key, NULL); + memset(seckey, 0, sizeof(DBT)); + seckey->data = data->data; + seckey->size = data->size; + return 0; +} diff --git a/lang/tcl/tcl_dbcursor.c b/lang/tcl/tcl_dbcursor.c new file mode 100644 index 00000000..6ecc34d4 --- /dev/null +++ b/lang/tcl/tcl_dbcursor.c @@ -0,0 +1,1332 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +/* + * Prototypes for procedures defined later in this file: + */ +static int tcl_DbcDel __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBC *)); +static int tcl_DbcDup __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBC *)); +static int tcl_DbcCompare __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBC *)); +static int tcl_DbcGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBC *, int)); +static int tcl_DbcHeapDel __P((Tcl_Interp *, DBC *)); +static int tcl_DbcPut __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBC *)); + +/* + * PUBLIC: int dbc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + * + * dbc_cmd -- + * Implements the cursor command. + */ +int +dbc_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Cursor handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *dbccmds[] = { +#ifdef CONFIG_TEST + "pget", +#endif + "close", + "cmp", + "del", + "dup", + "get", + "put", + NULL + }; + enum dbccmds { +#ifdef CONFIG_TEST + DBCPGET, +#endif + DBCCLOSE, + DBCCOMPARE, + DBCDELETE, + DBCDUP, + DBCGET, + DBCPUT + }; + DBC *dbc; + DBTCL_INFO *dbip; + int cmdindex, result, ret; + + Tcl_ResetResult(interp); + dbc = (DBC *)clientData; + dbip = _PtrToInfo((void *)dbc); + result = TCL_OK; + + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (dbc == NULL) { + Tcl_SetResult(interp, "NULL dbc pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (dbip == NULL) { + Tcl_SetResult(interp, "NULL dbc info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the berkdbcmds + * defined above. + */ + if (Tcl_GetIndexFromObj(interp, objv[1], dbccmds, "command", + TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + switch ((enum dbccmds)cmdindex) { +#ifdef CONFIG_TEST + case DBCPGET: + result = tcl_DbcGet(interp, objc, objv, dbc, 1); + break; +#endif + case DBCCLOSE: + /* + * No args for this. Error if there are some. 
+ */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbc->close(dbc); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "dbc close"); + if (result == TCL_OK) { + (void)Tcl_DeleteCommand(interp, dbip->i_name); + _DeleteInfo(dbip); + } + break; + case DBCCOMPARE: + if (objc > 3) { + Tcl_WrongNumArgs(interp, 3, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + result = tcl_DbcCompare(interp, objc, objv, dbc); + break; + case DBCDELETE: + result = tcl_DbcDel(interp, objc, objv, dbc); + break; + case DBCDUP: + result = tcl_DbcDup(interp, objc, objv, dbc); + break; + case DBCGET: + result = tcl_DbcGet(interp, objc, objv, dbc, 0); + break; + case DBCPUT: + result = tcl_DbcPut(interp, objc, objv, dbc); + break; + } + return (result); +} + +/* + * tcl_DbcHeapDel -- + */ +static int +tcl_DbcHeapDel(interp, dbc) + Tcl_Interp *interp; + DBC *dbc; +{ + DB *dbp, *hrdbp, *hsdbp; + DBT hkey, key, tmpdata; + DB_HEAP_RID rid; + db_recno_t recno; + int result, ret, t_ret; + + dbp = dbc->dbp; + hrdbp = ((DBTCL_INFO *)dbp->api_internal)->hrdbp; + hsdbp = ((DBTCL_INFO *)dbp->api_internal)->hsdbp; + + memset(&hkey, 0, sizeof(DBT)); + hkey.data = &rid; + hkey.size = hkey.ulen = sizeof(DB_HEAP_RID); + hkey.flags = DB_DBT_USERMEM; + memset(&tmpdata, 0, sizeof(DBT)); + tmpdata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + if ((t_ret = dbc->get(dbc, &hkey, &tmpdata, DB_CURRENT)) != 0) { + ret = t_ret; + goto err; + } + + memset(&key, 0, sizeof(DBT)); + key.data = &recno; + key.size = key.ulen = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + if ((t_ret = hsdbp->pget( + hsdbp, dbc->txn, &hkey, &key, &tmpdata, 0)) != 0) { + ret = t_ret; + goto err; + } + + ret = dbc->del(dbc, 0); + if ((t_ret = hrdbp->del(hrdbp, dbc->txn, &key, 0)) != 0 && ret == 0) + ret = t_ret; + +err: result = _ReturnSetup( + interp, ret, DB_RETOK_DBCDEL(ret), "dbc delete"); + return result; +} + +/* + * tcl_DbcPut -- + */ +static int 
+tcl_DbcPut(interp, objc, objv, dbc) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBC *dbc; /* Cursor pointer */ +{ + static const char *dbcutopts[] = { +#ifdef CONFIG_TEST + "-nodupdata", +#endif + "-after", + "-before", + "-current", + "-keyfirst", + "-keylast", + "-overwritedup", + "-partial", + NULL + }; + enum dbcutopts { +#ifdef CONFIG_TEST + DBCPUT_NODUPDATA, +#endif + DBCPUT_AFTER, + DBCPUT_BEFORE, + DBCPUT_CURRENT, + DBCPUT_KEYFIRST, + DBCPUT_KEYLAST, + DBCPUT_OVERWRITE_DUP, + DBCPUT_PART + }; + DB *thisdbp, *hrdbp, *hsdbp; + DBT data, hkey, key, tmpdata; + DBTCL_INFO *dbcip, *dbip; + DBTYPE type; + DB_HEAP_RID rid; + Tcl_Obj **elemv, *res; + void *dtmp, *ktmp; + db_recno_t recno; + u_int32_t flag; + int elemc, freekey, freedata, i, optindex, result, ret; + + COMPQUIET(dtmp, NULL); + COMPQUIET(ktmp, NULL); + + result = TCL_OK; + flag = 0; + freekey = freedata = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? ?key?"); + return (TCL_ERROR); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + memset(&hkey, 0, sizeof(hkey)); + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + while (i < (objc - 1)) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcutopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get + * an errant error message if there is another error. 
+ */ + if (IS_HELP(objv[i]) == TCL_OK) { + result = TCL_OK; + goto out; + } + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbcutopts)optindex) { +#ifdef CONFIG_TEST + case DBCPUT_NODUPDATA: + FLAG_CHECK(flag); + flag = DB_NODUPDATA; + break; +#endif + case DBCPUT_AFTER: + FLAG_CHECK(flag); + flag = DB_AFTER; + break; + case DBCPUT_BEFORE: + FLAG_CHECK(flag); + flag = DB_BEFORE; + break; + case DBCPUT_CURRENT: + FLAG_CHECK(flag); + flag = DB_CURRENT; + break; + case DBCPUT_KEYFIRST: + FLAG_CHECK(flag); + flag = DB_KEYFIRST; + break; + case DBCPUT_KEYLAST: + FLAG_CHECK(flag); + flag = DB_KEYLAST; + break; + case DBCPUT_OVERWRITE_DUP: + FLAG_CHECK(flag); + flag = DB_OVERWRITE_DUP; + break; + case DBCPUT_PART: + if (i > (objc - 2)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-partial {offset length}?"); + result = TCL_ERROR; + break; + } + /* + * Get sublist as {offset length} + */ + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (elemc != 2) { + Tcl_SetResult(interp, + "List must be {offset length}", TCL_STATIC); + result = TCL_ERROR; + break; + } + data.flags |= DB_DBT_PARTIAL; + result = _GetUInt32(interp, elemv[0], &data.doff); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, elemv[1], &data.dlen); + /* + * NOTE: We don't check result here because all we'd + * do is break anyway, and we are doing that. If you + * add code here, you WILL need to add the check + * for result. (See the check for save.doff, a few + * lines above and copy that.) + */ + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + /* + * We need to determine if we are a recno database or not. If we are, + * then key.data is a recno, not a string. 
+ */ + dbcip = _PtrToInfo(dbc); + if (dbcip == NULL) { + type = DB_UNKNOWN; + thisdbp = NULL; + } else { + dbip = dbcip->i_parent; + if (dbip == NULL) { + Tcl_SetResult(interp, "Cursor without parent database", + TCL_STATIC); + result = TCL_ERROR; + return (result); + } + thisdbp = dbip->i_dbp; + (void)thisdbp->get_type(thisdbp, &type); + } + /* + * When we get here, we better have: + * 1 arg if -after, -before or -current + * 2 args in all other cases + */ + if (flag == DB_AFTER || flag == DB_BEFORE || flag == DB_CURRENT) { + if (i != (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-args? data"); + result = TCL_ERROR; + goto out; + } + /* + * We want to get the key back, so we need to set + * up the location to get it back in. + */ + if (type == DB_RECNO || type == DB_QUEUE) { + recno = 0; + key.data = &recno; + key.size = sizeof(db_recno_t); + } + } else { + if (i != (objc - 2)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-args? key data"); + result = TCL_ERROR; + goto out; + } + if (type == DB_HEAP || type == DB_RECNO || type == DB_QUEUE) { + result = _GetUInt32(interp, objv[objc-2], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + return (result); + } else { + ret = _CopyObjBytes(interp, objv[objc-2], &ktmp, + &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCPUT(ret), "dbc put"); + return (result); + } + key.data = ktmp; + } + } + ret = _CopyObjBytes(interp, objv[objc-1], &dtmp, + &data.size, &freedata); + data.data = dtmp; + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCPUT(ret), "dbc put"); + goto out; + } + _debug_check(); + if (type != DB_HEAP) { + ret = dbc->put(dbc, &key, &data, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_DBCPUT(ret), + "dbc put"); + } else { + hkey.data = &rid; + hkey.ulen = hkey.size = sizeof(DB_HEAP_RID); + hkey.flags = DB_DBT_USERMEM; + hrdbp = ((DBTCL_INFO *)thisdbp->api_internal)->hrdbp; + if (flag != 
DB_CURRENT) { + /* Given a recno, need to find the associated RID. */ + ret = hrdbp->get(hrdbp, dbc->txn, &key, &hkey, 0); + result = _ReturnSetup(interp, + ret, DB_RETOK_DBGET(ret), "db get recno"); + } else { + /* We have neither RID nor recno, but need both. */ + memset(&tmpdata, 0, sizeof(DBT)); + tmpdata.dlen = 0; + tmpdata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + ret = dbc->get(dbc, &hkey, &tmpdata, DB_CURRENT); + result = _ReturnSetup(interp, + ret, DB_RETOK_DBGET(ret), "dbc get"); + + hsdbp = ((DBTCL_INFO *)thisdbp->api_internal)->hsdbp; + key.data = &recno; + key.ulen = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + ret = hsdbp->pget(hsdbp, + dbc->txn, &hkey, &key, &tmpdata, 0); + result = _ReturnSetup(interp, + ret, DB_RETOK_DBGET(ret), "db pget rid"); + } + + /* Do the put in the heap db first */ + ret = dbc->put(dbc, &hkey, &data, flag); + if (ret) { + result = _ReturnSetup(interp, + ret, DB_RETOK_DBCPUT(ret), "dbc put"); + goto out; + } + + hkey.flags = DB_DBT_USERMEM; + ret = hrdbp->put(hrdbp, dbc->txn, &key, &hkey, 0); + result = _ReturnSetup(interp, + ret, DB_RETOK_DBCPUT(ret), "dbc put recno"); + + /* + * To keep the consistency, if the put in recno db fails, + * the current key and data will be removed from the heap db. + */ + if (dbc->txn == NULL && ret != 0) + (void)thisdbp->del(thisdbp, NULL, &hkey, 0); + } + if (ret == 0 && + (flag == DB_AFTER || flag == DB_BEFORE) && + (type == DB_RECNO || type == DB_HEAP)) { + res = Tcl_NewWideIntObj((Tcl_WideInt)*(db_recno_t *)key.data); + Tcl_SetObjResult(interp, res); + } +out: + if (freedata) + __os_free(NULL, dtmp); + if (freekey) + __os_free(NULL, ktmp); + return (result); +} + +/* + * tcl_dbc_get -- + */ +static int +tcl_DbcGet(interp, objc, objv, dbc, ispget) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBC *dbc; /* Cursor pointer */ + int ispget; /* 1 for pget, 0 for get */ +{ + static const char *dbcgetopts[] = { +#ifdef CONFIG_TEST + "-data_buf_size", + "-get_both_range", + "-key_buf_size", + "-multi", + "-multi_key", + "-nolease", + "-read_committed", + "-read_uncommitted", +#endif + "-current", + "-first", + "-get_both", + "-get_recno", + "-join_item", + "-last", + "-next", + "-nextdup", + "-nextnodup", + "-partial", + "-prev", + "-prevdup", + "-prevnodup", + "-rmw", + "-set", + "-set_range", + "-set_recno", + NULL + }; + enum dbcgetopts { +#ifdef CONFIG_TEST + DBCGET_DATA_BUF_SIZE, + DBCGET_BOTH_RANGE, + DBCGET_KEY_BUF_SIZE, + DBCGET_MULTI, + DBCGET_MULTI_KEY, + DBCGET_NOLEASE, + DBCGET_READ_COMMITTED, + DBCGET_READ_UNCOMMITTED, +#endif + DBCGET_CURRENT, + DBCGET_FIRST, + DBCGET_BOTH, + DBCGET_RECNO, + DBCGET_JOIN, + DBCGET_LAST, + DBCGET_NEXT, + DBCGET_NEXTDUP, + DBCGET_NEXTNODUP, + DBCGET_PART, + DBCGET_PREV, + DBCGET_PREVDUP, + DBCGET_PREVNODUP, + DBCGET_RMW, + DBCGET_SET, + DBCGET_SETRANGE, + DBCGET_SETRECNO + }; + DB *hrdbp, *hsdbp, *thisdbp; + DB_HEAP_RID rid; + DBT hkey, key, data, pdata, rkey, rdata, tmpdata; + DBTCL_INFO *dbcip, *dbip; + DBTYPE ptype, type; + Tcl_Obj **elemv, *myobj, *retlist; + void *dtmp, *ktmp; + db_recno_t precno, recno; + u_int32_t flag, heapflag, op; + int elemc, freekey, freedata, i, optindex, result, ret; +#ifdef CONFIG_TEST + int data_buf_size, key_buf_size; + + data_buf_size = key_buf_size = 0; +#endif + COMPQUIET(dtmp, NULL); + COMPQUIET(ktmp, NULL); + result = TCL_OK; + flag = heapflag = 0; + freekey = freedata = 0; + hrdbp = hsdbp = NULL; + type = ptype = DB_UNKNOWN; + memset(&hkey, 0, sizeof(hkey)); + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + memset(&pdata, 0, sizeof(DBT)); + memset(&rkey, 0, sizeof(DBT)); + memset(&rdata, 0, sizeof(DBT)); + memset(&tmpdata, 0, sizeof(DBT)); + tmpdata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + + if (objc < 
2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? ?key?"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcgetopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get + * an errant error message if there is another error. + */ + if (IS_HELP(objv[i]) == TCL_OK) { + result = TCL_OK; + goto out; + } + Tcl_ResetResult(interp); + break; + } + i++; + +#define FLAG_CHECK2_STDARG \ + (DB_RMW | DB_MULTIPLE | DB_MULTIPLE_KEY | DB_IGNORE_LEASE | \ + DB_READ_UNCOMMITTED | DB_READ_COMMITTED) + + switch ((enum dbcgetopts)optindex) { +#ifdef CONFIG_TEST + case DBCGET_DATA_BUF_SIZE: + result = + Tcl_GetIntFromObj(interp, objv[i], &data_buf_size); + if (result != TCL_OK) + goto out; + i++; + break; + case DBCGET_BOTH_RANGE: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_GET_BOTH_RANGE; + break; + case DBCGET_KEY_BUF_SIZE: + result = + Tcl_GetIntFromObj(interp, objv[i], &key_buf_size); + if (result != TCL_OK) + goto out; + i++; + break; + case DBCGET_MULTI: + flag |= DB_MULTIPLE; + result = + Tcl_GetIntFromObj(interp, objv[i], &data_buf_size); + if (result != TCL_OK) + goto out; + i++; + break; + case DBCGET_MULTI_KEY: + flag |= DB_MULTIPLE_KEY; + result = + Tcl_GetIntFromObj(interp, objv[i], &data_buf_size); + if (result != TCL_OK) + goto out; + i++; + break; + case DBCGET_NOLEASE: + flag |= DB_IGNORE_LEASE; + break; + case DBCGET_READ_COMMITTED: + flag |= DB_READ_COMMITTED; + break; + case DBCGET_READ_UNCOMMITTED: + flag |= DB_READ_UNCOMMITTED; + break; +#endif + case DBCGET_RMW: + flag |= DB_RMW; + break; + case DBCGET_CURRENT: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_CURRENT; + break; + case DBCGET_FIRST: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_FIRST; + break; + case DBCGET_LAST: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_LAST; + break; + case 
DBCGET_NEXT: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_NEXT; + break; + case DBCGET_PREV: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_PREV; + break; + case DBCGET_PREVDUP: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_PREV_DUP; + break; + case DBCGET_PREVNODUP: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_PREV_NODUP; + break; + case DBCGET_NEXTNODUP: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_NEXT_NODUP; + break; + case DBCGET_NEXTDUP: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_NEXT_DUP; + break; + case DBCGET_BOTH: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_GET_BOTH; + break; + case DBCGET_RECNO: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_GET_RECNO; + break; + case DBCGET_JOIN: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_JOIN_ITEM; + break; + case DBCGET_SET: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_SET; + break; + case DBCGET_SETRANGE: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_SET_RANGE; + break; + case DBCGET_SETRECNO: + FLAG_CHECK2(flag, FLAG_CHECK2_STDARG); + flag |= DB_SET_RECNO; + break; + case DBCGET_PART: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-partial {offset length}?"); + result = TCL_ERROR; + break; + } + /* + * Get sublist as {offset length} + */ + result = Tcl_ListObjGetElements(interp, objv[i++], + &elemc, &elemv); + if (elemc != 2) { + Tcl_SetResult(interp, + "List must be {offset length}", TCL_STATIC); + result = TCL_ERROR; + break; + } + data.flags |= DB_DBT_PARTIAL; + result = _GetUInt32(interp, elemv[0], &data.doff); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, elemv[1], &data.dlen); + /* + * NOTE: We don't check result here because all we'd + * do is break anyway, and we are doing that. If you + * add code here, you WILL need to add the check + * for result. (See the check for save.doff, a few + * lines above and copy that.) 
+ */ + break; + } + if (result != TCL_OK) + break; + } + + if (result != TCL_OK) + goto out; + heapflag = flag & ~DB_OPFLAGS_MASK; + heapflag &= ~DB_MULTIPLE_KEY; + if (F_ISSET(dbc, DBC_READ_COMMITTED)) + heapflag |= DB_READ_COMMITTED; + if (F_ISSET(dbc, DBC_READ_UNCOMMITTED)) + heapflag |= DB_READ_UNCOMMITTED; + + /* + * We need to determine if we are a recno database + * or not. If we are, then key.data is a recno, not + * a string. + */ + dbcip = _PtrToInfo(dbc); + if (dbcip != NULL) { + dbip = dbcip->i_parent; + if (dbip == NULL) { + Tcl_SetResult(interp, "Cursor without parent database", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } + thisdbp = dbip->i_dbp; + (void)thisdbp->get_type(thisdbp, &type); + if (ispget && thisdbp->s_primary != NULL) + (void)thisdbp-> + s_primary->get_type(thisdbp->s_primary, &ptype); + else + ptype = DB_UNKNOWN; + if (type == DB_HEAP) { + hrdbp = dbip->hrdbp; + hsdbp = dbip->hsdbp; + } + } + /* + * When we get here, we better have: + * 2 args, key and data if GET_BOTH/GET_BOTH_RANGE was specified. + * 1 arg if -set, -set_range or -set_recno + * 0 in all other cases. + */ + op = flag & DB_OPFLAGS_MASK; + switch (op) { + case DB_GET_BOTH: +#ifdef CONFIG_TEST + case DB_GET_BOTH_RANGE: +#endif + if (i != (objc - 2)) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-args? -get_both key data"); + result = TCL_ERROR; + goto out; + } else { + if (type == DB_RECNO || + type == DB_QUEUE || type == DB_HEAP) { + result = _GetUInt32( + interp, objv[objc-2], &recno); + if (result == TCL_OK) { + key.data = &recno; + key.size = sizeof(db_recno_t); + } else + goto out; + } else { + /* + * Some get calls (SET_*) can change the + * key pointers. So, we need to store + * the allocated key space in a tmp. 
+ */ + ret = _CopyObjBytes(interp, objv[objc-2], + &ktmp, &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCGET(ret), "dbc get"); + return (result); + } + key.data = ktmp; + } + if (ptype == DB_RECNO || + ptype == DB_QUEUE || ptype == DB_HEAP) { + result = _GetUInt32( + interp, objv[objc-1], &precno); + if (result == TCL_OK) { + data.data = &precno; + data.size = sizeof(db_recno_t); + } else + goto out; + } else { + ret = _CopyObjBytes(interp, objv[objc-1], + &dtmp, &data.size, &freedata); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCGET(ret), "dbc get"); + goto out; + } + data.data = dtmp; + } + } + break; + case DB_SET: + case DB_SET_RANGE: + case DB_SET_RECNO: + if (i != (objc - 1)) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? key"); + result = TCL_ERROR; + goto out; + } +#ifdef CONFIG_TEST + if (data_buf_size != 0) { + (void)__os_malloc( + NULL, (size_t)data_buf_size, &data.data); + data.ulen = (u_int32_t)data_buf_size; + data.flags |= DB_DBT_USERMEM; + } else +#endif + data.flags |= DB_DBT_MALLOC; + if (op == DB_SET_RECNO || + type == DB_HEAP || type == DB_RECNO || type == DB_QUEUE) { + result = _GetUInt32(interp, objv[objc - 1], &recno); + key.data = &recno; + key.size = sizeof(db_recno_t); + } else { + /* + * Some get calls (SET_*) can change the + * key pointers. So, we need to store + * the allocated key space in a tmp. 
+ */ + ret = _CopyObjBytes(interp, objv[objc-1], + &ktmp, &key.size, &freekey); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_DBCGET(ret), "dbc get"); + return (result); + } + key.data = ktmp; + } + break; + default: + if (i != objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args?"); + result = TCL_ERROR; + goto out; + } +#ifdef CONFIG_TEST + if (key_buf_size != 0) { + (void)__os_malloc( + NULL, (size_t)key_buf_size, &key.data); + key.ulen = (u_int32_t)key_buf_size; + key.flags |= DB_DBT_USERMEM; + } else +#endif + key.flags |= DB_DBT_MALLOC; +#ifdef CONFIG_TEST + if (data_buf_size != 0) { + (void)__os_malloc( + NULL, (size_t)data_buf_size, &data.data); + data.ulen = (u_int32_t)data_buf_size; + data.flags |= DB_DBT_USERMEM; + } else +#endif + data.flags |= DB_DBT_MALLOC; + } + + _debug_check(); + + /* + * Heap cannot be a secondary, so with type == DB_HEAP we know that + * ispget is false. + */ + if (type == DB_HEAP && (op == DB_GET_BOTH || + op == DB_GET_BOTH_RANGE || op == DB_SET || op == DB_SET_RANGE)) { + rkey.data = &recno; + rkey.ulen = rkey.size = sizeof(db_recno_t); + rkey.flags |= DB_DBT_USERMEM; + if (key.data != NULL && F_ISSET(&key, DB_DBT_USERMEM)) + __os_free(NULL, key.data); + if (key.data != NULL && F_ISSET(&key, DB_DBT_MALLOC)) + __os_ufree(NULL, key.data); + memset(&key, 0, sizeof(DBT)); + key.data = &rid; + key.ulen = key.size = sizeof(DB_HEAP_RID); + key.flags |= DB_DBT_USERMEM; + + /* + * This is a noncursor get on recno db, use heapflag because + * the cursor op flags have been removed. + */ + ret = hrdbp->get(hrdbp, dbc->txn, &rkey, &key, heapflag); + if (ret != 0) { + result = _ReturnSetup( + interp, ret, DB_RETOK_DBGET(ret), "db get"); + retlist = Tcl_NewListObj(0, NULL); + goto out1; + } + } + + /* + * If we're doing a pget and DB_GET_BOTH is set, the primary key (stored + * in data) needs to match, too. For a HEAP primary, we're called with + * a recno primary key and we need to translate that to an RID. 
(ptype + * is only set if we're doing a pget.) + */ + if (ptype == DB_HEAP && + (op == DB_GET_BOTH || op == DB_GET_BOTH_RANGE)) { + rkey.data = &precno; + rkey.size = rkey.ulen = sizeof(db_recno_t); + rkey.flags = DB_DBT_USERMEM; + if (data.data != NULL && F_ISSET(&data, DB_DBT_USERMEM)) + __os_free(NULL, data.data); + if (data.data != NULL && F_ISSET(&data, DB_DBT_MALLOC)) + __os_ufree(NULL, data.data); + memset(&data, 0, sizeof(DBT)); + data.data = &rid; + data.size = data.ulen = sizeof(DB_HEAP_RID); + data.flags = DB_DBT_USERMEM; + ret = hrdbp->get(hrdbp, dbc->txn, &rkey, &data, heapflag); + if (ret != 0) { + result = _ReturnSetup( + interp, ret, DB_RETOK_DBGET(ret), "db get"); + retlist = Tcl_NewListObj(0, NULL); + goto out1; + } + } + + if (ispget) { + F_SET(&pdata, DB_DBT_MALLOC); + ret = dbc->pget(dbc, &key, &data, &pdata, flag); + if (ret == 0 && ptype == DB_HEAP) { + rid.pgno = ((DB_HEAP_RID *)data.data)->pgno; + rid.indx = ((DB_HEAP_RID *)data.data)->indx; + hkey.data = &rid; + hkey.ulen = hkey.size = data.size; + hkey.flags = DB_DBT_USERMEM; + hsdbp = ((DBTCL_INFO *) + dbc->dbp->s_primary->api_internal)->hsdbp; + ret = hsdbp->pget(hsdbp, + dbc->txn, &hkey, &data, &tmpdata, 0); + } + + } else + ret = dbc->get(dbc, &key, &data, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_DBCGET(ret), "dbc get"); + if (result == TCL_ERROR) + goto out; + + retlist = Tcl_NewListObj(0, NULL); + if (ret != 0) + goto out1; + if (op == DB_GET_RECNO) { + recno = *((db_recno_t *)data.data); + myobj = Tcl_NewWideIntObj((Tcl_WideInt)recno); + result = Tcl_ListObjAppendElement(interp, retlist, myobj); + } else { + if (flag & (DB_MULTIPLE|DB_MULTIPLE_KEY)) + result = _SetMultiList(interp, + retlist, &key, &data, type, flag); + else if ((type == DB_RECNO || type == DB_QUEUE) && + key.data != NULL) { + if (ispget) + result = _Set3DBTList(interp, retlist, &key, 1, + &data, + (ptype == DB_RECNO || ptype == DB_QUEUE), + &pdata); + else + result = _SetListRecnoElem(interp, retlist, 
+ *(db_recno_t *)key.data, + data.data, data.size); + } else if (type == DB_HEAP) { + /* + * If given a record number, we're done. If we don't + * yet have a record number, we need to look it up. + */ + if (op != DB_GET_BOTH && op != DB_SET && + op != DB_GET_BOTH_RANGE && op != DB_SET_RANGE) { + rdata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + rdata.dlen = 0; + rkey.data = &recno; + rkey.size = rkey.ulen = sizeof(db_recno_t); + rkey.flags = DB_DBT_USERMEM; + + ret = hsdbp->pget(hsdbp, dbc->txn, &key, + &rkey, &rdata, heapflag); + result = _ReturnSetup( + interp, ret, DB_RETOK_DBGET(ret), "db get"); + if (result == TCL_ERROR) + goto out; + retlist = Tcl_NewListObj(0, NULL); + if (ret != 0) + goto out1; + } + result = _SetListRecnoElem(interp, retlist, + *(db_recno_t *)rkey.data, data.data, data.size); + } else { + if (ispget) + result = _Set3DBTList(interp, retlist, &key, 0, + &data, + (ptype == DB_HEAP || + ptype == DB_RECNO || ptype == DB_QUEUE), + &pdata); + else + result = _SetListElem(interp, retlist, + key.data, key.size, data.data, data.size); + } + } +out1: + if (result == TCL_OK) + Tcl_SetObjResult(interp, retlist); + /* + * If DB_DBT_MALLOC is set we need to free if DB allocated anything. + * If DB_DBT_USERMEM is set we need to free it because + * we allocated it (for data_buf_size/key_buf_size). That + * allocation does not apply to the pdata DBT. For heap, we do not + * malloc anything but move pointers around so nothing to free. 
+ */ +out: + if (key.data != NULL && F_ISSET(&key, DB_DBT_MALLOC)) + __os_ufree(dbc->env, key.data); + if (type != DB_HEAP && + key.data != NULL && F_ISSET(&key, DB_DBT_USERMEM)) + __os_free(dbc->env, key.data); + if (data.data != NULL && F_ISSET(&data, DB_DBT_MALLOC)) + __os_ufree(dbc->env, data.data); + if (data.data != NULL && F_ISSET(&data, DB_DBT_USERMEM)) + __os_free(dbc->env, data.data); + if (pdata.data != NULL && F_ISSET(&pdata, DB_DBT_MALLOC)) + __os_ufree(dbc->env, pdata.data); + if (freedata) + __os_free(NULL, dtmp); + if (freekey) + __os_free(NULL, ktmp); + return (result); + +} + +/* + * tcl_DbcCompare -- + */ +static int +tcl_DbcCompare(interp, objc, objv, dbc) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBC *dbc; /* Cursor pointer */ +{ + DBC *odbc; + DBTCL_INFO *dbcip, *dbip; + Tcl_Obj *res; + int cmp_res, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + res = NULL; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, "?-args?"); + return (TCL_ERROR); + } + + dbcip = _PtrToInfo(dbc); + if (dbcip == NULL) { + Tcl_SetResult(interp, "Cursor without info structure", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } else { + dbip = dbcip->i_parent; + if (dbip == NULL) { + Tcl_SetResult(interp, "Cursor without parent database", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } + } + /* + * When we get here, we better have: + * 2 args one DBC and an int address for the result + */ + arg = Tcl_GetStringFromObj(objv[2], NULL); + odbc = NAME_TO_DBC(arg); + if (odbc == NULL) { + snprintf(msg, MSG_SIZE, + "Cmp: Invalid cursor: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + goto out; + } + + ret = dbc->cmp(dbc, odbc, &cmp_res, 0); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "dbc cmp"); + return (result); + } + res = Tcl_NewIntObj(cmp_res); + Tcl_SetObjResult(interp, res); +out: + 
return (result); + +} + +/* + * tcl_DbcDup -- + */ +static int +tcl_DbcDup(interp, objc, objv, dbc) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBC *dbc; /* Cursor pointer */ +{ + static const char *dbcdupopts[] = { + "-position", + NULL + }; + enum dbcdupopts { + DBCDUP_POS + }; + DBC *newdbc; + DBTCL_INFO *dbcip, *newdbcip, *dbip; + Tcl_Obj *res; + u_int32_t flag; + int i, optindex, result, ret; + char newname[MSG_SIZE]; + + result = TCL_OK; + flag = 0; + res = NULL; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args?"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcdupopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get + * an errant error message if there is another error. + */ + if (IS_HELP(objv[i]) == TCL_OK) { + result = TCL_OK; + goto out; + } + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbcdupopts)optindex) { + case DBCDUP_POS: + flag = DB_POSITION; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + /* + * We need to determine if we are a recno database + * or not. If we are, then key.data is a recno, not + * a string. + */ + dbcip = _PtrToInfo(dbc); + if (dbcip == NULL) { + Tcl_SetResult(interp, "Cursor without info structure", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } else { + dbip = dbcip->i_parent; + if (dbip == NULL) { + Tcl_SetResult(interp, "Cursor without parent database", + TCL_STATIC); + result = TCL_ERROR; + goto out; + } + } + /* + * Now duplicate the cursor. If successful, we need to create + * a new cursor command. 
+ */ + snprintf(newname, sizeof(newname), + "%s.c%d", dbip->i_name, dbip->i_dbdbcid); + newdbcip = _NewInfo(interp, NULL, newname, I_DBC); + if (newdbcip != NULL) { + ret = dbc->dup(dbc, &newdbc, flag); + if (ret == 0) { + dbip->i_dbdbcid++; + newdbcip->i_parent = dbip; + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)dbc_Cmd, + (ClientData)newdbc, NULL); + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(newdbcip, newdbc); + Tcl_SetObjResult(interp, res); + } else { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db dup"); + _DeleteInfo(newdbcip); + } + } else { + Tcl_SetResult(interp, "Could not set up info", TCL_STATIC); + result = TCL_ERROR; + } +out: + return (result); + +} + +/* + * tcl_DbcDel -- + */ +static int +tcl_DbcDel(interp, objc, objv, dbc) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DBC *dbc; /* Cursor pointer */ +{ + static const char *dbcdelopts[] = { + "-consume", + NULL + }; + enum dbcdelopts { + DBCDEL_CONSUME + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args?"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], dbcdelopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get + * an errant error message if there is another error. 
+ */ + if (IS_HELP(objv[i]) == TCL_OK) { + result = TCL_OK; + goto out; + } + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum dbcdelopts)optindex) { + case DBCDEL_CONSUME: + flag = DB_CONSUME; + break; + } + } + if (dbc->dbp->type == DB_HEAP) + result = tcl_DbcHeapDel(interp, dbc); + else { + _debug_check(); + ret = dbc->del(dbc, flag); + result = _ReturnSetup( + interp, ret, DB_RETOK_DBCDEL(ret), "dbc delete"); + } +out: + return (result); +} diff --git a/lang/tcl/tcl_env.c b/lang/tcl/tcl_env.c new file mode 100644 index 00000000..40b0767a --- /dev/null +++ b/lang/tcl/tcl_env.c @@ -0,0 +1,3252 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/lock.h" +#include "dbinc/txn.h" +#include "dbinc/tcl_db.h" + +/* + * Prototypes for procedures defined later in this file: + */ +static void _EnvInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); +static int env_DbRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_DbRename __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_EventInfo __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +static int env_GetFlags __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_GetOpenFlag + __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_GetLockDetect + __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_GetTimeout __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +static int env_GetVerbose __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); + +/* + * PUBLIC: int env_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + * + * env_Cmd -- + * Implements the "env" command. 
+ */ +int +env_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Env handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *envcmds[] = { +#ifdef CONFIG_TEST + "attributes", + "errfile", + "errpfx", + "event_info", + "failchk", + "id_reset", + "lock_detect", + "lock_id", + "lock_id_free", + "lock_id_set", + "lock_get", + "lock_set_priority", + "lock_stat", + "lock_stat_print", + "lock_timeout", + "lock_vec", + "log_archive", + "log_compare", + "log_config", + "log_cursor", + "log_file", + "log_flush", + "log_get", + "log_get_config", + "log_put", + "log_stat", + "log_stat_print", + "lsn_reset", + "mpool", + "mpool_stat", + "mpool_stat_print", + "mpool_sync", + "mpool_trickle", + "msgfile", + "mutex", + "mutex_free", + "mutex_get_align", + "mutex_get_incr", + "mutex_get_max", + "mutex_get_tas_spins", + "mutex_lock", + "mutex_set_tas_spins", + "mutex_stat", + "mutex_stat_print", + "mutex_unlock", + "rep_config", + "rep_elect", + "rep_flush", + "rep_get_clockskew", + "rep_get_config", + "rep_get_limit", + "rep_get_nsites", + "rep_get_priority", + "rep_get_request", + "rep_get_timeout", + "rep_lease", + "rep_limit", + "rep_nsites", + "rep_process_message", + "rep_request", + "rep_start", + "rep_stat", + "rep_stat_print", + "rep_sync", + "rep_transport", + "repmgr", + "repmgr_get_ack_policy", + "repmgr_get_local_site", + "repmgr_site_list", + "repmgr_stat", + "repmgr_stat_print", + "set_flags", + "stat_print", + "test", + "txn_applied", + "txn_id_set", + "txn_recover", + "txn_stat", + "txn_stat_print", + "txn_timeout", + "verbose", +#endif + "cdsgroup", + "close", + "dbremove", + "dbrename", + "get_cachesize", + "get_cache_max", + "get_create_dir", + "get_data_dirs", + "get_encrypt_flags", + "get_errpfx", + "get_flags", + "get_home", + "get_lg_bsize", + "get_lg_dir", + "get_lg_filemode", + "get_lg_max", + "get_lg_regionmax", + "get_lk_detect", + 
"get_lk_init_lockers", + "get_lk_init_locks", + "get_lk_init_objects", + "get_lk_max_lockers", + "get_lk_max_locks", + "get_lk_max_objects", + "get_lk_partitions", + "get_mp_max_openfd", + "get_mp_max_write", + "get_mp_mmapsize", + "get_mp_mtxcount", + "get_mp_pagesize", + "get_open_flags", + "get_shm_key", + "get_tas_spins", + "get_timeout", + "get_tmp_dir", + "get_tx_init", + "get_tx_max", + "get_tx_timestamp", + "get_verbose", + "resize_cache", + "set_data_dir", + "set_maxcache", + "txn", + "txn_checkpoint", + NULL + }; + enum envcmds { +#ifdef CONFIG_TEST + ENVATTR, + ENVERRFILE, + ENVERRPFX, + ENVEVENTINFO, + ENVFAILCHK, + ENVIDRESET, + ENVLKDETECT, + ENVLKID, + ENVLKFREEID, + ENVLKSETID, + ENVLKGET, + ENVLKPRI, + ENVLKSTAT, + ENVLKSTATPRT, + ENVLKTIMEOUT, + ENVLKVEC, + ENVLOGARCH, + ENVLOGCMP, + ENVLOGCONFIG, + ENVLOGCURSOR, + ENVLOGFILE, + ENVLOGFLUSH, + ENVLOGGET, + ENVLOGGETCONFIG, + ENVLOGPUT, + ENVLOGSTAT, + ENVLOGSTATPRT, + ENVLSNRESET, + ENVMP, + ENVMPSTAT, + ENVMPSTATPRT, + ENVMPSYNC, + ENVTRICKLE, + ENVMSGFILE, + ENVMUTEX, + ENVMUTFREE, + ENVMUTGETALIGN, + ENVMUTGETINCR, + ENVMUTGETMAX, + ENVMUTGETTASSPINS, + ENVMUTLOCK, + ENVMUTSETTASSPINS, + ENVMUTSTAT, + ENVMUTSTATPRT, + ENVMUTUNLOCK, + ENVREPCONFIG, + ENVREPELECT, + ENVREPFLUSH, + ENVREPGETCLOCKSKEW, + ENVREPGETCONFIG, + ENVREPGETLIMIT, + ENVREPGETNSITES, + ENVREPGETPRIORITY, + ENVREPGETREQUEST, + ENVREPGETTIMEOUT, + ENVREPLEASE, + ENVREPLIMIT, + ENVREPNSITES, + ENVREPPROCMESS, + ENVREPREQUEST, + ENVREPSTART, + ENVREPSTAT, + ENVREPSTATPRT, + ENVREPSYNC, + ENVREPTRANSPORT, + ENVREPMGR, + ENVREPMGRGETACK, + ENVREPMGRGETLOCAL, + ENVREPMGRSITELIST, + ENVREPMGRSTAT, + ENVREPMGRSTATPRT, + ENVSETFLAGS, + ENVSTATPRT, + ENVTEST, + ENVTXNAPPLIED, + ENVTXNSETID, + ENVTXNRECOVER, + ENVTXNSTAT, + ENVTXNSTATPRT, + ENVTXNTIMEOUT, + ENVVERB, +#endif + ENVCDSGROUP, + ENVCLOSE, + ENVDBREMOVE, + ENVDBRENAME, + ENVGETCACHESIZE, + ENVGETCACHEMAX, + ENVGETCREATEDIR, + ENVGETDATADIRS, + ENVGETENCRYPTFLAGS, + 
ENVGETERRPFX, + ENVGETFLAGS, + ENVGETHOME, + ENVGETLGBSIZE, + ENVGETLGDIR, + ENVGETLGFILEMODE, + ENVGETLGMAX, + ENVGETLGREGIONMAX, + ENVGETLKDETECT, + ENVGETLKINITLOCKERS, + ENVGETLKINITLOCKS, + ENVGETLKINITOBJECTS, + ENVGETLKMAXLOCKERS, + ENVGETLKMAXLOCKS, + ENVGETLKMAXOBJECTS, + ENVGETLKPARTITIONS, + ENVGETMPMAXOPENFD, + ENVGETMPMAXWRITE, + ENVGETMPMMAPSIZE, + ENVGETMPMTXCOUNT, + ENVGETMPPAGESIZE, + ENVGETOPENFLAG, + ENVGETSHMKEY, + ENVGETTASSPINS, + ENVGETTIMEOUT, + ENVGETTMPDIR, + ENVGETTXINIT, + ENVGETTXMAX, + ENVGETTXTIMESTAMP, + ENVGETVERBOSE, + ENVRESIZECACHE, + ENVSETDATADIR, + ENVSETMAXCACHE, + ENVTXN, + ENVTXNCKP + }; + DBTCL_INFO *envip; + DB_ENV *dbenv; + Tcl_Obj **listobjv, *myobjv[3], *res; + db_timeout_t timeout; + size_t size; + time_t timeval; + u_int32_t bytes, gbytes, value; + long shm_key; + int cmdindex, i, intvalue, listobjc, ncache, result, ret, t_ret; + const char *strval, **dirs; + char *strarg, newname[MSG_SIZE]; +#ifdef CONFIG_TEST + DBTCL_INFO *logcip; + DB_LOGC *logc; + u_int32_t lockid; + long newval, otherval; +#endif + + Tcl_ResetResult(interp); + dbenv = (DB_ENV *)clientData; + envip = _PtrToInfo((void *)dbenv); + result = TCL_OK; + memset(newname, 0, MSG_SIZE); + + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (dbenv == NULL) { + Tcl_SetResult(interp, "NULL env pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (envip == NULL) { + Tcl_SetResult(interp, "NULL env info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the berkdbcmds + * defined above. + */ + if (Tcl_GetIndexFromObj(interp, objv[1], envcmds, "command", + TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + res = NULL; + switch ((enum envcmds)cmdindex) { +#ifdef CONFIG_TEST + case ENVEVENTINFO: + result = env_EventInfo(interp, objc, objv, dbenv, envip); + break; + case ENVFAILCHK: + /* + * No args for this. Error if there are some. 
+ */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->failchk(dbenv, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "failchk"); + break; + case ENVIDRESET: + result = tcl_EnvIdReset(interp, objc, objv, dbenv); + break; + case ENVLSNRESET: + result = tcl_EnvLsnReset(interp, objc, objv, dbenv); + break; + case ENVLKDETECT: + result = tcl_LockDetect(interp, objc, objv, dbenv); + break; + case ENVLKSTAT: + result = tcl_LockStat(interp, objc, objv, dbenv); + break; + case ENVLKSTATPRT: + result = tcl_LockStatPrint(interp, objc, objv, dbenv); + break; + case ENVLKTIMEOUT: + result = tcl_LockTimeout(interp, objc, objv, dbenv); + break; + case ENVLKID: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->lock_id(dbenv, &lockid); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock_id"); + if (result == TCL_OK) + res = Tcl_NewWideIntObj((Tcl_WideInt)lockid); + break; + case ENVLKFREEID: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, NULL); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[2], &newval); + if (result != TCL_OK) + return (result); + ret = dbenv->lock_id_free(dbenv, (u_int32_t)newval); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock id_free"); + break; + case ENVLKSETID: + if (objc != 4) { + Tcl_WrongNumArgs(interp, 4, objv, "current max"); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[2], &newval); + if (result != TCL_OK) + return (result); + result = Tcl_GetLongFromObj(interp, objv[3], &otherval); + if (result != TCL_OK) + return (result); + ret = __lock_id_set(dbenv->env, + (u_int32_t)newval, (u_int32_t)otherval); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock id_free"); + break; + case ENVLKPRI: + if (objc != 4) { + Tcl_WrongNumArgs(interp, 4, objv, NULL); + 
return (TCL_ERROR); + } + result = _GetUInt32(interp, objv[2], &lockid); + if (result != TCL_OK) + return (result); + result = _GetUInt32(interp, objv[3], &value); + if (result != TCL_OK) + return (result); + if (dbenv->env->lk_handle == NULL) { + Tcl_SetResult(interp, "env not configured for locking", NULL); + return (TCL_ERROR); + } + ret = dbenv->set_lk_priority(dbenv, lockid, value); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock set priority"); + break; + case ENVLKGET: + result = tcl_LockGet(interp, objc, objv, dbenv); + break; + case ENVLKVEC: + result = tcl_LockVec(interp, objc, objv, dbenv); + break; + case ENVLOGARCH: + result = tcl_LogArchive(interp, objc, objv, dbenv); + break; + case ENVLOGCMP: + result = tcl_LogCompare(interp, objc, objv); + break; + case ENVLOGCONFIG: + /* + * Two args for this. Error if different. + */ + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "opt on|off"); + return (TCL_ERROR); + } + result = tcl_LogConfig(interp, dbenv, objv[2], objv[3]); + break; + case ENVLOGCURSOR: + snprintf(newname, sizeof(newname), + "%s.logc%d", envip->i_name, envip->i_envlogcid); + logcip = _NewInfo(interp, NULL, newname, I_LOGC); + if (logcip != NULL) { + ret = dbenv->log_cursor(dbenv, &logc, 0); + if (ret == 0) { + result = TCL_OK; + envip->i_envlogcid++; + /* + * We do NOT want to set i_parent to + * envip here because log cursors are + * not "tied" to the env. That is, they + * are NOT closed if the env is closed. 
+ */ + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)logc_Cmd, + (ClientData)logc, NULL); + res = NewStringObj(newname, strlen(newname)); + _SetInfoData(logcip, logc); + } else { + _DeleteInfo(logcip); + result = _ErrorSetup(interp, ret, "log cursor"); + } + } else { + Tcl_SetResult(interp, + "Could not set up info", TCL_STATIC); + result = TCL_ERROR; + } + break; + case ENVLOGFILE: + result = tcl_LogFile(interp, objc, objv, dbenv); + break; + case ENVLOGFLUSH: + result = tcl_LogFlush(interp, objc, objv, dbenv); + break; + case ENVLOGGET: + result = tcl_LogGet(interp, objc, objv, dbenv); + break; + case ENVLOGGETCONFIG: + /* + * Two args for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_LogGetConfig(interp, dbenv, objv[2]); + break; + case ENVLOGPUT: + result = tcl_LogPut(interp, objc, objv, dbenv); + break; + case ENVLOGSTAT: + result = tcl_LogStat(interp, objc, objv, dbenv); + break; + case ENVLOGSTATPRT: + result = tcl_LogStatPrint(interp, objc, objv, dbenv); + break; + case ENVMPSTAT: + result = tcl_MpStat(interp, objc, objv, dbenv); + break; + case ENVMPSTATPRT: + result = tcl_MpStatPrint(interp, objc, objv, dbenv); + break; + case ENVMPSYNC: + result = tcl_MpSync(interp, objc, objv, dbenv); + break; + case ENVTRICKLE: + result = tcl_MpTrickle(interp, objc, objv, dbenv); + break; + case ENVMP: + result = tcl_Mp(interp, objc, objv, dbenv, envip); + break; + case ENVMUTEX: + result = tcl_Mutex(interp, objc, objv, dbenv); + break; + case ENVMUTFREE: + result = tcl_MutFree(interp, objc, objv, dbenv); + break; + case ENVMUTGETALIGN: + result = tcl_MutGet(interp, dbenv, DBTCL_MUT_ALIGN); + break; + case ENVMUTGETINCR: + result = tcl_MutGet(interp, dbenv, DBTCL_MUT_INCR); + break; + case ENVMUTGETMAX: + result = tcl_MutGet(interp, dbenv, DBTCL_MUT_MAX); + break; + case ENVMUTGETTASSPINS: + result = tcl_MutGet(interp, dbenv, DBTCL_MUT_TAS); + break; + case 
ENVMUTLOCK: + result = tcl_MutLock(interp, objc, objv, dbenv); + break; + case ENVMUTSETTASSPINS: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_MutSet(interp, objv[2], dbenv, DBTCL_MUT_TAS); + break; + case ENVMUTSTAT: + result = tcl_MutStat(interp, objc, objv, dbenv); + break; + case ENVMUTSTATPRT: + result = tcl_MutStatPrint(interp, objc, objv, dbenv); + break; + case ENVMUTUNLOCK: + result = tcl_MutUnlock(interp, objc, objv, dbenv); + break; + case ENVREPCONFIG: + /* + * Two args for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_RepConfig(interp, dbenv, objv[2]); + break; + case ENVREPELECT: + result = tcl_RepElect(interp, objc, objv, dbenv); + break; + case ENVREPFLUSH: + result = tcl_RepFlush(interp, objc, objv, dbenv); + break; + case ENVREPGETCLOCKSKEW: + result = tcl_RepGetTwo(interp, dbenv, DBTCL_GETCLOCK); + break; + case ENVREPGETCONFIG: + /* + * Two args for this. Error if different. 
+ */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_RepGetConfig(interp, dbenv, objv[2]); + break; + case ENVREPGETLIMIT: + result = tcl_RepGetTwo(interp, dbenv, DBTCL_GETLIMIT); + break; + case ENVREPGETNSITES: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->rep_get_nsites(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_get_nsites")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVREPGETPRIORITY: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->rep_get_priority(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_get_priority")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVREPGETREQUEST: + result = tcl_RepGetTwo(interp, dbenv, DBTCL_GETREQ); + break; + case ENVREPGETTIMEOUT: + /* + * Two args for this. Error if different. 
+ */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_RepGetTimeout(interp, dbenv, objv[2]); + break; + case ENVREPLEASE: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = Tcl_ListObjGetElements(interp, objv[2], + &listobjc, &listobjv); + if (result == TCL_OK) + result = tcl_RepLease(interp, + listobjc, listobjv, dbenv); + break; + case ENVREPLIMIT: + result = tcl_RepLimit(interp, objc, objv, dbenv); + break; + case ENVREPNSITES: + result = tcl_RepNSites(interp, objc, objv, dbenv); + break; + case ENVREPPROCMESS: + result = tcl_RepProcessMessage(interp, objc, objv, dbenv); + break; + case ENVREPREQUEST: + result = tcl_RepRequest(interp, objc, objv, dbenv); + break; + case ENVREPSTART: + result = tcl_RepStart(interp, objc, objv, dbenv); + break; + case ENVREPSTAT: + result = tcl_RepStat(interp, objc, objv, dbenv); + break; + case ENVREPSTATPRT: + result = tcl_RepStatPrint(interp, objc, objv, dbenv); + break; + case ENVREPSYNC: + result = tcl_RepSync(interp, objc, objv, dbenv); + break; + case ENVREPTRANSPORT: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = Tcl_ListObjGetElements(interp, objv[2], + &listobjc, &listobjv); + if (result == TCL_OK) + result = tcl_RepTransport(interp, + listobjc, listobjv, dbenv, envip); + break; + case ENVREPMGR: + result = tcl_RepMgr(interp, objc, objv, dbenv); + break; + case ENVREPMGRGETACK: + result = tcl_RepGetAckPolicy(interp, objc, objv, dbenv); + break; + case ENVREPMGRGETLOCAL: + result = tcl_RepGetLocalSite(interp, objc, objv, dbenv); + break; + case ENVREPMGRSITELIST: + result = tcl_RepMgrSiteList(interp, objc, objv, dbenv); + break; + case ENVREPMGRSTAT: + result = tcl_RepMgrStat(interp, objc, objv, dbenv); + break; + case ENVREPMGRSTATPRT: + result = tcl_RepMgrStatPrint(interp, objc, objv, dbenv); + break; + case ENVTXNAPPLIED: + result = tcl_RepApplied(interp, objc, objv, 
dbenv); + break; + case ENVTXNSETID: + if (objc != 4) { + Tcl_WrongNumArgs(interp, 4, objv, "current max"); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[2], &newval); + if (result != TCL_OK) + return (result); + result = Tcl_GetLongFromObj(interp, objv[3], &otherval); + if (result != TCL_OK) + return (result); + ret = __txn_id_set(dbenv->env, + (u_int32_t)newval, (u_int32_t)otherval); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn setid"); + break; + case ENVTXNRECOVER: + result = tcl_TxnRecover(interp, objc, objv, dbenv, envip); + break; + case ENVTXNSTAT: + result = tcl_TxnStat(interp, objc, objv, dbenv); + break; + case ENVTXNSTATPRT: + result = tcl_TxnStatPrint(interp, objc, objv, dbenv); + break; + case ENVTXNTIMEOUT: + result = tcl_TxnTimeout(interp, objc, objv, dbenv); + break; + case ENVATTR: + result = tcl_EnvAttr(interp, objc, objv, dbenv); + break; + case ENVERRFILE: + /* + * One args for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "errfile"); + return (TCL_ERROR); + } + strarg = Tcl_GetStringFromObj(objv[2], NULL); + tcl_EnvSetErrfile(interp, dbenv, envip, strarg); + result = TCL_OK; + break; + case ENVERRPFX: + /* + * One args for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "pfx"); + return (TCL_ERROR); + } + strarg = Tcl_GetStringFromObj(objv[2], NULL); + result = tcl_EnvSetErrpfx(interp, dbenv, envip, strarg); + break; + case ENVMSGFILE: + /* + * One args for this. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "msgfile"); + return (TCL_ERROR); + } + strarg = Tcl_GetStringFromObj(objv[2], NULL); + tcl_EnvSetMsgfile(interp, dbenv, envip, strarg); + result = TCL_OK; + break; + case ENVSETFLAGS: + /* + * Two args for this. Error if different. 
+ */ + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "which on|off"); + return (TCL_ERROR); + } + result = tcl_EnvSetFlags(interp, dbenv, objv[2], objv[3]); + break; + case ENVSTATPRT: + result = tcl_EnvStatPrint(interp, objc, objv, dbenv); + break; + case ENVTEST: + result = tcl_EnvTest(interp, objc, objv, dbenv); + break; + case ENVVERB: + /* + * Two args for this. Error if different. + */ + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + result = tcl_EnvVerbose(interp, dbenv, objv[2], objv[3]); + break; +#endif + case ENVCDSGROUP: + result = tcl_CDSGroup(interp, objc, objv, dbenv, envip); + break; + case ENVCLOSE: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + /* + * Any transactions will be aborted, and an mpools + * closed automatically. We must delete any txn + * and mp widgets we have here too for this env. + * NOTE: envip is freed when we come back from + * this function. Set it to NULL to make sure no + * one tries to use it later. 
+ */ + ret = __mutex_free(dbenv->env, &envip->i_mutex); + _debug_check(); + if ((t_ret = dbenv->close(dbenv, 0)) != 0 && ret == 0) + ret = t_ret; + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env close"); + _EnvInfoDelete(interp, envip); + envip = NULL; + break; + case ENVDBREMOVE: + result = env_DbRemove(interp, objc, objv, dbenv); + break; + case ENVDBRENAME: + result = env_DbRename(interp, objc, objv, dbenv); + break; + case ENVGETCACHESIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_cachesize(dbenv, &gbytes, &bytes, &ncache); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_cachesize")) == TCL_OK) { + myobjv[0] = Tcl_NewLongObj((long)gbytes); + myobjv[1] = Tcl_NewLongObj((long)bytes); + myobjv[2] = Tcl_NewLongObj((long)ncache); + res = Tcl_NewListObj(3, myobjv); + } + break; + case ENVGETCACHEMAX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_cache_max(dbenv, &gbytes, &bytes); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_cache_max")) == TCL_OK) { + myobjv[0] = Tcl_NewLongObj((long)gbytes); + myobjv[1] = Tcl_NewLongObj((long)bytes); + res = Tcl_NewListObj(2, myobjv); + } + break; + case ENVGETCREATEDIR: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_create_dir(dbenv, &strval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_create_dir")) == TCL_OK) + res = NewStringObj(strval, strlen(strval)); + break; + case ENVGETDATADIRS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_data_dirs(dbenv, &dirs); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_data_dirs")) == TCL_OK) { + res = Tcl_NewListObj(0, NULL); + for (i = 0; result == TCL_OK && dirs[i] != NULL; i++) + result = Tcl_ListObjAppendElement(interp, res, + 
NewStringObj(dirs[i], strlen(dirs[i]))); + } + break; + case ENVGETENCRYPTFLAGS: + result = tcl_EnvGetEncryptFlags(interp, objc, objv, dbenv); + break; + case ENVGETERRPFX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + dbenv->get_errpfx(dbenv, &strval); + res = NewStringObj(strval, strlen(strval)); + break; + case ENVGETFLAGS: + result = env_GetFlags(interp, objc, objv, dbenv); + break; + case ENVGETHOME: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_home(dbenv, &strval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_home")) == TCL_OK) + res = NewStringObj(strval, strlen(strval)); + break; + case ENVGETLGBSIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lg_bsize(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lg_bsize")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLGDIR: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lg_dir(dbenv, &strval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lg_dir")) == TCL_OK) + res = NewStringObj(strval, strlen(strval)); + break; + case ENVGETLGFILEMODE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lg_filemode(dbenv, &intvalue); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lg_filemode")) == TCL_OK) + res = Tcl_NewLongObj((long)intvalue); + break; + case ENVGETLGMAX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lg_max(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lg_max")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLGREGIONMAX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, 
objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lg_regionmax(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lg_regionmax")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKDETECT: + result = env_GetLockDetect(interp, objc, objv, dbenv); + break; + case ENVGETLKINITLOCKERS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_memory_init(dbenv, DB_MEM_LOCKER, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_init_lockers")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKINITLOCKS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_memory_init(dbenv, DB_MEM_LOCK, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_init_locks")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKINITOBJECTS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_memory_init(dbenv, DB_MEM_LOCKOBJECT, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_init_objects")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKMAXLOCKERS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lk_max_lockers(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_max_lockers")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKMAXLOCKS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lk_max_locks(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_max_locks")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKMAXOBJECTS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, 
NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lk_max_objects(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_max_objects")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETLKPARTITIONS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_lk_partitions(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_lk_partitions")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETMPMAXOPENFD: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_mp_max_openfd(dbenv, &intvalue); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_mp_max_openfd")) == TCL_OK) + res = Tcl_NewIntObj(intvalue); + break; + case ENVGETMPMAXWRITE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_mp_max_write(dbenv, &intvalue, &timeout); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_mp_max_write")) == TCL_OK) { + myobjv[0] = Tcl_NewIntObj(intvalue); + myobjv[1] = Tcl_NewIntObj((int)timeout); + res = Tcl_NewListObj(2, myobjv); + } + break; + case ENVGETMPMMAPSIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_mp_mmapsize(dbenv, &size); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_mp_mmapsize")) == TCL_OK) + res = Tcl_NewLongObj((long)size); + break; + case ENVGETMPMTXCOUNT: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_mp_mtxcount(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_mp_mtxcount")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETMPPAGESIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = 
dbenv->get_mp_pagesize(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_mp_pagesize")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETOPENFLAG: + result = env_GetOpenFlag(interp, objc, objv, dbenv); + break; + case ENVGETSHMKEY: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_shm_key(dbenv, &shm_key); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env shm_key")) == TCL_OK) + res = Tcl_NewLongObj(shm_key); + break; + case ENVGETTASSPINS: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->mutex_get_tas_spins(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_tas_spins")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETTIMEOUT: + result = env_GetTimeout(interp, objc, objv, dbenv); + break; + case ENVGETTMPDIR: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_tmp_dir(dbenv, &strval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_tmp_dir")) == TCL_OK) + res = NewStringObj(strval, strlen(strval)); + break; + case ENVGETTXINIT: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_memory_init(dbenv, DB_MEM_TRANSACTION, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_tx_init")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETTXMAX: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = dbenv->get_tx_max(dbenv, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_tx_max")) == TCL_OK) + res = Tcl_NewLongObj((long)value); + break; + case ENVGETTXTIMESTAMP: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = 
dbenv->get_tx_timestamp(dbenv, &timeval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_tx_timestamp")) == TCL_OK) + res = Tcl_NewLongObj((long)timeval); + break; + case ENVGETVERBOSE: + result = env_GetVerbose(interp, objc, objv, dbenv); + break; + case ENVRESIZECACHE: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-resize_cache {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + if ((result = Tcl_ListObjGetElements( + interp, objv[2], &listobjc, &listobjv)) != TCL_OK) + break; + if (listobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-resize_cache {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, listobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, listobjv[1], &bytes); + if (result != TCL_OK) + break; + ret = dbenv->set_cachesize(dbenv, gbytes, bytes, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "resize_cache"); + break; + case ENVSETDATADIR: + /* + * One args for this. Error if different. 
+ */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "pfx"); + return (TCL_ERROR); + } + strarg = Tcl_GetStringFromObj(objv[2], NULL); + ret = dbenv->set_data_dir(dbenv, strarg); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env set data dir")); + case ENVSETMAXCACHE: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-set_max_cache {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + if ((result = Tcl_ListObjGetElements( + interp, objv[2], &listobjc, &listobjv)) != TCL_OK) + break; + if (listobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-set_max_cache {gbytes bytes}?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, listobjv[0], &gbytes); + if (result != TCL_OK) + break; + result = _GetUInt32(interp, listobjv[1], &bytes); + if (result != TCL_OK) + break; + ret = dbenv->set_cache_max(dbenv, gbytes, bytes); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_max_cache"); + break; + case ENVTXN: + result = tcl_Txn(interp, objc, objv, dbenv, envip); + break; + case ENVTXNCKP: + result = tcl_TxnCheckpoint(interp, objc, objv, dbenv); + break; + } + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * PUBLIC: int tcl_EnvRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); + * + * tcl_EnvRemove -- + */ +int +tcl_EnvRemove(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *envremopts[] = { +#ifdef CONFIG_TEST + "-overwrite", +#endif + "-data_dir", + "-encryptaes", + "-encryptany", + "-force", + "-home", + "-log_dir", + "-tmp_dir", + "-use_environ", + "-use_environ_root", + NULL + }; + enum envremopts { +#ifdef CONFIG_TEST + ENVREM_OVERWRITE, +#endif + ENVREM_DATADIR, + ENVREM_ENCRYPT_AES, + ENVREM_ENCRYPT_ANY, + ENVREM_FORCE, + ENVREM_HOME, + ENVREM_LOGDIR, + ENVREM_TMPDIR, + ENVREM_USE_ENVIRON, + ENVREM_USE_ENVIRON_ROOT + }; + DB_ENV *dbenv; + u_int32_t cflag, enc_flag, flag, forceflag, sflag; + int i, optindex, result, ret; + char *datadir, *home, *logdir, *passwd, *tmpdir; + + result = TCL_OK; + cflag = flag = forceflag = sflag = 0; + home = NULL; + passwd = NULL; + datadir = logdir = tmpdir = NULL; + enc_flag = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], envremopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum envremopts)optindex) { + case ENVREM_ENCRYPT_AES: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptaes passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = DB_ENCRYPT_AES; + break; + case ENVREM_ENCRYPT_ANY: + /* Make sure we have an arg to check against! */ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-encryptany passwd?"); + result = TCL_ERROR; + break; + } + passwd = Tcl_GetStringFromObj(objv[i++], NULL); + enc_flag = 0; + break; + case ENVREM_FORCE: + forceflag |= DB_FORCE; + break; + case ENVREM_HOME: + /* Make sure we have an arg to check against! 
*/ + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-home dir?"); + result = TCL_ERROR; + break; + } + home = Tcl_GetStringFromObj(objv[i++], NULL); + break; +#ifdef CONFIG_TEST + case ENVREM_OVERWRITE: + sflag |= DB_OVERWRITE; + break; +#endif + case ENVREM_USE_ENVIRON: + flag |= DB_USE_ENVIRON; + break; + case ENVREM_USE_ENVIRON_ROOT: + flag |= DB_USE_ENVIRON_ROOT; + break; + case ENVREM_DATADIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-data_dir dir"); + result = TCL_ERROR; + break; + } + datadir = Tcl_GetStringFromObj(objv[i++], NULL); + break; + case ENVREM_LOGDIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-log_dir dir"); + result = TCL_ERROR; + break; + } + logdir = Tcl_GetStringFromObj(objv[i++], NULL); + break; + case ENVREM_TMPDIR: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "-tmp_dir dir"); + result = TCL_ERROR; + break; + } + tmpdir = Tcl_GetStringFromObj(objv[i++], NULL); + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. 
+ */ + if (result != TCL_OK) + goto error; + } + + if ((ret = db_env_create(&dbenv, cflag)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db_env_create"); + goto error; + } + if (datadir != NULL) { + _debug_check(); + ret = dbenv->set_data_dir(dbenv, datadir); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_data_dir"); + if (result != TCL_OK) + goto error; + } + if (logdir != NULL) { + _debug_check(); + ret = dbenv->set_lg_dir(dbenv, logdir); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_log_dir"); + if (result != TCL_OK) + goto error; + } + if (tmpdir != NULL) { + _debug_check(); + ret = dbenv->set_tmp_dir(dbenv, tmpdir); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_tmp_dir"); + if (result != TCL_OK) + goto error; + } + if (passwd != NULL) { + ret = dbenv->set_encrypt(dbenv, passwd, enc_flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_encrypt"); + } + if (sflag != 0 && + (ret = dbenv->set_flags(dbenv, sflag, 1)) != 0) { + _debug_check(); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "set_flags"); + if (result != TCL_OK) + goto error; + } + dbenv->set_errpfx(dbenv, "EnvRemove"); + dbenv->set_errcall(dbenv, _ErrorFunc); + + flag |= forceflag; + /* + * When we get here we have parsed all the args. Now remove + * the environment. + */ + _debug_check(); + ret = dbenv->remove(dbenv, home, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env remove"); +error: + return (result); +} + +static void +_EnvInfoDelete(interp, envip) + Tcl_Interp *interp; /* Tcl Interpreter */ + DBTCL_INFO *envip; /* Info for env */ +{ + DBTCL_INFO *nextp, *p; + + /* + * Before we can delete the environment info, we must close + * any open subsystems in this env. We will: + * 1. Abort any transactions (which aborts any nested txns). + * 2. Close any mpools (which will put any pages itself). + * 3. Put any locks and close log cursors. + * 4. Close the error file. 
+ */ + for (p = LIST_FIRST(&__db_infohead); p != NULL; p = nextp) { + /* + * Check if this info structure "belongs" to this + * env. If so, remove its commands and info structure. + * We do not close/abort/whatever here, because we + * don't want to replicate DB behavior. + * + * NOTE: Only those types that can nest need to be + * itemized in the switch below. That is txns and mps. + * Other types like log cursors and locks will just + * get cleaned up here. + */ + if (p->i_parent == envip) { + switch (p->i_type) { + case I_TXN: + _TxnInfoDelete(interp, p); + break; + case I_MP: + _MpInfoDelete(interp, p); + break; + case I_AUX: + case I_DB: + case I_DBC: + case I_ENV: + case I_LOCK: + case I_LOGC: + case I_NDBM: + case I_PG: + case I_SEQ: + Tcl_SetResult(interp, + "_EnvInfoDelete: bad info type", + TCL_STATIC); + break; + } + nextp = LIST_NEXT(p, entries); + (void)Tcl_DeleteCommand(interp, p->i_name); + _DeleteInfo(p); + } else + nextp = LIST_NEXT(p, entries); + } + (void)Tcl_DeleteCommand(interp, envip->i_name); + _DeleteInfo(envip); +} + +#ifdef CONFIG_TEST +/* + * PUBLIC: int tcl_EnvIdReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_EnvIdReset -- + * Implements the ENV->fileid_reset command. + */ +int +tcl_EnvIdReset(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* arg count */ + Tcl_Obj * CONST* objv; /* args */ + DB_ENV *dbenv; /* Database pointer */ +{ + static const char *idwhich[] = { + "-encrypt", + NULL + }; + enum idwhich { + IDENCRYPT + }; + int enc, i, result, ret; + u_int32_t flags; + char *file; + + result = TCL_OK; + flags = 0; + i = 2; + Tcl_SetResult(interp, "0", TCL_STATIC); + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-encrypt? filename"); + return (TCL_ERROR); + } else if (objc > 3) { + /* + * If there is an arg, make sure it is the right one. 
+ */ + if (Tcl_GetIndexFromObj(interp, objv[2], idwhich, "option", + TCL_EXACT, &enc) != TCL_OK) + return (IS_HELP(objv[2])); + switch ((enum idwhich)enc) { + case IDENCRYPT: + flags |= DB_ENCRYPT; + break; + } + i = 3; + } + file = Tcl_GetStringFromObj(objv[i], NULL); + ret = dbenv->fileid_reset(dbenv, file, flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "fileid reset"); + return (result); +} + +/* + * PUBLIC: int tcl_EnvLsnReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_EnvLsnReset -- + * Implements the ENV->lsn_reset command. + */ +int +tcl_EnvLsnReset(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* arg count */ + Tcl_Obj * CONST* objv; /* args */ + DB_ENV *dbenv; /* Database pointer */ +{ + static const char *lsnwhich[] = { + "-encrypt", + NULL + }; + enum lsnwhich { + IDENCRYPT + }; + int enc, i, result, ret; + u_int32_t flags; + char *file; + + result = TCL_OK; + flags = 0; + i = 2; + Tcl_SetResult(interp, "0", TCL_STATIC); + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-encrypt? filename"); + return (TCL_ERROR); + } else if (objc > 3) { + /* + * If there is an arg, make sure it is the right one. 
	 */
	if (Tcl_GetIndexFromObj(interp, objv[2], lsnwhich, "option",
		    TCL_EXACT, &enc) != TCL_OK)
			return (IS_HELP(objv[2]));

		switch ((enum lsnwhich)enc) {
		case IDENCRYPT:
			flags |= DB_ENCRYPT;
			break;
		}
		i = 3;
	}
	/* The remaining argument is the database file whose LSNs we reset. */
	file = Tcl_GetStringFromObj(objv[i], NULL);
	ret = dbenv->lsn_reset(dbenv, file, flags);
	result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "lsn reset");
	return (result);
}

/*
 * PUBLIC: int tcl_EnvVerbose __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *,
 * PUBLIC: Tcl_Obj *));
 *
 * tcl_EnvVerbose --
 *	Map a Tcl subsystem keyword to its DB_VERB_* flag and turn verbose
 *	output for that subsystem on or off via DB_ENV->set_verbose.
 */
int
tcl_EnvVerbose(interp, dbenv, which, onoff)
	Tcl_Interp *interp;		/* Interpreter */
	DB_ENV *dbenv;			/* Env pointer */
	Tcl_Obj *which;			/* Which subsystem */
	Tcl_Obj *onoff;			/* On or off */
{
	/*
	 * The string table and the enum below must stay in the same order:
	 * Tcl_GetIndexFromObj returns the index of the matched string.
	 */
	static const char *verbwhich[] = {
		"deadlock",
		"fileops",
		"fileops_all",
		"recovery",
		"register",
		"rep",
		"rep_elect",
		"rep_lease",
		"rep_misc",
		"rep_msgs",
		"rep_sync",
		"rep_system",
		"rep_test",
		"repmgr_connfail",
		"repmgr_misc",
		"wait",
		NULL
	};
	enum verbwhich {
		ENVVERB_DEADLOCK,
		ENVVERB_FILEOPS,
		ENVVERB_FILEOPS_ALL,
		ENVVERB_RECOVERY,
		ENVVERB_REGISTER,
		ENVVERB_REPLICATION,
		ENVVERB_REP_ELECT,
		ENVVERB_REP_LEASE,
		ENVVERB_REP_MISC,
		ENVVERB_REP_MSGS,
		ENVVERB_REP_SYNC,
		ENVVERB_REP_SYSTEM,
		ENVVERB_REP_TEST,
		ENVVERB_REPMGR_CONNFAIL,
		ENVVERB_REPMGR_MISC,
		ENVVERB_WAITSFOR
	};
	static const char *verbonoff[] = {
		"off",
		"on",
		NULL
	};
	enum verbonoff {
		ENVVERB_OFF,
		ENVVERB_ON
	};
	int on, optindex, ret;
	u_int32_t wh;

	if (Tcl_GetIndexFromObj(interp, which, verbwhich, "option",
	    TCL_EXACT, &optindex) != TCL_OK)
		return (IS_HELP(which));

	switch ((enum verbwhich)optindex) {
	case ENVVERB_DEADLOCK:
		wh = DB_VERB_DEADLOCK;
		break;
	case ENVVERB_FILEOPS:
		wh = DB_VERB_FILEOPS;
		break;
	case ENVVERB_FILEOPS_ALL:
		wh = DB_VERB_FILEOPS_ALL;
		break;
	case ENVVERB_RECOVERY:
		wh = DB_VERB_RECOVERY;
		break;
	case ENVVERB_REGISTER:
		wh = DB_VERB_REGISTER;
		break;
	case ENVVERB_REPLICATION:
		wh = DB_VERB_REPLICATION;
		break;
	case ENVVERB_REP_ELECT:
		wh = DB_VERB_REP_ELECT;
		break;
	case ENVVERB_REP_LEASE:
		wh = DB_VERB_REP_LEASE;
		break;
	case ENVVERB_REP_MISC:
		wh = DB_VERB_REP_MISC;
		break;
	case ENVVERB_REP_MSGS:
		wh = DB_VERB_REP_MSGS;
		break;
	case ENVVERB_REP_SYNC:
		wh = DB_VERB_REP_SYNC;
		break;
	case ENVVERB_REP_SYSTEM:
		wh = DB_VERB_REP_SYSTEM;
		break;
	case ENVVERB_REP_TEST:
		wh = DB_VERB_REP_TEST;
		break;
	case ENVVERB_REPMGR_CONNFAIL:
		wh = DB_VERB_REPMGR_CONNFAIL;
		break;
	case ENVVERB_REPMGR_MISC:
		wh = DB_VERB_REPMGR_MISC;
		break;
	case ENVVERB_WAITSFOR:
		wh = DB_VERB_WAITSFOR;
		break;
	default:
		return (TCL_ERROR);
	}
	if (Tcl_GetIndexFromObj(interp, onoff, verbonoff, "option",
	    TCL_EXACT, &optindex) != TCL_OK)
		return (IS_HELP(onoff));
	switch ((enum verbonoff)optindex) {
	case ENVVERB_OFF:
		on = 0;
		break;
	case ENVVERB_ON:
		on = 1;
		break;
	default:
		return (TCL_ERROR);
	}
	ret = dbenv->set_verbose(dbenv, wh, on);
	return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret),
	    "env set verbose"));
}
#endif

#ifdef CONFIG_TEST
/*
 * PUBLIC: int tcl_EnvAttr __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 *
 * tcl_EnvAttr --
 *	Return a list of the env's attributes
 */
int
tcl_EnvAttr(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments? */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;			/* Env pointer */
{
	ENV *env;
	Tcl_Obj *myobj, *retlist;
	int result;

	env = dbenv->env;
	result = TCL_OK;

	if (objc > 2) {
		Tcl_WrongNumArgs(interp, 2, objv, NULL);
		return (TCL_ERROR);
	}
	retlist = Tcl_NewListObj(0, NULL);
	/*
	 * XXX
	 * We peek at the ENV to determine what subsystems we have available
	 * in this environment.
	 */
	/* NOTE(review): assumes env->db_home is non-NULL here -- confirm. */
	myobj = NewStringObj("-home", strlen("-home"));
	if ((result = Tcl_ListObjAppendElement(interp,
	    retlist, myobj)) != TCL_OK)
		goto err;
	myobj = NewStringObj(env->db_home, strlen(env->db_home));
	if ((result = Tcl_ListObjAppendElement(interp,
	    retlist, myobj)) != TCL_OK)
		goto err;
	if (CDB_LOCKING(env)) {
		myobj = NewStringObj("-cdb", strlen("-cdb"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (CRYPTO_ON(env)) {
		myobj = NewStringObj("-crypto", strlen("-crypto"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (LOCKING_ON(env)) {
		myobj = NewStringObj("-lock", strlen("-lock"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (LOGGING_ON(env)) {
		myobj = NewStringObj("-log", strlen("-log"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (MPOOL_ON(env)) {
		myobj = NewStringObj("-mpool", strlen("-mpool"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (REP_ON(env)) {
		myobj = NewStringObj("-rep", strlen("-rep"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	if (TXN_ON(env)) {
		myobj = NewStringObj("-txn", strlen("-txn"));
		if ((result = Tcl_ListObjAppendElement(interp,
		    retlist, myobj)) != TCL_OK)
			goto err;
	}
	Tcl_SetObjResult(interp, retlist);
err:
	return (result);
}

/*
 * env_EventInfo --
 *	Implements the ENV->event_info command.
 */
static int
env_EventInfo(interp, objc, objv, dbenv, ip)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments?
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; + DBTCL_INFO *ip; +{ + static const char *envinfo_option_names[] = { + "-clear", + NULL + }; + enum envinfo_options { + ENVINFCLEAR + }; + DB_LSN *lsn; + Tcl_Obj *lsnlist[2], *myobjv[2], *one_event, *res; + int clear, enc, i, ret, t_ret; + u_int32_t bit_flag; + + static const struct { + u_int32_t flag; + char *name; + } event_names[] = { + { DB_EVENT_PANIC, "panic" }, + { DB_EVENT_REG_ALIVE, "reg_alive" }, + { DB_EVENT_REG_PANIC, "reg_panic" }, + { DB_EVENT_REP_CLIENT, "client" }, + { DB_EVENT_REP_CONNECT_BROKEN, "connection_broken" }, + { DB_EVENT_REP_CONNECT_ESTD, "connection_established" }, + { DB_EVENT_REP_CONNECT_TRY_FAILED, "connection_retry_failed" }, + { DB_EVENT_REP_DUPMASTER, "dupmaster" }, + { DB_EVENT_REP_ELECTED, "elected" }, + { DB_EVENT_REP_ELECTION_FAILED, "election_failed" }, + { DB_EVENT_REP_JOIN_FAILURE, "join_failure" }, + { DB_EVENT_REP_LOCAL_SITE_REMOVED, "local_site_removed" }, + { DB_EVENT_REP_MASTER, "master" }, + { DB_EVENT_REP_MASTER_FAILURE, "master_failure" }, + { DB_EVENT_REP_NEWMASTER, "newmaster" }, + { DB_EVENT_REP_PERM_FAILED, "perm_failed" }, + { DB_EVENT_REP_SITE_ADDED, "site_added" }, + { DB_EVENT_REP_SITE_REMOVED, "site_removed" }, + { DB_EVENT_REP_STARTUPDONE, "startupdone" }, + { DB_EVENT_REP_WOULD_ROLLBACK, "would_rollback" }, + { DB_EVENT_WRITE_FAILED, "write_failed" }, + { DB_EVENT_NO_SUCH_EVENT, NULL } + }; + /* + * Note that when this list grows to more than 32 event types, the code + * below (the shift operation) will be broken. 
+ */ + + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-clear?"); + return (TCL_ERROR); + } + clear = 0; + if (objc == 3) { + if (Tcl_GetIndexFromObj(interp, objv[2], envinfo_option_names, + "option", TCL_EXACT, &enc) != TCL_OK) + return (IS_HELP(objv[2])); + switch ((enum envinfo_options)enc) { + case ENVINFCLEAR: + clear = 1; + break; + } + } + + if(ip->i_event_info == NULL) { + /* Script needs "-event" in "berkdb env" cmd. */ + Tcl_SetResult(interp, + "event collection not enabled on this env", TCL_STATIC); + return (TCL_ERROR); + } + + res = Tcl_NewListObj(0, NULL); + if ((ret = tcl_LockMutex(dbenv, ip->i_mutex)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mutex lock")); + ret = TCL_OK; + for (i = 0; event_names[i].flag != DB_EVENT_NO_SUCH_EVENT; i++) { + bit_flag = 1 << event_names[i].flag; + if (FLD_ISSET(ip->i_event_info->events, bit_flag)) { + /* + * Build the 2-element list "one_event", consisting of + * the event name and the event info. The event info is + * sometimes more than a scalar value: in those cases, + * reuse the "myobjv" array to build the sub-list first. 
+ * For this reason, set up the event info first + * (myobjv[1]), and only when that is done set up the + * event name (myobjv[0]) + */ + switch (event_names[i].flag) { + case DB_EVENT_PANIC: + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->panic_error); + break; + case DB_EVENT_REG_ALIVE: + myobjv[1] = Tcl_NewLongObj((long)ip-> + i_event_info->attached_process); + break; + case DB_EVENT_REP_CONNECT_BROKEN: + myobjv[0] = Tcl_NewIntObj(ip-> + i_event_info->conn_broken_info.eid); + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->conn_broken_info.error); + myobjv[1] = Tcl_NewListObj(2, myobjv); + break; + case DB_EVENT_REP_CONNECT_ESTD: + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->connected_eid); + break; + case DB_EVENT_REP_CONNECT_TRY_FAILED: + myobjv[0] = Tcl_NewIntObj(ip-> + i_event_info->conn_failed_try_info.eid); + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->conn_failed_try_info.error); + myobjv[1] = Tcl_NewListObj(2, myobjv); + break; + case DB_EVENT_REP_NEWMASTER: + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->newmaster_eid); + break; + case DB_EVENT_REP_SITE_ADDED: + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->added_eid); + break; + case DB_EVENT_REP_SITE_REMOVED: + myobjv[1] = Tcl_NewIntObj(ip-> + i_event_info->removed_eid); + break; + case DB_EVENT_REP_WOULD_ROLLBACK: + lsn = &ip->i_event_info->sync_point; + lsnlist[0] = Tcl_NewLongObj((long)lsn->file); + lsnlist[1] = Tcl_NewLongObj((long)lsn->offset); + myobjv[1] = Tcl_NewListObj(2, lsnlist); + break; + default: + myobjv[1] = NewStringObj("", 0); + break; + } + myobjv[0] = NewStringObj(event_names[i].name, + strlen(event_names[i].name)); + + one_event = Tcl_NewListObj(2, myobjv); + if ((ret = Tcl_ListObjAppendElement(interp, + res, one_event)) != TCL_OK) + break; + } + } + + /* + * Here, either everything is OK, or Tcl_ListObjAppendElement failed, + * above. Regardless, we need to make sure we unlock the mutex. 
Then,
	 * if either operation generated an error, return it, giving precedence
	 * to the earlier-occurring one.
	 */
	t_ret = tcl_UnlockMutex(dbenv, ip->i_mutex);
	if (ret != TCL_OK)
		return (ret);
	if (t_ret != 0)
		return (_ReturnSetup(interp, t_ret, DB_RETOK_STD(t_ret),
		    "mutex unlock"));
	Tcl_SetObjResult(interp, res);

	/* With "-clear", forget the events we just reported. */
	if (clear)
		ip->i_event_info->events = 0;
	return (TCL_OK);
}

/*
 * PUBLIC: int tcl_EnvSetFlags __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *,
 * PUBLIC: Tcl_Obj *));
 *
 * tcl_EnvSetFlags --
 *	Set flags in an env.  Maps a Tcl "-flag" keyword to its DB_* flag
 *	and calls DB_ENV->set_flags with onoff 0 or 1.
 */
int
tcl_EnvSetFlags(interp, dbenv, which, onoff)
	Tcl_Interp *interp;		/* Interpreter */
	DB_ENV *dbenv;			/* Env pointer */
	Tcl_Obj *which;			/* Which subsystem */
	Tcl_Obj *onoff;			/* On or off */
{
	/* String table and enum must stay in the same order. */
	static const char *sfwhich[] = {
		"-auto_commit",
		"-direct_db",
		"-dsync_db",
		"-multiversion",
		"-nolock",
		"-nommap",
		"-nopanic",
		"-nosync",
		"-overwrite",
		"-panic",
		"-snapshot",
		"-time_notgranted",
		"-wrnosync",
		"-hotbackup_in_progress",
		NULL
	};
	enum sfwhich {
		ENVSF_AUTOCOMMIT,
		ENVSF_DIRECTDB,
		ENVSF_DSYNCDB,
		ENVSF_MULTIVERSION,
		ENVSF_NOLOCK,
		ENVSF_NOMMAP,
		ENVSF_NOPANIC,
		ENVSF_NOSYNC,
		ENVSF_OVERWRITE,
		ENVSF_PANIC,
		ENVSF_SNAPSHOT,
		ENVSF_TIME_NOTGRANTED,
		ENVSF_WRNOSYNC,
		ENVSF_HOTBACKUP_IN_PROGRESS
	};
	static const char *sfonoff[] = {
		"off",
		"on",
		NULL
	};
	enum sfonoff {
		ENVSF_OFF,
		ENVSF_ON
	};
	int on, optindex, ret;
	u_int32_t wh;

	if (Tcl_GetIndexFromObj(interp, which, sfwhich, "option",
	    TCL_EXACT, &optindex) != TCL_OK)
		return (IS_HELP(which));

	switch ((enum sfwhich)optindex) {
	case ENVSF_AUTOCOMMIT:
		wh = DB_AUTO_COMMIT;
		break;
	case ENVSF_DIRECTDB:
		wh = DB_DIRECT_DB;
		break;
	case ENVSF_DSYNCDB:
		wh = DB_DSYNC_DB;
		break;
	case ENVSF_MULTIVERSION:
		wh = DB_MULTIVERSION;
		break;
	case ENVSF_NOLOCK:
		wh = DB_NOLOCKING;
		break;
	case ENVSF_NOMMAP:
		wh = DB_NOMMAP;
		break;
	case ENVSF_NOSYNC:
		wh = DB_TXN_NOSYNC;
		break;
	case ENVSF_NOPANIC:
		wh = DB_NOPANIC;
		break;
	case ENVSF_PANIC:
		wh = DB_PANIC_ENVIRONMENT;
		break;
	case ENVSF_OVERWRITE:
		wh = DB_OVERWRITE;
		break;
	case ENVSF_SNAPSHOT:
		wh = DB_TXN_SNAPSHOT;
		break;
	case ENVSF_TIME_NOTGRANTED:
		wh = DB_TIME_NOTGRANTED;
		break;
	case ENVSF_WRNOSYNC:
		wh = DB_TXN_WRITE_NOSYNC;
		break;
	case ENVSF_HOTBACKUP_IN_PROGRESS:
		wh = DB_HOTBACKUP_IN_PROGRESS;
		break;
	default:
		return (TCL_ERROR);
	}
	if (Tcl_GetIndexFromObj(interp, onoff, sfonoff, "option",
	    TCL_EXACT, &optindex) != TCL_OK)
		return (IS_HELP(onoff));
	switch ((enum sfonoff)optindex) {
	case ENVSF_OFF:
		on = 0;
		break;
	case ENVSF_ON:
		on = 1;
		break;
	default:
		return (TCL_ERROR);
	}
	ret = dbenv->set_flags(dbenv, wh, on);
	return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret),
	    "env set flags"));
}

/*
 * tcl_EnvTest --
 *	The "$env test ..." command is a sort of catch-all for any sort of
 * desired test hook manipulation.  The "abort", "check" and "copy"
 * subcommands all set one or another certain location in the DB_ENV handle
 * to a specific value.  (In the case of "check", the value is an integer
 * passed in with the command itself.  For the other two, the "value" is a
 * predefined enum constant, specified by name.)
 *	The "$env test force ..." subcommand invokes other, more arbitrary
 * manipulations.
 *	Although these functions may not all seem closely related, putting
 * them all under the name "test" has the aesthetic appeal of keeping the
 * rest of the API clean.
 *
 * PUBLIC: int tcl_EnvTest __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *));
 */
int
tcl_EnvTest(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments?
 */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;			/* Env pointer */
{
	static const char *envtestcmd[] = {
		"abort",
		"check",
		"copy",
		"force",
		NULL
	};
	enum envtestcmd {
		ENVTEST_ABORT,
		ENVTEST_CHECK,
		ENVTEST_COPY,
		ENVTEST_FORCE
	};
	static const char *envtestat[] = {
		"electinit",
		"electvote1",
		"no_pages",
		"none",
		"predestroy",
		"preopen",
		"postdestroy",
		"postlog",
		"postlogmeta",
		"postopen",
		"postsync",
		"repmgr_perm",
		"subdb_lock",
		NULL
	};
	enum envtestat {
		ENVTEST_ELECTINIT,
		ENVTEST_ELECTVOTE1,
		ENVTEST_NO_PAGES,
		ENVTEST_NONE,
		ENVTEST_PREDESTROY,
		ENVTEST_PREOPEN,
		ENVTEST_POSTDESTROY,
		ENVTEST_POSTLOG,
		ENVTEST_POSTLOGMETA,
		ENVTEST_POSTOPEN,
		ENVTEST_POSTSYNC,
		ENVTEST_REPMGR_PERM,
		ENVTEST_SUBDB_LOCKS
	};
	static const char *envtestforce[] = {
		"noarchive_timeout",
		NULL
	};
	enum envtestforce {
		ENVTEST_NOARCHIVE_TIMEOUT
	};
	ENV *env;
	int *loc, optindex, result, testval;

	env = dbenv->env;
	result = TCL_OK;
	loc = NULL;

	if (objc != 4) {
		Tcl_WrongNumArgs(interp,
		    2, objv, "abort|check|copy|force ");
		return (TCL_ERROR);
	}

	/*
	 * This must be the "check", "copy" or "abort" portion of the command.
	 */
	if (Tcl_GetIndexFromObj(interp, objv[2], envtestcmd, "command",
	    TCL_EXACT, &optindex) != TCL_OK) {
		result = IS_HELP(objv[2]);
		return (result);
	}
	switch ((enum envtestcmd)optindex) {
	case ENVTEST_ABORT:
		loc = &env->test_abort;
		break;
	case ENVTEST_CHECK:
		loc = &env->test_check;
		/* "check" takes a literal integer, not a location name. */
		if (Tcl_GetIntFromObj(interp, objv[3], &testval) != TCL_OK) {
			result = IS_HELP(objv[3]);
			return (result);
		}
		goto done;
	case ENVTEST_COPY:
		loc = &env->test_copy;
		break;
	case ENVTEST_FORCE:
		if (Tcl_GetIndexFromObj(interp, objv[3], envtestforce, "arg",
		    TCL_EXACT, &optindex) != TCL_OK) {
			result = IS_HELP(objv[3]);
			return (result);
		}
		/*
		 * In the future we might add more, and then we'd use a switch
		 * statement.
		 */
		DB_ASSERT(env,
		    (enum envtestforce)optindex == ENVTEST_NOARCHIVE_TIMEOUT);
		return (tcl_RepNoarchiveTimeout(interp, dbenv));
	default:
		Tcl_SetResult(interp, "Illegal store location", TCL_STATIC);
		return (TCL_ERROR);
	}

	/*
	 * This must be the location portion of the command.
	 */
	if (Tcl_GetIndexFromObj(interp, objv[3], envtestat, "location",
	    TCL_EXACT, &optindex) != TCL_OK) {
		result = IS_HELP(objv[3]);
		return (result);
	}
	switch ((enum envtestat)optindex) {
	case ENVTEST_ELECTINIT:
		/* The DB_TEST_ELECT* hooks only make sense for "abort". */
		DB_ASSERT(env, loc == &env->test_abort);
		testval = DB_TEST_ELECTINIT;
		break;
	case ENVTEST_ELECTVOTE1:
		DB_ASSERT(env, loc == &env->test_abort);
		testval = DB_TEST_ELECTVOTE1;
		break;
	case ENVTEST_NO_PAGES:
		DB_ASSERT(env, loc == &env->test_abort);
		testval = DB_TEST_NO_PAGES;
		break;
	case ENVTEST_NONE:
		testval = 0;
		break;
	case ENVTEST_PREOPEN:
		testval = DB_TEST_PREOPEN;
		break;
	case ENVTEST_PREDESTROY:
		testval = DB_TEST_PREDESTROY;
		break;
	case ENVTEST_POSTLOG:
		testval = DB_TEST_POSTLOG;
		break;
	case ENVTEST_POSTLOGMETA:
		testval = DB_TEST_POSTLOGMETA;
		break;
	case ENVTEST_POSTOPEN:
		testval = DB_TEST_POSTOPEN;
		break;
	case ENVTEST_POSTDESTROY:
		testval = DB_TEST_POSTDESTROY;
		break;
	case ENVTEST_POSTSYNC:
		testval = DB_TEST_POSTSYNC;
		break;
	case ENVTEST_REPMGR_PERM:
		DB_ASSERT(env, loc == &env->test_abort);
		testval = DB_TEST_REPMGR_PERM;
		break;
	case ENVTEST_SUBDB_LOCKS:
		DB_ASSERT(env, loc == &env->test_abort);
		testval = DB_TEST_SUBDB_LOCKS;
		break;
	default:
		Tcl_SetResult(interp, "Illegal test location", TCL_STATIC);
		return (TCL_ERROR);
	}
done:
	*loc = testval;
	Tcl_SetResult(interp, "0", TCL_STATIC);
	return (result);
}
#endif

/*
 * env_DbRemove --
 *	Implements the ENV->dbremove command.
 */
static int
env_DbRemove(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments?
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + static const char *envdbrem[] = { + "-auto_commit", + "-notdurable", + "-txn", + "--", + NULL + }; + enum envdbrem { + TCL_EDBREM_COMMIT, + TCL_EDBREM_NOTDURABLE, + TCL_EDBREM_TXN, + TCL_EDBREM_ENDARG + }; + DB_TXN *txn; + u_int32_t flag; + int endarg, i, optindex, result, ret, subdblen; + u_char *subdbtmp; + char *arg, *db, *dbr, *subdb, *subdbr, msg[MSG_SIZE]; + size_t nlen; + + txn = NULL; + result = TCL_OK; + subdbtmp = NULL; + db = dbr = subdb = subdbr = NULL; + endarg = nlen = subdblen = 0; + flag = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename ?database?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], envdbrem, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum envdbrem)optindex) { + case TCL_EDBREM_COMMIT: + flag |= DB_AUTO_COMMIT; + break; + case TCL_EDBREM_TXN: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "env dbremove: Invalid txn %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + break; + case TCL_EDBREM_ENDARG: + endarg = 1; + break; + case TCL_EDBREM_NOTDURABLE: + flag |= DB_TXN_NOT_DURABLE; + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + /* + * Any args we have left, (better be 1 or 2 left) are + * file names. 
If there is 1, a db name, if 2 a db and subdb name. + */ + if ((i != (objc - 1)) || (i != (objc - 2))) { + /* + * Dbs must be NULL terminated file names, but subdbs can + * be anything. Use Strings for the db name and byte + * arrays for the subdb. + */ + db = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(db, "") == 0) + db = NULL; + if (i != objc) { + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &subdblen); + if ((ret = __os_malloc( + dbenv->env, (size_t)subdblen + 1, &subdb)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdb, subdbtmp, (size_t)subdblen); + subdb[subdblen] = '\0'; + } + } else { + Tcl_WrongNumArgs(interp, 2, objv, "?args? filename ?database?"); + result = TCL_ERROR; + goto error; + } + ret = dbenv->dbremove(dbenv, txn, db, subdb, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env dbremove"); + + /* + * If we are heap, we have auxiliary dbs. However in order to tell if + * we are heap we have to open the db to get its type. So we will just + * try the dbremove and ignore ENOENT. 
+ */ + if ((db != NULL || subdb != NULL) && ret == 0) { + /* set up file name for associated recno db for heap*/ + if (db != NULL) { + nlen = strlen(db); + if ((ret = __os_malloc( + dbenv->env, nlen + 2, &dbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(dbr, db, nlen); + dbr[nlen] = '1'; + dbr[nlen+1] = '\0'; + } + + if (subdb != NULL) { + if ((ret = __os_malloc( + dbenv->env, (size_t)subdblen + 2, &subdbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdbr, subdb, (size_t)subdblen); + subdbr[subdblen] = '1'; + subdbr[subdblen+1] = '\0'; + } + + + ret = dbenv->dbremove(dbenv, txn, dbr, subdbr, flag); + if (ret == ENOENT) + ret = 0; + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db remove associated recno"); + if (ret) + goto error; + + /* Remove the btree which is doing the RID to recno mapping*/ + if (dbr != NULL) + dbr[nlen] = '2'; + if (subdbr != NULL) + subdbr[subdblen] = '2'; + ret = dbenv->dbremove(dbenv, txn, dbr, subdbr, flag); + if (ret == ENOENT) + ret = 0; + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db remove associated btree"); + } +error: + if (subdb) + __os_free(dbenv->env, subdb); + if (subdbr) + __os_free(dbenv->env, subdbr); + if (dbr) + __os_free(dbenv->env, dbr); + return (result); +} + +/* + * env_DbRename -- + * Implements the ENV->dbrename command. + */ +static int +env_DbRename(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + static const char *envdbmv[] = { + "-auto_commit", + "-txn", + "--", + NULL + }; + enum envdbmv { + TCL_EDBMV_COMMIT, + TCL_EDBMV_TXN, + TCL_EDBMV_ENDARG + }; + DB_TXN *txn; + u_int32_t flag; + int endarg, i, newlen, optindex, result, ret, subdblen; + u_char *subdbtmp; + char *arg, *db, *dbr, *newname, *newnamer, *subdb, *subdbr; + char msg[MSG_SIZE]; + size_t nlen; + + txn = NULL; + result = TCL_OK; + subdbtmp = NULL; + db = dbr = newname = newnamer = subdb = subdbr = NULL; + endarg = nlen = subdblen = 0; + flag = 0; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 3, objv, + "?args? filename ?database? ?newname?"); + return (TCL_ERROR); + } + + /* + * We must first parse for the environment flag, since that + * is needed for db_create. Then create the db handle. + */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], envdbmv, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto error; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum envdbmv)optindex) { + case TCL_EDBMV_COMMIT: + flag |= DB_AUTO_COMMIT; + break; + case TCL_EDBMV_TXN: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "env dbrename: Invalid txn %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + break; + case TCL_EDBMV_ENDARG: + endarg = 1; + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + if (endarg) + break; + } + if (result != TCL_OK) + goto error; + /* + * Any args we have left, (better be 2 or 3 left) are + * file names. If there is 2, a db name, if 3 a db and subdb name. 
+ */ + if ((i != (objc - 2)) || (i != (objc - 3))) { + /* + * Dbs must be NULL terminated file names, but subdbs can + * be anything. Use Strings for the db name and byte + * arrays for the subdb. + */ + db = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(db, "") == 0) + db = NULL; + if (i == objc - 2) { + subdbtmp = + Tcl_GetByteArrayFromObj(objv[i++], &subdblen); + if ((ret = __os_malloc( + dbenv->env, (size_t)subdblen + 1, &subdb)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdb, subdbtmp, (size_t)subdblen); + subdb[subdblen] = '\0'; + } + subdbtmp = Tcl_GetByteArrayFromObj(objv[i++], &newlen); + if ((ret = __os_malloc( + dbenv->env, (size_t)newlen + 1, &newname)) != 0) { + Tcl_SetResult(interp, + db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(newname, subdbtmp, (size_t)newlen); + newname[newlen] = '\0'; + } else { + Tcl_WrongNumArgs(interp, 3, objv, + "?args? filename ?database? ?newname?"); + result = TCL_ERROR; + goto error; + } + ret = dbenv->dbrename(dbenv, txn, db, subdb, newname, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env dbrename"); + + /* + * If we are heap, we have auxiliary dbs. However in order to tell if + * we are heap we have to open the db to get its type. So we will just + * try the dbrename and ignore ENOENT. 
+ */ + if ((db != NULL || subdb != NULL) && ret == 0) { + /* set up file name for associated recno db for heap*/ + if (db != NULL) { + nlen = strlen(db); + if ((ret = __os_malloc( + dbenv->env, nlen + 2, &dbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(dbr, db, nlen); + dbr[nlen] = '1'; + dbr[nlen+1] = '\0'; + } + + if (subdb != NULL) { + if ((ret = __os_malloc( + dbenv->env, (size_t)subdblen + 2, &subdbr)) != 0) { + Tcl_SetResult( + interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(subdbr, subdb, (size_t)subdblen); + subdbr[subdblen] = '1'; + subdbr[subdblen+1] = '\0'; + } + + if ((ret = __os_malloc(dbenv->env, + (size_t)newlen + 2, &newnamer)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (0); + } + memcpy(newnamer, newname, (size_t)newlen); + newnamer[newlen] = '1'; + newnamer[newlen+1] = '\0'; + + ret = dbenv->dbrename(dbenv, txn, dbr, subdbr, newnamer, flag); + if (ret == ENOENT) + ret = 0; + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db rename associated recno"); + if (ret != 0) + goto error; + + /* Rename the btree which is doing the RID to recno mapping*/ + if (dbr != NULL) + dbr[nlen] = '2'; + if (subdbr != NULL) + subdbr[subdblen] = '2'; + newnamer[newlen] = '2'; + ret = dbenv->dbrename(dbenv, txn, dbr, subdbr, newnamer, flag); + if (ret == ENOENT) + ret = 0; + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db rename associated btree"); + } + + +error: + if (subdb) + __os_free(dbenv->env, subdb); + if (newname) + __os_free(dbenv->env, newname); + if (dbr) + __os_free(dbenv->env, dbr); + if (subdbr) + __os_free(dbenv->env, subdbr); + if (newnamer) + __os_free(dbenv->env, newnamer); + return (result); +} + +/* + * env_GetFlags -- + * Implements the ENV->get_flags command. + */ +static int +env_GetFlags(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } open_flags[] = { + { DB_AUTO_COMMIT, "-auto_commit" }, + { DB_CDB_ALLDB, "-cdb_alldb" }, + { DB_DIRECT_DB, "-direct_db" }, + { DB_DSYNC_DB, "-dsync_db" }, + { DB_MULTIVERSION, "-multiversion" }, + { DB_NOLOCKING, "-nolock" }, + { DB_NOMMAP, "-nommap" }, + { DB_NOPANIC, "-nopanic" }, + { DB_OVERWRITE, "-overwrite" }, + { DB_PANIC_ENVIRONMENT, "-panic" }, + { DB_REGION_INIT, "-region_init" }, + { DB_TIME_NOTGRANTED, "-time_notgranted" }, + { DB_TXN_NOSYNC, "-nosync" }, + { DB_TXN_NOWAIT, "-nowait" }, + { DB_TXN_SNAPSHOT, "-snapshot" }, + { DB_TXN_WRITE_NOSYNC, "-wrnosync" }, + { DB_YIELDCPU, "-yield" }, + { DB_HOTBACKUP_IN_PROGRESS, "-hotbackup_in_progress" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = dbenv->get_flags(dbenv, &flags); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; open_flags[i].flag != 0; i++) + if (LF_ISSET(open_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, open_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} + +/* + * env_GetOpenFlag -- + * Implements the ENV->get_open_flags command. + */ +static int +env_GetOpenFlag(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } open_flags[] = { + { DB_CREATE, "-create" }, + { DB_FAILCHK, "-failchk" }, + { DB_INIT_CDB, "-cdb" }, + { DB_INIT_LOCK, "-lock" }, + { DB_INIT_LOG, "-log" }, + { DB_INIT_MPOOL, "-mpool" }, + { DB_INIT_REP, "-rep" }, + { DB_INIT_TXN, "-txn" }, + { DB_LOCKDOWN, "-lockdown" }, + { DB_PRIVATE, "-private" }, + { DB_RECOVER, "-recover" }, + { DB_RECOVER_FATAL, "-recover_fatal" }, + { DB_REGISTER, "-register" }, + { DB_FAILCHK, "-failchk" }, + { DB_SYSTEM_MEM, "-system_mem" }, + { DB_THREAD, "-thread" }, + { DB_USE_ENVIRON, "-use_environ" }, + { DB_USE_ENVIRON_ROOT, "-use_environ_root" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = dbenv->get_open_flags(dbenv, &flags); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_open_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; open_flags[i].flag != 0; i++) + if (LF_ISSET(open_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, open_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} + +/* + * PUBLIC: int tcl_EnvGetEncryptFlags __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_EnvGetEncryptFlags -- + * Implements the ENV->get_encrypt_flags command. + */ +int +tcl_EnvGetEncryptFlags(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Database pointer */ +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } encrypt_flags[] = { + { DB_ENCRYPT_AES, "-encryptaes" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = dbenv->get_encrypt_flags(dbenv, &flags); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env get_encrypt_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; encrypt_flags[i].flag != 0; i++) + if (LF_ISSET(encrypt_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, encrypt_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} + +/* + * env_GetLockDetect -- + * Implements the ENV->get_lk_detect command. + */ +static int +env_GetLockDetect(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
 */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;
{
	int i, ret, result;
	u_int32_t lk_detect;
	const char *answer;
	Tcl_Obj *res;
	static const struct {
		u_int32_t flag;
		char *name;
	} lk_detect_returns[] = {
		{ DB_LOCK_DEFAULT, "default" },
		{ DB_LOCK_EXPIRE, "expire" },
		{ DB_LOCK_MAXLOCKS, "maxlocks" },
		{ DB_LOCK_MAXWRITE, "maxwrite" },
		{ DB_LOCK_MINLOCKS, "minlocks" },
		{ DB_LOCK_MINWRITE, "minwrite" },
		{ DB_LOCK_OLDEST, "oldest" },
		{ DB_LOCK_RANDOM, "random" },
		{ DB_LOCK_YOUNGEST, "youngest" },
		{ 0, NULL }
	};

	if (objc != 2) {
		Tcl_WrongNumArgs(interp, 1, objv, NULL);
		return (TCL_ERROR);
	}
	ret = dbenv->get_lk_detect(dbenv, &lk_detect);
	if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret),
	    "env get_lk_detect")) == TCL_OK) {
		/* A policy value not in the table reads back as "unknown". */
		answer = "unknown";
		for (i = 0; lk_detect_returns[i].flag != 0; i++)
			if (lk_detect == lk_detect_returns[i].flag)
				answer = lk_detect_returns[i].name;

		res = NewStringObj(answer, strlen(answer));
		Tcl_SetObjResult(interp, res);
	}

	return (result);
}

/*
 * env_GetTimeout --
 *	Implements the ENV->get_timeout command.
 */
static int
env_GetTimeout(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments? */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;
{
	static const struct {
		u_int32_t flag;
		char *arg;
	} timeout_flags[] = {
		{ DB_SET_LOCK_TIMEOUT, "lock" },
		{ DB_SET_REG_TIMEOUT, "reg" },
		{ DB_SET_TXN_TIMEOUT, "txn" },
		{ 0, NULL }
	};
	Tcl_Obj *res;
	db_timeout_t timeout;
	u_int32_t which;
	int i, ret, result;
	const char *arg;

	COMPQUIET(timeout, 0);

	if (objc != 3) {
		Tcl_WrongNumArgs(interp, 1, objv, NULL);
		return (TCL_ERROR);
	}

	/* Map the timeout-kind keyword to its DB_SET_* flag. */
	arg = Tcl_GetStringFromObj(objv[2], NULL);
	which = 0;
	for (i = 0; timeout_flags[i].flag != 0; i++)
		if (strcmp(arg, timeout_flags[i].arg) == 0)
			which = timeout_flags[i].flag;
	if (which == 0) {
		/* Unrecognized keyword: report EINVAL via _ReturnSetup. */
		ret = EINVAL;
		goto err;
	}

	ret = dbenv->get_timeout(dbenv, &timeout, which);
err:	if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret),
	    "env get_timeout")) == TCL_OK) {
		res = Tcl_NewLongObj((long)timeout);
		Tcl_SetObjResult(interp, res);
	}

	return (result);
}

/*
 * env_GetVerbose --
 *	Implements the ENV->get_verbose command.
 *	(NOTE(review): the original header comment said "get_open_flags" --
 *	a copy-and-paste slip; the body calls dbenv->get_verbose.)
 */
static int
env_GetVerbose(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments?
 */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;
{
	static const struct {
		u_int32_t flag;
		char *arg;
	} verbose_flags[] = {
		{ DB_VERB_DEADLOCK, "deadlock" },
		{ DB_VERB_FILEOPS, "fileops" },
		{ DB_VERB_FILEOPS_ALL, "fileops_all" },
		{ DB_VERB_RECOVERY, "recovery" },
		{ DB_VERB_REGISTER, "register" },
		{ DB_VERB_REPLICATION, "rep" },
		{ DB_VERB_REP_ELECT, "rep_elect" },
		{ DB_VERB_REP_LEASE, "rep_lease" },
		{ DB_VERB_REP_MISC, "rep_misc" },
		{ DB_VERB_REP_MSGS, "rep_msgs" },
		{ DB_VERB_REP_SYNC, "rep_sync" },
		{ DB_VERB_REP_SYSTEM, "rep_system" },
		{ DB_VERB_REP_TEST, "rep_test" },
		{ DB_VERB_REPMGR_CONNFAIL, "repmgr_connfail" },
		{ DB_VERB_REPMGR_MISC, "repmgr_misc" },
		{ DB_VERB_WAITSFOR, "wait" },
		{ 0, NULL }
	};
	Tcl_Obj *res;
	u_int32_t which;
	int i, onoff, ret, result;
	const char *arg, *answer;

	COMPQUIET(onoff, 0);

	if (objc != 3) {
		Tcl_WrongNumArgs(interp, 1, objv, NULL);
		return (TCL_ERROR);
	}

	/* Map the subsystem keyword to its DB_VERB_* flag. */
	arg = Tcl_GetStringFromObj(objv[2], NULL);
	which = 0;
	for (i = 0; verbose_flags[i].flag != 0; i++)
		if (strcmp(arg, verbose_flags[i].arg) == 0)
			which = verbose_flags[i].flag;
	if (which == 0) {
		ret = EINVAL;
		goto err;
	}

	ret = dbenv->get_verbose(dbenv, which, &onoff);
	/* NOTE(review): "== 0" below relies on TCL_OK being 0. */
err:	if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret),
	    "env get_verbose")) == 0) {
		answer = onoff ? "on" : "off";
		res = NewStringObj(answer, strlen(answer));
		Tcl_SetObjResult(interp, res);
	}

	return (result);
}

/*
 * PUBLIC: void tcl_EnvSetErrfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *,
 * PUBLIC: char *));
 *
 * tcl_EnvSetErrfile --
 *	Implements the ENV->set_errfile command.  "/dev/stdout" and
 *	"/dev/stderr" map to the C stdio streams; anything else is opened
 *	for append.
 */
void
tcl_EnvSetErrfile(interp, dbenv, ip, errf)
	Tcl_Interp *interp;		/* Interpreter */
	DB_ENV *dbenv;			/* Database pointer */
	DBTCL_INFO *ip;			/* Our internal info */
	char *errf;
{
	COMPQUIET(interp, NULL);
	/*
	 * If the user already set one, free it.
	 */
	if (ip->i_err != NULL && ip->i_err != stdout &&
	    ip->i_err != stderr)
		(void)fclose(ip->i_err);
	if (strcmp(errf, "/dev/stdout") == 0)
		ip->i_err = stdout;
	else if (strcmp(errf, "/dev/stderr") == 0)
		ip->i_err = stderr;
	else
		ip->i_err = fopen(errf, "a");
	if (ip->i_err != NULL)
		dbenv->set_errfile(dbenv, ip->i_err);
}

/*
 * PUBLIC: void tcl_EnvSetMsgfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *,
 * PUBLIC: char *));
 *
 * tcl_EnvSetMsgfile --
 *	Implements the ENV->set_msgfile command.
 */
void
tcl_EnvSetMsgfile(interp, dbenv, ip, msgf)
	Tcl_Interp *interp;		/* Interpreter */
	DB_ENV *dbenv;			/* Database pointer */
	DBTCL_INFO *ip;			/* Our internal info */
	char *msgf;
{
	COMPQUIET(interp, NULL);
	/*
	 * If the user already set one, free it.
	 */
	if (ip->i_msg != NULL && ip->i_msg != stdout &&
	    ip->i_msg != stderr)
		(void)fclose(ip->i_msg);
	if (strcmp(msgf, "/dev/stdout") == 0)
		ip->i_msg = stdout;
	else if (strcmp(msgf, "/dev/stderr") == 0)
		ip->i_msg = stderr;
	else
		ip->i_msg = fopen(msgf, "a");
	if (ip->i_msg != NULL)
		dbenv->set_msgfile(dbenv, ip->i_msg);
}

/*
 * PUBLIC: int tcl_EnvSetErrpfx __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *,
 * PUBLIC: char *));
 *
 * tcl_EnvSetErrpfx --
 *	Implements the ENV->set_errpfx command.
 */
int
tcl_EnvSetErrpfx(interp, dbenv, ip, pfx)
	Tcl_Interp *interp;		/* Interpreter */
	DB_ENV *dbenv;			/* Database pointer */
	DBTCL_INFO *ip;			/* Our internal info */
	char *pfx;
{
	int result, ret;

	/*
	 * Assume success.  The only thing that can fail is
	 * the __os_strdup.
	 */
	result = TCL_OK;
	Tcl_SetResult(interp, "0", TCL_STATIC);
	/*
	 * If the user already set one, free it.
	 */
	if (ip->i_errpfx != NULL) {
		/* Detach the old prefix from the env before freeing it. */
		dbenv->set_errpfx(dbenv, NULL);
		__os_free(dbenv->env, ip->i_errpfx);
		ip->i_errpfx = NULL;
	}
	if ((ret = __os_strdup(dbenv->env, pfx, &ip->i_errpfx)) != 0) {
		result = _ReturnSetup(interp, ret,
		    DB_RETOK_STD(ret), "__os_strdup");
		ip->i_errpfx = NULL;
	}
	if (ip->i_errpfx != NULL)
		dbenv->set_errpfx(dbenv, ip->i_errpfx);
	return (result);
}

/*
 * tcl_EnvStatPrint --
 *	Implements the ENV->stat_print command: parse the -all/-clear/
 *	-subsystem options into DB_STAT_* flags and print the stats.
 *
 * PUBLIC: int tcl_EnvStatPrint __P((Tcl_Interp *, int,
 * PUBLIC:    Tcl_Obj * CONST*, DB_ENV *));
 */
int
tcl_EnvStatPrint(interp, objc, objv, dbenv)
	Tcl_Interp *interp;		/* Interpreter */
	int objc;			/* How many arguments? */
	Tcl_Obj *CONST objv[];		/* The argument objects */
	DB_ENV *dbenv;			/* Environment pointer */
{
	static const char *envstatprtopts[] = {
		"-all",
		"-clear",
		"-subsystem",
		NULL
	};
	enum envstatprtopts {
		ENVSTATPRTALL,
		ENVSTATPRTCLEAR,
		ENVSTATPRTSUB
	};
	u_int32_t flag;
	int i, optindex, result, ret;

	result = TCL_OK;
	flag = 0;
	i = 2;

	while (i < objc) {
		if (Tcl_GetIndexFromObj(interp, objv[i], envstatprtopts,
		    "option", TCL_EXACT, &optindex) != TCL_OK) {
			result = IS_HELP(objv[i]);
			goto error;
		}
		i++;
		switch ((enum envstatprtopts)optindex) {
		case ENVSTATPRTALL:
			flag |= DB_STAT_ALL;
			break;
		case ENVSTATPRTCLEAR:
			flag |= DB_STAT_CLEAR;
			break;
		case ENVSTATPRTSUB:
			flag |= DB_STAT_SUBSYSTEM;
			break;
		}
		if (result != TCL_OK)
			break;
	}
	if (result != TCL_OK)
		goto error;

	_debug_check();
	ret = dbenv->stat_print(dbenv, flag);
	result = _ReturnSetup(interp,
	    ret, DB_RETOK_STD(ret), "dbenv stat_print");
error:
	return (result);

}
diff --git a/lang/tcl/tcl_internal.c b/lang/tcl/tcl_internal.c
new file mode 100644
index 00000000..cea70f35
--- /dev/null
+++ b/lang/tcl/tcl_internal.c
@@ -0,0 +1,879 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2011 Oracle and/or its affiliates.  All rights reserved.
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +/* + * + * internal.c -- + * + * This file contains internal functions we need to maintain + * state for our Tcl interface. + * + * NOTE: This all uses a linear linked list. If we end up with + * too many info structs such that this is a performance hit, it + * should be redone using hashes or a list per type. The assumption + * is that the user won't have more than a few dozen info structs + * in operation at any given point in time. Even a complicated + * application with a few environments, nested transactions, locking, + * and several databases open, using cursors should not have a + * negative performance impact, in terms of searching the list to + * get/manipulate the info structure. + */ + +#define GLOB_CHAR(c) ((c) == '*' || (c) == '?') + +/* + * PUBLIC: DBTCL_INFO *_NewInfo __P((Tcl_Interp *, + * PUBLIC: void *, char *, enum INFOTYPE)); + * + * _NewInfo -- + * + * This function will create a new info structure and fill it in + * with the name and pointer, id and type. 
+ */ +DBTCL_INFO * +_NewInfo(interp, anyp, name, type) + Tcl_Interp *interp; + void *anyp; + char *name; + enum INFOTYPE type; +{ + DBTCL_INFO *p; + int ret; + + if ((ret = __os_calloc(NULL, sizeof(DBTCL_INFO), 1, &p)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (NULL); + } + + if ((ret = __os_strdup(NULL, name, &p->i_name)) != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + __os_free(NULL, p); + return (NULL); + } + p->i_interp = interp; + p->i_anyp = anyp; + p->i_type = type; + + LIST_INSERT_HEAD(&__db_infohead, p, entries); + return (p); +} + +/* + * PUBLIC: void *_NameToPtr __P((CONST char *)); + */ +void * +_NameToPtr(name) + CONST char *name; +{ + DBTCL_INFO *p; + + LIST_FOREACH(p, &__db_infohead, entries) + if (strcmp(name, p->i_name) == 0) + return (p->i_anyp); + return (NULL); +} + +/* + * PUBLIC: DBTCL_INFO *_PtrToInfo __P((CONST void *)); + */ +DBTCL_INFO * +_PtrToInfo(ptr) + CONST void *ptr; +{ + DBTCL_INFO *p; + + LIST_FOREACH(p, &__db_infohead, entries) + if (p->i_anyp == ptr) + return (p); + return (NULL); +} + +/* + * PUBLIC: DBTCL_INFO *_NameToInfo __P((CONST char *)); + */ +DBTCL_INFO * +_NameToInfo(name) + CONST char *name; +{ + DBTCL_INFO *p; + + LIST_FOREACH(p, &__db_infohead, entries) + if (strcmp(name, p->i_name) == 0) + return (p); + return (NULL); +} + +/* + * PUBLIC: void _SetInfoData __P((DBTCL_INFO *, void *)); + */ +void +_SetInfoData(p, data) + DBTCL_INFO *p; + void *data; +{ + if (p == NULL) + return; + p->i_anyp = data; + return; +} + +/* + * PUBLIC: void _DeleteInfo __P((DBTCL_INFO *)); + */ +void +_DeleteInfo(p) + DBTCL_INFO *p; +{ + if (p == NULL) + return; + LIST_REMOVE(p, entries); + if (p->i_lockobj.data != NULL) + __os_free(NULL, p->i_lockobj.data); + if (p->i_err != NULL && p->i_err != stderr && p->i_err != stdout) { + (void)fclose(p->i_err); + p->i_err = NULL; + } + if (p->i_msg != NULL && p->i_msg != stderr && p->i_msg != stdout) { + (void)fclose(p->i_msg); + p->i_msg = NULL; + } + 
if (p->i_errpfx != NULL) + __os_free(NULL, p->i_errpfx); + if (p->i_compare != NULL) { + Tcl_DecrRefCount(p->i_compare); + } + if (p->i_dupcompare != NULL) { + Tcl_DecrRefCount(p->i_dupcompare); + } + if (p->i_hashproc != NULL) { + Tcl_DecrRefCount(p->i_hashproc); + } + if (p->i_part_callback != NULL) { + Tcl_DecrRefCount(p->i_part_callback); + } + if (p->i_second_call != NULL) { + Tcl_DecrRefCount(p->i_second_call); + } + if (p->i_rep_eid != NULL) { + Tcl_DecrRefCount(p->i_rep_eid); + } + if (p->i_rep_send != NULL) { + Tcl_DecrRefCount(p->i_rep_send); + } + + if (p->i_type == I_ENV && p->i_event_info != NULL) + __os_free(NULL, p->i_event_info); + if (p->i_type == I_TXN && p->i_commit_token != NULL) + __os_free(NULL, p->i_commit_token); + __os_free(NULL, p->i_name); + __os_free(NULL, p); + + return; +} + +/* + * PUBLIC: int _SetListElem __P((Tcl_Interp *, + * PUBLIC: Tcl_Obj *, void *, u_int32_t, void *, u_int32_t)); + */ +int +_SetListElem(interp, list, elem1, e1cnt, elem2, e2cnt) + Tcl_Interp *interp; + Tcl_Obj *list; + void *elem1, *elem2; + u_int32_t e1cnt, e2cnt; +{ + Tcl_Obj *myobjv[2], *thislist; + int myobjc; + + myobjc = 2; + myobjv[0] = Tcl_NewByteArrayObj((u_char *)elem1, (int)e1cnt); + myobjv[1] = Tcl_NewByteArrayObj((u_char *)elem2, (int)e2cnt); + thislist = Tcl_NewListObj(myobjc, myobjv); + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); + +} + +/* + * PUBLIC: int _SetListElemInt __P((Tcl_Interp *, Tcl_Obj *, void *, long)); + */ +int +_SetListElemInt(interp, list, elem1, elem2) + Tcl_Interp *interp; + Tcl_Obj *list; + void *elem1; + long elem2; +{ + Tcl_Obj *myobjv[2], *thislist; + int myobjc; + + myobjc = 2; + myobjv[0] = + Tcl_NewByteArrayObj((u_char *)elem1, (int)strlen((char *)elem1)); + myobjv[1] = Tcl_NewLongObj(elem2); + thislist = Tcl_NewListObj(myobjc, myobjv); + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); +} + +#ifdef 
HAVE_64BIT_TYPES +/* + * PUBLIC: int _SetListElemWideInt __P((Tcl_Interp *, + * PUBLIC: Tcl_Obj *, void *, int64_t)); + */ +int +_SetListElemWideInt(interp, list, elem1, elem2) + Tcl_Interp *interp; + Tcl_Obj *list; + void *elem1; + int64_t elem2; +{ + Tcl_Obj *myobjv[2], *thislist; + int myobjc; + + myobjc = 2; + myobjv[0] = + Tcl_NewByteArrayObj((u_char *)elem1, (int)strlen((char *)elem1)); + myobjv[1] = Tcl_NewWideIntObj(elem2); + thislist = Tcl_NewListObj(myobjc, myobjv); + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); +} +#endif /* HAVE_64BIT_TYPES */ + +/* + * PUBLIC: int _SetListRecnoElem __P((Tcl_Interp *, Tcl_Obj *, + * PUBLIC: db_recno_t, u_char *, u_int32_t)); + */ +int +_SetListRecnoElem(interp, list, elem1, elem2, e2size) + Tcl_Interp *interp; + Tcl_Obj *list; + db_recno_t elem1; + u_char *elem2; + u_int32_t e2size; +{ + Tcl_Obj *myobjv[2], *thislist; + int myobjc; + + myobjc = 2; + myobjv[0] = Tcl_NewWideIntObj((Tcl_WideInt)elem1); + myobjv[1] = Tcl_NewByteArrayObj(elem2, (int)e2size); + thislist = Tcl_NewListObj(myobjc, myobjv); + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); + +} + +/* + * PUBLIC: int _SetListHeapElem __P((Tcl_Interp *, Tcl_Obj *, + * PUBLIC: DB_HEAP_RID, u_char *, u_int32_t)); + */ +int +_SetListHeapElem(interp, list, elem1, elem2, e2size) + Tcl_Interp *interp; + Tcl_Obj *list; + DB_HEAP_RID elem1; + u_char *elem2; + u_int32_t e2size; +{ + Tcl_Obj *intobj, *myobjv[2], *thislist; + int myobjc, result; + + result = 0; + myobjc = 2; + myobjv[0] = Tcl_NewListObj(0, NULL); + intobj = Tcl_NewWideIntObj((Tcl_WideInt)elem1.pgno); + result = Tcl_ListObjAppendElement(interp, myobjv[0], intobj); + if (result != TCL_OK) + return (TCL_ERROR); + intobj = Tcl_NewWideIntObj((Tcl_WideInt)elem1.indx); + result = Tcl_ListObjAppendElement(interp, myobjv[0], intobj); + if (result != TCL_OK) + return (TCL_ERROR); + myobjv[1] = 
Tcl_NewByteArrayObj(elem2, (int)e2size); + thislist = Tcl_NewListObj(myobjc, myobjv); + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); + +} + +/* + * _Set3DBTList -- + * This is really analogous to both _SetListElem and + * _SetListRecnoElem--it's used for three-DBT lists returned by + * DB->pget and DBC->pget(). We'd need a family of four functions + * to handle all the recno/non-recno cases, however, so we make + * this a little more aware of the internals and do the logic inside. + * + * XXX + * One of these days all these functions should probably be cleaned up + * to eliminate redundancy and bring them into the standard DB + * function namespace. + * + * PUBLIC: int _Set3DBTList __P((Tcl_Interp *, Tcl_Obj *, DBT *, int, + * PUBLIC: DBT *, int, DBT *)); + */ +int +_Set3DBTList(interp, list, elem1, is1recno, elem2, is2recno, elem3) + Tcl_Interp *interp; + Tcl_Obj *list; + DBT *elem1, *elem2, *elem3; + int is1recno, is2recno; +{ + + Tcl_Obj *myobjv[3], *thislist; + + if (is1recno) + myobjv[0] = Tcl_NewWideIntObj( + (Tcl_WideInt)*(db_recno_t *)elem1->data); + else + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)elem1->data, (int)elem1->size); + + if (is2recno) + myobjv[1] = Tcl_NewWideIntObj( + (Tcl_WideInt)*(db_recno_t *)elem2->data); + else + myobjv[1] = Tcl_NewByteArrayObj( + (u_char *)elem2->data, (int)elem2->size); + + myobjv[2] = Tcl_NewByteArrayObj( + (u_char *)elem3->data, (int)elem3->size); + + thislist = Tcl_NewListObj(3, myobjv); + + if (thislist == NULL) + return (TCL_ERROR); + return (Tcl_ListObjAppendElement(interp, list, thislist)); +} + +/* + * _SetMultiList -- build a list for return from multiple get. 
+ * + * PUBLIC: int _SetMultiList __P((Tcl_Interp *, + * PUBLIC: Tcl_Obj *, DBT *, DBT*, DBTYPE, u_int32_t)); + */ +int +_SetMultiList(interp, list, key, data, type, flag) + Tcl_Interp *interp; + Tcl_Obj *list; + DBT *key, *data; + DBTYPE type; + u_int32_t flag; +{ + db_recno_t recno; + u_int32_t dlen, klen; + int result; + void *pointer, *dp, *kp; + + recno = 0; + dlen = 0; + kp = NULL; + + DB_MULTIPLE_INIT(pointer, data); + result = TCL_OK; + + if (type == DB_RECNO || type == DB_QUEUE) + recno = *(db_recno_t *) key->data; + else + kp = key->data; + klen = key->size; + do { + if (flag & DB_MULTIPLE_KEY) { + if (type == DB_RECNO || type == DB_QUEUE) + DB_MULTIPLE_RECNO_NEXT(pointer, + data, recno, dp, dlen); + else + DB_MULTIPLE_KEY_NEXT(pointer, + data, kp, klen, dp, dlen); + } else + DB_MULTIPLE_NEXT(pointer, data, dp, dlen); + + if (pointer == NULL) + break; + + if (type == DB_RECNO || type == DB_QUEUE) { + result = + _SetListRecnoElem(interp, list, recno, dp, dlen); + recno++; + /* Wrap around and skip zero. */ + if (recno == 0) + recno++; + } else + result = _SetListElem(interp, list, kp, klen, dp, dlen); + } while (result == TCL_OK); + + return (result); +} +/* + * PUBLIC: int _GetGlobPrefix __P((char *, char **)); + */ +int +_GetGlobPrefix(pattern, prefix) + char *pattern; + char **prefix; +{ + int i, j; + char *p; + + /* + * Duplicate it, we get enough space and most of the work is done. 
+ */ + if (__os_strdup(NULL, pattern, prefix) != 0) + return (1); + + p = *prefix; + for (i = 0, j = 0; p[i] && !GLOB_CHAR(p[i]); i++, j++) + /* + * Check for an escaped character and adjust + */ + if (p[i] == '\\' && p[i+1]) { + p[j] = p[i+1]; + i++; + } else + p[j] = p[i]; + p[j] = 0; + return (0); +} + +/* + * PUBLIC: int _ReturnSetup __P((Tcl_Interp *, int, int, char *)); + */ +int +_ReturnSetup(interp, ret, ok, errmsg) + Tcl_Interp *interp; + int ret, ok; + char *errmsg; +{ + char *msg; + + if (ret > 0) + return (_ErrorSetup(interp, ret, errmsg)); + + /* + * We either have success or a DB error. If a DB error, set up the + * string. We return an error if not one of the errors we catch. + * If anyone wants to reset the result to return anything different, + * then the calling function is responsible for doing so via + * Tcl_ResetResult or another Tcl_SetObjResult. + */ + if (ret == 0) { + Tcl_SetResult(interp, "0", TCL_STATIC); + return (TCL_OK); + } + + msg = db_strerror(ret); + Tcl_AppendResult(interp, msg, NULL); + + if (ok) + return (TCL_OK); + else { + Tcl_SetErrorCode(interp, "BerkeleyDB", msg, NULL); + return (TCL_ERROR); + } +} + +/* + * PUBLIC: int _ErrorSetup __P((Tcl_Interp *, int, char *)); + */ +int +_ErrorSetup(interp, ret, errmsg) + Tcl_Interp *interp; + int ret; + char *errmsg; +{ + Tcl_SetErrno(ret); + Tcl_AppendResult(interp, errmsg, ":", Tcl_PosixError(interp), NULL); + return (TCL_ERROR); +} + +/* + * PUBLIC: void _ErrorFunc __P((const DB_ENV *, CONST char *, const char *)); + */ +void +_ErrorFunc(dbenv, pfx, msg) + const DB_ENV *dbenv; + CONST char *pfx; + const char *msg; +{ + DBTCL_INFO *p; + Tcl_Interp *interp; + size_t size; + char *err; + + COMPQUIET(dbenv, NULL); + + p = _NameToInfo(pfx); + if (p == NULL) + return; + interp = p->i_interp; + + size = strlen(pfx) + strlen(msg) + 4; + /* + * If we cannot allocate enough to put together the prefix + * and message then give them just the message. 
+ */ + if (__os_malloc(NULL, size, &err) != 0) { + Tcl_AddErrorInfo(interp, msg); + Tcl_AppendResult(interp, msg, "\n", NULL); + return; + } + snprintf(err, size, "%s: %s", pfx, msg); + Tcl_AddErrorInfo(interp, err); + Tcl_AppendResult(interp, err, "\n", NULL); + __os_free(NULL, err); + return; +} + +#ifdef CONFIG_TEST +/* + * PUBLIC: #ifdef CONFIG_TEST + * PUBLIC: void _EventFunc __P((DB_ENV *, u_int32_t, void *)); + * PUBLIC: #endif + */ +void +_EventFunc(dbenv, event, info) + DB_ENV *dbenv; + u_int32_t event; + void *info; +{ + DBTCL_INFO *ip; + u_int32_t bit_flag; + + ip = (DBTCL_INFO *)dbenv->app_private; + DB_ASSERT(dbenv->env, ip->i_event_info != NULL); + DB_ASSERT(dbenv->env, event < 32); /* Flag bits fit in 32-bit word. */ + + if (tcl_LockMutex(dbenv, ip->i_mutex) != 0) { + (void)puts("FAIL: __mutex_lock failed"); + return; + } + + /* Record the fact that this event occurred. */ + bit_flag = 1 << event; + ip->i_event_info->events |= bit_flag; + + /* + * For events that have associated "info" (currently most don't), save + * the info too. + */ + switch (event) { + case DB_EVENT_PANIC: + /* + * Info is the original error code. + */ + ip->i_event_info->panic_error = *(int *)info; + break; + case DB_EVENT_REG_ALIVE: + /* + * Info is the attached process's PID. + */ + ip->i_event_info->attached_process = *(pid_t *)info; + break; + case DB_EVENT_REP_CONNECT_BROKEN: + /* + * Info is a struct containing the EID whose connection has + * broken, and the system error code indicating the reason. + */ + ip->i_event_info->conn_broken_info = + *(DB_REPMGR_CONN_ERR *)info; + break; + case DB_EVENT_REP_CONNECT_ESTD: + /* + * Info is the EID whose connection has been established. + */ + ip->i_event_info->connected_eid = *(int *)info; + break; + case DB_EVENT_REP_CONNECT_TRY_FAILED: + /* + * Info is a struct containing the EID of the site to which we + * failed to connect, and the system error code indicating the + * reason. 
+ */ + ip->i_event_info->conn_failed_try_info = + *(DB_REPMGR_CONN_ERR *)info; + break; + case DB_EVENT_REP_NEWMASTER: + /* + * Info is the EID of the new master. + */ + ip->i_event_info->newmaster_eid = *(int *)info; + break; + case DB_EVENT_REP_SITE_ADDED: + /* + * Info is the EID of the added site. + */ + ip->i_event_info->added_eid = *(int *)info; + break; + case DB_EVENT_REP_SITE_REMOVED: + /* + * Info is the EID of the removed site. + */ + ip->i_event_info->removed_eid = *(int *)info; + break; + case DB_EVENT_REP_WOULD_ROLLBACK: + /* + * Info is the sync-point LSN. + */ + ip->i_event_info->sync_point = *(DB_LSN *)info; + break; + default: + /* Remaining events don't use "info": so nothing to do. */ + break; + } + if (tcl_UnlockMutex(dbenv, ip->i_mutex) != 0) + (void)puts("FAIL: __mutex_unlock failed"); +} +#endif + +#define INVALID_LSNMSG "Invalid LSN with %d parts. Should have 2.\n" + +/* + * PUBLIC: int _GetLsn __P((Tcl_Interp *, Tcl_Obj *, DB_LSN *)); + */ +int +_GetLsn(interp, obj, lsn) + Tcl_Interp *interp; + Tcl_Obj *obj; + DB_LSN *lsn; +{ + Tcl_Obj **myobjv; + char msg[MSG_SIZE]; + int myobjc, result; + u_int32_t tmp; + + result = Tcl_ListObjGetElements(interp, obj, &myobjc, &myobjv); + if (result == TCL_ERROR) + return (result); + if (myobjc != 2) { + result = TCL_ERROR; + snprintf(msg, MSG_SIZE, INVALID_LSNMSG, myobjc); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (result); + } + result = _GetUInt32(interp, myobjv[0], &tmp); + if (result == TCL_ERROR) + return (result); + lsn->file = tmp; + result = _GetUInt32(interp, myobjv[1], &tmp); + lsn->offset = tmp; + return (result); +} + +#define INVALID_RIDMSG "Invalid RID with %d parts. 
Should have 2.\n" + +/* + * PUBLIC: int _GetRid __P((Tcl_Interp *, Tcl_Obj *, DB_HEAP_RID *)); + */ +int +_GetRid(interp, obj, rid) + Tcl_Interp *interp; + Tcl_Obj *obj; + DB_HEAP_RID *rid; +{ + Tcl_Obj **myobjv; + char msg[MSG_SIZE]; + int myobjc, result; + u_int32_t tmp; + + result = Tcl_ListObjGetElements(interp, obj, &myobjc, &myobjv); + if (result == TCL_ERROR) + return (result); + if (myobjc != 2) { + result = TCL_ERROR; + snprintf(msg, MSG_SIZE, INVALID_RIDMSG, myobjc); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (result); + } + result = _GetUInt32(interp, myobjv[0], &tmp); + if (result == TCL_ERROR) + return (result); + rid->pgno = tmp; + result = _GetUInt32(interp, myobjv[1], &tmp); + rid->indx = (u_int16_t)tmp; + return (result); +} + +/* + * _GetUInt32 -- + * Get a u_int32_t from a Tcl object. Tcl_GetIntFromObj does the + * right thing most of the time, but on machines where a long is 8 bytes + * and an int is 4 bytes, it errors on integers between the maximum + * int32_t and the maximum u_int32_t. This is correct, but we generally + * want a u_int32_t in the end anyway, so we use Tcl_GetLongFromObj and do + * the bounds checking ourselves. + * + * This code looks much like Tcl_GetIntFromObj, only with a different + * bounds check. It's essentially Tcl_GetUnsignedIntFromObj, which + * unfortunately doesn't exist. 
+ * + * PUBLIC: int _GetUInt32 __P((Tcl_Interp *, Tcl_Obj *, u_int32_t *)); + */ +int +_GetUInt32(interp, obj, resp) + Tcl_Interp *interp; + Tcl_Obj *obj; + u_int32_t *resp; +{ + int result; + long ltmp; + + result = Tcl_GetLongFromObj(interp, obj, <mp); + if (result != TCL_OK) + return (result); + + if ((unsigned long)ltmp != (u_int32_t)ltmp) { + if (interp != NULL) { + Tcl_ResetResult(interp); + Tcl_AppendToObj(Tcl_GetObjResult(interp), + "integer value too large for u_int32_t", -1); + } + return (TCL_ERROR); + } + + *resp = (u_int32_t)ltmp; + return (TCL_OK); +} + +/* + * _GetFlagsList -- + * Get a new Tcl object, containing a list of the string values + * associated with a particular set of flag values. + * + * PUBLIC: Tcl_Obj *_GetFlagsList __P((Tcl_Interp *, u_int32_t, const FN *)); + */ +Tcl_Obj * +_GetFlagsList(interp, flags, fnp) + Tcl_Interp *interp; + u_int32_t flags; + const FN *fnp; +{ + Tcl_Obj *newlist, *newobj; + int result; + + newlist = Tcl_NewObj(); + + /* + * If the Berkeley DB library wasn't compiled with statistics, then + * we may get a NULL reference. + */ + if (fnp == NULL) + return (newlist); + + /* + * Append a Tcl_Obj containing each pertinent flag string to the + * specified Tcl list. + */ + for (; fnp->mask != 0; ++fnp) + if (LF_ISSET(fnp->mask)) { + newobj = NewStringObj(fnp->name, strlen(fnp->name)); + result = + Tcl_ListObjAppendElement(interp, newlist, newobj); + + /* + * Tcl_ListObjAppendElement is defined to return TCL_OK + * unless newlist isn't actually a list (or convertible + * into one). If this is the case, we screwed up badly + * somehow. 
+ */ + DB_ASSERT(NULL, result == TCL_OK); + } + + return (newlist); +} + +int __debug_stop, __debug_on, __debug_print, __debug_test; + +/* + * PUBLIC: void _debug_check __P((void)); + */ +void +_debug_check() +{ + if (__debug_on == 0) + return; + + if (__debug_print != 0) { + printf("\r%7d:", __debug_on); + (void)fflush(stdout); + } + if (__debug_on++ == __debug_test || __debug_stop) + __db_loadme(); +} + +/* + * XXX + * Tcl 8.1+ Tcl_GetByteArrayFromObj/Tcl_GetIntFromObj bug. + * + * There is a bug in Tcl 8.1+ and byte arrays in that if it happens + * to use an object as both a byte array and something else like + * an int, and you've done a Tcl_GetByteArrayFromObj, then you + * do a Tcl_GetIntFromObj, your memory is deleted. + * + * Workaround is for all byte arrays we want to use, if it can be + * represented as an integer, we copy it so that we don't lose the + * memory. + */ +/* + * PUBLIC: int _CopyObjBytes __P((Tcl_Interp *, Tcl_Obj *obj, void *, + * PUBLIC: u_int32_t *, int *)); + */ +int +_CopyObjBytes(interp, obj, newp, sizep, freep) + Tcl_Interp *interp; + Tcl_Obj *obj; + void *newp; + u_int32_t *sizep; + int *freep; +{ + void *tmp, *new; + int i, len, ret; + + /* + * If the object is not an int, then just return the byte + * array because it won't be transformed out from under us. + * If it is a number, we need to copy it. + */ + *freep = 0; + ret = Tcl_GetIntFromObj(interp, obj, &i); + tmp = Tcl_GetByteArrayFromObj(obj, &len); + *sizep = (u_int32_t)len; + if (ret == TCL_ERROR) { + Tcl_ResetResult(interp); + *(void **)newp = tmp; + return (0); + } + + /* + * If we get here, we have an integer that might be reused + * at some other point so we cannot count on GetByteArray + * keeping our pointer valid. 
+ */ + if ((ret = __os_malloc(NULL, (size_t)len, &new)) != 0) + return (ret); + memcpy(new, tmp, (size_t)len); + *(void **)newp = new; + *freep = 1; + return (0); +} diff --git a/lang/tcl/tcl_lock.c b/lang/tcl/tcl_lock.c new file mode 100644 index 00000000..9a0dd5cf --- /dev/null +++ b/lang/tcl/tcl_lock.c @@ -0,0 +1,862 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +/* + * Prototypes for procedures defined later in this file: + */ +#ifdef CONFIG_TEST +static int lock_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int _LockMode __P((Tcl_Interp *, Tcl_Obj *, db_lockmode_t *)); +static int _GetThisLock __P((Tcl_Interp *, DB_ENV *, u_int32_t, + u_int32_t, DBT *, db_lockmode_t, char *)); +static void _LockPutInfo __P((Tcl_Interp *, db_lockop_t, DB_LOCK *, + u_int32_t, DBT *)); + +/* + * tcl_LockDetect -- + * + * PUBLIC: int tcl_LockDetect __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockDetect(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *ldopts[] = { + "default", + "expire", + "maxlocks", + "maxwrites", + "minlocks", + "minwrites", + "oldest", + "random", + "youngest", + NULL + }; + enum ldopts { + LD_DEFAULT, + LD_EXPIRE, + LD_MAXLOCKS, + LD_MAXWRITES, + LD_MINLOCKS, + LD_MINWRITES, + LD_OLDEST, + LD_RANDOM, + LD_YOUNGEST + }; + u_int32_t flag, policy; + int i, optindex, result, ret; + + result = TCL_OK; + flag = policy = 0; + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + ldopts, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum ldopts)optindex) { + case LD_DEFAULT: + FLAG_CHECK(policy); + policy = DB_LOCK_DEFAULT; + break; + case LD_EXPIRE: + FLAG_CHECK(policy); + policy = DB_LOCK_EXPIRE; + break; + case LD_MAXLOCKS: + FLAG_CHECK(policy); + policy = DB_LOCK_MAXLOCKS; + break; + case LD_MAXWRITES: + FLAG_CHECK(policy); + policy = DB_LOCK_MAXWRITE; + break; + case LD_MINLOCKS: + FLAG_CHECK(policy); + policy = DB_LOCK_MINLOCKS; + break; + case LD_MINWRITES: + FLAG_CHECK(policy); + policy = DB_LOCK_MINWRITE; + break; + case LD_OLDEST: + FLAG_CHECK(policy); + policy = DB_LOCK_OLDEST; + break; + case LD_RANDOM: + FLAG_CHECK(policy); + policy = DB_LOCK_RANDOM; + break; + case LD_YOUNGEST: + FLAG_CHECK(policy); + policy = DB_LOCK_YOUNGEST; + break; + } + } + + _debug_check(); + ret = dbenv->lock_detect(dbenv, flag, policy, NULL); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "lock detect"); + return (result); +} + +/* + * tcl_LockGet -- + * + * PUBLIC: int tcl_LockGet __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockGet(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *lgopts[] = { + "-nowait", + NULL + }; + enum lgopts { + LGNOWAIT + }; + DBT obj; + Tcl_Obj *res; + void *otmp; + db_lockmode_t mode; + u_int32_t flag, lockid; + int freeobj, optindex, result, ret; + char newname[MSG_SIZE]; + + result = TCL_OK; + freeobj = 0; + memset(newname, 0, MSG_SIZE); + if (objc != 5 && objc != 6) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nowait? mode id obj"); + return (TCL_ERROR); + } + /* + * Work back from required args. + * Last arg is obj. + * Second last is lock id. + * Third last is lock mode. + */ + memset(&obj, 0, sizeof(obj)); + + if ((result = + _GetUInt32(interp, objv[objc-2], &lockid)) != TCL_OK) + return (result); + + ret = _CopyObjBytes(interp, objv[objc-1], &otmp, + &obj.size, &freeobj); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock get"); + return (result); + } + obj.data = otmp; + if ((result = _LockMode(interp, objv[(objc - 3)], &mode)) != TCL_OK) + goto out; + + /* + * Any left over arg is the flag. + */ + flag = 0; + if (objc == 6) { + if (Tcl_GetIndexFromObj(interp, objv[(objc - 4)], + lgopts, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[(objc - 4)])); + switch ((enum lgopts)optindex) { + case LGNOWAIT: + flag |= DB_LOCK_NOWAIT; + break; + } + } + + result = _GetThisLock(interp, dbenv, lockid, flag, &obj, mode, newname); + if (result == TCL_OK) { + res = NewStringObj(newname, strlen(newname)); + Tcl_SetObjResult(interp, res); + } +out: + if (freeobj) + __os_free(dbenv->env, otmp); + return (result); +} + +/* + * tcl_LockStat -- + * + * PUBLIC: int tcl_LockStat __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DB_LOCK_STAT *sp; + Tcl_Obj *res; + int result, ret; + + result = TCL_OK; + /* + * No args for this. Error if there are some. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->lock_stat(dbenv, &sp, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "lock stat"); + if (result == TCL_ERROR) + return (result); + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); +#ifdef HAVE_STATISTICS + /* + * MAKE_STAT_LIST assumes 'res' and 'error' label. + */ + MAKE_STAT_LIST("Region size", sp->st_regsize); + MAKE_STAT_LIST("Last allocated locker ID", sp->st_id); + MAKE_STAT_LIST("Current maximum unused locker ID", sp->st_cur_maxid); + MAKE_STAT_LIST("Initial locks", sp->st_initlocks); + MAKE_STAT_LIST("Allocated locks", sp->st_locks); + MAKE_STAT_LIST("Maximum locks", sp->st_maxlocks); + MAKE_STAT_LIST("Initial lockers", sp->st_initlockers); + MAKE_STAT_LIST("Allocated lockers", sp->st_lockers); + MAKE_STAT_LIST("Maximum lockers", sp->st_maxlockers); + MAKE_STAT_LIST("Initial objects", sp->st_initobjects); + MAKE_STAT_LIST("Allocated objects", sp->st_objects); + MAKE_STAT_LIST("Maximum objects", sp->st_maxobjects); + MAKE_STAT_LIST("Lock modes", sp->st_nmodes); + MAKE_STAT_LIST("Size of object hash table", sp->st_tablesize); + MAKE_STAT_LIST("Number of lock table partitions", sp->st_partitions); + MAKE_STAT_LIST("Current number of locks", sp->st_nlocks); + MAKE_STAT_LIST("Maximum number of locks so far", sp->st_maxnlocks); + MAKE_STAT_LIST("Maximum number of locks in any hash bucket", + sp->st_maxhlocks); + MAKE_WSTAT_LIST("Maximum number of lock steals for an empty partition", + sp->st_locksteals); + MAKE_WSTAT_LIST("Maximum number lock steals in any partition", + sp->st_maxlsteals); + MAKE_STAT_LIST("Current number of lockers", 
sp->st_nlockers); + MAKE_STAT_LIST("Maximum number of lockers so far", sp->st_maxnlockers); + MAKE_STAT_LIST("Current number of objects", sp->st_nobjects); + MAKE_STAT_LIST("Maximum number of objects so far", sp->st_maxnobjects); + MAKE_STAT_LIST("Maximum number of objects in any hash bucket", + sp->st_maxhobjects); + MAKE_WSTAT_LIST("Maximum number of object steals for an empty partition", + sp->st_objectsteals); + MAKE_WSTAT_LIST("Maximum number object steals in any partition", + sp->st_maxosteals); + MAKE_WSTAT_LIST("Lock requests", sp->st_nrequests); + MAKE_WSTAT_LIST("Lock releases", sp->st_nreleases); + MAKE_WSTAT_LIST("Lock upgrades", sp->st_nupgrade); + MAKE_WSTAT_LIST("Lock downgrades", sp->st_ndowngrade); + MAKE_STAT_LIST("Number of conflicted locks for which we waited", + sp->st_lock_wait); + MAKE_STAT_LIST("Number of conflicted locks for which we did not wait", + sp->st_lock_nowait); + MAKE_WSTAT_LIST("Deadlocks detected", sp->st_ndeadlocks); + MAKE_WSTAT_LIST("Number of region lock waits", sp->st_region_wait); + MAKE_WSTAT_LIST("Number of region lock nowaits", sp->st_region_nowait); + MAKE_WSTAT_LIST("Number of object allocation waits", sp->st_objs_wait); + MAKE_STAT_LIST("Number of object allocation nowaits", + sp->st_objs_nowait); + MAKE_STAT_LIST("Number of locker allocation waits", + sp->st_lockers_wait); + MAKE_STAT_LIST("Number of locker allocation nowaits", + sp->st_lockers_nowait); + MAKE_WSTAT_LIST("Maximum hash bucket length", sp->st_hash_len); + MAKE_STAT_LIST("Lock timeout value", sp->st_locktimeout); + MAKE_WSTAT_LIST("Number of lock timeouts", sp->st_nlocktimeouts); + MAKE_STAT_LIST("Transaction timeout value", sp->st_txntimeout); + MAKE_WSTAT_LIST("Number of transaction timeouts", sp->st_ntxntimeouts); + MAKE_WSTAT_LIST("Number lock partition mutex waits", sp->st_part_wait); + MAKE_STAT_LIST("Number lock partition mutex nowaits", + sp->st_part_nowait); + MAKE_STAT_LIST("Maximum number waits on any lock partition mutex", + 
sp->st_part_max_wait); + MAKE_STAT_LIST("Maximum number nowaits on any lock partition mutex", + sp->st_part_max_nowait); +#endif + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_LockStatPrint -- + * + * PUBLIC: int tcl_LockStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *lkstatprtopts[] = { + "-all", + "-clear", + "-lk_conf", + "-lk_lockers", + "-lk_objects", + "-lk_params", + NULL + }; + enum lkstatprtopts { + LKSTATPRTALL, + LKSTATPRTCLEAR, + LKSTATPRTCONF, + LKSTATPRTLOCKERS, + LKSTATPRTOBJECTS, + LKSTATPRTPARAMS + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], lkstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum lkstatprtopts)optindex) { + case LKSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case LKSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + case LKSTATPRTCONF: + flag |= DB_STAT_LOCK_CONF; + break; + case LKSTATPRTLOCKERS: + flag |= DB_STAT_LOCK_LOCKERS; + break; + case LKSTATPRTOBJECTS: + flag |= DB_STAT_LOCK_OBJECTS; + break; + case LKSTATPRTPARAMS: + flag |= DB_STAT_LOCK_PARAMS; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->lock_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv lock_stat_print"); +error: + return (result); + +} + +/* + * tcl_LockTimeout -- + * + * PUBLIC: int tcl_LockTimeout __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockTimeout(interp, objc, objv, dbenv) + 
Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + long timeout; + int result, ret; + + /* + * One arg, the timeout. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?timeout?"); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[2], &timeout); + if (result != TCL_OK) + return (result); + _debug_check(); + ret = dbenv->set_timeout(dbenv, (u_int32_t)timeout, + DB_SET_LOCK_TIMEOUT); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "lock timeout"); + return (result); +} + +/* + * lock_Cmd -- + * Implements the "lock" widget. + */ +static int +lock_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Lock handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *lkcmds[] = { + "put", + NULL + }; + enum lkcmds { + LKPUT + }; + DB_ENV *dbenv; + DB_LOCK *lock; + DBTCL_INFO *lkip; + int cmdindex, result, ret; + + Tcl_ResetResult(interp); + lock = (DB_LOCK *)clientData; + lkip = _PtrToInfo((void *)lock); + result = TCL_OK; + + if (lock == NULL) { + Tcl_SetResult(interp, "NULL lock", TCL_STATIC); + return (TCL_ERROR); + } + if (lkip == NULL) { + Tcl_SetResult(interp, "NULL lock info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + dbenv = NAME_TO_ENV(lkip->i_parent->i_name); + /* + * No args for this. Error if there are some. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], lkcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + switch ((enum lkcmds)cmdindex) { + case LKPUT: + _debug_check(); + ret = dbenv->lock_put(dbenv, lock); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock put"); + (void)Tcl_DeleteCommand(interp, lkip->i_name); + _DeleteInfo(lkip); + __os_free(dbenv->env, lock); + break; + } + return (result); +} + +/* + * tcl_LockVec -- + * + * PUBLIC: int tcl_LockVec __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LockVec(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* environment pointer */ +{ + static const char *lvopts[] = { + "-nowait", + NULL + }; + enum lvopts { + LVNOWAIT + }; + static const char *lkops[] = { + "get", + "put", + "put_all", + "put_obj", + "timeout", + NULL + }; + enum lkops { + LKGET, + LKPUT, + LKPUTALL, + LKPUTOBJ, + LKTIMEOUT + }; + + DB_LOCK *lock; + DB_LOCKREQ list; + DBT obj; + Tcl_Obj **myobjv, *res, *thisop; + void *otmp; + u_int32_t flag, lockid; + int freeobj, i, myobjc, optindex, result, ret; + char *lockname, msg[MSG_SIZE], newname[MSG_SIZE]; + + result = TCL_OK; + memset(newname, 0, MSG_SIZE); + memset(&list, 0, sizeof(DB_LOCKREQ)); + memset(&obj, 0, sizeof(DBT)); + flag = 0; + freeobj = 0; + otmp = NULL; + + /* + * If -nowait is given, it MUST be first arg. + */ + if (Tcl_GetIndexFromObj(interp, objv[2], + lvopts, "option", TCL_EXACT, &optindex) == TCL_OK) { + switch ((enum lvopts)optindex) { + case LVNOWAIT: + flag |= DB_LOCK_NOWAIT; + break; + } + i = 3; + } else { + if (IS_HELP(objv[2]) == TCL_OK) + return (TCL_OK); + Tcl_ResetResult(interp); + i = 2; + } + + /* + * Our next arg MUST be the locker ID. 
+ */ + result = _GetUInt32(interp, objv[i++], &lockid); + if (result != TCL_OK) + return (result); + + /* + * All other remaining args are operation tuples. + * Go through sequentially to decode, execute and build + * up list of return values. + */ + res = Tcl_NewListObj(0, NULL); + while (i < objc) { + /* + * Get the list of the tuple. + */ + lock = NULL; + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + /* + * First we will set up the list of requests. + * We will make a "second pass" after we get back + * the results from the lock_vec call to create + * the return list. + */ + if (Tcl_GetIndexFromObj(interp, myobjv[0], + lkops, "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(myobjv[0]); + goto error; + } + switch ((enum lkops)optindex) { + case LKGET: + if (myobjc != 3) { + Tcl_WrongNumArgs(interp, 1, myobjv, + "{get obj mode}"); + result = TCL_ERROR; + goto error; + } + result = _LockMode(interp, myobjv[2], &list.mode); + if (result != TCL_OK) + goto error; + ret = _CopyObjBytes(interp, myobjv[1], &otmp, + &obj.size, &freeobj); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock vec"); + return (result); + } + obj.data = otmp; + ret = _GetThisLock(interp, dbenv, lockid, flag, + &obj, list.mode, newname); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock vec"); + thisop = Tcl_NewIntObj(ret); + (void)Tcl_ListObjAppendElement(interp, res, + thisop); + goto error; + } + thisop = NewStringObj(newname, strlen(newname)); + (void)Tcl_ListObjAppendElement(interp, res, thisop); + if (freeobj && otmp != NULL) { + __os_free(dbenv->env, otmp); + freeobj = 0; + } + continue; + case LKPUT: + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 1, myobjv, + "{put lock}"); + result = TCL_ERROR; + goto error; + } + list.op = DB_LOCK_PUT; + lockname = Tcl_GetStringFromObj(myobjv[1], NULL); + lock = NAME_TO_LOCK(lockname); + if (lock == 
NULL) { + snprintf(msg, MSG_SIZE, "Invalid lock: %s\n", + lockname); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + goto error; + } + list.lock = *lock; + break; + case LKPUTALL: + if (myobjc != 1) { + Tcl_WrongNumArgs(interp, 1, myobjv, + "{put_all}"); + result = TCL_ERROR; + goto error; + } + list.op = DB_LOCK_PUT_ALL; + break; + case LKPUTOBJ: + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 1, myobjv, + "{put_obj obj}"); + result = TCL_ERROR; + goto error; + } + list.op = DB_LOCK_PUT_OBJ; + ret = _CopyObjBytes(interp, myobjv[1], &otmp, + &obj.size, &freeobj); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock vec"); + return (result); + } + obj.data = otmp; + list.obj = &obj; + break; + case LKTIMEOUT: + list.op = DB_LOCK_TIMEOUT; + break; + + } + /* + * We get here, we have set up our request, now call + * lock_vec. + */ + _debug_check(); + ret = dbenv->lock_vec(dbenv, lockid, flag, &list, 1, NULL); + /* + * Now deal with whether or not the operation succeeded. + * Get's were done above, all these are only puts. + */ + thisop = Tcl_NewIntObj(ret); + result = Tcl_ListObjAppendElement(interp, res, thisop); + if (ret != 0 && result == TCL_OK) + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "lock put"); + if (freeobj && otmp != NULL) { + __os_free(dbenv->env, otmp); + freeobj = 0; + } + /* + * We did a put of some kind. Since we did that, + * we have to delete the commands associated with + * any of the locks we just put. 
+ */ + _LockPutInfo(interp, list.op, lock, lockid, &obj); + } + + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); +error: + return (result); +} + +static int +_LockMode(interp, obj, mode) + Tcl_Interp *interp; + Tcl_Obj *obj; + db_lockmode_t *mode; +{ + static const char *lkmode[] = { + "ng", + "read", + "write", + "iwrite", + "iread", + "iwr", + NULL + }; + enum lkmode { + LK_NG, + LK_READ, + LK_WRITE, + LK_IWRITE, + LK_IREAD, + LK_IWR + }; + int optindex; + + if (Tcl_GetIndexFromObj(interp, obj, lkmode, "option", + TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(obj)); + switch ((enum lkmode)optindex) { + case LK_NG: + *mode = DB_LOCK_NG; + break; + case LK_READ: + *mode = DB_LOCK_READ; + break; + case LK_WRITE: + *mode = DB_LOCK_WRITE; + break; + case LK_IREAD: + *mode = DB_LOCK_IREAD; + break; + case LK_IWRITE: + *mode = DB_LOCK_IWRITE; + break; + case LK_IWR: + *mode = DB_LOCK_IWR; + break; + } + return (TCL_OK); +} + +static void +_LockPutInfo(interp, op, lock, lockid, objp) + Tcl_Interp *interp; + db_lockop_t op; + DB_LOCK *lock; + u_int32_t lockid; + DBT *objp; +{ + DBTCL_INFO *p, *nextp; + int found; + + for (p = LIST_FIRST(&__db_infohead); p != NULL; p = nextp) { + found = 0; + nextp = LIST_NEXT(p, entries); + if ((op == DB_LOCK_PUT && (p->i_lock == lock)) || + (op == DB_LOCK_PUT_ALL && p->i_locker == lockid) || + (op == DB_LOCK_PUT_OBJ && p->i_lockobj.data && + memcmp(p->i_lockobj.data, objp->data, objp->size) == 0)) + found = 1; + if (found) { + (void)Tcl_DeleteCommand(interp, p->i_name); + __os_free(NULL, p->i_lock); + _DeleteInfo(p); + } + } +} + +static int +_GetThisLock(interp, dbenv, lockid, flag, objp, mode, newname) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Env handle */ + u_int32_t lockid; /* Locker ID */ + u_int32_t flag; /* Lock flag */ + DBT *objp; /* Object to lock */ + db_lockmode_t mode; /* Lock mode */ + char *newname; /* New command name */ +{ + DBTCL_INFO *envip, *ip; + DB_LOCK *lock; + int result, 
ret; + + result = TCL_OK; + envip = _PtrToInfo((void *)dbenv); + if (envip == NULL) { + Tcl_SetResult(interp, "Could not find env info\n", TCL_STATIC); + return (TCL_ERROR); + } + snprintf(newname, MSG_SIZE, "%s.lock%d", + envip->i_name, envip->i_envlockid); + ip = _NewInfo(interp, NULL, newname, I_LOCK); + if (ip == NULL) { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + return (TCL_ERROR); + } + ret = __os_malloc(dbenv->env, sizeof(DB_LOCK), &lock); + if (ret != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->lock_get(dbenv, lockid, flag, objp, mode, lock); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "lock get"); + if (result == TCL_ERROR) { + __os_free(dbenv->env, lock); + _DeleteInfo(ip); + return (result); + } + /* + * Success. Set up return. Set up new info + * and command widget for this lock. + */ + ret = __os_malloc(dbenv->env, objp->size, &ip->i_lockobj.data); + if (ret != 0) { + Tcl_SetResult(interp, "Could not duplicate obj", + TCL_STATIC); + (void)dbenv->lock_put(dbenv, lock); + __os_free(dbenv->env, lock); + _DeleteInfo(ip); + result = TCL_ERROR; + goto error; + } + memcpy(ip->i_lockobj.data, objp->data, objp->size); + ip->i_lockobj.size = objp->size; + envip->i_envlockid++; + ip->i_parent = envip; + ip->i_locker = lockid; + _SetInfoData(ip, lock); + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)lock_Cmd, (ClientData)lock, NULL); +error: + return (result); +} +#endif diff --git a/lang/tcl/tcl_log.c b/lang/tcl/tcl_log.c new file mode 100644 index 00000000..3a876e95 --- /dev/null +++ b/lang/tcl/tcl_log.c @@ -0,0 +1,824 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/log.h" +#include "dbinc/tcl_db.h" + +#ifdef CONFIG_TEST +static int tcl_LogcGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_LOGC *)); + +/* + * tcl_LogArchive -- + * + * PUBLIC: int tcl_LogArchive __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogArchive(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *archopts[] = { + "-arch_abs", "-arch_data", "-arch_log", "-arch_remove", + NULL + }; + enum archopts { + ARCH_ABS, ARCH_DATA, ARCH_LOG, ARCH_REMOVE + }; + Tcl_Obj *fileobj, *res; + u_int32_t flag; + int i, optindex, result, ret; + char **file, **list; + + result = TCL_OK; + flag = 0; + /* + * Get the flag index from the object based on the options + * defined above. 
+ */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + archopts, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum archopts)optindex) { + case ARCH_ABS: + flag |= DB_ARCH_ABS; + break; + case ARCH_DATA: + flag |= DB_ARCH_DATA; + break; + case ARCH_LOG: + flag |= DB_ARCH_LOG; + break; + case ARCH_REMOVE: + flag |= DB_ARCH_REMOVE; + break; + } + } + _debug_check(); + list = NULL; + ret = dbenv->log_archive(dbenv, &list, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "log archive"); + if (result == TCL_OK) { + res = Tcl_NewListObj(0, NULL); + for (file = list; file != NULL && *file != NULL; file++) { + fileobj = NewStringObj(*file, strlen(*file)); + result = Tcl_ListObjAppendElement(interp, res, fileobj); + if (result != TCL_OK) + break; + } + Tcl_SetObjResult(interp, res); + } + if (list != NULL) + __os_ufree(dbenv->env, list); + return (result); +} + +/* + * tcl_LogCompare -- + * + * PUBLIC: int tcl_LogCompare __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*)); + */ +int +tcl_LogCompare(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + DB_LSN lsn0, lsn1; + Tcl_Obj *res; + int result, ret; + + result = TCL_OK; + /* + * No flags, must be 4 args. 
+ */ + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "lsn1 lsn2"); + return (TCL_ERROR); + } + + result = _GetLsn(interp, objv[2], &lsn0); + if (result == TCL_ERROR) + return (result); + result = _GetLsn(interp, objv[3], &lsn1); + if (result == TCL_ERROR) + return (result); + + _debug_check(); + ret = log_compare(&lsn0, &lsn1); + res = Tcl_NewIntObj(ret); + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * tcl_LogFile -- + * + * PUBLIC: int tcl_LogFile __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogFile(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DB_LSN lsn; + Tcl_Obj *res; + size_t len; + int result, ret; + char *name; + + result = TCL_OK; + /* + * No flags, must be 3 args. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "lsn"); + return (TCL_ERROR); + } + + result = _GetLsn(interp, objv[2], &lsn); + if (result == TCL_ERROR) + return (result); + + len = MSG_SIZE; + ret = ENOMEM; + name = NULL; + while (ret == ENOMEM) { + if (name != NULL) + __os_free(dbenv->env, name); + ret = __os_malloc(dbenv->env, len, &name); + if (ret != 0) { + Tcl_SetResult(interp, db_strerror(ret), TCL_STATIC); + break; + } + _debug_check(); + ret = dbenv->log_file(dbenv, &lsn, name, len); + len *= 2; + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "log_file"); + if (ret == 0) { + res = NewStringObj(name, strlen(name)); + Tcl_SetObjResult(interp, res); + } + + if (name != NULL) + __os_free(dbenv->env, name); + + return (result); +} + +/* + * tcl_LogFlush -- + * + * PUBLIC: int tcl_LogFlush __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogFlush(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DB_LSN lsn, *lsnp; + int result, ret; + + result = TCL_OK; + /* + * No flags, must be 2 or 3 args. + */ + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?lsn?"); + return (TCL_ERROR); + } + + if (objc == 3) { + lsnp = &lsn; + result = _GetLsn(interp, objv[2], &lsn); + if (result == TCL_ERROR) + return (result); + } else + lsnp = NULL; + + _debug_check(); + ret = dbenv->log_flush(dbenv, lsnp); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "log_flush"); + return (result); +} + +/* + * tcl_LogGet -- + * + * PUBLIC: int tcl_LogGet __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogGet(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + + COMPQUIET(objv, NULL); + COMPQUIET(objc, 0); + COMPQUIET(dbenv, NULL); + + Tcl_SetResult(interp, "FAIL: log_get deprecated\n", TCL_STATIC); + return (TCL_ERROR); +} + +/* + * tcl_LogPut -- + * + * PUBLIC: int tcl_LogPut __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogPut(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *logputopts[] = { + "-flush", + NULL + }; + enum logputopts { + LOGPUT_FLUSH + }; + DB_LSN lsn; + DBT data; + Tcl_Obj *intobj, *res; + void *dtmp; + u_int32_t flag; + int freedata, optindex, result, ret; + + result = TCL_OK; + flag = 0; + freedata = 0; + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? record"); + return (TCL_ERROR); + } + + /* + * Data/record must be the last arg. 
+ */ + memset(&data, 0, sizeof(data)); + ret = _CopyObjBytes(interp, objv[objc-1], &dtmp, + &data.size, &freedata); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "log put"); + return (result); + } + data.data = dtmp; + + /* + * Get the command name index from the object based on the options + * defined above. + */ + if (objc == 4) { + if (Tcl_GetIndexFromObj(interp, objv[2], + logputopts, "option", TCL_EXACT, &optindex) != TCL_OK) { + return (IS_HELP(objv[2])); + } + switch ((enum logputopts)optindex) { + case LOGPUT_FLUSH: + flag = DB_FLUSH; + break; + } + } + + if (result == TCL_ERROR) + return (result); + + _debug_check(); + ret = dbenv->log_put(dbenv, &lsn, &data, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "log_put"); + if (result == TCL_ERROR) + return (result); + res = Tcl_NewListObj(0, NULL); + intobj = Tcl_NewWideIntObj((Tcl_WideInt)lsn.file); + result = Tcl_ListObjAppendElement(interp, res, intobj); + intobj = Tcl_NewWideIntObj((Tcl_WideInt)lsn.offset); + result = Tcl_ListObjAppendElement(interp, res, intobj); + Tcl_SetObjResult(interp, res); + if (freedata) + __os_free(NULL, dtmp); + return (result); +} +/* + * tcl_LogStat -- + * + * PUBLIC: int tcl_LogStat __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DB_LOG_STAT *sp; + Tcl_Obj *res; + int result, ret; + + result = TCL_OK; + /* + * No args for this. Error if there are some. 
+ */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->log_stat(dbenv, &sp, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "log stat"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); + /* + * MAKE_STAT_LIST assumes 'res' and 'error' label. + */ +#ifdef HAVE_STATISTICS + MAKE_STAT_LIST("Magic", sp->st_magic); + MAKE_STAT_LIST("Log file Version", sp->st_version); + MAKE_STAT_LIST("Region size", sp->st_regsize); + MAKE_STAT_LIST("Log file mode", sp->st_mode); + MAKE_STAT_LIST("Log record cache size", sp->st_lg_bsize); + MAKE_STAT_LIST("Current log file size", sp->st_lg_size); + MAKE_STAT_LIST("Initial fileid allocation", sp->st_fileid_init); + MAKE_STAT_LIST("Current fileids in use", sp->st_nfileid); + MAKE_STAT_LIST("Maximum fileids ever used", sp->st_maxnfileid); + MAKE_WSTAT_LIST("Log file records written", sp->st_record); + MAKE_STAT_LIST("Mbytes written", sp->st_w_mbytes); + MAKE_STAT_LIST("Bytes written (over Mb)", sp->st_w_bytes); + MAKE_STAT_LIST("Mbytes written since checkpoint", sp->st_wc_mbytes); + MAKE_STAT_LIST("Bytes written (over Mb) since checkpoint", + sp->st_wc_bytes); + MAKE_WSTAT_LIST("Times log written", sp->st_wcount); + MAKE_STAT_LIST("Times log written because cache filled up", + sp->st_wcount_fill); + MAKE_WSTAT_LIST("Times log read from disk", sp->st_rcount); + MAKE_WSTAT_LIST("Times log flushed to disk", sp->st_scount); + MAKE_STAT_LIST("Current log file number", sp->st_cur_file); + MAKE_STAT_LIST("Current log file offset", sp->st_cur_offset); + MAKE_STAT_LIST("On-disk log file number", sp->st_disk_file); + MAKE_STAT_LIST("On-disk log file offset", sp->st_disk_offset); + MAKE_STAT_LIST("Max commits in a log flush", sp->st_maxcommitperflush); + MAKE_STAT_LIST("Min commits in a log flush", sp->st_mincommitperflush); + MAKE_WSTAT_LIST("Number 
of region lock waits", sp->st_region_wait); + MAKE_WSTAT_LIST("Number of region lock nowaits", sp->st_region_nowait); +#endif + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_LogStatPrint -- + * + * PUBLIC: int tcl_LogStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_LogStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ static const char *logstatprtopts[] = { + "-all", + "-clear", + NULL + }; + enum logstatprtopts { + LOGSTATPRTALL, + LOGSTATPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], logstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum logstatprtopts)optindex) { + case LOGSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case LOGSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->log_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv log_stat_print"); +error: + return (result); + +} + +/* + * logc_Cmd -- + * Implements the log cursor command. + * + * PUBLIC: int logc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + */ +int +logc_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Cursor handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *logccmds[] = { + "close", + "get", + "version", + NULL + }; + enum logccmds { + LOGCCLOSE, + LOGCGET, + LOGCVERSION + }; + DB_LOGC *logc; + DBTCL_INFO *logcip; + Tcl_Obj *res; + u_int32_t version; + int cmdindex, result, ret; + + Tcl_ResetResult(interp); + logc = (DB_LOGC *)clientData; + logcip = _PtrToInfo((void *)logc); + result = TCL_OK; + + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (logc == NULL) { + Tcl_SetResult(interp, "NULL logc pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (logcip == NULL) { + Tcl_SetResult(interp, "NULL logc info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the berkdbcmds + * defined above. + */ + if (Tcl_GetIndexFromObj(interp, objv[1], logccmds, "command", + TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + switch ((enum logccmds)cmdindex) { + case LOGCCLOSE: + /* + * No args for this. Error if there are some. + */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = logc->close(logc, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "logc close"); + if (result == TCL_OK) { + (void)Tcl_DeleteCommand(interp, logcip->i_name); + _DeleteInfo(logcip); + } + break; + case LOGCGET: + result = tcl_LogcGet(interp, objc, objv, logc); + break; + case LOGCVERSION: + /* + * No args for this. Error if there are some. 
+ */ + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = logc->version(logc, &version, 0); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "logc version")) == TCL_OK) { + res = Tcl_NewIntObj((int)version); + Tcl_SetObjResult(interp, res); + } + break; + } + + return (result); +} + +static int +tcl_LogcGet(interp, objc, objv, logc) + Tcl_Interp *interp; + int objc; + Tcl_Obj * CONST *objv; + DB_LOGC *logc; +{ + static const char *logcgetopts[] = { + "-current", + "-first", + "-last", + "-next", + "-prev", + "-set", + NULL + }; + enum logcgetopts { + LOGCGET_CURRENT, + LOGCGET_FIRST, + LOGCGET_LAST, + LOGCGET_NEXT, + LOGCGET_PREV, + LOGCGET_SET + }; + DB_LSN lsn; + DBT data; + Tcl_Obj *dataobj, *lsnlist, *myobjv[2], *res; + u_int32_t flag; + int i, myobjc, optindex, result, ret; + + result = TCL_OK; + res = NULL; + flag = 0; + + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? lsn"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. 
+ */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + logcgetopts, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum logcgetopts)optindex) { + case LOGCGET_CURRENT: + FLAG_CHECK(flag); + flag |= DB_CURRENT; + break; + case LOGCGET_FIRST: + FLAG_CHECK(flag); + flag |= DB_FIRST; + break; + case LOGCGET_LAST: + FLAG_CHECK(flag); + flag |= DB_LAST; + break; + case LOGCGET_NEXT: + FLAG_CHECK(flag); + flag |= DB_NEXT; + break; + case LOGCGET_PREV: + FLAG_CHECK(flag); + flag |= DB_PREV; + break; + case LOGCGET_SET: + FLAG_CHECK(flag); + flag |= DB_SET; + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-set lsn?"); + result = TCL_ERROR; + break; + } + result = _GetLsn(interp, objv[i++], &lsn); + break; + } + } + + if (result == TCL_ERROR) + return (result); + + memset(&data, 0, sizeof(data)); + + _debug_check(); + ret = logc->get(logc, &lsn, &data, flag); + + res = Tcl_NewListObj(0, NULL); + if (res == NULL) + goto memerr; + + if (ret == 0) { + /* + * Success. Set up return list as {LSN data} where LSN + * is a sublist {file offset}. 
+ */ + myobjc = 2; + myobjv[0] = Tcl_NewWideIntObj((Tcl_WideInt)lsn.file); + myobjv[1] = Tcl_NewWideIntObj((Tcl_WideInt)lsn.offset); + lsnlist = Tcl_NewListObj(myobjc, myobjv); + if (lsnlist == NULL) + goto memerr; + + result = Tcl_ListObjAppendElement(interp, res, lsnlist); + dataobj = NewStringObj(data.data, data.size); + if (dataobj == NULL) { + goto memerr; + } + result = Tcl_ListObjAppendElement(interp, res, dataobj); + } else + result = _ReturnSetup(interp, ret, DB_RETOK_LGGET(ret), + "DB_LOGC->get"); + + Tcl_SetObjResult(interp, res); + + if (0) { +memerr: if (res != NULL) { + Tcl_DecrRefCount(res); + } + Tcl_SetResult(interp, "allocation failed", TCL_STATIC); + } + + return (result); +} + +static const char *confwhich[] = { + "autoremove", + "direct", + "dsync", + "inmemory", + "zero", + NULL +}; +enum logwhich { + LOGCONF_AUTO, + LOGCONF_DIRECT, + LOGCONF_DSYNC, + LOGCONF_INMEMORY, + LOGCONF_ZERO +}; + +/* + * tcl_LogConfig -- + * Call DB_ENV->log_set_config(). + * + * PUBLIC: int tcl_LogConfig + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *)); + */ +int +tcl_LogConfig(interp, dbenv, which, onoff) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + Tcl_Obj *which; /* {which on|off} */ + Tcl_Obj *onoff; +{ + static const char *confonoff[] = { + "off", + "on", + NULL + }; + enum confonoff { + LOGCONF_OFF, + LOGCONF_ON + }; + int on, optindex, ret; + u_int32_t wh; + + if (Tcl_GetIndexFromObj(interp, + which, confwhich, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(which)); + + switch ((enum logwhich)optindex) { + case LOGCONF_AUTO: + wh = DB_LOG_AUTO_REMOVE; + break; + case LOGCONF_DIRECT: + wh = DB_LOG_DIRECT; + break; + case LOGCONF_DSYNC: + wh = DB_LOG_DSYNC; + break; + case LOGCONF_INMEMORY: + wh = DB_LOG_IN_MEMORY; + break; + case LOGCONF_ZERO: + wh = DB_LOG_ZERO; + break; + default: + return (TCL_ERROR); + } + if (Tcl_GetIndexFromObj(interp, + onoff, confonoff, "option", TCL_EXACT, &optindex) 
!= TCL_OK) + return (IS_HELP(onoff)); + switch ((enum confonoff)optindex) { + case LOGCONF_OFF: + on = 0; + break; + case LOGCONF_ON: + on = 1; + break; + default: + return (TCL_ERROR); + } + ret = dbenv->log_set_config(dbenv, wh, on); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_config")); +} + +/* + * tcl_LogGetConfig -- + * Call DB_ENV->rep_get_config(). + * + * PUBLIC: int tcl_LogGetConfig + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); + */ +int +tcl_LogGetConfig(interp, dbenv, which) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + Tcl_Obj *which; /* which flag */ +{ + Tcl_Obj *res; + int on, optindex, result, ret; + u_int32_t wh; + + if (Tcl_GetIndexFromObj(interp, which, confwhich, "option", + TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(which)); + + res = NULL; + switch ((enum logwhich)optindex) { + case LOGCONF_AUTO: + wh = DB_LOG_AUTO_REMOVE; + break; + case LOGCONF_DIRECT: + wh = DB_LOG_DIRECT; + break; + case LOGCONF_DSYNC: + wh = DB_LOG_DSYNC; + break; + case LOGCONF_INMEMORY: + wh = DB_LOG_IN_MEMORY; + break; + case LOGCONF_ZERO: + wh = DB_LOG_ZERO; + break; + default: + return (TCL_ERROR); + } + ret = dbenv->log_get_config(dbenv, wh, &on); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env log_config")) == TCL_OK) { + res = Tcl_NewIntObj(on); + Tcl_SetObjResult(interp, res); + } + return (result); +} +#endif diff --git a/lang/tcl/tcl_mp.c b/lang/tcl/tcl_mp.c new file mode 100644 index 00000000..e7a46b71 --- /dev/null +++ b/lang/tcl/tcl_mp.c @@ -0,0 +1,1020 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +/* + * Prototypes for procedures defined later in this file: + */ +#ifdef CONFIG_TEST +static int mp_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int pg_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +static int tcl_MpGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DB_MPOOLFILE *, DBTCL_INFO *)); +static int tcl_Pg __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + void *, DB_MPOOLFILE *, DBTCL_INFO *)); +static int tcl_PgInit __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + void *, DBTCL_INFO *)); +static int tcl_PgIsset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + void *, DBTCL_INFO *)); +#endif + +/* + * _MpInfoDelete -- + * Removes "sub" mp page info structures that are children + * of this mp. + * + * PUBLIC: void _MpInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); + */ +void +_MpInfoDelete(interp, mpip) + Tcl_Interp *interp; /* Interpreter */ + DBTCL_INFO *mpip; /* Info for mp */ +{ + DBTCL_INFO *nextp, *p; + + for (p = LIST_FIRST(&__db_infohead); p != NULL; p = nextp) { + /* + * Check if this info structure "belongs" to this + * mp. Remove its commands and info structure. + */ + nextp = LIST_NEXT(p, entries); + if (p->i_parent == mpip && p->i_type == I_PG) { + (void)Tcl_DeleteCommand(interp, p->i_name); + _DeleteInfo(p); + } + } +} + +#ifdef CONFIG_TEST +/* + * tcl_MpSync -- + * + * PUBLIC: int tcl_MpSync __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_MpSync(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + + DB_LSN lsn, *lsnp; + int result, ret; + + result = TCL_OK; + lsnp = NULL; + /* + * No flags, must be 3 args. 
+ */ + if (objc == 3) { + result = _GetLsn(interp, objv[2], &lsn); + if (result == TCL_ERROR) + return (result); + lsnp = &lsn; + } + else if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, "lsn"); + return (TCL_ERROR); + } + + _debug_check(); + ret = dbenv->memp_sync(dbenv, lsnp); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "memp sync")); +} + +/* + * tcl_MpTrickle -- + * + * PUBLIC: int tcl_MpTrickle __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_MpTrickle(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + + Tcl_Obj *res; + int pages, percent, result, ret; + + result = TCL_OK; + /* + * No flags, must be 3 args. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "percent"); + return (TCL_ERROR); + } + + result = Tcl_GetIntFromObj(interp, objv[2], &percent); + if (result == TCL_ERROR) + return (result); + + _debug_check(); + ret = dbenv->memp_trickle(dbenv, percent, &pages); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "memp trickle"); + if (result == TCL_ERROR) + return (result); + + res = Tcl_NewIntObj(pages); + Tcl_SetObjResult(interp, res); + return (result); + +} + +/* + * tcl_Mp -- + * + * PUBLIC: int tcl_Mp __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); + */ +int +tcl_Mp(interp, objc, objv, dbenv, envip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ + DBTCL_INFO *envip; /* Info pointer */ +{ + static const char *mpopts[] = { + "-create", + "-mode", + "-multiversion", + "-nommap", + "-pagesize", + "-rdonly", + NULL + }; + enum mpopts { + MPCREATE, + MPMODE, + MPMULTIVERSION, + MPNOMMAP, + MPPAGE, + MPRDONLY + }; + DBTCL_INFO *ip; + DB_MPOOLFILE *mpf; + Tcl_Obj *res; + u_int32_t flag; + int i, pgsize, mode, optindex, result, ret; + char *file, newname[MSG_SIZE]; + + result = TCL_OK; + i = 2; + flag = 0; + mode = 0; + pgsize = 0; + memset(newname, 0, MSG_SIZE); + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + mpopts, "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get an errant + * error message if there is another error. + * This arg is the file name. + */ + if (IS_HELP(objv[i]) == TCL_OK) + return (TCL_OK); + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum mpopts)optindex) { + case MPCREATE: + flag |= DB_CREATE; + break; + case MPMODE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-mode mode?"); + result = TCL_ERROR; + break; + } + /* + * Don't need to check result here because + * if TCL_ERROR, the error message is already + * set up, and we'll bail out below. If ok, + * the mode is set and we go on. + */ + result = Tcl_GetIntFromObj(interp, objv[i++], &mode); + break; + case MPMULTIVERSION: + flag |= DB_MULTIVERSION; + break; + case MPNOMMAP: + flag |= DB_NOMMAP; + break; + case MPPAGE: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-pagesize size?"); + result = TCL_ERROR; + break; + } + /* + * Don't need to check result here because + * if TCL_ERROR, the error message is already + * set up, and we'll bail out below. If ok, + * the mode is set and we go on. 
+ */ + result = Tcl_GetIntFromObj(interp, objv[i++], &pgsize); + break; + case MPRDONLY: + flag |= DB_RDONLY; + break; + } + if (result != TCL_OK) + goto error; + } + /* + * Any left over arg is a file name. It better be the last arg. + */ + file = NULL; + if (i != objc) { + if (i != objc - 1) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? ?file?"); + result = TCL_ERROR; + goto error; + } + file = Tcl_GetStringFromObj(objv[i++], NULL); + } + + snprintf(newname, sizeof(newname), "%s.mp%d", + envip->i_name, envip->i_envmpid); + ip = _NewInfo(interp, NULL, newname, I_MP); + if (ip == NULL) { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + return (TCL_ERROR); + } + + _debug_check(); + if ((ret = dbenv->memp_fcreate(dbenv, &mpf, 0)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "mpool"); + _DeleteInfo(ip); + goto error; + } + + /* + * XXX + * Interface doesn't currently support DB_MPOOLFILE configuration. + */ + if ((ret = mpf->open(mpf, file, flag, mode, (size_t)pgsize)) != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "mpool"); + _DeleteInfo(ip); + + (void)mpf->close(mpf, 0); + goto error; + } + + /* + * Success. Set up return. Set up new info and command widget for + * this mpool. + */ + envip->i_envmpid++; + ip->i_parent = envip; + ip->i_pgsz = pgsize; + _SetInfoData(ip, mpf); + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)mp_Cmd, (ClientData)mpf, NULL); + res = NewStringObj(newname, strlen(newname)); + Tcl_SetObjResult(interp, res); + +error: + return (result); +} + +/* + * tcl_MpStat -- + * + * PUBLIC: int tcl_MpStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_MpStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DB_MPOOL_FSTAT **fsp, **savefsp; + DB_MPOOL_STAT *sp; + char *arg; + int result; + int ret; + Tcl_Obj *res; + Tcl_Obj *res1; + u_int32_t flag; + + flag = 0; + result = TCL_OK; + savefsp = NULL; + /* + * No args for this. Error if there are some. + */ + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + + if (objc == 3) { + arg = Tcl_GetStringFromObj(objv[2], NULL); + if (strcmp(arg, "-clear") == 0) + flag = DB_STAT_CLEAR; + else { + Tcl_SetResult(interp, + "db stat: unknown arg", TCL_STATIC); + return (TCL_ERROR); + } + } + + _debug_check(); + ret = dbenv->memp_stat(dbenv, &sp, &fsp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "memp stat"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); +#ifdef HAVE_STATISTICS + /* + * MAKE_STAT_LIST assumes 'res' and 'error' label. 
+ */ + MAKE_STAT_LIST("Cache size (gbytes)", sp->st_gbytes); + MAKE_STAT_LIST("Cache size (bytes)", sp->st_bytes); + MAKE_STAT_LIST("Number of caches", sp->st_ncache); + MAKE_STAT_LIST("Maximum number of caches", sp->st_max_ncache); + MAKE_STAT_LIST("Region size", sp->st_regsize); + MAKE_STAT_LIST("Region max", sp->st_regmax); + MAKE_STAT_LIST("Maximum memory-mapped file size", sp->st_mmapsize); + MAKE_STAT_LIST("Maximum open file descriptors", sp->st_maxopenfd); + MAKE_STAT_LIST("Maximum sequential buffer writes", sp->st_maxwrite); + MAKE_STAT_LIST( + "Sleep after writing maximum buffers", sp->st_maxwrite_sleep); + MAKE_STAT_LIST("Pages mapped into address space", sp->st_map); + MAKE_WSTAT_LIST("Cache hits", sp->st_cache_hit); + MAKE_WSTAT_LIST("Cache misses", sp->st_cache_miss); + MAKE_WSTAT_LIST("Pages created", sp->st_page_create); + MAKE_WSTAT_LIST("Pages read in", sp->st_page_in); + MAKE_WSTAT_LIST("Pages written", sp->st_page_out); + MAKE_WSTAT_LIST("Clean page evictions", sp->st_ro_evict); + MAKE_WSTAT_LIST("Dirty page evictions", sp->st_rw_evict); + MAKE_WSTAT_LIST("Dirty pages trickled", sp->st_page_trickle); + MAKE_STAT_LIST("Cached pages", sp->st_pages); + MAKE_WSTAT_LIST("Cached clean pages", sp->st_page_clean); + MAKE_WSTAT_LIST("Cached dirty pages", sp->st_page_dirty); + MAKE_WSTAT_LIST("Hash buckets", sp->st_hash_buckets); + MAKE_WSTAT_LIST("Mutexes for hash buckets", sp->st_hash_mutexes); + MAKE_WSTAT_LIST("Default pagesize", sp->st_pagesize); + MAKE_WSTAT_LIST("Hash lookups", sp->st_hash_searches); + MAKE_WSTAT_LIST("Longest hash chain found", sp->st_hash_longest); + MAKE_WSTAT_LIST("Hash elements examined", sp->st_hash_examined); + MAKE_WSTAT_LIST("Number of hash bucket nowaits", sp->st_hash_nowait); + MAKE_WSTAT_LIST("Number of hash bucket waits", sp->st_hash_wait); + MAKE_STAT_LIST("Maximum number of hash bucket nowaits", + sp->st_hash_max_nowait); + MAKE_STAT_LIST("Maximum number of hash bucket waits", + sp->st_hash_max_wait); + 
MAKE_WSTAT_LIST("Number of region lock nowaits", sp->st_region_nowait); + MAKE_WSTAT_LIST("Number of region lock waits", sp->st_region_wait); + MAKE_WSTAT_LIST("Buffers frozen", sp->st_mvcc_frozen); + MAKE_WSTAT_LIST("Buffers thawed", sp->st_mvcc_thawed); + MAKE_WSTAT_LIST("Frozen buffers freed", sp->st_mvcc_freed); + MAKE_WSTAT_LIST("Page allocations", sp->st_alloc); + MAKE_STAT_LIST("Buckets examined during allocation", + sp->st_alloc_buckets); + MAKE_STAT_LIST("Maximum buckets examined during allocation", + sp->st_alloc_max_buckets); + MAKE_WSTAT_LIST("Pages examined during allocation", sp->st_alloc_pages); + MAKE_STAT_LIST("Maximum pages examined during allocation", + sp->st_alloc_max_pages); + MAKE_WSTAT_LIST("Threads waiting on buffer I/O", sp->st_io_wait); + MAKE_WSTAT_LIST("Number of syncs interrupted", sp->st_sync_interrupted); + + /* + * Save global stat list as res1. The MAKE_STAT_LIST + * macro assumes 'res' so we'll use that to build up + * our per-file sublist. + */ + res1 = res; + for (savefsp = fsp; fsp != NULL && *fsp != NULL; fsp++) { + res = Tcl_NewObj(); + MAKE_STAT_STRLIST("File Name", (*fsp)->file_name); + MAKE_STAT_LIST("Page size", (*fsp)->st_pagesize); + MAKE_STAT_LIST("Pages mapped into address space", + (*fsp)->st_map); + MAKE_WSTAT_LIST("Cache hits", (*fsp)->st_cache_hit); + MAKE_WSTAT_LIST("Cache misses", (*fsp)->st_cache_miss); + MAKE_WSTAT_LIST("Pages created", (*fsp)->st_page_create); + MAKE_WSTAT_LIST("Pages read in", (*fsp)->st_page_in); + MAKE_WSTAT_LIST("Pages written", (*fsp)->st_page_out); + /* + * Now that we have a complete "per-file" stat list, append + * that to the other list. 
+ */ + result = Tcl_ListObjAppendElement(interp, res1, res); + if (result != TCL_OK) + goto error; + } +#endif + Tcl_SetObjResult(interp, res1); +error: + __os_ufree(dbenv->env, sp); + if (savefsp != NULL) + __os_ufree(dbenv->env, savefsp); + return (result); +} + +/* + * tcl_MpStatPrint -- + * + * PUBLIC: int tcl_MpStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_MpStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *mpstatprtopts[] = { + "-all", + "-clear", + "-hash", + NULL + }; + enum mpstatprtopts { + MPSTATPRTALL, + MPSTATPRTCLEAR, + MPSTATPRTHASH + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], mpstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum mpstatprtopts)optindex) { + case MPSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case MPSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + case MPSTATPRTHASH: + flag |= DB_STAT_MEMP_HASH; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->memp_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv memp_stat_print"); +error: + return (result); + +} + +/* + * mp_Cmd -- + * Implements the "mp" widget. + */ +static int +mp_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Mp handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *mpcmds[] = { + "close", + "fsync", + "get", + "get_clear_len", + "get_fileid", + "get_ftype", + "get_lsn_offset", + "get_pgcookie", + NULL + }; + enum mpcmds { + MPCLOSE, + MPFSYNC, + MPGET, + MPGETCLEARLEN, + MPGETFILEID, + MPGETFTYPE, + MPGETLSNOFFSET, + MPGETPGCOOKIE + }; + DB_MPOOLFILE *mp; + int cmdindex, ftype, length, result, ret; + DBTCL_INFO *mpip; + Tcl_Obj *res; + char *obj_name; + u_int32_t value; + int32_t intval; + u_int8_t fileid[DB_FILE_ID_LEN]; + DBT cookie; + + Tcl_ResetResult(interp); + mp = (DB_MPOOLFILE *)clientData; + obj_name = Tcl_GetStringFromObj(objv[0], &length); + mpip = _NameToInfo(obj_name); + result = TCL_OK; + + if (mp == NULL) { + Tcl_SetResult(interp, "NULL mp pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (mpip == NULL) { + Tcl_SetResult(interp, "NULL mp info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], mpcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum mpcmds)cmdindex) { + case MPCLOSE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = mp->close(mp, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp close"); + _MpInfoDelete(interp, mpip); + (void)Tcl_DeleteCommand(interp, mpip->i_name); + _DeleteInfo(mpip); + break; + case MPFSYNC: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = mp->sync(mp); + res = Tcl_NewIntObj(ret); + break; + case MPGET: + result = tcl_MpGet(interp, objc, objv, mp, mpip); + break; + case MPGETCLEARLEN: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = mp->get_clear_len(mp, &value); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp get_clear_len")) == TCL_OK) + res = Tcl_NewIntObj((int)value); + break; + case MPGETFILEID: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = mp->get_fileid(mp, fileid); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp get_fileid")) == TCL_OK) + res = NewStringObj((char *)fileid, DB_FILE_ID_LEN); + break; + case MPGETFTYPE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = mp->get_ftype(mp, &ftype); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp get_ftype")) == TCL_OK) + res = Tcl_NewIntObj(ftype); + break; + case MPGETLSNOFFSET: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = mp->get_lsn_offset(mp, &intval); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp get_lsn_offset")) == TCL_OK) + res = Tcl_NewIntObj(intval); + break; + case MPGETPGCOOKIE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + 
return (TCL_ERROR); + } + memset(&cookie, 0, sizeof(DBT)); + ret = mp->get_pgcookie(mp, &cookie); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mp get_pgcookie")) == TCL_OK) + res = Tcl_NewByteArrayObj((u_char *)cookie.data, + (int)cookie.size); + break; + } + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * tcl_MpGet -- + */ +static int +tcl_MpGet(interp, objc, objv, mp, mpip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_MPOOLFILE *mp; /* mp pointer */ + DBTCL_INFO *mpip; /* mp info pointer */ +{ + static const char *mpget[] = { + "-create", + "-dirty", + "-last", + "-new", + "-txn", + NULL + }; + enum mpget { + MPGET_CREATE, + MPGET_DIRTY, + MPGET_LAST, + MPGET_NEW, + MPGET_TXN + }; + + DBTCL_INFO *ip; + Tcl_Obj *res; + DB_TXN *txn; + db_pgno_t pgno; + u_int32_t flag; + int i, ipgno, optindex, result, ret; + char *arg, msg[MSG_SIZE], newname[MSG_SIZE]; + void *page; + + txn = NULL; + result = TCL_OK; + memset(newname, 0, MSG_SIZE); + i = 2; + flag = 0; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + mpget, "option", TCL_EXACT, &optindex) != TCL_OK) { + /* + * Reset the result so we don't get an errant + * error message if there is another error. + * This arg is the page number. 
+ */ + if (IS_HELP(objv[i]) == TCL_OK) + return (TCL_OK); + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum mpget)optindex) { + case MPGET_CREATE: + flag |= DB_MPOOL_CREATE; + break; + case MPGET_DIRTY: + flag |= DB_MPOOL_DIRTY; + break; + case MPGET_LAST: + flag |= DB_MPOOL_LAST; + break; + case MPGET_NEW: + flag |= DB_MPOOL_NEW; + break; + case MPGET_TXN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "mpool get: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } + if (result != TCL_OK) + goto error; + } + /* + * Any left over arg is a page number. It better be the last arg. + */ + ipgno = 0; + if (i != objc) { + if (i != objc - 1) { + Tcl_WrongNumArgs(interp, 2, objv, "?args? ?pgno?"); + result = TCL_ERROR; + goto error; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &ipgno); + if (result != TCL_OK) + goto error; + } + + snprintf(newname, sizeof(newname), "%s.pg%d", + mpip->i_name, mpip->i_mppgid); + ip = _NewInfo(interp, NULL, newname, I_PG); + if (ip == NULL) { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + return (TCL_ERROR); + } + _debug_check(); + pgno = (db_pgno_t)ipgno; + ret = mp->get(mp, &pgno, NULL, flag, &page); + result = _ReturnSetup(interp, ret, DB_RETOK_MPGET(ret), "mpool get"); + if (result == TCL_ERROR) + _DeleteInfo(ip); + else { + /* + * Success. Set up return. Set up new info + * and command widget for this mpool. 
+ */ + mpip->i_mppgid++; + ip->i_parent = mpip; + ip->i_pgno = pgno; + ip->i_pgsz = mpip->i_pgsz; + _SetInfoData(ip, page); + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)pg_Cmd, (ClientData)page, NULL); + res = NewStringObj(newname, strlen(newname)); + Tcl_SetObjResult(interp, res); + } +error: + return (result); +} + +/* + * pg_Cmd -- + * Implements the "pg" widget. + */ +static int +pg_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Page handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *pgcmds[] = { + "init", + "is_setto", + "pgnum", + "pgsize", + "put", + NULL + }; + enum pgcmds { + PGINIT, + PGISSET, + PGNUM, + PGSIZE, + PGPUT + }; + DB_MPOOLFILE *mp; + int cmdindex, length, result; + char *obj_name; + void *page; + DBTCL_INFO *pgip; + Tcl_Obj *res; + + Tcl_ResetResult(interp); + page = (void *)clientData; + obj_name = Tcl_GetStringFromObj(objv[0], &length); + pgip = _NameToInfo(obj_name); + mp = NAME_TO_MP(pgip->i_parent->i_name); + result = TCL_OK; + + if (page == NULL) { + Tcl_SetResult(interp, "NULL page pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (mp == NULL) { + Tcl_SetResult(interp, "NULL mp pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (pgip == NULL) { + Tcl_SetResult(interp, "NULL page info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], pgcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum pgcmds)cmdindex) { + case PGNUM: + res = Tcl_NewWideIntObj((Tcl_WideInt)pgip->i_pgno); + break; + case PGSIZE: + res = Tcl_NewWideIntObj((Tcl_WideInt)pgip->i_pgsz); + break; + case PGPUT: + result = tcl_Pg(interp, objc, objv, page, mp, pgip); + break; + case PGINIT: + result = tcl_PgInit(interp, objc, objv, page, pgip); + break; + case PGISSET: + result = tcl_PgIsset(interp, objc, objv, page, pgip); + break; + } + + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. + */ + if (result == TCL_OK && res != NULL) + Tcl_SetObjResult(interp, res); + return (result); +} + +static int +tcl_Pg(interp, objc, objv, page, mp, pgip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + void *page; /* Page pointer */ + DB_MPOOLFILE *mp; /* Mpool pointer */ + DBTCL_INFO *pgip; /* Info pointer */ +{ + static const char *pgopt[] = { + "-discard", + NULL + }; + enum pgopt { + PGDISCARD + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + i = 2; + flag = 0; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + pgopt, "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum pgopt)optindex) { + case PGDISCARD: + flag |= DB_MPOOL_DISCARD; + break; + } + } + + _debug_check(); + ret = mp->put(mp, page, DB_PRIORITY_UNCHANGED, flag); + + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "page"); + + (void)Tcl_DeleteCommand(interp, pgip->i_name); + _DeleteInfo(pgip); + return (result); +} + +static int +tcl_PgInit(interp, objc, objv, page, pgip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + void *page; /* Page pointer */ + DBTCL_INFO *pgip; /* Info pointer */ +{ + Tcl_Obj *res; + long *p, *endp, newval; + int length, pgsz, result; + u_char *s; + + result = TCL_OK; + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "val"); + return (TCL_ERROR); + } + + pgsz = pgip->i_pgsz; + result = Tcl_GetLongFromObj(interp, objv[2], &newval); + if (result != TCL_OK) { + s = Tcl_GetByteArrayFromObj(objv[2], &length); + if (s == NULL) + return (TCL_ERROR); + memcpy(page, s, (size_t)((length < pgsz) ? length : pgsz)); + result = TCL_OK; + } else { + p = (long *)page; + for (endp = p + ((u_int)pgsz / sizeof(long)); p < endp; p++) + *p = newval; + } + res = Tcl_NewIntObj(0); + Tcl_SetObjResult(interp, res); + return (result); +} + +static int +tcl_PgIsset(interp, objc, objv, page, pgip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + void *page; /* Page pointer */ + DBTCL_INFO *pgip; /* Info pointer */ +{ + Tcl_Obj *res; + long *p, *endp, newval; + int length, pgsz, result; + u_char *s; + + result = TCL_OK; + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "val"); + return (TCL_ERROR); + } + + pgsz = pgip->i_pgsz; + result = Tcl_GetLongFromObj(interp, objv[2], &newval); + if (result != TCL_OK) { + if ((s = Tcl_GetByteArrayFromObj(objv[2], &length)) == NULL) + return (TCL_ERROR); + result = TCL_OK; + + if (memcmp(page, s, + (size_t)((length < pgsz) ? length : pgsz)) != 0) { + res = Tcl_NewIntObj(0); + Tcl_SetObjResult(interp, res); + return (result); + } + } else { + p = (long *)page; + /* + * If any value is not the same, return 0 (is not set to + * this value). Otherwise, if we finish the loop, we return 1 + * (is set to this value). 
+ */ + for (endp = p + ((u_int)pgsz / sizeof(long)); p < endp; p++) + if (*p != newval) { + res = Tcl_NewIntObj(0); + Tcl_SetObjResult(interp, res); + return (result); + } + } + + res = Tcl_NewIntObj(1); + Tcl_SetObjResult(interp, res); + return (result); +} +#endif diff --git a/lang/tcl/tcl_mutex.c b/lang/tcl/tcl_mutex.c new file mode 100644 index 00000000..30570ec0 --- /dev/null +++ b/lang/tcl/tcl_mutex.c @@ -0,0 +1,389 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +#ifdef CONFIG_TEST +/* + * PUBLIC: int tcl_Mutex __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_Mutex -- + * Implements dbenv->mutex_alloc method. + */ +int +tcl_Mutex(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + static const char *which[] = { + "-process_only", + "-self_block", + NULL + }; + enum which { + PROCONLY, + SELFBLOCK + }; + int arg, i, result, ret; + u_int32_t flags; + db_mutex_t indx; + Tcl_Obj *res; + + result = TCL_OK; + flags = 0; + Tcl_ResetResult(interp); + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "-proccess_only | -self_block"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + /* + * If there is an arg, make sure it is the right one. 
+ */ + if (Tcl_GetIndexFromObj(interp, objv[i], which, "option", + TCL_EXACT, &arg) != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum which)arg) { + case PROCONLY: + flags |= DB_MUTEX_PROCESS_ONLY; + break; + case SELFBLOCK: + flags |= DB_MUTEX_SELF_BLOCK; + break; + } + } + res = NULL; + ret = dbenv->mutex_alloc(dbenv, flags, &indx); + if (ret != 0) { + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mutex_alloc"); + Tcl_SetResult(interp, "allocation failed", TCL_STATIC); + } else { + res = Tcl_NewWideIntObj((Tcl_WideInt)indx); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * PUBLIC: int tcl_MutFree __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_MutFree -- + * Implements dbenv->mutex_free method. + */ +int +tcl_MutFree(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + int result, ret; + Tcl_WideInt tmp; + db_mutex_t indx; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, "mutexid"); + return (TCL_ERROR); + } + if ((result = Tcl_GetWideIntFromObj(interp, objv[2], &tmp)) != TCL_OK) + return (result); + indx = (db_mutex_t)tmp; + ret = dbenv->mutex_free(dbenv, indx); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env mutex_free")); +} + +/* + * PUBLIC: int tcl_MutGet __P((Tcl_Interp *, DB_ENV *, int)); + * + * tcl_MutGet -- + * Implements dbenv->mutex_get_* methods. 
+ */ +int +tcl_MutGet(interp, dbenv, op) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment */ + int op; /* Which item to get */ +{ + Tcl_Obj *res; + u_int32_t val; + int result, ret; + + res = NULL; + val = 0; + ret = 0; + + switch (op) { + case DBTCL_MUT_ALIGN: + ret = dbenv->mutex_get_align(dbenv, &val); + break; + case DBTCL_MUT_INCR: + ret = dbenv->mutex_get_increment(dbenv, &val); + break; + case DBTCL_MUT_INIT: + ret = dbenv->mutex_get_init(dbenv, &val); + break; + case DBTCL_MUT_MAX: + ret = dbenv->mutex_get_max(dbenv, &val); + break; + case DBTCL_MUT_TAS: + ret = dbenv->mutex_get_tas_spins(dbenv, &val); + break; + default: + return (TCL_ERROR); + } + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "mutex_get")) == TCL_OK) { + res = Tcl_NewLongObj((long)val); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * PUBLIC: int tcl_MutLock __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_MutLock -- + * Implements dbenv->mutex_lock method. + */ +int +tcl_MutLock(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + int result, ret; + Tcl_WideInt tmp; + db_mutex_t indx; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, "mutexid"); + return (TCL_ERROR); + } + if ((result = Tcl_GetWideIntFromObj(interp, objv[2], &tmp)) != TCL_OK) + return (result); + indx = (db_mutex_t)tmp; + ret = dbenv->mutex_lock(dbenv, indx); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env mutex_lock")); +} + +/* + * PUBLIC: int tcl_MutSet __P((Tcl_Interp *, Tcl_Obj *, + * PUBLIC: DB_ENV *, int)); + * + * tcl_MutSet -- + * Implements dbenv->mutex_set methods. 
+ */ +int +tcl_MutSet(interp, obj, dbenv, op) + Tcl_Interp *interp; /* Interpreter */ + Tcl_Obj *obj; /* The argument object */ + DB_ENV *dbenv; /* Environment */ + int op; /* Which item to set */ +{ + int result, ret; + u_int32_t val; + + if ((result = _GetUInt32(interp, obj, &val)) != TCL_OK) + return (result); + switch (op) { + case DBTCL_MUT_ALIGN: + ret = dbenv->mutex_set_align(dbenv, val); + break; + case DBTCL_MUT_INCR: + ret = dbenv->mutex_set_increment(dbenv, val); + break; + case DBTCL_MUT_INIT: + ret = dbenv->mutex_set_init(dbenv, val); + break; + case DBTCL_MUT_MAX: + ret = dbenv->mutex_set_max(dbenv, val); + break; + case DBTCL_MUT_TAS: + ret = dbenv->mutex_set_tas_spins(dbenv, val); + break; + default: + return (TCL_ERROR); + } + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env mutex_set")); +} + +/* + * PUBLIC: int tcl_MutStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_MutStat -- + * Implements dbenv->mutex_stat method. + */ +int +tcl_MutStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + DB_MUTEX_STAT *sp; + Tcl_Obj *res; + u_int32_t flag; + int result, ret; + char *arg; + + result = TCL_OK; + flag = 0; + + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-clear?"); + return (TCL_ERROR); + } + + if (objc == 3) { + arg = Tcl_GetStringFromObj(objv[2], NULL); + if (strcmp(arg, "-clear") == 0) + flag = DB_STAT_CLEAR; + else { + Tcl_SetResult(interp, + "db stat: unknown arg", TCL_STATIC); + return (TCL_ERROR); + } + } + + _debug_check(); + ret = dbenv->mutex_stat(dbenv, &sp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "mutex stat"); + if (result == TCL_ERROR) + return (result); + + res = Tcl_NewObj(); + MAKE_STAT_LIST("Mutex align", sp->st_mutex_align); + MAKE_STAT_LIST("Mutex TAS spins", sp->st_mutex_tas_spins); + MAKE_STAT_LIST("Initial mutex count", sp->st_mutex_init); + MAKE_STAT_LIST("Mutex count", sp->st_mutex_cnt); + MAKE_STAT_LIST("Mutex max", sp->st_mutex_max); + MAKE_STAT_LIST("Free mutexes", sp->st_mutex_free); + MAKE_STAT_LIST("Mutexes in use", sp->st_mutex_inuse); + MAKE_STAT_LIST("Max in use", sp->st_mutex_inuse_max); + MAKE_STAT_LIST("Mutex region size", sp->st_regsize); + MAKE_STAT_LIST("Mutex region max", sp->st_regmax); + MAKE_WSTAT_LIST("Number of region waits", sp->st_region_wait); + MAKE_WSTAT_LIST("Number of region no waits", sp->st_region_nowait); + Tcl_SetObjResult(interp, res); + + /* + * The 'error' label is used by the MAKE_STAT_LIST macro. + * Therefore we cannot remove it, and also we know that + * sp is allocated at that time. + */ +error: __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * PUBLIC: int tcl_MutStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_MutStat -- + * Implements dbenv->mutex_stat_print method. + */ +int +tcl_MutStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + static const char *mutstatprtopts[] = { + "-all", + "-clear", + NULL + }; + enum mutstatprtopts { + MUTSTATPRTALL, + MUTSTATPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], mutstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum mutstatprtopts)optindex) { + case MUTSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case MUTSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->mutex_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv mutex_stat_print"); +error: + return (result); +} + +/* + * PUBLIC: int tcl_MutUnlock __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + * PUBLIC: DB_ENV *)); + * + * tcl_MutUnlock -- + * Implements dbenv->mutex_unlock method. + */ +int +tcl_MutUnlock(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment */ +{ + int result, ret; + Tcl_WideInt tmp; + db_mutex_t indx; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 3, objv, "mutexid"); + return (TCL_ERROR); + } + if ((result = Tcl_GetWideIntFromObj(interp, objv[2], &tmp)) != TCL_OK) + return (result); + indx = (db_mutex_t)tmp; + ret = dbenv->mutex_unlock(dbenv, indx); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env mutex_unlock")); +} +#endif diff --git a/lang/tcl/tcl_rep.c b/lang/tcl/tcl_rep.c new file mode 100644 index 00000000..bc372d14 --- /dev/null +++ b/lang/tcl/tcl_rep.c @@ -0,0 +1,1691 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. 
All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +#ifdef CONFIG_TEST +typedef struct { + const char *name; + u_int32_t value; +} NAMEMAP; + +static const NAMEMAP rep_ack_policies[] = { + {"all", DB_REPMGR_ACKS_ALL}, + {"allavailable", DB_REPMGR_ACKS_ALL_AVAILABLE}, + {"allpeers", DB_REPMGR_ACKS_ALL_PEERS}, + {"none", DB_REPMGR_ACKS_NONE}, + {"one", DB_REPMGR_ACKS_ONE}, + {"onepeer", DB_REPMGR_ACKS_ONE_PEER}, + {"quorum", DB_REPMGR_ACKS_QUORUM}, + {NULL, 0} +}; + +static const NAMEMAP rep_config_types[] = { + {"autoinit", DB_REP_CONF_AUTOINIT}, + {"autorollback", DB_REP_CONF_AUTOROLLBACK}, + {"bulk", DB_REP_CONF_BULK}, + {"delayclient", DB_REP_CONF_DELAYCLIENT}, + {"inmem", DB_REP_CONF_INMEM}, + {"lease", DB_REP_CONF_LEASE}, + {"mgr2sitestrict", DB_REPMGR_CONF_2SITE_STRICT}, + {"mgrelections", DB_REPMGR_CONF_ELECTIONS}, + {"nowait", DB_REP_CONF_NOWAIT}, + {NULL, 0} +}; + +static const NAMEMAP rep_timeout_types[] = { + {"ack", DB_REP_ACK_TIMEOUT}, + {"checkpoint_delay", DB_REP_CHECKPOINT_DELAY}, + {"connection_retry", DB_REP_CONNECTION_RETRY}, + {"election", DB_REP_ELECTION_TIMEOUT}, + {"election_retry", DB_REP_ELECTION_RETRY}, + {"full_election", DB_REP_FULL_ELECTION_TIMEOUT}, + {"heartbeat_monitor", DB_REP_HEARTBEAT_MONITOR}, + {"heartbeat_send", DB_REP_HEARTBEAT_SEND}, + {"lease", DB_REP_LEASE_TIMEOUT}, + {NULL, 0} +}; + +static int tcl_RepNumberToName __P((const NAMEMAP *, u_int32_t, const char **)); + +/* + * tcl_RepConfig -- + * Call DB_ENV->rep_set_config(). 
+ * + * PUBLIC: int tcl_RepConfig + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); + */ +int +tcl_RepConfig(interp, dbenv, list) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + Tcl_Obj *list; /* {which on|off} */ +{ + static const char *confonoff[] = { + "off", + "on", + NULL + }; + enum confonoff { + REPCONF_OFF, + REPCONF_ON + }; + Tcl_Obj **myobjv, *onoff, *which; + int myobjc, on, optindex, result, ret; + u_int32_t wh; + + result = Tcl_ListObjGetElements(interp, list, &myobjc, &myobjv); + if (result != TCL_OK) + return (result); + which = myobjv[0]; + onoff = myobjv[1]; + if (Tcl_GetIndexFromObjStruct(interp, which, + &rep_config_types[0].name, sizeof(NAMEMAP), + "config type", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(which)); + + wh = rep_config_types[optindex].value; + if (Tcl_GetIndexFromObj(interp, onoff, confonoff, "option", + TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(onoff)); + switch ((enum confonoff)optindex) { + case REPCONF_OFF: + on = 0; + break; + case REPCONF_ON: + on = 1; + break; + default: + return (TCL_ERROR); + } + ret = dbenv->rep_set_config(dbenv, wh, on); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_config")); +} + +/* + * tcl_RepGetTwo -- + * Call replication getters that return 2 values. 
+ * + * PUBLIC: int tcl_RepGetTwo + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, int)); + */ +int +tcl_RepGetTwo(interp, dbenv, op) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + int op; /* which getter */ +{ + Tcl_Obj *myobjv[2], *res; + u_int32_t val1, val2; + int myobjc, result, ret; + + ret = 0; + val1 = val2 = 0; + switch (op) { + case DBTCL_GETCLOCK: + ret = dbenv->rep_get_clockskew(dbenv, &val1, &val2); + break; + case DBTCL_GETLIMIT: + ret = dbenv->rep_get_limit(dbenv, &val1, &val2); + break; + case DBTCL_GETREQ: + ret = dbenv->rep_get_request(dbenv, &val1, &val2); + break; + default: + return (TCL_ERROR); + } + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_get")) == TCL_OK) { + myobjc = 2; + myobjv[0] = Tcl_NewLongObj((long)val1); + myobjv[1] = Tcl_NewLongObj((long)val2); + res = Tcl_NewListObj(myobjc, myobjv); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * tcl_RepGetConfig -- + * + * PUBLIC: int tcl_RepGetConfig + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); + */ +int +tcl_RepGetConfig(interp, dbenv, which) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + Tcl_Obj *which; /* which flag */ +{ + Tcl_Obj *res; + int on, optindex, result, ret; + u_int32_t wh; + + if (Tcl_GetIndexFromObjStruct(interp, which, + &rep_config_types[0].name, sizeof(NAMEMAP), + "config type", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(which)); + wh = rep_config_types[optindex].value; + + ret = dbenv->rep_get_config(dbenv, wh, &on); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_config")) == TCL_OK) { + res = Tcl_NewIntObj(on); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * tcl_RepGetTimeout -- + * Get various replication timeout values. 
+ * + * PUBLIC: int tcl_RepGetTimeout + * PUBLIC: __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); + */ +int +tcl_RepGetTimeout(interp, dbenv, which) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ + Tcl_Obj *which; /* which flag */ +{ + Tcl_Obj *res; + int optindex, result, ret, wh; + u_int32_t to; + + if (Tcl_GetIndexFromObjStruct(interp, which, + &rep_timeout_types[0].name, sizeof(NAMEMAP), + "timeout type", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(which)); + wh = (int)rep_timeout_types[optindex].value; + ret = dbenv->rep_get_timeout(dbenv, wh, &to); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_get_timeout")) == TCL_OK) { + res = Tcl_NewLongObj((long)to); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * tcl_RepGetAckPolicy + * Get Replication Manager acknowledgement policy + * + * PUBLIC: int tcl_RepGetAckPolicy + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepGetAckPolicy(interp, objc, objv, dbenv) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB_ENV *dbenv; +{ + const char *name; + int policy, ret; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return (TCL_ERROR); + } + + if ((ret = dbenv->repmgr_get_ack_policy(dbenv, &policy)) != 0 || + (ret = tcl_RepNumberToName(rep_ack_policies, + (u_int32_t)policy, &name)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env repmgr_get_ack_policy")); + + Tcl_SetObjResult(interp, NewStringObj(name, strlen(name))); + return (TCL_OK); +} + +/* + * tcl_RepGetLocalSite + * Get local site address. 
+ * + * PUBLIC: int tcl_RepGetLocalSite + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepGetLocalSite(interp, objc, objv, dbenv) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB_ENV *dbenv; +{ + Tcl_Obj *myobjv[2]; + DB_SITE *dbsite; + const char *host; + int ret, t_ret; + u_int port; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return (TCL_ERROR); + } + + if ((ret = dbenv->repmgr_local_site(dbenv, &dbsite)) == 0) { + if ((ret = dbsite->get_address(dbsite, &host, &port)) == 0) { + myobjv[0] = NewStringObj(host, strlen(host)); + myobjv[1] = Tcl_NewIntObj((int)port); + Tcl_SetObjResult(interp, Tcl_NewListObj(2, myobjv)); + } + if ((t_ret = dbsite->close(dbsite)) != 0 && ret == 0) + ret = t_ret; + } + return (ret == 0 ? TCL_OK : + _ReturnSetup(interp, ret, + DB_RETOK_REPMGR_LOCALSITE(ret), "env repmgr_local_site")); +} + + + + +/* + * tcl_RepNumberToName + * Map a #define'd int value to the corresponding name. + */ +static int +tcl_RepNumberToName(map, value, namep) + const NAMEMAP *map; + u_int32_t value; + const char **namep; +{ + while (map->name) { + if (map->value == value) { + *namep = map->name; + return (0); + } + map++; + } + return (DB_NOTFOUND); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepElect -- + * Call DB_ENV->rep_elect(). + * + * PUBLIC: int tcl_RepElect + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepElect(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + int result, ret; + u_int32_t full_timeout, nsites, nvotes, pri, timeout; + + if (objc != 6 && objc != 7) { + Tcl_WrongNumArgs(interp, 2, objv, + "nsites nvotes pri timeout [full_timeout]"); + return (TCL_ERROR); + } + + if ((result = _GetUInt32(interp, objv[2], &nsites)) != TCL_OK) + return (result); + if ((result = _GetUInt32(interp, objv[3], &nvotes)) != TCL_OK) + return (result); + if ((result = _GetUInt32(interp, objv[4], &pri)) != TCL_OK) + return (result); + if ((result = _GetUInt32(interp, objv[5], &timeout)) != TCL_OK) + return (result); + full_timeout = 0; + if (objc == 7) + if ((result = _GetUInt32(interp, objv[6], &full_timeout)) + != TCL_OK) + return (result); + + _debug_check(); + + if ((ret = dbenv->rep_set_priority(dbenv, pri)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_elect (rep_set_priority)")); + if ((ret = dbenv->rep_set_timeout(dbenv, DB_REP_ELECTION_TIMEOUT, + timeout)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_elect (rep_set_timeout)")); + + if (full_timeout != 0 && (ret = dbenv->rep_set_timeout(dbenv, + DB_REP_FULL_ELECTION_TIMEOUT, full_timeout)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_elect (rep_set_timeout)")); + + ret = dbenv->rep_elect(dbenv, nsites, nvotes, 0); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env rep_elect")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepFlush -- + * Call DB_ENV->rep_flush(). 
+ * + * PUBLIC: int tcl_RepFlush + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepFlush(interp, objc, objv, dbenv) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB_ENV *dbenv; +{ + int ret; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return TCL_ERROR; + } + + _debug_check(); + ret = dbenv->rep_flush(dbenv); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env rep_flush")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepSync -- + * Call DB_ENV->rep_sync(). + * + * PUBLIC: int tcl_RepSync + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepSync(interp, objc, objv, dbenv) + Tcl_Interp *interp; + int objc; + Tcl_Obj *CONST objv[]; + DB_ENV *dbenv; +{ + int ret; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return TCL_ERROR; + } + + _debug_check(); + ret = dbenv->rep_sync(dbenv, 0); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env rep_sync")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepLease -- + * Call DB_ENV->rep_set_lease(). + * + * PUBLIC: int tcl_RepLease __P((Tcl_Interp *, int, Tcl_Obj * CONST *, + * PUBLIC: DB_ENV *)); + */ +int +tcl_RepLease(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + u_int32_t clock_fast, clock_slow, timeout; + int result, ret; + + COMPQUIET(clock_fast, 0); + COMPQUIET(clock_slow, 0); + + if (objc != 3 && objc != 1) { + Tcl_WrongNumArgs(interp, 1, objv, "{timeout fast slow}"); + return (TCL_ERROR); + } + + if ((result = _GetUInt32(interp, objv[0], &timeout)) != TCL_OK) + return (result); + if (objc == 4) { + if ((result = _GetUInt32(interp, objv[1], &clock_fast)) + != TCL_OK) + return (result); + if ((result = _GetUInt32(interp, objv[2], &clock_slow)) + != TCL_OK) + return (result); + } + ret = dbenv->rep_set_timeout(dbenv, DB_REP_LEASE_TIMEOUT, + (db_timeout_t)timeout); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep_set_timeout"); + ret = dbenv->rep_set_config(dbenv, DB_REP_CONF_LEASE, 1); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep_set_config"); + if (result != TCL_OK) + return (result); + if (objc == 3) + ret = dbenv->rep_set_clockskew(dbenv, clock_fast, clock_slow); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_set_lease")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepInmemFiles -- + * Set in-memory replication, which must be done before opening + * environment. + * + * PUBLIC: int tcl_RepInmemFiles __P((Tcl_Interp *, DB_ENV *)); + */ +int +tcl_RepInmemFiles(interp, dbenv) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; +{ + int ret; + + ret = dbenv->rep_set_config(dbenv, DB_REP_CONF_INMEM, 1); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep_set_config")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepLimit -- + * Call DB_ENV->rep_set_limit(). + * + * PUBLIC: int tcl_RepLimit + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepLimit(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + int result, ret; + u_int32_t bytes, gbytes; + + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "gbytes bytes"); + return (TCL_ERROR); + } + + if ((result = _GetUInt32(interp, objv[2], &gbytes)) != TCL_OK) + return (result); + if ((result = _GetUInt32(interp, objv[3], &bytes)) != TCL_OK) + return (result); + + _debug_check(); + if ((ret = dbenv->rep_set_limit(dbenv, gbytes, bytes)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env set_rep_limit")); + + return (_ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "env set_rep_limit")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepNSites -- + * Call DB_ENV->rep_set_nsites(). + * + * PUBLIC: int tcl_RepNSites + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepNSites(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + int result, ret; + u_int32_t nsites; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "nsites"); + return (TCL_ERROR); + } + + if ((result = _GetUInt32(interp, objv[2], &nsites)) != TCL_OK) + return (result); + + _debug_check(); + ret = dbenv->rep_set_nsites(dbenv, nsites); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env rep_nsites")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepRequest -- + * Call DB_ENV->rep_set_request(). + * + * PUBLIC: int tcl_RepRequest + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepRequest(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + int result, ret; + long min, max; + + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "min max"); + return (TCL_ERROR); + } + + if ((result = Tcl_GetLongFromObj(interp, objv[2], &min)) != TCL_OK) + return (result); + if ((result = Tcl_GetLongFromObj(interp, objv[3], &max)) != TCL_OK) + return (result); + + _debug_check(); + if ((ret = dbenv->rep_set_request(dbenv, (db_timeout_t)min, + (db_timeout_t)max)) != 0) + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_request")); + + return (_ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "env rep_request")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepNoarchiveTimeout -- + * Reset the master update timer, to allow immediate log archiving. + * + * PUBLIC: int tcl_RepNoarchiveTimeout + * PUBLIC: __P((Tcl_Interp *, DB_ENV *)); + */ +int +tcl_RepNoarchiveTimeout(interp, dbenv) + Tcl_Interp *interp; /* Interpreter */ + DB_ENV *dbenv; /* Environment pointer */ +{ + ENV *env; + REGENV *renv; + REGINFO *infop; + + env = dbenv->env; + + _debug_check(); + infop = env->reginfo; + renv = infop->primary; + REP_SYSTEM_LOCK(env); + F_CLR(renv, DB_REGENV_REPLOCKED); + renv->op_timestamp = 0; + REP_SYSTEM_UNLOCK(env); + + return (_ReturnSetup(interp, + 0, DB_RETOK_STD(0), "env test force noarchive_timeout")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepTransport -- + * Call DB_ENV->rep_set_transport(). + * + * PUBLIC: int tcl_RepTransport __P((Tcl_Interp *, int, Tcl_Obj * CONST *, + * PUBLIC: DB_ENV *, DBTCL_INFO *)); + * + * Note that this normally can/should be achieved as an argument to + * berkdb env, but we need to test changing the transport function on + * the fly. + */ +int +tcl_RepTransport(interp, objc, objv, dbenv, ip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; + DBTCL_INFO *ip; +{ + int intarg, result, ret; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, "{id transport_func}"); + return (TCL_ERROR); + } + + /* + * Store the objects containing the machine ID + * and the procedure name. We don't need to crack + * the send procedure out now, but we do convert the + * machine ID to an int, since rep_set_transport needs + * it. Even so, it'll be easier later to deal with + * the Tcl_Obj *, so we save that, not the int. + * + * Note that we Tcl_IncrRefCount both objects + * independently; Tcl is free to discard the list + * that they're bundled into. + */ + + /* + * Check that the machine ID is an int. Note that + * we do want to use GetIntFromObj; the machine + * ID is explicitly an int, not a u_int32_t. + */ + if (ip->i_rep_eid != NULL) { + Tcl_DecrRefCount(ip->i_rep_eid); + } + ip->i_rep_eid = objv[0]; + Tcl_IncrRefCount(ip->i_rep_eid); + result = Tcl_GetIntFromObj(interp, + ip->i_rep_eid, &intarg); + if (result != TCL_OK) + return (result); + + if (ip->i_rep_send != NULL) { + Tcl_DecrRefCount(ip->i_rep_send); + } + ip->i_rep_send = objv[1]; + Tcl_IncrRefCount(ip->i_rep_send); + _debug_check(); + ret = dbenv->rep_set_transport(dbenv, intarg, tcl_rep_send); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "env rep_transport")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepStart -- + * Call DB_ENV->rep_start(). + * + * PUBLIC: int tcl_RepStart + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + * + * Note that this normally can/should be achieved as an argument to + * berkdb env, but we need to test forcible upgrading of clients, which + * involves calling this on an open environment handle. + */ +int +tcl_RepStart(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + static const char *tclrpstrt[] = { + "-client", + "-master", + NULL + }; + enum tclrpstrt { + TCL_RPSTRT_CLIENT, + TCL_RPSTRT_MASTER + }; + char *arg; + int i, optindex, ret; + u_int32_t flag; + + flag = 0; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "[-master/-client]"); + return (TCL_ERROR); + } + + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], tclrpstrt, + "option", TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') + return (IS_HELP(objv[i])); + else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum tclrpstrt)optindex) { + case TCL_RPSTRT_CLIENT: + flag = DB_REP_CLIENT; + break; + case TCL_RPSTRT_MASTER: + flag = DB_REP_MASTER; + break; + } + } + + _debug_check(); + ret = dbenv->rep_start(dbenv, NULL, flag); + return (_ReturnSetup(interp, ret, DB_RETOK_STD(ret), "env rep_start")); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepProcessMessage -- + * Call DB_ENV->rep_process_message(). + * + * PUBLIC: int tcl_RepProcessMessage + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepProcessMessage(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DBT control, rec; + DB_LSN permlsn; + Tcl_Obj *lsnlist, *myobjv[2], *res; + void *ctmp, *rtmp; + char *msg; + int eid; + int freectl, freerec, myobjc, result, ret; + + if (objc != 5) { + Tcl_WrongNumArgs(interp, 2, objv, "id control rec"); + return (TCL_ERROR); + } + freectl = freerec = 0; + + memset(&control, 0, sizeof(control)); + memset(&rec, 0, sizeof(rec)); + + if ((result = Tcl_GetIntFromObj(interp, objv[2], &eid)) != TCL_OK) + return (result); + + ret = _CopyObjBytes(interp, objv[3], &ctmp, + &control.size, &freectl); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_REPPMSG(ret), "rep_proc_msg"); + return (result); + } + control.data = ctmp; + ret = _CopyObjBytes(interp, objv[4], &rtmp, + &rec.size, &freerec); + if (ret != 0) { + result = _ReturnSetup(interp, ret, + DB_RETOK_REPPMSG(ret), "rep_proc_msg"); + goto out; + } + rec.data = rtmp; + _debug_check(); + ret = dbenv->rep_process_message(dbenv, &control, &rec, eid, &permlsn); + /* + * !!! + * The TCL API diverges from the C++/Java APIs here. For us, it + * is OK to get DUPMASTER and HOLDELECTION for testing purposes. + */ + result = _ReturnSetup(interp, ret, + DB_RETOK_REPPMSG(ret) || ret == DB_REP_DUPMASTER || + ret == DB_REP_HOLDELECTION, + "env rep_process_message"); + + if (result != TCL_OK) + goto out; + + /* + * We have a valid return. We need to return a variety of information. + * It will be one of the following: + * {0 0} - Make a 0 return a list for consistent return structure. + * {DUPMASTER 0} - DUPMASTER, no other info needed. + * {HOLDELECTION 0} - HOLDELECTION, no other info needed. + * {NEWMASTER #} - NEWMASTER and its ID. + * {NEWSITE 0} - NEWSITE, no other info needed. + * {IGNORE {LSN list}} - IGNORE and this msg's LSN. + * {ISPERM {LSN list}} - ISPERM and the perm LSN. + * {NOTPERM {LSN list}} - NOTPERM and this msg's LSN. 
+ */ + myobjc = 2; + switch (ret) { + case 0: + myobjv[0] = Tcl_NewIntObj(0); + myobjv[1] = Tcl_NewIntObj(0); + break; + case DB_REP_DUPMASTER: + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"DUPMASTER", (int)strlen("DUPMASTER")); + myobjv[1] = Tcl_NewIntObj(0); + break; + case DB_REP_HOLDELECTION: + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"HOLDELECTION", (int)strlen("HOLDELECTION")); + myobjv[1] = Tcl_NewIntObj(0); + break; + case DB_REP_IGNORE: + myobjv[0] = Tcl_NewLongObj((long)permlsn.file); + myobjv[1] = Tcl_NewLongObj((long)permlsn.offset); + lsnlist = Tcl_NewListObj(myobjc, myobjv); + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"IGNORE", (int)strlen("IGNORE")); + myobjv[1] = lsnlist; + break; + case DB_REP_ISPERM: + myobjv[0] = Tcl_NewLongObj((long)permlsn.file); + myobjv[1] = Tcl_NewLongObj((long)permlsn.offset); + lsnlist = Tcl_NewListObj(myobjc, myobjv); + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"ISPERM", (int)strlen("ISPERM")); + myobjv[1] = lsnlist; + break; + case DB_REP_NEWSITE: + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"NEWSITE", (int)strlen("NEWSITE")); + myobjv[1] = Tcl_NewIntObj(0); + break; + case DB_REP_NOTPERM: + myobjv[0] = Tcl_NewLongObj((long)permlsn.file); + myobjv[1] = Tcl_NewLongObj((long)permlsn.offset); + lsnlist = Tcl_NewListObj(myobjc, myobjv); + myobjv[0] = Tcl_NewByteArrayObj( + (u_char *)"NOTPERM", (int)strlen("NOTPERM")); + myobjv[1] = lsnlist; + break; + default: + msg = db_strerror(ret); + Tcl_AppendResult(interp, msg, NULL); + Tcl_SetErrorCode(interp, "BerkeleyDB", msg, NULL); + result = TCL_ERROR; + goto out; + } + res = Tcl_NewListObj(myobjc, myobjv); + if (res != NULL) + Tcl_SetObjResult(interp, res); +out: + if (freectl) + __os_free(NULL, ctmp); + if (freerec) + __os_free(NULL, rtmp); + + return (result); +} +#endif + +#ifdef CONFIG_TEST +/* + * tcl_RepStat -- + * Call DB_ENV->rep_stat(). 
+ * + * PUBLIC: int tcl_RepStat + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + DB_REP_STAT *sp; + Tcl_Obj *myobjv[2], *res, *thislist, *lsnlist; + u_int32_t flag; + int myobjc, result, ret; + char *arg, *role; + + flag = 0; + result = TCL_OK; + + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + if (objc == 3) { + arg = Tcl_GetStringFromObj(objv[2], NULL); + if (strcmp(arg, "-clear") == 0) + flag = DB_STAT_CLEAR; + else { + Tcl_SetResult(interp, + "db stat: unknown arg", TCL_STATIC); + return (TCL_ERROR); + } + } + + _debug_check(); + ret = dbenv->rep_stat(dbenv, &sp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep stat"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); +#ifdef HAVE_STATISTICS + /* + * MAKE_STAT_* assumes 'res' and 'error' label. 
+ */ + if (sp->st_status == DB_REP_MASTER) + role = "master"; + else if (sp->st_status == DB_REP_CLIENT) + role = "client"; + else + role = "none"; + MAKE_STAT_STRLIST("Role", role); + + MAKE_STAT_LSN("Next LSN expected", &sp->st_next_lsn); + MAKE_STAT_LSN("First missed LSN", &sp->st_waiting_lsn); + MAKE_STAT_LSN("Maximum permanent LSN", &sp->st_max_perm_lsn); + MAKE_WSTAT_LIST("Bulk buffer fills", sp->st_bulk_fills); + MAKE_WSTAT_LIST("Bulk buffer overflows", sp->st_bulk_overflows); + MAKE_WSTAT_LIST("Bulk records stored", sp->st_bulk_records); + MAKE_WSTAT_LIST("Bulk buffer transfers", sp->st_bulk_transfers); + MAKE_WSTAT_LIST("Client service requests", sp->st_client_svc_req); + MAKE_WSTAT_LIST("Client service req misses", sp->st_client_svc_miss); + MAKE_WSTAT_LIST("Client rerequests", sp->st_client_rerequests); + MAKE_STAT_LIST("Duplicate master conditions", sp->st_dupmasters); + MAKE_STAT_LIST("Environment ID", sp->st_env_id); + MAKE_STAT_LIST("Environment priority", sp->st_env_priority); + MAKE_STAT_LIST("Generation number", sp->st_gen); + MAKE_STAT_LIST("Election generation number", sp->st_egen); + MAKE_STAT_LIST("Startup complete", sp->st_startup_complete); + MAKE_WSTAT_LIST("Lease messages sent", sp->st_lease_sends); + MAKE_WSTAT_LIST("Lease checks", sp->st_lease_chk); + MAKE_WSTAT_LIST("Lease check invalid", sp->st_lease_chk_misses); + MAKE_WSTAT_LIST("Lease check refresh", sp->st_lease_chk_refresh); + MAKE_WSTAT_LIST("Duplicate log records received", sp->st_log_duplicated); + MAKE_WSTAT_LIST("Current log records queued", sp->st_log_queued); + MAKE_WSTAT_LIST("Maximum log records queued", sp->st_log_queued_max); + MAKE_WSTAT_LIST("Total log records queued", sp->st_log_queued_total); + MAKE_WSTAT_LIST("Log records received", sp->st_log_records); + MAKE_WSTAT_LIST("Log records requested", sp->st_log_requested); + MAKE_STAT_LIST("Master environment ID", sp->st_master); + MAKE_WSTAT_LIST("Master changes", sp->st_master_changes); + MAKE_STAT_LIST("Messages with 
bad generation number", + sp->st_msgs_badgen); + MAKE_WSTAT_LIST("Messages processed", sp->st_msgs_processed); + MAKE_WSTAT_LIST("Messages ignored for recovery", sp->st_msgs_recover); + MAKE_WSTAT_LIST("Message send failures", sp->st_msgs_send_failures); + MAKE_WSTAT_LIST("Messages sent", sp->st_msgs_sent); + MAKE_WSTAT_LIST("New site messages", sp->st_newsites); + MAKE_STAT_LIST("Number of sites in replication group", sp->st_nsites); + MAKE_WSTAT_LIST("Transmission limited", sp->st_nthrottles); + MAKE_WSTAT_LIST("Outdated conditions", sp->st_outdated); + MAKE_WSTAT_LIST("Transactions applied", sp->st_txns_applied); + MAKE_STAT_LIST("Next page expected", sp->st_next_pg); + MAKE_WSTAT_LIST("First missed page", sp->st_waiting_pg); + MAKE_WSTAT_LIST("Duplicate pages received", sp->st_pg_duplicated); + MAKE_WSTAT_LIST("Pages received", sp->st_pg_records); + MAKE_WSTAT_LIST("Pages requested", sp->st_pg_requested); + MAKE_WSTAT_LIST("Elections held", sp->st_elections); + MAKE_WSTAT_LIST("Elections won", sp->st_elections_won); + MAKE_STAT_LIST("Election phase", sp->st_election_status); + MAKE_STAT_LIST("Election winner", sp->st_election_cur_winner); + MAKE_STAT_LIST("Election generation number", sp->st_election_gen); + MAKE_STAT_LIST("Election data generation number", + sp->st_election_datagen); + MAKE_STAT_LSN("Election max LSN", &sp->st_election_lsn); + MAKE_STAT_LIST("Election sites", sp->st_election_nsites); + MAKE_STAT_LIST("Election nvotes", sp->st_election_nvotes); + MAKE_STAT_LIST("Election priority", sp->st_election_priority); + MAKE_STAT_LIST("Election tiebreaker", sp->st_election_tiebreaker); + MAKE_STAT_LIST("Election votes", sp->st_election_votes); + MAKE_STAT_LIST("Election seconds", sp->st_election_sec); + MAKE_STAT_LIST("Election usecs", sp->st_election_usec); + MAKE_STAT_LIST("Start-sync operations delayed", + sp->st_startsync_delayed); + MAKE_STAT_LIST("Maximum lease seconds", sp->st_max_lease_sec); + MAKE_STAT_LIST("Maximum lease usecs", 
sp->st_max_lease_usec); + MAKE_STAT_LIST("File fail cleanups done", sp->st_filefail_cleanups); +#endif + + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_RepStatPrint -- + * + * PUBLIC: int tcl_RepStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_RepStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *repstatprtopts[] = { + "-all", + "-clear", + NULL + }; + enum repstatprtopts { + REPSTATPRTALL, + REPSTATPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], repstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum repstatprtopts)optindex) { + case REPSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case REPSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->rep_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv rep_stat_print"); +error: + return (result); + +} + +/* + * tcl_RepMgr -- + * Configure and start the Replication Manager. + * + * PUBLIC: int tcl_RepMgr + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepMgr(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *rmgr[] = { + "-ack", + "-local", + "-msgth", + "-pri", + "-remote", + "-remove", + "-start", + "-timeout", + NULL + }; + enum rmgr { + RMGR_ACK, + RMGR_LOCAL, + RMGR_MSGTH, + RMGR_PRI, + RMGR_REMOTE, + RMGR_REMOVE, + RMGR_START, + RMGR_TIMEOUT + }; + Tcl_Obj **myobjv; + DB_SITE *dbsite; + long to; + int ack, creator, i, j, legacy, myobjc, optindex; + int peer, result, ret, totype, t_ret; + u_int32_t msgth, start_flag, uintarg; + char *arg; + + result = TCL_OK; + ack = ret = totype = 0; + msgth = 1; + start_flag = 0; + + if (objc <= 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?args?"); + return (TCL_ERROR); + } + /* + * Get the command name index from the object based on the bdbcmds + * defined above. + */ + i = 2; + while (i < objc) { + Tcl_ResetResult(interp); + if (Tcl_GetIndexFromObj(interp, objv[i], rmgr, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum rmgr)optindex) { + case RMGR_ACK: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-ack policy?"); + result = TCL_ERROR; + break; + } + + + if (Tcl_GetIndexFromObjStruct(interp, objv[i++], + &rep_ack_policies[0].name, sizeof(NAMEMAP), + "ack policy", TCL_EXACT, &optindex) != TCL_OK) { + result = TCL_ERROR; + break; + } + ack = (int)rep_ack_policies[optindex].value; + + _debug_check(); + ret = dbenv->repmgr_set_ack_policy(dbenv, ack); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "ack"); + break; + case RMGR_LOCAL: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc < 2 || myobjc > 4) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-local {host port [creator][legacy]}?"); + result = TCL_ERROR; + break; + } + creator = legacy = 0; + for (j = 2; j < myobjc; j++) { + arg = Tcl_GetString(myobjv[j]); + if (strcmp(arg, "creator") == 0) + 
creator = 1; + else if (strcmp(arg, "legacy") == 0) + legacy = 1; + else { + Tcl_AddErrorInfo(interp, + "local: illegal flag"); + result = TCL_ERROR; + break; + } + } + /* The "arg" is host name, and "uintarg" is port. */ + arg = Tcl_GetStringFromObj(myobjv[0], NULL); + if ((result = _GetUInt32(interp, myobjv[1], &uintarg)) + != TCL_OK) + break; + _debug_check(); + if ((ret = dbenv->repmgr_site(dbenv, + arg, uintarg, &dbsite, 0)) == 0) { + ret = dbsite->set_config(dbsite, + DB_LOCAL_SITE, 1); + if (ret == 0 && creator) + ret = dbsite->set_config(dbsite, + DB_GROUP_CREATOR, 1); + if (ret == 0 && legacy) + ret = dbsite->set_config(dbsite, + DB_LEGACY, 1); + if ((t_ret = dbsite->close(dbsite)) != 0 && + ret == 0) + ret = t_ret; + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "repmgr_site"); + break; + case RMGR_MSGTH: + if (i >= objc) { + Tcl_WrongNumArgs( + interp, 2, objv, "?-msgth nth?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &msgth); + break; + case RMGR_PRI: + if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-pri priority?"); + result = TCL_ERROR; + break; + } + result = _GetUInt32(interp, objv[i++], &uintarg); + if (result == TCL_OK) { + _debug_check(); + ret = dbenv-> + rep_set_priority(dbenv, uintarg); + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep_set_priority"); + break; + case RMGR_REMOTE: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc < 2 || myobjc > 4) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-remote {host port [peer][legacy]}?"); + result = TCL_ERROR; + break; + } + + /* + * Get the flags first so we can reuse 'arg'. 
+ */ + legacy = peer = 0; + for (j = 2; j < myobjc; j++) { + arg = Tcl_GetString(myobjv[j]); + if (strcmp(arg, "legacy") == 0) + legacy = 1; + else if (strcmp(arg, "peer") == 0) + peer = 1; + else { + Tcl_AddErrorInfo(interp, + "remote: illegal flag"); + result = TCL_ERROR; + break; + } + } + arg = Tcl_GetString(myobjv[0]); + if ((result = _GetUInt32(interp, myobjv[1], &uintarg)) + != TCL_OK) + break; + _debug_check(); + + /* For now, all "-remote" sites become helpers. */ + if ((ret = dbenv->repmgr_site(dbenv, + arg, uintarg, &dbsite, 0)) == 0) { + ret = dbsite->set_config(dbsite, + DB_BOOTSTRAP_HELPER, 1); + if (ret == 0 && legacy) + ret = dbsite->set_config(dbsite, + DB_LEGACY, 1); + if (ret == 0 && peer) + ret = dbsite->set_config(dbsite, + DB_REPMGR_PEER, 1); + if ((t_ret = dbsite->close(dbsite)) != 0 && + ret == 0) + ret = t_ret; + } + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "repmgr_site"); + break; + case RMGR_REMOVE: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-remove {host port}?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetString(myobjv[0]); + if ((result = _GetUInt32(interp, myobjv[1], &uintarg)) + != TCL_OK) + break; + _debug_check(); + if ((ret = dbenv->repmgr_site(dbenv, + arg, uintarg, &dbsite, 0)) == 0) + ret = dbsite->remove(dbsite); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "repmgr_site->remove"); + break; + case RMGR_START: + if (i >= objc) { + Tcl_WrongNumArgs( + interp, 2, objv, "?-start state?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + if (strcmp(arg, "master") == 0) + start_flag = DB_REP_MASTER; + else if (strcmp(arg, "client") == 0) + start_flag = DB_REP_CLIENT; + else if (strcmp(arg, "elect") == 0) + start_flag = DB_REP_ELECTION; + else { + Tcl_AddErrorInfo( + interp, "start: illegal state"); + result = TCL_ERROR; + break; + 
} + /* + * Some config functions need to be called + * before repmgr_start. So finish parsing all + * the args and call repmgr_start at the end. + */ + break; + case RMGR_TIMEOUT: + result = Tcl_ListObjGetElements(interp, objv[i], + &myobjc, &myobjv); + if (result == TCL_OK) + i++; + else + break; + if (myobjc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-timeout {type to}?"); + result = TCL_ERROR; + break; + } + if (Tcl_GetIndexFromObjStruct(interp, myobjv[0], + &rep_timeout_types[0].name, sizeof(NAMEMAP), + "timeout type", TCL_EXACT, &optindex) != TCL_OK) { + result = TCL_ERROR; + break; + } + totype = (int)rep_timeout_types[optindex].value; + if ((result = Tcl_GetLongFromObj( + interp, myobjv[1], &to)) != TCL_OK) + break; + _debug_check(); + ret = dbenv->rep_set_timeout(dbenv, totype, + (db_timeout_t)to); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "rep_set_timeout"); + break; + } + /* + * If, at any time, parsing the args we get an error, + * bail out and return. + */ + if (result != TCL_OK) + goto error; + } + /* + * Only call repmgr_start if needed. The user may use this + * call just to reconfigure, change policy, etc. + */ + if (start_flag != 0 && result == TCL_OK) { + _debug_check(); + ret = dbenv->repmgr_start(dbenv, (int)msgth, start_flag); + result = _ReturnSetup( + interp, ret, DB_RETOK_REPMGR_START(ret), "repmgr_start"); + } +error: + return (result); +} + +/* + * tcl_RepMgrSiteList -- + * Call DB_ENV->repmgr_site_list(). + * + * PUBLIC: int tcl_RepMgrSiteList + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepMgrSiteList(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + DB_REPMGR_SITE *sp; + Tcl_Obj *myobjv[5], *res, *thislist; + u_int count, i; + char *pr, *st; + int myobjc, result, ret; + + result = TCL_OK; + + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + + _debug_check(); + ret = dbenv->repmgr_site_list(dbenv, &count, &sp); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "repmgr sitelist"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our sites, now construct the {eid host port status peer} + * tuples and free up the memory. + */ + res = Tcl_NewObj(); + + for (i = 0; i < count; ++i) { + /* + * MAKE_SITE_LIST assumes 'res' and 'error' label. + */ + if (sp[i].status == DB_REPMGR_CONNECTED) + st = "connected"; + else if (sp[i].status == DB_REPMGR_DISCONNECTED) + st = "disconnected"; + else + st = "unknown"; + if (F_ISSET(&sp[i], DB_REPMGR_ISPEER)) + pr = "peer"; + else + pr = "non-peer"; + MAKE_SITE_LIST(sp[i].eid, sp[i].host, sp[i].port, st, pr); + } + + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_RepMgrStat -- + * Call DB_ENV->repmgr_stat(). + * + * PUBLIC: int tcl_RepMgrStat + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepMgrStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + DB_REPMGR_STAT *sp; + Tcl_Obj *res; + u_int32_t flag; + int result, ret; + char *arg; + + flag = 0; + result = TCL_OK; + + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + if (objc == 3) { + arg = Tcl_GetStringFromObj(objv[2], NULL); + if (strcmp(arg, "-clear") == 0) + flag = DB_STAT_CLEAR; + else { + Tcl_SetResult(interp, + "db stat: unknown arg", TCL_STATIC); + return (TCL_ERROR); + } + } + + _debug_check(); + ret = dbenv->repmgr_stat(dbenv, &sp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "repmgr stat"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); +#ifdef HAVE_STATISTICS + /* + * MAKE_STAT_* assumes 'res' and 'error' label. + */ + MAKE_WSTAT_LIST("Acknowledgement failures", sp->st_perm_failed); + MAKE_WSTAT_LIST("Messages delayed", sp->st_msgs_queued); + MAKE_WSTAT_LIST("Messages discarded", sp->st_msgs_dropped); + MAKE_WSTAT_LIST("Connections dropped", sp->st_connection_drop); + MAKE_WSTAT_LIST("Failed re-connects", sp->st_connect_fail); + MAKE_WSTAT_LIST("Election threads", sp->st_elect_threads); + MAKE_WSTAT_LIST("Max elect threads", sp->st_max_elect_threads); +#endif + + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_RepMgrStatPrint -- + * + * PUBLIC: int tcl_RepMgrStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_RepMgrStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *repmgrstatprtopts[] = { + "-all", + "-clear", + NULL + }; + enum repmgrstatprtopts { + REPMGRSTATPRTALL, + REPMGRSTATPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], repmgrstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum repmgrstatprtopts)optindex) { + case REPMGRSTATPRTALL: + flag |= DB_STAT_ALL; + break; + case REPMGRSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->repmgr_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv repmgr_stat_print"); +error: + return (result); + +} + +/* + * tcl_RepApplied - + * + * PUBLIC: int tcl_RepApplied + * PUBLIC: __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); + */ +int +tcl_RepApplied(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; +{ + static const char *repapplied_option_names[] = { + "-timeout", + NULL + }; + enum envinfo_options { + REPAPPLIEDTIMEOUT + }; + unsigned char *arg; + char msg[MSG_SIZE]; + db_timeout_t timeout; + int i, len, ptr, result, ret; + + if (objc != 3 && objc != 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-timeout t? 
token"); + return (TCL_ERROR); + } + timeout = 0; + i = 2; + if (objc == 5) { + if (Tcl_GetIndexFromObj(interp, objv[i], + repapplied_option_names, "option", TCL_EXACT, &ptr) + != TCL_OK) + return (IS_HELP(objv[i])); + i++; + switch ((enum envinfo_options)ptr) { + case REPAPPLIEDTIMEOUT: + result = _GetUInt32(interp, objv[i++], &timeout); + if (result != TCL_OK) + return (result); + break; + } + } + + arg = Tcl_GetByteArrayFromObj(objv[i], &len); + if (len != DB_TXN_TOKEN_SIZE) { + Tcl_SetErrorCode(interp, "BerkeleyDB", + "Commit token is the wrong size", NULL); + + snprintf(msg, MSG_SIZE, + "Bad commit token size %lu, should be %lu", + (u_long)len, (u_long)DB_TXN_TOKEN_SIZE); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + + _debug_check(); + ret = dbenv->txn_applied(dbenv, (DB_TXN_TOKEN*)arg, timeout, 0); + result = _ReturnSetup(interp, ret , DB_RETOK_TXNAPPLIED(ret), + "txn_applied"); + return (result); +} +#endif diff --git a/lang/tcl/tcl_seq.c b/lang/tcl/tcl_seq.c new file mode 100644 index 00000000..faea8d9e --- /dev/null +++ b/lang/tcl/tcl_seq.c @@ -0,0 +1,569 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" +#ifdef HAVE_64BIT_TYPES + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" +#include "dbinc_auto/sequence_ext.h" + +/* + * Prototypes for procedures defined later in this file: + */ +static int tcl_SeqClose __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *, DBTCL_INFO *)); +static int tcl_SeqGet __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *)); +static int tcl_SeqRemove __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *, DBTCL_INFO *)); +static int tcl_SeqStat __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *)); +static int tcl_SeqStatPrint __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *)); +static int tcl_SeqGetFlags __P((Tcl_Interp *, + int, Tcl_Obj * CONST*, DB_SEQUENCE *)); + +/* + * + * PUBLIC: int seq_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); + * + * seq_Cmd -- + * Implements the "seq" widget. + */ +int +seq_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* SEQ handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *seqcmds[] = { + "close", + "get", + "get_cachesize", + "get_db", + "get_flags", + "get_key", + "get_range", + "remove", + "stat", + "stat_print", + NULL + }; + enum seqcmds { + SEQCLOSE, + SEQGET, + SEQGETCACHESIZE, + SEQGETDB, + SEQGETFLAGS, + SEQGETKEY, + SEQGETRANGE, + SEQREMOVE, + SEQSTAT, + SEQSTATPRT + }; + DB *dbp; + DBT key; + DBTCL_INFO *dbip, *ip; + DB_SEQUENCE *seq; + Tcl_Obj *myobjv[2], *res; + db_seq_t min, max; + int cmdindex, ncache, result, ret; + + Tcl_ResetResult(interp); + seq = (DB_SEQUENCE *)clientData; + result = TCL_OK; + dbip = NULL; + if (objc <= 1) { + Tcl_WrongNumArgs(interp, 1, objv, "command cmdargs"); + return (TCL_ERROR); + } + if (seq == NULL) { + Tcl_SetResult(interp, "NULL sequence pointer", TCL_STATIC); + return (TCL_ERROR); + } + + ip = _PtrToInfo((void *)seq); + if (ip == NULL) { + Tcl_SetResult(interp, "NULL info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], seqcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum seqcmds)cmdindex) { + case SEQGETRANGE: + ret = seq->get_range(seq, &min, &max); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "sequence get_range")) == TCL_OK) { + myobjv[0] = Tcl_NewWideIntObj(min); + myobjv[1] = Tcl_NewWideIntObj(max); + res = Tcl_NewListObj(2, myobjv); + } + break; + case SEQCLOSE: + result = tcl_SeqClose(interp, objc, objv, seq, ip); + break; + case SEQREMOVE: + result = tcl_SeqRemove(interp, objc, objv, seq, ip); + break; + case SEQGET: + result = tcl_SeqGet(interp, objc, objv, seq); + break; + case SEQSTAT: + result = tcl_SeqStat(interp, objc, objv, seq); + break; + case SEQSTATPRT: + result = tcl_SeqStatPrint(interp, objc, objv, seq); + break; + case SEQGETCACHESIZE: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = seq->get_cachesize(seq, &ncache); + if ((result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "sequence get_cachesize")) == TCL_OK) + res = Tcl_NewIntObj(ncache); + break; + case SEQGETDB: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = seq->get_db(seq, &dbp); + if (ret == 0 && (dbip = _PtrToInfo((void *)dbp)) == NULL) { + Tcl_SetResult(interp, + "NULL db info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + if ((result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "sequence get_db")) == TCL_OK) + res = NewStringObj(dbip->i_name, strlen(dbip->i_name)); + break; + case SEQGETKEY: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + ret = seq->get_key(seq, &key); + if ((result = _ReturnSetup(interp, ret, + DB_RETOK_STD(ret), "sequence get_key")) == TCL_OK) + res = Tcl_NewByteArrayObj( + (u_char *)key.data, (int)key.size); + break; + case SEQGETFLAGS: + result = tcl_SeqGetFlags(interp, objc, objv, seq); + break; + } 
+ + /* + * Only set result if we have a res. Otherwise, lower functions have + * already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * tcl_db_stat -- + */ +static int +tcl_SeqStat(interp, objc, objv, seq) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Database pointer */ +{ + DB_SEQUENCE_STAT *sp; + u_int32_t flag; + Tcl_Obj *res, *flaglist, *myobjv[2]; + int result, ret; + char *arg; + + result = TCL_OK; + flag = 0; + + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-clear?"); + return (TCL_ERROR); + } + + if (objc == 3) { + arg = Tcl_GetStringFromObj(objv[2], NULL); + if (strcmp(arg, "-clear") == 0) + flag = DB_STAT_CLEAR; + else { + Tcl_SetResult(interp, + "db stat: unknown arg", TCL_STATIC); + return (TCL_ERROR); + } + } + + _debug_check(); + ret = seq->stat(seq, &sp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "db stat"); + if (result == TCL_ERROR) + return (result); + + res = Tcl_NewObj(); + MAKE_WSTAT_LIST("Wait", sp->st_wait); + MAKE_WSTAT_LIST("No wait", sp->st_nowait); + MAKE_WSTAT_LIST("Current", sp->st_current); + MAKE_WSTAT_LIST("Cached", sp->st_value); + MAKE_WSTAT_LIST("Max Cached", sp->st_last_value); + MAKE_WSTAT_LIST("Min", sp->st_min); + MAKE_WSTAT_LIST("Max", sp->st_max); + MAKE_STAT_LIST("Cache size", sp->st_cache_size); + /* + * Construct a {name {flag1 flag2 ... flagN}} list for the + * seq flags. 
+ */ + myobjv[0] = NewStringObj("Flags", strlen("Flags")); + myobjv[1] = + _GetFlagsList(interp, sp->st_flags, __db_get_seq_flags_fn()); + flaglist = Tcl_NewListObj(2, myobjv); + if (flaglist == NULL) { + result = TCL_ERROR; + goto error; + } + if ((result = + Tcl_ListObjAppendElement(interp, res, flaglist)) != TCL_OK) + goto error; + + Tcl_SetObjResult(interp, res); + +error: __os_ufree(seq->seq_dbp->env, sp); + return (result); +} + +/* + * tcl_SeqStatPrint -- + */ +static int +tcl_SeqStatPrint(interp, objc, objv, seq) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Environment pointer */ +{ + static const char *seqstatprtopts[] = { + "-clear", + NULL + }; + enum seqstatprtopts { + SEQSTATPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], seqstatprtopts, + "option", TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum seqstatprtopts)optindex) { + case SEQSTATPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = seq->stat_print(seq, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "seq stat_print"); +error: + return (result); + +} + +/* + * tcl_db_close -- + */ +static int +tcl_SeqClose(interp, objc, objv, seq, ip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Database pointer */ + DBTCL_INFO *ip; /* Info pointer */ +{ + int result, ret; + + result = TCL_OK; + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, ""); + return (TCL_ERROR); + } + + _DeleteInfo(ip); + _debug_check(); + + ret = seq->close(seq, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "sequence close"); + return (result); +} + +/* + * tcl_SeqGet -- + */ +static int +tcl_SeqGet(interp, objc, objv, seq) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Sequence pointer */ +{ + static const char *seqgetopts[] = { + "-nosync", + "-txn", + NULL + }; + enum seqgetopts { + SEQGET_NOSYNC, + SEQGET_TXN + }; + DB_TXN *txn; + Tcl_Obj *res; + db_seq_t value; + u_int32_t aflag, delta; + int i, end, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + txn = NULL; + aflag = 0; + + if (objc < 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args? delta"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. 
+ */ + i = 2; + end = objc; + while (i < end) { + if (Tcl_GetIndexFromObj(interp, objv[i], seqgetopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto out; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum seqgetopts)optindex) { + case SEQGET_NOSYNC: + aflag |= DB_TXN_NOSYNC; + break; + case SEQGET_TXN: + if (i >= end) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Get: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } /* switch */ + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + if (i != objc - 1) { + Tcl_SetResult(interp, + "Wrong number of key/data given\n", TCL_STATIC); + result = TCL_ERROR; + goto out; + } + + if ((result = _GetUInt32(interp, objv[objc - 1], &delta)) != TCL_OK) + goto out; + + ret = seq->get(seq, txn, (int32_t)delta, &value, aflag); + result = _ReturnSetup(interp, ret, DB_RETOK_DBGET(ret), "sequence get"); + if (ret == 0) { + res = Tcl_NewWideIntObj((Tcl_WideInt)value); + Tcl_SetObjResult(interp, res); + } +out: + return (result); +} +/* + */ +static int +tcl_SeqRemove(interp, objc, objv, seq, ip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Sequence pointer */ + DBTCL_INFO *ip; /* Info pointer */ +{ + static const char *seqgetopts[] = { + "-nosync", + "-txn", + NULL + }; + enum seqgetopts { + SEQGET_NOSYNC, + SEQGET_TXN + }; + DB_TXN *txn; + u_int32_t aflag; + int i, end, optindex, result, ret; + char *arg, msg[MSG_SIZE]; + + result = TCL_OK; + txn = NULL; + aflag = 0; + + _DeleteInfo(ip); + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 2, objv, "?-args?"); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the options + * defined above. + */ + i = 2; + end = objc; + while (i < end) { + if (Tcl_GetIndexFromObj(interp, objv[i], seqgetopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + arg = Tcl_GetStringFromObj(objv[i], NULL); + if (arg[0] == '-') { + result = IS_HELP(objv[i]); + goto out; + } else + Tcl_ResetResult(interp); + break; + } + i++; + switch ((enum seqgetopts)optindex) { + case SEQGET_NOSYNC: + aflag |= DB_TXN_NOSYNC; + break; + case SEQGET_TXN: + if (i >= end) { + Tcl_WrongNumArgs(interp, 2, objv, "?-txn id?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + txn = NAME_TO_TXN(arg); + if (txn == NULL) { + snprintf(msg, MSG_SIZE, + "Remove: Invalid txn: %s\n", arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + result = TCL_ERROR; + } + break; + } /* switch */ + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto out; + + ret = seq->remove(seq, txn, aflag); + result = _ReturnSetup(interp, + ret, DB_RETOK_DBGET(ret), "sequence remove"); +out: + return (result); +} + +/* + * tcl_SeqGetFlags -- + */ +static int +tcl_SeqGetFlags(interp, objc, objv, seq) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_SEQUENCE *seq; /* Sequence pointer */ +{ + int i, ret, result; + u_int32_t flags; + char buf[512]; + Tcl_Obj *res; + + static const struct { + u_int32_t flag; + char *arg; + } seq_flags[] = { + { DB_SEQ_INC, "-inc" }, + { DB_SEQ_DEC, "-dec" }, + { DB_SEQ_WRAP, "-wrap" }, + { 0, NULL } + }; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + + ret = seq->get_flags(seq, &flags); + if ((result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "db get_flags")) == TCL_OK) { + buf[0] = '\0'; + + for (i = 0; seq_flags[i].flag != 0; i++) + if (LF_ISSET(seq_flags[i].flag)) { + if (strlen(buf) > 0) + (void)strncat(buf, " ", sizeof(buf)); + (void)strncat( + buf, seq_flags[i].arg, sizeof(buf)); + } + + res = NewStringObj(buf, strlen(buf)); + Tcl_SetObjResult(interp, res); + } + + return (result); +} +#endif /* HAVE_64BIT_TYPES */ diff --git a/lang/tcl/tcl_txn.c b/lang/tcl/tcl_txn.c new file mode 100644 index 00000000..fe842253 --- /dev/null +++ b/lang/tcl/tcl_txn.c @@ -0,0 +1,886 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +static int tcl_TxnCommit __P((Tcl_Interp *, + int, Tcl_Obj * CONST *, DB_TXN *, DBTCL_INFO *)); +static int txn_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST *)); + +/* + * _TxnInfoDelete -- + * Removes nested txn info structures that are children + * of this txn. + * RECURSIVE: Transactions can be arbitrarily nested, so we + * must recurse down until we get them all. 
+ * + * PUBLIC: void _TxnInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); + */ +void +_TxnInfoDelete(interp, txnip) + Tcl_Interp *interp; /* Interpreter */ + DBTCL_INFO *txnip; /* Info for txn */ +{ + DBTCL_INFO *nextp, *p; + + for (p = LIST_FIRST(&__db_infohead); p != NULL; p = nextp) { + /* + * Check if this info structure "belongs" to this + * txn. Remove its commands and info structure. + */ + nextp = LIST_NEXT(p, entries); + if (p->i_parent == txnip && p->i_type == I_TXN) { + _TxnInfoDelete(interp, p); + (void)Tcl_DeleteCommand(interp, p->i_name); + _DeleteInfo(p); + } + } +} + +/* + * tcl_TxnCheckpoint -- + * + * PUBLIC: int tcl_TxnCheckpoint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_TxnCheckpoint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *txnckpopts[] = { + "-force", + "-kbyte", + "-min", + NULL + }; + enum txnckpopts { + TXNCKP_FORCE, + TXNCKP_KB, + TXNCKP_MIN + }; + u_int32_t flags; + int i, kb, min, optindex, result, ret; + + result = TCL_OK; + flags = 0; + kb = min = 0; + + /* + * Get the flag index from the object based on the options + * defined above. 
+ */ + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + txnckpopts, "option", TCL_EXACT, &optindex) != TCL_OK) { + return (IS_HELP(objv[i])); + } + i++; + switch ((enum txnckpopts)optindex) { + case TXNCKP_FORCE: + flags = DB_FORCE; + break; + case TXNCKP_KB: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-kbyte kb?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &kb); + break; + case TXNCKP_MIN: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, "?-min min?"); + result = TCL_ERROR; + break; + } + result = Tcl_GetIntFromObj(interp, objv[i++], &min); + break; + } + } + _debug_check(); + ret = dbenv->txn_checkpoint(dbenv, (u_int32_t)kb, (u_int32_t)min, + flags); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn checkpoint"); + return (result); +} + +/* + * tcl_Txn -- + * + * PUBLIC: int tcl_Txn __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); + */ +int +tcl_Txn(interp, objc, objv, dbenv, envip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ + DBTCL_INFO *envip; /* Info pointer */ +{ + static const char *txnopts[] = { +#ifdef CONFIG_TEST + "-lock_timeout", + "-read_committed", + "-read_uncommitted", + "-token", + "-txn_timeout", + "-txn_wait", + "-txn_bulk", +#endif + "-nosync", + "-nowait", + "-parent", + "-snapshot", + "-sync", + "-wrnosync", + NULL + }; + enum txnopts { +#ifdef CONFIG_TEST + TXNLOCK_TIMEOUT, + TXNREAD_COMMITTED, + TXNREAD_UNCOMMITTED, + TXNTOKEN, + TXNTIMEOUT, + TXNWAIT, + TXNBULK, +#endif + TXNNOSYNC, + TXNNOWAIT, + TXNPARENT, + TXNSNAPSHOT, + TXNSYNC, + TXNWRNOSYNC + }; + DBTCL_INFO *ip; + DB_TXN *parent; + DB_TXN *txn; + Tcl_Obj *res; + u_int32_t flag; + int i, optindex, result, ret; + char *arg, *call, msg[MSG_SIZE], newname[MSG_SIZE]; +#ifdef CONFIG_TEST + db_timeout_t lk_time, tx_time; + u_int32_t lk_timeflag, tx_timeflag; + int use_token_buffer; +#endif + + result = TCL_OK; + memset(newname, 0, MSG_SIZE); + + txn = parent = NULL; + call = ""; + flag = 0; +#ifdef CONFIG_TEST + COMPQUIET(tx_time, 0); + COMPQUIET(lk_time, 0); + lk_timeflag = tx_timeflag = 0; + use_token_buffer = 0; +#endif + i = 2; + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], + txnopts, "option", TCL_EXACT, &optindex) != TCL_OK) { + return (IS_HELP(objv[i])); + } + i++; + switch ((enum txnopts)optindex) { +#ifdef CONFIG_TEST + case TXNLOCK_TIMEOUT: + lk_timeflag = DB_SET_LOCK_TIMEOUT; + goto get_timeout; + case TXNTIMEOUT: + tx_timeflag = DB_SET_TXN_TIMEOUT; +get_timeout: if (i >= objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-txn_timestamp time?"); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[i++], (long *) + ((enum txnopts)optindex == TXNLOCK_TIMEOUT ? 
+ &lk_time : &tx_time)); + if (result != TCL_OK) + return (TCL_ERROR); + break; + case TXNREAD_COMMITTED: + flag |= DB_READ_COMMITTED; + break; + case TXNREAD_UNCOMMITTED: + flag |= DB_READ_UNCOMMITTED; + break; + case TXNTOKEN: + use_token_buffer = 1; + break; + case TXNWAIT: + flag |= DB_TXN_WAIT; + break; + case TXNBULK: + flag |= DB_TXN_BULK; + break; +#endif + case TXNNOSYNC: + flag |= DB_TXN_NOSYNC; + break; + case TXNNOWAIT: + flag |= DB_TXN_NOWAIT; + break; + case TXNPARENT: + if (i == objc) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-parent txn?"); + result = TCL_ERROR; + break; + } + arg = Tcl_GetStringFromObj(objv[i++], NULL); + parent = NAME_TO_TXN(arg); + if (parent == NULL) { + snprintf(msg, MSG_SIZE, + "Invalid parent txn: %s\n", + arg); + Tcl_SetResult(interp, msg, TCL_VOLATILE); + return (TCL_ERROR); + } + break; + case TXNSNAPSHOT: + flag |= DB_TXN_SNAPSHOT; + break; + case TXNSYNC: + flag |= DB_TXN_SYNC; + break; + case TXNWRNOSYNC: + flag |= DB_TXN_WRITE_NOSYNC; + break; + } + } + snprintf(newname, sizeof(newname), "%s.txn%d", + envip->i_name, envip->i_envtxnid); + if ((ip = _NewInfo(interp, NULL, newname, I_TXN)) == NULL) + return (TCL_ERROR); + _debug_check(); + if ((ret = dbenv->txn_begin(dbenv, parent, &txn, flag)) != 0) { + call = "txn"; + goto err; + } + +#ifdef CONFIG_TEST + if (tx_timeflag != 0 && + (ret = txn->set_timeout(txn, tx_time, tx_timeflag)) != 0) { + call = "set_timeout(DB_SET_TXN_TIMEOUT)"; + goto err; + } + if (lk_timeflag != 0 && + (ret = txn->set_timeout(txn, lk_time, lk_timeflag)) != 0) { + call = "set_timeout(DB_SET_LOCK_TIMEOUT)"; + goto err; + } + if (use_token_buffer && + ((ret = __os_calloc(dbenv->env, 1, + DB_TXN_TOKEN_SIZE, &ip->i_commit_token)) != 0 || + (ret = txn->set_commit_token(txn, ip->i_commit_token)) != 0)) { + /* (_DeleteInfo() frees i_commit_token if necessary.) */ + call = "set_commit_token"; + goto err; + } +#endif + + /* + * Success. Set up return. Set up new info + * and command widget for this txn. 
+ */ + envip->i_envtxnid++; + if (parent) + ip->i_parent = _PtrToInfo(parent); + else + ip->i_parent = envip; + _SetInfoData(ip, txn); + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)txn_Cmd, (ClientData)txn, NULL); + res = NewStringObj(newname, strlen(newname)); + Tcl_SetObjResult(interp, res); + return (TCL_OK); + +err: + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), call); + if (txn != NULL) + (void)txn->abort(txn); + _DeleteInfo(ip); + return (result); +} + +/* + * tcl_CDSGroup -- + * + * PUBLIC: int tcl_CDSGroup __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); + */ +int +tcl_CDSGroup(interp, objc, objv, dbenv, envip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ + DBTCL_INFO *envip; /* Info pointer */ +{ + DBTCL_INFO *ip; + DB_TXN *txn; + Tcl_Obj *res; + int result, ret; + char newname[MSG_SIZE]; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "env cdsgroup"); + return (TCL_ERROR); + } + + result = TCL_OK; + memset(newname, 0, MSG_SIZE); + + snprintf(newname, sizeof(newname), "%s.txn%d", + envip->i_name, envip->i_envtxnid); + ip = _NewInfo(interp, NULL, newname, I_TXN); + if (ip == NULL) { + Tcl_SetResult(interp, "Could not set up info", + TCL_STATIC); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->cdsgroup_begin(dbenv, &txn); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "cdsgroup"); + if (result == TCL_ERROR) + _DeleteInfo(ip); + else { + /* + * Success. Set up return. Set up new info + * and command widget for this txn. 
+ */ + envip->i_envtxnid++; + ip->i_parent = envip; + _SetInfoData(ip, txn); + (void)Tcl_CreateObjCommand(interp, newname, + (Tcl_ObjCmdProc *)txn_Cmd, (ClientData)txn, NULL); + res = NewStringObj(newname, strlen(newname)); + Tcl_SetObjResult(interp, res); + } + return (result); +} + +/* + * tcl_TxnStat -- + * + * PUBLIC: int tcl_TxnStat __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_TxnStat(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + DBTCL_INFO *ip; + DB_TXN_ACTIVE *p; + DB_TXN_STAT *sp; + Tcl_Obj *myobjv[2], *res, *thislist, *lsnlist; + u_int32_t i; + int myobjc, result, ret; + + result = TCL_OK; + /* + * No args for this. Error if there are some. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->txn_stat(dbenv, &sp, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn stat"); + if (result == TCL_ERROR) + return (result); + + /* + * Have our stats, now construct the name value + * list pairs and free up the memory. + */ + res = Tcl_NewObj(); + /* + * MAKE_STAT_LIST assumes 'res' and 'error' label. 
+ */ +#ifdef HAVE_STATISTICS + MAKE_STAT_LIST("Region size", sp->st_regsize); + MAKE_STAT_LSN("LSN of last checkpoint", &sp->st_last_ckp); + MAKE_STAT_LIST("Time of last checkpoint", sp->st_time_ckp); + MAKE_STAT_LIST("Last txn ID allocated", sp->st_last_txnid); + MAKE_STAT_LIST("Maximum txns", sp->st_maxtxns); + MAKE_STAT_LIST("Initial txns", sp->st_inittxns); + MAKE_WSTAT_LIST("Number aborted txns", sp->st_naborts); + MAKE_WSTAT_LIST("Number txns begun", sp->st_nbegins); + MAKE_WSTAT_LIST("Number committed txns", sp->st_ncommits); + MAKE_STAT_LIST("Number active txns", sp->st_nactive); + MAKE_STAT_LIST("Number of snapshot txns", sp->st_nsnapshot); + MAKE_STAT_LIST("Number restored txns", sp->st_nrestores); + MAKE_STAT_LIST("Maximum active txns", sp->st_maxnactive); + MAKE_STAT_LIST("Maximum snapshot txns", sp->st_maxnsnapshot); + MAKE_WSTAT_LIST("Number of region lock waits", sp->st_region_wait); + MAKE_WSTAT_LIST("Number of region lock nowaits", sp->st_region_nowait); + for (i = 0, p = sp->st_txnarray; i < sp->st_nactive; i++, p++) + LIST_FOREACH(ip, &__db_infohead, entries) { + if (ip->i_type != I_TXN) + continue; + if (ip->i_type == I_TXN && + (ip->i_txnp->id(ip->i_txnp) == p->txnid)) { + MAKE_STAT_LSN(ip->i_name, &p->lsn); + if (p->parentid != 0) + MAKE_STAT_STRLIST("Parent", + ip->i_parent->i_name); + else + MAKE_STAT_LIST("Parent", 0); + break; + } + } +#endif + Tcl_SetObjResult(interp, res); +error: + __os_ufree(dbenv->env, sp); + return (result); +} + +/* + * tcl_TxnStatPrint -- + * + * PUBLIC: int tcl_TxnStatPrint __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_TxnStatPrint(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + static const char *txnprtopts[] = { + "-all", + "-clear", + NULL + }; + enum txnprtopts { + TXNPRTALL, + TXNPRTCLEAR + }; + u_int32_t flag; + int i, optindex, result, ret; + + result = TCL_OK; + flag = 0; + i = 2; + + while (i < objc) { + if (Tcl_GetIndexFromObj(interp, objv[i], txnprtopts, "option", + TCL_EXACT, &optindex) != TCL_OK) { + result = IS_HELP(objv[i]); + goto error; + } + i++; + switch ((enum txnprtopts)optindex) { + case TXNPRTALL: + flag |= DB_STAT_ALL; + break; + case TXNPRTCLEAR: + flag |= DB_STAT_CLEAR; + break; + } + if (result != TCL_OK) + break; + } + if (result != TCL_OK) + goto error; + + _debug_check(); + ret = dbenv->txn_stat_print(dbenv, flag); + result = _ReturnSetup(interp, + ret, DB_RETOK_STD(ret), "dbenv txn_stat_print"); +error: + return (result); + +} + +/* + * tcl_TxnTimeout -- + * + * PUBLIC: int tcl_TxnTimeout __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *)); + */ +int +tcl_TxnTimeout(interp, objc, objv, dbenv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ +{ + long timeout; + int result, ret; + + /* + * One arg, the timeout. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?timeout?"); + return (TCL_ERROR); + } + result = Tcl_GetLongFromObj(interp, objv[2], &timeout); + if (result != TCL_OK) + return (result); + _debug_check(); + ret = dbenv->set_timeout(dbenv, (u_int32_t)timeout, DB_SET_TXN_TIMEOUT); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "lock timeout"); + return (result); +} + +/* + * txn_Cmd -- + * Implements the "txn" widget. + */ +static int +txn_Cmd(clientData, interp, objc, objv) + ClientData clientData; /* Txn handle */ + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *txncmds[] = { +#ifdef CONFIG_TEST + "discard", + "getname", + "id", + "prepare", + "setname", + "set_timeout", +#endif + "abort", + "commit", + NULL + }; + enum txncmds { +#ifdef CONFIG_TEST + TXNDISCARD, + TXNGETNAME, + TXNID, + TXNPREPARE, + TXNSETNAME, + TXNSETTIMEOUT, +#endif + TXNABORT, + TXNCOMMIT + }; + DBTCL_INFO *txnip; + DB_TXN *txnp; + Tcl_Obj *res; + int cmdindex, result, ret; +#ifdef CONFIG_TEST + u_int8_t *gid, garray[DB_GID_SIZE]; + int length; + const char *name; + u_int32_t timeout; +#endif + + Tcl_ResetResult(interp); + txnp = (DB_TXN *)clientData; + txnip = _PtrToInfo((void *)txnp); + result = TCL_OK; + if (txnp == NULL) { + Tcl_SetResult(interp, "NULL txn pointer", TCL_STATIC); + return (TCL_ERROR); + } + if (txnip == NULL) { + Tcl_SetResult(interp, "NULL txn info pointer", TCL_STATIC); + return (TCL_ERROR); + } + + /* + * Get the command name index from the object based on the dbcmds + * defined above. 
+ */ + if (Tcl_GetIndexFromObj(interp, + objv[1], txncmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum txncmds)cmdindex) { +#ifdef CONFIG_TEST + case TXNDISCARD: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = txnp->discard(txnp, 0); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn discard"); + _TxnInfoDelete(interp, txnip); + (void)Tcl_DeleteCommand(interp, txnip->i_name); + _DeleteInfo(txnip); + break; + case TXNID: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + res = Tcl_NewIntObj((int)txnp->id(txnp)); + break; + case TXNPREPARE: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + gid = (u_int8_t *)Tcl_GetByteArrayFromObj(objv[2], &length); + memcpy(garray, gid, (size_t)length); + ret = txnp->prepare(txnp, garray); + /* + * !!! + * DB_TXN->prepare commits all outstanding children. But it + * does NOT destroy the current txn handle. So, we must call + * _TxnInfoDelete to recursively remove all nested txn handles, + * we do not call _DeleteInfo on ourselves. 
+ */ + _TxnInfoDelete(interp, txnip); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn prepare"); + break; + case TXNGETNAME: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + name = NULL; + ret = txnp->get_name(txnp, &name); + if ((result = _ReturnSetup( + interp, ret, DB_RETOK_STD(ret), "txn getname")) == TCL_OK) { + if(name != NULL) { + res = NewStringObj(name, strlen(name)); + } else { + res = NewStringObj("", 0); + } + } + break; + case TXNSETNAME: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "name"); + return (TCL_ERROR); + } + _debug_check(); + ret = txnp->set_name(txnp, Tcl_GetStringFromObj(objv[2], NULL)); + result = + _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "setname"); + break; + case TXNSETTIMEOUT: + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "timeout"); + return (TCL_ERROR); + } + _debug_check(); + timeout = 0; + ret = _GetUInt32(interp, objv[2], &timeout); + if (ret != TCL_OK) + return (TCL_ERROR); + ret = txnp->set_timeout(txnp, (db_timeout_t)timeout, + DB_SET_TXN_TIMEOUT); + result = + _ReturnSetup(interp, ret, DB_RETOK_STD(ret), "set_timeout"); + break; +#endif + case TXNABORT: + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = txnp->abort(txnp); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn abort"); + _TxnInfoDelete(interp, txnip); + (void)Tcl_DeleteCommand(interp, txnip->i_name); + _DeleteInfo(txnip); + break; + case TXNCOMMIT: + result = tcl_TxnCommit(interp, objc, objv, txnp, txnip); + _TxnInfoDelete(interp, txnip); + (void)Tcl_DeleteCommand(interp, txnip->i_name); + _DeleteInfo(txnip); + break; + } + /* + * Only set result if we have a res. Otherwise, lower + * functions have already done so. 
+ */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +static int +tcl_TxnCommit(interp, objc, objv, txnp, txnip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_TXN *txnp; /* Transaction pointer */ + DBTCL_INFO *txnip; /* Info pointer */ +{ + static const char *commitopt[] = { + "-nosync", + "-sync", + "-wrnosync", + NULL + }; + enum commitopt { + COMNOSYNC, + COMSYNC, + COMWRNOSYNC + }; + Tcl_Obj *res; + void *p; + u_int32_t flag; + int optindex, result, ret; + +#ifndef CONFIG_TEST + COMPQUIET(txnip, NULL); +#endif + + result = TCL_OK; + flag = 0; + if (objc != 2 && objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return (TCL_ERROR); + } + if (objc == 3) { + if (Tcl_GetIndexFromObj(interp, objv[2], commitopt, + "option", TCL_EXACT, &optindex) != TCL_OK) + return (IS_HELP(objv[2])); + switch ((enum commitopt)optindex) { + case COMSYNC: + flag = DB_TXN_SYNC; + break; + case COMNOSYNC: + flag = DB_TXN_NOSYNC; + break; + case COMWRNOSYNC: + flag = DB_TXN_WRITE_NOSYNC; + break; + } + } + + _debug_check(); + ret = txnp->commit(txnp, flag); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn commit"); +#ifdef CONFIG_TEST + if (result == TCL_OK && txnip->i_commit_token != NULL) { + p = txnip->i_commit_token; + res = Tcl_NewByteArrayObj(p, DB_TXN_TOKEN_SIZE); + Tcl_SetObjResult(interp, res); + } +#endif + return (result); +} + +#ifdef CONFIG_TEST +/* + * tcl_TxnRecover -- + * + * PUBLIC: int tcl_TxnRecover __P((Tcl_Interp *, int, + * PUBLIC: Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); + */ +int +tcl_TxnRecover(interp, objc, objv, dbenv, envip) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? 
*/ + Tcl_Obj *CONST objv[]; /* The argument objects */ + DB_ENV *dbenv; /* Environment pointer */ + DBTCL_INFO *envip; /* Info pointer */ +{ +#define DO_PREPLIST(count) \ +for (i = 0; i < count; i++) { \ + snprintf(newname, sizeof(newname), "%s.txn%d", \ + envip->i_name, envip->i_envtxnid); \ + ip = _NewInfo(interp, NULL, newname, I_TXN); \ + if (ip == NULL) { \ + Tcl_SetResult(interp, "Could not set up info", \ + TCL_STATIC); \ + return (TCL_ERROR); \ + } \ + envip->i_envtxnid++; \ + ip->i_parent = envip; \ + p = &prep[i]; \ + _SetInfoData(ip, p->txn); \ + (void)Tcl_CreateObjCommand(interp, newname, \ + (Tcl_ObjCmdProc *)txn_Cmd, (ClientData)p->txn, NULL); \ + result = _SetListElem(interp, res, newname, \ + (u_int32_t)strlen(newname), p->gid, DB_GID_SIZE); \ + if (result != TCL_OK) \ + goto error; \ +} + + DBTCL_INFO *ip; + DB_PREPLIST prep[DBTCL_PREP], *p; + Tcl_Obj *res; + int result, ret; + long count, i; + char newname[MSG_SIZE]; + + result = TCL_OK; + /* + * No args for this. Error if there are some. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } + _debug_check(); + ret = dbenv->txn_recover(dbenv, prep, DBTCL_PREP, &count, DB_FIRST); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn recover"); + if (result == TCL_ERROR) + return (result); + res = Tcl_NewObj(); + DO_PREPLIST(count); + + /* + * If count returned is the maximum size we have, then there + * might be more. Keep going until we get them all. 
+ */ + while (count == DBTCL_PREP) { + ret = dbenv->txn_recover( + dbenv, prep, DBTCL_PREP, &count, DB_NEXT); + result = _ReturnSetup(interp, ret, DB_RETOK_STD(ret), + "txn recover"); + if (result == TCL_ERROR) + return (result); + DO_PREPLIST(count); + } + Tcl_SetObjResult(interp, res); +error: + return (result); +} +#endif diff --git a/lang/tcl/tcl_util.c b/lang/tcl/tcl_util.c new file mode 100644 index 00000000..9560f596 --- /dev/null +++ b/lang/tcl/tcl_util.c @@ -0,0 +1,152 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#endif +#include "dbinc/tcl_db.h" + +/* + * bdb_RandCommand -- + * Implements rand* functions. + * + * PUBLIC: int bdb_RandCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); + */ +int +bdb_RandCommand(interp, objc, objv) + Tcl_Interp *interp; /* Interpreter */ + int objc; /* How many arguments? */ + Tcl_Obj *CONST objv[]; /* The argument objects */ +{ + static const char *rcmds[] = { + "rand", "random_int", "srand", + NULL + }; + enum rcmds { + RRAND, RRAND_INT, RSRAND + }; + Tcl_Obj *res; + int cmdindex, hi, lo, result, ret; + + result = TCL_OK; + /* + * Get the command name index from the object based on the cmds + * defined above. This SHOULD NOT fail because we already checked + * in the 'berkdb' command. + */ + if (Tcl_GetIndexFromObj(interp, + objv[1], rcmds, "command", TCL_EXACT, &cmdindex) != TCL_OK) + return (IS_HELP(objv[1])); + + res = NULL; + switch ((enum rcmds)cmdindex) { + case RRAND: + /* + * Must be 0 args. Error if different. + */ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return (TCL_ERROR); + } +#ifdef HAVE_RANDOM + ret = random(); +#else + ret = rand(); +#endif + res = Tcl_NewIntObj(ret); + break; + case RRAND_INT: + /* + * Must be 4 args. Error if different. 
+ */ + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "lo hi"); + return (TCL_ERROR); + } + if ((result = + Tcl_GetIntFromObj(interp, objv[2], &lo)) != TCL_OK) + return (result); + if ((result = + Tcl_GetIntFromObj(interp, objv[3], &hi)) != TCL_OK) + return (result); + if (lo < 0 || hi < 0) { + Tcl_SetResult(interp, + "Range value less than 0", TCL_STATIC); + return (TCL_ERROR); + } + + _debug_check(); +#ifdef HAVE_RANDOM + ret = lo + random() % ((hi - lo) + 1); +#else + ret = lo + rand() % ((hi - lo) + 1); +#endif + res = Tcl_NewIntObj(ret); + break; + case RSRAND: + /* + * Must be 1 arg. Error if different. + */ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "seed"); + return (TCL_ERROR); + } + if ((result = + Tcl_GetIntFromObj(interp, objv[2], &lo)) == TCL_OK) { +#ifdef HAVE_RANDOM + srandom((u_int)lo); +#else + srand((u_int)lo); +#endif + res = Tcl_NewIntObj(0); + } + break; + } + + /* + * Only set result if we have a res. Otherwise, lower functions have + * already done so. + */ + if (result == TCL_OK && res) + Tcl_SetObjResult(interp, res); + return (result); +} + +/* + * PUBLIC: int tcl_LockMutex __P((DB_ENV *, db_mutex_t)); + */ +int +tcl_LockMutex(dbenv, mutex) + DB_ENV *dbenv; + db_mutex_t mutex; +{ + /* + * Why such a seemingly ridiculously trivial function? MUTEX_LOCK can't + * be invoked in a void function. The behavior of the macro could be + * unwrapped and duplicated in line; but by the time you account for + * HAVE_MUTEX_SUPPORT, checking for MUTEX_INVALID, etc., you've created + * a maintenance burden, and it's just not worth it. 
+ */ + MUTEX_LOCK(dbenv->env, mutex); + return (0); +} + +/* + * PUBLIC: int tcl_UnlockMutex __P((DB_ENV *, db_mutex_t)); + */ +int +tcl_UnlockMutex(dbenv, mutex) + DB_ENV *dbenv; + db_mutex_t mutex; +{ + MUTEX_UNLOCK(dbenv->env, mutex); + return (0); +} diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c new file mode 100644 index 00000000..4a2843cb --- /dev/null +++ b/src/btree/bt_compact.c @@ -0,0 +1,2644 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __bam_compact_dups __P((DBC *, + PAGE **, u_int32_t, int, DB_COMPACT *, int *)); +static int __bam_compact_isdone __P((DBC *, DBT *, PAGE *, int *)); +static int __bam_csearch __P((DBC *, DBT *, u_int32_t, int)); +static int __bam_lock_tree __P((DBC *, EPG *, EPG *csp, u_int32_t, u_int32_t)); +static int __bam_lock_subtree __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +static int __bam_merge __P((DBC *, + DBC *, u_int32_t, DBT *, DB_COMPACT *,int *)); +static int __bam_merge_internal __P((DBC *, DBC *, int, DB_COMPACT *, int *)); +static int __bam_merge_pages __P((DBC *, DBC *, DB_COMPACT *)); +static int __bam_merge_records __P((DBC *, DBC*, u_int32_t, DB_COMPACT *)); +static int __bam_truncate_internal_overflow __P((DBC *, PAGE *, DB_COMPACT *)); +static int __bam_truncate_root_page __P((DBC *, + PAGE *, u_int32_t, DB_COMPACT *)); + +#ifdef HAVE_FTRUNCATE +static int __bam_savekey __P((DBC *, int, DBT *)); +#endif + +/* + * __bam_csearch -- isolate search code for bam_compact. + * This routine hides the differences between searching + * a BTREE and a RECNO from the rest of the code. + */ +#define CS_READ 0 /* We are just reading. */ +#define CS_PARENT 1 /* We want the parent too, write lock. 
*/ +#define CS_NEXT 2 /* Get the next page. */ +#define CS_NEXT_WRITE 3 /* Get the next page and write lock. */ +#define CS_DEL 4 /* Get a stack to delete a page. */ +#define CS_START 5 /* Starting level for stack, write lock. */ +#define CS_NEXT_BOTH 6 /* Get this page and the next, write lock. */ +#define CS_GETRECNO 0x80 /* Extract record number from start. */ + +static int +__bam_csearch(dbc, start, sflag, level) + DBC *dbc; + DBT *start; + u_int32_t sflag; + int level; +{ + BTREE_CURSOR *cp; + int not_used, ret; + + cp = (BTREE_CURSOR *)dbc->internal; + + if (dbc->dbtype == DB_RECNO) { + /* If GETRECNO is not set the cp->recno is what we want. */ + if (FLD_ISSET(sflag, CS_GETRECNO)) { + if (start == NULL || start->size == 0) + cp->recno = 1; + else if ((ret = + __ram_getno(dbc, start, &cp->recno, 0)) != 0) + return (ret); + FLD_CLR(sflag, CS_GETRECNO); + } + switch (sflag) { + case CS_READ: + sflag = SR_READ; + break; + case CS_NEXT: + sflag = SR_PARENT | SR_READ; + break; + case CS_START: + level = LEAFLEVEL; + /* FALLTHROUGH */ + case CS_DEL: + case CS_NEXT_WRITE: + sflag = SR_STACK; + break; + case CS_NEXT_BOTH: + sflag = SR_BOTH | SR_NEXT | SR_WRITE; + break; + case CS_PARENT: + sflag = SR_PARENT | SR_WRITE; + break; + default: + return (__env_panic(dbc->env, EINVAL)); + } + if ((ret = __bam_rsearch(dbc, + &cp->recno, sflag, level, ¬_used)) != 0) + return (ret); + /* Reset the cursor's recno to the beginning of the page. 
*/ + cp->recno -= cp->csp->indx; + } else { + FLD_CLR(sflag, CS_GETRECNO); + switch (sflag) { + case CS_READ: + sflag = SR_READ | SR_DUPFIRST; + break; + case CS_DEL: + sflag = SR_DEL; + break; + case CS_NEXT: + sflag = SR_NEXT; + break; + case CS_NEXT_WRITE: + sflag = SR_NEXT | SR_WRITE; + break; + case CS_NEXT_BOTH: + sflag = SR_BOTH | SR_NEXT | SR_WRITE; + break; + case CS_START: + sflag = SR_START | SR_WRITE; + break; + case CS_PARENT: + sflag = SR_PARENT | SR_WRITE; + break; + default: + return (__env_panic(dbc->env, EINVAL)); + } + if (start == NULL || start->size == 0) + FLD_SET(sflag, SR_MIN); + + if ((ret = __bam_search(dbc, + PGNO_INVALID, start, sflag, level, NULL, ¬_used)) != 0) + return (ret); + } + + return (0); +} + +/* + * __bam_compact_int -- internal compaction routine. + * Called either with a cursor on the main database + * or a cursor initialized to the root of an off page duplicate + * tree. + * PUBLIC: int __bam_compact_int __P((DBC *, + * PUBLIC: DBT *, DBT *, u_int32_t, int *, DB_COMPACT *, int *)); + */ +int +__bam_compact_int(dbc, start, stop, factor, spanp, c_data, donep) + DBC *dbc; + DBT *start, *stop; + u_int32_t factor; + int *spanp; + DB_COMPACT *c_data; + int *donep; +{ + BTREE_CURSOR *cp, *ncp; + DB *dbp; + DBC *ndbc; + DB_LOCK metalock, next_lock, nnext_lock, prev_lock, saved_lock; + DB_MPOOLFILE *dbmp; + ENV *env; + EPG *epg; + PAGE *pg, *ppg, *npg; + db_pgno_t metapgno, npgno, nnext_pgno; + db_pgno_t pgno, prev_pgno, ppgno, saved_pgno; + db_recno_t next_recno; + u_int32_t nentry, sflag, pgs_free; + int check_dups, check_trunc, clear_root, do_commit, isdone; + int merged, next_p, pgs_done, ret, t_ret, tdone; + +#ifdef DEBUG +#define CTRACE(dbc, location, t, start, f) do { \ + DBT __trace; \ + DB_SET_DBT(__trace, t, strlen(t)); \ + DEBUG_LWRITE( \ + dbc, (dbc)->txn, location, &__trace, start, f) \ + } while (0) +#define PTRACE(dbc, location, p, start, f) do { \ + char __buf[32]; \ + (void)snprintf(__buf, \ + sizeof(__buf), "pgno: 
%lu", (u_long)p); \ + CTRACE(dbc, location, __buf, start, f); \ + } while (0) +#else +#define CTRACE(dbc, location, t, start, f) +#define PTRACE(dbc, location, p, start, f) +#endif + + ndbc = NULL; + pg = NULL; + npg = NULL; + + isdone = 0; + tdone = 0; + pgs_done = 0; + do_commit = 0; + next_recno = 0; + next_p = 0; + clear_root = 0; + metapgno = PGNO_BASE_MD; + ppgno = PGNO_INVALID; + LOCK_INIT(next_lock); + LOCK_INIT(nnext_lock); + LOCK_INIT(saved_lock); + LOCK_INIT(metalock); + LOCK_INIT(prev_lock); + check_trunc = c_data->compact_truncate != PGNO_INVALID; + check_dups = (!F_ISSET(dbc, DBC_OPD) && + F_ISSET(dbc->dbp, DB_AM_DUP)) || check_trunc; + + dbp = dbc->dbp; + env = dbp->env; + dbmp = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + pgs_free = c_data->compact_pages_free; + + /* Search down the tree for the starting point. */ + if ((ret = __bam_csearch(dbc, + start, CS_READ | CS_GETRECNO, LEAFLEVEL)) != 0) { + /* Its not an error to compact an empty db. */ + if (ret == DB_NOTFOUND) + ret = 0; + isdone = 1; + goto err; + } + + /* + * Get the first leaf page. The loop below will change pg so + * we clear the stack reference so we don't put a a page twice. + */ + pg = cp->csp->page; + cp->csp->page = NULL; + next_recno = cp->recno; +next: /* + * This is the start of the main compaction loop. There are 3 + * parts to the process: + * 1) Walk the leaf pages of the tree looking for a page to + * process. We do this with read locks. Save the + * key from the page and release it. + * 2) Set up a cursor stack which will write lock the page + * and enough of its ancestors to get the job done. + * This could go to the root if we might delete a subtree + * or we have record numbers to update. + * 3) Loop fetching pages after the above page and move enough + * data to fill it. + * We exit the loop if we are at the end of the leaf pages, are + * about to lock a new subtree (we span) or on error. + */ + + /* Walk the pages looking for something to fill up. 
*/ + while ((npgno = NEXT_PGNO(pg)) != PGNO_INVALID) { + c_data->compact_pages_examine++; + PTRACE(dbc, "Next", PGNO(pg), start, 0); + + /* If we have fetched the next page, get the new key. */ + if (next_p == 1 && + dbc->dbtype != DB_RECNO && NUM_ENT(pg) != 0) { + if ((ret = __db_ret(dbc, pg, 0, start, + &start->data, &start->ulen)) != 0) + goto err; + } + next_recno += NUM_ENT(pg); + if (P_FREESPACE(dbp, pg) > factor || + (check_trunc && PGNO(pg) > c_data->compact_truncate)) + break; + if (stop != NULL && stop->size > 0) { + if ((ret = __bam_compact_isdone(dbc, + stop, pg, &isdone)) != 0) + goto err; + if (isdone) + goto done; + } + + /* + * The page does not need more data or to be swapped, + * check to see if we want to look at possible duplicate + * trees or overflow records and the move on to the next page. + */ + cp->recno += NUM_ENT(pg); + next_p = 1; + tdone = pgs_done; + PTRACE(dbc, "Dups", PGNO(pg), start, 0); + if (check_dups && (ret = __bam_compact_dups( + dbc, &pg, factor, 0, c_data, &pgs_done)) != 0) + goto err; + npgno = NEXT_PGNO(pg); + if ((ret = __memp_fput(dbmp, + dbc->thread_info, pg, dbc->priority)) != 0) + goto err; + pg = NULL; + /* + * If we don't do anything we don't need to hold + * the lock on the previous page, so couple always. + */ + if ((ret = __db_lget(dbc, + tdone == pgs_done ? LCK_COUPLE_ALWAYS : LCK_COUPLE, + npgno, DB_LOCK_READ, 0, &cp->csp->lock)) != 0) + goto err; + if ((ret = __memp_fget(dbmp, &npgno, + dbc->thread_info, dbc->txn, 0, &pg)) != 0) + goto err; + } + + /* + * When we get here we have 3 cases: + * 1) We've reached the end of the leaf linked list and are done. + * 2) A page whose freespace exceeds our target and therefore needs + * to have data added to it. + * 3) A page that doesn't have too much free space but needs to be + * checked for truncation. + * In both cases 2 and 3, we need that page's first key or record + * number. We may already have it, if not get it here. 
+ */ + if ((nentry = NUM_ENT(pg)) != 0) { + /* Get a copy of the first recno on the page. */ + if (dbc->dbtype == DB_RECNO) { + if ((ret = __db_retcopy(dbp->env, start, + &cp->recno, sizeof(cp->recno), + &start->data, &start->ulen)) != 0) + goto err; + } else if (((next_p == 1 && npgno == PGNO_INVALID) || + start->size == 0) && (ret = __db_ret(dbc, + pg, 0, start, &start->data, &start->ulen)) != 0) + goto err; + + next_p = 0; + /* + * If there is no next page we can stop unless there is + * a possibility of moving this data to a lower numbered + * page. + */ + if (npgno == PGNO_INVALID && + (!check_trunc || PGNO(pg) <= c_data->compact_truncate || + PGNO(pg) == BAM_ROOT_PGNO(dbc))) { + /* End of the tree, check its duplicates and exit. */ + PTRACE(dbc, "GoDone", PGNO(pg), start, 0); + if (check_dups && (ret = __bam_compact_dups(dbc, + &pg, factor, 0, c_data, &pgs_done)) != 0) + goto err; + c_data->compact_pages_examine++; + isdone = 1; + goto done; + } + } + + /* Release the page so we don't deadlock getting its parent. */ + if ((ret = __memp_fput(dbmp, dbc->thread_info, pg, dbc->priority)) != 0) + goto err; + if ((ret = __LPUT(dbc, cp->csp->lock)) != 0) + goto err; + BT_STK_CLR(cp); + pg = NULL; + saved_pgno = PGNO_INVALID; + prev_pgno = PGNO_INVALID; + nnext_pgno = PGNO_INVALID; + + /* + * We must lock the metadata page first because we cannot block + * while holding interior nodes of the tree pinned. + */ + + if (!LOCK_ISSET(metalock) && pgs_free == c_data->compact_pages_free && + (ret = __db_lget(dbc, + LCK_ALWAYS, metapgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + + /* + * Setup the cursor stack. There are 3 cases: + * 1) the page is empty and will be deleted: nentry == 0. + * 2) the next page has the same parent: *spanp == 0. + * 3) the next page has a different parent: *spanp == 1. + * + * We now need to search the tree again, getting a write lock + * on the page we are going to merge or delete. 
We do this by + * searching down the tree and locking as much of the subtree + * above the page as needed. In the case of a delete we will + * find the maximal subtree that can be deleted. In the case + * of merge if the current page and the next page are siblings + * with the same parent then we only need to lock the parent. + * Otherwise *span will be set and we need to search to find the + * lowest common ancestor. Dbc will be set to contain the subtree + * containing the page to be merged or deleted. Ndbc will contain + * the minimal subtree containing that page and its next sibling. + * In all cases for DB_RECNO we simplify things and get the whole + * tree if we need more than a single parent. + * The tree can collapse while we don't have it locked, so the + * page we are looking for may be gone. If so we are at + * the right most end of the leaf pages and are done. + */ + +retry: pg = NULL; + if (npg != NULL && (ret = __memp_fput(dbmp, + dbc->thread_info, npg, dbc->priority)) != 0) + goto err; + npg = NULL; + if (ndbc != NULL) { + ncp = (BTREE_CURSOR *)ndbc->internal; + if (clear_root == 1) { + ncp->sp->page = NULL; + LOCK_INIT(ncp->sp->lock); + } + if ((ret = __bam_stkrel(ndbc, 0)) != 0) + goto err; + } + clear_root = 0; + /* Case 1 -- page is empty. */ + if (nentry == 0) { + CTRACE(dbc, "Empty", "", start, 0); + if (next_p == 1) + sflag = CS_NEXT_WRITE; + else + sflag = CS_DEL; + if ((ret = __bam_csearch(dbc, start, sflag, LEAFLEVEL)) != 0) { + isdone = 1; + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + + pg = cp->csp->page; + /* Check to see if the page is still empty. */ + if (NUM_ENT(pg) != 0) + npgno = PGNO(pg); + else { + npgno = NEXT_PGNO(pg); + /* If this is now the root, we are very done. 
*/ + if (PGNO(pg) == BAM_ROOT_PGNO(dbc)) + isdone = 1; + else { + if (npgno != PGNO_INVALID) { + TRY_LOCK(dbc, npgno, saved_pgno, + next_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err; + } + if (PREV_PGNO(pg) != PGNO_INVALID) { + TRY_LOCK(dbc, PREV_PGNO(pg), prev_pgno, + prev_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err; + } + if ((ret = + __bam_dpages(dbc, 0, BTD_RELINK)) != 0) + goto err; + c_data->compact_pages_free++; + if ((ret = __TLPUT(dbc, prev_lock)) != 0) + goto err; + LOCK_INIT(prev_lock); + if ((ret = __TLPUT(dbc, next_lock)) != 0) + goto err; + LOCK_INIT(next_lock); + saved_pgno = PGNO_INVALID; + goto next_no_release; + } + } + goto next_page; + } + + /* case 3 -- different parents. */ + if (*spanp) { + CTRACE(dbc, "Span", "", start, 0); + /* + * Search the tree looking for the page containing and + * the next page after the current key. + * The stack will be rooted at the page that spans + * the current and next pages. The two subtrees + * are returned below that. For BTREE the current + * page subtreee will be first while for RECNO the + * next page subtree will be first + */ + if (ndbc == NULL && (ret = __dbc_dup(dbc, &ndbc, 0)) != 0) + goto err; + DB_ASSERT(env, ndbc != NULL); + ncp = (BTREE_CURSOR *)ndbc->internal; + + ncp->recno = cp->recno; + cp->recno = next_recno; + + if ((ret = __bam_csearch(dbc, start, CS_NEXT_BOTH, 0)) != 0) { + if (ret == DB_NOTFOUND) { + isdone = 1; + ret = 0; + } + goto err; + } + + /* + * Find the top of the stack for the second subtree. + */ + for (epg = cp->csp - 1; epg > cp->sp; epg--) + if (LEVEL(epg->page) == LEAFLEVEL) + break; + DB_ASSERT(env, epg != cp->sp); + + /* + * Copy the root. We will have two instances of the + * same page, be careful not to free both. + */ + BT_STK_PUSH(env, ncp, cp->sp->page, cp->sp->indx, + cp->sp->lock, cp->sp->lock_mode, ret); + if (ret != 0) + goto err; + clear_root = 1; + + /* Copy the stack containing the next page. 
*/ + for (epg++; epg <= cp->csp; epg++) { + BT_STK_PUSH(env, ncp, epg->page, epg->indx, + epg->lock, epg->lock_mode, ret); + if (ret != 0) + goto err; + } + /* adjust the stack pointer to remove these items. */ + ncp->csp--; + cp->csp -= ncp->csp - ncp->sp; + + /* + * If this is RECNO then we want to swap the stacks. + */ + if (dbc->dbtype == DB_RECNO) { + ndbc->internal = (DBC_INTERNAL *)cp; + dbc->internal = (DBC_INTERNAL *)ncp; + cp = ncp; + ncp = (BTREE_CURSOR *)ndbc->internal; + cp->sp->indx--; + } else + ncp->sp->indx++; + + DB_ASSERT(env, + NEXT_PGNO(cp->csp->page) == PGNO(ncp->csp->page)); + pg = cp->csp->page; + + /* + * The page may have emptied while we waited for the + * lock or the record we are looking for may have + * moved. + * Reset npgno so we re-get this page when we go back + * to the top. + */ + if (NUM_ENT(pg) == 0 || + (dbc->dbtype == DB_RECNO && + NEXT_PGNO(cp->csp->page) != PGNO(ncp->csp->page))) { + npgno = PGNO(pg); + *spanp = 0; + goto next_page; + } + + if (check_trunc && PGNO(pg) > c_data->compact_truncate) { + if (PREV_PGNO(pg) != PGNO_INVALID) { + TRY_LOCK2(dbc, ndbc, PREV_PGNO(pg), prev_pgno, + prev_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + pgs_done++; + /* Get a fresh low numbered page. */ + if ((ret = __db_exchange_page(dbc, + &cp->csp->page, ncp->csp->page, + PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err1; + if ((ret = __TLPUT(dbc, prev_lock)) != 0) + goto err1; + LOCK_INIT(prev_lock); + pg = cp->csp->page; + } + *spanp = 0; + PTRACE(dbc, "SDups", PGNO(ncp->csp->page), start, 0); + if (check_dups && (ret = __bam_compact_dups(ndbc, + &ncp->csp->page, factor, 1, c_data, &pgs_done)) != 0) + goto err1; + + DB_ASSERT(env, ndbc != NULL); + /* Check to see if the tree collapsed. 
*/ + /*lint -e{794} */ + if (PGNO(ncp->csp->page) == BAM_ROOT_PGNO(ndbc)) + goto done; + + pg = cp->csp->page; + npgno = NEXT_PGNO(pg); + PTRACE(dbc, "SDups", PGNO(pg), start, 0); + if (check_dups && (ret = + __bam_compact_dups(dbc, &cp->csp->page, + factor, 1, c_data, &pgs_done)) != 0) + goto err1; + + /* + * We may have dropped our locks, check again + * to see if we still need to fill this page and + * we are in a spanning situation. + */ + + if (P_FREESPACE(dbp, pg) <= factor || + cp->csp[-1].indx != NUM_ENT(cp->csp[-1].page) - 1) + goto next_page; + + /* + * Try to move things into a single parent. + */ + merged = 0; + for (epg = cp->sp; epg != cp->csp; epg++) { + PTRACE(dbc, "PMerge", PGNO(epg->page), start, 0); + if ((ret = __bam_merge_internal(dbc, + ndbc, LEVEL(epg->page), c_data, &merged)) != 0) + break; + if (merged) + break; + } + + if (ret != 0 && ret != DB_LOCK_NOTGRANTED) + goto err1; + /* + * If we merged the parent, then we nolonger span. + * Otherwise if we tried to merge the parent but would + * block on one of the other leaf pages try again. + * If we did not merge any records of the parent, + * exit to commit any local transactions and try again. + */ + if (merged || (pgs_done > 0 && ret == DB_LOCK_NOTGRANTED)) { + if (merged) + pgs_done++; + else + goto done; + if (cp->csp->page == NULL) + goto deleted; + npgno = PGNO(pg); + next_recno = cp->recno; + goto next_page; + } + PTRACE(dbc, "SMerge", PGNO(cp->csp->page), start, 0); + + /* if we remove the next page, then we need its next locked */ + npgno = NEXT_PGNO(ncp->csp->page); + if (npgno != PGNO_INVALID) { + TRY_LOCK2(dbc, ndbc, npgno, + nnext_pgno, nnext_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + /*lint -e{794} */ + if ((ret = __bam_merge(dbc, + ndbc, factor, stop, c_data, &isdone)) != 0) + goto err1; + pgs_done++; + /* + * __bam_merge could have freed our stack if it + * deleted a page possibly collapsing the tree. 
+ */ + if (cp->csp->page == NULL) + goto deleted; + cp->recno += NUM_ENT(pg); + + if ((ret = __TLPUT(dbc, nnext_lock)) != 0) + goto err1; + LOCK_INIT(nnext_lock); + nnext_pgno = PGNO_INVALID; + + /* If we did not bump to the next page something did not fit. */ + if (npgno != NEXT_PGNO(pg)) { + npgno = NEXT_PGNO(pg); + goto next_page; + } + } else { + /* Case 2 -- same parents. */ + CTRACE(dbc, "Sib", "", start, 0); + if ((ret = + __bam_csearch(dbc, start, CS_PARENT, LEAFLEVEL)) != 0) { + if (ret == DB_NOTFOUND) { + isdone = 1; + ret = 0; + } + goto err; + } + + pg = cp->csp->page; + DB_ASSERT(env, IS_DIRTY(pg)); + DB_ASSERT(env, + PGNO(pg) == BAM_ROOT_PGNO(dbc) || + IS_DIRTY(cp->csp[-1].page)); + + /* Check to see if we moved to a new parent. */ + if (PGNO(pg) != BAM_ROOT_PGNO(dbc) && + ppgno != PGNO(cp->csp[-1].page) && pgs_done != 0) { + do_commit = 1; + goto next_page; + } + + /* We now have a write lock, recheck the page. */ + if ((nentry = NUM_ENT(pg)) == 0) { + npgno = PGNO(pg); + goto next_page; + } + + /* Check duplicate trees, we have a write lock on the page. */ + PTRACE(dbc, "SibDup", PGNO(pg), start, 0); + if (check_dups && (ret = + __bam_compact_dups(dbc, &cp->csp->page, + factor, 1, c_data, &pgs_done)) != 0) + goto err1; + pg = cp->csp->page; + npgno = NEXT_PGNO(pg); + + /* Check to see if the tree collapsed. */ + if (PGNO(pg) == BAM_ROOT_PGNO(dbc)) + goto err1; + DB_ASSERT(env, cp->csp - cp->sp == 1); + + /* After re-locking check to see if we still need to fill. */ + if (P_FREESPACE(dbp, pg) <= factor) { + if (check_trunc && + PGNO(pg) > c_data->compact_truncate) { + if (PREV_PGNO(pg) != PGNO_INVALID) { + TRY_LOCK(dbc, PREV_PGNO(pg), prev_pgno, + prev_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + if (npgno != PGNO_INVALID) { + TRY_LOCK(dbc, npgno, saved_pgno, + next_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + /* Get a fresh low numbered page. 
*/ + pgno = PGNO(pg); + if ((ret = __db_exchange_page(dbc, + &cp->csp->page, NULL, + PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err1; + if ((ret = __TLPUT(dbc, prev_lock)) != 0) + goto err1; + LOCK_INIT(prev_lock); + prev_pgno = PGNO_INVALID; + if ((ret = __TLPUT(dbc, next_lock)) != 0) + goto err1; + LOCK_INIT(next_lock); + saved_pgno = PGNO_INVALID; + pg = cp->csp->page; + if (pgno != PGNO(pg)) { + pgs_done++; + pgno = PGNO(pg); + } + } + /* + * If we are going to leave this parent commit + * the current transaction before continuing. + */ + epg = &cp->csp[-1]; + if ((ppgno != PGNO(epg->page) && + ppgno != PGNO_INVALID) || + epg->indx == NUM_ENT(epg->page) - 1) + do_commit = 1; + ppgno = PGNO(epg->page); + goto next_page; + } + + /* If they have the same parent, just dup the cursor */ + if (ndbc != NULL && (ret = __dbc_close(ndbc)) != 0) + goto err1; + if ((ret = __dbc_dup(dbc, &ndbc, DB_POSITION)) != 0) + goto err1; + ncp = (BTREE_CURSOR *)ndbc->internal; + + /* + * ncp->recno needs to have the recno of the next page. + * Bump it by the number of records on the current page. + */ + ncp->recno += NUM_ENT(pg); + } + + pgno = PGNO(cp->csp->page); + ppgno = PGNO(cp->csp[-1].page); + /* Fetch pages until we fill this one. */ + while (!isdone && npgno != PGNO_INVALID && + P_FREESPACE(dbp, pg) > factor && c_data->compact_pages != 0) { + /* + * merging may have to free the parent page, if it does, + * refetch it but do it decending the tree. 
+ */ + epg = &cp->csp[-1]; + if ((ppg = epg->page) == NULL) { + if ((ret = __memp_fput(dbmp, dbc->thread_info, + cp->csp->page, dbc->priority)) != 0) + goto err1; + pg = cp->csp->page = NULL; + if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && + (ret = __db_lget(dbc, 0, ppgno, + DB_LOCK_WRITE, 0, &epg->lock)) != 0) + goto err1; + if ((ret = __memp_fget(dbmp, &ppgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &ppg)) != 0) + goto err1; + if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && + (ret = __db_lget(dbc, 0, pgno, + DB_LOCK_WRITE, 0, &cp->csp->lock)) != 0) + goto err1; + if ((ret = __memp_fget(dbmp, &pgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0) + goto err1; + epg->page = ppg; + cp->csp->page = pg; + } + + /* + * If our current position is the last one on a parent + * page, then we are about to merge across different + * internal nodes. Thus, we need to lock higher up + * in the tree. We will exit the routine and commit + * what we have done so far. Set spanp so we know + * we are in this case when we come back. + */ + if (epg->indx == NUM_ENT(ppg) - 1) { + *spanp = 1; + do_commit = 1; + npgno = PGNO(pg); + next_recno = cp->recno; + epg->page = ppg; + goto next_page; + } + + /* Lock and get the next page. */ + TRY_LOCK(dbc, npgno, + saved_pgno, saved_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + if ((ret = __LPUT(dbc, ncp->lock)) != 0) + goto err1; + ncp->lock = saved_lock; + LOCK_INIT(saved_lock); + saved_pgno = PGNO_INVALID; + + if ((ret = __memp_fget(dbmp, &npgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &npg)) != 0) + goto err1; + + if (check_trunc && + PGNO(pg) > c_data->compact_truncate) { + if (PREV_PGNO(pg) != PGNO_INVALID) { + TRY_LOCK(dbc, PREV_PGNO(pg), + prev_pgno, prev_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + pgno = PGNO(pg); + /* Get a fresh low numbered page. 
*/ + if ((ret = __db_exchange_page(dbc, &cp->csp->page, + npg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err1; + if ((ret = __TLPUT(dbc, prev_lock)) != 0) + goto err1; + LOCK_INIT(prev_lock); + prev_pgno = PGNO_INVALID; + pg = cp->csp->page; + if (pgno != PGNO(pg)) { + pgs_done++; + pgno = PGNO(pg); + } + } + c_data->compact_pages_examine++; + + PTRACE(dbc, "MDups", PGNO(npg), start, 0); + if (check_dups && (ret = __bam_compact_dups(ndbc, + &npg, factor, 1, c_data, &pgs_done)) != 0) + goto err1; + + npgno = NEXT_PGNO(npg); + if (npgno != PGNO_INVALID) { + TRY_LOCK(dbc, npgno, + nnext_pgno, nnext_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err1; + } + + /* copy the common parent to the stack. */ + BT_STK_PUSH(env, ncp, ppg, + epg->indx + 1, epg->lock, epg->lock_mode, ret); + if (ret != 0) + goto err1; + + /* Put the page on the stack. */ + BT_STK_ENTER(env, ncp, npg, 0, ncp->lock, DB_LOCK_WRITE, ret); + + LOCK_INIT(ncp->lock); + npg = NULL; + + /* + * Merge the pages. This will either free the next + * page or just update its parent pointer. + */ + PTRACE(dbc, "Merge", PGNO(cp->csp->page), start, 0); + if ((ret = __bam_merge(dbc, + ndbc, factor, stop, c_data, &isdone)) != 0) + goto err1; + + pgs_done++; + + if ((ret = __TLPUT(dbc, nnext_lock)) != 0) + goto err1; + LOCK_INIT(nnext_lock); + nnext_pgno = PGNO_INVALID; + + /* + * __bam_merge could have freed our stack if it + * deleted a page possibly collapsing the tree. + */ + if (cp->csp->page == NULL) + goto deleted; + /* If we did not bump to the next page something did not fit. */ + if (npgno != NEXT_PGNO(pg)) + break; + } + + /* Bottom of the main loop. Move to the next page. */ + npgno = NEXT_PGNO(pg); + cp->recno += NUM_ENT(pg); + next_recno = cp->recno; + +next_page: + if (ndbc != NULL) { + ncp = (BTREE_CURSOR *)ndbc->internal; + if (ncp->sp->page == cp->sp->page) { + ncp->sp->page = NULL; + LOCK_INIT(ncp->sp->lock); + } + if ((ret = __bam_stkrel(ndbc, + pgs_done == 0 ? 
STK_NOLOCK : 0)) != 0) + goto err; + } + /* + * Unlatch the tree before trying to lock the next page. We must + * unlatch to avoid a latch deadlock but we want to hold the + * lock on the parent node so this leaf cannot be unlinked. + */ + pg = NULL; + if ((ret = __bam_stkrel(dbc, STK_PGONLY)) != 0) + goto err; + if (npgno != PGNO_INVALID && + (ret = __db_lget(dbc, 0, npgno, DB_LOCK_READ, 0, &next_lock)) != 0) + goto err; + if ((ret = __bam_stkrel(dbc, pgs_done == 0 ? STK_NOLOCK : 0)) != 0) + goto err; + if ((ret = __TLPUT(dbc, saved_lock)) != 0) + goto err; + if ((ret = __TLPUT(dbc, prev_lock)) != 0) + goto err; + +next_no_release: + pg = NULL; + + if (npgno == PGNO_INVALID || c_data->compact_pages == 0) + isdone = 1; + if (!isdone) { + /* + * If we are at the end of this parent commit the + * transaction so we don't tie things up. + */ + if (do_commit && !F_ISSET(dbc, DBC_OPD) && + (atomic_read(&dbp->mpf->mfp->multiversion) != 0 || + pgs_done != 0)) { +deleted: if (ndbc != NULL && + ((ret = __bam_stkrel(ndbc, 0)) != 0 || + (ret = __dbc_close(ndbc)) != 0)) + goto err; + goto out; + } + + /* Reget the next page to look at. */ + cp->recno = next_recno; + if ((ret = __memp_fget(dbmp, &npgno, + dbc->thread_info, dbc->txn, 0, &pg)) != 0) + goto err; + cp->csp->lock = next_lock; + LOCK_INIT(next_lock); + next_p = 1; + do_commit = 0; + /* If we did not do anything we can drop the metalock. */ + if (pgs_done == 0 && (ret = __LPUT(dbc, metalock)) != 0) + goto err; + goto next; + } + +done: + if (0) { + /* + * We come here if pg came from cp->csp->page and could + * have already been fput. + */ +err1: pg = NULL; + } +err: /* + * Don't release locks (STK_PGONLY)if we had an error, we could reveal + * a bad tree to a dirty reader. Wait till the abort to free the locks. 
+ */ + sflag = STK_CLRDBC; + if (dbc->txn != NULL && ret != 0) + sflag |= STK_PGONLY; + if (ndbc != NULL) { + ncp = (BTREE_CURSOR *)ndbc->internal; + if (npg == ncp->csp->page) + npg = NULL; + if (ncp->sp->page == cp->sp->page) { + ncp->sp->page = NULL; + LOCK_INIT(ncp->sp->lock); + } + if ((t_ret = __bam_stkrel(ndbc, sflag)) != 0 && ret == 0) + ret = t_ret; + else if ((t_ret = __dbc_close(ndbc)) != 0 && ret == 0) + ret = t_ret; + } + if (pg == cp->csp->page) + pg = NULL; + if ((t_ret = __bam_stkrel(dbc, sflag)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + if (pg != NULL && (t_ret = + __memp_fput(dbmp, + dbc->thread_info, pg, dbc->priority) != 0) && ret == 0) + ret = t_ret; + if (npg != NULL && (t_ret = + __memp_fput(dbmp, + dbc->thread_info, npg, dbc->priority) != 0) && ret == 0) + ret = t_ret; + +out: *donep = isdone; + + /* For OPD trees return if we did anything in the span variable. */ + if (F_ISSET(dbc, DBC_OPD)) + *spanp = pgs_done; + + return (ret); +} + +/* + * __bam_merge -- do actual merging of leaf pages. + */ +static int +__bam_merge(dbc, ndbc, factor, stop, c_data, donep) + DBC *dbc, *ndbc; + u_int32_t factor; + DBT *stop; + DB_COMPACT *c_data; + int *donep; +{ + BTREE_CURSOR *cp, *ncp; + DB *dbp; + PAGE *pg, *npg; + db_indx_t nent; + int ret; + + DB_ASSERT(NULL, dbc != NULL); + DB_ASSERT(NULL, ndbc != NULL); + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + ncp = (BTREE_CURSOR *)ndbc->internal; + pg = cp->csp->page; + npg = ncp->csp->page; + + nent = NUM_ENT(npg); + + /* If the page is empty just throw it away. */ + if (nent == 0) + goto free_page; + + /* Find if the stopping point is on this page. */ + if (stop != NULL && stop->size != 0) { + if ((ret = __bam_compact_isdone(dbc, stop, npg, donep)) != 0) + return (ret); + if (*donep) + return (0); + } + + /* + * If there is too much data then just move records one at a time. 
+ * Otherwise copy the data space over and fix up the index table. + * If we are on the left most child we will effect our parent's + * index entry so we call merge_records to figure out key sizes. + */ + if ((dbc->dbtype == DB_BTREE && + ncp->csp[-1].indx == 0 && ncp->csp[-1].entries != 1) || + (int)(P_FREESPACE(dbp, pg) - + ((dbp->pgsize - P_OVERHEAD(dbp)) - + P_FREESPACE(dbp, npg))) < (int)factor) + ret = __bam_merge_records(dbc, ndbc, factor, c_data); + else + /*lint -e{794} */ +free_page: ret = __bam_merge_pages(dbc, ndbc, c_data); + + return (ret); +} + +static int +__bam_merge_records(dbc, ndbc, factor, c_data) + DBC *dbc, *ndbc; + u_int32_t factor; + DB_COMPACT *c_data; +{ + BINTERNAL *bi; + BKEYDATA *bk, *tmp_bk; + BTREE *t; + BTREE_CURSOR *cp, *ncp; + DB *dbp; + DBT a, b, data, hdr; + ENV *env; + EPG *epg; + PAGE *pg, *npg; + db_indx_t adj, indx, nent, *ninp, pind; + int32_t adjust; + u_int32_t freespace, len, nksize, pfree, size; + int first_dup, is_dup, next_dup, n_ok, ret; + size_t (*func) __P((DB *, const DBT *, const DBT *)); + + dbp = dbc->dbp; + env = dbp->env; + t = dbp->bt_internal; + cp = (BTREE_CURSOR *)dbc->internal; + ncp = (BTREE_CURSOR *)ndbc->internal; + pg = cp->csp->page; + memset(&hdr, 0, sizeof(hdr)); + pind = NUM_ENT(pg); + n_ok = 0; + adjust = 0; + ret = 0; + + /* See if we want to swap out this page. */ + if (c_data->compact_truncate != PGNO_INVALID && + PGNO(ncp->csp->page) > c_data->compact_truncate) { + /* Get a fresh low numbered page. */ + if ((ret = __db_exchange_page(ndbc, + &ncp->csp->page, pg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err; + } + + npg = ncp->csp->page; + nent = NUM_ENT(npg); + + DB_ASSERT(env, nent != 0); + + ninp = P_INP(dbp, npg); + + /* + * pg is the page that is being filled, it is in the stack in cp. + * npg is the next page, it is in the stack in ncp. + */ + freespace = P_FREESPACE(dbp, pg); + + adj = TYPE(npg) == P_LBTREE ? 
P_INDX : O_INDX; + /* + * Loop through the records and find the stopping point. + */ + for (indx = 0; indx < nent; indx += adj) { + bk = GET_BKEYDATA(dbp, npg, indx); + + /* Size of the key. */ + size = BITEM_PSIZE(bk); + + /* Size of the data. */ + if (TYPE(pg) == P_LBTREE) + size += BITEM_PSIZE(GET_BKEYDATA(dbp, npg, indx + 1)); + /* + * If we are at a duplicate set, skip ahead to see and + * get the total size for the group. + */ + n_ok = adj; + if (TYPE(pg) == P_LBTREE && + indx < nent - adj && + ninp[indx] == ninp[indx + adj]) { + do { + /* Size of index for key reference. */ + size += sizeof(db_indx_t); + n_ok++; + /* Size of data item. */ + size += BITEM_PSIZE( + GET_BKEYDATA(dbp, npg, indx + n_ok)); + n_ok++; + } while (indx + n_ok < nent && + ninp[indx] == ninp[indx + n_ok]); + } + /* if the next set will not fit on the page we are done. */ + if (freespace < size) + break; + + /* + * Otherwise figure out if we are past the goal and if + * adding this set will put us closer to the goal than + * we are now. + */ + if ((freespace - size) < factor) { + if (freespace - factor > factor - (freespace - size)) + indx += n_ok; + break; + } + freespace -= size; + indx += n_ok - adj; + } + + /* If we have hit the first record then there is nothing we can move. */ + if (indx == 0) + goto done; + if (TYPE(pg) != P_LBTREE && TYPE(pg) != P_LDUP) { + if (indx == nent) + return (__bam_merge_pages(dbc, ndbc, c_data)); + goto no_check; + } + /* + * We need to update npg's parent key. Avoid creating a new key + * that will be too big. Get what space will be available on the + * parents. Then if there will not be room for this key, see if + * prefix compression will make it work, if not backup till we + * find something that will. (Needless to say, this is a very + * unlikely event.) If we are deleting this page then we will + * need to propagate the next key to our grand parents, so we + * see if that will fit. 
+ */ + pfree = dbp->pgsize; + for (epg = &ncp->csp[-1]; epg >= ncp->sp; epg--) + if ((freespace = P_FREESPACE(dbp, epg->page)) < pfree) { + bi = GET_BINTERNAL(dbp, epg->page, epg->indx); + /* Add back in the key we will be deleting. */ + freespace += BINTERNAL_PSIZE(bi->len); + if (freespace < pfree) + pfree = freespace; + if (epg->indx != 0) + break; + } + + /* + * If we are at the end, we will delete this page. We need to + * check the next parent key only if we are the leftmost page and + * will therefore have to propagate the key up the tree. + */ + if (indx == nent) { + if (ncp->csp[-1].indx != 0 || ncp->csp[-1].entries == 1 || + BINTERNAL_PSIZE(GET_BINTERNAL(dbp, + ncp->csp[-1].page, 1)->len) <= pfree) + return (__bam_merge_pages(dbc, ndbc, c_data)); + indx -= adj; + } + bk = GET_BKEYDATA(dbp, npg, indx); + len = (B_TYPE(bk->type) != B_KEYDATA) ? BOVERFLOW_SIZE : bk->len; + if (indx != 0 && BINTERNAL_SIZE(len) >= pfree) { + if (F_ISSET(dbc, DBC_OPD)) { + if (dbp->dup_compare == __bam_defcmp) + func = __bam_defpfx; + else + func = NULL; + } else + func = t->bt_prefix; + } else + func = NULL; + + /* Skip to the beginning of a duplicate set. */ + while (indx != 0 && ninp[indx] == ninp[indx - adj]) + indx -= adj; + + while (indx != 0 && BINTERNAL_SIZE(len) >= pfree) { + if (B_TYPE(bk->type) != B_KEYDATA) + goto noprefix; + /* + * Figure out if we can truncate this key. + * Code borrowed from bt_split.c + */ + if (func == NULL) + goto noprefix; + tmp_bk = GET_BKEYDATA(dbp, npg, indx - adj); + if (B_TYPE(tmp_bk->type) != B_KEYDATA) + goto noprefix; + memset(&a, 0, sizeof(a)); + a.size = tmp_bk->len; + a.data = tmp_bk->data; + memset(&b, 0, sizeof(b)); + b.size = bk->len; + b.data = bk->data; + nksize = (u_int32_t)func(dbp, &a, &b); + if (BINTERNAL_PSIZE(nksize) < pfree) + break; +noprefix: + /* Skip to the beginning of a duplicate set. 
*/ + do { + indx -= adj; + } while (indx != 0 && ninp[indx] == ninp[indx - adj]); + + bk = GET_BKEYDATA(dbp, npg, indx); + len = + (B_TYPE(bk->type) != B_KEYDATA) ? BOVERFLOW_SIZE : bk->len; + } + + /* + * indx references the first record that will not move to the previous + * page. If it is 0 then we could not find a key that would fit in + * the parent that would permit us to move any records. + */ + if (indx == 0) + goto done; + DB_ASSERT(env, indx <= nent); + + /* Loop through the records and move them from npg to pg. */ +no_check: is_dup = first_dup = next_dup = 0; + pg = cp->csp->page; + npg = ncp->csp->page; + DB_ASSERT(env, IS_DIRTY(pg)); + DB_ASSERT(env, IS_DIRTY(npg)); + ninp = P_INP(dbp, npg); + do { + bk = GET_BKEYDATA(dbp, npg, 0); + /* Figure out if we are in a duplicate group or not. */ + if ((NUM_ENT(npg) % 2) == 0) { + if (NUM_ENT(npg) > 2 && ninp[0] == ninp[2]) { + if (!is_dup) { + first_dup = 1; + is_dup = 1; + } else + first_dup = 0; + + next_dup = 1; + } else if (next_dup) { + is_dup = 1; + first_dup = 0; + next_dup = 0; + } else + is_dup = 0; + } + + if (is_dup && !first_dup && (pind % 2) == 0) { + /* Duplicate key. 
*/ + if ((ret = __bam_adjindx(dbc, + pg, pind, pind - P_INDX, 1)) != 0) + goto err; + if (!next_dup) + is_dup = 0; + } else switch (B_TYPE(bk->type)) { + case B_KEYDATA: + hdr.data = bk; + hdr.size = SSZA(BKEYDATA, data); + data.size = bk->len; + data.data = bk->data; + if ((ret = __db_pitem(dbc, pg, pind, + BKEYDATA_SIZE(bk->len), &hdr, &data)) != 0) + goto err; + break; + case B_OVERFLOW: + case B_DUPLICATE: + data.size = BOVERFLOW_SIZE; + data.data = bk; + if ((ret = __db_pitem(dbc, pg, pind, + BOVERFLOW_SIZE, &data, NULL)) != 0) + goto err; + break; + default: + __db_errx(env, DB_STR_A("1022", + "Unknown record format, page %lu, indx 0", + "%lu"), (u_long)PGNO(pg)); + ret = EINVAL; + goto err; + } + pind++; + if (next_dup && (NUM_ENT(npg) % 2) == 0) { + if ((ret = __bam_adjindx(ndbc, + npg, 0, O_INDX, 0)) != 0) + goto err; + } else { + if ((ret = __db_ditem(ndbc, + npg, 0, BITEM_SIZE(bk))) != 0) + goto err; + } + adjust++; + } while (--indx != 0); + + DB_ASSERT(env, NUM_ENT(npg) != 0); + + if (adjust != 0 && + (F_ISSET(cp, C_RECNUM) || F_ISSET(dbc, DBC_OPD))) { + if (TYPE(pg) == P_LBTREE) + adjust /= P_INDX; + if ((ret = __bam_adjust(ndbc, -adjust)) != 0) + goto err; + + if ((ret = __bam_adjust(dbc, adjust)) != 0) + goto err; + } + + /* Update parent with new key. 
*/ + if (ndbc->dbtype == DB_BTREE && + (ret = __bam_pupdate(ndbc, pg)) != 0) + goto err; + +done: if (cp->sp->page == ncp->sp->page) { + cp->sp->page = NULL; + LOCK_INIT(cp->sp->lock); + } + ret = __bam_stkrel(ndbc, STK_CLRDBC); + +err: return (ret); +} + +static int +__bam_merge_pages(dbc, ndbc, c_data) + DBC *dbc, *ndbc; + DB_COMPACT *c_data; +{ + BTREE_CURSOR *cp, *ncp; + DB *dbp; + DBT data, hdr; + DB_LOCK root_lock; + DB_MPOOLFILE *dbmp; + PAGE *pg, *npg; + db_indx_t nent, *ninp, *pinp; + db_pgno_t pgno, ppgno; + u_int8_t *bp; + u_int32_t len; + int i, level, ret; + + LOCK_INIT(root_lock); + COMPQUIET(ppgno, PGNO_INVALID); + dbp = dbc->dbp; + dbmp = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + ncp = (BTREE_CURSOR *)ndbc->internal; + pg = cp->csp->page; + npg = ncp->csp->page; + memset(&hdr, 0, sizeof(hdr)); + nent = NUM_ENT(npg); + + /* If the page is empty just throw it away. */ + if (nent == 0) + goto free_page; + + pg = cp->csp->page; + npg = ncp->csp->page; + DB_ASSERT(dbp->env, IS_DIRTY(pg)); + DB_ASSERT(dbp->env, IS_DIRTY(npg)); + DB_ASSERT(dbp->env, nent == NUM_ENT(npg)); + + /* Bulk copy the data to the new page. */ + len = dbp->pgsize - HOFFSET(npg); + if (DBC_LOGGING(dbc)) { + memset(&hdr, 0, sizeof(hdr)); + hdr.data = npg; + hdr.size = LOFFSET(dbp, npg); + memset(&data, 0, sizeof(data)); + data.data = (u_int8_t *)npg + HOFFSET(npg); + data.size = len; + if ((ret = __db_merge_log(dbp, + dbc->txn, &LSN(pg), 0, PGNO(pg), + &LSN(pg), PGNO(npg), &LSN(npg), &hdr, &data, 0)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(pg)); + LSN(npg) = LSN(pg); + bp = (u_int8_t *)pg + HOFFSET(pg) - len; + memcpy(bp, (u_int8_t *)npg + HOFFSET(npg), len); + + /* Copy index table offset by what was there already. 
*/ + pinp = P_INP(dbp, pg) + NUM_ENT(pg); + ninp = P_INP(dbp, npg); + for (i = 0; i < NUM_ENT(npg); i++) + *pinp++ = *ninp++ - (dbp->pgsize - HOFFSET(pg)); + HOFFSET(pg) -= len; + NUM_ENT(pg) += i; + + NUM_ENT(npg) = 0; + HOFFSET(npg) += len; + + if (F_ISSET(cp, C_RECNUM) || F_ISSET(dbc, DBC_OPD)) { + /* + * There are two cases here regarding the stack. + * Either we have two two level stacks but only ndbc + * references the parent page or we have a multilevel + * stack and only ndbc has an entry for the spanning + * page. + */ + if (TYPE(pg) == P_LBTREE) + i /= P_INDX; + if ((ret = __bam_adjust(ndbc, -i)) != 0) + goto err; + + if ((ret = __bam_adjust(dbc, i)) != 0) + goto err; + } + +free_page: + /* + * __bam_dpages may decide to collapse the tree. + * This can happen if we have the root and there + * are exactly 2 pointers left in it. + * If it can collapse the tree we must free the other + * stack since it will nolonger be valid. This + * must be done before hand because we cannot + * hold a page pinned if it might be truncated. + */ + if ((ret = __db_relink(dbc, + ncp->csp->page, cp->csp->page, PGNO_INVALID)) != 0) + goto err; + /* Drop the duplicate reference to the sub tree root. */ + cp->sp->page = NULL; + LOCK_INIT(cp->sp->lock); + if (PGNO(ncp->sp->page) == BAM_ROOT_PGNO(ndbc) && + NUM_ENT(ncp->sp->page) == 2) { + if ((ret = __bam_stkrel(dbc, STK_CLRDBC | STK_PGONLY)) != 0) + goto err; + level = LEVEL(ncp->sp->page); + ppgno = PGNO(ncp->csp[-1].page); + } else + level = 0; + COMPACT_TRUNCATE(c_data); + if ((ret = __bam_dpages(ndbc, + 0, ndbc->dbtype == DB_RECNO ? 
0 : BTD_UPDATE)) != 0) + goto err; + npg = NULL; + c_data->compact_pages_free++; + c_data->compact_pages--; + if (level != 0) { + pgno = PGNO_INVALID; + BAM_GET_ROOT(ndbc, pgno, npg, 0, DB_LOCK_READ, root_lock, ret); + if (ret != 0) + goto err; + DB_ASSERT(dbp->env, npg != NULL); + if (level == LEVEL(npg)) + level = 0; + if ((ret = __memp_fput(dbmp, + dbc->thread_info, npg, dbc->priority)) != 0) + goto err; + if ((ret = __LPUT(ndbc, root_lock)) != 0) + goto err; + npg = NULL; + if (level != 0) { + c_data->compact_levels++; + c_data->compact_pages_free++; + COMPACT_TRUNCATE(c_data); + if (c_data->compact_pages != 0) + c_data->compact_pages--; + } + } + +err: return (ret); +} + +/* + * __bam_merge_internal -- + * Merge internal nodes of the tree. + */ +static int +__bam_merge_internal(dbc, ndbc, level, c_data, merged) + DBC *dbc, *ndbc; + int level; + DB_COMPACT *c_data; + int *merged; +{ + BINTERNAL bi, *bip, *fip; + BTREE_CURSOR *cp, *ncp; + DB *dbp; + DBT data, hdr; + DB_LOCK root_lock; + DB_MPOOLFILE *dbmp; + EPG *epg, *save_csp, *nsave_csp; + PAGE *pg, *npg; + RINTERNAL *rk; + db_indx_t first, indx, pind; + db_pgno_t pgno, ppgno; + int32_t nrecs, trecs; + u_int16_t size; + u_int32_t freespace, pfree; + int ret; + + COMPQUIET(bip, NULL); + COMPQUIET(ppgno, PGNO_INVALID); + DB_ASSERT(NULL, dbc != NULL); + DB_ASSERT(NULL, ndbc != NULL); + LOCK_INIT(root_lock); + + /* + * ndbc will contain the the dominating parent of the subtree. + * dbc will have the tree containing the left child. + * + * The stacks descend to the leaf level. + * If this is a recno tree then both stacks will start at the root. + */ + dbp = dbc->dbp; + dbmp = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + ncp = (BTREE_CURSOR *)ndbc->internal; + *merged = 0; + ret = 0; + + /* + * Set the stacks to the level requested. + * Save the old value to restore when we exit. 
+ */ + save_csp = cp->csp; + cp->csp = &cp->csp[-level + 1]; + pg = cp->csp->page; + pind = NUM_ENT(pg); + + nsave_csp = ncp->csp; + ncp->csp = &ncp->csp[-level + 1]; + npg = ncp->csp->page; + indx = NUM_ENT(npg); + + /* + * The caller may have two stacks that include common ancestors, we + * check here for convenience. + */ + if (npg == pg) + goto done; + + if (TYPE(pg) == P_IBTREE) { + /* + * Check for overflow keys on both pages while we have + * them locked. + */ + if ((ret = + __bam_truncate_internal_overflow(dbc, pg, c_data)) != 0) + goto err; + if ((ret = + __bam_truncate_internal_overflow(dbc, npg, c_data)) != 0) + goto err; + } + + /* + * If we are about to move data off the left most page of an + * internal node we will need to update its parents, make sure there + * will be room for the new key on all the parents in the stack. + * If not, move less data. + */ + fip = NULL; + if (TYPE(pg) == P_IBTREE) { + /* See where we run out of space. */ + freespace = P_FREESPACE(dbp, pg); + /* + * The leftmost key of an internal page is not accurate. + * Go up the tree to find a non-leftmost parent. + */ + epg = ncp->csp; + while (--epg >= ncp->sp && epg->indx == 0) + continue; + fip = bip = GET_BINTERNAL(dbp, epg->page, epg->indx); + epg = ncp->csp; + + for (indx = 0;;) { + size = BINTERNAL_PSIZE(bip->len); + if (size > freespace) + break; + freespace -= size; + if (++indx >= NUM_ENT(npg)) + break; + bip = GET_BINTERNAL(dbp, npg, indx); + } + + /* See if we are deleting the page and we are not left most. */ + if (indx == NUM_ENT(npg) && epg[-1].indx != 0) + goto fits; + + pfree = dbp->pgsize; + for (epg--; epg >= ncp->sp; epg--) + if ((freespace = P_FREESPACE(dbp, epg->page)) < pfree) { + bip = GET_BINTERNAL(dbp, epg->page, epg->indx); + /* Add back in the key we will be deleting. 
*/ + freespace += BINTERNAL_PSIZE(bip->len); + if (freespace < pfree) + pfree = freespace; + if (epg->indx != 0) + break; + } + epg = ncp->csp; + + /* If we are at the end of the page we will delete it. */ + if (indx == NUM_ENT(npg)) { + if (NUM_ENT(epg[-1].page) == 1) + goto fits; + bip = + GET_BINTERNAL(dbp, epg[-1].page, epg[-1].indx + 1); + } else + bip = GET_BINTERNAL(dbp, npg, indx); + + /* Back up until we have a key that fits. */ + while (indx != 0 && BINTERNAL_PSIZE(bip->len) > pfree) { + indx--; + bip = GET_BINTERNAL(dbp, npg, indx); + } + if (indx == 0) + goto done; + } + +fits: memset(&bi, 0, sizeof(bi)); + memset(&hdr, 0, sizeof(hdr)); + memset(&data, 0, sizeof(data)); + trecs = 0; + + /* + * Copy data between internal nodes till one is full + * or the other is empty. + */ + first = 0; + nrecs = 0; + do { + if (dbc->dbtype == DB_BTREE) { + bip = GET_BINTERNAL(dbp, npg, 0); + size = fip == NULL ? + BINTERNAL_SIZE(bip->len) : + BINTERNAL_SIZE(fip->len); + if (P_FREESPACE(dbp, pg) < size + sizeof(db_indx_t)) + break; + + if (fip == NULL) { + data.size = bip->len; + data.data = bip->data; + } else { + data.size = fip->len; + data.data = fip->data; + } + bi.len = data.size; + B_TSET(bi.type, bip->type); + bi.pgno = bip->pgno; + bi.nrecs = bip->nrecs; + hdr.data = &bi; + hdr.size = SSZA(BINTERNAL, data); + if (F_ISSET(cp, C_RECNUM) || F_ISSET(dbc, DBC_OPD)) + nrecs = (int32_t)bip->nrecs; + } else { + rk = GET_RINTERNAL(dbp, npg, 0); + size = RINTERNAL_SIZE; + if (P_FREESPACE(dbp, pg) < size + sizeof(db_indx_t)) + break; + + hdr.data = rk; + hdr.size = size; + nrecs = (int32_t)rk->nrecs; + } + /* + * Try to lock the subtree leaf records without waiting. + * We must lock the subtree below the record we are merging + * and the one after it since that is were a search will wind + * up if it has already looked at our parent. After the first + * move we have the current subtree already locked. 
+ * If we merged any records then we will revisit this + * node when we merge its leaves. If not we will return + * NOTGRANTED and our caller will do a retry. We only + * need to do this if we are in a transation. If not then + * we cannot abort and things will be hosed up on error + * anyway. + */ + if (dbc->txn != NULL && (ret = __bam_lock_tree(ndbc, + ncp->csp, nsave_csp, first, + NUM_ENT(ncp->csp->page) == 1 ? 1 : 2)) != 0) { + if (ret != DB_LOCK_NOTGRANTED) + goto err; + break; + } + first = 1; + if ((ret = __db_pitem(dbc, pg, pind, size, &hdr, &data)) != 0) + goto err; + pind++; + if (fip != NULL) { + /* reset size to be for the record being deleted. */ + size = BINTERNAL_SIZE(bip->len); + fip = NULL; + } + if ((ret = __db_ditem(ndbc, npg, 0, size)) != 0) + goto err; + *merged = 1; + trecs += nrecs; + } while (--indx != 0); + + if (!*merged) + goto done; + + if (trecs != 0) { + cp->csp--; + ret = __bam_adjust(dbc, trecs); + if (ret != 0) + goto err; + cp->csp++; + ncp->csp--; + if ((ret = __bam_adjust(ndbc, -trecs)) != 0) + goto err; + ncp->csp++; + } + + /* + * Either we emptied the page or we need to update its + * parent to reflect the first page we now point to. + * First get rid of the bottom of the stack, + * bam_dpages will clear the stack. Maintain transactional + * locks on the leaf pages to protect changes at this level. + */ + do { + if ((ret = __memp_fput(dbmp, dbc->thread_info, + nsave_csp->page, dbc->priority)) != 0) + goto err; + nsave_csp->page = NULL; + if ((ret = __TLPUT(dbc, nsave_csp->lock)) != 0) + goto err; + LOCK_INIT(nsave_csp->lock); + nsave_csp--; + } while (nsave_csp != ncp->csp); + + if (NUM_ENT(npg) == 0) { + /* + * __bam_dpages may decide to collapse the tree + * so we need to free our other stack. The tree + * will change in hight and our stack will nolonger + * be valid. 
+ */ + cp->csp = save_csp; + cp->sp->page = NULL; + LOCK_INIT(cp->sp->lock); + if (PGNO(ncp->sp->page) == BAM_ROOT_PGNO(ndbc) && + NUM_ENT(ncp->sp->page) == 2) { + if ((ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0) + goto err; + level = LEVEL(ncp->sp->page); + ppgno = PGNO(ncp->csp[-1].page); + } else + level = 0; + + COMPACT_TRUNCATE(c_data); + ret = __bam_dpages(ndbc, + 0, ndbc->dbtype == DB_RECNO ? + BTD_RELINK : BTD_UPDATE | BTD_RELINK); + c_data->compact_pages_free++; + if (ret == 0 && level != 0) { + pgno = PGNO_INVALID; + BAM_GET_ROOT(ndbc, + pgno, npg, 0, DB_LOCK_READ, root_lock, ret); + if (ret != 0) + goto err; + if (level == LEVEL(npg)) + level = 0; + if ((ret = __LPUT(ndbc, root_lock)) != 0) + goto err; + if ((ret = __memp_fput(dbmp, + dbc->thread_info, npg, dbc->priority)) != 0) + goto err; + npg = NULL; + if (level != 0) { + c_data->compact_levels++; + c_data->compact_pages_free++; + COMPACT_TRUNCATE(c_data); + if (c_data->compact_pages != 0) + c_data->compact_pages--; + } + } + } else { + ret = __bam_pupdate(ndbc, npg); + + if (NUM_ENT(npg) != 0 && + c_data->compact_truncate != PGNO_INVALID && + PGNO(npg) > c_data->compact_truncate && + ncp->csp != ncp->sp) { + if ((ret = __db_exchange_page(ndbc, &ncp->csp->page, + pg, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err; + } + if (c_data->compact_truncate != PGNO_INVALID && + PGNO(pg) > c_data->compact_truncate && cp->csp != cp->sp) { + if ((ret = __db_exchange_page(dbc, &cp->csp->page, + ncp->csp->page, + PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err; + } + } + cp->csp = save_csp; + + return (ret); + +done: +err: cp->csp = save_csp; + ncp->csp = nsave_csp; + + return (ret); +} + +/* + * __bam_compact_dups -- try to compress off page dup trees. + * We may or may not have a write lock on this page. 
+ */ +static int +__bam_compact_dups(dbc, ppg, factor, have_lock, c_data, donep) + DBC *dbc; + PAGE **ppg; + u_int32_t factor; + int have_lock; + DB_COMPACT *c_data; + int *donep; +{ + BOVERFLOW *bo; + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *dbmp; + db_indx_t i; + db_pgno_t pgno; + int ret; + + ret = 0; + + DB_ASSERT(NULL, dbc != NULL); + dbp = dbc->dbp; + dbmp = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + for (i = 0; i < NUM_ENT(*ppg); i++) { + bo = GET_BOVERFLOW(dbp, *ppg, i); + if (B_TYPE(bo->type) == B_KEYDATA) + continue; + c_data->compact_pages_examine++; + if (bo->pgno > c_data->compact_truncate) { + (*donep)++; + if (!have_lock) { + /* + * The caller should have the page at + * least read locked. Drop the buffer + * and get the write lock. + */ + pgno = PGNO(*ppg); + if ((ret = __memp_fput(dbmp, dbc->thread_info, + *ppg, dbc->priority)) != 0) + goto err; + *ppg = NULL; + if ((ret = __db_lget(dbc, 0, pgno, + DB_LOCK_WRITE, 0, &cp->csp->lock)) != 0) + goto err; + have_lock = 1; + if ((ret = __memp_fget(dbmp, &pgno, + dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, ppg)) != 0) + goto err; + } + if ((ret = __bam_truncate_root_page(dbc, + *ppg, i, c_data)) != 0) + goto err; + /* Just in case it should move. Could it? */ + bo = GET_BOVERFLOW(dbp, *ppg, i); + } + + if (B_TYPE(bo->type) == B_OVERFLOW) { + if ((ret = __db_truncate_overflow(dbc, + bo->pgno, have_lock ? NULL : ppg, c_data)) != 0) + goto err; + (*donep)++; + continue; + } + if ((ret = __bam_compact_opd(dbc, bo->pgno, + have_lock ? NULL : ppg, factor, c_data, donep)) != 0) + goto err; + } + +err: + return (ret); +} + +/* + * __bam_compact_opd -- compact an off page duplicate tree. 
+ * + * PUBLIC: int __bam_compact_opd __P((DBC *, + * PUBLIC: db_pgno_t, PAGE **, u_int32_t, DB_COMPACT *, int *)); + */ +int +__bam_compact_opd(dbc, root_pgno, ppg, factor, c_data, donep) + DBC *dbc; + db_pgno_t root_pgno; + PAGE **ppg; + u_int32_t factor; + DB_COMPACT *c_data; + int *donep; +{ + BTREE_CURSOR *cp; + DBC *opd; + DBT start; + DB_MPOOLFILE *dbmp; + ENV *env; + PAGE *dpg; + int isdone, level, ret, span, t_ret; + db_pgno_t pgno; + + LOCK_CHECK_OFF(dbc->thread_info); + + opd = NULL; + env = dbc->dbp->env; + dbmp = dbc->dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Take a peek at the root. If it's a leaf then + * there is no tree here, avoid all the trouble. + */ + if ((ret = __memp_fget(dbmp, &root_pgno, + dbc->thread_info, dbc->txn, 0, &dpg)) != 0) + goto err; + + level = dpg->level; + if ((ret = __memp_fput(dbmp, + dbc->thread_info, dpg, dbc->priority)) != 0) + goto err; + if (level == LEAFLEVEL) + goto done; + if ((ret = __dbc_newopd(dbc, root_pgno, NULL, &opd)) != 0) + goto err; + if (ppg != NULL) { + /* + * The caller should have the page at + * least read locked. Drop the buffer + * and get the write lock. + */ + pgno = PGNO(*ppg); + if ((ret = __memp_fput(dbmp, dbc->thread_info, + *ppg, dbc->priority)) != 0) + goto err; + *ppg = NULL; + if ((ret = __db_lget(dbc, 0, pgno, + DB_LOCK_WRITE, 0, &cp->csp->lock)) != 0) + goto err; + if ((ret = __memp_fget(dbmp, &pgno, + dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, ppg)) != 0) + goto err; + } + memset(&start, 0, sizeof(start)); + do { + span = 0; + if ((ret = __bam_compact_int(opd, &start, + NULL, factor, &span, c_data, &isdone)) != 0) + break; + /* For OPD the number of pages dirtied is returned in span. 
*/ + *donep += span; + } while (!isdone); + + if (start.data != NULL) + __os_free(env, start.data); + +err: if (opd != NULL && (t_ret = __dbc_close(opd)) != 0 && ret == 0) + ret = t_ret; +done: + LOCK_CHECK_ON(dbc->thread_info); + + return (ret); +} + +/* + * __bam_truncate_root_page -- swap a page which is + * the root of an off page dup tree or the head of an overflow. + * The page is reference by the pg/indx passed in. + */ +static int +__bam_truncate_root_page(dbc, pg, indx, c_data) + DBC *dbc; + PAGE *pg; + u_int32_t indx; + DB_COMPACT *c_data; +{ + BINTERNAL *bi; + BOVERFLOW *bo; + DB *dbp; + db_pgno_t *pgnop; + + COMPQUIET(c_data, NULL); + COMPQUIET(bo, NULL); + dbp = dbc->dbp; + if (TYPE(pg) == P_IBTREE) { + bi = GET_BINTERNAL(dbp, pg, indx); + if (B_TYPE(bi->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)(bi->data); + pgnop = &bo->pgno; + } else + pgnop = &bi->pgno; + } else { + bo = GET_BOVERFLOW(dbp, pg, indx); + pgnop = &bo->pgno; + } + + DB_ASSERT(dbp->env, IS_DIRTY(pg)); + + return (__db_truncate_root(dbc, pg, indx, pgnop, bo->tlen)); +} + +/* + * -- bam_truncate_internal_overflow -- find overflow keys + * on internal pages and if they have high page + * numbers swap them with lower pages and truncate them. + * Note that if there are overflow keys in the internal + * nodes they will get copied adding pages to the database. 
+ */ +static int +__bam_truncate_internal_overflow(dbc, page, c_data) + DBC *dbc; + PAGE *page; + DB_COMPACT *c_data; +{ + BINTERNAL *bi; + BOVERFLOW *bo; + db_indx_t indx; + int ret; + + COMPQUIET(bo, NULL); + ret = 0; + for (indx = 0; indx < NUM_ENT(page); indx++) { + bi = GET_BINTERNAL(dbc->dbp, page, indx); + if (B_TYPE(bi->type) != B_OVERFLOW) + continue; + bo = (BOVERFLOW *)(bi->data); + if (bo->pgno > c_data->compact_truncate && (ret = + __bam_truncate_root_page(dbc, page, indx, c_data)) != 0) + break; + if ((ret = __db_truncate_overflow( + dbc, bo->pgno, NULL, c_data)) != 0) + break; + } + return (ret); +} + +/* + * __bam_compact_isdone --- + * + * Check to see if the stop key specified by the caller is on the + * current page, in which case we are done compacting. + */ +static int +__bam_compact_isdone(dbc, stop, pg, isdone) + DBC *dbc; + DBT *stop; + PAGE *pg; + int *isdone; +{ + db_recno_t recno; + BTREE *t; + BTREE_CURSOR *cp; + int cmp, ret; + + *isdone = 0; + cp = (BTREE_CURSOR *)dbc->internal; + t = dbc->dbp->bt_internal; + + if (dbc->dbtype == DB_RECNO) { + if ((ret = __ram_getno(dbc, stop, &recno, 0)) != 0) + return (ret); + *isdone = cp->recno > recno; + } else { + DB_ASSERT(dbc->dbp->env, TYPE(pg) == P_LBTREE); + if ((ret = __bam_cmp(dbc, stop, pg, 0, + t->bt_compare, &cmp)) != 0) + return (ret); + + *isdone = cmp <= 0; + } + return (0); +} + +/* + * Lock the subtrees from the top of the stack. + * The 0'th child may be in the stack and locked otherwise iterate + * through the records by calling __bam_lock_subtree. + */ +static int +__bam_lock_tree(dbc, sp, csp, start, stop) + DBC *dbc; + EPG *sp, *csp; + u_int32_t start, stop; +{ + PAGE *cpage; + db_pgno_t pgno; + int ret; + + if (dbc->dbtype == DB_RECNO) + pgno = GET_RINTERNAL(dbc->dbp, sp->page, 0)->pgno; + else + pgno = GET_BINTERNAL(dbc->dbp, sp->page, 0)->pgno; + cpage = (sp + 1)->page; + /* + * First recurse down the left most sub tree if it is in the cursor + * stack. 
We already have these pages latched and locked if its a + * leaf. + */ + if (start == 0 && sp + 1 != csp && pgno == PGNO(cpage) && + (ret = __bam_lock_tree(dbc, sp + 1, csp, 0, NUM_ENT(cpage))) != 0) + return (ret); + + /* + * Then recurse on the other records on the page if needed. + * If the page is in the stack then its already locked or + * was processed above. + */ + if (start == 0 && pgno == PGNO(cpage)) + start = 1; + + if (start == stop) + return (0); + return (__bam_lock_subtree(dbc, sp->page, start, stop)); + +} + +/* + * Lock the subtree from the current node. + */ +static int +__bam_lock_subtree(dbc, page, indx, stop) + DBC *dbc; + PAGE *page; + u_int32_t indx, stop; +{ + DB *dbp; + DB_LOCK lock; + PAGE *cpage; + db_pgno_t pgno; + int ret, t_ret; + + dbp = dbc->dbp; + + for (; indx < stop; indx++) { + if (dbc->dbtype == DB_RECNO) + pgno = GET_RINTERNAL(dbc->dbp, page, indx)->pgno; + else + pgno = GET_BINTERNAL(dbc->dbp, page, indx)->pgno; + if (LEVEL(page) - 1 == LEAFLEVEL) { + if ((ret = __db_lget(dbc, 0, pgno, + DB_LOCK_WRITE, DB_LOCK_NOWAIT, &lock)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + return (DB_LOCK_NOTGRANTED); + return (ret); + } + } else { + if ((ret = __memp_fget(dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &cpage)) != 0) + return (ret); + ret = __bam_lock_subtree(dbc, cpage, 0, NUM_ENT(cpage)); + if ((t_ret = __memp_fput(dbp->mpf, dbc->thread_info, + cpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + } + } + return (0); +} + +#ifdef HAVE_FTRUNCATE +/* + * __bam_savekey -- save the key from an internal page. + * We need to save information so that we can + * fetch then next internal node of the tree. This means + * we need the btree key on this current page, or the + * next record number. 
+ */ +static int +__bam_savekey(dbc, next, start) + DBC *dbc; + int next; + DBT *start; +{ + BINTERNAL *bi; + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE_CURSOR *cp; + DB *dbp; + DB_LOCK lock; + ENV *env; + PAGE *pg; + RINTERNAL *ri; + db_indx_t indx, top; + db_pgno_t pgno, saved_pgno; + int ret, t_ret; + u_int32_t len; + u_int8_t *data; + int level; + + dbp = dbc->dbp; + env = dbp->env; + cp = (BTREE_CURSOR *)dbc->internal; + pg = cp->csp->page; + ret = 0; + + if (dbc->dbtype == DB_RECNO) { + if (next) + for (indx = 0, top = NUM_ENT(pg); indx != top; indx++) { + ri = GET_RINTERNAL(dbp, pg, indx); + cp->recno += ri->nrecs; + } + return (__db_retcopy(env, start, &cp->recno, + sizeof(cp->recno), &start->data, &start->ulen)); + + } + + bi = GET_BINTERNAL(dbp, pg, NUM_ENT(pg) - 1); + data = bi->data; + len = bi->len; + LOCK_INIT(lock); + saved_pgno = PGNO_INVALID; + /* If there is single record on the page it may have an empty key. */ + while (len == 0) { + /* + * We should not have an empty data page, since we just + * compacted things, check anyway and punt. + */ + if (NUM_ENT(pg) == 0) + goto no_key; + pgno = bi->pgno; + level = LEVEL(pg); + if (pg != cp->csp->page && + (ret = __memp_fput(dbp->mpf, + dbc->thread_info, pg, dbc->priority)) != 0) { + pg = NULL; + goto err; + } + pg = NULL; + if (level - 1 == LEAFLEVEL) { + TRY_LOCK(dbc, pgno, saved_pgno, + lock, DB_LOCK_READ, retry); + if (ret != 0) + goto err; + } + if ((ret = __memp_fget(dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &pg)) != 0) + goto err; + + /* + * At the data level use the last key to try and avoid the + * possibility that the user has a zero length key, if they + * do, we punt. 
+ */ + if (pg->level == LEAFLEVEL) { + bk = GET_BKEYDATA(dbp, pg, NUM_ENT(pg) - 2); + data = bk->data; + len = bk->len; + if (len == 0) { +no_key: __db_errx(env, DB_STR("1023", + "Compact cannot handle zero length key")); + ret = DB_NOTFOUND; + goto err; + } + } else { + bi = GET_BINTERNAL(dbp, pg, NUM_ENT(pg) - 1); + data = bi->data; + len = bi->len; + } + } + if (B_TYPE(bi->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)(data); + ret = __db_goff(dbc, start, bo->tlen, bo->pgno, + &start->data, &start->ulen); + } + else + ret = __db_retcopy(env, + start, data, len, &start->data, &start->ulen); + +err: if (pg != NULL && pg != cp->csp->page && + (t_ret = __memp_fput(dbp->mpf, dbc->thread_info, + pg, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + return (ret); + +retry: return (DB_LOCK_NOTGRANTED); +} + +/* + * bam_truncate_ipages -- + * Find high numbered pages in the internal nodes of a tree and + * swap them for lower numbered pages. + * PUBLIC: int __bam_truncate_ipages __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *)); + */ +int +__bam_truncate_ipages(dbp, ip, txn, c_data) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_COMPACT *c_data; +{ + BTMETA *meta; + BTREE *bt; + BTREE_CURSOR *cp; + DBC *dbc; + DBMETA *dbmeta; + DBT start; + DB_LOCK meta_lock, root_lock; + DB_TXN *txn_orig; + PAGE *pg, *root; + db_pgno_t pgno; + u_int32_t sflag; + int level, local_txn, ret, rlevel, t_ret; + + COMPQUIET(pg, NULL); + dbc = NULL; + memset(&start, 0, sizeof(start)); + LOCK_INIT(root_lock); + txn_orig = txn; + + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + local_txn = 1; + txn = NULL; + } else + local_txn = 0; + + level = LEAFLEVEL + 1; + sflag = CS_READ | CS_GETRECNO; + LOCK_INIT(meta_lock); + bt = dbp->bt_internal; + meta = NULL; + root = NULL; + +new_txn: + if (local_txn && + (ret = __txn_begin(dbp->env, ip, txn_orig, &txn, 0)) != 0) + goto err; + + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + goto err; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * 
If the the root is a leaf we have nothing to do. + * Searching an empty RECNO tree will return NOTFOUND below and loop. + */ + pgno = PGNO_INVALID; + BAM_GET_ROOT(dbc, pgno, root, 0, DB_LOCK_READ, root_lock, ret); + if (ret != 0) + goto err; + + rlevel = LEVEL(root); + if ((ret = __memp_fput(dbp->mpf, ip, root, dbp->priority)) != 0) + goto err; + root = NULL; + + if (rlevel == LEAFLEVEL) + goto again; + + pgno = PGNO_INVALID; + do { + if ((ret = __bam_csearch(dbc, &start, sflag, level)) != 0) { + /* No more at this level, go up one. */ + if (ret == DB_NOTFOUND) { + level++; + if (start.data != NULL) + __os_free(dbp->env, start.data); + memset(&start, 0, sizeof(start)); + sflag = CS_READ | CS_GETRECNO; + continue; + } + goto err; + } + c_data->compact_pages_examine++; + + pg = cp->csp->page; + pgno = PGNO(pg); + + sflag = CS_NEXT | CS_GETRECNO; + /* Grab info about the page and drop the stack. */ + if (pgno != BAM_ROOT_PGNO(dbc) && (ret = __bam_savekey(dbc, + pgno <= c_data->compact_truncate, &start)) != 0) { + if (ret == DB_LOCK_NOTGRANTED) + continue; + goto err; + } + + /* We only got read locks so we can drop them. */ + if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + if (pgno == BAM_ROOT_PGNO(dbc)) + break; + + if (pgno <= c_data->compact_truncate) + continue; + + /* Get the meta page lock before latching interior nodes. */ + if (!LOCK_ISSET(meta_lock) && (ret = __db_lget(dbc, + 0, PGNO_BASE_MD, DB_LOCK_WRITE, 0, &meta_lock)) != 0) + goto err; + + /* Reget the page with a write latch, and its parent too. */ + if ((ret = __bam_csearch(dbc, + &start, CS_PARENT | CS_GETRECNO, level)) != 0) { + if (ret == DB_NOTFOUND) { + ret = 0; + } + goto err; + } + pgno = PGNO(cp->csp->page); + + if (pgno > c_data->compact_truncate) { + if ((ret = __db_exchange_page(dbc, &cp->csp->page, + NULL, PGNO_INVALID, DB_EXCH_DEFAULT)) != 0) + goto err; + } + + /* + * For RECNO we need to bump the saved key to the next + * page since CS_NEXT will not do that. 
+ */ + if (dbc->dbtype == DB_RECNO && + (ret = __bam_savekey(dbc, 1, &start)) != 0) + goto err; + + pg = cp->csp->page; + if ((ret = __bam_stkrel(dbc, + pgno != PGNO(pg) ? 0 : STK_NOLOCK)) != 0) + goto err; + + /* We are locking subtrees, so drop the write locks asap. */ + if (local_txn && pgno != PGNO(pg)) + break; + /* We really break from the loop above on this condition. */ + } while (pgno != BAM_ROOT_PGNO(dbc)); + + if ((ret = __LPUT(dbc, root_lock)) != 0) + goto err; + if ((ret = __dbc_close(dbc)) != 0) + goto err; + dbc = NULL; + if (local_txn) { + if ((ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0) + goto err; + txn = NULL; + LOCK_INIT(meta_lock); + } + if (pgno != bt->bt_root) + goto new_txn; + + /* + * Attempt to move the subdatabase metadata and/or root pages. + * Grab the metadata page and verify the revision, if its out + * of date reopen and try again. + */ +again: if (F_ISSET(dbp, DB_AM_SUBDB) && + (bt->bt_root > c_data->compact_truncate || + bt->bt_meta > c_data->compact_truncate)) { + if (local_txn && txn == NULL && + (ret = __txn_begin(dbp->env, ip, txn_orig, &txn, 0)) != 0) + goto err; + if (dbc == NULL && + (ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + goto err; + if ((ret = __db_lget(dbc, + 0, bt->bt_meta, DB_LOCK_WRITE, 0, &meta_lock)) != 0) + goto err; + if ((ret = __memp_fget(dbp->mpf, &bt->bt_meta, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + if (bt->revision != dbp->mpf->mfp->revision) { + if ((ret = __memp_fput(dbp->mpf, + ip, meta, dbp->priority)) != 0) + goto err; + meta = NULL; + if (local_txn) { + if ((ret = __dbc_close(dbc)) != 0) + goto err; + dbc = NULL; + ret = __txn_abort(txn); + txn = NULL; + if (ret != 0) + goto err; + } else { + if ((ret = __LPUT(dbc, meta_lock)) != 0) + goto err; + } + if ((ret = __db_reopen(dbc)) != 0) + goto err; + goto again; + } + if (PGNO(meta) > c_data->compact_truncate) { + dbmeta = (DBMETA *)meta; + ret = __db_move_metadata(dbc, &dbmeta, c_data); + meta = (BTMETA 
*)dbmeta; + if (ret != 0) + goto err; + } + if (bt->bt_root > c_data->compact_truncate) { + if ((ret = __db_lget(dbc, 0, + bt->bt_root, DB_LOCK_WRITE, 0, &root_lock)) != 0) + goto err; + if ((ret = __memp_fget(dbp->mpf, + &bt->bt_root, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &root)) != 0) + goto err; + c_data->compact_pages_examine++; + /* + * Bump the revision first since any reader will be + * blocked on the latch on the old page. That latch + * will get dropped when we free the page and the + * reader will do a __db_reopen and wait till the meta + * page latch is released. + */ + ++dbp->mpf->mfp->revision; + if ((ret = __db_exchange_page(dbc, + &root, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0) + goto err; + if (PGNO(root) == bt->bt_root) + goto err; + if (DBC_LOGGING(dbc)) { + if ((ret = + __bam_root_log(dbp, txn, &LSN(meta), 0, + PGNO(meta), PGNO(root), &LSN(meta))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(meta)); + bt->bt_root = meta->root = PGNO(root); + bt->revision = dbp->mpf->mfp->revision; + if ((ret = __memp_fput(dbp->mpf, + ip, root, dbp->priority)) != 0) + goto err; + root = NULL; + if (txn == NULL && (ret = __LPUT(dbc, root_lock)) != 0) + goto err; + + } + if ((ret = __memp_fput(dbp->mpf, ip, meta, dbp->priority)) != 0) + goto err; + meta = NULL; + if ((ret = __dbc_close(dbc)) != 0) + goto err; + dbc = NULL; + if (local_txn) { + ret = __txn_commit(txn, DB_TXN_NOSYNC); + txn = NULL; + LOCK_INIT(meta_lock); + LOCK_INIT(root_lock); + } + } + +err: if (txn != NULL && ret != 0) + sflag = STK_PGONLY; + else + sflag = 0; + if (txn == NULL) { + if ((t_ret = __LPUT(dbc, meta_lock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, root_lock)) != 0 && ret == 0) + ret = t_ret; + } + if (meta != NULL && (t_ret = __memp_fput(dbp->mpf, + ip, meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (root != NULL && (t_ret = __memp_fput(dbp->mpf, + ip, root, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL && (t_ret = 
__bam_stkrel(dbc, sflag)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if (local_txn && + txn != NULL && (t_ret = __txn_abort(txn)) != 0 && ret == 0) + ret = t_ret; + if (start.data != NULL) + __os_free(dbp->env, start.data); + return (ret); +} + +#endif diff --git a/src/btree/bt_compare.c b/src/btree/bt_compare.c new file mode 100644 index 00000000..74be5110 --- /dev/null +++ b/src/btree/bt_compare.c @@ -0,0 +1,213 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" + +/* + * __bam_cmp -- + * Compare a key to a given record. + * + * PUBLIC: int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); + */ +int +__bam_cmp(dbc, dbt, h, indx, func, cmpp) + DBC *dbc; + const DBT *dbt; + PAGE *h; + u_int32_t indx; + int (*func)__P((DB *, const DBT *, const DBT *)); + int *cmpp; +{ + BINTERNAL *bi; + BKEYDATA *bk; + BOVERFLOW *bo; + DB *dbp; + DBT pg_dbt; + + dbp = dbc->dbp; + + /* + * Returns: + * < 0 if dbt is < page record + * = 0 if dbt is = page record + * > 0 if dbt is > page record + * + * !!! + * We do not clear the pg_dbt DBT even though it's likely to contain + * random bits. That should be okay, because the app's comparison + * routine had better not be looking at fields other than data, size + * and app_data. We don't clear it because we go through this path a + * lot and it's expensive. 
+ */ + switch (TYPE(h)) { + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + bk = GET_BKEYDATA(dbp, h, indx); + if (B_TYPE(bk->type) == B_OVERFLOW) + bo = (BOVERFLOW *)bk; + else { + pg_dbt.app_data = NULL; + pg_dbt.data = bk->data; + pg_dbt.size = bk->len; + *cmpp = func(dbp, dbt, &pg_dbt); + return (0); + } + break; + case P_IBTREE: + /* + * The following code guarantees that the left-most key on an + * internal page at any place in the tree sorts less than any + * user-specified key. The reason is that if we have reached + * this internal page, we know the user key must sort greater + * than the key we're storing for this page in any internal + * pages at levels above us in the tree. It then follows that + * any user-specified key cannot sort less than the first page + * which we reference, and so there's no reason to call the + * comparison routine. While this may save us a comparison + * routine call or two, the real reason for this is because + * we don't maintain a copy of the smallest key in the tree, + * so that we don't have to update all the levels of the tree + * should the application store a new smallest key. And, so, + * we may not have a key to compare, which makes doing the + * comparison difficult and error prone. + */ + if (indx == 0) { + *cmpp = 1; + return (0); + } + + bi = GET_BINTERNAL(dbp, h, indx); + if (B_TYPE(bi->type) == B_OVERFLOW) + bo = (BOVERFLOW *)(bi->data); + else { + pg_dbt.app_data = NULL; + pg_dbt.data = bi->data; + pg_dbt.size = bi->len; + *cmpp = func(dbp, dbt, &pg_dbt); + return (0); + } + break; + default: + return (__db_pgfmt(dbp->env, PGNO(h))); + } + + /* + * Overflow. + */ + return (__db_moff(dbc, dbt, bo->pgno, bo->tlen, + func == __bam_defcmp ? NULL : func, cmpp)); +} + +/* + * __bam_defcmp -- + * Default comparison routine. 
+ * + * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *)); + */ +int +__bam_defcmp(dbp, a, b) + DB *dbp; + const DBT *a, *b; +{ + size_t len; + u_int8_t *p1, *p2; + + COMPQUIET(dbp, NULL); + + /* + * Returns: + * < 0 if a is < b + * = 0 if a is = b + * > 0 if a is > b + * + * XXX + * If a size_t doesn't fit into a long, or if the difference between + * any two characters doesn't fit into an int, this routine can lose. + * What we need is a signed integral type that's guaranteed to be at + * least as large as a size_t, and there is no such thing. + */ + len = a->size > b->size ? b->size : a->size; + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) + if (*p1 != *p2) + return ((long)*p1 - (long)*p2); + return ((long)a->size - (long)b->size); +} + +/* + * __bam_defpfx -- + * Default prefix routine. + * + * PUBLIC: size_t __bam_defpfx __P((DB *, const DBT *, const DBT *)); + */ +size_t +__bam_defpfx(dbp, a, b) + DB *dbp; + const DBT *a, *b; +{ + size_t cnt, len; + u_int8_t *p1, *p2; + + COMPQUIET(dbp, NULL); + + cnt = 1; + len = a->size > b->size ? b->size : a->size; + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) + if (*p1 != *p2) + return (cnt); + + /* + * They match up to the smaller of the two sizes. + * Collate the longer after the shorter. + */ + if (a->size < b->size) + return (a->size + 1); + if (b->size < a->size) + return (b->size + 1); + return (b->size); +} diff --git a/src/btree/bt_compress.c b/src/btree/bt_compress.c new file mode 100644 index 00000000..c768172b --- /dev/null +++ b/src/btree/bt_compress.c @@ -0,0 +1,3023 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" + +#ifdef HAVE_COMPRESSION + +static int __bam_compress_marshal_data __P((DB *, const DBT *, DBT *)); +static int __bam_compress_set_dbt __P((DB *, DBT *, const void *, u_int32_t)); +static int __bamc_compress_del_and_get_next __P((DBC *, DBT *, DBT *)); +static int __bamc_compress_get_bothc __P((DBC *, DBT *, u_int32_t)); +static int __bamc_compress_get_multiple_key __P((DBC *, DBT *, u_int32_t)); +static int __bamc_compress_get_multiple __P((DBC *, DBT *, DBT *,u_int32_t)); +static int __bamc_compress_get_next __P((DBC *, u_int32_t)); +static int __bamc_compress_get_next_dup __P((DBC *, DBT *, u_int32_t)); +static int __bamc_compress_get_next_nodup __P((DBC *, u_int32_t)); +static int __bamc_compress_get_prev __P((DBC *, u_int32_t)); +static int __bamc_compress_get_prev_dup __P((DBC *, u_int32_t)); +static int __bamc_compress_get_prev_nodup __P((DBC *, u_int32_t)); +static int __bamc_compress_get_set __P((DBC *, + DBT *, DBT *, u_int32_t, u_int32_t)); +static int __bamc_compress_ibulk_del __P((DBC *, DBT *, u_int32_t)); +static int __bamc_compress_idel __P((DBC *, u_int32_t)); +static int __bamc_compress_iget __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __bamc_compress_iput __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __bamc_compress_relocate __P((DBC *)); +static void __bamc_compress_reset __P((DBC *)); +static int __bamc_compress_seek __P((DBC *, + const DBT *, const DBT *, u_int32_t)); +static int __bamc_compress_store __P((DBC *, + DBT *, DBT*, DBT **, DBT **, DBT *, DBT *)); +static int __bamc_next_decompress __P((DBC *)); +static int __bamc_start_decompress __P((DBC *)); + +/* + * Call __dbc_iget(), resizing DBTs if DB_BUFFER_SMALL is returned. + * We're always using a transient cursor when this macro is used, so + * we have to replace the OP with DB_CURRENT when we retry. 
+ */ +#define CMP_IGET_RETRY(ret, dbc, dbt1, dbt2, flags) do { \ + DB_ASSERT((dbc)->env, F_ISSET((dbt1), DB_DBT_USERMEM)); \ + DB_ASSERT((dbc)->env, F_ISSET((dbt2), DB_DBT_USERMEM)); \ + if (((ret) =__dbc_iget((dbc), \ + (dbt1), (dbt2), (flags))) == DB_BUFFER_SMALL) { \ + if ((CMP_RESIZE_DBT((ret), (dbc)->env, (dbt1))) != 0) \ + break; \ + if ((CMP_RESIZE_DBT((ret), (dbc)->env, (dbt2))) != 0) \ + break; \ + (ret) = __dbc_iget((dbc), (dbt1), (dbt2), \ + ((flags) & ~DB_OPFLAGS_MASK) | DB_CURRENT); \ + } \ +} while (0) + +#define CMP_INIT_DBT(dbt) do { \ + (dbt)->data = NULL; \ + (dbt)->size = 0; \ + (dbt)->ulen = 0; \ + (dbt)->doff = 0; \ + (dbt)->dlen = 0; \ + (dbt)->flags = DB_DBT_USERMEM; \ + (dbt)->app_data = NULL; \ +} while (0) + +#define CMP_FREE_DBT(env, dbt) do { \ + DB_ASSERT((env), F_ISSET((dbt), DB_DBT_USERMEM)); \ + __os_free((env), (dbt)->data); \ +} while (0) + +#define CMP_RESIZE_DBT(ret, env, dbt) \ + (((dbt)->size > (dbt)->ulen) ? \ + ((((ret) = __os_realloc((env), (dbt)->size, &(dbt)->data)) \ + != 0) ? (ret) : (((dbt)->ulen = (dbt)->size), 0)) : 0) + +static int +__bam_compress_set_dbt(dbp, dbt, data, size) + DB *dbp; + DBT *dbt; + const void *data; + u_int32_t size; +{ + int ret; + + ret = 0; + DB_ASSERT(dbp->env, F_ISSET(dbt, DB_DBT_USERMEM)); + + dbt->size = size; + if (CMP_RESIZE_DBT(ret, dbp->env, dbt) != 0) + return (ret); + + memcpy(dbt->data, data, size); + return (0); +} + +/******************************************************************************/ + +/* + * Very simple key/data stream to give __bamc_compress_merge_insert() + * a source of data to work on. + */ +struct __bam_compress_stream; +typedef struct __bam_compress_stream BTREE_COMPRESS_STREAM; +struct __bam_compress_stream +{ + int (*next)(BTREE_COMPRESS_STREAM *, DBT *, DBT *); + + void *kptr, *dptr; + DBT *key, *data; +}; + +/* + * These function prototypes can not go at the beginning because they rely on + * on BTREE_COMPRESS_STREAM defined above. 
+ * The prototypes are required to avoid the Microsoft C++ compiler generating + * warnings about mismatching parameter lists. + */ +static int __bam_cs_next_done __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static int __bam_cs_single_next __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static void __bam_cs_create_single + __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static int __bam_cs_single_keyonly_next + __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static void __bam_cs_create_single_keyonly + __P((BTREE_COMPRESS_STREAM *, DBT *)); +static int __bam_cs_multiple_key_next + __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static void __bam_cs_create_multiple_key __P((BTREE_COMPRESS_STREAM *, DBT *)); +static int __bam_cs_multiple_next __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static void __bam_cs_create_multiple + __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static int __bam_cs_multiple_keyonly_next + __P((BTREE_COMPRESS_STREAM *, DBT *, DBT *)); +static void __bam_cs_create_multiple_keyonly + __P((BTREE_COMPRESS_STREAM *, DBT *)); +static int __bamc_compress_merge_insert + __P((DBC *, BTREE_COMPRESS_STREAM *, u_int32_t *, u_int32_t)); +static int __bamc_compress_merge_delete + __P((DBC *, BTREE_COMPRESS_STREAM *, u_int32_t *)); +static int __bamc_compress_merge_delete_dups + __P((DBC *, BTREE_COMPRESS_STREAM *, u_int32_t *)); + +/* BTREE_COMPRESS_STREAM->next() for when the data has finished. */ +static int +__bam_cs_next_done(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + COMPQUIET(stream, NULL); + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + return (0); +} + +/* BTREE_COMPRESS_STREAM->next() for a single key/data pair. 
*/ +static int +__bam_cs_single_next(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + key->data = stream->key->data; + key->size = stream->key->size; + data->data = stream->data->data; + data->size = stream->data->size; + stream->next = __bam_cs_next_done; + return (1); +} + +/* Create a BTREE_COMPRESS_STREAM for a single key/data pair */ +static void +__bam_cs_create_single(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + stream->next = __bam_cs_single_next; + stream->key = key; + stream->data = data; +} + +/* BTREE_COMPRESS_STREAM->next() for a single key. */ +static int +__bam_cs_single_keyonly_next(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + key->data = stream->key->data; + key->size = stream->key->size; + if (data != NULL) { + data->data = NULL; + data->size = 0; + } + stream->next = __bam_cs_next_done; + return (1); +} + +/* Create a BTREE_COMPRESS_STREAM for a single key/data pair */ +static void +__bam_cs_create_single_keyonly(stream, key) + BTREE_COMPRESS_STREAM *stream; + DBT *key; +{ + stream->next = __bam_cs_single_keyonly_next; + stream->key = key; +} + +/* + * BTREE_COMPRESS_STREAM->next() for a single buffer in the DB_MULTIPLE_KEY + * format. + */ +static int +__bam_cs_multiple_key_next(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + DB_MULTIPLE_KEY_NEXT(stream->kptr, stream->key, key->data, key->size, + data->data, data->size); + if (key->data == NULL) { + stream->next = __bam_cs_next_done; + return (0); + } + return (1); +} + +/* + * Create a BTREE_COMPRESS_STREAM for a single buffer in the DB_MULTIPLE_KEY + * format. + */ +static void +__bam_cs_create_multiple_key(stream, multiple) + BTREE_COMPRESS_STREAM *stream; + DBT *multiple; +{ + stream->next = __bam_cs_multiple_key_next; + stream->key = multiple; + DB_MULTIPLE_INIT(stream->kptr, stream->key); +} + +/* BTREE_COMPRESS_STREAM->next() for two buffers in the DB_MULTIPLE format. 
*/ +static int +__bam_cs_multiple_next(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + DB_MULTIPLE_NEXT(stream->kptr, stream->key, key->data, key->size); + DB_MULTIPLE_NEXT(stream->dptr, stream->data, data->data, data->size); + if (key->data == NULL || data->data == NULL) { + stream->next = __bam_cs_next_done; + return (0); + } + return (1); +} + +/* Create a BTREE_COMPRESS_STREAM for two buffers in the DB_MULTIPLE format. */ +static void +__bam_cs_create_multiple(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + stream->next = __bam_cs_multiple_next; + stream->key = key; + stream->data = data; + DB_MULTIPLE_INIT(stream->kptr, stream->key); + DB_MULTIPLE_INIT(stream->dptr, stream->data); +} + +/* + * BTREE_COMPRESS_STREAM->next() for a single buffer in the DB_MULTIPLE + * format. + */ +static int +__bam_cs_multiple_keyonly_next(stream, key, data) + BTREE_COMPRESS_STREAM *stream; + DBT *key, *data; +{ + DB_MULTIPLE_NEXT(stream->kptr, stream->key, key->data, key->size); + if (key->data == NULL) { + stream->next = __bam_cs_next_done; + return (0); + } + if (data != NULL) { + data->data = NULL; + data->size = 0; + } + return (1); +} + +/* + * Create a BTREE_COMPRESS_STREAM for a single buffer in the DB_MULTIPLE + * format. + */ +static void +__bam_cs_create_multiple_keyonly(stream, key) + BTREE_COMPRESS_STREAM *stream; + DBT *key; +{ + stream->next = __bam_cs_multiple_keyonly_next; + stream->key = key; + DB_MULTIPLE_INIT(stream->kptr, stream->key); +} + +/******************************************************************************/ + +/* + * Marshal data in initial data format into destbuf, resizing destbuf if + * necessary. 
+ */ +static int +__bam_compress_marshal_data(dbp, data, destbuf) + DB *dbp; + const DBT *data; + DBT *destbuf; +{ + int ret; + u_int8_t *ptr; + + ret = 0; + DB_ASSERT(dbp->env, F_ISSET(destbuf, DB_DBT_USERMEM)); + + destbuf->size = __db_compress_count_int(data->size); + destbuf->size += data->size; + if (CMP_RESIZE_DBT(ret, dbp->env, destbuf) != 0) + return (ret); + + ptr = (u_int8_t*)destbuf->data; + ptr += __db_compress_int(ptr, data->size); + memcpy(ptr, data->data, data->size); + + return (0); +} + +/* + * Unmarshal initial data from source into data - does not copy, points + * into source. + */ +#define CMP_UNMARSHAL_DATA(src, dest) do { \ + (dest)->data = ((u_int8_t*)(src)->data) + \ + __db_decompress_int32((u_int8_t*)(src)->data, \ + &(dest)->size); \ +} while (0) + +/******************************************************************************/ + +/* + * __bam_compress_dupcmp -- + * Duplicate comparison function for compressed BTrees. + * + * PUBLIC: int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *)); + */ +int +__bam_compress_dupcmp(db, a, b) + DB *db; + const DBT *a; + const DBT *b; +{ + DBT dcmp_a, dcmp_b; + + /* Decompress the initial data in a */ + CMP_UNMARSHAL_DATA(a, &dcmp_a); + dcmp_a.ulen = 0; + dcmp_a.doff = 0; + dcmp_a.dlen = 0; + dcmp_a.flags = 0; + dcmp_a.app_data = 0; + + /* Decompress the initial data in b */ + CMP_UNMARSHAL_DATA(b, &dcmp_b); + dcmp_b.ulen = 0; + dcmp_b.doff = 0; + dcmp_b.dlen = 0; + dcmp_b.flags = 0; + dcmp_b.app_data = 0; + + /* Call the user's duplicate compare function */ + return ((BTREE *)db->bt_internal)-> + compress_dup_compare(db, &dcmp_a, &dcmp_b); +} + +/* + * __bam_defcompress -- + * Default compression routine. 
+ * + * PUBLIC: int __bam_defcompress __P((DB *, const DBT *, const DBT *, + * PUBLIC: const DBT *, const DBT *, DBT *)); + */ +int +__bam_defcompress(dbp, prevKey, prevData, key, data, dest) + DB *dbp; + const DBT *prevKey, *prevData, *key, *data; + DBT *dest; +{ + u_int8_t *ptr; + const u_int8_t *k, *p; + size_t len, prefix, suffix; + + COMPQUIET(dbp, NULL); + + k = (const u_int8_t*)key->data; + p = (const u_int8_t*)prevKey->data; + len = key->size > prevKey->size ? prevKey->size : key->size; + for (; len-- && *k == *p; ++k, ++p) + continue; + + prefix = (size_t)(k - (u_int8_t*)key->data); + suffix = key->size - prefix; + + if (prefix == prevKey->size && suffix == 0) { + /* It's a duplicate - do prefix compression on the value */ + k = (const u_int8_t*)data->data; + p = (const u_int8_t*)prevData->data; + len = data->size > prevData->size ? prevData->size : data->size; + for (; len-- && *k == *p; ++k, ++p) + continue; + + prefix = (size_t)(k - (u_int8_t*)data->data); + suffix = data->size - prefix; + + /* Check that we have enough space in dest */ + dest->size = (u_int32_t)(1 + __db_compress_count_int(prefix) + + __db_compress_count_int(suffix) + suffix); + if (dest->size > dest->ulen) + return (DB_BUFFER_SMALL); + + /* Magic identifying byte */ + ptr = (u_int8_t*)dest->data; + *ptr = CMP_INT_SPARE_VAL; + ++ptr; + + /* prefix length */ + ptr += __db_compress_int(ptr, prefix); + + /* suffix length */ + ptr += __db_compress_int(ptr, suffix); + + /* suffix */ + memcpy(ptr, k, suffix); + + return (0); + } + + /* Check that we have enough space in dest */ + dest->size = (u_int32_t)(__db_compress_count_int(prefix) + + __db_compress_count_int(suffix) + + __db_compress_count_int(data->size) + suffix + data->size); + if (dest->size > dest->ulen) + return (DB_BUFFER_SMALL); + + /* prefix length */ + ptr = (u_int8_t*)dest->data; + ptr += __db_compress_int(ptr, prefix); + + /* suffix length */ + ptr += __db_compress_int(ptr, suffix); + + /* data length */ + ptr += 
__db_compress_int(ptr, data->size); + + /* suffix */ + memcpy(ptr, k, suffix); + ptr += suffix; + + /* data */ + memcpy(ptr, data->data, data->size); + + return (0); +} + +/* + * __bam_defdecompress -- + * Default decompression routine. + * + * PUBLIC: int __bam_defdecompress __P((DB *, const DBT *, const DBT *, DBT *, + * PUBLIC: DBT *, DBT *)); + */ +int +__bam_defdecompress(dbp, prevKey, prevData, compressed, destKey, destData) + DB *dbp; + const DBT *prevKey, *prevData; + DBT *compressed, *destKey, *destData; +{ + u_int8_t *s, *d; + u_int32_t prefix, suffix, size; + + COMPQUIET(dbp, NULL); + + /* + * Check for the magic identifying byte, that tells us that this is a + * compressed duplicate value. + */ + s = (u_int8_t*)compressed->data; + if (*s == CMP_INT_SPARE_VAL) { + ++s; + size = 1; + + /* Unmarshal prefix and suffix */ + size += __db_decompress_count_int(s); + if (size > compressed->size) + return (EINVAL); + s += __db_decompress_int32(s, &prefix); + + size += __db_decompress_count_int(s); + if (size > compressed->size) + return (EINVAL); + s += __db_decompress_int32(s, &suffix); + + /* Check destination lengths */ + destKey->size = prevKey->size; + destData->size = prefix + suffix; + if (destKey->size > destKey->ulen || + destData->size > destData->ulen) + return (DB_BUFFER_SMALL); + + /* Write the key */ + memcpy(destKey->data, prevKey->data, destKey->size); + + /* Write the prefix */ + if (prefix > prevData->size) + return (EINVAL); + d = (u_int8_t*)destData->data; + memcpy(d, prevData->data, prefix); + d += prefix; + + /* Write the suffix */ + size += suffix; + if (size > compressed->size) + return (EINVAL); + memcpy(d, s, suffix); + s += suffix; + + /* Return bytes read */ + compressed->size = (u_int32_t)(s - (u_int8_t*)compressed->data); + return (0); + } + + /* Unmarshal prefix, suffix and data length */ + size = __db_decompress_count_int(s); + if (size > compressed->size) + return (EINVAL); + s += __db_decompress_int32(s, &prefix); + + size += 
__db_decompress_count_int(s); + if (size > compressed->size) + return (EINVAL); + s += __db_decompress_int32(s, &suffix); + + size += __db_decompress_count_int(s); + if (size > compressed->size) + return (EINVAL); + s += __db_decompress_int32(s, &destData->size); + + /* Check destination lengths */ + destKey->size = prefix + suffix; + if (destKey->size > destKey->ulen || destData->size > destData->ulen) + return (DB_BUFFER_SMALL); + + /* Write the prefix */ + if (prefix > prevKey->size) + return (EINVAL); + d = (u_int8_t*)destKey->data; + memcpy(d, prevKey->data, prefix); + d += prefix; + + /* Write the suffix */ + size += suffix; + if (size > compressed->size) + return (EINVAL); + memcpy(d, s, suffix); + s += suffix; + + /* Write the data */ + size += destData->size; + if (size > compressed->size) + return (EINVAL); + memcpy(destData->data, s, destData->size); + s += destData->size; + + /* Return bytes read */ + compressed->size = (u_int32_t)(s - (u_int8_t*)compressed->data); + return (0); +} + +/******************************************************************************/ + +/* + * Set dbc up to start decompressing the compressed key/data pair, dbc->key1 + * and dbc->compressed. + */ +static int +__bamc_start_decompress(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + int ret; + u_int32_t datasize; + + cp = (BTREE_CURSOR *)dbc->internal; + + cp->prevKey = NULL; + cp->prevData = NULL; + cp->currentKey = &cp->key1; + cp->currentData = &cp->data1; + cp->compcursor = (u_int8_t*)cp->compressed.data; + cp->compend = cp->compcursor + cp->compressed.size; + cp->prevcursor = NULL; + cp->prev2cursor = NULL; + + /* Unmarshal the first data */ + cp->compcursor += __db_decompress_int32(cp->compcursor, &datasize); + ret = __bam_compress_set_dbt(dbc->dbp, + cp->currentData, cp->compcursor, datasize); + + if (ret == 0) + cp->compcursor += datasize; + return (ret); +} + +/* Decompress the next key/data pair from dbc->compressed. 
*/ +static int +__bamc_next_decompress(dbc) + DBC *dbc; +{ + DBT compressed; + int ret; + BTREE_CURSOR *cp; + DB *db; + + ret = 0; + cp = (BTREE_CURSOR *)dbc->internal; + db = dbc->dbp; + + if (cp->compcursor >= cp->compend) + return (DB_NOTFOUND); + + cp->prevKey = cp->currentKey; + cp->prevData = cp->currentData; + cp->prev2cursor = cp->prevcursor; + cp->prevcursor = cp->compcursor; + + if (cp->currentKey == &cp->key1) { + cp->currentKey = &cp->key2; + cp->currentData = &cp->data2; + } else { + cp->currentKey = &cp->key1; + cp->currentData = &cp->data1; + } + + compressed.flags = DB_DBT_USERMEM; + compressed.data = (void*)cp->compcursor; + compressed.ulen = compressed.size = + (u_int32_t)(cp->compend - cp->compcursor); + compressed.app_data = NULL; + + while ((ret = ((BTREE *)db->bt_internal)->bt_decompress(db, + cp->prevKey, cp->prevData, &compressed, + cp->currentKey, cp->currentData)) == DB_BUFFER_SMALL) { + if (CMP_RESIZE_DBT(ret, dbc->env, cp->currentKey) != 0) + break; + if (CMP_RESIZE_DBT(ret, dbc->env, cp->currentData) != 0) + break; + } + + if (ret == 0) + cp->compcursor += compressed.size; + return (ret); +} + +/* + * Store key and data into destkey and destbuf, using the compression + * callback given. + */ +static int +__bamc_compress_store(dbc, key, data, prevKey, prevData, destkey, destbuf) + DBC *dbc; + DBT *key, *data; + DBT **prevKey, **prevData; + DBT *destkey, *destbuf; +{ + int ret; + DBT dest; + + if (*prevKey == 0) { + if ((ret = __bam_compress_set_dbt(dbc->dbp, + destkey, key->data, key->size)) != 0) + return (ret); + + /* Marshal data - resize if it won't fit */ + ret = __bam_compress_marshal_data(dbc->dbp, data, destbuf); + + } else if (((BTREE_CURSOR *)dbc->internal)->ovflsize > destbuf->size) { + /* + * Don't write more than cp->ovflsize bytes to the destination + * buffer - destbuf must be at least cp->ovflsize in size. 
+ */ + dest.flags = DB_DBT_USERMEM; + dest.data = (u_int8_t*)destbuf->data + destbuf->size; + dest.ulen = + ((BTREE_CURSOR *)dbc->internal)->ovflsize - destbuf->size; + dest.size = 0; + dest.app_data = NULL; + + ret = ((BTREE *)dbc->dbp->bt_internal)->bt_compress( + dbc->dbp, *prevKey, *prevData, key, data, &dest); + + if (ret == 0) + destbuf->size += dest.size; + } else + ret = DB_BUFFER_SMALL; + + if (ret == 0) { + *prevKey = key; + *prevData = data; + } + + return (ret); +} + +/* + * Move dbc->dbc to the correct position to start linear searching for + * seek_key/seek_data - the biggest key smaller than or equal to + * seek_key/seek_data. + */ +static int +__bamc_compress_seek(dbc, seek_key, seek_data, flags) + DBC *dbc; + const DBT *seek_key; + const DBT *seek_data; + u_int32_t flags; +{ + int ret; + u_int32_t method; + DB *dbp; + BTREE_CURSOR *cp; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + + if ((ret = __bam_compress_set_dbt( + dbp, &cp->key1, seek_key->data, seek_key->size)) != 0) + return (ret); + + /* + * We allow seek_data to be 0 for __bamc_compress_get_set() with + * DB_SET + */ + if (F_ISSET(dbp, DB_AM_DUPSORT) && seek_data != NULL) { + if ((ret = __bam_compress_marshal_data( + dbp, seek_data, &cp->compressed)) != 0) + return (ret); + + method = DB_GET_BOTH_LTE; + } else + method = DB_SET_LTE; + + CMP_IGET_RETRY(ret, dbc, &cp->key1, &cp->compressed, method | flags); + + if (ret == 0 && + F_ISSET(dbp, DB_AM_DUPSORT) && seek_data == NULL && + __db_compare_both(dbp, seek_key, 0, &cp->key1, 0) == 0) { + /* + * Some entries for seek_key might be in the previous chunk, + * so we need to start searching there. 
+ */ + CMP_IGET_RETRY(ret, + dbc, &cp->key1, &cp->compressed, DB_PREV | flags); + if (ret == DB_NOTFOUND) { + /* No previous, we must need the first entry */ + CMP_IGET_RETRY(ret, + dbc, &cp->key1, &cp->compressed, DB_FIRST | flags); + } + } + + return (ret); +} + +/* Reset the cursor to an uninitialized state */ +static void +__bamc_compress_reset(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + + cp = (BTREE_CURSOR *)dbc->internal; + + cp->prevKey = 0; + cp->prevData = 0; + cp->currentKey = 0; + cp->currentData = 0; + cp->compcursor = 0; + cp->compend = 0; + cp->prevcursor = 0; + cp->prev2cursor = 0; + + F_CLR(cp, C_COMPRESS_DELETED|C_COMPRESS_MODIFIED); +} + +/* + * Duplicate the cursor and delete the current entry, move the original cursor + * on and then close the cursor we used to delete. We do that to make sure that + * the close method runs __bamc_physdel(), and actually gets rid of the deleted + * entry! + */ +static int +__bamc_compress_del_and_get_next(dbc, nextk, nextc) + DBC *dbc; + DBT *nextk, *nextc; +{ + int ret, ret_n; + DBC *dbc_n; + + if ((ret = __dbc_dup(dbc, &dbc_n, DB_POSITION | DB_SHALLOW_DUP)) != 0) + return (ret); + F_SET(dbc_n, DBC_TRANSIENT); + + if ((ret = __dbc_idel(dbc_n, 0)) != 0) + goto err; + + /* Read the next position */ + CMP_IGET_RETRY(ret, dbc, nextk, nextc, DB_NEXT); + + err: + if ((ret_n = __dbc_close(dbc_n)) != 0 && ret == 0) + ret = ret_n; + + /* No need to relocate this cursor */ + F_CLR((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED); + + return (ret); +} + +/* + * Duplicate the cursor, re-locate the position that this cursor pointed to + * using the duplicate (it may have been deleted), and then swap + * the cursors. We do that to make sure that the close method runs + * __bamc_physdel(), and gets rid of the entry that may have been deleted. 
+ */ +static int +__bamc_compress_relocate(dbc) + DBC *dbc; +{ + int ret, t_ret; + BTREE_CURSOR *cp, *cp_n; + DBC *dbc_n; + + cp = (BTREE_CURSOR *)dbc->internal; + + if ((ret = __dbc_dup(dbc, &dbc_n, 0)) != 0) + return (ret); + F_SET(dbc_n, DBC_TRANSIENT); + + cp_n = (BTREE_CURSOR *)dbc_n->internal; + + if (F_ISSET(cp, C_COMPRESS_DELETED)) { + /* Find the position after the deleted entry again */ + ret = __bamc_compress_get_set( + dbc_n, &cp->del_key, &cp->del_data, 0, 0); + if (ret == DB_NOTFOUND) { + __bamc_compress_reset(dbc_n); + ret = 0; + } else if (ret != 0) + goto err; + + F_SET(cp_n, C_COMPRESS_DELETED); + + } else if (cp->currentKey != NULL) { + /* Find the current entry again */ + ret = __bamc_compress_get_set( + dbc_n, cp->currentKey, cp->currentData, + F_ISSET(dbc->dbp, DB_AM_DUPSORT) ? DB_GET_BOTH : DB_SET, 0); + + if (ret == DB_NOTFOUND) { + /* The current entry has been deleted */ + if ((ret = __bam_compress_set_dbt(dbc_n->dbp, + &cp_n->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + if ((ret = __bam_compress_set_dbt(dbc_n->dbp, + &cp_n->del_data, cp->currentData->data, + cp->currentData->size)) != 0) + return (ret); + F_SET(cp_n, C_COMPRESS_DELETED); + ret = 0; + } else if (ret != 0) + goto err; + } + + err: + /* Cleanup and cursor resolution. This also clears the + C_COMPRESS_MODIFIED flag. */ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/******************************************************************************/ + +#define CMP_STORE(key, data) do { \ + while ((ret = __bamc_compress_store(dbc, (key), (data), \ + &prevDestKey, &prevDestData, &destkey, &destbuf)) \ + == DB_BUFFER_SMALL) { \ + if ((ret = __dbc_iput(dbc, \ + &destkey, &destbuf, DB_KEYLAST)) != 0) \ + goto end; \ + prevDestKey = NULL; \ + prevDestData = NULL; \ + destbuf.size = 0; \ + } \ +} while (0) + +/* Merge the sorted key/data pairs from stream into the compressed database. 
*/ +static int +__bamc_compress_merge_insert(dbc, stream, countp, flags) + DBC *dbc; + BTREE_COMPRESS_STREAM *stream; + u_int32_t *countp; + u_int32_t flags; +{ + DBT ikey1, ikey2, idata1, idata2, nextk, nextc, nextd, destkey, destbuf; + DBT *ikey, *idata, *prevIkey, *prevIdata, *prevDestKey, *prevDestData; + int ret, bulk_ret, cmp, nextExists, moreCompressed, iSmallEnough; + int moreStream; + u_int32_t chunk_count; + ENV *env; + BTREE_CURSOR *cp; + DB *dbp; + + env = dbc->env; + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + bulk_ret = 0; + + memset(&ikey1, 0, sizeof(DBT)); + memset(&ikey2, 0, sizeof(DBT)); + memset(&idata1, 0, sizeof(DBT)); + memset(&idata2, 0, sizeof(DBT)); + + CMP_INIT_DBT(&nextk); + CMP_INIT_DBT(&nextc); + memset(&nextd, 0, sizeof(DBT)); + + CMP_INIT_DBT(&destkey); + CMP_INIT_DBT(&destbuf); + if ((ret = __os_malloc(env, cp->ovflsize, &destbuf.data)) != 0) + goto end; + destbuf.ulen = cp->ovflsize; + + if (countp != NULL) + *countp = 0; + chunk_count = 0; + + /* Get the first input key and data */ + ret = 0; + prevIkey = NULL; + prevIdata = NULL; + ikey = &ikey1; + idata = &idata1; + if (stream->next(stream, ikey, idata) == 0) + goto end; + + prevDestKey = NULL; + prevDestData = NULL; + + moreStream = 1; + while (moreStream != 0) { + nextExists = 1; + moreCompressed = 1; + + /* Seek the ikey/idata position */ + ret = __bamc_compress_seek(dbc, ikey, idata, 0); + if (ret == 0) { + /* + * Delete the key - we might overwrite it below + * but it's safer to just always delete it, and it + * doesn't seem significantly slower to do so. 
+ */ + ret = __bamc_compress_del_and_get_next(dbc, &nextk, + &nextc); + if (ret == DB_NOTFOUND) { + ret = 0; + nextExists = 0; + } else if (ret == 0) { + CMP_UNMARSHAL_DATA(&nextc, &nextd); + } else + goto end; + ret = __bamc_start_decompress(dbc); + } else if (ret == DB_NOTFOUND) { + moreCompressed = 0; + + /* Read the next position */ + CMP_IGET_RETRY(ret, dbc, &nextk, &nextc, DB_FIRST); + if (ret == DB_NOTFOUND) { + ret = 0; + nextExists = 0; + } else if (ret == 0) { + CMP_UNMARSHAL_DATA(&nextc, &nextd); + } + } + + if (ret != 0) + goto end; + + /* !nextExists || ikey/idata < nextk/nextd */ + iSmallEnough = 1; + + while (moreCompressed != 0 || iSmallEnough != 0) { + if (moreCompressed == 0) + cmp = 1; + else if (iSmallEnough == 0) + cmp = -1; + else + cmp = __db_compare_both(dbp, cp->currentKey, + cp->currentData, ikey, idata); + + if (cmp < 0) { +store_current: CMP_STORE(cp->currentKey, cp->currentData); + if (ret != 0) + goto end; + } else { + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + case DB_NODUPDATA: + if (cmp == 0 && bulk_ret == 0 && + F_ISSET(dbp, DB_AM_DUPSORT)) { + bulk_ret = __db_duperr(dbp, + flags); + + /* + * Continue until we store + * the current chunk, + * but don't insert any + * more entries. + */ + moreStream = 0; + iSmallEnough = 0; + + goto store_current; + } + break; + default: + break; + } + + CMP_STORE(ikey, idata); + if (ret != 0) + goto end; + ++chunk_count; + + /* + * prevDestKey/prevDestData now point to + * the same DBTs as ikey/idata. We don't + * want to overwrite them, so swap them + * to point to the other DBTs. 
+ */ + if (ikey == &ikey1) { + ikey = &ikey2; + idata = &idata2; + prevIkey = &ikey1; + prevIdata = &idata1; + } else { + ikey = &ikey1; + idata = &idata1; + prevIkey = &ikey2; + prevIdata = &idata2; + } + + do { + /* Get the next input key and data */ + if (stream->next( + stream, ikey, idata) == 0) { + moreStream = 0; + iSmallEnough = 0; + break; + } + +#ifdef DIAGNOSTIC + /* Check that the stream is sorted */ + DB_ASSERT(env, __db_compare_both(dbp, + ikey, idata, prevIkey, + prevIdata) >= 0); +#endif + + /* Check for duplicates in the stream */ + } while (__db_compare_both(dbp, ikey, idata, + prevIkey, prevIdata) == 0); + + /* + * Check that !nextExists || + * ikey/idata < nextk/nextd + */ + if (moreStream != 0 && nextExists != 0 && + __db_compare_both(dbp, ikey, + idata, &nextk, &nextd) >= 0) + iSmallEnough = 0; + } + + if (cmp <= 0) { + ret = __bamc_next_decompress(dbc); + if (ret == DB_NOTFOUND) { + moreCompressed = 0; + ret = 0; + } else if (ret != 0) + goto end; + + } + } + + if (prevDestKey != NULL) { + if ((ret = __dbc_iput( + dbc, &destkey, &destbuf, DB_KEYLAST)) != 0) + goto end; + + if (countp != NULL) + *countp += chunk_count; + chunk_count = 0; + + prevDestKey = NULL; + prevDestData = NULL; + destbuf.size = 0; + } + } + + end: + CMP_FREE_DBT(env, &destkey); + CMP_FREE_DBT(env, &destbuf); + CMP_FREE_DBT(env, &nextk); + CMP_FREE_DBT(env, &nextc); + + return (ret != 0 ? ret : bulk_ret); +} + +/******************************************************************************/ + +/* Remove the sorted key/data pairs in stream from the compressed database. 
*/ +static int +__bamc_compress_merge_delete(dbc, stream, countp) + DBC *dbc; + BTREE_COMPRESS_STREAM *stream; + u_int32_t *countp; +{ + DBT ikey, idata, nextk, nextc, nextd, destkey, destbuf, pdestkey; + DBT pdestdata; +#ifdef DIAGNOSTIC + DBT pikey, pidata; +#endif + DBT *prevDestKey, *prevDestData; + int ret, bulk_ret, cmp, moreCompressed, moreStream, nextExists; + int iSmallEnough; + u_int32_t chunk_count; + ENV *env; + BTREE_CURSOR *cp; + DB *dbp; + + env = dbc->env; + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + bulk_ret = 0; + + memset(&ikey, 0, sizeof(DBT)); + memset(&idata, 0, sizeof(DBT)); + + CMP_INIT_DBT(&nextk); + CMP_INIT_DBT(&nextc); + memset(&nextd, 0, sizeof(DBT)); + + CMP_INIT_DBT(&pdestkey); + CMP_INIT_DBT(&pdestdata); + + CMP_INIT_DBT(&destkey); + CMP_INIT_DBT(&destbuf); + if ((ret = __os_malloc(env, cp->ovflsize, &destbuf.data)) != 0) + goto end; + destbuf.ulen = cp->ovflsize; + + if (countp != NULL) + *countp = 0; + chunk_count = 0; + + /* Get the first input key and data */ + ret = 0; + if (stream->next(stream, &ikey, &idata) == 0) + goto end; + + prevDestKey = NULL; + prevDestData = NULL; + + moreStream = 1; + while (moreStream != 0) { + nextExists = 1; + moreCompressed = 1; + + /* Seek the ikey/idata position */ + if ((ret = __bamc_compress_seek(dbc, &ikey, &idata, 0)) != 0) + goto end; + + /* + * Delete the key - we might overwrite it below but it's safer + * to just always delete it, and it doesn't seem significantly + * slower to do so. 
+ */ + ret = __bamc_compress_del_and_get_next(dbc, &nextk, &nextc); + if (ret == DB_NOTFOUND) { + ret = 0; + nextExists = 0; + } else if (ret == 0) { + CMP_UNMARSHAL_DATA(&nextc, &nextd); + } else + goto end; + + if ((ret = __bamc_start_decompress(dbc)) != 0) + goto end; + + /* !nextExists || ikey/idata < nextk/nextd */ + iSmallEnough = 1; + + while (moreCompressed != 0 || iSmallEnough != 0) { + if (moreCompressed == 0) + cmp = 1; + else if (iSmallEnough == 0) + cmp = -1; + else + cmp = __db_compare_both(dbp, cp->currentKey, + cp->currentData, &ikey, &idata); + + if (cmp < 0) { + CMP_STORE(cp->currentKey, cp->currentData); + if (ret != 0) + goto end; + + if ((ret = __bam_compress_set_dbt(dbp, + &pdestkey, cp->currentKey->data, + cp->currentKey->size)) != 0) + goto end; + if ((ret = __bam_compress_set_dbt(dbp, + &pdestdata, cp->currentData->data, + cp->currentData->size)) != 0) + goto end; + prevDestKey = &pdestkey; + prevDestData = &pdestdata; + } else { + if (cmp != 0) { + /* + * Continue until we store the current + * chunk, but don't delete any more + * entries. 
+ */ + bulk_ret = DB_NOTFOUND; + moreStream = 0; + iSmallEnough = 0; + } else + ++chunk_count; + +#ifdef DIAGNOSTIC + pikey = ikey; + pidata = idata; +#endif + + /* Get the next input key and data */ + if (stream->next(stream, &ikey, &idata) == 0) { + moreStream = 0; + iSmallEnough = 0; + } + +#ifdef DIAGNOSTIC + /* Check that the stream is sorted */ + DB_ASSERT(env, moreStream == 0 || + __db_compare_both(dbp, &ikey, &idata, + &pikey, &pidata) >= 0); +#endif + + /* + * Check that !nextExists || + * ikey/idata < nextk/nextd + */ + if (moreStream != 0 && nextExists != 0 && + __db_compare_both(dbp, &ikey, + &idata, &nextk, &nextd) >= 0) + iSmallEnough = 0; + } + + if (cmp <= 0) { + ret = __bamc_next_decompress(dbc); + if (ret == DB_NOTFOUND) { + moreCompressed = 0; + ret = 0; + } else if (ret != 0) + goto end; + } + } + + if (prevDestKey != NULL) { + if ((ret = __dbc_iput( + dbc, &destkey, &destbuf, DB_KEYLAST)) != 0) + goto end; + + if (countp) + *countp += chunk_count; + chunk_count = 0; + + prevDestKey = NULL; + prevDestData = NULL; + destbuf.size = 0; + } + } + + end: + CMP_FREE_DBT(env, &destkey); + CMP_FREE_DBT(env, &destbuf); + CMP_FREE_DBT(env, &pdestkey); + CMP_FREE_DBT(env, &pdestdata); + CMP_FREE_DBT(env, &nextk); + CMP_FREE_DBT(env, &nextc); + + return (ret != 0 ? ret : bulk_ret); +} + +/* + * Remove the sorted keys in stream along with all duplicate values from + * the compressed database. 
+ */ +static int +__bamc_compress_merge_delete_dups(dbc, stream, countp) + DBC *dbc; + BTREE_COMPRESS_STREAM *stream; + u_int32_t *countp; +{ + DBC *dbc_n; + DBT ikey, nextk, noread, destkey, destbuf, pdestkey, pdestdata; +#ifdef DIAGNOSTIC + DBT pikey; +#endif + DBT *prevDestKey, *prevDestData; + int ret, ret_n, bulk_ret, cmp, moreCompressed, moreStream, nextExists; + int iSmallEnough, ifound; + u_int32_t chunk_count; + ENV *env; + BTREE_CURSOR *cp; + DB *dbp; + + env = dbc->env; + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + bulk_ret = 0; + + memset(&ikey, 0, sizeof(DBT)); + + CMP_INIT_DBT(&nextk); + + memset(&noread, 0, sizeof(DBT)); + noread.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + + CMP_INIT_DBT(&pdestkey); + CMP_INIT_DBT(&pdestdata); + + CMP_INIT_DBT(&destkey); + CMP_INIT_DBT(&destbuf); + if ((ret = __os_malloc(env, cp->ovflsize, &destbuf.data)) != 0) + goto end; + destbuf.ulen = cp->ovflsize; + + if (countp != NULL) + *countp = 0; + chunk_count = 0; + + /* Get the first input key and data */ + ret = 0; + if (stream->next(stream, &ikey, NULL) == 0) + goto end; + ifound = 0; + + prevDestKey = NULL; + prevDestData = NULL; + + moreStream = 1; + iSmallEnough = 0; + nextExists = 0; + while (moreStream != 0) { + if (iSmallEnough != 0) { + if (nextExists == 0) { + /* + * We've finished deleting the last key + * in the database + */ + if (ifound == 0) { + bulk_ret = DB_NOTFOUND; + } else + ++chunk_count; + break; + } + + /* Move to the next chunk */ + CMP_IGET_RETRY( + ret, dbc, &cp->key1, &cp->compressed, DB_CURRENT); + if (ret == DB_NOTFOUND) { + ret = 0; + break; + } else if (ret != 0) + goto end; + } else + /* Seek the ikey position */ + if ((ret = + __bamc_compress_seek(dbc, &ikey, NULL, 0)) != 0) + goto end; + + nextExists = 1; + moreCompressed = 1; + + /* + * Delete the key - we might overwrite it below but it's + * safer to just always delete it, and it doesn't seem + * significantly slower to do so. 
+ */ + ret = __bamc_compress_del_and_get_next(dbc, &nextk, &noread); + if (ret == DB_NOTFOUND) { + ret = 0; + nextExists = 0; + } else if (ret != 0) + goto end; + + if ((ret = __bamc_start_decompress(dbc)) != 0) + goto end; + + /* !nextExists || ikey <= nextk */ + iSmallEnough = 1; + + while (moreCompressed != 0) { + if (moreCompressed == 0) + cmp = 1; + else if (iSmallEnough == 0) + cmp = -1; + else + cmp = __db_compare_both( + dbp, cp->currentKey, NULL, &ikey, NULL); + + if (cmp < 0) { + if ((ret = __bamc_compress_store(dbc, + cp->currentKey, cp->currentData, + &prevDestKey, + &prevDestData, &destkey, &destbuf)) != 0) + goto end; + + if ((ret = __bam_compress_set_dbt(dbp, + &pdestkey, cp->currentKey->data, + cp->currentKey->size)) != 0) + goto end; + if ((ret = __bam_compress_set_dbt(dbp, + &pdestdata, cp->currentData->data, + cp->currentData->size)) != 0) + goto end; + prevDestKey = &pdestkey; + prevDestData = &pdestdata; + } else if (cmp > 0) { + if (ifound == 0) { + /* + * Continue until we store the + * current chunk, but don't delete + * any more entries. 
+ */ + bulk_ret = DB_NOTFOUND; + moreStream = 0; + iSmallEnough = 0; + } else + ++chunk_count; + +#ifdef DIAGNOSTIC + pikey = ikey; +#endif + + /* Get the next input key */ + if (stream->next(stream, &ikey, NULL) == 0) { + moreStream = 0; + iSmallEnough = 0; + } + ifound = 0; + +#ifdef DIAGNOSTIC + /* Check that the stream is sorted */ + DB_ASSERT(env, moreStream == 0 || + __db_compare_both(dbp, &ikey, NULL, + &pikey, NULL) >= 0); +#endif + + /* Check that !nextExists || ikey <= nextk */ + if (moreStream != 0 && nextExists != 0 && + __db_compare_both(dbp, + &ikey, NULL, &nextk, NULL) > 0) + iSmallEnough = 0; + } else /* cmp == 0 */ + ifound = 1; + + if (cmp <= 0) { + ret = __bamc_next_decompress(dbc); + if (ret == DB_NOTFOUND) { + moreCompressed = 0; + ret = 0; + } else if (ret != 0) + goto end; + } + } + + if (prevDestKey != NULL) { + /* + * Do the DBC->put() with a duplicate cursor, so that + * the main cursor's position isn't changed - we might + * need it to be the same in order to use DB_CURRENT + * above. + */ + if ((ret = __dbc_dup(dbc, &dbc_n, 0)) != 0) + goto end; + F_SET(dbc_n, DBC_TRANSIENT); + + ret = __dbc_iput(dbc_n, &destkey, &destbuf, DB_KEYLAST); + + if ((ret_n = __dbc_close(dbc_n)) != 0 && ret == 0) + ret = ret_n; + + if (ret != 0) + goto end; + + if (countp) + *countp += chunk_count; + chunk_count = 0; + + prevDestKey = NULL; + prevDestData = NULL; + destbuf.size = 0; + } + } + + end: + CMP_FREE_DBT(env, &destkey); + CMP_FREE_DBT(env, &destbuf); + CMP_FREE_DBT(env, &pdestkey); + CMP_FREE_DBT(env, &pdestdata); + CMP_FREE_DBT(env, &nextk); + + return (ret != 0 ? 
ret : bulk_ret); +} + +/******************************************************************************/ + +/* Implements DB_PREV and DB_LAST for __bamc_compress_get() */ +static int +__bamc_compress_get_prev(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + u_int32_t tofind; + BTREE_CURSOR *cp; + + ret = 0; + cp = (BTREE_CURSOR *)dbc->internal; + + F_CLR(cp, C_COMPRESS_DELETED); + + if (cp->prevKey != NULL) { + /* Return the stored previous key */ + cp->currentKey = cp->prevKey; + cp->currentData = cp->prevData; + cp->compcursor = cp->prevcursor; + cp->prevKey = 0; + cp->prevData = 0; + cp->prevcursor = cp->prev2cursor; + cp->prev2cursor = 0; + } else { + if (cp->currentKey == NULL) { + /* No current key, so fetch the last key */ + flags |= DB_LAST; + tofind = (u_int32_t)-1; + } else if (cp->prevcursor == 0) { + /* + * The current key is at the begining of the + * compressed block, so get the last key from the + * previous block + */ + flags |= DB_PREV; + tofind = (u_int32_t)-1; + } else { + /* + * We have to search for the previous key in the + * current block + */ + flags |= DB_CURRENT; + tofind = (u_int32_t) + (cp->prevcursor - (u_int8_t*)cp->compressed.data); + } + + CMP_IGET_RETRY(ret, dbc, &cp->key1, &cp->compressed, flags); + if (ret != 0) + return (ret); + + /* Decompress until we reach tofind */ + ret = __bamc_start_decompress(dbc); + while (ret == 0 && tofind > (u_int32_t) + (cp->compcursor - (u_int8_t*)cp->compressed.data)) { + ret = __bamc_next_decompress(dbc); + } + + if (ret == DB_NOTFOUND) + ret = 0; + } + + return (ret); +} + +/* Implements DB_PREV_DUP for __bamc_compress_get() */ +static int +__bamc_compress_get_prev_dup(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + BTREE_CURSOR *cp; + DB *dbp; + BTREE *t; + + ret = 0; + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + t = (BTREE *)dbp->bt_internal; + + if (cp->currentKey == 0) + return (EINVAL); + + /* If this is a deleted entry, del_key is already set, otherwise we + 
have to set it now */ + if (!F_ISSET(cp, C_COMPRESS_DELETED)) { + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + } + + if ((ret = __bamc_compress_get_prev(dbc, flags)) != 0) + return (ret); + + if (t->bt_compare(dbp, cp->currentKey, &cp->del_key) != 0) + return (DB_NOTFOUND); + + return (0); +} + +/* Implements DB_PREV_NODUP for __bamc_compress_get() */ +static int +__bamc_compress_get_prev_nodup(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + BTREE_CURSOR *cp; + DB *dbp; + BTREE *t; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + t = (BTREE *)dbp->bt_internal; + + if (cp->currentKey == 0) + return (__bamc_compress_get_prev(dbc, flags)); + + /* + * If this is a deleted entry, del_key is already set, otherwise we + * have to set it now. + */ + if (!F_ISSET(cp, C_COMPRESS_DELETED)) + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + + /* + * Linear search for the next non-duplicate key - this is + * especially inefficient for DB_PREV_NODUP, since we have to + * decompress from the begining of the chunk to find previous + * key/data pairs. Instead we could check for key equality as we + * decompress. 
+ */ + do + if ((ret = __bamc_compress_get_prev(dbc, flags)) != 0) + return (ret); + while (t->bt_compare(dbp, cp->currentKey, &cp->del_key) == 0); + + return (0); +} + +/* Implements DB_NEXT and DB_FIRST for __bamc_compress_get() */ +static int +__bamc_compress_get_next(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + BTREE_CURSOR *cp; + + cp = (BTREE_CURSOR *)dbc->internal; + + if (F_ISSET(cp, C_COMPRESS_DELETED)) { + if (cp->currentKey == 0) + return (DB_NOTFOUND); + F_CLR(cp, C_COMPRESS_DELETED); + return (0); + } else if (cp->currentKey) { + ret = __bamc_next_decompress(dbc); + if (ret != DB_NOTFOUND) + return (ret); + + flags |= DB_NEXT; + } else + flags |= DB_FIRST; + + CMP_IGET_RETRY(ret, dbc, &cp->key1, &cp->compressed, flags); + if (ret == DB_NOTFOUND) { + /* + * Reset the cursor, so that + * __bamc_compress_get_multiple_key will end up pointing + * to the right place + */ + __bamc_compress_reset(dbc); + return (DB_NOTFOUND); + } else if (ret != 0) + return (ret); + + ret = __bamc_start_decompress(dbc); + + return (ret); +} + +/* Implements DB_NEXT_DUP for __bamc_compress_get() */ +static int +__bamc_compress_get_next_dup(dbc, key, flags) + DBC *dbc; + DBT *key; + u_int32_t flags; +{ + int ret; + BTREE_CURSOR *cp; + DB *dbp; + BTREE *t; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + t = (BTREE *)dbp->bt_internal; + + if (cp->currentKey == 0) + return (EINVAL); + + if (F_ISSET(cp, C_COMPRESS_DELETED)) { + /* + * Check that the next entry has the same key as the + * deleted entry. + */ + if (cp->currentKey == 0) + return (DB_NOTFOUND); + F_CLR(cp, C_COMPRESS_DELETED); + return (t->bt_compare(dbp, + cp->currentKey, &cp->del_key) == 0 ? 
0 : DB_NOTFOUND); + } + + /* Check that the next entry has the same key as the previous entry */ + ret = __bamc_next_decompress(dbc); + if (ret == 0 && t->bt_compare(dbp, cp->currentKey, cp->prevKey) != 0) + return (DB_NOTFOUND); + if (ret != DB_NOTFOUND) + return (ret); + + if (key == NULL) { + /* Copy the current key to del_key */ + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + key = &cp->del_key; + } + + /* Fetch the next chunk */ + CMP_IGET_RETRY(ret, dbc, &cp->key1, &cp->compressed, DB_NEXT | flags); + if (ret == DB_NOTFOUND) { + /* + * Reset the cursor, so that __bamc_compress_get_multiple + * will end up pointing to the right place + */ + __bamc_compress_reset(dbc); + return (DB_NOTFOUND); + } else if (ret != 0) + return (ret); + + if ((ret = __bamc_start_decompress(dbc)) != 0) + return (ret); + + /* Check the keys are the same */ + if (t->bt_compare(dbp, cp->currentKey, key) != 0) + return (DB_NOTFOUND); + + return (0); +} + +/* Implements DB_NEXT_NODUP for __bamc_compress_get() */ +static int +__bamc_compress_get_next_nodup(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + BTREE_CURSOR *cp; + DB *dbp; + BTREE *t; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + t = (BTREE *)dbp->bt_internal; + + if (cp->currentKey == 0) + return (__bamc_compress_get_next(dbc, flags)); + + /* + * If this is a deleted entry, del_key is already set, otherwise + * we have to set it now + */ + if (!F_ISSET(cp, C_COMPRESS_DELETED)) + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + + /* Linear search for the next non-duplicate key */ + do + if ((ret = __bamc_compress_get_next(dbc, flags)) != 0) + return (ret); + while (t->bt_compare(dbp, cp->currentKey, &cp->del_key) == 0); + + return (ret); +} + +/* + * Implements DB_SET, DB_SET_RANGE, DB_GET_BOTH, and DB_GET_BOTH_RANGE + * for __bamc_compress_get() + */ 
+static int +__bamc_compress_get_set(dbc, key, data, method, flags) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t method; + u_int32_t flags; +{ + int ret, cmp; + BTREE_CURSOR *cp; + DB *dbp; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + + if (method == DB_SET || method == DB_SET_RANGE) + data = NULL; + + F_CLR(cp, C_COMPRESS_DELETED); + + ret = __bamc_compress_seek(dbc, key, data, flags); + if (ret == DB_NOTFOUND) + CMP_IGET_RETRY(ret, dbc, + &cp->key1, &cp->compressed, DB_FIRST | flags); + if (ret != 0) + return (ret); + + /* Decompress and perform a linear search for the key */ + cmp = 0; + ret = __bamc_start_decompress(dbc); + while (ret == 0 && (cmp = __db_compare_both(dbp, + cp->currentKey, cp->currentData, key, data)) < 0) { + ret = __bamc_next_decompress(dbc); + if (ret == DB_NOTFOUND) { + CMP_IGET_RETRY(ret, dbc, + &cp->key1, &cp->compressed, DB_NEXT | flags); + if (ret == 0) + ret = __bamc_start_decompress(dbc); + } + } + + switch (method) { + case DB_SET: + case DB_GET_BOTH_RANGE: + /* + * We need to exactly match the key, and if cmp != 0 we + * might not have - so check again here. 
+ */ + if (ret == 0 && + __db_compare_both(dbp, cp->currentKey, 0, key, 0) != 0) { + /* We didn't find the key */ + ret = DB_NOTFOUND; + } + break; + case DB_GET_BOTH: + if (ret == 0 && (cmp != 0 || (!F_ISSET(dbp, DB_AM_DUPSORT) && + __bam_defcmp(dbp, cp->currentData, data) != 0))) { + /* We didn't find the key/data pair */ + ret = DB_NOTFOUND; + } + break; + default: + DB_ASSERT(dbp->env, method == 0 || method == DB_SET_RANGE); + } + + return (ret); +} + +/* Implements DB_GET_BOTHC for __bamc_compress_get() */ +static int +__bamc_compress_get_bothc(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + int ret, cmp; + BTREE_CURSOR *cp; + DB *dbp; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + + /* Check that the data we are looking for comes after the current + position */ + if (__db_compare_both(dbp, cp->currentKey, + cp->currentData, cp->currentKey, data) >= 0) + return (DB_NOTFOUND); + + cmp = 0; + /* Perform a linear search for the data in the current chunk */ + while ((ret = __bamc_next_decompress(dbc)) == 0 && + (cmp = __db_compare_both( + dbp, cp->currentKey, cp->currentData, cp->prevKey, data)) < 0) + continue; + + if (ret == 0) + return (cmp == 0 ? 
0 : DB_NOTFOUND); + if (ret != DB_NOTFOUND) + return (ret); + + /* Copy the current key to del_key */ + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + return (ret); + + /* Search for the data using DB_GET_BOTH */ + return __bamc_compress_get_set( + dbc, &cp->del_key, data, DB_GET_BOTH, flags); +} + +/* Implements DB_MULTIPLE_KEY for __bamc_compress_get() */ +static int +__bamc_compress_get_multiple_key(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + int ret; + u_int8_t *writekey, *writedata; + void *mptr; + BTREE_CURSOR *cp; + + ret = 0; + cp = (BTREE_CURSOR *)dbc->internal; + + DB_MULTIPLE_WRITE_INIT(mptr, data); + DB_MULTIPLE_KEY_RESERVE_NEXT(mptr, data, writekey, cp->currentKey->size, + writedata, cp->currentData->size); + if (writekey == NULL) { + data->size = cp->currentKey->size + cp->currentData->size + + 4 * sizeof(u_int32_t); + return DB_BUFFER_SMALL; + } + DB_ASSERT(dbc->dbp->env, writedata != NULL); + + memcpy(writekey, cp->currentKey->data, cp->currentKey->size); + memcpy(writedata, cp->currentData->data, cp->currentData->size); + + while ((ret = __bamc_compress_get_next(dbc, flags)) == 0) { + DB_MULTIPLE_KEY_RESERVE_NEXT(mptr, data, writekey, + cp->currentKey->size, writedata, cp->currentData->size); + if (writekey == NULL) + break; + DB_ASSERT(dbc->dbp->env, writedata != NULL); + + /* + * We could choose to optimize this by just storing one + * copy of a key for each set of duplicate data. 
+ */ + memcpy(writekey, cp->currentKey->data, cp->currentKey->size); + memcpy(writedata, cp->currentData->data, cp->currentData->size); + } + + if (ret == DB_NOTFOUND) + ret = 0; + + if (ret == 0) + /* + * Rewind to the previous key/data, since we can't fit + * this one in the buffer + */ + ret = __bamc_compress_get_prev(dbc, flags); + + return (ret); +} + +/* Implements DB_MULTIPLE for __bamc_compress_get() */ +static int +__bamc_compress_get_multiple(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + int ret; + u_int8_t *writedata; + void *mptr; + BTREE_CURSOR *cp; + + ret = 0; + cp = (BTREE_CURSOR *)dbc->internal; + + data->size = 0; + + DB_MULTIPLE_WRITE_INIT(mptr, data); + DB_MULTIPLE_RESERVE_NEXT(mptr, data, writedata, cp->currentData->size); + data->size += cp->currentData->size + 2 * sizeof(u_int32_t); + if (writedata == NULL) + return DB_BUFFER_SMALL; + + memcpy(writedata, cp->currentData->data, cp->currentData->size); + + while ((ret = __bamc_compress_get_next_dup(dbc, key, flags)) == 0) { + DB_MULTIPLE_RESERVE_NEXT( + mptr, data, writedata, cp->currentData->size); + data->size += cp->currentData->size + 2 * sizeof(u_int32_t); + if (writedata == NULL) { + /* DBC_FROM_DB_GET indicates we need to fit all the + * duplicates into the buffer or return DB_BUFFER_SMALL. + * [#17039] + */ + if (F_ISSET(dbc, DBC_FROM_DB_GET)) + return DB_BUFFER_SMALL; + break; + } + + memcpy(writedata, cp->currentData->data, cp->currentData->size); + } + + if (ret == DB_NOTFOUND) + ret = 0; + + if (ret == 0) + /* + * Rewind to the previous key/data, as that's now our current + * entry. + */ + ret = __bamc_compress_get_prev(dbc, flags); + + return (ret); +} + +/* + * __bamc_compress_iget -- + * Get using a compressed cursor. 
(internal) + */ +static int +__bamc_compress_iget(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + int ret; + u_int32_t multiple, method; + BTREE_CURSOR *cp; + DB *dbp; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + ret = 0; + + multiple = flags & (DB_MULTIPLE|DB_MULTIPLE_KEY); + method = flags & DB_OPFLAGS_MASK; + flags = flags & ~(DB_OPFLAGS_MASK|DB_MULTIPLE|DB_MULTIPLE_KEY); + + switch (method) { + case DB_CURRENT: + if (F_ISSET(cp, C_COMPRESS_DELETED)) + ret = DB_KEYEMPTY; + else if (cp->currentKey == NULL) + ret = EINVAL; + break; + case DB_FIRST: + __bamc_compress_reset(dbc); + ret = __bamc_compress_get_next(dbc, flags); + break; + case DB_NEXT: + ret = __bamc_compress_get_next(dbc, flags); + break; + case DB_NEXT_DUP: + ret = __bamc_compress_get_next_dup(dbc, 0, flags); + break; + case DB_NEXT_NODUP: + ret = __bamc_compress_get_next_nodup(dbc, flags); + break; + case DB_LAST: + __bamc_compress_reset(dbc); + ret = __bamc_compress_get_prev(dbc, flags); + break; + case DB_PREV: + ret = __bamc_compress_get_prev(dbc, flags); + break; + case DB_PREV_DUP: + ret = __bamc_compress_get_prev_dup(dbc, flags); + break; + case DB_PREV_NODUP: + ret = __bamc_compress_get_prev_nodup(dbc, flags); + break; + case DB_SET: + if (((BTREE *) + dbc->dbp->bt_internal)->bt_compare == __bam_defcmp) + F_SET(key, DB_DBT_ISSET); + /* FALL THROUGH */ + case DB_SET_RANGE: + ret = __bamc_compress_get_set(dbc, key, 0, method, flags); + break; + case DB_GET_BOTH: + if (!F_ISSET(dbc->dbp, DB_AM_DUPSORT) || ((BTREE *)dbc->dbp-> + bt_internal)->compress_dup_compare == __bam_defcmp) + F_SET(data, DB_DBT_ISSET); + /* FALL THROUGH */ + case DB_GET_BOTH_RANGE: + if (((BTREE *) + dbc->dbp->bt_internal)->bt_compare == __bam_defcmp) + F_SET(key, DB_DBT_ISSET); + ret = __bamc_compress_get_set(dbc, key, data, method, flags); + break; + case DB_GET_BOTHC: + ret = __bamc_compress_get_bothc(dbc, data, flags); + break; + default: + ret = __db_unknown_flag(dbp->env, 
"__bamc_compress_iget", + method); + break; + } + + if (ret != 0) + goto err; + + switch (multiple) { + case 0: + if (!F_ISSET(key, DB_DBT_ISSET)) + ret = __db_retcopy(dbc->env, key, + cp->currentKey->data, cp->currentKey->size, + &dbc->rkey->data, &dbc->rkey->ulen); + if (!F_ISSET(data, DB_DBT_ISSET) && ret == 0) + ret = __db_retcopy(dbc->env, data, + cp->currentData->data, cp->currentData->size, + &dbc->rdata->data, &dbc->rdata->ulen); + break; + case DB_MULTIPLE: + if (!F_ISSET(key, DB_DBT_ISSET)) + ret = __db_retcopy(dbc->env, key, + cp->currentKey->data, cp->currentKey->size, + &dbc->rkey->data, &dbc->rkey->ulen); + if (ret == 0) + ret = + __bamc_compress_get_multiple(dbc, key, data, flags); + break; + case DB_MULTIPLE_KEY: + ret = __bamc_compress_get_multiple_key(dbc, data, flags); + break; + default: + ret = __db_unknown_flag(dbp->env, "__bamc_compress_iget", + multiple); + break; + } + + err: + F_CLR(key, DB_DBT_ISSET); + F_CLR(data, DB_DBT_ISSET); + + return (ret); +} + +/* + * __bamc_compress_get -- + * Get using a compressed cursor. + * + * PUBLIC: int __bamc_compress_get __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__bamc_compress_get(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc_n; + int ret, t_ret; + u_int32_t tmp_flags; + + switch (flags & DB_OPFLAGS_MASK) { + case DB_CURRENT: + case DB_GET_BOTHC: + case DB_NEXT: + case DB_NEXT_DUP: + case DB_NEXT_NODUP: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + if (F_ISSET((BTREE_CURSOR *)dbc->internal, + C_COMPRESS_MODIFIED) && + (ret = __bamc_compress_relocate(dbc)) != 0) + return (ret); + tmp_flags = DB_POSITION; + break; + default: + F_CLR((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED); + tmp_flags = 0; + break; + } + + if (F_ISSET(dbc, DBC_TRANSIENT)) + dbc_n = dbc; + else { + if ((ret = __dbc_dup(dbc, &dbc_n, tmp_flags)) != 0) + goto err; + + /* + * We don't care about preserving the cursor's position on + * error. 
+ */ + F_SET(dbc_n, DBC_TRANSIENT); + + COPY_RET_MEM(dbc, dbc_n); + } + + if ((ret = __bamc_compress_iget(dbc_n, key, data, flags)) != 0) + goto err; + +err: + /* Cleanup and cursor resolution. */ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && + (ret == 0 || ret == DB_BUFFER_SMALL)) + ret = t_ret; + return (ret); +} + +/* + * __bamc_compress_iput -- + * Put using a compressed cursor (internal) + */ +static int +__bamc_compress_iput(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + int ret; + u_int32_t multi; + DBT kcpy, pdata, empty; + BTREE_COMPRESS_STREAM stream; + BTREE_CURSOR *cp; + DB *dbp; + ENV *env; + + cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + env = dbc->env; + + memset(&pdata, 0, sizeof(DBT)); + memset(&empty, 0, sizeof(DBT)); + + multi = LF_ISSET(DB_MULTIPLE|DB_MULTIPLE_KEY); + LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY); + + switch (flags) { + case DB_CURRENT: + if (cp->currentKey == 0 || F_ISSET(cp, C_COMPRESS_DELETED)) { + ret = DB_NOTFOUND; + goto end; + } + + if (F_ISSET(data, DB_DBT_PARTIAL)) { + if ((ret = __db_buildpartial( + dbp, cp->currentData, data, &pdata)) != 0) + goto end; + data = &pdata; + } + + if (F_ISSET(dbp, DB_AM_DUPSORT) && + ((BTREE *)dbp->bt_internal)->compress_dup_compare( + dbp, cp->currentData, data) != 0) { + __db_errx(env, DB_STR("1032", + "Existing data sorts differently from put data")); + ret = EINVAL; + goto end; + } + CMP_INIT_DBT(&kcpy); + if ((ret = __bam_compress_set_dbt(dbp, + &kcpy, cp->currentKey->data, cp->currentKey->size)) != 0) + goto end; + + __bam_cs_create_single(&stream, &kcpy, data); + ret = __bamc_compress_merge_insert(dbc, &stream, NULL, flags); + + if (ret == 0) + /* Position the cursor on the entry written */ + ret = __bamc_compress_get_set( + dbc, &kcpy, data, DB_GET_BOTH_RANGE, 0); + + CMP_FREE_DBT(env, &kcpy); + break; + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_OVERWRITE_DUP: + switch (multi) { + case 0: + if (F_ISSET(data, 
DB_DBT_PARTIAL)) { + if ((ret = __bamc_compress_get_set(dbc, key, + data, DB_SET, 0)) != 0 && + ret != DB_NOTFOUND) + goto end; + if ((ret = __db_buildpartial(dbp, + ret == DB_NOTFOUND ? &empty : + cp->currentData, data, &pdata)) != 0) + goto end; + data = &pdata; + } + + __bam_cs_create_single(&stream, key, data); + ret = __bamc_compress_merge_insert( + dbc, &stream, NULL, flags); + + if (ret == 0) + /* Position the cursor on the entry written */ + ret = __bamc_compress_get_set( + dbc, key, data, DB_GET_BOTH_RANGE, 0); + break; + case DB_MULTIPLE: + __bam_cs_create_multiple(&stream, key, data); + ret = __bamc_compress_merge_insert( + dbc, &stream, &key->doff, flags); + break; + case DB_MULTIPLE_KEY: + __bam_cs_create_multiple_key(&stream, key); + ret = __bamc_compress_merge_insert( + dbc, &stream, &key->doff, flags); + break; + default: + return (__db_unknown_flag( + dbp->env, "__bamc_compress_iput", multi)); + } + break; + case DB_NOOVERWRITE: + /* Check key doesn't already exist */ + ret = __bamc_compress_get_set(dbc, key, 0, DB_SET, 0); + if (ret != DB_NOTFOUND) { + if (ret == 0) + ret = DB_KEYEXIST; + goto end; + } + + if (F_ISSET(data, DB_DBT_PARTIAL)) { + if ((ret = __db_buildpartial( + dbp, &empty, data, &pdata)) != 0) + goto end; + data = &pdata; + } + + __bam_cs_create_single(&stream, key, data); + ret = __bamc_compress_merge_insert(dbc, &stream, NULL, flags); + + if (ret == 0) + /* Position the cursor on the entry written */ + ret = __bamc_compress_get_set( + dbc, key, data, DB_GET_BOTH_RANGE, 0); + break; + default: + return (__db_unknown_flag( + dbp->env, "__bamc_compress_iput", flags)); + } + + end: + if (pdata.data != NULL) + __os_free(env, pdata.data); + return (ret); +} + +/* + * __bamc_compress_put -- + * Put using a compressed cursor. 
+ * + * PUBLIC: int __bamc_compress_put __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__bamc_compress_put(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc_n; + int ret, t_ret; + + if (F_ISSET((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED)) { + if ((flags & DB_OPFLAGS_MASK) == DB_CURRENT && + (ret = __bamc_compress_relocate(dbc)) != 0) + return (ret); + F_CLR((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED); + } + + if (F_ISSET(dbc, DBC_TRANSIENT)) + dbc_n = dbc; + else { + if ((ret = __dbc_dup(dbc, &dbc_n, + (flags & DB_OPFLAGS_MASK) == DB_CURRENT ? + DB_POSITION : 0)) != 0) + goto err; + + /* + * We don't care about preserving the cursor's position on + * error. + */ + F_SET(dbc_n, DBC_TRANSIENT); + } + + if ((ret = __bamc_compress_iput(dbc_n, key, data, flags)) != 0) + goto err; + +err: + /* Cleanup and cursor resolution. */ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && + (ret == 0 || ret == DB_BUFFER_SMALL)) + ret = t_ret; + return (ret); +} + +/* + * __bamc_compress_idel -- + * Del using a compressed cursor. 
(internal) + */ +static int +__bamc_compress_idel(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret; + BTREE_COMPRESS_STREAM stream; + DB *dbp; + BTREE_CURSOR *cp; + + COMPQUIET(flags, 0); + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + + if (F_ISSET(cp, C_COMPRESS_DELETED)) + return DB_KEYEMPTY; + if (cp->currentKey == 0) + return DB_NOTFOUND; + + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_key, + cp->currentKey->data, cp->currentKey->size)) != 0) + goto err; + if ((ret = __bam_compress_set_dbt(dbp, &cp->del_data, + cp->currentData->data, cp->currentData->size)) != 0) + goto err; + + __bam_cs_create_single(&stream, &cp->del_key, &cp->del_data); + if ((ret = __bamc_compress_merge_delete(dbc, &stream, NULL)) != 0) + goto err; + + /* Position the cursor on the entry after the key/data deleted */ + ret = __bamc_compress_get_set(dbc, &cp->del_key, &cp->del_data, 0, 0); + if (ret == DB_NOTFOUND) { + __bamc_compress_reset(dbc); + ret = 0; + } else if (ret != 0) + goto err; + + /* Mark current as being deleted */ + F_SET(cp, C_COMPRESS_DELETED); + + err: + return (ret); +} + +/* + * __bamc_compress_del -- + * Del using a compressed cursor. + * + * PUBLIC: int __bamc_compress_del __P((DBC *, u_int32_t)); + */ +int +__bamc_compress_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + int ret, t_ret; + DBC *dbc_n; + + if (F_ISSET((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED) && + (ret = __bamc_compress_relocate(dbc)) != 0) + return (ret); + + if (F_ISSET(dbc, DBC_TRANSIENT)) + dbc_n = dbc; + else { + if ((ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0) + goto err; + + /* + * We don't care about preserving the cursor's position on + * error. + */ + F_SET(dbc_n, DBC_TRANSIENT); + + COPY_RET_MEM(dbc, dbc_n); + } + + if ((ret = __bamc_compress_idel(dbc_n, flags)) != 0) + goto err; + +err: + /* Cleanup and cursor resolution. 
*/ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && + (ret == 0 || ret == DB_BUFFER_SMALL)) + ret = t_ret; + return (ret); +} + +/* + * __bamc_compress_ibulk_del -- + * Bulk del using a compressed cursor. (internal) + */ +static int +__bamc_compress_ibulk_del(dbc, key, flags) + DBC *dbc; + DBT *key; + u_int32_t flags; +{ + BTREE_COMPRESS_STREAM stream; + + switch (flags) { + case 0: + __bam_cs_create_single_keyonly(&stream, key); + return (__bamc_compress_merge_delete_dups(dbc, &stream, NULL)); + case DB_MULTIPLE: + __bam_cs_create_multiple_keyonly(&stream, key); + return (__bamc_compress_merge_delete_dups( + dbc, &stream, &key->doff)); + case DB_MULTIPLE_KEY: + __bam_cs_create_multiple_key(&stream, key); + return (__bamc_compress_merge_delete(dbc, &stream, &key->doff)); + default: + break; + } + + return (__db_unknown_flag( + dbc->env, "__bamc_compress_ibulk_del", flags)); +} + +/* + * __bamc_compress_bulk_del -- + * Bulk del using a compressed cursor. + * + * PUBLIC: int __bamc_compress_bulk_del __P((DBC *, DBT *, u_int32_t)); + */ +int +__bamc_compress_bulk_del(dbc, key, flags) + DBC *dbc; + DBT *key; + u_int32_t flags; +{ + int ret, t_ret; + DBC *dbc_n; + + F_CLR((BTREE_CURSOR *)dbc->internal, C_COMPRESS_MODIFIED); + + if (F_ISSET(dbc, DBC_TRANSIENT)) + dbc_n = dbc; + else { + if ((ret = __dbc_dup(dbc, &dbc_n, 0)) != 0) + goto err; + + /* + * We don't care about preserving the cursor's position on + * error. + */ + F_SET(dbc_n, DBC_TRANSIENT); + } + + if ((ret = __bamc_compress_ibulk_del(dbc_n, key, flags)) != 0) + goto err; + +err: + /* Cleanup and cursor resolution. */ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && + (ret == 0 || ret == DB_BUFFER_SMALL)) + ret = t_ret; + return (ret); +} + +/* + * __bamc_compress_count -- + * Count using a compressed cursor. 
+ * + * PUBLIC: int __bamc_compress_count __P((DBC *, db_recno_t *)); + */ +int +__bamc_compress_count(dbc, countp) + DBC *dbc; + db_recno_t *countp; +{ + int ret, t_ret; + db_recno_t count; + DBT *key; + DBC *dbc_n; + BTREE_CURSOR *cp; + + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * If the current entry is deleted use del_key, otherwise use + * currentKey. + */ + if (F_ISSET(cp, C_COMPRESS_DELETED)) + key = &cp->del_key; + else + key = cp->currentKey; + + /* Duplicate the cursor */ + if ((ret = __dbc_dup(dbc, &dbc_n, 0)) != 0) + return (ret); + + /* We don't care about preserving the cursor's position on error */ + F_SET(dbc_n, DBC_TRANSIENT); + + /* Find the first duplicate */ + if ((ret = __bamc_compress_get_set(dbc_n, key, 0, DB_SET, 0)) != 0) + goto err; + count = 1; + + /* Count subsequent duplicates */ + while ((ret = __bamc_compress_get_next_dup(dbc_n, key, 0)) == 0) + ++count; + + if (ret == DB_NOTFOUND) + ret = 0; + else if (ret != 0) + goto err; + + *countp = count; + + err: + if ((t_ret = __dbc_close(dbc_n)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __bamc_compress_cmp -- + * Compare which compressed value is pointed to. + * + * PUBLIC: int __bamc_compress_cmp __P((DBC *, DBC *, int *)); + */ +int +__bamc_compress_cmp(dbc, other_dbc, result) + DBC *dbc, *other_dbc; + int *result; +{ + DB *dbp; + BTREE_CURSOR *cp, *ocp; + + /* + * At this point, we already know that the cursors point to the same + * DB. + */ + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + ocp = (BTREE_CURSOR *)other_dbc->internal; + + if (F_ISSET(cp, C_COMPRESS_DELETED)) + if (F_ISSET(ocp, C_COMPRESS_DELETED)) + *result = __db_compare_both( + dbp, &cp->del_key, &cp->del_data, + &ocp->del_key, &ocp->del_data) == 0 ? 0 : 1; + else { + if (ocp->currentKey == 0) + goto err; + + *result = __db_compare_both( + dbp, &cp->del_key, &cp->del_data, + ocp->currentKey, ocp->currentData) == 0 ? 
0 : 1; + } + else { + if (cp->currentKey == 0) + goto err; + + if (F_ISSET(ocp, C_COMPRESS_DELETED)) + *result = __db_compare_both( + dbp, cp->currentKey, cp->currentData, + &ocp->del_key, &ocp->del_data) == 0 ? 0 : 1; + else { + if (ocp->currentKey == 0) + goto err; + + *result = __db_compare_both( + dbp, cp->currentKey, cp->currentData, + ocp->currentKey, ocp->currentData) == 0 ? 0 : 1; + } + } + return (0); + + err: + __db_errx(dbc->env, DB_STR("1033", + "Both cursors must be initialized before calling DBC->cmp.")); + return (EINVAL); +} + +/* + * __bamc_compress_dup -- + * Duplicate the compression specific part of a btree cursor. + * + * PUBLIC: int __bamc_compress_dup __P((DBC *, DBC *, u_int32_t)); + */ +int +__bamc_compress_dup(orig_dbc, new_dbc, flags) + DBC *orig_dbc, *new_dbc; + u_int32_t flags; +{ + int ret; + DB *dbp; + BTREE_CURSOR *orig, *new; + + dbp = new_dbc->dbp; + + orig = (BTREE_CURSOR *)orig_dbc->internal; + new = (BTREE_CURSOR *)new_dbc->internal; + + if (orig->currentKey != NULL && !LF_ISSET(DB_SHALLOW_DUP)) { + new->currentKey = &new->key1; + new->currentData = &new->data1; + + if ((ret = __bam_compress_set_dbt(dbp, new->currentKey, + orig->currentKey->data, orig->currentKey->size)) != 0) + return (ret); + if ((ret = __bam_compress_set_dbt(dbp, new->currentData, + orig->currentData->data, orig->currentData->size)) != 0) + return (ret); + + if (orig->prevKey) { + new->prevKey = &new->key2; + new->prevData = &new->data2; + + if ((ret = __bam_compress_set_dbt(dbp, new->prevKey, + orig->prevKey->data, orig->prevKey->size)) != 0) + return (ret); + if ((ret = __bam_compress_set_dbt(dbp, new->prevData, + orig->prevData->data, orig->prevData->size)) != 0) + return (ret); + } + + if ((ret = __bam_compress_set_dbt(dbp, &new->compressed, + orig->compressed.data, orig->compressed.size)) != 0) + return (ret); + + new->compcursor = (u_int8_t*)new->compressed.data + + (orig->compcursor - (u_int8_t*)orig->compressed.data); + new->compend = 
(u_int8_t*)new->compressed.data + + (orig->compend - (u_int8_t*)orig->compressed.data); + new->prevcursor = orig->prevcursor == NULL ? NULL : + (u_int8_t*)new->compressed.data + (orig->prevcursor - + (u_int8_t*)orig->compressed.data); + new->prev2cursor = orig->prev2cursor == NULL ? NULL : + (u_int8_t*)new->compressed.data + (orig->prev2cursor - + (u_int8_t*)orig->compressed.data); + + if (F_ISSET(orig, C_COMPRESS_DELETED)) { + if ((ret = __bam_compress_set_dbt(dbp, &new->del_key, + orig->del_key.data, orig->del_key.size)) != 0) + return (ret); + if ((ret = __bam_compress_set_dbt(dbp, &new->del_data, + orig->del_data.data, orig->del_data.size)) != 0) + return (ret); + } + } + + return (0); +} + +/* + * __bam_compress_salvage -- + * Salvage the compressed data from the key/data pair + * + * PUBLIC: int __bam_compress_salvage __P((DB *, VRFY_DBINFO *, + * PUBLIC: void *, int (*)(void *, const void *), DBT *, DBT *)); + */ +int +__bam_compress_salvage(dbp, vdp, handle, callback, key, data) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + DBT *key, *data; +{ + DBT key1, key2, data1, data2, compressed; + DBT *currentKey, *currentData, *prevKey, *prevData; + ENV *env; + int ret, t_ret; + u_int8_t *compcursor, *compend; + u_int32_t datasize, size; + + env = dbp->env; + + memset(&key1, 0, sizeof(DBT)); + memset(&key2, 0, sizeof(DBT)); + memset(&data1, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + memset(&compressed, 0, sizeof(DBT)); + + key1.flags = DB_DBT_USERMEM; + key2.flags = DB_DBT_USERMEM; + data1.flags = DB_DBT_USERMEM; + data2.flags = DB_DBT_USERMEM; + compressed.flags = DB_DBT_USERMEM; + + prevKey = NULL; + prevData = NULL; + currentKey = key; + currentData = &data2; + compcursor = (u_int8_t*)data->data; + compend = compcursor + data->size; + + if (data->size == 0) { + ret = DB_VERIFY_FATAL; + goto unknown_data; + } + + /* Unmarshal the first data */ + size = __db_decompress_count_int(compcursor); + if (size == 
0xFF || compcursor + size > compend) { + ret = DB_VERIFY_FATAL; + goto unknown_data; + } + compcursor += __db_decompress_int32(compcursor, &datasize); + + if (compcursor + datasize > compend) { + ret = DB_VERIFY_FATAL; + goto unknown_data; + } + if ((ret = __bam_compress_set_dbt( + dbp, currentData, compcursor, datasize)) != 0) + goto err; + compcursor += datasize; + + /* Output first data (first key has already been output by our caller */ + if ((ret = __db_vrfy_prdbt( + currentData, 0, " ", handle, callback, 0, 0, vdp)) != 0) + goto err; + + while (compcursor < compend) { + prevKey = currentKey; + prevData = currentData; + + if (currentKey == &key1) { + currentKey = &key2; + currentData = &data2; + } else { + currentKey = &key1; + currentData = &data1; + } + + compressed.data = (void*)compcursor; + compressed.ulen = compressed.size = + (u_int32_t)(compend - compcursor); + + /* Decompress the next key/data pair */ + while ((ret = ((BTREE *)dbp->bt_internal)->bt_decompress( + dbp, prevKey, prevData, + &compressed, currentKey, currentData)) == DB_BUFFER_SMALL) { + if (CMP_RESIZE_DBT(ret, env, currentKey) != 0) + break; + if (CMP_RESIZE_DBT(ret, env, currentData) != 0) + break; + } + + if (ret == EINVAL) { + ret = DB_VERIFY_FATAL; + goto err; + } + if (ret != 0) + goto err; + + compcursor += compressed.size; + + if (compcursor > compend) { + ret = DB_VERIFY_FATAL; + goto err; + } + + /* Output the next key/data pair */ + if ((ret = __db_vrfy_prdbt( + currentKey, 0, " ", handle, callback, 0, 0, vdp)) != 0) + goto err; + if ((ret = __db_vrfy_prdbt( + currentData, 0, " ", handle, callback, 0, 0, vdp)) != 0) + goto err; + } + + if (0) { + unknown_data: + /* + * Make sure we output a data value for the key that's + * already been output + */ + DB_INIT_DBT( + compressed, "UNKNOWN_DATA", sizeof("UNKNOWN_DATA") - 1); + if ((t_ret = __db_vrfy_prdbt( + &compressed, 0, " ", handle, callback, 0, 0, vdp)) != 0) + ret = t_ret; + } + + err: + __os_free(env, key1.data); + 
__os_free(env, key2.data); + __os_free(env, data1.data); + __os_free(env, data2.data); + return (ret); +} + +/* + * __bam_compress_count -- + * Calculate key and entry counts for the compressed BTree + * + * PUBLIC: int __bam_compress_count __P((DBC *, u_int32_t *, u_int32_t *)); + */ +int +__bam_compress_count(dbc, nkeysp, ndatap) + DBC *dbc; + u_int32_t *nkeysp, *ndatap; +{ + int ret, t_ret; + u_int32_t nkeys, ndata; + DB *dbp; + BTREE *t; + DBC *dbc_n; + BTREE_CURSOR *cp_n; + + dbp = dbc->dbp; + t = (BTREE *)dbp->bt_internal; + + /* Duplicate the cursor */ + if ((ret = __dbc_dup(dbc, &dbc_n, 0)) != 0) + return (ret); + + /* We don't care about preserving the cursor's position on error */ + F_SET(dbc_n, DBC_TRANSIENT); + + cp_n = (BTREE_CURSOR *)dbc_n->internal; + + nkeys = 0; + ndata = 0; + + CMP_IGET_RETRY(ret, dbc_n, &cp_n->key1, &cp_n->compressed, DB_FIRST); + if (ret != 0) + goto err; + + if ((ret = __bamc_start_decompress(dbc_n)) != 0) + goto err; + nkeys += 1; + + for (;;) { + ndata += 1; + + ret = __bamc_next_decompress(dbc_n); + if (ret == DB_NOTFOUND) { + if (cp_n->currentKey == &cp_n->key1) { + /* + * Make sure that the previous key isn't + * overwritten when we fetch the next chunk. 
+ */ + if ((ret = __bam_compress_set_dbt(dbp, + &cp_n->key2, cp_n->key1.data, + cp_n->key1.size)) != 0) + goto err; + } + + CMP_IGET_RETRY(ret, dbc_n, &cp_n->key1, + &cp_n->compressed, DB_NEXT); + if (ret != 0) + goto err; + + ret = __bamc_start_decompress(dbc_n); + + cp_n->prevKey = &cp_n->key2; + } + + if (ret != 0) + goto err; + + if (t->bt_compare(dbp, cp_n->currentKey, cp_n->prevKey) != 0) + nkeys += 1; + } + +err: + if (ret == DB_NOTFOUND) + ret = 0; + + if ((t_ret = __dbc_close(dbc_n)) != 0 && ret == 0) + ret = t_ret; + + if (ret == 0) { + if (nkeysp != NULL) + *nkeysp = nkeys; + if (ndatap != NULL) + *ndatap = ndata; + } + + return (ret); +} + +#endif diff --git a/src/btree/bt_conv.c b/src/btree/bt_conv.c new file mode 100644 index 00000000..03a3867a --- /dev/null +++ b/src/btree/bt_conv.c @@ -0,0 +1,95 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" + +/* + * __bam_pgin -- + * Convert host-specific page layout from the host-independent format + * stored on disk. + * + * PUBLIC: int __bam_pgin __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__bam_pgin(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + return (TYPE(h) == P_BTREEMETA ? __bam_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 1)); +} + +/* + * __bam_pgout -- + * Convert host-specific page layout to the host-independent format + * stored on disk. 
+ * + * PUBLIC: int __bam_pgout __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__bam_pgout(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + return (TYPE(h) == P_BTREEMETA ? __bam_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 0)); +} + +/* + * __bam_mswap -- + * Swap the bytes on the btree metadata page. + * + * PUBLIC: int __bam_mswap __P((ENV *, PAGE *)); + */ +int +__bam_mswap(env, pg) + ENV *env; + PAGE *pg; +{ + u_int8_t *p; + + COMPQUIET(env, NULL); + + __db_metaswap(pg); + p = (u_int8_t *)pg + sizeof(DBMETA); + + p += sizeof(u_int32_t); /* unused */ + SWAP32(p); /* minkey */ + SWAP32(p); /* re_len */ + SWAP32(p); /* re_pad */ + SWAP32(p); /* root */ + p += 92 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ + + return (0); +} diff --git a/src/btree/bt_curadj.c b/src/btree/bt_curadj.c new file mode 100644 index 00000000..1caab975 --- /dev/null +++ b/src/btree/bt_curadj.c @@ -0,0 +1,694 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/mp.h"

/*
 * All cursor-walk callbacks below share one signature so they can be passed
 * to __db_walk_cursors(); per-call parameters travel through the final
 * void * argument.
 */
static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
static int __bam_ca_delete_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __ram_ca_delete_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_di_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_dup_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_undodup_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_rsplit_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_split_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
static int __bam_ca_undosplit_func
    __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));

/*
 * Cursor adjustments are logged if they are for subtransactions.  This is
 * because it's possible for a subtransaction to adjust cursors which will
 * still be active after the subtransaction aborts, and so which must be
 * restored to their previous locations.  Cursors that can be both affected
 * by our cursor adjustments and active after our transaction aborts can
 * only be found in our parent transaction -- cursors in other transactions,
 * including other child transactions of our parent, must have conflicting
 * locker IDs, and so cannot be affected by adjustments in this transaction.
 */

/*
 * __bam_ca_delete_func
 *	Callback function for walking cursors to update them due to a delete.
 */
static int
__bam_ca_delete_func(dbc, my_dbc, countp, pgno, indx, args)
	DBC *dbc, *my_dbc;
	u_int32_t *countp;
	db_pgno_t pgno;
	u_int32_t indx;
	void *args;
{
	BTREE_CURSOR *cp;
	u_int32_t del;

	COMPQUIET(my_dbc, NULL);
	/*
	 * args carries the caller's delete flag (__bam_ca_delete passes the
	 * address of an int); read through u_int32_t as elsewhere in the walk
	 * callbacks.
	 */
	del = *(u_int32_t *)args;

	cp = (BTREE_CURSOR *)dbc->internal;
	if (cp->pgno == pgno && cp->indx == indx &&
	    !MVCC_SKIP_CURADJ(dbc, pgno)) {
		/*
		 * [#8032] This assert is checking for possible race
		 * conditions where we hold a cursor position without
		 * a lock.  Unfortunately, there are paths in the
		 * Btree code that do not satisfy these conditions.
		 * None of them are known to be a problem, but this
		 * assert should be re-activated when the Btree stack
		 * code is re-written.
		DB_ASSERT(env, !STD_LOCKING(dbc) ||
		    cp->lock_mode != DB_LOCK_NG);
		 */
		if (del) {
			F_SET(cp, C_DELETED);
			/*
			 * If we're deleting the item, we can't
			 * keep a streaming offset cached.
			 */
			cp->stream_start_pgno = PGNO_INVALID;
		} else
			F_CLR(cp, C_DELETED);

#ifdef HAVE_COMPRESSION
		/*
		 * We also set the C_COMPRESS_MODIFIED flag, which
		 * prompts the compression code to look for its
		 * current entry again if it needs to.
		 *
		 * The flag isn't cleared, because the compression
		 * code still needs to do that even for an entry that
		 * becomes undeleted.
		 *
		 * This flag also needs to be set if an entry is
		 * updated, but since the compression code always
		 * deletes before an update, setting it here is
		 * sufficient.
		 */
		F_SET(cp, C_COMPRESS_MODIFIED);
#endif

		/* Report one more cursor referencing the deleted item. */
		++(*countp);
	}
	return (0);
}

/*
 * __bam_ca_delete --
 *	Update the cursors when items are deleted and when already deleted
 *	items are overwritten.  Return the number of relevant cursors found.
 *
 * PUBLIC: int __bam_ca_delete __P((DB *,
 * PUBLIC:    db_pgno_t, u_int32_t, int, u_int32_t *));
 */
int
__bam_ca_delete(dbp, pgno, indx, del, countp)
	DB *dbp;
	db_pgno_t pgno;
	u_int32_t indx;
	int del;
	u_int32_t *countp;
{
	int ret;
	u_int32_t count;

	/*
	 * Adjust the cursors.  We have the page write locked, so the
	 * only other cursors that can be pointing at a page are
	 * those in the same thread of control.  Unfortunately, we don't
	 * know that they're using the same DB handle, so traverse
	 * all matching DB handles in the same ENV, then all cursors
	 * on each matching DB handle.
	 *
	 * Each cursor is single-threaded, so we only need to lock the
	 * list of DBs and then the list of cursors in each DB.
	 */
	/* &del is read back as u_int32_t inside __bam_ca_delete_func. */
	if ((ret = __db_walk_cursors(dbp, NULL,
	    __bam_ca_delete_func, &count, pgno, indx, &del)) != 0)
		return (ret);

	/* countp is optional; callers that only adjust pass NULL. */
	if (countp != NULL)
		*countp = count;
	return (0);
}

/*
 * __ram_ca_delete_func --
 *	Walk callback: count cursors open in the recno tree rooted at
 *	root_pgno.  Returning EEXIST aborts the walk early as soon as one
 *	such cursor is found; the caller treats EEXIST as success.
 */
static int
__ram_ca_delete_func(dbc, my_dbc, countp, root_pgno, indx, args)
	DBC *dbc, *my_dbc;
	u_int32_t *countp;
	db_pgno_t root_pgno;
	u_int32_t indx;
	void *args;
{
	COMPQUIET(indx, 0);
	COMPQUIET(my_dbc, NULL);
	COMPQUIET(args, NULL);

	if (dbc->internal->root == root_pgno &&
	    !MVCC_SKIP_CURADJ(dbc, root_pgno)) {
		(*countp)++;
		return (EEXIST);
	}
	return (0);
}

/*
 * __ram_ca_delete --
 *	Return if any relevant cursors found.
 *
 * PUBLIC: int __ram_ca_delete __P((DB *, db_pgno_t, u_int32_t *));
 */
int
__ram_ca_delete(dbp, root_pgno, foundp)
	DB *dbp;
	db_pgno_t root_pgno;
	u_int32_t *foundp;
{
	int ret;

	/*
	 * EEXIST is the callback's early-out sentinel, not an error:
	 * it means a matching cursor was found and *foundp was bumped.
	 */
	if ((ret = __db_walk_cursors(dbp, NULL, __ram_ca_delete_func,
	    foundp, root_pgno, 0, NULL)) != 0 && ret != EEXIST)
		return (ret);

	return (0);
}

/* Arguments shared between __bam_ca_di and its walk callback. */
struct __bam_ca_di_args {
	int adjust;		/* Signed index delta (insert/delete). */
	DB_TXN *my_txn;		/* Non-NULL only for subtransactions. */
};

/*
 * __bam_ca_di_func --
 *	Walk callback: shift cursor indices at or after indx on page pgno
 *	by args->adjust; flag (via *foundp) when another transaction's
 *	cursor was moved, so the adjustment gets logged.
 */
static int
__bam_ca_di_func(dbc, my_dbc, foundp, pgno, indx, vargs)
	DBC *dbc, *my_dbc;
	u_int32_t *foundp;
	db_pgno_t pgno;
	u_int32_t indx;
	void *vargs;
{
	DBC_INTERNAL *cp;
	struct __bam_ca_di_args *args;

	if (dbc->dbtype == DB_RECNO)
		return (0);

	cp = dbc->internal;
	args = vargs;
	if (cp->pgno == pgno && cp->indx >= indx &&
	    (dbc == my_dbc || !MVCC_SKIP_CURADJ(dbc, pgno))) {
		/*
		 * Cursor indices should never be negative: indx is
		 * unsigned, so a negative adjust at index 0 would wrap.
		 */
		DB_ASSERT(dbc->dbp->env, cp->indx != 0 || args->adjust > 0);
		/* [#8032]
		DB_ASSERT(env, !STD_LOCKING(dbc) ||
		    cp->lock_mode != DB_LOCK_NG);
		*/
		cp->indx += args->adjust;
		if (args->my_txn != NULL && args->my_txn != dbc->txn)
			*foundp = 1;
	}
	return (0);
}

/*
 * __bam_ca_di --
 *	Adjust the cursors during a delete or insert.
 *
 * PUBLIC: int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int));
 */
int
__bam_ca_di(my_dbc, pgno, indx, adjust)
	DBC *my_dbc;
	db_pgno_t pgno;
	u_int32_t indx;
	int adjust;
{
	DB *dbp;
	DB_LSN lsn;
	int ret;
	u_int32_t found;
	struct __bam_ca_di_args args;

	dbp = my_dbc->dbp;
	args.adjust = adjust;
	/* Only subtransaction adjustments need logging; see file comment. */
	args.my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;

	/*
	 * Adjust the cursors.  See the comment in __bam_ca_delete().
	 */
	if ((ret = __db_walk_cursors(dbp, my_dbc, __bam_ca_di_func,
	    &found, pgno, indx, &args)) != 0)
		return (ret);

	/* Log the adjustment only if a foreign-transaction cursor moved. */
	if (found != 0 && DBC_LOGGING(my_dbc)) {
		if ((ret = __bam_curadj_log(dbp, my_dbc->txn, &lsn, 0,
		    DB_CA_DI, pgno, 0, 0, (u_int32_t)adjust, indx, 0)) != 0)
			return (ret);
	}

	return (0);
}

/*
 * __bam_opd_cursor --
 *	Create a new off-page-duplicate cursor positioned at (tpgno, ti)
 *	and stack it under dbc, resetting dbc's index to first.
 */
static int
__bam_opd_cursor(dbp, dbc, first, tpgno, ti)
	DB *dbp;
	DBC *dbc;
	db_pgno_t tpgno;
	u_int32_t first, ti;
{
	BTREE_CURSOR *cp, *orig_cp;
	DBC *dbc_nopd;
	int ret;

	orig_cp = (BTREE_CURSOR *)dbc->internal;
	dbc_nopd = NULL;

	/*
	 * Allocate a new cursor and create the stack.  If duplicates
	 * are sorted, we've just created an off-page duplicate Btree.
	 * If duplicates aren't sorted, we've just created a Recno tree.
	 *
	 * Note that in order to get here at all, there shouldn't be
	 * an old off-page dup cursor--to augment the checking dbc_newopd
	 * will do, assert this.
	 */
	DB_ASSERT(dbp->env, orig_cp->opd == NULL);
	if ((ret = __dbc_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
		return (ret);

	cp = (BTREE_CURSOR *)dbc_nopd->internal;
	cp->pgno = tpgno;
	cp->indx = ti;

	if (dbp->dup_compare == NULL) {
		/*
		 * Converting to off-page Recno trees is tricky.  The
		 * record number for the cursor is the index + 1 (to
		 * convert to 1-based record numbers).
		 */
		cp->recno = ti + 1;
	}

	/*
	 * Transfer the deleted flag from the top-level cursor to the
	 * created one.
	 */
	if (F_ISSET(orig_cp, C_DELETED)) {
		F_SET(cp, C_DELETED);
		F_CLR(orig_cp, C_DELETED);
	}

	/* Stack the cursors and reset the initial cursor's index.
*/ + orig_cp->opd = dbc_nopd; + orig_cp->indx = first; + return (0); +} + +struct __bam_ca_dup_args { + db_pgno_t tpgno; + db_indx_t first, ti; + DB_TXN *my_txn; +}; + +static int +__bam_ca_dup_func(dbc, my_dbc, foundp, fpgno, fi, vargs) + DBC *dbc; + DBC *my_dbc; + u_int32_t *foundp; + db_pgno_t fpgno; + u_int32_t fi; + void *vargs; +{ + BTREE_CURSOR *orig_cp; + DB *dbp; + int ret; + struct __bam_ca_dup_args *args; + + COMPQUIET(my_dbc, NULL); + + /* + * Since we rescan the list see if this is already + * converted. + */ + orig_cp = (BTREE_CURSOR *)dbc->internal; + if (orig_cp->opd != NULL) + return (0); + + /* Find cursors pointing to this record. */ + if (orig_cp->pgno != fpgno || orig_cp->indx != fi || + MVCC_SKIP_CURADJ(dbc, fpgno)) + return (0); + + dbp = dbc->dbp; + args = vargs; + + MUTEX_UNLOCK(dbp->env, dbp->mutex); + + if ((ret = __bam_opd_cursor(dbp, + dbc, args->first, args->tpgno, args->ti)) != 0) { + MUTEX_LOCK(dbp->env, dbp->mutex); + return (ret); + } + if (args->my_txn != NULL && args->my_txn != dbc->txn) + *foundp = 1; + /* We released the mutex to get a cursor, start over. */ + return (DB_LOCK_NOTGRANTED); +} + +/* + * __bam_ca_dup -- + * Adjust the cursors when moving items from a leaf page to a duplicates + * page. + * + * PUBLIC: int __bam_ca_dup __P((DBC *, + * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); + */ +int +__bam_ca_dup(my_dbc, first, fpgno, fi, tpgno, ti) + DBC *my_dbc; + db_pgno_t fpgno, tpgno; + u_int32_t first, fi, ti; +{ + DB *dbp; + DB_LSN lsn; + int ret, t_ret; + u_int32_t found; + struct __bam_ca_dup_args args; + + dbp = my_dbc->dbp; + + args.first = first; + args.tpgno = tpgno; + args.ti = ti; + args.my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? 
my_dbc->txn : NULL; + + if ((ret = __db_walk_cursors(dbp, + my_dbc, __bam_ca_dup_func, &found, fpgno, fi, &args)) != 0) + return (ret); + + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((t_ret = __bam_curadj_log(dbp, my_dbc->txn, + &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0 && + ret == 0) + ret = t_ret; + } + + return (ret); +} + +static int +__bam_ca_undodup_func(dbc, my_dbc, countp, fpgno, fi, vargs) + DBC *dbc; + DBC *my_dbc; + u_int32_t *countp; + db_pgno_t fpgno; + u_int32_t fi; + void *vargs; +{ + BTREE_CURSOR *orig_cp; + DB *dbp; + int ret; + struct __bam_ca_dup_args *args; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(countp, NULL); + + orig_cp = (BTREE_CURSOR *)dbc->internal; + dbp = dbc->dbp; + args = vargs; + /* + * A note on the orig_cp->opd != NULL requirement here: + * it's possible that there's a cursor that refers to + * the same duplicate set, but which has no opd cursor, + * because it refers to a different item and we took + * care of it while processing a previous record. + */ + if (orig_cp->pgno != fpgno || + orig_cp->indx != args->first || + orig_cp->opd == NULL || ((BTREE_CURSOR *) + orig_cp->opd->internal)->indx != args->ti || + MVCC_SKIP_CURADJ(dbc, fpgno)) + return (0); + MUTEX_UNLOCK(dbp->env, dbp->mutex); + if ((ret = __dbc_close(orig_cp->opd)) != 0) { + MUTEX_LOCK(dbp->env, dbp->mutex); + return (ret); + } + orig_cp->opd = NULL; + orig_cp->indx = fi; + /* + * We released the mutex to free a cursor, + * start over. + */ + return (DB_LOCK_NOTGRANTED); +} + +/* + * __bam_ca_undodup -- + * Adjust the cursors when returning items to a leaf page + * from a duplicate page. + * Called only during undo processing. 
 *
 * PUBLIC: int __bam_ca_undodup __P((DB *,
 * PUBLIC:    u_int32_t, db_pgno_t, u_int32_t, u_int32_t));
 */
int
__bam_ca_undodup(dbp, first, fpgno, fi, ti)
	DB *dbp;
	db_pgno_t fpgno;
	u_int32_t first, fi, ti;
{
	u_int32_t count;
	struct __bam_ca_dup_args args;

	/*
	 * Only .first and .ti are read by __bam_ca_undodup_func; the
	 * remaining fields of the shared args struct are deliberately
	 * left unset.  fi travels as the walk's indx parameter.
	 */
	args.first = first;
	args.ti = ti;
	return (__db_walk_cursors(dbp, NULL,
	    __bam_ca_undodup_func, &count, fpgno, fi, &args));
}

/*
 * __bam_ca_rsplit_func --
 *	Walk callback: repoint cursors from fpgno to the target page
 *	(passed through args) during a reverse split; flag (via *foundp)
 *	when a foreign-transaction cursor was moved.
 */
static int
__bam_ca_rsplit_func(dbc, my_dbc, foundp, fpgno, indx, args)
	DBC *dbc;
	DBC *my_dbc;
	u_int32_t *foundp;
	db_pgno_t fpgno;
	u_int32_t indx;
	void *args;
{
	db_pgno_t tpgno;

	COMPQUIET(indx, 0);

	if (dbc->dbtype == DB_RECNO)
		return (0);

	tpgno = *(db_pgno_t *)args;
	if (dbc->internal->pgno == fpgno &&
	    !MVCC_SKIP_CURADJ(dbc, fpgno)) {
		dbc->internal->pgno = tpgno;
		/* [#8032]
		DB_ASSERT(env, !STD_LOCKING(dbc) ||
		    dbc->internal->lock_mode != DB_LOCK_NG);
		*/
		if (IS_SUBTRANSACTION(my_dbc->txn) && dbc->txn != my_dbc->txn)
			*foundp = 1;
	}
	return (0);
}

/*
 * __bam_ca_rsplit --
 *	Adjust the cursors when doing reverse splits.
+ * + * PUBLIC: int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t)); + */ +int +__bam_ca_rsplit(my_dbc, fpgno, tpgno) + DBC* my_dbc; + db_pgno_t fpgno, tpgno; +{ + DB *dbp; + DB_LSN lsn; + int ret; + u_int32_t found; + + dbp = my_dbc->dbp; + + if ((ret = __db_walk_cursors(dbp, my_dbc, + __bam_ca_rsplit_func, &found, fpgno, 0, &tpgno)) != 0) + return (ret); + + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, my_dbc->txn, + &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0) + return (ret); + } + return (0); +} + +struct __bam_ca_split_args { + db_pgno_t lpgno, rpgno; + int cleft; + DB_TXN *my_txn; +}; + +static int +__bam_ca_split_func(dbc, my_dbc, foundp, ppgno, split_indx, vargs) + DBC *dbc; + DBC *my_dbc; + u_int32_t *foundp; + db_pgno_t ppgno; + u_int32_t split_indx; + void *vargs; +{ + DBC_INTERNAL *cp; + struct __bam_ca_split_args *args; + + COMPQUIET(my_dbc, NULL); + + if (dbc->dbtype == DB_RECNO) + return (0); + cp = dbc->internal; + args = vargs; + if (cp->pgno == ppgno && + !MVCC_SKIP_CURADJ(dbc, ppgno)) { + /* [#8032] + DB_ASSERT(env, !STD_LOCKING(dbc) || + cp->lock_mode != DB_LOCK_NG); + */ + if (args->my_txn != NULL && args->my_txn != dbc->txn) + *foundp = 1; + if (cp->indx < split_indx) { + if (args->cleft) + cp->pgno = args->lpgno; + } else { + cp->pgno = args->rpgno; + cp->indx -= split_indx; + } + } + return (0); +} + +/* + * __bam_ca_split -- + * Adjust the cursors when splitting a page. + * + * PUBLIC: int __bam_ca_split __P((DBC *, + * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int)); + */ +int +__bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft) + DBC *my_dbc; + db_pgno_t ppgno, lpgno, rpgno; + u_int32_t split_indx; + int cleft; +{ + DB *dbp; + DB_LSN lsn; + int ret; + u_int32_t found; + struct __bam_ca_split_args args; + + dbp = my_dbc->dbp; + + /* + * If splitting the page that a cursor was on, the cursor has to be + * adjusted to point to the same record as before the split. 
Most + * of the time we don't adjust pointers to the left page, because + * we're going to copy its contents back over the original page. If + * the cursor is on the right page, it is decremented by the number of + * records split to the left page. + */ + args.lpgno = lpgno; + args.rpgno = rpgno; + args.cleft = cleft; + args.my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL; + if ((ret = __db_walk_cursors(dbp, my_dbc, + __bam_ca_split_func, &found, ppgno, split_indx, &args)) != 0) + return (ret); + + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, + my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno, + cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0) + return (ret); + } + + return (0); +} + +static int +__bam_ca_undosplit_func(dbc, my_dbc, foundp, frompgno, split_indx, vargs) + DBC *dbc; + DBC *my_dbc; + u_int32_t *foundp; + db_pgno_t frompgno; + u_int32_t split_indx; + void *vargs; +{ + DBC_INTERNAL *cp; + struct __bam_ca_split_args *args; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(foundp, NULL); + + if (dbc->dbtype == DB_RECNO) + return (0); + cp = dbc->internal; + args = vargs; + if (cp->pgno == args->rpgno && + !MVCC_SKIP_CURADJ(dbc, args->rpgno)) { + cp->pgno = frompgno; + cp->indx += split_indx; + } else if (cp->pgno == args->lpgno && + !MVCC_SKIP_CURADJ(dbc, args->lpgno)) + cp->pgno = frompgno; + + return (0); +} + +/* + * __bam_ca_undosplit -- + * Adjust the cursors when undoing a split of a page. + * If we grew a level we will execute this for both the + * left and the right pages. + * Called only during undo processing. 
+ * + * PUBLIC: int __bam_ca_undosplit __P((DB *, + * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t)); + */ +int +__bam_ca_undosplit(dbp, frompgno, topgno, lpgno, split_indx) + DB *dbp; + db_pgno_t frompgno, topgno, lpgno; + u_int32_t split_indx; +{ + u_int32_t count; + struct __bam_ca_split_args args; + + /* + * When backing out a split, we move the cursor back + * to the original offset and bump it by the split_indx. + */ + args.lpgno = lpgno; + args.rpgno = topgno; + return (__db_walk_cursors(dbp, NULL, + __bam_ca_undosplit_func, &count, frompgno, split_indx, &args)); +} diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c new file mode 100644 index 00000000..be1c9287 --- /dev/null +++ b/src/btree/bt_cursor.c @@ -0,0 +1,3076 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __bam_bulk __P((DBC *, DBT *, u_int32_t)); +static int __bamc_close __P((DBC *, db_pgno_t, int *)); +static int __bamc_del __P((DBC *, u_int32_t)); +static int __bamc_destroy __P((DBC *)); +static int __bamc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __bamc_getstack __P((DBC *)); +static int __bamc_next __P((DBC *, int, int)); +static int __bamc_physdel __P((DBC *)); +static int __bamc_prev __P((DBC *)); +static int __bamc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __bamc_search __P((DBC *, + db_pgno_t, const DBT *, u_int32_t, int *)); +static int __bamc_writelock __P((DBC *)); +static int __bam_getboth_finddatum __P((DBC *, DBT *, u_int32_t)); +static int __bam_getbothc __P((DBC *, DBT *)); +static int __bam_get_prev __P((DBC *)); +static int __bam_isopd __P((DBC *, db_pgno_t *)); +#ifdef HAVE_COMPRESSION +static int __bam_getlte __P((DBC *, DBT *, DBT 
*)); +#endif + +/* + * Acquire a new page/lock. If we hold a page/lock, discard the page, and + * lock-couple the lock. + * + * !!! + * We have to handle both where we have a lock to lock-couple and where we + * don't -- we don't duplicate locks when we duplicate cursors if we are + * running in a transaction environment as there's no point if locks are + * never discarded. This means that the cursor may or may not hold a lock. + * In the case where we are descending the tree we always want to unlock + * the held interior page so we use ACQUIRE_COUPLE. + */ +#undef ACQUIRE +#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, flags, ret) do { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + if ((pagep) != NULL) { \ + ret = __memp_fput(__mpf, \ + (dbc)->thread_info, pagep, dbc->priority); \ + pagep = NULL; \ + } else \ + ret = 0; \ + if ((ret) == 0 && STD_LOCKING(dbc)) \ + ret = __db_lget( \ + dbc, LCK_COUPLE, lpgno, mode, flags, &(lock)); \ + if ((ret) == 0) \ + ret = __memp_fget(__mpf, &(fpgno), \ + (dbc)->thread_info, (dbc)->txn, 0, &(pagep)); \ +} while (0) + +/* Acquire a new page/lock for a cursor. */ +#undef ACQUIRE_CUR +#define ACQUIRE_CUR(dbc, mode, p, flags, ret) do { \ + BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ + if (p != __cp->pgno) \ + __cp->pgno = PGNO_INVALID; \ + ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, flags, ret); \ + if ((ret) == 0) { \ + __cp->pgno = p; \ + __cp->lock_mode = (mode); \ + } \ +} while (0) + +/* + * Acquire a write lock if we don't already have one. + * + * !!! + * See ACQUIRE macro on why we handle cursors that don't have locks. 
+ */ +#undef ACQUIRE_WRITE_LOCK +#define ACQUIRE_WRITE_LOCK(dbc, ret) do { \ + BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + int __get_page = 0; \ + ret = 0; \ + if (STD_LOCKING(dbc) && __cp->lock_mode != DB_LOCK_WRITE) { \ + if (__cp->page != NULL) { \ + (ret) = __memp_fput(__mpf, (dbc)->thread_info, \ + __cp->page, (dbc)->priority); \ + __cp->page = NULL; \ + __get_page = 1; \ + if ((ret) !=0) \ + break; \ + } \ + if (((ret) = __db_lget((dbc), \ + LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0, \ + __cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) != 0) \ + break; \ + __cp->lock_mode = DB_LOCK_WRITE; \ + if (__get_page == 0) \ + break; \ + (ret) = __memp_fget(__mpf, &__cp->pgno, \ + (dbc)->thread_info, \ + (dbc)->txn, DB_MPOOL_DIRTY, &__cp->page); \ + } \ +} while (0) + +/* Discard the current page/lock for a cursor. */ +#undef DISCARD_CUR +#define DISCARD_CUR(dbc, ret) do { \ + BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + int __t_ret; \ + if ((__cp->page) != NULL) { \ + __t_ret = __memp_fput(__mpf, \ + (dbc)->thread_info, __cp->page, dbc->priority);\ + __cp->page = NULL; \ + } else \ + __t_ret = 0; \ + if (__t_ret != 0 && (ret) == 0) \ + ret = __t_ret; \ + __t_ret = __TLPUT((dbc), __cp->lock); \ + if (__t_ret != 0 && (ret) == 0) \ + ret = __t_ret; \ + if ((ret) == 0 && !LOCK_ISSET(__cp->lock)) \ + __cp->lock_mode = DB_LOCK_NG; \ + __cp->stream_start_pgno = PGNO_INVALID; \ +} while (0) + +/* If on-page item is a deleted record. */ +#undef IS_DELETED +#define IS_DELETED(dbp, page, indx) \ + B_DISSET(GET_BKEYDATA(dbp, page, \ + (indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type) +#undef IS_CUR_DELETED +#define IS_CUR_DELETED(dbc) \ + IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx) + +/* + * Test to see if two cursors could point to duplicates of the same key. 
+ * In the case of off-page duplicates they are they same, as the cursors + * will be in the same off-page duplicate tree. In the case of on-page + * duplicates, the key index offsets must be the same. For the last test, + * as the original cursor may not have a valid page pointer, we use the + * current cursor's. + */ +#undef IS_DUPLICATE +#define IS_DUPLICATE(dbc, i1, i2) \ + (P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] == \ + P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2]) +#undef IS_CUR_DUPLICATE +#define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx) \ + (F_ISSET(dbc, DBC_OPD) || \ + (orig_pgno == (dbc)->internal->pgno && \ + IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx))) + +/* + * __bamc_init -- + * Initialize the access private portion of a cursor + * + * PUBLIC: int __bamc_init __P((DBC *, DBTYPE)); + */ +int +__bamc_init(dbc, dbtype) + DBC *dbc; + DBTYPE dbtype; +{ + ENV *env; + int ret; +#ifdef HAVE_COMPRESSION + BTREE_CURSOR *cp; +#endif + + env = dbc->env; + + /* Allocate/initialize the internal structure. */ + if (dbc->internal == NULL) { + if ((ret = __os_calloc( + env, 1, sizeof(BTREE_CURSOR), &dbc->internal)) != 0) + return (ret); + +#ifdef HAVE_COMPRESSION + cp = (BTREE_CURSOR*)dbc->internal; + cp->compressed.flags = DB_DBT_USERMEM; + cp->key1.flags = DB_DBT_USERMEM; + cp->key2.flags = DB_DBT_USERMEM; + cp->data1.flags = DB_DBT_USERMEM; + cp->data2.flags = DB_DBT_USERMEM; + cp->del_key.flags = DB_DBT_USERMEM; + cp->del_data.flags = DB_DBT_USERMEM; +#endif + } + + /* Initialize methods. 
*/ + dbc->close = dbc->c_close = __dbc_close_pp; + dbc->cmp = __dbc_cmp_pp; + dbc->count = dbc->c_count = __dbc_count_pp; + dbc->del = dbc->c_del = __dbc_del_pp; + dbc->dup = dbc->c_dup = __dbc_dup_pp; + dbc->get = dbc->c_get = __dbc_get_pp; + dbc->pget = dbc->c_pget = __dbc_pget_pp; + dbc->put = dbc->c_put = __dbc_put_pp; + if (dbtype == DB_BTREE) { + dbc->am_bulk = __bam_bulk; + dbc->am_close = __bamc_close; + dbc->am_del = __bamc_del; + dbc->am_destroy = __bamc_destroy; + dbc->am_get = __bamc_get; + dbc->am_put = __bamc_put; + dbc->am_writelock = __bamc_writelock; + } else { + dbc->am_bulk = __bam_bulk; + dbc->am_close = __bamc_close; + dbc->am_del = __ramc_del; + dbc->am_destroy = __bamc_destroy; + dbc->am_get = __ramc_get; + dbc->am_put = __ramc_put; + dbc->am_writelock = __bamc_writelock; + } + + return (0); +} + +/* + * __bamc_refresh + * Set things up properly for cursor re-use. + * + * PUBLIC: int __bamc_refresh __P((DBC *)); + */ +int +__bamc_refresh(dbc) + DBC *dbc; +{ + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + + dbp = dbc->dbp; + t = dbp->bt_internal; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * If our caller set the root page number, it's because the root was + * known. This is always the case for off page dup cursors. Else, + * pull it out of our internal information, unless this is a subdb. 
+ */ + if (cp->root == PGNO_INVALID && t->bt_meta == PGNO_BASE_MD) + cp->root = t->bt_root; + + LOCK_INIT(cp->lock); + cp->lock_mode = DB_LOCK_NG; + + if (cp->sp == NULL) { + cp->sp = cp->stack; + cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); + } + BT_STK_CLR(cp); + +#ifdef HAVE_COMPRESSION + /* Initialize compression */ + cp->prevKey = 0; + cp->prevData = 0; + cp->currentKey = 0; + cp->currentData = 0; + cp->compcursor = 0; + cp->compend = 0; + cp->prevcursor = 0; + cp->prev2cursor = 0; +#endif + + /* + * The btree leaf page data structures require that two key/data pairs + * (or four items) fit on a page, but other than that there's no fixed + * requirement. The btree off-page duplicates only require two items, + * to be exact, but requiring four for them as well seems reasonable. + * + * Recno uses the btree bt_ovflsize value -- it's close enough. + */ + cp->ovflsize = B_MINKEY_TO_OVFLSIZE( + dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize); + + cp->recno = RECNO_OOB; + cp->order = INVALID_ORDER; + cp->flags = 0; + + /* Initialize for record numbers. */ + if (F_ISSET(dbc, DBC_OPD) || + dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) { + F_SET(cp, C_RECNUM); + + /* + * All btrees that support record numbers, optionally standard + * recno trees, and all off-page duplicate recno trees have + * mutable record numbers. + */ + if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) || + F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER)) + F_SET(cp, C_RENUMBER); + } + + return (0); +} + +/* + * __bamc_close -- + * Close down the cursor. + */ +static int +__bamc_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + BTREE_CURSOR *cp, *cp_opd, *cp_c; + DB *dbp; + DBC *dbc_opd, *dbc_c; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + int cdb_lock, ret; + u_int32_t count; + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + cp_opd = (dbc_opd = cp->opd) == NULL ? 
+ NULL : (BTREE_CURSOR *)dbc_opd->internal; + cdb_lock = ret = 0; + + /* + * There are 3 ways this function is called: + * + * 1. Closing a primary cursor: we get called with a pointer to a + * primary cursor that has a NULL opd field. This happens when + * closing a btree/recno database cursor without an associated + * off-page duplicate tree. + * + * 2. Closing a primary and an off-page duplicate cursor stack: we + * get called with a pointer to the primary cursor which has a + * non-NULL opd field. This happens when closing a btree cursor + * into database with an associated off-page btree/recno duplicate + * tree. (It can't be a primary recno database, recno databases + * don't support duplicates.) + * + * 3. Closing an off-page duplicate cursor stack: we get called with + * a pointer to the off-page duplicate cursor. This happens when + * closing a non-btree database that has an associated off-page + * btree/recno duplicate tree or for a btree database when the + * opd tree is not empty (root_pgno == PGNO_INVALID). + * + * If either the primary or off-page duplicate cursor deleted a btree + * key/data pair, check to see if the item is still referenced by a + * different cursor. If it is, confirm that cursor's delete flag is + * set and leave it to that cursor to do the delete. + * + * NB: The test for == 0 below is correct. Our caller already removed + * our cursor argument from the active queue, we won't find it when we + * search the queue in __bam_ca_delete(). + * NB: It can't be true that both the primary and off-page duplicate + * cursors have deleted a btree key/data pair. Either the primary + * cursor may have deleted an item and there's no off-page duplicate + * cursor, or there's an off-page duplicate cursor and it may have + * deleted an item. + * + * Primary recno databases aren't an issue here. Recno keys are either + * deleted immediately or never deleted, and do not have to be handled + * here. 
+ * + * Off-page duplicate recno databases are an issue here, cases #2 and + * #3 above can both be off-page recno databases. The problem is the + * same as the final problem for off-page duplicate btree databases. + * If we no longer need the off-page duplicate tree, we want to remove + * it. For off-page duplicate btrees, we are done with the tree when + * we delete the last item it contains, i.e., there can be no further + * references to it when it's empty. For off-page duplicate recnos, + * we remove items from the tree as the application calls the remove + * function, so we are done with the tree when we close the last cursor + * that references it. + * + * We optionally take the root page number from our caller. If the + * primary database is a btree, we can get it ourselves because dbc + * is the primary cursor. If the primary database is not a btree, + * the problem is that we may be dealing with a stack of pages. The + * cursor we're using to do the delete points at the bottom of that + * stack and we need the top of the stack. + */ + if (F_ISSET(cp, C_DELETED)) { + dbc_c = dbc; + switch (dbc->dbtype) { + case DB_BTREE: /* Case #1, #3. */ + if ((ret = __bam_ca_delete( + dbp, cp->pgno, cp->indx, 1, &count)) != 0) + goto err; + if (count == 0) + goto lock; + goto done; + case DB_RECNO: + if (!F_ISSET(dbc, DBC_OPD)) /* Case #1. */ + goto done; + /* Case #3. */ + if ((ret = __ram_ca_delete(dbp, cp->root, &count)) != 0) + goto err; + if (count == 0) + goto lock; + goto done; + case DB_HASH: + case DB_QUEUE: + case DB_UNKNOWN: + default: + ret = __db_unknown_type( + env, "DbCursor.close", dbc->dbtype); + goto err; + } + } + + if (dbc_opd == NULL) + goto done; + + if (F_ISSET(cp_opd, C_DELETED)) { /* Case #2. */ + /* + * We will not have been provided a root page number. Acquire + * one from the primary database. 
+ */ + if ((h = cp->page) == NULL && (ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &h)) != 0) + goto err; + root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno; + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + cp->page = NULL; + + dbc_c = dbc_opd; + switch (dbc_opd->dbtype) { + case DB_BTREE: + if ((ret = __bam_ca_delete( + dbp, cp_opd->pgno, cp_opd->indx, 1, &count)) != 0) + goto err; + if (count == 0) + goto lock; + goto done; + case DB_RECNO: + if ((ret = + __ram_ca_delete(dbp, cp_opd->root, &count)) != 0) + goto err; + if (count == 0) + goto lock; + goto done; + case DB_HASH: + case DB_QUEUE: + case DB_UNKNOWN: + default: + ret = __db_unknown_type( + env, "DbCursor.close", dbc->dbtype); + goto err; + } + } + goto done; + +lock: cp_c = (BTREE_CURSOR *)dbc_c->internal; + + /* + * If this is CDB, upgrade the lock if necessary. While we acquired + * the write lock to logically delete the record, we released it when + * we returned from that call, and so may not be holding a write lock + * at the moment. + */ + if (CDB_LOCKING(env)) { + if (F_ISSET(dbc, DBC_WRITECURSOR)) { + if ((ret = __lock_get(env, + dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, + DB_LOCK_WRITE, &dbc->mylock)) != 0) + goto err; + cdb_lock = 1; + } + goto do_del; + } + + /* + * The variable dbc_c has been initialized to reference the cursor in + * which we're going to do the delete. Initialize the cursor's lock + * structures as necessary. + * + * First, we may not need to acquire any locks. If we're in case #3, + * that is, the primary database isn't a btree database, our caller + * is responsible for acquiring any necessary locks before calling us. + */ + if (F_ISSET(dbc, DBC_OPD)) + goto do_del; + + /* + * Otherwise, acquire a write lock on the primary database's page. + * + * Lock the primary database page, regardless of whether we're deleting + * an item on a primary database page or an off-page duplicates page. 
+ * + * If the cursor that did the initial logical deletion (and had a write + * lock) is not the same cursor doing the physical deletion (which may + * have only ever had a read lock on the item), we need to upgrade to a + * write lock. The confusion comes as follows: + * + * C1 created, acquires item read lock + * C2 dup C1, create C2, also has item read lock. + * C1 acquire write lock, delete item + * C1 close + * C2 close, needs a write lock to physically delete item. + * + * If we're in a TXN, we know that C2 will be able to acquire the write + * lock, because no locker other than the one shared by C1 and C2 can + * acquire a write lock -- the original write lock C1 acquired was never + * discarded. + * + * If we're not in a TXN, it's nastier. Other cursors might acquire + * read locks on the item after C1 closed, discarding its write lock, + * and such locks would prevent C2 from acquiring a read lock. That's + * OK, though, we'll simply wait until we can acquire a write lock, or + * we'll deadlock. (Which better not happen, since we're not in a TXN.) + * + * There are similar scenarios with dirty reads, where the cursor may + * have downgraded its write lock to a was-write lock. + */ + if (STD_LOCKING(dbc)) + if ((ret = __db_lget(dbc, + LCK_COUPLE, cp->pgno, DB_LOCK_WRITE, 0, &cp->lock)) != 0) + goto err; + +do_del: /* + * If the delete occurred in a Btree, we're going to look at the page + * to see if the item has to be physically deleted. Otherwise, we do + * not need the actual page (and it may not even exist, it might have + * been truncated from the file after an allocation aborted). + * + * Delete the on-page physical item referenced by the cursor. 
+ */ + if (F_ISSET(dbc_c, DBC_OPD)) + LOCK_CHECK_OFF(dbc_c->thread_info); + if (dbc_c->dbtype == DB_BTREE) { + if ((ret = __memp_fget(mpf, &cp_c->pgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &cp_c->page)) != 0) + goto err_c; + if ((ret = __bamc_physdel(dbc_c)) != 0) + goto err_c; + } + + /* + * If we're not working in an off-page duplicate tree, then we're + * done. + */ + if (!F_ISSET(dbc_c, DBC_OPD) || root_pgno == PGNO_INVALID) + goto done; + + /* + * We may have just deleted the last element in the off-page duplicate + * tree, and closed the last cursor in the tree. For an off-page btree + * there are no other cursors in the tree by definition, if the tree is + * empty. For an off-page recno we know we have closed the last cursor + * in the tree because the __ram_ca_delete call above returned 0 only + * in that case. So, if the off-page duplicate tree is empty at this + * point, we want to remove it. + */ + if (((h = dbc_c->internal->page) == NULL || h->pgno != root_pgno) && + (ret = __memp_fget(mpf, + &root_pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0) + goto err_c; + if ((count = NUM_ENT(h)) == 0) { + if (h != dbc_c->internal->page) + DISCARD_CUR(dbc_c, ret); + else + dbc_c->internal->page = NULL; + if (ret == 0) + ret = __db_free(dbc, h, 0); + } else if (h != dbc_c->internal->page) + ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority); + +err_c: if (F_ISSET(dbc_c, DBC_OPD)) + LOCK_CHECK_ON(dbc_c->thread_info); + if (ret != 0) + goto err; + + if (count != 0) + goto done; + + /* + * When removing the tree, we have to do one of two things. If this is + * case #2, that is, the primary tree is a btree, delete the key that's + * associated with the tree from the btree leaf page. We know we are + * the only reference to it and we already have the correct lock. We + * detect this case because the cursor that was passed to us references + * an off-page duplicate cursor. 
+ * + * If this is case #3, that is, the primary tree isn't a btree, pass + * the information back to our caller, it's their job to do cleanup on + * the primary page. + */ + if (dbc_opd != NULL) { + if ((ret = __memp_fget(mpf, &cp->pgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &cp->page)) != 0) + goto err; + if ((ret = __bamc_physdel(dbc)) != 0) + goto err; + } else + *rmroot = 1; +err: +done: /* + * Discard the page references and locks, and confirm that the stack + * has been emptied. + */ + if (dbc_opd != NULL) + DISCARD_CUR(dbc_opd, ret); + DISCARD_CUR(dbc, ret); + + /* Downgrade any CDB lock we acquired. */ + if (cdb_lock) + (void)__lock_downgrade(env, &dbc->mylock, DB_LOCK_IWRITE, 0); + + return (ret); +} + +/* + * __bamc_cmp -- + * Compare two btree cursors for equality. + * + * This function is only called with two cursors that point to the same item. + * It only distinguishes cursors pointing to deleted and undeleted items at + * the same location. + * + * PUBLIC: int __bamc_cmp __P((DBC *, DBC *, int *)); + */ +int +__bamc_cmp(dbc, other_dbc, result) + DBC *dbc, *other_dbc; + int *result; +{ + ENV *env; + BTREE_CURSOR *bcp, *obcp; + + env = dbc->env; + bcp = (BTREE_CURSOR *)dbc->internal; + obcp = (BTREE_CURSOR *)other_dbc->internal; + + DB_ASSERT (env, bcp->pgno == obcp->pgno); + DB_ASSERT (env, bcp->indx == obcp->indx); + + /* Check to see if both cursors have the same deleted flag. */ + *result = + ((F_ISSET(bcp, C_DELETED)) == F_ISSET(obcp, C_DELETED)) ? 0 : 1; + return (0); +} + +/* + * __bamc_destroy -- + * Close a single cursor -- internal version. + */ +static int +__bamc_destroy(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + ENV *env; + + cp = (BTREE_CURSOR *)dbc->internal; + env = dbc->env; + + /* Discard the structures. 
*/ + if (cp->sp != cp->stack) + __os_free(env, cp->sp); + +#ifdef HAVE_COMPRESSION + /* Free the memory used for compression */ + __os_free(env, cp->compressed.data); + __os_free(env, cp->key1.data); + __os_free(env, cp->key2.data); + __os_free(env, cp->data1.data); + __os_free(env, cp->data2.data); + __os_free(env, cp->del_key.data); + __os_free(env, cp->del_data.data); +#endif + + __os_free(env, cp); + + return (0); +} + +/* + * __bamc_count -- + * Return a count of on and off-page duplicates. + * + * PUBLIC: int __bamc_count __P((DBC *, db_recno_t *)); + */ +int +__bamc_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + db_indx_t indx, top; + db_recno_t recno; + int ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Called with the top-level cursor that may reference an off-page + * duplicates tree. We don't have to acquire any new locks, we have + * to have a read lock to even get here. + */ + if (cp->opd == NULL) { + /* + * On-page duplicates, get the page and count. + */ + DB_ASSERT(dbp->env, cp->page == NULL); + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + return (ret); + + /* + * Move back to the beginning of the set of duplicates and + * then count forward. + */ + for (indx = cp->indx;; indx -= P_INDX) + if (indx == 0 || + !IS_DUPLICATE(dbc, indx, indx - P_INDX)) + break; + for (recno = 0, + top = NUM_ENT(cp->page) - P_INDX;; indx += P_INDX) { + if (!IS_DELETED(dbp, cp->page, indx)) + ++recno; + if (indx == top || + !IS_DUPLICATE(dbc, indx, indx + P_INDX)) + break; + } + } else { + /* + * Off-page duplicates tree, get the root page of the off-page + * duplicate tree. 
+ */ + if ((ret = __memp_fget(mpf, &cp->opd->internal->root, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + return (ret); + + /* + * If the page is an internal page use the page's count as it's + * up-to-date and reflects the status of cursors in the tree. + * If the page is a leaf page for unsorted duplicates, use the + * page's count as cursors don't mark items deleted on the page + * and wait, cursor delete items immediately. + * If the page is a leaf page for sorted duplicates, there may + * be cursors on the page marking deleted items -- count. + */ + if (TYPE(cp->page) == P_LDUP) + for (recno = 0, indx = 0, + top = NUM_ENT(cp->page) - O_INDX;; indx += O_INDX) { + if (!IS_DELETED(dbp, cp->page, indx)) + ++recno; + if (indx == top) + break; + } + else + recno = RE_NREC(cp->page); + } + + *recnop = recno; + + ret = __memp_fput(mpf, dbc->thread_info, cp->page, dbc->priority); + cp->page = NULL; + + return (ret); +} + +/* + * __bamc_del -- + * Delete using a cursor. + */ +static int +__bamc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + int ret, t_ret; + u_int32_t count; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + ret = 0; + COMPQUIET(flags, 0); + + /* If the item was already deleted, return failure. */ + if (F_ISSET(cp, C_DELETED)) + return (DB_KEYEMPTY); + + /* + * This code is always called with a page lock but no page. + */ + DB_ASSERT(dbp->env, cp->page == NULL); + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + + /* + * We don't physically delete the record until the cursor moves, so + * we have to have a long-lived write lock on the page instead of a + * a long-lived read lock. Note, we have to have a read lock to even + * get here. + * + * If we're maintaining record numbers, we lock the entire tree, else + * we lock the single page. 
+ */ + if (F_ISSET(cp, C_RECNUM)) { + if ((ret = __bamc_getstack(dbc)) != 0) + goto err; + cp->page = cp->csp->page; + } else { + ACQUIRE_CUR(dbc, DB_LOCK_WRITE, cp->pgno, 0, ret); + if (ret != 0) + goto err; + } + + /* Mark the page dirty. */ + if ((ret = __memp_dirty(mpf, + &cp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + + /* Log the change. */ + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cdel_log(dbp, dbc->txn, &LSN(cp->page), 0, + PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + /* Set the intent-to-delete flag on the page. */ + if (TYPE(cp->page) == P_LBTREE) + B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx + O_INDX)->type); + else + B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type); + +err: /* + * If we've been successful so far and the tree has record numbers, + * adjust the record counts. Either way, release acquired page(s). + */ + if (F_ISSET(cp, C_RECNUM)) { + cp->csp->page = cp->page; + if (ret == 0) + ret = __bam_adjust(dbc, -1); + (void)__bam_stkrel(dbc, 0); + } else + if (cp->page != NULL && + (t_ret = __memp_fput(mpf, dbc->thread_info, + cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + cp->page = NULL; + + /* + * Update the cursors last, after all chance of recoverable failure + * is past. + */ + if (ret == 0) + ret = __bam_ca_delete(dbp, cp->pgno, cp->indx, 1, &count); + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + return (ret); +} + +/* + * __bamc_dup -- + * Duplicate a btree cursor, such that the new one holds appropriate + * locks for the position of the original. 
+ * + * PUBLIC: int __bamc_dup __P((DBC *, DBC *, u_int32_t)); + */ +int +__bamc_dup(orig_dbc, new_dbc, flags) + DBC *orig_dbc, *new_dbc; + u_int32_t flags; +{ + BTREE_CURSOR *orig, *new; + + orig = (BTREE_CURSOR *)orig_dbc->internal; + new = (BTREE_CURSOR *)new_dbc->internal; + + new->ovflsize = orig->ovflsize; + new->recno = orig->recno; + new->flags = orig->flags; + +#ifdef HAVE_COMPRESSION + /* Copy the compression state */ + return (__bamc_compress_dup(orig_dbc, new_dbc, flags)); +#else + COMPQUIET(flags, 0); + + return (0); +#endif +} + +/* + * __bamc_get -- + * Get using a cursor (btree). + */ +static int +__bamc_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + db_pgno_t orig_pgno; + db_indx_t orig_indx; + int exact, newopd, ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + orig_pgno = cp->pgno; + orig_indx = cp->indx; + + newopd = 0; + switch (flags) { + case DB_CURRENT: + /* It's not possible to return a deleted record. */ + if (F_ISSET(cp, C_DELETED)) { + ret = DB_KEYEMPTY; + goto err; + } + + /* + * Acquire the current page. We have at least a read-lock + * already. The caller may have set DB_RMW asking for a + * write lock, but upgrading to a write lock has no better + * chance of succeeding now instead of later, so don't try. + */ + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + goto err; + break; + case DB_FIRST: + newopd = 1; + if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, flags, &exact)) != 0) + goto err; + break; + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + /* + * There are two ways to get here based on DBcursor->get + * with the DB_GET_BOTH/DB_GET_BOTH_RANGE flags set: + * + * 1. Searching a sorted off-page duplicate tree: do a tree + * search. + * + * 2. Searching btree: do a tree search. 
If it returns a + * reference to off-page duplicate tree, return immediately + * and let our caller deal with it. If the search doesn't + * return a reference to off-page duplicate tree, continue + * with an on-page search. + */ + if (F_ISSET(dbc, DBC_OPD)) { + if ((ret = __bamc_search( + dbc, PGNO_INVALID, data, flags, &exact)) != 0) + goto err; + if (flags == DB_GET_BOTH) { + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + break; + } + + /* + * We didn't require an exact match, so the search may + * may have returned an entry past the end of the page, + * or we may be referencing a deleted record. If so, + * move to the next entry. + */ + if ((cp->indx == NUM_ENT(cp->page) || + IS_CUR_DELETED(dbc)) && + (ret = __bamc_next(dbc, 1, 0)) != 0) + goto err; + } else { + if ((ret = __bamc_search( + dbc, PGNO_INVALID, key, flags, &exact)) != 0) + return (ret); + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + + if (pgnop != NULL && __bam_isopd(dbc, pgnop)) { + newopd = 1; + break; + } + if ((ret = + __bam_getboth_finddatum(dbc, data, flags)) != 0) + goto err; + } + break; +#ifdef HAVE_COMPRESSION + case DB_SET_LTE: + if ((ret = __bam_getlte(dbc, key, NULL)) != 0) + goto err; + break; + case DB_GET_BOTH_LTE: + if ((ret = __bam_getlte(dbc, key, data)) != 0) + goto err; + break; +#endif + case DB_GET_BOTHC: + if ((ret = __bam_getbothc(dbc, data)) != 0) + goto err; + break; + case DB_LAST: + newopd = 1; + if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, flags, &exact)) != 0) + goto err; + break; + case DB_NEXT: + newopd = 1; + if (cp->pgno == PGNO_INVALID) { + if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, DB_FIRST, &exact)) != 0) + goto err; + } else + if ((ret = __bamc_next(dbc, 1, 0)) != 0) + goto err; + break; + case DB_NEXT_DUP: + if ((ret = __bamc_next(dbc, 1, 0)) != 0) + goto err; + if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) { + ret = DB_NOTFOUND; + goto err; + } + break; + case DB_NEXT_NODUP: + newopd = 1; + if (cp->pgno == PGNO_INVALID) { + 
if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, DB_FIRST, &exact)) != 0) + goto err; + } else + do { + if ((ret = __bamc_next(dbc, 1, 0)) != 0) + goto err; + } while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)); + break; + case DB_PREV: + newopd = 1; + if (cp->pgno == PGNO_INVALID) { + if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, DB_LAST, &exact)) != 0) + goto err; + } else + if ((ret = __bamc_prev(dbc)) != 0) + goto err; + break; + case DB_PREV_DUP: + if ((ret = __bamc_prev(dbc)) != 0) + goto err; + if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) { + ret = DB_NOTFOUND; + goto err; + } + break; + case DB_PREV_NODUP: + newopd = 1; + if (cp->pgno == PGNO_INVALID) { + if ((ret = __bamc_search(dbc, + PGNO_INVALID, NULL, DB_LAST, &exact)) != 0) + goto err; + } else + do { + if ((ret = __bamc_prev(dbc)) != 0) + goto err; + } while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)); + break; + case DB_SET: + case DB_SET_RECNO: + newopd = 1; + if ((ret = __bamc_search(dbc, + PGNO_INVALID, key, flags, &exact)) != 0) + goto err; + break; + case DB_SET_RANGE: + newopd = 1; + if ((ret = __bamc_search(dbc, + PGNO_INVALID, key, flags, &exact)) != 0) + goto err; + + /* + * As we didn't require an exact match, the search function + * may have returned an entry past the end of the page. Or, + * we may be referencing a deleted record. If so, move to + * the next entry. + */ + if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc)) + if ((ret = __bamc_next(dbc, 0, 0)) != 0) + goto err; + break; + default: + ret = __db_unknown_flag(dbp->env, "__bamc_get", flags); + goto err; + } + + /* + * We may have moved to an off-page duplicate tree. Return that + * information to our caller. + */ + if (newopd && pgnop != NULL) + (void)__bam_isopd(dbc, pgnop); + +err: /* + * Regardless of whether we were successful or not, if the cursor + * moved, clear the delete flag, DBcursor->get never references a + * deleted key, if it moved at all. 
+ */ + if (F_ISSET(cp, C_DELETED) && + (cp->pgno != orig_pgno || cp->indx != orig_indx)) + F_CLR(cp, C_DELETED); + + return (ret); +} + +static int +__bam_get_prev(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + DBT key, data; + db_pgno_t pgno; + int ret; + + if ((ret = __bamc_prev(dbc)) != 0) + return (ret); + + if (__bam_isopd(dbc, &pgno)) { + cp = (BTREE_CURSOR *)dbc->internal; + if ((ret = __dbc_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0) + return (ret); + if ((ret = cp->opd->am_get(cp->opd, + &key, &data, DB_LAST, NULL)) != 0) + return (ret); + } + + return (0); +} + +/* + * __bam_bulk -- Return bulk data from a btree. + */ +static int +__bam_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE_CURSOR *cp; + PAGE *pg; + db_indx_t *inp, indx, pg_keyoff; + int32_t *endp, key_off, *offp, *saveoffp; + u_int8_t *dbuf, *dp, *np; + u_int32_t key_size, pagesize, size, space; + int adj, is_key, need_pg, next_key, no_dup, rec_key, ret; + + ret = 0; + key_off = 0; + size = 0; + pagesize = dbc->dbp->pgsize; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * dp tracks the beginning of the page in the buffer. + * np is the next place to copy things into the buffer. + * dbuf always stays at the beginning of the buffer. + */ + dbuf = data->data; + np = dp = dbuf; + + /* Keep track of space that is left. There is a termination entry */ + space = data->ulen; + space -= sizeof(*offp); + + /* Build the offset/size table from the end up. */ + endp = (int32_t *)((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; + + key_size = 0; + + /* + * Distinguish between BTREE and RECNO. + * There are no keys in RECNO. If MULTIPLE_KEY is specified + * then we return the record numbers. + * is_key indicates that multiple btree keys are returned. + * rec_key is set if we are returning record numbers. + * next_key is set if we are going after the next key rather than dup. 
+ */ + if (dbc->dbtype == DB_BTREE) { + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1: 0; + rec_key = 0; + next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + adj = 2; + } else { + is_key = 0; + rec_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + next_key = LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + adj = 1; + } + no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; + +next_pg: + indx = cp->indx; + pg = cp->page; + + inp = P_INP(dbc->dbp, pg); + /* The current page is not yet in the buffer. */ + need_pg = 1; + + /* + * Keep track of the offset of the current key on the page. + * If we are returning keys, set it to 0 first so we force + * the copy of the key to the buffer. + */ + pg_keyoff = 0; + if (is_key == 0) + pg_keyoff = inp[indx]; + + do { + if (IS_DELETED(dbc->dbp, pg, indx)) { + if (dbc->dbtype != DB_RECNO) + continue; + + cp->recno++; + /* + * If we are not returning recnos then we + * need to fill in every slot so the user + * can calculate the record numbers. + */ + if (rec_key != 0) + continue; + + space -= 2 * sizeof(*offp); + /* Check if space as underflowed. */ + if (space > data->ulen) + goto back_up; + + /* Just mark the empty recno slots. */ + *offp-- = 0; + *offp-- = 0; + continue; + } + + /* + * Check to see if we have a new key. + * If so, then see if we need to put the + * key on the page. If its already there + * then we just point to it. + */ + if (is_key && pg_keyoff != inp[indx]) { + bk = GET_BKEYDATA(dbc->dbp, pg, indx); + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = key_size = bo->tlen; + if (key_size > space) + goto get_key_space; + if ((ret = __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= key_size; + key_off = (int32_t)(np - dbuf); + np += key_size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +get_key_space: + /* Nothing added, then error. 
*/ + if (offp == endp) { + data->size = (u_int32_t) + DB_ALIGN(size + + pagesize, 1024); + return + (DB_BUFFER_SMALL); + } + /* + * We need to back up to the + * last record put into the + * buffer so that it is + * CURRENT. + */ + if (indx != 0) + indx -= P_INDX; + else { + if ((ret = + __bam_get_prev( + dbc)) != 0) + return (ret); + indx = cp->indx; + pg = cp->page; + } + break; + } + /* + * Move the data part of the page + * to the buffer. + */ + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + key_size = bk->len; + key_off = (int32_t)((inp[indx] - HOFFSET(pg)) + + (dp - dbuf) + SSZA(BKEYDATA, data)); + pg_keyoff = inp[indx]; + } + } + + /* + * Reserve space for the pointers and sizes. + * Either key/data pair or just for a data item. + */ + space -= (is_key ? 4 : 2) * sizeof(*offp); + if (rec_key) + space -= sizeof(*offp); + + /* Check to see if space has underflowed. */ + if (space > data->ulen) + goto back_up; + + /* + * Determine if the next record is in the + * buffer already or if it needs to be copied in. + * If we have an off page dup, then copy as many + * as will fit into the buffer. + */ + bk = GET_BKEYDATA(dbc->dbp, pg, indx + adj - 1); + if (B_TYPE(bk->type) == B_DUPLICATE) { + bo = (BOVERFLOW *)bk; + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } + /* + * We pass the offset of the current key. + * On return we check to see if offp has + * moved to see if any data fit. + */ + saveoffp = offp; + if ((ret = __bam_bulk_duplicates(dbc, bo->pgno, + dbuf, is_key ? offp + P_INDX : NULL, + &offp, &np, &space, no_dup)) != 0) { + if (ret == DB_BUFFER_SMALL) { + size = space; + space = 0; + /* If nothing was added, then error. 
*/ + if (offp == saveoffp) { + offp += 2; + goto back_up; + } + goto get_space; + } + return (ret); + } + } else if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = bo->tlen; + if (size > space) + goto back_up; + if ((ret = + __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= size; + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } else if (rec_key) + *offp-- = (int32_t)cp->recno; + *offp-- = (int32_t)(np - dbuf); + np += size; + *offp-- = (int32_t)size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +back_up: + /* + * Back up the index so that the + * last record in the buffer is CURRENT + */ + if (indx >= adj) + indx -= adj; + else { + if ((ret = + __bam_get_prev(dbc)) != 0 && + ret != DB_NOTFOUND) + return (ret); + indx = cp->indx; + pg = cp->page; + } + if (dbc->dbtype == DB_RECNO) + cp->recno--; +get_space: + /* + * See if we put anything in the + * buffer or if we are doing a DBP->get + * did we get all of the data. + */ + if (offp >= + (is_key ? &endp[-1] : endp) || + F_ISSET(dbc, DBC_FROM_DB_GET)) { + data->size = (u_int32_t) + DB_ALIGN(size + + data->ulen - space, 1024); + return (DB_BUFFER_SMALL); + } + break; + } + memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + /* + * Add the offsets and sizes to the end of the buffer. + * First add the key info then the data info. 
+ */ + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } else if (rec_key) + *offp-- = (int32_t)cp->recno; + *offp-- = (int32_t)((inp[indx + adj - 1] - HOFFSET(pg)) + + (dp - dbuf) + SSZA(BKEYDATA, data)); + *offp-- = bk->len; + } + if (dbc->dbtype == DB_RECNO) + cp->recno++; + else if (no_dup) { + while (indx + adj < NUM_ENT(pg) && + pg_keyoff == inp[indx + adj]) + indx += adj; + } + /* + * Stop when we either run off the page or we move to the next key and + * we are not returning multiple keys. + */ + } while ((indx += adj) < NUM_ENT(pg) && + (next_key || pg_keyoff == inp[indx])); + + /* If we are off the page then try to the next page. */ + if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { + cp->indx = indx; + ret = __bamc_next(dbc, 0, 1); + if (ret == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + } + + /* + * If we did a DBP->get we must error if we did not return + * all the data for the current key because there is + * no way to know if we did not get it all, nor any + * interface to fetch the balance. + */ + + if (ret == 0 && indx < pg->entries && + F_ISSET(dbc, DBC_TRANSIENT) && pg_keyoff == inp[indx]) { + data->size = (data->ulen - space) + size; + return (DB_BUFFER_SMALL); + } + /* + * Must leave the index pointing at the last record fetched. + * If we are not fetching keys, we may have stepped to the + * next key. + */ + if (ret == DB_BUFFER_SMALL || next_key || pg_keyoff == inp[indx]) + cp->indx = indx; + else + cp->indx = indx - P_INDX; + + if (rec_key == 1) + *offp = RECNO_OOB; + else + *offp = -1; + return (0); +} + +/* + * __bam_bulk_overflow -- + * Dump overflow record into the buffer. + * The space requirements have already been checked. 
+ * PUBLIC: int __bam_bulk_overflow + * PUBLIC: __P((DBC *, u_int32_t, db_pgno_t, u_int8_t *)); + */ +int +__bam_bulk_overflow(dbc, len, pgno, dp) + DBC *dbc; + u_int32_t len; + db_pgno_t pgno; + u_int8_t *dp; +{ + DBT dbt; + + memset(&dbt, 0, sizeof(dbt)); + F_SET(&dbt, DB_DBT_USERMEM); + dbt.ulen = len; + dbt.data = (void *)dp; + return (__db_goff(dbc, &dbt, len, pgno, NULL, NULL)); +} + +/* + * __bam_bulk_duplicates -- + * Put as many off page duplicates as will fit into the buffer. + * This routine will adjust the cursor to reflect the position in + * the overflow tree. + * PUBLIC: int __bam_bulk_duplicates __P((DBC *, + * PUBLIC: db_pgno_t, u_int8_t *, int32_t *, + * PUBLIC: int32_t **, u_int8_t **, u_int32_t *, int)); + */ +int +__bam_bulk_duplicates(dbc, pgno, dbuf, keyoff, offpp, dpp, spacep, no_dup) + DBC *dbc; + db_pgno_t pgno; + u_int8_t *dbuf; + int32_t *keyoff, **offpp; + u_int8_t **dpp; + u_int32_t *spacep; + int no_dup; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE_CURSOR *cp; + DB *dbp; + DBC *opd; + DBT key, data; + PAGE *pg; + db_indx_t indx, *inp; + int32_t *offp; + u_int32_t pagesize, size, space; + u_int8_t *dp, *np; + int first, need_pg, ret, t_ret; + + ret = 0; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + opd = cp->opd; + + if (opd == NULL) { + if ((ret = __dbc_newopd(dbc, pgno, NULL, &opd)) != 0) + return (ret); + cp->opd = opd; + if ((ret = opd->am_get(opd, + &key, &data, DB_FIRST, NULL)) != 0) + goto close_opd; + } + + pagesize = opd->dbp->pgsize; + cp = (BTREE_CURSOR *)opd->internal; + space = *spacep; + /* Get current offset slot. */ + offp = *offpp; + + /* + * np is the next place to put data. + * dp is the beginning of the current page in the buffer. + */ + np = dp = *dpp; + first = 1; + indx = cp->indx; + + do { + /* Fetch the current record. No initial move. */ + if ((ret = __bamc_next(opd, 0, 0)) != 0) + break; + pg = cp->page; + indx = cp->indx; + inp = P_INP(dbp, pg); + /* We need to copy the page to the buffer. 
*/ + need_pg = 1; + + do { + if (IS_DELETED(dbp, pg, indx)) + goto contin; + bk = GET_BKEYDATA(dbp, pg, indx); + space -= 2 * sizeof(*offp); + /* Allocate space for key if needed. */ + if (first == 0 && keyoff != NULL) + space -= 2 * sizeof(*offp); + + /* Did space underflow? */ + if (space > *spacep) { + ret = DB_BUFFER_SMALL; + if (first == 1) { + /* Get the absolute value. */ + space = -(int32_t)space; + space = *spacep + space; + if (need_pg) + space += pagesize - HOFFSET(pg); + } + break; + } + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = bo->tlen; + if (size > space) { + ret = DB_BUFFER_SMALL; + space = *spacep + size; + break; + } + if (first == 0 && keyoff != NULL) { + *offp-- = keyoff[0]; + *offp-- = keyoff[-1]; + } + if ((ret = __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= size; + *offp-- = (int32_t)(np - dbuf); + np += size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { + ret = DB_BUFFER_SMALL; + /* Return space required. */ + space = *spacep + size; + break; + } + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + if (first == 0 && keyoff != NULL) { + *offp-- = keyoff[0]; + *offp-- = keyoff[-1]; + } + size = bk->len; + *offp-- = (int32_t)((inp[indx] - HOFFSET(pg)) + + (dp - dbuf) + SSZA(BKEYDATA, data)); + } + *offp-- = (int32_t)size; + first = 0; + if (no_dup) + break; +contin: + indx++; + if (opd->dbtype == DB_RECNO) + cp->recno++; + } while (indx < NUM_ENT(pg)); + if (no_dup) + break; + cp->indx = indx; + + } while (ret == 0); + + /* Return the updated information. */ + *spacep = space; + *offpp = offp; + *dpp = np; + + /* + * If we ran out of space back up the pointer. + * If we did not return any dups or reached the end, close the opd. 
+ */ + if (ret == DB_BUFFER_SMALL) { + if (opd->dbtype == DB_RECNO) { + if (--cp->recno == 0) + goto close_opd; + } else if (indx != 0) + cp->indx--; + else { + t_ret = __bamc_prev(opd); + if (t_ret == DB_NOTFOUND) + goto close_opd; + if (t_ret != 0) + ret = t_ret; + } + } else if (keyoff == NULL && ret == DB_NOTFOUND) { + cp->indx--; + if (opd->dbtype == DB_RECNO) + --cp->recno; + } else if (indx == 0 || ret == DB_NOTFOUND) { +close_opd: + if (ret == DB_NOTFOUND) + ret = 0; + if ((t_ret = __dbc_close(opd)) != 0 && ret == 0) + ret = t_ret; + ((BTREE_CURSOR *)dbc->internal)->opd = NULL; + } + if (ret == DB_NOTFOUND) + ret = 0; + + return (ret); +} + +/* + * __bam_getbothc -- + * Search for a matching data item on a join. + */ +static int +__bam_getbothc(dbc, data) + DBC *dbc; + DBT *data; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + int cmp, exact, ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Acquire the current page. We have at least a read-lock + * already. The caller may have set DB_RMW asking for a + * write lock, but upgrading to a write lock has no better + * chance of succeeding now instead of later, so don't try. + */ + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + return (ret); + + /* + * An off-page duplicate cursor. Search the remaining duplicates + * for one which matches (do a normal btree search, then verify + * that the retrieved record is greater than the original one). + */ + if (F_ISSET(dbc, DBC_OPD)) { + /* + * Check to make sure the desired item comes strictly after + * the current position; if it doesn't, return DB_NOTFOUND. + */ + if ((ret = __bam_cmp(dbc, data, cp->page, cp->indx, + dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare, + &cmp)) != 0) + return (ret); + + if (cmp <= 0) + return (DB_NOTFOUND); + + /* Discard the current page, we're going to do a full search. 
*/ + if ((ret = __memp_fput(mpf, + dbc->thread_info, cp->page, dbc->priority)) != 0) + return (ret); + cp->page = NULL; + + return (__bamc_search(dbc, + PGNO_INVALID, data, DB_GET_BOTH, &exact)); + } + + /* + * We're doing a DBC->get(DB_GET_BOTHC) and we're already searching + * a set of on-page duplicates (either sorted or unsorted). Continue + * a linear search from after the current position. + * + * (Note that we could have just finished a "set" of one duplicate, + * i.e. not a duplicate at all, but the following check will always + * return DB_NOTFOUND in this case, which is the desired behavior.) + */ + if (cp->indx + P_INDX >= NUM_ENT(cp->page) || + !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX)) + return (DB_NOTFOUND); + cp->indx += P_INDX; + + return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH)); +} + +#ifdef HAVE_COMPRESSION +/* + * __bam_getlte -- + * Search for the largest entry <= key/data - used by compression. + * + * data == NULL indicates the DB_SET_LTE flag + * data != NULL indicates the DB_GET_BOTH_LTE flag + * + * Only works for a primary cursor - not an OPD cursor. Handles the + * OPD manipulation as well - no need to return to the caller to + * perform more OPD movements. + */ +static int +__bam_getlte(dbc, key, data) + DBC *dbc; + DBT *key, *data; +{ + BTREE_CURSOR *cp, *ocp; + DB *dbp; + db_pgno_t pgno; + int exact, ret; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + + /* Begin by searching for the key */ + ret = __bamc_search(dbc, PGNO_INVALID, key, DB_SET_RANGE, &exact); + if (ret == DB_NOTFOUND) + goto find_last; + if (ret != 0) + goto end; + + if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc)) { + /* + * Move to the next entry if we're past the end of the + * page or on a deleted entry. 
+ */ + ret = __bamc_next(dbc, 0, 0); + if (ret == DB_NOTFOUND) + goto find_last; + if (ret != 0) + goto end; + + /* Check if we're still on the correct key */ + if ((ret = __bam_cmp(dbc, key, cp->page, cp->indx, + ((BTREE*)dbp->bt_internal)->bt_compare, &exact)) != 0) + goto end; + exact = (exact == 0); + } + + if (exact == 0) { + ret = __bam_get_prev(dbc); + goto end; + } + + if (__bam_isopd(dbc, &pgno)) { + /* + * We want to do unusual things with off-page duplicates, so + * deal with them here rather than returning to handle them. + */ + if ((ret = __dbc_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0) + goto end; + + /* Search for the correct duplicate */ + ret = __bamc_search(cp->opd, PGNO_INVALID, data, + data == NULL ? DB_FIRST : DB_SET_RANGE, &exact); + if (ret == DB_NOTFOUND) + goto find_last_dup; + if (ret != 0) + goto end; + + ocp = (BTREE_CURSOR *)cp->opd->internal; + if (ocp->indx == NUM_ENT(ocp->page) || + IS_CUR_DELETED(cp->opd)) { + /* + * Move to the next entry if we're past the end of the + * page or on a deleted entry. + */ + ret = __bamc_next(cp->opd, 0, 0); + if (ret == DB_NOTFOUND) + goto find_last_dup; + if (ret != 0) + goto end; + + if (data != NULL) { + /* Check if we're still on the correct data */ + if ((ret = __bam_cmp( + dbc, data, ocp->page, ocp->indx, + dbp->dup_compare, &exact)) != 0) + goto end; + exact = (exact == 0); + } else + exact = 1; + } + + if (exact == 0) { + /* Move to the previous entry */ + ret = __bamc_prev(cp->opd); + if (ret == DB_NOTFOUND) { + if ((ret = __dbc_close(cp->opd)) != 0) + goto end; + cp->opd = NULL; + ret = __bam_get_prev(dbc); + } + } + } else if (data != NULL) { + /* + * If we got an exact match with on-page duplicates, we need to + * search in them. 
+ */ + ret = __bam_getboth_finddatum(dbc, data, DB_GET_BOTH_RANGE); + if (ret == DB_NOTFOUND) + exact = 0; + else if (ret != 0) + goto end; + else { + /* Check if we're still on the correct data */ + if ((ret = __bam_cmp(dbc, data, cp->page, + cp->indx + O_INDX, dbp->dup_compare, &exact)) != 0) + goto end; + exact = (exact == 0); + } + + if (exact == 0) { + ret = __bam_get_prev(dbc); + } + } + + end: + return (ret); + + find_last: + if ((ret = __bamc_search( + dbc, PGNO_INVALID, NULL, DB_LAST, &exact)) != 0) + return (ret); + + if (__bam_isopd(dbc, &pgno)) { + if ((ret = __dbc_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0) + return (ret); + find_last_dup: + if ((ret = __bamc_search( + cp->opd, PGNO_INVALID, NULL, DB_LAST, &exact)) != 0) + return (ret); + } + + return (ret); +} +#endif + +/* + * __bam_getboth_finddatum -- + * Find a matching on-page data item. + */ +static int +__bam_getboth_finddatum(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + BTREE_CURSOR *cp; + DB *dbp; + db_indx_t base, lim, top; + int cmp, ret; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + + cmp = 0; + + /* + * Called (sometimes indirectly) from DBC->get to search on-page data + * item(s) for a matching value. If the original flag was DB_GET_BOTH + * or DB_GET_BOTH_RANGE, the cursor is set to the first undeleted data + * item for the key. If the original flag was DB_GET_BOTHC, the cursor + * argument is set to the first data item we can potentially return. + * In both cases, there may or may not be additional duplicate data + * items to search. + * + * If the duplicates are not sorted, do a linear search. 
+ */ + if (dbp->dup_compare == NULL) { + for (;; cp->indx += P_INDX) { + if (!IS_CUR_DELETED(dbc)) { + if ((ret = __bam_cmp( + dbc, data, cp->page, cp->indx + O_INDX, + __bam_defcmp, &cmp)) != 0) + return (ret); + if (cmp == 0) + return (0); + } + + if (cp->indx + P_INDX >= NUM_ENT(cp->page) || + !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX)) + break; + } + return (DB_NOTFOUND); + } + + /* + * If the duplicates are sorted, do a binary search. The reason for + * this is that large pages and small key/data pairs result in large + * numbers of on-page duplicates before they get pushed off-page. + * + * Find the top and bottom of the duplicate set. Binary search + * requires at least two items, don't loop if there's only one. + */ + for (base = top = cp->indx; top < NUM_ENT(cp->page); top += P_INDX) + if (!IS_DUPLICATE(dbc, cp->indx, top)) + break; + if (base == (top - P_INDX)) { + if ((ret = __bam_cmp(dbc, data, cp->page, + cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + return (ret); + if (cmp == 0 || (cmp < 0 && flags == DB_GET_BOTH_RANGE)) + return (0); + cp->indx = top; + return DB_NOTFOUND; + } + + for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) { + cp->indx = base + ((lim >> 1) * P_INDX); + if ((ret = __bam_cmp(dbc, data, cp->page, + cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + return (ret); + if (cmp == 0) { + /* + * XXX + * No duplicate duplicates in sorted duplicate sets, + * so there can be only one. + */ + if (!IS_CUR_DELETED(dbc)) + return (0); + break; + } + if (cmp > 0) { + base = cp->indx + P_INDX; + --lim; + } + } + + /* No match found; if we're looking for an exact match, we're done. */ + if (flags == DB_GET_BOTH) + return (DB_NOTFOUND); + + /* + * Base is the smallest index greater than the data item, may be zero + * or a last + O_INDX index, and may be deleted. Find an undeleted + * item. + */ + cp->indx = base; + while (cp->indx < top && IS_CUR_DELETED(dbc)) + cp->indx += P_INDX; + return (cp->indx < top ? 
0 : DB_NOTFOUND); +} + +/* + * __bamc_put -- + * Put using a cursor. + */ +static int +__bamc_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + DBT dbt; + DB_MPOOLFILE *mpf; + db_pgno_t root_pgno; + int cmp, exact, own, ret, stack; + u_int32_t iiop; + void *arg; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + root_pgno = cp->root; + +split: ret = stack = 0; + switch (flags) { + case DB_CURRENT: + if (F_ISSET(cp, C_DELETED)) + return (DB_NOTFOUND); + /* FALLTHROUGH */ + case DB_AFTER: + case DB_BEFORE: + iiop = flags; + own = 1; + + /* Acquire the current page with a write lock. */ + ACQUIRE_WRITE_LOCK(dbc, ret); + if (ret != 0) + goto err; + if (cp->page == NULL && (ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + goto err; + break; + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_NOOVERWRITE: + case DB_OVERWRITE_DUP: + own = 0; + /* + * Searching off-page, sorted duplicate tree: do a tree search + * for the correct item; __bamc_search returns the smallest + * slot greater than the key, use it. + * + * See comment below regarding where we can start the search. + */ + if (F_ISSET(dbc, DBC_OPD)) { + if ((ret = __bamc_search(dbc, + F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, + data, flags, &exact)) != 0) + goto err; + stack = 1; + + /* Disallow "sorted" duplicate duplicates. */ + if (exact != 0) { + if (flags == DB_OVERWRITE_DUP || + IS_DELETED(dbp, cp->page, cp->indx)) { + iiop = DB_CURRENT; + break; + } + ret = __db_duperr(dbp, flags); + goto err; + } + iiop = DB_BEFORE; + break; + } + + /* + * Searching a btree. + * + * If we've done a split, we can start the search from the + * parent of the split page, which __bam_split returned + * for us in root_pgno, unless we're in a Btree with record + * numbering. 
In that case, we'll need the true root page + * in order to adjust the record count. + */ + if ((ret = __bamc_search(dbc, + F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, key, + flags == DB_KEYFIRST || dbp->dup_compare != NULL ? + DB_KEYFIRST : DB_KEYLAST, &exact)) != 0) + goto err; + stack = 1; + + /* + * If we don't have an exact match, __bamc_search returned + * the smallest slot greater than the key, use it. + */ + if (!exact) { + iiop = DB_KEYFIRST; + break; + + /* + * Check for NOOVERWRITE. It is possible that there + * is a key with an empty duplicate page attached. + */ + } else if (flags == DB_NOOVERWRITE && !IS_CUR_DELETED(dbc)) { + if (pgnop != NULL && __bam_isopd(dbc, pgnop)) + ret = __bam_opd_exists(dbc, *pgnop); + else + ret = DB_KEYEXIST; + if (ret != 0) + goto err; + } + + /* + * If duplicates aren't supported, replace the current item. + */ + if (!F_ISSET(dbp, DB_AM_DUP)) { + iiop = DB_CURRENT; + break; + } + + /* + * If we find a matching entry, it may be an off-page duplicate + * tree. Return the page number to our caller, we need a new + * cursor. + */ + if (pgnop != NULL && __bam_isopd(dbc, pgnop)) + goto done; + + /* If the duplicates aren't sorted, move to the right slot. */ + if (dbp->dup_compare == NULL) { + if (flags == DB_KEYFIRST) + iiop = DB_BEFORE; + else + for (;; cp->indx += P_INDX) + if (cp->indx + P_INDX >= + NUM_ENT(cp->page) || + !IS_DUPLICATE(dbc, cp->indx, + cp->indx + P_INDX)) { + iiop = DB_AFTER; + break; + } + break; + } + + /* + * We know that we're looking at the first of a set of sorted + * on-page duplicates. Walk the list to find the right slot. + */ + for (;; cp->indx += P_INDX) { + if ((ret = __bam_cmp(dbc, data, cp->page, + cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + goto err; + if (cmp < 0) { + iiop = DB_BEFORE; + break; + } + + /* Disallow "sorted" duplicate duplicates. 
*/ + if (cmp == 0) { + if (flags == DB_OVERWRITE_DUP || + IS_DELETED(dbp, cp->page, cp->indx)) { + iiop = DB_CURRENT; + break; + } + ret = __db_duperr(dbp, flags); + goto err; + } + + if (cp->indx + P_INDX >= NUM_ENT(cp->page) || + P_INP(dbp, ((PAGE *)cp->page))[cp->indx] != + P_INP(dbp, ((PAGE *)cp->page))[cp->indx + P_INDX]) { + iiop = DB_AFTER; + break; + } + } + break; + default: + ret = __db_unknown_flag(dbp->env, "__bamc_put", flags); + goto err; + } + + switch (ret = __bam_iitem(dbc, key, data, iiop, 0)) { + case 0: + break; + case DB_NEEDSPLIT: + /* + * To split, we need a key for the page. Either use the key + * argument or get a copy of the key from the page. + */ + if (flags == DB_AFTER || + flags == DB_BEFORE || flags == DB_CURRENT) { + memset(&dbt, 0, sizeof(DBT)); + if ((ret = __db_ret(dbc, cp->page, 0, &dbt, + &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) + goto err; + arg = &dbt; + } else + arg = F_ISSET(dbc, DBC_OPD) ? data : key; + + /* + * Discard any locks and pinned pages (the locks are discarded + * even if we're running with transactions, as they lock pages + * that we're sorry we ever acquired). If stack is set and the + * cursor entries are valid, they point to the same entries as + * the stack, don't free them twice. + */ + if (stack) + ret = __bam_stkrel(dbc, STK_CLRDBC | STK_NOLOCK); + else + DISCARD_CUR(dbc, ret); + if (ret != 0) + goto err; + + /* + * SR [#6059] + * If we do not own a lock on the page any more, then clear the + * cursor so we don't point at it. Even if we call __bam_stkrel + * above we still may have entered the routine with the cursor + * positioned to a particular record. This is in the case + * where C_RECNUM is set. + */ + if (own == 0) { + cp->pgno = PGNO_INVALID; + cp->indx = 0; + } + + /* Split the tree. 
*/ + if ((ret = __bam_split(dbc, arg, &root_pgno)) != 0) + return (ret); + + goto split; + default: + goto err; + } + +err: +done: /* + * If we inserted a key into the first or last slot of the tree, + * remember where it was so we can do it more quickly next time. + * If the tree has record numbers, we need a complete stack so + * that we can adjust the record counts, so skipping the tree search + * isn't possible. For subdatabases we need to be careful that the + * page does not move from one db to another, so we track its LSN. + * + * If there are duplicates and we are inserting into the last slot, + * the cursor will point _to_ the last item, not after it, which + * is why we subtract P_INDX below. + */ + + t = dbp->bt_internal; + if (ret == 0 && TYPE(cp->page) == P_LBTREE && + (flags == DB_KEYFIRST || flags == DB_KEYLAST) && + !F_ISSET(cp, C_RECNUM) && + (!F_ISSET(dbp, DB_AM_SUBDB) || + (LOGGING_ON(dbp->env) && !F_ISSET(dbp, DB_AM_NOT_DURABLE))) && + ((NEXT_PGNO(cp->page) == PGNO_INVALID && + cp->indx >= NUM_ENT(cp->page) - P_INDX) || + (PREV_PGNO(cp->page) == PGNO_INVALID && cp->indx == 0))) { + t->bt_lpgno = cp->pgno; + if (F_ISSET(dbp, DB_AM_SUBDB)) + t->bt_llsn = LSN(cp->page); + } else + t->bt_lpgno = PGNO_INVALID; + /* + * Discard any pages pinned in the tree and their locks, except for + * the leaf page. Note, the leaf page participated in any stack we + * acquired, and so we have to adjust the stack as necessary. If + * there was only a single page on the stack, we don't have to free + * further stack pages. + */ + if (stack && BT_STK_POP(cp) != NULL) + (void)__bam_stkrel(dbc, 0); + + /* + * Regardless of whether we were successful or not, clear the delete + * flag. If we're successful, we either moved the cursor or the item + * is no longer deleted. If we're not successful, then we're just a + * copy, no need to have the flag set. 
+ * + * We may have instantiated off-page duplicate cursors during the put, + * so clear the deleted bit from the off-page duplicate cursor as well. + */ + F_CLR(cp, C_DELETED); + if (cp->opd != NULL) { + cp = (BTREE_CURSOR *)cp->opd->internal; + F_CLR(cp, C_DELETED); + } + + return (ret); +} + +/* + * __bamc_rget -- + * Return the record number for a cursor. + * + * PUBLIC: int __bamc_rget __P((DBC *, DBT *)); + */ +int +__bamc_rget(dbc, data) + DBC *dbc; + DBT *data; +{ + BTREE_CURSOR *cp; + DB *dbp; + DBT dbt; + DB_MPOOLFILE *mpf; + db_recno_t recno; + int exact, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Get the page with the current item on it. + * Get a copy of the key. + * Release the page, making sure we don't release it twice. + */ + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + return (ret); + memset(&dbt, 0, sizeof(DBT)); + if ((ret = __db_ret(dbc, cp->page, cp->indx, &dbt, + &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) + goto err; + ret = __memp_fput(mpf, dbc->thread_info, cp->page, dbc->priority); + cp->page = NULL; + if (ret != 0) + return (ret); + + if ((ret = __bam_search(dbc, PGNO_INVALID, &dbt, + F_ISSET(dbc, DBC_RMW) ? SR_FIND_WR : SR_FIND, + 1, &recno, &exact)) != 0) + goto err; + + ret = __db_retcopy(dbc->env, data, + &recno, sizeof(recno), &dbc->rdata->data, &dbc->rdata->ulen); + + /* Release the stack. */ +err: if ((t_ret = __bam_stkrel(dbc, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __bamc_writelock -- + * Upgrade the cursor to a write lock. + */ +static int +__bamc_writelock(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + int ret; + + cp = (BTREE_CURSOR *)dbc->internal; + + if (cp->lock_mode == DB_LOCK_WRITE) + return (0); + + /* + * When writing to an off-page duplicate tree, we need to have the + * appropriate page in the primary tree locked. 
The general DBC + * code calls us first with the primary cursor so we can acquire the + * appropriate lock. + */ + ACQUIRE_WRITE_LOCK(dbc, ret); + return (ret); +} + +/* + * __bamc_next -- + * Move to the next record. + */ +static int +__bamc_next(dbc, initial_move, deleted_okay) + DBC *dbc; + int initial_move, deleted_okay; +{ + BTREE_CURSOR *cp; + db_indx_t adjust; + db_lockmode_t lock_mode; + db_pgno_t pgno; + int ret; + + cp = (BTREE_CURSOR *)dbc->internal; + ret = 0; + + /* + * We're either moving through a page of duplicates or a btree leaf + * page. + * + * !!! + * This code handles empty pages and pages with only deleted entries. + */ + if (F_ISSET(dbc, DBC_OPD)) { + adjust = O_INDX; + lock_mode = DB_LOCK_NG; + } else { + adjust = dbc->dbtype == DB_BTREE ? P_INDX : O_INDX; + lock_mode = + F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ; + } + if (cp->page == NULL) { + ACQUIRE_CUR(dbc, lock_mode, cp->pgno, 0, ret); + if (ret != 0) + return (ret); + } + + if (initial_move) + cp->indx += adjust; + + for (;;) { + /* + * If at the end of the page, move to a subsequent page. + * + * !!! + * Check for >= NUM_ENT. If the original search landed us on + * NUM_ENT, we may have incremented indx before the test. + */ + if (cp->indx >= NUM_ENT(cp->page)) { + if ((pgno = NEXT_PGNO(cp->page)) == PGNO_INVALID) + return (DB_NOTFOUND); + + ACQUIRE_CUR(dbc, lock_mode, pgno, 0, ret); + if (ret != 0) + return (ret); + cp->indx = 0; + continue; + } + if (!deleted_okay && IS_CUR_DELETED(dbc)) { + cp->indx += adjust; + continue; + } + break; + } + return (0); +} + +/* + * __bamc_prev -- + * Move to the previous record. + */ +static int +__bamc_prev(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + db_indx_t adjust; + db_lockmode_t lock_mode; + db_pgno_t pgno; + int ret; + + cp = (BTREE_CURSOR *)dbc->internal; + ret = 0; + + /* + * We're either moving through a page of duplicates or a btree leaf + * page. + * + * !!! 
+ * This code handles empty pages and pages with only deleted entries. + */ + if (F_ISSET(dbc, DBC_OPD)) { + adjust = O_INDX; + lock_mode = DB_LOCK_NG; + } else { + adjust = dbc->dbtype == DB_BTREE ? P_INDX : O_INDX; + lock_mode = + F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ; + } + if (cp->page == NULL) { + ACQUIRE_CUR(dbc, lock_mode, cp->pgno, 0, ret); + if (ret != 0) + return (ret); + } + + for (;;) { + /* If at the beginning of the page, move to a previous one. */ + if (cp->indx == 0) { + if ((pgno = + PREV_PGNO(cp->page)) == PGNO_INVALID) + return (DB_NOTFOUND); + + ACQUIRE_CUR(dbc, lock_mode, pgno, 0, ret); + if (ret != 0) + return (ret); + + if ((cp->indx = NUM_ENT(cp->page)) == 0) + continue; + } + + /* Ignore deleted records. */ + cp->indx -= adjust; + if (IS_CUR_DELETED(dbc)) + continue; + + break; + } + return (0); +} + +/* + * __bamc_search -- + * Move to a specified record. + */ +static int +__bamc_search(dbc, root_pgno, key, flags, exactp) + DBC *dbc; + db_pgno_t root_pgno; + const DBT *key; + u_int32_t flags; + int *exactp; +{ + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + PAGE *h; + db_indx_t base, indx, *inp, lim; + db_pgno_t bt_lpgno; + db_recno_t recno; + u_int32_t sflags; + int bulk, cmp, ret, t_ret; + + COMPQUIET(cmp, 0); + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + t = dbp->bt_internal; + ret = 0; + bulk = (F_ISSET(dbc, DBC_BULK) && cp->pgno != PGNO_INVALID); + + /* + * Find an entry in the database. Discard any lock we currently hold, + * we're going to search the tree. + */ + DISCARD_CUR(dbc, ret); + if (ret != 0) + return (ret); + + switch (flags) { + case DB_FIRST: + sflags = (F_ISSET(dbc, DBC_RMW) ? SR_WRITE : SR_READ) | SR_MIN; + goto search; + case DB_LAST: + sflags = (F_ISSET(dbc, DBC_RMW) ? SR_WRITE : SR_READ) | SR_MAX; + goto search; + case DB_SET_RECNO: + if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) + return (ret); + sflags = + (F_ISSET(dbc, DBC_RMW) ? 
SR_FIND_WR : SR_FIND) | SR_EXACT; + if ((ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp)) != 0) + return (ret); + goto done; + case DB_SET: + case DB_GET_BOTH: + sflags = + (F_ISSET(dbc, DBC_RMW) ? SR_FIND_WR : SR_FIND) | SR_EXACT; + if (bulk) + break; + goto search; + case DB_GET_BOTH_RANGE: + sflags = (F_ISSET(dbc, DBC_RMW) ? SR_FIND_WR : SR_FIND); + goto search; + case DB_SET_RANGE: + sflags = + (F_ISSET(dbc, DBC_RMW) ? SR_WRITE : SR_READ) | SR_DUPFIRST; + goto search; + case DB_KEYFIRST: + case DB_NOOVERWRITE: + sflags = SR_KEYFIRST; + break; + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_OVERWRITE_DUP: + sflags = SR_KEYLAST; + break; + default: + return (__db_unknown_flag(dbp->env, "__bamc_search", flags)); + } + + /* + * If the application has a history of inserting into the first or last + * pages of the database, we check those pages first to avoid doing a + * full search. Similarly, if the cursor is configured as a bulk + * cursor, check whether this operation belongs on the same page as the + * last one. + */ + if (bulk) + bt_lpgno = cp->pgno; + else { + if (F_ISSET(dbc, DBC_OPD)) + goto search; + + /* + * !!! + * We do not mutex protect the t->bt_lpgno field, which means + * that it can only be used in an advisory manner. If we find + * page we can use, great. If we don't, we don't care, we do + * it the slow way instead. Regardless, copy it into a local + * variable, otherwise we might acquire a lock for a page and + * then read a different page because it changed underfoot. + */ + bt_lpgno = t->bt_lpgno; + } + + /* + * If the tree has no history of insertion, do it the slow way. + */ + if (bt_lpgno == PGNO_INVALID) + goto search; + + /* + * Lock and retrieve the page on which we last inserted. + * + * The page may not exist: if a transaction created the page + * and then aborted, the page might have been truncated from + * the end of the file. We don't want to wait on the lock. + * The page may not even be relevant to this search. 
+ */ + h = NULL; + ACQUIRE_CUR(dbc, DB_LOCK_WRITE, bt_lpgno, DB_LOCK_NOWAIT, ret); + if (ret != 0) { + if (ret == DB_LOCK_DEADLOCK || + ret == DB_LOCK_NOTGRANTED || + ret == DB_PAGE_NOTFOUND) + ret = 0; + goto fast_miss; + } + + h = cp->page; + inp = P_INP(dbp, h); + + /* + * It's okay if the page type isn't right or it's empty, it + * just means that the world changed. + */ + if (TYPE(h) != P_LBTREE || NUM_ENT(h) == 0) + goto fast_miss; + + /* Verify that this page cannot have moved to another db. */ + if (F_ISSET(dbp, DB_AM_SUBDB) && + LOG_COMPARE(&t->bt_llsn, &LSN(h)) != 0) + goto fast_miss; + + /* + * What we do here is test to see if we're at the beginning or + * end of the tree and if the new item sorts before/after the + * first/last page entry. We only try to catch inserts into + * the middle of the tree for bulk cursors. + */ + if (h->next_pgno == PGNO_INVALID) { + indx = NUM_ENT(h) - P_INDX; + if ((ret = __bam_cmp(dbc, key, h, indx, + t->bt_compare, &cmp)) != 0) + goto fast_miss; + if (cmp > 0) { + if (FLD_ISSET(sflags, SR_EXACT)) + return (DB_NOTFOUND); + else + indx += P_INDX; + } + if (cmp >= 0) + goto fast_hit; + } + if (h->prev_pgno == PGNO_INVALID) { + indx = 0; + if ((ret = __bam_cmp(dbc, key, h, indx, + t->bt_compare, &cmp)) != 0) + goto fast_miss; + if (cmp < 0 && FLD_ISSET(sflags, SR_EXACT)) + return (DB_NOTFOUND); + if (cmp <= 0) + goto fast_hit; + } + if (bulk) { + DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(h), P_INDX) { + DB_BINARY_SEARCH_INCR(indx, base, lim, P_INDX); + if ((ret = __bam_cmp(dbc, key, h, indx, + t->bt_compare, &cmp)) != 0) + goto fast_miss; + + if (cmp == 0) + goto fast_hit; + if (cmp > 0) + DB_BINARY_SEARCH_SHIFT_BASE(indx, base, + lim, P_INDX); + } + /* + * No match found: base is the smallest index greater than + * the key and may be zero or NUM_ENT(h). 
+ */ + indx = base; + if (indx > 0 && indx < NUM_ENT(h)) { + if (FLD_ISSET(sflags, SR_EXACT)) + return (DB_NOTFOUND); + goto fast_hit; + } + } + goto fast_miss; + +fast_hit: + if (cmp == 0) { + /* + * Found a duplicate. Deal with DB_KEYFIRST / DB_KEYLAST. + */ + if (FLD_ISSET(sflags, SR_DUPFIRST)) + while (indx > 0 && inp[indx - P_INDX] == inp[indx]) + indx -= P_INDX; + else if (FLD_ISSET(sflags, SR_DUPLAST)) + while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && + inp[indx] == inp[indx + P_INDX]) + indx += P_INDX; + } + + /* Set the exact match flag, we may have found a duplicate. */ + *exactp = (cmp == 0); + + /* + * Insert the entry in the stack. (Our caller is likely to + * call __bam_stkrel() after our return.) + */ + BT_STK_CLR(cp); + BT_STK_ENTER(dbp->env, + cp, h, indx, cp->lock, cp->lock_mode, ret); + if (ret != 0) + return (ret); + goto done; + +fast_miss: + /* + * This was not the right page, so we do not need to retain + * the lock even in the presence of transactions. + * + * This is also an error path, so ret may have been set. + */ + DISCARD_CUR(dbc, ret); + cp->pgno = PGNO_INVALID; + if ((t_ret = __LPUT(dbc, cp->lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + +search: + if ((ret = __bam_search(dbc, root_pgno, + key, sflags, 1, NULL, exactp)) != 0) + return (ret); + +done: /* Initialize the cursor from the stack. */ + cp->page = cp->csp->page; + cp->pgno = cp->csp->page->pgno; + cp->indx = cp->csp->indx; + cp->lock = cp->csp->lock; + cp->lock_mode = cp->csp->lock_mode; + + /* If on an empty page or a deleted record, move to the next one. */ + if (flags == DB_FIRST && + (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))) + if ((ret = __bamc_next(dbc, 0, 0)) != 0) + return (ret); + if (flags == DB_LAST && + (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))) + if ((ret = __bamc_prev(dbc)) != 0) + return (ret); + + return (0); +} + +/* + * __bamc_physdel -- + * Physically remove an item from the page. 
+ */ +static int +__bamc_physdel(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + DB *dbp; + DBT key; + DB_LOCK next_lock, prev_lock; + db_pgno_t pgno; + int delete_page, empty_page, exact, ret; + + dbp = dbc->dbp; + memset(&key, 0, sizeof(DBT)); + cp = (BTREE_CURSOR *)dbc->internal; + delete_page = empty_page = ret = 0; + LOCK_INIT(next_lock); + LOCK_INIT(prev_lock); + + /* If the page is going to be emptied, consider deleting it. */ + delete_page = empty_page = + NUM_ENT(cp->page) == (TYPE(cp->page) == P_LBTREE ? 2 : 1); + + /* + * Check if the application turned off reverse splits. Applications + * can't turn off reverse splits in off-page duplicate trees, that + * space will never be reused unless the exact same key is specified. + */ + if (delete_page && + !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_AM_REVSPLITOFF)) + delete_page = 0; + + /* + * We never delete the last leaf page. (Not really true -- we delete + * the last leaf page of off-page duplicate trees, but that's handled + * by our caller, not down here.) + */ + if (delete_page && cp->pgno == BAM_ROOT_PGNO(dbc)) + delete_page = 0; + + /* + * To delete a leaf page other than an empty root page, we need a + * copy of a key from the page. Use the 0th page index since it's + * the last key the page held. + * + * !!! + * Note that because __bamc_physdel is always called from a cursor + * close, it should be safe to use the cursor's own "my_rkey" memory + * to temporarily hold this key. We shouldn't own any returned-data + * memory of interest--if we do, we're in trouble anyway. + */ + if (delete_page) { + if ((ret = __db_ret(dbc, cp->page, 0, &key, + &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) + goto err; + } + + /* + * Delete the items. If page isn't empty, we adjust the cursors. + * + * !!! + * The following operations to delete a page may deadlock. The easy + * scenario is if we're deleting an item because we're closing cursors + * because we've already deadlocked and want to call txn->abort. 
If + * we fail due to deadlock, we'll leave a locked, possibly empty page + * in the tree, which won't be empty long because we'll undo the delete + * when we undo the transaction's modifications. + * + * !!! + * Delete the key item first, otherwise the on-page duplicate checks + * in __bam_ditem() won't work! + */ + if ((ret = __memp_dirty(dbp->mpf, + &cp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + if (TYPE(cp->page) == P_LBTREE) { + if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0) + goto err; + if (!empty_page) + if ((ret = __bam_ca_di(dbc, + PGNO(cp->page), cp->indx, -1)) != 0) + goto err; + } + if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0) + goto err; + + /* Clear the deleted flag, the item is gone. */ + F_CLR(cp, C_DELETED); + + if (!empty_page) + if ((ret = __bam_ca_di(dbc, PGNO(cp->page), cp->indx, -1)) != 0) + goto err; + + /* + * Need to downgrade write locks here or non-txn locks will get stuck. + */ + if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED)) { + if ((ret = __TLPUT(dbc, cp->lock)) != 0) + goto err; + cp->lock_mode = DB_LOCK_WWRITE; + if (cp->page != NULL && + (ret = __memp_shared(dbp->mpf, cp->page)) != 0) + goto err; + } + /* If we're not going to try and delete the page, we're done. */ + if (!delete_page) + return (0); + + /* + * Lock the previous and next pages before latching the parent + * sub tree. + */ + if (STD_LOCKING(dbc)) { + if ((pgno = PREV_PGNO(cp->page)) != PGNO_INVALID && + (ret = __db_lget(dbc, + 0, pgno, DB_LOCK_WRITE, 0, &prev_lock)) != 0) + return (ret); + if ((pgno = NEXT_PGNO(cp->page)) != PGNO_INVALID && + (ret = __db_lget(dbc, + 0, pgno, DB_LOCK_WRITE, 0, &next_lock)) != 0) { + (void)__TLPUT(dbc, next_lock); + return (ret); + } + } + DISCARD_CUR(dbc, ret); + if (ret != 0) + goto err; + ret = __bam_search(dbc, PGNO_INVALID, &key, SR_DEL, 0, NULL, &exact); + + /* + * If everything worked, delete the stack, otherwise, release the + * stack and page locks without further damage. 
+ */ + if (ret == 0) + ret = __bam_dpages(dbc, 1, BTD_RELINK); + else + (void)__bam_stkrel(dbc, 0); + +err: if (ret != 0) + F_SET(dbc, DBC_ERROR); + (void)__TLPUT(dbc, prev_lock); + (void)__TLPUT(dbc, next_lock); + return (ret); +} + +/* + * __bamc_getstack -- + * Acquire a full stack for a cursor. + */ +static int +__bamc_getstack(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + DB *dbp; + DBT dbt; + DB_MPOOLFILE *mpf; + PAGE *h; + int exact, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Get the page with the current item on it. The caller of this + * routine has to already hold a read lock on the page, so there + * is no additional lock to acquire. + */ + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &h)) != 0) + return (ret); + + /* Get a copy of a key from the page. */ + memset(&dbt, 0, sizeof(DBT)); + ret = __db_ret(dbc, h, 0, &dbt, + &dbc->my_rkey.data, &dbc->my_rkey.ulen); + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + + /* Get a write-locked stack for the page. */ + exact = 0; + ret = __bam_search(dbc, PGNO_INVALID, + &dbt, SR_KEYFIRST, 1, NULL, &exact); + + return (ret); +} + +/* + * __bam_isopd -- + * Return if the cursor references an off-page duplicate tree via its + * page number. + */ +static int +__bam_isopd(dbc, pgnop) + DBC *dbc; + db_pgno_t *pgnop; +{ + BOVERFLOW *bo; + + if (TYPE(dbc->internal->page) != P_LBTREE) + return (0); + + bo = GET_BOVERFLOW(dbc->dbp, + dbc->internal->page, dbc->internal->indx + O_INDX); + if (B_TYPE(bo->type) == B_DUPLICATE) { + *pgnop = bo->pgno; + return (1); + } + return (0); +} + +/* + * __bam_opd_exists -- + * Return if the current position has any data. 
+ * PUBLIC: int __bam_opd_exists __P((DBC *, db_pgno_t)); + */ +int +__bam_opd_exists(dbc, pgno) + DBC *dbc; + db_pgno_t pgno; +{ + PAGE *h; + int ret; + + if ((ret = __memp_fget(dbc->dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &h)) != 0) + return (ret); + + /* + * We always collapse OPD trees so we only need to check + * the number of entries on the root. If there is a non-empty + * tree then there will be duplicates. + */ + if (NUM_ENT(h) == 0) + ret = 0; + else + ret = DB_KEYEXIST; + + (void)__memp_fput(dbc->dbp->mpf, dbc->thread_info, h, dbc->priority); + + return (ret); +} diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c new file mode 100644 index 00000000..6e1baa66 --- /dev/null +++ b/src/btree/bt_delete.c @@ -0,0 +1,541 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +/* + * __bam_ditem -- + * Delete one or more entries from a page. + * + * PUBLIC: int __bam_ditem __P((DBC *, PAGE *, u_int32_t)); + */ +int +__bam_ditem(dbc, h, indx) + DBC *dbc; + PAGE *h; + u_int32_t indx; +{ + BINTERNAL *bi; + BKEYDATA *bk; + DB *dbp; + u_int32_t nbytes; + int ret; + db_indx_t *inp; + + dbp = dbc->dbp; + inp = P_INP(dbp, h); + + /* The page should already have been dirtied by our caller. */ + DB_ASSERT(dbp->env, IS_DIRTY(h)); + + switch (TYPE(h)) { + case P_IBTREE: + bi = GET_BINTERNAL(dbp, h, indx); + switch (B_TYPE(bi->type)) { + case B_DUPLICATE: + case B_KEYDATA: + nbytes = BINTERNAL_SIZE(bi->len); + break; + case B_OVERFLOW: + nbytes = BINTERNAL_SIZE(bi->len); + if ((ret = + __db_doff(dbc, ((BOVERFLOW *)bi->data)->pgno)) != 0) + return (ret); + break; + default: + return (__db_pgfmt(dbp->env, PGNO(h))); + } + break; + case P_IRECNO: + nbytes = RINTERNAL_SIZE; + break; + case P_LBTREE: + /* + * If it's a duplicate key, discard the index and don't touch + * the actual page item. + * + * !!! 
+ * This works because no data item can have an index matching + * any other index so even if the data item is in a key "slot", + * it won't match any other index. + */ + if ((indx % 2) == 0) { + /* + * Check for a duplicate after us on the page. NOTE: + * we have to delete the key item before deleting the + * data item, otherwise the "indx + P_INDX" calculation + * won't work! + */ + if (indx + P_INDX < (u_int32_t)NUM_ENT(h) && + inp[indx] == inp[indx + P_INDX]) + return (__bam_adjindx(dbc, + h, indx, indx + O_INDX, 0)); + /* + * Check for a duplicate before us on the page. It + * doesn't matter if we delete the key item before or + * after the data item for the purposes of this one. + */ + if (indx > 0 && inp[indx] == inp[indx - P_INDX]) + return (__bam_adjindx(dbc, + h, indx, indx - P_INDX, 0)); + } + /* FALLTHROUGH */ + case P_LDUP: + case P_LRECNO: + bk = GET_BKEYDATA(dbp, h, indx); + switch (B_TYPE(bk->type)) { + case B_DUPLICATE: + nbytes = BOVERFLOW_SIZE; + break; + case B_OVERFLOW: + nbytes = BOVERFLOW_SIZE; + if ((ret = __db_doff( + dbc, (GET_BOVERFLOW(dbp, h, indx))->pgno)) != 0) + return (ret); + break; + case B_KEYDATA: + nbytes = BKEYDATA_SIZE(bk->len); + break; + default: + return (__db_pgfmt(dbp->env, PGNO(h))); + } + break; + default: + return (__db_pgfmt(dbp->env, PGNO(h))); + } + + /* Delete the item and mark the page dirty. */ + if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0) + return (ret); + + return (0); +} + +/* + * __bam_adjindx -- + * Adjust an index on the page. + * + * PUBLIC: int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int)); + */ +int +__bam_adjindx(dbc, h, indx, indx_copy, is_insert) + DBC *dbc; + PAGE *h; + u_int32_t indx, indx_copy; + int is_insert; +{ + DB *dbp; + db_indx_t copy, *inp; + int ret; + + dbp = dbc->dbp; + inp = P_INP(dbp, h); + + /* Log the change. 
*/ + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_adj_log(dbp, dbc->txn, &LSN(h), 0, + PGNO(h), &LSN(h), indx, indx_copy, (u_int32_t)is_insert)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); + + /* Shuffle the indices and mark the page dirty. */ + if (is_insert) { + copy = inp[indx_copy]; + if (indx != NUM_ENT(h)) + memmove(&inp[indx + O_INDX], &inp[indx], + sizeof(db_indx_t) * (NUM_ENT(h) - indx)); + inp[indx] = copy; + ++NUM_ENT(h); + } else { + --NUM_ENT(h); + if (indx != NUM_ENT(h)) + memmove(&inp[indx], &inp[indx + O_INDX], + sizeof(db_indx_t) * (NUM_ENT(h) - indx)); + } + + return (0); +} + +/* + * __bam_dpages -- + * Delete a set of locked pages. + * + * PUBLIC: int __bam_dpages __P((DBC *, int, int)); + */ +int +__bam_dpages(dbc, use_top, flags) + DBC *dbc; + int use_top; + int flags; +{ + BINTERNAL *bi; + BTREE_CURSOR *cp; + DB *dbp; + DBT a, b; + DB_LOCK c_lock, p_lock; + DB_MPOOLFILE *mpf; + EPG *epg, *save_sp, *stack_epg; + PAGE *child, *parent; + db_indx_t nitems; + db_pgno_t pgno, root_pgno; + db_recno_t rcnt; + int done, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + nitems = 0; + pgno = PGNO_INVALID; + + /* + * We have the entire stack of deletable pages locked. + * + * Btree calls us with the first page in the stack is to have a + * single item deleted, and the rest of the pages are to be removed. + * + * Recno always has a stack to the root and __bam_merge operations + * may have unneeded items in the sack. We find the lowest page + * in the stack that has more than one record in it and start there. + */ + ret = 0; + if (use_top) + stack_epg = cp->sp; + else + for (stack_epg = cp->csp; stack_epg > cp->sp; --stack_epg) + if (NUM_ENT(stack_epg->page) > 1) + break; + epg = stack_epg; + /* + * !!! + * There is an interesting deadlock situation here. We have to relink + * the leaf page chain around the leaf page being deleted. 
Consider + * a cursor walking through the leaf pages, that has the previous page + * read-locked and is waiting on a lock for the page we're deleting. + * It will deadlock here. Before we unlink the subtree, we relink the + * leaf page chain. + */ + if (LF_ISSET(BTD_RELINK) && LEVEL(cp->csp->page) == 1 && + (ret = __db_relink(dbc, cp->csp->page, NULL, PGNO_INVALID)) != 0) + goto discard; + + /* + * Delete the last item that references the underlying pages that are + * to be deleted, and adjust cursors that reference that page. Then, + * save that page's page number and item count and release it. If + * the application isn't retaining locks because it's running without + * transactions, this lets the rest of the tree get back to business + * immediately. + */ + if ((ret = __memp_dirty(mpf, + &epg->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto discard; + if ((ret = __bam_ditem(dbc, epg->page, epg->indx)) != 0) + goto discard; + if ((ret = __bam_ca_di(dbc, PGNO(epg->page), epg->indx, -1)) != 0) + goto discard; + + if (LF_ISSET(BTD_UPDATE) && epg->indx == 0) { + save_sp = cp->csp; + cp->csp = epg; + ret = __bam_pupdate(dbc, epg->page); + cp->csp = save_sp; + if (ret != 0) + goto discard; + } + + pgno = PGNO(epg->page); + nitems = NUM_ENT(epg->page); + + ret = __memp_fput(mpf, dbc->thread_info, epg->page, dbc->priority); + epg->page = NULL; + if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err_inc; + + /* Then, discard any pages that we don't care about. */ +discard: for (epg = cp->sp; epg < stack_epg; ++epg) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + epg->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + epg->page = NULL; + if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0) + ret = t_ret; + } + if (ret != 0) + goto err; + + /* Free the rest of the pages in the stack. 
*/ + while (++epg <= cp->csp) { + if ((ret = __memp_dirty(mpf, &epg->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + /* + * Delete page entries so they will be restored as part of + * recovery. We don't need to do cursor adjustment here as + * the pages are being emptied by definition and so cannot + * be referenced by a cursor. + */ + if (NUM_ENT(epg->page) != 0) { + DB_ASSERT(dbp->env, LEVEL(epg->page) != 1); + + if ((ret = __bam_ditem(dbc, epg->page, epg->indx)) != 0) + goto err; + /* + * Sheer paranoia: if we find any pages that aren't + * emptied by the delete, someone else added an item + * while we were walking the tree, and we discontinue + * the delete. Shouldn't be possible, but we check + * regardless. + */ + if (NUM_ENT(epg->page) != 0) + goto err; + } + + ret = __db_free(dbc, epg->page, 0); + if (cp->page == epg->page) + cp->page = NULL; + epg->page = NULL; + if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err_inc; + } + + if (0) { +err_inc: ++epg; +err: for (; epg <= cp->csp; ++epg) { + if (epg->page != NULL) { + (void)__memp_fput(mpf, dbc->thread_info, + epg->page, dbc->priority); + epg->page = NULL; + } + (void)__TLPUT(dbc, epg->lock); + } + BT_STK_CLR(cp); + return (ret); + } + BT_STK_CLR(cp); + + /* + * If we just deleted the next-to-last item from the root page, the + * tree can collapse one or more levels. While there remains only a + * single item on the root page, write lock the last page referenced + * by the root page and copy it over the root page. + * Note that if pgno is the root of a btree database then the root + * cannot change as we have it locked. + */ + if (nitems != 1) + return (0); + root_pgno = BAM_ROOT_PGNO(dbc); + if (pgno != root_pgno) + return (0); + + for (done = 0; !done;) { + /* Initialize. */ + parent = child = NULL; + LOCK_INIT(p_lock); + LOCK_INIT(c_lock); + + /* Get the root. 
*/ + root_pgno = cp->root; + BAM_GET_ROOT(dbc, root_pgno, + parent, DB_MPOOL_DIRTY, DB_LOCK_WRITE, p_lock, ret); + + DB_ASSERT(dbp->env, parent != NULL); + if (ret != 0 || NUM_ENT(parent) != 1) + goto stop; + + switch (TYPE(parent)) { + case P_IBTREE: + /* + * If this is overflow, then try to delete it. + * The child may or may not still point at it. + */ + bi = GET_BINTERNAL(dbp, parent, 0); + if (B_TYPE(bi->type) == B_OVERFLOW) + if ((ret = __db_doff(dbc, + ((BOVERFLOW *)bi->data)->pgno)) != 0) + goto stop; + pgno = bi->pgno; + break; + case P_IRECNO: + pgno = GET_RINTERNAL(dbp, parent, 0)->pgno; + break; + default: + goto stop; + } + + /* Lock the child page. */ + if ((ret = + __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &c_lock)) != 0) + goto stop; + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &child)) != 0) + goto stop; + + /* Log the change. */ + if (DBC_LOGGING(dbc)) { + memset(&a, 0, sizeof(a)); + a.data = child; + a.size = dbp->pgsize; + memset(&b, 0, sizeof(b)); + b.data = P_ENTRY(dbp, parent, 0); + b.size = TYPE(parent) == P_IRECNO ? RINTERNAL_SIZE : + BINTERNAL_SIZE(((BINTERNAL *)b.data)->len); + if ((ret = __bam_rsplit_log(dbp, dbc->txn, + &child->lsn, 0, PGNO(child), &a, PGNO(parent), + RE_NREC(parent), &b, &parent->lsn)) != 0) + goto stop; + } else + LSN_NOT_LOGGED(child->lsn); + + /* + * Make the switch. + * + * One fixup -- internal pages below the top level do not store + * a record count, so we have to preserve it if we're not + * converting to a leaf page. Note also that we are about to + * overwrite the parent page, including its LSN. This is OK + * because the log message we wrote describing this update + * stores its LSN on the child page. When the child is copied + * onto the parent, the correct LSN is copied into place. 
+ */ + COMPQUIET(rcnt, 0); + if (F_ISSET(cp, C_RECNUM) && LEVEL(child) > LEAFLEVEL) + rcnt = RE_NREC(parent); + memcpy(parent, child, dbp->pgsize); + PGNO(parent) = root_pgno; + if (F_ISSET(cp, C_RECNUM) && LEVEL(child) > LEAFLEVEL) + RE_NREC_SET(parent, rcnt); + + /* Adjust the cursors. */ + if ((ret = __bam_ca_rsplit(dbc, PGNO(child), root_pgno)) != 0) + goto stop; + + /* + * Free the page copied onto the root page and discard its + * lock. (The call to __db_free() discards our reference + * to the page.) + */ + if ((ret = __db_free(dbc, child, 0)) != 0) { + child = NULL; + goto stop; + } + child = NULL; + + if (0) { +stop: done = 1; + } + if ((t_ret = __TLPUT(dbc, p_lock)) != 0 && ret == 0) + ret = t_ret; + if (parent != NULL && + (t_ret = __memp_fput(mpf, dbc->thread_info, + parent, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, c_lock)) != 0 && ret == 0) + ret = t_ret; + if (child != NULL && + (t_ret = __memp_fput(mpf, dbc->thread_info, + child, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + } + + return (ret); +} + +/* + * __bam_pupdate -- + * Update parent key pointers up the tree. + * + * PUBLIC: int __bam_pupdate __P((DBC *, PAGE *)); + */ +int +__bam_pupdate(dbc, lpg) + DBC *dbc; + PAGE *lpg; +{ + BTREE_CURSOR *cp; + ENV *env; + EPG *epg; + int ret; + + env = dbc->env; + cp = (BTREE_CURSOR *)dbc->internal; + ret = 0; + + /* + * Update the parents up the tree. __bam_pinsert only looks at the + * left child if is a leaf page, so we don't need to change it. We + * just do a delete and insert; a replace is possible but reusing + * pinsert is better. + */ + for (epg = &cp->csp[-1]; epg >= cp->sp; epg--) { + if ((ret = __memp_dirty(dbc->dbp->mpf, &epg->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + return (ret); + epg->indx--; + if ((ret = __bam_pinsert(dbc, epg, 0, + lpg, epg[1].page, BPI_NORECNUM | BPI_REPLACE)) != 0) { + if (ret == DB_NEEDSPLIT) { + /* This should not happen. 
*/ + __db_errx(env, DB_STR_A("1020", + "Not enough room in parent: %s: page %lu", + "%s %lu"), dbc->dbp->fname, + (u_long)PGNO(epg->page)); + ret = __env_panic(env, EINVAL); + } + epg->indx++; + return (ret); + } + epg->indx++; + } + return (ret); +} diff --git a/src/btree/bt_method.c b/src/btree/bt_method.c new file mode 100644 index 00000000..68bb3a9b --- /dev/null +++ b/src/btree/bt_method.c @@ -0,0 +1,745 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/qam.h" + +static int __bam_set_bt_minkey __P((DB *, u_int32_t)); +static int __bam_get_bt_compare + __P((DB *, int (**)(DB *, const DBT *, const DBT *))); +static int __bam_get_bt_prefix + __P((DB *, size_t(**)(DB *, const DBT *, const DBT *))); +static int __bam_set_bt_prefix + __P((DB *, size_t(*)(DB *, const DBT *, const DBT *))); +static int __bam_get_bt_compress __P((DB *, + int (**)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *), + int (**)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *))); +static int __ram_get_re_delim __P((DB *, int *)); +static int __ram_set_re_delim __P((DB *, int)); +static int __ram_set_re_len __P((DB *, u_int32_t)); +static int __ram_set_re_pad __P((DB *, int)); +static int __ram_get_re_source __P((DB *, const char **)); +static int __ram_set_re_source __P((DB *, const char *)); + +/* + * __bam_db_create -- + * Btree specific initialization of the DB structure. + * + * PUBLIC: int __bam_db_create __P((DB *)); + */ +int +__bam_db_create(dbp) + DB *dbp; +{ + BTREE *t; + int ret; + + /* Allocate and initialize the private btree structure. 
*/ + if ((ret = __os_calloc(dbp->env, 1, sizeof(BTREE), &t)) != 0) + return (ret); + dbp->bt_internal = t; + + t->bt_minkey = DEFMINKEYPAGE; /* Btree */ + t->bt_compare = __bam_defcmp; + t->bt_prefix = __bam_defpfx; +#ifdef HAVE_COMPRESSION + t->bt_compress = NULL; + t->bt_decompress = NULL; + t->compress_dup_compare = NULL; + + /* + * DB_AM_COMPRESS may have been set in __bam_metachk before the + * bt_internal structure existed. + */ + if (F_ISSET(dbp, DB_AM_COMPRESS) && + (ret = __bam_set_bt_compress(dbp, NULL, NULL)) != 0) + return (ret); +#endif + + dbp->get_bt_compare = __bam_get_bt_compare; + dbp->set_bt_compare = __bam_set_bt_compare; + dbp->get_bt_minkey = __bam_get_bt_minkey; + dbp->set_bt_minkey = __bam_set_bt_minkey; + dbp->get_bt_prefix = __bam_get_bt_prefix; + dbp->set_bt_prefix = __bam_set_bt_prefix; + dbp->get_bt_compress = __bam_get_bt_compress; + dbp->set_bt_compress = __bam_set_bt_compress; + + t->re_pad = ' '; /* Recno */ + t->re_delim = '\n'; + t->re_eof = 1; + + dbp->get_re_delim = __ram_get_re_delim; + dbp->set_re_delim = __ram_set_re_delim; + dbp->get_re_len = __ram_get_re_len; + dbp->set_re_len = __ram_set_re_len; + dbp->get_re_pad = __ram_get_re_pad; + dbp->set_re_pad = __ram_set_re_pad; + dbp->get_re_source = __ram_get_re_source; + dbp->set_re_source = __ram_set_re_source; + + return (0); +} + +/* + * __bam_db_close -- + * Btree specific discard of the DB structure. + * + * PUBLIC: int __bam_db_close __P((DB *)); + */ +int +__bam_db_close(dbp) + DB *dbp; +{ + BTREE *t; + + if ((t = dbp->bt_internal) == NULL) + return (0); + /* Recno */ + /* Close any backing source file descriptor. */ + if (t->re_fp != NULL) + (void)fclose(t->re_fp); + + /* Free any backing source file name. */ + if (t->re_source != NULL) + __os_free(dbp->env, t->re_source); + + __os_free(dbp->env, t); + dbp->bt_internal = NULL; + + return (0); +} + +/* + * __bam_map_flags -- + * Map Btree specific flags from public to the internal values. 
+ * + * PUBLIC: void __bam_map_flags __P((DB *, u_int32_t *, u_int32_t *)); + */ +void +__bam_map_flags(dbp, inflagsp, outflagsp) + DB *dbp; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(dbp, NULL); + + if (FLD_ISSET(*inflagsp, DB_DUP)) { + FLD_SET(*outflagsp, DB_AM_DUP); + FLD_CLR(*inflagsp, DB_DUP); + } + if (FLD_ISSET(*inflagsp, DB_DUPSORT)) { + FLD_SET(*outflagsp, DB_AM_DUP | DB_AM_DUPSORT); + FLD_CLR(*inflagsp, DB_DUPSORT); + } + if (FLD_ISSET(*inflagsp, DB_RECNUM)) { + FLD_SET(*outflagsp, DB_AM_RECNUM); + FLD_CLR(*inflagsp, DB_RECNUM); + } + if (FLD_ISSET(*inflagsp, DB_REVSPLITOFF)) { + FLD_SET(*outflagsp, DB_AM_REVSPLITOFF); + FLD_CLR(*inflagsp, DB_REVSPLITOFF); + } +} + +/* + * __bam_set_flags -- + * Set Btree specific flags. + * + * PUBLIC: int __bam_set_flags __P((DB *, u_int32_t *flagsp)); + */ +int +__bam_set_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + BTREE *t; + u_int32_t flags; + + t = dbp->bt_internal; + + flags = *flagsp; + if (LF_ISSET(DB_DUP | DB_DUPSORT | DB_RECNUM | DB_REVSPLITOFF)) + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_flags"); + + /* + * The DB_DUP and DB_DUPSORT flags are shared by the Hash + * and Btree access methods. + */ + if (LF_ISSET(DB_DUP | DB_DUPSORT)) + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); + + if (LF_ISSET(DB_RECNUM | DB_REVSPLITOFF)) + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); + + /* DB_DUP/DB_DUPSORT is incompatible with DB_RECNUM. */ + if (LF_ISSET(DB_DUP | DB_DUPSORT) && F_ISSET(dbp, DB_AM_RECNUM)) + goto incompat; + + /* DB_RECNUM is incompatible with DB_DUP/DB_DUPSORT. */ + if (LF_ISSET(DB_RECNUM) && F_ISSET(dbp, DB_AM_DUP)) + goto incompat; + + /* DB_RECNUM is incompatible with DB_DUP/DB_DUPSORT. 
*/ + if (LF_ISSET(DB_RECNUM) && LF_ISSET(DB_DUP | DB_DUPSORT)) + goto incompat; + +#ifdef HAVE_COMPRESSION + /* DB_RECNUM is incompatible with compression */ + if (LF_ISSET(DB_RECNUM) && DB_IS_COMPRESSED(dbp)) { + __db_errx(dbp->env, DB_STR("1024", + "DB_RECNUM cannot be used with compression")); + return (EINVAL); + } + + /* DB_DUP without DB_DUPSORT is incompatible with compression */ + if (LF_ISSET(DB_DUP) && !LF_ISSET(DB_DUPSORT) && + !F_ISSET(dbp, DB_AM_DUPSORT) && DB_IS_COMPRESSED(dbp)) { + __db_errx(dbp->env, DB_STR("1025", + "DB_DUP cannot be used with compression without DB_DUPSORT")); + return (EINVAL); + } +#endif + + if (LF_ISSET(DB_DUPSORT) && dbp->dup_compare == NULL) { +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp)) { + dbp->dup_compare = __bam_compress_dupcmp; + t->compress_dup_compare = __bam_defcmp; + } else +#endif + dbp->dup_compare = __bam_defcmp; + } + + __bam_map_flags(dbp, flagsp, &dbp->flags); + return (0); + +incompat: + return (__db_ferr(dbp->env, "DB->set_flags", 1)); +} + +/* + * __bam_get_bt_compare -- + * Get the comparison function. + */ +static int +__bam_get_bt_compare(dbp, funcp) + DB *dbp; + int (**funcp) __P((DB *, const DBT *, const DBT *)); +{ + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + if (funcp != NULL) + *funcp = t->bt_compare; + + return (0); +} + +/* + * __bam_set_bt_compare -- + * Set the comparison function. + * + * PUBLIC: int __bam_set_bt_compare + * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + */ +int +__bam_set_bt_compare(dbp, func) + DB *dbp; + int (*func) __P((DB *, const DBT *, const DBT *)); +{ + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_compare"); + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + /* + * Can't default the prefix routine if the user supplies a comparison + * routine; shortening the keys can break their comparison algorithm. 
+ */ + t->bt_compare = func; + if (t->bt_prefix == __bam_defpfx) + t->bt_prefix = NULL; + + return (0); +} + +/* + * __bam_get_bt_compress -- + * Get the compression functions. + */ +static int +__bam_get_bt_compress(dbp, compressp, decompressp) + DB *dbp; + int (**compressp) __P((DB *, const DBT *, const DBT *, const DBT *, + const DBT *, DBT *)); + int (**decompressp) __P((DB *, const DBT *, const DBT *, DBT *, DBT *, + DBT *)); +{ +#ifdef HAVE_COMPRESSION + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + if (compressp != NULL) + *compressp = t->bt_compress; + if (decompressp != NULL) + *decompressp = t->bt_decompress; + + return (0); +#else + COMPQUIET(compressp, NULL); + COMPQUIET(decompressp, NULL); + + __db_errx(dbp->env, DB_STR("1026", + "compression support has not been compiled in")); + return (EINVAL); +#endif +} + +/* + * __bam_set_bt_compress -- + * Set the compression functions. + * + * PUBLIC: int __bam_set_bt_compress __P((DB *, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *, + * PUBLIC: const DBT *, const DBT *, DBT *), + * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *))); + */ +int +__bam_set_bt_compress(dbp, compress, decompress) + DB *dbp; + int (*compress) __P((DB *, const DBT *, const DBT *, const DBT *, + const DBT *, DBT *)); + int (*decompress) __P((DB *, const DBT *, const DBT *, DBT *, DBT *, + DBT *)); +{ +#ifdef HAVE_COMPRESSION + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_compress"); + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + /* compression is incompatible with DB_RECNUM */ + if (F_ISSET(dbp, DB_AM_RECNUM)) { + __db_errx(dbp->env, DB_STR("1027", + "compression cannot be used with DB_RECNUM")); + return (EINVAL); + } + + /* compression is incompatible with DB_DUP without DB_DUPSORT */ + if (F_ISSET(dbp, DB_AM_DUP) && !F_ISSET(dbp, DB_AM_DUPSORT)) { + __db_errx(dbp->env, DB_STR("1028", + "compression cannot be used with DB_DUP without 
DB_DUPSORT")); + return (EINVAL); + } + + if (compress != 0 && decompress != 0) { + t->bt_compress = compress; + t->bt_decompress = decompress; + } else if (compress == 0 && decompress == 0) { + t->bt_compress = __bam_defcompress; + t->bt_decompress = __bam_defdecompress; + } else { + __db_errx(dbp->env, DB_STR("1029", + "to enable compression you need to supply both function arguments")); + return (EINVAL); + } + F_SET(dbp, DB_AM_COMPRESS); + + /* Copy dup_compare to compress_dup_compare, and use the compression + duplicate compare */ + if (F_ISSET(dbp, DB_AM_DUPSORT)) { + t->compress_dup_compare = dbp->dup_compare; + dbp->dup_compare = __bam_compress_dupcmp; + } + + return (0); +#else + COMPQUIET(compress, NULL); + COMPQUIET(decompress, NULL); + + __db_errx(dbp->env, DB_STR("1030", + "compression support has not been compiled in")); + return (EINVAL); +#endif +} + +/* + * __db_get_bt_minkey -- + * Get the minimum keys per page. + * + * PUBLIC: int __bam_get_bt_minkey __P((DB *, u_int32_t *)); + */ +int +__bam_get_bt_minkey(dbp, bt_minkeyp) + DB *dbp; + u_int32_t *bt_minkeyp; +{ + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + *bt_minkeyp = t->bt_minkey; + return (0); +} + +/* + * __bam_set_bt_minkey -- + * Set the minimum keys per page. + */ +static int +__bam_set_bt_minkey(dbp, bt_minkey) + DB *dbp; + u_int32_t bt_minkey; +{ + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_minkey"); + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + if (bt_minkey < 2) { + __db_errx(dbp->env, DB_STR("1031", + "minimum bt_minkey value is 2")); + return (EINVAL); + } + + t->bt_minkey = bt_minkey; + return (0); +} + +/* + * __bam_get_bt_prefix -- + * Get the prefix function. 
+ */ +static int +__bam_get_bt_prefix(dbp, funcp) + DB *dbp; + size_t (**funcp) __P((DB *, const DBT *, const DBT *)); +{ + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + if (funcp != NULL) + *funcp = t->bt_prefix; + return (0); +} + +/* + * __bam_set_bt_prefix -- + * Set the prefix function. + */ +static int +__bam_set_bt_prefix(dbp, func) + DB *dbp; + size_t (*func) __P((DB *, const DBT *, const DBT *)); +{ + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_prefix"); + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + + t = dbp->bt_internal; + + t->bt_prefix = func; + return (0); +} + +/* + * __bam_copy_config + * Copy the configuration of one DB handle to another. + * PUBLIC: void __bam_copy_config __P((DB *, DB*, u_int32_t)); + */ +void +__bam_copy_config(src, dst, nparts) + DB *src, *dst; + u_int32_t nparts; +{ + BTREE *s, *d; + + COMPQUIET(nparts, 0); + + s = src->bt_internal; + d = dst->bt_internal; + d->bt_compare = s->bt_compare; + d->bt_minkey = s->bt_minkey; + d->bt_minkey = s->bt_minkey; + d->bt_prefix = s->bt_prefix; +#ifdef HAVE_COMPRESSION + d->bt_compress = s->bt_compress; + d->bt_decompress = s->bt_decompress; + d->compress_dup_compare = s->compress_dup_compare; +#endif +} + +/* + * __ram_map_flags -- + * Map Recno specific flags from public to the internal values. + * + * PUBLIC: void __ram_map_flags __P((DB *, u_int32_t *, u_int32_t *)); + */ +void +__ram_map_flags(dbp, inflagsp, outflagsp) + DB *dbp; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(dbp, NULL); + + if (FLD_ISSET(*inflagsp, DB_RENUMBER)) { + FLD_SET(*outflagsp, DB_AM_RENUMBER); + FLD_CLR(*inflagsp, DB_RENUMBER); + } + if (FLD_ISSET(*inflagsp, DB_SNAPSHOT)) { + FLD_SET(*outflagsp, DB_AM_SNAPSHOT); + FLD_CLR(*inflagsp, DB_SNAPSHOT); + } +} + +/* + * __ram_set_flags -- + * Set Recno specific flags. 
+ * + * PUBLIC: int __ram_set_flags __P((DB *, u_int32_t *flagsp)); + */ +int +__ram_set_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + u_int32_t flags; + + flags = *flagsp; + if (LF_ISSET(DB_RENUMBER | DB_SNAPSHOT)) { + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_flags"); + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + } + + __ram_map_flags(dbp, flagsp, &dbp->flags); + return (0); +} + +/* + * __db_get_re_delim -- + * Get the variable-length input record delimiter. + */ +static int +__ram_get_re_delim(dbp, re_delimp) + DB *dbp; + int *re_delimp; +{ + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + t = dbp->bt_internal; + *re_delimp = t->re_delim; + return (0); +} + +/* + * __ram_set_re_delim -- + * Set the variable-length input record delimiter. + */ +static int +__ram_set_re_delim(dbp, re_delim) + DB *dbp; + int re_delim; +{ + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_delim"); + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + + t = dbp->bt_internal; + + t->re_delim = re_delim; + F_SET(dbp, DB_AM_DELIMITER); + + return (0); +} + +/* + * __db_get_re_len -- + * Get the variable-length input record length. + * + * PUBLIC: int __ram_get_re_len __P((DB *, u_int32_t *)); + */ +int +__ram_get_re_len(dbp, re_lenp) + DB *dbp; + u_int32_t *re_lenp; +{ + BTREE *t; + QUEUE *q; + + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + + /* + * This has to work for all access methods, before or after opening the + * database. When the record length is set with __ram_set_re_len, the + * value in both the BTREE and QUEUE structs will be correct. + * Otherwise, this only makes sense after the database in opened, in + * which case we know the type. + */ + if (dbp->type == DB_QUEUE) { + q = dbp->q_internal; + *re_lenp = q->re_len; + } else { + t = dbp->bt_internal; + *re_lenp = t->re_len; + } + + return (0); +} + +/* + * __ram_set_re_len -- + * Set the variable-length input record length. 
+ */ +static int +__ram_set_re_len(dbp, re_len) + DB *dbp; + u_int32_t re_len; +{ + BTREE *t; +#ifdef HAVE_QUEUE + QUEUE *q; +#endif + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_len"); + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + + t = dbp->bt_internal; + t->re_len = re_len; + +#ifdef HAVE_QUEUE + q = dbp->q_internal; + q->re_len = re_len; +#endif + + F_SET(dbp, DB_AM_FIXEDLEN); + + return (0); +} + +/* + * __db_get_re_pad -- + * Get the fixed-length record pad character. + * + * PUBLIC: int __ram_get_re_pad __P((DB *, int *)); + */ +int +__ram_get_re_pad(dbp, re_padp) + DB *dbp; + int *re_padp; +{ + BTREE *t; + QUEUE *q; + + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + + /* + * This has to work for all access methods, before or after opening the + * database. When the record length is set with __ram_set_re_pad, the + * value in both the BTREE and QUEUE structs will be correct. + * Otherwise, this only makes sense after the database in opened, in + * which case we know the type. + */ + if (dbp->type == DB_QUEUE) { + q = dbp->q_internal; + *re_padp = q->re_pad; + } else { + t = dbp->bt_internal; + *re_padp = t->re_pad; + } + + return (0); +} + +/* + * __ram_set_re_pad -- + * Set the fixed-length record pad character. + */ +static int +__ram_set_re_pad(dbp, re_pad) + DB *dbp; + int re_pad; +{ + BTREE *t; +#ifdef HAVE_QUEUE + QUEUE *q; +#endif + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_pad"); + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + + t = dbp->bt_internal; + t->re_pad = re_pad; + +#ifdef HAVE_QUEUE + q = dbp->q_internal; + q->re_pad = re_pad; +#endif + + F_SET(dbp, DB_AM_PAD); + + return (0); +} + +/* + * __db_get_re_source -- + * Get the backing source file name. 
+ */ +static int +__ram_get_re_source(dbp, re_sourcep) + DB *dbp; + const char **re_sourcep; +{ + BTREE *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + + t = dbp->bt_internal; + *re_sourcep = t->re_source; + return (0); +} + +/* + * __ram_set_re_source -- + * Set the backing source file name. + */ +static int +__ram_set_re_source(dbp, re_source) + DB *dbp; + const char *re_source; +{ + BTREE *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_source"); + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + + t = dbp->bt_internal; + + return (__os_strdup(dbp->env, re_source, &t->re_source)); +} diff --git a/src/btree/bt_open.c b/src/btree/bt_open.c new file mode 100644 index 00000000..64618412 --- /dev/null +++ b/src/btree/bt_open.c @@ -0,0 +1,677 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/fop.h" + +static void __bam_init_meta __P((DB *, BTMETA *, db_pgno_t, DB_LSN *)); + +/* + * __bam_open -- + * Open a btree. + * + * PUBLIC: int __bam_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t)); + */ +int +__bam_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + BTREE *t; + + COMPQUIET(name, NULL); + t = dbp->bt_internal; + + /* + * We don't permit the user to specify a prefix routine if they didn't + * also specify a comparison routine, they can't know enough about our + * comparison routine to get it right. 
+ */ + if (t->bt_compare == __bam_defcmp && t->bt_prefix != __bam_defpfx) { + __db_errx(dbp->env, DB_STR("1006", +"prefix comparison may not be specified for default comparison routine")); + return (EINVAL); + } + + /* + * Verify that the bt_minkey value specified won't cause the + * calculation of ovflsize to underflow [#2406] for this pagesize. + */ + if (B_MINKEY_TO_OVFLSIZE(dbp, t->bt_minkey, dbp->pgsize) > + B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) { + __db_errx(dbp->env, DB_STR_A("1007", + "bt_minkey value of %lu too high for page size of %lu", + "%lu %lu"), (u_long)t->bt_minkey, (u_long)dbp->pgsize); + return (EINVAL); + } + + /* Start up the tree. */ + return (__bam_read_root(dbp, ip, txn, base_pgno, flags)); +} + +/* + * __bam_metachk -- + * + * PUBLIC: int __bam_metachk __P((DB *, const char *, BTMETA *)); + */ +int +__bam_metachk(dbp, name, btm) + DB *dbp; + const char *name; + BTMETA *btm; +{ + ENV *env; + u_int32_t vers; + int ret; + + env = dbp->env; + + /* + * At this point, all we know is that the magic number is for a Btree. + * Check the version, the database may be out of date. + */ + vers = btm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 6: + case 7: + __db_errx(env, DB_STR_A("1008", + "%s: btree version %lu requires a version upgrade", + "%s %lu"), name, (u_long)vers); + return (DB_OLD_VERSION); + case 8: + case 9: + break; + default: + __db_errx(env, DB_STR_A("1009", + "%s: unsupported btree version: %lu", "%s %lu"), + name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __bam_mswap(env, (PAGE *)btm)) != 0) + return (ret); + + /* + * Check application info against metadata info, and set info, flags, + * and type based on metadata info. 
+ */ + if ((ret = + __db_fchk(env, "DB->open", btm->dbmeta.flags, BTM_MASK)) != 0) + return (ret); + + if (F_ISSET(&btm->dbmeta, BTM_RECNO)) { + if (dbp->type == DB_BTREE) + goto wrong_type; + dbp->type = DB_RECNO; + DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); + } else { + if (dbp->type == DB_RECNO) + goto wrong_type; + dbp->type = DB_BTREE; + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE); + } + + if (F_ISSET(&btm->dbmeta, BTM_DUP)) + F_SET(dbp, DB_AM_DUP); + else + if (F_ISSET(dbp, DB_AM_DUP)) { + __db_errx(env, DB_STR_A("1010", + "%s: DB_DUP specified to open method but not set in database", + "%s"), name); + return (EINVAL); + } + + if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) { + if (dbp->type != DB_BTREE) + goto wrong_type; + F_SET(dbp, DB_AM_RECNUM); + + if ((ret = __db_fcchk(env, + "DB->open", dbp->flags, DB_AM_DUP, DB_AM_RECNUM)) != 0) + return (ret); + } else + if (F_ISSET(dbp, DB_AM_RECNUM)) { + __db_errx(env, DB_STR_A("1011", + "%s: DB_RECNUM specified to open method but not set in database", + "%s"), name); + return (EINVAL); + } + + if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) { + if (dbp->type != DB_RECNO) + goto wrong_type; + F_SET(dbp, DB_AM_FIXEDLEN); + } else + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) { + __db_errx(env, DB_STR_A("1012", + "%s: DB_FIXEDLEN specified to open method but not set in database", + "%s"), name); + return (EINVAL); + } + + if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) { + if (dbp->type != DB_RECNO) + goto wrong_type; + F_SET(dbp, DB_AM_RENUMBER); + } else + if (F_ISSET(dbp, DB_AM_RENUMBER)) { + __db_errx(env, DB_STR_A("1013", + "%s: DB_RENUMBER specified to open method but not set in database", + "%s"), name); + return (EINVAL); + } + + if (F_ISSET(&btm->dbmeta, BTM_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + else + if (F_ISSET(dbp, DB_AM_SUBDB)) { + __db_errx(env, DB_STR_A("1014", + "%s: multiple databases specified but not supported by file", + "%s"), name); + return (EINVAL); + } + + if (F_ISSET(&btm->dbmeta, BTM_DUPSORT)) { + if (dbp->dup_compare == NULL) + 
dbp->dup_compare = __bam_defcmp; + F_SET(dbp, DB_AM_DUPSORT); + } else + if (dbp->dup_compare != NULL) { + __db_errx(env, DB_STR_A("1015", + "%s: duplicate sort specified but not supported in database", + "%s"), name); + return (EINVAL); + } + +#ifdef HAVE_COMPRESSION + if (F_ISSET(&btm->dbmeta, BTM_COMPRESS)) { + F_SET(dbp, DB_AM_COMPRESS); + if ((BTREE *)dbp->bt_internal != NULL && + !DB_IS_COMPRESSED(dbp) && + (ret = __bam_set_bt_compress(dbp, NULL, NULL)) != 0) + return (ret); + } else { + if ((BTREE *)dbp->bt_internal != NULL && + DB_IS_COMPRESSED(dbp)) { + __db_errx(env, DB_STR_A("1016", + "%s: compresssion specified to open method but not set in database", + "%s"), name); + return (EINVAL); + } + } +#else + if (F_ISSET(&btm->dbmeta, BTM_COMPRESS)) { + __db_errx(env, DB_STR_A("1017", + "%s: compression support has not been compiled in", "%s"), + name); + return (EINVAL); + } +#endif + + /* Set the page size. */ + dbp->pgsize = btm->dbmeta.pagesize; + + /* Copy the file's ID. */ + memcpy(dbp->fileid, btm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); + +wrong_type: + if (dbp->type == DB_BTREE) + __db_errx(env, DB_STR("1018", + "open method type is Btree, database type is Recno")); + else + __db_errx(env, DB_STR("1019", + "open method type is Recno, database type is Btree")); + return (EINVAL); +} + +/* + * __bam_read_root -- + * Read the root page and check a tree. + * + * PUBLIC: int __bam_read_root __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, db_pgno_t, u_int32_t)); + */ +int +__bam_read_root(dbp, ip, txn, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + db_pgno_t base_pgno; + u_int32_t flags; +{ + BTMETA *meta; + BTREE *t; + DBC *dbc; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + int ret, t_ret; + + COMPQUIET(flags, 0); + + meta = NULL; + t = dbp->bt_internal; + LOCK_INIT(metalock); + mpf = dbp->mpf; + ret = 0; + + /* Get a cursor. */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, + F_ISSET(dbp, DB_AM_RECOVER) ? 
DB_RECOVER : 0)) != 0) + return (ret); + + /* Get the metadata page. */ + if ((ret = + __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &base_pgno, ip, dbc->txn, 0, &meta)) != 0) + goto err; + + /* + * If the magic number is set, the tree has been created. Correct + * any fields that may not be right. Note, all of the local flags + * were set by DB->open. + * + * Otherwise, we'd better be in recovery or abort, in which case the + * metadata page will be created/initialized elsewhere. + * + * Ignore the last_pgno on the metadata page for snapshot transactions: + * we may be reading an old version of the page, and we've already + * set last_pgno from the file size. The only time this would matter + * is if we don't have ftruncate and there are some free pages at the + * end of the file: we could end up with holes. + */ + if (meta->dbmeta.magic == DB_BTREEMAGIC) { + t->bt_minkey = meta->minkey; + t->re_pad = (int)meta->re_pad; + t->re_len = meta->re_len; + + t->bt_meta = base_pgno; + t->bt_root = meta->root; + t->revision = dbp->mpf->mfp->revision; + if (PGNO(meta) == PGNO_BASE_MD && + !F_ISSET(dbp, DB_AM_RECOVER) && + (txn == NULL || !F_ISSET(txn, TXN_SNAPSHOT)) && (ret = + __memp_set_last_pgno(mpf, meta->dbmeta.last_pgno)) != 0) + goto err; + } else { + DB_ASSERT(dbp->env, + IS_RECOVERING(dbp->env) || F_ISSET(dbp, DB_AM_RECOVER)); + } + + /* + * !!! + * If creating a subdatabase, we've already done an insert when + * we put the subdatabase's entry into the master database, so + * our last-page-inserted value is wrongly initialized for the + * master database, not the subdatabase we're creating. I'm not + * sure where the *right* place to clear this value is, it's not + * intuitively obvious that it belongs here. + */ + t->bt_lpgno = PGNO_INVALID; + +err: /* Put the metadata page back. 
*/ + if (meta != NULL && (t_ret = __memp_fput(mpf, + ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __bam_init_meta -- + * + * Initialize a btree meta-data page. The following fields may need + * to be updated later: last_pgno, root. + */ +static void +__bam_init_meta(dbp, meta, pgno, lsnp) + DB *dbp; + BTMETA *meta; + db_pgno_t pgno; + DB_LSN *lsnp; +{ + BTREE *t; +#ifdef HAVE_PARTITION + DB_PARTITION *part; +#endif + ENV *env; + + env = dbp->env; + t = dbp->bt_internal; + + memset(meta, 0, sizeof(BTMETA)); + meta->dbmeta.lsn = *lsnp; + meta->dbmeta.pgno = pgno; + meta->dbmeta.magic = DB_BTREEMAGIC; + meta->dbmeta.version = DB_BTREEVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = env->crypto_handle->alg; + DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_BTREEMETA; + meta->dbmeta.free = PGNO_INVALID; + meta->dbmeta.last_pgno = pgno; + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&meta->dbmeta, BTM_DUP); + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) + F_SET(&meta->dbmeta, BTM_FIXEDLEN); + if (F_ISSET(dbp, DB_AM_RECNUM)) + F_SET(&meta->dbmeta, BTM_RECNUM); + if (F_ISSET(dbp, DB_AM_RENUMBER)) + F_SET(&meta->dbmeta, BTM_RENUMBER); + if (F_ISSET(dbp, DB_AM_SUBDB)) + F_SET(&meta->dbmeta, BTM_SUBDB); + if (dbp->dup_compare != NULL) + F_SET(&meta->dbmeta, BTM_DUPSORT); +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp)) + F_SET(&meta->dbmeta, BTM_COMPRESS); +#endif + if (dbp->type == DB_RECNO) + F_SET(&meta->dbmeta, BTM_RECNO); + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + meta->minkey = t->bt_minkey; + meta->re_len = t->re_len; + meta->re_pad = (u_int32_t)t->re_pad; + +#ifdef 
HAVE_PARTITION + if ((part = dbp->p_internal) != NULL) { + meta->dbmeta.nparts = part->nparts; + if (F_ISSET(part, PART_CALLBACK)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_PART_CALLBACK); + if (F_ISSET(part, PART_RANGE)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_PART_RANGE); + } +#endif +} + +/* + * __bam_new_file -- + * Create the necessary pages to begin a new database file. + * + * This code appears more complex than it is because of the two cases (named + * and unnamed). The way to read the code is that for each page being created, + * there are three parts: 1) a "get page" chunk (which either uses malloc'd + * memory or calls __memp_fget), 2) the initialization, and 3) the "put page" + * chunk which either does a fop write or an __memp_fput. + * + * PUBLIC: int __bam_new_file __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); + */ +int +__bam_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + BTMETA *meta; + DBT pdbt; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + ENV *env; + PAGE *root; + db_pgno_t pgno; + int ret, t_ret; + void *buf; + + env = dbp->env; + mpf = dbp->mpf; + root = NULL; + meta = NULL; + buf = NULL; + + if (F_ISSET(dbp, DB_AM_INMEM)) { + /* Build the meta-data page. */ + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + LSN_NOT_LOGGED(lsn); + __bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->root = 1; + meta->dbmeta.last_pgno = 1; + if ((ret = + __db_log_page(dbp, txn, &lsn, pgno, (PAGE *)meta)) != 0) + goto err; + ret = __memp_fput(mpf, ip, meta, dbp->priority); + meta = NULL; + if (ret != 0) + goto err; + + /* Build the root page. */ + pgno = 1; + if ((ret = __memp_fget(mpf, &pgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &root)) != 0) + goto err; + P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, + LEAFLEVEL, dbp->type == DB_RECNO ? 
P_LRECNO : P_LBTREE); + LSN_NOT_LOGGED(root->lsn); + if ((ret = + __db_log_page(dbp, txn, &root->lsn, pgno, root)) != 0) + goto err; + ret = __memp_fput(mpf, ip, root, dbp->priority); + root = NULL; + if (ret != 0) + goto err; + } else { + memset(&pdbt, 0, sizeof(pdbt)); + + /* Build the meta-data page. */ + pginfo.db_pagesize = dbp->pgsize; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pginfo.type = dbp->type; + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + if ((ret = __os_calloc(env, 1, dbp->pgsize, &buf)) != 0) + return (ret); + meta = (BTMETA *)buf; + LSN_NOT_LOGGED(lsn); + __bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->root = 1; + meta->dbmeta.last_pgno = 1; + if ((ret = __db_pgout( + dbp->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + if ((ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, fhp, + dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + meta = NULL; + + /* Build the root page. */ +#ifdef DIAGNOSTIC + memset(buf, CLEAR_BYTE, dbp->pgsize); +#endif + root = (PAGE *)buf; + P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, + LEAFLEVEL, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE); + LSN_NOT_LOGGED(root->lsn); + if ((ret = + __db_pgout(dbp->dbenv, root->pgno, root, &pdbt)) != 0) + goto err; + if ((ret = + __fop_write(env, txn, name, dbp->dirname, DB_APP_DATA, + fhp, dbp->pgsize, 1, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + root = NULL; + } + +err: if (buf != NULL) + __os_free(env, buf); + else { + if (meta != NULL && + (t_ret = __memp_fput(mpf, ip, + meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (root != NULL && + (t_ret = __memp_fput(mpf, ip, + root, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + } + return (ret); +} + +/* + * __bam_new_subdb -- + * Create a metadata page and a root page for a new btree. 
+ * + * PUBLIC: int __bam_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *)); + */ +int +__bam_new_subdb(mdbp, dbp, ip, txn) + DB *mdbp, *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + BTMETA *meta; + DBC *dbc; + DB_LOCK metalock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *root; + int ret, t_ret; + + env = mdbp->env; + mpf = mdbp->mpf; + dbc = NULL; + meta = NULL; + root = NULL; + + if ((ret = __db_cursor(mdbp, ip, txn, + &dbc, CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0) + return (ret); + + /* Get, and optionally create the metadata page. */ + if ((ret = __db_lget(dbc, + 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &dbp->meta_pgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + + /* Build meta-data page. */ + lsn = meta->dbmeta.lsn; + __bam_init_meta(dbp, meta, dbp->meta_pgno, &lsn); + if ((ret = __db_log_page(mdbp, + txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) + goto err; + + /* Create and initialize a root page. */ + if ((ret = __db_new(dbc, + dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE, NULL, &root)) != 0) + goto err; + root->level = LEAFLEVEL; + + if (DBENV_LOGGING(env) && +#if !defined(DEBUG_WOP) + txn != NULL && +#endif + + (ret = __bam_root_log(mdbp, txn, &meta->dbmeta.lsn, 0, + meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0) + goto err; + + meta->root = root->pgno; + if ((ret = + __db_log_page(mdbp, txn, &root->lsn, root->pgno, root)) != 0) + goto err; + + /* Release the metadata and root pages. 
*/ + if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + if ((ret = __memp_fput(mpf, ip, root, dbc->priority)) != 0) + goto err; + root = NULL; +err: + if (meta != NULL) + if ((t_ret = __memp_fput(mpf, ip, + meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (root != NULL) + if ((t_ret = __memp_fput(mpf, ip, + root, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL) + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/btree/bt_put.c b/src/btree/bt_put.c new file mode 100644 index 00000000..8265b571 --- /dev/null +++ b/src/btree/bt_put.c @@ -0,0 +1,1087 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __bam_build + __P((DBC *, u_int32_t, DBT *, PAGE *, u_int32_t, u_int32_t)); +static int __bam_dup_check __P((DBC *, u_int32_t, + PAGE *, u_int32_t, u_int32_t, db_indx_t *)); +static int __bam_dup_convert __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +static int __bam_ovput + __P((DBC *, u_int32_t, db_pgno_t, PAGE *, u_int32_t, DBT *)); +static u_int32_t + __bam_partsize __P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t)); + +/* + * __bam_iitem -- + * Insert an item into the tree. 
+ *
+ * NOTE(review): on entry cp->page/cp->indx are assumed to address the
+ * insertion slot; returns DB_NEEDSPLIT when the page lacks room -- confirm
+ * against callers in bt_cursor.c.
+ *
+ * PUBLIC: int __bam_iitem __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t));
+ */
+int
+__bam_iitem(dbc, key, data, op, flags)
+	DBC *dbc;
+	DBT *key, *data;
+	u_int32_t op, flags;
+{
+	BKEYDATA *bk, bk_tmp;
+	BTREE *t;
+	BTREE_CURSOR *cp;
+	DB *dbp;
+	DBT bk_hdr, tdbt;
+	DB_MPOOLFILE *mpf;
+	ENV *env;
+	PAGE *h;
+	db_indx_t cnt, indx;
+	u_int32_t data_size, have_bytes, need_bytes, needed, pages, pagespace;
+	char tmp_ch;	/* Scratch byte for the streaming-probe __db_goff call. */
+	int cmp, bigkey, bigdata, del, dupadjust;
+	int padrec, replace, ret, t_ret, was_deleted;
+
+	COMPQUIET(cnt, 0);
+
+	dbp = dbc->dbp;
+	env = dbp->env;
+	mpf = dbp->mpf;
+	cp = (BTREE_CURSOR *)dbc->internal;
+	t = dbp->bt_internal;
+	h = cp->page;
+	indx = cp->indx;
+	del = dupadjust = replace = was_deleted = 0;
+
+	/*
+	 * Fixed-length records with partial puts: it's an error to specify
+	 * anything other simple overwrite.
+	 */
+	if (F_ISSET(dbp, DB_AM_FIXEDLEN) &&
+	    F_ISSET(data, DB_DBT_PARTIAL) && data->size != data->dlen)
+		return (__db_rec_repl(env, data->size, data->dlen));
+
+	/*
+	 * Figure out how much space the data will take, including if it's a
+	 * partial record.
+	 *
+	 * Fixed-length records: it's an error to specify a record that's
+	 * longer than the fixed-length, and we never require less than
+	 * the fixed-length record size.
+	 */
+	data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
+	    __bam_partsize(dbp, op, data, h, indx) : data->size;
+	padrec = 0;
+	if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
+		if (data_size > t->re_len)
+			return (__db_rec_toobig(env, data_size, t->re_len));
+
+		/* Records that are deleted anyway needn't be padded out. */
+		if (!LF_ISSET(BI_DELETED) && data_size < t->re_len) {
+			padrec = 1;
+			data_size = t->re_len;
+		}
+	}
+
+	/*
+	 * Handle partial puts or short fixed-length records: check whether we
+	 * can just append the data or else build the real record.  We can't
+	 * append if there are secondaries: we need the whole data item for the
+	 * application's secondary callback.
+	 */
+	if (op == DB_CURRENT && dbp->dup_compare == NULL &&
+	    F_ISSET(data, DB_DBT_PARTIAL) && !DB_IS_PRIMARY(dbp)) {
+		bk = GET_BKEYDATA(
+		    dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+		/*
+		 * If the item is an overflow type, and the input DBT is
+		 * partial, and begins at the length of the current item then
+		 * it is an append. Avoid deleting and re-creating the entire
+		 * offpage item.
+		 */
+		if (B_TYPE(bk->type) == B_OVERFLOW &&
+		    data->doff == ((BOVERFLOW *)bk)->tlen) {
+			/*
+			 * If the cursor has not already cached the last page
+			 * in the offpage chain. We need to walk the chain
+			 * to be sure that the page has been read.
+			 */
+			if (cp->stream_start_pgno != ((BOVERFLOW *)bk)->pgno ||
+			    cp->stream_off > data->doff || data->doff >
+			    cp->stream_off + P_MAXSPACE(dbp, dbp->pgsize)) {
+				memset(&tdbt, 0, sizeof(DBT));
+				tdbt.doff = data->doff - 1;
+				/*
+				 * Set the length to 1, to force __db_goff
+				 * to do the traversal.
+				 */
+				tdbt.dlen = tdbt.ulen = 1;
+				tdbt.data = &tmp_ch;
+				tdbt.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
+
+				/*
+				 * Read to the last page.  It will be cached
+				 * in the cursor.
+				 */
+				if ((ret = __db_goff(
+				    dbc, &tdbt, ((BOVERFLOW *)bk)->tlen,
+				    ((BOVERFLOW *)bk)->pgno, NULL, NULL)) != 0)
+					return (ret);
+			}
+
+			/*
+			 * Since this is an append, dlen is irrelevant (there
+			 * are no bytes to overwrite). We need the caller's
+			 * DBT size to end up with the total size of the item.
+			 * From now on, use dlen as the length of the user's
+			 * data that we are going to append.
+			 * Don't futz with the caller's DBT any more than we
+			 * have to in order to send back the size.
+			 */
+			tdbt = *data;
+			tdbt.dlen = data->size;
+			tdbt.size = data_size;
+			data = &tdbt;
+			F_SET(data, DB_DBT_STREAMING);
+		}
+	}
+	if (!F_ISSET(data, DB_DBT_STREAMING) &&
+	    (padrec || F_ISSET(data, DB_DBT_PARTIAL))) {
+		tdbt = *data;
+		if ((ret =
+		    __bam_build(dbc, op, &tdbt, h, indx, data_size)) != 0)
+			return (ret);
+		data = &tdbt;
+	}
+
+	/*
+	 * If the user has specified a duplicate comparison function, return
+	 * an error if DB_CURRENT was specified and the replacement data
+	 * doesn't compare equal to the current data.  This stops apps from
+	 * screwing up the duplicate sort order.  We have to do this after
+	 * we build the real record so that we're comparing the real items.
+	 */
+	if (op == DB_CURRENT && dbp->dup_compare != NULL) {
+		if ((ret = __bam_cmp(dbc, data, h,
+		    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
+		    dbp->dup_compare, &cmp)) != 0)
+			return (ret);
+		if (cmp != 0) {
+			__db_errx(env, DB_STR("1004",
+			    "Existing data sorts differently from put data"));
+			return (EINVAL);
+		}
+	}
+
+	/*
+	 * If the key or data item won't fit on a page, we'll have to store
+	 * them on overflow pages.
+	 */
+	needed = 0;
+	bigdata = data_size > cp->ovflsize;
+	switch (op) {
+	case DB_KEYFIRST:
+		/* We're adding a new key and data pair. */
+		bigkey = key->size > cp->ovflsize;
+		if (bigkey)
+			needed += BOVERFLOW_PSIZE;
+		else
+			needed += BKEYDATA_PSIZE(key->size);
+		if (bigdata)
+			needed += BOVERFLOW_PSIZE;
+		else
+			needed += BKEYDATA_PSIZE(data_size);
+		break;
+	case DB_AFTER:
+	case DB_BEFORE:
+	case DB_CURRENT:
+		/*
+		 * We're either overwriting the data item of a key/data pair
+		 * or we're creating a new on-page duplicate and only adding
+		 * a data item.
+		 *
+		 * !!!
+		 * We're not currently correcting for space reclaimed from
+		 * already deleted items, but I don't think it's worth the
+		 * complexity.
+		 */
+		bigkey = 0;
+		if (op == DB_CURRENT) {
+			bk = GET_BKEYDATA(dbp, h,
+			    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+			if (B_TYPE(bk->type) == B_KEYDATA)
+				have_bytes = BKEYDATA_PSIZE(bk->len);
+			else
+				have_bytes = BOVERFLOW_PSIZE;
+			need_bytes = 0;
+		} else {
+			have_bytes = 0;
+			need_bytes = sizeof(db_indx_t);
+		}
+		if (bigdata)
+			need_bytes += BOVERFLOW_PSIZE;
+		else
+			need_bytes += BKEYDATA_PSIZE(data_size);
+
+		if (have_bytes < need_bytes)
+			needed += need_bytes - have_bytes;
+		break;
+	default:
+		return (__db_unknown_flag(env, "DB->put", op));
+	}
+
+	/* Split the page if there's not enough room. */
+	if (P_FREESPACE(dbp, h) < needed)
+		return (DB_NEEDSPLIT);
+
+	/*
+	 * Check to see if we will convert to off page duplicates -- if
+	 * so, we'll need a page.
+	 */
+	if (F_ISSET(dbp, DB_AM_DUP) &&
+	    TYPE(h) == P_LBTREE && op != DB_KEYFIRST &&
+	    P_FREESPACE(dbp, h) - needed <= dbp->pgsize / 2 &&
+	    __bam_dup_check(dbc, op, h, indx, needed, &cnt)) {
+		pages = 1;
+		dupadjust = 1;
+	} else
+		pages = 0;
+
+	/*
+	 * If we are not using transactions and there is a page limit
+	 * set on the file, then figure out if things will fit before
+	 * taking action.
+	 */
+	if (dbc->txn == NULL && mpf->mfp->maxpgno != 0) {
+		pagespace = P_MAXSPACE(dbp, dbp->pgsize);
+		if (bigdata)
+			pages += ((data_size - 1) / pagespace) + 1;
+		if (bigkey)
+			pages += ((key->size - 1) / pagespace) + 1;
+
+		if (pages > (mpf->mfp->maxpgno - mpf->mfp->last_pgno))
+			return (__db_space_err(dbp));
+	}
+
+	/* Get a writable copy of the page before any modification below. */
+	if (F_ISSET(dbc, DBC_OPD))
+		LOCK_CHECK_OFF(dbc->thread_info);
+	ret = __memp_dirty(mpf, &h,
+	    dbc->thread_info, dbc->txn, dbc->priority, 0);
+	if (cp->csp->page == cp->page)
+		cp->csp->page = h;
+	cp->page = h;
+	if (F_ISSET(dbc, DBC_OPD))
+		LOCK_CHECK_ON(dbc->thread_info);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * The code breaks it up into five cases:
+	 *
+	 * 1. Insert a new key/data pair.
+	 * 2. Append a new data item (a new duplicate).
+	 * 3. Insert a new data item (a new duplicate).
+	 * 4. Delete and re-add the data item (overflow item).
+	 * 5. Overwrite the data item.
+	 */
+	switch (op) {
+	case DB_KEYFIRST:		/* 1. Insert a new key/data pair. */
+		if (bigkey) {
+			if ((ret = __bam_ovput(dbc,
+			    B_OVERFLOW, PGNO_INVALID, h, indx, key)) != 0)
+				return (ret);
+		} else
+			if ((ret = __db_pitem(dbc, h, indx,
+			    BKEYDATA_SIZE(key->size), NULL, key)) != 0)
+				return (ret);
+
+		if ((ret = __bam_ca_di(dbc, PGNO(h), indx, 1)) != 0)
+			return (ret);
+		++indx;
+		break;
+	case DB_AFTER:			/* 2. Append a new data item. */
+		if (TYPE(h) == P_LBTREE) {
+			/* Copy the key for the duplicate and adjust cursors. */
+			if ((ret =
+			    __bam_adjindx(dbc, h, indx + P_INDX, indx, 1)) != 0)
+				return (ret);
+			if ((ret =
+			    __bam_ca_di(dbc, PGNO(h), indx + P_INDX, 1)) != 0)
+				return (ret);
+
+			indx += 3;
+
+			cp->indx += 2;
+		} else {
+			++indx;
+			cp->indx += 1;
+		}
+		break;
+	case DB_BEFORE:			/* 3. Insert a new data item. */
+		if (TYPE(h) == P_LBTREE) {
+			/* Copy the key for the duplicate and adjust cursors. */
+			if ((ret = __bam_adjindx(dbc, h, indx, indx, 1)) != 0)
+				return (ret);
+			if ((ret = __bam_ca_di(dbc, PGNO(h), indx, 1)) != 0)
+				return (ret);
+
+			++indx;
+		}
+		break;
+	case DB_CURRENT:
+		/*
+		 * Clear the cursor's deleted flag.  The problem is that if
+		 * we deadlock or fail while deleting the overflow item or
+		 * replacing the non-overflow item, a subsequent cursor close
+		 * will try and remove the item because the cursor's delete
+		 * flag is set.
+		 */
+		if ((ret = __bam_ca_delete(dbp, PGNO(h), indx, 0, NULL)) != 0)
+			return (ret);
+
+		if (TYPE(h) == P_LBTREE)
+			++indx;
+		bk = GET_BKEYDATA(dbp, h, indx);
+
+		/*
+		 * In a Btree deleted records aren't counted (deleted records
+		 * are counted in a Recno because all accesses are based on
+		 * record number).  If it's a Btree and it's a DB_CURRENT
+		 * operation overwriting a previously deleted record, increment
+		 * the record count.
+		 */
+		if (TYPE(h) == P_LBTREE || TYPE(h) == P_LDUP)
+			was_deleted = B_DISSET(bk->type);
+
+		/*
+		 * 4. Delete and re-add the data item.
+		 *
+		 * If we're changing the type of the on-page structure, or we
+		 * are referencing offpage items, we have to delete and then
+		 * re-add the item.  We do not do any cursor adjustments here
+		 * because we're going to immediately re-add the item into the
+		 * same slot.
+		 */
+		if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
+			/*
+			 * If streaming, don't delete the overflow item,
+			 * just delete the item pointing to the overflow item.
+			 * It will be added back in later, with the new size.
+			 * We can't simply adjust the size of the item on the
+			 * page, because there is no easy way to log a
+			 * modification.
+			 */
+			if (F_ISSET(data, DB_DBT_STREAMING)) {
+				if ((ret = __db_ditem(
+				    dbc, h, indx, BOVERFLOW_SIZE)) != 0)
+					return (ret);
+			} else if ((ret = __bam_ditem(dbc, h, indx)) != 0)
+				return (ret);
+			del = 1;
+			break;
+		}
+
+		/* 5. Overwrite the data item. */
+		replace = 1;
+		break;
+	default:
+		return (__db_unknown_flag(env, "DB->put", op));
+	}
+
+	/* Add the data. */
+	if (bigdata) {
+		/*
+		 * We do not have to handle deleted (BI_DELETED) records
+		 * in this case; the actual records should never be created.
+		 */
+		DB_ASSERT(env, !LF_ISSET(BI_DELETED));
+		ret = __bam_ovput(dbc,
+		    B_OVERFLOW, PGNO_INVALID, h, indx, data);
+	} else {
+		if (LF_ISSET(BI_DELETED)) {
+			B_TSET_DELETED(bk_tmp.type, B_KEYDATA);
+			bk_tmp.len = data->size;
+			bk_hdr.data = &bk_tmp;
+			bk_hdr.size = SSZA(BKEYDATA, data);
+			ret = __db_pitem(dbc, h, indx,
+			    BKEYDATA_SIZE(data->size), &bk_hdr, data);
+		} else if (replace)
+			ret = __bam_ritem(dbc, h, indx, data, 0);
+		else
+			ret = __db_pitem(dbc, h, indx,
+			    BKEYDATA_SIZE(data->size), NULL, data);
+	}
+	if (ret != 0) {
+		if (del == 1 && (t_ret =
+		    __bam_ca_di(dbc, PGNO(h), indx + 1, -1)) != 0) {
+			__db_err(env, t_ret, DB_STR("1005",
+			    "cursor adjustment after delete failed"));
+			return (__env_panic(env, t_ret));
+		}
+		return (ret);
+	}
+
+	/*
+	 * Re-position the cursors if necessary and reset the current cursor
+	 * to point to the new item.
+	 */
+	if (op != DB_CURRENT) {
+		if ((ret = __bam_ca_di(dbc, PGNO(h), indx, 1)) != 0)
+			return (ret);
+		cp->indx = TYPE(h) == P_LBTREE ? indx - O_INDX : indx;
+	}
+
+	/*
+	 * If we've changed the record count, update the tree.  There's no
+	 * need to adjust the count if the operation not performed on the
+	 * current record or when the current record was previously deleted.
+	 */
+	if (F_ISSET(cp, C_RECNUM) && (op != DB_CURRENT || was_deleted))
+		if ((ret = __bam_adjust(dbc, 1)) != 0)
+			return (ret);
+
+	/*
+	 * If a Btree leaf page is at least 50% full and we may have added or
+	 * modified a duplicate data item, see if the set of duplicates takes
+	 * up at least 25% of the space on the page.  If it does, move it onto
+	 * its own page.
+	 */
+	if (dupadjust &&
+	    (ret = __bam_dup_convert(dbc, h, indx - O_INDX, cnt)) != 0)
+		return (ret);
+
+	/* If we've modified a recno file, set the flag. */
+	if (dbc->dbtype == DB_RECNO)
+		t->re_modified = 1;
+
+	return (ret);
+}
+
+/*
+ * __bam_partsize --
+ *	Figure out how much space a partial data item is in total.
+ */
+static u_int32_t
+__bam_partsize(dbp, op, data, h, indx)
+	DB *dbp;
+	u_int32_t op, indx;
+	DBT *data;
+	PAGE *h;
+{
+	BKEYDATA *bk;
+	u_int32_t nbytes;
+
+	/*
+	 * If the record doesn't already exist, it's simply the data we're
+	 * provided.
+	 */
+	if (op != DB_CURRENT)
+		return (data->doff + data->size);
+
+	/*
+	 * Otherwise, it's the data provided plus any already existing data
+	 * that we're not replacing.
+	 */
+	bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+	nbytes =
+	    B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
+
+	return (__db_partsize(nbytes, data));
+}
+
+/*
+ * __bam_build --
+ *	Build the real record for a partial put, or short fixed-length record.
+ */
+static int
+__bam_build(dbc, op, dbt, h, indx, nbytes)
+	DBC *dbc;
+	u_int32_t op, indx, nbytes;
+	DBT *dbt;
+	PAGE *h;
+{
+	BKEYDATA *bk, tbk;
+	BOVERFLOW *bo;
+	BTREE *t;
+	DB *dbp;
+	DBT copy, *rdata;
+	u_int32_t len, tlen;
+	u_int8_t *p;
+	int ret;
+
+	COMPQUIET(bo, NULL);
+
+	dbp = dbc->dbp;
+	t = dbp->bt_internal;
+
+	/* We use the record data return memory, it's only a short-term use. */
+	rdata = &dbc->my_rdata;
+	if (rdata->ulen < nbytes) {
+		if ((ret = __os_realloc(dbp->env,
+		    nbytes, &rdata->data)) != 0) {
+			rdata->ulen = 0;
+			rdata->data = NULL;
+			return (ret);
+		}
+		rdata->ulen = nbytes;
+	}
+
+	/*
+	 * We use nul or pad bytes for any part of the record that isn't
+	 * specified; get it over with.
+	 */
+	memset(rdata->data,
+	    F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_pad : 0, nbytes);
+
+	/*
+	 * In the next clauses, we need to do three things: a) set p to point
+	 * to the place at which to copy the user's data, b) set tlen to the
+	 * total length of the record, not including the bytes contributed by
+	 * the user, and c) copy any valid data from an existing record.  If
+	 * it's not a partial put (this code is called for both partial puts
+	 * and fixed-length record padding) or it's a new key, we can cut to
+	 * the chase.
+	 */
+	if (!F_ISSET(dbt, DB_DBT_PARTIAL) || op != DB_CURRENT) {
+		p = (u_int8_t *)rdata->data + dbt->doff;
+		tlen = dbt->doff;
+		goto user_copy;
+	}
+
+	/* Find the current record. */
+	if (indx < NUM_ENT(h)) {
+		bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ?
+		    O_INDX : 0));
+		bo = (BOVERFLOW *)bk;
+	} else {
+		/* No current record: synthesize an empty B_KEYDATA item. */
+		bk = &tbk;
+		B_TSET(bk->type, B_KEYDATA);
+		bk->len = 0;
+	}
+	if (B_TYPE(bk->type) == B_OVERFLOW) {
+		/*
+		 * In the case of an overflow record, we shift things around
+		 * in the current record rather than allocate a separate copy.
+		 */
+		memset(&copy, 0, sizeof(copy));
+		if ((ret = __db_goff(dbc, &copy, bo->tlen, bo->pgno,
+		    &rdata->data, &rdata->ulen)) != 0)
+			return (ret);
+
+		/* Skip any leading data from the original record. */
+		tlen = dbt->doff;
+		p = (u_int8_t *)rdata->data + dbt->doff;
+
+		/*
+		 * Copy in any trailing data from the original record.
+		 *
+		 * If the original record was larger than the original offset
+		 * plus the bytes being deleted, there is trailing data in the
+		 * original record we need to preserve.  If we aren't deleting
+		 * the same number of bytes as we're inserting, copy it up or
+		 * down, into place.
+		 *
+		 * Use memmove(), the regions may overlap.
+		 */
+		if (bo->tlen > dbt->doff + dbt->dlen) {
+			len = bo->tlen - (dbt->doff + dbt->dlen);
+			if (dbt->dlen != dbt->size)
+				memmove(p + dbt->size, p + dbt->dlen, len);
+			tlen += len;
+		}
+	} else {
+		/* Copy in any leading data from the original record. */
+		memcpy(rdata->data,
+		    bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
+		tlen = dbt->doff;
+		p = (u_int8_t *)rdata->data + dbt->doff;
+
+		/* Copy in any trailing data from the original record. */
+		len = dbt->doff + dbt->dlen;
+		if (bk->len > len) {
+			memcpy(p + dbt->size, bk->data + len, bk->len - len);
+			tlen += bk->len - len;
+		}
+	}
+
+user_copy:
+	/*
+	 * Copy in the application provided data -- p and tlen must have been
+	 * initialized above.
+	 */
+	memcpy(p, dbt->data, dbt->size);
+	tlen += dbt->size;
+
+	/* Set the DBT to reference our new record. */
+	rdata->size = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : tlen;
+	rdata->dlen = 0;
+	rdata->doff = 0;
+	rdata->flags = 0;
+	*dbt = *rdata;
+	return (0);
+}
+
+/*
+ * __bam_ritem --
+ *	Replace an item on a page.
+ *
+ * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *, u_int32_t));
+ */
+int
+__bam_ritem(dbc, h, indx, data, typeflag)
+	DBC *dbc;
+	PAGE *h;
+	u_int32_t indx;
+	DBT *data;
+	u_int32_t typeflag;
+{
+	BKEYDATA *bk;
+	DB *dbp;
+	DBT orig, repl;
+	db_indx_t min, prefix, suffix;
+	u_int32_t len;
+	int ret;
+	u_int8_t *dp, *p, *t, type;
+
+	dbp = dbc->dbp;
+
+	/*
+	 * Replace a single item onto a page.
The logic figuring out where + * to insert and whether it fits is handled in the caller. All we do + * here is manage the page shuffling. + */ + bk = GET_BKEYDATA(dbp, h, indx); + len = bk->len; + dp = bk->data; + type = bk->type; + typeflag = B_DISSET(type); + + /* Log the change. */ + if (DBC_LOGGING(dbc)) { + /* + * We might as well check to see if the two data items share + * a common prefix and suffix -- it can save us a lot of log + * message if they're large. + */ + min = data->size < len ? data->size : len; + for (prefix = 0, + p = dp, t = data->data; + prefix < min && *p == *t; ++prefix, ++p, ++t) + ; + + min -= prefix; + for (suffix = 0, + p = (u_int8_t *)dp + len - 1, + t = (u_int8_t *)data->data + data->size - 1; + suffix < min && *p == *t; ++suffix, --p, --t) + ; + + /* We only log the parts of the keys that have changed. */ + orig.data = (u_int8_t *)dp + prefix; + orig.size = len - (prefix + suffix); + repl.data = (u_int8_t *)data->data + prefix; + repl.size = data->size - (prefix + suffix); + if ((ret = __bam_repl_log(dbp, dbc->txn, &LSN(h), 0, PGNO(h), + &LSN(h), (u_int32_t)indx, typeflag, + &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); + + return (__bam_ritem_nolog(dbc, h, indx, NULL, data, type)); +} + +/* + * __bam_ritem_nolog -- + * Replace an item on a page. + * + * PUBLIC: int __bam_ritem_nolog __P((DBC *, + * PUBLIC: PAGE *, u_int32_t, DBT *, DBT *, u_int32_t)); + */ +int +__bam_ritem_nolog(dbc, h, indx, hdr, data, type) + DBC *dbc; + PAGE *h; + u_int32_t indx; + DBT *hdr, *data; + u_int32_t type; +{ + BKEYDATA *bk; + BINTERNAL *bi; + DB *dbp; + db_indx_t cnt, off, lo, ln; + db_indx_t *inp; + int32_t nbytes; + u_int8_t *p, *t; + + dbp = dbc->dbp; + /* + * Set references to the first in-use byte on the page and the + * first byte of the item being replaced. 
+ */ + inp = P_INP(dbp, h); + p = (u_int8_t *)h + HOFFSET(h); + if (TYPE(h) == P_IBTREE) { + bi = GET_BINTERNAL(dbp, h, indx); + t = (u_int8_t *)bi; + lo = (db_indx_t)BINTERNAL_SIZE(bi->len); + if (data == NULL) { + DB_ASSERT(dbp->env, hdr != NULL); + bi = (BINTERNAL*)hdr->data; + P_16_COPY(&bi->len, &cnt); + ln = (db_indx_t)BINTERNAL_SIZE(cnt); + } else + ln = (db_indx_t)BINTERNAL_SIZE(data->size); + } else { + bk = GET_BKEYDATA(dbp, h, indx); + t = (u_int8_t *)bk; + lo = (db_indx_t)BKEYDATA_SIZE(bk->len); + ln = (db_indx_t)BKEYDATA_SIZE(data->size); + } + + /* + * If the entry is growing in size, shift the beginning of the data + * part of the page down. If the entry is shrinking in size, shift + * the beginning of the data part of the page up. Use memmove(3), + * the regions overlap. + */ + if (lo != ln) { + nbytes = (int32_t)(lo - ln); /* Signed difference. */ + if (p == t) /* First index is fast. */ + inp[indx] += (u_int32_t)nbytes; + else { /* Else, shift the page. */ + memmove(p + nbytes, p, (size_t)(t - p)); + + /* Adjust the indices' offsets. */ + off = (u_int32_t)inp[indx]; + for (cnt = 0; cnt < NUM_ENT(h); ++cnt) + if (inp[cnt] <= off) + inp[cnt] += (u_int32_t)nbytes; + } + + /* Clean up the page and adjust the item's reference. */ + HOFFSET(h) += (u_int32_t)nbytes; + t += nbytes; + } + + /* Copy the new item onto the page. */ + if (TYPE(h) == P_IBTREE) { + DB_ASSERT(dbp->env, hdr != NULL); + memcpy(t, hdr->data, hdr->size); + bi = (BINTERNAL *)t; + if (data != NULL && data->size != 0) + memcpy(bi->data, data->data, data->size); + } else { + bk = (BKEYDATA *)t; + bk->len = data->size; + B_TSET(bk->type, type); + memcpy(bk->data, data->data, bk->len); + } + + return (0); +} + +/* + * __bam_irep -- + * Replace an item on an internal page. 
+ * + * PUBLIC: int __bam_irep __P((DBC *, PAGE *, u_int32_t, DBT *, DBT *)); + */ +int +__bam_irep(dbc, h, indx, hdr, data) + DBC *dbc; + PAGE *h; + u_int32_t indx; + DBT *hdr; + DBT *data; +{ + BINTERNAL *bi, *bn; + DB *dbp; + DBT dbt; + int ret; + + dbp = dbc->dbp; + + bi = GET_BINTERNAL(dbp, h, indx); + bn = (BINTERNAL *) hdr->data; + + if (B_TYPE(bi->type) == B_OVERFLOW && + (ret = __db_doff(dbc, ((BOVERFLOW *)bi->data)->pgno)) != 0) + return (ret); + + if (DBC_LOGGING(dbc)) { + dbt.data = bi; + dbt.size = BINTERNAL_SIZE(bi->len); + if ((ret = __bam_irep_log(dbp, dbc->txn, &LSN(h), 0, PGNO(h), + &LSN(h), (u_int32_t)indx, TYPE(h), hdr, data, &dbt)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); + + return (__bam_ritem_nolog(dbc, h, indx, hdr, data, bn->type)); +} + +/* + * __bam_dup_check -- + * Check to see if the duplicate set at indx should have its own page. + */ +static int +__bam_dup_check(dbc, op, h, indx, sz, cntp) + DBC *dbc; + u_int32_t op; + PAGE *h; + u_int32_t indx, sz; + db_indx_t *cntp; +{ + BKEYDATA *bk; + DB *dbp; + db_indx_t cnt, first, *inp; + + dbp = dbc->dbp; + inp = P_INP(dbp, h); + + /* + * Count the duplicate records and calculate how much room they're + * using on the page. + */ + while (indx > 0 && inp[indx] == inp[indx - P_INDX]) + indx -= P_INDX; + + /* Count the key once. */ + bk = GET_BKEYDATA(dbp, h, indx); + sz += B_TYPE(bk->type) == B_KEYDATA ? + BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; + + /* + * Sum up all the data items. + * Account for the record being inserted. If we are replacing it, + * don't count it twice. + * + * We execute the loop with first == indx to get the size of the + * first record. + */ + cnt = op == DB_CURRENT ? 0 : 1; + for (first = indx; + indx < NUM_ENT(h) && inp[first] == inp[indx]; + ++cnt, indx += P_INDX) { + bk = GET_BKEYDATA(dbp, h, indx + O_INDX); + sz += B_TYPE(bk->type) == B_KEYDATA ? 
+ BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; + } + + /* + * We have to do these checks when the user is replacing the cursor's + * data item -- if the application replaces a duplicate item with a + * larger data item, it can increase the amount of space used by the + * duplicates, requiring this check. But that means we may have done + * this check when it wasn't a duplicate item after all. + */ + if (cnt == 1) + return (0); + + /* + * If this set of duplicates is using more than 25% of the page, move + * them off. The choice of 25% is a WAG, but the value must be small + * enough that we can always split a page without putting duplicates + * on two different pages. + */ + if (sz < dbp->pgsize / 4) + return (0); + + *cntp = cnt; + return (1); +} + +/* + * __bam_dup_convert -- + * Move a set of duplicates off-page and into their own tree. + */ +static int +__bam_dup_convert(dbc, h, indx, cnt) + DBC *dbc; + PAGE *h; + u_int32_t indx, cnt; +{ + BKEYDATA *bk; + DB *dbp; + DBT hdr; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + PAGE *dp; + db_indx_t cpindx, dindx, first, *inp; + int ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + inp = P_INP(dbp, h); + + /* Move to the beginning of the dup set. */ + while (indx > 0 && inp[indx] == inp[indx - P_INDX]) + indx -= P_INDX; + + /* Get a new page. */ + if ((ret = __db_new(dbc, + dbp->dup_compare == NULL ? P_LRECNO : P_LDUP, &lock, &dp)) != 0) + return (ret); + P_INIT(dp, dbp->pgsize, dp->pgno, + PGNO_INVALID, PGNO_INVALID, LEAFLEVEL, TYPE(dp)); + + /* + * Move this set of duplicates off the page. First points to the first + * key of the first duplicate key/data pair, cnt is the number of pairs + * we're dealing with. + */ + memset(&hdr, 0, sizeof(hdr)); + first = indx; + dindx = indx; + cpindx = 0; + do { + /* Move cursors referencing the old entry to the new entry. */ + if ((ret = __bam_ca_dup(dbc, first, + PGNO(h), indx, PGNO(dp), cpindx)) != 0) + goto err; + + /* + * Copy the entry to the new page. 
If the off-duplicate page + * If the off-duplicate page is a Btree page (i.e. dup_compare + * will be non-NULL, we use Btree pages for sorted dups, + * and Recno pages for unsorted dups), move all entries + * normally, even deleted ones. If it's a Recno page, + * deleted entries are discarded (if the deleted entry is + * overflow, then free up those pages). + */ + bk = GET_BKEYDATA(dbp, h, dindx + 1); + hdr.data = bk; + hdr.size = B_TYPE(bk->type) == B_KEYDATA ? + BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE; + if (dbp->dup_compare == NULL && B_DISSET(bk->type)) { + /* + * Unsorted dups, i.e. recno page, and we have + * a deleted entry, don't move it, but if it was + * an overflow entry, we need to free those pages. + */ + if (B_TYPE(bk->type) == B_OVERFLOW && + (ret = __db_doff(dbc, + (GET_BOVERFLOW(dbp, h, dindx + 1))->pgno)) != 0) + goto err; + } else { + if ((ret = __db_pitem( + dbc, dp, cpindx, hdr.size, &hdr, NULL)) != 0) + goto err; + ++cpindx; + } + /* Delete all but the last reference to the key. */ + if (cnt != 1) { + if ((ret = __bam_adjindx(dbc, + h, dindx, first + 1, 0)) != 0) + goto err; + } else + dindx++; + + /* Delete the data item. */ + if ((ret = __db_ditem(dbc, h, dindx, hdr.size)) != 0) + goto err; + indx += P_INDX; + } while (--cnt); + + /* Put in a new data item that points to the duplicates page. */ + if ((ret = __bam_ovput(dbc, + B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0) + goto err; + + /* Adjust cursors for all the above movements. */ + ret = __bam_ca_di(dbc, + PGNO(h), first + P_INDX, (int)(first + P_INDX - indx)); + +err: if ((t_ret = __memp_fput(mpf, + dbc->thread_info, dp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + (void)__TLPUT(dbc, lock); + return (ret); +} + +/* + * __bam_ovput -- + * Build an item for an off-page duplicates page or overflow page and + * insert it on the page. 
+ */ +static int +__bam_ovput(dbc, type, pgno, h, indx, item) + DBC *dbc; + u_int32_t type, indx; + db_pgno_t pgno; + PAGE *h; + DBT *item; +{ + BOVERFLOW bo; + DBT hdr; + int ret; + + UMRW_SET(bo.unused1); + B_TSET(bo.type, type); + UMRW_SET(bo.unused2); + + /* + * If we're creating an overflow item, do so and acquire the page + * number for it. If we're creating an off-page duplicates tree, + * we are giving the page number as an argument. + */ + if (type == B_OVERFLOW) { + if ((ret = __db_poff(dbc, item, &bo.pgno)) != 0) + return (ret); + bo.tlen = item->size; + } else { + bo.pgno = pgno; + bo.tlen = 0; + } + + /* Store the new record on the page. */ + memset(&hdr, 0, sizeof(hdr)); + hdr.data = &bo; + hdr.size = BOVERFLOW_SIZE; + return (__db_pitem(dbc, h, indx, BOVERFLOW_SIZE, &hdr, NULL)); +} diff --git a/src/btree/bt_rec.c b/src/btree/bt_rec.c new file mode 100644 index 00000000..511f44bf --- /dev/null +++ b/src/btree/bt_rec.c @@ -0,0 +1,2036 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +#define IS_BTREE_PAGE(pagep) \ + (TYPE(pagep) == P_IBTREE || \ + TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP) + +/* + * __bam_split_recover -- + * Recovery function for split. 
+ * + * PUBLIC: int __bam_split_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN *plsnp; + DB_MPOOLFILE *mpf; + PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; + db_pgno_t pgno, parent_pgno; + u_int32_t opflags, size; + int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__bam_split_print); + + _lp = lp = np = pp = _rp = rp = NULL; + sp = NULL; + + REC_INTRO(__bam_split_read, ip, 0); + + opflags = OP_MODE_GET(argp->opflags); + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + (opflags & SPL_RECNO) ? DB_RECNO : DB_BTREE, + PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0) + goto out; + if (opflags & SPL_NRECS) + F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM); + + /* + * There are two kinds of splits that we have to recover from. The + * first is a root-page split, where the root page is split from a + * leaf page into an internal page and two new leaf pages are created. + * The second is where a page is split into two pages, and a new key + * is inserted into the parent page. + * + * DBTs are not aligned in log records, so we need to copy the page + * so that we can access fields within it throughout this routine. + * Although we could hardcode the unaligned copies in this routine, + * we will be calling into regular btree functions with this page, + * so it's got to be aligned. Copying it into allocated memory is + * the only way to guarantee this. + */ + if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0) + goto out; + memcpy(sp, argp->pg.data, argp->pg.size); + + pgno = PGNO(sp); + parent_pgno = argp->ppgno; + rootsplit = parent_pgno == pgno; + + /* Get the pages going down the tree. 
*/ + REC_FGET(mpf, ip, parent_pgno, &pp, left); +left: REC_FGET(mpf, ip, argp->left, &lp, right); +right: REC_FGET(mpf, ip, argp->right, &rp, redo); + +redo: if (DB_REDO(op)) { + l_update = r_update = p_update = 0; + /* + * Decide if we need to resplit the page. + * + * If this is a root split, then the root has to exist unless + * we have truncated it due to a future deallocation. + */ + if (pp != NULL) { + if (rootsplit) + plsnp = &LSN(argp->pg.data); + else + plsnp = &argp->plsn; + cmp = LOG_COMPARE(&LSN(pp), plsnp); + CHECK_LSN(env, op, cmp, &LSN(pp), plsnp); + if (cmp == 0) + p_update = 1; + } + + if (lp != NULL) { + cmp = LOG_COMPARE(&LSN(lp), &argp->llsn); + CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn); + if (cmp == 0) + l_update = 1; + } + + if (rp != NULL) { + cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn); + CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn); + if (cmp == 0) + r_update = 1; + } + + if (!p_update && !l_update && !r_update) + goto check_next; + + /* Allocate and initialize new left/right child pages. */ + if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 || + (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0) + goto out; + if (rootsplit) { + P_INIT(_lp, file_dbp->pgsize, argp->left, + PGNO_INVALID, + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : argp->left, + PGNO_INVALID, LEVEL(sp), TYPE(sp)); + } else { + P_INIT(_lp, file_dbp->pgsize, PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno, + ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp), + LEVEL(sp), TYPE(sp)); + } + + /* Split the page. 
*/ + if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 || + (ret = __bam_copy(file_dbp, sp, _rp, argp->indx, + NUM_ENT(sp))) != 0) + goto out; + + if (l_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + memcpy(lp, _lp, file_dbp->pgsize); + lp->lsn = *lsnp; + } + + if (r_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + memcpy(rp, _rp, file_dbp->pgsize); + rp->lsn = *lsnp; + } + + /* + * Drop the latches on the lower level pages before + * getting an exclusive latch on the higher level page. + */ + if (lp != NULL && (ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) && ret == 0) + goto out; + lp = NULL; + if (rp != NULL && (ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) && ret == 0) + goto out; + rp = NULL; + /* + * If the parent page is wrong, update it. + * For recno the insert into an existing parent + * was logged separately. + * If it is a root page update initialize the page and + * update the record counts if needed. + * Then insert the record for the right hand child page. + */ + if (p_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + + if (rootsplit) { + P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID, + PGNO_INVALID, _lp->level + 1, + (opflags & SPL_RECNO) ? + P_IRECNO : P_IBTREE); + if (opflags & SPL_NRECS) { + RE_NREC_SET(pp, + __bam_total(file_dbp, _lp) + + __bam_total(file_dbp, _rp)); + } + if ((ret = __db_pitem_nolog(dbc, pp, + argp->pindx, argp->pentry.size, + &argp->pentry, NULL)) != 0) + goto out; + + } else if (opflags & SPL_NRECS) + goto recno; + if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1, + argp->rentry.size, &argp->rentry, NULL)) != 0) + goto out; +recno: pp->lsn = *lsnp; + } + +check_next: /* + * Finally, redo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. 
The next + * page must exist because we're redoing the operation. + */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->npgno, &np, done); + cmp = LOG_COMPARE(&LSN(np), &argp->nlsn); + CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->right; + np->lsn = *lsnp; + } + } + } else { + /* + * If it's a root split and the left child ever existed, update + * its LSN. Otherwise its the split page. If + * right child ever existed, root split or not, update its LSN. + * The undo of the page allocation(s) will restore them to the + * free list. + */ + if (rootsplit && lp != NULL && + LOG_COMPARE(lsnp, &LSN(lp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + lp->lsn = argp->llsn; + } + if (rp != NULL && + LOG_COMPARE(lsnp, &LSN(rp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + rp->lsn = argp->rlsn; + } + /* + * Drop the lower level pages before getting an exclusive + * latch on the parent. + */ + if (rp != NULL && (ret = __memp_fput(mpf, + ip, rp, file_dbp->priority))) + goto out; + rp = NULL; + + /* + * Check the state of the split page. If its a rootsplit + * then thats the rootpage otherwise its the left page. + */ + if (rootsplit) { + DB_ASSERT(env, pgno == argp->ppgno); + if (lp != NULL && (ret = __memp_fput(mpf, ip, + lp, file_dbp->priority)) != 0) + goto out; + lp = pp; + pp = NULL; + } + if (lp != NULL) { + cmp = LOG_COMPARE(lsnp, &LSN(lp)); + CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + memcpy(lp, argp->pg.data, argp->pg.size); + if ((ret = __memp_fput(mpf, + ip, lp, file_dbp->priority))) + goto out; + lp = NULL; + } + } + + /* + * Next we can update the parent removing the new index. + * If this has record numbers, then we log this separately. 
+ */ + if (pp != NULL) { + DB_ASSERT(env, !rootsplit); + cmp = LOG_COMPARE(lsnp, &LSN(pp)); + CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + if ((opflags & SPL_NRECS) == 0) { + size = BINTERNAL_SIZE( + GET_BINTERNAL(file_dbp, + pp, argp->pindx + 1)->len); + + if ((ret = __db_ditem(dbc, pp, + argp->pindx + 1, size)) != 0) + goto out; + } + pp->lsn = argp->plsn; + } + } + + /* + * Finally, undo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. Since it's + * possible that the next-page never existed, we ignore it as + * if there's nothing to undo. + */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->npgno, + ip, NULL, DB_MPOOL_EDIT, &np)) != 0) { + np = NULL; + goto done; + } + if (LOG_COMPARE(lsnp, &LSN(np)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->left; + np->lsn = argp->nlsn; + } + } + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: /* Free any pages that are left. */ + if (lp != NULL && (t_ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (np != NULL && (t_ret = __memp_fput(mpf, + ip, np, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (rp != NULL && (t_ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (pp != NULL && (t_ret = __memp_fput(mpf, + ip, pp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Free any allocated space. */ + if (_lp != NULL) + __os_free(env, _lp); + if (_rp != NULL) + __os_free(env, _rp); + if (sp != NULL) + __os_free(env, sp); + + REC_CLOSE; +} +/* + * __bam_split_48_recover -- + * Recovery function for split. 
+ * + * PUBLIC: int __bam_split_48_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_48_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_48_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN *plsnp; + DB_MPOOLFILE *mpf; + PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; + db_pgno_t pgno, parent_pgno; + u_int32_t ptype, size; + int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__bam_split_print); + + _lp = lp = np = pp = _rp = rp = NULL; + sp = NULL; + + REC_INTRO(__bam_split_48_read, ip, 0); + + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + (argp->opflags & SPL_RECNO) ? DB_RECNO : DB_BTREE, + PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0) + goto out; + if (argp->opflags & SPL_NRECS) + F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM); + + /* + * There are two kinds of splits that we have to recover from. The + * first is a root-page split, where the root page is split from a + * leaf page into an internal page and two new leaf pages are created. + * The second is where a page is split into two pages, and a new key + * is inserted into the parent page. + * + * DBTs are not aligned in log records, so we need to copy the page + * so that we can access fields within it throughout this routine. + * Although we could hardcode the unaligned copies in this routine, + * we will be calling into regular btree functions with this page, + * so it's got to be aligned. Copying it into allocated memory is + * the only way to guarantee this. + */ + if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0) + goto out; + memcpy(sp, argp->pg.data, argp->pg.size); + + pgno = PGNO(sp); + parent_pgno = argp->ppgno; + rootsplit = parent_pgno == pgno; + + /* Get the pages going down the tree. 
*/ + REC_FGET(mpf, ip, parent_pgno, &pp, left); +left: REC_FGET(mpf, ip, argp->left, &lp, right); +right: REC_FGET(mpf, ip, argp->right, &rp, redo); + +redo: if (DB_REDO(op)) { + l_update = r_update = p_update = 0; + /* + * Decide if we need to resplit the page. + * + * If this is a root split, then the root has to exist unless + * we have truncated it due to a future deallocation. + */ + if (pp != NULL) { + if (rootsplit) + plsnp = &LSN(argp->pg.data); + else + plsnp = &argp->plsn; + cmp = LOG_COMPARE(&LSN(pp), plsnp); + CHECK_LSN(env, op, cmp, &LSN(pp), plsnp); + if (cmp == 0) + p_update = 1; + } + + if (lp != NULL) { + cmp = LOG_COMPARE(&LSN(lp), &argp->llsn); + CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn); + if (cmp == 0) + l_update = 1; + } + + if (rp != NULL) { + cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn); + CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn); + if (cmp == 0) + r_update = 1; + } + + if (!p_update && !l_update && !r_update) + goto check_next; + + /* Allocate and initialize new left/right child pages. */ + if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 || + (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0) + goto out; + if (rootsplit) { + P_INIT(_lp, file_dbp->pgsize, argp->left, + PGNO_INVALID, + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : argp->left, + PGNO_INVALID, LEVEL(sp), TYPE(sp)); + } else { + P_INIT(_lp, file_dbp->pgsize, PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno, + ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp), + LEVEL(sp), TYPE(sp)); + } + + /* Split the page. 
*/ + if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 || + (ret = __bam_copy(file_dbp, sp, _rp, argp->indx, + NUM_ENT(sp))) != 0) + goto out; + + if (l_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + memcpy(lp, _lp, file_dbp->pgsize); + lp->lsn = *lsnp; + } + + if (r_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + memcpy(rp, _rp, file_dbp->pgsize); + rp->lsn = *lsnp; + } + + /* + * Drop the latches on the lower level pages before + * getting an exclusive latch on the higher level page. + */ + if (lp != NULL && (ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) && ret == 0) + goto out; + lp = NULL; + if (rp != NULL && (ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) && ret == 0) + goto out; + rp = NULL; + /* + * If the parent page is wrong, update it. + * Initialize the page. If it is a root page update + * the record counts if needed and put the first record in. + * Then insert the record for the right hand child page. + */ + if (p_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + if (argp->opflags & SPL_RECNO) + ptype = P_IRECNO; + else + ptype = P_IBTREE; + + if (rootsplit) { + P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID, + PGNO_INVALID, _lp->level + 1, ptype); + if (argp->opflags & SPL_NRECS) { + RE_NREC_SET(pp, + __bam_total(file_dbp, _lp) + + __bam_total(file_dbp, _rp)); + } + if ((ret = __db_pitem_nolog(dbc, pp, + argp->pindx, argp->pentry.size, + &argp->pentry, NULL)) != 0) + goto out; + + } + if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1, + argp->rentry.size, &argp->rentry, NULL)) != 0) + goto out; + pp->lsn = *lsnp; + } + +check_next: /* + * Finally, redo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. The next + * page must exist because we're redoing the operation. 
+ */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->npgno, &np, done); + cmp = LOG_COMPARE(&LSN(np), &argp->nlsn); + CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->right; + np->lsn = *lsnp; + } + } + } else { + /* + * If it's a root split and the left child ever existed, update + * its LSN. Otherwise its the split page. If + * right child ever existed, root split or not, update its LSN. + * The undo of the page allocation(s) will restore them to the + * free list. + */ + if (rootsplit && lp != NULL && + LOG_COMPARE(lsnp, &LSN(lp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + lp->lsn = argp->llsn; + } + if (rp != NULL && + LOG_COMPARE(lsnp, &LSN(rp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + rp->lsn = argp->rlsn; + } + /* + * Drop the lower level pages before getting an exclusive + * latch on the parent. + */ + if (rp != NULL && (ret = __memp_fput(mpf, + ip, rp, file_dbp->priority))) + goto out; + rp = NULL; + + /* + * Check the state of the split page. If its a rootsplit + * then thats the rootpage otherwise its the left page. + */ + if (rootsplit) { + DB_ASSERT(env, pgno == argp->ppgno); + if (lp != NULL && (ret = __memp_fput(mpf, ip, + lp, file_dbp->priority)) != 0) + goto out; + lp = pp; + pp = NULL; + } + if (lp != NULL) { + cmp = LOG_COMPARE(lsnp, &LSN(lp)); + CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + memcpy(lp, argp->pg.data, argp->pg.size); + if ((ret = __memp_fput(mpf, + ip, lp, file_dbp->priority))) + goto out; + lp = NULL; + } + } + + /* + * Next we can update the parent removing the new index. 
+ */ + if (pp != NULL) { + DB_ASSERT(env, !rootsplit); + cmp = LOG_COMPARE(lsnp, &LSN(pp)); + CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + if (argp->opflags & SPL_RECNO) + size = RINTERNAL_SIZE; + else + size = BINTERNAL_SIZE( + GET_BINTERNAL(file_dbp, + pp, argp->pindx + 1)->len); + + if ((ret = __db_ditem(dbc, pp, + argp->pindx + 1, size)) != 0) + goto out; + pp->lsn = argp->plsn; + } + } + + /* + * Finally, undo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. Since it's + * possible that the next-page never existed, we ignore it as + * if there's nothing to undo. + */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->npgno, + ip, NULL, DB_MPOOL_EDIT, &np)) != 0) { + np = NULL; + goto done; + } + if (LOG_COMPARE(lsnp, &LSN(np)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->left; + np->lsn = argp->nlsn; + } + } + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: /* Free any pages that are left. */ + if (lp != NULL && (t_ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (np != NULL && (t_ret = __memp_fput(mpf, + ip, np, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (rp != NULL && (t_ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (pp != NULL && (t_ret = __memp_fput(mpf, + ip, pp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Free any allocated space. */ + if (_lp != NULL) + __os_free(env, _lp); + if (_rp != NULL) + __os_free(env, _rp); + if (sp != NULL) + __os_free(env, sp); + + REC_CLOSE; +} +/* + * __bam_split_recover -- + * Recovery function for split. 
+ * + * PUBLIC: int __bam_split_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_split_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_split_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; + db_pgno_t pgno, root_pgno; + u_int32_t ptype; + int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__bam_split_print); + + _lp = lp = np = pp = _rp = rp = NULL; + sp = NULL; + + REC_INTRO(__bam_split_42_read, ip, 0); + + /* + * There are two kinds of splits that we have to recover from. The + * first is a root-page split, where the root page is split from a + * leaf page into an internal page and two new leaf pages are created. + * The second is where a page is split into two pages, and a new key + * is inserted into the parent page. + * + * DBTs are not aligned in log records, so we need to copy the page + * so that we can access fields within it throughout this routine. + * Although we could hardcode the unaligned copies in this routine, + * we will be calling into regular btree functions with this page, + * so it's got to be aligned. Copying it into allocated memory is + * the only way to guarantee this. + */ + if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0) + goto out; + memcpy(sp, argp->pg.data, argp->pg.size); + + pgno = PGNO(sp); + root_pgno = argp->root_pgno; + rootsplit = root_pgno != PGNO_INVALID; + REC_FGET(mpf, ip, argp->left, &lp, right); +right: REC_FGET(mpf, ip, argp->right, &rp, redo); + +redo: if (DB_REDO(op)) { + l_update = r_update = p_update = 0; + /* + * Decide if we need to resplit the page. + * + * If this is a root split, then the root has to exist unless + * we have truncated it due to a future deallocation. 
+ */ + if (rootsplit) { + REC_FGET(mpf, ip, root_pgno, &pp, do_left); + cmp = LOG_COMPARE(&LSN(pp), &LSN(argp->pg.data)); + CHECK_LSN(env, op, + cmp, &LSN(pp), &LSN(argp->pg.data)); + p_update = cmp == 0; + } + +do_left: if (lp != NULL) { + cmp = LOG_COMPARE(&LSN(lp), &argp->llsn); + CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn); + if (cmp == 0) + l_update = 1; + } + + if (rp != NULL) { + cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn); + CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn); + if (cmp == 0) + r_update = 1; + } + + if (!p_update && !l_update && !r_update) + goto check_next; + + /* Allocate and initialize new left/right child pages. */ + if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 || + (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0) + goto out; + if (rootsplit) { + P_INIT(_lp, file_dbp->pgsize, argp->left, + PGNO_INVALID, + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : argp->left, + PGNO_INVALID, LEVEL(sp), TYPE(sp)); + } else { + P_INIT(_lp, file_dbp->pgsize, PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp), + ISINTERNAL(sp) ? PGNO_INVALID : argp->right, + LEVEL(sp), TYPE(sp)); + P_INIT(_rp, file_dbp->pgsize, argp->right, + ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno, + ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp), + LEVEL(sp), TYPE(sp)); + } + + /* Split the page. 
*/ + if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 || + (ret = __bam_copy(file_dbp, sp, _rp, argp->indx, + NUM_ENT(sp))) != 0) + goto out; + + if (l_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + memcpy(lp, _lp, file_dbp->pgsize); + lp->lsn = *lsnp; + if ((ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) != 0) + goto out; + lp = NULL; + } + + if (r_update) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + memcpy(rp, _rp, file_dbp->pgsize); + rp->lsn = *lsnp; + if ((ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) != 0) + goto out; + rp = NULL; + } + + /* + * If the parent page is wrong, update it. This is of interest + * only if it was a root split, since root splits create parent + * pages. All other splits modify a parent page, but those are + * separately logged and recovered. + */ + if (rootsplit && p_update) { + if (IS_BTREE_PAGE(sp)) { + ptype = P_IBTREE; + rc = argp->opflags & SPL_NRECS ? 1 : 0; + } else { + ptype = P_IRECNO; + rc = 1; + } + + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + P_INIT(pp, file_dbp->pgsize, root_pgno, + PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype); + RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) + + __bam_total(file_dbp, _rp) : 0); + + pp->lsn = *lsnp; + if ((ret = __memp_fput(mpf, + ip, pp, file_dbp->priority)) != 0) + goto out; + pp = NULL; + } + +check_next: /* + * Finally, redo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. The next + * page must exist because we're redoing the operation. 
+ */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->npgno, + ip, NULL, 0, &np)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr( + file_dbp, argp->npgno, ret); + goto out; + } else + goto done; + } + cmp = LOG_COMPARE(&LSN(np), &argp->nlsn); + CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn); + if (cmp == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->right; + np->lsn = *lsnp; + if ((ret = __memp_fput(mpf, ip, + np, file_dbp->priority)) != 0) + goto out; + np = NULL; + } + } + } else { + /* + * If the split page is wrong, replace its contents with the + * logged page contents. If the page doesn't exist, it means + * that the create of the page never happened, nor did any of + * the adds onto the page that caused the split, and there's + * really no undo-ing to be done. + */ + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, + DB_MPOOL_EDIT, &pp)) != 0) { + pp = NULL; + goto lrundo; + } + if (LOG_COMPARE(lsnp, &LSN(pp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pp); + memcpy(pp, argp->pg.data, argp->pg.size); + if ((ret = __memp_fput(mpf, + ip, pp, file_dbp->priority)) != 0) + goto out; + pp = NULL; + } + + /* + * If it's a root split and the left child ever existed, update + * its LSN. (If it's not a root split, we've updated the left + * page already -- it's the same as the split page.) If the + * right child ever existed, root split or not, update its LSN. + * The undo of the page allocation(s) will restore them to the + * free list. 
+ */ +lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { + if (rootsplit && lp != NULL && + LOG_COMPARE(lsnp, &LSN(lp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &lp); + lp->lsn = argp->llsn; + if ((ret = __memp_fput(mpf, ip, + lp, file_dbp->priority)) != 0) + goto out; + lp = NULL; + } + if (rp != NULL && + LOG_COMPARE(lsnp, &LSN(rp)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &rp); + rp->lsn = argp->rlsn; + if ((ret = __memp_fput(mpf, ip, + rp, file_dbp->priority)) != 0) + goto out; + rp = NULL; + } + } + + /* + * Finally, undo the next-page link if necessary. This is of + * interest only if it wasn't a root split -- inserting a new + * page in the tree requires that any following page have its + * previous-page pointer updated to our new page. Since it's + * possible that the next-page never existed, we ignore it as + * if there's nothing to undo. + */ + if (!rootsplit && argp->npgno != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->npgno, + ip, NULL, DB_MPOOL_EDIT, &np)) != 0) { + np = NULL; + goto done; + } + if (LOG_COMPARE(lsnp, &LSN(np)) == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &np); + PREV_PGNO(np) = argp->left; + np->lsn = argp->nlsn; + if (__memp_fput(mpf, + ip, np, file_dbp->priority)) + goto out; + np = NULL; + } + } + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: /* Free any pages that weren't dirtied. */ + if (pp != NULL && (t_ret = __memp_fput(mpf, + ip, pp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (lp != NULL && (t_ret = __memp_fput(mpf, + ip, lp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (np != NULL && (t_ret = __memp_fput(mpf, + ip, np, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (rp != NULL && (t_ret = __memp_fput(mpf, + ip, rp, file_dbp->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Free any allocated space. 
*/ + if (_lp != NULL) + __os_free(env, _lp); + if (_rp != NULL) + __os_free(env, _rp); + if (sp != NULL) + __os_free(env, sp); + + REC_CLOSE; +} + +/* + * __bam_rsplit_recover -- + * Recovery function for a reverse split. + * + * PUBLIC: int __bam_rsplit_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_rsplit_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_rsplit_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN copy_lsn; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno, root_pgno; + db_recno_t rcnt; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_rsplit_print); + REC_INTRO(__bam_rsplit_read, ip, 1); + + /* Fix the root page. */ + pgno = root_pgno = argp->root_pgno; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else + goto do_page; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->rootlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->rootlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* + * Copy the new data to the root page. If it is not now a + * leaf page we need to restore the record number. We could + * try to determine if C_RECNUM was set in the btree, but + * that's not really necessary since the field is not used + * otherwise. + */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + rcnt = RE_NREC(pagep); + memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); + if (LEVEL(pagep) > LEAFLEVEL) + RE_NREC_SET(pagep, rcnt); + pagep->pgno = root_pgno; + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. 
*/ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, root_pgno, + argp->nrec, PGNO_INVALID, pagep->level + 1, + IS_BTREE_PAGE(pagep) ? P_IBTREE : P_IRECNO); + if ((ret = __db_pitem(dbc, pagep, 0, + argp->rootent.size, &argp->rootent, NULL)) != 0) + goto out; + pagep->lsn = argp->rootlsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +do_page: + /* + * Fix the page copied over the root page. It's possible that the + * page never made it to disk, or was truncated so if the page + * doesn't exist, it's okay and there's nothing further to do. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + (void)__ua_memcpy(©_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN)); + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); + } + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __bam_adj_recover -- + * Recovery function for adj. 
+ * + * PUBLIC: int __bam_adj_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_adj_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_adj_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_adj_print); + REC_INTRO(__bam_adj_read, ip, 1); + + /* Get the page; if it never existed and we're undoing, we're done. */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __bam_adjindx(dbc, + pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0) + goto out; + + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __bam_adjindx(dbc, + pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0) + goto out; + + LSN(pagep) = argp->lsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __bam_cadjust_recover -- + * Recovery function for the adjust of a count change in an internal + * page. 
+ * + * PUBLIC: int __bam_cadjust_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_cadjust_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_cadjust_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_cadjust_print); + REC_INTRO(__bam_cadjust_read, ip, 0); + + /* Get the page; if it never existed and we're undoing, we're done. */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (IS_BTREE_PAGE(pagep)) { + GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs += + argp->adjust; + if (argp->opflags & CAD_UPDATEROOT) + RE_NREC_ADJ(pagep, argp->adjust); + } else { + GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs += + argp->adjust; + if (argp->opflags & CAD_UPDATEROOT) + RE_NREC_ADJ(pagep, argp->adjust); + } + + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (IS_BTREE_PAGE(pagep)) { + GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= + argp->adjust; + if (argp->opflags & CAD_UPDATEROOT) + RE_NREC_ADJ(pagep, -(argp->adjust)); + } else { + GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= + argp->adjust; + if (argp->opflags & CAD_UPDATEROOT) + RE_NREC_ADJ(pagep, -(argp->adjust)); + } + LSN(pagep) = argp->lsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __bam_cdel_recover -- + * Recovery function for the intent-to-delete of a cursor record. + * + * PUBLIC: int __bam_cdel_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_cdel_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_cdel_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + u_int32_t indx; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_cdel_print); + REC_INTRO(__bam_cdel_read, ip, 0); + + /* Get the page; if it never existed and we're undoing, we're done. */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + indx = argp->indx + (TYPE(pagep) == P_LBTREE ? 
O_INDX : 0); + B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type); + + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0); + B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type); + + if ((ret = __bam_ca_delete( + file_dbp, argp->pgno, argp->indx, 0, NULL)) != 0) + goto out; + + LSN(pagep) = argp->lsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __bam_repl_recover -- + * Recovery function for page item replacement. + * + * PUBLIC: int __bam_repl_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_repl_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_repl_args *argp; + DB_THREAD_INFO *ip; + BKEYDATA *bk; + DB *file_dbp; + DBC *dbc; + DBT dbt; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + u_int32_t len; + u_int8_t *dp, *p; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_repl_print); + REC_INTRO(__bam_repl_read, ip, 1); + + /* Get the page; if it never existed and we're undoing, we're done. */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* + * Need to redo update described. + * + * Re-build the replacement item. 
+ */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + bk = GET_BKEYDATA(file_dbp, pagep, argp->indx); + dp = bk->data; + len = bk->len; + memset(&dbt, 0, sizeof(dbt)); + dbt.size = argp->prefix + argp->suffix + argp->repl.size; + if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0) + goto out; + p = dbt.data; + memcpy(p, dp, argp->prefix); + p += argp->prefix; + memcpy(p, argp->repl.data, argp->repl.size); + p += argp->repl.size; + memcpy(p, dp + (len - argp->suffix), argp->suffix); + + ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0); + __os_free(env, dbt.data); + if (ret != 0) + goto out; + + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* + * Need to undo update described. + * + * Re-build the original item. + */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + bk = GET_BKEYDATA(file_dbp, pagep, argp->indx); + dp = bk->data; + len = bk->len; + memset(&dbt, 0, sizeof(dbt)); + dbt.size = argp->prefix + argp->suffix + argp->orig.size; + if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0) + goto out; + p = dbt.data; + memcpy(p, dp, argp->prefix); + p += argp->prefix; + memcpy(p, argp->orig.data, argp->orig.size); + p += argp->orig.size; + memcpy(p, dp + (len - argp->suffix), argp->suffix); + + ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0); + __os_free(env, dbt.data); + if (ret != 0) + goto out; + + /* Reset the deleted flag, if necessary. */ + if (argp->isdeleted) + B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type); + + LSN(pagep) = argp->lsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __bam_irep_recover -- + * Recovery function for internal page item replacement. 
+ * + * PUBLIC: int __bam_irep_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_irep_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_irep_args *argp; + BINTERNAL *bn; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__bam_irep_print); + REC_INTRO(__bam_irep_read, ip, 1); + + /* Get the page; if it never existed and we're undoing, we're done. */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + bn = (BINTERNAL *)argp->hdr.data; + if ((ret = __bam_ritem_nolog(dbc, + pagep, argp->indx, &argp->hdr, &argp->data, bn->type)) != 0) + goto out; + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + bn = (BINTERNAL *)argp->old.data; + if ((ret = __bam_ritem_nolog(dbc, + pagep, argp->indx, &argp->old, NULL, bn->type)) != 0) + goto out; + LSN(pagep) = argp->lsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __bam_root_recover -- + * Recovery function for setting the root page on the meta-data page. 
+ * + * PUBLIC: int __bam_root_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_root_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_root_args *argp; + DB_THREAD_INFO *ip; + BTMETA *meta; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + meta = NULL; + REC_PRINT(__bam_root_print); + REC_INTRO(__bam_root_read, ip, 0); + + if ((ret = __memp_fget(mpf, &argp->meta_pgno, ip, NULL, + 0, &meta)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + meta->root = argp->root_pgno; + meta->dbmeta.lsn = *lsnp; + ((BTREE *)file_dbp->bt_internal)->bt_root = meta->root; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Nothing to undo except lsn. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + meta->dbmeta.lsn = argp->meta_lsn; + } + if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + REC_CLOSE; +} + +/* + * __bam_curadj_recover -- + * Transaction abort function to undo cursor adjustments. + * This should only be triggered by subtransaction aborts. 
+ * + * PUBLIC: int __bam_curadj_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_curadj_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_curadj_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + int ret; + + COMPQUIET(mpf, NULL); + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__bam_curadj_print); + REC_INTRO(__bam_curadj_read, ip, 1); + + ret = 0; + if (op != DB_TXN_ABORT) + goto done; + + switch (argp->mode) { + case DB_CA_DI: + if ((ret = __bam_ca_di(dbc, argp->from_pgno, + argp->from_indx, -(int)argp->first_indx)) != 0) + goto out; + break; + case DB_CA_DUP: + if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx, + argp->from_pgno, argp->from_indx, argp->to_indx)) != 0) + goto out; + break; + + case DB_CA_RSPLIT: + if ((ret = + __bam_ca_rsplit(dbc, argp->to_pgno, argp->from_pgno)) != 0) + goto out; + break; + + case DB_CA_SPLIT: + if ((ret = __bam_ca_undosplit(file_dbp, argp->from_pgno, + argp->to_pgno, argp->left_pgno, argp->from_indx)) != 0) + goto out; + break; + } + +done: *lsnp = argp->prev_lsn; +out: REC_CLOSE; +} + +/* + * __bam_rcuradj_recover -- + * Transaction abort function to undo cursor adjustments in rrecno. + * This should only be triggered by subtransaction aborts. 
+ * + * PUBLIC: int __bam_rcuradj_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_rcuradj_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_rcuradj_args *argp; + DB_THREAD_INFO *ip; + BTREE_CURSOR *cp; + DB *file_dbp; + DBC *dbc, *rdbc; + DB_MPOOLFILE *mpf; + int ret, t_ret; + + COMPQUIET(mpf, NULL); + + ip = ((DB_TXNHEAD *)info)->thread_info; + rdbc = NULL; + REC_PRINT(__bam_rcuradj_print); + REC_INTRO(__bam_rcuradj_read, ip, 1); + + ret = t_ret = 0; + + if (op != DB_TXN_ABORT) + goto done; + + /* + * We don't know whether we're in an offpage dup set, and + * thus don't know whether the dbc REC_INTRO has handed us is + * of a reasonable type. It's certainly unset, so if this is + * an offpage dup set, we don't have an OPD cursor. The + * simplest solution is just to allocate a whole new cursor + * for our use; we're only really using it to hold pass some + * state into __ram_ca, and this way we don't need to make + * this function know anything about how offpage dups work. + */ + if ((ret = __db_cursor_int(file_dbp, NULL, + NULL, DB_RECNO, argp->root, DB_RECOVER, NULL, &rdbc)) != 0) + goto out; + + cp = (BTREE_CURSOR *)rdbc->internal; + F_SET(cp, C_RENUMBER); + cp->recno = argp->recno; + + switch (argp->mode) { + case CA_DELETE: + /* + * The way to undo a delete is with an insert. Since + * we're undoing it, the delete flag must be set. + */ + F_SET(cp, C_DELETED); + F_SET(cp, C_RENUMBER); /* Just in case. */ + cp->order = argp->order; + if ((ret = __ram_ca(rdbc, CA_ICURRENT, NULL)) != 0) + goto out; + break; + case CA_IAFTER: + case CA_IBEFORE: + case CA_ICURRENT: + /* + * The way to undo an insert is with a delete. The delete + * flag is unset to start with. 
+ */ + F_CLR(cp, C_DELETED); + cp->order = INVALID_ORDER; + if ((ret = __ram_ca(rdbc, CA_DELETE, NULL)) != 0) + goto out; + break; + } + +done: *lsnp = argp->prev_lsn; +out: if (rdbc != NULL && (t_ret = __dbc_close(rdbc)) != 0 && ret == 0) + ret = t_ret; + REC_CLOSE; +} + +/* + * __bam_merge_44_recover -- + * Recovery function for merge. + * + * PUBLIC: int __bam_merge_44_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__bam_merge_44_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __bam_merge_44_args *argp; + DB_THREAD_INFO *ip; + BKEYDATA *bk; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_indx_t indx, *ninp, *pinp; + u_int32_t size; + u_int8_t *bp; + int cmp_n, cmp_p, i, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__bam_merge_44_print); + REC_INTRO(__bam_merge_44_read, ip, 1); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto next; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); + + if (cmp_p == 0 && DB_REDO(op)) { + /* + * If the header is provided the page is empty, copy the + * needed data. + */ + DB_ASSERT(env, argp->hdr.size == 0 || NUM_ENT(pagep) == 0); + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (argp->hdr.size != 0) { + P_INIT(pagep, file_dbp->pgsize, pagep->pgno, + PREV_PGNO(argp->hdr.data), + NEXT_PGNO(argp->hdr.data), + LEVEL(argp->hdr.data), TYPE(argp->hdr.data)); + } + if (TYPE(pagep) == P_OVERFLOW) { + OV_REF(pagep) = OV_REF(argp->hdr.data); + OV_LEN(pagep) = OV_LEN(argp->hdr.data); + bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp); + memcpy(bp, argp->data.data, argp->data.size); + } else { + /* Copy the data segment. 
*/ + bp = (u_int8_t *)pagep + + (db_indx_t)(HOFFSET(pagep) - argp->data.size); + memcpy(bp, argp->data.data, argp->data.size); + + /* Copy index table offset past the current entries. */ + pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); + ninp = argp->ind.data; + for (i = 0; + i < (int)(argp->ind.size / sizeof(*ninp)); i++) + *pinp++ = *ninp++ + - (file_dbp->pgsize - HOFFSET(pagep)); + HOFFSET(pagep) -= argp->data.size; + NUM_ENT(pagep) += i; + } + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* + * Since logging is logical at the page level + * we cannot just truncate the data space. Delete + * the proper number of items from the logical end + * of the page. + */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + for (i = 0; i < (int)(argp->ind.size / sizeof(*ninp)); i++) { + indx = NUM_ENT(pagep) - 1; + if (P_INP(file_dbp, pagep)[indx] == + P_INP(file_dbp, pagep)[indx - P_INDX]) { + NUM_ENT(pagep)--; + continue; + } + switch (TYPE(pagep)) { + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + bk = GET_BKEYDATA(file_dbp, pagep, indx); + size = BITEM_SIZE(bk); + break; + + case P_IBTREE: + size = BINTERNAL_SIZE( + GET_BINTERNAL(file_dbp, pagep, indx)->len); + break; + case P_IRECNO: + size = RINTERNAL_SIZE; + break; + + default: + ret = __db_pgfmt(env, PGNO(pagep)); + goto out; + } + if ((ret = + __db_ditem(dbc, pagep, indx, size)) != 0) + goto out; + } + if (argp->ind.size == 0) + HOFFSET(pagep) = file_dbp->pgsize; + pagep->lsn = argp->lsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + +next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn); + CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn); + + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to truncate the page. 
*/
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		HOFFSET(pagep) = file_dbp->pgsize;
+		NUM_ENT(pagep) = 0;
+		pagep->lsn = *lsnp;
+	} else if (cmp_n == 0 && !DB_REDO(op)) {
+		/* Need to put the data back on the page. */
+		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
+		if (TYPE(pagep) == P_OVERFLOW) {
+			OV_REF(pagep) = OV_REF(argp->hdr.data);
+			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
+			bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
+			memcpy(bp, argp->data.data, argp->data.size);
+		} else {
+			/* Restore the logged items below the logged offset. */
+			bp = (u_int8_t *)pagep +
+			    (db_indx_t)(HOFFSET(pagep) - argp->data.size);
+			memcpy(bp, argp->data.data, argp->data.size);
+
+			/* Copy index table. */
+			pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
+			ninp = argp->ind.data;
+			for (i = 0;
+			    i < (int)(argp->ind.size / sizeof(*ninp)); i++)
+				*pinp++ = *ninp++;
+			HOFFSET(pagep) -= argp->data.size;
+			NUM_ENT(pagep) = i;
+		}
+		pagep->lsn = argp->nlsn;
+	}
+
+	if ((ret = __memp_fput(mpf,
+	    ip, pagep, dbc->priority)) != 0)
+		goto out;
+done:
+	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	REC_CLOSE;
+}
+
+/*
+ * __bam_relink_43_recover --
+ *	Recovery function for relink.
+ *
+ * PUBLIC: int __bam_relink_43_recover
+ * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__bam_relink_43_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__bam_relink_43_args *argp;
+	DB_THREAD_INFO *ip;
+	DB *file_dbp;
+	DBC *dbc;
+	DB_MPOOLFILE *mpf;
+	PAGE *pagep;
+	int cmp_n, cmp_p, modified, ret;
+
+	ip = ((DB_TXNHEAD *)info)->thread_info;
+	pagep = NULL;
+	REC_PRINT(__bam_relink_43_print);
+	REC_INTRO(__bam_relink_43_read, ip, 0);
+
+	/*
+	 * There are up to three pages we need to check -- the page, and the
+	 * previous and next pages, if they existed.  For a page add operation,
+	 * the current page is the result of a split and is being recovered
+	 * elsewhere, so all we need do is recover the next page.
+	 */
+	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
+		if (ret != DB_PAGE_NOTFOUND) {
+			ret = __db_pgerr(file_dbp, argp->pgno, ret);
+			goto out;
+		} else
+			goto next2;
+	}
+
+	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
+	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
+	if (cmp_p == 0 && DB_REDO(op)) {
+		/* Redo the relink; only this page's LSN moves forward here. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->lsn = *lsnp;
+	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
+		/* Undo the relink: restore both neighbor links and the old LSN. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->next_pgno = argp->next;
+		pagep->prev_pgno = argp->prev;
+		pagep->lsn = argp->lsn;
+	}
+	if ((ret = __memp_fput(mpf,
+	    ip, pagep, file_dbp->priority)) != 0)
+		goto out;
+	pagep = NULL;
+
+next2:	if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
+		if (ret != DB_PAGE_NOTFOUND) {
+			ret = __db_pgerr(file_dbp, argp->next, ret);
+			goto out;
+		} else
+			goto prev;
+	}
+
+	modified = 0;
+	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
+	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
+	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
+	if (cmp_p == 0 && DB_REDO(op)) {
+		/* Redo the remove or undo the add: point back at prev. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->prev_pgno = argp->prev;
+		modified = 1;
+	} else if (cmp_n == 0 && DB_UNDO(op)) {
+		/* Undo the remove or redo the add: point back at pgno. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->prev_pgno = argp->pgno;
+		modified = 1;
+	}
+	if (modified) {
+		if (DB_UNDO(op))
+			pagep->lsn = argp->lsn_next;
+		else
+			pagep->lsn = *lsnp;
+	}
+	if ((ret = __memp_fput(mpf,
+	    ip, pagep, file_dbp->priority)) != 0)
+		goto out;
+	pagep = NULL;
+
+prev:	if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
+		if (ret != DB_PAGE_NOTFOUND) {
+			ret = __db_pgerr(file_dbp, argp->prev, ret);
+			goto out;
+		} else
+			goto done;
+	}
+
+	modified = 0;
+	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
+	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
+	if (cmp_p == 0 && DB_REDO(op)) {
+		/* Redo the relink: prev's forward link skips to next. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->next_pgno = argp->next;
+		modified = 1;
+	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
+		/* Undo the relink: prev's forward link points at pgno again. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		pagep->next_pgno = argp->pgno;
+		modified = 1;
+	}
+	if (modified) {
+		if (DB_UNDO(op))
+			pagep->lsn = argp->lsn_prev;
+		else
+			pagep->lsn = *lsnp;
+	}
+	if ((ret = __memp_fput(mpf,
+	    ip, pagep, file_dbp->priority)) != 0)
+		goto out;
+	pagep = NULL;
+
+done:	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	if (pagep != NULL)
+		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
+	REC_CLOSE;
+}
diff --git a/src/btree/bt_reclaim.c b/src/btree/bt_reclaim.c
new file mode 100644
index 00000000..c17a0d16
--- /dev/null
+++ b/src/btree/bt_reclaim.c
@@ -0,0 +1,98 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998, 2011 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+
+/*
+ * __bam_reclaim --
+ *	Free a database.
+ * Walks the whole tree, freeing pages via __db_reclaim_callback.
+ * PUBLIC: int __bam_reclaim __P((DB *, DB_THREAD_INFO *, DB_TXN *, u_int32_t));
+ */
+int
+__bam_reclaim(dbp, ip, txn, flags)
+	DB *dbp;
+	DB_THREAD_INFO *ip;
+	DB_TXN *txn;
+	u_int32_t flags;
+{
+	DBC *dbc;
+	DB_LOCK meta_lock;
+	int ret, t_ret;
+
+	/* Acquire a cursor; it carries txn and thread info for the traverse. */
+	if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
+		return (ret);
+
+	/* Write lock the metapage for deallocations. */
+	if ((ret = __db_lget(dbc,
+	    0, PGNO_BASE_MD, DB_LOCK_WRITE, 0, &meta_lock)) != 0)
+		goto err;
+
+	/* Avoid locking every page, we have the handle locked exclusive. */
+	F_SET(dbc, DBC_DONTLOCK);
+
+	/* Walk the tree, freeing pages; flags ride through the void * cookie. */
+	ret = __bam_traverse(dbc, DB_LOCK_WRITE,
+	    PGNO_INVALID, __db_reclaim_callback, (void*)(uintptr_t)flags);
+
+	if ((t_ret = __TLPUT(dbc, meta_lock)) != 0 && ret == 0)
+		ret = t_ret;
+
+	/* Discard the cursor, preserving the first nonzero error seen. */
+err:	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
+ * __bam_truncate --
+ *	Truncate a database.
+ * If countp is non-NULL it receives a record count accumulated by the callback.
+ * PUBLIC: int __bam_truncate __P((DBC *, u_int32_t *));
+ */
+int
+__bam_truncate(dbc, countp)
+	DBC *dbc;
+	u_int32_t *countp;
+{
+	u_int32_t count;
+	int ret;
+
+#ifdef HAVE_COMPRESSION
+	u_int32_t comp_count;
+
+	comp_count = 0;
+	if (DB_IS_COMPRESSED(dbc->dbp) &&
+	    (ret = __bam_compress_count(dbc, NULL, &comp_count)) != 0)
+		return (ret);
+#endif
+
+	count = 0;
+
+	/* Walk the tree, freeing pages. */
+	ret = __bam_traverse(dbc,
+	    DB_LOCK_WRITE, PGNO_INVALID, __db_truncate_callback, &count);
+
+#ifdef HAVE_COMPRESSION
+	if (DB_IS_COMPRESSED(dbc->dbp)) {	/* report __bam_compress_count's total */
+		if (countp != NULL)
+			*countp = comp_count;
+	} else
+#endif
+	if (countp != NULL)
+		*countp = count;
+
+	return (ret);
+}
diff --git a/src/btree/bt_recno.c b/src/btree/bt_recno.c
new file mode 100644
index 00000000..d9162ab1
--- /dev/null
+++ b/src/btree/bt_recno.c
@@ -0,0 +1,1427 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 2011 Oracle and/or its affiliates.
All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t)); +static int __ram_source __P((DB *)); +static int __ram_sread __P((DBC *, db_recno_t)); +static int __ram_update __P((DBC *, db_recno_t, int)); +static int __ram_ca_getorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __ram_ca_setorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +/* + * In recno, there are two meanings to the on-page "deleted" flag. If we're + * re-numbering records, it means the record was implicitly created. We skip + * over implicitly created records if doing a cursor "next" or "prev", and + * return DB_KEYEMPTY if they're explicitly requested.. If not re-numbering + * records, it means that the record was implicitly created, or was deleted. + * We skip over implicitly created or deleted records if doing a cursor "next" + * or "prev", and return DB_KEYEMPTY if they're explicitly requested. + * + * If we're re-numbering records, then we have to detect in the cursor that + * a record was deleted, and adjust the cursor as necessary on the next get. + * If we're not re-numbering records, then we can detect that a record has + * been deleted by looking at the actual on-page record, so we completely + * ignore the cursor's delete flag. This is different from the B+tree code. + * It also maintains whether the cursor references a deleted record in the + * cursor, and it doesn't always check the on-page value. + */ +#define CD_SET(cp) { \ + if (F_ISSET(cp, C_RENUMBER)) \ + F_SET(cp, C_DELETED); \ +} +#define CD_CLR(cp) { \ + if (F_ISSET(cp, C_RENUMBER)) { \ + F_CLR(cp, C_DELETED); \ + cp->order = INVALID_ORDER; \ + } \ +} +#define CD_ISSET(cp) \ + (F_ISSET(cp, C_RENUMBER) && F_ISSET(cp, C_DELETED) ? 
1 : 0) + +/* + * Macros for comparing the ordering of two cursors. + * cp1 comes before cp2 iff one of the following holds: + * cp1's recno is less than cp2's recno + * recnos are equal, both deleted, and cp1's order is less than cp2's + * recnos are equal, cp1 deleted, and cp2 not deleted + */ +#define C_LESSTHAN(cp1, cp2) \ + (((cp1)->recno < (cp2)->recno) || \ + (((cp1)->recno == (cp2)->recno) && \ + ((CD_ISSET((cp1)) && CD_ISSET((cp2)) && (cp1)->order < (cp2)->order) || \ + (CD_ISSET((cp1)) && !CD_ISSET((cp2)))))) + +/* + * cp1 is equal to cp2 iff their recnos and delete flags are identical, + * and if the delete flag is set their orders are also identical. + */ +#define C_EQUAL(cp1, cp2) \ + ((cp1)->recno == (cp2)->recno && CD_ISSET((cp1)) == CD_ISSET((cp2)) && \ + (!CD_ISSET((cp1)) || (cp1)->order == (cp2)->order)) + +/* + * Do we need to log the current cursor adjustment? + */ +#define CURADJ_LOG(dbc) \ + (DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL) + +/* + * After a search, copy the found page into the cursor, discarding any + * currently held lock. + */ +#define STACK_TO_CURSOR(cp, ret) { \ + int __t_ret; \ + (cp)->page = (cp)->csp->page; \ + (cp)->pgno = (cp)->csp->page->pgno; \ + (cp)->indx = (cp)->csp->indx; \ + if ((__t_ret = __TLPUT(dbc, (cp)->lock)) != 0 && (ret) == 0) \ + ret = __t_ret; \ + (cp)->lock = (cp)->csp->lock; \ + (cp)->lock_mode = (cp)->csp->lock_mode; \ +} + +/* + * __ram_open -- + * Recno open function. + * + * PUBLIC: int __ram_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t)); + */ +int +__ram_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + BTREE *t; + DBC *dbc; + int ret, t_ret; + + COMPQUIET(name, NULL); + t = dbp->bt_internal; + + /* Start up the tree. 
*/
+	if ((ret = __bam_read_root(dbp, ip, txn, base_pgno, flags)) != 0)
+		return (ret);
+
+	/*
+	 * If the user specified a source tree, open it and map it in.
+	 *
+	 * !!!
+	 * We don't complain if the user specified transactions or threads.
+	 * It's possible to make it work, but you'd better know what you're
+	 * doing!
+	 */
+	if (t->re_source != NULL && (ret = __ram_source(dbp)) != 0)
+		return (ret);
+
+	/* If we're snapshotting an underlying source file, do it now. */
+	if (F_ISSET(dbp, DB_AM_SNAPSHOT)) {
+		/* Allocate a cursor for the bulk read-in. */
+		if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
+			return (ret);
+
+		/* Do the snapshot: pull every source record into the tree. */
+		if ((ret = __ram_update(dbc,
+		    DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
+			ret = 0;	/* running off the end is expected */
+
+		/* Discard the cursor, keeping the first error. */
+		if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+
+	return (ret);
+}
+
+/*
+ * __ram_append --
+ *	Recno append function.
+ *
+ * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *));
+ */
+int
+__ram_append(dbc, key, data)
+	DBC *dbc;
+	DBT *key, *data;
+{
+	BTREE_CURSOR *cp;
+	int ret;
+
+	cp = (BTREE_CURSOR *)dbc->internal;
+
+	/*
+	 * Make sure we've read in all of the backing source file.  If
+	 * we found the record or it simply didn't exist, add the
+	 * user's record.
+	 */
+	ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
+	if (ret == 0 || ret == DB_NOTFOUND)
+		ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0);
+
+	/* Return the newly appended record number through the key DBT. */
+	if (ret == 0 && key != NULL)
+		ret = __db_retcopy(dbc->env, key, &cp->recno,
+		    sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
+
+	if (!DB_RETOK_DBCPUT(ret))
+		F_SET(dbc, DBC_ERROR);	/* mark the cursor on hard errors */
+	return (ret);
+}
+
+/*
+ * __ramc_del --
+ *	Recno DBC->del function.
+ * + * PUBLIC: int __ramc_del __P((DBC *, u_int32_t)); + */ +int +__ramc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + BKEYDATA bk; + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + DBT hdr, data; + DB_LOCK next_lock, prev_lock; + DB_LSN lsn; + db_pgno_t npgno, ppgno, save_npgno, save_ppgno; + int exact, nc, ret, stack, t_ret; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + t = dbp->bt_internal; + stack = 0; + save_npgno = save_ppgno = PGNO_INVALID; + LOCK_INIT(next_lock); + LOCK_INIT(prev_lock); + COMPQUIET(flags, 0); + + /* + * The semantics of cursors during delete are as follows: in + * non-renumbering recnos, records are replaced with a marker + * containing a delete flag. If the record referenced by this cursor + * has already been deleted, we will detect that as part of the delete + * operation, and fail. + * + * In renumbering recnos, cursors which represent deleted items + * are flagged with the C_DELETED flag, and it is an error to + * call c_del a second time without an intervening cursor motion. + */ + if (CD_ISSET(cp)) + return (DB_KEYEMPTY); + + /* Search the tree for the key; delete only deletes exact matches. */ +retry: if ((ret = __bam_rsearch(dbc, &cp->recno, SR_DELETE, 1, &exact)) != 0) + goto err; + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + stack = 1; + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp, ret); + if (ret != 0) + goto err; + + /* + * If re-numbering records, the on-page deleted flag can only mean + * that this record was implicitly created. Applications aren't + * permitted to delete records they never created, return an error. + * + * If not re-numbering records, the on-page deleted flag means that + * this record was implicitly created, or, was deleted at some time. + * The former is an error because applications aren't permitted to + * delete records they never created, the latter is an error because + * if the record was "deleted", we could never have found it. 
+ */ + if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) { + ret = DB_KEYEMPTY; + goto err; + } + + if (F_ISSET(cp, C_RENUMBER)) { + /* If we are going to drop the page, lock its neighbors. */ + if (STD_LOCKING(dbc) && NUM_ENT(cp->page) == 1 && + PGNO(cp->page) != BAM_ROOT_PGNO(dbc)) { + if ((npgno = NEXT_PGNO(cp->page)) != PGNO_INVALID) + TRY_LOCK(dbc, npgno, save_npgno, + next_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err; + if ((ppgno = PREV_PGNO(cp->page)) != PGNO_INVALID) + TRY_LOCK(dbc, ppgno, save_ppgno, + prev_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err; + } + /* Delete the item, adjust the counts, adjust the cursors. */ + if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0) + goto err; + if ((ret = __bam_adjust(dbc, -1)) != 0) + goto err; + if ((ret = __ram_ca(dbc, CA_DELETE, &nc)) != 0) + goto err; + if (nc > 0 && CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, + CA_DELETE, BAM_ROOT_PGNO(dbc), cp->recno, cp->order)) != 0) + goto err; + + /* + * If the page is empty, delete it. + * + * We never delete a root page. First, root pages of primary + * databases never go away, recno or otherwise. However, if + * it's the root page of an off-page duplicates database, then + * it can be deleted. We don't delete it here because we have + * no way of telling the primary database page holder (e.g., + * the hash access method) that its page element should cleaned + * up because the underlying tree is gone. So, we keep the page + * around until the last cursor referencing the empty tree is + * are closed, and then clean it up. + */ + if (NUM_ENT(cp->page) == 0 && + PGNO(cp->page) != BAM_ROOT_PGNO(dbc)) { + /* + * We want to delete a single item out of the last page + * that we're not deleting. 
+ */ + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + ret = __bam_dpages(dbc, 0, BTD_RELINK); + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + + /* + * Regardless of the return from __bam_dpages, it will + * discard our stack and pinned page. + */ + stack = 0; + cp->page = NULL; + LOCK_INIT(cp->lock); + cp->lock_mode = DB_LOCK_NG; + } + } else { + /* Use a delete/put pair to replace the record with a marker. */ + if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0) + goto err; + + B_TSET_DELETED(bk.type, B_KEYDATA); + bk.len = 0; + DB_INIT_DBT(hdr, &bk, SSZA(BKEYDATA, data)); + DB_INIT_DBT(data, "", 0); + if ((ret = __db_pitem(dbc, + cp->page, cp->indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0) + goto err; + } + + t->re_modified = 1; + +err: if (!DB_RETOK_DBCDEL(ret)) + F_SET(dbc, DBC_ERROR); + if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, next_lock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, prev_lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __ramc_get -- + * Recno DBC->get function. + * + * PUBLIC: int __ramc_get + * PUBLIC: __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + */ +int +__ramc_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + BTREE_CURSOR *cp; + DB *dbp; + int cmp, exact, ret; + + COMPQUIET(pgnop, NULL); + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + + LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY); +retry: switch (flags) { + case DB_CURRENT: + /* + * If we're using mutable records and the deleted flag is + * set, the cursor is pointing at a nonexistent record; + * return an error. + */ + if (CD_ISSET(cp)) + return (DB_KEYEMPTY); + break; + case DB_NEXT_DUP: + /* + * If we're not in an off-page dup set, we know there's no + * next duplicate since recnos don't have them. 
If we + * are in an off-page dup set, the next item assuredly is + * a dup, so we set flags to DB_NEXT and keep going. + */ + if (!F_ISSET(dbc, DBC_OPD)) + return (DB_NOTFOUND); + /* FALLTHROUGH */ + case DB_NEXT_NODUP: + /* + * Recno databases don't have duplicates, set flags to DB_NEXT + * and keep going. + */ + /* FALLTHROUGH */ + case DB_NEXT: + flags = DB_NEXT; + /* + * If record numbers are mutable: if we just deleted a record, + * we have to avoid incrementing the record number so that we + * return the right record by virtue of renumbering the tree. + */ + if (CD_ISSET(cp)) { + /* + * Clear the flag, we've moved off the deleted record. + */ + CD_CLR(cp); + break; + } + + if (cp->recno != RECNO_OOB) { + ++cp->recno; + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + flags = DB_NEXT; + cp->recno = 1; + break; + case DB_PREV_DUP: + /* + * If we're not in an off-page dup set, we know there's no + * previous duplicate since recnos don't have them. If we + * are in an off-page dup set, the previous item assuredly + * is a dup, so we set flags to DB_PREV and keep going. + */ + if (!F_ISSET(dbc, DBC_OPD)) + return (DB_NOTFOUND); + /* FALLTHROUGH */ + case DB_PREV_NODUP: + /* + * Recno databases don't have duplicates, set flags to DB_PREV + * and keep going. + */ + /* FALLTHROUGH */ + case DB_PREV: + flags = DB_PREV; + if (cp->recno != RECNO_OOB) { + if (cp->recno == 1) { + ret = DB_NOTFOUND; + goto err; + } + --cp->recno; + break; + } + /* FALLTHROUGH */ + case DB_LAST: + flags = DB_PREV; + if (((ret = __ram_update(dbc, + DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND) + goto err; + if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0) + goto err; + if (cp->recno == 0) { + ret = DB_NOTFOUND; + goto err; + } + break; + case DB_GET_BOTHC: + /* + * If we're doing a join and these are offpage dups, + * we want to keep searching forward from after the + * current cursor position. Increment the recno by 1, + * then proceed as for a DB_SET. 
+ * + * Otherwise, we know there are no additional matching + * data, as recnos don't have dups. return DB_NOTFOUND. + */ + if (F_ISSET(dbc, DBC_OPD)) { + cp->recno++; + break; + } + ret = DB_NOTFOUND; + goto err; + /* NOTREACHED */ + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + /* + * If we're searching a set of off-page dups, we start + * a new linear search from the first record. Otherwise, + * we compare the single data item associated with the + * requested record for a match. + */ + if (F_ISSET(dbc, DBC_OPD)) { + cp->recno = 1; + break; + } + /* FALLTHROUGH */ + case DB_SET: + case DB_SET_RANGE: + if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0) + goto err; + break; + default: + ret = __db_unknown_flag(dbp->env, "__ramc_get", flags); + goto err; + } + + /* + * For DB_PREV, DB_LAST, DB_SET and DB_SET_RANGE, we have already + * called __ram_update() to make sure sufficient records have been + * read from the backing source file. Do it now for DB_CURRENT (if + * the current record was deleted we may need more records from the + * backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT. + * (We don't have to test for flags == DB_FIRST, because the switch + * statement above re-set flags to DB_NEXT in that case.) + */ + if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret = + __ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND) + goto err; + + for (;; ++cp->recno) { + /* Search the tree for the record. */ + if ((ret = __bam_rsearch(dbc, &cp->recno, + F_ISSET(dbc, DBC_RMW) ? SR_FIND_WR : SR_FIND, + 1, &exact)) != 0) + goto err; + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp, ret); + if (ret != 0) + goto err; + + /* + * If re-numbering records, the on-page deleted flag means this + * record was implicitly created. If not re-numbering records, + * the on-page deleted flag means this record was implicitly + * created, or, it was deleted at some time. 
Regardless, we + * skip such records if doing cursor next/prev operations or + * walking through off-page duplicates, and fail if they were + * requested explicitly by the application. + */ + if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) + switch (flags) { + case DB_NEXT: + case DB_PREV: + (void)__bam_stkrel(dbc, STK_CLRDBC); + PERFMON4(env, race, ramc_get, + dbp->fname, dbp->dname, cp->page, flags); + goto retry; + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + /* + * If we're an OPD tree, we don't care about + * matching a record number on a DB_GET_BOTH + * -- everything belongs to the same tree. A + * normal recno should give up and return + * DB_NOTFOUND if the matching recno is deleted. + */ + if (F_ISSET(dbc, DBC_OPD)) { + (void)__bam_stkrel(dbc, STK_CLRDBC); + continue; + } + ret = DB_NOTFOUND; + goto err; + default: + ret = DB_KEYEMPTY; + goto err; + } + + if (flags == DB_GET_BOTH || + flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { + if ((ret = __bam_cmp(dbc, data, cp->page, cp->indx, + __bam_defcmp, &cmp)) != 0) + return (ret); + if (cmp == 0) + break; + if (!F_ISSET(dbc, DBC_OPD)) { + ret = DB_NOTFOUND; + goto err; + } + (void)__bam_stkrel(dbc, STK_CLRDBC); + } else + break; + } + + /* Return the key if the user didn't give us one. */ + if (!F_ISSET(dbc, DBC_OPD) && !F_ISSET(key, DB_DBT_ISSET)) { + ret = __db_retcopy(dbp->env, + key, &cp->recno, sizeof(cp->recno), + &dbc->rkey->data, &dbc->rkey->ulen); + F_SET(key, DB_DBT_ISSET); + } + + /* The cursor was reset, no further delete adjustment is necessary. */ +err: CD_CLR(cp); + + return (ret); +} + +/* + * __ramc_put -- + * Recno DBC->put function. 
+ * + * PUBLIC: int __ramc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + */ +int +__ramc_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_LSN lsn; + ENV *env; + u_int32_t iiflags; + int exact, nc, ret, t_ret; + void *arg; + + COMPQUIET(pgnop, NULL); + + dbp = dbc->dbp; + env = dbp->env; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * DB_KEYFIRST and DB_KEYLAST mean different things if they're + * used in an off-page duplicate tree. If we're an off-page + * duplicate tree, they really mean "put at the beginning of the + * tree" and "put at the end of the tree" respectively, so translate + * them to something else. + */ + if (F_ISSET(dbc, DBC_OPD)) + switch (flags) { + case DB_KEYFIRST: + cp->recno = 1; + flags = DB_BEFORE; + break; + case DB_KEYLAST: + if ((ret = __ram_add(dbc, + &cp->recno, data, DB_APPEND, 0)) != 0) + return (ret); + if (CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, + &lsn, 0, CA_ICURRENT, + BAM_ROOT_PGNO(dbc), cp->recno, cp->order)) != 0) + return (ret); + return (0); + default: + break; + } + + /* + * Handle normal DB_KEYFIRST/DB_KEYLAST; for a recno, which has + * no duplicates, these are identical and mean "put the given + * datum at the given recno". + */ + if (flags == DB_KEYFIRST || flags == DB_KEYLAST || + flags == DB_NOOVERWRITE || flags == DB_OVERWRITE_DUP) { + ret = __ram_getno(dbc, key, &cp->recno, 1); + if (ret == 0 || ret == DB_NOTFOUND) + ret = __ram_add(dbc, &cp->recno, data, flags, 0); + return (ret); + } + + /* + * If we're putting with a cursor that's marked C_DELETED, we need to + * take special care; the cursor doesn't "really" reference the item + * corresponding to its current recno, but instead is "between" that + * record and the current one. Translate the actual insert into + * DB_BEFORE, and let the __ram_ca work out the gory details of what + * should wind up pointing where. 
+ */ + if (CD_ISSET(cp)) + iiflags = DB_BEFORE; + else + iiflags = flags; + +split: if ((ret = __bam_rsearch(dbc, &cp->recno, SR_INSERT, 1, &exact)) != 0) + goto err; + /* + * An inexact match is okay; it just means we're one record past the + * end, which is reasonable if we're marked deleted. + */ + DB_ASSERT(env, exact || CD_ISSET(cp)); + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp, ret); + if (ret != 0) + goto err; + + ret = __bam_iitem(dbc, key, data, iiflags, 0); + t_ret = __bam_stkrel(dbc, STK_CLRDBC); + + if (t_ret != 0 && (ret == 0 || ret == DB_NEEDSPLIT)) + ret = t_ret; + else if (ret == DB_NEEDSPLIT) { + arg = &cp->recno; + if ((ret = __bam_split(dbc, arg, NULL)) != 0) + goto err; + goto split; + } + if (ret != 0) + goto err; + + switch (flags) { /* Adjust the cursors. */ + case DB_AFTER: + if ((ret = __ram_ca(dbc, CA_IAFTER, &nc)) != 0) + goto err; + + /* + * We only need to adjust this cursor forward if we truly added + * the item after the current recno, rather than remapping it + * to DB_BEFORE. + */ + if (iiflags == DB_AFTER) + ++cp->recno; + + /* Only log if __ram_ca found any relevant cursors. */ + if (nc > 0 && CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER, + BAM_ROOT_PGNO(dbc), cp->recno, cp->order)) != 0) + goto err; + break; + case DB_BEFORE: + if ((ret = __ram_ca(dbc, CA_IBEFORE, &nc)) != 0) + goto err; + --cp->recno; + + /* Only log if __ram_ca found any relevant cursors. */ + if (nc > 0 && CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE, + BAM_ROOT_PGNO(dbc), cp->recno, cp->order)) != 0) + goto err; + break; + case DB_CURRENT: + /* + * We only need to do an adjustment if we actually + * added an item, which we only would have done if the + * cursor was marked deleted. + */ + if (!CD_ISSET(cp)) + break; + + /* Only log if __ram_ca found any relevant cursors. 
*/ + if ((ret = __ram_ca(dbc, CA_ICURRENT, &nc)) != 0) + goto err; + if (nc > 0 && CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, + dbc->txn, &lsn, 0, CA_ICURRENT, + BAM_ROOT_PGNO(dbc), cp->recno, cp->order)) != 0) + goto err; + break; + default: + break; + } + + /* Return the key if we've created a new record. */ + if (!F_ISSET(dbc, DBC_OPD) && + (flags == DB_AFTER || flags == DB_BEFORE) && key != NULL) + ret = __db_retcopy(env, key, &cp->recno, + sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen); + + /* The cursor was reset, no further delete adjustment is necessary. */ +err: CD_CLR(cp); + + if (!DB_RETOK_DBCDEL(ret)) + F_SET(dbc, DBC_ERROR); + return (ret); +} + +static int +__ram_ca_getorder(dbc, my_dbc, orderp, root_pgno, recno, args) + DBC *dbc, *my_dbc; + u_int32_t *orderp; + db_pgno_t root_pgno; + u_int32_t recno; + void *args; +{ + BTREE_CURSOR *cp; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(args, NULL); + + cp = (BTREE_CURSOR *)dbc->internal; + if (root_pgno == BAM_ROOT_PGNO(dbc) && + recno == cp->recno && CD_ISSET(cp) && + *orderp <= cp->order && + !MVCC_SKIP_CURADJ(dbc, BAM_ROOT_PGNO(dbc))) + *orderp = cp->order; + return (0); +} + +static int +__ram_ca_setorder(dbc, my_dbc, foundp, pgno, order, args) + DBC *dbc, *my_dbc; + u_int32_t *foundp; + db_pgno_t pgno; + u_int32_t order; + void *args; +{ + BTREE_CURSOR *cp, *cp_arg; + int adjusted; + ca_recno_arg op; + db_recno_t recno; + + COMPQUIET(pgno, 0); + + cp = (BTREE_CURSOR *)dbc->internal; + cp_arg = (BTREE_CURSOR *)my_dbc->internal; + op = *(ca_recno_arg *)args; + + if (cp_arg->root != cp->root || + MVCC_SKIP_CURADJ(dbc, BAM_ROOT_PGNO(dbc))) + return (0); + ++(*foundp); + adjusted = 0; + recno = cp_arg->recno; + switch (op) { + case CA_DELETE: + if (recno < cp->recno) { + --cp->recno; + /* + * If the adjustment made them equal, + * we have to merge the orders. 
+ */ + if (recno == cp->recno && CD_ISSET(cp)) + cp->order += order; + } else if (recno == cp->recno && + !CD_ISSET(cp)) { + CD_SET(cp); + cp->order = order; + /* + * If we're deleting the item, we can't + * keep a streaming offset cached. + */ + cp->stream_start_pgno = PGNO_INVALID; + } + break; + case CA_IBEFORE: + /* + * IBEFORE is just like IAFTER, except that we + * adjust cursors on the current record too. + */ + if (C_EQUAL(cp_arg, cp)) { + ++cp->recno; + adjusted = 1; + } + goto iafter; + case CA_ICURRENT: + + /* + * If the original cursor wasn't deleted, we + * just did a replacement and so there's no + * need to adjust anything--we shouldn't have + * gotten this far. Otherwise, we behave + * much like an IAFTER, except that all + * cursors pointing to the current item get + * marked undeleted and point to the new + * item. + */ + DB_ASSERT(dbc->dbp->env, CD_ISSET(cp_arg)); + if (C_EQUAL(cp_arg, cp)) { + CD_CLR(cp); + break; + } + /* FALLTHROUGH */ + case CA_IAFTER: +iafter: if (!adjusted && C_LESSTHAN(cp_arg, cp)) { + ++cp->recno; + adjusted = 1; + } + if (recno == cp->recno && adjusted) + /* + * If we've moved this cursor's recno, + * split its order number--i.e., + * decrement it by enough so that + * the lowest cursor moved has order 1. + * cp_arg->order is the split point, + * so decrement by one less than that. + */ + cp->order -= (cp_arg->order - 1); + break; + } + return (0); +} + +/* + * __ram_ca -- + * Adjust cursors. Returns the number of relevant cursors. + * + * PUBLIC: int __ram_ca __P((DBC *, ca_recno_arg, int *)); + */ +int +__ram_ca(dbc_arg, op, foundp) + DBC *dbc_arg; + ca_recno_arg op; + int *foundp; +{ + BTREE_CURSOR *cp_arg; + DB *dbp; + ENV *env; + db_recno_t recno; + u_int32_t found, order; + int ret; + + dbp = dbc_arg->dbp; + env = dbp->env; + cp_arg = (BTREE_CURSOR *)dbc_arg->internal; + recno = cp_arg->recno; + + /* + * It only makes sense to adjust cursors if we're a renumbering + * recno; we should only be called if this is one. 
+	 */
+	DB_ASSERT(env, F_ISSET(cp_arg, C_RENUMBER));
+
+	/*
+	 * Adjust the cursors.  See the comment in __bam_ca_delete().
+	 *
+	 * If we're doing a delete, we need to find the highest
+	 * order of any cursor currently pointing at this item,
+	 * so we can assign a higher order to the newly deleted
+	 * cursor.  Unfortunately, this requires a second pass through
+	 * the cursor list.  NOTE(review): "order" appears to be zeroed
+	 * inside __db_walk_cursors before the walk -- confirm.
+	 */
+	if (op == CA_DELETE) {
+		if ((ret = __db_walk_cursors(dbp, NULL, __ram_ca_getorder,
+		    &order, BAM_ROOT_PGNO(dbc_arg), recno, NULL)) != 0)
+			return (ret);
+		order++;	/* one above the highest order found */
+	} else
+		order = INVALID_ORDER;
+
+	if ((ret = __db_walk_cursors(dbp, dbc_arg,
+	    __ram_ca_setorder, &found, 0, order, &op)) != 0)
+		return (ret);
+	if (foundp != NULL)
+		*foundp = (int)found;
+	return (0);
+}
+
+/*
+ * __ram_getno --
+ *	Check the user's record number, and make sure we've seen it.
+ * Returns EINVAL for a malformed or zero record number.
+ * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
+ */
+int
+__ram_getno(dbc, key, rep, can_create)
+	DBC *dbc;
+	const DBT *key;
+	db_recno_t *rep;
+	int can_create;
+{
+	DB *dbp;
+	db_recno_t recno;
+
+	dbp = dbc->dbp;
+
+	/*
+	 * If passed an empty DBT from Java, key->data may be NULL; the size
+	 * check below rejects it before the dereference.
+	 */
+	if (key->size != sizeof(db_recno_t)) {
+		__db_errx(dbp->env, DB_STR("1001",
+		    "illegal record number size"));
+		return (EINVAL);
+	}
+
+	/* Check the user's record number; record numbers start at 1. */
+	if ((recno = *(db_recno_t *)key->data) == 0) {
+		__db_errx(dbp->env, DB_STR("1002",
+		    "illegal record number of 0"));
+		return (EINVAL);
+	}
+	if (rep != NULL)
+		*rep = recno;
+
+	/*
+	 * Btree can neither create records nor read them in.  Recno can
+	 * do both, see if we can find the record.
+	 */
+	return (dbc->dbtype == DB_RECNO ?
+	    __ram_update(dbc, recno, can_create) : 0);
+}
+
+/*
+ * __ram_update --
+ *	Ensure the tree has records up to and including the specified one.
+ */
+static int
+__ram_update(dbc, recno, can_create)
+	DBC *dbc;
+	db_recno_t recno;
+	int can_create;
+{
+	BTREE *t;
+	DB *dbp;
+	DBT *rdata;
+	db_recno_t nrecs;
+	int ret;
+
+	dbp = dbc->dbp;
+	t = dbp->bt_internal;
+
+	/*
+	 * If we can't create records and we've read the entire backing input
+	 * file, we're done.
+	 */
+	if (!can_create && t->re_eof)
+		return (0);
+
+	/*
+	 * If we haven't seen this record yet, try to get it from the original
+	 * file.  DB_NOTFOUND from the source read just means it ran dry.
+	 */
+	if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
+		return (ret);
+	if (!t->re_eof && recno > nrecs) {
+		if ((ret = __ram_sread(dbc, recno)) != 0 && ret != DB_NOTFOUND)
+			return (ret);
+		if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * If we can create records, create empty ones up to the requested
+	 * record.
+	 */
+	if (!can_create || recno <= nrecs + 1)
+		return (0);
+
+	rdata = &dbc->my_rdata;	/* reuse the cursor's scratch DBT */
+	rdata->flags = 0;
+	rdata->size = 0;
+
+	while (recno > ++nrecs)
+		if ((ret = __ram_add(dbc,
+		    &nrecs, rdata, 0, BI_DELETED)) != 0)	/* implicit records */
+			return (ret);
+	return (0);
+}
+
+/*
+ * __ram_source --
+ *	Load information about the backing file and open it for reading.
+ */
+static int
+__ram_source(dbp)
+	DB *dbp;
+{
+	BTREE *t;
+	ENV *env;
+	char *source;
+	int ret;
+
+	env = dbp->env;
+	t = dbp->bt_internal;
+
+	/* Find the real name, and swap out the one we had before. */
+	if ((ret = __db_appname(env,
+	    DB_APP_DATA, t->re_source, NULL, &source)) != 0)
+		return (ret);
+	__os_free(env, t->re_source);
+	t->re_source = source;
+
+	/*
+	 * !!!
+	 * It's possible that the backing source file is read-only.  We don't
+	 * much care other than we'll complain if there are any modifications
+	 * when it comes time to write the database back to the source.
+	 */
+	if ((t->re_fp = fopen(t->re_source, "rb")) == NULL) {
+		ret = __os_get_errno();
+		__db_err(env, ret, "%s", t->re_source);
+		return (ret);
+	}
+
+	t->re_eof = 0;	/* haven't hit EOF on the source yet */
+	return (0);
+}
+
+/*
+ * __ram_writeback --
+ *	Rewrite the backing file.
+ * + * PUBLIC: int __ram_writeback __P((DB *)); + */ +int +__ram_writeback(dbp) + DB *dbp; +{ + BTREE *t; + DBC *dbc; + DBT key, data; + DB_THREAD_INFO *ip; + ENV *env; + FILE *fp; + db_recno_t keyno; + int ret, t_ret; + u_int8_t delim, *pad; + + t = dbp->bt_internal; + env = dbp->env; + fp = NULL; + pad = NULL; + + /* If the file wasn't modified, we're done. */ + if (!t->re_modified) + return (0); + + /* If there's no backing source file, we're done. */ + if (t->re_source == NULL) { + t->re_modified = 0; + return (0); + } + + /* + * We step through the records, writing each one out. Use the record + * number and the dbp->get() function, instead of a cursor, so we find + * and write out "deleted" or non-existent records. The DB handle may + * be threaded, so allocate memory as we go. + */ + memset(&key, 0, sizeof(key)); + key.size = sizeof(db_recno_t); + key.data = &keyno; + memset(&data, 0, sizeof(data)); + F_SET(&data, DB_DBT_REALLOC); + + /* Allocate a cursor. */ + ENV_GET_THREAD_INFO(env, ip); + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + return (ret); + + /* + * Read any remaining records into the tree. + * + * !!! + * This is why we can't support transactions when applications specify + * backing (re_source) files. At this point we have to read in the + * rest of the records from the file so that we can write all of the + * records back out again, which could modify a page for which we'd + * have to log changes and which we don't have locked. This could be + * partially fixed by taking a snapshot of the entire file during the + * DB->open as DB->open is transaction protected. But, if a checkpoint + * occurs then, the part of the log holding the copy of the file could + * be discarded, and that would make it impossible to recover in the + * face of disaster. This could all probably be fixed, but it would + * require transaction protecting the backing source file. 
+ * + * XXX + * This could be made to work now that we have transactions protecting + * file operations. Margo has specifically asked for the privilege of + * doing this work. + */ + if ((ret = + __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND) + goto err; + + /* + * Close any existing file handle and re-open the file, truncating it. + */ + if (t->re_fp != NULL) { + if (fclose(t->re_fp) != 0) { + ret = __os_get_errno(); + __db_err(env, ret, "%s", t->re_source); + goto err; + } + t->re_fp = NULL; + } + if ((fp = fopen(t->re_source, "wb")) == NULL) { + ret = __os_get_errno(); + __db_err(env, ret, "%s", t->re_source); + goto err; + } + + /* + * We'll need the delimiter if we're doing variable-length records, + * and the pad character if we're doing fixed-length records. + */ + delim = t->re_delim; + for (keyno = 1;; ++keyno) { + switch (ret = __db_get(dbp, ip, NULL, &key, &data, 0)) { + case 0: + if (data.size != 0 && + fwrite(data.data, 1, data.size, fp) != data.size) + goto write_err; + break; + case DB_KEYEMPTY: + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) { + if (pad == NULL) { + if ((ret = __os_malloc( + env, t->re_len, &pad)) != 0) + goto err; + memset(pad, t->re_pad, t->re_len); + } + if (fwrite(pad, 1, t->re_len, fp) != t->re_len) + goto write_err; + } + break; + case DB_NOTFOUND: + ret = 0; + goto done; + default: + goto err; + } + if (!F_ISSET(dbp, DB_AM_FIXEDLEN) && + fwrite(&delim, 1, 1, fp) != 1) { +write_err: ret = __os_get_errno(); + __db_err(env, ret, DB_STR_A("1003", + "%s: write failed to backing file", "%s"), + t->re_source); + goto err; + } + } + +err: +done: /* Close the file descriptor. */ + if (fp != NULL && fclose(fp) != 0) { + t_ret = __os_get_errno(); + __db_err(env, t_ret, "%s", t->re_source); + if (ret == 0) + ret = t_ret; + } + + /* Discard the cursor. */ + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + /* Discard memory allocated to hold the data items. 
*/ + if (data.data != NULL) + __os_ufree(env, data.data); + if (pad != NULL) + __os_free(env, pad); + + if (ret == 0) + t->re_modified = 0; + + return (ret); +} + +/* + * __ram_sread -- + * Read records from a source file. + */ +static int +__ram_sread(dbc, top) + DBC *dbc; + db_recno_t top; +{ + BTREE *t; + DB *dbp; + DBT data, *rdata; + db_recno_t recno; + size_t len; + int ch, ret, was_modified; + + t = dbc->dbp->bt_internal; + dbp = dbc->dbp; + was_modified = t->re_modified; + + if ((ret = __bam_nrecs(dbc, &recno)) != 0) + return (ret); + + /* + * Use the record key return memory, it's only a short-term use. + * The record data return memory is used by __bam_iitem, which + * we'll indirectly call, so use the key so as not to collide. + */ + len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256; + rdata = &dbc->my_rkey; + if (rdata->ulen < len) { + if ((ret = __os_realloc( + dbp->env, len, &rdata->data)) != 0) { + rdata->ulen = 0; + rdata->data = NULL; + return (ret); + } + rdata->ulen = (u_int32_t)len; + } + + memset(&data, 0, sizeof(data)); + while (recno < top) { + data.data = rdata->data; + data.size = 0; + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) + for (len = t->re_len; len > 0; --len) { + if ((ch = fgetc(t->re_fp)) == EOF) { + if (data.size == 0) + goto eof; + break; + } + ((u_int8_t *)data.data)[data.size++] = ch; + } + else + for (;;) { + if ((ch = fgetc(t->re_fp)) == EOF) { + if (data.size == 0) + goto eof; + break; + } + if (ch == t->re_delim) + break; + + ((u_int8_t *)data.data)[data.size++] = ch; + if (data.size == rdata->ulen) { + if ((ret = __os_realloc(dbp->env, + rdata->ulen *= 2, + &rdata->data)) != 0) { + rdata->ulen = 0; + rdata->data = NULL; + return (ret); + } else + data.data = rdata->data; + } + } + + /* + * Another process may have read this record from the input + * file and stored it into the database already, in which + * case we don't need to repeat that operation. 
We detect + * this by checking if the last record we've read is greater + * or equal to the number of records in the database. + */ + if (t->re_last >= recno) { + ++recno; + if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0) + goto err; + } + ++t->re_last; + } + + if (0) { +eof: t->re_eof = 1; + ret = DB_NOTFOUND; + } +err: if (!was_modified) + t->re_modified = 0; + + return (ret); +} + +/* + * __ram_add -- + * Add records into the tree. + */ +static int +__ram_add(dbc, recnop, data, flags, bi_flags) + DBC *dbc; + db_recno_t *recnop; + DBT *data; + u_int32_t flags, bi_flags; +{ + BTREE_CURSOR *cp; + int exact, ret, stack, t_ret; + + cp = (BTREE_CURSOR *)dbc->internal; + +retry: /* Find the slot for insertion. */ + if ((ret = __bam_rsearch(dbc, recnop, + SR_INSERT | (flags == DB_APPEND ? SR_APPEND : 0), 1, &exact)) != 0) + return (ret); + stack = 1; + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp, ret); + if (ret != 0) + goto err; + + if (exact && flags == DB_NOOVERWRITE && !CD_ISSET(cp) && + !B_DISSET(GET_BKEYDATA(dbc->dbp, cp->page, cp->indx)->type)) { + ret = DB_KEYEXIST; + goto err; + } + + /* + * The application may modify the data based on the selected record + * number. + */ + if (flags == DB_APPEND && dbc->dbp->db_append_recno != NULL && + (ret = dbc->dbp->db_append_recno(dbc->dbp, data, *recnop)) != 0) + goto err; + + /* + * Select the arguments for __bam_iitem() and do the insert. If the + * key is an exact match, or we're replacing the data item with a + * new data item, replace the current item. If the key isn't an exact + * match, we're inserting a new key/data pair, before the search + * location. + */ + switch (ret = __bam_iitem(dbc, + NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) { + case 0: + /* + * Don't adjust anything. + * + * If we inserted a record, no cursors need adjusting because + * the only new record it's possible to insert is at the very + * end of the tree. 
The necessary adjustments to the internal + * page counts were made by __bam_iitem(). + * + * If we overwrote a record, no cursors need adjusting because + * future DBcursor->get calls will simply return the underlying + * record (there's no adjustment made for the DB_CURRENT flag + * when a cursor get operation immediately follows a cursor + * delete operation, and the normal adjustment for the DB_NEXT + * flag is still correct). + */ + break; + case DB_NEEDSPLIT: + /* Discard the stack of pages and split the page. */ + (void)__bam_stkrel(dbc, STK_CLRDBC); + stack = 0; + + if ((ret = __bam_split(dbc, recnop, NULL)) != 0) + goto err; + + goto retry; + /* NOTREACHED */ + default: + goto err; + } + +err: if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} diff --git a/src/btree/bt_rsearch.c b/src/btree/bt_rsearch.c new file mode 100644 index 00000000..af6f2b2a --- /dev/null +++ b/src/btree/bt_rsearch.c @@ -0,0 +1,513 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +/* + * __bam_rsearch -- + * Search a btree for a record number. + * + * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); + */ +int +__bam_rsearch(dbc, recnop, flags, stop, exactp) + DBC *dbc; + db_recno_t *recnop; + u_int32_t flags; + int stop, *exactp; +{ + BINTERNAL *bi; + BTREE_CURSOR *cp; + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + RINTERNAL *ri; + db_indx_t adjust, deloffset, indx, top; + db_lockmode_t lock_mode; + db_pgno_t pg; + db_recno_t recno, t_recno, total; + u_int32_t get_mode; + int ret, stack, t_ret; + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + h = NULL; + ret = 0; + + BT_STK_CLR(cp); + + /* + * There are several ways we search a btree tree. 
The flags argument + * specifies if we're acquiring read or write locks and if we are + * locking pairs of pages. In addition, if we're adding or deleting + * an item, we have to lock the entire tree, regardless. See btree.h + * for more details. + * + * If write-locking pages, we need to know whether or not to acquire a + * write lock on a page before getting it. This depends on how deep it + * is in tree, which we don't know until we acquire the root page. So, + * if we need to lock the root page we may have to upgrade it later, + * because we won't get the correct lock initially. + * + * Retrieve the root page. + */ + + if ((ret = __bam_get_root(dbc, PGNO_INVALID, stop, flags, &stack)) != 0) + goto done; + lock_mode = cp->csp->lock_mode; + get_mode = lock_mode == DB_LOCK_WRITE ? DB_MPOOL_DIRTY : 0; + lock = cp->csp->lock; + h = cp->csp->page; + + BT_STK_CLR(cp); + /* + * If appending to the tree, set the record number now -- we have the + * root page locked. + * + * Delete only deletes exact matches, read only returns exact matches. + * Note, this is different from __bam_search(), which returns non-exact + * matches for read. + * + * The record may not exist. We can only return the correct location + * for the record immediately after the last record in the tree, so do + * a fast check now. + */ + total = RE_NREC(h); + if (LF_ISSET(SR_APPEND)) { + *exactp = 0; + *recnop = recno = total + 1; + } else { + recno = *recnop; + if (recno <= total) + *exactp = 1; + else { + *exactp = 0; + if (!LF_ISSET(SR_PAST_EOF) || recno > total + 1) { + /* + * Keep the page locked for serializability. + * + * XXX + * This leaves the root page locked, which will + * eliminate any concurrency. A possible fix + * would be to lock the last leaf page instead. + */ + ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + if ((t_ret = + __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) + ret = DB_NOTFOUND; + goto done; + } + } + } + + /* + * !!! 
+ * Record numbers in the tree are 0-based, but the recno is + * 1-based. All of the calculations below have to take this + * into account. + */ + for (total = 0;;) { + switch (TYPE(h)) { + case P_LBTREE: + if (LF_ISSET(SR_MAX)) { + indx = NUM_ENT(h) - 2; + goto enter; + } + /* FALLTHROUGH */ + case P_LDUP: + if (LF_ISSET(SR_MAX)) { + indx = NUM_ENT(h) - 1; + goto enter; + } + recno -= total; + /* + * There may be logically deleted records on the page. + * If there are enough, the record may not exist. + */ + if (TYPE(h) == P_LBTREE) { + adjust = P_INDX; + deloffset = O_INDX; + } else { + adjust = O_INDX; + deloffset = 0; + } + for (t_recno = 0, indx = 0;; indx += adjust) { + if (indx >= NUM_ENT(h)) { + *exactp = 0; + if (!LF_ISSET(SR_PAST_EOF) || + recno > t_recno + 1) { + ret = __memp_fput(mpf, + dbc->thread_info, + h, dbc->priority); + h = NULL; + if ((t_ret = __TLPUT(dbc, + lock)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) + ret = DB_NOTFOUND; + goto err; + } + } + if (!B_DISSET(GET_BKEYDATA(dbp, h, + indx + deloffset)->type) && + ++t_recno == recno) + break; + } + + BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + if (LF_ISSET(SR_BOTH)) + goto get_prev; + goto done; + case P_IBTREE: + if (LF_ISSET(SR_MAX)) { + indx = NUM_ENT(h); + bi = GET_BINTERNAL(dbp, h, indx - 1); + } else for (indx = 0, top = NUM_ENT(h);;) { + bi = GET_BINTERNAL(dbp, h, indx); + if (++indx == top || total + bi->nrecs >= recno) + break; + total += bi->nrecs; + } + pg = bi->pgno; + break; + case P_LRECNO: + if (LF_ISSET(SR_MAX)) + recno = NUM_ENT(h); + else + recno -= total; + + /* Correct from 1-based to 0-based for a page offset. */ + --recno; +enter: BT_STK_ENTER(env, cp, h, recno, lock, lock_mode, ret); + if (ret != 0) + goto err; + if (LF_ISSET(SR_BOTH)) { +get_prev: DB_ASSERT(env, LF_ISSET(SR_NEXT)); + /* + * We have a NEXT tree, now add the sub tree + * that points to the previous page. 
+ */ + cp->csp++; + indx = cp->sp->indx - 1; + h = cp->sp->page; + if (TYPE(h) == P_IRECNO) { + ri = GET_RINTERNAL(dbp, h, indx); + pg = ri->pgno; + } else { + DB_ASSERT(env, TYPE(h) == P_IBTREE); + bi = GET_BINTERNAL(dbp, h, indx); + pg = bi->pgno; + } + LF_CLR(SR_NEXT | SR_BOTH); + LF_SET(SR_MAX); + stack = 1; + h = NULL; + goto lock_next; + } + goto done; + case P_IRECNO: + if (LF_ISSET(SR_MAX)) { + indx = NUM_ENT(h); + ri = GET_RINTERNAL(dbp, h, indx - 1); + } else for (indx = 0, top = NUM_ENT(h);;) { + ri = GET_RINTERNAL(dbp, h, indx); + if (++indx == top || total + ri->nrecs >= recno) + break; + total += ri->nrecs; + } + pg = ri->pgno; + break; + default: + ret = __db_pgfmt(env, h->pgno); + goto done; + } + --indx; + + /* Return if this is the lowest page wanted. */ + if (stop == LEVEL(h)) { + BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + goto done; + } + if (stack) { + BT_STK_PUSH(env, cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + h = NULL; + + lock_mode = DB_LOCK_WRITE; + get_mode = DB_MPOOL_DIRTY; + if ((ret = + __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) + goto err; + } else if (LF_ISSET(SR_NEXT)) { + /* + * For RECNO if we are doing a NEXT search the + * search recno is the one we are looking for + * but we want to keep the stack from the spanning + * node on down. We only know we have the spanning + * node when its child's index is 0, so save + * each node and discard the tree when we find out + * its not needed. + */ + if (indx != 0 && cp->sp->page != NULL) { + BT_STK_POP(cp); + if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + } + + BT_STK_PUSH(env, cp, h, indx, lock, lock_mode, ret); + h = NULL; + if (ret != 0) + goto err; +lock_next: if ((ret = + __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) + goto err; + } else { + /* + * Decide if we want to return a pointer to the next + * page in the stack. If we do, write lock it and + * never unlock it. 
+ */ + if ((LF_ISSET(SR_PARENT) && + (u_int8_t)(stop + 1) >= (u_int8_t)(LEVEL(h) - 1)) || + (LEVEL(h) - 1) == LEAFLEVEL) + stack = 1; + + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + h = NULL; + + lock_mode = stack && + LF_ISSET(SR_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ; + if (lock_mode == DB_LOCK_WRITE) + get_mode = DB_MPOOL_DIRTY; + if ((ret = __db_lget(dbc, + LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) { + /* + * If we fail, discard the lock we held. This + * is OK because this only happens when we are + * descending the tree holding read-locks. + */ + (void)__LPUT(dbc, lock); + goto err; + } + } + + if ((ret = __memp_fget(mpf, &pg, + dbc->thread_info, dbc->txn, get_mode, &h)) != 0) + goto err; + } + /* NOTREACHED */ + +err: if (h != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + BT_STK_POP(cp); + (void)__bam_stkrel(dbc, 0); + +done: + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + return (ret); +} + +/* + * __bam_adjust -- + * Adjust the tree after adding or deleting a record. + * + * PUBLIC: int __bam_adjust __P((DBC *, int32_t)); + */ +int +__bam_adjust(dbc, adjust) + DBC *dbc; + int32_t adjust; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + EPG *epg; + PAGE *h; + db_pgno_t root_pgno; + int ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + root_pgno = BAM_ROOT_PGNO(dbc); + + /* Update the record counts for the tree. */ + for (epg = cp->sp; epg <= cp->csp; ++epg) { + h = epg->page; + if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) { + ret = __memp_dirty(mpf, &h, + dbc->thread_info, dbc->txn, dbc->priority, 0); + epg->page = h; + if (ret != 0) + return (ret); + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cadjust_log(dbp, dbc->txn, + &LSN(h), 0, PGNO(h), &LSN(h), + (u_int32_t)epg->indx, adjust, + PGNO(h) == root_pgno ? 
+ CAD_UPDATEROOT : 0)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); + + if (TYPE(h) == P_IBTREE) + GET_BINTERNAL(dbp, h, epg->indx)->nrecs += + adjust; + else + GET_RINTERNAL(dbp, h, epg->indx)->nrecs += + adjust; + + if (PGNO(h) == root_pgno) + RE_NREC_ADJ(h, adjust); + } + } + return (0); +} + +/* + * __bam_nrecs -- + * Return the number of records in the tree. + * + * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *)); + */ +int +__bam_nrecs(dbc, rep) + DBC *dbc; + db_recno_t *rep; +{ + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + PAGE *h; + db_pgno_t pgno; + int ret, t_ret; + + COMPQUIET(h, NULL); + dbp = dbc->dbp; + mpf = dbp->mpf; + LOCK_INIT(lock); + + pgno = PGNO_INVALID; + BAM_GET_ROOT(dbc, pgno, h, 0, DB_LOCK_READ, lock, ret); + if (ret != 0) + goto err; + DB_ASSERT(dbp->env, h != NULL); + + *rep = RE_NREC(h); + + ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority); +err: if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __bam_total -- + * Return the number of records below a page. + * + * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *)); + */ +db_recno_t +__bam_total(dbp, h) + DB *dbp; + PAGE *h; +{ + db_recno_t nrecs; + db_indx_t indx, top; + + nrecs = 0; + top = NUM_ENT(h); + + switch (TYPE(h)) { + case P_LBTREE: + /* Check for logically deleted records. */ + for (indx = 0; indx < top; indx += P_INDX) + if (!B_DISSET( + GET_BKEYDATA(dbp, h, indx + O_INDX)->type)) + ++nrecs; + break; + case P_LDUP: + /* Check for logically deleted records. 
*/ + for (indx = 0; indx < top; indx += O_INDX) + if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + ++nrecs; + break; + case P_IBTREE: + for (indx = 0; indx < top; indx += O_INDX) + nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs; + break; + case P_LRECNO: + nrecs = NUM_ENT(h); + break; + case P_IRECNO: + for (indx = 0; indx < top; indx += O_INDX) + nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs; + break; + } + + return (nrecs); +} diff --git a/src/btree/bt_search.c b/src/btree/bt_search.c new file mode 100644 index 00000000..e1054698 --- /dev/null +++ b/src/btree/bt_search.c @@ -0,0 +1,1028 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +/* + * __bam_get_root -- + * Fetch the root of a tree and see if we want to keep + * it in the stack. + * + * PUBLIC: int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *)); + */ +int +__bam_get_root(dbc, root_pgno, slevel, flags, stack) + DBC *dbc; + db_pgno_t root_pgno; + int slevel; + u_int32_t flags; + int *stack; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + PAGE *h; + db_lockmode_t lock_mode; + u_int32_t get_mode; + int ret, t_ret; + + COMPQUIET(h, NULL); + LOCK_INIT(lock); + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + /* + * If write-locking pages, we need to know whether or not to acquire a + * write lock on a page before getting it. This depends on how deep it + * is in tree, which we don't know until we acquire the root page. So, + * if we need to lock the root page we may have to upgrade it later, + * because we won't get the correct lock initially. + * + * Retrieve the root page. 
+ */ +try_again: + *stack = LF_ISSET(SR_STACK) && + (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM)); + lock_mode = DB_LOCK_READ; + if (*stack || + LF_ISSET(SR_DEL) || (LF_ISSET(SR_NEXT) && LF_ISSET(SR_WRITE))) + lock_mode = DB_LOCK_WRITE; + + /* + * Get the root. If the root happens to be a leaf page then + * we are supposed to get a read lock on it before latching + * it. So if we have not locked it do a try get first. + * If we can't get the root shared, then get a lock on it and + * then wait for the latch. + */ +retry: if (lock_mode == DB_LOCK_WRITE) + get_mode = DB_MPOOL_DIRTY; + else if (LOCK_ISSET(lock) || !STD_LOCKING(dbc) || + F_ISSET(dbc, DBC_DOWNREV) || + dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM)) + get_mode = 0; + else + get_mode = DB_MPOOL_TRY; + + BAM_GET_ROOT(dbc, root_pgno, h, get_mode, lock_mode, lock, ret); + if (ret == DB_LOCK_NOTGRANTED && get_mode == DB_MPOOL_TRY) { + DB_ASSERT(dbp->env, !LOCK_ISSET(lock)); + if ((ret = __db_lget(dbc, 0, + root_pgno == PGNO_INVALID ? BAM_ROOT_PGNO(dbc) : root_pgno, + lock_mode, 0, &lock)) != 0) + return (ret); + goto retry; + } + if (ret != 0) { + /* Did not read it, so we can release the lock */ + (void)__LPUT(dbc, lock); + return (ret); + } + DB_ASSERT(dbp->env, TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO || + TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO || TYPE(h) == P_LDUP); + + /* + * Decide if we need to dirty and/or lock this page. + * We must not hold the latch while we get the lock. + */ + if (!*stack && + ((LF_ISSET(SR_PARENT) && (u_int8_t)(slevel + 1) >= LEVEL(h)) || + LEVEL(h) == LEAFLEVEL || + (LF_ISSET(SR_START) && slevel == LEVEL(h)))) { + *stack = 1; + /* If we already have the write lock, we are done. */ + if (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM)) { + if (lock_mode == DB_LOCK_WRITE) + goto done; + if ((ret = __LPUT(dbc, lock)) != 0) + return (ret); + } + + /* + * Now that we know what level the root is at, do we need a + * write lock? 
If not or we got the lock before latching + * we are done. + */ + if (LEVEL(h) != LEAFLEVEL || LF_ISSET(SR_WRITE)) { + lock_mode = DB_LOCK_WRITE; + /* Drop the read lock if we got it above. */ + if ((ret = __LPUT(dbc, lock)) != 0) + return (ret); + } else if (LOCK_ISSET(lock)) + goto done; + if (!STD_LOCKING(dbc)) { + if (lock_mode != DB_LOCK_WRITE) + goto done; + if ((ret = __memp_dirty(mpf, &h, dbc->thread_info, + dbc->txn, dbc->priority, 0)) != 0) { + if (h != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + return (ret); + } + } else { + /* Try to lock the page without waiting first. */ + if ((ret = __db_lget(dbc, 0, root_pgno, + lock_mode, DB_LOCK_NOWAIT, &lock)) == 0) { + if (lock_mode == DB_LOCK_WRITE && (ret = + __memp_dirty(mpf, &h, dbc->thread_info, + dbc->txn, dbc->priority, 0)) != 0) { + if (h != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, h, + dbc->priority); + return (ret); + } + goto done; + } + + t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + h = NULL; + + if (ret == DB_LOCK_DEADLOCK || + ret == DB_LOCK_NOTGRANTED) + ret = 0; + if (ret == 0) + ret = t_ret; + + if (ret != 0) + return (ret); + get_mode = 0; + if (lock_mode == DB_LOCK_WRITE) + get_mode = DB_MPOOL_DIRTY; + + if ((ret = __db_lget(dbc, + 0, root_pgno, lock_mode, 0, &lock)) != 0) + return (ret); + if ((ret = __memp_fget(mpf, + &root_pgno, dbc->thread_info, dbc->txn, + (atomic_read(&mpf->mfp->multiversion) == 0 && + lock_mode == DB_LOCK_WRITE) ? DB_MPOOL_DIRTY : 0, + &h)) != 0) { + /* Did not read it, release the lock */ + (void)__LPUT(dbc, lock); + return (ret); + } + } + /* + * While getting dirty or locked we need to drop the mutex + * so someone else could get in and split the root. + */ + if (!((LF_ISSET(SR_PARENT) && + (u_int8_t)(slevel + 1) >= LEVEL(h)) || + LEVEL(h) == LEAFLEVEL || + (LF_ISSET(SR_START) && slevel == LEVEL(h)))) { + /* Someone else split the root, start over. 
*/ + ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + h = NULL; + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + goto try_again; + } else if (atomic_read(&mpf->mfp->multiversion) != 0 && + lock_mode == DB_LOCK_WRITE && (ret = __memp_dirty(mpf, &h, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) { + (void)__memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + (void)__LPUT(dbc, lock); + } + } + +done: BT_STK_ENTER(dbp->env, cp, h, 0, lock, lock_mode, ret); + + return (ret); +} + +/* + * __bam_search -- + * Search a btree for a key. + * + * PUBLIC: int __bam_search __P((DBC *, db_pgno_t, + * PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *)); + */ +int +__bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp) + DBC *dbc; + db_pgno_t root_pgno; + const DBT *key; + u_int32_t flags; + int slevel, *exactp; + db_recno_t *recnop; +{ + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + DB_LOCK lock, saved_lock; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h, *parent_h; + db_indx_t base, i, indx, *inp, lim; + db_lockmode_t lock_mode; + db_pgno_t pg, saved_pg, start_pgno; + db_recno_t recno; + int adjust, cmp, deloffset, ret, set_stack, stack, t_ret; + int getlock, was_next; + int (*func) __P((DB *, const DBT *, const DBT *)); + u_int32_t get_mode, wait; + u_int8_t level, saved_level; + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + h = NULL; + parent_h = NULL; + t = dbp->bt_internal; + recno = 0; + t_ret = 0; + + BT_STK_CLR(cp); + LOCK_INIT(saved_lock); + LOCK_INIT(lock); + was_next = LF_ISSET(SR_NEXT); + wait = DB_LOCK_NOWAIT; + + /* + * There are several ways we search a btree tree. 
The flags argument + * specifies if we're acquiring read or write latches, if we position + * to the first or last item in a set of duplicates, if we return + * deleted items, and if we are latching pairs of pages. In addition, + * if we're modifying record numbers, we have to latch the entire tree + * regardless. See btree.h for more details. + */ + + start_pgno = saved_pg = root_pgno; + saved_level = MAXBTREELEVEL; +retry: if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0) + goto err; + lock_mode = cp->csp->lock_mode; + get_mode = lock_mode == DB_LOCK_WRITE ? DB_MPOOL_DIRTY : 0; + h = cp->csp->page; + root_pgno = pg = PGNO(h); + lock = cp->csp->lock; + set_stack = stack; + /* + * Determine if we need to lock interior nodes. + * If we have record numbers we always lock. Otherwise we only + * need to do this if we are write locking and we are returning + * a stack of nodes. SR_NEXT will eventually get a stack and + * release the locks above that level. + */ + if (F_ISSET(dbc, DBC_DOWNREV)) { + getlock = 1; + wait = 0; + } else + getlock = F_ISSET(cp, C_RECNUM) || + (lock_mode == DB_LOCK_WRITE && + (stack || LF_ISSET(SR_NEXT | SR_DEL))); + + /* + * If we are asked for a level that is above the root, + * just return the root. This can happen if the tree + * collapses while we are trying to lock the root. + */ + if (!LF_ISSET(SR_START) && LEVEL(h) < slevel) + goto done; + + BT_STK_CLR(cp); + + /* Choose a comparison function. */ + func = F_ISSET(dbc, DBC_OPD) ? + (dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare) : + t->bt_compare; + + for (;;) { + if (TYPE(h) == P_LBTREE) + adjust = P_INDX; + else { + /* + * It is possible to catch an internal page as a change + * is being backed out. Its leaf pages will be locked + * but we must be sure we get to one. If the page + * is not populated enough lock it. 
+ */ + if (TYPE(h) != P_LDUP && NUM_ENT(h) == 0) { + getlock = 1; + level = LEVEL(h) + 1; + if ((ret = __memp_fput(mpf, dbc->thread_info, + h, dbc->priority)) != 0) + goto err; + goto lock_next; + } + adjust = O_INDX; + } + inp = P_INP(dbp, h); + if (LF_ISSET(SR_MIN | SR_MAX)) { + if (LF_ISSET(SR_MIN) || NUM_ENT(h) == 0) + indx = 0; + else if (TYPE(h) == P_LBTREE) + indx = NUM_ENT(h) - 2; + else + indx = NUM_ENT(h) - 1; + + if (LEVEL(h) == LEAFLEVEL || + (!LF_ISSET(SR_START) && LEVEL(h) == slevel)) { + if (LF_ISSET(SR_NEXT)) + goto get_next; + goto found; + } + goto next; + } + /* + * Do a binary search on the current page. If we're searching + * a Btree leaf page, we have to walk the indices in groups of + * two. If we're searching an internal page or a off-page dup + * page, they're an index per page item. If we find an exact + * match on a leaf page, we're done. + */ + DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(h), adjust) { + DB_BINARY_SEARCH_INCR(indx, base, lim, adjust); + if ((ret = __bam_cmp(dbc, key, h, indx, + func, &cmp)) != 0) + goto err; + if (cmp == 0) { + if (LEVEL(h) == LEAFLEVEL || + (!LF_ISSET(SR_START) && + LEVEL(h) == slevel)) { + if (LF_ISSET(SR_NEXT)) + goto get_next; + goto found; + } + goto next; + } + if (cmp > 0) + DB_BINARY_SEARCH_SHIFT_BASE(indx, base, + lim, adjust); + } + + /* + * No match found. Base is the smallest index greater than + * key and may be zero or a last + O_INDX index. + * + * If it's a leaf page or the stopping point, + * return base as the "found" value. + * Delete only deletes exact matches. 
+ */ + if (LEVEL(h) == LEAFLEVEL || + (!LF_ISSET(SR_START) && LEVEL(h) == slevel)) { + *exactp = 0; + + if (LF_ISSET(SR_EXACT)) { + ret = DB_NOTFOUND; + goto err; + } + + if (LF_ISSET(SR_STK_ONLY)) { + BT_STK_NUM(env, cp, h, base, ret); + if ((t_ret = + __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + h = NULL; + if (ret != 0) + goto err; + goto done; + } + if (LF_ISSET(SR_NEXT)) { +get_next: /* + * The caller could have asked for a NEXT + * at the root if the tree recently collapsed. + */ + if (PGNO(h) == root_pgno) { + ret = DB_NOTFOUND; + goto err; + } + + indx = cp->sp->indx + 1; + if (indx == NUM_ENT(cp->sp->page)) { + ret = DB_NOTFOUND; + cp->csp++; + goto err; + } + /* + * If we want both the key page and the next + * page, push the key page on the stack + * otherwise save the root of the subtree + * and drop the rest of the subtree. + * Search down again starting at the + * next child of the root of this subtree. + */ + LF_SET(SR_MIN); + LF_CLR(SR_NEXT); + set_stack = stack = 1; + if (LF_ISSET(SR_BOTH)) { + cp->csp++; + BT_STK_PUSH(env, + cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + LOCK_INIT(lock); + h = cp->sp->page; + pg = GET_BINTERNAL(dbp, h, indx)->pgno; + level = LEVEL(h); + h = NULL; + goto lock_next; + } else { + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + if ((ret = __memp_fput(mpf, + dbc->thread_info, + h, dbc->priority)) != 0) + goto err; + h = cp->sp->page; + cp->sp->page = NULL; + lock = cp->sp->lock; + LOCK_INIT(cp->sp->lock); + if ((ret = __bam_stkrel(dbc, + STK_NOLOCK)) != 0) + goto err; + goto next; + } + } + + /* + * !!! + * Possibly returning a deleted record -- DB_SET_RANGE, + * DB_KEYFIRST and DB_KEYLAST don't require an exact + * match, and we don't want to walk multiple pages here + * to find an undeleted record. This is handled by the + * calling routine. 
+ */ + if (LF_ISSET(SR_DEL) && cp->csp == cp->sp) + cp->csp++; + BT_STK_ENTER(env, cp, h, base, lock, lock_mode, ret); + if (ret != 0) + goto err; + goto done; + } + + /* + * If it's not a leaf page, record the internal page (which is + * a parent page for the key). Decrement the base by 1 if it's + * non-zero so that if a split later occurs, the inserted page + * will be to the right of the saved page. + */ + indx = base > 0 ? base - O_INDX : base; + + /* + * If we're trying to calculate the record number, sum up + * all the record numbers on this page up to the indx point. + */ +next: if (recnop != NULL) + for (i = 0; i < indx; ++i) + recno += GET_BINTERNAL(dbp, h, i)->nrecs; + + pg = GET_BINTERNAL(dbp, h, indx)->pgno; + level = LEVEL(h); + + /* See if we are at the level to start stacking. */ + if (LF_ISSET(SR_START) && slevel == level) + set_stack = stack = 1; + + if (LF_ISSET(SR_STK_ONLY)) { + if (slevel == LEVEL(h)) { + BT_STK_NUM(env, cp, h, indx, ret); + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + h = NULL; + if (ret != 0) + goto err; + goto done; + } + BT_STK_NUMPUSH(env, cp, h, indx, ret); + (void)__memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + h = NULL; + } else if (stack) { + /* Return if this is the lowest page wanted. */ + if (LF_ISSET(SR_PARENT) && slevel == level) { + BT_STK_ENTER(env, + cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + goto done; + } + if (LF_ISSET(SR_DEL) && NUM_ENT(h) > 1) { + /* + * There was a page with a singleton pointer + * to a non-empty subtree. 
+ */ + cp->csp--; + if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + set_stack = stack = 0; + goto do_del; + } + BT_STK_PUSH(env, + cp, h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + + LOCK_INIT(lock); + get_mode = DB_MPOOL_DIRTY; + lock_mode = DB_LOCK_WRITE; + getlock = 1; + goto lock_next; + } else { + /* + * Decide if we want to return a reference to the next + * page in the return stack. If so, latch it and don't + * unlatch it. We will want to stack things on the + * next iteration. The stack variable cannot be + * set until we leave this clause. If we are locking + * then we must lock this level before getting the page. + */ + if ((LF_ISSET(SR_PARENT) && + (u_int8_t)(slevel + 1) >= (level - 1)) || + (level - 1) == LEAFLEVEL) + set_stack = 1; + + /* + * Check for a normal search. If so, we need to + * latch couple the parent/chid buffers. + */ + if (!LF_ISSET(SR_DEL | SR_NEXT)) { + parent_h = h; + goto lock_next; + } + + /* + * Returning a subtree. See if we have hit the start + * point if so save the parent and set stack. + * Otherwise free the parent and temporarily + * save this one. + * For SR_DEL we need to find a page with 1 entry. + * For SR_NEXT we want find the minimal subtree + * that contains the key and the next page. + * We save pages as long as we are at the right + * edge of the subtree. When we leave the right + * edge, then drop the subtree. + */ + + if ((LF_ISSET(SR_DEL) && NUM_ENT(h) == 1)) { + /* + * We are pushing the things on the stack, + * set the stack variable now to indicate this + * has happened. + */ + stack = set_stack = 1; + LF_SET(SR_WRITE); + /* Push the parent. */ + cp->csp++; + /* Push this node. */ + BT_STK_PUSH(env, cp, h, + indx, lock, DB_LOCK_NG, ret); + if (ret != 0) + goto err; + LOCK_INIT(lock); + } else { + /* + * See if we want to save the tree so far. + * If we are looking for the next key, + * then we must save this node if we are + * at the end of the page. 
If not then + * discard anything we have saved so far. + * For delete only keep one node until + * we find a singleton. + */ +do_del: if (cp->csp->page != NULL) { + if (LF_ISSET(SR_NEXT) && + indx == NUM_ENT(h) - 1) + cp->csp++; + else if ((ret = + __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + } + /* Save this node. */ + BT_STK_ENTER(env, cp, + h, indx, lock, lock_mode, ret); + if (ret != 0) + goto err; + LOCK_INIT(lock); + } + +lock_next: h = NULL; + + if (set_stack && LF_ISSET(SR_WRITE)) { + lock_mode = DB_LOCK_WRITE; + get_mode = DB_MPOOL_DIRTY; + getlock = 1; + } + /* + * If we are retrying and we are back at the same + * page then we already have it locked. If we are + * at a different page we want to lock couple and + * release that lock. + */ + if (level - 1 == saved_level) { + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + lock = saved_lock; + LOCK_INIT(saved_lock); + saved_level = MAXBTREELEVEL; + if (pg == saved_pg) + goto skip_lock; + } + if ((getlock || level - 1 == LEAFLEVEL) && + (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, + pg, lock_mode, wait, &lock)) != 0) { + /* + * If we are doing DEL or NEXT then we + * have an extra level saved in the stack, + * push it so it will get freed. + */ + if (LF_ISSET(SR_DEL | SR_NEXT) && !stack) + cp->csp++; + PERFMON6(env, race, bam_search, dbp->fname, + dbp->dname, ret, h, parent_h, flags); + /* + * If we fail, discard the lock we held. + * This is ok because we will either search + * again or exit without actually looking + * at the data. + */ + if ((t_ret = __LPUT(dbc, lock)) != 0) + ret = t_ret; + /* + * If we blocked at a different level release + * the previous saved lock. + */ + if ((t_ret = __LPUT(dbc, saved_lock)) != 0 && + ret == 0) + ret = t_ret; + if (wait == 0 || (ret != DB_LOCK_NOTGRANTED && + ret != DB_LOCK_DEADLOCK)) + goto err; + + /* Relase the parent if we are holding it. 
*/ + if (parent_h != NULL && + (ret = __memp_fput(mpf, dbc->thread_info, + parent_h, dbc->priority)) != 0) + goto err; + parent_h = NULL; + + BT_STK_POP(cp); + if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + if ((ret = __db_lget(dbc, + 0, pg, lock_mode, 0, &saved_lock)) != 0) + goto err; + /* + * A very strange case: if this page was + * freed while we wait then we cannot hold + * the lock on it while we reget the root + * latch because allocation is one place + * we lock while holding a latch. + * We want to hold the lock but must ensure + * that the page is not free or cannot become + * free. If we are at the LEAF level we can + * hold on to the lock if the page is still + * of the right type. Otherwise we need to + * besure this page cannot move to an off page + * duplicate tree (which are not locked) and + * masquerade as the page we want. + */ + + /* + * If the page is not at leaf level + * then see if OPD trees are around. + * If the page could appear as an + * interior offpage duplicate node + * at the right level the it will + * not be locked and subsequently be + * freed. If there are multiple + * databases in the file then they + * could have OPDs. + */ + if (level - 1 > LEAFLEVEL && + (F_ISSET(dbp, DB_AM_SUBDB) || + (dbp->type == DB_BTREE && + F_ISSET(dbp, DB_AM_DUPSORT)))) + goto drop_lock; + + /* + * Take a look at the page. If it got + * freed it could be very gone. + */ + if ((ret = __memp_fget(mpf, &pg, + dbc->thread_info, dbc->txn, 0, &h)) != 0 && + ret != DB_PAGE_NOTFOUND) + goto err; + + /* + * Check for right level and page type. + */ + if (ret != 0 || LEVEL(h) != level - 1 || + (LEVEL(h) == LEAFLEVEL ? + TYPE(h) != (dbc->dbtype == DB_BTREE ? + P_LBTREE : P_LRECNO) : + TYPE(h) != (dbc->dbtype == DB_BTREE ? 
+ P_IBTREE : P_IRECNO))) { +drop_lock: ret = __LPUT(dbc, saved_lock); + if (ret != 0) + goto err; + pg = root_pgno; + saved_level = MAXBTREELEVEL; + } + if (h != NULL && (ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + h = NULL; + + if (was_next) { + LF_CLR(SR_MIN); + LF_SET(SR_NEXT); + } + /* + * We have the lock but we dropped the + * latch so we need to search again. If + * we get back to the same page then all + * is good, otherwise we need to try to + * lock the new page. + */ + saved_pg = pg; + saved_level = level - 1; + goto retry; + } +skip_lock: stack = set_stack; + } + /* Get the child page. */ + if ((ret = __memp_fget(mpf, &pg, + dbc->thread_info, dbc->txn, get_mode, &h)) != 0) + goto err; + /* Release the parent. */ + if (parent_h != NULL && (ret = __memp_fput(mpf, + dbc->thread_info, parent_h, dbc->priority)) != 0) + goto err; + parent_h = NULL; + } + /* NOTREACHED */ + +found: *exactp = 1; + + /* + * If we got here, we know that we have a Btree leaf or off-page + * duplicates page. If it's a Btree leaf page, we have to handle + * on-page duplicates. + * + * If there are duplicates, go to the first/last one. This is + * safe because we know that we're not going to leave the page, + * all duplicate sets that are not on overflow pages exist on a + * single leaf page. + */ + if (TYPE(h) == P_LBTREE && NUM_ENT(h) > P_INDX) { + if (LF_ISSET(SR_DUPLAST)) + while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && + inp[indx] == inp[indx + P_INDX]) + indx += P_INDX; + else if (LF_ISSET(SR_DUPFIRST)) + while (indx > 0 && + inp[indx] == inp[indx - P_INDX]) + indx -= P_INDX; + } + + /* + * Now check if we are allowed to return deleted items; if not, then + * find the next (or previous) non-deleted duplicate entry. (We do + * not move from the original found key on the basis of the SR_DELNO + * flag.) + */ + DB_ASSERT(env, recnop == NULL || LF_ISSET(SR_DELNO)); + if (LF_ISSET(SR_DELNO)) { + deloffset = TYPE(h) == P_LBTREE ? 
O_INDX : 0; + if (LF_ISSET(SR_DUPLAST)) + while (B_DISSET(GET_BKEYDATA(dbp, + h, indx + deloffset)->type) && indx > 0 && + inp[indx] == inp[indx - adjust]) + indx -= adjust; + else + while (B_DISSET(GET_BKEYDATA(dbp, + h, indx + deloffset)->type) && + indx < (db_indx_t)(NUM_ENT(h) - adjust) && + inp[indx] == inp[indx + adjust]) + indx += adjust; + + /* + * If we weren't able to find a non-deleted duplicate, return + * DB_NOTFOUND. + */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) { + ret = DB_NOTFOUND; + goto err; + } + + /* + * Increment the record counter to point to the found element. + * Ignore any deleted key/data pairs. There doesn't need to + * be any correction for duplicates, as Btree doesn't support + * duplicates and record numbers in the same tree. + */ + if (recnop != NULL) { + DB_ASSERT(env, TYPE(h) == P_LBTREE); + + for (i = 0; i < indx; i += P_INDX) + if (!B_DISSET( + GET_BKEYDATA(dbp, h, i + O_INDX)->type)) + ++recno; + + /* Correct the number for a 0-base. */ + *recnop = recno + 1; + } + } + + if (LF_ISSET(SR_STK_ONLY)) { + BT_STK_NUM(env, cp, h, indx, ret); + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + h = NULL; + } else { + if (LF_ISSET(SR_DEL) && cp->csp == cp->sp) + cp->csp++; + BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret); + } + if (ret != 0) + goto err; + + cp->csp->lock = lock; + DB_ASSERT(env, parent_h == NULL); + +done: + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + + if ((ret = __LPUT(dbc, saved_lock)) != 0) + return (ret); + + return (0); + +err: if (ret == 0) + ret = t_ret; + if (h != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (parent_h != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, parent_h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Keep any not-found page locked for serializability. 
*/ + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + (void)__LPUT(dbc, saved_lock); + + BT_STK_POP(cp); + (void)__bam_stkrel(dbc, 0); + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + + return (ret); +} + +/* + * __bam_stkrel -- + * Release all pages currently held in the stack. + * + * PUBLIC: int __bam_stkrel __P((DBC *, u_int32_t)); + */ +int +__bam_stkrel(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + BTREE_CURSOR *cp; + DB *dbp; + DB_MPOOLFILE *mpf; + EPG *epg; + int ret, t_ret; + + DB_ASSERT(NULL, dbc != NULL); + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * Release inner pages first. + * + * The caller must be sure that setting STK_NOLOCK will not affect + * either serializability or recoverability. + */ + for (ret = 0, epg = cp->sp; epg <= cp->csp; ++epg) { + if (epg->page != NULL) { + if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) { + cp->page = NULL; + LOCK_INIT(cp->lock); + } + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + epg->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + epg->page = NULL; + } + /* + * We set this if we need to release our pins, + * but are not logically ready to have the pages + * visible. + */ + if (LF_ISSET(STK_PGONLY)) + continue; + if (LF_ISSET(STK_NOLOCK) && + (epg->lock.mode == DB_LOCK_READ || + atomic_read(&mpf->mfp->multiversion) == 0)) { + if ((t_ret = __LPUT(dbc, epg->lock)) != 0 && ret == 0) + ret = t_ret; + } else + if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0) + ret = t_ret; + } + + /* Clear the stack, all pages have been released. */ + if (!LF_ISSET(STK_PGONLY)) + BT_STK_CLR(cp); + + return (ret); +} + +/* + * __bam_stkgrow -- + * Grow the stack. 
+ * + * PUBLIC: int __bam_stkgrow __P((ENV *, BTREE_CURSOR *)); + */ +int +__bam_stkgrow(env, cp) + ENV *env; + BTREE_CURSOR *cp; +{ + EPG *p; + size_t entries; + int ret; + + entries = cp->esp - cp->sp; + + if ((ret = __os_calloc(env, entries * 2, sizeof(EPG), &p)) != 0) + return (ret); + memcpy(p, cp->sp, entries * sizeof(EPG)); + if (cp->sp != cp->stack) + __os_free(env, cp->sp); + cp->sp = p; + cp->csp = p + entries; + cp->esp = p + entries * 2; + return (0); +} diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c new file mode 100644 index 00000000..fe27b947 --- /dev/null +++ b/src/btree/bt_split.c @@ -0,0 +1,1332 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/btree.h" + +static int __bam_page __P((DBC *, EPG *, EPG *)); +static int __bam_psplit __P((DBC *, EPG *, PAGE *, PAGE *, db_indx_t *)); +static int __bam_root __P((DBC *, EPG *)); + +/* + * __bam_split -- + * Split a page. + * + * PUBLIC: int __bam_split __P((DBC *, void *, db_pgno_t *)); + */ +int +__bam_split(dbc, arg, root_pgnop) + DBC *dbc; + void *arg; + db_pgno_t *root_pgnop; +{ + BTREE_CURSOR *cp; + DB_LOCK metalock, next_lock; + enum { UP, DOWN } dir; + db_pgno_t pgno, next_pgno, root_pgno; + int exact, level, ret; + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + + cp = (BTREE_CURSOR *)dbc->internal; + LOCK_INIT(next_lock); + next_pgno = PGNO_INVALID; + + /* + * First get a lock on the metadata page, we will have to allocate + * pages and cannot get a lock while we have the search tree pinned. + */ + + pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + root_pgno = BAM_ROOT_PGNO(dbc); + + /* + * The locking protocol we use to avoid deadlock is to acquire locks by + * walking down the tree, but we do it as lazily as possible, locking + * the root only as a last resort. We expect all stack pages to have + * been discarded before we're called; we discard all short-term locks. 
+ * + * When __bam_split is first called, we know that a leaf page was too + * full for an insert. We don't know what leaf page it was, but we + * have the key/recno that caused the problem. We call XX_search to + * reacquire the leaf page, but this time get both the leaf page and + * its parent, locked. We then split the leaf page and see if the new + * internal key will fit into the parent page. If it will, we're done. + * + * If it won't, we discard our current locks and repeat the process, + * only this time acquiring the parent page and its parent, locked. + * This process repeats until we succeed in the split, splitting the + * root page as the final resort. The entire process then repeats, + * as necessary, until we split a leaf page. + * + * XXX + * A traditional method of speeding this up is to maintain a stack of + * the pages traversed in the original search. You can detect if the + * stack is correct by storing the page's LSN when it was searched and + * comparing that LSN with the current one when it's locked during the + * split. This would be an easy change for this code, but I have no + * numbers that indicate it's worthwhile. + */ + for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) { + /* + * Acquire a page and its parent, locked. + */ +retry: if ((ret = (dbc->dbtype == DB_BTREE ? + __bam_search(dbc, PGNO_INVALID, + arg, SR_WRPAIR, level, NULL, &exact) : + __bam_rsearch(dbc, + (db_recno_t *)arg, SR_WRPAIR, level, &exact))) != 0) + break; + + if (cp->csp[0].page->pgno == root_pgno) { + /* we can overshoot the top of the tree. */ + level = cp->csp[0].page->level; + if (root_pgnop != NULL) + *root_pgnop = root_pgno; + } else if (root_pgnop != NULL) + *root_pgnop = cp->csp[-1].page->pgno; + + /* + * Split the page if it still needs it (it's possible another + * thread of control has already split the page). If we are + * guaranteed that two items will fit on the page, the split + * is no longer necessary. 
+ */ + if (2 * B_MAXSIZEONPAGE(cp->ovflsize) + <= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) { + if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0) + goto err; + goto no_split; + } + + /* + * We need to try to lock the next page so we can update + * its PREV. + */ + if (ISLEAF(cp->csp->page) && + (pgno = NEXT_PGNO(cp->csp->page)) != PGNO_INVALID) { + TRY_LOCK(dbc, pgno, + next_pgno, next_lock, DB_LOCK_WRITE, retry); + if (ret != 0) + goto err; + } + ret = cp->csp[0].page->pgno == root_pgno ? + __bam_root(dbc, &cp->csp[0]) : + __bam_page(dbc, &cp->csp[-1], &cp->csp[0]); + BT_STK_CLR(cp); + + switch (ret) { + case 0: +no_split: /* Once we've split the leaf page, we're done. */ + if (level == LEAFLEVEL) + goto done; + + /* Switch directions. */ + if (dir == UP) + dir = DOWN; + break; + case DB_NEEDSPLIT: + /* + * It's possible to fail to split repeatedly, as other + * threads may be modifying the tree, or the page usage + * is sufficiently bad that we don't get enough space + * the first time. + */ + if (dir == DOWN) + dir = UP; + break; + default: + goto err; + } + } + + if (root_pgnop != NULL) + *root_pgnop = BAM_ROOT_PGNO(dbc); +err: +done: (void)__LPUT(dbc, metalock); + (void)__TLPUT(dbc, next_lock); + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + return (ret); +} + +/* + * __bam_root -- + * Split the root page of a btree. + */ +static int +__bam_root(dbc, cp) + DBC *dbc; + EPG *cp; +{ + DB *dbp; + DBT log_dbt, rootent[2]; + DB_LOCK llock, rlock; + DB_LSN log_lsn; + DB_MPOOLFILE *mpf; + PAGE *lp, *rp; + db_indx_t split; + u_int32_t opflags; + int ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + lp = rp = NULL; + LOCK_INIT(llock); + LOCK_INIT(rlock); + COMPQUIET(log_dbt.data, NULL); + + /* Yeah, right. 
*/ + if (cp->page->level >= MAXBTREELEVEL) { + __db_errx(dbp->env, DB_STR_A("1021", + "Too many btree levels: %d", "%d"), cp->page->level); + return (ENOSPC); + } + + if ((ret = __memp_dirty(mpf, + &cp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + + /* Create new left and right pages for the split. */ + if ((ret = __db_new(dbc, TYPE(cp->page), &llock, &lp)) != 0 || + (ret = __db_new(dbc, TYPE(cp->page), &rlock, &rp)) != 0) + goto err; + P_INIT(lp, dbp->pgsize, lp->pgno, + PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno, + cp->page->level, TYPE(cp->page)); + P_INIT(rp, dbp->pgsize, rp->pgno, + ISINTERNAL(cp->page) ? PGNO_INVALID : lp->pgno, PGNO_INVALID, + cp->page->level, TYPE(cp->page)); + + PERFMON5(env, alloc, btree_split, + dbp->fname, dbp->dname, lp->pgno, cp->page->pgno, lp->level); + + /* Split the page. */ + if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0) + goto err; + + if (DBC_LOGGING(dbc)) { + memset(&log_dbt, 0, sizeof(log_dbt)); + if ((ret = + __os_malloc(dbp->env, dbp->pgsize, &log_dbt.data)) != 0) + goto err; + log_dbt.size = dbp->pgsize; + memcpy(log_dbt.data, cp->page, dbp->pgsize); + } + + /* Clean up the new root page. */ + if ((ret = (dbc->dbtype == DB_RECNO ? + __ram_root(dbc, cp->page, lp, rp) : + __bam_broot(dbc, cp->page, split, lp, rp))) != 0) { + if (DBC_LOGGING(dbc)) + __os_free(dbp->env, log_dbt.data); + goto err; + } + + /* Log the change. */ + if (DBC_LOGGING(dbc)) { + memset(rootent, 0, sizeof(rootent)); + rootent[0].data = GET_BINTERNAL(dbp, cp->page, 0); + rootent[1].data = GET_BINTERNAL(dbp, cp->page, 1); + if (dbc->dbtype == DB_RECNO) + rootent[0].size = rootent[1].size = RINTERNAL_SIZE; + else { + rootent[0].size = BINTERNAL_SIZE( + ((BINTERNAL *)rootent[0].data)->len); + rootent[1].size = BINTERNAL_SIZE( + ((BINTERNAL *)rootent[1].data)->len); + } + ZERO_LSN(log_lsn); + opflags = F_ISSET( + (BTREE_CURSOR *)dbc->internal, C_RECNUM) ? 
SPL_NRECS : 0; + if (dbc->dbtype == DB_RECNO) + opflags |= SPL_RECNO; + ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0, + OP_SET(opflags, cp->page), PGNO(lp), &LSN(lp), + PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), + PGNO_INVALID, &log_lsn, PGNO(cp->page), + &LSN(cp->page), 0, &log_dbt, &rootent[0], &rootent[1]); + + /* On failure, restore the page. */ + if (ret != 0) + memcpy(cp->page, log_dbt.data, dbp->pgsize); + __os_free(dbp->env, log_dbt.data); + + if (ret != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + LSN(lp) = LSN(cp->page); + LSN(rp) = LSN(cp->page); + + /* Adjust any cursors. */ + ret = __bam_ca_split(dbc, cp->page->pgno, lp->pgno, rp->pgno, split, 1); + + /* Success or error: release pages and locks. */ +err: if (cp->page != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + + /* + * We are done. Put or downgrade all our locks and release + * the pages. + */ + if ((t_ret = __TLPUT(dbc, llock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, rlock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, cp->lock)) != 0 && ret == 0) + ret = t_ret; + if (lp != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, lp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (rp != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __bam_page -- + * Split the non-root page of a btree. 
+ */ +static int +__bam_page(dbc, pp, cp) + DBC *dbc; + EPG *pp, *cp; +{ + BTREE_CURSOR *bc; + DB *dbp; + DBT log_dbt, rentry; + DB_LOCK rplock; + DB_LSN log_lsn; + DB_LSN save_lsn; + DB_MPOOLFILE *mpf; + PAGE *lp, *rp, *alloc_rp, *tp; + db_indx_t split; + u_int32_t opflags; + int ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + alloc_rp = lp = rp = tp = NULL; + LOCK_INIT(rplock); + ret = -1; + + /* + * Create new left page for the split, and fill in everything + * except its LSN and next-page page number. + * + * Create a new right page for the split, and fill in everything + * except its LSN and page number. + * + * We malloc space for both the left and right pages, so we don't get + * a new page from the underlying buffer pool until we know the split + * is going to succeed. The reason is that we can't release locks + * acquired during the get-a-new-page process because metadata page + * locks can't be discarded on failure since we may have modified the + * free list. So, if you assume that we're holding a write lock on the + * leaf page which ran out of space and started this split (e.g., we + * have already written records to the page, or we retrieved a record + * from it with the DB_RMW flag set), failing in a split with both a + * leaf page locked and the metadata page locked can potentially lock + * up the tree badly, because we've violated the rule of always locking + * down the tree, and never up. + */ + if ((ret = __os_malloc(dbp->env, dbp->pgsize * 2, &lp)) != 0) + goto err; + P_INIT(lp, dbp->pgsize, PGNO(cp->page), + ISINTERNAL(cp->page) ? PGNO_INVALID : PREV_PGNO(cp->page), + ISINTERNAL(cp->page) ? PGNO_INVALID : 0, + cp->page->level, TYPE(cp->page)); + + rp = (PAGE *)((u_int8_t *)lp + dbp->pgsize); + P_INIT(rp, dbp->pgsize, 0, + ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page), + ISINTERNAL(cp->page) ? PGNO_INVALID : NEXT_PGNO(cp->page), + cp->page->level, TYPE(cp->page)); + + /* + * Split right. 
+ * + * Only the indices are sorted on the page, i.e., the key/data pairs + * aren't, so it's simpler to copy the data from the split page onto + * two new pages instead of copying half the data to a new right page + * and compacting the left page in place. Since the left page can't + * change, we swap the original and the allocated left page after the + * split. + */ + if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0) + goto err; + + /* + * Test to see if we are going to be able to insert the new pages into + * the parent page. The interesting failure here is that the parent + * page can't hold the new keys, and has to be split in turn, in which + * case we want to release all the locks we can. + */ + if ((ret = __bam_pinsert(dbc, pp, split, lp, rp, BPI_SPACEONLY)) != 0) + goto err; + + /* + * We've got everything locked down we need, and we know the split + * is going to succeed. Go and get the additional page we'll need. + */ + if ((ret = __db_new(dbc, TYPE(cp->page), &rplock, &alloc_rp)) != 0) + goto err; + + /* + * Prepare to fix up the previous pointer of any leaf page following + * the split page. Our caller has already write locked the page so + * we can get it without deadlocking on the parent latch. + */ + if (ISLEAF(cp->page) && NEXT_PGNO(cp->page) != PGNO_INVALID && + (ret = __memp_fget(mpf, &NEXT_PGNO(cp->page), + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &tp)) != 0) + goto err; + + PERFMON5(env, alloc, btree_split, dbp->fname, + dbp->dname, cp->page->pgno, pp->page->pgno, cp->page->level); + + /* + * Fix up the page numbers we didn't have before. We have to do this + * before calling __bam_pinsert because it may copy a page number onto + * the parent page and it takes the page number from its page argument. + */ + PGNO(rp) = NEXT_PGNO(lp) = PGNO(alloc_rp); + + DB_ASSERT(dbp->env, IS_DIRTY(cp->page)); + DB_ASSERT(dbp->env, IS_DIRTY(pp->page)); + + bc = (BTREE_CURSOR *)dbc->internal; + + /* Actually update the parent page. 
*/ + if ((ret = __bam_pinsert(dbc, + pp, split, lp, rp, F_ISSET(bc, C_RECNUM) ? 0 : BPI_NOLOGGING)) != 0) + goto err; + + /* Log the change. */ + if (DBC_LOGGING(dbc)) { + memset(&log_dbt, 0, sizeof(log_dbt)); + log_dbt.data = cp->page; + log_dbt.size = dbp->pgsize; + memset(&rentry, 0, sizeof(rentry)); + rentry.data = GET_BINTERNAL(dbp, pp->page, pp->indx + 1); + opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0; + if (dbc->dbtype == DB_RECNO) { + opflags |= SPL_RECNO; + rentry.size = RINTERNAL_SIZE; + } else + rentry.size = + BINTERNAL_SIZE(((BINTERNAL *)rentry.data)->len); + if (tp == NULL) + ZERO_LSN(log_lsn); + if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), + 0, OP_SET(opflags, pp->page), PGNO(cp->page), + &LSN(cp->page), PGNO(alloc_rp), &LSN(alloc_rp), + (u_int32_t)NUM_ENT(lp), tp == NULL ? 0 : PGNO(tp), + tp == NULL ? &log_lsn : &LSN(tp), PGNO(pp->page), + &LSN(pp->page), pp->indx, &log_dbt, NULL, &rentry)) != 0) { + /* + * If this is not RECNO then undo the update + * to the parent page, which has not been + * logged yet. This must succeed. Recno + * database trees are locked and therefore + * the parent can be logged independently. + */ + if (F_ISSET(bc, C_RECNUM) == 0) { + t_ret = __db_ditem_nolog(dbc, pp->page, + pp->indx + 1, rentry.size); + DB_ASSERT(dbp->env, t_ret == 0); + } + + goto err; + } + + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + /* Update the LSNs for all involved pages. */ + LSN(alloc_rp) = LSN(cp->page); + LSN(lp) = LSN(cp->page); + LSN(rp) = LSN(cp->page); + LSN(pp->page) = LSN(cp->page); + if (tp != NULL) { + /* Log record has been written; so safe to update next page. */ + PREV_PGNO(tp) = PGNO(rp); + LSN(tp) = LSN(cp->page); + } + + /* + * Copy the left and right pages into place. There are two paths + * through here. Either we are logging and we set the LSNs in the + * logging path. However, if we are not logging, then we do not + * have valid LSNs on lp or rp. 
The correct LSNs to use are the + * ones on the page we got from __db_new or the one that was + * originally on cp->page. In both cases, we save the LSN from the + * real database page (not a malloc'd one) and reapply it after we + * do the copy. + */ + save_lsn = alloc_rp->lsn; + memcpy(alloc_rp, rp, LOFFSET(dbp, rp)); + memcpy((u_int8_t *)alloc_rp + HOFFSET(rp), + (u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp)); + alloc_rp->lsn = save_lsn; + + save_lsn = cp->page->lsn; + memcpy(cp->page, lp, LOFFSET(dbp, lp)); + memcpy((u_int8_t *)cp->page + HOFFSET(lp), + (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp)); + cp->page->lsn = save_lsn; + + /* Adjust any cursors. */ + if ((ret = __bam_ca_split(dbc, + PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0) + goto err; + + __os_free(dbp->env, lp); + + /* + * Success -- write the real pages back to the store. + */ + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, alloc_rp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, rplock)) != 0 && ret == 0) + ret = t_ret; + if (tp != NULL) { + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, tp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + } + if ((t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0) + ret = t_ret; + return (ret); + +err: if (lp != NULL) + __os_free(dbp->env, lp); + if (alloc_rp != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, alloc_rp, dbc->priority); + if (tp != NULL) + (void)__memp_fput(mpf, dbc->thread_info, tp, dbc->priority); + + if (pp->page != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, pp->page, dbc->priority); + + if (ret == DB_NEEDSPLIT && atomic_read(&mpf->mfp->multiversion) == 0) + (void)__LPUT(dbc, pp->lock); + else + (void)__TLPUT(dbc, pp->lock); + + (void)__memp_fput(mpf, dbc->thread_info, cp->page, dbc->priority); + + /* + * We don't drop the left and right page locks. If we doing dirty + * reads then we need to hold the locks until we abort the transaction. 
+ * If we are not transactional, we are hosed anyway as the tree + * is trashed. It may be better not to leak the locks. + */ + + if (dbc->txn == NULL) + (void)__LPUT(dbc, rplock); + + if (dbc->txn == NULL || ret == DB_NEEDSPLIT) + (void)__LPUT(dbc, cp->lock); + + return (ret); +} + +/* + * __bam_broot -- + * Fix up the btree root page after it has been split. + * PUBLIC: int __bam_broot __P((DBC *, PAGE *, u_int32_t, PAGE *, PAGE *)); + */ +int +__bam_broot(dbc, rootp, split, lp, rp) + DBC *dbc; + u_int32_t split; + PAGE *rootp, *lp, *rp; +{ + BINTERNAL bi, bi0, *child_bi; + BKEYDATA *child_bk; + BOVERFLOW bo, *child_bo; + BTREE_CURSOR *cp; + DB *dbp; + DBT hdr, hdr0, data; + db_pgno_t root_pgno; + int ret; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + child_bo = NULL; + data.data = NULL; + memset(&bi, 0, sizeof(bi)); + + switch (TYPE(rootp)) { + case P_IBTREE: + /* Copy the first key of the child page onto the root page. */ + child_bi = GET_BINTERNAL(dbp, rootp, split); + switch (B_TYPE(child_bi->type)) { + case B_KEYDATA: + bi.len = child_bi->len; + B_TSET(bi.type, B_KEYDATA); + bi.pgno = rp->pgno; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + if ((ret = __os_malloc(dbp->env, + child_bi->len, &data.data)) != 0) + return (ret); + memcpy(data.data, child_bi->data, child_bi->len); + data.size = child_bi->len; + break; + case B_OVERFLOW: + /* Reuse the overflow key. */ + child_bo = (BOVERFLOW *)child_bi->data; + memset(&bo, 0, sizeof(bo)); + bo.type = B_OVERFLOW; + bo.tlen = child_bo->tlen; + bo.pgno = child_bo->pgno; + bi.len = BOVERFLOW_SIZE; + B_TSET(bi.type, B_OVERFLOW); + bi.pgno = rp->pgno; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); + break; + case B_DUPLICATE: + default: + goto pgfmt; + } + break; + case P_LDUP: + case P_LBTREE: + /* Copy the first key of the child page onto the root page. 
*/ + child_bk = GET_BKEYDATA(dbp, rootp, split); + switch (B_TYPE(child_bk->type)) { + case B_KEYDATA: + bi.len = child_bk->len; + B_TSET(bi.type, B_KEYDATA); + bi.pgno = rp->pgno; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + if ((ret = __os_malloc(dbp->env, + child_bk->len, &data.data)) != 0) + return (ret); + memcpy(data.data, child_bk->data, child_bk->len); + data.size = child_bk->len; + break; + case B_OVERFLOW: + /* Copy the overflow key. */ + child_bo = (BOVERFLOW *)child_bk; + memset(&bo, 0, sizeof(bo)); + bo.type = B_OVERFLOW; + bo.tlen = child_bo->tlen; + memset(&hdr, 0, sizeof(hdr)); + if ((ret = __db_goff(dbc, &hdr, child_bo->tlen, + child_bo->pgno, &hdr.data, &hdr.size)) == 0) + ret = __db_poff(dbc, &hdr, &bo.pgno); + + if (hdr.data != NULL) + __os_free(dbp->env, hdr.data); + if (ret != 0) + return (ret); + + bi.len = BOVERFLOW_SIZE; + B_TSET(bi.type, B_OVERFLOW); + bi.pgno = rp->pgno; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); + break; + case B_DUPLICATE: + default: + goto pgfmt; + } + break; + default: +pgfmt: return (__db_pgfmt(dbp->env, rp->pgno)); + } + /* + * If the root page was a leaf page, change it into an internal page. + * We copy the key we split on (but not the key's data, in the case of + * a leaf page) to the new root page. + */ + root_pgno = BAM_ROOT_PGNO(dbc); + P_INIT(rootp, dbp->pgsize, + root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE); + + /* + * The btree comparison code guarantees that the left-most key on any + * internal btree page is never used, so it doesn't need to be filled + * in. Set the record count if necessary. 
+ */ + memset(&bi0, 0, sizeof(bi0)); + B_TSET(bi0.type, B_KEYDATA); + bi0.pgno = lp->pgno; + if (F_ISSET(cp, C_RECNUM)) { + bi0.nrecs = __bam_total(dbp, lp); + RE_NREC_SET(rootp, bi0.nrecs); + bi.nrecs = __bam_total(dbp, rp); + RE_NREC_ADJ(rootp, bi.nrecs); + } + DB_SET_DBT(hdr0, &bi0, SSZA(BINTERNAL, data)); + if ((ret = __db_pitem_nolog(dbc, rootp, + 0, BINTERNAL_SIZE(0), &hdr0, NULL)) != 0) + goto err; + ret = __db_pitem_nolog(dbc, rootp, 1, + BINTERNAL_SIZE(data.size), &hdr, &data); + +err: if (data.data != NULL && child_bo == NULL) + __os_free(dbp->env, data.data); + return (ret); +} + +/* + * __ram_root -- + * Fix up the recno root page after it has been split. + * PUBLIC: int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *)); + */ +int +__ram_root(dbc, rootp, lp, rp) + DBC *dbc; + PAGE *rootp, *lp, *rp; +{ + DB *dbp; + DBT hdr; + RINTERNAL ri; + db_pgno_t root_pgno; + int ret; + + dbp = dbc->dbp; + root_pgno = BAM_ROOT_PGNO(dbc); + + /* Initialize the page. */ + P_INIT(rootp, dbp->pgsize, + root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO); + + /* Initialize the header. */ + DB_SET_DBT(hdr, &ri, RINTERNAL_SIZE); + + /* Insert the left and right keys, set the header information. */ + ri.pgno = lp->pgno; + ri.nrecs = __bam_total(dbp, lp); + if ((ret = __db_pitem_nolog(dbc, + rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0) + return (ret); + RE_NREC_SET(rootp, ri.nrecs); + ri.pgno = rp->pgno; + ri.nrecs = __bam_total(dbp, rp); + if ((ret = __db_pitem_nolog(dbc, + rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0) + return (ret); + RE_NREC_ADJ(rootp, ri.nrecs); + return (0); +} + +/* + * __bam_pinsert -- + * Insert a new key into a parent page, completing the split. 
+ * + * PUBLIC: int __bam_pinsert + * PUBLIC: __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int)); + */ +int +__bam_pinsert(dbc, parent, split, lchild, rchild, flags) + DBC *dbc; + EPG *parent; + u_int32_t split; + PAGE *lchild, *rchild; + int flags; +{ + BINTERNAL bi, *child_bi; + BKEYDATA *child_bk, *tmp_bk; + BOVERFLOW bo, *child_bo; + BTREE *t; + BTREE_CURSOR *cp; + DB *dbp; + DBT a, b, hdr, data; + EPG *child; + PAGE *ppage; + RINTERNAL ri; + db_indx_t off; + db_recno_t nrecs; + size_t (*func) __P((DB *, const DBT *, const DBT *)); + int (*pitem) __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); + u_int32_t n, nbytes, nksize, oldsize, size; + int ret; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + t = dbp->bt_internal; + ppage = parent->page; + child = parent + 1; + + /* If handling record numbers, count records split to the right page. */ + nrecs = F_ISSET(cp, C_RECNUM) && + !LF_ISSET(BPI_SPACEONLY) ? __bam_total(dbp, rchild) : 0; + + /* + * Now we insert the new page's first key into the parent page, which + * completes the split. The parent points to a PAGE and a page index + * offset, where the new key goes ONE AFTER the index, because we split + * to the right. + * + * XXX + * Some btree algorithms replace the key for the old page as well as + * the new page. We don't, as there's no reason to believe that the + * first key on the old page is any better than the key we have, and, + * in the case of a key being placed at index 0 causing the split, the + * key is unavailable. + */ + off = parent->indx + O_INDX; + if (LF_ISSET(BPI_REPLACE)) + oldsize = TYPE(ppage) == P_IRECNO ? RINTERNAL_PSIZE : + BINTERNAL_PSIZE(GET_BINTERNAL(dbp, ppage, off)->len); + else + oldsize = 0; + + /* + * Calculate the space needed on the parent page. + * + * Prefix trees: space hack used when inserting into BINTERNAL pages. + * Retain only what's needed to distinguish between the new entry and + * the LAST entry on the page to its left. 
If the keys compare equal, + * retain the entire key. We ignore overflow keys, and the entire key + * must be retained for the next-to-leftmost key on the leftmost page + * of each level, or the search will fail. Applicable ONLY to internal + * pages that have leaf pages as children. Further reduction of the + * key between pairs of internal pages loses too much information. + */ + switch (TYPE(child->page)) { + case P_IBTREE: + child_bi = GET_BINTERNAL(dbp, child->page, split); + nbytes = BINTERNAL_PSIZE(child_bi->len); + + if (P_FREESPACE(dbp, ppage) + oldsize < nbytes) + return (DB_NEEDSPLIT); + if (LF_ISSET(BPI_SPACEONLY)) + return (0); + + switch (B_TYPE(child_bi->type)) { + case B_KEYDATA: + /* Add a new record for the right page. */ + memset(&bi, 0, sizeof(bi)); + bi.len = child_bi->len; + B_TSET(bi.type, B_KEYDATA); + bi.pgno = rchild->pgno; + bi.nrecs = nrecs; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, child_bi->data, child_bi->len); + size = BINTERNAL_SIZE(child_bi->len); + break; + case B_OVERFLOW: + /* Reuse the overflow key. */ + child_bo = (BOVERFLOW *)child_bi->data; + memset(&bo, 0, sizeof(bo)); + bo.type = B_OVERFLOW; + bo.tlen = child_bo->tlen; + bo.pgno = child_bo->pgno; + bi.len = BOVERFLOW_SIZE; + B_TSET(bi.type, B_OVERFLOW); + bi.pgno = rchild->pgno; + bi.nrecs = nrecs; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); + size = BINTERNAL_SIZE(BOVERFLOW_SIZE); + break; + case B_DUPLICATE: + default: + goto pgfmt; + } + break; + case P_LDUP: + case P_LBTREE: + child_bk = GET_BKEYDATA(dbp, child->page, split); + switch (B_TYPE(child_bk->type)) { + case B_KEYDATA: + nbytes = BINTERNAL_PSIZE(child_bk->len); + nksize = child_bk->len; + + /* + * Prefix compression: + * We set t->bt_prefix to NULL if we have a comparison + * callback but no prefix compression callback. But, + * if we're splitting in an off-page duplicates tree, + * we still have to do some checking. 
If using the + * default off-page duplicates comparison routine we + * can use the default prefix compression callback. If + * not using the default off-page duplicates comparison + * routine, we can't do any kind of prefix compression + * as there's no way for an application to specify a + * prefix compression callback that corresponds to its + * comparison callback. + * + * No prefix compression if we don't have a compression + * function, or the key we'd compress isn't a normal + * key (for example, it references an overflow page). + * + * Generate a parent page key for the right child page + * from a comparison of the last key on the left child + * page and the first key on the right child page. + */ + if (F_ISSET(dbc, DBC_OPD)) { + if (dbp->dup_compare == __bam_defcmp) + func = __bam_defpfx; + else + func = NULL; + } else + func = t->bt_prefix; + if (func == NULL) + goto noprefix; + tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) - + (TYPE(lchild) == P_LDUP ? O_INDX : P_INDX)); + if (B_TYPE(tmp_bk->type) != B_KEYDATA) + goto noprefix; + DB_INIT_DBT(a, tmp_bk->data, tmp_bk->len); + DB_INIT_DBT(b, child_bk->data, child_bk->len); + nksize = (u_int32_t)func(dbp, &a, &b); + if ((n = BINTERNAL_PSIZE(nksize)) < nbytes) + nbytes = n; + else + nksize = child_bk->len; + +noprefix: if (P_FREESPACE(dbp, ppage) + oldsize < nbytes) + return (DB_NEEDSPLIT); + if (LF_ISSET(BPI_SPACEONLY)) + return (0); + + memset(&bi, 0, sizeof(bi)); + bi.len = nksize; + B_TSET(bi.type, B_KEYDATA); + bi.pgno = rchild->pgno; + bi.nrecs = nrecs; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, child_bk->data, nksize); + size = BINTERNAL_SIZE(nksize); + break; + case B_OVERFLOW: + nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE); + + if (P_FREESPACE(dbp, ppage) + oldsize < nbytes) + return (DB_NEEDSPLIT); + if (LF_ISSET(BPI_SPACEONLY)) + return (0); + + /* Copy the overflow key. 
*/ + child_bo = (BOVERFLOW *)child_bk; + memset(&bo, 0, sizeof(bo)); + bo.type = B_OVERFLOW; + bo.tlen = child_bo->tlen; + memset(&hdr, 0, sizeof(hdr)); + if ((ret = __db_goff(dbc, &hdr, child_bo->tlen, + child_bo->pgno, &hdr.data, &hdr.size)) == 0) + ret = __db_poff(dbc, &hdr, &bo.pgno); + + if (hdr.data != NULL) + __os_free(dbp->env, hdr.data); + if (ret != 0) + return (ret); + + memset(&bi, 0, sizeof(bi)); + bi.len = BOVERFLOW_SIZE; + B_TSET(bi.type, B_OVERFLOW); + bi.pgno = rchild->pgno; + bi.nrecs = nrecs; + DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); + DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); + size = BINTERNAL_SIZE(BOVERFLOW_SIZE); + + break; + case B_DUPLICATE: + default: + goto pgfmt; + } + break; + case P_IRECNO: + case P_LRECNO: + nbytes = RINTERNAL_PSIZE; + + if (P_FREESPACE(dbp, ppage) + oldsize < nbytes) + return (DB_NEEDSPLIT); + if (LF_ISSET(BPI_SPACEONLY)) + return (0); + + /* Add a new record for the right page. */ + DB_SET_DBT(hdr, &ri, RINTERNAL_SIZE); + ri.pgno = rchild->pgno; + ri.nrecs = nrecs; + size = RINTERNAL_SIZE; + data.size = 0; + /* + * For now, we are locking internal recno nodes so + * use two steps. + */ + if (LF_ISSET(BPI_REPLACE)) { + if ((ret = __bam_ditem(dbc, ppage, off)) != 0) + return (ret); + LF_CLR(BPI_REPLACE); + } + break; + default: +pgfmt: return (__db_pgfmt(dbp->env, PGNO(child->page))); + } + + if (LF_ISSET(BPI_REPLACE)) { + DB_ASSERT(dbp->env, !LF_ISSET(BPI_NOLOGGING)); + if ((ret = __bam_irep(dbc, ppage, off, &hdr, &data)) != 0) + return (ret); + } else { + if (LF_ISSET(BPI_NOLOGGING)) + pitem = __db_pitem_nolog; + else + pitem = __db_pitem; + + if ((ret = pitem(dbc, ppage, + off, size, &hdr, data.size != 0 ? &data : NULL)) != 0) + return (ret); + } + + /* + * If a Recno or Btree with record numbers AM page, or an off-page + * duplicates tree, adjust the parent page's left page record count. + */ + if (F_ISSET(cp, C_RECNUM) && !LF_ISSET(BPI_NORECNUM)) { + /* Log the change. 
*/ + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cadjust_log(dbp, dbc->txn, + &LSN(ppage), 0, PGNO(ppage), &LSN(ppage), + parent->indx, -(int32_t)nrecs, 0)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(ppage)); + + /* Update the left page count. */ + if (dbc->dbtype == DB_RECNO) + GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs; + else + GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs; + } + + return (0); +} + +/* + * __bam_psplit -- + * Do the real work of splitting the page. + */ +static int +__bam_psplit(dbc, cp, lp, rp, splitret) + DBC *dbc; + EPG *cp; + PAGE *lp, *rp; + db_indx_t *splitret; +{ + DB *dbp; + PAGE *pp; + db_indx_t half, *inp, nbytes, off, splitp, top; + int adjust, cnt, iflag, isbigkey, ret; + + dbp = dbc->dbp; + pp = cp->page; + inp = P_INP(dbp, pp); + adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX; + + /* + * If we're splitting the first (last) page on a level because we're + * inserting (appending) a key to it, it's likely that the data is + * sorted. Moving a single item to the new page is less work and can + * push the fill factor higher than normal. This is trivial when we + * are splitting a new page before the beginning of the tree, all of + * the interesting tests are against values of 0. + * + * Catching appends to the tree is harder. In a simple append, we're + * inserting an item that sorts past the end of the tree; the cursor + * will point past the last element on the page. But, in trees with + * duplicates, the cursor may point to the last entry on the page -- + * in this case, the entry will also be the last element of a duplicate + * set (the last because the search call specified the SR_DUPLAST flag). + * The only way to differentiate between an insert immediately before + * the last item in a tree or an append after a duplicate set which is + * also the last item in the tree is to call the comparison function. 
+ * When splitting internal pages during an append, the search code + * guarantees the cursor always points to the largest page item less + * than the new internal entry. To summarize, we want to catch three + * possible index values: + * + * NUM_ENT(page) Btree/Recno leaf insert past end-of-tree + * NUM_ENT(page) - O_INDX Btree or Recno internal insert past EOT + * NUM_ENT(page) - P_INDX Btree leaf insert past EOT after a set + * of duplicates + * + * two of which, (NUM_ENT(page) - O_INDX or P_INDX) might be an insert + * near the end of the tree, and not after the end of the tree at all. + * Do a simple test which might be wrong because calling the comparison + * functions is expensive. Regardless, it's not a big deal if we're + * wrong, we'll do the split the right way next time. + */ + off = 0; + if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust) + off = NUM_ENT(pp) - adjust; + else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0) + off = adjust; + if (off != 0) + goto sort; + + /* + * Split the data to the left and right pages. Try not to split on + * an overflow key. (Overflow keys on internal pages will slow down + * searches.) Refuse to split in the middle of a set of duplicates. + * + * First, find the optimum place to split. + * + * It's possible to try and split past the last record on the page if + * there's a very large record at the end of the page. Make sure this + * doesn't happen by bounding the check at the next-to-last entry on + * the page. + * + * Note, we try and split half the data present on the page. This is + * because another process may have already split the page and left + * it half empty. We don't try and skip the split -- we don't know + * how much space we're going to need on the page, and we may need up + * to half the page for a big item, so there's no easy test to decide + * if we need to split or not. 
Besides, if two threads are inserting + * data into the same place in the database, we're probably going to + * need more space soon anyway. + */ + top = NUM_ENT(pp) - adjust; + half = (dbp->pgsize - HOFFSET(pp)) / 2; + for (nbytes = 0, off = 0; off < top && nbytes < half; ++off) + switch (TYPE(pp)) { + case P_IBTREE: + if (B_TYPE( + GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA) + nbytes += BINTERNAL_SIZE( + GET_BINTERNAL(dbp, pp, off)->len); + else + nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE); + break; + case P_LBTREE: + if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) + nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, off)->len); + else + nbytes += BOVERFLOW_SIZE; + + ++off; + /* FALLTHROUGH */ + case P_LDUP: + case P_LRECNO: + if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) + nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, off)->len); + else + nbytes += BOVERFLOW_SIZE; + break; + case P_IRECNO: + nbytes += RINTERNAL_SIZE; + break; + default: + return (__db_pgfmt(dbp->env, pp->pgno)); + } +sort: splitp = off; + + /* + * Splitp is either at or just past the optimum split point. If the + * tree type is such that we're going to promote a key to an internal + * page, and our current choice is an overflow key, look for something + * close by that's smaller. + */ + switch (TYPE(pp)) { + case P_IBTREE: + iflag = 1; + isbigkey = + B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA; + break; + case P_LBTREE: + case P_LDUP: + iflag = 0; + isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) != + B_KEYDATA; + break; + default: + iflag = isbigkey = 0; + } + if (isbigkey) + for (cnt = 1; cnt <= 3; ++cnt) { + off = splitp + cnt * adjust; + if (off < (db_indx_t)NUM_ENT(pp) && + ((iflag && B_TYPE( + GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) || + B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA)) { + splitp = off; + break; + } + if (splitp <= (db_indx_t)(cnt * adjust)) + continue; + off = splitp - cnt * adjust; + if (iflag ? 
B_TYPE( + GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA : + B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) { + splitp = off; + break; + } + } + + /* + * We can't split in the middle a set of duplicates. We know that + * no duplicate set can take up more than about 25% of the page, + * because that's the point where we push it off onto a duplicate + * page set. So, this loop can't be unbounded. + */ + if (TYPE(pp) == P_LBTREE && + inp[splitp] == inp[splitp - adjust]) + for (cnt = 1;; ++cnt) { + off = splitp + cnt * adjust; + if (off < NUM_ENT(pp) && + inp[splitp] != inp[off]) { + splitp = off; + break; + } + if (splitp <= (db_indx_t)(cnt * adjust)) + continue; + off = splitp - cnt * adjust; + if (inp[splitp] != inp[off]) { + splitp = off + adjust; + break; + } + } + + /* We're going to split at splitp. */ + if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0) + return (ret); + if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0) + return (ret); + + *splitret = splitp; + return (0); +} + +/* + * __bam_copy -- + * Copy a set of records from one page to another. + * + * PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); + */ +int +__bam_copy(dbp, pp, cp, nxt, stop) + DB *dbp; + PAGE *pp, *cp; + u_int32_t nxt, stop; +{ + BINTERNAL internal; + db_indx_t *cinp, nbytes, off, *pinp; + + cinp = P_INP(dbp, cp); + pinp = P_INP(dbp, pp); + /* + * Nxt is the offset of the next record to be placed on the target page. + */ + for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) { + switch (TYPE(pp)) { + case P_IBTREE: + if (off == 0 && nxt != 0) + nbytes = BINTERNAL_SIZE(0); + else if (B_TYPE( + GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA) + nbytes = BINTERNAL_SIZE( + GET_BINTERNAL(dbp, pp, nxt)->len); + else + nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE); + break; + case P_LBTREE: + /* + * If we're on a key and it's a duplicate, just copy + * the offset. 
+ */ + if (off != 0 && (nxt % P_INDX) == 0 && + pinp[nxt] == pinp[nxt - P_INDX]) { + cinp[off] = cinp[off - P_INDX]; + continue; + } + /* FALLTHROUGH */ + case P_LDUP: + case P_LRECNO: + if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) == + B_KEYDATA) + nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, nxt)->len); + else + nbytes = BOVERFLOW_SIZE; + break; + case P_IRECNO: + nbytes = RINTERNAL_SIZE; + break; + default: + return (__db_pgfmt(dbp->env, pp->pgno)); + } + cinp[off] = HOFFSET(cp) -= nbytes; + if (off == 0 && nxt != 0 && TYPE(pp) == P_IBTREE) { + internal.len = 0; + UMRW_SET(internal.unused); + internal.type = B_KEYDATA; + internal.pgno = GET_BINTERNAL(dbp, pp, nxt)->pgno; + internal.nrecs = GET_BINTERNAL(dbp, pp, nxt)->nrecs; + memcpy(P_ENTRY(dbp, cp, off), &internal, nbytes); + } + else + memcpy(P_ENTRY(dbp, cp, off), + P_ENTRY(dbp, pp, nxt), nbytes); + } + return (0); +} diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c new file mode 100644 index 00000000..9f1a65ee --- /dev/null +++ b/src/btree/bt_stat.c @@ -0,0 +1,658 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" + +#ifdef HAVE_STATISTICS +/* + * __bam_stat -- + * Gather/print the btree statistics + * + * PUBLIC: int __bam_stat __P((DBC *, void *, u_int32_t)); + */ +int +__bam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + BTMETA *meta; + BTREE *t; + DB *dbp; + DB_BTREE_STAT *sp; + DB_LOCK lock, metalock; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + db_pgno_t pgno; + int ret, t_ret, write_meta; + + dbp = dbc->dbp; + env = dbp->env; + + meta = NULL; + t = dbp->bt_internal; + sp = NULL; + LOCK_INIT(metalock); + LOCK_INIT(lock); + mpf = dbp->mpf; + h = NULL; + ret = write_meta = 0; + + /* Allocate and clear the structure. */ + if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + + /* Get the metadata page for the entire database. */ + pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + goto err; + + if (flags == DB_FAST_STAT) + goto meta_only; + + /* Walk the metadata free list, counting pages. */ + for (sp->bt_free = 0, pgno = meta->dbmeta.free; pgno != PGNO_INVALID;) { + ++sp->bt_free; + + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &h)) != 0) + goto err; + + pgno = h->next_pgno; + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + h = NULL; + } + + /* Get the root page. */ + BAM_GET_ROOT(dbc, pgno, h, 0, DB_LOCK_READ, lock, ret); + if (ret != 0) + goto err; + DB_ASSERT(env, h != NULL); + + /* Get the levels from the root page. */ + sp->bt_levels = h->level; + + /* Discard the root page. 
*/ + ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority); + h = NULL; + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + /* Walk the tree. */ + if ((ret = __bam_traverse(dbc, + DB_LOCK_READ, PGNO_INVALID, __bam_stat_callback, sp)) != 0) + goto err; + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp) && (ret = __bam_compress_count(dbc, + &sp->bt_nkeys, &sp->bt_ndata)) != 0) + goto err; +#endif + + /* + * Get the subdatabase metadata page if it's not the same as the + * one we already have. + */ + write_meta = !F_ISSET(dbp, DB_AM_RDONLY) && + (!MULTIVERSION(dbp) || dbc->txn != NULL); +meta_only: + if (t->bt_meta != PGNO_BASE_MD || write_meta) { + ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); + meta = NULL; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + if ((ret = __db_lget(dbc, + 0, t->bt_meta, write_meta ? DB_LOCK_WRITE : DB_LOCK_READ, + 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &t->bt_meta, + dbc->thread_info, dbc->txn, + write_meta ? DB_MPOOL_DIRTY : 0, &meta)) != 0) + goto err; + } + if (flags == DB_FAST_STAT) { + if (dbp->type == DB_RECNO || + (dbp->type == DB_BTREE && F_ISSET(dbp, DB_AM_RECNUM))) { + BAM_GET_ROOT(dbc, pgno, h, 0, DB_LOCK_READ, lock, ret); + if (ret != 0) + goto err; + + sp->bt_nkeys = RE_NREC(h); + } else + sp->bt_nkeys = meta->dbmeta.key_count; + + sp->bt_ndata = dbp->type == DB_RECNO ? + sp->bt_nkeys : meta->dbmeta.record_count; + } + + /* Get metadata page statistics. */ + sp->bt_metaflags = meta->dbmeta.flags; + sp->bt_minkey = meta->minkey; + sp->bt_re_len = meta->re_len; + sp->bt_re_pad = meta->re_pad; + /* + * Don't take the page number from the meta-data page -- that value is + * only maintained in the primary database, we may have been called on + * a subdatabase. 
(Yes, I read the primary database meta-data page + * earlier in this function, but I'm asking the underlying cache so the + * code for the Hash and Btree methods is the same.) + */ + if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0) + goto err; + sp->bt_pagecnt = pgno + 1; + sp->bt_pagesize = meta->dbmeta.pagesize; + sp->bt_magic = meta->dbmeta.magic; + sp->bt_version = meta->dbmeta.version; + + if (write_meta != 0) { + meta->dbmeta.key_count = sp->bt_nkeys; + meta->dbmeta.record_count = sp->bt_ndata; + } + + *(DB_BTREE_STAT **)spp = sp; + +err: /* Discard the second page. */ + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (h != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Discard the metadata page. */ + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0 && sp != NULL) { + __os_ufree(env, sp); + *(DB_BTREE_STAT **)spp = NULL; + } + + return (ret); +} + +/* + * __bam_stat_print -- + * Display btree/recno statistics. 
+ * + * PUBLIC: int __bam_stat_print __P((DBC *, u_int32_t)); + */ +int +__bam_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + static const FN fn[] = { + { BTM_DUP, "duplicates" }, + { BTM_RECNO, "recno" }, + { BTM_RECNUM, "record-numbers" }, + { BTM_FIXEDLEN, "fixed-length" }, + { BTM_RENUMBER, "renumber" }, + { BTM_SUBDB, "multiple-databases" }, + { BTM_DUPSORT, "sorted duplicates" }, + { BTM_COMPRESS, "compressed" }, + { 0, NULL } + }; + DB *dbp; + DB_BTREE_STAT *sp; + ENV *env; + int lorder, ret; + const char *s; + + dbp = dbc->dbp; + env = dbp->env; +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) { + if ((ret = __partition_stat(dbc, &sp, flags)) != 0) + return (ret); + } else +#endif + if ((ret = __bam_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Default Btree/Recno database information:"); + } + + __db_msg(env, "%lx\tBtree magic number", (u_long)sp->bt_magic); + __db_msg(env, "%lu\tBtree version number", (u_long)sp->bt_version); + + (void)__db_get_lorder(dbp, &lorder); + switch (lorder) { + case 1234: + s = "Little-endian"; + break; + case 4321: + s = "Big-endian"; + break; + default: + s = "Unrecognized byte order"; + break; + } + __db_msg(env, "%s\tByte order", s); + __db_prflags(env, NULL, sp->bt_metaflags, fn, NULL, "\tFlags"); + if (dbp->type == DB_BTREE) + __db_dl(env, "Minimum keys per-page", (u_long)sp->bt_minkey); + if (dbp->type == DB_RECNO) { + __db_dl(env, + "Fixed-length record size", (u_long)sp->bt_re_len); + __db_msg(env, + "%#x\tFixed-length record pad", (u_int)sp->bt_re_pad); + } + __db_dl(env, + "Underlying database page size", (u_long)sp->bt_pagesize); + if (dbp->type == DB_BTREE) + __db_dl(env, "Overflow key/data size", + ((BTREE_CURSOR *)dbc->internal)->ovflsize); + __db_dl(env, "Number of levels in the tree", (u_long)sp->bt_levels); + __db_dl(env, dbp->type == DB_BTREE ? 
+ "Number of unique keys in the tree" : + "Number of records in the tree", (u_long)sp->bt_nkeys); + __db_dl(env, + "Number of data items in the tree", (u_long)sp->bt_ndata); + + __db_dl(env, + "Number of tree internal pages", (u_long)sp->bt_int_pg); + __db_dl_pct(env, + "Number of bytes free in tree internal pages", + (u_long)sp->bt_int_pgfree, + DB_PCT_PG(sp->bt_int_pgfree, sp->bt_int_pg, sp->bt_pagesize), "ff"); + + __db_dl(env, + "Number of tree leaf pages", (u_long)sp->bt_leaf_pg); + __db_dl_pct(env, "Number of bytes free in tree leaf pages", + (u_long)sp->bt_leaf_pgfree, DB_PCT_PG( + sp->bt_leaf_pgfree, sp->bt_leaf_pg, sp->bt_pagesize), "ff"); + + __db_dl(env, + "Number of tree duplicate pages", (u_long)sp->bt_dup_pg); + __db_dl_pct(env, + "Number of bytes free in tree duplicate pages", + (u_long)sp->bt_dup_pgfree, + DB_PCT_PG(sp->bt_dup_pgfree, sp->bt_dup_pg, sp->bt_pagesize), "ff"); + + __db_dl(env, + "Number of tree overflow pages", (u_long)sp->bt_over_pg); + __db_dl_pct(env, "Number of bytes free in tree overflow pages", + (u_long)sp->bt_over_pgfree, DB_PCT_PG( + sp->bt_over_pgfree, sp->bt_over_pg, sp->bt_pagesize), "ff"); + __db_dl(env, "Number of empty pages", (u_long)sp->bt_empty_pg); + + __db_dl(env, "Number of pages on the free list", (u_long)sp->bt_free); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __bam_stat_callback -- + * Statistics callback. + * + * PUBLIC: int __bam_stat_callback __P((DBC *, PAGE *, void *, int *)); + */ +int +__bam_stat_callback(dbc, h, cookie, putp) + DBC *dbc; + PAGE *h; + void *cookie; + int *putp; +{ + DB *dbp; + DB_BTREE_STAT *sp; + db_indx_t indx, *inp, top; + u_int8_t type; + + dbp = dbc->dbp; + sp = cookie; + *putp = 0; + top = NUM_ENT(h); + inp = P_INP(dbp, h); + + switch (TYPE(h)) { + case P_IBTREE: + case P_IRECNO: + ++sp->bt_int_pg; + sp->bt_int_pgfree += P_FREESPACE(dbp, h); + break; + case P_LBTREE: + if (top == 0) + ++sp->bt_empty_pg; + + /* Correct for on-page duplicates and deleted items. 
*/ + for (indx = 0; indx < top; indx += P_INDX) { + type = GET_BKEYDATA(dbp, h, indx + O_INDX)->type; + /* Ignore deleted items. */ + if (B_DISSET(type)) + continue; + + /* Ignore duplicate keys. */ + if (indx + P_INDX >= top || + inp[indx] != inp[indx + P_INDX]) + ++sp->bt_nkeys; + + /* Ignore off-page duplicates. */ + if (B_TYPE(type) != B_DUPLICATE) + ++sp->bt_ndata; + } + + ++sp->bt_leaf_pg; + sp->bt_leaf_pgfree += P_FREESPACE(dbp, h); + break; + case P_LRECNO: + if (top == 0) + ++sp->bt_empty_pg; + + /* + * If walking a recno tree, then each of these items is a key. + * Otherwise, we're walking an off-page duplicate set. + */ + if (dbp->type == DB_RECNO) { + /* + * Correct for deleted items in non-renumbering Recno + * databases. + */ + if (F_ISSET(dbp, DB_AM_RENUMBER)) { + sp->bt_nkeys += top; + sp->bt_ndata += top; + } else + for (indx = 0; indx < top; indx += O_INDX) { + type = GET_BKEYDATA(dbp, h, indx)->type; + if (!B_DISSET(type)) { + ++sp->bt_ndata; + ++sp->bt_nkeys; + } + } + + ++sp->bt_leaf_pg; + sp->bt_leaf_pgfree += P_FREESPACE(dbp, h); + } else { + sp->bt_ndata += top; + + ++sp->bt_dup_pg; + sp->bt_dup_pgfree += P_FREESPACE(dbp, h); + } + break; + case P_LDUP: + if (top == 0) + ++sp->bt_empty_pg; + + /* Correct for deleted items. */ + for (indx = 0; indx < top; indx += O_INDX) + if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + ++sp->bt_ndata; + + ++sp->bt_dup_pg; + sp->bt_dup_pgfree += P_FREESPACE(dbp, h); + break; + case P_OVERFLOW: + ++sp->bt_over_pg; + sp->bt_over_pgfree += P_OVFLSPACE(dbp, dbp->pgsize, h); + break; + default: + return (__db_pgfmt(dbp->env, h->pgno)); + } + return (0); +} + +/* + * __bam_print_cursor -- + * Display the current internal cursor. 
+ * + * PUBLIC: void __bam_print_cursor __P((DBC *)); + */ +void +__bam_print_cursor(dbc) + DBC *dbc; +{ + static const FN fn[] = { + { C_DELETED, "C_DELETED" }, + { C_RECNUM, "C_RECNUM" }, + { C_RENUMBER, "C_RENUMBER" }, + { 0, NULL } + }; + ENV *env; + BTREE_CURSOR *cp; + + env = dbc->env; + cp = (BTREE_CURSOR *)dbc->internal; + + STAT_ULONG("Overflow size", cp->ovflsize); + if (dbc->dbtype == DB_RECNO) + STAT_ULONG("Recno", cp->recno); + STAT_ULONG("Order", cp->order); + __db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags"); +} + +#else /* !HAVE_STATISTICS */ + +int +__bam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbc->env)); +} + +int +__bam_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbc->env)); +} +#endif + +/* + * __bam_key_range -- + * Return proportion of keys relative to given key. The numbers are + * slightly skewed due to on page duplicates. + * + * PUBLIC: int __bam_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); + */ +int +__bam_key_range(dbc, dbt, kp, flags) + DBC *dbc; + DBT *dbt; + DB_KEY_RANGE *kp; + u_int32_t flags; +{ + BTREE_CURSOR *cp; + EPG *sp; + double factor; + int exact, ret; + + COMPQUIET(flags, 0); + + if ((ret = __bam_search(dbc, PGNO_INVALID, + dbt, SR_STK_ONLY, 1, NULL, &exact)) != 0) + return (ret); + + cp = (BTREE_CURSOR *)dbc->internal; + kp->less = kp->greater = 0.0; + + factor = 1.0; + + /* Correct the leaf page. */ + cp->csp->entries /= 2; + cp->csp->indx /= 2; + for (sp = cp->sp; sp <= cp->csp; ++sp) { + /* + * At each level we know that pages greater than indx contain + * keys greater than what we are looking for and those less + * than indx are less than. The one pointed to by indx may + * have some less, some greater or even equal. 
If indx is + * equal to the number of entries, then the key is out of range + * and everything is less. + */ + if (sp->indx == 0) + kp->greater += factor * (sp->entries - 1)/sp->entries; + else if (sp->indx == sp->entries) + kp->less += factor; + else { + kp->less += factor * sp->indx / sp->entries; + kp->greater += factor * + ((sp->entries - sp->indx) - 1) / sp->entries; + } + factor *= 1.0/sp->entries; + } + + /* + * If there was an exact match then assign 1 n'th to the key itself. + * Otherwise that factor belongs to those greater than the key, unless + * the key was out of range. + */ + if (exact) + kp->equal = factor; + else { + if (kp->less != 1) + kp->greater += factor; + kp->equal = 0; + } + + if ((ret = __bam_stkrel(dbc, 0)) != 0) + return (ret); + + return (0); +} + +/* + * __bam_traverse -- + * Walk a Btree database. + * + * PUBLIC: int __bam_traverse __P((DBC *, db_lockmode_t, + * PUBLIC: db_pgno_t, int (*)(DBC *, PAGE *, void *, int *), void *)); + */ +int +__bam_traverse(dbc, mode, root_pgno, callback, cookie) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t root_pgno; + int (*callback)__P((DBC *, PAGE *, void *, int *)); + void *cookie; +{ + BINTERNAL *bi; + BKEYDATA *bk; + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + PAGE *h; + RINTERNAL *ri; + db_indx_t indx, *inp; + int already_put, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + already_put = 0; + LOCK_INIT(lock); + + COMPQUIET(h, NULL); + BAM_GET_ROOT(dbc, root_pgno, h, 0, mode, lock, ret); + if (ret != 0) + goto err1; + + switch (TYPE(h)) { + case P_IBTREE: + for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { + bi = GET_BINTERNAL(dbp, h, indx); + if (B_TYPE(bi->type) == B_OVERFLOW && + (ret = __db_traverse_big(dbc, + ((BOVERFLOW *)bi->data)->pgno, + callback, cookie)) != 0) + goto err; + if ((ret = __bam_traverse( + dbc, mode, bi->pgno, callback, cookie)) != 0) + goto err; + } + break; + case P_IRECNO: + for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { + ri = GET_RINTERNAL(dbp, h, 
indx); + if ((ret = __bam_traverse( + dbc, mode, ri->pgno, callback, cookie)) != 0) + goto err; + } + break; + case P_LBTREE: + inp = P_INP(dbp, h); + for (indx = 0; indx < NUM_ENT(h); indx += P_INDX) { + bk = GET_BKEYDATA(dbp, h, indx); + if (B_TYPE(bk->type) == B_OVERFLOW && + (indx + P_INDX >= NUM_ENT(h) || + inp[indx] != inp[indx + P_INDX])) { + if ((ret = __db_traverse_big(dbc, + GET_BOVERFLOW(dbp, h, indx)->pgno, + callback, cookie)) != 0) + goto err; + } + bk = GET_BKEYDATA(dbp, h, indx + O_INDX); + if (B_TYPE(bk->type) == B_DUPLICATE && + (ret = __bam_traverse(dbc, mode, + GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno, + callback, cookie)) != 0) + goto err; + if (B_TYPE(bk->type) == B_OVERFLOW && + (ret = __db_traverse_big(dbc, + GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno, + callback, cookie)) != 0) + goto err; + } + break; + case P_LDUP: + case P_LRECNO: + for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { + bk = GET_BKEYDATA(dbp, h, indx); + if (B_TYPE(bk->type) == B_OVERFLOW && + (ret = __db_traverse_big(dbc, + GET_BOVERFLOW(dbp, h, indx)->pgno, + callback, cookie)) != 0) + goto err; + } + break; + default: + return (__db_pgfmt(dbp->env, h->pgno)); + } + + ret = callback(dbc, h, cookie, &already_put); + +err: if (!already_put && (t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} diff --git a/src/btree/bt_upgrade.c b/src/btree/bt_upgrade.c new file mode 100644 index 00000000..e0c5a407 --- /dev/null +++ b/src/btree/bt_upgrade.c @@ -0,0 +1,153 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_upgrade.h" +#include "dbinc/btree.h" + +/* + * __bam_30_btreemeta -- + * Upgrade the metadata pages from version 6 to version 7. + * + * PUBLIC: int __bam_30_btreemeta __P((DB *, char *, u_int8_t *)); + */ +int +__bam_30_btreemeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + BTMETA2X *oldmeta; + BTMETA30 *newmeta; + ENV *env; + int ret; + + env = dbp->env; + + newmeta = (BTMETA30 *)buf; + oldmeta = (BTMETA2X *)buf; + + /* + * Move things from the end up, so we do not overwrite things. + * We are going to create a new uid, so we can move the stuff + * at the end of the structure first, overwriting the uid. + */ + + newmeta->re_pad = oldmeta->re_pad; + newmeta->re_len = oldmeta->re_len; + newmeta->minkey = oldmeta->minkey; + newmeta->maxkey = oldmeta->maxkey; + newmeta->dbmeta.free = oldmeta->free; + newmeta->dbmeta.flags = oldmeta->flags; + newmeta->dbmeta.type = P_BTREEMETA; + + newmeta->dbmeta.version = 7; + /* Replace the unique ID. */ + if ((ret = __os_fileid(env, real_name, 1, buf + 36)) != 0) + return (ret); + + newmeta->root = 1; + + return (0); +} + +/* + * __bam_31_btreemeta -- + * Upgrade the database from version 7 to version 8. + * + * PUBLIC: int __bam_31_btreemeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + BTMETA30 *oldmeta; + BTMETA31 *newmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + + newmeta = (BTMETA31 *)h; + oldmeta = (BTMETA30 *)h; + + /* + * Copy the effected fields down the page. + * The fields may overlap each other so we + * start at the bottom and use memmove. 
+ */ + newmeta->root = oldmeta->root; + newmeta->re_pad = oldmeta->re_pad; + newmeta->re_len = oldmeta->re_len; + newmeta->minkey = oldmeta->minkey; + newmeta->maxkey = oldmeta->maxkey; + memmove(newmeta->dbmeta.uid, + oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); + newmeta->dbmeta.flags = oldmeta->dbmeta.flags; + newmeta->dbmeta.record_count = 0; + newmeta->dbmeta.key_count = 0; + ZERO_LSN(newmeta->dbmeta.unused3); + + /* Set the version number. */ + newmeta->dbmeta.version = 8; + + /* Upgrade the flags. */ + if (LF_ISSET(DB_DUPSORT)) + F_SET(&newmeta->dbmeta, BTM_DUPSORT); + + *dirtyp = 1; + return (0); +} + +/* + * __bam_31_lbtree -- + * Upgrade the database btree leaf pages. + * + * PUBLIC: int __bam_31_lbtree + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__bam_31_lbtree(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + BKEYDATA *bk; + db_pgno_t pgno; + db_indx_t indx; + int ret; + + ret = 0; + for (indx = O_INDX; indx < NUM_ENT(h); indx += P_INDX) { + bk = GET_BKEYDATA(dbp, h, indx); + if (B_TYPE(bk->type) == B_DUPLICATE) { + pgno = GET_BOVERFLOW(dbp, h, indx)->pgno; + if ((ret = __db_31_offdup(dbp, real_name, fhp, + LF_ISSET(DB_DUPSORT) ? 1 : 0, &pgno)) != 0) + break; + if (pgno != GET_BOVERFLOW(dbp, h, indx)->pgno) { + *dirtyp = 1; + GET_BOVERFLOW(dbp, h, indx)->pgno = pgno; + } + } + } + + return (ret); +} diff --git a/src/btree/bt_verify.c b/src/btree/bt_verify.c new file mode 100644 index 00000000..b1cc9a36 --- /dev/null +++ b/src/btree/bt_verify.c @@ -0,0 +1,2772 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __bam_safe_getdata __P((DB *, DB_THREAD_INFO *, + PAGE *, u_int32_t, int, DBT *, int *)); +static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + db_indx_t *, u_int32_t)); +static int __bam_vrfy_treeorder __P((DB *, DB_THREAD_INFO *, PAGE *, + BINTERNAL *, BINTERNAL *, int (*)(DB *, const DBT *, const DBT *), + u_int32_t)); +static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + db_indx_t *, u_int32_t)); + +/* + * __bam_vrfy_meta -- + * Verify the btree-specific part of a metadata page. + * + * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__bam_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + BTMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + VRFY_PAGEINFO *pip; + int isbad, t_ret, ret; + db_indx_t ovflsize; + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + /* + * If VRFY_INCOMPLETE is not set, then we didn't come through + * __db_vrfy_pagezero and didn't incompletely + * check this page--we haven't checked it at all. + * Thus we need to call __db_vrfy_meta and check the common fields. + * + * If VRFY_INCOMPLETE is set, we've already done all the same work + * in __db_vrfy_pagezero, so skip the check. + */ + if (!F_ISSET(pip, VRFY_INCOMPLETE) && + (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* bt_minkey: must be >= 2; must produce sensible ovflsize */ + + /* avoid division by zero */ + ovflsize = meta->minkey > 0 ? 
+ B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0; + + if (meta->minkey < 2 || + ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) { + pip->bt_minkey = 0; + isbad = 1; + EPRINT((env, DB_STR_A("1034", + "Page %lu: nonsensical bt_minkey value %lu on metadata page", + "%lu %lu"), (u_long)pgno, (u_long)meta->minkey)); + } else + pip->bt_minkey = meta->minkey; + + /* re_len: no constraints on this (may be zero or huge--we make rope) */ + pip->re_pad = meta->re_pad; + pip->re_len = meta->re_len; + + /* + * The root must not be current page or 0 and it must be within + * database. If this metadata page is the master meta data page + * of the file, then the root page had better be page 1. + */ + pip->root = 0; + if (meta->root == PGNO_INVALID || + meta->root == pgno || !IS_VALID_PGNO(meta->root) || + (pgno == PGNO_BASE_MD && meta->root != 1)) { + isbad = 1; + EPRINT((env, DB_STR_A("1035", + "Page %lu: nonsensical root page %lu on metadata page", + "%lu %lu"), (u_long)pgno, (u_long)meta->root)); + } else + pip->root = meta->root; + + /* Flags. */ + if (F_ISSET(&meta->dbmeta, BTM_RENUMBER)) + F_SET(pip, VRFY_IS_RRECNO); + + if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) { + /* + * If this is a master db meta page, it had better not have + * duplicates. 
+ */ + if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) { + isbad = 1; + EPRINT((env, DB_STR_A("1036", +"Page %lu: Btree metadata page has both duplicates and multiple databases", + "%lu"), (u_long)pgno)); + } + F_SET(pip, VRFY_HAS_SUBDBS); + } + + if (F_ISSET(&meta->dbmeta, BTM_DUP)) + F_SET(pip, VRFY_HAS_DUPS); + if (F_ISSET(&meta->dbmeta, BTM_DUPSORT)) + F_SET(pip, VRFY_HAS_DUPSORT); + if (F_ISSET(&meta->dbmeta, BTM_RECNUM)) + F_SET(pip, VRFY_HAS_RECNUMS); + if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) { + EPRINT((env, DB_STR_A("1037", + "Page %lu: Btree metadata page illegally has both recnums and dups", + "%lu"), (u_long)pgno)); + isbad = 1; + } + + if (F_ISSET(&meta->dbmeta, BTM_RECNO)) { + F_SET(pip, VRFY_IS_RECNO); + dbp->type = DB_RECNO; + } else if (F_ISSET(pip, VRFY_IS_RRECNO)) { + isbad = 1; + EPRINT((env, DB_STR_A("1038", + "Page %lu: metadata page has renumber flag set but is not recno", + "%lu"), (u_long)pgno)); + } + +#ifdef HAVE_COMPRESSION + if (F_ISSET(&meta->dbmeta, BTM_COMPRESS)) { + F_SET(pip, VRFY_HAS_COMPRESS); + if (!DB_IS_COMPRESSED(dbp)) { + ((BTREE *)dbp->bt_internal)->bt_compress = + __bam_defcompress; + ((BTREE *)dbp->bt_internal)->bt_decompress = + __bam_defdecompress; + } + /* + * Copy dup_compare to compress_dup_compare, and use the + * compression duplicate compare. 
+ */ + if (F_ISSET(pip, VRFY_HAS_DUPSORT)) { + if (dbp->dup_compare == NULL) + dbp->dup_compare = __bam_defcmp; + if (((BTREE *)dbp->bt_internal)->compress_dup_compare + == NULL) { + ((BTREE *)dbp->bt_internal)-> + compress_dup_compare = dbp->dup_compare; + dbp->dup_compare = __bam_compress_dupcmp; + } + } + } + + if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_COMPRESS)) { + EPRINT((env, DB_STR_A("1039", + "Page %lu: Btree metadata page illegally has both recnums and compression", + "%lu"), (u_long)pgno)); + isbad = 1; + } + if (F_ISSET(pip, VRFY_HAS_DUPS) && !F_ISSET(pip, VRFY_HAS_DUPSORT) && + F_ISSET(pip, VRFY_HAS_COMPRESS)) { + EPRINT((env, DB_STR_A("1040", + "Page %lu: Btree metadata page illegally has both " + "unsorted duplicates and compression", + "%lu"), (u_long)pgno)); + isbad = 1; + } +#endif + + if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) { + EPRINT((env, DB_STR_A("1041", + "Page %lu: recno metadata page specifies duplicates", + "%lu"), (u_long)pgno)); + isbad = 1; + } + + if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN)) + F_SET(pip, VRFY_IS_FIXEDLEN); + else if (pip->re_len > 0) { + /* + * It's wrong to have an re_len if it's not a fixed-length + * database + */ + isbad = 1; + EPRINT((env, DB_STR_A("1042", + "Page %lu: re_len of %lu in non-fixed-length database", + "%lu %lu"), (u_long)pgno, (u_long)pip->re_len)); + } + + /* + * We do not check that the rest of the page is 0, because it may + * not be and may still be correct. + */ + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (LF_ISSET(DB_SALVAGE) && + (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ram_vrfy_leaf -- + * Verify a recno leaf page. 
+ * + * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__ram_vrfy_leaf(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + BKEYDATA *bk; + ENV *env; + VRFY_PAGEINFO *pip; + db_indx_t i; + int ret, t_ret, isbad; + u_int32_t re_len_guess, len; + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + if (TYPE(h) != P_LRECNO) { + ret = __db_unknown_path(env, "__ram_vrfy_leaf"); + goto err; + } + + /* + * Verify (and, if relevant, save off) page fields common to + * all PAGEs. + */ + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD; + * further checks are dangerous. + */ + if ((ret = __bam_vrfy_inp(dbp, + vdp, h, pgno, &pip->entries, flags)) != 0) + goto err; + + if (F_ISSET(pip, VRFY_HAS_DUPS)) { + EPRINT((env, DB_STR_A("1043", + "Page %lu: Recno database has dups", + "%lu"), (u_long)pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + + /* + * Walk through inp and see if the lengths of all the records are the + * same--if so, this may be a fixed-length database, and we want to + * save off this value. We know inp to be safe if we've gotten this + * far. + */ + re_len_guess = 0; + for (i = 0; i < NUM_ENT(h); i++) { + bk = GET_BKEYDATA(dbp, h, i); + /* KEYEMPTY. Go on. */ + if (B_DISSET(bk->type)) + continue; + if (bk->type == B_OVERFLOW) + len = ((BOVERFLOW *)bk)->tlen; + else if (bk->type == B_KEYDATA) + len = bk->len; + else { + isbad = 1; + EPRINT((env, DB_STR_A("1044", + "Page %lu: nonsensical type for item %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + continue; + } + if (re_len_guess == 0) + re_len_guess = len; + + /* + * Is this item's len the same as the last one's? If not, + * reset to 0 and break--we don't have a single re_len. 
+ * Otherwise, go on to the next item. + */ + if (re_len_guess != len) { + re_len_guess = 0; + break; + } + } + pip->re_len = re_len_guess; + + /* Save off record count. */ + pip->rec_cnt = NUM_ENT(h); + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __bam_vrfy -- + * Verify a btree leaf or internal page. + * + * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__bam_vrfy(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + VRFY_PAGEINFO *pip; + int ret, t_ret, isbad; + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + switch (TYPE(h)) { + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LDUP: + break; + default: + ret = __db_unknown_path(env, "__bam_vrfy"); + goto err; + } + + /* + * Verify (and, if relevant, save off) page fields common to + * all PAGEs. + */ + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * The record count is, on internal pages, stored in an overloaded + * next_pgno field. Save it off; we'll verify it when we check + * overall database structure. We could overload the field + * in VRFY_PAGEINFO, too, but this seems gross, and space + * is not at such a premium. + */ + pip->rec_cnt = RE_NREC(h); + + /* + * Verify inp[]. 
+ */ + if (TYPE(h) == P_IRECNO) { + if ((ret = __ram_vrfy_inp(dbp, + vdp, h, pgno, &pip->entries, flags)) != 0) + goto err; + } else if ((ret = __bam_vrfy_inp(dbp, + vdp, h, pgno, &pip->entries, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + EPRINT((env, DB_STR_A("1045", + "Page %lu: item order check unsafe: skipping", + "%lu"), (u_long)pgno)); + } else if (!LF_ISSET(DB_NOORDERCHK) && (ret = + __bam_vrfy_itemorder(dbp, + vdp, vdp->thread_info, h, pgno, 0, 0, 0, flags)) != 0) { + /* + * We know that the elements of inp are reasonable. + * + * Check that elements fall in the proper order. + */ + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ram_vrfy_inp -- + * Verify that all entries in a P_IRECNO inp[] array are reasonable, + * and count them. Note that P_LRECNO uses __bam_vrfy_inp; + * P_IRECNOs are a special, and simpler, case, since they have + * RINTERNALs rather than BKEYDATA/BINTERNALs. 
+ */ +static int +__ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + db_indx_t *nentriesp; + u_int32_t flags; +{ + ENV *env; + RINTERNAL *ri; + VRFY_CHILDINFO child; + VRFY_PAGEINFO *pip; + int ret, t_ret, isbad; + u_int32_t himark, i, offset, nentries; + db_indx_t *inp; + u_int8_t *pagelayout, *p; + + env = dbp->env; + isbad = 0; + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + nentries = 0; + pagelayout = NULL; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + if (TYPE(h) != P_IRECNO) { + ret = __db_unknown_path(env, "__ram_vrfy_inp"); + goto err; + } + + himark = dbp->pgsize; + if ((ret = __os_malloc(env, dbp->pgsize, &pagelayout)) != 0) + goto err; + memset(pagelayout, 0, dbp->pgsize); + inp = P_INP(dbp, h); + for (i = 0; i < NUM_ENT(h); i++) { + if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) { + EPRINT((env, DB_STR_A("1046", + "Page %lu: entries listing %lu overlaps data", + "%lu %lu"), (u_long)pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + offset = inp[i]; + /* + * Check that the item offset is reasonable: it points + * somewhere after the inp array and before the end of the + * page. + */ + if (offset <= (u_int32_t)((u_int8_t *)inp + i - + (u_int8_t *)h) || + offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) { + isbad = 1; + EPRINT((env, DB_STR_A("1047", + "Page %lu: bad offset %lu at index %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)offset, + (u_long)i)); + continue; + } + + /* Update the high-water mark (what HOFFSET should be) */ + if (offset < himark) + himark = offset; + + nentries++; + + /* Make sure this RINTERNAL is not multiply referenced. 
*/ + ri = GET_RINTERNAL(dbp, h, i); + if (pagelayout[offset] == 0) { + pagelayout[offset] = 1; + child.pgno = ri->pgno; + child.type = V_RECNO; + child.nrecs = ri->nrecs; + if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) + goto err; + } else { + EPRINT((env, DB_STR_A("1048", + "Page %lu: RINTERNAL structure at offset %lu referenced twice", + "%lu %lu"), (u_long)pgno, (u_long)offset)); + isbad = 1; + } + } + + for (p = pagelayout + himark; + p < pagelayout + dbp->pgsize; + p += RINTERNAL_SIZE) + if (*p != 1) { + EPRINT((env, DB_STR_A("1049", + "Page %lu: gap between items at offset %lu", + "%lu %lu"), (u_long)pgno, + (u_long)(p - pagelayout))); + isbad = 1; + } + + if ((db_indx_t)himark != HOFFSET(h)) { + EPRINT((env, DB_STR_A("1050", + "Page %lu: bad HOFFSET %lu, appears to be %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)(HOFFSET(h)), + (u_long)himark)); + isbad = 1; + } + + *nentriesp = nentries; + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (pagelayout != NULL) + __os_free(env, pagelayout); + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +typedef enum { VRFY_ITEM_NOTSET=0, VRFY_ITEM_BEGIN, VRFY_ITEM_END } VRFY_ITEM; + +/* + * __bam_vrfy_inp -- + * Verify that all entries in inp[] array are reasonable; + * count them. + */ +static int +__bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + db_indx_t *nentriesp; + u_int32_t flags; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + ENV *env; + VRFY_CHILDINFO child; + VRFY_ITEM *pagelayout; + VRFY_PAGEINFO *pip; + u_int32_t himark, offset; /* + * These would be db_indx_ts + * but for alignment. 
+ */ + u_int32_t i, endoff, nentries; + int isbad, initem, isdupitem, ret, t_ret; + + env = dbp->env; + isbad = isdupitem = 0; + nentries = 0; + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + switch (TYPE(h)) { + case P_IBTREE: + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + break; + default: + /* + * In the salvager, we might call this from a page which + * we merely suspect is a btree page. Otherwise, it + * shouldn't get called--if it is, that's a verifier bug. + */ + if (LF_ISSET(DB_SALVAGE)) + break; + ret = __db_unknown_path(env, "__bam_vrfy_inp"); + goto err; + } + + /* + * Loop through inp[], the array of items, until we either + * run out of entries or collide with the data. Keep track + * of h_offset in himark. + * + * For each element in inp[i], make sure it references a region + * that starts after the end of the inp array (as defined by + * NUM_ENT(h)), ends before the beginning of the page, doesn't + * overlap any other regions, and doesn't have a gap between + * it and the region immediately after it. + */ + himark = dbp->pgsize; + if ((ret = __os_calloc( + env, dbp->pgsize, sizeof(pagelayout[0]), &pagelayout)) != 0) + goto err; + for (i = 0; i < NUM_ENT(h); i++) { + switch (ret = __db_vrfy_inpitem(dbp, + h, pgno, i, 1, flags, &himark, &offset)) { + case 0: + break; + case DB_VERIFY_BAD: + isbad = 1; + continue; + case DB_VERIFY_FATAL: + isbad = 1; + goto err; + default: + DB_ASSERT(env, ret != 0); + break; + } + + /* + * We now have a plausible beginning for the item, and we know + * its length is safe. + * + * Mark the beginning and end in pagelayout so we can make sure + * items have no overlaps or gaps. 
+ */ + bk = GET_BKEYDATA(dbp, h, i); + if (pagelayout[offset] == VRFY_ITEM_NOTSET) + pagelayout[offset] = VRFY_ITEM_BEGIN; + else if (pagelayout[offset] == VRFY_ITEM_BEGIN) { + /* + * Having two inp entries that point at the same patch + * of page is legal if and only if the page is + * a btree leaf and they're onpage duplicate keys-- + * that is, if (i % P_INDX) == 0. + */ + if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) { + /* Flag for later. */ + F_SET(pip, VRFY_HAS_DUPS); + + /* Bump up nentries so we don't undercount. */ + nentries++; + + /* + * We'll check to make sure the end is + * equal, too. + */ + isdupitem = 1; + } else { + isbad = 1; + EPRINT((env, DB_STR_A("1051", + "Page %lu: duplicated item %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + } + } + + /* + * Mark the end. Its location varies with the page type + * and the item type. + * + * If the end already has a sign other than 0, do nothing-- + * it's an overlap that we'll catch later. + */ + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + if (TYPE(h) == P_IBTREE) + /* It's a BINTERNAL. */ + endoff = offset + BINTERNAL_SIZE(bk->len) - 1; + else + endoff = offset + BKEYDATA_SIZE(bk->len) - 1; + break; + case B_DUPLICATE: + /* + * Flag that we have dups; we'll check whether + * that's okay during the structure check. + */ + F_SET(pip, VRFY_HAS_DUPS); + /* FALLTHROUGH */ + case B_OVERFLOW: + /* + * Overflow entries on internal pages are stored + * as the _data_ of a BINTERNAL; overflow entries + * on leaf pages are stored as the entire entry. + */ + endoff = offset + + ((TYPE(h) == P_IBTREE) ? + BINTERNAL_SIZE(BOVERFLOW_SIZE) : + BOVERFLOW_SIZE) - 1; + break; + default: + /* + * We'll complain later; for now, just mark + * a minimum. + */ + endoff = offset + BKEYDATA_SIZE(0) - 1; + break; + } + + /* + * If this is an onpage duplicate key we've seen before, + * the end had better coincide too. 
+ */ + if (isdupitem && pagelayout[endoff] != VRFY_ITEM_END) { + EPRINT((env, DB_STR_A("1052", + "Page %lu: duplicated item %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + isbad = 1; + } else if (pagelayout[endoff] == VRFY_ITEM_NOTSET) + pagelayout[endoff] = VRFY_ITEM_END; + isdupitem = 0; + + /* + * There should be no deleted items in a quiescent tree, + * except in recno. + */ + if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) { + isbad = 1; + EPRINT((env, DB_STR_A("1053", + "Page %lu: item %lu marked deleted", "%lu %lu"), + (u_long)pgno, (u_long)i)); + } + + /* + * Check the type and such of bk--make sure it's reasonable + * for the pagetype. + */ + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + /* + * This is a normal, non-overflow BKEYDATA or BINTERNAL. + * The only thing to check is the len, and that's + * already been done. + */ + break; + case B_DUPLICATE: + if (TYPE(h) == P_IBTREE) { + isbad = 1; + EPRINT((env, DB_STR_A("1054", + "Page %lu: duplicate page referenced by internal btree page at item %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + break; + } else if (TYPE(h) == P_LRECNO) { + isbad = 1; + EPRINT((env, DB_STR_A("1055", + "Page %lu: duplicate page referenced by recno page at item %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + break; + } + /* FALLTHROUGH */ + case B_OVERFLOW: + bo = (TYPE(h) == P_IBTREE) ? + (BOVERFLOW *)(((BINTERNAL *)bk)->data) : + (BOVERFLOW *)bk; + + if (B_TYPE(bk->type) == B_OVERFLOW) + /* Make sure tlen is reasonable. */ + if (bo->tlen > dbp->pgsize * vdp->last_pgno) { + isbad = 1; + EPRINT((env, DB_STR_A("1056", + "Page %lu: impossible tlen %lu, item %lu", + "%lu %lu %lu"), (u_long)pgno, + (u_long)bo->tlen, (u_long)i)); + /* Don't save as a child. 
*/ + break; + } + + if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno || + bo->pgno == PGNO_INVALID) { + isbad = 1; + EPRINT((env, DB_STR_A("1057", + "Page %lu: offpage item %lu has bad pgno %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)i, + (u_long)bo->pgno)); + /* Don't save as a child. */ + break; + } + + child.pgno = bo->pgno; + child.type = (B_TYPE(bk->type) == B_OVERFLOW ? + V_OVERFLOW : V_DUPLICATE); + child.tlen = bo->tlen; + if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) + goto err; + break; + default: + isbad = 1; + EPRINT((env, DB_STR_A("1058", + "Page %lu: item %lu of invalid type %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)i, + (u_long)B_TYPE(bk->type))); + break; + } + } + + /* + * Now, loop through and make sure the items are contiguous and + * non-overlapping. + */ + initem = 0; + for (i = himark; i < dbp->pgsize; i++) + if (initem == 0) + switch (pagelayout[i]) { + case VRFY_ITEM_NOTSET: + /* May be just for alignment. */ + if (i != DB_ALIGN(i, sizeof(u_int32_t))) + continue; + + isbad = 1; + EPRINT((env, DB_STR_A("1059", + "Page %lu: gap between items at offset %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + /* Find the end of the gap */ + for (; pagelayout[i + 1] == VRFY_ITEM_NOTSET && + (size_t)(i + 1) < dbp->pgsize; i++) + ; + break; + case VRFY_ITEM_BEGIN: + /* We've found an item. Check its alignment. */ + if (i != DB_ALIGN(i, sizeof(u_int32_t))) { + isbad = 1; + EPRINT((env, DB_STR_A("1060", + "Page %lu: offset %lu unaligned", + "%lu %lu"), (u_long)pgno, + (u_long)i)); + } + initem = 1; + nentries++; + break; + case VRFY_ITEM_END: + /* + * We've hit the end of an item even though + * we don't think we're in one; must + * be an overlap. + */ + isbad = 1; + EPRINT((env, DB_STR_A("1061", + "Page %lu: overlapping items at offset %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + break; + } + else + switch (pagelayout[i]) { + case VRFY_ITEM_NOTSET: + /* In the middle of an item somewhere. Okay. 
*/ + break; + case VRFY_ITEM_END: + /* End of an item; switch to out-of-item mode.*/ + initem = 0; + break; + case VRFY_ITEM_BEGIN: + /* + * Hit a second item beginning without an + * end. Overlap. + */ + isbad = 1; + EPRINT((env, DB_STR_A("1062", + "Page %lu: overlapping items at offset %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + break; + } + + __os_free(env, pagelayout); + + /* Verify HOFFSET. */ + if ((db_indx_t)himark != HOFFSET(h)) { + EPRINT((env, DB_STR_A("1063", + "Page %lu: bad HOFFSET %lu, appears to be %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)HOFFSET(h), + (u_long)himark)); + isbad = 1; + } + +err: if (nentriesp != NULL) + *nentriesp = nentries; + + if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + + return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); +} + +/* + * __bam_vrfy_itemorder -- + * Make sure the items on a page sort correctly. + * + * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are + * reasonable; be sure that __bam_vrfy_inp has been called first. + * + * If ovflok is set, it also assumes that overflow page chains + * hanging off the current page have been sanity-checked, and so we + * can use __bam_cmp to verify their ordering. If it is not set, + * and we run into an overflow page, carp and return DB_VERIFY_BAD; + * we shouldn't be called if any exist. 
+ * + * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, DB_THREAD_INFO *, + * PUBLIC: PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t)); + */ +int +__bam_vrfy_itemorder(dbp, vdp, ip, h, pgno, nentries, ovflok, hasdups, flags) + DB *dbp; + VRFY_DBINFO *vdp; + DB_THREAD_INFO *ip; + PAGE *h; + db_pgno_t pgno; + u_int32_t nentries; + int ovflok, hasdups; + u_int32_t flags; +{ + BINTERNAL *bi; + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE *bt; + DBC *dbc; + DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp; + ENV *env; + VRFY_PAGEINFO *pip; + db_indx_t i, *inp; + int adj, cmp, freedup_1, freedup_2, isbad, ret, t_ret; + int (*dupfunc) __P((DB *, const DBT *, const DBT *)); + int (*func) __P((DB *, const DBT *, const DBT *)); + void *buf1, *buf2, *tmpbuf; + + /* + * We need to work in the ORDERCHKONLY environment where we might + * not have a pip, but we also may need to work in contexts where + * NUM_ENT isn't safe. + */ + if (vdp != NULL) { + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + nentries = pip->entries; + } else + pip = NULL; + + env = dbp->env; + ret = isbad = 0; + bo = NULL; /* Shut up compiler. */ + + memset(&dbta, 0, sizeof(DBT)); + F_SET(&dbta, DB_DBT_REALLOC); + + memset(&dbtb, 0, sizeof(DBT)); + F_SET(&dbtb, DB_DBT_REALLOC); + + buf1 = buf2 = NULL; + + DB_ASSERT(env, !LF_ISSET(DB_NOORDERCHK)); + + dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; + if (TYPE(h) == P_LDUP) + func = dupfunc; + else { + func = __bam_defcmp; + if (dbp->bt_internal != NULL) { + bt = (BTREE *)dbp->bt_internal; + if (bt->bt_compare != NULL) + func = bt->bt_compare; + } + } + + /* + * We alternate our use of dbta and dbtb so that we can walk + * through the page key-by-key without copying a dbt twice. + * p1 is always the dbt for index i - 1, and p2 for index i. + * Reset the data pointers in case we are retrying. + */ +retry: p1 = &dbta; + p1->data = NULL; + p2 = &dbtb; + p2->data = NULL; + + /* + * Loop through the entries. 
nentries ought to contain the + * actual count, and so is a safe way to terminate the loop; whether + * we inc. by one or two depends on whether we're a leaf page-- + * on a leaf page, we care only about keys. On internal pages + * and LDUP pages, we want to check the order of all entries. + * + * Note that on IBTREE pages or the index page of a partitioned + * database, we start with item 1, since item 0 doesn't get looked + * at by __bam_cmp. + */ + inp = P_INP(dbp, h); + adj = (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX; + for (i = (TYPE(h) == P_IBTREE || dbp->p_internal != NULL) ? adj : 0; + i < nentries; i += adj) { + /* + * Put key i-1, now in p2, into p1, by swapping DBTs and bufs. + */ + tmp = p1; + p1 = p2; + p2 = tmp; + tmpbuf = buf1; + buf1 = buf2; + buf2 = tmpbuf; + + /* + * Get key i into p2. + */ + switch (TYPE(h)) { + case P_IBTREE: + bi = GET_BINTERNAL(dbp, h, i); + if (B_TYPE(bi->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)(bi->data); + goto overflow; + } else { + p2->data = bi->data; + p2->size = bi->len; + } + + /* + * The leftmost key on an internal page must be + * len 0, since it's just a placeholder and + * automatically sorts less than all keys. + * + * XXX + * This criterion does not currently hold! + * See todo list item #1686. Meanwhile, it's harmless + * to just not check for it. + */ +#if 0 + if (i == 0 && bi->len != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1064", + "Page %lu: lowest key on internal page of nonzero length", + "%lu"), (u_long)pgno)); + } +#endif + break; + case P_LBTREE: + case P_LDUP: + bk = GET_BKEYDATA(dbp, h, i); + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + goto overflow; + } else { + p2->data = bk->data; + p2->size = bk->len; + } + break; + default: + /* + * This means our caller screwed up and sent us + * an inappropriate page. 
+ */ + ret = __db_unknown_path(env, "__bam_vrfy_itemorder"); + goto err; + } + + if (0) { + /* + * If ovflok != 1, we can't safely go chasing + * overflow pages with the normal routines now; + * they might be unsafe or nonexistent. Mark this + * page as incomplete and return. + * + * Note that we don't need to worry about freeing + * buffers, since they can't have been allocated + * if overflow items are unsafe. + */ +overflow: if (!ovflok) { + F_SET(pip, VRFY_INCOMPLETE); + goto err; + } + + /* + * Overflow items are safe to chase. Do so. + * Fetch the overflow item into p2->data, + * NULLing it or reallocing it as appropriate. + * + * (We set p2->data to buf2 before the call + * so we're sure to realloc if we can and if p2 + * was just pointing at a non-overflow item.) + */ + p2->data = buf2; + if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + goto err; + if ((ret = __db_goff(dbc, + p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1065", + "Page %lu: error %lu in fetching overflow item %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)ret, + (u_long)i)); + } + /* In case it got realloc'ed and thus changed. */ + buf2 = p2->data; + } + + /* Compare with the last key. */ + if (p1->data != NULL && p2->data != NULL) { + cmp = inp[i] == inp[i - adj] ? 0 : func(dbp, p1, p2); + + /* comparison succeeded */ + if (cmp > 0) { + /* + * If we are looking at an internal page, we + * don't know whether it is part of the main + * database or in an off-page-duplicate tree. + * If the main comparator fails, retry with + * the duplicate comparator. + */ + if (TYPE(h) == P_IBTREE && func != dupfunc) { + func = dupfunc; + goto retry; + } + + isbad = 1; + EPRINT((env, DB_STR_A("1066", + "Page %lu: out-of-order key at entry %lu", + "%lu %lu"), (u_long)pgno, (u_long)i)); + /* proceed */ + } else if (cmp == 0) { + if (inp[i] != inp[i - adj]) { + /* See above. 
*/ + if (TYPE(h) == P_IBTREE && + func != dupfunc) { + func = dupfunc; + goto retry; + } + isbad = 1; + EPRINT((env, DB_STR_A("1067", + "Page %lu: non-dup dup key at entry %lu", + "%lu %lu"), (u_long)pgno, + (u_long)i)); + } + /* + * If they compared equally, this + * had better be a (sub)database with dups. + * Mark it so we can check during the + * structure check. + */ + if (pip != NULL) + F_SET(pip, VRFY_HAS_DUPS); + else if (hasdups == 0) { + /* See above. */ + if (TYPE(h) == P_IBTREE && + func != dupfunc) { + func = dupfunc; + goto retry; + } + isbad = 1; + EPRINT((env, DB_STR_A("1068", + "Page %lu: database with no duplicates has duplicated keys", + "%lu"), (u_long)pgno)); + } + + /* + * If we're a btree leaf, check to see + * if the data items of these on-page dups are + * in sorted order. If not, flag this, so + * that we can make sure during the + * structure checks that the DUPSORT flag + * is unset. + * + * At this point i points to a duplicate key. + * Compare the datum before it (same key) + * to the datum after it, i.e. i-1 to i+1. + */ + if (TYPE(h) == P_LBTREE) { + /* + * Unsafe; continue and we'll pick + * up the bogus nentries later. + */ + if (i + 1 >= (db_indx_t)nentries) + continue; + + /* + * We don't bother with clever memory + * management with on-page dups, + * as it's only really a big win + * in the overflow case, and overflow + * dups are probably (?) rare. + */ + if (((ret = __bam_safe_getdata(dbp, + ip, h, i - 1, ovflok, + &dup_1, &freedup_1)) != 0) || + ((ret = __bam_safe_getdata(dbp, + ip, h, i + 1, ovflok, + &dup_2, &freedup_2)) != 0)) + goto err; + + /* + * If either of the data are NULL, + * it's because they're overflows and + * it's not safe to chase them now. + * Mark an incomplete and return. + */ + if (dup_1.data == NULL || + dup_2.data == NULL) { + DB_ASSERT(env, !ovflok); + F_SET(pip, VRFY_INCOMPLETE); + goto err; + } + + /* + * If the dups are out of order, + * flag this. 
It's not an error + * until we do the structure check + * and see whether DUPSORT is set. + */ + if (dupfunc(dbp, &dup_1, &dup_2) > 0) + F_SET(pip, VRFY_DUPS_UNSORTED); + + if (freedup_1) + __os_ufree(env, dup_1.data); + if (freedup_2) + __os_ufree(env, dup_2.data); + } + } + } + } + +err: if (pip != NULL && ((t_ret = + __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0) + ret = t_ret; + + if (buf1 != NULL) + __os_ufree(env, buf1); + if (buf2 != NULL) + __os_ufree(env, buf2); + + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __bam_vrfy_structure -- + * Verify the tree structure of a btree database (including the master + * database containing subdbs). + * + * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: void *, void *, u_int32_t)); + */ +int +__bam_vrfy_structure(dbp, vdp, meta_pgno, lp, rp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t meta_pgno; + void *lp, *rp; + u_int32_t flags; +{ + DB *pgset; + ENV *env; + VRFY_PAGEINFO *mip, *rip; + db_pgno_t root, p; + int t_ret, ret; + u_int32_t nrecs, level, relen, stflags; + + env = dbp->env; + mip = rip = 0; + pgset = vdp->pgset; + + if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0) + return (ret); + + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, meta_pgno, (int *)&p)) != 0) + goto err; + if (p != 0) { + EPRINT((env, DB_STR_A("1069", + "Page %lu: btree metadata page observed twice", + "%lu"), (u_long)meta_pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, meta_pgno)) != 0) + goto err; + + root = mip->root; + + if (root == 0) { + EPRINT((env, DB_STR_A("1070", + "Page %lu: btree metadata page has no root", + "%lu"), (u_long)meta_pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + + if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0) + goto err; + + switch (rip->type) { + case P_IBTREE: + case P_LBTREE: + stflags = flags | DB_ST_TOPLEVEL; + if 
(F_ISSET(mip, VRFY_HAS_DUPS)) + stflags |= DB_ST_DUPOK; + if (F_ISSET(mip, VRFY_HAS_DUPSORT)) + stflags |= DB_ST_DUPSORT; + if (F_ISSET(mip, VRFY_HAS_RECNUMS)) + stflags |= DB_ST_RECNUM; + ret = __bam_vrfy_subtree(dbp, + vdp, root, lp, rp, stflags, NULL, NULL, NULL); + break; + case P_IRECNO: + case P_LRECNO: + stflags = + flags | DB_ST_RECNUM | DB_ST_IS_RECNO | DB_ST_TOPLEVEL; + if (mip->re_len > 0) + stflags |= DB_ST_RELEN; + if ((ret = __bam_vrfy_subtree(dbp, vdp, + root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0) + goto err; + /* + * Even if mip->re_len > 0, re_len may come back zero if the + * tree is empty. It should be okay to just skip the check in + * this case, as if there are any non-deleted keys at all, + * that should never happen. + */ + if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) { + EPRINT((env, DB_STR_A("1071", + "Page %lu: recno database has bad re_len %lu", + "%lu %lu"), (u_long)meta_pgno, (u_long)relen)); + ret = DB_VERIFY_BAD; + goto err; + } + ret = 0; + break; + case P_LDUP: + EPRINT((env, DB_STR_A("1072", + "Page %lu: duplicate tree referenced from metadata page", + "%lu"), (u_long)meta_pgno)); + ret = DB_VERIFY_BAD; + break; + default: + EPRINT((env, DB_STR_A("1073", + "Page %lu: btree root of incorrect type %lu on metadata page", + "%lu %lu"), (u_long)meta_pgno, (u_long)rip->type)); + ret = DB_VERIFY_BAD; + break; + } + +err: if (mip != NULL && ((t_ret = + __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0) + ret = t_ret; + if (rip != NULL && ((t_ret = + __db_vrfy_putpageinfo(env, vdp, rip)) != 0) && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __bam_vrfy_subtree-- + * Verify a subtree (or entire) btree with specified root. + * + * Note that this is public because it must be called to verify + * offpage dup trees, including from hash. 
+ * + * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, + * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); + */ +int +__bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + void *l, *r; + u_int32_t flags, *levelp, *nrecsp, *relenp; +{ + BINTERNAL *li, *ri; + DB *pgset; + DBC *cc; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + VRFY_CHILDINFO *child; + VRFY_PAGEINFO *pip; + db_indx_t i; + db_pgno_t next_pgno, prev_pgno; + db_recno_t child_nrecs, nrecs; + u_int32_t child_level, child_relen, j, level, relen, stflags; + u_int8_t leaf_type; + int (*func) __P((DB *, const DBT *, const DBT *)); + int isbad, p, ret, t_ret, toplevel; + + if (levelp != NULL) /* Don't leave uninitialized on error. */ + *levelp = 0; + if (nrecsp != NULL) + *nrecsp = 0; + + env = dbp->env; + mpf = dbp->mpf; + h = NULL; + next_pgno = prev_pgno = PGNO_INVALID; + nrecs = 0; + relen = 0; + leaf_type = P_INVALID; + isbad = ret = 0; + + /* Provide feedback on our progress to the application. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + cc = NULL; + level = pip->bt_level; + + toplevel = LF_ISSET(DB_ST_TOPLEVEL) ? 1 : 0; + LF_CLR(DB_ST_TOPLEVEL); + + /* + * If this is the root, initialize the vdp's prev- and next-pgno + * accounting. + * + * For each leaf page we hit, we'll want to make sure that + * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is + * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno + * and vdp->prev_pgno to our page number, and the next leaf page in + * line should be able to do the same verification. + */ + if (toplevel) { + /* + * Cache the values stored in the vdp so that if we're an + * auxiliary tree such as an off-page duplicate set, our + * caller's leaf page chain doesn't get lost. 
+ */ + prev_pgno = vdp->prev_pgno; + next_pgno = vdp->next_pgno; + leaf_type = vdp->leaf_type; + vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID; + vdp->leaf_type = P_INVALID; + } + + /* + * We are recursively descending a btree, starting from the root + * and working our way out to the leaves. + * + * There are four cases we need to deal with: + * 1. pgno is a recno leaf page. Any children are overflows. + * 2. pgno is a duplicate leaf page. Any children + * are overflow pages; traverse them, and then return + * level and nrecs. + * 3. pgno is an ordinary leaf page. Check whether dups are + * allowed, and if so, traverse any off-page dups or + * overflows. Then return nrecs and level. + * 4. pgno is a recno internal page. Recursively check any + * child pages, making sure their levels are one lower + * and their nrecs sum to ours. + * 5. pgno is a btree internal page. Same as #4, plus we + * must verify that for each pair of BINTERNAL entries + * N and N+1, the leftmost item on N's child sorts + * greater than N, and the rightmost item on N's child + * sorts less than N+1. + * + * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE), + * we need to verify the internal sort order is correct if, + * due to overflow items, we were not able to do so earlier. + */ + switch (pip->type) { + case P_LRECNO: + case P_LDUP: + case P_LBTREE: + /* + * Cases 1, 2 and 3. + * + * We're some sort of leaf page; verify + * that our linked list of leaves is consistent. + */ + if (vdp->leaf_type == P_INVALID) { + /* + * First leaf page. Set the type that all its + * successors should be, and verify that our prev_pgno + * is PGNO_INVALID. + */ + vdp->leaf_type = pip->type; + if (pip->prev_pgno != PGNO_INVALID) + goto bad_prev; + } else { + /* + * Successor leaf page. Check our type, the previous + * page's next_pgno, and our prev_pgno. 
+ */ + if (pip->type != vdp->leaf_type) { + isbad = 1; + EPRINT((env, DB_STR_A("1074", + "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)", + "%lu %lu %lu"), (u_long)pip->pgno, + (u_long)pip->type, + (u_long)vdp->leaf_type)); + } + + /* + * Don't do the prev/next_pgno checks if we've lost + * leaf pages due to another corruption. + */ + if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) { + if (pip->pgno != vdp->next_pgno) { + isbad = 1; + EPRINT((env, DB_STR_A("1075", + "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)", + "%lu %lu %lu"), + (u_long)vdp->prev_pgno, + (u_long)vdp->next_pgno, + (u_long)pip->pgno)); + } + if (pip->prev_pgno != vdp->prev_pgno) { +bad_prev: isbad = 1; + EPRINT((env, DB_STR_A("1076", + "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)", + "%lu %lu %lu"), + (u_long)pip->pgno, + (u_long)pip->prev_pgno, + (u_long)vdp->prev_pgno)); + } + } + } + vdp->prev_pgno = pip->pgno; + vdp->next_pgno = pip->next_pgno; + F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN); + + /* + * Overflow pages are common to all three leaf types; + * traverse the child list, looking for overflows. + */ + if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) + goto err; + for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; + ret = __db_vrfy_ccnext(cc, &child)) + if (child->type == V_OVERFLOW && + (ret = __db_vrfy_ovfl_structure(dbp, vdp, + child->pgno, child->tlen, + flags | DB_ST_OVFL_LEAF)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto done; + } + + if ((ret = __db_vrfy_ccclose(cc)) != 0) + goto err; + cc = NULL; + + /* Case 1 */ + if (pip->type == P_LRECNO) { + if (!LF_ISSET(DB_ST_IS_RECNO) && + !(LF_ISSET(DB_ST_DUPOK) && + !LF_ISSET(DB_ST_DUPSORT))) { + isbad = 1; + EPRINT((env, DB_STR_A("1077", + "Page %lu: recno leaf page non-recno tree", + "%lu"), (u_long)pgno)); + goto done; + } + goto leaf; + } else if (LF_ISSET(DB_ST_IS_RECNO)) { + /* + * It's a non-recno leaf. Had better not be a recno + * subtree. 
+ */ + isbad = 1; + EPRINT((env, DB_STR_A("1078", + "Page %lu: non-recno leaf page in recno tree", + "%lu"), (u_long)pgno)); + goto done; + } + + /* Case 2--no more work. */ + if (pip->type == P_LDUP) + goto leaf; + + /* Case 3 */ + + /* Check if we have any dups. */ + if (F_ISSET(pip, VRFY_HAS_DUPS)) { + /* If dups aren't allowed in this btree, trouble. */ + if (!LF_ISSET(DB_ST_DUPOK)) { + isbad = 1; + EPRINT((env, DB_STR_A("1079", + "Page %lu: duplicates in non-dup btree", + "%lu"), (u_long)pgno)); + } else { + /* + * We correctly have dups. If any are off-page, + * traverse those btrees recursively. + */ + if ((ret = + __db_vrfy_childcursor(vdp, &cc)) != 0) + goto err; + for (ret = __db_vrfy_ccset(cc, pgno, &child); + ret == 0; + ret = __db_vrfy_ccnext(cc, &child)) { + stflags = + flags | DB_ST_RECNUM | DB_ST_DUPSET; + /* Skip any overflow entries. */ + if (child->type == V_DUPLICATE) { + if ((ret = __db_vrfy_duptype( + dbp, vdp, child->pgno, + stflags)) != 0) { + isbad = 1; + /* Next child. */ + continue; + } + if ((ret = __bam_vrfy_subtree( + dbp, vdp, child->pgno, + NULL, NULL, + stflags | DB_ST_TOPLEVEL, + NULL, NULL, NULL)) != 0) { + if (ret == + DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } + } + + if ((ret = __db_vrfy_ccclose(cc)) != 0) + goto err; + cc = NULL; + + /* + * If VRFY_DUPS_UNSORTED is set, + * DB_ST_DUPSORT had better not be. + */ + if (F_ISSET(pip, VRFY_DUPS_UNSORTED) && + LF_ISSET(DB_ST_DUPSORT)) { + isbad = 1; + EPRINT((env, DB_STR_A("1080", + "Page %lu: unsorted duplicate set in sorted-dup database", + "%lu"), (u_long)pgno)); + } + } + } + goto leaf; + case P_IBTREE: + case P_IRECNO: + /* We handle these below. */ + break; + default: + /* + * If a P_IBTREE or P_IRECNO contains a reference to an + * invalid page, we'll wind up here; handle it gracefully. + * Note that the code at the "done" label assumes that the + * current page is a btree/recno one of some sort; this + * is not the case here, so we goto err. 
+ * + * If the page is entirely zeroed, its pip->type will be a lie + * (we assumed it was a hash page, as they're allowed to be + * zeroed); handle this case specially. + */ + if (F_ISSET(pip, VRFY_IS_ALLZEROES)) + ZEROPG_ERR_PRINT(env, pgno, DB_STR_P( + "btree or recno page")); + else + EPRINT((env, DB_STR_A("1081", + "Page %lu: btree or recno page is of inappropriate type %lu", + "%lu %lu"), (u_long)pgno, (u_long)pip->type)); + + /* + * We probably lost a leaf page (or more if this was an + * internal page) from our prev/next_pgno chain. Flag + * that this is expected; we don't want or need to + * spew error messages about erroneous prev/next_pgnos, + * since that's probably not the real problem. + */ + F_SET(vdp, VRFY_LEAFCHAIN_BROKEN); + + ret = DB_VERIFY_BAD; + goto err; + } + + /* + * Cases 4 & 5: This is a btree or recno internal page. For each child, + * recurse, keeping a running count of nrecs and making sure the level + * is always reasonable. + */ + if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) + goto err; + for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; + ret = __db_vrfy_ccnext(cc, &child)) + if (child->type == V_RECNO) { + if (pip->type != P_IRECNO) { + ret = __db_unknown_path( + env, "__bam_vrfy_subtree"); + goto err; + } + if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno, + NULL, NULL, flags, &child_level, &child_nrecs, + &child_relen)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto done; + } + + if (LF_ISSET(DB_ST_RELEN)) { + if (relen == 0) + relen = child_relen; + /* + * child_relen may be zero if the child subtree + * is empty. 
+ */ + else if (child_relen > 0 && + relen != child_relen) { + isbad = 1; + EPRINT((env, DB_STR_A("1082", + "Page %lu: recno page returned bad re_len %lu", + "%lu %lu"), (u_long)child->pgno, + (u_long)child_relen)); + } + if (relenp) + *relenp = relen; + } + if (LF_ISSET(DB_ST_RECNUM)) { + if (child->nrecs != child_nrecs) { + isbad = 1; + EPRINT((env, DB_STR_A("1083", + "Page %lu: record count incorrect: actual %lu, in record %lu", + "%lu %lu %lu"), + (u_long)child->pgno, + (u_long)child_nrecs, + (u_long)child->nrecs)); + } + nrecs += child_nrecs; + } + if (isbad == 0 && level != child_level + 1) { + isbad = 1; + EPRINT((env, DB_STR_A("1084", + "Page %lu: recno level incorrect: got %lu, expected %lu", + "%lu %lu %lu"), + (u_long)child->pgno, (u_long)child_level, + (u_long)(level - 1))); + } + } else if (child->type == V_OVERFLOW) { + /* + * It is possible for one internal page to reference + * a single overflow page twice, if all the items + * in the subtree referenced by slot 0 are deleted, + * then a similar number of items are put back + * before the key that formerly had been in slot 1. + * + * (Btree doesn't look at the key in slot 0, so the + * fact that the key formerly at slot 1 is the "wrong" + * parent of the stuff in the slot 0 subtree isn't + * really incorrect.) + * + * __db_vrfy_ovfl_structure is designed to be + * efficiently called multiple times for multiple + * references; call it here as many times as is + * appropriate. + */ + + /* Otherwise, __db_vrfy_childput would be broken. */ + DB_ASSERT(env, child->refcnt >= 1); + + /* + * An overflow referenced more than twice here + * shouldn't happen. 
+ */ + if (child->refcnt > 2) { + isbad = 1; + EPRINT((env, DB_STR_A("1085", + "Page %lu: overflow page %lu referenced more than twice from internal page", + "%lu %lu"), (u_long)pgno, + (u_long)child->pgno)); + } else + for (j = 0; j < child->refcnt; j++) + if ((ret = __db_vrfy_ovfl_structure(dbp, + vdp, child->pgno, child->tlen, + flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto done; + } + } + + if ((ret = __db_vrfy_ccclose(cc)) != 0) + goto err; + cc = NULL; + + /* We're done with case 4. */ + if (pip->type == P_IRECNO) + goto done; + + /* + * Case 5. Btree internal pages. + * As described above, we need to iterate through all the + * items on the page and make sure that our children sort appropriately + * with respect to them. + * + * For each entry, li will be the "left-hand" key for the entry + * itself, which must sort lower than all entries on its child; + * ri will be the key to its right, which must sort greater. + */ + if (h == NULL && + (ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + for (i = 0; i < pip->entries; i += O_INDX) { + li = GET_BINTERNAL(dbp, h, i); + ri = (i + O_INDX < pip->entries) ? + GET_BINTERNAL(dbp, h, i + O_INDX) : r; + + /* + * The leftmost key is forcibly sorted less than all entries, + * so don't bother passing it. + */ + if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno, + i == 0 ? NULL : li, ri, flags, &child_level, + &child_nrecs, NULL)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto done; + } + + if (LF_ISSET(DB_ST_RECNUM)) { + /* + * Keep a running tally on the actual record count so + * we can return it to our parent (if we have one) or + * compare it to the NRECS field if we're a root page. + */ + nrecs += child_nrecs; + + /* + * Make sure the actual record count of the child + * is equal to the value in the BINTERNAL structure. 
+ */ + if (li->nrecs != child_nrecs) { + isbad = 1; + EPRINT((env, DB_STR_A("1086", + "Page %lu: item %lu has incorrect record count of %lu, should be %lu", + "%lu %lu %lu %lu"), (u_long)pgno, + (u_long)i, (u_long)li->nrecs, + (u_long)child_nrecs)); + } + } + + if (level != child_level + 1) { + isbad = 1; + EPRINT((env, DB_STR_A("1087", + "Page %lu: Btree level incorrect: got %lu, expected %lu", + "%lu %lu %lu"), (u_long)li->pgno, + (u_long)child_level, (u_long)(level - 1))); + } + } + + if (0) { +leaf: level = LEAFLEVEL; + if (LF_ISSET(DB_ST_RECNUM)) + nrecs = pip->rec_cnt; + + /* XXX + * We should verify that the record count on a leaf page + * is the sum of the number of keys and the number of + * records in its off-page dups. This requires looking + * at the page again, however, and it may all be changing + * soon, so for now we don't bother. + */ + + if (LF_ISSET(DB_ST_RELEN) && relenp) + *relenp = pip->re_len; + } +done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { + /* + * During the page-by-page pass, item order verification was + * not finished due to the presence of overflow items. If + * isbad == 0, though, it's now safe to do so, as we've + * traversed any child overflow pages. Do it. + */ + if (h == NULL && (ret = __memp_fget(mpf, &pgno, + vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + if ((ret = __bam_vrfy_itemorder(dbp, + vdp, vdp->thread_info, h, pgno, 0, 1, 0, flags)) != 0) + goto err; + F_CLR(pip, VRFY_INCOMPLETE); + } + + /* + * It's possible to get to this point with a page that has no + * items, but without having detected any sort of failure yet. + * Having zero items is legal if it's a leaf--it may be the + * root page in an empty tree, or the tree may have been + * modified with the DB_REVSPLITOFF flag set (there's no way + * to tell from what's on disk). For an internal page, + * though, having no items is a problem (all internal pages + * must have children). 
+ */ + if (isbad == 0 && ret == 0) { + if (h == NULL && (ret = __memp_fget(mpf, &pgno, + vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + + if (NUM_ENT(h) == 0 && ISINTERNAL(h)) { + isbad = 1; + EPRINT((env, DB_STR_A("1088", + "Page %lu: internal page is empty and should not be", + "%lu"), (u_long)pgno)); + goto err; + } + } + + /* + * Our parent has sent us BINTERNAL pointers to parent records + * so that we can verify our place with respect to them. If it's + * appropriate--we have a default sort function--verify this. + */ + if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && + pip->type != P_IRECNO && pip->type != P_LRECNO) { + if (h == NULL && (ret = __memp_fget(mpf, &pgno, + vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + + /* + * __bam_vrfy_treeorder needs to know what comparison function + * to use. If DB_ST_DUPSET is set, we're in a duplicate tree + * and we use the duplicate comparison function; otherwise, + * use the btree one. If unset, use the default, of course. + */ + func = LF_ISSET(DB_ST_DUPSET) ? dbp->dup_compare : + ((BTREE *)dbp->bt_internal)->bt_compare; + if (func == NULL) + func = __bam_defcmp; + + if ((ret = __bam_vrfy_treeorder(dbp, + vdp->thread_info, h, l, r, func, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } + + /* + * This is guaranteed to succeed for leaf pages, but no harm done. + * + * Internal pages below the top level do not store their own + * record numbers, so we skip them. 
+ */ + if (LF_ISSET(DB_ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) { + isbad = 1; + EPRINT((env, DB_STR_A("1089", + "Page %lu: bad record count: has %lu records, claims %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)nrecs, + (u_long)pip->rec_cnt)); + } + + if (levelp) + *levelp = level; + if (nrecsp) + *nrecsp = nrecs; + + pgset = vdp->pgset; + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, pgno, &p)) != 0) + goto err; + if (p != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1090", + "Page %lu: linked twice", "%lu"), (u_long)pgno)); + } else if ((ret = + __db_vrfy_pgset_inc(pgset, vdp->thread_info, vdp->txn, pgno)) != 0) + goto err; + + if (toplevel) + /* + * The last page's next_pgno in the leaf chain should have been + * PGNO_INVALID. + */ + if (vdp->next_pgno != PGNO_INVALID) { + isbad = 1; + EPRINT((env, DB_STR_A("1091", + "Page %lu: unterminated leaf chain", + "%lu"), (u_long)vdp->prev_pgno)); + } + +err: if (toplevel) { + /* Restore our caller's settings. */ + vdp->next_pgno = next_pgno; + vdp->prev_pgno = prev_pgno; + vdp->leaf_type = leaf_type; + } + + if (h != NULL && (t_ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __bam_vrfy_treeorder -- + * Verify that the lowest key on a page sorts greater than the + * BINTERNAL which points to it (lp), and the highest key + * sorts less than the BINTERNAL above that (rp). + * + * If lp is NULL, this means that it was the leftmost key on the + * parent, which (regardless of sort function) sorts less than + * all keys. No need to check it. + * + * If rp is NULL, lp was the highest key on the parent, so there's + * no higher key we must sort less than. 
+ */ +static int +__bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags) + DB *dbp; + DB_THREAD_INFO *ip; + PAGE *h; + BINTERNAL *lp, *rp; + int (*func) __P((DB *, const DBT *, const DBT *)); + u_int32_t flags; +{ + BOVERFLOW *bo; + DBC *dbc; + DBT dbt; + ENV *env; + db_indx_t last; + int ret, cmp; + + env = dbp->env; + memset(&dbt, 0, sizeof(DBT)); + F_SET(&dbt, DB_DBT_MALLOC); + ret = 0; + + /* + * Empty pages are sorted correctly by definition. We check + * to see whether they ought to be empty elsewhere; leaf + * pages legally may be. + */ + if (NUM_ENT(h) == 0) + return (0); + + switch (TYPE(h)) { + case P_IBTREE: + case P_LDUP: + last = NUM_ENT(h) - O_INDX; + break; + case P_LBTREE: + last = NUM_ENT(h) - P_INDX; + break; + default: + return (__db_unknown_path(env, "__bam_vrfy_treeorder")); + } + + /* Populate a dummy cursor. */ + if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + return (ret); + /* + * The key on page h, the child page, is more likely to be + * an overflow page, so we pass its offset, rather than lp/rp's, + * into __bam_cmp. This will take advantage of __db_moff. + */ + + /* + * Skip first-item check if we're an internal page--the first + * entry on an internal page is treated specially by __bam_cmp, + * so what's on the page shouldn't matter. (Plus, since we're passing + * our page and item 0 as to __bam_cmp, we'll sort before our + * parent and falsely report a failure.) 
+ */ + if (lp != NULL && TYPE(h) != P_IBTREE) { + if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + return (ret); + if (lp->type == B_KEYDATA) { + dbt.data = lp->data; + dbt.size = lp->len; + } else if (lp->type == B_OVERFLOW) { + bo = (BOVERFLOW *)lp->data; + if ((ret = __db_goff(dbc, &dbt, + bo->tlen, bo->pgno, NULL, NULL)) != 0) + return (ret); + } else + return ( + __db_unknown_path(env, "__bam_vrfy_treeorder")); + + /* On error, fall through, free if needed, and return. */ + if ((ret = __bam_cmp(dbc, &dbt, h, 0, func, &cmp)) == 0) { + if (cmp > 0) { + EPRINT((env, DB_STR_A("1092", + "Page %lu: first item on page sorted greater than parent entry", + "%lu"), (u_long)PGNO(h))); + ret = DB_VERIFY_BAD; + } + } else + EPRINT((env, DB_STR_A("1093", + "Page %lu: first item on page had comparison error", + "%lu"), (u_long)PGNO(h))); + + if (dbt.data != lp->data) + __os_ufree(env, dbt.data); + if (ret != 0) + return (ret); + } + + if (rp != NULL) { + if (rp->type == B_KEYDATA) { + dbt.data = rp->data; + dbt.size = rp->len; + } else if (rp->type == B_OVERFLOW) { + bo = (BOVERFLOW *)rp->data; + if ((ret = __db_goff(dbc, &dbt, + bo->tlen, bo->pgno, NULL, NULL)) != 0) + return (ret); + } else + return ( + __db_unknown_path(env, "__bam_vrfy_treeorder")); + + /* On error, fall through, free if needed, and return. */ + if ((ret = __bam_cmp(dbc, &dbt, h, last, func, &cmp)) == 0) { + if (cmp < 0) { + EPRINT((env, DB_STR_A("1094", + "Page %lu: last item on page sorted greater than parent entry", + "%lu"), (u_long)PGNO(h))); + ret = DB_VERIFY_BAD; + } + } else + EPRINT((env, DB_STR_A("1095", + "Page %lu: last item on page had comparison error", + "%lu"), (u_long)PGNO(h))); + + if (dbt.data != rp->data) + __os_ufree(env, dbt.data); + } + + return (ret); +} + +/* + * __bam_salvage -- + * Safely dump out anything that looks like a key on an alleged + * btree leaf page, also mark overflow pages as seen. 
For internal btree
 * pages, just mark any overflow pages as seen.
 *
 * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *,
 * PUBLIC:     db_pgno_t, u_int32_t, PAGE *, void *,
 * PUBLIC:     int (*)(void *, const void *), DBT *, u_int32_t));
 */
int
__bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	u_int32_t pgtype;
	PAGE *h;
	void *handle;
	int (*callback) __P((void *, const void *));
	DBT *key;
	u_int32_t flags;
{
	BKEYDATA *bk;
	BOVERFLOW *bo;
	DBT dbt, repldbt, unknown_key, unknown_data;
	ENV *env;
	VRFY_ITEM *pgmap;
	db_indx_t i, last, beg, end, *inp;
	db_pgno_t ovflpg;
	u_int32_t himark, ovfl_bufsz;
	void *ovflbuf;
	int adj, ret, t_ret, t2_ret;
#ifdef HAVE_COMPRESSION
	DBT kcpy, *last_key;
	int unknown_dup_key;
#endif

	env = dbp->env;
	ovflbuf = pgmap = NULL;
	inp = P_INP(dbp, h);

	memset(&dbt, 0, sizeof(DBT));
	dbt.flags = DB_DBT_REALLOC;
	memset(&repldbt, 0, sizeof(DBT));

#ifdef HAVE_COMPRESSION
	/*
	 * last_key tracks the most recent well-formed key so compressed
	 * key/data pairs can be decompressed; NULL means "key unknown".
	 */
	memset(&kcpy, 0, sizeof(DBT));
	unknown_dup_key = LF_ISSET(DB_SA_UNKNOWNKEY);
	last_key = unknown_dup_key ? NULL : key;
#endif
	LF_CLR(DB_SA_UNKNOWNKEY);

	/* Placeholders printed when a key or datum cannot be recovered. */
	DB_INIT_DBT(unknown_key, "UNKNOWN_KEY", sizeof("UNKNOWN_KEY") - 1);
	DB_INIT_DBT(unknown_data, "UNKNOWN_DATA", sizeof("UNKNOWN_DATA") - 1);

	/*
	 * Allocate a buffer for overflow items.  Start at one page;
	 * __db_safe_goff will realloc as needed.
	 */
	if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
		goto err;
	ovfl_bufsz = dbp->pgsize;

	/* One VRFY_ITEM slot per byte of page, to mark item extents. */
	if (LF_ISSET(DB_AGGRESSIVE) && (ret =
	    __os_calloc(env, dbp->pgsize, sizeof(pgmap[0]), &pgmap)) != 0)
		goto err;

	/*
	 * Loop through the inp array, spitting out key/data pairs.
	 *
	 * If we're salvaging normally, loop from 0 through NUM_ENT(h).  If
	 * we're being aggressive, loop until we hit the end of the page --
	 * NUM_ENT() may be bogus.
	 */
	himark = dbp->pgsize;
	/* last == UINT16_MAX is a sentinel meaning "no key printed yet". */
	for (i = 0, last = UINT16_MAX;; i += O_INDX) {
		/*
		 * If we're not aggressive, or if we're on an internal page,
		 * break when we hit NUM_ENT(h).
		 */
		if ((!LF_ISSET(DB_AGGRESSIVE) ||
		    pgtype == P_IBTREE) && i >= NUM_ENT(h))
			break;

		/* Verify the current item. */
		t_ret =
		    __db_vrfy_inpitem(dbp, h, pgno, i, 1, flags, &himark, NULL);

		if (t_ret != 0) {
			/*
			 * If this is a btree leaf and we've printed out a key
			 * but not its associated data item, fix this imbalance
			 * by printing an "UNKNOWN_DATA".
			 */
			if (pgtype == P_LBTREE && i % P_INDX == 1 &&
			    last == i - 1 && (t2_ret = __db_vrfy_prdbt(
			    &unknown_data,
			    0, " ", handle, callback, 0, 0, vdp)) != 0) {
				if (ret == 0)
					ret = t2_ret;
				goto err;
			}

			/*
			 * Don't return DB_VERIFY_FATAL; it's private and means
			 * only that we can't go on with this page, not with
			 * the whole database.  It's not even an error if we've
			 * run into it after NUM_ENT(h).
			 */
			if (t_ret == DB_VERIFY_FATAL) {
				if (i < NUM_ENT(h) && ret == 0)
					ret = DB_VERIFY_BAD;
				break;
			}
			continue;
		}

		/*
		 * If this returned 0, it's safe to print or (carefully)
		 * try to fetch.
		 *
		 * We only print deleted items if DB_AGGRESSIVE is set.
		 */
		bk = GET_BKEYDATA(dbp, h, i);
		if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
			continue;

		/*
		 * If this is a btree leaf and we're about to print out a data
		 * item for which we didn't print out a key, fix this imbalance
		 * by printing an "UNKNOWN_KEY".
		 */
		if (pgtype == P_LBTREE && i % P_INDX == 1 && last != i - 1) {
#ifdef HAVE_COMPRESSION
			last_key = NULL;
#endif
			if ((t_ret = __db_vrfy_prdbt(&unknown_key,
			    0, " ", handle, callback, 0, 0, vdp)) != 0) {
				if (ret == 0)
					ret = t_ret;
				goto err;
			}
		}
		last = i;

		/*
		 * We're going to go try to print the next item.  If key is
		 * non-NULL, we're a dup page, so we've got to print the key
		 * first, unless DB_SA_SKIPFIRSTKEY is set and we're on the
		 * first entry.
		 */
		if (key != NULL && (i != 0 || !LF_ISSET(DB_SA_SKIPFIRSTKEY))) {
#ifdef HAVE_COMPRESSION
			last_key = unknown_dup_key ? NULL : key;
#endif
			if ((t_ret = __db_vrfy_prdbt(key,
			    0, " ", handle, callback, 0, 0, vdp)) != 0) {
				if (ret == 0)
					ret = t_ret;
				goto err;
			}
		}

		/* beg/end bracket the item's byte extent for the pgmap. */
		beg = end = inp[i];
		switch (B_TYPE(bk->type)) {
		case B_DUPLICATE:
			if (pgtype == P_IBTREE)
				break;

			end = beg + BOVERFLOW_SIZE - 1;
			/*
			 * If we're not on a normal btree leaf page, there
			 * shouldn't be off-page dup sets.  Something's
			 * confused; just drop it, and the code to pick up
			 * unlinked offpage dup sets will print it out
			 * with key "UNKNOWN" later.
			 */
			if (pgtype != P_LBTREE)
				break;

			bo = (BOVERFLOW *)bk;

			/*
			 * If the page number is unreasonable, or if this is
			 * supposed to be a key item, output "UNKNOWN_KEY" --
			 * the best we can do is run into the data items in
			 * the unlinked offpage dup pass.
			 */
			if (!IS_VALID_PGNO(bo->pgno) || (i % P_INDX == 0)) {
				/* Not much to do on failure. */
#ifdef HAVE_COMPRESSION
				if (key == NULL && i % P_INDX == 0)
					last_key = NULL;
#endif
				if ((t_ret = __db_vrfy_prdbt(
				    i % P_INDX == 0 ? &unknown_key : &unknown_data,
				    0, " ", handle, callback, 0, 0, vdp)) != 0) {
					if (ret == 0)
						ret = t_ret;
					goto err;
				}
				break;
			}

			/* Don't stop on error. */
			if ((t_ret = __db_salvage_duptree(dbp,
			    vdp, bo->pgno, &dbt, handle, callback,
			    flags | DB_SA_SKIPFIRSTKEY
#ifdef HAVE_COMPRESSION
			    | (last_key == NULL ? DB_SA_UNKNOWNKEY : 0)
#endif
			    )) != 0 && ret == 0)
				ret = t_ret;

			break;
		case B_KEYDATA:
			if (pgtype == P_IBTREE)
				break;

			/* Inline items are aligned to u_int32_t boundaries. */
			end = (db_indx_t)DB_ALIGN(
			    beg + bk->len, sizeof(u_int32_t)) - 1;

			dbt.data = bk->data;
			dbt.size = bk->len;

#ifdef HAVE_COMPRESSION
			if (DB_IS_COMPRESSED(dbp) && last_key != NULL &&
			    (key != NULL || (i % P_INDX == 1))) {
				/* Decompress the key/data pair - the key
				   is in last_key, and the data is in dbt */
				if ((t_ret = __bam_compress_salvage(dbp, vdp,
				    handle, callback, last_key, &dbt)) != 0) {
					if (t_ret == DB_VERIFY_FATAL) {
						if (ret == 0)
							ret = DB_VERIFY_BAD;
						if (!LF_ISSET(DB_AGGRESSIVE))
							goto err;
					} else if (ret == 0) {
						ret = t_ret;
						goto err;
					}
				}
			} else {
				/* Remember a copy of this key for later. */
				if (key == NULL && i % P_INDX == 0) {
					if ((ret = __os_realloc(
					    env, dbt.size, &kcpy.data)) != 0)
						goto err;
					memcpy(kcpy.data, dbt.data, dbt.size);
					kcpy.size = dbt.size;
					last_key = &kcpy;
				}
#endif

				if ((t_ret = __db_vrfy_prdbt(&dbt,
				    0, " ", handle, callback, 0, 0, vdp)) != 0) {
					if (ret == 0)
						ret = t_ret;
					goto err;
				}
#ifdef HAVE_COMPRESSION
			}
#endif
			break;
		case B_OVERFLOW:
			if (pgtype != P_IBTREE)
				end = beg + BOVERFLOW_SIZE - 1;
			bo = (BOVERFLOW *)bk;

			/*
			 * Check for replicated overflow keys, so that we only
			 * call __db_safe_goff once per overflow page.  If we
			 * get the same offset as the previous key just re-use
			 * the previous dbt.
			 *
			 * P_IBTREE pages will never have replicated overflow
			 * keys.
			 */
			adj = pgtype == P_IBTREE ? O_INDX : P_INDX;
			if (pgtype == P_IBTREE) {
				/*
				 * If we're looking at a P_IBTREE, we just want
				 * to mark the overflow page as seen.
				 *
				 * Note that this call to __db_safe_goff differs
				 * from the non-P_IBTREE call.
				 *
				 * Only call __db_safe_goff if the overflow page
				 * hasn't been seen.
				 */
				ovflpg = ((BOVERFLOW *)
				    ((BINTERNAL *)bk)->data)->pgno;
				if (__db_salvage_isdone(vdp, ovflpg) == 0 &&
				    (t_ret = __db_safe_goff(dbp, vdp, ovflpg,
				    &dbt, &ovflbuf,
				    &ovfl_bufsz, flags)) != 0 && ret == 0)
					ret = t_ret;
				break;
			} else if (i > adj - 1 &&
			    i % adj == 0 && inp[i] == inp[i - adj])
				dbt = repldbt;
			else {
				/* Don't stop on error. */
				if ((t_ret = __db_safe_goff(dbp, vdp,
				    bo->pgno, &dbt, &ovflbuf,
				    &ovfl_bufsz, flags)) != 0 && ret == 0)
					ret = t_ret;

				/*
				 * If this is a key, save it in case the next
				 * key is a replicated overflow, so we don't
				 * call __db_safe_goff again.  Copy out dbt.data
				 * in case that pointer gets realloc'd when
				 * getting a data item.
				 */
				if (i % P_INDX == 0) {
					if (t_ret == 0) {
						if ((t_ret = __os_realloc(env,
						    dbt.size,
						    &repldbt.data)) != 0) {
							if (ret == 0)
								ret = t_ret;
							goto err;
						}
						memcpy(repldbt.data,
						    dbt.data, dbt.size);
						repldbt.size = dbt.size;
					} else {
						/*
						 * NOTE(review): this failure
						 * path discards the realloc
						 * error code -- confirm that
						 * returning the prior ret
						 * (possibly 0) is intended.
						 */
						if (__os_realloc(env,
						    unknown_key.size,
						    &repldbt.data) != 0)
							goto err;
						memcpy(repldbt.data,
						    unknown_key.data,
						    unknown_key.size);
						repldbt.size = unknown_key.size;
					}
				}

			}

#ifdef HAVE_COMPRESSION
			if (DB_IS_COMPRESSED(dbp) && last_key && t_ret == 0 &&
			    (key != NULL || (i % P_INDX == 1))) {
				/* Decompress the key/data pair - the key
				   is in last_key, and the data is in dbt */
				if ((t_ret = __bam_compress_salvage(dbp, vdp,
				    handle, callback, last_key, &dbt)) != 0) {
					if (t_ret == DB_VERIFY_FATAL) {
						if (ret == 0)
							ret = DB_VERIFY_BAD;
						if (!LF_ISSET(DB_AGGRESSIVE))
							goto err;
					} else if (ret == 0) {
						ret = t_ret;
						goto err;
					}
				}
			} else {
				if (key == NULL && i % P_INDX == 0) {
					if (t_ret == 0) {
						if ((ret = __os_realloc(env,
						    dbt.size, &kcpy.data)) != 0)
							goto err;
						memcpy(kcpy.data, dbt.data,
						    dbt.size);
						kcpy.size = dbt.size;
						last_key = &kcpy;
					} else
						last_key = NULL;
				}
#endif

				if ((t_ret = __db_vrfy_prdbt(
				    t_ret == 0 ? &dbt : &unknown_key,
				    0, " ", handle, callback, 0, 0, vdp))
				    != 0 && ret == 0)
					ret = t_ret;
#ifdef HAVE_COMPRESSION
			}
#endif
			break;
		default:
			/*
			 * We should never get here; __db_vrfy_inpitem should
			 * not be returning 0 if bk->type is unrecognizable.
			 */
			t_ret = __db_unknown_path(env, "__bam_salvage");
			if (ret == 0)
				ret = t_ret;
			goto err;
		}

		/*
		 * If we're being aggressive, mark the beginning and end of
		 * the item; we'll come back and print whatever "junk" is in
		 * the gaps in case we had any bogus inp elements and thereby
		 * missed stuff.
		 */
		if (LF_ISSET(DB_AGGRESSIVE) && pgtype != P_IBTREE) {
			pgmap[beg] = VRFY_ITEM_BEGIN;
			pgmap[end] = VRFY_ITEM_END;
		}
	}

err:	if (pgmap != NULL)
		__os_free(env, pgmap);
	if (ovflbuf != NULL)
		__os_free(env, ovflbuf);
	if (repldbt.data != NULL)
		__os_free(env, repldbt.data);
#ifdef HAVE_COMPRESSION
	if (kcpy.data != NULL)
		__os_free(env, kcpy.data);
#endif

	/* Mark this page as done. */
	if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __bam_salvage_walkdupint --
 *	Walk a known-good btree or recno internal page which is part of
 * a dup tree, calling __db_salvage_duptree on each child page.
+ * + * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, + * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + DBT *key; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + BINTERNAL *bi; + ENV *env; + RINTERNAL *ri; + int ret, t_ret; + db_indx_t i; + + env = dbp->env; + ret = 0; + + for (i = 0; i < NUM_ENT(h); i++) { + switch (TYPE(h)) { + case P_IBTREE: + bi = GET_BINTERNAL(dbp, h, i); + if ((t_ret = __db_salvage_duptree(dbp, + vdp, bi->pgno, key, handle, callback, flags)) != 0) + ret = t_ret; + break; + case P_IRECNO: + ri = GET_RINTERNAL(dbp, h, i); + if ((t_ret = __db_salvage_duptree(dbp, + vdp, ri->pgno, key, handle, callback, flags)) != 0) + ret = t_ret; + break; + default: + return (__db_unknown_path( + env, "__bam_salvage_walkdupint")); + } + /* Pass DB_SA_SKIPFIRSTKEY, if set, on to the 0th child only. */ + flags &= ~LF_ISSET(DB_SA_SKIPFIRSTKEY); + } + + return (ret); +} + +/* + * __bam_meta2pgset -- + * Given a known-good meta page, return in pgsetp a 0-terminated list of + * db_pgno_t's corresponding to the pages in the btree. + * + * We do this by a somewhat sleazy method, to avoid having to traverse the + * btree structure neatly: we walk down the left side to the very + * first leaf page, then we mark all the pages in the chain of + * NEXT_PGNOs (being wary of cycles and invalid ones), then we + * consolidate our scratch array into a nice list, and return. This + * avoids the memory management hassles of recursion and the + * trouble of walking internal pages--they just don't matter, except + * for the left branch. 
+ * + * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, + * PUBLIC: u_int32_t, DB *)); + */ +int +__bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + BTMETA *btmeta; + u_int32_t flags; + DB *pgset; +{ + BINTERNAL *bi; + DB_MPOOLFILE *mpf; + PAGE *h; + RINTERNAL *ri; + db_pgno_t current, p; + int err_ret, ret; + + DB_ASSERT(dbp->env, pgset != NULL); + + mpf = dbp->mpf; + h = NULL; + ret = err_ret = 0; + + for (current = btmeta->root;;) { + if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) { + err_ret = DB_VERIFY_BAD; + goto err; + } + if ((ret = __memp_fget(mpf, ¤t, + vdp->thread_info, NULL, 0, &h)) != 0) { + err_ret = ret; + goto err; + } + + switch (TYPE(h)) { + case P_IBTREE: + case P_IRECNO: + if ((ret = __bam_vrfy(dbp, + vdp, h, current, flags | DB_NOORDERCHK)) != 0) { + err_ret = ret; + goto err; + } + if (TYPE(h) == P_IBTREE) { + bi = GET_BINTERNAL(dbp, h, 0); + current = bi->pgno; + } else { /* P_IRECNO */ + ri = GET_RINTERNAL(dbp, h, 0); + current = ri->pgno; + } + break; + case P_LBTREE: + case P_LRECNO: + goto traverse; + default: + err_ret = DB_VERIFY_BAD; + goto err; + } + + if ((ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + err_ret = ret; + h = NULL; + } + + /* + * At this point, current is the pgno of leaf page h, the 0th in the + * tree we're concerned with. + */ +traverse: + while (IS_VALID_PGNO(current) && current != PGNO_INVALID) { + if (h == NULL && (ret = __memp_fget(mpf, + ¤t, vdp->thread_info, NULL, 0, &h)) != 0) { + err_ret = ret; + break; + } + + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, current, (int *)&p)) != 0) + goto err; + + if (p != 0) { + /* + * We've found a cycle. Return success anyway-- + * our caller may as well use however much of + * the pgset we've come up with. 
+ */ + break; + } + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, current)) != 0) + goto err; + + current = NEXT_PGNO(h); + if ((ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + err_ret = ret; + h = NULL; + } + +err: if (h != NULL) + (void)__memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED); + + return (ret == 0 ? err_ret : ret); +} + +/* + * __bam_safe_getdata -- + * + * Utility function for __bam_vrfy_itemorder. Safely gets the datum at + * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an + * overflow item, we do a safe_goff to get the item and signal that we need + * to free dbt->data; if ovflok is 0, we leaves the DBT zeroed. + */ +static int +__bam_safe_getdata(dbp, ip, h, i, ovflok, dbt, freedbtp) + DB *dbp; + DB_THREAD_INFO *ip; + PAGE *h; + u_int32_t i; + int ovflok; + DBT *dbt; + int *freedbtp; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + DBC *dbc; + int ret; + + memset(dbt, 0, sizeof(DBT)); + *freedbtp = 0; + + bk = GET_BKEYDATA(dbp, h, i); + if (B_TYPE(bk->type) == B_OVERFLOW) { + if (!ovflok) + return (0); + + if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + return (ret); + bo = (BOVERFLOW *)bk; + F_SET(dbt, DB_DBT_MALLOC); + + *freedbtp = 1; + return (__db_goff(dbc, dbt, bo->tlen, bo->pgno, NULL, NULL)); + } else { + dbt->data = bk->data; + dbt->size = bk->len; + } + + return (0); +} diff --git a/src/btree/btree.src b/src/btree/btree.src new file mode 100644 index 00000000..6155d902 --- /dev/null +++ b/src/btree/btree.src @@ -0,0 +1,290 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +DBPRIVATE +PREFIX __bam + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/btree.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * BTREE-split: used to log a page split. + * + * left: the page number for the low-order contents. + * llsn: the left page's original LSN. + * right: the page number for the high-order contents. + * rlsn: the right page's original LSN. + * indx: the number of entries that went to the left page. + * npgno: the next page number + * nlsn: the next page's original LSN (or 0 if no next page). + * pgno: the parent page number + * plsn: the parent page's original LSN. + * pg: the split page's contents before the split. + * opflags: SPL_NRECS: if splitting a tree that maintains a record count. + * pindx: index of new record in parent page. + */ +BEGIN split 50 62 +DB fileid int32_t ld +OP opflags u_int32_t lu +ARG left db_pgno_t lu +POINTER llsn DB_LSN * lu +ARG right db_pgno_t lu +POINTER rlsn DB_LSN * lu +ARG indx u_int32_t lu +ARG npgno db_pgno_t lu +POINTER nlsn DB_LSN * lu +ARG ppgno db_pgno_t lu +POINTER plsn DB_LSN * lu +ARG pindx u_int32_t lu +PGDBT pg DBT s +HDR pentry DBT s +HDR rentry DBT s +END + +BEGIN_COMPAT split 48 62 +DB fileid int32_t ld +ARG left db_pgno_t lu +POINTER llsn DB_LSN * lu +ARG right db_pgno_t lu +POINTER rlsn DB_LSN * lu +ARG indx u_int32_t lu +ARG npgno db_pgno_t lu +POINTER nlsn DB_LSN * lu +ARG ppgno db_pgno_t lu +POINTER plsn DB_LSN * lu +ARG pindx u_int32_t lu +PGDBT pg DBT s +DBT pentry DBT s +DBT rentry DBT s +ARG opflags u_int32_t lu +END + +BEGIN_COMPAT split 42 62 +DB fileid int32_t ld +ARG left db_pgno_t lu +POINTER llsn DB_LSN * lu +ARG right db_pgno_t lu +POINTER rlsn DB_LSN * lu +ARG indx u_int32_t lu +ARG npgno db_pgno_t lu +POINTER nlsn DB_LSN * lu +ARG root_pgno db_pgno_t lu +PGDBT pg DBT s +ARG opflags u_int32_t lu +END + +/* + * BTREE-rsplit: used to log a 
reverse-split + * + * pgno: the page number of the page copied over the root. + * pgdbt: the page being copied on the root page. + * root_pgno: the root page number. + * nrec: the tree's record count. + * rootent: last entry on the root page. + * rootlsn: the root page's original lsn. + */ +BEGIN rsplit 42 63 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +PGDBT pgdbt DBT s +ARG root_pgno db_pgno_t lu +ARG nrec db_pgno_t lu +DBT rootent DBT s +POINTER rootlsn DB_LSN * lu +END + +/* + * BTREE-adj: used to log the adjustment of an index. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * indx: the index adjusted. + * indx_copy: the index to copy if inserting. + * is_insert: 0 if a delete, 1 if an insert. + */ +BEGIN adj 42 55 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +ARG indx_copy u_int32_t lu +ARG is_insert u_int32_t lu +END + +/* + * BTREE-cadjust: used to adjust the count change in an internal page. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * indx: the index to be adjusted. + * adjust: the signed adjustment. + * opflags: CAD_UPDATEROOT: if root page count was adjusted. + */ +BEGIN cadjust 42 56 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +ARG adjust int32_t ld +ARG opflags u_int32_t lu +END + +/* + * BTREE-cdel: used to log the intent-to-delete of a cursor record. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * indx: the index to be deleted. + */ +BEGIN cdel 42 57 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +END + +/* + * BTREE-repl: used to log the replacement of an item. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * indx: the index to be replaced. + * isdeleted: set if the record was previously deleted. + * orig: the original data. + * repl: the replacement data. 
+ * prefix: the prefix of the replacement that matches the original. + * suffix: the suffix of the replacement that matches the original. + */ +BEGIN repl 42 58 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +ARG isdeleted u_int32_t lu +DBT orig DBT s +DBT repl DBT s +ARG prefix u_int32_t lu +ARG suffix u_int32_t lu +END + +/* + * BTREE-irep: used to log the replacement of an item on an internal page. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * indx: the index to be replaced. + * ptype: type of the page. + * hdr: header of the record. + * data: data of the record. + */ +BEGIN irep 50 67 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +OP ptype u_int32_t lu +HDR hdr DBT s +DATA data DBT s +HDR old DBT s +END + +/* + * BTREE-root: log the assignment of a root btree page. + */ +BEGIN root 42 59 +DB fileid int32_t ld +ARG meta_pgno db_pgno_t lu +ARG root_pgno db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +END + +/* + * BTREE-curadj: undo cursor adjustments on txn abort. + * Should only be processed during DB_TXN_ABORT. + * NOTE: the first_indx field gets used to hold + * signed index adjustment in one case. + * care should be taken if its size is changed. + */ +BEGIN curadj 42 64 +/* Fileid of db affected. */ +DB fileid int32_t ld +/* Which adjustment. */ +ARG mode db_ca_mode ld +/* Page entry is from. */ +ARG from_pgno db_pgno_t lu +/* Page entry went to. */ +ARG to_pgno db_pgno_t lu +/* Left page of root split. */ +ARG left_pgno db_pgno_t lu +/* First index of dup set. Also used as adjustment. */ +ARG first_indx u_int32_t lu +/* Index entry is from. */ +ARG from_indx u_int32_t lu +/* Index where entry went. */ +ARG to_indx u_int32_t lu +END + +/* + * BTREE-rcuradj: undo cursor adjustments on txn abort in + * renumbering recno trees. + * Should only be processed during DB_TXN_ABORT. + */ +BEGIN rcuradj 42 65 +/* Fileid of db affected. 
*/
+DB	fileid		int32_t		ld
+/* Which adjustment. */
+ARG	mode		ca_recno_arg	ld
+/* Root page number. */
+ARG	root		db_pgno_t	ld
+/* Recno of the adjustment. */
+ARG	recno		db_recno_t	ld
+/* Order number of the adjustment. */
+ARG	order		u_int32_t	lu
+END
+
+/*
+ * BTREE-relink -- Handles relinking around a deleted leaf page.
+ * Current routine moved to __db_relink.
+ *
+ */
+BEGIN_COMPAT relink		43	147
+/* Fileid of db affected. */
+DB	fileid		int32_t		ld
+/* The page being removed. */
+ARG	pgno		db_pgno_t	lu
+/* The page's original lsn. */
+POINTER	lsn		DB_LSN *	lu
+/* The previous page. */
+ARG	prev		db_pgno_t	lu
+/* The previous page's original lsn. */
+POINTER	lsn_prev	DB_LSN *	lu
+/* The next page. */
+ARG	next		db_pgno_t	lu
+/* The next page's original lsn. */
+POINTER	lsn_next	DB_LSN *	lu
+END
+
+/*
+ * BTREE-merge -- Handles merging of pages during a compaction.
+ * Current routine moved to __db_merge.
+ */
+BEGIN_COMPAT merge		44	148
+DB	fileid		int32_t		ld
+ARG	pgno		db_pgno_t	lu
+POINTER	lsn		DB_LSN *	lu
+ARG	npgno		db_pgno_t	lu
+POINTER	nlsn		DB_LSN *	lu
+DBT	hdr		DBT		s
+DBT	data		DBT		s
+DBT	ind		DBT		s
+END
diff --git a/src/btree/btree_auto.c b/src/btree/btree_auto.c
new file mode 100644
index 00000000..e5e148c5
--- /dev/null
+++ b/src/btree/btree_auto.c
@@ -0,0 +1,207 @@
+/* Do not edit: automatically built by gen_rec.awk.
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __bam_split_desc[] = { + {LOGREC_DB, SSZ(__bam_split_args, fileid), "fileid", ""}, + {LOGREC_OP, SSZ(__bam_split_args, opflags), "opflags", "%lu"}, + {LOGREC_ARG, SSZ(__bam_split_args, left), "left", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_args, llsn), "llsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_args, right), "right", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_args, rlsn), "rlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_split_args, npgno), "npgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_args, nlsn), "nlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_args, ppgno), "ppgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_args, plsn), "plsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_args, pindx), "pindx", "%lu"}, + {LOGREC_PGDBT, SSZ(__bam_split_args, pg), "pg", ""}, + {LOGREC_HDR, SSZ(__bam_split_args, pentry), "pentry", ""}, + {LOGREC_HDR, SSZ(__bam_split_args, rentry), "rentry", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_split_48_desc[] = { + {LOGREC_DB, SSZ(__bam_split_48_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, left), "left", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_48_args, llsn), "llsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, right), "right", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_48_args, rlsn), "rlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_split_48_args, npgno), "npgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_48_args, nlsn), "nlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, ppgno), "ppgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_48_args, plsn), "plsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, pindx), "pindx", "%lu"}, + {LOGREC_PGDBT, SSZ(__bam_split_48_args, pg), "pg", ""}, + {LOGREC_DBT, 
SSZ(__bam_split_48_args, pentry), "pentry", ""}, + {LOGREC_DBT, SSZ(__bam_split_48_args, rentry), "rentry", ""}, + {LOGREC_ARG, SSZ(__bam_split_48_args, opflags), "opflags", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_split_42_desc[] = { + {LOGREC_DB, SSZ(__bam_split_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_split_42_args, left), "left", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_42_args, llsn), "llsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_42_args, right), "right", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_42_args, rlsn), "rlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_42_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_split_42_args, npgno), "npgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_split_42_args, nlsn), "nlsn", ""}, + {LOGREC_ARG, SSZ(__bam_split_42_args, root_pgno), "root_pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__bam_split_42_args, pg), "pg", ""}, + {LOGREC_ARG, SSZ(__bam_split_42_args, opflags), "opflags", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_rsplit_desc[] = { + {LOGREC_DB, SSZ(__bam_rsplit_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_rsplit_args, pgno), "pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__bam_rsplit_args, pgdbt), "pgdbt", ""}, + {LOGREC_ARG, SSZ(__bam_rsplit_args, root_pgno), "root_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__bam_rsplit_args, nrec), "nrec", "%lu"}, + {LOGREC_DBT, SSZ(__bam_rsplit_args, rootent), "rootent", ""}, + {LOGREC_POINTER, SSZ(__bam_rsplit_args, rootlsn), "rootlsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_adj_desc[] = { + {LOGREC_DB, SSZ(__bam_adj_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_adj_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_adj_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_adj_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_adj_args, indx_copy), "indx_copy", "%lu"}, + {LOGREC_ARG, SSZ(__bam_adj_args, is_insert), "is_insert", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_cadjust_desc[] = { + 
{LOGREC_DB, SSZ(__bam_cadjust_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_cadjust_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_cadjust_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_cadjust_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_cadjust_args, adjust), "adjust", "%ld"}, + {LOGREC_ARG, SSZ(__bam_cadjust_args, opflags), "opflags", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_cdel_desc[] = { + {LOGREC_DB, SSZ(__bam_cdel_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_cdel_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_cdel_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_cdel_args, indx), "indx", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_repl_desc[] = { + {LOGREC_DB, SSZ(__bam_repl_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_repl_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_repl_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_repl_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_repl_args, isdeleted), "isdeleted", "%lu"}, + {LOGREC_DBT, SSZ(__bam_repl_args, orig), "orig", ""}, + {LOGREC_DBT, SSZ(__bam_repl_args, repl), "repl", ""}, + {LOGREC_ARG, SSZ(__bam_repl_args, prefix), "prefix", "%lu"}, + {LOGREC_ARG, SSZ(__bam_repl_args, suffix), "suffix", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_irep_desc[] = { + {LOGREC_DB, SSZ(__bam_irep_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_irep_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_irep_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_irep_args, indx), "indx", "%lu"}, + {LOGREC_OP, SSZ(__bam_irep_args, ptype), "ptype", "%lu"}, + {LOGREC_HDR, SSZ(__bam_irep_args, hdr), "hdr", ""}, + {LOGREC_DATA, SSZ(__bam_irep_args, data), "data", ""}, + {LOGREC_HDR, SSZ(__bam_irep_args, old), "old", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_root_desc[] = { + {LOGREC_DB, SSZ(__bam_root_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_root_args, meta_pgno), 
"meta_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__bam_root_args, root_pgno), "root_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_root_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_curadj_desc[] = { + {LOGREC_DB, SSZ(__bam_curadj_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_curadj_args, mode), "mode", "%ld"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, from_pgno), "from_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, to_pgno), "to_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, left_pgno), "left_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, first_indx), "first_indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, from_indx), "from_indx", "%lu"}, + {LOGREC_ARG, SSZ(__bam_curadj_args, to_indx), "to_indx", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_rcuradj_desc[] = { + {LOGREC_DB, SSZ(__bam_rcuradj_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_rcuradj_args, mode), "mode", "%ld"}, + {LOGREC_ARG, SSZ(__bam_rcuradj_args, root), "root", "%ld"}, + {LOGREC_ARG, SSZ(__bam_rcuradj_args, recno), "recno", "%ld"}, + {LOGREC_ARG, SSZ(__bam_rcuradj_args, order), "order", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_relink_43_desc[] = { + {LOGREC_DB, SSZ(__bam_relink_43_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_relink_43_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_relink_43_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_relink_43_args, prev), "prev", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_relink_43_args, lsn_prev), "lsn_prev", ""}, + {LOGREC_ARG, SSZ(__bam_relink_43_args, next), "next", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_relink_43_args, lsn_next), "lsn_next", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __bam_merge_44_desc[] = { + {LOGREC_DB, SSZ(__bam_merge_44_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__bam_merge_44_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_merge_44_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__bam_merge_44_args, 
npgno), "npgno", "%lu"}, + {LOGREC_POINTER, SSZ(__bam_merge_44_args, nlsn), "nlsn", ""}, + {LOGREC_DBT, SSZ(__bam_merge_44_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__bam_merge_44_args, data), "data", ""}, + {LOGREC_DBT, SSZ(__bam_merge_44_args, ind), "ind", ""}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __bam_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__bam_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_recover, DB___bam_split)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rsplit_recover, DB___bam_rsplit)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_adj_recover, DB___bam_adj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cadjust_recover, DB___bam_cadjust)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cdel_recover, DB___bam_cdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_repl_recover, DB___bam_repl)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_irep_recover, DB___bam_irep)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_root_recover, DB___bam_root)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_curadj_recover, DB___bam_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rcuradj_recover, DB___bam_rcuradj)) != 0) + return (ret); + return (0); +} diff --git a/src/btree/btree_autop.c b/src/btree/btree_autop.c new file mode 100644 index 00000000..d2bee7d0 --- /dev/null +++ b/src/btree/btree_autop.c @@ -0,0 +1,291 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __bam_split_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_split", __bam_split_desc, info)); +} + +/* + * PUBLIC: int __bam_split_48_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_48_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_split_48", __bam_split_48_desc, info)); +} + +/* + * PUBLIC: int __bam_split_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_split_42", __bam_split_42_desc, info)); +} + +/* + * PUBLIC: int __bam_rsplit_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rsplit_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_rsplit", __bam_rsplit_desc, info)); +} + +/* + * PUBLIC: int __bam_adj_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_adj_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return 
(__log_print_record(env, dbtp, lsnp, "__bam_adj", __bam_adj_desc, info)); +} + +/* + * PUBLIC: int __bam_cadjust_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cadjust_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_cadjust", __bam_cadjust_desc, info)); +} + +/* + * PUBLIC: int __bam_cdel_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cdel_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_cdel", __bam_cdel_desc, info)); +} + +/* + * PUBLIC: int __bam_repl_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_repl_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_repl", __bam_repl_desc, info)); +} + +/* + * PUBLIC: int __bam_irep_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_irep_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_irep", __bam_irep_desc, info)); +} + +/* + * PUBLIC: int __bam_root_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_root_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_root", __bam_root_desc, info)); +} + +/* + * PUBLIC: int __bam_curadj_print __P((ENV *, DBT *, 
DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_curadj_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_curadj", __bam_curadj_desc, info)); +} + +/* + * PUBLIC: int __bam_rcuradj_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rcuradj_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_rcuradj", __bam_rcuradj_desc, info)); +} + +/* + * PUBLIC: int __bam_relink_43_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_relink_43_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_relink_43", __bam_relink_43_desc, info)); +} + +/* + * PUBLIC: int __bam_merge_44_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_merge_44_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__bam_merge_44", __bam_merge_44_desc, info)); +} + +/* + * PUBLIC: int __bam_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__bam_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_print, DB___bam_split)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rsplit_print, DB___bam_rsplit)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_adj_print, DB___bam_adj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + 
__bam_cadjust_print, DB___bam_cadjust)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cdel_print, DB___bam_cdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_repl_print, DB___bam_repl)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_irep_print, DB___bam_irep)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_root_print, DB___bam_root)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_curadj_print, DB___bam_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rcuradj_print, DB___bam_rcuradj)) != 0) + return (ret); + return (0); +} diff --git a/src/clib/atoi.c b/src/clib/atoi.c new file mode 100644 index 00000000..d064ffb0 --- /dev/null +++ b/src/clib/atoi.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * atoi -- + * + * PUBLIC: #ifndef HAVE_ATOI + * PUBLIC: int atoi __P((const char *)); + * PUBLIC: #endif + */ +int +atoi(str) + const char *str; +{ + return (int)strtol(str, (char **)NULL, 10); +} diff --git a/src/clib/atol.c b/src/clib/atol.c new file mode 100644 index 00000000..9aefcd5a --- /dev/null +++ b/src/clib/atol.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * atol -- + * + * PUBLIC: #ifndef HAVE_ATOL + * PUBLIC: long atol __P((const char *)); + * PUBLIC: #endif + */ +long +atol(str) + const char *str; +{ + return strtol(str, (char **)NULL, 10); +} diff --git a/src/clib/bsearch.c b/src/clib/bsearch.c new file mode 100644 index 00000000..b59ce5fd --- /dev/null +++ b/src/clib/bsearch.c @@ -0,0 +1,38 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * bsearch -- + * + * PUBLIC: #ifndef HAVE_BSEARCH + * PUBLIC: void *bsearch __P((const void *, const void *, size_t, + * PUBLIC: size_t, int (*)(const void *, const void *))); + * PUBLIC: #endif + */ + +void *bsearch(key, base, nmemb, size, cmp) + const void *key; + const void *base; + size_t nmemb; + size_t size; + int (*cmp) __P((const void *, const void *)); +{ + size_t i; + + /* not doing a binary search, but searching linearly */ + for (i=0; i < nmemb; i++) { + if (*(pid_t*)key - *((pid_t*)base + i) == 0) + return ((pid_t*)base + i); + } + + return (NULL); +} diff --git a/src/clib/getcwd.c b/src/clib/getcwd.c new file mode 100644 index 00000000..5490f587 --- /dev/null +++ b/src/clib/getcwd.c @@ -0,0 +1,261 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#if HAVE_DIRENT_H +# include +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# define NAMLEN(dirent) (dirent)->d_namlen +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +#endif +#endif + +#define ISDOT(dp) \ + (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || \ + (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) + +#ifndef dirfd +#define dirfd(dirp) ((dirp)->dd_fd) +#endif + +/* + * getcwd -- + * Get the current working directory. + * + * PUBLIC: #ifndef HAVE_GETCWD + * PUBLIC: char *getcwd __P((char *, size_t)); + * PUBLIC: #endif + */ +char * +getcwd(pt, size) + char *pt; + size_t size; +{ + register struct dirent *dp; + register DIR *dir; + register dev_t dev; + register ino_t ino; + register int first; + register char *bpt, *bup; + struct stat s; + dev_t root_dev; + ino_t root_ino; + size_t ptsize, upsize; + int ret, save_errno; + char *ept, *eup, *up; + + /* + * If no buffer specified by the user, allocate one as necessary. + * If a buffer is specified, the size has to be non-zero. 
The path + * is built from the end of the buffer backwards. + */ + if (pt) { + ptsize = 0; + if (!size) { + __os_set_errno(EINVAL); + return (NULL); + } + if (size == 1) { + __os_set_errno(ERANGE); + return (NULL); + } + ept = pt + size; + } else { + if ((ret = + __os_malloc(NULL, ptsize = 1024 - 4, &pt)) != 0) { + __os_set_errno(ret); + return (NULL); + } + ept = pt + ptsize; + } + bpt = ept - 1; + *bpt = '\0'; + + /* + * Allocate bytes (1024 - malloc space) for the string of "../"'s. + * Should always be enough (it's 340 levels). If it's not, allocate + * as necessary. Special case the first stat, it's ".", not "..". + */ + if ((ret = __os_malloc(NULL, upsize = 1024 - 4, &up)) != 0) + goto err; + eup = up + 1024; + bup = up; + up[0] = '.'; + up[1] = '\0'; + + /* Save root values, so know when to stop. */ + if (stat("/", &s)) + goto err; + root_dev = s.st_dev; + root_ino = s.st_ino; + + __os_set_errno(0); /* XXX readdir has no error return. */ + + for (first = 1;; first = 0) { + /* Stat the current level. */ + if (lstat(up, &s)) + goto err; + + /* Save current node values. */ + ino = s.st_ino; + dev = s.st_dev; + + /* Check for reaching root. */ + if (root_dev == dev && root_ino == ino) { + *--bpt = PATH_SEPARATOR[0]; + /* + * It's unclear that it's a requirement to copy the + * path to the beginning of the buffer, but it's always + * been that way and stuff would probably break. + */ + bcopy(bpt, pt, ept - bpt); + __os_free(NULL, up); + return (pt); + } + + /* + * Build pointer to the parent directory, allocating memory + * as necessary. Max length is 3 for "../", the largest + * possible component name, plus a trailing NULL. + */ + if (bup + 3 + MAXNAMLEN + 1 >= eup) { + if (__os_realloc(NULL, upsize *= 2, &up) != 0) + goto err; + bup = up; + eup = up + upsize; + } + *bup++ = '.'; + *bup++ = '.'; + *bup = '\0'; + + /* Open and stat parent directory. */ + if (!(dir = opendir(up)) || fstat(dirfd(dir), &s)) + goto err; + + /* Add trailing slash for next directory. 
*/ + *bup++ = PATH_SEPARATOR[0]; + + /* + * If it's a mount point, have to stat each element because + * the inode number in the directory is for the entry in the + * parent directory, not the inode number of the mounted file. + */ + save_errno = 0; + if (s.st_dev == dev) { + for (;;) { + if (!(dp = readdir(dir))) + goto notfound; + if (dp->d_fileno == ino) + break; + } + } else + for (;;) { + if (!(dp = readdir(dir))) + goto notfound; + if (ISDOT(dp)) + continue; + bcopy(dp->d_name, bup, dp->d_namlen + 1); + + /* Save the first error for later. */ + if (lstat(up, &s)) { + if (save_errno == 0) + save_errno = __os_get_errno(); + __os_set_errno(0); + continue; + } + if (s.st_dev == dev && s.st_ino == ino) + break; + } + + /* + * Check for length of the current name, preceding slash, + * leading slash. + */ + if (bpt - pt < dp->d_namlen + (first ? 1 : 2)) { + size_t len, off; + + if (!ptsize) { + __os_set_errno(ERANGE); + goto err; + } + off = bpt - pt; + len = ept - bpt; + if (__os_realloc(NULL, ptsize *= 2, &pt) != 0) + goto err; + bpt = pt + off; + ept = pt + ptsize; + bcopy(bpt, ept - len, len); + bpt = ept - len; + } + if (!first) + *--bpt = PATH_SEPARATOR[0]; + bpt -= dp->d_namlen; + bcopy(dp->d_name, bpt, dp->d_namlen); + (void)closedir(dir); + + /* Truncate any file name. */ + *bup = '\0'; + } + +notfound: + /* + * If readdir set errno, use it, not any saved error; otherwise, + * didn't find the current directory in its parent directory, set + * errno to ENOENT. + */ + if (__os_get_errno_ret_zero() == 0) + __os_set_errno(save_errno == 0 ? ENOENT : save_errno); + /* FALLTHROUGH */ +err: + if (ptsize) + __os_free(NULL, pt); + __os_free(NULL, up); + return (NULL); +} diff --git a/src/clib/getopt.c b/src/clib/getopt.c new file mode 100644 index 00000000..1ee2321e --- /dev/null +++ b/src/clib/getopt.c @@ -0,0 +1,153 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ +/* + * Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +/* + * Avoid inclusion of internal header files as this + * file is used by example code. + * + * Unconditional inclusion of stdio and string are + * OK in this file. It will work on all platforms + * for which this file is used + */ +extern char *__db_rpath(const char *); +#include +#include + +int __db_getopt_reset; /* global reset for VxWorks. 
*/ + +int opterr = 1, /* if error message should be printed */ + optind = 1, /* index into parent argv vector */ + optopt, /* character checked for validity */ + optreset; /* reset getopt */ +char *optarg; /* argument associated with option */ + +#undef BADCH +#define BADCH (int)'?' +#undef BADARG +#define BADARG (int)':' +#undef EMSG +#define EMSG "" + +/* + * getopt -- + * Parse argc/argv argument vector. + * + * PUBLIC: #ifndef HAVE_GETOPT + * PUBLIC: int getopt __P((int, char * const *, const char *)); + * PUBLIC: #endif + */ +int +getopt(nargc, nargv, ostr) + int nargc; + char * const *nargv; + const char *ostr; +{ + static char *progname; + static char *place = EMSG; /* option letter processing */ + char *oli; /* option letter list index */ + + /* + * VxWorks needs to be able to repeatedly call getopt from multiple + * programs within its global name space. + */ + if (__db_getopt_reset) { + __db_getopt_reset = 0; + + opterr = optind = 1; + optopt = optreset = 0; + optarg = NULL; + progname = NULL; + place = EMSG; + } + if (!progname) { + if ((progname = __db_rpath(*nargv)) == NULL) + progname = *nargv; + else + ++progname; + } + + if (optreset || !*place) { /* update scanning pointer */ + optreset = 0; + if (optind >= nargc || *(place = nargv[optind]) != '-') { + place = EMSG; + return (EOF); + } + if (place[1] && *++place == '-') { /* found "--" */ + ++optind; + place = EMSG; + return (EOF); + } + } /* option letter okay? */ + if ((optopt = (int)*place++) == (int)':' || + !(oli = strchr(ostr, optopt))) { + /* + * if the user didn't specify '-' as an option, + * assume it means EOF. 
+ */ + if (optopt == (int)'-') + return (EOF); + if (!*place) + ++optind; + if (opterr && *ostr != ':') + (void)fprintf(stderr, + "%s: illegal option -- %c\n", progname, optopt); + return (BADCH); + } + if (*++oli != ':') { /* don't need argument */ + optarg = NULL; + if (!*place) + ++optind; + } + else { /* need an argument */ + if (*place) /* no white space */ + optarg = place; + else if (nargc <= ++optind) { /* no arg */ + place = EMSG; + if (*ostr == ':') + return (BADARG); + if (opterr) + (void)fprintf(stderr, + "%s: option requires an argument -- %c\n", + progname, optopt); + return (BADCH); + } + else /* white space */ + optarg = nargv[optind]; + place = EMSG; + ++optind; + } + return (optopt); /* dump back option letter */ +} diff --git a/src/clib/isalpha.c b/src/clib/isalpha.c new file mode 100644 index 00000000..352dcf6c --- /dev/null +++ b/src/clib/isalpha.c @@ -0,0 +1,28 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * isalpha -- + * + * PUBLIC: #ifndef HAVE_ISALPHA + * PUBLIC: int isalpha __P((int)); + * PUBLIC: #endif + */ +int +isalpha(c) + int c; +{ + /* + * Depends on ASCII-like character ordering. + */ + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ? 1 : 0); +} diff --git a/src/clib/isprint.c b/src/clib/isprint.c new file mode 100644 index 00000000..64d79369 --- /dev/null +++ b/src/clib/isprint.c @@ -0,0 +1,28 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * isprint -- + * + * PUBLIC: #ifndef HAVE_ISPRINT + * PUBLIC: int isprint __P((int)); + * PUBLIC: #endif + */ +int +isprint(c) + int c; +{ + /* + * Depends on ASCII character values. + */ + return ((c >= ' ' && c <= '~') ? 
1 : 0); +} diff --git a/src/clib/isspace.c b/src/clib/isspace.c new file mode 100644 index 00000000..55a17f30 --- /dev/null +++ b/src/clib/isspace.c @@ -0,0 +1,26 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * isspace -- + * + * PUBLIC: #ifndef HAVE_ISSPACE + * PUBLIC: int isspace __P((int)); + * PUBLIC: #endif + */ +int +isspace(c) + int c; +{ + return (c == '\t' || c == '\n' || + c == '\v' || c == '\f' || c == '\r' || c == ' ' ? 1 : 0); +} diff --git a/src/clib/memcmp.c b/src/clib/memcmp.c new file mode 100644 index 00000000..d402996c --- /dev/null +++ b/src/clib/memcmp.c @@ -0,0 +1,62 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * memcmp -- + * + * PUBLIC: #ifndef HAVE_MEMCMP + * PUBLIC: int memcmp __P((const void *, const void *, size_t)); + * PUBLIC: #endif + */ +int +memcmp(s1, s2, n) + const void *s1, *s2; + size_t n; +{ + if (n != 0) { + unsigned char *p1 = (unsigned char *)s1, + *p2 = (unsigned char *)s2; + do { + if (*p1++ != *p2++) + return (*--p1 - *--p2); + } while (--n != 0); + } + return (0); +} diff --git a/src/clib/memmove.c b/src/clib/memmove.c new file mode 100644 index 00000000..a4bd7d97 --- /dev/null +++ b/src/clib/memmove.c @@ -0,0 +1,150 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * sizeof(word) MUST BE A POWER OF TWO + * SO THAT wmask BELOW IS ALL ONES + */ +typedef int word; /* "word" used for optimal copy speed */ + +#undef wsize +#define wsize sizeof(word) +#undef wmask +#define wmask (wsize - 1) + +/* + * Copy a block of memory, handling overlap. + * This is the routine that actually implements + * (the portable versions of) bcopy, memcpy, and memmove. 
+ */ +#ifdef MEMCOPY +/* + * PUBLIC: #ifndef HAVE_MEMCPY + * PUBLIC: void *memcpy __P((void *, const void *, size_t)); + * PUBLIC: #endif + */ +void * +memcpy(dst0, src0, length) +#else +#ifdef MEMMOVE +/* + * PUBLIC: #ifndef HAVE_MEMMOVE + * PUBLIC: void *memmove __P((void *, const void *, size_t)); + * PUBLIC: #endif + */ +void * +memmove(dst0, src0, length) +#else +void +bcopy(src0, dst0, length) +#endif +#endif + void *dst0; + const void *src0; + register size_t length; +{ + register char *dst = dst0; + register const char *src = src0; + register size_t t; + + if (length == 0 || dst == src) /* nothing to do */ + goto done; + + /* + * Macros: loop-t-times; and loop-t-times, t>0 + */ +#undef TLOOP +#define TLOOP(s) if (t) TLOOP1(s) +#undef TLOOP1 +#define TLOOP1(s) do { s; } while (--t) + + if ((unsigned long)dst < (unsigned long)src) { + /* + * Copy forward. + */ + t = (size_t)src; /* only need low bits */ + if ((t | (size_t)dst) & wmask) { + /* + * Try to align operands. This cannot be done + * unless the low bits match. + */ + if ((t ^ (size_t)dst) & wmask || length < wsize) + t = length; + else + t = wsize - (t & wmask); + length -= t; + TLOOP1(*dst++ = *src++); + } + /* + * Copy whole words, then mop up any trailing bytes. + */ + t = length / wsize; + TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize); + t = length & wmask; + TLOOP(*dst++ = *src++); + } else { + /* + * Copy backwards. Otherwise essentially the same. + * Alignment works as before, except that it takes + * (t&wmask) bytes to align, not wsize-(t&wmask). 
+ */ + src += length; + dst += length; + t = (size_t)src; + if ((t | (size_t)dst) & wmask) { + if ((t ^ (size_t)dst) & wmask || length <= wsize) + t = length; + else + t &= wmask; + length -= t; + TLOOP1(*--dst = *--src); + } + t = length / wsize; + TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src); + t = length & wmask; + TLOOP(*--dst = *--src); + } +done: +#if defined(MEMCOPY) || defined(MEMMOVE) + return (dst0); +#else + return; +#endif +} diff --git a/src/clib/printf.c b/src/clib/printf.c new file mode 100644 index 00000000..be8c4818 --- /dev/null +++ b/src/clib/printf.c @@ -0,0 +1,116 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * printf -- + * + * PUBLIC: #ifndef HAVE_PRINTF + * PUBLIC: int printf __P((const char *, ...)); + * PUBLIC: #endif + */ +#ifndef HAVE_PRINTF +int +#ifdef STDC_HEADERS +printf(const char *fmt, ...) +#else +printf(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + va_list ap; + size_t len; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + len = (size_t)vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + /* + * We implement printf/fprintf with fwrite, because Berkeley DB uses + * fwrite in other places. + */ + return (fwrite( + buf, sizeof(char), (size_t)len, stdout) == len ? (int)len: -1); +} +#endif /* HAVE_PRINTF */ + +/* + * fprintf -- + * + * PUBLIC: #ifndef HAVE_PRINTF + * PUBLIC: int fprintf __P((FILE *, const char *, ...)); + * PUBLIC: #endif + */ +#ifndef HAVE_PRINTF +int +#ifdef STDC_HEADERS +fprintf(FILE *fp, const char *fmt, ...) +#else +fprintf(fp, fmt, va_alist) + FILE *fp; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + size_t len; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. 
*/ + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + len = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + /* + * We implement printf/fprintf with fwrite, because Berkeley DB uses + * fwrite in other places. + */ + return (fwrite( + buf, sizeof(char), (size_t)len, fp) == len ? (int)len: -1); +} +#endif /* HAVE_PRINTF */ + +/* + * vfprintf -- + * + * PUBLIC: #ifndef HAVE_PRINTF + * PUBLIC: int vfprintf __P((FILE *, const char *, va_list)); + * PUBLIC: #endif + */ +#ifndef HAVE_PRINTF +int +vfprintf(fp, fmt, ap) + FILE *fp; + const char *fmt; + va_list ap; +{ + size_t len; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + + len = vsnprintf(buf, sizeof(buf), fmt, ap); + + /* + * We implement printf/fprintf with fwrite, because Berkeley DB uses + * fwrite in other places. + */ + return (fwrite( + buf, sizeof(char), (size_t)len, fp) == len ? (int)len: -1); +} +#endif /* HAVE_PRINTF */ diff --git a/src/clib/qsort.c b/src/clib/qsort.c new file mode 100644 index 00000000..cec6288c --- /dev/null +++ b/src/clib/qsort.c @@ -0,0 +1,181 @@ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; + * Id: qsort.c,v 1.4 1996/04/19 18:40:20 bde + */ + +#include "db_config.h" + +#include "db_int.h" + +static char *med3 __P((char *, + char *, char *, int (*)(const void *, const void *))); +static void swapfunc __P((char *, char *, int, int)); + +#define min(a, b) (a) < (b) ? a : b + +/* + * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". + */ +#define swapcode(TYPE, parmi, parmj, n) { \ + long i = (n) / sizeof(TYPE); \ + register TYPE *pi = (TYPE *) (parmi); \ + register TYPE *pj = (TYPE *) (parmj); \ + do { \ + register TYPE t = *pi; \ + *pi++ = *pj; \ + *pj++ = t; \ + } while (--i > 0); \ +} + +#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ + es % sizeof(long) ? 2 : es == sizeof(long)? 
0 : 1; + +static inline void +swapfunc(a, b, n, swaptype) + char *a, *b; + int n, swaptype; +{ + if (swaptype <= 1) + swapcode(long, a, b, n) + else + swapcode(char, a, b, n) +} + +#define swap(a, b) \ + if (swaptype == 0) { \ + long t = *(long *)(a); \ + *(long *)(a) = *(long *)(b); \ + *(long *)(b) = t; \ + } else \ + swapfunc(a, b, es, swaptype) + +#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) + +static inline char * +med3(a, b, c, cmp) + char *a, *b, *c; + int (*cmp)(const void *, const void *); +{ + return cmp(a, b) < 0 ? + (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a )) + :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c )); +} + +/* + * PUBLIC: #ifndef HAVE_QSORT + * PUBLIC: void qsort __P((void *, + * PUBLIC: size_t, size_t, int(*)(const void *, const void *))); + * PUBLIC: #endif + */ +void +qsort(a, n, es, cmp) + void *a; + size_t n, es; + int (*cmp) __P((const void *, const void *)); +{ + char *pa, *pb, *pc, *pd, *pl, *pm, *pn; + int d, r, swaptype, swap_cnt; + +loop: SWAPINIT(a, es); + swap_cnt = 0; + if (n < 7) { + for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) + for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0; + pl -= es) + swap(pl, pl - es); + return; + } + pm = (char *)a + (n / 2) * es; + if (n > 7) { + pl = a; + pn = (char *)a + (n - 1) * es; + if (n > 40) { + d = (n / 8) * es; + pl = med3(pl, pl + d, pl + 2 * d, cmp); + pm = med3(pm - d, pm, pm + d, cmp); + pn = med3(pn - 2 * d, pn - d, pn, cmp); + } + pm = med3(pl, pm, pn, cmp); + } + swap(a, pm); + pa = pb = (char *)a + es; + + pc = pd = (char *)a + (n - 1) * es; + for (;;) { + while (pb <= pc && (r = cmp(pb, a)) <= 0) { + if (r == 0) { + swap_cnt = 1; + swap(pa, pb); + pa += es; + } + pb += es; + } + while (pb <= pc && (r = cmp(pc, a)) >= 0) { + if (r == 0) { + swap_cnt = 1; + swap(pc, pd); + pd -= es; + } + pc -= es; + } + if (pb > pc) + break; + swap(pb, pc); + swap_cnt = 1; + pb += es; + pc -= es; + } + if (swap_cnt == 0) { /* Switch to insertion sort */ + 
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) + for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0; + pl -= es) + swap(pl, pl - es); + return; + } + + pn = (char *)a + n * es; + r = min(pa - (char *)a, pb - pa); + vecswap(a, pb - r, r); + r = min((int)(pd - pc), (int)(pn - pd - es)); + vecswap(pb, pn - r, r); + if ((r = (int)(pb - pa)) > (int)es) + qsort(a, r / es, es, cmp); + if ((r = (int)(pd - pc)) > (int)es) { + /* Iterate rather than recurse to save stack space */ + a = pn - r; + n = r / es; + goto loop; + } +/* qsort(pn - r, r / es, es, cmp);*/ +} diff --git a/src/clib/raise.c b/src/clib/raise.c new file mode 100644 index 00000000..253444c0 --- /dev/null +++ b/src/clib/raise.c @@ -0,0 +1,26 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * raise -- + * Send a signal to the current process. + * + * PUBLIC: #ifndef HAVE_RAISE + * PUBLIC: int raise __P((int)); + * PUBLIC: #endif + */ +int +raise(s) + int s; +{ + return (kill(getpid(), s)); +} diff --git a/src/clib/rand.c b/src/clib/rand.c new file mode 100644 index 00000000..6b810060 --- /dev/null +++ b/src/clib/rand.c @@ -0,0 +1,25 @@ +/* + * Copied from the ANSI C standard 4.10.2.2. 
+ */ +#include "db_config.h" + +#include "db_int.h" + +/* + * rand, srand -- + * + * PUBLIC: #ifndef HAVE_RAND + * PUBLIC: int rand __P((void)); + * PUBLIC: void srand __P((unsigned int)); + * PUBLIC: #endif + */ +int rand(void) /* RAND_MAX assumed to be 32767 */ +{ + DB_GLOBAL(rand_next) = DB_GLOBAL(rand_next) * 1103515245 + 12345; + return (unsigned int) (DB_GLOBAL(rand_next)/65536) % 32768; +} + +void srand(unsigned int seed) +{ + DB_GLOBAL(rand_next) = seed; +} diff --git a/src/clib/snprintf.c b/src/clib/snprintf.c new file mode 100644 index 00000000..ded44e22 --- /dev/null +++ b/src/clib/snprintf.c @@ -0,0 +1,149 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF) +static void sprintf_overflow __P((void)); +static int sprintf_retcharpnt __P((void)); +#endif + +/* + * snprintf -- + * Bounded version of sprintf. + * + * PUBLIC: #ifndef HAVE_SNPRINTF + * PUBLIC: int snprintf __P((char *, size_t, const char *, ...)); + * PUBLIC: #endif + */ +#ifndef HAVE_SNPRINTF +int +#ifdef STDC_HEADERS +snprintf(char *str, size_t n, const char *fmt, ...) +#else +snprintf(str, n, fmt, va_alist) + char *str; + size_t n; + const char *fmt; + va_dcl +#endif +{ + static int ret_charpnt = -1; + va_list ap; + size_t len; + + if (ret_charpnt == -1) + ret_charpnt = sprintf_retcharpnt(); + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + len = (size_t)vsprintf(str, fmt, ap); + if (ret_charpnt) + len = strlen(str); + + va_end(ap); + + if (len >= n) { + sprintf_overflow(); + /* NOTREACHED */ + } + return ((int)len); +} +#endif + +/* + * vsnprintf -- + * Bounded version of vsprintf. 
+ * + * PUBLIC: #ifndef HAVE_VSNPRINTF + * PUBLIC: int vsnprintf __P((char *, size_t, const char *, va_list)); + * PUBLIC: #endif + */ +#ifndef HAVE_VSNPRINTF +int +vsnprintf(str, n, fmt, ap) + char *str; + size_t n; + const char *fmt; + va_list ap; +{ + static int ret_charpnt = -1; + size_t len; + + if (ret_charpnt == -1) + ret_charpnt = sprintf_retcharpnt(); + + len = (size_t)vsprintf(str, fmt, ap); + if (ret_charpnt) + len = strlen(str); + + if (len >= n) { + sprintf_overflow(); + /* NOTREACHED */ + } + return ((int)len); +} +#endif + +#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF) +static void +sprintf_overflow() +{ + /* + * !!! + * We're potentially manipulating strings handed us by the application, + * and on systems without a real snprintf() the sprintf() calls could + * have overflowed the buffer. We can't do anything about it now, but + * we don't want to return control to the application, we might have + * overwritten the stack with a Trojan horse. We're not trying to do + * anything recoverable here because systems without snprintf support + * are pretty rare anymore. + */ +#define OVERFLOW_ERROR "internal buffer overflow, process ended\n" +#ifndef STDERR_FILENO +#define STDERR_FILENO 2 +#endif + (void)write(STDERR_FILENO, OVERFLOW_ERROR, sizeof(OVERFLOW_ERROR) - 1); + + /* Be polite. */ + exit(1); + + /* But firm. */ + __os_abort(NULL); + + /* NOTREACHED */ +} + +static int +sprintf_retcharpnt() +{ + int ret_charpnt; + char buf[10]; + + /* + * Some old versions of sprintf return a pointer to the first argument + * instead of a character count. Assume the return value of snprintf, + * vsprintf, etc. will be the same as sprintf, and check the easy one. + * + * We do this test at run-time because it's not a test we can do in a + * cross-compilation environment. 
+ */ + + ret_charpnt = + (int)sprintf(buf, "123") != 3 || + (int)sprintf(buf, "123456789") != 9 || + (int)sprintf(buf, "1234") != 4; + + return (ret_charpnt); +} +#endif diff --git a/src/clib/strcasecmp.c b/src/clib/strcasecmp.c new file mode 100644 index 00000000..287895ce --- /dev/null +++ b/src/clib/strcasecmp.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 1987, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strcasecmp -- + * Do strcmp(3) in a case-insensitive manner. + * + * PUBLIC: #ifndef HAVE_STRCASECMP + * PUBLIC: int strcasecmp __P((const char *, const char *)); + * PUBLIC: #endif + */ +int +strcasecmp(s1, s2) + const char *s1, *s2; +{ + u_char s1ch, s2ch; + + for (;;) { + s1ch = *s1++; + s2ch = *s2++; + if (s1ch >= 'A' && s1ch <= 'Z') /* tolower() */ + s1ch += 32; + if (s2ch >= 'A' && s2ch <= 'Z') /* tolower() */ + s2ch += 32; + if (s1ch != s2ch) + return (s1ch - s2ch); + if (s1ch == '\0') + return (0); + } + /* NOTREACHED */ +} + +/* + * strncasecmp -- + * Do strncmp(3) in a case-insensitive manner. + * + * PUBLIC: #ifndef HAVE_STRCASECMP + * PUBLIC: int strncasecmp __P((const char *, const char *, size_t)); + * PUBLIC: #endif + */ +int +strncasecmp(s1, s2, n) + const char *s1, *s2; + register size_t n; +{ + u_char s1ch, s2ch; + + for (; n != 0; --n) { + s1ch = *s1++; + s2ch = *s2++; + if (s1ch >= 'A' && s1ch <= 'Z') /* tolower() */ + s1ch += 32; + if (s2ch >= 'A' && s2ch <= 'Z') /* tolower() */ + s2ch += 32; + if (s1ch != s2ch) + return (s1ch - s2ch); + if (s1ch == '\0') + return (0); + } + return (0); +} diff --git a/src/clib/strcat.c b/src/clib/strcat.c new file mode 100644 index 00000000..d99c9070 --- /dev/null +++ b/src/clib/strcat.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strcat -- + * + * PUBLIC: #ifndef HAVE_STRCAT + * PUBLIC: char *strcat __P((char *, const char *)); + * PUBLIC: #endif + */ +char * +strcat(char *s, const char *append) +{ + char *save = s; + + for (; *s; ++s); + while ((*s++ = *append++)); + return (save); +} diff --git a/src/clib/strchr.c b/src/clib/strchr.c new file mode 100644 index 00000000..a8ac4ce0 --- /dev/null +++ b/src/clib/strchr.c @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strchr -- + * + * PUBLIC: #ifndef HAVE_STRCHR + * PUBLIC: char *strchr __P((const char *, int)); + * PUBLIC: #endif + */ +char *strchr(const char *p, int ch) +{ + char c; + + c = ch; + for (;; ++p) { + if (*p == c) + return ((char *)p); + if (*p == '\0') + return (NULL); + } + /* NOTREACHED */ +} diff --git a/src/clib/strdup.c b/src/clib/strdup.c new file mode 100644 index 00000000..5863340c --- /dev/null +++ b/src/clib/strdup.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strdup -- + * + * PUBLIC: #ifndef HAVE_STRDUP + * PUBLIC: char *strdup __P((const char *)); + * PUBLIC: #endif + */ +char * +strdup(str) + const char *str; +{ + size_t len; + char *copy; + + len = strlen(str) + 1; + if (!(copy = malloc((u_int)len))) + return (NULL); + memcpy(copy, str, len); + return (copy); +} diff --git a/src/clib/strerror.c b/src/clib/strerror.c new file mode 100644 index 00000000..6637791c --- /dev/null +++ b/src/clib/strerror.c @@ -0,0 +1,225 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +/* + * Copyright (c) 1982, 1985, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * __FBSDID("FreeBSD: /repoman/r/ncvs/src/lib/libc/gen/errlst.c,v 1.8 2005/04/02 12:33:28 das Exp $"); + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_strerror -- + * Return the string associated with an errno. 
+ * + * PUBLIC: #ifndef HAVE_STRERROR + * PUBLIC: char *strerror __P((int)); + * PUBLIC: #endif + */ +char * +strerror(num) + int num; +{ +#define ERRSTR(v, s) do { \ + if (num == (v)) \ + return (s); \ +} while (0) + ERRSTR(0, "Undefined error: 0"); + ERRSTR(EPERM, "Operation not permitted"); + ERRSTR(ENOENT, "No such file or directory"); + ERRSTR(ESRCH, "No such process"); + ERRSTR(EINTR, "Interrupted system call"); + ERRSTR(EIO, "Input/output error"); + ERRSTR(ENXIO, "Device not configured"); + ERRSTR(E2BIG, "Argument list too long"); + ERRSTR(ENOEXEC, "Exec format error"); + ERRSTR(EBADF, "Bad file descriptor"); + ERRSTR(ECHILD, "No child processes"); + ERRSTR(EDEADLK, "Resource deadlock avoided"); + ERRSTR(ENOMEM, "Cannot allocate memory"); + ERRSTR(EACCES, "Permission denied"); + ERRSTR(EFAULT, "Bad address"); + ERRSTR(ENOTBLK, "Block device required"); + ERRSTR(EBUSY, "Device busy"); + ERRSTR(EEXIST, "File exists"); + ERRSTR(EXDEV, "Cross-device link"); + ERRSTR(ENODEV, "Operation not supported by device"); + ERRSTR(ENOTDIR, "Not a directory"); + ERRSTR(EISDIR, "Is a directory"); + ERRSTR(EINVAL, "Invalid argument"); + ERRSTR(ENFILE, "Too many open files in system"); + ERRSTR(EMFILE, "Too many open files"); + ERRSTR(ENOTTY, "Inappropriate ioctl for device"); + ERRSTR(ETXTBSY, "Text file busy"); + ERRSTR(EFBIG, "File too large"); + ERRSTR(ENOSPC, "No space left on device"); + ERRSTR(ESPIPE, "Illegal seek"); + ERRSTR(EROFS, "Read-only file system"); + ERRSTR(EMLINK, "Too many links"); + ERRSTR(EPIPE, "Broken pipe"); + +/* math software */ + ERRSTR(EDOM, "Numerical argument out of domain"); + ERRSTR(ERANGE, "Result too large"); + +/* non-blocking and interrupt i/o */ + ERRSTR(EAGAIN, "Resource temporarily unavailable"); + ERRSTR(EWOULDBLOCK, "Resource temporarily unavailable"); + ERRSTR(EINPROGRESS, "Operation now in progress"); + ERRSTR(EALREADY, "Operation already in progress"); + +/* ipc/network software -- argument errors */ + ERRSTR(ENOTSOCK, "Socket 
operation on non-socket"); + ERRSTR(EDESTADDRREQ, "Destination address required"); + ERRSTR(EMSGSIZE, "Message too long"); + ERRSTR(EPROTOTYPE, "Protocol wrong type for socket"); + ERRSTR(ENOPROTOOPT, "Protocol not available"); + ERRSTR(EPROTONOSUPPORT, "Protocol not supported"); + ERRSTR(ESOCKTNOSUPPORT, "Socket type not supported"); + ERRSTR(EOPNOTSUPP, "Operation not supported"); + ERRSTR(EPFNOSUPPORT, "Protocol family not supported"); + ERRSTR(EAFNOSUPPORT, "Address family not supported by protocol family"); + ERRSTR(EADDRINUSE, "Address already in use"); + ERRSTR(EADDRNOTAVAIL, "Can't assign requested address"); + +/* ipc/network software -- operational errors */ + ERRSTR(ENETDOWN, "Network is down"); + ERRSTR(ENETUNREACH, "Network is unreachable"); + ERRSTR(ENETRESET, "Network dropped connection on reset"); + ERRSTR(ECONNABORTED, "Software caused connection abort"); + ERRSTR(ECONNRESET, "Connection reset by peer"); + ERRSTR(ENOBUFS, "No buffer space available"); + ERRSTR(EISCONN, "Socket is already connected"); + ERRSTR(ENOTCONN, "Socket is not connected"); + ERRSTR(ESHUTDOWN, "Can't send after socket shutdown"); + ERRSTR(ETOOMANYREFS, "Too many references: can't splice"); + ERRSTR(ETIMEDOUT, "Operation timed out"); + ERRSTR(ECONNREFUSED, "Connection refused"); + + ERRSTR(ELOOP, "Too many levels of symbolic links"); + ERRSTR(ENAMETOOLONG, "File name too long"); + +/* should be rearranged */ + ERRSTR(EHOSTDOWN, "Host is down"); + ERRSTR(EHOSTUNREACH, "No route to host"); + ERRSTR(ENOTEMPTY, "Directory not empty"); + +/* quotas & mush */ + ERRSTR(EPROCLIM, "Too many processes"); + ERRSTR(EUSERS, "Too many users"); + ERRSTR(EDQUOT, "Disc quota exceeded"); + +/* Network File System */ + ERRSTR(ESTALE, "Stale NFS file handle"); + ERRSTR(EREMOTE, "Too many levels of remote in path"); + ERRSTR(EBADRPC, "RPC struct is bad"); + ERRSTR(ERPCMISMATCH, "RPC version wrong"); + ERRSTR(EPROGUNAVAIL, "RPC prog. 
not avail"); + ERRSTR(EPROGMISMATCH, "Program version wrong"); + ERRSTR(EPROCUNAVAIL, "Bad procedure for program"); + + ERRSTR(ENOLCK, "No locks available"); + ERRSTR(ENOSYS, "Function not implemented"); + ERRSTR(EFTYPE, "Inappropriate file type or format"); +#ifdef EAUTH + ERRSTR(EAUTH, "Authentication error"); +#endif +#ifdef ENEEDAUTH + ERRSTR(ENEEDAUTH, "Need authenticator"); +#endif + ERRSTR(EIDRM, "Identifier removed"); + ERRSTR(ENOMSG, "No message of desired type"); +#ifdef EOVERFLOW + ERRSTR(EOVERFLOW, "Value too large to be stored in data type"); +#endif + ERRSTR(ECANCELED, "Operation canceled"); + ERRSTR(EILSEQ, "Illegal byte sequence"); +#ifdef ENOATTR + ERRSTR(ENOATTR, "Attribute not found"); +#endif + +/* General */ +#ifdef EDOOFUS + ERRSTR(EDOOFUS, "Programming error"); +#endif + +#ifdef EBADMSG + ERRSTR(EBADMSG, "Bad message"); +#endif +#ifdef EMULTIHOP + ERRSTR(EMULTIHOP, "Multihop attempted"); +#endif +#ifdef ENOLINK + ERRSTR(ENOLINK, "Link has been severed"); +#endif +#ifdef EPROTO + ERRSTR(EPROTO, "Protocol error"); +#endif + + return (__db_unknown_error(num)); +} diff --git a/src/clib/strncat.c b/src/clib/strncat.c new file mode 100644 index 00000000..ce8273a4 --- /dev/null +++ b/src/clib/strncat.c @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strncat -- + * + * PUBLIC: #ifndef HAVE_STRNCAT + * PUBLIC: char *strncat __P((char *, const char *, size_t)); + * PUBLIC: #endif + */ +/* + * Concatenate src on the end of dst. At most strlen(dst)+n+1 bytes + * are written at dst (at most n+1 bytes being appended). Return dst. 
+ */ +char * +strncat(char *dst, const char *src, size_t n) +{ + if (n != 0) { + char *d = dst; + const char *s = src; + + while (*d != 0) + d++; + do { + if ((*d = *s++) == 0) + break; + d++; + } while (--n != 0); + *d = 0; + } + return (dst); +} diff --git a/src/clib/strncmp.c b/src/clib/strncmp.c new file mode 100644 index 00000000..9738b5b2 --- /dev/null +++ b/src/clib/strncmp.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strncmp -- + * + * PUBLIC: #ifndef HAVE_STRNCMP + * PUBLIC: int strncmp __P((const char *, const char *, size_t)); + * PUBLIC: #endif + */ +int +strncmp(s1, s2, n) + const char *s1, *s2; + size_t n; +{ + + if (n == 0) + return (0); + do { + if (*s1 != *s2++) + return (*(const unsigned char *)s1 - + *(const unsigned char *)(s2 - 1)); + if (*s1++ == 0) + break; + } while (--n != 0); + return (0); +} diff --git a/src/clib/strrchr.c b/src/clib/strrchr.c new file mode 100644 index 00000000..8753e943 --- /dev/null +++ b/src/clib/strrchr.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * strrchr -- + * + * PUBLIC: #ifndef HAVE_STRRCHR + * PUBLIC: char *strrchr __P((const char *, int)); + * PUBLIC: #endif + */ +char *strrchr(const char *p, int ch) +{ + char *save; + char c; + + c = ch; + for (save = NULL;; ++p) { + if (*p == c) + save = (char *)p; + if (*p == '\0') + return (save); + } + /* NOTREACHED */ +} diff --git a/src/clib/strsep.c b/src/clib/strsep.c new file mode 100644 index 00000000..f79d0f5c --- /dev/null +++ b/src/clib/strsep.c @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * Get next token from string *stringp, where tokens are possibly-empty + * strings separated by characters from delim. + * + * Writes NULs into the string at *stringp to end tokens. 
+ * delim need not remain constant from call to call. + * On return, *stringp points past the last NUL written (if there might + * be further tokens), or is NULL (if there are definitely no more tokens). + * + * If *stringp is NULL, strsep returns NULL. + * + * PUBLIC: #ifndef HAVE_STRSEP + * PUBLIC: char *strsep __P((char **, const char *)); + * PUBLIC: #endif + */ +char * +strsep(stringp, delim) + char **stringp; + const char *delim; +{ + char *s; + const char *spanp; + int c, sc; + char *tok; + + if ((s = *stringp) == NULL) + return (NULL); + for (tok = s;;) { + c = *s++; + spanp = delim; + do { + if ((sc = *spanp++) == c) { + if (c == 0) + s = NULL; + else + s[-1] = 0; + *stringp = s; + return (tok); + } + } while (sc != 0); + } + /* NOTREACHED */ +} diff --git a/src/clib/strtol.c b/src/clib/strtol.c new file mode 100644 index 00000000..eb76b8f4 --- /dev/null +++ b/src/clib/strtol.c @@ -0,0 +1,142 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * Convert a string to a long integer. + * + * Assumes that the upper and lower case + * alphabets and digits are each contiguous. + * + * PUBLIC: #ifndef HAVE_STRTOL + * PUBLIC: long strtol __P((const char *, char **, int)); + * PUBLIC: #endif + */ +long +strtol(nptr, endptr, base) + const char * nptr; + char ** endptr; + int base; +{ + const char *s; + unsigned long acc; + char c; + unsigned long cutoff; + int neg, any, cutlim; + + /* + * Skip white space and pick up leading +/- sign if any. + * If base is 0, allow 0x for hex and 0 for octal, else + * assume decimal; if base is already 16, allow 0x. + */ + s = nptr; + do { + c = *s++; + } while (isspace((unsigned char)c)); + if (c == '-') { + neg = 1; + c = *s++; + } else { + neg = 0; + if (c == '+') + c = *s++; + } + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + acc = any = 0; + if (base < 2 || base > 36) + goto noconv; + + /* + * Compute the cutoff value between legal numbers and illegal + * numbers. 
That is the largest legal value, divided by the + * base. An input number that is greater than this value, if + * followed by a legal input character, is too big. One that + * is equal to this value may be valid or not; the limit + * between valid and invalid numbers is then based on the last + * digit. For instance, if the range for longs is + * [-2147483648..2147483647] and the input base is 10, + * cutoff will be set to 214748364 and cutlim to either + * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated + * a value > 214748364, or equal but the next digit is > 7 (or 8), + * the number is too big, and we will return a range error. + * + * Set 'any' if any `digits' consumed; make it negative to indicate + * overflow. + */ + cutoff = neg ? (unsigned long)-(LONG_MIN + LONG_MAX) + LONG_MAX + : LONG_MAX; + cutlim = cutoff % base; + cutoff /= base; + for ( ; ; c = *s++) { + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'A' && c <= 'Z') + c -= 'A' - 10; + else if (c >= 'a' && c <= 'z') + c -= 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = neg ? LONG_MIN : LONG_MAX; + errno = ERANGE; + } else if (!any) { +noconv: + errno = EINVAL; + } else if (neg) + acc = -(long)acc; + if (endptr != NULL) + *endptr = (char *)(any ? s - 1 : nptr); + return (acc); +} diff --git a/src/clib/strtoul.c b/src/clib/strtoul.c new file mode 100644 index 00000000..d0495a33 --- /dev/null +++ b/src/clib/strtoul.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * Convert a string to an unsigned long integer. + * + * Assumes that the upper and lower case + * alphabets and digits are each contiguous. 
+ * + * PUBLIC: #ifndef HAVE_STRTOUL + * PUBLIC: unsigned long strtoul __P((const char *, char **, int)); + * PUBLIC: #endif + */ +unsigned long +strtoul(nptr, endptr, base) + const char * nptr; + char ** endptr; + int base; +{ + const char *s; + unsigned long acc; + char c; + unsigned long cutoff; + int neg, any, cutlim; + + /* + * See strtol for comments as to the logic used. + */ + s = nptr; + do { + c = *s++; + } while (isspace((unsigned char)c)); + if (c == '-') { + neg = 1; + c = *s++; + } else { + neg = 0; + if (c == '+') + c = *s++; + } + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + acc = any = 0; + if (base < 2 || base > 36) + goto noconv; + + cutoff = ULONG_MAX / base; + cutlim = ULONG_MAX % base; + for ( ; ; c = *s++) { + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'A' && c <= 'Z') + c -= 'A' - 10; + else if (c >= 'a' && c <= 'z') + c -= 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = ULONG_MAX; + errno = ERANGE; + } else if (!any) { +noconv: + errno = EINVAL; + } else if (neg) + acc = -acc; + if (endptr != NULL) + *endptr = (char *)(any ? s - 1 : nptr); + return (acc); +} diff --git a/src/clib/time.c b/src/clib/time.c new file mode 100644 index 00000000..debb661b --- /dev/null +++ b/src/clib/time.c @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * time -- + * + * PUBLIC: #ifndef HAVE_TIME + * PUBLIC: time_t time __P((time_t *)); + * PUBLIC: #endif + */ +time_t +time(nowp) + time_t *nowp; +{ + db_timespec t; + time_t res; + + __os_gettime(NULL, &t, 0); + + res = t.tv_sec + t.tv_nsec / NS_PER_SEC; + + if (nowp != NULL) + *nowp = res; + return (res); +} diff --git a/src/common/clock.c b/src/common/clock.c new file mode 100644 index 00000000..720fcbf5 --- /dev/null +++ b/src/common/clock.c @@ -0,0 +1,57 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +/* + * __clock_set_expires -- + * Set the expire time given the time to live. + * + * PUBLIC: void __clock_set_expires __P((ENV *, db_timespec *, db_timeout_t)); + */ +void +__clock_set_expires(env, timespecp, timeout) + ENV *env; + db_timespec *timespecp; + db_timeout_t timeout; +{ + db_timespec v; + + /* + * If timespecp is set then it contains "now". This avoids repeated + * system calls to get the time. + */ + if (!timespecisset(timespecp)) + __os_gettime(env, timespecp, 1); + + /* Convert the microsecond timeout argument to a timespec. */ + DB_TIMEOUT_TO_TIMESPEC(timeout, &v); + + /* Add the timeout to "now". */ + timespecadd(timespecp, &v); +} + +/* + * __clock_expired -- determine if a timeout has expired. 
+ * + * PUBLIC: int __clock_expired __P((ENV *, db_timespec *, db_timespec *)); + */ +int +__clock_expired(env, now, timespecp) + ENV *env; + db_timespec *now, *timespecp; +{ + if (!timespecisset(timespecp)) + return (0); + + if (!timespecisset(now)) + __os_gettime(env, now, 1); + + return (timespeccmp(now, timespecp, >=)); +} diff --git a/src/common/crypto_stub.c b/src/common/crypto_stub.c new file mode 100644 index 00000000..33b6ae62 --- /dev/null +++ b/src/common/crypto_stub.c @@ -0,0 +1,44 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __crypto_region_init -- + * Initialize crypto. + * + * + * !!! + * We don't put this stub file in the crypto/ directory of the distribution + * because that entire directory is removed for non-crypto distributions. + * + * PUBLIC: int __crypto_region_init __P((ENV *)); + */ +int +__crypto_region_init(env) + ENV *env; +{ + REGENV *renv; + REGINFO *infop; + int ret; + + infop = env->reginfo; + renv = infop->primary; + MUTEX_LOCK(env, renv->mtx_regenv); + ret = !(renv->cipher_off == INVALID_ROFF); + MUTEX_UNLOCK(env, renv->mtx_regenv); + + if (ret == 0) + return (0); + + __db_errx(env, DB_STR("0040", +"Encrypted environment: library build did not include cryptography support")); + return (DB_OPNOTSUP); +} diff --git a/src/common/db_byteorder.c b/src/common/db_byteorder.c new file mode 100644 index 00000000..24a5acfa --- /dev/null +++ b/src/common/db_byteorder.c @@ -0,0 +1,63 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_isbigendian -- + * Return 1 if big-endian (Motorola and Sparc), not little-endian + * (Intel and Vax). 
We do this work at run-time, rather than at + * configuration time so cross-compilation and general embedded + * system support is simpler. + * + * PUBLIC: int __db_isbigendian __P((void)); + */ +int +__db_isbigendian() +{ + union { /* From Harbison & Steele. */ + long l; + char c[sizeof(long)]; + } u; + + u.l = 1; + return (u.c[sizeof(long) - 1] == 1); +} + +/* + * __db_byteorder -- + * Return if we need to do byte swapping, checking for illegal + * values. + * + * PUBLIC: int __db_byteorder __P((ENV *, int)); + */ +int +__db_byteorder(env, lorder) + ENV *env; + int lorder; +{ + switch (lorder) { + case 0: + break; + case 1234: + if (!F_ISSET(env, ENV_LITTLEENDIAN)) + return (DB_SWAPBYTES); + break; + case 4321: + if (F_ISSET(env, ENV_LITTLEENDIAN)) + return (DB_SWAPBYTES); + break; + default: + __db_errx(env, DB_STR("0041", + "unsupported byte order, only big and little-endian supported")); + return (EINVAL); + } + return (0); +} diff --git a/src/common/db_compint.c b/src/common/db_compint.c new file mode 100644 index 00000000..180055a4 --- /dev/null +++ b/src/common/db_compint.c @@ -0,0 +1,555 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_COMPRESSION + +/* + * Integer compression + * + * First byte | Next | Maximum + * byte | bytes| value + * ------------+------+--------------------------------------------------------- + * [0 xxxxxxx] | 0 | 2^7 - 1 + * [10 xxxxxx] | 1 | 2^14 + 2^7 - 1 + * [110 xxxxx] | 2 | 2^21 + 2^14 + 2^7 - 1 + * [1110 xxxx] | 3 | 2^28 + 2^21 + 2^14 + 2^7 - 1 + * [11110 xxx] | 4 | 2^35 + 2^28 + 2^21 + 2^14 + 2^7 - 1 + * [11111 000] | 5 | 2^40 + 2^35 + 2^28 + 2^21 + 2^14 + 2^7 - 1 + * [11111 001] | 6 | 2^48 + 2^40 + 2^35 + 2^28 + 2^21 + 2^14 + 2^7 - 1 + * [11111 010] | 7 | 2^56 + 2^48 + 2^40 + 2^35 + 2^28 + 2^21 + 2^14 + 2^7 - 1 + * [11111 011] | 8 | 2^64 + 2^56 + 2^48 + 2^40 + 2^35 + 2^28 + 2^21 + 2^14 + + * | | 2^7 - 1 + * + * NOTE: this compression algorithm depends + * on big-endian order, so swap if necessary. + * + */ + +#define CMP_INT_1BYTE_MAX 0x7F +#define CMP_INT_2BYTE_MAX 0x407F +#define CMP_INT_3BYTE_MAX 0x20407F +#define CMP_INT_4BYTE_MAX 0x1020407F + +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define CMP_INT_5BYTE_MAX 0x081020407Fi64 +#define CMP_INT_6BYTE_MAX 0x01081020407Fi64 +#define CMP_INT_7BYTE_MAX 0x0101081020407Fi64 +#define CMP_INT_8BYTE_MAX 0x010101081020407Fi64 +#else +#define CMP_INT_5BYTE_MAX 0x081020407FLL +#define CMP_INT_6BYTE_MAX 0x01081020407FLL +#define CMP_INT_7BYTE_MAX 0x0101081020407FLL +#define CMP_INT_8BYTE_MAX 0x010101081020407FLL +#endif + +#define CMP_INT_2BYTE_VAL 0x80 +#define CMP_INT_3BYTE_VAL 0xC0 +#define CMP_INT_4BYTE_VAL 0xE0 +#define CMP_INT_5BYTE_VAL 0xF0 +#define CMP_INT_6BYTE_VAL 0xF8 +#define CMP_INT_7BYTE_VAL 0xF9 +#define CMP_INT_8BYTE_VAL 0xFA +#define CMP_INT_9BYTE_VAL 0xFB +/* CMP_INT_SPARE_VAL is defined in db_int.h */ + +#define CMP_INT_2BYTE_MASK 0x3F +#define CMP_INT_3BYTE_MASK 0x1F +#define CMP_INT_4BYTE_MASK 0x0F +#define CMP_INT_5BYTE_MASK 0x07 + +static const u_int8_t __db_marshaled_int_size[] = { + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/* + * __db_compress_count_int -- + * Return the number of bytes that the compressed version + * of the argument will occupy. 
+ * + * PUBLIC: u_int32_t __db_compress_count_int __P((u_int64_t)); + */ +u_int32_t +__db_compress_count_int(i) + u_int64_t i; +{ + if (i <= CMP_INT_1BYTE_MAX) + return 1; + else if (i <= CMP_INT_2BYTE_MAX) + return 2; + else if (i <= CMP_INT_3BYTE_MAX) + return 3; + else if (i <= CMP_INT_4BYTE_MAX) + return 4; + else if (i <= CMP_INT_5BYTE_MAX) + return 5; + else if (i <= CMP_INT_6BYTE_MAX) + return 6; + else if (i <= CMP_INT_7BYTE_MAX) + return 7; + else if (i <= CMP_INT_8BYTE_MAX) + return 8; + else + return 9; +} + +/* + * __db_compress_int -- + * Compresses the integer into the buffer, returning the number of + * bytes occupied. + * + * PUBLIC: int __db_compress_int __P((u_int8_t *, u_int64_t)); + */ +int +__db_compress_int(buf, i) + u_int8_t *buf; + u_int64_t i; +{ + if (i <= CMP_INT_1BYTE_MAX) { + /* no swapping for one byte value */ + buf[0] = (u_int8_t)i; + return 1; + } else { + u_int8_t *p = (u_int8_t*)&i; + if (i <= CMP_INT_2BYTE_MAX) { + i -= CMP_INT_1BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = p[6] | CMP_INT_2BYTE_VAL; + buf[1] = p[7]; + } else { + buf[0] = p[1] | CMP_INT_2BYTE_VAL; + buf[1] = p[0]; + } + return 2; + } else if (i <= CMP_INT_3BYTE_MAX) { + i -= CMP_INT_2BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = p[5] | CMP_INT_3BYTE_VAL; + buf[1] = p[6]; + buf[2] = p[7]; + } else { + buf[0] = p[2] | CMP_INT_3BYTE_VAL; + buf[1] = p[1]; + buf[2] = p[0]; + } + return 3; + } else if (i <= CMP_INT_4BYTE_MAX) { + i -= CMP_INT_3BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = p[4] | CMP_INT_4BYTE_VAL; + buf[1] = p[5]; + buf[2] = p[6]; + buf[3] = p[7]; + } else { + buf[0] = p[3] | CMP_INT_4BYTE_VAL; + buf[1] = p[2]; + buf[2] = p[1]; + buf[3] = p[0]; + } + return 4; + } else if (i <= CMP_INT_5BYTE_MAX) { + i -= CMP_INT_4BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = p[3] | CMP_INT_5BYTE_VAL; + buf[1] = p[4]; + buf[2] = p[5]; + buf[3] = p[6]; + buf[4] = p[7]; + } else { + buf[0] = p[4] | CMP_INT_5BYTE_VAL; + 
buf[1] = p[3]; + buf[2] = p[2]; + buf[3] = p[1]; + buf[4] = p[0]; + } + return 5; + } else if (i <= CMP_INT_6BYTE_MAX) { + i -= CMP_INT_5BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = CMP_INT_6BYTE_VAL; + buf[1] = p[3]; + buf[2] = p[4]; + buf[3] = p[5]; + buf[4] = p[6]; + buf[5] = p[7]; + } else { + buf[0] = CMP_INT_6BYTE_VAL; + buf[1] = p[4]; + buf[2] = p[3]; + buf[3] = p[2]; + buf[4] = p[1]; + buf[5] = p[0]; + } + return 6; + } else if (i <= CMP_INT_7BYTE_MAX) { + i -= CMP_INT_6BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = CMP_INT_7BYTE_VAL; + buf[1] = p[2]; + buf[2] = p[3]; + buf[3] = p[4]; + buf[4] = p[5]; + buf[5] = p[6]; + buf[6] = p[7]; + } else { + buf[0] = CMP_INT_7BYTE_VAL; + buf[1] = p[5]; + buf[2] = p[4]; + buf[3] = p[3]; + buf[4] = p[2]; + buf[5] = p[1]; + buf[6] = p[0]; + } + return 7; + } else if (i <= CMP_INT_8BYTE_MAX) { + i -= CMP_INT_7BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = CMP_INT_8BYTE_VAL; + buf[1] = p[1]; + buf[2] = p[2]; + buf[3] = p[3]; + buf[4] = p[4]; + buf[5] = p[5]; + buf[6] = p[6]; + buf[7] = p[7]; + } else { + buf[0] = CMP_INT_8BYTE_VAL; + buf[1] = p[6]; + buf[2] = p[5]; + buf[3] = p[4]; + buf[4] = p[3]; + buf[5] = p[2]; + buf[6] = p[1]; + buf[7] = p[0]; + } + return 8; + } else { + i -= CMP_INT_8BYTE_MAX + 1; + if (__db_isbigendian() != 0) { + buf[0] = CMP_INT_9BYTE_VAL; + buf[1] = p[0]; + buf[2] = p[1]; + buf[3] = p[2]; + buf[4] = p[3]; + buf[5] = p[4]; + buf[6] = p[5]; + buf[7] = p[6]; + buf[8] = p[7]; + } else { + buf[0] = CMP_INT_9BYTE_VAL; + buf[1] = p[7]; + buf[2] = p[6]; + buf[3] = p[5]; + buf[4] = p[4]; + buf[5] = p[3]; + buf[6] = p[2]; + buf[7] = p[1]; + buf[8] = p[0]; + } + return 9; + } + } +} + +/* + * __db_decompress_count_int -- + * Return the number of bytes occupied by the compressed + * integer pointed to by buf. 
+ * + * PUBLIC: u_int32_t __db_decompress_count_int __P((const u_int8_t *)); + */ +u_int32_t +__db_decompress_count_int(buf) + const u_int8_t *buf; +{ + return __db_marshaled_int_size[*buf]; +} + +/* + * __db_decompress_int -- + * Decompresses the compressed integer pointer to by buf into i, + * returning the number of bytes read. + * + * PUBLIC: int __db_decompress_int __P((const u_int8_t *, u_int64_t *)); + */ +int +__db_decompress_int(buf, i) + const u_int8_t *buf; + u_int64_t *i; +{ + int len; + u_int64_t tmp; + u_int8_t *p; + u_int8_t c; + + tmp = 0; + p = (u_int8_t*)&tmp; + c = buf[0]; + len = __db_marshaled_int_size[c]; + + switch (len) { + case 1: + *i = c; + return 1; + case 2: + if (__db_isbigendian() != 0) { + p[6] = (c & CMP_INT_2BYTE_MASK); + p[7] = buf[1]; + } else { + p[1] = (c & CMP_INT_2BYTE_MASK); + p[0] = buf[1]; + } + tmp += CMP_INT_1BYTE_MAX + 1; + break; + case 3: + if (__db_isbigendian() != 0) { + p[5] = (c & CMP_INT_3BYTE_MASK); + p[6] = buf[1]; + p[7] = buf[2]; + } else { + p[2] = (c & CMP_INT_3BYTE_MASK); + p[1] = buf[1]; + p[0] = buf[2]; + } + tmp += CMP_INT_2BYTE_MAX + 1; + break; + case 4: + if (__db_isbigendian() != 0) { + p[4] = (c & CMP_INT_4BYTE_MASK); + p[5] = buf[1]; + p[6] = buf[2]; + p[7] = buf[3]; + } else { + p[3] = (c & CMP_INT_4BYTE_MASK); + p[2] = buf[1]; + p[1] = buf[2]; + p[0] = buf[3]; + } + tmp += CMP_INT_3BYTE_MAX + 1; + break; + case 5: + if (__db_isbigendian() != 0) { + p[3] = (c & CMP_INT_5BYTE_MASK); + p[4] = buf[1]; + p[5] = buf[2]; + p[6] = buf[3]; + p[7] = buf[4]; + } else { + p[4] = (c & CMP_INT_5BYTE_MASK); + p[3] = buf[1]; + p[2] = buf[2]; + p[1] = buf[3]; + p[0] = buf[4]; + } + tmp += CMP_INT_4BYTE_MAX + 1; + break; + case 6: + if (__db_isbigendian() != 0) { + p[3] = buf[1]; + p[4] = buf[2]; + p[5] = buf[3]; + p[6] = buf[4]; + p[7] = buf[5]; + } else { + p[4] = buf[1]; + p[3] = buf[2]; + p[2] = buf[3]; + p[1] = buf[4]; + p[0] = buf[5]; + } + tmp += CMP_INT_5BYTE_MAX + 1; + break; + case 7: + if 
(__db_isbigendian() != 0) { + p[2] = buf[1]; + p[3] = buf[2]; + p[4] = buf[3]; + p[5] = buf[4]; + p[6] = buf[5]; + p[7] = buf[6]; + } else { + p[5] = buf[1]; + p[4] = buf[2]; + p[3] = buf[3]; + p[2] = buf[4]; + p[1] = buf[5]; + p[0] = buf[6]; + } + tmp += CMP_INT_6BYTE_MAX + 1; + break; + case 8: + if (__db_isbigendian() != 0) { + p[1] = buf[1]; + p[2] = buf[2]; + p[3] = buf[3]; + p[4] = buf[4]; + p[5] = buf[5]; + p[6] = buf[6]; + p[7] = buf[7]; + } else { + p[6] = buf[1]; + p[5] = buf[2]; + p[4] = buf[3]; + p[3] = buf[4]; + p[2] = buf[5]; + p[1] = buf[6]; + p[0] = buf[7]; + } + tmp += CMP_INT_7BYTE_MAX + 1; + break; + case 9: + if (__db_isbigendian() != 0) { + p[0] = buf[1]; + p[1] = buf[2]; + p[2] = buf[3]; + p[3] = buf[4]; + p[4] = buf[5]; + p[5] = buf[6]; + p[6] = buf[7]; + p[7] = buf[8]; + } else { + p[7] = buf[1]; + p[6] = buf[2]; + p[5] = buf[3]; + p[4] = buf[4]; + p[3] = buf[5]; + p[2] = buf[6]; + p[1] = buf[7]; + p[0] = buf[8]; + } + tmp += CMP_INT_8BYTE_MAX + 1; + break; + default: + break; + } + + *i = tmp; + return len; +} + +/* + * __db_decompress_int32 -- + * Decompresses the compressed 32 bit integer pointer to by buf into i, + * returning the number of bytes read. 
+ * + * PUBLIC: int __db_decompress_int32 __P((const u_int8_t *, u_int32_t *)); + */ +int +__db_decompress_int32(buf, i) + const u_int8_t *buf; + u_int32_t *i; +{ + int len; + u_int32_t tmp; + u_int8_t *p; + u_int8_t c; + + tmp = 0; + p = (u_int8_t*)&tmp; + c = buf[0]; + len = __db_marshaled_int_size[c]; + + switch (len) { + case 1: + *i = c; + return 1; + case 2: + if (__db_isbigendian() != 0) { + p[2] = (c & CMP_INT_2BYTE_MASK); + p[3] = buf[1]; + } else { + p[1] = (c & CMP_INT_2BYTE_MASK); + p[0] = buf[1]; + } + tmp += CMP_INT_1BYTE_MAX + 1; + break; + case 3: + if (__db_isbigendian() != 0) { + p[1] = (c & CMP_INT_3BYTE_MASK); + p[2] = buf[1]; + p[3] = buf[2]; + } else { + p[2] = (c & CMP_INT_3BYTE_MASK); + p[1] = buf[1]; + p[0] = buf[2]; + } + tmp += CMP_INT_2BYTE_MAX + 1; + break; + case 4: + if (__db_isbigendian() != 0) { + p[0] = (c & CMP_INT_4BYTE_MASK); + p[1] = buf[1]; + p[2] = buf[2]; + p[3] = buf[3]; + } else { + p[3] = (c & CMP_INT_4BYTE_MASK); + p[2] = buf[1]; + p[1] = buf[2]; + p[0] = buf[3]; + } + tmp += CMP_INT_3BYTE_MAX + 1; + break; + case 5: + if (__db_isbigendian() != 0) { + p[0] = buf[1]; + p[1] = buf[2]; + p[2] = buf[3]; + p[3] = buf[4]; + } else { + p[3] = buf[1]; + p[2] = buf[2]; + p[1] = buf[3]; + p[0] = buf[4]; + } + tmp += CMP_INT_4BYTE_MAX + 1; + break; + default: + break; + } + + *i = tmp; + return len; +} + +#endif diff --git a/src/common/db_err.c b/src/common/db_err.c new file mode 100644 index 00000000..9d62559d --- /dev/null +++ b/src/common/db_err.c @@ -0,0 +1,1114 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static void __db_msgcall __P((const DB_ENV *, const char *, va_list)); +static void __db_msgfile __P((const DB_ENV *, const char *, va_list)); + +/* + * __db_fchk -- + * General flags checking routine. + * + * PUBLIC: int __db_fchk __P((ENV *, const char *, u_int32_t, u_int32_t)); + */ +int +__db_fchk(env, name, flags, ok_flags) + ENV *env; + const char *name; + u_int32_t flags, ok_flags; +{ + return (LF_ISSET(~ok_flags) ? __db_ferr(env, name, 0) : 0); +} + +/* + * __db_fcchk -- + * General combination flags checking routine. + * + * PUBLIC: int __db_fcchk + * PUBLIC: __P((ENV *, const char *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__db_fcchk(env, name, flags, flag1, flag2) + ENV *env; + const char *name; + u_int32_t flags, flag1, flag2; +{ + return (LF_ISSET(flag1) && + LF_ISSET(flag2) ? __db_ferr(env, name, 1) : 0); +} + +/* + * __db_ferr -- + * Common flag errors. + * + * PUBLIC: int __db_ferr __P((const ENV *, const char *, int)); + */ +int +__db_ferr(env, name, iscombo) + const ENV *env; + const char *name; + int iscombo; +{ + if (iscombo) + __db_errx(env, DB_STR_A("0054", + "illegal flag combination specified to %s", "%s"), name); + else + __db_errx(env, DB_STR_A("0055", + "illegal flag specified to %s", "%s"), name); + + return (EINVAL); +} + +/* + * __db_fnl -- + * Common flag-needs-locking message. + * + * PUBLIC: int __db_fnl __P((const ENV *, const char *)); + */ +int +__db_fnl(env, name) + const ENV *env; + const char *name; +{ + __db_errx(env, DB_STR_A("0056", + "%s: DB_READ_COMMITTED, DB_READ_UNCOMMITTED and DB_RMW require locking", + "%s"), name); + return (EINVAL); +} + +/* + * __db_pgerr -- + * Error when unable to retrieve a specified page. 
+ * + * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t, int)); + */ +int +__db_pgerr(dbp, pgno, errval) + DB *dbp; + db_pgno_t pgno; + int errval; +{ + /* + * Three things are certain: + * Death, taxes, and lost data. + * Guess which has occurred. + */ + __db_errx(dbp->env, DB_STR_A("0057", + "unable to create/retrieve page %lu", "%lu"), (u_long)pgno); + return (__env_panic(dbp->env, errval)); +} + +/* + * __db_pgfmt -- + * Error when a page has the wrong format. + * + * PUBLIC: int __db_pgfmt __P((ENV *, db_pgno_t)); + */ +int +__db_pgfmt(env, pgno) + ENV *env; + db_pgno_t pgno; +{ + __db_errx(env, DB_STR_A("0058", + "page %lu: illegal page type or format", "%lu"), (u_long)pgno); + return (__env_panic(env, EINVAL)); +} + +#ifdef DIAGNOSTIC +/* + * __db_assert -- + * Error when an assertion fails. Only checked if #DIAGNOSTIC defined. + * + * PUBLIC: #ifdef DIAGNOSTIC + * PUBLIC: void __db_assert __P((ENV *, const char *, const char *, int)); + * PUBLIC: #endif + */ +void +__db_assert(env, e, file, line) + ENV *env; + const char *e, *file; + int line; +{ + if (DB_GLOBAL(j_assert) != NULL) + DB_GLOBAL(j_assert)(e, file, line); + else { + __db_errx(env, DB_STR_A("0059", + "assert failure: %s/%d: \"%s\"", + "%s %d %s"), file, line, e); + + __os_abort(env); + /* NOTREACHED */ + } +} +#endif + +/* + * __env_panic_msg -- + * Just report that someone else paniced. + * + * PUBLIC: int __env_panic_msg __P((ENV *)); + */ +int +__env_panic_msg(env) + ENV *env; +{ + DB_ENV *dbenv; + int ret; + + dbenv = env->dbenv; + + ret = DB_RUNRECOVERY; + + __db_errx(env, DB_STR("0060", + "PANIC: fatal region error detected; run recovery")); + + if (dbenv->db_paniccall != NULL) /* Deprecated */ + dbenv->db_paniccall(dbenv, ret); + + /* Must check for DB_EVENT_REG_PANIC panic first because it is never + * set by itself. If set, it means panic came from DB_REGISTER code + * only, otherwise it could be from many possible places in the code. 
+ */ + if ((env->reginfo != NULL) && + (((REGENV *)env->reginfo->primary)->reg_panic)) + DB_EVENT(env, DB_EVENT_REG_PANIC, &ret); + else + DB_EVENT(env, DB_EVENT_PANIC, &ret); + + return (ret); +} + +/* + * __env_panic -- + * Lock out the database environment due to unrecoverable error. + * + * PUBLIC: int __env_panic __P((ENV *, int)); + */ +int +__env_panic(env, errval) + ENV *env; + int errval; +{ + DB_ENV *dbenv; + + dbenv = env->dbenv; + + if (env != NULL) { + __env_panic_set(env, 1); + + __db_err(env, errval, DB_STR("0061", "PANIC")); + + if (dbenv->db_paniccall != NULL) /* Deprecated */ + dbenv->db_paniccall(dbenv, errval); + + /* Must check for DB_EVENT_REG_PANIC first because it is never + * set by itself. If set, it means panic came from DB_REGISTER + * code only, otherwise it could be from many possible places + * in the code. + */ + if ((env->reginfo != NULL) && + (((REGENV *)env->reginfo->primary)->reg_panic)) + DB_EVENT(env, DB_EVENT_REG_PANIC, &errval); + else + DB_EVENT(env, DB_EVENT_PANIC, &errval); + } + +#if defined(DIAGNOSTIC) && !defined(CONFIG_TEST) + /* + * We want a stack trace of how this could possibly happen. + * + * Don't drop core if it's the test suite -- it's reasonable for the + * test suite to check to make sure that DB_RUNRECOVERY is returned + * under certain conditions. + */ + __os_abort(env); + /* NOTREACHED */ +#endif + + /* + * Chaos reigns within. + * Reflect, repent, and reboot. + * Order shall return. + */ + return (DB_RUNRECOVERY); +} + +/* + * db_strerror -- + * ANSI C strerror(3) for DB. + * + * EXTERN: char *db_strerror __P((int)); + */ +char * +db_strerror(error) + int error; +{ + char *p; + + if (error == 0) + return (DB_STR("0062", "Successful return: 0")); + if (error > 0) { + if ((p = strerror(error)) != NULL) + return (p); + return (__db_unknown_error(error)); + } + + /* + * !!! + * The Tcl API requires that some of these return strings be compared + * against strings stored in application scripts. 
So, any of these + * errors that do not invariably result in a Tcl exception may not be + * altered. + */ + switch (error) { + case DB_BUFFER_SMALL: + return (DB_STR("0063", + "DB_BUFFER_SMALL: User memory too small for return value")); + case DB_DONOTINDEX: + return (DB_STR("0064", + "DB_DONOTINDEX: Secondary index callback returns null")); + case DB_FOREIGN_CONFLICT: + return (DB_STR("0065", + "DB_FOREIGN_CONFLICT: A foreign database constraint has been violated")); + case DB_HEAP_FULL: + return (DB_STR("0208","DB_HEAP_FULL: no free space in db")); + case DB_KEYEMPTY: + return (DB_STR("0066", + "DB_KEYEMPTY: Non-existent key/data pair")); + case DB_KEYEXIST: + return (DB_STR("0067", + "DB_KEYEXIST: Key/data pair already exists")); + case DB_LOCK_DEADLOCK: + return (DB_STR("0068", + "DB_LOCK_DEADLOCK: Locker killed to resolve a deadlock")); + case DB_LOCK_NOTGRANTED: + return (DB_STR("0069", "DB_LOCK_NOTGRANTED: Lock not granted")); + case DB_LOG_BUFFER_FULL: + return (DB_STR("0070", + "DB_LOG_BUFFER_FULL: In-memory log buffer is full")); + case DB_LOG_VERIFY_BAD: + return (DB_STR("0071", + "DB_LOG_VERIFY_BAD: Log verification failed")); + case DB_NOSERVER: + return (DB_STR("0072", + "DB_NOSERVER: No message dispatch call-back function has been configured")); + case DB_NOTFOUND: + return (DB_STR("0073", + "DB_NOTFOUND: No matching key/data pair found")); + case DB_OLD_VERSION: + return (DB_STR("0074", + "DB_OLDVERSION: Database requires a version upgrade")); + case DB_PAGE_NOTFOUND: + return (DB_STR("0075", + "DB_PAGE_NOTFOUND: Requested page not found")); + case DB_REP_DUPMASTER: + return (DB_STR("0076", + "DB_REP_DUPMASTER: A second master site appeared")); + case DB_REP_HANDLE_DEAD: + return (DB_STR("0077", + "DB_REP_HANDLE_DEAD: Handle is no longer valid")); + case DB_REP_HOLDELECTION: + return (DB_STR("0078", + "DB_REP_HOLDELECTION: Need to hold an election")); + case DB_REP_IGNORE: + return (DB_STR("0079", + "DB_REP_IGNORE: Replication record/operation 
ignored")); + case DB_REP_ISPERM: + return (DB_STR("0080", + "DB_REP_ISPERM: Permanent record written")); + case DB_REP_JOIN_FAILURE: + return (DB_STR("0081", + "DB_REP_JOIN_FAILURE: Unable to join replication group")); + case DB_REP_LEASE_EXPIRED: + return (DB_STR("0082", + "DB_REP_LEASE_EXPIRED: Replication leases have expired")); + case DB_REP_LOCKOUT: + return (DB_STR("0083", + "DB_REP_LOCKOUT: Waiting for replication recovery to complete")); + case DB_REP_NEWSITE: + return (DB_STR("0084", + "DB_REP_NEWSITE: A new site has entered the system")); + case DB_REP_NOTPERM: + return (DB_STR("0085", + "DB_REP_NOTPERM: Permanent log record not written")); + case DB_REP_UNAVAIL: + return (DB_STR("0086", + "DB_REP_UNAVAIL: Too few remote sites to complete operation")); + case DB_REP_WOULDROLLBACK: /* Undocumented; C API only. */ + return (DB_STR("0207", + "DB_REP_WOULDROLLBACK: Client data has diverged")); + case DB_RUNRECOVERY: + return (DB_STR("0087", + "DB_RUNRECOVERY: Fatal error, run database recovery")); + case DB_SECONDARY_BAD: + return (DB_STR("0088", + "DB_SECONDARY_BAD: Secondary index inconsistent with primary")); + case DB_TIMEOUT: + return (DB_STR("0089", "DB_TIMEOUT: Operation timed out")); + case DB_VERIFY_BAD: + return (DB_STR("0090", + "DB_VERIFY_BAD: Database verification failed")); + case DB_VERSION_MISMATCH: + return (DB_STR("0091", + "DB_VERSION_MISMATCH: Database environment version mismatch")); + default: + break; + } + + return (__db_unknown_error(error)); +} + +/* + * __db_unknown_error -- + * Format an unknown error value into a static buffer. + * + * PUBLIC: char *__db_unknown_error __P((int)); + */ +char * +__db_unknown_error(error) + int error; +{ + /* + * !!! + * Room for a 64-bit number + slop. This buffer is only used + * if we're given an unknown error number, which should never + * happen. 
+ * + * We're no longer thread-safe if it does happen, but the worst + * result is a corrupted error string because there will always + * be a trailing nul byte since the error buffer is nul filled + * and longer than any error message. + */ + (void)snprintf(DB_GLOBAL(error_buf), + sizeof(DB_GLOBAL(error_buf)), DB_STR_A("0092", + "Unknown error: %d", "%d"), error); + return (DB_GLOBAL(error_buf)); +} + +/* + * __db_syserr -- + * Standard error routine. + * + * PUBLIC: void __db_syserr __P((const ENV *, int, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +void +#ifdef STDC_HEADERS +__db_syserr(const ENV *env, int error, const char *fmt, ...) +#else +__db_syserr(env, error, fmt, va_alist) + const ENV *env; + int error; + const char *fmt; + va_dcl +#endif +{ + DB_ENV *dbenv; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* + * The same as DB->err, except we don't default to writing to stderr + * after any output channel has been configured, and we use a system- + * specific function to translate errors to strings. + */ + DB_REAL_ERR(dbenv, error, DB_ERROR_SYSTEM, 0, fmt); +} + +/* + * __db_err -- + * Standard error routine. + * + * PUBLIC: void __db_err __P((const ENV *, int, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +void +#ifdef STDC_HEADERS +__db_err(const ENV *env, int error, const char *fmt, ...) +#else +__db_err(env, error, fmt, va_alist) + const ENV *env; + int error; + const char *fmt; + va_dcl +#endif +{ + DB_ENV *dbenv; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* + * The same as DB->err, except we don't default to writing to stderr + * once an output channel has been configured. + */ + DB_REAL_ERR(dbenv, error, DB_ERROR_SET, 0, fmt); +} + +/* + * __db_errx -- + * Standard error routine. 
+ * + * PUBLIC: void __db_errx __P((const ENV *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 2, 3))); + */ +void +#ifdef STDC_HEADERS +__db_errx(const ENV *env, const char *fmt, ...) +#else +__db_errx(env, fmt, va_alist) + const ENV *env; + const char *fmt; + va_dcl +#endif +{ + DB_ENV *dbenv; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* + * The same as DB->errx, except we don't default to writing to stderr + * once an output channel has been configured. + */ + DB_REAL_ERR(dbenv, 0, DB_ERROR_NOT_SET, 0, fmt); +} + +/* + * __db_errcall -- + * Do the error message work for callback functions. + * + * PUBLIC: void __db_errcall + * PUBLIC: __P((const DB_ENV *, int, db_error_set_t, const char *, va_list)); + */ +void +__db_errcall(dbenv, error, error_set, fmt, ap) + const DB_ENV *dbenv; + int error; + db_error_set_t error_set; + const char *fmt; + va_list ap; +{ + char *p; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + char sysbuf[1024]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + + p = buf; + if (fmt != NULL) + p += vsnprintf(buf, sizeof(buf), fmt, ap); + if (error_set != DB_ERROR_NOT_SET) + p += snprintf(p, + sizeof(buf) - (size_t)(p - buf), ": %s", + error_set == DB_ERROR_SET ? db_strerror(error) : + __os_strerror(error, sysbuf, sizeof(sysbuf))); + + dbenv->db_errcall(dbenv, dbenv->db_errpfx, buf); +} + +/* + * __db_errfile -- + * Do the error message work for FILE *s. + * + * PUBLIC: void __db_errfile + * PUBLIC: __P((const DB_ENV *, int, db_error_set_t, const char *, va_list)); + */ +void +__db_errfile(dbenv, error, error_set, fmt, ap) + const DB_ENV *dbenv; + int error; + db_error_set_t error_set; + const char *fmt; + va_list ap; +{ + FILE *fp; + int need_sep; + char sysbuf[1024]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + + fp = dbenv == NULL || + dbenv->db_errfile == NULL ? 
stderr : dbenv->db_errfile; + need_sep = 0; + + if (dbenv != NULL && dbenv->db_errpfx != NULL) { + (void)fprintf(fp, "%s", dbenv->db_errpfx); + need_sep = 1; + } + if (fmt != NULL && fmt[0] != '\0') { + if (need_sep) + (void)fprintf(fp, ": "); + need_sep = 1; + (void)vfprintf(fp, fmt, ap); + } + if (error_set != DB_ERROR_NOT_SET) + (void)fprintf(fp, "%s%s", + need_sep ? ": " : "", + error_set == DB_ERROR_SET ? db_strerror(error) : + __os_strerror(error, sysbuf, sizeof(sysbuf))); + (void)fprintf(fp, "\n"); + (void)fflush(fp); +} + +/* + * __db_msgadd -- + * Aggregate a set of strings into a buffer for the callback API. + * + * PUBLIC: void __db_msgadd __P((ENV *, DB_MSGBUF *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +void +#ifdef STDC_HEADERS +__db_msgadd(ENV *env, DB_MSGBUF *mbp, const char *fmt, ...) +#else +__db_msgadd(env, mbp, fmt, va_alist) + ENV *env; + DB_MSGBUF *mbp; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + __db_msgadd_ap(env, mbp, fmt, ap); + va_end(ap); +} + +/* + * __db_msgadd_ap -- + * Aggregate a set of strings into a buffer for the callback API. + * + * PUBLIC: void __db_msgadd_ap + * PUBLIC: __P((ENV *, DB_MSGBUF *, const char *, va_list)); + */ +void +__db_msgadd_ap(env, mbp, fmt, ap) + ENV *env; + DB_MSGBUF *mbp; + const char *fmt; + va_list ap; +{ + size_t len, olen; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + + len = (size_t)vsnprintf(buf, sizeof(buf), fmt, ap); + + /* + * There's a heap buffer in the ENV handle we use to aggregate the + * message chunks. We maintain a pointer to the buffer, the next slot + * to be filled in in the buffer, and a total buffer length. 
+ */ + olen = (size_t)(mbp->cur - mbp->buf); + if (olen + len >= mbp->len) { + if (__os_realloc(env, mbp->len + len + 256, &mbp->buf)) + return; + mbp->len += (len + 256); + mbp->cur = mbp->buf + olen; + } + + memcpy(mbp->cur, buf, len + 1); + mbp->cur += len; +} + +/* + * __db_msg -- + * Standard DB stat message routine. + * + * PUBLIC: void __db_msg __P((const ENV *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 2, 3))); + */ +void +#ifdef STDC_HEADERS +__db_msg(const ENV *env, const char *fmt, ...) +#else +__db_msg(env, fmt, va_alist) + const ENV *env; + const char *fmt; + va_dcl +#endif +{ + DB_ENV *dbenv; + + dbenv = env == NULL ? NULL : env->dbenv; + + DB_REAL_MSG(dbenv, fmt); +} + +/* + * __db_repmsg -- + * Replication system message routine. + * + * PUBLIC: void __db_repmsg __P((const ENV *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 2, 3))); + */ +void +#ifdef STDC_HEADERS +__db_repmsg(const ENV *env, const char *fmt, ...) +#else +__db_repmsg(env, fmt, va_alist) + const ENV *env; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + (void)vsnprintf(buf, sizeof(buf), fmt, ap); + __rep_msg(env, buf); + va_end(ap); +} + +/* + * __db_msgcall -- + * Do the message work for callback functions. + */ +static void +__db_msgcall(dbenv, fmt, ap) + const DB_ENV *dbenv; + const char *fmt; + va_list ap; +{ + char buf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. */ + + (void)vsnprintf(buf, sizeof(buf), fmt, ap); + + dbenv->db_msgcall(dbenv, buf); +} + +/* + * __db_msgfile -- + * Do the message work for FILE *s. + */ +static void +__db_msgfile(dbenv, fmt, ap) + const DB_ENV *dbenv; + const char *fmt; + va_list ap; +{ + FILE *fp; + + fp = dbenv == NULL || + dbenv->db_msgfile == NULL ? 
stdout : dbenv->db_msgfile; + (void)vfprintf(fp, fmt, ap); + + (void)fprintf(fp, "\n"); + (void)fflush(fp); +} + +/* + * __db_unknown_flag -- report internal error + * + * PUBLIC: int __db_unknown_flag __P((ENV *, char *, u_int32_t)); + */ +int +__db_unknown_flag(env, routine, flag) + ENV *env; + char *routine; + u_int32_t flag; +{ + __db_errx(env, DB_STR_A("0093", "%s: Unknown flag: %#x", "%s %#x"), + routine, (u_int)flag); + +#ifdef DIAGNOSTIC + __os_abort(env); + /* NOTREACHED */ +#endif + return (EINVAL); +} + +/* + * __db_unknown_type -- report internal database type error + * + * PUBLIC: int __db_unknown_type __P((ENV *, char *, DBTYPE)); + */ +int +__db_unknown_type(env, routine, type) + ENV *env; + char *routine; + DBTYPE type; +{ + __db_errx(env, DB_STR_A("0094", "%s: Unexpected database type: %s", + "%s %s"), routine, __db_dbtype_to_string(type)); + +#ifdef DIAGNOSTIC + __os_abort(env); + /* NOTREACHED */ +#endif + return (EINVAL); +} + +/* + * __db_unknown_path -- report unexpected database code path error. + * + * PUBLIC: int __db_unknown_path __P((ENV *, char *)); + */ +int +__db_unknown_path(env, routine) + ENV *env; + char *routine; +{ + __db_errx(env, DB_STR_A("0095", "%s: Unexpected code path error", + "%s"), routine); + +#ifdef DIAGNOSTIC + __os_abort(env); + /* NOTREACHED */ +#endif + return (EINVAL); +} + +/* + * __db_check_txn -- + * Check for common transaction errors. + * + * PUBLIC: int __db_check_txn __P((DB *, DB_TXN *, DB_LOCKER *, int)); + */ +int +__db_check_txn(dbp, txn, assoc_locker, read_op) + DB *dbp; + DB_TXN *txn; + DB_LOCKER *assoc_locker; + int read_op; +{ + ENV *env; + int related, ret; + + env = dbp->env; + + /* + * If we are in recovery or aborting a transaction, then we + * don't need to enforce the rules about dbp's not allowing + * transactional operations in non-transactional dbps and + * vica-versa. 
This happens all the time as the dbp during + * an abort may be transactional, but we undo operations + * outside a transaction since we're aborting. + */ + if (IS_RECOVERING(env) || F_ISSET(dbp, DB_AM_RECOVER)) + return (0); + + /* + * Check for common transaction errors: + * an operation on a handle whose open commit hasn't completed. + * a transaction handle in a non-transactional environment + * a transaction handle for a non-transactional database + */ + if (!read_op && txn != NULL && F_ISSET(txn, TXN_READONLY)) { + __db_errx(env, DB_STR("0096", + "Read-only transaction cannot be used for an update")); + return (EINVAL); + } else if (txn == NULL || F_ISSET(txn, TXN_PRIVATE)) { + if (dbp->cur_locker != NULL && + dbp->cur_locker->id >= TXN_MINIMUM) + goto open_err; + + if (!read_op && F_ISSET(dbp, DB_AM_TXN)) { + __db_errx(env, DB_STR("0097", + "Transaction not specified for a transactional database")); + return (EINVAL); + } + } else if (F_ISSET(txn, TXN_FAMILY)) { + /* + * Family transaction handles can be passed to any method, + * since they only determine locker IDs. + */ + return (0); + } else { + if (!TXN_ON(env)) + return (__db_not_txn_env(env)); + + if (!F_ISSET(dbp, DB_AM_TXN)) { + __db_errx(env, DB_STR("0098", + "Transaction specified for a non-transactional database")); + return (EINVAL); + } + + if (F_ISSET(txn, TXN_DEADLOCK)) + return (__db_txn_deadlock_err(env, txn)); + + if (dbp->cur_locker != NULL && + dbp->cur_locker->id >= TXN_MINIMUM && + dbp->cur_locker->id != txn->txnid) { + if ((ret = __lock_locker_same_family(env, + dbp->cur_locker, txn->locker, &related)) != 0) + return (ret); + if (!related) + goto open_err; + } + } + + /* + * If dbp->associate_locker is not NULL, that means we're in + * the middle of a DB->associate with DB_CREATE (i.e., a secondary index + * creation). 
+ * + * In addition to the usual transaction rules, we need to lock out + * non-transactional updates that aren't part of the associate (and + * thus are using some other locker ID). + * + * Transactional updates should simply block; from the time we + * decide to build the secondary until commit, we'll hold a write + * lock on all of its pages, so it should be safe to attempt to update + * the secondary in another transaction (presumably by updating the + * primary). + */ + if (!read_op && dbp->associate_locker != NULL && + txn != NULL && dbp->associate_locker != assoc_locker) { + __db_errx(env, DB_STR("0099", + "Operation forbidden while secondary index is being created")); + return (EINVAL); + } + + /* + * Check the txn and dbp are from the same env. + */ + if (txn != NULL && env != txn->mgrp->env) { + __db_errx(env, DB_STR("0100", + "Transaction and database from different environments")); + return (EINVAL); + } + + return (0); +open_err: + __db_errx(env, DB_STR("0101", + "Transaction that opened the DB handle is still active")); + return (EINVAL); +} + +/* + * __db_txn_deadlock_err -- + * Transaction has allready been deadlocked. + * + * PUBLIC: int __db_txn_deadlock_err __P((ENV *, DB_TXN *)); + */ +int +__db_txn_deadlock_err(env, txn) + ENV *env; + DB_TXN *txn; +{ + const char *name; + + name = NULL; + (void)__txn_get_name(txn, &name); + + __db_errx(env, DB_STR_A("0102", + "%s%sprevious transaction deadlock return not resolved", + "%s %s"), name == NULL ? "" : name, name == NULL ? "" : ": "); + + return (EINVAL); +} + +/* + * __db_not_txn_env -- + * DB handle must be in an environment that supports transactions. + * + * PUBLIC: int __db_not_txn_env __P((ENV *)); + */ +int +__db_not_txn_env(env) + ENV *env; +{ + __db_errx(env, DB_STR("0103", + "DB environment not configured for transactions")); + return (EINVAL); +} + +/* + * __db_rec_toobig -- + * Fixed record length exceeded error message. 
+ * + * PUBLIC: int __db_rec_toobig __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__db_rec_toobig(env, data_len, fixed_rec_len) + ENV *env; + u_int32_t data_len, fixed_rec_len; +{ + __db_errx(env, DB_STR_A("0104", + "%lu larger than database's maximum record length %lu", + "%lu %lu"), (u_long)data_len, (u_long)fixed_rec_len); + return (EINVAL); +} + +/* + * __db_rec_repl -- + * Fixed record replacement length error message. + * + * PUBLIC: int __db_rec_repl __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__db_rec_repl(env, data_size, data_dlen) + ENV *env; + u_int32_t data_size, data_dlen; +{ + __db_errx(env, DB_STR_A("0105", + "Record length error: " + "replacement length %lu differs from replaced length %lu", + "%lu %lu"), (u_long)data_size, (u_long)data_dlen); + return (EINVAL); +} + +#if defined(DIAGNOSTIC) || defined(DEBUG_ROP) || defined(DEBUG_WOP) +/* + * __dbc_logging -- + * In DIAGNOSTIC mode, check for bad replication combinations. + * + * PUBLIC: int __dbc_logging __P((DBC *)); + */ +int +__dbc_logging(dbc) + DBC *dbc; +{ + DB_REP *db_rep; + ENV *env; + int ret; + + env = dbc->env; + db_rep = env->rep_handle; + + ret = LOGGING_ON(env) && + !F_ISSET(dbc, DBC_RECOVER) && !IS_REP_CLIENT(env); + + /* + * If we're not using replication or running recovery, return. + */ + if (db_rep == NULL || F_ISSET(dbc, DBC_RECOVER)) + return (ret); + +#ifndef DEBUG_ROP + /* + * Only check when DEBUG_ROP is not configured. People often do + * non-transactional reads, and debug_rop is going to write + * a log record. + */ + { + REP *rep; + + rep = db_rep->region; + + /* + * If we're a client and not running recovery or non durably, error. + */ + if (IS_REP_CLIENT(env) && !F_ISSET(dbc->dbp, DB_AM_NOT_DURABLE)) { + __db_errx(env, DB_STR("0106", + "dbc_logging: Client update")); + goto err; + } + +#ifndef DEBUG_WOP + /* + * If DEBUG_WOP is enabled, then we'll generate debugging log records + * that are non-transactional. This is OK. 
+ */ + if (IS_REP_MASTER(env) && + dbc->txn == NULL && !F_ISSET(dbc->dbp, DB_AM_NOT_DURABLE)) { + __db_errx(env, DB_STR("0107", + "Dbc_logging: Master non-txn update")); + goto err; + } +#endif + + if (0) { +err: __db_errx(env, DB_STR_A("0108", "Rep: flags 0x%lx msg_th %lu", + "%lx %lu"), (u_long)rep->flags, (u_long)rep->msg_th); + __db_errx(env, DB_STR_A("0109", "Rep: handle %lu, opcnt %lu", + "%lu %lu"), (u_long)rep->handle_cnt, (u_long)rep->op_cnt); + __os_abort(env); + /* NOTREACHED */ + } + } +#endif + return (ret); +} +#endif + +/* + * __db_check_lsn -- + * Display the log sequence error message. + * + * PUBLIC: int __db_check_lsn __P((ENV *, DB_LSN *, DB_LSN *)); + */ +int +__db_check_lsn(env, lsn, prev) + ENV *env; + DB_LSN *lsn, *prev; +{ + __db_errx(env, DB_STR_A("0110", + "Log sequence error: page LSN %lu %lu; previous LSN %lu %lu", + "%lu %lu %lu %lu"), (u_long)(lsn)->file, + (u_long)(lsn)->offset, (u_long)(prev)->file, + (u_long)(prev)->offset); + return (EINVAL); +} + +/* + * __db_rdonly -- + * Common readonly message. + * PUBLIC: int __db_rdonly __P((const ENV *, const char *)); + */ +int +__db_rdonly(env, name) + const ENV *env; + const char *name; +{ + __db_errx(env, DB_STR_A("0111", + "%s: attempt to modify a read-only database", "%s"), name); + return (EACCES); +} + +/* + * __db_space_err -- + * Common out of space message. + * PUBLIC: int __db_space_err __P((const DB *)); + */ +int +__db_space_err(dbp) + const DB *dbp; +{ + __db_errx(dbp->env, DB_STR_A("0112", + "%s: file limited to %lu pages", "%s %lu"), + dbp->fname, (u_long)dbp->mpf->mfp->maxpgno); + return (ENOSPC); +} + +/* + * __db_failed -- + * Common failed thread message. 
+ * + * PUBLIC: int __db_failed __P((const ENV *, + * PUBLIC: const char *, pid_t, db_threadid_t)); + */ +int +__db_failed(env, msg, pid, tid) + const ENV *env; + const char *msg; + pid_t pid; + db_threadid_t tid; +{ + DB_ENV *dbenv; + char buf[DB_THREADID_STRLEN]; + + dbenv = env->dbenv; + + __db_errx(env, DB_STR_A("0113", "Thread/process %s failed: %s", + "%s %s"), dbenv->thread_id_string(dbenv, pid, tid, buf), msg); + return (DB_RUNRECOVERY); +} diff --git a/src/common/db_getlong.c b/src/common/db_getlong.c new file mode 100644 index 00000000..e1ae09e6 --- /dev/null +++ b/src/common/db_getlong.c @@ -0,0 +1,146 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_getlong -- + * Return a long value inside of basic parameters. + * + * PUBLIC: int __db_getlong + * PUBLIC: __P((DB_ENV *, const char *, char *, long, long, long *)); + */ +int +__db_getlong(dbenv, progname, p, min, max, storep) + DB_ENV *dbenv; + const char *progname; + char *p; + long min, max, *storep; +{ + long val; + char *end; + + __os_set_errno(0); + val = strtol(p, &end, 10); + if ((val == LONG_MIN || val == LONG_MAX) && + __os_get_errno() == ERANGE) { + if (dbenv == NULL) + fprintf(stderr, "%s: %s: %s\n", + progname, p, strerror(ERANGE)); + else + dbenv->err(dbenv, ERANGE, "%s", p); + return (ERANGE); + } + if (p[0] == '\0' || (end[0] != '\0' && end[0] != '\n')) { + if (dbenv == NULL) + fprintf(stderr, DB_STR_A("0042", + "%s: %s: Invalid numeric argument\n", + "%s %s\n"), progname, p); + else + dbenv->errx(dbenv, DB_STR_A("0043", + "%s: Invalid numeric argument", "%s"), p); + return (EINVAL); + } + if (val < min) { + if (dbenv == NULL) + fprintf(stderr, DB_STR_A("0044", + "%s: %s: Less than minimum value (%ld)\n", + "%s %s %ld\n"), progname, p, min); + else + dbenv->errx(dbenv, DB_STR_A("0045", + "%s: Less than minimum 
value (%ld)", + "%s %ld"), p, min); + return (ERANGE); + } + if (val > max) { + if (dbenv == NULL) + fprintf(stderr, DB_STR_A("0046", + "%s: %s: Greater than maximum value (%ld)\n", + "%s %s %ld\n"), progname, p, max); + else + dbenv->errx(dbenv, DB_STR_A("0047", + "%s: Greater than maximum value (%ld)", + "%s %ld"), p, max); + return (ERANGE); + } + *storep = val; + return (0); +} + +/* + * __db_getulong -- + * Return an unsigned long value inside of basic parameters. + * + * PUBLIC: int __db_getulong + * PUBLIC: __P((DB_ENV *, const char *, char *, u_long, u_long, u_long *)); + */ +int +__db_getulong(dbenv, progname, p, min, max, storep) + DB_ENV *dbenv; + const char *progname; + char *p; + u_long min, max, *storep; +{ + u_long val; + char *end; + + __os_set_errno(0); + val = strtoul(p, &end, 10); + if (val == ULONG_MAX && __os_get_errno() == ERANGE) { + if (dbenv == NULL) + fprintf(stderr, "%s: %s: %s\n", + progname, p, strerror(ERANGE)); + else + dbenv->err(dbenv, ERANGE, "%s", p); + return (ERANGE); + } + if (p[0] == '\0' || (end[0] != '\0' && end[0] != '\n')) { + if (dbenv == NULL) + fprintf(stderr, DB_STR_A("0048", + "%s: %s: Invalid numeric argument\n", + "%s %s\n"), progname, p); + else + dbenv->errx(dbenv, DB_STR_A("0049", + "%s: Invalid numeric argument", + "%s"), p); + return (EINVAL); + } + if (val < min) { + if (dbenv == NULL) + fprintf(stderr, DB_STR_A("0050", + "%s: %s: Less than minimum value (%lu)\n", + "%s %s %lu\n"), progname, p, min); + else + dbenv->errx(dbenv, DB_STR_A("0051", + "%s: Less than minimum value (%lu)", + "%s %lu"), p, min); + return (ERANGE); + } + + /* + * We allow a 0 to substitute as a max value for ULONG_MAX because + * 1) accepting only a 0 value is unlikely to be necessary, and 2) + * we don't want callers to have to use ULONG_MAX explicitly, as it + * may not exist on all platforms. 
static int __db_idcmp(const void *, const void *);

/*
 * __db_idcmp --
 *	qsort comparison callback ordering u_int32_t keys ascending.
 */
static int
__db_idcmp(const void *a, const void *b)
{
	u_int32_t lhs, rhs;

	lhs = *(const u_int32_t *)a;
	rhs = *(const u_int32_t *)b;

	return (lhs < rhs ? -1 : (lhs > rhs ? 1 : 0));
}

/*
 * __db_idspace --
 *
 * On input, minp and maxp contain the minimum and maximum valid values for
 * the name space and on return, they contain the minimum and maximum ids
 * available (by finding the biggest gap).  The minimum can be an inuse
 * value, but the maximum cannot be.
 *
 * PUBLIC: void __db_idspace __P((u_int32_t *, int, u_int32_t *, u_int32_t *));
 */
void
__db_idspace(u_int32_t *inuse, int n, u_int32_t *minp, u_int32_t *maxp)
{
	u_int32_t span, widest;
	int idx, widest_at;

	/* A single in-use ID is a special case. */
	if (n == 1) {
		/*
		 * If the only in-use ID is the last one in the range we must
		 * wrap, which means leaving the minimum at the value the
		 * caller passed in; otherwise the in-use ID itself becomes
		 * the new minimum.
		 */
		if (inuse[0] != *maxp)
			*minp = inuse[0];
		*maxp = inuse[0] - 1;
		return;
	}

	/* Sort, then locate the widest gap between adjacent in-use IDs. */
	qsort(inuse, (size_t)n, sizeof(u_int32_t), __db_idcmp);
	widest = 0;
	widest_at = 0;
	for (idx = 0; idx < n - 1; idx++) {
		span = inuse[idx + 1] - inuse[idx];
		if (span > widest) {
			widest = span;
			widest_at = idx;
		}
	}

	/* The gap wrapping around the end of the space may be larger. */
	if ((*maxp - inuse[n - 1]) + (inuse[0] - *minp) > widest) {
		/* Same wrap check as in the n == 1 case. */
		if (inuse[n - 1] != *maxp)
			*minp = inuse[n - 1];
		*maxp = inuse[0] - 1;
	} else {
		*minp = inuse[widest_at];
		*maxp = inuse[widest_at + 1] - 1;
	}
}
/*
 * __db_log2 --
 *	Smallest exponent e such that (1 << e) >= num (0 and 1 map to 0).
 *
 * PUBLIC: u_int32_t __db_log2 __P((u_int32_t));
 */
u_int32_t
__db_log2(u_int32_t num)
{
	u_int32_t exponent, power;

	exponent = 0;
	for (power = 1; power < num; power <<= 1)
		exponent++;
	return (exponent);
}
/*
 * __db_tablesize --
 *	Choose a size for the hash table.
 *
 * PUBLIC: u_int32_t __db_tablesize __P((u_int32_t));
 */
u_int32_t
__db_tablesize(u_int32_t n_buckets)
{
	/*
	 * We try to be clever about how big we make the hash tables.  Use a
	 * prime number close to the "suggested" number of elements that will
	 * be in the hash table.  Use 32 as the minimum hash table size.
	 *
	 * Ref: Sedgewick, Algorithms in C, "Hash Functions"
	 *
	 * Up to ~250,000 buckets, we use powers of 2.  After that, we slow
	 * the rate of increase by half.  For each choice, we then use a
	 * nearby prime number as the hash value.
	 *
	 * If a terabyte is the maximum cache we'll see, and we assume there
	 * are 10 1K buckets on each hash chain, then 107374182 is the maximum
	 * number of buckets we'll ever need.
	 *
	 * We don't use the obvious static data structure because some C
	 * compilers (and I use the term loosely), can't handle them.
	 */
#define	HASH_SIZE(power, prime) {					\
	if ((power) >= n_buckets)					\
		return (prime);						\
}
	HASH_SIZE(32, 37);			/* 2^5 */
	HASH_SIZE(64, 67);			/* 2^6 */
	HASH_SIZE(128, 131);			/* 2^7 */
	HASH_SIZE(256, 257);			/* 2^8 */
	HASH_SIZE(512, 521);			/* 2^9 */
	HASH_SIZE(1024, 1031);			/* 2^10 */
	HASH_SIZE(2048, 2053);			/* 2^11 */
	HASH_SIZE(4096, 4099);			/* 2^12 */
	HASH_SIZE(8192, 8191);			/* 2^13 */
	HASH_SIZE(16384, 16381);		/* 2^14 */
	HASH_SIZE(32768, 32771);		/* 2^15 */
	HASH_SIZE(65536, 65537);		/* 2^16 */
	HASH_SIZE(131072, 131071);		/* 2^17 */
	HASH_SIZE(262144, 262147);		/* 2^18 */
	HASH_SIZE(393216, 393209);		/* 2^18 + 2^18/2 */
	HASH_SIZE(524288, 524287);		/* 2^19 */
	HASH_SIZE(786432, 786431);		/* 2^19 + 2^19/2 */
	HASH_SIZE(1048576, 1048573);		/* 2^20 */
	HASH_SIZE(1572864, 1572869);		/* 2^20 + 2^20/2 */
	HASH_SIZE(2097152, 2097169);		/* 2^21 */
	HASH_SIZE(3145728, 3145721);		/* 2^21 + 2^21/2 */
	HASH_SIZE(4194304, 4194301);		/* 2^22 */
	HASH_SIZE(6291456, 6291449);		/* 2^22 + 2^22/2 */
	HASH_SIZE(8388608, 8388617);		/* 2^23 */
	HASH_SIZE(12582912, 12582917);		/* 2^23 + 2^23/2 */
	HASH_SIZE(16777216, 16777213);		/* 2^24 */
	HASH_SIZE(25165824, 25165813);		/* 2^24 + 2^24/2 */
	HASH_SIZE(33554432, 33554393);		/* 2^25 */
	HASH_SIZE(50331648, 50331653);		/* 2^25 + 2^25/2 */
	HASH_SIZE(67108864, 67108859);		/* 2^26 */
	HASH_SIZE(100663296, 100663291);	/* 2^26 + 2^26/2 */
	HASH_SIZE(134217728, 134217757);	/* 2^27 */
	HASH_SIZE(201326592, 201326611);	/* 2^27 + 2^27/2 */
	HASH_SIZE(268435456, 268435459);	/* 2^28 */
	HASH_SIZE(402653184, 402653189);	/* 2^28 + 2^28/2 */
	HASH_SIZE(536870912, 536870909);	/* 2^29 */
	HASH_SIZE(805306368, 805306357);	/* 2^29 + 2^29/2 */
	HASH_SIZE(1073741824, 1073741827);	/* 2^30 */
	return (1073741827);
}
HASH_SIZE(402653184, 402653189); /* 2^28 + 2^28/2 */ + HASH_SIZE(536870912, 536870909); /* 2^29 */ + HASH_SIZE(805306368, 805306357); /* 2^29 + 2^29/2 */ + HASH_SIZE(1073741824, 1073741827); /* 2^30 */ + return (1073741827); +} + +/* + * __db_hashinit -- + * Initialize a hash table that resides in shared memory. + * + * PUBLIC: void __db_hashinit __P((void *, u_int32_t)); + */ +void +__db_hashinit(begin, nelements) + void *begin; + u_int32_t nelements; +{ + u_int32_t i; + SH_TAILQ_HEAD(hash_head) *headp; + + headp = (struct hash_head *)begin; + + for (i = 0; i < nelements; i++, headp++) + SH_TAILQ_INIT(headp); +} diff --git a/src/common/dbt.c b/src/common/dbt.c new file mode 100644 index 00000000..4ae1c5a9 --- /dev/null +++ b/src/common/dbt.c @@ -0,0 +1,74 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __dbt_usercopy -- + * Take a copy of the user's data, if a callback is supplied. + * + * PUBLIC: int __dbt_usercopy __P((ENV *, DBT *)); + */ +int +__dbt_usercopy(env, dbt) + ENV *env; + DBT *dbt; +{ + void *buf; + int ret; + + if (dbt == NULL || !F_ISSET(dbt, DB_DBT_USERCOPY) || dbt->size == 0 || + dbt->data != NULL) + return (0); + + buf = NULL; + if ((ret = __os_umalloc(env, dbt->size, &buf)) != 0 || + (ret = env->dbt_usercopy(dbt, 0, buf, dbt->size, + DB_USERCOPY_GETDATA)) != 0) + goto err; + dbt->data = buf; + + return (0); + +err: if (buf != NULL) { + __os_ufree(env, buf); + dbt->data = NULL; + } + + return (ret); +} + +/* + * __dbt_userfree -- + * Free a copy of the user's data, if necessary. 
+ * + * PUBLIC: void __dbt_userfree __P((ENV *, DBT *, DBT *, DBT *)); + */ +void +__dbt_userfree(env, key, pkey, data) + ENV *env; + DBT *key, *pkey, *data; +{ + if (key != NULL && + F_ISSET(key, DB_DBT_USERCOPY) && key->data != NULL) { + __os_ufree(env, key->data); + key->data = NULL; + } + if (pkey != NULL && + F_ISSET(pkey, DB_DBT_USERCOPY) && pkey->data != NULL) { + __os_ufree(env, pkey->data); + pkey->data = NULL; + } + if (data != NULL && + F_ISSET(data, DB_DBT_USERCOPY) && data->data != NULL) { + __os_ufree(env, data->data); + data->data = NULL; + } +} diff --git a/src/common/mkpath.c b/src/common/mkpath.c new file mode 100644 index 00000000..575a584d --- /dev/null +++ b/src/common/mkpath.c @@ -0,0 +1,68 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_mkpath -- -- + * Create intermediate directories. + * + * PUBLIC: int __db_mkpath __P((ENV *, const char *)); + */ +int +__db_mkpath(env, name) + ENV *env; + const char *name; +{ + size_t len; + int ret; + char *p, *t, savech; + + /* + * Get a copy so we can modify the string. It's a path and potentially + * quite long, so don't allocate the space on the stack. + */ + len = strlen(name) + 1; + if ((ret = __os_malloc(env, len, &t)) != 0) + return (ret); + memcpy(t, name, len); + + /* + * Cycle through the path, creating intermediate directories. + * + * Skip the first byte if it's a path separator, it's the start of an + * absolute pathname. 
+ */ + if (PATH_SEPARATOR[1] == '\0') { + for (p = t + 1; p[0] != '\0'; ++p) + if (p[0] == PATH_SEPARATOR[0]) { + savech = *p; + *p = '\0'; + if (__os_exists(env, t, NULL) && + (ret = __os_mkdir( + env, t, env->dir_mode)) != 0) + break; + *p = savech; + } + } else + for (p = t + 1; p[0] != '\0'; ++p) + if (strchr(PATH_SEPARATOR, p[0]) != NULL) { + savech = *p; + *p = '\0'; + if (__os_exists(env, t, NULL) && + (ret = __os_mkdir( + env, t, env->dir_mode)) != 0) + break; + *p = savech; + } + + __os_free(env, t); + return (ret); +} diff --git a/src/common/openflags.c b/src/common/openflags.c new file mode 100644 index 00000000..2c4d00e6 --- /dev/null +++ b/src/common/openflags.c @@ -0,0 +1,51 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_openflags -- + * Convert open(2) flags to DB flags. + * + * PUBLIC: u_int32_t __db_openflags __P((int)); + */ +u_int32_t +__db_openflags(oflags) + int oflags; +{ + u_int32_t dbflags; + + dbflags = 0; + + if (oflags & O_CREAT) + dbflags |= DB_CREATE; + + if (oflags & O_TRUNC) + dbflags |= DB_TRUNCATE; + + /* + * !!! + * Convert POSIX 1003.1 open(2) mode flags to DB flags. This isn't + * an exact science as few POSIX implementations have a flag value + * for O_RDONLY, it's simply the lack of a write flag. + */ +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY | O_RDWR | O_WRONLY) +#endif + switch (oflags & O_ACCMODE) { + case O_RDWR: + case O_WRONLY: + break; + default: + dbflags |= DB_RDONLY; + break; + } + return (dbflags); +} diff --git a/src/common/os_method.c b/src/common/os_method.c new file mode 100644 index 00000000..f0cfb4ff --- /dev/null +++ b/src/common/os_method.c @@ -0,0 +1,270 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * EXTERN: int db_env_set_func_assert + * EXTERN: __P((void (*)(const char *, const char *, int))); + */ +int +db_env_set_func_assert(func_assert) + void (*func_assert) __P((const char *, const char *, int)); +{ + DB_GLOBAL(j_assert) = func_assert; + return (0); +} + +/* + * EXTERN: int db_env_set_func_close __P((int (*)(int))); + */ +int +db_env_set_func_close(func_close) + int (*func_close) __P((int)); +{ + DB_GLOBAL(j_close) = func_close; + return (0); +} + +/* + * EXTERN: int db_env_set_func_dirfree __P((void (*)(char **, int))); + */ +int +db_env_set_func_dirfree(func_dirfree) + void (*func_dirfree) __P((char **, int)); +{ + DB_GLOBAL(j_dirfree) = func_dirfree; + return (0); +} + +/* + * EXTERN: int db_env_set_func_dirlist + * EXTERN: __P((int (*)(const char *, char ***, int *))); + */ +int +db_env_set_func_dirlist(func_dirlist) + int (*func_dirlist) __P((const char *, char ***, int *)); +{ + DB_GLOBAL(j_dirlist) = func_dirlist; + return (0); +} + +/* + * EXTERN: int db_env_set_func_exists __P((int (*)(const char *, int *))); + */ +int +db_env_set_func_exists(func_exists) + int (*func_exists) __P((const char *, int *)); +{ + DB_GLOBAL(j_exists) = func_exists; + return (0); +} + +/* + * EXTERN: int db_env_set_func_free __P((void (*)(void *))); + */ +int +db_env_set_func_free(func_free) + void (*func_free) __P((void *)); +{ + DB_GLOBAL(j_free) = func_free; + return (0); +} + +/* + * EXTERN: int db_env_set_func_fsync __P((int (*)(int))); + */ +int +db_env_set_func_fsync(func_fsync) + int (*func_fsync) __P((int)); +{ + DB_GLOBAL(j_fsync) = func_fsync; + return (0); +} + +/* + * EXTERN: int db_env_set_func_ftruncate __P((int (*)(int, off_t))); + */ +int +db_env_set_func_ftruncate(func_ftruncate) + int (*func_ftruncate) __P((int, off_t)); +{ + DB_GLOBAL(j_ftruncate) = func_ftruncate; + return (0); +} + +/* + * EXTERN: int db_env_set_func_ioinfo __P((int (*)(const char *, + * EXTERN: int, 
u_int32_t *, u_int32_t *, u_int32_t *))); + */ +int +db_env_set_func_ioinfo(func_ioinfo) + int (*func_ioinfo) + __P((const char *, int, u_int32_t *, u_int32_t *, u_int32_t *)); +{ + DB_GLOBAL(j_ioinfo) = func_ioinfo; + return (0); +} + +/* + * EXTERN: int db_env_set_func_malloc __P((void *(*)(size_t))); + */ +int +db_env_set_func_malloc(func_malloc) + void *(*func_malloc) __P((size_t)); +{ + DB_GLOBAL(j_malloc) = func_malloc; + return (0); +} + +/* + * EXTERN: int db_env_set_func_file_map + * EXTERN: __P((int (*)(DB_ENV *, char *, size_t, int, void **), + * EXTERN: int (*)(DB_ENV *, void *))); + */ +int +db_env_set_func_file_map(func_file_map, func_file_unmap) + int (*func_file_map) __P((DB_ENV *, char *, size_t, int, void **)); + int (*func_file_unmap) __P((DB_ENV *, void *)); +{ + DB_GLOBAL(j_file_map) = func_file_map; + DB_GLOBAL(j_file_unmap) = func_file_unmap; + return (0); +} + +/* + * EXTERN: int db_env_set_func_region_map + * EXTERN: __P((int (*)(DB_ENV *, char *, size_t, int *, void **), + * EXTERN: int (*)(DB_ENV *, void *))); + */ +int +db_env_set_func_region_map(func_region_map, func_region_unmap) + int (*func_region_map) __P((DB_ENV *, char *, size_t, int *, void **)); + int (*func_region_unmap) __P((DB_ENV *, void *)); +{ + DB_GLOBAL(j_region_map) = func_region_map; + DB_GLOBAL(j_region_unmap) = func_region_unmap; + return (0); +} + +/* + * EXTERN: int db_env_set_func_pread + * EXTERN: __P((ssize_t (*)(int, void *, size_t, off_t))); + */ +int +db_env_set_func_pread(func_pread) + ssize_t (*func_pread) __P((int, void *, size_t, off_t)); +{ + DB_GLOBAL(j_pread) = func_pread; + return (0); +} + +/* + * EXTERN: int db_env_set_func_pwrite + * EXTERN: __P((ssize_t (*)(int, const void *, size_t, off_t))); + */ +int +db_env_set_func_pwrite(func_pwrite) + ssize_t (*func_pwrite) __P((int, const void *, size_t, off_t)); +{ + DB_GLOBAL(j_pwrite) = func_pwrite; + return (0); +} + +/* + * EXTERN: int db_env_set_func_open __P((int (*)(const char *, int, ...))); + */ 
+int +db_env_set_func_open(func_open) + int (*func_open) __P((const char *, int, ...)); +{ + DB_GLOBAL(j_open) = func_open; + return (0); +} + +/* + * EXTERN: int db_env_set_func_read __P((ssize_t (*)(int, void *, size_t))); + */ +int +db_env_set_func_read(func_read) + ssize_t (*func_read) __P((int, void *, size_t)); +{ + DB_GLOBAL(j_read) = func_read; + return (0); +} + +/* + * EXTERN: int db_env_set_func_realloc __P((void *(*)(void *, size_t))); + */ +int +db_env_set_func_realloc(func_realloc) + void *(*func_realloc) __P((void *, size_t)); +{ + DB_GLOBAL(j_realloc) = func_realloc; + return (0); +} + +/* + * EXTERN: int db_env_set_func_rename + * EXTERN: __P((int (*)(const char *, const char *))); + */ +int +db_env_set_func_rename(func_rename) + int (*func_rename) __P((const char *, const char *)); +{ + DB_GLOBAL(j_rename) = func_rename; + return (0); +} + +/* + * EXTERN: int db_env_set_func_seek + * EXTERN: __P((int (*)(int, off_t, int))); + */ +int +db_env_set_func_seek(func_seek) + int (*func_seek) __P((int, off_t, int)); +{ + DB_GLOBAL(j_seek) = func_seek; + return (0); +} + +/* + * EXTERN: int db_env_set_func_unlink __P((int (*)(const char *))); + */ +int +db_env_set_func_unlink(func_unlink) + int (*func_unlink) __P((const char *)); +{ + DB_GLOBAL(j_unlink) = func_unlink; + return (0); +} + +/* + * EXTERN: int db_env_set_func_write + * EXTERN: __P((ssize_t (*)(int, const void *, size_t))); + */ +int +db_env_set_func_write(func_write) + ssize_t (*func_write) __P((int, const void *, size_t)); +{ + DB_GLOBAL(j_write) = func_write; + return (0); +} + +/* + * EXTERN: int db_env_set_func_yield __P((int (*)(u_long, u_long))); + */ +int +db_env_set_func_yield(func_yield) + int (*func_yield) __P((u_long, u_long)); +{ + DB_GLOBAL(j_yield) = func_yield; + return (0); +} diff --git a/src/common/util_arg.c b/src/common/util_arg.c new file mode 100644 index 00000000..92824b33 --- /dev/null +++ b/src/common/util_arg.c @@ -0,0 +1,56 @@ +/*- + * See the file LICENSE for 
redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 5 +/* + * !!! + * We build this file in old versions of Berkeley DB when we're doing test + * runs using the test_micro tool. Without a prototype in place, we get + * warnings, and there's no simple workaround. + */ +char *strsep(); +#endif + +/* + * __db_util_arg -- + * Convert a string into an argc/argv pair. + * + * PUBLIC: int __db_util_arg __P((char *, char *, int *, char ***)); + */ +int +__db_util_arg(arg0, str, argcp, argvp) + char *arg0, *str, ***argvp; + int *argcp; +{ + int n, ret; + char **ap, **argv; + +#define MAXARGS 25 + if ((ret = + __os_malloc(NULL, (MAXARGS + 1) * sizeof(char **), &argv)) != 0) + return (ret); + + ap = argv; + *ap++ = arg0; + for (n = 1; (*ap = strsep(&str, " \t")) != NULL;) + if (**ap != '\0') { + ++ap; + if (++n == MAXARGS) + break; + } + *ap = NULL; + + *argcp = (int)(ap - argv); + *argvp = argv; + + return (0); +} diff --git a/src/common/util_cache.c b/src/common/util_cache.c new file mode 100644 index 00000000..5489e9cd --- /dev/null +++ b/src/common/util_cache.c @@ -0,0 +1,47 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_util_cache -- + * Compute if we have enough cache. + * + * PUBLIC: int __db_util_cache __P((DB *, u_int32_t *, int *)); + */ +int +__db_util_cache(dbp, cachep, resizep) + DB *dbp; + u_int32_t *cachep; + int *resizep; +{ + u_int32_t pgsize; + int ret; + + /* Get the current page size. */ + if ((ret = dbp->get_pagesize(dbp, &pgsize)) != 0) + return (ret); + + /* + * The current cache size is in cachep. 
If it's insufficient, set the + * the memory referenced by resizep to 1 and set cachep to the minimum + * size needed. + * + * Make sure our current cache is big enough. We want at least + * DB_MINPAGECACHE pages in the cache. + */ + if ((*cachep / pgsize) < DB_MINPAGECACHE) { + *resizep = 1; + *cachep = pgsize * DB_MINPAGECACHE; + } else + *resizep = 0; + + return (0); +} diff --git a/src/common/util_log.c b/src/common/util_log.c new file mode 100644 index 00000000..ed50c5f1 --- /dev/null +++ b/src/common/util_log.c @@ -0,0 +1,45 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_util_logset -- + * Log that we're running. + * + * PUBLIC: int __db_util_logset __P((const char *, char *)); + */ +int +__db_util_logset(progname, fname) + const char *progname; + char *fname; +{ + pid_t pid; + FILE *fp; + time_t now; + char time_buf[CTIME_BUFLEN]; + + if ((fp = fopen(fname, "w")) == NULL) + goto err; + + (void)time(&now); + + __os_id(NULL, &pid, NULL); + fprintf(fp, + "%s: %lu %s", progname, (u_long)pid, __os_ctime(&now, time_buf)); + + if (fclose(fp) == EOF) + goto err; + + return (0); + +err: fprintf(stderr, "%s: %s: %s\n", progname, fname, strerror(errno)); + return (1); +} diff --git a/src/common/util_sig.c b/src/common/util_sig.c new file mode 100644 index 00000000..6a71f4c1 --- /dev/null +++ b/src/common/util_sig.c @@ -0,0 +1,110 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +static int interrupt; +static void set_signal __P((int, int)); +static void signal_handler __P((int)); + +/* + * signal_handler -- + * Interrupt signal handler. 
/* Tentative definition; merges with the file-scope declaration above. */
static int interrupt;
static void set_signal(int, int);
static void signal_handler(int);

/*
 * signal_handler --
 *	Interrupt signal handler: remember which signal arrived.
 */
static void
signal_handler(int signo)
{
#ifndef HAVE_SIGACTION
	/* Assume signal() is unreliable and reset it, first thing. */
	set_signal(signo, 0);
#endif
	/* Some systems don't pass in the correct signal value -- check. */
	interrupt = signo;
	if (interrupt == 0)
		interrupt = SIGINT;
}

/*
 * set_signal --
 *	Install our handler, or restore the default disposition, for s.
 */
static void
set_signal(int s, int is_dflt)
{
	/*
	 * Use sigaction if it's available, otherwise use signal().
	 */
#ifdef HAVE_SIGACTION
	struct sigaction sa, osa;

	sa.sa_handler = is_dflt ? SIG_DFL : signal_handler;
	(void)sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;
	(void)sigaction(s, &sa, &osa);
#else
	(void)signal(s, is_dflt ? SIG_DFL : signal_handler);
#endif
}

/*
 * __db_util_siginit --
 *	Arrange to catch the signals after which we want to clean up.
 *
 * PUBLIC: void __db_util_siginit __P((void));
 */
void
__db_util_siginit(void)
{
	/*
	 * Initialize the set of signals for which we want to clean up.
	 * Generally, we try not to leave the shared regions locked if
	 * we can.
	 */
#ifdef SIGHUP
	set_signal(SIGHUP, 0);
#endif
#ifdef SIGINT
	set_signal(SIGINT, 0);
#endif
#ifdef SIGPIPE
	set_signal(SIGPIPE, 0);
#endif
#ifdef SIGTERM
	set_signal(SIGTERM, 0);
#endif
}

/*
 * __db_util_interrupted --
 *	Return non-zero when a watched signal has arrived.
 *
 * PUBLIC: int __db_util_interrupted __P((void));
 */
int
__db_util_interrupted(void)
{
	return (interrupt != 0);
}

/*
 * __db_util_sigresend --
 *	Re-deliver a caught signal with its default disposition restored.
 *
 * PUBLIC: void __db_util_sigresend __P((void));
 */
void
__db_util_sigresend(void)
{
	/* Resend any caught signal. */
	if (interrupt != 0) {
		set_signal(interrupt, 1);

		(void)raise(interrupt);
		/* NOTREACHED */
	}
}
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_zero_fill -- + * Zero out bytes in the file. + * + * Pages allocated by writing pages past end-of-file are not zeroed, + * on some systems. Recovery could theoretically be fooled by a page + * showing up that contained garbage. In order to avoid this, we + * have to write the pages out to disk, and flush them. The reason + * for the flush is because if we don't sync, the allocation of another + * page subsequent to this one might reach the disk first, and if we + * crashed at the right moment, leave us with this page as the one + * allocated by writing a page past it in the file. + * + * PUBLIC: int __db_zero_fill __P((ENV *, DB_FH *)); + */ +int +__db_zero_fill(env, fhp) + ENV *env; + DB_FH *fhp; +{ +#ifdef HAVE_FILESYSTEM_NOTZERO + off_t stat_offset, write_offset; + size_t blen, nw; + u_int32_t bytes, mbytes; + int group_sync, ret; + u_int8_t *bp; + + /* Calculate the byte offset of the next write. */ + write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset; + + /* Stat the file. */ + if ((ret = __os_ioinfo(env, NULL, fhp, &mbytes, &bytes, NULL)) != 0) + return (ret); + stat_offset = (off_t)mbytes * MEGABYTE + bytes; + + /* Check if the file is large enough. */ + if (stat_offset >= write_offset) + return (0); + + /* Get a large buffer if we're writing lots of data. */ +#undef ZF_LARGE_WRITE +#define ZF_LARGE_WRITE (64 * 1024) + if ((ret = __os_calloc(env, 1, ZF_LARGE_WRITE, &bp)) != 0) + return (ret); + blen = ZF_LARGE_WRITE; + + /* Seek to the current end of the file. */ + if ((ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes)) != 0) + goto err; + + /* + * Hash is the only access method that allocates groups of pages. Hash + * uses the existence of the last page in a group to signify the entire + * group is OK; so, write all the pages but the last one in the group, + * flush them to disk, then write the last one to disk and flush it. 
+ */ + for (group_sync = 0; stat_offset < write_offset; group_sync = 1) { + if (write_offset - stat_offset <= (off_t)blen) { + blen = (size_t)(write_offset - stat_offset); + if (group_sync && (ret = __os_fsync(env, fhp)) != 0) + goto err; + } + if ((ret = __os_physwrite(env, fhp, bp, blen, &nw)) != 0) + goto err; + stat_offset += blen; + } + if ((ret = __os_fsync(env, fhp)) != 0) + goto err; + + /* Seek back to where we started. */ + mbytes = (u_int32_t)(write_offset / MEGABYTE); + bytes = (u_int32_t)(write_offset % MEGABYTE); + ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes); + +err: __os_free(env, bp); + return (ret); +#else + COMPQUIET(env, NULL); + COMPQUIET(fhp, NULL); + return (0); +#endif /* HAVE_FILESYSTEM_NOTZERO */ +} + +/* + * __db_zero -- + * Zero to the end of the file. + * + * PUBLIC: int __db_zero_extend __P((ENV *, + * PUBLIC: DB_FH *, db_pgno_t, db_pgno_t, u_int32_t)); + */ +int +__db_zero_extend(env, fhp, pgno, last_pgno, pgsize) + ENV *env; + DB_FH *fhp; + db_pgno_t pgno, last_pgno; + u_int32_t pgsize; +{ + int ret; + size_t nwrote; + u_int8_t *buf; + + if ((ret = __os_calloc(env, 1, pgsize, &buf)) != 0) + return (ret); + memset(buf, 0, pgsize); + for (; pgno <= last_pgno; pgno++) + if ((ret = __os_io(env, DB_IO_WRITE, + fhp, pgno, pgsize, 0, pgsize, buf, &nwrote)) != 0) { + if (ret == 0) { + ret = EIO; + goto err; + } + goto err; + } + +err: __os_free(env, buf); + return (ret); +} diff --git a/src/crypto/aes_method.c b/src/crypto/aes_method.c new file mode 100644 index 00000000..22dfa823 --- /dev/null +++ b/src/crypto/aes_method.c @@ -0,0 +1,357 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * Some parts of this code originally written by Adam Stubblefield, + * -- astubble@rice.edu. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" + +#ifdef HAVE_CRYPTO_IPP +#include +#endif + +static void __aes_err __P((ENV *, int)); +static int __aes_derivekeys __P((ENV *, DB_CIPHER *, u_int8_t *, size_t)); + +/* + * __aes_setup -- + * Setup AES functions. + * + * PUBLIC: int __aes_setup __P((ENV *, DB_CIPHER *)); + */ +int +__aes_setup(env, db_cipher) + ENV *env; + DB_CIPHER *db_cipher; +{ + AES_CIPHER *aes_cipher; + int ret; +#ifdef HAVE_CRYPTO_IPP + int ctx_size = 0; + IppStatus ipp_ret; +#endif + + db_cipher->adj_size = __aes_adj_size; + db_cipher->close = __aes_close; + db_cipher->decrypt = __aes_decrypt; + db_cipher->encrypt = __aes_encrypt; + db_cipher->init = __aes_init; + if ((ret = __os_calloc(env, 1, sizeof(AES_CIPHER), &aes_cipher)) != 0) + return (ret); +#ifdef HAVE_CRYPTO_IPP + /* + * IPP AES encryption context size can only be obtained through this + * function call, cannot directly declare IppsRijndael128Spec within + * AES_CIPHER struct. + */ + if ((ipp_ret = ippsRijndael128GetSize(&ctx_size)) != ippStsNoErr) { + __aes_err(env, (int)ipp_ret); + return (EAGAIN); + } + if ((ret = __os_malloc(env, ctx_size, &aes_cipher->ipp_ctx)) != 0) { + __os_free(env, aes_cipher); + return (ret); + } +#endif + db_cipher->data = aes_cipher; + return (0); +} + +/* + * __aes_adj_size -- + * Given a size, return an addition amount needed to meet the + * "chunk" needs of the algorithm. + * + * PUBLIC: u_int __aes_adj_size __P((size_t)); + */ +u_int +__aes_adj_size(len) + size_t len; +{ + if (len % DB_AES_CHUNK == 0) + return (0); + return (DB_AES_CHUNK - (u_int)(len % DB_AES_CHUNK)); +} + +/* + * __aes_close -- + * Destroy the AES encryption instantiation. 
+ * + * PUBLIC: int __aes_close __P((ENV *, void *)); + */ +int +__aes_close(env, data) + ENV *env; + void *data; +{ +#ifdef HAVE_CRYPTO_IPP + AES_CIPHER *aes_cipher = (AES_CIPHER *)data; + __os_free(env, aes_cipher->ipp_ctx); +#endif + __os_free(env, data); + return (0); +} + +/* + * __aes_decrypt -- + * Decrypt data with AES. + * + * PUBLIC: int __aes_decrypt __P((ENV *, void *, void *, + * PUBLIC: u_int8_t *, size_t)); + */ +int +__aes_decrypt(env, aes_data, iv, cipher, cipher_len) + ENV *env; + void *aes_data; + void *iv; + u_int8_t *cipher; + size_t cipher_len; +{ + AES_CIPHER *aes; +#ifdef HAVE_CRYPTO_IPP + IppStatus ipp_ret; +#else + cipherInstance c; +#endif + int ret; + + aes = (AES_CIPHER *)aes_data; + if (iv == NULL || cipher == NULL) + return (EINVAL); + if ((cipher_len % DB_AES_CHUNK) != 0) + return (EINVAL); + +#ifdef HAVE_CRYPTO_IPP + if ((ipp_ret = ippsRijndael128DecryptCBC((const Ipp8u *)cipher, + (Ipp8u *)cipher, cipher_len, (IppsRijndael128Spec *)aes->ipp_ctx, + (const Ipp8u *)iv, 0)) != ippStsNoErr) { + __aes_err(env, (int)ipp_ret); + return (EAGAIN); + } +#else + /* + * Initialize the cipher + */ + if ((ret = __db_cipherInit(&c, MODE_CBC, iv)) < 0) { + __aes_err(env, ret); + return (EAGAIN); + } + + /* Do the decryption */ + if ((ret = __db_blockDecrypt(&c, &aes->decrypt_ki, cipher, + cipher_len * 8, cipher)) < 0) { + __aes_err(env, ret); + return (EAGAIN); + } +#endif + return (0); +} + +/* + * __aes_encrypt -- + * Encrypt data with AES. 
+ * + * PUBLIC: int __aes_encrypt __P((ENV *, void *, void *, + * PUBLIC: u_int8_t *, size_t)); + */ +int +__aes_encrypt(env, aes_data, iv, data, data_len) + ENV *env; + void *aes_data; + void *iv; + u_int8_t *data; + size_t data_len; +{ + AES_CIPHER *aes; +#ifdef HAVE_CRYPTO_IPP + IppStatus ipp_ret; +#else + cipherInstance c; +#endif + u_int32_t tmp_iv[DB_IV_BYTES/4]; + int ret; + + aes = (AES_CIPHER *)aes_data; + if (aes == NULL || data == NULL) + return (EINVAL); + if ((data_len % DB_AES_CHUNK) != 0) + return (EINVAL); + /* + * Generate the IV here. We store it in a tmp IV because + * the IV might be stored within the data we are encrypting + * and so we will copy it over to the given location after + * encryption is done. + * We don't do this outside of there because some encryption + * algorithms someone might add may not use IV's and we always + * want on here. + */ + if ((ret = __db_generate_iv(env, tmp_iv)) != 0) + return (ret); + +#ifdef HAVE_CRYPTO_IPP + if ((ipp_ret = ippsRijndael128EncryptCBC((const Ipp8u *)data, + (Ipp8u *)data, data_len, (IppsRijndael128Spec *)aes->ipp_ctx, + (const Ipp8u *)tmp_iv, 0)) != ippStsNoErr) { + __aes_err(env, (int)ipp_ret); + return (EAGAIN); + } +#else + /* + * Initialize the cipher + */ + if ((ret = __db_cipherInit(&c, MODE_CBC, (char *)tmp_iv)) < 0) { + __aes_err(env, ret); + return (EAGAIN); + } + + /* Do the encryption */ + if ((ret = __db_blockEncrypt(&c, &aes->encrypt_ki, data, data_len * 8, + data)) < 0) { + __aes_err(env, ret); + return (EAGAIN); + } +#endif + memcpy(iv, tmp_iv, DB_IV_BYTES); + return (0); +} + +/* + * __aes_init -- + * Initialize the AES encryption instantiation. 
+ * + * PUBLIC: int __aes_init __P((ENV *, DB_CIPHER *)); + */ +int +__aes_init(env, db_cipher) + ENV *env; + DB_CIPHER *db_cipher; +{ + DB_ENV *dbenv; + + dbenv = env->dbenv; + + return (__aes_derivekeys( + env, db_cipher, (u_int8_t *)dbenv->passwd, dbenv->passwd_len)); +} + +static int +__aes_derivekeys(env, db_cipher, passwd, plen) + ENV *env; + DB_CIPHER *db_cipher; + u_int8_t *passwd; + size_t plen; +{ + AES_CIPHER *aes; + SHA1_CTX ctx; +#ifdef HAVE_CRYPTO_IPP + IppStatus ipp_ret; +#else + int ret; +#endif + u_int32_t temp[DB_MAC_KEY/4]; + + if (passwd == NULL) + return (EINVAL); + + aes = (AES_CIPHER *)db_cipher->data; + + /* Derive the crypto keys */ + __db_SHA1Init(&ctx); + __db_SHA1Update(&ctx, passwd, plen); + __db_SHA1Update(&ctx, (u_int8_t *)DB_ENC_MAGIC, strlen(DB_ENC_MAGIC)); + __db_SHA1Update(&ctx, passwd, plen); + __db_SHA1Final((u_int8_t *)temp, &ctx); + +#ifdef HAVE_CRYPTO_IPP + if ((ipp_ret = ippsRijndael128Init((const Ipp8u *)temp, + IppsRijndaelKey128, (IppsRijndael128Spec *)aes->ipp_ctx)) + != ippStsNoErr) { + __aes_err(env, (int)ipp_ret); + return (EAGAIN); + } +#else + if ((ret = __db_makeKey(&aes->encrypt_ki, DIR_ENCRYPT, + DB_AES_KEYLEN, (char *)temp)) != TRUE) { + __aes_err(env, ret); + return (EAGAIN); + } + if ((ret = __db_makeKey(&aes->decrypt_ki, DIR_DECRYPT, + DB_AES_KEYLEN, (char *)temp)) != TRUE) { + __aes_err(env, ret); + return (EAGAIN); + } +#endif + return (0); +} + +/* + * __aes_err -- + * Handle AES-specific errors. Codes and messages derived from + * rijndael/rijndael-api-fst.h. 
+ */ +static void +__aes_err(env, err) + ENV *env; + int err; +{ + char *errstr; + + switch (err) { +#ifdef HAVE_CRYPTO_IPP + case ippStsNullPtrErr: + errstr = DB_STR("0182", "IPP AES NULL pointer error"); + break; + case ippStsLengthErr: + errstr = DB_STR("0183", "IPP AES length error"); + break; + case ippStsContextMatchErr: + errstr = DB_STR("0184", + "IPP AES context does not match operation"); + break; + case ippStsUnderRunErr: + errstr = DB_STR("0185", "IPP AES srclen size error"); + break; +#else + case BAD_KEY_DIR: + errstr = DB_STR("0186", "AES key direction is invalid"); + break; + case BAD_KEY_MAT: + errstr = DB_STR("0187", + "AES key material not of correct length"); + break; + case BAD_KEY_INSTANCE: + errstr = DB_STR("0188", "AES key passwd not valid"); + break; + case BAD_CIPHER_MODE: + errstr = DB_STR("0189", + "AES cipher in wrong state (not initialized)"); + break; + case BAD_BLOCK_LENGTH: + errstr = DB_STR("0190", "AES bad block length"); + break; + case BAD_CIPHER_INSTANCE: + errstr = DB_STR("0191", "AES cipher instance is invalid"); + break; + case BAD_DATA: + errstr = DB_STR("0192", "AES data contents are invalid"); + break; + case BAD_OTHER: + errstr = DB_STR("0193", "AES unknown error"); + break; +#endif + default: + errstr = DB_STR("0194", "AES error unrecognized"); + break; + } + __db_errx(env, "%s", errstr); + return; +} diff --git a/src/crypto/crypto.c b/src/crypto/crypto.c new file mode 100644 index 00000000..d4b771c0 --- /dev/null +++ b/src/crypto/crypto.c @@ -0,0 +1,411 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * Some parts of this code originally written by Adam Stubblefield + * -- astubble@rice.edu + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/crypto.h" + +/* + * __crypto_region_init -- + * Initialize crypto. 
+ */ +int +__crypto_region_init(env) + ENV *env; +{ + CIPHER *cipher; + DB_CIPHER *db_cipher; + DB_ENV *dbenv; + REGENV *renv; + REGINFO *infop; + char *sh_passwd; + int ret; + + dbenv = env->dbenv; + infop = env->reginfo; + renv = infop->primary; + db_cipher = env->crypto_handle; + ret = 0; + + if (renv->cipher_off == INVALID_ROFF) { + if (!CRYPTO_ON(env)) + return (0); + if (!F_ISSET(infop, REGION_CREATE)) { + __db_errx(env, DB_STR("0172", + "Joining non-encrypted environment with encryption key")); + return (EINVAL); + } + if (F_ISSET(db_cipher, CIPHER_ANY)) { + __db_errx(env, DB_STR("0173", + "Encryption algorithm not supplied")); + return (EINVAL); + } + /* + * Must create the shared information. We need: Shared cipher + * information that contains the passwd. After we copy the + * passwd, we smash and free the one in the env. + */ + MUTEX_LOCK(env, renv->mtx_regenv); + if ((ret = __env_alloc(infop, sizeof(CIPHER), &cipher)) != 0) { + MUTEX_UNLOCK(env, renv->mtx_regenv); + return (ret); + } + memset(cipher, 0, sizeof(*cipher)); + if ((ret = + __env_alloc(infop, dbenv->passwd_len, &sh_passwd)) != 0) { + __env_alloc_free(infop, cipher); + MUTEX_UNLOCK(env, renv->mtx_regenv); + return (ret); + } + MUTEX_UNLOCK(env, renv->mtx_regenv); + memset(sh_passwd, 0, dbenv->passwd_len); + cipher->passwd = R_OFFSET(infop, sh_passwd); + cipher->passwd_len = dbenv->passwd_len; + cipher->flags = db_cipher->alg; + memcpy(sh_passwd, dbenv->passwd, cipher->passwd_len); + renv->cipher_off = R_OFFSET(infop, cipher); + } else { + if (!CRYPTO_ON(env)) { + __db_errx(env, DB_STR("0174", + "Encrypted environment: no encryption key supplied")); + return (EINVAL); + } + cipher = R_ADDR(infop, renv->cipher_off); + sh_passwd = R_ADDR(infop, cipher->passwd); + if ((cipher->passwd_len != dbenv->passwd_len) || + memcmp(dbenv->passwd, sh_passwd, cipher->passwd_len) != 0) { + __db_errx(env, DB_STR("0175", "Invalid password")); + return (EPERM); + } + if (!F_ISSET(db_cipher, CIPHER_ANY) && + 
db_cipher->alg != cipher->flags) { + __db_errx(env, DB_STR("0176", + "Environment encrypted using a different algorithm")); + return (EINVAL); + } + if (F_ISSET(db_cipher, CIPHER_ANY)) + /* + * We have CIPHER_ANY and we are joining the existing + * env. Setup our cipher structure for whatever + * algorithm this env has. + */ + if ((ret = __crypto_algsetup(env, db_cipher, + cipher->flags, 0)) != 0) + return (ret); + } + ret = db_cipher->init(env, db_cipher); + + /* + * On success, no matter if we allocated it or are using the already + * existing one, we are done with the passwd in the env. We smash + * N-1 bytes so that we don't overwrite the nul. + */ + memset(dbenv->passwd, 0xff, dbenv->passwd_len-1); + __os_free(env, dbenv->passwd); + dbenv->passwd = NULL; + dbenv->passwd_len = 0; + + return (ret); +} + +/* + * __crypto_env_close -- + * Crypto-specific destruction of ENV structure. + * + * PUBLIC: int __crypto_env_close __P((ENV *)); + */ +int +__crypto_env_close(env) + ENV *env; +{ + DB_CIPHER *db_cipher; + DB_ENV *dbenv; + int ret; + + dbenv = env->dbenv; + + if (dbenv->passwd != NULL) { + memset(dbenv->passwd, 0xff, dbenv->passwd_len-1); + __os_free(env, dbenv->passwd); + dbenv->passwd = NULL; + } + + if (!CRYPTO_ON(env)) + return (0); + + ret = 0; + db_cipher = env->crypto_handle; + if (!F_ISSET(db_cipher, CIPHER_ANY)) + ret = db_cipher->close(env, db_cipher->data); + __os_free(env, db_cipher); + + env->crypto_handle = NULL; + return (ret); +} + +/* + * __crypto_env_refresh -- + * Clean up after the crpto system on a close or failed open. + * + * PUBLIC: int __crypto_env_refresh __P((ENV *)); + */ +int +__crypto_env_refresh(env) + ENV *env; +{ + CIPHER *cipher; + REGENV *renv; + REGINFO *infop; + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. 
+ */ + if (F_ISSET(env, ENV_PRIVATE)) { + infop = env->reginfo; + renv = infop->primary; + if (renv->cipher_off != INVALID_ROFF) { + cipher = R_ADDR(infop, renv->cipher_off); + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, R_ADDR(infop, cipher->passwd)); + __env_alloc_free(infop, cipher); + MUTEX_UNLOCK(env, renv->mtx_regenv); + } + } + return (0); +} + +/* + * __crypto_algsetup -- + * Given a db_cipher structure and a valid algorithm flag, call + * the specific algorithm setup function. + * + * PUBLIC: int __crypto_algsetup __P((ENV *, DB_CIPHER *, u_int32_t, int)); + */ +int +__crypto_algsetup(env, db_cipher, alg, do_init) + ENV *env; + DB_CIPHER *db_cipher; + u_int32_t alg; + int do_init; +{ + int ret; + + ret = 0; + if (!CRYPTO_ON(env)) { + __db_errx(env, DB_STR("0177", + "No cipher structure given")); + return (EINVAL); + } + F_CLR(db_cipher, CIPHER_ANY); + switch (alg) { + case CIPHER_AES: + db_cipher->alg = CIPHER_AES; + ret = __aes_setup(env, db_cipher); + break; + default: + ret = __env_panic(env, EINVAL); + break; + } + if (ret == 0 && do_init) + ret = db_cipher->init(env, db_cipher); + return (ret); +} + +/* + * __crypto_decrypt_meta -- + * Perform decryption on a metapage if needed. + * + * PUBLIC: int __crypto_decrypt_meta __P((ENV *, DB *, u_int8_t *, int)); + */ +int +__crypto_decrypt_meta(env, dbp, mbuf, do_metachk) + ENV *env; + DB *dbp; + u_int8_t *mbuf; + int do_metachk; +{ + DB dummydb; + DBMETA *meta; + DB_CIPHER *db_cipher; + size_t pg_off; + int ret; + u_int8_t *iv; + + /* + * If we weren't given a dbp, we just want to decrypt the page on + * behalf of some internal subsystem, not on behalf of a user with + * a dbp. Therefore, set up a dummy dbp so that the call to + * P_OVERHEAD below works. + */ + if (dbp == NULL) { + memset(&dummydb, 0, sizeof(DB)); + dbp = &dummydb; + } + + ret = 0; + meta = (DBMETA *)mbuf; + + /* + * !!! 
+ * We used an "unused" field in the meta-data page to flag whether or + * not the database is encrypted. Unfortunately, that unused field + * was used in Berkeley DB releases before 3.0 (for example, 2.7.7). + * It would have been OK, except encryption doesn't follow the usual + * rules of "upgrade before doing anything else", we check encryption + * before checking for old versions of the database. + * + * We don't have to check Btree databases -- before 3.0, the field of + * interest was the bt_maxkey field (which was never supported and has + * since been removed). + * + * Ugly check to jump out if this format is older than what we support. + * This works because we do not encrypt the page header. + */ + if (meta->magic == DB_HASHMAGIC && meta->version <= 5) + return (0); + + /* + * Meta-pages may be encrypted for DBMETASIZE bytes. If we have a + * non-zero IV (that is written after encryption) then we decrypt (or + * error if the user isn't set up for security). We guarantee that + * the IV space on non-encrypted pages will be zero and a zero-IV is + * illegal for encryption. Therefore any non-zero IV means an + * encrypted database. This basically checks the passwd on the file + * if we cannot find a good magic number. We walk through all the + * algorithms we know about attempting to decrypt (and possibly + * byteswap). + * + * !!! + * All method meta pages have the IV and checksum at the exact same + * location, but not in DBMETA, use BTMETA. + */ + if (meta->encrypt_alg != 0) { + db_cipher = env->crypto_handle; + if (!F_ISSET(dbp, DB_AM_ENCRYPT)) { + if (!CRYPTO_ON(env)) { + __db_errx(env, DB_STR("0178", + "Encrypted database: no encryption flag specified")); + return (EINVAL); + } + /* + * User has a correct, secure env, but has encountered + * a database in that env that is secure, but user + * didn't dbp->set_flags. Since it is existing, use + * encryption if it is that way already. 
+ */ + F_SET(dbp, DB_AM_ENCRYPT|DB_AM_CHKSUM); + } + /* + * This was checked in set_flags when DB_AM_ENCRYPT was set. + * So it better still be true here. + */ + DB_ASSERT(env, CRYPTO_ON(env)); + if (!F_ISSET(db_cipher, CIPHER_ANY) && + meta->encrypt_alg != db_cipher->alg) { + __db_errx(env, DB_STR("0179", + "Database encrypted using a different algorithm")); + return (EINVAL); + } + DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM)); + iv = ((BTMETA *)mbuf)->iv; + /* + * For ALL pages, we do not encrypt the beginning of the page + * that contains overhead information. This is true of meta + * and all other pages. + */ + pg_off = P_OVERHEAD(dbp); +alg_retry: + /* + * If they asked for a specific algorithm, then + * use it. Otherwise walk through those we know. + */ + if (!F_ISSET(db_cipher, CIPHER_ANY)) { + if (do_metachk && (ret = db_cipher->decrypt(env, + db_cipher->data, iv, mbuf + pg_off, + DBMETASIZE - pg_off))) + return (ret); + if (((BTMETA *)meta)->crypto_magic != + meta->magic) { + __db_errx(env, DB_STR("0180", + "Invalid password")); + return (EINVAL); + } + /* + * Success here. The algorithm asked for and the one + * on the file match. We've just decrypted the meta + * page and checked the magic numbers. They match, + * indicating the password is right. All is right + * with the world. + */ + return (0); + } + /* + * If we get here, CIPHER_ANY must be set. + */ + ret = __crypto_algsetup(env, db_cipher, meta->encrypt_alg, 1); + goto alg_retry; + } else if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + /* + * They gave us a passwd, but the database is not encrypted. + * This is an error. We do NOT want to silently allow them + * to write data in the clear when the user set up and expects + * encrypted data. + * + * This covers at least the following scenario. + * 1. User creates and sets up an encrypted database. + * 2. Attacker cannot read the actual data in the database + * because it is encrypted, but can remove/replace the file + * with an empty, unencrypted database file. 
+ * 3. User sets encryption and we get to this code now. + * If we allowed the file to be used in the clear since + * it is that way on disk, the user would unsuspectingly + * write sensitive data in the clear. + * 4. Attacker reads data that user thought was encrypted. + * + * Therefore, asking for encryption with a database that + * was not encrypted is an error. + */ + __db_errx(env, DB_STR("0181", + "Unencrypted database with a supplied encryption key")); + return (EINVAL); + } + return (ret); +} + +/* + * __crypto_set_passwd -- + * Get the password from the shared region; and set it in a new + * environment handle. Use this to duplicate environment handles. + * + * PUBLIC: int __crypto_set_passwd __P((ENV *, ENV *)); + */ +int +__crypto_set_passwd(env_src, env_dest) + ENV *env_src, *env_dest; +{ + CIPHER *cipher; + REGENV *renv; + REGINFO *infop; + char *sh_passwd; + + infop = env_src->reginfo; + renv = infop->primary; + + DB_ASSERT(env_src, CRYPTO_ON(env_src)); + + cipher = R_ADDR(infop, renv->cipher_off); + sh_passwd = R_ADDR(infop, cipher->passwd); + return (__env_set_encrypt(env_dest->dbenv, sh_passwd, DB_ENCRYPT_AES)); +} diff --git a/src/crypto/crypto.html b/src/crypto/crypto.html new file mode 100644 index 00000000..1a2dc0c1 --- /dev/null +++ b/src/crypto/crypto.html @@ -0,0 +1,638 @@ + + + + + + + + +

+

+ Security Interface for Berkeley DB

+ +
Susan LoVerso +
Rev 1.6 +
2002 Feb 26
+ +

We provide an interface allowing secure access to Berkeley DB.   +Our goal is to allow users to have encrypted secure databases.  In +this document, the term ciphering means the act of encryption or +decryption.  They are equal but opposite actions and the same issues +apply to both just in the opposite direction. +

+Requirements

+The overriding requirement is to provide a simple mechanism to allow users +to have a secure database.  A secure database means that all of the +pages of a database will be encrypted, and all of the log files will be +encrypted. +

Falling out from this work will be a simple mechanism to allow users +to request that we checksum their data for additional error detection (without +encryption/decryption). +

We expect that data in process memory or stored in shared memory, potentially +backed by disk, is not encrypted or secure. +

+DB Method Interface Modifications

+With a logging environment, all database changes are recorded in the log +files.  Therefore, users requiring secure databases in such environments +also require secure log files. +

A prior thought had been to allow different passwords on the environment +and the databases within.  However, such a scheme, then requires that +the password be logged in order for recovery to be able to restore the +database.  Therefore, any application having the password for the +log could get the password for any databases by reading the log.  +So having a different password on a database does not gain any additional +security and it makes certain things harder and more complex.  Some +of those more complex things include the need to handle database and env +passwords differently since they'd need to be stored and accessed from +different places.  Also resolving the issue of how db_checkpoint +or db_sync, which flush database pages to disk, would find the passwords +of various databases without any dbps was unsolved.  The feature didn't +gain anything and caused significant pain.  Therefore the decision +is that there will be a single password protecting an environment and all +the logs and some databases within that environment.  We do allow +users to have a secure environment and clear databases.  Users that +want secure databases within a secure environment must set a flag. +

Users wishing to enable encryption on a database in a secure environment +or enable just checksumming on their database pages will use new flags +to DB->set_flags().  +Providing ciphering over an entire environment is accomplished by adding +a single environment method: DBENV->set_encrypt().  +Providing encryption for a database (not part of an environment) is accomplished +by adding a new database method: DB->set_encrypt(). +

Both of the set_encrypt methods must be called before their respective +open calls.  The environment method must be before the environment +open because we must know about security before there is any possibility +of writing any log records out.  The database method must be before +the database open in order to read the root page.  The planned interfaces +for these methods are: +

DBENV->set_encrypt(DBENV *dbenv,        /* DB_ENV structure */
+                  char *passwd          /* Password */
+                  u_int32_t flags);     /* Flags */
+ +
DB->set_encrypt(DB *dbp,             /* DB structure */
+               char *passwd          /* Password */
+               u_int32_t flags);     /* Flags */
+The flags accepted by these functions are: +
#define DB_ENCRYPT_AES  0x00000001  /* Use the AES encryption algorithm */
+Passwords are NULL-terminated strings.  NULL or zero length strings +are illegal.  These flags enable the checksumming and encryption using +the particular algorithms we have chosen for this implementation.  +The flags are named such that there is a logical naming pattern if additional +checksum or encryption algorithms are used. If a user gives a flag of zero, +it will behave in a manner similar to DB_UNKNOWN. It will be illegal if +they are creating the environment or database, as an algorithm must be +specified. If they are joining an existing environment or opening an existing +database, they will use whatever algorithm is in force at the time.  +Using DB_ENCRYPT_AES automatically implies SHA1 checksumming. +

These functions will perform several initialization steps.  We +will allocate crypto_handle for our env handle and set up our function +pointers.  We will allocate space and copy the password into our env +handle password area.  Similar to DB->set_cachesize, calling +DB->set_encrypt +will actually reflect back into the local environment created by DB. +

Lastly, we will add a new flag, DB_OVERWRITE, to the DBENV->remove +method.  The purpose of this flag is to force all of the memory used +by the shared regions to be overwritten before removal.  We will use +rm_overwrite, +a function that overwrites and syncs a file 3 times with varying bit patterns +to really remove a file.  Additionally, this flag will force a sync +of the overwritten regions to disk, if the regions are backed by the file +system.  That way there is no residual information left in the clear +in memory or freed disk blocks.  Although we expect that this flag +will be used by customers using security, primarily, its action is not +dependent on passwords or a secure setup, and so can be used by anyone. +

+Initialization of the Environment

+The setup of the security subsystem will be similar to replication initialization +since it is a sort of subsystem, but it does not have its own region.  +When the environment handle is created via db_env_create, we initialize +our set_encrypt method to be the RPC or local version.  Therefore +the DB_ENV structure needs a new pointer: +
    void    *crypto_handle;   /* Security handle */
+The crypto handle will really point to a new __db_cipher structure +that will contain a set of functions and a pointer to the in-memory information +needed by the specific encryption algorithm.  It will look like: +
typedef struct __db_cipher {
+    int      (*init)__P((...));    /* Alg-specific initialization function */
+    int      (*encrypt)__P((...)); /* Alg-specific encryption algorithm */
+    int      (*decrypt)__P((...)); /* Alg-specific decryption function */
+    void      *data;               /* Pointer to alg-specific information (AES_CIPHER) */
+    u_int32_t flags;               /* Cipher flags */
+} DB_CIPHER;
+ +
#define DB_MAC_KEY    20    /* Size of the MAC key */
+typedef struct __aes_cipher {
+    keyInstance    encrypt_ki;   /* Encrypt keyInstance temp. */
+    keyInstance    decrypt_ki;   /* Decrypt keyInstance temp. */
+    u_int8_t       mac_key[DB_MAC_KEY]; /* MAC key */
+    u_int32_t      flags;        /* AES-specific flags */
+} AES_CIPHER;
+It should be noted that none of these structures have their own mutex.  +We hold the environment region locked while we are creating this, but once +this is set up, it is read-only forever. +

During dbenv->set_encrypt, +we set the encryption, decryption and checksumming methods to the appropriate +functions based on the flags.  This function will allocate us a crypto +handle that we store in the DB_ENV structure just like all the +other subsystems.  For now, only AES ciphering functions and SHA1 +checksumming functions are supported.  Also we will copy the password +into the DB_ENV structure.  We ultimately need to keep the +password in the environment's shared memory region or compare this one +against the one that is there, if we are joining an existing environment, +but we do not have it yet because open has not yet been called.  We +will allocate a structure that will be used in initialization and set up +the function pointers to point to the algorithm-specific functions. +

In the  __env_open path, in __db_e_attach, if we +are creating the region and the dbenv->passwd field is set, we need +to use the length of the password in the initial computation of the environment's +size.  This guarantees sufficient space for storing the password in +shared memory.  Then we will call a new function to initialize the +security region, __crypto_region_init in __env_open.  +If we are the creator, we will allocate space in the shared region to store +the password and copy the password into that space.  Or, if we are +not the creator we will compare the password stored in the dbenv with the +one in shared memory.   Additionally, we will compare the ciphering +algorithm to the one stored in the shared region.We'll smash the dbenv +password and free it.  If they do not match, we return an error.  +If we are the creator we store the offset into the REGENV structure.  +Then __crypto_region_init  will call the initialization function +set up earlier based on the ciphering algorithm specified.  For now +we will call __aes_init.  Additionally this function will allocate +and set up the per-process state vector for this encryption's IVs.  +See Generating the Initialization +Vector for a detailed description of the IV and state vector. +

In the AES-specific initialization function, __aes_init,  +we will initialize it by calling +__aes_derivekeys in order to fill +in the keyInstance and mac_key fields in that structure.  The REGENV +structure will have one additional item +

   roff_t         passwd_off;   /* Offset of passwd */
+ +

+Initializing a Database

+During db->set_encrypt, +we set the encryption, decryption and checksumming methods to the appropriate +functions based on the flags.  Basically, we test that we are not +in an existing environment and we haven't called open.  Then we just +call through the environment handle to set the password. +

Also, we will need to add a flag in the database meta-data page that +indicates that the database is encrypted and what its algorithm is.  +This will be used when the meta-page is read after reopening a file. We +need this information on the meta-page in order to detect a user opening +a secure database without a password.  I propose using the first unused1 +byte (renaming it too) in the meta page for this purpose. +

All pages will not be encrypted for the first 64 bytes of data.  +Database meta-pages will be encrypted on the first 512 bytes only.  +All meta-page types will have an IV and checksum added within the first +512 bytes as well as a crypto magic number.  This will expand the +size of the meta-page from 256 bytes to 512 bytes. The page in/out routines, +__db_pgin and __db_pgout know the page type of the page and +will apply the 512 bytes ciphering to meta pages.  In __db_pgout, +if we have a crypto handle in our (private) environment, we will apply +ciphering to either the entire page, or the first 512 bytes if it is a +meta-page.  In __db_pgin, we will decrypt if the page we have +a crypto handle. +

When multiple processes share a database, all must use the same password +as the database creator. Using an existing database requires several conditions +to be true.  First, if the creator of the database did not create +with security, then opening later with security is an error.  Second, +if the creator did create it with security, then opening later without +security is an error.  Third, we need to be able to test and check +that when another process opens a secure database that the password they +provided is the same as the one in use by the creator. +

When reading the meta-page, in __db_file_setup, we do not go +through the paging functions, but directly read via __os_read.  +It is at this point that we will determine if the user is configured correctly.  +If the meta-page we read has an IV and checksum, they better have a crypto +handle.  If they have a crypto handle, then the meta-page must have +an IV and checksum.  If both of those are true, we test the password.  +We compare the unencrypted magic number to the newly-decrypted crypto magic +number and if they are not the same, then we report that the user gave +us a bad password. +

On a mostly unrelated topic, even when we go to very large pagesizes, +the meta information will still be within a disk sector.  So, after +talking it over with Keith and Margo, we determined that unencrypted meta-pages +still will not need a checksum. +

+Encryption and Checksum Routines

+These routines are provided to us by Adam Stubblefield at Rice University +(astubble@rice.edu).  The functional interfaces are: +
__aes_derivekeys(DB_ENV *dbenv,           /* dbenv */
+                 u_int8_t *passwd,           /* Password */
+                 size_t passwd_len,          /* Length of passwd */
+                 u_int8_t *mac_key,          /* 20 byte array to store MAC key */
+                 keyInstance *encrypt_key,   /* Encryption key of passwd */
+                 keyInstance *decrypt_key);  /* Decryption key of passwd */
+This is the only function requiring the textual user password.  From +the password, this function generates a key used in the checksum function, +__db_chksum.  +It also fills in keyInstance structures which are then used in the +encryption and decryption routines.  The keyInstance structures must +already be allocated.  These will be stored in the AES_CIPHER structure. +
 __db_chksum(u_int8_t *data,    /* Data to checksum */
+          size_t data_len,      /* Length of data */
+          u_int8_t *mac_key,    /* 20 byte array from __db_derive_keys */
+          u_int8_t *checksum);  /* 20 byte array to store checksum */
+This function generates a checksum on the data given.  This function +will do double-duty for users that simply want error detection on their +pages.  When users are using encryption, the mac_key will contain +the 20-byte key set up in __aes_derivekeys.  If they just want +checksumming, then mac_key will be NULL.  According to Adam, +we can safely use the first N-bytes of the checksum.  So for seeding +the generator for initialization vectors, we'll hash the time and then +send in the first 4 bytes for the seed.  I believe we can probably +do the same thing for checksumming log records.  We can only use 4 +bytes for the checksum in the non-secure case.  So when we want to +verify the log checksum we can compute the mac but just compare the first +4 bytes to the one we read.  All locations where we generate or check +log record checksums that currently call __ham_func4 will now call +__db_chksum.  +I believe there are 5 such locations, +__log_put, __log_putr, __log_newfile, +__log_rep_put +and __txn_force_abort. +
__aes_encrypt(DB_ENV *dbenv,        /* dbenv */
+             keyInstance *key,      /* Password key instance from __db_derive_keys */
+             u_int8_t *iv,          /* Initialization vector */
+             u_int8_t *data,        /* Data to encrypt */
+             size_t data_len);      /* Length of data to encrypt - 16 byte multiple */
+This is the function to encrypt data.  It will be called to encrypt +pages and log records.  The key instance is initialized in +__aes_derivekeys.  +The initialization vector, iv, is the 16 byte random value set up +by the Mersenne Twister pseudo-random generator.  Lastly, we pass +in a pointer to the data to encrypt and its length in data_len.  +The data_len must be a multiple of 16 bytes. The encryption is done +in-place so that when the encryption code returns our encrypted data is +in the same location as the original data. +
__aes_decrypt(DB_ENV *dbenv,    /* dbenv */
+             keyInstance *key,  /* Password key instance from __db_derive_keys */
+             u_int8_t *iv,      /* Initialization vector */
+             u_int8_t *data,    /* Data to decrypt */
+             size_t data_len);  /* Length of data to decrypt - 16 byte multiple */
+This is the function to decrypt the data.  It is exactly the same +as the encryption function except for the action it performs.  All +of the args and issues are the same.  It also decrypts in place. +

+Generating the Initialization +Vector

+Internally, we need to provide a unique initialization vector (IV) of 16 +bytes every time we encrypt any data with the same password.  For +the IV we are planning on using mt19937, the Mersenne Twister, a random +number generator that has a period of 2**19937-1. This package can be found +at http://www.math.keio.ac.jp/~matumoto/emt.html.  +Tests show that although it repeats a single integer every once in a while, +that after several million iterations, it doesn't repeat any 4 integers +that we'd be stuffing into our 16-byte IV.  We plan on seeding this +generator with the time (tv_sec) hashed through SHA1 when we create the +environment.  This package uses a global state vector that contains +624 unsigned long integers.  We do not allow a 16-byte IV of zero.  +It is simpler just to reject any 4-byte value of 0 and if we get one, just +call the generator again and get a different number.  We need to detect +holes in files and if we read an IV of zero that is a simple indication +that we need to check for an entire page of zero.  The IVs are stored +on the page after encryption and are not encrypted themselves so it is +not possible for an entire encrypted page to be read as all zeroes, unless +it was a hole in a file.  See Holes in Files +for more details. +

We will not be holding any locks when we need to generate our IV but +we need to protect access to the state vector and the index.  Calls +to the MT code will come while encrypting some data in __aes_encrypt.   +The MT code will assume that all necessary locks are held in the caller.  +We will have per-process state vectors that are set up when a process begins.  +That way we minimize the contention and only multi-threaded processes need +acquire locks for the IV.  We will have the state vector in the environment +handle in heap memory, as well as the index and there will be a mutex protecting +it for threaded access.  This will be added to the DB_ENV +structure: +

    DB_MUTEX    *mt_mutexp;   /* Mersenne Twister mutex */
+    int         *mti;         /* MT index */
+    u_long      *mt;          /* MT state vector */
+This portion of the environment will be initialized at the end of __dbenv_open, +right after we initialize the other mutex for the dblist. When we +allocate the space, we will generate our initial state vector. If we are +multi-threaded we'll allocate and initialize our mutex also. +

We need to make changes to the MT code to make it work in our namespace +and  to take  a pointer to the location of the state vector and +the index.   There will be a wrapper function __db_generate_iv +that DB will call and it will call the appropriate MT function.  I +am also going to change the default seed to use a hashed time instead of +a hard coded value.  I have looked at other implementations of the +MT code available on the web site.  The C++ version does a hash on +the current time.  I will modify our MT code to seed with the hashed +time as well.  That way the code to seed is contained within the MT +code and we can just write the wrapper to get an IV.  We will not +be changing the core computational code of MT. +

+DB Internal Issues

+ +

+When do we Cipher?

+All of the page ciphering is done in the __db_pgin/__db_pgout functions.  +We will encrypt after the method-specific function on page-out and decrypt +before the method-specific function on page-in.  We do not hold any +locks when entering these functions.  We determine that we need to +cipher based on the existence of the encryption flag in the dbp. +

For ciphering log records, the encryption will be done as the first +thing (or a new wrapper) in __log_put.  See Log +Record Encryption for those details. +
  +

+Page Changes

+The checksum and IV values will be stored prior to the first index of the +page.  We have a new P_INP macro that replaces use of inp[X] in the +code.  This macro takes a dbp as an argument and determines where +our first index is based on whether we have DB_AM_CHKSUM and DB_AM_ENCRYPT +set.  If neither is set, then our first index is where it always was. + If just checksumming is set, then we reserve a 4-byte checksum.  +If encryption is set, then we reserve 36 bytes for our checksum/IV as well +as some space to get proper alignment to encrypt on a 16-byte boundary. +

Since several paging macros use inp[X] in them, those macros must now +take a dbp.  There are a lot of changes to make all the necessary +paging macros take a dbp, although these changes are trivial in nature. +

Also, there is a new function __db_chk_meta to perform checksumming +and decryption checking on meta pages specifically.  This function +is where we check that the database algorithm matches what the user gave +(or if they set DB_CIPHER_ANY then we set it), and other encryption related +testing for bad combinations of what is in the file versus what is in the +user structures. +

+Verification

+The verification code will also need to be updated to deal with secure +pages.  Basically when the verification code reads in the meta page +it will call __db_chk_meta to perform any checksumming and decryption. +

+Holes in Files

+Holes in files will be dealt with rather simply.  We need to be able +to distinguish reading a hole in a file from an encrypted page that happened +to encrypt to all zero's.  If we read a hole in a file, we do not +want to send that empty page through the decryption routine.  This +can be determined simply without incurring the performance penalty of comparing +every byte on a page on every read until we get a non-zero byte. +
The __db_pgin function is only given an invalid page P_INVALID in this +case.  So, if the page type, which is always unencrypted, is +P_INVALID, then we do not perform any checksum verification or decryption. +

+Errors and Recovery

+Dealing with a checksum error is tricky.  Ultimately, if a checksum +error occurs it is extremely likely that the user must do catastrophic +recovery.  There is no other failure return other than  DB_RUNRECOVERY +for indicating that the user should run catastrophic recovery.  We +do not want to add a new error return for applications to check because +a lot of applications already look for and deal with DB_RUNRECOVERY as +an error condition and we want to fit ourselves into that application model.  +We already indicate to the user that when they get that error, then they +need to run recovery.  If recovery fails, then they need to run catastrophic +recovery.  We need to get ourselves to the point where users will +run catastrophic recovery. +

If we get a checksum error, then we need to log a message stating a +checksum error occurred on page N.  In __db_pgin, we can check +if logging is on in the environment.  If so, we want to log the message. +

When the application gets the DB_RUNRECOVERY error, they'll have to +shut down their application and run recovery.  When the recovery encounters +the record indicating checksum failure, then normal recovery will fail +and the user will have to perform catastrophic recovery.  When catastrophic +recovery encounters that record, it will simply ignore it. +

+Log Record Encryption

+Log records will be ciphered.  It might make sense to wrap __log_put +to encrypt the DBT we send down.  The __log_put function is +where the checksum is computed before acquiring the region lock.  +But also this function is where we call __rep_send_message to send +the DBT to the replication clients.  Therefore, we need the DBT to +be encrypted prior to there.  We also need it encrypted before checksumming. +I think __log_put will become __log_put_internal, and the +new __log_put will encrypt if needed and then call __log_put_internal +(the +function formerly known as __log_put).  Log records are kept +in a shared memory region buffer prior to going out to disk.  Records +in the buffer will be encrypted.  No locks are held at the time we +will need to encrypt. +

On reading the log, via log cursors, the log code stores log records +in the log buffer.  Records in that buffer will be encrypted, so decryption +will occur no matter whether we are returning records from the buffer or +if we are returning log records directly from the disk. Current checksum +checking is done in +__logc_get_int.  Decryption will be done +after the checksum is checked. +

There are currently two nasty issues with encrypted log records.  +The first is that __txn_force_abort overwrites a commit record in +the log buffer with an abort record.  Well, our log buffer will be +encrypted.  Therefore, __txn_force_abort is going to need to +do encryption of its new record.  This can be accomplished by sending +in the dbenv handle to the function.  It is available to us in __log_flush_commit +and we can just pass it in.  I don't like putting log encryption in +the txn code, but the layering violation is already there. +

The second issue is that the encryption code requires data that is a +multiple of 16 bytes and log record lengths are variable.  We will +need to pad log records to meet the requirement.  Since the callers +of __log_put set up the given DBT it is a logical place to pad if +necessary. We will modify the gen_rec.awk script to have all of the generated +logging functions pad for us if we have a crypto handle. This padding will +also expand the size of log files. Anyone calling log_put and using +security from the application will have to pad on their own or it will +return an error. +

When ciphering the log file, we will need a different header than the +current one.  The current header only has space for a 4 byte checksum.  +Our secure header will need space for the 16 byte IV and 20 byte checksum.  +This will blow up our log files when running securely since every single +log record header will now consume 32 additional bytes.  I believe +that the log header does not need to be encrypted.  It contains an +offset, a length and our IV and checksum.  Our IV and checksum are +never encrypted.  I don't believe there to be any risk in having the +offset and length in the clear. +

I would prefer not to have two types of log headers that are incompatible +with each other.  It is not acceptable to increase the log headers +of all users from 12 bytes to 44 bytes.  Such a change would also +make log files incompatible with earlier releases.  Even worse, the +cksum field of the header is in between the offset and +len.  It would be really convenient if we could have just made a bigger +cksum portion without affecting the location of the other fields.  +Oh well.  Most customers will not be using encryption and we won't +make them pay the price of the expanded header.  Keith indicates that +the log file format is changing with the next release so I will move the +cksum field so it can at least be overlaid. +

One method around this would be to have a single internal header that +contains all the information both mechanisms need, but when we write out +the header we choose which pieces to write.  By appending the security +information to the end of the existing structure, and adding a size field, +we can modify a few places to use the size field to write out only the +current first 12 bytes, or the entire security header needed. +

+Replication

+Replication clients are going to need to start all of their individual +environment handles with the same password.  The log records are going +to be sent to the clients decrypted and the clients will have to encrypt +them on their way to the client log files.  We cannot send encrypted +log records to clients.  The reason is that the checksum and IV are +stored in the log header and the master only sends the log record itself +to the client.  Therefore, the client has no way to decrypt a log +record from the master.  Therefore, anyone wanting to use truly secure +replication is going to have to have a secure transport mechanism.  +By not encrypting records, clients can theoretically have different passwords +and DB won't care. +

On the master side we must copy the DBT sent in.  We encrypt the +original and send to clients the clear record.  On the client side, +support for encryption is added into __log_rep_put. +

+Sharing the Environment

+When multiple processes join the environment, all must use the same password +as the creator. +

Joining an existing environment requires several conditions to be true.  +First, if the creator of the environment did not create with security, +then joining later with security is an error.  Second, if the creator +did create it with security, then joining later without security is an +error.  Third, we need to be able to test and check that when another +process joins a secure environment that the password they provided is the +same as the one in use by the creator. +

The first two scenarios should be fairly trivial to determine, if we +aren't creating the environment, we can compare what is there with what +we have.  In the third case, the __crypto_region_init function +will see that the environment region has a valid passwd_off and we'll then +compare that password to the one we have in our dbenv handle.  In +any case we'll smash the dbenv handle's passwd and free that memory before +returning whether we have a password match or not. +

We need to store the passwords themselves in the region because multiple +calls to the __aes_derivekeys function with the same password yields +different keyInstance contents.  Therefore we don't have any way to +check passwords other than retaining and comparing the actual passwords. +

+Other APIs

+All of the other APIs will need interface enhancements to support the new +security methods.  The Java and C++ interfaces will likely be done +by Michael Cahill and Sue will implement the Tcl and RPC changes.  +Tcl will need the changes for testing purposes but the interface should +be public, not test-only.  RPC should fully support security.  +The biggest risk that I can see is that the client will send the password +to the server in the clear.  Anyone sniffing the wires or running +tcpdump or other packet grabbing code could grab that.  Someone really +interested in using security over RPC probably ought to add authentication +and other measures to the RPC server as well. +

+Utilities

+All should take a -P flag to specify a password for the environment or +password.  Those that take an env and a database might need something +more to distinguish between env passwds and db passwds. Here is what we +do for each utility: +
    +
  • +berkeley_db_svc - Needs -P after each -h specified.
  • + +
  • +db_archive - Needs -P if the env is encrypted.
  • + +
  • +db_checkpoint - Needs -P if the env is encrypted.
  • + +
  • +db_deadlock - No changes
  • + +
  • +db_dump - Needs -P if the env or database is encrypted.
  • + +
  • +db_load - Needs -P if the env or database is encrypted.
  • + +
  • +db_printlog - Needs -P if the env is encrypted.
  • + +
  • +db_recover - Needs -P if the env is encrypted.
  • + +
  • +db_stat - Needs -P if the env or database is encrypted.
  • + +
  • +db_upgrade - Needs -P if the env or database is encrypted.
  • + +
  • +db_verify - Needs -P if the env or database is encrypted.
  • +
+ +

+Testing

+All testing should be able to be accomplished via Tcl.  The following +tests (and probably others I haven't thought of yet) should be performed: +
    +
  • +Basic functionality - basically a test001 but encrypted without an env
  • + +
  • +Basic functionality, w/ env - like the previous test but with an env.
  • + +
  • +Basic functionality, multiple processes - like first test, but make sure +others can correctly join.
  • + +
  • +Basic functionality, mult. processes - like above test, but initialize/close +environment/database first so that the next test processes are all joiners +of an existing env, but creator no longer exists and the shared region +must be opened.
  • + +
  • +Recovery test - Run recovery over an encrypted environment.
  • + +
  • +Subdb test - Run with subdbs that are encrypted.
  • + +
  • +Utility test - Verify the new options to all the utilities.
  • + +
  • +Error handling - Test the basic setup errors for both env's and databases +with multiple processes.  They are:
  • + +
      +
    1. +Attempt to set a NULL or zero-length passwd.
    2. + +
    3. +Create Env w/ security and attempt to create database w/ its own password.
    4. + +
    5. +Env/DB creates with security.  Proc2 joins without - should get an +error.
    6. + +
    7. +Env/DB creates without security.  Proc2 joins with - should get an +error.
    8. + +
    9. +Env/DB creates with security.  Proc2 joins with different password +- should get an error.
    10. + +
    11. +Env/DB creates with security.  Closes.  Proc2 reopens with different +password - should get an error.
    12. + +
    13. +Env/DB creates with security.  Closes.  Tcl overwrites a page +of the database with garbage.  Proc2 reopens with the correct password.  +Code should detect checksum error.
    14. + +
    15. +Env/DB creates with security.  Open a 2nd identical DB with a different +password.  Put the exact same data into both databases.  Close.  +Overwrite the identical page of DB1 with the one from DB2.  Reopen +the database with correct DB1 password.  Code should detect an encryption +error on that page.
    16. +
    +
+ +

+Risks

+There are several holes in this design.  It is important to document +them clearly. +

The first is that all of the pages are stored in memory and possibly +the file system in the clear.  The password is stored in the shared +data regions in the clear.  Therefore if an attacker can read the +process memory, they can do whatever they want.  If the attacker can +read system memory or swap they can access the data as well.  Since +everything in the shared data regions (with the exception of the buffered +log) will be in the clear, it is important to realize that file backed +regions will be written in the clear, including the portion of the regions +containing passwords.  We recommend to users that they use system +memory instead of file backed shared memory. + + diff --git a/src/crypto/mersenne/mt19937db.c b/src/crypto/mersenne/mt19937db.c new file mode 100644 index 00000000..2d53c312 --- /dev/null +++ b/src/crypto/mersenne/mt19937db.c @@ -0,0 +1,187 @@ +/* + * $Id$ + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" + +/* A C-program for MT19937: Integer version (1999/10/28) */ +/* genrand() generates one pseudorandom unsigned integer (32bit) */ +/* which is uniformly distributed among 0 to 2^32-1 for each */ +/* call. sgenrand(seed) sets initial values to the working area */ +/* of 624 words. Before genrand(), sgenrand(seed) must be */ +/* called once. (seed is any 32-bit integer.) */ +/* Coded by Takuji Nishimura, considering the suggestions by */ +/* Topher Cooper and Marc Rieffel in July-Aug. 1997. */ + +/* This library is free software under the Artistic license: */ +/* see the file COPYING distributed together with this code. */ +/* For the verification of the code, its output sequence file */ +/* mt19937int.out is attached (2001/4/2) */ + +/* Copyright (C) 1997, 1999 Makoto Matsumoto and Takuji Nishimura. */ +/* Any feedback is very welcome. For any question, comments, */ +/* see http://www.math.keio.ac.jp/matumoto/emt.html or email */ +/* matumoto@math.keio.ac.jp */ + +/* REFERENCE */ +/* M. 
Matsumoto and T. Nishimura, */ +/* "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform */ +/* Pseudo-Random Number Generator", */ +/* ACM Transactions on Modeling and Computer Simulation, */ +/* Vol. 8, No. 1, January 1998, pp 3--30. */ + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0df /* constant vector a */ +#define UPPER_MASK 0x80000000 /* most significant w-r bits */ +#define LOWER_MASK 0x7fffffff /* least significant r bits */ + +/* Tempering parameters */ +#define TEMPERING_MASK_B 0x9d2c5680 +#define TEMPERING_MASK_C 0xefc60000 +#define TEMPERING_SHIFT_U(y) (y >> 11) +#define TEMPERING_SHIFT_S(y) (y << 7) +#define TEMPERING_SHIFT_T(y) (y << 15) +#define TEMPERING_SHIFT_L(y) (y >> 18) + +static void __db_sgenrand __P((unsigned long, unsigned long *, int *)); +#ifdef NOT_USED +static void __db_lsgenrand __P((unsigned long *, unsigned long *, int *)); +#endif +static unsigned long __db_genrand __P((ENV *)); + +/* + * __db_generate_iv -- + * Generate an initialization vector (IV) + * + * PUBLIC: int __db_generate_iv __P((ENV *, u_int32_t *)); + */ +int +__db_generate_iv(env, iv) + ENV *env; + u_int32_t *iv; +{ + int i, n, ret; + + ret = 0; + n = DB_IV_BYTES / sizeof(u_int32_t); + MUTEX_LOCK(env, env->mtx_mt); + if (env->mt == NULL) { + if ((ret = __os_calloc(env, 1, N*sizeof(unsigned long), + &env->mt)) != 0) + return (ret); + /* mti==N+1 means mt[N] is not initialized */ + env->mti = N + 1; + } + for (i = 0; i < n; i++) { + /* + * We do not allow 0. If we get one just try again. 
+ */ + do { + iv[i] = (u_int32_t)__db_genrand(env); + } while (iv[i] == 0); + } + + MUTEX_UNLOCK(env, env->mtx_mt); + return (0); +} + +/* Initializing the array with a seed */ +static void +__db_sgenrand(seed, mt, mtip) + unsigned long seed; + unsigned long mt[]; + int *mtip; +{ + int i; + + DB_ASSERT(NULL, seed != 0); + for (i=0;i> 16; + seed = 69069 * seed + 1; + } + *mtip = N; +} + +#ifdef NOT_USED +/* Initialization by "sgenrand()" is an example. Theoretically, */ +/* there are 2^19937-1 possible states as an intial state. */ +/* This function allows to choose any of 2^19937-1 ones. */ +/* Essential bits in "seed_array[]" is following 19937 bits: */ +/* (seed_array[0]&UPPER_MASK), seed_array[1], ..., seed_array[N-1]. */ +/* (seed_array[0]&LOWER_MASK) is discarded. */ +/* Theoretically, */ +/* (seed_array[0]&UPPER_MASK), seed_array[1], ..., seed_array[N-1] */ +/* can take any values except all zeros. */ +static void +__db_lsgenrand(seed_array, mt, mtip) + unsigned long seed_array[]; + unsigned long mt[]; + int *mtip; + /* the length of seed_array[] must be at least N */ +{ + int i; + + for (i=0;imtx_mt locked. + */ + if (env->mti >= N) { /* generate N words at one time */ + int kk; + + if (env->mti == N+1) { /* if sgenrand() has not been called, */ + /* + * Seed the generator with the hashed time. The __db_mac + * function will return 4 bytes if we don't send in a key. 
+ */ + do { + __os_gettime(env, &ts, 1); + __db_chksum(NULL, (u_int8_t *)&ts.tv_sec, + sizeof(ts.tv_sec), NULL, (u_int8_t *)&seed); + } while (seed == 0); + __db_sgenrand((unsigned long)seed, env->mt, &env->mti); + } + + for (kk=0;kkmt[kk]&UPPER_MASK)|(env->mt[kk+1]&LOWER_MASK); + env->mt[kk] = env->mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1]; + } + for (;kkmt[kk]&UPPER_MASK)|(env->mt[kk+1]&LOWER_MASK); + env->mt[kk] = env->mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1]; + } + y = (env->mt[N-1]&UPPER_MASK)|(env->mt[0]&LOWER_MASK); + env->mt[N-1] = env->mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1]; + + env->mti = 0; + } + + y = env->mt[env->mti++]; + y ^= TEMPERING_SHIFT_U(y); + y ^= TEMPERING_SHIFT_S(y) & TEMPERING_MASK_B; + y ^= TEMPERING_SHIFT_T(y) & TEMPERING_MASK_C; + y ^= TEMPERING_SHIFT_L(y); + + return y; +} diff --git a/src/crypto/rijndael/rijndael-alg-fst.c b/src/crypto/rijndael/rijndael-alg-fst.c new file mode 100644 index 00000000..322ad5ff --- /dev/null +++ b/src/crypto/rijndael/rijndael-alg-fst.c @@ -0,0 +1,1466 @@ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" + +#include "crypto/rijndael/rijndael-alg-fst.h" + +/* +Te0[x] = S [x].[02, 01, 01, 03]; +Te1[x] = S [x].[03, 02, 01, 01]; +Te2[x] = S [x].[01, 03, 02, 01]; +Te3[x] = S [x].[01, 01, 03, 02]; +Te4[x] = S [x].[01, 01, 01, 01]; + +Td0[x] = Si[x].[0e, 09, 0d, 0b]; +Td1[x] = Si[x].[0b, 0e, 09, 0d]; +Td2[x] = Si[x].[0d, 0b, 0e, 09]; +Td3[x] = Si[x].[09, 0d, 0b, 0e]; +Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +static const u32 Te0[256] = { + (u_int)0xc66363a5, (u_int)0xf87c7c84, (u_int)0xee777799, (u_int)0xf67b7b8d, + (u_int)0xfff2f20d, (u_int)0xd66b6bbd, (u_int)0xde6f6fb1, (u_int)0x91c5c554, + (u_int)0x60303050, (u_int)0x02010103, (u_int)0xce6767a9, (u_int)0x562b2b7d, + (u_int)0xe7fefe19, (u_int)0xb5d7d762, (u_int)0x4dababe6, (u_int)0xec76769a, + (u_int)0x8fcaca45, (u_int)0x1f82829d, (u_int)0x89c9c940, (u_int)0xfa7d7d87, + (u_int)0xeffafa15, (u_int)0xb25959eb, (u_int)0x8e4747c9, (u_int)0xfbf0f00b, + (u_int)0x41adadec, (u_int)0xb3d4d467, (u_int)0x5fa2a2fd, (u_int)0x45afafea, + (u_int)0x239c9cbf, (u_int)0x53a4a4f7, (u_int)0xe4727296, (u_int)0x9bc0c05b, + (u_int)0x75b7b7c2, (u_int)0xe1fdfd1c, (u_int)0x3d9393ae, (u_int)0x4c26266a, + (u_int)0x6c36365a, (u_int)0x7e3f3f41, (u_int)0xf5f7f702, (u_int)0x83cccc4f, + (u_int)0x6834345c, (u_int)0x51a5a5f4, (u_int)0xd1e5e534, (u_int)0xf9f1f108, + (u_int)0xe2717193, (u_int)0xabd8d873, (u_int)0x62313153, (u_int)0x2a15153f, + 
(u_int)0x0804040c, (u_int)0x95c7c752, (u_int)0x46232365, (u_int)0x9dc3c35e, + (u_int)0x30181828, (u_int)0x379696a1, (u_int)0x0a05050f, (u_int)0x2f9a9ab5, + (u_int)0x0e070709, (u_int)0x24121236, (u_int)0x1b80809b, (u_int)0xdfe2e23d, + (u_int)0xcdebeb26, (u_int)0x4e272769, (u_int)0x7fb2b2cd, (u_int)0xea75759f, + (u_int)0x1209091b, (u_int)0x1d83839e, (u_int)0x582c2c74, (u_int)0x341a1a2e, + (u_int)0x361b1b2d, (u_int)0xdc6e6eb2, (u_int)0xb45a5aee, (u_int)0x5ba0a0fb, + (u_int)0xa45252f6, (u_int)0x763b3b4d, (u_int)0xb7d6d661, (u_int)0x7db3b3ce, + (u_int)0x5229297b, (u_int)0xdde3e33e, (u_int)0x5e2f2f71, (u_int)0x13848497, + (u_int)0xa65353f5, (u_int)0xb9d1d168, (u_int)0x00000000, (u_int)0xc1eded2c, + (u_int)0x40202060, (u_int)0xe3fcfc1f, (u_int)0x79b1b1c8, (u_int)0xb65b5bed, + (u_int)0xd46a6abe, (u_int)0x8dcbcb46, (u_int)0x67bebed9, (u_int)0x7239394b, + (u_int)0x944a4ade, (u_int)0x984c4cd4, (u_int)0xb05858e8, (u_int)0x85cfcf4a, + (u_int)0xbbd0d06b, (u_int)0xc5efef2a, (u_int)0x4faaaae5, (u_int)0xedfbfb16, + (u_int)0x864343c5, (u_int)0x9a4d4dd7, (u_int)0x66333355, (u_int)0x11858594, + (u_int)0x8a4545cf, (u_int)0xe9f9f910, (u_int)0x04020206, (u_int)0xfe7f7f81, + (u_int)0xa05050f0, (u_int)0x783c3c44, (u_int)0x259f9fba, (u_int)0x4ba8a8e3, + (u_int)0xa25151f3, (u_int)0x5da3a3fe, (u_int)0x804040c0, (u_int)0x058f8f8a, + (u_int)0x3f9292ad, (u_int)0x219d9dbc, (u_int)0x70383848, (u_int)0xf1f5f504, + (u_int)0x63bcbcdf, (u_int)0x77b6b6c1, (u_int)0xafdada75, (u_int)0x42212163, + (u_int)0x20101030, (u_int)0xe5ffff1a, (u_int)0xfdf3f30e, (u_int)0xbfd2d26d, + (u_int)0x81cdcd4c, (u_int)0x180c0c14, (u_int)0x26131335, (u_int)0xc3ecec2f, + (u_int)0xbe5f5fe1, (u_int)0x359797a2, (u_int)0x884444cc, (u_int)0x2e171739, + (u_int)0x93c4c457, (u_int)0x55a7a7f2, (u_int)0xfc7e7e82, (u_int)0x7a3d3d47, + (u_int)0xc86464ac, (u_int)0xba5d5de7, (u_int)0x3219192b, (u_int)0xe6737395, + (u_int)0xc06060a0, (u_int)0x19818198, (u_int)0x9e4f4fd1, (u_int)0xa3dcdc7f, + (u_int)0x44222266, (u_int)0x542a2a7e, 
(u_int)0x3b9090ab, (u_int)0x0b888883, + (u_int)0x8c4646ca, (u_int)0xc7eeee29, (u_int)0x6bb8b8d3, (u_int)0x2814143c, + (u_int)0xa7dede79, (u_int)0xbc5e5ee2, (u_int)0x160b0b1d, (u_int)0xaddbdb76, + (u_int)0xdbe0e03b, (u_int)0x64323256, (u_int)0x743a3a4e, (u_int)0x140a0a1e, + (u_int)0x924949db, (u_int)0x0c06060a, (u_int)0x4824246c, (u_int)0xb85c5ce4, + (u_int)0x9fc2c25d, (u_int)0xbdd3d36e, (u_int)0x43acacef, (u_int)0xc46262a6, + (u_int)0x399191a8, (u_int)0x319595a4, (u_int)0xd3e4e437, (u_int)0xf279798b, + (u_int)0xd5e7e732, (u_int)0x8bc8c843, (u_int)0x6e373759, (u_int)0xda6d6db7, + (u_int)0x018d8d8c, (u_int)0xb1d5d564, (u_int)0x9c4e4ed2, (u_int)0x49a9a9e0, + (u_int)0xd86c6cb4, (u_int)0xac5656fa, (u_int)0xf3f4f407, (u_int)0xcfeaea25, + (u_int)0xca6565af, (u_int)0xf47a7a8e, (u_int)0x47aeaee9, (u_int)0x10080818, + (u_int)0x6fbabad5, (u_int)0xf0787888, (u_int)0x4a25256f, (u_int)0x5c2e2e72, + (u_int)0x381c1c24, (u_int)0x57a6a6f1, (u_int)0x73b4b4c7, (u_int)0x97c6c651, + (u_int)0xcbe8e823, (u_int)0xa1dddd7c, (u_int)0xe874749c, (u_int)0x3e1f1f21, + (u_int)0x964b4bdd, (u_int)0x61bdbddc, (u_int)0x0d8b8b86, (u_int)0x0f8a8a85, + (u_int)0xe0707090, (u_int)0x7c3e3e42, (u_int)0x71b5b5c4, (u_int)0xcc6666aa, + (u_int)0x904848d8, (u_int)0x06030305, (u_int)0xf7f6f601, (u_int)0x1c0e0e12, + (u_int)0xc26161a3, (u_int)0x6a35355f, (u_int)0xae5757f9, (u_int)0x69b9b9d0, + (u_int)0x17868691, (u_int)0x99c1c158, (u_int)0x3a1d1d27, (u_int)0x279e9eb9, + (u_int)0xd9e1e138, (u_int)0xebf8f813, (u_int)0x2b9898b3, (u_int)0x22111133, + (u_int)0xd26969bb, (u_int)0xa9d9d970, (u_int)0x078e8e89, (u_int)0x339494a7, + (u_int)0x2d9b9bb6, (u_int)0x3c1e1e22, (u_int)0x15878792, (u_int)0xc9e9e920, + (u_int)0x87cece49, (u_int)0xaa5555ff, (u_int)0x50282878, (u_int)0xa5dfdf7a, + (u_int)0x038c8c8f, (u_int)0x59a1a1f8, (u_int)0x09898980, (u_int)0x1a0d0d17, + (u_int)0x65bfbfda, (u_int)0xd7e6e631, (u_int)0x844242c6, (u_int)0xd06868b8, + (u_int)0x824141c3, (u_int)0x299999b0, (u_int)0x5a2d2d77, (u_int)0x1e0f0f11, + 
(u_int)0x7bb0b0cb, (u_int)0xa85454fc, (u_int)0x6dbbbbd6, (u_int)0x2c16163a, +}; +static const u32 Te1[256] = { + (u_int)0xa5c66363, (u_int)0x84f87c7c, (u_int)0x99ee7777, (u_int)0x8df67b7b, + (u_int)0x0dfff2f2, (u_int)0xbdd66b6b, (u_int)0xb1de6f6f, (u_int)0x5491c5c5, + (u_int)0x50603030, (u_int)0x03020101, (u_int)0xa9ce6767, (u_int)0x7d562b2b, + (u_int)0x19e7fefe, (u_int)0x62b5d7d7, (u_int)0xe64dabab, (u_int)0x9aec7676, + (u_int)0x458fcaca, (u_int)0x9d1f8282, (u_int)0x4089c9c9, (u_int)0x87fa7d7d, + (u_int)0x15effafa, (u_int)0xebb25959, (u_int)0xc98e4747, (u_int)0x0bfbf0f0, + (u_int)0xec41adad, (u_int)0x67b3d4d4, (u_int)0xfd5fa2a2, (u_int)0xea45afaf, + (u_int)0xbf239c9c, (u_int)0xf753a4a4, (u_int)0x96e47272, (u_int)0x5b9bc0c0, + (u_int)0xc275b7b7, (u_int)0x1ce1fdfd, (u_int)0xae3d9393, (u_int)0x6a4c2626, + (u_int)0x5a6c3636, (u_int)0x417e3f3f, (u_int)0x02f5f7f7, (u_int)0x4f83cccc, + (u_int)0x5c683434, (u_int)0xf451a5a5, (u_int)0x34d1e5e5, (u_int)0x08f9f1f1, + (u_int)0x93e27171, (u_int)0x73abd8d8, (u_int)0x53623131, (u_int)0x3f2a1515, + (u_int)0x0c080404, (u_int)0x5295c7c7, (u_int)0x65462323, (u_int)0x5e9dc3c3, + (u_int)0x28301818, (u_int)0xa1379696, (u_int)0x0f0a0505, (u_int)0xb52f9a9a, + (u_int)0x090e0707, (u_int)0x36241212, (u_int)0x9b1b8080, (u_int)0x3ddfe2e2, + (u_int)0x26cdebeb, (u_int)0x694e2727, (u_int)0xcd7fb2b2, (u_int)0x9fea7575, + (u_int)0x1b120909, (u_int)0x9e1d8383, (u_int)0x74582c2c, (u_int)0x2e341a1a, + (u_int)0x2d361b1b, (u_int)0xb2dc6e6e, (u_int)0xeeb45a5a, (u_int)0xfb5ba0a0, + (u_int)0xf6a45252, (u_int)0x4d763b3b, (u_int)0x61b7d6d6, (u_int)0xce7db3b3, + (u_int)0x7b522929, (u_int)0x3edde3e3, (u_int)0x715e2f2f, (u_int)0x97138484, + (u_int)0xf5a65353, (u_int)0x68b9d1d1, (u_int)0x00000000, (u_int)0x2cc1eded, + (u_int)0x60402020, (u_int)0x1fe3fcfc, (u_int)0xc879b1b1, (u_int)0xedb65b5b, + (u_int)0xbed46a6a, (u_int)0x468dcbcb, (u_int)0xd967bebe, (u_int)0x4b723939, + (u_int)0xde944a4a, (u_int)0xd4984c4c, (u_int)0xe8b05858, (u_int)0x4a85cfcf, + 
(u_int)0x6bbbd0d0, (u_int)0x2ac5efef, (u_int)0xe54faaaa, (u_int)0x16edfbfb, + (u_int)0xc5864343, (u_int)0xd79a4d4d, (u_int)0x55663333, (u_int)0x94118585, + (u_int)0xcf8a4545, (u_int)0x10e9f9f9, (u_int)0x06040202, (u_int)0x81fe7f7f, + (u_int)0xf0a05050, (u_int)0x44783c3c, (u_int)0xba259f9f, (u_int)0xe34ba8a8, + (u_int)0xf3a25151, (u_int)0xfe5da3a3, (u_int)0xc0804040, (u_int)0x8a058f8f, + (u_int)0xad3f9292, (u_int)0xbc219d9d, (u_int)0x48703838, (u_int)0x04f1f5f5, + (u_int)0xdf63bcbc, (u_int)0xc177b6b6, (u_int)0x75afdada, (u_int)0x63422121, + (u_int)0x30201010, (u_int)0x1ae5ffff, (u_int)0x0efdf3f3, (u_int)0x6dbfd2d2, + (u_int)0x4c81cdcd, (u_int)0x14180c0c, (u_int)0x35261313, (u_int)0x2fc3ecec, + (u_int)0xe1be5f5f, (u_int)0xa2359797, (u_int)0xcc884444, (u_int)0x392e1717, + (u_int)0x5793c4c4, (u_int)0xf255a7a7, (u_int)0x82fc7e7e, (u_int)0x477a3d3d, + (u_int)0xacc86464, (u_int)0xe7ba5d5d, (u_int)0x2b321919, (u_int)0x95e67373, + (u_int)0xa0c06060, (u_int)0x98198181, (u_int)0xd19e4f4f, (u_int)0x7fa3dcdc, + (u_int)0x66442222, (u_int)0x7e542a2a, (u_int)0xab3b9090, (u_int)0x830b8888, + (u_int)0xca8c4646, (u_int)0x29c7eeee, (u_int)0xd36bb8b8, (u_int)0x3c281414, + (u_int)0x79a7dede, (u_int)0xe2bc5e5e, (u_int)0x1d160b0b, (u_int)0x76addbdb, + (u_int)0x3bdbe0e0, (u_int)0x56643232, (u_int)0x4e743a3a, (u_int)0x1e140a0a, + (u_int)0xdb924949, (u_int)0x0a0c0606, (u_int)0x6c482424, (u_int)0xe4b85c5c, + (u_int)0x5d9fc2c2, (u_int)0x6ebdd3d3, (u_int)0xef43acac, (u_int)0xa6c46262, + (u_int)0xa8399191, (u_int)0xa4319595, (u_int)0x37d3e4e4, (u_int)0x8bf27979, + (u_int)0x32d5e7e7, (u_int)0x438bc8c8, (u_int)0x596e3737, (u_int)0xb7da6d6d, + (u_int)0x8c018d8d, (u_int)0x64b1d5d5, (u_int)0xd29c4e4e, (u_int)0xe049a9a9, + (u_int)0xb4d86c6c, (u_int)0xfaac5656, (u_int)0x07f3f4f4, (u_int)0x25cfeaea, + (u_int)0xafca6565, (u_int)0x8ef47a7a, (u_int)0xe947aeae, (u_int)0x18100808, + (u_int)0xd56fbaba, (u_int)0x88f07878, (u_int)0x6f4a2525, (u_int)0x725c2e2e, + (u_int)0x24381c1c, (u_int)0xf157a6a6, 
(u_int)0xc773b4b4, (u_int)0x5197c6c6, + (u_int)0x23cbe8e8, (u_int)0x7ca1dddd, (u_int)0x9ce87474, (u_int)0x213e1f1f, + (u_int)0xdd964b4b, (u_int)0xdc61bdbd, (u_int)0x860d8b8b, (u_int)0x850f8a8a, + (u_int)0x90e07070, (u_int)0x427c3e3e, (u_int)0xc471b5b5, (u_int)0xaacc6666, + (u_int)0xd8904848, (u_int)0x05060303, (u_int)0x01f7f6f6, (u_int)0x121c0e0e, + (u_int)0xa3c26161, (u_int)0x5f6a3535, (u_int)0xf9ae5757, (u_int)0xd069b9b9, + (u_int)0x91178686, (u_int)0x5899c1c1, (u_int)0x273a1d1d, (u_int)0xb9279e9e, + (u_int)0x38d9e1e1, (u_int)0x13ebf8f8, (u_int)0xb32b9898, (u_int)0x33221111, + (u_int)0xbbd26969, (u_int)0x70a9d9d9, (u_int)0x89078e8e, (u_int)0xa7339494, + (u_int)0xb62d9b9b, (u_int)0x223c1e1e, (u_int)0x92158787, (u_int)0x20c9e9e9, + (u_int)0x4987cece, (u_int)0xffaa5555, (u_int)0x78502828, (u_int)0x7aa5dfdf, + (u_int)0x8f038c8c, (u_int)0xf859a1a1, (u_int)0x80098989, (u_int)0x171a0d0d, + (u_int)0xda65bfbf, (u_int)0x31d7e6e6, (u_int)0xc6844242, (u_int)0xb8d06868, + (u_int)0xc3824141, (u_int)0xb0299999, (u_int)0x775a2d2d, (u_int)0x111e0f0f, + (u_int)0xcb7bb0b0, (u_int)0xfca85454, (u_int)0xd66dbbbb, (u_int)0x3a2c1616, +}; +static const u32 Te2[256] = { + (u_int)0x63a5c663, (u_int)0x7c84f87c, (u_int)0x7799ee77, (u_int)0x7b8df67b, + (u_int)0xf20dfff2, (u_int)0x6bbdd66b, (u_int)0x6fb1de6f, (u_int)0xc55491c5, + (u_int)0x30506030, (u_int)0x01030201, (u_int)0x67a9ce67, (u_int)0x2b7d562b, + (u_int)0xfe19e7fe, (u_int)0xd762b5d7, (u_int)0xabe64dab, (u_int)0x769aec76, + (u_int)0xca458fca, (u_int)0x829d1f82, (u_int)0xc94089c9, (u_int)0x7d87fa7d, + (u_int)0xfa15effa, (u_int)0x59ebb259, (u_int)0x47c98e47, (u_int)0xf00bfbf0, + (u_int)0xadec41ad, (u_int)0xd467b3d4, (u_int)0xa2fd5fa2, (u_int)0xafea45af, + (u_int)0x9cbf239c, (u_int)0xa4f753a4, (u_int)0x7296e472, (u_int)0xc05b9bc0, + (u_int)0xb7c275b7, (u_int)0xfd1ce1fd, (u_int)0x93ae3d93, (u_int)0x266a4c26, + (u_int)0x365a6c36, (u_int)0x3f417e3f, (u_int)0xf702f5f7, (u_int)0xcc4f83cc, + (u_int)0x345c6834, (u_int)0xa5f451a5, 
(u_int)0xe534d1e5, (u_int)0xf108f9f1, + (u_int)0x7193e271, (u_int)0xd873abd8, (u_int)0x31536231, (u_int)0x153f2a15, + (u_int)0x040c0804, (u_int)0xc75295c7, (u_int)0x23654623, (u_int)0xc35e9dc3, + (u_int)0x18283018, (u_int)0x96a13796, (u_int)0x050f0a05, (u_int)0x9ab52f9a, + (u_int)0x07090e07, (u_int)0x12362412, (u_int)0x809b1b80, (u_int)0xe23ddfe2, + (u_int)0xeb26cdeb, (u_int)0x27694e27, (u_int)0xb2cd7fb2, (u_int)0x759fea75, + (u_int)0x091b1209, (u_int)0x839e1d83, (u_int)0x2c74582c, (u_int)0x1a2e341a, + (u_int)0x1b2d361b, (u_int)0x6eb2dc6e, (u_int)0x5aeeb45a, (u_int)0xa0fb5ba0, + (u_int)0x52f6a452, (u_int)0x3b4d763b, (u_int)0xd661b7d6, (u_int)0xb3ce7db3, + (u_int)0x297b5229, (u_int)0xe33edde3, (u_int)0x2f715e2f, (u_int)0x84971384, + (u_int)0x53f5a653, (u_int)0xd168b9d1, (u_int)0x00000000, (u_int)0xed2cc1ed, + (u_int)0x20604020, (u_int)0xfc1fe3fc, (u_int)0xb1c879b1, (u_int)0x5bedb65b, + (u_int)0x6abed46a, (u_int)0xcb468dcb, (u_int)0xbed967be, (u_int)0x394b7239, + (u_int)0x4ade944a, (u_int)0x4cd4984c, (u_int)0x58e8b058, (u_int)0xcf4a85cf, + (u_int)0xd06bbbd0, (u_int)0xef2ac5ef, (u_int)0xaae54faa, (u_int)0xfb16edfb, + (u_int)0x43c58643, (u_int)0x4dd79a4d, (u_int)0x33556633, (u_int)0x85941185, + (u_int)0x45cf8a45, (u_int)0xf910e9f9, (u_int)0x02060402, (u_int)0x7f81fe7f, + (u_int)0x50f0a050, (u_int)0x3c44783c, (u_int)0x9fba259f, (u_int)0xa8e34ba8, + (u_int)0x51f3a251, (u_int)0xa3fe5da3, (u_int)0x40c08040, (u_int)0x8f8a058f, + (u_int)0x92ad3f92, (u_int)0x9dbc219d, (u_int)0x38487038, (u_int)0xf504f1f5, + (u_int)0xbcdf63bc, (u_int)0xb6c177b6, (u_int)0xda75afda, (u_int)0x21634221, + (u_int)0x10302010, (u_int)0xff1ae5ff, (u_int)0xf30efdf3, (u_int)0xd26dbfd2, + (u_int)0xcd4c81cd, (u_int)0x0c14180c, (u_int)0x13352613, (u_int)0xec2fc3ec, + (u_int)0x5fe1be5f, (u_int)0x97a23597, (u_int)0x44cc8844, (u_int)0x17392e17, + (u_int)0xc45793c4, (u_int)0xa7f255a7, (u_int)0x7e82fc7e, (u_int)0x3d477a3d, + (u_int)0x64acc864, (u_int)0x5de7ba5d, (u_int)0x192b3219, (u_int)0x7395e673, + 
(u_int)0x60a0c060, (u_int)0x81981981, (u_int)0x4fd19e4f, (u_int)0xdc7fa3dc, + (u_int)0x22664422, (u_int)0x2a7e542a, (u_int)0x90ab3b90, (u_int)0x88830b88, + (u_int)0x46ca8c46, (u_int)0xee29c7ee, (u_int)0xb8d36bb8, (u_int)0x143c2814, + (u_int)0xde79a7de, (u_int)0x5ee2bc5e, (u_int)0x0b1d160b, (u_int)0xdb76addb, + (u_int)0xe03bdbe0, (u_int)0x32566432, (u_int)0x3a4e743a, (u_int)0x0a1e140a, + (u_int)0x49db9249, (u_int)0x060a0c06, (u_int)0x246c4824, (u_int)0x5ce4b85c, + (u_int)0xc25d9fc2, (u_int)0xd36ebdd3, (u_int)0xacef43ac, (u_int)0x62a6c462, + (u_int)0x91a83991, (u_int)0x95a43195, (u_int)0xe437d3e4, (u_int)0x798bf279, + (u_int)0xe732d5e7, (u_int)0xc8438bc8, (u_int)0x37596e37, (u_int)0x6db7da6d, + (u_int)0x8d8c018d, (u_int)0xd564b1d5, (u_int)0x4ed29c4e, (u_int)0xa9e049a9, + (u_int)0x6cb4d86c, (u_int)0x56faac56, (u_int)0xf407f3f4, (u_int)0xea25cfea, + (u_int)0x65afca65, (u_int)0x7a8ef47a, (u_int)0xaee947ae, (u_int)0x08181008, + (u_int)0xbad56fba, (u_int)0x7888f078, (u_int)0x256f4a25, (u_int)0x2e725c2e, + (u_int)0x1c24381c, (u_int)0xa6f157a6, (u_int)0xb4c773b4, (u_int)0xc65197c6, + (u_int)0xe823cbe8, (u_int)0xdd7ca1dd, (u_int)0x749ce874, (u_int)0x1f213e1f, + (u_int)0x4bdd964b, (u_int)0xbddc61bd, (u_int)0x8b860d8b, (u_int)0x8a850f8a, + (u_int)0x7090e070, (u_int)0x3e427c3e, (u_int)0xb5c471b5, (u_int)0x66aacc66, + (u_int)0x48d89048, (u_int)0x03050603, (u_int)0xf601f7f6, (u_int)0x0e121c0e, + (u_int)0x61a3c261, (u_int)0x355f6a35, (u_int)0x57f9ae57, (u_int)0xb9d069b9, + (u_int)0x86911786, (u_int)0xc15899c1, (u_int)0x1d273a1d, (u_int)0x9eb9279e, + (u_int)0xe138d9e1, (u_int)0xf813ebf8, (u_int)0x98b32b98, (u_int)0x11332211, + (u_int)0x69bbd269, (u_int)0xd970a9d9, (u_int)0x8e89078e, (u_int)0x94a73394, + (u_int)0x9bb62d9b, (u_int)0x1e223c1e, (u_int)0x87921587, (u_int)0xe920c9e9, + (u_int)0xce4987ce, (u_int)0x55ffaa55, (u_int)0x28785028, (u_int)0xdf7aa5df, + (u_int)0x8c8f038c, (u_int)0xa1f859a1, (u_int)0x89800989, (u_int)0x0d171a0d, + (u_int)0xbfda65bf, (u_int)0xe631d7e6, 
(u_int)0x42c68442, (u_int)0x68b8d068, + (u_int)0x41c38241, (u_int)0x99b02999, (u_int)0x2d775a2d, (u_int)0x0f111e0f, + (u_int)0xb0cb7bb0, (u_int)0x54fca854, (u_int)0xbbd66dbb, (u_int)0x163a2c16, +}; +static const u32 Te3[256] = { + + (u_int)0x6363a5c6, (u_int)0x7c7c84f8, (u_int)0x777799ee, (u_int)0x7b7b8df6, + (u_int)0xf2f20dff, (u_int)0x6b6bbdd6, (u_int)0x6f6fb1de, (u_int)0xc5c55491, + (u_int)0x30305060, (u_int)0x01010302, (u_int)0x6767a9ce, (u_int)0x2b2b7d56, + (u_int)0xfefe19e7, (u_int)0xd7d762b5, (u_int)0xababe64d, (u_int)0x76769aec, + (u_int)0xcaca458f, (u_int)0x82829d1f, (u_int)0xc9c94089, (u_int)0x7d7d87fa, + (u_int)0xfafa15ef, (u_int)0x5959ebb2, (u_int)0x4747c98e, (u_int)0xf0f00bfb, + (u_int)0xadadec41, (u_int)0xd4d467b3, (u_int)0xa2a2fd5f, (u_int)0xafafea45, + (u_int)0x9c9cbf23, (u_int)0xa4a4f753, (u_int)0x727296e4, (u_int)0xc0c05b9b, + (u_int)0xb7b7c275, (u_int)0xfdfd1ce1, (u_int)0x9393ae3d, (u_int)0x26266a4c, + (u_int)0x36365a6c, (u_int)0x3f3f417e, (u_int)0xf7f702f5, (u_int)0xcccc4f83, + (u_int)0x34345c68, (u_int)0xa5a5f451, (u_int)0xe5e534d1, (u_int)0xf1f108f9, + (u_int)0x717193e2, (u_int)0xd8d873ab, (u_int)0x31315362, (u_int)0x15153f2a, + (u_int)0x04040c08, (u_int)0xc7c75295, (u_int)0x23236546, (u_int)0xc3c35e9d, + (u_int)0x18182830, (u_int)0x9696a137, (u_int)0x05050f0a, (u_int)0x9a9ab52f, + (u_int)0x0707090e, (u_int)0x12123624, (u_int)0x80809b1b, (u_int)0xe2e23ddf, + (u_int)0xebeb26cd, (u_int)0x2727694e, (u_int)0xb2b2cd7f, (u_int)0x75759fea, + (u_int)0x09091b12, (u_int)0x83839e1d, (u_int)0x2c2c7458, (u_int)0x1a1a2e34, + (u_int)0x1b1b2d36, (u_int)0x6e6eb2dc, (u_int)0x5a5aeeb4, (u_int)0xa0a0fb5b, + (u_int)0x5252f6a4, (u_int)0x3b3b4d76, (u_int)0xd6d661b7, (u_int)0xb3b3ce7d, + (u_int)0x29297b52, (u_int)0xe3e33edd, (u_int)0x2f2f715e, (u_int)0x84849713, + (u_int)0x5353f5a6, (u_int)0xd1d168b9, (u_int)0x00000000, (u_int)0xeded2cc1, + (u_int)0x20206040, (u_int)0xfcfc1fe3, (u_int)0xb1b1c879, (u_int)0x5b5bedb6, + (u_int)0x6a6abed4, (u_int)0xcbcb468d, 
(u_int)0xbebed967, (u_int)0x39394b72, + (u_int)0x4a4ade94, (u_int)0x4c4cd498, (u_int)0x5858e8b0, (u_int)0xcfcf4a85, + (u_int)0xd0d06bbb, (u_int)0xefef2ac5, (u_int)0xaaaae54f, (u_int)0xfbfb16ed, + (u_int)0x4343c586, (u_int)0x4d4dd79a, (u_int)0x33335566, (u_int)0x85859411, + (u_int)0x4545cf8a, (u_int)0xf9f910e9, (u_int)0x02020604, (u_int)0x7f7f81fe, + (u_int)0x5050f0a0, (u_int)0x3c3c4478, (u_int)0x9f9fba25, (u_int)0xa8a8e34b, + (u_int)0x5151f3a2, (u_int)0xa3a3fe5d, (u_int)0x4040c080, (u_int)0x8f8f8a05, + (u_int)0x9292ad3f, (u_int)0x9d9dbc21, (u_int)0x38384870, (u_int)0xf5f504f1, + (u_int)0xbcbcdf63, (u_int)0xb6b6c177, (u_int)0xdada75af, (u_int)0x21216342, + (u_int)0x10103020, (u_int)0xffff1ae5, (u_int)0xf3f30efd, (u_int)0xd2d26dbf, + (u_int)0xcdcd4c81, (u_int)0x0c0c1418, (u_int)0x13133526, (u_int)0xecec2fc3, + (u_int)0x5f5fe1be, (u_int)0x9797a235, (u_int)0x4444cc88, (u_int)0x1717392e, + (u_int)0xc4c45793, (u_int)0xa7a7f255, (u_int)0x7e7e82fc, (u_int)0x3d3d477a, + (u_int)0x6464acc8, (u_int)0x5d5de7ba, (u_int)0x19192b32, (u_int)0x737395e6, + (u_int)0x6060a0c0, (u_int)0x81819819, (u_int)0x4f4fd19e, (u_int)0xdcdc7fa3, + (u_int)0x22226644, (u_int)0x2a2a7e54, (u_int)0x9090ab3b, (u_int)0x8888830b, + (u_int)0x4646ca8c, (u_int)0xeeee29c7, (u_int)0xb8b8d36b, (u_int)0x14143c28, + (u_int)0xdede79a7, (u_int)0x5e5ee2bc, (u_int)0x0b0b1d16, (u_int)0xdbdb76ad, + (u_int)0xe0e03bdb, (u_int)0x32325664, (u_int)0x3a3a4e74, (u_int)0x0a0a1e14, + (u_int)0x4949db92, (u_int)0x06060a0c, (u_int)0x24246c48, (u_int)0x5c5ce4b8, + (u_int)0xc2c25d9f, (u_int)0xd3d36ebd, (u_int)0xacacef43, (u_int)0x6262a6c4, + (u_int)0x9191a839, (u_int)0x9595a431, (u_int)0xe4e437d3, (u_int)0x79798bf2, + (u_int)0xe7e732d5, (u_int)0xc8c8438b, (u_int)0x3737596e, (u_int)0x6d6db7da, + (u_int)0x8d8d8c01, (u_int)0xd5d564b1, (u_int)0x4e4ed29c, (u_int)0xa9a9e049, + (u_int)0x6c6cb4d8, (u_int)0x5656faac, (u_int)0xf4f407f3, (u_int)0xeaea25cf, + (u_int)0x6565afca, (u_int)0x7a7a8ef4, (u_int)0xaeaee947, (u_int)0x08081810, + 
(u_int)0xbabad56f, (u_int)0x787888f0, (u_int)0x25256f4a, (u_int)0x2e2e725c, + (u_int)0x1c1c2438, (u_int)0xa6a6f157, (u_int)0xb4b4c773, (u_int)0xc6c65197, + (u_int)0xe8e823cb, (u_int)0xdddd7ca1, (u_int)0x74749ce8, (u_int)0x1f1f213e, + (u_int)0x4b4bdd96, (u_int)0xbdbddc61, (u_int)0x8b8b860d, (u_int)0x8a8a850f, + (u_int)0x707090e0, (u_int)0x3e3e427c, (u_int)0xb5b5c471, (u_int)0x6666aacc, + (u_int)0x4848d890, (u_int)0x03030506, (u_int)0xf6f601f7, (u_int)0x0e0e121c, + (u_int)0x6161a3c2, (u_int)0x35355f6a, (u_int)0x5757f9ae, (u_int)0xb9b9d069, + (u_int)0x86869117, (u_int)0xc1c15899, (u_int)0x1d1d273a, (u_int)0x9e9eb927, + (u_int)0xe1e138d9, (u_int)0xf8f813eb, (u_int)0x9898b32b, (u_int)0x11113322, + (u_int)0x6969bbd2, (u_int)0xd9d970a9, (u_int)0x8e8e8907, (u_int)0x9494a733, + (u_int)0x9b9bb62d, (u_int)0x1e1e223c, (u_int)0x87879215, (u_int)0xe9e920c9, + (u_int)0xcece4987, (u_int)0x5555ffaa, (u_int)0x28287850, (u_int)0xdfdf7aa5, + (u_int)0x8c8c8f03, (u_int)0xa1a1f859, (u_int)0x89898009, (u_int)0x0d0d171a, + (u_int)0xbfbfda65, (u_int)0xe6e631d7, (u_int)0x4242c684, (u_int)0x6868b8d0, + (u_int)0x4141c382, (u_int)0x9999b029, (u_int)0x2d2d775a, (u_int)0x0f0f111e, + (u_int)0xb0b0cb7b, (u_int)0x5454fca8, (u_int)0xbbbbd66d, (u_int)0x16163a2c, +}; +static const u32 Te4[256] = { + (u_int)0x63636363, (u_int)0x7c7c7c7c, (u_int)0x77777777, (u_int)0x7b7b7b7b, + (u_int)0xf2f2f2f2, (u_int)0x6b6b6b6b, (u_int)0x6f6f6f6f, (u_int)0xc5c5c5c5, + (u_int)0x30303030, (u_int)0x01010101, (u_int)0x67676767, (u_int)0x2b2b2b2b, + (u_int)0xfefefefe, (u_int)0xd7d7d7d7, (u_int)0xabababab, (u_int)0x76767676, + (u_int)0xcacacaca, (u_int)0x82828282, (u_int)0xc9c9c9c9, (u_int)0x7d7d7d7d, + (u_int)0xfafafafa, (u_int)0x59595959, (u_int)0x47474747, (u_int)0xf0f0f0f0, + (u_int)0xadadadad, (u_int)0xd4d4d4d4, (u_int)0xa2a2a2a2, (u_int)0xafafafaf, + (u_int)0x9c9c9c9c, (u_int)0xa4a4a4a4, (u_int)0x72727272, (u_int)0xc0c0c0c0, + (u_int)0xb7b7b7b7, (u_int)0xfdfdfdfd, (u_int)0x93939393, (u_int)0x26262626, + 
(u_int)0x36363636, (u_int)0x3f3f3f3f, (u_int)0xf7f7f7f7, (u_int)0xcccccccc, + (u_int)0x34343434, (u_int)0xa5a5a5a5, (u_int)0xe5e5e5e5, (u_int)0xf1f1f1f1, + (u_int)0x71717171, (u_int)0xd8d8d8d8, (u_int)0x31313131, (u_int)0x15151515, + (u_int)0x04040404, (u_int)0xc7c7c7c7, (u_int)0x23232323, (u_int)0xc3c3c3c3, + (u_int)0x18181818, (u_int)0x96969696, (u_int)0x05050505, (u_int)0x9a9a9a9a, + (u_int)0x07070707, (u_int)0x12121212, (u_int)0x80808080, (u_int)0xe2e2e2e2, + (u_int)0xebebebeb, (u_int)0x27272727, (u_int)0xb2b2b2b2, (u_int)0x75757575, + (u_int)0x09090909, (u_int)0x83838383, (u_int)0x2c2c2c2c, (u_int)0x1a1a1a1a, + (u_int)0x1b1b1b1b, (u_int)0x6e6e6e6e, (u_int)0x5a5a5a5a, (u_int)0xa0a0a0a0, + (u_int)0x52525252, (u_int)0x3b3b3b3b, (u_int)0xd6d6d6d6, (u_int)0xb3b3b3b3, + (u_int)0x29292929, (u_int)0xe3e3e3e3, (u_int)0x2f2f2f2f, (u_int)0x84848484, + (u_int)0x53535353, (u_int)0xd1d1d1d1, (u_int)0x00000000, (u_int)0xedededed, + (u_int)0x20202020, (u_int)0xfcfcfcfc, (u_int)0xb1b1b1b1, (u_int)0x5b5b5b5b, + (u_int)0x6a6a6a6a, (u_int)0xcbcbcbcb, (u_int)0xbebebebe, (u_int)0x39393939, + (u_int)0x4a4a4a4a, (u_int)0x4c4c4c4c, (u_int)0x58585858, (u_int)0xcfcfcfcf, + (u_int)0xd0d0d0d0, (u_int)0xefefefef, (u_int)0xaaaaaaaa, (u_int)0xfbfbfbfb, + (u_int)0x43434343, (u_int)0x4d4d4d4d, (u_int)0x33333333, (u_int)0x85858585, + (u_int)0x45454545, (u_int)0xf9f9f9f9, (u_int)0x02020202, (u_int)0x7f7f7f7f, + (u_int)0x50505050, (u_int)0x3c3c3c3c, (u_int)0x9f9f9f9f, (u_int)0xa8a8a8a8, + (u_int)0x51515151, (u_int)0xa3a3a3a3, (u_int)0x40404040, (u_int)0x8f8f8f8f, + (u_int)0x92929292, (u_int)0x9d9d9d9d, (u_int)0x38383838, (u_int)0xf5f5f5f5, + (u_int)0xbcbcbcbc, (u_int)0xb6b6b6b6, (u_int)0xdadadada, (u_int)0x21212121, + (u_int)0x10101010, (u_int)0xffffffff, (u_int)0xf3f3f3f3, (u_int)0xd2d2d2d2, + (u_int)0xcdcdcdcd, (u_int)0x0c0c0c0c, (u_int)0x13131313, (u_int)0xecececec, + (u_int)0x5f5f5f5f, (u_int)0x97979797, (u_int)0x44444444, (u_int)0x17171717, + (u_int)0xc4c4c4c4, (u_int)0xa7a7a7a7, 
(u_int)0x7e7e7e7e, (u_int)0x3d3d3d3d, + (u_int)0x64646464, (u_int)0x5d5d5d5d, (u_int)0x19191919, (u_int)0x73737373, + (u_int)0x60606060, (u_int)0x81818181, (u_int)0x4f4f4f4f, (u_int)0xdcdcdcdc, + (u_int)0x22222222, (u_int)0x2a2a2a2a, (u_int)0x90909090, (u_int)0x88888888, + (u_int)0x46464646, (u_int)0xeeeeeeee, (u_int)0xb8b8b8b8, (u_int)0x14141414, + (u_int)0xdededede, (u_int)0x5e5e5e5e, (u_int)0x0b0b0b0b, (u_int)0xdbdbdbdb, + (u_int)0xe0e0e0e0, (u_int)0x32323232, (u_int)0x3a3a3a3a, (u_int)0x0a0a0a0a, + (u_int)0x49494949, (u_int)0x06060606, (u_int)0x24242424, (u_int)0x5c5c5c5c, + (u_int)0xc2c2c2c2, (u_int)0xd3d3d3d3, (u_int)0xacacacac, (u_int)0x62626262, + (u_int)0x91919191, (u_int)0x95959595, (u_int)0xe4e4e4e4, (u_int)0x79797979, + (u_int)0xe7e7e7e7, (u_int)0xc8c8c8c8, (u_int)0x37373737, (u_int)0x6d6d6d6d, + (u_int)0x8d8d8d8d, (u_int)0xd5d5d5d5, (u_int)0x4e4e4e4e, (u_int)0xa9a9a9a9, + (u_int)0x6c6c6c6c, (u_int)0x56565656, (u_int)0xf4f4f4f4, (u_int)0xeaeaeaea, + (u_int)0x65656565, (u_int)0x7a7a7a7a, (u_int)0xaeaeaeae, (u_int)0x08080808, + (u_int)0xbabababa, (u_int)0x78787878, (u_int)0x25252525, (u_int)0x2e2e2e2e, + (u_int)0x1c1c1c1c, (u_int)0xa6a6a6a6, (u_int)0xb4b4b4b4, (u_int)0xc6c6c6c6, + (u_int)0xe8e8e8e8, (u_int)0xdddddddd, (u_int)0x74747474, (u_int)0x1f1f1f1f, + (u_int)0x4b4b4b4b, (u_int)0xbdbdbdbd, (u_int)0x8b8b8b8b, (u_int)0x8a8a8a8a, + (u_int)0x70707070, (u_int)0x3e3e3e3e, (u_int)0xb5b5b5b5, (u_int)0x66666666, + (u_int)0x48484848, (u_int)0x03030303, (u_int)0xf6f6f6f6, (u_int)0x0e0e0e0e, + (u_int)0x61616161, (u_int)0x35353535, (u_int)0x57575757, (u_int)0xb9b9b9b9, + (u_int)0x86868686, (u_int)0xc1c1c1c1, (u_int)0x1d1d1d1d, (u_int)0x9e9e9e9e, + (u_int)0xe1e1e1e1, (u_int)0xf8f8f8f8, (u_int)0x98989898, (u_int)0x11111111, + (u_int)0x69696969, (u_int)0xd9d9d9d9, (u_int)0x8e8e8e8e, (u_int)0x94949494, + (u_int)0x9b9b9b9b, (u_int)0x1e1e1e1e, (u_int)0x87878787, (u_int)0xe9e9e9e9, + (u_int)0xcececece, (u_int)0x55555555, (u_int)0x28282828, (u_int)0xdfdfdfdf, + 
(u_int)0x8c8c8c8c, (u_int)0xa1a1a1a1, (u_int)0x89898989, (u_int)0x0d0d0d0d, + (u_int)0xbfbfbfbf, (u_int)0xe6e6e6e6, (u_int)0x42424242, (u_int)0x68686868, + (u_int)0x41414141, (u_int)0x99999999, (u_int)0x2d2d2d2d, (u_int)0x0f0f0f0f, + (u_int)0xb0b0b0b0, (u_int)0x54545454, (u_int)0xbbbbbbbb, (u_int)0x16161616, +}; +static const u32 Td0[256] = { + (u_int)0x51f4a750, (u_int)0x7e416553, (u_int)0x1a17a4c3, (u_int)0x3a275e96, + (u_int)0x3bab6bcb, (u_int)0x1f9d45f1, (u_int)0xacfa58ab, (u_int)0x4be30393, + (u_int)0x2030fa55, (u_int)0xad766df6, (u_int)0x88cc7691, (u_int)0xf5024c25, + (u_int)0x4fe5d7fc, (u_int)0xc52acbd7, (u_int)0x26354480, (u_int)0xb562a38f, + (u_int)0xdeb15a49, (u_int)0x25ba1b67, (u_int)0x45ea0e98, (u_int)0x5dfec0e1, + (u_int)0xc32f7502, (u_int)0x814cf012, (u_int)0x8d4697a3, (u_int)0x6bd3f9c6, + (u_int)0x038f5fe7, (u_int)0x15929c95, (u_int)0xbf6d7aeb, (u_int)0x955259da, + (u_int)0xd4be832d, (u_int)0x587421d3, (u_int)0x49e06929, (u_int)0x8ec9c844, + (u_int)0x75c2896a, (u_int)0xf48e7978, (u_int)0x99583e6b, (u_int)0x27b971dd, + (u_int)0xbee14fb6, (u_int)0xf088ad17, (u_int)0xc920ac66, (u_int)0x7dce3ab4, + (u_int)0x63df4a18, (u_int)0xe51a3182, (u_int)0x97513360, (u_int)0x62537f45, + (u_int)0xb16477e0, (u_int)0xbb6bae84, (u_int)0xfe81a01c, (u_int)0xf9082b94, + (u_int)0x70486858, (u_int)0x8f45fd19, (u_int)0x94de6c87, (u_int)0x527bf8b7, + (u_int)0xab73d323, (u_int)0x724b02e2, (u_int)0xe31f8f57, (u_int)0x6655ab2a, + (u_int)0xb2eb2807, (u_int)0x2fb5c203, (u_int)0x86c57b9a, (u_int)0xd33708a5, + (u_int)0x302887f2, (u_int)0x23bfa5b2, (u_int)0x02036aba, (u_int)0xed16825c, + (u_int)0x8acf1c2b, (u_int)0xa779b492, (u_int)0xf307f2f0, (u_int)0x4e69e2a1, + (u_int)0x65daf4cd, (u_int)0x0605bed5, (u_int)0xd134621f, (u_int)0xc4a6fe8a, + (u_int)0x342e539d, (u_int)0xa2f355a0, (u_int)0x058ae132, (u_int)0xa4f6eb75, + (u_int)0x0b83ec39, (u_int)0x4060efaa, (u_int)0x5e719f06, (u_int)0xbd6e1051, + (u_int)0x3e218af9, (u_int)0x96dd063d, (u_int)0xdd3e05ae, (u_int)0x4de6bd46, + 
(u_int)0x91548db5, (u_int)0x71c45d05, (u_int)0x0406d46f, (u_int)0x605015ff, + (u_int)0x1998fb24, (u_int)0xd6bde997, (u_int)0x894043cc, (u_int)0x67d99e77, + (u_int)0xb0e842bd, (u_int)0x07898b88, (u_int)0xe7195b38, (u_int)0x79c8eedb, + (u_int)0xa17c0a47, (u_int)0x7c420fe9, (u_int)0xf8841ec9, (u_int)0x00000000, + (u_int)0x09808683, (u_int)0x322bed48, (u_int)0x1e1170ac, (u_int)0x6c5a724e, + (u_int)0xfd0efffb, (u_int)0x0f853856, (u_int)0x3daed51e, (u_int)0x362d3927, + (u_int)0x0a0fd964, (u_int)0x685ca621, (u_int)0x9b5b54d1, (u_int)0x24362e3a, + (u_int)0x0c0a67b1, (u_int)0x9357e70f, (u_int)0xb4ee96d2, (u_int)0x1b9b919e, + (u_int)0x80c0c54f, (u_int)0x61dc20a2, (u_int)0x5a774b69, (u_int)0x1c121a16, + (u_int)0xe293ba0a, (u_int)0xc0a02ae5, (u_int)0x3c22e043, (u_int)0x121b171d, + (u_int)0x0e090d0b, (u_int)0xf28bc7ad, (u_int)0x2db6a8b9, (u_int)0x141ea9c8, + (u_int)0x57f11985, (u_int)0xaf75074c, (u_int)0xee99ddbb, (u_int)0xa37f60fd, + (u_int)0xf701269f, (u_int)0x5c72f5bc, (u_int)0x44663bc5, (u_int)0x5bfb7e34, + (u_int)0x8b432976, (u_int)0xcb23c6dc, (u_int)0xb6edfc68, (u_int)0xb8e4f163, + (u_int)0xd731dcca, (u_int)0x42638510, (u_int)0x13972240, (u_int)0x84c61120, + (u_int)0x854a247d, (u_int)0xd2bb3df8, (u_int)0xaef93211, (u_int)0xc729a16d, + (u_int)0x1d9e2f4b, (u_int)0xdcb230f3, (u_int)0x0d8652ec, (u_int)0x77c1e3d0, + (u_int)0x2bb3166c, (u_int)0xa970b999, (u_int)0x119448fa, (u_int)0x47e96422, + (u_int)0xa8fc8cc4, (u_int)0xa0f03f1a, (u_int)0x567d2cd8, (u_int)0x223390ef, + (u_int)0x87494ec7, (u_int)0xd938d1c1, (u_int)0x8ccaa2fe, (u_int)0x98d40b36, + (u_int)0xa6f581cf, (u_int)0xa57ade28, (u_int)0xdab78e26, (u_int)0x3fadbfa4, + (u_int)0x2c3a9de4, (u_int)0x5078920d, (u_int)0x6a5fcc9b, (u_int)0x547e4662, + (u_int)0xf68d13c2, (u_int)0x90d8b8e8, (u_int)0x2e39f75e, (u_int)0x82c3aff5, + (u_int)0x9f5d80be, (u_int)0x69d0937c, (u_int)0x6fd52da9, (u_int)0xcf2512b3, + (u_int)0xc8ac993b, (u_int)0x10187da7, (u_int)0xe89c636e, (u_int)0xdb3bbb7b, + (u_int)0xcd267809, (u_int)0x6e5918f4, 
(u_int)0xec9ab701, (u_int)0x834f9aa8, + (u_int)0xe6956e65, (u_int)0xaaffe67e, (u_int)0x21bccf08, (u_int)0xef15e8e6, + (u_int)0xbae79bd9, (u_int)0x4a6f36ce, (u_int)0xea9f09d4, (u_int)0x29b07cd6, + (u_int)0x31a4b2af, (u_int)0x2a3f2331, (u_int)0xc6a59430, (u_int)0x35a266c0, + (u_int)0x744ebc37, (u_int)0xfc82caa6, (u_int)0xe090d0b0, (u_int)0x33a7d815, + (u_int)0xf104984a, (u_int)0x41ecdaf7, (u_int)0x7fcd500e, (u_int)0x1791f62f, + (u_int)0x764dd68d, (u_int)0x43efb04d, (u_int)0xccaa4d54, (u_int)0xe49604df, + (u_int)0x9ed1b5e3, (u_int)0x4c6a881b, (u_int)0xc12c1fb8, (u_int)0x4665517f, + (u_int)0x9d5eea04, (u_int)0x018c355d, (u_int)0xfa877473, (u_int)0xfb0b412e, + (u_int)0xb3671d5a, (u_int)0x92dbd252, (u_int)0xe9105633, (u_int)0x6dd64713, + (u_int)0x9ad7618c, (u_int)0x37a10c7a, (u_int)0x59f8148e, (u_int)0xeb133c89, + (u_int)0xcea927ee, (u_int)0xb761c935, (u_int)0xe11ce5ed, (u_int)0x7a47b13c, + (u_int)0x9cd2df59, (u_int)0x55f2733f, (u_int)0x1814ce79, (u_int)0x73c737bf, + (u_int)0x53f7cdea, (u_int)0x5ffdaa5b, (u_int)0xdf3d6f14, (u_int)0x7844db86, + (u_int)0xcaaff381, (u_int)0xb968c43e, (u_int)0x3824342c, (u_int)0xc2a3405f, + (u_int)0x161dc372, (u_int)0xbce2250c, (u_int)0x283c498b, (u_int)0xff0d9541, + (u_int)0x39a80171, (u_int)0x080cb3de, (u_int)0xd8b4e49c, (u_int)0x6456c190, + (u_int)0x7bcb8461, (u_int)0xd532b670, (u_int)0x486c5c74, (u_int)0xd0b85742, +}; +static const u32 Td1[256] = { + (u_int)0x5051f4a7, (u_int)0x537e4165, (u_int)0xc31a17a4, (u_int)0x963a275e, + (u_int)0xcb3bab6b, (u_int)0xf11f9d45, (u_int)0xabacfa58, (u_int)0x934be303, + (u_int)0x552030fa, (u_int)0xf6ad766d, (u_int)0x9188cc76, (u_int)0x25f5024c, + (u_int)0xfc4fe5d7, (u_int)0xd7c52acb, (u_int)0x80263544, (u_int)0x8fb562a3, + (u_int)0x49deb15a, (u_int)0x6725ba1b, (u_int)0x9845ea0e, (u_int)0xe15dfec0, + (u_int)0x02c32f75, (u_int)0x12814cf0, (u_int)0xa38d4697, (u_int)0xc66bd3f9, + (u_int)0xe7038f5f, (u_int)0x9515929c, (u_int)0xebbf6d7a, (u_int)0xda955259, + (u_int)0x2dd4be83, (u_int)0xd3587421, 
(u_int)0x2949e069, (u_int)0x448ec9c8, + (u_int)0x6a75c289, (u_int)0x78f48e79, (u_int)0x6b99583e, (u_int)0xdd27b971, + (u_int)0xb6bee14f, (u_int)0x17f088ad, (u_int)0x66c920ac, (u_int)0xb47dce3a, + (u_int)0x1863df4a, (u_int)0x82e51a31, (u_int)0x60975133, (u_int)0x4562537f, + (u_int)0xe0b16477, (u_int)0x84bb6bae, (u_int)0x1cfe81a0, (u_int)0x94f9082b, + (u_int)0x58704868, (u_int)0x198f45fd, (u_int)0x8794de6c, (u_int)0xb7527bf8, + (u_int)0x23ab73d3, (u_int)0xe2724b02, (u_int)0x57e31f8f, (u_int)0x2a6655ab, + (u_int)0x07b2eb28, (u_int)0x032fb5c2, (u_int)0x9a86c57b, (u_int)0xa5d33708, + (u_int)0xf2302887, (u_int)0xb223bfa5, (u_int)0xba02036a, (u_int)0x5ced1682, + (u_int)0x2b8acf1c, (u_int)0x92a779b4, (u_int)0xf0f307f2, (u_int)0xa14e69e2, + (u_int)0xcd65daf4, (u_int)0xd50605be, (u_int)0x1fd13462, (u_int)0x8ac4a6fe, + (u_int)0x9d342e53, (u_int)0xa0a2f355, (u_int)0x32058ae1, (u_int)0x75a4f6eb, + (u_int)0x390b83ec, (u_int)0xaa4060ef, (u_int)0x065e719f, (u_int)0x51bd6e10, + (u_int)0xf93e218a, (u_int)0x3d96dd06, (u_int)0xaedd3e05, (u_int)0x464de6bd, + (u_int)0xb591548d, (u_int)0x0571c45d, (u_int)0x6f0406d4, (u_int)0xff605015, + (u_int)0x241998fb, (u_int)0x97d6bde9, (u_int)0xcc894043, (u_int)0x7767d99e, + (u_int)0xbdb0e842, (u_int)0x8807898b, (u_int)0x38e7195b, (u_int)0xdb79c8ee, + (u_int)0x47a17c0a, (u_int)0xe97c420f, (u_int)0xc9f8841e, (u_int)0x00000000, + (u_int)0x83098086, (u_int)0x48322bed, (u_int)0xac1e1170, (u_int)0x4e6c5a72, + (u_int)0xfbfd0eff, (u_int)0x560f8538, (u_int)0x1e3daed5, (u_int)0x27362d39, + (u_int)0x640a0fd9, (u_int)0x21685ca6, (u_int)0xd19b5b54, (u_int)0x3a24362e, + (u_int)0xb10c0a67, (u_int)0x0f9357e7, (u_int)0xd2b4ee96, (u_int)0x9e1b9b91, + (u_int)0x4f80c0c5, (u_int)0xa261dc20, (u_int)0x695a774b, (u_int)0x161c121a, + (u_int)0x0ae293ba, (u_int)0xe5c0a02a, (u_int)0x433c22e0, (u_int)0x1d121b17, + (u_int)0x0b0e090d, (u_int)0xadf28bc7, (u_int)0xb92db6a8, (u_int)0xc8141ea9, + (u_int)0x8557f119, (u_int)0x4caf7507, (u_int)0xbbee99dd, (u_int)0xfda37f60, + 
(u_int)0x9ff70126, (u_int)0xbc5c72f5, (u_int)0xc544663b, (u_int)0x345bfb7e, + (u_int)0x768b4329, (u_int)0xdccb23c6, (u_int)0x68b6edfc, (u_int)0x63b8e4f1, + (u_int)0xcad731dc, (u_int)0x10426385, (u_int)0x40139722, (u_int)0x2084c611, + (u_int)0x7d854a24, (u_int)0xf8d2bb3d, (u_int)0x11aef932, (u_int)0x6dc729a1, + (u_int)0x4b1d9e2f, (u_int)0xf3dcb230, (u_int)0xec0d8652, (u_int)0xd077c1e3, + (u_int)0x6c2bb316, (u_int)0x99a970b9, (u_int)0xfa119448, (u_int)0x2247e964, + (u_int)0xc4a8fc8c, (u_int)0x1aa0f03f, (u_int)0xd8567d2c, (u_int)0xef223390, + (u_int)0xc787494e, (u_int)0xc1d938d1, (u_int)0xfe8ccaa2, (u_int)0x3698d40b, + (u_int)0xcfa6f581, (u_int)0x28a57ade, (u_int)0x26dab78e, (u_int)0xa43fadbf, + (u_int)0xe42c3a9d, (u_int)0x0d507892, (u_int)0x9b6a5fcc, (u_int)0x62547e46, + (u_int)0xc2f68d13, (u_int)0xe890d8b8, (u_int)0x5e2e39f7, (u_int)0xf582c3af, + (u_int)0xbe9f5d80, (u_int)0x7c69d093, (u_int)0xa96fd52d, (u_int)0xb3cf2512, + (u_int)0x3bc8ac99, (u_int)0xa710187d, (u_int)0x6ee89c63, (u_int)0x7bdb3bbb, + (u_int)0x09cd2678, (u_int)0xf46e5918, (u_int)0x01ec9ab7, (u_int)0xa8834f9a, + (u_int)0x65e6956e, (u_int)0x7eaaffe6, (u_int)0x0821bccf, (u_int)0xe6ef15e8, + (u_int)0xd9bae79b, (u_int)0xce4a6f36, (u_int)0xd4ea9f09, (u_int)0xd629b07c, + (u_int)0xaf31a4b2, (u_int)0x312a3f23, (u_int)0x30c6a594, (u_int)0xc035a266, + (u_int)0x37744ebc, (u_int)0xa6fc82ca, (u_int)0xb0e090d0, (u_int)0x1533a7d8, + (u_int)0x4af10498, (u_int)0xf741ecda, (u_int)0x0e7fcd50, (u_int)0x2f1791f6, + (u_int)0x8d764dd6, (u_int)0x4d43efb0, (u_int)0x54ccaa4d, (u_int)0xdfe49604, + (u_int)0xe39ed1b5, (u_int)0x1b4c6a88, (u_int)0xb8c12c1f, (u_int)0x7f466551, + (u_int)0x049d5eea, (u_int)0x5d018c35, (u_int)0x73fa8774, (u_int)0x2efb0b41, + (u_int)0x5ab3671d, (u_int)0x5292dbd2, (u_int)0x33e91056, (u_int)0x136dd647, + (u_int)0x8c9ad761, (u_int)0x7a37a10c, (u_int)0x8e59f814, (u_int)0x89eb133c, + (u_int)0xeecea927, (u_int)0x35b761c9, (u_int)0xede11ce5, (u_int)0x3c7a47b1, + (u_int)0x599cd2df, (u_int)0x3f55f273, 
(u_int)0x791814ce, (u_int)0xbf73c737, + (u_int)0xea53f7cd, (u_int)0x5b5ffdaa, (u_int)0x14df3d6f, (u_int)0x867844db, + (u_int)0x81caaff3, (u_int)0x3eb968c4, (u_int)0x2c382434, (u_int)0x5fc2a340, + (u_int)0x72161dc3, (u_int)0x0cbce225, (u_int)0x8b283c49, (u_int)0x41ff0d95, + (u_int)0x7139a801, (u_int)0xde080cb3, (u_int)0x9cd8b4e4, (u_int)0x906456c1, + (u_int)0x617bcb84, (u_int)0x70d532b6, (u_int)0x74486c5c, (u_int)0x42d0b857, +}; +static const u32 Td2[256] = { + (u_int)0xa75051f4, (u_int)0x65537e41, (u_int)0xa4c31a17, (u_int)0x5e963a27, + (u_int)0x6bcb3bab, (u_int)0x45f11f9d, (u_int)0x58abacfa, (u_int)0x03934be3, + (u_int)0xfa552030, (u_int)0x6df6ad76, (u_int)0x769188cc, (u_int)0x4c25f502, + (u_int)0xd7fc4fe5, (u_int)0xcbd7c52a, (u_int)0x44802635, (u_int)0xa38fb562, + (u_int)0x5a49deb1, (u_int)0x1b6725ba, (u_int)0x0e9845ea, (u_int)0xc0e15dfe, + (u_int)0x7502c32f, (u_int)0xf012814c, (u_int)0x97a38d46, (u_int)0xf9c66bd3, + (u_int)0x5fe7038f, (u_int)0x9c951592, (u_int)0x7aebbf6d, (u_int)0x59da9552, + (u_int)0x832dd4be, (u_int)0x21d35874, (u_int)0x692949e0, (u_int)0xc8448ec9, + (u_int)0x896a75c2, (u_int)0x7978f48e, (u_int)0x3e6b9958, (u_int)0x71dd27b9, + (u_int)0x4fb6bee1, (u_int)0xad17f088, (u_int)0xac66c920, (u_int)0x3ab47dce, + (u_int)0x4a1863df, (u_int)0x3182e51a, (u_int)0x33609751, (u_int)0x7f456253, + (u_int)0x77e0b164, (u_int)0xae84bb6b, (u_int)0xa01cfe81, (u_int)0x2b94f908, + (u_int)0x68587048, (u_int)0xfd198f45, (u_int)0x6c8794de, (u_int)0xf8b7527b, + (u_int)0xd323ab73, (u_int)0x02e2724b, (u_int)0x8f57e31f, (u_int)0xab2a6655, + (u_int)0x2807b2eb, (u_int)0xc2032fb5, (u_int)0x7b9a86c5, (u_int)0x08a5d337, + (u_int)0x87f23028, (u_int)0xa5b223bf, (u_int)0x6aba0203, (u_int)0x825ced16, + (u_int)0x1c2b8acf, (u_int)0xb492a779, (u_int)0xf2f0f307, (u_int)0xe2a14e69, + (u_int)0xf4cd65da, (u_int)0xbed50605, (u_int)0x621fd134, (u_int)0xfe8ac4a6, + (u_int)0x539d342e, (u_int)0x55a0a2f3, (u_int)0xe132058a, (u_int)0xeb75a4f6, + (u_int)0xec390b83, (u_int)0xefaa4060, 
(u_int)0x9f065e71, (u_int)0x1051bd6e, + + (u_int)0x8af93e21, (u_int)0x063d96dd, (u_int)0x05aedd3e, (u_int)0xbd464de6, + (u_int)0x8db59154, (u_int)0x5d0571c4, (u_int)0xd46f0406, (u_int)0x15ff6050, + (u_int)0xfb241998, (u_int)0xe997d6bd, (u_int)0x43cc8940, (u_int)0x9e7767d9, + (u_int)0x42bdb0e8, (u_int)0x8b880789, (u_int)0x5b38e719, (u_int)0xeedb79c8, + (u_int)0x0a47a17c, (u_int)0x0fe97c42, (u_int)0x1ec9f884, (u_int)0x00000000, + (u_int)0x86830980, (u_int)0xed48322b, (u_int)0x70ac1e11, (u_int)0x724e6c5a, + (u_int)0xfffbfd0e, (u_int)0x38560f85, (u_int)0xd51e3dae, (u_int)0x3927362d, + (u_int)0xd9640a0f, (u_int)0xa621685c, (u_int)0x54d19b5b, (u_int)0x2e3a2436, + (u_int)0x67b10c0a, (u_int)0xe70f9357, (u_int)0x96d2b4ee, (u_int)0x919e1b9b, + (u_int)0xc54f80c0, (u_int)0x20a261dc, (u_int)0x4b695a77, (u_int)0x1a161c12, + (u_int)0xba0ae293, (u_int)0x2ae5c0a0, (u_int)0xe0433c22, (u_int)0x171d121b, + (u_int)0x0d0b0e09, (u_int)0xc7adf28b, (u_int)0xa8b92db6, (u_int)0xa9c8141e, + (u_int)0x198557f1, (u_int)0x074caf75, (u_int)0xddbbee99, (u_int)0x60fda37f, + (u_int)0x269ff701, (u_int)0xf5bc5c72, (u_int)0x3bc54466, (u_int)0x7e345bfb, + (u_int)0x29768b43, (u_int)0xc6dccb23, (u_int)0xfc68b6ed, (u_int)0xf163b8e4, + (u_int)0xdccad731, (u_int)0x85104263, (u_int)0x22401397, (u_int)0x112084c6, + (u_int)0x247d854a, (u_int)0x3df8d2bb, (u_int)0x3211aef9, (u_int)0xa16dc729, + (u_int)0x2f4b1d9e, (u_int)0x30f3dcb2, (u_int)0x52ec0d86, (u_int)0xe3d077c1, + (u_int)0x166c2bb3, (u_int)0xb999a970, (u_int)0x48fa1194, (u_int)0x642247e9, + (u_int)0x8cc4a8fc, (u_int)0x3f1aa0f0, (u_int)0x2cd8567d, (u_int)0x90ef2233, + (u_int)0x4ec78749, (u_int)0xd1c1d938, (u_int)0xa2fe8cca, (u_int)0x0b3698d4, + (u_int)0x81cfa6f5, (u_int)0xde28a57a, (u_int)0x8e26dab7, (u_int)0xbfa43fad, + (u_int)0x9de42c3a, (u_int)0x920d5078, (u_int)0xcc9b6a5f, (u_int)0x4662547e, + (u_int)0x13c2f68d, (u_int)0xb8e890d8, (u_int)0xf75e2e39, (u_int)0xaff582c3, + (u_int)0x80be9f5d, (u_int)0x937c69d0, (u_int)0x2da96fd5, (u_int)0x12b3cf25, + 
(u_int)0x993bc8ac, (u_int)0x7da71018, (u_int)0x636ee89c, (u_int)0xbb7bdb3b, + (u_int)0x7809cd26, (u_int)0x18f46e59, (u_int)0xb701ec9a, (u_int)0x9aa8834f, + (u_int)0x6e65e695, (u_int)0xe67eaaff, (u_int)0xcf0821bc, (u_int)0xe8e6ef15, + (u_int)0x9bd9bae7, (u_int)0x36ce4a6f, (u_int)0x09d4ea9f, (u_int)0x7cd629b0, + (u_int)0xb2af31a4, (u_int)0x23312a3f, (u_int)0x9430c6a5, (u_int)0x66c035a2, + (u_int)0xbc37744e, (u_int)0xcaa6fc82, (u_int)0xd0b0e090, (u_int)0xd81533a7, + (u_int)0x984af104, (u_int)0xdaf741ec, (u_int)0x500e7fcd, (u_int)0xf62f1791, + (u_int)0xd68d764d, (u_int)0xb04d43ef, (u_int)0x4d54ccaa, (u_int)0x04dfe496, + (u_int)0xb5e39ed1, (u_int)0x881b4c6a, (u_int)0x1fb8c12c, (u_int)0x517f4665, + (u_int)0xea049d5e, (u_int)0x355d018c, (u_int)0x7473fa87, (u_int)0x412efb0b, + (u_int)0x1d5ab367, (u_int)0xd25292db, (u_int)0x5633e910, (u_int)0x47136dd6, + (u_int)0x618c9ad7, (u_int)0x0c7a37a1, (u_int)0x148e59f8, (u_int)0x3c89eb13, + (u_int)0x27eecea9, (u_int)0xc935b761, (u_int)0xe5ede11c, (u_int)0xb13c7a47, + (u_int)0xdf599cd2, (u_int)0x733f55f2, (u_int)0xce791814, (u_int)0x37bf73c7, + (u_int)0xcdea53f7, (u_int)0xaa5b5ffd, (u_int)0x6f14df3d, (u_int)0xdb867844, + (u_int)0xf381caaf, (u_int)0xc43eb968, (u_int)0x342c3824, (u_int)0x405fc2a3, + (u_int)0xc372161d, (u_int)0x250cbce2, (u_int)0x498b283c, (u_int)0x9541ff0d, + (u_int)0x017139a8, (u_int)0xb3de080c, (u_int)0xe49cd8b4, (u_int)0xc1906456, + (u_int)0x84617bcb, (u_int)0xb670d532, (u_int)0x5c74486c, (u_int)0x5742d0b8, +}; +static const u32 Td3[256] = { + (u_int)0xf4a75051, (u_int)0x4165537e, (u_int)0x17a4c31a, (u_int)0x275e963a, + (u_int)0xab6bcb3b, (u_int)0x9d45f11f, (u_int)0xfa58abac, (u_int)0xe303934b, + (u_int)0x30fa5520, (u_int)0x766df6ad, (u_int)0xcc769188, (u_int)0x024c25f5, + (u_int)0xe5d7fc4f, (u_int)0x2acbd7c5, (u_int)0x35448026, (u_int)0x62a38fb5, + (u_int)0xb15a49de, (u_int)0xba1b6725, (u_int)0xea0e9845, (u_int)0xfec0e15d, + (u_int)0x2f7502c3, (u_int)0x4cf01281, (u_int)0x4697a38d, (u_int)0xd3f9c66b, + 
(u_int)0x8f5fe703, (u_int)0x929c9515, (u_int)0x6d7aebbf, (u_int)0x5259da95, + (u_int)0xbe832dd4, (u_int)0x7421d358, (u_int)0xe0692949, (u_int)0xc9c8448e, + (u_int)0xc2896a75, (u_int)0x8e7978f4, (u_int)0x583e6b99, (u_int)0xb971dd27, + (u_int)0xe14fb6be, (u_int)0x88ad17f0, (u_int)0x20ac66c9, (u_int)0xce3ab47d, + (u_int)0xdf4a1863, (u_int)0x1a3182e5, (u_int)0x51336097, (u_int)0x537f4562, + (u_int)0x6477e0b1, (u_int)0x6bae84bb, (u_int)0x81a01cfe, (u_int)0x082b94f9, + (u_int)0x48685870, (u_int)0x45fd198f, (u_int)0xde6c8794, (u_int)0x7bf8b752, + (u_int)0x73d323ab, (u_int)0x4b02e272, (u_int)0x1f8f57e3, (u_int)0x55ab2a66, + (u_int)0xeb2807b2, (u_int)0xb5c2032f, (u_int)0xc57b9a86, (u_int)0x3708a5d3, + (u_int)0x2887f230, (u_int)0xbfa5b223, (u_int)0x036aba02, (u_int)0x16825ced, + (u_int)0xcf1c2b8a, (u_int)0x79b492a7, (u_int)0x07f2f0f3, (u_int)0x69e2a14e, + (u_int)0xdaf4cd65, (u_int)0x05bed506, (u_int)0x34621fd1, (u_int)0xa6fe8ac4, + (u_int)0x2e539d34, (u_int)0xf355a0a2, (u_int)0x8ae13205, (u_int)0xf6eb75a4, + (u_int)0x83ec390b, (u_int)0x60efaa40, (u_int)0x719f065e, (u_int)0x6e1051bd, + (u_int)0x218af93e, (u_int)0xdd063d96, (u_int)0x3e05aedd, (u_int)0xe6bd464d, + (u_int)0x548db591, (u_int)0xc45d0571, (u_int)0x06d46f04, (u_int)0x5015ff60, + (u_int)0x98fb2419, (u_int)0xbde997d6, (u_int)0x4043cc89, (u_int)0xd99e7767, + (u_int)0xe842bdb0, (u_int)0x898b8807, (u_int)0x195b38e7, (u_int)0xc8eedb79, + (u_int)0x7c0a47a1, (u_int)0x420fe97c, (u_int)0x841ec9f8, (u_int)0x00000000, + (u_int)0x80868309, (u_int)0x2bed4832, (u_int)0x1170ac1e, (u_int)0x5a724e6c, + (u_int)0x0efffbfd, (u_int)0x8538560f, (u_int)0xaed51e3d, (u_int)0x2d392736, + (u_int)0x0fd9640a, (u_int)0x5ca62168, (u_int)0x5b54d19b, (u_int)0x362e3a24, + (u_int)0x0a67b10c, (u_int)0x57e70f93, (u_int)0xee96d2b4, (u_int)0x9b919e1b, + (u_int)0xc0c54f80, (u_int)0xdc20a261, (u_int)0x774b695a, (u_int)0x121a161c, + (u_int)0x93ba0ae2, (u_int)0xa02ae5c0, (u_int)0x22e0433c, (u_int)0x1b171d12, + (u_int)0x090d0b0e, (u_int)0x8bc7adf2, 
(u_int)0xb6a8b92d, (u_int)0x1ea9c814, + (u_int)0xf1198557, (u_int)0x75074caf, (u_int)0x99ddbbee, (u_int)0x7f60fda3, + (u_int)0x01269ff7, (u_int)0x72f5bc5c, (u_int)0x663bc544, (u_int)0xfb7e345b, + (u_int)0x4329768b, (u_int)0x23c6dccb, (u_int)0xedfc68b6, (u_int)0xe4f163b8, + (u_int)0x31dccad7, (u_int)0x63851042, (u_int)0x97224013, (u_int)0xc6112084, + (u_int)0x4a247d85, (u_int)0xbb3df8d2, (u_int)0xf93211ae, (u_int)0x29a16dc7, + (u_int)0x9e2f4b1d, (u_int)0xb230f3dc, (u_int)0x8652ec0d, (u_int)0xc1e3d077, + (u_int)0xb3166c2b, (u_int)0x70b999a9, (u_int)0x9448fa11, (u_int)0xe9642247, + (u_int)0xfc8cc4a8, (u_int)0xf03f1aa0, (u_int)0x7d2cd856, (u_int)0x3390ef22, + (u_int)0x494ec787, (u_int)0x38d1c1d9, (u_int)0xcaa2fe8c, (u_int)0xd40b3698, + (u_int)0xf581cfa6, (u_int)0x7ade28a5, (u_int)0xb78e26da, (u_int)0xadbfa43f, + (u_int)0x3a9de42c, (u_int)0x78920d50, (u_int)0x5fcc9b6a, (u_int)0x7e466254, + (u_int)0x8d13c2f6, (u_int)0xd8b8e890, (u_int)0x39f75e2e, (u_int)0xc3aff582, + (u_int)0x5d80be9f, (u_int)0xd0937c69, (u_int)0xd52da96f, (u_int)0x2512b3cf, + (u_int)0xac993bc8, (u_int)0x187da710, (u_int)0x9c636ee8, (u_int)0x3bbb7bdb, + (u_int)0x267809cd, (u_int)0x5918f46e, (u_int)0x9ab701ec, (u_int)0x4f9aa883, + (u_int)0x956e65e6, (u_int)0xffe67eaa, (u_int)0xbccf0821, (u_int)0x15e8e6ef, + (u_int)0xe79bd9ba, (u_int)0x6f36ce4a, (u_int)0x9f09d4ea, (u_int)0xb07cd629, + (u_int)0xa4b2af31, (u_int)0x3f23312a, (u_int)0xa59430c6, (u_int)0xa266c035, + (u_int)0x4ebc3774, (u_int)0x82caa6fc, (u_int)0x90d0b0e0, (u_int)0xa7d81533, + (u_int)0x04984af1, (u_int)0xecdaf741, (u_int)0xcd500e7f, (u_int)0x91f62f17, + (u_int)0x4dd68d76, (u_int)0xefb04d43, (u_int)0xaa4d54cc, (u_int)0x9604dfe4, + (u_int)0xd1b5e39e, (u_int)0x6a881b4c, (u_int)0x2c1fb8c1, (u_int)0x65517f46, + (u_int)0x5eea049d, (u_int)0x8c355d01, (u_int)0x877473fa, (u_int)0x0b412efb, + (u_int)0x671d5ab3, (u_int)0xdbd25292, (u_int)0x105633e9, (u_int)0xd647136d, + (u_int)0xd7618c9a, (u_int)0xa10c7a37, (u_int)0xf8148e59, (u_int)0x133c89eb, + 
(u_int)0xa927eece, (u_int)0x61c935b7, (u_int)0x1ce5ede1, (u_int)0x47b13c7a, + (u_int)0xd2df599c, (u_int)0xf2733f55, (u_int)0x14ce7918, (u_int)0xc737bf73, + (u_int)0xf7cdea53, (u_int)0xfdaa5b5f, (u_int)0x3d6f14df, (u_int)0x44db8678, + (u_int)0xaff381ca, (u_int)0x68c43eb9, (u_int)0x24342c38, (u_int)0xa3405fc2, + (u_int)0x1dc37216, (u_int)0xe2250cbc, (u_int)0x3c498b28, (u_int)0x0d9541ff, + (u_int)0xa8017139, (u_int)0x0cb3de08, (u_int)0xb4e49cd8, (u_int)0x56c19064, + (u_int)0xcb84617b, (u_int)0x32b670d5, (u_int)0x6c5c7448, (u_int)0xb85742d0, +}; +static const u32 Td4[256] = { + (u_int)0x52525252, (u_int)0x09090909, (u_int)0x6a6a6a6a, (u_int)0xd5d5d5d5, + (u_int)0x30303030, (u_int)0x36363636, (u_int)0xa5a5a5a5, (u_int)0x38383838, + (u_int)0xbfbfbfbf, (u_int)0x40404040, (u_int)0xa3a3a3a3, (u_int)0x9e9e9e9e, + (u_int)0x81818181, (u_int)0xf3f3f3f3, (u_int)0xd7d7d7d7, (u_int)0xfbfbfbfb, + (u_int)0x7c7c7c7c, (u_int)0xe3e3e3e3, (u_int)0x39393939, (u_int)0x82828282, + (u_int)0x9b9b9b9b, (u_int)0x2f2f2f2f, (u_int)0xffffffff, (u_int)0x87878787, + (u_int)0x34343434, (u_int)0x8e8e8e8e, (u_int)0x43434343, (u_int)0x44444444, + (u_int)0xc4c4c4c4, (u_int)0xdededede, (u_int)0xe9e9e9e9, (u_int)0xcbcbcbcb, + (u_int)0x54545454, (u_int)0x7b7b7b7b, (u_int)0x94949494, (u_int)0x32323232, + (u_int)0xa6a6a6a6, (u_int)0xc2c2c2c2, (u_int)0x23232323, (u_int)0x3d3d3d3d, + (u_int)0xeeeeeeee, (u_int)0x4c4c4c4c, (u_int)0x95959595, (u_int)0x0b0b0b0b, + (u_int)0x42424242, (u_int)0xfafafafa, (u_int)0xc3c3c3c3, (u_int)0x4e4e4e4e, + (u_int)0x08080808, (u_int)0x2e2e2e2e, (u_int)0xa1a1a1a1, (u_int)0x66666666, + (u_int)0x28282828, (u_int)0xd9d9d9d9, (u_int)0x24242424, (u_int)0xb2b2b2b2, + (u_int)0x76767676, (u_int)0x5b5b5b5b, (u_int)0xa2a2a2a2, (u_int)0x49494949, + (u_int)0x6d6d6d6d, (u_int)0x8b8b8b8b, (u_int)0xd1d1d1d1, (u_int)0x25252525, + (u_int)0x72727272, (u_int)0xf8f8f8f8, (u_int)0xf6f6f6f6, (u_int)0x64646464, + (u_int)0x86868686, (u_int)0x68686868, (u_int)0x98989898, (u_int)0x16161616, + 
(u_int)0xd4d4d4d4, (u_int)0xa4a4a4a4, (u_int)0x5c5c5c5c, (u_int)0xcccccccc, + (u_int)0x5d5d5d5d, (u_int)0x65656565, (u_int)0xb6b6b6b6, (u_int)0x92929292, + (u_int)0x6c6c6c6c, (u_int)0x70707070, (u_int)0x48484848, (u_int)0x50505050, + (u_int)0xfdfdfdfd, (u_int)0xedededed, (u_int)0xb9b9b9b9, (u_int)0xdadadada, + (u_int)0x5e5e5e5e, (u_int)0x15151515, (u_int)0x46464646, (u_int)0x57575757, + (u_int)0xa7a7a7a7, (u_int)0x8d8d8d8d, (u_int)0x9d9d9d9d, (u_int)0x84848484, + (u_int)0x90909090, (u_int)0xd8d8d8d8, (u_int)0xabababab, (u_int)0x00000000, + (u_int)0x8c8c8c8c, (u_int)0xbcbcbcbc, (u_int)0xd3d3d3d3, (u_int)0x0a0a0a0a, + (u_int)0xf7f7f7f7, (u_int)0xe4e4e4e4, (u_int)0x58585858, (u_int)0x05050505, + (u_int)0xb8b8b8b8, (u_int)0xb3b3b3b3, (u_int)0x45454545, (u_int)0x06060606, + (u_int)0xd0d0d0d0, (u_int)0x2c2c2c2c, (u_int)0x1e1e1e1e, (u_int)0x8f8f8f8f, + (u_int)0xcacacaca, (u_int)0x3f3f3f3f, (u_int)0x0f0f0f0f, (u_int)0x02020202, + (u_int)0xc1c1c1c1, (u_int)0xafafafaf, (u_int)0xbdbdbdbd, (u_int)0x03030303, + (u_int)0x01010101, (u_int)0x13131313, (u_int)0x8a8a8a8a, (u_int)0x6b6b6b6b, + (u_int)0x3a3a3a3a, (u_int)0x91919191, (u_int)0x11111111, (u_int)0x41414141, + (u_int)0x4f4f4f4f, (u_int)0x67676767, (u_int)0xdcdcdcdc, (u_int)0xeaeaeaea, + (u_int)0x97979797, (u_int)0xf2f2f2f2, (u_int)0xcfcfcfcf, (u_int)0xcececece, + (u_int)0xf0f0f0f0, (u_int)0xb4b4b4b4, (u_int)0xe6e6e6e6, (u_int)0x73737373, + (u_int)0x96969696, (u_int)0xacacacac, (u_int)0x74747474, (u_int)0x22222222, + (u_int)0xe7e7e7e7, (u_int)0xadadadad, (u_int)0x35353535, (u_int)0x85858585, + (u_int)0xe2e2e2e2, (u_int)0xf9f9f9f9, (u_int)0x37373737, (u_int)0xe8e8e8e8, + (u_int)0x1c1c1c1c, (u_int)0x75757575, (u_int)0xdfdfdfdf, (u_int)0x6e6e6e6e, + (u_int)0x47474747, (u_int)0xf1f1f1f1, (u_int)0x1a1a1a1a, (u_int)0x71717171, + (u_int)0x1d1d1d1d, (u_int)0x29292929, (u_int)0xc5c5c5c5, (u_int)0x89898989, + (u_int)0x6f6f6f6f, (u_int)0xb7b7b7b7, (u_int)0x62626262, (u_int)0x0e0e0e0e, + (u_int)0xaaaaaaaa, (u_int)0x18181818, 
(u_int)0xbebebebe, (u_int)0x1b1b1b1b, + (u_int)0xfcfcfcfc, (u_int)0x56565656, (u_int)0x3e3e3e3e, (u_int)0x4b4b4b4b, + (u_int)0xc6c6c6c6, (u_int)0xd2d2d2d2, (u_int)0x79797979, (u_int)0x20202020, + (u_int)0x9a9a9a9a, (u_int)0xdbdbdbdb, (u_int)0xc0c0c0c0, (u_int)0xfefefefe, + (u_int)0x78787878, (u_int)0xcdcdcdcd, (u_int)0x5a5a5a5a, (u_int)0xf4f4f4f4, + (u_int)0x1f1f1f1f, (u_int)0xdddddddd, (u_int)0xa8a8a8a8, (u_int)0x33333333, + (u_int)0x88888888, (u_int)0x07070707, (u_int)0xc7c7c7c7, (u_int)0x31313131, + (u_int)0xb1b1b1b1, (u_int)0x12121212, (u_int)0x10101010, (u_int)0x59595959, + (u_int)0x27272727, (u_int)0x80808080, (u_int)0xecececec, (u_int)0x5f5f5f5f, + (u_int)0x60606060, (u_int)0x51515151, (u_int)0x7f7f7f7f, (u_int)0xa9a9a9a9, + (u_int)0x19191919, (u_int)0xb5b5b5b5, (u_int)0x4a4a4a4a, (u_int)0x0d0d0d0d, + (u_int)0x2d2d2d2d, (u_int)0xe5e5e5e5, (u_int)0x7a7a7a7a, (u_int)0x9f9f9f9f, + (u_int)0x93939393, (u_int)0xc9c9c9c9, (u_int)0x9c9c9c9c, (u_int)0xefefefef, + (u_int)0xa0a0a0a0, (u_int)0xe0e0e0e0, (u_int)0x3b3b3b3b, (u_int)0x4d4d4d4d, + (u_int)0xaeaeaeae, (u_int)0x2a2a2a2a, (u_int)0xf5f5f5f5, (u_int)0xb0b0b0b0, + (u_int)0xc8c8c8c8, (u_int)0xebebebeb, (u_int)0xbbbbbbbb, (u_int)0x3c3c3c3c, + (u_int)0x83838383, (u_int)0x53535353, (u_int)0x99999999, (u_int)0x61616161, + (u_int)0x17171717, (u_int)0x2b2b2b2b, (u_int)0x04040404, (u_int)0x7e7e7e7e, + (u_int)0xbabababa, (u_int)0x77777777, (u_int)0xd6d6d6d6, (u_int)0x26262626, + (u_int)0xe1e1e1e1, (u_int)0x69696969, (u_int)0x14141414, (u_int)0x63636363, + (u_int)0x55555555, (u_int)0x21212121, (u_int)0x0c0c0c0c, (u_int)0x7d7d7d7d, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +#define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) + +#ifdef _MSC_VER +#define GETU32(p) SWAP(*((u32 *)(p))) +#define PUTU32(ct, st) { *((u32 
*)(ct)) = SWAP((st)); } +#else +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } +#endif + +/** + * Expand the cipher key into the encryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +/* + * __db_rijndaelKeySetupEnc -- + * + * PUBLIC: int __db_rijndaelKeySetupEnc __P((u32 *, const u8 *, int)); + */ +int +__db_rijndaelKeySetupEnc(rk, cipherKey, keyBits) + u32 *rk; /* rk[4*(Nr + 1)] */ + const u8 *cipherKey; + int keyBits; +{ + int i = 0; + u32 temp; + + rk[0] = GETU32(cipherKey ); + rk[1] = GETU32(cipherKey + 4); + rk[2] = GETU32(cipherKey + 8); + rk[3] = GETU32(cipherKey + 12); + if (keyBits == 128) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 10; + } + rk += 4; + } + } + rk[4] = GETU32(cipherKey + 16); + rk[5] = GETU32(cipherKey + 20); + if (keyBits == 192) { + for (;;) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 12; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(cipherKey + 24); + rk[7] = GETU32(cipherKey + 28); + if (keyBits == 256) { + for (;;) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 
0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 14; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +/* + * __db_rijndaelKeySetupDec -- + * + * PUBLIC: int __db_rijndaelKeySetupDec __P((u32 *, const u8 *, int)); + */ +int +__db_rijndaelKeySetupDec(rk, cipherKey, keyBits) + u32 *rk; /* rk[4*(Nr + 1)] */ + const u8 *cipherKey; + int keyBits; +{ + int Nr, i, j; + u32 temp; + + /* expand the cipher key: */ + Nr = __db_rijndaelKeySetupEnc(rk, cipherKey, keyBits); + /* invert the order of the round keys: */ + for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < Nr; i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 
0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return Nr; +} + +/* + * __db_rijndaelEncrypt -- + * + * PUBLIC: void __db_rijndaelEncrypt __P((u32 *, int, const u8 *, u8 *)); + */ +void +__db_rijndaelEncrypt(rk, Nr, pt, ct) + u32 *rk; /* rk[4*(Nr + 1)] */ + int Nr; + const u8 *pt; + u8 *ct; +{ + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(pt ) ^ rk[0]; + s1 = GETU32(pt + 4) ^ rk[1]; + s2 = GETU32(pt + 8) ^ rk[2]; + s3 = GETU32(pt + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + 
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ 
Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; 
+#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(ct , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(ct + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(ct + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(ct + 12, s3); +} + +/* + * __db_rijndaelDecrypt -- + * + * PUBLIC: void __db_rijndaelDecrypt __P((u32 
*, int, const u8 *, u8 *)); + */ +void +__db_rijndaelDecrypt(rk, Nr, ct, pt) + u32 *rk; /* rk[4*(Nr + 1)] */ + int Nr; + const u8 *ct; + u8 *pt; +{ + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(ct ) ^ rk[0]; + s1 = GETU32(ct + 4) ^ rk[1]; + s2 = GETU32(ct + 8) ^ rk[2]; + s3 = GETU32(ct + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = 
Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 
& 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + 
t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(pt , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(pt + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(pt + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(pt + 12, s3); +} + +#ifdef INTERMEDIATE_VALUE_KAT + +/* + * __db_rijndaelEncryptRound -- + * + * PUBLIC: void __db_rijndaelEncryptRound __P((const u32 *, int, u8 *, int)); + */ +void +__db_rijndaelEncryptRound(rk, Nr, pt, ct) + const u32 *rk; /* rk[4*(Nr + 1)] */ + int Nr; + u8 *block; + int rounds; +{ 
+ int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[0]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[1]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[2]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + if (rounds == Nr) { + t0 = + (Te4[(s0 >> 24) ] & 0xff000000) ^ + (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + t1 = + (Te4[(s1 >> 24) ] & 0xff000000) ^ + (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + t2 = + (Te4[(s2 >> 24) ] & 0xff000000) ^ + (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + t3 = + (Te4[(s3 >> 24) ] & 0xff000000) ^ + (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + } + + PUTU32(block , s0); + PUTU32(block + 4, s1); + PUTU32(block + 8, s2); + PUTU32(block + 12, s3); +} + +/* + * __db_rijndaelDecryptRound -- + * + * PUBLIC: void __db_rijndaelDecryptRound __P((const u32 *, int, u8 *, int)); + */ +void +__db_rijndaelDecryptRound(rk, Nr, pt, ct) 
+ const u32 *rk; /* rk[4*(Nr + 1)] */ + int Nr; + u8 *block; + int rounds; +{ + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * complete the last round and + * map cipher state to byte array block: + */ + t0 = + (Td4[(s0 >> 24) ] & 0xff000000) ^ + (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s1 ) & 0xff] & 0x000000ff); + t1 = + (Td4[(s1 >> 24) ] & 0xff000000) ^ + (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s2 ) & 0xff] & 0x000000ff); + t2 = + (Td4[(s2 >> 24) ] & 0xff000000) ^ + (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s3 ) & 0xff] & 0x000000ff); + t3 = + (Td4[(s3 >> 24) ] & 0xff000000) ^ + (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s0 ) & 0xff] & 0x000000ff); + + if (rounds == Nr) { + t0 ^= rk[0]; + t1 ^= rk[1]; + t2 ^= rk[2]; + t3 ^= rk[3]; + } + + PUTU32(block , t0); + PUTU32(block + 4, t1); + PUTU32(block + 8, t2); + PUTU32(block + 12, t3); +} + +#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/src/crypto/rijndael/rijndael-alg-fst.h 
b/src/crypto/rijndael/rijndael-alg-fst.h new file mode 100644 index 00000000..7d5e228c --- /dev/null +++ b/src/crypto/rijndael/rijndael-alg-fst.h @@ -0,0 +1,40 @@ +/* + * $Id$ + */ +/** + * rijndael-alg-fst.h + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __RIJNDAEL_ALG_FST_H +#define __RIJNDAEL_ALG_FST_H + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +typedef u_int8_t u8; +typedef u_int16_t u16; +typedef u_int32_t u32; + +#endif /* __RIJNDAEL_ALG_FST_H */ diff --git a/src/crypto/rijndael/rijndael-api-fst.c b/src/crypto/rijndael/rijndael-api-fst.c new file mode 100644 index 00000000..3fd6489d --- /dev/null +++ b/src/crypto/rijndael/rijndael-api-fst.c @@ -0,0 +1,491 @@ +/** + * rijndael-api-fst.c + * + * @version 2.9 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Acknowledgements: + * + * We are deeply indebted to the following people for their bug reports, + * fixes, and improvement suggestions to this implementation. Though we + * tried to list all contributions, we apologise in advance for any + * missing reference. 
+ * + * Andrew Bales + * Markus Friedl + * John Skodon + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" + +#include "crypto/rijndael/rijndael-alg-fst.h" +#include "crypto/rijndael/rijndael-api-fst.h" + +/* + * __db_makeKey -- + * + * PUBLIC: int __db_makeKey __P((keyInstance *, int, int, char *)); + */ +int +__db_makeKey(key, direction, keyLen, keyMaterial) + keyInstance *key; + int direction; + int keyLen; + char *keyMaterial; +{ + u8 cipherKey[MAXKB]; + + if (key == NULL) { + return BAD_KEY_INSTANCE; + } + + if ((direction == DIR_ENCRYPT) || (direction == DIR_DECRYPT)) { + key->direction = direction; + } else { + return BAD_KEY_DIR; + } + + if ((keyLen == 128) || (keyLen == 192) || (keyLen == 256)) { + key->keyLen = keyLen; + } else { + return BAD_KEY_MAT; + } + + if (keyMaterial != NULL) { + memcpy(cipherKey, keyMaterial, key->keyLen/8); + } + + if (direction == DIR_ENCRYPT) { + key->Nr = __db_rijndaelKeySetupEnc(key->rk, cipherKey, keyLen); + } else { + key->Nr = __db_rijndaelKeySetupDec(key->rk, cipherKey, keyLen); + } + __db_rijndaelKeySetupEnc(key->ek, cipherKey, keyLen); + return TRUE; +} + +/* + * __db_cipherInit -- + * + * PUBLIC: int __db_cipherInit __P((cipherInstance *, int, char *)); + */ +int +__db_cipherInit(cipher, mode, IV) + cipherInstance *cipher; + int mode; + char *IV; +{ + if ((mode == MODE_ECB) || (mode == MODE_CBC) || (mode == MODE_CFB1)) { + cipher->mode = mode; + } else { + return BAD_CIPHER_MODE; + } + if (IV != NULL) { + memcpy(cipher->IV, IV, MAX_IV_SIZE); + } + return TRUE; +} + +/* + * __db_blockEncrypt -- + * + * PUBLIC: int __db_blockEncrypt __P((cipherInstance *, keyInstance *, u_int8_t *, + * PUBLIC: size_t, u_int8_t *)); + */ +int +__db_blockEncrypt(cipher, key, input, inputLen, outBuffer) + cipherInstance *cipher; + keyInstance *key; + u_int8_t *input; + size_t inputLen; + u_int8_t *outBuffer; +{ + int i, k, t, numBlocks; + u8 block[16], *iv; + u32 tmpiv[4]; + + if (cipher == NULL || + key == NULL 
|| + key->direction == DIR_DECRYPT) { + return BAD_CIPHER_STATE; + } + if (input == NULL || inputLen <= 0) { + return 0; /* nothing to do */ + } + + numBlocks = (int)(inputLen/128); + + switch (cipher->mode) { + case MODE_ECB: + for (i = numBlocks; i > 0; i--) { + __db_rijndaelEncrypt(key->rk, key->Nr, input, outBuffer); + input += 16; + outBuffer += 16; + } + break; + + case MODE_CBC: + iv = cipher->IV; + for (i = numBlocks; i > 0; i--) { + memcpy(tmpiv, iv, MAX_IV_SIZE); + ((u32*)block)[0] = ((u32*)input)[0] ^ tmpiv[0]; + ((u32*)block)[1] = ((u32*)input)[1] ^ tmpiv[1]; + ((u32*)block)[2] = ((u32*)input)[2] ^ tmpiv[2]; + ((u32*)block)[3] = ((u32*)input)[3] ^ tmpiv[3]; + __db_rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); + iv = outBuffer; + input += 16; + outBuffer += 16; + } + break; + + case MODE_CFB1: + iv = cipher->IV; + for (i = numBlocks; i > 0; i--) { + memcpy(outBuffer, input, 16); + for (k = 0; k < 128; k++) { + __db_rijndaelEncrypt(key->ek, key->Nr, iv, block); + outBuffer[k >> 3] ^= (block[0] & (u_int)0x80) >> (k & 7); + for (t = 0; t < 15; t++) { + iv[t] = (iv[t] << 1) | (iv[t + 1] >> 7); + } + iv[15] = (iv[15] << 1) | ((outBuffer[k >> 3] >> (7 - (k & 7))) & 1); + } + outBuffer += 16; + input += 16; + } + break; + + default: + return BAD_CIPHER_STATE; + } + + return 128*numBlocks; +} + +/** + * Encrypt data partitioned in octets, using RFC 2040-like padding. + * + * @param input data to be encrypted (octet sequence) + * @param inputOctets input length in octets (not bits) + * @param outBuffer encrypted output data + * + * @return length in octets (not bits) of the encrypted output buffer. 
+ */ +/* + * __db_padEncrypt -- + * + * PUBLIC: int __db_padEncrypt __P((cipherInstance *, keyInstance *, u_int8_t *, + * PUBLIC: int, u_int8_t *)); + */ +int +__db_padEncrypt(cipher, key, input, inputOctets, outBuffer) + cipherInstance *cipher; + keyInstance *key; + u_int8_t *input; + int inputOctets; + u_int8_t *outBuffer; +{ + int i, numBlocks, padLen; + u8 block[16], *iv; + u32 tmpiv[4]; + + if (cipher == NULL || + key == NULL || + key->direction == DIR_DECRYPT) { + return BAD_CIPHER_STATE; + } + if (input == NULL || inputOctets <= 0) { + return 0; /* nothing to do */ + } + + numBlocks = inputOctets/16; + + switch (cipher->mode) { + case MODE_ECB: + for (i = numBlocks; i > 0; i--) { + __db_rijndaelEncrypt(key->rk, key->Nr, input, outBuffer); + input += 16; + outBuffer += 16; + } + padLen = 16 - (inputOctets - 16*numBlocks); + DB_ASSERT(NULL, padLen > 0 && padLen <= 16); + memcpy(block, input, 16 - padLen); + memset(block + 16 - padLen, padLen, padLen); + __db_rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); + break; + + case MODE_CBC: + iv = cipher->IV; + for (i = numBlocks; i > 0; i--) { + memcpy(tmpiv, iv, MAX_IV_SIZE); + ((u32*)block)[0] = ((u32*)input)[0] ^ tmpiv[0]; + ((u32*)block)[1] = ((u32*)input)[1] ^ tmpiv[1]; + ((u32*)block)[2] = ((u32*)input)[2] ^ tmpiv[2]; + ((u32*)block)[3] = ((u32*)input)[3] ^ tmpiv[3]; + __db_rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); + iv = outBuffer; + input += 16; + outBuffer += 16; + } + padLen = 16 - (inputOctets - 16*numBlocks); + DB_ASSERT(NULL, padLen > 0 && padLen <= 16); + for (i = 0; i < 16 - padLen; i++) { + block[i] = input[i] ^ iv[i]; + } + for (i = 16 - padLen; i < 16; i++) { + block[i] = (u_int8_t)padLen ^ iv[i]; + } + __db_rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); + break; + + default: + return BAD_CIPHER_STATE; + } + + return 16*(numBlocks + 1); +} + +/* + * __db_blockDecrypt -- + * + * PUBLIC: int __db_blockDecrypt __P((cipherInstance *, keyInstance *, u_int8_t *, + * PUBLIC: size_t, 
u_int8_t *)); + */ +int +__db_blockDecrypt(cipher, key, input, inputLen, outBuffer) + cipherInstance *cipher; + keyInstance *key; + u_int8_t *input; + size_t inputLen; + u_int8_t *outBuffer; +{ + int i, k, t, numBlocks; + u8 block[16], *iv; + u32 tmpiv[4]; + + if (cipher == NULL || + key == NULL || + (cipher->mode != MODE_CFB1 && key->direction == DIR_ENCRYPT)) { + return BAD_CIPHER_STATE; + } + if (input == NULL || inputLen <= 0) { + return 0; /* nothing to do */ + } + + numBlocks = (int)(inputLen/128); + + switch (cipher->mode) { + case MODE_ECB: + for (i = numBlocks; i > 0; i--) { + __db_rijndaelDecrypt(key->rk, key->Nr, input, outBuffer); + input += 16; + outBuffer += 16; + } + break; + + case MODE_CBC: + memcpy(tmpiv, cipher->IV, MAX_IV_SIZE); + for (i = numBlocks; i > 0; i--) { + __db_rijndaelDecrypt(key->rk, key->Nr, input, block); + ((u32*)block)[0] ^= tmpiv[0]; + ((u32*)block)[1] ^= tmpiv[1]; + ((u32*)block)[2] ^= tmpiv[2]; + ((u32*)block)[3] ^= tmpiv[3]; + memcpy(tmpiv, input, 16); + memcpy(outBuffer, block, 16); + input += 16; + outBuffer += 16; + } + break; + + case MODE_CFB1: + iv = cipher->IV; + for (i = numBlocks; i > 0; i--) { + memcpy(outBuffer, input, 16); + for (k = 0; k < 128; k++) { + __db_rijndaelEncrypt(key->ek, key->Nr, iv, block); + for (t = 0; t < 15; t++) { + iv[t] = (iv[t] << 1) | (iv[t + 1] >> 7); + } + iv[15] = (iv[15] << 1) | ((input[k >> 3] >> (7 - (k & 7))) & 1); + outBuffer[k >> 3] ^= (block[0] & (u_int)0x80) >> (k & 7); + } + outBuffer += 16; + input += 16; + } + break; + + default: + return BAD_CIPHER_STATE; + } + + return 128*numBlocks; +} + +/* + * __db_padDecrypt -- + * + * PUBLIC: int __db_padDecrypt __P((cipherInstance *, keyInstance *, u_int8_t *, + * PUBLIC: int, u_int8_t *)); + */ +int +__db_padDecrypt(cipher, key, input, inputOctets, outBuffer) + cipherInstance *cipher; + keyInstance *key; + u_int8_t *input; + int inputOctets; + u_int8_t *outBuffer; +{ + int i, numBlocks, padLen; + u8 block[16]; + u32 tmpiv[4]; + + if 
(cipher == NULL || + key == NULL || + key->direction == DIR_ENCRYPT) { + return BAD_CIPHER_STATE; + } + if (input == NULL || inputOctets <= 0) { + return 0; /* nothing to do */ + } + if (inputOctets % 16 != 0) { + return BAD_DATA; + } + + numBlocks = inputOctets/16; + + switch (cipher->mode) { + case MODE_ECB: + /* all blocks but last */ + for (i = numBlocks - 1; i > 0; i--) { + __db_rijndaelDecrypt(key->rk, key->Nr, input, outBuffer); + input += 16; + outBuffer += 16; + } + /* last block */ + __db_rijndaelDecrypt(key->rk, key->Nr, input, block); + padLen = block[15]; + if (padLen >= 16) { + return BAD_DATA; + } + for (i = 16 - padLen; i < 16; i++) { + if (block[i] != padLen) { + return BAD_DATA; + } + } + memcpy(outBuffer, block, 16 - padLen); + break; + + case MODE_CBC: + /* all blocks but last */ + memcpy(tmpiv, cipher->IV, MAX_IV_SIZE); + for (i = numBlocks - 1; i > 0; i--) { + __db_rijndaelDecrypt(key->rk, key->Nr, input, block); + ((u32*)block)[0] ^= tmpiv[0]; + ((u32*)block)[1] ^= tmpiv[1]; + ((u32*)block)[2] ^= tmpiv[2]; + ((u32*)block)[3] ^= tmpiv[3]; + memcpy(tmpiv, input, 16); + memcpy(outBuffer, block, 16); + input += 16; + outBuffer += 16; + } + /* last block */ + __db_rijndaelDecrypt(key->rk, key->Nr, input, block); + ((u32*)block)[0] ^= tmpiv[0]; + ((u32*)block)[1] ^= tmpiv[1]; + ((u32*)block)[2] ^= tmpiv[2]; + ((u32*)block)[3] ^= tmpiv[3]; + padLen = block[15]; + if (padLen <= 0 || padLen > 16) { + return BAD_DATA; + } + for (i = 16 - padLen; i < 16; i++) { + if (block[i] != padLen) { + return BAD_DATA; + } + } + memcpy(outBuffer, block, 16 - padLen); + break; + + default: + return BAD_CIPHER_STATE; + } + + return 16*numBlocks - padLen; +} + +#ifdef INTERMEDIATE_VALUE_KAT +/** + * cipherUpdateRounds: + * + * Encrypts/Decrypts exactly one full block a specified number of rounds. + * Only used in the Intermediate Value Known Answer Test. 
+ * + * Returns: + * TRUE - on success + * BAD_CIPHER_STATE - cipher in bad state (e.g., not initialized) + */ +/* + * __db_cipherUpdateRounds -- + * + * PUBLIC: int __db_cipherUpdateRounds __P((cipherInstance *, keyInstance *, + * PUBLIC: u_int8_t *, int, u_int8_t *, int)); + */ +int +__db_cipherUpdateRounds(cipher, key, input, inputLen, outBuffer, rounds) + cipherInstance *cipher; + keyInstance *key; + u_int8_t *input; + size_t inputLen; + u_int8_t *outBuffer; + int rounds; +{ + u8 block[16]; + + if (cipher == NULL || key == NULL) { + return BAD_CIPHER_STATE; + } + + memcpy(block, input, 16); + + switch (key->direction) { + case DIR_ENCRYPT: + __db_rijndaelEncryptRound(key->rk, key->Nr, block, rounds); + break; + + case DIR_DECRYPT: + __db_rijndaelDecryptRound(key->rk, key->Nr, block, rounds); + break; + + default: + return BAD_KEY_DIR; + } + + memcpy(outBuffer, block, 16); + + return TRUE; +} +#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/src/crypto/rijndael/rijndael-api-fst.h b/src/crypto/rijndael/rijndael-api-fst.h new file mode 100644 index 00000000..3e31920a --- /dev/null +++ b/src/crypto/rijndael/rijndael-api-fst.h @@ -0,0 +1,91 @@ +/* + * $Id$ + */ +/** + * rijndael-api-fst.h + * + * @version 2.9 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Acknowledgements: + * + * We are deeply indebted to the following people for their bug reports, + * fixes, and improvement suggestions to this implementation. Though we + * tried to list all contributions, we apologise in advance for any + * missing reference. + * + * Andrew Bales + * Markus Friedl + * John Skodon + */ + +#ifndef __RIJNDAEL_API_FST_H +#define __RIJNDAEL_API_FST_H + +#include "crypto/rijndael/rijndael-alg-fst.h" + +/* Generic Defines */ +#define DIR_ENCRYPT 0 /* Are we encrpyting? */ +#define DIR_DECRYPT 1 /* Are we decrpyting? */ +#define MODE_ECB 1 /* Are we ciphering in ECB mode? */ +#define MODE_CBC 2 /* Are we ciphering in CBC mode? */ +#define MODE_CFB1 3 /* Are we ciphering in 1-bit CFB mode? 
*/ +#undef TRUE +#define TRUE 1 +#undef FALSE +#define FALSE 0 +#define BITSPERBLOCK 128 /* Default number of bits in a cipher block */ + +/* Error Codes */ +#define BAD_KEY_DIR -1 /* Key direction is invalid, e.g., unknown value */ +#define BAD_KEY_MAT -2 /* Key material not of correct length */ +#define BAD_KEY_INSTANCE -3 /* Key passed is not valid */ +#define BAD_CIPHER_MODE -4 /* Params struct passed to cipherInit invalid */ +#define BAD_CIPHER_STATE -5 /* Cipher in wrong state (e.g., not initialized) */ +#define BAD_BLOCK_LENGTH -6 +#define BAD_CIPHER_INSTANCE -7 +#define BAD_DATA -8 /* Data contents are invalid, e.g., invalid padding */ +#define BAD_OTHER -9 /* Unknown error */ + +/* Algorithm-specific Defines */ +#define MAX_KEY_SIZE 64 /* # of ASCII char's needed to represent a key */ +#define MAX_IV_SIZE 16 /* # bytes needed to represent an IV */ + +/* Typedefs */ + +/* The structure for key information */ +typedef struct { + u_int8_t direction; /* Key used for encrypting or decrypting? */ + int keyLen; /* Length of the key */ + char keyMaterial[MAX_KEY_SIZE+1]; /* Raw key data in ASCII, e.g., user input or KAT values */ + int Nr; /* key-length-dependent number of rounds */ + u32 rk[4*(MAXNR + 1)]; /* key schedule */ + u32 ek[4*(MAXNR + 1)]; /* CFB1 key schedule (encryption only) */ +} keyInstance; + +/* The structure for cipher information */ +typedef struct { /* changed order of the components */ + u_int8_t mode; /* MODE_ECB, MODE_CBC, or MODE_CFB1 */ + u_int8_t IV[MAX_IV_SIZE]; /* A possible Initialization Vector for ciphering */ +} cipherInstance; + +#endif /* __RIJNDAEL_API_FST_H */ diff --git a/src/db/crdel.src b/src/db/crdel.src new file mode 100644 index 00000000..02d80c8f --- /dev/null +++ b/src/db/crdel.src @@ -0,0 +1,71 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +DBPRIVATE +PREFIX __crdel + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * Metasub: log the creation of a subdatabase meta data page. + * + * fileid: identifies the file being acted upon. + * pgno: page number on which to write this meta-data page + * page: the actual meta-data page + * lsn: lsn of the page. + */ +BEGIN metasub 42 142 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +PGDBT page DBT s +POINTER lsn DB_LSN * lu +END + +/* + * Inmem_create: Log the creation of an in-memory database. + * + * name: Name of the database + * fid: File id of the database + */ +BEGIN inmem_create 44 138 +ARG fileid int32_t ld +DBT name DBT s +DBT fid DBT s +ARG pgsize u_int32_t lu +END + +/* + * Inmem_rename: Log the renaming of an in-memory only database. + * + * oldname: database's starting name + * newname: database's ending name + * fid: fileid + */ +BEGIN inmem_rename 44 139 +DBT oldname DBT s +DBT newname DBT s +DBT fid DBT s +END + +/* + * Inmem_remove: Log the removal of an in-memory only database. + * + * name: database's ending name + * fid: fileid + */ +BEGIN inmem_remove 44 140 +DBT name DBT s +DBT fid DBT s +END + diff --git a/src/db/crdel_auto.c b/src/db/crdel_auto.c new file mode 100644 index 00000000..a2a3f54b --- /dev/null +++ b/src/db/crdel_auto.c @@ -0,0 +1,59 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __crdel_metasub_desc[] = { + {LOGREC_DB, SSZ(__crdel_metasub_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__crdel_metasub_args, pgno), "pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__crdel_metasub_args, page), "page", ""}, + {LOGREC_POINTER, SSZ(__crdel_metasub_args, lsn), "lsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __crdel_inmem_create_desc[] = { + {LOGREC_ARG, SSZ(__crdel_inmem_create_args, fileid), "fileid", "%ld"}, + {LOGREC_DBT, SSZ(__crdel_inmem_create_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__crdel_inmem_create_args, fid), "fid", ""}, + {LOGREC_ARG, SSZ(__crdel_inmem_create_args, pgsize), "pgsize", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __crdel_inmem_rename_desc[] = { + {LOGREC_DBT, SSZ(__crdel_inmem_rename_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__crdel_inmem_rename_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__crdel_inmem_rename_args, fid), "fid", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __crdel_inmem_remove_desc[] = { + {LOGREC_DBT, SSZ(__crdel_inmem_remove_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__crdel_inmem_remove_args, fid), "fid", ""}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __crdel_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__crdel_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_metasub_recover, DB___crdel_metasub)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_create_recover, DB___crdel_inmem_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_rename_recover, DB___crdel_inmem_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_remove_recover, 
DB___crdel_inmem_remove)) != 0) + return (ret); + return (0); +} diff --git a/src/db/crdel_autop.c b/src/db/crdel_autop.c new file mode 100644 index 00000000..79bd4d99 --- /dev/null +++ b/src/db/crdel_autop.c @@ -0,0 +1,103 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __crdel_metasub_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__crdel_metasub_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__crdel_metasub", __crdel_metasub_desc, info)); +} + +/* + * PUBLIC: int __crdel_inmem_create_print __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_create_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__crdel_inmem_create", __crdel_inmem_create_desc, info)); +} + +/* + * PUBLIC: int __crdel_inmem_rename_print __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_rename_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__crdel_inmem_rename", __crdel_inmem_rename_desc, info)); +} + +/* + * PUBLIC: int __crdel_inmem_remove_print __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_remove_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return 
(__log_print_record(env, dbtp, lsnp, "__crdel_inmem_remove", __crdel_inmem_remove_desc, info)); +} + +/* + * PUBLIC: int __crdel_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__crdel_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_metasub_print, DB___crdel_metasub)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_create_print, DB___crdel_inmem_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_rename_print, DB___crdel_inmem_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_remove_print, DB___crdel_inmem_remove)) != 0) + return (ret); + return (0); +} diff --git a/src/db/crdel_rec.c b/src/db/crdel_rec.c new file mode 100644 index 00000000..d63074364 --- /dev/null +++ b/src/db/crdel_rec.c @@ -0,0 +1,301 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +/* + * __crdel_metasub_recover -- + * Recovery function for metasub. 
+ * + * PUBLIC: int __crdel_metasub_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_metasub_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_metasub_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_p, ret, t_ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__crdel_metasub_print); + REC_INTRO(__crdel_metasub_read, ip, 0); + + /* + * If we are undoing this operation, but the DB that we got back + * was never really opened, then this open was an in-memory open + * that did not finish. We can let the file creation take care + * of any necessary undo/cleanup. + */ + if (DB_UNDO(op) && !F_ISSET(file_dbp, DB_AM_OPEN_CALLED)) + goto done; + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + /* + * If this is an in-memory file, this might be OK. Also, heap + * can get there through a truncate and we have to redo page 1 + */ + if ((file_dbp->type == DB_HEAP || + F_ISSET(file_dbp, DB_AM_INMEM)) && + (ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) == 0) { + if (F_ISSET(file_dbp, DB_AM_INMEM)) + LSN_NOT_LOGGED(LSN(pagep)); + } else { + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } + } + + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->page.data, argp->page.size); + LSN(pagep) = *lsnp; + + /* + * If this was an in-memory database and we are re-creating + * and this is the meta-data page, then we need to set up a + * bunch of fields in the dbo as well. 
+ */ + if (F_ISSET(file_dbp, DB_AM_INMEM) && + argp->pgno == PGNO_BASE_MD && + (ret = __db_meta_setup(file_dbp->env, file_dbp, + file_dbp->dname, (DBMETA *)pagep, 0, DB_CHK_META)) != 0) + goto out; + } else if (DB_UNDO(op)) { + /* + * We want to undo this page creation. The page creation + * happened in two parts. First, we called __db_pg_alloc which + * was logged separately. Then we wrote the meta-data onto + * the page. So long as we restore the LSN, then the recovery + * for __db_pg_alloc will do everything else. + * + * Don't bother checking the lsn on the page. If we are + * rolling back the next thing is that this page will get + * freed. Opening the subdb will have reinitialized the + * page, but not the lsn. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = argp->lsn; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL && (t_ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0 && + ret == 0) + ret = t_ret; + + REC_CLOSE; +} + +/* + * __crdel_inmem_create_recover -- + * Recovery function for inmem_create. + * + * PUBLIC: int __crdel_inmem_create_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_create_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_create_args *argp; + DB *dbp; + int do_close, ret, t_ret; + + COMPQUIET(info, NULL); + + dbp = NULL; + do_close = 0; + REC_PRINT(__crdel_inmem_create_print); + REC_NOOP_INTRO(__crdel_inmem_create_read); + + /* First, see if the DB handle already exists. */ + if (argp->fileid == DB_LOGFILEID_INVALID) { + if (DB_REDO(op)) + ret = ENOENT; + else + ret = 0; + } else + ret = __dbreg_id_to_db(env, argp->txnp, &dbp, argp->fileid, 0); + + if (DB_REDO(op)) { + /* + * If the dbreg failed, that means that we're creating a + * tmp file. 
+ */ + if (ret != 0) { + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto out; + + F_SET(dbp, DB_AM_RECOVER | DB_AM_INMEM); + memcpy(dbp->fileid, argp->fid.data, DB_FILE_ID_LEN); + if (((ret = __os_strdup(env, + argp->name.data, &dbp->dname)) != 0)) + goto out; + + /* + * This DBP is never going to be entered into the + * dbentry table, so if we leave it open here, + * then we're going to lose it. + */ + do_close = 1; + } + + /* Now, set the fileid. */ + memcpy(dbp->fileid, argp->fid.data, argp->fid.size); + if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0) + goto out; + dbp->preserve_fid = 1; + MAKE_INMEM(dbp); + if ((ret = __env_setup(dbp, + NULL, NULL, argp->name.data, TXN_INVALID, 0)) != 0) + goto out; + ret = __env_mpool(dbp, argp->name.data, 0); + + if (ret == ENOENT) { + dbp->pgsize = argp->pgsize; + if ((ret = __env_mpool(dbp, + argp->name.data, DB_CREATE)) != 0) + goto out; + } else if (ret != 0) + goto out; + } + + if (DB_UNDO(op)) { + if (ret == 0) + ret = __memp_nameop(env, argp->fid.data, NULL, + (const char *)argp->name.data, NULL, 1); + + if (ret == ENOENT || ret == DB_DELETED) + ret = 0; + else + goto out; + } + + *lsnp = argp->prev_lsn; + +out: if (dbp != NULL) { + t_ret = 0; + + if (do_close || ret != 0) + t_ret = __db_close(dbp, NULL, DB_NOSYNC); + if (t_ret != 0 && ret == 0) + ret = t_ret; + } + REC_NOOP_CLOSE; +} + +/* + * __crdel_inmem_rename_recover -- + * Recovery function for inmem_rename. + * + * PUBLIC: int __crdel_inmem_rename_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_rename_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_rename_args *argp; + u_int8_t *fileid; + int ret; + + COMPQUIET(info, NULL); + + REC_PRINT(__crdel_inmem_rename_print); + REC_NOOP_INTRO(__crdel_inmem_rename_read); + fileid = argp->fid.data; + + /* Void out errors because the files may or may not still exist. 
*/ + if (DB_REDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->newname.data, + (const char *)argp->oldname.data, + (const char *)argp->newname.data, 1); + + if (DB_UNDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->oldname.data, + (const char *)argp->newname.data, + (const char *)argp->oldname.data, 1); + + *lsnp = argp->prev_lsn; + ret = 0; + + REC_NOOP_CLOSE; +} + +/* + * __crdel_inmem_remove_recover -- + * Recovery function for inmem_remove. + * + * PUBLIC: int __crdel_inmem_remove_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_remove_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __crdel_inmem_remove_args *argp; + int ret; + + COMPQUIET(info, NULL); + + REC_PRINT(__crdel_inmem_remove_print); + REC_NOOP_INTRO(__crdel_inmem_remove_read); + + /* + * Since removes are delayed; there is no undo for a remove; only redo. + * The remove may fail, which is OK. + */ + if (DB_REDO(op)) { + (void)__memp_nameop(env, + argp->fid.data, NULL, argp->name.data, NULL, 1); + } + + *lsnp = argp->prev_lsn; + ret = 0; + + REC_NOOP_CLOSE; +} diff --git a/src/db/db.c b/src/db/db.c new file mode 100644 index 00000000..f8b7bacb --- /dev/null +++ b/src/db/db.c @@ -0,0 +1,1650 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __db_disassociate __P((DB *)); +static int __db_disassociate_foreign __P ((DB *)); + +#ifdef CONFIG_TEST +static int __db_makecopy __P((ENV *, const char *, const char *)); +static int __qam_testdocopy __P((DB *, const char *)); +#endif + +/* + * DB.C -- + * This file contains the utility functions for the DBP layer. + */ + +/* + * __db_master_open -- + * Open up a handle on a master database. 
+ * + * PUBLIC: int __db_master_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, u_int32_t, int, DB **)); + */ +int +__db_master_open(subdbp, ip, txn, name, flags, mode, dbpp) + DB *subdbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + u_int32_t flags; + int mode; + DB **dbpp; +{ + DB *dbp; + int ret; + + *dbpp = NULL; + + /* Open up a handle on the main database. */ + if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0) + return (ret); + + /* + * It's always a btree. + * Run in the transaction we've created. + * Set the pagesize in case we're creating a new database. + * Flag that we're creating a database with subdatabases. + */ + dbp->pgsize = subdbp->pgsize; + F_SET(dbp, DB_AM_SUBDB); + F_SET(dbp, F_ISSET(subdbp, + DB_AM_RECOVER | DB_AM_SWAP | + DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE)); + + /* + * If there was a subdb specified, then we only want to apply + * DB_EXCL to the subdb, not the actual file. We only got here + * because there was a subdb specified. + */ + LF_CLR(DB_EXCL); + LF_SET(DB_RDWRMASTER); + if ((ret = __db_open(dbp, ip, txn, + name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0) + goto err; + + /* + * The items in dbp are initialized from the master file's meta page. + * Other items such as checksum and encryption are checked when we + * read the meta-page, so we do not check those here. However, if + * the meta-page caused checksumming to be turned on and it wasn't + * already, set it here. + */ + if (F_ISSET(dbp, DB_AM_CHKSUM)) + F_SET(subdbp, DB_AM_CHKSUM); + + /* + * The user may have specified a page size for an existing file, + * which we want to ignore. + */ + subdbp->pgsize = dbp->pgsize; + *dbpp = dbp; + + if (0) { +err: if (!F_ISSET(dbp, DB_AM_DISCARD)) + (void)__db_close(dbp, txn, DB_NOSYNC); + } + + return (ret); +} + +/* + * __db_master_update -- + * Add/Open/Remove a subdatabase from a master database. 
 *
 * PUBLIC: int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *,
 * PUBLIC:     const char *, DBTYPE, mu_action, const char *, u_int32_t));
 */
int
__db_master_update(mdbp, sdbp, ip, txn, subdb, type, action, newname, flags)
	DB *mdbp, *sdbp;	/* Master handle / subdatabase handle. */
	DB_TXN *txn;		/* Enclosing transaction, or NULL. */
	DB_THREAD_INFO *ip;	/* Thread bookkeeping info. */
	const char *subdb;	/* Name of the subdatabase to act on. */
	DBTYPE type;		/* Access method for MU_OPEN creation. */
	mu_action action;	/* MU_REMOVE, MU_RENAME, MU_OPEN or MU_MOVE. */
	const char *newname;	/* New name, MU_RENAME only. */
	u_int32_t flags;	/* DB_CREATE / DB_EXCL for MU_OPEN. */
{
	DBC *dbc, *ndbc;
	DBT key, data, ndata;
	ENV *env;
	PAGE *p, *r;
	db_pgno_t t_pgno;
	int modify, ret, t_ret;

	env = mdbp->env;
	dbc = ndbc = NULL;
	p = NULL;

	/*
	 * Open up a cursor.  If this is CDB and we're creating the database,
	 * make it an update cursor.
	 *
	 * Might we modify the master database?  If so, we'll need to lock.
	 */
	modify = (!F_ISSET(mdbp, DB_AM_RDONLY) &&
	    (action != MU_OPEN || LF_ISSET(DB_CREATE))) ? 1 : 0;

	if ((ret = __db_cursor(mdbp, ip, txn, &dbc,
	    (CDB_LOCKING(env) && modify) ? DB_WRITECURSOR : 0)) != 0)
		return (ret);

	/*
	 * Point the cursor at the record.
	 *
	 * If we're removing or potentially creating an entry, lock the page
	 * with DB_RMW.
	 *
	 * We do multiple cursor operations with the cursor in some cases and
	 * subsequently access the data DBT information.  Set DB_DBT_MALLOC so
	 * we don't risk modification of the data between our uses of it.
	 *
	 * !!!
	 * We don't include the name's nul termination in the database.
	 */
	DB_INIT_DBT(key, subdb, strlen(subdb));
	memset(&data, 0, sizeof(data));
	F_SET(&data, DB_DBT_MALLOC);

	ret = __dbc_get(dbc, &key, &data,
	    DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0));

	/*
	 * What we do next--whether or not we found a record for the
	 * specified subdatabase--depends on what the specified action is.
	 * Handle ret appropriately as the first statement of each case.
	 */
	switch (action) {
	case MU_REMOVE:
		/*
		 * We should have found something if we're removing it.  Note
		 * that in the common case where the DB we're asking to remove
		 * doesn't exist, we won't get this far; __db_subdb_remove
		 * will already have returned an error from __db_open.
		 */
		if (ret != 0)
			goto err;

		/*
		 * Delete the subdatabase entry first; if this fails,
		 * we don't want to touch the actual subdb pages.
		 */
		if ((ret = __dbc_del(dbc, 0)) != 0)
			goto err;

		/*
		 * We're handling actual data, not on-page meta-data,
		 * so it hasn't been converted to/from opposite
		 * endian architectures.  Do it explicitly, now.
		 */
		memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
		DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
		if ((ret = __memp_fget(mdbp->mpf, &sdbp->meta_pgno,
		    ip, dbc->txn, DB_MPOOL_DIRTY, &p)) != 0)
			goto err;

		/* Free the root on the master db if it was created. */
		if (TYPE(p) == P_BTREEMETA &&
		    ((BTMETA *)p)->root != PGNO_INVALID) {
			if ((ret = __memp_fget(mdbp->mpf,
			    &((BTMETA *)p)->root, ip, dbc->txn,
			    DB_MPOOL_DIRTY, &r)) != 0)
				goto err;

			/* Free and put the page. */
			if ((ret = __db_free(dbc, r, 0)) != 0) {
				/* __db_free consumed r even on failure. */
				r = NULL;
				goto err;
			}
		}
		/* Free and put the page. */
		if ((ret = __db_free(dbc, p, 0)) != 0) {
			/* __db_free consumed p even on failure. */
			p = NULL;
			goto err;
		}
		p = NULL;
		break;
	case MU_RENAME:
		/* We should have found something if we're renaming it. */
		if (ret != 0)
			goto err;

		/*
		 * Before we rename, we need to make sure we're not
		 * overwriting another subdatabase, or else this operation
		 * won't be undoable.  Open a second cursor and check
		 * for the existence of newname; it shouldn't appear under
		 * us since we hold the metadata lock.
		 */
		if ((ret = __db_cursor(mdbp, ip, txn, &ndbc,
		    CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0)
			goto err;
		DB_SET_DBT(key, newname, strlen(newname));

		/*
		 * We don't actually care what the meta page of the potentially-
		 * overwritten DB is; we just care about existence.
		 */
		memset(&ndata, 0, sizeof(ndata));
		F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);

		if ((ret = __dbc_get(ndbc, &key, &ndata, DB_SET)) == 0) {
			/* A subdb called newname exists.  Bail. */
			ret = EEXIST;
			__db_errx(env, DB_STR_A("0673",
			    "rename: database %s exists", "%s"), newname);
			goto err;
		} else if (ret != DB_NOTFOUND)
			goto err;

		/*
		 * Now do the put first; we don't want to lose our only
		 * reference to the subdb.  Use the second cursor so the
		 * first one continues to point to the old record.
		 */
		if ((ret = __dbc_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
			goto err;
		if ((ret = __dbc_del(dbc, 0)) != 0) {
			/*
			 * If the delete fails, try to delete the record
			 * we just put, in case we're not txn-protected.
			 */
			(void)__dbc_del(ndbc, 0);
			goto err;
		}

		break;
	case MU_OPEN:
		/*
		 * Get the subdatabase information.  If it already exists,
		 * copy out the page number and we're done.
		 */
		switch (ret) {
		case 0:
			if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) {
				ret = EEXIST;
				goto err;
			}
			memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
			DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
			goto done;
		case DB_NOTFOUND:
			if (LF_ISSET(DB_CREATE))
				break;
			/*
			 * No db_err, it is reasonable to remove a
			 * nonexistent db.
			 */
			ret = ENOENT;
			goto err;
		default:
			goto err;
		}

		/* Create a subdatabase. */
		if (F_ISSET(mdbp, DB_AM_RDONLY)) {
			ret = EBADF;
			goto err;
		}
		if ((ret = __db_new(dbc,
		    type == DB_HASH ? P_HASHMETA : P_BTREEMETA, NULL, &p)) != 0)
			goto err;
		sdbp->meta_pgno = PGNO(p);

		/*
		 * XXX
		 * We're handling actual data, not on-page meta-data, so it
		 * hasn't been converted to/from opposite endian architectures.
		 * Do it explicitly, now.
		 */
		t_pgno = PGNO(p);
		DB_HTONL_SWAP(env, &t_pgno);
		memset(&ndata, 0, sizeof(ndata));
		ndata.data = &t_pgno;
		ndata.size = sizeof(db_pgno_t);
		if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0)
			goto err;
		F_SET(sdbp, DB_AM_CREATED);
		break;

	case MU_MOVE:
		/* We should have found something if we're moving it. */
		if (ret != 0)
			goto err;
		/* Rewrite the entry with the subdb's (swapped) meta pgno. */
		t_pgno = sdbp->meta_pgno;
		DB_HTONL_SWAP(env, &t_pgno);
		memset(&ndata, 0, sizeof(ndata));
		ndata.data = &t_pgno;
		ndata.size = sizeof(db_pgno_t);
		if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0)
			goto err;
		mdbp->mpf->mfp->revision++;
	}

err:
done:	/*
	 * If we allocated a page: if we're successful, mark the page dirty
	 * and return it to the cache, otherwise, discard/free it.
	 */
	if (p != NULL && (t_ret = __memp_fput(mdbp->mpf,
	    dbc->thread_info, p, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;

	/* Discard the cursor(s) and data. */
	if (data.data != NULL)
		__os_ufree(env, data.data);
	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;
	if (ndbc != NULL && (t_ret = __dbc_close(ndbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __env_dbreg_setup --
 *	Register a DB handle with the logging subsystem (dbreg), assigning
 *	it a log file id when we're actively logging.
 *
 * PUBLIC: int __env_dbreg_setup __P((DB *,
 * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
 */
int
__env_dbreg_setup(dbp, txn, fname, dname, id)
	DB *dbp;
	DB_TXN *txn;
	const char *fname, *dname;
	u_int32_t id;
{
	ENV *env;
	int ret;

	env = dbp->env;
	/*
	 * Only set up once per handle; skip read-only handles and
	 * non-transactional/non-recovery opens (unless a debug build
	 * wants every handle registered).
	 */
	if (dbp->log_filename == NULL
#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC)
	    && (txn != NULL || F_ISSET(dbp, DB_AM_RECOVER))
#endif
#if !defined(DEBUG_ROP)
	    && !F_ISSET(dbp, DB_AM_RDONLY)
#endif
	    ) {
		/* In-memory databases are registered by their dname. */
		if ((ret = __dbreg_setup(dbp,
		    F_ISSET(dbp, DB_AM_INMEM) ? dname: fname,
		    F_ISSET(dbp, DB_AM_INMEM) ?
		    NULL : dname, id)) != 0)
			return (ret);

		/*
		 * If we're actively logging and our caller isn't a
		 * recovery function that already did so, then assign
		 * this dbp a log fileid.
		 */
		if (DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) &&
		    (ret = __dbreg_new_id(dbp, txn)) != 0)
			return (ret);
	}
	return (0);
}

/*
 * __env_setup --
 *	Set up the underlying environment during a db_open.
 *
 * PUBLIC: int __env_setup __P((DB *,
 * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t, u_int32_t));
 */
int
__env_setup(dbp, txn, fname, dname, id, flags)
	DB *dbp;
	DB_TXN *txn;
	const char *fname, *dname;
	u_int32_t id, flags;
{
	DB *ldbp;
	DB_ENV *dbenv;
	ENV *env;
	u_int32_t maxid;
	int ret;

	env = dbp->env;
	dbenv = env->dbenv;

	/* If we don't yet have an environment, it's time to create it. */
	if (!F_ISSET(env, ENV_OPEN_CALLED)) {
#if defined(HAVE_MIXED_SIZE_ADDRESSING) && (SIZEOF_CHAR_P == 8)
		__db_errx(env, DB_STR("0701", "DB_PRIVATE is not supported by"
		    " 64-bit applications in mixed-size-addressing mode"));
		return (EINVAL);
#endif
		/* Make sure we have at least DB_MINCACHE pages in our cache. */
		if (dbenv->mp_gbytes == 0 &&
		    dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
		    (ret = __memp_set_cachesize(
		    dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
			return (ret);

		if ((ret = __env_open(dbenv, NULL, DB_CREATE |
		    DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
			return (ret);
	}

	/* Join the underlying cache. */
	if ((!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) &&
	    (ret = __env_mpool(dbp, fname, flags)) != 0)
		return (ret);

	/* We may need a per-thread mutex. */
	if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc(
	    env, MTX_DB_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbp->mutex)) != 0)
		return (ret);

	/*
	 * Set up a bookkeeping entry for this database in the log region,
	 * if such a region exists.  Note that even if we're in recovery
	 * or a replication client, where we won't log registries, we'll
	 * still need an FNAME struct, so LOGGING_ON is the correct macro.
	 */
	if (LOGGING_ON(env) &&
	    (!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) &&
	    (ret = __env_dbreg_setup(dbp, txn, fname, dname, id)) != 0)
		return (ret);

	/*
	 * Insert ourselves into the ENV's dblist.  We allocate a
	 * unique ID to each {fileid, meta page number} pair, and to
	 * each temporary file (since they all have a zero fileid).
	 * This ID gives us something to use to tell which DB handles
	 * go with which databases in all the cursor adjustment
	 * routines, where we don't want to do a lot of ugly and
	 * expensive memcmps.
	 */
	MUTEX_LOCK(env, env->mtx_dblist);
	maxid = 0;
	TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) {
		/*
		 * There are three cases: on-disk database (first clause),
		 * named in-memory database (second clause), temporary database
		 * (never matches; no clause).
		 */
		if (!F_ISSET(dbp, DB_AM_INMEM)) {
			if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN)
			    == 0 && ldbp->meta_pgno == dbp->meta_pgno)
				break;
		} else if (dname != NULL) {
			if (F_ISSET(ldbp, DB_AM_INMEM) &&
			    ldbp->dname != NULL &&
			    strcmp(ldbp->dname, dname) == 0)
				break;
		}
		if (ldbp->adj_fileid > maxid)
			maxid = ldbp->adj_fileid;
	}

	/*
	 * If ldbp is NULL, we didn't find a match.  Assign the dbp an
	 * adj_fileid one higher than the largest we found, and
	 * insert it at the head of the master dbp list.
	 *
	 * If ldbp is not NULL, it is a match for our dbp.  Give dbp
	 * the same ID that ldbp has, and add it after ldbp so they're
	 * together in the list.
	 */
	if (ldbp == NULL) {
		dbp->adj_fileid = maxid + 1;
		TAILQ_INSERT_HEAD(&env->dblist, dbp, dblistlinks);
	} else {
		dbp->adj_fileid = ldbp->adj_fileid;
		TAILQ_INSERT_AFTER(&env->dblist, ldbp, dbp, dblistlinks);
	}
	MUTEX_UNLOCK(env, env->mtx_dblist);

	return (0);
}

/*
 * __env_mpool --
 *	Set up the underlying environment cache during a db_open.
 *
 * PUBLIC: int __env_mpool __P((DB *, const char *, u_int32_t));
 */
int
__env_mpool(dbp, fname, flags)
	DB *dbp;
	const char *fname;
	u_int32_t flags;
{
	DBT pgcookie;
	DB_MPOOLFILE *mpf;
	DB_PGINFO pginfo;
	ENV *env;
	int fidset, ftype, ret;
	int32_t lsn_off;
	u_int8_t nullfid[DB_FILE_ID_LEN];
	u_int32_t clear_len;

	env = dbp->env;

	/* The LSN is the first entry on a DB page, byte offset 0. */
	lsn_off = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LSN_OFF_NOTSET : 0;

	/* It's possible that this database is already open. */
	if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
		return (0);

	/*
	 * If we need to pre- or post-process a file's pages on I/O, set the
	 * file type.  If it's a hash file, always call the pgin and pgout
	 * routines.  This means that hash files can never be mapped into
	 * process memory.  If it's a btree file and requires swapping, we
	 * need to page the file in and out.  This has to be right -- we can't
	 * mmap files that are being paged in and out.
	 */
	switch (dbp->type) {
	case DB_BTREE:
	case DB_HEAP:
	case DB_RECNO:
		ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM)
		    ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
		clear_len = CRYPTO_ON(env) ?
		    (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
		    DB_PAGE_DB_LEN;
		break;
	case DB_HASH:
		/* Hash always needs pgin/pgout; see comment above. */
		ftype = DB_FTYPE_SET;
		clear_len = CRYPTO_ON(env) ?
		    (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
		    DB_PAGE_DB_LEN;
		break;
	case DB_QUEUE:
		ftype = F_ISSET(dbp,
		    DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ?
		    DB_FTYPE_SET : DB_FTYPE_NOTSET;

		/*
		 * If we came in here without a pagesize set, then we need
		 * to mark the in-memory handle as having clear_len not
		 * set, because we don't really know the clear length or
		 * the page size yet (since the file doesn't yet exist).
		 */
		clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET;
		break;
	case DB_UNKNOWN:
		/*
		 * If we're running in the verifier, our database might
		 * be corrupt and we might not know its type--but we may
		 * still want to be able to verify and salvage.
		 *
		 * If we can't identify the type, it's not going to be safe
		 * to call __db_pgin--we pretty much have to give up all
		 * hope of salvaging cross-endianness.  Proceed anyway;
		 * at worst, the database will just appear more corrupt
		 * than it actually is, but at best, we may be able
		 * to salvage some data even with no metadata page.
		 */
		if (F_ISSET(dbp, DB_AM_VERIFYING)) {
			ftype = DB_FTYPE_NOTSET;
			clear_len = DB_PAGE_DB_LEN;
			break;
		}

		/*
		 * This might be an in-memory file and we won't know its
		 * file type until after we open it and read the meta-data
		 * page.
		 */
		if (F_ISSET(dbp, DB_AM_INMEM)) {
			clear_len = DB_CLEARLEN_NOTSET;
			ftype = DB_FTYPE_NOTSET;
			lsn_off = DB_LSN_OFF_NOTSET;
			break;
		}
		/* FALLTHROUGH */
	default:
		return (__db_unknown_type(env, "DB->open", dbp->type));
	}

	mpf = dbp->mpf;

	/* Only push a fileid down to mpool if the caller supplied one. */
	memset(nullfid, 0, DB_FILE_ID_LEN);
	fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN);
	if (fidset)
		(void)__memp_set_fileid(mpf, dbp->fileid);

	(void)__memp_set_clear_len(mpf, clear_len);
	(void)__memp_set_ftype(mpf, ftype);
	(void)__memp_set_lsn_offset(mpf, lsn_off);

	/* Cookie handed to the pgin/pgout routines for this file. */
	pginfo.db_pagesize = dbp->pgsize;
	pginfo.flags =
	    F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
	pginfo.type = dbp->type;
	pgcookie.data = &pginfo;
	pgcookie.size = sizeof(DB_PGINFO);
	(void)__memp_set_pgcookie(mpf, &pgcookie);

#ifndef DIAG_MVCC
	if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
#endif
		if (F_ISSET(dbp, DB_AM_TXN) &&
		    dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN)
			LF_SET(DB_MULTIVERSION);

	if ((ret = __memp_fopen(mpf, NULL, fname, &dbp->dirname,
	    LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN | DB_MULTIVERSION |
	    DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) |
	    (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) |
	    (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0),
	    0, dbp->pgsize)) != 0) {
		/*
		 * The open didn't work; we need to reset the mpf,
		 * retaining the in-memory semantics (if any).
		 */
		(void)__memp_fclose(dbp->mpf, 0);
		(void)__memp_fcreate(env, &dbp->mpf);
		if (F_ISSET(dbp, DB_AM_INMEM))
			MAKE_INMEM(dbp);
		return (ret);
	}

	/*
	 * Set the open flag.  We use it to mean that the dbp has gone
	 * through mpf setup, including dbreg_register.  Also, below,
	 * the underlying access method open functions may want to do
	 * things like acquire cursors, so the open flag has to be set
	 * before calling them.
	 */
	F_SET(dbp, DB_AM_OPEN_CALLED);
	if (!fidset && fname != NULL) {
		/* Adopt the fileid mpool generated for the file. */
		(void)__memp_get_fileid(dbp->mpf, dbp->fileid);
		dbp->preserve_fid = 1;
	}

	return (0);
}

/*
 * __db_close --
 *	DB->close method.
 *
 * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t));
 */
int
__db_close(dbp, txn, flags)
	DB *dbp;
	DB_TXN *txn;
	u_int32_t flags;
{
	ENV *env;
	int db_ref, deferred_close, ret, t_ret;

	env = dbp->env;
	deferred_close = 0;

	PERFMON4(env, db, close,
	    dbp->fname, dbp->dname, flags, &dbp->fileid[0]);

	/* Refresh the structure and close any underlying resources. */
	ret = __db_refresh(dbp, txn, flags, &deferred_close, 0);

	/*
	 * If we've deferred the close because the logging of the close failed,
	 * return our failure right away without destroying the handle.
	 */
	if (deferred_close)
		return (ret);

	/* !!!
	 * This code has an apparent race between the moment we read and
	 * decrement env->db_ref and the moment we check whether it's 0.
	 * However, if the environment is DBLOCAL, the user shouldn't have a
	 * reference to the env handle anyway; the only way we can get
	 * multiple dbps sharing a local env is if we open them internally
	 * during something like a subdatabase open.  If any such thing is
	 * going on while the user is closing the original dbp with a local
	 * env, someone's already badly screwed up, so there's no reason
	 * to bother engineering around this possibility.
	 */
	MUTEX_LOCK(env, env->mtx_dblist);
	db_ref = --env->db_ref;
	MUTEX_UNLOCK(env, env->mtx_dblist);
	if (F_ISSET(env, ENV_DBLOCAL) && db_ref == 0 &&
	    (t_ret = __env_close(env->dbenv, 0)) != 0 && ret == 0)
		ret = t_ret;

	/* Free the database handle. */
	memset(dbp, CLEAR_BYTE, sizeof(*dbp));
	__os_free(env, dbp);

	return (ret);
}

/*
 * __db_refresh --
 *	Refresh the DB structure, releasing any allocated resources.
+ * This does most of the work of closing files now because refresh + * is what is used during abort processing (since we can't destroy + * the actual handle) and during abort processing, we may have a + * fully opened handle. + * + * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); + */ +int +__db_refresh(dbp, txn, flags, deferred_closep, reuse) + DB *dbp; + DB_TXN *txn; + u_int32_t flags; + int *deferred_closep, reuse; +{ + DB *sdbp; + DBC *dbc; + DB_FOREIGN_INFO *f_info, *tmp; + DB_LOCKER *locker; + DB_LOCKREQ lreq; + ENV *env; + REGENV *renv; + REGINFO *infop; + u_int32_t save_flags; + int resync, ret, t_ret; + + ret = 0; + + env = dbp->env; + infop = env->reginfo; + if (infop != NULL) + renv = infop->primary; + else + renv = NULL; + + /* + * If this dbp is not completely open, avoid trapping by trying to + * sync without an mpool file. + */ + if (dbp->mpf == NULL) + LF_SET(DB_NOSYNC); + + /* If never opened, or not currently open, it's easy. */ + if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) + goto never_opened; + + /* + * If we have any secondary indices, disassociate them from us. + * We don't bother with the mutex here; it only protects some + * of the ops that will make us core-dump mid-close anyway, and + * if you're trying to do something with a secondary *while* you're + * closing the primary, you deserve what you get. The disassociation + * is mostly done just so we can close primaries and secondaries in + * any order--but within one thread of control. + */ + LIST_FOREACH(sdbp, &dbp->s_secondaries, s_links) { + LIST_REMOVE(sdbp, s_links); + if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0) + ret = t_ret; + } + if (F_ISSET(dbp, DB_AM_SECONDARY)) + LIST_REMOVE(dbp, s_links); + + /* + * Disassociate ourself from any databases using us as a foreign key + * database by clearing the referring db's pointer. Reclaim memory. 
+ */ + f_info = LIST_FIRST(&dbp->f_primaries); + while (f_info != NULL) { + tmp = LIST_NEXT(f_info, f_links); + LIST_REMOVE(f_info, f_links); + f_info->dbp->s_foreign = NULL; + __os_free(env, f_info); + f_info = tmp; + } + + if (dbp->s_foreign != NULL && + (t_ret = __db_disassociate_foreign(dbp)) != 0 && ret == 0) + ret = t_ret; + + /* + * Sync the underlying access method. Do before closing the cursors + * because DB->sync allocates cursors in order to write Recno backing + * source text files. + * + * Sync is slow on some systems, notably Solaris filesystems where the + * entire buffer cache is searched. If we're in recovery, don't flush + * the file, it's not necessary. + */ + if (!LF_ISSET(DB_NOSYNC) && + !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && + (t_ret = __db_sync(dbp)) != 0 && ret == 0) + ret = t_ret; + + /* + * Go through the active cursors, unregister each cursor from its + * transaction if any, and call the cursor recycle routine, + * which resolves pending operations and moves the cursors onto the + * free list. Then, walk the free list and call the cursor destroy + * routine. Note that any failure on a close is considered "really + * bad" and we just break out of the loop and force forward. + */ + resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 0 : 1; + while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) { + if (dbc->txn != NULL) + TAILQ_REMOVE(&(dbc->txn->my_cursors), dbc, txn_cursors); + + if ((t_ret = __dbc_close(dbc)) != 0) { + if (ret == 0) + ret = t_ret; + break; + } + } + + while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) + if ((t_ret = __dbc_destroy(dbc)) != 0) { + if (ret == 0) + ret = t_ret; + break; + } + + /* + * Close any outstanding join cursors. Join cursors destroy themselves + * on close and have no separate destroy routine. We don't have to set + * the resync flag here, because join cursors aren't write cursors. 
+ */ + while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL) + if ((t_ret = __db_join_close(dbc)) != 0) { + if (ret == 0) + ret = t_ret; + break; + } + + /* + * Sync the memory pool, even though we've already called DB->sync, + * because closing cursors can dirty pages by deleting items they + * referenced. + * + * Sync is slow on some systems, notably Solaris filesystems where the + * entire buffer cache is searched. If we're in recovery, don't flush + * the file, it's not necessary. + */ + if (resync && !LF_ISSET(DB_NOSYNC) && + !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && + (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + + /* + * If there is a file extension watermark associated with this + * database, we don't need it any more. + */ + __txn_remove_fe_watermark(txn, dbp); + +never_opened: + MUTEX_LOCK(env, env->mtx_dblist); + /* + * At this point, we haven't done anything to render the DB handle + * unusable, at least by a transaction abort. Take the opportunity + * now to log the file close if we have initialized the logging + * information. If this log fails and we're in a transaction, + * we have to bail out of the attempted close; we'll need a dbp in + * order to successfully abort the transaction, and we can't conjure + * a new one up because we haven't gotten out the dbreg_register + * record that represents the close. In this case, we put off + * actually closing the dbp until we've performed the abort. + */ + if (!reuse && LOGGING_ON(dbp->env) && dbp->log_filename != NULL) { + /* + * Discard the log file id, if any. We want to log the close + * if and only if this is not a recovery dbp or a client dbp, + * or a dead dbp handle. 
+ */ + DB_ASSERT(env, renv != NULL); + if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(env) || + dbp->timestamp != renv->rep_timestamp) { + if ((t_ret = __dbreg_revoke_id(dbp, + 0, DB_LOGFILEID_INVALID)) == 0 && ret == 0) + ret = t_ret; + if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0) + ret = t_ret; + } else { + if ((t_ret = __dbreg_close_id(dbp, + txn, DBREG_CLOSE)) != 0 && txn != NULL) { + MUTEX_UNLOCK(env, env->mtx_dblist); + /* + * We're in a txn and the attempt to log the + * close failed; let the txn subsystem know + * that we need to destroy this dbp once we're + * done with the abort, then bail from the + * close. + * + * Note that if the attempt to put off the + * close -also- fails--which it won't unless + * we're out of heap memory--we're really + * screwed. Panic. + */ + if ((ret = + __txn_closeevent(env, txn, dbp)) != 0) + return (__env_panic(env, ret)); + if (deferred_closep != NULL) + *deferred_closep = 1; + return (t_ret); + } + /* + * If dbreg_close_id failed and we were not in a + * transaction, then we need to finish this close + * because the caller can't do anything with the + * handle after we return an error. We rely on + * dbreg_close_id to mark the entry in some manner + * so that we do not do a clean shutdown of this + * environment. If shutdown isn't clean, then the + * application *must* run recovery and that will + * generate the RCLOSE record. + */ + } + + } + + /* Close any handle we've been holding since the open. */ + if (dbp->saved_open_fhp != NULL && + (t_ret = __os_closehandle(env, dbp->saved_open_fhp)) != 0 && + ret == 0) + ret = t_ret; + + /* + * Remove this DB handle from the ENV's dblist, if it's been added. + * + * Close our reference to the underlying cache while locked, we don't + * want to race with a thread searching for our underlying cache link + * while opening a DB handle. + * + * The DB handle may not yet have been added to the ENV list, don't + * blindly call the underlying TAILQ_REMOVE macro. 
Explicitly reset + * the field values to NULL so that we can't call TAILQ_REMOVE twice. + */ + if (!reuse && + (dbp->dblistlinks.tqe_next != NULL || + dbp->dblistlinks.tqe_prev != NULL)) { + TAILQ_REMOVE(&env->dblist, dbp, dblistlinks); + dbp->dblistlinks.tqe_next = NULL; + dbp->dblistlinks.tqe_prev = NULL; + } + + /* Close the memory pool file handle. */ + if (dbp->mpf != NULL) { + if ((t_ret = __memp_fclose(dbp->mpf, + F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 && + ret == 0) + ret = t_ret; + dbp->mpf = NULL; + if (reuse && + (t_ret = __memp_fcreate(env, &dbp->mpf)) != 0 && + ret == 0) + ret = t_ret; + } + + MUTEX_UNLOCK(env, env->mtx_dblist); + + /* + * Call the access specific close function. + * + * We do this here rather than in __db_close as we need to do this when + * aborting an open so that file descriptors are closed and abort of + * renames can succeed on platforms that lock open files (such as + * Windows). In particular, we need to ensure that all the extents + * associated with a queue are closed so that queue renames can be + * aborted. + * + * It is also important that we do this before releasing the handle + * lock, because dbremove and dbrename assume that once they have the + * handle lock, it is safe to modify the underlying file(s). + * + * !!! + * Because of where these functions are called in the DB handle close + * process, these routines can't do anything that would dirty pages or + * otherwise affect closing down the database. Specifically, we can't + * abort and recover any of the information they control. 
+ */ +#ifdef HAVE_PARTITION + if (dbp->p_internal != NULL && + (t_ret = __partition_close(dbp, txn, flags)) != 0 && ret == 0) + ret = t_ret; +#endif + if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __heap_db_close(dbp)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0) + ret = t_ret; + + /* + * !!! + * At this point, the access-method specific information has been + * freed. From now on, we can use the dbp, but not touch any + * access-method specific data. + */ + + if (!reuse && dbp->locker != NULL) { + /* We may have pending trade operations on this dbp. */ + if (txn == NULL) + txn = dbp->cur_txn; + if (IS_REAL_TXN(txn)) + __txn_remlock(env, + txn, &dbp->handle_lock, dbp->locker); + + /* We may be holding the handle lock; release it. */ + lreq.op = DB_LOCK_PUT_ALL; + lreq.obj = NULL; + if ((t_ret = __lock_vec(env, + dbp->locker, 0, &lreq, 1, NULL)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = + __lock_id_free(env, dbp->locker)) != 0 && ret == 0) + ret = t_ret; + dbp->locker = NULL; + LOCK_INIT(dbp->handle_lock); + } + + /* + * If this is a temporary file (un-named in-memory file), then + * discard the locker ID allocated as the fileid. + */ + if (LOCKING_ON(env) && + F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid && + *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID) { + if ((t_ret = __lock_getlocker(env->lk_handle, + *(u_int32_t *)dbp->fileid, 0, &locker)) == 0) + t_ret = __lock_id_free(env, locker); + if (ret == 0) + ret = t_ret; + } + + if (reuse) { + /* + * If we are reusing this dbp, then we're done now. Re-init + * the handle, preserving important flags, and then return. + * This code is borrowed from __db_init, which does more + * than we can do here. 
+ */ + save_flags = F_ISSET(dbp, DB_AM_INMEM | + DB_AM_RDONLY | DB_AM_TXN); + + if ((ret = __bam_db_create(dbp)) != 0) + return (ret); + if ((ret = __ham_db_create(dbp)) != 0) + return (ret); + if ((ret = __qam_db_create(dbp)) != 0) + return (ret); + + /* Restore flags */ + dbp->flags = dbp->orig_flags | save_flags; + + if (FLD_ISSET(save_flags, DB_AM_INMEM)) { + /* + * If this is inmem, then it may have a fileid + * even if it was never opened, and we need to + * clear out that fileid. + */ + memset(dbp->fileid, 0, sizeof(dbp->fileid)); + MAKE_INMEM(dbp); + } + return (ret); + } + + dbp->type = DB_UNKNOWN; + + /* + * The thread mutex may have been invalidated in __dbreg_close_id if the + * fname refcount did not go to 0. If not, discard the thread mutex. + */ + if ((t_ret = __mutex_free(env, &dbp->mutex)) != 0 && ret == 0) + ret = t_ret; + + /* Discard any memory allocated for the file and database names. */ + if (dbp->fname != NULL) { + __os_free(dbp->env, dbp->fname); + dbp->fname = NULL; + } + if (dbp->dname != NULL) { + __os_free(dbp->env, dbp->dname); + dbp->dname = NULL; + } + + /* Discard any memory used to store returned data. */ + if (dbp->my_rskey.data != NULL) + __os_free(dbp->env, dbp->my_rskey.data); + if (dbp->my_rkey.data != NULL) + __os_free(dbp->env, dbp->my_rkey.data); + if (dbp->my_rdata.data != NULL) + __os_free(dbp->env, dbp->my_rdata.data); + + /* For safety's sake; we may refresh twice. */ + memset(&dbp->my_rskey, 0, sizeof(DBT)); + memset(&dbp->my_rkey, 0, sizeof(DBT)); + memset(&dbp->my_rdata, 0, sizeof(DBT)); + + /* Clear out fields that normally get set during open. 
*/ + memset(dbp->fileid, 0, sizeof(dbp->fileid)); + dbp->adj_fileid = 0; + dbp->meta_pgno = 0; + dbp->cur_locker = NULL; + dbp->cur_txn = NULL; + dbp->associate_locker = NULL; + dbp->open_flags = 0; + + /* + * If we are being refreshed with a txn specified, then we need + * to make sure that we clear out the lock handle field, because + * releasing all the locks for this transaction will release this + * lock and we don't want close to stumble upon this handle and + * try to close it. + */ + if (txn != NULL) + LOCK_INIT(dbp->handle_lock); + + /* Reset flags to whatever the user configured. */ + dbp->flags = dbp->orig_flags; + + return (ret); +} + +/* + * __db_disassociate -- + * Destroy the association between a given secondary and its primary. + */ +static int +__db_disassociate(sdbp) + DB *sdbp; +{ + DBC *dbc; + int ret, t_ret; + + ret = 0; + + sdbp->s_callback = NULL; + sdbp->s_primary = NULL; + sdbp->get = sdbp->stored_get; + sdbp->close = sdbp->stored_close; + + /* + * Complain, but proceed, if we have any active cursors. (We're in + * the middle of a close, so there's really no turning back.) + */ + if (sdbp->s_refcnt != 1 || + TAILQ_FIRST(&sdbp->active_queue) != NULL || + TAILQ_FIRST(&sdbp->join_queue) != NULL) { + __db_errx(sdbp->env, DB_STR("0674", +"Closing a primary DB while a secondary DB has active cursors is unsafe")); + ret = EINVAL; + } + sdbp->s_refcnt = 0; + + while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL) + if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0) + ret = t_ret; + + F_CLR(sdbp, DB_AM_SECONDARY); + return (ret); +} + +/* + * __db_disassociate_foreign -- + * Destroy the association between a given secondary and its foreign. 
+ */ +static int +__db_disassociate_foreign(sdbp) + DB *sdbp; +{ + DB *fdbp; + DB_FOREIGN_INFO *f_info, *tmp; + int ret; + + if (sdbp->s_foreign == NULL) + return (0); + if ((ret = __os_malloc(sdbp->env, sizeof(DB_FOREIGN_INFO), &tmp)) != 0) + return (ret); + + fdbp = sdbp->s_foreign; + ret = 0; + f_info = LIST_FIRST(&fdbp->f_primaries); + while (f_info != NULL) { + tmp = LIST_NEXT(f_info, f_links); + if (f_info ->dbp == sdbp) { + LIST_REMOVE(f_info, f_links); + __os_free(sdbp->env, f_info); + } + f_info = tmp; + } + + return (ret); +} + +/* + * __db_log_page + * Log a meta-data or root page during a subdatabase create operation. + * + * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); + */ +int +__db_log_page(dbp, txn, lsn, pgno, page) + DB *dbp; + DB_TXN *txn; + DB_LSN *lsn; + db_pgno_t pgno; + PAGE *page; +{ + DBT page_dbt; + DB_LSN new_lsn; + int ret; + + if (!LOGGING_ON(dbp->env) || txn == NULL) + return (0); + + memset(&page_dbt, 0, sizeof(page_dbt)); + page_dbt.size = dbp->pgsize; + page_dbt.data = page; + + ret = __crdel_metasub_log(dbp, txn, &new_lsn, F_ISSET(dbp, + DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0, pgno, &page_dbt, lsn); + + if (ret == 0) + page->lsn = new_lsn; + return (ret); +} + +/* + * __db_walk_cursors + * Walk all cursors for a database. 
+ * + * PUBLIC: int __db_walk_cursors __P((DB *, DBC *, + * PUBLIC: int (*) __P((DBC *, DBC *, + * PUBLIC: u_int32_t *, db_pgno_t, u_int32_t, void *)), + * PUBLIC: u_int32_t *, db_pgno_t, u_int32_t, void *)); + */ + int + __db_walk_cursors(dbp, my_dbc, func, countp, pgno, indx, args) + DB *dbp; + DBC *my_dbc; + int (*func)__P((DBC *, DBC *, + u_int32_t *, db_pgno_t, u_int32_t, void *)); + u_int32_t *countp; + db_pgno_t pgno; + u_int32_t indx; + void *args; +{ + ENV *env; + DB *ldbp; + DBC *dbc; + int ret; + + env = dbp->env; + ret = 0; + + MUTEX_LOCK(env, env->mtx_dblist); + FIND_FIRST_DB_MATCH(env, dbp, ldbp); + for (*countp = 0; + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { +loop: MUTEX_LOCK(env, ldbp->mutex); + TAILQ_FOREACH(dbc, &ldbp->active_queue, links) + if ((ret = (func)(dbc, my_dbc, + countp, pgno, indx, args)) != 0) + break; + /* + * We use the error to communicate that function + * dropped the mutex. + */ + if (ret == DB_LOCK_NOTGRANTED) + goto loop; + MUTEX_UNLOCK(env, ldbp->mutex); + if (ret != 0) + break; + } + MUTEX_UNLOCK(env, env->mtx_dblist); + return (ret); +} + +/* + * __db_backup_name + * Create the backup file name for a given file. + * + * PUBLIC: int __db_backup_name __P((ENV *, + * PUBLIC: const char *, DB_TXN *, char **)); + */ +#undef BACKUP_PREFIX +#define BACKUP_PREFIX "__db." + +#undef MAX_INT_TO_HEX +#define MAX_INT_TO_HEX 8 + +int +__db_backup_name(env, name, txn, backup) + ENV *env; + const char *name; + DB_TXN *txn; + char **backup; +{ + u_int32_t id; + size_t len; + int ret; + char *p, *retp; + + *backup = NULL; + + /* + * Part of the name may be a full path, so we need to make sure that + * we allocate enough space for it, even in the case where we don't + * use the entire filename for the backup name. + */ + len = strlen(name) + strlen(BACKUP_PREFIX) + 2 * MAX_INT_TO_HEX + 1; + if ((ret = __os_malloc(env, len, &retp)) != 0) + return (ret); + + /* + * Create the name. 
Backup file names are in one of 2 forms: in a + * transactional env "__db.TXNID.ID", where ID is a random number, + * and in any other env "__db.FILENAME". + * + * In addition, the name passed may contain an env-relative path. + * In that case, put the "__db." in the right place (in the last + * component of the pathname). + * + * There are four cases here: + * 1. simple path w/out transaction + * 2. simple path + transaction + * 3. multi-component path w/out transaction + * 4. multi-component path + transaction + */ + p = __db_rpath(name); + if (IS_REAL_TXN(txn)) { + __os_unique_id(env, &id); + if (p == NULL) /* Case 2. */ + snprintf(retp, len, "%s%x.%x", + BACKUP_PREFIX, txn->txnid, id); + else /* Case 4. */ + snprintf(retp, len, "%.*s%x.%x", + (int)(p - name) + 1, name, txn->txnid, id); + } else { + if (p == NULL) /* Case 1. */ + snprintf(retp, len, "%s%s", BACKUP_PREFIX, name); + else /* Case 3. */ + snprintf(retp, len, "%.*s%s%s", + (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1); + } + + *backup = retp; + return (0); +} + +#ifdef CONFIG_TEST +/* + * __db_testcopy + * Create a copy of all backup files and our "main" DB. 
+ * + * PUBLIC: #ifdef CONFIG_TEST + * PUBLIC: int __db_testcopy __P((ENV *, DB *, const char *)); + * PUBLIC: #endif + */ +int +__db_testcopy(env, dbp, name) + ENV *env; + DB *dbp; + const char *name; +{ + DB_MPOOL *dbmp; + DB_MPOOLFILE *mpf; + + DB_ASSERT(env, dbp != NULL || name != NULL); + + if (name == NULL) { + dbmp = env->mp_handle; + mpf = dbp->mpf; + name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off); + } + + if (dbp != NULL && dbp->type == DB_QUEUE) + return (__qam_testdocopy(dbp, name)); + else +#ifdef HAVE_PARTITION + if (dbp != NULL && DB_IS_PARTITIONED(dbp)) + return (__part_testdocopy(dbp, name)); + else +#endif + return (__db_testdocopy(env, name)); +} + +static int +__qam_testdocopy(dbp, name) + DB *dbp; + const char *name; +{ + DB_THREAD_INFO *ip; + QUEUE_FILELIST *filelist, *fp; + int ret; + char buf[DB_MAXPATHLEN], *dir; + + filelist = NULL; + if ((ret = __db_testdocopy(dbp->env, name)) != 0) + return (ret); + + /* Call ENV_GET_THREAD_INFO to get a valid DB_THREAD_INFO */ + ENV_GET_THREAD_INFO(dbp->env, ip); + if (dbp->mpf != NULL && + (ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0) + goto done; + + if (filelist == NULL) + return (0); + dir = ((QUEUE *)dbp->q_internal)->dir; + for (fp = filelist; fp->mpf != NULL; fp++) { + snprintf(buf, sizeof(buf), + QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id); + if ((ret = __db_testdocopy(dbp->env, buf)) != 0) + return (ret); + } + +done: __os_free(dbp->env, filelist); + return (0); +} + +/* + * __db_testdocopy + * Create a copy of all backup files and our "main" DB. + * PUBLIC: int __db_testdocopy __P((ENV *, const char *)); + */ +int +__db_testdocopy(env, name) + ENV *env; + const char *name; +{ + size_t len; + int dircnt, i, ret; + char *copy, **namesp, *p, *real_name; + + dircnt = 0; + copy = NULL; + namesp = NULL; + + /* Create the real backing file name. */ + if ((ret = __db_appname(env, + DB_APP_DATA, name, NULL, &real_name)) != 0) + return (ret); + + /* + * !!! 
+ * There are tests that attempt to copy non-existent files. I'd guess + * it's a testing bug, but I don't have time to figure it out. Block + * the case here. + */ + if (__os_exists(env, real_name, NULL) != 0) { + __os_free(env, real_name); + return (0); + } + + /* + * Copy the file itself. + * + * Allocate space for the file name, including adding an ".afterop" and + * trailing nul byte. + */ + len = strlen(real_name) + sizeof(".afterop"); + if ((ret = __os_malloc(env, len, ©)) != 0) + goto err; + snprintf(copy, len, "%s.afterop", real_name); + if ((ret = __db_makecopy(env, real_name, copy)) != 0) + goto err; + + /* + * Get the directory path to call __os_dirlist(). + */ + if ((p = __db_rpath(real_name)) != NULL) + *p = '\0'; + if ((ret = __os_dirlist(env, real_name, 0, &namesp, &dircnt)) != 0) + goto err; + + /* + * Walk the directory looking for backup files. Backup file names in + * transactional environments are of the form: + * + * BACKUP_PREFIX.TXNID.ID + */ + for (i = 0; i < dircnt; i++) { + /* Check for a related backup file name. */ + if (strncmp( + namesp[i], BACKUP_PREFIX, sizeof(BACKUP_PREFIX) - 1) != 0) + continue; + p = namesp[i] + sizeof(BACKUP_PREFIX); + p += strspn(p, "0123456789ABCDEFabcdef"); + if (*p != '.') + continue; + ++p; + p += strspn(p, "0123456789ABCDEFabcdef"); + if (*p != '\0') + continue; + + /* + * Copy the backup file. + * + * Allocate space for the file name, including adding a + * ".afterop" and trailing nul byte. 
+ */ + if (real_name != NULL) { + __os_free(env, real_name); + real_name = NULL; + } + if ((ret = __db_appname(env, + DB_APP_DATA, namesp[i], NULL, &real_name)) != 0) + goto err; + if (copy != NULL) { + __os_free(env, copy); + copy = NULL; + } + len = strlen(real_name) + sizeof(".afterop"); + if ((ret = __os_malloc(env, len, ©)) != 0) + goto err; + snprintf(copy, len, "%s.afterop", real_name); + if ((ret = __db_makecopy(env, real_name, copy)) != 0) + goto err; + } + +err: if (namesp != NULL) + __os_dirfree(env, namesp, dircnt); + if (copy != NULL) + __os_free(env, copy); + if (real_name != NULL) + __os_free(env, real_name); + return (ret); +} + +static int +__db_makecopy(env, src, dest) + ENV *env; + const char *src, *dest; +{ + DB_FH *rfhp, *wfhp; + size_t rcnt, wcnt; + int ret; + char *buf; + + rfhp = wfhp = NULL; + + if ((ret = __os_malloc(env, 64 * 1024, &buf)) != 0) + goto err; + + if ((ret = __os_open(env, src, 0, + DB_OSO_RDONLY, DB_MODE_600, &rfhp)) != 0) + goto err; + if ((ret = __os_open(env, dest, 0, + DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0) + goto err; + + for (;;) { + if ((ret = + __os_read(env, rfhp, buf, sizeof(buf), &rcnt)) != 0) + goto err; + if (rcnt == 0) + break; + if ((ret = + __os_write(env, wfhp, buf, sizeof(buf), &wcnt)) != 0) + goto err; + } + + if (0) { +err: __db_err(env, ret, "__db_makecopy: %s -> %s", src, dest); + } + + if (buf != NULL) + __os_free(env, buf); + if (rfhp != NULL) + (void)__os_closehandle(env, rfhp); + if (wfhp != NULL) + (void)__os_closehandle(env, wfhp); + return (ret); +} +#endif diff --git a/src/db/db.src b/src/db/db.src new file mode 100644 index 00000000..2e8a08e2 --- /dev/null +++ b/src/db/db.src @@ -0,0 +1,431 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +DBPRIVATE +PREFIX __db + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * addrem -- Add or remove an entry from a duplicate page. + * + * opcode: identifies if this is an add or delete. + * fileid: file identifier of the file being modified. + * pgno: duplicate page number. + * indx: location at which to insert or delete. + * nbytes: number of bytes added/removed to/from the page. + * hdr: header for the data item. + * dbt: data that is deleted or is to be added. + * pagelsn: former lsn of the page. + * + * If the hdr was NULL then, the dbt is a regular B_KEYDATA. + * If the dbt was NULL then the hdr is a complete item to be + * pasted on the page. + */ +BEGIN addrem 50 41 +OP opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes u_int32_t lu +HDR hdr DBT s +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +BEGIN_COMPAT addrem 42 41 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes u_int32_t lu +DBT hdr DBT s +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * big -- Handles addition and deletion of big key/data items. + * + * opcode: identifies get/put. + * fileid: file identifier of the file being modified. + * pgno: page onto which data is being added/removed. + * prev_pgno: the page before the one we are logging. + * next_pgno: the page after the one we are logging. + * dbt: data being written onto the page. + * pagelsn: former lsn of the orig_page. + * prevlsn: former lsn of the prev_pgno. + * nextlsn: former lsn of the next_pgno. This is not currently used, but + * may be used later if we actually do overwrites of big key/ + * data items in place. 
+ */ +BEGIN big 50 43 +OP opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG prev_pgno db_pgno_t lu +ARG next_pgno db_pgno_t lu +HDR dbt DBT s +POINTER pagelsn DB_LSN * lu +POINTER prevlsn DB_LSN * lu +POINTER nextlsn DB_LSN * lu +END + +BEGIN_COMPAT big 42 43 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG prev_pgno db_pgno_t lu +ARG next_pgno db_pgno_t lu +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +POINTER prevlsn DB_LSN * lu +POINTER nextlsn DB_LSN * lu +END + +/* + * ovref -- Handles increment/decrement of overflow page reference count. + * + * fileid: identifies the file being modified. + * pgno: page number whose ref count is being incremented/decremented. + * adjust: the adjustment being made. + * lsn: the page's original lsn. + */ +BEGIN ovref 42 44 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG adjust int32_t ld +POINTER lsn DB_LSN * lu +END + +/* + * relink -- Handles relinking around a page. + * + * opcode: indicates if this is an addpage or delete page + * pgno: the page being changed. + * lsn the page's original lsn. + * prev: the previous page. + * lsn_prev: the previous page's original lsn. + * next: the next page. + * lsn_next: the previous page's original lsn. + */ +BEGIN_COMPAT relink 42 45 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG prev db_pgno_t lu +POINTER lsn_prev DB_LSN * lu +ARG next db_pgno_t lu +POINTER lsn_next DB_LSN * lu +END + +/* + * Debug -- log an operation upon entering an access method. + * op: Operation (cursor, c_close, c_get, c_put, c_del, + * get, put, delete). + * fileid: identifies the file being acted upon. + * key: key paramater + * data: data parameter + * flags: flags parameter + */ +BEGIN debug 42 47 +DBT op DBT s +ARG fileid int32_t ld +DBT key DBT s +DBT data DBT s +ARG arg_flags u_int32_t lu +END + +/* + * noop -- do nothing, but get an LSN. 
+ */ +BEGIN noop 42 48 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER prevlsn DB_LSN * lu +END + +/* + * pg_alloc: used to record allocating a new page. + * + * meta_lsn: the original lsn of the page reference by meta_pgno. + * meta_pgno the page pointing at the allocated page in the free list. + * If the list is unsorted this is the metadata page. + * page_lsn: the allocated page's original lsn. + * pgno: the page allocated. + * ptype: the type of the page allocated. + * next: the next page on the free list. + * last_pgno: the last page in the file after this op (4.3+). + */ +BEGIN_COMPAT pg_alloc 42 49 +DB fileid int32_t ld +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +POINTER page_lsn DB_LSN * lu +ARG pgno db_pgno_t lu +ARG ptype u_int32_t lu +ARG next db_pgno_t lu +END + +BEGIN pg_alloc 43 49 +DB fileid int32_t ld +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +POINTER page_lsn DB_LSN * lu +ARG pgno db_pgno_t lu +ARG ptype u_int32_t lu +ARG next db_pgno_t lu +ARG last_pgno db_pgno_t lu +END + +/* + * pg_free: used to record freeing a page. + * If we are maintaining a sorted free list (during compact) meta_pgno + * will be non-zero and refer to the page that preceeds the one we are freeing + * in the free list. Meta_lsn will then be the lsn of that page. + * + * pgno: the page being freed. + * meta_lsn: the meta-data page's original lsn. + * meta_pgno: the meta-data page number. + * header: the header from the free'd page. + * next: the previous next pointer on the metadata page. + * last_pgno: the last page in the file before this op (4.3+). 
+ */ +BEGIN_COMPAT pg_free 42 50 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +PGDBT header DBT s +ARG next db_pgno_t lu +END + +BEGIN pg_free 43 50 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +PGDBT header DBT s +ARG next db_pgno_t lu +ARG last_pgno db_pgno_t lu +END + +/* + * cksum -- + * This log record is written when we're unable to checksum a page, + * before returning DB_RUNRECOVERY. This log record causes normal + * recovery to itself return DB_RUNRECOVERY, as only catastrophic + * recovery can fix things. + */ +BEGIN cksum 42 51 +END + +/* + * pg_freedata: used to record freeing a page with data on it. + * + * pgno: the page being freed. + * meta_lsn: the meta-data page's original lsn. + * meta_pgno: the meta-data page number. + * header: the header and index entries from the free'd page. + * data: the data from the free'd page. + * next: the previous next pointer on the metadata page. + * last_pgno: the last page in the file before this op (4.3+). + */ +BEGIN_COMPAT pg_freedata 42 52 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +PGDBT header DBT s +ARG next db_pgno_t lu +PGDDBT data DBT s +END + +BEGIN pg_freedata 43 52 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +PGDBT header DBT s +ARG next db_pgno_t lu +ARG last_pgno db_pgno_t lu +PGDDBT data DBT s +END + +/* + * pg_prepare: used to record an aborted page in a prepared transaction. + * + * pgno: the page being freed. + */ +X BEGIN pg_prepare 42 53 +X DB fileid int32_t ld +X ARG pgno db_pgno_t lu +X END + +/* + * pg_new: used to record a new page put on the free list. + * + * pgno: the page being freed. + * meta_lsn: the meta-data page's original lsn. + * meta_pgno: the meta-data page number. + * header: the header from the free'd page. 
+ * next: the previous next pointer on the metadata page. + */ +X BEGIN pg_new 42 54 +X DB fileid int32_t ld +X ARG pgno db_pgno_t lu +X POINTER meta_lsn DB_LSN * lu +X ARG meta_pgno db_pgno_t lu +X PGDBT header DBT s +X ARG next db_pgno_t lu +X END + +/* + * pg_init: used to reinitialize a page during truncate. + * + * pgno: the page being initialized. + * header: the header from the page. + * data: data that used to be on the page. + */ +BEGIN pg_init 43 60 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +PGDBT header DBT s +PGDDBT data DBT s +END + +/* + * pg_sort: sort the free list + * + * meta: meta page number + * meta_lsn: lsn on meta page. + * last_free: page number of new last free page. + * last_lsn; lsn of last free page. + * last_pgno: current last page number. + * list: list of pages and lsns to sort. + */ +BEGIN_COMPAT pg_sort 44 61 +DB fileid int32_t ld +ARG meta db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG last_free db_pgno_t lu +POINTER last_lsn DB_LSN * lu +ARG last_pgno db_pgno_t lu +DBT list DBT s +END + + +/* + * pg_truc: truncate the free list + * + * meta: meta page number + * meta_lsn: lsn on meta page. + * last_free: page number of new last free page. + * last_lsn; lsn of last free page. + * last_pgno: current last page number. + * list: list of pages and lsns on free list. 
+ */ +BEGIN pg_trunc 50 66 +DB fileid int32_t ld +ARG meta db_pgno_t lu +POINTER meta_lsn DB_LSN * lu +ARG last_free db_pgno_t lu +POINTER last_lsn DB_LSN * lu +ARG next_free db_pgno_t lu +ARG last_pgno db_pgno_t lu +PGLIST list DBT s +END + +/* + * realloc: allocate a range of pages from the free list + * prev_pgno: page number of the page preceeding the set of pages to + * be allocated + * prev_lsn LSN from the prev_pgno page + * next_free page number of the page immediately following the set + * of pages to be allocated + * ptype The type of page being allocated + * list: pairs of page numbers and LSNs corresponding to the pages on + * the free list that are being reallocated + */ +BEGIN realloc 50 36 +DB fileid int32_t ld +ARG prev_pgno db_pgno_t lu +POINTER page_lsn DB_LSN * lu +ARG next_free db_pgno_t lu +ARG ptype u_int32_t lu +PGLIST list DBT s +END + +/* + * relink: relink next and previous page pointers + * NOTE: moved from btree so its number is from that range. + * pgno: The page being removed. + * new_pgno: The new page number, if any. + * prev_pgno: The previous page, if any. + * lsn_prev: The previous page's original lsn. + * next_pgno: The next page, if any. + * lsn_next: The previous page's original lsn. + */ +BEGIN relink 44 147 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG new_pgno db_pgno_t lu +ARG prev_pgno db_pgno_t lu +POINTER lsn_prev DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER lsn_next DB_LSN * lu +END + +/* + * Merge: merge two pages. + * NOTE: moved from btree so its number is from that range. + * pgno: The page number of the target page. + * lsn: Orignial LSN of the page. + * npgno: The page number of the next, or merged, page. + * nlsn: The LSN of hte next page. + * hdr: The page header of the next page. + * data: The data from the next page. + * pg_copy: If 1, then the whole page was copied. 
+ */ +BEGIN merge 47 148 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG npgno db_pgno_t lu +POINTER nlsn DB_LSN * lu +PGDBT hdr DBT s +PGDDBT data DBT s +ARG pg_copy int32_t lu +END + + +/* + * pgno -- Handles replacing a page number in a record + * reference on pgno by indx. + * NOTE: moved from btree so its number is from that range. + * pgno: The page that is being updated. + * lsn: The LSN of the page. + * indx: The index of the record being updated. + * opgno: Old page number. + * npgno: New page number. + */ +BEGIN pgno 44 149 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +ARG opgno db_pgno_t lu +ARG npgno db_pgno_t lu +END diff --git a/src/db/db_am.c b/src/db/db_am.c new file mode 100644 index 00000000..da2251cc --- /dev/null +++ b/src/db/db_am.c @@ -0,0 +1,1098 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __db_secondary_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static int __dbc_set_priority __P((DBC *, DB_CACHE_PRIORITY)); +static int __dbc_get_priority __P((DBC *, DB_CACHE_PRIORITY* )); + +/* + * __db_cursor_int -- + * Internal routine to create a cursor. 
+ * + * PUBLIC: int __db_cursor_int __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **)); + */ +int +__db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DBTYPE dbtype; + db_pgno_t root; + int flags; + DB_LOCKER *locker; + DBC **dbcp; +{ + DBC *dbc; + DBC_INTERNAL *cp; + ENV *env; + db_threadid_t tid; + int allocated, envlid, ret; + pid_t pid; + + env = dbp->env; + allocated = envlid = 0; + + /* + * If dbcp is non-NULL it is assumed to point to an area to initialize + * as a cursor. + * + * Take one from the free list if it's available. Take only the + * right type. With off page dups we may have different kinds + * of cursors on the queue for a single database. + */ + MUTEX_LOCK(env, dbp->mutex); + +#ifndef HAVE_NO_DB_REFCOUNT + /* + * If this DBP is being logged then refcount the log filename + * relative to this transaction. We do this here because we have + * the dbp->mutex which protects the refcount. We want to avoid + * calling the function if the transaction handle has a shared parent + * locker or we are duplicating a cursor. This includes the case of + * creating an off page duplicate cursor. + * If we knew this cursor will not be used in an update, we could avoid + * this, but we don't have that information. 
+ */ + if (IS_REAL_TXN(txn) && + !LF_ISSET(DBC_OPD | DBC_DUPLICATE) && + !F_ISSET(dbp, DB_AM_RECOVER) && + dbp->log_filename != NULL && !IS_REP_CLIENT(env) && + (ret = __txn_record_fname(env, txn, dbp->log_filename)) != 0) { + MUTEX_UNLOCK(env, dbp->mutex); + return (ret); + } + +#endif + + TAILQ_FOREACH(dbc, &dbp->free_queue, links) + if (dbtype == dbc->dbtype) { + TAILQ_REMOVE(&dbp->free_queue, dbc, links); + F_CLR(dbc, ~DBC_OWN_LID); + break; + } + MUTEX_UNLOCK(env, dbp->mutex); + + if (dbc == NULL) { + if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0) + return (ret); + allocated = 1; + dbc->flags = 0; + + dbc->dbp = dbp; + dbc->dbenv = dbp->dbenv; + dbc->env = dbp->env; + + /* Set up locking information. */ + if (LOCKING_ON(env)) { + /* + * If we are not threaded, we share a locker ID among + * all cursors opened in the environment handle, + * allocating one if this is the first cursor. + * + * This relies on the fact that non-threaded DB handles + * always have non-threaded environment handles, since + * we set DB_THREAD on DB handles created with threaded + * environment handles. + */ + if (!DB_IS_THREADED(dbp)) { + if (env->env_lref == NULL) { + if ((ret = __lock_id(env, + NULL, &env->env_lref)) != 0) + goto err; + envlid = 1; + } + dbc->lref = env->env_lref; + } + + /* + * In CDB, secondary indices should share a lock file + * ID with the primary; otherwise we're susceptible + * to deadlocks. We also use __db_cursor_int rather + * than __db_cursor to create secondary update cursors + * in c_put and c_del; these won't acquire a new lock. + * + * !!! + * Since this is in the one-time cursor allocation + * code, we need to be sure to destroy, not just + * close, all cursors in the secondary when we + * associate. 
+ */ + if (CDB_LOCKING(env) && + F_ISSET(dbp, DB_AM_SECONDARY)) + memcpy(dbc->lock.fileid, + dbp->s_primary->fileid, DB_FILE_ID_LEN); + else + memcpy(dbc->lock.fileid, + dbp->fileid, DB_FILE_ID_LEN); + + if (CDB_LOCKING(env)) { + if (F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { + /* + * If we are doing a single lock per + * environment, set up the global + * lock object just like we do to + * single thread creates. + */ + DB_ASSERT(env, sizeof(db_pgno_t) == + sizeof(u_int32_t)); + dbc->lock_dbt.size = sizeof(u_int32_t); + dbc->lock_dbt.data = &dbc->lock.pgno; + dbc->lock.pgno = 0; + } else { + dbc->lock_dbt.size = DB_FILE_ID_LEN; + dbc->lock_dbt.data = dbc->lock.fileid; + } + } else { + dbc->lock.type = DB_PAGE_LOCK; + dbc->lock_dbt.size = sizeof(dbc->lock); + dbc->lock_dbt.data = &dbc->lock; + } + } + /* Init the DBC internal structure. */ +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) { + if ((ret = __partc_init(dbc)) != 0) + goto err; + } else +#endif + switch (dbtype) { + case DB_BTREE: + case DB_RECNO: + if ((ret = __bamc_init(dbc, dbtype)) != 0) + goto err; + break; + case DB_HASH: + if ((ret = __hamc_init(dbc)) != 0) + goto err; + break; + case DB_HEAP: + if ((ret = __heapc_init(dbc)) != 0) + goto err; + break; + case DB_QUEUE: + if ((ret = __qamc_init(dbc)) != 0) + goto err; + break; + case DB_UNKNOWN: + default: + ret = __db_unknown_type(env, "DB->cursor", dbtype); + goto err; + } + + cp = dbc->internal; + } + + /* Refresh the DBC structure. */ + dbc->dbtype = dbtype; + RESET_RET_MEM(dbc); + dbc->set_priority = __dbc_set_priority; + dbc->get_priority = __dbc_get_priority; + dbc->priority = dbp->priority; + dbc->txn_cursors.tqe_next = NULL; + dbc->txn_cursors.tqe_prev = NULL; + + /* + * If the DB handle is not threaded, there is one locker ID for the + * whole environment. There should only one family transaction active + * as well. This doesn't apply to CDS group transactions, where the + * cursor can simply use the transaction's locker directly. 
+ */ + if (!CDB_LOCKING(env) && txn != NULL && F_ISSET(txn, TXN_FAMILY) && + (F_ISSET(dbc, DBC_OWN_LID) || dbc->lref == NULL || envlid)) { + if (LOCKING_ON(env)) { + if (dbc->lref == NULL) { + if ((ret = + __lock_id(env, NULL, &dbc->lref)) != 0) + goto err; + F_SET(dbc, DBC_OWN_LID); + } + if ((ret = __lock_addfamilylocker(env, + txn->txnid, dbc->lref->id, 1)) != 0) + goto err; + } + F_SET(dbc, DBC_FAMILY); + txn = NULL; + } + + if ((dbc->txn = txn) != NULL) + dbc->locker = txn->locker; + else if (LOCKING_ON(env)) { + /* + * There are certain cases in which we want to create a + * new cursor with a particular locker ID that is known + * to be the same as (and thus not conflict with) an + * open cursor. + * + * The most obvious case is cursor duplication; when we + * call DBC->dup or __dbc_idup, we want to use the original + * cursor's locker ID. + * + * Another case is when updating secondary indices. Standard + * CDB locking would mean that we might block ourself: we need + * to open an update cursor in the secondary while an update + * cursor in the primary is open, and when the secondary and + * primary are subdatabases or we're using env-wide locking, + * this is disastrous. + * + * In these cases, our caller will pass a nonzero locker + * ID into this function. Use this locker ID instead of + * the default as the locker ID for our new cursor. + */ + if (locker != NULL) + dbc->locker = locker; + else if (LF_ISSET(DB_RECOVER)) + dbc->locker = NULL; + else { + if (dbc->lref == NULL) { + if ((ret = + __lock_id(env, NULL, &dbc->lref)) != 0) + goto err; + F_SET(dbc, DBC_OWN_LID); + } + /* + * If we are threaded then we need to set the + * proper thread id into the locker. 
+ */ + if (DB_IS_THREADED(dbp)) { + env->dbenv->thread_id(env->dbenv, &pid, &tid); + __lock_set_thread_id(dbc->lref, pid, tid); + } + dbc->locker = dbc->lref; + } + } + + /* + * These fields change when we are used as a secondary index, so + * if the DB is a secondary, make sure they're set properly just + * in case we opened some cursors before we were associated. + * + * __dbc_get is used by all access methods, so this should be safe. + */ + if (F_ISSET(dbp, DB_AM_SECONDARY)) + dbc->get = dbc->c_get = __dbc_secondary_get_pp; + + /* + * Don't enable bulk for btrees with record numbering, since avoiding + * a full search avoids taking write locks necessary to maintain + * consistent numbering. + */ + if (LF_ISSET(DB_CURSOR_BULK) && dbtype == DB_BTREE && + !F_ISSET(dbp, DB_AM_RECNUM)) + F_SET(dbc, DBC_BULK); + if (LF_ISSET(DB_CURSOR_TRANSIENT)) + F_SET(dbc, DBC_TRANSIENT); + if (LF_ISSET(DBC_OPD)) + F_SET(dbc, DBC_OPD); + if (F_ISSET(dbp, DB_AM_RECOVER) || LF_ISSET(DB_RECOVER)) + F_SET(dbc, DBC_RECOVER); + if (F_ISSET(dbp, DB_AM_COMPENSATE)) + F_SET(dbc, DBC_DONTLOCK); +#ifdef HAVE_REPLICATION + /* + * If we are replicating from a down rev version then we must + * use old locking protocols. + */ + if (LOGGING_ON(env) && + ((LOG *)env->lg_handle-> + reginfo.primary)->persist.version < DB_LOGVERSION_LATCHING) + F_SET(dbc, DBC_DOWNREV); +#endif + + /* Refresh the DBC internal structure. 
*/ + cp = dbc->internal; + cp->opd = NULL; + cp->pdbc = NULL; + + cp->indx = 0; + cp->page = NULL; + cp->pgno = PGNO_INVALID; + cp->root = root; + cp->stream_start_pgno = cp->stream_curr_pgno = PGNO_INVALID; + cp->stream_off = 0; + + if (DB_IS_PARTITIONED(dbp)) { + DBC_PART_REFRESH(dbc); + } else switch (dbtype) { + case DB_BTREE: + case DB_RECNO: + if ((ret = __bamc_refresh(dbc)) != 0) + goto err; + break; + case DB_HEAP: + if ((ret = __heapc_refresh(dbc)) != 0) + goto err; + break; + case DB_HASH: + case DB_QUEUE: + break; + case DB_UNKNOWN: + default: + ret = __db_unknown_type(env, "DB->cursor", dbp->type); + goto err; + } + + /* + * The transaction keeps track of how many cursors were opened within + * it to catch application errors where the cursor isn't closed when + * the transaction is resolved. + */ + if (txn != NULL) + ++txn->cursors; + if (ip != NULL) { + dbc->thread_info = ip; +#ifdef DIAGNOSTIC + if (dbc->locker != NULL) + ip->dbth_locker = + R_OFFSET(&(env->lk_handle->reginfo), dbc->locker); + else + ip->dbth_locker = INVALID_ROFF; +#endif + } else if (txn != NULL) + dbc->thread_info = txn->thread_info; + else + ENV_GET_THREAD_INFO(env, dbc->thread_info); + + MUTEX_LOCK(env, dbp->mutex); + TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links); + F_SET(dbc, DBC_ACTIVE); + MUTEX_UNLOCK(env, dbp->mutex); + + *dbcp = dbc; + return (0); + +err: if (allocated) + __os_free(env, dbc); + return (ret); +} + +/* + * __db_put -- + * Store a key/data pair. + * + * PUBLIC: int __db_put __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); + */ +int +__db_put(dbp, ip, txn, key, data, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + DBT tdata, tkey; + ENV *env; + void *bulk_kptr, *bulk_ptr; + db_recno_t recno; + u_int32_t cursor_flags; + int ret, t_ret; + + env = dbp->env; + + /* + * See the comment in __db_get() regarding DB_CURSOR_TRANSIENT. 
+ * + * Note that the get in the DB_NOOVERWRITE case is safe to do with this + * flag set; if it errors in any way other than DB_NOTFOUND, we're + * going to close the cursor without doing anything else, and if it + * returns DB_NOTFOUND then it's safe to do a c_put(DB_KEYLAST) even if + * an access method moved the cursor, since that's not + * position-dependent. + */ + cursor_flags = DB_WRITELOCK; + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) + cursor_flags |= DB_CURSOR_BULK; + else + cursor_flags |= DB_CURSOR_TRANSIENT; + if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "DB->put", key, data, flags); + PERFMON6(env, db, put, dbp->fname, + dbp->dname, txn == NULL ? 0 : txn->txnid, key, data, flags); + + SET_RET_MEM(dbc, dbp); + + if (flags == DB_APPEND && !DB_IS_PRIMARY(dbp)) { + /* + * If there is an append callback, the value stored in + * data->data may be replaced and then freed. To avoid + * passing a freed pointer back to the user, just operate + * on a copy of the data DBT. + */ + tdata = *data; + + /* + * Append isn't a normal put operation; call the appropriate + * access method's append function. + */ + switch (dbp->type) { + case DB_HEAP: + if ((ret = __heap_append(dbc, key, &tdata)) != 0) + goto err; + break; + case DB_QUEUE: + if ((ret = __qam_append(dbc, key, &tdata)) != 0) + goto err; + break; + case DB_RECNO: + if ((ret = __ram_append(dbc, key, &tdata)) != 0) + goto err; + break; + case DB_BTREE: + case DB_HASH: + case DB_UNKNOWN: + default: + /* The interface should prevent this. */ + DB_ASSERT(env, + dbp->type == DB_QUEUE || dbp->type == DB_RECNO); + + ret = __db_ferr(env, "DB->put", 0); + goto err; + } + + /* + * The append callback, if one exists, may have allocated + * a new tdata.data buffer. If so, free it. + */ + FREE_IF_NEEDED(env, &tdata); + + /* No need for a cursor put; we're done. 
*/ +#ifdef HAVE_COMPRESSION + } else if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) && + !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) { + ret = __dbc_put(dbc, key, data, flags); +#endif + } else if (LF_ISSET(DB_MULTIPLE)) { + ret = 0; + memset(&tkey, 0, sizeof(tkey)); + if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { + tkey.data = &recno; + tkey.size = sizeof(recno); + } + memset(&tdata, 0, sizeof(tdata)); + DB_MULTIPLE_INIT(bulk_kptr, key); + DB_MULTIPLE_INIT(bulk_ptr, data); + key->doff = 0; + while (ret == 0) { + if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) + DB_MULTIPLE_RECNO_NEXT(bulk_kptr, key, + recno, tdata.data, tdata.size); + else + DB_MULTIPLE_NEXT(bulk_kptr, key, + tkey.data, tkey.size); + DB_MULTIPLE_NEXT(bulk_ptr, data, + tdata.data, tdata.size); + if (bulk_kptr == NULL || bulk_ptr == NULL) + break; + ret = __dbc_put(dbc, &tkey, &tdata, + LF_ISSET(DB_OPFLAGS_MASK)); + if (ret == 0) + ++key->doff; + } + } else if (LF_ISSET(DB_MULTIPLE_KEY)) { + ret = 0; + memset(&tkey, 0, sizeof(tkey)); + if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { + tkey.data = &recno; + tkey.size = sizeof(recno); + } + memset(&tdata, 0, sizeof(tdata)); + DB_MULTIPLE_INIT(bulk_ptr, key); + while (ret == 0) { + if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) + DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key, recno, + tdata.data, tdata.size); + else + DB_MULTIPLE_KEY_NEXT(bulk_ptr, key, tkey.data, + tkey.size, tdata.data, tdata.size); + if (bulk_ptr == NULL) + break; + ret = __dbc_put(dbc, &tkey, &tdata, + LF_ISSET(DB_OPFLAGS_MASK)); + if (ret == 0) + ++key->doff; + } + } else + ret = __dbc_put(dbc, key, data, flags); + +err: /* Close the cursor. */ + if (!DB_RETOK_DBPUT(ret)) + F_SET(dbc, DBC_ERROR); + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_del -- + * Delete the items referenced by a key. 
+ * + * PUBLIC: int __db_del __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, u_int32_t)); + */ +int +__db_del(dbp, ip, txn, key, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DBT *key; + u_int32_t flags; +{ + DBC *dbc; + DBT data, tkey; + void *bulk_ptr; + db_recno_t recno; + u_int32_t cursor_flags, f_init, f_next; + int ret, t_ret; + + COMPQUIET(bulk_ptr, NULL); + /* Allocate a cursor. */ + cursor_flags = DB_WRITELOCK; + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) + cursor_flags |= DB_CURSOR_BULK; + if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "DB->del", key, NULL, flags); + PERFMON5(env, db, del, + dbp->fname, dbp->dname, txn == NULL ? 0 : txn->txnid, key, flags); + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) && + !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) { + F_SET(dbc, DBC_TRANSIENT); + ret = __dbc_bulk_del(dbc, key, flags); + goto err; + } +#endif + + /* + * Walk a cursor through the key/data pairs, deleting as we go. Set + * the DB_DBT_USERMEM flag, as this might be a threaded application + * and the flags checking will catch us. We don't actually want the + * keys or data, set DB_DBT_ISSET. We rely on __dbc_get to clear + * this. + */ + memset(&data, 0, sizeof(data)); + F_SET(&data, DB_DBT_USERMEM); + tkey = *key; + + f_init = LF_ISSET(DB_MULTIPLE_KEY) ? DB_GET_BOTH : DB_SET; + f_next = DB_NEXT_DUP; + + /* + * If locking (and we haven't already acquired CDB locks), set the + * read-modify-write flag. + */ + if (STD_LOCKING(dbc)) { + f_init |= DB_RMW; + f_next |= DB_RMW; + } + + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { + if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { + memset(&tkey, 0, sizeof(tkey)); + tkey.data = &recno; + tkey.size = sizeof(recno); + } + DB_MULTIPLE_INIT(bulk_ptr, key); + /* We return the number of keys deleted in doff. 
*/ + key->doff = 0; +bulk_next: if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) + DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key, + recno, data.data, data.size); + else if (LF_ISSET(DB_MULTIPLE)) + DB_MULTIPLE_NEXT(bulk_ptr, key, tkey.data, tkey.size); + else + DB_MULTIPLE_KEY_NEXT(bulk_ptr, key, + tkey.data, tkey.size, data.data, data.size); + if (bulk_ptr == NULL) + goto err; + } + + /* We're not interested in the data -- do not return it. */ + F_SET(&tkey, DB_DBT_ISSET); + F_SET(&data, DB_DBT_ISSET); + + /* + * Optimize the simple cases. For all AMs if we don't have secondaries + * and are not a secondary and we aren't a foreign database and there + * are no dups then we can avoid a bunch of overhead. For queue we + * don't need to fetch the record since we delete by direct calculation + * from the record number. + * + * Hash permits an optimization in DB->del: since on-page duplicates are + * stored in a single HKEYDATA structure, it's possible to delete an + * entire set of them at once, and as the HKEYDATA has to be rebuilt + * and re-put each time it changes, this is much faster than deleting + * the duplicates one by one. Thus, if not pointing at an off-page + * duplicate set, and we're not using secondary indices (in which case + * we'd have to examine the items one by one anyway), let hash do this + * "quick delete". + * + * !!! + * Note that this is the only application-executed delete call in + * Berkeley DB that does not go through the __dbc_del function. + * If anything other than the delete itself (like a secondary index + * update) has to happen there in a particular situation, the + * conditions here should be modified not to use these optimizations. + * The ordinary AM-independent alternative will work just fine; + * it'll just be slower. 
+ */ + if (!F_ISSET(dbp, DB_AM_SECONDARY) && !DB_IS_PRIMARY(dbp) && + LIST_FIRST(&dbp->f_primaries) == NULL) { +#ifdef HAVE_QUEUE + if (dbp->type == DB_QUEUE) { + ret = __qam_delete(dbc, &tkey, flags); + goto next; + } +#endif + + /* Fetch the first record. */ + if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0) + goto err; + +#ifdef HAVE_HASH + /* + * Hash "quick delete" removes all on-page duplicates. We + * can't do that if deleting specific key/data pairs. + */ + if (dbp->type == DB_HASH && !LF_ISSET(DB_MULTIPLE_KEY)) { + DBC *sdbc; + sdbc = dbc; +#ifdef HAVE_PARTITION + if (F_ISSET(dbc, DBC_PARTITIONED)) + sdbc = + ((PART_CURSOR*)dbc->internal)->sub_cursor; +#endif + if (sdbc->internal->opd == NULL) { + ret = __ham_quick_delete(sdbc); + goto next; + } + } +#endif + + if (!F_ISSET(dbp, DB_AM_DUP)) { + ret = dbc->am_del(dbc, 0); + goto next; + } + } else if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0) + goto err; + + /* Walk through the set of key/data pairs, deleting as we go. */ + for (;;) { + if ((ret = __dbc_del(dbc, flags)) != 0) + break; + /* + * With DB_MULTIPLE_KEY, the application has specified the + * exact records they want deleted. We don't need to walk + * through a set of duplicates. + */ + if (LF_ISSET(DB_MULTIPLE_KEY)) + break; + + F_SET(&tkey, DB_DBT_ISSET); + F_SET(&data, DB_DBT_ISSET); + if ((ret = __dbc_get(dbc, &tkey, &data, f_next)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + break; + } + } + +next: if (ret == 0 && LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { + ++key->doff; + goto bulk_next; + } +err: /* Discard the cursor. */ + if (!DB_RETOK_DBDEL(ret)) + F_SET(dbc, DBC_ERROR); + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_sync -- + * Flush the database cache. + * + * PUBLIC: int __db_sync __P((DB *)); + */ +int +__db_sync(dbp) + DB *dbp; +{ + int ret, t_ret; + + ret = 0; + + /* If the database was read-only, we're done. 
*/ + if (F_ISSET(dbp, DB_AM_RDONLY)) + return (0); + + /* If it's a Recno tree, write the backing source text file. */ + if (dbp->type == DB_RECNO) + ret = __ram_writeback(dbp); + + /* If the database was never backed by a database file, we're done. */ + if (F_ISSET(dbp, DB_AM_INMEM)) + return (ret); +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) + ret = __partition_sync(dbp); + else +#endif + if (dbp->type == DB_QUEUE) + ret = __qam_sync(dbp); + else + /* Flush any dirty pages from the cache to the backing file. */ + if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_associate -- + * Associate another database as a secondary index to this one. + * + * PUBLIC: int __db_associate __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB *, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); + */ +int +__db_associate(dbp, ip, txn, sdbp, callback, flags) + DB *dbp, *sdbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); + u_int32_t flags; +{ + DBC *pdbc, *sdbc; + DBT key, data, skey, *tskeyp; + ENV *env; + int build, ret, t_ret; + u_int32_t nskey; + + env = dbp->env; + pdbc = sdbc = NULL; + ret = 0; + + memset(&skey, 0, sizeof(DBT)); + nskey = 0; + tskeyp = NULL; + + /* + * Check to see if the secondary is empty -- and thus if we should + * build it -- before we link it in and risk making it show up in other + * threads. Do this first so that the databases remain unassociated on + * error. + */ + build = 0; + if (LF_ISSET(DB_CREATE)) { + FLD_SET(sdbp->s_assoc_flags, DB_ASSOC_CREATE); + + if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, 0)) != 0) + goto err; + + /* + * We don't care about key or data; we're just doing + * an existence check. 
+ */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + F_SET(&key, DB_DBT_PARTIAL | DB_DBT_USERMEM); + F_SET(&data, DB_DBT_PARTIAL | DB_DBT_USERMEM); + if ((ret = __dbc_get(sdbc, &key, &data, + (STD_LOCKING(sdbc) ? DB_RMW : 0) | + DB_FIRST)) == DB_NOTFOUND) { + build = 1; + ret = 0; + } + + if (ret != 0) + F_SET(sdbc, DBC_ERROR); + if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + + /* Reset for later error check. */ + sdbc = NULL; + + if (ret != 0) + goto err; + } + + /* + * Set up the database handle as a secondary. + */ + sdbp->s_callback = callback; + sdbp->s_primary = dbp; + + sdbp->stored_get = sdbp->get; + sdbp->get = __db_secondary_get; + + sdbp->stored_close = sdbp->close; + sdbp->close = __db_secondary_close_pp; + + F_SET(sdbp, DB_AM_SECONDARY); + + if (LF_ISSET(DB_IMMUTABLE_KEY)) + FLD_SET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY); + + /* + * Add the secondary to the list on the primary. Do it here + * so that we see any updates that occur while we're walking + * the primary. + */ + MUTEX_LOCK(env, dbp->mutex); + + /* See __db_s_next for an explanation of secondary refcounting. */ + DB_ASSERT(env, sdbp->s_refcnt == 0); + sdbp->s_refcnt = 1; + LIST_INSERT_HEAD(&dbp->s_secondaries, sdbp, s_links); + MUTEX_UNLOCK(env, dbp->mutex); + + if (build) { + /* + * We loop through the primary, putting each item we + * find into the new secondary. + * + * If we're using CDB, opening these two cursors puts us + * in a bit of a locking tangle: CDB locks are done on the + * primary, so that we stay deadlock-free, but that means + * that updating the secondary while we have a read cursor + * open on the primary will self-block. To get around this, + * we force the primary cursor to use the same locker ID + * as the secondary, so they won't conflict. This should + * be harmless even if we're not using CDB. + */ + if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, + CDB_LOCKING(sdbp->env) ? 
DB_WRITECURSOR : 0)) != 0) + goto err; + if ((ret = __db_cursor_int(dbp, ip, + txn, dbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0) + goto err; + + /* Lock out other threads, now that we have a locker. */ + dbp->associate_locker = sdbc->locker; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + while ((ret = __dbc_get(pdbc, &key, &data, DB_NEXT)) == 0) { + if ((ret = callback(sdbp, &key, &data, &skey)) != 0) { + if (ret == DB_DONOTINDEX) + continue; + goto err; + } + if (F_ISSET(&skey, DB_DBT_MULTIPLE)) { +#ifdef DIAGNOSTIC + __db_check_skeyset(sdbp, &skey); +#endif + nskey = skey.size; + tskeyp = (DBT *)skey.data; + } else { + nskey = 1; + tskeyp = &skey; + } + SWAP_IF_NEEDED(sdbp, &key); + for (; nskey > 0; nskey--, tskeyp++) { + if ((ret = __dbc_put(sdbc, + tskeyp, &key, DB_UPDATE_SECONDARY)) != 0) + goto err; + FREE_IF_NEEDED(env, tskeyp); + } + SWAP_IF_NEEDED(sdbp, &key); + FREE_IF_NEEDED(env, &skey); + } + if (ret == DB_NOTFOUND) + ret = 0; + } + +err: if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + + if (pdbc != NULL && (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + + dbp->associate_locker = NULL; + + for (; nskey > 0; nskey--, tskeyp++) + FREE_IF_NEEDED(env, tskeyp); + FREE_IF_NEEDED(env, &skey); + + return (ret); +} + +/* + * __db_secondary_get -- + * This wrapper function for DB->pget() is the DB->get() function + * on a database which has been made into a secondary index. + */ +static int +__db_secondary_get(sdbp, txn, skey, data, flags) + DB *sdbp; + DB_TXN *txn; + DBT *skey, *data; + u_int32_t flags; +{ + DB_ASSERT(sdbp->env, F_ISSET(sdbp, DB_AM_SECONDARY)); + return (__db_pget_pp(sdbp, txn, skey, NULL, data, flags)); +} + +/* + * __db_secondary_close -- + * Wrapper function for DB->close() which we use on secondaries to + * manage refcounting and make sure we don't close them underneath + * a primary that is updating. 
+ * + * PUBLIC: int __db_secondary_close __P((DB *, u_int32_t)); + */ +int +__db_secondary_close(sdbp, flags) + DB *sdbp; + u_int32_t flags; +{ + DB *primary; + ENV *env; + int doclose; + + /* + * If the opening trasaction is rolled back then the db handle + * will have already been refreshed, we just need to call + * __db_close to free the data. + */ + if (!F_ISSET(sdbp, DB_AM_OPEN_CALLED)) { + doclose = 1; + goto done; + } + doclose = 0; + primary = sdbp->s_primary; + env = primary->env; + + MUTEX_LOCK(env, primary->mutex); + /* + * Check the refcount--if it was at 1 when we were called, no + * thread is currently updating this secondary through the primary, + * so it's safe to close it for real. + * + * If it's not safe to do the close now, we do nothing; the + * database will actually be closed when the refcount is decremented, + * which can happen in either __db_s_next or __db_s_done. + */ + DB_ASSERT(env, sdbp->s_refcnt != 0); + if (--sdbp->s_refcnt == 0) { + LIST_REMOVE(sdbp, s_links); + /* We don't want to call close while the mutex is held. */ + doclose = 1; + } + MUTEX_UNLOCK(env, primary->mutex); + + /* + * sdbp->close is this function; call the real one explicitly if + * need be. + */ +done: return (doclose ? __db_close(sdbp, NULL, flags) : 0); +} + +/* + * __db_associate_foreign -- + * Associate this database (fdbp) as a foreign constraint to another + * database (pdbp). That is, dbp's keys appear as foreign key values in + * pdbp. 
+ * + * PUBLIC: int __db_associate_foreign __P((DB *, DB *, + * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), + * PUBLIC: u_int32_t)); + */ +int +__db_associate_foreign(fdbp, pdbp, callback, flags) + DB *fdbp, *pdbp; + int (*callback)(DB *, const DBT *, DBT *, const DBT *, int *); + u_int32_t flags; +{ + DB_FOREIGN_INFO *f_info; + ENV *env; + int ret; + + env = fdbp->env; + ret = 0; + + if ((ret = __os_malloc(env, sizeof(DB_FOREIGN_INFO), &f_info)) != 0) { + return (ret); + } + memset(f_info, 0, sizeof(DB_FOREIGN_INFO)); + + f_info->dbp = pdbp; + f_info->callback = callback; + + /* + * It might be wise to filter this, but for now the flags only + * set the delete action type. + */ + FLD_SET(f_info->flags, flags); + + /* + * Add f_info to the foreign database's list of primaries. That is to + * say, fdbp->f_primaries lists all databases for which fdbp is a + * foreign constraint. + */ + MUTEX_LOCK(env, fdbp->mutex); + LIST_INSERT_HEAD(&fdbp->f_primaries, f_info, f_links); + MUTEX_UNLOCK(env, fdbp->mutex); + + /* + * Associate fdbp as pdbp's foreign db, for referential integrity + * checks. We don't allow the foreign db to be changed, because we + * currently have no way of removing pdbp from the old foreign db's list + * of primaries. + */ + if (pdbp->s_foreign != NULL) + return (EINVAL); + pdbp->s_foreign = fdbp; + + return (ret); +} + +static int +__dbc_set_priority(dbc, priority) + DBC *dbc; + DB_CACHE_PRIORITY priority; +{ + dbc->priority = priority; + return (0); +} + +static int +__dbc_get_priority(dbc, priority) + DBC *dbc; + DB_CACHE_PRIORITY *priority; +{ + if (dbc->priority == DB_PRIORITY_UNCHANGED) + return (__memp_get_priority(dbc->dbp->mpf, priority)); + else + *priority = dbc->priority; + + return (0); +} diff --git a/src/db/db_auto.c b/src/db/db_auto.c new file mode 100644 index 00000000..7c6b7e66 --- /dev/null +++ b/src/db/db_auto.c @@ -0,0 +1,276 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __db_addrem_desc[] = { + {LOGREC_OP, SSZ(__db_addrem_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__db_addrem_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_addrem_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_addrem_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__db_addrem_args, nbytes), "nbytes", "%lu"}, + {LOGREC_HDR, SSZ(__db_addrem_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__db_addrem_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__db_addrem_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_addrem_42_desc[] = { + {LOGREC_ARG, SSZ(__db_addrem_42_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__db_addrem_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_addrem_42_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_addrem_42_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__db_addrem_42_args, nbytes), "nbytes", "%lu"}, + {LOGREC_DBT, SSZ(__db_addrem_42_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__db_addrem_42_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__db_addrem_42_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_big_desc[] = { + {LOGREC_OP, SSZ(__db_big_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__db_big_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_big_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_big_args, prev_pgno), "prev_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_big_args, next_pgno), "next_pgno", "%lu"}, + {LOGREC_HDR, SSZ(__db_big_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__db_big_args, pagelsn), "pagelsn", ""}, + {LOGREC_POINTER, SSZ(__db_big_args, prevlsn), "prevlsn", ""}, + {LOGREC_POINTER, SSZ(__db_big_args, nextlsn), "nextlsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_big_42_desc[] = { + {LOGREC_ARG, 
SSZ(__db_big_42_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__db_big_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_big_42_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_big_42_args, prev_pgno), "prev_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_big_42_args, next_pgno), "next_pgno", "%lu"}, + {LOGREC_DBT, SSZ(__db_big_42_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__db_big_42_args, pagelsn), "pagelsn", ""}, + {LOGREC_POINTER, SSZ(__db_big_42_args, prevlsn), "prevlsn", ""}, + {LOGREC_POINTER, SSZ(__db_big_42_args, nextlsn), "nextlsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_ovref_desc[] = { + {LOGREC_DB, SSZ(__db_ovref_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_ovref_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_ovref_args, adjust), "adjust", "%ld"}, + {LOGREC_POINTER, SSZ(__db_ovref_args, lsn), "lsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_relink_42_desc[] = { + {LOGREC_ARG, SSZ(__db_relink_42_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__db_relink_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_relink_42_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_relink_42_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__db_relink_42_args, prev), "prev", "%lu"}, + {LOGREC_POINTER, SSZ(__db_relink_42_args, lsn_prev), "lsn_prev", ""}, + {LOGREC_ARG, SSZ(__db_relink_42_args, next), "next", "%lu"}, + {LOGREC_POINTER, SSZ(__db_relink_42_args, lsn_next), "lsn_next", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_debug_desc[] = { + {LOGREC_DBT, SSZ(__db_debug_args, op), "op", ""}, + {LOGREC_ARG, SSZ(__db_debug_args, fileid), "fileid", "%ld"}, + {LOGREC_DBT, SSZ(__db_debug_args, key), "key", ""}, + {LOGREC_DBT, SSZ(__db_debug_args, data), "data", ""}, + {LOGREC_ARG, SSZ(__db_debug_args, arg_flags), "arg_flags", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_noop_desc[] = { + {LOGREC_DB, SSZ(__db_noop_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_noop_args, 
pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_noop_args, prevlsn), "prevlsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_alloc_42_desc[] = { + {LOGREC_DB, SSZ(__db_pg_alloc_42_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__db_pg_alloc_42_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_alloc_42_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_alloc_42_args, page_lsn), "page_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_alloc_42_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_alloc_42_args, ptype), "ptype", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_alloc_42_args, next), "next", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_alloc_desc[] = { + {LOGREC_DB, SSZ(__db_pg_alloc_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__db_pg_alloc_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_alloc_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_alloc_args, page_lsn), "page_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_alloc_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_alloc_args, ptype), "ptype", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_alloc_args, next), "next", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_alloc_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_free_42_desc[] = { + {LOGREC_DB, SSZ(__db_pg_free_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_free_42_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_free_42_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_free_42_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__db_pg_free_42_args, header), "header", ""}, + {LOGREC_ARG, SSZ(__db_pg_free_42_args, next), "next", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_free_desc[] = { + {LOGREC_DB, SSZ(__db_pg_free_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_free_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_free_args, meta_lsn), "meta_lsn", ""}, 
+ {LOGREC_ARG, SSZ(__db_pg_free_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__db_pg_free_args, header), "header", ""}, + {LOGREC_ARG, SSZ(__db_pg_free_args, next), "next", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_free_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_cksum_desc[] = { + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_freedata_42_desc[] = { + {LOGREC_DB, SSZ(__db_pg_freedata_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_42_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_freedata_42_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_42_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__db_pg_freedata_42_args, header), "header", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_42_args, next), "next", "%lu"}, + {LOGREC_PGDDBT, SSZ(__db_pg_freedata_42_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_freedata_desc[] = { + {LOGREC_DB, SSZ(__db_pg_freedata_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_freedata_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__db_pg_freedata_args, header), "header", ""}, + {LOGREC_ARG, SSZ(__db_pg_freedata_args, next), "next", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_freedata_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_PGDDBT, SSZ(__db_pg_freedata_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_init_desc[] = { + {LOGREC_DB, SSZ(__db_pg_init_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_init_args, pgno), "pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__db_pg_init_args, header), "header", ""}, + {LOGREC_PGDDBT, SSZ(__db_pg_init_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_sort_44_desc[] = { + {LOGREC_DB, SSZ(__db_pg_sort_44_args, fileid), "fileid", ""}, + 
{LOGREC_ARG, SSZ(__db_pg_sort_44_args, meta), "meta", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_sort_44_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_sort_44_args, last_free), "last_free", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_sort_44_args, last_lsn), "last_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_sort_44_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_DBT, SSZ(__db_pg_sort_44_args, list), "list", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pg_trunc_desc[] = { + {LOGREC_DB, SSZ(__db_pg_trunc_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pg_trunc_args, meta), "meta", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_trunc_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_trunc_args, last_free), "last_free", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pg_trunc_args, last_lsn), "last_lsn", ""}, + {LOGREC_ARG, SSZ(__db_pg_trunc_args, next_free), "next_free", "%lu"}, + {LOGREC_ARG, SSZ(__db_pg_trunc_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_PGLIST, SSZ(__db_pg_trunc_args, list), "list", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_realloc_desc[] = { + {LOGREC_DB, SSZ(__db_realloc_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_realloc_args, prev_pgno), "prev_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_realloc_args, page_lsn), "page_lsn", ""}, + {LOGREC_ARG, SSZ(__db_realloc_args, next_free), "next_free", "%lu"}, + {LOGREC_ARG, SSZ(__db_realloc_args, ptype), "ptype", "%lu"}, + {LOGREC_PGLIST, SSZ(__db_realloc_args, list), "list", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_relink_desc[] = { + {LOGREC_DB, SSZ(__db_relink_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_relink_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_relink_args, new_pgno), "new_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_relink_args, prev_pgno), "prev_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_relink_args, lsn_prev), "lsn_prev", ""}, + {LOGREC_ARG, SSZ(__db_relink_args, next_pgno), "next_pgno", "%lu"}, + {LOGREC_POINTER, 
SSZ(__db_relink_args, lsn_next), "lsn_next", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_merge_desc[] = { + {LOGREC_DB, SSZ(__db_merge_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_merge_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_merge_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__db_merge_args, npgno), "npgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_merge_args, nlsn), "nlsn", ""}, + {LOGREC_PGDBT, SSZ(__db_merge_args, hdr), "hdr", ""}, + {LOGREC_PGDDBT, SSZ(__db_merge_args, data), "data", ""}, + {LOGREC_ARG, SSZ(__db_merge_args, pg_copy), "pg_copy", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __db_pgno_desc[] = { + {LOGREC_DB, SSZ(__db_pgno_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__db_pgno_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__db_pgno_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__db_pgno_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__db_pgno_args, opgno), "opgno", "%lu"}, + {LOGREC_ARG, SSZ(__db_pgno_args, npgno), "npgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __db_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__db_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __db_addrem_recover, DB___db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_big_recover, DB___db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_ovref_recover, DB___db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_debug_recover, DB___db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_noop_recover, DB___db_noop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_alloc_recover, DB___db_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_free_recover, DB___db_pg_free)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, 
dtabp, + __db_cksum_recover, DB___db_cksum)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_freedata_recover, DB___db_pg_freedata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_init_recover, DB___db_pg_init)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_trunc_recover, DB___db_pg_trunc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_realloc_recover, DB___db_realloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_relink_recover, DB___db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_merge_recover, DB___db_merge)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pgno_recover, DB___db_pgno)) != 0) + return (ret); + return (0); +} diff --git a/src/db/db_autop.c b/src/db/db_autop.c new file mode 100644 index 00000000..6fe77039 --- /dev/null +++ b/src/db/db_autop.c @@ -0,0 +1,441 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __db_addrem_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_addrem_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_addrem", __db_addrem_desc, info)); +} + +/* + * PUBLIC: int __db_addrem_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_addrem_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_addrem_42", __db_addrem_42_desc, info)); +} + +/* + * PUBLIC: int __db_big_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_big_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_big", __db_big_desc, info)); +} + +/* + * PUBLIC: int __db_big_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_big_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_big_42", __db_big_42_desc, info)); +} + +/* + * PUBLIC: int __db_ovref_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_ovref_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, 
"__db_ovref", __db_ovref_desc, info)); +} + +/* + * PUBLIC: int __db_relink_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_relink_42", __db_relink_42_desc, info)); +} + +/* + * PUBLIC: int __db_debug_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_debug_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_debug", __db_debug_desc, info)); +} + +/* + * PUBLIC: int __db_noop_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_noop_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_noop", __db_noop_desc, info)); +} + +/* + * PUBLIC: int __db_pg_alloc_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_alloc_42", __db_pg_alloc_42_desc, info)); +} + +/* + * PUBLIC: int __db_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_alloc", __db_pg_alloc_desc, info)); +} + +/* + * PUBLIC: int __db_pg_free_42_print __P((ENV *, DBT *, 
DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_free_42", __db_pg_free_42_desc, info)); +} + +/* + * PUBLIC: int __db_pg_free_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_free", __db_pg_free_desc, info)); +} + +/* + * PUBLIC: int __db_cksum_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_cksum_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_cksum", __db_cksum_desc, info)); +} + +/* + * PUBLIC: int __db_pg_freedata_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_freedata_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_freedata_42", __db_pg_freedata_42_desc, info)); +} + +/* + * PUBLIC: int __db_pg_freedata_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_freedata_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_freedata", __db_pg_freedata_desc, info)); +} + +/* + * PUBLIC: int __db_pg_init_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int 
+__db_pg_init_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_init", __db_pg_init_desc, info)); +} + +/* + * PUBLIC: int __db_pg_sort_44_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_sort_44_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_sort_44", __db_pg_sort_44_desc, info)); +} + +/* + * PUBLIC: int __db_pg_trunc_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_trunc_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pg_trunc", __db_pg_trunc_desc, info)); +} + +/* + * PUBLIC: int __db_realloc_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_realloc_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_realloc", __db_realloc_desc, info)); +} + +/* + * PUBLIC: int __db_relink_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_relink", __db_relink_desc, info)); +} + +/* + * PUBLIC: int __db_merge_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_merge_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops 
notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_merge", __db_merge_desc, info)); +} + +/* + * PUBLIC: int __db_pgno_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pgno_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__db_pgno", __db_pgno_desc, info)); +} + +/* + * PUBLIC: int __db_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__db_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __db_addrem_print, DB___db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_big_print, DB___db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_ovref_print, DB___db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_debug_print, DB___db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_noop_print, DB___db_noop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_alloc_print, DB___db_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_free_print, DB___db_pg_free)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_cksum_print, DB___db_cksum)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_freedata_print, DB___db_pg_freedata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_init_print, DB___db_pg_init)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_trunc_print, DB___db_pg_trunc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_realloc_print, DB___db_realloc)) != 0) + return (ret); + if ((ret = 
__db_add_recovery_int(env, dtabp, + __db_relink_print, DB___db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_merge_print, DB___db_merge)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pgno_print, DB___db_pgno)) != 0) + return (ret); + return (0); +} diff --git a/src/db/db_cam.c b/src/db/db_cam.c new file mode 100644 index 00000000..76b0c6ee --- /dev/null +++ b/src/db/db_cam.c @@ -0,0 +1,3496 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __db_s_count __P((DB *)); +static int __db_wrlock_err __P((ENV *)); +static int __dbc_del_foreign __P((DBC *)); +static int __dbc_del_oldskey __P((DB *, DBC *, DBT *, DBT *, DBT *)); +static int __dbc_del_secondary __P((DBC *)); +static int __dbc_pget_recno __P((DBC *, DBT *, DBT *, u_int32_t)); +static inline int __dbc_put_append __P((DBC *, + DBT *, DBT *, u_int32_t *, u_int32_t)); +static inline int __dbc_put_fixed_len __P((DBC *, DBT *, DBT *)); +static inline int __dbc_put_partial __P((DBC *, + DBT *, DBT *, DBT *, DBT *, u_int32_t *, u_int32_t)); +static int __dbc_put_primary __P((DBC *, DBT *, DBT *, u_int32_t)); +static inline int __dbc_put_resolve_key __P((DBC *, + DBT *, DBT *, u_int32_t *, u_int32_t)); +static inline int __dbc_put_secondaries __P((DBC *, + DBT *, DBT *, DBT *, int, DBT *, u_int32_t *)); + +#define CDB_LOCKING_INIT(env, dbc) \ + /* \ + * If we are running CDB, this had better be either a write \ + * cursor or an immediate writer. 
If it's a regular writer, \ + * that means we have an IWRITE lock and we need to upgrade \ + * it to a write lock. \ + */ \ + if (CDB_LOCKING(env)) { \ + if (!F_ISSET(dbc, DBC_WRITECURSOR | DBC_WRITER)) \ + return (__db_wrlock_err(env)); \ + \ + if (F_ISSET(dbc, DBC_WRITECURSOR) && \ + (ret = __lock_get(env, \ + (dbc)->locker, DB_LOCK_UPGRADE, &(dbc)->lock_dbt, \ + DB_LOCK_WRITE, &(dbc)->mylock)) != 0) \ + return (ret); \ + } +#define CDB_LOCKING_DONE(env, dbc) \ + /* Release the upgraded lock. */ \ + if (F_ISSET(dbc, DBC_WRITECURSOR)) \ + (void)__lock_downgrade( \ + env, &(dbc)->mylock, DB_LOCK_IWRITE, 0); + +#define SET_READ_LOCKING_FLAGS(dbc, var) do { \ + var = 0; \ + if (!F_ISSET(dbc, DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED)) { \ + if (LF_ISSET(DB_READ_COMMITTED)) \ + var = DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED; \ + if (LF_ISSET(DB_READ_UNCOMMITTED)) \ + var = DBC_READ_UNCOMMITTED; \ + } \ + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); \ +} while (0) + +/* + * __dbc_close -- + * DBC->close. + * + * PUBLIC: int __dbc_close __P((DBC *)); + */ +int +__dbc_close(dbc) + DBC *dbc; +{ + DB *dbp; + DBC *opd; + DBC_INTERNAL *cp; + DB_TXN *txn; + ENV *env; + int ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + cp = dbc->internal; + opd = cp->opd; + ret = 0; + + /* + * Remove the cursor(s) from the active queue. We may be closing two + * cursors at once here, a top-level one and a lower-level, off-page + * duplicate one. The access-method specific cursor close routine must + * close both of them in a single call. + * + * !!! + * Cursors must be removed from the active queue before calling the + * access specific cursor close routine, btree depends on having that + * order of operations. 
+ */ + MUTEX_LOCK(env, dbp->mutex); + + if (opd != NULL) { + DB_ASSERT(env, F_ISSET(opd, DBC_ACTIVE)); + F_CLR(opd, DBC_ACTIVE); + TAILQ_REMOVE(&dbp->active_queue, opd, links); + } + DB_ASSERT(env, F_ISSET(dbc, DBC_ACTIVE)); + F_CLR(dbc, DBC_ACTIVE); + TAILQ_REMOVE(&dbp->active_queue, dbc, links); + + MUTEX_UNLOCK(env, dbp->mutex); + + /* Call the access specific cursor close routine. */ + if ((t_ret = + dbc->am_close(dbc, PGNO_INVALID, NULL)) != 0 && ret == 0) + ret = t_ret; + + /* + * Release the lock after calling the access method specific close + * routine, a Btree cursor may have had pending deletes. + * + * Also, be sure not to free anything if mylock.off is INVALID; in + * some cases, such as idup'ed read cursors and secondary update + * cursors, a cursor in a CDB environment may not have a lock at all. + */ + if (LOCK_ISSET(dbc->mylock)) { + if ((t_ret = __LPUT(dbc, dbc->mylock)) != 0 && ret == 0) + ret = t_ret; + + /* For safety's sake, since this is going on the free queue. */ + memset(&dbc->mylock, 0, sizeof(dbc->mylock)); + if (opd != NULL) + memset(&opd->mylock, 0, sizeof(opd->mylock)); + } + + /* + * Remove this cursor's locker ID from its family. + */ + if (F_ISSET(dbc, DBC_OWN_LID) && F_ISSET(dbc, DBC_FAMILY)) { + if ((t_ret = __lock_familyremove(env->lk_handle, + dbc->lref)) != 0 && ret == 0) + ret = t_ret; + F_CLR(dbc, DBC_FAMILY); + } + + if ((txn = dbc->txn) != NULL) + txn->cursors--; + + /* Move the cursor(s) to the free queue. */ + MUTEX_LOCK(env, dbp->mutex); + if (opd != NULL) { + if (txn != NULL) + txn->cursors--; + TAILQ_INSERT_TAIL(&dbp->free_queue, opd, links); + } + TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links); + MUTEX_UNLOCK(env, dbp->mutex); + + if (txn != NULL && F_ISSET(txn, TXN_PRIVATE) && txn->cursors == 0 && + (t_ret = __txn_commit(txn, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __dbc_destroy -- + * Destroy the cursor, called after DBC->close. 
+ * + * PUBLIC: int __dbc_destroy __P((DBC *)); + */ +int +__dbc_destroy(dbc) + DBC *dbc; +{ + DB *dbp; + ENV *env; + int ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + + /* Remove the cursor from the free queue. */ + MUTEX_LOCK(env, dbp->mutex); + TAILQ_REMOVE(&dbp->free_queue, dbc, links); + MUTEX_UNLOCK(env, dbp->mutex); + + /* Free up allocated memory. */ + if (dbc->my_rskey.data != NULL) + __os_free(env, dbc->my_rskey.data); + if (dbc->my_rkey.data != NULL) + __os_free(env, dbc->my_rkey.data); + if (dbc->my_rdata.data != NULL) + __os_free(env, dbc->my_rdata.data); + + /* Call the access specific cursor destroy routine. */ + ret = dbc->am_destroy == NULL ? 0 : dbc->am_destroy(dbc); + + /* + * Release the lock id for this cursor. + */ + if (LOCKING_ON(env) && + F_ISSET(dbc, DBC_OWN_LID) && + (t_ret = __lock_id_free(env, dbc->lref)) != 0 && ret == 0) + ret = t_ret; + + __os_free(env, dbc); + + return (ret); +} + +/* + * __dbc_cmp -- + * Compare the position of two cursors. Return whether two cursors are + * pointing to the same key/data pair. + * + * result == 0 if both cursors refer to the same item. + * result == 1 otherwise + * + * PUBLIC: int __dbc_cmp __P((DBC *, DBC *, int *)); + */ +int +__dbc_cmp(dbc, other_dbc, result) + DBC *dbc, *other_dbc; + int *result; +{ + DBC *curr_dbc, *curr_odbc; + DBC_INTERNAL *dbc_int, *odbc_int; + ENV *env; + int ret; + + env = dbc->env; + ret = 0; + +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbc->dbp)) { + dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor; + other_dbc = ((PART_CURSOR *)other_dbc->internal)->sub_cursor; + } + /* Both cursors must still be valid. 
*/ + if (dbc == NULL || other_dbc == NULL) { + __db_errx(env, DB_STR("0692", +"Both cursors must be initialized before calling DBC->cmp.")); + return (EINVAL); + } + + if (dbc->dbp != other_dbc->dbp) { + *result = 1; + return (0); + } +#endif + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbc->dbp)) + return (__bamc_compress_cmp(dbc, other_dbc, result)); +#endif + + curr_dbc = dbc; + curr_odbc = other_dbc; + dbc_int = dbc->internal; + odbc_int = other_dbc->internal; + + /* Both cursors must be on valid positions. */ + if (dbc_int->pgno == PGNO_INVALID || odbc_int->pgno == PGNO_INVALID) { + __db_errx(env, DB_STR("0693", +"Both cursors must be initialized before calling DBC->cmp.")); + return (EINVAL); + } + + /* + * Use a loop since cursors can be nested. Off page duplicate + * sets can only be nested one level deep, so it is safe to use a + * while (true) loop. + */ + while (1) { + if (dbc_int->pgno == odbc_int->pgno && + dbc_int->indx == odbc_int->indx) { + /* + * If one cursor is sitting on an off page duplicate + * set, the other will be pointing to the same set. Be + * careful, and check anyway. + */ + if (dbc_int->opd != NULL && odbc_int->opd != NULL) { + curr_dbc = dbc_int->opd; + curr_odbc = odbc_int->opd; + dbc_int = dbc_int->opd->internal; + odbc_int= odbc_int->opd->internal; + continue; + } else if (dbc_int->opd == NULL && + odbc_int->opd == NULL) + *result = 0; + else { + __db_errx(env, DB_STR("0694", + "DBCursor->cmp mismatched off page duplicate cursor pointers.")); + return (EINVAL); + } + + switch (curr_dbc->dbtype) { + case DB_HASH: + /* + * Make sure that on-page duplicate data + * indexes match, and that the deleted + * flags are consistent. + */ + ret = __hamc_cmp(curr_dbc, curr_odbc, result); + break; + case DB_BTREE: + case DB_RECNO: + /* + * Check for consisted deleted flags on btree + * specific cursors. + */ + ret = __bamc_cmp(curr_dbc, curr_odbc, result); + break; + default: + /* NO-OP break out. 
*/ + break; + } + } else + *result = 1; + return (ret); + } + /* NOTREACHED. */ + return (ret); +} + +/* + * __dbc_count -- + * Return a count of duplicate data items. + * + * PUBLIC: int __dbc_count __P((DBC *, db_recno_t *)); + */ +int +__dbc_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + ENV *env; + int ret; + + env = dbc->env; + +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbc->dbp)) + dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor; +#endif + /* + * Cursor Cleanup Note: + * All of the cursors passed to the underlying access methods by this + * routine are not duplicated and will not be cleaned up on return. + * So, pages/locks that the cursor references must be resolved by the + * underlying functions. + */ + switch (dbc->dbtype) { + case DB_HEAP: + case DB_QUEUE: + case DB_RECNO: + *recnop = 1; + break; + case DB_HASH: + if (dbc->internal->opd == NULL) { + if ((ret = __hamc_count(dbc, recnop)) != 0) + return (ret); + break; + } + /* FALLTHROUGH */ + case DB_BTREE: +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbc->dbp)) + return (__bamc_compress_count(dbc, recnop)); +#endif + if ((ret = __bamc_count(dbc, recnop)) != 0) + return (ret); + break; + case DB_UNKNOWN: + default: + return (__db_unknown_type(env, "__dbc_count", dbc->dbtype)); + } + return (0); +} + +/* + * __dbc_del -- + * DBC->del. + * + * PUBLIC: int __dbc_del __P((DBC *, u_int32_t)); + */ +int +__dbc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + CDB_LOCKING_INIT(env, dbc); + F_CLR(dbc, DBC_ERROR); + + /* + * If we're a secondary index, and DB_UPDATE_SECONDARY isn't set + * (which it only is if we're being called from a primary update), + * then we need to call through to the primary and delete the item. + * + * Note that this will delete the current item; we don't need to + * delete it ourselves as well, so we can just goto done. 
+ */ + if (flags != DB_UPDATE_SECONDARY && F_ISSET(dbp, DB_AM_SECONDARY)) { + ret = __dbc_del_secondary(dbc); + goto done; + } + + /* + * If we are a foreign db, go through and check any foreign key + * constraints first, which will make rolling back changes on an abort + * simpler. + */ + if (LIST_FIRST(&dbp->f_primaries) != NULL && + (ret = __dbc_del_foreign(dbc)) != 0) + goto done; + + /* + * If we are a primary and have secondary indices, go through + * and delete any secondary keys that point at the current record. + */ + if (DB_IS_PRIMARY(dbp) && + (ret = __dbc_del_primary(dbc)) != 0) + goto done; + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp)) + ret = __bamc_compress_del(dbc, flags); + else +#endif + ret = __dbc_idel(dbc, flags); + +done: CDB_LOCKING_DONE(env, dbc); + + if (!DB_RETOK_DBCDEL(ret)) + F_SET(dbc, DBC_ERROR); + return (ret); +} + +/* + * __dbc_del -- + * Implemenation of DBC->del. + * + * PUBLIC: int __dbc_idel __P((DBC *, u_int32_t)); + */ +int +__dbc_idel(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DBC *opd; + int ret, t_ret; + + COMPQUIET(flags, 0); + + dbp = dbc->dbp; + + /* + * Cursor Cleanup Note: + * All of the cursors passed to the underlying access methods by this + * routine are not duplicated and will not be cleaned up on return. + * So, pages/locks that the cursor references must be resolved by the + * underlying functions. + */ + + /* + * Off-page duplicate trees are locked in the primary tree, that is, + * we acquire a write lock in the primary tree and no locks in the + * off-page dup tree. If the del operation is done in an off-page + * duplicate tree, call the primary cursor's upgrade routine first. 
+ */ + opd = dbc->internal->opd; + if (opd == NULL) + ret = dbc->am_del(dbc, flags); + else if ((ret = dbc->am_writelock(dbc)) == 0) + ret = opd->am_del(opd, flags); + + /* + * If this was an update that is supporting dirty reads + * then we may have just swapped our read for a write lock + * which is held by the surviving cursor. We need + * to explicitly downgrade this lock. The closed cursor + * may only have had a read lock. + */ + if (ret == 0 && F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) && + dbc->internal->lock_mode == DB_LOCK_WRITE) { + if ((ret = __TLPUT(dbc, dbc->internal->lock)) == 0) + dbc->internal->lock_mode = DB_LOCK_WWRITE; + if (dbc->internal->page != NULL && (t_ret = + __memp_shared(dbp->mpf, dbc->internal->page)) != 0 && + ret == 0) + ret = t_ret; + } + + return (ret); +} + +#ifdef HAVE_COMPRESSION +/* + * __dbc_bulk_del -- + * Bulk del for a cursor. + * + * Only implemented for compressed BTrees. In this file in order to + * use the CDB_LOCKING_* macros. + * + * PUBLIC: #ifdef HAVE_COMPRESSION + * PUBLIC: int __dbc_bulk_del __P((DBC *, DBT *, u_int32_t)); + * PUBLIC: #endif + */ +int +__dbc_bulk_del(dbc, key, flags) + DBC *dbc; + DBT *key; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbc->env; + + DB_ASSERT(env, DB_IS_COMPRESSED(dbc->dbp)); + + CDB_LOCKING_INIT(env, dbc); + F_CLR(dbc, DBC_ERROR); + + ret = __bamc_compress_bulk_del(dbc, key, flags); + + CDB_LOCKING_DONE(env, dbc); + + return (ret); +} +#endif + +/* + * __dbc_dup -- + * Duplicate a cursor + * + * PUBLIC: int __dbc_dup __P((DBC *, DBC **, u_int32_t)); + */ +int +__dbc_dup(dbc_orig, dbcp, flags) + DBC *dbc_orig; + DBC **dbcp; + u_int32_t flags; +{ + DBC *dbc_n, *dbc_nopd; + int ret; + + dbc_n = dbc_nopd = NULL; + + /* Allocate a new cursor and initialize it. */ + if ((ret = __dbc_idup(dbc_orig, &dbc_n, flags)) != 0) + goto err; + *dbcp = dbc_n; + + /* + * If the cursor references an off-page duplicate tree, allocate a + * new cursor for that tree and initialize it. 
+ */ + if (dbc_orig->internal->opd != NULL) { + if ((ret = + __dbc_idup(dbc_orig->internal->opd, &dbc_nopd, flags)) != 0) + goto err; + dbc_n->internal->opd = dbc_nopd; + dbc_nopd->internal->pdbc = dbc_n; + } + return (0); + +err: if (dbc_n != NULL) + (void)__dbc_close(dbc_n); + if (dbc_nopd != NULL) + (void)__dbc_close(dbc_nopd); + + return (ret); +} + +/* + * __dbc_idup -- + * Internal version of __dbc_dup. + * + * PUBLIC: int __dbc_idup __P((DBC *, DBC **, u_int32_t)); + */ +int +__dbc_idup(dbc_orig, dbcp, flags) + DBC *dbc_orig, **dbcp; + u_int32_t flags; +{ + DB *dbp; + DBC *dbc_n; + DBC_INTERNAL *int_n, *int_orig; + ENV *env; + int ret; + + dbp = dbc_orig->dbp; + dbc_n = *dbcp; + env = dbp->env; + + if ((ret = __db_cursor_int(dbp, dbc_orig->thread_info, + dbc_orig->txn, dbc_orig->dbtype, dbc_orig->internal->root, + F_ISSET(dbc_orig, DBC_OPD) | DBC_DUPLICATE, + dbc_orig->locker, &dbc_n)) != 0) + return (ret); + + /* Position the cursor if requested, acquiring the necessary locks. */ + if (LF_ISSET(DB_POSITION)) { + int_n = dbc_n->internal; + int_orig = dbc_orig->internal; + + dbc_n->flags |= dbc_orig->flags & ~DBC_OWN_LID; + + int_n->indx = int_orig->indx; + int_n->pgno = int_orig->pgno; + int_n->root = int_orig->root; + int_n->lock_mode = int_orig->lock_mode; + + int_n->stream_start_pgno = int_orig->stream_start_pgno; + int_n->stream_off = int_orig->stream_off; + int_n->stream_curr_pgno = int_orig->stream_curr_pgno; + + switch (dbc_orig->dbtype) { + case DB_QUEUE: + if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0) + goto err; + break; + case DB_BTREE: + case DB_RECNO: + if ((ret = __bamc_dup(dbc_orig, dbc_n, flags)) != 0) + goto err; + break; + case DB_HASH: + if ((ret = __hamc_dup(dbc_orig, dbc_n)) != 0) + goto err; + break; + case DB_HEAP: + if ((ret = __heapc_dup(dbc_orig, dbc_n)) != 0) + goto err; + break; + case DB_UNKNOWN: + default: + ret = __db_unknown_type(env, + "__dbc_idup", dbc_orig->dbtype); + goto err; + } + } else if (F_ISSET(dbc_orig, DBC_BULK)) 
{ + /* + * For bulk cursors, remember what page were on, even if we + * don't know that the next operation will be nearby. + */ + dbc_n->internal->pgno = dbc_orig->internal->pgno; + } + + /* Copy the locking flags to the new cursor. */ + F_SET(dbc_n, F_ISSET(dbc_orig, DBC_BULK | + DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED | DBC_WRITECURSOR)); + + /* + * If we're in CDB and this isn't an offpage dup cursor, then + * we need to get a lock for the duplicated cursor. + */ + if (CDB_LOCKING(env) && !F_ISSET(dbc_n, DBC_OPD) && + (ret = __lock_get(env, dbc_n->locker, 0, + &dbc_n->lock_dbt, F_ISSET(dbc_orig, DBC_WRITECURSOR) ? + DB_LOCK_IWRITE : DB_LOCK_READ, &dbc_n->mylock)) != 0) + goto err; + + dbc_n->priority = dbc_orig->priority; + dbc_n->internal->pdbc = dbc_orig->internal->pdbc; + *dbcp = dbc_n; + return (0); + +err: (void)__dbc_close(dbc_n); + return (ret); +} + +/* + * __dbc_newopd -- + * Create a new off-page duplicate cursor. + * + * PUBLIC: int __dbc_newopd __P((DBC *, db_pgno_t, DBC *, DBC **)); + */ +int +__dbc_newopd(dbc_parent, root, oldopd, dbcp) + DBC *dbc_parent; + db_pgno_t root; + DBC *oldopd; + DBC **dbcp; +{ + DB *dbp; + DBC *opd; + DBTYPE dbtype; + int ret; + + dbp = dbc_parent->dbp; + dbtype = (dbp->dup_compare == NULL) ? DB_RECNO : DB_BTREE; + + /* + * On failure, we want to default to returning the old off-page dup + * cursor, if any; our caller can't be left with a dangling pointer + * to a freed cursor. On error the only allowable behavior is to + * close the cursor (and the old OPD cursor it in turn points to), so + * this should be safe. + */ + *dbcp = oldopd; + + if ((ret = __db_cursor_int(dbp, dbc_parent->thread_info, + dbc_parent->txn, + dbtype, root, DBC_OPD, dbc_parent->locker, &opd)) != 0) + return (ret); + + opd->priority = dbc_parent->priority; + opd->internal->pdbc = dbc_parent; + *dbcp = opd; + + /* + * Check to see if we already have an off-page dup cursor that we've + * passed in. If we do, close it. 
It'd be nice to use it again + * if it's a cursor belonging to the right tree, but if we're doing + * a cursor-relative operation this might not be safe, so for now + * we'll take the easy way out and always close and reopen. + * + * Note that under no circumstances do we want to close the old + * cursor without returning a valid new one; we don't want to + * leave the main cursor in our caller with a non-NULL pointer + * to a freed off-page dup cursor. + */ + if (oldopd != NULL && (ret = __dbc_close(oldopd)) != 0) + return (ret); + + return (0); +} + +/* + * __dbc_get -- + * Get using a cursor. + * + * PUBLIC: int __dbc_get __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_get(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + F_CLR(dbc, DBC_ERROR); +#ifdef HAVE_PARTITION + if (F_ISSET(dbc, DBC_PARTITIONED)) + return (__partc_get(dbc, key, data, flags)); +#endif + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbc->dbp)) + return (__bamc_compress_get(dbc, key, data, flags)); +#endif + + return (__dbc_iget(dbc, key, data, flags)); +} + +/* + * __dbc_iget -- + * Implementation of get using a cursor. + * + * PUBLIC: int __dbc_iget __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_iget(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DBC *ddbc, *dbc_n, *opd; + DBC_INTERNAL *cp, *cp_n; + DB_MPOOLFILE *mpf; + ENV *env; + db_pgno_t pgno; + db_indx_t indx_off; + u_int32_t multi, orig_ulen, tmp_flags, tmp_read_locking, tmp_rmw; + u_int8_t type; + int key_small, ret, t_ret; + + COMPQUIET(orig_ulen, 0); + + key_small = 0; + + /* + * Cursor Cleanup Note: + * All of the cursors passed to the underlying access methods by this + * routine are duplicated cursors. On return, any referenced pages + * will be discarded, and, if the cursor is not intended to be used + * again, the close function will be called. 
So, pages/locks that + * the cursor references do not need to be resolved by the underlying + * functions. + */ + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + dbc_n = NULL; + opd = NULL; + + PERFMON6(env, db, get, dbp->fname, dbp->dname, + dbc->txn == NULL ? 0 : dbc->txn->txnid, key, data, flags); + + /* Clear OR'd in additional bits so we can check for flag equality. */ + tmp_rmw = LF_ISSET(DB_RMW); + LF_CLR(DB_RMW); + + SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking); + + multi = LF_ISSET(DB_MULTIPLE|DB_MULTIPLE_KEY); + LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY); + + /* + * Return a cursor's record number. It has nothing to do with the + * cursor get code except that it was put into the interface. + */ + if (flags == DB_GET_RECNO) { + if (tmp_rmw) + F_SET(dbc, DBC_RMW); + F_SET(dbc, tmp_read_locking); + ret = __bamc_rget(dbc, data); + if (tmp_rmw) + F_CLR(dbc, DBC_RMW); + /* Clear the temp flags, but leave WAS_READ_COMMITTED. */ + F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED); + return (ret); + } + + if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT) + CDB_LOCKING_INIT(env, dbc); + + /* Don't return the key or data if it was passed to us. */ + if (!DB_RETURNS_A_KEY(dbp, flags)) + F_SET(key, DB_DBT_ISSET); + if (flags == DB_GET_BOTH && + (dbp->dup_compare == NULL || dbp->dup_compare == __bam_defcmp)) + F_SET(data, DB_DBT_ISSET); + + /* + * If we have an off-page duplicates cursor, and the operation applies + * to it, perform the operation. Duplicate the cursor and call the + * underlying function. + * + * Off-page duplicate trees are locked in the primary tree, that is, + * we acquire a write lock in the primary tree and no locks in the + * off-page dup tree. If the DB_RMW flag was specified and the get + * operation is done in an off-page duplicate tree, call the primary + * cursor's upgrade routine first. 
+ */ + cp = dbc->internal; + if (cp->opd != NULL && + (flags == DB_CURRENT || flags == DB_GET_BOTHC || + flags == DB_NEXT || flags == DB_NEXT_DUP || + flags == DB_PREV || flags == DB_PREV_DUP)) { + if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0) + goto err; + if (F_ISSET(dbc, DBC_TRANSIENT)) + opd = cp->opd; + else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0) + goto err; + + if ((ret = opd->am_get(opd, key, data, flags, NULL)) == 0) + goto done; + /* + * Another cursor may have deleted all of the off-page + * duplicates, so for operations that are moving a cursor, we + * need to skip the empty tree and retry on the parent cursor. + */ + if (ret == DB_NOTFOUND && + (flags == DB_PREV || flags == DB_NEXT)) { + ret = __dbc_close(opd); + opd = NULL; + if (F_ISSET(dbc, DBC_TRANSIENT)) + cp->opd = NULL; + } + if (ret != 0) + goto err; + } else if (cp->opd != NULL && F_ISSET(dbc, DBC_TRANSIENT)) { + if ((ret = __dbc_close(cp->opd)) != 0) + goto err; + cp->opd = NULL; + } + + /* + * Perform an operation on the main cursor. Duplicate the cursor, + * upgrade the lock as required, and call the underlying function. + */ + switch (flags) { + case DB_CURRENT: + case DB_GET_BOTHC: + case DB_NEXT: + case DB_NEXT_DUP: + case DB_NEXT_NODUP: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + tmp_flags = DB_POSITION; + break; + default: + tmp_flags = 0; + break; + } + + /* + * If this cursor is going to be closed immediately, we don't + * need to take precautions to clean it up on error. 
+ */ + if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) + dbc_n = dbc; + else { + ret = __dbc_idup(dbc, &dbc_n, tmp_flags); + + if (ret != 0) + goto err; + COPY_RET_MEM(dbc, dbc_n); + } + + if (tmp_rmw) + F_SET(dbc_n, DBC_RMW); + F_SET(dbc_n, tmp_read_locking); + + switch (multi) { + case DB_MULTIPLE: + F_SET(dbc_n, DBC_MULTIPLE); + break; + case DB_MULTIPLE_KEY: + F_SET(dbc_n, DBC_MULTIPLE_KEY); + break; + case DB_MULTIPLE | DB_MULTIPLE_KEY: + F_SET(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY); + break; + case 0: + default: + break; + } + +retry: pgno = PGNO_INVALID; + ret = dbc_n->am_get(dbc_n, key, data, flags, &pgno); + if (tmp_rmw) + F_CLR(dbc_n, DBC_RMW); + /* + * Clear the temporary locking flags in the new cursor. The user's + * (old) cursor needs to have the WAS_READ_COMMITTED flag because this + * is used on the next call on that cursor. + */ + F_CLR(dbc_n, tmp_read_locking); + F_SET(dbc, tmp_read_locking & DBC_WAS_READ_COMMITTED); + F_CLR(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY); + if (ret != 0) + goto err; + + cp_n = dbc_n->internal; + + /* + * We may be referencing a new off-page duplicates tree. Acquire + * a new cursor and call the underlying function. + */ + if (pgno != PGNO_INVALID) { + if ((ret = __dbc_newopd(dbc, + pgno, cp_n->opd, &cp_n->opd)) != 0) + goto err; + + switch (flags) { + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + case DB_SET: + case DB_SET_RECNO: + case DB_SET_RANGE: + tmp_flags = DB_FIRST; + break; + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + tmp_flags = DB_LAST; + break; + case DB_GET_BOTH: + case DB_GET_BOTHC: + case DB_GET_BOTH_RANGE: + tmp_flags = flags; + break; + default: + ret = __db_unknown_flag(env, "__dbc_get", flags); + goto err; + } + ret = cp_n->opd->am_get(cp_n->opd, key, data, tmp_flags, NULL); + /* + * Another cursor may have deleted all of the off-page + * duplicates, so for operations that are moving a cursor, we + * need to skip the empty tree and retry on the parent cursor. 
+ */ + if (ret == DB_NOTFOUND) { + PERFMON5(env, race, dbc_get, + dbp->fname, dbp->dname, ret, tmp_flags, key); + + switch (flags) { + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + flags = DB_NEXT; + break; + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + flags = DB_PREV; + break; + default: + goto err; + } + + ret = __dbc_close(cp_n->opd); + cp_n->opd = NULL; + if (ret == 0) + goto retry; + } + if (ret != 0) + goto err; + } + +done: /* + * Return a key/data item. The only exception is that we don't return + * a key if the user already gave us one, that is, if the DB_SET flag + * was set. The DB_SET flag is necessary. In a Btree, the user's key + * doesn't have to be the same as the key stored the tree, depending on + * the magic performed by the comparison function. As we may not have + * done any key-oriented operation here, the page reference may not be + * valid. Fill it in as necessary. We don't have to worry about any + * locks, the cursor must already be holding appropriate locks. + * + * XXX + * If not a Btree and DB_SET_RANGE is set, we shouldn't return a key + * either, should we? + */ + cp_n = dbc_n == NULL ? dbc->internal : dbc_n->internal; + if (!F_ISSET(key, DB_DBT_ISSET)) { + if (cp_n->page == NULL && (ret = __memp_fget(mpf, &cp_n->pgno, + dbc->thread_info, dbc->txn, 0, &cp_n->page)) != 0) + goto err; + + if ((ret = __db_ret(dbc, cp_n->page, cp_n->indx, key, + &dbc->rkey->data, &dbc->rkey->ulen)) != 0) { + /* + * If the key DBT is too small, we still want to return + * the size of the data. Otherwise applications are + * forced to check each one with a separate call. We + * don't want to copy the data, so we set the ulen to + * zero before calling __db_ret. 
+ */ + if (ret == DB_BUFFER_SMALL && + F_ISSET(data, DB_DBT_USERMEM)) { + key_small = 1; + orig_ulen = data->ulen; + data->ulen = 0; + } else + goto err; + } + } + if (multi != 0 && dbc->am_bulk != NULL) { + /* + * Even if fetching from the OPD cursor we need a duplicate + * primary cursor if we are going after multiple keys. + */ + if (dbc_n == NULL) { + /* + * Non-"_KEY" DB_MULTIPLE doesn't move the main cursor, + * so it's safe to just use dbc, unless the cursor + * has an open off-page duplicate cursor whose state + * might need to be preserved. + */ + if ((!(multi & DB_MULTIPLE_KEY) && + dbc->internal->opd == NULL) || + F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) + dbc_n = dbc; + else { + if ((ret = __dbc_idup(dbc, + &dbc_n, DB_POSITION)) != 0) + goto err; + if ((ret = dbc_n->am_get(dbc_n, + key, data, DB_CURRENT, &pgno)) != 0) + goto err; + } + cp_n = dbc_n->internal; + } + + /* + * If opd is set then we dupped the opd that we came in with. + * When we return we may have a new opd if we went to another + * key. + */ + if (opd != NULL) { + DB_ASSERT(env, cp_n->opd == NULL); + cp_n->opd = opd; + opd = NULL; + } + + /* + * Bulk get doesn't use __db_retcopy, so data.size won't + * get set up unless there is an error. Assume success + * here. This is the only call to am_bulk, and it avoids + * setting it exactly the same everywhere. If we have an + * DB_BUFFER_SMALL error, it'll get overwritten with the + * needed value. + */ + data->size = data->ulen; + ret = dbc_n->am_bulk(dbc_n, data, flags | multi); + } else if (!F_ISSET(data, DB_DBT_ISSET)) { + ddbc = opd != NULL ? opd : + cp_n->opd != NULL ? cp_n->opd : dbc_n; + cp = ddbc->internal; + if (cp->page == NULL && + (ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, ddbc->txn, 0, &cp->page)) != 0) + goto err; + + type = TYPE(cp->page); + indx_off = ((type == P_LBTREE || + type == P_HASH || type == P_HASH_UNSORTED) ? 
	    O_INDX : 0);
	ret = __db_ret(ddbc, cp->page, cp->indx + indx_off,
	    data, &dbc->rdata->data, &dbc->rdata->ulen);
	}

err:	/* Don't pass DB_DBT_ISSET back to application level, error or no. */
	F_CLR(key, DB_DBT_ISSET);
	F_CLR(data, DB_DBT_ISSET);

	/* Cleanup and cursor resolution. */
	if (opd != NULL) {
		/*
		 * To support dirty reads we must reget the write lock
		 * if we have just stepped off a deleted record.
		 * Since the OPD cursor does not know anything
		 * about the referencing page or cursor we need
		 * to peek at the OPD cursor and get the lock here.
		 */
		if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) &&
		    F_ISSET((BTREE_CURSOR *)
		    dbc->internal->opd->internal, C_DELETED))
			if ((t_ret =
			    dbc->am_writelock(dbc)) != 0 && ret == 0)
				ret = t_ret;
		if ((t_ret = __dbc_cleanup(
		    dbc->internal->opd, opd, ret)) != 0 && ret == 0)
			ret = t_ret;
	}

	if (key_small) {
		data->ulen = orig_ulen;
		if (ret == 0)
			ret = DB_BUFFER_SMALL;
	}

	if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 &&
	    (ret == 0 || ret == DB_BUFFER_SMALL))
		ret = t_ret;

	if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT)
		CDB_LOCKING_DONE(env, dbc);
	return (ret);
}

/* Internal flags shared by the dbc_put functions. */
#define	DBC_PUT_RMW		0x001
#define	DBC_PUT_NODEL		0x002
#define	DBC_PUT_HAVEREC		0x004

/*
 * __dbc_put_resolve_key --
 *	Get the current key and data so that we can correctly update the
 * secondary and foreign databases.
 *
 * Fills in oldkey/olddata from the cursor's current position (DB_CURRENT
 * only) and records DBC_PUT_HAVEREC in *put_statep on success, so callers
 * know the old record has already been fetched.
 */
static inline int
__dbc_put_resolve_key(dbc, oldkey, olddata, put_statep, flags)
	DBC *dbc;
	DBT *oldkey, *olddata;
	u_int32_t flags, *put_statep;
{
	DB *dbp;
	ENV *env;
	int ret, rmw;

	dbp = dbc->dbp;
	env = dbp->env;
	rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0;

	DB_ASSERT(env, flags == DB_CURRENT);
	COMPQUIET(flags, 0);

	/*
	 * This is safe to do on the cursor we already have;
	 * error or no, it won't move.
	 *
	 * We use DB_RMW for all of these gets because we'll be
	 * writing soon enough in the "normal" put code.  In
	 * transactional databases we'll hold those write locks
	 * even if we close the cursor we're reading with.
	 *
	 * The DB_KEYEMPTY return needs special handling -- if the
	 * cursor is on a deleted key, we return DB_NOTFOUND.
	 */
	memset(oldkey, 0, sizeof(DBT));
	if ((ret = __dbc_get(dbc, oldkey, olddata, rmw | DB_CURRENT)) != 0)
		return (ret == DB_KEYEMPTY ? DB_NOTFOUND : ret);

	/* Record that we've looked for the old record. */
	FLD_SET(*put_statep, DBC_PUT_HAVEREC);
	return (0);
}

/*
 * __dbc_put_append --
 *	Handle an append to a primary.
 *
 * Performs the access-method-specific append (heap/queue/recno) first so
 * the generated key is available for subsequent secondary updates, then
 * sets DBC_PUT_NODEL since an append can never overwrite an existing item.
 */
static inline int
__dbc_put_append(dbc, key, data, put_statep, flags)
	DBC *dbc;
	DBT *key, *data;
	u_int32_t flags, *put_statep;
{
	DB *dbp;
	ENV *env;
	DBC *dbc_n;
	DBT tdata;
	int ret, t_ret;

	dbp = dbc->dbp;
	env = dbp->env;
	ret = 0;
	dbc_n = NULL;

	DB_ASSERT(env, flags == DB_APPEND);
	COMPQUIET(flags, 0);

	/*
	 * With DB_APPEND, we need to do the insert to populate the key value.
	 * So we swap the 'normal' order of updating secondary / verifying
	 * foreign databases and inserting.
	 *
	 * If there is an append callback, the value stored in data->data may
	 * be replaced and then freed.  To avoid passing a freed pointer back
	 * to the user, just operate on a copy of the data DBT.
	 */
	tdata = *data;

	/*
	 * If this cursor is going to be closed immediately, we don't
	 * need to take precautions to clean it up on error.
	 */
	if (F_ISSET(dbc, DBC_TRANSIENT))
		dbc_n = dbc;
	else if ((ret = __dbc_idup(dbc, &dbc_n, 0)) != 0)
		goto err;

	/*
	 * Append isn't a normal put operation; call the appropriate access
	 * method's append function.
	 */
	switch (dbp->type) {
	case DB_HEAP:
		if ((ret = __heap_append(dbc_n, key, &tdata)) != 0)
			goto err;
		break;
	case DB_QUEUE:
		if ((ret = __qam_append(dbc_n, key, &tdata)) != 0)
			goto err;
		break;
	case DB_RECNO:
		if ((ret = __ram_append(dbc_n, key, &tdata)) != 0)
			goto err;
		break;
	default:
		/*
		 * The interface should prevent this.
		 *
		 * NOTE(review): this assert's type list looks stale -- it
		 * omits DB_HEAP even though DB_HEAP is handled above.  The
		 * condition is deliberately false on this (unreachable)
		 * branch either way, so diagnostic builds still trip it;
		 * confirm against upstream before "fixing".
		 */
		DB_ASSERT(env,
		    dbp->type == DB_QUEUE || dbp->type == DB_RECNO);

		ret = __db_ferr(env, "DBC->put", 0);
		goto err;
	}

	/*
	 * The append callback, if one exists, may have allocated a new
	 * tdata.data buffer.  If so, free it.
	 */
	FREE_IF_NEEDED(env, &tdata);

	/*
	 * The key value may have been generated by the above operation, but
	 * not set in the data buffer.  Make sure it is there so that secondary
	 * updates can complete.
	 */
	__dbt_userfree(env, key, NULL, NULL);
	if ((ret = __dbt_usercopy(env, key)) != 0)
		goto err;

	/* An append cannot be replacing an existing item. */
	FLD_SET(*put_statep, DBC_PUT_NODEL);

err:	if (dbc_n != NULL &&
	    (t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}

/*
 * __dbc_put_partial --
 *	Ensure that the data item we are using is complete and correct.
 *      Otherwise we could break the secondary constraints.
 *
 * For a DB_DBT_PARTIAL put, fetches the old record (if not already fetched
 * per *put_statep) and builds the full new datum into out_data via
 * __db_buildpartial.
 */
static inline int
__dbc_put_partial(dbc, pkey, data, orig_data, out_data, put_statep, flags)
	DBC *dbc;
	DBT *pkey, *data, *orig_data, *out_data;
	u_int32_t *put_statep, flags;
{
	DB *dbp;
	DBC *pdbc;
	ENV *env;
	int ret, rmw, t_ret;

	dbp = dbc->dbp;
	env = dbp->env;
	ret = t_ret = 0;
	rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0;

	if (!FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) &&
	    !FLD_ISSET(*put_statep, DBC_PUT_NODEL)) {
		/*
		 * We're going to have to search the tree for the
		 * specified key.  Dup a cursor (so we have the same
		 * locking info) and do a c_get.
		 */
		if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0)
			return (ret);

		/*
		 * When doing a put with DB_CURRENT, partial data items have
		 * already been resolved.
		 */
		DB_ASSERT(env, flags != DB_CURRENT);

		F_SET(pkey, DB_DBT_ISSET);
		ret = __dbc_get(pdbc, pkey, orig_data, rmw | DB_SET);
		if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) {
			FLD_SET(*put_statep, DBC_PUT_NODEL);
			ret = 0;
		}
		/*
		 * NOTE(review): unlike the usual "&& ret == 0" idiom used
		 * elsewhere in this file, a __dbc_close failure here
		 * overwrites any prior nonzero ret.  Possibly intentional
		 * (close errors considered more fatal) -- confirm.
		 */
		if ((t_ret = __dbc_close(pdbc)) != 0)
			ret = t_ret;
		if (ret != 0)
			return (ret);

		FLD_SET(*put_statep, DBC_PUT_HAVEREC);
	}

	COMPQUIET(flags, 0);

	/*
	 * Now build the new datum from orig_data and the partial data
	 * we were given.  It's okay to do this if no record was
	 * returned above: a partial put on an empty record is allowed,
	 * if a little strange.  The data is zero-padded.
	 */
	return (__db_buildpartial(dbp, orig_data, data, out_data));
}

/*
 * __dbc_put_fixed_len --
 *	Handle padding for fixed-length records.
 *
 * Pads out_data to the database's fixed record length with the configured
 * pad byte; returns __db_rec_toobig's error if the datum exceeds re_len.
 */
static inline int
__dbc_put_fixed_len(dbc, data, out_data)
	DBC *dbc;
	DBT *data, *out_data;
{
	DB *dbp;
	ENV *env;
	int re_pad, ret;
	u_int32_t re_len, size;

	dbp = dbc->dbp;
	env = dbp->env;
	ret = 0;

	/*
	 * Handle fixed-length records.  If the primary database has
	 * fixed-length records, we need to pad out the datum before
	 * we pass it into the callback function; we always index the
	 * "real" record.
	 */
	if (dbp->type == DB_QUEUE) {
		re_len = ((QUEUE *)dbp->q_internal)->re_len;
		re_pad = ((QUEUE *)dbp->q_internal)->re_pad;
	} else {
		re_len = ((BTREE *)dbp->bt_internal)->re_len;
		re_pad = ((BTREE *)dbp->bt_internal)->re_pad;
	}

	size = data->size;
	if (size > re_len) {
		ret = __db_rec_toobig(env, size, re_len);
		return (ret);
	} else if (size < re_len) {
		/*
		 * If we're not doing a partial put, copy data->data into
		 * out_data->data, then pad out out_data->data. This overrides
		 * the assignment made above, which is used in the more common
		 * case when padding is not needed.
		 *
		 * If we're doing a partial put, the data we want are already
		 * in out_data.data; we just need to pad.
		 */
		if (F_ISSET(data, DB_DBT_PARTIAL)) {
			if ((ret = __os_realloc(
			    env, re_len, &out_data->data)) != 0)
				return (ret);
			/*
			 * In the partial case, we have built the item into
			 * out_data already using __db_buildpartial. Just need
			 * to pad from the end of out_data, not from data->size.
			 */
			size = out_data->size;
		} else {
			if ((ret = __os_malloc(
			    env, re_len, &out_data->data)) != 0)
				return (ret);
			memcpy(out_data->data, data->data, size);
		}
		memset((u_int8_t *)out_data->data + size, re_pad,
		    re_len - size);
		out_data->size = re_len;
	}

	return (ret);
}

/*
 * __dbc_put_secondaries --
 *	Insert the secondary keys, and validate the foreign key constraints.
 */
static inline int
__dbc_put_secondaries(dbc,
    pkey, data, orig_data, s_count, s_keys_buf, put_statep)
	DBC *dbc;
	DBT *pkey, *data, *orig_data, *s_keys_buf;
	int s_count;
	u_int32_t *put_statep;
{
	DB *dbp, *sdbp;
	DBC *fdbc, *sdbc;
	DBT fdata, oldpkey, *skeyp, temppkey, tempskey, *tskeyp;
	ENV *env;
	int cmp, ret, rmw, t_ret;
	u_int32_t nskey;

	dbp = dbc->dbp;
	env = dbp->env;
	fdbc = sdbc = NULL;
	sdbp = NULL;
	t_ret = 0;
	rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0;

	/*
	 * Loop through the secondaries.  (Step 3.)
	 *
	 * Note that __db_s_first and __db_s_next will take care of
	 * thread-locking and refcounting issues.
	 */
	for (ret = __db_s_first(dbp, &sdbp), skeyp = s_keys_buf;
	    sdbp != NULL && ret == 0;
	    ret = __db_s_next(&sdbp, dbc->txn), ++skeyp) {
		DB_ASSERT(env, skeyp - s_keys_buf < s_count);
		/*
		 * Don't process this secondary if the key is immutable and we
		 * know that the old record exists.  This optimization can't be
		 * used if we have not checked for the old record yet.
+ */ + if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) && + !FLD_ISSET(*put_statep, DBC_PUT_NODEL) && + FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY)) + continue; + + /* + * Call the callback for this secondary, to get the + * appropriate secondary key. + */ + if ((ret = sdbp->s_callback(sdbp, + pkey, data, skeyp)) != 0) { + /* Not indexing is equivalent to an empty key set. */ + if (ret == DB_DONOTINDEX) { + F_SET(skeyp, DB_DBT_MULTIPLE); + skeyp->size = 0; + ret = 0; + } else + goto err; + } + + if (sdbp->s_foreign != NULL && + (ret = __db_cursor_int(sdbp->s_foreign, + dbc->thread_info, dbc->txn, sdbp->s_foreign->type, + PGNO_INVALID, 0, dbc->locker, &fdbc)) != 0) + goto err; + + /* + * Mark the secondary key DBT(s) as set -- that is, the + * callback returned at least one secondary key. + * + * Also, if this secondary index is associated with a foreign + * database, check that the foreign db contains the key(s) to + * maintain referential integrity. Set flags in fdata to avoid + * mem copying, we just need to know existence. We need to do + * this check before setting DB_DBT_ISSET, otherwise __dbc_get + * will overwrite the flag values. 
+ */ + if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) { +#ifdef DIAGNOSTIC + __db_check_skeyset(sdbp, skeyp); +#endif + for (tskeyp = (DBT *)skeyp->data, nskey = skeyp->size; + nskey > 0; nskey--, tskeyp++) { + if (fdbc != NULL) { + memset(&fdata, 0, sizeof(DBT)); + F_SET(&fdata, + DB_DBT_PARTIAL | DB_DBT_USERMEM); + if ((ret = __dbc_get( + fdbc, tskeyp, &fdata, + DB_SET | rmw)) == DB_NOTFOUND || + ret == DB_KEYEMPTY) { + ret = DB_FOREIGN_CONFLICT; + break; + } + } + F_SET(tskeyp, DB_DBT_ISSET); + } + tskeyp = (DBT *)skeyp->data; + nskey = skeyp->size; + } else { + if (fdbc != NULL) { + memset(&fdata, 0, sizeof(DBT)); + F_SET(&fdata, DB_DBT_PARTIAL | DB_DBT_USERMEM); + if ((ret = __dbc_get(fdbc, skeyp, &fdata, + DB_SET | rmw)) == DB_NOTFOUND || + ret == DB_KEYEMPTY) + ret = DB_FOREIGN_CONFLICT; + } + F_SET(skeyp, DB_DBT_ISSET); + tskeyp = skeyp; + nskey = 1; + } + if (fdbc != NULL && (t_ret = __dbc_close(fdbc)) != 0 && + ret == 0) + ret = t_ret; + fdbc = NULL; + if (ret != 0) + goto err; + + /* + * If we have the old record, we can generate and remove any + * old secondary key(s) now. We can also skip the secondary + * put if there is no change. + */ + if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC)) { + if ((ret = __dbc_del_oldskey(sdbp, dbc, + skeyp, pkey, orig_data)) == DB_KEYEXIST) + continue; + else if (ret != 0) + goto err; + } + if (nskey == 0) + continue; + + /* + * Open a cursor in this secondary. + * + * Use the same locker ID as our primary cursor, so that + * we're guaranteed that the locks don't conflict (e.g. in CDB + * or if we're subdatabases that share and want to lock a + * metadata page). + */ + if ((ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn, + sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) + goto err; + + /* + * If we're in CDB, updates will fail since the new cursor + * isn't a writer. 
However, we hold the WRITE lock in the + * primary and will for as long as our new cursor lasts, + * and the primary and secondary share a lock file ID, + * so it's safe to consider this a WRITER. The close + * routine won't try to put anything because we don't + * really have a lock. + */ + if (CDB_LOCKING(env)) { + DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); + F_SET(sdbc, DBC_WRITER); + } + + /* + * Swap the primary key to the byte order of this secondary, if + * necessary. By doing this now, we can compare directly + * against the data already in the secondary without having to + * swap it after reading. + */ + SWAP_IF_NEEDED(sdbp, pkey); + + for (; nskey > 0 && ret == 0; nskey--, tskeyp++) { + /* Skip this key if it is already in the database. */ + if (!F_ISSET(tskeyp, DB_DBT_ISSET)) + continue; + + /* + * There are three cases here-- + * 1) The secondary supports sorted duplicates. + * If we attempt to put a secondary/primary pair + * that already exists, that's a duplicate + * duplicate, and c_put will return DB_KEYEXIST + * (see __db_duperr). This will leave us with + * exactly one copy of the secondary/primary pair, + * and this is just right--we'll avoid deleting it + * later, as the old and new secondaries will + * match (since the old secondary is the dup dup + * that's already there). + * 2) The secondary supports duplicates, but they're not + * sorted. We need to avoid putting a duplicate + * duplicate, because the matching old and new + * secondaries will prevent us from deleting + * anything and we'll wind up with two secondary + * records that point to the same primary key. Do + * a c_get(DB_GET_BOTH); only do the put if the + * secondary doesn't exist. + * 3) The secondary doesn't support duplicates at all. + * In this case, secondary keys must be unique; + * if another primary key already exists for this + * secondary key, we have to either overwrite it + * or not put this one, and in either case we've + * corrupted the secondary index. 
Do a + * c_get(DB_SET). If the secondary/primary pair + * already exists, do nothing; if the secondary + * exists with a different primary, return an + * error; and if the secondary does not exist, + * put it. + */ + if (!F_ISSET(sdbp, DB_AM_DUP)) { + /* Case 3. */ + memset(&oldpkey, 0, sizeof(DBT)); + F_SET(&oldpkey, DB_DBT_MALLOC); + ret = __dbc_get(sdbc, + tskeyp, &oldpkey, rmw | DB_SET); + if (ret == 0) { + cmp = __bam_defcmp(sdbp, + &oldpkey, pkey); + __os_ufree(env, oldpkey.data); + /* + * If the secondary key is unchanged, + * skip the put and go on to the next + * one. + */ + if (cmp == 0) + continue; + + __db_errx(env, DB_STR("0695", + "Put results in a non-unique secondary key in an " + "index not configured to support duplicates")); + ret = EINVAL; + } + if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) + break; + } else if (!F_ISSET(sdbp, DB_AM_DUPSORT)) { + /* Case 2. */ + DB_INIT_DBT(tempskey, + tskeyp->data, tskeyp->size); + DB_INIT_DBT(temppkey, + pkey->data, pkey->size); + ret = __dbc_get(sdbc, &tempskey, &temppkey, + rmw | DB_GET_BOTH); + if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) + break; + } + + ret = __dbc_put(sdbc, tskeyp, pkey, + DB_UPDATE_SECONDARY); + + /* + * We don't know yet whether this was a put-overwrite + * that in fact changed nothing. If it was, we may get + * DB_KEYEXIST. This is not an error. + */ + if (ret == DB_KEYEXIST) + ret = 0; + } + + /* Make sure the primary key is back in native byte-order. */ + SWAP_IF_NEEDED(sdbp, pkey); + + if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0) + goto err; + + /* + * Mark that we have a key for this secondary so we can check + * it later before deleting the old one. We can't set it + * earlier or it would be cleared in the calls above. 
+ */ + F_SET(skeyp, DB_DBT_ISSET); + } +err: if (sdbp != NULL && + (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) + ret = t_ret; + COMPQUIET(s_count, 0); + return (ret); +} + +static int +__dbc_put_primary(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp, *sdbp; + DBC *dbc_n, *pdbc; + DBT oldkey, olddata, newdata; + DBT *all_skeys, *skeyp, *tskeyp; + ENV *env; + int ret, t_ret, s_count; + u_int32_t nskey, put_state, rmw; + + dbp = dbc->dbp; + env = dbp->env; + t_ret = 0; + put_state = 0; + sdbp = NULL; + pdbc = dbc_n = NULL; + all_skeys = NULL; + memset(&newdata, 0, sizeof(DBT)); + memset(&olddata, 0, sizeof(DBT)); + + /* + * We do multiple cursor operations in some cases and subsequently + * access the data DBT information. Set DB_DBT_MALLOC so we don't risk + * modification of the data between our uses of it. + */ + F_SET(&olddata, DB_DBT_MALLOC); + + /* + * We have at least one secondary which we may need to update. + * + * There is a rather vile locking issue here. Secondary gets + * will always involve acquiring a read lock in the secondary, + * then acquiring a read lock in the primary. Ideally, we + * would likewise perform puts by updating all the secondaries + * first, then doing the actual put in the primary, to avoid + * deadlock (since having multiple threads doing secondary + * gets and puts simultaneously is probably a common case). + * + * However, if this put is a put-overwrite--and we have no way to + * tell in advance whether it will be--we may need to delete + * an outdated secondary key. In order to find that old + * secondary key, we need to get the record we're overwriting, + * before we overwrite it. + * + * (XXX: It would be nice to avoid this extra get, and have the + * underlying put routines somehow pass us the old record + * since they need to traverse the tree anyway. 
I'm saving + * this optimization for later, as it's a lot of work, and it + * would be hard to fit into this locking paradigm anyway.) + * + * The simple thing to do would be to go get the old record before + * we do anything else. Unfortunately, though, doing so would + * violate our "secondary, then primary" lock acquisition + * ordering--even in the common case where no old primary record + * exists, we'll still acquire and keep a lock on the page where + * we're about to do the primary insert. + * + * To get around this, we do the following gyrations, which + * hopefully solve this problem in the common case: + * + * 1) If this is a c_put(DB_CURRENT), go ahead and get the + * old record. We already hold the lock on this page in + * the primary, so no harm done, and we'll need the primary + * key (which we weren't passed in this case) to do any + * secondary puts anyway. + * If this is a put(DB_APPEND), then we need to insert the item, + * so that we can know the key value. So go ahead and insert. In + * the case of a put(DB_APPEND) without secondaries it is + * implemented in the __db_put method as an optimization. + * + * 2) If we're doing a partial put, we need to perform the + * get on the primary key right away, since we don't have + * the whole datum that the secondary key is based on. + * We may also need to pad out the record if the primary + * has a fixed record length. + * + * 3) Loop through the secondary indices, putting into each a + * new secondary key that corresponds to the new record. + * + * 4) If we haven't done so in (1) or (2), get the old primary + * key/data pair. If one does not exist--the common case--we're + * done with secondary indices, and can go straight on to the + * primary put. + * + * 5) If we do have an old primary key/data pair, however, we need + * to loop through all the secondaries a second time and delete + * the old secondary in each. 
+ */ + s_count = __db_s_count(dbp); + if ((ret = __os_calloc(env, + (u_int)s_count, sizeof(DBT), &all_skeys)) != 0) + goto err; + + /* + * Primary indices can't have duplicates, so only DB_APPEND, + * DB_CURRENT, DB_KEYFIRST, and DB_KEYLAST make any sense. Other flags + * should have been caught by the checking routine, but + * add a sprinkling of paranoia. + */ + DB_ASSERT(env, flags == DB_APPEND || flags == DB_CURRENT || + flags == DB_KEYFIRST || flags == DB_KEYLAST || + flags == DB_NOOVERWRITE || flags == DB_OVERWRITE_DUP); + + /* + * We'll want to use DB_RMW in a few places, but it's only legal + * when locking is on. + */ + rmw = STD_LOCKING(dbc) ? DB_RMW : 0; + if (rmw) + FLD_SET(put_state, DBC_PUT_RMW); + + /* Resolve the primary key if required (Step 1). */ + if (flags == DB_CURRENT) { + if ((ret = __dbc_put_resolve_key(dbc, + &oldkey, &olddata, &put_state, flags)) != 0) + goto err; + key = &oldkey; + } else if (flags == DB_APPEND) { + if ((ret = __dbc_put_append(dbc, + key, data, &put_state, flags)) != 0) + goto err; + } + + /* + * PUT_NOOVERWRITE with secondaries is a troublesome case. We need + * to check that the insert will work prior to making any changes + * to secondaries. Try to work within the locking constraints outlined + * above. + * + * This is DB->put (DB_NOOVERWRITE). DBC->put(DB_NODUPDATA) is not + * relevant since it is only valid on DBs that support duplicates, + * which primaries with secondaries can't have. + */ + if (flags == DB_NOOVERWRITE) { + /* Don't bother retrieving the data. */ + F_SET(key, DB_DBT_ISSET); + olddata.dlen = 0; + olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + ret = __dbc_get(dbc, key, &olddata, DB_SET); + if (ret == 0) { + ret = DB_KEYEXIST; + goto done; + } else if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) + goto err; + } + + /* + * Check for partial puts using DB_DBT_PARTIAL (Step 2). 
+ */ + if (F_ISSET(data, DB_DBT_PARTIAL)) { + if ((ret = __dbc_put_partial(dbc, + key, data, &olddata, &newdata, &put_state, flags)) != 0) + goto err; + } else { + newdata = *data; + } + + /* + * Check for partial puts, with fixed length record databases (Step 2). + */ + if ((dbp->type == DB_RECNO && F_ISSET(dbp, DB_AM_FIXEDLEN)) || + (dbp->type == DB_QUEUE)) { + if ((ret = __dbc_put_fixed_len(dbc, data, &newdata)) != 0) + goto err; + } + + /* Validate any foreign databases, and update secondaries. (Step 3). */ + if ((ret = __dbc_put_secondaries(dbc, key, &newdata, + &olddata, s_count, all_skeys, &put_state)) + != 0) + goto err; + /* + * If we've already got the old primary key/data pair, the secondary + * updates are already done. + */ + if (FLD_ISSET(put_state, DBC_PUT_HAVEREC)) + goto done; + + /* + * If still necessary, go get the old primary key/data. (Step 4.) + * + * See the comments in step 2. This is real familiar. + */ + if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0) + goto err; + DB_ASSERT(env, flags != DB_CURRENT); + F_SET(key, DB_DBT_ISSET); + ret = __dbc_get(pdbc, key, &olddata, rmw | DB_SET); + if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) { + FLD_SET(put_state, DBC_PUT_NODEL); + ret = 0; + } + if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + /* + * Check whether we do in fact have an old record we may need to + * delete. (Step 5). + */ + if (FLD_ISSET(put_state, DBC_PUT_NODEL)) + goto done; + + for (ret = __db_s_first(dbp, &sdbp), skeyp = all_skeys; + sdbp != NULL && ret == 0; + ret = __db_s_next(&sdbp, dbc->txn), skeyp++) { + DB_ASSERT(env, skeyp - all_skeys < s_count); + /* + * Don't process this secondary if the key is immutable. We + * know that the old record exists, so this optimization can + * always be used. 
	 */
		if (FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY))
			continue;

		if ((ret = __dbc_del_oldskey(sdbp, dbc,
		    skeyp, key, &olddata)) != 0 && ret != DB_KEYEXIST)
			goto err;
	}
	if (ret != 0)
		goto err;

	/*
	 * NOTE(review): "done" and "err" intentionally coincide -- the
	 * cleanup below (cursor resolution, buffer frees, CDB unlock,
	 * secondary handle release, skey free loop) runs on both the
	 * success and the failure paths.
	 */
done:
err:
	if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0)
		ret = t_ret;

	/* If newdata or olddata were used, free their buffers. */
	if (newdata.data != NULL && newdata.data != data->data)
		__os_free(env, newdata.data);
	if (olddata.data != NULL)
		__os_ufree(env, olddata.data);

	CDB_LOCKING_DONE(env, dbc);

	if (sdbp != NULL &&
	    (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0)
		ret = t_ret;

	for (skeyp = all_skeys; skeyp - all_skeys < s_count; skeyp++) {
		if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) {
			for (nskey = skeyp->size, tskeyp = (DBT *)skeyp->data;
			    nskey > 0;
			    nskey--, tskeyp++)
				FREE_IF_NEEDED(env, tskeyp);
		}
		FREE_IF_NEEDED(env, skeyp);
	}
	if (all_skeys != NULL)
		__os_free(env, all_skeys);
	return (ret);
}

/*
 * __dbc_put --
 *	Put using a cursor.
 *
 * Top-level cursor put: normalizes the flags, updates any secondary
 * indices via __dbc_put_primary, then performs the actual store through
 * __dbc_iput (or the compression path).
 *
 * PUBLIC: int __dbc_put __P((DBC *, DBT *, DBT *, u_int32_t));
 */
int
__dbc_put(dbc, key, data, flags)
	DBC *dbc;
	DBT *key, *data;
	u_int32_t flags;
{
	DB *dbp;
	int ret;

	dbp = dbc->dbp;
	ret = 0;
	F_CLR(dbc, DBC_ERROR);

	/*
	 * Putting to secondary indices is forbidden;  when we need to
	 * internally update one, we're called with a private flag,
	 * DB_UPDATE_SECONDARY, which does the right thing but won't return an
	 * error during flag checking.
	 *
	 * As a convenience, many places that want the default DB_KEYLAST
	 * behavior call DBC->put with flags == 0.  Protect lower-level code
	 * here by translating that.
	 *
	 * Lastly, the DB_OVERWRITE_DUP flag is equivalent to DB_KEYLAST unless
	 * there are sorted duplicates.  Limit the number of places that need
	 * to test for it explicitly.
	 */
	if (flags == DB_UPDATE_SECONDARY || flags == 0 ||
	    (flags == DB_OVERWRITE_DUP && !F_ISSET(dbp, DB_AM_DUPSORT)))
		flags = DB_KEYLAST;

	CDB_LOCKING_INIT(dbc->env, dbc);

	/*
	 * NOTE(review): "env" is not a local of this function; this
	 * presumably only compiles because PERFMON6 discards its first
	 * argument unless performance monitoring is configured in -- TODO
	 * confirm against the PERFMON macro definitions.
	 */
	PERFMON6(env, db, put, dbp->fname, dbp->dname,
	    dbc->txn == NULL ? 0 : dbc->txn->txnid, key, data, flags);
	/*
	 * Check to see if we are a primary and have secondary indices.
	 * If we are not, we save ourselves a good bit of trouble and
	 * just skip to the "normal" put.
	 *
	 * On success (ret == 0) we deliberately fall through: secondary
	 * maintenance is done, but the primary-tree insert still happens
	 * below.
	 */
	if (DB_IS_PRIMARY(dbp) &&
	    ((ret = __dbc_put_primary(dbc, key, data, flags)) != 0))
		return (ret);

	/*
	 * If this is an append operation, the insert was done prior to the
	 * secondary updates, so we are finished.
	 */
	if (flags == DB_APPEND)
		return (ret);

#ifdef HAVE_COMPRESSION
	if (DB_IS_COMPRESSED(dbp))
		return (__bamc_compress_put(dbc, key, data, flags));
#endif

	return (__dbc_iput(dbc, key, data, flags));
}

/*
 * __dbc_iput --
 *	Implementation of put using a cursor.
 *
 * PUBLIC: int __dbc_iput __P((DBC *, DBT *, DBT *, u_int32_t));
 */
int
__dbc_iput(dbc, key, data, flags)
	DBC *dbc;
	DBT *key, *data;
	u_int32_t flags;
{
	DBC *dbc_n, *oldopd, *opd;
	db_pgno_t pgno;
	int ret, t_ret;
	u_int32_t tmp_flags;

	/*
	 * Cursor Cleanup Note:
	 * All of the cursors passed to the underlying access methods by this
	 * routine are duplicated cursors.  On return, any referenced pages
	 * will be discarded, and, if the cursor is not intended to be used
	 * again, the close function will be called.  So, pages/locks that
	 * the cursor references do not need to be resolved by the underlying
	 * functions.
	 */
	dbc_n = NULL;
	ret = t_ret = 0;

	/*
	 * If we have an off-page duplicates cursor, and the operation applies
	 * to it, perform the operation.  Duplicate the cursor and call the
	 * underlying function.
	 *
	 * Off-page duplicate trees are locked in the primary tree, that is,
	 * we acquire a write lock in the primary tree and no locks in the
	 * off-page dup tree.
If the put operation is done in an off-page + * duplicate tree, call the primary cursor's upgrade routine first. + */ + if (dbc->internal->opd != NULL && + (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT)) { + /* + * A special case for hash off-page duplicates. Hash doesn't + * support (and is documented not to support) put operations + * relative to a cursor which references an already deleted + * item. For consistency, apply the same criteria to off-page + * duplicates as well. + */ + if (dbc->dbtype == DB_HASH && F_ISSET( + ((BTREE_CURSOR *)(dbc->internal->opd->internal)), + C_DELETED)) { + ret = DB_NOTFOUND; + goto err; + } + + if ((ret = dbc->am_writelock(dbc)) != 0 || + (ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0) + goto err; + opd = dbc_n->internal->opd; + if ((ret = opd->am_put( + opd, key, data, flags, NULL)) != 0) + goto err; + goto done; + } + + /* + * Perform an operation on the main cursor. Duplicate the cursor, + * and call the underlying function. + */ + if (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT) + tmp_flags = DB_POSITION; + else + tmp_flags = 0; + + /* + * If this cursor is going to be closed immediately, we don't + * need to take precautions to clean it up on error. + */ + if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) + dbc_n = dbc; + else if ((ret = __dbc_idup(dbc, &dbc_n, tmp_flags)) != 0) + goto err; + + pgno = PGNO_INVALID; + if ((ret = dbc_n->am_put(dbc_n, key, data, flags, &pgno)) != 0) + goto err; + + /* + * We may be referencing a new off-page duplicates tree. Acquire + * a new cursor and call the underlying function. 
+ */ + if (pgno != PGNO_INVALID) { + oldopd = dbc_n->internal->opd; + if ((ret = __dbc_newopd(dbc, pgno, oldopd, &opd)) != 0) { + dbc_n->internal->opd = opd; + goto err; + } + + dbc_n->internal->opd = opd; + opd->internal->pdbc = dbc_n; + + if (flags == DB_NOOVERWRITE) + flags = DB_KEYLAST; + if ((ret = opd->am_put( + opd, key, data, flags, NULL)) != 0) + goto err; + } + +done: +err: /* Cleanup and cursor resolution. */ + if (dbc_n != NULL && !DB_RETOK_DBCPUT(ret)) + F_SET(dbc_n, DBC_ERROR); + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __dbc_del_oldskey -- + * Delete an old secondary key, if necessary. + * Returns DB_KEYEXIST if the new and old keys match.. + */ +static int +__dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata) + DB *sdbp; + DBC *dbc; + DBT *skey, *pkey, *olddata; +{ + DB *dbp; + DBC *sdbc; + DBT *toldskeyp, *tskeyp; + DBT oldskey, temppkey, tempskey; + ENV *env; + int ret, t_ret; + u_int32_t i, noldskey, nsame, nskey, rmw; + + sdbc = NULL; + dbp = sdbp->s_primary; + env = dbp->env; + nsame = 0; + rmw = STD_LOCKING(dbc) ? DB_RMW : 0; + + /* + * Get the old secondary key. + */ + memset(&oldskey, 0, sizeof(DBT)); + if ((ret = sdbp->s_callback(sdbp, pkey, olddata, &oldskey)) != 0) { + if (ret == DB_DONOTINDEX || + (F_ISSET(&oldskey, DB_DBT_MULTIPLE) && oldskey.size == 0)) + /* There's no old key to delete. */ + ret = 0; + return (ret); + } + + if (F_ISSET(&oldskey, DB_DBT_MULTIPLE)) { +#ifdef DIAGNOSTIC + __db_check_skeyset(sdbp, &oldskey); +#endif + toldskeyp = (DBT *)oldskey.data; + noldskey = oldskey.size; + } else { + toldskeyp = &oldskey; + noldskey = 1; + } + + if (F_ISSET(skey, DB_DBT_MULTIPLE)) { + nskey = skey->size; + skey = (DBT *)skey->data; + } else + nskey = F_ISSET(skey, DB_DBT_ISSET) ? 1 : 0; + + for (; noldskey > 0 && ret == 0; noldskey--, toldskeyp++) { + /* + * Check whether this old secondary key is also a new key + * before we delete it. 
Note that bt_compare is (and must be) + * set no matter what access method we're in. + */ + for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++) + if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, + toldskeyp, tskeyp) == 0) { + nsame++; + F_CLR(tskeyp, DB_DBT_ISSET); + break; + } + + if (i < nskey) { + FREE_IF_NEEDED(env, toldskeyp); + continue; + } + + if (sdbc == NULL) { + if ((ret = __db_cursor_int(sdbp, + dbc->thread_info, dbc->txn, sdbp->type, + PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) + goto err; + if (CDB_LOCKING(env)) { + DB_ASSERT(env, + sdbc->mylock.off == LOCK_INVALID); + F_SET(sdbc, DBC_WRITER); + } + } + + /* + * Don't let c_get(DB_GET_BOTH) stomp on our data. Use + * temporary DBTs instead. + */ + SWAP_IF_NEEDED(sdbp, pkey); + DB_INIT_DBT(temppkey, pkey->data, pkey->size); + DB_INIT_DBT(tempskey, toldskeyp->data, toldskeyp->size); + if ((ret = __dbc_get(sdbc, + &tempskey, &temppkey, rmw | DB_GET_BOTH)) == 0) + ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY); + else if (ret == DB_NOTFOUND) + ret = __db_secondary_corrupt(dbp); + SWAP_IF_NEEDED(sdbp, pkey); + FREE_IF_NEEDED(env, toldskeyp); + } + +err: for (; noldskey > 0; noldskey--, toldskeyp++) + FREE_IF_NEEDED(env, toldskeyp); + FREE_IF_NEEDED(env, &oldskey); + if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0 && nsame == nskey) + return (DB_KEYEXIST); + return (ret); +} + +/* + * __db_duperr() + * Error message: we don't currently support sorted duplicate duplicates. + * PUBLIC: int __db_duperr __P((DB *, u_int32_t)); + */ +int +__db_duperr(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + /* + * If we run into this error while updating a secondary index, + * don't yell--there's no clean way to pass DB_NODUPDATA in along + * with DB_UPDATE_SECONDARY, but we may run into this problem + * in a normal, non-error course of events. + * + * !!! 
+ * If and when we ever permit duplicate duplicates in sorted-dup + * databases, we need to either change the secondary index code + * to check for dup dups, or we need to maintain the implicit + * "DB_NODUPDATA" behavior for databases with DB_AM_SECONDARY set. + */ + if (flags != DB_NODUPDATA && !F_ISSET(dbp, DB_AM_SECONDARY)) + __db_errx(dbp->env, DB_STR("0696", + "Duplicate data items are not supported with sorted data")); + return (DB_KEYEXIST); +} + +/* + * __dbc_cleanup -- + * Clean up duplicate cursors. + * + * PUBLIC: int __dbc_cleanup __P((DBC *, DBC *, int)); + */ +int +__dbc_cleanup(dbc, dbc_n, failed) + DBC *dbc, *dbc_n; + int failed; +{ + DB *dbp; + DBC *opd; + DBC_INTERNAL *internal; + DB_MPOOLFILE *mpf; + int ret, t_ret; + + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_OFF(dbc->thread_info); + + dbp = dbc->dbp; + mpf = dbp->mpf; + internal = dbc->internal; + ret = 0; + + /* Discard any pages we're holding. */ + if (internal->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + internal->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + internal->page = NULL; + } + opd = internal->opd; + if (opd != NULL && opd->internal->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + opd->internal->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + opd->internal->page = NULL; + } + + /* + * If dbc_n is NULL, there's no internal cursor swapping to be done + * and no dbc_n to close--we probably did the entire operation on an + * offpage duplicate cursor. Just return. + * + * If dbc and dbc_n are the same, we're either inside a DB->{put/get} + * operation, and as an optimization we performed the operation on + * the main cursor rather than on a duplicated one, or we're in a + * bulk get that can't have moved the cursor (DB_MULTIPLE with the + * initial c_get operation on an off-page dup cursor). 
Just + * return--either we know we didn't move the cursor, or we're going + * to close it before we return to application code, so we're sure + * not to visibly violate the "cursor stays put on error" rule. + */ + if (dbc_n == NULL || dbc == dbc_n) + goto done; + + if (dbc_n->internal->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + dbc_n->internal->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + dbc_n->internal->page = NULL; + } + opd = dbc_n->internal->opd; + if (opd != NULL && opd->internal->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + opd->internal->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + opd->internal->page = NULL; + } + + /* + * If we didn't fail before entering this routine or just now when + * freeing pages, swap the interesting contents of the old and new + * cursors. + */ + if (!failed && ret == 0) { + if (opd != NULL) + opd->internal->pdbc = dbc; + if (internal->opd != NULL) + internal->opd->internal->pdbc = dbc_n; + dbc->internal = dbc_n->internal; + dbc_n->internal = internal; + } + + /* + * Close the cursor we don't care about anymore. The close can fail, + * but we only expect DB_LOCK_DEADLOCK failures. This violates our + * "the cursor is unchanged on error" semantics, but since all you can + * do with a DB_LOCK_DEADLOCK failure is close the cursor, I believe + * that's OK. + * + * XXX + * There's no way to recover from failure to close the old cursor. + * All we can do is move to the new position and return an error. + * + * XXX + * We might want to consider adding a flag to the cursor, so that any + * subsequent operations other than close just return an error? + */ + if ((t_ret = __dbc_close(dbc_n)) != 0 && ret == 0) + ret = t_ret; + + /* + * If this was an update that is supporting dirty reads + * then we may have just swapped our read for a write lock + * which is held by the surviving cursor. We need + * to explicitly downgrade this lock. 
The closed cursor + * may only have had a read lock. + */ + if (ret == 0 && failed == 0 && F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) && + dbc->internal->lock_mode == DB_LOCK_WRITE && + (ret = __TLPUT(dbc, dbc->internal->lock)) == 0) + dbc->internal->lock_mode = DB_LOCK_WWRITE; + +done: + if (F_ISSET(dbc, DBC_OPD)) + LOCK_CHECK_ON(dbc->thread_info); + + return (ret); +} + +/* + * __dbc_secondary_get_pp -- + * This wrapper function for DBC->pget() is the DBC->get() function + * for a secondary index cursor. + * + * PUBLIC: int __dbc_secondary_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_secondary_get_pp(dbc, skey, data, flags) + DBC *dbc; + DBT *skey, *data; + u_int32_t flags; +{ + DB_ASSERT(dbc->env, F_ISSET(dbc->dbp, DB_AM_SECONDARY)); + return (__dbc_pget_pp(dbc, skey, NULL, data, flags)); +} + +/* + * __dbc_pget -- + * Get a primary key/data pair through a secondary index. + * + * PUBLIC: int __dbc_pget __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_pget(dbc, skey, pkey, data, flags) + DBC *dbc; + DBT *skey, *pkey, *data; + u_int32_t flags; +{ + DB *pdbp, *sdbp; + DBC *dbc_n, *pdbc; + DBT nullpkey; + u_int32_t save_pkey_flags, tmp_flags, tmp_read_locking, tmp_rmw; + int pkeymalloc, ret, t_ret; + + sdbp = dbc->dbp; + pdbp = sdbp->s_primary; + dbc_n = NULL; + pkeymalloc = t_ret = 0; + + /* + * The challenging part of this function is getting the behavior + * right for all the various permutations of DBT flags. The + * next several blocks handle the various cases we need to + * deal with specially. + */ + + /* + * We may be called with a NULL pkey argument, if we've been + * wrapped by a 2-DBT get call. If so, we need to use our + * own DBT. + */ + if (pkey == NULL) { + memset(&nullpkey, 0, sizeof(DBT)); + pkey = &nullpkey; + } + + /* Clear OR'd in additional bits so we can check for flag equality. 
*/ + tmp_rmw = LF_ISSET(DB_RMW); + LF_CLR(DB_RMW); + + SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking); + /* + * DB_GET_RECNO is a special case, because we're interested not in + * the primary key/data pair, but rather in the primary's record + * number. + */ + if (flags == DB_GET_RECNO) { + if (tmp_rmw) + F_SET(dbc, DBC_RMW); + F_SET(dbc, tmp_read_locking); + ret = __dbc_pget_recno(dbc, pkey, data, flags); + if (tmp_rmw) + F_CLR(dbc, DBC_RMW); + /* Clear the temp flags, but leave WAS_READ_COMMITTED. */ + F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED); + return (ret); + } + + /* + * If the DBTs we've been passed don't have any of the + * user-specified memory management flags set, we want to make sure + * we return values using the DBTs dbc->rskey, dbc->rkey, and + * dbc->rdata, respectively. + * + * There are two tricky aspects to this: first, we need to pass + * skey and pkey *in* to the initial c_get on the secondary key, + * since either or both may be looked at by it (depending on the + * get flag). Second, we must not use a normal DB->get call + * on the secondary, even though that's what we want to accomplish, + * because the DB handle may be free-threaded. Instead, + * we open a cursor, then take steps to ensure that we actually use + * the rkey/rdata from the *secondary* cursor. + * + * We accomplish all this by passing in the DBTs we started out + * with to the c_get, but swapping the contents of rskey and rkey, + * respectively, into rkey and rdata; __db_ret will treat them like + * the normal key/data pair in a c_get call, and will realloc them as + * need be (this is "step 1"). Then, for "step 2", we swap back + * rskey/rkey/rdata to normal, and do a get on the primary with the + * secondary dbc appointed as the owner of the returned-data memory. + * + * Note that in step 2, we copy the flags field in case we need to + * pass down a DB_DBT_PARTIAL or other flag that is compatible with + * letting DB do the memory management. 
+ */ + + /* + * It is correct, though slightly sick, to attempt a partial get of a + * primary key. However, if we do so here, we'll never find the + * primary record; clear the DB_DBT_PARTIAL field of pkey just for the + * duration of the next call. + */ + save_pkey_flags = pkey->flags; + F_CLR(pkey, DB_DBT_PARTIAL); + + /* + * Now we can go ahead with the meat of this call. First, get the + * primary key from the secondary index. (What exactly we get depends + * on the flags, but the underlying cursor get will take care of the + * dirty work.) Duplicate the cursor, in case the later get on the + * primary fails. + */ + switch (flags) { + case DB_CURRENT: + case DB_GET_BOTHC: + case DB_NEXT: + case DB_NEXT_DUP: + case DB_NEXT_NODUP: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + tmp_flags = DB_POSITION; + break; + default: + tmp_flags = 0; + break; + } + + if (F_ISSET(dbc, DBC_PARTITIONED | DBC_TRANSIENT)) + dbc_n = dbc; + else { + if ((ret = __dbc_dup(dbc, &dbc_n, tmp_flags)) != 0) + return (ret); + F_SET(dbc_n, DBC_TRANSIENT); + } + + if (tmp_rmw) + F_SET(dbc_n, DBC_RMW); + F_SET(dbc_n, tmp_read_locking); + + /* + * If we've been handed a primary key, it will be in native byte order, + * so we need to swap it before reading from the secondary. + */ + if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC || + flags == DB_GET_BOTH_RANGE) + SWAP_IF_NEEDED(sdbp, pkey); + +retry: /* Step 1. */ + dbc_n->rdata = dbc->rkey; + dbc_n->rkey = dbc->rskey; + ret = __dbc_get(dbc_n, skey, pkey, flags); + /* Restore pkey's flags in case we stomped the PARTIAL flag. */ + pkey->flags = save_pkey_flags; + + /* + * We need to swap the primary key to native byte order if we read it + * successfully, or if we swapped it on entry above. We can't return + * with the application's data modified. 
+ */ + if (ret == 0 || flags == DB_GET_BOTH || flags == DB_GET_BOTHC || + flags == DB_GET_BOTH_RANGE) + SWAP_IF_NEEDED(sdbp, pkey); + + if (ret != 0) + goto err; + + /* + * Now we're ready for "step 2". If either or both of pkey and data do + * not have memory management flags set--that is, if DB is managing + * their memory--we need to swap around the rkey/rdata structures so + * that we don't wind up trying to use memory managed by the primary + * database cursor, which we'll close before we return. + * + * !!! + * If you're carefully following the bouncing ball, you'll note that in + * the DB-managed case, the buffer hanging off of pkey is the same as + * dbc->rkey->data. This is just fine; we may well realloc and stomp + * on it when we return, if we're doing a DB_GET_BOTH and need to + * return a different partial or key (depending on the comparison + * function), but this is safe. + * + * !!! + * We need to use __db_cursor_int here rather than simply calling + * pdbp->cursor, because otherwise, if we're in CDB, we'll allocate a + * new locker ID and leave ourselves open to deadlocks. (Even though + * we're only acquiring read locks, we'll still block if there are any + * waiters.) + */ + if ((ret = __db_cursor_int(pdbp, dbc->thread_info, + dbc->txn, pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0) + goto err; + + F_SET(pdbc, tmp_read_locking | + F_ISSET(dbc, DBC_READ_UNCOMMITTED | DBC_READ_COMMITTED | DBC_RMW)); + + /* + * We're about to use pkey a second time. If DB_DBT_MALLOC is set on + * it, we'll leak the memory we allocated the first time. Thus, set + * DB_DBT_REALLOC instead so that we reuse that memory instead of + * leaking it. + * + * Alternatively, if the application is handling copying for pkey, we + * need to take a copy now. The copy will be freed on exit from + * __dbc_pget_pp (and we must be coming through there if DB_DBT_USERCOPY + * is set). 
In the case of DB_GET_BOTH_RANGE, the pkey supplied by + * the application has already been copied in but the value may have + * changed in the search. In that case, free the original copy and get + * a new one. + * + * !!! + * This assumes that the user must always specify a compatible realloc + * function if a malloc function is specified. I think this is a + * reasonable requirement. + */ + if (F_ISSET(pkey, DB_DBT_MALLOC)) { + F_CLR(pkey, DB_DBT_MALLOC); + F_SET(pkey, DB_DBT_REALLOC); + pkeymalloc = 1; + } else if (F_ISSET(pkey, DB_DBT_USERCOPY)) { + if (flags == DB_GET_BOTH_RANGE) + __dbt_userfree(sdbp->env, NULL, pkey, NULL); + if ((ret = __dbt_usercopy(sdbp->env, pkey)) != 0) + goto err; + } + + /* + * Do the actual get. Set DBC_TRANSIENT since we don't care about + * preserving the position on error, and it's faster. SET_RET_MEM so + * that the secondary DBC owns any returned-data memory. + */ + F_SET(pdbc, DBC_TRANSIENT); + SET_RET_MEM(pdbc, dbc); + ret = __dbc_get(pdbc, pkey, data, DB_SET); + + /* + * If the item wasn't found in the primary, this is a bug; our + * secondary has somehow gotten corrupted, and contains elements that + * don't correspond to anything in the primary. Complain. + */ + + /* Now close the primary cursor. */ + if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + + else if (ret == DB_NOTFOUND) { + if (!F_ISSET(pdbc, DBC_READ_UNCOMMITTED)) + ret = __db_secondary_corrupt(pdbp); + else switch (flags) { + case DB_GET_BOTHC: + case DB_NEXT: + case DB_NEXT_DUP: + case DB_NEXT_NODUP: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + PERFMON5(env, race, dbc_get, + sdbp->fname, sdbp->dname, ret, flags, pkey); + goto retry; + default: + break; + } + } + +err: /* Cleanup and cursor resolution. 
*/ + if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) + ret = t_ret; + if (pkeymalloc) { + /* + * If pkey had a MALLOC flag, we need to restore it; otherwise, + * if the user frees the buffer but reuses the DBT without + * NULL'ing its data field or changing the flags, we may drop + * core. + */ + F_CLR(pkey, DB_DBT_REALLOC); + F_SET(pkey, DB_DBT_MALLOC); + } + + return (ret); +} + +/* + * __dbc_pget_recno -- + * Perform a DB_GET_RECNO c_pget on a secondary index. Returns + * the secondary's record number in the pkey field and the primary's + * in the data field. + */ +static int +__dbc_pget_recno(sdbc, pkey, data, flags) + DBC *sdbc; + DBT *pkey, *data; + u_int32_t flags; +{ + DB *pdbp, *sdbp; + DBC *pdbc; + DBT discardme, primary_key; + ENV *env; + db_recno_t oob; + u_int32_t rmw; + int ret, t_ret; + + sdbp = sdbc->dbp; + pdbp = sdbp->s_primary; + env = sdbp->env; + pdbc = NULL; + ret = t_ret = 0; + + rmw = LF_ISSET(DB_RMW); + + memset(&discardme, 0, sizeof(DBT)); + F_SET(&discardme, DB_DBT_USERMEM | DB_DBT_PARTIAL); + + oob = RECNO_OOB; + + /* + * If the primary is an rbtree, we want its record number, whether + * or not the secondary is one too. Fetch the recno into "data". + * + * If it's not an rbtree, return RECNO_OOB in "data". + */ + if (F_ISSET(pdbp, DB_AM_RECNUM)) { + /* + * Get the primary key, so we can find the record number + * in the primary. (We're uninterested in the secondary key.) + */ + memset(&primary_key, 0, sizeof(DBT)); + F_SET(&primary_key, DB_DBT_MALLOC); + if ((ret = __dbc_get(sdbc, + &discardme, &primary_key, rmw | DB_CURRENT)) != 0) + return (ret); + + /* + * Open a cursor on the primary, set it to the right record, + * and fetch its recno into "data". + * + * (See __dbc_pget for comments on the use of __db_cursor_int.) + * + * SET_RET_MEM so that the secondary DBC owns any returned-data + * memory. 
+ */ + if ((ret = __db_cursor_int(pdbp, sdbc->thread_info, sdbc->txn, + pdbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0) + goto perr; + SET_RET_MEM(pdbc, sdbc); + if ((ret = __dbc_get(pdbc, + &primary_key, &discardme, rmw | DB_SET)) != 0) + goto perr; + + ret = __dbc_get(pdbc, &discardme, data, rmw | DB_GET_RECNO); + +perr: __os_ufree(env, primary_key.data); + if (pdbc != NULL && + (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + } else if ((ret = __db_retcopy(env, data, &oob, + sizeof(oob), &sdbc->rkey->data, &sdbc->rkey->ulen)) != 0) + return (ret); + + /* + * If the secondary is an rbtree, we want its record number, whether + * or not the primary is one too. Fetch the recno into "pkey". + * + * If it's not an rbtree, return RECNO_OOB in "pkey". + */ + if (F_ISSET(sdbp, DB_AM_RECNUM)) + return (__dbc_get(sdbc, &discardme, pkey, flags)); + else + return (__db_retcopy(env, pkey, &oob, + sizeof(oob), &sdbc->rdata->data, &sdbc->rdata->ulen)); +} + +/* + * __db_wrlock_err -- do not have a write lock. + */ +static int +__db_wrlock_err(env) + ENV *env; +{ + __db_errx(env, DB_STR("0697", "Write attempted on read-only cursor")); + return (EPERM); +} + +/* + * __dbc_del_secondary -- + * Perform a delete operation on a secondary index: call through + * to the primary and delete the primary record that this record + * points to. + * + * Note that deleting the primary record will call c_del on all + * the secondaries, including this one; thus, it is not necessary + * to execute both this function and an actual delete. + */ +static int +__dbc_del_secondary(dbc) + DBC *dbc; +{ + DB *pdbp; + DBC *pdbc; + DBT skey, pkey; + ENV *env; + int ret, t_ret; + u_int32_t rmw; + + pdbp = dbc->dbp->s_primary; + env = pdbp->env; + rmw = STD_LOCKING(dbc) ? DB_RMW : 0; + + /* + * Get the current item that we're pointing at. + * We don't actually care about the secondary key, just + * the primary. 
+ */ + memset(&skey, 0, sizeof(DBT)); + memset(&pkey, 0, sizeof(DBT)); + F_SET(&skey, DB_DBT_PARTIAL | DB_DBT_USERMEM); + if ((ret = __dbc_get(dbc, &skey, &pkey, DB_CURRENT)) != 0) + return (ret); + + SWAP_IF_NEEDED(dbc->dbp, &pkey); + DEBUG_LWRITE(dbc, dbc->txn, "del_secondary", &skey, &pkey, 0); + + /* + * Create a cursor on the primary with our locker ID, + * so that when it calls back, we don't conflict. + * + * We create a cursor explicitly because there's no + * way to specify the same locker ID if we're using + * locking but not transactions if we use the DB->del + * interface. This shouldn't be any less efficient + * anyway. + */ + if ((ret = __db_cursor_int(pdbp, dbc->thread_info, dbc->txn, + pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0) + return (ret); + + /* + * See comment in __dbc_put--if we're in CDB, + * we already hold the locks we need, and we need to flag + * the cursor as a WRITER so we don't run into errors + * when we try to delete. + */ + if (CDB_LOCKING(env)) { + DB_ASSERT(env, pdbc->mylock.off == LOCK_INVALID); + F_SET(pdbc, DBC_WRITER); + } + + /* + * Set the new cursor to the correct primary key. Then + * delete it. We don't really care about the datum; + * just reuse our skey DBT. + * + * If the primary get returns DB_NOTFOUND, something is amiss-- + * every record in the secondary should correspond to some record + * in the primary. + */ + if ((ret = __dbc_get(pdbc, &pkey, &skey, DB_SET | rmw)) == 0) + ret = __dbc_del(pdbc, 0); + else if (ret == DB_NOTFOUND) + ret = __db_secondary_corrupt(pdbp); + + if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __dbc_del_primary -- + * Perform a delete operation on a primary index. Loop through + * all the secondary indices which correspond to this primary + * database, and delete any secondary keys that point at the current + * record. 
+ * + * PUBLIC: int __dbc_del_primary __P((DBC *)); + */ +int +__dbc_del_primary(dbc) + DBC *dbc; +{ + DB *dbp, *sdbp; + DBC *sdbc; + DBT *tskeyp; + DBT data, pkey, skey, temppkey, tempskey; + ENV *env; + u_int32_t nskey, rmw; + int ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + sdbp = NULL; + rmw = STD_LOCKING(dbc) ? DB_RMW : 0; + + /* + * If we're called at all, we have at least one secondary. + * (Unfortunately, we can't assert this without grabbing the mutex.) + * Get the current record so that we can construct appropriate + * secondary keys as needed. + */ + memset(&pkey, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + if ((ret = __dbc_get(dbc, &pkey, &data, DB_CURRENT)) != 0) + return (ret); + + memset(&skey, 0, sizeof(DBT)); + for (ret = __db_s_first(dbp, &sdbp); + sdbp != NULL && ret == 0; + ret = __db_s_next(&sdbp, dbc->txn)) { + /* + * Get the secondary key for this secondary and the current + * item. + */ + if ((ret = sdbp->s_callback(sdbp, &pkey, &data, &skey)) != 0) { + /* Not indexing is equivalent to an empty key set. */ + if (ret == DB_DONOTINDEX) { + F_SET(&skey, DB_DBT_MULTIPLE); + skey.size = 0; + } else /* We had a substantive error. Bail. */ + goto err; + } + +#ifdef DIAGNOSTIC + if (F_ISSET(&skey, DB_DBT_MULTIPLE)) + __db_check_skeyset(sdbp, &skey); +#endif + + if (F_ISSET(&skey, DB_DBT_MULTIPLE)) { + tskeyp = (DBT *)skey.data; + nskey = skey.size; + if (nskey == 0) + continue; + } else { + tskeyp = &skey; + nskey = 1; + } + + /* Open a secondary cursor. */ + if ((ret = __db_cursor_int(sdbp, + dbc->thread_info, dbc->txn, sdbp->type, + PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) + goto err; + /* See comment above and in __dbc_put. */ + if (CDB_LOCKING(env)) { + DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); + F_SET(sdbc, DBC_WRITER); + } + + for (; nskey > 0; nskey--, tskeyp++) { + /* + * Set the secondary cursor to the appropriate item. + * Delete it. + * + * We want to use DB_RMW if locking is on; it's only + * legal then, though. 
+ * + * !!! + * Don't stomp on any callback-allocated buffer in skey + * when we do a c_get(DB_GET_BOTH); use a temp DBT + * instead. Similarly, don't allow pkey to be + * invalidated when the cursor is closed. + */ + DB_INIT_DBT(tempskey, tskeyp->data, tskeyp->size); + SWAP_IF_NEEDED(sdbp, &pkey); + DB_INIT_DBT(temppkey, pkey.data, pkey.size); + if ((ret = __dbc_get(sdbc, &tempskey, &temppkey, + DB_GET_BOTH | rmw)) == 0) + ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY); + else if (ret == DB_NOTFOUND) + ret = __db_secondary_corrupt(dbp); + SWAP_IF_NEEDED(sdbp, &pkey); + FREE_IF_NEEDED(env, tskeyp); + } + + if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + /* + * In the common case where there is a single secondary key, we + * will have freed any application-allocated data in skey + * already. In the multiple key case, we need to free it here. + * It is safe to do this twice as the macro resets the data + * field. + */ + FREE_IF_NEEDED(env, &skey); + } + +err: if (sdbp != NULL && + (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) + ret = t_ret; + FREE_IF_NEEDED(env, &skey); + return (ret); +} + +/* + * __dbc_del_foreign -- + * Apply the foreign database constraints for a particular foreign + * database when an item is being deleted (dbc points at item being deleted + * in the foreign database.) + * + * Delete happens in dbp, check for occurrences of key in pdpb. + * Terminology: + * Foreign db = Where delete occurs (dbp). + * Secondary db = Where references to dbp occur (sdbp, a secondary) + * Primary db = sdbp's primary database, references to dbp are secondary + * keys here + * Foreign Key = Key being deleted in dbp (fkey) + * Primary Key = Key of the corresponding entry in sdbp's primary (pkey). 
+ */ +static int +__dbc_del_foreign(dbc) + DBC *dbc; +{ + DB_FOREIGN_INFO *f_info; + DB *dbp, *pdbp, *sdbp; + DBC *pdbc, *sdbc; + DBT data, fkey, pkey; + ENV *env; + u_int32_t flags, rmw; + int changed, ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + + memset(&fkey, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + if ((ret = __dbc_get(dbc, &fkey, &data, DB_CURRENT)) != 0) + return (ret); + + LIST_FOREACH(f_info, &(dbp->f_primaries), f_links) { + sdbp = f_info->dbp; + pdbp = sdbp->s_primary; + flags = f_info->flags; + + rmw = (STD_LOCKING(dbc) && + !LF_ISSET(DB_FOREIGN_ABORT)) ? DB_RMW : 0; + + /* + * Handle CDB locking. Some of this is copied from + * __dbc_del_primary, but a bit more acrobatics are required. + * If we're not going to abort, then we need to get a write + * cursor. If CDB_ALLDB is set, then only one write cursor is + * allowed and we hold it, so we fudge things and promote the + * cursor on the other DBs manually, it won't cause a problem. + * If CDB_ALLDB is not set, then we go through the usual route + * to make sure we block as necessary. If there are any open + * read cursors on sdbp, the delete or put call later will + * block. + * + * If NULLIFY is set, we'll need a cursor on the primary to + * update it with the nullified data. Because primary and + * secondary dbs share a lock file ID in CDB, we open a cursor + * on the secondary and then get another writable cursor on the + * primary via __db_cursor_int to avoid deadlocking. 
+ */ + sdbc = pdbc = NULL; + if (!LF_ISSET(DB_FOREIGN_ABORT) && CDB_LOCKING(env) && + !F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { + ret = __db_cursor(sdbp, + dbc->thread_info, dbc->txn, &sdbc, DB_WRITECURSOR); + if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0) { + ret = __db_cursor_int(pdbp, + dbc->thread_info, dbc->txn, pdbp->type, + PGNO_INVALID, 0, dbc->locker, &pdbc); + F_SET(pdbc, DBC_WRITER); + } + } else { + ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn, + sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc); + if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0) + ret = __db_cursor_int(pdbp, dbc->thread_info, + dbc->txn, pdbp->type, PGNO_INVALID, 0, + dbc->locker, &pdbc); + } + if (ret != 0) { + if (sdbc != NULL) + (void)__dbc_close(sdbc); + return (ret); + } + if (CDB_LOCKING(env) && F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { + DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); + F_SET(sdbc, DBC_WRITER); + if (LF_ISSET(DB_FOREIGN_NULLIFY) && pdbc != NULL) { + DB_ASSERT(env, + pdbc->mylock.off == LOCK_INVALID); + F_SET(pdbc, DBC_WRITER); + } + } + + /* + * There are three actions possible when a foreign database has + * items corresponding to a deleted item: + * DB_FOREIGN_ABORT - The delete operation should be aborted. + * DB_FOREIGN_CASCADE - All corresponding foreign items should + * be deleted. + * DB_FOREIGN_NULLIFY - A callback needs to be made, allowing + * the application to modify the data DBT from the + * associated database. 
If the callback makes a + * modification, the updated item needs to replace the + * original item in the foreign db + */ + memset(&pkey, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + ret = __dbc_pget(sdbc, &fkey, &pkey, &data, DB_SET|rmw); + + if (ret == DB_NOTFOUND) { + /* No entry means no constraint */ + ret = __dbc_close(sdbc); + if (LF_ISSET(DB_FOREIGN_NULLIFY) && + (t_ret = __dbc_close(pdbc)) != 0) + ret = t_ret; + if (ret != 0) + return (ret); + continue; + } else if (ret != 0) { + /* Just return the error code from the pget */ + (void)__dbc_close(sdbc); + if (LF_ISSET(DB_FOREIGN_NULLIFY)) + (void)__dbc_close(pdbc); + return (ret); + } else if (LF_ISSET(DB_FOREIGN_ABORT)) { + /* If the record exists and ABORT is set, we're done */ + if ((ret = __dbc_close(sdbc)) != 0) + return (ret); + return (DB_FOREIGN_CONFLICT); + } + + /* + * There were matching items in the primary DB, and the action + * is either DB_FOREIGN_CASCADE or DB_FOREIGN_NULLIFY. + */ + while (ret == 0) { + if (LF_ISSET(DB_FOREIGN_CASCADE)) { + /* + * Don't use the DB_UPDATE_SECONDARY flag, + * since we want the delete to cascade into the + * secondary's primary. + */ + if ((ret = __dbc_del(sdbc, 0)) != 0) { + __db_err(env, ret, DB_STR("0698", + "Attempt to execute cascading delete in a foreign index failed")); + break; + } + } else if (LF_ISSET(DB_FOREIGN_NULLIFY)) { + changed = 0; + if ((ret = f_info->callback(sdbp, + &pkey, &data, &fkey, &changed)) != 0) { + __db_err(env, ret, DB_STR("0699", + "Foreign database application callback")); + break; + } + + /* + * If the user callback modified the DBT and + * a put on the primary failed. + */ + if (changed && (ret = __dbc_put(pdbc, + &pkey, &data, DB_KEYFIRST)) != 0) { + __db_err(env, ret, DB_STR("0700", +"Attempt to overwrite item in foreign database with nullified value failed")); + break; + } + } + /* retrieve the next matching item from the prim. 
db */ + memset(&pkey, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + ret = __dbc_pget(sdbc, + &fkey, &pkey, &data, DB_NEXT_DUP|rmw); + } + + if (ret == DB_NOTFOUND) + ret = 0; + if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) + ret = t_ret; + if (LF_ISSET(DB_FOREIGN_NULLIFY) && + (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + } + + return (ret); +} + +/* + * __db_s_first -- + * Get the first secondary, if any are present, from the primary. + * + * PUBLIC: int __db_s_first __P((DB *, DB **)); + */ +int +__db_s_first(pdbp, sdbpp) + DB *pdbp, **sdbpp; +{ + DB *sdbp; + + MUTEX_LOCK(pdbp->env, pdbp->mutex); + sdbp = LIST_FIRST(&pdbp->s_secondaries); + + /* See __db_s_next. */ + if (sdbp != NULL) + sdbp->s_refcnt++; + MUTEX_UNLOCK(pdbp->env, pdbp->mutex); + + *sdbpp = sdbp; + + return (0); +} + +/* + * __db_s_next -- + * Get the next secondary in the list. + * + * PUBLIC: int __db_s_next __P((DB **, DB_TXN *)); + */ +int +__db_s_next(sdbpp, txn) + DB **sdbpp; + DB_TXN *txn; +{ + DB *sdbp, *pdbp, *closeme; + ENV *env; + int ret; + + /* + * Secondary indices are kept in a linked list, s_secondaries, + * off each primary DB handle. If a primary is free-threaded, + * this list may only be traversed or modified while the primary's + * thread mutex is held. + * + * The tricky part is that we don't want to hold the thread mutex + * across the full set of secondary puts necessary for each primary + * put, or we'll wind up essentially single-threading all the puts + * to the handle; the secondary puts will each take about as + * long as the primary does, and may require I/O. So we instead + * hold the thread mutex only long enough to follow one link to the + * next secondary, and then we release it before performing the + * actual secondary put. 
+ * + * The only danger here is that we might legitimately close a + * secondary index in one thread while another thread is performing + * a put and trying to update that same secondary index. To + * prevent this from happening, we refcount the secondary handles. + * If close is called on a secondary index handle while we're putting + * to it, it won't really be closed--the refcount will simply drop, + * and we'll be responsible for closing it here. + */ + sdbp = *sdbpp; + pdbp = sdbp->s_primary; + env = pdbp->env; + closeme = NULL; + + MUTEX_LOCK(env, pdbp->mutex); + DB_ASSERT(env, sdbp->s_refcnt != 0); + if (--sdbp->s_refcnt == 0) { + LIST_REMOVE(sdbp, s_links); + closeme = sdbp; + } + sdbp = LIST_NEXT(sdbp, s_links); + if (sdbp != NULL) + sdbp->s_refcnt++; + MUTEX_UNLOCK(env, pdbp->mutex); + + *sdbpp = sdbp; + + /* + * closeme->close() is a wrapper; call __db_close explicitly. + */ + if (closeme == NULL) + ret = 0; + else + ret = __db_close(closeme, txn, 0); + + return (ret); +} + +/* + * __db_s_done -- + * Properly decrement the refcount on a secondary database handle we're + * using, without calling __db_s_next. + * + * PUBLIC: int __db_s_done __P((DB *, DB_TXN *)); + */ +int +__db_s_done(sdbp, txn) + DB *sdbp; + DB_TXN *txn; +{ + DB *pdbp; + ENV *env; + int doclose, ret; + + pdbp = sdbp->s_primary; + env = pdbp->env; + doclose = 0; + + MUTEX_LOCK(env, pdbp->mutex); + DB_ASSERT(env, sdbp->s_refcnt != 0); + if (--sdbp->s_refcnt == 0) { + LIST_REMOVE(sdbp, s_links); + doclose = 1; + } + MUTEX_UNLOCK(env, pdbp->mutex); + + if (doclose == 0) + ret = 0; + else + ret = __db_close(sdbp, txn, 0); + return (ret); +} + +/* + * __db_s_count -- + * Count the number of secondaries associated with a given primary. 
+ */ +static int +__db_s_count(pdbp) + DB *pdbp; +{ + DB *sdbp; + ENV *env; + int count; + + env = pdbp->env; + count = 0; + + MUTEX_LOCK(env, pdbp->mutex); + for (sdbp = LIST_FIRST(&pdbp->s_secondaries); + sdbp != NULL; + sdbp = LIST_NEXT(sdbp, s_links)) + ++count; + MUTEX_UNLOCK(env, pdbp->mutex); + + return (count); +} + +/* + * __db_buildpartial -- + * Build the record that will result after a partial put is applied to + * an existing record. + * + * This should probably be merged with __bam_build, but that requires + * a little trickery if we plan to keep the overflow-record optimization + * in that function. + * + * PUBLIC: int __db_buildpartial __P((DB *, DBT *, DBT *, DBT *)); + */ +int +__db_buildpartial(dbp, oldrec, partial, newrec) + DB *dbp; + DBT *oldrec, *partial, *newrec; +{ + ENV *env; + u_int32_t len, nbytes; + u_int8_t *buf; + int ret; + + env = dbp->env; + + DB_ASSERT(env, F_ISSET(partial, DB_DBT_PARTIAL)); + + memset(newrec, 0, sizeof(DBT)); + + nbytes = __db_partsize(oldrec->size, partial); + newrec->size = nbytes; + + if ((ret = __os_malloc(env, nbytes, &buf)) != 0) + return (ret); + newrec->data = buf; + + /* Nul or pad out the buffer, for any part that isn't specified. */ + memset(buf, + F_ISSET(dbp, DB_AM_FIXEDLEN) ? ((BTREE *)dbp->bt_internal)->re_pad : + 0, nbytes); + + /* Copy in any leading data from the original record. */ + memcpy(buf, oldrec->data, + partial->doff > oldrec->size ? oldrec->size : partial->doff); + + /* Copy the data from partial. */ + memcpy(buf + partial->doff, partial->data, partial->size); + + /* Copy any trailing data from the original record. */ + len = partial->doff + partial->dlen; + if (oldrec->size > len) + memcpy(buf + partial->doff + partial->size, + (u_int8_t *)oldrec->data + len, oldrec->size - len); + + return (0); +} + +/* + * __db_partsize -- + * Given the number of bytes in an existing record and a DBT that + * is about to be partial-put, calculate the size of the record + * after the put. 
+ * + * This code is called from __bam_partsize. + * + * PUBLIC: u_int32_t __db_partsize __P((u_int32_t, DBT *)); + */ +u_int32_t +__db_partsize(nbytes, data) + u_int32_t nbytes; + DBT *data; +{ + + /* + * There are really two cases here: + * + * Case 1: We are replacing some bytes that do not exist (i.e., they + * are past the end of the record). In this case the number of bytes + * we are replacing is irrelevant and all we care about is how many + * bytes we are going to add from offset. So, the new record length + * is going to be the size of the new bytes (size) plus wherever those + * new bytes begin (doff). + * + * Case 2: All the bytes we are replacing exist. Therefore, the new + * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) + * plus the bytes we are adding (size). + */ + if (nbytes < data->doff + data->dlen) /* Case 1 */ + return (data->doff + data->size); + + return (nbytes + data->size - data->dlen); /* Case 2 */ +} + +#ifdef DIAGNOSTIC +/* + * __db_check_skeyset -- + * Diagnostic check that the application's callback returns a set of + * secondary keys without repeats. + * + * PUBLIC: #ifdef DIAGNOSTIC + * PUBLIC: void __db_check_skeyset __P((DB *, DBT *)); + * PUBLIC: #endif + */ +void +__db_check_skeyset(sdbp, skeyp) + DB *sdbp; + DBT *skeyp; +{ + DBT *first_key, *last_key, *key1, *key2; + ENV *env; + + env = sdbp->env; + + first_key = (DBT *)skeyp->data; + last_key = first_key + skeyp->size; + for (key1 = first_key; key1 < last_key; key1++) + for (key2 = key1 + 1; key2 < last_key; key2++) + DB_ASSERT(env, + ((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, + key1, key2) != 0); +} +#endif diff --git a/src/db/db_cds.c b/src/db/db_cds.c new file mode 100644 index 00000000..2e157624 --- /dev/null +++ b/src/db/db_cds.c @@ -0,0 +1,201 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" + +static int __cdsgroup_abort __P((DB_TXN *txn)); +static int __cdsgroup_commit __P((DB_TXN *txn, u_int32_t flags)); +static int __cdsgroup_discard __P((DB_TXN *txn, u_int32_t flags)); +static u_int32_t __cdsgroup_id __P((DB_TXN *txn)); +static int __cdsgroup_notsup __P((ENV *env, const char *meth)); +static int __cdsgroup_prepare __P((DB_TXN *txn, u_int8_t *gid)); +static int __cdsgroup_get_name __P((DB_TXN *txn, const char **namep)); +static int __cdsgroup_set_name __P((DB_TXN *txn, const char *name)); +static int __cdsgroup_set_timeout + __P((DB_TXN *txn, db_timeout_t timeout, u_int32_t flags)); + +/* + * __cdsgroup_notsup -- + * Error when CDS groups don't support a method. + */ +static int +__cdsgroup_notsup(env, meth) + ENV *env; + const char *meth; +{ + __db_errx(env, DB_STR_A("0687", "CDS groups do not support %s", "%s"), + meth); + return (DB_OPNOTSUP); +} + +static int +__cdsgroup_abort(txn) + DB_TXN *txn; +{ + return (__cdsgroup_notsup(txn->mgrp->env, "abort")); +} + +static int +__cdsgroup_commit(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + DB_LOCKER *locker; + DB_LOCKREQ lreq; + ENV *env; + int ret, t_ret; + + COMPQUIET(flags, 0); + env = txn->mgrp->env; + + /* Check for live cursors. */ + if (txn->cursors != 0) { + __db_errx(env, DB_STR("0688", "CDS group has active cursors")); + return (EINVAL); + } + + /* We may be holding handle locks; release them. 
*/ + lreq.op = DB_LOCK_PUT_ALL; + lreq.obj = NULL; + ret = __lock_vec(env, txn->locker, 0, &lreq, 1, NULL); + + env = txn->mgrp->env; + locker = txn->locker; + __os_free(env, txn->mgrp); + __os_free(env, txn); + if ((t_ret = __lock_id_free(env, locker)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int __cdsgroup_discard(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__cdsgroup_notsup(txn->mgrp->env, "discard")); +} + +static u_int32_t __cdsgroup_id(txn) + DB_TXN *txn; +{ + return (txn->txnid); +} + +static int __cdsgroup_prepare(txn, gid) + DB_TXN *txn; + u_int8_t *gid; +{ + COMPQUIET(gid, NULL); + return (__cdsgroup_notsup(txn->mgrp->env, "prepare")); +} + +static int __cdsgroup_get_name(txn, namep) + DB_TXN *txn; + const char **namep; +{ + COMPQUIET(namep, NULL); + return (__cdsgroup_notsup(txn->mgrp->env, "get_name")); +} + +static int __cdsgroup_set_name(txn, name) + DB_TXN *txn; + const char *name; +{ + COMPQUIET(name, NULL); + return (__cdsgroup_notsup(txn->mgrp->env, "set_name")); +} + +static int __cdsgroup_set_timeout(txn, timeout, flags) + DB_TXN *txn; + db_timeout_t timeout; + u_int32_t flags; +{ + COMPQUIET(timeout, 0); + COMPQUIET(flags, 0); + return (__cdsgroup_notsup(txn->mgrp->env, "set_timeout")); +} + +/* + * PUBLIC: int __cdsgroup_begin __P((ENV *, DB_TXN **)); + */ +int +__cdsgroup_begin(env, txnpp) + ENV *env; + DB_TXN **txnpp; +{ + DB_TXN *txn; + int ret; + + *txnpp = txn = NULL; + if ((ret = __os_calloc(env, 1, sizeof(DB_TXN), &txn)) != 0) + goto err; + /* + * We need a dummy DB_TXNMGR -- it's the only way to get from a + * transaction handle to the environment handle. 
+ */ + if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &txn->mgrp)) != 0) + goto err; + txn->mgrp->env = env; + + if ((ret = __lock_id(env, &txn->txnid, &txn->locker)) != 0) + goto err; + + txn->flags = TXN_FAMILY; + txn->abort = __cdsgroup_abort; + txn->commit = __cdsgroup_commit; + txn->discard = __cdsgroup_discard; + txn->id = __cdsgroup_id; + txn->prepare = __cdsgroup_prepare; + txn->get_name = __cdsgroup_get_name; + txn->set_name = __cdsgroup_set_name; + txn->set_timeout = __cdsgroup_set_timeout; + + *txnpp = txn; + + if (0) { +err: if (txn != NULL) { + if (txn->mgrp != NULL) + __os_free(env, txn->mgrp); + __os_free(env, txn); + } + } + return (ret); +} + +/* + * __cds_txn_begin_pp -- + * DB_ENV->cdsgroup_begin + * + * PUBLIC: int __cdsgroup_begin_pp __P((DB_ENV *, DB_TXN **)); + */ +int __cdsgroup_begin_pp(dbenv, txnpp) + DB_ENV *dbenv; + DB_TXN **txnpp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "cdsgroup_begin"); + if (!CDB_LOCKING(env)) + return (__env_not_config(env, "cdsgroup_begin", DB_INIT_CDB)); + + ENV_ENTER(env, ip); + ret = __cdsgroup_begin(env, txnpp); + ENV_LEAVE(env, ip); + return (ret); + } diff --git a/src/db/db_compact.c b/src/db/db_compact.c new file mode 100644 index 00000000..f4975d61 --- /dev/null +++ b/src/db/db_compact.c @@ -0,0 +1,1083 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"
#include "dbinc/fop.h"

#ifdef HAVE_FTRUNCATE
static int __db_free_freelist __P((DB *, DB_THREAD_INFO *, DB_TXN *));
static int __db_setup_freelist __P((DB *, db_pglist_t *, u_int32_t));
#endif

/*
 * Snapshot the statistics and the current start key so an iteration
 * can be rolled back after a deadlock (see RESTORE_START).
 */
#define	SAVE_START						\
	do {							\
		save_data = *c_data;				\
		ret = __db_retcopy(env,				\
		     &save_start, current.data, current.size,	\
		     &save_start.data, &save_start.ulen);	\
	} while (0)

/*
 * Only restore those things that are negated by aborting the
 * transaction.  We don't restore the number of deadlocks, for example.
 */

#define	RESTORE_START						\
	do {							\
		c_data->compact_pages_free =			\
		      save_data.compact_pages_free;		\
		c_data->compact_levels = save_data.compact_levels; \
		c_data->compact_truncate = save_data.compact_truncate; \
		c_data->compact_empty_buckets =			\
		    save_data.compact_empty_buckets;		\
		ret = __db_retcopy(env, &current,		\
		    save_start.data, save_start.size,		\
		    &current.data, &current.ulen);		\
	} while (0)

/*
 * __db_compact_int -- compact a database.
 *
 * PUBLIC: int __db_compact_int __P((DB *, DB_THREAD_INFO *, DB_TXN *,
 * PUBLIC:     DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
 */
int
__db_compact_int(dbp, ip, txn, start, stop, c_data, flags, end)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DBT *start, *stop;
	DB_COMPACT *c_data;
	u_int32_t flags;
	DBT *end;
{
	DBC *dbc;
	DBT current, save_start;
	DB_COMPACT save_data;
	DB_TXN *txn_orig;
	ENV *env;
	u_int32_t empty_buckets, factor, retry;
	int deadlock, have_freelist, isdone, ret, span, t_ret, txn_local;

#ifdef HAVE_FTRUNCATE
	db_pglist_t *list;
	db_pgno_t last_pgno;
	u_int32_t nelems, truncated;
#endif

	env = dbp->env;

	memset(&current, 0, sizeof(current));
	memset(&save_start, 0, sizeof(save_start));
	dbc = NULL;
	factor = 0;
	have_freelist = deadlock = isdone = span = 0;
	ret = retry = 0;
	txn_orig = txn;

#ifdef HAVE_FTRUNCATE
	list = NULL;
	last_pgno = 0;
	nelems = truncated = 0;
#endif

	/*
	 * We pass "current" to the internal routine, indicating where that
	 * routine should begin its work and expecting that it will return to
	 * us the last key that it processed.
	 */
	if (start != NULL && (ret = __db_retcopy(env,
	    &current, start->data, start->size,
	    &current.data, &current.ulen)) != 0)
		return (ret);

	empty_buckets = c_data->compact_empty_buckets;

	/* txn_local: we begin/commit our own transactions per iteration. */
	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
		txn_local = 1;
		LF_SET(DB_AUTO_COMMIT);
	} else
		txn_local = 0;
	if (!LF_ISSET(DB_FREE_SPACE | DB_FREELIST_ONLY))
		goto no_free;
	if (LF_ISSET(DB_FREELIST_ONLY))
		LF_SET(DB_FREE_SPACE);

#ifdef HAVE_FTRUNCATE
	/* Sort the freelist and set up the in-memory list representation. */
	if (txn_local && (ret = __txn_begin(env, ip, txn_orig, &txn, 0)) != 0)
		goto err;

	if ((ret = __db_free_truncate(dbp, ip,
	    txn, flags, c_data, &list, &nelems, &last_pgno)) != 0) {
		LF_CLR(DB_FREE_SPACE);
		goto terr;
	}

	/* If the freelist is empty and we are not filling, get out.
 */
	if (nelems == 0 && LF_ISSET(DB_FREELIST_ONLY)) {
		ret = 0;
		LF_CLR(DB_FREE_SPACE);
		goto terr;
	}
	if ((ret = __db_setup_freelist(dbp, list, nelems)) != 0) {
		/* Someone else owns the free list. */
		if (ret == EBUSY)
			ret = 0;
	}
	if (ret == 0)
		have_freelist = 1;

	/* Commit the txn and release the meta page lock. */
terr:	if (txn_local) {
		if ((t_ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0 && ret == 0)
			ret = t_ret;
		txn = NULL;
	}
	if (ret != 0)
		goto err;

	/* Save the number truncated so far, we will add what we get below. */
	truncated = c_data->compact_pages_truncated;
	if (LF_ISSET(DB_FREELIST_ONLY))
		goto done;
#endif

	/*
	 * We want factor to be the target number of free bytes on each page,
	 * so we know when to stop adding items to a page.   Make sure to
	 * subtract the page overhead when computing this target.  This can
	 * result in a 1-2% error on the smallest page.
	 * First figure out how many bytes we should use:
	 */
no_free:
	factor = dbp->pgsize - SIZEOF_PAGE;
	if (c_data->compact_fillpercent != 0) {
		factor *= c_data->compact_fillpercent;
		factor /= 100;
	}
	/* Now convert to the number of free bytes to target. */
	factor = (dbp->pgsize - SIZEOF_PAGE) - factor;

	if (c_data->compact_pages == 0)
		c_data->compact_pages = DB_MAX_PAGES;

	/* Main loop: one (possibly local) transaction per iteration. */
	do {
		deadlock = 0;

		SAVE_START;
		if (ret != 0)
			break;

		if (txn_local) {
			if ((ret =
			    __txn_begin(env, ip, txn_orig, &txn, 0)) != 0)
				break;

			if (c_data->compact_timeout != 0 &&
			    (ret = __txn_set_timeout(txn,
			    c_data->compact_timeout, DB_SET_LOCK_TIMEOUT)) != 0)
				goto err;
		}

		if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
			goto err;

#ifdef HAVE_HASH
		if (dbp->type == DB_HASH)
			ret = __ham_compact_int(dbc,
			    &current, stop, factor, c_data, &isdone, flags);
		else
#endif
			ret = __bam_compact_int(dbc, &current, stop, factor,
			    &span, c_data, &isdone);
		if (ret == DB_LOCK_DEADLOCK && txn_local) {
			/*
			 * We retry on deadlock.  Cancel the statistics
			 * and reset the start point to before this
			 * iteration.
			 */
			deadlock = 1;
			c_data->compact_deadlock++;
			RESTORE_START;
		}
		/*
		 * If we could not get a lock while holding an internal
		 * node latched, commit the current local transaction otherwise
		 * report a deadlock.
		 */
		if (ret == DB_LOCK_NOTGRANTED) {
			if (txn_local || retry++ < 5)
				ret = 0;
			else
				ret = DB_LOCK_DEADLOCK;
		} else
			retry = 0;

		if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
			ret = t_ret;

err:		if (txn_local && txn != NULL) {
			if (ret == 0 && deadlock == 0)
				ret = __txn_commit(txn, DB_TXN_NOSYNC);
			else if ((t_ret = __txn_abort(txn)) != 0 && ret == 0)
				ret = t_ret;
			txn = NULL;
		}
		DB_ASSERT(env, ip == NULL || ip->dbth_pincount == 0);
	} while (ret == 0 && !isdone);

	/* Hand the stopping key back to the caller. */
	if (ret == 0 && end != NULL)
		ret = __db_retcopy(env, end, current.data, current.size,
		    &end->data, &end->ulen);
	if (current.data != NULL)
		__os_free(env, current.data);
	if (save_start.data != NULL)
		__os_free(env, save_start.data);

#ifdef HAVE_FTRUNCATE
	/*
	 * Finish up truncation work.  If there are pages left in the free
	 * list we can try to move the internal structures around so that we
	 * can remove more pages from the file.
	 * For BTREE search the internal nodes of the tree as we may have
	 * missed some while walking the leaf nodes.
	 * For HASH we will compact the hash table itself, moving segments
	 * to lower number pages where possible.
	 * Then calculate how many pages we have truncated and release
	 * the in-memory free list.
+ */ +done: if (LF_ISSET(DB_FREE_SPACE)) { + DBMETA *meta; + db_pgno_t pgno; + + pgno = PGNO_BASE_MD; + isdone = 1; + if (ret == 0 && !LF_ISSET(DB_FREELIST_ONLY) && + __memp_fget(dbp->mpf, &pgno, ip, txn, 0, &meta) == 0) { + isdone = meta->free == PGNO_INVALID; + ret = __memp_fput(dbp->mpf, ip, meta, dbp->priority); + } + +#ifdef HAVE_HASH + if (dbp->type == DB_HASH) { + c_data->compact_empty_buckets -= empty_buckets; + if (!isdone || c_data->compact_empty_buckets != 0) + ret = __ham_compact_hash(dbp, + ip, txn_orig, c_data); + c_data->compact_empty_buckets += empty_buckets; + } else +#endif + if (!isdone) + ret = __bam_truncate_ipages(dbp, ip, txn_orig, c_data); + + /* Clean up the free list. */ + if (list != NULL) + __os_free(env, list); + + if ((t_ret = + __memp_fget(dbp->mpf, &pgno, ip, txn, 0, &meta)) == 0) { + c_data->compact_pages_truncated = + truncated + last_pgno - meta->last_pgno; + if ((t_ret = __memp_fput(dbp->mpf, ip, + meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + } else if (ret == 0) + ret = t_ret; + + if (have_freelist && (t_ret = + __db_free_freelist(dbp, ip, txn_orig)) != 0 && ret == 0) + t_ret = ret; + } +#endif + + return (ret); +} + +#ifdef HAVE_FTRUNCATE +static int +__db_setup_freelist(dbp, list, nelems) + DB *dbp; + db_pglist_t *list; + u_int32_t nelems; +{ + DB_MPOOLFILE *mpf; + db_pgno_t *plist; + int ret; + + mpf = dbp->mpf; + + if ((ret = __memp_alloc_freelist(mpf, nelems, &plist)) != 0) + return (ret); + + while (nelems-- != 0) + *plist++ = list++->pgno; + + return (0); +} + +static int +__db_free_freelist(dbp, ip, txn) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + DBC *dbc; + DB_LOCK lock; + int auto_commit, ret, t_ret; + + LOCK_INIT(lock); + auto_commit = ret = 0; + + /* + * If we are not in a transaction then we need to get + * a lock on the meta page, otherwise we should already + * have the lock. 
+ */ + + dbc = NULL; + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + /* + * We must not timeout the lock or we will not free the list. + * We ignore errors from txn_begin as there is little that + * the application can do with the error and we want to + * get the lock and free the list if at all possible. + */ + if (__txn_begin(dbp->env, ip, txn, &txn, 0) == 0) { + (void)__lock_set_timeout(dbp->env, + txn->locker, 0, DB_SET_TXN_TIMEOUT); + (void)__lock_set_timeout(dbp->env, + txn->locker, 0, DB_SET_LOCK_TIMEOUT); + auto_commit = 1; + } + /* Get a cursor so we can call __db_lget. */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + return (ret); + + if ((ret = __db_lget(dbc, + 0, PGNO_BASE_MD, DB_LOCK_WRITE, 0, &lock)) != 0) + goto err; + } + + ret = __memp_free_freelist(dbp->mpf); + +err: if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + if (auto_commit && __txn_abort(txn) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} +#endif + +/* + * __db_exchange_page -- swap a page with a lower numbered page. + * The routine will optionally free the higher numbered page. The cursor + * has a stack which includes at least the immediate parent of this page. + * PUBLIC: int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int)); + */ +int +__db_exchange_page(dbc, pgp, opg, newpgno, flags) + DBC *dbc; + PAGE **pgp, *opg; + db_pgno_t newpgno; + int flags; +{ + BTREE_CURSOR *cp; + DB *dbp; + DBT data, *dp, hdr; + DB_LSN lsn; + DB_LOCK lock; + EPG *epg; + PAGE *newpage; + db_pgno_t oldpgno, *pgnop; + int ret; + + DB_ASSERT(NULL, dbc != NULL); + dbp = dbc->dbp; + LOCK_INIT(lock); + + /* + * We want to free a page that lives in the part of the file that + * can be truncated, so we're going to move it onto a free page + * that is in the part of the file that need not be truncated. 
	 * In the case of compacting hash table segments the caller already
	 * identified a contiguous set of pages to use.  Otherwise
	 * since the freelist is ordered now, we can simply call __db_new
	 * which will grab the first element off the freelist; we know this
	 * is the lowest numbered free page.
	 */
	if (newpgno != PGNO_INVALID) {
		if ((ret = __memp_fget(dbp->mpf, &newpgno,
		    dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &newpage)) != 0)
			return (ret);
	} else if ((ret = __db_new(dbc, P_DONTEXTEND | TYPE(*pgp),
	    STD_LOCKING(dbc) && TYPE(*pgp) != P_OVERFLOW ? &lock : NULL,
	    &newpage)) != 0)
		return (ret);

	/*
	 * If newpage is null then __db_new would have had to allocate
	 * a new page from the filesystem, so there is no reason
	 * to continue this action.
	 */
	if (newpage == NULL)
		return (0);

	/*
	 * It is possible that a higher page is allocated if other threads
	 * are allocating at the same time, if so, just put it back.
	 */
	if (PGNO(newpage) > PGNO(*pgp)) {
		/* Its unfortunate but you can't just free a new overflow. */
		if (TYPE(newpage) == P_OVERFLOW)
			OV_LEN(newpage) = 0;
		if ((ret = __LPUT(dbc, lock)) != 0)
			return (ret);
		return (__db_free(dbc, newpage, 0));
	}

	/* Log if necessary.
 */
	if (DBC_LOGGING(dbc)) {
		memset(&hdr, 0, sizeof(hdr));
		hdr.data = *pgp;
		hdr.size = P_OVERHEAD(dbp);
		memset(&data, 0, sizeof(data));
		dp = &data;
		/* What gets logged as "data" depends on the page type. */
		switch (TYPE(*pgp)) {
		case P_OVERFLOW:
			data.data = (u_int8_t *)*pgp + P_OVERHEAD(dbp);
			data.size = OV_LEN(*pgp);
			break;
		case P_BTREEMETA:
			hdr.size = sizeof(BTMETA);
			dp = NULL;
			break;
		case P_HASHMETA:
			hdr.size = sizeof(HMETA);
			dp = NULL;
			break;
		default:
			data.data = (u_int8_t *)*pgp + HOFFSET(*pgp);
			data.size = dbp->pgsize - HOFFSET(*pgp);
			hdr.size += NUM_ENT(*pgp) * sizeof(db_indx_t);
		}
		if ((ret = __db_merge_log(dbp, dbc->txn,
		    &LSN(newpage), 0, PGNO(newpage), &LSN(newpage),
		    PGNO(*pgp), &LSN(*pgp), &hdr, dp, 1)) != 0)
			goto err;
	} else
		LSN_NOT_LOGGED(LSN(newpage));

	/* Copy the old page's image, preserving the new page's pgno/LSN. */
	oldpgno = PGNO(*pgp);
	newpgno = PGNO(newpage);
	lsn = LSN(newpage);
	memcpy(newpage, *pgp, dbp->pgsize);
	PGNO(newpage) = newpgno;
	LSN(newpage) = lsn;

	/* Empty the old page. */
	if ((ret = __memp_dirty(dbp->mpf,
	    pgp, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
		goto err;
	if (TYPE(*pgp) == P_OVERFLOW)
		OV_LEN(*pgp) = 0;
	else {
		HOFFSET(*pgp) = dbp->pgsize;
		NUM_ENT(*pgp) = 0;
	}
	LSN(*pgp) = lsn;

	/* Update siblings. */
	switch (TYPE(newpage)) {
	case P_OVERFLOW:
	case P_LBTREE:
	case P_LRECNO:
	case P_LDUP:
	case P_HASH:
		if (NEXT_PGNO(newpage) == PGNO_INVALID &&
		    PREV_PGNO(newpage) == PGNO_INVALID)
			break;
		if ((ret = __db_relink(dbc, *pgp, opg, PGNO(newpage))) != 0)
			goto err;
		break;
	default:
		break;
	}

	/*
	 * For HASH we may reuse the old page for an even higher numbered
	 * page.  Otherwise we free the old page.
	 */
	if (!LF_ISSET(DB_EXCH_FREE)) {
		NEXT_PGNO(*pgp) = PREV_PGNO(*pgp) = PGNO_INVALID;
		ret = __memp_fput(dbp->mpf,
		    dbc->thread_info, *pgp, dbc->priority);
	} else
		ret = __db_free(dbc, *pgp, 0);
	*pgp = newpage;

	if (ret != 0)
		return (ret);

	if (!LF_ISSET(DB_EXCH_PARENT))
		goto done;

	/* Update the parent.
 */
	cp = (BTREE_CURSOR *)dbc->internal;
	epg = &cp->csp[-1];

	/* Locate the parent's slot that points at the moved page. */
	switch (TYPE(epg->page)) {
	case P_IBTREE:
		pgnop = &GET_BINTERNAL(dbp, epg->page, epg->indx)->pgno;
		break;
	case P_IRECNO:
		pgnop = &GET_RINTERNAL(dbp, epg->page, epg->indx)->pgno;
		break;
	case P_LBTREE:
	case P_LRECNO:
	case P_LDUP:
		pgnop = &GET_BOVERFLOW(dbp, epg->page, epg->indx)->pgno;
		break;
	default:
		return (__db_pgfmt(dbp->env, PGNO(epg->page)));
	}
	DB_ASSERT(dbp->env, oldpgno == *pgnop);
	if (DBC_LOGGING(dbc)) {
		if ((ret = __db_pgno_log(dbp, dbc->txn, &LSN(epg->page),
		    0, PGNO(epg->page), &LSN(epg->page), (u_int32_t)epg->indx,
		    *pgnop, PGNO(newpage))) != 0)
			return (ret);
	} else
		LSN_NOT_LOGGED(LSN(epg->page));

	*pgnop = PGNO(newpage);
	cp->csp->page = newpage;
	if ((ret = __TLPUT(dbc, lock)) != 0)
		return (ret);

done:	return (0);

err:	(void)__memp_fput(dbp->mpf, dbc->thread_info, newpage, dbc->priority);
	(void)__TLPUT(dbc, lock);
	return (ret);
}

/*
 * __db_truncate_overflow -- find overflow pages to truncate.
 *	Walk the pages of an overflow chain and swap out
 *	high numbered pages.  We are passed the first page
 *	but only deal with the second and subsequent pages.
+ * PUBLIC: int __db_truncate_overflow __P((DBC *, + * PUBLIC: db_pgno_t, PAGE **, DB_COMPACT *)); + */ +int +__db_truncate_overflow(dbc, pgno, ppg, c_data) + DBC *dbc; + db_pgno_t pgno; + PAGE **ppg; + DB_COMPACT *c_data; +{ + DB *dbp; + DB_LOCK lock; + PAGE *page; + db_pgno_t ppgno; + int have_lock, ret, t_ret; + + dbp = dbc->dbp; + page = NULL; + LOCK_INIT(lock); + have_lock = ppg == NULL; + + if ((ret = __memp_fget(dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &page)) != 0) + return (ret); + + while ((pgno = NEXT_PGNO(page)) != PGNO_INVALID) { + if ((ret = __memp_fput(dbp->mpf, + dbc->thread_info, page, dbc->priority)) != 0) + return (ret); + if ((ret = __memp_fget(dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &page)) != 0) + return (ret); + if (pgno <= c_data->compact_truncate) + continue; + if (have_lock == 0) { + DB_ASSERT(dbp->env, ppg != NULL); + ppgno = PGNO(*ppg); + if ((ret = __memp_fput(dbp->mpf, dbc->thread_info, + *ppg, dbc->priority)) != 0) + goto err; + *ppg = NULL; + if ((ret = __db_lget(dbc, 0, ppgno, + DB_LOCK_WRITE, 0, &lock)) != 0) + goto err; + if ((ret = __memp_fget(dbp->mpf, &ppgno, + dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, ppg)) != 0) + goto err; + have_lock = 1; + } + if ((ret = __db_exchange_page(dbc, + &page, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0) + break; + } + +err: if (page != NULL && + (t_ret = __memp_fput( dbp->mpf, + dbc->thread_info, page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} +/* + * __db_truncate_root -- swap a root page for a lower numbered page. 
+ * PUBLIC: int __db_truncate_root __P((DBC *, + * PUBLIC: PAGE *, u_int32_t, db_pgno_t *, u_int32_t)); + */ +int +__db_truncate_root(dbc, ppg, indx, pgnop, tlen) + DBC *dbc; + PAGE *ppg; + u_int32_t indx; + db_pgno_t *pgnop; + u_int32_t tlen; +{ + DB *dbp; + DBT orig; + PAGE *page; + int ret, t_ret; + db_pgno_t newpgno; + + dbp = dbc->dbp; + + if ((ret = __memp_fget(dbp->mpf, pgnop, + dbc->thread_info, dbc->txn, 0, &page)) != 0) + goto err; + + /* + * If this is a multiply reference overflow key, then we will just + * copy it and decrement the reference count. This is part of a + * fix to get rid of multiple references. + */ + if (TYPE(page) == P_OVERFLOW && OV_REF(page) > 1) { + COMPQUIET(newpgno, 0); + if ((ret = __db_ovref(dbc, *pgnop)) != 0) + goto err; + memset(&orig, 0, sizeof(orig)); + if ((ret = __db_goff(dbc, &orig, tlen, *pgnop, + &orig.data, &orig.size)) == 0) + ret = __db_poff(dbc, &orig, &newpgno); + if (orig.data != NULL) + __os_free(dbp->env, orig.data); + if (ret != 0) + goto err; + } else { + LOCK_CHECK_OFF(dbc->thread_info); + ret = __db_exchange_page(dbc, + &page, NULL, PGNO_INVALID, DB_EXCH_FREE); + LOCK_CHECK_ON(dbc->thread_info); + if (ret != 0) + goto err; + newpgno = PGNO(page); + /* If we could not allocate from the free list, give up.*/ + if (newpgno == *pgnop) + goto err; + } + + /* Update the reference. */ + if (DBC_LOGGING(dbc)) { + if ((ret = __db_pgno_log(dbp, + dbc->txn, &LSN(ppg), 0, PGNO(ppg), + &LSN(ppg), (u_int32_t)indx, *pgnop, newpgno)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(ppg)); + + *pgnop = newpgno; + +err: if (page != NULL && (t_ret = + __memp_fput(dbp->mpf, dbc->thread_info, + page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +#ifdef HAVE_FTRUNCATE +/* + * __db_find_free -- + * Find a contiguous "size" range of free pages that are lower numbers + * than the pages starting at "bstart". We can also return a set of pages + * that overlaps with the pages at "bstart". 
 * PUBLIC: int __db_find_free __P((DBC *, u_int32_t,
 * PUBLIC:	u_int32_t, db_pgno_t, db_pgno_t *));
 */
int
__db_find_free(dbc, type, size, bstart, freep)
	DBC *dbc;
	u_int32_t type;
	u_int32_t size;
	db_pgno_t bstart, *freep;
{
	DB *dbp;
	DBMETA *meta;
	DBT listdbt;
	DB_LOCK metalock;
	DB_LSN lsn;
	DB_MPOOLFILE *mpf;
	PAGE *page, *freepg;
	u_int32_t i, j, start, nelems;
	db_pgno_t *list, next_free, pgno;
	db_pglist_t *lp, *pglist;
	int hash, ret, t_ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	nelems = 0;
	hash = 0;
	page = NULL;
	pglist = NULL;
	meta = NULL;
	LOCK_INIT(metalock);

#ifdef HAVE_HASH
	/* Hash may already hold a dirty meta page; reuse it if so. */
	if (dbp->type == DB_HASH) {
		if ((ret = __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0)
			return (ret);
		if (meta != NULL)
			hash = 1;
	}
#endif
	if (meta == NULL) {
		pgno = PGNO_BASE_MD;
		if ((ret = __db_lget(dbc,
		    LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
			goto err;
		if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
		    DB_MPOOL_DIRTY, &meta)) != 0)
			goto err;
	}

	if ((ret = __memp_get_freelist(mpf, &nelems, &list)) != 0)
		goto err;

	if (nelems == 0) {
		ret = DB_NOTFOUND;
		goto err;
	}

	/* Scan the (sorted) free list for a run of "size" contiguous pages. */
	for (i = 0; i < nelems; i++) {
		if (list[i] > bstart) {
			ret = DB_NOTFOUND;
			goto err;
		}
		start = i;
		while (i < nelems - 1 && list[i] + 1 == list[i + 1]) {
			i++;
			if (i - start == size - 1)
				goto found;
		}
		if (i - start == size - 1)
			goto found;
		/*
		 * If the last set of contiguous free pages we found
		 * are contiguous to the chunk we are trying to move,
		 * then we can slide the allocated chunk back some number
		 * of pages -- figure out how many by calculating the
		 * number of pages before the allocated ones that we have
		 * found in the free list.
		 */
		if (list[i] == bstart - 1) {
			size = (i - start) + 1;
			goto found;
		}
	}
	ret = DB_NOTFOUND;
	goto err;

found:	/* We have size range of pages.  Remove them. */
	next_free = i == nelems - 1 ? PGNO_INVALID : list[i + 1];
	*freep = list[start];
	/*
	 * The free list is a chain threaded through the pages; the run's
	 * predecessor is the meta page when the run starts the list.
	 */
	if (start == 0) {
		page = (PAGE *)meta;
	} else if ((ret = __memp_fget(mpf, &list[start - 1],
	    dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &page)) != 0)
		return (ret);
	if (DBC_LOGGING(dbc)) {
		/* Log the pre-images of the pages being removed. */
		if ((ret = __os_malloc(dbp->env,
		    size * sizeof(db_pglist_t), &pglist)) != 0)
			goto err;
		lp = pglist;
		for (j = start; j < start + size; j++, lp++) {
			if ((ret = __memp_fget(mpf, &list[j],
			    dbc->thread_info, dbc->txn, 0, &freepg)) != 0)
				goto err;
			lp->pgno = PGNO(freepg);
			lp->next_pgno = NEXT_PGNO(freepg);
			lp->lsn = LSN(freepg);
			if ((ret = __memp_fput(mpf,
			    dbc->thread_info, freepg, dbc->priority)) != 0)
				goto err;
		}
		listdbt.size = size * sizeof(*pglist);
		listdbt.data = pglist;
		if ((ret = __db_realloc_log(dbp, dbc->txn, &lsn, 0,
		    PGNO(page), &LSN(page), next_free, type, &listdbt)) != 0)
			goto err;
		__os_free(dbp->env, pglist);
		pglist = NULL;
	} else
		LSN_NOT_LOGGED(lsn);

	/* Unhook the run from the on-disk free chain. */
	LSN(page) = lsn;
	if (start == 0)
		meta->free = next_free;
	else
		NEXT_PGNO(page) = next_free;

	if (page != (PAGE *)meta && (ret = __memp_fput(mpf,
	    dbc->thread_info, page, dbc->priority)) != 0)
		goto err;

	/* Initialize each removed page as a fresh page of "type". */
	for (j = start; j < start + size; j++) {
		if ((ret = __memp_fget(mpf,
		    &list[j], dbc->thread_info,
		    dbc->txn, DB_MPOOL_DIRTY, &freepg)) != 0)
			goto err;
		P_INIT(freepg, dbp->pgsize,
		    list[j], PGNO_INVALID, PGNO_INVALID, 0, type);
		LSN(freepg) = lsn;
		if ((ret = __memp_fput(mpf,
		    dbc->thread_info, freepg, dbc->priority)) != 0)
			goto err;
	}

	/* Shrink the in-memory list to match. */
	if (++i != nelems)
		memmove(&list[start], &list[i], (nelems - i) * sizeof(*list));
	if ((ret = __memp_extend_freelist(mpf, nelems - size, &list)) != 0)
		goto err;
	if (hash == 0)
		ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
	t_ret = __TLPUT(dbc, metalock);

	return (ret == 0 ? t_ret : ret);

err:	if (page != NULL && page != (PAGE *)meta)
		(void)__memp_fput(mpf, dbc->thread_info, page, dbc->priority);
	if (pglist != NULL)
		__os_free(dbp->env, pglist);
	if (meta != NULL && hash == 0)
		(void)__memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
	(void)__TLPUT(dbc, metalock);
	return (ret);
}
#endif

/*
 * __db_relink --
 *	Relink around a deleted page.
 *
 * PUBLIC: int __db_relink __P((DBC *, PAGE *, PAGE *, db_pgno_t));
 *	Otherp can be either the previous or the next page to use if
 * the caller already holds that page.
 */
int
__db_relink(dbc, pagep, otherp, new_pgno)
	DBC *dbc;
	PAGE *pagep, *otherp;
	db_pgno_t new_pgno;
{
	DB *dbp;
	DB_LOCK npl, ppl;
	DB_LSN *nlsnp, *plsnp, ret_lsn;
	DB_MPOOLFILE *mpf;
	PAGE *np, *pp;
	int ret, t_ret;

	dbp = dbc->dbp;
	np = pp = NULL;
	LOCK_INIT(npl);
	LOCK_INIT(ppl);
	nlsnp = plsnp = NULL;
	mpf = dbp->mpf;
	ret = 0;

	/*
	 * Retrieve the one/two pages.  The caller must have them locked
	 * because the parent is latched.  For a remove, we may need
	 * two pages (the before and after).  For an add, we only need one
	 * because, the split took care of the prev.
	 */
	if (pagep->next_pgno != PGNO_INVALID) {
		if (((np = otherp) == NULL ||
		    PGNO(otherp) != pagep->next_pgno) &&
		    (ret = __memp_fget(mpf, &pagep->next_pgno,
		    dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &np)) != 0) {
			ret = __db_pgerr(dbp, pagep->next_pgno, ret);
			goto err;
		}
		nlsnp = &np->lsn;
	}
	if (pagep->prev_pgno != PGNO_INVALID) {
		if (((pp = otherp) == NULL ||
		    PGNO(otherp) != pagep->prev_pgno) &&
		    (ret = __memp_fget(mpf, &pagep->prev_pgno,
		    dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pp)) != 0) {
			ret = __db_pgerr(dbp, pagep->prev_pgno, ret);
			goto err;
		}
		plsnp = &pp->lsn;
	}

	/* Log the change.
 */
	if (DBC_LOGGING(dbc)) {
		if ((ret = __db_relink_log(dbp, dbc->txn, &ret_lsn, 0,
		    pagep->pgno, new_pgno, pagep->prev_pgno, plsnp,
		    pagep->next_pgno, nlsnp)) != 0)
			goto err;
	} else
		LSN_NOT_LOGGED(ret_lsn);
	if (np != NULL)
		np->lsn = ret_lsn;
	if (pp != NULL)
		pp->lsn = ret_lsn;

	/*
	 * Modify and release the two pages.
	 * new_pgno == PGNO_INVALID means "remove pagep from the chain";
	 * otherwise the siblings are repointed at new_pgno.
	 */
	if (np != NULL) {
		if (new_pgno == PGNO_INVALID)
			np->prev_pgno = pagep->prev_pgno;
		else
			np->prev_pgno = new_pgno;
		if (np != otherp)
			ret = __memp_fput(mpf,
			    dbc->thread_info, np, dbc->priority);
		if ((t_ret = __TLPUT(dbc, npl)) != 0 && ret == 0)
			ret = t_ret;
		if (ret != 0)
			goto err;
	}

	if (pp != NULL) {
		if (new_pgno == PGNO_INVALID)
			pp->next_pgno = pagep->next_pgno;
		else
			pp->next_pgno = new_pgno;
		if (pp != otherp)
			ret = __memp_fput(mpf,
			    dbc->thread_info, pp, dbc->priority);
		if ((t_ret = __TLPUT(dbc, ppl)) != 0 && ret == 0)
			ret = t_ret;
		if (ret != 0)
			goto err;
	}
	return (0);

err:	if (np != NULL && np != otherp)
		(void)__memp_fput(mpf, dbc->thread_info, np, dbc->priority);
	if (pp != NULL && pp != otherp)
		(void)__memp_fput(mpf, dbc->thread_info, pp, dbc->priority);
	return (ret);
}

/*
 * __db_move_metadata -- move a meta data page to a lower page number.
 *	The meta data page must be exclusively latched on entry.
+ * + * PUBLIC: int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *)); + */ +int +__db_move_metadata(dbc, metap, c_data) + DBC *dbc; + DBMETA **metap; + DB_COMPACT *c_data; +{ + BTREE *bt; + DB *dbp, *mdbp; + DB_LOCK handle_lock; + HASH *ht; + int ret, t_ret; + + dbp = dbc->dbp; + + c_data->compact_pages_examine++; + if ((ret = __db_exchange_page(dbc, + (PAGE**)metap, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0) + return (ret); + + if (PGNO(*metap) == dbp->meta_pgno) + return (0); + + if ((ret = __db_master_open(dbp, + dbc->thread_info, dbc->txn, dbp->fname, 0, 0, &mdbp)) != 0) + return (ret); + + dbp->meta_pgno = PGNO(*metap); + + if ((ret = __db_master_update(mdbp, dbp, dbc->thread_info, + dbc->txn, dbp->dname, dbp->type, MU_MOVE, NULL, 0)) != 0) + goto err; + + /* + * The handle lock for subdb's depends on the metadata page number: + * swap the old one for the new one. + */ + if (STD_LOCKING(dbc)) { + /* + * If this dbp is still in an opening transaction we need to + * change its lock in the event. + */ + if (dbp->cur_txn != NULL) + __txn_remlock(dbp->env, + dbp->cur_txn, &dbp->handle_lock, DB_LOCK_INVALIDID); + + handle_lock = dbp->handle_lock; + if ((ret = __fop_lock_handle(dbp->env, dbp, + dbp->cur_locker != NULL ? dbp->cur_locker : dbp->locker, + dbp->cur_txn != NULL ? DB_LOCK_WRITE : DB_LOCK_READ, + NULL, 0)) != 0) + goto err; + + /* Move all the other handles to the new lock. */ + if ((ret = __lock_change(dbp->env, + &handle_lock, &dbp->handle_lock)) != 0) + goto err; + + /* Reregister the event. 
*/ + if (dbp->cur_txn != NULL) + ret = __txn_lockevent(dbp->env, + dbp->cur_txn, dbp, &dbp->handle_lock, dbp->locker); + } + if (dbp->type == DB_HASH) { + ht = dbp->h_internal; + ht->meta_pgno = dbp->meta_pgno; + ht->revision = ++dbp->mpf->mfp->revision; + } else { + bt = dbp->bt_internal; + bt->bt_meta = dbp->meta_pgno; + bt->revision = ++dbp->mpf->mfp->revision; + } + + +err: if ((t_ret = __db_close(mdbp, dbc->txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/db/db_conv.c b/src/db/db_conv.c new file mode 100644 index 00000000..4c3e7f89 --- /dev/null +++ b/src/db/db_conv.c @@ -0,0 +1,890 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" + +/* + * __db_pgin -- + * Primary page-swap routine. + * + * PUBLIC: int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); + */ +int +__db_pgin(dbenv, pg, pp, cookie) + DB_ENV *dbenv; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB dummydb, *dbp; + DB_CIPHER *db_cipher; + DB_LSN not_used; + DB_PGINFO *pginfo; + ENV *env; + PAGE *pagep; + size_t sum_len; + int is_hmac, ret; + u_int8_t *chksum; + + pginfo = (DB_PGINFO *)cookie->data; + env = dbenv->env; + pagep = (PAGE *)pp; + + ret = is_hmac = 0; + chksum = NULL; + memset(&dummydb, 0, sizeof(DB)); + dbp = &dummydb; + dbp->dbenv = dbenv; + dbp->env = env; + dbp->flags = pginfo->flags; + dbp->pgsize = pginfo->db_pagesize; + db_cipher = env->crypto_handle; + switch (pagep->type) { + case P_HASHMETA: + case P_HEAPMETA: + case P_BTREEMETA: + case P_QAMMETA: + /* + * If checksumming is set on the meta-page, we must set + * it in the dbp. + */ + if (FLD_ISSET(((DBMETA *)pp)->metaflags, DBMETA_CHKSUM)) + F_SET(dbp, DB_AM_CHKSUM); + else + F_CLR(dbp, DB_AM_CHKSUM); + if (((DBMETA *)pp)->encrypt_alg != 0 || + F_ISSET(dbp, DB_AM_ENCRYPT)) + is_hmac = 1; + /* + * !!! 
+ * For all meta pages it is required that the chksum + * be at the same location. Use BTMETA to get to it + * for any meta type. + */ + chksum = ((BTMETA *)pp)->chksum; + sum_len = DBMETASIZE; + break; + case P_INVALID: + /* + * We assume that we've read a file hole if we have + * a zero LSN, zero page number and P_INVALID. Otherwise + * we have an invalid page that might contain real data. + */ + if (IS_ZERO_LSN(LSN(pagep)) && pagep->pgno == PGNO_INVALID) { + sum_len = 0; + break; + } + /* FALLTHROUGH */ + default: + chksum = P_CHKSUM(dbp, pagep); + sum_len = pginfo->db_pagesize; + /* + * If we are reading in a non-meta page, then if we have + * a db_cipher then we are using hmac. + */ + is_hmac = CRYPTO_ON(env) ? 1 : 0; + break; + } + + /* + * We expect a checksum error if there was a configuration problem. + * If there is no configuration problem and we don't get a match, + * it's fatal: panic the system. + */ + if (F_ISSET(dbp, DB_AM_CHKSUM) && sum_len != 0) { + if (F_ISSET(dbp, DB_AM_SWAP) && is_hmac == 0) + P_32_SWAP(chksum); + switch (ret = __db_check_chksum( + env, NULL, db_cipher, chksum, pp, sum_len, is_hmac)) { + case 0: + break; + case -1: + if (DBENV_LOGGING(env)) + (void)__db_cksum_log( + env, NULL, ¬_used, DB_FLUSH); + __db_errx(env, DB_STR_A("0684", + "checksum error: page %lu: catastrophic recovery required", + "%lu"), (u_long)pg); + return (__env_panic(env, DB_RUNRECOVERY)); + default: + return (ret); + } + } + if ((ret = __db_decrypt_pg(env, dbp, pagep)) != 0) + return (ret); + switch (pagep->type) { + case P_INVALID: + if (pginfo->type == DB_QUEUE) + return (__qam_pgin_out(env, pg, pp, cookie)); + else if (pginfo->type == DB_HEAP) + return (__heap_pgin(dbp, pg, pp, cookie)); + /* + * This page is either newly allocated from the end of the + * file, or from the free list, or it is an as-yet unwritten + * hash bucket page. In this last case it needs to be + * initialized, but never byte-swapped. Otherwise the header + * may need swapping. 
It will not be a metadata page, so the + * byte swapping code of __ham_pgin is adequate. If hash + * is not configured fall back to btree swapping. + */ +#ifdef HAVE_HASH + return (__ham_pgin(dbp, pg, pp, cookie)); +#else + return (__bam_pgin(dbp, pg, pp, cookie)); +#endif + /* NOTREACHED. */ + break; + case P_HASH_UNSORTED: + case P_HASH: + case P_HASHMETA: + return (__ham_pgin(dbp, pg, pp, cookie)); + case P_HEAP: + case P_HEAPMETA: + case P_IHEAP: + return (__heap_pgin(dbp, pg, pp, cookie)); + case P_BTREEMETA: + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + case P_OVERFLOW: + return (__bam_pgin(dbp, pg, pp, cookie)); + case P_QAMMETA: + case P_QAMDATA: + return (__qam_pgin_out(env, pg, pp, cookie)); + default: + break; + } + return (__db_pgfmt(env, pg)); +} + +/* + * __db_pgout -- + * Primary page-swap routine. + * + * PUBLIC: int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); + */ +int +__db_pgout(dbenv, pg, pp, cookie) + DB_ENV *dbenv; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB dummydb, *dbp; + DB_PGINFO *pginfo; + ENV *env; + PAGE *pagep; + int ret; + + pginfo = (DB_PGINFO *)cookie->data; + env = dbenv->env; + pagep = (PAGE *)pp; + + memset(&dummydb, 0, sizeof(DB)); + dbp = &dummydb; + dbp->dbenv = dbenv; + dbp->env = env; + dbp->flags = pginfo->flags; + dbp->pgsize = pginfo->db_pagesize; + ret = 0; + switch (pagep->type) { + case P_INVALID: + switch (pginfo->type) { + case DB_QUEUE: + ret = __qam_pgin_out(env, pg, pp, cookie); + break; +#ifdef HAVE_HASH + case DB_HASH: + ret = __ham_pgout(dbp, pg, pp, cookie); + break; +#endif +#ifdef HAVE_HEAP + case DB_HEAP: + ret = __heap_pgout(dbp, pg, pp, cookie); + break; +#endif + case DB_BTREE: + case DB_RECNO: + ret = __bam_pgout(dbp, pg, pp, cookie); + break; + default: + return (__db_pgfmt(env, pg)); + } + break; + case P_HASH: + case P_HASH_UNSORTED: + /* + * Support pgout of unsorted hash pages - since online + * replication upgrade can cause pages of this 
type to be + * written out. + * + * FALLTHROUGH + */ + case P_HASHMETA: + ret = __ham_pgout(dbp, pg, pp, cookie); + break; + case P_HEAP: + case P_HEAPMETA: + case P_IHEAP: + ret = __heap_pgout(dbp, pg, pp, cookie); + break; + case P_BTREEMETA: + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + case P_OVERFLOW: + ret = __bam_pgout(dbp, pg, pp, cookie); + break; + case P_QAMMETA: + case P_QAMDATA: + ret = __qam_pgin_out(env, pg, pp, cookie); + break; + default: + return (__db_pgfmt(env, pg)); + } + if (ret) + return (ret); + + return (__db_encrypt_and_checksum_pg(env, dbp, pagep)); +} + +/* + * __db_decrypt_pg -- + * Utility function to decrypt a db page. + * + * PUBLIC: int __db_decrypt_pg __P((ENV *, DB *, PAGE *)); + */ +int +__db_decrypt_pg (env, dbp, pagep) + ENV *env; + DB *dbp; + PAGE *pagep; +{ + DB_CIPHER *db_cipher; + size_t pg_len, pg_off; + u_int8_t *iv; + int ret; + + db_cipher = env->crypto_handle; + ret = 0; + iv = NULL; + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + DB_ASSERT(env, db_cipher != NULL); + DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM)); + + pg_off = P_OVERHEAD(dbp); + DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0); + + switch (pagep->type) { + case P_HASHMETA: + case P_HEAPMETA: + case P_BTREEMETA: + case P_QAMMETA: + /* + * !!! + * For all meta pages it is required that the iv + * be at the same location. Use BTMETA to get to it + * for any meta type. + */ + iv = ((BTMETA *)pagep)->iv; + pg_len = DBMETASIZE; + break; + case P_INVALID: + if (IS_ZERO_LSN(LSN(pagep)) && + pagep->pgno == PGNO_INVALID) { + pg_len = 0; + break; + } + /* FALLTHROUGH */ + default: + iv = P_IV(dbp, pagep); + pg_len = dbp->pgsize; + break; + } + if (pg_len != 0) + ret = db_cipher->decrypt(env, db_cipher->data, + iv, ((u_int8_t *)pagep) + pg_off, + pg_len - pg_off); + } + return (ret); +} + +/* + * __db_encrypt_and_checksum_pg -- + * Utility function to encrypt and checksum a db page. 
+ * + * PUBLIC: int __db_encrypt_and_checksum_pg + * PUBLIC: __P((ENV *, DB *, PAGE *)); + */ +int +__db_encrypt_and_checksum_pg (env, dbp, pagep) + ENV *env; + DB *dbp; + PAGE *pagep; +{ + DB_CIPHER *db_cipher; + int ret; + size_t pg_off, pg_len, sum_len; + u_int8_t *chksum, *iv, *key; + + chksum = iv = key = NULL; + db_cipher = env->crypto_handle; + + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + DB_ASSERT(env, db_cipher != NULL); + DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM)); + + pg_off = P_OVERHEAD(dbp); + DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0); + + key = db_cipher->mac_key; + + switch (pagep->type) { + case P_HASHMETA: + case P_HEAPMETA: + case P_BTREEMETA: + case P_QAMMETA: + /* + * !!! + * For all meta pages it is required that the iv + * be at the same location. Use BTMETA to get to it + * for any meta type. + */ + iv = ((BTMETA *)pagep)->iv; + pg_len = DBMETASIZE; + break; + default: + iv = P_IV(dbp, pagep); + pg_len = dbp->pgsize; + break; + } + if ((ret = db_cipher->encrypt(env, db_cipher->data, + iv, ((u_int8_t *)pagep) + pg_off, pg_len - pg_off)) != 0) + return (ret); + } + if (F_ISSET(dbp, DB_AM_CHKSUM)) { + switch (pagep->type) { + case P_HASHMETA: + case P_HEAPMETA: + case P_BTREEMETA: + case P_QAMMETA: + /* + * !!! + * For all meta pages it is required that the chksum + * be at the same location. Use BTMETA to get to it + * for any meta type. + */ + chksum = ((BTMETA *)pagep)->chksum; + sum_len = DBMETASIZE; + break; + default: + chksum = P_CHKSUM(dbp, pagep); + sum_len = dbp->pgsize; + break; + } + __db_chksum(NULL, (u_int8_t *)pagep, sum_len, key, chksum); + if (F_ISSET(dbp, DB_AM_SWAP) && !F_ISSET(dbp, DB_AM_ENCRYPT)) + P_32_SWAP(chksum); + } + return (0); +} + +/* + * __db_metaswap -- + * Byteswap the common part of the meta-data page. + * + * PUBLIC: void __db_metaswap __P((PAGE *)); + */ +void +__db_metaswap(pg) + PAGE *pg; +{ + u_int8_t *p; + + p = (u_int8_t *)pg; + + /* Swap the meta-data information. 
*/ + SWAP32(p); /* lsn.file */ + SWAP32(p); /* lsn.offset */ + SWAP32(p); /* pgno */ + SWAP32(p); /* magic */ + SWAP32(p); /* version */ + SWAP32(p); /* pagesize */ + p += 4; /* unused, page type, unused, unused */ + SWAP32(p); /* free */ + SWAP32(p); /* alloc_lsn part 1 */ + SWAP32(p); /* alloc_lsn part 2 */ + SWAP32(p); /* cached key count */ + SWAP32(p); /* cached record count */ + SWAP32(p); /* flags */ +} + +/* + * __db_byteswap -- + * Byteswap an ordinary database page. + * + * PUBLIC: int __db_byteswap + * PUBLIC: __P((DB *, db_pgno_t, PAGE *, size_t, int)); + */ +int +__db_byteswap(dbp, pg, h, pagesize, pgin) + DB *dbp; + db_pgno_t pg; + PAGE *h; + size_t pagesize; + int pgin; +{ + ENV *env; + BINTERNAL *bi; + BKEYDATA *bk; + BOVERFLOW *bo; + RINTERNAL *ri; + db_indx_t i, *inp, len, tmp; + u_int8_t *end, *p, *pgend; + + if (pagesize == 0) + return (0); + + if (pgin) { + M_32_SWAP(h->lsn.file); + M_32_SWAP(h->lsn.offset); + M_32_SWAP(h->pgno); + M_32_SWAP(h->prev_pgno); + M_32_SWAP(h->next_pgno); + M_16_SWAP(h->entries); + M_16_SWAP(h->hf_offset); + } + + if (dbp == NULL) + return (0); + env = dbp->env; + + pgend = (u_int8_t *)h + pagesize; + + inp = P_INP(dbp, h); + if ((u_int8_t *)inp >= pgend) + goto out; + + switch (TYPE(h)) { + case P_HASH_UNSORTED: + case P_HASH: + for (i = 0; i < NUM_ENT(h); i++) { + if (pgin) + M_16_SWAP(inp[i]); + + if (P_ENTRY(dbp, h, i) >= pgend) + continue; + + switch (HPAGE_TYPE(dbp, h, i)) { + case H_KEYDATA: + break; + case H_DUPLICATE: + len = LEN_HKEYDATA(dbp, h, pagesize, i); + p = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); + for (end = p + len; p < end;) { + if (pgin) { + P_16_SWAP(p); + memcpy(&tmp, + p, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + } else { + memcpy(&tmp, + p, sizeof(db_indx_t)); + SWAP16(p); + } + p += tmp; + SWAP16(p); + } + break; + case H_OFFDUP: + p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i)); + SWAP32(p); /* pgno */ + break; + case H_OFFPAGE: + p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i)); + SWAP32(p); /* pgno */ + 
SWAP32(p); /* tlen */ + break; + default: + return (__db_pgfmt(env, pg)); + } + + } + + /* + * The offsets in the inp array are used to determine + * the size of entries on a page; therefore they + * cannot be converted until we've done all the + * entries. + */ + if (!pgin) + for (i = 0; i < NUM_ENT(h); i++) + M_16_SWAP(inp[i]); + break; + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + for (i = 0; i < NUM_ENT(h); i++) { + if (pgin) + M_16_SWAP(inp[i]); + + /* + * In the case of on-page duplicates, key information + * should only be swapped once. + */ + if (h->type == P_LBTREE && i > 1) { + if (pgin) { + if (inp[i] == inp[i - 2]) + continue; + } else { + M_16_SWAP(inp[i]); + if (inp[i] == inp[i - 2]) + continue; + M_16_SWAP(inp[i]); + } + } + + bk = GET_BKEYDATA(dbp, h, i); + if ((u_int8_t *)bk >= pgend) + continue; + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + M_16_SWAP(bk->len); + break; + case B_DUPLICATE: + case B_OVERFLOW: + bo = (BOVERFLOW *)bk; + M_32_SWAP(bo->pgno); + M_32_SWAP(bo->tlen); + break; + default: + return (__db_pgfmt(env, pg)); + } + + if (!pgin) + M_16_SWAP(inp[i]); + } + break; + case P_IBTREE: + for (i = 0; i < NUM_ENT(h); i++) { + if (pgin) + M_16_SWAP(inp[i]); + + bi = GET_BINTERNAL(dbp, h, i); + if ((u_int8_t *)bi >= pgend) + continue; + + M_16_SWAP(bi->len); + M_32_SWAP(bi->pgno); + M_32_SWAP(bi->nrecs); + + switch (B_TYPE(bi->type)) { + case B_KEYDATA: + break; + case B_DUPLICATE: + case B_OVERFLOW: + bo = (BOVERFLOW *)bi->data; + M_32_SWAP(bo->pgno); + M_32_SWAP(bo->tlen); + break; + default: + return (__db_pgfmt(env, pg)); + } + + if (!pgin) + M_16_SWAP(inp[i]); + } + break; + case P_IRECNO: + for (i = 0; i < NUM_ENT(h); i++) { + if (pgin) + M_16_SWAP(inp[i]); + + ri = GET_RINTERNAL(dbp, h, i); + if ((u_int8_t *)ri >= pgend) + continue; + + M_32_SWAP(ri->pgno); + M_32_SWAP(ri->nrecs); + + if (!pgin) + M_16_SWAP(inp[i]); + } + break; + case P_HEAP: + case P_IHEAP: + case P_INVALID: + case P_OVERFLOW: + case P_QAMDATA: + /* Nothing 
to do. */ + break; + default: + return (__db_pgfmt(env, pg)); + } + +out: if (!pgin) { + /* Swap the header information. */ + M_32_SWAP(h->lsn.file); + M_32_SWAP(h->lsn.offset); + M_32_SWAP(h->pgno); + M_32_SWAP(h->prev_pgno); + M_32_SWAP(h->next_pgno); + M_16_SWAP(h->entries); + M_16_SWAP(h->hf_offset); + } + return (0); +} + +/* + * __db_pageswap -- + * Byteswap any database page. Normally, the page to be swapped will be + * referenced by the "pp" argument and the pdata argument will be NULL. + * This function is also called by automatically generated log functions, + * where the page may be split into separate header and data parts. In + * that case, pdata is not NULL we reconsitute + * + * PUBLIC: int __db_pageswap + * PUBLIC: __P((ENV *, DB *, void *, size_t, DBT *, int)); + */ +int +__db_pageswap(env, dbp, pp, len, pdata, pgin) + ENV *env; + DB *dbp; + void *pp; + size_t len; + DBT *pdata; + int pgin; +{ + db_pgno_t pg; + size_t pgsize; + void *pgcopy; + int ret; + u_int16_t hoffset; + + switch (TYPE(pp)) { + case P_BTREEMETA: + return (__bam_mswap(env, pp)); + + case P_HASHMETA: + return (__ham_mswap(env, pp)); + + case P_QAMMETA: + return (__qam_mswap(env, pp)); + + case P_INVALID: + case P_OVERFLOW: + case P_QAMDATA: + /* + * We may have been passed an invalid page, or a queue data + * page, or an overflow page where fields like hoffset have a + * special meaning. In that case, no swapping of the page data + * is required, just the fields in the page header. 
+ */ + pdata = NULL; + break; + + default: + break; + } + + if (pgin) { + P_32_COPYSWAP(&PGNO(pp), &pg); + P_16_COPYSWAP(&HOFFSET(pp), &hoffset); + } else { + pg = PGNO(pp); + hoffset = HOFFSET(pp); + } + + if (pdata == NULL) + ret = __db_byteswap(dbp, pg, (PAGE *)pp, len, pgin); + else { + pgsize = hoffset + pdata->size; + if ((ret = __os_malloc(env, pgsize, &pgcopy)) != 0) + return (ret); + memset(pgcopy, 0, pgsize); + memcpy(pgcopy, pp, len); + memcpy((u_int8_t *)pgcopy + hoffset, pdata->data, pdata->size); + + ret = __db_byteswap(dbp, pg, (PAGE *)pgcopy, pgsize, pgin); + memcpy(pp, pgcopy, len); + + /* + * If we are swapping data to be written to the log, we can't + * overwrite the buffer that was passed in: it may be a pointer + * into a page in cache. We set DB_DBT_APPMALLOC here so that + * the calling code can free the memory we allocate here. + */ + if (!pgin) { + if ((ret = + __os_malloc(env, pdata->size, &pdata->data)) != 0) { + __os_free(env, pgcopy); + return (ret); + } + F_SET(pdata, DB_DBT_APPMALLOC); + } + memcpy(pdata->data, (u_int8_t *)pgcopy + hoffset, pdata->size); + __os_free(env, pgcopy); + } + + return (ret); +} + +/* + * __db_recordswap -- + * Byteswap any database record. 
+ * + * PUBLIC: void __db_recordswap __P((u_int32_t, + * PUBLIC: u_int32_t, void *, void *, u_int32_t)); + */ +void +__db_recordswap(op, size, hdr, data, pgin) + u_int32_t op; + u_int32_t size; + void *hdr, *data; + u_int32_t pgin; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + BINTERNAL *bi; + RINTERNAL *ri; + db_indx_t tmp; + u_int8_t *p, *end; + + if (size == 0) + return; + switch (OP_PAGE_GET(op)) { + case P_LDUP: + case P_LBTREE: + case P_LRECNO: + bk = (BKEYDATA *)hdr; + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + M_16_SWAP(bk->len); + break; + case B_DUPLICATE: + case B_OVERFLOW: + bo = (BOVERFLOW *)hdr; + M_32_SWAP(bo->pgno); + M_32_SWAP(bo->tlen); + break; + default: + DB_ASSERT(NULL, bk->type != bk->type); + } + break; + case P_IBTREE: + bi = (BINTERNAL *)hdr; + M_16_SWAP(bi->len); + M_32_SWAP(bi->pgno); + M_32_SWAP(bi->nrecs); + if (B_TYPE(bi->type) == B_OVERFLOW) { + if (data == NULL) { + DB_ASSERT(NULL, + size == BINTERNAL_SIZE(BOVERFLOW_SIZE)); + bo = (BOVERFLOW *)bi->data; + } else + bo = (BOVERFLOW *)data; + M_32_SWAP(bo->pgno); + } + break; + case P_IRECNO: + ri = (RINTERNAL *)hdr; + M_32_SWAP(ri->pgno); + M_32_SWAP(ri->nrecs); + break; + case P_OVERFLOW: + break; + case P_HASH: + case P_HASH_UNSORTED: + switch (OP_MODE_GET(op)) { + /* KEYDATA and DUPLICATE records do not inclued the header. */ + case H_KEYDATA: + break; + case H_DUPLICATE: + p = (u_int8_t *)hdr; + for (end = p + size; p < end;) { + if (pgin) { + P_16_SWAP(p); + memcpy(&tmp, + p, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + } else { + memcpy(&tmp, + p, sizeof(db_indx_t)); + SWAP16(p); + } + p += tmp; + SWAP16(p); + } + break; + /* These two record types include the full header. 
*/ + case H_OFFDUP: + p = (u_int8_t *)hdr; + p += SSZ(HOFFPAGE, pgno); + SWAP32(p); /* pgno */ + break; + case H_OFFPAGE: + p = (u_int8_t *)hdr; + p += SSZ(HOFFPAGE, pgno); + SWAP32(p); /* pgno */ + SWAP32(p); /* tlen */ + break; + default: + DB_ASSERT(NULL, op != op); + } + break; + + default: + DB_ASSERT(NULL, op != op); + } +} diff --git a/src/db/db_copy.c b/src/db/db_copy.c new file mode 100644 index 00000000..2722c9d3 --- /dev/null +++ b/src/db/db_copy.c @@ -0,0 +1,256 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_QUEUE +#include "dbinc/qam.h" +static int copy_queue_extents __P((DB *, const char *, const char *)); +#endif + + +/* + * db_copy -- + * Copy a database file using mpool. + * EXTERN: int db_copy __P((DB_ENV *, + * EXTERN: const char *, const char *, const char *)); + */ +int +db_copy(dbenv, dbfile, target, passwd) + DB_ENV *dbenv; + const char *dbfile; + const char *target; + const char *passwd; +{ + DB *dbp; + DB_FH *fp; + DB_MPOOLFILE *mpf; + ENV *env; + void *pagep; + db_pgno_t pgno; + int ret, t_ret; + char *path; + size_t nw; + +#ifdef HAVE_QUEUE + DBTYPE type; + u_int32_t extentsize; +#endif + + path = NULL; + dbp = NULL; + fp = NULL; + env = dbenv->env; +retry: if ((ret = db_create(&dbp, dbenv, 0)) != 0) + return (ret); + /* + * If the database is encrypted we need to encript the pages + * before outputting them since we will read decrypted pages. 
+ */ + if (passwd != NULL && + (ret = dbp->set_encrypt(dbp, passwd, DB_ENCRYPT_AES)) != 0) + goto err; + + if ((ret = dbp->open(dbp, NULL, + dbfile, NULL, DB_UNKNOWN, DB_AUTO_COMMIT | DB_RDONLY, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) { + dbenv->errx(dbenv, DB_STR_A("0702", + "Deadlock while opening %s, retrying", "%s"), dbfile); + (void)dbp->close(dbp, DB_NOSYNC); + goto retry; + } + goto err; + } + if ((ret = __os_malloc(env, + strlen(target) + strlen(dbfile) + 2, &path)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("0703", + "Cannot allocate space for path: %s", "%s"), target); + goto err; + } + + (void)strcpy(path, target); + (void)strncat(path, &PATH_SEPARATOR[0], 1); + (void)strcat(path, dbfile); + + if ((ret = __os_open(env, + path, 0, DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fp)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("0704", + "Cannot open traget file: %s", "%s"), path); + goto err; + } + + mpf = dbp->get_mpf(dbp); + for (pgno = 0; ret == 0 ; pgno++) { + if ((ret = mpf->get(mpf, &pgno, NULL, 0, &pagep)) != 0) + break; + if (F_ISSET(dbp, DB_AM_CHKSUM) || passwd != NULL) + ret = __db_encrypt_and_checksum_pg(env, dbp, pagep); + if (ret == 0 && ((ret = __os_write(env, + fp, pagep, dbp->pgsize, &nw)) != 0 || nw != dbp->pgsize)) { + if (ret == 0) + ret = EIO; + } + if ((t_ret = mpf->put(mpf, + pagep, DB_PRIORITY_VERY_LOW, 0)) != 0 && ret == 0) + ret = t_ret; + } + + if (ret == DB_PAGE_NOTFOUND) + ret = 0; + +#ifdef HAVE_QUEUE + /* Queue exents cannot be read directly, use the internal interface. */ + if (ret == 0) { + if ((ret = dbp->get_type(dbp, &type) != 0)) + goto err; + if (type == DB_QUEUE && + (ret = dbp->get_q_extentsize(dbp, &extentsize)) == 0 && + extentsize != 0) + ret = copy_queue_extents(dbp, target, passwd); + } +#endif + /* We have read pages for which log records may still be in cache. 
*/ + if (ret == 0) + ret = dbenv->log_flush(dbenv, NULL); + +err: if (path != NULL) + __os_free(env, path); + if (fp != NULL && (t_ret = __os_closehandle(env, fp)) != 0 && ret == 0) + ret = t_ret; + if (dbp != NULL && + (t_ret = dbp->close(dbp, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +#ifdef HAVE_QUEUE +/* + * copy_queue_extents -- + * Routine to safely copy the active queue extents of a database. + * This routine must use internal BDB interfaces. + */ +static int +copy_queue_extents(dbp, target, passwd) + DB *dbp; + const char *target; + const char *passwd; +{ + DBC *dbc; + DB_ENV *dbenv; + DB_FH *fp; + DB_QUEUE_STAT *sp; + ENV *env; + void *pagep; + db_recno_t current, first; + db_pgno_t pgno, stop; + u_int32_t extid, page_ext; + char *path; + size_t nw; + int ret, t_ret; + + /* Find out the first and last record numbers in the database. */ + if ((ret = dbp->stat(dbp, NULL, &sp, DB_FAST_STAT)) != 0) + return (ret); + + current = sp->qs_cur_recno; + first = sp->qs_first_recno; + page_ext = sp->qs_extentsize; + + dbenv = dbp->dbenv; + env = dbp->env; + fp = NULL; + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) + return (ret); + if ((ret = __os_malloc(env, + strlen(target) + strlen(dbp->fname) + strlen(QUEUE_EXTENT), + &path)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("0705", + "Cannot allocate space for path: %s", "%s"), target); + goto err; + } + + extid = UINT32_MAX; +again: + if (current >= first) + stop = QAM_RECNO_PAGE(dbp, current); + else + stop = QAM_RECNO_PAGE(dbp, UINT32_MAX); + + for (pgno = QAM_RECNO_PAGE(dbp, first); pgno <= stop; pgno++) { + if (extid != QAM_PAGE_EXTENT(dbp, pgno)) { + if (fp != NULL && + (ret = __os_closehandle(env, fp)) != 0) + goto err; + fp = NULL; + extid = QAM_PAGE_EXTENT(dbp, pgno); + (void)sprintf(path, QUEUE_EXTENT, + target, PATH_SEPARATOR[0], dbp->fname, extid); + if ((ret = __os_open(env, path, 0, + DB_OSO_CREATE | DB_OSO_TRUNC, + DB_MODE_600, &fp)) != 0) { + dbenv->err(dbenv, ret, 
DB_STR_A("0706", + "Cannot open traget file: %s", "%s"), path); + goto err; + } + } + ret = __qam_fget(dbc, &pgno, 0, &pagep); + + /* + * Skip to the next extent if this extent has not + * been created yet or if it is not completly populated. + */ + if (ret == DB_PAGE_NOTFOUND || ret == ENOENT) { + /* + * Compute the page number of the first page in + * the next extent. + */ + pgno = QAM_PAGE_EXTENT( + dbp, pgno + page_ext) * page_ext; + /* Decrement, the loop will increment. */ + pgno--; + ret = 0; + continue; + } + if (ret != 0) + goto err; + + if (F_ISSET(dbp, DB_AM_CHKSUM) || passwd != NULL) + ret = __db_encrypt_and_checksum_pg(env, dbp, pagep); + if (ret == 0 && ((ret = __os_write(env, + fp, pagep, dbp->pgsize, &nw)) != 0 || nw != dbp->pgsize)) { + if (ret == 0) + ret = EIO; + dbenv->err(dbenv, ret, DB_STR_A("0707", + "Failed to write page %lu output to %s", "%s"), + (u_long)pgno, path); + } + if ((t_ret = __qam_fput(dbc, + pgno, pagep, DB_PRIORITY_VERY_LOW)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + } + + if (current < first) { + first = 1; + goto again; + } + +err: if (fp != NULL && (t_ret = __os_closehandle(env, fp)) != 0 && ret == 0) + ret = t_ret; + + if (dbc != NULL && (t_ret = dbc->close(dbc)) != 0 && ret == 0) + ret = t_ret; + + __os_free(env, path); + return (ret); +} +#endif diff --git a/src/db/db_dispatch.c b/src/db/db_dispatch.c new file mode 100644 index 00000000..9d12b947 --- /dev/null +++ b/src/db/db_dispatch.c @@ -0,0 +1,977 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#include "dbinc/log_verify.h" + +static int __db_txnlist_find_internal __P((ENV *, DB_TXNHEAD *, + db_txnlist_type, u_int32_t, DB_TXNLIST **, + int, u_int32_t *)); + +/* + * __db_dispatch -- + * + * This is the transaction dispatch function used by the db access methods. 
+ * It is designed to handle the record format used by all the access + * methods (the one automatically generated by the db_{h,log,read}.sh + * scripts in the tools directory). An application using a different + * recovery paradigm will supply a different dispatch function to txn_open. + * + * PUBLIC: int __db_dispatch __P((ENV *, + * PUBLIC: DB_DISTAB *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_dispatch(env, dtab, db, lsnp, redo, params) + ENV *env; /* The environment. */ + DB_DISTAB *dtab; + DBT *db; /* The log record upon which to dispatch. */ + DB_LSN *lsnp; /* The lsn of the record being dispatched. */ + db_recops redo; /* Redo this op (or undo it). */ + void *params; +{ + DB_ENV *dbenv; + DB_TXNHEAD *info; /* Transaction list. */ + DB_LOG_VRFY_INFO *lvh; + DB_LSN prev_lsn; + u_int32_t rectype, status, txnid, urectype; + int make_call, ret; + + dbenv = env->dbenv; + make_call = ret = 0; + lvh = NULL; + info = NULL; + LOGCOPY_32(env, &rectype, db->data); + LOGCOPY_32(env, &txnid, (u_int8_t *)db->data + sizeof(rectype)); + + /* + * Log verification passes a DB_LOG_VRFY_INFO structure, others + * pass a DB_TXNHEAD structure. + */ + if (redo != DB_TXN_LOG_VERIFY) + info = (DB_TXNHEAD *)params; + else + lvh = (DB_LOG_VRFY_INFO *)params; + + /* If we don't have a dispatch table, it's hard to dispatch. */ + DB_ASSERT(env, dtab != NULL); + + /* + * If we find a record that is in the user's number space and they + * have specified a recovery routine, let them handle it. If they + * didn't specify a recovery routine, then we expect that they've + * followed all our rules and registered new recovery functions. + */ + switch (redo) { + case DB_TXN_ABORT: + case DB_TXN_APPLY: + case DB_TXN_LOG_VERIFY: + case DB_TXN_PRINT: + make_call = 1; + break; + case DB_TXN_OPENFILES: + /* + * We collect all the transactions that have + * "begin" records, those with no previous LSN, + * so that we do not abort partial transactions. 
+ * These are known to be undone, otherwise the + * log would not have been freeable. + */ + LOGCOPY_TOLSN(env, &prev_lsn, (u_int8_t *)db->data + + sizeof(rectype) + sizeof(txnid)); + if (txnid != 0 && prev_lsn.file == 0 && (ret = + __db_txnlist_add(env, info, txnid, TXN_OK, NULL)) != 0) + return (ret); + + /* FALLTHROUGH */ + case DB_TXN_POPENFILES: + if (rectype == DB___dbreg_register || + rectype == DB___txn_child || + rectype == DB___txn_ckp || rectype == DB___txn_recycle) + return ((dtab->int_dispatch[rectype])(env, + db, lsnp, redo, info)); + break; + case DB_TXN_BACKWARD_ROLL: + /* + * Running full recovery in the backward pass. In general, + * we only process records during this pass that belong + * to aborted transactions. Unfortunately, there are several + * exceptions: + * 1. If this is a meta-record, one not associated with + * a transaction, then we must always process it. + * 2. If this is a transaction commit/abort, we must + * always process it, so that we know the status of + * every transaction. + * 3. If this is a child commit, we need to process it + * because the outcome of the child transaction depends + * on the outcome of the parent. + * 4. If this is a dbreg_register record, we must always + * process is because they contain non-transactional + * closes that must be properly handled. + * 5. If this is a noop, we must always undo it so that we + * properly handle any aborts before a file was closed. + * 6. If this a file remove, we need to process it to + * determine if the on-disk file is the same as the + * one being described. + */ + switch (rectype) { + /* + * These either do not belong to a transaction or (regop) + * must be processed regardless of the status of the + * transaction. + */ + case DB___txn_regop: + case DB___txn_recycle: + case DB___txn_ckp: + make_call = 1; + break; + /* + * These belong to a transaction whose status must be + * checked. 
+ */ + case DB___txn_child: + case DB___db_noop: + case DB___fop_file_remove: + case DB___dbreg_register: + make_call = 1; + + /* FALLTHROUGH */ + default: + if (txnid == 0) + break; + + ret = __db_txnlist_find(env, info, txnid, &status); + + /* If not found, this is an incomplete abort. */ + if (ret == DB_NOTFOUND) + return (__db_txnlist_add(env, + info, txnid, TXN_IGNORE, lsnp)); + if (ret != 0) + return (ret); + + /* + * If we ignore the transaction, ignore the operation + * UNLESS this is a child commit in which case we need + * to make sure that the child also gets marked as + * ignore. + */ + if (status == TXN_IGNORE && rectype != DB___txn_child) { + make_call = 0; + break; + } + if (status == TXN_COMMIT) + break; + + /* Set make_call in case we came through default */ + make_call = 1; + if (status == TXN_OK && + (ret = __db_txnlist_update(env, + info, txnid, rectype == DB___txn_prepare ? + TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0) + return (ret); + } + break; + case DB_TXN_FORWARD_ROLL: + /* + * In the forward pass, if we haven't seen the transaction, + * do nothing, else recover it. + * + * We need to always redo DB___db_noop records, so that we + * properly handle any commits after the file was closed. + */ + switch (rectype) { + case DB___txn_recycle: + case DB___txn_ckp: + case DB___db_noop: + case DB___dbreg_register: + make_call = 1; + break; + + default: + if (txnid == 0) + status = 0; + else { + ret = __db_txnlist_find(env, + info, txnid, &status); + + if (ret == DB_NOTFOUND) + /* Break out out of if clause. */ + ; + else if (ret != 0) + return (ret); + else if (status == TXN_COMMIT) { + make_call = 1; + break; + } + } + + } + break; + default: + return (__db_unknown_flag( + env, "__db_dispatch", (u_int32_t)redo)); + } + + if (make_call) { + /* + * If the debug flag is set then we are logging + * records for a non-durable update so that they + * may be examined for diagnostic purposes. 
+ * So only make the call if we are printing, + * otherwise we need to extract the previous + * lsn so undo will work properly. + */ + if (rectype & DB_debug_FLAG) { + if (redo == DB_TXN_PRINT) + rectype &= ~DB_debug_FLAG; + else { + LOGCOPY_TOLSN(env, lsnp, + (u_int8_t *)db->data + + sizeof(rectype) + + sizeof(txnid)); + return (0); + } + } + if (rectype >= DB_user_BEGIN) { + /* + * Increment user log count, we can't pass any extra + * args into app_dispatch, so this has to be done here. + */ + if (redo == DB_TXN_LOG_VERIFY) + lvh->external_logrec_cnt++; + if (dbenv->app_dispatch != NULL) + return (dbenv->app_dispatch(dbenv, + db, lsnp, redo)); + + /* No application-specific dispatch */ + urectype = rectype - DB_user_BEGIN; + if (urectype > dtab->ext_size || + dtab->ext_dispatch[urectype] == NULL) { + __db_errx(env, DB_STR_A("0512", + "Illegal application-specific record type %lu in log", + "%lu"), (u_long)rectype); + return (EINVAL); + } + + return ((dtab->ext_dispatch[urectype])(dbenv, + db, lsnp, redo)); + } else { + if (rectype > dtab->int_size || + dtab->int_dispatch[rectype] == NULL) { + __db_errx(env, DB_STR_A("0513", + "Illegal record type %lu in log", "%lu"), + (u_long)rectype); + if (redo == DB_TXN_LOG_VERIFY) + lvh->unknown_logrec_cnt++; + + return (EINVAL); + } + + return ((dtab->int_dispatch[rectype])(env, + db, lsnp, redo, params)); + } + } + + return (0); +} + +/* + * __db_add_recovery -- Add recovery functions to the dispatch table. + * + * We have two versions of this, an external one and an internal one, + * because application-specific functions take different arguments + * for dispatch (ENV versus DB_ENV). + * + * This is the external version. 
+ * + * PUBLIC: int __db_add_recovery __P((DB_ENV *, DB_DISTAB *, + * PUBLIC: int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t)); + */ +int +__db_add_recovery(dbenv, dtab, func, ndx) + DB_ENV *dbenv; + DB_DISTAB *dtab; + int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + u_int32_t ndx; +{ + size_t i, nsize; + int ret; + + /* Make sure this is an application-specific record. */ + if (ndx < DB_user_BEGIN) { + __db_errx(dbenv->env, DB_STR_A("0514", + "Attempting to add application-specific record with invalid type %lu", + "%lu"), (u_long)ndx); + return (EINVAL); + } + ndx -= DB_user_BEGIN; + + /* Check if we have to grow the table. */ + if (ndx >= dtab->ext_size) { + nsize = ndx + 40; + if ((ret = + __os_realloc(dbenv->env, nsize * + sizeof((dtab->ext_dispatch)[0]), &dtab->ext_dispatch)) + != 0) + return (ret); + for (i = dtab->ext_size; i < nsize; ++i) + (dtab->ext_dispatch)[i] = NULL; + dtab->ext_size = nsize; + } + + (dtab->ext_dispatch)[ndx] = func; + return (0); +} + +/* + * __db_add_recovery_int -- + * + * Internal version of dispatch addition function. + * + * + * PUBLIC: int __db_add_recovery_int __P((ENV *, DB_DISTAB *, + * PUBLIC: int (*)(ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t)); + */ +int +__db_add_recovery_int(env, dtab, func, ndx) + ENV *env; + DB_DISTAB *dtab; + int (*func) __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + u_int32_t ndx; +{ + size_t i, nsize; + int ret; + + if (ndx >= DB_user_BEGIN) { + __db_errx(env, DB_STR_A("0515", + "Attempting to add internal record with invalid type %lu", + "%lu"), (u_long)ndx); + return (EINVAL); + } + + /* Check if we have to grow the table. 
*/ + if (ndx >= dtab->int_size) { + nsize = ndx + 40; + if ((ret = + __os_realloc(env, nsize * sizeof((dtab->int_dispatch)[0]), + &dtab->int_dispatch)) != 0) + return (ret); + for (i = dtab->int_size; i < nsize; ++i) + (dtab->int_dispatch)[i] = NULL; + dtab->int_size = nsize; + } + + (dtab->int_dispatch)[ndx] = func; + return (0); +} + +/* + * __db_txnlist_init -- + * Initialize transaction linked list. + * + * PUBLIC: int __db_txnlist_init __P((ENV *, DB_THREAD_INFO *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, DB_TXNHEAD **)); + */ +int +__db_txnlist_init(env, ip, low_txn, hi_txn, trunc_lsn, retp) + ENV *env; + DB_THREAD_INFO *ip; + u_int32_t low_txn, hi_txn; + DB_LSN *trunc_lsn; + DB_TXNHEAD **retp; +{ + DB_TXNHEAD *headp; + u_int32_t size, tmp; + int ret; + + /* + * Size a hash table. + * If low is zero then we are being called during rollback + * and we need only one slot. + * Hi maybe lower than low if we have recycled txnid's. + * The numbers here are guesses about txn density, we can afford + * to look at a few entries in each slot. + */ + if (low_txn == 0) + size = 1; + else { + if (hi_txn < low_txn) { + tmp = hi_txn; + hi_txn = low_txn; + low_txn = tmp; + } + tmp = hi_txn - low_txn; + /* See if we wrapped around. 
*/ + if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2) + tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn); + size = tmp / 5; + if (size < 100) + size = 100; + } + if ((ret = __os_malloc(env, + sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0) + return (ret); + + memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head)); + headp->maxid = hi_txn; + headp->generation = 0; + headp->nslots = size; + headp->gen_alloc = 8; + headp->thread_info = ip; + if ((ret = __os_malloc(env, headp->gen_alloc * + sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) { + __os_free(env, headp); + return (ret); + } + headp->gen_array[0].generation = 0; + headp->gen_array[0].txn_min = TXN_MINIMUM; + headp->gen_array[0].txn_max = TXN_MAXIMUM; + if (trunc_lsn != NULL) { + headp->trunc_lsn = *trunc_lsn; + headp->maxlsn = *trunc_lsn; + } else { + ZERO_LSN(headp->trunc_lsn); + ZERO_LSN(headp->maxlsn); + } + ZERO_LSN(headp->ckplsn); + + *retp = headp; + return (0); +} + +#define FIND_GENERATION(hp, txnid, gen) do { \ + u_int32_t __i; \ + for (__i = 0; __i <= (hp)->generation; __i++) \ + /* The range may wrap around the end. */ \ + if ((hp)->gen_array[__i].txn_min < \ + (hp)->gen_array[__i].txn_max ? \ + ((txnid) >= (hp)->gen_array[__i].txn_min && \ + (txnid) <= (hp)->gen_array[__i].txn_max) : \ + ((txnid) >= (hp)->gen_array[__i].txn_min || \ + (txnid) <= (hp)->gen_array[__i].txn_max)) \ + break; \ + DB_ASSERT(env, __i <= (hp)->generation); \ + gen = (hp)->gen_array[__i].generation; \ +} while (0) + +/* + * __db_txnlist_add -- + * Add an element to our transaction linked list. 
+ * + * PUBLIC: int __db_txnlist_add __P((ENV *, + * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *)); + */ +int +__db_txnlist_add(env, hp, txnid, status, lsn) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, status; + DB_LSN *lsn; +{ + DB_TXNLIST *elp; + int ret; + + if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) + return (ret); + + LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links); + + /* Find the most recent generation containing this ID */ + FIND_GENERATION(hp, txnid, elp->u.t.generation); + elp->type = TXNLIST_TXNID; + elp->u.t.txnid = txnid; + elp->u.t.status = status; + if (txnid > hp->maxid) + hp->maxid = txnid; + if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) + hp->maxlsn = *lsn; + + DB_ASSERT(env, lsn == NULL || + status != TXN_COMMIT || LOG_COMPARE(&hp->maxlsn, lsn) >= 0); + + return (0); +} + +/* + * __db_txnlist_remove -- + * Remove an element from our transaction linked list. + * + * PUBLIC: int __db_txnlist_remove __P((ENV *, DB_TXNHEAD *, u_int32_t)); + */ +int +__db_txnlist_remove(env, hp, txnid) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid; +{ + DB_TXNLIST *entry; + u_int32_t status; + + return (__db_txnlist_find_internal(env, + hp, TXNLIST_TXNID, txnid, &entry, 1, &status)); +} + +/* + * __db_txnlist_ckp -- + * Used to record the maximum checkpoint that will be retained + * after recovery. Typically this is simply the max checkpoint, but + * if we are doing client replication recovery or timestamp-based + * recovery, we are going to virtually truncate the log and we need + * to retain the last checkpoint before the truncation point. 
+ * + * PUBLIC: void __db_txnlist_ckp __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +void +__db_txnlist_ckp(env, hp, ckp_lsn) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *ckp_lsn; +{ + + COMPQUIET(env, NULL); + + if (IS_ZERO_LSN(hp->ckplsn) && !IS_ZERO_LSN(hp->maxlsn) && + LOG_COMPARE(&hp->maxlsn, ckp_lsn) >= 0) + hp->ckplsn = *ckp_lsn; +} + +/* + * __db_txnlist_end -- + * Discard transaction linked list. + * + * PUBLIC: void __db_txnlist_end __P((ENV *, DB_TXNHEAD *)); + */ +void +__db_txnlist_end(env, hp) + ENV *env; + DB_TXNHEAD *hp; +{ + u_int32_t i; + DB_TXNLIST *p; + + if (hp == NULL) + return; + + for (i = 0; i < hp->nslots; i++) + while (hp != NULL && (p = LIST_FIRST(&hp->head[i])) != NULL) { + switch (p->type) { + case TXNLIST_LSN: + __os_free(env, p->u.l.lsn_stack); + break; + case TXNLIST_DELETE: + case TXNLIST_TXNID: + default: + /* + * Possibly an incomplete DB_TXNLIST; just + * free it. + */ + break; + } + LIST_REMOVE(p, links); + __os_free(env, p); + } + + if (hp->gen_array != NULL) + __os_free(env, hp->gen_array); + __os_free(env, hp); +} + +/* + * __db_txnlist_find -- + * Checks to see if a txnid with the current generation is in the + * txnid list. This returns DB_NOTFOUND if the item isn't in the + * list otherwise it returns (like __db_txnlist_find_internal) + * the status of the transaction. A txnid of 0 means the record + * was generated while not in a transaction. + * + * PUBLIC: int __db_txnlist_find __P((ENV *, + * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t *)); + */ +int +__db_txnlist_find(env, hp, txnid, statusp) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, *statusp; +{ + DB_TXNLIST *entry; + + if (txnid == 0) + return (DB_NOTFOUND); + + return (__db_txnlist_find_internal(env, hp, + TXNLIST_TXNID, txnid, &entry, 0, statusp)); +} + +/* + * __db_txnlist_update -- + * Change the status of an existing transaction entry. + * Returns DB_NOTFOUND if no such entry exists. 
+ * + * PUBLIC: int __db_txnlist_update __P((ENV *, DB_TXNHEAD *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, u_int32_t *, int)); + */ +int +__db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, status; + DB_LSN *lsn; + u_int32_t *ret_status; + int add_ok; +{ + DB_TXNLIST *elp; + int ret; + + if (txnid == 0) + return (DB_NOTFOUND); + + ret = __db_txnlist_find_internal(env, + hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status); + + if (ret == DB_NOTFOUND && add_ok) { + *ret_status = status; + return (__db_txnlist_add(env, hp, txnid, status, lsn)); + } + if (ret != 0) + return (ret); + + if (*ret_status == TXN_IGNORE) + return (0); + + elp->u.t.status = status; + + if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) + hp->maxlsn = *lsn; + + return (ret); +} + +/* + * __db_txnlist_find_internal -- + * Find an entry on the transaction list. If the entry is not there or + * the list pointer is not initialized we return DB_NOTFOUND. If the + * item is found, we return the status. Currently we always call this + * with an initialized list pointer but checking for NULL keeps it general. 
+ */ +static int +__db_txnlist_find_internal(env, + hp, type, txnid, txnlistp, del, statusp) + ENV *env; + DB_TXNHEAD *hp; + db_txnlist_type type; + u_int32_t txnid; + DB_TXNLIST **txnlistp; + int del; + u_int32_t *statusp; +{ + struct __db_headlink *head; + DB_TXNLIST *p; + u_int32_t generation, hash; + int ret; + + ret = 0; + + if (hp == NULL) + return (DB_NOTFOUND); + + switch (type) { + case TXNLIST_TXNID: + hash = txnid; + FIND_GENERATION(hp, txnid, generation); + break; + case TXNLIST_DELETE: + case TXNLIST_LSN: + default: + return (__env_panic(env, EINVAL)); + } + + head = &hp->head[DB_TXNLIST_MASK(hp, hash)]; + LIST_FOREACH(p, head, links) { + if (p->type != type) + continue; + switch (type) { + case TXNLIST_TXNID: + if (p->u.t.txnid != txnid || + generation != p->u.t.generation) + continue; + *statusp = p->u.t.status; + break; + + case TXNLIST_DELETE: + case TXNLIST_LSN: + default: + return (__env_panic(env, EINVAL)); + } + if (del == 1) { + LIST_REMOVE(p, links); + __os_free(env, p); + *txnlistp = NULL; + } else if (p != LIST_FIRST(head)) { + /* Move it to head of list. */ + LIST_REMOVE(p, links); + LIST_INSERT_HEAD(head, p, links); + *txnlistp = p; + } else + *txnlistp = p; + return (ret); + } + + return (DB_NOTFOUND); +} + +/* + * __db_txnlist_gen -- + * Change the current generation number. + * + * PUBLIC: int __db_txnlist_gen __P((ENV *, + * PUBLIC: DB_TXNHEAD *, int, u_int32_t, u_int32_t)); + */ +int +__db_txnlist_gen(env, hp, incr, min, max) + ENV *env; + DB_TXNHEAD *hp; + int incr; + u_int32_t min, max; +{ + int ret; + + /* + * During recovery generation numbers keep track of "restart" + * checkpoints and recycle records. Restart checkpoints occur + * whenever we take a checkpoint and there are no outstanding + * transactions. When that happens, we can reset transaction IDs + * back to TXNID_MINIMUM. Currently we only do the reset + * at then end of recovery. Recycle records occur when txnids + * are exhausted during runtime. 
A free range of ids is identified + * and logged. This code maintains a stack of ranges. A txnid + * is given the generation number of the first range it falls into + * in the stack. + */ + if (incr < 0) { + --hp->generation; + memmove(hp->gen_array, &hp->gen_array[1], + (hp->generation + 1) * sizeof(hp->gen_array[0])); + } else { + ++hp->generation; + if (hp->generation >= hp->gen_alloc) { + hp->gen_alloc *= 2; + if ((ret = __os_realloc(env, hp->gen_alloc * + sizeof(hp->gen_array[0]), &hp->gen_array)) != 0) + return (ret); + } + memmove(&hp->gen_array[1], &hp->gen_array[0], + hp->generation * sizeof(hp->gen_array[0])); + hp->gen_array[0].generation = hp->generation; + hp->gen_array[0].txn_min = min; + hp->gen_array[0].txn_max = max; + } + return (0); +} + +/* + * __db_txnlist_lsnadd -- + * Save the prev_lsn from a txn_child record. + * + * PUBLIC: int __db_txnlist_lsnadd __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +int +__db_txnlist_lsnadd(env, hp, lsnp) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; +{ + DB_TXNLIST *elp; + int ret; + + if (IS_ZERO_LSN(*lsnp)) + return (0); + + LIST_FOREACH(elp, &hp->head[0], links) + if (elp->type == TXNLIST_LSN) + break; + + if (elp == NULL) { + if ((ret = __db_txnlist_lsninit(env, hp, lsnp)) != 0) + return (ret); + return (DB_SURPRISE_KID); + } + + if (elp->u.l.stack_indx == elp->u.l.stack_size) { + elp->u.l.stack_size <<= 1; + if ((ret = __os_realloc(env, sizeof(DB_LSN) * + elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) { + __db_txnlist_end(env, hp); + return (ret); + } + } + elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp; + + return (0); +} + +/* + * __db_txnlist_lsnget -- + * + * PUBLIC: int __db_txnlist_lsnget __P((ENV *, + * PUBLIC: DB_TXNHEAD *, DB_LSN *, u_int32_t)); + * Get the lsn saved from a txn_child record. 
+ */ +int +__db_txnlist_lsnget(env, hp, lsnp, flags) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; + u_int32_t flags; +{ + DB_TXNLIST *elp; + + COMPQUIET(env, NULL); + COMPQUIET(flags, 0); + + LIST_FOREACH(elp, &hp->head[0], links) + if (elp->type == TXNLIST_LSN) + break; + + if (elp == NULL || elp->u.l.stack_indx == 0) { + ZERO_LSN(*lsnp); + return (0); + } + + *lsnp = elp->u.l.lsn_stack[--elp->u.l.stack_indx]; + + return (0); +} + +/* + * __db_txnlist_lsninit -- + * Initialize a transaction list with an lsn array entry. + * + * PUBLIC: int __db_txnlist_lsninit __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +int +__db_txnlist_lsninit(env, hp, lsnp) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; +{ + DB_TXNLIST *elp; + int ret; + + elp = NULL; + + if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) + goto err; + LIST_INSERT_HEAD(&hp->head[0], elp, links); + + elp->type = TXNLIST_LSN; + if ((ret = __os_malloc(env, + sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0) + goto err; + elp->u.l.stack_indx = 1; + elp->u.l.stack_size = DB_LSN_STACK_SIZE; + elp->u.l.lsn_stack[0] = *lsnp; + + return (0); + +err: __db_txnlist_end(env, hp); + return (ret); +} + +#ifdef DEBUG +/* + * __db_txnlist_print -- + * Print out the transaction list. 
+ * + * PUBLIC: void __db_txnlist_print __P((DB_TXNHEAD *)); + */ +void +__db_txnlist_print(hp) + DB_TXNHEAD *hp; +{ + DB_TXNLIST *p; + u_int32_t i; + char *txntype; + + printf("Maxid: %lu Generation: %lu\n", + (u_long)hp->maxid, (u_long)hp->generation); + for (i = 0; i < hp->nslots; i++) + LIST_FOREACH(p, &hp->head[i], links) { + if (p->type != TXNLIST_TXNID) { + printf("Unrecognized type: %d\n", p->type); + continue; + } + switch (p->u.t.status) { + case TXN_OK: + txntype = "OK"; + break; + case TXN_COMMIT: + txntype = "commit"; + break; + case TXN_PREPARE: + txntype = "prepare"; + break; + case TXN_ABORT: + txntype = "abort"; + break; + case TXN_IGNORE: + txntype = "ignore"; + break; + case TXN_EXPECTED: + txntype = "expected"; + break; + case TXN_UNEXPECTED: + txntype = "unexpected"; + break; + default: + txntype = "UNKNOWN"; + break; + } + printf("TXNID: %lx(%lu): %s\n", + (u_long)p->u.t.txnid, + (u_long)p->u.t.generation, txntype); + } +} +#endif diff --git a/src/db/db_dup.c b/src/db/db_dup.c new file mode 100644 index 00000000..ff00da87 --- /dev/null +++ b/src/db/db_dup.c @@ -0,0 +1,214 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/mp.h" +#include "dbinc/log.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +/* + * __db_ditem_nolog -- + * Remove an item from a page without affecting its recoverability. + * + * PUBLIC: int __db_ditem_nolog __P((DBC *, PAGE *, u_int32_t, u_int32_t)); + */ +int +__db_ditem_nolog(dbc, pagep, indx, nbytes) + DBC *dbc; + PAGE *pagep; + u_int32_t indx, nbytes; +{ + DB *dbp; + db_indx_t cnt, *inp, offset; + u_int8_t *from; + + dbp = dbc->dbp; + DB_ASSERT(dbp->env, IS_DIRTY(pagep)); + DB_ASSERT(dbp->env, indx < NUM_ENT(pagep)); + + /* + * If there's only a single item on the page, we don't have to + * work hard. 
+ */ + if (NUM_ENT(pagep) == 1) { + NUM_ENT(pagep) = 0; + HOFFSET(pagep) = dbp->pgsize; + return (0); + } + + inp = P_INP(dbp, pagep); + /* + * Pack the remaining key/data items at the end of the page. Use + * memmove(3), the regions may overlap. + */ + from = (u_int8_t *)pagep + HOFFSET(pagep); + DB_ASSERT(dbp->env, inp[indx] >= HOFFSET(pagep)); + memmove(from + nbytes, from, inp[indx] - HOFFSET(pagep)); + HOFFSET(pagep) += nbytes; + + /* Adjust the indices' offsets. */ + offset = inp[indx]; + for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt) + if (inp[cnt] < offset) + inp[cnt] += nbytes; + + /* Shift the indices down. */ + --NUM_ENT(pagep); + if (indx != NUM_ENT(pagep)) + memmove(&inp[indx], &inp[indx + 1], + sizeof(db_indx_t) * (NUM_ENT(pagep) - indx)); + + return (0); +} + +/* + * __db_ditem -- + * Remove an item from a page, logging it if enabled. + * + * PUBLIC: int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); + */ +int +__db_ditem(dbc, pagep, indx, nbytes) + DBC *dbc; + PAGE *pagep; + u_int32_t indx, nbytes; +{ + DB *dbp; + DBT ldbt; + int ret; + + dbp = dbc->dbp; + + if (DBC_LOGGING(dbc)) { + ldbt.data = P_ENTRY(dbp, pagep, indx); + ldbt.size = nbytes; + if ((ret = __db_addrem_log(dbp, dbc->txn, &LSN(pagep), 0, + OP_SET(DB_REM_DUP, pagep), PGNO(pagep), + (u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(pagep)); + + return (__db_ditem_nolog(dbc, pagep, indx, nbytes)); +} + +/* + * __db_pitem_nolog -- + * Put an item on a page without logging. 
+ * + * PUBLIC: int __db_pitem_nolog + * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); + */ +int +__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data) + DBC *dbc; + PAGE *pagep; + u_int32_t indx; + u_int32_t nbytes; + DBT *hdr, *data; +{ + BKEYDATA bk; + DB *dbp; + DBT thdr; + db_indx_t *inp; + u_int8_t *p; + + dbp = dbc->dbp; + + DB_ASSERT(dbp->env, IS_DIRTY(pagep)); + + if (nbytes > P_FREESPACE(dbp, pagep)) { + DB_ASSERT(dbp->env, nbytes <= P_FREESPACE(dbp, pagep)); + return (EINVAL); + } + + if (hdr == NULL) { + B_TSET(bk.type, B_KEYDATA); + bk.len = data == NULL ? 0 : data->size; + + thdr.data = &bk; + thdr.size = SSZA(BKEYDATA, data); + hdr = &thdr; + } + inp = P_INP(dbp, pagep); + + /* Adjust the index table, then put the item on the page. */ + if (indx != NUM_ENT(pagep)) + memmove(&inp[indx + 1], &inp[indx], + sizeof(db_indx_t) * (NUM_ENT(pagep) - indx)); + HOFFSET(pagep) -= nbytes; + inp[indx] = HOFFSET(pagep); + ++NUM_ENT(pagep); + + p = P_ENTRY(dbp, pagep, indx); + memcpy(p, hdr->data, hdr->size); + if (data != NULL) + memcpy(p + hdr->size, data->data, data->size); + + return (0); +} + +/* + * __db_pitem -- + * Put an item on a page. + * + * PUBLIC: int __db_pitem + * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); + */ +int +__db_pitem(dbc, pagep, indx, nbytes, hdr, data) + DBC *dbc; + PAGE *pagep; + u_int32_t indx; + u_int32_t nbytes; + DBT *hdr, *data; +{ + DB *dbp; + MPOOLFILE *mpf; + int ret; + + dbp = dbc->dbp; + mpf = dbp->mpf->mfp; + /* + * Put a single item onto a page. The logic figuring out where to + * insert and whether it fits is handled in the caller. All we do + * here is manage the page shuffling. We cheat a little bit in that + * we don't want to copy the dbt on a normal put twice. If hdr is + * NULL, we create a BKEYDATA structure on the page, otherwise, just + * copy the caller's information onto the page. 
+ * + * This routine is also used to put entries onto the page where the + * entry is pre-built, e.g., during recovery. In this case, the hdr + * will point to the entry, and the data argument will be NULL. + * + * If transactional bulk loading is enabled in this + * transaction, and the page is above the file's extension + * watermark, skip logging, but do not invoke LSN_NOT_LOGGED. + * + * !!! + * There's a tremendous potential for off-by-one errors here, since + * the passed in header sizes must be adjusted for the structure's + * placeholder for the trailing variable-length data field. + */ + if (DBC_LOGGING(dbc)) { + if (__txn_pg_above_fe_watermark(dbc->txn, mpf, PGNO(pagep))) { + mpf->fe_nlws++; /* Note that logging was skipped. */ + } else if ((ret = __db_addrem_log(dbp, dbc->txn, &LSN(pagep), + 0, OP_SET(DB_ADD_DUP, pagep), PGNO(pagep), + (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep)))) { + return (ret); + } + } else + LSN_NOT_LOGGED(LSN(pagep)); + + return (__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data)); +} diff --git a/src/db/db_iface.c b/src/db/db_iface.c new file mode 100644 index 00000000..6e9949b9 --- /dev/null +++ b/src/db/db_iface.c @@ -0,0 +1,2980 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#ifndef HAVE_QUEUE +#include "dbinc/qam.h" /* For __db_no_queue_am(). 
*/ +#endif +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/txn.h" + +static int __db_associate_arg __P((DB *, DB *, + int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); +static int __dbc_del_arg __P((DBC *, u_int32_t)); +static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t)); +static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __db_curinval __P((const ENV *)); +static int __db_cursor_arg __P((DB *, u_int32_t)); +static int __db_del_arg __P((DB *, DBT *, u_int32_t)); +static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t)); +static int __db_join_arg __P((DB *, DBC **, u_int32_t)); +static int __db_open_arg __P((DB *, + DB_TXN *, const char *, const char *, DBTYPE, u_int32_t)); +static int __db_pget_arg __P((DB *, DBT *, u_int32_t)); +static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t)); +static int __dbt_ferr __P((const DB *, const char *, const DBT *, int)); +static int __db_compact_func + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __db_associate_foreign_arg __P((DB *, DB *, + int (*)(DB *, const DBT *, DBT *, const DBT *, int *), + u_int32_t)); + +/* + * These functions implement the Berkeley DB API. They are organized in a + * layered fashion. The interface functions (XXX_pp) perform all generic + * error checks (for example, PANIC'd region, replication state change + * in progress, inconsistent transaction usage), call function-specific + * check routines (_arg) to check for proper flag usage, etc., do pre-amble + * processing (incrementing handle counts, handling local transactions), + * call the function and then do post-amble processing (local transactions, + * decrement handle counts). + * + * The basic structure is: + * Check for simple/generic errors (PANIC'd region) + * Check if replication is changing state (increment handle count). 
 *	Call function-specific argument checking routine
 *	Create internal transaction if necessary
 *	Call underlying worker function
 *	Commit/abort internal transaction if necessary
 *	Decrement handle count
 */

/*
 * __db_associate_pp --
 *	DB->associate pre/post processing.
 *
 * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *,
 * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
 */
int
__db_associate_pp(dbp, txn, sdbp, callback, flags)
	DB *dbp, *sdbp;
	DB_TXN *txn;
	int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
	u_int32_t flags;
{
	DBC *sdbc;
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret, txn_local;

	env = dbp->env;
	txn_local = 0;

	STRIP_AUTO_COMMIT(flags);

	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		/* Entry failed, so there is no block to release at err. */
		handle_check = 0;
		goto err;
	}

	/*
	 * Secondary cursors may have the primary's lock file ID, so we need
	 * to make sure that no older cursors are lying around when we make
	 * the transition.
	 */
	if (TAILQ_FIRST(&sdbp->active_queue) != NULL ||
	    TAILQ_FIRST(&sdbp->join_queue) != NULL) {
		__db_errx(env, DB_STR("0572",
    "Databases may not become secondary indices while cursors are open"));
		ret = EINVAL;
		goto err;
	}

	if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0)
		goto err;

	/*
	 * Create a local transaction as necessary, check for consistent
	 * transaction usage, and, if we have no transaction but do have
	 * locking on, acquire a locker id for the handle lock acquisition.
	 */
	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
		if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
			goto err;
		txn_local = 1;
	}

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
		goto err;

	/*
	 * Drain any cached (free-list) cursors on the secondary: they may
	 * carry the old locker ID and must not survive the transition.
	 */
	while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
		if ((ret = __dbc_destroy(sdbc)) != 0)
			goto err;

	ret = __db_associate(dbp, ip, txn, sdbp, callback, flags);

err:	if (txn_local &&
	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
		ret = t_ret;

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_associate_arg --
 *	Check DB->associate arguments.
 */
static int
__db_associate_arg(dbp, sdbp, callback, flags)
	DB *dbp, *sdbp;
	int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
	u_int32_t flags;
{
	ENV *env;
	int ret;

	env = dbp->env;

	if (sdbp->type == DB_HEAP) {
		__db_errx(env,
		    "Heap databases may not be used as secondary databases");
		return (EINVAL);
	}

	if (F_ISSET(sdbp, DB_AM_SECONDARY)) {
		__db_errx(env, DB_STR("0573",
		    "Secondary index handles may not be re-associated"));
		return (EINVAL);
	}
	if (F_ISSET(dbp, DB_AM_SECONDARY)) {
		__db_errx(env, DB_STR("0574",
		    "Secondary indices may not be used as primary databases"));
		return (EINVAL);
	}
	if (F_ISSET(dbp, DB_AM_DUP)) {
		__db_errx(env, DB_STR("0575",
		    "Primary databases may not be configured with duplicates"));
		return (EINVAL);
	}
	if (F_ISSET(dbp, DB_AM_RENUMBER)) {
		__db_errx(env, DB_STR("0576",
    "Renumbering recno databases may not be used as primary databases"));
		return (EINVAL);
	}

	/*
	 * It's OK for the primary and secondary to not share an environment IFF
	 * the environments are local to the DB handle.  (Specifically, cursor
	 * adjustment will work correctly in this case.)  The environment being
	 * local implies the environment is not configured for either locking or
	 * transactions, as neither of those could work correctly.
	 */
	if (dbp->env != sdbp->env &&
	    (!F_ISSET(dbp->env, ENV_DBLOCAL) ||
	    !F_ISSET(sdbp->env, ENV_DBLOCAL))) {
		__db_errx(env, DB_STR("0577",
    "The primary and secondary must be opened in the same environment"));
		return (EINVAL);
	}
	if ((DB_IS_THREADED(dbp) && !DB_IS_THREADED(sdbp)) ||
	    (!DB_IS_THREADED(dbp) && DB_IS_THREADED(sdbp))) {
		__db_errx(env, DB_STR("0578",
    "The DB_THREAD setting must be the same for primary and secondary"));
		return (EINVAL);
	}
	/* A NULL callback is only sensible when nothing can be written. */
	if (callback == NULL &&
	    (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) {
		__db_errx(env, DB_STR("0579",
"Callback function may be NULL only when database handles are read-only"));
		return (EINVAL);
	}

	if ((ret = __db_fchk(env, "DB->associate", flags, DB_CREATE |
	    DB_IMMUTABLE_KEY)) != 0)
		return (ret);

	return (0);
}

/*
 * __db_close_pp --
 *	DB->close pre/post processing.
 *
 * PUBLIC: int __db_close_pp __P((DB *, u_int32_t));
 */
int
__db_close_pp(dbp, flags)
	DB *dbp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = dbp->env;
	ret = 0;

	/*
	 * Close a DB handle -- as a handle destructor, we can't fail.
	 *
	 * !!!
	 * The actual argument checking is simple, do it inline, outside of
	 * the replication block.
	 */
	if (flags != 0 && flags != DB_NOSYNC)
		ret = __db_ferr(env, "DB->close", 0);

	ENV_ENTER(env, ip);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
		handle_check = 0;
		if (ret == 0)
			ret = t_ret;
	}

	/* Always close the handle, even if an earlier step failed. */
	if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0)
		ret = t_ret;

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_cursor_pp --
 *	DB->cursor pre/post processing.
 *
 * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t));
 */
int
__db_cursor_pp(dbp, txn, dbcp, flags)
	DB *dbp;
	DB_TXN *txn;
	DBC **dbcp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	REGENV *renv;
	int rep_blocked, ret;

	env = dbp->env;

	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");

	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	rep_blocked = 0;
	if (IS_ENV_REPLICATED(env)) {
		if (!IS_REAL_TXN(txn)) {
			if ((ret = __op_rep_enter(env, 0, 1)) != 0)
				goto err;
			rep_blocked = 1;
		}
		renv = env->reginfo->primary;
		/*
		 * A handle whose timestamp predates the last replication
		 * rollback is dead: its cached state may describe pages
		 * that no longer exist.
		 */
		if (dbp->timestamp != renv->rep_timestamp) {
			__db_errx(env, DB_STR("0580",
		"replication recovery unrolled committed transactions;"
		"open DB and DBcursor handles must be closed"));
			ret = DB_REP_HANDLE_DEAD;
			goto err;
		}
	}
	if ((ret = __db_cursor_arg(dbp, flags)) != 0)
		goto err;

	/*
	 * Check for consistent transaction usage.  For now, assume this
	 * cursor might be used for read operations only (in which case
	 * it may not require a txn).  We'll check more stringently in
	 * c_del and c_put.  (Note this means the read-op txn tests have
	 * to be a subset of the write-op ones.)
	 */
	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
		goto err;

	ret = __db_cursor(dbp, ip, txn, dbcp, flags);

	/*
	 * Register externally created cursors into the valid transaction.
	 * If a family transaction was passed in, the transaction handle in
	 * the cursor may not match.
	 *
	 * NOTE(review): *dbcp is read before ret is tested; this assumes
	 * __db_cursor sets *dbcp even on failure -- confirm upstream.
	 */
	txn = (*dbcp)->txn;
	if (txn != NULL && ret == 0)
		TAILQ_INSERT_HEAD(&(txn->my_cursors), *dbcp, txn_cursors);

err:	/* Release replication block on error. */
	if (ret != 0 && rep_blocked)
		(void)__op_rep_exit(env);

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_cursor --
 *	DB->cursor.
 *
 * PUBLIC: int __db_cursor __P((DB *,
 * PUBLIC:     DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t));
 */
int
__db_cursor(dbp, ip, txn, dbcp, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DBC **dbcp;
	u_int32_t flags;
{
	DBC *dbc;
	ENV *env;
	db_lockmode_t mode;
	int ret;

	env = dbp->env;

	/*
	 * For an MVCC database with no caller transaction, a snapshot
	 * request creates a private transaction so the cursor has a
	 * consistent read view; it is resolved when the cursor closes.
	 */
	if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) ||
	    F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) {
		if ((ret =
		    __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0)
			return (ret);
		F_SET(txn, TXN_PRIVATE);
	}

	PERFMON5(env, db, cursor, dbp->fname,
	    dbp->dname, txn == NULL ? 0 : txn->txnid, flags, &dbp->fileid[0]);

	if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID,
	    LF_ISSET(DB_CURSOR_BULK | DB_CURSOR_TRANSIENT | DB_RECOVER),
	    NULL, &dbc)) != 0)
		return (ret);

	/*
	 * If this is CDB, do all the locking in the interface, which is
	 * right here.
	 */
	if (CDB_LOCKING(env)) {
		mode = (LF_ISSET(DB_WRITELOCK)) ? DB_LOCK_WRITE :
		    ((LF_ISSET(DB_WRITECURSOR) || txn != NULL) ?
		    DB_LOCK_IWRITE : DB_LOCK_READ);
		if ((ret = __lock_get(env, dbc->locker, 0,
		    &dbc->lock_dbt, mode, &dbc->mylock)) != 0)
			goto err;
		if (LF_ISSET(DB_WRITECURSOR))
			F_SET(dbc, DBC_WRITECURSOR);
		if (LF_ISSET(DB_WRITELOCK))
			F_SET(dbc, DBC_WRITER);
	}

	/* Propagate isolation level from the flags or the transaction. */
	if (LF_ISSET(DB_READ_UNCOMMITTED) ||
	    (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED)))
		F_SET(dbc, DBC_READ_UNCOMMITTED);

	if (LF_ISSET(DB_READ_COMMITTED) ||
	    (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED)))
		F_SET(dbc, DBC_READ_COMMITTED);

	*dbcp = dbc;
	return (0);

err:	(void)__dbc_close(dbc);
	return (ret);
}

/*
 * __db_cursor_arg --
 *	Check DB->cursor arguments.
 */
static int
__db_cursor_arg(dbp, flags)
	DB *dbp;
	u_int32_t flags;
{
	ENV *env;

	env = dbp->env;

	/*
	 * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking.
	 */
	if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) {
		if (!LOCKING_ON(env))
			return (__db_fnl(env, "DB->cursor"));
	}

	LF_CLR(DB_CURSOR_BULK |
	    DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);

	/* Check for invalid function flags. */
	if (LF_ISSET(DB_WRITECURSOR)) {
		if (DB_IS_READONLY(dbp))
			return (__db_rdonly(env, "DB->cursor"));
		if (!CDB_LOCKING(env))
			return (__db_ferr(env, "DB->cursor", 0));
		LF_CLR(DB_WRITECURSOR);
	} else if (LF_ISSET(DB_WRITELOCK)) {
		if (DB_IS_READONLY(dbp))
			return (__db_rdonly(env, "DB->cursor"));
		LF_CLR(DB_WRITELOCK);
	}

	/* Any bits still set are unrecognized. */
	if (flags != 0)
		return (__db_ferr(env, "DB->cursor", 0));

	return (0);
}

/*
 * __db_del_pp --
 *	DB->del pre/post processing.
 *
 * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t));
 */
int
__db_del_pp(dbp, txn, key, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret, txn_local;

	env = dbp->env;
	txn_local = 0;

	STRIP_AUTO_COMMIT(flags);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");

#ifdef CONFIG_TEST
	if (IS_REP_MASTER(env))
		DB_TEST_WAIT(env, env->test_check);
#endif
	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	if ((ret = __db_del_arg(dbp, key, flags)) != 0)
		goto err;

	/* Create local transaction as necessary. */
	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
		if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
			goto err;
		txn_local = 1;
	}

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
		goto err;

	ret = __db_del(dbp, ip, txn, key, flags);

err:	if (txn_local &&
	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
		ret = t_ret;

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;
	ENV_LEAVE(env, ip);
	/* Free any user-copy of the key made by __db_del_arg. */
	__dbt_userfree(env, key, NULL, NULL);
	return (ret);
}

/*
 * __db_del_arg --
 *	Check DB->delete arguments.
 */
static int
__db_del_arg(dbp, key, flags)
	DB *dbp;
	DBT *key;
	u_int32_t flags;
{
	ENV *env;
	int ret;

	env = dbp->env;

	/* Check for changes to a read-only tree. */
	if (DB_IS_READONLY(dbp))
		return (__db_rdonly(env, "DB->del"));

	/* Check for invalid function flags. */
	switch (flags) {
	case DB_CONSUME:
		if (dbp->type != DB_QUEUE)
			return (__db_ferr(env, "DB->del", 0));
		goto copy;
	case DB_MULTIPLE:
	case DB_MULTIPLE_KEY:
		if (!F_ISSET(key, DB_DBT_BULK)) {
			__db_errx(env, DB_STR("0581",
	    "DB->del with DB_MULTIPLE(_KEY) requires multiple key records"));
			return (EINVAL);
		}
		/* FALL THROUGH */
	case 0:
copy:		if ((ret = __dbt_usercopy(env, key)) != 0)
			return (ret);
		break;
	default:
		return (__db_ferr(env, "DB->del", 0));
	}

	return (0);
}

/*
 * __db_exists --
 *	DB->exists implementation.
 *
 * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t));
 */
int
__db_exists(dbp, txn, key, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key;
	u_int32_t flags;
{
	DBT data;
	int ret;

	/*
	 * Most flag checking is done in the DB->get call, we only check for
	 * specific incompatibilities here.  This saves making __get_arg
	 * aware of the exist method's API constraints.
	 */
	STRIP_AUTO_COMMIT(flags);

	if ((ret = __db_fchk(dbp->env, "DB->exists", flags,
	    DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) != 0)
		return (ret);

	/*
	 * Configure a data DBT that returns no bytes so there's no copy
	 * of the data: a zero-length partial read into user memory.
	 */
	memset(&data, 0, sizeof(data));
	data.dlen = 0;
	data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;

	return (dbp->get(dbp, txn, key, &data, flags));
}

/*
 * __db_fd_pp --
 *	DB->fd pre/post processing.
 *
 * PUBLIC: int __db_fd_pp __P((DB *, int *));
 */
int
__db_fd_pp(dbp, fdp)
	DB *dbp;
	int *fdp;
{
	DB_FH *fhp;
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = dbp->env;

	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");

	ENV_ENTER(env, ip);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0)
		goto err;

	/*
	 * !!!
	 * There's no argument checking to be done.
	 *
	 * !!!
	 * The actual method call is simple, do it inline.
	 *
	 * XXX
	 * Truly spectacular layering violation.
	 */
	if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) {
		if (fhp == NULL) {
			*fdp = -1;
			__db_errx(env, DB_STR("0582",
			    "Database does not have a valid file handle"));
			ret = ENOENT;
		} else
			*fdp = fhp->fd;
	}

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

err:	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_get_pp --
 *	DB->get pre/post processing.
 *
 * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 */
int
__db_get_pp(dbp, txn, key, data, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key, *data;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	u_int32_t mode;
	int handle_check, ignore_lease, ret, t_ret, txn_local;

	env = dbp->env;
	mode = 0;
	txn_local = 0;

	STRIP_AUTO_COMMIT(flags);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");

	/* Strip DB_IGNORE_LEASE before flag validation; remember it. */
	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
	LF_CLR(DB_IGNORE_LEASE);

	if ((ret = __db_get_arg(dbp, key, data, flags)) != 0) {
		__dbt_userfree(env, key, NULL, data);
		return (ret);
	}

	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	/* DB_CONSUME* deletes the record, so it is a write operation. */
	if (LF_ISSET(DB_READ_UNCOMMITTED))
		mode = DB_READ_UNCOMMITTED;
	else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
	    (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) {
		mode = DB_WRITELOCK;
		if (IS_DB_AUTO_COMMIT(dbp, txn)) {
			if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
				goto err;
			txn_local = 1;
		}
	}

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID,
	    mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0)
		goto err;

	ret = __db_get(dbp, ip, txn, key, data, flags);
	/*
	 * Check for master leases.
	 */
	if (ret == 0 &&
	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
		ret = __rep_lease_check(env, 1);

err:	if (txn_local &&
	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
		ret = t_ret;

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	__dbt_userfree(env, key, NULL, data);
	return (ret);
}

/*
 * __db_get --
 *	DB->get.
 *
 * PUBLIC: int __db_get __P((DB *,
 * PUBLIC:     DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t));
 */
int
__db_get(dbp, ip, txn, key, data, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DBT *key, *data;
	u_int32_t flags;
{
	DBC *dbc;
	u_int32_t mode;
	int ret, t_ret;

	/*
	 * The DB_CURSOR_TRANSIENT flag indicates that we're just doing a single
	 * operation with this cursor, and that in case of error we don't need
	 * to restore it to its old position.  Thus, we can perform the get
	 * without duplicating the cursor, saving some cycles in this common
	 * case.
	 */
	mode = DB_CURSOR_TRANSIENT;
	if (LF_ISSET(DB_READ_UNCOMMITTED)) {
		mode |= DB_READ_UNCOMMITTED;
		LF_CLR(DB_READ_UNCOMMITTED);
	} else if (LF_ISSET(DB_READ_COMMITTED)) {
		mode |= DB_READ_COMMITTED;
		LF_CLR(DB_READ_COMMITTED);
	} else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
	    (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT)
		mode |= DB_WRITELOCK;

	if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
		return (ret);

	DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags);

	/*
	 * The semantics of bulk gets are different for DB->get vs DBC->get.
	 * Mark the cursor so the low-level bulk get routines know which
	 * behavior we want.
	 */
	F_SET(dbc, DBC_FROM_DB_GET);

	/*
	 * SET_RET_MEM indicates that if key and/or data have no DBT
	 * flags set and DB manages the returned-data memory, that memory
	 * will belong to this handle, not to the underlying cursor.
	 */
	SET_RET_MEM(dbc, dbp);

	/* With no positioning flag left, this is a plain keyed lookup. */
	if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0)
		LF_SET(DB_SET);

#ifdef HAVE_PARTITION
	if (F_ISSET(dbc, DBC_PARTITIONED))
		ret = __partc_get(dbc, key, data, flags);
	else
#endif
		ret = __dbc_get(dbc, key, data, flags);

	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __db_get_arg --
 *	DB->get argument checking, used by both DB->get and DB->pget.
 */
static int
__db_get_arg(dbp, key, data, flags)
	const DB *dbp;
	DBT *key, *data;
	u_int32_t flags;
{
	ENV *env;
	int dirty, multi, ret;

	env = dbp->env;

	/*
	 * Check for read-modify-write validity.  DB_RMW doesn't make sense
	 * with CDB cursors since if you're going to write the cursor, you
	 * had to create it with DB_WRITECURSOR.  Regardless, we check for
	 * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
	 * If this changes, confirm that DB does not itself set the DB_RMW
	 * flag in a path where CDB may have been configured.
	 */
	dirty = 0;
	if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
		if (!LOCKING_ON(env))
			return (__db_fnl(env, "DB->get"));
		if ((ret = __db_fcchk(env, "DB->get",
		    flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0)
			return (ret);
		if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))
			dirty = 1;
		LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
	}

	multi = 0;
	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
		/* DB_MULTIPLE_KEY is cursor-only; reject it here. */
		if (LF_ISSET(DB_MULTIPLE_KEY))
			goto multi_err;
		multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0;
		LF_CLR(DB_MULTIPLE);
	}

	/* Check for invalid function flags. */
	switch (flags) {
	case DB_GET_BOTH:
		if ((ret = __dbt_usercopy(env, data)) != 0)
			return (ret);
		/* FALLTHROUGH */
	case 0:
		if ((ret = __dbt_usercopy(env, key)) != 0) {
			__dbt_userfree(env, key, NULL, data);
			return (ret);
		}
		break;
	case DB_SET_RECNO:
		if (!F_ISSET(dbp, DB_AM_RECNUM))
			goto err;
		if ((ret = __dbt_usercopy(env, key)) != 0)
			return (ret);
		break;
	case DB_CONSUME:
	case DB_CONSUME_WAIT:
		if (dirty) {
			__db_errx(env, DB_STR_A("0583",
		    "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
			    "%s"), LF_ISSET(DB_READ_UNCOMMITTED) ?
			    "DB_READ_UNCOMMITTED" : "DB_READ_COMMITTED");
			return (EINVAL);
		}
		if (multi)
multi_err:		return (__db_ferr(env, "DB->get", 1));
		if (dbp->type == DB_QUEUE)
			break;
		/* FALLTHROUGH */
	default:
err:		return (__db_ferr(env, "DB->get", 0));
	}

	/*
	 * Check for invalid key/data flags.
	 */
	if ((ret =
	    __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0)
		return (ret);

	if (F_ISSET(data, DB_DBT_READONLY)) {
		__db_errx(env, DB_STR("0584",
		    "DB_DBT_READONLY should not be set on data DBT."));
		return (EINVAL);
	}
	if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0)
		return (ret);

	if (multi) {
		if (!F_ISSET(data, DB_DBT_USERMEM)) {
			__db_errx(env, DB_STR("0585",
			    "DB_MULTIPLE requires DB_DBT_USERMEM be set"));
			return (EINVAL);
		}
		if (F_ISSET(key, DB_DBT_PARTIAL) ||
		    F_ISSET(data, DB_DBT_PARTIAL)) {
			__db_errx(env, DB_STR("0586",
			    "DB_MULTIPLE does not support DB_DBT_PARTIAL"));
			return (EINVAL);
		}
		if (data->ulen < 1024 ||
		    data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
			__db_errx(env, DB_STR("0587",
			    "DB_MULTIPLE buffers must be aligned, "
			    "at least page size and multiples of 1KB"));
			return (EINVAL);
		}
	}

	/*
	 * Check invalid partial key.
	 *
	 * NOTE(review): the &&-combination of DB_CONSUME, DB_CONSUME_WAIT
	 * and DB_SET_RECNO can never all be set at once, so the negated
	 * clause is always true; this looks like it was meant to be || --
	 * confirm against upstream before changing.
	 */
	if (F_ISSET(key, DB_DBT_PARTIAL) && !(LF_ISSET(DB_CONSUME) &&
	    LF_ISSET(DB_CONSUME_WAIT) && LF_ISSET(DB_SET_RECNO))) {
		__db_errx(env, DB_STR("0708",
		    "Invalid positioning flag combined with DB_DBT_PARTIAL"));
		return (EINVAL);
	}

	return (0);
}

/*
 * __db_join_pp --
 *	DB->join pre/post processing.
 *
 * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t));
 */
int
__db_join_pp(primary, curslist, dbcp, flags)
	DB *primary;
	DBC **curslist, **dbcp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = primary->env;

	ENV_ENTER(env, ip);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __db_rep_enter(
	    primary, 1, 0, IS_REAL_TXN(curslist[0]->txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	if ((ret = __db_join_arg(primary, curslist, flags)) == 0)
		ret = __db_join(primary, curslist, dbcp, flags);

	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

err:	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_join_arg --
 *	Check DB->join arguments.
 */
static int
__db_join_arg(primary, curslist, flags)
	DB *primary;
	DBC **curslist;
	u_int32_t flags;
{
	DB_TXN *txn;
	ENV *env;
	int i;

	env = primary->env;

	switch (flags) {
	case 0:
	case DB_JOIN_NOSORT:
		break;
	default:
		return (__db_ferr(env, "DB->join", 0));
	}

	if (curslist == NULL || curslist[0] == NULL) {
		__db_errx(env, DB_STR("0588",
    "At least one secondary cursor must be specified to DB->join"));
		return (EINVAL);
	}

	/* All cursors in the NULL-terminated list must share one txn. */
	txn = curslist[0]->txn;
	for (i = 1; curslist[i] != NULL; i++)
		if (curslist[i]->txn != txn) {
			__db_errx(env, DB_STR("0589",
	    "All secondary cursors must share the same transaction"));
			return (EINVAL);
		}

	return (0);
}

/*
 * __db_key_range_pp --
 *	DB->key_range pre/post processing.
 *
 * PUBLIC: int __db_key_range_pp
 * PUBLIC:     __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
 */
int
__db_key_range_pp(dbp, txn, key, kr, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key;
	DB_KEY_RANGE *kr;
	u_int32_t flags;
{
	DBC *dbc;
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = dbp->env;

	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range");

	/*
	 * !!!
	 * The actual argument checking is simple, do it inline, outside of
	 * the replication block.
	 */
	if (flags != 0)
		return (__db_ferr(env, "DB->key_range", 0));

	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
		goto err;

	/*
	 * !!!
	 * The actual method call is simple, do it inline.
	 */
	switch (dbp->type) {
	case DB_BTREE:
		if ((ret = __dbt_usercopy(env, key)) != 0)
			goto err;

		/* Acquire a cursor. */
		if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) {
			__dbt_userfree(env, key, NULL, NULL);
			break;
		}

		DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
#ifdef HAVE_PARTITION
		if (DB_IS_PARTITIONED(dbp))
			ret = __part_key_range(dbc, key, kr, flags);
		else
#endif
			ret = __bam_key_range(dbc, key, kr, flags);

		if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
			ret = t_ret;
		__dbt_userfree(env, key, NULL, NULL);
		break;
	case DB_HASH:
	case DB_QUEUE:
	case DB_RECNO:
		/* key_range is a btree-only method. */
		ret = __dbh_am_chk(dbp, DB_OK_BTREE);
		break;
	case DB_UNKNOWN:
	default:
		ret = __db_unknown_type(env, "DB->key_range", dbp->type);
		break;
	}

err:	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_open_pp --
 *	DB->open pre/post processing.
 *
 * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *,
 * PUBLIC:     const char *, const char *, DBTYPE, u_int32_t, int));
 */
int
__db_open_pp(dbp, txn, fname, dname, type, flags, mode)
	DB *dbp;
	DB_TXN *txn;
	const char *fname, *dname;
	DBTYPE type;
	u_int32_t flags;
	int mode;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, nosync, remove_me, ret, t_ret, txn_local;

	env = dbp->env;
	nosync = 1;
	handle_check = remove_me = txn_local = 0;

	ENV_ENTER(env, ip);

	/*
	 * Save the flags.  We do this here because we don't pass all of the
	 * flags down into the actual DB->open method call, we strip
	 * DB_AUTO_COMMIT at this layer.
	 */
	dbp->open_flags = flags;

	/* Save the current DB handle flags for refresh. */
	dbp->orig_flags = dbp->flags;

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	/*
	 * A replication client can't create a database, but it's convenient to
	 * allow a repmgr application to specify DB_CREATE anyway.  Thus for
	 * such an application the meaning of DB_CREATE becomes "create it if
	 * I'm a master, and otherwise ignore the flag".  A repmgr application
	 * running as master can't be sure that it won't spontaneously become a
	 * client, so there's a race condition.
	 */
	if (IS_REP_CLIENT(env) && !F_ISSET(dbp, DB_AM_NOT_DURABLE))
		LF_CLR(DB_CREATE);

	/*
	 * Create local transaction as necessary, check for consistent
	 * transaction usage.
	 */
	if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
		if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
			goto err;
		txn_local = 1;
	} else if (txn != NULL && !TXN_ON(env) &&
	    (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_FAMILY))) {
		ret = __db_not_txn_env(env);
		goto err;
	}
	LF_CLR(DB_AUTO_COMMIT);

	/*
	 * We check arguments after possibly creating a local transaction,
	 * which is unusual -- the reason is some flags are illegal if any
	 * kind of transaction is in effect.
	 */
	if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0)
		if ((ret = __db_open(dbp, ip, txn, fname, dname, type,
		    flags, mode, PGNO_BASE_MD)) != 0)
			goto txnerr;

	/*
	 * You can open the database that describes the subdatabases in the
	 * rest of the file read-only.  The content of each key's data is
	 * unspecified and applications should never be adding new records
	 * or updating existing records.  However, during recovery, we need
	 * to open these databases R/W so we can redo/undo changes in them.
	 * Likewise, we need to open master databases read/write during
	 * rename and remove so we can be sure they're fully sync'ed, so
	 * we provide an override flag for the purpose.
	 */
	if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) &&
	    !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) {
		__db_errx(env, DB_STR("0590",
    "files containing multiple databases may only be opened read-only"));
		ret = EINVAL;
		goto txnerr;
	}

	/*
	 * Success: file creations have to be synchronous, otherwise we don't
	 * care.
	 */
	if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR))
		nosync = 0;

	/* Success: don't discard the file on close. */
	F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR);

	/*
	 * If not transactional, remove the databases/subdatabases if it is
	 * persistent.  If we're transactional, the child transaction abort
	 * cleans up.
	 */
txnerr:	if (ret != 0 && !IS_REAL_TXN(txn)) {
		remove_me = (F_ISSET(dbp, DB_AM_CREATED) &&
		    (fname != NULL || dname != NULL)) ? 1 : 0;
		if (F_ISSET(dbp, DB_AM_CREATED_MSTR) ||
		    (dname == NULL && remove_me))
			/* Remove file. */
			(void)__db_remove_int(dbp,
			    ip, txn, fname, NULL, DB_FORCE);
		else if (remove_me)
			/* Remove subdatabase. */
			(void)__db_remove_int(dbp,
			    ip, txn, fname, dname, DB_FORCE);
	}

	if (txn_local && (t_ret =
	    __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0)
		ret = t_ret;

err:	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_open_arg --
 *	Check DB->open arguments.
 */
static int
__db_open_arg(dbp, txn, fname, dname, type, flags)
	DB *dbp;
	DB_TXN *txn;
	const char *fname, *dname;
	DBTYPE type;
	u_int32_t flags;
{
	ENV *env;
	u_int32_t ok_flags;
	int ret;

	env = dbp->env;

	/* Validate arguments. */
#undef	OKFLAGS
#define	OKFLAGS								\
	(DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING |	\
	DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY |	\
	DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_THREAD | DB_TRUNCATE)
	if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0)
		return (ret);
	if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
		return (__db_ferr(env, "DB->open", 1));
	if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
		return (__db_ferr(env, "DB->open", 1));

#ifdef	HAVE_VXWORKS
	if (LF_ISSET(DB_TRUNCATE)) {
		__db_errx(env, DB_STR("0591",
		    "DB_TRUNCATE not supported on VxWorks"));
		return (DB_OPNOTSUP);
	}
#endif
	/* Per-type flag validation. */
	switch (type) {
	case DB_UNKNOWN:
		if (LF_ISSET(DB_CREATE|DB_TRUNCATE)) {
			__db_errx(env, DB_STR("0592",
	    "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE"));
			return (EINVAL);
		}
		ok_flags = 0;
		break;
	case DB_BTREE:
		ok_flags = DB_OK_BTREE;
		break;
	case DB_HASH:
#ifndef HAVE_HASH
		return (__db_no_hash_am(env));
#endif
		ok_flags = DB_OK_HASH;
		break;
	case DB_HEAP:
		ok_flags = DB_OK_HEAP;
		break;
	case DB_QUEUE:
#ifndef HAVE_QUEUE
		return (__db_no_queue_am(env));
#endif
		ok_flags = DB_OK_QUEUE;
		break;
	case DB_RECNO:
		ok_flags = DB_OK_RECNO;
		break;
	default:
		__db_errx(env, DB_STR_A("0593",
		    "unknown type: %lu", "%lu"), (u_long)type);
		return (EINVAL);
	}
	if (ok_flags)
		DB_ILLEGAL_METHOD(dbp, ok_flags);

	/* The environment may have been created, but never opened. */
	if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) {
		__db_errx(env, DB_STR("0594",
		    "database environment not yet opened"));
		return (EINVAL);
	}

	/*
	 * Historically, you could pass in an environment that didn't have a
	 * mpool, and DB would create a private one behind the scenes.  This
	 * no longer works.
	 */
	if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) {
		__db_errx(env, DB_STR("0595",
		    "environment did not include a memory pool"));
		return (EINVAL);
	}

	/*
	 * You can't specify threads during DB->open if subsystems in the
	 * environment weren't configured with them.
	 */
	if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) {
		__db_errx(env, DB_STR("0596",
		    "environment not created using DB_THREAD"));
		return (EINVAL);
	}

	/* DB_MULTIVERSION requires a database configured for transactions. */
	if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) {
		__db_errx(env, DB_STR("0597",
		    "DB_MULTIVERSION illegal without a transaction specified"));
		return (EINVAL);
	}

	if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) {
		__db_errx(env, DB_STR("0598",
		    "DB_MULTIVERSION illegal with queue databases"));
		return (EINVAL);
	}

	/* DB_TRUNCATE is neither transaction recoverable nor lockable. */
	if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
		__db_errx(env, DB_STR_A("0599",
		    "DB_TRUNCATE illegal with %s specified", "%s"),
		    LOCKING_ON(env) ? "locking" : "transactions");
		return (EINVAL);
	}

	/* Subdatabase checks. */
	if (dname != NULL) {
		/* QAM can only be done on in-memory subdatabases. */
		if (type == DB_QUEUE && fname != NULL) {
			__db_errx(env, DB_STR("0600",
			    "Queue databases must be one-per-file"));
			return (EINVAL);
		}

		/*
		 * Named in-memory databases can't support certain flags,
		 * so check here.
		 */
		if (fname == NULL)
			F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT);
	}

	return (0);
}

/*
 * __db_pget_pp --
 *	DB->pget pre/post processing.
 *
 * PUBLIC: int __db_pget_pp
 * PUBLIC:     __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
 */
int
__db_pget_pp(dbp, txn, skey, pkey, data, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *skey, *pkey, *data;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ignore_lease, ret, t_ret;

	env = dbp->env;

	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget");

	/* Strip DB_IGNORE_LEASE before flag validation; remember it. */
	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
	LF_CLR(DB_IGNORE_LEASE);

	/* pget-specific checks first, then the shared DB->get checks. */
	if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 ||
	    (ret = __db_get_arg(dbp, skey, data, flags)) != 0) {
		__dbt_userfree(env, skey, pkey, data);
		return (ret);
	}

	ENV_ENTER(env, ip);
	XA_CHECK_TXN(ip, txn);

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check &&
	    (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
		handle_check = 0;
		goto err;
	}

	ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags);
	/*
	 * Check for master leases.
	 */
	if (ret == 0 &&
	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
		ret = __rep_lease_check(env, 1);

err:	/* Release replication block. */
	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	__dbt_userfree(env, skey, pkey, data);
	return (ret);
}

/*
 * __db_pget --
 *	DB->pget.
 *
 * PUBLIC: int __db_pget __P((DB *,
 * PUBLIC:     DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
 */
int
__db_pget(dbp, ip, txn, skey, pkey, data, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DBT *skey, *pkey, *data;
	u_int32_t flags;
{
	DBC *dbc;
	u_int32_t mode;
	int ret, t_ret;

	/* Single-shot operation: no need to restore position on error. */
	mode = DB_CURSOR_TRANSIENT;
	if (LF_ISSET(DB_READ_UNCOMMITTED)) {
		mode |= DB_READ_UNCOMMITTED;
		LF_CLR(DB_READ_UNCOMMITTED);
	} else if (LF_ISSET(DB_READ_COMMITTED)) {
		mode |= DB_READ_COMMITTED;
		LF_CLR(DB_READ_COMMITTED);
	}

	if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
		return (ret);

	SET_RET_MEM(dbc, dbp);

	DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags);

	/*
	 * !!!
	 * The actual method call is simple, do it inline.
	 *
	 * The underlying cursor pget will fill in a default DBT for null
	 * pkeys, and use the cursor's returned-key memory internally to
	 * store any intermediate primary keys.  However, we've just set
	 * the returned-key memory to the DB handle's key memory, which
	 * is unsafe to use if the DB handle is threaded.  If the pkey
	 * argument is NULL, use the DBC-owned returned-key memory
	 * instead; it'll go away when we close the cursor before we
	 * return, but in this case that's just fine, as we're not
	 * returning the primary key.
	 */
	if (pkey == NULL)
		dbc->rkey = &dbc->my_rkey;

	/*
	 * The cursor is just a perfectly ordinary secondary database cursor.
	 * Call its c_pget() method to do the dirty work.
	 */
	if (flags == 0 || flags == DB_RMW)
		flags |= DB_SET;

	ret = __dbc_pget(dbc, skey, pkey, data, flags);

	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __db_pget_arg --
 *	Check DB->pget arguments.
+ */ +static int +__db_pget_arg(dbp, pkey, flags) + DB *dbp; + DBT *pkey; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbp->env; + + if (!F_ISSET(dbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0601", + "DB->pget may only be used on secondary indices")); + return (EINVAL); + } + + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { + __db_errx(env,DB_STR("0602", +"DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices")); + return (EINVAL); + } + + /* DB_CONSUME makes no sense on a secondary index. */ + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); + switch (flags) { + case DB_CONSUME: + case DB_CONSUME_WAIT: + return (__db_ferr(env, "DB->pget", 0)); + default: + /* __db_get_arg will catch the rest. */ + break; + } + + /* + * We allow the pkey field to be NULL, so that we can make the + * two-DBT get calls into wrappers for the three-DBT ones. + */ + if (pkey != NULL && + (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0) + return (ret); + + /* Check invalid partial pkey. */ + if (pkey != NULL && F_ISSET(pkey, DB_DBT_PARTIAL)) { + __db_errx(env, DB_STR("0709", + "The primary key returned by pget can't be partial")); + return (EINVAL); + } + + if (flags == DB_GET_BOTH) { + /* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */ + if (pkey == NULL) { + __db_errx(env, DB_STR("0603", + "DB_GET_BOTH on a secondary index requires a primary key")); + return (EINVAL); + } + if ((ret = __dbt_usercopy(env, pkey)) != 0) + return (ret); + } + + return (0); +} + +/* + * __db_put_pp -- + * DB->put pre/post processing. 
+ * + * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + */ +int +__db_put_pp(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, txn_local, t_ret; + + env = dbp->env; + txn_local = 0; + + STRIP_AUTO_COMMIT(flags); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put"); + + if ((ret = __db_put_arg(dbp, key, data, flags)) != 0) + return (ret); + + ENV_ENTER(env, ip); + XA_CHECK_TXN(ip, txn); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + /* Create local transaction as necessary. */ + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) + goto err; + txn_local = 1; + } + + /* Check for consistent transaction usage. */ + if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) + goto err; + + ret = __db_put(dbp, ip, txn, key, data, flags); + +err: if (txn_local && + (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) + ret = t_ret; + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + __dbt_userfree(env, key, NULL, data); + return (ret); +} + +/* + * __db_put_arg -- + * Check DB->put arguments. + */ +static int +__db_put_arg(dbp, key, data, flags) + DB *dbp; + DBT *key, *data; + u_int32_t flags; +{ + ENV *env; + int ret, returnkey; + + env = dbp->env; + returnkey = 0; + + /* Check for changes to a read-only tree. */ + if (DB_IS_READONLY(dbp)) + return (__db_rdonly(env, "DB->put")); + + /* Check for puts on a secondary. 
*/ + if (F_ISSET(dbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0604", + "DB->put forbidden on secondary indices")); + return (EINVAL); + } + + if (LF_ISSET(DB_MULTIPLE_KEY | DB_MULTIPLE)) { + if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) + goto err; + + switch (LF_ISSET(DB_OPFLAGS_MASK)) { + case 0: + case DB_OVERWRITE_DUP: + break; + default: + __db_errx(env, DB_STR("0605", +"DB->put: DB_MULTIPLE(_KEY) can only be combined with DB_OVERWRITE_DUP")); + return (EINVAL); + } + + if (!F_ISSET(key, DB_DBT_BULK)) { + __db_errx(env, DB_STR("0606", + "DB->put with DB_MULTIPLE(_KEY) requires a bulk key buffer")); + return (EINVAL); + } + } + if (LF_ISSET(DB_MULTIPLE)) { + if (!F_ISSET(data, DB_DBT_BULK)) { + __db_errx(env, DB_STR("0607", + "DB->put with DB_MULTIPLE requires a bulk data buffer")); + return (EINVAL); + } + } + + /* Check for invalid function flags. */ + switch (LF_ISSET(DB_OPFLAGS_MASK)) { + case 0: + case DB_NOOVERWRITE: + case DB_OVERWRITE_DUP: + break; + case DB_APPEND: + if (dbp->type != DB_RECNO && + dbp->type != DB_QUEUE && dbp->type != DB_HEAP) + goto err; + returnkey = 1; + break; + case DB_NODUPDATA: + if (F_ISSET(dbp, DB_AM_DUPSORT)) + break; + /* FALLTHROUGH */ + default: +err: return (__db_ferr(env, "DB->put", 0)); + } + + /* + * Check for invalid key/data flags. The key may reasonably be NULL + * if DB_APPEND is set and the application doesn't care about the + * returned key. + */ + if (((returnkey && key != NULL) || !returnkey) && + (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0) + return (ret); + if (!LF_ISSET(DB_MULTIPLE_KEY) && + (ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + /* + * The key parameter should not be NULL or have the "partial" flag set + * in a put call unless the user doesn't care about a key value we'd + * return. The user tells us they don't care about the returned key by + * setting the key parameter to NULL or configuring the key DBT to not + * return any information. 
(Returned keys from a put are always record + * numbers, and returning part of a record number doesn't make sense: + * only accept a partial return if the length returned is 0.) + */ + if ((returnkey && + key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) || + (!returnkey && F_ISSET(key, DB_DBT_PARTIAL))) + return (__db_ferr(env, "key DBT", 0)); + + /* Check for partial puts in the presence of duplicates. */ + if (data != NULL && F_ISSET(data, DB_DBT_PARTIAL) && + (F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) { + __db_errx(env, DB_STR("0608", +"a partial put in the presence of duplicates requires a cursor operation")); + return (EINVAL); + } + + if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) || + (!LF_ISSET(DB_MULTIPLE_KEY) && + (ret = __dbt_usercopy(env, data)) != 0)) + return (ret); + + return (0); +} + +/* + * __db_compact_func + * Callback routine to report if the txn has open cursors. + */ +static int +__db_compact_func(dbc, my_dbc, countp, pgno, indx, args) + DBC *dbc, *my_dbc; + u_int32_t *countp; + db_pgno_t pgno; + u_int32_t indx; + void *args; +{ + DB_TXN *txn; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(countp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(indx, 0); + + txn = (DB_TXN *)args; + + if (txn == dbc->txn) + return (EEXIST); + return (0); +} +/* + * __db_compact_pp -- + * DB->compact pre/post processing. + * + * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *, + * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); + */ +int +__db_compact_pp(dbp, txn, start, stop, c_data, flags, end) + DB *dbp; + DB_TXN *txn; + DBT *start, *stop; + DB_COMPACT *c_data; + u_int32_t flags; + DBT *end; +{ + DB_COMPACT *dp, l_data; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + u_int32_t count; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact"); + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. 
+ */ + if ((ret = __db_fchk( + env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0) + return (ret); + + /* Check for changes to a read-only database. */ + if (DB_IS_READONLY(dbp)) + return (__db_rdonly(env, "DB->compact")); + + if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0) + return (ret); + if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0) { + __dbt_userfree(env, start, NULL, NULL); + return (ret); + } + + ENV_ENTER(env, ip); + XA_CHECK_TXN(ip, txn); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, + IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + if (txn != NULL) { + if ((ret = __db_walk_cursors(dbp, + NULL, __db_compact_func, &count, 0, 0, txn)) != 0) { + if (ret == EEXIST) { + __db_errx(env, DB_STR("0609", +"DB->compact may not be called with active cursors in the transaction.")); + ret = EINVAL; + } + goto err; + } + } + + if (c_data == NULL) { + dp = &l_data; + memset(dp, 0, sizeof(*dp)); + } else + dp = c_data; +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) + ret = __part_compact(dbp, ip, txn, start, stop, dp, flags, end); + else +#endif + switch (dbp->type) { + case DB_HASH: + case DB_BTREE: + case DB_RECNO: + ret = __db_compact_int(dbp, ip, + txn, start, stop, dp, flags, end); + break; + case DB_HEAP: + break; + default: + ret = __dbh_am_chk(dbp, DB_OK_BTREE); + break; + } + + /* Release replication block. */ +err: if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + __dbt_userfree(env, start, stop, NULL); + return (ret); +} + +/* + * __db_associate_foreign_pp -- + * DB->associate_foreign pre/post processing. 
+ * + * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *, + * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), + * PUBLIC: u_int32_t)); + */ +int +__db_associate_foreign_pp(fdbp, dbp, callback, flags) + DB *dbp, *fdbp; + int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); + u_int32_t flags; +{ + /* Most of this is based on the implementation of associate */ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + + PANIC_CHECK(env); + STRIP_AUTO_COMMIT(flags); + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { + handle_check = 0; + goto err; + } + + if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0) + goto err; + + ret = __db_associate_foreign(fdbp, dbp, callback, flags); + +err: /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_associate_foreign_arg -- + * DB->associate_foreign argument checking. 
+ */ +static int +__db_associate_foreign_arg(fdbp, dbp, callback, flags) + DB *dbp, *fdbp; + int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); + u_int32_t flags; +{ + ENV *env; + + env = fdbp->env; + + if (F_ISSET(fdbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0610", + "Secondary indices may not be used as foreign databases")); + return (EINVAL); + } + if (F_ISSET(fdbp, DB_AM_DUP)) { + __db_errx(env, DB_STR("0611", + "Foreign databases may not be configured with duplicates")); + return (EINVAL); + } + if (F_ISSET(fdbp, DB_AM_RENUMBER)) { + __db_errx(env, DB_STR("0612", + "Renumbering recno databases may not be used as foreign databases")); + return (EINVAL); + } + if (!F_ISSET(dbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0613", + "The associating database must be a secondary index.")); + return (EINVAL); + } + if (LF_ISSET(DB_FOREIGN_NULLIFY) && callback == NULL) { + __db_errx(env, DB_STR("0614", + "When specifying a delete action of nullify, a callback " + "function needs to be configured")); + return (EINVAL); + } else if (!LF_ISSET(DB_FOREIGN_NULLIFY) && callback != NULL) { + __db_errx(env, DB_STR("0615", + "When not specifying a delete action of nullify, a " + "callback function cannot be configured")); + return (EINVAL); + } + + return (0); +} + +/* + * __db_sync_pp -- + * DB->sync pre/post processing. + * + * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t)); + */ +int +__db_sync_pp(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + */ + if (flags != 0) + return (__db_ferr(env, "DB->sync", 0)); + + ENV_ENTER(env, ip); + + /* Check for replication block. 
*/ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { + handle_check = 0; + goto err; + } + + ret = __db_sync(dbp); + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_close_pp -- + * DBC->close pre/post processing. + * + * PUBLIC: int __dbc_close_pp __P((DBC *)); + */ +int +__dbc_close_pp(dbc) + DBC *dbc; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + DB_TXN *txn; + int handle_check, ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + txn = dbc->txn; + + /* + * If the cursor is already closed we have a serious problem, and we + * assume that the cursor isn't on the active queue. Don't do any of + * the remaining cursor close processing. + */ + if (!F_ISSET(dbc, DBC_ACTIVE)) { + __db_errx(env, DB_STR("0616", + "Closing already-closed cursor")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = !IS_REAL_TXN(dbc->txn) && IS_ENV_REPLICATED(env); + + /* Unregister the cursor from its transaction, regardless of ret. */ + if (txn != NULL) { + TAILQ_REMOVE(&(txn->my_cursors), dbc, txn_cursors); + dbc->txn_cursors.tqe_next = NULL; + dbc->txn_cursors.tqe_prev = NULL; + } else { + DB_ASSERT(env, dbc->txn_cursors.tqe_next == NULL && + dbc->txn_cursors.tqe_prev == NULL); + } + + ret = __dbc_close(dbc); + + /* Release replication block. */ + if (handle_check && + (t_ret = __op_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_cmp_pp -- + * DBC->cmp pre/post processing. 
+ * + * PUBLIC: int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t)); + */ +int +__dbc_cmp_pp(dbc, other_cursor, result, flags) + DBC *dbc, *other_cursor; + int *result; + u_int32_t flags; +{ + DB *dbp, *odbp; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + dbp = dbc->dbp; + odbp = other_cursor->dbp; + env = dbp->env; + + if (flags != 0) + return (__db_ferr(env, "DBcursor->cmp", 0)); + + if (other_cursor == NULL) { + __db_errx(env, DB_STR("0617", + "DBcursor->cmp dbc pointer must not be null")); + return (EINVAL); + } + + if (dbp != odbp) { + __db_errx(env, DB_STR("0618", +"DBcursor->cmp both cursors must refer to the same database.")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + ret = __dbc_cmp(dbc, other_cursor, result); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_count_pp -- + * DBC->count pre/post processing. + * + * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t)); + */ +int +__dbc_count_pp(dbc, recnop, flags) + DBC *dbc; + db_recno_t *recnop; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + * + * The cursor must be initialized, return EINVAL for an invalid cursor. + */ + if (flags != 0) + return (__db_ferr(env, "DBcursor->count", 0)); + + if (!IS_INITIALIZED(dbc)) + return (__db_curinval(env)); + + ENV_ENTER(env, ip); + ret = __dbc_count(dbc, recnop); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_del_pp -- + * DBC->del pre/post processing. + * + * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t)); + */ +int +__dbc_del_pp(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + if ((ret = __dbc_del_arg(dbc, flags)) != 0) + return (ret); + + ENV_ENTER(env, ip); + + /* Check for consistent transaction usage. 
*/ + if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) + goto err; + + DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags); + ret = __dbc_del(dbc, flags); + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_del_arg -- + * Check DBC->del arguments. + */ +static int +__dbc_del_arg(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + + dbp = dbc->dbp; + env = dbp->env; + + /* Check for changes to a read-only tree. */ + if (DB_IS_READONLY(dbp)) + return (__db_rdonly(env, "DBcursor->del")); + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + case DB_CONSUME: + if (dbp->type != DB_QUEUE) + return (__db_ferr(env, "DBC->del", 0)); + break; + case DB_UPDATE_SECONDARY: + DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY)); + break; + default: + return (__db_ferr(env, "DBcursor->del", 0)); + } + + /* + * The cursor must be initialized, return EINVAL for an invalid cursor, + * otherwise 0. + */ + if (!IS_INITIALIZED(dbc)) + return (__db_curinval(env)); + + return (0); +} + +/* + * __dbc_dup_pp -- + * DBC->dup pre/post processing. + * + * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t)); + */ +int +__dbc_dup_pp(dbc, dbcp, flags) + DBC *dbc, **dbcp; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int rep_blocked, ret; + + dbp = dbc->dbp; + env = dbp->env; + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + */ + if (flags != 0 && flags != DB_POSITION) + return (__db_ferr(env, "DBcursor->dup", 0)); + + ENV_ENTER(env, ip); + rep_blocked = 0; + if (dbc->txn == NULL && IS_ENV_REPLICATED(env)) { + if ((ret = __op_rep_enter(env, 1, 1)) != 0) + goto err; + rep_blocked = 1; + } + ret = __dbc_dup(dbc, dbcp, flags); + + /* Register externally created cursors into the valid transaction. 
*/ + DB_ASSERT(env, (*dbcp)->txn == dbc->txn); + if ((*dbcp)->txn != NULL && ret == 0) + TAILQ_INSERT_HEAD(&((*dbcp)->txn->my_cursors), *dbcp, + txn_cursors); +err: + if (ret != 0 && rep_blocked) + (void)__op_rep_exit(env); + + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __dbc_get_pp -- + * DBC->get pre/post processing. + * + * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_get_pp(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ignore_lease, ret; + + dbp = dbc->dbp; + env = dbp->env; + + ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; + LF_CLR(DB_IGNORE_LEASE); + if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) { + __dbt_userfree(env, key, NULL, data); + return (ret); + } + + ENV_ENTER(env, ip); + + DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get", + flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); + ret = __dbc_get(dbc, key, data, flags); + + /* + * Check for master leases. + */ + if (ret == 0 && + IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) + ret = __rep_lease_check(env, 1); + + ENV_LEAVE(env, ip); + __dbt_userfree(env, key, NULL, data); + return (ret); +} + +/* + * __dbc_get_arg -- + * Common DBC->get argument checking, used by both DBC->get and DBC->pget. + * PUBLIC: int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_get_arg(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + int dirty, multi, ret; + + dbp = dbc->dbp; + env = dbp->env; + + /* + * Typically in checking routines that modify the flags, we have + * to save them and restore them, because the checking routine + * calls the work routine. However, this is a pure-checking + * routine which returns to a function that calls the work routine, + * so it's OK that we do not save and restore the flags, even though + * we modify them. + * + * Check for read-modify-write validity. 
DB_RMW doesn't make sense + * with CDB cursors since if you're going to write the cursor, you + * had to create it with DB_WRITECURSOR. Regardless, we check for + * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it. + * If this changes, confirm that DB does not itself set the DB_RMW + * flag in a path where CDB may have been configured. + */ + dirty = 0; + if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { + if (!LOCKING_ON(env)) + return (__db_fnl(env, "DBcursor->get")); + if (LF_ISSET(DB_READ_UNCOMMITTED)) + dirty = 1; + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); + } + + multi = 0; + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { + multi = 1; + if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) + goto multi_err; + LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY); + } + + /* Check for invalid function flags. */ + switch (flags) { + case DB_CONSUME: + case DB_CONSUME_WAIT: + if (dirty) { + __db_errx(env, DB_STR("0619", +"DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT")); + return (EINVAL); + } + if (dbp->type != DB_QUEUE) + goto err; + break; + case DB_CURRENT: + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_DUP: + case DB_NEXT_NODUP: + break; + case DB_LAST: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + if (multi) +multi_err: return (__db_ferr(env, "DBcursor->get", 1)); + break; + case DB_GET_BOTHC: + if (dbp->type == DB_QUEUE) + goto err; + /* FALLTHROUGH */ + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + if ((ret = __dbt_usercopy(env, data)) != 0) + goto err; + /* FALLTHROUGH */ + case DB_SET: + case DB_SET_RANGE: + if ((ret = __dbt_usercopy(env, key)) != 0) + goto err; + break; + case DB_GET_RECNO: + /* + * The one situation in which this might be legal with a + * non-RECNUM dbp is if dbp is a secondary and its primary is + * DB_AM_RECNUM. 
+ */ + if (!F_ISSET(dbp, DB_AM_RECNUM) && + (!F_ISSET(dbp, DB_AM_SECONDARY) || + !F_ISSET(dbp->s_primary, DB_AM_RECNUM))) + goto err; + break; + case DB_SET_RECNO: + if (!F_ISSET(dbp, DB_AM_RECNUM)) + goto err; + if ((ret = __dbt_usercopy(env, key)) != 0) + goto err; + break; + default: +err: __dbt_userfree(env, key, NULL, data); + return (__db_ferr(env, "DBcursor->get", 0)); + } + + /* Check for invalid key/data flags. */ + if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0) + return (ret); + if (F_ISSET(data, DB_DBT_READONLY)) { + __db_errx(env, DB_STR("0620", + "DB_DBT_READONLY should not be set on data DBT.")); + return (EINVAL); + } + if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + if (multi) { + if (!F_ISSET(data, DB_DBT_USERMEM)) { + __db_errx(env, DB_STR("0621", + "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set")); + return (EINVAL); + } + if (F_ISSET(key, DB_DBT_PARTIAL) || + F_ISSET(data, DB_DBT_PARTIAL)) { + __db_errx(env, DB_STR("0622", + "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL")); + return (EINVAL); + } + if (data->ulen < 1024 || + data->ulen < dbp->pgsize || data->ulen % 1024 != 0) { + __db_errx(env, DB_STR("0623", + "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be " + "aligned, at least page size and multiples of 1KB")); + return (EINVAL); + } + } + + /* Check compatible flags for partial key. */ + if (F_ISSET(key, DB_DBT_PARTIAL) && (flags == DB_GET_BOTH || + flags == DB_GET_BOTH_RANGE || flags == DB_SET)) { + __db_errx(env, DB_STR("0710", + "Invalid positioning flag combined with DB_DBT_PARTIAL")); + return (EINVAL); + } + + /* + * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO, + * DB_PREV_DUP and DB_NEXT_DUP. Return EINVAL for an invalid + * cursor, otherwise 0. + */ + if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT || + flags == DB_GET_RECNO || + flags == DB_NEXT_DUP || flags == DB_PREV_DUP)) + return (__db_curinval(env)); + + /* Check for consistent transaction usage. 
*/ + if (LF_ISSET(DB_RMW) && + (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) + return (ret); + + return (0); +} + +/* + * __db_secondary_close_pp -- + * DB->close for secondaries + * + * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t)); + */ +int +__db_secondary_close_pp(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + ret = 0; + + /* + * As a DB handle destructor, we can't fail. + * + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + */ + if (flags != 0 && flags != DB_NOSYNC) + ret = __db_ferr(env, "DB->close", 0); + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) { + handle_check = 0; + if (ret == 0) + ret = t_ret; + } + + if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0) + ret = t_ret; + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __dbc_pget_pp -- + * DBC->pget pre/post processing. + * + * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_pget_pp(dbc, skey, pkey, data, flags) + DBC *dbc; + DBT *skey, *pkey, *data; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ignore_lease, ret; + + dbp = dbc->dbp; + env = dbp->env; + + ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; + LF_CLR(DB_IGNORE_LEASE); + if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 || + (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0) { + __dbt_userfree(env, skey, pkey, data); + return (ret); + } + + ENV_ENTER(env, ip); + DEBUG_LREAD(dbc, dbc->txn, "DBcursor->pget", + flags == DB_SET || + flags == DB_SET_RANGE ? 
skey : NULL, NULL, flags); + ret = __dbc_pget(dbc, skey, pkey, data, flags); + /* + * Check for master leases. + */ + if (ret == 0 && + IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) + ret = __rep_lease_check(env, 1); + + ENV_LEAVE(env, ip); + + __dbt_userfree(env, skey, pkey, data); + return (ret); +} + +/* + * __dbc_pget_arg -- + * Check DBC->pget arguments. + */ +static int +__dbc_pget_arg(dbc, pkey, flags) + DBC *dbc; + DBT *pkey; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + if (!F_ISSET(dbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0624", + "DBcursor->pget may only be used on secondary indices")); + return (EINVAL); + } + + if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { + __db_errx(env, DB_STR("0625", + "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices")); + return (EINVAL); + } + + switch (LF_ISSET(DB_OPFLAGS_MASK)) { + case DB_CONSUME: + case DB_CONSUME_WAIT: + /* These flags make no sense on a secondary index. */ + return (__db_ferr(env, "DBcursor->pget", 0)); + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + /* BOTH is "get both the primary and the secondary". */ + if (pkey == NULL) { + __db_errx(env, DB_STR_A("0626", + "%s requires both a secondary and a primary key", + "%s"), LF_ISSET(DB_GET_BOTH) ? + "DB_GET_BOTH" : "DB_GET_BOTH_RANGE"); + return (EINVAL); + } + if ((ret = __dbt_usercopy(env, pkey)) != 0) + return (ret); + break; + default: + /* __dbc_get_arg will catch the rest. */ + break; + } + + /* + * We allow the pkey field to be NULL, so that we can make the + * two-DBT get calls into wrappers for the three-DBT ones. + */ + if (pkey != NULL && + (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0) + return (ret); + + /* Check invalid partial pkey. 
*/ + if (pkey != NULL && F_ISSET(pkey, DB_DBT_PARTIAL)) { + __db_errx(env, DB_STR("0711", + "The primary key returned by pget can't be partial.")); + return (EINVAL); + } + + /* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */ + if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) { + __db_errx(env, DB_STR("0627", + "DB_GET_BOTH on a secondary index requires a primary key")); + return (EINVAL); + } + + return (0); +} + +/* + * __dbc_put_pp -- + * DBC->put pre/post processing. + * + * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t)); + */ +int +__dbc_put_pp(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0) { + __dbt_userfree(env, key, NULL, data); + return (ret); + } + + ENV_ENTER(env, ip); + + /* Check for consistent transaction usage. */ + if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) + goto err; + + DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put", + flags == DB_KEYFIRST || flags == DB_KEYLAST || + flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ? + key : NULL, data, flags); + ret = __dbc_put(dbc, key, data, flags); + +err: ENV_LEAVE(env, ip); + __dbt_userfree(env, key, NULL, data); + return (ret); +} + +/* + * __dbc_put_arg -- + * Check DBC->put arguments. + */ +static int +__dbc_put_arg(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + int key_flags, ret; + + dbp = dbc->dbp; + env = dbp->env; + key_flags = 0; + + /* Check for changes to a read-only tree. */ + if (DB_IS_READONLY(dbp)) + return (__db_rdonly(env, "DBcursor->put")); + + /* Check for puts on a secondary. 
*/ + if (F_ISSET(dbp, DB_AM_SECONDARY)) { + if (flags == DB_UPDATE_SECONDARY) + flags = 0; + else { + __db_errx(env, DB_STR("0628", + "DBcursor->put forbidden on secondary indices")); + return (EINVAL); + } + } + + if ((ret = __dbt_usercopy(env, data)) != 0) + return (ret); + + /* Check for invalid function flags. */ + switch (flags) { + case DB_AFTER: + case DB_BEFORE: + switch (dbp->type) { + case DB_BTREE: + case DB_HASH: /* Only with unsorted duplicates. */ + if (!F_ISSET(dbp, DB_AM_DUP)) + goto err; + if (dbp->dup_compare != NULL) + goto err; + break; + case DB_QUEUE: /* Not permitted. */ + goto err; + case DB_RECNO: /* Only with mutable record numbers. */ + if (!F_ISSET(dbp, DB_AM_RENUMBER)) + goto err; + key_flags = key == NULL ? 0 : 1; + break; + case DB_UNKNOWN: + default: + goto err; + } + break; + case DB_CURRENT: + /* + * If there is a comparison function, doing a DB_CURRENT + * must not change the part of the data item that is used + * for the comparison. + */ + break; + case DB_NODUPDATA: + if (!F_ISSET(dbp, DB_AM_DUPSORT)) + goto err; + /* FALLTHROUGH */ + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_OVERWRITE_DUP: + key_flags = 1; + if ((ret = __dbt_usercopy(env, key)) != 0) + return (ret); + break; + default: +err: return (__db_ferr(env, "DBcursor->put", 0)); + } + + /* + * Check for invalid key/data flags. The key may reasonably be NULL + * if DB_AFTER or DB_BEFORE is set and the application doesn't care + * about the returned key, or if the DB_CURRENT flag is set. + */ + if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0) + return (ret); + if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + /* + * The key parameter should not be NULL or have the "partial" flag set + * in a put call unless the user doesn't care about a key value we'd + * return. The user tells us they don't care about the returned key by + * setting the key parameter to NULL or configuring the key DBT to not + * return any information. 
(Returned keys from a put are always record + * numbers, and returning part of a record number doesn't make sense: + * only accept a partial return if the length returned is 0.) + */ + if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) + return (__db_ferr(env, "key DBT", 0)); + + /* + * The cursor must be initialized for anything other than DB_KEYFIRST, + * DB_KEYLAST or zero: return EINVAL for an invalid cursor, otherwise 0. + */ + if (!IS_INITIALIZED(dbc) && flags != 0 && flags != DB_KEYFIRST && + flags != DB_KEYLAST && flags != DB_NODUPDATA && + flags != DB_OVERWRITE_DUP) + return (__db_curinval(env)); + + return (0); +} + +/* + * __dbt_ferr -- + * Check a DBT for flag errors. + */ +static int +__dbt_ferr(dbp, name, dbt, check_thread) + const DB *dbp; + const char *name; + const DBT *dbt; + int check_thread; +{ + ENV *env; + int ret; + + env = dbp->env; + + /* + * Check for invalid DBT flags. We allow any of the flags to be + * specified to any DB or DBcursor call so that applications can + * set DB_DBT_MALLOC when retrieving a data item from a secondary + * database and then specify that same DBT as a key to a primary + * database, without having to clear flags. 
+ */ + if ((ret = __db_fchk(env, name, dbt->flags, + DB_DBT_APPMALLOC | DB_DBT_BULK | DB_DBT_DUPOK | + DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY | + DB_DBT_USERMEM | DB_DBT_PARTIAL | DB_DBT_READONLY)) != 0) + return (ret); + switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | + DB_DBT_USERCOPY | DB_DBT_USERMEM)) { + case 0: + case DB_DBT_MALLOC: + case DB_DBT_REALLOC: + case DB_DBT_USERCOPY: + case DB_DBT_USERMEM: + break; + default: + return (__db_ferr(env, name, 1)); + } + + if (F_ISSET(dbt, DB_DBT_BULK) && F_ISSET(dbt, DB_DBT_PARTIAL)) { + __db_errx(env, DB_STR_A("0629", + "Bulk and partial operations cannot be combined on %s DBT", + "%s"), name); + return (EINVAL); + } + + if (check_thread && DB_IS_THREADED(dbp) && + !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | + DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_READONLY)) { + __db_errx(env, DB_STR_A("0630", + "DB_THREAD mandates memory allocation flag on %s DBT", + "%s"), name); + return (EINVAL); + } + return (0); +} + +/* + * __db_curinval + * Report that a cursor is in an invalid state. + */ +static int +__db_curinval(env) + const ENV *env; +{ + __db_errx(env, DB_STR("0631", + "Cursor position must be set before performing this operation")); + return (EINVAL); +} + +/* + * __db_txn_auto_init -- + * Handle DB_AUTO_COMMIT initialization. + * + * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **)); + */ +int +__db_txn_auto_init(env, ip, txnidp) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN **txnidp; +{ + /* + * Method calls where applications explicitly specify DB_AUTO_COMMIT + * require additional validation: the DB_AUTO_COMMIT flag cannot be + * specified if a transaction cookie is also specified, nor can the + * flag be specified in a non-transactional environment. 
+ */ + if (*txnidp != NULL && !F_ISSET(*txnidp, TXN_FAMILY)) { + __db_errx(env, DB_STR("0632", + "DB_AUTO_COMMIT may not be specified along with a transaction handle")); + return (EINVAL); + } + + if (!TXN_ON(env)) { + __db_errx(env, DB_STR("0633", + "DB_AUTO_COMMIT may not be specified in non-transactional environment")); + return (EINVAL); + } + + /* + * Our caller checked to see if replication is making a state change. + * Don't call the user-level API (which would repeat that check). + */ + return (__txn_begin(env, ip, *txnidp, txnidp, 0)); +} + +/* + * __db_txn_auto_resolve -- + * Resolve local transactions. + * + * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int)); + */ +int +__db_txn_auto_resolve(env, txn, nosync, ret) + ENV *env; + DB_TXN *txn; + int nosync, ret; +{ + int t_ret; + + if (ret == 0) + return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0)); + + if ((t_ret = __txn_abort(txn)) != 0) + return (__env_panic(env, t_ret)); + + return (ret); +} diff --git a/src/db/db_join.c b/src/db/db_join.c new file mode 100644 index 00000000..9e03776d --- /dev/null +++ b/src/db/db_join.c @@ -0,0 +1,940 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_join.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" + +static int __db_join_close_pp __P((DBC *)); +static int __db_join_cmp __P((const void *, const void *)); +static int __db_join_del __P((DBC *, u_int32_t)); +static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __db_join_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __db_join_getnext __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t)); +static int __db_join_primget __P((DB *, DB_THREAD_INFO *, + DB_TXN *, DB_LOCKER *, DBT *, DBT *, u_int32_t)); +static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t)); + +/* + * Check to see if the Nth secondary cursor of join cursor jc is pointing + * to a sorted duplicate set. + */ +#define SORTED_SET(jc, n) ((jc)->j_curslist[(n)]->dbp->dup_compare != NULL) + +/* + * This is the duplicate-assisted join functionality. Right now we're + * going to write it such that we return one item at a time, although + * I think we may need to optimize it to return them all at once. + * It should be easier to get it working this way, and I believe that + * changing it should be fairly straightforward. + * + * We optimize the join by sorting cursors from smallest to largest + * cardinality. In most cases, this is indeed optimal. However, if + * a cursor with large cardinality has very few data in common with the + * first cursor, it is possible that the join will be made faster by + * putting it earlier in the cursor list. Since we have no way to detect + * cases like this, we simply provide a flag, DB_JOIN_NOSORT, which retains + * the sort order specified by the caller, who may know more about the + * structure of the data. + * + * The first cursor moves sequentially through the duplicate set while + * the others search explicitly for the duplicate in question. 
+ * + */ + +/* + * __db_join -- + * This is the interface to the duplicate-assisted join functionality. + * In the same way that cursors mark a position in a database, a cursor + * can mark a position in a join. While most cursors are created by the + * cursor method of a DB, join cursors are created through an explicit + * call to DB->join. + * + * The curslist is an array of existing, initialized cursors and primary + * is the DB of the primary file. The data item that joins all the + * cursors in the curslist is used as the key into the primary and that + * key and data are returned. When no more items are left in the join + * set, the c_next operation off the join cursor will return DB_NOTFOUND. + * + * PUBLIC: int __db_join __P((DB *, DBC **, DBC **, u_int32_t)); + */ +int +__db_join(primary, curslist, dbcp, flags) + DB *primary; + DBC **curslist, **dbcp; + u_int32_t flags; +{ + DBC *dbc; + ENV *env; + JOIN_CURSOR *jc; + size_t ncurs, nslots; + u_int32_t i; + int ret; + + env = primary->env; + dbc = NULL; + jc = NULL; + + if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0) + goto err; + + if ((ret = __os_calloc(env, 1, sizeof(JOIN_CURSOR), &jc)) != 0) + goto err; + + if ((ret = __os_malloc(env, 256, &jc->j_key.data)) != 0) + goto err; + jc->j_key.ulen = 256; + F_SET(&jc->j_key, DB_DBT_USERMEM); + + F_SET(&jc->j_rdata, DB_DBT_REALLOC); + + for (jc->j_curslist = curslist; + *jc->j_curslist != NULL; jc->j_curslist++) + ; + + /* + * The number of cursor slots we allocate is one greater than + * the number of cursors involved in the join, because the + * list is NULL-terminated. + */ + ncurs = (size_t)(jc->j_curslist - curslist); + nslots = ncurs + 1; + + /* + * !!! -- A note on the various lists hanging off jc. + * + * j_curslist is the initial NULL-terminated list of cursors passed + * into __db_join. 
The original cursors are not modified; pristine + * copies are required because, in databases with unsorted dups, we + * must reset all of the secondary cursors after the first each + * time the first one is incremented, or else we will lose data + * which happen to be sorted differently in two different cursors. + * + * j_workcurs is where we put those copies that we're planning to + * work with. They're lazily c_dup'ed from j_curslist as we need + * them, and closed when the join cursor is closed or when we need + * to reset them to their original values (in which case we just + * c_dup afresh). + * + * j_fdupcurs is an array of cursors which point to the first + * duplicate in the duplicate set that contains the data value + * we're currently interested in. We need this to make + * __db_join_get correctly return duplicate duplicates; i.e., if a + * given data value occurs twice in the set belonging to cursor #2, + * and thrice in the set belonging to cursor #3, and once in all + * the other cursors, successive calls to __db_join_get need to + * return that data item six times. To make this happen, each time + * cursor N is allowed to advance to a new datum, all cursors M + * such that M > N have to be reset to the first duplicate with + * that datum, so __db_join_get will return all the dup-dups again. + * We could just reset them to the original cursor from j_curslist, + * but that would be a bit slower in the unsorted case and a LOT + * slower in the sorted one. + * + * j_exhausted is a list of boolean values which represent + * whether or not their corresponding cursors are "exhausted", + * i.e. whether the datum under the corresponding cursor has + * been found not to exist in any unreturned combinations of + * later secondary cursors, in which case they are ready to be + * incremented. + */ + + /* We don't want to free regions whose callocs have failed. 
*/ + jc->j_curslist = NULL; + jc->j_workcurs = NULL; + jc->j_fdupcurs = NULL; + jc->j_exhausted = NULL; + + if ((ret = __os_calloc(env, nslots, sizeof(DBC *), + &jc->j_curslist)) != 0) + goto err; + if ((ret = __os_calloc(env, nslots, sizeof(DBC *), + &jc->j_workcurs)) != 0) + goto err; + if ((ret = __os_calloc(env, nslots, sizeof(DBC *), + &jc->j_fdupcurs)) != 0) + goto err; + if ((ret = __os_calloc(env, nslots, sizeof(u_int8_t), + &jc->j_exhausted)) != 0) + goto err; + for (i = 0; curslist[i] != NULL; i++) { + jc->j_curslist[i] = curslist[i]; + jc->j_workcurs[i] = NULL; + jc->j_fdupcurs[i] = NULL; + jc->j_exhausted[i] = 0; + } + jc->j_ncurs = (u_int32_t)ncurs; + + /* + * If DB_JOIN_NOSORT is not set, optimize secondary cursors by + * sorting in order of increasing cardinality. + */ + if (!LF_ISSET(DB_JOIN_NOSORT)) + qsort(jc->j_curslist, ncurs, sizeof(DBC *), __db_join_cmp); + + /* + * We never need to reset the 0th cursor, so there's no + * solid reason to use workcurs[0] rather than curslist[0] in + * join_get. Nonetheless, it feels cleaner to do it for symmetry, + * and this is the most logical place to copy it. + * + * !!! + * There's no need to close the new cursor if we goto err only + * because this is the last thing that can fail. Modifier of this + * function beware! + */ + if ((ret = + __dbc_dup(jc->j_curslist[0], jc->j_workcurs, DB_POSITION)) != 0) + goto err; + + dbc->close = dbc->c_close = __db_join_close_pp; + dbc->del = dbc->c_del = __db_join_del; + dbc->get = dbc->c_get = __db_join_get_pp; + dbc->put = dbc->c_put = __db_join_put; + dbc->internal = (DBC_INTERNAL *)jc; + dbc->dbp = primary; + jc->j_primary = primary; + + /* Stash the first cursor's transaction here for easy access. 
*/ + dbc->txn = curslist[0]->txn; + + *dbcp = dbc; + + MUTEX_LOCK(env, primary->mutex); + TAILQ_INSERT_TAIL(&primary->join_queue, dbc, links); + MUTEX_UNLOCK(env, primary->mutex); + + return (0); + +err: if (jc != NULL) { + if (jc->j_curslist != NULL) + __os_free(env, jc->j_curslist); + if (jc->j_workcurs != NULL) { + if (jc->j_workcurs[0] != NULL) + (void)__dbc_close(jc->j_workcurs[0]); + __os_free(env, jc->j_workcurs); + } + if (jc->j_fdupcurs != NULL) + __os_free(env, jc->j_fdupcurs); + if (jc->j_exhausted != NULL) + __os_free(env, jc->j_exhausted); + __os_free(env, jc); + } + if (dbc != NULL) + __os_free(env, dbc); + return (ret); +} + +/* + * __db_join_close_pp -- + * DBC->close pre/post processing for join cursors. + */ +static int +__db_join_close_pp(dbc) + DBC *dbc; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + + ENV_ENTER(env, ip); + + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(dbc->txn))) != 0) { + handle_check = 0; + goto err; + } + + ret = __db_join_close(dbc); + + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +static int +__db_join_put(dbc, key, data, flags) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; +{ + COMPQUIET(dbc, NULL); + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + COMPQUIET(flags, 0); + return (EINVAL); +} + +static int +__db_join_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + COMPQUIET(dbc, NULL); + COMPQUIET(flags, 0); + return (EINVAL); +} + +/* + * __db_join_get_pp -- + * DBjoin->get pre/post processing. + */ +static int +__db_join_get_pp(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + u_int32_t handle_check, save_flags; + int ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + + /* Save the original flags value. 
*/ + save_flags = flags; + + if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { + if (!LOCKING_ON(env)) + return (__db_fnl(env, "DBC->get")); + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); + } + + switch (flags) { + case 0: + case DB_JOIN_ITEM: + break; + default: + return (__db_ferr(env, "DBC->get", 0)); + } + + /* + * A partial get of the key of a join cursor don't make much sense; + * the entire key is necessary to query the primary database + * and find the datum, and so regardless of the size of the key + * it would not be a performance improvement. Since it would require + * special handling, we simply disallow it. + * + * A partial get of the data, however, potentially makes sense (if + * all possible data are a predictable large structure, for instance) + * and causes us no headaches, so we permit it. + */ + if (F_ISSET(key, DB_DBT_PARTIAL)) { + __db_errx(env, DB_STR("0516", + "DB_DBT_PARTIAL may not be set on key during join_get")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(dbc->txn))) != 0) { + handle_check = 0; + goto err; + } + + /* Restore the original flags value. */ + flags = save_flags; + + ret = __db_join_get(dbc, key, data, flags); + + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + __dbt_userfree(env, key, NULL, NULL); + return (ret); +} + +static int +__db_join_get(dbc, key_arg, data_arg, flags) + DBC *dbc; + DBT *key_arg, *data_arg; + u_int32_t flags; +{ + DB *dbp; + DBC *cp; + DBT *key_n, key_n_mem; + ENV *env; + JOIN_CURSOR *jc; + int db_manage_data, ret; + u_int32_t i, j, operation, opmods; + + dbp = dbc->dbp; + env = dbp->env; + jc = (JOIN_CURSOR *)dbc->internal; + + operation = LF_ISSET(DB_OPFLAGS_MASK); + + /* !!! + * If the set of flags here changes, check that __db_join_primget + * is updated to handle them properly. 
+ */ + opmods = LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); + + /* + * Since we are fetching the key as a datum in the secondary indices, + * we must be careful of caller-specified DB_DBT_* memory + * management flags. If necessary, use a stack-allocated DBT; + * we'll appropriately copy and/or allocate the data later. + */ + if (F_ISSET(key_arg, + DB_DBT_MALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM)) { + /* We just use the default buffer; no need to go malloc. */ + key_n = &key_n_mem; + memset(key_n, 0, sizeof(DBT)); + } else { + /* + * Either DB_DBT_REALLOC or the default buffer will work + * fine if we have to reuse it, as we do. + */ + key_n = key_arg; + } + if (F_ISSET(key_arg, DB_DBT_USERCOPY)) + key_arg->data = NULL; + + /* + * If our last attempt to do a get on the primary key failed, + * short-circuit the join and try again with the same key. + */ + if (F_ISSET(jc, JOIN_RETRY)) + goto samekey; + F_CLR(jc, JOIN_RETRY); + +retry: ret = __dbc_get(jc->j_workcurs[0], &jc->j_key, key_n, + opmods | (jc->j_exhausted[0] ? DB_NEXT_DUP : DB_CURRENT)); + + if (ret == DB_BUFFER_SMALL) { + jc->j_key.ulen <<= 1; + if ((ret = __os_realloc(env, + jc->j_key.ulen, &jc->j_key.data)) != 0) + goto mem_err; + goto retry; + } + + /* + * If ret == DB_NOTFOUND, we're out of elements of the first + * secondary cursor. This is how we finally finish the join + * if all goes well. + */ + if (ret != 0) + goto err; + + /* + * If jc->j_exhausted[0] == 1, we've just advanced the first cursor, + * and we're going to want to advance all the cursors that point to + * the first member of a duplicate duplicate set (j_fdupcurs[1..N]). + * Close all the cursors in j_fdupcurs; we'll reopen them the + * first time through the upcoming loop. + */ + for (i = 1; i < jc->j_ncurs; i++) { + if (jc->j_fdupcurs[i] != NULL && + (ret = __dbc_close(jc->j_fdupcurs[i])) != 0) + goto err; + jc->j_fdupcurs[i] = NULL; + } + + /* + * If jc->j_curslist[1] == NULL, we have only one cursor in the join. 
+ * Thus, we can safely increment that one cursor on each call + * to __db_join_get, and we signal this by setting jc->j_exhausted[0] + * right away. + * + * Otherwise, reset jc->j_exhausted[0] to 0, so that we don't + * increment it until we know we're ready to. + */ + if (jc->j_curslist[1] == NULL) + jc->j_exhausted[0] = 1; + else + jc->j_exhausted[0] = 0; + + /* We have the first element; now look for it in the other cursors. */ + for (i = 1; i < jc->j_ncurs; i++) { + DB_ASSERT(env, jc->j_curslist[i] != NULL); + if (jc->j_workcurs[i] == NULL) + /* If this is NULL, we need to dup curslist into it. */ + if ((ret = __dbc_dup(jc->j_curslist[i], + &jc->j_workcurs[i], DB_POSITION)) != 0) + goto err; + +retry2: cp = jc->j_workcurs[i]; + + if ((ret = __db_join_getnext(cp, &jc->j_key, key_n, + jc->j_exhausted[i], opmods)) == DB_NOTFOUND) { + /* + * jc->j_workcurs[i] has no more of the datum we're + * interested in. Go back one cursor and get + * a new dup. We can't just move to a new + * element of the outer relation, because that way + * we might miss duplicate duplicates in cursor i-1. + * + * If this takes us back to the first cursor, + * -then- we can move to a new element of the outer + * relation. + */ + --i; + jc->j_exhausted[i] = 1; + + if (i == 0) { + for (j = 1; jc->j_workcurs[j] != NULL; j++) { + /* + * We're moving to a new element of + * the first secondary cursor. If + * that cursor is sorted, then any + * other sorted cursors can be safely + * reset to the first duplicate + * duplicate in the current set if we + * have a pointer to it (we can't just + * leave them be, or we'll miss + * duplicate duplicates in the outer + * relation). + * + * If the first cursor is unsorted, or + * if cursor j is unsorted, we can + * make no assumptions about what + * we're looking for next or where it + * will be, so we reset to the very + * beginning (setting workcurs NULL + * will achieve this next go-round). 
+ * + * XXX: This is likely to break + * horribly if any two cursors are + * both sorted, but have different + * specified sort functions. For, + * now, we dismiss this as pathology + * and let strange things happen--we + * can't make rope childproof. + */ + if ((ret = __dbc_close( + jc->j_workcurs[j])) != 0) + goto err; + if (!SORTED_SET(jc, 0) || + !SORTED_SET(jc, j) || + jc->j_fdupcurs[j] == NULL) + /* + * Unsafe conditions; + * reset fully. + */ + jc->j_workcurs[j] = NULL; + else + /* Partial reset suffices. */ + if ((__dbc_dup( + jc->j_fdupcurs[j], + &jc->j_workcurs[j], + DB_POSITION)) != 0) + goto err; + jc->j_exhausted[j] = 0; + } + goto retry; + /* NOTREACHED */ + } + + /* + * We're about to advance the cursor and need to + * reset all of the workcurs[j] where j>i, so that + * we don't miss any duplicate duplicates. + */ + for (j = i + 1; + jc->j_workcurs[j] != NULL; + j++) { + if ((ret = + __dbc_close(jc->j_workcurs[j])) != 0) + goto err; + jc->j_exhausted[j] = 0; + if (jc->j_fdupcurs[j] == NULL) + jc->j_workcurs[j] = NULL; + else if ((ret = __dbc_dup(jc->j_fdupcurs[j], + &jc->j_workcurs[j], DB_POSITION)) != 0) + goto err; + } + goto retry2; + /* NOTREACHED */ + } + + if (ret == DB_BUFFER_SMALL) { + jc->j_key.ulen <<= 1; + if ((ret = __os_realloc(env, jc->j_key.ulen, + &jc->j_key.data)) != 0) { +mem_err: __db_errx(env, DB_STR_A("0517", + "Allocation failed for join key, len = %lu", + "%lu"), (u_long)jc->j_key.ulen); + goto err; + } + goto retry2; + } + + if (ret != 0) + goto err; + + /* + * If we made it this far, we've found a matching + * datum in cursor i. Mark the current cursor + * unexhausted, so we don't miss any duplicate + * duplicates the next go-round--unless this is the + * very last cursor, in which case there are none to + * miss, and we'll need that exhausted flag to finally + * get a DB_NOTFOUND and move on to the next datum in + * the outermost cursor. 
+ */ + if (i + 1 != jc->j_ncurs) + jc->j_exhausted[i] = 0; + else + jc->j_exhausted[i] = 1; + + /* + * If jc->j_fdupcurs[i] is NULL and the ith cursor's dups are + * sorted, then we're here for the first time since advancing + * cursor 0, and we have a new datum of interest. + * jc->j_workcurs[i] points to the beginning of a set of + * duplicate duplicates; store this into jc->j_fdupcurs[i]. + */ + if (SORTED_SET(jc, i) && jc->j_fdupcurs[i] == NULL && (ret = + __dbc_dup(cp, &jc->j_fdupcurs[i], DB_POSITION)) != 0) + goto err; + } + +err: if (ret != 0) + return (ret); + + if (0) { +samekey: /* + * Get the key we tried and failed to return last time; + * it should be the current datum of all the secondary cursors. + */ + if ((ret = __dbc_get(jc->j_workcurs[0], + &jc->j_key, key_n, DB_CURRENT | opmods)) != 0) + return (ret); + F_CLR(jc, JOIN_RETRY); + } + + /* + * ret == 0; we have a key to return. + * + * If DB_DBT_USERMEM or DB_DBT_MALLOC is set, we need to copy the key + * back into the dbt we were given for the key; call __db_retcopy. + * Otherwise, assert that we do not need to copy anything and proceed. + */ + DB_ASSERT(env, F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC | + DB_DBT_USERCOPY) || key_n == key_arg); + + if ((F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC | + DB_DBT_USERCOPY)) && + (ret = __db_retcopy(env, + key_arg, key_n->data, key_n->size, NULL, NULL)) != 0) { + /* + * The retcopy failed, most commonly because we have a user + * buffer for the key which is too small. Set things up to + * retry next time, and return. + */ + F_SET(jc, JOIN_RETRY); + return (ret); + } + + /* + * If DB_JOIN_ITEM is set, we return it; otherwise we do the lookup + * in the primary and then return. 
+ */ + if (operation == DB_JOIN_ITEM) + return (0); + + /* + * If data_arg->flags == 0--that is, if DB is managing the + * data DBT's memory--it's not safe to just pass the DBT + * through to the primary get call, since we don't want that + * memory to belong to the primary DB handle (and if the primary + * is free-threaded, it can't anyway). + * + * Instead, use memory that is managed by the join cursor, in + * jc->j_rdata. + */ + if (!F_ISSET(data_arg, DB_DBT_MALLOC | DB_DBT_REALLOC | + DB_DBT_USERMEM | DB_DBT_USERCOPY)) + db_manage_data = 1; + else + db_manage_data = 0; + if ((ret = __db_join_primget(jc->j_primary, dbc->thread_info, + jc->j_curslist[0]->txn, jc->j_curslist[0]->locker, key_n, + db_manage_data ? &jc->j_rdata : data_arg, opmods)) != 0) { + if (ret == DB_NOTFOUND) { + if (LF_ISSET(DB_READ_UNCOMMITTED) || + (jc->j_curslist[0]->txn != NULL && F_ISSET( + jc->j_curslist[0]->txn, TXN_READ_UNCOMMITTED))) + goto retry; + /* + * If ret == DB_NOTFOUND, the primary and secondary + * are out of sync; every item in each secondary + * should correspond to something in the primary, + * or we shouldn't have done the join this way. + * Wail. + */ + ret = __db_secondary_corrupt(jc->j_primary); + } else + /* + * The get on the primary failed for some other + * reason, most commonly because we're using a user + * buffer that's not big enough. Flag our failure + * so we can return the same key next time. + */ + F_SET(jc, JOIN_RETRY); + } + if (db_manage_data && ret == 0) { + data_arg->data = jc->j_rdata.data; + data_arg->size = jc->j_rdata.size; + } + + return (ret); +} + +/* + * __db_join_close -- + * DBC->close for join cursors. + * + * PUBLIC: int __db_join_close __P((DBC *)); + */ +int +__db_join_close(dbc) + DBC *dbc; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + JOIN_CURSOR *jc; + int ret, t_ret; + u_int32_t i; + + jc = (JOIN_CURSOR *)dbc->internal; + dbp = dbc->dbp; + env = dbp->env; + ret = t_ret = 0; + + /* + * Remove from active list of join cursors. 
Note that this + * must happen before any action that can fail and return, or else + * __db_close may loop indefinitely. + */ + MUTEX_LOCK(env, dbp->mutex); + TAILQ_REMOVE(&dbp->join_queue, dbc, links); + MUTEX_UNLOCK(env, dbp->mutex); + + ENV_ENTER(env, ip); + /* + * Close any open scratch cursors. In each case, there may + * not be as many outstanding as there are cursors in + * curslist, but we want to close whatever's there. + * + * If any close fails, there's no reason not to close everything else; + * we'll just return the error code of the last one to fail. There's + * not much the caller can do anyway, since these cursors only exist + * hanging off a db-internal data structure that they shouldn't be + * mucking with. + */ + for (i = 0; i < jc->j_ncurs; i++) { + if (jc->j_workcurs[i] != NULL && + (t_ret = __dbc_close(jc->j_workcurs[i])) != 0) + ret = t_ret; + if (jc->j_fdupcurs[i] != NULL && + (t_ret = __dbc_close(jc->j_fdupcurs[i])) != 0) + ret = t_ret; + } + ENV_LEAVE(env, ip); + + __os_free(env, jc->j_exhausted); + __os_free(env, jc->j_curslist); + __os_free(env, jc->j_workcurs); + __os_free(env, jc->j_fdupcurs); + __os_free(env, jc->j_key.data); + if (jc->j_rdata.data != NULL) + __os_ufree(env, jc->j_rdata.data); + __os_free(env, jc); + __os_free(env, dbc); + + return (ret); +} + +/* + * __db_join_getnext -- + * This function replaces the DBC_CONTINUE and DBC_KEYSET + * functionality inside the various cursor get routines. + * + * If exhausted == 0, we're not done with the current datum; + * return it if it matches "matching", otherwise search + * using DB_GET_BOTHC (which is faster than iteratively doing + * DB_NEXT_DUP) forward until we find one that does. + * + * If exhausted == 1, we are done with the current datum, so just + * leap forward to searching NEXT_DUPs. + * + * If no matching datum exists, returns DB_NOTFOUND, else 0. 
+ */ +static int +__db_join_getnext(dbc, key, data, exhausted, opmods) + DBC *dbc; + DBT *key, *data; + u_int32_t exhausted, opmods; +{ + int ret, cmp; + DB *dbp; + DBT ldata; + int (*func) __P((DB *, const DBT *, const DBT *)); + + dbp = dbc->dbp; + func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; + + switch (exhausted) { + case 0: + /* + * We don't want to step on data->data; use a new + * DBT and malloc so we don't step on dbc's rdata memory. + */ + memset(&ldata, 0, sizeof(DBT)); + F_SET(&ldata, DB_DBT_MALLOC); + if ((ret = __dbc_get(dbc, + key, &ldata, opmods | DB_CURRENT)) != 0) + break; + cmp = func(dbp, data, &ldata); + if (cmp == 0) { + /* + * We have to return the real data value. Copy + * it into data, then free the buffer we malloc'ed + * above. + */ + if ((ret = __db_retcopy(dbp->env, data, ldata.data, + ldata.size, &data->data, &data->size)) != 0) + return (ret); + __os_ufree(dbp->env, ldata.data); + return (0); + } + + /* + * Didn't match--we want to fall through and search future + * dups. We just forget about ldata and free + * its buffer--data contains the value we're searching for. + */ + __os_ufree(dbp->env, ldata.data); + /* FALLTHROUGH */ + case 1: + ret = __dbc_get(dbc, key, data, opmods | DB_GET_BOTHC); + break; + default: + ret = EINVAL; + break; + } + + return (ret); +} + +/* + * __db_join_cmp -- + * Comparison function for sorting DBCs in cardinality order. + */ +static int +__db_join_cmp(a, b) + const void *a, *b; +{ + DBC *dbca, *dbcb; + db_recno_t counta, countb; + + dbca = *((DBC * const *)a); + dbcb = *((DBC * const *)b); + + if (__dbc_count(dbca, &counta) != 0 || + __dbc_count(dbcb, &countb) != 0) + return (0); + + return ((long)counta - (long)countb); +} + +/* + * __db_join_primget -- + * Perform a DB->get in the primary, being careful not to use a new + * locker ID if we're doing CDB locking. 
+ */ +static int +__db_join_primget(dbp, ip, txn, locker, key, data, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_LOCKER *locker; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + u_int32_t rmw; + int ret, t_ret; + + if ((ret = __db_cursor_int(dbp, ip, + txn, dbp->type, PGNO_INVALID, 0, locker, &dbc)) != 0) + return (ret); + + /* + * The only allowable flags here are the two flags copied into "opmods" + * in __db_join_get, DB_RMW and DB_READ_UNCOMMITTED. The former is an + * op on the c_get call, the latter on the cursor call. It's a DB bug + * if we allow any other flags down in here. + */ + rmw = LF_ISSET(DB_RMW); + if (LF_ISSET(DB_READ_UNCOMMITTED) || + (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED))) + F_SET(dbc, DBC_READ_UNCOMMITTED); + + if (LF_ISSET(DB_READ_COMMITTED) || + (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED))) + F_SET(dbc, DBC_READ_COMMITTED); + + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); + DB_ASSERT(dbp->env, flags == 0); + + F_SET(dbc, DBC_TRANSIENT); + + /* + * This shouldn't be necessary, thanks to the fact that join cursors + * swap in their own DB_DBT_REALLOC'ed buffers, but just for form's + * sake, we mirror what __db_get does. + */ + SET_RET_MEM(dbc, dbp); + + ret = __dbc_get(dbc, key, data, DB_SET | rmw); + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_secondary_corrupt -- + * Report primary/secondary inconsistencies. + * + * PUBLIC: int __db_secondary_corrupt __P((DB *)); + */ +int +__db_secondary_corrupt(dbp) + DB *dbp; +{ + __db_err(dbp->env, DB_SECONDARY_BAD, "%s%s%s", + dbp->fname == NULL ? "unnamed" : dbp->fname, + dbp->dname == NULL ? "" : "/", + dbp->dname == NULL ? "" : dbp->dname); + return (DB_SECONDARY_BAD); +} diff --git a/src/db/db_meta.c b/src/db/db_meta.c new file mode 100644 index 00000000..914a66d0 --- /dev/null +++ b/src/db/db_meta.c @@ -0,0 +1,1433 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/mp.h"
+#include "dbinc/txn.h"
+#include "dbinc/db_am.h"
+#include "dbinc/hash.h"
+
+static void __db_init_meta __P((DB *, void *, db_pgno_t, u_int32_t));
+#ifdef HAVE_FTRUNCATE
+static int __db_pglistcmp __P((const void *, const void *));
+static int __db_truncate_freelist __P((DBC *, DBMETA *,
+      PAGE *, db_pgno_t *, u_int32_t, u_int32_t));
+#endif
+
+/*
+ * __db_init_meta --
+ *	Helper function for __db_new that initializes the important fields in
+ * a meta-data page (used instead of P_INIT).  We need to make sure that we
+ * retain the page number and LSN of the existing page.
+ */
+static void
+__db_init_meta(dbp, p, pgno, pgtype)
+	DB *dbp;
+	void *p;
+	db_pgno_t pgno;
+	u_int32_t pgtype;
+{
+	DBMETA *meta;
+	DB_LSN save_lsn;
+
+	meta = (DBMETA *)p;
+	/* Save the page's LSN across the clear, then restore it. */
+	save_lsn = meta->lsn;
+	memset(meta, 0, sizeof(DBMETA));
+	meta->lsn = save_lsn;
+	meta->pagesize = dbp->pgsize;
+	/* Carry the database's checksum setting onto the new meta page. */
+	if (F_ISSET(dbp, DB_AM_CHKSUM))
+		FLD_SET(meta->metaflags, DBMETA_CHKSUM);
+	meta->pgno = pgno;
+	meta->type = (u_int8_t)pgtype;
+}
+
+/*
+ * __db_new --
+ *	Get a new page, preferably from the freelist.
+ *
+ * PUBLIC: int __db_new __P((DBC *, u_int32_t, DB_LOCK *, PAGE **));
+ */
+int
+__db_new(dbc, type, lockp, pagepp)
+	DBC *dbc;
+	u_int32_t type;
+	DB_LOCK *lockp;
+	PAGE **pagepp;
+{
+	DB *dbp;
+	DBMETA *meta;
+	DB_LOCK metalock;
+	DB_LSN lsn;
+	DB_MPOOLFILE *mpf;
+	ENV *env;
+	PAGE *h;
+	db_pgno_t last, *list, pgno, newnext;
+	int extend, hash, ret;
+
+	meta = NULL;
+	dbp = dbc->dbp;
+	env = dbp->env;
+	mpf = dbp->mpf;
+	h = NULL;
+	newnext = PGNO_INVALID;
+	if (lockp != NULL)
+		LOCK_INIT(*lockp);
+
+	hash = 0;
+	ret = 0;
+	LOCK_INIT(metalock);
+
+#ifdef HAVE_HASH
+	/* Hash keeps its own reference to the meta page; reuse it if so. */
+	if (dbp->type == DB_HASH) {
+		if ((ret = __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0)
+			goto err;
+		if (meta != NULL)
+			hash = 1;
+	}
+#endif
+	if (meta == NULL) {
+		pgno = PGNO_BASE_MD;
+		if ((ret = __db_lget(dbc,
+		    LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+			goto err;
+		if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
+		    DB_MPOOL_DIRTY, &meta)) != 0)
+			goto err;
+	}
+
+	last = meta->last_pgno;
+	if (meta->free == PGNO_INVALID) {
+		/* Empty free list: extend the file, unless told not to. */
+		if (FLD_ISSET(type, P_DONTEXTEND)) {
+			*pagepp = NULL;
+			goto err;
+		}
+		last = pgno = meta->last_pgno + 1;
+		ZERO_LSN(lsn);
+		extend = 1;
+	} else {
+		pgno = meta->free;
+		/*
+		 * Lock the new page.  Do this here because we must do it
+		 * before getting the page and the caller may need the lock
+		 * to keep readers from seeing the page before the transaction
+		 * commits.  We can do this because no one will hold a free
+		 * page locked.
+		 */
+		if (lockp != NULL && (ret =
+		     __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0)
+			goto err;
+		if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
+		    DB_MPOOL_DIRTY, &h)) != 0)
+			goto err;
+
+		/*
+		 * We want to take the first page off the free list and
+		 * then set meta->free to that page's next_pgno, but
+		 * we need to log the change first.
+		 */
+		newnext = h->next_pgno;
+		lsn = h->lsn;
+		extend = 0;
+		DB_ASSERT(env, TYPE(h) == P_INVALID);
+
+		/*
+		 * NOTE(review): in diagnostic builds the DB_ASSERT above
+		 * fires before this recovery path can run, and this return
+		 * bypasses the err: cleanup (metalock/meta/h still held).
+		 * The panic presumably makes cleanup moot -- confirm.
+		 */
+		if (TYPE(h) != P_INVALID) {
+			__db_errx(env, DB_STR_A("0689",
+			    "%s page %lu is on free list with type %lu",
+			    "%s %lu %lu"), dbp->fname, (u_long)PGNO(h),
+			    (u_long)TYPE(h));
+			return (__env_panic(env, EINVAL));
+		}
+
+	}
+
+	FLD_CLR(type, P_DONTEXTEND);
+
+	/*
+	 * Log the allocation before fetching the new page.  If we
+	 * don't have room in the log then we don't want to tell
+	 * mpool to extend the file.
+	 */
+	if (DBC_LOGGING(dbc)) {
+		if ((ret = __db_pg_alloc_log(dbp, dbc->txn, &LSN(meta), 0,
+		    &LSN(meta), PGNO_BASE_MD, &lsn,
+		    pgno, (u_int32_t)type, newnext, meta->last_pgno)) != 0)
+			goto err;
+	} else
+		LSN_NOT_LOGGED(LSN(meta));
+
+	meta->free = newnext;
+
+	if (extend == 1) {
+		/* Extending: lock, then create the page in mpool. */
+		if (lockp != NULL && (ret =
+		    __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0)
+			goto err;
+		if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
+		    DB_MPOOL_NEW, &h)) != 0)
+			goto err;
+		DB_ASSERT(env, last == pgno);
+		meta->last_pgno = pgno;
+		ZERO_LSN(h->lsn);
+		h->pgno = pgno;
+
+		/*
+		 * If the file was extended for the first time in this
+		 * transaction, set the MPOOLFILE's file extension
+		 * watermark.
+		 */
+		__txn_add_fe_watermark(dbc->txn, dbp, h->pgno);
+
+	}
+	LSN(h) = LSN(meta);
+
+	/* For hash, __ham_return_meta owns the meta page; don't put it. */
+	if (hash == 0 && (ret = __memp_fput(mpf,
+	    dbc->thread_info, meta, dbc->priority)) != 0)
+		goto err;
+	meta = NULL;
+
+	switch (type) {
+	case P_BTREEMETA:
+	case P_HASHMETA:
+	case P_QAMMETA:
+		__db_init_meta(dbp, h, h->pgno, type);
+		break;
+	default:
+		P_INIT(h, dbp->pgsize,
+		    h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
+		break;
+	}
+
+	/* Fix up the sorted free list if necessary. */
+#ifdef HAVE_FTRUNCATE
+	if (extend == 0) {
+		u_int32_t nelems = 0;
+
+		if ((ret = __memp_get_freelist(dbp->mpf, &nelems, &list)) != 0)
+			goto err;
+		if (nelems != 0) {
+			/* The allocated page must have been the list head. */
+			DB_ASSERT(env, h->pgno == list[0]);
+			memmove(list, &list[1], (nelems - 1) * sizeof(*list));
+			if ((ret = __memp_extend_freelist(
+			    dbp->mpf, nelems - 1, &list)) != 0)
+				goto err;
+		}
+	}
+#else
+	COMPQUIET(list, NULL);
+#endif
+
+	if ((ret = __TLPUT(dbc, metalock)) != 0)
+		return (ret);
+	*pagepp = h;
+	PERFMON6(env, alloc, new, dbp->fname, dbp->dname, pgno, type, h, 0);
+	return (0);
+
+err:	if (h != NULL)
+		(void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
+	if (meta != NULL && hash == 0)
+		(void)__memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
+	(void)__TLPUT(dbc, metalock);
+	if (lockp != NULL)
+		(void)__LPUT(dbc, *lockp);
+	/* Failure return - report 0 pgno, null page address. */
+	PERFMON6(env, alloc, new, dbp->fname, dbp->dname, 0, type, NULL, ret);
+	return (ret);
+}
+
+/*
+ * __db_free --
+ *	Add a page to the head of the freelist.
+ *
+ * PUBLIC: int __db_free __P((DBC *, PAGE *, u_int32_t));
+ */
+int
+__db_free(dbc, h, flags)
+	DBC *dbc;
+	PAGE *h;
+	u_int32_t flags;
+{
+	DB *dbp;
+	DBMETA *meta;
+	DBT ddbt, ldbt;
+	DB_LOCK metalock;
+	DB_LSN *lsnp;
+	DB_MPOOLFILE *mpf;
+	PAGE *prev;
+	db_pgno_t last_pgno, next_pgno, pgno, prev_pgno;
+	u_int32_t lflag;
+	int hash, ret, t_ret;
+#ifdef HAVE_FTRUNCATE
+	db_pgno_t *list, *lp;
+	u_int32_t nelem, position, start;
+	int do_truncate;
+#endif
+
+	dbp = dbc->dbp;
+	mpf = dbp->mpf;
+	prev_pgno = PGNO_INVALID;
+	meta = NULL;
+	prev = NULL;
+	LOCK_INIT(metalock);
+#ifdef HAVE_FTRUNCATE
+	lp = NULL;
+	nelem = 0;
+	do_truncate = 0;
+#endif
+
+	/*
+	 * Retrieve the metadata page.  If we are not keeping a sorted
+	 * free list put the page at the head of the free list.
+ * If we are keeping a sorted free list, for truncation, + * then figure out where this page belongs and either + * link it in or truncate the file as much as possible. + * If either the lock get or page get routines + * fail, then we need to put the page with which we were called + * back because our caller assumes we take care of it. + */ + hash = 0; + + pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + +#ifdef HAVE_HASH + if (dbp->type == DB_HASH) { + if ((ret = __ham_return_meta(dbc, +#ifdef HAVE_FTRUNCATE + 0, +#else + DB_MPOOL_DIRTY, +#endif + &meta)) != 0) + goto err; + if (meta != NULL) + hash = 1; + } +#endif + if (meta == NULL) { + /* If we support truncate, we might not dirty the meta page. */ + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, +#ifdef HAVE_FTRUNCATE + 0, +#else + DB_MPOOL_DIRTY, +#endif + &meta)) != 0) + goto err1; + } + + last_pgno = meta->last_pgno; + next_pgno = meta->free; + /* + * Assign lsnp here so it always initialized when + * HAVE_FTRUNCATE is not defined. + */ + lsnp = &LSN(meta); + + DB_ASSERT(dbp->env, h->pgno != next_pgno); + +#ifdef HAVE_FTRUNCATE + /* + * If we are maintaining a sorted free list see if we either have a + * new truncation point or the page goes somewhere in the middle of + * the list. If it goes in the middle of the list, we will drop the + * meta page and get the previous page. + */ + COMPQUIET(position, 0); + if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0) + goto err1; + if (list == NULL) + goto no_sort; + + if (h->pgno != last_pgno) { + /* + * Put the page number in the sorted list. Find its + * position and the previous page. After logging we + * will extend the list, make room and insert the page in + * the list. 
+ */ + position = 0; + if (nelem != 0) { + __db_freelist_pos(h->pgno, list, nelem, &position); + + DB_ASSERT(dbp->env, h->pgno != list[position]); + + /* Get the previous page if this is not the smallest. */ + if (position != 0 || h->pgno > list[0]) + prev_pgno = list[position]; + } + + } else if (nelem != 0) { + /* Find the truncation point. */ + for (lp = &list[nelem - 1]; lp >= list; lp--) + if (--last_pgno != *lp) + break; + if (lp < list || last_pgno < h->pgno - 1) + do_truncate = 1; + last_pgno = meta->last_pgno; + } + +no_sort: + if (prev_pgno == PGNO_INVALID) { +#ifdef HAVE_HASH + if (hash) { + if ((ret = + __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0) + goto err1; + } else +#endif + if ((ret = __memp_dirty(mpf, + &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err1; + lsnp = &LSN(meta); + } else { + pgno = prev_pgno; + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &prev)) != 0) + goto err1; + next_pgno = NEXT_PGNO(prev); + lsnp = &LSN(prev); + } +#endif + + /* + * Log the change. + * We are either logging an update to the metapage or to the + * previous page in the sorted list. + */ + if (DBC_LOGGING(dbc)) { + memset(&ldbt, 0, sizeof(ldbt)); + ldbt.data = h; + ldbt.size = P_OVERHEAD(dbp); + /* + * If we are truncating the file, we need to make sure + * the logging happens before the truncation. If we + * are truncating multiple pages we don't need to flush the + * log here as it will be flushed by __db_truncate_freelist. + * If we are zeroing pages rather than truncating we still + * need to flush since they will not have valid LSNs. 
+ */ + lflag = 0; + + if (h->pgno == last_pgno +#ifdef HAVE_FTRUNCATE + && do_truncate == 0 +#endif + ) + lflag = DB_FLUSH; + switch (h->type) { + case P_HASH: + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + if (h->entries > 0 && (h->pgno == last_pgno || + !LF_ISSET(DB_LOG_NO_DATA))) { + ldbt.size += h->entries * sizeof(db_indx_t); + ddbt.data = (u_int8_t *)h + HOFFSET(h); + ddbt.size = dbp->pgsize - HOFFSET(h); + if ((ret = __db_pg_freedata_log(dbp, dbc->txn, + lsnp, lflag, + h->pgno, lsnp, pgno, + &ldbt, next_pgno, last_pgno, &ddbt)) != 0) + goto err1; + goto logged; + } + break; + case P_HASHMETA: + ldbt.size = sizeof(HMETA); + break; + case P_BTREEMETA: + ldbt.size = sizeof(BTMETA); + break; + case P_OVERFLOW: + ldbt.size += OV_LEN(h); + break; + default: + DB_ASSERT(dbp->env, h->type != P_QAMDATA); + } + + if ((ret = __db_pg_free_log(dbp, + dbc->txn, lsnp, lflag, h->pgno, + lsnp, pgno, &ldbt, next_pgno, last_pgno)) != 0) + goto err1; + } else + LSN_NOT_LOGGED(*lsnp); + +logged: +#ifdef HAVE_FTRUNCATE + if (do_truncate) { + start = (u_int32_t) (lp - list) + 1; + meta->last_pgno--; + ret = __db_truncate_freelist( + dbc, meta, h, list, start, nelem); + h = NULL; + } else +#endif + if (h->pgno == last_pgno) { + /* + * We are going to throw this page away, but if we are + * using MVCC then this version may stick around and we + * might have to make a copy. + */ + if (atomic_read(&mpf->mfp->multiversion) && + (ret = __memp_dirty(mpf, + &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err1; + LSN(h) = *lsnp; + P_INIT(h, dbp->pgsize, + h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID); + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0) + goto err1; + h = NULL; + /* Give the page back to the OS. 
*/ + if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, + last_pgno, 0)) != 0) + goto err1; + DB_ASSERT(dbp->env, meta->pgno == PGNO_BASE_MD); + meta->last_pgno--; + } else { +#ifdef HAVE_FTRUNCATE + if (list != NULL) { + /* Put the page number into the list. */ + if ((ret = + __memp_extend_freelist(mpf, nelem + 1, &list)) != 0) + goto err1; + if (prev_pgno != PGNO_INVALID) + lp = &list[position + 1]; + else + lp = list; + if (nelem != 0 && position != nelem) + memmove(lp + 1, lp, (size_t) + ((u_int8_t*)&list[nelem] - (u_int8_t*)lp)); + *lp = h->pgno; + } +#endif + /* + * If we are not truncating the page then we + * reinitialize it and put it at the head of + * the free list. + */ + if ((ret = __memp_dirty(mpf, + &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err1; + LSN(h) = *lsnp; + P_INIT(h, dbp->pgsize, + h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID); +#ifdef DIAGNOSTIC + memset((u_int8_t *) h + P_OVERHEAD(dbp), + CLEAR_BYTE, dbp->pgsize - P_OVERHEAD(dbp)); +#endif + if (prev_pgno == PGNO_INVALID) + meta->free = h->pgno; + else + NEXT_PGNO(prev) = h->pgno; + } + + /* Discard the metadata or previous page. */ +err1: if (hash == 0 && meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (prev != (PAGE*) meta && prev != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, prev, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Discard the caller's page reference. */ +err: if (h != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + PERFMON4(dbp->env, alloc, free, dbp->fname, dbp->dname, pgno, ret); + /* + * XXX + * We have to unlock the caller's page in the caller! + */ + return (ret); +} + +#ifdef HAVE_FTRUNCATE +/* + * __db_freelist_pos -- find the position of a page in the freelist. 
+ * The list is sorted, we do a binary search.
+ *
+ * PUBLIC: #ifdef HAVE_FTRUNCATE
+ * PUBLIC: void __db_freelist_pos __P((db_pgno_t,
+ * PUBLIC:       db_pgno_t *, u_int32_t, u_int32_t *));
+ * PUBLIC: #endif
+ */
+void
+__db_freelist_pos(pgno, list, nelem, posp)
+	db_pgno_t pgno;
+	db_pgno_t *list;
+	u_int32_t nelem;
+	u_int32_t *posp;
+{
+	u_int32_t base, indx, lim;
+
+	indx = 0;
+	/*
+	 * Binary search; on a miss the loop leaves base just past the
+	 * largest element smaller than pgno, and we back up by one so
+	 * *posp is the slot pgno would follow (exact slot on a hit).
+	 */
+	for (base = 0, lim = nelem; lim != 0; lim >>= 1) {
+		indx = base + (lim >> 1);
+		if (pgno == list[indx]) {
+			*posp = indx;
+			return;
+		}
+		if (pgno > list[indx]) {
+			base = indx + 1;
+			--lim;
+		}
+	}
+	if (base != 0)
+		base--;
+	*posp = base;
+	return;
+}
+
+/* qsort comparator: order db_pglist_t entries by ascending page number. */
+static int
+__db_pglistcmp(a, b)
+	const void *a, *b;
+{
+	db_pglist_t *ap, *bp;
+
+	ap = (db_pglist_t *)a;
+	bp = (db_pglist_t *)b;
+
+	return ((ap->pgno > bp->pgno) ? 1 : (ap->pgno < bp->pgno) ? -1: 0);
+}
+
+/*
+ * __db_freelist_sort -- sort a list of free pages.
+ * PUBLIC: void __db_freelist_sort __P((db_pglist_t *, u_int32_t));
+ */
+void
+__db_freelist_sort(list, nelems)
+	db_pglist_t *list;
+	u_int32_t nelems;
+{
+	qsort(list, (size_t)nelems, sizeof(db_pglist_t), __db_pglistcmp);
+}
+
+/*
+ * __db_pg_truncate -- find the truncation point in a sorted freelist.
+ * + * PUBLIC: #ifdef HAVE_FTRUNCATE + * PUBLIC: int __db_pg_truncate __P((DBC *, DB_TXN *, + * PUBLIC: db_pglist_t *, DB_COMPACT *, u_int32_t *, + * PUBLIC: db_pgno_t , db_pgno_t *, DB_LSN *, int)); + * PUBLIC: #endif + */ +int +__db_pg_truncate(dbc, txn, + list, c_data, nelemp, free_pgno, last_pgno, lsnp, in_recovery) + DBC *dbc; + DB_TXN *txn; + db_pglist_t *list; + DB_COMPACT *c_data; + u_int32_t *nelemp; + db_pgno_t free_pgno, *last_pgno; + DB_LSN *lsnp; + int in_recovery; +{ + DB *dbp; + DBT ddbt; + DB_LSN null_lsn; + DB_MPOOLFILE *mpf; + PAGE *h; + db_pglist_t *lp, *slp; + db_pgno_t lpgno, pgno; + u_int32_t elems, log_size, tpoint; + int last, ret; + + ret = 0; + h = NULL; + + dbp = dbc->dbp; + mpf = dbp->mpf; + elems = tpoint = *nelemp; + + /* + * Figure out what (if any) pages can be truncated immediately and + * record the place from which we can truncate, so we can do the + * memp_ftruncate below. We also use this to avoid ever putting + * these pages on the freelist, which we are about to relink. + */ + pgno = *last_pgno; + lp = &list[elems - 1]; + last = 1; + while (tpoint != 0) { + if (lp->pgno != pgno) + break; + pgno--; + tpoint--; + lp--; + } + + lp = list; + slp = &list[elems]; + /* + * Log the sorted list. We log the whole list so it can be rebuilt. + * Don't overflow the log file. + */ +again: if (DBC_LOGGING(dbc)) { + last = 1; + lpgno = *last_pgno; + ddbt.size = elems * sizeof(*lp); + ddbt.data = lp; + log_size = ((LOG *)dbc->env-> + lg_handle->reginfo.primary)->log_size; + if (ddbt.size > log_size / 2) { + elems = (log_size / 2) / sizeof(*lp); + ddbt.size = elems * sizeof(*lp); + last = 0; + /* + * If we stopped after the truncation point + * then we need to truncate from here. + */ + if (lp + elems >= &list[tpoint]) + lpgno = lp[elems - 1].pgno; + } + /* + * If this is not the begining of the list fetch the end + * of the previous segment. This page becomes the last_free + * page and will link to this segment if it is not truncated. 
+ */ + if (lp != list) { + if ((ret = __memp_fget(mpf, &lp[-1].pgno, + dbc->thread_info, txn, 0, &h)) != 0) + goto err; + } + + slp = &lp[elems]; + + ZERO_LSN(null_lsn); + if ((ret = __db_pg_trunc_log(dbp, dbc->txn, + lsnp, last == 1 ? DB_FLUSH : 0, PGNO_BASE_MD, + lsnp, h != NULL ? PGNO(h) : PGNO_INVALID, + h != NULL ? &LSN(h) : &null_lsn, + free_pgno, lpgno, &ddbt)) != 0) + goto err; + if (h != NULL) { + LSN(h) = *lsnp; + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + } + h = NULL; + } else if (!in_recovery) + LSN_NOT_LOGGED(*lsnp); + + for (; lp < slp && lp < &list[tpoint]; lp++) { + if ((ret = __memp_fget(mpf, &lp->pgno, dbc->thread_info, + txn, !in_recovery ? DB_MPOOL_DIRTY : 0, &h)) != 0) { + /* Page may have been truncated later. */ + if (in_recovery && ret == DB_PAGE_NOTFOUND) { + ret = 0; + continue; + } + goto err; + } + if (in_recovery) { + if (LOG_COMPARE(&LSN(h), &lp->lsn) == 0) { + if ((ret = __memp_dirty(mpf, &h, + dbc->thread_info, + txn, dbp->priority, 0)) != 0) { + (void)__memp_fput(mpf, + dbc->thread_info, h, dbp->priority); + goto err; + } + } else + goto skip; + } + + if (lp == &list[tpoint - 1]) + NEXT_PGNO(h) = PGNO_INVALID; + else + NEXT_PGNO(h) = lp[1].pgno; + DB_ASSERT(mpf->env, NEXT_PGNO(h) < *last_pgno); + + LSN(h) = *lsnp; +skip: if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbp->priority)) != 0) + goto err; + h = NULL; + } + + /* + * If we did not log everything try again. We start from slp and + * try to go to the end of the list. + */ + if (last == 0) { + elems = (u_int32_t)(&list[*nelemp] - slp); + lp = slp; + goto again; + } + + /* + * Truncate the file. Its possible that the last page is the + * only one that got truncated and that's done in the caller. + */ + if (pgno != *last_pgno) { + if (tpoint != *nelemp && + (ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, + pgno + 1, in_recovery ? 
MP_TRUNC_RECOVER : 0)) != 0) + goto err; + if (c_data) + c_data->compact_pages_truncated += *last_pgno - pgno; + *last_pgno = pgno; + } + *nelemp = tpoint; + + if (0) { +err: if (h != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + } + return (ret); +} + +/* + * __db_free_truncate -- + * Build a sorted free list and truncate free pages at the end + * of the file. + * + * PUBLIC: #ifdef HAVE_FTRUNCATE + * PUBLIC: int __db_free_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: u_int32_t, DB_COMPACT *, db_pglist_t **, u_int32_t *, + * PUBLIC: db_pgno_t *)); + * PUBLIC: #endif + */ +int +__db_free_truncate(dbp, ip, txn, flags, c_data, listp, nelemp, last_pgnop) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t flags; + DB_COMPACT *c_data; + db_pglist_t **listp; + u_int32_t *nelemp; + db_pgno_t *last_pgnop; +{ + DBC *dbc; + DBMETA *meta; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + db_pglist_t *list, *lp; + db_pgno_t pgno; + u_int32_t nelems; + int ret, t_ret; + size_t size; + + COMPQUIET(flags, 0); + list = NULL; + meta = NULL; + env = dbp->env; + mpf = dbp->mpf; + h = NULL; + nelems = 0; + if (listp != NULL) { + *listp = NULL; + DB_ASSERT(env, nelemp != NULL); + *nelemp = 0; + } + + if ((ret = __db_cursor(dbp, ip, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, 0, + &meta)) != 0) + goto err; + + if (last_pgnop != NULL) + *last_pgnop = meta->last_pgno; + if ((pgno = meta->free) == PGNO_INVALID) + goto done; + + size = 128; + if ((ret = __os_malloc(env, size * sizeof(*list), &list)) != 0) + goto err; + lp = list; + + do { + if (lp == &list[size]) { + size *= 2; + if ((ret = __os_realloc(env, + size * sizeof(*list), &list)) != 0) + goto err; + lp = &list[size / 2]; + } + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, 
dbc->txn, 0, &h)) != 0) + goto err; + + lp->pgno = pgno; + lp->next_pgno = NEXT_PGNO(h); + lp->lsn = LSN(h); + pgno = NEXT_PGNO(h); + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0) + goto err; + lp++; + } while (pgno != PGNO_INVALID); + nelems = (u_int32_t)(lp - list); + + if ((ret = __memp_dirty(mpf, + &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + + /* Sort the list */ + __db_freelist_sort(list, nelems); + + if ((ret = __db_pg_truncate(dbc, txn, list, c_data, + &nelems, meta->free, &meta->last_pgno, &LSN(meta), 0)) != 0) + goto err; + + if (nelems == 0) + meta->free = PGNO_INVALID; + else + meta->free = list[0].pgno; + +done: if (last_pgnop != NULL) + *last_pgnop = meta->last_pgno; + + /* + * The truncate point is the number of pages in the free + * list back from the last page. The number of pages + * in the free list are the number that we can swap in. + * Adjust it down slightly so if we find higher numbered + * pages early and then free other pages later we can + * truncate them. 
+ */ + if (c_data) { + c_data->compact_truncate = (u_int32_t)meta->last_pgno - nelems; + if (c_data->compact_truncate > nelems >> 2) + c_data->compact_truncate -= nelems >> 2; + } + + if (nelems != 0 && listp != NULL) { + *listp = list; + *nelemp = nelems; + list = NULL; + } + +err: if (list != NULL) + __os_free(env, list); + if (meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__db_truncate_freelist(dbc, meta, h, list, start, nelem) + DBC *dbc; + DBMETA *meta; + PAGE *h; + db_pgno_t *list; + u_int32_t start, nelem; +{ + DB *dbp; + DBT ddbt; + DB_LSN null_lsn; + DB_MPOOLFILE *mpf; + PAGE *last_free, *pg; + db_pgno_t *lp, free_pgno, lpgno; + db_pglist_t *plist, *pp, *spp; + u_int32_t elem, log_size; + int last, ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + plist = NULL; + last_free = NULL; + pg = NULL; + + if (start != 0 && + (ret = __memp_fget(mpf, &list[start - 1], + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &last_free)) != 0) + goto err; + + if (DBC_LOGGING(dbc)) { + if ((ret = __os_malloc(dbp->env, + (nelem - start) * sizeof(*pp), &plist)) != 0) + goto err; + + pp = plist; + for (lp = &list[start]; lp < &list[nelem]; lp++) { + pp->pgno = *lp; + if ((ret = __memp_fget(mpf, lp, + dbc->thread_info, dbc->txn, 0, &pg)) != 0) + goto err; + pp->lsn = LSN(pg); + pp->next_pgno = NEXT_PGNO(pg); + if ((ret = __memp_fput(mpf, + dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0) + goto err; + pg = NULL; + pp++; + } + ZERO_LSN(null_lsn); + pp = plist; + elem = nelem - start; + log_size = ((LOG *)dbc->env-> + lg_handle->reginfo.primary)->log_size; +again: ddbt.data = spp = pp; + free_pgno = pp->pgno; + lpgno = meta->last_pgno; + ddbt.size = elem * sizeof(*pp); + if (ddbt.size > log_size / 2) { + elem = (log_size / 2) / 
(u_int32_t)sizeof(*pp); + ddbt.size = elem * sizeof(*pp); + pp += elem; + elem = (nelem - start) - (u_int32_t)(pp - plist); + lpgno = pp[-1].pgno; + last = 0; + } else + last = 1; + /* + * Get the page which will link to this section if we abort. + * If this is the first segment then its last_free. + */ + if (spp == plist) + pg = last_free; + else if ((ret = __memp_fget(mpf, &spp[-1].pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0) + goto err; + + if ((ret = __db_pg_trunc_log(dbp, dbc->txn, + &LSN(meta), last == 1 ? DB_FLUSH : 0, + PGNO(meta), &LSN(meta), + pg != NULL ? PGNO(pg) : PGNO_INVALID, + pg != NULL ? &LSN(pg) : &null_lsn, + free_pgno, lpgno, &ddbt)) != 0) + goto err; + if (pg != NULL) { + LSN(pg) = LSN(meta); + if (pg != last_free && (ret = __memp_fput(mpf, + dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0) + goto err; + pg = NULL; + } + if (last == 0) + goto again; + } else + LSN_NOT_LOGGED(LSN(meta)); + + if ((ret = __memp_fput(mpf, + dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0) + goto err; + h = NULL; + if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, + list[start], 0)) != 0) + goto err; + meta->last_pgno = list[start] - 1; + + if (start == 0) + meta->free = PGNO_INVALID; + else { + NEXT_PGNO(last_free) = PGNO_INVALID; + if ((ret = __memp_fput(mpf, + dbc->thread_info, last_free, dbc->priority)) != 0) + goto err; + last_free = NULL; + } + + /* Shrink the number of elements in the list. */ + ret = __memp_extend_freelist(mpf, start, &list); + +err: if (plist != NULL) + __os_free(dbp->env, plist); + + /* We need to put the page on error. 
*/ + if (h != NULL) + (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority); + if (pg != NULL && pg != last_free) + (void)__memp_fput(mpf, dbc->thread_info, pg, dbc->priority); + if (last_free != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, last_free, dbc->priority); + + return (ret); +} +#endif + +#ifdef DEBUG +/* + * __db_lprint -- + * Print out the list of locks currently held by a cursor. + * + * PUBLIC: int __db_lprint __P((DBC *)); + */ +int +__db_lprint(dbc) + DBC *dbc; +{ + DB *dbp; + DB_LOCKREQ req; + ENV *env; + + dbp = dbc->dbp; + env = dbp->env; + + if (LOCKING_ON(env)) { + req.op = DB_LOCK_DUMP; + (void)__lock_vec(env, dbc->locker, 0, &req, 1, NULL); + } + return (0); +} +#endif + +/* + * __db_lget -- + * The standard lock get call. + * + * PUBLIC: int __db_lget __P((DBC *, + * PUBLIC: int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *)); + */ +int +__db_lget(dbc, action, pgno, mode, lkflags, lockp) + DBC *dbc; + int action; + db_pgno_t pgno; + db_lockmode_t mode; + u_int32_t lkflags; + DB_LOCK *lockp; +{ + DB *dbp; + DB_LOCKREQ couple[3], *reqp; + DB_TXN *txn; + ENV *env; + int has_timeout, i, ret; + + dbp = dbc->dbp; + env = dbp->env; + txn = dbc->txn; + + /* + * We do not always check if we're configured for locking before + * calling __db_lget to acquire the lock. + */ + if (CDB_LOCKING(env) || !LOCKING_ON(env) || + (MULTIVERSION(dbp) && mode == DB_LOCK_READ && + dbc->txn != NULL && F_ISSET(dbc->txn, TXN_SNAPSHOT)) || + F_ISSET(dbc, DBC_DONTLOCK) || (F_ISSET(dbc, DBC_RECOVER) && + (action != LCK_ROLLBACK || IS_REP_CLIENT(env))) || + (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) { + LOCK_INIT(*lockp); + return (0); + } + + /* + * If the transaction enclosing this cursor has DB_LOCK_NOWAIT set, + * pass that along to the lock call. + */ + if (DB_NONBLOCK(dbc)) + lkflags |= DB_LOCK_NOWAIT; + + /* + * If we're trying to run in exclusive mode, attempt to get an + * exclusive database lock. 
If it is not available then wait + * for the lock on the database and clear the exclusive bit. + * + * If we get an exclusive lock on the database, mark the cursor + * with DBC_DONTLOCK to avoid any further locking. + */ + if (F_ISSET(dbp->mpf->mfp, MP_DATABASE_LOCKING)) { + dbc->lock.type = DB_DATABASE_LOCK; + dbc->lock.pgno = PGNO_BASE_MD; + if ((ret = __lock_get(env, dbc->locker, DB_LOCK_NOWAIT, + &dbc->lock_dbt, F_ISSET(dbp, DB_AM_RDONLY) ? + DB_LOCK_READ : DB_LOCK_WRITE, lockp)) == 0) { + if (F_ISSET(dbp->mpf->mfp, MP_DATABASE_LOCKING)) { + F_SET(dbc, DBC_DONTLOCK); + if (!IS_REAL_TXN(txn)) + dbc->mylock = *lockp; + LOCK_INIT(*lockp); + return (0); + } + } else if (ret == DB_LOCK_NOTGRANTED && + (lkflags & DB_LOCK_NOWAIT) == 0) { + if ((ret = __lock_get(env, dbc->locker, 0, + &dbc->lock_dbt, DB_LOCK_WRITE, lockp)) != 0) + return (ret); + F_CLR(dbp->mpf->mfp, MP_DATABASE_LOCKING); + if ((ret = __lock_put(env, lockp)) != 0) + return (ret); + LOCK_INIT(*lockp); + } else if (ret != 0) + return (ret); + } + + dbc->lock.pgno = pgno; + if (lkflags & DB_LOCK_RECORD) + dbc->lock.type = DB_RECORD_LOCK; + else + dbc->lock.type = DB_PAGE_LOCK; + lkflags &= ~DB_LOCK_RECORD; + + if (F_ISSET(dbc, DBC_READ_UNCOMMITTED) && mode == DB_LOCK_READ) + mode = DB_LOCK_READ_UNCOMMITTED; + + has_timeout = F_ISSET(dbc, DBC_RECOVER) || + (txn != NULL && F_ISSET(txn, TXN_LOCKTIMEOUT)); + + /* + * Transactional locking. + * Hold on to the previous read lock only if we are in full isolation. + * COUPLE_ALWAYS indicates we are holding an interior node which need + * not be isolated. + * Downgrade write locks if we are supporting dirty readers and the + * update did not have an error. 
+ */ + if ((action != LCK_COUPLE && action != LCK_COUPLE_ALWAYS) || + !LOCK_ISSET(*lockp)) + action = 0; + else if (dbc->txn == NULL || action == LCK_COUPLE_ALWAYS) + action = LCK_COUPLE; + else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) && + lockp->mode == DB_LOCK_READ) + action = LCK_COUPLE; + else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED) + action = LCK_COUPLE; + else if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && + !F_ISSET(dbc, DBC_ERROR) && lockp->mode == DB_LOCK_WRITE) + action = LCK_DOWNGRADE; + else + action = 0; + + i = 0; + switch (action) { + default: + if (has_timeout) + goto do_couple; + ret = __lock_get(env, + dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp); + break; + + case LCK_DOWNGRADE: + couple[0].op = DB_LOCK_GET; + couple[0].obj = NULL; + couple[0].lock = *lockp; + couple[0].mode = DB_LOCK_WWRITE; + UMRW_SET(couple[0].timeout); + i++; + /* FALLTHROUGH */ + case LCK_COUPLE: +do_couple: couple[i].op = has_timeout? DB_LOCK_GET_TIMEOUT : DB_LOCK_GET; + couple[i].obj = &dbc->lock_dbt; + couple[i].mode = mode; + UMRW_SET(couple[i].timeout); + i++; + if (has_timeout) + couple[0].timeout = + F_ISSET(dbc, DBC_RECOVER) ? 0 : txn->lock_timeout; + if (action == LCK_COUPLE || action == LCK_DOWNGRADE) { + couple[i].op = DB_LOCK_PUT; + couple[i].lock = *lockp; + i++; + } + + ret = __lock_vec(env, + dbc->locker, lkflags, couple, i, &reqp); + if (ret == 0 || reqp == &couple[i - 1]) + *lockp = i == 1 ? couple[0].lock : couple[i - 2].lock; + break; + } + + if (txn != NULL && ret == DB_LOCK_DEADLOCK) + F_SET(txn, TXN_DEADLOCK); + return ((ret == DB_LOCK_NOTGRANTED && !F_ISSET(env->dbenv, + DB_ENV_TIME_NOTGRANTED)) ? DB_LOCK_DEADLOCK : ret); +} + +#ifdef DIAGNOSTIC +/* + * __db_haslock -- + * Determine if this locker holds a particular lock. + * Returns 0 if lock is held, non-zero otherwise. 
+ *
+ * PUBLIC: #ifdef DIAGNOSTIC
+ * PUBLIC: int __db_haslock __P((ENV *, DB_LOCKER *,
+ * PUBLIC:     DB_MPOOLFILE *, db_pgno_t, db_lockmode_t, u_int32_t));
+ * PUBLIC: #endif
+ */
+int
+__db_haslock(env, locker, dbmfp, pgno, mode, type)
+	ENV *env;
+	DB_LOCKER *locker;
+	DB_MPOOLFILE *dbmfp;
+	db_pgno_t pgno;
+	db_lockmode_t mode;
+	u_int32_t type;
+{
+	DBT lkdata;
+	DB_LOCK lock;
+	DB_LOCK_ILOCK ilock;
+
+	/* Build a lock object naming the file/page and probe for it. */
+	memset(&lkdata, 0, sizeof(lkdata));
+	lkdata.data = &ilock;
+	lkdata.size = sizeof(ilock);
+
+	memcpy(ilock.fileid, dbmfp->fileid, DB_FILE_ID_LEN);
+	ilock.pgno = pgno;
+	ilock.type = type;
+
+	/* DB_LOCK_CHECK only tests for the lock, it does not acquire it. */
+	return (__lock_get(env, locker, DB_LOCK_CHECK, &lkdata, mode, &lock));
+}
+/*
+ * __db_has_pagelock --
+ *	Determine if this locker holds a particular page lock.
+ *	Returns 0 if lock is held, non-zero otherwise.
+ *
+ * PUBLIC: #ifdef DIAGNOSTIC
+ * PUBLIC: int __db_has_pagelock __P((ENV *, DB_LOCKER *,
+ * PUBLIC:     DB_MPOOLFILE *, PAGE *, db_lockmode_t));
+ * PUBLIC: #endif
+ */
+int
+__db_has_pagelock(env, locker, dbmfp, pagep, mode)
+	ENV *env;
+	DB_LOCKER *locker;
+	DB_MPOOLFILE *dbmfp;
+	PAGE *pagep;
+	db_lockmode_t mode;
+{
+	int ret;
+
+	/* Page types never individually locked are trivially satisfied. */
+	switch (pagep->type) {
+	case P_OVERFLOW:
+	case P_INVALID:
+	case P_QAMDATA:
+	case P_QAMMETA:
+	case P_IHEAP:
+		return (0);
+	case P_HASH:
+		if (PREV_PGNO(pagep) != PGNO_INVALID)
+			return (0);
+		break;
+	default:
+		break;
+	}
+	/* Accept either a page lock or a whole-database lock. */
+	if ((ret = __db_haslock(env,
+	    locker, dbmfp, pagep->pgno, mode, DB_PAGE_LOCK)) != 0)
+		ret = __db_haslock(env,
+		    locker, dbmfp, PGNO_BASE_MD, mode, DB_DATABASE_LOCK);
+	return (ret);
+}
+#endif
+
+/*
+ * __db_lput --
+ *	The standard lock put call.
+ *
+ * PUBLIC: int __db_lput __P((DBC *, DB_LOCK *));
+ */
+int
+__db_lput(dbc, lockp)
+	DBC *dbc;
+	DB_LOCK *lockp;
+{
+	DB_LOCKREQ couple[2], *reqp;
+	ENV *env;
+	int action, ret;
+
+	/*
+	 * Transactional locking.
+	 * Hold on to the read locks only if we are in full isolation.
+	 * Downgrade write locks if we are supporting dirty readers unless
+	 * there was an error.
+	 */
+	if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) &&
+	    !F_ISSET(dbc, DBC_ERROR) && lockp->mode == DB_LOCK_WRITE)
+		action = LCK_DOWNGRADE;
+	else if (dbc->txn == NULL)
+		action = LCK_COUPLE;
+	else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) &&
+	    lockp->mode == DB_LOCK_READ)
+		action = LCK_COUPLE;
+	else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED)
+		action = LCK_COUPLE;
+	else
+		action = 0;
+
+	env = dbc->env;
+	switch (action) {
+	case LCK_COUPLE:
+		/* Release the lock outright. */
+		ret = __lock_put(env, lockp);
+		break;
+	case LCK_DOWNGRADE:
+		/* Atomically get a WWRITE lock and put the WRITE lock. */
+		couple[0].op = DB_LOCK_GET;
+		couple[0].obj = NULL;
+		couple[0].mode = DB_LOCK_WWRITE;
+		couple[0].lock = *lockp;
+		UMRW_SET(couple[0].timeout);
+		couple[1].op = DB_LOCK_PUT;
+		couple[1].lock = *lockp;
+		ret = __lock_vec(env, dbc->locker, 0, couple, 2, &reqp);
+		if (ret == 0 || reqp == &couple[1])
+			*lockp = couple[0].lock;
+		break;
+	default:
+		/* Full isolation: hold the lock until transaction end. */
+		ret = 0;
+		break;
+	}
+
+	return (ret);
+}
diff --git a/src/db/db_method.c b/src/db/db_method.c
new file mode 100644
index 00000000..63ad63d9
--- /dev/null
+++ b/src/db/db_method.c
@@ -0,0 +1,1089 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2011 Oracle and/or its affiliates.  All rights reserved.
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __db_get_byteswapped __P((DB *, int *)); +static int __db_get_dbname __P((DB *, const char **, const char **)); +static DB_ENV *__db_get_env __P((DB *)); +static void __db_get_msgcall + __P((DB *, void (**)(const DB_ENV *, const char *))); +static DB_MPOOLFILE *__db_get_mpf __P((DB *)); +static int __db_get_multiple __P((DB *)); +static int __db_get_transactional __P((DB *)); +static int __db_get_type __P((DB *, DBTYPE *dbtype)); +static int __db_init __P((DB *, u_int32_t)); +static int __db_get_alloc __P((DB *, void *(**)(size_t), + void *(**)(void *, size_t), void (**)(void *))); +static int __db_set_alloc __P((DB *, void *(*)(size_t), + void *(*)(void *, size_t), void (*)(void *))); +static int __db_get_append_recno __P((DB *, + int (**)(DB *, DBT *, db_recno_t))); +static int __db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t))); +static int __db_get_cachesize __P((DB *, u_int32_t *, u_int32_t *, int *)); +static int __db_set_cachesize __P((DB *, u_int32_t, u_int32_t, int)); +static int __db_get_create_dir __P((DB *, const char **)); +static int __db_set_create_dir __P((DB *, const char *)); +static int __db_get_dup_compare + __P((DB *, int (**)(DB *, const DBT *, const DBT *))); +static int __db_set_dup_compare + __P((DB *, int (*)(DB *, const DBT *, const DBT *))); +static int __db_get_encrypt_flags __P((DB *, u_int32_t *)); +static int __db_set_encrypt __P((DB *, const char *, u_int32_t)); +static int __db_get_feedback __P((DB *, void (**)(DB *, int, int))); +static int __db_set_feedback __P((DB *, void (*)(DB *, int, int))); +static void __db_map_flags __P((DB *, u_int32_t *, u_int32_t *)); +static int __db_get_pagesize __P((DB *, u_int32_t 
*)); +static int __db_set_paniccall __P((DB *, void (*)(DB_ENV *, int))); +static int __db_set_priority __P((DB *, DB_CACHE_PRIORITY)); +static int __db_get_priority __P((DB *, DB_CACHE_PRIORITY *)); +static void __db_get_errcall __P((DB *, + void (**)(const DB_ENV *, const char *, const char *))); +static void __db_set_errcall + __P((DB *, void (*)(const DB_ENV *, const char *, const char *))); +static void __db_get_errfile __P((DB *, FILE **)); +static void __db_set_errfile __P((DB *, FILE *)); +static void __db_get_errpfx __P((DB *, const char **)); +static void __db_set_errpfx __P((DB *, const char *)); +static void __db_set_msgcall + __P((DB *, void (*)(const DB_ENV *, const char *))); +static void __db_get_msgfile __P((DB *, FILE **)); +static void __db_set_msgfile __P((DB *, FILE *)); +static int __db_get_assoc_flags __P((DB *, u_int32_t *)); +static void __dbh_err __P((DB *, int, const char *, ...)); +static void __dbh_errx __P((DB *, const char *, ...)); + +/* + * db_create -- + * DB constructor. + * + * EXTERN: int db_create __P((DB **, DB_ENV *, u_int32_t)); + */ +int +db_create(dbpp, dbenv, flags) + DB **dbpp; + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + ip = NULL; + env = dbenv == NULL ? NULL : dbenv->env; + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + case DB_XA_CREATE: + if (dbenv != NULL) { + __db_errx(env, DB_STR("0504", + "XA applications may not specify an environment to db_create")); + return (EINVAL); + } + + /* + * If it's an XA database, open it within the XA environment, + * taken from the global list of environments. (When the XA + * transaction manager called our xa_start() routine the + * "current" environment was moved to the start of the list.) 
+ */ + env = TAILQ_FIRST(&DB_GLOBAL(envq)); + if (env == NULL) { + __db_errx(env, DB_STR("0505", + "Cannot open XA database before XA is enabled")); + return (EINVAL); + } + break; + default: + return (__db_ferr(env, "db_create", 0)); + } + + if (env != NULL) + ENV_ENTER(env, ip); + + /* + * If we are opening an XA database, make sure we don't have a global XA + * transaction running. + */ + if (LF_ISSET(DB_XA_CREATE)) { + XA_NO_TXN(ip, ret); + if (ret != 0) + goto err; + } + + ret = __db_create_internal(dbpp, env, flags); +err: if (env != NULL) + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __db_create_internal -- + * DB constructor internal routine. + * + * PUBLIC: int __db_create_internal __P((DB **, ENV *, u_int32_t)); + */ +int +__db_create_internal(dbpp, env, flags) + DB **dbpp; + ENV *env; + u_int32_t flags; +{ + DB *dbp; + DB_ENV *dbenv; + DB_REP *db_rep; + int ret; + + *dbpp = NULL; + + /* If we don't have an environment yet, allocate a local one. */ + if (env == NULL) { + if ((ret = db_env_create(&dbenv, 0)) != 0) + return (ret); + env = dbenv->env; + F_SET(env, ENV_DBLOCAL); + } else + dbenv = env->dbenv; + + /* Allocate and initialize the DB handle. */ + if ((ret = __os_calloc(env, 1, sizeof(*dbp), &dbp)) != 0) + goto err; + + dbp->dbenv = env->dbenv; + dbp->env = env; + if ((ret = __db_init(dbp, flags)) != 0) + goto err; + + MUTEX_LOCK(env, env->mtx_dblist); + ++env->db_ref; + MUTEX_UNLOCK(env, env->mtx_dblist); + + /* + * Set the replication timestamp; it's 0 if we're not in a replicated + * environment. Don't acquire a lock to read the value, even though + * it's opaque: all we check later is value equality, nothing else. + */ + dbp->timestamp = REP_ON(env) ? + ((REGENV *)env->reginfo->primary)->rep_timestamp : 0; + /* + * Set the replication generation number for fid management; valid + * replication generations start at 1. Don't acquire a lock to + * read the value. All we check later is value equality. 
+ */ + db_rep = env->rep_handle; + dbp->fid_gen = REP_ON(env) ? ((REP *)db_rep->region)->gen : 0; + + /* Open a backing DB_MPOOLFILE handle in the memory pool. */ + if ((ret = __memp_fcreate(env, &dbp->mpf)) != 0) + goto err; + + dbp->type = DB_UNKNOWN; + + *dbpp = dbp; + return (0); + +err: if (dbp != NULL) { + if (dbp->mpf != NULL) + (void)__memp_fclose(dbp->mpf, 0); + __os_free(env, dbp); + } + + if (F_ISSET(env, ENV_DBLOCAL)) + (void)__env_close(dbp->dbenv, 0); + + return (ret); +} + +/* + * __db_init -- + * Initialize a DB structure. + */ +static int +__db_init(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + int ret; + + dbp->locker = NULL; + dbp->alt_close = NULL; + LOCK_INIT(dbp->handle_lock); + + TAILQ_INIT(&dbp->free_queue); + TAILQ_INIT(&dbp->active_queue); + TAILQ_INIT(&dbp->join_queue); + LIST_INIT(&dbp->s_secondaries); + + FLD_SET(dbp->am_ok, + DB_OK_BTREE | DB_OK_HASH | DB_OK_HEAP | DB_OK_QUEUE | DB_OK_RECNO); + + /* DB PUBLIC HANDLE LIST BEGIN */ + dbp->associate = __db_associate_pp; + dbp->associate_foreign = __db_associate_foreign_pp; + dbp->close = __db_close_pp; + dbp->compact = __db_compact_pp; + dbp->cursor = __db_cursor_pp; + dbp->del = __db_del_pp; + dbp->dump = __db_dump_pp; + dbp->err = __dbh_err; + dbp->errx = __dbh_errx; + dbp->exists = __db_exists; + dbp->fd = __db_fd_pp; + dbp->get = __db_get_pp; + dbp->get_alloc = __db_get_alloc; + dbp->get_append_recno = __db_get_append_recno; + dbp->get_assoc_flags = __db_get_assoc_flags; + dbp->get_byteswapped = __db_get_byteswapped; + dbp->get_cachesize = __db_get_cachesize; + dbp->get_create_dir = __db_get_create_dir; + dbp->get_dbname = __db_get_dbname; + dbp->get_dup_compare = __db_get_dup_compare; + dbp->get_encrypt_flags = __db_get_encrypt_flags; + dbp->get_env = __db_get_env; + dbp->get_errcall = __db_get_errcall; + dbp->get_errfile = __db_get_errfile; + dbp->get_errpfx = __db_get_errpfx; + dbp->get_feedback = __db_get_feedback; + dbp->get_flags = __db_get_flags; + dbp->get_lorder = 
__db_get_lorder; + dbp->get_mpf = __db_get_mpf; + dbp->get_msgcall = __db_get_msgcall; + dbp->get_msgfile = __db_get_msgfile; + dbp->get_multiple = __db_get_multiple; + dbp->get_open_flags = __db_get_open_flags; + dbp->get_partition_dirs = __partition_get_dirs; + dbp->get_partition_callback = __partition_get_callback; + dbp->get_partition_keys = __partition_get_keys; + dbp->get_pagesize = __db_get_pagesize; + dbp->get_priority = __db_get_priority; + dbp->get_transactional = __db_get_transactional; + dbp->get_type = __db_get_type; + dbp->join = __db_join_pp; + dbp->key_range = __db_key_range_pp; + dbp->open = __db_open_pp; + dbp->pget = __db_pget_pp; + dbp->put = __db_put_pp; + dbp->remove = __db_remove_pp; + dbp->rename = __db_rename_pp; + dbp->set_alloc = __db_set_alloc; + dbp->set_append_recno = __db_set_append_recno; + dbp->set_cachesize = __db_set_cachesize; + dbp->set_create_dir = __db_set_create_dir; + dbp->set_dup_compare = __db_set_dup_compare; + dbp->set_encrypt = __db_set_encrypt; + dbp->set_errcall = __db_set_errcall; + dbp->set_errfile = __db_set_errfile; + dbp->set_errpfx = __db_set_errpfx; + dbp->set_feedback = __db_set_feedback; + dbp->set_flags = __db_set_flags; + dbp->set_lorder = __db_set_lorder; + dbp->set_msgcall = __db_set_msgcall; + dbp->set_msgfile = __db_set_msgfile; + dbp->set_pagesize = __db_set_pagesize; + dbp->set_paniccall = __db_set_paniccall; + dbp->set_partition = __partition_set; + dbp->set_partition_dirs = __partition_set_dirs; + dbp->set_priority = __db_set_priority; + dbp->sort_multiple = __db_sort_multiple; + dbp->stat = __db_stat_pp; + dbp->stat_print = __db_stat_print_pp; + dbp->sync = __db_sync_pp; + dbp->truncate = __db_truncate_pp; + dbp->upgrade = __db_upgrade_pp; + dbp->verify = __db_verify_pp; + /* DB PUBLIC HANDLE LIST END */ + + /* Access method specific. 
*/ + if ((ret = __bam_db_create(dbp)) != 0) + return (ret); + if ((ret = __ham_db_create(dbp)) != 0) + return (ret); + if ((ret = __heap_db_create(dbp)) != 0) + return (ret); + if ((ret = __qam_db_create(dbp)) != 0) + return (ret); + + COMPQUIET(flags, 0); + + return (0); +} + +/* + * __dbh_am_chk -- + * Error if an unreasonable method is called. + * + * PUBLIC: int __dbh_am_chk __P((DB *, u_int32_t)); + */ +int +__dbh_am_chk(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + /* + * We start out allowing any access methods to be called, and as the + * application calls the methods the options become restricted. The + * idea is to quit as soon as an illegal method combination is called. + */ + if ((LF_ISSET(DB_OK_BTREE) && FLD_ISSET(dbp->am_ok, DB_OK_BTREE)) || + (LF_ISSET(DB_OK_HASH) && FLD_ISSET(dbp->am_ok, DB_OK_HASH)) || + (LF_ISSET(DB_OK_HEAP) && FLD_ISSET(dbp->am_ok, DB_OK_HEAP)) || + (LF_ISSET(DB_OK_QUEUE) && FLD_ISSET(dbp->am_ok, DB_OK_QUEUE)) || + (LF_ISSET(DB_OK_RECNO) && FLD_ISSET(dbp->am_ok, DB_OK_RECNO))) { + FLD_CLR(dbp->am_ok, ~flags); + return (0); + } + + __db_errx(dbp->env, DB_STR("0506", +"call implies an access method which is inconsistent with previous calls")); + return (EINVAL); +} + +/* + * __dbh_err -- + * Db.err method. + */ +static void +#ifdef STDC_HEADERS +__dbh_err(DB *dbp, int error, const char *fmt, ...) +#else +__dbh_err(dbp, error, fmt, va_alist) + DB *dbp; + int error; + const char *fmt; + va_dcl +#endif +{ + /* Message with error string, to stderr by default. */ + DB_REAL_ERR(dbp->dbenv, error, DB_ERROR_SET, 1, fmt); +} + +/* + * __dbh_errx -- + * Db.errx method. + */ +static void +#ifdef STDC_HEADERS +__dbh_errx(DB *dbp, const char *fmt, ...) +#else +__dbh_errx(dbp, fmt, va_alist) + DB *dbp; + const char *fmt; + va_dcl +#endif +{ + /* Message without error string, to stderr by default. */ + DB_REAL_ERR(dbp->dbenv, 0, DB_ERROR_NOT_SET, 1, fmt); +} + +/* + * __db_get_byteswapped -- + * Return if database requires byte swapping. 
+ */ +static int +__db_get_byteswapped(dbp, isswapped) + DB *dbp; + int *isswapped; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_byteswapped"); + + *isswapped = F_ISSET(dbp, DB_AM_SWAP) ? 1 : 0; + return (0); +} + +/* + * __db_get_dbname -- + * Get the name of the database as passed to DB->open. + */ +static int +__db_get_dbname(dbp, fnamep, dnamep) + DB *dbp; + const char **fnamep, **dnamep; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_dbname"); + + if (fnamep != NULL) + *fnamep = dbp->fname; + if (dnamep != NULL) + *dnamep = dbp->dname; + return (0); +} + +/* + * __db_get_env -- + * Get the DB_ENV handle that was passed to db_create. + */ +static DB_ENV * +__db_get_env(dbp) + DB *dbp; +{ + return (dbp->dbenv); +} + +/* + * __db_get_mpf -- + * Get the underlying DB_MPOOLFILE handle. + */ +static DB_MPOOLFILE * +__db_get_mpf(dbp) + DB *dbp; +{ + return (dbp->mpf); +} + +/* + * get_multiple -- + * Return whether this DB handle references a physical file with multiple + * databases. + */ +static int +__db_get_multiple(dbp) + DB *dbp; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_multiple"); + + /* + * Only return TRUE if the handle is for the master database, not for + * any subdatabase in the physical file. If it's a Btree, with the + * subdatabases flag set, and the meta-data page has the right value, + * return TRUE. (We don't need to check it's a Btree, I suppose, but + * it doesn't hurt.) + */ + return (dbp->type == DB_BTREE && + F_ISSET(dbp, DB_AM_SUBDB) && + dbp->meta_pgno == PGNO_BASE_MD ? 1 : 0); +} + +/* + * get_transactional -- + * Return whether this database was created in a transaction. + */ +static int +__db_get_transactional(dbp) + DB *dbp; +{ + return (F_ISSET(dbp, DB_AM_TXN) ? 1 : 0); +} + +/* + * __db_get_type -- + * Return type of underlying database. 
+ */ +static int +__db_get_type(dbp, dbtype) + DB *dbp; + DBTYPE *dbtype; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_type"); + + *dbtype = dbp->type; + return (0); +} + +/* + * __db_get_append_recno -- + * Get record number append routine. + */ +static int +__db_get_append_recno(dbp, funcp) + DB *dbp; + int (**funcp) __P((DB *, DBT *, db_recno_t)); +{ + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + if (funcp) + *funcp = dbp->db_append_recno; + + return (0); +} +/* + * __db_set_append_recno -- + * Set record number append routine. + */ +static int +__db_set_append_recno(dbp, func) + DB *dbp; + int (*func) __P((DB *, DBT *, db_recno_t)); +{ + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_append_recno"); + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); + + dbp->db_append_recno = func; + + return (0); +} + +/* + * __db_get_cachesize -- + * Get underlying cache size. + */ +static int +__db_get_cachesize(dbp, cache_gbytesp, cache_bytesp, ncachep) + DB *dbp; + u_int32_t *cache_gbytesp, *cache_bytesp; + int *ncachep; +{ + DB_ILLEGAL_IN_ENV(dbp, "DB->get_cachesize"); + + return (__memp_get_cachesize(dbp->dbenv, + cache_gbytesp, cache_bytesp, ncachep)); +} + +/* + * __db_set_cachesize -- + * Set underlying cache size. 
+ */ +static int +__db_set_cachesize(dbp, cache_gbytes, cache_bytes, ncache) + DB *dbp; + u_int32_t cache_gbytes, cache_bytes; + int ncache; +{ + DB_ILLEGAL_IN_ENV(dbp, "DB->set_cachesize"); + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_cachesize"); + + return (__memp_set_cachesize( + dbp->dbenv, cache_gbytes, cache_bytes, ncache)); +} + +static int +__db_set_create_dir(dbp, dir) + DB *dbp; + const char *dir; +{ + DB_ENV *dbenv; + int i; + + dbenv = dbp->dbenv; + + for (i = 0; i < dbenv->data_next; i++) + if (strcmp(dir, dbenv->db_data_dir[i]) == 0) + break; + + if (i == dbenv->data_next) { + __db_errx(dbp->env, DB_STR_A("0507", + "Directory %s not in environment list.", "%s"), dir); + return (EINVAL); + } + + dbp->dirname = dbenv->db_data_dir[i]; + return (0); +} + +static int +__db_get_create_dir(dbp, dirp) + DB *dbp; + const char **dirp; +{ + *dirp = dbp->dirname; + return (0); +} + +/* + * __db_get_dup_compare -- + * Get duplicate comparison routine. + */ +static int +__db_get_dup_compare(dbp, funcp) + DB *dbp; + int (**funcp) __P((DB *, const DBT *, const DBT *)); +{ + + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); + + if (funcp != NULL) { +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp)) { + *funcp = + ((BTREE *)dbp->bt_internal)->compress_dup_compare; + } else +#endif + *funcp = dbp->dup_compare; + } + + return (0); +} + +/* + * __db_set_dup_compare -- + * Set duplicate comparison routine. 
+ */ +static int +__db_set_dup_compare(dbp, func) + DB *dbp; + int (*func) __P((DB *, const DBT *, const DBT *)); +{ + int ret; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_dup_compare"); + DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); + + if ((ret = __db_set_flags(dbp, DB_DUPSORT)) != 0) + return (ret); + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(dbp)) { + dbp->dup_compare = __bam_compress_dupcmp; + ((BTREE *)dbp->bt_internal)->compress_dup_compare = func; + } else +#endif + dbp->dup_compare = func; + + return (0); +} + +/* + * __db_get_encrypt_flags -- + */ +static int +__db_get_encrypt_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + DB_ILLEGAL_IN_ENV(dbp, "DB->get_encrypt_flags"); + + return (__env_get_encrypt_flags(dbp->dbenv, flagsp)); +} + +/* + * __db_set_encrypt -- + * Set database passwd. + */ +static int +__db_set_encrypt(dbp, passwd, flags) + DB *dbp; + const char *passwd; + u_int32_t flags; +{ + DB_CIPHER *db_cipher; + int ret; + + DB_ILLEGAL_IN_ENV(dbp, "DB->set_encrypt"); + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_encrypt"); + + if ((ret = __env_set_encrypt(dbp->dbenv, passwd, flags)) != 0) + return (ret); + + /* + * In a real env, this gets initialized with the region. In a local + * env, we must do it here. 
+ */ + db_cipher = dbp->env->crypto_handle; + if (!F_ISSET(db_cipher, CIPHER_ANY) && + (ret = db_cipher->init(dbp->env, db_cipher)) != 0) + return (ret); + + return (__db_set_flags(dbp, DB_ENCRYPT)); +} + +static void +__db_get_errcall(dbp, errcallp) + DB *dbp; + void (**errcallp) __P((const DB_ENV *, const char *, const char *)); +{ + __env_get_errcall(dbp->dbenv, errcallp); +} + +static void +__db_set_errcall(dbp, errcall) + DB *dbp; + void (*errcall) __P((const DB_ENV *, const char *, const char *)); +{ + __env_set_errcall(dbp->dbenv, errcall); +} + +static void +__db_get_errfile(dbp, errfilep) + DB *dbp; + FILE **errfilep; +{ + __env_get_errfile(dbp->dbenv, errfilep); +} + +static void +__db_set_errfile(dbp, errfile) + DB *dbp; + FILE *errfile; +{ + __env_set_errfile(dbp->dbenv, errfile); +} + +static void +__db_get_errpfx(dbp, errpfxp) + DB *dbp; + const char **errpfxp; +{ + __env_get_errpfx(dbp->dbenv, errpfxp); +} + +static void +__db_set_errpfx(dbp, errpfx) + DB *dbp; + const char *errpfx; +{ + __env_set_errpfx(dbp->dbenv, errpfx); +} + +static int +__db_get_feedback(dbp, feedbackp) + DB *dbp; + void (**feedbackp) __P((DB *, int, int)); +{ + if (feedbackp != NULL) + *feedbackp = dbp->db_feedback; + return (0); +} + +static int +__db_set_feedback(dbp, feedback) + DB *dbp; + void (*feedback) __P((DB *, int, int)); +{ + dbp->db_feedback = feedback; + return (0); +} + +/* + * __db_map_flags -- + * Maps between public and internal flag values. + * This function doesn't check for validity, so it can't fail. 
+ */ +static void +__db_map_flags(dbp, inflagsp, outflagsp) + DB *dbp; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(dbp, NULL); + + if (FLD_ISSET(*inflagsp, DB_CHKSUM)) { + FLD_SET(*outflagsp, DB_AM_CHKSUM); + FLD_CLR(*inflagsp, DB_CHKSUM); + } + if (FLD_ISSET(*inflagsp, DB_ENCRYPT)) { + FLD_SET(*outflagsp, DB_AM_ENCRYPT | DB_AM_CHKSUM); + FLD_CLR(*inflagsp, DB_ENCRYPT); + } + if (FLD_ISSET(*inflagsp, DB_TXN_NOT_DURABLE)) { + FLD_SET(*outflagsp, DB_AM_NOT_DURABLE); + FLD_CLR(*inflagsp, DB_TXN_NOT_DURABLE); + } +} + +/* + * __db_get_assoc_flags -- + */ +static int +__db_get_assoc_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_assoc_flags"); + + *flagsp = dbp->s_assoc_flags; + return (0); +} + +/* + * __db_get_flags -- + * The DB->get_flags method. + * + * PUBLIC: int __db_get_flags __P((DB *, u_int32_t *)); + */ +int +__db_get_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + static const u_int32_t db_flags[] = { + DB_CHKSUM, + DB_DUP, + DB_DUPSORT, + DB_ENCRYPT, +#ifdef HAVE_QUEUE + DB_INORDER, +#endif + DB_RECNUM, + DB_RENUMBER, + DB_REVSPLITOFF, + DB_SNAPSHOT, + DB_TXN_NOT_DURABLE, + 0 + }; + u_int32_t f, flags, mapped_flag; + int i; + + flags = 0; + for (i = 0; (f = db_flags[i]) != 0; i++) { + mapped_flag = 0; + __db_map_flags(dbp, &f, &mapped_flag); + __bam_map_flags(dbp, &f, &mapped_flag); + __ram_map_flags(dbp, &f, &mapped_flag); +#ifdef HAVE_QUEUE + __qam_map_flags(dbp, &f, &mapped_flag); +#endif + DB_ASSERT(dbp->env, f == 0); + if (F_ISSET(dbp, mapped_flag) == mapped_flag) + LF_SET(db_flags[i]); + } + + *flagsp = flags; + return (0); +} + +/* + * __db_set_flags -- + * DB->set_flags. 
+ * + * PUBLIC: int __db_set_flags __P((DB *, u_int32_t)); + */ +int +__db_set_flags(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbp->env; + + if (LF_ISSET(DB_ENCRYPT) && !CRYPTO_ON(env)) { + __db_errx(env, DB_STR("0508", + "Database environment not configured for encryption")); + return (EINVAL); + } + if (LF_ISSET(DB_TXN_NOT_DURABLE)) + ENV_REQUIRES_CONFIG(env, + env->tx_handle, "DB_NOT_DURABLE", DB_INIT_TXN); + + __db_map_flags(dbp, &flags, &dbp->flags); + + if ((ret = __bam_set_flags(dbp, &flags)) != 0) + return (ret); + if ((ret = __ram_set_flags(dbp, &flags)) != 0) + return (ret); +#ifdef HAVE_QUEUE + if ((ret = __qam_set_flags(dbp, &flags)) != 0) + return (ret); +#endif + + return (flags == 0 ? 0 : __db_ferr(env, "DB->set_flags", 0)); +} + +/* + * __db_get_lorder -- + * Get whether lorder is swapped or not. + * + * PUBLIC: int __db_get_lorder __P((DB *, int *)); + */ +int +__db_get_lorder(dbp, db_lorderp) + DB *dbp; + int *db_lorderp; +{ + int ret; + + /* Flag if the specified byte order requires swapping. */ + switch (ret = __db_byteorder(dbp->env, 1234)) { + case 0: + *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 4321 : 1234; + break; + case DB_SWAPBYTES: + *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 1234 : 4321; + break; + default: + return (ret); + /* NOTREACHED */ + } + + return (0); +} + +/* + * __db_set_lorder -- + * Set whether lorder is swapped or not. + * + * PUBLIC: int __db_set_lorder __P((DB *, int)); + */ +int +__db_set_lorder(dbp, db_lorder) + DB *dbp; + int db_lorder; +{ + int ret; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_lorder"); + + /* Flag if the specified byte order requires swapping. 
*/ + switch (ret = __db_byteorder(dbp->env, db_lorder)) { + case 0: + F_CLR(dbp, DB_AM_SWAP); + break; + case DB_SWAPBYTES: + F_SET(dbp, DB_AM_SWAP); + break; + default: + return (ret); + /* NOTREACHED */ + } + return (0); +} + +static int +__db_get_alloc(dbp, mal_funcp, real_funcp, free_funcp) + DB *dbp; + void *(**mal_funcp) __P((size_t)); + void *(**real_funcp) __P((void *, size_t)); + void (**free_funcp) __P((void *)); +{ + DB_ILLEGAL_IN_ENV(dbp, "DB->get_alloc"); + + return (__env_get_alloc(dbp->dbenv, mal_funcp, + real_funcp, free_funcp)); +} + +static int +__db_set_alloc(dbp, mal_func, real_func, free_func) + DB *dbp; + void *(*mal_func) __P((size_t)); + void *(*real_func) __P((void *, size_t)); + void (*free_func) __P((void *)); +{ + DB_ILLEGAL_IN_ENV(dbp, "DB->set_alloc"); + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_alloc"); + + return (__env_set_alloc(dbp->dbenv, mal_func, real_func, free_func)); +} + +static void +__db_get_msgcall(dbp, msgcallp) + DB *dbp; + void (**msgcallp) __P((const DB_ENV *, const char *)); +{ + __env_get_msgcall(dbp->dbenv, msgcallp); +} + +static void +__db_set_msgcall(dbp, msgcall) + DB *dbp; + void (*msgcall) __P((const DB_ENV *, const char *)); +{ + __env_set_msgcall(dbp->dbenv, msgcall); +} + +static void +__db_get_msgfile(dbp, msgfilep) + DB *dbp; + FILE **msgfilep; +{ + __env_get_msgfile(dbp->dbenv, msgfilep); +} + +static void +__db_set_msgfile(dbp, msgfile) + DB *dbp; + FILE *msgfile; +{ + __env_set_msgfile(dbp->dbenv, msgfile); +} + +static int +__db_get_pagesize(dbp, db_pagesizep) + DB *dbp; + u_int32_t *db_pagesizep; +{ + *db_pagesizep = dbp->pgsize; + return (0); +} + +/* + * __db_set_pagesize -- + * DB->set_pagesize + * + * PUBLIC: int __db_set_pagesize __P((DB *, u_int32_t)); + */ +int +__db_set_pagesize(dbp, db_pagesize) + DB *dbp; + u_int32_t db_pagesize; +{ + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_pagesize"); + + if (db_pagesize < DB_MIN_PGSIZE) { + __db_errx(dbp->env, DB_STR_A("0509", + "page sizes may not be smaller than 
%lu", "%lu"), + (u_long)DB_MIN_PGSIZE); + return (EINVAL); + } + if (db_pagesize > DB_MAX_PGSIZE) { + __db_errx(dbp->env, DB_STR_A("0510", + "page sizes may not be larger than %lu", "%lu"), + (u_long)DB_MAX_PGSIZE); + return (EINVAL); + } + + /* + * We don't want anything that's not a power-of-2, as we rely on that + * for alignment of various types on the pages. + */ + if (!POWER_OF_TWO(db_pagesize)) { + __db_errx(dbp->env, DB_STR("0511", + "page sizes must be a power-of-2")); + return (EINVAL); + } + + /* + * XXX + * Should we be checking for a page size that's not a multiple of 512, + * so that we never try and write less than a disk sector? + */ + dbp->pgsize = db_pagesize; + + return (0); +} + +static int +__db_set_paniccall(dbp, paniccall) + DB *dbp; + void (*paniccall) __P((DB_ENV *, int)); +{ + return (__env_set_paniccall(dbp->dbenv, paniccall)); +} + +static int +__db_set_priority(dbp, priority) + DB *dbp; + DB_CACHE_PRIORITY priority; +{ + dbp->priority = priority; + return (0); +} + +static int +__db_get_priority(dbp, priority) + DB *dbp; + DB_CACHE_PRIORITY *priority; +{ + if (dbp->priority == DB_PRIORITY_UNCHANGED) + return (__memp_get_priority(dbp->mpf, priority)); + else + *priority = dbp->priority; + + return (0); +} diff --git a/src/db/db_open.c b/src/db/db_open.c new file mode 100644 index 00000000..5d2f6434 --- /dev/null +++ b/src/db/db_open.c @@ -0,0 +1,800 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * __db_open -- + * DB->open method. 
+ * + * This routine gets called in six different ways: + * + * 1. It can be called to open a file/database. In this case, subdb will + * be NULL and meta_pgno will be PGNO_BASE_MD. + * 2. It can be called to open a subdatabase during normal operation. In + * this case, name and subname will both be non-NULL and meta_pgno will + * be PGNO_BASE_MD (also PGNO_INVALID). + * 3. It can be called to open an in-memory database (name == NULL; + * subname = name). + * 4. It can be called during recovery to open a file/database, in which case + * name will be non-NULL, subname will be NULL, and meta-pgno will be + * PGNO_BASE_MD. + * 5. It can be called during recovery to open a subdatabase, in which case + * name will be non-NULL, subname may be NULL and meta-pgno will be + * a valid pgno (i.e., not PGNO_BASE_MD). + * 6. It can be called during recovery to open an in-memory database. + * + * PUBLIC: int __db_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int, db_pgno_t)); + */ +int +__db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *fname, *dname; + DBTYPE type; + u_int32_t flags; + int mode; + db_pgno_t meta_pgno; +{ + DB *tdbp; + ENV *env; + int ret; + u_int32_t id; + + env = dbp->env; + id = TXN_INVALID; + + /* + * We must flush any existing pages before truncating the file + * since they could age out of mpool and overwrite new pages. 
+ */ + if (LF_ISSET(DB_TRUNCATE)) { + if ((ret = __db_create_internal(&tdbp, dbp->env, 0)) != 0) + goto err; + ret = __db_open(tdbp, ip, txn, fname, dname, DB_UNKNOWN, + DB_NOERROR | (flags & ~(DB_TRUNCATE|DB_CREATE)), + mode, meta_pgno); + if (ret == 0) + ret = __memp_ftruncate(tdbp->mpf, txn, ip, 0, 0); + (void)__db_close(tdbp, txn, DB_NOSYNC); + if (ret != 0 && ret != ENOENT && ret != EINVAL) + goto err; + ret = 0; + } + + DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, fname); + + /* + * If the environment was configured with threads, the DB handle + * must also be free-threaded, so we force the DB_THREAD flag on. + * (See SR #2033 for why this is a requirement--recovery needs + * to be able to grab a dbp using __db_fileid_to_dbp, and it has + * no way of knowing which dbp goes with which thread, so whichever + * one it finds has to be usable in any of them.) + */ + if (F_ISSET(env, ENV_THREAD)) + LF_SET(DB_THREAD); + + /* Convert any DB->open flags. */ + if (LF_ISSET(DB_RDONLY)) + F_SET(dbp, DB_AM_RDONLY); + if (LF_ISSET(DB_READ_UNCOMMITTED)) + F_SET(dbp, DB_AM_READ_UNCOMMITTED); + + if (IS_REAL_TXN(txn)) + F_SET(dbp, DB_AM_TXN); + + /* Fill in the type. */ + dbp->type = type; + + /* Save the file and database names. */ + if ((fname != NULL && + (ret = __os_strdup(env, fname, &dbp->fname)) != 0)) + goto err; + if ((dname != NULL && + (ret = __os_strdup(env, dname, &dbp->dname)) != 0)) + goto err; + + /* + * If both fname and subname are NULL, it's always a create, so make + * sure that we have both DB_CREATE and a type specified. It would + * be nice if this checking were done in __db_open where most of the + * interface checking is done, but this interface (__db_dbopen) is + * used by the recovery and limbo system, so we need to safeguard + * this interface as well. 
+ */ + if (fname == NULL) { + if (dbp->p_internal != NULL) { + __db_errx(env, DB_STR("0634", + "Partitioned databases may not be in memory.")); + return (ENOENT); + } + if (dname == NULL) { + if (!LF_ISSET(DB_CREATE)) { + __db_errx(env, DB_STR("0635", + "DB_CREATE must be specified to create databases.")); + return (ENOENT); + } + + F_SET(dbp, DB_AM_INMEM); + F_SET(dbp, DB_AM_CREATED); + + if (dbp->type == DB_UNKNOWN) { + __db_errx(env, DB_STR("0636", + "DBTYPE of unknown without existing file")); + return (EINVAL); + } + + if (dbp->pgsize == 0) + dbp->pgsize = DB_DEF_IOSIZE; + + /* + * If the file is a temporary file and we're + * doing locking, then we have to create a + * unique file ID. We can't use our normal + * dev/inode pair (or whatever this OS uses + * in place of dev/inode pairs) because no + * backing file will be created until the + * mpool cache is filled forcing the buffers + * to disk. Grab a random locker ID to use + * as a file ID. The created ID must never + * match a potential real file ID -- we know + * it won't because real file IDs contain a + * time stamp after the dev/inode pair, and + * we're simply storing a 4-byte value. + + * !!! + * Store the locker in the file id structure + * -- we can get it from there as necessary, + * and it saves having two copies. + */ + if (LOCKING_ON(env) && (ret = __lock_id(env, + (u_int32_t *)dbp->fileid, NULL)) != 0) + return (ret); + } else + MAKE_INMEM(dbp); + + /* + * Normally we would do handle locking here, however, with + * in-memory files, we cannot do any database manipulation + * until the mpool is open, so it happens later. + */ + } else if (dname == NULL && meta_pgno == PGNO_BASE_MD) { + /* Open/create the underlying file. Acquire locks. */ + if ((ret = __fop_file_setup(dbp, ip, + txn, fname, mode, flags, &id)) != 0) + return (ret); + /* + * If we are creating the first sub-db then this is the + * call to create the master db and we tried to open it + * read-only. 
The create will force it to be read/write + * So clear the RDONLY flag if we just created it. + */ + if (!F_ISSET(dbp, DB_AM_RDONLY)) + LF_CLR(DB_RDONLY); + } else { + if (dbp->p_internal != NULL) { + __db_errx(env, DB_STR("0637", + "Partitioned databases may not be included with multiple databases.")); + return (ENOENT); + } + if ((ret = __fop_subdb_setup(dbp, ip, + txn, fname, dname, mode, flags)) != 0) + return (ret); + meta_pgno = dbp->meta_pgno; + } + + /* Set up the underlying environment. */ + if ((ret = __env_setup(dbp, txn, fname, dname, id, flags)) != 0) + return (ret); + + /* For in-memory databases, we now need to open/create the database. */ + if (F_ISSET(dbp, DB_AM_INMEM)) { + if (dname == NULL) + ret = __db_new_file(dbp, ip, txn, NULL, NULL); + else { + id = TXN_INVALID; + ret = __fop_file_setup(dbp, + ip, txn, dname, mode, flags, &id); + } + if (ret != 0) + goto err; + } + + switch (dbp->type) { + case DB_BTREE: + ret = __bam_open(dbp, ip, txn, fname, meta_pgno, flags); + break; + case DB_HASH: + ret = __ham_open(dbp, ip, txn, fname, meta_pgno, flags); + break; + case DB_HEAP: + ret = __heap_open(dbp, + ip, txn, fname, meta_pgno, flags); + break; + case DB_RECNO: + ret = __ram_open(dbp, ip, txn, fname, meta_pgno, flags); + break; + case DB_QUEUE: + ret = __qam_open( + dbp, ip, txn, fname, meta_pgno, mode, flags); + break; + case DB_UNKNOWN: + return ( + __db_unknown_type(env, "__db_dbopen", dbp->type)); + } + if (ret != 0) + goto err; + +#ifdef HAVE_PARTITION + if (dbp->p_internal != NULL && (ret = + __partition_open(dbp, ip, txn, fname, type, flags, mode, 1)) != 0) + goto err; +#endif + DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, fname); + + /* + * Temporary files don't need handle locks, so we only have to check + * for a handle lock downgrade or lockevent in the case of named + * files. 
+ */ + if (!F_ISSET(dbp, DB_AM_RECOVER) && (fname != NULL || dname != NULL) && + LOCK_ISSET(dbp->handle_lock)) { + if (IS_REAL_TXN(txn)) + ret = __txn_lockevent(env, + txn, dbp, &dbp->handle_lock, dbp->locker); + else if (LOCKING_ON(env)) + /* Trade write handle lock for read handle lock. */ + ret = __lock_downgrade(env, + &dbp->handle_lock, DB_LOCK_READ, 0); + } +DB_TEST_RECOVERY_LABEL +err: + PERFMON4(env, + db, open, (char *) fname, (char *) dname, flags, &dbp->fileid[0]); + return (ret); +} + +/* + * __db_get_open_flags -- + * Accessor for flags passed into DB->open call + * + * PUBLIC: int __db_get_open_flags __P((DB *, u_int32_t *)); + */ +int +__db_get_open_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_open_flags"); + + *flagsp = dbp->open_flags; + return (0); +} + +/* + * __db_new_file -- + * Create a new database file. + * + * PUBLIC: int __db_new_file __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); + */ +int +__db_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + int ret; + + /* + * For in-memory database, it is created by mpool and doesn't + * take any lock, so temporarily turn off the lock checking here. + */ + if (F_ISSET(dbp, DB_AM_INMEM)) + LOCK_CHECK_OFF(ip); + + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_new_file(dbp, ip, txn, fhp, name); + break; + case DB_HASH: + ret = __ham_new_file(dbp, ip, txn, fhp, name); + break; + case DB_HEAP: + ret = __heap_new_file(dbp, ip, txn, fhp, name); + break; + case DB_QUEUE: + ret = __qam_new_file(dbp, ip, txn, fhp, name); + break; + case DB_UNKNOWN: + default: + __db_errx(dbp->env, DB_STR_A("0638", + "%s: Invalid type %d specified", "%s %d"), + name, dbp->type); + ret = EINVAL; + break; + } + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name); + /* Sync the file in preparation for moving it into place. 
*/ + if (ret == 0 && fhp != NULL) + ret = __os_fsync(dbp->env, fhp); + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); + + if (F_ISSET(dbp, DB_AM_INMEM)) + LOCK_CHECK_ON(ip); + +DB_TEST_RECOVERY_LABEL + return (ret); +} + +/* + * __db_init_subdb -- + * Initialize the dbp for a subdb. + * + * PUBLIC: int __db_init_subdb __P((DB *, + * PUBLIC: DB *, const char *, DB_THREAD_INFO *, DB_TXN *)); + */ +int +__db_init_subdb(mdbp, dbp, name, ip, txn) + DB *mdbp, *dbp; + const char *name; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + DBMETA *meta; + DB_MPOOLFILE *mpf; + int ret, t_ret; + + ret = 0; + if (!F_ISSET(dbp, DB_AM_CREATED)) { + /* Subdb exists; read meta-data page and initialize. */ + mpf = mdbp->mpf; + if ((ret = __memp_fget(mpf, &dbp->meta_pgno, + ip, txn, 0, &meta)) != 0) + goto err; + ret = __db_meta_setup(mdbp->env, dbp, name, meta, 0, 0); + if ((t_ret = __memp_fput(mpf, + ip, meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + /* + * If __db_meta_setup found that the meta-page hadn't + * been written out during recovery, we can just return. + */ + if (ret == ENOENT) + ret = 0; + goto err; + } + + /* Handle the create case here. */ + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_new_subdb(mdbp, dbp, ip, txn); + break; + case DB_HASH: + ret = __ham_new_subdb(mdbp, dbp, ip, txn); + break; + case DB_QUEUE: + ret = EINVAL; + break; + case DB_UNKNOWN: + default: + __db_errx(dbp->env, DB_STR_A("0639", + "Invalid subdatabase type %d specified", "%d"), + dbp->type); + return (EINVAL); + } + +err: return (ret); +} + +/* + * __db_chk_meta -- + * Take a buffer containing a meta-data page and check it for a valid LSN, + * checksum (and verify the checksum if necessary) and possibly decrypt it. + * + * Return 0 on success, >0 (errno) on error, -1 on checksum mismatch. 
+ * + * PUBLIC: int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t)); + */ +int +__db_chk_meta(env, dbp, meta, flags) + ENV *env; + DB *dbp; + DBMETA *meta; + u_int32_t flags; +{ + DB_LSN swap_lsn; + int is_hmac, ret, swapped; + u_int32_t magic, orig_chk; + u_int8_t *chksum; + + ret = 0; + swapped = 0; + + if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) { + if (dbp != NULL) + F_SET(dbp, DB_AM_CHKSUM); + + is_hmac = meta->encrypt_alg == 0 ? 0 : 1; + chksum = ((BTMETA *)meta)->chksum; + + /* + * If we need to swap, the checksum function overwrites the + * original checksum with 0, so we need to save a copy of the + * original for swapping later. + */ + orig_chk = *(u_int32_t *)chksum; + + /* + * We cannot add this to __db_metaswap because that gets done + * later after we've verified the checksum or decrypted. + */ + if (LF_ISSET(DB_CHK_META)) { + swapped = 0; +chk_retry: if ((ret = + __db_check_chksum(env, NULL, env->crypto_handle, + chksum, meta, DBMETASIZE, is_hmac)) != 0) { + if (is_hmac || swapped) + return (ret); + + M_32_SWAP(orig_chk); + swapped = 1; + *(u_int32_t *)chksum = orig_chk; + goto chk_retry; + } + } + } else if (dbp != NULL) + F_CLR(dbp, DB_AM_CHKSUM); + +#ifdef HAVE_CRYPTO + ret = __crypto_decrypt_meta(env, + dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META)); +#endif + + /* Now that we're decrypted, we can check LSN. */ + if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) { + /* + * This gets called both before and after swapping, so we + * need to check ourselves. If we already swapped it above, + * we'll know that here. 
+ */ + + swap_lsn = meta->lsn; + magic = meta->magic; +lsn_retry: + if (swapped) { + M_32_SWAP(swap_lsn.file); + M_32_SWAP(swap_lsn.offset); + M_32_SWAP(magic); + } + switch (magic) { + case DB_BTREEMAGIC: + case DB_HASHMAGIC: + case DB_HEAPMAGIC: + case DB_QAMMAGIC: + case DB_RENAMEMAGIC: + break; + default: + if (swapped) + return (EINVAL); + swapped = 1; + goto lsn_retry; + } + if (!IS_REP_CLIENT(env) && + !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn)) + /* Need to do check. */ + ret = __log_check_page_lsn(env, dbp, &swap_lsn); + } + return (ret); +} + +/* + * __db_meta_setup -- + * + * Take a buffer containing a meta-data page and figure out if it's + * valid, and if so, initialize the dbp from the meta-data page. + * + * PUBLIC: int __db_meta_setup __P((ENV *, + * PUBLIC: DB *, const char *, DBMETA *, u_int32_t, u_int32_t)); + */ +int +__db_meta_setup(env, dbp, name, meta, oflags, flags) + ENV *env; + DB *dbp; + const char *name; + DBMETA *meta; + u_int32_t oflags; + u_int32_t flags; +{ + u_int32_t magic; + int ret; + + ret = 0; + + /* + * Figure out what access method we're dealing with, and then + * call access method specific code to check error conditions + * based on conflicts between the found file and application + * arguments. A found file overrides some user information -- + * we don't consider it an error, for example, if the user set + * an expected byte order and the found file doesn't match it. + */ + F_CLR(dbp, DB_AM_SWAP | DB_AM_IN_RENAME); + magic = meta->magic; + +swap_retry: + switch (magic) { + case DB_BTREEMAGIC: + case DB_HASHMAGIC: + case DB_HEAPMAGIC: + case DB_QAMMAGIC: + case DB_RENAMEMAGIC: + break; + case 0: + /* + * The only time this should be 0 is if we're in the + * midst of opening a subdb during recovery and that + * subdatabase had its meta-data page allocated, but + * not yet initialized. 
+ */ + if (F_ISSET(dbp, DB_AM_SUBDB) && ((IS_RECOVERING(env) && + F_ISSET(env->lg_handle, DBLOG_FORCE_OPEN)) || + meta->pgno != PGNO_INVALID)) + return (ENOENT); + + goto bad_format; + default: + if (F_ISSET(dbp, DB_AM_SWAP)) + goto bad_format; + + M_32_SWAP(magic); + F_SET(dbp, DB_AM_SWAP); + goto swap_retry; + } + + /* + * We can only check the meta page if we are sure we have a meta page. + * If it is random data, then this check can fail. So only now can we + * checksum and decrypt. Don't distinguish between configuration and + * checksum match errors here, because we haven't opened the database + * and even a checksum error isn't a reason to panic the environment. + */ + if ((ret = __db_chk_meta(env, dbp, meta, flags)) != 0) { + if (ret == -1) + __db_errx(env, DB_STR_A("0640", + "%s: metadata page checksum error", "%s"), name); + goto bad_format; + } + + switch (magic) { + case DB_BTREEMAGIC: + if (dbp->type != DB_UNKNOWN && + dbp->type != DB_RECNO && dbp->type != DB_BTREE) + goto bad_format; + + flags = meta->flags; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(flags); + if (LF_ISSET(BTM_RECNO)) + dbp->type = DB_RECNO; + else + dbp->type = DB_BTREE; + if ((oflags & DB_TRUNCATE) == 0 && (ret = + __bam_metachk(dbp, name, (BTMETA *)meta)) != 0) + return (ret); + break; + case DB_HASHMAGIC: + if (dbp->type != DB_UNKNOWN && dbp->type != DB_HASH) + goto bad_format; + + dbp->type = DB_HASH; + if ((oflags & DB_TRUNCATE) == 0 && (ret = + __ham_metachk(dbp, name, (HMETA *)meta)) != 0) + return (ret); + break; + case DB_HEAPMAGIC: + if (dbp->type != DB_UNKNOWN && dbp->type != DB_HEAP) + goto bad_format; + + dbp->type = DB_HEAP; + if ((oflags & DB_TRUNCATE) == 0 && (ret = + __heap_metachk(dbp, name, (HEAPMETA *)meta)) != 0) + return (ret); + break; + case DB_QAMMAGIC: + if (dbp->type != DB_UNKNOWN && dbp->type != DB_QUEUE) + goto bad_format; + dbp->type = DB_QUEUE; + if ((oflags & DB_TRUNCATE) == 0 && (ret = + __qam_metachk(dbp, name, (QMETA *)meta)) != 0) + return (ret); 
+ break; + case DB_RENAMEMAGIC: + F_SET(dbp, DB_AM_IN_RENAME); + + /* Copy the file's ID. */ + memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN); + + break; + default: + goto bad_format; + } + + if (FLD_ISSET(meta->metaflags, + DBMETA_PART_RANGE | DBMETA_PART_CALLBACK)) + if ((ret = + __partition_init(dbp, meta->metaflags)) != 0) + return (ret); + return (0); + +bad_format: + if (F_ISSET(dbp, DB_AM_RECOVER)) + ret = ENOENT; + else + __db_errx(env, DB_STR_A("0641", + "__db_meta_setup: %s: unexpected file type or format", + "%s"), name); + return (ret == 0 ? EINVAL : ret); +} + +/* + * __db_reopen -- + * Reopen a subdatabase if its meta/root pages move. + * PUBLIC: int __db_reopen __P((DBC *)); + */ +int +__db_reopen(arg_dbc) + DBC *arg_dbc; +{ + BTREE *bt; + DBC *dbc; + DB_TXN *txn; + HASH *ht; + DB *dbp, *mdbp; + DB_LOCK new_lock, old_lock; + PAGE *new_page, *old_page; + db_pgno_t newpgno, oldpgno; + int ret, t_ret; + + dbc = arg_dbc; + dbp = dbc->dbp; + old_page = new_page = NULL; + mdbp = NULL; + + COMPQUIET(bt, NULL); + COMPQUIET(ht, NULL); + COMPQUIET(txn, NULL); + LOCK_INIT(new_lock); + LOCK_INIT(old_lock); + + /* + * This must be done in the context of a transaction. If the + * requester does not have a transaction, create one. + */ + + if (TXN_ON(dbp->env) && (txn = dbc->txn) == NULL) { + if ((ret = __txn_begin(dbp->env, + dbc->thread_info, NULL, &txn, 0)) != 0) + return (ret); + if ((ret = __db_cursor(dbp, + dbc->thread_info, txn, &dbc, 0)) != 0) { + (void)__txn_abort(txn); + return (ret); + } + } + + /* + * Lock and latch the old metadata page before re-opening the + * database so that the information is stable. Then lock + * and latch the new page before getting the revision so that + * it cannot change. 
+ */ + + if (dbp->type == DB_HASH) { + ht = (HASH*)dbp->h_internal; + oldpgno = ht->meta_pgno; + } else { + bt = (BTREE *)dbp->bt_internal; + oldpgno = bt->bt_root; + } + if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, + 0, oldpgno, DB_LOCK_READ, 0, &old_lock)) != 0) + goto err; + + if ((ret = __memp_fget(dbp->mpf, &oldpgno, + dbc->thread_info, dbc->txn, 0, &old_page)) != 0 && + ret != DB_PAGE_NOTFOUND) + goto err; + + /* If the page is free we must not hold its lock. */ + if (ret == DB_PAGE_NOTFOUND || TYPE(old_page) == P_INVALID) { + if ((ret = __LPUT(dbc, old_lock)) != 0) + goto err; + /* Drop the latch too. */ + if (old_page != NULL && (ret = __memp_fput(dbp->mpf, + dbc->thread_info, old_page, dbc->priority)) != 0) + goto err; + old_page = NULL; + } + + if ((ret = __db_master_open(dbp, + dbc->thread_info, dbc->txn, dbp->fname, 0, 0, &mdbp)) != 0) + goto err; + + if ((ret = __db_master_update(mdbp, dbp, dbc->thread_info, + dbc->txn, dbp->dname, dbp->type, MU_OPEN, NULL, 0)) != 0) + goto err; + + if (dbp->type == DB_HASH) + newpgno = ht->meta_pgno = dbp->meta_pgno; + else { + bt->bt_meta = dbp->meta_pgno; + if ((ret = __bam_read_root(dbp, + dbc->thread_info, dbc->txn, bt->bt_meta, 0)) != 0) + goto err; + newpgno = bt->bt_root; + } + + if (oldpgno == newpgno) + goto done; + + if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, + 0, newpgno, DB_LOCK_READ, 0, &new_lock)) != 0) + goto err; + + if ((ret = __memp_fget(dbp->mpf, &newpgno, + dbc->thread_info, dbc->txn, 0, &new_page)) != 0) + goto err; + +done: if (dbp->type == DB_HASH) + ht->revision = dbp->mpf->mfp->revision; + else + bt->revision = dbp->mpf->mfp->revision; + +err: if (old_page != NULL && (t_ret = __memp_fput(dbp->mpf, + dbc->thread_info, old_page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (new_page != NULL && (t_ret = __memp_fput(dbp->mpf, + dbc->thread_info, new_page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (mdbp != NULL && + (t_ret = __db_close(mdbp, dbc->txn, DB_NOSYNC)) != 0 && 
ret == 0) + ret = t_ret; + + if (dbc != arg_dbc) { + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __txn_commit(txn, 0)) != 0 && ret == 0) + ret = t_ret; + } + return (ret); +} diff --git a/src/db/db_overflow.c b/src/db/db_overflow.c new file mode 100644 index 00000000..fb83c938 --- /dev/null +++ b/src/db/db_overflow.c @@ -0,0 +1,705 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" + +/* + * Big key/data code. + * + * Big key and data entries are stored on linked lists of pages. The initial + * reference is a structure with the total length of the item and the page + * number where it begins. Each entry in the linked list contains a pointer + * to the next page of data, and so on. + */ + +/* + * __db_goff -- + * Get an offpage item. + * + * PUBLIC: int __db_goff __P((DBC *, + * PUBLIC: DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *)); + */ +int +__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz) + DBC *dbc; + DBT *dbt; + u_int32_t tlen; + db_pgno_t pgno; + void **bpp; + u_int32_t *bpsz; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + DB_TXN *txn; + DBC_INTERNAL *cp; + ENV *env; + PAGE *h; + DB_THREAD_INFO *ip; + db_indx_t bytes; + u_int32_t curoff, needed, start; + u_int8_t *p, *src; + int ret; + + dbp = dbc->dbp; + cp = dbc->internal; + env = dbp->env; + ip = dbc->thread_info; + mpf = dbp->mpf; + txn = dbc->txn; + + /* + * Check if the buffer is big enough; if it is not and we are + * allowed to malloc space, then we'll malloc it. If we are + * not (DB_DBT_USERMEM), then we'll set the dbt and return + * appropriately. 
+ */ + if (F_ISSET(dbt, DB_DBT_PARTIAL)) { + start = dbt->doff; + if (start > tlen) + needed = 0; + else if (dbt->dlen > tlen - start) + needed = tlen - start; + else + needed = dbt->dlen; + } else { + start = 0; + needed = tlen; + } + + /* + * If the caller has not requested any data, return success. This + * "early-out" also avoids setting up the streaming optimization when + * no page would be retrieved. If it were removed, the streaming code + * should only initialize when needed is not 0. + */ + if (needed == 0) { + dbt->size = 0; + return (0); + } + + if (F_ISSET(dbt, DB_DBT_USERCOPY)) + goto skip_alloc; + + /* Allocate any necessary memory. */ + if (F_ISSET(dbt, DB_DBT_USERMEM)) { + if (needed > dbt->ulen) { + dbt->size = needed; + return (DB_BUFFER_SMALL); + } + } else if (F_ISSET(dbt, DB_DBT_MALLOC)) { + if ((ret = __os_umalloc(env, needed, &dbt->data)) != 0) + return (ret); + } else if (F_ISSET(dbt, DB_DBT_REALLOC)) { + if ((ret = __os_urealloc(env, needed, &dbt->data)) != 0) + return (ret); + } else if (bpsz != NULL && (*bpsz == 0 || *bpsz < needed)) { + if ((ret = __os_realloc(env, needed, bpp)) != 0) + return (ret); + *bpsz = needed; + dbt->data = *bpp; + } else if (bpp != NULL) + dbt->data = *bpp; + else { + DB_ASSERT(env, + F_ISSET(dbt, + DB_DBT_USERMEM | DB_DBT_MALLOC | DB_DBT_REALLOC) || + bpsz != NULL); + return (DB_BUFFER_SMALL); + } + +skip_alloc: + /* Set up a start page in the overflow chain if streaming. */ + if (cp->stream_start_pgno != PGNO_INVALID && + pgno == cp->stream_start_pgno && start >= cp->stream_off && + start < cp->stream_off + P_MAXSPACE(dbp, dbp->pgsize)) { + pgno = cp->stream_curr_pgno; + curoff = cp->stream_off; + } else { + cp->stream_start_pgno = cp->stream_curr_pgno = pgno; + cp->stream_off = curoff = 0; + } + + /* + * Step through the linked list of pages, copying the data on each + * one into the buffer. Never copy more than the total data length. 
+ */ + dbt->size = needed; + for (p = dbt->data; pgno != PGNO_INVALID && needed > 0;) { + if ((ret = __memp_fget(mpf, + &pgno, ip, txn, 0, &h)) != 0) + return (ret); + DB_ASSERT(env, TYPE(h) == P_OVERFLOW); + + /* Check if we need any bytes from this page. */ + if (curoff + OV_LEN(h) >= start) { + bytes = OV_LEN(h); + src = (u_int8_t *)h + P_OVERHEAD(dbp); + if (start > curoff) { + src += start - curoff; + bytes -= start - curoff; + } + if (bytes > needed) + bytes = needed; + if (F_ISSET(dbt, DB_DBT_USERCOPY)) { + /* + * The offset into the DBT is the total size + * less the amount of data still needed. Care + * needs to be taken if doing a partial copy + * beginning at an offset other than 0. + */ + if ((ret = env->dbt_usercopy( + dbt, dbt->size - needed, + src, bytes, DB_USERCOPY_SETDATA)) != 0) { + (void)__memp_fput(mpf, + ip, h, dbp->priority); + return (ret); + } + } else + memcpy(p, src, bytes); + p += bytes; + needed -= bytes; + } + cp->stream_off = curoff; + curoff += OV_LEN(h); + cp->stream_curr_pgno = pgno; + pgno = h->next_pgno; + (void)__memp_fput(mpf, ip, h, dbp->priority); + } + + return (0); +} + +/* + * __db_poff -- + * Put an offpage item. + * + * PUBLIC: int __db_poff __P((DBC *, const DBT *, db_pgno_t *)); + */ +int +__db_poff(dbc, dbt, pgnop) + DBC *dbc; + const DBT *dbt; + db_pgno_t *pgnop; +{ + DB *dbp; + DBT tmp_dbt; + DB_LSN null_lsn; + DB_MPOOLFILE *mpf; + PAGE *pagep, *lastp; + db_indx_t pagespace; + db_pgno_t pgno; + u_int32_t space, sz, tlen; + u_int8_t *p; + int ret, t_ret; + + /* + * Allocate pages and copy the key/data item into them. Calculate the + * number of bytes we get for pages we fill completely with a single + * item. + */ + dbp = dbc->dbp; + lastp = NULL; + mpf = dbp->mpf; + pagespace = P_MAXSPACE(dbp, dbp->pgsize); + p = dbt->data; + sz = dbt->size; + + /* + * Check whether we are streaming at the end of the overflow item. + * If so, the last pgno and offset will be cached in the cursor. 
+ */ + if (F_ISSET(dbt, DB_DBT_STREAMING)) { + tlen = dbt->size - dbt->dlen; + pgno = dbc->internal->stream_curr_pgno; + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &lastp)) != 0) + return (ret); + + /* + * Calculate how much we can write on the last page of the + * overflow item. + */ + DB_ASSERT(dbp->env, + OV_LEN(lastp) == (tlen - dbc->internal->stream_off)); + space = pagespace - OV_LEN(lastp); + + /* Only copy as much data as we have. */ + if (space > dbt->dlen) + space = dbt->dlen; + + if (DBC_LOGGING(dbc)) { + tmp_dbt.data = dbt->data; + tmp_dbt.size = space; + ZERO_LSN(null_lsn); + if ((ret = __db_big_log(dbp, dbc->txn, &LSN(lastp), 0, + OP_SET(DB_APPEND_BIG, lastp), pgno, + PGNO_INVALID, PGNO_INVALID, &tmp_dbt, + &LSN(lastp), &null_lsn, &null_lsn)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(lastp)); + + memcpy((u_int8_t *)lastp + P_OVERHEAD(dbp) + OV_LEN(lastp), + dbt->data, space); + OV_LEN(lastp) += space; + sz -= space + dbt->doff; + p += space; + *pgnop = dbc->internal->stream_start_pgno; + } + + ret = 0; + for (; sz > 0; p += pagespace, sz -= pagespace) { + /* + * Reduce pagespace so we terminate the loop correctly and + * don't copy too much data. + */ + if (sz < pagespace) + pagespace = sz; + + /* + * Allocate and initialize a new page and copy all or part of + * the item onto the page. If sz is less than pagespace, we + * have a partial record. + */ + if ((ret = __db_new(dbc, P_OVERFLOW, NULL, &pagep)) != 0) + break; + if (DBC_LOGGING(dbc)) { + tmp_dbt.data = p; + tmp_dbt.size = pagespace; + ZERO_LSN(null_lsn); + if ((ret = __db_big_log(dbp, dbc->txn, &LSN(pagep), 0, + OP_SET(DB_ADD_BIG, pagep), + PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID, + PGNO_INVALID, &tmp_dbt, &LSN(pagep), + lastp == NULL ? &null_lsn : &LSN(lastp), + &null_lsn)) != 0) { + (void)__memp_fput(mpf, dbc->thread_info, + pagep, dbc->priority); + goto err; + } + } else + LSN_NOT_LOGGED(LSN(pagep)); + + /* Move LSN onto page. 
*/ + if (lastp != NULL) + LSN(lastp) = LSN(pagep); + + OV_LEN(pagep) = pagespace; + OV_REF(pagep) = 1; + memcpy((u_int8_t *)pagep + P_OVERHEAD(dbp), p, pagespace); + + /* + * If this is the first entry, update the user's info and + * initialize the cursor to allow for streaming of subsequent + * updates. Otherwise, update the entry on the last page + * filled in and release that page. + */ + if (lastp == NULL) { + *pgnop = PGNO(pagep); + dbc->internal->stream_start_pgno = + dbc->internal->stream_curr_pgno = *pgnop; + dbc->internal->stream_off = 0; + } else { + lastp->next_pgno = PGNO(pagep); + pagep->prev_pgno = PGNO(lastp); + if ((ret = __memp_fput(mpf, + dbc->thread_info, lastp, dbc->priority)) != 0) { + lastp = NULL; + goto err; + } + } + lastp = pagep; + } +err: if (lastp != NULL) { + if (ret == 0) { + dbc->internal->stream_curr_pgno = PGNO(lastp); + dbc->internal->stream_off = dbt->size - OV_LEN(lastp); + } + + if ((t_ret = __memp_fput(mpf, dbc->thread_info, lastp, + dbc->priority)) != 0 && ret == 0) + ret = t_ret; + } + return (ret); +} + +/* + * __db_ovref -- + * Decrement the reference count on an overflow page. + * + * PUBLIC: int __db_ovref __P((DBC *, db_pgno_t)); + */ +int +__db_ovref(dbc, pgno) + DBC *dbc; + db_pgno_t pgno; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + PAGE *h; + int ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &h)) != 0) + return (ret); + + if (DBC_LOGGING(dbc)) { + if ((ret = __db_ovref_log(dbp, + dbc->txn, &LSN(h), 0, h->pgno, -1, &LSN(h))) != 0) { + (void)__memp_fput(mpf, + dbc->thread_info, h, dbc->priority); + return (ret); + } + } else + LSN_NOT_LOGGED(LSN(h)); + + /* + * In BDB releases before 4.5, the overflow reference counts were + * incremented when an overflow item was split onto an internal + * page. 
There was a lock race in that code, and rather than fix + * the race, we changed BDB to copy overflow items when splitting + * them onto internal pages. The code to decrement reference + * counts remains so databases already in the field continue to + * work. + */ + --OV_REF(h); + + return (__memp_fput(mpf, dbc->thread_info, h, dbc->priority)); +} + +/* + * __db_doff -- + * Delete an offpage chain of overflow pages. + * + * PUBLIC: int __db_doff __P((DBC *, db_pgno_t)); + */ +int +__db_doff(dbc, pgno) + DBC *dbc; + db_pgno_t pgno; +{ + DB *dbp; + DBT tmp_dbt; + DB_LSN null_lsn; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + + do { + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, 0, &pagep)) != 0) + return (ret); + + DB_ASSERT(dbp->env, TYPE(pagep) == P_OVERFLOW); + /* + * If it's referenced by more than one key/data item, + * decrement the reference count and return. + */ + if (OV_REF(pagep) > 1) { + (void)__memp_fput(mpf, + dbc->thread_info, pagep, dbc->priority); + return (__db_ovref(dbc, pgno)); + } + + if ((ret = __memp_dirty(mpf, &pagep, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) { + if (pagep != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, pagep, dbc->priority); + return (ret); + } + + if (DBC_LOGGING(dbc)) { + tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD(dbp); + tmp_dbt.size = OV_LEN(pagep); + ZERO_LSN(null_lsn); + if ((ret = __db_big_log(dbp, dbc->txn, &LSN(pagep), 0, + OP_SET(DB_REM_BIG, pagep), PGNO(pagep), + PREV_PGNO(pagep), NEXT_PGNO(pagep), &tmp_dbt, + &LSN(pagep), &null_lsn, &null_lsn)) != 0) { + (void)__memp_fput(mpf, + dbc->thread_info, pagep, dbc->priority); + return (ret); + } + } else + LSN_NOT_LOGGED(LSN(pagep)); + pgno = pagep->next_pgno; + OV_LEN(pagep) = 0; + if ((ret = __db_free(dbc, pagep, 0)) != 0) + return (ret); + } while (pgno != PGNO_INVALID); + + return (0); +} + +/* + * __db_moff -- + * Match on overflow pages. 
+ * + * Given a starting page number and a key, return <0, 0, >0 to indicate if the + * key on the page is less than, equal to or greater than the key specified. + * We optimize this by doing chunk at a time comparison unless the user has + * specified a comparison function. In this case, we need to materialize + * the entire object and call their comparison routine. + * + * __db_moff and __db_coff are generic functions useful in searching and + * ordering off page items. __db_moff matches an overflow DBT with an offpage + * item. __db_coff compares two offpage items for lexicographic sort order. + * + * PUBLIC: int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); + */ +int +__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp) + DBC *dbc; + const DBT *dbt; + db_pgno_t pgno; + u_int32_t tlen; + int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp; +{ + DB *dbp; + DBT local_dbt; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + void *buf; + u_int32_t bufsize, cmp_bytes, key_left; + u_int8_t *p1, *p2; + int ret; + + dbp = dbc->dbp; + ip = dbc->thread_info; + mpf = dbp->mpf; + + /* + * If there is a user-specified comparison function, build a + * contiguous copy of the key, and call it. + */ + if (cmpfunc != NULL) { + memset(&local_dbt, 0, sizeof(local_dbt)); + buf = NULL; + bufsize = 0; + + if ((ret = __db_goff(dbc, + &local_dbt, tlen, pgno, &buf, &bufsize)) != 0) + return (ret); + /* Pass the key as the first argument */ + *cmpp = cmpfunc(dbp, dbt, &local_dbt); + __os_free(dbp->env, buf); + return (0); + } + + /* While there are both keys to compare. */ + for (*cmpp = 0, p1 = dbt->data, + key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) { + if ((ret = + __memp_fget(mpf, &pgno, ip, dbc->txn, 0, &pagep)) != 0) + return (ret); + + cmp_bytes = OV_LEN(pagep) < key_left ? 
OV_LEN(pagep) : key_left; + tlen -= cmp_bytes; + key_left -= cmp_bytes; + for (p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp); + cmp_bytes-- > 0; ++p1, ++p2) + if (*p1 != *p2) { + *cmpp = (long)*p1 - (long)*p2; + break; + } + pgno = NEXT_PGNO(pagep); + if ((ret = __memp_fput(mpf, ip, pagep, dbp->priority)) != 0) + return (ret); + if (*cmpp != 0) + return (0); + } + if (key_left > 0) /* DBT is longer than the page key. */ + *cmpp = 1; + else if (tlen > 0) /* DBT is shorter than the page key. */ + *cmpp = -1; + else + *cmpp = 0; + + return (0); +} + +/* + * __db_coff -- + * Match two offpage dbts. + * + * The DBTs must both refer to offpage items. + * The match happens a chunk (page) at a time unless a user defined comparison + * function exists. It is not possible to optimize this comparison away when + * a lexicographic sort order is required on mismatch. + * + * NOTE: For now this function only works for H_OFFPAGE type items. It would + * be simple to extend it for use with B_OVERFLOW type items. It would only + * require extracting the total length, and page number, dependent on the + * DBT type. 
+ * + * PUBLIC: int __db_coff __P((DBC *, const DBT *, const DBT *, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); + */ +int +__db_coff(dbc, dbt, match, cmpfunc, cmpp) + DBC *dbc; + const DBT *dbt, *match; + int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp; +{ + DB *dbp; + DB_THREAD_INFO *ip; + DB_MPOOLFILE *mpf; + DB_TXN *txn; + DBT local_key, local_match; + PAGE *dbt_pagep, *match_pagep; + db_pgno_t dbt_pgno, match_pgno; + u_int32_t cmp_bytes, dbt_bufsz, dbt_len, match_bufsz; + u_int32_t match_len, max_data, page_sz; + u_int8_t *p1, *p2; + int ret; + void *dbt_buf, *match_buf; + + dbp = dbc->dbp; + ip = dbc->thread_info; + txn = dbc->txn; + mpf = dbp->mpf; + page_sz = dbp->pgsize; + *cmpp = 0; + dbt_buf = match_buf = NULL; + + DB_ASSERT(dbp->env, HPAGE_PTYPE(dbt->data) == H_OFFPAGE); + DB_ASSERT(dbp->env, HPAGE_PTYPE(match->data) == H_OFFPAGE); + + /* Extract potentially unaligned length and pgno fields from DBTs */ + memcpy(&dbt_len, HOFFPAGE_TLEN(dbt->data), sizeof(u_int32_t)); + memcpy(&dbt_pgno, HOFFPAGE_PGNO(dbt->data), sizeof(db_pgno_t)); + memcpy(&match_len, HOFFPAGE_TLEN(match->data), sizeof(u_int32_t)); + memcpy(&match_pgno, HOFFPAGE_PGNO(match->data), sizeof(db_pgno_t)); + max_data = (dbt_len < match_len ? dbt_len : match_len); + + /* + * If there is a custom comparator, fully resolve both DBTs. + * Then call the users comparator. 
+ */ + if (cmpfunc != NULL) { + memset(&local_key, 0, sizeof(local_key)); + memset(&local_match, 0, sizeof(local_match)); + dbt_buf = match_buf = NULL; + dbt_bufsz = match_bufsz = 0; + + if ((ret = __db_goff(dbc, &local_key, dbt_len, + dbt_pgno, &dbt_buf, &dbt_bufsz)) != 0) + goto err1; + if ((ret = __db_goff(dbc, &local_match, match_len, + match_pgno, &match_buf, &match_bufsz)) != 0) + goto err1; + /* The key needs to be the first argument for sort order */ + *cmpp = cmpfunc(dbp, &local_key, &local_match); + +err1: if (dbt_buf != NULL) + __os_free(dbp->env, dbt_buf); + if (match_buf != NULL) + __os_free(dbp->env, match_buf); + return (ret); + } + + /* Match the offpage DBTs a page at a time. */ + while (dbt_pgno != PGNO_INVALID && match_pgno != PGNO_INVALID) { + if ((ret = + __memp_fget(mpf, &dbt_pgno, ip, txn, 0, &dbt_pagep)) != 0) + return (ret); + if ((ret = + __memp_fget(mpf, &match_pgno, + ip, txn, 0, &match_pagep)) != 0) { + (void)__memp_fput( + mpf, ip, dbt_pagep, DB_PRIORITY_UNCHANGED); + return (ret); + } + cmp_bytes = page_sz < max_data ? page_sz : max_data; + for (p1 = (u_int8_t *)dbt_pagep + P_OVERHEAD(dbp), + p2 = (u_int8_t *)match_pagep + P_OVERHEAD(dbp); + cmp_bytes-- > 0; ++p1, ++p2) + if (*p1 != *p2) { + *cmpp = (long)*p1 - (long)*p2; + break; + } + + dbt_pgno = NEXT_PGNO(dbt_pagep); + match_pgno = NEXT_PGNO(match_pagep); + max_data -= page_sz; + if ((ret = __memp_fput(mpf, + ip, dbt_pagep, DB_PRIORITY_UNCHANGED)) != 0) { + (void)__memp_fput(mpf, + ip, match_pagep, DB_PRIORITY_UNCHANGED); + return (ret); + } + if ((ret = __memp_fput(mpf, + ip, match_pagep, DB_PRIORITY_UNCHANGED)) != 0) + return (ret); + if (*cmpp != 0) + return (0); + } + + /* If a lexicographic mismatch was found, then the result has already + * been returned. If the DBTs matched, consider the lengths of the + * items, and return appropriately. + */ + if (dbt_len > match_len) /* DBT is longer than the match key. 
*/ + *cmpp = 1; + else if (match_len > dbt_len) /* DBT is shorter than the match key. */ + *cmpp = -1; + else + *cmpp = 0; + + return (0); + +} diff --git a/src/db/db_ovfl_vrfy.c b/src/db/db_ovfl_vrfy.c new file mode 100644 index 00000000..2956b137 --- /dev/null +++ b/src/db/db_ovfl_vrfy.c @@ -0,0 +1,410 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/db_verify.h" +#include "dbinc/mp.h" + +/* + * __db_vrfy_overflow -- + * Verify overflow page. + * + * PUBLIC: int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__db_vrfy_overflow(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + VRFY_PAGEINFO *pip; + int isbad, ret, t_ret; + + isbad = 0; + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + pip->refcount = OV_REF(h); + if (pip->refcount < 1) { + EPRINT((dbp->env, DB_STR_A("0676", + "Page %lu: overflow page has zero reference count", "%lu"), + (u_long)pgno)); + isbad = 1; + } + + /* Just store for now. */ + pip->olen = HOFFSET(h); + +err: if ((t_ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __db_vrfy_ovfl_structure -- + * Walk a list of overflow pages, avoiding cycles and marking + * pages seen. 
+ * + * PUBLIC: int __db_vrfy_ovfl_structure + * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t)); + */ +int +__db_vrfy_ovfl_structure(dbp, vdp, pgno, tlen, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + u_int32_t tlen; + u_int32_t flags; +{ + DB *pgset; + ENV *env; + VRFY_PAGEINFO *pip; + db_pgno_t next, prev; + int isbad, ret, seen_cnt, t_ret; + u_int32_t refcount; + + env = dbp->env; + pgset = vdp->pgset; + DB_ASSERT(env, pgset != NULL); + isbad = 0; + + /* This shouldn't happen, but just to be sure. */ + if (!IS_VALID_PGNO(pgno)) + return (DB_VERIFY_BAD); + + /* + * Check the first prev_pgno; it ought to be PGNO_INVALID, + * since there's no prev page. + */ + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + /* The refcount is stored on the first overflow page. */ + refcount = pip->refcount; + + if (pip->type != P_OVERFLOW) { + EPRINT((env, DB_STR_A("0677", + "Page %lu: overflow page of invalid type %lu", "%lu %lu"), + (u_long)pgno, (u_long)pip->type)); + ret = DB_VERIFY_BAD; + goto err; /* Unsafe to continue. */ + } + + prev = pip->prev_pgno; + if (prev != PGNO_INVALID) { + EPRINT((env, DB_STR_A("0678", + "Page %lu: first page in overflow chain has a prev_pgno %lu", + "%lu %lu"), (u_long)pgno, (u_long)prev)); + isbad = 1; + } + + for (;;) { + /* + * We may have seen this page elsewhere, if the overflow entry + * has been promoted to an internal page; we just want to + * make sure that each overflow page is seen exactly as many + * times as its refcount dictates. + * + * Note that this code also serves to keep us from looping + * infinitely if there's a cycle in an overflow chain. 
+ */ + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, pgno, &seen_cnt)) != 0) + goto err; + if ((u_int32_t)seen_cnt > refcount) { + EPRINT((env, DB_STR_A("0679", + "Page %lu: encountered too many times in overflow traversal", + "%lu"), (u_long)pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, pgno)) != 0) + goto err; + + /* + * Each overflow page can be referenced multiple times, + * because it's possible for overflow Btree keys to get + * promoted to internal pages. We want to make sure that + * each page is referenced from a Btree leaf (or Hash data + * page, which we consider a "leaf" here) exactly once; if + * the parent was a leaf, set a flag to indicate that we've + * seen this page in a leaf context. + * + * If the parent is not a leaf--in which case it's a Btree + * internal page--we don't need to bother doing any further + * verification, as we'll do it when we hit the leaf (or + * complain that we never saw the leaf). Only the first + * page in an overflow chain should ever have a refcount + * greater than 1, and the combination of the LEAFSEEN check + * and the fact that we bail after the first page for + * non-leaves should ensure this. + * + * Note that each "child" of a page, such as an overflow page, + * is stored and verified in a structure check exactly once, + * so this code does not need to contend with the fact that + * overflow chains used as Btree duplicate keys may be + * referenced multiply from a single Btree leaf page. 
+ */ + if (LF_ISSET(DB_ST_OVFL_LEAF)) { + if (F_ISSET(pip, VRFY_OVFL_LEAFSEEN)) { + EPRINT((env, DB_STR_A("0680", + "Page %lu: overflow page linked twice from leaf or data page", + "%lu"), (u_long)pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + F_SET(pip, VRFY_OVFL_LEAFSEEN); + } + + /* + * We want to verify each overflow chain only once, and + * although no chain should be linked more than once from a + * leaf page, we can't guarantee that it'll be linked that + * once if it's linked from an internal page and the key + * is gone. + * + * seen_cnt is the number of times we'd encountered this page + * before calling this function. + */ + if (seen_cnt == 0) { + /* + * Keep a running tab on how much of the item we've + * seen. + */ + tlen -= pip->olen; + + /* Send the application feedback about our progress. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + } else + goto done; + + next = pip->next_pgno; + + /* Are we there yet? */ + if (next == PGNO_INVALID) + break; + + /* + * We've already checked this when we saved it, but just + * to be sure... + */ + if (!IS_VALID_PGNO(next)) { + EPRINT((env, DB_STR_A("0681", + "Page %lu: bad next_pgno %lu on overflow page", + "%lu %lu"), (u_long)pgno, (u_long)next)); + ret = DB_VERIFY_BAD; + goto err; + } + + if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 || + (ret = __db_vrfy_getpageinfo(vdp, next, &pip)) != 0) + return (ret); + if (pip->prev_pgno != pgno) { + EPRINT((env, DB_STR_A("0682", + "Page %lu: bad prev_pgno %lu on overflow page (should be %lu)", + "%lu %lu %lu"), (u_long)next, + (u_long)pip->prev_pgno, (u_long)pgno)); + isbad = 1; + /* + * It's safe to continue because we have separate + * cycle detection. 
+ */ + } + + pgno = next; + } + + if (tlen > 0) { + isbad = 1; + EPRINT((env, DB_STR_A("0683", + "Page %lu: overflow item incomplete", "%lu"), + (u_long)pgno)); + } + +done: +err: if ((t_ret = + __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __db_safe_goff -- + * Get an overflow item, very carefully, from an untrusted database, + * in the context of the salvager. + * + * PUBLIC: int __db_safe_goff __P((DB *, VRFY_DBINFO *, + * PUBLIC: db_pgno_t, DBT *, void *, u_int32_t *, u_int32_t)); + */ +int +__db_safe_goff(dbp, vdp, pgno, dbt, buf, bufsz, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + DBT *dbt; + void *buf; + u_int32_t *bufsz; + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + PAGE *h; + int ret, t_ret; + u_int32_t bytesgot, bytes; + u_int8_t *src, *dest; + + mpf = dbp->mpf; + h = NULL; + ret = t_ret = 0; + bytesgot = bytes = 0; + + DB_ASSERT(dbp->env, bufsz != NULL); + + /* + * Back up to the start of the overflow chain (if necessary) via the + * prev pointer of the overflow page. This guarantees we transverse the + * longest possible chains of overflow pages and won't be called again + * with a pgno earlier in the chain, stepping on ourselves. + */ + for (;;) { + if ((ret = __memp_fget( + mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) + return (ret); + + if (PREV_PGNO(h) == PGNO_INVALID || + !IS_VALID_PGNO(PREV_PGNO(h))) + break; + + pgno = PREV_PGNO(h); + + if ((ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + return (ret); + } + if ((ret = __memp_fput( + mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + return (ret); + + h = NULL; + + while ((pgno != PGNO_INVALID) && (IS_VALID_PGNO(pgno))) { + /* + * Mark that we're looking at this page; if we've seen it + * already, quit. 
+ */ + if ((ret = __db_salvage_markdone(vdp, pgno)) != 0) + break; + + if ((ret = __memp_fget(mpf, &pgno, + vdp->thread_info, NULL, 0, &h)) != 0) + break; + + /* + * Make sure it's really an overflow page, unless we're + * being aggressive, in which case we pretend it is. + */ + if (!LF_ISSET(DB_AGGRESSIVE) && TYPE(h) != P_OVERFLOW) { + ret = DB_VERIFY_BAD; + break; + } + + src = (u_int8_t *)h + P_OVERHEAD(dbp); + bytes = OV_LEN(h); + + if (bytes + P_OVERHEAD(dbp) > dbp->pgsize) + bytes = dbp->pgsize - P_OVERHEAD(dbp); + + /* + * Realloc if buf is too small + */ + if (bytesgot + bytes > *bufsz) { + if ((ret = + __os_realloc(dbp->env, bytesgot + bytes, buf)) != 0) + break; + *bufsz = bytesgot + bytes; + } + + dest = *(u_int8_t **)buf + bytesgot; + bytesgot += bytes; + + memcpy(dest, src, bytes); + + pgno = NEXT_PGNO(h); + + if ((ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + break; + h = NULL; + } + + /* + * If we're being aggressive, salvage a partial datum if there + * was an error somewhere along the way. + */ + if (ret == 0 || LF_ISSET(DB_AGGRESSIVE)) { + dbt->size = bytesgot; + dbt->data = *(void **)buf; + } + + /* If we broke out on error, don't leave pages pinned. */ + if (h != NULL && (t_ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} diff --git a/src/db/db_pr.c b/src/db/db_pr.c new file mode 100644 index 00000000..3add6a92 --- /dev/null +++ b/src/db/db_pr.c @@ -0,0 +1,1921 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/db_verify.h" + +static int __db_bmeta __P((ENV *, DB *, BTMETA *, u_int32_t)); +static int __db_heapmeta __P((ENV *, DB *, HEAPMETA *, u_int32_t)); +static int __db_heapint __P((DB *, HEAPPG *, u_int32_t)); +static int __db_hmeta __P((ENV *, DB *, HMETA *, u_int32_t)); +static void __db_meta __P((ENV *, DB *, DBMETA *, FN const *, u_int32_t)); +static void __db_proff __P((ENV *, DB_MSGBUF *, void *)); +static int __db_qmeta __P((ENV *, DB *, QMETA *, u_int32_t)); +#ifdef HAVE_STATISTICS +static void __db_prdb __P((DB *, u_int32_t)); +static int __db_prtree __P((DB *, DB_TXN *, + u_int32_t, db_pgno_t, db_pgno_t)); +#endif + +/* + * __db_loadme -- + * A nice place to put a breakpoint. + * + * PUBLIC: void __db_loadme __P((void)); + */ +void +__db_loadme() +{ + pid_t pid; + + __os_id(NULL, &pid, NULL); +} + +#ifdef HAVE_STATISTICS +/* + * __db_dumptree -- + * Dump the tree to a file. 
+ * + * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, + * PUBLIC: char *, char *, db_pgno_t, db_pgno_t)); + */ +int +__db_dumptree(dbp, txn, op, name, first, last) + DB *dbp; + DB_TXN *txn; + char *op, *name; + db_pgno_t first, last; +{ + ENV *env; + FILE *fp, *orig_fp; + u_int32_t flags; + int ret; + + env = dbp->env; + + for (flags = 0; *op != '\0'; ++op) + switch (*op) { + case 'a': + LF_SET(DB_PR_PAGE); + break; + case 'h': + break; + case 'r': + LF_SET(DB_PR_RECOVERYTEST); + break; + default: + return (EINVAL); + } + + if (name != NULL) { + if ((fp = fopen(name, "w")) == NULL) + return (__os_get_errno()); + + orig_fp = dbp->dbenv->db_msgfile; + dbp->dbenv->db_msgfile = fp; + } else + fp = orig_fp = NULL; + + __db_prdb(dbp, flags); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + + ret = __db_prtree(dbp, txn, flags, first, last); + + if (fp != NULL) { + (void)fclose(fp); + env->dbenv->db_msgfile = orig_fp; + } + + return (ret); +} + +static const FN __db_flags_fn[] = { + { DB_AM_CHKSUM, "checksumming" }, + { DB_AM_COMPENSATE, "created by compensating transaction" }, + { DB_AM_CREATED, "database created" }, + { DB_AM_CREATED_MSTR, "encompassing file created" }, + { DB_AM_DBM_ERROR, "dbm/ndbm error" }, + { DB_AM_DELIMITER, "variable length" }, + { DB_AM_DISCARD, "discard cached pages" }, + { DB_AM_DUP, "duplicates" }, + { DB_AM_DUPSORT, "sorted duplicates" }, + { DB_AM_ENCRYPT, "encrypted" }, + { DB_AM_FIXEDLEN, "fixed-length records" }, + { DB_AM_INMEM, "in-memory" }, + { DB_AM_IN_RENAME, "file is being renamed" }, + { DB_AM_NOT_DURABLE, "changes not logged" }, + { DB_AM_OPEN_CALLED, "open called" }, + { DB_AM_PAD, "pad value" }, + { DB_AM_PGDEF, "default page size" }, + { DB_AM_RDONLY, "read-only" }, + { DB_AM_READ_UNCOMMITTED, "read-uncommitted" }, + { DB_AM_RECNUM, "Btree record numbers" }, + { DB_AM_RECOVER, "opened for recovery" }, + { DB_AM_RENUMBER, "renumber" }, + { DB_AM_REVSPLITOFF, "no reverse splits" }, + { DB_AM_SECONDARY, "secondary" }, + { 
DB_AM_SNAPSHOT, "load on open" }, + { DB_AM_SUBDB, "subdatabases" }, + { DB_AM_SWAP, "needswap" }, + { DB_AM_TXN, "transactional" }, + { DB_AM_VERIFYING, "verifier" }, + { 0, NULL } +}; + +/* + * __db_get_flags_fn -- + * Return the __db_flags_fn array. + * + * PUBLIC: const FN * __db_get_flags_fn __P((void)); + */ +const FN * +__db_get_flags_fn() +{ + return (__db_flags_fn); +} + +/* + * __db_prdb -- + * Print out the DB structure information. + */ +static void +__db_prdb(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + BTREE *bt; + DB_MSGBUF mb; + ENV *env; + HASH *h; + QUEUE *q; + HEAP *hp; + + env = dbp->env; + + DB_MSGBUF_INIT(&mb); + __db_msg(env, "In-memory DB structure:"); + __db_msgadd(env, &mb, "%s: %#lx", + __db_dbtype_to_string(dbp->type), (u_long)dbp->flags); + __db_prflags(env, &mb, dbp->flags, __db_flags_fn, " (", ")"); + DB_MSGBUF_FLUSH(env, &mb); + + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + bt = dbp->bt_internal; + __db_msg(env, "bt_meta: %lu bt_root: %lu", + (u_long)bt->bt_meta, (u_long)bt->bt_root); + __db_msg(env, "bt_minkey: %lu", (u_long)bt->bt_minkey); + if (!LF_ISSET(DB_PR_RECOVERYTEST)) + __db_msg(env, "bt_compare: %#lx bt_prefix: %#lx", + P_TO_ULONG(bt->bt_compare), + P_TO_ULONG(bt->bt_prefix)); +#ifdef HAVE_COMPRESSION + if (!LF_ISSET(DB_PR_RECOVERYTEST)) + __db_msg(env, "bt_compress: %#lx bt_decompress: %#lx", + P_TO_ULONG(bt->bt_compress), + P_TO_ULONG(bt->bt_decompress)); +#endif + __db_msg(env, "bt_lpgno: %lu", (u_long)bt->bt_lpgno); + if (dbp->type == DB_RECNO) { + __db_msg(env, + "re_pad: %#lx re_delim: %#lx re_len: %lu re_source: %s", + (u_long)bt->re_pad, (u_long)bt->re_delim, + (u_long)bt->re_len, + bt->re_source == NULL ? 
"" : bt->re_source); + __db_msg(env, + "re_modified: %d re_eof: %d re_last: %lu", + bt->re_modified, bt->re_eof, (u_long)bt->re_last); + } + break; + case DB_HASH: + h = dbp->h_internal; + __db_msg(env, "meta_pgno: %lu", (u_long)h->meta_pgno); + __db_msg(env, "h_ffactor: %lu", (u_long)h->h_ffactor); + __db_msg(env, "h_nelem: %lu", (u_long)h->h_nelem); + if (!LF_ISSET(DB_PR_RECOVERYTEST)) + __db_msg(env, "h_hash: %#lx", P_TO_ULONG(h->h_hash)); + break; + case DB_QUEUE: + q = dbp->q_internal; + __db_msg(env, "q_meta: %lu", (u_long)q->q_meta); + __db_msg(env, "q_root: %lu", (u_long)q->q_root); + __db_msg(env, "re_pad: %#lx re_len: %lu", + (u_long)q->re_pad, (u_long)q->re_len); + __db_msg(env, "rec_page: %lu", (u_long)q->rec_page); + __db_msg(env, "page_ext: %lu", (u_long)q->page_ext); + break; + case DB_HEAP: + hp = dbp->heap_internal; + __db_msg(env, "gbytes: %lu", (u_long)hp->gbytes); + __db_msg(env, "bytes: %lu", (u_long)hp->bytes); + __db_msg(env, "curregion: %lu", (u_long)hp->curregion); + __db_msg(env, "maxpgno: %lu", (u_long)hp->maxpgno); + break; + case DB_UNKNOWN: + default: + break; + } +} + +/* + * __db_prtree -- + * Print out the entire tree. + */ +static int +__db_prtree(dbp, txn, flags, first, last) + DB *dbp; + DB_TXN *txn; + u_int32_t flags; + db_pgno_t first, last; +{ + DB_MPOOLFILE *mpf; + PAGE *h; + db_pgno_t i; + int ret; + + mpf = dbp->mpf; + + if (dbp->type == DB_QUEUE) + return (__db_prqueue(dbp, flags)); + + /* + * Find out the page number of the last page in the database, then + * dump each page. + */ + if (last == PGNO_INVALID && + (ret = __memp_get_last_pgno(mpf, &last)) != 0) + return (ret); + for (i = first; i <= last; ++i) { + if ((ret = __memp_fget(mpf, &i, NULL, txn, 0, &h)) != 0) + return (ret); + (void)__db_prpage(dbp, h, flags); + if ((ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0) + return (ret); + } + + return (0); +} + +/* + * __db_prnpage + * -- Print out a specific page. 
+ * + * PUBLIC: int __db_prnpage __P((DB *, DB_TXN *, db_pgno_t)); + */ +int +__db_prnpage(dbp, txn, pgno) + DB *dbp; + DB_TXN *txn; + db_pgno_t pgno; +{ + DB_MPOOLFILE *mpf; + PAGE *h; + int ret, t_ret; + + mpf = dbp->mpf; + + if ((ret = __memp_fget(mpf, &pgno, NULL, txn, 0, &h)) != 0) + return (ret); + + ret = __db_prpage(dbp, h, DB_PR_PAGE); + + if ((t_ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_prpage + * -- Print out a page. + * + * PUBLIC: int __db_prpage __P((DB *, PAGE *, u_int32_t)); + */ +int +__db_prpage(dbp, h, flags) + DB *dbp; + PAGE *h; + u_int32_t flags; +{ + DB_MSGBUF mb; + u_int32_t pagesize; + /* + * !!! + * Find out the page size. We don't want to do it the "right" way, + * by reading the value from the meta-data page, that's going to be + * slow. Reach down into the mpool region. + */ + pagesize = (u_int32_t)dbp->mpf->mfp->pagesize; + DB_MSGBUF_INIT(&mb); + return (__db_prpage_int(dbp->env, + &mb, dbp, "", h, pagesize, NULL, flags)); +} + +/* + * __db_lockmode_to_string -- + * Return the name of the lock mode. + * + * PUBLIC: const char * __db_lockmode_to_string __P((db_lockmode_t)); + */ +const char * +__db_lockmode_to_string(mode) + db_lockmode_t mode; +{ + switch (mode) { + case DB_LOCK_NG: + return ("Not granted"); + case DB_LOCK_READ: + return ("Shared/read"); + case DB_LOCK_WRITE: + return ("Exclusive/write"); + case DB_LOCK_WAIT: + return ("Wait for event"); + case DB_LOCK_IWRITE: + return ("Intent exclusive/write"); + case DB_LOCK_IREAD: + return ("Intent shared/read"); + case DB_LOCK_IWR: + return ("Intent to read/write"); + case DB_LOCK_READ_UNCOMMITTED: + return ("Read uncommitted"); + case DB_LOCK_WWRITE: + return ("Was written"); + default: + break; + } + return ("UNKNOWN LOCK MODE"); +} + +#else /* !HAVE_STATISTICS */ + +/* + * __db_dumptree -- + * Dump the tree to a file. 
+ * + * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, + * PUBLIC: char *, char *, db_pgno_t, db_pgno_t)); + */ +int +__db_dumptree(dbp, txn, op, name, first, last) + DB *dbp; + DB_TXN *txn; + char *op, *name; + db_pgno_t first, last; +{ + COMPQUIET(txn, NULL); + COMPQUIET(op, NULL); + COMPQUIET(name, NULL); + COMPQUIET(first, last); + + return (__db_stat_not_built(dbp->env)); +} + +/* + * __db_get_flags_fn -- + * Return the __db_flags_fn array. + * + * PUBLIC: const FN * __db_get_flags_fn __P((void)); + */ +const FN * +__db_get_flags_fn() +{ + /* + * !!! + * The Tcl API uses this interface, stub it off. + */ + return (NULL); +} +#endif + +/* + * __db_meta -- + * Print out common metadata information. + */ +static void +__db_meta(env, dbp, dbmeta, fn, flags) + DB *dbp; + ENV *env; + DBMETA *dbmeta; + FN const *fn; + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + DB_MSGBUF mb; + PAGE *h; + db_pgno_t pgno; + u_int8_t *p; + int cnt, ret; + const char *sep; + + DB_MSGBUF_INIT(&mb); + + __db_msg(env, "\tmagic: %#lx", (u_long)dbmeta->magic); + __db_msg(env, "\tversion: %lu", (u_long)dbmeta->version); + __db_msg(env, "\tpagesize: %lu", (u_long)dbmeta->pagesize); + __db_msg(env, "\ttype: %lu", (u_long)dbmeta->type); + __db_msg(env, "\tmetaflags %#lx", (u_long)dbmeta->metaflags); + __db_msg(env, "\tkeys: %lu\trecords: %lu", + (u_long)dbmeta->key_count, (u_long)dbmeta->record_count); + if (dbmeta->nparts) + __db_msg(env, "\tnparts: %lu", (u_long)dbmeta->nparts); + + /* + * If we're doing recovery testing, don't display the free list, + * it may have changed and that makes the dump diff not work. 
+ */ + if (dbp != NULL && !LF_ISSET(DB_PR_RECOVERYTEST)) { + mpf = dbp->mpf; + __db_msgadd( + env, &mb, "\tfree list: %lu", (u_long)dbmeta->free); + for (pgno = dbmeta->free, + cnt = 0, sep = ", "; pgno != PGNO_INVALID;) { + if ((ret = __memp_fget(mpf, + &pgno, NULL, NULL, 0, &h)) != 0) { + DB_MSGBUF_FLUSH(env, &mb); + __db_msg(env, + "Unable to retrieve free-list page: %lu: %s", + (u_long)pgno, db_strerror(ret)); + break; + } + pgno = h->next_pgno; + (void)__memp_fput(mpf, NULL, h, dbp->priority); + __db_msgadd(env, &mb, "%s%lu", sep, (u_long)pgno); + if (++cnt % 10 == 0) { + DB_MSGBUF_FLUSH(env, &mb); + cnt = 0; + sep = "\t"; + } else + sep = ", "; + } + DB_MSGBUF_FLUSH(env, &mb); + __db_msg(env, "\tlast_pgno: %lu", (u_long)dbmeta->last_pgno); + } + + if (fn != NULL) { + DB_MSGBUF_FLUSH(env, &mb); + __db_msgadd(env, &mb, "\tflags: %#lx", (u_long)dbmeta->flags); + __db_prflags(env, &mb, dbmeta->flags, fn, " (", ")"); + } + + DB_MSGBUF_FLUSH(env, &mb); + __db_msgadd(env, &mb, "\tuid: "); + for (p = (u_int8_t *)dbmeta->uid, + cnt = 0; cnt < DB_FILE_ID_LEN; ++cnt) { + __db_msgadd(env, &mb, "%x", *p++); + if (cnt < DB_FILE_ID_LEN - 1) + __db_msgadd(env, &mb, " "); + } + DB_MSGBUF_FLUSH(env, &mb); +} + +/* + * __db_bmeta -- + * Print out the btree meta-data page. 
+ */ +static int +__db_bmeta(env, dbp, h, flags) + ENV *env; + DB *dbp; + BTMETA *h; + u_int32_t flags; +{ + static const FN fn[] = { + { BTM_DUP, "duplicates" }, + { BTM_RECNO, "recno" }, + { BTM_RECNUM, "btree:recnum" }, + { BTM_FIXEDLEN, "recno:fixed-length" }, + { BTM_RENUMBER, "recno:renumber" }, + { BTM_SUBDB, "multiple-databases" }, + { BTM_DUPSORT, "sorted duplicates" }, + { BTM_COMPRESS, "compressed" }, + { 0, NULL } + }; + + __db_meta(env, dbp, (DBMETA *)h, fn, flags); + + __db_msg(env, "\tminkey: %lu", (u_long)h->minkey); + if (F_ISSET(&h->dbmeta, BTM_RECNO)) + __db_msg(env, "\tre_len: %#lx re_pad: %#lx", + (u_long)h->re_len, (u_long)h->re_pad); + __db_msg(env, "\troot: %lu", (u_long)h->root); + + return (0); +} + +/* + * __db_hmeta -- + * Print out the hash meta-data page. + */ +static int +__db_hmeta(env, dbp, h, flags) + ENV *env; + DB *dbp; + HMETA *h; + u_int32_t flags; +{ + static const FN fn[] = { + { DB_HASH_DUP, "duplicates" }, + { DB_HASH_SUBDB, "multiple-databases" }, + { DB_HASH_DUPSORT, "sorted duplicates" }, + { 0, NULL } + }; + DB_MSGBUF mb; + int i; + + DB_MSGBUF_INIT(&mb); + + __db_meta(env, dbp, (DBMETA *)h, fn, flags); + + __db_msg(env, "\tmax_bucket: %lu", (u_long)h->max_bucket); + __db_msg(env, "\thigh_mask: %#lx", (u_long)h->high_mask); + __db_msg(env, "\tlow_mask: %#lx", (u_long)h->low_mask); + __db_msg(env, "\tffactor: %lu", (u_long)h->ffactor); + __db_msg(env, "\tnelem: %lu", (u_long)h->nelem); + __db_msg(env, "\th_charkey: %#lx", (u_long)h->h_charkey); + __db_msgadd(env, &mb, "\tspare points:\n\t"); + for (i = 0; i < NCACHED; i++) { + __db_msgadd(env, &mb, "%lu (%lu) ", (u_long)h->spares[i], + (u_long)(h->spares[i] == 0 ? + 0 : h->spares[i] + (i == 0 ? 0 : 1 << (i-1)))); + if ((i + 1) % 8 == 0) + __db_msgadd(env, &mb, "\n\t"); + } + DB_MSGBUF_FLUSH(env, &mb); + + return (0); +} + +/* + * __db_qmeta -- + * Print out the queue meta-data page. 
+ */ +static int +__db_qmeta(env, dbp, h, flags) + ENV *env; + DB *dbp; + QMETA *h; + u_int32_t flags; +{ + + __db_meta(env, dbp, (DBMETA *)h, NULL, flags); + + __db_msg(env, "\tfirst_recno: %lu", (u_long)h->first_recno); + __db_msg(env, "\tcur_recno: %lu", (u_long)h->cur_recno); + __db_msg(env, "\tre_len: %#lx re_pad: %lu", + (u_long)h->re_len, (u_long)h->re_pad); + __db_msg(env, "\trec_page: %lu", (u_long)h->rec_page); + __db_msg(env, "\tpage_ext: %lu", (u_long)h->page_ext); + + return (0); +} + +/* + * __db_heapmeta -- + * Print out the heap meta-data page. + */ +static int +__db_heapmeta(env, dbp, h, flags) + ENV *env; + DB *dbp; + HEAPMETA *h; + u_int32_t flags; +{ + __db_meta(env, dbp, (DBMETA *)h, NULL, flags); + + __db_msg(env, "\tcurregion: %lu", (u_long)h->curregion); + __db_msg(env, "\tnregions: %lu", (u_long)h->nregions); + __db_msg(env, "\tgbytes: %lu", (u_long)h->gbytes); + __db_msg(env, "\tbytes: %lu", (u_long)h->bytes); + + return (0); +} + +/* + * __db_heapint -- + * Print out the heap internal-data page. 
+ */
+/*
+ * __db_heapint --
+ *	Print the contents of a heap internal (region) page: the space-
+ *	availability byte for each entry in the region, ten per output line.
+ */
+static int
+__db_heapint(dbp, h, flags)
+	DB *dbp;
+	HEAPPG *h;
+	u_int32_t flags;
+{
+	DB_MSGBUF mb;
+	ENV *env;
+	int count, printed;
+	u_int32_t i, max;
+	u_int8_t avail;
+
+	env = dbp->env;
+	DB_MSGBUF_INIT(&mb);
+	count = printed = 0;
+	COMPQUIET(flags, 0);
+
+	__db_msgadd(env, &mb, "\thigh: %4lu\n", (u_long)h->high_pgno);
+	/* How many entries could there be on a page */
+	max = HEAP_REGION_SIZE(dbp);
+
+	for (i = 0; i < max; i++, count++) {
+		avail = HEAP_SPACE(dbp, h, i);
+		/* Only entries with a nonzero space byte are printed. */
+		if (avail != 0) {
+			__db_msgadd(env, &mb,
+			    "%5lu:%1lu ", (u_long)i, (u_long)avail);
+			printed = 1;
+		}
+		/* We get 10 entries per line this way */
+		if (count == 9) {
+			DB_MSGBUF_FLUSH(env, &mb);
+			count = -1;
+		}
+	}
+	/* All pages were less than 33% full */
+	if (printed == 0)
+		__db_msgadd(env, &mb,
+		    "All pages in this region less than 33 percent full");
+
+	DB_MSGBUF_FLUSH(env, &mb);
+	return (0);
+}
+
+/*
+ * For printing pages from the log we may be passed the data segment
+ * separate from the header, if so then it starts at HOFFSET.
+ */
+#define	PR_ENTRY(dbp, h, i, data)				\
+	(data == NULL ? P_ENTRY(dbp, h, i) :			\
+	    (u_int8_t *)data + P_INP(dbp, h)[i] - HOFFSET(h))
+/*
+ * __db_prpage_int
+ *	-- Print out a page.
+ *
+ * PUBLIC: int __db_prpage_int __P((ENV *, DB_MSGBUF *,
+ * PUBLIC:      DB *, char *, PAGE *, u_int32_t, u_int8_t *, u_int32_t));
+ */
+int
+__db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
+	ENV *env;
+	DB_MSGBUF *mbp;
+	DB *dbp;
+	char *lead;
+	PAGE *h;
+	u_int32_t pagesize;
+	u_int8_t *data;
+	u_int32_t flags;
+{
+	BINTERNAL *bi;
+	BKEYDATA *bk;
+	HOFFPAGE a_hkd;
+	QAMDATA *qp, *qep;
+	RINTERNAL *ri;
+	HEAPHDR *hh;
+	HEAPSPLITHDR *hs;
+	db_indx_t dlen, len, i, *inp, max;
+	db_pgno_t pgno;
+	db_recno_t recno;
+	u_int32_t qlen;
+	u_int8_t *ep, *hk, *p;
+	int deleted, ret;
+	const char *s;
+	void *hdata, *sp;
+
+	/*
+	 * If we're doing recovery testing and this page is P_INVALID,
+	 * assume it's a page that's on the free list, and don't display it.
+	 */
+	if (LF_ISSET(DB_PR_RECOVERYTEST) && TYPE(h) == P_INVALID)
+		return (0);
+
+	if ((s = __db_pagetype_to_string(TYPE(h))) == NULL) {
+		__db_msg(env, "%sILLEGAL PAGE TYPE: page: %lu type: %lu",
+		    lead, (u_long)h->pgno, (u_long)TYPE(h));
+		return (EINVAL);
+	}
+
+	/* Page number, page type. */
+	__db_msgadd(env, mbp, "%spage %lu: %s:", lead, (u_long)h->pgno, s);
+
+	/*
+	 * LSNs on a metadata page will be different from the original after an
+	 * abort, in some cases.  Don't display them if we're testing recovery.
+	 */
+	if (!LF_ISSET(DB_PR_RECOVERYTEST) ||
+	    (TYPE(h) != P_BTREEMETA && TYPE(h) != P_HASHMETA &&
+	    TYPE(h) != P_QAMMETA && TYPE(h) != P_QAMDATA &&
+	    TYPE(h) != P_HEAPMETA))
+		__db_msgadd(env, mbp, " LSN [%lu][%lu]:",
+		    (u_long)LSN(h).file, (u_long)LSN(h).offset);
+
+	/*
+	 * Page level (only applicable for Btree/Recno, but we always display
+	 * it, for no particular reason, except for Heap.
+	 */
+	if (!HEAPTYPE(h))
+		__db_msgadd(env, mbp, " level %lu", (u_long)h->level);
+
+	/* Record count. */
+	if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO ||
+	    (dbp != NULL && TYPE(h) == P_LRECNO &&
+	    h->pgno == ((BTREE *)dbp->bt_internal)->bt_root))
+		__db_msgadd(env, mbp, " records: %lu", (u_long)RE_NREC(h));
+	DB_MSGBUF_FLUSH(env, mbp);
+
+	/* Metadata and heap-internal pages have their own printers. */
+	switch (TYPE(h)) {
+	case P_BTREEMETA:
+		return (__db_bmeta(env, dbp, (BTMETA *)h, flags));
+	case P_HASHMETA:
+		return (__db_hmeta(env, dbp, (HMETA *)h, flags));
+	case P_QAMMETA:
+		return (__db_qmeta(env, dbp, (QMETA *)h, flags));
+	case P_QAMDATA:			/* Should be meta->start. */
+		if (!LF_ISSET(DB_PR_PAGE) || dbp == NULL)
+			return (0);
+
+		qlen = ((QUEUE *)dbp->q_internal)->re_len;
+		recno = (h->pgno - 1) * QAM_RECNO_PER_PAGE(dbp) + 1;
+		i = 0;
+		qep = (QAMDATA *)((u_int8_t *)h + pagesize - qlen);
+		for (qp = QAM_GET_RECORD(dbp, h, i); qp < qep;
+		    recno++, i++, qp = QAM_GET_RECORD(dbp, h, i)) {
+			if (!F_ISSET(qp, QAM_SET))
+				continue;
+
+			__db_msgadd(env, mbp, "%s",
+			    F_ISSET(qp, QAM_VALID) ? "\t" : " D");
+			__db_msgadd(env, mbp, "[%03lu] %4lu ", (u_long)recno,
+			    (u_long)((u_int8_t *)qp - (u_int8_t *)h));
+			__db_prbytes(env, mbp, qp->data, qlen);
+		}
+		return (0);
+	case P_HEAPMETA:
+		return (__db_heapmeta(env, dbp, (HEAPMETA *)h, flags));
+	case P_IHEAP:
+		if (!LF_ISSET(DB_PR_PAGE))
+			return (0);
+		return (__db_heapint(dbp, (HEAPPG *)h, flags));
+	default:
+		break;
+	}
+
+	s = "\t";
+	if (!HEAPTYPE(h) && TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) {
+		__db_msgadd(env, mbp, "%sprev: %4lu next: %4lu",
+		    s, (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
+		s = " ";
+	}
+
+	if (HEAPTYPE(h)) {
+		__db_msgadd(env, mbp, "%shigh indx: %4lu free indx: %4lu", s,
+		    (u_long)HEAP_HIGHINDX(h), (u_long)HEAP_FREEINDX(h));
+		s = " ";
+	}
+
+	if (TYPE(h) == P_OVERFLOW) {
+		__db_msgadd(env, mbp,
+		    "%sref cnt: %4lu ", s, (u_long)OV_REF(h));
+		if (dbp == NULL)
+			__db_msgadd(env, mbp,
+			    " len: %4lu ", (u_long)OV_LEN(h));
+		else
+			__db_prbytes(env,
+			    mbp, (u_int8_t *)h + P_OVERHEAD(dbp), OV_LEN(h));
+		return (0);
+	}
+	__db_msgadd(env, mbp, "%sentries: %4lu", s, (u_long)NUM_ENT(h));
+	__db_msgadd(env, mbp, " offset: %4lu", (u_long)HOFFSET(h));
+	DB_MSGBUF_FLUSH(env, mbp);
+
+	if (dbp == NULL || TYPE(h) == P_INVALID || !LF_ISSET(DB_PR_PAGE))
+		return (0);
+
+	/*
+	 * A separately-passed data segment is addressed relative to HOFFSET
+	 * (see PR_ENTRY above), so extend the offset bound accordingly.
+	 */
+	if (data != NULL)
+		pagesize += HOFFSET(h);
+	else if (pagesize < HOFFSET(h))
+		return (0);
+
+	/* Walk the item index, validating each offset before printing. */
+	ret = 0;
+	inp = P_INP(dbp, h);
+	max = TYPE(h) == P_HEAP ? HEAP_HIGHINDX(h) + 1 : NUM_ENT(h);
+	for (i = 0; i < max; i++) {
+		if (TYPE(h) == P_HEAP && inp[i] == 0)
+			continue;
+		if ((uintptr_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) <
+		    (uintptr_t)(P_OVERHEAD(dbp)) ||
+		    (size_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) >= pagesize) {
+			__db_msg(env,
+			    "ILLEGAL PAGE OFFSET: indx: %lu of %lu",
+			    (u_long)i, (u_long)inp[i]);
+			ret = EINVAL;
+			continue;
+		}
+		deleted = 0;
+		switch (TYPE(h)) {
+		case P_HASH_UNSORTED:
+		case P_HASH:
+		case P_IBTREE:
+		case P_IRECNO:
+			sp = PR_ENTRY(dbp, h, i, data);
+			break;
+		case P_HEAP:
+			sp = P_ENTRY(dbp, h, i);
+			break;
+		case P_LBTREE:
+			/* On leaf Btree pages even indices are keys; a key is
+			 * "deleted" if its paired data item is marked so. */
+			sp = PR_ENTRY(dbp, h, i, data);
+			deleted = i % 2 == 0 &&
+			    B_DISSET(GET_BKEYDATA(dbp, h, i + O_INDX)->type);
+			break;
+		case P_LDUP:
+		case P_LRECNO:
+			sp = PR_ENTRY(dbp, h, i, data);
+			deleted = B_DISSET(GET_BKEYDATA(dbp, h, i)->type);
+			break;
+		default:
+			goto type_err;
+		}
+		__db_msgadd(env, mbp, "%s", deleted ? " D" : "\t");
+		__db_msgadd(
+		    env, mbp, "[%03lu] %4lu ", (u_long)i, (u_long)inp[i]);
+		switch (TYPE(h)) {
+		case P_HASH_UNSORTED:
+		case P_HASH:
+			hk = sp;
+			switch (HPAGE_PTYPE(hk)) {
+			case H_OFFDUP:
+				memcpy(&pgno,
+				    HOFFDUP_PGNO(hk), sizeof(db_pgno_t));
+				__db_msgadd(env, mbp,
+				    "%4lu [offpage dups]", (u_long)pgno);
+				DB_MSGBUF_FLUSH(env, mbp);
+				break;
+			case H_DUPLICATE:
+				/*
+				 * If this is the first item on a page, then
+				 * we cannot figure out how long it is, so
+				 * we only print the first one in the duplicate
+				 * set.
+				 */
+				if (i != 0)
+					len = LEN_HKEYDATA(dbp, h, 0, i);
+				else
+					len = 1;
+
+				__db_msgadd(env, mbp, "Duplicates:");
+				DB_MSGBUF_FLUSH(env, mbp);
+				/* Each dup is a length-prefixed byte string. */
+				for (p = HKEYDATA_DATA(hk),
+				    ep = p + len; p < ep;) {
+					memcpy(&dlen, p, sizeof(db_indx_t));
+					p += sizeof(db_indx_t);
+					__db_msgadd(env, mbp, "\t\t");
+					__db_prbytes(env, mbp, p, dlen);
+					p += sizeof(db_indx_t) + dlen;
+				}
+				break;
+			case H_KEYDATA:
+				__db_prbytes(env, mbp, HKEYDATA_DATA(hk),
+				    LEN_HKEYDATA(dbp, h,
+				    i == 0 ? pagesize : 0, i));
+				break;
+			case H_OFFPAGE:
+				memcpy(&a_hkd, hk, HOFFPAGE_SIZE);
+				__db_msgadd(env, mbp,
+				    "overflow: total len: %4lu page: %4lu",
+				    (u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
+				DB_MSGBUF_FLUSH(env, mbp);
+				break;
+			default:
+				DB_MSGBUF_FLUSH(env, mbp);
+				__db_msg(env, "ILLEGAL HASH PAGE TYPE: %lu",
+				    (u_long)HPAGE_PTYPE(hk));
+				ret = EINVAL;
+				break;
+			}
+			break;
+		case P_IBTREE:
+			bi = sp;
+
+			if (F_ISSET(dbp, DB_AM_RECNUM))
+				__db_msgadd(env, mbp,
+				    "count: %4lu ", (u_long)bi->nrecs);
+			__db_msgadd(env, mbp,
+			    "pgno: %4lu type: %lu ",
+			    (u_long)bi->pgno, (u_long)bi->type);
+			switch (B_TYPE(bi->type)) {
+			case B_KEYDATA:
+				__db_prbytes(env, mbp, bi->data, bi->len);
+				break;
+			case B_DUPLICATE:
+			case B_OVERFLOW:
+				__db_proff(env, mbp, bi->data);
+				break;
+			default:
+				DB_MSGBUF_FLUSH(env, mbp);
+				__db_msg(env, "ILLEGAL BINTERNAL TYPE: %lu",
+				    (u_long)B_TYPE(bi->type));
+				ret = EINVAL;
+				break;
+			}
+			break;
+		case P_IRECNO:
+			ri = sp;
+			__db_msgadd(env, mbp, "entries %4lu pgno %4lu",
+			    (u_long)ri->nrecs, (u_long)ri->pgno);
+			DB_MSGBUF_FLUSH(env, mbp);
+			break;
+		case P_LBTREE:
+		case P_LDUP:
+		case P_LRECNO:
+			bk = sp;
+			switch (B_TYPE(bk->type)) {
+			case B_KEYDATA:
+				__db_prbytes(env, mbp, bk->data, bk->len);
+				break;
+			case B_DUPLICATE:
+			case B_OVERFLOW:
+				__db_proff(env, mbp, bk);
+				break;
+			default:
+				DB_MSGBUF_FLUSH(env, mbp);
+				__db_msg(env,
+			    "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu",
+				    (u_long)B_TYPE(bk->type));
+				ret = EINVAL;
+				break;
+			}
+			break;
+		case P_HEAP:
+			hh = sp;
+			if (!F_ISSET(hh,HEAP_RECSPLIT))
+				hdata = (u_int8_t *)hh + sizeof(HEAPHDR);
+			else {
+				hs = sp;
+				__db_msgadd(env, mbp,
+				    "split: 0x%02x tsize: %lu next: %lu.%lu ",
+				    hh->flags, (u_long)hs->tsize,
+				    (u_long)hs->nextpg, (u_long)hs->nextindx);
+
+				hdata = (u_int8_t *)hh + sizeof(HEAPSPLITHDR);
+			}
+			__db_prbytes(env, mbp, hdata, hh->size);
+			break;
+		default:
+type_err:		DB_MSGBUF_FLUSH(env, mbp);
+			__db_msg(env,
+			    "ILLEGAL PAGE TYPE: %lu", (u_long)TYPE(h));
+			ret = EINVAL;
+			continue;
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __db_prbytes --
+ *	Print out a data element.
+ *
+ * PUBLIC: void __db_prbytes __P((ENV *, DB_MSGBUF *, u_int8_t *, u_int32_t));
+ */
+void
+__db_prbytes(env, mbp, bytes, len)
+	ENV *env;
+	DB_MSGBUF *mbp;
+	u_int8_t *bytes;
+	u_int32_t len;
+{
+	u_int8_t *p;
+	u_int32_t i, not_printable;
+	int msg_truncated;
+
+	__db_msgadd(env, mbp, "len: %3lu", (u_long)len);
+	if (len != 0) {
+		__db_msgadd(env, mbp, " data: ");
+
+		/*
+		 * Print the first N bytes of the data.   If that
+		 * chunk is at least 3/4  printable characters, print
+		 * it as text, else print it in hex.  We have this
+		 * heuristic because we're displaying things like
+		 * lock objects that could be either text or data.
+		 */
+		if (len > env->data_len) {
+			/* Clamp output; "..." below marks the truncation. */
+			len = env->data_len;
+			msg_truncated = 1;
+		} else
+			msg_truncated = 0;
+		not_printable = 0;
+		for (p = bytes, i = 0; i < len; ++i, ++p) {
+			if (!isprint((int)*p) && *p != '\t' && *p != '\n') {
+				/* A single trailing NUL still counts as
+				 * printable text. */
+				if (i == len - 1 && *p == '\0')
+					break;
+				if (++not_printable >= (len >> 2))
+					break;
+			}
+		}
+		if (not_printable < (len >> 2))
+			for (p = bytes, i = len; i > 0; --i, ++p) {
+				if (isprint((int)*p))
+					__db_msgadd(env, mbp, "%c", *p);
+				else
+					__db_msgadd(env,
+					    mbp, "\\%x", (u_int)*p);
+			}
+		else
+			for (p = bytes, i = len; i > 0; --i, ++p)
+				__db_msgadd(env, mbp, "%.2x", (u_int)*p);
+		if (msg_truncated)
+			__db_msgadd(env, mbp, "...");
+	}
+	DB_MSGBUF_FLUSH(env, mbp);
+}
+
+/*
+ * __db_proff --
+ *	Print out an off-page element.
+ */ +static void +__db_proff(env, mbp, vp) + ENV *env; + DB_MSGBUF *mbp; + void *vp; +{ + BOVERFLOW *bo; + + bo = vp; + switch (B_TYPE(bo->type)) { + case B_OVERFLOW: + __db_msgadd(env, mbp, "overflow: total len: %4lu page: %4lu", + (u_long)bo->tlen, (u_long)bo->pgno); + break; + case B_DUPLICATE: + __db_msgadd( + env, mbp, "duplicate: page: %4lu", (u_long)bo->pgno); + break; + default: + /* NOTREACHED */ + break; + } + DB_MSGBUF_FLUSH(env, mbp); +} + +/* + * __db_prflags -- + * Print out flags values. + * + * PUBLIC: void __db_prflags __P((ENV *, DB_MSGBUF *, + * PUBLIC: u_int32_t, const FN *, const char *, const char *)); + */ +void +__db_prflags(env, mbp, flags, fn, prefix, suffix) + ENV *env; + DB_MSGBUF *mbp; + u_int32_t flags; + FN const *fn; + const char *prefix, *suffix; +{ + DB_MSGBUF mb; + const FN *fnp; + int found, standalone; + const char *sep; + + if (fn == NULL) + return; + + /* + * If it's a standalone message, output the suffix (which will be the + * label), regardless of whether we found anything or not, and flush + * the line. + */ + if (mbp == NULL) { + standalone = 1; + mbp = &mb; + DB_MSGBUF_INIT(mbp); + } else + standalone = 0; + + sep = prefix == NULL ? "" : prefix; + for (found = 0, fnp = fn; fnp->mask != 0; ++fnp) + if (LF_ISSET(fnp->mask)) { + __db_msgadd(env, mbp, "%s%s", sep, fnp->name); + sep = ", "; + found = 1; + } + + if ((standalone || found) && suffix != NULL) + __db_msgadd(env, mbp, "%s", suffix); + if (standalone) + DB_MSGBUF_FLUSH(env, mbp); +} + +/* + * __db_pagetype_to_string -- + * Return the name of the specified page type. 
+ * PUBLIC: const char *__db_pagetype_to_string __P((u_int32_t)); + */ +const char * +__db_pagetype_to_string(type) + u_int32_t type; +{ + char *s; + + s = NULL; + switch (type) { + case P_BTREEMETA: + s = "btree metadata"; + break; + case P_LDUP: + s = "duplicate"; + break; + case P_HASH_UNSORTED: + s = "hash unsorted"; + break; + case P_HASH: + s = "hash"; + break; + case P_HASHMETA: + s = "hash metadata"; + break; + case P_IBTREE: + s = "btree internal"; + break; + case P_INVALID: + s = "invalid"; + break; + case P_IRECNO: + s = "recno internal"; + break; + case P_LBTREE: + s = "btree leaf"; + break; + case P_LRECNO: + s = "recno leaf"; + break; + case P_OVERFLOW: + s = "overflow"; + break; + case P_QAMMETA: + s = "queue metadata"; + break; + case P_QAMDATA: + s = "queue"; + break; + case P_HEAPMETA: + s = "heap metadata"; + break; + case P_HEAP: + s = "heap data"; + break; + case P_IHEAP: + s = "heap internal"; + break; + default: + /* Just return a NULL. */ + break; + } + return (s); +} + +/* + * __db_dump_pp -- + * DB->dump pre/post processing. + * + * PUBLIC: int __db_dump_pp __P((DB *, const char *, + * PUBLIC: int (*)(void *, const void *), void *, int, int)); + */ +int +__db_dump_pp(dbp, subname, callback, handle, pflag, keyflag) + DB *dbp; + const char *subname; + int (*callback) __P((void *, const void *)); + void *handle; + int pflag, keyflag; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->dump"); + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 1)) != 0) { + handle_check = 0; + goto err; + } + + ret = __db_dump(dbp, subname, callback, handle, pflag, keyflag); + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_dump -- + * DB->dump. 
+ *
+ * PUBLIC: int __db_dump __P((DB *, const char *,
+ * PUBLIC:     int (*)(void *, const void *), void *, int, int));
+ */
+int
+__db_dump(dbp, subname, callback, handle, pflag, keyflag)
+	DB *dbp;
+	const char *subname;
+	int (*callback) __P((void *, const void *));
+	void *handle;
+	int pflag, keyflag;
+{
+	DBC *dbcp;
+	DBT key, data;
+	DBT keyret, dataret;
+	DB_HEAP_RID rid;
+	ENV *env;
+	db_recno_t recno;
+	int is_recno, is_heap, ret, t_ret;
+	void *pointer;
+
+	env = dbp->env;
+	is_heap = 0;
+
+	if ((ret = __db_prheader(
+	    dbp, subname, pflag, keyflag, handle, callback, NULL, 0)) != 0)
+		return (ret);
+
+	/*
+	 * Get a cursor and step through the database, printing out each
+	 * key/data pair.
+	 */
+	if ((ret = __db_cursor(dbp, NULL, NULL, &dbcp, 0)) != 0)
+		return (ret);
+
+	/*
+	 * Start with a 1MB bulk-return buffer; it is grown below if a
+	 * record doesn't fit (DB_BUFFER_SMALL).
+	 */
+	memset(&key, 0, sizeof(key));
+	memset(&data, 0, sizeof(data));
+	if ((ret = __os_malloc(env, 1024 * 1024, &data.data)) != 0)
+		goto err;
+	data.ulen = 1024 * 1024;
+	data.flags = DB_DBT_USERMEM;
+	/* Keys are always dumped for non-recno access methods. */
+	is_recno = (dbp->type == DB_RECNO || dbp->type == DB_QUEUE);
+	keyflag = is_recno ? keyflag : 1;
+	if (is_recno) {
+		keyret.data = &recno;
+		keyret.size = sizeof(recno);
+	}
+
+	if (dbp->type == DB_HEAP) {
+		is_heap = 1;
+		key.data = &rid;
+		key.size = key.ulen = sizeof(DB_HEAP_RID);
+		key.flags = DB_DBT_USERMEM;
+	}
+
+retry: while ((ret =
+	    __dbc_get(dbcp, &key, &data,
+	    !is_heap ? DB_NEXT | DB_MULTIPLE_KEY : DB_NEXT )) == 0) {
+		if (is_heap) {
+			/* Never dump keys for HEAP */
+			if ((ret = __db_prdbt(
+			    &data, pflag, " ", handle, callback, 0, 0)) != 0)
+				goto err;
+			continue;
+		}
+		/* Unpack the bulk buffer one key/data pair at a time. */
+		DB_MULTIPLE_INIT(pointer, &data);
+		for (;;) {
+			if (is_recno)
+				DB_MULTIPLE_RECNO_NEXT(pointer, &data,
+				    recno, dataret.data, dataret.size);
+			else
+				DB_MULTIPLE_KEY_NEXT(pointer, &data,
+				    keyret.data, keyret.size,
+				    dataret.data, dataret.size);
+
+			if (dataret.data == NULL)
+				break;
+
+			if ((keyflag &&
+			    (ret = __db_prdbt(&keyret, pflag, " ",
+			    handle, callback, is_recno, 0)) != 0) ||
+			    (ret = __db_prdbt(&dataret, pflag, " ",
+			    handle, callback, 0, 0)) != 0)
+				goto err;
+		}
+	}
+	/* Grow the buffer to an aligned size and re-fetch the record. */
+	if (ret == DB_BUFFER_SMALL) {
+		data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
+		if ((ret = __os_realloc(env, data.size, &data.data)) != 0)
+			goto err;
+		data.ulen = data.size;
+		goto retry;
+	}
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+
+	if ((t_ret = __db_prfooter(handle, callback)) != 0 && ret == 0)
+		ret = t_ret;
+
+err:	if ((t_ret = __dbc_close(dbcp)) != 0 && ret == 0)
+		ret = t_ret;
+	if (data.data != NULL)
+		__os_free(env, data.data);
+
+	return (ret);
+}
+
+/*
+ * __db_prdbt --
+ *	Print out a DBT data element.
+ *
+ * PUBLIC: int __db_prdbt __P((DBT *, int, const char *, void *,
+ * PUBLIC:     int (*)(void *, const void *), int, int));
+ */
+int
+__db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
+	DBT *dbtp;
+	int checkprint;
+	const char *prefix;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+	int is_recno;
+	int is_heap;
+{
+	static const u_char hex[] = "0123456789abcdef";
+	db_recno_t recno;
+	DB_HEAP_RID rid;
+	size_t len;
+	int ret;
+#define	DBTBUFLEN	100
+	u_int8_t *p, *hp;
+	char buf[DBTBUFLEN], hbuf[DBTBUFLEN];
+
+	/*
+	 * !!!
+	 * This routine is the routine that dumps out items in the format
+	 * used by db_dump(1) and db_load(1).  This means that the format
+	 * cannot change.
+	 */
+	if (prefix != NULL && (ret = callback(handle, prefix)) != 0)
+		return (ret);
+	if (is_recno) {
+		/*
+		 * We're printing a record number, and this has to be done
+		 * in a platform-independent way.  So we use the numeral in
+		 * straight ASCII.
+		 */
+		(void)__ua_memcpy(&recno, dbtp->data, sizeof(recno));
+		snprintf(buf, DBTBUFLEN, "%lu", (u_long)recno);
+
+		/* If we're printing data as hex, print keys as hex too. */
+		if (!checkprint) {
+			for (len = strlen(buf), p = (u_int8_t *)buf,
+			    hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
+				*hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
+				*hp++ = hex[*p & 0x0f];
+			}
+			*hp = '\0';
+			ret = callback(handle, hbuf);
+		} else
+			ret = callback(handle, buf);
+
+		if (ret != 0)
+			return (ret);
+	} else if (is_heap) {
+		/*
+		 * We're printing a heap record number, and this has to be
+		 * done in a platform-independent way.  So we use the numeral
+		 * in straight ASCII.
+		 */
+		(void)__ua_memcpy(&rid, dbtp->data, sizeof(rid));
+		snprintf(buf, DBTBUFLEN, "%lu %hu",
+		    (u_long)rid.pgno, (u_short)rid.indx);
+
+		/* If we're printing data as hex, print keys as hex too. */
+		if (!checkprint) {
+			for (len = strlen(buf), p = (u_int8_t *)buf,
+			    hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
+				*hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
+				*hp++ = hex[*p & 0x0f];
+			}
+			*hp = '\0';
+			ret = callback(handle, hbuf);
+		} else
+			ret = callback(handle, buf);
+
+		if (ret != 0)
+			return (ret);
+	} else if (checkprint) {
+		/* Printable characters literally, others as \xx escapes;
+		 * a literal backslash is doubled. */
+		for (len = dbtp->size, p = dbtp->data; len--; ++p)
+			if (isprint((int)*p)) {
+				if (*p == '\\' &&
+				    (ret = callback(handle, "\\")) != 0)
+					return (ret);
+				snprintf(buf, DBTBUFLEN, "%c", *p);
+				if ((ret = callback(handle, buf)) != 0)
+					return (ret);
+			} else {
+				snprintf(buf, DBTBUFLEN, "\\%c%c",
+				    hex[(u_int8_t)(*p & 0xf0) >> 4],
+				    hex[*p & 0x0f]);
+				if ((ret = callback(handle, buf)) != 0)
+					return (ret);
+			}
+	} else
+		for (len = dbtp->size, p = dbtp->data; len--; ++p) {
+			snprintf(buf, DBTBUFLEN, "%c%c",
+			    hex[(u_int8_t)(*p & 0xf0) >> 4],
+			    hex[*p & 0x0f]);
+			if ((ret = callback(handle, buf)) != 0)
+				return (ret);
+		}
+
+	return (callback(handle, "\n"));
+}
+
+/*
+ * __db_prheader --
+ *	Write out header information in the format expected by db_load.
+ *
+ * PUBLIC: int __db_prheader __P((DB *, const char *, int, int, void *,
+ * PUBLIC:     int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t));
+ */
+int
+__db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
+	DB *dbp;
+	const char *subname;
+	int pflag, keyflag;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+	VRFY_DBINFO *vdp;
+	db_pgno_t meta_pgno;
+{
+	DBT dbt;
+	DBTYPE dbtype;
+	ENV *env;
+	VRFY_PAGEINFO *pip;
+	u_int32_t flags, tmp_u_int32;
+	size_t buflen;
+	char *buf;
+	int using_vdp, ret, t_ret, tmp_int;
+#ifdef HAVE_HEAP
+	u_int32_t tmp2_u_int32;
+#endif
+
+	ret = 0;
+	buf = NULL;
+	COMPQUIET(buflen, 0);
+
+	/*
+	 * If dbp is NULL, then pip is guaranteed to be non-NULL; we only ever
+	 * call __db_prheader with a NULL dbp from one case inside __db_prdbt,
+	 * and this is a special subdatabase for "lost" items.  In this case
+	 * we have a vdp (from which we'll get a pip).  In all other cases, we
+	 * will have a non-NULL dbp (and vdp may or may not be NULL depending
+	 * on whether we're salvaging).
+	 */
+	if (dbp == NULL)
+		env = NULL;
+	else
+		env = dbp->env;
+	DB_ASSERT(env, dbp != NULL || vdp != NULL);
+
+	/*
+	 * If we've been passed a verifier statistics object, use that;  we're
+	 * being called in a context where dbp->stat is unsafe.
+	 *
+	 * Also, the verifier may set the pflag on a per-salvage basis.  If so,
+	 * respect that.
+	 */
+	if (vdp != NULL) {
+		if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
+			return (ret);
+
+		if (F_ISSET(vdp, SALVAGE_PRINTABLE))
+			pflag = 1;
+		using_vdp = 1;
+	} else {
+		pip = NULL;
+		using_vdp = 0;
+	}
+
+	/*
+	 * If dbp is NULL, make it a btree.  Otherwise, set dbtype to whatever
+	 * appropriate type for the specified meta page, or the type of the dbp.
+	 */
+	if (dbp == NULL)
+		dbtype = DB_BTREE;
+	else if (using_vdp)
+		switch (pip->type) {
+		case P_BTREEMETA:
+			if (F_ISSET(pip, VRFY_IS_RECNO))
+				dbtype = DB_RECNO;
+			else
+				dbtype = DB_BTREE;
+			break;
+		case P_HASHMETA:
+			dbtype = DB_HASH;
+			break;
+		case P_HEAPMETA:
+			dbtype = DB_HEAP;
+			break;
+		case P_QAMMETA:
+			dbtype = DB_QUEUE;
+			break;
+		default:
+			/*
+			 * If the meta page is of a bogus type, it's because
+			 * we have a badly corrupt database.  (We must be in
+			 * the verifier for pip to be non-NULL.)  Pretend we're
+			 * a Btree and salvage what we can.
+			 */
+			DB_ASSERT(env, F_ISSET(dbp, DB_AM_VERIFYING));
+			dbtype = DB_BTREE;
+			break;
+		}
+	else
+		dbtype = dbp->type;
+
+	if ((ret = callback(handle, "VERSION=3\n")) != 0)
+		goto err;
+	if (pflag) {
+		if ((ret = callback(handle, "format=print\n")) != 0)
+			goto err;
+	} else if ((ret = callback(handle, "format=bytevalue\n")) != 0)
+		goto err;
+
+	/*
+	 * 64 bytes is long enough, as a minimum bound, for any of the
+	 * fields besides subname.  Subname uses __db_prdbt and therefore
+	 * does not need buffer space here.
+	 */
+	buflen = 64;
+	if ((ret = __os_malloc(env, buflen, &buf)) != 0)
+		goto err;
+	if (subname != NULL) {
+		snprintf(buf, buflen, "database=");
+		if ((ret = callback(handle, buf)) != 0)
+			goto err;
+		DB_INIT_DBT(dbt, subname, strlen(subname));
+		if ((ret = __db_prdbt(&dbt, 1,
+		    NULL, handle, callback, 0, 0)) != 0)
+			goto err;
+	}
+	/* Per-access-method header fields follow. */
+	switch (dbtype) {
+	case DB_BTREE:
+		if ((ret = callback(handle, "type=btree\n")) != 0)
+			goto err;
+		if (using_vdp)
+			tmp_int = F_ISSET(pip, VRFY_HAS_RECNUMS) ? 1 : 0;
+		else {
+			if ((ret = __db_get_flags(dbp, &flags)) != 0) {
+				__db_err(env, ret, "DB->get_flags");
+				goto err;
+			}
+			tmp_int = F_ISSET(dbp, DB_AM_RECNUM) ? 1 : 0;
+		}
+		if (tmp_int && (ret = callback(handle, "recnum=1\n")) != 0)
+			goto err;
+
+		if (using_vdp)
+			tmp_u_int32 = pip->bt_minkey;
+		else
+			if ((ret =
+			    __bam_get_bt_minkey(dbp, &tmp_u_int32)) != 0) {
+				__db_err(env, ret, "DB->get_bt_minkey");
+				goto err;
+			}
+		if (tmp_u_int32 != 0 && tmp_u_int32 != DEFMINKEYPAGE) {
+			snprintf(buf, buflen,
+			    "bt_minkey=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+		break;
+	case DB_HASH:
+#ifdef HAVE_HASH
+		if ((ret = callback(handle, "type=hash\n")) != 0)
+			goto err;
+		if (using_vdp)
+			tmp_u_int32 = pip->h_ffactor;
+		else
+			if ((ret =
+			    __ham_get_h_ffactor(dbp, &tmp_u_int32)) != 0) {
+				__db_err(env, ret, "DB->get_h_ffactor");
+				goto err;
+			}
+		if (tmp_u_int32 != 0) {
+			snprintf(buf, buflen,
+			    "h_ffactor=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+
+		if (using_vdp)
+			tmp_u_int32 = pip->h_nelem;
+		else
+			if ((ret = __ham_get_h_nelem(dbp, &tmp_u_int32)) != 0) {
+				__db_err(env, ret, "DB->get_h_nelem");
+				goto err;
+			}
+		/*
+		 * Hash databases have an h_nelem field of 0 or 1, neither
+		 * of those values is interesting.
+		 */
+		if (tmp_u_int32 > 1) {
+			snprintf(buf, buflen,
+			    "h_nelem=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+		break;
+#else
+		ret = __db_no_hash_am(env);
+		goto err;
+#endif
+	case DB_HEAP:
+#ifdef HAVE_HEAP
+		if ((ret = callback(handle, "type=heap\n")) != 0)
+			goto err;
+
+		if ((ret = __heap_get_heapsize(
+		    dbp, &tmp_u_int32, &tmp2_u_int32)) != 0) {
+			__db_err(env, ret, "DB->get_heapsize");
+			goto err;
+		}
+		if (tmp_u_int32 != 0) {
+			snprintf(buf,
+			    buflen, "heap_gbytes=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+
+		if (tmp2_u_int32 != 0) {
+			snprintf(buf,
+			    buflen, "heap_bytes=%lu\n", (u_long)tmp2_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+		break;
+#else
+		ret = __db_no_heap_am(env);
+		goto err;
+#endif
+	case DB_QUEUE:
+#ifdef HAVE_QUEUE
+		if ((ret = callback(handle, "type=queue\n")) != 0)
+			goto err;
+		if (using_vdp)
+			tmp_u_int32 = vdp->re_len;
+		else
+			if ((ret = __ram_get_re_len(dbp, &tmp_u_int32)) != 0) {
+				__db_err(env, ret, "DB->get_re_len");
+				goto err;
+			}
+		snprintf(buf, buflen, "re_len=%lu\n", (u_long)tmp_u_int32);
+		if ((ret = callback(handle, buf)) != 0)
+			goto err;
+
+		if (using_vdp)
+			tmp_int = (int)vdp->re_pad;
+		else
+			if ((ret = __ram_get_re_pad(dbp, &tmp_int)) != 0) {
+				__db_err(env, ret, "DB->get_re_pad");
+				goto err;
+			}
+		if (tmp_int != 0 && tmp_int != ' ') {
+			snprintf(buf, buflen, "re_pad=%#x\n", tmp_int);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+
+		if (using_vdp)
+			tmp_u_int32 = vdp->page_ext;
+		else
+			if ((ret =
+			    __qam_get_extentsize(dbp, &tmp_u_int32)) != 0) {
+				__db_err(env, ret, "DB->get_q_extentsize");
+				goto err;
+			}
+		if (tmp_u_int32 != 0) {
+			snprintf(buf, buflen,
+			    "extentsize=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+		break;
+#else
+		ret = __db_no_queue_am(env);
+		goto err;
+#endif
+	case DB_RECNO:
+		if ((ret = callback(handle, "type=recno\n")) != 0)
+			goto err;
+		if (using_vdp)
+			tmp_int = F_ISSET(pip, VRFY_IS_RRECNO) ? 1 : 0;
+		else
+			tmp_int = F_ISSET(dbp, DB_AM_RENUMBER) ? 1 : 0;
+		if (tmp_int != 0 &&
+		    (ret = callback(handle, "renumber=1\n")) != 0)
+			goto err;
+
+		if (using_vdp)
+			tmp_int = F_ISSET(pip, VRFY_IS_FIXEDLEN) ? 1 : 0;
+		else
+			tmp_int = F_ISSET(dbp, DB_AM_FIXEDLEN) ? 1 : 0;
+		/* re_len/re_pad only apply to fixed-length records. */
+		if (tmp_int) {
+			if (using_vdp)
+				tmp_u_int32 = pip->re_len;
+			else
+				if ((ret =
+				    __ram_get_re_len(dbp, &tmp_u_int32)) != 0) {
+					__db_err(env, ret, "DB->get_re_len");
+					goto err;
+				}
+			snprintf(buf, buflen,
+			    "re_len=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+
+			if (using_vdp)
+				tmp_int = (int)pip->re_pad;
+			else
+				if ((ret =
+				    __ram_get_re_pad(dbp, &tmp_int)) != 0) {
+					__db_err(env, ret, "DB->get_re_pad");
+					goto err;
+				}
+			if (tmp_int != 0 && tmp_int != ' ') {
+				snprintf(buf,
+				    buflen, "re_pad=%#x\n", (u_int)tmp_int);
+				if ((ret = callback(handle, buf)) != 0)
+					goto err;
+			}
+		}
+		break;
+	case DB_UNKNOWN:		/* Impossible. */
+		ret = __db_unknown_path(env, "__db_prheader");
+		goto err;
+	}
+
+	if (using_vdp) {
+		if (F_ISSET(pip, VRFY_HAS_CHKSUM))
+			if ((ret = callback(handle, "chksum=1\n")) != 0)
+				goto err;
+		if (F_ISSET(pip, VRFY_HAS_DUPS))
+			if ((ret = callback(handle, "duplicates=1\n")) != 0)
+				goto err;
+		if (F_ISSET(pip, VRFY_HAS_DUPSORT))
+			if ((ret = callback(handle, "dupsort=1\n")) != 0)
+				goto err;
+#ifdef HAVE_COMPRESSION
+		if (F_ISSET(pip, VRFY_HAS_COMPRESS))
+			if ((ret = callback(handle, "compressed=1\n")) != 0)
+				goto err;
+#endif
+		/*
+		 * !!!
+		 * We don't know if the page size was the default if we're
+		 * salvaging.  It doesn't seem that interesting to have, so
+		 * we ignore it for now.
+		 */
+	} else {
+		if (F_ISSET(dbp, DB_AM_CHKSUM))
+			if ((ret = callback(handle, "chksum=1\n")) != 0)
+				goto err;
+		if (F_ISSET(dbp, DB_AM_DUP))
+			if ((ret = callback(handle, "duplicates=1\n")) != 0)
+				goto err;
+		if (F_ISSET(dbp, DB_AM_DUPSORT))
+			if ((ret = callback(handle, "dupsort=1\n")) != 0)
+				goto err;
+#ifdef HAVE_COMPRESSION
+		if (DB_IS_COMPRESSED(dbp))
+			if ((ret = callback(handle, "compressed=1\n")) != 0)
+				goto err;
+#endif
+		if (!F_ISSET(dbp, DB_AM_PGDEF)) {
+			snprintf(buf, buflen,
+			    "db_pagesize=%lu\n", (u_long)dbp->pgsize);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+		}
+	}
+
+#ifdef HAVE_PARTITION
+	if (dbp != NULL && DB_IS_PARTITIONED(dbp) &&
+	    F_ISSET((DB_PARTITION *)dbp->p_internal, PART_RANGE)) {
+		DBT *keys;
+		u_int32_t i;
+
+		if ((ret = __partition_get_keys(dbp, &tmp_u_int32, &keys)) != 0)
+			goto err;
+		if (tmp_u_int32 != 0) {
+			snprintf(buf,
+			    buflen, "nparts=%lu\n", (u_long)tmp_u_int32);
+			if ((ret = callback(handle, buf)) != 0)
+				goto err;
+			for (i = 0; i < tmp_u_int32 - 1; i++)
+				if ((ret = __db_prdbt(&keys[i],
+				    pflag, " ", handle, callback, 0, 0)) != 0)
+					goto err;
+		}
+	}
+#endif
+
+	if (keyflag && (ret = callback(handle, "keys=1\n")) != 0)
+		goto err;
+
+	ret = callback(handle, "HEADER=END\n");
+
+err:	if (using_vdp &&
+	    (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
+		ret = t_ret;
+	if (buf != NULL)
+		__os_free(env, buf);
+
+	return (ret);
+}
+
+/*
+ * __db_prfooter --
+ *	Print the footer that marks the end of a DB dump.  This is trivial,
+ *	but for consistency's sake we don't want to put its literal contents
+ *	in multiple places.
+ *
+ * PUBLIC: int __db_prfooter __P((void *, int (*)(void *, const void *)));
+ */
+int
+__db_prfooter(handle, callback)
+	void *handle;
+	int (*callback) __P((void *, const void *));
+{
+	return (callback(handle, "DATA=END\n"));
+}
+
+/*
+ * __db_pr_callback --
+ *	Callback function for using pr_* functions from C.
+ * + * PUBLIC: int __db_pr_callback __P((void *, const void *)); + */ +int +__db_pr_callback(handle, str_arg) + void *handle; + const void *str_arg; +{ + char *str; + FILE *f; + + str = (char *)str_arg; + f = (FILE *)handle; + + if (fprintf(f, "%s", str) != (int)strlen(str)) + return (EIO); + + return (0); +} + +/* + * __db_dbtype_to_string -- + * Return the name of the database type. + * + * PUBLIC: const char * __db_dbtype_to_string __P((DBTYPE)); + */ +const char * +__db_dbtype_to_string(type) + DBTYPE type; +{ + switch (type) { + case DB_BTREE: + return ("btree"); + case DB_HASH: + return ("hash"); + case DB_RECNO: + return ("recno"); + case DB_QUEUE: + return ("queue"); + case DB_HEAP: + return ("heap"); + case DB_UNKNOWN: + default: + break; + } + return ("UNKNOWN TYPE"); +} diff --git a/src/db/db_rec.c b/src/db/db_rec.c new file mode 100644 index 00000000..329cce47 --- /dev/null +++ b/src/db/db_rec.c @@ -0,0 +1,2779 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/lock.h" +#include "dbinc/fop.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" + +static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *, + __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); +static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *, + __db_pg_freedata_42_args *, + DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); + +/* + * PUBLIC: int __db_addrem_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * This log message is generated whenever we add or remove a duplicate + * to/from a duplicate page. On recover, we just do the opposite. 
+ */ +int +__db_addrem_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_addrem_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + u_int32_t opcode; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_addrem_print); + REC_INTRO(__db_addrem_read, ip, 1); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + modified = 0; + + opcode = OP_MODE_GET(argp->opcode); + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_DUP) || + (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_DUP)) { + /* Need to redo an add, or undo a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes, + argp->hdr.size == 0 ? NULL : &argp->hdr, + argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) + goto out; + modified = 1; + + } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_DUP) || + (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_DUP)) { + /* Need to undo an add, or redo a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __db_ditem(dbc, + pagep, argp->indx, argp->nbytes)) != 0) + goto out; + modified = 1; + } + + if (modified) { + if (DB_REDO(op)) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * PUBLIC: int __db_addrem_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * This log message is generated whenever we add or remove a duplicate + * to/from a duplicate page. 
On recover, we just do the opposite. + */ +int +__db_addrem_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_addrem_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_addrem_print); + REC_INTRO(__db_addrem_42_read, ip, 1); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + modified = 0; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) { + /* Need to redo an add, or undo a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes, + argp->hdr.size == 0 ? NULL : &argp->hdr, + argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) + goto out; + modified = 1; + + } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) || + (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) { + /* Need to undo an add, or redo a delete. 
*/ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __db_ditem(dbc, + pagep, argp->indx, argp->nbytes)) != 0) + goto out; + modified = 1; + } + + if (modified) { + if (DB_REDO(op)) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * PUBLIC: int __db_big_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_big_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_big_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + u_int32_t opcode; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_big_print); + REC_INTRO(__db_big_read, ip, 0); + + opcode = OP_MODE_GET(argp->opcode); + REC_FGET(mpf, ip, argp->pgno, &pagep, ppage); + modified = 0; + + /* + * There are three pages we need to check. The one on which we are + * adding data, the previous one whose next_pointer may have + * been updated, and the next one whose prev_pointer may have + * been updated. + */ + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) || + (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_BIG)) { + /* We are either redo-ing an add, or undoing a delete. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno, + argp->next_pgno, 0, P_OVERFLOW); + OV_LEN(pagep) = argp->dbt.size; + OV_REF(pagep) = 1; + memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data, + argp->dbt.size); + PREV_PGNO(pagep) = argp->prev_pgno; + modified = 1; + } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_BIG) || + (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_BIG)) { + /* + * We are either undo-ing an add or redo-ing a delete. + * The page is about to be reclaimed in either case, so + * there really isn't anything to do here. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + modified = 1; + } else if (cmp_p == 0 && DB_REDO(op) && opcode == DB_APPEND_BIG) { + /* We are redoing an append. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + + OV_LEN(pagep), argp->dbt.data, argp->dbt.size); + OV_LEN(pagep) += argp->dbt.size; + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op) && opcode == DB_APPEND_BIG) { + /* We are undoing an append. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + OV_LEN(pagep) -= argp->dbt.size; + memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + + OV_LEN(pagep), 0, argp->dbt.size); + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + + /* + * We only delete a whole chain of overflow items, and appends only + * apply to a single page. Adding a page is the only case that + * needs to update the chain. + */ +ppage: if (opcode != DB_ADD_BIG) + goto done; + + /* Now check the previous page. 
*/ + if (argp->prev_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage); + modified = 0; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if (cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) { + /* Redo add, undo delete. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + NEXT_PGNO(pagep) = argp->pgno; + modified = 1; + } else if (cmp_n == 0 && + DB_UNDO(op) && opcode == DB_ADD_BIG) { + /* Redo delete, undo add. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + NEXT_PGNO(pagep) = argp->next_pgno; + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + } + pagep = NULL; + + /* Now check the next page. Can only be set on a delete. */ +npage: if (argp->next_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->next_pgno, &pagep, done); + modified = 0; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = PGNO_INVALID; + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = argp->pgno; + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? 
*lsnp : argp->nextlsn; + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + } + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * PUBLIC: int __db_big_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_big_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_big_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_big_print); + REC_INTRO(__db_big_42_read, ip, 0); + + REC_FGET(mpf, ip, argp->pgno, &pagep, ppage); + modified = 0; + + /* + * There are three pages we need to check. The one on which we are + * adding data, the previous one whose next_pointer may have + * been updated, and the next one whose prev_pointer may have + * been updated. + */ + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) { + /* We are either redo-ing an add, or undoing a delete. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno, + argp->next_pgno, 0, P_OVERFLOW); + OV_LEN(pagep) = argp->dbt.size; + OV_REF(pagep) = 1; + memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data, + argp->dbt.size); + PREV_PGNO(pagep) = argp->prev_pgno; + modified = 1; + } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) || + (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) { + /* + * We are either undo-ing an add or redo-ing a delete. + * The page is about to be reclaimed in either case, so + * there really isn't anything to do here. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + modified = 1; + } else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) { + /* We are redoing an append. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + + OV_LEN(pagep), argp->dbt.data, argp->dbt.size); + OV_LEN(pagep) += argp->dbt.size; + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) { + /* We are undoing an append. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + OV_LEN(pagep) -= argp->dbt.size; + memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + + OV_LEN(pagep), 0, argp->dbt.size); + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + + /* + * We only delete a whole chain of overflow items, and appends only + * apply to a single page. Adding a page is the only case that + * needs to update the chain. + */ +ppage: if (argp->opcode != DB_ADD_BIG) + goto done; + + /* Now check the previous page. 
*/ + if (argp->prev_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage); + modified = 0; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) { + /* Redo add, undo delete. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + NEXT_PGNO(pagep) = argp->pgno; + modified = 1; + } else if (cmp_n == 0 && + DB_UNDO(op) && argp->opcode == DB_ADD_BIG) { + /* Redo delete, undo add. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + NEXT_PGNO(pagep) = argp->next_pgno; + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + } + pagep = NULL; + + /* Now check the next page. Can only be set on a delete. */ +npage: if (argp->next_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->next_pgno, &pagep, done); + modified = 0; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = PGNO_INVALID; + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = argp->pgno; + modified = 1; + } + if (modified) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + } + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} +/* + * __db_ovref_recover -- + * Recovery function for __db_ovref(). 
+ *
+ * PUBLIC: int __db_ovref_recover
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__db_ovref_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__db_ovref_args *argp;
+	DB_THREAD_INFO *ip;
+	DB *file_dbp;
+	DBC *dbc;
+	DB_MPOOLFILE *mpf;
+	PAGE *pagep;
+	int cmp, ret;
+
+	ip = ((DB_TXNHEAD *)info)->thread_info;
+	pagep = NULL;
+	REC_PRINT(__db_ovref_print);
+	/*
+	 * NOTE(review): REC_INTRO presumably decodes the log record into
+	 * argp and resolves file_dbp/dbc/mpf from the logged file id, and
+	 * jumps to out/done on failure -- confirm against the macro
+	 * definition; the same assumption applies to REC_FGET below.
+	 */
+	REC_INTRO(__db_ovref_read, ip, 0);
+
+	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
+
+	/*
+	 * Redo when the page still carries the pre-operation LSN recorded
+	 * in the log record; undo when the page carries this record's own
+	 * LSN (i.e. the adjustment was already applied).
+	 */
+	cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
+	CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
+	if (cmp == 0 && DB_REDO(op)) {
+		/* Need to redo update described. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		OV_REF(pagep) += argp->adjust;
+		pagep->lsn = *lsnp;
+	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
+		/* Need to undo update described. */
+		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
+		OV_REF(pagep) -= argp->adjust;
+		pagep->lsn = argp->lsn;
+	}
+	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
+	pagep = NULL;
+	if (ret != 0)
+		goto out;
+	/* NOTE(review): redundant -- pagep was already set to NULL above. */
+	pagep = NULL;
+
+done:	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+out:	if (pagep != NULL)
+		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
+	REC_CLOSE;
+}
+
+/*
+ * __db_debug_recover --
+ *	Recovery function for debug.
+ *
+ *	Effectively a no-op: a debug record carries no page state to roll
+ *	forward or back, so recovery only steps past it by returning the
+ *	record's prev_lsn.
+ *
+ * PUBLIC: int __db_debug_recover __P((ENV *,
+ * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__db_debug_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__db_debug_args *argp;
+	int ret;
+
+	COMPQUIET(op, DB_TXN_ABORT);
+	COMPQUIET(info, NULL);
+
+	REC_PRINT(__db_debug_print);
+	REC_NOOP_INTRO(__db_debug_read);
+
+	/* Hand back the previous LSN so the recovery walk continues. */
+	*lsnp = argp->prev_lsn;
+	ret = 0;
+
+	REC_NOOP_CLOSE;
+}
+
+/*
+ * __db_noop_recover --
+ *	Recovery function for noop.
+ * + * PUBLIC: int __db_noop_recover __P((ENV *, + * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_noop_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_noop_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_noop_print); + REC_INTRO(__db_noop_read, ip, 0); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = argp->prevlsn; + } + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + +done: *lsnp = argp->prev_lsn; +out: if (pagep != NULL) + (void)__memp_fput(mpf, + ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __db_pg_alloc_recover -- + * Recovery function for pg_alloc. + * + * PUBLIC: int __db_pg_alloc_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_alloc_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_alloc_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DBMETA *meta; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno; + int cmp_n, cmp_p, created, level, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + meta = NULL; + pagep = NULL; + created = 0; + REC_PRINT(__db_pg_alloc_print); + REC_INTRO(__db_pg_alloc_read, ip, 0); + + /* + * Fix up the metadata page. If we're redoing the operation, we have + * to get the metadata page and update its LSN and its free pointer. 
+ * If we're undoing the operation and the page was ever created, we put + * it on the freelist. + */ + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { + /* The metadata page must always exist on redo. */ + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else + goto done; + } + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + LSN(meta) = *lsnp; + meta->free = argp->next; + if (argp->pgno > meta->last_pgno) + meta->last_pgno = argp->pgno; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + LSN(meta) = argp->meta_lsn; + /* + * If the page has a zero LSN then its newly created and + * will be truncated rather than go on the free list. + */ + if (!IS_ZERO_LSN(argp->page_lsn)) + meta->free = argp->pgno; + meta->last_pgno = argp->last_pgno; + } + +#ifdef HAVE_FTRUNCATE + /* + * check to see if we are keeping a sorted freelist, if so put + * this back in the in memory list. It must be the first element. + */ + if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) { + db_pgno_t *list; + u_int32_t nelem; + + if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0) + goto out; + if (list != NULL && (nelem == 0 || *list != argp->pgno)) { + if ((ret = + __memp_extend_freelist(mpf, nelem + 1, &list)) != 0) + goto out; + if (nelem != 0) + memmove(list + 1, list, nelem * sizeof(*list)); + *list = argp->pgno; + } + } +#endif + + /* + * Fix up the allocated page. If the page does not exist + * and we can truncate it then don't create it. 
+ * Otherwise if we're redoing the operation, we have + * to get the page (creating it if it doesn't exist), and update its + * LSN. If we're undoing the operation, we have to reset the page's + * LSN and put it on the free list. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + /* + * We have to be able to identify if a page was newly + * created so we can recover it properly. We cannot simply + * look for an empty header, because hash uses a pgin + * function that will set the header. Instead, we explicitly + * try for the page without CREATE and if that fails, then + * create it. + */ + if (DB_UNDO(op)) + goto do_truncate; + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + DB_MPOOL_CREATE, &pagep)) != 0) { + if (DB_UNDO(op) && ret == ENOSPC) + goto do_truncate; + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + created = 1; + } + + /* Fix up the allocated page. */ + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); + + /* + * If an initial allocation is aborted and then reallocated during + * an archival restore the log record will have an LSN for the page + * but the page will be empty. + */ + if (IS_ZERO_LSN(LSN(pagep))) + cmp_p = 0; + + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); + /* + * Another special case we have to handle is if we ended up with a + * page of all 0's which can happen if we abort between allocating a + * page in mpool and initializing it. In that case, even if we're + * undoing, we need to re-initialize the page. + */ + if (DB_REDO(op) && cmp_p == 0) { + /* Need to redo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + switch (argp->ptype) { + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + level = LEAFLEVEL; + break; + default: + level = 0; + break; + } + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); + + pagep->lsn = *lsnp; + } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { + /* + * This is where we handle the case of a 0'd page (pagep->pgno + * is equal to PGNO_INVALID). + * Undo the allocation, reinitialize the page and + * link its next pointer to the free list. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); + + pagep->lsn = argp->page_lsn; + } + +do_truncate: + /* + * If the page was newly created, give it back. + */ + if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && + IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { + /* Discard the page. */ + if (pagep != NULL) { + if ((ret = __memp_fput(mpf, ip, + pagep, DB_PRIORITY_VERY_LOW)) != 0) + goto out; + pagep = NULL; + } + /* Give the page back to the OS. 
*/ + if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate( + mpf, NULL, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0) + goto out; + } + + if (pagep != NULL) { + ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); + pagep = NULL; + if (ret != 0) + goto out; + } + + ret = __memp_fput(mpf, ip, meta, file_dbp->priority); + meta = NULL; + if (ret != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + REC_CLOSE; +} + +/* + * __db_pg_free_recover_int -- + */ +static int +__db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) + ENV *env; + DB_THREAD_INFO *ip; + __db_pg_freedata_args *argp; + DB *file_dbp; + DB_LSN *lsnp; + DB_MPOOLFILE *mpf; + db_recops op; + int data; +{ + DBMETA *meta; + DB_LSN copy_lsn; + PAGE *pagep, *prevp; + int cmp_n, cmp_p, is_meta, ret; + + meta = NULL; + pagep = prevp = NULL; + + /* + * Get the "metapage". This will either be the metapage + * or the previous page in the free list if we are doing + * sorted allocations. If its a previous page then + * we will not be truncating. + */ + is_meta = argp->meta_pgno == PGNO_BASE_MD; + + REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); + + if (argp->meta_pgno != PGNO_BASE_MD) + prevp = (PAGE *)meta; + + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); + + /* + * Fix up the metadata page. If we're redoing or undoing the operation + * we get the page and update its LSN, last and free pointer. + */ + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + /* + * If we are at the end of the file truncate, otherwise + * put on the free list. 
+ */ + if (argp->pgno == argp->last_pgno) + meta->last_pgno = argp->pgno - 1; + else if (is_meta) + meta->free = argp->pgno; + else + NEXT_PGNO(prevp) = argp->pgno; + LSN(meta) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo the deallocation. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + if (is_meta) { + if (meta->last_pgno < argp->pgno) + meta->last_pgno = argp->pgno; + meta->free = argp->next; + } else + NEXT_PGNO(prevp) = argp->next; + LSN(meta) = argp->meta_lsn; + } + +check_meta: + if (ret != 0 && is_meta) { + /* The metadata page must always exist. */ + ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); + goto out; + } + + /* + * Get the freed page. Don't create the page if we are going to + * free it. If we're redoing the operation we get the page and + * explicitly discard its contents, then update its LSN. If we're + * undoing the operation, we get the page and restore its header. + */ + if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) { + if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) + goto out; + if (is_meta && + DB_REDO(op) && meta->last_pgno <= argp->pgno) + goto trunc; + goto done; + } + } else if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); + cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); + + /* + * This page got extended by a later allocation, + * but its allocation was not in the scope of this + * recovery pass. + */ + if (IS_ZERO_LSN(LSN(pagep))) + cmp_p = 0; + + CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); + /* + * We need to check that the page could have the current LSN + * which was copied before it was truncated in addition to + * the usual of having the previous LSN. 
+ */ + if (DB_REDO(op) && + (cmp_p == 0 || cmp_n == 0 || + (IS_ZERO_LSN(copy_lsn) && + LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { + /* Need to redo the deallocation. */ + /* + * The page can be truncated if it was truncated at runtime + * and the current metapage reflects the truncation. + */ + if (is_meta && meta->last_pgno <= argp->pgno && + argp->last_pgno <= argp->pgno) { + if ((ret = __memp_fput(mpf, ip, + pagep, DB_PRIORITY_VERY_LOW)) != 0) + goto out; + pagep = NULL; +trunc: if ((ret = __memp_ftruncate(mpf, NULL, ip, + argp->pgno, MP_TRUNC_RECOVER)) != 0) + goto out; + } else if (argp->last_pgno == argp->pgno) { + /* The page was truncated at runtime, zero it out. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, 0, PGNO_INVALID, + PGNO_INVALID, PGNO_INVALID, 0, P_INVALID); + ZERO_LSN(pagep->lsn); + } else if (cmp_p == 0 || IS_ZERO_LSN(LSN(pagep))) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); + pagep->lsn = *lsnp; + + } + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to reallocate the page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->header.data, argp->header.size); + if (data) + memcpy((u_int8_t*)pagep + HOFFSET(pagep), + argp->data.data, argp->data.size); + } + if (pagep != NULL && + (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + + pagep = NULL; +#ifdef HAVE_FTRUNCATE + /* + * If we are keeping an in memory free list remove this + * element from the list. + */ + if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) { + db_pgno_t *lp; + u_int32_t nelem, pos; + + if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0) + goto out; + if (lp != NULL) { + pos = 0; + if (!is_meta) { + __db_freelist_pos(argp->pgno, lp, nelem, &pos); + + /* + * If we aborted after logging but before + * updating the free list don't do anything. 
+ */ + if (argp->pgno != lp[pos]) { + DB_ASSERT(env, + argp->meta_pgno == lp[pos]); + goto done; + } + DB_ASSERT(env, + argp->meta_pgno == lp[pos - 1]); + } else if (nelem != 0 && argp->pgno != lp[pos]) + goto done; + + if (pos < nelem) + memmove(&lp[pos], &lp[pos + 1], + ((nelem - pos) - 1) * sizeof(*lp)); + + /* Shrink the list */ + if ((ret = + __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0) + goto out; + } + } +#endif +done: + if (meta != NULL && + (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + + return (ret); +} + +/* + * __db_pg_free_recover -- + * Recovery function for pg_free. + * + * PUBLIC: int __db_pg_free_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_free_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_free_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + int ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_pg_free_print); + REC_INTRO(__db_pg_free_read, ip, 0); + + if ((ret = __db_pg_free_recover_int(env, ip, + (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; +out: + REC_CLOSE; +} + +/* + * __db_pg_freedata_recover -- + * Recovery function for pg_freedata. 
+ *
+ * PUBLIC: int __db_pg_freedata_recover
+ * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__db_pg_freedata_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__db_pg_freedata_args *argp;
+	DB *file_dbp;
+	DBC *dbc;
+	DB_MPOOLFILE *mpf;
+	DB_THREAD_INFO *ip;
+	int ret;
+
+	ip = ((DB_TXNHEAD *)info)->thread_info;
+	REC_PRINT(__db_pg_freedata_print);
+	REC_INTRO(__db_pg_freedata_read, ip, 0);
+
+	/*
+	 * Delegate to the shared pg_free/pg_freedata helper; the final
+	 * argument (data == 1) tells it to restore the page's data area
+	 * as well as its header on undo, unlike __db_pg_free_recover,
+	 * which passes 0.
+	 */
+	if ((ret = __db_pg_free_recover_int(env,
+	    ip, argp, file_dbp, lsnp, mpf, op, 1)) != 0)
+		goto out;
+
+done:	*lsnp = argp->prev_lsn;
+out:
+	REC_CLOSE;
+}
+
+/*
+ * __db_cksum_recover --
+ *	Recovery function for checksum failure log record.
+ *
+ *	A checksum-failure record means the log itself is suspect: unless
+ *	catastrophic recovery is already running (ENV_RECOVER_FATAL, in
+ *	which case the record is ignored), panic the environment.
+ *
+ * PUBLIC: int __db_cksum_recover __P((ENV *,
+ * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
+ */
+int
+__db_cksum_recover(env, dbtp, lsnp, op, info)
+	ENV *env;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	db_recops op;
+	void *info;
+{
+	__db_cksum_args *argp;
+	int ret;
+
+	COMPQUIET(info, NULL);
+	COMPQUIET(lsnp, NULL);
+	COMPQUIET(op, DB_TXN_ABORT);
+
+	REC_PRINT(__db_cksum_print);
+
+	if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
+		return (ret);
+
+	/*
+	 * We had a checksum failure -- the only option is to run catastrophic
+	 * recovery.
+	 */
+	if (F_ISSET(env, ENV_RECOVER_FATAL))
+		ret = 0;
+	else {
+		__db_errx(env, DB_STR("0642",
+		    "Checksum failure requires catastrophic recovery"));
+		ret = __env_panic(env, DB_RUNRECOVERY);
+	}
+
+	/* argp was allocated by __db_cksum_read; free it on every path. */
+	__os_free(env, argp);
+	return (ret);
+}
+
+/*
+ * __db_pg_init_recover --
+ *	Recovery function to reinit pages after truncation.
+ * + * PUBLIC: int __db_pg_init_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_init_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_init_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN copy_lsn; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret, type; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_pg_init_print); + REC_INTRO(__db_pg_init_read, ip, 0); + + mpf = file_dbp->mpf; + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + if (ret == DB_PAGE_NOTFOUND) + goto done; + else { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + /* + * This page was truncated and may simply not have + * had an item written to it yet. This should only + * happen on hash databases, so confirm that. + */ + DB_ASSERT(env, file_dbp->type == DB_HASH); + if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if (cmp_p == 0 && DB_REDO(op)) { + if (TYPE(pagep) == P_HASH) + type = P_HASH; + else + type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE; + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID, + PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type); + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Put the data back on the page. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->header.data, argp->header.size); + if (argp->data.size > 0) + memcpy((u_int8_t*)pagep + HOFFSET(pagep), + argp->data.data, argp->data.size); + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; +out: + REC_CLOSE; +} + +/* + * __db_pg_trunc_recover -- + * Recovery function for pg_trunc. + * + * PUBLIC: int __db_pg_trunc_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_trunc_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ +#ifdef HAVE_FTRUNCATE + __db_pg_trunc_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DBMETA *meta; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pglist_t *pglist, *lp; + db_pgno_t last_pgno, *list; + u_int32_t felem, nelem, pos; + int ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_pg_trunc_print); + REC_INTRO(__db_pg_trunc_read, ip, 1); + + pglist = (db_pglist_t *) argp->list.data; + nelem = argp->list.size / sizeof(db_pglist_t); + if (DB_REDO(op)) { + /* + * First call __db_pg_truncate to find the truncation + * point, truncate the file and return the new last_pgno. + */ + last_pgno = argp->last_pgno; + if ((ret = __db_pg_truncate(dbc, NULL, pglist, + NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0) + goto out; + + if (argp->last_free != PGNO_INVALID) { + /* + * Update the next pointer of the last page in + * the freelist. If the truncation point is + * beyond next_free then this is still in the freelist + * otherwise the last_free page is at the end. 
+ */ + if ((ret = __memp_fget(mpf, + &argp->last_free, ip, NULL, 0, &meta)) == 0) { + if (LOG_COMPARE(&LSN(meta), + &argp->last_lsn) == 0) { + REC_DIRTY(mpf, + ip, dbc->priority, &meta); + if (pglist->pgno > last_pgno) + NEXT_PGNO(meta) = PGNO_INVALID; + else + NEXT_PGNO(meta) = pglist->pgno; + LSN(meta) = *lsnp; + } + if ((ret = __memp_fput(mpf, ip, + meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + } + if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL, + 0, &meta)) != 0) + goto out; + if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + if (argp->last_free == PGNO_INVALID) { + if (nelem == 0) + meta->free = PGNO_INVALID; + else + meta->free = pglist->pgno; + } + meta->last_pgno = last_pgno; + LSN(meta) = *lsnp; + } + } else { + /* Put the free list back in its original order. */ + for (lp = pglist; lp < &pglist[nelem]; lp++) { + if ((ret = __memp_fget(mpf, &lp->pgno, ip, + NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + if (IS_ZERO_LSN(LSN(pagep)) || + LOG_COMPARE(&LSN(pagep), lsnp) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, lp->pgno, + PGNO_INVALID, lp->next_pgno, 0, P_INVALID); + LSN(pagep) = lp->lsn; + } + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + } + /* + * Link the truncated part back into the free list. + * Its either after the last_free page or direclty + * linked to the metadata page. 
+ */ + if (argp->last_free != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->last_free, + ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) { + if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { + NEXT_PGNO(meta) = argp->next_free; + LSN(meta) = argp->last_lsn; + } + if ((ret = __memp_fput(mpf, ip, + meta, file_dbp->priority)) != 0) + goto out; + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + meta = NULL; + } + if ((ret = __memp_fget(mpf, &argp->meta, + ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) + goto out; + if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + /* + * If we had to break up the list last_pgno + * may only represent the end of the block. + */ + if (meta->last_pgno < argp->last_pgno) + meta->last_pgno = argp->last_pgno; + if (argp->last_free == PGNO_INVALID) + meta->free = argp->next_free; + LSN(meta) = argp->meta_lsn; + } + } + + if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + + if (op == DB_TXN_ABORT) { + /* + * Put the pages back on the in memory free list. + * If this is part of a multi-record truncate then + * we need to find this batch, it may not be at the end. + * If we aborted while writing one of the log records + * then this set may still be in the list. 
+ */ + if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) + goto out; + if (list != NULL) { + if (felem != 0 && list[felem - 1] > pglist->pgno) { + __db_freelist_pos( + pglist->pgno, list, felem, &pos); + DB_ASSERT(env, pos < felem); + if (pglist->pgno == list[pos]) + goto done; + pos++; + } else if (felem != 0 && + list[felem - 1] == pglist->pgno) + goto done; + else + pos = felem; + if ((ret = __memp_extend_freelist( + mpf, felem + nelem, &list)) != 0) + goto out; + if (pos != felem) + memmove(&list[nelem + pos], &list[pos], + sizeof(*list) * (felem - pos)); + for (lp = pglist; lp < &pglist[nelem]; lp++) + list[pos++] = lp->pgno; + } + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +#else + /* + * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records + * to recover. + */ + COMPQUIET(env, NULL); + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(op, DB_TXN_ABORT); + COMPQUIET(info, NULL); + return (EINVAL); +#endif +} +/* + * __db_realloc_recover -- + * Recovery function for realloc. + * + * PUBLIC: int __db_realloc_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_realloc_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_realloc_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + db_pglist_t *pglist, *lp; + db_pgno_t *list; + u_int32_t felem, nelem, pos; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__db_realloc_print); + REC_INTRO(__db_realloc_read, ip, 1); + mpf = file_dbp->mpf; + + /* + * First, iterate over all the pages and make sure they are all in + * their prior or new states (according to the op). 
+ */ + pglist = (db_pglist_t *) argp->list.data; + nelem = argp->list.size / sizeof(db_pglist_t); + for (lp = pglist; lp < &pglist[nelem]; lp++) { + if ((ret = __memp_fget(mpf, &lp->pgno, ip, + NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + if (DB_REDO(op) && LOG_COMPARE(&LSN(pagep), &lp->lsn) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, lp->pgno, + PGNO_INVALID, PGNO_INVALID, 0, argp->ptype); + LSN(pagep) = *lsnp; + } else if (DB_UNDO(op) && (IS_ZERO_LSN(LSN(pagep)) || + LOG_COMPARE(&LSN(pagep), lsnp) == 0)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, lp->pgno, + PGNO_INVALID, lp->next_pgno, 0, P_INVALID); + LSN(pagep) = lp->lsn; + } + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + } + + /* Now, fix up the free list. */ + if ((ret = __memp_fget(mpf, + &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0) + goto out; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if (DB_REDO(op) && cmp_p == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (argp->prev_pgno == PGNO_BASE_MD) + ((DBMETA *)pagep)->free = argp->next_free; + else + NEXT_PGNO(pagep) = argp->next_free; + LSN(pagep) = *lsnp; + } else if (DB_UNDO(op) && cmp_n == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (argp->prev_pgno == PGNO_BASE_MD) + ((DBMETA *)pagep)->free = pglist->pgno; + else + NEXT_PGNO(pagep) = pglist->pgno; + LSN(pagep) = argp->page_lsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + +#ifdef HAVE_FTRUNCATE + if (op == DB_TXN_ABORT) { + /* Put the pages back in the sorted list. 
*/ + if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) + goto out; + if (list != NULL) { + __db_freelist_pos(pglist->pgno, list, felem, &pos); + if (pglist->pgno == list[pos]) + goto done; + if ((ret = __memp_extend_freelist( + mpf, felem + nelem, &list)) != 0) + goto out; + pos++; + if (pos != felem) + memmove(&list[pos+nelem], + &list[pos], nelem * sizeof(*list)); + for (lp = pglist; lp < &pglist[nelem]; lp++) + list[pos++] = lp->pgno; + } + } +#endif + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} +/* + * __db_pg_sort_44_recover -- + * Recovery function for pg_sort. + * This is deprecated and kept for replication upgrades. + * + * PUBLIC: int __db_pg_sort_44_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_sort_44_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ +#ifdef HAVE_FTRUNCATE + __db_pg_sort_44_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DBMETA *meta; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pglist_t *pglist, *lp; + db_pgno_t pgno, *list; + u_int32_t felem, nelem; + int ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_pg_sort_44_print); + REC_INTRO(__db_pg_sort_44_read, ip, 1); + + pglist = (db_pglist_t *) argp->list.data; + nelem = argp->list.size / sizeof(db_pglist_t); + if (DB_REDO(op)) { + pgno = argp->last_pgno; + __db_freelist_sort(pglist, nelem); + if ((ret = __db_pg_truncate(dbc, NULL, + pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0) + goto out; + + if (argp->last_free != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, + &argp->last_free, ip, NULL, 0, &meta)) == 0) { + if (LOG_COMPARE(&LSN(meta), + &argp->last_lsn) == 0) { + REC_DIRTY(mpf, + ip, dbc->priority, &meta); + NEXT_PGNO(meta) = PGNO_INVALID; + LSN(meta) = *lsnp; + } + if ((ret = __memp_fput(mpf, ip, + meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + } + 
if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL, + 0, &meta)) != 0) + goto out; + if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + if (argp->last_free == PGNO_INVALID) { + if (nelem == 0) + meta->free = PGNO_INVALID; + else + meta->free = pglist->pgno; + } + meta->last_pgno = pgno; + LSN(meta) = *lsnp; + } + } else { + /* Put the free list back in its original order. */ + for (lp = pglist; lp < &pglist[nelem]; lp++) { + if ((ret = __memp_fget(mpf, &lp->pgno, ip, + NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + if (IS_ZERO_LSN(LSN(pagep)) || + LOG_COMPARE(&LSN(pagep), lsnp) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (lp == &pglist[nelem - 1]) + pgno = PGNO_INVALID; + else + pgno = lp[1].pgno; + + P_INIT(pagep, file_dbp->pgsize, + lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID); + LSN(pagep) = lp->lsn; + } + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + } + if (argp->last_free != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &argp->last_free, + ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) { + if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { + NEXT_PGNO(meta) = pglist->pgno; + LSN(meta) = argp->last_lsn; + } + if ((ret = __memp_fput(mpf, ip, + meta, file_dbp->priority)) != 0) + goto out; + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + meta = NULL; + } + if ((ret = __memp_fget(mpf, &argp->meta, + ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) + goto out; + if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->last_pgno = argp->last_pgno; + if (argp->last_free == PGNO_INVALID) + meta->free = pglist->pgno; + LSN(meta) = argp->meta_lsn; + } + } + if (op == DB_TXN_ABORT) { + if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) + goto out; + if (list != NULL) { + DB_ASSERT(env, felem == 0 || + argp->last_free == list[felem - 1]); + if ((ret = __memp_extend_freelist( + mpf, felem + nelem, &list)) != 0) + goto out; + for (lp = pglist; lp < 
&pglist[nelem]; lp++) + list[felem++] = lp->pgno; + } + } + + if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +#else + /* + * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records + * to recover. + */ + COMPQUIET(env, NULL); + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(op, DB_TXN_ABORT); + COMPQUIET(info, NULL); + return (EINVAL); +#endif +} + +/* + * __db_pg_alloc_42_recover -- + * Recovery function for pg_alloc. + * + * PUBLIC: int __db_pg_alloc_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pg_alloc_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_pg_alloc_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DBMETA *meta; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno; + int cmp_n, cmp_p, created, level, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + meta = NULL; + pagep = NULL; + created = 0; + REC_PRINT(__db_pg_alloc_42_print); + REC_INTRO(__db_pg_alloc_42_read, ip, 0); + + /* + * Fix up the metadata page. If we're redoing the operation, we have + * to get the metadata page and update its LSN and its free pointer. + * If we're undoing the operation and the page was ever created, we put + * it on the freelist. + */ + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { + /* The metadata page must always exist on redo. */ + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else + goto done; + } + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + LSN(meta) = *lsnp; + meta->free = argp->next; + if (argp->pgno > meta->last_pgno) + meta->last_pgno = argp->pgno; + } else if (cmp_n == 0 && DB_UNDO(op)) { + goto no_rollback; + } + + /* + * Fix up the allocated page. If the page does not exist + * and we can truncate it then don't create it. + * Otherwise if we're redoing the operation, we have + * to get the page (creating it if it doesn't exist), and update its + * LSN. If we're undoing the operation, we have to reset the page's + * LSN and put it on the free list, or truncate it. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + /* + * We have to be able to identify if a page was newly + * created so we can recover it properly. We cannot simply + * look for an empty header, because hash uses a pgin + * function that will set the header. Instead, we explicitly + * try for the page without CREATE and if that fails, then + * create it. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { + if (DB_UNDO(op) && ret == ENOSPC) + goto do_truncate; + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + created = 1; + } + + /* Fix up the allocated page. */ + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); + + /* + * If an initial allocation is aborted and then reallocated during + * an archival restore the log record will have an LSN for the page + * but the page will be empty. + */ + if (IS_ZERO_LSN(LSN(pagep)) || + (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep)))) + cmp_p = 0; + + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); + /* + * Another special case we have to handle is if we ended up with a + * page of all 0's which can happen if we abort between allocating a + * page in mpool and initializing it. In that case, even if we're + * undoing, we need to re-initialize the page. 
+ */ + if (DB_REDO(op) && cmp_p == 0) { + /* Need to redo update described. */ + switch (argp->ptype) { + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + level = LEAFLEVEL; + break; + default: + level = 0; + break; + } + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); + + pagep->lsn = *lsnp; + } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { + /* + * This is where we handle the case of a 0'd page (pagep->pgno + * is equal to PGNO_INVALID). + * Undo the allocation, reinitialize the page and + * link its next pointer to the free list. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); + + pagep->lsn = argp->page_lsn; + } + +do_truncate: + /* + * We cannot undo things from 4.2 land, because we nolonger + * have limbo processing. + */ + if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && + IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { +no_rollback: __db_errx(env, DB_STR("0643", +"Cannot replicate prepared transactions from master running release 4.2 ")); + ret = __env_panic(env, EINVAL); + } + + if (pagep != NULL && + (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + + if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + REC_CLOSE; +} + +/* + * __db_pg_free_recover_42_int -- + */ +static int +__db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) + ENV *env; + DB_THREAD_INFO *ip; + __db_pg_freedata_42_args *argp; + DB *file_dbp; + DB_LSN *lsnp; + DB_MPOOLFILE *mpf; + db_recops op; + int data; +{ + DBMETA *meta; + DB_LSN copy_lsn; + PAGE *pagep, *prevp; + int cmp_n, cmp_p, 
is_meta, ret; + + meta = NULL; + pagep = NULL; + prevp = NULL; + + /* + * Get the "metapage". This will either be the metapage + * or the previous page in the free list if we are doing + * sorted allocations. If its a previous page then + * we will not be truncating. + */ + is_meta = argp->meta_pgno == PGNO_BASE_MD; + + REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); + + if (argp->meta_pgno != PGNO_BASE_MD) + prevp = (PAGE *)meta; + + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + + /* + * Fix up the metadata page. If we're redoing or undoing the operation + * we get the page and update its LSN, last and free pointer. + */ + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo the deallocation. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + if (prevp == NULL) + meta->free = argp->pgno; + else + NEXT_PGNO(prevp) = argp->pgno; + /* + * If this was a compensating transaction and + * we are a replica, then we never executed the + * original allocation which incremented meta->free. + */ + if (prevp == NULL && meta->last_pgno < meta->free) + meta->last_pgno = meta->free; + LSN(meta) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo the deallocation. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + if (prevp == NULL) + meta->free = argp->next; + else + NEXT_PGNO(prevp) = argp->next; + LSN(meta) = argp->meta_lsn; + if (prevp == NULL && meta->last_pgno < argp->pgno) + meta->last_pgno = argp->pgno; + } + +check_meta: + if (ret != 0 && is_meta) { + /* The metadata page must always exist. */ + ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); + goto out; + } + + /* + * Get the freed page. If we support truncate then don't + * create the page if we are going to free it. If we're + * redoing the operation we get the page and explicitly discard + * its contents, then update its LSN. 
If we're undoing the + * operation, we get the page and restore its header. + * If we don't support truncate, then we must create the page + * and roll it back. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); + cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); + + CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); + if (DB_REDO(op) && + (cmp_p == 0 || + (IS_ZERO_LSN(copy_lsn) && + LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { + /* Need to redo the deallocation. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to reallocate the page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->header.data, argp->header.size); + if (data) + memcpy((u_int8_t*)pagep + HOFFSET(pagep), + argp->data.data, argp->data.size); + } + if (pagep != NULL && + (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + + pagep = NULL; + if (meta != NULL && + (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) + goto out; + meta = NULL; + + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + + return (ret); +} + +/* + * __db_pg_free_42_recover -- + * Recovery function for pg_free. 
 *
 * PUBLIC: int __db_pg_free_42_recover
 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_free_42_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_free_42_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	int ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__db_pg_free_42_print);
	REC_INTRO(__db_pg_free_42_read, ip, 0);

	/*
	 * Delegate to the shared 4.2 pg_free worker.  The cast relies on
	 * __db_pg_free_42_args matching the leading fields of
	 * __db_pg_freedata_42_args (NOTE(review): assumed layout-compatible
	 * prefix -- confirm against the generated args structs); the final 0
	 * argument tells the worker there is no data portion, so the extra
	 * freedata fields are never read.
	 */
	ret = __db_pg_free_recover_42_int(env, ip,
	    (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0);

	/* Hand the previous record's LSN back to the recovery loop. */
done:	*lsnp = argp->prev_lsn;
out:
	REC_CLOSE;
}

/*
 * __db_pg_freedata_42_recover --
 *	Recovery function for pg_freedata.
 *
 * PUBLIC: int __db_pg_freedata_42_recover
 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_freedata_42_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_freedata_42_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	int ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__db_pg_freedata_42_print);
	REC_INTRO(__db_pg_freedata_42_read, ip, 0);

	/*
	 * Same worker as pg_free, but the nonzero final argument makes it
	 * restore the page's data section as well as its header on undo.
	 */
	ret = __db_pg_free_recover_42_int(
	    env, ip, argp, file_dbp, lsnp, mpf, op, 1);

done:	*lsnp = argp->prev_lsn;
out:
	REC_CLOSE;
}

/*
 * __db_relink_42_recover --
 *	Recovery function for relink.
+ * + * PUBLIC: int __db_relink_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_relink_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_relink_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_relink_42_print); + REC_INTRO(__db_relink_42_read, ip, 0); + + /* + * There are up to three pages we need to check -- the page, and the + * previous and next pages, if they existed. For a page add operation, + * the current page is the result of a split and is being recovered + * elsewhere, so all we need do is recover the next page. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + goto next2; + } + if (argp->opcode == DB_ADD_PAGE_COMPAT) + goto next1; + + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the relink. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->lsn = *lsnp; + } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { + /* Undo the relink. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->next; + pagep->prev_pgno = argp->prev; + pagep->lsn = argp->lsn; + } +next1: if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) { + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, argp->next, ret); + goto out; + } + goto prev; + } + modified = 0; + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); + if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) || + (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) { + /* Redo the remove or undo the add. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->prev_pgno = argp->prev; + modified = 1; + } else if ((argp->opcode == DB_REM_PAGE_COMPAT && + cmp_n == 0 && DB_UNDO(op)) || + (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) { + /* Undo the remove or redo the add. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->prev_pgno = argp->pgno; + modified = 1; + } + if (modified) { + if (DB_UNDO(op)) + pagep->lsn = argp->lsn_next; + else + pagep->lsn = *lsnp; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + if (argp->opcode == DB_ADD_PAGE_COMPAT) + goto done; + +prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) { + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, argp->prev, ret); + goto out; + } + goto done; + } + modified = 0; + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the relink. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->next; + modified = 1; + } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { + /* Undo the relink. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->pgno; + modified = 1; + } + if (modified) { + if (DB_UNDO(op)) + pagep->lsn = argp->lsn_prev; + else + pagep->lsn = *lsnp; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __db_relink_recover -- + * Recovery function for relink. + * + * PUBLIC: int __db_relink_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_relink_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_relink_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__db_relink_print); + REC_INTRO(__db_relink_read, ip, 0); + + /* + * There are up to three pages we need to check -- the page, and the + * previous and next pages, if they existed. For a page add operation, + * the current page is the result of a split and is being recovered + * elsewhere, so all we need do is recover the next page. + */ + if (argp->next_pgno == PGNO_INVALID) + goto prev; + if ((ret = __memp_fget(mpf, + &argp->next_pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->next_pgno, ret); + goto out; + } else + goto prev; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the remove or replace. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (argp->new_pgno == PGNO_INVALID) + pagep->prev_pgno = argp->prev_pgno; + else + pagep->prev_pgno = argp->new_pgno; + + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the remove or replace. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->prev_pgno = argp->pgno; + + pagep->lsn = argp->lsn_next; + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +prev: if (argp->prev_pgno == PGNO_INVALID) + goto done; + if ((ret = __memp_fget(mpf, + &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->prev_pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the relink. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (argp->new_pgno == PGNO_INVALID) + pagep->next_pgno = argp->next_pgno; + else + pagep->next_pgno = argp->new_pgno; + + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the relink. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->pgno; + pagep->lsn = argp->lsn_prev; + } + + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __db_merge_recover -- + * Recovery function for merge. 
+ * + * PUBLIC: int __db_merge_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_merge_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __db_merge_args *argp; + BTREE *bt; + DB_THREAD_INFO *ip; + BKEYDATA *bk; + DB *file_dbp; + DBC *dbc; + DB_LOCK handle_lock; + DB_LOCKREQ request; + DB_MPOOLFILE *mpf; + HASH *ht; + PAGE *pagep; + db_indx_t indx, *ninp, *pinp; + u_int32_t size; + u_int8_t *bp; + int cmp_n, cmp_p, i, ret, t_ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_merge_print); + REC_INTRO(__db_merge_read, ip, op != DB_TXN_APPLY); + + /* Allocate our own cursor without DB_RECOVER as we need a locker. */ + if (op == DB_TXN_APPLY && (ret = __db_cursor_int(file_dbp, ip, NULL, + DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0) + goto out; + F_SET(dbc, DBC_RECOVER); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto next; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); + CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp); + + if (cmp_p == 0 && DB_REDO(op)) { + /* + * When pg_copy is set, we are copying onto a new page. 
+ */ + DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0); + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (argp->pg_copy) { + if (argp->data.size == 0) { + memcpy(pagep, argp->hdr.data, argp->hdr.size); + pagep->pgno = argp->pgno; + goto do_lsn; + } + P_INIT(pagep, file_dbp->pgsize, pagep->pgno, + PREV_PGNO(argp->hdr.data), + NEXT_PGNO(argp->hdr.data), + LEVEL(argp->hdr.data), TYPE(argp->hdr.data)); + } + if (TYPE(pagep) == P_OVERFLOW) { + OV_REF(pagep) = OV_REF(argp->hdr.data); + OV_LEN(pagep) = OV_LEN(argp->hdr.data); + bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp); + memcpy(bp, argp->data.data, argp->data.size); + } else { + /* Copy the data segment. */ + bp = (u_int8_t *)pagep + + (db_indx_t)(HOFFSET(pagep) - argp->data.size); + memcpy(bp, argp->data.data, argp->data.size); + + /* Copy index table offset past the current entries. */ + pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); + ninp = P_INP(file_dbp, argp->hdr.data); + for (i = 0; i < NUM_ENT(argp->hdr.data); i++) + *pinp++ = *ninp++ + - (file_dbp->pgsize - HOFFSET(pagep)); + HOFFSET(pagep) -= argp->data.size; + NUM_ENT(pagep) += i; + } +do_lsn: pagep->lsn = *lsnp; + if (op == DB_TXN_APPLY) { + /* + * If applying to an active system we must bump + * the revision number so that the db will get + * reopened. We also need to move the handle + * locks. Note that the dbp will not have a + * locker in a replication client apply thread. 
+ */ + if (file_dbp->type == DB_HASH) { + if (argp->npgno == file_dbp->meta_pgno) + file_dbp->mpf->mfp->revision++; + } else { + bt = file_dbp->bt_internal; + if (argp->npgno == bt->bt_meta || + argp->npgno == bt->bt_root) + file_dbp->mpf->mfp->revision++; + } + if (argp->npgno == file_dbp->meta_pgno) { + F_CLR(file_dbp, DB_AM_RECOVER); + if ((ret = __fop_lock_handle(file_dbp->env, + file_dbp, dbc->locker, DB_LOCK_READ, + NULL, 0)) != 0) + goto err; + handle_lock = file_dbp->handle_lock; + + file_dbp->meta_pgno = argp->pgno; + if ((ret = __fop_lock_handle(file_dbp->env, + file_dbp, dbc->locker, DB_LOCK_READ, + NULL, 0)) != 0) + goto err; + + /* Move the other handles to the new lock. */ + ret = __lock_change(file_dbp->env, + &handle_lock, &file_dbp->handle_lock); + +err: memset(&request, 0, sizeof(request)); + request.op = DB_LOCK_PUT_ALL; + if ((t_ret = __lock_vec( + file_dbp->env, dbc->locker, + 0, &request, 1, NULL)) != 0 && ret == 0) + ret = t_ret; + F_SET(file_dbp, DB_AM_RECOVER); + if (ret != 0) + goto out; + } + } + + } else if (cmp_n == 0 && !DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (TYPE(pagep) == P_OVERFLOW) { + HOFFSET(pagep) = file_dbp->pgsize; + goto setlsn; + } + + if (argp->pg_copy) { + /* The page was empty when we started. */ + P_INIT(pagep, file_dbp->pgsize, + pagep->pgno, PGNO_INVALID, + PGNO_INVALID, 0, TYPE(argp->hdr.data)); + goto setlsn; + } + + /* + * Since logging is logical at the page level we cannot just + * truncate the data space. Delete the proper number of items + * from the logical end of the page. 
+ */ + for (i = 0; i < NUM_ENT(argp->hdr.data); i++) { + indx = NUM_ENT(pagep) - 1; + if (TYPE(pagep) == P_LBTREE && indx != 0 && + P_INP(file_dbp, pagep)[indx] == + P_INP(file_dbp, pagep)[indx - P_INDX]) { + NUM_ENT(pagep)--; + continue; + } + switch (TYPE(pagep)) { + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + bk = GET_BKEYDATA(file_dbp, pagep, indx); + size = BITEM_SIZE(bk); + break; + + case P_IBTREE: + size = BINTERNAL_SIZE( + GET_BINTERNAL(file_dbp, pagep, indx)->len); + break; + case P_IRECNO: + size = RINTERNAL_SIZE; + break; + case P_HASH: + size = LEN_HITEM(file_dbp, + pagep, file_dbp->pgsize, indx); + break; + default: + ret = __db_pgfmt(env, PGNO(pagep)); + goto out; + } + if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0) + goto out; + } +setlsn: pagep->lsn = argp->lsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + +next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn); + CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn); + + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to truncate the page. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + HOFFSET(pagep) = file_dbp->pgsize; + NUM_ENT(pagep) = 0; + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && !DB_REDO(op)) { + /* Need to put the data back on the page. 
*/ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if (TYPE(pagep) == P_OVERFLOW) { + OV_REF(pagep) = OV_REF(argp->hdr.data); + OV_LEN(pagep) = OV_LEN(argp->hdr.data); + bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp); + memcpy(bp, argp->data.data, argp->data.size); + } else { + bp = (u_int8_t *)pagep + + (db_indx_t)(HOFFSET(pagep) - argp->data.size); + memcpy(bp, argp->data.data, argp->data.size); + + if (argp->pg_copy) + memcpy(pagep, argp->hdr.data, argp->hdr.size); + else { + /* Copy index table. */ + pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); + ninp = P_INP(file_dbp, argp->hdr.data); + for (i = 0; i < NUM_ENT(argp->hdr.data); i++) + *pinp++ = *ninp++; + HOFFSET(pagep) -= argp->data.size; + NUM_ENT(pagep) += i; + } + } + pagep->lsn = argp->nlsn; + if (op == DB_TXN_ABORT) { + /* + * If we are undoing a meta/root page move we must + * bump the revision number. Put the handle + * locks back to their original state if we + * moved the metadata page. + */ + i = 0; + if (file_dbp->type == DB_HASH) { + ht = file_dbp->h_internal; + if (argp->pgno == ht->meta_pgno) { + ht->meta_pgno = argp->npgno; + file_dbp->mpf->mfp->revision++; + i = 1; + } + } else { + bt = file_dbp->bt_internal; + if (argp->pgno == bt->bt_meta) { + file_dbp->mpf->mfp->revision++; + bt->bt_meta = argp->npgno; + i = 1; + } else if (argp->pgno == bt->bt_root) { + file_dbp->mpf->mfp->revision++; + bt->bt_root = argp->npgno; + } + } + if (argp->pgno == file_dbp->meta_pgno) + file_dbp->meta_pgno = argp->npgno; + + /* + * If we detected a metadata page above, move + * the handle locks to the new page. + */ + if (i == 1) { + handle_lock = file_dbp->handle_lock; + if ((ret = __fop_lock_handle(file_dbp->env, + file_dbp, file_dbp->locker, DB_LOCK_READ, + NULL, 0)) != 0) + goto out; + + /* Move the other handles to the new lock. 
*/ + if ((ret = __lock_change(file_dbp->env, + &handle_lock, &file_dbp->handle_lock)) != 0) + goto out; + } + } + } + + if ((ret = __memp_fput(mpf, + ip, pagep, dbc->priority)) != 0) + goto out; +done: + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pgno_recover -- + * Recovery function for page number replacment. + * + * PUBLIC: int __db_pgno_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__db_pgno_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + BINTERNAL *bi; + __db_pgno_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep, *npagep; + db_pgno_t pgno, *pgnop; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__db_pgno_print); + REC_INTRO(__db_pgno_read, ip, 0); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); + CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); + CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp); + + if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) { + switch (TYPE(pagep)) { + case P_IBTREE: + /* + * An internal record can have both a overflow + * and child pointer. Fetch the page to see + * which it is. 
+ */ + bi = GET_BINTERNAL(file_dbp, pagep, argp->indx); + if (B_TYPE(bi->type) == B_OVERFLOW) { + REC_FGET(mpf, ip, argp->npgno, &npagep, out); + + if (TYPE(npagep) == P_OVERFLOW) + pgnop = + &((BOVERFLOW *)(bi->data))->pgno; + else + pgnop = &bi->pgno; + if ((ret = __memp_fput(mpf, ip, + npagep, file_dbp->priority)) != 0) + goto out; + break; + } + pgnop = &bi->pgno; + break; + case P_IRECNO: + pgnop = + &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno; + break; + case P_HASH: + pgnop = &pgno; + break; + default: + pgnop = + &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno; + break; + } + + if (DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + *pgnop = argp->npgno; + pagep->lsn = *lsnp; + } else { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + *pgnop = argp->opgno; + pagep->lsn = argp->lsn; + } + if (TYPE(pagep) == P_HASH) + memcpy(HOFFDUP_PGNO(P_ENTRY(file_dbp, + pagep, argp->indx)), pgnop, sizeof(db_pgno_t)); + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + +done: + *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * __db_pglist_swap -- swap a list of freelist pages. + * PUBLIC: void __db_pglist_swap __P((u_int32_t, void *)); + */ +void +__db_pglist_swap(size, list) + u_int32_t size; + void *list; +{ + db_pglist_t *lp; + u_int32_t nelem; + + nelem = size / sizeof(db_pglist_t); + + lp = (db_pglist_t *)list; + while (nelem-- > 0) { + P_32_SWAP(&lp->pgno); + P_32_SWAP(&lp->lsn.file); + P_32_SWAP(&lp->lsn.offset); + lp++; + } +} + +/* + * __db_pglist_print -- print a list of freelist pages. 
 * PUBLIC: void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *));
 */
void
__db_pglist_print(env, mbp, list)
	ENV *env;
	DB_MSGBUF *mbp;
	DBT *list;
{
	db_pglist_t *lp;
	u_int32_t nelem;

	/* The DBT payload is a packed array of db_pglist_t entries. */
	nelem = list->size / sizeof(db_pglist_t);
	lp = (db_pglist_t *)list->data;
	__db_msgadd(env, mbp, "\t");
	while (nelem-- > 0) {
		/* Emit "pgno [lsn-file][lsn-offset]", four entries per line. */
		__db_msgadd(env, mbp, "%lu [%lu][%lu]", (u_long)lp->pgno,
		    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
		if (nelem % 4 == 0)
			__db_msgadd(env, mbp, "\n\t");
		else
			__db_msgadd(env, mbp, " ");
		lp++;
	}
}
diff --git a/src/db/db_reclaim.c b/src/db/db_reclaim.c
new file mode 100644
index 00000000..ea298d77
--- /dev/null
+++ b/src/db/db_reclaim.c
@@ -0,0 +1,245 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/mp.h"

/*
 * __db_traverse_big
 *	Traverse a chain of overflow pages and call the callback routine
 * on each one.  The calling convention for the callback is:
 *	callback(dbc, page, cookie, did_put),
 * where did_put is a return value indicating if the page in question has
 * already been returned to the mpool.
 *
 * PUBLIC: int __db_traverse_big __P((DBC *, db_pgno_t,
 * PUBLIC:     int (*)(DBC *, PAGE *, void *, int *), void *));
 */
int
__db_traverse_big(dbc, pgno, callback, cookie)
	DBC *dbc;
	db_pgno_t pgno;
	int (*callback) __P((DBC *, PAGE *, void *, int *));
	void *cookie;
{
	DB_MPOOLFILE *mpf;
	PAGE *p;
	int did_put, ret;

	mpf = dbc->dbp->mpf;

	do {
		did_put = 0;
		if ((ret = __memp_fget(mpf,
		    &pgno, dbc->thread_info, dbc->txn, 0, &p)) != 0)
			return (ret);
		/*
		 * If we are freeing pages only process the overflow
		 * chain if the head of the chain has a refcount of 1.
		 */
		/* Read the next-page link before the callback may free p. */
		pgno = NEXT_PGNO(p);
		if (callback == __db_truncate_callback && OV_REF(p) != 1)
			pgno = PGNO_INVALID;
		if ((ret = callback(dbc, p, cookie, &did_put)) == 0 &&
		    !did_put)
			ret = __memp_fput(mpf,
			    dbc->thread_info, p, dbc->priority);
	} while (ret == 0 && pgno != PGNO_INVALID);

	return (ret);
}

/*
 * __db_reclaim_callback
 * This is the callback routine used during a delete of a subdatabase.
 * we are traversing a btree or hash table and trying to free all the
 * pages.  Since they share common code for duplicates and overflow
 * items, we traverse them identically and use this routine to do the
 * actual free.  The reason that this is callback is because hash uses
 * the same traversal code for statistics gathering.
 *
 * PUBLIC: int __db_reclaim_callback __P((DBC *, PAGE *, void *, int *));
 */
int
__db_reclaim_callback(dbc, p, cookie, putp)
	DBC *dbc;
	PAGE *p;
	void *cookie;
	int *putp;
{
	DB *dbp;
	int ret;

	dbp = dbc->dbp;

	/*
	 * We don't want to log the free of the root with the subdb.
	 * If we abort then the subdb may not be openable to undo
	 * the free.
	 */
	if ((dbp->type == DB_BTREE || dbp->type == DB_RECNO) &&
	    PGNO(p) == ((BTREE *)dbp->bt_internal)->bt_root)
		return (0);
	/* The cookie carries the log flags for the free (see P_TO_UINT32). */
	if ((ret = __db_free(dbc, p, P_TO_UINT32(cookie))) != 0)
		return (ret);
	/* __db_free consumed the page; tell the traversal not to fput it. */
	*putp = 1;

	return (0);
}

/*
 * __db_truncate_callback
 * This is the callback routine used during a truncate.
 * we are traversing a btree or hash table and trying to free all the
 * pages.
 *
 * PUBLIC: int __db_truncate_callback __P((DBC *, PAGE *, void *, int *));
 */
int
__db_truncate_callback(dbc, p, cookie, putp)
	DBC *dbc;
	PAGE *p;
	void *cookie;
	int *putp;
{
	DB *dbp;
	DBT ddbt, ldbt;
	DB_MPOOLFILE *mpf;
	db_indx_t indx, len, off, tlen, top;
	u_int8_t *hk, type;
	u_int32_t *countp;
	int ret;

	top = NUM_ENT(p);
	dbp = dbc->dbp;
	mpf = dbp->mpf;
	/* The cookie is the caller's record counter, bumped per live record. */
	countp = cookie;
	*putp = 1;

	switch (TYPE(p)) {
	case P_LBTREE:
		/* Skip for off-page duplicates and deleted items. */
		for (indx = 0; indx < top; indx += P_INDX) {
			type = GET_BKEYDATA(dbp, p, indx + O_INDX)->type;
			if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE)
				++*countp;
		}
		/* FALLTHROUGH */
	case P_IBTREE:
	case P_IRECNO:
	case P_INVALID:
		/* The root page is reinitialized, not freed. */
		if (dbp->type != DB_HASH &&
		    ((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
			type = dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
			goto reinit;
		}
		break;
	case P_OVERFLOW:
		if ((ret = __memp_dirty(mpf,
		    &p, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
			return (ret);
		if (DBC_LOGGING(dbc)) {
			if ((ret = __db_ovref_log(dbp, dbc->txn,
			    &LSN(p), 0, p->pgno, -1, &LSN(p))) != 0)
				return (ret);
		} else
			LSN_NOT_LOGGED(LSN(p));
		/* Only free the overflow page when its refcount drops to 0. */
		if (--OV_REF(p) != 0)
			*putp = 0;
		break;
	case P_LRECNO:
		for (indx = 0; indx < top; indx += O_INDX) {
			type = GET_BKEYDATA(dbp, p, indx)->type;
			if (!B_DISSET(type))
				++*countp;
		}

		if (((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
			type = P_LRECNO;
			goto reinit;
		}
		break;
	case P_LDUP:
		/* Correct for deleted items. */
		for (indx = 0; indx < top; indx += O_INDX)
			if (!B_DISSET(GET_BKEYDATA(dbp, p, indx)->type))
				++*countp;

		break;
	case P_HASH:
		/* Correct for on-page duplicates and deleted items. */
		for (indx = 0; indx < top; indx += P_INDX) {
			switch (*H_PAIRDATA(dbp, p, indx)) {
			case H_OFFDUP:
				break;
			case H_OFFPAGE:
			case H_KEYDATA:
				++*countp;
				break;
			case H_DUPLICATE:
				/*
				 * Walk the on-page duplicate set; each element
				 * is a length-prefixed/suffixed record.
				 */
				tlen = LEN_HDATA(dbp, p, 0, indx);
				hk = H_PAIRDATA(dbp, p, indx);
				for (off = 0; off < tlen;
				    off += len + 2 * sizeof(db_indx_t)) {
					++*countp;
					memcpy(&len,
					    HKEYDATA_DATA(hk)
					    + off, sizeof(db_indx_t));
				}
				break;
			default:
				return (__db_pgfmt(dbp->env, p->pgno));
			}
		}
		/* Don't free the head of the bucket. */
		if (PREV_PGNO(p) == PGNO_INVALID) {
			type = P_HASH;

			/*
			 * reinit is shared by the btree/recno root cases
			 * above: log the old page image and reset the page
			 * to an empty page of the appropriate type.
			 */
reinit:			if ((ret = __memp_dirty(mpf, &p,
			    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
				return (ret);
			*putp = 0;
			if (DBC_LOGGING(dbc)) {
				memset(&ldbt, 0, sizeof(ldbt));
				memset(&ddbt, 0, sizeof(ddbt));
				ldbt.data = p;
				ldbt.size = P_OVERHEAD(dbp);
				ldbt.size += p->entries * sizeof(db_indx_t);
				ddbt.data = (u_int8_t *)p + HOFFSET(p);
				ddbt.size = dbp->pgsize - HOFFSET(p);
				if ((ret = __db_pg_init_log(dbp,
				    dbc->txn, &LSN(p), 0,
				    p->pgno, &ldbt, &ddbt)) != 0)
					return (ret);
			} else
				LSN_NOT_LOGGED(LSN(p));

			P_INIT(p, dbp->pgsize, PGNO(p), PGNO_INVALID,
			    PGNO_INVALID, type == P_HASH ? 0 : 1, type);
		}
		break;
	default:
		return (__db_pgfmt(dbp->env, p->pgno));
	}

	if (*putp == 1) {
		if ((ret = __db_free(dbc, p, 0)) != 0)
			return (ret);
	} else {
		if ((ret = __memp_fput(mpf, dbc->thread_info, p,
		    dbc->priority)) != 0)
			return (ret);
		*putp = 1;
	}

	return (0);
}
diff --git a/src/db/db_remove.c b/src/db/db_remove.c
new file mode 100644
index 00000000..897e1899
--- /dev/null
+++ b/src/db/db_remove.c
@@ -0,0 +1,515 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2001, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/fop.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"

static int __db_dbtxn_remove __P((DB *,
    DB_THREAD_INFO *, DB_TXN *, const char *, const char *));
static int __db_subdb_remove __P((DB *,
    DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t));

/*
 * __env_dbremove_pp
 *	ENV->dbremove pre/post processing.
 *
 * PUBLIC: int __env_dbremove_pp __P((DB_ENV *,
 * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
 */
int
__env_dbremove_pp(dbenv, txn, name, subdb, flags)
	DB_ENV *dbenv;
	DB_TXN *txn;
	const char *name, *subdb;
	u_int32_t flags;
{
	DB *dbp;
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret, txn_local;

	dbp = NULL;
	env = dbenv->env;
	txn_local = 0;
	handle_check = 0;

	ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbremove");

	/*
	 * The actual argument checking is simple, do it inline, outside of
	 * the replication block.
	 */
	if ((ret = __db_fchk(env, "DB->remove", flags,
	    DB_AUTO_COMMIT | DB_LOG_NO_DATA |
	    DB_NOSYNC | DB_TXN_NOT_DURABLE)) != 0)
		return (ret);

	ENV_ENTER(env, ip);
	XA_NO_TXN(ip, ret);
	if (ret != 0)
		goto err;

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) {
		/* Entry failed: don't call __env_db_rep_exit on the way out. */
		handle_check = 0;
		goto err;
	}

	/*
	 * Create local transaction as necessary, check for consistent
	 * transaction usage.
	 */
	if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
		if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
			goto err;
		txn_local = 1;
	} else if (txn != NULL && !TXN_ON(env) &&
	    (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_FAMILY))) {
		ret = __db_not_txn_env(env);
		goto err;
	} else if (txn != NULL && LF_ISSET(DB_LOG_NO_DATA)) {
		ret = EINVAL;
		__db_errx(env, DB_STR("0690",
		    "DB_LOG_NO_DATA may not be specified within a transaction."));
		goto err;
	}
	LF_CLR(DB_AUTO_COMMIT);

	if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
		goto err;
	if (LF_ISSET(DB_TXN_NOT_DURABLE) &&
	    (ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0)
		goto err;
	LF_CLR(DB_TXN_NOT_DURABLE);

	ret = __db_remove_int(dbp, ip, txn, name, subdb, flags);

	if (txn_local) {
		/*
		 * We created the DBP here and when we commit/abort, we'll
		 * release all the transactional locks, including the handle
		 * lock; mark the handle cleared explicitly.
		 */
		LOCK_INIT(dbp->handle_lock);
		dbp->locker = NULL;
	} else if (IS_REAL_TXN(txn)) {
		/*
		 * We created this handle locally so we need to close it
		 * and clean it up.  Unfortunately, it's holding transactional
		 * locks that need to persist until the end of transaction.
		 * If we invalidate the locker id (dbp->locker), then the close
		 * won't free these locks prematurely.
		 */
		dbp->locker = NULL;
	}

err:	if (txn_local && (t_ret =
	    __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * We never opened this dbp for real, so don't include a transaction
	 * handle, and use NOSYNC to avoid calling into mpool.
	 *
	 * !!!
	 * Note we're reversing the order of operations: we started the txn and
	 * then opened the DB handle; we're resolving the txn and then closing
	 * closing the DB handle -- a DB handle cannot be closed before
	 * resolving the txn.
	 */
	if (dbp != NULL &&
	    (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
		ret = t_ret;

	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_remove_pp
 *	DB->remove pre/post processing.
 *
 * PUBLIC: int __db_remove_pp
 * PUBLIC:     __P((DB *, const char *, const char *, u_int32_t));
 */
int
__db_remove_pp(dbp, name, subdb, flags)
	DB *dbp;
	const char *name, *subdb;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = dbp->env;

	/*
	 * Validate arguments, continuing to destroy the handle on failure.
	 *
	 * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns.
	 *
	 * !!!
	 * We have a serious problem if we're here with a handle used to open
	 * a database -- we'll destroy the handle, and the application won't
	 * ever be able to close the database.
	 */
	if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
		return (__db_mi_open(env, "DB->remove", 1));

	/* Validate arguments. */
	if ((ret = __db_fchk(env, "DB->remove", flags, DB_NOSYNC)) != 0)
		return (ret);

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0)
		return (ret);

	ENV_ENTER(env, ip);

	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) {
		handle_check = 0;
		goto err;
	}

	/* Remove the file. */
	ret = __db_remove(dbp, ip, NULL, name, subdb, flags);

	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

err:	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_remove
 *	DB->remove method.
 *
 * PUBLIC: int __db_remove __P((DB *, DB_THREAD_INFO *,
 * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
 */
int
__db_remove(dbp, ip, txn, name, subdb, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb;
	u_int32_t flags;
{
	int ret, t_ret;

	ret = __db_remove_int(dbp, ip, txn, name, subdb, flags);

	/* DB->remove always consumes the handle, success or failure. */
	if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __db_remove_int
 *	Worker function for the DB->remove method.
 *
 * PUBLIC: int __db_remove_int __P((DB *, DB_THREAD_INFO *,
 * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
 */
int
__db_remove_int(dbp, ip, txn, name, subdb, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb;
	u_int32_t flags;
{
	ENV *env;
	int ret;
	char *real_name, *tmpname;

	env = dbp->env;
	real_name = tmpname = NULL;

	if (name == NULL && subdb == NULL) {
		__db_errx(env, DB_STR("0691",
		    "Remove on temporary files invalid"));
		ret = EINVAL;
		goto err;
	}

	if (name == NULL) {
		/* name == NULL, subdb != NULL: a named in-memory database. */
		MAKE_INMEM(dbp);
		real_name = (char *)subdb;
	} else if (subdb != NULL) {
		ret = __db_subdb_remove(dbp, ip, txn, name, subdb, flags);
		goto err;
	}

	/* Handle transactional file removes separately. */
	if (IS_REAL_TXN(txn)) {
		ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb);
		goto err;
	}

	/*
	 * The remaining case is a non-transactional file remove.
	 *
	 * Find the real name of the file.
	 */
	if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = __db_appname(env,
	    DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
		goto err;

	/*
	 * If this is a file and force is set, remove the temporary file, which
	 * may have been left around.  Ignore errors because the temporary file
	 * might not exist.
	 */
	if (!F_ISSET(dbp, DB_AM_INMEM) && LF_ISSET(DB_FORCE) &&
	    (ret = __db_backup_name(env, real_name, NULL, &tmpname)) == 0)
		(void)__os_unlink(env, tmpname, 0);

	if ((ret = __fop_remove_setup(dbp, NULL, real_name, 0)) != 0)
		goto err;

	/* Give the access method a chance to do its own cleanup first. */
	if (dbp->db_am_remove != NULL &&
	    (ret = dbp->db_am_remove(dbp, ip, NULL, name, subdb, flags)) != 0)
		goto err;

	ret = F_ISSET(dbp, DB_AM_INMEM) ?
	    __db_inmem_remove(dbp, NULL, real_name) :
	    __fop_remove(env,
	    NULL, dbp->fileid, name, &dbp->dirname, DB_APP_DATA,
	    F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);

err:	if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL)
		__os_free(env, real_name);
	if (tmpname != NULL)
		__os_free(env, tmpname);

	return (ret);
}

/*
 * __db_inmem_remove --
 *	Removal of a named in-memory database.
 *
 * PUBLIC: int __db_inmem_remove __P((DB *, DB_TXN *, const char *));
 */
int
__db_inmem_remove(dbp, txn, name)
	DB *dbp;
	DB_TXN *txn;
	const char *name;
{
	DBT fid_dbt, name_dbt;
	DB_LOCKER *locker;
	DB_LSN lsn;
	ENV *env;
	int ret;

	env = dbp->env;
	locker = NULL;

	DB_ASSERT(env, name != NULL);

	/* This had better exist if we are trying to do a remove. */
	(void)__memp_set_flags(dbp->mpf, DB_MPOOL_NOFILE, 1);
	if ((ret = __memp_fopen(dbp->mpf, NULL,
	    name, &dbp->dirname, 0, 0, 0)) != 0)
		return (ret);
	if ((ret = __memp_get_fileid(dbp->mpf, dbp->fileid)) != 0)
		return (ret);
	dbp->preserve_fid = 1;

	if (LOCKING_ON(env)) {
		if (dbp->locker == NULL &&
		    (ret = __lock_id(env, NULL, &dbp->locker)) != 0)
			return (ret);
		if (!CDB_LOCKING(env) &&
		    txn != NULL && F_ISSET(txn, TXN_INFAMILY)) {
			/*
			 * Family transaction: lock under the handle's own
			 * locker, joined to the family, not under txn.
			 */
			if ((ret = __lock_addfamilylocker(env,
			    txn->txnid, dbp->locker->id, 1)) != 0)
				return (ret);
			txn = NULL;
		}
		locker = txn == NULL ? dbp->locker : txn->locker;
	}

	/*
	 * In a transactional environment, we'll play the same game we play
	 * for databases in the file system -- create a temporary database
	 * and put it in with the current name and then rename this one to
	 * another name.  We'll then use a commit-time event to remove the
	 * entry.
	 */
	if ((ret =
	    __fop_lock_handle(env, dbp, locker, DB_LOCK_WRITE, NULL, 0)) != 0)
		return (ret);

	if (!IS_REAL_TXN(txn))
		ret = __memp_nameop(env, dbp->fileid, NULL, name, NULL, 1);
	else if (LOGGING_ON(env)) {
		if (txn != NULL && (ret =
		    __txn_remevent(env, txn, name, dbp->fileid, 1)) != 0)
			return (ret);

		DB_INIT_DBT(name_dbt, name, strlen(name) + 1);
		DB_INIT_DBT(fid_dbt, dbp->fileid, DB_FILE_ID_LEN);
		ret = __crdel_inmem_remove_log(
		    env, txn, &lsn, 0, &name_dbt, &fid_dbt);
	}

	return (ret);
}

/*
 * __db_subdb_remove --
 *	Remove a subdatabase.
 */
static int
__db_subdb_remove(dbp, ip, txn, name, subdb, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb;
	u_int32_t flags;
{
	DB *mdbp, *sdbp;
	int ret, t_ret;

	mdbp = sdbp = NULL;

	/* Open the subdatabase. */
	if ((ret = __db_create_internal(&sdbp, dbp->env, 0)) != 0)
		goto err;
	if (F_ISSET(dbp, DB_AM_NOT_DURABLE) &&
	    (ret = __db_set_flags(sdbp, DB_TXN_NOT_DURABLE)) != 0)
		goto err;
	if ((ret = __db_open(sdbp, ip,
	    txn, name, subdb, DB_UNKNOWN, DB_WRITEOPEN, 0, PGNO_BASE_MD)) != 0)
		goto err;

	DB_TEST_RECOVERY(sdbp, DB_TEST_PREDESTROY, ret, name);

	/* Have the handle locked so we will not lock pages. */
	LOCK_CHECK_OFF(ip);

	/* Free up the pages in the subdatabase. */
	switch (sdbp->type) {
	case DB_BTREE:
	case DB_RECNO:
		if ((ret = __bam_reclaim(sdbp, ip, txn, flags)) != 0)
			goto err;
		break;
	case DB_HASH:
		if ((ret = __ham_reclaim(sdbp, ip, txn, flags)) != 0)
			goto err;
		break;
	case DB_QUEUE:
	case DB_UNKNOWN:
	default:
		ret = __db_unknown_type(
		    sdbp->env, "__db_subdb_remove", sdbp->type);
		goto err;
	}

	/*
	 * Remove the entry from the main database and free the subdatabase
	 * metadata page.
	 */
	if ((ret = __db_master_open(sdbp, ip, txn, name, 0, 0, &mdbp)) != 0)
		goto err;

	if ((ret = __db_master_update(mdbp,
	    sdbp, ip, txn, subdb, sdbp->type, MU_REMOVE, NULL, 0)) != 0)
		goto err;

	DB_TEST_RECOVERY(sdbp, DB_TEST_POSTDESTROY, ret, name);

DB_TEST_RECOVERY_LABEL
err:
	/* Close the main and subdatabases. */
	if ((t_ret = __db_close(sdbp, txn, DB_NOSYNC)) != 0 && ret == 0)
		ret = t_ret;

	if (mdbp != NULL && (t_ret = __db_close(mdbp, txn,
	    (LF_ISSET(DB_NOSYNC) || txn != NULL) ? DB_NOSYNC : 0)) != 0 &&
	    ret == 0)
		ret = t_ret;

	/* Re-enable page-lock checking disabled above. */
	LOCK_CHECK_ON(ip);
	return (ret);
}

/*
 * __db_dbtxn_remove --
 *	Transactional file/database remove: rename to a backup name now,
 * delete the placeholder at commit time.
 */
static int
__db_dbtxn_remove(dbp, ip, txn, name, subdb)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb;
{
	ENV *env;
	int ret;
	char *tmpname;

	env = dbp->env;
	tmpname = NULL;

	/*
	 * This is a transactional remove, so we have to keep the name
	 * of the file locked until the transaction commits.  As a result,
	 * we implement remove by renaming the file to some other name
	 * (which creates a dummy named file as a placeholder for the
	 * file being rename/dremoved) and then deleting that file as
	 * a delayed remove at commit.
	 */
	if ((ret = __db_backup_name(env,
	    F_ISSET(dbp, DB_AM_INMEM) ? subdb : name, txn, &tmpname)) != 0)
		return (ret);

	DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);

	if ((ret = __db_rename_int(dbp,
	    txn->thread_info, txn, name, subdb, tmpname, DB_NOSYNC)) != 0)
		goto err;

	/*
	 * The internal removes will also translate into delayed removes.
	 */
	if (dbp->db_am_remove != NULL &&
	    (ret = dbp->db_am_remove(dbp, ip, txn, tmpname, NULL, 0)) != 0)
		goto err;

	ret = F_ISSET(dbp, DB_AM_INMEM) ?
	    __db_inmem_remove(dbp, txn, tmpname) :
	    __fop_remove(env,
	    txn, dbp->fileid, tmpname, &dbp->dirname, DB_APP_DATA,
	    F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);

	DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);

err:
DB_TEST_RECOVERY_LABEL
	if (tmpname != NULL)
		__os_free(env, tmpname);

	return (ret);
}
diff --git a/src/db/db_rename.c b/src/db/db_rename.c
new file mode 100644
index 00000000..744b8aca
--- /dev/null
+++ b/src/db/db_rename.c
@@ -0,0 +1,383 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2001, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_am.h"
#include "dbinc/fop.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"

static int __db_rename __P((DB *, DB_THREAD_INFO *,
    DB_TXN *, const char *, const char *, const char *, u_int32_t));
static int __db_subdb_rename __P((DB *, DB_THREAD_INFO *,
    DB_TXN *, const char *, const char *, const char *, u_int32_t));

/*
 * __env_dbrename_pp
 *	ENV->dbrename pre/post processing.
 *
 * PUBLIC: int __env_dbrename_pp __P((DB_ENV *, DB_TXN *,
 * PUBLIC:     const char *, const char *, const char *, u_int32_t));
 */
int
__env_dbrename_pp(dbenv, txn, name, subdb, newname, flags)
	DB_ENV *dbenv;
	DB_TXN *txn;
	const char *name, *subdb, *newname;
	u_int32_t flags;
{
	DB *dbp;
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret, txn_local;

	env = dbenv->env;
	dbp = NULL;
	txn_local = 0;
	handle_check = 0;

	ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbrename");

	/*
	 * The actual argument checking is simple, do it inline, outside of
	 * the replication block.
	 */
	if ((ret = __db_fchk(env, "DB->rename", flags,
	    DB_AUTO_COMMIT | DB_NOSYNC)) != 0)
		return (ret);

	ENV_ENTER(env, ip);
	XA_NO_TXN(ip, ret);
	if (ret != 0)
		goto err;

	/* Check for replication block. */
	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) {
		/* Entry failed: don't call __env_db_rep_exit on the way out. */
		handle_check = 0;
		goto err;
	}

	/*
	 * Create local transaction as necessary, check for consistent
	 * transaction usage.
	 */
	if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
		if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
			goto err;
		txn_local = 1;
	} else
		if (txn != NULL && !TXN_ON(env) &&
		    (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_FAMILY))) {
			ret = __db_not_txn_env(env);
			goto err;
		}

	LF_CLR(DB_AUTO_COMMIT);

	if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
		goto err;

	ret = __db_rename_int(dbp, ip, txn, name, subdb, newname, flags);

	if (txn_local) {
		/*
		 * We created the DBP here and when we commit/abort, we'll
		 * release all the transactional locks, including the handle
		 * lock; mark the handle cleared explicitly.
		 */
		LOCK_INIT(dbp->handle_lock);
		dbp->locker = NULL;
	} else if (IS_REAL_TXN(txn)) {
		/*
		 * We created this handle locally so we need to close it and
		 * clean it up.  Unfortunately, it's holding transactional
		 * or CDS group locks that need to persist until the end of
		 * transaction.  If we invalidate the locker (dbp->locker),
		 * then the close won't free these locks prematurely.
		 */
		dbp->locker = NULL;
	}

err:	if (txn_local && (t_ret =
	    __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * We never opened this dbp for real, so don't include a transaction
	 * handle, and use NOSYNC to avoid calling into mpool.
	 *
	 * !!!
	 * Note we're reversing the order of operations: we started the txn and
	 * then opened the DB handle; we're resolving the txn and then closing
	 * closing the DB handle -- it's safer.
	 */
	if (dbp != NULL &&
	    (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
		ret = t_ret;

	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_rename_pp
 *	DB->rename pre/post processing.
 *
 * PUBLIC: int __db_rename_pp __P((DB *,
 * PUBLIC:     const char *, const char *, const char *, u_int32_t));
 */
int
__db_rename_pp(dbp, name, subdb, newname, flags)
	DB *dbp;
	const char *name, *subdb, *newname;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int handle_check, ret, t_ret;

	env = dbp->env;
	handle_check = 0;

	/*
	 * Validate arguments, continuing to destroy the handle on failure.
	 *
	 * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns.
	 *
	 * !!!
	 * We have a serious problem if we're here with a handle used to open
	 * a database -- we'll destroy the handle, and the application won't
	 * ever be able to close the database.
	 */
	if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
		return (__db_mi_open(env, "DB->rename", 1));

	/* Validate arguments. */
	if ((ret = __db_fchk(env, "DB->rename", flags, DB_NOSYNC)) != 0)
		return (ret);

	/* Check for consistent transaction usage. */
	if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0)
		return (ret);

	ENV_ENTER(env, ip);

	handle_check = IS_ENV_REPLICATED(env);
	if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) {
		handle_check = 0;
		goto err;
	}

	/* Rename the file. */
	ret = __db_rename(dbp, ip, NULL, name, subdb, newname, flags);

	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;
err:	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __db_rename
 *	DB->rename method.
 *
 */
static int
__db_rename(dbp, ip, txn, name, subdb, newname, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb, *newname;
	u_int32_t flags;
{
	int ret, t_ret;

	ret = __db_rename_int(dbp, ip, txn, name, subdb, newname, flags);

	/* DB->rename always consumes the handle, success or failure. */
	if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __db_rename_int
 *	Worker function for DB->rename method; the close of the dbp is
 *	left in the wrapper routine.
 *
 * PUBLIC: int __db_rename_int __P((DB *, DB_THREAD_INFO *,
 * PUBLIC:     DB_TXN *, const char *, const char *, const char *, u_int32_t));
 */
int
__db_rename_int(dbp, ip, txn, name, subdb, newname, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb, *newname;
	u_int32_t flags;
{
	ENV *env;
	int ret;
	char *old, *real_name;

	env = dbp->env;
	real_name = NULL;

	DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);

	if (name == NULL && subdb == NULL) {
		__db_errx(env, DB_STR("0503",
		    "Rename on temporary files invalid"));
		ret = EINVAL;
		goto err;
	}

	if (name == NULL)
		/* name == NULL, subdb != NULL: a named in-memory database. */
		MAKE_INMEM(dbp);
	else if (subdb != NULL) {
		ret = __db_subdb_rename(dbp, ip,
		    txn, name, subdb, newname, flags);
		goto err;
	}

	/*
	 * From here on down, this pertains to files or in-memory databases.
	 *
	 * Find the real name of the file.
	 */
	if (F_ISSET(dbp, DB_AM_INMEM)) {
		old = (char *)subdb;
		real_name = (char *)subdb;
	} else {
		if ((ret = __db_appname(env, DB_APP_DATA,
		    name, &dbp->dirname, &real_name)) != 0)
			goto err;
		old = (char *)name;
	}
	DB_ASSERT(env, old != NULL);

	if ((ret = __fop_remove_setup(dbp, txn, real_name, 0)) != 0)
		goto err;

	if (dbp->db_am_rename != NULL &&
	    (ret = dbp->db_am_rename(dbp, ip, txn, name, subdb, newname)) != 0)
		goto err;

	/*
	 * The transactional case and non-transactional case are
	 * quite different.  In the non-transactional case, we simply
	 * do the rename.  In the transactional case, since we need
	 * the ability to back out and maintain locking, we have to
	 * create a temporary object as a placeholder.  This is all
	 * taken care of in the fop layer.
	 */
	if (IS_REAL_TXN(txn)) {
		if ((ret = __fop_dummy(dbp, txn, old, newname)) != 0)
			goto err;
	} else {
		if ((ret = __fop_dbrename(dbp, old, newname)) != 0)
			goto err;
	}

	/*
	 * I am pretty sure that we haven't gotten a dbreg id, so calling
	 * dbreg_filelist_update is not necessary.
	 */
	DB_ASSERT(env, dbp->log_filename == NULL ||
	    dbp->log_filename->id == DB_LOGFILEID_INVALID);

	DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, newname);

DB_TEST_RECOVERY_LABEL
err:	if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL)
		__os_free(env, real_name);

	return (ret);
}

/*
 * __db_subdb_rename --
 *	Rename a subdatabase.
 */
static int
__db_subdb_rename(dbp, ip, txn, name, subdb, newname, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name, *subdb, *newname;
	u_int32_t flags;
{
	DB *mdbp;
	ENV *env;
	PAGE *meta;
	int ret, t_ret;

	mdbp = NULL;
	meta = NULL;
	env = dbp->env;

	/*
	 * We have not opened this dbp so it isn't marked as a subdb,
	 * but it ought to be.
	 */
	F_SET(dbp, DB_AM_SUBDB);

	/*
	 * Rename the entry in the main database.  We need to first
	 * get the meta-data page number (via MU_OPEN) so that we can
	 * read the meta-data page and obtain a handle lock.  Once we've
	 * done that, we can proceed to do the rename in the master.
	 */
	if ((ret = __db_master_open(dbp, ip, txn, name, 0, 0, &mdbp)) != 0)
		goto err;

	if ((ret = __db_master_update(mdbp, dbp, ip, txn, subdb, dbp->type,
	    MU_OPEN, NULL, 0)) != 0)
		goto err;

	if ((ret = __memp_fget(mdbp->mpf, &dbp->meta_pgno,
	    ip, txn, 0, &meta)) != 0)
		goto err;
	/* Adopt the subdatabase's fileid so the handle lock matches it. */
	memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN);
	if ((ret = __fop_lock_handle(env, dbp,
	    (mdbp->cur_locker != NULL) ? mdbp->cur_locker : mdbp->locker,
	    DB_LOCK_WRITE, NULL, NOWAIT_FLAG(txn))) != 0)
		goto err;

	/* Release the meta page before updating the master. */
	ret = __memp_fput(mdbp->mpf, ip, meta, dbp->priority);
	meta = NULL;
	if (ret != 0)
		goto err;

	if ((ret = __db_master_update(mdbp, dbp, ip, txn,
	    subdb, dbp->type, MU_RENAME, newname, 0)) != 0)
		goto err;

	DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);

DB_TEST_RECOVERY_LABEL
err:
	if (meta != NULL && (t_ret =
	    __memp_fput(mdbp->mpf, ip, meta, dbp->priority)) != 0 && ret == 0)
		ret = t_ret;

	if (mdbp != NULL && (t_ret = __db_close(mdbp, txn,
	    (LF_ISSET(DB_NOSYNC) || txn != NULL) ? DB_NOSYNC : 0)) != 0 &&
	    ret == 0)
		ret = t_ret;

	return (ret);
}
diff --git a/src/db/db_ret.c b/src/db/db_ret.c
new file mode 100644
index 00000000..02fec8bc
--- /dev/null
+++ b/src/db/db_ret.c
@@ -0,0 +1,169 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_am.h"
#include "dbinc/heap.h"

/*
 * __db_ret --
 *	Build return DBT.
+ * + * PUBLIC: int __db_ret __P((DBC *, + * PUBLIC: PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); + */ +int +__db_ret(dbc, h, indx, dbt, memp, memsize) + DBC *dbc; + PAGE *h; + u_int32_t indx; + DBT *dbt; + void **memp; + u_int32_t *memsize; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + DB *dbp; + HEAPHDR *hdr; + HOFFPAGE ho; + u_int32_t len; + u_int8_t *hk; + void *data; + + if (F_ISSET(dbt, DB_DBT_READONLY)) + return (0); + dbp = dbc->dbp; + + switch (TYPE(h)) { + case P_HASH_UNSORTED: + case P_HASH: + hk = P_ENTRY(dbp, h, indx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) { + memcpy(&ho, hk, sizeof(HOFFPAGE)); + return (__db_goff(dbc, dbt, + ho.tlen, ho.pgno, memp, memsize)); + } + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, indx); + data = HKEYDATA_DATA(hk); + break; + case P_HEAP: + hdr = (HEAPHDR *)P_ENTRY(dbp, h, indx); + if (F_ISSET(hdr,(HEAP_RECSPLIT | HEAP_RECFIRST))) + return (__heapc_gsplit(dbc, dbt, memp, memsize)); + len = hdr->size; + data = (u_int8_t *)hdr + sizeof(HEAPHDR); + break; + case P_LBTREE: + case P_LDUP: + case P_LRECNO: + bk = GET_BKEYDATA(dbp, h, indx); + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + return (__db_goff(dbc, dbt, + bo->tlen, bo->pgno, memp, memsize)); + } + len = bk->len; + data = bk->data; + break; + default: + return (__db_pgfmt(dbp->env, h->pgno)); + } + + return (__db_retcopy(dbp->env, dbt, data, len, memp, memsize)); +} + +/* + * __db_retcopy -- + * Copy the returned data into the user's DBT, handling special flags. + * + * PUBLIC: int __db_retcopy __P((ENV *, DBT *, + * PUBLIC: void *, u_int32_t, void **, u_int32_t *)); + */ +int +__db_retcopy(env, dbt, data, len, memp, memsize) + ENV *env; + DBT *dbt; + void *data; + u_int32_t len; + void **memp; + u_int32_t *memsize; +{ + int ret; + + if (F_ISSET(dbt, DB_DBT_READONLY)) + return (0); + ret = 0; + + /* If returning a partial record, reset the length. 
*/ + if (F_ISSET(dbt, DB_DBT_PARTIAL)) { + data = (u_int8_t *)data + dbt->doff; + if (len > dbt->doff) { + len -= dbt->doff; + if (len > dbt->dlen) + len = dbt->dlen; + } else + len = 0; + } + + /* + * Allocate memory to be owned by the application: DB_DBT_MALLOC, + * DB_DBT_REALLOC. + * + * !!! + * We always allocate memory, even if we're copying out 0 bytes. This + * guarantees consistency, i.e., the application can always free memory + * without concern as to how many bytes of the record were requested. + * + * Use the memory specified by the application: DB_DBT_USERMEM. + * + * !!! + * If the length we're going to copy is 0, the application-supplied + * memory pointer is allowed to be NULL. + */ + if (F_ISSET(dbt, DB_DBT_USERCOPY)) { + dbt->size = len; + return (len == 0 ? 0 : env->dbt_usercopy(dbt, 0, data, + len, DB_USERCOPY_SETDATA)); + + } else if (F_ISSET(dbt, DB_DBT_MALLOC)) + ret = __os_umalloc(env, len, &dbt->data); + else if (F_ISSET(dbt, DB_DBT_REALLOC)) { + if (dbt->data == NULL || dbt->size == 0 || dbt->size < len) + ret = __os_urealloc(env, len, &dbt->data); + } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { + if (len != 0 && (dbt->data == NULL || dbt->ulen < len)) + ret = DB_BUFFER_SMALL; + } else if (memp == NULL || memsize == NULL) + ret = EINVAL; + else { + if (len != 0 && (*memsize == 0 || *memsize < len)) { + if ((ret = __os_realloc(env, len, memp)) == 0) + *memsize = len; + else + *memsize = 0; + } + if (ret == 0) + dbt->data = *memp; + } + + if (ret == 0 && len != 0) + memcpy(dbt->data, data, len); + + /* + * Return the length of the returned record in the DBT size field. + * This satisfies the requirement that if we're using user memory + * and insufficient memory was provided, return the amount necessary + * in the size field. 
+ */ + dbt->size = len; + + return (ret); +} diff --git a/src/db/db_setid.c b/src/db/db_setid.c new file mode 100644 index 00000000..e4a274d2 --- /dev/null +++ b/src/db/db_setid.c @@ -0,0 +1,213 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" + +/* + * __env_fileid_reset_pp -- + * ENV->fileid_reset pre/post processing. + * + * PUBLIC: int __env_fileid_reset_pp __P((DB_ENV *, const char *, u_int32_t)); + */ +int +__env_fileid_reset_pp(dbenv, name, flags) + DB_ENV *dbenv; + const char *name; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->fileid_reset"); + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + */ + if (flags != 0 && flags != DB_ENCRYPT) + return (__db_ferr(env, "DB_ENV->fileid_reset", 0)); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__env_fileid_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)), + 1, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __env_fileid_reset -- + * Reset the file IDs for every database in the file. + * PUBLIC: int __env_fileid_reset + * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, int)); + */ +int +__env_fileid_reset(env, ip, name, encrypted) + ENV *env; + DB_THREAD_INFO *ip; + const char *name; + int encrypted; +{ + DB *dbp; + DBC *dbcp; + DBMETA *meta; + DBT key, data; + DB_FH *fhp; + DB_MPOOLFILE *mpf; + DB_PGINFO cookie; + db_pgno_t pgno; + int subdb, t_ret, ret; + size_t n; + char *real_name; + u_int8_t fileid[DB_FILE_ID_LEN], mbuf[DBMETASIZE]; + void *pagep; + + dbp = NULL; + dbcp = NULL; + fhp = NULL; + real_name = NULL; + + /* Get the real backing file name. 
*/ + if ((ret = __db_appname(env, + DB_APP_DATA, name, NULL, &real_name)) != 0) + return (ret); + + /* Get a new file ID. */ + if ((ret = __os_fileid(env, real_name, 1, fileid)) != 0) + goto err; + + /* + * The user may have physically copied a file currently open in the + * cache, which means if we open this file through the cache before + * updating the file ID on page 0, we might connect to the file from + * which the copy was made. + */ + if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) { + __db_err(env, ret, "%s", real_name); + goto err; + } + if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) + goto err; + + if (n != sizeof(mbuf)) { + ret = EINVAL; + __db_errx(env, DB_STR_A("0675", + "__env_fileid_reset: %s: unexpected file type or format", + "%s"), real_name); + goto err; + } + + /* + * Create the DB object. + */ + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + + /* If configured with a password, the databases are encrypted. */ + if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0) + goto err; + + if ((ret = __db_meta_setup(env, + dbp, real_name, (DBMETA *)mbuf, 0, DB_CHK_META)) != 0) + goto err; + + meta = (DBMETA *)mbuf; + if (FLD_ISSET(meta->metaflags, + DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) && (ret = + __part_fileid_reset(env, ip, name, meta->nparts, encrypted)) != 0) + goto err; + + subdb = meta->type == P_BTREEMETA && F_ISSET(meta, BTM_SUBDB); + + memcpy(meta->uid, fileid, DB_FILE_ID_LEN); + cookie.db_pagesize = sizeof(mbuf); + cookie.flags = dbp->flags; + cookie.type = dbp->type; + key.data = &cookie; + + if ((ret = __db_pgout(env->dbenv, 0, mbuf, &key)) != 0) + goto err; + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + if ((ret = __os_write(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) + goto err; + if ((ret = __os_fsync(env, fhp)) != 0) + goto err; + + /* + * Page 0 of the file has an updated file ID, and we can open it in + * the cache without connecting to a different, existing file. 
Open + * the file in the cache, and update the file IDs for subdatabases. + */ + + /* + * If the database file doesn't support subdatabases, we only have + * to update a single metadata page. Otherwise, we have to open a + * cursor and step through the master database, and update all of + * the subdatabases' metadata pages. + */ + if (!subdb) + goto err; + + /* + * Open the DB file. + * + * !!! + * Note DB_RDWRMASTER flag, we need to open the master database file + * for writing in this case. + */ + if ((ret = __db_open(dbp, ip, NULL, + name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0) + goto err; + + mpf = dbp->mpf; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + if ((ret = __db_cursor(dbp, ip, NULL, &dbcp, 0)) != 0) + goto err; + while ((ret = __dbc_get(dbcp, &key, &data, DB_NEXT)) == 0) { + /* + * XXX + * We're handling actual data, not on-page meta-data, so it + * hasn't been converted to/from opposite endian architectures. + * Do it explicitly, now. + */ + memcpy(&pgno, data.data, sizeof(db_pgno_t)); + DB_NTOHL_SWAP(env, &pgno); + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, + DB_MPOOL_DIRTY, &pagep)) != 0) + goto err; + memcpy(((DBMETA *)pagep)->uid, fileid, DB_FILE_ID_LEN); + if ((ret = __memp_fput(mpf, ip, pagep, dbcp->priority)) != 0) + goto err; + } + if (ret == DB_NOTFOUND) + ret = 0; + +err: if (dbcp != NULL && (t_ret = __dbc_close(dbcp)) != 0 && ret == 0) + ret = t_ret; + if (dbp != NULL && (t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + if (fhp != NULL && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + if (real_name != NULL) + __os_free(env, real_name); + + return (ret); +} diff --git a/src/db/db_setlsn.c b/src/db/db_setlsn.c new file mode 100644 index 00000000..824fab85 --- /dev/null +++ b/src/db/db_setlsn.c @@ -0,0 +1,137 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" + +static int __env_lsn_reset __P((ENV *, DB_THREAD_INFO *, const char *, int)); + +/* + * __env_lsn_reset_pp -- + * ENV->lsn_reset pre/post processing. + * + * PUBLIC: int __env_lsn_reset_pp __P((DB_ENV *, const char *, u_int32_t)); + */ +int +__env_lsn_reset_pp(dbenv, name, flags) + DB_ENV *dbenv; + const char *name; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->lsn_reset"); + + /* + * !!! + * The actual argument checking is simple, do it inline, outside of + * the replication block. + */ + if (flags != 0 && flags != DB_ENCRYPT) + return (__db_ferr(env, "DB_ENV->lsn_reset", 0)); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__env_lsn_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)), + 1, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __env_lsn_reset -- + * Reset the LSNs for every page in the file. + */ +static int +__env_lsn_reset(env, ip, name, encrypted) + ENV *env; + DB_THREAD_INFO *ip; + const char *name; + int encrypted; +{ + DB *dbp; + int t_ret, ret; + + /* Create the DB object. */ + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + + /* If configured with a password, the databases are encrypted. */ + if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0) + goto err; + + /* + * Open the DB file. + * + * !!! + * Note DB_RDWRMASTER flag, we need to open the master database file + * for writing in this case. 
+ */ + if ((ret = __db_open(dbp, ip, NULL, + name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0) { + __db_err(env, ret, "%s", name); + goto err; + } + + ret = __db_lsn_reset(dbp->mpf, ip); +#ifdef HAVE_PARTITION + if (ret == 0 && DB_IS_PARTITIONED(dbp)) + ret = __part_lsn_reset(dbp, ip); + else +#endif + if (ret == 0 && dbp->type == DB_QUEUE) +#ifdef HAVE_QUEUE + ret = __qam_lsn_reset(dbp, ip); +#else + ret = __db_no_queue_am(env); +#endif + +err: if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __db_lsn_reset -- reset the lsn for a db mpool handle. + * PUBLIC: int __db_lsn_reset __P((DB_MPOOLFILE *, DB_THREAD_INFO *)); + */ +int +__db_lsn_reset(mpf, ip) + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; +{ + PAGE *pagep; + db_pgno_t pgno; + int ret; + + /* Reset the LSN on every page of the database file. */ + for (pgno = 0; + (ret = __memp_fget(mpf, + &pgno, ip, NULL, DB_MPOOL_DIRTY, &pagep)) == 0; + ++pgno) { + LSN_NOT_LOGGED(pagep->lsn); + if ((ret = __memp_fput(mpf, + ip, pagep, DB_PRIORITY_UNCHANGED)) != 0) + break; + } + + if (ret == DB_PAGE_NOTFOUND) + ret = 0; + + return (ret); +} diff --git a/src/db/db_sort_multiple.c b/src/db/db_sort_multiple.c new file mode 100644 index 00000000..8ed61e58 --- /dev/null +++ b/src/db/db_sort_multiple.c @@ -0,0 +1,327 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" + +static int __db_quicksort __P((DB *, DBT *, DBT *, u_int32_t *, u_int32_t *, + u_int32_t *, u_int32_t *, u_int32_t)); + +/* + * __db_compare_both -- + * Use the comparison functions from db to compare akey and bkey, and if + * DB_DUPSORT adata and bdata. 
+ * + * PUBLIC: int __db_compare_both __P((DB *, const DBT *, const DBT *, + * PUBLIC: const DBT *, const DBT *)); + */ +int +__db_compare_both(db, akey, adata, bkey, bdata) + DB *db; + const DBT *akey; + const DBT *adata; + const DBT *bkey; + const DBT *bdata; +{ + BTREE *t; + int cmp; + + t = (BTREE *)db->bt_internal; + + cmp = t->bt_compare(db, akey, bkey); + if (cmp != 0) return cmp; + if (!F_ISSET(db, DB_AM_DUPSORT)) + return (0); + + if (adata == 0) return bdata == 0 ? 0 : -1; + if (bdata == 0) return 1; + +#ifdef HAVE_COMPRESSION + if (DB_IS_COMPRESSED(db)) + return t->compress_dup_compare(db, adata, bdata); +#endif + return db->dup_compare(db, adata, bdata); +} + +#define DB_SORT_SWAP(a, ad, b, bd) \ +do { \ + tmp = (a)[0]; (a)[0] = (b)[0]; (b)[0] = tmp; \ + tmp = (a)[-1]; (a)[-1] = (b)[-1]; (b)[-1] = tmp; \ + if (data != NULL) { \ + tmp = (ad)[0]; (ad)[0] = (bd)[0]; (bd)[0] = tmp; \ + tmp = (ad)[-1]; (ad)[-1] = (bd)[-1]; (bd)[-1] = tmp; \ + } \ +} while (0) + +#define DB_SORT_LOAD_DBT(a, ad, aptr, adptr) \ +do { \ + (a).data = (u_int8_t*)key->data + (aptr)[0]; \ + (a).size = (aptr)[-1]; \ + if (data != NULL) { \ + (ad).data = (u_int8_t*)data->data + (adptr)[0]; \ + (ad).size = (adptr)[-1]; \ + } \ +} while (0) + +#define DB_SORT_COMPARE(a, ad, b, bd) (data != NULL ? \ + __db_compare_both(db, &(a), &(ad), &(b), &(bd)) : \ + __db_compare_both(db, &(a), 0, &(b), 0)) + +#define DB_SORT_STACKSIZE 32 + +/* + * __db_quicksort -- + * The quicksort implementation for __db_sort_multiple() and + * __db_sort_multiple_key(). 
+ */ +static int +__db_quicksort(db, key, data, kstart, kend, dstart, dend, size) + DB *db; + DBT *key, *data; + u_int32_t *kstart, *kend, *dstart, *dend; + u_int32_t size; +{ + int ret, cmp; + u_int32_t tmp, len; + u_int32_t *kptr, *dptr, *kl, *dl, *kr, *dr; + DBT a, ad, b, bd, m, md; + ENV *env; + + struct DB_SORT_quicksort_stack { + u_int32_t *kstart; + u_int32_t *kend; + u_int32_t *dstart; + u_int32_t *dend; + } stackbuf[DB_SORT_STACKSIZE], *stack; + u_int32_t soff, slen; + + ret = 0; + env = db->env; + + memset(&a, 0, sizeof(DBT)); + memset(&ad, 0, sizeof(DBT)); + memset(&b, 0, sizeof(DBT)); + memset(&bd, 0, sizeof(DBT)); + memset(&m, 0, sizeof(DBT)); + memset(&md, 0, sizeof(DBT)); + + /* NB end is smaller than start */ + + stack = stackbuf; + soff = 0; + slen = DB_SORT_STACKSIZE; + + start: + if (kend >= kstart) goto pop; + + /* If there's only one value, it's already sorted */ + len = (u_int32_t)(kstart - kend) / size; + if (len == 1) goto pop; + + DB_SORT_LOAD_DBT(a, ad, kstart, dstart); + DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size); + + if (len == 2) { + /* Special case the sorting of two value sequences */ + if (DB_SORT_COMPARE(a, ad, b, bd) > 0) { + DB_SORT_SWAP(kstart, dstart, kend + size, + dend + size); + } + goto pop; + } + + kptr = kstart - (len / 2) * size; + dptr = dstart - (len / 2) * size; + DB_SORT_LOAD_DBT(m, md, kptr, dptr); + + /* Find the median of three */ + if (DB_SORT_COMPARE(a, ad, b, bd) < 0) { + if (DB_SORT_COMPARE(m, md, a, ad) < 0) { + /* m < a < b */ + if (len == 3) { + DB_SORT_SWAP(kstart, dstart, kptr, dptr); + goto pop; + } + DB_SORT_SWAP(kstart, dstart, kend + size, dend + size); + } else if (DB_SORT_COMPARE(m, md, b, bd) < 0) { + /* a <= m < b */ + if (len == 3) { + goto pop; + } + DB_SORT_SWAP(kptr, dptr, kend + size, dend + size); + } else { + /* a < b <= m */ + if (len == 3) { + DB_SORT_SWAP(kptr, dptr, kend + size, + dend + size); + goto pop; + } + /* Do nothing */ + } + } else { + if (DB_SORT_COMPARE(a, ad, m, md) < 
0) { + /* b <= a < m */ + DB_SORT_SWAP(kstart, dstart, kend + size, + dend + size); + if (len == 3) { + DB_SORT_SWAP(kptr, dptr, kend + size, + dend + size); + goto pop; + } + } else if (DB_SORT_COMPARE(b, bd, m, md) < 0) { + /* b < m <= a */ + if (len == 3) { + DB_SORT_SWAP(kstart, dstart, kend + size, + dend + size); + goto pop; + } + DB_SORT_SWAP(kptr, dptr, kend + size, dend + size); + } else { + /* m <= b <= a */ + if (len == 3) { + DB_SORT_SWAP(kstart, dstart, kptr, dptr); + DB_SORT_SWAP(kptr, dptr, kend + size, + dend + size); + goto pop; + } + /* Do nothing */ + } + } + + /* partition */ + DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size); + kl = kstart; + dl = dstart; + kr = kend + size; + dr = dend + size; + kptr = kstart; + dptr = dstart; + while (kptr >= kr) { + DB_SORT_LOAD_DBT(a, ad, kptr, dptr); + cmp = DB_SORT_COMPARE(a, ad, b, bd); + if (cmp < 0) { + DB_SORT_SWAP(kl, dl, kptr, dptr); + kl -= size; + dl -= size; + kptr -= size; + dptr -= size; + } else if (cmp > 0) { + DB_SORT_SWAP(kr, dr, kptr, dptr); + kr += size; + dr += size; + } else { + kptr -= size; + dptr -= size; + } + } + + if (soff == slen) { + /* Grow the stack */ + slen = slen * 2; + if (stack == stackbuf) { + ret = __os_malloc(env, slen * + sizeof(struct DB_SORT_quicksort_stack), &stack); + if (ret != 0) goto error; + memcpy(stack, stackbuf, soff * + sizeof(struct DB_SORT_quicksort_stack)); + } else { + ret = __os_realloc(env, slen * + sizeof(struct DB_SORT_quicksort_stack), &stack); + if (ret != 0) goto error; + } + } + + /* divide and conquer */ + stack[soff].kstart = kr - size; + stack[soff].kend = kend; + stack[soff].dstart = dr - size; + stack[soff].dend = dend; + ++soff; + + kend = kl; + dend = dl; + + goto start; + + pop: + if (soff != 0) { + --soff; + kstart = stack[soff].kstart; + kend = stack[soff].kend; + dstart = stack[soff].dstart; + dend = stack[soff].dend; + goto start; + } + + error: + if (stack != stackbuf) + __os_free(env, stack); + + return (ret); +} + +#undef 
DB_SORT_SWAP +#undef DB_SORT_LOAD_DBT + +/* + * __db_sort_multiple -- + * If flags == DB_MULTIPLE_KEY, sorts a DB_MULTIPLE_KEY format DBT using + * the BTree comparison function and duplicate comparison function. + * + * If flags == DB_MULTIPLE, sorts one or two DB_MULTIPLE format DBTs using + * the BTree comparison function and duplicate comparison function. Will + * assume key and data specifies pairs of key/data to sort together. If + * data is NULL, will just sort key according to the btree comparison + * function. + * + * Uses an in-place quicksort algorithm, with median of three for the pivot + * point. + * + * PUBLIC: int __db_sort_multiple __P((DB *, DBT *, DBT *, u_int32_t)); + */ +int +__db_sort_multiple(db, key, data, flags) + DB *db; + DBT *key, *data; + u_int32_t flags; +{ + u_int32_t *kstart, *kend, *dstart, *dend; + + /* TODO: sanity checks on the DBTs */ + /* DB_ILLEGAL_METHOD(db, DB_OK_BTREE); */ + + kstart = (u_int32_t*)((u_int8_t *)key->data + key->ulen) - 1; + + switch (flags) { + case DB_MULTIPLE: + if (data != NULL) + dstart = (u_int32_t*)((u_int8_t *)data->data + + data->ulen) - 1; + else + dstart = kstart; + + /* Find the end */ + for (kend = kstart, dend = dstart; + *kend != (u_int32_t)-1 && *dend != (u_int32_t)-1; + kend -= 2, dend -= 2) + ; + + return (__db_quicksort(db, key, data, kstart, kend, dstart, + dend, 2)); + case DB_MULTIPLE_KEY: + /* Find the end */ + for (kend = kstart; *kend != (u_int32_t)-1; kend -= 4) + ; + + return (__db_quicksort(db, key, key, kstart, kend, kstart - 2, + kend - 2, 4)); + default: + return (__db_ferr(db->env, "DB->sort_multiple", 0)); + } +} diff --git a/src/db/db_stati.c b/src/db/db_stati.c new file mode 100644 index 00000000..418b470b --- /dev/null +++ b/src/db/db_stati.c @@ -0,0 +1,502 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" + +#ifdef HAVE_STATISTICS +static int __db_print_all __P((DB *, u_int32_t)); +static int __db_print_citem __P((DBC *)); +static int __db_print_cursor __P((DB *)); +static int __db_print_stats __P((DB *, DB_THREAD_INFO *, u_int32_t)); +static int __db_stat __P((DB *, DB_THREAD_INFO *, DB_TXN *, void *, u_int32_t)); +static int __db_stat_arg __P((DB *, u_int32_t)); + +/* + * __db_stat_pp -- + * DB->stat pre/post processing. + * + * PUBLIC: int __db_stat_pp __P((DB *, DB_TXN *, void *, u_int32_t)); + */ +int +__db_stat_pp(dbp, txn, spp, flags) + DB *dbp; + DB_TXN *txn; + void *spp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); + + if ((ret = __db_stat_arg(dbp, flags)) != 0) + return (ret); + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, + IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + ret = __db_stat(dbp, ip, txn, spp, flags); + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_stat -- + * DB->stat. + * + */ +static int +__db_stat(dbp, ip, txn, spp, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + void *spp; + u_int32_t flags; +{ + DBC *dbc; + ENV *env; + int ret, t_ret; + + env = dbp->env; + + /* Acquire a cursor. 
*/ + if ((ret = __db_cursor(dbp, ip, txn, + &dbc, LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))) != 0) + return (ret); + + DEBUG_LWRITE(dbc, NULL, "DB->stat", NULL, NULL, flags); + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) + ret = __partition_stat(dbc, spp, flags); + else +#endif + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_stat(dbc, spp, flags); + break; + case DB_HASH: + ret = __ham_stat(dbc, spp, flags); + break; + case DB_HEAP: + ret = __heap_stat(dbc, spp, flags); + break; + case DB_QUEUE: + ret = __qam_stat(dbc, spp, flags); + break; + case DB_UNKNOWN: + default: + ret = (__db_unknown_type(env, "DB->stat", dbp->type)); + break; + } + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_stat_arg -- + * Check DB->stat arguments. + */ +static int +__db_stat_arg(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + ENV *env; + + env = dbp->env; + + /* Check for invalid function flags. */ + LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); + switch (flags) { + case 0: + case DB_FAST_STAT: + break; + default: + return (__db_ferr(env, "DB->stat", 0)); + } + + return (0); +} + +/* + * __db_stat_print_pp -- + * DB->stat_print pre/post processing. + * + * PUBLIC: int __db_stat_print_pp __P((DB *, u_int32_t)); + */ +int +__db_stat_print_pp(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat_print"); + + /* + * !!! + * The actual argument checking is simple, do it inline. + */ + if ((ret = __db_fchk(env, + "DB->stat_print", flags, DB_FAST_STAT | DB_STAT_ALL)) != 0) + return (ret); + + ENV_ENTER(env, ip); + + /* Check for replication block. 
*/ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { + handle_check = 0; + goto err; + } + + ret = __db_stat_print(dbp, ip, flags); + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_stat_print -- + * DB->stat_print. + * + * PUBLIC: int __db_stat_print __P((DB *, DB_THREAD_INFO *, u_int32_t)); + */ +int +__db_stat_print(dbp, ip, flags) + DB *dbp; + DB_THREAD_INFO *ip; + u_int32_t flags; +{ + time_t now; + int ret; + char time_buf[CTIME_BUFLEN]; + + (void)time(&now); + __db_msg(dbp->env, "%.24s\tLocal time", __os_ctime(&now, time_buf)); + + if (LF_ISSET(DB_STAT_ALL) && (ret = __db_print_all(dbp, flags)) != 0) + return (ret); + + if ((ret = __db_print_stats(dbp, ip, flags)) != 0) + return (ret); + + return (0); +} + +/* + * __db_print_stats -- + * Display default DB handle statistics. + */ +static int +__db_print_stats(dbp, ip, flags) + DB *dbp; + DB_THREAD_INFO *ip; + u_int32_t flags; +{ + DBC *dbc; + ENV *env; + int ret, t_ret; + + env = dbp->env; + + /* Acquire a cursor. */ + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, NULL, "DB->stat_print", NULL, NULL, 0); + + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_stat_print(dbc, flags); + break; + case DB_HASH: + ret = __ham_stat_print(dbc, flags); + break; + case DB_HEAP: + ret = __heap_stat_print(dbc, flags); + break; + case DB_QUEUE: + ret = __qam_stat_print(dbc, flags); + break; + case DB_UNKNOWN: + default: + ret = (__db_unknown_type(env, "DB->stat_print", dbp->type)); + break; + } + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_print_all -- + * Display debugging DB handle statistics. 
+ */ +static int +__db_print_all(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + static const FN fn[] = { + { DB_AM_CHKSUM, "DB_AM_CHKSUM" }, + { DB_AM_COMPENSATE, "DB_AM_COMPENSATE" }, + { DB_AM_CREATED, "DB_AM_CREATED" }, + { DB_AM_CREATED_MSTR, "DB_AM_CREATED_MSTR" }, + { DB_AM_DBM_ERROR, "DB_AM_DBM_ERROR" }, + { DB_AM_DELIMITER, "DB_AM_DELIMITER" }, + { DB_AM_DISCARD, "DB_AM_DISCARD" }, + { DB_AM_DUP, "DB_AM_DUP" }, + { DB_AM_DUPSORT, "DB_AM_DUPSORT" }, + { DB_AM_ENCRYPT, "DB_AM_ENCRYPT" }, + { DB_AM_FIXEDLEN, "DB_AM_FIXEDLEN" }, + { DB_AM_INMEM, "DB_AM_INMEM" }, + { DB_AM_IN_RENAME, "DB_AM_IN_RENAME" }, + { DB_AM_NOT_DURABLE, "DB_AM_NOT_DURABLE" }, + { DB_AM_OPEN_CALLED, "DB_AM_OPEN_CALLED" }, + { DB_AM_PAD, "DB_AM_PAD" }, + { DB_AM_PGDEF, "DB_AM_PGDEF" }, + { DB_AM_RDONLY, "DB_AM_RDONLY" }, + { DB_AM_READ_UNCOMMITTED, "DB_AM_READ_UNCOMMITTED" }, + { DB_AM_RECNUM, "DB_AM_RECNUM" }, + { DB_AM_RECOVER, "DB_AM_RECOVER" }, + { DB_AM_RENUMBER, "DB_AM_RENUMBER" }, + { DB_AM_REVSPLITOFF, "DB_AM_REVSPLITOFF" }, + { DB_AM_SECONDARY, "DB_AM_SECONDARY" }, + { DB_AM_SNAPSHOT, "DB_AM_SNAPSHOT" }, + { DB_AM_SUBDB, "DB_AM_SUBDB" }, + { DB_AM_SWAP, "DB_AM_SWAP" }, + { DB_AM_TXN, "DB_AM_TXN" }, + { DB_AM_VERIFYING, "DB_AM_VERIFYING" }, + { 0, NULL } + }; + ENV *env; + char time_buf[CTIME_BUFLEN]; + + env = dbp->env; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB handle information:"); + STAT_ULONG("Page size", dbp->pgsize); + STAT_ISSET("Append recno", dbp->db_append_recno); + STAT_ISSET("Feedback", dbp->db_feedback); + STAT_ISSET("Dup compare", dbp->dup_compare); + STAT_ISSET("App private", dbp->app_private); + STAT_ISSET("DbEnv", dbp->env); + STAT_STRING("Type", __db_dbtype_to_string(dbp->type)); + + __mutex_print_debug_single(env, "Thread mutex", dbp->mutex, flags); + + STAT_STRING("File", dbp->fname); + STAT_STRING("Database", dbp->dname); + STAT_HEX("Open flags", dbp->open_flags); + + __db_print_fileid(env, dbp->fileid, "\tFile ID"); + + 
STAT_ULONG("Cursor adjust ID", dbp->adj_fileid); + STAT_ULONG("Meta pgno", dbp->meta_pgno); + if (dbp->locker != NULL) + STAT_ULONG("Locker ID", dbp->locker->id); + if (dbp->cur_locker != NULL) + STAT_ULONG("Handle lock", dbp->cur_locker->id); + if (dbp->associate_locker != NULL) + STAT_ULONG("Associate lock", dbp->associate_locker->id); + + __db_msg(env, + "%.24s\tReplication handle timestamp", + dbp->timestamp == 0 ? "0" : __os_ctime(&dbp->timestamp, time_buf)); + + STAT_ISSET("Secondary callback", dbp->s_callback); + STAT_ISSET("Primary handle", dbp->s_primary); + + STAT_ISSET("api internal", dbp->api_internal); + STAT_ISSET("Btree/Recno internal", dbp->bt_internal); + STAT_ISSET("Hash internal", dbp->h_internal); + STAT_ISSET("Queue internal", dbp->q_internal); + + __db_prflags(env, NULL, dbp->flags, fn, NULL, "\tFlags"); + + if (dbp->log_filename == NULL) + STAT_ISSET("File naming information", dbp->log_filename); + else + __dbreg_print_fname(env, dbp->log_filename); + + (void)__db_print_cursor(dbp); + + return (0); +} + +/* + * __db_print_cursor -- + * Display the cursor active and free queues. 
+ */ +static int +__db_print_cursor(dbp) + DB *dbp; +{ + DBC *dbc; + ENV *env; + int ret, t_ret; + + env = dbp->env; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB handle cursors:"); + + ret = 0; + MUTEX_LOCK(dbp->env, dbp->mutex); + __db_msg(env, "Active queue:"); + TAILQ_FOREACH(dbc, &dbp->active_queue, links) + if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) + ret = t_ret; + __db_msg(env, "Join queue:"); + TAILQ_FOREACH(dbc, &dbp->join_queue, links) + if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) + ret = t_ret; + __db_msg(env, "Free queue:"); + TAILQ_FOREACH(dbc, &dbp->free_queue, links) + if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) + ret = t_ret; + MUTEX_UNLOCK(dbp->env, dbp->mutex); + + return (ret); +} + +static int +__db_print_citem(dbc) + DBC *dbc; +{ + static const FN fn[] = { + { DBC_ACTIVE, "DBC_ACTIVE" }, + { DBC_DONTLOCK, "DBC_DONTLOCK" }, + { DBC_MULTIPLE, "DBC_MULTIPLE" }, + { DBC_MULTIPLE_KEY, "DBC_MULTIPLE_KEY" }, + { DBC_OPD, "DBC_OPD" }, + { DBC_OWN_LID, "DBC_OWN_LID" }, + { DBC_READ_COMMITTED, "DBC_READ_COMMITTED" }, + { DBC_READ_UNCOMMITTED, "DBC_READ_UNCOMMITTED" }, + { DBC_RECOVER, "DBC_RECOVER" }, + { DBC_RMW, "DBC_RMW" }, + { DBC_TRANSIENT, "DBC_TRANSIENT" }, + { DBC_WAS_READ_COMMITTED,"DBC_WAS_READ_COMMITTED" }, + { DBC_WRITECURSOR, "DBC_WRITECURSOR" }, + { DBC_WRITER, "DBC_WRITER" }, + { 0, NULL } + }; + DB *dbp; + DBC_INTERNAL *cp; + ENV *env; + + dbp = dbc->dbp; + env = dbp->env; + cp = dbc->internal; + + STAT_POINTER("DBC", dbc); + STAT_POINTER("Associated dbp", dbc->dbp); + STAT_POINTER("Associated txn", dbc->txn); + STAT_POINTER("Internal", cp); + STAT_HEX("Default locker ID", dbc->lref == NULL ? 0 : dbc->lref->id); + STAT_HEX("Locker", dbc->locker == NULL ? 
0 : dbc->locker->id); + STAT_STRING("Type", __db_dbtype_to_string(dbc->dbtype)); + + STAT_POINTER("Off-page duplicate cursor", cp->opd); + STAT_POINTER("Referenced page", cp->page); + STAT_ULONG("Root", cp->root); + STAT_ULONG("Page number", cp->pgno); + STAT_ULONG("Page index", cp->indx); + STAT_STRING("Lock mode", __db_lockmode_to_string(cp->lock_mode)); + __db_prflags(env, NULL, dbc->flags, fn, NULL, "\tFlags"); + + switch (dbc->dbtype) { + case DB_BTREE: + case DB_RECNO: + __bam_print_cursor(dbc); + break; + case DB_HASH: + __ham_print_cursor(dbc); + break; + case DB_HEAP: + __heap_print_cursor(dbc); + break; + case DB_UNKNOWN: + DB_ASSERT(env, dbp->type != DB_UNKNOWN); + /* FALLTHROUGH */ + case DB_QUEUE: + default: + break; + } + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__db_stat_pp(dbp, txn, spp, flags) + DB *dbp; + DB_TXN *txn; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbp->env)); +} + +int +__db_stat_print_pp(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbp->env)); +} +#endif diff --git a/src/db/db_truncate.c b/src/db/db_truncate.c new file mode 100644 index 00000000..e990152f --- /dev/null +++ b/src/db/db_truncate.c @@ -0,0 +1,233 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/lock.h" +#include "dbinc/partition.h" +#include "dbinc/txn.h" + +static int __db_cursor_check_func + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __db_cursor_check __P((DB *)); + +/* + * __db_truncate_pp + * DB->truncate pre/post processing. 
+ * + * PUBLIC: int __db_truncate_pp __P((DB *, DB_TXN *, u_int32_t *, u_int32_t)); + */ +int +__db_truncate_pp(dbp, txn, countp, flags) + DB *dbp; + DB_TXN *txn; + u_int32_t *countp, flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret, txn_local; + + env = dbp->env; + handle_check = txn_local = 0; + + STRIP_AUTO_COMMIT(flags); + + /* Check for invalid flags. */ + if (F_ISSET(dbp, DB_AM_SECONDARY)) { + __db_errx(env, DB_STR("0685", + "DB->truncate forbidden on secondary indices")); + return (EINVAL); + } + if ((ret = __db_fchk(env, "DB->truncate", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + XA_CHECK_TXN(ip, txn); + + /* + * Make sure there are no active cursors on this db. Since we drop + * pages we cannot really adjust cursors. + */ + if ((ret = __db_cursor_check(dbp)) != 0) { + __db_errx(env, DB_STR("0686", + "DB->truncate not permitted with active cursors")); + goto err; + } + +#ifdef CONFIG_TEST + if (IS_REP_MASTER(env)) + DB_TEST_WAIT(env, env->test_check); +#endif + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + /* + * Check for changes to a read-only database. This must be after the + * replication block so that we cannot race master/client state changes. + */ + if (DB_IS_READONLY(dbp)) { + ret = __db_rdonly(env, "DB->truncate"); + goto err; + } + + /* + * Create local transaction as necessary, check for consistent + * transaction usage. + */ + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) + goto err; + txn_local = 1; + } + + /* Check for consistent transaction usage. 
*/ + if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) + goto err; + + ret = __db_truncate(dbp, ip, txn, countp); + +err: if (txn_local && + (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) + ret = t_ret; + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_truncate + * DB->truncate. + * + * PUBLIC: int __db_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: u_int32_t *)); + */ +int +__db_truncate(dbp, ip, txn, countp) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t *countp; +{ + DB *sdbp; + DBC *dbc; + ENV *env; + u_int32_t scount; + int ret, t_ret; + + env = dbp->env; + dbc = NULL; + ret = 0; + + /* + * Run through all secondaries and truncate them first. The count + * returned is the count of the primary only. QUEUE uses normal + * processing to truncate so it will update the secondaries normally. + */ + if (dbp->type != DB_QUEUE && DB_IS_PRIMARY(dbp)) { + if ((ret = __db_s_first(dbp, &sdbp)) != 0) + return (ret); + for (; sdbp != NULL && ret == 0; ret = __db_s_next(&sdbp, txn)) + if ((ret = __db_truncate(sdbp, ip, txn, &scount)) != 0) + break; + if (sdbp != NULL) + (void)__db_s_done(sdbp, txn); + if (ret != 0) + return (ret); + } + + DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, NULL); + + /* Acquire a cursor. 
*/ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "DB->truncate", NULL, NULL, 0); +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) + ret = __part_truncate(dbc, countp); + else +#endif + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_truncate(dbc, countp); + break; + case DB_HASH: + ret = __ham_truncate(dbc, countp); + break; + case DB_HEAP: + ret = __heap_truncate(dbc, countp); + break; + case DB_QUEUE: + ret = __qam_truncate(dbc, countp); + break; + case DB_UNKNOWN: + default: + ret = __db_unknown_type(env, "DB->truncate", dbp->type); + break; + } + + /* Discard the cursor. */ + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, NULL); + +DB_TEST_RECOVERY_LABEL + + return (ret); +} + +static int +__db_cursor_check_func(dbc, my_dbc, foundp, pgno, indx, args) + DBC *dbc, *my_dbc; + u_int32_t *foundp; + db_pgno_t pgno; + u_int32_t indx; + void *args; +{ + COMPQUIET(my_dbc, NULL); + COMPQUIET(args, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(indx, 0); + if (IS_INITIALIZED(dbc)) { + *foundp = 1; + return (EEXIST); + } + return (0); +} +/* + * __db_cursor_check -- + * See if there are any active cursors on this db. + */ +static int +__db_cursor_check(dbp) + DB *dbp; +{ + int ret; + u_int32_t found; + + ret = __db_walk_cursors(dbp, NULL, + __db_cursor_check_func, &found, 0, 0, NULL); + return (ret == EEXIST ? EINVAL : ret); +} diff --git a/src/db/db_upg.c b/src/db/db_upg.c new file mode 100644 index 00000000..3693f71d --- /dev/null +++ b/src/db/db_upg.c @@ -0,0 +1,521 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" + +/* + * __db_upgrade_pp -- + * DB->upgrade pre/post processing. + * + * PUBLIC: int __db_upgrade_pp __P((DB *, const char *, u_int32_t)); + */ +int +__db_upgrade_pp(dbp, fname, flags) + DB *dbp; + const char *fname; + u_int32_t flags; +{ +#ifdef HAVE_UPGRADE_SUPPORT + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbp->env; + + /* + * !!! + * The actual argument checking is simple, do it inline. + */ + if ((ret = __db_fchk(env, "DB->upgrade", flags, DB_DUPSORT)) != 0) + return (ret); + + ENV_ENTER(env, ip); + ret = __db_upgrade(dbp, fname, flags); + ENV_LEAVE(env, ip); + return (ret); +#else + COMPQUIET(dbp, NULL); + COMPQUIET(fname, NULL); + COMPQUIET(flags, 0); + + __db_errx(dbp->env, DB_STR("0665", "upgrade not supported")); + return (EINVAL); +#endif +} + +#ifdef HAVE_UPGRADE_SUPPORT +static int (* const func_31_list[P_PAGETYPE_MAX]) + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = { + NULL, /* P_INVALID */ + NULL, /* __P_DUPLICATE */ + __ham_31_hash, /* P_HASH_UNSORTED */ + NULL, /* P_IBTREE */ + NULL, /* P_IRECNO */ + __bam_31_lbtree, /* P_LBTREE */ + NULL, /* P_LRECNO */ + NULL, /* P_OVERFLOW */ + __ham_31_hashmeta, /* P_HASHMETA */ + __bam_31_btreemeta, /* P_BTREEMETA */ + NULL, /* P_QAMMETA */ + NULL, /* P_QAMDATA */ + NULL, /* P_LDUP */ + NULL, /* P_HASH */ + NULL, /* P_HEAPMETA */ + NULL, /* P_HEAP */ + NULL, /* P_IHEAP */ +}; + +static int (* const func_46_list[P_PAGETYPE_MAX]) + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = { + NULL, /* P_INVALID */ + NULL, /* __P_DUPLICATE */ + __ham_46_hash, /* P_HASH_UNSORTED */ + NULL, /* P_IBTREE */ + NULL, /* P_IRECNO */ + NULL, /* P_LBTREE */ + NULL, /* P_LRECNO */ + NULL, /* P_OVERFLOW */ + __ham_46_hashmeta, /* P_HASHMETA */ + NULL, /* P_BTREEMETA */ + NULL, /* P_QAMMETA */ + NULL, /* 
P_QAMDATA */ + NULL, /* P_LDUP */ + NULL, /* P_HASH */ + NULL, /* P_HEAPMETA */ + NULL, /* P_HEAP */ + NULL, /* P_IHEAP */ +}; + +static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const []) + (DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *)); +static int __db_set_lastpgno __P((DB *, char *, DB_FH *)); + +/* + * __db_upgrade -- + * Upgrade an existing database. + * + * PUBLIC: int __db_upgrade __P((DB *, const char *, u_int32_t)); + */ +int +__db_upgrade(dbp, fname, flags) + DB *dbp; + const char *fname; + u_int32_t flags; +{ + DBMETA *meta; + DB_FH *fhp; + ENV *env; + size_t n; + int ret, t_ret, use_mp_open; + u_int8_t mbuf[256], tmpflags; + char *real_name; + + use_mp_open = 0; + env = dbp->env; + fhp = NULL; + + /* Get the real backing file name. */ + if ((ret = __db_appname(env, + DB_APP_DATA, fname, NULL, &real_name)) != 0) + return (ret); + + /* Open the file. */ + if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) { + __db_err(env, ret, "%s", real_name); + return (ret); + } + + /* Initialize the feedback. */ + if (dbp->db_feedback != NULL) + dbp->db_feedback(dbp, DB_UPGRADE, 0); + + /* + * Read the metadata page. We read 256 bytes, which is larger than + * any access method's metadata page and smaller than any disk sector. + */ + if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) + goto err; + + switch (((DBMETA *)mbuf)->magic) { + case DB_BTREEMAGIC: + switch (((DBMETA *)mbuf)->version) { + case 6: + /* + * Before V7 not all pages had page types, so we do the + * single meta-data page by hand. + */ + if ((ret = + __bam_30_btreemeta(dbp, real_name, mbuf)) != 0) + goto err; + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) + goto err; + /* FALLTHROUGH */ + case 7: + /* + * We need the page size to do more. Rip it out of + * the meta-data page. 
+ */ + memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t)); + + if ((ret = __db_page_pass( + dbp, real_name, flags, func_31_list, fhp)) != 0) + goto err; + /* FALLTHROUGH */ + case 8: + if ((ret = + __db_set_lastpgno(dbp, real_name, fhp)) != 0) + goto err; + /* FALLTHROUGH */ + case 9: + break; + default: + __db_errx(env, DB_STR_A("0666", + "%s: unsupported btree version: %lu", "%s %lu"), + real_name, (u_long)((DBMETA *)mbuf)->version); + ret = DB_OLD_VERSION; + goto err; + } + break; + case DB_HASHMAGIC: + switch (((DBMETA *)mbuf)->version) { + case 4: + case 5: + /* + * Before V6 not all pages had page types, so we do the + * single meta-data page by hand. + */ + if ((ret = + __ham_30_hashmeta(dbp, real_name, mbuf)) != 0) + goto err; + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) + goto err; + + /* + * Before V6, we created hash pages one by one as they + * were needed, using hashhdr.ovfl_point to reserve + * a block of page numbers for them. A consequence + * of this was that, if no overflow pages had been + * created, the current doubling might extend past + * the end of the database file. + * + * In DB 3.X, we now create all the hash pages + * belonging to a doubling atomically; it's not + * safe to just save them for later, because when + * we create an overflow page we'll just create + * a new last page (whatever that may be). Grow + * the database to the end of the current doubling. + */ + if ((ret = + __ham_30_sizefix(dbp, fhp, real_name, mbuf)) != 0) + goto err; + /* FALLTHROUGH */ + case 6: + /* + * We need the page size to do more. Rip it out of + * the meta-data page. 
+ */ + memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t)); + + if ((ret = __db_page_pass( + dbp, real_name, flags, func_31_list, fhp)) != 0) + goto err; + /* FALLTHROUGH */ + case 7: + if ((ret = + __db_set_lastpgno(dbp, real_name, fhp)) != 0) + goto err; + /* FALLTHROUGH */ + case 8: + /* + * Any upgrade that has proceeded this far has metadata + * pages compatible with hash version 8 metadata pages, + * so casting mbuf to a dbmeta is safe. + * If a newer revision moves the pagesize, checksum or + * encrypt_alg flags in the metadata, then the + * extraction of the fields will need to use hard coded + * offsets. + */ + meta = (DBMETA*)mbuf; + /* + * We need the page size to do more. Extract it from + * the meta-data page. + */ + memcpy(&dbp->pgsize, &meta->pagesize, + sizeof(u_int32_t)); + /* + * Rip out metadata and encrypt_alg fields from the + * metadata page. So the upgrade can know how big + * the page metadata pre-amble is. Any upgrade that has + * proceeded this far has metadata pages compatible + * with hash version 8 metadata pages, so extracting + * the fields is safe. + */ + memcpy(&tmpflags, &meta->metaflags, sizeof(u_int8_t)); + if (FLD_ISSET(tmpflags, DBMETA_CHKSUM)) + F_SET(dbp, DB_AM_CHKSUM); + memcpy(&tmpflags, &meta->encrypt_alg, sizeof(u_int8_t)); + if (tmpflags != 0) { + if (!CRYPTO_ON(dbp->env)) { + __db_errx(env, DB_STR("0667", +"Attempt to upgrade an encrypted database without providing a password.")); + ret = EINVAL; + goto err; + } + F_SET(dbp, DB_AM_ENCRYPT); + } + + /* + * This is ugly. It is necessary to have a usable + * mpool in the dbp to upgrade from an unsorted + * to a sorted hash database. The mpool file is used + * to resolve offpage key items, which are needed to + * determine sort order. Having mpool open and access + * the file does not affect the page pass, since the + * page pass only updates DB_HASH_UNSORTED pages + * in-place, and the mpool file is only used to read + * OFFPAGE items. 
+ */ + use_mp_open = 1; + if ((ret = __os_closehandle(env, fhp)) != 0) + return (ret); + dbp->type = DB_HASH; + if ((ret = __env_mpool(dbp, fname, + DB_AM_NOT_DURABLE | DB_AM_VERIFYING)) != 0) + return (ret); + fhp = dbp->mpf->fhp; + + /* Do the actual conversion pass. */ + if ((ret = __db_page_pass( + dbp, real_name, flags, func_46_list, fhp)) != 0) + goto err; + + /* FALLTHROUGH */ + case 9: + break; + default: + __db_errx(env, DB_STR_A("0668", + "%s: unsupported hash version: %lu", "%s %lu"), + real_name, (u_long)((DBMETA *)mbuf)->version); + ret = DB_OLD_VERSION; + goto err; + } + break; + case DB_QAMMAGIC: + switch (((DBMETA *)mbuf)->version) { + case 1: + /* + * If we're in a Queue database, the only page that + * needs upgrading is the meta-database page, don't + * bother with a full pass. + */ + if ((ret = __qam_31_qammeta(dbp, real_name, mbuf)) != 0) + return (ret); + /* FALLTHROUGH */ + case 2: + if ((ret = __qam_32_qammeta(dbp, real_name, mbuf)) != 0) + return (ret); + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) + goto err; + /* FALLTHROUGH */ + case 3: + case 4: + break; + default: + __db_errx(env, DB_STR_A("0669", + "%s: unsupported queue version: %lu", + "%s %lu"), real_name, + (u_long)((DBMETA *)mbuf)->version); + ret = DB_OLD_VERSION; + goto err; + } + break; + default: + M_32_SWAP(((DBMETA *)mbuf)->magic); + switch (((DBMETA *)mbuf)->magic) { + case DB_BTREEMAGIC: + case DB_HASHMAGIC: + case DB_QAMMAGIC: + __db_errx(env, DB_STR_A("0670", + "%s: DB->upgrade only supported on native byte-order systems", + "%s"), real_name); + break; + default: + __db_errx(env, DB_STR_A("0671", + "%s: unrecognized file type", "%s"), real_name); + break; + } + ret = EINVAL; + goto err; + } + + ret = __os_fsync(env, fhp); + + /* + * If mp_open was used, then rely on the database close to clean up + * any file handles. 
+ */ +err: if (use_mp_open == 0 && fhp != NULL && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + __os_free(env, real_name); + + /* We're done. */ + if (dbp->db_feedback != NULL) + dbp->db_feedback(dbp, DB_UPGRADE, 100); + + return (ret); +} + +/* + * __db_page_pass -- + * Walk the pages of the database, upgrading whatever needs it. + */ +static int +__db_page_pass(dbp, real_name, flags, fl, fhp) + DB *dbp; + char *real_name; + u_int32_t flags; + int (* const fl[P_PAGETYPE_MAX]) + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + DB_FH *fhp; +{ + ENV *env; + PAGE *page; + db_pgno_t i, pgno_last; + size_t n; + int dirty, ret; + + env = dbp->env; + + /* Determine the last page of the file. */ + if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0) + return (ret); + + /* Allocate memory for a single page. */ + if ((ret = __os_malloc(env, dbp->pgsize, &page)) != 0) + return (ret); + + /* Walk the file, calling the underlying conversion functions. */ + for (i = 0; i < pgno_last; ++i) { + if (dbp->db_feedback != NULL) + dbp->db_feedback( + dbp, DB_UPGRADE, (int)((i * 100)/pgno_last)); + if ((ret = __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0) + break; + if ((ret = __os_read(env, fhp, page, dbp->pgsize, &n)) != 0) + break; + dirty = 0; + /* Always decrypt the page. */ + if ((ret = __db_decrypt_pg(env, dbp, page)) != 0) + break; + if (fl[TYPE(page)] != NULL && (ret = fl[TYPE(page)] + (dbp, real_name, flags, fhp, page, &dirty)) != 0) + break; + if (dirty) { + if ((ret = __db_encrypt_and_checksum_pg( + env, dbp, page)) != 0) + break; + if ((ret = + __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0) + break; + if ((ret = __os_write(env, + fhp, page, dbp->pgsize, &n)) != 0) + break; + } + } + + __os_free(dbp->env, page); + return (ret); +} + +/* + * __db_lastpgno -- + * Return the current last page number of the file. 
+ * + * PUBLIC: int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *)); + */ +int +__db_lastpgno(dbp, real_name, fhp, pgno_lastp) + DB *dbp; + char *real_name; + DB_FH *fhp; + db_pgno_t *pgno_lastp; +{ + ENV *env; + db_pgno_t pgno_last; + u_int32_t mbytes, bytes; + int ret; + + env = dbp->env; + + if ((ret = __os_ioinfo(env, + real_name, fhp, &mbytes, &bytes, NULL)) != 0) { + __db_err(env, ret, "%s", real_name); + return (ret); + } + + /* Page sizes have to be a power-of-two. */ + if (bytes % dbp->pgsize != 0) { + __db_errx(env, DB_STR_A("0672", + "%s: file size not a multiple of the pagesize", "%s"), + real_name); + return (EINVAL); + } + pgno_last = mbytes * (MEGABYTE / dbp->pgsize); + pgno_last += bytes / dbp->pgsize; + + *pgno_lastp = pgno_last; + return (0); +} + +/* + * __db_set_lastpgno -- + * Update the meta->last_pgno field. + * + * Code assumes that we do not have checksums/crypto on the page. + */ +static int +__db_set_lastpgno(dbp, real_name, fhp) + DB *dbp; + char *real_name; + DB_FH *fhp; +{ + DBMETA meta; + ENV *env; + int ret; + size_t n; + + env = dbp->env; + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + return (ret); + if ((ret = __os_read(env, fhp, &meta, sizeof(meta), &n)) != 0) + return (ret); + dbp->pgsize = meta.pagesize; + if ((ret = __db_lastpgno(dbp, real_name, fhp, &meta.last_pgno)) != 0) + return (ret); + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + return (ret); + if ((ret = __os_write(env, fhp, &meta, sizeof(meta), &n)) != 0) + return (ret); + + return (0); +} +#endif /* HAVE_UPGRADE_SUPPORT */ diff --git a/src/db/db_upg_opd.c b/src/db/db_upg_opd.c new file mode 100644 index 00000000..7e1130b9 --- /dev/null +++ b/src/db/db_upg_opd.c @@ -0,0 +1,343 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" + +static int __db_build_bi __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *)); +static int __db_build_ri __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *)); +static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t)); + +#define GET_PAGE(dbp, fhp, pgno, page) { \ + if ((ret = __os_seek( \ + dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \ + goto err; \ + if ((ret = __os_read(dbp->env, \ + fhp, page, (dbp)->pgsize, &n)) != 0) \ + goto err; \ +} +#define PUT_PAGE(dbp, fhp, pgno, page) { \ + if ((ret = __os_seek( \ + dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \ + goto err; \ + if ((ret = __os_write(dbp->env, \ + fhp, page, (dbp)->pgsize, &n)) != 0) \ + goto err; \ +} + +/* + * __db_31_offdup -- + * Convert 3.0 off-page duplicates to 3.1 off-page duplicates. + * + * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *)); + */ +int +__db_31_offdup(dbp, real_name, fhp, sorted, pgnop) + DB *dbp; + char *real_name; + DB_FH *fhp; + int sorted; + db_pgno_t *pgnop; +{ + PAGE *ipage, *page; + db_indx_t indx; + db_pgno_t cur_cnt, i, next_cnt, pgno, *pgno_cur, pgno_last; + db_pgno_t *pgno_next, pgno_max, *tmp; + db_recno_t nrecs; + size_t n; + int level, nomem, ret; + + ipage = page = NULL; + pgno_cur = pgno_next = NULL; + + /* Allocate room to hold a page. */ + if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0) + goto err; + + /* + * Walk the chain of 3.0 off-page duplicates. Each one is converted + * in place to a 3.1 off-page duplicate page. If the duplicates are + * sorted, they are converted to a Btree leaf page, otherwise to a + * Recno leaf page. 
+ */ + for (nrecs = 0, cur_cnt = pgno_max = 0, + pgno = *pgnop; pgno != PGNO_INVALID;) { + if (pgno_max == cur_cnt) { + pgno_max += 20; + if ((ret = __os_realloc(dbp->env, pgno_max * + sizeof(db_pgno_t), &pgno_cur)) != 0) + goto err; + } + pgno_cur[cur_cnt++] = pgno; + + GET_PAGE(dbp, fhp, pgno, page); + nrecs += NUM_ENT(page); + LEVEL(page) = LEAFLEVEL; + TYPE(page) = sorted ? P_LDUP : P_LRECNO; + /* + * !!! + * DB didn't zero the LSNs on off-page duplicates pages. + */ + ZERO_LSN(LSN(page)); + PUT_PAGE(dbp, fhp, pgno, page); + + pgno = NEXT_PGNO(page); + } + + /* If we only have a single page, it's easy. */ + if (cur_cnt <= 1) + goto done; + + /* + * pgno_cur is the list of pages we just converted. We're + * going to walk that list, but we'll need to create a new + * list while we do so. + */ + if ((ret = __os_malloc(dbp->env, + cur_cnt * sizeof(db_pgno_t), &pgno_next)) != 0) + goto err; + + /* Figure out where we can start allocating new pages. */ + if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0) + goto err; + + /* Allocate room for an internal page. */ + if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ipage)) != 0) + goto err; + PGNO(ipage) = PGNO_INVALID; + + /* + * Repeatedly walk the list of pages, building internal pages, until + * there's only one page at a level. + */ + for (level = LEAFLEVEL + 1; cur_cnt > 1; ++level) { + for (indx = 0, i = next_cnt = 0; i < cur_cnt;) { + if (indx == 0) { + P_INIT(ipage, dbp->pgsize, pgno_last, + PGNO_INVALID, PGNO_INVALID, + level, sorted ? P_IBTREE : P_IRECNO); + ZERO_LSN(LSN(ipage)); + + pgno_next[next_cnt++] = pgno_last++; + } + + GET_PAGE(dbp, fhp, pgno_cur[i], page); + + /* + * If the duplicates are sorted, put the first item on + * the lower-level page onto a Btree internal page. If + * the duplicates are not sorted, create an internal + * Recno structure on the page. If either case doesn't + * fit, push out the current page and start a new one. 
+ */ + nomem = 0; + if (sorted) { + if ((ret = __db_build_bi( + dbp, fhp, ipage, page, indx, &nomem)) != 0) + goto err; + } else + if ((ret = __db_build_ri( + dbp, fhp, ipage, page, indx, &nomem)) != 0) + goto err; + if (nomem) { + indx = 0; + PUT_PAGE(dbp, fhp, PGNO(ipage), ipage); + } else { + ++indx; + ++NUM_ENT(ipage); + ++i; + } + } + + /* + * Push out the last internal page. Set the top-level record + * count if we've reached the top. + */ + if (next_cnt == 1) + RE_NREC_SET(ipage, nrecs); + PUT_PAGE(dbp, fhp, PGNO(ipage), ipage); + + /* Swap the current and next page number arrays. */ + cur_cnt = next_cnt; + tmp = pgno_cur; + pgno_cur = pgno_next; + pgno_next = tmp; + } + +done: *pgnop = pgno_cur[0]; + +err: if (pgno_cur != NULL) + __os_free(dbp->env, pgno_cur); + if (pgno_next != NULL) + __os_free(dbp->env, pgno_next); + if (ipage != NULL) + __os_free(dbp->env, ipage); + if (page != NULL) + __os_free(dbp->env, page); + + return (ret); +} + +/* + * __db_build_bi -- + * Build a BINTERNAL entry for a parent page. + */ +static int +__db_build_bi(dbp, fhp, ipage, page, indx, nomemp) + DB *dbp; + DB_FH *fhp; + PAGE *ipage, *page; + u_int32_t indx; + int *nomemp; +{ + BINTERNAL bi, *child_bi; + BKEYDATA *child_bk; + u_int8_t *p; + int ret; + db_indx_t *inp; + + inp = P_INP(dbp, ipage); + switch (TYPE(page)) { + case P_IBTREE: + child_bi = GET_BINTERNAL(dbp, page, 0); + if (P_FREESPACE(dbp, ipage) < BINTERNAL_PSIZE(child_bi->len)) { + *nomemp = 1; + return (0); + } + inp[indx] = + HOFFSET(ipage) -= BINTERNAL_SIZE(child_bi->len); + p = P_ENTRY(dbp, ipage, indx); + + bi.len = child_bi->len; + B_TSET(bi.type, child_bi->type); + bi.pgno = PGNO(page); + bi.nrecs = __bam_total(dbp, page); + memcpy(p, &bi, SSZA(BINTERNAL, data)); + p += SSZA(BINTERNAL, data); + memcpy(p, child_bi->data, child_bi->len); + + /* Increment the overflow ref count. 
*/ + if (B_TYPE(child_bi->type) == B_OVERFLOW) + if ((ret = __db_up_ovref(dbp, fhp, + ((BOVERFLOW *)(child_bi->data))->pgno)) != 0) + return (ret); + break; + case P_LDUP: + child_bk = GET_BKEYDATA(dbp, page, 0); + switch (B_TYPE(child_bk->type)) { + case B_KEYDATA: + if (P_FREESPACE(dbp, ipage) < + BINTERNAL_PSIZE(child_bk->len)) { + *nomemp = 1; + return (0); + } + inp[indx] = + HOFFSET(ipage) -= BINTERNAL_SIZE(child_bk->len); + p = P_ENTRY(dbp, ipage, indx); + + bi.len = child_bk->len; + B_TSET(bi.type, child_bk->type); + bi.pgno = PGNO(page); + bi.nrecs = __bam_total(dbp, page); + memcpy(p, &bi, SSZA(BINTERNAL, data)); + p += SSZA(BINTERNAL, data); + memcpy(p, child_bk->data, child_bk->len); + break; + case B_OVERFLOW: + if (P_FREESPACE(dbp, ipage) < + BINTERNAL_PSIZE(BOVERFLOW_SIZE)) { + *nomemp = 1; + return (0); + } + inp[indx] = + HOFFSET(ipage) -= BINTERNAL_SIZE(BOVERFLOW_SIZE); + p = P_ENTRY(dbp, ipage, indx); + + bi.len = BOVERFLOW_SIZE; + B_TSET(bi.type, child_bk->type); + bi.pgno = PGNO(page); + bi.nrecs = __bam_total(dbp, page); + memcpy(p, &bi, SSZA(BINTERNAL, data)); + p += SSZA(BINTERNAL, data); + memcpy(p, child_bk, BOVERFLOW_SIZE); + + /* Increment the overflow ref count. */ + if ((ret = __db_up_ovref(dbp, fhp, + ((BOVERFLOW *)child_bk)->pgno)) != 0) + return (ret); + break; + default: + return (__db_pgfmt(dbp->env, PGNO(page))); + } + break; + default: + return (__db_pgfmt(dbp->env, PGNO(page))); + } + + return (0); +} + +/* + * __db_build_ri -- + * Build a RINTERNAL entry for an internal parent page. 
+ */ +static int +__db_build_ri(dbp, fhp, ipage, page, indx, nomemp) + DB *dbp; + DB_FH *fhp; + PAGE *ipage, *page; + u_int32_t indx; + int *nomemp; +{ + RINTERNAL ri; + db_indx_t *inp; + + COMPQUIET(fhp, NULL); + inp = P_INP(dbp, ipage); + if (P_FREESPACE(dbp, ipage) < RINTERNAL_PSIZE) { + *nomemp = 1; + return (0); + } + + ri.pgno = PGNO(page); + ri.nrecs = __bam_total(dbp, page); + inp[indx] = HOFFSET(ipage) -= RINTERNAL_SIZE; + memcpy(P_ENTRY(dbp, ipage, indx), &ri, RINTERNAL_SIZE); + + return (0); +} + +/* + * __db_up_ovref -- + * Increment/decrement the reference count on an overflow page. + */ +static int +__db_up_ovref(dbp, fhp, pgno) + DB *dbp; + DB_FH *fhp; + db_pgno_t pgno; +{ + PAGE *page; + size_t n; + int ret; + + /* Allocate room to hold a page. */ + if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0) + return (ret); + + GET_PAGE(dbp, fhp, pgno, page); + ++OV_REF(page); + PUT_PAGE(dbp, fhp, pgno, page); + +err: __os_free(dbp->env, page); + + return (ret); +} diff --git a/src/db/db_vrfy.c b/src/db/db_vrfy.c new file mode 100644 index 00000000..e3e0c6a7 --- /dev/null +++ b/src/db/db_vrfy.c @@ -0,0 +1,2970 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_verify.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * This is the code for DB->verify, the DB database consistency checker. + * For now, it checks all subdatabases in a database, and verifies + * everything it knows how to (i.e. it's all-or-nothing, and one can't + * check only for a subset of possible problems). 
+ */ + +static u_int __db_guesspgsize __P((ENV *, DB_FH *)); +static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *)); +static int __db_meta2pgset + __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *)); +static int __db_salvage __P((DB *, VRFY_DBINFO *, + db_pgno_t, void *, int (*)(void *, const void *), u_int32_t)); +static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *, + PAGE *, void *, int (*)(void *, const void *), u_int32_t)); +static int __db_salvage_all __P((DB *, VRFY_DBINFO *, void *, + int(*)(void *, const void *), u_int32_t, int *)); +static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *, + int (*)(void *, const void *), u_int32_t)); +static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t)); +static int __db_vrfy_freelist + __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); +static int __db_vrfy_invalid + __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +static int __db_vrfy_orderchkonly __P((DB *, + VRFY_DBINFO *, const char *, const char *, u_int32_t)); +static int __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t)); +static int __db_vrfy_subdbs + __P((DB *, VRFY_DBINFO *, const char *, u_int32_t)); +static int __db_vrfy_structure __P((DB *, VRFY_DBINFO *, + const char *, db_pgno_t, void *, void *, u_int32_t)); +static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *, + void *, int (*)(void *, const void *), u_int32_t)); + +#define VERIFY_FLAGS \ + (DB_AGGRESSIVE | \ + DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF) + +/* + * __db_verify_pp -- + * DB->verify public interface. + * + * PUBLIC: int __db_verify_pp + * PUBLIC: __P((DB *, const char *, const char *, FILE *, u_int32_t)); + */ +int +__db_verify_pp(dbp, file, database, outfile, flags) + DB *dbp; + const char *file, *database; + FILE *outfile; + u_int32_t flags; +{ + /* + * __db_verify_pp is a wrapper to __db_verify_internal, which lets + * us pass appropriate equivalents to FILE * in from the non-C APIs. 
+ * That's why the usual ENV_ENTER macros are in __db_verify_internal, + * not here. + */ + return (__db_verify_internal(dbp, + file, database, outfile, __db_pr_callback, flags)); +} + +/* + * __db_verify_internal -- + * + * PUBLIC: int __db_verify_internal __P((DB *, const char *, + * PUBLIC: const char *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__db_verify_internal(dbp, fname, dname, handle, callback, flags) + DB *dbp; + const char *fname, *dname; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret, t_ret; + + env = dbp->env; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify"); + + if (!LF_ISSET(DB_SALVAGE)) + LF_SET(DB_UNREF); + + ENV_ENTER(env, ip); + + if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0) + ret = __db_verify(dbp, ip, + fname, dname, handle, callback, NULL, NULL, flags); + + /* Db.verify is a DB handle destructor. */ + if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __db_verify_arg -- + * Check DB->verify arguments. + */ +static int +__db_verify_arg(dbp, dname, handle, flags) + DB *dbp; + const char *dname; + void *handle; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbp->env; + + if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0) + return (ret); + + /* + * DB_SALVAGE is mutually exclusive with the other flags except + * DB_AGGRESSIVE, DB_PRINTABLE. + * + * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging. + * + * DB_SALVAGE requires an output stream. 
+ */ + if (LF_ISSET(DB_SALVAGE)) { + if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE))) + return (__db_ferr(env, "DB->verify", 1)); + if (handle == NULL) { + __db_errx(env, DB_STR("0518", + "DB_SALVAGE requires a an output handle")); + return (EINVAL); + } + } else + if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE)) + return (__db_ferr(env, "DB->verify", 1)); + + /* + * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and + * DB_NOORDERCHK, and requires a database name. + */ + if ((ret = __db_fcchk(env, "DB->verify", flags, + DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0) + return (ret); + if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) { + __db_errx(env, DB_STR("0519", + "DB_ORDERCHKONLY requires a database name")); + return (EINVAL); + } + return (0); +} + +/* + * __db_verify -- + * Walk the entire file page-by-page, either verifying with or without + * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data + * pairs can be found and dumping them in standard (db_load-ready) + * dump format. + * + * (Salvaging isn't really a verification operation, but we put it + * here anyway because it requires essentially identical top-level + * code.) + * + * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE + * (and optionally DB_AGGRESSIVE). 
 * PUBLIC: int __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
 * PUBLIC:     const char *, void *, int (*)(void *, const void *),
 * PUBLIC:     void *, void *, u_int32_t));
 */
int
__db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags)
	DB *dbp;
	DB_THREAD_INFO *ip;
	const char *name, *subdb;
	void *handle;
	int (*callback) __P((void *, const void *));
	/*
	 * lp/rp are opaque bounds passed straight through to
	 * __db_vrfy_structure; NULL from __db_verify_internal.
	 * NOTE(review): presumably used for partition range checks via
	 * __part_verify -- confirm against that caller.
	 */
	void *lp, *rp;
	u_int32_t flags;
{
	DB_FH *fhp;
	ENV *env;
	VRFY_DBINFO *vdp;
	u_int32_t sflags;
	int has_subdbs, isbad, ret, t_ret;
	char *real_name;

	env = dbp->env;
	fhp = NULL;
	vdp = NULL;
	real_name = NULL;
	has_subdbs = isbad = ret = t_ret = 0;

	F_SET(dbp, DB_AM_VERIFYING);

	/* Initialize any feedback function. */
	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
		dbp->db_feedback(dbp, DB_VERIFY, 0);

	/*
	 * We don't know how large the cache is, and if the database
	 * in question uses a small page size--which we don't know
	 * yet!--it may be uncomfortably small for the default page
	 * size [#2143].  However, the things we need temporary
	 * databases for in dbinfo are largely tiny, so using a
	 * 1024-byte pagesize is probably not going to be a big hit,
	 * and will make us fit better into small spaces.
	 */
	if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0)
		goto err;

	/*
	 * Note whether the user has requested that we use printable
	 * chars where possible.  We won't get here with this flag if
	 * we're not salvaging.
	 */
	if (LF_ISSET(DB_PRINTABLE))
		F_SET(vdp, SALVAGE_PRINTABLE);

	/* Find the real name of the file. */
	if ((ret = __db_appname(env,
	    DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
		goto err;

	/*
	 * Our first order of business is to verify page 0, which is
	 * the metadata page for the master database of subdatabases
	 * or of the only database in the file.  We want to do this by hand
	 * rather than just calling __db_open in case it's corrupt--various
	 * things in __db_open might act funny.
	 *
	 * Once we know the metadata page is healthy, I believe that it's
	 * safe to open the database normally and then use the page swapping
	 * code, which makes life easier.
	 */
	if ((ret = __os_open(env, real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
		goto err;

	/* Verify the metadata page 0; set pagesize and type. */
	if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) {
		/* DB_VERIFY_BAD is recoverable: note it and keep walking. */
		if (ret == DB_VERIFY_BAD)
			isbad = 1;
		else
			goto err;
	}

	/*
	 * We can assume at this point that dbp->pagesize and dbp->type are
	 * set correctly, or at least as well as they can be, and that
	 * locking, logging, and txns are not in use.  Thus we can trust
	 * the memp code not to look at the page, and thus to be safe
	 * enough to use.
	 *
	 * The dbp is not open, but the file is open in the fhp, and we
	 * cannot assume that __db_open is safe.  Call __env_setup,
	 * the [safe] part of __db_open that initializes the environment--
	 * and the mpool--manually.
	 */
	if ((ret = __env_setup(dbp, NULL,
	    name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
		goto err;

	/*
	 * Set our name in the Queue subsystem; we may need it later
	 * to deal with extents.
	 */
	if (dbp->type == DB_QUEUE &&
	    (ret = __qam_set_ext_data(dbp, name)) != 0)
		goto err;

	/* Mark the dbp as opened, so that we correctly handle its close. */
	F_SET(dbp, DB_AM_OPEN_CALLED);

	/* Find out the page number of the last page in the database. */
	if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
		goto err;

	/*
	 * DB_ORDERCHKONLY is a special case; our file consists of
	 * several subdatabases, which use different hash, bt_compare,
	 * and/or dup_compare functions.  Consequently, we couldn't verify
	 * sorting and hashing simply by calling DB->verify() on the file.
	 * DB_ORDERCHKONLY allows us to come back and check those things; it
	 * requires a subdatabase, and assumes that everything but that
	 * database's sorting/hashing is correct.
	 */
	if (LF_ISSET(DB_ORDERCHKONLY)) {
		ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
		goto done;
	}

	/*
	 * Save the caller's flags; DB_SALVAGE is cleared for the main pass
	 * when this is a partitioned db (partitions are salvaged separately
	 * by __part_verify below), and restored afterward.
	 */
	sflags = flags;
	if (dbp->p_internal != NULL)
		LF_CLR(DB_SALVAGE);

	/*
	 * When salvaging, we use a db to keep track of whether we've seen a
	 * given overflow or dup page in the course of traversing normal data.
	 * If in the end we have not, we assume its key got lost and print it
	 * with key "UNKNOWN".
	 */
	if (LF_ISSET(DB_SALVAGE)) {
		if ((ret = __db_salvage_init(vdp)) != 0)
			goto err;

		/*
		 * If we're not being aggressive, salvage by walking the tree
		 * and only printing the leaves we find.  "has_subdbs" will
		 * indicate whether we found subdatabases.
		 */
		if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all(
		    dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
			isbad = 1;

		/*
		 * If we have subdatabases, flag if any keys are found that
		 * don't belong to a subdatabase -- they'll need to have an
		 * "__OTHER__" subdatabase header printed first.
		 */
		if (has_subdbs) {
			F_SET(vdp, SALVAGE_PRINTHEADER);
			F_SET(vdp, SALVAGE_HASSUBDBS);
		}
	}

	/* Walk all the pages, if a page cannot be read, verify structure. */
	if ((ret =
	    __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
		if (ret == DB_VERIFY_BAD)
			isbad = 1;
		else if (ret != DB_PAGE_NOTFOUND)
			goto err;
	}

	/* If we're verifying, verify inter-page structure. */
	if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
		if ((t_ret = __db_vrfy_structure(dbp,
		    vdp, name, 0, lp, rp, flags)) != 0) {
			if (t_ret == DB_VERIFY_BAD)
				isbad = 1;
			else
				goto err;
		}

	/*
	 * If we're salvaging, output with key UNKNOWN any overflow or dup pages
	 * we haven't been able to put in context.  Then destroy the salvager's
	 * state-saving database.
	 */
	if (LF_ISSET(DB_SALVAGE)) {
		if ((ret = __db_salvage_unknowns(dbp,
		    vdp, handle, callback, flags)) != 0)
			isbad = 1;
	}

	/* Restore the caller's original flags (see sflags above). */
	flags = sflags;

#ifdef HAVE_PARTITION
	/* t_ret may hold a __db_vrfy_structure failure; don't overwrite it. */
	if (t_ret == 0 && dbp->p_internal != NULL)
		t_ret = __part_verify(dbp, vdp, name, handle, callback, flags);
#endif

	if (ret == 0)
		ret = t_ret;

	/* Don't display a footer for a database holding other databases. */
	if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE &&
	    (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
		(void)__db_prfooter(handle, callback);

	/* Shared cleanup path for both success and failure. */
done: err:
	/* Send feedback that we're done. */
	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
		dbp->db_feedback(dbp, DB_VERIFY, 100);

	if (LF_ISSET(DB_SALVAGE) &&
	    (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0)
		ret = t_ret;
	if (fhp != NULL &&
	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
		ret = t_ret;
	if (vdp != NULL &&
	    (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
		ret = t_ret;
	if (real_name != NULL)
		__os_free(env, real_name);

	/*
	 * DB_VERIFY_FATAL is a private error, translate to a public one.
	 *
	 * If we didn't find a page, it's probably a page number was corrupted.
	 * Return the standard corruption error.
	 *
	 * Otherwise, if we found corruption along the way, set the return.
	 */
	if (ret == DB_VERIFY_FATAL ||
	    ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
		ret = DB_VERIFY_BAD;

	/* Make sure there's a public complaint if we found corruption. */
	if (ret != 0)
		__db_err(env, ret, "%s", name);

	return (ret);
}

/*
 * __db_vrfy_pagezero --
 *	Verify the master metadata page.  Use seek, read, and a local buffer
 *	rather than the DB paging code, for safety.
 *
 *	Must correctly (or best-guess) set dbp->type and dbp->pagesize.
 */
static int
__db_vrfy_pagezero(dbp, vdp, fhp, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	DB_FH *fhp;		/* Raw file handle; the mpool isn't used here. */
	u_int32_t flags;
{
	DBMETA *meta;
	ENV *env;
	VRFY_PAGEINFO *pip;
	db_pgno_t freelist;
	size_t nr;
	int isbad, ret, swapped;
	u_int8_t mbuf[DBMETASIZE];

	isbad = ret = swapped = 0;
	freelist = 0;
	env = dbp->env;
	meta = (DBMETA *)mbuf;
	dbp->type = DB_UNKNOWN;

	if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
		return (ret);

	/*
	 * Seek to the metadata page.
	 * Note that if we're just starting a verification, dbp->pgsize
	 * may be zero; this is okay, as we want page zero anyway and
	 * 0*0 == 0.
	 */
	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
	    (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
		__db_err(env, ret, DB_STR_A("0520",
		    "Metadata page %lu cannot be read", "%lu"),
		    (u_long)PGNO_BASE_MD);
		return (ret);
	}

	/* A short read means the file is smaller than any valid database. */
	if (nr != DBMETASIZE) {
		EPRINT((env, DB_STR_A("0521",
		    "Page %lu: Incomplete metadata page", "%lu"),
		    (u_long)PGNO_BASE_MD));
		return (DB_VERIFY_FATAL);
	}

	/*
	 * Checksum/encryption sanity check.  __db_chk_meta's -1 return is
	 * treated as "corrupt but checkable"; any other nonzero return means
	 * we couldn't even check the page, which is fatal.
	 */
	if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
		EPRINT((env, DB_STR_A("0522",
		    "Page %lu: metadata page corrupted", "%lu"),
		    (u_long)PGNO_BASE_MD));
		isbad = 1;
		if (ret != -1) {
			EPRINT((env, DB_STR_A("0523",
			    "Page %lu: could not check metadata page", "%lu"),
			    (u_long)PGNO_BASE_MD));
			return (DB_VERIFY_FATAL);
		}
	}

	/*
	 * Check all of the fields that we can.
	 *
	 * 08-11: Current page number.  Must == pgno.
	 * Note that endianness doesn't matter--it's zero.
	 */
	if (meta->pgno != PGNO_BASE_MD) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0524",
		    "Page %lu: pgno incorrectly set to %lu", "%lu %lu"),
		    (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
	}

	/* 12-15: Magic number.  Must be one of valid set. */
	if (__db_is_valid_magicno(meta->magic, &dbp->type))
		swapped = 0;
	else {
		/* Retry byte-swapped: the file may be from the other endian. */
		M_32_SWAP(meta->magic);
		if (__db_is_valid_magicno(meta->magic,
		    &dbp->type))
			swapped = 1;
		else {
			isbad = 1;
			EPRINT((env, DB_STR_A("0525",
			    "Page %lu: bad magic number %lu", "%lu %lu"),
			    (u_long)PGNO_BASE_MD, (u_long)meta->magic));
		}
	}

	/*
	 * 16-19: Version.  Must be current; for now, we
	 * don't support verification of old versions.
	 */
	if (swapped)
		M_32_SWAP(meta->version);
	if ((dbp->type == DB_BTREE &&
	    (meta->version > DB_BTREEVERSION ||
	    meta->version < DB_BTREEOLDVER)) ||
	    (dbp->type == DB_HASH &&
	    (meta->version > DB_HASHVERSION ||
	    meta->version < DB_HASHOLDVER)) ||
	    (dbp->type == DB_HEAP &&
	    (meta->version > DB_HEAPVERSION ||
	    meta->version < DB_HEAPOLDVER)) ||
	    (dbp->type == DB_QUEUE &&
	    (meta->version > DB_QAMVERSION ||
	    meta->version < DB_QAMOLDVER))) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0526",
    "Page %lu: unsupported DB version %lu; extraneous errors may result",
		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->version));
	}

	/*
	 * 20-23: Pagesize.  Must be power of two,
	 * greater than 512, and less than 64K.
	 */
	if (swapped)
		M_32_SWAP(meta->pagesize);
	if (IS_VALID_PAGESIZE(meta->pagesize))
		dbp->pgsize = meta->pagesize;
	else {
		isbad = 1;
		EPRINT((env, DB_STR_A("0527", "Page %lu: bad page size %lu",
		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));

		/*
		 * Now try to settle on a pagesize to use.
		 * If the user-supplied one is reasonable,
		 * use it; else, guess.
		 */
		if (!IS_VALID_PAGESIZE(dbp->pgsize))
			dbp->pgsize = __db_guesspgsize(env, fhp);
	}

	/*
	 * 25: Page type.  Must be correct for dbp->type,
	 * which is by now set as well as it can be.
	 */
	/* Needs no swapping--only one byte! */
	if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
	    (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
	    (dbp->type == DB_HEAP && meta->type != P_HEAPMETA) ||
	    (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0528", "Page %lu: bad page type %lu",
		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->type));
	}

	/*
	 * 26: Meta-flags.
	 */
	if (meta->metaflags != 0) {
		if (FLD_ISSET(meta->metaflags,
		    ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0529",
			    "Page %lu: bad meta-data flags value %#lx",
			    "%lu %#lx"), (u_long)PGNO_BASE_MD,
			    (u_long)meta->metaflags));
		}
		if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
			F_SET(pip, VRFY_HAS_CHKSUM);
		if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
			F_SET(pip, VRFY_HAS_PART_RANGE);
		if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
			F_SET(pip, VRFY_HAS_PART_CALLBACK);

		if (FLD_ISSET(meta->metaflags,
		    DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
		    (ret = __partition_init(dbp, meta->metaflags)) != 0)
			return (ret);
	}

	/*
	 * 28-31: Free list page number.
	 * 32-35: Last page in database file.
	 * We'll verify last_pgno once we open the db in the mpool;
	 * for now, just store it.
	 */
	if (swapped)
		M_32_SWAP(meta->free);
	freelist = meta->free;
	if (swapped)
		M_32_SWAP(meta->last_pgno);
	vdp->meta_last_pgno = meta->last_pgno;

	/*
	 * Initialize vdp->pages to fit a single pageinfo structure for
	 * this one page.  We'll realloc later when we know how many
	 * pages there are.
	 */
	pip->pgno = PGNO_BASE_MD;
	pip->type = meta->type;

	/*
	 * Signal that we still have to check the info specific to
	 * a given type of meta page.
	 */
	F_SET(pip, VRFY_INCOMPLETE);

	pip->free = freelist;

	if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
		return (ret);

	/* Set up the dbp's fileid.  We don't use the regular open path.
 */
	memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);

	/* Remember that the on-disk byte order differs from ours. */
	if (swapped == 1)
		F_SET(dbp, DB_AM_SWAP);

	return (isbad ? DB_VERIFY_BAD : 0);
}

/*
 * __db_vrfy_walkpages --
 *	Main loop of the verifier/salvager.  Walks through,
 *	page by page, and verifies all pages and/or prints all data pages.
 */
static int
__db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	void *handle;
	int (*callback) __P((void *, const void *));
	u_int32_t flags;
{
	DB_MPOOLFILE *mpf;
	ENV *env;
	PAGE *h;
	VRFY_PAGEINFO *pip;
	db_pgno_t i;
	int ret, t_ret, isbad;

	env = dbp->env;
	mpf = dbp->mpf;
	h = NULL;
	ret = isbad = t_ret = 0;

	for (i = 0; i <= vdp->last_pgno; i++) {
		/*
		 * If DB_SALVAGE is set, we inspect our database of completed
		 * pages, and skip any we've already printed in the subdb pass.
		 */
		if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
			continue;

		/*
		 * An individual page get can fail if:
		 * * This is a hash database, it is expected to find
		 *   empty buckets, which don't have allocated pages.  Create
		 *   a dummy page so the verification can proceed.
		 * * We are salvaging, flag the error and continue.
		 */
		if ((t_ret = __memp_fget(mpf, &i,
		    vdp->thread_info, NULL, 0, &h)) != 0) {
			if (dbp->type == DB_HASH) {
				if ((t_ret =
				    __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
					goto err1;
				pip->type = P_INVALID;
				pip->pgno = i;
				F_CLR(pip, VRFY_IS_ALLZEROES);
				if ((t_ret = __db_vrfy_putpageinfo(
				    env, vdp, pip)) != 0)
					goto err1;
				continue;
			}
			if (t_ret == DB_PAGE_NOTFOUND) {
				EPRINT((env, DB_STR_A("0530",
    "Page %lu: beyond the end of the file, metadata page has last page as %lu",
				    "%lu %lu"), (u_long)i,
				    (u_long)vdp->last_pgno));
				if (ret == 0)
					return (t_ret);
			}

			/* Record the first failure; salvage presses on. */
err1:			if (ret == 0)
				ret = t_ret;
			if (LF_ISSET(DB_SALVAGE))
				continue;
			return (ret);
		}

		if (LF_ISSET(DB_SALVAGE)) {
			/*
			 * We pretty much don't want to quit unless a
			 * bomb hits.  May as well return that something
			 * was screwy, however.
			 */
			if ((t_ret = __db_salvage_pg(dbp,
			    vdp, i, h, handle, callback, flags)) != 0) {
				if (ret == 0)
					ret = t_ret;
				isbad = 1;
			}
		} else {
			/*
			 * If we are not salvaging, and we get any error
			 * other than DB_VERIFY_BAD, return immediately;
			 * it may not be safe to proceed.  If we get
			 * DB_VERIFY_BAD, keep going; listing more errors
			 * may make it easier to diagnose problems and
			 * determine the magnitude of the corruption.
			 *
			 * Verify info common to all page types.
			 */
			if (i != PGNO_BASE_MD) {
				ret = __db_vrfy_common(dbp, vdp, h, i, flags);
				if (ret == DB_VERIFY_BAD)
					isbad = 1;
				else if (ret != 0)
					goto err;
			}

			/* Dispatch to the access-method page verifier. */
			switch (TYPE(h)) {
			case P_INVALID:
				ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
				break;
			case __P_DUPLICATE:
				isbad = 1;
				EPRINT((env, DB_STR_A("0531",
				    "Page %lu: old-style duplicate page",
				    "%lu"), (u_long)i));
				break;
			case P_HASH_UNSORTED:
			case P_HASH:
				ret = __ham_vrfy(dbp, vdp, h, i, flags);
				break;
			case P_HEAP:
			case P_IHEAP:
				ret = __heap_vrfy(dbp, vdp, h, i, flags);
				break;
			case P_IBTREE:
			case P_IRECNO:
			case P_LBTREE:
			case P_LDUP:
				ret = __bam_vrfy(dbp, vdp, h, i, flags);
				break;
			case P_LRECNO:
				ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
				break;
			case P_OVERFLOW:
				ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
				break;
			case P_HASHMETA:
				ret = __ham_vrfy_meta(dbp,
				    vdp, (HMETA *)h, i, flags);
				break;
			case P_HEAPMETA:
				ret = __heap_vrfy_meta(dbp,
				    vdp, (HEAPMETA *)h, i, flags);
				break;
			case P_BTREEMETA:
				ret = __bam_vrfy_meta(dbp,
				    vdp, (BTMETA *)h, i, flags);
				break;
			case P_QAMMETA:
				ret = __qam_vrfy_meta(dbp,
				    vdp, (QMETA *)h, i, flags);
				break;
			case P_QAMDATA:
				ret = __qam_vrfy_data(dbp,
				    vdp, (QPAGE *)h, i, flags);
				break;
			default:
				EPRINT((env, DB_STR_A("0532",
				    "Page %lu: unknown page type %lu",
				    "%lu %lu"), (u_long)i, (u_long)TYPE(h)));
				isbad = 1;
				break;
			}

			/*
			 * Set up error return.
			 */
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
			else if (ret != 0)
				goto err;

			/*
			 * Provide feedback to the application about our
			 * progress.  The range 0-50% comes from the fact
			 * that this is the first of two passes through the
			 * database (front-to-back, then top-to-bottom).
			 */
			if (dbp->db_feedback != NULL)
				dbp->db_feedback(dbp, DB_VERIFY,
				    (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
		}

		/*
		 * Just as with the page get, bail if and only if we're
		 * not salvaging.
		 */
		if ((t_ret = __memp_fput(mpf,
		    vdp->thread_info, h, dbp->priority)) != 0) {
			if (ret == 0)
				ret = t_ret;
			if (!LF_ISSET(DB_SALVAGE))
				return (ret);
		}
	}

	/*
	 * If we've seen a Queue metadata page, we may need to walk Queue
	 * extent pages that won't show up between 0 and vdp->last_pgno.
	 */
	if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
	    __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
		if (ret == 0)
			ret = t_ret;
		if (t_ret == DB_VERIFY_BAD)
			isbad = 1;
		else if (!LF_ISSET(DB_SALVAGE))
			return (ret);
	}

	/* Error exit: release the pinned page before returning. */
	if (0) {
err:		if (h != NULL && (t_ret = __memp_fput(mpf,
		    vdp->thread_info, h, dbp->priority)) != 0)
			return (ret == 0 ? t_ret : ret);
	}

	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
}

/*
 * __db_vrfy_structure--
 *	After a beginning-to-end walk through the database has been
 *	completed, put together the information that has been collected
 *	to verify the overall database structure.
 *
 *	Should only be called if we want to do a database verification,
 *	i.e. if DB_SALVAGE is not set.
 */
static int
__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	const char *dbname;
	db_pgno_t meta_pgno;
	void *lp, *rp;	/* Opaque bounds forwarded to __bam_vrfy_structure. */
	u_int32_t flags;
{
	DB *pgset;
	ENV *env;
	VRFY_PAGEINFO *pip;
	db_pgno_t i;
	int ret, isbad, hassubs, p;

	isbad = 0;
	pip = NULL;
	env = dbp->env;
	pgset = vdp->pgset;

	/*
	 * Providing feedback here is tricky; in most situations,
	 * we fetch each page one more time, but we do so in a top-down
	 * order that depends on the access method.  Worse, we do this
	 * recursively in btree, such that on any call where we're traversing
	 * a subtree we don't know where that subtree is in the whole database;
	 * worse still, any given database may be one of several subdbs.
	 *
	 * The solution is to decrement a counter vdp->pgs_remaining each time
	 * we verify (and call feedback on) a page.  We may over- or
	 * under-count, but the structure feedback function will ensure that we
	 * never give a percentage under 50 or over 100.  (The first pass
	 * covered the range 0-50%.)
	 */
	if (dbp->db_feedback != NULL)
		vdp->pgs_remaining = vdp->last_pgno + 1;

	/*
	 * Call the appropriate function to downwards-traverse the db type.
	 */
	switch (dbp->type) {
	case DB_BTREE:
	case DB_RECNO:
		if ((ret =
		    __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
			else
				goto err;
		}

		/*
		 * If we have subdatabases and we know that the database is,
		 * thus far, sound, it's safe to walk the tree of subdatabases.
		 * Do so, and verify the structure of the databases within.
		 */
		if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
			goto err;
		hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
			goto err;
		/* pip is returned; NULL it so err doesn't put it twice. */
		pip = NULL;

		if (isbad == 0 && hassubs)
			if ((ret =
			    __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
				if (ret == DB_VERIFY_BAD)
					isbad = 1;
				else
					goto err;
			}
		break;
	case DB_HASH:
		if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
			else
				goto err;
		}
		break;
	case DB_HEAP:
		if ((ret = __heap_vrfy_structure(dbp, vdp, flags)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
		}
		/* Skip the freelist check for heap, it doesn't apply. */
		goto err;
	case DB_QUEUE:
		if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
		}

		/*
		 * Queue pages may be unreferenced and totally zeroed, if
		 * they're empty; queue doesn't have much structure, so
		 * this is unlikely to be wrong in any troublesome sense.
		 * Skip to "err".
		 */
		goto err;
	case DB_UNKNOWN:
	default:
		ret = __db_unknown_path(env, "__db_vrfy_structure");
		goto err;
	}

	/* Walk free list. */
	if ((ret =
	    __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
		isbad = 1;

	/*
	 * If structure checks up until now have failed, it's likely that
	 * checking what pages have been missed will result in oodles of
	 * extraneous error messages being EPRINTed.  Skip to the end
	 * if this is the case; we're going to be printing at least one
	 * error anyway, and probably all the more salient ones.
	 */
	if (ret != 0 || isbad == 1)
		goto err;

	/*
	 * Make sure no page has been missed and that no page is still marked
	 * "all zeroes" unless we are looking at unused hash bucket pages or
	 * pages off the end of database.
	 */
	for (i = 0; i < vdp->last_pgno + 1; i++) {
		if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
			goto err;
		/* p is this page's reference count from the first pass. */
		if ((ret = __db_vrfy_pgset_get(pgset,
		    vdp->thread_info, vdp->txn, i, &p)) != 0)
			goto err;
		if (pip->type == P_OVERFLOW) {
			if ((u_int32_t)p != pip->refcount) {
				EPRINT((env, DB_STR_A("0533",
    "Page %lu: overflow refcount %lu, referenced %lu times",
				    "%lu %lu %lu"), (u_long)i,
				    (u_long)pip->refcount, (u_long)p));
				isbad = 1;
			}
		} else if (p == 0 &&
#ifndef HAVE_FTRUNCATE
		    !(i > vdp->meta_last_pgno &&
		    (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
#endif
		    !(dbp->type == DB_HASH &&
		    (pip->type == P_HASH || pip->type == P_INVALID))) {
			/*
			 * It is OK for unreferenced hash buckets to be
			 * marked invalid and unreferenced.
			 */
			EPRINT((env, DB_STR_A("0534",
			    "Page %lu: unreferenced page", "%lu"), (u_long)i));
			isbad = 1;
		}

		if (F_ISSET(pip, VRFY_IS_ALLZEROES)
#ifndef HAVE_FTRUNCATE
		    && i <= vdp->meta_last_pgno
#endif
		    ) {
			EPRINT((env, DB_STR_A("0535",
			    "Page %lu: totally zeroed page", "%lu"),
			    (u_long)i));
			isbad = 1;
		}
		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
			goto err;
		pip = NULL;
	}

err:	if (pip != NULL)
		(void)__db_vrfy_putpageinfo(env, vdp, pip);

	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
}

/*
 * __db_is_valid_magicno --
 *	Map a metadata magic number to its DBTYPE.  Returns 1 and sets
 *	*typep on a match, otherwise returns 0 and sets *typep to
 *	DB_UNKNOWN.
 */
static int
__db_is_valid_magicno(magic, typep)
	u_int32_t magic;
	DBTYPE *typep;
{
	switch (magic) {
	case DB_BTREEMAGIC:
		*typep = DB_BTREE;
		return (1);
	case DB_HASHMAGIC:
		*typep = DB_HASH;
		return (1);
	case DB_HEAPMAGIC:
		*typep = DB_HEAP;
		return (1);
	case DB_QAMMAGIC:
		*typep = DB_QUEUE;
		return (1);
	default:
		break;
	}
	*typep = DB_UNKNOWN;
	return (0);
}

/*
 * __db_vrfy_common --
 *	Verify info common to all page types.
 *
 * PUBLIC: int __db_vrfy_common
 * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 */
int
__db_vrfy_common(dbp, vdp, h, pgno, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	PAGE *h;
	db_pgno_t pgno;
	u_int32_t flags;
{
	ENV *env;
	VRFY_PAGEINFO *pip;
	int ret, t_ret;
	u_int8_t *p;

	env = dbp->env;

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		return (ret);

	pip->pgno = pgno;
	F_CLR(pip, VRFY_IS_ALLZEROES);

	/*
	 * Hash expands the table by leaving some pages between the
	 * old last and the new last totally zeroed.  These pages may
	 * not be all zero if they were used, freed and then reallocated.
	 *
	 * Queue will create sparse files if sparse record numbers are used.
	 */
	if (pgno != 0 && PGNO(h) == 0) {
		/* Scan the raw bytes to see if the page really is all zero. */
		F_SET(pip, VRFY_IS_ALLZEROES);
		for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
			if (*p != 0) {
				F_CLR(pip, VRFY_IS_ALLZEROES);
				break;
			}
		/*
		 * Mark it as a hash, and we'll
		 * check that that makes sense structurally later.
		 * (The queue verification doesn't care, since queues
		 * don't really have much in the way of structure.)
		 */
		if (dbp->type != DB_HEAP)
			pip->type = P_HASH;
		ret = 0;
		goto err;	/* well, not really an err. */
	}

	if (PGNO(h) != pgno) {
		EPRINT((env, DB_STR_A("0536", "Page %lu: bad page number %lu",
		    "%lu %lu"), (u_long)pgno, (u_long)h->pgno));
		ret = DB_VERIFY_BAD;
	}

	/* Reject page types outside the known set. */
	switch (h->type) {
	case P_INVALID:			/* Order matches ordinal value. */
	case P_HASH_UNSORTED:
	case P_IBTREE:
	case P_IRECNO:
	case P_LBTREE:
	case P_LRECNO:
	case P_OVERFLOW:
	case P_HASHMETA:
	case P_BTREEMETA:
	case P_QAMMETA:
	case P_QAMDATA:
	case P_LDUP:
	case P_HASH:
	case P_HEAP:
	case P_IHEAP:
	case P_HEAPMETA:
		break;
	default:
		EPRINT((env, DB_STR_A("0537", "Page %lu: bad page type %lu",
		    "%lu %lu"), (u_long)pgno, (u_long)h->type));
		ret = DB_VERIFY_BAD;
	}
	pip->type = h->type;

err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __db_vrfy_invalid --
 *	Verify P_INVALID page.
 *	(Yes, there's not much to do here.)
 */
static int
__db_vrfy_invalid(dbp, vdp, h, pgno, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	PAGE *h;
	db_pgno_t pgno;
	u_int32_t flags;
{
	ENV *env;
	VRFY_PAGEINFO *pip;
	int ret, t_ret;

	env = dbp->env;

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		return (ret);
	pip->next_pgno = pip->prev_pgno = 0;

	/* A free-list page's only interesting field is its next pointer. */
	if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
		EPRINT((env, DB_STR_A("0538", "Page %lu: invalid next_pgno %lu",
		    "%lu %lu"), (u_long)pgno, (u_long)NEXT_PGNO(h)));
		ret = DB_VERIFY_BAD;
	} else
		pip->next_pgno = NEXT_PGNO(h);

	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}

/*
 * __db_vrfy_datapage --
 *	Verify elements common to data pages (P_HASH, P_LBTREE,
 *	P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
 *	those defined in the PAGE structure.
 *
 *	Called from each of the per-page routines, after the
 *	all-page-type-common elements of pip have been verified and filled
 *	in.
 *
 * PUBLIC: int __db_vrfy_datapage
 * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
 */
int
__db_vrfy_datapage(dbp, vdp, h, pgno, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	PAGE *h;
	db_pgno_t pgno;
	u_int32_t flags;
{
	ENV *env;
	VRFY_PAGEINFO *pip;
	u_int32_t smallest_entry;
	int isbad, ret, t_ret;

	env = dbp->env;

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		return (ret);
	isbad = 0;

	/*
	 * prev_pgno and next_pgno:  store for inter-page checks,
	 * verify that they point to actual pages and not to self.
	 *
	 * !!!
	 * Internal btree pages, as well as heap pages, do not maintain these
	 * fields (indeed, they overload them).  Skip.
	 */
	if (TYPE(h) != P_IBTREE &&
	    TYPE(h) != P_IRECNO && TYPE(h) != P_HEAP && TYPE(h) != P_IHEAP) {
		if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0539",
			    "Page %lu: invalid prev_pgno %lu", "%lu %lu"),
			    (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
		}
		if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0540",
			    "Page %lu: invalid next_pgno %lu", "%lu %lu"),
			    (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
		}
		pip->prev_pgno = PREV_PGNO(h);
		pip->next_pgno = NEXT_PGNO(h);
	}

	/*
	 * Verify the number of entries on the page: there's no good way to
	 * determine if this is accurate.  The best we can do is verify that
	 * it's not more than can, in theory, fit on the page.  Then, we make
	 * sure there are at least this many valid elements in inp[], and
	 * hope the test catches most cases.
	 */
	switch (TYPE(h)) {
	case P_HASH_UNSORTED:
	case P_HASH:
		smallest_entry = HKEYDATA_PSIZE(0);
		break;
	case P_HEAP:
		smallest_entry = sizeof(HEAPHDR) + sizeof(db_indx_t);
		break;
	case P_IHEAP:
		/* Really high_pgno. */
		pip->prev_pgno = PREV_PGNO(h);
		smallest_entry = 0;
		break;
	case P_IBTREE:
		smallest_entry = BINTERNAL_PSIZE(0);
		break;
	case P_IRECNO:
		smallest_entry = RINTERNAL_PSIZE;
		break;
	case P_LBTREE:
	case P_LDUP:
	case P_LRECNO:
		smallest_entry = BKEYDATA_PSIZE(0);
		break;
	default:
		smallest_entry = 0;
		break;
	}
	/*
	 * Divide by 2: leaf pages count each key/data pair as two entries,
	 * so NUM_ENT may legitimately be twice the number of items.
	 */
	if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0541",
		    "Page %lu: too many entries: %lu",
		    "%lu %lu"), (u_long)pgno, (u_long)NUM_ENT(h)));
	}

	if (TYPE(h) != P_OVERFLOW)
		pip->entries = NUM_ENT(h);

	/*
	 * btree level.  Should be zero unless we're a btree;
	 * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
	 * and we need to save it off.
	 */
	switch (TYPE(h)) {
	case P_IBTREE:
	case P_IRECNO:
		if (LEVEL(h) < LEAFLEVEL + 1) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0542",
			    "Page %lu: bad btree level %lu", "%lu %lu"),
			    (u_long)pgno, (u_long)LEVEL(h)));
		}
		pip->bt_level = LEVEL(h);
		break;
	case P_LBTREE:
	case P_LDUP:
	case P_LRECNO:
		if (LEVEL(h) != LEAFLEVEL) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0543",
			    "Page %lu: btree leaf page has incorrect level %lu",
			    "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
		}
		break;
	default:
		if (LEVEL(h) != 0) {
			isbad = 1;
			EPRINT((env, DB_STR_A("0544",
			    "Page %lu: nonzero level %lu in non-btree database",
			    "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
		}
		break;
	}

	/*
	 * Even though inp[] occurs in all PAGEs, we look at it in the
	 * access-method-specific code, since btree and hash treat
	 * item lengths very differently, and one of the most important
	 * things we want to verify is that the data--as specified
	 * by offset and length--cover the right part of the page
	 * without overlaps, gaps, or violations of the page boundary.
	 */
	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
		ret = t_ret;

	return ((ret == 0 && isbad == 1) ?
	    DB_VERIFY_BAD : ret);
}

/*
 * __db_vrfy_meta--
 *	Verify the access-method common parts of a meta page, using
 *	normal mpool routines.
 *
 * PUBLIC: int __db_vrfy_meta
 * PUBLIC:     __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
 */
int
__db_vrfy_meta(dbp, vdp, meta, pgno, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	DBMETA *meta;
	db_pgno_t pgno;
	u_int32_t flags;
{
	DBTYPE dbtype, magtype;
	ENV *env;
	VRFY_PAGEINFO *pip;
	int isbad, ret, t_ret;

	isbad = 0;
	env = dbp->env;

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		return (ret);

	/* type plausible for a meta page */
	switch (meta->type) {
	case P_BTREEMETA:
		dbtype = DB_BTREE;
		break;
	case P_HASHMETA:
		dbtype = DB_HASH;
		break;
	case P_HEAPMETA:
		dbtype = DB_HEAP;
		break;
	case P_QAMMETA:
		dbtype = DB_QUEUE;
		break;
	default:
		ret = __db_unknown_path(env, "__db_vrfy_meta");
		goto err;
	}

	/* magic number valid */
	if (!__db_is_valid_magicno(meta->magic, &magtype)) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0545", "Page %lu: invalid magic number",
		    "%lu"), (u_long)pgno));
	}
	if (magtype != dbtype) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0546",
		    "Page %lu: magic number does not match database type",
		    "%lu"), (u_long)pgno));
	}

	/* version */
	if ((dbtype == DB_BTREE &&
	    (meta->version > DB_BTREEVERSION ||
	    meta->version < DB_BTREEOLDVER)) ||
	    (dbtype == DB_HASH &&
	    (meta->version > DB_HASHVERSION ||
	    meta->version < DB_HASHOLDVER)) ||
	    (dbtype == DB_HEAP &&
	    (meta->version > DB_HEAPVERSION ||
	    meta->version < DB_HEAPOLDVER)) ||
	    (dbtype == DB_QUEUE &&
	    (meta->version > DB_QAMVERSION ||
	    meta->version < DB_QAMOLDVER))) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0547",
    "Page %lu: unsupported database version %lu; extraneous errors may result",
		    "%lu %lu"), (u_long)pgno, (u_long)meta->version));
	}

	/* pagesize */
	if (meta->pagesize != dbp->pgsize) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0548", "Page %lu: invalid pagesize %lu",
		    "%lu %lu"), (u_long)pgno, (u_long)meta->pagesize));
	}

	/* Flags */
	if (meta->metaflags != 0) {
		if (FLD_ISSET(meta->metaflags,
		    ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
			isbad = 1;
			/*
			 * NOTE(review): this message reports PGNO_BASE_MD
			 * rather than pgno -- looks copied from
			 * __db_vrfy_pagezero; confirm which page number was
			 * intended.
			 */
			EPRINT((env, DB_STR_A("0549",
			    "Page %lu: bad meta-data flags value %#lx",
			    "%lu %#lx"), (u_long)PGNO_BASE_MD,
			    (u_long)meta->metaflags));
		}
		if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
			F_SET(pip, VRFY_HAS_CHKSUM);
		if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
			F_SET(pip, VRFY_HAS_PART_RANGE);
		if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
			F_SET(pip, VRFY_HAS_PART_CALLBACK);
	}

	/*
	 * Free list.
	 *
	 * If this is not the main, master-database meta page, it
	 * should not have a free list.
	 */
	if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0550",
		    "Page %lu: nonempty free list on subdatabase metadata page",
		    "%lu"), (u_long)pgno));
	}

	/* Can correctly be PGNO_INVALID--that's just the end of the list. */
	if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free))
		pip->free = meta->free;
	else if (!IS_VALID_PGNO(meta->free)) {
		isbad = 1;
		EPRINT((env, DB_STR_A("0551",
		    "Page %lu: nonsensical free list pgno %lu", "%lu %lu"),
		    (u_long)pgno, (u_long)meta->free));
	}

	/*
	 * Check that the meta page agrees with what we got from mpool.
	 * If we don't have FTRUNCATE then mpool could include some
	 * zeroed pages at the end of the file, we assume the meta page
	 * is correct.
	 */
	if (pgno == PGNO_BASE_MD && meta->last_pgno != vdp->last_pgno) {
#ifdef HAVE_FTRUNCATE
		isbad = 1;
		EPRINT((env, DB_STR_A("0552",
		    "Page %lu: last_pgno is not correct: %lu != %lu",
		    "%lu %lu %lu"), (u_long)pgno,
		    (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
#endif
		vdp->meta_last_pgno = meta->last_pgno;
	}

	/*
	 * We have now verified the common fields of the metadata page.
	 * Clear the flag that told us they had been incompletely checked.
	 */
	F_CLR(pip, VRFY_INCOMPLETE);

err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
		ret = t_ret;

	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}

/*
 * __db_vrfy_freelist --
 *	Walk free list, checking off pages and verifying absence of
 *	loops.
 */
static int
__db_vrfy_freelist(dbp, vdp, meta, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t meta;
	u_int32_t flags;
{
	DB *pgset;
	ENV *env;
	VRFY_PAGEINFO *pip;
	db_pgno_t cur_pgno, next_pgno;
	int p, ret, t_ret;

	env = dbp->env;
	pgset = vdp->pgset;
	DB_ASSERT(env, pgset != NULL);

	if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
		return (ret);
	/* Follow the chain of next_pgno links starting from the meta page. */
	for (next_pgno = pip->free;
	    next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
		cur_pgno = pip->pgno;
		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
			return (ret);

		/* This shouldn't happen, but just in case. */
		if (!IS_VALID_PGNO(next_pgno)) {
			EPRINT((env, DB_STR_A("0553",
			    "Page %lu: invalid next_pgno %lu on free list page",
			    "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
			return (DB_VERIFY_BAD);
		}

		/* Detect cycles.
*/ + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, next_pgno, &p)) != 0) + return (ret); + if (p != 0) { + EPRINT((env, DB_STR_A("0554", + "Page %lu: page %lu encountered a second time on free list", + "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno)); + return (DB_VERIFY_BAD); + } + if ((ret = __db_vrfy_pgset_inc(pgset, + vdp->thread_info, vdp->txn, next_pgno)) != 0) + return (ret); + + if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) + return (ret); + + if (pip->type != P_INVALID) { + EPRINT((env, DB_STR_A("0555", + "Page %lu: non-invalid page %lu on free list", + "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno)); + ret = DB_VERIFY_BAD; /* unsafe to continue */ + break; + } + } + + if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) + ret = t_ret; + return (ret); +} + +/* + * __db_vrfy_subdbs -- + * Walk the known-safe master database of subdbs with a cursor, + * verifying the structure of each subdatabase we encounter. + */ +static int +__db_vrfy_subdbs(dbp, vdp, dbname, flags) + DB *dbp; + VRFY_DBINFO *vdp; + const char *dbname; + u_int32_t flags; +{ + DB *mdbp; + DBC *dbc; + DBT key, data; + ENV *env; + VRFY_PAGEINFO *pip; + db_pgno_t meta_pgno; + int ret, t_ret, isbad; + u_int8_t type; + + isbad = 0; + dbc = NULL; + env = dbp->env; + + if ((ret = __db_master_open(dbp, + vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0) + return (ret); + + if ((ret = __db_cursor_int(mdbp, NULL, + vdp->txn, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + goto err; + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) { + if (data.size != sizeof(db_pgno_t)) { + EPRINT((env, DB_STR("0556", + "Subdatabase entry not page-number size"))); + isbad = 1; + goto err; + } + memcpy(&meta_pgno, data.data, data.size); + /* + * Subdatabase meta pgnos are stored in network byte + * order for cross-endian compatibility. Swap if appropriate. 
+ */ + DB_NTOHL_SWAP(env, &meta_pgno); + if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) { + EPRINT((env, DB_STR_A("0557", + "Subdatabase entry references invalid page %lu", + "%lu"), (u_long)meta_pgno)); + isbad = 1; + goto err; + } + if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0) + goto err; + type = pip->type; + if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) + goto err; + switch (type) { + case P_BTREEMETA: + if ((ret = __bam_vrfy_structure( + dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + break; + case P_HASHMETA: + if ((ret = __ham_vrfy_structure( + dbp, vdp, meta_pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + break; + case P_QAMMETA: + default: + EPRINT((env, DB_STR_A("0558", + "Subdatabase entry references page %lu of invalid type %lu", + "%lu %lu"), (u_long)meta_pgno, (u_long)type)); + ret = DB_VERIFY_BAD; + goto err; + } + } + + if (ret == DB_NOTFOUND) + ret = 0; + +err: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __db_vrfy_struct_feedback -- + * Provide feedback during top-down database structure traversal. + * (See comment at the beginning of __db_vrfy_structure.) + * + * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *)); + */ +void +__db_vrfy_struct_feedback(dbp, vdp) + DB *dbp; + VRFY_DBINFO *vdp; +{ + int progress; + + if (dbp->db_feedback == NULL) + return; + + if (vdp->pgs_remaining > 0) + vdp->pgs_remaining--; + + /* Don't allow a feedback call of 100 until we're really done. */ + progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1)); + dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 
99 : progress); +} + +/* + * __db_vrfy_orderchkonly -- + * Do an sort-order/hashing check on a known-otherwise-good subdb. + */ +static int +__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags) + DB *dbp; + VRFY_DBINFO *vdp; + const char *name, *subdb; + u_int32_t flags; +{ + BTMETA *btmeta; + DB *mdbp, *pgset; + DBC *pgsc; + DBT key, data; + DB_MPOOLFILE *mpf; + ENV *env; + HASH *h_internal; + HMETA *hmeta; + PAGE *h, *currpg; + db_pgno_t meta_pgno, p, pgno; + u_int32_t bucket; + int t_ret, ret; + + pgset = NULL; + pgsc = NULL; + env = dbp->env; + mpf = dbp->mpf; + currpg = h = NULL; + + LF_CLR(DB_NOORDERCHK); + + /* Open the master database and get the meta_pgno for the subdb. */ + if ((ret = __db_master_open(dbp, + vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0) + goto err; + + DB_INIT_DBT(key, subdb, strlen(subdb)); + memset(&data, 0, sizeof(data)); + if ((ret = __db_get(mdbp, + vdp->thread_info, NULL, &key, &data, 0)) != 0) { + if (ret == DB_NOTFOUND) + ret = ENOENT; + goto err; + } + + if (data.size != sizeof(db_pgno_t)) { + EPRINT((env, DB_STR("0559", + "Subdatabase entry of invalid size"))); + ret = DB_VERIFY_BAD; + goto err; + } + + memcpy(&meta_pgno, data.data, data.size); + + /* + * Subdatabase meta pgnos are stored in network byte + * order for cross-endian compatibility. Swap if appropriate. + */ + DB_NTOHL_SWAP(env, &meta_pgno); + + if ((ret = __memp_fget(mpf, + &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + + if ((ret = __db_vrfy_pgset(env, + vdp->thread_info, dbp->pgsize, &pgset)) != 0) + goto err; + + switch (TYPE(h)) { + case P_BTREEMETA: + btmeta = (BTMETA *)h; + if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) { + /* Recnos have no order to check. 
*/ + ret = 0; + goto err; + } + if ((ret = + __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0) + goto err; + if ((ret = __db_cursor_int(pgset, NULL, vdp->txn, dbp->type, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0) + goto err; + while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { + if ((ret = __memp_fget(mpf, &p, + vdp->thread_info, NULL, 0, &currpg)) != 0) + goto err; + if ((ret = __bam_vrfy_itemorder(dbp, NULL, + vdp->thread_info, currpg, p, NUM_ENT(currpg), 1, + F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0) + goto err; + if ((ret = __memp_fput(mpf, + vdp->thread_info, currpg, dbp->priority)) != 0) + goto err; + currpg = NULL; + } + + /* + * The normal exit condition for the loop above is DB_NOTFOUND. + * If we see that, zero it and continue on to cleanup. + * Otherwise, it's a real error and will be returned. + */ + if (ret == DB_NOTFOUND) + ret = 0; + break; + case P_HASHMETA: + hmeta = (HMETA *)h; + h_internal = (HASH *)dbp->h_internal; + /* + * Make sure h_charkey is right. + */ + if (h_internal == NULL) { + EPRINT((env, DB_STR_A("0560", + "Page %lu: DB->h_internal field is NULL", "%lu"), + (u_long)meta_pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + if (h_internal->h_hash == NULL) + h_internal->h_hash = hmeta->dbmeta.version < 5 + ? __ham_func4 : __ham_func5; + if (hmeta->h_charkey != + h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) { + EPRINT((env, DB_STR_A("0561", + "Page %lu: incorrect hash function for database", + "%lu"), (u_long)meta_pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + + /* + * Foreach bucket, verify hashing on each page in the + * corresponding chain of pages. 
+ */ + if ((ret = __db_cursor_int(dbp, NULL, vdp->txn, dbp->type, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0) + goto err; + for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { + pgno = BS_TO_PAGE(bucket, hmeta->spares); + while (pgno != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &pgno, + vdp->thread_info, NULL, 0, &currpg)) != 0) + goto err; + if ((ret = __ham_vrfy_hashing(pgsc, + NUM_ENT(currpg), hmeta, bucket, pgno, + flags, h_internal->h_hash)) != 0) + goto err; + pgno = NEXT_PGNO(currpg); + if ((ret = __memp_fput(mpf, vdp->thread_info, + currpg, dbp->priority)) != 0) + goto err; + currpg = NULL; + } + } + break; + default: + EPRINT((env, DB_STR_A("0562", + "Page %lu: database metapage of bad type %lu", + "%lu %lu"), (u_long)meta_pgno, (u_long)TYPE(h))); + ret = DB_VERIFY_BAD; + break; + } + +err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) + ret = t_ret; + if (pgset != NULL && + (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + if (h != NULL && (t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0) + ret = t_ret; + if (currpg != NULL && + (t_ret = __memp_fput(mpf, + vdp->thread_info, currpg, dbp->priority)) != 0) + ret = t_ret; + if ((t_ret = __db_close(mdbp, NULL, 0)) != 0) + ret = t_ret; + return (ret); +} + +/* + * __db_salvage_pg -- + * Walk through a page, salvaging all likely or plausible (w/ + * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp. + * + * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + ENV *env; + VRFY_PAGEINFO *pip; + int keyflag, ret, t_ret; + + env = dbp->env; + DB_ASSERT(env, LF_ISSET(DB_SALVAGE)); + + /* + * !!! 
+ * We dump record numbers when salvaging Queue databases, but not for + * immutable Recno databases. The problem is we can't figure out the + * record number from the database page in the Recno case, while the + * offset in the file is sufficient for Queue. + */ + keyflag = 0; + + /* If we got this page in the subdb pass, we can safely skip it. */ + if (__db_salvage_isdone(vdp, pgno)) + return (0); + + switch (TYPE(h)) { + case P_BTREEMETA: + ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags); + break; + case P_HASH: + case P_HASH_UNSORTED: + case P_HEAP: + case P_LBTREE: + case P_QAMDATA: + return (__db_salvage_leaf(dbp, + vdp, pgno, h, handle, callback, flags)); + case P_HASHMETA: + ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags); + break; + case P_HEAPMETA: + ret = __heap_vrfy_meta(dbp, vdp, (HEAPMETA *)h, pgno, flags); + break; + case P_IBTREE: + /* + * We need to mark any overflow keys on internal pages as seen, + * so we don't print them out in __db_salvage_unknowns. But if + * we're an upgraded database, a P_LBTREE page may very well + * have a reference to the same overflow pages (this practice + * stopped somewhere around db4.5). To give P_LBTREEs a chance + * to print out any keys on shared pages, mark the page now and + * deal with it at the end. + */ + return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE)); + case P_IHEAP: + /* + * There's nothing to salvage from heap region pages. Just mark + * that we've seen the page. + */ + return (__db_salvage_markdone(vdp, pgno)); + case P_LDUP: + return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP)); + case P_LRECNO: + /* + * Recno leaves are tough, because the leaf could be (1) a dup + * page, or it could be (2) a regular database leaf page. + * Fortunately, RECNO databases are not allowed to have + * duplicates. 
+ * + * If there are no subdatabases, dump the page immediately if + * it's a leaf in a RECNO database, otherwise wait and hopefully + * it will be dumped by the leaf page that refers to it, + * otherwise we'll get it with the unknowns. + * + * If there are subdatabases, there might be mixed types and + * dbp->type can't be trusted. We'll only get here after + * salvaging each database, though, so salvaging this page + * immediately isn't important. If this page is a dup, it might + * get salvaged later on, otherwise the unknowns pass will pick + * it up. Note that SALVAGE_HASSUBDBS won't get set if we're + * salvaging aggressively. + * + * If we're salvaging aggressively, we don't know whether or not + * there's subdatabases, so we wait on all recno pages. + */ + if (!LF_ISSET(DB_AGGRESSIVE) && + !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO) + return (__db_salvage_leaf(dbp, + vdp, pgno, h, handle, callback, flags)); + return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP)); + case P_OVERFLOW: + return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW)); + case P_QAMMETA: + keyflag = 1; + ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags); + break; + case P_INVALID: + case P_IRECNO: + case __P_DUPLICATE: + default: + /* + * There's no need to display an error, the page type was + * already checked and reported on. + */ + return (0); + } + if (ret != 0) + return (ret); + + /* + * We have to display the dump header if it's a metadata page. It's + * our last chance as the page was marked "seen" in the vrfy routine, + * and we won't see the page again. We don't display headers for + * the first database in a multi-database file, that database simply + * contains a list of subdatabases. 
+ */ + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION)) + ret = __db_prheader( + dbp, NULL, 0, keyflag, handle, callback, vdp, pgno); + if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __db_salvage_leaf -- + * Walk through a leaf, salvaging all likely key/data pairs and marking + * seen pages in vdp. + * + * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + ENV *env; + + env = dbp->env; + DB_ASSERT(env, LF_ISSET(DB_SALVAGE)); + + /* If we got this page in the subdb pass, we can safely skip it. */ + if (__db_salvage_isdone(vdp, pgno)) + return (0); + + switch (TYPE(h)) { + case P_HASH_UNSORTED: + case P_HASH: + return (__ham_salvage(dbp, vdp, + pgno, h, handle, callback, flags)); + case P_HEAP: + return (__heap_salvage(dbp, vdp, + pgno, h, handle, callback, flags)); + case P_LBTREE: + case P_LRECNO: + return (__bam_salvage(dbp, vdp, + pgno, TYPE(h), h, handle, callback, NULL, flags)); + case P_QAMDATA: + return (__qam_salvage(dbp, vdp, + pgno, h, handle, callback, flags)); + default: + /* + * There's no need to display an error, the page type was + * already checked and reported on. + */ + return (0); + } +} + +/* + * __db_salvage_unknowns -- + * Walk through the salvager database, printing with key "UNKNOWN" + * any pages we haven't dealt with. 
+ */ +static int +__db_salvage_unknowns(dbp, vdp, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBC *dbc; + DBT unkdbt, key, *dbt; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + db_pgno_t pgno; + u_int32_t pgtype, ovfl_bufsz, tmp_flags; + int ret, t_ret; + void *ovflbuf; + + dbc = NULL; + env = dbp->env; + mpf = dbp->mpf; + + DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); + + if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0) + return (ret); + ovfl_bufsz = dbp->pgsize; + + /* + * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW + * pages, because they may be referenced by the standard database + * pages that we're resolving. + */ + while ((t_ret = + __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) { + if ((t_ret = __memp_fget(mpf, + &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { + if (ret == 0) + ret = t_ret; + continue; + } + + dbt = NULL; + tmp_flags = 0; + switch (pgtype) { + case SALVAGE_LDUP: + case SALVAGE_LRECNODUP: + dbt = &unkdbt; + tmp_flags = DB_SA_UNKNOWNKEY; + /* FALLTHROUGH */ + case SALVAGE_IBTREE: + case SALVAGE_LBTREE: + case SALVAGE_LRECNO: + if ((t_ret = __bam_salvage( + dbp, vdp, pgno, pgtype, h, handle, + callback, dbt, tmp_flags | flags)) != 0 && ret == 0) + ret = t_ret; + break; + case SALVAGE_OVERFLOW: + DB_ASSERT(env, 0); /* Shouldn't ever happen. */ + break; + case SALVAGE_HASH: + if ((t_ret = __ham_salvage(dbp, vdp, + pgno, h, handle, callback, flags)) != 0 && ret == 0) + ret = t_ret; + break; + case SALVAGE_INVALID: + case SALVAGE_IGNORE: + default: + /* + * Shouldn't happen, but if it does, just do what the + * nice man says. + */ + DB_ASSERT(env, 0); + break; + } + if ((t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + } + + /* We should have reached the end of the database. 
*/ + if (t_ret == DB_NOTFOUND) + t_ret = 0; + if (t_ret != 0 && ret == 0) + ret = t_ret; + + /* Re-open the cursor so we traverse the database again. */ + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + dbc = NULL; + + /* Now, deal with any remaining overflow pages. */ + while ((t_ret = + __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) { + if ((t_ret = __memp_fget(mpf, + &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { + if (ret == 0) + ret = t_ret; + continue; + } + + switch (pgtype) { + case SALVAGE_OVERFLOW: + /* + * XXX: + * This may generate multiple "UNKNOWN" keys in + * a database with no dups. What to do? + */ + if ((t_ret = __db_safe_goff(dbp, vdp, + pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 || + ((vdp->type == DB_BTREE || vdp->type == DB_HASH) && + (t_ret = __db_vrfy_prdbt(&unkdbt, + 0, " ", handle, callback, 0, 0, vdp)) != 0) || + (t_ret = __db_vrfy_prdbt( + &key, 0, " ", handle, callback, 0, 0, vdp)) != 0) + if (ret == 0) + ret = t_ret; + break; + default: + DB_ASSERT(env, 0); /* Shouldn't ever happen. */ + break; + } + if ((t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + } + + /* We should have reached the end of the database. */ + if (t_ret == DB_NOTFOUND) + t_ret = 0; + if (t_ret != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + __os_free(env, ovflbuf); + + return (ret); +} + +/* + * Offset of the ith inp array entry, which we can compare to the offset + * the entry stores. + */ +#define INP_OFFSET(dbp, h, i) \ + ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h))) + +/* + * __db_vrfy_inpitem -- + * Verify that a single entry in the inp array is sane, and update + * the high water mark and current item offset. (The former of these is + * used for state information between calls, and is required; it must + * be initialized to the pagesize before the first call.) 
+ * + * Returns DB_VERIFY_FATAL if inp has collided with the data, + * since verification can't continue from there; returns DB_VERIFY_BAD + * if anything else is wrong. + * + * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *, + * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *)); + */ +int +__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp) + DB *dbp; + PAGE *h; + db_pgno_t pgno; + u_int32_t i; + int is_btree; + u_int32_t flags, *himarkp, *offsetp; +{ + BKEYDATA *bk; + ENV *env; + db_indx_t *inp, offset, len; + + env = dbp->env; + + DB_ASSERT(env, himarkp != NULL); + inp = P_INP(dbp, h); + + /* + * Check that the inp array, which grows from the beginning of the + * page forward, has not collided with the data, which grow from the + * end of the page backward. + */ + if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) { + /* We've collided with the data. We need to bail. */ + EPRINT((env, DB_STR_A("0563", + "Page %lu: entries listing %lu overlaps data", + "%lu %lu"), (u_long)pgno, (u_long)i)); + return (DB_VERIFY_FATAL); + } + + offset = inp[i]; + + /* + * Check that the item offset is reasonable: it points somewhere + * after the inp array and before the end of the page. + */ + if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) { + EPRINT((env, DB_STR_A("0564", + "Page %lu: bad offset %lu at page index %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)offset, (u_long)i)); + return (DB_VERIFY_BAD); + } + + /* Update the high-water mark (what HOFFSET should be) */ + if (offset < *himarkp) + *himarkp = offset; + + if (is_btree) { + /* + * Check alignment; if it's unaligned, it's unsafe to + * manipulate this item. + */ + if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) { + EPRINT((env, DB_STR_A("0565", + "Page %lu: unaligned offset %lu at page index %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)offset, + (u_long)i)); + return (DB_VERIFY_BAD); + } + + /* + * Check that the item length remains on-page. 
+ */ + bk = GET_BKEYDATA(dbp, h, i); + + /* + * We need to verify the type of the item here; + * we can't simply assume that it will be one of the + * expected three. If it's not a recognizable type, + * it can't be considered to have a verifiable + * length, so it's not possible to certify it as safe. + */ + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + len = bk->len; + break; + case B_DUPLICATE: + case B_OVERFLOW: + len = BOVERFLOW_SIZE; + break; + default: + EPRINT((env, DB_STR_A("0566", + "Page %lu: item %lu of unrecognizable type", + "%lu %lu"), (u_long)pgno, (u_long)i)); + return (DB_VERIFY_BAD); + } + + if ((size_t)(offset + len) > dbp->pgsize) { + EPRINT((env, DB_STR_A("0567", + "Page %lu: item %lu extends past page boundary", + "%lu %lu"), (u_long)pgno, (u_long)i)); + return (DB_VERIFY_BAD); + } + } + + if (offsetp != NULL) + *offsetp = offset; + return (0); +} + +/* + * __db_vrfy_duptype-- + * Given a page number and a set of flags to __bam_vrfy_subtree, + * verify that the dup tree type is correct--i.e., it's a recno + * if DUPSORT is not set and a btree if it is. 
+ * + * PUBLIC: int __db_vrfy_duptype + * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); + */ +int +__db_vrfy_duptype(dbp, vdp, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + VRFY_PAGEINFO *pip; + int ret, isbad; + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + switch (pip->type) { + case P_IBTREE: + case P_LDUP: + if (!LF_ISSET(DB_ST_DUPSORT)) { + EPRINT((env, DB_STR_A("0568", + "Page %lu: sorted duplicate set in unsorted-dup database", + "%lu"), (u_long)pgno)); + isbad = 1; + } + break; + case P_IRECNO: + case P_LRECNO: + if (LF_ISSET(DB_ST_DUPSORT)) { + EPRINT((env, DB_STR_A("0569", + "Page %lu: unsorted duplicate set in sorted-dup database", + "%lu"), (u_long)pgno)); + isbad = 1; + } + break; + default: + /* + * If the page is entirely zeroed, its pip->type will be a lie + * (we assumed it was a hash page, as they're allowed to be + * zeroed); handle this case specially. + */ + if (F_ISSET(pip, VRFY_IS_ALLZEROES)) + ZEROPG_ERR_PRINT(env, pgno, DB_STR_P("duplicate page")); + else + EPRINT((env, DB_STR_A("0570", + "Page %lu: duplicate page of inappropriate type %lu", + "%lu %lu"), (u_long)pgno, (u_long)pip->type)); + isbad = 1; + break; + } + + if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) + return (ret); + return (isbad == 1 ? DB_VERIFY_BAD : 0); +} + +/* + * __db_salvage_duptree -- + * Attempt to salvage a given duplicate tree, given its alleged root. + * + * The key that corresponds to this dup set has been passed to us + * in DBT *key. Because data items follow keys, though, it has been + * printed once already. + * + * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a + * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier + * functions to make sure it's safe; if it's not, we simply bail and the + * data will have to be printed with no key later on. 
if it is safe, + * recurse on each of its children. + * + * Whether or not it's safe, if it's a leaf page, __bam_salvage it. + * + * At all times, use the DB hanging off vdp to mark and check what we've + * done, so each page gets printed exactly once and we don't get caught + * in any cycles. + * + * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + DBT *key; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + PAGE *h; + int ret, t_ret; + + mpf = dbp->mpf; + + if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno)) + return (DB_VERIFY_BAD); + + /* We have a plausible page. Try it. */ + if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) + return (ret); + + switch (TYPE(h)) { + case P_IBTREE: + case P_IRECNO: + if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0) + goto err; + if ((ret = __bam_vrfy(dbp, + vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 || + (ret = __db_salvage_markdone(vdp, pgno)) != 0) + goto err; + /* + * We have a known-healthy internal page. Walk it. + */ + if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key, + handle, callback, flags)) != 0) + goto err; + break; + case P_LRECNO: + case P_LDUP: + if ((ret = __bam_salvage(dbp, + vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0) + goto err; + break; + default: + ret = DB_VERIFY_BAD; + goto err; + } + +err: if ((t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __db_salvage_all -- + * Salvage only the leaves we find by walking the tree. If we have subdbs, + * salvage each of them individually. 
+ */ +static int +__db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; + int *hassubsp; +{ + DB *pgset; + DBC *pgsc; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *h; + VRFY_PAGEINFO *pip; + db_pgno_t p, meta_pgno; + int ret, t_ret; + + *hassubsp = 0; + + env = dbp->env; + pgset = NULL; + pgsc = NULL; + mpf = dbp->mpf; + h = NULL; + pip = NULL; + ret = 0; + + /* + * Check to make sure the page is OK and find out if it contains + * subdatabases. + */ + meta_pgno = PGNO_BASE_MD; + if ((t_ret = __memp_fget(mpf, + &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 && + (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 && + (t_ret = __db_salvage_pg( + dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 && + (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0) + if (F_ISSET(pip, VRFY_HAS_SUBDBS)) + *hassubsp = 1; + if (pip != NULL && + (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (h != NULL) { + if ((t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + h = NULL; + } + if (ret != 0) + return (ret); + + /* Without subdatabases, we can just dump from the meta pgno. */ + if (*hassubsp == 0) + return (__db_salvage(dbp, + vdp, PGNO_BASE_MD, handle, callback, flags)); + + /* + * We have subdbs. Try to crack them. + * + * To do so, get a set of leaf pages in the master database, and then + * walk each of the valid ones, salvaging subdbs as we go. If any + * prove invalid, just drop them; we'll pick them up on a later pass. 
+ */ + if ((ret = __db_vrfy_pgset(env, + vdp->thread_info, dbp->pgsize, &pgset)) != 0) + goto err; + if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0) + goto err; + if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0) + goto err; + while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { + if ((t_ret = __memp_fget(mpf, + &p, vdp->thread_info, NULL, 0, &h)) == 0 && + (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 && + (t_ret = + __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0) + t_ret = __db_salvage_subdbpg( + dbp, vdp, h, handle, callback, flags); + if (t_ret != 0 && ret == 0) + ret = t_ret; + if (h != NULL) { + if ((t_ret = __memp_fput(mpf, vdp->thread_info, + h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + h = NULL; + } + } + + if (t_ret != DB_NOTFOUND && ret == 0) + ret = t_ret; + +err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) + ret = t_ret; + if (pgset != NULL && + (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0) + ret = t_ret; + if (h != NULL && + (t_ret = __memp_fput(mpf, + vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __db_salvage_subdbpg -- + * Given a known-good leaf page in the master database, salvage all + * leaf pages corresponding to each subdb. + */ +static int +__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *master; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + BKEYDATA *bkkey, *bkdata; + BOVERFLOW *bo; + DB *pgset; + DBC *pgsc; + DBT key; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *subpg; + db_indx_t i; + db_pgno_t meta_pgno; + int ret, err_ret, t_ret; + char *subdbname; + u_int32_t ovfl_bufsz; + + env = dbp->env; + mpf = dbp->mpf; + ret = err_ret = 0; + subdbname = NULL; + pgsc = NULL; + pgset = NULL; + ovfl_bufsz = 0; + + /* + * For each entry, get and salvage the set of pages + * corresponding to that entry. 
+ */ + for (i = 0; i < NUM_ENT(master); i += P_INDX) { + bkkey = GET_BKEYDATA(dbp, master, i); + bkdata = GET_BKEYDATA(dbp, master, i + O_INDX); + + /* Get the subdatabase name. */ + if (B_TYPE(bkkey->type) == B_OVERFLOW) { + /* + * We can, in principle anyway, have a subdb + * name so long it overflows. Ick. + */ + bo = (BOVERFLOW *)bkkey; + if ((ret = __db_safe_goff(dbp, vdp, bo->pgno, + &key, &subdbname, &ovfl_bufsz, flags)) != 0) { + err_ret = DB_VERIFY_BAD; + continue; + } + + /* Nul-terminate it. */ + if (ovfl_bufsz < key.size + 1) { + if ((ret = __os_realloc(env, + key.size + 1, &subdbname)) != 0) + goto err; + ovfl_bufsz = key.size + 1; + } + subdbname[key.size] = '\0'; + } else if (B_TYPE(bkkey->type) == B_KEYDATA) { + if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) { + if ((ret = __os_realloc(env, + bkkey->len + 1, &subdbname)) != 0) + goto err; + ovfl_bufsz = bkkey->len + 1; + } + DB_ASSERT(env, subdbname != NULL); + memcpy(subdbname, bkkey->data, bkkey->len); + subdbname[bkkey->len] = '\0'; + } + + /* Get the corresponding pgno. */ + if (bkdata->len != sizeof(db_pgno_t)) { + err_ret = DB_VERIFY_BAD; + continue; + } + memcpy(&meta_pgno, + (db_pgno_t *)bkdata->data, sizeof(db_pgno_t)); + + /* + * Subdatabase meta pgnos are stored in network byte + * order for cross-endian compatibility. Swap if appropriate. + */ + DB_NTOHL_SWAP(env, &meta_pgno); + + /* If we can't get the subdb meta page, just skip the subdb. */ + if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf, + &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) { + err_ret = ret; + continue; + } + + /* + * Verify the subdatabase meta page. This has two functions. + * First, if it's bad, we have no choice but to skip the subdb + * and let the pages just get printed on a later pass. Second, + * the access-method-specific meta verification routines record + * the various state info (such as the presence of dups) + * that we need for __db_prheader(). 
+ */ + if ((ret = + __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) { + err_ret = ret; + (void)__memp_fput(mpf, + vdp->thread_info, subpg, dbp->priority); + continue; + } + switch (TYPE(subpg)) { + case P_BTREEMETA: + if ((ret = __bam_vrfy_meta(dbp, + vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) { + err_ret = ret; + (void)__memp_fput(mpf, + vdp->thread_info, subpg, dbp->priority); + continue; + } + break; + case P_HASHMETA: + if ((ret = __ham_vrfy_meta(dbp, + vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) { + err_ret = ret; + (void)__memp_fput(mpf, + vdp->thread_info, subpg, dbp->priority); + continue; + } + break; + default: + /* This isn't an appropriate page; skip this subdb. */ + err_ret = DB_VERIFY_BAD; + continue; + } + + if ((ret = __memp_fput(mpf, + vdp->thread_info, subpg, dbp->priority)) != 0) { + err_ret = ret; + continue; + } + + /* Print a subdatabase header. */ + if ((ret = __db_prheader(dbp, + subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0) + goto err; + + /* Salvage meta_pgno's tree. */ + if ((ret = __db_salvage(dbp, + vdp, meta_pgno, handle, callback, flags)) != 0) + err_ret = ret; + + /* Print a subdatabase footer. */ + if ((ret = __db_prfooter(handle, callback)) != 0) + goto err; + } + +err: if (subdbname) + __os_free(env, subdbname); + + if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0) + ret = t_ret; + + if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0) + ret = t_ret; + + if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0) + return (t_ret); + + return ((err_ret != 0) ? err_ret : ret); +} + +/* + * __db_salvage -- + * Given a meta page number, salvage all data from leaf pages found by + * walking the meta page's tree. 
 */
static int
__db_salvage(dbp, vdp, meta_pgno, handle, callback, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t meta_pgno;
	void *handle;
	int (*callback) __P((void *, const void *));
	u_int32_t flags;

{
	DB *pgset;
	DBC *dbc, *pgsc;
	DB_MPOOLFILE *mpf;
	ENV *env;
	PAGE *subpg;
	db_pgno_t p;
	int err_ret, ret, t_ret;

	env = dbp->env;
	mpf = dbp->mpf;
	err_ret = ret = t_ret = 0;
	pgsc = NULL;
	pgset = NULL;
	dbc = NULL;

	if ((ret = __db_vrfy_pgset(env,
	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
		goto err;

	/* Get all page numbers referenced from this meta page. */
	if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
	    flags, pgset)) != 0) {
		err_ret = ret;
		goto err;
	}

	if ((ret = __db_cursor(pgset,
	    vdp->thread_info, NULL, &pgsc, 0)) != 0)
		goto err;

	/* Queue pages must be fetched through a cursor on the db itself. */
	if (dbp->type == DB_QUEUE &&
	    (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
		goto err;

	/*
	 * Salvage every page in pgset.  This is best-effort: per-page
	 * failures are remembered in err_ret but don't stop the walk.
	 */
	while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
		if (dbp->type == DB_QUEUE) {
#ifdef HAVE_QUEUE
			ret = __qam_fget(dbc, &p, 0, &subpg);
#else
			ret = __db_no_queue_am(env);
#endif
			/* Don't report an error for pages not found in a queue.
			 * The pgset is a best guess, it doesn't know about
			 * deleted extents which leads to this error.
			 */
			if (ret == ENOENT || ret == DB_PAGE_NOTFOUND)
				continue;
		} else
			ret = __memp_fget(mpf,
			    &p, vdp->thread_info, NULL, 0, &subpg);
		if (ret != 0) {
			err_ret = ret;
			continue;
		}

		if ((ret = __db_salvage_pg(dbp, vdp, p, subpg,
		    handle, callback, flags)) != 0)
			err_ret = ret;

		/* Release the page via the same access method that got it. */
		if (dbp->type == DB_QUEUE)
#ifdef HAVE_QUEUE
			ret = __qam_fput(dbc, p, subpg, dbp->priority);
#else
			ret = __db_no_queue_am(env);
#endif
		else
			ret = __memp_fput(mpf,
			    vdp->thread_info, subpg, dbp->priority);
		if (ret != 0)
			err_ret = ret;
	}

	/* DB_NOTFOUND just means the cursor walk is complete. */
	if (ret == DB_NOTFOUND)
		ret = 0;

err:
	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0)
		ret = t_ret;
	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
		ret = t_ret;
	if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
		ret = t_ret;

	/* The first per-page error, if any, takes precedence. */
	return ((err_ret != 0) ? err_ret : ret);
}

/*
 * __db_meta2pgset --
 *	Given a known-safe meta page number, return the set of pages
 *	corresponding to the database it represents.  Return DB_VERIFY_BAD if
 *	it's not a suitable meta page or is invalid.
 */
static int
__db_meta2pgset(dbp, vdp, pgno, flags, pgset)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	u_int32_t flags;
	DB *pgset;
{
	DB_MPOOLFILE *mpf;
	PAGE *h;
	int ret, t_ret;

	mpf = dbp->mpf;

	if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
		return (ret);

	/* Dispatch on the meta page type to the access method's walker. */
	switch (TYPE(h)) {
	case P_BTREEMETA:
		ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
		break;
	case P_HASHMETA:
		ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
		break;
	case P_HEAPMETA:
		ret = __heap_meta2pgset(dbp, vdp, (HEAPMETA *)h, pgset);
		break;
	case P_QAMMETA:
#ifdef HAVE_QUEUE
		ret = __qam_meta2pgset(dbp, vdp, pgset);
		break;
#endif
		/*
		 * Without HAVE_QUEUE the P_QAMMETA case deliberately falls
		 * through to default: a queue meta page is "unsuitable" in
		 * a build with no queue support.
		 */
	default:
		ret = DB_VERIFY_BAD;
		break;
	}

	if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
		return (t_ret);
	return (ret);
}

/*
 * __db_guesspgsize --
 *	Try to guess what the pagesize is if the one on the meta page
 *	and the one in the db are invalid.
 */
static u_int
__db_guesspgsize(env, fhp)
	ENV *env;
	DB_FH *fhp;
{
	db_pgno_t i;
	size_t nr;
	u_int32_t guess;
	u_int8_t type;

	/* Probe candidate sizes from largest to smallest, halving each time. */
	for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
		/*
		 * We try to read three pages ahead after the first one
		 * and make sure we have plausible types for all of them.
		 * If the seeks fail, continue with a smaller size;
		 * we're probably just looking past the end of the database.
		 * If they succeed and the types are reasonable, also continue
		 * with a size smaller; we may be looking at pages N,
		 * 2N, and 3N for some N > 1.
		 *
		 * As soon as we hit an invalid type, we stop and return
		 * our previous guess; that last one was probably the page size.
		 */
		for (i = 1; i <= 3; i++) {
			if (__os_seek(
			    env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
				break;
			if (__os_read(env,
			    fhp, &type, 1, &nr) != 0 || nr == 0)
				break;
			if (type == P_INVALID || type >= P_PAGETYPE_MAX)
				return (guess << 1);
		}
	}

	/*
	 * If we're just totally confused--the corruption takes up most of the
	 * beginning pages of the database--go with the default size.
	 */
	return (DB_DEF_IOSIZE);
}
diff --git a/src/db/db_vrfy_stub.c b/src/db/db_vrfy_stub.c
new file mode 100644
index 00000000..b3acda2d
--- /dev/null
+++ b/src/db/db_vrfy_stub.c
@@ -0,0 +1,120 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.
 *
 * $Id$
 */

#ifndef HAVE_VERIFY
#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_am.h"
#include "dbinc/db_verify.h"

/*
 * If the library wasn't compiled with the verification support, various
 * routines aren't available.  Stub them here, returning an appropriate
 * error.
 */

static int __db_novrfy __P((ENV *));

/*
 * __db_novrfy --
 *	Error when a Berkeley DB build doesn't include the access method.
 */
static int
__db_novrfy(env)
	ENV *env;
{
	__db_errx(env, DB_STR("0571",
	    "library build did not include support for database verification"));
	return (DB_OPNOTSUP);
}

int
__db_verify_pp(dbp, file, database, outfile, flags)
	DB *dbp;
	const char *file, *database;
	FILE *outfile;
	u_int32_t flags;
{
	int ret;

	COMPQUIET(file, NULL);
	COMPQUIET(database, NULL);
	COMPQUIET(outfile, NULL);
	COMPQUIET(flags, 0);

	ret = __db_novrfy(dbp->env);

	/* The verify method is a destructor. */
	(void)__db_close(dbp, NULL, 0);

	return (ret);
}

/*
 * NOTE(review): unlike the other stubs this one silently returns success --
 * presumably so internal callers don't fail when verification is compiled
 * out; confirm against callers before changing.
 */
int
__db_verify_internal(dbp, name, subdb, handle, callback, flags)
	DB *dbp;
	const char *name, *subdb;
	void *handle;
	int (*callback) __P((void *, const void *));
	u_int32_t flags;
{
	COMPQUIET(dbp, NULL);
	COMPQUIET(name, NULL);
	COMPQUIET(subdb, NULL);
	COMPQUIET(handle, NULL);
	COMPQUIET(callback, NULL);
	COMPQUIET(flags, 0);
	return (0);
}

int
__db_vrfy_getpageinfo(vdp, pgno, pipp)
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	VRFY_PAGEINFO **pipp;
{
	COMPQUIET(pgno, 0);
	COMPQUIET(pipp, NULL);
	return (__db_novrfy(vdp->pgdbp->env));
}

int
__db_vrfy_putpageinfo(env, vdp, pip)
	ENV *env;
	VRFY_DBINFO *vdp;
	VRFY_PAGEINFO *pip;
{
	COMPQUIET(vdp, NULL);
	COMPQUIET(pip, NULL);
	return (__db_novrfy(env));
}

int
__db_vrfy_prdbt(dbtp, checkprint, prefix,
    handle, callback, is_recno, is_heap, vdp)
	DBT *dbtp;
	int checkprint;
	const char *prefix;
	void *handle;
	int (*callback) __P((void *, const void *));
	int is_recno;
	int is_heap;
	VRFY_DBINFO *vdp;
{
	COMPQUIET(dbtp, NULL);
	COMPQUIET(checkprint, 0);
	COMPQUIET(prefix, NULL);
	COMPQUIET(handle, NULL);
	COMPQUIET(callback, NULL);
	COMPQUIET(is_recno, 0);
	COMPQUIET(is_heap, 0);
	return (__db_novrfy(vdp->pgdbp->env));
}
#endif /* !HAVE_VERIFY */
diff --git a/src/db/db_vrfyutil.c b/src/db/db_vrfyutil.c
new file mode 100644
index 00000000..863f0624
--- /dev/null
+++ b/src/db/db_vrfyutil.c
@@ -0,0 +1,932 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved.
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/db_am.h" + +static int __db_vrfy_childinc __P((DBC *, VRFY_CHILDINFO *)); +static int __db_vrfy_pageinfo_create __P((ENV *, VRFY_PAGEINFO **)); + +/* + * __db_vrfy_dbinfo_create -- + * Allocate and initialize a VRFY_DBINFO structure. + * + * PUBLIC: int __db_vrfy_dbinfo_create + * PUBLIC: __P((ENV *, DB_THREAD_INFO *, u_int32_t, VRFY_DBINFO **)); + */ +int +__db_vrfy_dbinfo_create(env, ip, pgsize, vdpp) + ENV *env; + DB_THREAD_INFO *ip; + u_int32_t pgsize; + VRFY_DBINFO **vdpp; +{ + DB *cdbp, *pgdbp, *pgset; + VRFY_DBINFO *vdp; + int ret; + + vdp = NULL; + cdbp = pgdbp = pgset = NULL; + + if ((ret = __os_calloc(NULL, 1, sizeof(VRFY_DBINFO), &vdp)) != 0) + goto err; + + if ((ret = __db_create_internal(&cdbp, env, 0)) != 0) + goto err; + + if ((ret = __db_set_flags(cdbp, DB_DUP)) != 0) + goto err; + + if ((ret = __db_set_pagesize(cdbp, pgsize)) != 0) + goto err; + + /* If transactional, make sure we don't log. */ + if (TXN_ON(env) && + (ret = __db_set_flags(cdbp, DB_TXN_NOT_DURABLE)) != 0) + goto err; + if ((ret = __db_open(cdbp, ip, + NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) != 0) + goto err; + + if ((ret = __db_create_internal(&pgdbp, env, 0)) != 0) + goto err; + + if ((ret = __db_set_pagesize(pgdbp, pgsize)) != 0) + goto err; + + /* If transactional, make sure we don't log. 
*/ + if (TXN_ON(env) && + (ret = __db_set_flags(pgdbp, DB_TXN_NOT_DURABLE)) != 0) + goto err; + + if ((ret = __db_open(pgdbp, ip, + NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) != 0) + goto err; + + if ((ret = __db_vrfy_pgset(env, ip, pgsize, &pgset)) != 0) + goto err; + + if (CDB_LOCKING(env) && + (ret = __cdsgroup_begin(env, &vdp->txn)) != 0) + goto err; + + LIST_INIT(&vdp->subdbs); + LIST_INIT(&vdp->activepips); + + vdp->cdbp = cdbp; + vdp->pgdbp = pgdbp; + vdp->pgset = pgset; + vdp->thread_info = ip; + *vdpp = vdp; + return (0); + +err: if (cdbp != NULL) + (void)__db_close(cdbp, NULL, 0); + if (pgdbp != NULL) + (void)__db_close(pgdbp, NULL, 0); + if (vdp->txn != NULL) + (void)vdp->txn->commit(vdp->txn, 0); + if (vdp != NULL) + __os_free(env, vdp); + return (ret); +} + +/* + * __db_vrfy_dbinfo_destroy -- + * Destructor for VRFY_DBINFO. Destroys VRFY_PAGEINFOs and deallocates + * structure. + * + * PUBLIC: int __db_vrfy_dbinfo_destroy __P((ENV *, VRFY_DBINFO *)); + */ +int +__db_vrfy_dbinfo_destroy(env, vdp) + ENV *env; + VRFY_DBINFO *vdp; +{ + VRFY_CHILDINFO *c; + int t_ret, ret; + + ret = 0; + + /* + * Discard active page structures. Ideally there wouldn't be any, + * but in some error cases we may not have cleared them all out. + */ + while (LIST_FIRST(&vdp->activepips) != NULL) + if ((t_ret = __db_vrfy_putpageinfo( + env, vdp, LIST_FIRST(&vdp->activepips))) != 0) { + if (ret == 0) + ret = t_ret; + break; + } + + /* Discard subdatabase list structures. 
*/ + while ((c = LIST_FIRST(&vdp->subdbs)) != NULL) { + LIST_REMOVE(c, links); + __os_free(NULL, c); + } + + if ((t_ret = __db_close(vdp->pgdbp, NULL, 0)) != 0) + ret = t_ret; + + if ((t_ret = __db_close(vdp->cdbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __db_close(vdp->pgset, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + + if (vdp->txn != NULL && + (t_ret = vdp->txn->commit(vdp->txn, 0)) != 0 && ret == 0) + ret = t_ret; + + if (vdp->extents != NULL) + __os_free(env, vdp->extents); + __os_free(env, vdp); + return (ret); +} + +/* + * __db_vrfy_getpageinfo -- + * Get a PAGEINFO structure for a given page, creating it if necessary. + * + * PUBLIC: int __db_vrfy_getpageinfo + * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, VRFY_PAGEINFO **)); + */ +int +__db_vrfy_getpageinfo(vdp, pgno, pipp) + VRFY_DBINFO *vdp; + db_pgno_t pgno; + VRFY_PAGEINFO **pipp; +{ + DB *pgdbp; + DBT key, data; + ENV *env; + VRFY_PAGEINFO *pip; + int ret; + + /* + * We want a page info struct. There are three places to get it from, + * in decreasing order of preference: + * + * 1. vdp->activepips. If it's already "checked out", we're + * already using it, we return the same exact structure with a + * bumped refcount. This is necessary because this code is + * replacing array accesses, and it's common for f() to make some + * changes to a pip, and then call g() and h() which each make + * changes to the same pip. vdps are never shared between threads + * (they're never returned to the application), so this is safe. + * 2. The pgdbp. It's not in memory, but it's in the database, so + * get it, give it a refcount of 1, and stick it on activepips. + * 3. malloc. It doesn't exist yet; create it, then stick it on + * activepips. We'll put it in the database when we putpageinfo + * later. + */ + + /* Case 1. */ + LIST_FOREACH(pip, &vdp->activepips, links) + if (pip->pgno == pgno) + goto found; + + /* Case 2. 
*/ + pgdbp = vdp->pgdbp; + env = pgdbp->env; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + F_SET(&data, DB_DBT_MALLOC); + key.data = &pgno; + key.size = sizeof(db_pgno_t); + + if ((ret = __db_get(pgdbp, + vdp->thread_info, vdp->txn, &key, &data, 0)) == 0) { + /* Found it. */ + DB_ASSERT(env, data.size == sizeof(VRFY_PAGEINFO)); + pip = data.data; + LIST_INSERT_HEAD(&vdp->activepips, pip, links); + goto found; + } else if (ret != DB_NOTFOUND) /* Something nasty happened. */ + return (ret); + + /* Case 3 */ + if ((ret = __db_vrfy_pageinfo_create(env, &pip)) != 0) + return (ret); + + LIST_INSERT_HEAD(&vdp->activepips, pip, links); +found: pip->pi_refcount++; + + *pipp = pip; + return (0); +} + +/* + * __db_vrfy_putpageinfo -- + * Put back a VRFY_PAGEINFO that we're done with. + * + * PUBLIC: int __db_vrfy_putpageinfo __P((ENV *, + * PUBLIC: VRFY_DBINFO *, VRFY_PAGEINFO *)); + */ +int +__db_vrfy_putpageinfo(env, vdp, pip) + ENV *env; + VRFY_DBINFO *vdp; + VRFY_PAGEINFO *pip; +{ + DB *pgdbp; + DBT key, data; + VRFY_PAGEINFO *p; + int ret; + + if (--pip->pi_refcount > 0) + return (0); + + pgdbp = vdp->pgdbp; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &pip->pgno; + key.size = sizeof(db_pgno_t); + data.data = pip; + data.size = sizeof(VRFY_PAGEINFO); + + if ((ret = __db_put(pgdbp, + vdp->thread_info, vdp->txn, &key, &data, 0)) != 0) + return (ret); + + LIST_FOREACH(p, &vdp->activepips, links) + if (p == pip) + break; + if (p != NULL) + LIST_REMOVE(p, links); + + __os_ufree(env, p); + return (0); +} + +/* + * __db_vrfy_pgset -- + * Create a temporary database for the storing of sets of page numbers. + * (A mapping from page number to int, used by the *_meta2pgset functions, + * as well as for keeping track of which pages the verifier has seen.) 
 *
 * PUBLIC: int __db_vrfy_pgset __P((ENV *,
 * PUBLIC:     DB_THREAD_INFO *, u_int32_t, DB **));
 */
int
__db_vrfy_pgset(env, ip, pgsize, dbpp)
	ENV *env;
	DB_THREAD_INFO *ip;
	u_int32_t pgsize;
	DB **dbpp;
{
	DB *dbp;
	int ret;

	if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
		return (ret);
	if ((ret = __db_set_pagesize(dbp, pgsize)) != 0)
		goto err;

	/* If transactional, make sure we don't log. */
	if (TXN_ON(env) &&
	    (ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0)
		goto err;
	if ((ret = __db_open(dbp, ip,
	    NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) == 0)
		*dbpp = dbp;
	else
		/* Any failure after create: close (and discard) the handle. */
err:		(void)__db_close(dbp, NULL, 0);

	return (ret);
}

/*
 * __db_vrfy_pgset_get --
 *	Get the value associated in a page set with a given pgno.  Return
 *	a 0 value (and succeed) if we've never heard of this page.
 *
 * PUBLIC: int __db_vrfy_pgset_get __P((DB *, DB_THREAD_INFO *, DB_TXN *,
 * PUBLIC:     db_pgno_t, int *));
 */
int
__db_vrfy_pgset_get(dbp, ip, txn, pgno, valp)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	db_pgno_t pgno;
	int *valp;
{
	DBT key, data;
	int ret, val;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);
	data.data = &val;
	data.ulen = sizeof(int);
	F_SET(&data, DB_DBT_USERMEM);

	if ((ret = __db_get(dbp, ip, txn, &key, &data, 0)) == 0) {
		DB_ASSERT(dbp->env, data.size == sizeof(int));
	} else if (ret == DB_NOTFOUND)
		/* Unknown page: report a count of zero, not an error. */
		val = 0;
	else
		return (ret);

	*valp = val;
	return (0);
}

/*
 * __db_vrfy_pgset_inc --
 *	Increment the value associated with a pgno by 1.
 *
 * PUBLIC: int __db_vrfy_pgset_inc __P((DB *, DB_THREAD_INFO *, DB_TXN *,
 * PUBLIC:     db_pgno_t));
 */
int
__db_vrfy_pgset_inc(dbp, ip, txn, pgno)
	DB *dbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	db_pgno_t pgno;
{
	DBT key, data;
	int ret;
	int val;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/* Missing key counts as zero, so the first increment stores 1. */
	val = 0;

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);
	data.data = &val;
	data.ulen = sizeof(int);
	F_SET(&data, DB_DBT_USERMEM);

	if ((ret = __db_get(dbp, ip, txn, &key, &data, 0)) == 0) {
		DB_ASSERT(dbp->env, data.size == sizeof(int));
	} else if (ret != DB_NOTFOUND)
		return (ret);

	data.size = sizeof(int);
	++val;

	return (__db_put(dbp, ip, txn, &key, &data, 0));
}

/*
 * __db_vrfy_pgset_next --
 *	Given a cursor open in a pgset database, get the next page in the
 *	set.
 *
 * PUBLIC: int __db_vrfy_pgset_next __P((DBC *, db_pgno_t *));
 */
int
__db_vrfy_pgset_next(dbc, pgnop)
	DBC *dbc;
	db_pgno_t *pgnop;
{
	DBT key, data;
	db_pgno_t pgno;
	int ret;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));
	/* We don't care about the data, just the keys. */
	F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
	F_SET(&key, DB_DBT_USERMEM);
	key.data = &pgno;
	key.ulen = sizeof(db_pgno_t);

	/* DB_NOTFOUND from here means the walk is complete. */
	if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) != 0)
		return (ret);

	DB_ASSERT(dbc->env, key.size == sizeof(db_pgno_t));
	*pgnop = pgno;

	return (0);
}

/*
 * __db_vrfy_childcursor --
 *	Create a cursor to walk the child list with.  Returns with a nonzero
 *	final argument if the specified page has no children.
 *
 * NOTE(review): the "nonzero final argument" sentence looks stale -- the
 * function below just opens a cursor on the child database; confirm.
 *
 * PUBLIC: int __db_vrfy_childcursor __P((VRFY_DBINFO *, DBC **));
 */
int
__db_vrfy_childcursor(vdp, dbcp)
	VRFY_DBINFO *vdp;
	DBC **dbcp;
{
	DB *cdbp;
	DBC *dbc;
	int ret;

	cdbp = vdp->cdbp;

	if ((ret = __db_cursor(cdbp, vdp->thread_info, vdp->txn, &dbc, 0)) == 0)
		*dbcp = dbc;

	return (ret);
}

/*
 * __db_vrfy_childput --
 *	Add a child structure to the set for a given page.
 *
 * PUBLIC: int __db_vrfy_childput
 * PUBLIC:     __P((VRFY_DBINFO *, db_pgno_t, VRFY_CHILDINFO *));
 */
int
__db_vrfy_childput(vdp, pgno, cip)
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	VRFY_CHILDINFO *cip;
{
	DB *cdbp;
	DBC *cc;
	DBT key, data;
	VRFY_CHILDINFO *oldcip;
	int ret;

	cdbp = vdp->cdbp;
	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);

	/*
	 * We want to avoid adding multiple entries for a single child page;
	 * we only need to verify each child once, even if a child (such
	 * as an overflow key) is multiply referenced.
	 *
	 * However, we also need to make sure that when walking the list
	 * of children, we encounter them in the order they're referenced
	 * on a page.  (This permits us, for example, to verify the
	 * prev_pgno/next_pgno chain of Btree leaf pages.)
	 *
	 * Check the child database to make sure that this page isn't
	 * already a child of the specified page number.  If it's not,
	 * put it at the end of the duplicate set.
	 */
	if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
		return (ret);
	for (ret = __db_vrfy_ccset(cc, pgno, &oldcip); ret == 0;
	    ret = __db_vrfy_ccnext(cc, &oldcip))
		if (oldcip->pgno == cip->pgno) {
			/*
			 * Found a matching child.  Increment its reference
			 * count--we've run into it again--but don't put it
			 * again.
			 */
			if ((ret = __db_vrfy_childinc(cc, oldcip)) != 0 ||
			    (ret = __db_vrfy_ccclose(cc)) != 0)
				return (ret);
			return (0);
		}
	/* DB_NOTFOUND means no duplicate; anything else is a real error. */
	if (ret != DB_NOTFOUND) {
		(void)__db_vrfy_ccclose(cc);
		return (ret);
	}
	if ((ret = __db_vrfy_ccclose(cc)) != 0)
		return (ret);

	cip->refcnt = 1;
	data.data = cip;
	data.size = sizeof(VRFY_CHILDINFO);

	return (__db_put(cdbp, vdp->thread_info, vdp->txn, &key, &data, 0));
}

/*
 * __db_vrfy_childinc --
 *	Increment the refcount of the VRFY_CHILDINFO struct that the child
 * cursor is pointing to.  (The caller has just retrieved this struct, and
 * passes it in as cip to save us a get.)
 */
static int
__db_vrfy_childinc(dbc, cip)
	DBC *dbc;
	VRFY_CHILDINFO *cip;
{
	DBT key, data;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/* DB_CURRENT overwrites the record the cursor is positioned on. */
	cip->refcnt++;
	data.data = cip;
	data.size = sizeof(VRFY_CHILDINFO);

	return (__dbc_put(dbc, &key, &data, DB_CURRENT));
}

/*
 * __db_vrfy_ccset --
 *	Sets a cursor created with __db_vrfy_childcursor to the first
 *	child of the given pgno, and returns it in the third arg.
 *
 * PUBLIC: int __db_vrfy_ccset __P((DBC *, db_pgno_t, VRFY_CHILDINFO **));
 */
int
__db_vrfy_ccset(dbc, pgno, cipp)
	DBC *dbc;
	db_pgno_t pgno;
	VRFY_CHILDINFO **cipp;
{
	DBT key, data;
	int ret;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);

	if ((ret = __dbc_get(dbc, &key, &data, DB_SET)) != 0)
		return (ret);

	DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
	*cipp = (VRFY_CHILDINFO *)data.data;

	return (0);
}

/*
 * __db_vrfy_ccnext --
 *	Gets the next child of the given cursor created with
 *	__db_vrfy_childcursor, and returns it in the memory provided in the
 *	second arg.
 *
 * PUBLIC: int __db_vrfy_ccnext __P((DBC *, VRFY_CHILDINFO **));
 */
int
__db_vrfy_ccnext(dbc, cipp)
	DBC *dbc;
	VRFY_CHILDINFO **cipp;
{
	DBT key, data;
	int ret;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/* DB_NEXT_DUP stays within the current parent pgno's duplicate set. */
	if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT_DUP)) != 0)
		return (ret);

	DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
	*cipp = (VRFY_CHILDINFO *)data.data;

	return (0);
}

/*
 * __db_vrfy_ccclose --
 *	Closes the cursor created with __db_vrfy_childcursor.
 *
 *	This doesn't actually do anything interesting now, but it's
 *	not inconceivable that we might change the internal database usage
 *	and keep the interfaces the same, and a function call here or there
 *	seldom hurts anyone.
 *
 * PUBLIC: int __db_vrfy_ccclose __P((DBC *));
 */
int
__db_vrfy_ccclose(dbc)
	DBC *dbc;
{

	return (__dbc_close(dbc));
}

/*
 * __db_vrfy_pageinfo_create --
 *	Constructor for VRFY_PAGEINFO; allocates and initializes.
 */
static int
__db_vrfy_pageinfo_create(env, pipp)
	ENV *env;
	VRFY_PAGEINFO **pipp;
{
	VRFY_PAGEINFO *pip;
	int ret;

	/*
	 * pageinfo structs are sometimes allocated here and sometimes
	 * allocated by fetching them from a database with DB_DBT_MALLOC.
	 * There's no easy way for the destructor to tell which was
	 * used, and so we always allocate with __os_umalloc so we can free
	 * with __os_ufree.
	 */
	if ((ret = __os_umalloc(env, sizeof(VRFY_PAGEINFO), &pip)) != 0)
		return (ret);
	memset(pip, 0, sizeof(VRFY_PAGEINFO));

	*pipp = pip;
	return (0);
}

/*
 * __db_salvage_init --
 *	Set up salvager database.
 *
 * PUBLIC: int __db_salvage_init __P((VRFY_DBINFO *));
 */
int
__db_salvage_init(vdp)
	VRFY_DBINFO *vdp;
{
	DB *dbp;
	int ret;

	if ((ret = __db_create_internal(&dbp, NULL, 0)) != 0)
		return (ret);

	/* Small fixed pagesize: this db only holds pgno -> pgtype records. */
	if ((ret = __db_set_pagesize(dbp, 1024)) != 0)
		goto err;

	if ((ret = __db_open(dbp, vdp->thread_info,
	    NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0, PGNO_BASE_MD)) != 0)
		goto err;

	vdp->salvage_pages = dbp;
	return (0);

err:	(void)__db_close(dbp, NULL, 0);
	return (ret);
}

/*
 * __db_salvage_destroy --
 *	Close salvager database.
 * PUBLIC: int __db_salvage_destroy __P((VRFY_DBINFO *));
 */
int
__db_salvage_destroy(vdp)
	VRFY_DBINFO *vdp;
{
	/* No-op if __db_salvage_init was never run (or failed). */
	return (vdp->salvage_pages == NULL ? 0 :
	    __db_close(vdp->salvage_pages, NULL, 0));
}

/*
 * __db_salvage_getnext --
 *	Get the next (first) unprinted page in the database of pages we need to
 *	print still.  Delete entries for any already-printed pages we encounter
 *	in this search, as well as the page we're returning.
 *
 * PUBLIC: int __db_salvage_getnext
 * PUBLIC:     __P((VRFY_DBINFO *, DBC **, db_pgno_t *, u_int32_t *, int));
 */
int
__db_salvage_getnext(vdp, dbcp, pgnop, pgtypep, skip_overflow)
	VRFY_DBINFO *vdp;
	DBC **dbcp;
	db_pgno_t *pgnop;
	u_int32_t *pgtypep;
	int skip_overflow;
{
	DB *dbp;
	DBT key, data;
	int ret;
	u_int32_t pgtype;

	dbp = vdp->salvage_pages;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/* Open the cursor lazily on first call; caller owns it afterward. */
	if (*dbcp == NULL &&
	    (ret = __db_cursor(dbp, vdp->thread_info, vdp->txn, dbcp, 0)) != 0)
		return (ret);

	while ((ret = __dbc_get(*dbcp, &key, &data, DB_NEXT)) == 0) {
		DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));
		memcpy(&pgtype, data.data, sizeof(pgtype));

		/* Overflow pages may be deferred for a later pass. */
		if (skip_overflow && pgtype == SALVAGE_OVERFLOW)
			continue;

		/* Consume the entry whether it is returned or ignored. */
		if ((ret = __dbc_del(*dbcp, 0)) != 0)
			return (ret);
		if (pgtype != SALVAGE_IGNORE) {
			DB_ASSERT(dbp->env, key.size == sizeof(db_pgno_t));
			DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));

			*pgnop = *(db_pgno_t *)key.data;
			*pgtypep = *(u_int32_t *)data.data;
			break;
		}
	}

	return (ret);
}

/*
 * __db_salvage_isdone --
 *	Return whether or not the given pgno is already marked
 *	SALVAGE_IGNORE (meaning that we don't need to print it again).
 *
 *	Returns DB_KEYEXIST if it is marked, 0 if not, or another error on
 *	error.
 *
 * PUBLIC: int __db_salvage_isdone __P((VRFY_DBINFO *, db_pgno_t));
 */
int
__db_salvage_isdone(vdp, pgno)
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
{
	DB *dbp;
	DBT key, data;
	int ret;
	u_int32_t currtype;

	dbp = vdp->salvage_pages;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	currtype = SALVAGE_INVALID;
	data.data = &currtype;
	data.ulen = sizeof(u_int32_t);
	data.flags = DB_DBT_USERMEM;

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);

	/*
	 * Put an entry for this page, with pgno as key and type as data,
	 * unless it's already there and is marked done.
	 * If it's there and is marked anything else, that's fine--we
	 * want to mark it done.
	 */
	if ((ret = __db_get(dbp,
	    vdp->thread_info, vdp->txn, &key, &data, 0)) == 0) {
		/*
		 * The key's already here.  Check and see if it's already
		 * marked done.  If it is, return DB_KEYEXIST.  If it's not,
		 * return 0.
		 */
		if (currtype == SALVAGE_IGNORE)
			return (DB_KEYEXIST);
		else
			return (0);
	} else if (ret != DB_NOTFOUND)
		return (ret);

	/* The pgno is not yet marked anything; return 0. */
	return (0);
}

/*
 * __db_salvage_markdone --
 *	Mark as done a given page.
 *
 * PUBLIC: int __db_salvage_markdone __P((VRFY_DBINFO *, db_pgno_t));
 */
int
__db_salvage_markdone(vdp, pgno)
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
{
	DB *dbp;
	DBT key, data;
	int pgtype, ret;
	u_int32_t currtype;

	pgtype = SALVAGE_IGNORE;
	dbp = vdp->salvage_pages;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	/*
	 * NOTE(review): currtype and the USERMEM setup below appear unused --
	 * the lookup now happens inside __db_salvage_isdone, and data is
	 * repointed at pgtype before the put; presumably leftover from an
	 * earlier version.
	 */
	currtype = SALVAGE_INVALID;
	data.data = &currtype;
	data.ulen = sizeof(u_int32_t);
	data.flags = DB_DBT_USERMEM;

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);

	/*
	 * Put an entry for this page, with pgno as key and type as data,
	 * unless it's already there and is marked done.
	 * If it's there and is marked anything else, that's fine--we
	 * want to mark it done, but db_salvage_isdone only lets
	 * us know if it's marked IGNORE.
	 *
	 * We don't want to return DB_KEYEXIST, though; this will
	 * likely get passed up all the way and make no sense to the
	 * application.  Instead, use DB_VERIFY_BAD to indicate that
	 * we've seen this page already--it probably indicates a
	 * multiply-linked page.
	 */
	if ((ret = __db_salvage_isdone(vdp, pgno)) != 0)
		return (ret == DB_KEYEXIST ? DB_VERIFY_BAD : ret);

	data.size = sizeof(u_int32_t);
	data.data = &pgtype;

	return (__db_put(dbp, vdp->thread_info, vdp->txn, &key, &data, 0));
}

/*
 * __db_salvage_markneeded --
 *	If it has not yet been printed, make note of the fact that a page
 *	must be dealt with later.
 *
 * PUBLIC: int __db_salvage_markneeded
 * PUBLIC:     __P((VRFY_DBINFO *, db_pgno_t, u_int32_t));
 */
int
__db_salvage_markneeded(vdp, pgno, pgtype)
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	u_int32_t pgtype;
{
	DB *dbp;
	DBT key, data;
	int ret;

	dbp = vdp->salvage_pages;

	memset(&key, 0, sizeof(DBT));
	memset(&data, 0, sizeof(DBT));

	key.data = &pgno;
	key.size = sizeof(db_pgno_t);

	data.data = &pgtype;
	data.size = sizeof(u_int32_t);

	/*
	 * Put an entry for this page, with pgno as key and type as data,
	 * unless it's already there, in which case it's presumably
	 * already been marked done.
	 */
	ret = __db_put(dbp,
	    vdp->thread_info, vdp->txn, &key, &data, DB_NOOVERWRITE);
	/* DB_KEYEXIST is the expected "already tracked" case, not an error. */
	return (ret == DB_KEYEXIST ? 0 : ret);
}

/*
 * __db_vrfy_prdbt --
 *	Print out a DBT data element from a verification routine.
 *
 * PUBLIC: int __db_vrfy_prdbt __P((DBT *, int, const char *, void *,
 * PUBLIC:     int (*)(void *, const void *), int, int, VRFY_DBINFO *));
 */
int
__db_vrfy_prdbt(dbtp, checkprint, prefix,
    handle, callback, is_recno, is_heap, vdp)
	DBT *dbtp;
	int checkprint;
	const char *prefix;
	void *handle;
	int (*callback) __P((void *, const void *));
	int is_recno;
	int is_heap;
	VRFY_DBINFO *vdp;
{
	if (vdp != NULL) {
		/*
		 * If vdp is non-NULL, we might be the first key in the
		 * "fake" subdatabase used for key/data pairs we can't
		 * associate with a known subdb.
		 *
		 * Check and clear the SALVAGE_PRINTHEADER flag; if
		 * it was set, print a subdatabase header.
		 */
		if (F_ISSET(vdp, SALVAGE_PRINTHEADER)) {
			(void)__db_prheader(
			    NULL, "__OTHER__", 0, 0, handle, callback, vdp, 0);
			F_CLR(vdp, SALVAGE_PRINTHEADER);
			F_SET(vdp, SALVAGE_PRINTFOOTER);
		}

		/*
		 * Even if the printable flag wasn't set by our immediate
		 * caller, it may be set on a salvage-wide basis.
		 */
		if (F_ISSET(vdp, SALVAGE_PRINTABLE))
			checkprint = 1;
	}
	return (
	    __db_prdbt(dbtp, checkprint,
	    prefix, handle, callback, is_recno, is_heap));
}
diff --git a/src/db/partition.c b/src/db/partition.c
new file mode 100644
index 00000000..c18bbaa4
--- /dev/null
+++ b/src/db/partition.c
@@ -0,0 +1,2058 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_verify.h"
#include "dbinc/btree.h"
#ifdef HAVE_HASH
#include "dbinc/hash.h"
#endif
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/partition.h"
#include "dbinc/txn.h"
#ifdef HAVE_PARTITION

static int __part_rr __P((DB *, DB_THREAD_INFO *, DB_TXN *,
    const char *, const char *, const char *, u_int32_t));
static int __partc_close __P((DBC *, db_pgno_t, int *));
static int __partc_del __P((DBC*, u_int32_t));
static int __partc_destroy __P((DBC*));
static int __partc_get_pp __P((DBC*, DBT *, DBT *, u_int32_t));
static int __partc_put __P((DBC*, DBT *, DBT *, u_int32_t, db_pgno_t *));
static int __partc_writelock __P((DBC*));
static int __partition_chk_meta __P((DB *,
    DB_THREAD_INFO *, DB_TXN *, u_int32_t));
static int __partition_setup_keys __P((DBC *,
    DB_PARTITION *, DBMETA *, u_int32_t));
static int __part_key_cmp __P((const void *, const void *));
static inline void __part_search __P((DB *,
    DB_PARTITION *, DBT *, u_int32_t *));

static char *Alloc_err = DB_STR_A("0644",
    "Partition open failed to allocate %d bytes", "%d");

/*
 * Allocate a
partition cursor and copy flags to the partition cursor. + * Not passed: + * DBC_PARTITIONED -- the subcursors are not. + * DBC_OWN_LID -- the arg dbc owns the lock id. + * DBC_WRITECURSOR DBC_WRITER -- CDS locking happens on + * the whole DB, not the partition. + */ +#define GET_PART_CURSOR(dbc, new_dbc, part_id) do { \ + DB *__part_dbp; \ + __part_dbp = part->handles[part_id]; \ + if ((ret = __db_cursor_int(__part_dbp, \ + (dbc)->thread_info, (dbc)->txn, __part_dbp->type, \ + PGNO_INVALID, 0, (dbc)->locker, &new_dbc)) != 0) \ + goto err; \ + (new_dbc)->flags = (dbc)->flags & \ + ~(DBC_PARTITIONED|DBC_OWN_LID|DBC_WRITECURSOR|DBC_WRITER); \ +} while (0) + +/* + * Search for the correct partition. + */ +static inline void __part_search(dbp, part, key, part_idp) + DB *dbp; + DB_PARTITION *part; + DBT *key; + u_int32_t *part_idp; +{ + db_indx_t base, indx, limit; + int cmp; + int (*func) __P((DB *, const DBT *, const DBT *)); + + DB_ASSERT(dbp->env, part->nparts != 0); + COMPQUIET(cmp, 0); + COMPQUIET(indx, 0); + + func = ((BTREE *)dbp->bt_internal)->bt_compare; + DB_BINARY_SEARCH_FOR(base, limit, part->nparts, O_INDX) { + DB_BINARY_SEARCH_INCR(indx, base, limit, O_INDX); + cmp = func(dbp, key, &part->keys[indx]); + if (cmp == 0) + break; + if (cmp > 0) + DB_BINARY_SEARCH_SHIFT_BASE(indx, base, limit, O_INDX); + } + if (cmp == 0) + *part_idp = indx; + else if ((*part_idp = base) != 0) + (*part_idp)--; +} + +/* + * __partition_init -- + * Initialize the partition structure. + * Called when the meta data page is read in during database open or + * when partition keys or a callback are set. 
+ * + * PUBLIC: int __partition_init __P((DB *, u_int32_t)); + */ +int +__partition_init(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_PARTITION *part; + int ret; + + if ((part = dbp->p_internal) != NULL) { + if ((LF_ISSET(DBMETA_PART_RANGE) && + F_ISSET(part, PART_CALLBACK)) || + (LF_ISSET(DBMETA_PART_CALLBACK) && + F_ISSET(part, PART_RANGE))) { + __db_errx(dbp->env, DB_STR("0645", + "Cannot specify callback and range keys.")); + return (EINVAL); + } + } else if ((ret = __os_calloc(dbp->env, 1, sizeof(*part), &part)) != 0) + return (ret); + + if (LF_ISSET(DBMETA_PART_RANGE)) + F_SET(part, PART_RANGE); + if (LF_ISSET(DBMETA_PART_CALLBACK)) + F_SET(part, PART_CALLBACK); + dbp->p_internal = part; + /* Set up AM-specific methods that do not require an open. */ + dbp->db_am_rename = __part_rename; + dbp->db_am_remove = __part_remove; + return (0); +} +/* + * __partition_set -- + * Set the partitioning keys or callback function. + * This routine must be called prior to creating the database. + * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *, + * PUBLIC: u_int32_t (*callback)(DB *, DBT *key))); + */ + +int +__partition_set(dbp, parts, keys, callback) + DB *dbp; + u_int32_t parts; + DBT *keys; + u_int32_t (*callback)(DB *, DBT *key); +{ + DB_PARTITION *part; + ENV *env; + int ret; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition"); + env = dbp->dbenv->env; + + if (parts < 2) { + __db_errx(env, DB_STR("0646", + "Must specify at least 2 partitions.")); + return (EINVAL); + } + + if (keys == NULL && callback == NULL) { + __db_errx(env, DB_STR("0647", + "Must specify either keys or a callback.")); + return (EINVAL); + } + if (keys != NULL && callback != NULL) { +bad: __db_errx(env, DB_STR("0648", + "May not specify both keys and a callback.")); + return (EINVAL); + } + + if ((ret = __partition_init(dbp, + keys != NULL ? 
+ DBMETA_PART_RANGE : DBMETA_PART_CALLBACK)) != 0) + return (ret); + part = dbp->p_internal; + + if ((part->keys != NULL && callback != NULL) || + (part->callback != NULL && keys != NULL)) + goto bad; + + part->nparts = parts; + part->keys = keys; + part->callback = callback; + + return (0); +} + +/* + * __partition_set_dirs -- + * Set the directories for creating the partition databases. + * They must be in the environment. + * PUBLIC: int __partition_set_dirs __P((DB *, const char **)); + */ +int +__partition_set_dirs(dbp, dirp) + DB *dbp; + const char **dirp; +{ + DB_ENV *dbenv; + DB_PARTITION *part; + ENV *env; + u_int32_t ndirs, slen; + int i, ret; + const char **dir; + char *cp, **part_dirs, **pd; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition_dirs"); + dbenv = dbp->dbenv; + env = dbp->env; + + ndirs = 1; + slen = 0; + for (dir = dirp; *dir != NULL; dir++) { + if (F_ISSET(env, ENV_DBLOCAL)) + slen += (u_int32_t)strlen(*dir) + 1; + ndirs++; + } + + slen += sizeof(char *) * ndirs; + if ((ret = __os_malloc(env, slen, &part_dirs)) != 0) + return (EINVAL); + memset(part_dirs, 0, slen); + + cp = (char *) part_dirs + (sizeof(char *) * ndirs); + pd = part_dirs; + for (dir = dirp; *dir != NULL; dir++, pd++) { + if (F_ISSET(env, ENV_DBLOCAL)) { + (void)strcpy(cp, *dir); + *pd = cp; + cp += strlen(*dir) + 1; + continue; + } + for (i = 0; i < dbenv->data_next; i++) + if (strcmp(*dir, dbenv->db_data_dir[i]) == 0) + break; + if (i == dbenv->data_next) { + __db_errx(dbp->env, DB_STR_A("0649", + "Directory not in environment list %s", + "%s"), *dir); + __os_free(env, part_dirs); + return (EINVAL); + } + *pd = dbenv->db_data_dir[i]; + } + + if ((part = dbp->p_internal) == NULL) { + if ((ret = __partition_init(dbp, 0)) != 0) + return (ret); + part = dbp->p_internal; + } + + part->dirs = (const char **)part_dirs; + + return (0); +} + +/* + * __partition_open -- + * Open/create a partitioned database. 
+ * PUBLIC: int __partition_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, DBTYPE, u_int32_t, int, int)); + */ +int +__partition_open(dbp, ip, txn, fname, type, flags, mode, do_open) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *fname; + DBTYPE type; + u_int32_t flags; + int mode, do_open; +{ + DB *part_db; + DB_PARTITION *part; + DBC *dbc; + ENV *env; + u_int32_t part_id; + int ret; + char *name, *sp; + const char **dirp, *np; + + part = dbp->p_internal; + env = dbp->dbenv->env; + name = NULL; + + if ((ret = __partition_chk_meta(dbp, ip, txn, flags)) != 0 && do_open) + goto err; + + if ((ret = __os_calloc(env, + part->nparts, sizeof(*part->handles), &part->handles)) != 0) { + __db_errx(env, + Alloc_err, part->nparts * sizeof(*part->handles)); + goto err; + } + + DB_ASSERT(env, fname != NULL); + if ((ret = __os_malloc(env, + strlen(fname) + PART_LEN + 1, &name)) != 0) { + __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1); + goto err; + } + + sp = name; + np = __db_rpath(fname); + if (np == NULL) + np = fname; + else { + np++; + (void)strncpy(name, fname, (size_t)(np - fname)); + sp = name + (np - fname); + } + + if (F_ISSET(dbp, DB_AM_RECOVER)) + goto done; + dirp = part->dirs; + for (part_id = 0; part_id < part->nparts; part_id++) { + if ((ret = __db_create_internal( + &part->handles[part_id], dbp->env, 0)) != 0) + goto err; + + part_db = part->handles[part_id]; + part_db->flags = F_ISSET(dbp, + ~(DB_AM_CREATED | DB_AM_CREATED_MSTR | DB_AM_OPEN_CALLED)); + part_db->adj_fileid = dbp->adj_fileid; + part_db->pgsize = dbp->pgsize; + part_db->priority = dbp->priority; + part_db->db_append_recno = dbp->db_append_recno; + part_db->db_feedback = dbp->db_feedback; + part_db->dup_compare = dbp->dup_compare; + part_db->app_private = dbp->app_private; + part_db->api_internal = dbp->api_internal; + + if (dbp->type == DB_BTREE) + __bam_copy_config(dbp, part_db, part->nparts); +#ifdef HAVE_HASH + if (dbp->type == DB_HASH) + 
__ham_copy_config(dbp, part_db, part->nparts); +#endif + + (void)sprintf(sp, PART_NAME, np, part_id); + if (do_open) { + /* + * Cycle through the directory names passed in, + * if any. + */ + if (dirp != NULL && + (part_db->dirname = *dirp++) == NULL) { + part_db->dirname = *(dirp = part->dirs); + dirp++; + } + if ((ret = __db_open(part_db, ip, txn, + name, NULL, type, flags, mode, PGNO_BASE_MD)) != 0) + goto err; + } else if ((ret = __os_strdup(env, name, &part_db->fname)) != 0) + goto err; + } + + /* Get rid of the cursor used to open the database its the wrong type */ +done: while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) + if ((ret = __dbc_destroy(dbc)) != 0) + break; + + if (0) { +err: (void)__partition_close(dbp, txn, 0); + } + if (name != NULL) + __os_free(env, name); + return (ret); +} + +/* + * __partition_chk_meta -- + * Check for a consistent meta data page and parameters when opening a + * partitioned database. + */ +static int +__partition_chk_meta(dbp, ip, txn, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t flags; +{ + DBMETA *meta; + DB_PARTITION *part; + DBC *dbc; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + ENV *env; + db_pgno_t base_pgno; + int ret, t_ret; + + dbc = NULL; + meta = NULL; + LOCK_INIT(metalock); + part = dbp->p_internal; + mpf = dbp->mpf; + env = dbp->env; + ret = 0; + + /* Get a cursor on the main db. */ + dbp->p_internal = NULL; + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + goto err; + + /* Get the metadata page. 
*/ + base_pgno = PGNO_BASE_MD; + if ((ret = + __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &base_pgno, ip, dbc->txn, 0, &meta)) != 0) + goto err; + + if (meta->magic != DB_HASHMAGIC && + (meta->magic != DB_BTREEMAGIC || F_ISSET(meta, BTM_RECNO))) { + __db_errx(env, DB_STR("0650", + "Partitioning may only specified on BTREE and HASH databases.")); + ret = EINVAL; + goto err; + } + if (!FLD_ISSET(meta->metaflags, + DBMETA_PART_RANGE | DBMETA_PART_CALLBACK)) { + __db_errx(env, DB_STR("0651", + "Partitioning specified on a non-partitioned database.")); + ret = EINVAL; + goto err; + } + + if ((F_ISSET(part, PART_RANGE) && + FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK)) || + (F_ISSET(part, PART_CALLBACK) && + FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))) { + __db_errx(env, DB_STR("0652", + "Incompatible partitioning specified.")); + ret = EINVAL; + goto err; + } + + if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK) && + part->callback == NULL && !IS_RECOVERING(env) && + !F_ISSET(dbp, DB_AM_RECOVER) && !LF_ISSET(DB_RDWRMASTER)) { + __db_errx(env, DB_STR("0653", + "Partition callback not specified.")); + ret = EINVAL; + goto err; + } + + if (F_ISSET(dbp, DB_AM_RECNUM)) { + __db_errx(env, DB_STR("0654", + "Record numbers are not supported in partitioned databases.")); + ret = EINVAL; + goto err; + } + + if (part->nparts == 0) { + if (LF_ISSET(DB_CREATE) && meta->nparts == 0) { + __db_errx(env, DB_STR("0655", + "Zero paritions specified.")); + ret = EINVAL; + goto err; + } else + part->nparts = meta->nparts; + } else if (meta->nparts != 0 && part->nparts != meta->nparts) { + __db_errx(env, DB_STR("0656", + "Number of partitions does not match.")); + ret = EINVAL; + goto err; + } + + if (meta->magic == DB_HASHMAGIC) { + if (!F_ISSET(part, PART_CALLBACK)) { + __db_errx(env, DB_STR("0657", + "Hash database must specify a partition callback.")); + ret = EINVAL; + } + } else if (meta->magic != DB_BTREEMAGIC) { 
+ __db_errx(env, DB_STR("0658", + "Partitioning only supported on BTREE nad HASH.")); + ret = EINVAL; + } else + ret = __partition_setup_keys(dbc, part, meta, flags); + +err: /* Put the metadata page back. */ + if (meta != NULL && (t_ret = __memp_fput(mpf, + ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + dbp->p_internal = part; + return (ret); +} + +/* + * Support for sorting keys. Keys must be sorted using the btree + * compare function so if we call qsort in __partiton_setup_keys + * we use this structure to pass the DBP and compare function. + */ +struct key_sort { + DB *dbp; + DBT *key; + int (*compare) __P((DB *, const DBT *, const DBT *)); +}; + +static int __part_key_cmp(a, b) + const void *a, *b; +{ + const struct key_sort *ka, *kb; + + ka = a; + kb = b; + return (ka->compare(ka->dbp, ka->key, kb->key)); +} +/* + * __partition_setup_keys -- + * Get the partition keys into memory, or put them to disk if we + * are creating a partitioned database. + */ +static int +__partition_setup_keys(dbc, part, meta, flags) + DBC *dbc; + DB_PARTITION *part; + DBMETA *meta; + u_int32_t flags; +{ + BTREE *t; + DB *dbp; + DBT data, key, *keys, *kp; + ENV *env; + u_int32_t ds, i, j; + u_int8_t *dd; + struct key_sort *ks; + int have_keys, ret; + int (*compare) __P((DB *, const DBT *, const DBT *)); + void *dp; + + COMPQUIET(dd, NULL); + COMPQUIET(ds, 0); + memset(&data, 0, sizeof(data)); + memset(&key, 0, sizeof(key)); + ks = NULL; + + dbp = dbc->dbp; + env = dbp->env; + + /* Need to just read the main database. */ + dbp->p_internal = NULL; + have_keys = 0; + + /* First verify that things what we expect. 
*/ + if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) { + if (ret != DB_NOTFOUND) + goto err; + if (F_ISSET(part, PART_CALLBACK)) { + ret = 0; + goto done; + } + if (!LF_ISSET(DB_CREATE) && !F_ISSET(dbp, DB_AM_RECOVER) && + !LF_ISSET(DB_RDWRMASTER)) { + __db_errx(env, DB_STR("0659", "No range keys found.")); + ret = EINVAL; + goto err; + } + } else { + if (F_ISSET(part, PART_CALLBACK)) { + __db_errx(env, DB_STR("0660", + "Keys found and callback set.")); + ret = EINVAL; + goto err; + } + if (key.size != 0) { + __db_errx(env, DB_STR("0661", + "Partition key 0 is not empty.")); + ret = EINVAL; + goto err; + } + have_keys = 1; + } + + if (LF_ISSET(DB_CREATE) && have_keys == 0) { + /* Insert the keys into the master database. */ + for (i = 0; i < part->nparts - 1; i++) { + if ((ret = __db_put(dbp, dbc->thread_info, + dbc->txn, &part->keys[i], &data, 0)) != 0) + goto err; + } + + /* + * Insert the "0" pointer. All records less than the first + * given key go into this partition. We must use the default + * compare to insert this key, otherwise it might not be first. + */ + t = dbc->dbp->bt_internal; + compare = t->bt_compare; + t->bt_compare = __bam_defcmp; + memset(&key, 0, sizeof(key)); + ret = __db_put(dbp, dbc->thread_info, dbc->txn, &key, &data, 0); + t->bt_compare = compare; + if (ret != 0) + goto err; + } +done: if (F_ISSET(part, PART_RANGE)) { + /* + * Allocate one page to hold the keys plus space at the + * end of the buffer to put an array of DBTs. If there + * is not enough space __dbc_get will return how much + * is needed and we realloc. 
+ */ + if ((ret = __os_malloc(env, + meta->pagesize + (sizeof(DBT) * part->nparts), + &part->data)) != 0) { + __db_errx(env, Alloc_err, meta->pagesize); + goto err; + } + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + data.data = part->data; + data.ulen = meta->pagesize; + data.flags = DB_DBT_USERMEM; +again: if ((ret = __dbc_get(dbc, &key, &data, + DB_FIRST | DB_MULTIPLE_KEY)) == DB_BUFFER_SMALL) { + if ((ret = __os_realloc(env, + data.size + (sizeof(DBT) * part->nparts), + &part->data)) != 0) + goto err; + data.data = part->data; + data.ulen = data.size; + goto again; + } + if (ret == 0) { + /* + * They passed in keys, they must match. + */ + keys = NULL; + compare = NULL; + if (have_keys == 1 && (keys = part->keys) != NULL) { + t = dbc->dbp->bt_internal; + compare = t->bt_compare; + if ((ret = __os_malloc(env, (part->nparts - 1) + * sizeof(struct key_sort), &ks)) != 0) + goto err; + for (j = 0; j < part->nparts - 1; j++) { + ks[j].dbp = dbc->dbp; + ks[j].compare = compare; + ks[j].key = &keys[j]; + } + + qsort(ks, (size_t)part->nparts - 1, + sizeof(struct key_sort), __part_key_cmp); + } + DB_MULTIPLE_INIT(dp, &data); + part->keys = (DBT *) + ((u_int8_t *)part->data + data.size); + j = 0; + for (kp = part->keys; + kp < &part->keys[part->nparts]; kp++, j++) { + DB_MULTIPLE_KEY_NEXT(dp, + &data, kp->data, kp->size, dd, ds); + if (dp == NULL) { + ret = DB_NOTFOUND; + break; + } + if (keys != NULL && j != 0 && + compare(dbc->dbp, ks[j - 1].key, kp) != 0) { + if (kp->data == NULL && + F_ISSET(dbp, DB_AM_RECOVER)) + goto err; + __db_errx(env, DB_STR_A("0662", + "Partition key %d does not match", + "%d"), j); + ret = EINVAL; + goto err; + } + } + } + } + if (ret == DB_NOTFOUND && F_ISSET(dbp, DB_AM_RECOVER)) + ret = 0; + +err: dbp->p_internal = part; + if (ks != NULL) + __os_free(env, ks); + return (ret); +} + +/* + * __partition_get_callback -- + * Get the partition callback function. 
+ * PUBLIC: int __partition_get_callback __P((DB *, + * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); + */ +int +__partition_get_callback(dbp, parts, callback) + DB *dbp; + u_int32_t *parts; + u_int32_t (**callback)(DB *, DBT *key); +{ + DB_PARTITION *part; + + part = dbp->p_internal; + /* Only return populated results if partitioned using callbacks. */ + if (part != NULL && !F_ISSET(part, PART_CALLBACK)) + part = NULL; + if (parts != NULL) + *parts = (part != NULL ? part->nparts : 0); + if (callback != NULL) + *callback = (part != NULL ? part->callback : NULL); + + return (0); +} + +/* + * __partition_get_keys -- + * Get partition keys. + * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); + */ +int +__partition_get_keys(dbp, parts, keys) + DB *dbp; + u_int32_t *parts; + DBT **keys; +{ + DB_PARTITION *part; + + part = dbp->p_internal; + /* Only return populated results if partitioned using ranges. */ + if (part != NULL && !F_ISSET(part, PART_RANGE)) + part = NULL; + if (parts != NULL) + *parts = (part != NULL ? part->nparts : 0); + if (keys != NULL) + *keys = (part != NULL ? &part->keys[1] : NULL); + + return (0); +} + +/* + * __partition_get_dirs -- + * Get partition dirs. + * PUBLIC: int __partition_get_dirs __P((DB *, const char ***)); + */ +int +__partition_get_dirs(dbp, dirpp) + DB *dbp; + const char ***dirpp; +{ + DB_PARTITION *part; + ENV *env; + u_int32_t i; + int ret; + + env = dbp->env; + if ((part = dbp->p_internal) == NULL) { + *dirpp = NULL; + return (0); + } + if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { + *dirpp = part->dirs; + return (0); + } + + /* + * We build a list once when asked. The original directory list, + * if any, was discarded at open time. 
+ */ + if ((*dirpp = part->dirs) != NULL) + return (0); + + if ((ret = __os_calloc(env, + sizeof(char *), part->nparts + 1, (char **)&part->dirs)) != 0) + return (ret); + + for (i = 0; i < part->nparts; i++) + part->dirs[i] = part->handles[i]->dirname; + + *dirpp = part->dirs; + return (0); +} + +/* + * __partc_init -- + * Initialize the access private portion of a cursor + * + * PUBLIC: int __partc_init __P((DBC *)); + */ +int +__partc_init(dbc) + DBC *dbc; +{ + ENV *env; + int ret; + + env = dbc->env; + + /* Allocate/initialize the internal structure. */ + if (dbc->internal == NULL && (ret = + __os_calloc(env, 1, sizeof(PART_CURSOR), &dbc->internal)) != 0) + return (ret); + + /* Initialize methods. */ + dbc->close = dbc->c_close = __dbc_close_pp; + dbc->cmp = __dbc_cmp_pp; + dbc->count = dbc->c_count = __dbc_count_pp; + dbc->del = dbc->c_del = __dbc_del_pp; + dbc->dup = dbc->c_dup = __dbc_dup_pp; + dbc->get = dbc->c_get = __partc_get_pp; + dbc->pget = dbc->c_pget = __dbc_pget_pp; + dbc->put = dbc->c_put = __dbc_put_pp; + dbc->am_bulk = NULL; + dbc->am_close = __partc_close; + dbc->am_del = __partc_del; + dbc->am_destroy = __partc_destroy; + dbc->am_get = NULL; + dbc->am_put = __partc_put; + dbc->am_writelock = __partc_writelock; + + /* We avoid swapping partition cursors since we swap the sub cursors */ + F_SET(dbc, DBC_PARTITIONED); + + return (0); +} +/* + * __partc_get_pp -- + * cursor get opeartion on a partitioned database. + */ +static int +__partc_get_pp(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int ignore_lease, ret; + + dbp = dbc->dbp; + env = dbp->env; + + ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; + LF_CLR(DB_IGNORE_LEASE); + if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) + return (ret); + + ENV_ENTER(env, ip); + + DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get", + flags == DB_SET || flags == DB_SET_RANGE ? 
key : NULL, NULL, flags); + + ret = __partc_get(dbc, key, data, flags); + /* + * Check for master leases. + */ + if (ret == 0 && + IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) + ret = __rep_lease_check(env, 1); + + ENV_LEAVE(env, ip); + __dbt_userfree(env, key, NULL, data); + return (ret); +} +/* + * __partiton_get -- + * cursor get opeartion on a partitioned database. + * + * PUBLIC: int __partc_get __P((DBC*, DBT *, DBT *, u_int32_t)); + */ +int +__partc_get(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DBC *orig_dbc, *new_dbc; + DB_PARTITION *part; + PART_CURSOR *cp; + u_int32_t multi, part_id; + int ret, retry, search; + + dbp = dbc->dbp; + cp = (PART_CURSOR*)dbc->internal; + orig_dbc = cp->sub_cursor; + part = dbp->p_internal; + + new_dbc = NULL; + retry = search = 0; + part_id = cp->part_id; + multi = flags & ~DB_OPFLAGS_MASK; + + switch (flags & DB_OPFLAGS_MASK) { + case DB_CURRENT: + break; + case DB_FIRST: + part_id = 0; + retry = 1; + break; + case DB_GET_BOTH: + case DB_GET_BOTHC: + case DB_GET_BOTH_RANGE: + search = 1; + break; + case DB_SET_RANGE: + search = 1; + retry = 1; + break; + case DB_LAST: + part_id = part->nparts - 1; + retry = 1; + break; + case DB_NEXT: + case DB_NEXT_NODUP: + if (orig_dbc == NULL) + part_id = 0; + else + part_id = cp->part_id; + retry = 1; + break; + case DB_NEXT_DUP: + break; + case DB_PREV: + case DB_PREV_NODUP: + if (orig_dbc == NULL) + part_id = part->nparts - 1; + else + part_id = cp->part_id; + retry = 1; + break; + case DB_PREV_DUP: + break; + case DB_SET: + search = 1; + break; + default: + return (__db_unknown_flag(dbp->env, "__partc_get", flags)); + } + + /* + * If we need to find the partition to start on, then + * do a binary search of the in memory partition table. 
+ */ + if (search == 1 && F_ISSET(part, PART_CALLBACK)) + part_id = part->callback(dbp, key) % part->nparts; + else if (search == 1) + __part_search(dbp, part, key, &part_id); + + /* Get a new cursor if necessary */ + if (orig_dbc == NULL || cp->part_id != part_id) { + GET_PART_CURSOR(dbc, new_dbc, part_id); + } else + new_dbc = orig_dbc; + + while ((ret = __dbc_get(new_dbc, + key, data, flags)) == DB_NOTFOUND && retry == 1) { + switch (flags & DB_OPFLAGS_MASK) { + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + case DB_SET_RANGE: + if (++part_id < part->nparts) { + flags = DB_FIRST | multi; + break; + } + goto err; + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + if (part_id-- > 0) { + flags = DB_LAST | multi; + break; + } + goto err; + default: + goto err; + } + + if (new_dbc != orig_dbc && (ret = __dbc_close(new_dbc)) != 0) + goto err; + GET_PART_CURSOR(dbc, new_dbc, part_id); + } + + if (ret != 0) + goto err; + + /* Success: swap original and new cursors. */ + if (new_dbc != orig_dbc) { + if (orig_dbc != NULL) { + cp->sub_cursor = NULL; + if ((ret = __dbc_close(orig_dbc)) != 0) + goto err; + } + cp->sub_cursor = new_dbc; + cp->part_id = part_id; + } + + return (0); + +err: if (new_dbc != NULL && new_dbc != orig_dbc) + (void)__dbc_close(new_dbc); + return (ret); +} + +/* + * __partc_put -- + * cursor put opeartion on a partitioned cursor. 
+ * + */ +static int +__partc_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DB_PARTITION *part; + DBC *new_dbc; + PART_CURSOR *cp; + u_int32_t part_id; + int ret; + + dbp = dbc->dbp; + cp = (PART_CURSOR*)dbc->internal; + part_id = cp->part_id; + part = dbp->p_internal; + *pgnop = PGNO_INVALID; + + switch (flags) { + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_NOOVERWRITE: + case DB_OVERWRITE_DUP: + if (F_ISSET(part, PART_CALLBACK)) { + part_id = part->callback(dbp, key) % part->nparts; + break; + } + __part_search(dbp, part, key, &part_id); + break; + default: + break; + } + + if ((new_dbc = cp->sub_cursor) == NULL || cp->part_id != part_id) { + if ((ret = __db_cursor_int(part->handles[part_id], + dbc->thread_info, dbc->txn, part->handles[part_id]->type, + PGNO_INVALID, 0, dbc->locker, &new_dbc)) != 0) + goto err; + } + + if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR)) + F_SET(new_dbc, DBC_WRITER); + if ((ret = __dbc_put(new_dbc, key, data, flags)) != 0) + goto err; + + if (new_dbc != cp->sub_cursor) { + if (cp->sub_cursor != NULL) { + if ((ret = __dbc_close(cp->sub_cursor)) != 0) + goto err; + cp->sub_cursor = NULL; + } + cp->sub_cursor = new_dbc; + cp->part_id = part_id; + } + + return (0); + +err: if (new_dbc != NULL && cp->sub_cursor != new_dbc) + (void)__dbc_close(new_dbc); + return (ret); +} + +/* + * __partc_del + * Delete interface to partitioned cursors. + * + */ +static int +__partc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + PART_CURSOR *cp; + cp = (PART_CURSOR*)dbc->internal; + + if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR)) + F_SET(cp->sub_cursor, DBC_WRITER); + return (__dbc_del(cp->sub_cursor, flags)); +} + +/* + * __partc_writelock + * Writelock interface to partitioned cursors. 
+ * + */ +static int +__partc_writelock(dbc) + DBC *dbc; +{ + PART_CURSOR *cp; + cp = (PART_CURSOR*)dbc->internal; + + return (cp->sub_cursor->am_writelock(cp->sub_cursor)); +} + +/* + * __partc_close + * Close interface to partitioned cursors. + * + */ +static int +__partc_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + PART_CURSOR *cp; + int ret; + + COMPQUIET(root_pgno, 0); + COMPQUIET(rmroot, NULL); + + cp = (PART_CURSOR*)dbc->internal; + + if (cp->sub_cursor == NULL) + return (0); + ret = __dbc_close(cp->sub_cursor); + cp->sub_cursor = NULL; + return (ret); +} + +/* + * __partc_destroy -- + * Destroy a single cursor. + */ +static int +__partc_destroy(dbc) + DBC *dbc; +{ + PART_CURSOR *cp; + ENV *env; + + cp = (PART_CURSOR *)dbc->internal; + env = dbc->env; + + /* Discard the structure. Don't recurse. */ + __os_free(env, cp); + + return (0); +} + +/* + * __partiton_close + * Close a partitioned database. + * + * PUBLIC: int __partition_close __P((DB *, DB_TXN *, u_int32_t)); + */ +int +__partition_close(dbp, txn, flags) + DB *dbp; + DB_TXN *txn; + u_int32_t flags; +{ + DB **pdbp; + DB_PARTITION *part; + ENV *env; + u_int32_t i; + int ret, t_ret; + + if ((part = dbp->p_internal) == NULL) + return (0); + + env = dbp->env; + ret = 0; + + if ((pdbp = part->handles) != NULL) { + for (i = 0; i < part->nparts; i++, pdbp++) + if (*pdbp != NULL && (t_ret = + __db_close(*pdbp, txn, flags)) != 0 && ret == 0) + ret = t_ret; + __os_free(env, part->handles); + } + if (part->dirs != NULL) + __os_free(env, (char **)part->dirs); + if (part->data != NULL) + __os_free(env, (char **)part->data); + __os_free(env, part); + dbp->p_internal = NULL; + + return (ret); +} + +/* + * __partiton_sync + * Sync a partitioned database. 
+ * + * PUBLIC: int __partition_sync __P((DB *)); + */ +int +__partition_sync(dbp) + DB *dbp; +{ + DB **pdbp; + DB_PARTITION *part; + u_int32_t i; + int ret, t_ret; + + ret = 0; + part = dbp->p_internal; + + if ((pdbp = part->handles) != NULL) { + for (i = 0; i < part->nparts; i++, pdbp++) + if (*pdbp != NULL && + F_ISSET(*pdbp, DB_AM_OPEN_CALLED) && (t_ret = + __memp_fsync((*pdbp)->mpf)) != 0 && ret == 0) + ret = t_ret; + } + if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __partiton_stat + * Stat a partitioned database. + * + * PUBLIC: int __partition_stat __P((DBC *, void *, u_int32_t)); + */ +int +__partition_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + DB *dbp, **pdbp; + DB_BTREE_STAT *fsp, *bsp; +#ifdef HAVE_HASH + DB_HASH_STAT *hfsp, *hsp; +#endif + DB_PARTITION *part; + DBC *new_dbc; + ENV *env; + u_int32_t i; + int ret; + + dbp = dbc->dbp; + part = dbp->p_internal; + env = dbp->env; + fsp = NULL; +#ifdef HAVE_HASH + hfsp = NULL; +#endif + + pdbp = part->handles; + for (i = 0; i < part->nparts; i++, pdbp++) { + if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn, + (*pdbp)->type, PGNO_INVALID, + 0, dbc->locker, &new_dbc)) != 0) + goto err; + switch (new_dbc->dbtype) { + case DB_BTREE: + if ((ret = __bam_stat(new_dbc, &bsp, flags)) != 0) + goto err; + if (fsp == NULL) { + fsp = bsp; + *(DB_BTREE_STAT **)spp = fsp; + } else { + fsp->bt_nkeys += bsp->bt_nkeys; + fsp->bt_ndata += bsp->bt_ndata; + fsp->bt_pagecnt += bsp->bt_pagecnt; + if (fsp->bt_levels < bsp->bt_levels) + fsp->bt_levels = bsp->bt_levels; + fsp->bt_int_pg += bsp->bt_int_pg; + fsp->bt_leaf_pg += bsp->bt_leaf_pg; + fsp->bt_dup_pg += bsp->bt_dup_pg; + fsp->bt_over_pg += bsp->bt_over_pg; + fsp->bt_free += bsp->bt_free; + fsp->bt_int_pgfree += bsp->bt_int_pgfree; + fsp->bt_leaf_pgfree += bsp->bt_leaf_pgfree; + fsp->bt_dup_pgfree += bsp->bt_dup_pgfree; + fsp->bt_over_pgfree += bsp->bt_over_pgfree; + __os_ufree(env, 
bsp); + } + break; +#ifdef HAVE_HASH + case DB_HASH: + if ((ret = __ham_stat(new_dbc, &hsp, flags)) != 0) + goto err; + if (hfsp == NULL) { + hfsp = hsp; + *(DB_HASH_STAT **)spp = hfsp; + } else { + hfsp->hash_nkeys += hsp->hash_nkeys; + hfsp->hash_ndata += hsp->hash_ndata; + hfsp->hash_pagecnt += hsp->hash_pagecnt; + hfsp->hash_ffactor += hsp->hash_ffactor; + hfsp->hash_buckets += hsp->hash_buckets; + hfsp->hash_free += hsp->hash_free; + hfsp->hash_bfree += hsp->hash_bfree; + hfsp->hash_bigpages += hsp->hash_bigpages; + hfsp->hash_big_bfree += hsp->hash_big_bfree; + hfsp->hash_overflows += hsp->hash_overflows; + hfsp->hash_ovfl_free += hsp->hash_ovfl_free; + hfsp->hash_dup += hsp->hash_dup; + hfsp->hash_dup_free += hsp->hash_dup_free; + __os_ufree(env, hsp); + } + break; +#endif + default: + break; + } + if ((ret = __dbc_close(new_dbc)) != 0) + goto err; + } + return (0); + +err: + if (fsp != NULL) + __os_ufree(env, fsp); + *(DB_BTREE_STAT **)spp = NULL; + return (ret); +} + +/* + * __part_truncate -- + * Truncate a database. 
+ * + * PUBLIC: int __part_truncate __P((DBC *, u_int32_t *)); + */ +int +__part_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + DB *dbp, **pdbp; + DB_PARTITION *part; + DBC *new_dbc; + u_int32_t count, i; + int ret, t_ret; + + dbp = dbc->dbp; + part = dbp->p_internal; + pdbp = part->handles; + ret = 0; + + if (countp != NULL) + *countp = 0; + for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) { + if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn, + (*pdbp)->type, PGNO_INVALID, + 0, dbc->locker, &new_dbc)) != 0) + break; + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + ret = __bam_truncate(new_dbc, &count); + break; + case DB_HASH: +#ifdef HAVE_HASH + ret = __ham_truncate(new_dbc, &count); + break; +#endif + case DB_QUEUE: + case DB_UNKNOWN: + default: + ret = __db_unknown_type(dbp->env, + "DB->truncate", dbp->type); + count = 0; + break; + } + if ((t_ret = __dbc_close(new_dbc)) != 0 && ret == 0) + ret = t_ret; + if (countp != NULL) + *countp += count; + } + + return (ret); +} +/* + * __part_compact -- compact a partitioned database. + * + * PUBLIC: int __part_compact __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); + */ +int +__part_compact(dbp, ip, txn, start, stop, c_data, flags, end) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DBT *start, *stop; + DB_COMPACT *c_data; + u_int32_t flags; + DBT *end; +{ + DB **pdbp; + DB_PARTITION *part; + u_int32_t i; + int ret; + + part = dbp->p_internal; + pdbp = part->handles; + ret = 0; + + for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) { + switch (dbp->type) { + case DB_HASH: + case DB_BTREE: + case DB_RECNO: + ret = __db_compact_int(*pdbp, + ip, txn, start, stop, c_data, flags, end); + break; + + default: + ret = __dbh_am_chk(dbp, DB_OK_BTREE); + break; + } + } + return (ret); +} + +/* + * __part_lsn_reset -- + * reset the lsns on each partition. 
+ * + * PUBLIC: int __part_lsn_reset __P((DB *, DB_THREAD_INFO *)); + */ +int +__part_lsn_reset(dbp, ip) + DB *dbp; + DB_THREAD_INFO *ip; +{ + DB **pdbp; + DB_PARTITION *part; + u_int32_t i; + int ret; + + part = dbp->p_internal; + pdbp = part->handles; + ret = 0; + + for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) + ret = __db_lsn_reset((*pdbp)->mpf, ip); + + return (ret); +} + +/* + * __part_fileid_reset -- + * reset the fileid on each partition. + * + * PUBLIC: int __part_fileid_reset + * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); + */ +int +__part_fileid_reset(env, ip, fname, nparts, encrypted) + ENV *env; + DB_THREAD_INFO *ip; + const char *fname; + u_int32_t nparts; + int encrypted; +{ + int ret; + u_int32_t part_id; + char *name, *sp; + const char *np; + + if ((ret = __os_malloc(env, + strlen(fname) + PART_LEN + 1, &name)) != 0) { + __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1); + return (ret); + } + + sp = name; + np = __db_rpath(fname); + if (np == NULL) + np = fname; + else { + np++; + (void)strncpy(name, fname, (size_t)(np - fname)); + sp = name + (np - fname); + } + + for (part_id = 0; ret == 0 && part_id < nparts; part_id++) { + (void)sprintf(sp, PART_NAME, np, part_id); + ret = __env_fileid_reset(env, ip, sp, encrypted); + } + + __os_free(env, name); + return (ret); +} + +/* + * __part_key_range -- + * Return proportion of keys relative to given key. 
+ * + * PUBLIC: int __part_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); + */ +int +__part_key_range(dbc, dbt, kp, flags) + DBC *dbc; + DBT *dbt; + DB_KEY_RANGE *kp; + u_int32_t flags; +{ + BTREE_CURSOR *cp; + DBC *new_dbc; + DB_PARTITION *part; + PAGE *h; + u_int32_t id, part_id; + u_int32_t elems, empty, less_elems, my_elems, greater_elems; + u_int32_t levels, max_levels, my_levels; + db_pgno_t root_pgno; + int ret; + double total_elems; + + COMPQUIET(flags, 0); + + part = dbc->dbp->p_internal; + + /* + * First we find the key range for the partition that contains the + * key. Then we scale based on estimates of the other partitions. + */ + if (F_ISSET(part, PART_CALLBACK)) + part_id = part->callback(dbc->dbp, dbt) % part->nparts; + else + __part_search(dbc->dbp, part, dbt, &part_id); + GET_PART_CURSOR(dbc, new_dbc, part_id); + + if ((ret = __bam_key_range(new_dbc, dbt, kp, flags)) != 0) + goto err; + + cp = (BTREE_CURSOR *)new_dbc->internal; + + root_pgno = BAM_ROOT_PGNO(new_dbc); + if ((ret = __memp_fget(new_dbc->dbp->mpf, &root_pgno, + new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0) + goto c_err; + + my_elems = NUM_ENT(h); + my_levels = LEVEL(h); + max_levels = my_levels; + + if ((ret = __memp_fput(new_dbc->dbp->mpf, + new_dbc->thread_info, h, new_dbc->priority)) != 0) + goto c_err; + + if ((ret = __dbc_close(new_dbc)) != 0) + goto err; + /* + * We have the range within one subtree. Now estimate + * what part of the whole range that subtree is. Figure + * out how many levels each part has and how many entries + * in the level below the root. 
+ */ + empty = less_elems = greater_elems = 0; + for (id = 0; id < part->nparts; id++) { + if (id == part_id) { + empty = 0; + continue; + } + GET_PART_CURSOR(dbc, new_dbc, id); + cp = (BTREE_CURSOR *)new_dbc->internal; + if ((ret = __memp_fget(new_dbc->dbp->mpf, &cp->root, + new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0) + goto c_err; + + elems = NUM_ENT(h); + levels = LEVEL(h); + if (levels == 1) + elems /= 2; + + if ((ret = __memp_fput(new_dbc->dbp->mpf, + new_dbc->thread_info, h, new_dbc->priority)) != 0) + goto c_err; + + if ((ret = __dbc_close(new_dbc)) != 0) + goto err; + + /* If the tree is empty, ignore it. */ + if (elems == 0) { + empty++; + continue; + } + + /* + * If a tree has fewer levels than the max just count + * it as a single element in the higher level. + */ + if (id < part_id) { + if (levels > max_levels) { + max_levels = levels; + less_elems = id + elems - empty; + } else if (levels < max_levels) + less_elems++; + else + less_elems += elems; + } else { + if (levels > max_levels) { + max_levels = levels; + greater_elems = (id - part_id) + elems - empty; + } else if (levels < max_levels) + greater_elems++; + else + greater_elems += elems; + } + + } + + if (my_levels < max_levels) { + /* + * The subtree containing the key is not the tallest one. + * Reduce its share by the number of records at the highest + * level. Scale the greater and lesser components up + * by the number of records on either side of this + * subtree. + */ + total_elems = 1 + greater_elems + less_elems; + kp->equal /= total_elems; + kp->less /= total_elems; + kp->less += less_elems/total_elems; + kp->greater /= total_elems; + kp->greater += greater_elems/total_elems; + } else if (my_levels == max_levels) { + /* + * The key is in one of the tallest subtrees. We will + * scale the values by the ratio of the records at the + * top of this stubtree to the number of records at the + * highest level. 
+ */ + total_elems = greater_elems + less_elems; + if (total_elems != 0) { + /* + * First scale down by the fraction of elements + * in this subtree. + */ + total_elems += my_elems; + kp->equal *= my_elems; + kp->equal /= total_elems; + kp->less *= my_elems; + kp->less /= total_elems; + kp->greater *= my_elems; + kp->greater /= total_elems; + /* + * Proportially add weight from the subtrees to the + * left and right of this one. + */ + kp->less += less_elems / total_elems; + kp->greater += greater_elems / total_elems; + } + } + + if (0) { +c_err: (void)__dbc_close(new_dbc); + } + +err: return (ret); +} + +/* + * __part_remove -- + * Remove method for a partitioned database. + * + * PUBLIC: int __part_remove __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t)); + */ +int +__part_remove(dbp, ip, txn, name, subdb, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb; + u_int32_t flags; +{ + return (__part_rr(dbp, ip, txn, name, subdb, NULL, flags)); +} + +/* + * __part_rename -- + * Rename method for a partitioned database. + * + * PUBLIC: int __part_rename __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, const char *, const char *)); + */ +int +__part_rename(dbp, ip, txn, name, subdb, newname) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb, *newname; +{ + return (__part_rr(dbp, ip, txn, name, subdb, newname, 0)); +} + +/* + * __part_rr -- + * Remove/Rename method for a partitioned database. 
+ */ +static int +__part_rr(dbp, ip, txn, name, subdb, newname, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb, *newname; + u_int32_t flags; +{ + DB **pdbp, *ptmpdbp, *tmpdbp; + DB_PARTITION *part; + ENV *env; + u_int32_t i; + int ret, t_ret; + char *np; + + env = dbp->env; + ret = 0; + + if (subdb != NULL && name != NULL) { + __db_errx(env, DB_STR("0663", + "A partitioned database can not be in a multiple databases file")); + return (EINVAL); + } + ENV_GET_THREAD_INFO(env, ip); + + /* + * Since rename no longer opens the database, we have + * to do it here. + */ + if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0) + return (ret); + + /* + * We need to make sure we don't self-deadlock, so give + * this dbp the same locker as the incoming one. + */ + tmpdbp->locker = dbp->locker; + if ((ret = __db_open(tmpdbp, ip, txn, name, NULL, dbp->type, + DB_RDWRMASTER | DB_RDONLY, 0, PGNO_BASE_MD)) != 0) + goto err; + + part = tmpdbp->p_internal; + pdbp = part->handles; + COMPQUIET(np, NULL); + if (newname != NULL && (ret = __os_malloc(env, + strlen(newname) + PART_LEN + 1, &np)) != 0) { + __db_errx(env, Alloc_err, strlen(newname) + PART_LEN + 1); + goto err; + } + for (i = 0; i < part->nparts; i++, pdbp++) { + if ((ret = __db_create_internal(&ptmpdbp, env, 0)) != 0) + break; + ptmpdbp->locker = (*pdbp)->locker; + if (newname == NULL) + ret = __db_remove_int(ptmpdbp, + ip, txn, (*pdbp)->fname, NULL, flags); + else { + DB_ASSERT(env, np != NULL); + (void)sprintf(np, PART_NAME, newname, i); + ret = __db_rename_int(ptmpdbp, + ip, txn, (*pdbp)->fname, NULL, np, flags); + } + ptmpdbp->locker = NULL; + (void)__db_close(ptmpdbp, NULL, DB_NOSYNC); + if (ret != 0) + break; + } + + if (newname != NULL) + __os_free(env, np); + + if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { +err: /* + * Since we copied the locker ID from the dbp, we'd better not + * free it here. 
+ */ + tmpdbp->locker = NULL; + + /* We need to remove the lock event we associated with this. */ + if (txn != NULL) + __txn_remlock(env, + txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID); + + if ((t_ret = __db_close(tmpdbp, + txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } + return (ret); +} +#ifdef HAVE_VERIFY +/* + * __part_verify -- + * Verify a partitioned database. + * + * PUBLIC: int __part_verify __P((DB *, VRFY_DBINFO *, const char *, + * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__part_verify(dbp, vdp, fname, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + const char *fname; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + BINTERNAL *lp, *rp; + DB **pdbp; + DB_PARTITION *part; + DBC *dbc; + DBT *key; + ENV *env; + DB_THREAD_INFO *ip; + u_int32_t i; + int ret, t_ret; + + env = dbp->env; + lp = rp = NULL; + dbc = NULL; + ip = vdp->thread_info; + + if (dbp->type == DB_BTREE) { + if ((ret = __bam_open(dbp, ip, + NULL, fname, PGNO_BASE_MD, flags)) != 0) + goto err; + } +#ifdef HAVE_HASH + else if ((ret = __ham_open(dbp, ip, + NULL, fname, PGNO_BASE_MD, flags)) != 0) + goto err; +#endif + + /* + * Initalize partition db handles and get the names. Set DB_RDWRMASTER + * because we may not have the partition callback, but we can still + * look at the structure of the tree. + */ + if ((ret = __partition_open(dbp, + ip, NULL, fname, dbp->type, flags | DB_RDWRMASTER, 0, 0)) != 0) + goto err; + part = dbp->p_internal; + + if (LF_ISSET(DB_SALVAGE)) { + /* If we are being aggressive we don't want to dump the keys. 
*/ + if (LF_ISSET(DB_AGGRESSIVE)) + dbp->p_internal = NULL; + ret = __db_prheader(dbp, + NULL, 0, 0, handle, callback, vdp, PGNO_BASE_MD); + dbp->p_internal = part; + if (ret != 0) + goto err; + } + + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + goto err; + + pdbp = part->handles; + for (i = 0; i < part->nparts; i++, pdbp++) { + if (!F_ISSET(part, PART_RANGE) || part->keys == NULL) + goto vrfy; + if (lp != NULL) + __os_free(env, lp); + lp = rp; + rp = NULL; + if (i + 1 < part->nparts) { + key = &part->keys[i + 1]; + if ((ret = __os_malloc(env, + BINTERNAL_SIZE(key->size), &rp)) != 0) + goto err; + rp->len = key->size; + memcpy(rp->data, key->data, key->size); + B_TSET(rp->type, B_KEYDATA); + } +vrfy: if ((t_ret = __db_verify(*pdbp, ip, (*pdbp)->fname, + NULL, handle, callback, + lp, rp, flags | DB_VERIFY_PARTITION)) != 0 && ret == 0) + ret = t_ret; + } + +err: if (lp != NULL) + __os_free(env, lp); + if (rp != NULL) + __os_free(env, rp); + return (ret); +} +#endif + +#ifdef CONFIG_TEST +/* + * __part_testdocopy -- copy all partitions for testing purposes. + * + * PUBLIC: int __part_testdocopy __P((DB *, const char *)); + */ +int +__part_testdocopy(dbp, name) + DB *dbp; + const char *name; +{ + DB **pdbp; + DB_PARTITION *part; + u_int32_t i; + int ret; + + if ((ret = __db_testdocopy(dbp->env, name)) != 0) + return (ret); + + part = dbp->p_internal; + pdbp = part->handles; + for (i = 0; i < part->nparts; i++, pdbp++) + if ((ret = __db_testdocopy(dbp->env, (*pdbp)->fname)) != 0) + return (ret); + + return (0); +} +#endif +#else +/* + * __db_nopartition -- + * Error when a Berkeley DB build doesn't include partitioning. + * + * PUBLIC: int __db_no_partition __P((ENV *)); + */ +int +__db_no_partition(env) + ENV *env; +{ + __db_errx(env, DB_STR("0664", + "library build did not include support for the database partitioning")); + return (DB_OPNOTSUP); +} +/* + * __partition_set -- + * Set the partitioning keys or callback function. 
+ * This routine must be called prior to creating the database. + * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *, + * PUBLIC: u_int32_t (*callback)(DB *, DBT *key))); + */ + +int +__partition_set(dbp, parts, keys, callback) + DB *dbp; + u_int32_t parts; + DBT *keys; + u_int32_t (*callback)(DB *, DBT *key); +{ + COMPQUIET(parts, 0); + COMPQUIET(keys, NULL); + COMPQUIET(callback, NULL); + + return (__db_no_partition(dbp->env)); +} + +/* + * __partition_get_callback -- + * Set the partition callback function. This routine must be called + * prior to opening a partition database that requires a function. + * PUBLIC: int __partition_get_callback __P((DB *, + * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); + */ +int +__partition_get_callback(dbp, parts, callback) + DB *dbp; + u_int32_t *parts; + u_int32_t (**callback)(DB *, DBT *key); +{ + COMPQUIET(parts, NULL); + COMPQUIET(callback, NULL); + + return (__db_no_partition(dbp->env)); +} + +/* + * __partition_get_dirs -- + * Get partition dirs. + * PUBLIC: int __partition_get_dirs __P((DB *, const char ***)); + */ +int +__partition_get_dirs(dbp, dirpp) + DB *dbp; + const char ***dirpp; +{ + COMPQUIET(dirpp, NULL); + return (__db_no_partition(dbp->env)); +} + +/* + * __partition_get_keys -- + * Get partition keys. + * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); + */ +int +__partition_get_keys(dbp, parts, keys) + DB *dbp; + u_int32_t *parts; + DBT **keys; +{ + COMPQUIET(parts, NULL); + COMPQUIET(keys, NULL); + + return (__db_no_partition(dbp->env)); +} +/* + * __partition_init -- + * Initialize the partition structure. + * Called when the meta data page is read in during database open or + * when partition keys or a callback are set. 
+ * + * PUBLIC: int __partition_init __P((DB *, u_int32_t)); + */ +int +__partition_init(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_no_partition(dbp->env)); +} +/* + * __part_fileid_reset -- + * reset the fileid on each partition. + * + * PUBLIC: int __part_fileid_reset + * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); + */ +int +__part_fileid_reset(env, ip, fname, nparts, encrypted) + ENV *env; + DB_THREAD_INFO *ip; + const char *fname; + u_int32_t nparts; + int encrypted; +{ + COMPQUIET(ip, NULL); + COMPQUIET(fname, NULL); + COMPQUIET(nparts, 0); + COMPQUIET(encrypted, 0); + + return (__db_no_partition(env)); +} +/* + * __partition_set_dirs -- + * Set the directories for creating the partition databases. + * They must be in the environment. + * PUBLIC: int __partition_set_dirs __P((DB *, const char **)); + */ +int +__partition_set_dirs(dbp, dirp) + DB *dbp; + const char **dirp; +{ + COMPQUIET(dirp, NULL); + + return (__db_no_partition(dbp->env)); +} +#endif diff --git a/src/dbinc/atomic.h b/src/dbinc/atomic.h new file mode 100644 index 00000000..1a96e2af --- /dev/null +++ b/src/dbinc/atomic.h @@ -0,0 +1,220 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_ATOMIC_H_ +#define _DB_ATOMIC_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Atomic operation support for Oracle Berkeley DB + * + * HAVE_ATOMIC_SUPPORT configures whether to use the assembly language + * or system calls to perform: + * + * atomic_inc(env, valueptr) + * Adds 1 to the db_atomic_t value, returning the new value. + * + * atomic_dec(env, valueptr) + * Subtracts 1 from the db_atomic_t value, returning the new value. + * + * atomic_compare_exchange(env, valueptr, oldval, newval) + * If the db_atomic_t's value is still oldval, set it to newval. + * It returns 1 for success or 0 for failure. 
+ * + * The ENV * paramter is used only when HAVE_ATOMIC_SUPPORT is undefined. + * + * If the platform does not natively support any one of these operations, + * then atomic operations will be emulated with this sequence: + * MUTEX_LOCK() + * + * MUTEX_UNLOCK(); + * Uses where mutexes are not available (e.g. the environment has not yet + * attached to the mutex region) must be avoided. + */ +#if defined(DB_WIN32) +typedef DWORD atomic_value_t; +#else +typedef int32_t atomic_value_t; +#endif + +/* + * Windows CE has strange issues using the Interlocked APIs with variables + * stored in shared memory. It seems like the page needs to have been written + * prior to the API working as expected. Work around this by allocating an + * additional 32-bit value that can be harmlessly written for each value + * used in Interlocked instructions. + */ +#if defined(DB_WINCE) +typedef struct { + volatile atomic_value_t value; + volatile atomic_value_t dummy; +} db_atomic_t; +#else +typedef struct { + volatile atomic_value_t value; +} db_atomic_t; +#endif + +/* + * These macro hide the db_atomic_t structure layout and help detect + * non-atomic_t actual argument to the atomic_xxx() calls. DB requires + * aligned 32-bit reads to be atomic even outside of explicit 'atomic' calls. + * These have no memory barriers; the caller must include them when necessary. + */ +#define atomic_read(p) ((p)->value) +#define atomic_init(p, val) ((p)->value = (val)) + +#ifdef HAVE_ATOMIC_SUPPORT + +#if defined(DB_WIN32) +#if defined(DB_WINCE) +#define WINCE_ATOMIC_MAGIC(p) \ + /* \ + * Memory mapped regions on Windows CE cause problems with \ + * InterlockedXXX calls. Each page in a mapped region needs to \ + * have been written to prior to an InterlockedXXX call, or the \ + * InterlockedXXX call hangs. This does not seem to be \ + * documented anywhere. 
For now, read/write a non-critical \ + * piece of memory from the shared region prior to attempting \ + * shared region prior to attempting an InterlockedExchange \ + * InterlockedXXX operation. \ + */ \ + (p)->dummy = 0 +#else +#define WINCE_ATOMIC_MAGIC(p) 0 +#endif + +#if defined(DB_WINCE) || (defined(_MSC_VER) && _MSC_VER < 1300) +/* + * The Interlocked instructions on Windows CE have different parameter + * definitions. The parameters lost their 'volatile' qualifier, + * cast it away, to avoid compiler warnings. + * These definitions should match those in dbinc/mutex_int.h for tsl_t, except + * that the WINCE version drops the volatile qualifier. + */ +typedef PLONG interlocked_val; +#define atomic_inc(env, p) \ + (WINCE_ATOMIC_MAGIC(p), \ + InterlockedIncrement((interlocked_val)(&(p)->value))) + +#else +typedef LONG volatile *interlocked_val; +#define atomic_inc(env, p) \ + InterlockedIncrement((interlocked_val)(&(p)->value)) +#endif + +#define atomic_dec(env, p) \ + (WINCE_ATOMIC_MAGIC(p), \ + InterlockedDecrement((interlocked_val)(&(p)->value))) +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define atomic_compare_exchange(env, p, oldval, newval) \ + (WINCE_ATOMIC_MAGIC(p), \ + (InterlockedCompareExchange((PVOID *)(&(p)->value), \ + (PVOID)(newval), (PVOID)(oldval)) == (PVOID)(oldval))) +#else +#define atomic_compare_exchange(env, p, oldval, newval) \ + (WINCE_ATOMIC_MAGIC(p), \ + (InterlockedCompareExchange((interlocked_val)(&(p)->value), \ + (newval), (oldval)) == (oldval))) +#endif +#endif + +#if defined(HAVE_ATOMIC_SOLARIS) +/* Solaris sparc & x86/64 */ +#include +#define atomic_inc(env, p) \ + atomic_inc_uint_nv((volatile unsigned int *) &(p)->value) +#define atomic_dec(env, p) \ + atomic_dec_uint_nv((volatile unsigned int *) &(p)->value) +#define atomic_compare_exchange(env, p, oval, nval) \ + (atomic_cas_32((volatile unsigned int *) &(p)->value, \ + (oval), (nval)) == (oval)) +#endif + +#if defined(HAVE_ATOMIC_X86_GCC_ASSEMBLY) +/* x86/x86_64 gcc */ 
+#define atomic_inc(env, p) __atomic_inc(p) +#define atomic_dec(env, p) __atomic_dec(p) +#define atomic_compare_exchange(env, p, o, n) \ + __atomic_compare_exchange((p), (o), (n)) +static inline int __atomic_inc(db_atomic_t *p) +{ + int temp; + + temp = 1; + __asm__ __volatile__("lock; xadd %0, (%1)" + : "+r"(temp) + : "r"(p)); + return (temp + 1); +} + +static inline int __atomic_dec(db_atomic_t *p) +{ + int temp; + + temp = -1; + __asm__ __volatile__("lock; xadd %0, (%1)" + : "+r"(temp) + : "r"(p)); + return (temp - 1); +} + +/* + * x86/gcc Compare exchange for shared latches. i486+ + * Returns 1 for success, 0 for failure + * + * GCC 4.1+ has an equivalent __sync_bool_compare_and_swap() as well as + * __sync_val_compare_and_swap() which returns the value read from *dest + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * which configure could be changed to use. + */ +static inline int __atomic_compare_exchange( + db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval) +{ + atomic_value_t was; + + if (p->value != oldval) /* check without expensive cache line locking */ + return 0; + __asm__ __volatile__("lock; cmpxchgl %1, (%2);" + :"=a"(was) + :"r"(newval), "r"(p), "a"(oldval) + :"memory", "cc"); + return (was == oldval); +} +#endif + +#else +/* + * No native hardware support for atomic increment, decrement, and + * compare-exchange. Emulate them when mutexes are supported; + * do them without concern for atomicity when no mutexes. + */ +#ifndef HAVE_MUTEX_SUPPORT +/* + * These minimal versions are correct to use only for single-threaded, + * single-process environments. 
+ */ +#define atomic_inc(env, p) (++(p)->value) +#define atomic_dec(env, p) (--(p)->value) +#define atomic_compare_exchange(env, p, oldval, newval) \ + (DB_ASSERT(env, atomic_read(p) == (oldval)), \ + atomic_init(p, (newval)), 1) +#else +#define atomic_inc(env, p) __atomic_inc(env, p) +#define atomic_dec(env, p) __atomic_dec(env, p) +#endif +#endif + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_ATOMIC_H_ */ diff --git a/src/dbinc/btree.h b/src/dbinc/btree.h new file mode 100644 index 00000000..6dde03d9 --- /dev/null +++ b/src/dbinc/btree.h @@ -0,0 +1,553 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ +#ifndef _DB_BTREE_H_ +#define _DB_BTREE_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Forward structure declarations. */ +struct __btree; typedef struct __btree BTREE; +struct __cursor; typedef struct __cursor BTREE_CURSOR; +struct __epg; typedef struct __epg EPG; + +#define DEFMINKEYPAGE (2) + +/* + * A recno order of 0 indicates that we don't have an order, not that we've + * an order less than 1. + */ +#define INVALID_ORDER 0 + +#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) +#define ISLEAF(p) (TYPE(p) == P_LBTREE || \ + TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP) + +/* Flags for __bam_cadjust_log(). */ +#define CAD_UPDATEROOT 0x01 /* Root page count was updated. */ + +/* Flags for __bam_split_log(). */ +#define SPL_NRECS 0x01 /* Split tree has record count. */ +#define SPL_RECNO 0x02 /* This is a Recno cursor. */ + +/* Flags for __bam_iitem(). */ +#define BI_DELETED 0x01 /* Key/data pair only placeholder. */ + +/* Flags for __bam_stkrel(). */ +#define STK_CLRDBC 0x01 /* Clear dbc->page reference. */ +#define STK_NOLOCK 0x02 /* Don't retain locks. */ +#define STK_PGONLY 0x04 + +/* Flags for __ram_ca(). These get logged, so make the values explicit. */ +typedef enum { + CA_DELETE = 0, /* Delete the current record. */ + CA_IAFTER = 1, /* Insert before the current record. */ + CA_IBEFORE = 2, /* Insert after the current record. */ + CA_ICURRENT = 3 /* Overwrite the current record. 
*/ +} ca_recno_arg; + +/* + * Flags for __bam_search() and __bam_rsearch(). + * + * Note, internal page searches must find the largest record less than key in + * the tree so that descents work. Leaf page searches must find the smallest + * record greater than key so that the returned index is the record's correct + * position for insertion. + * + * The flags parameter to the search routines describes three aspects of the + * search: the type of locking required (including if we're locking a pair of + * pages), the item to return in the presence of duplicates and whether or not + * to return deleted entries. To simplify both the mnemonic representation + * and the code that checks for various cases, we construct a set of bitmasks. + */ +#define SR_READ 0x00001 /* Read locks. */ +#define SR_WRITE 0x00002 /* Write locks. */ + +#define SR_APPEND 0x00040 /* Append to the tree. */ +#define SR_DELNO 0x00080 /* Don't return deleted items. */ +#define SR_DUPFIRST 0x00100 /* Return first duplicate. */ +#define SR_DUPLAST 0x00200 /* Return last duplicate. */ +#define SR_EXACT 0x00400 /* Exact items only. */ +#define SR_PARENT 0x00800 /* Lock page pair. */ +#define SR_STACK 0x01000 /* Need a complete stack. */ +#define SR_PAST_EOF 0x02000 /* If doing insert search (or keyfirst + * or keylast operations), or a split + * on behalf of an insert, it's okay to + * return an entry one past end-of-page. + */ +#define SR_STK_ONLY 0x04000 /* Just return info in the stack */ +#define SR_MAX 0x08000 /* Get the right most key */ +#define SR_MIN 0x10000 /* Get the left most key */ +#define SR_NEXT 0x20000 /* Get the page after this key */ +#define SR_DEL 0x40000 /* Get the tree to delete this key. */ +#define SR_START 0x80000 /* Level to start stack. 
*/ +#define SR_BOTH 0x100000 /* Get this and the NEXT page */ + +#define SR_DELETE \ + (SR_WRITE | SR_DUPFIRST | SR_DELNO | SR_EXACT | SR_STACK) +#define SR_FIND (SR_READ | SR_DUPFIRST | SR_DELNO) +#define SR_FIND_WR (SR_WRITE | SR_DUPFIRST | SR_DELNO) +#define SR_INSERT (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK) +#define SR_KEYFIRST (SR_WRITE | SR_DUPFIRST | SR_PAST_EOF | SR_STACK) +#define SR_KEYLAST (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK) +#define SR_WRPAIR (SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_PARENT) + +/* + * Various routines pass around page references. A page reference is + * a pointer to the page, and the indx indicates an item on the page. + * Each page reference may include a lock. + */ +struct __epg { + PAGE *page; /* The page. */ + db_indx_t indx; /* The index on the page. */ + db_indx_t entries; /* The number of entries on page */ + DB_LOCK lock; /* The page's lock. */ + db_lockmode_t lock_mode; /* The lock mode. */ +}; + +/* + * We maintain a stack of the pages that we're locking in the tree. Grow + * the stack as necessary. + * + * XXX + * Temporary fix for #3243 -- clear the page and lock from the stack entry. + * The correct fix is to never release a stack that doesn't hold items. + */ +#define BT_STK_CLR(c) do { \ + (c)->csp = (c)->sp; \ + (c)->csp->page = NULL; \ + LOCK_INIT((c)->csp->lock); \ +} while (0) + +#define BT_STK_ENTER(env, c, pagep, page_indx, l, mode, ret) do { \ + if ((ret = ((c)->csp == (c)->esp ? \ + __bam_stkgrow(env, c) : 0)) == 0) { \ + (c)->csp->page = pagep; \ + (c)->csp->indx = (page_indx); \ + (c)->csp->entries = NUM_ENT(pagep); \ + (c)->csp->lock = l; \ + (c)->csp->lock_mode = mode; \ + } \ +} while (0) + +#define BT_STK_PUSH(env, c, pagep, page_indx, lock, mode, ret) do { \ + BT_STK_ENTER(env, c, pagep, page_indx, lock, mode, ret); \ + ++(c)->csp; \ +} while (0) + +#define BT_STK_NUM(env, c, pagep, page_indx, ret) do { \ + if ((ret = ((c)->csp == \ + (c)->esp ? 
__bam_stkgrow(env, c) : 0)) == 0) { \ + (c)->csp->page = NULL; \ + (c)->csp->indx = (page_indx); \ + (c)->csp->entries = NUM_ENT(pagep); \ + LOCK_INIT((c)->csp->lock); \ + (c)->csp->lock_mode = DB_LOCK_NG; \ + } \ +} while (0) + +#define BT_STK_NUMPUSH(env, c, pagep, page_indx, ret) do { \ + BT_STK_NUM(env, cp, pagep, page_indx, ret); \ + ++(c)->csp; \ +} while (0) + +#define BT_STK_POP(c) \ + ((c)->csp == (c)->sp ? NULL : --(c)->csp) + +/* + * Flags for __bam_dpages. + */ +#define BTD_UPDATE 0x0001 /* Update parents. */ +#define BTD_RELINK 0x0002 /* Relink leaf pages. */ + +/* + * TRY_LOCK + * When holding a stack we have pages latched but not locked so + * we must avoid an undetectable deadlock by not then blocking on a + * lock. + */ +#define TRY_LOCK(dbc, pgno, saved_pgno, saved_lock, lock_mode, label) \ + TRY_LOCK2(dbc, NULL, pgno, saved_pgno, saved_lock, lock_mode, label) +/* + * TRY_LOCK2 + * This is a special call for __bam_compact_int which uses 2 + * overlapping stacks. + */ + +#ifdef BTREE_DEBUG +#define TRY_LOCK2(dbc, ndbc, pgno, \ + saved_pgno, saved_lock, lock_mode, label) do { \ + static int BTcount = 0; \ + if ((pgno) != (saved_pgno) && \ + ((BTcount++ % 5) == 0 || \ + (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \ + lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0)) { \ + if (ret != 0 && ret != DB_LOCK_NOTGRANTED && \ + ret != DB_LOCK_DEADLOCK) \ + break; \ + if ((ndbc) != NULL) { \ + BTREE_CURSOR *__cp; \ + __cp = (BTREE_CURSOR *) (dbc)->internal; \ + __cp->sp->page = NULL; \ + LOCK_INIT(__cp->sp->lock); \ + if ((ret = __bam_stkrel(ndbc, 0)) != 0) \ + break; \ + } \ + if ((ret = __bam_stkrel(dbc, 0)) != 0) \ + break; \ + if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \ + lock_mode, 0, &(saved_lock))) != 0) \ + break; \ + saved_pgno = pgno; \ + goto label; \ + } \ + saved_pgno = pgno; \ +} while (0) +#else +#define TRY_LOCK2(dbc, ndbc, pgno, \ + saved_pgno, saved_lock, lock_mode, label) do { \ + if ((pgno) != (saved_pgno) && \ + (ret = 
__db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \ + lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0) { \ + if (ret != DB_LOCK_NOTGRANTED && \ + ret != DB_LOCK_DEADLOCK) \ + break; \ + if ((ndbc) != NULL) { \ + BTREE_CURSOR *__cp; \ + __cp = (BTREE_CURSOR *) (dbc)->internal; \ + __cp->sp->page = NULL; \ + LOCK_INIT(__cp->sp->lock); \ + if ((ret = __bam_stkrel(ndbc, 0)) != 0) \ + break; \ + } \ + if ((ret = __bam_stkrel(dbc, 0)) != 0) \ + break; \ + if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno, \ + lock_mode, 0, &(saved_lock))) != 0) \ + break; \ + saved_pgno = pgno; \ + goto label; \ + } \ + saved_pgno = pgno; \ +} while (0) +#endif + +/* Btree/Recno cursor. */ +struct __cursor { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* btree private part */ + EPG *sp; /* Stack pointer. */ + EPG *csp; /* Current stack entry. */ + EPG *esp; /* End stack pointer. */ + EPG stack[5]; + + db_indx_t ovflsize; /* Maximum key/data on-page size. */ + + db_recno_t recno; /* Current record number. */ + u_int32_t order; /* Relative order among deleted curs. */ + +#ifdef HAVE_COMPRESSION + /* + * Compression: + * + * We need to hold the current compressed chunk, as well as the previous + * key/data, in order to decompress the next key/data. We do that by + * swapping whether prevKey/Data and currentKey/Data point to + * key1/data1, or key2/data2. + * + * We store prevcursor in order to be able to perform one level of + * DB_PREV by returning prevKey/prevData. We need prev2cursor to more + * efficiently do a subsequent DB_PREV with a linear search from the + * begining of the compressed chunk. + * + * When we delete entries, we set the cursor to point to the next entry + * after the last deleted key, and set C_COMPRESS_DELETED. The del_key + * DBT holds the key of the deleted entry supposedly pointed to by a + * compressed cursor, and is used to implement DB_PREV_DUP, + * DB_PREV_NODUP, DB_NEXT_DUP, and DB_NEXT_NODUP on a deleted entry. 
+ */ + DBT compressed; /* Current compressed chunk */ + DBT key1; /* Holds prevKey or currentKey */ + DBT key2; /* Holds prevKey or currentKey */ + DBT data1; /* Holds prevData or currentData */ + DBT data2; /* Holds prevData or currentData */ + DBT del_key; /* Holds key from the deleted entry */ + DBT del_data; /* Holds data from the deleted entry */ + DBT *prevKey; /* Previous key decompressed */ + DBT *prevData; /* Previous data decompressed */ + DBT *currentKey; /* Current key decompressed */ + DBT *currentData; /* Current data decompressed */ + u_int8_t *compcursor; /* Current position in compressed */ + u_int8_t *compend; /* End of compressed */ + u_int8_t *prevcursor; /* Previous current position */ + u_int8_t *prev2cursor; /* Previous previous current position */ +#endif + + /* + * Btree: + * We set a flag in the cursor structure if the underlying object has + * been deleted. It's not strictly necessary, we could get the same + * information by looking at the page itself, but this method doesn't + * require us to retrieve the page on cursor delete. + * + * Recno: + * When renumbering recno databases during deletes, cursors referencing + * "deleted" records end up positioned between two records, and so must + * be specially adjusted on the next operation. + */ +#define C_DELETED 0x0001 /* Record was deleted. */ + /* + * There are three tree types that require maintaining record numbers. + * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set, + * and Btree off-page duplicate trees. + */ +#define C_RECNUM 0x0002 /* Tree requires record counts. */ + /* + * Recno trees have immutable record numbers by default, but optionally + * support mutable record numbers. Off-page duplicate Recno trees have + * mutable record numbers. All Btrees with record numbers (including + * off-page duplicate trees) are mutable by design, no flag is needed. + */ +#define C_RENUMBER 0x0004 /* Tree records are mutable. 
*/ + /* + * The current compressed key/data could be deleted, as well as the + * key/data that the underlying BTree cursor points to. + */ +#define C_COMPRESS_DELETED 0x0008 /* Compressed record was deleted. */ + /* + * The current compressed chunk has been modified by another DBC. A + * compressed cursor will have to seek its position again if necessary + * when it is next accessed. + */ +#define C_COMPRESS_MODIFIED 0x0010 /* Compressed record was modified. */ + u_int32_t flags; +}; + +/* + * Threshold value, as a function of bt_minkey, of the number of + * bytes a key/data pair can use before being placed on an overflow + * page. Assume every item requires the maximum alignment for + * padding, out of sheer paranoia. + */ +#define B_MINKEY_TO_OVFLSIZE(dbp, minkey, pgsize) \ + ((u_int16_t)(((pgsize) - P_OVERHEAD(dbp)) / ((minkey) * P_INDX) -\ + (BKEYDATA_PSIZE(0) + DB_ALIGN(1, sizeof(int32_t))))) + +/* + * The maximum space that a single item can ever take up on one page. + * Used by __bam_split to determine whether a split is still necessary. + */ +#define B_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define B_MAXSIZEONPAGE(ovflsize) \ + (B_MAX(BOVERFLOW_PSIZE, BKEYDATA_PSIZE(ovflsize))) + +/* + * BAM_GET_ROOT -- + * This macro is used to isolate the fact that the root page of + * a subdatabase may move if DB->compact is called on it. + * The dbp->mpf->mfp->revision will be incremented every time + * a subdatabase root or meta page moves. If this is the case then + * we must call __db_reopen to read the master database to find it. + * We leave the loop only by breaking out if we do not have a subdb + * or we are sure we have the right revision. + * + * It must be guaranteed that we cannot read an old root pgno and a + * current revision number. We note that the global revision number + * and DB handle information are only updated while holding the latches + * and locks of the master database pages. 
+ * If another thread is synchronizing the DB handle with the master + * database it will exclusively latch both the old and new pages so we will + * synchronize on that. + */ +#define BAM_GET_ROOT(dbc, root_pgno, \ + page, get_mode, lock_mode, lock, ret) do { \ + BTREE *__t = (dbc)->dbp->bt_internal; \ + BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ + db_pgno_t __root; \ + u_int32_t __rev = 0; \ + if ((root_pgno) == PGNO_INVALID) { \ + if (__cp->root == PGNO_INVALID) { \ + __root = __t->bt_root; \ + __rev = __t->revision; \ + } else \ + __root = root_pgno = __cp->root; \ + } else \ + __root = root_pgno; \ + if (STD_LOCKING(dbc) && \ + ((lock_mode) == DB_LOCK_WRITE || F_ISSET(dbc, DBC_DOWNREV) \ + || dbc->dbtype == DB_RECNO || F_ISSET(__cp, C_RECNUM)) && \ + (ret = \ + __db_lget(dbc, 0, __root, lock_mode, 0, &(lock))) != 0) \ + break; \ + if ((ret = __memp_fget((dbc)->dbp->mpf, &__root, \ + (dbc)->thread_info, dbc->txn, get_mode, &page)) == 0) { \ + if (__root == root_pgno) \ + break; \ + if (F_ISSET(dbc, DBC_OPD) || \ + !F_ISSET((dbc)->dbp, DB_AM_SUBDB) || \ + (__t->bt_root == __root && \ + (LEVEL(page) == LEAFLEVEL || TYPE(page) == \ + (dbc->dbtype == DB_BTREE ? P_IBTREE : P_IRECNO)) &&\ + __rev == (dbc)->dbp->mpf->mfp->revision)) { \ + root_pgno = __root; \ + break; \ + } \ + if ((ret = __memp_fput((dbc)->dbp->mpf, \ + (dbc)->thread_info, page, (dbc)->priority)) != 0) \ + break; \ + } else if (ret != DB_PAGE_NOTFOUND) \ + break; \ + if ((ret = __LPUT(dbc, lock)) != 0) \ + break; \ + if ((ret = __db_reopen(dbc)) != 0) \ + break; \ +} while (1) + +/* + * Return the root of this tree. If this is an off page duplicate tree + * then it's in the cursor, otherwise we must look in the db handle. + */ +#define BAM_ROOT_PGNO(dbc) \ + (((BTREE_CURSOR *)(dbc)->internal)->root == PGNO_INVALID ? \ + ((BTREE*)(dbc)->dbp->bt_internal)->bt_root : \ + ((BTREE_CURSOR *)(dbc)->internal)->root) + + + +/* + * The in-memory, per-tree btree/recno data structure. 
+ */ +struct __btree { /* Btree access method. */ + /* + * These fields may change if this is a subdatabase and + * it gets compacted. + */ + db_pgno_t bt_meta; /* Database meta-data page. */ + db_pgno_t bt_root; /* Database root page. */ + u_int32_t revision; /* Revision of root/meta. */ + + u_int32_t bt_minkey; /* Minimum keys per page. */ + + /* Btree comparison function. */ + int (*bt_compare) __P((DB *, const DBT *, const DBT *)); + /* Btree prefix function. */ + size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *)); + /* Btree compress function. */ +#ifdef HAVE_COMPRESSION + int (*bt_compress) __P((DB *, const DBT *, const DBT *, const DBT *, + const DBT *, DBT *)); + /* Btree decompress function. */ + int (*bt_decompress) __P((DB *, const DBT *, const DBT *, DBT *, DBT *, + DBT *)); + /* dup_compare for compression */ + int (*compress_dup_compare) __P((DB *, const DBT *, const DBT *)); +#endif + + /* Recno access method. */ + int re_pad; /* Fixed-length padding byte. */ + int re_delim; /* Variable-length delimiting byte. */ + u_int32_t re_len; /* Length for fixed-length records. */ + char *re_source; /* Source file name. */ + + /* + * !!! + * The bt_lpgno field is NOT protected by any mutex, and for this + * reason must be advisory only, so, while it is read/written by + * multiple threads, DB is completely indifferent to the quality + * of its information. + */ + db_pgno_t bt_lpgno; /* Last insert location. */ + DB_LSN bt_llsn; /* Last insert LSN. */ + + /* + * !!! + * The re_modified field is NOT protected by any mutex, and for this + * reason cannot be anything more complicated than a zero/non-zero + * value. The actual writing of the backing source file cannot be + * threaded, so clearing the flag isn't a problem. + */ + int re_modified; /* If the tree was modified. */ + + /* + * !!! + * These fields are ignored as far as multi-threading is concerned. 
+ * There are no transaction semantics associated with backing files, + * nor is there any thread protection. + */ + FILE *re_fp; /* Source file handle. */ + int re_eof; /* Backing source file EOF reached. */ + db_recno_t re_last; /* Last record number read. */ + +}; + +/* + * Modes for the __bam_curadj recovery records (btree_curadj). + * These appear in log records, so we wire the values and + * do not leave it up to the compiler. + */ +typedef enum { + DB_CA_DI = 1, + DB_CA_DUP = 2, + DB_CA_RSPLIT = 3, + DB_CA_SPLIT = 4 +} db_ca_mode; + +/* + * Flags for __bam_pinsert. + */ +#define BPI_SPACEONLY 0x01 /* Only check for space to update. */ +#define BPI_NORECNUM 0x02 /* Not update the recnum on the left. */ +#define BPI_NOLOGGING 0x04 /* Don't log the update. */ +#define BPI_REPLACE 0x08 /* Replace the record. */ + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/btree_auto.h" +#include "dbinc_auto/btree_ext.h" +#include "dbinc/db_am.h" +#endif /* !_DB_BTREE_H_ */ diff --git a/src/dbinc/clock.h b/src/dbinc/clock.h new file mode 100644 index 00000000..8a3bfd5e --- /dev/null +++ b/src/dbinc/clock.h @@ -0,0 +1,131 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)time.h 8.5 (Berkeley) 5/4/95 + * FreeBSD: src/sys/sys/time.h,v 1.65 2004/04/07 04:19:49 imp Exp + */ + +#ifndef _DB_CLOCK_H_ +#define _DB_CLOCK_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * This declaration is POSIX-compatible. Because there are lots of different + * time.h include file patterns out there, it's easier to declare our own name + * in all cases than to try and discover if a system has a struct timespec. + * For the same reason, and because we'd have to #include in db.h, + * we don't export any timespec structures in the DB API, even in places where + * it would make sense, like the replication statistics information. 
+ */ +typedef struct { + time_t tv_sec; /* seconds */ +#ifdef HAVE_MIXED_SIZE_ADDRESSING + int32_t tv_nsec; +#else + long tv_nsec; /* nanoseconds */ +#endif +} db_timespec; + +/* Operations on timespecs */ +#undef timespecclear +#define timespecclear(tvp) ((tvp)->tv_sec = (tvp)->tv_nsec = 0) +#undef timespecisset +#define timespecisset(tvp) ((tvp)->tv_sec || (tvp)->tv_nsec) +#undef timespeccmp +#define timespeccmp(tvp, uvp, cmp) \ + (((tvp)->tv_sec == (uvp)->tv_sec) ? \ + ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) +#undef timespecadd +/* + * Note that using timespecadd to add to yourself (i.e. doubling) + * must be supported. + */ +#define timespecadd(vvp, uvp) \ + do { \ + (vvp)->tv_sec += (uvp)->tv_sec; \ + (vvp)->tv_nsec += (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec >= 1000000000) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_nsec -= 1000000000; \ + } \ + } while (0) +#undef timespecsub +#define timespecsub(vvp, uvp) \ + do { \ + (vvp)->tv_sec -= (uvp)->tv_sec; \ + (vvp)->tv_nsec -= (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_nsec += 1000000000; \ + } \ + } while (0) + +#undef timespecset +#define timespecset(vvp, sec, nsec) \ + do { \ + (vvp)->tv_sec = (time_t)(sec); \ + (vvp)->tv_nsec = (long)(nsec); \ + } while (0) + +#define DB_TIMEOUT_TO_TIMESPEC(t, vvp) \ + do { \ + (vvp)->tv_sec = (time_t)((t) / 1000000); \ + (vvp)->tv_nsec = (long)(((t) % 1000000) * 1000); \ + } while (0) + +#define DB_TIMESPEC_TO_TIMEOUT(t, vvp, prec) \ + do { \ + t = (u_long)((vvp)->tv_sec * 1000000); \ + t += (u_long)((vvp)->tv_nsec / 1000); \ + /* Add in 1 usec for lost nsec precision if wanted. 
*/ \ + if (prec) \ + t++; \ + } while (0) + +#define TIMESPEC_ADD_DB_TIMEOUT(vvp, t) \ + do { \ + db_timespec __tmp; \ + DB_TIMEOUT_TO_TIMESPEC(t, &__tmp); \ + timespecadd((vvp), &__tmp); \ + } while (0) + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_CLOCK_H_ */ diff --git a/src/dbinc/crypto.h b/src/dbinc/crypto.h new file mode 100644 index 00000000..bd53c704 --- /dev/null +++ b/src/dbinc/crypto.h @@ -0,0 +1,93 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_CRYPTO_H_ +#define _DB_CRYPTO_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef HAVE_CRYPTO_IPP +#include +#endif + +/* + * !!! + * These are the internal representations of the algorithm flags. + * They are used in both the DB_CIPHER structure and the CIPHER + * structure so we can tell if users specified both passwd and alg + * correctly. + * + * CIPHER_ANY is used when an app joins an existing env but doesn't + * know the algorithm originally used. This is only valid in the + * DB_CIPHER structure until we open and can set the alg. + */ +/* + * We store the algorithm in an 8-bit field on the meta-page. So we + * use a numeric value, not bit fields. + * now we are limited to 8 algorithms before we cannot use bits and + * need numeric values. That should be plenty. It is okay for the + * CIPHER_ANY flag to go beyond that since that is never stored on disk. + */ + +/* + * This structure is per-process, not in shared memory. + */ +struct __db_cipher { + u_int (*adj_size) __P((size_t)); + int (*close) __P((ENV *, void *)); + int (*decrypt) __P((ENV *, void *, void *, u_int8_t *, size_t)); + int (*encrypt) __P((ENV *, void *, void *, u_int8_t *, size_t)); + int (*init) __P((ENV *, DB_CIPHER *)); + + u_int8_t mac_key[DB_MAC_KEY]; /* MAC key. 
*/ + void *data; /* Algorithm-specific information */ + +#define CIPHER_AES 1 /* AES algorithm */ + u_int8_t alg; /* Algorithm used - See above */ + u_int8_t spare[3]; /* Spares */ + +#define CIPHER_ANY 0x00000001 /* Only for DB_CIPHER */ + u_int32_t flags; /* Other flags */ +}; + +#ifdef HAVE_CRYPTO + +#include "crypto/rijndael/rijndael-api-fst.h" + +/* + * Shared ciphering structure + * No mutex needed because all information is read-only after creation. + */ +typedef struct __cipher { + roff_t passwd; /* Offset to shared passwd */ + size_t passwd_len; /* Length of passwd */ + u_int32_t flags; /* Algorithm used - see above */ +} CIPHER; + +#define DB_AES_KEYLEN 128 /* AES key length */ +#define DB_AES_CHUNK 16 /* AES byte unit size */ + +typedef struct __aes_cipher { +#ifdef HAVE_CRYPTO_IPP + void *ipp_ctx; /* IPP key instance */ +#else + keyInstance decrypt_ki; /* Decryption key instance */ + keyInstance encrypt_ki; /* Encryption key instance */ +#endif + u_int32_t flags; /* AES-specific flags */ +} AES_CIPHER; + +#include "dbinc_auto/crypto_ext.h" +#endif /* HAVE_CRYPTO */ + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_CRYPTO_H_ */ diff --git a/src/dbinc/cxx_int.h b/src/dbinc/cxx_int.h new file mode 100644 index 00000000..626937b8 --- /dev/null +++ b/src/dbinc/cxx_int.h @@ -0,0 +1,77 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_CXX_INT_H_ +#define _DB_CXX_INT_H_ + +// private data structures known to the implementation only + +// +// Using FooImp classes will allow the implementation to change in the +// future without any modification to user code or even to header files +// that the user includes. FooImp * is just like void * except that it +// provides a little extra protection, since you cannot randomly assign +// any old pointer to a FooImp* as you can with void *. 
Currently, a +// pointer to such an opaque class is always just a pointer to the +// appropriate underlying implementation struct. These are converted +// back and forth using the various overloaded wrap()/unwrap() methods. +// This is essentially a use of the "Bridge" Design Pattern. +// +// WRAPPED_CLASS implements the appropriate wrap() and unwrap() methods +// for a wrapper class that has an underlying pointer representation. +// +#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE) \ + class _IMP_CLASS {}; \ + \ + inline _WRAPPED_TYPE *unwrap(_WRAPPER_CLASS *val) \ + { \ + if (!val) return (0); \ + return (val->get_##_WRAPPED_TYPE()); \ + } \ + \ + inline const _WRAPPED_TYPE *unwrapConst(const _WRAPPER_CLASS *val) \ + { \ + if (!val) return (0); \ + return (val->get_const_##_WRAPPED_TYPE()); \ + } + +WRAPPED_CLASS(Db, DbImp, DB) +WRAPPED_CLASS(DbChannel, DbChannelImp, DB_CHANNEL) +WRAPPED_CLASS(DbEnv, DbEnvImp, DB_ENV) +WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE) +WRAPPED_CLASS(DbSequence, DbSequenceImp, DB_SEQUENCE) +WRAPPED_CLASS(DbSite, DbSiteImp, DB_SITE) +WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN) + +// A tristate integer value used by the DB_ERROR macro below. +// We chose not to make this an enumerated type so it can +// be kept private, even though methods that return the +// tristate int can be declared in db_cxx.h . +// +#define ON_ERROR_THROW 1 +#define ON_ERROR_RETURN 0 +#define ON_ERROR_UNKNOWN (-1) + +// Macros that handle detected errors, in case we want to +// change the default behavior. The 'policy' is one of +// the tristate values given above. If UNKNOWN is specified, +// the behavior is taken from the last initialized DbEnv. 
+// +#define DB_ERROR(dbenv, caller, ecode, policy) \ + DbEnv::runtime_error(dbenv, caller, ecode, policy) + +#define DB_ERROR_DBT(dbenv, caller, dbt, policy) \ + DbEnv::runtime_error_dbt(dbenv, caller, dbt, policy) + +#define DB_OVERFLOWED_DBT(dbt) \ + (F_ISSET(dbt, DB_DBT_USERMEM) && dbt->size > dbt->ulen) + +/* values for Db::flags_ */ +#define DB_CXX_PRIVATE_ENV 0x00000001 + +#endif /* !_DB_CXX_INT_H_ */ diff --git a/src/dbinc/db.in b/src/dbinc/db.in new file mode 100644 index 00000000..f4d3f6ba --- /dev/null +++ b/src/dbinc/db.in @@ -0,0 +1,2769 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + * + * db.h include file layout: + * General. + * Database Environment. + * Locking subsystem. + * Logging subsystem. + * Shared buffer cache (mpool) subsystem. + * Transaction subsystem. + * Access methods. + * Access method cursors. + * Dbm/Ndbm, Hsearch historic interfaces. + */ + +#ifndef _DB_H_ +#define _DB_H_ + +#ifndef __NO_SYSTEM_INCLUDES +#include +@inttypes_h_decl@ +@stdint_h_decl@ +@stddef_h_decl@ +#include +@unistd_h_decl@ +@thread_h_decl@ +#endif + +@platform_header@ +#if defined(__cplusplus) +extern "C" { +#endif + +@DB_CONST@ +@DB_PROTO1@ +@DB_PROTO2@ + +/* + * Berkeley DB version information. + */ +#define DB_VERSION_FAMILY @DB_VERSION_FAMILY@ +#define DB_VERSION_RELEASE @DB_VERSION_RELEASE@ +#define DB_VERSION_MAJOR @DB_VERSION_MAJOR@ +#define DB_VERSION_MINOR @DB_VERSION_MINOR@ +#define DB_VERSION_PATCH @DB_VERSION_PATCH@ +#define DB_VERSION_STRING @DB_VERSION_STRING@ +#define DB_VERSION_FULL_STRING @DB_VERSION_FULL_STRING@ + +/* + * !!! + * Berkeley DB uses specifically sized types. If they're not provided by + * the system, typedef them here. + * + * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__, + * as does BIND and Kerberos, since we don't know for sure what #include + * files the user is using. + * + * !!! 
+ * We also provide the standard u_int, u_long etc., if they're not provided + * by the system. + */ +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ +@u_int8_decl@ +@int16_decl@ +@u_int16_decl@ +@int32_decl@ +@u_int32_decl@ +@int64_decl@ +@u_int64_decl@ +#endif + +@u_char_decl@ +@u_int_decl@ +@u_long_decl@ +@u_short_decl@ + +/* + * Missing ANSI types. + * + * uintmax_t -- + * Largest unsigned type, used to align structures in memory. We don't store + * floating point types in structures, so integral types should be sufficient + * (and we don't have to worry about systems that store floats in other than + * power-of-2 numbers of bytes). Additionally this fixes compilers that rewrite + * structure assignments and ANSI C memcpy calls to be in-line instructions + * that happen to require alignment. + * + * uintptr_t -- + * Unsigned type that's the same size as a pointer. There are places where + * DB modifies pointers by discarding the bottom bits to guarantee alignment. + * We can't use uintmax_t, it may be larger than the pointer, and compilers + * get upset about that. So far we haven't run on any machine where there's + * no unsigned type the same size as a pointer -- here's hoping. + */ +@uintmax_t_decl@ +@uintptr_t_decl@ + +@FILE_t_decl@ +@off_t_decl@ +@pid_t_decl@ +@size_t_decl@ +#ifdef HAVE_MIXED_SIZE_ADDRESSING +typedef u_int32_t db_size_t; +#else +typedef size_t db_size_t; +#endif +@ssize_t_decl@ +#ifdef HAVE_MIXED_SIZE_ADDRESSING +typedef int32_t db_ssize_t; +#else +typedef ssize_t db_ssize_t; +#endif +@time_t_decl@ + +/* + * Sequences are only available on machines with 64-bit integral types. + */ +@db_seq_decl@ + +/* Thread and process identification. */ +@db_threadid_t_decl@ + +/* Basic types that are exported or quasi-exported. */ +typedef u_int32_t db_pgno_t; /* Page number type. */ +typedef u_int16_t db_indx_t; /* Page offset type. 
*/ +#define DB_MAX_PAGES 0xffffffff /* >= # of pages in a file */ + +typedef u_int32_t db_recno_t; /* Record number type. */ +#define DB_MAX_RECORDS 0xffffffff /* >= # of records in a tree */ + +typedef u_int32_t db_timeout_t; /* Type of a timeout. */ + +/* + * Region offsets are the difference between a pointer in a region and the + * region's base address. With private environments, both addresses are the + * result of calling malloc, and we can't assume anything about what malloc + * will return, so region offsets have to be able to hold differences between + * arbitrary pointers. + */ +typedef db_size_t roff_t; + +/* + * Forward structure declarations, so we can declare pointers and + * applications can get type checking. + */ +struct __channel; typedef struct __channel CHANNEL; +struct __db; typedef struct __db DB; +struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT; +struct __db_channel; typedef struct __db_channel DB_CHANNEL; +struct __db_cipher; typedef struct __db_cipher DB_CIPHER; +struct __db_compact; typedef struct __db_compact DB_COMPACT; +struct __db_dbt; typedef struct __db_dbt DBT; +struct __db_distab; typedef struct __db_distab DB_DISTAB; +struct __db_env; typedef struct __db_env DB_ENV; +struct __db_h_stat; typedef struct __db_h_stat DB_HASH_STAT; +struct __db_heap_rid; typedef struct __db_heap_rid DB_HEAP_RID; +struct __db_heap_stat; typedef struct __db_heap_stat DB_HEAP_STAT; +struct __db_ilock; typedef struct __db_ilock DB_LOCK_ILOCK; +struct __db_lock_hstat; typedef struct __db_lock_hstat DB_LOCK_HSTAT; +struct __db_lock_pstat; typedef struct __db_lock_pstat DB_LOCK_PSTAT; +struct __db_lock_stat; typedef struct __db_lock_stat DB_LOCK_STAT; +struct __db_lock_u; typedef struct __db_lock_u DB_LOCK; +struct __db_locker; typedef struct __db_locker DB_LOCKER; +struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; +struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB; +struct __db_log; typedef struct __db_log DB_LOG; +struct 
__db_log_cursor; typedef struct __db_log_cursor DB_LOGC; +struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; +struct __db_lsn; typedef struct __db_lsn DB_LSN; +struct __db_mpool; typedef struct __db_mpool DB_MPOOL; +struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; +struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT; +struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE; +struct __db_mutex_stat; typedef struct __db_mutex_stat DB_MUTEX_STAT; +struct __db_mutex_t; typedef struct __db_mutex_t DB_MUTEX; +struct __db_mutexmgr; typedef struct __db_mutexmgr DB_MUTEXMGR; +struct __db_preplist; typedef struct __db_preplist DB_PREPLIST; +struct __db_qam_stat; typedef struct __db_qam_stat DB_QUEUE_STAT; +struct __db_rep; typedef struct __db_rep DB_REP; +struct __db_rep_stat; typedef struct __db_rep_stat DB_REP_STAT; +struct __db_repmgr_conn_err; + typedef struct __db_repmgr_conn_err DB_REPMGR_CONN_ERR; +struct __db_repmgr_site;typedef struct __db_repmgr_site DB_REPMGR_SITE; +struct __db_repmgr_stat;typedef struct __db_repmgr_stat DB_REPMGR_STAT; +struct __db_seq_record; typedef struct __db_seq_record DB_SEQ_RECORD; +struct __db_seq_stat; typedef struct __db_seq_stat DB_SEQUENCE_STAT; +struct __db_site; typedef struct __db_site DB_SITE; +struct __db_sequence; typedef struct __db_sequence DB_SEQUENCE; +struct __db_thread_info;typedef struct __db_thread_info DB_THREAD_INFO; +struct __db_txn; typedef struct __db_txn DB_TXN; +struct __db_txn_active; typedef struct __db_txn_active DB_TXN_ACTIVE; +struct __db_txn_stat; typedef struct __db_txn_stat DB_TXN_STAT; +struct __db_txn_token; typedef struct __db_txn_token DB_TXN_TOKEN; +struct __db_txnmgr; typedef struct __db_txnmgr DB_TXNMGR; +struct __dbc; typedef struct __dbc DBC; +struct __dbc_internal; typedef struct __dbc_internal DBC_INTERNAL; +struct __env; typedef struct __env ENV; +struct __fh_t; typedef struct __fh_t DB_FH; +struct __fname; typedef struct __fname 
FNAME; +struct __key_range; typedef struct __key_range DB_KEY_RANGE; +struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; +struct __db_logvrfy_config; +typedef struct __db_logvrfy_config DB_LOG_VERIFY_CONFIG; + +/* + * The Berkeley DB API flags are automatically-generated -- the following flag + * names are no longer used, but remain for compatibility reasons. + */ +#define DB_DEGREE_2 DB_READ_COMMITTED +#define DB_DIRTY_READ DB_READ_UNCOMMITTED +#define DB_JOINENV 0x0 + +/* Key/data structure -- a Data-Base Thang. */ +struct __db_dbt { + void *data; /* Key/data */ + u_int32_t size; /* key/data length */ + + u_int32_t ulen; /* RO: length of user buffer. */ + u_int32_t dlen; /* RO: get/put record length. */ + u_int32_t doff; /* RO: get/put record offset. */ + + void *app_data; + +#define DB_DBT_APPMALLOC 0x001 /* Callback allocated memory. */ +#define DB_DBT_BULK 0x002 /* Internal: Insert if duplicate. */ +#define DB_DBT_DUPOK 0x004 /* Internal: Insert if duplicate. */ +#define DB_DBT_ISSET 0x008 /* Lower level calls set value. */ +#define DB_DBT_MALLOC 0x010 /* Return in malloc'd memory. */ +#define DB_DBT_MULTIPLE 0x020 /* References multiple records. */ +#define DB_DBT_PARTIAL 0x040 /* Partial put/get. */ +#define DB_DBT_REALLOC 0x080 /* Return in realloc'd memory. */ +#define DB_DBT_READONLY 0x100 /* Readonly, don't update. */ +#define DB_DBT_STREAMING 0x200 /* Internal: DBT is being streamed. */ +#define DB_DBT_USERCOPY 0x400 /* Use the user-supplied callback. */ +#define DB_DBT_USERMEM 0x800 /* Return in user's memory. */ + u_int32_t flags; +}; + +/******************************************************* + * Mutexes. + *******************************************************/ +/* + * When mixed size addressing is supported mutexes need to be the same size + * independent of the process address size is. 
+ */ +#ifdef HAVE_MIXED_SIZE_ADDRESSING +typedef db_size_t db_mutex_t; +#else +typedef uintptr_t db_mutex_t; +#endif + +struct __db_mutex_stat { /* SHARED */ + /* The following fields are maintained in the region's copy. */ + u_int32_t st_mutex_align; /* Mutex alignment */ + u_int32_t st_mutex_tas_spins; /* Mutex test-and-set spins */ + u_int32_t st_mutex_init; /* Initial mutex count */ + u_int32_t st_mutex_cnt; /* Mutex count */ + u_int32_t st_mutex_max; /* Mutex max */ + u_int32_t st_mutex_free; /* Available mutexes */ + u_int32_t st_mutex_inuse; /* Mutexes in use */ + u_int32_t st_mutex_inuse_max; /* Maximum mutexes ever in use */ + + /* The following fields are filled-in from other places. */ +#ifndef __TEST_DB_NO_STATISTICS + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted without wait. */ + roff_t st_regsize; /* Region size. */ + roff_t st_regmax; /* Region max. */ +#endif +}; + +/* This is the length of the buffer passed to DB_ENV->thread_id_string() */ +#define DB_THREADID_STRLEN 128 + +/******************************************************* + * Locking. + *******************************************************/ +#define DB_LOCKVERSION 1 + +#define DB_FILE_ID_LEN 20 /* Unique file ID length. */ + +/* + * Deadlock detector modes; used in the DB_ENV structure to configure the + * locking subsystem. + */ +#define DB_LOCK_NORUN 0 +#define DB_LOCK_DEFAULT 1 /* Default policy. */ +#define DB_LOCK_EXPIRE 2 /* Only expire locks, no detection. */ +#define DB_LOCK_MAXLOCKS 3 /* Select locker with max locks. */ +#define DB_LOCK_MAXWRITE 4 /* Select locker with max writelocks. */ +#define DB_LOCK_MINLOCKS 5 /* Select locker with min locks. */ +#define DB_LOCK_MINWRITE 6 /* Select locker with min writelocks. */ +#define DB_LOCK_OLDEST 7 /* Select oldest locker. */ +#define DB_LOCK_RANDOM 8 /* Select random locker. */ +#define DB_LOCK_YOUNGEST 9 /* Select youngest locker. 
*/ + +/* + * Simple R/W lock modes and for multi-granularity intention locking. + * + * !!! + * These values are NOT random, as they are used as an index into the lock + * conflicts arrays, i.e., DB_LOCK_IWRITE must be == 3, and DB_LOCK_IREAD + * must be == 4. + */ +typedef enum { + DB_LOCK_NG=0, /* Not granted. */ + DB_LOCK_READ=1, /* Shared/read. */ + DB_LOCK_WRITE=2, /* Exclusive/write. */ + DB_LOCK_WAIT=3, /* Wait for event */ + DB_LOCK_IWRITE=4, /* Intent exclusive/write. */ + DB_LOCK_IREAD=5, /* Intent to share/read. */ + DB_LOCK_IWR=6, /* Intent to read and write. */ + DB_LOCK_READ_UNCOMMITTED=7, /* Degree 1 isolation. */ + DB_LOCK_WWRITE=8 /* Was Written. */ +} db_lockmode_t; + +/* + * Request types. + */ +typedef enum { + DB_LOCK_DUMP=0, /* Display held locks. */ + DB_LOCK_GET=1, /* Get the lock. */ + DB_LOCK_GET_TIMEOUT=2, /* Get lock with a timeout. */ + DB_LOCK_INHERIT=3, /* Pass locks to parent. */ + DB_LOCK_PUT=4, /* Release the lock. */ + DB_LOCK_PUT_ALL=5, /* Release locker's locks. */ + DB_LOCK_PUT_OBJ=6, /* Release locker's locks on obj. */ + DB_LOCK_PUT_READ=7, /* Release locker's read locks. */ + DB_LOCK_TIMEOUT=8, /* Force a txn to timeout. */ + DB_LOCK_TRADE=9, /* Trade locker ids on a lock. */ + DB_LOCK_UPGRADE_WRITE=10 /* Upgrade writes for dirty reads. */ +} db_lockop_t; + +/* + * Status of a lock. + */ +typedef enum { + DB_LSTAT_ABORTED=1, /* Lock belongs to an aborted txn. */ + DB_LSTAT_EXPIRED=2, /* Lock has expired. */ + DB_LSTAT_FREE=3, /* Lock is unallocated. */ + DB_LSTAT_HELD=4, /* Lock is currently held. */ + DB_LSTAT_PENDING=5, /* Lock was waiting and has been + * promoted; waiting for the owner + * to run and upgrade it to held. */ + DB_LSTAT_WAITING=6 /* Lock is on the wait queue. */ +}db_status_t; + +/* Lock statistics structure. */ +struct __db_lock_stat { /* SHARED */ + u_int32_t st_id; /* Last allocated locker ID. */ + u_int32_t st_cur_maxid; /* Current maximum unused ID. 
*/ + u_int32_t st_initlocks; /* Initial number of locks in table. */ + u_int32_t st_initlockers; /* Initial num of lockers in table. */ + u_int32_t st_initobjects; /* Initial num of objects in table. */ + u_int32_t st_locks; /* Current number of locks in table. */ + u_int32_t st_lockers; /* Current num of lockers in table. */ + u_int32_t st_objects; /* Current num of objects in table. */ + u_int32_t st_maxlocks; /* Maximum number of locks in table. */ + u_int32_t st_maxlockers; /* Maximum num of lockers in table. */ + u_int32_t st_maxobjects; /* Maximum num of objects in table. */ + u_int32_t st_partitions; /* number of partitions. */ + u_int32_t st_tablesize; /* Size of object hash table. */ + int32_t st_nmodes; /* Number of lock modes. */ + u_int32_t st_nlockers; /* Current number of lockers. */ +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t st_nlocks; /* Current number of locks. */ + u_int32_t st_maxnlocks; /* Maximum number of locks so far. */ + u_int32_t st_maxhlocks; /* Maximum number of locks in any bucket. */ + uintmax_t st_locksteals; /* Number of lock steals so far. */ + uintmax_t st_maxlsteals; /* Maximum number of steals in any partition. */ + u_int32_t st_maxnlockers; /* Maximum number of lockers so far. */ + u_int32_t st_nobjects; /* Current number of objects. */ + u_int32_t st_maxnobjects; /* Maximum number of objects so far. */ + u_int32_t st_maxhobjects; /* Maximum number of objects in any bucket. */ + uintmax_t st_objectsteals; /* Number of object steals so far. */ + uintmax_t st_maxosteals; /* Maximum number of steals in any partition. */ + uintmax_t st_nrequests; /* Number of lock gets. */ + uintmax_t st_nreleases; /* Number of lock puts. */ + uintmax_t st_nupgrade; /* Number of lock upgrades. */ + uintmax_t st_ndowngrade; /* Number of lock downgrades. */ + uintmax_t st_lock_wait; /* Lock conflicts w/ subsequent wait */ + uintmax_t st_lock_nowait; /* Lock conflicts w/o subsequent wait */ + uintmax_t st_ndeadlocks; /* Number of lock deadlocks. 
*/ + db_timeout_t st_locktimeout; /* Lock timeout. */ + uintmax_t st_nlocktimeouts; /* Number of lock timeouts. */ + db_timeout_t st_txntimeout; /* Transaction timeout. */ + uintmax_t st_ntxntimeouts; /* Number of transaction timeouts. */ + uintmax_t st_part_wait; /* Partition lock granted after wait. */ + uintmax_t st_part_nowait; /* Partition lock granted without wait. */ + uintmax_t st_part_max_wait; /* Max partition lock granted after wait. */ + uintmax_t st_part_max_nowait; /* Max partition lock granted without wait. */ + uintmax_t st_objs_wait; /* Object lock granted after wait. */ + uintmax_t st_objs_nowait; /* Object lock granted without wait. */ + uintmax_t st_lockers_wait; /* Locker lock granted after wait. */ + uintmax_t st_lockers_nowait; /* Locker lock granted without wait. */ + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_hash_len; /* Max length of bucket. */ + roff_t st_regsize; /* Region size. */ +#endif +}; + +struct __db_lock_hstat { /* SHARED */ + uintmax_t st_nrequests; /* Number of lock gets. */ + uintmax_t st_nreleases; /* Number of lock puts. */ + uintmax_t st_nupgrade; /* Number of lock upgrades. */ + uintmax_t st_ndowngrade; /* Number of lock downgrades. */ + u_int32_t st_nlocks; /* Current number of locks. */ + u_int32_t st_maxnlocks; /* Maximum number of locks so far. */ + u_int32_t st_nobjects; /* Current number of objects. */ + u_int32_t st_maxnobjects; /* Maximum number of objects so far. */ + uintmax_t st_lock_wait; /* Lock conflicts w/ subsequent wait */ + uintmax_t st_lock_nowait; /* Lock conflicts w/o subsequent wait */ + uintmax_t st_nlocktimeouts; /* Number of lock timeouts. */ + uintmax_t st_ntxntimeouts; /* Number of transaction timeouts. */ + u_int32_t st_hash_len; /* Max length of bucket. */ +}; + +struct __db_lock_pstat { /* SHARED */ + u_int32_t st_nlocks; /* Current number of locks. 
*/ + u_int32_t st_maxnlocks; /* Maximum number of locks so far. */ + u_int32_t st_nobjects; /* Current number of objects. */ + u_int32_t st_maxnobjects; /* Maximum number of objects so far. */ + uintmax_t st_locksteals; /* Number of lock steals so far. */ + uintmax_t st_objectsteals; /* Number of objects steals so far. */ +}; + +/* + * DB_LOCK_ILOCK -- + * Internal DB access method lock. + */ +struct __db_ilock { /* SHARED */ + db_pgno_t pgno; /* Page being locked. */ + u_int8_t fileid[DB_FILE_ID_LEN];/* File id. */ +#define DB_HANDLE_LOCK 1 +#define DB_RECORD_LOCK 2 +#define DB_PAGE_LOCK 3 +#define DB_DATABASE_LOCK 4 + u_int32_t type; /* Type of lock. */ +}; + +/* + * DB_LOCK -- + * The structure is allocated by the caller and filled in during a + * lock_get request (or a lock_vec/DB_LOCK_GET). + */ +struct __db_lock_u { /* SHARED */ + roff_t off; /* Offset of the lock in the region */ + u_int32_t ndx; /* Index of the object referenced by + * this lock; used for locking. */ + u_int32_t gen; /* Generation number of this lock. */ + db_lockmode_t mode; /* mode of this lock. */ +}; + +/* Lock request structure. */ +struct __db_lockreq { + db_lockop_t op; /* Operation. */ + db_lockmode_t mode; /* Requested mode. */ + db_timeout_t timeout; /* Time to expire lock. */ + DBT *obj; /* Object being locked. */ + DB_LOCK lock; /* Lock returned. */ +}; + +/******************************************************* + * Logging. + *******************************************************/ +#define DB_LOGVERSION 18 /* Current log version. */ +#define DB_LOGVERSION_LATCHING 15 /* Log version using latching. */ +#define DB_LOGCHKSUM 12 /* Check sum headers. */ +#define DB_LOGOLDVER 8 /* Oldest log version supported. */ +#define DB_LOGMAGIC 0x040988 + +/* + * A DB_LSN has two parts, a fileid which identifies a specific file, and an + * offset within that file. 
The fileid is an unsigned 4-byte quantity that + * uniquely identifies a file within the log directory -- currently a simple + * counter inside the log. The offset is also an unsigned 4-byte value. The + * log manager guarantees the offset is never more than 4 bytes by switching + * to a new log file before the maximum length imposed by an unsigned 4-byte + * offset is reached. + */ +struct __db_lsn { /* SHARED */ + u_int32_t file; /* File ID. */ + u_int32_t offset; /* File offset. */ +}; + +/* + * Application-specified log record types start at DB_user_BEGIN, and must not + * equal or exceed DB_debug_FLAG. + * + * DB_debug_FLAG is the high-bit of the u_int32_t that specifies a log record + * type. If the flag is set, it's a log record that was logged for debugging + * purposes only, even if it reflects a database change -- the change was part + * of a non-durable transaction. + */ +#define DB_user_BEGIN 10000 +#define DB_debug_FLAG 0x80000000 + +/* + * DB_LOGC -- + * Log cursor. + */ +struct __db_log_cursor { + ENV *env; /* Environment */ + + DB_FH *fhp; /* File handle. */ + DB_LSN lsn; /* Cursor: LSN */ + u_int32_t len; /* Cursor: record length */ + u_int32_t prev; /* Cursor: previous record's offset */ + + DBT dbt; /* Return DBT. */ + DB_LSN p_lsn; /* Persist LSN. */ + u_int32_t p_version; /* Persist version. */ + + u_int8_t *bp; /* Allocated read buffer. */ + u_int32_t bp_size; /* Read buffer length in bytes. */ + u_int32_t bp_rlen; /* Read buffer valid data length. */ + DB_LSN bp_lsn; /* Read buffer first byte LSN. */ + + u_int32_t bp_maxrec; /* Max record length in the log file. */ + + /* DB_LOGC PUBLIC HANDLE LIST BEGIN */ + int (*close) __P((DB_LOGC *, u_int32_t)); + int (*get) __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); + int (*version) __P((DB_LOGC *, u_int32_t *, u_int32_t)); + /* DB_LOGC PUBLIC HANDLE LIST END */ + +#define DB_LOG_DISK 0x01 /* Log record came from disk. 
*/ +#define DB_LOG_LOCKED 0x02 /* Log region already locked */ +#define DB_LOG_SILENT_ERR 0x04 /* Turn-off error messages. */ + u_int32_t flags; +}; + +/* Log statistics structure. */ +struct __db_log_stat { /* SHARED */ + u_int32_t st_magic; /* Log file magic number. */ + u_int32_t st_version; /* Log file version number. */ + int32_t st_mode; /* Log file permissions mode. */ + u_int32_t st_lg_bsize; /* Log buffer size. */ + u_int32_t st_lg_size; /* Log file size. */ + u_int32_t st_wc_bytes; /* Bytes to log since checkpoint. */ + u_int32_t st_wc_mbytes; /* Megabytes to log since checkpoint. */ + u_int32_t st_fileid_init; /* Initial allocation for fileids. */ +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t st_nfileid; /* Current number of fileids. */ + u_int32_t st_maxnfileid; /* Maximum number of fileids used. */ + uintmax_t st_record; /* Records entered into the log. */ + u_int32_t st_w_bytes; /* Bytes to log. */ + u_int32_t st_w_mbytes; /* Megabytes to log. */ + uintmax_t st_wcount; /* Total I/O writes to the log. */ + uintmax_t st_wcount_fill; /* Overflow writes to the log. */ + uintmax_t st_rcount; /* Total I/O reads from the log. */ + uintmax_t st_scount; /* Total syncs to the log. */ + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_cur_file; /* Current log file number. */ + u_int32_t st_cur_offset; /* Current log file offset. */ + u_int32_t st_disk_file; /* Known on disk log file number. */ + u_int32_t st_disk_offset; /* Known on disk log file offset. */ + u_int32_t st_maxcommitperflush; /* Max number of commits in a flush. */ + u_int32_t st_mincommitperflush; /* Min number of commits in a flush. */ + roff_t st_regsize; /* Region size. */ +#endif +}; + +/* + * We need to record the first log record of a transaction. For user + * defined logging this macro returns the place to put that information, + * if it is needed in rlsnp, otherwise it leaves it unchanged. 
We also + * need to track the last record of the transaction, this returns the + * place to put that info. + */ +#define DB_SET_TXN_LSNP(txn, blsnp, llsnp) \ + ((txn)->set_txn_lsnp(txn, blsnp, llsnp)) + +/* + * Definition of the structure which specifies marshalling of log records. + */ +typedef enum { + LOGREC_Done, + LOGREC_ARG, + LOGREC_HDR, + LOGREC_DATA, + LOGREC_DB, + LOGREC_DBOP, + LOGREC_DBT, + LOGREC_LOCKS, + LOGREC_OP, + LOGREC_PGDBT, + LOGREC_PGDDBT, + LOGREC_PGLIST, + LOGREC_POINTER, + LOGREC_TIME +} log_rec_type_t; + +typedef const struct __log_rec_spec { + log_rec_type_t type; + u_int32_t offset; + const char *name; + const char fmt[4]; +} DB_LOG_RECSPEC; + +/* + * Size of a DBT in a log record. + */ +#define LOG_DBT_SIZE(dbt) \ + (sizeof(u_int32_t) + ((dbt) == NULL ? 0 : (dbt)->size)) + +/******************************************************* + * Shared buffer cache (mpool). + *******************************************************/ +/* Priority values for DB_MPOOLFILE->{put,set_priority}. */ +typedef enum { + DB_PRIORITY_UNCHANGED=0, + DB_PRIORITY_VERY_LOW=1, + DB_PRIORITY_LOW=2, + DB_PRIORITY_DEFAULT=3, + DB_PRIORITY_HIGH=4, + DB_PRIORITY_VERY_HIGH=5 +} DB_CACHE_PRIORITY; + +/* Per-process DB_MPOOLFILE information. */ +struct __db_mpoolfile { + DB_FH *fhp; /* Underlying file handle. */ + + /* + * !!! + * The ref, pinref and q fields are protected by the region lock. + */ + u_int32_t ref; /* Reference count. */ + + u_int32_t pinref; /* Pinned block reference count. */ + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_ENTRY(__db_mpoolfile) q; + */ + struct { + struct __db_mpoolfile *tqe_next; + struct __db_mpoolfile **tqe_prev; + } q; /* Linked list of DB_MPOOLFILE's. */ + + /* + * !!! + * The rest of the fields (with the exception of the MP_FLUSH flag) + * are not thread-protected, even when they may be modified at any + * time by the application. 
The reason is the DB_MPOOLFILE handle + * is single-threaded from the viewpoint of the application, and so + * the only fields needing to be thread-protected are those accessed + * by checkpoint or sync threads when using DB_MPOOLFILE structures + * to flush buffers from the cache. + */ + ENV *env; /* Environment */ + MPOOLFILE *mfp; /* Underlying MPOOLFILE. */ + + u_int32_t clear_len; /* Cleared length on created pages. */ + u_int8_t /* Unique file ID. */ + fileid[DB_FILE_ID_LEN]; + int ftype; /* File type. */ + int32_t lsn_offset; /* LSN offset in page. */ + u_int32_t gbytes, bytes; /* Maximum file size. */ + DBT *pgcookie; /* Byte-string passed to pgin/pgout. */ + int32_t priority; /* Cache priority. */ + + void *addr; /* Address of mmap'd region. */ + size_t len; /* Length of mmap'd region. */ + + u_int32_t config_flags; /* Flags to DB_MPOOLFILE->set_flags. */ + + /* DB_MPOOLFILE PUBLIC HANDLE LIST BEGIN */ + int (*close) __P((DB_MPOOLFILE *, u_int32_t)); + int (*get) + __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *)); + int (*get_clear_len) __P((DB_MPOOLFILE *, u_int32_t *)); + int (*get_fileid) __P((DB_MPOOLFILE *, u_int8_t *)); + int (*get_flags) __P((DB_MPOOLFILE *, u_int32_t *)); + int (*get_ftype) __P((DB_MPOOLFILE *, int *)); + int (*get_last_pgno) __P((DB_MPOOLFILE *, db_pgno_t *)); + int (*get_lsn_offset) __P((DB_MPOOLFILE *, int32_t *)); + int (*get_maxsize) __P((DB_MPOOLFILE *, u_int32_t *, u_int32_t *)); + int (*get_pgcookie) __P((DB_MPOOLFILE *, DBT *)); + int (*get_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *)); + int (*open) __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t)); + int (*put) __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t)); + int (*set_clear_len) __P((DB_MPOOLFILE *, u_int32_t)); + int (*set_fileid) __P((DB_MPOOLFILE *, u_int8_t *)); + int (*set_flags) __P((DB_MPOOLFILE *, u_int32_t, int)); + int (*set_ftype) __P((DB_MPOOLFILE *, int)); + int (*set_lsn_offset) __P((DB_MPOOLFILE *, int32_t)); 
+ int (*set_maxsize) __P((DB_MPOOLFILE *, u_int32_t, u_int32_t)); + int (*set_pgcookie) __P((DB_MPOOLFILE *, DBT *)); + int (*set_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY)); + int (*sync) __P((DB_MPOOLFILE *)); + /* DB_MPOOLFILE PUBLIC HANDLE LIST END */ + + /* + * MP_FILEID_SET, MP_OPEN_CALLED and MP_READONLY do not need to be + * thread protected because they are initialized before the file is + * linked onto the per-process lists, and never modified. + * + * MP_FLUSH is thread protected because it is potentially read/set by + * multiple threads of control. + */ +#define MP_FILEID_SET 0x001 /* Application supplied a file ID. */ +#define MP_FLUSH 0x002 /* Was opened to flush a buffer. */ +#define MP_MULTIVERSION 0x004 /* Opened for multiversion access. */ +#define MP_OPEN_CALLED 0x008 /* File opened. */ +#define MP_READONLY 0x010 /* File is readonly. */ +#define MP_DUMMY 0x020 /* File is dummy for __memp_fput. */ + u_int32_t flags; +}; + +/* Mpool statistics structure. */ +struct __db_mpool_stat { /* SHARED */ + u_int32_t st_gbytes; /* Total cache size: GB. */ + u_int32_t st_bytes; /* Total cache size: B. */ + u_int32_t st_ncache; /* Number of cache regions. */ + u_int32_t st_max_ncache; /* Maximum number of regions. */ + db_size_t st_mmapsize; /* Maximum file size for mmap. */ + int32_t st_maxopenfd; /* Maximum number of open fd's. */ + int32_t st_maxwrite; /* Maximum buffers to write. */ + db_timeout_t st_maxwrite_sleep; /* Sleep after writing max buffers. */ + u_int32_t st_pages; /* Total number of pages. */ +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t st_map; /* Pages from mapped files. */ + uintmax_t st_cache_hit; /* Pages found in the cache. */ + uintmax_t st_cache_miss; /* Pages not found in the cache. */ + uintmax_t st_page_create; /* Pages created in the cache. */ + uintmax_t st_page_in; /* Pages read in. */ + uintmax_t st_page_out; /* Pages written out. */ + uintmax_t st_ro_evict; /* Clean pages forced from the cache. 
*/ + uintmax_t st_rw_evict; /* Dirty pages forced from the cache. */ + uintmax_t st_page_trickle; /* Pages written by memp_trickle. */ + u_int32_t st_page_clean; /* Clean pages. */ + u_int32_t st_page_dirty; /* Dirty pages. */ + u_int32_t st_hash_buckets; /* Number of hash buckets. */ + u_int32_t st_hash_mutexes; /* Number of hash bucket mutexes. */ + u_int32_t st_pagesize; /* Assumed page size. */ + u_int32_t st_hash_searches; /* Total hash chain searches. */ + u_int32_t st_hash_longest; /* Longest hash chain searched. */ + uintmax_t st_hash_examined; /* Total hash entries searched. */ + uintmax_t st_hash_nowait; /* Hash lock granted with nowait. */ + uintmax_t st_hash_wait; /* Hash lock granted after wait. */ + uintmax_t st_hash_max_nowait; /* Max hash lock granted with nowait. */ + uintmax_t st_hash_max_wait; /* Max hash lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted with nowait. */ + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_mvcc_frozen; /* Buffers frozen. */ + uintmax_t st_mvcc_thawed; /* Buffers thawed. */ + uintmax_t st_mvcc_freed; /* Frozen buffers freed. */ + uintmax_t st_alloc; /* Number of page allocations. */ + uintmax_t st_alloc_buckets; /* Buckets checked during allocation. */ + uintmax_t st_alloc_max_buckets;/* Max checked during allocation. */ + uintmax_t st_alloc_pages; /* Pages checked during allocation. */ + uintmax_t st_alloc_max_pages; /* Max checked during allocation. */ + uintmax_t st_io_wait; /* Thread waited on buffer I/O. */ + uintmax_t st_sync_interrupted; /* Number of times sync interrupted. */ + roff_t st_regsize; /* Region size. */ + roff_t st_regmax; /* Region max. */ +#endif +}; + +/* + * Mpool file statistics structure. + * The first fields in this structure must mirror the __db_mpool_fstat_int + * structure, since content is mem copied between the two. + */ +struct __db_mpool_fstat { + u_int32_t st_pagesize; /* Page size. 
*/ +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t st_map; /* Pages from mapped files. */ + uintmax_t st_cache_hit; /* Pages found in the cache. */ + uintmax_t st_cache_miss; /* Pages not found in the cache. */ + uintmax_t st_page_create; /* Pages created in the cache. */ + uintmax_t st_page_in; /* Pages read in. */ + uintmax_t st_page_out; /* Pages written out. */ +#endif + char *file_name; /* File name. */ +}; + +/******************************************************* + * Transactions and recovery. + *******************************************************/ +#define DB_TXNVERSION 1 + +typedef enum { + DB_TXN_ABORT=0, /* Public. */ + DB_TXN_APPLY=1, /* Public. */ + DB_TXN_BACKWARD_ROLL=3, /* Public. */ + DB_TXN_FORWARD_ROLL=4, /* Public. */ + DB_TXN_OPENFILES=5, /* Internal. */ + DB_TXN_POPENFILES=6, /* Internal. */ + DB_TXN_PRINT=7, /* Public. */ + DB_TXN_LOG_VERIFY=8 /* Internal. */ +} db_recops; + +/* + * BACKWARD_ALLOC is used during the forward pass to pick up any aborted + * allocations for files that were created during the forward pass. + * The main difference between _ALLOC and _ROLL is that the entry for + * the file may not exist during the rollforward pass. + */ +#define DB_UNDO(op) ((op) == DB_TXN_ABORT || (op) == DB_TXN_BACKWARD_ROLL) +#define DB_REDO(op) ((op) == DB_TXN_FORWARD_ROLL || (op) == DB_TXN_APPLY) + +struct __db_txn { + DB_TXNMGR *mgrp; /* Pointer to transaction manager. */ + DB_TXN *parent; /* Pointer to transaction's parent. */ + DB_THREAD_INFO *thread_info; /* Pointer to thread information. */ + + u_int32_t txnid; /* Unique transaction id. */ + char *name; /* Transaction name. */ + DB_LOCKER *locker; /* Locker for this txn. */ + + void *td; /* Detail structure within region. */ + db_timeout_t lock_timeout; /* Timeout for locks for this txn. */ + void *txn_list; /* Undo information for parent. */ + + /* + * !!! + * Explicit representations of structures from queue.h. 
+ * TAILQ_ENTRY(__db_txn) links; + */ + struct { + struct __db_txn *tqe_next; + struct __db_txn **tqe_prev; + } links; /* Links transactions off manager. */ + + /* + * !!! + * Explicit representations of structures from shqueue.h. + * SH_TAILQ_ENTRY xa_links; + * These links link together transactions that are active in + * the same thread of control. + */ + struct { + db_ssize_t stqe_next; + db_ssize_t stqe_prev; + } xa_links; /* Links XA transactions. */ + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_HEAD(__kids, __db_txn) kids; + */ + struct __kids { + struct __db_txn *tqh_first; + struct __db_txn **tqh_last; + } kids; + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_HEAD(__events, __txn_event) events; + */ + struct { + struct __txn_event *tqh_first; + struct __txn_event **tqh_last; + } events; /* Links deferred events. */ + + /* + * !!! + * Explicit representations of structures from queue.h. + * STAILQ_HEAD(__logrec, __txn_logrec) logs; + */ + struct { + struct __txn_logrec *stqh_first; + struct __txn_logrec **stqh_last; + } logs; /* Links in memory log records. */ + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_ENTRY(__db_txn) klinks; + */ + struct { + struct __db_txn *tqe_next; + struct __db_txn **tqe_prev; + } klinks; /* Links of children in parent. */ + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_HEAD(__my_cursors, __dbc) my_cursors; + */ + struct __my_cursors { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } my_cursors; + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_HEAD(__femfs, MPOOLFILE) femfs; + * + * These are DBs involved in file extension in this transaction. + */ + struct __femfs { + DB *tqh_first; + DB **tqh_last; + } femfs; + + DB_TXN_TOKEN *token_buffer; /* User's commit token buffer. */ + void *api_internal; /* C++ API private. 
*/ + void *xml_internal; /* XML API private. */ + + u_int32_t cursors; /* Number of cursors open for txn */ + + /* DB_TXN PUBLIC HANDLE LIST BEGIN */ + int (*abort) __P((DB_TXN *)); + int (*commit) __P((DB_TXN *, u_int32_t)); + int (*discard) __P((DB_TXN *, u_int32_t)); + int (*get_name) __P((DB_TXN *, const char **)); + int (*get_priority) __P((DB_TXN *, u_int32_t *)); + u_int32_t (*id) __P((DB_TXN *)); + int (*prepare) __P((DB_TXN *, u_int8_t *)); + int (*set_commit_token) __P((DB_TXN *, DB_TXN_TOKEN *)); + int (*set_name) __P((DB_TXN *, const char *)); + int (*set_priority) __P((DB_TXN *, u_int32_t)); + int (*set_timeout) __P((DB_TXN *, db_timeout_t, u_int32_t)); + /* DB_TXN PUBLIC HANDLE LIST END */ + + /* DB_TXN PRIVATE HANDLE LIST BEGIN */ + void (*set_txn_lsnp) __P((DB_TXN *txn, DB_LSN **, DB_LSN **)); + /* DB_TXN PRIVATE HANDLE LIST END */ + +#define TXN_XA_THREAD_NOTA 0 +#define TXN_XA_THREAD_ASSOCIATED 1 +#define TXN_XA_THREAD_SUSPENDED 2 +#define TXN_XA_THREAD_UNASSOCIATED 3 + u_int32_t xa_thr_status; + +#define TXN_CHILDCOMMIT 0x00001 /* Txn has committed. */ +#define TXN_COMPENSATE 0x00002 /* Compensating transaction. */ +#define TXN_DEADLOCK 0x00004 /* Txn has deadlocked. */ +#define TXN_FAMILY 0x00008 /* Cursors/children are independent. */ +#define TXN_IGNORE_LEASE 0x00010 /* Skip lease check at commit time. */ +#define TXN_INFAMILY 0x00020 /* Part of a transaction family. */ +#define TXN_LOCKTIMEOUT 0x00040 /* Txn has a lock timeout. */ +#define TXN_MALLOC 0x00080 /* Structure allocated by TXN system. */ +#define TXN_NOSYNC 0x00100 /* Do not sync on prepare and commit. */ +#define TXN_NOWAIT 0x00200 /* Do not wait on locks. */ +#define TXN_PRIVATE 0x00400 /* Txn owned by cursor. */ +#define TXN_READONLY 0x00800 /* CDS group handle. */ +#define TXN_READ_COMMITTED 0x01000 /* Txn has degree 2 isolation. */ +#define TXN_READ_UNCOMMITTED 0x02000 /* Txn has degree 1 isolation. */ +#define TXN_RESTORED 0x04000 /* Txn has been restored. 
*/ +#define TXN_SNAPSHOT 0x08000 /* Snapshot Isolation. */ +#define TXN_SYNC 0x10000 /* Write and sync on prepare/commit. */ +#define TXN_WRITE_NOSYNC 0x20000 /* Write only on prepare/commit. */ +#define TXN_BULK 0x40000 /* Enable bulk loading optimization. */ + u_int32_t flags; +}; + +#define TXN_SYNC_FLAGS (TXN_SYNC | TXN_NOSYNC | TXN_WRITE_NOSYNC) + +/* + * Structure used for two phase commit interface. + * We set the size of our global transaction id (gid) to be 128 in order + * to match that defined by the XA X/Open standard. + */ +#define DB_GID_SIZE 128 +struct __db_preplist { + DB_TXN *txn; + u_int8_t gid[DB_GID_SIZE]; +}; + +/* Transaction statistics structure. */ +struct __db_txn_active { + u_int32_t txnid; /* Transaction ID */ + u_int32_t parentid; /* Transaction ID of parent */ + pid_t pid; /* Process owning txn ID */ + db_threadid_t tid; /* Thread owning txn ID */ + + DB_LSN lsn; /* LSN when transaction began */ + + DB_LSN read_lsn; /* Read LSN for MVCC */ + u_int32_t mvcc_ref; /* MVCC reference count */ + + u_int32_t priority; /* Deadlock resolution priority */ + +#define TXN_ABORTED 1 +#define TXN_COMMITTED 2 +#define TXN_NEED_ABORT 3 +#define TXN_PREPARED 4 +#define TXN_RUNNING 5 + u_int32_t status; /* Status of the transaction */ + +#define TXN_XA_ACTIVE 1 +#define TXN_XA_DEADLOCKED 2 +#define TXN_XA_IDLE 3 +#define TXN_XA_PREPARED 4 +#define TXN_XA_ROLLEDBACK 5 + u_int32_t xa_status; /* XA status */ + + u_int8_t gid[DB_GID_SIZE]; /* Global transaction ID */ + char name[51]; /* 50 bytes of name, nul termination */ +}; + +struct __db_txn_stat { + u_int32_t st_nrestores; /* number of restored transactions + after recovery. 
*/ +#ifndef __TEST_DB_NO_STATISTICS + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_inittxns; /* initial txns allocated */ + u_int32_t st_maxtxns; /* maximum txns possible */ + uintmax_t st_naborts; /* number of aborted transactions */ + uintmax_t st_nbegins; /* number of begun transactions */ + uintmax_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + u_int32_t st_nsnapshot; /* number of snapshot transactions */ + u_int32_t st_maxnactive; /* maximum active transactions */ + u_int32_t st_maxnsnapshot; /* maximum snapshot transactions */ + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted without wait. */ + roff_t st_regsize; /* Region size. */ + DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */ +#endif +}; + +#define DB_TXN_TOKEN_SIZE 20 +struct __db_txn_token { + u_int8_t buf[DB_TXN_TOKEN_SIZE]; +}; + +/******************************************************* + * Replication. + *******************************************************/ +/* Special, out-of-band environment IDs. */ +#define DB_EID_BROADCAST -1 +#define DB_EID_INVALID -2 +#define DB_EID_MASTER -3 + +#define DB_REP_DEFAULT_PRIORITY 100 + +/* Acknowledgement policies; 0 reserved as OOB. */ +#define DB_REPMGR_ACKS_ALL 1 +#define DB_REPMGR_ACKS_ALL_AVAILABLE 2 +#define DB_REPMGR_ACKS_ALL_PEERS 3 +#define DB_REPMGR_ACKS_NONE 4 +#define DB_REPMGR_ACKS_ONE 5 +#define DB_REPMGR_ACKS_ONE_PEER 6 +#define DB_REPMGR_ACKS_QUORUM 7 + +/* Replication timeout configuration values. */ +#define DB_REP_ACK_TIMEOUT 1 /* RepMgr acknowledgements. */ +#define DB_REP_CHECKPOINT_DELAY 2 /* Master checkpoint delay. */ +#define DB_REP_CONNECTION_RETRY 3 /* RepMgr connections. */ +#define DB_REP_ELECTION_RETRY 4 /* RepMgr elect retries. 
*/ +#define DB_REP_ELECTION_TIMEOUT 5 /* Rep normal elections. */ +#define DB_REP_FULL_ELECTION_TIMEOUT 6 /* Rep full elections. */ +#define DB_REP_HEARTBEAT_MONITOR 7 /* RepMgr client HB monitor. */ +#define DB_REP_HEARTBEAT_SEND 8 /* RepMgr master send freq. */ +#define DB_REP_LEASE_TIMEOUT 9 /* Master leases. */ + +/* + * Event notification types. (Tcl testing interface currently assumes there are + * no more than 32 of these.) + */ +#define DB_EVENT_PANIC 0 +#define DB_EVENT_REG_ALIVE 1 +#define DB_EVENT_REG_PANIC 2 +#define DB_EVENT_REP_CLIENT 3 +#define DB_EVENT_REP_CONNECT_BROKEN 4 +#define DB_EVENT_REP_CONNECT_ESTD 5 +#define DB_EVENT_REP_CONNECT_TRY_FAILED 6 +#define DB_EVENT_REP_DUPMASTER 7 +#define DB_EVENT_REP_ELECTED 8 +#define DB_EVENT_REP_ELECTION_FAILED 9 +#define DB_EVENT_REP_INIT_DONE 10 +#define DB_EVENT_REP_JOIN_FAILURE 11 +#define DB_EVENT_REP_LOCAL_SITE_REMOVED 12 +#define DB_EVENT_REP_MASTER 13 +#define DB_EVENT_REP_MASTER_FAILURE 14 +#define DB_EVENT_REP_NEWMASTER 15 +#define DB_EVENT_REP_PERM_FAILED 16 +#define DB_EVENT_REP_SITE_ADDED 17 +#define DB_EVENT_REP_SITE_REMOVED 18 +#define DB_EVENT_REP_STARTUPDONE 19 +#define DB_EVENT_REP_WOULD_ROLLBACK 20 /* Undocumented; C API only. */ +#define DB_EVENT_WRITE_FAILED 21 +#define DB_EVENT_NO_SUCH_EVENT 0xffffffff /* OOB sentinel value */ + +/* Replication Manager site status. */ +struct __db_repmgr_site { + int eid; + char *host; + u_int port; + +#define DB_REPMGR_CONNECTED 1 +#define DB_REPMGR_DISCONNECTED 2 + u_int32_t status; + +#define DB_REPMGR_ISPEER 0x01 + u_int32_t flags; +}; + +/* Replication statistics. */ +struct __db_rep_stat { /* SHARED */ + /* !!! + * Many replication statistics fields cannot be protected by a mutex + * without an unacceptable performance penalty, since most message + * processing is done without the need to hold a region-wide lock. 
+ * Fields whose comments end with a '+' may be updated without holding + * the replication or log mutexes (as appropriate), and thus may be + * off somewhat (or, on unreasonable architectures under unlucky + * circumstances, garbaged). + */ + u_int32_t st_startup_complete; /* Site completed client sync-up. */ +#ifndef __TEST_DB_NO_STATISTICS + uintmax_t st_log_queued; /* Log records currently queued.+ */ + u_int32_t st_status; /* Current replication status. */ + DB_LSN st_next_lsn; /* Next LSN to use or expect. */ + DB_LSN st_waiting_lsn; /* LSN we're awaiting, if any. */ + DB_LSN st_max_perm_lsn; /* Maximum permanent LSN. */ + db_pgno_t st_next_pg; /* Next pg we expect. */ + db_pgno_t st_waiting_pg; /* pg we're awaiting, if any. */ + + u_int32_t st_dupmasters; /* # of times a duplicate master + condition was detected.+ */ + db_ssize_t st_env_id; /* Current environment ID. */ + u_int32_t st_env_priority; /* Current environment priority. */ + uintmax_t st_bulk_fills; /* Bulk buffer fills. */ + uintmax_t st_bulk_overflows; /* Bulk buffer overflows. */ + uintmax_t st_bulk_records; /* Bulk records stored. */ + uintmax_t st_bulk_transfers; /* Transfers of bulk buffers. */ + uintmax_t st_client_rerequests;/* Number of forced rerequests. */ + uintmax_t st_client_svc_req; /* Number of client service requests + received by this client. */ + uintmax_t st_client_svc_miss; /* Number of client service requests + missing on this client. */ + u_int32_t st_gen; /* Current generation number. */ + u_int32_t st_egen; /* Current election gen number. */ + uintmax_t st_lease_chk; /* Lease validity checks. */ + uintmax_t st_lease_chk_misses; /* Lease checks invalid. */ + uintmax_t st_lease_chk_refresh; /* Lease refresh attempts. */ + uintmax_t st_lease_sends; /* Lease messages sent live. */ + + uintmax_t st_log_duplicated; /* Log records received multiply.+ */ + uintmax_t st_log_queued_max; /* Max. log records queued at once.+ */ + uintmax_t st_log_queued_total; /* Total # of log recs. 
ever queued.+ */ + uintmax_t st_log_records; /* Log records received and put.+ */ + uintmax_t st_log_requested; /* Log recs. missed and requested.+ */ + db_ssize_t st_master; /* Env. ID of the current master. */ + uintmax_t st_master_changes; /* # of times we've switched masters. */ + uintmax_t st_msgs_badgen; /* Messages with a bad generation #.+ */ + uintmax_t st_msgs_processed; /* Messages received and processed.+ */ + uintmax_t st_msgs_recover; /* Messages ignored because this site + was a client in recovery.+ */ + uintmax_t st_msgs_send_failures;/* # of failed message sends.+ */ + uintmax_t st_msgs_sent; /* # of successful message sends.+ */ + uintmax_t st_newsites; /* # of NEWSITE msgs. received.+ */ + u_int32_t st_nsites; /* Current number of sites we will + assume during elections. */ + uintmax_t st_nthrottles; /* # of times we were throttled. */ + uintmax_t st_outdated; /* # of times we detected and returned + an OUTDATED condition.+ */ + uintmax_t st_pg_duplicated; /* Pages received multiply.+ */ + uintmax_t st_pg_records; /* Pages received and stored.+ */ + uintmax_t st_pg_requested; /* Pages missed and requested.+ */ + uintmax_t st_txns_applied; /* # of transactions applied.+ */ + uintmax_t st_startsync_delayed;/* # of STARTSYNC msgs delayed.+ */ + + /* Elections generally. */ + uintmax_t st_elections; /* # of elections held.+ */ + uintmax_t st_elections_won; /* # of elections won by this site.+ */ + + /* Statistics about an in-progress election. */ + db_ssize_t st_election_cur_winner; /* Current front-runner. */ + u_int32_t st_election_gen; /* Election generation number. */ + u_int32_t st_election_datagen; /* Election data generation number. */ + DB_LSN st_election_lsn; /* Max. LSN of current winner. */ + u_int32_t st_election_nsites; /* # of "registered voters". */ + u_int32_t st_election_nvotes; /* # of "registered voters" needed. */ + u_int32_t st_election_priority; /* Current election priority. 
*/ + int32_t st_election_status; /* Current election status. */ + u_int32_t st_election_tiebreaker;/* Election tiebreaker value. */ + u_int32_t st_election_votes; /* Votes received in this round. */ + u_int32_t st_election_sec; /* Last election time seconds. */ + u_int32_t st_election_usec; /* Last election time useconds. */ + u_int32_t st_max_lease_sec; /* Maximum lease timestamp seconds. */ + u_int32_t st_max_lease_usec; /* Maximum lease timestamp useconds. */ + + /* Undocumented statistics only used by the test system. */ +#ifdef CONFIG_TEST + u_int32_t st_filefail_cleanups; /* # of FILE_FAIL cleanups done. */ +#endif +#endif +}; + +/* Replication Manager statistics. */ +struct __db_repmgr_stat { /* SHARED */ + uintmax_t st_perm_failed; /* # of insufficiently ack'ed msgs. */ + uintmax_t st_msgs_queued; /* # msgs queued for network delay. */ + uintmax_t st_msgs_dropped; /* # msgs discarded due to excessive + queue length. */ + uintmax_t st_connection_drop; /* Existing connections dropped. */ + uintmax_t st_connect_fail; /* Failed new connection attempts. */ + uintmax_t st_elect_threads; /* # of active election threads. */ + uintmax_t st_max_elect_threads; /* Max concurrent e-threads ever. */ +}; + +/* Replication Manager connection error. */ +struct __db_repmgr_conn_err { + int eid; /* Replication Environment ID. */ + int error; /* System networking error code. */ +}; + +/******************************************************* + * Sequences. + *******************************************************/ +/* + * The storage record for a sequence. + */ +struct __db_seq_record { + u_int32_t seq_version; /* Version size/number. */ + u_int32_t flags; /* DB_SEQ_XXX Flags. */ + db_seq_t seq_value; /* Current value. */ + db_seq_t seq_max; /* Max permitted. */ + db_seq_t seq_min; /* Min permitted. */ +}; + +/* + * Handle for a sequence object. + */ +struct __db_sequence { + DB *seq_dbp; /* DB handle for this sequence. */ + db_mutex_t mtx_seq; /* Mutex if sequence is threaded. 
*/ + DB_SEQ_RECORD *seq_rp; /* Pointer to current data. */ + DB_SEQ_RECORD seq_record; /* Data from DB_SEQUENCE. */ + int32_t seq_cache_size; /* Number of values cached. */ + db_seq_t seq_last_value; /* Last value cached. */ + db_seq_t seq_prev_value; /* Last value returned. */ + DBT seq_key; /* DBT pointing to sequence key. */ + DBT seq_data; /* DBT pointing to seq_record. */ + + /* API-private structure: used by C++ and Java. */ + void *api_internal; + + /* DB_SEQUENCE PUBLIC HANDLE LIST BEGIN */ + int (*close) __P((DB_SEQUENCE *, u_int32_t)); + int (*get) __P((DB_SEQUENCE *, + DB_TXN *, int32_t, db_seq_t *, u_int32_t)); + int (*get_cachesize) __P((DB_SEQUENCE *, int32_t *)); + int (*get_db) __P((DB_SEQUENCE *, DB **)); + int (*get_flags) __P((DB_SEQUENCE *, u_int32_t *)); + int (*get_key) __P((DB_SEQUENCE *, DBT *)); + int (*get_range) __P((DB_SEQUENCE *, + db_seq_t *, db_seq_t *)); + int (*initial_value) __P((DB_SEQUENCE *, db_seq_t)); + int (*open) __P((DB_SEQUENCE *, + DB_TXN *, DBT *, u_int32_t)); + int (*remove) __P((DB_SEQUENCE *, DB_TXN *, u_int32_t)); + int (*set_cachesize) __P((DB_SEQUENCE *, int32_t)); + int (*set_flags) __P((DB_SEQUENCE *, u_int32_t)); + int (*set_range) __P((DB_SEQUENCE *, db_seq_t, db_seq_t)); + int (*stat) __P((DB_SEQUENCE *, + DB_SEQUENCE_STAT **, u_int32_t)); + int (*stat_print) __P((DB_SEQUENCE *, u_int32_t)); + /* DB_SEQUENCE PUBLIC HANDLE LIST END */ +}; + +struct __db_seq_stat { /* SHARED */ + uintmax_t st_wait; /* Sequence lock granted w/o wait. */ + uintmax_t st_nowait; /* Sequence lock granted after wait. */ + db_seq_t st_current; /* Current value in db. */ + db_seq_t st_value; /* Current cached value. */ + db_seq_t st_last_value; /* Last cached value. */ + db_seq_t st_min; /* Minimum value. */ + db_seq_t st_max; /* Maximum value. */ + int32_t st_cache_size; /* Cache size. */ + u_int32_t st_flags; /* Flag value. */ +}; + +/******************************************************* + * Access methods. 
+ *******************************************************/ +/* + * Any new methods need to retain the original numbering. The type + * is written in a log record so must be maintained. + */ +typedef enum { + DB_BTREE=1, + DB_HASH=2, + DB_HEAP=6, + DB_RECNO=3, + DB_QUEUE=4, + DB_UNKNOWN=5 /* Figure it out on open. */ +} DBTYPE; + +#define DB_RENAMEMAGIC 0x030800 /* File has been renamed. */ + +#define DB_BTREEVERSION 9 /* Current btree version. */ +#define DB_BTREEOLDVER 8 /* Oldest btree version supported. */ +#define DB_BTREEMAGIC 0x053162 + +#define DB_HASHVERSION 9 /* Current hash version. */ +#define DB_HASHOLDVER 7 /* Oldest hash version supported. */ +#define DB_HASHMAGIC 0x061561 + +#define DB_HEAPVERSION 1 /* Current heap version. */ +#define DB_HEAPOLDVER 1 /* Oldest heap version supported. */ +#define DB_HEAPMAGIC 0x074582 + +#define DB_QAMVERSION 4 /* Current queue version. */ +#define DB_QAMOLDVER 3 /* Oldest queue version supported. */ +#define DB_QAMMAGIC 0x042253 + +#define DB_SEQUENCE_VERSION 2 /* Current sequence version. */ +#define DB_SEQUENCE_OLDVER 1 /* Oldest sequence version supported. */ + +/* + * DB access method and cursor operation values. Each value is an operation + * code to which additional bit flags are added. 
+ */ +#define DB_AFTER 1 /* Dbc.put */ +#define DB_APPEND 2 /* Db.put */ +#define DB_BEFORE 3 /* Dbc.put */ +#define DB_CONSUME 4 /* Db.get */ +#define DB_CONSUME_WAIT 5 /* Db.get */ +#define DB_CURRENT 6 /* Dbc.get, Dbc.put, DbLogc.get */ +#define DB_FIRST 7 /* Dbc.get, DbLogc->get */ +#define DB_GET_BOTH 8 /* Db.get, Dbc.get */ +#define DB_GET_BOTHC 9 /* Dbc.get (internal) */ +#define DB_GET_BOTH_RANGE 10 /* Db.get, Dbc.get */ +#define DB_GET_RECNO 11 /* Dbc.get */ +#define DB_JOIN_ITEM 12 /* Dbc.get; don't do primary lookup */ +#define DB_KEYFIRST 13 /* Dbc.put */ +#define DB_KEYLAST 14 /* Dbc.put */ +#define DB_LAST 15 /* Dbc.get, DbLogc->get */ +#define DB_NEXT 16 /* Dbc.get, DbLogc->get */ +#define DB_NEXT_DUP 17 /* Dbc.get */ +#define DB_NEXT_NODUP 18 /* Dbc.get */ +#define DB_NODUPDATA 19 /* Db.put, Dbc.put */ +#define DB_NOOVERWRITE 20 /* Db.put */ +#define DB_OVERWRITE_DUP 21 /* Dbc.put, Db.put; no DB_KEYEXIST */ +#define DB_POSITION 22 /* Dbc.dup */ +#define DB_PREV 23 /* Dbc.get, DbLogc->get */ +#define DB_PREV_DUP 24 /* Dbc.get */ +#define DB_PREV_NODUP 25 /* Dbc.get */ +#define DB_SET 26 /* Dbc.get, DbLogc->get */ +#define DB_SET_RANGE 27 /* Dbc.get */ +#define DB_SET_RECNO 28 /* Db.get, Dbc.get */ +#define DB_UPDATE_SECONDARY 29 /* Dbc.get, Dbc.del (internal) */ +#define DB_SET_LTE 30 /* Dbc.get (internal) */ +#define DB_GET_BOTH_LTE 31 /* Dbc.get (internal) */ + +/* This has to change when the max opcode hits 255. */ +#define DB_OPFLAGS_MASK 0x000000ff /* Mask for operations flags. */ + +/* + * DB (user visible) error return codes. + * + * !!! + * We don't want our error returns to conflict with other packages where + * possible, so pick a base error value that's hopefully not common. We + * document that we own the error name space from -30,800 to -30,999. + */ +/* DB (public) error return codes. */ +#define DB_BUFFER_SMALL (-30999)/* User memory too small for return. */ +#define DB_DONOTINDEX (-30998)/* "Null" return from 2ndary callbk. 
*/ +#define DB_FOREIGN_CONFLICT (-30997)/* A foreign db constraint triggered. */ +#define DB_HEAP_FULL (-30996)/* No free space in a heap file. */ +#define DB_KEYEMPTY (-30995)/* Key/data deleted or never created. */ +#define DB_KEYEXIST (-30994)/* The key/data pair already exists. */ +#define DB_LOCK_DEADLOCK (-30993)/* Deadlock. */ +#define DB_LOCK_NOTGRANTED (-30992)/* Lock unavailable. */ +#define DB_LOG_BUFFER_FULL (-30991)/* In-memory log buffer full. */ +#define DB_LOG_VERIFY_BAD (-30990)/* Log verification failed. */ +#define DB_NOSERVER (-30989)/* Server panic return. */ +#define DB_NOTFOUND (-30988)/* Key/data pair not found (EOF). */ +#define DB_OLD_VERSION (-30987)/* Out-of-date version. */ +#define DB_PAGE_NOTFOUND (-30986)/* Requested page not found. */ +#define DB_REP_DUPMASTER (-30985)/* There are two masters. */ +#define DB_REP_HANDLE_DEAD (-30984)/* Rolled back a commit. */ +#define DB_REP_HOLDELECTION (-30983)/* Time to hold an election. */ +#define DB_REP_IGNORE (-30982)/* This msg should be ignored.*/ +#define DB_REP_ISPERM (-30981)/* Cached not written perm written.*/ +#define DB_REP_JOIN_FAILURE (-30980)/* Unable to join replication group. */ +#define DB_REP_LEASE_EXPIRED (-30979)/* Master lease has expired. */ +#define DB_REP_LOCKOUT (-30978)/* API/Replication lockout now. */ +#define DB_REP_NEWSITE (-30977)/* New site entered system. */ +#define DB_REP_NOTPERM (-30976)/* Permanent log record not written. */ +#define DB_REP_UNAVAIL (-30975)/* Site cannot currently be reached. */ +#define DB_REP_WOULDROLLBACK (-30974)/* UNDOC: rollback inhibited by app. */ +#define DB_RUNRECOVERY (-30973)/* Panic return. */ +#define DB_SECONDARY_BAD (-30972)/* Secondary index corrupt. */ +#define DB_TIMEOUT (-30971)/* Timed out on read consistency. */ +#define DB_VERIFY_BAD (-30970)/* Verify failed; bad format. */ +#define DB_VERSION_MISMATCH (-30969)/* Environment version mismatch. */ + +/* DB (private) error return codes. 
*/ +#define DB_ALREADY_ABORTED (-30899) +#define DB_DELETED (-30898)/* Recovery file marked deleted. */ +#define DB_EVENT_NOT_HANDLED (-30897)/* Forward event to application. */ +#define DB_NEEDSPLIT (-30896)/* Page needs to be split. */ +#define DB_REP_BULKOVF (-30895)/* Rep bulk buffer overflow. */ +#define DB_REP_LOGREADY (-30894)/* Rep log ready for recovery. */ +#define DB_REP_NEWMASTER (-30893)/* We have learned of a new master. */ +#define DB_REP_PAGEDONE (-30892)/* This page was already done. */ +#define DB_SURPRISE_KID (-30891)/* Child commit where parent + didn't know it was a parent. */ +#define DB_SWAPBYTES (-30890)/* Database needs byte swapping. */ +#define DB_TXN_CKP (-30889)/* Encountered ckp record in log. */ +#define DB_VERIFY_FATAL (-30888)/* DB->verify cannot proceed. */ + +/* Database handle. */ +struct __db { + /******************************************************* + * Public: owned by the application. + *******************************************************/ + u_int32_t pgsize; /* Database logical page size. */ + DB_CACHE_PRIORITY priority; /* Database priority in cache. */ + + /* Callbacks. */ + int (*db_append_recno) __P((DB *, DBT *, db_recno_t)); + void (*db_feedback) __P((DB *, int, int)); + int (*dup_compare) __P((DB *, const DBT *, const DBT *)); + + void *app_private; /* Application-private handle. */ + + /******************************************************* + * Private: owned by DB. + *******************************************************/ + DB_ENV *dbenv; /* Backing public environment. */ + ENV *env; /* Backing private environment. */ + + DBTYPE type; /* DB access method type. */ + + DB_MPOOLFILE *mpf; /* Backing buffer pool. */ + + db_mutex_t mutex; /* Synchronization for free threading */ + + char *fname, *dname; /* File/database passed to DB->open. */ + const char *dirname; /* Directory of DB file. */ + u_int32_t open_flags; /* Flags passed to DB->open. 
*/ + + u_int8_t fileid[DB_FILE_ID_LEN];/* File's unique ID for locking. */ + + u_int32_t adj_fileid; /* File's unique ID for curs. adj. */ + +#define DB_LOGFILEID_INVALID -1 + FNAME *log_filename; /* File's naming info for logging. */ + + db_pgno_t meta_pgno; /* Meta page number */ + DB_LOCKER *locker; /* Locker for handle locking. */ + DB_LOCKER *cur_locker; /* Current handle lock holder. */ + DB_TXN *cur_txn; /* Opening transaction. */ + DB_LOCKER *associate_locker; /* Locker for DB->associate call. */ + DB_LOCK handle_lock; /* Lock held on this handle. */ + + time_t timestamp; /* Handle timestamp for replication. */ + u_int32_t fid_gen; /* Rep generation number for fids. */ + + /* + * Returned data memory for DB->get() and friends. + */ + DBT my_rskey; /* Secondary key. */ + DBT my_rkey; /* [Primary] key. */ + DBT my_rdata; /* Data. */ + + /* + * !!! + * Some applications use DB but implement their own locking outside of + * DB. If they're using fcntl(2) locking on the underlying database + * file, and we open and close a file descriptor for that file, we will + * discard their locks. The DB_FCNTL_LOCKING flag to DB->open is an + * undocumented interface to support this usage which leaves any file + * descriptors we open until DB->close. This will only work with the + * DB->open interface and simple caches, e.g., creating a transaction + * thread may open/close file descriptors this flag doesn't protect. + * Locking with fcntl(2) on a file that you don't own is a very, very + * unsafe thing to do. 'Nuff said. + */ + DB_FH *saved_open_fhp; /* Saved file handle. */ + + /* + * Linked list of DBP's, linked from the ENV, used to keep track + * of all open db handles for cursor adjustment. + * + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_ENTRY(__db) dblistlinks; + */ + struct { + struct __db *tqe_next; + struct __db **tqe_prev; + } dblistlinks; + + /* + * Cursor queues. + * + * !!! + * Explicit representations of structures from queue.h. 
+ * TAILQ_HEAD(__cq_fq, __dbc) free_queue; + * TAILQ_HEAD(__cq_aq, __dbc) active_queue; + * TAILQ_HEAD(__cq_jq, __dbc) join_queue; + */ + struct __cq_fq { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } free_queue; + struct __cq_aq { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } active_queue; + struct __cq_jq { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } join_queue; + + /* + * Secondary index support. + * + * Linked list of secondary indices -- set in the primary. + * + * !!! + * Explicit representations of structures from queue.h. + * LIST_HEAD(s_secondaries, __db); + */ + struct { + struct __db *lh_first; + } s_secondaries; + + /* + * List entries for secondaries, and reference count of how many + * threads are updating this secondary (see Dbc.put). + * + * !!! + * Note that these are synchronized by the primary's mutex, but + * filled in in the secondaries. + * + * !!! + * Explicit representations of structures from queue.h. + * LIST_ENTRY(__db) s_links; + */ + struct { + struct __db *le_next; + struct __db **le_prev; + } s_links; + u_int32_t s_refcnt; + + /* Secondary callback and free functions -- set in the secondary. */ + int (*s_callback) __P((DB *, const DBT *, const DBT *, DBT *)); + + /* Reference to primary -- set in the secondary. */ + DB *s_primary; + +#define DB_ASSOC_IMMUTABLE_KEY 0x00000001 /* Secondary key is immutable. */ +#define DB_ASSOC_CREATE 0x00000002 /* Secondary db populated on open. */ + + /* Flags passed to associate -- set in the secondary. */ + u_int32_t s_assoc_flags; + + /* + * Foreign key support. + * + * Linked list of primary dbs -- set in the foreign db + * + * !!! + * Explicit representations of structures from queue.h. + * LIST_HEAD(f_primaries, __db); + */ + struct { + struct __db_foreign_info *lh_first; + } f_primaries; + + /* + * !!! + * Explicit representations of structures from queue.h. 
+ * TAILQ_ENTRY(__db) felink;
+ *
+ * Links in a list of DBs involved in file extension
+ * during a transaction. These are to be used only while the
+ * metadata is locked.
+ */
+	struct {
+		struct __db *tqe_next;
+		struct __db **tqe_prev;
+	} felink;
+
+	/* Reference to foreign -- set in the secondary. */
+	DB *s_foreign;
+
+	/* API-private structure: used by DB 1.85, C++, Java, Perl and Tcl */
+	void *api_internal;
+
+	/* Subsystem-private structure. */
+	void *bt_internal;		/* Btree/Recno access method. */
+	void *h_internal;		/* Hash access method. */
+	void *heap_internal;		/* Heap access method. */
+	void *p_internal;		/* Partition information. */
+	void *q_internal;		/* Queue access method. */
+
+	/* DB PUBLIC HANDLE LIST BEGIN */
+	int (*associate) __P((DB *, DB_TXN *, DB *,
+		int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
+	int (*associate_foreign) __P((DB *, DB *,
+		int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
+		u_int32_t));
+	int (*close) __P((DB *, u_int32_t));
+	int (*compact) __P((DB *,
+		DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
+	int (*cursor) __P((DB *, DB_TXN *, DBC **, u_int32_t));
+	int (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t));
+	void (*err) __P((DB *, int, const char *, ...));
+	void (*errx) __P((DB *, const char *, ...));
+	int (*exists) __P((DB *, DB_TXN *, DBT *, u_int32_t));
+	int (*fd) __P((DB *, int *));
+	int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+	int (*get_alloc) __P((DB *, void *(**)(size_t),
+		void *(**)(void *, size_t), void (**)(void *)));
+	int (*get_append_recno) __P((DB *, int (**)(DB *, DBT *, db_recno_t)));
+	int (*get_assoc_flags) __P((DB *, u_int32_t *));
+	int (*get_bt_compare)
+		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+	int (*get_bt_compress) __P((DB *,
+		int (**)(DB *,
+		const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
+		int (**)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
+	int (*get_bt_minkey) __P((DB *, u_int32_t *));
+	int
(*get_bt_prefix) + __P((DB *, size_t (**)(DB *, const DBT *, const DBT *))); + int (*get_byteswapped) __P((DB *, int *)); + int (*get_cachesize) __P((DB *, u_int32_t *, u_int32_t *, int *)); + int (*get_create_dir) __P((DB *, const char **)); + int (*get_dbname) __P((DB *, const char **, const char **)); + int (*get_dup_compare) + __P((DB *, int (**)(DB *, const DBT *, const DBT *))); + int (*get_encrypt_flags) __P((DB *, u_int32_t *)); + DB_ENV *(*get_env) __P((DB *)); + void (*get_errcall) __P((DB *, + void (**)(const DB_ENV *, const char *, const char *))); + void (*get_errfile) __P((DB *, FILE **)); + void (*get_errpfx) __P((DB *, const char **)); + int (*get_feedback) __P((DB *, void (**)(DB *, int, int))); + int (*get_flags) __P((DB *, u_int32_t *)); + int (*get_h_compare) + __P((DB *, int (**)(DB *, const DBT *, const DBT *))); + int (*get_h_ffactor) __P((DB *, u_int32_t *)); + int (*get_h_hash) + __P((DB *, u_int32_t (**)(DB *, const void *, u_int32_t))); + int (*get_h_nelem) __P((DB *, u_int32_t *)); + int (*get_heapsize) __P((DB *, u_int32_t *, u_int32_t *)); + int (*get_lorder) __P((DB *, int *)); + DB_MPOOLFILE *(*get_mpf) __P((DB *)); + void (*get_msgcall) __P((DB *, + void (**)(const DB_ENV *, const char *))); + void (*get_msgfile) __P((DB *, FILE **)); + int (*get_multiple) __P((DB *)); + int (*get_open_flags) __P((DB *, u_int32_t *)); + int (*get_pagesize) __P((DB *, u_int32_t *)); + int (*get_partition_callback) __P((DB *, + u_int32_t *, u_int32_t (**)(DB *, DBT *key))); + int (*get_partition_dirs) __P((DB *, const char ***)); + int (*get_partition_keys) __P((DB *, u_int32_t *, DBT **)); + int (*get_priority) __P((DB *, DB_CACHE_PRIORITY *)); + int (*get_q_extentsize) __P((DB *, u_int32_t *)); + int (*get_re_delim) __P((DB *, int *)); + int (*get_re_len) __P((DB *, u_int32_t *)); + int (*get_re_pad) __P((DB *, int *)); + int (*get_re_source) __P((DB *, const char **)); + int (*get_transactional) __P((DB *)); + int (*get_type) __P((DB *, DBTYPE *)); 
+ int (*join) __P((DB *, DBC **, DBC **, u_int32_t)); + int (*key_range) + __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); + int (*open) __P((DB *, + DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int)); + int (*pget) __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); + int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*remove) __P((DB *, const char *, const char *, u_int32_t)); + int (*rename) __P((DB *, + const char *, const char *, const char *, u_int32_t)); + int (*set_alloc) __P((DB *, void *(*)(size_t), + void *(*)(void *, size_t), void (*)(void *))); + int (*set_append_recno) __P((DB *, int (*)(DB *, DBT *, db_recno_t))); + int (*set_bt_compare) + __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + int (*set_bt_compress) __P((DB *, + int (*)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *), + int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *))); + int (*set_bt_minkey) __P((DB *, u_int32_t)); + int (*set_bt_prefix) + __P((DB *, size_t (*)(DB *, const DBT *, const DBT *))); + int (*set_cachesize) __P((DB *, u_int32_t, u_int32_t, int)); + int (*set_create_dir) __P((DB *, const char *)); + int (*set_dup_compare) + __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + int (*set_encrypt) __P((DB *, const char *, u_int32_t)); + void (*set_errcall) __P((DB *, + void (*)(const DB_ENV *, const char *, const char *))); + void (*set_errfile) __P((DB *, FILE *)); + void (*set_errpfx) __P((DB *, const char *)); + int (*set_feedback) __P((DB *, void (*)(DB *, int, int))); + int (*set_flags) __P((DB *, u_int32_t)); + int (*set_h_compare) + __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + int (*set_h_ffactor) __P((DB *, u_int32_t)); + int (*set_h_hash) + __P((DB *, u_int32_t (*)(DB *, const void *, u_int32_t))); + int (*set_h_nelem) __P((DB *, u_int32_t)); + int (*set_heapsize) __P((DB *, u_int32_t, u_int32_t, u_int32_t)); + int (*set_lorder) __P((DB *, int)); + void (*set_msgcall) __P((DB *, void 
(*)(const DB_ENV *, const char *))); + void (*set_msgfile) __P((DB *, FILE *)); + int (*set_pagesize) __P((DB *, u_int32_t)); + int (*set_paniccall) __P((DB *, void (*)(DB_ENV *, int))); + int (*set_partition) __P((DB *, + u_int32_t, DBT *, u_int32_t (*)(DB *, DBT *key))); + int (*set_partition_dirs) __P((DB *, const char **)); + int (*set_priority) __P((DB *, DB_CACHE_PRIORITY)); + int (*set_q_extentsize) __P((DB *, u_int32_t)); + int (*set_re_delim) __P((DB *, int)); + int (*set_re_len) __P((DB *, u_int32_t)); + int (*set_re_pad) __P((DB *, int)); + int (*set_re_source) __P((DB *, const char *)); + int (*sort_multiple) __P((DB *, DBT *, DBT *, u_int32_t)); + int (*stat) __P((DB *, DB_TXN *, void *, u_int32_t)); + int (*stat_print) __P((DB *, u_int32_t)); + int (*sync) __P((DB *, u_int32_t)); + int (*truncate) __P((DB *, DB_TXN *, u_int32_t *, u_int32_t)); + int (*upgrade) __P((DB *, const char *, u_int32_t)); + int (*verify) + __P((DB *, const char *, const char *, FILE *, u_int32_t)); + /* DB PUBLIC HANDLE LIST END */ + + /* DB PRIVATE HANDLE LIST BEGIN */ + int (*dump) __P((DB *, const char *, + int (*)(void *, const void *), void *, int, int)); + int (*db_am_remove) __P((DB *, DB_THREAD_INFO *, + DB_TXN *, const char *, const char *, u_int32_t)); + int (*db_am_rename) __P((DB *, DB_THREAD_INFO *, + DB_TXN *, const char *, const char *, const char *)); + /* DB PRIVATE HANDLE LIST END */ + + /* + * Never called; these are a place to save function pointers + * so that we can undo an associate. + */ + int (*stored_get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*stored_close) __P((DB *, u_int32_t)); + + /* Alternative handle close function, used by C++ API. */ + int (*alt_close) __P((DB *, u_int32_t)); + +#define DB_OK_BTREE 0x01 +#define DB_OK_HASH 0x02 +#define DB_OK_HEAP 0x04 +#define DB_OK_QUEUE 0x08 +#define DB_OK_RECNO 0x10 + u_int32_t am_ok; /* Legal AM choices. 
*/ + + /* + * This field really ought to be an AM_FLAG, but we have + * have run out of bits. If/when we decide to split up + * the flags, we can incorporate it. + */ + int preserve_fid; /* Do not free fileid on close. */ + +#define DB_AM_CHKSUM 0x00000001 /* Checksumming */ +#define DB_AM_COMPENSATE 0x00000002 /* Created by compensating txn */ +#define DB_AM_COMPRESS 0x00000004 /* Compressed BTree */ +#define DB_AM_CREATED 0x00000008 /* Database was created upon open */ +#define DB_AM_CREATED_MSTR 0x00000010 /* Encompassing file was created */ +#define DB_AM_DBM_ERROR 0x00000020 /* Error in DBM/NDBM database */ +#define DB_AM_DELIMITER 0x00000040 /* Variable length delimiter set */ +#define DB_AM_DISCARD 0x00000080 /* Discard any cached pages */ +#define DB_AM_DUP 0x00000100 /* DB_DUP */ +#define DB_AM_DUPSORT 0x00000200 /* DB_DUPSORT */ +#define DB_AM_ENCRYPT 0x00000400 /* Encryption */ +#define DB_AM_FIXEDLEN 0x00000800 /* Fixed-length records */ +#define DB_AM_INMEM 0x00001000 /* In-memory; no sync on close */ +#define DB_AM_INORDER 0x00002000 /* DB_INORDER */ +#define DB_AM_IN_RENAME 0x00004000 /* File is being renamed */ +#define DB_AM_NOT_DURABLE 0x00008000 /* Do not log changes */ +#define DB_AM_OPEN_CALLED 0x00010000 /* DB->open called */ +#define DB_AM_PAD 0x00020000 /* Fixed-length record pad */ +#define DB_AM_PGDEF 0x00040000 /* Page size was defaulted */ +#define DB_AM_RDONLY 0x00080000 /* Database is readonly */ +#define DB_AM_READ_UNCOMMITTED 0x00100000 /* Support degree 1 isolation */ +#define DB_AM_RECNUM 0x00200000 /* DB_RECNUM */ +#define DB_AM_RECOVER 0x00400000 /* DB opened by recovery routine */ +#define DB_AM_RENUMBER 0x00800000 /* DB_RENUMBER */ +#define DB_AM_REVSPLITOFF 0x01000000 /* DB_REVSPLITOFF */ +#define DB_AM_SECONDARY 0x02000000 /* Database is a secondary index */ +#define DB_AM_SNAPSHOT 0x04000000 /* DB_SNAPSHOT */ +#define DB_AM_SUBDB 0x08000000 /* Subdatabases supported */ +#define DB_AM_SWAP 0x10000000 /* Pages need to be 
byte-swapped */ +#define DB_AM_TXN 0x20000000 /* Opened in a transaction */ +#define DB_AM_VERIFYING 0x40000000 /* DB handle is in the verifier */ + u_int32_t orig_flags; /* Flags at open, for refresh */ + u_int32_t flags; +}; + +/* + * Macros for bulk operations. These are only intended for the C API. + * For C++, use DbMultiple*Iterator or DbMultiple*Builder. + * + * Bulk operations store multiple entries into a single DBT structure. The + * following macros assist with creating and reading these Multiple DBTs. + * + * The basic layout for single data items is: + * + * ------------------------------------------------------------------------- + * | data1 | ... | dataN | ..... |-1 | dNLen | dNOff | ... | d1Len | d1Off | + * ------------------------------------------------------------------------- + * + * For the DB_MULTIPLE_KEY* macros, the items are in key/data pairs, so data1 + * would be a key, and data2 its corresponding value (N is always even). + * + * For the DB_MULTIPLE_RECNO* macros, the record number is stored along with + * the len/off pair in the "header" section, and the list is zero terminated + * (since -1 is a valid record number): + * + * -------------------------------------------------------------------------- + * | d1 |..| dN |..| 0 | dNLen | dNOff | recnoN |..| d1Len | d1Off | recno1 | + * -------------------------------------------------------------------------- + */ +#define DB_MULTIPLE_INIT(pointer, dbt) \ + (pointer = (u_int8_t *)(dbt)->data + \ + (dbt)->ulen - sizeof(u_int32_t)) + +#define DB_MULTIPLE_NEXT(pointer, dbt, retdata, retdlen) \ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + if (*__p == (u_int32_t)-1) { \ + retdata = NULL; \ + pointer = NULL; \ + break; \ + } \ + retdata = (u_int8_t *)(dbt)->data + *__p--; \ + retdlen = *__p--; \ + pointer = __p; \ + if (retdlen == 0 && retdata == (u_int8_t *)(dbt)->data) \ + retdata = NULL; \ + } while (0) + +#define DB_MULTIPLE_KEY_NEXT(pointer, dbt, retkey, retklen, retdata, retdlen) 
\ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + if (*__p == (u_int32_t)-1) { \ + retdata = NULL; \ + retkey = NULL; \ + pointer = NULL; \ + break; \ + } \ + retkey = (u_int8_t *)(dbt)->data + *__p--; \ + retklen = *__p--; \ + retdata = (u_int8_t *)(dbt)->data + *__p--; \ + retdlen = *__p--; \ + pointer = __p; \ + } while (0) + +#define DB_MULTIPLE_RECNO_NEXT(pointer, dbt, recno, retdata, retdlen) \ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + if (*__p == (u_int32_t)0) { \ + recno = 0; \ + retdata = NULL; \ + pointer = NULL; \ + break; \ + } \ + recno = *__p--; \ + retdata = (u_int8_t *)(dbt)->data + *__p--; \ + retdlen = *__p--; \ + pointer = __p; \ + } while (0) + +#define DB_MULTIPLE_WRITE_INIT(pointer, dbt) \ + do { \ + (dbt)->flags |= DB_DBT_BULK; \ + pointer = (u_int8_t *)(dbt)->data + \ + (dbt)->ulen - sizeof(u_int32_t); \ + *(u_int32_t *)(pointer) = (u_int32_t)-1; \ + } while (0) + +#define DB_MULTIPLE_RESERVE_NEXT(pointer, dbt, writedata, writedlen) \ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\ + (dbt)->ulen - sizeof(u_int32_t)) ? 
0 : __p[1] + __p[2];\ + if ((u_int8_t *)(dbt)->data + __off + (writedlen) > \ + (u_int8_t *)(__p - 2)) \ + writedata = NULL; \ + else { \ + writedata = (u_int8_t *)(dbt)->data + __off; \ + __p[0] = __off; \ + __p[-1] = (u_int32_t)(writedlen); \ + __p[-2] = (u_int32_t)-1; \ + pointer = __p - 2; \ + } \ + } while (0) + +#define DB_MULTIPLE_WRITE_NEXT(pointer, dbt, writedata, writedlen) \ + do { \ + void *__destd; \ + DB_MULTIPLE_RESERVE_NEXT((pointer), (dbt), \ + __destd, (writedlen)); \ + if (__destd == NULL) \ + pointer = NULL; \ + else \ + memcpy(__destd, (writedata), (writedlen)); \ + } while (0) + +#define DB_MULTIPLE_KEY_RESERVE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\ + (dbt)->ulen - sizeof(u_int32_t)) ? 0 : __p[1] + __p[2];\ + if ((u_int8_t *)(dbt)->data + __off + (writeklen) + \ + (writedlen) > (u_int8_t *)(__p - 4)) { \ + writekey = NULL; \ + writedata = NULL; \ + } else { \ + writekey = (u_int8_t *)(dbt)->data + __off; \ + __p[0] = __off; \ + __p[-1] = (u_int32_t)(writeklen); \ + __p -= 2; \ + __off += (u_int32_t)(writeklen); \ + writedata = (u_int8_t *)(dbt)->data + __off; \ + __p[0] = __off; \ + __p[-1] = (u_int32_t)(writedlen); \ + __p[-2] = (u_int32_t)-1; \ + pointer = __p - 2; \ + } \ + } while (0) + +#define DB_MULTIPLE_KEY_WRITE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \ + do { \ + void *__destk, *__destd; \ + DB_MULTIPLE_KEY_RESERVE_NEXT((pointer), (dbt), \ + __destk, (writeklen), __destd, (writedlen)); \ + if (__destk == NULL) \ + pointer = NULL; \ + else { \ + memcpy(__destk, (writekey), (writeklen)); \ + if (__destd != NULL) \ + memcpy(__destd, (writedata), (writedlen));\ + } \ + } while (0) + +#define DB_MULTIPLE_RECNO_WRITE_INIT(pointer, dbt) \ + do { \ + (dbt)->flags |= DB_DBT_BULK; \ + pointer = (u_int8_t *)(dbt)->data + \ + (dbt)->ulen - sizeof(u_int32_t); \ + *(u_int32_t *)(pointer) 
= 0; \ + } while (0) + +#define DB_MULTIPLE_RECNO_RESERVE_NEXT(pointer, dbt, recno, writedata, writedlen) \ + do { \ + u_int32_t *__p = (u_int32_t *)(pointer); \ + u_int32_t __off = ((pointer) == (u_int8_t *)(dbt)->data +\ + (dbt)->ulen - sizeof(u_int32_t)) ? 0 : __p[1] + __p[2]; \ + if (((u_int8_t *)(dbt)->data + __off) + (writedlen) > \ + (u_int8_t *)(__p - 3)) \ + writedata = NULL; \ + else { \ + writedata = (u_int8_t *)(dbt)->data + __off; \ + __p[0] = (u_int32_t)(recno); \ + __p[-1] = __off; \ + __p[-2] = (u_int32_t)(writedlen); \ + __p[-3] = 0; \ + pointer = __p - 3; \ + } \ + } while (0) + +#define DB_MULTIPLE_RECNO_WRITE_NEXT(pointer, dbt, recno, writedata, writedlen)\ + do { \ + void *__destd; \ + DB_MULTIPLE_RECNO_RESERVE_NEXT((pointer), (dbt), \ + (recno), __destd, (writedlen)); \ + if (__destd == NULL) \ + pointer = NULL; \ + else if ((writedlen) != 0) \ + memcpy(__destd, (writedata), (writedlen)); \ + } while (0) + +struct __db_heap_rid { + db_pgno_t pgno; /* Page number. */ + db_indx_t indx; /* Index in the offset table. */ +}; +#define DB_HEAP_RID_SZ (sizeof(db_pgno_t) + sizeof(db_indx_t)) + +/******************************************************* + * Access method cursors. + *******************************************************/ +struct __dbc { + DB *dbp; /* Backing database */ + DB_ENV *dbenv; /* Backing environment */ + ENV *env; /* Backing environment */ + + DB_THREAD_INFO *thread_info; /* Thread that owns this cursor. */ + DB_TXN *txn; /* Associated transaction. */ + DB_CACHE_PRIORITY priority; /* Priority in cache. */ + + /* + * Active/free cursor queues. + * + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_ENTRY(__dbc) links; + */ + struct { + DBC *tqe_next; + DBC **tqe_prev; + } links; + + /* + * Cursor queue of the owning transaction. + * + * !!! + * Explicit representations of structures from queue.h. 
+ * TAILQ_ENTRY(__dbc) txn_cursors; + */ + struct { + DBC *tqe_next; /* next element */ + DBC **tqe_prev; /* address of previous next element */ + } txn_cursors; + + /* + * The DBT *'s below are used by the cursor routines to return + * data to the user when DBT flags indicate that DB should manage + * the returned memory. They point at a DBT containing the buffer + * and length that will be used, and "belonging" to the handle that + * should "own" this memory. This may be a "my_*" field of this + * cursor--the default--or it may be the corresponding field of + * another cursor, a DB handle, a join cursor, etc. In general, it + * will be whatever handle the user originally used for the current + * DB interface call. + */ + DBT *rskey; /* Returned secondary key. */ + DBT *rkey; /* Returned [primary] key. */ + DBT *rdata; /* Returned data. */ + + DBT my_rskey; /* Space for returned secondary key. */ + DBT my_rkey; /* Space for returned [primary] key. */ + DBT my_rdata; /* Space for returned data. */ + + DB_LOCKER *lref; /* Reference to default locker. */ + DB_LOCKER *locker; /* Locker for this operation. */ + DBT lock_dbt; /* DBT referencing lock. */ + DB_LOCK_ILOCK lock; /* Object to be locked. */ + DB_LOCK mylock; /* CDB lock held on this cursor. */ + + DBTYPE dbtype; /* Cursor type. */ + + DBC_INTERNAL *internal; /* Access method private. 
*/ + + /* DBC PUBLIC HANDLE LIST BEGIN */ + int (*close) __P((DBC *)); + int (*cmp) __P((DBC *, DBC *, int *, u_int32_t)); + int (*count) __P((DBC *, db_recno_t *, u_int32_t)); + int (*del) __P((DBC *, u_int32_t)); + int (*dup) __P((DBC *, DBC **, u_int32_t)); + int (*get) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*get_priority) __P((DBC *, DB_CACHE_PRIORITY *)); + int (*pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); + int (*put) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*set_priority) __P((DBC *, DB_CACHE_PRIORITY)); + /* DBC PUBLIC HANDLE LIST END */ + + /* The following are the method names deprecated in the 4.6 release. */ + int (*c_close) __P((DBC *)); + int (*c_count) __P((DBC *, db_recno_t *, u_int32_t)); + int (*c_del) __P((DBC *, u_int32_t)); + int (*c_dup) __P((DBC *, DBC **, u_int32_t)); + int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*c_pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); + int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t)); + + /* DBC PRIVATE HANDLE LIST BEGIN */ + int (*am_bulk) __P((DBC *, DBT *, u_int32_t)); + int (*am_close) __P((DBC *, db_pgno_t, int *)); + int (*am_del) __P((DBC *, u_int32_t)); + int (*am_destroy) __P((DBC *)); + int (*am_get) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + int (*am_put) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + int (*am_writelock) __P((DBC *)); + /* DBC PRIVATE HANDLE LIST END */ + +/* + * DBC_DONTLOCK and DBC_RECOVER are used during recovery and transaction + * abort. If a transaction is being aborted or recovered then DBC_RECOVER + * will be set and locking and logging will be disabled on this cursor. If + * we are performing a compensating transaction (e.g. free page processing) + * then DBC_DONTLOCK will be set to inhibit locking, but logging will still + * be required. DBC_DONTLOCK is also used if the whole database is locked. + */ +#define DBC_ACTIVE 0x00001 /* Cursor in use. */ +#define DBC_BULK 0x00002 /* Bulk update cursor. 
*/ +#define DBC_DONTLOCK 0x00004 /* Don't lock on this cursor. */ +#define DBC_DOWNREV 0x00008 /* Down rev replication master. */ +#define DBC_DUPLICATE 0x00010 /* Create a duplicate cursor. */ +#define DBC_ERROR 0x00020 /* Error in this request. */ +#define DBC_FAMILY 0x00040 /* Part of a locker family. */ +#define DBC_FROM_DB_GET 0x00080 /* Called from the DB->get() method. */ +#define DBC_MULTIPLE 0x00100 /* Return Multiple data. */ +#define DBC_MULTIPLE_KEY 0x00200 /* Return Multiple keys and data. */ +#define DBC_OPD 0x00400 /* Cursor references off-page dups. */ +#define DBC_OWN_LID 0x00800 /* Free lock id on destroy. */ +#define DBC_PARTITIONED 0x01000 /* Cursor for a partitioned db. */ +#define DBC_READ_COMMITTED 0x02000 /* Cursor has degree 2 isolation. */ +#define DBC_READ_UNCOMMITTED 0x04000 /* Cursor has degree 1 isolation. */ +#define DBC_RECOVER 0x08000 /* Recovery cursor; don't log/lock. */ +#define DBC_RMW 0x10000 /* Acquire write flag in read op. */ +#define DBC_TRANSIENT 0x20000 /* Cursor is transient. */ +#define DBC_WAS_READ_COMMITTED 0x40000 /* Cursor holds a read committed lock. */ +#define DBC_WRITECURSOR 0x80000 /* Cursor may be used to write (CDB). */ +#define DBC_WRITER 0x100000 /* Cursor immediately writing (CDB). */ + u_int32_t flags; +}; + +/* Key range statistics structure */ +struct __key_range { + double less; + double equal; + double greater; +}; + +/* Btree/Recno statistics structure. */ +struct __db_bt_stat { /* SHARED */ + u_int32_t bt_magic; /* Magic number. */ + u_int32_t bt_version; /* Version number. */ + u_int32_t bt_metaflags; /* Metadata flags. */ + u_int32_t bt_nkeys; /* Number of unique keys. */ + u_int32_t bt_ndata; /* Number of data items. */ + u_int32_t bt_pagecnt; /* Page count. */ + u_int32_t bt_pagesize; /* Page size. */ + u_int32_t bt_minkey; /* Minkey value. */ + u_int32_t bt_re_len; /* Fixed-length record length. */ + u_int32_t bt_re_pad; /* Fixed-length record pad. */ + u_int32_t bt_levels; /* Tree levels. 
*/ + u_int32_t bt_int_pg; /* Internal pages. */ + u_int32_t bt_leaf_pg; /* Leaf pages. */ + u_int32_t bt_dup_pg; /* Duplicate pages. */ + u_int32_t bt_over_pg; /* Overflow pages. */ + u_int32_t bt_empty_pg; /* Empty pages. */ + u_int32_t bt_free; /* Pages on the free list. */ + uintmax_t bt_int_pgfree; /* Bytes free in internal pages. */ + uintmax_t bt_leaf_pgfree; /* Bytes free in leaf pages. */ + uintmax_t bt_dup_pgfree; /* Bytes free in duplicate pages. */ + uintmax_t bt_over_pgfree; /* Bytes free in overflow pages. */ +}; + +struct __db_compact { + /* Input Parameters. */ + u_int32_t compact_fillpercent; /* Desired fillfactor: 1-100 */ + db_timeout_t compact_timeout; /* Lock timeout. */ + u_int32_t compact_pages; /* Max pages to process. */ + /* Output Stats. */ + u_int32_t compact_empty_buckets; /* Empty hash buckets found. */ + u_int32_t compact_pages_free; /* Number of pages freed. */ + u_int32_t compact_pages_examine; /* Number of pages examine. */ + u_int32_t compact_levels; /* Number of levels removed. */ + u_int32_t compact_deadlock; /* Number of deadlocks. */ + db_pgno_t compact_pages_truncated; /* Pages truncated to OS. */ + /* Internal. */ + db_pgno_t compact_truncate; /* Page number for truncation */ +}; + +/* Hash statistics structure. */ +struct __db_h_stat { /* SHARED */ + u_int32_t hash_magic; /* Magic number. */ + u_int32_t hash_version; /* Version number. */ + u_int32_t hash_metaflags; /* Metadata flags. */ + u_int32_t hash_nkeys; /* Number of unique keys. */ + u_int32_t hash_ndata; /* Number of data items. */ + u_int32_t hash_pagecnt; /* Page count. */ + u_int32_t hash_pagesize; /* Page size. */ + u_int32_t hash_ffactor; /* Fill factor specified at create. */ + u_int32_t hash_buckets; /* Number of hash buckets. */ + u_int32_t hash_free; /* Pages on the free list. */ + uintmax_t hash_bfree; /* Bytes free on bucket pages. */ + u_int32_t hash_bigpages; /* Number of big key/data pages. 
*/ + uintmax_t hash_big_bfree; /* Bytes free on big item pages. */ + u_int32_t hash_overflows; /* Number of overflow pages. */ + uintmax_t hash_ovfl_free; /* Bytes free on ovfl pages. */ + u_int32_t hash_dup; /* Number of dup pages. */ + uintmax_t hash_dup_free; /* Bytes free on duplicate pages. */ +}; + +/* Heap statistics structure. */ +struct __db_heap_stat { /* SHARED */ + u_int32_t heap_magic; /* Magic number. */ + u_int32_t heap_version; /* Version number. */ + u_int32_t heap_metaflags; /* Metadata flags. */ + u_int32_t heap_nrecs; /* Number of records. */ + u_int32_t heap_pagecnt; /* Page count. */ + u_int32_t heap_pagesize; /* Page size. */ + u_int32_t heap_nregions; /* Number of regions. */ +}; + +/* Queue statistics structure. */ +struct __db_qam_stat { /* SHARED */ + u_int32_t qs_magic; /* Magic number. */ + u_int32_t qs_version; /* Version number. */ + u_int32_t qs_metaflags; /* Metadata flags. */ + u_int32_t qs_nkeys; /* Number of unique keys. */ + u_int32_t qs_ndata; /* Number of data items. */ + u_int32_t qs_pagesize; /* Page size. */ + u_int32_t qs_extentsize; /* Pages per extent. */ + u_int32_t qs_pages; /* Data pages. */ + u_int32_t qs_re_len; /* Fixed-length record length. */ + u_int32_t qs_re_pad; /* Fixed-length record pad. */ + u_int32_t qs_pgfree; /* Bytes free in data pages. */ + u_int32_t qs_first_recno; /* First not deleted record. */ + u_int32_t qs_cur_recno; /* Next available record number. */ +}; + +/******************************************************* + * Environment. + *******************************************************/ +#define DB_REGION_MAGIC 0x120897 /* Environment magic number. */ + +/* + * Database environment structure. + * + * This is the public database environment handle. The private environment + * handle is the ENV structure. The user owns this structure, the library + * owns the ENV structure. 
The reason there are two structures is because + * the user's configuration outlives any particular DB_ENV->open call, and + * separate structures allows us to easily discard internal information without + * discarding the user's configuration. + * + * Fields in the DB_ENV structure should normally be set only by application + * DB_ENV handle methods. + */ + +/* + * Memory configuration types. + */ +typedef enum { + DB_MEM_LOCK=1, + DB_MEM_LOCKOBJECT=2, + DB_MEM_LOCKER=3, + DB_MEM_LOGID=4, + DB_MEM_TRANSACTION=5, + DB_MEM_THREAD=6 +} DB_MEM_CONFIG; + +struct __db_env { + ENV *env; /* Linked ENV structure */ + + /* + * The DB_ENV structure can be used concurrently, so field access is + * protected. + */ + db_mutex_t mtx_db_env; /* DB_ENV structure mutex */ + + /* Error message callback */ + void (*db_errcall) __P((const DB_ENV *, const char *, const char *)); + FILE *db_errfile; /* Error message file stream */ + const char *db_errpfx; /* Error message prefix */ + + /* Other message callback */ + void (*db_msgcall) __P((const DB_ENV *, const char *)); + FILE *db_msgfile; /* Other message file stream */ + + /* Other application callback functions */ + int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + void (*db_event_func) __P((DB_ENV *, u_int32_t, void *)); + void (*db_feedback) __P((DB_ENV *, int, int)); + void (*db_free) __P((void *)); + void (*db_paniccall) __P((DB_ENV *, int)); + void *(*db_malloc) __P((size_t)); + void *(*db_realloc) __P((void *, size_t)); + int (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); + void (*thread_id) __P((DB_ENV *, pid_t *, db_threadid_t *)); + char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *)); + + /* Application specified paths */ + char *db_log_dir; /* Database log file directory */ + char *db_tmp_dir; /* Database tmp file directory */ + + char *db_create_dir; /* Create directory for data files */ + char **db_data_dir; /* Database data file directories */ + int data_cnt; /* 
Database data file slots */ + int data_next; /* Next database data file slot */ + + char *intermediate_dir_mode; /* Intermediate directory perms */ + + long shm_key; /* shmget key */ + + char *passwd; /* Cryptography support */ + size_t passwd_len; + + /* Private handle references */ + void *app_private; /* Application-private handle */ + void *api1_internal; /* C++, Perl API private */ + void *api2_internal; /* Java API private */ + + u_int32_t verbose; /* DB_VERB_XXX flags */ + + /* Mutex configuration */ + u_int32_t mutex_align; /* Mutex alignment */ + u_int32_t mutex_cnt; /* Number of mutexes to configure */ + u_int32_t mutex_inc; /* Number of mutexes to add */ + u_int32_t mutex_max; /* Max number of mutexes */ + u_int32_t mutex_tas_spins;/* Test-and-set spin count */ + + /* Locking configuration */ + u_int8_t *lk_conflicts; /* Two dimensional conflict matrix */ + int lk_modes; /* Number of lock modes in table */ + u_int32_t lk_detect; /* Deadlock detect on all conflicts */ + u_int32_t lk_max; /* Maximum number of locks */ + u_int32_t lk_max_lockers;/* Maximum number of lockers */ + u_int32_t lk_max_objects;/* Maximum number of locked objects */ + u_int32_t lk_init; /* Initial number of locks */ + u_int32_t lk_init_lockers;/* Initial number of lockers */ + u_int32_t lk_init_objects;/* Initial number of locked objects */ + u_int32_t lk_partitions ;/* Number of object partitions */ + db_timeout_t lk_timeout; /* Lock timeout period */ + /* Used during initialization */ + u_int32_t locker_t_size; /* Locker hash table size. */ + u_int32_t object_t_size; /* Object hash table size. 
*/ + + /* Logging configuration */ + u_int32_t lg_bsize; /* Buffer size */ + u_int32_t lg_fileid_init; /* Initial allocation for fname structs */ + int lg_filemode; /* Log file permission mode */ + u_int32_t lg_regionmax; /* Region size */ + u_int32_t lg_size; /* Log file size */ + u_int32_t lg_flags; /* Log configuration */ + + /* Memory pool configuration */ + u_int32_t mp_gbytes; /* Cache size: GB */ + u_int32_t mp_bytes; /* Cache size: bytes */ + u_int32_t mp_max_gbytes; /* Maximum cache size: GB */ + u_int32_t mp_max_bytes; /* Maximum cache size: bytes */ + size_t mp_mmapsize; /* Maximum file size for mmap */ + int mp_maxopenfd; /* Maximum open file descriptors */ + int mp_maxwrite; /* Maximum buffers to write */ + u_int mp_ncache; /* Initial number of cache regions */ + u_int32_t mp_pagesize; /* Average page size */ + u_int32_t mp_tablesize; /* Approximate hash table size */ + u_int32_t mp_mtxcount; /* Number of mutexs */ + /* Sleep after writing max buffers */ + db_timeout_t mp_maxwrite_sleep; + + /* Transaction configuration */ + u_int32_t tx_init; /* Initial number of transactions */ + u_int32_t tx_max; /* Maximum number of transactions */ + time_t tx_timestamp; /* Recover to specific timestamp */ + db_timeout_t tx_timeout; /* Timeout for transactions */ + + /* Thread tracking configuration */ + u_int32_t thr_init; /* Thread count */ + u_int32_t thr_max; /* Thread max */ + roff_t memory_max; /* Maximum region memory */ + + /* + * The following fields are not strictly user-owned, but they outlive + * the ENV structure, and so are stored here. + */ + DB_FH *registry; /* DB_REGISTER file handle */ + u_int32_t registry_off; /* + * Offset of our slot. We can't use + * off_t because its size depends on + * build settings. 
+ */ + db_timeout_t envreg_timeout; /* DB_REGISTER wait timeout */ + +#define DB_ENV_AUTO_COMMIT 0x00000001 /* DB_AUTO_COMMIT */ +#define DB_ENV_CDB_ALLDB 0x00000002 /* CDB environment wide locking */ +#define DB_ENV_FAILCHK 0x00000004 /* Failchk is running */ +#define DB_ENV_DIRECT_DB 0x00000008 /* DB_DIRECT_DB set */ +#define DB_ENV_DSYNC_DB 0x00000010 /* DB_DSYNC_DB set */ +#define DB_ENV_DATABASE_LOCKING 0x00000020 /* Try database-level locking */ +#define DB_ENV_MULTIVERSION 0x00000040 /* DB_MULTIVERSION set */ +#define DB_ENV_NOLOCKING 0x00000080 /* DB_NOLOCKING set */ +#define DB_ENV_NOMMAP 0x00000100 /* DB_NOMMAP set */ +#define DB_ENV_NOPANIC 0x00000200 /* Okay if panic set */ +#define DB_ENV_OVERWRITE 0x00000400 /* DB_OVERWRITE set */ +#define DB_ENV_REGION_INIT 0x00000800 /* DB_REGION_INIT set */ +#define DB_ENV_TIME_NOTGRANTED 0x00001000 /* DB_TIME_NOTGRANTED set */ +#define DB_ENV_TXN_NOSYNC 0x00002000 /* DB_TXN_NOSYNC set */ +#define DB_ENV_TXN_NOWAIT 0x00004000 /* DB_TXN_NOWAIT set */ +#define DB_ENV_TXN_SNAPSHOT 0x00008000 /* DB_TXN_SNAPSHOT set */ +#define DB_ENV_TXN_WRITE_NOSYNC 0x00010000 /* DB_TXN_WRITE_NOSYNC set */ +#define DB_ENV_YIELDCPU 0x00020000 /* DB_YIELDCPU set */ +#define DB_ENV_HOTBACKUP 0x00040000 /* DB_HOTBACKUP_IN_PROGRESS set */ +#define DB_ENV_NOFLUSH 0x00080000 /* DB_NOFLUSH set */ + u_int32_t flags; + + /* DB_ENV PUBLIC HANDLE LIST BEGIN */ + int (*add_data_dir) __P((DB_ENV *, const char *)); + int (*cdsgroup_begin) __P((DB_ENV *, DB_TXN **)); + int (*close) __P((DB_ENV *, u_int32_t)); + int (*dbremove) __P((DB_ENV *, + DB_TXN *, const char *, const char *, u_int32_t)); + int (*dbrename) __P((DB_ENV *, + DB_TXN *, const char *, const char *, const char *, u_int32_t)); + void (*err) __P((const DB_ENV *, int, const char *, ...)); + void (*errx) __P((const DB_ENV *, const char *, ...)); + int (*failchk) __P((DB_ENV *, u_int32_t)); + int (*fileid_reset) __P((DB_ENV *, const char *, u_int32_t)); + int (*get_alloc) __P((DB_ENV *, 
void *(**)(size_t), + void *(**)(void *, size_t), void (**)(void *))); + int (*get_app_dispatch) + __P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops))); + int (*get_cache_max) __P((DB_ENV *, u_int32_t *, u_int32_t *)); + int (*get_cachesize) __P((DB_ENV *, u_int32_t *, u_int32_t *, int *)); + int (*get_create_dir) __P((DB_ENV *, const char **)); + int (*get_data_dirs) __P((DB_ENV *, const char ***)); + int (*get_data_len) __P((DB_ENV *, u_int32_t *)); + int (*get_encrypt_flags) __P((DB_ENV *, u_int32_t *)); + void (*get_errcall) __P((DB_ENV *, + void (**)(const DB_ENV *, const char *, const char *))); + void (*get_errfile) __P((DB_ENV *, FILE **)); + void (*get_errpfx) __P((DB_ENV *, const char **)); + int (*get_flags) __P((DB_ENV *, u_int32_t *)); + int (*get_feedback) __P((DB_ENV *, void (**)(DB_ENV *, int, int))); + int (*get_home) __P((DB_ENV *, const char **)); + int (*get_intermediate_dir_mode) __P((DB_ENV *, const char **)); + int (*get_isalive) __P((DB_ENV *, + int (**)(DB_ENV *, pid_t, db_threadid_t, u_int32_t))); + int (*get_lg_bsize) __P((DB_ENV *, u_int32_t *)); + int (*get_lg_dir) __P((DB_ENV *, const char **)); + int (*get_lg_filemode) __P((DB_ENV *, int *)); + int (*get_lg_max) __P((DB_ENV *, u_int32_t *)); + int (*get_lg_regionmax) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_conflicts) __P((DB_ENV *, const u_int8_t **, int *)); + int (*get_lk_detect) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_max_lockers) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_max_locks) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_max_objects) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_partitions) __P((DB_ENV *, u_int32_t *)); + int (*get_lk_priority) __P((DB_ENV *, u_int32_t, u_int32_t *)); + int (*get_lk_tablesize) __P((DB_ENV *, u_int32_t *)); + int (*get_memory_init) __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t *)); + int (*get_memory_max) __P((DB_ENV *, u_int32_t *, u_int32_t *)); + int (*get_mp_max_openfd) __P((DB_ENV *, int *)); + int (*get_mp_max_write) 
__P((DB_ENV *, int *, db_timeout_t *)); + int (*get_mp_mmapsize) __P((DB_ENV *, size_t *)); + int (*get_mp_mtxcount) __P((DB_ENV *, u_int32_t *)); + int (*get_mp_pagesize) __P((DB_ENV *, u_int32_t *)); + int (*get_mp_tablesize) __P((DB_ENV *, u_int32_t *)); + void (*get_msgcall) + __P((DB_ENV *, void (**)(const DB_ENV *, const char *))); + void (*get_msgfile) __P((DB_ENV *, FILE **)); + int (*get_open_flags) __P((DB_ENV *, u_int32_t *)); + int (*get_shm_key) __P((DB_ENV *, long *)); + int (*get_thread_count) __P((DB_ENV *, u_int32_t *)); + int (*get_thread_id_fn) + __P((DB_ENV *, void (**)(DB_ENV *, pid_t *, db_threadid_t *))); + int (*get_thread_id_string_fn) __P((DB_ENV *, + char *(**)(DB_ENV *, pid_t, db_threadid_t, char *))); + int (*get_timeout) __P((DB_ENV *, db_timeout_t *, u_int32_t)); + int (*get_tmp_dir) __P((DB_ENV *, const char **)); + int (*get_tx_max) __P((DB_ENV *, u_int32_t *)); + int (*get_tx_timestamp) __P((DB_ENV *, time_t *)); + int (*get_verbose) __P((DB_ENV *, u_int32_t, int *)); + int (*is_bigendian) __P((void)); + int (*lock_detect) __P((DB_ENV *, u_int32_t, u_int32_t, int *)); + int (*lock_get) __P((DB_ENV *, + u_int32_t, u_int32_t, DBT *, db_lockmode_t, DB_LOCK *)); + int (*lock_id) __P((DB_ENV *, u_int32_t *)); + int (*lock_id_free) __P((DB_ENV *, u_int32_t)); + int (*lock_put) __P((DB_ENV *, DB_LOCK *)); + int (*lock_stat) __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t)); + int (*lock_stat_print) __P((DB_ENV *, u_int32_t)); + int (*lock_vec) __P((DB_ENV *, + u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + int (*log_archive) __P((DB_ENV *, char **[], u_int32_t)); + int (*log_cursor) __P((DB_ENV *, DB_LOGC **, u_int32_t)); + int (*log_file) __P((DB_ENV *, const DB_LSN *, char *, size_t)); + int (*log_flush) __P((DB_ENV *, const DB_LSN *)); + int (*log_get_config) __P((DB_ENV *, u_int32_t, int *)); + int (*log_printf) __P((DB_ENV *, DB_TXN *, const char *, ...)); + int (*log_put) __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); + 
int (*log_put_record) __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *, + u_int32_t, u_int32_t, u_int32_t, u_int32_t, + DB_LOG_RECSPEC *, ...)); + int (*log_read_record) __P((DB_ENV *, DB **, + void *, void *, DB_LOG_RECSPEC *, u_int32_t, void **)); + int (*log_set_config) __P((DB_ENV *, u_int32_t, int)); + int (*log_stat) __P((DB_ENV *, DB_LOG_STAT **, u_int32_t)); + int (*log_stat_print) __P((DB_ENV *, u_int32_t)); + int (*log_verify) __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); + int (*lsn_reset) __P((DB_ENV *, const char *, u_int32_t)); + int (*memp_fcreate) __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t)); + int (*memp_register) __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t, + void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); + int (*memp_stat) __P((DB_ENV *, + DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t)); + int (*memp_stat_print) __P((DB_ENV *, u_int32_t)); + int (*memp_sync) __P((DB_ENV *, DB_LSN *)); + int (*memp_trickle) __P((DB_ENV *, int, int *)); + int (*mutex_alloc) __P((DB_ENV *, u_int32_t, db_mutex_t *)); + int (*mutex_free) __P((DB_ENV *, db_mutex_t)); + int (*mutex_get_align) __P((DB_ENV *, u_int32_t *)); + int (*mutex_get_increment) __P((DB_ENV *, u_int32_t *)); + int (*mutex_get_init) __P((DB_ENV *, u_int32_t *)); + int (*mutex_get_max) __P((DB_ENV *, u_int32_t *)); + int (*mutex_get_tas_spins) __P((DB_ENV *, u_int32_t *)); + int (*mutex_lock) __P((DB_ENV *, db_mutex_t)); + int (*mutex_set_align) __P((DB_ENV *, u_int32_t)); + int (*mutex_set_increment) __P((DB_ENV *, u_int32_t)); + int (*mutex_set_init) __P((DB_ENV *, u_int32_t)); + int (*mutex_set_max) __P((DB_ENV *, u_int32_t)); + int (*mutex_set_tas_spins) __P((DB_ENV *, u_int32_t)); + int (*mutex_stat) __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t)); + int (*mutex_stat_print) __P((DB_ENV *, u_int32_t)); + int (*mutex_unlock) __P((DB_ENV *, db_mutex_t)); + int (*open) __P((DB_ENV *, const char *, u_int32_t, int)); + int (*remove) __P((DB_ENV *, const char *, u_int32_t)); + int 
(*rep_elect) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); + int (*rep_flush) __P((DB_ENV *)); + int (*rep_get_clockskew) __P((DB_ENV *, u_int32_t *, u_int32_t *)); + int (*rep_get_config) __P((DB_ENV *, u_int32_t, int *)); + int (*rep_get_limit) __P((DB_ENV *, u_int32_t *, u_int32_t *)); + int (*rep_get_nsites) __P((DB_ENV *, u_int32_t *)); + int (*rep_get_priority) __P((DB_ENV *, u_int32_t *)); + int (*rep_get_request) __P((DB_ENV *, u_int32_t *, u_int32_t *)); + int (*rep_get_timeout) __P((DB_ENV *, int, u_int32_t *)); + int (*rep_process_message) + __P((DB_ENV *, DBT *, DBT *, int, DB_LSN *)); + int (*rep_set_clockskew) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*rep_set_config) __P((DB_ENV *, u_int32_t, int)); + int (*rep_set_limit) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*rep_set_nsites) __P((DB_ENV *, u_int32_t)); + int (*rep_set_priority) __P((DB_ENV *, u_int32_t)); + int (*rep_set_request) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*rep_set_timeout) __P((DB_ENV *, int, db_timeout_t)); + int (*rep_set_transport) __P((DB_ENV *, int, int (*)(DB_ENV *, + const DBT *, const DBT *, const DB_LSN *, int, u_int32_t))); + int (*rep_start) __P((DB_ENV *, DBT *, u_int32_t)); + int (*rep_stat) __P((DB_ENV *, DB_REP_STAT **, u_int32_t)); + int (*rep_stat_print) __P((DB_ENV *, u_int32_t)); + int (*rep_sync) __P((DB_ENV *, u_int32_t)); + int (*repmgr_channel) __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); + int (*repmgr_get_ack_policy) __P((DB_ENV *, int *)); + int (*repmgr_local_site) __P((DB_ENV *, DB_SITE **)); + int (*repmgr_msg_dispatch) __P((DB_ENV *, + void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), + u_int32_t)); + int (*repmgr_set_ack_policy) __P((DB_ENV *, int)); + int (*repmgr_site) + __P((DB_ENV *, const char *, u_int, DB_SITE**, u_int32_t)); + int (*repmgr_site_by_eid) __P((DB_ENV *, int, DB_SITE**)); + int (*repmgr_site_list) __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); + int (*repmgr_start) __P((DB_ENV *, int, u_int32_t)); + 
int (*repmgr_stat) __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); + int (*repmgr_stat_print) __P((DB_ENV *, u_int32_t)); + int (*set_alloc) __P((DB_ENV *, void *(*)(size_t), + void *(*)(void *, size_t), void (*)(void *))); + int (*set_app_dispatch) + __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); + int (*set_cache_max) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*set_cachesize) __P((DB_ENV *, u_int32_t, u_int32_t, int)); + int (*set_create_dir) __P((DB_ENV *, const char *)); + int (*set_data_dir) __P((DB_ENV *, const char *)); + int (*set_data_len) __P((DB_ENV *, u_int32_t)); + int (*set_encrypt) __P((DB_ENV *, const char *, u_int32_t)); + void (*set_errcall) __P((DB_ENV *, + void (*)(const DB_ENV *, const char *, const char *))); + void (*set_errfile) __P((DB_ENV *, FILE *)); + void (*set_errpfx) __P((DB_ENV *, const char *)); + int (*set_event_notify) + __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *))); + int (*set_feedback) __P((DB_ENV *, void (*)(DB_ENV *, int, int))); + int (*set_flags) __P((DB_ENV *, u_int32_t, int)); + int (*set_intermediate_dir_mode) __P((DB_ENV *, const char *)); + int (*set_isalive) __P((DB_ENV *, + int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t))); + int (*set_lg_bsize) __P((DB_ENV *, u_int32_t)); + int (*set_lg_dir) __P((DB_ENV *, const char *)); + int (*set_lg_filemode) __P((DB_ENV *, int)); + int (*set_lg_max) __P((DB_ENV *, u_int32_t)); + int (*set_lg_regionmax) __P((DB_ENV *, u_int32_t)); + int (*set_lk_conflicts) __P((DB_ENV *, u_int8_t *, int)); + int (*set_lk_detect) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_lockers) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_locks) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_objects) __P((DB_ENV *, u_int32_t)); + int (*set_lk_partitions) __P((DB_ENV *, u_int32_t)); + int (*set_lk_priority) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*set_lk_tablesize) __P((DB_ENV *, u_int32_t)); + int (*set_memory_init) __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t)); + int 
(*set_memory_max) __P((DB_ENV *, u_int32_t, u_int32_t)); + int (*set_mp_max_openfd) __P((DB_ENV *, int)); + int (*set_mp_max_write) __P((DB_ENV *, int, db_timeout_t)); + int (*set_mp_mmapsize) __P((DB_ENV *, size_t)); + int (*set_mp_mtxcount) __P((DB_ENV *, u_int32_t)); + int (*set_mp_pagesize) __P((DB_ENV *, u_int32_t)); + int (*set_mp_tablesize) __P((DB_ENV *, u_int32_t)); + void (*set_msgcall) + __P((DB_ENV *, void (*)(const DB_ENV *, const char *))); + void (*set_msgfile) __P((DB_ENV *, FILE *)); + int (*set_paniccall) __P((DB_ENV *, void (*)(DB_ENV *, int))); + int (*set_shm_key) __P((DB_ENV *, long)); + int (*set_thread_count) __P((DB_ENV *, u_int32_t)); + int (*set_thread_id) + __P((DB_ENV *, void (*)(DB_ENV *, pid_t *, db_threadid_t *))); + int (*set_thread_id_string) __P((DB_ENV *, + char *(*)(DB_ENV *, pid_t, db_threadid_t, char *))); + int (*set_timeout) __P((DB_ENV *, db_timeout_t, u_int32_t)); + int (*set_tmp_dir) __P((DB_ENV *, const char *)); + int (*set_tx_max) __P((DB_ENV *, u_int32_t)); + int (*set_tx_timestamp) __P((DB_ENV *, time_t *)); + int (*set_verbose) __P((DB_ENV *, u_int32_t, int)); + int (*txn_applied) __P((DB_ENV *, + DB_TXN_TOKEN *, db_timeout_t, u_int32_t)); + int (*stat_print) __P((DB_ENV *, u_int32_t)); + int (*txn_begin) __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); + int (*txn_checkpoint) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); + int (*txn_recover) __P((DB_ENV *, + DB_PREPLIST *, long, long *, u_int32_t)); + int (*txn_stat) __P((DB_ENV *, DB_TXN_STAT **, u_int32_t)); + int (*txn_stat_print) __P((DB_ENV *, u_int32_t)); + /* DB_ENV PUBLIC HANDLE LIST END */ + + /* DB_ENV PRIVATE HANDLE LIST BEGIN */ + int (*prdbt) __P((DBT *, int, + const char *, void *, int (*)(void *, const void *), int, int)); + /* DB_ENV PRIVATE HANDLE LIST END */ +}; + +/* + * Dispatch structure for recovery, log verification and print routines. 
Since + * internal and external routines take different arguments (ENV versus DB_ENV), + * we need something more elaborate than a single pointer and size. + */ +struct __db_distab { + int (**int_dispatch) __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t int_size; + int (**ext_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + size_t ext_size; +}; + +/* + * Log verification configuration structure. + */ +struct __db_logvrfy_config { + int continue_after_fail, verbose; + u_int32_t cachesize; + const char *temp_envhome; + const char *dbfile, *dbname; + DB_LSN start_lsn, end_lsn; + time_t start_time, end_time; +}; + +struct __db_channel { + CHANNEL *channel; /* Pointer to internal state details. */ + int eid; /* Env. ID passed in constructor. */ + db_timeout_t timeout; + + /* DB_CHANNEL PUBLIC HANDLE LIST BEGIN */ + int (*close) __P((DB_CHANNEL *, u_int32_t)); + int (*send_msg) __P((DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); + int (*send_request) __P((DB_CHANNEL *, + DBT *, u_int32_t, DBT *, db_timeout_t, u_int32_t)); + int (*set_timeout) __P((DB_CHANNEL *, db_timeout_t)); + /* DB_CHANNEL PUBLIC HANDLE LIST END */ +}; + +struct __db_site { + ENV *env; + int eid; + const char *host; + u_int port; + u_int32_t flags; + + /* DB_SITE PUBLIC HANDLE LIST BEGIN */ + int (*get_address) __P((DB_SITE *, const char **, u_int *)); + int (*get_config) __P((DB_SITE *, u_int32_t, u_int32_t *)); + int (*get_eid) __P((DB_SITE *, int *)); + int (*set_config) __P((DB_SITE *, u_int32_t, u_int32_t)); + int (*remove) __P((DB_SITE *)); + int (*close) __P((DB_SITE *)); + /* DB_SITE PUBLIC HANDLE LIST END */ +}; + +#if DB_DBM_HSEARCH != 0 +/******************************************************* + * Dbm/Ndbm historic interfaces. + *******************************************************/ +typedef struct __db DBM; + +#define DBM_INSERT 0 /* Flags to dbm_store(). 
*/ +#define DBM_REPLACE 1 + +/* + * The DB support for ndbm(3) always appends this suffix to the + * file name to avoid overwriting the user's original database. + */ +#define DBM_SUFFIX ".db" + +#if defined(_XPG4_2) +typedef struct { + char *dptr; + size_t dsize; +} datum; +#else +typedef struct { + char *dptr; + int dsize; +} datum; +#endif + +/* + * Translate NDBM calls into DB calls so that DB doesn't step on the + * application's name space. + */ +#define dbm_clearerr(a) __db_ndbm_clearerr@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_close(a) __db_ndbm_close@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_delete(a, b) __db_ndbm_delete@DB_VERSION_UNIQUE_NAME@(a, b) +#define dbm_dirfno(a) __db_ndbm_dirfno@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_error(a) __db_ndbm_error@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_fetch(a, b) __db_ndbm_fetch@DB_VERSION_UNIQUE_NAME@(a, b) +#define dbm_firstkey(a) __db_ndbm_firstkey@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_nextkey(a) __db_ndbm_nextkey@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_open(a, b, c) __db_ndbm_open@DB_VERSION_UNIQUE_NAME@(a, b, c) +#define dbm_pagfno(a) __db_ndbm_pagfno@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_rdonly(a) __db_ndbm_rdonly@DB_VERSION_UNIQUE_NAME@(a) +#define dbm_store(a, b, c, d) \ + __db_ndbm_store@DB_VERSION_UNIQUE_NAME@(a, b, c, d) + +/* + * Translate DBM calls into DB calls so that DB doesn't step on the + * application's name space. + * + * The global variables dbrdonly, dirf and pagf were not retained when 4BSD + * replaced the dbm interface with ndbm, and are not supported here. 
+ */ +#define dbminit(a) __db_dbm_init@DB_VERSION_UNIQUE_NAME@(a) +#define dbmclose __db_dbm_close@DB_VERSION_UNIQUE_NAME@ +#if !defined(__cplusplus) +#define delete(a) __db_dbm_delete@DB_VERSION_UNIQUE_NAME@(a) +#endif +#define fetch(a) __db_dbm_fetch@DB_VERSION_UNIQUE_NAME@(a) +#define firstkey __db_dbm_firstkey@DB_VERSION_UNIQUE_NAME@ +#define nextkey(a) __db_dbm_nextkey@DB_VERSION_UNIQUE_NAME@(a) +#define store(a, b) __db_dbm_store@DB_VERSION_UNIQUE_NAME@(a, b) + +/******************************************************* + * Hsearch historic interface. + *******************************************************/ +typedef enum { + FIND, ENTER +} ACTION; + +typedef struct entry { + char *key; + char *data; +} ENTRY; + +#define hcreate(a) __db_hcreate@DB_VERSION_UNIQUE_NAME@(a) +#define hdestroy __db_hdestroy@DB_VERSION_UNIQUE_NAME@ +#define hsearch(a, b) __db_hsearch@DB_VERSION_UNIQUE_NAME@(a, b) + +#endif /* DB_DBM_HSEARCH */ + +#if defined(__cplusplus) +} +#endif + +@platform_footer@ +#endif /* !_DB_H_ */ diff --git a/src/dbinc/db_185.in b/src/dbinc/db_185.in new file mode 100644 index 00000000..971d6b68 --- /dev/null +++ b/src/dbinc/db_185.in @@ -0,0 +1,176 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _DB_185_H_ +#define _DB_185_H_ + +#include <sys/types.h> + +#include <limits.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * XXX + * Handle function prototypes and the keyword "const". This steps on name + * space that DB doesn't control, but all of the other solutions are worse. + */ +#undef __P +#if defined(__STDC__) || defined(__cplusplus) +#define __P(protos) protos /* ANSI C prototypes */ +#else +#define const +#define __P(protos) () /* K&R C preprocessor */ +#endif + +#define RET_ERROR -1 /* Return values. */ +#define RET_SUCCESS 0 +#define RET_SPECIAL 1 + +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ +@u_int8_decl@ +@int16_decl@ +@u_int16_decl@ +@int32_decl@ +@u_int32_decl@ +#endif + +/* + * XXX + * SGI/IRIX already has a pgno_t. 
+ */ +#ifdef __sgi +#define pgno_t db_pgno_t +#endif + +#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */ +typedef u_int32_t pgno_t; +#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */ +typedef u_int16_t indx_t; +#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */ +typedef u_int32_t recno_t; + +/* Key/data structure -- a Data-Base Thang. */ +typedef struct { + void *data; /* data */ + size_t size; /* data length */ +} DBT; + +/* Routine flags. */ +#define R_CURSOR 1 /* del, put, seq */ +#define __R_UNUSED 2 /* UNUSED */ +#define R_FIRST 3 /* seq */ +#define R_IAFTER 4 /* put (RECNO) */ +#define R_IBEFORE 5 /* put (RECNO) */ +#define R_LAST 6 /* seq (BTREE, RECNO) */ +#define R_NEXT 7 /* seq */ +#define R_NOOVERWRITE 8 /* put */ +#define R_PREV 9 /* seq (BTREE, RECNO) */ +#define R_SETCURSOR 10 /* put (RECNO) */ +#define R_RECNOSYNC 11 /* sync (RECNO) */ + +typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; + +/* Access method description structure. */ +typedef struct __db { + DBTYPE type; /* Underlying db type. */ + int (*close) __P((struct __db *)); + int (*del) __P((const struct __db *, const DBT *, u_int)); + int (*get) __P((const struct __db *, const DBT *, DBT *, u_int)); + int (*put) __P((const struct __db *, DBT *, const DBT *, u_int)); + int (*seq) __P((const struct __db *, DBT *, DBT *, u_int)); + int (*sync) __P((const struct __db *, u_int)); + void *internal; /* Access method private. */ + int (*fd) __P((const struct __db *)); +} DB; + +#define BTREEMAGIC 0x053162 +#define BTREEVERSION 3 + +/* Structure used to pass parameters to the btree routines. 
*/ +typedef struct { +#define R_DUP 0x01 /* duplicate keys */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t maxkeypage; /* maximum keys per page */ + u_int32_t minkeypage; /* minimum keys per page */ + u_int32_t psize; /* page size */ + int (*compare) /* comparison function */ + __P((const DBT *, const DBT *)); + size_t (*prefix) /* prefix function */ + __P((const DBT *, const DBT *)); + int lorder; /* byte order */ +} BTREEINFO; + +#define HASHMAGIC 0x061561 +#define HASHVERSION 2 + +/* Structure used to pass parameters to the hashing routines. */ +typedef struct { + u_int32_t bsize; /* bucket size */ + u_int32_t ffactor; /* fill factor */ + u_int32_t nelem; /* number of elements */ + u_int32_t cachesize; /* bytes to cache */ + u_int32_t /* hash function */ + (*hash) __P((const void *, size_t)); + int lorder; /* byte order */ +} HASHINFO; + +/* Structure used to pass parameters to the record routines. */ +typedef struct { +#define R_FIXEDLEN 0x01 /* fixed-length records */ +#define R_NOKEY 0x02 /* key not required */ +#define R_SNAPSHOT 0x04 /* snapshot the input */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t psize; /* page size */ + int lorder; /* byte order */ + size_t reclen; /* record length (fixed-length records) */ + u_char bval; /* delimiting byte (variable-length records) */ + char *bfname; /* btree file name */ +} RECNOINFO; + +/* Re-define the user's dbopen calls. */ +#define dbopen __db185_open@DB_VERSION_UNIQUE_NAME@ + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_185_H_ */ diff --git a/src/dbinc/db_am.h b/src/dbinc/db_am.h new file mode 100644 index 00000000..6158abf8 --- /dev/null +++ b/src/dbinc/db_am.h @@ -0,0 +1,325 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#ifndef _DB_AM_H_ +#define _DB_AM_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +struct __db_foreign_info; \ + typedef struct __db_foreign_info DB_FOREIGN_INFO; + +/* + * Keep track of information for foreign keys. Used to maintain a linked list + * of 'primary' DBs which reference this 'foreign' DB. + */ +struct __db_foreign_info { + DB *dbp; + u_int32_t flags; + int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); + + /* + * List entries for foreign key. + * + * !!! + * Explicit representations of structures from queue.h. + * LIST_ENTRY(__db) s_links; + */ + struct { + struct __db_foreign_info *le_next; + struct __db_foreign_info **le_prev; + } f_links; +}; + +/* + * IS_ENV_AUTO_COMMIT -- + * Auto-commit test for environment operations: DbEnv::{open,remove,rename} + */ +#define IS_ENV_AUTO_COMMIT(env, txn, flags) \ + (LF_ISSET(DB_AUTO_COMMIT) || \ + (((txn) == NULL || F_ISSET((txn), TXN_FAMILY)) && \ + F_ISSET((env)->dbenv, DB_ENV_AUTO_COMMIT) && \ + !LF_ISSET(DB_NO_AUTO_COMMIT))) + +/* + * IS_DB_AUTO_COMMIT -- + * Auto-commit test for database operations. + */ +#define IS_DB_AUTO_COMMIT(dbp, txn) \ + (((txn) == NULL || F_ISSET((txn), TXN_FAMILY)) && \ + F_ISSET((dbp), DB_AM_TXN)) + +/* + * STRIP_AUTO_COMMIT -- + * Releases after 4.3 no longer require DB operations to specify the + * AUTO_COMMIT flag, but the API continues to allow it to be specified. + */ +#define STRIP_AUTO_COMMIT(f) FLD_CLR((f), DB_AUTO_COMMIT) + +/* DB recovery operation codes. 
*/ +#define DB_ADD_DUP 1 +#define DB_REM_DUP 2 +#define DB_ADD_BIG 3 +#define DB_REM_BIG 4 +#define DB_ADD_PAGE_COMPAT 5 /* Compatibility for 4.2 db_relink */ +#define DB_REM_PAGE_COMPAT 6 /* Compatibility for 4.2 db_relink */ +#define DB_APPEND_BIG 7 +#define DB_ADD_HEAP 8 +#define DB_REM_HEAP 9 + +#define OP_MODE_SHIFT 8 +#define OP_PAGE_MASK 0xff + +#define OP_SET(mode, page) (((mode) << OP_MODE_SHIFT) | (TYPE(page))) +#define OP_MODE_GET(mode) ((mode) >> OP_MODE_SHIFT) +#define OP_PAGE_GET(mode) ((mode) & OP_PAGE_MASK) + + +/* + * Standard initialization and shutdown macros for all recovery functions. + */ +#define REC_INTRO(func, ip, do_cursor) do { \ + argp = NULL; \ + dbc = NULL; \ + file_dbp = NULL; \ + COMPQUIET(mpf, NULL); /* Not all recovery routines use mpf. */\ + if ((ret = func(env, &file_dbp, \ + (info != NULL) ? ((DB_TXNHEAD *)info)->td : NULL, \ + dbtp->data, &argp)) != 0) { \ + if (ret == DB_DELETED) { \ + ret = 0; \ + goto done; \ + } \ + goto out; \ + } \ + if (do_cursor) { \ + if ((ret = __db_cursor(file_dbp, \ + ip, NULL, &dbc, DB_RECOVER)) != 0) \ + goto out; \ + } \ + mpf = file_dbp->mpf; \ +} while (0) + +#define REC_CLOSE { \ + int __t_ret; \ + if (argp != NULL) \ + __os_free(env, argp); \ + if (dbc != NULL && \ + (__t_ret = __dbc_close(dbc)) != 0 && ret == 0) \ + ret = __t_ret; \ + } \ + return (ret) + +/* + * No-op versions of the same macros. + */ +#define REC_NOOP_INTRO(func) do { \ + argp = NULL; \ + if ((ret = func(env, dbtp->data, &argp)) != 0) \ + return (ret); \ +} while (0) +#define REC_NOOP_CLOSE \ + if (argp != NULL) \ + __os_free(env, argp); \ + return (ret) + +/* + * Macro for reading pages during recovery. In most cases we + * want to avoid an error if the page is not found during rollback. 
+ */ +#define REC_FGET(mpf, ip, pgno, pagep, cont) \ + if ((ret = __memp_fget(mpf, \ + &(pgno), ip, NULL, 0, pagep)) != 0) { \ + if (ret != DB_PAGE_NOTFOUND) { \ + ret = __db_pgerr(file_dbp, pgno, ret); \ + goto out; \ + } else \ + goto cont; \ + } +#define REC_DIRTY(mpf, ip, priority, pagep) \ + if ((ret = __memp_dirty(mpf, \ + pagep, ip, NULL, priority, DB_MPOOL_EDIT)) != 0) { \ + ret = __db_pgerr(file_dbp, PGNO(*(pagep)), ret); \ + goto out; \ + } + +/* + * Standard debugging macro for all recovery functions. + */ +#ifdef DEBUG_RECOVER +#define REC_PRINT(func) \ + (void)func(env, dbtp, lsnp, op, info); +#else +#define REC_PRINT(func) +#endif + +/* + * Actions to __db_lget + */ +#define LCK_ALWAYS 1 /* Lock even for off page dup cursors */ +#define LCK_COUPLE 2 /* Lock Couple */ +#define LCK_COUPLE_ALWAYS 3 /* Lock Couple even in txn. */ +#define LCK_DOWNGRADE 4 /* Downgrade the lock. (internal) */ +#define LCK_ROLLBACK 5 /* Lock even if in rollback */ + +/* + * If doing transactions we have to hold the locks associated with a data item + * from a page for the entire transaction. However, we don't have to hold the + * locks associated with walking the tree. Distinguish between the two so that + * we don't tie up the internal pages of the tree longer than necessary. + */ +#define __LPUT(dbc, lock) \ + __ENV_LPUT((dbc)->env, lock) + +#define __ENV_LPUT(env, lock) \ + (LOCK_ISSET(lock) ? __lock_put(env, &(lock)) : 0) + +/* + * __TLPUT -- transactional lock put + * If the lock is valid then + * If we are not in a transaction put the lock. + * Else if the cursor is doing dirty reads and this was a read then + * put the lock. + * Else if the db is supporting dirty reads and this is a write then + * downgrade it. + * Else do nothing. + */ +#define __TLPUT(dbc, lock) \ + (LOCK_ISSET(lock) ? __db_lput(dbc, &(lock)) : 0) + +/* + * Check whether a database is a primary (that is, has associated secondaries). 
+ */ +#define DB_IS_PRIMARY(dbp) (LIST_FIRST(&dbp->s_secondaries) != NULL) +/* + * A database should be required to be readonly if it's been explicitly + * specified as such or if we're a client in a replicated environment + * and the user did not specify DB_TXN_NOT_DURABLE. + */ +#define DB_IS_READONLY(dbp) \ + (F_ISSET(dbp, DB_AM_RDONLY) || \ + (IS_REP_CLIENT((dbp)->env) && !F_ISSET((dbp), DB_AM_NOT_DURABLE))) + +#ifdef HAVE_COMPRESSION +/* + * Check whether a database is compressed (btree only) + */ +#define DB_IS_COMPRESSED(dbp) \ + (((BTREE *)(dbp)->bt_internal)->bt_compress != NULL) +#endif + +/* + * We copy the key out if there's any chance the key in the database is not + * the same as the user-specified key. If there is a custom comparator we + * return a key, as the user-specified key might be a partial key, containing + * only the unique identifier. [#13572] [#15770] + * + * The test for (flags != 0) is necessary for Db.{get,pget}, but it's not + * legal to pass a non-zero flags value to Dbc.{get,pget}. + * + * We need to split out the hash component, since it is possible to build + * without hash support enabled. Which would result in a null pointer access. + */ +#ifdef HAVE_HASH +#define DB_RETURNS_A_KEY_HASH(dbp) \ + ((HASH *)(dbp)->h_internal)->h_compare != NULL +#else +#define DB_RETURNS_A_KEY_HASH(dbp) 0 +#endif +#define DB_RETURNS_A_KEY(dbp, flags) \ + (((flags) != 0 && (flags) != DB_GET_BOTH && \ + (flags) != DB_GET_BOTH_RANGE && (flags) != DB_SET) || \ + ((BTREE *)(dbp)->bt_internal)->bt_compare != __bam_defcmp ||\ + DB_RETURNS_A_KEY_HASH(dbp)) + +/* + * For portability, primary keys that are record numbers are stored in + * secondaries in the same byte order as the secondary database. As a + * consequence, we need to swap the byte order of these keys before attempting + * to use them for lookups in the primary. 
We also need to swap user-supplied + * primary keys that are used in secondary lookups (for example, with the + * DB_GET_BOTH flag on a secondary get). + */ +#include "dbinc/db_swap.h" + +#define SWAP_IF_NEEDED(sdbp, pkey) \ + do { \ + if (((sdbp)->s_primary->type == DB_QUEUE || \ + (sdbp)->s_primary->type == DB_RECNO) && \ + F_ISSET((sdbp), DB_AM_SWAP)) \ + P_32_SWAP((pkey)->data); \ + } while (0) + +/* + * Cursor adjustment: + * Return the first DB handle in the sorted ENV list of DB + * handles that has a matching file ID. + */ +#define FIND_FIRST_DB_MATCH(env, dbp, tdbp) do { \ + for ((tdbp) = (dbp); \ + TAILQ_PREV((tdbp), __dblist, dblistlinks) != NULL && \ + TAILQ_PREV((tdbp), \ + __dblist, dblistlinks)->adj_fileid == (dbp)->adj_fileid;\ + (tdbp) = TAILQ_PREV((tdbp), __dblist, dblistlinks)) \ + ; \ +} while (0) + +/* + * Macros used to implement a binary search algorithm. Shared between the + * btree and hash implementations. + */ +#define DB_BINARY_SEARCH_FOR(base, limit, nument, adjust) \ + for (base = 0, limit = (nument) / (db_indx_t)(adjust); \ + (limit) != 0; (limit) >>= 1) + +#define DB_BINARY_SEARCH_INCR(index, base, limit, adjust) \ + index = (base) + (((limit) >> 1) * (adjust)) + +#define DB_BINARY_SEARCH_SHIFT_BASE(index, base, limit, adjust) do { \ + base = (index) + (adjust); \ + --(limit); \ +} while (0) + +/* + * Sequence macros, shared between sequence.c and seq_stat.c + */ +#define SEQ_IS_OPEN(seq) ((seq)->seq_key.data != NULL) + +#define SEQ_ILLEGAL_AFTER_OPEN(seq, name) \ + if (SEQ_IS_OPEN(seq)) \ + return (__db_mi_open((seq)->seq_dbp->env, name, 1)); + +#define SEQ_ILLEGAL_BEFORE_OPEN(seq, name) \ + if (!SEQ_IS_OPEN(seq)) \ + return (__db_mi_open((seq)->seq_dbp->env, name, 0)); + +/* + * Flags to __db_chk_meta. + */ +#define DB_CHK_META 0x01 /* Checksum the meta page. */ +#define DB_CHK_NOLSN 0x02 /* Don't check the LSN. */ + +/* + * Flags to __db_truncate_page. + */ +#define DB_EXCH_FREE 0x01 /* Free the old page. 
*/ +#define DB_EXCH_PARENT 0x02 /* There is a parent to update. */ + +/* We usually want to do these operations. */ +#define DB_EXCH_DEFAULT (DB_EXCH_FREE | DB_EXCH_PARENT) + +#if defined(__cplusplus) +} +#endif + +#include "dbinc/db_dispatch.h" +#include "dbinc_auto/db_auto.h" +#include "dbinc_auto/crdel_auto.h" +#include "dbinc_auto/db_ext.h" +#endif /* !_DB_AM_H_ */ diff --git a/src/dbinc/db_cxx.in b/src/dbinc/db_cxx.in new file mode 100644 index 00000000..55fc7ce6 --- /dev/null +++ b/src/dbinc/db_cxx.in @@ -0,0 +1,1493 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_CXX_H_ +#define _DB_CXX_H_ +// +// C++ assumptions: +// +// To ensure portability to many platforms, both new and old, we make +// few assumptions about the C++ compiler and library. For example, +// we do not expect STL, templates or namespaces to be available. The +// "newest" C++ feature used is exceptions, which are used liberally +// to transmit error information. Even the use of exceptions can be +// disabled at runtime, to do so, use the DB_CXX_NO_EXCEPTIONS flags +// with the DbEnv or Db constructor. +// +// C++ naming conventions: +// +// - All top level class names start with Db. +// - All class members start with lower case letter. +// - All private data members are suffixed with underscore. +// - Use underscores to divide names into multiple words. +// - Simple data accessors are named with get_ or set_ prefix. +// - All method names are taken from names of functions in the C +// layer of db (usually by dropping a prefix like "db_"). +// These methods have the same argument types and order, +// other than dropping the explicit arg that acts as "this". +// +// As a rule, each DbFoo object has exactly one underlying DB_FOO struct +// (defined in db.h) associated with it. 
In some cases, we inherit directly +// from the DB_FOO structure to make this relationship explicit. Often, +// the underlying C layer allocates and deallocates these structures, so +// there is no easy way to add any data to the DbFoo class. When you see +// a comment about whether data is permitted to be added, this is what +// is going on. Of course, if we need to add data to such C++ classes +// in the future, we will arrange to have an indirect pointer to the +// DB_FOO struct (as some of the classes already have). +// + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Forward declarations +// + +#include <stdarg.h> + +@cxx_have_stdheaders@ +#ifdef HAVE_CXX_STDHEADERS +#include <iostream> +#include <exception> +#define __DB_STD(x) std::x +#else +#include <iostream.h> +#include <exception.h> +#define __DB_STD(x) x +#endif + +#include "db.h" + +class Db; // forward +class Dbc; // forward +class DbChannel; // forward +class DbEnv; // forward +class DbHeapRecordId; // forward +class DbInfo; // forward +class DbLock; // forward +class DbLogc; // forward +class DbLsn; // forward +class DbMpoolFile; // forward +class DbPreplist; // forward +class DbSequence; // forward +class DbSite; // forward +class Dbt; // forward +class DbTxn; // forward + +class DbMultipleIterator; // forward +class DbMultipleKeyDataIterator; // forward +class DbMultipleRecnoDataIterator; // forward +class DbMultipleDataIterator; // forward + +class DbException; // forward +class DbDeadlockException; // forward +class DbLockNotGrantedException; // forward +class DbMemoryException; // forward +class DbRepHandleDeadException; // forward +class DbRunRecoveryException; // forward + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Turn off inappropriate compiler warnings +// + +#ifdef _MSC_VER + +// These are level 4 warnings that are explicitly disabled. 
+// With Visual C++, by default you do not see above level 3 unless +// you use /W4. But we like to compile with the highest level +// warnings to catch other errors. +// +// 4201: nameless struct/union +// triggered by standard include file +// +// 4514: unreferenced inline function has been removed +// certain include files in MSVC define methods that are not called +// +#pragma warning(push) +#pragma warning(disable: 4201 4514) + +#endif + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Mechanisms for declaring classes +// + +// +// Every class defined in this file has an _exported next to the class name. +// This is needed for WinTel machines so that the class methods can +// be exported or imported in a DLL as appropriate. Users of the DLL +// use the define DB_USE_DLL. When the DLL is built, DB_CREATE_DLL +// must be defined. +// +#if defined(_MSC_VER) + +# if defined(DB_CREATE_DLL) +# define _exported __declspec(dllexport) // creator of dll +# elif defined(DB_USE_DLL) +# define _exported __declspec(dllimport) // user of dll +# else +# define _exported // static lib creator or user +# endif + +#else /* _MSC_VER */ + +# define _exported + +#endif /* _MSC_VER */ + +// Some interfaces can be customized by allowing users to define +// callback functions. For performance and logistical reasons, some +// callback functions must be declared in extern "C" blocks. For others, +// we allow you to declare the callbacks in C++ or C (or an extern "C" +// block) as you wish. See the set methods for the callbacks for +// the choices. 
+// +extern "C" { + typedef void * (*db_malloc_fcn_type) + (size_t); + typedef void * (*db_realloc_fcn_type) + (void *, size_t); + typedef void (*db_free_fcn_type) + (void *); + typedef int (*bt_compare_fcn_type) /*C++ version available*/ + (DB *, const DBT *, const DBT *); + typedef size_t (*bt_prefix_fcn_type) /*C++ version available*/ + (DB *, const DBT *, const DBT *); + typedef int (*dup_compare_fcn_type) /*C++ version available*/ + (DB *, const DBT *, const DBT *); + typedef int (*h_compare_fcn_type) /*C++ version available*/ + (DB *, const DBT *, const DBT *); + typedef u_int32_t (*h_hash_fcn_type) /*C++ version available*/ + (DB *, const void *, u_int32_t); + typedef int (*pgin_fcn_type) + (DB_ENV *dbenv, db_pgno_t pgno, void *pgaddr, DBT *pgcookie); + typedef int (*pgout_fcn_type) + (DB_ENV *dbenv, db_pgno_t pgno, void *pgaddr, DBT *pgcookie); +} + +// +// Represents a database table = a set of keys with associated values. +// +class _exported Db +{ + friend class DbEnv; + +public: + Db(DbEnv*, u_int32_t); // Create a Db object. + virtual ~Db(); // Calls close() if the user hasn't. + + // These methods exactly match those in the C interface. 
+ // + virtual int associate(DbTxn *txn, Db *secondary, int (*callback) + (Db *, const Dbt *, const Dbt *, Dbt *), u_int32_t flags); + virtual int associate_foreign(Db *foreign, int (*callback) + (Db *, const Dbt *, Dbt *, const Dbt *, int *), u_int32_t flags); + virtual int close(u_int32_t flags); + virtual int compact(DbTxn *txnid, Dbt *start, + Dbt *stop, DB_COMPACT *c_data, u_int32_t flags, Dbt *end); + virtual int cursor(DbTxn *txnid, Dbc **cursorp, u_int32_t flags); + virtual int del(DbTxn *txnid, Dbt *key, u_int32_t flags); + virtual void err(int, const char *, ...); + virtual void errx(const char *, ...); + virtual int exists(DbTxn *txnid, Dbt *key, u_int32_t flags); + virtual int fd(int *fdp); + virtual int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags); + virtual int get_alloc( + db_malloc_fcn_type *, db_realloc_fcn_type *, db_free_fcn_type *); + virtual int get_append_recno(int (**)(Db *, Dbt *, db_recno_t)); + virtual int get_bt_compare(int (**)(Db *, const Dbt *, const Dbt *)); + virtual int get_bt_compress( + int (**)( + Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *), + int (**)(Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *)); + virtual int get_bt_minkey(u_int32_t *); + virtual int get_bt_prefix(size_t (**)(Db *, const Dbt *, const Dbt *)); + virtual int get_byteswapped(int *); + virtual int get_cachesize(u_int32_t *, u_int32_t *, int *); + virtual int get_create_dir(const char **); + virtual int get_dbname(const char **, const char **); + virtual int get_dup_compare(int (**)(Db *, const Dbt *, const Dbt *)); + virtual int get_encrypt_flags(u_int32_t *); + virtual void get_errcall( + void (**)(const DbEnv *, const char *, const char *)); + virtual void get_errfile(FILE **); + virtual void get_errpfx(const char **); + virtual int get_feedback(void (**)(Db *, int, int)); + virtual int get_flags(u_int32_t *); + virtual int get_heapsize(u_int32_t *, u_int32_t *); + virtual int get_h_compare(int (**)(Db *, const Dbt *, 
const Dbt *)); + virtual int get_h_ffactor(u_int32_t *); + virtual int get_h_hash(u_int32_t (**)(Db *, const void *, u_int32_t)); + virtual int get_h_nelem(u_int32_t *); + virtual int get_lorder(int *); + virtual void get_msgcall(void (**)(const DbEnv *, const char *)); + virtual void get_msgfile(FILE **); + virtual int get_multiple(); + virtual int get_open_flags(u_int32_t *); + virtual int get_pagesize(u_int32_t *); + virtual int get_partition_callback( + u_int32_t *, u_int32_t (**)(Db *, Dbt *key)); + virtual int get_partition_dirs(const char ***); + virtual int get_partition_keys(u_int32_t *, Dbt **); + virtual int get_priority(DB_CACHE_PRIORITY *); + virtual int get_q_extentsize(u_int32_t *); + virtual int get_re_delim(int *); + virtual int get_re_len(u_int32_t *); + virtual int get_re_pad(int *); + virtual int get_re_source(const char **); + virtual int get_transactional(); + virtual int get_type(DBTYPE *); + virtual int join(Dbc **curslist, Dbc **dbcp, u_int32_t flags); + virtual int key_range(DbTxn *, Dbt *, DB_KEY_RANGE *, u_int32_t); + virtual int open(DbTxn *txnid, + const char *, const char *subname, DBTYPE, u_int32_t, int); + virtual int pget(DbTxn *txnid, + Dbt *key, Dbt *pkey, Dbt *data, u_int32_t flags); + virtual int put(DbTxn *, Dbt *, Dbt *, u_int32_t); + virtual int remove(const char *, const char *, u_int32_t); + virtual int rename(const char *, const char *, const char *, u_int32_t); + virtual int set_alloc( + db_malloc_fcn_type, db_realloc_fcn_type, db_free_fcn_type); + virtual void set_app_private(void *); + virtual int set_append_recno(int (*)(Db *, Dbt *, db_recno_t)); + virtual int set_bt_compare(bt_compare_fcn_type); /*deprecated*/ + virtual int set_bt_compare(int (*)(Db *, const Dbt *, const Dbt *)); + virtual int set_bt_compress( + int (*) + (Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *), + int (*)(Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *)); + virtual int set_bt_minkey(u_int32_t); + virtual int 
set_bt_prefix(bt_prefix_fcn_type); /*deprecated*/ + virtual int set_bt_prefix(size_t (*)(Db *, const Dbt *, const Dbt *)); + virtual int set_cachesize(u_int32_t, u_int32_t, int); + virtual int set_create_dir(const char *); + virtual int set_dup_compare(dup_compare_fcn_type); /*deprecated*/ + virtual int set_dup_compare(int (*)(Db *, const Dbt *, const Dbt *)); + virtual int set_encrypt(const char *, u_int32_t); + virtual void set_errcall( + void (*)(const DbEnv *, const char *, const char *)); + virtual void set_errfile(FILE *); + virtual void set_errpfx(const char *); + virtual int set_feedback(void (*)(Db *, int, int)); + virtual int set_flags(u_int32_t); + virtual int set_heapsize(u_int32_t, u_int32_t); + virtual int set_h_compare(h_compare_fcn_type); /*deprecated*/ + virtual int set_h_compare(int (*)(Db *, const Dbt *, const Dbt *)); + virtual int set_h_ffactor(u_int32_t); + virtual int set_h_hash(h_hash_fcn_type); /*deprecated*/ + virtual int set_h_hash(u_int32_t (*)(Db *, const void *, u_int32_t)); + virtual int set_h_nelem(u_int32_t); + virtual int set_lorder(int); + virtual void set_msgcall(void (*)(const DbEnv *, const char *)); + virtual void set_msgfile(FILE *); + virtual int set_pagesize(u_int32_t); + virtual int set_paniccall(void (*)(DbEnv *, int)); + virtual int set_partition( + u_int32_t, Dbt *, u_int32_t (*)(Db *, Dbt *)); + virtual int set_partition_dirs(const char **); + virtual int set_priority(DB_CACHE_PRIORITY); + virtual int set_q_extentsize(u_int32_t); + virtual int set_re_delim(int); + virtual int set_re_len(u_int32_t); + virtual int set_re_pad(int); + virtual int set_re_source(const char *); + virtual int sort_multiple(Dbt *, Dbt *, u_int32_t); + virtual int stat(DbTxn *, void *sp, u_int32_t flags); + virtual int stat_print(u_int32_t flags); + virtual int sync(u_int32_t flags); + virtual int truncate(DbTxn *, u_int32_t *, u_int32_t); + virtual int upgrade(const char *name, u_int32_t flags); + virtual int verify( + const char *, const char 
*, __DB_STD(ostream) *, u_int32_t); + + // These additional methods are not in the C interface, and + // are only available for C++. + // + virtual void *get_app_private() const; + virtual __DB_STD(ostream) *get_error_stream(); + virtual void set_error_stream(__DB_STD(ostream) *); + virtual __DB_STD(ostream) *get_message_stream(); + virtual void set_message_stream(__DB_STD(ostream) *); + + virtual DbEnv *get_env(); + virtual DbMpoolFile *get_mpf(); + + virtual ENV *get_ENV() + { + return imp_->env; + } + + virtual DB *get_DB() + { + return imp_; + } + + virtual const DB *get_const_DB() const + { + return imp_; + } + + static Db* get_Db(DB *db) + { + return (Db *)db->api_internal; + } + + static const Db* get_const_Db(const DB *db) + { + return (const Db *)db->api_internal; + } + + u_int32_t get_create_flags() const + { + return construct_flags_; + } + +private: + // no copying + Db(const Db &); + Db &operator = (const Db &); + + void cleanup(); + int initialize(); + int error_policy(); + + // instance data + DB *imp_; + DbEnv *dbenv_; + DbMpoolFile *mpf_; + int construct_error_; + u_int32_t flags_; + u_int32_t construct_flags_; + + static int alt_close(DB *, u_int32_t); + +public: + // These are public only because they need to be called + // via C callback functions. They should never be used by + // external users of this class. 
+ // + int (*append_recno_callback_)(Db *, Dbt *, db_recno_t); + int (*associate_callback_)(Db *, const Dbt *, const Dbt *, Dbt *); + int (*associate_foreign_callback_) + (Db *, const Dbt *, Dbt *, const Dbt *, int *); + int (*bt_compare_callback_)(Db *, const Dbt *, const Dbt *); + int (*bt_compress_callback_)( + Db *, const Dbt *, const Dbt *, const Dbt *, const Dbt *, Dbt *); + int (*bt_decompress_callback_)( + Db *, const Dbt *, const Dbt *, Dbt *, Dbt *, Dbt *); + size_t (*bt_prefix_callback_)(Db *, const Dbt *, const Dbt *); + u_int32_t (*db_partition_callback_)(Db *, Dbt *); + int (*dup_compare_callback_)(Db *, const Dbt *, const Dbt *); + void (*feedback_callback_)(Db *, int, int); + int (*h_compare_callback_)(Db *, const Dbt *, const Dbt *); + u_int32_t (*h_hash_callback_)(Db *, const void *, u_int32_t); +}; + +// +// Cursor +// +class _exported Dbc : protected DBC +{ + friend class Db; + +public: + int close(); + int cmp(Dbc *other_csr, int *result, u_int32_t flags); + int count(db_recno_t *countp, u_int32_t flags); + int del(u_int32_t flags); + int dup(Dbc** cursorp, u_int32_t flags); + int get(Dbt* key, Dbt *data, u_int32_t flags); + int get_priority(DB_CACHE_PRIORITY *priorityp); + int pget(Dbt* key, Dbt* pkey, Dbt *data, u_int32_t flags); + int put(Dbt* key, Dbt *data, u_int32_t flags); + int set_priority(DB_CACHE_PRIORITY priority); + +private: + // No data is permitted in this class (see comment at top) + + // Note: use Db::cursor() to get pointers to a Dbc, + // and call Dbc::close() rather than delete to release them. 
+ // + Dbc(); + ~Dbc(); + + // no copying + Dbc(const Dbc &); + Dbc &operator = (const Dbc &); +}; + +// +// A channel in replication group +// +class _exported DbChannel +{ + friend class DbEnv; + +public: + int close(); + int send_msg(Dbt *msg, u_int32_t nmsg, u_int32_t flags); + int send_request(Dbt *request, u_int32_t nrequest, Dbt *response, + db_timeout_t timeout, u_int32_t flags); + int set_timeout(db_timeout_t timeout); + + virtual DB_CHANNEL *get_DB_CHANNEL() + { + return imp_; + } + + virtual const DB_CHANNEL *get_const_DB_CHANNEL() const + { + return imp_; + } + +private: + DbChannel(); + virtual ~DbChannel(); + + // no copying + DbChannel(const DbChannel &); + DbChannel &operator = (const DbChannel &); + DB_CHANNEL *imp_; + DbEnv *dbenv_; +}; + +// +// Berkeley DB environment class. Provides functions for opening databases. +// User of this library can use this class as a starting point for +// developing a DB application - derive their application class from +// this one, add application control logic. +// +// Note that if you use the default constructor, you must explicitly +// call appinit() before any other db activity (e.g. opening files) +// +class _exported DbEnv +{ + friend class Db; + friend class DbLock; + friend class DbMpoolFile; + +public: + // After using this constructor, you can set any needed + // parameters for the environment using the set_* methods. + // Then call open() to finish initializing the environment + // and attaching it to underlying files. + // + DbEnv(u_int32_t flags); + + virtual ~DbEnv(); + + // These methods match those in the C interface. 
+ // + virtual int add_data_dir(const char *); + virtual int cdsgroup_begin(DbTxn **tid); + virtual int close(u_int32_t); + virtual int dbremove(DbTxn *txn, const char *name, const char *subdb, + u_int32_t flags); + virtual int dbrename(DbTxn *txn, const char *name, const char *subdb, + const char *newname, u_int32_t flags); + virtual void err(int, const char *, ...); + virtual void errx(const char *, ...); + virtual int failchk(u_int32_t); + virtual int fileid_reset(const char *, u_int32_t); + virtual int get_alloc(db_malloc_fcn_type *, db_realloc_fcn_type *, + db_free_fcn_type *); + virtual void *get_app_private() const; + virtual int get_home(const char **); + virtual int get_open_flags(u_int32_t *); + virtual int open(const char *, u_int32_t, int); + virtual int remove(const char *, u_int32_t); + virtual int stat_print(u_int32_t flags); + + virtual int set_alloc(db_malloc_fcn_type, db_realloc_fcn_type, + db_free_fcn_type); + virtual void set_app_private(void *); + virtual int get_cachesize(u_int32_t *, u_int32_t *, int *); + virtual int set_cachesize(u_int32_t, u_int32_t, int); + virtual int get_cache_max(u_int32_t *, u_int32_t *); + virtual int set_cache_max(u_int32_t, u_int32_t); + virtual int get_create_dir(const char **); + virtual int set_create_dir(const char *); + virtual int get_data_dirs(const char ***); + virtual int set_data_dir(const char *); + virtual int get_encrypt_flags(u_int32_t *); + virtual int get_intermediate_dir_mode(const char **); + virtual int set_intermediate_dir_mode(const char *); + virtual int get_isalive( + int (**)(DbEnv *, pid_t, db_threadid_t, u_int32_t)); + virtual int set_isalive( + int (*)(DbEnv *, pid_t, db_threadid_t, u_int32_t)); + virtual int set_encrypt(const char *, u_int32_t); + virtual void get_errcall( + void (**)(const DbEnv *, const char *, const char *)); + virtual void set_errcall( + void (*)(const DbEnv *, const char *, const char *)); + virtual void get_errfile(FILE **); + virtual void set_errfile(FILE *); + 
virtual void get_errpfx(const char **); + virtual void set_errpfx(const char *); + virtual int set_event_notify(void (*)(DbEnv *, u_int32_t, void *)); + virtual int get_flags(u_int32_t *); + virtual int set_flags(u_int32_t, int); + virtual bool is_bigendian(); + virtual int lsn_reset(const char *, u_int32_t); + virtual int get_feedback(void (**)(DbEnv *, int, int)); + virtual int set_feedback(void (*)(DbEnv *, int, int)); + virtual int get_lg_bsize(u_int32_t *); + virtual int set_lg_bsize(u_int32_t); + virtual int get_lg_dir(const char **); + virtual int set_lg_dir(const char *); + virtual int get_lg_filemode(int *); + virtual int set_lg_filemode(int); + virtual int get_lg_max(u_int32_t *); + virtual int set_lg_max(u_int32_t); + virtual int get_lg_regionmax(u_int32_t *); + virtual int set_lg_regionmax(u_int32_t); + virtual int get_lk_conflicts(const u_int8_t **, int *); + virtual int set_lk_conflicts(u_int8_t *, int); + virtual int get_lk_detect(u_int32_t *); + virtual int set_lk_detect(u_int32_t); + virtual int get_lk_max_lockers(u_int32_t *); + virtual int set_lk_max_lockers(u_int32_t); + virtual int get_lk_max_locks(u_int32_t *); + virtual int set_lk_max_locks(u_int32_t); + virtual int get_lk_max_objects(u_int32_t *); + virtual int set_lk_max_objects(u_int32_t); + virtual int get_lk_partitions(u_int32_t *); + virtual int set_lk_partitions(u_int32_t); + virtual int get_lk_priority(u_int32_t, u_int32_t *); + virtual int set_lk_priority(u_int32_t, u_int32_t); + virtual int get_lk_tablesize(u_int32_t *); + virtual int set_lk_tablesize(u_int32_t); + virtual int get_memory_init(DB_MEM_CONFIG, u_int32_t *); + virtual int set_memory_init(DB_MEM_CONFIG, u_int32_t); + virtual int get_memory_max(u_int32_t *, u_int32_t *); + virtual int set_memory_max(u_int32_t, u_int32_t); + virtual int get_mp_mmapsize(size_t *); + virtual int set_mp_mmapsize(size_t); + virtual int get_mp_max_openfd(int *); + virtual int set_mp_max_openfd(int); + virtual int get_mp_max_write(int *, 
db_timeout_t *); + virtual int set_mp_max_write(int, db_timeout_t); + virtual int get_mp_pagesize(u_int32_t *); + virtual int set_mp_pagesize(u_int32_t); + virtual int get_mp_tablesize(u_int32_t *); + virtual int set_mp_tablesize(u_int32_t); + virtual void get_msgcall(void (**)(const DbEnv *, const char *)); + virtual void set_msgcall(void (*)(const DbEnv *, const char *)); + virtual void get_msgfile(FILE **); + virtual void set_msgfile(FILE *); + virtual int set_paniccall(void (*)(DbEnv *, int)); + virtual int get_shm_key(long *); + virtual int set_shm_key(long); + virtual int get_timeout(db_timeout_t *, u_int32_t); + virtual int set_timeout(db_timeout_t, u_int32_t); + virtual int get_tmp_dir(const char **); + virtual int set_tmp_dir(const char *); + virtual int get_tx_max(u_int32_t *); + virtual int set_tx_max(u_int32_t); + virtual int get_app_dispatch( + int (**)(DbEnv *, Dbt *, DbLsn *, db_recops)); + virtual int set_app_dispatch(int (*)(DbEnv *, + Dbt *, DbLsn *, db_recops)); + virtual int get_tx_timestamp(time_t *); + virtual int set_tx_timestamp(time_t *); + virtual int get_verbose(u_int32_t which, int *); + virtual int set_verbose(u_int32_t which, int); + + // Version information. Static methods, can be called at any time. + // + static char *version(int *major, int *minor, int *patch); + static char *full_version(int *family, int *release, + int *major, int *minor, int *patch); + + // Convert DB errors to strings + static char *strerror(int); + + // If an error is detected and the error call function + // or stream is set, a message is dispatched or printed. + // If a prefix is set, each message is prefixed. + // + // You can use set_errcall() or set_errfile() above to control + // error functionality. Alternatively, you can call + // set_error_stream() to force all errors to a C++ stream. + // It is unwise to mix these approaches. 
+ // + virtual __DB_STD(ostream) *get_error_stream(); + virtual void set_error_stream(__DB_STD(ostream) *); + virtual __DB_STD(ostream) *get_message_stream(); + virtual void set_message_stream(__DB_STD(ostream) *); + + // used internally + static void runtime_error(DbEnv *dbenv, const char *caller, int err, + int error_policy); + static void runtime_error_dbt(DbEnv *dbenv, const char *caller, Dbt *dbt, + int error_policy); + static void runtime_error_lock_get(DbEnv *dbenv, const char *caller, + int err, db_lockop_t op, db_lockmode_t mode, + Dbt *obj, DbLock lock, int index, + int error_policy); + + // Lock functions + // + virtual int lock_detect(u_int32_t flags, u_int32_t atype, int *aborted); + virtual int lock_get(u_int32_t locker, u_int32_t flags, Dbt *obj, + db_lockmode_t lock_mode, DbLock *lock); + virtual int lock_id(u_int32_t *idp); + virtual int lock_id_free(u_int32_t id); + virtual int lock_put(DbLock *lock); + virtual int lock_stat(DB_LOCK_STAT **statp, u_int32_t flags); + virtual int lock_stat_print(u_int32_t flags); + virtual int lock_vec(u_int32_t locker, u_int32_t flags, + DB_LOCKREQ list[], int nlist, DB_LOCKREQ **elistp); + + // Log functions + // + virtual int log_archive(char **list[], u_int32_t flags); + static int log_compare(const DbLsn *lsn0, const DbLsn *lsn1); + virtual int log_cursor(DbLogc **cursorp, u_int32_t flags); + virtual int log_file(DbLsn *lsn, char *namep, size_t len); + virtual int log_flush(const DbLsn *lsn); + virtual int log_get_config(u_int32_t, int *); + virtual int log_put(DbLsn *lsn, const Dbt *data, u_int32_t flags); + virtual int log_printf(DbTxn *, const char *, ...); + virtual int log_set_config(u_int32_t, int); + virtual int log_stat(DB_LOG_STAT **spp, u_int32_t flags); + virtual int log_stat_print(u_int32_t flags); + virtual int log_verify(DB_LOG_VERIFY_CONFIG *); + + // Mpool functions + // + virtual int memp_fcreate(DbMpoolFile **dbmfp, u_int32_t flags); + virtual int memp_register(int ftype, + pgin_fcn_type 
pgin_fcn,
+	    pgout_fcn_type pgout_fcn);
+	virtual int memp_stat(DB_MPOOL_STAT
+	    **gsp, DB_MPOOL_FSTAT ***fsp, u_int32_t flags);
+	virtual int memp_stat_print(u_int32_t flags);
+	virtual int memp_sync(DbLsn *lsn);
+	virtual int memp_trickle(int pct, int *nwrotep);
+
+	// Mutex functions
+	//
+	virtual int mutex_alloc(u_int32_t, db_mutex_t *);
+	virtual int mutex_free(db_mutex_t);
+	virtual int mutex_get_align(u_int32_t *);
+	virtual int mutex_get_increment(u_int32_t *);
+	virtual int mutex_get_init(u_int32_t *);
+	virtual int mutex_get_max(u_int32_t *);
+	virtual int mutex_get_tas_spins(u_int32_t *);
+	virtual int mutex_lock(db_mutex_t);
+	virtual int mutex_set_align(u_int32_t);
+	virtual int mutex_set_increment(u_int32_t);
+	virtual int mutex_set_init(u_int32_t);
+	virtual int mutex_set_max(u_int32_t);
+	virtual int mutex_set_tas_spins(u_int32_t);
+	virtual int mutex_stat(DB_MUTEX_STAT **, u_int32_t);
+	virtual int mutex_stat_print(u_int32_t);
+	virtual int mutex_unlock(db_mutex_t);
+
+	// Transaction functions
+	//
+	virtual int txn_begin(DbTxn *pid, DbTxn **tid, u_int32_t flags);
+	virtual int txn_checkpoint(u_int32_t kbyte, u_int32_t min,
+	    u_int32_t flags);
+	virtual int txn_recover(DbPreplist *preplist, long count,
+	    long *retp, u_int32_t flags);
+	virtual int txn_stat(DB_TXN_STAT **statp, u_int32_t flags);
+	virtual int txn_stat_print(u_int32_t flags);
+
+	// Replication functions
+	//
+	virtual int rep_elect(u_int32_t, u_int32_t, u_int32_t);
+	virtual int rep_flush();
+	virtual int rep_process_message(Dbt *, Dbt *, int, DbLsn *);
+	virtual int rep_start(Dbt *, u_int32_t);
+	virtual int rep_stat(DB_REP_STAT **statp, u_int32_t flags);
+	virtual int rep_stat_print(u_int32_t flags);
+	virtual int rep_get_clockskew(u_int32_t *, u_int32_t *);
+	virtual int rep_set_clockskew(u_int32_t, u_int32_t);
+	virtual int rep_get_limit(u_int32_t *, u_int32_t *);
+	virtual int rep_set_limit(u_int32_t, u_int32_t);
+	virtual int rep_set_transport(int, int (*)(DbEnv *,
+	    const Dbt
*, const Dbt *, const DbLsn *, int, u_int32_t)); + virtual int rep_set_request(u_int32_t, u_int32_t); + virtual int rep_get_request(u_int32_t *, u_int32_t *); + virtual int get_thread_count(u_int32_t *); + virtual int set_thread_count(u_int32_t); + virtual int get_thread_id_fn( + void (**)(DbEnv *, pid_t *, db_threadid_t *)); + virtual int set_thread_id(void (*)(DbEnv *, pid_t *, db_threadid_t *)); + virtual int get_thread_id_string_fn( + char *(**)(DbEnv *, pid_t, db_threadid_t, char *)); + virtual int set_thread_id_string(char *(*)(DbEnv *, + pid_t, db_threadid_t, char *)); + virtual int rep_set_config(u_int32_t, int); + virtual int rep_get_config(u_int32_t, int *); + virtual int rep_sync(u_int32_t flags); + + // Advanced replication functions + // + virtual int rep_get_nsites(u_int32_t *n); + virtual int rep_set_nsites(u_int32_t n); + virtual int rep_get_priority(u_int32_t *priorityp); + virtual int rep_set_priority(u_int32_t priority); + virtual int rep_get_timeout(int which, db_timeout_t *timeout); + virtual int rep_set_timeout(int which, db_timeout_t timeout); + virtual int repmgr_channel(int eid, DbChannel **channel, + u_int32_t flags); + virtual int repmgr_get_ack_policy(int *policy); + virtual int repmgr_set_ack_policy(int policy); + virtual int repmgr_local_site(DbSite **site); + virtual int repmgr_msg_dispatch(void (*) (DbEnv *, + DbChannel *, Dbt *, u_int32_t, u_int32_t), u_int32_t flags); + virtual int repmgr_site(const char *host, u_int port, DbSite **site, + u_int32_t flags); + virtual int repmgr_site_by_eid(int eid, DbSite **site); + virtual int repmgr_site_list(u_int *countp, DB_REPMGR_SITE **listp); + virtual int repmgr_start(int nthreads, u_int32_t flags); + virtual int repmgr_stat(DB_REPMGR_STAT **statp, u_int32_t flags); + virtual int repmgr_stat_print(u_int32_t flags); + + // Conversion functions + // + virtual ENV *get_ENV() + { + return imp_->env; + } + + virtual DB_ENV *get_DB_ENV() + { + return imp_; + } + + virtual const DB_ENV 
*get_const_DB_ENV() const + { + return imp_; + } + + static DbEnv* get_DbEnv(DB_ENV *dbenv) + { + return dbenv ? (DbEnv *)dbenv->api1_internal : 0; + } + + static const DbEnv* get_const_DbEnv(const DB_ENV *dbenv) + { + return dbenv ? (const DbEnv *)dbenv->api1_internal : 0; + } + + u_int32_t get_create_flags() const + { + return construct_flags_; + } + + // For internal use only. + static DbEnv* wrap_DB_ENV(DB_ENV *dbenv); + + // These are public only because they need to be called + // via C functions. They should never be called by users + // of this class. + // + static int _app_dispatch_intercept(DB_ENV *dbenv, DBT *dbt, DB_LSN *lsn, + db_recops op); + static void _paniccall_intercept(DB_ENV *dbenv, int errval); + static void _feedback_intercept(DB_ENV *dbenv, int opcode, int pct); + static void _event_func_intercept(DB_ENV *dbenv, u_int32_t, void *); + static int _isalive_intercept(DB_ENV *dbenv, pid_t pid, + db_threadid_t thrid, u_int32_t flags); + static int _rep_send_intercept(DB_ENV *dbenv, const DBT *cntrl, + const DBT *data, const DB_LSN *lsn, int id, u_int32_t flags); + static void _stream_error_function(const DB_ENV *dbenv, + const char *prefix, const char *message); + static void _stream_message_function(const DB_ENV *dbenv, + const char *message); + static void _thread_id_intercept(DB_ENV *dbenv, pid_t *pidp, + db_threadid_t *thridp); + static char *_thread_id_string_intercept(DB_ENV *dbenv, pid_t pid, + db_threadid_t thrid, char *buf); + static void _message_dispatch_intercept(DB_ENV *dbenv, + DB_CHANNEL *dbchannel, DBT *request, u_int32_t nrequest, + u_int32_t cb_flags); + +private: + void cleanup(); + int initialize(DB_ENV *dbenv); + int error_policy(); + + // For internal use only. 
+ DbEnv(DB_ENV *, u_int32_t flags); + + // no copying + DbEnv(const DbEnv &); + void operator = (const DbEnv &); + + // instance data + DB_ENV *imp_; + int construct_error_; + u_int32_t construct_flags_; + __DB_STD(ostream) *error_stream_; + __DB_STD(ostream) *message_stream_; + + int (*app_dispatch_callback_)(DbEnv *, Dbt *, DbLsn *, db_recops); + int (*isalive_callback_)(DbEnv *, pid_t, db_threadid_t, u_int32_t); + void (*error_callback_)(const DbEnv *, const char *, const char *); + void (*feedback_callback_)(DbEnv *, int, int); + void (*message_callback_)(const DbEnv *, const char *); + void (*paniccall_callback_)(DbEnv *, int); + void (*event_func_callback_)(DbEnv *, u_int32_t, void *); + int (*rep_send_callback_)(DbEnv *, const Dbt *, const Dbt *, + const DbLsn *, int, u_int32_t); + void (*thread_id_callback_)(DbEnv *, pid_t *, db_threadid_t *); + char *(*thread_id_string_callback_)(DbEnv *, pid_t, db_threadid_t, + char *); + void (*message_dispatch_callback_)(DbEnv *, DbChannel *, Dbt *, + u_int32_t, u_int32_t); +}; + +// +// Heap record id +// +class _exported DbHeapRecordId : private DB_HEAP_RID +{ +public: + db_pgno_t get_pgno() const { return pgno; } + void set_pgno(db_pgno_t value) { pgno = value; } + + db_indx_t get_indx() const { return indx; } + void set_indx(db_indx_t value) { indx = value; } + + DB_HEAP_RID *get_DB_HEAP_RID() { return (DB_HEAP_RID *)this; } + const DB_HEAP_RID *get_const_DB_HEAP_RID() const + { return (const DB_HEAP_RID *)this; } + + static DbHeapRecordId* get_DbHeapRecordId(DB_HEAP_RID *rid) + { return (DbHeapRecordId *)rid; } + static const DbHeapRecordId* get_const_DbHeapRecordId(DB_HEAP_RID *rid) + { return (const DbHeapRecordId *)rid; } + + DbHeapRecordId(db_pgno_t pgno, db_indx_t indx); + DbHeapRecordId(); + ~DbHeapRecordId(); + DbHeapRecordId(const DbHeapRecordId &); + DbHeapRecordId &operator = (const DbHeapRecordId &); +}; + +// +// Lock +// +class _exported DbLock +{ + friend class DbEnv; + +public: + DbLock(); + 
DbLock(const DbLock &); + DbLock &operator = (const DbLock &); + +protected: + // We can add data to this class if needed + // since its contained class is not allocated by db. + // (see comment at top) + + DbLock(DB_LOCK); + DB_LOCK lock_; +}; + +// +// Log cursor +// +class _exported DbLogc : protected DB_LOGC +{ + friend class DbEnv; + +public: + int close(u_int32_t _flags); + int get(DbLsn *lsn, Dbt *data, u_int32_t _flags); + int version(u_int32_t *versionp, u_int32_t _flags); + +private: + // No data is permitted in this class (see comment at top) + + // Note: use Db::cursor() to get pointers to a Dbc, + // and call Dbc::close() rather than delete to release them. + // + DbLogc(); + ~DbLogc(); + + // no copying + DbLogc(const Dbc &); + DbLogc &operator = (const Dbc &); +}; + +// +// Log sequence number +// +class _exported DbLsn : public DB_LSN +{ + friend class DbEnv; // friendship needed to cast to base class + friend class DbLogc; // friendship needed to cast to base class +}; + +// +// Memory pool file +// +class _exported DbMpoolFile +{ + friend class DbEnv; + friend class Db; + +public: + int close(u_int32_t flags); + int get(db_pgno_t *pgnoaddr, DbTxn *txn, u_int32_t flags, void *pagep); + int get_clear_len(u_int32_t *len); + int get_fileid(u_int8_t *fileid); + int get_flags(u_int32_t *flagsp); + int get_ftype(int *ftype); + int get_last_pgno(db_pgno_t *pgnop); + int get_lsn_offset(int32_t *offsetp); + int get_maxsize(u_int32_t *gbytes, u_int32_t *bytes); + int get_pgcookie(DBT *dbt); + int get_priority(DB_CACHE_PRIORITY *priorityp); + int get_transactional(void); + int open(const char *file, u_int32_t flags, int mode, size_t pagesize); + int put(void *pgaddr, DB_CACHE_PRIORITY priority, u_int32_t flags); + int set_clear_len(u_int32_t len); + int set_fileid(u_int8_t *fileid); + int set_flags(u_int32_t flags, int onoff); + int set_ftype(int ftype); + int set_lsn_offset(int32_t offset); + int set_maxsize(u_int32_t gbytes, u_int32_t bytes); + int 
set_pgcookie(DBT *dbt); + int set_priority(DB_CACHE_PRIORITY priority); + int sync(); + + virtual DB_MPOOLFILE *get_DB_MPOOLFILE() + { + return imp_; + } + + virtual const DB_MPOOLFILE *get_const_DB_MPOOLFILE() const + { + return imp_; + } + +private: + DB_MPOOLFILE *imp_; + + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbEnv::memp_fcreate() to get pointers to a DbMpoolFile, + // and call DbMpoolFile::close() rather than delete to release them. + // + DbMpoolFile(); + + // Shut g++ up. +protected: + virtual ~DbMpoolFile(); + +private: + // no copying + DbMpoolFile(const DbMpoolFile &); + void operator = (const DbMpoolFile &); +}; + +// +// This is filled in and returned by the DbEnv::txn_recover() method. +// +class _exported DbPreplist +{ +public: + DbTxn *txn; + u_int8_t gid[DB_GID_SIZE]; +}; + +// +// A sequence record in a database +// +class _exported DbSequence +{ +public: + DbSequence(Db *db, u_int32_t flags); + virtual ~DbSequence(); + + int open(DbTxn *txnid, Dbt *key, u_int32_t flags); + int initial_value(db_seq_t value); + int close(u_int32_t flags); + int remove(DbTxn *txnid, u_int32_t flags); + int stat(DB_SEQUENCE_STAT **sp, u_int32_t flags); + int stat_print(u_int32_t flags); + + int get(DbTxn *txnid, int32_t delta, db_seq_t *retp, u_int32_t flags); + int get_cachesize(int32_t *sizep); + int set_cachesize(int32_t size); + int get_flags(u_int32_t *flagsp); + int set_flags(u_int32_t flags); + int get_range(db_seq_t *minp, db_seq_t *maxp); + int set_range(db_seq_t min, db_seq_t max); + + Db *get_db(); + Dbt *get_key(); + + virtual DB_SEQUENCE *get_DB_SEQUENCE() + { + return imp_; + } + + virtual const DB_SEQUENCE *get_const_DB_SEQUENCE() const + { + return imp_; + } + + static DbSequence* get_DbSequence(DB_SEQUENCE *seq) + { + return (DbSequence *)seq->api_internal; + } + + static const DbSequence* get_const_DbSequence(const DB_SEQUENCE *seq) + { + return (const 
DbSequence *)seq->api_internal; + } + + // For internal use only. + static DbSequence* wrap_DB_SEQUENCE(DB_SEQUENCE *seq); + +private: + DbSequence(DB_SEQUENCE *seq); + // no copying + DbSequence(const DbSequence &); + DbSequence &operator = (const DbSequence &); + + DB_SEQUENCE *imp_; + DBT key_; +}; + +// +// A site in replication group +// +class _exported DbSite +{ + friend class DbEnv; + +public: + int close(); + int get_address(const char **hostp, u_int *port); + int get_config(u_int32_t which, u_int32_t *value); + int get_eid(int *eidp); + int remove(); + int set_config(u_int32_t which, u_int32_t value); + + virtual DB_SITE *get_DB_SITE() + { + return imp_; + } + + virtual const DB_SITE *get_const_DB_SITE() const + { + return imp_; + } + +private: + DbSite(); + virtual ~DbSite(); + + // no copying + DbSite(const DbSite &); + DbSite &operator = (const DbSite &); + DB_SITE *imp_; +}; + +// +// Transaction +// +class _exported DbTxn +{ + friend class DbEnv; + +public: + int abort(); + int commit(u_int32_t flags); + int discard(u_int32_t flags); + u_int32_t id(); + int get_name(const char **namep); + int get_priority(u_int32_t *priorityp); + int prepare(u_int8_t *gid); + int set_name(const char *name); + int set_priority(u_int32_t priority); + int set_timeout(db_timeout_t timeout, u_int32_t flags); + + virtual DB_TXN *get_DB_TXN() + { + return imp_; + } + + virtual const DB_TXN *get_const_DB_TXN() const + { + return imp_; + } + + static DbTxn* get_DbTxn(DB_TXN *txn) + { + return (DbTxn *)txn->api_internal; + } + + static const DbTxn* get_const_DbTxn(const DB_TXN *txn) + { + return (const DbTxn *)txn->api_internal; + } + + // For internal use only. 
+ static DbTxn* wrap_DB_TXN(DB_TXN *txn); + void remove_child_txn(DbTxn *kid); + void add_child_txn(DbTxn *kid); + + void set_parent(DbTxn *ptxn) + { + parent_txn_ = ptxn; + } + +private: + DB_TXN *imp_; + + // We use a TAILQ to store this object's kids of DbTxn objects, and + // each kid has a "parent_txn_" to point to this DbTxn object. + // + // If imp_ has a parent transaction which is not wrapped by DbTxn + // class, parent_txn_ will be NULL since we don't need to maintain + // this parent-kid relationship. This relationship only helps to + // delete unresolved kids when the parent is resolved. + DbTxn *parent_txn_; + + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbEnv::txn_begin() to get pointers to a DbTxn, + // and call DbTxn::abort() or DbTxn::commit rather than + // delete to release them. + // + DbTxn(DbTxn *ptxn); + // For internal use only. + DbTxn(DB_TXN *txn, DbTxn *ptxn); + virtual ~DbTxn(); + + // no copying + DbTxn(const DbTxn &); + void operator = (const DbTxn &); + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_HEAD(__children, DbTxn) children; + */ + struct __children { + DbTxn *tqh_first; + DbTxn **tqh_last; + } children; + + /* + * !!! + * Explicit representations of structures from queue.h. + * TAILQ_ENTRY(DbTxn) child_entry; + */ + struct { + DbTxn *tqe_next; + DbTxn **tqe_prev; + } child_entry; +}; + +// +// A chunk of data, maybe a key or value. +// +class _exported Dbt : private DBT +{ + friend class Db; + friend class Dbc; + friend class DbEnv; + friend class DbLogc; + friend class DbSequence; + +public: + // key/data + void *get_data() const { return data; } + void set_data(void *value) { data = value; } + + // key/data length + u_int32_t get_size() const { return size; } + void set_size(u_int32_t value) { size = value; } + + // RO: length of user buffer. 
+ u_int32_t get_ulen() const { return ulen; } + void set_ulen(u_int32_t value) { ulen = value; } + + // RO: get/put record length. + u_int32_t get_dlen() const { return dlen; } + void set_dlen(u_int32_t value) { dlen = value; } + + // RO: get/put record offset. + u_int32_t get_doff() const { return doff; } + void set_doff(u_int32_t value) { doff = value; } + + // flags + u_int32_t get_flags() const { return flags; } + void set_flags(u_int32_t value) { flags = value; } + + // Conversion functions + DBT *get_DBT() { return (DBT *)this; } + const DBT *get_const_DBT() const { return (const DBT *)this; } + + static Dbt* get_Dbt(DBT *dbt) { return (Dbt *)dbt; } + static const Dbt* get_const_Dbt(const DBT *dbt) + { return (const Dbt *)dbt; } + + Dbt(void *data, u_int32_t size); + Dbt(); + ~Dbt(); + Dbt(const Dbt &); + Dbt &operator = (const Dbt &); + +private: + // Note: no extra data appears in this class (other than + // inherited from DBT) since we need DBT and Dbt objects + // to have interchangable pointers. + // + // When subclassing this class, remember that callback + // methods like bt_compare, bt_prefix, dup_compare may + // internally manufacture DBT objects (which later are + // cast to Dbt), so such callbacks might receive objects + // not of your subclassed type. +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// multiple key/data/recno iterator classes +// + +// DbMultipleIterator is a shared private base class for the three types +// of bulk-return Iterator; it should never be instantiated directly, +// but it handles the functionality shared by its subclasses. 
+class _exported DbMultipleIterator +{ +public: + DbMultipleIterator(const Dbt &dbt); +protected: + u_int8_t *data_; + u_int32_t *p_; +}; + +class _exported DbMultipleKeyDataIterator : private DbMultipleIterator +{ +public: + DbMultipleKeyDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {} + bool next(Dbt &key, Dbt &data); +}; + +class _exported DbMultipleRecnoDataIterator : private DbMultipleIterator +{ +public: + DbMultipleRecnoDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {} + bool next(db_recno_t &recno, Dbt &data); +}; + +class _exported DbMultipleDataIterator : private DbMultipleIterator +{ +public: + DbMultipleDataIterator(const Dbt &dbt) : DbMultipleIterator(dbt) {} + bool next(Dbt &data); +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// multiple key/data/recno builder classes +// + +// DbMultipleBuilder is a shared private base class for the three types +// of bulk buffer builders; it should never be instantiated directly, +// but it handles the functionality shared by its subclasses. 
+class _exported DbMultipleBuilder +{ +public: + DbMultipleBuilder(Dbt &dbt); +protected: + Dbt &dbt_; + void *p_; +}; + +class _exported DbMultipleDataBuilder : DbMultipleBuilder +{ +public: + DbMultipleDataBuilder(Dbt &dbt) : DbMultipleBuilder(dbt) {} + bool append(void *dbuf, size_t dlen); + bool reserve(void *&ddest, size_t dlen); +}; + +class _exported DbMultipleKeyDataBuilder : DbMultipleBuilder +{ +public: + DbMultipleKeyDataBuilder(Dbt &dbt) : DbMultipleBuilder(dbt) {} + bool append(void *kbuf, size_t klen, void *dbuf, size_t dlen); + bool reserve(void *&kdest, size_t klen, void *&ddest, size_t dlen); +}; + +class _exported DbMultipleRecnoDataBuilder +{ +public: + DbMultipleRecnoDataBuilder(Dbt &dbt); + bool append(db_recno_t recno, void *dbuf, size_t dlen); + bool reserve(db_recno_t recno, void *&ddest, size_t dlen); +protected: + Dbt &dbt_; + void *p_; +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Exception classes +// + +// Almost any error in the DB library throws a DbException. +// Every exception should be considered an abnormality +// (e.g. bug, misuse of DB, file system error). +// +class _exported DbException : public __DB_STD(exception) +{ +public: + virtual ~DbException() throw(); + DbException(int err); + DbException(const char *description); + DbException(const char *description, int err); + DbException(const char *prefix, const char *description, int err); + int get_errno() const; + virtual const char *what() const throw(); + DbEnv *get_env() const; + void set_env(DbEnv *dbenv); + + DbException(const DbException &); + DbException &operator = (const DbException &); + +private: + void describe(const char *prefix, const char *description); + + char *what_; + int err_; // errno + DbEnv *dbenv_; +}; + +// +// A specific sort of exception that occurs when +// an operation is aborted to resolve a deadlock. 
+// +class _exported DbDeadlockException : public DbException +{ +public: + virtual ~DbDeadlockException() throw(); + DbDeadlockException(const char *description); + + DbDeadlockException(const DbDeadlockException &); + DbDeadlockException &operator = (const DbDeadlockException &); +}; + +// +// A specific sort of exception that occurs when +// a lock is not granted, e.g. by lock_get or lock_vec. +// Note that the Dbt is only live as long as the Dbt used +// in the offending call. +// +class _exported DbLockNotGrantedException : public DbException +{ +public: + virtual ~DbLockNotGrantedException() throw(); + DbLockNotGrantedException(const char *prefix, db_lockop_t op, + db_lockmode_t mode, const Dbt *obj, const DbLock lock, int index); + DbLockNotGrantedException(const char *description); + + DbLockNotGrantedException(const DbLockNotGrantedException &); + DbLockNotGrantedException &operator = + (const DbLockNotGrantedException &); + + db_lockop_t get_op() const; + db_lockmode_t get_mode() const; + const Dbt* get_obj() const; + DbLock *get_lock() const; + int get_index() const; + +private: + db_lockop_t op_; + db_lockmode_t mode_; + const Dbt *obj_; + DbLock *lock_; + int index_; +}; + +// +// A specific sort of exception that occurs when +// user declared memory is insufficient in a Dbt. +// +class _exported DbMemoryException : public DbException +{ +public: + virtual ~DbMemoryException() throw(); + DbMemoryException(Dbt *dbt); + DbMemoryException(const char *prefix, Dbt *dbt); + + DbMemoryException(const DbMemoryException &); + DbMemoryException &operator = (const DbMemoryException &); + + Dbt *get_dbt() const; +private: + Dbt *dbt_; +}; + +// +// A specific sort of exception that occurs when a change of replication +// master requires that all handles be re-opened. 
+// +class _exported DbRepHandleDeadException : public DbException +{ +public: + virtual ~DbRepHandleDeadException() throw(); + DbRepHandleDeadException(const char *description); + + DbRepHandleDeadException(const DbRepHandleDeadException &); + DbRepHandleDeadException &operator = (const DbRepHandleDeadException &); +}; + +// +// A specific sort of exception that occurs when +// recovery is required before continuing DB activity. +// +class _exported DbRunRecoveryException : public DbException +{ +public: + virtual ~DbRunRecoveryException() throw(); + DbRunRecoveryException(const char *description); + + DbRunRecoveryException(const DbRunRecoveryException &); + DbRunRecoveryException &operator = (const DbRunRecoveryException &); +}; + +// +// A specific sort of exception that occurs when + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Restore default compiler warnings +// +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif /* !_DB_CXX_H_ */ diff --git a/src/dbinc/db_dispatch.h b/src/dbinc/db_dispatch.h new file mode 100644 index 00000000..b0cc30e9 --- /dev/null +++ b/src/dbinc/db_dispatch.h @@ -0,0 +1,97 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _DB_DISPATCH_H_ +#define _DB_DISPATCH_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Declarations and typedefs for the list of transaction IDs used during + * recovery. This is a generic list used to pass along whatever information + * we need during recovery. + */ +typedef enum { + TXNLIST_DELETE, + TXNLIST_LSN, + TXNLIST_TXNID +} db_txnlist_type; + +#define DB_TXNLIST_MASK(hp, n) (n % hp->nslots) +struct __db_txnhead { + void *td; /* If abort, the detail for the txn. */ + DB_THREAD_INFO *thread_info; /* Thread information. */ + u_int32_t maxid; /* Maximum transaction id. */ + DB_LSN maxlsn; /* Maximum commit lsn. */ + DB_LSN ckplsn; /* LSN of last retained checkpoint. */ + DB_LSN trunc_lsn; /* Lsn to which we are going to truncate; + * make sure we abort anyone after this. */ + u_int32_t generation; /* Current generation number. */ + u_int32_t gen_alloc; /* Number of generations allocated. 
*/ + struct { + u_int32_t generation; + u_int32_t txn_min; + u_int32_t txn_max; + } *gen_array; /* Array of txnids associated with a gen. */ + u_int nslots; + LIST_HEAD(__db_headlink, __db_txnlist) head[1]; +}; + +#define DB_LSN_STACK_SIZE 4 +struct __db_txnlist { + db_txnlist_type type; + LIST_ENTRY(__db_txnlist) links; + union { + struct { + u_int32_t txnid; + u_int32_t generation; + u_int32_t status; + } t; + struct { + u_int32_t stack_size; + u_int32_t stack_indx; + DB_LSN *lsn_stack; + } l; + } u; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_DISPATCH_H_ */ diff --git a/src/dbinc/db_int.in b/src/dbinc/db_int.in new file mode 100644 index 00000000..243be98e --- /dev/null +++ b/src/dbinc/db_int.in @@ -0,0 +1,1138 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_INT_H_ +#define _DB_INT_H_ + +/******************************************************* + * Berkeley DB ANSI/POSIX include files. 
+ *******************************************************/ +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include <sys/types.h> +#ifdef DIAG_MVCC +#include <sys/mman.h> +#endif +#include <sys/stat.h> + +#if defined(HAVE_REPLICATION_THREADS) +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif +#ifdef HAVE_VXWORKS +#include <selectLib.h> +#endif +#endif + +#if TIME_WITH_SYS_TIME +#include <sys/time.h> +#include <time.h> +#else +#if HAVE_SYS_TIME_H +#include <sys/time.h> +#else +#include <time.h> +#endif +#endif + +#ifdef HAVE_VXWORKS +#include <net/uio.h> +#else +#include <sys/uio.h> +#endif + +#if defined(HAVE_REPLICATION_THREADS) +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +#include <netinet/in.h> +#include <netdb.h> +#include <arpa/inet.h> +#endif + +#if defined(STDC_HEADERS) || defined(__cplusplus) +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#endif /* !HAVE_SYSTEM_INCLUDE_FILES */ + +#ifdef DB_WIN32 +#include "dbinc/win_db.h" +#endif + +#ifdef HAVE_DBM +#undef DB_DBM_HSEARCH +#define DB_DBM_HSEARCH 1 +#endif + +#include "db.h" +#include "clib_port.h" + +#include "dbinc/queue.h" +#include "dbinc/shqueue.h" +#include "dbinc/perfmon.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * The Windows compiler needs to be told about structures that are available + * outside a dll. + */ +#if defined(DB_WIN32) && defined(_MSC_VER) && \ + !defined(DB_CREATE_DLL) && !defined(_LIB) +#define __DB_IMPORT __declspec(dllimport) +#else +#define __DB_IMPORT +#endif + +/******************************************************* + * Forward structure declarations. 
+ *******************************************************/ +struct __db_commit_info; typedef struct __db_commit_info DB_COMMIT_INFO; +struct __db_reginfo_t; typedef struct __db_reginfo_t REGINFO; +struct __db_txnhead; typedef struct __db_txnhead DB_TXNHEAD; +struct __db_txnlist; typedef struct __db_txnlist DB_TXNLIST; +struct __vrfy_childinfo;typedef struct __vrfy_childinfo VRFY_CHILDINFO; +struct __vrfy_dbinfo; typedef struct __vrfy_dbinfo VRFY_DBINFO; +struct __vrfy_pageinfo; typedef struct __vrfy_pageinfo VRFY_PAGEINFO; + +struct __db_log_verify_info; +struct __txn_verify_info; +struct __lv_filereg_info; +struct __lv_ckp_info; +struct __lv_timestamp_info; +typedef struct __db_log_verify_info DB_LOG_VRFY_INFO; +typedef struct __txn_verify_info VRFY_TXN_INFO; +typedef struct __lv_filereg_info VRFY_FILEREG_INFO; +typedef struct __lv_filelife VRFY_FILELIFE; +typedef struct __lv_ckp_info VRFY_CKP_INFO; +typedef struct __lv_timestamp_info VRFY_TIMESTAMP_INFO; + +/* + * TXNINFO_HANDLER -- + * Callback function pointer type for __iterate_txninfo. + */ +typedef int (*TXNINFO_HANDLER) __P((DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *)); + +typedef SH_TAILQ_HEAD(__hash_head) DB_HASHTAB; + +/******************************************************* + * General purpose constants and macros. + *******************************************************/ +#undef FALSE +#define FALSE 0 +#undef TRUE +#define TRUE (!FALSE) + +#define MEGABYTE 1048576 +#define GIGABYTE 1073741824 + +#define NS_PER_MS 1000000 /* Nanoseconds in a millisecond */ +#define NS_PER_US 1000 /* Nanoseconds in a microsecond */ +#define NS_PER_SEC 1000000000 /* Nanoseconds in a second */ +#define US_PER_MS 1000 /* Microseconds in a millisecond */ +#define US_PER_SEC 1000000 /* Microseconds in a second */ +#define MS_PER_SEC 1000 /* Milliseconds in a second */ + +#define RECNO_OOB 0 /* Illegal record number. 
*/ + +/* + * Define a macro which has no runtime effect, yet avoids triggering empty + * statement compiler warnings. Use it as the text of conditionally-null macros. + */ +#define NOP_STATEMENT do { } while (0) + +/* Test for a power-of-two (tests true for zero, which doesn't matter here). */ +#define POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0) + +/* Test for valid page sizes. */ +#define DB_MIN_PGSIZE 0x000200 /* Minimum page size (512). */ +#define DB_MAX_PGSIZE 0x010000 /* Maximum page size (65536). */ +#define IS_VALID_PAGESIZE(x) \ + (POWER_OF_TWO(x) && (x) >= DB_MIN_PGSIZE && ((x) <= DB_MAX_PGSIZE)) + +/* Minimum number of pages cached, by default. */ +#define DB_MINPAGECACHE 16 + +/* + * If we are unable to determine the underlying filesystem block size, use + * 8K on the grounds that most OS's use less than 8K for a VM page size. + */ +#define DB_DEF_IOSIZE (8 * 1024) + +/* Align an integer to a specific boundary. */ +#undef DB_ALIGN +#define DB_ALIGN(v, bound) \ + (((v) + (bound) - 1) & ~(((uintmax_t)(bound)) - 1)) + +/* Increment a pointer to a specific boundary. */ +#undef ALIGNP_INC +#define ALIGNP_INC(p, bound) \ + (void *)(((uintptr_t)(p) + (bound) - 1) & ~(((uintptr_t)(bound)) - 1)) + +/* + * DB_ALIGN8 adjusts structure alignments to make sure shared structures have + * fixed size and filed offset on both 32bit and 64bit platforms when + * HAVE_MIXED_SIZE_ADDRESSING is defined. + */ +#ifdef HAVE_MIXED_SIZE_ADDRESSING +#define DB_ALIGN8 @DB_STRUCT_ALIGN8@ +#else +#define DB_ALIGN8 +#endif + +/* + * Berkeley DB uses the va_copy macro from C99, not all compilers include + * it, so add a dumb implementation compatible with pre C99 implementations. + */ +#ifndef va_copy +#define va_copy(d, s) ((d) = (s)) +#endif + +/* + * Print an address as a u_long (a u_long is the largest type we can print + * portably). Most 64-bit systems have made longs 64-bits, so this should + * work. 
+ */ +#define P_TO_ULONG(p) ((u_long)(uintptr_t)(p)) + +/* + * Convert a pointer to an integral value. + * + * The (u_int16_t)(uintptr_t) cast avoids warnings: the (uintptr_t) cast + * converts the value to an integral type, and the (u_int16_t) cast converts + * it to a small integral type so we don't get complaints when we assign the + * final result to an integral type smaller than uintptr_t. + */ +#define P_TO_UINT32(p) ((u_int32_t)(uintptr_t)(p)) +#define P_TO_UINT16(p) ((u_int16_t)(uintptr_t)(p)) +#define P_TO_ROFF(p) ((roff_t)(uintptr_t)(p)) + +/* The converse of P_TO_ROFF() above. */ +#define ROFF_TO_P(roff) ((void *)(uintptr_t)(roff)) + +/* + * There are several on-page structures that are declared to have a number of + * fields followed by a variable length array of items. The structure size + * without including the variable length array or the address of the first of + * those elements can be found using SSZ. + * + * This macro can also be used to find the offset of a structure element in a + * structure. This is used in various places to copy structure elements from + * unaligned memory references, e.g., pointers into a packed page. + * + * There are two versions because compilers object if you take the address of + * an array. + */ +#undef SSZ +#define SSZ(name, field) P_TO_UINT16(&(((name *)0)->field)) + +#undef SSZA +#define SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0])) + +/* Structure used to print flag values. */ +typedef struct __fn { + u_int32_t mask; /* Flag value. */ + const char *name; /* Flag name. */ +} FN; + +/* Set, clear and test flags. */ +#define FLD_CLR(fld, f) (fld) &= ~(f) +#define FLD_ISSET(fld, f) ((fld) & (f)) +#define FLD_SET(fld, f) (fld) |= (f) +#define F_CLR(p, f) (p)->flags &= ~(f) +#define F_ISSET(p, f) ((p)->flags & (f)) +#define F_SET(p, f) (p)->flags |= (f) +#define LF_CLR(f) ((flags) &= ~(f)) +#define LF_ISSET(f) ((flags) & (f)) +#define LF_SET(f) ((flags) |= (f)) + +/* + * Calculate a percentage. 
The values can overflow 32-bit integer arithmetic + * so we use floating point. + * + * When calculating a bytes-vs-page size percentage, we're getting the inverse + * of the percentage in all cases, that is, we want 100 minus the percentage we + * calculate. + */ +#define DB_PCT(v, total) \ + ((int)((total) == 0 ? 0 : ((double)(v) * 100) / (total))) +#define DB_PCT_PG(v, total, pgsize) \ + ((int)((total) == 0 ? 0 : \ + 100 - ((double)(v) * 100) / (((double)total) * (pgsize)))) + +/* + * Statistics update shared memory and so are expensive -- don't update the + * values unless we're going to display the results. + * When performance monitoring is enabled, the changed value can be published + * (via DTrace or SystemTap) along with another associated value or two. + */ +#undef STAT +#ifdef HAVE_STATISTICS +#define STAT(x) x +#define STAT_ADJUST(env, cat, subcat, val, amount, id) \ + do { \ + (val) += (amount); \ + STAT_PERFMON2((env), cat, subcat, (val), (id)); \ + } while (0) +#define STAT_ADJUST_VERB(env, cat, subcat, val, amount, id1, id2) \ + do { \ + (val) += (amount); \ + STAT_PERFMON3((env), cat, subcat, (val), (id1), (id2)); \ + } while (0) +#define STAT_INC(env, cat, subcat, val, id) \ + STAT_ADJUST(env, cat, subcat, (val), 1, (id)) +#define STAT_INC_VERB(env, cat, subcat, val, id1, id2) \ + STAT_ADJUST_VERB((env), cat, subcat, (val), 1, (id1), (id2)) +/* + * STAT_DEC() subtracts one rather than adding (-1) with STAT_ADJUST(); the + * latter might generate a compilation warning for an unsigned value. + */ +#define STAT_DEC(env, cat, subcat, val, id) \ + do { \ + (val)--; \ + STAT_PERFMON2((env), cat, subcat, (val), (id)); \ + } while (0) +/* N.B.: Add a verbose version of STAT_DEC() when needed. 
*/ + +#define STAT_SET(env, cat, subcat, val, newval, id) \ + do { \ + (val) = (newval); \ + STAT_PERFMON2((env), cat, subcat, (val), (id)); \ + } while (0) +#define STAT_SET_VERB(env, cat, subcat, val, newval, id1, id2) \ + do { \ + (val) = (newval); \ + STAT_PERFMON3((env), cat, subcat, (val), (id1), (id2)); \ + } while (0) +#else +#define STAT(x) NOP_STATEMENT +#define STAT_ADJUST(env, cat, subcat, val, amt, id) NOP_STATEMENT +#define STAT_ADJUST_VERB(env, cat, subcat, val, amt, id1, id2) NOP_STATEMENT +#define STAT_INC(env, cat, subcat, val, id) NOP_STATEMENT +#define STAT_INC_VERB(env, cat, subcat, val, id1, id2) NOP_STATEMENT +#define STAT_DEC(env, cat, subcat, val, id) NOP_STATEMENT +#define STAT_SET(env, cat, subcat, val, newval, id) NOP_STATEMENT +#define STAT_SET_VERB(env, cat, subcat, val, newval, id1, id2) NOP_STATEMENT +#endif + +/* + * These macros are used when an error condition is first noticed. They allow + * one to be notified (via e.g. DTrace, SystemTap, ...) when an error occurs + * deep inside DB, rather than when it is returned back through the API. + * + * The second actual argument to these is the second part of the error or + * warning event name. They work when 'errcode' is a symbolic name e.g. + * EINVAL or DB_LOCK_DEALOCK, not a variable. Noticing system call failures + * would be handled by tracing on syscall exit; when e.g., it returns < 0. + */ +#define ERR_ORIGIN(env, errcode) \ + (PERFMON0(env, error, errcode), errcode) + +#define ERR_ORIGIN_MSG(env, errcode, msg) \ + (PERFMON1(env, error, errcode, msg), errcode) + +#define WARNING_ORIGIN(env, errcode) \ + (PERFMON0(env, warning, errcode), errcode) + +/* + * Structure used for callback message aggregation. + * + * Display values in XXX_stat_print calls. + */ +typedef struct __db_msgbuf { + char *buf; /* Heap allocated buffer. */ + char *cur; /* Current end of message. */ + size_t len; /* Allocated length of buffer. 
*/ +} DB_MSGBUF; +#define DB_MSGBUF_INIT(a) do { \ + (a)->buf = (a)->cur = NULL; \ + (a)->len = 0; \ +} while (0) +#define DB_MSGBUF_FLUSH(env, a) do { \ + if ((a)->buf != NULL) { \ + if ((a)->cur != (a)->buf) \ + __db_msg(env, "%s", (a)->buf); \ + __os_free(env, (a)->buf); \ + DB_MSGBUF_INIT(a); \ + } \ +} while (0) +#define DB_MSGBUF_REP_FLUSH(env, a, diag_msg, regular_msg) do { \ + if ((a)->buf != NULL) { \ + if ((a)->cur != (a)->buf && diag_msg) \ + __db_repmsg(env, "%s", (a)->buf); \ + if (regular_msg) \ + DB_MSGBUF_FLUSH(env, a); \ + else { \ + __os_free(env, (a)->buf); \ + DB_MSGBUF_INIT(a); \ + } \ + } \ +} while (0) +#define STAT_FMT(msg, fmt, type, v) do { \ + DB_MSGBUF __mb; \ + DB_MSGBUF_INIT(&__mb); \ + __db_msgadd(env, &__mb, fmt, (type)(v)); \ + __db_msgadd(env, &__mb, "\t%s", msg); \ + DB_MSGBUF_FLUSH(env, &__mb); \ +} while (0) +#define STAT_HEX(msg, v) \ + __db_msg(env, "%#lx\t%s", (u_long)(v), msg) +#define STAT_ISSET(msg, p) \ + __db_msg(env, "%sSet\t%s", (p) == NULL ? "!" : " ", msg) +#define STAT_LONG(msg, v) \ + __db_msg(env, "%ld\t%s", (long)(v), msg) +#define STAT_LSN(msg, lsnp) \ + __db_msg(env, "%lu/%lu\t%s", \ + (u_long)(lsnp)->file, (u_long)(lsnp)->offset, msg) +#define STAT_POINTER(msg, v) \ + __db_msg(env, "%#lx\t%s", P_TO_ULONG(v), msg) +#define STAT_STRING(msg, p) do { \ + const char *__p = p; /* p may be a function call. */ \ + __db_msg(env, "%s\t%s", __p == NULL ? "!Set" : __p, msg); \ +} while (0) +#define STAT_ULONG(msg, v) \ + __db_msg(env, "%lu\t%s", (u_long)(v), msg) + +/* + * The following macros are used to control how error and message strings are + * output by Berkeley DB. There are essentially three different controls + * available: + * - Default behavior is to output error strings with its unique identifier. + * - If HAVE_STRIPPED_MESSAGES is enabled, a unique identifier along with any + * parameters to the error string will be output. 
+ * - If HAVE_LOCALIZATION is defined, and the '_()' macro is implemented, a + * gettext or ICU style translation will be done. + * + * Each new string that will be output should be wrapped in a DB_STR* macro. + * There are three versions of this macro for different scenarions: + * - DB_STR for strings that need an identifier, and don't have any argument. + * - DB_STR_A for strings that need an identifier, and have argument(s). + * - DB_STR_P for strings that don't need an identifier, and don't have + * arguments. + * + * Error message IDs are automatically assigned by dist/s_message_id script. + */ +#ifdef HAVE_LOCALIZATION +#define _(msg) msg /* Replace with localization function. */ +#else +#define _(msg) msg +#endif + +#ifdef HAVE_STRIPPED_MESSAGES +#define DB_STR_C(msg, fmt) fmt +#else +#define DB_STR_C(msg, fmt) _(msg) +#endif + +#define DB_MSGID(id) "BDB" id + +#define DB_STR(id, msg) DB_MSGID(id) " " DB_STR_C(msg, "") + +#define DB_STR_A(id, msg, fmt) DB_MSGID(id) " " DB_STR_C(msg, fmt) + +#define DB_STR_P(msg) _(msg) + +/* + * There are quite a few places in Berkeley DB where we want to initialize + * a DBT from a string or other random pointer type, using a length typed + * to size_t in most cases. This macro avoids a lot of casting. The macro + * comes in two flavors because we often want to clear the DBT first. + */ +#define DB_SET_DBT(dbt, d, s) do { \ + (dbt).data = (void *)(d); \ + (dbt).size = (u_int32_t)(s); \ +} while (0) +#define DB_INIT_DBT(dbt, d, s) do { \ + memset(&(dbt), 0, sizeof(dbt)); \ + DB_SET_DBT(dbt, d, s); \ +} while (0) + +/******************************************************* + * API return values + *******************************************************/ +/* + * Return values that are OK for each different call. Most calls have a + * standard 'return of 0 is only OK value', but some, like db->get have + * DB_NOTFOUND as a return value, but it really isn't an error. 
+ */ +#define DB_RETOK_STD(ret) ((ret) == 0) +#define DB_RETOK_DBCDEL(ret) ((ret) == 0 || (ret) == DB_KEYEMPTY || \ + (ret) == DB_NOTFOUND) +#define DB_RETOK_DBCGET(ret) ((ret) == 0 || (ret) == DB_KEYEMPTY || \ + (ret) == DB_NOTFOUND) +#define DB_RETOK_DBCPUT(ret) ((ret) == 0 || (ret) == DB_KEYEXIST || \ + (ret) == DB_NOTFOUND) +#define DB_RETOK_DBDEL(ret) DB_RETOK_DBCDEL(ret) +#define DB_RETOK_DBGET(ret) DB_RETOK_DBCGET(ret) +#define DB_RETOK_DBPUT(ret) ((ret) == 0 || (ret) == DB_KEYEXIST) +#define DB_RETOK_EXISTS(ret) DB_RETOK_DBCGET(ret) +#define DB_RETOK_LGGET(ret) ((ret) == 0 || (ret) == DB_NOTFOUND) +#define DB_RETOK_MPGET(ret) ((ret) == 0 || (ret) == DB_PAGE_NOTFOUND) +#define DB_RETOK_REPPMSG(ret) ((ret) == 0 || \ + (ret) == DB_REP_IGNORE || \ + (ret) == DB_REP_ISPERM || \ + (ret) == DB_REP_NEWMASTER || \ + (ret) == DB_REP_NEWSITE || \ + (ret) == DB_REP_NOTPERM || \ + (ret) == DB_REP_WOULDROLLBACK) +#define DB_RETOK_REPMGR_LOCALSITE(ret) ((ret) == 0 || (ret) == DB_NOTFOUND) +#define DB_RETOK_REPMGR_START(ret) ((ret) == 0 || (ret) == DB_REP_IGNORE) +#define DB_RETOK_TXNAPPLIED(ret) ((ret) == 0 || \ + (ret) == DB_NOTFOUND || \ + (ret) == DB_TIMEOUT || \ + (ret) == DB_KEYEMPTY) + +/* Find a reasonable operation-not-supported error. */ +#ifdef EOPNOTSUPP +#define DB_OPNOTSUP EOPNOTSUPP +#else +#ifdef ENOTSUP +#define DB_OPNOTSUP ENOTSUP +#else +#define DB_OPNOTSUP EINVAL +#endif +#endif + +/******************************************************* + * Files. + *******************************************************/ +/* + * We use 1024 as the maximum path length. It's too hard to figure out what + * the real path length is, as it was traditionally stored in <sys/param.h>, + * and that file isn't always available. + */ +#define DB_MAXPATHLEN 1024 + +#define PATH_DOT "." /* Current working directory. */ + /* Path separator character(s). */ +#define PATH_SEPARATOR "@PATH_SEPARATOR@" + +/******************************************************* + * Environment. 
+ *******************************************************/ +/* Type passed to __db_appname(). */ +typedef enum { + DB_APP_NONE=0, /* No type (region). */ + DB_APP_DATA, /* Data file. */ + DB_APP_LOG, /* Log file. */ + DB_APP_TMP, /* Temporary file. */ + DB_APP_RECOVER /* We are in recovery. */ +} APPNAME; + +/* + * A set of macros to check if various functionality has been configured. + * + * ALIVE_ON The is_alive function is configured. + * CDB_LOCKING CDB product locking. + * CRYPTO_ON Security has been configured. + * LOCKING_ON Locking has been configured. + * LOGGING_ON Logging has been configured. + * MUTEX_ON Mutexes have been configured. + * MPOOL_ON Memory pool has been configured. + * REP_ON Replication has been configured. + * TXN_ON Transactions have been configured. + * + * REP_ON is more complex than most: if the BDB library was compiled without + * replication support, ENV->rep_handle will be NULL; if the BDB library has + * replication support, but it was not configured, the region reference will + * be NULL. + */ +#define ALIVE_ON(env) ((env)->dbenv->is_alive != NULL) +#define CDB_LOCKING(env) F_ISSET(env, ENV_CDB) +#define CRYPTO_ON(env) ((env)->crypto_handle != NULL) +#define LOCKING_ON(env) ((env)->lk_handle != NULL) +#define LOGGING_ON(env) ((env)->lg_handle != NULL) +#define MPOOL_ON(env) ((env)->mp_handle != NULL) +#define MUTEX_ON(env) ((env)->mutex_handle != NULL) +#define REP_ON(env) \ + ((env)->rep_handle != NULL && (env)->rep_handle->region != NULL) +#define TXN_ON(env) ((env)->tx_handle != NULL) + +/* + * STD_LOCKING Standard locking, that is, locking was configured and CDB + * was not. We do not do locking in off-page duplicate trees, + * so we check for that in the cursor first. + */ +#define STD_LOCKING(dbc) \ + (!F_ISSET(dbc, DBC_OPD) && \ + !CDB_LOCKING((dbc)->env) && LOCKING_ON((dbc)->env)) + +/* + * IS_RECOVERING: The system is running recovery. 
+ */ +#define IS_RECOVERING(env) \ + (LOGGING_ON(env) && F_ISSET((env)->lg_handle, DBLOG_RECOVER)) + +/* Initialization methods are often illegal before/after open is called. */ +#define ENV_ILLEGAL_AFTER_OPEN(env, name) \ + if (F_ISSET((env), ENV_OPEN_CALLED)) \ + return (__db_mi_open(env, name, 1)); +#define ENV_ILLEGAL_BEFORE_OPEN(env, name) \ + if (!F_ISSET((env), ENV_OPEN_CALLED)) \ + return (__db_mi_open(env, name, 0)); + +/* We're not actually user hostile, honest. */ +#define ENV_REQUIRES_CONFIG(env, handle, i, flags) \ + if (handle == NULL) \ + return (__env_not_config(env, i, flags)); +#define ENV_REQUIRES_CONFIG_XX(env, handle, i, flags) \ + if ((env)->handle->region == NULL) \ + return (__env_not_config(env, i, flags)); +#define ENV_NOT_CONFIGURED(env, handle, i, flags) \ + if (F_ISSET((env), ENV_OPEN_CALLED)) \ + ENV_REQUIRES_CONFIG(env, handle, i, flags) + +#define ENV_ENTER_RET(env, ip, ret) do { \ + ret = 0; \ + PANIC_CHECK_RET(env, ret); \ + if (ret == 0) { \ + if ((env)->thr_hashtab == NULL) \ + ip = NULL; \ + else \ + ret = __env_set_state(env, &(ip), THREAD_ACTIVE);\ + } \ +} while (0) + +#define ENV_ENTER(env, ip) do { \ + int __ret; \ + ip = NULL; \ + ENV_ENTER_RET(env, ip, __ret); \ + if (__ret != 0) \ + return (__ret); \ +} while (0) + +#define FAILCHK_THREAD(env, ip) do { \ + if ((ip) != NULL) \ + (ip)->dbth_state = THREAD_FAILCHK; \ +} while (0) + +#define ENV_GET_THREAD_INFO(env, ip) ENV_ENTER(env, ip) + +#ifdef DIAGNOSTIC +#define ENV_LEAVE(env, ip) do { \ + if ((ip) != NULL) { \ + DB_ASSERT(env, ((ip)->dbth_state == THREAD_ACTIVE || \ + (ip)->dbth_state == THREAD_FAILCHK)); \ + (ip)->dbth_state = THREAD_OUT; \ + } \ +} while (0) +#else +#define ENV_LEAVE(env, ip) do { \ + if ((ip) != NULL) \ + (ip)->dbth_state = THREAD_OUT; \ +} while (0) +#endif +#ifdef DIAGNOSTIC +#define CHECK_THREAD(env) do { \ + if ((env)->thr_hashtab != NULL) \ + (void)__env_set_state(env, NULL, THREAD_VERIFY); \ +} while (0) +#ifdef HAVE_STATISTICS +#define 
CHECK_MTX_THREAD(env, mtx) do { \ + if (mtx->alloc_id != MTX_MUTEX_REGION && \ + mtx->alloc_id != MTX_ENV_REGION && \ + mtx->alloc_id != MTX_APPLICATION) \ + CHECK_THREAD(env); \ +} while (0) +#else +#define CHECK_MTX_THREAD(env, mtx) NOP_STATEMENT +#endif +#else +#define CHECK_THREAD(env) NOP_STATEMENT +#define CHECK_MTX_THREAD(env, mtx) NOP_STATEMENT +#endif + +typedef enum { + THREAD_SLOT_NOT_IN_USE=0, + THREAD_OUT, + THREAD_ACTIVE, + THREAD_BLOCKED, + THREAD_BLOCKED_DEAD, + THREAD_FAILCHK, + THREAD_VERIFY +} DB_THREAD_STATE; + +typedef struct __pin_list { + roff_t b_ref; /* offset to buffer. */ + int region; /* region containing buffer. */ +} PIN_LIST; +#define PINMAX 4 + +struct __db_thread_info { /* SHARED */ + pid_t dbth_pid; + db_threadid_t dbth_tid; + DB_THREAD_STATE dbth_state; + SH_TAILQ_ENTRY dbth_links; + /* + * The next field contains the (process local) reference to the XA + * transaction currently associated with this thread of control. + */ + SH_TAILQ_HEAD(__dbth_xatxn) dbth_xatxn; + u_int32_t dbth_xa_status; + /* + * The following fields track which buffers this thread of + * control has pinned in the mpool buffer cache. + */ + u_int16_t dbth_pincount; /* Number of pins for this thread. */ + u_int16_t dbth_pinmax; /* Number of slots allocated. */ + roff_t dbth_pinlist; /* List of pins. */ + PIN_LIST dbth_pinarray[PINMAX]; /* Initial array of slots. */ +#ifdef DIAGNOSTIC + roff_t dbth_locker; /* Current locker for this thread. */ + u_int32_t dbth_check_off; /* Count of number of LOCK_OFF calls. 
*/ +#endif +}; +#ifdef DIAGNOSTIC +#define LOCK_CHECK_OFF(ip) if ((ip) != NULL) \ + (ip)->dbth_check_off++ + +#define LOCK_CHECK_ON(ip) if ((ip) != NULL) \ + (ip)->dbth_check_off-- + +#define LOCK_CHECK(dbc, pgno, mode, type) \ + DB_ASSERT((dbc)->dbp->env, (dbc)->locker == NULL || \ + __db_haslock((dbc)->dbp->env, \ + (dbc)->locker, (dbc)->dbp->mpf, pgno, mode, type) == 0) +#else +#define LOCK_CHECK_OFF(ip) NOP_STATEMENT +#define LOCK_CHECK_ON(ip) NOP_STATEMENT +#define LOCK_CHECK(dbc, pgno, mode) NOP_STATEMENT +#endif + +typedef struct __env_thread_info { + u_int32_t thr_count; + u_int32_t thr_init; + u_int32_t thr_max; + u_int32_t thr_nbucket; + roff_t thr_hashoff; +} THREAD_INFO; + +#define DB_EVENT(env, e, einfo) do { \ + DB_ENV *__dbenv = (env)->dbenv; \ + if (__dbenv->db_event_func != NULL) \ + __dbenv->db_event_func(__dbenv, e, einfo); \ +} while (0) + +typedef struct __flag_map { + u_int32_t inflag, outflag; +} FLAG_MAP; + +/* + * Internal database environment structure. + * + * This is the private database environment handle. The public environment + * handle is the DB_ENV structure. The library owns this structure, the user + * owns the DB_ENV structure. The reason there are two structures is because + * the user's configuration outlives any particular DB_ENV->open call, and + * separate structures allows us to easily discard internal information without + * discarding the user's configuration. + */ +struct __env { + DB_ENV *dbenv; /* Linked DB_ENV structure */ + + /* + * The ENV structure can be used concurrently, so field access is + * protected. + */ + db_mutex_t mtx_env; /* ENV structure mutex */ + + /* + * Some fields are included in the ENV structure rather than in the + * DB_ENV structure because they are only set as arguments to the + * DB_ENV->open method. In other words, because of the historic API, + * not for any rational reason. + * + * Arguments to DB_ENV->open. 
+ */ + char *db_home; /* Database home */ + u_int32_t open_flags; /* Flags */ + int db_mode; /* Default open permissions */ + + pid_t pid_cache; /* Cached process ID */ + + DB_FH *lockfhp; /* fcntl(2) locking file handle */ + + DB_LOCKER *env_lref; /* Locker in non-threaded handles */ + + DB_DISTAB recover_dtab; /* Dispatch table for recover funcs */ + + int dir_mode; /* Intermediate directory perms. */ + +#define ENV_DEF_DATA_LEN 100 + u_int32_t data_len; /* Data length in __db_prbytes. */ + + /* Thread tracking */ + u_int32_t thr_nbucket; /* Number of hash buckets */ + DB_HASHTAB *thr_hashtab; /* Hash table of DB_THREAD_INFO */ + + /* + * List of open DB handles for this ENV, used for cursor + * adjustment. Must be protected for multi-threaded support. + */ + db_mutex_t mtx_dblist; + int db_ref; /* DB handle reference count */ + TAILQ_HEAD(__dblist, __db) dblist; + + /* + * List of open file handles for this ENV. Must be protected + * for multi-threaded support. + */ + TAILQ_HEAD(__fdlist, __fh_t) fdlist; + + db_mutex_t mtx_mt; /* Mersenne Twister mutex */ + int mti; /* Mersenne Twister index */ + u_long *mt; /* Mersenne Twister state vector */ + + DB_CIPHER *crypto_handle; /* Crypto handle */ + DB_LOCKTAB *lk_handle; /* Lock handle */ + DB_LOG *lg_handle; /* Log handle */ + DB_MPOOL *mp_handle; /* Mpool handle */ + DB_MUTEXMGR *mutex_handle; /* Mutex handle */ + DB_REP *rep_handle; /* Replication handle */ + DB_TXNMGR *tx_handle; /* Txn handle */ + + /* + * XA support. 
+ */ + int xa_rmid; /* XA Resource Manager ID */ + int xa_ref; /* XA Reference count */ + TAILQ_ENTRY(__env) links; /* XA environments */ + + /* Application callback to copy data to/from a custom data source */ +#define DB_USERCOPY_GETDATA 0x0001 +#define DB_USERCOPY_SETDATA 0x0002 + int (*dbt_usercopy) + __P((DBT *, u_int32_t, void *, u_int32_t, u_int32_t)); + + int (*log_verify_wrap) __P((ENV *, const char *, u_int32_t, + const char *, const char *, time_t, time_t, u_int32_t, u_int32_t, + u_int32_t, u_int32_t, int, int)); + + REGINFO *reginfo; /* REGINFO structure reference */ + +#define DB_TEST_ELECTINIT 1 /* after __rep_elect_init */ +#define DB_TEST_ELECTVOTE1 2 /* after sending VOTE1 */ +#define DB_TEST_NO_PAGES 3 /* before sending PAGE */ +#define DB_TEST_POSTDESTROY 4 /* after destroy op */ +#define DB_TEST_POSTLOG 5 /* after logging all pages */ +#define DB_TEST_POSTLOGMETA 6 /* after logging meta in btree */ +#define DB_TEST_POSTOPEN 7 /* after __os_open */ +#define DB_TEST_POSTSYNC 8 /* after syncing the log */ +#define DB_TEST_PREDESTROY 9 /* before destroy op */ +#define DB_TEST_PREOPEN 10 /* before __os_open */ +#define DB_TEST_REPMGR_PERM 11 /* repmgr perm/archiving tests */ +#define DB_TEST_SUBDB_LOCKS 12 /* subdb locking tests */ + int test_abort; /* Abort value for testing */ + int test_check; /* Checkpoint value for testing */ + int test_copy; /* Copy value for testing */ + +#define ENV_CDB 0x00000001 /* DB_INIT_CDB */ +#define ENV_DBLOCAL 0x00000002 /* Environment for a private DB */ +#define ENV_LITTLEENDIAN 0x00000004 /* Little endian system. 
*/ +#define ENV_LOCKDOWN 0x00000008 /* DB_LOCKDOWN set */ +#define ENV_NO_OUTPUT_SET 0x00000010 /* No output channel set */ +#define ENV_OPEN_CALLED 0x00000020 /* DB_ENV->open called */ +#define ENV_PRIVATE 0x00000040 /* DB_PRIVATE set */ +#define ENV_RECOVER_FATAL 0x00000080 /* Doing fatal recovery in env */ +#define ENV_REF_COUNTED 0x00000100 /* Region references this handle */ +#define ENV_SYSTEM_MEM 0x00000200 /* DB_SYSTEM_MEM set */ +#define ENV_THREAD 0x00000400 /* DB_THREAD set */ +#define ENV_FORCE_TXN_BULK 0x00000800 /* Txns use bulk mode-for testing */ + u_int32_t flags; +}; + +/******************************************************* + * Database Access Methods. + *******************************************************/ +/* + * DB_IS_THREADED -- + * The database handle is free-threaded (was opened with DB_THREAD). + */ +#define DB_IS_THREADED(dbp) \ + ((dbp)->mutex != MUTEX_INVALID) + +/* Initialization methods are often illegal before/after open is called. */ +#define DB_ILLEGAL_AFTER_OPEN(dbp, name) \ + if (F_ISSET((dbp), DB_AM_OPEN_CALLED)) \ + return (__db_mi_open((dbp)->env, name, 1)); +#define DB_ILLEGAL_BEFORE_OPEN(dbp, name) \ + if (!F_ISSET((dbp), DB_AM_OPEN_CALLED)) \ + return (__db_mi_open((dbp)->env, name, 0)); +/* Some initialization methods are illegal if environment isn't local. */ +#define DB_ILLEGAL_IN_ENV(dbp, name) \ + if (!F_ISSET((dbp)->env, ENV_DBLOCAL)) \ + return (__db_mi_env((dbp)->env, name)); +#define DB_ILLEGAL_METHOD(dbp, flags) { \ + int __ret; \ + if ((__ret = __dbh_am_chk(dbp, flags)) != 0) \ + return (__ret); \ +} + +/* + * Common DBC->internal fields. Each access method adds additional fields + * to this list, but the initial fields are common. + */ +#define __DBC_INTERNAL \ + DBC *opd; /* Off-page duplicate cursor. */\ + DBC *pdbc; /* Pointer to parent cursor. */ \ + \ + void *page; /* Referenced page. */ \ + u_int32_t part; /* Partition number. */ \ + db_pgno_t root; /* Tree root. 
*/ \ + db_pgno_t pgno; /* Referenced page number. */ \ + db_indx_t indx; /* Referenced key item index. */\ + \ + /* Streaming -- cache last position. */ \ + db_pgno_t stream_start_pgno; /* Last start pgno. */ \ + u_int32_t stream_off; /* Current offset. */ \ + db_pgno_t stream_curr_pgno; /* Current overflow page. */ \ + \ + DB_LOCK lock; /* Cursor lock. */ \ + db_lockmode_t lock_mode; /* Lock mode. */ + +struct __dbc_internal { + __DBC_INTERNAL +}; + +/* Actions that __db_master_update can take. */ +typedef enum { MU_REMOVE, MU_RENAME, MU_OPEN, MU_MOVE } mu_action; + +/* + * Access-method-common macro for determining whether a cursor + * has been initialized. + */ +#ifdef HAVE_PARTITION +#define IS_INITIALIZED(dbc) (DB_IS_PARTITIONED((dbc)->dbp) ? \ + ((PART_CURSOR *)(dbc)->internal)->sub_cursor != NULL && \ + ((PART_CURSOR *)(dbc)->internal)->sub_cursor-> \ + internal->pgno != PGNO_INVALID : \ + (dbc)->internal->pgno != PGNO_INVALID) +#else +#define IS_INITIALIZED(dbc) ((dbc)->internal->pgno != PGNO_INVALID) +#endif + +/* Free the callback-allocated buffer, if necessary, hanging off of a DBT. */ +#define FREE_IF_NEEDED(env, dbt) \ + if (F_ISSET((dbt), DB_DBT_APPMALLOC)) { \ + __os_ufree((env), (dbt)->data); \ + F_CLR((dbt), DB_DBT_APPMALLOC); \ + } + +/* + * Use memory belonging to object "owner" to return the results of + * any no-DBT-flag get ops on cursor "dbc". + */ +#define SET_RET_MEM(dbc, owner) \ + do { \ + (dbc)->rskey = &(owner)->my_rskey; \ + (dbc)->rkey = &(owner)->my_rkey; \ + (dbc)->rdata = &(owner)->my_rdata; \ + } while (0) + +/* Use the return-data memory src is currently set to use in dest as well. */ +#define COPY_RET_MEM(src, dest) \ + do { \ + (dest)->rskey = (src)->rskey; \ + (dest)->rkey = (src)->rkey; \ + (dest)->rdata = (src)->rdata; \ + } while (0) + +/* Reset the returned-memory pointers to their defaults. 
*/ +#define RESET_RET_MEM(dbc) \ + do { \ + (dbc)->rskey = &(dbc)->my_rskey; \ + (dbc)->rkey = &(dbc)->my_rkey; \ + (dbc)->rdata = &(dbc)->my_rdata; \ + } while (0) + +#define COMPACT_TRUNCATE(c_data) do { \ + if (c_data->compact_truncate > 1) \ + c_data->compact_truncate--; \ +} while (0) + +/******************************************************* + * Mpool. + *******************************************************/ +/* + * File types for DB access methods. Negative numbers are reserved to DB. + */ +#define DB_FTYPE_SET -1 /* Call pgin/pgout functions. */ +#define DB_FTYPE_NOTSET 0 /* Don't call... */ +#define DB_LSN_OFF_NOTSET -1 /* Not yet set. */ +#define DB_CLEARLEN_NOTSET UINT32_MAX /* Not yet set. */ + +/* Structure used as the DB pgin/pgout pgcookie. */ +typedef struct __dbpginfo { + u_int32_t db_pagesize; /* Underlying page size. */ + u_int32_t flags; /* Some DB_AM flags needed. */ + DBTYPE type; /* DB type */ +} DB_PGINFO; + +/******************************************************* + * Log. + *******************************************************/ +/* Initialize an LSN to 'zero'. */ +#define ZERO_LSN(LSN) do { \ + (LSN).file = 0; \ + (LSN).offset = 0; \ +} while (0) +#define IS_ZERO_LSN(LSN) ((LSN).file == 0 && (LSN).offset == 0) + +#define IS_INIT_LSN(LSN) ((LSN).file == 1 && (LSN).offset == 0) +#define INIT_LSN(LSN) do { \ + (LSN).file = 1; \ + (LSN).offset = 0; \ +} while (0) + +#define MAX_LSN(LSN) do { \ + (LSN).file = UINT32_MAX; \ + (LSN).offset = UINT32_MAX; \ +} while (0) +#define IS_MAX_LSN(LSN) \ + ((LSN).file == UINT32_MAX && (LSN).offset == UINT32_MAX) + +/* If logging is turned off, smash the lsn. */ +#define LSN_NOT_LOGGED(LSN) do { \ + (LSN).file = 0; \ + (LSN).offset = 1; \ +} while (0) +#define IS_NOT_LOGGED_LSN(LSN) \ + ((LSN).file == 0 && (LSN).offset == 1) + +/* + * LOG_COMPARE -- compare two LSNs. + */ + +#define LOG_COMPARE(lsn0, lsn1) \ + ((lsn0)->file != (lsn1)->file ? \ + ((lsn0)->file < (lsn1)->file ? 
-1 : 1) : \ + ((lsn0)->offset != (lsn1)->offset ? \ + ((lsn0)->offset < (lsn1)->offset ? -1 : 1) : 0)) + +/******************************************************* + * Txn. + *******************************************************/ +#define DB_NONBLOCK(C) ((C)->txn != NULL && F_ISSET((C)->txn, TXN_NOWAIT)) +#define NOWAIT_FLAG(txn) \ + ((txn) != NULL && F_ISSET((txn), TXN_NOWAIT) ? DB_LOCK_NOWAIT : 0) +#define IS_REAL_TXN(txn) \ + ((txn) != NULL && !F_ISSET(txn, TXN_FAMILY)) +#define IS_SUBTRANSACTION(txn) \ + ((txn) != NULL && (txn)->parent != NULL) + +/* Checks for existence of an XA transaction in access method interfaces. */ +#define XA_CHECK_TXN(ip, txn) \ + if ((ip) != NULL && (txn) == NULL) { \ + (txn) = SH_TAILQ_FIRST(&(ip)->dbth_xatxn, __db_txn); \ + DB_ASSERT(env, txn == NULL || \ + txn->xa_thr_status == TXN_XA_THREAD_ASSOCIATED); \ + } + +/* Ensure that there is no XA transaction active. */ +#define XA_NO_TXN(ip, retval) { \ + DB_TXN *__txn; \ + retval = 0; \ + if ((ip) != NULL) { \ + __txn = SH_TAILQ_FIRST(&(ip)->dbth_xatxn, __db_txn); \ + if (__txn != NULL && \ + __txn->xa_thr_status == TXN_XA_THREAD_ASSOCIATED) \ + retval = EINVAL; \ + } \ +} + +/******************************************************* + * Crypto. + *******************************************************/ +#define DB_IV_BYTES 16 /* Bytes per IV */ +#define DB_MAC_KEY 20 /* Bytes per MAC checksum */ + +/******************************************************* + * Compression + *******************************************************/ +#define CMP_INT_SPARE_VAL 0xFC /* Smallest byte value that the integer + compression algorithm doesn't use */ + +#if defined(__cplusplus) +} +#endif + +/******************************************************* + * Remaining general DB includes. 
+ *******************************************************/ +@db_int_def@ + +#include "dbinc/globals.h" +#include "dbinc/clock.h" +#include "dbinc/debug.h" +#include "dbinc/region.h" +#include "dbinc_auto/env_ext.h" +#include "dbinc/mutex.h" +#ifdef HAVE_REPLICATION_THREADS +#include "dbinc/repmgr.h" +#endif +#include "dbinc/rep.h" +#include "dbinc/os.h" +#include "dbinc_auto/clib_ext.h" +#include "dbinc_auto/common_ext.h" + +/******************************************************* + * Remaining Log. + * These need to be defined after the general includes + * because they need rep.h from above. + *******************************************************/ +/* + * Test if the environment is currently logging changes. If we're in recovery + * or we're a replication client, we don't need to log changes because they're + * already in the log, even though we have a fully functional log system. + */ +#define DBENV_LOGGING(env) \ + (LOGGING_ON(env) && !IS_REP_CLIENT(env) && (!IS_RECOVERING(env))) + +/* + * Test if we need to log a change. By default, we don't log operations without + * associated transactions, unless DIAGNOSTIC, DEBUG_ROP or DEBUG_WOP are on. + * This is because we want to get log records for read/write operations, and, if + * we are trying to debug something, more information is always better. + * + * The DBC_RECOVER flag is set when we're in abort, as well as during recovery; + * thus DBC_LOGGING may be false for a particular dbc even when DBENV_LOGGING + * is true. + * + * We explicitly use LOGGING_ON/IS_REP_CLIENT here because we don't want to pull + * in the log headers, which IS_RECOVERING (and thus DBENV_LOGGING) rely on, and + * because DBC_RECOVER should be set anytime IS_RECOVERING would be true. + * + * If we're not in recovery (master - doing an abort or a client applying + * a txn), then a client's only path through here is on an internal + * operation, and a master's only path through here is a transactional + * operation. 
Detect if either is not the case. + */ +#if defined(DIAGNOSTIC) || defined(DEBUG_ROP) || defined(DEBUG_WOP) +#define DBC_LOGGING(dbc) __dbc_logging(dbc) +#else +#define DBC_LOGGING(dbc) \ + ((dbc)->txn != NULL && LOGGING_ON((dbc)->env) && \ + !F_ISSET((dbc), DBC_RECOVER) && !IS_REP_CLIENT((dbc)->env)) +#endif + +#endif /* !_DB_INT_H_ */ diff --git a/src/dbinc/db_join.h b/src/dbinc/db_join.h new file mode 100644 index 00000000..ffe70834 --- /dev/null +++ b/src/dbinc/db_join.h @@ -0,0 +1,37 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_JOIN_H_ +#define _DB_JOIN_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Joins use a join cursor that is similar to a regular DB cursor except + * that it only supports c_get and c_close functionality. Also, it does + * not support the full range of flags for get. + */ +typedef struct __join_cursor { + u_int8_t *j_exhausted; /* Array of flags; is cursor i exhausted? */ + DBC **j_curslist; /* Array of cursors in the join: constant. */ + DBC **j_fdupcurs; /* Cursors w/ first instances of current dup. */ + DBC **j_workcurs; /* Scratch cursor copies to muck with. */ + DB *j_primary; /* Primary dbp. */ + DBT j_key; /* Used to do lookups. */ + DBT j_rdata; /* Memory used for data return. */ + u_int32_t j_ncurs; /* How many cursors do we have? */ +#define JOIN_RETRY 0x01 /* Error on primary get; re-return same key. */ + u_int32_t flags; +} JOIN_CURSOR; + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_JOIN_H_ */ diff --git a/src/dbinc/db_page.h b/src/dbinc/db_page.h new file mode 100644 index 00000000..9e17d96f --- /dev/null +++ b/src/dbinc/db_page.h @@ -0,0 +1,840 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_PAGE_H_ +#define _DB_PAGE_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * DB page formats. + * + * !!! + * This implementation requires that values within the following structures + * NOT be padded -- note, ANSI C permits random padding within structures. + * If your compiler pads randomly you can just forget ever making DB run on + * your system. In addition, no data type can require larger alignment than + * its own size, e.g., a 4-byte data element may not require 8-byte alignment. + * + * Note that key/data lengths are often stored in db_indx_t's -- this is + * not accidental, nor does it limit the key/data size. If the key/data + * item fits on a page, it's guaranteed to be small enough to fit into a + * db_indx_t, and storing it in one saves space. + */ + +#define PGNO_INVALID 0 /* Invalid page number in any database. */ +#define PGNO_BASE_MD 0 /* Base database: metadata page number. */ + +/* Page types. */ +#define P_INVALID 0 /* Invalid page type. */ +#define __P_DUPLICATE 1 /* Duplicate. DEPRECATED in 3.1 */ +#define P_HASH_UNSORTED 2 /* Hash pages created pre 4.6. DEPRECATED */ +#define P_IBTREE 3 /* Btree internal. */ +#define P_IRECNO 4 /* Recno internal. */ +#define P_LBTREE 5 /* Btree leaf. */ +#define P_LRECNO 6 /* Recno leaf. */ +#define P_OVERFLOW 7 /* Overflow. */ +#define P_HASHMETA 8 /* Hash metadata page. */ +#define P_BTREEMETA 9 /* Btree metadata page. */ +#define P_QAMMETA 10 /* Queue metadata page. */ +#define P_QAMDATA 11 /* Queue data page. */ +#define P_LDUP 12 /* Off-page duplicate leaf. */ +#define P_HASH 13 /* Sorted hash page. */ +#define P_HEAPMETA 14 /* Heap metadata page. */ +#define P_HEAP 15 /* Heap data page. */ +#define P_IHEAP 16 /* Heap internal. */ +#define P_PAGETYPE_MAX 17 +/* Flag to __db_new */ +#define P_DONTEXTEND 0x8000 /* Don't allocate if there are no free pages. */ + +/* + * When we create pages in mpool, we ask mpool to clear some number of bytes + * in the header. 
This number must be at least as big as the regular page + * headers and cover enough of the btree and hash meta-data pages to obliterate + * the page type. + */ +#define DB_PAGE_DB_LEN 32 +#define DB_PAGE_QUEUE_LEN 0 + +/************************************************************************ + GENERIC METADATA PAGE HEADER + * + * !!! + * The magic and version numbers have to be in the same place in all versions + * of the metadata page as the application may not have upgraded the database. + ************************************************************************/ +typedef struct _dbmeta33 { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. */ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int8_t encrypt_alg; /* 24: Encryption algorithm. */ + u_int8_t type; /* 25: Page type. */ +#define DBMETA_CHKSUM 0x01 +#define DBMETA_PART_RANGE 0x02 +#define DBMETA_PART_CALLBACK 0x04 + u_int8_t metaflags; /* 26: Meta-only flags */ + u_int8_t unused1; /* 27: Unused. */ + u_int32_t free; /* 28-31: Free list page number. */ + db_pgno_t last_pgno; /* 32-35: Page number of last page in db. */ + u_int32_t nparts; /* 36-39: Number of partitions. */ + u_int32_t key_count; /* 40-43: Cached key count. */ + u_int32_t record_count; /* 44-47: Cached record count. */ + u_int32_t flags; /* 48-51: Flags: unique to each AM. */ + /* 52-71: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} DBMETA33, DBMETA; + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _btmeta33 { +#define BTM_DUP 0x001 /* Duplicates. */ +#define BTM_RECNO 0x002 /* Recno tree. */ +#define BTM_RECNUM 0x004 /* Btree: maintain record count. */ +#define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. 
*/ +#define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */ +#define BTM_SUBDB 0x020 /* Subdatabases. */ +#define BTM_DUPSORT 0x040 /* Duplicates are sorted. */ +#define BTM_COMPRESS 0x080 /* Compressed. */ +#define BTM_MASK 0x0ff + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t unused1; /* 72-75: Unused space. */ + u_int32_t minkey; /* 76-79: Btree: Minkey. */ + u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */ + u_int32_t root; /* 88-91: Root page. */ + u_int32_t unused2[92]; /* 92-459: Unused space. */ + u_int32_t crypto_magic; /* 460-463: Crypto magic number */ + u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ + u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ + u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ + + /* + * Minimum page size is 512. + */ +} BTMETA33, BTMETA; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _hashmeta33 { +#define DB_HASH_DUP 0x01 /* Duplicates. */ +#define DB_HASH_SUBDB 0x02 /* Subdatabases. */ +#define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */ + DBMETA dbmeta; /* 00-71: Generic meta-data page header. 
*/ + + u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 76-79: Modulo mask into table */ + u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */ + u_int32_t ffactor; /* 84-87: Fill factor */ + u_int32_t nelem; /* 88-91: Number of keys in hash table */ + u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */ +#define NCACHED 32 /* number of spare points */ + /* 96-223: Spare pages for overflow */ + u_int32_t spares[NCACHED]; + u_int32_t unused[59]; /* 224-459: Unused space */ + u_int32_t crypto_magic; /* 460-463: Crypto magic number */ + u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ + u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ + u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ + + /* + * Minimum page size is 512. + */ +} HMETA33, HMETA; + +/************************************************************************ + HEAP METADATA PAGE LAYOUT +*************************************************************************/ +/* + * Heap Meta data page structure + * + */ +typedef struct _heapmeta { + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + db_pgno_t curregion; /* 72-75: Current region pgno. */ + u_int32_t nregions; /* 76-79: Number of regions. */ + u_int32_t gbytes; /* 80-83: GBytes for fixed size heap. */ + u_int32_t bytes; /* 84-87: Bytes for fixed size heap. */ + u_int32_t region_size; /* 88-91: Max region size. */ + u_int32_t unused2[92]; /* 92-459: Unused space.*/ + u_int32_t crypto_magic; /* 460-463: Crypto magic number */ + u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ + u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ + u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ + + + /* + * Minimum page size is 512. 
+ */ +} HEAPMETA; + +/************************************************************************ + QUEUE METADATA PAGE LAYOUT + ************************************************************************/ +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta33 { + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t first_recno; /* 72-75: First not deleted record. */ + u_int32_t cur_recno; /* 76-79: Next recno to be allocated. */ + u_int32_t re_len; /* 80-83: Fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Fixed-length record pad. */ + u_int32_t rec_page; /* 88-91: Records Per Page. */ + u_int32_t page_ext; /* 92-95: Pages per extent */ + + u_int32_t unused[91]; /* 96-459: Unused space */ + u_int32_t crypto_magic; /* 460-463: Crypto magic number */ + u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ + u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ + u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ + /* + * Minimum page size is 512. + */ +} QMETA33, QMETA; + +/* + * DBMETASIZE is a constant used by __db_file_setup and DB->verify + * as a buffer which is guaranteed to be larger than any possible + * metadata page size and smaller than any disk sector. 
+ */ +#define DBMETASIZE 512 + +/************************************************************************ + BTREE/HASH MAIN PAGE LAYOUT + ************************************************************************/ +/* + * +-----------------------------------+ + * | lsn | pgno | prev pgno | + * +-----------------------------------+ + * | next pgno | entries | hf offset | + * +-----------------------------------+ + * | level | type | chksum | + * +-----------------------------------+ + * | iv | index | free --> | + * +-----------+-----------------------+ + * | F R E E A R E A | + * +-----------------------------------+ + * | <-- free | item | + * +-----------------------------------+ + * | item | item | item | + * +-----------------------------------+ + * + * sizeof(PAGE) == 26 bytes + possibly 20 bytes of checksum and possibly + * 16 bytes of IV (+ 2 bytes for alignment), and the following indices + * are guaranteed to be two-byte aligned. If we aren't doing crypto or + * checksumming the bytes are reclaimed for data storage. + * + * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the + * key for inp[1]'s data. All other types of pages only contain single items. + */ +typedef struct __pg_chksum { + u_int8_t unused[2]; /* 26-27: For alignment */ + u_int8_t chksum[4]; /* 28-31: Checksum */ +} PG_CHKSUM; + +typedef struct __pg_crypto { + u_int8_t unused[2]; /* 26-27: For alignment */ + u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */ + u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */ + /* !!! + * Must be 16-byte aligned for crypto + */ +} PG_CRYPTO; + +typedef struct _db_page { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + db_pgno_t prev_pgno; /* 12-15: Previous page number. */ + db_pgno_t next_pgno; /* 16-19: Next page number. */ + db_indx_t entries; /* 20-21: Number of items on the page. */ + db_indx_t hf_offset; /* 22-23: High free byte page offset. 
*/ + + /* + * The btree levels are numbered from the leaf to the root, starting + * with 1, so the leaf is level 1, its parent is level 2, and so on. + * We maintain this level on all btree pages, but the only place that + * we actually need it is on the root page. It would not be difficult + * to hide the byte on the root page once it becomes an internal page, + * so we could get this byte back if we needed it for something else. + */ +#define LEAFLEVEL 1 +#define MAXBTREELEVEL 255 + u_int8_t level; /* 24: Btree tree level. */ + u_int8_t type; /* 25: Page type. */ +} PAGE; + +/* + * With many compilers sizeof(PAGE) == 28, while SIZEOF_PAGE == 26. + * We add in other things directly after the page header and need + * the SIZEOF_PAGE. When giving the sizeof(), many compilers will + * pad it out to the next 4-byte boundary. + */ +#define SIZEOF_PAGE 26 +/* + * !!! + * DB_AM_ENCRYPT always implies DB_AM_CHKSUM so that must come first. + */ +#define P_INP(dbp, pg) \ + ((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE + \ + (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) : \ + (F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0)))) + +#define P_IV(dbp, pg) \ + (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \ + SIZEOF_PAGE + SSZA(PG_CRYPTO, iv)) \ + : NULL) + +#define P_CHKSUM(dbp, pg) \ + (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \ + SIZEOF_PAGE + SSZA(PG_CRYPTO, chksum)) : \ + (F_ISSET((dbp), DB_AM_CHKSUM) ? ((u_int8_t *)(pg) + \ + SIZEOF_PAGE + SSZA(PG_CHKSUM, chksum)) \ + : NULL)) + +/* PAGE element macros. 
*/ +#define LSN(p) (((PAGE *)p)->lsn) +#define PGNO(p) (((PAGE *)p)->pgno) +#define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) +#define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) +#define NUM_ENT(p) (((PAGE *)p)->entries) +#define HOFFSET(p) (((PAGE *)p)->hf_offset) +#define LEVEL(p) (((PAGE *)p)->level) +#define TYPE(p) (((PAGE *)p)->type) + +/************************************************************************ + HEAP PAGE LAYOUT + ************************************************************************/ +#define HEAPPG_NORMAL 26 +#define HEAPPG_CHKSUM 48 +#define HEAPPG_SEC 64 + +/* + * +0-----------2------------4-----------6-----------7+ + * | lsn | + * +-------------------------+------------------------+ + * | pgno | unused0 | + * +-------------+-----------+-----------+------------+ + * | high_indx | free_indx | entries | hf offset | + * +-------+-----+-----------+-----------+------------+ + * |unused2|type | unused3 | ...chksum... | + * +-------+-----+-----------+------------------------+ + * | ...iv... | offset table / free space map | + * +-------------+------------------------------------+ + * |free-> F R E E A R E A | + * +--------------------------------------------------+ + * | <-- free | item | + * +-------------------------+------------------------+ + * | item | item | + * +-------------------------+------------------------+ + * + * The page layout of both heap internal and data pages. If not using + * crypto, iv will be overwritten with data. If not using checksumming, + * unused3 and chksum will also be overwritten with data and data will start at + * 26. Note that this layout lets us re-use a lot of the PAGE element macros + * defined above. + */ +typedef struct _heappg { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t high_pgno; /* 12-15: Highest page in region. */ + u_int16_t high_indx; /* 16-17: Highest index in the offset table. */ + db_indx_t free_indx; /* 18-19: First available index. 
*/ + db_indx_t entries; /* 20-21: Number of items on the page. */ + db_indx_t hf_offset; /* 22-23: High free byte page offset. */ + u_int8_t unused2[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused3[2]; /* 26-27: Never used, just checksum alignment. */ + u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */ + u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */ +} HEAPPG; + +/* Define first possible data page for heap, 0 is metapage, 1 is region page */ +#define FIRST_HEAP_RPAGE 1 +#define FIRST_HEAP_DPAGE 2 + +typedef struct __heaphdr { +#define HEAP_RECSPLIT 0x01 /* Heap data record is split */ +#define HEAP_RECFIRST 0x02 /* First piece of a split record */ +#define HEAP_RECLAST 0x04 /* Last piece of a split record */ + u_int8_t flags; /* 00: Flags describing record. */ + u_int8_t unused; /* 01: Padding. */ + u_int16_t size; /* 02-03: The size of the stored data piece. */ +} HEAPHDR; + +typedef struct __heaphdrsplt { + HEAPHDR std_hdr; /* 00-03: The standard data header */ + u_int32_t tsize; /* 04-07: Total record size, 1st piece only */ + db_pgno_t nextpg; /* 08-11: RID.pgno of the next record piece */ + db_indx_t nextindx; /* 12-13: RID.indx of the next record piece */ + u_int16_t unused; /* 14-15: Padding. */ +} HEAPSPLITHDR; + +#define HEAP_HDRSIZE(hdr) \ + (F_ISSET((hdr), HEAP_RECSPLIT) ? sizeof(HEAPSPLITHDR) : sizeof(HEAPHDR)) + +#define HEAPPG_SZ(dbp) \ + (F_ISSET((dbp), DB_AM_ENCRYPT) ? HEAPPG_SEC : \ + F_ISSET((dbp), DB_AM_CHKSUM) ? HEAPPG_CHKSUM : HEAPPG_NORMAL) + +/* Each byte in the bitmap describes 4 pages (2 bits per page.) */ +#define HEAP_REGION_COUNT(size) ((size - HEAPPG_SZ(dbp)) * 4) +#define HEAP_DEFAULT_REGION_MAX HEAP_REGION_COUNT(8 * 1024) +#define HEAP_REGION_SIZE(dbp) (((HEAP*) (dbp)->heap_internal)->region_size) + +/* Figure out which region a given page belongs to. 
*/ +#define HEAP_REGION_PGNO(dbp, p) \ + ((((p) - 1) / (HEAP_REGION_SIZE(dbp) + 1)) * \ + (HEAP_REGION_SIZE(dbp) + 1) + 1) +/* Translate a region pgno to region number */ +#define HEAP_REGION_NUM(dbp, pgno) \ + ((((pgno) - 1) / (HEAP_REGION_SIZE((dbp)) + 1)) + 1) +/* + * Given an internal heap page and page number relative to that page, return the + * bits from map describing free space on the nth page. Each byte in the map + * describes 4 pages. Point at the correct byte and mask the correct 2 bits. + */ +#define HEAP_SPACE(dbp, pg, n) \ + (HEAP_SPACEMAP((dbp), (pg))[(n) / 4] >> (2 * ((n) % 4)) & 3) + +#define HEAP_SETSPACE(dbp, pg, n, b) do { \ + HEAP_SPACEMAP((dbp), (pg))[(n) / 4] &= ~(3 << (2 * ((n) % 4))); \ + HEAP_SPACEMAP((dbp), (pg))[(n) / 4] |= ((b & 3) << (2 * ((n) % 4))); \ +} while (0) + +/* Return the bitmap describing free space on heap data pages. */ +#define HEAP_SPACEMAP(dbp, pg) ((u_int8_t *)P_INP((dbp), (pg))) + +/* Return the offset table for a heap data page. */ +#define HEAP_OFFSETTBL(dbp, pg) P_INP((dbp), (pg)) + +/* + * Calculate the % of a page a given size occupies and translate that to the + * corresponding bitmap value. + */ +#define HEAP_CALCSPACEBITS(dbp, sz, space) do { \ + (space) = 100 * (sz) / (dbp)->pgsize; \ + if ((space) <= HEAP_PG_FULL_PCT) \ + (space) = HEAP_PG_FULL; \ + else if ((space) <= HEAP_PG_GT66_PCT) \ + (space) = HEAP_PG_GT66; \ + else if ((space) <= HEAP_PG_GT33_PCT) \ + (space) = HEAP_PG_GT33; \ + else \ + (space) = HEAP_PG_LT33; \ +} while (0) + +/* Return the amount of free space on a heap data page. */ +#define HEAP_FREESPACE(dbp, p) \ + (HOFFSET(p) - HEAPPG_SZ(dbp) - \ + (NUM_ENT(p) == 0 ? 0 : ((HEAP_HIGHINDX(p) + 1) * sizeof(db_indx_t)))) + +/* The maximum amount of data that can fit on an empty heap data page. 
*/ +#define HEAP_MAXDATASIZE(dbp) \ + ((dbp)->pgsize - HEAPPG_SZ(dbp) - sizeof(db_indx_t)) + +#define HEAP_FREEINDX(p) (((HEAPPG *)p)->free_indx) +#define HEAP_HIGHINDX(p) (((HEAPPG *)p)->high_indx) + +/* True if we have a page that deals with heap */ +#define HEAPTYPE(h) \ + (TYPE(h) == P_HEAPMETA || TYPE(h) == P_HEAP || TYPE(h) == P_IHEAP) + +/************************************************************************ + QUEUE MAIN PAGE LAYOUT + ************************************************************************/ +/* + * Sizes of page below. Used to reclaim space if not doing + * crypto or checksumming. If you change the QPAGE below you + * MUST adjust this too. + */ +#define QPAGE_NORMAL 28 +#define QPAGE_CHKSUM 48 +#define QPAGE_SEC 64 + +typedef struct _qpage { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t unused0[3]; /* 12-23: Unused. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ + u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */ + u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */ +} QPAGE; + +#define QPAGE_SZ(dbp) \ + (F_ISSET((dbp), DB_AM_ENCRYPT) ? QPAGE_SEC : \ + F_ISSET((dbp), DB_AM_CHKSUM) ? QPAGE_CHKSUM : QPAGE_NORMAL) +/* + * !!! + * The next_pgno and prev_pgno fields are not maintained for btree and recno + * internal pages. Doing so only provides a minor performance improvement, + * it's hard to do when deleting internal pages, and it increases the chance + * of deadlock during deletes and splits because we have to re-link pages at + * more than the leaf level. + * + * !!! + * The btree/recno access method needs db_recno_t bytes of space on the root + * page to specify how many records are stored in the tree. 
(The alternative + * is to store the number of records in the meta-data page, which will create + * a second hot spot in trees being actively modified, or recalculate it from + * the BINTERNAL fields on each access.) Overload the PREV_PGNO field. + */ +#define RE_NREC(p) \ + ((TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) ? PREV_PGNO(p) : \ + (db_pgno_t)(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : NUM_ENT(p))) +#define RE_NREC_ADJ(p, adj) \ + PREV_PGNO(p) += adj; +#define RE_NREC_SET(p, num) \ + PREV_PGNO(p) = (num); + +/* + * Initialize a page. + * + * !!! + * Don't modify the page's LSN, code depends on it being unchanged after a + * P_INIT call. + */ +#define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ + PGNO(pg) = (n); \ + PREV_PGNO(pg) = (pg_prev); \ + NEXT_PGNO(pg) = (pg_next); \ + NUM_ENT(pg) = (0); \ + HOFFSET(pg) = (db_indx_t)(pg_size); \ + LEVEL(pg) = (btl); \ + TYPE(pg) = (pg_type); \ +} while (0) + +/* Page header length (offset to first index). */ +#define P_OVERHEAD(dbp) P_TO_UINT16(P_INP(dbp, 0)) + +/* First free byte. */ +#define LOFFSET(dbp, pg) \ + (P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t)) + +/* Free space on a regular page. */ +#define P_FREESPACE(dbp, pg) (HOFFSET(pg) - LOFFSET(dbp, pg)) + +/* Get a pointer to the bytes at a specific index. */ +#define P_ENTRY(dbp, pg, indx) ((u_int8_t *)pg + P_INP(dbp, pg)[indx]) + +/************************************************************************ + OVERFLOW PAGE LAYOUT + ************************************************************************/ + +/* + * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which + * store a page number (the first page of the overflow item) and a length + * (the total length of the overflow item). The overflow item consists of + * some number of overflow pages, linked by the next_pgno field of the page. + * A next_pgno field of PGNO_INVALID flags the end of the overflow item. 
+ * + * Overflow page overloads: + * The amount of overflow data stored on each page is stored in the + * hf_offset field. + * + * The implementation reference counts overflow items as it's possible + * for them to be promoted onto btree internal pages. The reference + * count is stored in the entries field. + */ +#define OV_LEN(p) (((PAGE *)p)->hf_offset) +#define OV_REF(p) (((PAGE *)p)->entries) + +/* Maximum number of bytes that you can put on an overflow page. */ +#define P_MAXSPACE(dbp, psize) ((psize) - P_OVERHEAD(dbp)) + +/* Free space on an overflow page. */ +#define P_OVFLSPACE(dbp, psize, pg) (P_MAXSPACE(dbp, psize) - HOFFSET(pg)) + +/************************************************************************ + HASH PAGE LAYOUT + ************************************************************************/ + +/* Each index references a group of bytes on the page. */ +#define H_KEYDATA 1 /* Key/data item. */ +#define H_DUPLICATE 2 /* Duplicate key/data item. */ +#define H_OFFPAGE 3 /* Overflow key/data item. */ +#define H_OFFDUP 4 /* Overflow page of duplicates. */ + +/* + * !!! + * Items on hash pages are (potentially) unaligned, so we can never cast the + * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as + * we do with B+tree on-page structures. Because we frequently want the type + * field, it requires no alignment, and it's in the same location in all three + * structures, there's a pair of macros. + */ +#define HPAGE_PTYPE(p) (*(u_int8_t *)p) +#define HPAGE_TYPE(dbp, pg, indx) (*P_ENTRY(dbp, pg, indx)) + +/* + * The first and second types are H_KEYDATA and H_DUPLICATE, represented + * by the HKEYDATA structure: + * + * +-----------------------------------+ + * | type | key/data ... 
| + * +-----------------------------------+ + * + * For duplicates, the data field encodes duplicate elements in the data + * field: + * + * +---------------------------------------------------------------+ + * | type | len1 | element1 | len1 | len2 | element2 | len2 | + * +---------------------------------------------------------------+ + * + * Thus, by keeping track of the offset in the element, we can do both + * backward and forward traversal. + */ +typedef struct _hkeydata { + u_int8_t type; /* 00: Page type. */ + u_int8_t data[1]; /* Variable length key/data item. */ +} HKEYDATA; +#define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data)) + +/* + * The length of any HKEYDATA item. Note that indx is an element index, + * not a PAIR index. + */ +#define LEN_HITEM(dbp, pg, pgsize, indx) \ + (((indx) == 0 ? (pgsize) : \ + (P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx])) + +#define LEN_HKEYDATA(dbp, pg, psize, indx) \ + (db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0)) + +/* + * Page space required to add a new HKEYDATA item to the page, with and + * without the index value. + */ +#define HKEYDATA_SIZE(len) \ + ((len) + SSZA(HKEYDATA, data)) +#define HKEYDATA_PSIZE(len) \ + (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) + +/* Put a HKEYDATA item at the location referenced by a page entry. */ +#define PUT_HKEYDATA(pe, kd, len, etype) { \ + ((HKEYDATA *)(pe))->type = etype; \ + memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), kd, len); \ +} + +/* + * Macros the describe the page layout in terms of key-data pairs. 
+ */ +#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) +#define H_KEYINDEX(indx) (indx) +#define H_DATAINDEX(indx) ((indx) + 1) +#define H_PAIRKEY(dbp, pg, indx) P_ENTRY(dbp, pg, H_KEYINDEX(indx)) +#define H_PAIRDATA(dbp, pg, indx) P_ENTRY(dbp, pg, H_DATAINDEX(indx)) +#define H_PAIRSIZE(dbp, pg, psize, indx) \ + (LEN_HITEM(dbp, pg, psize, H_KEYINDEX(indx)) + \ + LEN_HITEM(dbp, pg, psize, H_DATAINDEX(indx))) +#define LEN_HDATA(dbp, p, psize, indx) \ + LEN_HKEYDATA(dbp, p, psize, H_DATAINDEX(indx)) +#define LEN_HKEY(dbp, p, psize, indx) \ + LEN_HKEYDATA(dbp, p, psize, H_KEYINDEX(indx)) + +/* + * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: + */ +typedef struct _hoffpage { + u_int8_t type; /* 00: Page type and delete flag. */ + u_int8_t unused[3]; /* 01-03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Offpage page number. */ + u_int32_t tlen; /* 08-11: Total length of item. */ +} HOFFPAGE; + +#define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno)) +#define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen)) + +/* + * Page space required to add a new HOFFPAGE item to the page, with and + * without the index value. + */ +#define HOFFPAGE_SIZE (sizeof(HOFFPAGE)) +#define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t)) + +/* + * The fourth type is H_OFFDUP represented by the HOFFDUP structure: + */ +typedef struct _hoffdup { + u_int8_t type; /* 00: Page type and delete flag. */ + u_int8_t unused[3]; /* 01-03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Offpage page number. */ +} HOFFDUP; +#define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno)) + +/* + * Page space required to add a new HOFFDUP item to the page, with and + * without the index value. + */ +#define HOFFDUP_SIZE (sizeof(HOFFDUP)) + +/************************************************************************ + BTREE PAGE LAYOUT + ************************************************************************/ + +/* Each index references a group of bytes on the page. 
*/ +#define B_KEYDATA 1 /* Key/data item. */ +#define B_DUPLICATE 2 /* Duplicate key/data item. */ +#define B_OVERFLOW 3 /* Overflow key/data item. */ + +/* + * We have to store a deleted entry flag in the page. The reason is complex, + * but the simple version is that we can't delete on-page items referenced by + * a cursor -- the return order of subsequent insertions might be wrong. The + * delete flag is an overload of the top bit of the type byte. + */ +#define B_DELETE (0x80) +#define B_DCLR(t) (t) &= ~B_DELETE +#define B_DSET(t) (t) |= B_DELETE +#define B_DISSET(t) ((t) & B_DELETE) + +#define B_TYPE(t) ((t) & ~B_DELETE) +#define B_TSET(t, type) ((t) = B_TYPE(type)) +#define B_TSET_DELETED(t, type) ((t) = (type) | B_DELETE) + +/* + * The first type is B_KEYDATA, represented by the BKEYDATA structure: + */ +typedef struct _bkeydata { + db_indx_t len; /* 00-01: Key/data item length. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t data[1]; /* Variable length key/data item. */ +} BKEYDATA; + +/* Get a BKEYDATA item for a specific index. */ +#define GET_BKEYDATA(dbp, pg, indx) \ + ((BKEYDATA *)P_ENTRY(dbp, pg, indx)) + +/* + * Page space required to add a new BKEYDATA item to the page, with and + * without the index value. The (u_int16_t) cast avoids warnings: DB_ALIGN + * casts to uintmax_t, the cast converts it to a small integral type so we + * don't get complaints when we assign the final result to an integral type + * smaller than uintmax_t. + */ +#define BKEYDATA_SIZE(len) \ + (u_int16_t)DB_ALIGN((len) + SSZA(BKEYDATA, data), sizeof(u_int32_t)) +#define BKEYDATA_PSIZE(len) \ + (BKEYDATA_SIZE(len) + sizeof(db_indx_t)) + +/* + * The second and third types are B_DUPLICATE and B_OVERFLOW, represented + * by the BOVERFLOW structure. + */ +typedef struct _boverflow { + db_indx_t unused1; /* 00-01: Padding, unused. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t unused2; /* 03: Padding, unused. 
*/ + db_pgno_t pgno; /* 04-07: Next page number. */ + u_int32_t tlen; /* 08-11: Total length of item. */ +} BOVERFLOW; + +/* Get a BOVERFLOW item for a specific index. */ +#define GET_BOVERFLOW(dbp, pg, indx) \ + ((BOVERFLOW *)P_ENTRY(dbp, pg, indx)) + +/* + * Page space required to add a new BOVERFLOW item to the page, with and + * without the index value. + */ +#define BOVERFLOW_SIZE \ + ((u_int16_t)DB_ALIGN(sizeof(BOVERFLOW), sizeof(u_int32_t))) +#define BOVERFLOW_PSIZE \ + (BOVERFLOW_SIZE + sizeof(db_indx_t)) + +#define BITEM_SIZE(bk) \ + (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_SIZE : \ + BKEYDATA_SIZE((bk)->len)) + +#define BITEM_PSIZE(bk) \ + (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_PSIZE : \ + BKEYDATA_PSIZE((bk)->len)) + +/* + * Btree leaf and hash page layouts group indices in sets of two, one for the + * key and one for the data. Everything else does it in sets of one to save + * space. Use the following macros so that it's real obvious what's going on. + */ +#define O_INDX 1 +#define P_INDX 2 + +/************************************************************************ + BTREE INTERNAL PAGE LAYOUT + ************************************************************************/ + +/* + * Btree internal entry. + */ +typedef struct _binternal { + db_indx_t len; /* 00-01: Key/data item length. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t unused; /* 03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Page number of referenced page. */ + db_recno_t nrecs; /* 08-11: Subtree record count. */ + u_int8_t data[1]; /* Variable length key item. */ +} BINTERNAL; + +/* Get a BINTERNAL item for a specific index. */ +#define GET_BINTERNAL(dbp, pg, indx) \ + ((BINTERNAL *)P_ENTRY(dbp, pg, indx)) + +/* + * Page space required to add a new BINTERNAL item to the page, with and + * without the index value. 
+ */ +#define BINTERNAL_SIZE(len) \ + (u_int16_t)DB_ALIGN((len) + SSZA(BINTERNAL, data), sizeof(u_int32_t)) +#define BINTERNAL_PSIZE(len) \ + (BINTERNAL_SIZE(len) + sizeof(db_indx_t)) + +/************************************************************************ + RECNO INTERNAL PAGE LAYOUT + ************************************************************************/ + +/* + * The recno internal entry. + */ +typedef struct _rinternal { + db_pgno_t pgno; /* 00-03: Page number of referenced page. */ + db_recno_t nrecs; /* 04-07: Subtree record count. */ +} RINTERNAL; + +/* Get a RINTERNAL item for a specific index. */ +#define GET_RINTERNAL(dbp, pg, indx) \ + ((RINTERNAL *)P_ENTRY(dbp, pg, indx)) + +/* + * Page space required to add a new RINTERNAL item to the page, with and + * without the index value. + */ +#define RINTERNAL_SIZE \ + (u_int16_t)DB_ALIGN(sizeof(RINTERNAL), sizeof(u_int32_t)) +#define RINTERNAL_PSIZE \ + (RINTERNAL_SIZE + sizeof(db_indx_t)) + +typedef struct __pglist { + db_pgno_t pgno, next_pgno; + DB_LSN lsn; +} db_pglist_t; + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_PAGE_H_ */ diff --git a/src/dbinc/db_swap.h b/src/dbinc/db_swap.h new file mode 100644 index 00000000..9eb3b284 --- /dev/null +++ b/src/dbinc/db_swap.h @@ -0,0 +1,262 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _DB_SWAP_H_ +#define _DB_SWAP_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Little endian <==> big endian 64-bit swap macros. 
+ * M_64_SWAP swap a memory location + * P_64_COPY copy potentially unaligned 4 byte quantities + * P_64_SWAP swap a referenced memory location + */ +#undef M_64_SWAP +#define M_64_SWAP(a) { \ + u_int64_t _tmp; \ + _tmp = (u_int64_t)a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[7]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[6]; \ + ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[5]; \ + ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[4]; \ + ((u_int8_t *)&a)[4] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)&a)[5] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)&a)[6] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[7] = ((u_int8_t *)&_tmp)[0]; \ +} +#undef P_64_COPY +#define P_64_COPY(a, b) { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \ + ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \ + ((u_int8_t *)b)[4] = ((u_int8_t *)a)[4]; \ + ((u_int8_t *)b)[5] = ((u_int8_t *)a)[5]; \ + ((u_int8_t *)b)[6] = ((u_int8_t *)a)[6]; \ + ((u_int8_t *)b)[7] = ((u_int8_t *)a)[7]; \ +} +#undef P_64_SWAP +#define P_64_SWAP(a) { \ + u_int64_t _tmp; \ + P_64_COPY(a, &_tmp); \ + ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[7]; \ + ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[6]; \ + ((u_int8_t *)a)[2] = ((u_int8_t *)&_tmp)[5]; \ + ((u_int8_t *)a)[3] = ((u_int8_t *)&_tmp)[4]; \ + ((u_int8_t *)a)[4] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)a)[5] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)a)[6] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)a)[7] = ((u_int8_t *)&_tmp)[0]; \ +} + +/* + * Little endian <==> big endian 32-bit swap macros. 
+ * P_32_COPY copy potentially unaligned 4 byte quantities + * P_32_COPYSWAP copy and swap potentially unaligned 4 byte quantities + * P_32_SWAP swap a referenced memory location + * M_32_SWAP swap a memory location + */ +#undef P_32_COPY +#define P_32_COPY(a, b) do { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \ + ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \ +} while (0) +#undef P_32_COPYSWAP +#define P_32_COPYSWAP(a, b) do { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[3]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[2]; \ + ((u_int8_t *)b)[2] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[3] = ((u_int8_t *)a)[0]; \ +} while (0) +#undef P_32_SWAP +#define P_32_SWAP(a) do { \ + u_int32_t _tmp; \ + P_32_COPY(a, &_tmp); \ + P_32_COPYSWAP(&_tmp, a); \ +} while (0) +#undef M_32_SWAP +#define M_32_SWAP(a) P_32_SWAP(&a) + +/* + * Little endian <==> big endian 16-bit swap macros. + * P_16_COPY copy potentially unaligned 2 byte quantities + * P_16_COPYSWAP copy and swap potentially unaligned 2 byte quantities + * P_16_SWAP swap a referenced memory location + * M_16_SWAP swap a memory location + */ +#undef P_16_COPY +#define P_16_COPY(a, b) do { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ +} while (0) +#undef P_16_COPYSWAP +#define P_16_COPYSWAP(a, b) do { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[0]; \ +} while (0) +#undef P_16_SWAP +#define P_16_SWAP(a) do { \ + u_int16_t _tmp; \ + P_16_COPY(a, &_tmp); \ + P_16_COPYSWAP(&_tmp, a); \ +} while (0) +#undef M_16_SWAP +#define M_16_SWAP(a) P_16_SWAP(&a) + +#undef SWAP32 +#define SWAP32(p) { \ + P_32_SWAP(p); \ + (p) += sizeof(u_int32_t); \ +} +#undef SWAP16 +#define SWAP16(p) { \ + P_16_SWAP(p); \ + (p) += sizeof(u_int16_t); \ +} + +/* + * Berkeley DB has local versions of htonl() and ntohl() that operate on + * pointers to the right size memory locations; the 
portability magic for + * finding the real system functions isn't worth the effort. + */ +#undef DB_HTONL_SWAP +#define DB_HTONL_SWAP(env, p) do { \ + if (F_ISSET((env), ENV_LITTLEENDIAN)) \ + P_32_SWAP(p); \ +} while (0) +#undef DB_NTOHL_SWAP +#define DB_NTOHL_SWAP(env, p) do { \ + if (F_ISSET((env), ENV_LITTLEENDIAN)) \ + P_32_SWAP(p); \ +} while (0) + +#undef DB_NTOHL_COPYIN +#define DB_NTOHL_COPYIN(env, i, p) do { \ + u_int8_t *tmp; \ + tmp = (u_int8_t *)&(i); \ + if (F_ISSET(env, ENV_LITTLEENDIAN)) { \ + tmp[3] = *p++; \ + tmp[2] = *p++; \ + tmp[1] = *p++; \ + tmp[0] = *p++; \ + } else { \ + memcpy(&i, p, sizeof(u_int32_t)); \ + p = (u_int8_t *)p + sizeof(u_int32_t); \ + } \ +} while (0) + +#undef DB_NTOHS_COPYIN +#define DB_NTOHS_COPYIN(env, i, p) do { \ + u_int8_t *tmp; \ + tmp = (u_int8_t *)&(i); \ + if (F_ISSET(env, ENV_LITTLEENDIAN)) { \ + tmp[1] = *p++; \ + tmp[0] = *p++; \ + } else { \ + memcpy(&i, p, sizeof(u_int16_t)); \ + p = (u_int8_t *)p + sizeof(u_int16_t); \ + } \ +} while (0) + +#undef DB_HTONL_COPYOUT +#define DB_HTONL_COPYOUT(env, p, i) do { \ + u_int8_t *tmp; \ + tmp = (u_int8_t *)p; \ + if (F_ISSET(env, ENV_LITTLEENDIAN)) { \ + *tmp++ = ((u_int8_t *)&(i))[3]; \ + *tmp++ = ((u_int8_t *)&(i))[2]; \ + *tmp++ = ((u_int8_t *)&(i))[1]; \ + *tmp++ = ((u_int8_t *)&(i))[0]; \ + } else \ + memcpy(p, &i, sizeof(u_int32_t)); \ + p = (u_int8_t *)p + sizeof(u_int32_t); \ +} while (0) + +#undef DB_HTONS_COPYOUT +#define DB_HTONS_COPYOUT(env, p, i) do { \ + u_int8_t *tmp; \ + tmp = (u_int8_t *)p; \ + if (F_ISSET(env, ENV_LITTLEENDIAN)) { \ + *tmp++ = ((u_int8_t *)&(i))[1]; \ + *tmp++ = ((u_int8_t *)&(i))[0]; \ + } else \ + memcpy(p, &i, sizeof(u_int16_t)); \ + p = (u_int8_t *)p + sizeof(u_int16_t); \ +} while (0) + +/* + * Helper macros for swapped logs. We write logs in little endian format to + * minimize disruption on x86 when upgrading from native byte order to + * platform-independent logs. 
+ */ +#define LOG_SWAPPED(env) !F_ISSET(env, ENV_LITTLEENDIAN) + +#define LOGCOPY_32(env, x, p) do { \ + if (LOG_SWAPPED(env)) \ + P_32_COPYSWAP((p), (x)); \ + else \ + memcpy((x), (p), sizeof(u_int32_t)); \ +} while (0) + +#define LOGCOPY_16(env, x, p) do { \ + if (LOG_SWAPPED(env)) \ + P_16_COPYSWAP((p), (x)); \ + else \ + memcpy((x), (p), sizeof(u_int16_t)); \ +} while (0) + +#define LOGCOPY_TOLSN(env, lsnp, p) do { \ + LOGCOPY_32((env), &(lsnp)->file, (p)); \ + LOGCOPY_32((env), &(lsnp)->offset, \ + (u_int8_t *)(p) + sizeof(u_int32_t)); \ +} while (0) + +#define LOGCOPY_FROMLSN(env, p, lsnp) do { \ + LOGCOPY_32((env), (p), &(lsnp)->file); \ + LOGCOPY_32((env), \ + (u_int8_t *)(p) + sizeof(u_int32_t), &(lsnp)->offset); \ +} while (0) + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_SWAP_H_ */ diff --git a/src/dbinc/db_upgrade.h b/src/dbinc/db_upgrade.h new file mode 100644 index 00000000..67891561 --- /dev/null +++ b/src/dbinc/db_upgrade.h @@ -0,0 +1,248 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_UPGRADE_H_ +#define _DB_UPGRADE_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * This file defines the metadata pages from the previous release. + * These structures are only used to upgrade old versions of databases. + */ + +/* Structures from the 3.1 release */ +typedef struct _dbmeta31 { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. */ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ + u_int32_t free; /* 28-31: Free list page number. */ + DB_LSN unused3; /* 36-39: Unused. */ + u_int32_t key_count; /* 40-43: Cached key count. 
*/ + u_int32_t record_count; /* 44-47: Cached record count. */ + u_int32_t flags; /* 48-51: Flags: unique to each AM. */ + /* 52-71: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} DBMETA31; + +typedef struct _btmeta31 { + DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t maxkey; /* 72-75: Btree: Maxkey. */ + u_int32_t minkey; /* 76-79: Btree: Minkey. */ + u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */ + u_int32_t root; /* 88-92: Root page. */ + + /* + * Minimum page size is 128. + */ +} BTMETA31; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _hashmeta31 { + DBMETA31 dbmeta; /* 00-71: Generic meta-data page header. */ + + u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 76-79: Modulo mask into table */ + u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */ + u_int32_t ffactor; /* 84-87: Fill factor */ + u_int32_t nelem; /* 88-91: Number of keys in hash table */ + u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */ +#define NCACHED 32 /* number of spare points */ + /* 96-223: Spare pages for overflow */ + u_int32_t spares[NCACHED]; + + /* + * Minimum page size is 256. + */ +} HMETA31; + +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta31 { + DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t start; /* 72-75: Start offset. */ + u_int32_t first_recno; /* 76-79: First not deleted record. */ + u_int32_t cur_recno; /* 80-83: Last recno allocated. */ + u_int32_t re_len; /* 84-87: Fixed-length record length. */ + u_int32_t re_pad; /* 88-91: Fixed-length record pad. */ + u_int32_t rec_page; /* 92-95: Records Per Page. */ + + /* + * Minimum page size is 128. 
+ */ +} QMETA31; +/* Structures from the 3.2 release */ +typedef struct _qmeta32 { + DBMETA31 dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t first_recno; /* 72-75: First not deleted record. */ + u_int32_t cur_recno; /* 76-79: Last recno allocated. */ + u_int32_t re_len; /* 80-83: Fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Fixed-length record pad. */ + u_int32_t rec_page; /* 88-91: Records Per Page. */ + u_int32_t page_ext; /* 92-95: Pages per extent */ + + /* + * Minimum page size is 128. + */ +} QMETA32; + +/* Structures from the 3.0 release */ + +typedef struct _dbmeta30 { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. */ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ + u_int32_t free; /* 28-31: Free list page number. */ + u_int32_t flags; /* 32-35: Flags: unique to each AM. */ + /* 36-55: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} DBMETA30; + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _btmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */ + + u_int32_t maxkey; /* 56-59: Btree: Maxkey. */ + u_int32_t minkey; /* 60-63: Btree: Minkey. */ + u_int32_t re_len; /* 64-67: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 68-71: Recno: fixed-length record pad. */ + u_int32_t root; /* 72-75: Root page. */ + + /* + * Minimum page size is 128. 
+ */ +} BTMETA30; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _hashmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data page header. */ + + u_int32_t max_bucket; /* 56-59: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 60-63: Modulo mask into table */ + u_int32_t low_mask; /* 64-67: Modulo mask into table lower half */ + u_int32_t ffactor; /* 68-71: Fill factor */ + u_int32_t nelem; /* 72-75: Number of keys in hash table */ + u_int32_t h_charkey; /* 76-79: Value of hash(CHARKEY) */ +#define NCACHED30 32 /* number of spare points */ + /* 80-207: Spare pages for overflow */ + u_int32_t spares[NCACHED30]; + + /* + * Minimum page size is 256. + */ +} HMETA30; + +/************************************************************************ + QUEUE METADATA PAGE LAYOUT + ************************************************************************/ +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */ + + u_int32_t start; /* 56-59: Start offset. */ + u_int32_t first_recno; /* 60-63: First not deleted record. */ + u_int32_t cur_recno; /* 64-67: Last recno allocated. */ + u_int32_t re_len; /* 68-71: Fixed-length record length. */ + u_int32_t re_pad; /* 72-75: Fixed-length record pad. */ + u_int32_t rec_page; /* 76-79: Records Per Page. */ + + /* + * Minimum page size is 128. + */ +} QMETA30; + +/* Structures from Release 2.x */ + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ + +/* + * Btree metadata page layout: + */ +typedef struct _btmeta2X { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. 
*/ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int32_t maxkey; /* 24-27: Btree: Maxkey. */ + u_int32_t minkey; /* 28-31: Btree: Minkey. */ + u_int32_t free; /* 32-35: Free list page number. */ + u_int32_t flags; /* 36-39: Flags. */ + u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */ + /* 48-67: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} BTMETA2X; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ + +/* + * Hash metadata page layout: + */ +/* Hash Table Information */ +typedef struct hashhdr { /* Disk resident portion */ + DB_LSN lsn; /* 00-07: LSN of the header page */ + db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */ + u_int32_t magic; /* 12-15: Magic NO for hash tables */ + u_int32_t version; /* 16-19: Version ID */ + u_int32_t pagesize; /* 20-23: Bucket/Page Size */ + u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */ + u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */ + u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 36-39: Modulo mask into table */ + u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */ + u_int32_t ffactor; /* 44-47: Fill factor */ + u_int32_t nelem; /* 48-51: Number of keys in hash table */ + u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */ + u_int32_t flags; /* 56-59: Allow duplicates. */ +#define NCACHED2X 32 /* number of spare points */ + /* 60-187: Spare pages for overflow */ + u_int32_t spares[NCACHED2X]; + /* 188-207: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; + + /* + * Minimum page size is 256. 
+ */ +} HASHHDR; + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_UPGRADE_H_ */ diff --git a/src/dbinc/db_verify.h b/src/dbinc/db_verify.h new file mode 100644 index 00000000..99e6c866 --- /dev/null +++ b/src/dbinc/db_verify.h @@ -0,0 +1,209 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_VERIFY_H_ +#define _DB_VERIFY_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Structures and macros for the storage and retrieval of all information + * needed for inter-page verification of a database. + */ + +/* + * EPRINT is the macro for error printing. Takes as an arg the arg set + * for DB->err. + */ +#define EPRINT(x) do { \ + if (!LF_ISSET(DB_SALVAGE)) \ + __db_errx x; \ +} while (0) + +/* Complain about a totally zeroed page where we don't expect one. */ +#define ZEROPG_ERR_PRINT(dbenv, pgno, str) do { \ + EPRINT(((dbenv), DB_STR_A("0501", \ + "Page %lu: %s is of inappropriate type %lu", "%lu %s %lu"), \ + (u_long)(pgno), str, (u_long)P_INVALID)); \ + EPRINT(((dbenv), DB_STR_A("0502", \ + "Page %lu: totally zeroed page", \ + "%lu"), (u_long)(pgno))); \ +} while (0) + +/* + * Note that 0 is, in general, a valid pgno, despite equaling PGNO_INVALID; + * we have to test it separately where it's not appropriate. + */ +#define IS_VALID_PGNO(x) ((x) <= vdp->last_pgno) + +/* + * VRFY_DBINFO is the fundamental structure; it either represents the database + * of subdatabases, or the sole database if there are no subdatabases. + */ +struct __vrfy_dbinfo { + DB_THREAD_INFO *thread_info; + /* Info about this database in particular. */ + DBTYPE type; + + /* List of subdatabase meta pages, if any. */ + LIST_HEAD(__subdbs, __vrfy_childinfo) subdbs; + + /* Transaction handle for CDS group. */ + DB_TXN *txn; + + /* File-global info--stores VRFY_PAGEINFOs for each page. 
*/ + DB *pgdbp; + + /* Child database--stores VRFY_CHILDINFOs of each page. */ + DB *cdbp; + + /* Page info structures currently in use. */ + LIST_HEAD(__activepips, __vrfy_pageinfo) activepips; + + /* + * DB we use to keep track of which pages are linked somehow + * during verification. 0 is the default, "unseen"; 1 is seen. + */ + DB *pgset; + + /* + * This is a database we use during salvaging to keep track of which + * overflow and dup pages we need to come back to at the end and print + * with key "UNKNOWN". Pages which print with a good key get set + * to SALVAGE_IGNORE; others get set, as appropriate, to SALVAGE_LDUP, + * SALVAGE_LRECNODUP, SALVAGE_OVERFLOW for normal db overflow pages, + * and SALVAGE_BTREE, SALVAGE_LRECNO, and SALVAGE_HASH for subdb + * pages. + */ +#define SALVAGE_INVALID 0 +#define SALVAGE_IGNORE 1 +#define SALVAGE_LDUP 2 +#define SALVAGE_IBTREE 3 +#define SALVAGE_OVERFLOW 4 +#define SALVAGE_LBTREE 5 +#define SALVAGE_HASH 6 +#define SALVAGE_LRECNO 7 +#define SALVAGE_LRECNODUP 8 + DB *salvage_pages; + + db_pgno_t last_pgno; + db_pgno_t meta_last_pgno; + db_pgno_t pgs_remaining; /* For dbp->db_feedback(). */ + + /* + * These are used during __bam_vrfy_subtree to keep track, while + * walking up and down the Btree structure, of the prev- and next-page + * chain of leaf pages and verify that it's intact. Also, make sure + * that this chain contains pages of only one type. + */ + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + u_int8_t leaf_type; + + /* Queue needs these to verify data pages in the first pass. */ + u_int32_t re_pad; /* Record pad character. */ + u_int32_t re_len; /* Record length. */ + u_int32_t rec_page; + u_int32_t page_ext; + u_int32_t first_recno; + u_int32_t last_recno; + int nextents; + db_pgno_t *extents; + +#define SALVAGE_PRINTABLE 0x01 /* Output printable chars literally. */ +#define SALVAGE_PRINTHEADER 0x02 /* Print the unknown-key header. */ +#define SALVAGE_PRINTFOOTER 0x04 /* Print the unknown-key footer. 
*/ +#define SALVAGE_HASSUBDBS 0x08 /* There are subdatabases to salvage. */ +#define VRFY_LEAFCHAIN_BROKEN 0x10 /* Lost one or more Btree leaf pgs. */ +#define VRFY_QMETA_SET 0x20 /* We've seen a QUEUE meta page and + set things up for it. */ + u_int32_t flags; +}; /* VRFY_DBINFO */ + +/* + * The amount of state information we need per-page is small enough that + * it's not worth the trouble to define separate structures for each + * possible type of page, and since we're doing verification with these we + * have to be open to the possibility that page N will be of a completely + * unexpected type anyway. So we define one structure here with all the + * info we need for inter-page verification. + */ +struct __vrfy_pageinfo { + u_int8_t type; + u_int8_t bt_level; + u_int8_t unused1; + u_int8_t unused2; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + + /* meta pages */ + db_pgno_t root; + db_pgno_t free; /* Free list head. */ + + db_indx_t entries; /* Actual number of entries. */ + u_int16_t unused; + db_recno_t rec_cnt; /* Record count. */ + u_int32_t re_pad; /* Record pad character. */ + u_int32_t re_len; /* Record length. */ + u_int32_t bt_minkey; + u_int32_t h_ffactor; + u_int32_t h_nelem; + + /* overflow pages */ + /* + * Note that refcount is the refcount for an overflow page; pi_refcount + * is this structure's own refcount! + */ + u_int32_t refcount; + u_int32_t olen; + +#define VRFY_DUPS_UNSORTED 0x0001 /* Have to flag the negative! */ +#define VRFY_HAS_CHKSUM 0x0002 +#define VRFY_HAS_DUPS 0x0004 +#define VRFY_HAS_DUPSORT 0x0008 /* Has the flag set. */ +#define VRFY_HAS_PART_RANGE 0x0010 /* Has the flag set. */ +#define VRFY_HAS_PART_CALLBACK 0x0020 /* Has the flag set. */ +#define VRFY_HAS_RECNUMS 0x0040 +#define VRFY_HAS_SUBDBS 0x0080 +#define VRFY_INCOMPLETE 0x0100 /* Meta or item order checks incomp. */ +#define VRFY_IS_ALLZEROES 0x0200 /* Hash page we haven't touched? 
*/ +#define VRFY_IS_FIXEDLEN 0x0400 +#define VRFY_IS_RECNO 0x0800 +#define VRFY_IS_RRECNO 0x1000 +#define VRFY_OVFL_LEAFSEEN 0x2000 +#define VRFY_HAS_COMPRESS 0x4000 + u_int32_t flags; + + LIST_ENTRY(__vrfy_pageinfo) links; + u_int32_t pi_refcount; +}; /* VRFY_PAGEINFO */ + +struct __vrfy_childinfo { + /* The following fields are set by the caller of __db_vrfy_childput. */ + db_pgno_t pgno; + +#define V_DUPLICATE 1 /* off-page dup metadata */ +#define V_OVERFLOW 2 /* overflow page */ +#define V_RECNO 3 /* btree internal or leaf page */ + u_int32_t type; + db_recno_t nrecs; /* record count on a btree subtree */ + u_int32_t tlen; /* ovfl. item total size */ + + /* The following field is maintained by __db_vrfy_childput. */ + u_int32_t refcnt; /* # of times parent points to child. */ + + LIST_ENTRY(__vrfy_childinfo) links; +}; /* VRFY_CHILDINFO */ + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_VERIFY_H_ */ diff --git a/src/dbinc/debug.h b/src/dbinc/debug.h new file mode 100644 index 00000000..4950821c --- /dev/null +++ b/src/dbinc/debug.h @@ -0,0 +1,283 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_DEBUG_H_ +#define _DB_DEBUG_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Turn on additional error checking in gcc 3.X. + */ +#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) +#define __attribute__(s) +#endif + +/* + * When running with #DIAGNOSTIC defined, we smash memory and do memory + * guarding with a special byte value. + */ +#define CLEAR_BYTE 0xdb +#define GUARD_BYTE 0xdc + +/* + * DB assertions. + * + * Use __STDC__ rather than STDC_HEADERS, the #e construct is ANSI C specific. + */ +#if defined(DIAGNOSTIC) && defined(__STDC__) +#define DB_ASSERT(env, e) \ + ((e) ? 
(void)0 : __db_assert(env, #e, __FILE__, __LINE__)) +#else +#define DB_ASSERT(env, e) NOP_STATEMENT +#endif + +/* + * "Shut that bloody compiler up!" + * + * Unused, or not-used-yet variable. We need to write and then read the + * variable, some compilers are too bloody clever by half. + */ +#define COMPQUIET(n, v) do { \ + (n) = (v); \ + (n) = (n); \ +} while (0) + +/* + * Purify and other run-time tools complain about uninitialized reads/writes + * of structure fields whose only purpose is padding, as well as when heap + * memory that was never initialized is written to disk. + */ +#ifdef UMRW +#define UMRW_SET(v) (v) = 0 +#else +#define UMRW_SET(v) NOP_STATEMENT +#endif + +/* + * Errors are in one of two areas: a Berkeley DB error, or a system-level + * error. We use db_strerror to translate the former and __os_strerror to + * translate the latter. + */ +typedef enum { + DB_ERROR_NOT_SET=0, + DB_ERROR_SET=1, + DB_ERROR_SYSTEM=2 +} db_error_set_t; + +/* + * Message handling. Use a macro instead of a function because va_list + * references to variadic arguments cannot be reset to the beginning of the + * variadic argument list (and then rescanned), by functions other than the + * original routine that took the variadic list of arguments. + */ +#if defined(STDC_HEADERS) || defined(__cplusplus) +#define DB_REAL_ERR(dbenv, error, error_set, app_call, fmt) { \ + va_list __ap; \ + \ + /* Call the application's callback function, if specified. */ \ + va_start(__ap, fmt); \ + if ((dbenv) != NULL && (dbenv)->db_errcall != NULL) \ + __db_errcall(dbenv, error, error_set, fmt, __ap); \ + va_end(__ap); \ + \ + /* \ + * If the application specified a file descriptor, write to it. \ + * If we wrote to neither the application's callback routine or \ + * its file descriptor, and it's an application error message \ + * using {DbEnv,Db}.{err,errx} or the application has never \ + * configured an output channel, default by writing to stderr. 
\ + */ \ + va_start(__ap, fmt); \ + if ((dbenv) == NULL || \ + (dbenv)->db_errfile != NULL || \ + ((dbenv)->db_errcall == NULL && \ + ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET)))) \ + __db_errfile(dbenv, error, error_set, fmt, __ap); \ + va_end(__ap); \ +} +#else +#define DB_REAL_ERR(dbenv, error, error_set, app_call, fmt) { \ + va_list __ap; \ + \ + /* Call the application's callback function, if specified. */ \ + va_start(__ap); \ + if ((dbenv) != NULL && (dbenv)->db_errcall != NULL) \ + __db_errcall(dbenv, error, error_set, fmt, __ap); \ + va_end(__ap); \ + \ + /* \ + * If the application specified a file descriptor, write to it. \ + * If we wrote to neither the application's callback routine or \ + * its file descriptor, and it's an application error message \ + * using {DbEnv,Db}.{err,errx} or the application has never \ + * configured an output channel, default by writing to stderr. \ + */ \ + va_start(__ap); \ + if ((dbenv) == NULL || \ + (dbenv)->db_errfile != NULL || \ + ((dbenv)->db_errcall == NULL && \ + ((app_call) || F_ISSET((dbenv)->env, ENV_NO_OUTPUT_SET)))) \ + __db_errfile(env, error, error_set, fmt, __ap); \ + va_end(__ap); \ +} +#endif +#if defined(STDC_HEADERS) || defined(__cplusplus) +#define DB_REAL_MSG(dbenv, fmt) { \ + va_list __ap; \ + \ + /* Call the application's callback function, if specified. */ \ + va_start(__ap, fmt); \ + if ((dbenv) != NULL && (dbenv)->db_msgcall != NULL) \ + __db_msgcall(dbenv, fmt, __ap); \ + va_end(__ap); \ + \ + /* \ + * If the application specified a file descriptor, write to it. \ + * If we wrote to neither the application's callback routine or \ + * its file descriptor, write to stdout. 
\ + */ \ + va_start(__ap, fmt); \ + if ((dbenv) == NULL || \ + (dbenv)->db_msgfile != NULL || \ + (dbenv)->db_msgcall == NULL) { \ + __db_msgfile(dbenv, fmt, __ap); \ + } \ + va_end(__ap); \ +} +#else +#define DB_REAL_MSG(dbenv, fmt) { \ + va_list __ap; \ + \ + /* Call the application's callback function, if specified. */ \ + va_start(__ap); \ + if ((dbenv) != NULL && (dbenv)->db_msgcall != NULL) \ + __db_msgcall(dbenv, fmt, __ap); \ + va_end(__ap); \ + \ + /* \ + * If the application specified a file descriptor, write to it. \ + * If we wrote to neither the application's callback routine or \ + * its file descriptor, write to stdout. \ + */ \ + va_start(__ap); \ + if ((dbenv) == NULL || \ + (dbenv)->db_msgfile != NULL || \ + (dbenv)->db_msgcall == NULL) { \ + __db_msgfile(dbenv, fmt, __ap); \ + } \ + va_end(__ap); \ +} +#endif + +/* + * Debugging macro to log operations. + * If DEBUG_WOP is defined, log operations that modify the database. + * If DEBUG_ROP is defined, log operations that read the database. + * + * D dbp + * T txn + * O operation (string) + * K key + * A data + * F flags + */ +#define LOG_OP(C, T, O, K, A, F) { \ + DB_LSN __lsn; \ + DBT __op; \ + if (DBC_LOGGING((C))) { \ + memset(&__op, 0, sizeof(__op)); \ + __op.data = O; \ + __op.size = (u_int32_t)strlen(O) + 1; \ + (void)__db_debug_log((C)->env, T, &__lsn, 0, \ + &__op, (C)->dbp->log_filename->id, K, A, F); \ + } \ +} +#ifdef DEBUG_ROP +#define DEBUG_LREAD(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) +#else +#define DEBUG_LREAD(C, T, O, K, A, F) +#endif +#ifdef DEBUG_WOP +#define DEBUG_LWRITE(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) +#else +#define DEBUG_LWRITE(C, T, O, K, A, F) +#endif + +/* + * Hook for testing recovery at various places in the create/delete paths. + * Hook for testing subdb locks. 
+ */ +#if CONFIG_TEST +#define DB_TEST_SUBLOCKS(env, flags) do { \ + if ((env)->test_abort == DB_TEST_SUBDB_LOCKS) \ + (flags) |= DB_LOCK_NOWAIT; \ +} while (0) + +#define DB_ENV_TEST_RECOVERY(env, val, ret, name) do { \ + int __ret; \ + PANIC_CHECK((env)); \ + if ((env)->test_copy == (val)) { \ + /* COPY the FILE */ \ + if ((__ret = __db_testcopy((env), NULL, (name))) != 0) \ + (ret) = __env_panic((env), __ret); \ + } \ + if ((env)->test_abort == (val)) { \ + /* ABORT the TXN */ \ + (env)->test_abort = 0; \ + (ret) = EINVAL; \ + goto db_tr_err; \ + } \ +} while (0) + +#define DB_TEST_RECOVERY(dbp, val, ret, name) do { \ + ENV *__env = (dbp)->env; \ + int __ret; \ + PANIC_CHECK(__env); \ + if (__env->test_copy == (val)) { \ + /* Copy the file. */ \ + if (F_ISSET((dbp), \ + DB_AM_OPEN_CALLED) && (dbp)->mpf != NULL) \ + (void)__db_sync(dbp); \ + if ((__ret = \ + __db_testcopy(__env, (dbp), (name))) != 0) \ + (ret) = __env_panic(__env, __ret); \ + } \ + if (__env->test_abort == (val)) { \ + /* Abort the transaction. */ \ + __env->test_abort = 0; \ + (ret) = EINVAL; \ + goto db_tr_err; \ + } \ +} while (0) + +#define DB_TEST_RECOVERY_LABEL db_tr_err: + +#define DB_TEST_SET(field, val) do { \ + if (field == (val)) \ + goto db_tr_err; \ +} while (0) + +#define DB_TEST_WAIT(env, val) \ + if ((val) != 0) \ + __os_yield((env), (u_long)(val), 0) +#else +#define DB_TEST_SUBLOCKS(env, flags) +#define DB_ENV_TEST_RECOVERY(env, val, ret, name) +#define DB_TEST_RECOVERY(dbp, val, ret, name) +#define DB_TEST_RECOVERY_LABEL +#define DB_TEST_SET(env, val) +#define DB_TEST_WAIT(env, val) +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_DEBUG_H_ */ diff --git a/src/dbinc/fop.h b/src/dbinc/fop.h new file mode 100644 index 00000000..34b4d45d --- /dev/null +++ b/src/dbinc/fop.h @@ -0,0 +1,32 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_FOP_H_ +#define _DB_FOP_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define MAKE_INMEM(D) do { \ + F_SET((D), DB_AM_INMEM); \ + (void)__memp_set_flags((D)->mpf, DB_MPOOL_NOFILE, 1); \ +} while (0) + +#define CLR_INMEM(D) do { \ + F_CLR((D), DB_AM_INMEM); \ + (void)__memp_set_flags((D)->mpf, DB_MPOOL_NOFILE, 0); \ +} while (0) + +#include "dbinc_auto/fileops_auto.h" +#include "dbinc_auto/fileops_ext.h" + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_FOP_H_ */ diff --git a/src/dbinc/globals.h b/src/dbinc/globals.h new file mode 100644 index 00000000..d798a7e9 --- /dev/null +++ b/src/dbinc/globals.h @@ -0,0 +1,103 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_GLOBALS_H_ +#define _DB_GLOBALS_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/******************************************************* + * Global variables. + * + * Held in a single structure to minimize the name-space pollution. + *******************************************************/ +#ifdef HAVE_VXWORKS +#include "semLib.h" +#endif + +typedef struct __db_globals { +#ifdef HAVE_VXWORKS + u_int32_t db_global_init; /* VxWorks: inited */ + SEM_ID db_global_lock; /* VxWorks: global semaphore */ +#endif +#ifdef DB_WIN32 +#ifndef DB_WINCE + /* + * These fields are used by the Windows implementation of mutexes. + * Usually they are initialized by the first DB API call to lock a + * mutex. If that would result in the mutexes being inaccessible by + * other threads (e.g., ones which have lesser privileges) the + * application may first call db_env_set_win_security(). 
+ */ + SECURITY_DESCRIPTOR win_default_sec_desc; + SECURITY_ATTRIBUTES win_default_sec_attr; +#endif + SECURITY_ATTRIBUTES *win_sec_attr; +#endif + + /* TAILQ_HEAD(__envq, __dbenv) envq; */ + struct __envq { + struct __env *tqh_first; + struct __env **tqh_last; + } envq; + + char *db_line; /* DB display string. */ + + char error_buf[40]; /* Error string buffer. */ + + int uid_init; /* srand set in UID generator */ + + u_long rand_next; /* rand/srand value */ + + u_int32_t fid_serial; /* file id counter */ + + int db_errno; /* Errno value if not available */ + + size_t num_active_pids; /* number of entries in active_pids */ + + size_t size_active_pids; /* allocated size of active_pids */ + + pid_t *active_pids; /* array active pids */ + + /* Underlying OS interface jump table.*/ + void (*j_assert) __P((const char *, const char *, int)); + int (*j_close) __P((int)); + void (*j_dirfree) __P((char **, int)); + int (*j_dirlist) __P((const char *, char ***, int *)); + int (*j_exists) __P((const char *, int *)); + void (*j_free) __P((void *)); + int (*j_fsync) __P((int)); + int (*j_ftruncate) __P((int, off_t)); + int (*j_ioinfo) __P((const char *, + int, u_int32_t *, u_int32_t *, u_int32_t *)); + void *(*j_malloc) __P((size_t)); + int (*j_file_map) __P((DB_ENV *, char *, size_t, int, void **)); + int (*j_file_unmap) __P((DB_ENV *, void *)); + int (*j_open) __P((const char *, int, ...)); + ssize_t (*j_pread) __P((int, void *, size_t, off_t)); + ssize_t (*j_pwrite) __P((int, const void *, size_t, off_t)); + ssize_t (*j_read) __P((int, void *, size_t)); + void *(*j_realloc) __P((void *, size_t)); + int (*j_region_map) __P((DB_ENV *, char *, size_t, int *, void **)); + int (*j_region_unmap) __P((DB_ENV *, void *)); + int (*j_rename) __P((const char *, const char *)); + int (*j_seek) __P((int, off_t, int)); + int (*j_unlink) __P((const char *)); + ssize_t (*j_write) __P((int, const void *, size_t)); + int (*j_yield) __P((u_long, u_long)); +} DB_GLOBALS; + +extern DB_GLOBALS 
__db_global_values; +#define DB_GLOBAL(v) __db_global_values.v + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_GLOBALS_H_ */ diff --git a/src/dbinc/hash.h b/src/dbinc/hash.h new file mode 100644 index 00000000..72f1e644 --- /dev/null +++ b/src/dbinc/hash.h @@ -0,0 +1,173 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _DB_HASH_H_ +#define _DB_HASH_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Hash internal structure. */ +typedef struct hash_t { + db_pgno_t meta_pgno; /* Page number of the meta data page. */ + u_int32_t revision; /* Revision of subdb metadata. */ + u_int32_t h_ffactor; /* Fill factor. */ + u_int32_t h_nelem; /* Number of elements. */ + /* Hash and compare functions. */ + u_int32_t (*h_hash) __P((DB *, const void *, u_int32_t)); + int (*h_compare) __P((DB *, const DBT *, const DBT *)); +} HASH; + +/* Cursor structure definitions. */ +typedef struct cursor_t { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* Hash private part */ + + /* Per-thread information */ + DB_LOCK hlock; /* Metadata page lock. */ + HMETA *hdr; /* Pointer to meta-data page. */ + PAGE *split_buf; /* Temporary buffer for splits. */ + + /* Hash cursor information */ + db_pgno_t bucket; /* Bucket we are traversing. */ + db_pgno_t lbucket; /* Bucket for which we are locked. */ + db_indx_t dup_off; /* Offset within a duplicate set. */ + db_indx_t dup_len; /* Length of current duplicate. */ + db_indx_t dup_tlen; /* Total length of duplicate entry. */ + u_int32_t seek_size; /* Number of bytes we need for add. */ + db_pgno_t seek_found_page;/* Page on which we can insert. */ + db_indx_t seek_found_indx;/* Insert position for item. */ + u_int32_t order; /* Relative order among deleted curs. 
*/ + +#define H_CONTINUE 0x0001 /* Join--search strictly fwd for data */ +#define H_CONTRACT 0x0002 /* Table contracted.*/ +#define H_DELETED 0x0004 /* Cursor item is deleted. */ +#define H_DUPONLY 0x0008 /* Dups only; do not change key. */ +#define H_EXPAND 0x0010 /* Table expanded. */ +#define H_ISDUP 0x0020 /* Cursor is within duplicate set. */ +#define H_NEXT_NODUP 0x0040 /* Get next non-dup entry. */ +#define H_NOMORE 0x0080 /* No more entries in bucket. */ +#define H_OK 0x0100 /* Request succeeded. */ + u_int32_t flags; +} HASH_CURSOR; + +/* Test string. */ +#define CHARKEY "%$sniglet^&" + +/* Overflow management */ +/* + * The spares table indicates the page number at which each doubling begins. + * From this page number we subtract the number of buckets already allocated + * so that we can do a simple addition to calculate the page number here. + */ +#define BS_TO_PAGE(bucket, spares) \ + ((bucket) + (spares)[__db_log2((bucket) + 1)]) +#define BUCKET_TO_PAGE(I, B) (BS_TO_PAGE((B), (I)->hdr->spares)) + +/* Constraints about how much data goes on a page. */ + +#define MINFILL 4 +#define ISBIG(I, N) (((N) > ((I)->hdr->dbmeta.pagesize / MINFILL)) ? 1 : 0) + +/* Shorthands for accessing structure */ +#define NDX_INVALID 0xFFFF +#define BUCKET_INVALID 0xFFFFFFFF + +/* On page duplicates are stored as a string of size-data-size triples. */ +#define DUP_SIZE(len) ((len) + 2 * sizeof(db_indx_t)) + +/* Log messages types (these are subtypes within a record type) */ +/* These bits are obsolete and are only needed for down rev logs. */ +#define PAIR_KEYMASK 0x1 +#define PAIR_DATAMASK 0x2 +#define PAIR_DUPMASK 0x4 +#define PAIR_MASK 0xf +#define PAIR_ISKEYBIG(N) (N & PAIR_KEYMASK) +#define PAIR_ISDATABIG(N) (N & PAIR_DATAMASK) +#define PAIR_ISDATADUP(N) (N & PAIR_DUPMASK) +#define OPCODE_OF(N) (N & ~PAIR_MASK) + +/* Operators for hash recover routines. 
*/ +#define PUTPAIR 0x20 +#define DELPAIR 0x30 +#define PUTOVFL 0x40 +#define DELOVFL 0x50 +#define HASH_UNUSED1 0x60 +#define HASH_UNUSED2 0x70 +#define SPLITOLD 0x80 +#define SPLITNEW 0x90 +#define SORTPAGE 0x100 + +/* Flags to control behavior of __ham_del_pair */ +#define HAM_DEL_NO_CURSOR 0x01 /* Don't do any cursor adjustment */ +#define HAM_DEL_NO_RECLAIM 0x02 /* Don't reclaim empty pages */ +/* Just delete onpage items (even if they are references to off-page items). */ +#define HAM_DEL_IGNORE_OFFPAGE 0x04 + +typedef enum { + DB_HAM_CURADJ_DEL = 1, + DB_HAM_CURADJ_ADD = 2, + DB_HAM_CURADJ_ADDMOD = 3, + DB_HAM_CURADJ_DELMOD = 4 +} db_ham_curadj; + +typedef enum { + DB_HAM_CHGPG = 1, + DB_HAM_DELFIRSTPG = 2, + DB_HAM_DELMIDPG = 3, + DB_HAM_DELLASTPG = 4, + DB_HAM_DUP = 5, + DB_HAM_SPLIT = 6 +} db_ham_mode; + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/hash_auto.h" +#include "dbinc_auto/hash_ext.h" +#include "dbinc/db_am.h" +#endif /* !_DB_HASH_H_ */ diff --git a/src/dbinc/heap.h b/src/dbinc/heap.h new file mode 100644 index 00000000..0771921a --- /dev/null +++ b/src/dbinc/heap.h @@ -0,0 +1,59 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _DB_HEAP_H_ +#define _DB_HEAP_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Forward structure declarations. */ +struct __heap; typedef struct __heap HEAP; +struct __heap_cursor; typedef struct __heap_cursor HEAP_CURSOR; + +/* + * The in-memory, per-heap data structure. + */ +struct __heap { /* Heap access method. */ + + u_int32_t gbytes; /* Initial heap size. */ + u_int32_t bytes; /* Initial heap size. */ + u_int32_t region_size; /* Size of each region. */ + + db_pgno_t curregion; /* The region of the next insert. */ + db_pgno_t maxpgno; /* Maximum page number of a fixed size heap. */ + int curpgindx; /* The last used offset in the region's space bitmap. 
*/ +}; + +struct __heap_cursor { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* Heap private part */ + + u_int32_t flags; +}; + +#define HEAP_PG_FULL 3 /* No space on page. */ +#define HEAP_PG_GT66 2 /* Page greater than 66% full */ +#define HEAP_PG_GT33 1 /* Page greater than 33% full */ +#define HEAP_PG_LT33 0 /* Page less than 33% full */ + +#define HEAP_PG_FULL_PCT 5 /* Less than 5% of page is free. */ +#define HEAP_PG_GT66_PCT 33 /* Less than 33% of page is free. */ +#define HEAP_PG_GT33_PCT 66 /* Less than 66% of page is free. */ + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/heap_auto.h" +#include "dbinc_auto/heap_ext.h" +#include "dbinc/db_am.h" +#endif + + diff --git a/src/dbinc/hmac.h b/src/dbinc/hmac.h new file mode 100644 index 00000000..2d6bb8fb --- /dev/null +++ b/src/dbinc/hmac.h @@ -0,0 +1,39 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_HMAC_H_ +#define _DB_HMAC_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Algorithm specific information. + */ +/* + * SHA1 checksumming + */ +typedef struct { + u_int32_t state[5]; + u_int32_t count[2]; + unsigned char buffer[64]; +} SHA1_CTX; + +/* + * AES assumes the SHA1 checksumming (also called MAC) + */ +#define DB_MAC_MAGIC "mac derivation key magic value" +#define DB_ENC_MAGIC "encryption and decryption key value magic" + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/hmac_ext.h" +#endif /* !_DB_HMAC_H_ */ diff --git a/src/dbinc/lock.h b/src/dbinc/lock.h new file mode 100644 index 00000000..c94590f7 --- /dev/null +++ b/src/dbinc/lock.h @@ -0,0 +1,326 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_LOCK_H_ +#define _DB_LOCK_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DB_LOCK_DEFAULT_N 1000 /* Default # of locks in region. */ + +/* + * The locker id space is divided between the transaction manager and the lock + * manager. Lock IDs start at 1 and go to DB_LOCK_MAXID. Txn IDs start at + * DB_LOCK_MAXID + 1 and go up to TXN_MAXIMUM. + */ +#define DB_LOCK_INVALIDID 0 +#define DB_LOCK_MAXID 0x7fffffff + +/* + * A locker's deadlock resolution priority is stored as a 32 bit unsigned + * integer. The maximum priority is DB_LOCK_MAXPRIORITY and the default + * priority is DB_LOCK_DEFPRIORITY. + */ +#define DB_LOCK_DEFPRIORITY 100 +#define DB_LOCK_MAXPRIORITY UINT32_MAX + +/* + * Out of band value for a lock. Locks contain an offset into a lock region, + * so we use an invalid region offset to indicate an invalid or unset lock. + */ +#define LOCK_INVALID INVALID_ROFF +#define LOCK_ISSET(lock) ((lock).off != LOCK_INVALID) +#define LOCK_INIT(lock) ((lock).off = LOCK_INVALID) + +/* + * Macro to identify a write lock for the purpose of counting locks + * for the NUMWRITES option to deadlock detection. + */ +#define IS_WRITELOCK(m) \ + ((m) == DB_LOCK_WRITE || (m) == DB_LOCK_WWRITE || \ + (m) == DB_LOCK_IWRITE || (m) == DB_LOCK_IWR) + +/* + * Macros to lock/unlock the lock region as a whole. Mostly used for + * initialization. + */ +#define LOCK_REGION_LOCK(env) \ + MUTEX_LOCK(env, ((DB_LOCKREGION *) \ + (env)->lk_handle->reginfo.primary)->mtx_region) +#define LOCK_REGION_UNLOCK(env) \ + MUTEX_UNLOCK(env, ((DB_LOCKREGION *) \ + (env)->lk_handle->reginfo.primary)->mtx_region) + +/* + * DB_LOCKREGION -- + * The lock shared region. + */ + +typedef struct __db_lockregion { /* SHARED */ + db_mutex_t mtx_region; /* Region mutex. 
*/ + + u_int32_t need_dd; /* flag for deadlock detector */ + u_int32_t detect; /* run dd on every conflict */ + db_timespec next_timeout; /* next time to expire a lock */ + db_mutex_t mtx_dd; /* mutex for lock object dd list. */ + db_mutex_t mtx_lockers; /* mutex for locker allocation. */ + SH_TAILQ_HEAD(__dobj) dd_objs; /* objects with waiters */ + /* free locker header */ + roff_t locker_mem_off; /* block memory for lockers */ + SH_TAILQ_HEAD(__flocker) free_lockers; + SH_TAILQ_HEAD(__lkrs) lockers; /* list of lockers */ + + db_timeout_t lk_timeout; /* timeout for locks. */ + db_timeout_t tx_timeout; /* timeout for txns. */ + + u_int32_t locker_t_size; /* size of locker hash table */ + u_int32_t object_t_size; /* size of object hash table */ + u_int32_t part_t_size; /* number of partitions */ + + roff_t conf_off; /* offset of conflicts array */ + roff_t obj_off; /* offset of object hash table */ + roff_t part_off; /* offset of partition array */ + roff_t stat_off; /* offset to object hash stats */ + roff_t locker_off; /* offset of locker hash table */ + + u_int32_t lock_id; /* Current lock(er) id to allocate. */ + u_int32_t cur_maxid; /* Current max lock(er) id. */ + u_int32_t nlockers; /* Current number of lockers. */ + int32_t nmodes; /* Number of modes in conflict table. */ + DB_LOCK_STAT stat; /* stats about locking. */ +} DB_LOCKREGION; + +/* + * Since we will store DBTs in shared memory, we need the equivalent of a + * DBT that will work in shared memory. + */ +typedef struct __sh_dbt { /* SHARED */ + u_int32_t size; /* Byte length. */ + roff_t off; /* Region offset. */ +} SH_DBT; + +#define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off)) + +/* + * Object structures; these live in the object hash table. + */ +typedef struct __db_lockobj { /* SHARED */ + u_int32_t indx; /* Hash index of this object. */ + u_int32_t generation; /* Generation of this object. */ + SH_DBT lockobj; /* Identifies object locked. 
*/ + SH_TAILQ_ENTRY links; /* Links for free list or hash list. */ + SH_TAILQ_ENTRY dd_links; /* Links for dd list. */ + SH_TAILQ_HEAD(__waitl) waiters; /* List of waiting locks. */ + SH_TAILQ_HEAD(__holdl) holders; /* List of held locks. */ + /* Declare room in the object to hold + * typical DB lock structures so that + * we do not have to allocate them from + * shalloc at run-time. */ + u_int8_t objdata[sizeof(struct __db_ilock)]; +} DB_LOCKOBJ; + +/* + * Locker structures; these live in the locker hash table. + */ +struct __db_locker { /* SHARED */ + u_int32_t id; /* Locker id. */ + + pid_t pid; /* Process owning locker ID */ + db_threadid_t tid; /* Thread owning locker ID */ + db_mutex_t mtx_locker; /* Mutex to block on. */ + + u_int32_t dd_id; /* Deadlock detector id. */ + + u_int32_t nlocks; /* Number of locks held. */ + u_int32_t nwrites; /* Number of write locks held. */ + u_int32_t priority; /* Deadlock resolution priority. */ + u_int32_t nrequest; /* number of requests. */ + + roff_t master_locker; /* Locker of master transaction. */ + roff_t parent_locker; /* Parent of this child. */ + SH_LIST_HEAD(_child) child_locker; /* List of descendant txns; + only used in a "master" + txn. */ + SH_LIST_ENTRY child_link; /* Links transactions in the family; + elements of the child_locker + list. */ + SH_TAILQ_ENTRY links; /* Links for free and hash list. */ + SH_TAILQ_ENTRY ulinks; /* Links in-use list. */ + SH_LIST_HEAD(_held) heldby; /* Locks held by this locker. */ + db_timespec lk_expire; /* When current lock expires. */ + db_timespec tx_expire; /* When this txn expires. */ + db_timeout_t lk_timeout; /* How long do we let locks live. */ + +#define DB_LOCKER_DIRTY 0x0001 /* Has write locks. */ +#define DB_LOCKER_INABORT 0x0002 /* Is aborting, don't abort again. */ +#define DB_LOCKER_TIMEOUT 0x0004 /* Has timeout set. */ +#define DB_LOCKER_FAMILY_LOCKER 0x0008 /* Part of a family of lockers. 
*/ +#define DB_LOCKER_HANDLE_LOCKER 0x0010 /* Not associated with a thread. */ + u_int32_t flags; +}; + +/* + * Map a hash index into a partition. + */ +#define LOCK_PART(reg, ndx) (ndx % (reg)->part_t_size) + +/* + * Structure that contains information about a lock table partition. + */ +typedef struct __db_lockpart{ /* SHARED */ + db_mutex_t mtx_part; /* mutex for partition*/ + /* free lock header */ + SH_TAILQ_HEAD(__flock) free_locks; + /* free obj header */ + SH_TAILQ_HEAD(__fobj) free_objs; + roff_t lock_mem_off; /* block memory for locks */ + roff_t lockobj_mem_off;/* block memory for lockobjs */ +#ifdef HAVE_STATISTICS + DB_LOCK_PSTAT part_stat; /* Partition stats. */ +#endif +} DB_LOCKPART; + +#define FREE_LOCKS(lt, part) ((lt)->part_array[part].free_locks) +#define FREE_OBJS(lt, part) ((lt)->part_array[part].free_objs) + +/* + * DB_LOCKTAB -- + * The primary library lock data structure (i.e., the one referenced + * by the environment, as opposed to the internal one laid out in the region.) + */ +struct __db_locktab { + ENV *env; /* Environment. */ + REGINFO reginfo; /* Region information. */ + u_int8_t *conflicts; /* Pointer to conflict matrix. */ + DB_LOCKPART *part_array; /* Beginning of partition array. */ +#ifdef HAVE_STATISTICS + DB_LOCK_HSTAT *obj_stat; /* Object hash stats array. */ +#endif + DB_HASHTAB *obj_tab; /* Beginning of object hash table. */ + DB_HASHTAB *locker_tab; /* Beginning of locker hash table. */ +}; + +/* + * Test for conflicts. + * + * Cast HELD and WANTED to ints, they are usually db_lockmode_t enums. + */ +#define CONFLICTS(T, R, HELD, WANTED) \ + (T)->conflicts[((int)HELD) * (R)->nmodes + ((int)WANTED)] + +#define OBJ_LINKS_VALID(L) ((L)->links.stqe_prev != -1) + +struct __db_lock { /* SHARED */ + /* + * Wait on mutex to wait on lock. You reference your own mutex with + * ID 0 and others reference your mutex with ID 1. + */ + db_mutex_t mtx_lock; + + roff_t holder; /* Who holds this lock. 
*/ + u_int32_t gen; /* Generation count. */ + SH_TAILQ_ENTRY links; /* Free or holder/waiter list. */ + SH_LIST_ENTRY locker_links; /* List of locks held by a locker. */ + u_int32_t refcount; /* Reference count the lock. */ + db_lockmode_t mode; /* What sort of lock. */ + roff_t obj; /* Relative offset of object struct. */ + u_int32_t indx; /* Hash index of this object. */ + db_status_t status; /* Status of this lock. */ +}; + +/* + * Flag values for __lock_put_internal: + * DB_LOCK_DOALL: Unlock all references in this lock (instead of only 1). + * DB_LOCK_FREE: Free the lock (used in checklocker). + * DB_LOCK_NOPROMOTE: Don't bother running promotion when releasing locks + * (used by __lock_put_internal). + * DB_LOCK_UNLINK: Remove from the locker links (used in checklocker). + * Make sure that these do not conflict with the interface flags because + * we pass some of those around. + */ +#define DB_LOCK_DOALL 0x010000 +#define DB_LOCK_FREE 0x040000 +#define DB_LOCK_NOPROMOTE 0x080000 +#define DB_LOCK_UNLINK 0x100000 +#define DB_LOCK_ONEWAITER 0x400000 + +/* + * Macros to get/release different types of mutexes. + */ +/* + * Operations on lock objects must be protected by a mutex, either on their + * partition or on the lock region. Lock structures associated with that + * object are protected as well. Each partition has a free list of objects + * and lock structures protected by that mutex. We want to avoid getting + * multiple mutexes, particularly in __lock_vec, when there is only a + * single partition. If there is only one partition, then all the calls + * to LOCK_SYSTEM_LOCK(UNLOCK) actually acquire(release) a lock system + * wide mutex and MUTEX_LOCK(UNLOCK)_PARTITION are no-ops. If the number + * of partitions is greater than one, then LOCK_SYSTEM_LOCK(UNLOCK) is a + * no-op, and MUTEX_LOCK(UNLOCK)_PARTITION acquire a mutex on a particular + * partition of the lock table. 
+ */ +#define LOCK_SYSTEM_LOCK(lt, reg) do { \ + if ((reg)->part_t_size == 1) \ + MUTEX_LOCK((lt)->env, (reg)->mtx_region); \ +} while (0) +#define LOCK_SYSTEM_UNLOCK(lt, reg) do { \ + if ((reg)->part_t_size == 1) \ + MUTEX_UNLOCK((lt)->env, (reg)->mtx_region); \ +} while (0) +#define MUTEX_LOCK_PARTITION(lt, reg, p) do { \ + if ((reg)->part_t_size != 1) \ + MUTEX_LOCK((lt)->env, (lt)->part_array[p].mtx_part); \ +} while (0) +#define MUTEX_UNLOCK_PARTITION(lt, reg, p) do { \ + if ((reg)->part_t_size != 1) \ + MUTEX_UNLOCK((lt)->env, (lt)->part_array[p].mtx_part); \ +} while (0) + +#define OBJECT_LOCK(lt, reg, obj, ndx) do { \ + ndx = __lock_ohash(obj) % (reg)->object_t_size; \ + MUTEX_LOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx)); \ +} while (0) + +#define OBJECT_LOCK_NDX(lt, reg, ndx) \ + MUTEX_LOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx)); + +#define OBJECT_UNLOCK(lt, reg, ndx) \ + MUTEX_UNLOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx)); + +/* + * Protect the object deadlock detector queue and the locker allocation + * and active queues + */ +#define LOCK_DD(env, region) \ + MUTEX_LOCK(env, (region)->mtx_dd) +#define UNLOCK_DD(env, region) \ + MUTEX_UNLOCK(env, (region)->mtx_dd) +#define LOCK_LOCKERS(env, region) \ + MUTEX_LOCK(env, (region)->mtx_lockers) +#define UNLOCK_LOCKERS(env, region) \ + MUTEX_UNLOCK(env, (region)->mtx_lockers) + +/* + * __lock_locker_hash -- + * Hash function for entering lockers into the locker hash table. + * Since these are simply 32-bit unsigned integers at the moment, + * just return the locker value. 
+ */ +#define __lock_locker_hash(locker) (locker) +#define LOCKER_HASH(lt, reg, locker, ndx) \ + ndx = __lock_locker_hash(locker) % (reg)->locker_t_size; + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/lock_ext.h" +#endif /* !_DB_LOCK_H_ */ diff --git a/src/dbinc/log.h b/src/dbinc/log.h new file mode 100644 index 00000000..912f61b1 --- /dev/null +++ b/src/dbinc/log.h @@ -0,0 +1,458 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_LOG_H_ +#define _DB_LOG_H_ + +#include "dbinc/db_swap.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/******************************************************* + * DBREG: + * The DB file register code keeps track of open files. It's stored + * in the log subsystem's shared region, and so appears in the log.h + * header file, but is logically separate. + * The dbp may not be open if we are recovering the abort of a create. + *******************************************************/ +/* + * The per-process table that maps log file-id's to DB structures. + */ +typedef struct __db_entry { + DB *dbp; /* Open dbp for this file id. */ + int deleted; /* File was not found during open. */ +} DB_ENTRY; + +/* + * FNAME -- + * File name and id. + */ +struct __fname { + SH_TAILQ_ENTRY q; /* File name queue. */ + + pid_t pid; /* Process that owns this. */ + int32_t id; /* Logging file id. */ + int32_t old_id; /* Saved logging file id. */ + DBTYPE s_type; /* Saved DB type. */ + + roff_t fname_off; /* File name offset. */ + roff_t dname_off; /* Database name offset. */ + db_pgno_t meta_pgno; /* Page number of the meta page. */ + u_int8_t ufid[DB_FILE_ID_LEN]; /* Unique file id. */ + + u_int32_t create_txnid; /* + * Txn ID of the DB create, stored so + * we can log it at register time. + */ + db_mutex_t mutex; /* mutex from db handle. */ + /* number of txn referencing + 1 for the db handle. 
*/ + u_int32_t txn_ref; + +#define DB_FNAME_CLOSED 0x01 /* DBP was closed. */ +#define DB_FNAME_DURABLE 0x02 /* File is durable. */ +#define DB_FNAME_INMEM 0x04 /* File is in memory. */ +#define DB_FNAME_NOTLOGGED 0x08 /* Log of close failed. */ +#define DB_FNAME_RECOVER 0x10 /* File was opened by recovery code. */ +#define DB_FNAME_RESTORED 0x20 /* File may be in restored txn. */ +#define DB_FNAME_DBREG_MASK 0xf000 /* These bits come from DBREG below. */ + u_int32_t flags; +}; + +/* File open/close register log record opcodes. */ +#define DBREG_CHKPNT 1 /* Checkpoint: file name/id dump. */ +#define DBREG_CLOSE 2 /* File close. */ +#define DBREG_OPEN 3 /* File open. */ +#define DBREG_PREOPEN 4 /* Open in mpool only. */ +#define DBREG_RCLOSE 5 /* File close after recovery. */ +#define DBREG_REOPEN 6 /* Open for in-memory database. */ + +/* These bits are logged so db_printlog can handle page data. */ +#define DBREG_OP_MASK 0xf /* Opcode mask */ +#define DBREG_BIGEND 0x1000 /* Db Big endian. */ +#define DBREG_CHKSUM 0x2000 /* Db is checksummed. */ +#define DBREG_ENCRYPT 0x4000 /* Db is encrypted. */ + +/******************************************************* + * LOG: + * The log subsystem information. + *******************************************************/ +struct __hdr; typedef struct __hdr HDR; +struct __log; typedef struct __log LOG; +struct __log_persist; typedef struct __log_persist LOGP; + +#define LFPREFIX "log." /* Log file name prefix. */ +#define LFNAME "log.%010d" /* Log file name template. */ +#define LFNAME_V1 "log.%05d" /* Log file name template, rev 1. */ + +#define LG_MAX_DEFAULT (10 * MEGABYTE) /* 10 MB. */ +#define LG_MAX_INMEM (256 * 1024) /* 256 KB. */ +#define LG_BSIZE_INMEM (1 * MEGABYTE) /* 1 MB. */ + +/* + * Allocate a few bytes under a power-of-two value. BDB doesn't care if it's + * a power-of-two or not, and requesting slightly under a power-of-two allows + * stupid allocators to avoid wasting space. 
+ */ +#define LG_BASE_REGION_SIZE (130000) /* 128KB - 1072B */ +#define LG_BSIZE_DEFAULT (32000) /* 32 KB - 768B */ +#define LG_CURSOR_BUF_SIZE (32000) /* 32 KB - 768B */ + +/* + * DB_LOG + * Per-process log structure. + */ +struct __db_log { + /* + * These fields need to be protected for multi-threaded support. + */ + db_mutex_t mtx_dbreg; /* Mutex for thread protection. */ + + DB_ENTRY *dbentry; /* Recovery file-id mapping. */ +#define DB_GROW_SIZE 64 + int32_t dbentry_cnt; /* Entries. Grows by DB_GROW_SIZE. */ + + /* + * These fields are only accessed when the region lock is held, so + * they do not have to be protected by the thread lock as well. + */ + u_int32_t lfname; /* Log file "name". */ + DB_FH *lfhp; /* Log file handle. */ + time_t lf_timestamp; /* Log file timestamp. */ + + u_int8_t *bufp; /* Region buffer. */ + + /* These fields are not thread protected. */ + ENV *env; /* Environment */ + REGINFO reginfo; /* Region information. */ + +#define DBLOG_AUTOREMOVE 0x01 /* Autoremove log files. */ +#define DBLOG_DIRECT 0x02 /* Do direct I/O on the log. */ +#define DBLOG_DSYNC 0x04 /* Set OS_DSYNC on the log. */ +#define DBLOG_FORCE_OPEN 0x08 /* Force the DB open even if it appears + * to be deleted. */ +#define DBLOG_INMEMORY 0x10 /* Logging is in memory. */ +#define DBLOG_OPENFILES 0x20 /* Prepared files need to be open. */ +#define DBLOG_RECOVER 0x40 /* We are in recovery. */ +#define DBLOG_ZERO 0x80 /* Zero fill the log. */ +#define DBLOG_VERIFYING 0x100 /* The log is being verified. */ + u_int32_t flags; +}; + +/* + * HDR -- + * Log record header. + */ +struct __hdr { + u_int32_t prev; /* Previous offset. */ + u_int32_t len; /* Current length. */ + u_int8_t chksum[DB_MAC_KEY]; /* Current checksum. */ + u_int8_t iv[DB_IV_BYTES]; /* IV */ + u_int32_t orig_size; /* Original size of log record */ + /* !!! 
- 'size' is not written to log, must be last in hdr */ + size_t size; /* Size of header to use */ +}; + +/* + * LOG_HDR_SUM -- XOR in prev and len + * This helps avoids the race misreading the log while it + * it is being updated. + */ +#define LOG_HDR_SUM(crypto, hdr, sum) do { \ + if (crypto) { \ + ((u_int32_t *)sum)[0] ^= ((HDR *)hdr)->prev; \ + ((u_int32_t *)sum)[1] ^= ((HDR *)hdr)->len; \ + } else { \ + ((u_int32_t *)sum)[0] ^= \ + ((HDR *)hdr)->prev ^ ((HDR *)hdr)->len; \ + } \ +} while (0) + +/* + * We use HDR internally, and then when we write out, we write out + * prev, len, and then a 4-byte checksum if normal operation or + * a crypto-checksum and IV and original size if running in crypto + * mode. We must store the original size in case we pad. Set the + * size when we set up the header. We compute a DB_MAC_KEY size + * checksum regardless, but we can safely just use the first 4 bytes. + */ +#define HDR_NORMAL_SZ 12 +#define HDR_CRYPTO_SZ 12 + DB_MAC_KEY + DB_IV_BYTES + +struct __log_persist { + u_int32_t magic; /* DB_LOGMAGIC */ + u_int32_t version; /* DB_LOGVERSION */ + + u_int32_t log_size; /* Log file size. */ + u_int32_t notused; /* Historically the log file mode. */ +}; + +/* Macros to lock/unlock the log region as a whole. */ +#define LOG_SYSTEM_LOCK(env) \ + MUTEX_LOCK(env, ((LOG *) \ + (env)->lg_handle->reginfo.primary)->mtx_region) +#define LOG_SYSTEM_UNLOCK(env) \ + MUTEX_UNLOCK(env, ((LOG *) \ + (env)->lg_handle->reginfo.primary)->mtx_region) + +/* + * LOG -- + * Shared log region. One of these is allocated in shared memory, + * and describes the log. + */ +struct __log { /* SHARED */ + db_mutex_t mtx_region; /* Region mutex. */ + + db_mutex_t mtx_filelist; /* Mutex guarding file name list. */ + + LOGP persist; /* Persistent information. */ + + SH_TAILQ_HEAD(__fq1) fq; /* List of file names. */ + int32_t fid_max; /* Max fid allocated. */ + roff_t free_fid_stack; /* Stack of free file ids. 
*/ + u_int32_t free_fids; /* Height of free fid stack. */ + u_int32_t free_fids_alloced; /* N free fid slots allocated. */ + + /* + * The lsn LSN is the file offset that we're about to write and which + * we will return to the user. + */ + DB_LSN lsn; /* LSN at current file offset. */ + + /* + * The f_lsn LSN is the LSN (returned to the user) that "owns" the + * first byte of the buffer. If the record associated with the LSN + * spans buffers, it may not reflect the physical file location of + * the first byte of the buffer. + */ + DB_LSN f_lsn; /* LSN of first byte in the buffer. */ + db_size_t b_off; /* Current offset in the buffer. */ + u_int32_t w_off; /* Current write offset in the file. */ + u_int32_t len; /* Length of the last record. */ + + DB_LSN active_lsn; /* Oldest active LSN in the buffer. */ + db_size_t a_off; /* Offset in the buffer of first active + file. */ + + /* + * The s_lsn LSN is the last LSN that we know is on disk, not just + * written, but synced. This field is protected by the flush mutex + * rather than by the region mutex. + */ + db_mutex_t mtx_flush; /* Mutex guarding flushing. */ + int32_t in_flush; /* Log flush in progress. */ + DB_LSN s_lsn; /* LSN of the last sync. */ + + DB_LOG_STAT stat; /* Log statistics. */ + + /* + * This timestamp is updated anytime someone unlinks log + * files. This can happen when calling __log_vtruncate + * or replication internal init when it unlinks log files. + * + * The timestamp is used so that other processes that might + * have file handles to log files know to close/reopen them + * so they're not potentially writing to now-removed files. + */ + time_t timestamp; /* Log trunc timestamp. */ + + /* + * !!! + * NOTE: the next group of fields are NOT protected by the log + * region lock. They are protected by REP->mtx_clientdb. If you + * need access to both, you must acquire REP->mtx_clientdb + * before acquiring the log region lock. + * + * The waiting_lsn is used by the replication system. 
It is the + * first LSN that we are holding without putting in the log, because + * we received one or more log records out of order. Associated with + * the waiting_lsn is the number of log records that we still have to + * receive before we decide that we should request it again. + * + * The max_wait_lsn is used to control retransmission in the face + * of dropped messages. If we are requesting all records from the + * current gap (i.e., chunk of the log that we are missing), then + * the max_wait_lsn contains the first LSN that we are known to have + * in the __db.rep.db. If we requested only a single record, then + * the max_wait_lsn has the LSN of that record we requested. + */ + /* BEGIN fields protected by rep->mtx_clientdb. */ + DB_LSN waiting_lsn; /* First log record after a gap. */ + DB_LSN verify_lsn; /* LSN we are waiting to verify. */ + DB_LSN prev_ckp; /* LSN of ckp preceeding verify_lsn. */ + DB_LSN max_wait_lsn; /* Maximum LSN requested. */ + DB_LSN max_perm_lsn; /* Maximum PERMANENT LSN processed. */ + db_timespec max_lease_ts; /* Maximum Lease timestamp seen. */ + db_timespec wait_ts; /* Time to wait before requesting. */ + db_timespec rcvd_ts; /* Initial received time to wait. */ + db_timespec last_ts; /* Last time of insert in temp db. */ + /* + * The ready_lsn is also used by the replication system. It is the + * next LSN we expect to receive. It's normally equal to "lsn", + * except at the beginning of a log file, at which point it's set + * to the LSN of the first record of the new file (after the + * header), rather than to 0. + */ + DB_LSN ready_lsn; + /* + * The bulk_buf is used by replication for bulk transfer. While this + * is protected by REP->mtx_clientdb, this doesn't contend with the + * above fields because the above are used by clients and the bulk + * fields below are used by a master. + */ + roff_t bulk_buf; /* Bulk transfer buffer in region. */ + roff_t bulk_off; /* Current offset into bulk buffer. 
*/ + u_int32_t bulk_len; /* Length of buffer. */ + u_int32_t bulk_flags; /* Bulk buffer flags. */ + /* END fields protected by rep->mtx_clientdb. */ + + /* + * During initialization, the log system walks forward through the + * last log file to find its end. If it runs into a checkpoint + * while it's doing so, it caches it here so that the transaction + * system doesn't need to walk through the file again on its + * initialization. + */ + DB_LSN cached_ckp_lsn; + + u_int32_t regionmax; /* Configured size of the region. */ + + roff_t buffer_off; /* Log buffer offset in the region. */ + u_int32_t buffer_size; /* Log buffer size. */ + + u_int32_t log_size; /* Log file's size. */ + u_int32_t log_nsize; /* Next log file's size. */ + + int filemode; /* Log file permissions mode. */ + + /* + * DB_LOG_AUTOREMOVE and DB_LOG_INMEMORY: not protected by a mutex, + * all we care about is if they're zero or non-zero. + */ + int32_t db_log_autoremove; + int32_t db_log_inmemory; + + u_int32_t ncommit; /* Number of txns waiting to commit. */ + DB_LSN t_lsn; /* LSN of first commit */ + SH_TAILQ_HEAD(__commit) commits;/* list of txns waiting to commit. */ + SH_TAILQ_HEAD(__free) free_commits;/* free list of commit structs. */ + + /* + * In-memory logs maintain a list of the start positions of all log + * files currently active in the in-memory buffer. This is to make the + * lookup from LSN to log buffer offset efficient. + */ + SH_TAILQ_HEAD(__logfile) logfiles; + SH_TAILQ_HEAD(__free_logfile) free_logfiles; +}; + +/* + * __db_commit structure -- + * One of these is allocated for each transaction waiting to commit. + */ +struct __db_commit { + db_mutex_t mtx_txnwait; /* Mutex for txn to wait on. */ + DB_LSN lsn; /* LSN of commit record. */ + SH_TAILQ_ENTRY links; /* Either on free or waiting list. */ + +#define DB_COMMIT_FLUSH 0x0001 /* Flush the log when you wake up. */ + u_int32_t flags; +}; + +/* + * Check for the proper progression of Log Sequence Numbers. 
+ * If we are rolling forward the LSN on the page must be greater + * than or equal to the previous LSN in log record. + * We ignore NOT LOGGED LSNs. The user did an unlogged update. + * We should eventually see a log record that matches and continue + * forward. + * A ZERO LSN implies a page that was allocated prior to the recovery + * start point and then truncated later in the log. An allocation of a + * page after this page will extend the file, leaving a hole. We want to + * ignore this page until it is truncated again. + * + */ + +#define CHECK_LSN(e, redo, cmp, lsn, prev) \ + if (DB_REDO(redo) && (cmp) < 0 && \ + ((!IS_NOT_LOGGED_LSN(*(lsn)) && !IS_ZERO_LSN(*(lsn))) || \ + IS_REP_CLIENT(e))) { \ + ret = __db_check_lsn(e, lsn, prev); \ + goto out; \ + } +#define CHECK_ABORT(e, redo, cmp, lsn, prev) \ + if (redo == DB_TXN_ABORT && (cmp) != 0 && \ + ((!IS_NOT_LOGGED_LSN(*(lsn)) && !IS_ZERO_LSN(*(lsn))) || \ + IS_REP_CLIENT(e))) { \ + ret = __db_check_lsn(e, lsn, prev); \ + goto out; \ + } + +/* + * Helper for in-memory logs -- check whether an offset is in range + * in a ring buffer (inclusive of start, exclusive of end). + */ +struct __db_filestart { + u_int32_t file; + size_t b_off; + + SH_TAILQ_ENTRY links; /* Either on free or waiting list. */ +}; + +#define RINGBUF_LEN(lp, start, end) \ + ((start) < (end) ? \ + (end) - (start) : (lp)->buffer_size - ((start) - (end))) + +/* + * Internal macro to set pointer to the begin_lsn for generated + * logging routines. If begin_lsn is already set then do nothing. + * Return a pointer to the last lsn too. 
+ */ +#undef DB_SET_TXN_LSNP +#define DB_SET_TXN_LSNP(txn, blsnp, llsnp) do { \ + DB_LSN *__lsnp; \ + TXN_DETAIL *__td; \ + __td = (txn)->td; \ + *(llsnp) = &__td->last_lsn; \ + while (__td->parent != INVALID_ROFF) \ + __td = R_ADDR(&(txn)->mgrp->reginfo, __td->parent); \ + __lsnp = &__td->begin_lsn; \ + if (IS_ZERO_LSN(*__lsnp)) \ + *(blsnp) = __lsnp; \ +} while (0) + +/* + * Status codes indicating the validity of a log file examined by + * __log_valid(). + */ +typedef enum { + DB_LV_INCOMPLETE, + DB_LV_NONEXISTENT, + DB_LV_NORMAL, + DB_LV_OLD_READABLE, + DB_LV_OLD_UNREADABLE +} logfile_validity; + +/* + * All log records have these fields. + */ +typedef struct __log_rec_hdr { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; +} LOG_REC_HEADER; + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/log_ext.h" +#include "dbinc_auto/dbreg_auto.h" +#include "dbinc_auto/dbreg_ext.h" +#endif /* !_DB_LOG_H_ */ diff --git a/src/dbinc/log_verify.h b/src/dbinc/log_verify.h new file mode 100644 index 00000000..9fc7db7e --- /dev/null +++ b/src/dbinc/log_verify.h @@ -0,0 +1,205 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#ifndef _DB_LOG_VERIFY_H_ +#define _DB_LOG_VERIFY_H_ + +#include "db_config.h" +#include "db_int.h" + +/* + * Log verification handle, such a handle is shared among all verification + * functions during one verification process. + */ +struct __db_log_verify_info { + DB_ENV *dbenv; /* The database environment. */ + DB *txninfo; /* (txnid, __txn_verify_info) map. */ + DB *ckps; /* (ckp lrid, __ckpinfo) map. */ + DB *fileregs; /* (file-uid, __file_reg_info) map. */ + DB *fnameuid; /* (fname, fuid), secondary db of fileregs. */ + /* (dbreg-id, __file_reg_info) map, NOT the sec db for fileregs. */ + DB *dbregids; + DB *pgtxn; /* (fileid-pageno, txnid) map. */ + DB *txnpg; /* (txnid, fileid-pageno), sec db of pgtxn. 
*/ + /* lsn, (time-stamp, logtype(txn_regop or txn_ckp)) map. */ + DB *lsntime; + /* Secondary db of lsntime, use timestamp as secindex. */ + DB *timelsn; + + /* Time range database, (u_int32_t, __lv_txnrange) db. */ + DB *txnrngs; + /* Store abort txn (lsn, txnid) map. */ + DB *txnaborts; + DB_LSN last_lsn; /* Lsn of last log record we verified. */ + /* The number of active, abort, commit and prepared txns. */ + u_int32_t ntxn_active, ntxn_abort, ntxn_commit, ntxn_prep; + u_int32_t nckp; /* The number of checkpoint log records. */ + /* + * Target database file unique id. Set if only verify log records + * of a database. + */ + u_int8_t target_dbid[DB_FILE_ID_LEN]; + u_int32_t non_txnup_cnt;/* Number of non-txnal log records. */ + u_int32_t unknown_logrec_cnt;/* Number of unknown log record. */ + u_int32_t external_logrec_cnt;/* Number of external log record. */ + /* + * (Log type, number of record) map. typeids are continuous + * integers, 256 is a big enough number. + */ + u_int32_t lrtypes[256]; + u_int32_t aborted_txnid;/* The last aborted txnid. */ + DB_LSN aborted_txnlsn; /* Last aborted txn's last log. */ + DB_LSN valid_lsn; /* When reach this log,unset DB_LOG_VERIFY_PARTIAL. */ + char *logtype_names[256];/* The type name string of each type of log.*/ + const DB_LOG_VERIFY_CONFIG *lv_config; + DB_THREAD_INFO *ip; + u_int32_t flags; /* The result of the verification. */ +}; + +/* Transaction information. */ +struct __txn_verify_info { +#define TXN_VERIFY_INFO_FIXSIZE (4 * sizeof(DB_LSN) + 9 * sizeof(u_int32_t)) +#define TXN_VERIFY_INFO_TOTSIZE(s) \ + (TXN_VERIFY_INFO_FIXSIZE + (s).num_recycle * sizeof(DB_LSN) + \ + __lv_dbt_arrsz((s).fileups, (s).filenum) + \ + sizeof(int32_t) * (s).filenum) + + u_int32_t txnid; /* The key, also stored in data here. */ + u_int32_t ptxnid; /* The parent txn id. */ + + DB_LSN first_lsn; /* Lsn of the first log record of this txn. */ + DB_LSN last_lsn; /* Last lsn of the txn. 
*/ + DB_LSN prep_lsn; /* txn_prepare's lsn.*/ + DB_LSN cur_lsn; /* The lsn of the latest db op of this txn. */ + + u_int32_t num_recycle; /* The number of recycle lsns. */ + u_int32_t filenum; /* The number of files updated. */ + +#define TXN_STAT_ACTIVE 0 +#define TXN_STAT_ABORT 1 +#define TXN_STAT_COMMIT 2 +#define TXN_STAT_PREPARE 3 + u_int32_t status; /* Txn status */ + + /* The number of active, abort and commit children. */ + u_int32_t nchild_active; + u_int32_t nchild_abort; + u_int32_t nchild_commit; + + u_int32_t flags; /* Copied from the DB_TXN::flags member. */ + + DB_LSN *recycle_lsns; /* The array of txn_recycle records' lsns. */ + /* The array of file unique ids of files updated by this txn. */ + DBT *fileups; + int32_t *dbregid;/* The array of dbreg file ids updated by this txn. */ +}; + +/* Database file information. */ +struct __lv_filereg_info { +#define FILE_REG_INFO_FIXSIZE (sizeof(u_int32_t)) +#define FILE_REG_INFO_TOTSIZE(s) (FILE_REG_INFO_FIXSIZE + (s).fileid.size + \ + sizeof((s).fileid.size) + sizeof(int32_t) * (s).regcnt + \ + strlen((s).fname) + 1) + + u_int32_t regcnt; /* The number of dbregids for this file-uid. */ + int32_t *dbregids; + DBT fileid; /* The file unique id. */ + char *fname; /* Database file name. */ +}; + +/* Database file dbreg_register information. */ +struct __lv_filelife { + int32_t dbregid; /* The primary key. */ + DBTYPE dbtype; /* The database type. */ + u_int32_t lifetime; /* DBREG_OPEN, DBREG_CHKPNT, DBREG_CLOSE */ + db_pgno_t meta_pgno; /* The meta_pgno; */ + u_int8_t fileid[DB_FILE_ID_LEN]; + DB_LSN lsn; /* The lsn of log updating lifetime. */ +}; + +/* Checkpoint information. */ +struct __lv_ckp_info { + int32_t timestamp; + DB_LSN lsn, ckplsn; /* Lsn member is the primary key. */ +}; + +/* + * General information from log records which have timestamps. + * We use it to do time range verifications. Such information is + * acquired when backward-playing the logs before verification. 
+ */ +struct __lv_timestamp_info { + DB_LSN lsn; /* The primary key. */ + int32_t timestamp; /* The secondary key. */ + + /* + * The log types containing a time stamp, so far only txn_ckp + * and txn_regop types. + */ + u_int32_t logtype; +}; + +/* + * Transaction ranges. Such information is acquired when backward-playing the + * logs before verification. Can be used to find aborted txns. + */ +struct __lv_txnrange { + /* + * Transaction ID, the primary key. The db storing records of this + * type should allow dup since txnids maybe reused. + */ + u_int32_t txnid; + + /* + * The parent txn id, ptxnid is the parent of txnid + * during [begin, end]. + */ + u_int32_t ptxnid; + + /* + * The first and last lsn, end is used to sort dup data because it's + * seen prior to begin in a backward playback, and [begin, end] + * intervals won't overlap. + */ + DB_LSN begin, end; + + int32_t when_commit;/* The time of the commit, 0 if aborted. */ +}; + + +/* Parameter types for __iterate_txninfo function. */ +struct __add_recycle_params { + u_int32_t min, max;/* The recycled txnid range. */ + /* The array of txn info to update into db. */ + VRFY_TXN_INFO **ti2u; + u_int32_t ti2ui, ti2ul;/* The effective length and array length. */ + DB_LSN recycle_lsn; +}; + +struct __ckp_verify_params { + DB_LSN lsn, ckp_lsn; + ENV *env; +}; + +/* Helper macros. */ +#define LOGTYPE_NAME(lvh, type) (lvh->logtype_names[type] == NULL ? \ + NULL : lvh->logtype_names[type] + 3) +#define NUMCMP(i1, i2) ((i1) > (i2) ? 1 : ((i1) < (i2) ? -1 : 0)) + +#define INVAL_DBREGID -1 + +/* + * During recovery, DBREG_CHKPNT can be seen as open, and it's followed by + * a DBREG_RCLOSE or DBREG_CLOSE. 
+ */ +#define IS_DBREG_OPEN(opcode) (opcode == DBREG_OPEN || opcode == \ + DBREG_PREOPEN || opcode == DBREG_REOPEN || opcode == DBREG_CHKPNT) +#define IS_DBREG_CLOSE(opcode) (opcode == DBREG_CLOSE || opcode == DBREG_RCLOSE) + +#define IS_LOG_VRFY_SUPPORTED(version) ((version) == DB_LOGVERSION) + +#endif /* !_DB_LOG_VERIFY_H_*/ diff --git a/src/dbinc/mp.h b/src/dbinc/mp.h new file mode 100644 index 00000000..2a6510f1 --- /dev/null +++ b/src/dbinc/mp.h @@ -0,0 +1,684 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_MP_H_ +#define _DB_MP_H_ + +#include "dbinc/atomic.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +struct __bh; typedef struct __bh BH; +struct __bh_frozen_p; typedef struct __bh_frozen_p BH_FROZEN_PAGE; +struct __bh_frozen_a; typedef struct __bh_frozen_a BH_FROZEN_ALLOC; +struct __db_mpool_hash; typedef struct __db_mpool_hash DB_MPOOL_HASH; +struct __db_mpool_fstat_int; +typedef struct __db_mpool_fstat_int DB_MPOOL_FSTAT_INT; +struct __db_mpreg; typedef struct __db_mpreg DB_MPREG; +struct __mpool; typedef struct __mpool MPOOL; + + /* We require at least 20KB of cache. */ +#define DB_CACHESIZE_MIN (20 * 1024) + +/* + * DB_MPOOLFILE initialization methods cannot be called after open is called, + * other methods cannot be called before open is called + */ +#define MPF_ILLEGAL_AFTER_OPEN(dbmfp, name) \ + if (F_ISSET(dbmfp, MP_OPEN_CALLED)) \ + return (__db_mi_open((dbmfp)->env, name, 1)); +#define MPF_ILLEGAL_BEFORE_OPEN(dbmfp, name) \ + if (!F_ISSET(dbmfp, MP_OPEN_CALLED)) \ + return (__db_mi_open((dbmfp)->env, name, 0)); + +/* + * Cache flush operations, plus modifiers. + */ +#define DB_SYNC_ALLOC 0x0001 /* Flush for allocation. */ +#define DB_SYNC_CACHE 0x0002 /* Flush entire cache. */ +#define DB_SYNC_CHECKPOINT 0x0004 /* Checkpoint. */ +#define DB_SYNC_FILE 0x0008 /* Flush file. 
*/ +#define DB_SYNC_INTERRUPT_OK 0x0010 /* Allow interrupt and return OK. */ +#define DB_SYNC_QUEUE_EXTENT 0x0020 /* Flush a queue file with extents. */ +#define DB_SYNC_SUPPRESS_WRITE 0x0040 /* Ignore max-write configuration. */ +#define DB_SYNC_TRICKLE 0x0080 /* Trickle sync. */ + +/* + * DB_MPOOL -- + * Per-process memory pool structure. + */ +struct __db_mpool { + /* These fields need to be protected for multi-threaded support. */ + db_mutex_t mutex; /* Thread mutex. */ + + /* + * DB_MPREG structure for the DB pgin/pgout routines. + * + * Linked list of application-specified pgin/pgout routines. + */ + DB_MPREG *pg_inout; + LIST_HEAD(__db_mpregh, __db_mpreg) dbregq; + + /* List of DB_MPOOLFILE's. */ + TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq; + + /* + * The env and reginfo fields are not thread protected, as they are + * initialized during mpool creation, and not modified again. + */ + ENV *env; /* Enclosing environment. */ + REGINFO *reginfo; /* Underlying cache regions. */ +}; + +/* + * DB_MPREG -- + * DB_MPOOL registry of pgin/pgout functions. + */ +struct __db_mpreg { + LIST_ENTRY(__db_mpreg) q; /* Linked list. */ + + int32_t ftype; /* File type. */ + /* Pgin, pgout routines. */ + int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *)); + int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *)); +}; + +/* + * File hashing -- + * We hash each file to hash bucket based on its fileid + * or, in the case of in memory files, its name. + */ + +/* Number of file hash buckets, a small prime number */ +#define MPOOL_FILE_BUCKETS 17 + +#define FHASH(id, len) __ham_func5(NULL, id, (u_int32_t)(len)) + +#define FNBUCKET(id, len) \ + (FHASH(id, len) % MPOOL_FILE_BUCKETS) + +/* Macros to lock/unlock the mpool region as a whole. 
*/ +#define MPOOL_SYSTEM_LOCK(env) \ + MUTEX_LOCK(env, ((MPOOL *) \ + (env)->mp_handle->reginfo[0].primary)->mtx_region) +#define MPOOL_SYSTEM_UNLOCK(env) \ + MUTEX_UNLOCK(env, ((MPOOL *) \ + (env)->mp_handle->reginfo[0].primary)->mtx_region) + +/* Macros to lock/unlock a specific mpool region. */ +#define MPOOL_REGION_LOCK(env, infop) \ + MUTEX_LOCK(env, ((MPOOL *)(infop)->primary)->mtx_region) +#define MPOOL_REGION_UNLOCK(env, infop) \ + MUTEX_UNLOCK(env, ((MPOOL *)(infop)->primary)->mtx_region) + +/* + * MPOOL -- + * Shared memory pool region. + */ +struct __mpool { /* SHARED */ + /* + * The memory pool can be broken up into individual pieces/files. + * There are two reasons for this: firstly, on Solaris you can allocate + * only a little more than 2GB of memory in a contiguous chunk, + * and I expect to see more systems with similar issues. Secondly, + * applications can add / remove pieces to dynamically resize the + * cache. + * + * While this structure is duplicated in each piece of the cache, + * the first of these pieces/files describes the entire pool, the + * second only describe a piece of the cache. + */ + db_mutex_t mtx_region; /* Region mutex. */ + db_mutex_t mtx_resize; /* Resizing mutex. */ + + /* + * The lsn field and list of underlying MPOOLFILEs are thread protected + * by the region lock. + */ + DB_LSN lsn; /* Maximum checkpoint LSN. */ + + /* Configuration information: protected by the region lock. */ + u_int32_t max_nreg; /* Maximum number of regions. */ + u_int32_t gbytes; /* Number of gigabytes in cache. */ + u_int32_t bytes; /* Number of bytes in cache. */ + u_int32_t pagesize; /* Default page size. */ + db_size_t mp_mmapsize; /* Maximum file size for mmap. */ + int32_t mp_maxopenfd; /* Maximum open file descriptors. */ + int32_t mp_maxwrite; /* Maximum buffers to write. */ + db_timeout_t mp_maxwrite_sleep; /* Sleep after writing max buffers. */ + + /* + * The number of regions and the total number of hash buckets across + * all regions. 
+ * These fields are not protected by a mutex because we assume that we + * can read a 32-bit value atomically. They are only modified by cache + * resizing which holds the mpool resizing mutex to ensure that + * resizing is single-threaded. See the comment in mp_resize.c for + * more information. + */ + u_int32_t nreg; /* Number of underlying REGIONS. */ + u_int32_t nbuckets; /* Total number of hash buckets. */ + + /* + * The regid field is protected by the resize mutex. + */ + roff_t regids; /* Array of underlying REGION Ids. */ + + roff_t ftab; /* Hash table of files. */ + + /* + * The following fields describe the per-cache portion of the region. + * + * The htab and htab_buckets fields are not thread protected as they + * are initialized during mpool creation, and not modified again. + * + * The last_checked, lru_priority, and lru_generation fields are thread + * protected by the region lock. + */ + roff_t htab; /* Hash table offset. */ + u_int32_t htab_buckets; /* Number of hash table entries. */ + u_int32_t last_checked; /* Last bucket checked for free. */ + u_int32_t lru_priority; /* Priority counter for buffer LRU. */ + u_int32_t lru_generation; /* Allocation race condition detector. */ + u_int32_t htab_mutexes; /* Number of hash mutexes per region. */ + + /* + * The pages field keeps track of the number of pages in the cache + * and is protected by the region lock. It is accessed for reading + * without the lock to return statistics. + */ + u_int32_t pages; /* Number of pages in the cache. */ + + /* + * The stat fields are not thread protected, and cannot be trusted. + */ + DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */ + + /* + * We track page puts so that we can decide when allocation is never + * going to succeed. We don't lock the field, all we care about is + * if it changes. + */ + u_int32_t put_counter; /* Count of page put calls. */ + + /* + * Cache flush operations take a long time... 
+ * + * Some cache flush operations want to ignore the app's configured + * max-write parameters (they are trying to quickly shut down an + * environment, for example). We can't specify that as an argument + * to the cache region functions, because we may decide to ignore + * the max-write configuration after the cache operation has begun. + * If the variable suppress_maxwrite is set, ignore the application + * max-write config. + * + * We may want to interrupt cache flush operations in high-availability + * configurations. + */ +#define DB_MEMP_SUPPRESS_WRITE 0x01 +#define DB_MEMP_SYNC_INTERRUPT 0x02 + u_int32_t config_flags; + + /* Free frozen buffer headers, protected by the region lock. */ + SH_TAILQ_HEAD(__free_frozen) free_frozen; + + /* Allocated blocks of frozen buffer headers. */ + SH_TAILQ_HEAD(__alloc_frozen) alloc_frozen; +}; + +/* + * NREGION -- + * Select a cache region given the bucket number. + */ +#define NREGION(mp, bucket) \ + ((bucket) / (mp)->htab_buckets) + +/* + * MP_HASH -- + * We make the assumption that early pages of the file are more likely + * to be retrieved than the later pages, which means the top bits will + * be more interesting for hashing as they're less likely to collide. + * That said, as 512 8K pages represents a 4MB file, so only reasonably + * large files will have page numbers with any other than the bottom 9 + * bits set. We XOR in the MPOOL offset of the MPOOLFILE that backs the + * page, since that should also be unique for the page. We don't want + * to do anything very fancy -- speed is more important to us than using + * good hashing. + * + * Since moving to a dynamic hash, which boils down to using some of the + * least significant bits of the hash value, we no longer want to use a + * simple shift here, because it's likely with a bit shift that mf_offset + * will be ignored, and pages from different files end up in the same + * hash bucket. Use a nearby prime instead. 
+ */ +#define MP_HASH(mf_offset, pgno) \ + ((((pgno) << 8) ^ (pgno)) ^ (((u_int32_t) mf_offset) * 509)) + +/* + * Inline the calculation of the mask, since we can't reliably store the mask + * with the number of buckets in the region. + * + * This is equivalent to: + * mask = (1 << __db_log2(nbuckets)) - 1; + */ +#define MP_MASK(nbuckets, mask) do { \ + for (mask = 1; mask < (nbuckets); mask = (mask << 1) | 1) \ + ; \ +} while (0) + +#define MP_HASH_BUCKET(hash, nbuckets, mask, bucket) do { \ + (bucket) = (hash) & (mask); \ + if ((bucket) >= (nbuckets)) \ + (bucket) &= ((mask) >> 1); \ +} while (0) + +#define MP_BUCKET(mf_offset, pgno, nbuckets, bucket) do { \ + u_int32_t __mask; \ + MP_MASK(nbuckets, __mask); \ + MP_HASH_BUCKET(MP_HASH(mf_offset, pgno), nbuckets, \ + __mask, bucket); \ +} while (0) + +/* + * MP_GET_REGION -- + * Select the region for a given page. + */ +#define MP_GET_REGION(dbmfp, pgno, infopp, ret) do { \ + DB_MPOOL *__t_dbmp; \ + MPOOL *__t_mp; \ + \ + __t_dbmp = dbmfp->env->mp_handle; \ + __t_mp = __t_dbmp->reginfo[0].primary; \ + if (__t_mp->max_nreg == 1) { \ + *(infopp) = &__t_dbmp->reginfo[0]; \ + } else \ + ret = __memp_get_bucket((dbmfp)->env, \ + (dbmfp)->mfp, (pgno), (infopp), NULL, NULL); \ +} while (0) + +/* + * MP_GET_BUCKET -- + * Select and lock the bucket for a given page. 
+ */ +#define MP_GET_BUCKET(env, mfp, pgno, infopp, hp, bucket, ret) do { \ + DB_MPOOL *__t_dbmp; \ + MPOOL *__t_mp; \ + roff_t __t_mf_offset; \ + \ + __t_dbmp = (env)->mp_handle; \ + __t_mp = __t_dbmp->reginfo[0].primary; \ + if (__t_mp->max_nreg == 1) { \ + *(infopp) = &__t_dbmp->reginfo[0]; \ + __t_mf_offset = R_OFFSET(*(infopp), (mfp)); \ + MP_BUCKET(__t_mf_offset, \ + (pgno), __t_mp->nbuckets, bucket); \ + (hp) = R_ADDR(*(infopp), __t_mp->htab); \ + (hp) = &(hp)[bucket]; \ + MUTEX_READLOCK(env, (hp)->mtx_hash); \ + ret = 0; \ + } else \ + ret = __memp_get_bucket((env), \ + (mfp), (pgno), (infopp), &(hp), &(bucket)); \ +} while (0) + +struct __db_mpool_hash { + db_mutex_t mtx_hash; /* Per-bucket mutex. */ + + DB_HASHTAB hash_bucket; /* Head of bucket. */ + + db_atomic_t hash_page_dirty;/* Count of dirty pages. */ + +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t hash_io_wait; /* Count of I/O waits. */ + u_int32_t hash_frozen; /* Count of frozen buffers. */ + u_int32_t hash_thawed; /* Count of thawed buffers. */ + u_int32_t hash_frozen_freed;/* Count of freed frozen buffers. */ +#endif + + DB_LSN old_reader; /* Oldest snapshot reader (cached). */ + + u_int32_t flags; +}; + +/* + * Mpool file statistics structure for use in shared memory. + * This structure must contain the same fields as the __db_mpool_fstat struct + * except for any pointer fields that are filled in only when the struct is + * being populated for output through the API. + */ +struct __db_mpool_fstat_int { /* SHARED */ + u_int32_t st_pagesize; /* Page size. */ +#ifndef __TEST_DB_NO_STATISTICS + u_int32_t st_map; /* Pages from mapped files. */ + uintmax_t st_cache_hit; /* Pages found in the cache. */ + uintmax_t st_cache_miss; /* Pages not found in the cache. */ + uintmax_t st_page_create; /* Pages created in the cache. */ + uintmax_t st_page_in; /* Pages read in. */ + uintmax_t st_page_out; /* Pages written out. 
*/ +#endif +}; + +/* + * The base mpool priority is 1/4th of the name space, or just under 2^30. When + * the LRU priority counter is about to wrap (within a 128-entry 'red zone' + * area) we adjust everybody down so that no one is larger than the new LRU + * priority. + */ +#define MPOOL_LRU_MAX UINT32_MAX +#define MPOOL_LRU_REDZONE (MPOOL_LRU_MAX - 128) +#define MPOOL_LRU_BASE (MPOOL_LRU_MAX / 4) +#define MPOOL_LRU_DECREMENT (MPOOL_LRU_MAX - MPOOL_LRU_BASE) + +/* + * Mpool priorities from low to high. Defined in terms of fractions of the + * buffers in the pool. + */ +#define MPOOL_PRI_VERY_LOW -1 /* Dead duck. Check and set to 0. */ +#define MPOOL_PRI_LOW -2 /* Low. */ +#define MPOOL_PRI_DEFAULT 0 /* No adjustment -- special case.*/ +#define MPOOL_PRI_HIGH 10 /* With the dirty buffers. */ +#define MPOOL_PRI_DIRTY 10 /* Dirty gets a 10% boost. */ +#define MPOOL_PRI_VERY_HIGH 1 /* Add number of buffers in pool. */ + +/* + * MPOOLFILE -- + * Shared DB_MPOOLFILE information. + */ +struct __mpoolfile { /* SHARED */ + db_mutex_t mutex; /* MPOOLFILE mutex. */ + + /* Protected by MPOOLFILE mutex. */ + u_int32_t revision; /* Bumped on any movement subdbs. */ + u_int32_t mpf_cnt; /* Ref count: DB_MPOOLFILEs. */ + u_int32_t block_cnt; /* Ref count: blocks in cache. */ + db_pgno_t last_pgno; /* Last page in the file. */ + db_pgno_t last_flushed_pgno; /* Last page flushed to disk. */ + db_pgno_t orig_last_pgno; /* Original last page in the file. */ + db_pgno_t maxpgno; /* Maximum page number. */ + + roff_t path_off; /* File name location. */ + + /* Protected by hash bucket mutex. */ + SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */ + + /* + * The following are used for file compaction processing. + * They are only used when a thread is in the process + * of trying to move free pages to the end of the file. + * Other threads may look here when freeing a page. + * Protected by a lock on the metapage. + */ + u_int32_t free_ref; /* Refcount to freelist. 
*/ + u_int32_t free_cnt; /* Count of free pages. */ + db_size_t free_size; /* Allocated size of free list. */ + roff_t free_list; /* Offset to free list. */ + + /* + * We normally don't lock the deadfile field when we read it since we + * only care if the field is zero or non-zero. We do lock on read when + * searching for a matching MPOOLFILE -- see that code for more detail. + */ + int32_t deadfile; /* Dirty pages can be discarded. */ + + u_int32_t bucket; /* hash bucket for this file. */ + + /* + * None of the following fields are thread protected. + * + * There are potential races with the ftype field because it's read + * without holding a lock. However, it has to be set before adding + * any buffers to the cache that depend on it being set, so there + * would need to be incorrect operation ordering to have a problem. + */ + int32_t ftype; /* File type. */ + + /* + * There are potential races with the priority field because it's read + * without holding a lock. However, a collision is unlikely and if it + * happens is of little consequence. + */ + int32_t priority; /* Priority when unpinning buffer. */ + + /* + * There are potential races with the file_written field (many threads + * may be writing blocks at the same time), and with no_backing_file + * and unlink_on_close fields, as they may be set while other threads + * are reading them. However, we only care if the field value is zero + * or non-zero, so don't lock the memory. + * + * !!! + * Theoretically, a 64-bit architecture could put two of these fields + * in a single memory operation and we could race. I have never seen + * an architecture where that's a problem, and I believe Java requires + * that to never be the case. + * + * File_written is set whenever a buffer is marked dirty in the cache. + * It can be cleared in some cases, after all dirty buffers have been + * written AND the file has been flushed to disk. + */ + int32_t file_written; /* File was written. 
*/ + int32_t no_backing_file; /* Never open a backing file. */ + int32_t unlink_on_close; /* Unlink file on last close. */ + db_atomic_t multiversion; /* Number of DB_MULTIVERSION handles. */ + + /* + * We do not protect the statistics in "stat" because of the cost of + * the mutex in the get/put routines. There is a chance that a count + * will get lost. + */ + DB_MPOOL_FSTAT_INT stat; /* Per-file mpool statistics. */ + + /* + * The remaining fields are initialized at open and never subsequently + * modified. + */ + int32_t lsn_off; /* Page's LSN offset. */ + u_int32_t clear_len; /* Bytes to clear on page create. */ + + roff_t fileid_off; /* File ID string location. */ + + u_int32_t pagesize; /* Underlying pagesize. */ + roff_t pgcookie_len; /* Pgin/pgout cookie length. */ + roff_t pgcookie_off; /* Pgin/pgout cookie location. */ + + /* + * The flags are initialized at open and never subsequently modified. + */ +#define MP_CAN_MMAP 0x001 /* If the file can be mmap'd. */ +#define MP_DATABASE_LOCKING 0x002 /* Lock in exclusive mode. */ +#define MP_DIRECT 0x004 /* No OS buffering. */ +#define MP_DURABLE_UNKNOWN 0x008 /* We don't care about durability. */ +#define MP_EXTENT 0x010 /* Extent file. */ +#define MP_FAKE_DEADFILE 0x020 /* Deadfile field: fake flag. */ +#define MP_FAKE_FILEWRITTEN 0x040 /* File_written field: fake flag. */ +#define MP_FAKE_NB 0x080 /* No_backing_file field: fake flag. */ +#define MP_FAKE_UOC 0x100 /* Unlink_on_close field: fake flag. */ +#define MP_NOT_DURABLE 0x200 /* File is not durable. */ +#define MP_TEMP 0x400 /* Backing file is a temporary. */ + u_int32_t flags; + + db_pgno_t fe_watermark; /* File extension watermark. */ + u_int32_t fe_txnid; /* Transaction that set watermark. */ + u_int32_t fe_nlws; /* Number of log writes suppressed. */ +}; + +/* + * Flags to __memp_bh_free. + */ +#define BH_FREE_FREEMEM 0x01 +#define BH_FREE_REUSE 0x02 +#define BH_FREE_UNLOCKED 0x04 + +/* + * BH -- + * Buffer header. 
+ */ +struct __bh { /* SHARED */ + db_mutex_t mtx_buf; /* Shared/Exclusive mutex */ + db_atomic_t ref; /* Reference count. */ +#define BH_REFCOUNT(bhp) atomic_read(&(bhp)->ref) + +#define BH_CALLPGIN 0x001 /* Convert the page before use. */ +#define BH_DIRTY 0x002 /* Page is modified. */ +#define BH_DIRTY_CREATE 0x004 /* Page is modified. */ +#define BH_DISCARD 0x008 /* Page is useless. */ +#define BH_EXCLUSIVE 0x010 /* Exclusive access acquired. */ +#define BH_FREED 0x020 /* Page was freed. */ +#define BH_FROZEN 0x040 /* Frozen buffer: allocate & re-read. */ +#define BH_TRASH 0x080 /* Page is garbage. */ +#define BH_THAWED 0x100 /* Page was thawed. */ + u_int16_t flags; + + u_int32_t priority; /* Priority. */ + SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */ + + db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ + roff_t mf_offset; /* Associated MPOOLFILE offset. */ + u_int32_t bucket; /* Hash bucket containing header. */ + int region; /* Region containing header. */ + + roff_t td_off; /* MVCC: creating TXN_DETAIL offset. */ + SH_CHAIN_ENTRY vc; /* MVCC: version chain. */ +#ifdef DIAG_MVCC + u_int16_t align_off; /* Alignment offset for diagnostics.*/ +#endif + + /* + * !!! + * This array must be at least size_t aligned -- the DB access methods + * put PAGE and other structures into it, and then access them directly. + * (We guarantee size_t alignment to applications in the documentation, + * too.) + */ + DB_ALIGN8 u_int8_t buf[1]; /* Variable length data. */ +}; + +/* + * BH_FROZEN_PAGE -- + * Data used to find a frozen buffer header. + */ +struct __bh_frozen_p { + BH header; + db_pgno_t spgno; /* Page number in freezer file. */ +}; + +/* + * BH_FROZEN_ALLOC -- + * Frozen buffer headers are allocated a page at a time in general. This + * structure is allocated at the beginning of the page so that the + * allocation chunks can be tracked and freed (for private environments). 
+ */ +struct __bh_frozen_a { + SH_TAILQ_ENTRY links; +}; + +#define MULTIVERSION(dbp) atomic_read(&(dbp)->mpf->mfp->multiversion) + +#define PAGE_TO_BH(p) (BH *)((u_int8_t *)(p) - SSZA(BH, buf)) +#define IS_DIRTY(p) \ + (F_ISSET(PAGE_TO_BH(p), BH_DIRTY|BH_EXCLUSIVE) == (BH_DIRTY|BH_EXCLUSIVE)) + +#define BH_OWNER(env, bhp) \ + ((TXN_DETAIL *)R_ADDR(&env->tx_handle->reginfo, bhp->td_off)) + +#define BH_OWNED_BY(env, bhp, txn) ((txn) != NULL && \ + (bhp)->td_off != INVALID_ROFF && \ + (txn)->td == BH_OWNER(env, bhp)) + +#define VISIBLE_LSN(env, bhp) \ + (&BH_OWNER(env, bhp)->visible_lsn) + +/* + * Make a copy of the buffer's visible LSN, one field at a time. We rely on the + * 32-bit operations being atomic. The visible_lsn starts at MAX_LSN and is + * set during commit or abort to the current LSN. + * + * If we race with a commit / abort, we may see either the file or the offset + * still at UINT32_MAX, so vlsn is guaranteed to be in the future. That's OK, + * since we had to take the log region lock to allocate the read LSN so we were + * never going to see this buffer anyway. + */ +#define BH_VISIBLE(env, bhp, read_lsnp, vlsn) \ + (bhp->td_off == INVALID_ROFF || \ + ((vlsn).file = VISIBLE_LSN(env, bhp)->file, \ + (vlsn).offset = VISIBLE_LSN(env, bhp)->offset, \ + LOG_COMPARE((read_lsnp), &(vlsn)) >= 0)) + +#define BH_OBSOLETE(bhp, old_lsn, vlsn) (SH_CHAIN_HASNEXT(bhp, vc) ? 
\ + BH_VISIBLE(env, SH_CHAIN_NEXTP(bhp, vc, __bh), &(old_lsn), vlsn) :\ + BH_VISIBLE(env, bhp, &(old_lsn), vlsn)) + +#define MVCC_SKIP_CURADJ(dbc, pgno) (dbc->txn != NULL && \ + F_ISSET(dbc->txn, TXN_SNAPSHOT) && MULTIVERSION(dbc->dbp) && \ + dbc->txn->td != NULL && __memp_skip_curadj(dbc, pgno)) + +#if defined(DIAG_MVCC) && defined(HAVE_MPROTECT) +#define VM_PAGESIZE 4096 +#define MVCC_BHSIZE(mfp, sz) do { \ + sz += VM_PAGESIZE + sizeof(BH); \ + if (mfp->pagesize < VM_PAGESIZE) \ + sz += VM_PAGESIZE - mfp->pagesize; \ +} while (0) + +#define MVCC_BHALIGN(p) do { \ + BH *__bhp; \ + void *__orig = (p); \ + p = ALIGNP_INC(p, VM_PAGESIZE); \ + if ((u_int8_t *)p < (u_int8_t *)__orig + sizeof(BH)) \ + p = (u_int8_t *)p + VM_PAGESIZE; \ + __bhp = (BH *)((u_int8_t *)p - SSZA(BH, buf)); \ + DB_ASSERT(env, \ + ((uintptr_t)__bhp->buf & (VM_PAGESIZE - 1)) == 0); \ + DB_ASSERT(env, \ + (u_int8_t *)__bhp >= (u_int8_t *)__orig); \ + DB_ASSERT(env, (u_int8_t *)p + mfp->pagesize < \ + (u_int8_t *)__orig + len); \ + __bhp->align_off = \ + (u_int16_t)((u_int8_t *)__bhp - (u_int8_t *)__orig); \ + p = __bhp; \ +} while (0) + +#define MVCC_BHUNALIGN(bhp) do { \ + (bhp) = (BH *)((u_int8_t *)(bhp) - (bhp)->align_off); \ +} while (0) + +#ifdef linux +#define MVCC_MPROTECT(buf, sz, mode) do { \ + int __ret = mprotect((buf), (sz), (mode)); \ + DB_ASSERT(env, __ret == 0); \ +} while (0) +#else +#define MVCC_MPROTECT(buf, sz, mode) do { \ + if (!F_ISSET(env, ENV_PRIVATE | ENV_SYSTEM_MEM)) { \ + int __ret = mprotect((buf), (sz), (mode)); \ + DB_ASSERT(env, __ret == 0); \ + } \ +} while (0) +#endif /* linux */ + +#else /* defined(DIAG_MVCC) && defined(HAVE_MPROTECT) */ +#define MVCC_BHSIZE(mfp, sz) do {} while (0) +#define MVCC_BHALIGN(p) do {} while (0) +#define MVCC_BHUNALIGN(bhp) do {} while (0) +#define MVCC_MPROTECT(buf, size, mode) do {} while (0) +#endif + +/* + * Flags to __memp_ftruncate. 
+ */ +#define MP_TRUNC_NOCACHE 0x01 +#define MP_TRUNC_RECOVER 0x02 + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/mp_ext.h" +#endif /* !_DB_MP_H_ */ diff --git a/src/dbinc/mutex.h b/src/dbinc/mutex.h new file mode 100644 index 00000000..d3f92967 --- /dev/null +++ b/src/dbinc/mutex.h @@ -0,0 +1,304 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_MUTEX_H_ +#define _DB_MUTEX_H_ + +#ifdef HAVE_MUTEX_SUPPORT +/* The inlined trylock calls need access to the details of mutexes. */ +#define LOAD_ACTUAL_MUTEX_CODE +#include "dbinc/mutex_int.h" + +#ifndef HAVE_SHARED_LATCHES + #error "Shared latches are required in DB 4.8 and above" +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * By default, spin 50 times per processor if fail to acquire a test-and-set + * mutex, we have anecdotal evidence it's a reasonable value. + */ +#define MUTEX_SPINS_PER_PROCESSOR 50 + +/* + * Mutexes are represented by unsigned, 32-bit integral values. As the + * OOB value is 0, mutexes can be initialized by zero-ing out the memory + * in which they reside. + */ +#define MUTEX_INVALID 0 + +/* + * We track mutex allocations by ID. 
+ */ +#define MTX_APPLICATION 1 +#define MTX_ATOMIC_EMULATION 2 +#define MTX_DB_HANDLE 3 +#define MTX_ENV_DBLIST 4 +#define MTX_ENV_HANDLE 5 +#define MTX_ENV_REGION 6 +#define MTX_LOCK_REGION 7 +#define MTX_LOGICAL_LOCK 8 +#define MTX_LOG_FILENAME 9 +#define MTX_LOG_FLUSH 10 +#define MTX_LOG_HANDLE 11 +#define MTX_LOG_REGION 12 +#define MTX_MPOOLFILE_HANDLE 13 +#define MTX_MPOOL_BH 14 +#define MTX_MPOOL_FH 15 +#define MTX_MPOOL_FILE_BUCKET 16 +#define MTX_MPOOL_HANDLE 17 +#define MTX_MPOOL_HASH_BUCKET 18 +#define MTX_MPOOL_REGION 19 +#define MTX_MUTEX_REGION 20 +#define MTX_MUTEX_TEST 21 +#define MTX_REP_CHKPT 22 +#define MTX_REP_DATABASE 23 +#define MTX_REP_DIAG 24 +#define MTX_REP_EVENT 25 +#define MTX_REP_REGION 26 +#define MTX_REP_START 27 +#define MTX_REP_WAITER 28 +#define MTX_REPMGR 29 +#define MTX_SEQUENCE 30 +#define MTX_TWISTER 31 +#define MTX_TCL_EVENTS 32 +#define MTX_TXN_ACTIVE 33 +#define MTX_TXN_CHKPT 34 +#define MTX_TXN_COMMIT 35 +#define MTX_TXN_MVCC 36 +#define MTX_TXN_REGION 37 + +#define MTX_MAX_ENTRY 37 + +/* The following macros are defined on some platforms, e.g. QNX. */ +#undef __mutex_init +#undef __mutex_lock +#undef __mutex_timedlock +#undef __mutex_unlock +#undef __mutex_destroy +#undef __mutex_trylock + +/* Redirect mutex calls to the correct functions. */ +#if !defined(HAVE_MUTEX_HYBRID) && ( \ + defined(HAVE_MUTEX_PTHREADS) || \ + defined(HAVE_MUTEX_SOLARIS_LWP) || \ + defined(HAVE_MUTEX_UI_THREADS)) +#define __mutex_init(a, b, c) __db_pthread_mutex_init(a, b, c) +#define __mutex_lock(a, b) __db_pthread_mutex_lock(a, b, 0) +#define __mutex_timedlock(a, b, c) __db_pthread_mutex_lock(a, b, c) +#define __mutex_unlock(a, b) __db_pthread_mutex_unlock(a, b) +#define __mutex_destroy(a, b) __db_pthread_mutex_destroy(a, b) +#define __mutex_trylock(a, b) __db_pthread_mutex_trylock(a, b) +/* + * These trylock versions do not support DB_ENV_FAILCHK. Callers which loop + * checking mutexes which are held by dead processes or threads might spin. 
+ * These have ANSI-style definitions because this file can be included by + * C++ files, and extern "C" affects linkage only, not argument typing. + */ +static inline int __db_pthread_mutex_trylock(ENV *env, db_mutex_t mutex) +{ + int ret; + DB_MUTEX *mutexp; + if (!MUTEX_ON(env) || F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + mutexp = MUTEXP_SET(env, mutex); +#ifdef HAVE_SHARED_LATCHES + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) + ret = pthread_rwlock_trywrlock(&mutexp->u.rwlock); + else +#endif + ret = pthread_mutex_trylock(&mutexp->u.m.mutex); + if (ret == EBUSY) + ret = DB_LOCK_NOTGRANTED; + else if (ret == 0) { + F_SET(mutexp, DB_MUTEX_LOCKED); + env->dbenv->thread_id(env->dbenv, &mutexp->pid, &mutexp->tid); + STAT_INC(env, + mutex, set_nowait, mutexp->mutex_set_nowait, mutex); + } + return (ret); +} +#ifdef HAVE_SHARED_LATCHES +#define __mutex_rdlock(a, b) __db_pthread_mutex_readlock(a, b) +#define __mutex_tryrdlock(a, b) __db_pthread_mutex_tryreadlock(a, b) +static inline int __db_pthread_mutex_tryreadlock(ENV *env, db_mutex_t mutex) +{ + int ret; + DB_MUTEX *mutexp; + if (!MUTEX_ON(env) || F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + mutexp = MUTEXP_SET(env, mutex); + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) + ret = pthread_rwlock_tryrdlock(&mutexp->u.rwlock); + else + return (EINVAL); + if (ret == EBUSY) + ret = DB_LOCK_NOTGRANTED; +#ifdef HAVE_STATISTICS + if (ret == 0) + STAT_INC(env, + mutex, set_rd_nowait, mutexp->mutex_set_nowait, mutex); +#endif + return (ret); +} +#endif +#elif defined(HAVE_MUTEX_WIN32) || defined(HAVE_MUTEX_WIN32_GCC) +#define __mutex_init(a, b, c) __db_win32_mutex_init(a, b, c) +#define __mutex_lock(a, b) __db_win32_mutex_lock(a, b, 0) +#define __mutex_timedlock(a, b, c) __db_win32_mutex_lock(a, b, c) +#define __mutex_trylock(a, b) __db_win32_mutex_trylock(a, b) +#define __mutex_unlock(a, b) __db_win32_mutex_unlock(a, b) +#define __mutex_destroy(a, b) __db_win32_mutex_destroy(a, b) +#ifdef HAVE_SHARED_LATCHES 
+#define __mutex_rdlock(a, b) __db_win32_mutex_readlock(a, b) +#define __mutex_tryrdlock(a, b) __db_win32_mutex_tryreadlock(a, b) +#endif +#elif defined(HAVE_MUTEX_FCNTL) +#define __mutex_init(a, b, c) __db_fcntl_mutex_init(a, b, c) +#define __mutex_lock(a, b) __db_fcntl_mutex_lock(a, b, 0) +#define __mutex_timedlock(a, b, c) __db_fcntl_lock(a, b, c) +#define __mutex_trylock(a, b) __db_fcntl_mutex_trylock(a, b) +#define __mutex_unlock(a, b) __db_fcntl_mutex_unlock(a, b) +#define __mutex_destroy(a, b) __db_fcntl_mutex_destroy(a, b) +#else +#define __mutex_init(a, b, c) __db_tas_mutex_init(a, b, c) +#define __mutex_lock(a, b) __db_tas_mutex_lock(a, b, 0) +#define __mutex_timedlock(a, b, c) __db_tas_mutex_lock(a, b, c) +#define __mutex_trylock(a, b) __db_tas_mutex_trylock(a, b) +#define __mutex_unlock(a, b) __db_tas_mutex_unlock(a, b) +#define __mutex_destroy(a, b) __db_tas_mutex_destroy(a, b) +#if defined(HAVE_SHARED_LATCHES) +#define __mutex_rdlock(a, b) __db_tas_mutex_readlock(a, b) +#define __mutex_tryrdlock(a,b) __db_tas_mutex_tryreadlock(a, b) +#endif +#endif + +/* + * When there is no method to get a shared latch, fall back to + * implementing __mutex_rdlock() as getting an exclusive one. + * This occurs either when !HAVE_SHARED_LATCHES or HAVE_MUTEX_FCNTL. + */ +#ifndef __mutex_rdlock +#define __mutex_rdlock(a, b) __mutex_lock(a, b) +#endif +#ifndef __mutex_tryrdlock +#define __mutex_tryrdlock(a, b) __mutex_trylock(a, b) +#endif + +/* + * Lock/unlock a mutex. If the mutex was never required, the thread of + * control can proceed without it. + * + * We never fail to acquire or release a mutex without panicing. Simplify + * the macros to always return a panic value rather than saving the actual + * return value of the mutex routine. 
+ */ +#ifdef HAVE_MUTEX_SUPPORT +#define MUTEX_LOCK(env, mutex) do { \ + if ((mutex) != MUTEX_INVALID && \ + __mutex_lock(env, mutex) != 0) \ + return (DB_RUNRECOVERY); \ +} while (0) + +/* + * Always check the return value of MUTEX_TRYLOCK()! Expect 0 on success, + * or DB_LOCK_NOTGRANTED, or possibly DB_RUNRECOVERY for failchk. + */ +#define MUTEX_TRYLOCK(env, mutex) \ + (((mutex) == MUTEX_INVALID) ? 0 : __mutex_trylock(env, mutex)) + +/* + * Acquire a DB_MUTEX_SHARED "mutex" in shared mode. + */ +#define MUTEX_READLOCK(env, mutex) do { \ + if ((mutex) != MUTEX_INVALID && \ + __mutex_rdlock(env, mutex) != 0) \ + return (DB_RUNRECOVERY); \ +} while (0) +#define MUTEX_TRY_READLOCK(env, mutex) \ + ((mutex) != MUTEX_INVALID ? __mutex_tryrdlock(env, mutex) : 0) + +#define MUTEX_UNLOCK(env, mutex) do { \ + if ((mutex) != MUTEX_INVALID && \ + __mutex_unlock(env, mutex) != 0) \ + return (DB_RUNRECOVERY); \ +} while (0) + +#define MUTEX_WAIT(env, mutex, duration) do { \ + int __ret; \ + if ((mutex) != MUTEX_INVALID && \ + (__ret = __mutex_timedlock(env, mutex, duration)) != 0 && \ + __ret != DB_TIMEOUT) \ + return (DB_RUNRECOVERY); \ +} while (0) +#else +/* + * There are calls to lock/unlock mutexes outside of #ifdef's -- replace + * the call with something the compiler can discard, but which will make + * if-then-else blocks work correctly. + */ +#define MUTEX_LOCK(env, mutex) (mutex) = (mutex) +#define MUTEX_TRYLOCK(env, mutex) (mutex) = (mutex) +#define MUTEX_READLOCK(env, mutex) (mutex) = (mutex) +#define MUTEX_TRY_READLOCK(env, mutex) (mutex) = (mutex) +#define MUTEX_UNLOCK(env, mutex) (mutex) = (mutex) +#define MUTEX_REQUIRED(env, mutex) (mutex) = (mutex) +#define MUTEX_REQUIRED_READ(env, mutex) (mutex) = (mutex) +#define MUTEX_WAIT(env, mutex, duration) (mutex) = (mutex) +#endif + +/* + * Berkeley DB ports may require single-threading at places in the code. 
+ */ +#ifdef HAVE_MUTEX_VXWORKS +#include "taskLib.h" +/* + * Use the taskLock() mutex to eliminate a race where two tasks are + * trying to initialize the global lock at the same time. + */ +#define DB_BEGIN_SINGLE_THREAD do { \ + if (DB_GLOBAL(db_global_init)) \ + (void)semTake(DB_GLOBAL(db_global_lock), WAIT_FOREVER); \ + else { \ + taskLock(); \ + if (DB_GLOBAL(db_global_init)) { \ + taskUnlock(); \ + (void)semTake(DB_GLOBAL(db_global_lock), \ + WAIT_FOREVER); \ + continue; \ + } \ + DB_GLOBAL(db_global_lock) = \ + semBCreate(SEM_Q_FIFO, SEM_EMPTY); \ + if (DB_GLOBAL(db_global_lock) != NULL) \ + DB_GLOBAL(db_global_init) = 1; \ + taskUnlock(); \ + } \ +} while (DB_GLOBAL(db_global_init) == 0) +#define DB_END_SINGLE_THREAD (void)semGive(DB_GLOBAL(db_global_lock)) +#endif + +/* + * Single-threading defaults to a no-op. + */ +#ifndef DB_BEGIN_SINGLE_THREAD +#define DB_BEGIN_SINGLE_THREAD +#endif +#ifndef DB_END_SINGLE_THREAD +#define DB_END_SINGLE_THREAD +#endif + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/mutex_ext.h" +#endif /* !_DB_MUTEX_H_ */ diff --git a/src/dbinc/mutex_int.h b/src/dbinc/mutex_int.h new file mode 100644 index 00000000..6d0aa0c2 --- /dev/null +++ b/src/dbinc/mutex_int.h @@ -0,0 +1,1070 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_MUTEX_INT_H_ +#define _DB_MUTEX_INT_H_ + +#include "dbinc/atomic.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Mutexes and Shared Latches + * + * Mutexes may be test-and-set (spinning & yielding when busy), + * native versions (pthreads, WaitForSingleObject) + * or a hybrid which has the lower no-contention overhead of test-and-set + * mutexes, using operating system calls only to block and wakeup. + * + * Hybrid exclusive-only mutexes include a 'tas' field. + * Hybrid DB_MUTEX_SHARED latches also include a 'shared' field. 
+ */ +
+/********************************************************************* + * POSIX.1 pthreads interface. + *********************************************************************/ +#if defined(HAVE_MUTEX_PTHREADS) +/* + * Pthreads-based mutexes (exclusive-only) and latches (possibly shared) + * have the same MUTEX_FIELDS union. Different parts of the union are used + * depending on: + * - whether HAVE_SHARED_LATCHES is defined, and + * - if HAVE_SHARED_LATCHES, whether this particular instance of a mutex + * is a shared mutex (DB_MUTEX_SHARED). + * + * The rwlock part of the union is used *only* for non-hybrid shared latches; + * in all other cases the mutex and cond fields are the only ones used. + * + * configuration & Who uses the field + * mutex + * mutex cond rwlock tas + * Native mutex y y + * Hybrid mutexes y y y + * Native sharedlatches y + * Hybrid sharedlatches y y y + * + * They all have a condition variable which is used only for + * DB_MUTEX_SELF_BLOCK waits. + * + * There can be no self-blocking shared latches: the pthread_cond_wait() would + * require getting a pthread_mutex_t, also it would not make sense. 
+ */ +#define MUTEX_FIELDS \ + union { \ + struct { \ + pthread_mutex_t mutex; /* Mutex */ \ + pthread_cond_t cond; /* Condition variable */ \ + } m; \ + pthread_rwlock_t rwlock; /* Read/write lock */ \ + } u; + +#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID) +#define RET_SET_PTHREAD_LOCK(mutexp, ret) do { \ + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) \ + RET_SET((pthread_rwlock_wrlock(&(mutexp)->u.rwlock)), \ + ret); \ + else \ + RET_SET((pthread_mutex_lock(&(mutexp)->u.m.mutex)), ret); \ +} while (0) +#define RET_SET_PTHREAD_TRYLOCK(mutexp, ret) do { \ + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) \ + RET_SET((pthread_rwlock_trywrlock(&(mutexp)->u.rwlock)), \ + ret); \ + else \ + RET_SET((pthread_mutex_trylock(&(mutexp)->u.m.mutex)), \ + ret); \ +} while (0) +#else +#define RET_SET_PTHREAD_LOCK(mutexp, ret) \ + RET_SET(pthread_mutex_lock(&(mutexp)->u.m.mutex), ret); +#define RET_SET_PTHREAD_TRYLOCK(mutexp, ret) \ + RET_SET(pthread_mutex_trylock(&(mutexp)->u.m.mutex), ret); +#endif +#endif + +#ifdef HAVE_MUTEX_UI_THREADS +#include +#endif + +/********************************************************************* + * Solaris lwp threads interface. + * + * !!! + * We use LWP mutexes on Solaris instead of UI or POSIX mutexes (both of + * which are available), for two reasons. First, the Solaris C library + * includes versions of the both UI and POSIX thread mutex interfaces, but + * they are broken in that they don't support inter-process locking, and + * there's no way to detect it, e.g., calls to configure the mutexes for + * inter-process locking succeed without error. So, we use LWP mutexes so + * that we don't fail in fairly undetectable ways because the application + * wasn't linked with the appropriate threads library. 
Second, there were + * bugs in SunOS 5.7 (Solaris 7) where if an application loaded the C library + * before loading the libthread/libpthread threads libraries (e.g., by using + * dlopen to load the DB library), the pwrite64 interface would be translated + * into a call to pwrite and DB would drop core. + *********************************************************************/ +#ifdef HAVE_MUTEX_SOLARIS_LWP +/* + * XXX + * Don't change to -- although lwp.h is listed in the + * Solaris manual page as the correct include to use, it causes the Solaris + * compiler on SunOS 2.6 to fail. + */ +#include + +#define MUTEX_FIELDS \ + lwp_mutex_t mutex; /* Mutex. */ \ + lwp_cond_t cond; /* Condition variable. */ +#endif + +/********************************************************************* + * Solaris/Unixware threads interface. + *********************************************************************/ +#ifdef HAVE_MUTEX_UI_THREADS +#include +#include + +#define MUTEX_FIELDS \ + mutex_t mutex; /* Mutex. */ \ + cond_t cond; /* Condition variable. */ +#endif + +/********************************************************************* + * AIX C library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_AIX_CHECK_LOCK +#include +typedef int tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) (!_check_lock(x, 0, 1)) +#define MUTEX_UNSET(x) _clear_lock(x, 0) +#endif +#endif + +/********************************************************************* + * Apple/Darwin library functions. 
+ *********************************************************************/ +#ifdef HAVE_MUTEX_DARWIN_SPIN_LOCK_TRY +typedef u_int32_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +extern int _spin_lock_try(tsl_t *); +extern void _spin_unlock(tsl_t *); +#define MUTEX_SET(tsl) _spin_lock_try(tsl) +#define MUTEX_UNSET(tsl) _spin_unlock(tsl) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * General C library functions (msemaphore). + * + * !!! + * Check for HPPA as a special case, because it requires unusual alignment, + * and doesn't support semaphores in malloc(3) or shmget(2) memory. + * + * !!! + * Do not remove the MSEM_IF_NOWAIT flag. The problem is that if a single + * process makes two msem_lock() calls in a row, the second one returns an + * error. We depend on the fact that we can lock against ourselves in the + * locking subsystem, where we set up a mutex so that we can block ourselves. + * Tested on OSF1 v4.0. + *********************************************************************/ +#ifdef HAVE_MUTEX_HPPA_MSEM_INIT +#define MUTEX_ALIGN 16 +#endif + +#if defined(HAVE_MUTEX_MSEM_INIT) || defined(HAVE_MUTEX_HPPA_MSEM_INIT) +#include +typedef msemaphore tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (msem_init(x, MSEM_UNLOCKED) <= (msemaphore *)0) +#define MUTEX_SET(x) (!msem_lock(x, MSEM_IF_NOWAIT)) +#define MUTEX_UNSET(x) msem_unlock(x, 0) +#endif +#endif + +/********************************************************************* + * Plan 9 library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_PLAN9 +typedef Lock tsl_t; + +#define MUTEX_INIT(x) (memset(x, 0, sizeof(Lock)), 0) +#define MUTEX_SET(x) canlock(x) +#define MUTEX_UNSET(x) unlock(x) +#endif + +/********************************************************************* + * Reliant UNIX C library functions. 
+ *********************************************************************/ +#ifdef HAVE_MUTEX_RELIANTUNIX_INITSPIN +#include +typedef spinlock_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (initspin(x, 1), 0) +#define MUTEX_SET(x) (cspinlock(x) == 0) +#define MUTEX_UNSET(x) spinunlock(x) +#endif +#endif + +/********************************************************************* + * General C library functions (POSIX 1003.1 sema_XXX). + * + * !!! + * Never selected by autoconfig in this release (semaphore calls are known + * to not work in Solaris 5.5). + *********************************************************************/ +#ifdef HAVE_MUTEX_SEMA_INIT +#include +typedef sema_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_DESTROY(x) sema_destroy(x) +#define MUTEX_INIT(x) (sema_init(x, 1, USYNC_PROCESS, NULL) != 0) +#define MUTEX_SET(x) (sema_wait(x) == 0) +#define MUTEX_UNSET(x) sema_post(x) +#endif +#endif + +/********************************************************************* + * SGI C library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_SGI_INIT_LOCK +#include +typedef abilock_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (init_lock(x) != 0) +#define MUTEX_SET(x) (!acquire_lock(x)) +#define MUTEX_UNSET(x) release_lock(x) +#endif +#endif + +/********************************************************************* + * Solaris C library functions. + * + * !!! + * These are undocumented functions, but they're the only ones that work + * correctly as far as we know. + *********************************************************************/ +#ifdef HAVE_MUTEX_SOLARIS_LOCK_TRY +#include +#define MUTEX_MEMBAR(x) membar_enter() +#define MEMBAR_ENTER() membar_enter() +#define MEMBAR_EXIT() membar_exit() +#include +typedef lock_t tsl_t; + +/* + * The functions are declared in , but under #ifdef KERNEL. + * Re-declare them here to avoid warnings. 
+ */ +extern int _lock_try(lock_t *); +extern void _lock_clear(lock_t *); + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) _lock_try(x) +#define MUTEX_UNSET(x) _lock_clear(x) +#endif +#endif + +/********************************************************************* + * VMS. + *********************************************************************/ +#ifdef HAVE_MUTEX_VMS +#include +#include +typedef volatile unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#ifdef __ALPHA +#define MUTEX_SET(tsl) (!__TESTBITSSI(tsl, 0)) +#else /* __VAX */ +#define MUTEX_SET(tsl) (!(int)_BBSSI(0, tsl)) +#endif +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * VxWorks + * Use basic binary semaphores in VxWorks, as we currently do not need + * any special features. We do need the ability to single-thread the + * entire system, however, because VxWorks doesn't support the open(2) + * flag O_EXCL, the mechanism we normally use to single thread access + * when we're first looking for a DB environment. + *********************************************************************/ +#ifdef HAVE_MUTEX_VXWORKS +#include "taskLib.h" +typedef SEM_ID tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * Uses of this MUTEX_SET() need to have a local 'nowait' variable, + * which determines whether to return right away when the semaphore + * is busy or to wait until it is available. + */ +#define MUTEX_SET(tsl) \ + (semTake((*(tsl)), nowait ? NO_WAIT : WAIT_FOREVER) == OK) +#define MUTEX_UNSET(tsl) (semGive((*tsl))) +#define MUTEX_INIT(tsl) \ + ((*(tsl) = semBCreate(SEM_Q_FIFO, SEM_FULL)) == NULL) +#define MUTEX_DESTROY(tsl) semDelete(*tsl) +#endif +#endif + +/********************************************************************* + * Win16 + * + * Win16 spinlocks are simple because we cannot possibly be preempted. + * + * !!! 
+ * We should simplify this by always returning a no-need-to-lock lock + * when we initialize the mutex. + *********************************************************************/ +#ifdef HAVE_MUTEX_WIN16 +typedef unsigned int tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(tsl) (*(tsl) = 1) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#endif +#endif + +/********************************************************************* + * Win32 - always a hybrid mutex + *********************************************************************/ +#if defined(HAVE_MUTEX_WIN32) || defined(HAVE_MUTEX_WIN32_GCC) +typedef LONG volatile tsl_t; +#define MUTEX_FIELDS \ + LONG nwaiters; \ + u_int32_t id; /* ID used for creating events */ \ + +#if defined(LOAD_ACTUAL_MUTEX_CODE) +#define MUTEX_SET(tsl) (!InterlockedExchange((PLONG)tsl, 1)) +#define MUTEX_UNSET(tsl) InterlockedExchange((PLONG)tsl, 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) + +/* + * From Intel's performance tuning documentation (and see SR #6975): + * ftp://download.intel.com/design/perftool/cbts/appnotes/sse2/w_spinlock.pdf + * + * "For this reason, it is highly recommended that you insert the PAUSE + * instruction into all spin-wait code immediately. Using the PAUSE + * instruction does not affect the correctness of programs on existing + * platforms, and it improves performance on Pentium 4 processor platforms." + */ +#ifdef HAVE_MUTEX_WIN32 +#if !defined(_WIN64) && !defined(DB_WINCE) +#define MUTEX_PAUSE {__asm{_emit 0xf3}; __asm{_emit 0x90}} +#endif +#endif +#ifdef HAVE_MUTEX_WIN32_GCC +#define MUTEX_PAUSE __asm__ volatile ("rep; nop" : : ); +#endif +#endif +#endif + +/********************************************************************* + * 68K/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_68K_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* gcc/68K: 0 is clear, 1 is set. 
*/ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + int __r; \ + __asm__ volatile("tas %1; \n \ + seq %0" \ + : "=dm" (__r), "=m" (*__l) \ + : "1" (*__l) \ + ); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * ALPHA/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_ALPHA_GCC_ASSEMBLY +typedef u_int32_t tsl_t; + +#define MUTEX_ALIGN 4 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/alpha. Should return 0 if could not acquire the lock, 1 if + * lock was acquired properly. + */ +static inline int +MUTEX_SET(tsl_t *tsl) { + register tsl_t *__l = tsl; + register tsl_t __r; + __asm__ volatile( + "1: ldl_l %0,%2\n" + " blbs %0,2f\n" + " or $31,1,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + " mb\n" + " br 3f\n" + "2: xor %0,%0\n" + "3:" + : "=&r"(__r), "=m"(*__l) : "1"(*__l) : "memory"); + return __r; +} + +/* + * Unset mutex. Judging by Alpha Architecture Handbook, the mb instruction + * might be necessary before unlocking + */ +static inline int +MUTEX_UNSET(tsl_t *tsl) { + __asm__ volatile(" mb\n"); + return *tsl = 0; +} + +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * Tru64/cc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_TRU64_CC_ASSEMBLY +typedef volatile u_int32_t tsl_t; + +#define MUTEX_ALIGN 4 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#include +#define MUTEX_SET(tsl) (__LOCK_LONG_RETRY((tsl), 1) != 0) +#define MUTEX_UNSET(tsl) (__UNLOCK_LONG(tsl)) + +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * ARM/gcc assembly. 
+ *********************************************************************/ +#ifdef HAVE_MUTEX_ARM_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* gcc/arm: 0 is clear, 1 is set. */ +#define MUTEX_SET(tsl) ({ \ + int __r; \ + __asm__ volatile( \ + "swpb %0, %1, [%2]\n\t" \ + "eor %0, %0, #1\n\t" \ + : "=&r" (__r) \ + : "r" (1), "r" (tsl) \ + ); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(volatile tsl_t *)(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * HPPA/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_HPPA_GCC_ASSEMBLY +typedef u_int32_t tsl_t; + +#define MUTEX_ALIGN 16 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * The PA-RISC has a "load and clear" instead of a "test and set" instruction. + * The 32-bit word used by that instruction must be 16-byte aligned. We could + * use the "aligned" attribute in GCC but that doesn't work for stack variables. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + int __r; \ + __asm__ volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l)); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(volatile tsl_t *)(tsl) = -1) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * IA64/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_IA64_GCC_ASSEMBLY +typedef volatile unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* gcc/ia64: 0 is clear, 1 is set. */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + long __r; \ + __asm__ volatile("xchg1 %0=%1,%2" : \ + "=r"(__r), "+m"(*__l) : "r"(1)); \ + __r ^ 1; \ +}) + +/* + * Store through a "volatile" pointer so we get a store with "release" + * semantics. 
+ */ +#define MUTEX_UNSET(tsl) (*(tsl_t *)(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * PowerPC/gcc assembly. + *********************************************************************/ +#if defined(HAVE_MUTEX_PPC_GCC_ASSEMBLY) +typedef u_int32_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * The PowerPC does a sort of pseudo-atomic locking. You set up a + * 'reservation' on a chunk of memory containing a mutex by loading the + * mutex value with LWARX. If the mutex has an 'unlocked' (arbitrary) + * value, you then try storing into it with STWCX. If no other process or + * thread broke your 'reservation' by modifying the memory containing the + * mutex, then the STCWX succeeds; otherwise it fails and you try to get + * a reservation again. + * + * While mutexes are explicitly 4 bytes, a 'reservation' applies to an + * entire cache line, normally 32 bytes, aligned naturally. If the mutex + * lives near data that gets changed a lot, there's a chance that you'll + * see more broken reservations than you might otherwise. The only + * situation in which this might be a problem is if one processor is + * beating on a variable in the same cache block as the mutex while another + * processor tries to acquire the mutex. That's bad news regardless + * because of the way it bashes caches, but if you can't guarantee that a + * mutex will reside in a relatively quiescent cache line, you might + * consider padding the mutex to force it to live in a cache line by + * itself. No, you aren't guaranteed that cache lines are 32 bytes. Some + * embedded processors use 16-byte cache lines, while some 64-bit + * processors use 128-bit cache lines. But assuming a 32-byte cache line + * won't get you into trouble for now. 
+ * + * If mutex locking is a bottleneck, then you can speed it up by adding a + * regular LWZ load before the LWARX load, so that you can test for the + * common case of a locked mutex without wasting cycles making a reservation. + * + * gcc/ppc: 0 is clear, 1 is set. + */ +static inline int +MUTEX_SET(int *tsl) { + int __r; + __asm__ volatile ( +"0: \n\t" +" lwarx %0,0,%1 \n\t" +" cmpwi %0,0 \n\t" +" bne- 1f \n\t" +" stwcx. %1,0,%1 \n\t" +" isync \n\t" +" beq+ 2f \n\t" +" b 0b \n\t" +"1: \n\t" +" li %1,0 \n\t" +"2: \n\t" + : "=&r" (__r), "+r" (tsl) + : + : "cr0", "memory"); + return (int)tsl; +} + +static inline int +MUTEX_UNSET(tsl_t *tsl) { + __asm__ volatile("sync" : : : "memory"); + return *tsl = 0; +} +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * OS/390 C. + *********************************************************************/ +#ifdef HAVE_MUTEX_S390_CC_ASSEMBLY +typedef int tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * cs() is declared in but is built in to the compiler. + * Must use LANGLVL(EXTENDED) to get its declaration. + */ +#define MUTEX_SET(tsl) (!cs(&zero, (tsl), 1)) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * S/390 32-bit assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_S390_GCC_ASSEMBLY +typedef int tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* gcc/S390: 0 is clear, 1 is set. 
*/ +static inline int +MUTEX_SET(tsl_t *tsl) { \ + register tsl_t *__l = (tsl); \ + int __r; \ + __asm__ volatile( \ + " la 1,%1\n" \ + " lhi 0,1\n" \ + " l %0,%1\n" \ + "0: cs %0,0,0(1)\n" \ + " jl 0b" \ + : "=&d" (__r), "+m" (*__l) \ + : : "0", "1", "cc"); \ + return !__r; \ +} + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * SCO/cc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_SCO_X86_CC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * UnixWare has threads in libthread, but OpenServer doesn't (yet). + * + * cc/x86: 0 is clear, 1 is set. + */ +#if defined(__USLC__) +asm int +_tsl_set(void *tsl) +{ +%mem tsl + movl tsl, %ecx + movl $1, %eax + lock + xchgb (%ecx),%al + xorl $1,%eax +} +#endif + +#define MUTEX_SET(tsl) _tsl_set(tsl) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#endif +#endif + +/********************************************************************* + * Sparc/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_SPARC_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#define MUTEX_ALIGN 8 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * The ldstub instruction takes the location specified by its first argument + * (a register containing a memory address) and loads its contents into its + * second argument (a register) and atomically sets the contents the location + * specified by its first argument to a byte of 1s. (The value in the second + * argument is never read, but only overwritten.) + * + * Hybrid mutexes require membar #StoreLoad and #LoadStore ordering on multi- + * processor v9 systems. + * + * gcc/sparc: 0 is clear, 1 is set. 
+ */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + register tsl_t __r; \ + __asm__ volatile \ + ("ldstub [%1],%0; stbar" \ + : "=r"( __r) : "r" (__l)); \ + !__r; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0, MUTEX_MEMBAR(tsl)) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +#define MUTEX_MEMBAR(x) \ + ({ __asm__ volatile ("membar #StoreStore|#StoreLoad|#LoadStore"); }) +#define MEMBAR_ENTER() \ + ({ __asm__ volatile ("membar #StoreStore|#StoreLoad"); }) +#define MEMBAR_EXIT() \ + ({ __asm__ volatile ("membar #StoreStore|#LoadStore"); }) +#endif +#endif + +/********************************************************************* + * UTS/cc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_UTS_CC_ASSEMBLY +typedef int tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) (!uts_lock(x, 1)) +#define MUTEX_UNSET(x) (*(x) = 0) +#endif +#endif + +/********************************************************************* + * MIPS/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_MIPS_GCC_ASSEMBLY +typedef u_int32_t tsl_t; + +#define MUTEX_ALIGN 4 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/MIPS. Should return 0 if could not acquire the lock, 1 if + * lock was acquired properly. 
+ */ +static inline int +MUTEX_SET(tsl_t *tsl) { + register tsl_t *__l = tsl; + register tsl_t __r, __t; + __asm__ volatile( + " .set push \n" + " .set mips2 \n" + " .set noreorder \n" + " .set nomacro \n" + "1: ll %0, %3 \n" + " ori %2, %0, 1 \n" + " sc %2, %1 \n" + " beqzl %2, 1b \n" + " nop \n" + " andi %2, %0, 1 \n" + " sync \n" + " .set reorder \n" + " .set pop \n" + : "=&r" (__t), "=m" (*tsl), "=&r" (__r) + : "m" (*tsl) + : "memory"); + return (!__r); +} + +static inline void +MUTEX_UNSET(tsl_t *tsl) { + __asm__ volatile( + " .set noreorder \n" + " sync \n" + " sw $0, %0 \n" + " .set reorder \n" + : "=m" (*tsl) + : "m" (*tsl) + : "memory"); +} + +#define MUTEX_INIT(tsl) (*(tsl) = 0) +#endif +#endif + +/********************************************************************* + * x86/gcc (32- and 64-bit) assembly. + *********************************************************************/ +#if defined(HAVE_MUTEX_X86_GCC_ASSEMBLY) || \ + defined(HAVE_MUTEX_X86_64_GCC_ASSEMBLY) +typedef volatile unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* gcc/x86: 0 is clear, 1 is set. */ +#define MUTEX_SET(tsl) ({ \ + tsl_t __r; \ + __asm__ volatile("movb $1, %b0\n\t" \ + "xchgb %b0,%1" \ + : "=&q" (__r) \ + : "m" (*(tsl_t *)(tsl)) \ + : "memory", "cc"); \ + !__r; /* return 1 on success, 0 on failure */ \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl_t *)(tsl) = 0) +#define MUTEX_INIT(tsl) (MUTEX_UNSET(tsl), 0) +/* + * We need to pass a valid address to generate the memory barrier + * otherwise PURIFY will complain. Use something referenced recently + * and initialized. 
+ */ +#if defined(HAVE_MUTEX_X86_GCC_ASSEMBLY) +#define MUTEX_MEMBAR(addr) \ + ({ __asm__ volatile ("lock; addl $0, %0" ::"m" (addr): "memory"); 1; }) +#else +#define MUTEX_MEMBAR(addr) \ + ({ __asm__ volatile ("mfence" ::: "memory"); 1; }) +#endif + +/* + * From Intel's performance tuning documentation (and see SR #6975): + * ftp://download.intel.com/design/perftool/cbts/appnotes/sse2/w_spinlock.pdf + * + * "For this reason, it is highly recommended that you insert the PAUSE + * instruction into all spin-wait code immediately. Using the PAUSE + * instruction does not affect the correctness of programs on existing + * platforms, and it improves performance on Pentium 4 processor platforms." + */ +#define MUTEX_PAUSE __asm__ volatile ("rep; nop" : : ); +#endif +#endif + +/* End of operating system & hardware architecture-specific definitions */ + +/* + * Mutex alignment defaults to sizeof(unsigned int). + * + * !!! + * Various systems require different alignments for mutexes (the worst we've + * seen so far is 16-bytes on some HP architectures). Malloc(3) is assumed + * to return reasonable alignment, all other mutex users must ensure proper + * alignment locally. + */ +#ifndef MUTEX_ALIGN +#define MUTEX_ALIGN sizeof(unsigned int) +#endif + +/* + * Mutex destruction defaults to a no-op. + */ +#ifndef MUTEX_DESTROY +#define MUTEX_DESTROY(x) +#endif + +/* + * Mutex pause defaults to a no-op. + */ +#ifndef MUTEX_PAUSE +#define MUTEX_PAUSE +#endif + +/* + * If no native atomic support is available then use mutexes to + * emulate atomic increment, decrement, and compare-and-exchange. + * The address of the atomic value selects which of a small number + * of mutexes to use to protect the updates. + * The number of mutexes should be somewhat larger than the number of + * processors in the system in order to minimize unnecessary contention. + * It defaults to 8 to handle most small (1-4) cpu systems, if it hasn't + * already been defined (e.g. 
in db_config.h) + */ +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) && \ + !defined(MAX_ATOMIC_MUTEXES) +#define MAX_ATOMIC_MUTEXES 1 +#endif + +/* + * DB_MUTEXMGR -- + * The mutex manager encapsulates the mutex system. + */ +struct __db_mutexmgr { + /* These fields are never updated after creation, so not protected. */ + DB_ENV *dbenv; /* Environment */ + REGINFO reginfo; /* Region information */ + + void *mutex_array; /* Base of the mutex array */ +}; + +/* Macros to lock/unlock the mutex region as a whole. */ +#define MUTEX_SYSTEM_LOCK(dbenv) \ + MUTEX_LOCK(dbenv, ((DB_MUTEXREGION *) \ + (dbenv)->mutex_handle->reginfo.primary)->mtx_region) +#define MUTEX_SYSTEM_UNLOCK(dbenv) \ + MUTEX_UNLOCK(dbenv, ((DB_MUTEXREGION *) \ + (dbenv)->mutex_handle->reginfo.primary)->mtx_region) + +/* + * DB_MUTEXREGION -- + * The primary mutex data structure in the shared memory region. + */ +typedef struct __db_mutexregion { /* SHARED */ + /* These fields are initialized at create time and never modified. */ + roff_t mutex_off_alloc;/* Offset of mutex array */ + roff_t mutex_off; /* Adjusted offset of mutex array */ + db_size_t mutex_size; /* Size of the aligned mutex */ + roff_t thread_off; /* Offset of the thread area. */ + + db_mutex_t mtx_region; /* Region mutex. */ + + /* Protected using the region mutex. */ + db_mutex_t mutex_next; /* Next free mutex */ + +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) + /* Mutexes for emulating atomic operations. */ + db_mutex_t mtx_atomic[MAX_ATOMIC_MUTEXES]; +#endif + + DB_MUTEX_STAT stat; /* Mutex statistics */ +} DB_MUTEXREGION; + +#ifdef HAVE_MUTEX_SUPPORT +struct __db_mutex_t { /* SHARED */ /* Mutex. */ +#ifdef MUTEX_FIELDS + MUTEX_FIELDS /* Opaque thread mutex structures. 
*/ +#endif +#ifndef HAVE_MUTEX_FCNTL +#if defined(HAVE_MUTEX_HYBRID) || \ + (defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_PTHREADS)) + /* + * For hybrid and test-and-set shared latches it is a counter: + * 0 means it is free, + * -1 is exclusively locked, + * > 0 is the number of shared readers. + * Pthreads shared latches use pthread_rwlock instead. + */ + tsl_t tas; + db_atomic_t sharecount; +#elif !defined(MUTEX_FIELDS) + /* + * This is the Test and Set flag for exclusive latches (mutexes): + * there is a free value (often 0, 1, or -1) and a set value. + */ + tsl_t tas; +#endif +#endif +#ifdef HAVE_MUTEX_HYBRID + volatile u_int32_t wait; /* Count of waiters. */ +#endif + pid_t pid; /* Process owning mutex */ + db_threadid_t tid; /* Thread owning mutex */ + + db_mutex_t mutex_next_link; /* Linked list of free mutexes. */ + +#ifdef HAVE_STATISTICS + int alloc_id; /* Allocation ID. */ + + u_int32_t mutex_set_wait; /* Granted after wait. */ + u_int32_t mutex_set_nowait; /* Granted without waiting. */ +#ifdef HAVE_SHARED_LATCHES + u_int32_t mutex_set_rd_wait; /* Granted shared lock after wait. */ + u_int32_t mutex_set_rd_nowait; /* Granted shared lock w/out waiting. */ +#endif +#ifdef HAVE_MUTEX_HYBRID + u_int32_t hybrid_wait; + u_int32_t hybrid_wakeup; /* for counting spurious wakeups */ +#endif +#endif + + /* + * A subset of the flag arguments for __mutex_alloc(). + * + * Flags should be an unsigned integer even if it's not required by + * the possible flags values, getting a single byte on some machines + * is expensive, and the mutex structure is a MP hot spot. + */ + volatile u_int32_t flags; /* MUTEX_XXX */ +}; +#endif + +/* Macro to get a reference to a specific mutex. */ +#define MUTEXP_SET(env, indx) \ + (F_ISSET(env, ENV_PRIVATE) ? 
(DB_MUTEX *) indx : \ + (DB_MUTEX *)((u_int8_t *)env->mutex_handle->mutex_array + \ + (indx) * \ + ((DB_MUTEXREGION *)env->mutex_handle->reginfo.primary)->mutex_size)) + +/* + * Check that a particular mutex is exclusively held at least by someone, not + * necessarily the current thread. + */ +#ifdef HAVE_MUTEX_SUPPORT +#define MUTEX_IS_OWNED(env, mutex) \ + (mutex == MUTEX_INVALID || !MUTEX_ON(env) || \ + F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || \ + F_ISSET(MUTEXP_SET(env, mutex), DB_MUTEX_LOCKED)) +#else +#define MUTEX_IS_OWNED(env, mutex) 0 +#endif + +#if defined(HAVE_MUTEX_HYBRID) || defined(DB_WIN32) || \ + (defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_PTHREADS)) +#define MUTEXP_IS_BUSY(mutexp) \ + (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \ + (atomic_read(&(mutexp)->sharecount) != 0) : \ + F_ISSET(mutexp, DB_MUTEX_LOCKED)) +#define MUTEXP_BUSY_FIELD(mutexp) \ + (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \ + (atomic_read(&(mutexp)->sharecount)) : (mutexp)->flags) +#else +/* Pthread_rwlocks don't have an low-cost 'is it being shared?' predicate. */ +#define MUTEXP_IS_BUSY(mutexp) (F_ISSET((mutexp), DB_MUTEX_LOCKED)) +#define MUTEXP_BUSY_FIELD(mutexp) ((mutexp)->flags) +#endif + +#define MUTEX_IS_BUSY(env, mutex) \ + (mutex == MUTEX_INVALID || !MUTEX_ON(env) || \ + F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || \ + MUTEXP_IS_BUSY(MUTEXP_SET(env, mutex))) + +#define MUTEX_REQUIRED(env, mutex) \ + DB_ASSERT(env, MUTEX_IS_OWNED(env, mutex)) + +#define MUTEX_REQUIRED_READ(env, mutex) \ + DB_ASSERT(env, MUTEX_IS_OWNED(env, mutex) || MUTEX_IS_BUSY(env, mutex)) + +/* + * Test and set (and thus hybrid) shared latches use compare & exchange + * to acquire; the others the mutex-setting primitive defined above. + */ +#ifdef LOAD_ACTUAL_MUTEX_CODE + +#if defined(HAVE_SHARED_LATCHES) +/* This is the value of the 'sharecount' of an exclusively held tas latch. 
+ * The particular value is not special; it is just unlikely to be caused + * by releasing or acquiring a shared latch too many times. + */ +#define MUTEX_SHARE_ISEXCLUSIVE (-1024) + +/* + * Get an exclusive lock on a possibly sharable latch. We use the native + * MUTEX_SET() operation for non-sharable latches; it usually is faster. + */ +#define MUTEXP_ACQUIRE(mutexp) \ + (F_ISSET(mutexp, DB_MUTEX_SHARED) ? \ + atomic_compare_exchange(env, \ + &(mutexp)->sharecount, 0, MUTEX_SHARE_ISEXCLUSIVE) : \ + MUTEX_SET(&(mutexp)->tas)) +#else +#define MUTEXP_ACQUIRE(mutexp) MUTEX_SET(&(mutexp)->tas) +#endif + +#ifndef MEMBAR_ENTER +#define MEMBAR_ENTER() +#define MEMBAR_EXIT() +#endif + +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_MUTEX_INT_H_ */ diff --git a/src/dbinc/os.h b/src/dbinc/os.h new file mode 100644 index 00000000..cc15fb69 --- /dev/null +++ b/src/dbinc/os.h @@ -0,0 +1,178 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_OS_H_ +#define _DB_OS_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Number of times to retry system calls that return EINTR or EBUSY. */ +#define DB_RETRY 100 + +#ifdef __TANDEM +/* + * OSS Tandem problem: fsync can return a Guardian file system error of 70, + * which has no symbolic name in OSS. HP says to retry the fsync. 
[#12957] + */ +#define RETRY_CHK(op, ret) do { \ + int __retries, __t_ret; \ + for ((ret) = 0, __retries = DB_RETRY;;) { \ + if ((op) == 0) \ + break; \ + (ret) = __os_get_syserr(); \ + if (((__t_ret = __os_posix_err(ret)) == EAGAIN || \ + __t_ret == EBUSY || __t_ret == EINTR || \ + __t_ret == EIO || __t_ret == 70) && --__retries > 0)\ + continue; \ + break; \ + } \ +} while (0) +#else +#define RETRY_CHK(op, ret) do { \ + int __retries, __t_ret; \ + for ((ret) = 0, __retries = DB_RETRY;;) { \ + if ((op) == 0) \ + break; \ + (ret) = __os_get_syserr(); \ + if (((__t_ret = __os_posix_err(ret)) == EAGAIN || \ + __t_ret == EBUSY || __t_ret == EINTR || \ + __t_ret == EIO) && --__retries > 0) \ + continue; \ + break; \ + } \ +} while (0) +#endif + +#define RETRY_CHK_EINTR_ONLY(op, ret) do { \ + int __retries; \ + for ((ret) = 0, __retries = DB_RETRY;;) { \ + if ((op) == 0) \ + break; \ + (ret) = __os_get_syserr(); \ + if (__os_posix_err(ret) == EINTR && --__retries > 0) \ + continue; \ + break; \ + } \ +} while (0) + +/* + * Flags understood by __os_open. + */ +#define DB_OSO_ABSMODE 0x0001 /* Absolute mode specified. */ +#define DB_OSO_CREATE 0x0002 /* POSIX: O_CREAT */ +#define DB_OSO_DIRECT 0x0004 /* Don't buffer the file in the OS. */ +#define DB_OSO_DSYNC 0x0008 /* POSIX: O_DSYNC. */ +#define DB_OSO_EXCL 0x0010 /* POSIX: O_EXCL */ +#define DB_OSO_RDONLY 0x0020 /* POSIX: O_RDONLY */ +#define DB_OSO_REGION 0x0040 /* Opening a region file. */ +#define DB_OSO_SEQ 0x0080 /* Expected sequential access. */ +#define DB_OSO_TEMP 0x0100 /* Remove after last close. */ +#define DB_OSO_TRUNC 0x0200 /* POSIX: O_TRUNC */ + +/* + * File modes. 
+ */ +#define DB_MODE_400 (S_IRUSR) +#define DB_MODE_600 (S_IRUSR|S_IWUSR) +#define DB_MODE_660 (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP) +#define DB_MODE_666 (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) +#define DB_MODE_700 (S_IRUSR|S_IWUSR|S_IXUSR) + +/* + * We group certain seek/write calls into a single function so that we + * can use pread(2)/pwrite(2) where they're available. + */ +#define DB_IO_READ 1 +#define DB_IO_WRITE 2 + +/* + * Make a last "panic" check. Imagine a thread of control running in Berkeley + * DB, going to sleep. Another thread of control decides to run recovery + * because the environment is broken. The first thing recovery does is panic + * the existing environment, but we only check the panic flag when crossing the + * public API. If the sleeping thread wakes up and writes something, we could + * have two threads of control writing the log files at the same time. So, + * before reading or writing, make a last panic check. Obviously, there's still + * a window, but it's very, very small. + */ +#define LAST_PANIC_CHECK_BEFORE_IO(env) \ + PANIC_CHECK(env); \ + if (env != NULL && \ + F_ISSET((env)->dbenv, DB_ENV_NOFLUSH)) \ + return (0) \ + \ +/* DB filehandle. */ +struct __fh_t { + /* + * Linked list of DB_FH's, linked from the DB_ENV, used to keep track + * of all open file handles for resource cleanup. + */ + TAILQ_ENTRY(__fh_t) q; + + /* + * The file-handle mutex is only used to protect the handle/fd + * across seek and read/write pairs, it does not protect the + * the reference count, or any other fields in the structure. + */ + db_mutex_t mtx_fh; /* Mutex to lock. */ + + int ref; /* Reference count. */ + +#if defined(DB_WIN32) + HANDLE handle; /* Windows/32 file handle. */ + HANDLE trunc_handle; /* Handle for truncate calls. */ +#endif + int fd; /* POSIX file descriptor. */ + + char *name; /* File name at open. */ + + /* + * Last seek statistics, used for zero-filling on filesystems + * that don't support it directly. 
+ */ + db_pgno_t pgno; + u_int32_t pgsize; + off_t offset; + +#ifdef HAVE_STATISTICS + u_int32_t seek_count; /* I/O statistics */ + u_int32_t read_count; + u_int32_t write_count; +#endif + +#define DB_FH_ENVLINK 0x01 /* We're linked on the DB_ENV. */ +#define DB_FH_NOSYNC 0x02 /* Handle doesn't need to be sync'd. */ +#define DB_FH_OPENED 0x04 /* Handle is valid. */ +#define DB_FH_UNLINK 0x08 /* Unlink on close */ +#define DB_FH_REGION 0x10 /* Opened to contain a region */ + u_int8_t flags; +}; + +/* Standard buffer size for ctime/ctime_r function calls. */ +#define CTIME_BUFLEN 26 + +/* + * VxWorks requires we cast (const char *) variables to (char *) in order to + * pass them to system calls like stat, read and write. + */ +#ifdef HAVE_VXWORKS +#define CHAR_STAR_CAST (char *) +#define VOID_STAR_CAST (void *) +#else +#define CHAR_STAR_CAST +#define VOID_STAR_CAST +#endif + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/os_ext.h" +#endif /* !_DB_OS_H_ */ diff --git a/src/dbinc/partition.h b/src/dbinc/partition.h new file mode 100644 index 00000000..55336b35 --- /dev/null +++ b/src/dbinc/partition.h @@ -0,0 +1,55 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * $Id$ + */ +#ifndef _DB_PART_H_ +#define _DB_PART_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct __db_partition { + u_int32_t nparts; /* number of partitions. */ + DBT *keys; /* array of range keys. */ + void *data; /* the partion info. */ + const char **dirs; /* locations for partitions. */ + DB **handles; /* array of partition handles. */ + u_int32_t (*callback) (DB *, DBT *); +#define PART_CALLBACK 0x01 +#define PART_RANGE 0x02 + u_int32_t flags; +} DB_PARTITION; + +/* + * Internal part of a partitoned cursor. 
+ */ +typedef struct __part_internal { + __DBC_INTERNAL + u_int32_t part_id; + DBC *sub_cursor; +} PART_CURSOR; + +#ifdef HAVE_PARTITION +#define PART_NAME "__dbp.%s.%03d" +#define PART_LEN (strlen("__dbp..")+3) +#define PART_PREFIX "__dbp." + +#define DB_IS_PARTITIONED(dbp) \ + (dbp->p_internal != NULL && \ + ((DB_PARTITION *)dbp->p_internal)->handles != NULL) + +#define DBC_PART_REFRESH(dbc) (F_SET(dbc, DBC_PARTITIONED)) +#else +#define DBC_PART_REFRESH(dbc) +#define DB_IS_PARTITIONED(dbp) (0) +#endif + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/src/dbinc/perfmon.h b/src/dbinc/perfmon.h new file mode 100644 index 00000000..84017695 --- /dev/null +++ b/src/dbinc/perfmon.h @@ -0,0 +1,103 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_PERFMON_H_ +#define _DB_PERFMON_H_ + +/******************************************************* + * Oracle Berkeley DB Performance Event Monitoring + * + * Some events inside of Oracle Berkeley DB can be 'published' + * to the operating environment's performance tracing system + * as they occur. Current support includes + * --enable-dtrace + * Solaris + * Linux (via SystemTap's dtrace wrappers) + * Darwin (Mac OS X) + * QNX(?) + * + ******************************************************/ + +/* + * The performance monitoring system can display many of the statistics which + * are obtainable through the {DB,DB_ENV}->xxx_stat() functions. By default + * they are excluded. They can be enabled with --enable-perfmon-statistics. 
+ */ +#ifdef HAVE_PERFMON_STATISTICS +#define STAT_PERFMON1(env, cat, id, a1) PERFMON1(env, cat, id, (a1)) +#define STAT_PERFMON2(env, cat, id, a1, a2) \ + PERFMON2(env, cat, id, (a1), (a2)) +#define STAT_PERFMON3(env, cat, id, a1, a2, a3) \ + PERFMON3(env, cat, id, (a1), (a2), (a3)) +#else +#define STAT_PERFMON1(env, cat, id, a1) NOP_STATEMENT +#define STAT_PERFMON2(env, cat, id, a1, a2) NOP_STATEMENT +#define STAT_PERFMON3(env, cat, id, a1, a2, a3) NOP_STATEMENT +#endif + + +#if defined(HAVE_PERFMON) && defined(HAVE_STATISTICS) +/* + * The DTrace macros which are generated at configure time in db_provider.h can + * have full function signatures. These declarations are needed for compilation + * when DTrace support is enabled. It is "too early" in the include sequence + * to include the header files which define these structs. + */ +struct _db_page; +struct __bh; +struct __db_dbt; +struct __sh_dbt; +struct __db_mutex_t; + +#if defined(HAVE_DTRACE) +/* + * Solaris 10, Darwin/Mac OS X starting in 10.6 (Snow Leopard), Linux with + * the DTrace-compatible version of SystemTap, possibly QNX. 
+ */ +#include "db_provider.h" + +#define PERFMON0(env, cat, id) bdb_##cat##_##id() +#define PERFMON1(env, cat, id, a1) bdb_##cat##_##id(a1) +#define PERFMON2(env, cat, id, a1, a2) \ + bdb_##cat##_##id((a1), (a2)) +#define PERFMON3(env, cat, id, a1, a2, a3) \ + do { \ + if (PERFMON_ENABLED(env, cat, id)) \ + bdb_##cat##_##id((a1), (a2), (a3)); \ + } while (0) +#define PERFMON4(env, cat, id, a1, a2, a3, a4) \ + do { \ + if (PERFMON_ENABLED(env, cat, id)) \ + bdb_##cat##_##id((a1), (a2), (a3), (a4)); \ + } while (0) +#define PERFMON5(env, cat, id, a1, a2, a3, a4, a5) \ + do { \ + if (PERFMON_ENABLED(env, cat, id)) \ + bdb_##cat##_##id((a1), (a2), (a3), (a4), (a5)); \ + } while (0) +#define PERFMON6(env, cat, id, a1, a2, a3, a4, a5, a6) \ + do { \ + if (PERFMON_ENABLED(env, cat, id)) \ + bdb_##cat##_##id((a1), (a2), (a3), (a4), (a5), (a6)); \ + } while (0) +#define PERFMON_ENABLED(env, cat, id) bdb_##cat##_##id##_enabled() +#endif + +#else +/* Without HAVE_PERFMON or HAVE_STATISTICS these macros map to null bodies. */ +#define PERFMON0(env, cat, id) NOP_STATEMENT +#define PERFMON1(env, cat, id, a1) NOP_STATEMENT +#define PERFMON2(env, cat, id, a1, a2) NOP_STATEMENT +#define PERFMON3(env, cat, id, a1, a2, a3) NOP_STATEMENT +#define PERFMON4(env, cat, id, a1, a2, a3, a4) NOP_STATEMENT +#define PERFMON5(env, cat, id, a1, a2, a3, a4, a5) NOP_STATEMENT +#define PERFMON6(env, cat, id, a1, a2, a3, a4, a5, a6) NOP_STATEMENT +#define PERFMON_ENABLED(env, cat, id) FALSE +#endif + +#endif diff --git a/src/dbinc/qam.h b/src/dbinc/qam.h new file mode 100644 index 00000000..91de4185 --- /dev/null +++ b/src/dbinc/qam.h @@ -0,0 +1,199 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_QAM_H_ +#define _DB_QAM_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * QAM data elements: a status field and the data. 
+ */ +typedef struct _qamdata { + u_int8_t flags; /* 00: delete bit. */ +#define QAM_VALID 0x01 +#define QAM_SET 0x02 + u_int8_t data[1]; /* Record. */ +} QAMDATA; + +struct __queue; typedef struct __queue QUEUE; +struct __qcursor; typedef struct __qcursor QUEUE_CURSOR; + +struct __qcursor { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* Queue private part */ + + /* Per-thread information: queue private. */ + db_recno_t recno; /* Current record number. */ + + u_int32_t flags; +}; + +typedef struct __mpfarray { + u_int32_t n_extent; /* Number of extents in table. */ + u_int32_t low_extent; /* First extent open. */ + u_int32_t hi_extent; /* Last extent open. */ + struct __qmpf { + int pinref; + DB_MPOOLFILE *mpf; + } *mpfarray; /* Array of open extents. */ +} MPFARRAY; + +/* + * The in-memory, per-tree queue data structure. + */ +struct __queue { + db_pgno_t q_meta; /* Database meta-data page. */ + db_pgno_t q_root; /* Database root page. */ + + int re_pad; /* Fixed-length padding byte. */ + u_int32_t re_len; /* Length for fixed-length records. */ + u_int32_t rec_page; /* records per page */ + u_int32_t page_ext; /* Pages per extent */ + MPFARRAY array1, array2; /* File arrays. */ + + /* Extent file configuration: */ + DBT pgcookie; /* Initialized pgcookie. */ + DB_PGINFO pginfo; /* Initialized pginfo struct. */ + + char *path; /* Space allocated to file pathname. */ + char *name; /* The name of the file. */ + char *dir; /* The dir of the file. */ + int mode; /* Mode to open extents. */ +}; + +/* Format for queue extent names. */ +#define QUEUE_EXTENT "%s%c__dbq.%s.%d" +#define QUEUE_EXTENT_HEAD "__dbq.%s." +#define QUEUE_EXTENT_PREFIX "__dbq." + +typedef struct __qam_filelist { + DB_MPOOLFILE *mpf; + u_int32_t id; +} QUEUE_FILELIST; + +/* + * Calculate the page number of a recno. + * + * Number of records per page = + * Divide the available space on the page by the record len + header. 
+ * + * Page number for record = + * divide the physical record number by the records per page + * add the root page number + * For now the root page will always be 1, but we might want to change + * in the future (e.g. multiple fixed len queues per file). + * + * Index of record on page = + * physical record number, less the logical pno times records/page + */ +#define CALC_QAM_RECNO_PER_PAGE(dbp) \ + (((dbp)->pgsize - QPAGE_SZ(dbp)) / \ + (u_int32_t)DB_ALIGN((uintmax_t)SSZA(QAMDATA, data) + \ + ((QUEUE *)(dbp)->q_internal)->re_len, sizeof(u_int32_t))) + +#define QAM_RECNO_PER_PAGE(dbp) (((QUEUE*)(dbp)->q_internal)->rec_page) + +#define QAM_RECNO_PAGE(dbp, recno) \ + (((QUEUE *)(dbp)->q_internal)->q_root \ + + (((recno) - 1) / QAM_RECNO_PER_PAGE(dbp))) + +#define QAM_PAGE_EXTENT(dbp, pgno) \ + (((pgno) - 1) / ((QUEUE *)(dbp)->q_internal)->page_ext) + +#define QAM_RECNO_EXTENT(dbp, recno) \ + QAM_PAGE_EXTENT(dbp, QAM_RECNO_PAGE(dbp, recno)) + +#define QAM_RECNO_INDEX(dbp, pgno, recno) \ + (u_int32_t)(((recno) - 1) - (QAM_RECNO_PER_PAGE(dbp) \ + * (pgno - ((QUEUE *)(dbp)->q_internal)->q_root))) + +#define QAM_GET_RECORD(dbp, page, index) \ + ((QAMDATA *)((u_int8_t *)(page) + (QPAGE_SZ(dbp) + \ + (DB_ALIGN((uintmax_t)SSZA(QAMDATA, data) + \ + ((QUEUE *)(dbp)->q_internal)->re_len, sizeof(u_int32_t)) * index)))) + +#define QAM_AFTER_CURRENT(meta, recno) \ + ((recno) >= (meta)->cur_recno && \ + ((meta)->first_recno <= (meta)->cur_recno || \ + ((recno) < (meta)->first_recno && \ + (recno) - (meta)->cur_recno < (meta)->first_recno - (recno)))) + +#define QAM_BEFORE_FIRST(meta, recno) \ + ((recno) < (meta)->first_recno && \ + ((meta)->first_recno <= (meta)->cur_recno || \ + ((recno) > (meta)->cur_recno && \ + (recno) - (meta)->cur_recno > (meta)->first_recno - (recno)))) + +#define QAM_NOT_VALID(meta, recno) \ + (recno == RECNO_OOB || \ + QAM_BEFORE_FIRST(meta, recno) || QAM_AFTER_CURRENT(meta, recno)) + +#define QAM_WAKEUP(dbc, ret) do { \ + if (STD_LOCKING(dbc)) { \ + 
dbc->lock.pgno = PGNO_INVALID; \ + dbc->lock.type = DB_PAGE_LOCK; \ + ret = __lock_wakeup((dbc)->dbp->env, &(dbc)->lock_dbt); \ + } else \ + ret = 0; \ +} while (0) + +/* Handle wrap around. */ +#define QAM_INC_RECNO(recno) do { \ + recno++; \ +} while (recno == RECNO_OOB) + +#define QAM_DEC_RECNO(recno) do { \ + recno--; \ +} while (recno == RECNO_OOB) + + +/* + * Log opcodes for the mvptr routine. + */ +#define QAM_SETFIRST 0x01 +#define QAM_SETCUR 0x02 +#define QAM_TRUNCATE 0x04 + +typedef enum { + QAM_PROBE_GET, + QAM_PROBE_PUT, + QAM_PROBE_DIRTY, + QAM_PROBE_MPF +} qam_probe_mode; + +/* + * Ops for __qam_nameop. + */ +typedef enum { + QAM_NAME_DISCARD, + QAM_NAME_RENAME, + QAM_NAME_REMOVE +} qam_name_op; + +#define __qam_fget(dbc, pgnoaddr, flags, addrp) \ + __qam_fprobe(dbc, *pgnoaddr, \ + addrp, QAM_PROBE_GET, DB_PRIORITY_UNCHANGED, flags) + +#define __qam_fput(dbc, pgno, addrp, priority) \ + __qam_fprobe(dbc, pgno, addrp, QAM_PROBE_PUT, priority, 0) + +#define __qam_dirty(dbc, pgno, pagep, priority) \ + __qam_fprobe(dbc, pgno, pagep, QAM_PROBE_DIRTY, priority, 0) + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/qam_auto.h" +#include "dbinc_auto/qam_ext.h" +#endif /* !_DB_QAM_H_ */ diff --git a/src/dbinc/queue.h b/src/dbinc/queue.h new file mode 100644 index 00000000..0198febd --- /dev/null +++ b/src/dbinc/queue.h @@ -0,0 +1,570 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $ + */ + +#ifndef _DB_QUEUE_H_ +#define _DB_QUEUE_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. 
New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. 
A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. + * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_REVERSE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * + */ + +/* + * XXX + * We #undef all of the macros because there are incompatible versions of this + * file and these macros on various systems. What makes the problem worse is + * they are included and/or defined by system include files which we may have + * already loaded into Berkeley DB before getting here. For example, FreeBSD's + * <db.h> includes its system <queue.h>, and VxWorks UnixLib.h defines + * several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these + * same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours. 
+ */ +#undef LIST_EMPTY +#undef LIST_ENTRY +#undef LIST_FIRST +#undef LIST_FOREACH +#undef LIST_HEAD +#undef LIST_HEAD_INITIALIZER +#undef LIST_INIT +#undef LIST_INSERT_AFTER +#undef LIST_INSERT_BEFORE +#undef LIST_INSERT_HEAD +#undef LIST_NEXT +#undef LIST_REMOVE +#undef QMD_TRACE_ELEM +#undef QMD_TRACE_HEAD +#undef QUEUE_MACRO_DEBUG +#undef SLIST_EMPTY +#undef SLIST_ENTRY +#undef SLIST_FIRST +#undef SLIST_FOREACH +#undef SLIST_FOREACH_PREVPTR +#undef SLIST_HEAD +#undef SLIST_HEAD_INITIALIZER +#undef SLIST_INIT +#undef SLIST_INSERT_AFTER +#undef SLIST_INSERT_HEAD +#undef SLIST_NEXT +#undef SLIST_REMOVE +#undef SLIST_REMOVE_HEAD +#undef STAILQ_CONCAT +#undef STAILQ_EMPTY +#undef STAILQ_ENTRY +#undef STAILQ_FIRST +#undef STAILQ_FOREACH +#undef STAILQ_HEAD +#undef STAILQ_HEAD_INITIALIZER +#undef STAILQ_INIT +#undef STAILQ_INSERT_AFTER +#undef STAILQ_INSERT_HEAD +#undef STAILQ_INSERT_TAIL +#undef STAILQ_LAST +#undef STAILQ_NEXT +#undef STAILQ_REMOVE +#undef STAILQ_REMOVE_HEAD +#undef STAILQ_REMOVE_HEAD_UNTIL +#undef TAILQ_CONCAT +#undef TAILQ_EMPTY +#undef TAILQ_ENTRY +#undef TAILQ_FIRST +#undef TAILQ_FOREACH +#undef TAILQ_FOREACH_REVERSE +#undef TAILQ_HEAD +#undef TAILQ_HEAD_INITIALIZER +#undef TAILQ_INIT +#undef TAILQ_INSERT_AFTER +#undef TAILQ_INSERT_BEFORE +#undef TAILQ_INSERT_HEAD +#undef TAILQ_INSERT_TAIL +#undef TAILQ_LAST +#undef TAILQ_NEXT +#undef TAILQ_PREV +#undef TAILQ_REMOVE +#undef TRACEBUF +#undef TRASHIT + +#define QUEUE_MACRO_DEBUG 0 +#if QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define 
QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. + */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (curelm != NULL && \ + SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + if (curelm != NULL) \ + SLIST_NEXT(curelm, field) = \ + SLIST_NEXT(SLIST_NEXT(curelm, field), field);\ + } \ +} while 
(0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for ((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? 
\ + NULL : \ + ((struct type *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + if ((STAILQ_NEXT(curelm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ + } \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do { \ + if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ +} while (0) + +/* + * Tail queue declarations. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. 
+ */ +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = 
(elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT((elm)->field.tqe_next); \ + TRASHIT((elm)->field.tqe_prev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_QUEUE_H_ */ diff --git a/src/dbinc/region.h b/src/dbinc/region.h new file mode 100644 index 00000000..001be762 --- /dev/null +++ b/src/dbinc/region.h @@ -0,0 +1,327 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_REGION_H_ +#define _DB_REGION_H_ + +/* + * The DB environment consists of some number of "regions", which are described + * by the following four structures: + * + * REGENV -- shared information about the environment + * REGENV_REF -- file describing system memory version of REGENV + * REGION -- shared information about a single region + * REGINFO -- per-process information about a REGION + * + * There are three types of memory that hold regions: + * per-process heap (malloc) + * file mapped into memory (mmap, MapViewOfFile) + * system memory (shmget, CreateFileMapping) + * + * By default, regions are created in filesystem-backed shared memory. They + * can also be created in system shared memory (DB_SYSTEM_MEM), or, if private + * to a process, in heap memory (DB_PRIVATE). + * + * Regions in the filesystem are named "__db.001", "__db.002" and so on. If + * we're not using a private environment allocated in heap, "__db.001" will + * always exist, as we use it to synchronize on the regions, whether they are + * in filesystem-backed memory or system memory. + * + * The file "__db.001" contains a REGENV structure pointing to an + * array of REGION structures. Each REGION structures describes an + * underlying chunk of shared memory. + * + * __db.001 + * +---------+ + * |REGENV | + * +---------+ + * | + * \/ + * +---------+ +----------+ + * |REGION |-> | __db.001 | + * | | +----------+ + * +---------+ +----------+ + * |REGION |-> | __db.002 | + * | | +----------+ + * +---------+ +----------+ + * |REGION |-> | __db.003 | + * | | +----------+ + * +---------+ +----------+ + * |REGION |-> | __db.004 | + * | | +----------+ + * +---------+ + * + * The tricky part about manipulating the regions is creating or joining the + * database environment. We have to be sure only a single thread of control + * creates and/or recovers a database environment. All other threads should + * then join without seeing inconsistent data. 
+ * + * We do this in two parts: first, we use the underlying O_EXCL flag to the + * open system call to serialize creation of the __db.001 file. The thread + * of control creating that file then proceeds to create the remaining + * regions in the environment, including the mutex region. Once the mutex + * region has been created, the creating thread of control fills in the + * __db.001 file's magic number. Other threads of control (the ones that + * didn't create the __db.001 file), wait on the initialization of the + * __db.001 file's magic number. After it has been initialized, all threads + * of control can proceed, using normal shared mutex locking procedures for + * exclusion. + * + * REGIONs are not moved or removed during the life of the environment, and + * so processes can have long-lived references to them. + * + * One of the REGION structures describes the environment region itself. + * + * The REGION array is not locked in any way. It's an array so we don't have + * to manipulate data structures after a crash -- on some systems, we have to + * join and clean up the mutex region after application failure. Using an + * array means we don't have to worry about broken links or other nastiness + * after the failure. + * + * All requests to create or join a region return a REGINFO structure, which + * is held by the caller and used to open and subsequently close the reference + * to the region. The REGINFO structure contains the per-process information + * that we need to access the region. + * + * The one remaining complication. If the regions (including the environment + * region) live in system memory, and the system memory isn't "named" somehow + * in the filesystem name space, we need some way of finding it. Do this by + * by writing the REGENV_REF structure into the "__db.001" file. When we find + * a __db.001 file that is too small to be a real, on-disk environment, we use + * the information it contains to redirect to the real "__db.001" file/memory. 
+ * This currently only happens when the REGENV file is in shared system memory. + * + * Although DB does not currently grow regions when they run out of memory, it + * would be possible to do so. To grow a region, allocate a new region of the + * appropriate size, then copy the old region over it and insert the additional + * memory into the already existing shalloc arena. Region users must reset + * their base addresses and any local pointers into the memory, of course. + * This failed in historic versions of DB because the region mutexes lived in + * the mapped memory, and when it was unmapped and remapped (or copied), + * threads could lose track of it. Also, some systems didn't support mutex + * copying, e.g., from OSF1 V4.0: + * + * The address of an msemaphore structure may be significant. If the + * msemaphore structure contains any value copied from an msemaphore + * structure at a different address, the result is undefined. + * + * All mutexes are now maintained in a separate region which is never unmapped, + * so growing regions should be possible. + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DB_REGION_PREFIX "__db" /* DB file name prefix. */ +#define DB_REGION_FMT "__db.%03d" /* Region file name format. */ +#define DB_REGION_ENV "__db.001" /* Primary environment name. */ + +#define INVALID_REGION_ID 0 /* Out-of-band region ID. */ +#define REGION_ID_ENV 1 /* Primary environment ID. */ + +typedef enum { + INVALID_REGION_TYPE=0, /* Region type. */ + REGION_TYPE_ENV, + REGION_TYPE_LOCK, + REGION_TYPE_LOG, + REGION_TYPE_MPOOL, + REGION_TYPE_MUTEX, + REGION_TYPE_TXN } reg_type_t; + +#define INVALID_REGION_SEGID -1 /* Segment IDs are either shmget(2) or + * Win16 segment identifiers. They are + * both stored in a "long", and we need + * an out-of-band value. + */ +/* + * Nothing can live at region offset 0, because, in all cases, that's where + * we store *something*. 
Lots of code needs an out-of-band value for region + * offsets, so we use 0. + */ +#define INVALID_ROFF 0 + +/* Reference describing system memory version of REGENV. */ +typedef struct __db_reg_env_ref { + roff_t size; /* Region size. */ + roff_t max; /* Region max in bytes. */ + long segid; /* UNIX shmget ID, VxWorks ID. */ +} REGENV_REF; + +/* Per-environment region information. */ +typedef struct __db_reg_env { /* SHARED */ + /* + * !!! + * The magic, panic, version, envid and signature fields of the region + * are fixed in size, the timestamp field is the first field which is + * variable length. These fields must never change in order, to + * guarantee we can always read them, no matter what release we have. + * + * !!! + * The magic and panic fields are NOT protected by any mutex, and for + * this reason cannot be anything more complicated than zero/non-zero. + */ + u_int32_t magic; /* Valid region magic number. */ + u_int32_t panic; /* Environment is dead. */ + + u_int32_t majver; /* Major DB version number. */ + u_int32_t minver; /* Minor DB version number. */ + u_int32_t patchver; /* Patch DB version number. */ + + u_int32_t envid; /* Unique environment ID. */ + + u_int32_t signature; /* Structure signatures. */ + + time_t timestamp; /* Creation time. */ + + /* + * Flags saved in the init_flags field of the environment, representing + * flags to DB_ENV->set_flags and DB_ENV->open that need to be set. 
+ */ + u_int32_t init_flags; +#define DB_INITENV_CDB 0x0001 /* DB_INIT_CDB */ +#define DB_INITENV_CDB_ALLDB 0x0002 /* DB_INIT_CDB_ALLDB */ +#define DB_INITENV_LOCK 0x0004 /* DB_INIT_LOCK */ +#define DB_INITENV_LOG 0x0008 /* DB_INIT_LOG */ +#define DB_INITENV_MPOOL 0x0010 /* DB_INIT_MPOOL */ +#define DB_INITENV_REP 0x0020 /* DB_INIT_REP */ +#define DB_INITENV_TXN 0x0040 /* DB_INIT_TXN */ + + + /* + * The mtx_regenv mutex protects the environment reference count and + * memory allocation from the primary shared region (the crypto, thread + * control block and replication implementations allocate memory from + * the primary shared region). + * + * The rest of the fields are initialized at creation time, and don't + * need mutex protection. The flags, op_timestamp and rep_timestamp + * fields are used by replication only and are protected by the + * replication mutex. The rep_timestamp is not protected when it + * is used in recovery as that is already single threaded. + */ + db_mutex_t mtx_regenv; /* Refcnt, region allocation mutex. */ + u_int32_t refcnt; /* References to the environment. */ + + u_int32_t region_cnt; /* Number of REGIONs. */ + roff_t region_off; /* Offset of region array */ + roff_t lt_primary; /* Lock primary. */ + roff_t lg_primary; /* Log primary. */ + roff_t tx_primary; /* Txn primary. */ + + roff_t cipher_off; /* Offset of cipher area */ + + roff_t thread_off; /* Offset of the thread area. */ + + roff_t rep_off; /* Offset of the replication area. */ +#define DB_REGENV_REPLOCKED 0x0001 /* Env locked for rep backup. */ + u_int32_t flags; /* Shared environment flags. */ +#define DB_REGENV_TIMEOUT 30 /* Backup timeout. */ + time_t op_timestamp; /* Timestamp for operations. */ + time_t rep_timestamp; /* Timestamp for rep db handles. */ + u_int32_t reg_panic; /* DB_REGISTER triggered panic */ + uintmax_t unused; /* The ALLOC_LAYOUT structure follows + * the REGENV structure in memory and + * contains uintmax_t fields. 
Force + * proper alignment of that structure. + */ +} REGENV; + +/* Per-region shared region information. */ +typedef struct __db_region { /* SHARED */ + roff_t size; /* Region size in bytes. */ + roff_t max; /* Region max in bytes. */ + long segid; /* UNIX shmget(2), Win16 segment ID. */ + + u_int32_t id; /* Region id. */ + reg_type_t type; /* Region type. */ + + roff_t primary; /* Primary data structure offset. */ + roff_t alloc; /* Region allocation size in bytes. */ +} REGION; + +/* + * Per-process/per-attachment information about a single region. + */ + +/* + * Structure used for tracking allocations in DB_PRIVATE regions. + */ +struct __db_region_mem_t; typedef struct __db_region_mem_t REGION_MEM; +struct __db_region_mem_t { + REGION_MEM *next; +}; + +struct __db_reginfo_t { /* __env_region_attach IN parameters. */ + ENV *env; /* Enclosing environment. */ + reg_type_t type; /* Region type. */ + u_int32_t id; /* Region id. */ + + /* env_region_attach OUT parameters. */ + REGION *rp; /* Shared region. */ + + char *name; /* Region file name. */ + DB_FH *fhp; /* Region file handle */ + + void *addr; /* Region address. */ + void *head; /* Head of the allocation struct. */ + void *primary; /* Primary data structure address. */ + + /* Private Memory Tracking. */ + size_t max_alloc; /* Maximum bytes allocated. */ + size_t allocated; /* Bytes allocated. */ + REGION_MEM *mem; /* List of memory to free */ + + db_mutex_t mtx_alloc; /* number of mutex for allocation. */ + +#ifdef DB_WIN32 + HANDLE wnt_handle; /* Win/NT HANDLE. */ +#endif + +#define REGION_CREATE 0x01 /* Caller created region. */ +#define REGION_CREATE_OK 0x02 /* Caller willing to create region. */ +#define REGION_JOIN_OK 0x04 /* Caller is looking for a match. */ +#define REGION_SHARED 0x08 /* Region is shared. */ +#define REGION_TRACKED 0x10 /* Region private memory is tracked. */ + u_int32_t flags; +}; + +/* + * R_ADDR Return a per-process address for a shared region offset. 
+ * R_OFFSET Return a shared region offset for a per-process address. + */ +#define R_ADDR(reginfop, offset) \ + (F_ISSET((reginfop)->env, ENV_PRIVATE) ? \ + ROFF_TO_P(offset) : \ + (void *)((u_int8_t *)((reginfop)->addr) + (offset))) +#define R_OFFSET(reginfop, p) \ + (F_ISSET((reginfop)->env, ENV_PRIVATE) ? \ + P_TO_ROFF(p) : \ + (roff_t)((u_int8_t *)(p) - (u_int8_t *)(reginfop)->addr)) + +/* + * PANIC_ISSET, PANIC_CHECK: + * Check to see if the DB environment is dead. + */ +#define PANIC_ISSET(env) \ + ((env) != NULL && (env)->reginfo != NULL && \ + ((REGENV *)(env)->reginfo->primary)->panic != 0 && \ + !F_ISSET((env)->dbenv, DB_ENV_NOPANIC)) + +#define PANIC_CHECK(env) \ + if (PANIC_ISSET(env)) \ + return (__env_panic_msg(env)); + +#define PANIC_CHECK_RET(env, ret) \ + if (PANIC_ISSET(env)) \ + ret = (__env_panic_msg(env)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_REGION_H_ */ diff --git a/src/dbinc/rep.h b/src/dbinc/rep.h new file mode 100644 index 00000000..d6750872 --- /dev/null +++ b/src/dbinc/rep.h @@ -0,0 +1,1087 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_REP_H_ +#define _DB_REP_H_ + +#include "dbinc_auto/rep_automsg.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Names of client temp databases. + */ +#define REPFILEPREFIX "__db.rep" +#define REPDBNAME "__db.rep.db" +#define REPPAGENAME "__db.reppg.db" + +/* + * Name of replicated system database file, and LSN history subdatabase within + * it. If the INMEM config flag is set, we create the database in memory, with + * the REPLSNHIST name (so that is why it also follows the __db naming + * convention). + */ +#define REPSYSDBNAME "__db.rep.system" +#define REPLSNHIST "__db.lsn.history" +#define REPMEMBERSHIP "__db.membership" +#define REPSYSDBPGSZ 1024 + +/* Current version of commit token format, and LSN history database format. 
*/ +#define REP_COMMIT_TOKEN_FMT_VERSION 1 +#define REP_LSN_HISTORY_FMT_VERSION 1 + +/* + * Message types + */ +#define REP_INVALID 0 /* Invalid message type. */ +#define REP_ALIVE 1 /* I am alive message. */ +#define REP_ALIVE_REQ 2 /* Request for alive messages. */ +#define REP_ALL_REQ 3 /* Request all log records greater than LSN. */ +#define REP_BULK_LOG 4 /* Bulk transfer of log records. */ +#define REP_BULK_PAGE 5 /* Bulk transfer of pages. */ +#define REP_DUPMASTER 6 /* Duplicate master detected; propagate. */ +#define REP_FILE 7 /* Page of a database file. NOTUSED */ +#define REP_FILE_FAIL 8 /* File requested does not exist. */ +#define REP_FILE_REQ 9 /* Request for a database file. NOTUSED */ +#define REP_LEASE_GRANT 10 /* Client grants a lease to a master. */ +#define REP_LOG 11 /* Log record. */ +#define REP_LOG_MORE 12 /* There are more log records to request. */ +#define REP_LOG_REQ 13 /* Request for a log record. */ +#define REP_MASTER_REQ 14 /* Who is the master */ +#define REP_NEWCLIENT 15 /* Announces the presence of a new client. */ +#define REP_NEWFILE 16 /* Announce a log file change. */ +#define REP_NEWMASTER 17 /* Announces who the master is. */ +#define REP_NEWSITE 18 /* Announces that a site has heard from a new + * site; like NEWCLIENT, but indirect. A + * NEWCLIENT message comes directly from the new + * client while a NEWSITE comes indirectly from + * someone who heard about a NEWSITE. + */ +#define REP_PAGE 19 /* Database page. */ +#define REP_PAGE_FAIL 20 /* Requested page does not exist. */ +#define REP_PAGE_MORE 21 /* There are more pages to request. */ +#define REP_PAGE_REQ 22 /* Request for a database page. */ +#define REP_REREQUEST 23 /* Force rerequest. */ +#define REP_START_SYNC 24 /* Tell client to begin syncing a ckp.*/ +#define REP_UPDATE 25 /* Environment hotcopy information. */ +#define REP_UPDATE_REQ 26 /* Request for hotcopy information. */ +#define REP_VERIFY 27 /* A log record for verification. 
*/ +#define REP_VERIFY_FAIL 28 /* The client is outdated. */ +#define REP_VERIFY_REQ 29 /* Request for a log record to verify. */ +#define REP_VOTE1 30 /* Send out your information for an election. */ +#define REP_VOTE2 31 /* Send a "you are master" vote. */ +/* + * Maximum message number for conversion tables. Update this + * value as the largest message number above increases. + * It might make processing messages more straightforward if + * the *_MORE and BULK* messages were flags within the regular + * message type instead of separate message types themselves. + * + * !!! + * NOTE: When changing messages above, the two tables for upgrade support + * need adjusting. They are in rep_util.c. + */ +#define REP_MAX_MSG 31 + +/* + * This is the list of client-to-client requests messages. + * We use this to decide if we're doing client-to-client and + * might need to send a rerequest. + */ +#define REP_MSG_REQ(rectype) \ + (rectype == REP_ALL_REQ || \ + rectype == REP_LOG_REQ || \ + rectype == REP_PAGE_REQ || \ + rectype == REP_VERIFY_REQ) + +/* + * Note that the version information should be at the beginning of the + * structure, so that we can rearrange the rest of it while letting the + * version checks continue to work. DB_REPVERSION should be revved any time + * the rest of the structure changes or when the message numbers change. + * + * Define also, the corresponding log versions that are tied to the + * replication/release versions. These are only needed in replication + * and that is why they're defined here. db_printlog takes notice as well. 
+ */ +#define DB_LOGVERSION_42 8 +#define DB_LOGVERSION_43 10 +#define DB_LOGVERSION_44 11 +#define DB_LOGVERSION_45 12 +#define DB_LOGVERSION_46 13 +#define DB_LOGVERSION_47 14 +#define DB_LOGVERSION_48 15 +#define DB_LOGVERSION_48p2 16 +#define DB_LOGVERSION_50 17 +#define DB_LOGVERSION_51 17 +#define DB_LOGVERSION_52 18 +#define DB_LOGVERSION_MIN DB_LOGVERSION_44 +#define DB_REPVERSION_INVALID 0 +#define DB_REPVERSION_44 3 +#define DB_REPVERSION_45 3 +#define DB_REPVERSION_46 4 +#define DB_REPVERSION_47 5 +#define DB_REPVERSION_48 5 +#define DB_REPVERSION_50 5 +#define DB_REPVERSION_51 5 +#define DB_REPVERSION_52 6 +#define DB_REPVERSION DB_REPVERSION_52 +#define DB_REPVERSION_MIN DB_REPVERSION_44 + +/* + * RPRINT - Replication diagnostic output + * VPRINT - Replication verbose output (superset of RPRINT). + * REP_PRINT_MESSAGE + * Macros for verbose replication messages. + * + * Everything using RPRINT will go to the system diag file (if it + * is configured) and also to the user's verbose output if + * they have that verbose level configured. + * Messages using VPRINT do not ever go to the system diag file, + * but will go to the user's verbose output if configured. + * + * Use VPRINT for anything that might be printed on a standard, + * successful transaction. Use RPRINT for error paths, rep + * state changes, elections, etc. + */ +#define REP_DIAGNAME "__db.rep.diag%02d" +#define REP_DIAGSIZE MEGABYTE +#define RPRINT(env, x) do { \ + if ((env)->dbenv->verbose != 0) \ + (void)__rep_print_system x; \ +} while (0) +#define VPRINT(env, x) do { \ + if ((env)->dbenv->verbose != 0) \ + (void)__rep_print x; \ +} while (0) +#define REP_PRINT_MESSAGE(env, eid, rp, str, fl) do { \ + if ((env)->dbenv->verbose != 0) \ + __rep_print_message(env, eid, rp, str, fl); \ +} while (0) + +/* + * Election gen file name + * The file contains an egen number for an election this client has NOT + * participated in. I.e. it is the number of a future election. 
We + * create it when we create the rep region, if it doesn't already exist + * and initialize egen to 1. If it does exist, we read it when we create + * the rep region. We write it immediately before sending our VOTE1 in + * an election. That way, if a client has ever sent a vote for any + * election, the file is already going to be updated to reflect a future + * election, should it crash. + */ +#define REP_EGENNAME "__db.rep.egen" +#define REP_GENNAME "__db.rep.gen" + +/* + * Internal init flag file name: + * The existence of this file serves as an indication that the client is in the + * process of Internal Initialization, in case it crashes before completing. + * During internal init the client's partially reconstructed database pages and + * logs may be in an inconsistent state, so much so that running recovery must + * be avoided. Furthermore, there is no other way to reliably recognize this + * condition. Therefore, when we open an environment, and we're just about to + * run recovery, we check for this file first. If it exists we must discard all + * logs and databases. This avoids the recovery problems, and leads to a fresh + * attempt at internal init if the environment becomes a replication client and + * finds a master. The list of databases which may need to be removed is stored + * in this file. + */ +#define REP_INITNAME "__db.rep.init" +#define REP_INITVERSION_46 1 +#define REP_INITVERSION_47 2 +#define REP_INITVERSION 3 + +/* + * Database types for __rep_client_dbinit + */ +typedef enum { + REP_DB, /* Log record database. */ + REP_PG /* Pg database. */ +} repdb_t; + +/* Macros to lock/unlock the replication region as a whole. */ +#define REP_SYSTEM_LOCK(env) \ + MUTEX_LOCK(env, (env)->rep_handle->region->mtx_region) +#define REP_SYSTEM_UNLOCK(env) \ + MUTEX_UNLOCK(env, (env)->rep_handle->region->mtx_region) + +/* + * Macros for manipulating the event synchronization. 
We use a separate mutex + * so that an application's call-back function can be invoked without locking + * the whole region. + */ +#define REP_EVENT_LOCK(env) \ + MUTEX_LOCK(env, (env)->rep_handle->region->mtx_event) +#define REP_EVENT_UNLOCK(env) \ + MUTEX_UNLOCK(env, (env)->rep_handle->region->mtx_event) + +/* + * Synchronization states + * Please change __rep_syncstate_to_string (rep_stat.c) to track any changes + * made to these states. + * + * The states are in alphabetical order (except for OFF). The usual + * order of progression for a full internal init is: + * VERIFY, UPDATE, PAGE, LOG (then back to OFF when we're done). + */ +typedef enum { + SYNC_OFF, /* No recovery. */ + SYNC_LOG, /* Recovery - log. */ + SYNC_PAGE, /* Recovery - pages. */ + SYNC_UPDATE, /* Recovery - update. */ + SYNC_VERIFY /* Recovery - verify. */ +} repsync_t; + +/* + * A record of the contents of the VOTE1 msg we sent out at current egen, in + * case we need to send out a duplicate VOTE1 to a late-joining client in a full + * election. The nsites, nvotes, and priority fields of the REP struct can't be + * used, because those could change. It's only safe to send out a dup if we + * send out the exact same info. + */ +typedef struct { + DB_LSN lsn; + u_int32_t nsites; + u_int32_t nvotes; + u_int32_t priority; + u_int32_t tiebreaker; + u_int32_t ctlflags; + u_int32_t data_gen; +} VOTE1_CONTENT; + +/* + * REP -- + * Shared replication structure. + */ +typedef struct __rep { /* SHARED */ + db_mutex_t mtx_region; /* Region mutex. */ + db_mutex_t mtx_clientdb; /* Client database mutex. */ + db_mutex_t mtx_ckp; /* Checkpoint mutex. */ + db_mutex_t mtx_diag; /* Diagnostic message mutex. */ + db_mutex_t mtx_repstart; /* Role change mutex. */ + int diag_index; /* Diagnostic file index. */ + off_t diag_off; /* Diagnostic message offset. */ + roff_t lease_off; /* Offset of the lease table. */ + roff_t tally_off; /* Offset of the tally region. 
*/ + roff_t v2tally_off; /* Offset of the vote2 tally region. */ + int eid; /* Environment id. */ + int master_id; /* ID of the master site. */ + u_int32_t version; /* Current replication version. */ + u_int32_t egen; /* Replication election generation. */ + u_int32_t spent_egen; /* Egen satisfied by rep_elect call. */ + u_int32_t gen; /* Replication generation number. */ + u_int32_t mgen; /* Master gen seen by client. */ + u_int32_t asites; /* Space allocated for sites. */ + u_int32_t nsites; /* Number of sites in group. */ + u_int32_t nvotes; /* Number of votes needed. */ + u_int32_t priority; /* My priority in an election. */ + u_int32_t config_nsites; + + db_timeout_t elect_timeout; /* Normal/full election timeouts. */ + db_timeout_t full_elect_timeout; + + db_timeout_t chkpt_delay; /* Master checkpoint delay. */ + +#define REP_DEFAULT_THROTTLE (10 * MEGABYTE) /* Default value is < 1Gig. */ + u_int32_t gbytes; /* Limit on data sent in single... */ + u_int32_t bytes; /* __rep_process_message call. */ +#define DB_REP_REQUEST_GAP 40000 /* 40 msecs */ +#define DB_REP_MAX_GAP 1280000 /* 1.28 seconds */ + db_timespec request_gap; /* Minimum time to wait before we + * request a missing log record. */ + db_timespec max_gap; /* Maximum time to wait before + * requesting a missing log record. */ + /* Status change information */ + u_int32_t apply_th; /* Number of callers in rep_apply. */ + u_int32_t arch_th; /* Number of callers in log_archive. */ + u_int32_t elect_th; /* Elect threads in lock-out. */ + u_int32_t msg_th; /* Number of callers in rep_proc_msg.*/ + u_int32_t handle_cnt; /* Count of handles in library. */ + u_int32_t op_cnt; /* Multi-step operation count.*/ + DB_LSN ckp_lsn; /* LSN for syncing a checkpoint. */ + DB_LSN max_prep_lsn; /* Max LSN of txn_prepare record. */ + + /* + * Event notification synchronization: the mtx_event and associate + * fields which it protects govern event notification to the + * application. 
They form a guarantee that no matter how crazy the + * thread scheduling gets, the application sees a sensible, orderly + * progression of events. + */ + db_mutex_t mtx_event; /* Serializes event notification. */ + /* + * Latest generation whose NEWMASTER event the application has been + * notified of. Also serves to force STARTUPDONE to occur after + * NEWMASTER. + */ + u_int32_t newmaster_event_gen; + /* + * Latest local victory of an election that the application has been + * notified of, expressed as the election generation number. This + * ensures we notify the application exactly once when it wins an + * election. + */ + u_int32_t notified_egen; + + /* Internal init information. */ + u_int32_t nfiles; /* Number of files we have info on. */ + u_int32_t curfile; /* Cur file we're getting (0-based). */ + roff_t originfo_off; /* Offset of original file info. */ + u_int32_t infolen; /* Remaining length file info buffer. */ + u_int32_t originfolen; /* Original length file info buffer. */ + u_int32_t infoversion; /* Original file info version. */ + DB_LSN first_lsn; /* Earliest LSN we need. */ + u_int32_t first_vers; /* Log version of first log file. */ + DB_LSN last_lsn; /* Latest LSN we need. */ + /* These are protected by mtx_clientdb. */ + db_timespec last_pg_ts; /* Last page stored timestamp. */ + db_pgno_t ready_pg; /* Next pg expected. */ + db_pgno_t waiting_pg; /* First pg after gap. */ + db_pgno_t max_wait_pg; /* Maximum pg requested. */ + u_int32_t npages; /* Num of pages rcvd for this file. */ + roff_t curinfo_off; /* Offset of current file info. */ + /* Always access with GET_CURINFO(). */ + + /* Vote tallying information. */ + u_int32_t sites; /* Sites heard from. */ + int winner; /* Current winner EID. */ + u_int32_t w_priority; /* Winner priority. */ + u_int32_t w_gen; /* Winner generation. */ + u_int32_t w_datagen; /* Winner data generation. */ + DB_LSN w_lsn; /* Winner LSN. */ + u_int32_t w_tiebreaker; /* Winner tiebreaking value. 
*/ + u_int32_t votes; /* Number of votes for this site. */ + + VOTE1_CONTENT vote1; /* Valid until rep->egen changes. */ + + db_timespec etime; /* Election start timestamp. */ + int full_elect; /* Is current election a "full" one? */ + + /* Leases. */ + db_timeout_t lease_timeout; /* Lease timeout. */ + db_timespec lease_duration; /* Lease timeout with clock skew. */ + u_int32_t clock_skew; /* Clock skew. */ + u_int32_t clock_base; /* Clock scale factor base. */ + db_timespec grant_expire; /* Local grant expiration time. */ + + /* Cached LSN history, matching current gen. */ + DB_LSN gen_base_lsn; /* Base LSN of current generation. */ + u_int32_t master_envid; /* Current master's "unique" env ID. */ + + SH_TAILQ_HEAD(__wait) waiters; /* List of threads in txn_applied(). */ + SH_TAILQ_HEAD(__wfree) free_waiters;/* Free list of waiter structs. */ + +#ifdef HAVE_REPLICATION_THREADS + /* + * Replication Framework (repmgr) shared config information. + */ + db_mutex_t mtx_repmgr; /* Region mutex. */ + roff_t siteinfo_off; /* Offset of site array region. */ + u_int site_cnt; /* Array slots in use. */ + u_int site_max; /* Total array slots allocated. */ + int self_eid; /* Where to find the local site. */ + u_int siteinfo_seq; /* Number of updates to this info. */ + + pid_t listener; + + int perm_policy; + db_timeout_t ack_timeout; + db_timeout_t election_retry_wait; + db_timeout_t connection_retry_wait; + db_timeout_t heartbeat_frequency; /* Max period between msgs. */ + db_timeout_t heartbeat_monitor_timeout; +#endif /* HAVE_REPLICATION_THREADS */ + + /* Statistics. */ + DB_REP_STAT stat; +#if defined(HAVE_REPLICATION_THREADS) && defined(HAVE_STATISTICS) + DB_REPMGR_STAT mstat; +#endif + + /* + * Please change __rep_print_all (rep_stat.c) to track any changes made + * to all these flag families below. + */ + /* Configuration. */ +#define REP_C_2SITE_STRICT 0x00001 /* Don't cheat on elections. */ +#define REP_C_AUTOINIT 0x00002 /* Auto initialization. 
*/ +#define REP_C_AUTOROLLBACK 0x00004 /* Discard client txns: sync. */ +#define REP_C_BULK 0x00008 /* Bulk transfer. */ +#define REP_C_DELAYCLIENT 0x00010 /* Delay client sync-up. */ +#define REP_C_ELECTIONS 0x00020 /* Repmgr to use elections. */ +#define REP_C_INMEM 0x00040 /* In-memory replication. */ +#define REP_C_LEASE 0x00080 /* Leases configured. */ +#define REP_C_NOWAIT 0x00100 /* Immediate error return. */ + u_int32_t config; /* Configuration flags. */ + + /* Election. */ +#define REP_E_PHASE0 0x00000001 /* In phase 0 of election. */ +#define REP_E_PHASE1 0x00000002 /* In phase 1 of election. */ +#define REP_E_PHASE2 0x00000004 /* In phase 2 of election. */ +#define REP_E_TALLY 0x00000008 /* Tallied vote before elect. */ + u_int32_t elect_flags; /* Election flags. */ + + /* Lockout. */ +#define REP_LOCKOUT_API 0x00000001 /* BDB API - handle_cnt. */ +#define REP_LOCKOUT_APPLY 0x00000002 /* apply msgs - apply_th. */ +#define REP_LOCKOUT_ARCHIVE 0x00000004 /* log_archive. */ +#define REP_LOCKOUT_MSG 0x00000008 /* Message process - msg_th. */ +#define REP_LOCKOUT_OP 0x00000010 /* BDB ops txn,curs - op_cnt. */ + u_int32_t lockout_flags; /* Lockout flags. */ + + /* See above for enumerated sync states. */ + repsync_t sync_state; /* Recovery/synchronization flags. */ + + /* + * When adding a new flag value, consider whether it should be + * cleared in rep_start() when starting as a master or a client. + */ +#define REP_F_ABBREVIATED 0x00000001 /* Recover NIMDB pages only. */ +#define REP_F_APP_BASEAPI 0x00000002 /* Base API application. */ +#define REP_F_APP_REPMGR 0x00000004 /* repmgr application. */ +#define REP_F_CLIENT 0x00000008 /* Client replica. */ +#define REP_F_DELAY 0x00000010 /* Delaying client sync-up. */ +#define REP_F_GROUP_ESTD 0x00000020 /* Rep group is established. */ +#define REP_F_INUPDREQ 0x00000040 /* Thread in rep_update_req. */ +#define REP_F_LEASE_EXPIRED 0x00000080 /* Leases guaranteed expired. 
*/ +#define REP_F_MASTER 0x00000100 /* Master replica. */ +#define REP_F_MASTERELECT 0x00000200 /* Master elect. */ +#define REP_F_NEWFILE 0x00000400 /* Newfile in progress. */ +#define REP_F_NIMDBS_LOADED 0x00000800 /* NIMDBs are materialized. */ +#define REP_F_SKIPPED_APPLY 0x00001000 /* Skipped applying a record. */ +#define REP_F_START_CALLED 0x00002000 /* Rep_start called. */ +#define REP_F_SYS_DB_OP 0x00004000 /* Operation in progress. */ + u_int32_t flags; +} REP; + +/* Information about a thread waiting in txn_applied(). */ +typedef enum { + AWAIT_GEN, /* Client's gen is behind token gen. */ + AWAIT_HISTORY, /* Haven't received master's LSN db update. */ + AWAIT_LSN, /* Awaiting replication of user txn. */ + AWAIT_NIMDB, /* LSN db missing: maybe it's INMEM. */ + LOCKOUT /* Thread awoken due to pending lockout. */ +} rep_waitreason_t; + +struct rep_waitgoal { + rep_waitreason_t why; + union { + DB_LSN lsn; /* For AWAIT_LSN and AWAIT_HISTORY. */ + u_int32_t gen; /* AWAIT_GEN */ + } u; +}; + +struct __rep_waiter { + db_mutex_t mtx_repwait; /* Self-blocking mutex. */ + struct rep_waitgoal goal; + SH_TAILQ_ENTRY links; /* On either free or waiting list. */ + +#define REP_F_PENDING_LOCKOUT 0x00000001 +#define REP_F_WOKEN 0x00000002 + u_int32_t flags; +}; + +/* + * Macros to check and clear the BDB lockouts. Currently they are + * locked out/set individually because they pertain to different pieces of + * the BDB API, they are otherwise always checked and cleared together. + */ +#define ISSET_LOCKOUT_BDB(R) \ + (FLD_ISSET((R)->lockout_flags, (REP_LOCKOUT_API | REP_LOCKOUT_OP))) + +#define CLR_LOCKOUT_BDB(R) \ + (FLD_CLR((R)->lockout_flags, (REP_LOCKOUT_API | REP_LOCKOUT_OP))) + +/* + * Recovery flag mask to easily check any/all recovery bits. That is + * REP_LOCKOUT_{API|OP} and most REP_S_*. This must change if the values + * of the flags change. 
NOTE: We do not include REP_LOCKOUT_MSG in + * this mask because it is used frequently in non-recovery related + * areas and we want to manipulate it separately (see especially + * in __rep_new_master). + */ +#define CLR_RECOVERY_SETTINGS(R) \ +do { \ + (R)->sync_state = SYNC_OFF; \ + CLR_LOCKOUT_BDB(R); \ +} while (0) + +#define IS_REP_RECOVERING(R) \ + ((R)->sync_state != SYNC_OFF || ISSET_LOCKOUT_BDB(R)) + +/* + * REP_F_EPHASE0 is not a *real* election phase. It is used for + * master leases and allowing the client to find the master or + * expire its lease. However, EPHASE0 is cleared by __rep_elect_done. + */ +#define IN_ELECTION(R) \ + FLD_ISSET((R)->elect_flags, REP_E_PHASE1 | REP_E_PHASE2) +#define IN_ELECTION_TALLY(R) \ + FLD_ISSET((R)->elect_flags, REP_E_PHASE1 | REP_E_PHASE2 | REP_E_TALLY) +#define ELECTION_MAJORITY(n) (((n) / 2) + 1) + +#define IN_INTERNAL_INIT(R) \ + ((R)->sync_state == SYNC_LOG || (R)->sync_state == SYNC_PAGE) + +#define IS_REP_MASTER(env) \ + (REP_ON(env) && \ + F_ISSET(((env)->rep_handle->region), REP_F_MASTER)) + +#define IS_REP_CLIENT(env) \ + (REP_ON(env) && \ + F_ISSET(((env)->rep_handle->region), REP_F_CLIENT)) + +#define IS_REP_STARTED(env) \ + (REP_ON(env) && \ + F_ISSET(((env)->rep_handle->region), REP_F_START_CALLED)) + +#define IS_USING_LEASES(env) \ + (REP_ON(env) && \ + FLD_ISSET(((env)->rep_handle->region)->config, REP_C_LEASE)) + +#define IS_CLIENT_PGRECOVER(env) \ + (IS_REP_CLIENT(env) && \ + (((env)->rep_handle->region)->sync_state == SYNC_PAGE)) + +/* + * Macros to figure out if we need to do replication pre/post-amble processing. + * Skip for specific DB handles owned by the replication layer, either because + * replication is running recovery or because it's a handle entirely owned by + * the replication code (replication opens its own databases to track state). 
+ */ +#define REP_FLAGS_SET(env) \ + ((env)->rep_handle->region->flags != 0 || \ + (env)->rep_handle->region->elect_flags != 0 || \ + (env)->rep_handle->region->lockout_flags != 0) + +#define IS_ENV_REPLICATED(env) \ + (REP_ON(env) && REP_FLAGS_SET(env)) + +/* + * Update the temporary log archive block timer. + */ +#define MASTER_UPDATE(env, renv) do { \ + REP_SYSTEM_LOCK(env); \ + F_SET((renv), DB_REGENV_REPLOCKED); \ + (void)time(&(renv)->op_timestamp); \ + REP_SYSTEM_UNLOCK(env); \ +} while (0) + +/* + * Macro to set a new generation number. Cached values from the LSN history + * database are associated with the current gen, so when the gen changes we must + * invalidate the cache. Use this macro for all gen changes, to avoid + * forgetting to do so. This macro should be used while holding the rep system + * mutex (unless we know we're single-threaded for some other reason, like at + * region create time). + */ +#define SET_GEN(g) do { \ + rep->gen = (g); \ + ZERO_LSN(rep->gen_base_lsn); \ +} while (0) + + +/* + * Gap processing flags. These provide control over the basic + * gap processing algorithm for some special cases. + */ +#define REP_GAP_FORCE 0x001 /* Force a request for a gap. */ +#define REP_GAP_REREQUEST 0x002 /* Gap request is a forced rerequest. */ + /* REREQUEST is a superset of FORCE. */ + +/* + * Flags indicating what kind of record we want to back up to, in the log. + */ +#define REP_REC_COMMIT 0x001 /* Most recent commit record. */ +#define REP_REC_PERM 0x002 /* Most recent perm record. */ + /* PERM is a superset of COMMIT. */ + +/* + * Basic pre/post-amble processing. + */ +#define REPLICATION_WRAP(env, func_call, checklock, ret) do { \ + int __rep_check, __t_ret; \ + __rep_check = IS_ENV_REPLICATED(env) ? 1 : 0; \ + (ret) = __rep_check ? 
__env_rep_enter(env, checklock) : 0; \ + if ((ret) == 0) { \ + (ret) = func_call; \ + if (__rep_check && (__t_ret = \ + __env_db_rep_exit(env)) != 0 && (ret) == 0) \ + (ret) = __t_ret; \ + } \ +} while (0) + +/* + * Macro to safely access curinfo and its internal DBT pointers from + * any process. This should always be used to access curinfo. If + * the internal DBT pointers are to be used, mtx_clientdb must be held + * between the time of this call and the use of the pointers. + * + * The current file information (curinfo) is stored in shared region + * memory and accessed via an offset. It contains two DBTs that themselves + * point to allocated data. __rep_nextfile() manages this information in a + * single chunk of shared memory. + * + * If different processes access curinfo, they may have different shared + * region addresses. This means that curinfo and its pointers to DBT data + * must be recalculated for each process starting with the offset. + */ +#define GET_CURINFO(rep, infop, curinfo) \ +do { \ + curinfo = R_ADDR(infop, rep->curinfo_off); \ + if ((curinfo)->uid.size > 0) \ + (curinfo)->uid.data = R_ADDR(infop, \ + rep->curinfo_off + sizeof(__rep_fileinfo_args)); \ + else \ + (curinfo)->uid.data = NULL; \ + if ((curinfo)->info.size > 0) \ + (curinfo)->info.data = R_ADDR(infop, rep->curinfo_off + \ + sizeof(__rep_fileinfo_args) + (curinfo)->uid.size); \ + else \ + (curinfo)->info.data = NULL; \ +} while (0) + +/* + * Per-process replication structure. + * + * There are 2 mutexes used in the Base replication API. (See LOCK_MUTEX in + * repmgr.h for a discussion of repmgr.) + * 1. mtx_region - This protects the fields of the rep region above. + * 2. mtx_clientdb - This protects the per-process flags, and bookkeeping + * database and all of the components that maintain it. Those + * components include the following fields in the log region (see log.h): + * a. ready_lsn + * b. waiting_lsn + * c. verify_lsn + * d. wait_recs + * e. rcvd_recs + * f. 
max_wait_lsn + * These fields in the log region are NOT protected by the log region lock at + * all. + * + * Note that the per-process flags should truly be protected by a special + * per-process thread mutex, but it is currently set in so isolated a manner + * that it didn't make sense to do so and in most case we're already holding + * the mtx_clientdb anyway. + * + * The lock ordering protocol is that mtx_clientdb must be acquired first and + * then either REP->mtx_region, or the LOG->mtx_region mutex may be acquired if + * necessary. + * + * Note that the appropriate mutex is needed any time one or more related + * values are read or written that could possibly use more than one atomic + * machine instruction. A single 32-bit integer value is safe without a + * mutex, but most other types of value should use a mutex. + * + * Any use of a mutex must be inside a matched pair of ENV_ENTER() and + * ENV_LEAVE() macros. This ensures that if a thread dies while holding + * a lock (i.e. a mutex), recovery can clean it up so that it does not + * indefinitely block other threads. + */ +struct __db_rep { + /* + * Shared configuration information -- copied to and maintained in the + * shared region as soon as the shared region is created. + */ + int eid; /* Environment ID. */ + + u_int32_t gbytes; /* Limit on data sent in single... */ + u_int32_t bytes; /* __rep_process_message call. */ + + db_timespec request_gap; /* Minimum time to wait before we + * request a missing log record. */ + db_timespec max_gap; /* Maximum time to wait before + * requesting a missing log record. */ + + u_int32_t clock_skew; /* Clock skew factor. */ + u_int32_t clock_base; /* Clock skew base. */ + u_int32_t config; /* Configuration flags. */ + u_int32_t config_nsites; + + db_timeout_t elect_timeout; /* Normal/full election timeouts. */ + db_timeout_t full_elect_timeout; + + db_timeout_t chkpt_delay; /* Master checkpoint delay. 
*/ + + u_int32_t my_priority; + db_timeout_t lease_timeout; /* Master leases. */ + /* + * End of shared configuration information. + */ + int (*send) /* Send function. */ + __P((DB_ENV *, const DBT *, const DBT *, + const DB_LSN *, int, u_int32_t)); + + DB *rep_db; /* Bookkeeping database. */ + DB *lsn_db; /* (Replicated) LSN history database. */ + + REP *region; /* In memory structure. */ + u_int8_t *bulk; /* Shared memory bulk area. */ + +#define DBREP_DIAG_FILES 2 + DB_FH *diagfile[DBREP_DIAG_FILES]; /* Diag files fhp. */ + off_t diag_off; /* Current diag file offset. */ + + /* These are protected by mtx_clientdb. */ + DB_MPOOLFILE *file_mpf; /* Mpoolfile for current database. */ + DB *file_dbp; /* This file's page info. */ + DBC *queue_dbc; /* Dbc for a queue file. */ + + /* + * Please change __rep_print_all (rep_stat.c) to track any changes made + * to these flags. + */ +#define DBREP_APP_BASEAPI 0x0001 /* Base API application. */ +#define DBREP_APP_REPMGR 0x0002 /* repmgr application. */ +#define DBREP_OPENFILES 0x0004 /* This handle has opened files. */ + u_int32_t flags; /* per-process flags. */ + +#ifdef HAVE_REPLICATION_THREADS + /* + * Replication Framework (repmgr) per-process information. + */ + u_int nthreads; /* Msg processing threads. */ + u_int athreads; /* Space allocated for msg threads. */ + u_int non_rep_th; /* Threads in GMDB or channel msgs. */ + u_int aelect_threads; /* Space allocated for elect threads. */ + u_int32_t init_policy; + int perm_policy; + DB_LSN perm_lsn; /* Last perm LSN we've announced. */ + db_timeout_t ack_timeout; + db_timeout_t election_retry_wait; + db_timeout_t connection_retry_wait; + db_timeout_t heartbeat_frequency; /* Max period between msgs. */ + db_timeout_t heartbeat_monitor_timeout; + + /* Thread synchronization. 
*/ + REPMGR_RUNNABLE *selector, **messengers, **elect_threads; + REPMGR_RUNNABLE *preferred_elect_thr; + db_timespec repstart_time; + mgr_mutex_t *mutex; + cond_var_t check_election, gmdb_idle, msg_avail; + waiter_t ack_waiters; /* For threads awaiting PERM acks. */ +#ifdef DB_WIN32 + HANDLE signaler; +#else + int read_pipe, write_pipe; +#endif + + /* Operational stuff. */ + REPMGR_SITE *sites; /* Array of known sites. */ + u_int site_cnt; /* Array slots in use. */ + u_int site_max; /* Total array slots allocated. */ + int self_eid; /* Where to find the local site. */ + u_int siteinfo_seq; /* Last known update to this list. */ + + /* + * The connections list contains only those connections not actively + * associated with a known site (see repmgr.h). + */ + CONNECTION_LIST connections; + RETRY_Q_HEADER retries; /* Sites needing connection retry. */ + struct { + int size; + STAILQ_HEAD(__repmgr_q_header, __repmgr_message) header; + } input_queue; + + socket_t listen_fd; + db_timespec last_bcast; /* Time of last broadcast msg. */ + + int finished; /* Repmgr threads should shut down. */ + int new_connection; /* Since last master seek attempt. */ + int takeover_pending; /* We've been elected master. */ + int mgr_started; + int gmdb_busy; + int client_intent; /* Will relinquish master role. */ + int gmdb_dirty; + int have_gmdb; + int seen_repmsg; + + /* + * Flag to show what kind of transaction is currently in progress. + * Primary means we're doing the first (critical) phase of a membership + * DB update, where we care about perm failures. In the secondary phase + * we don't care. Usually the value is "none", when normal user + * transactions are happening. We need to use this global flag because + * we don't have a more proper direct channel to communicate information + * between the originator of a transaction and the replication send() + * function that has to wait for acks and decide what to do about them. 
+ */ + enum { none, gmdb_primary, gmdb_secondary } active_gmdb_update; + int limbo_resolution_needed; + + /* + * GMDB update sequence count. On creation we write version 1; so, once + * repmgr has started and tried to read, a 0 here can be taken to mean + * that the DB doesn't exist yet. + */ + u_int32_t membership_version; + u_int32_t member_version_gen; + + /* LSN of GMDB txn that got a perm failure. */ + DB_LSN limbo_failure; + /* EID whose membership status is therefore unresolved */ + int limbo_victim; + /* LSN of a later txn that achieves perm success. */ + DB_LSN durable_lsn; + DB *gmdb; /* Membership database handle. */ + /* + * Membership list restored from init file after crash during internal init. + */ + u_int8_t *restored_list; + size_t restored_list_length; + + /* Application's message dispatch call-back function. */ + void (*msg_dispatch) __P((DB_ENV *, DB_CHANNEL *, + DBT *, u_int32_t, u_int32_t)); +#endif /* HAVE_REPLICATION_THREADS */ +}; + +/* + * Determine whether application is repmgr or base replication API. If + * repmgr was configured, base the test on internal replication flags for + * APP_REPMGR and APP_BASEAPI. These flags get set by the appropriate parts + * of the various replication APIs. + */ +#ifdef HAVE_REPLICATION_THREADS +/* + * Application type is set to be repmgr when: + * 1. A local site is defined. + * 2. A remote site is defined. + * 3. An acknowledgement policy is configured. + * 4. A repmgr flag is configured. + * 5. A timeout value is configured for one of the repmgr timeouts. + */ +#define APP_IS_REPMGR(env) \ + (REP_ON(env) ? \ + F_ISSET((env)->rep_handle->region, REP_F_APP_REPMGR) : \ + F_ISSET((env)->rep_handle, DBREP_APP_REPMGR)) + +/* + * Application type is set to be base replication API when: + * 1. Transport send function is defined and is not the repmgr send + * function. + */ +#define APP_IS_BASEAPI(env) \ + (REP_ON(env) ? 
\ + F_ISSET((env)->rep_handle->region, REP_F_APP_BASEAPI) : \ + F_ISSET((env)->rep_handle, DBREP_APP_BASEAPI)) + +/* + * Set application type. These macros do extra checking to guarantee that + * only one application type is ever set. + */ +#define APP_SET_REPMGR(env) do { \ + if (REP_ON(env)) { \ + ENV_ENTER(env, ip); \ + REP_SYSTEM_LOCK(env); \ + if (!F_ISSET((env)->rep_handle->region, \ + REP_F_APP_BASEAPI)) \ + F_SET((env)->rep_handle->region, \ + REP_F_APP_REPMGR); \ + REP_SYSTEM_UNLOCK(env); \ + ENV_LEAVE(env, ip); \ + } else if (!F_ISSET((env)->rep_handle, DBREP_APP_BASEAPI)) \ + F_SET((env)->rep_handle, DBREP_APP_REPMGR); \ +} while (0) +#define APP_SET_BASEAPI(env) do { \ + if (REP_ON(env)) { \ + ENV_ENTER(env, ip); \ + REP_SYSTEM_LOCK(env); \ + if (!F_ISSET((env)->rep_handle->region, \ + REP_F_APP_REPMGR)) \ + F_SET((env)->rep_handle->region, \ + REP_F_APP_BASEAPI); \ + REP_SYSTEM_UNLOCK(env); \ + ENV_LEAVE(env, ip); \ + } else if (!F_ISSET((env)->rep_handle, DBREP_APP_REPMGR)) \ + F_SET((env)->rep_handle, DBREP_APP_BASEAPI); \ +} while (0) + +#else +/* + * We did not configure repmgr, application must be base replication API. + * The APP_SET_* macros are noops in this case, but they must be defined + * with a null body to avoid compiler warnings on some platforms. + */ +#define APP_IS_REPMGR(env) 0 +#define APP_SET_REPMGR(env) do { \ + ; \ +} while (0) +#define APP_IS_BASEAPI(env) 1 +#define APP_SET_BASEAPI(env) do { \ + ; \ +} while (0) +#endif /* HAVE_REPLICATION_THREADS */ + +/* + * Control structure flags for replication communication infrastructure. + */ +/* + * Define old DB_LOG_ values that we must support here. For reasons of + * compatibility with old versions, these values must be reserved explicitly in + * the list of flag values (below) + */ +#define DB_LOG_PERM_42_44 0x20 +#define DB_LOG_RESEND_42_44 0x40 +#define REPCTL_INIT_45 0x02 /* Back compatible flag value. */ + +#define REPCTL_ELECTABLE 0x01 /* Upgraded client is electable. 
*/ +#define REPCTL_FLUSH 0x02 /* Record should be flushed. */ +#define REPCTL_GROUP_ESTD 0x04 /* Message from site in a group. */ +#define REPCTL_INIT 0x08 /* Internal init message. */ +#define REPCTL_LEASE 0x10 /* Lease related message.. */ + /* + * Skip over reserved values 0x20 + * and 0x40, as explained above. + */ +#define REPCTL_LOG_END 0x80 /* Approximate end of group-wide log. */ +#define REPCTL_PERM DB_LOG_PERM_42_44 +#define REPCTL_RESEND DB_LOG_RESEND_42_44 + +/* + * File info flags for internal init. The per-database (i.e., file) flag + * represents the on-disk format of the file, and is conveyed from the master to + * the initializing client in the UPDATE message, so that the client can know + * how to create the file. The per-page flag is conveyed along with each PAGE + * message, describing the format of the page image being transmitted; it is of + * course set by the site serving the PAGE_REQ. The serving site gets the page + * image from its own mpool, and thus the page is in the native format of the + * serving site. This format may be different (i.e., opposite) from the on-disk + * format, and in fact can vary per-page, since with client-to-client sync it is + * possible for various different sites to serve the various PAGE_REQ requests. + */ +#define REPINFO_DB_LITTLEENDIAN 0x0001 /* File is little-endian lorder. */ +#define REPINFO_PG_LITTLEENDIAN 0x0002 /* Page is little-endian lorder. */ + +/* + * Control message format for 4.6 release. The db_timespec_t is + * not a portable structure. Therefore, in 4.6, replication among + * mixed OSs such as Linux and Windows, which have different time_t + * sizes, does not work. + */ +typedef struct { + u_int32_t rep_version; /* Replication version number. */ + u_int32_t log_version; /* Log version number. */ + + DB_LSN lsn; /* Log sequence number. */ + u_int32_t rectype; /* Message type. */ + u_int32_t gen; /* Generation number. */ + db_timespec msg_time; /* Timestamp seconds for leases. 
*/ + u_int32_t flags; /* log_put flag value. */ +} REP_46_CONTROL; + +/* + * Control message format for 4.5 release and earlier. + */ +typedef struct { + u_int32_t rep_version; /* Replication version number. */ + u_int32_t log_version; /* Log version number. */ + + DB_LSN lsn; /* Log sequence number. */ + u_int32_t rectype; /* Message type. */ + u_int32_t gen; /* Generation number. */ + u_int32_t flags; /* log_put flag value. */ +} REP_OLD_CONTROL; + +#define LEASE_REFRESH_MIN 30 /* Minimum number of refresh retries. */ +#define LEASE_REFRESH_USEC 50000 /* Microseconds between refresh tries. */ + +/* Master granted lease information. */ +typedef struct __rep_lease_entry { + int eid; /* EID of client grantor. */ + db_timespec start_time; /* Start time clients echo back. */ + db_timespec end_time; /* Master lease expiration time. */ + DB_LSN lease_lsn; /* Durable LSN lease applies to. */ +} REP_LEASE_ENTRY; + +/* + * Old vote info where some fields were not fixed size. + */ +typedef struct { + u_int32_t egen; /* Election generation. */ + int nsites; /* Number of sites I've been in + * communication with. */ + int nvotes; /* Number of votes needed to win. */ + int priority; /* My site's priority. */ + u_int32_t tiebreaker; /* Tie-breaking quasi-random value. */ +} REP_OLD_VOTE_INFO; + +typedef struct { + u_int32_t egen; /* Voter's election generation. */ + int eid; /* Voter's ID. */ +} REP_VTALLY; + +/* + * The REP_THROTTLE_ONLY flag is used to do throttle processing only. + * If set, it will only allow sending the REP_*_MORE message, but not + * the normal, non-throttled message. It is used to support throttling + * with bulk transfer. + */ +/* Flags for __rep_send_throttle. */ +#define REP_THROTTLE_ONLY 0x0001 /* Send _MORE message only. */ + +/* Throttled message processing information. */ +typedef struct { + DB_LSN lsn; /* LSN of this record. */ + DBT *data_dbt; /* DBT of this record. */ + u_int32_t gbytes; /* This call's max gbytes sent. 
*/ + u_int32_t bytes; /* This call's max bytes sent. */ + u_int32_t type; /* Record type. */ +} REP_THROTTLE; + +/* Bulk processing information. */ +/* + * !!! + * We use a roff_t for the offset. We'd really like to use a ptrdiff_t + * since that really is what it is. But ptrdiff_t is not portable and + * doesn't exist everywhere. + */ +typedef struct { + u_int8_t *addr; /* Address of bulk buffer. */ + roff_t *offp; /* Ptr to current offset into buffer. */ + u_int32_t len; /* Bulk buffer length. */ + u_int32_t type; /* Item type in buffer (log, page). */ + DB_LSN lsn; /* First LSN in buffer. */ + int eid; /* ID of potential recipients. */ +#define BULK_XMIT 0x001 /* Buffer in transit. */ + u_int32_t *flagsp; /* Buffer flags. */ +} REP_BULK; + +/* + * This structure takes care of representing a transaction. + * It holds all the records, sorted by page number so that + * we can obtain locks and apply updates in a deadlock free + * order. + */ +typedef struct { + u_int nlsns; + u_int nalloc; + DB_LSN *array; +} LSN_COLLECTION; + +/* + * This is used by the page-prep routines to do the lock_vec call to + * apply the updates for a single transaction or a collection of + * transactions. + */ +typedef struct { + int n; + DB_LOCKREQ *reqs; + DBT *objs; +} linfo_t; + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/rep_ext.h" +#endif /* !_DB_REP_H_ */ diff --git a/src/dbinc/repmgr.h b/src/dbinc/repmgr.h new file mode 100644 index 00000000..78a7b9d6 --- /dev/null +++ b/src/dbinc/repmgr.h @@ -0,0 +1,839 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_REPMGR_H_ +#define _DB_REPMGR_H_ + +#include "dbinc_auto/repmgr_automsg.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Replication Manager message format types. 
These few format codes identify + * enough information to describe, at the lowest level, how a message should be + * read from the wire, including how much memory should be allocated to hold the + * result. (Often we want to allocate more than just enough to hold the + * received bytes, if we know that we will need more during processing.) + * + * These values are transmitted between sites, even sites running differing BDB + * versions. Therefore, once assigned, the values are permanently "frozen". + * + * For example, in repmgr wire protocol version 1 the highest assigned message + * type value was 3, for REPMGR_REP_MESSAGE. Wire protocol version 2 added the + * HEARTBEAT message type (4). + * + * New message types added in later versions always get new (higher) values. We + * still list them in alphabetical order, for ease of reference. But this + * generally does not correspond to numerical order. + */ +#define REPMGR_APP_MESSAGE 5 /* Msg sent from app. on DB_CHANNEL. */ +#define REPMGR_APP_RESPONSE 6 /* Response to a channel request. */ +#define REPMGR_OWN_MSG 8 /* Repmgr's own messages, to peers. */ +#define REPMGR_HANDSHAKE 2 /* Connection establishment sequence. */ +#define REPMGR_HEARTBEAT 4 /* Monitor connection health. */ +#define REPMGR_PERMLSN 1 /* My perm LSN. */ +#define REPMGR_REP_MESSAGE 3 /* Normal replication message. */ +#define REPMGR_RESP_ERROR 7 /* Sys-gen'd error resp to request. */ + +/* + * Largest known message type code known in each protocol version we support. + * In protocol version one there were only three message types: 1, 2, and 3; so + * 3 was the max. In protocol version 2 we introduced heartbeats, type 4. + * (Protocol version 3 did not introduce any new message types.) In version 4 + * we introduced a few more new message types, the largest of which had value 7. 
+ */ +#define REPMGR_MAX_V1_MSG_TYPE 3 +#define REPMGR_MAX_V2_MSG_TYPE 4 +#define REPMGR_MAX_V3_MSG_TYPE 4 +#define REPMGR_MAX_V4_MSG_TYPE 8 +#define HEARTBEAT_MIN_VERSION 2 +#define CHANNEL_MIN_VERSION 4 +#define GM_MIN_VERSION 4 +#define OWN_MIN_VERSION 4 + +/* The range of protocol versions we're willing to support. */ +#define DB_REPMGR_VERSION 4 +#define DB_REPMGR_MIN_VERSION 1 + +/* + * For messages with the "REPMGR_OWN_MSG" format code, a message type (see + * REPMGR_OWN_MSG_TYPE, below) is included in the header. While at the lowest + * level, the format codes identify only enough to read and allocate memory, at + * the next higher level the following message type codes identify the content + * of the message: how to unmarshal and dispatch it. + * + * Like the message format types, these message type values should be + * permanently frozen. + */ +#define REPMGR_CONNECT_REJECT 1 +#define REPMGR_GM_FAILURE 2 +#define REPMGR_GM_FORWARD 3 +#define REPMGR_JOIN_REQUEST 4 +#define REPMGR_JOIN_SUCCESS 5 +#define REPMGR_PARM_REFRESH 6 +#define REPMGR_REJOIN 7 +#define REPMGR_REMOVE_REQUEST 8 +#define REPMGR_REMOVE_SUCCESS 9 +#define REPMGR_RESOLVE_LIMBO 10 +#define REPMGR_SHARING 11 + + +struct __repmgr_connection; + typedef struct __repmgr_connection REPMGR_CONNECTION; +struct __repmgr_queue; typedef struct __repmgr_queue REPMGR_QUEUE; +struct __queued_output; typedef struct __queued_output QUEUED_OUTPUT; +struct __repmgr_response; typedef struct __repmgr_response REPMGR_RESPONSE; +struct __repmgr_retry; typedef struct __repmgr_retry REPMGR_RETRY; +struct __repmgr_runnable; typedef struct __repmgr_runnable REPMGR_RUNNABLE; +struct __repmgr_site; typedef struct __repmgr_site REPMGR_SITE; +struct __cond_waiters_table; + typedef struct __cond_waiters_table COND_WAITERS_TABLE; + +/* Current Group Membership DB format ID. 
*/ +#define REPMGR_GMDB_FMT_VERSION 1 + +#ifdef DB_WIN32 +typedef SOCKET socket_t; +typedef HANDLE thread_id_t; +typedef HANDLE mgr_mutex_t; +typedef HANDLE cond_var_t; + +typedef COND_WAITERS_TABLE *waiter_t; +typedef WSABUF db_iovec_t; +#else +typedef int socket_t; +typedef pthread_t thread_id_t; +typedef pthread_mutex_t mgr_mutex_t; +typedef pthread_cond_t cond_var_t; +typedef pthread_cond_t waiter_t; +typedef struct iovec db_iovec_t; +#endif + +/* + * The (arbitrary) maximum number of outgoing messages we're willing to hold, on + * a queue per connection, waiting for TCP buffer space to become available in + * the kernel. Rather than exceeding this limit, we simply discard additional + * messages (since this is always allowed by the replication protocol). + * As a special dispensation, if a message is destined for a specific remote + * site (i.e., it's not a broadcast), then we first try blocking the sending + * thread, waiting for space to become available (though we only wait a limited + * time). This is so as to be able to handle the immediate flood of (a + * potentially large number of) outgoing messages that replication generates, in + * a tight loop, when handling PAGE_REQ, LOG_REQ and ALL_REQ requests. + */ +#define OUT_QUEUE_LIMIT 10 + +/* + * The system value is available from sysconf(_SC_HOST_NAME_MAX). + * Historically, the maximum host name was 256. + */ +#ifndef MAXHOSTNAMELEN +#define MAXHOSTNAMELEN 256 +#endif + +/* A buffer big enough for the string "site host.domain.com:65535". */ +#define MAX_SITE_LOC_STRING (MAXHOSTNAMELEN+20) +typedef char SITE_STRING_BUFFER[MAX_SITE_LOC_STRING+1]; + +#define MAX_MSG_BUF (__REPMGR_MAXMSG_SIZE + MAXHOSTNAMELEN + 1) + +/* Default timeout values, in seconds. 
*/ +#define DB_REPMGR_DEFAULT_ACK_TIMEOUT (1 * US_PER_SEC) +#define DB_REPMGR_DEFAULT_CONNECTION_RETRY (30 * US_PER_SEC) +#define DB_REPMGR_DEFAULT_ELECTION_RETRY (10 * US_PER_SEC) +#define DB_REPMGR_DEFAULT_CHANNEL_TIMEOUT (5 * US_PER_SEC) + +typedef TAILQ_HEAD(__repmgr_conn_list, __repmgr_connection) CONNECTION_LIST; +typedef STAILQ_HEAD(__repmgr_out_q_head, __queued_output) OUT_Q_HEADER; +typedef TAILQ_HEAD(__repmgr_retry_q, __repmgr_retry) RETRY_Q_HEADER; + +/* Information about threads managed by Replication Framework. */ +struct __repmgr_runnable { + ENV *env; + thread_id_t thread_id; + void *(*run) __P((void *)); + int finished; /* Boolean: thread is exiting, may be joined. */ + int quit_requested; /* Boolean: thread has been asked to quit. */ +#ifdef DB_WIN32 + HANDLE quit_event; +#endif + union { + +/* + * Options governing requested behavior of election thread. + */ +#define ELECT_F_EVENT_NOTIFY 0x01 /* Notify application of master failure. */ +#define ELECT_F_FAST 0x02 /* First election "fast" (n-1 trick). */ +#define ELECT_F_IMMED 0x04 /* Start with immediate election. */ +#define ELECT_F_INVITEE 0x08 /* Honor (remote) inviter's nsites. */ +#define ELECT_F_STARTUP 0x10 /* Observe repmgr_start() policy. */ + u_int32_t flags; + + int eid; /* For Connector thread. */ + + /* + * Args for other thread types can be added here in the future + * as needed. + */ + } args; +}; + +/* + * Information about pending connection establishment retry operations. + * + * We keep these in order by time. This works, under the assumption that the + * DB_REP_CONNECTION_RETRY never changes once we get going (though that + * assumption is of course wrong, so this needs to be fixed). + * + * Usually, we put things onto the tail end of the list. But when we add a new + * site while threads are running, we trigger its first connection attempt by + * scheduling a retry for "0" microseconds from now, putting its retry element + * at the head of the list instead. 
+ * + * TODO: I think this can be fixed by defining "time" to be the time the element + * was added (with some convention like "0" meaning immediate), rather than the + * deadline time. + */ +struct __repmgr_retry { + TAILQ_ENTRY(__repmgr_retry) entries; + u_int eid; + db_timespec time; +}; + +/* + * We use scatter/gather I/O for both reading and writing. Repmgr messages + * (including rep messages) use 3 segments: envelope, control and rec. + * Application messages can have any number of segments (the number they + * specify, plus 1 for our envelope). REPMGR_IOVECS_ALLOC_SZ should (only) be + * used when n > 3. + */ +#define REPMGR_IOVECS_ALLOC_SZ(n) \ + (sizeof(REPMGR_IOVECS) + ((n) - MIN_IOVEC) * sizeof(db_iovec_t)) +typedef struct { + /* + * Index of the first iovec to be used. Initially of course this is + * zero. But as we progress through partial I/O transfers, it ends up + * pointing to the first iovec to be used on the next operation. + */ + int offset; + + /* + * Total number of pieces defined for this message; equal to the number + * of times add_buffer and/or add_dbt were called to populate it. We do + * *NOT* revise this as we go along. So subsequent I/O operations must + * use count-offset to get the number of active vector pieces still + * remaining. + */ + int count; + + /* + * Total number of bytes accounted for in all the pieces of this + * message. We do *NOT* revise this as we go along. + */ + size_t total_bytes; + +#define MIN_IOVEC 3 + db_iovec_t vectors[MIN_IOVEC]; /* Variable length array. */ +} REPMGR_IOVECS; + +typedef struct { + size_t length; /* number of bytes in data */ + int ref_count; /* # of sites' send queues pointing to us */ + u_int8_t data[1]; /* variable size data area */ +} REPMGR_FLAT; + +struct __queued_output { + STAILQ_ENTRY(__queued_output) entries; + REPMGR_FLAT *msg; + size_t offset; +}; + +/* + * The following is for input. 
Once we know the sizes of the pieces of an + * incoming message, we can create this struct (and also the data areas for the + * pieces themselves, in the same memory allocation). This is also the struct + * in which the message lives while it's waiting to be processed by message + * threads. + */ +typedef struct __repmgr_message { + STAILQ_ENTRY(__repmgr_message) entries; + __repmgr_msg_hdr_args msg_hdr; + union { + struct { + int originating_eid; + DBT control, rec; + } repmsg; + struct { + REPMGR_CONNECTION *conn; + DBT request; + } gmdb_msg; + struct { + /* + * Connection from which the message arrived; NULL if + * generated on the local site. + */ + REPMGR_CONNECTION *conn; + + DBT buf; /* for reading */ + DBT segments[1]; /* expanded in msg th. before callbk */ + } appmsg; + } v; /* Variants */ +} REPMGR_MESSAGE; + +typedef enum { + SIZES_PHASE, + DATA_PHASE +} phase_t; + +typedef enum { + APP_CONNECTION, + REP_CONNECTION, + UNKNOWN_CONN_TYPE +} conn_type_t; + +struct __repmgr_connection { + TAILQ_ENTRY(__repmgr_connection) entries; + + socket_t fd; +#ifdef DB_WIN32 + WSAEVENT event_object; +#endif + + /* + * Number of other structures referring to this conn struct. This + * ref_count must be reduced to zero before this conn struct can be + * destroyed. Referents include: + * + * - the select() loop, which owns the right to do all reading, as well + * as the exclusive right to eventually close the socket + * + * - a "channel" that owns this APP_CONNECTION (on the originating side) + * + * - a message received on this APP_CONNECTION, queued for processing + * + * - any writer blocked on waiting for the outbound queue to drain + */ + u_int32_t ref_count; + + conn_type_t type; + u_int32_t version; /* Wire protocol version on this connection. */ + /* (0 means not yet determined.) */ + +/* + * When we make an outgoing connection, it starts in CONNECTED state. When we + * get the response to our version negotiation, we move to READY. 
+ * For incoming connections that we accept, we start in NEGOTIATE, then to + * PARAMETERS, and then to READY. + * CONGESTED is a hierarchical substate of READY: it's just like READY, with + * the additional wrinkle that we don't bother waiting for the outgoing queue to + * drain in certain circumstances. + */ +#define CONN_CONGESTED 1 /* Long-lived full outgoing queue. */ +#define CONN_CONNECTED 2 /* Awaiting reply to our version negotiation. */ +#define CONN_DEFUNCT 3 /* Basically dead, awaiting clean-up. */ +#define CONN_NEGOTIATE 4 /* Awaiting version proposal. */ +#define CONN_PARAMETERS 5 /* Awaiting parameters handshake. */ +#define CONN_READY 6 /* Everything's fine. */ + int state; + +#define CONN_INCOMING 0x01 + u_int32_t flags; + + /* + * Input: while we're reading a message, we keep track of what phase + * we're in. In both phases, we use a REPMGR_IOVECS to keep track of + * our progress within the phase. Depending upon the message type, we + * end up with either a rep_message (which is a wrapper for the control + * and rec DBTs), or a single generic DBT. + * Any time we're in DATA_PHASE, it means we have already received + * the message header (consisting of msg_type and 2 sizes), and + * therefore we have allocated buffer space to read the data. (This is + * important for resource clean-up.) + */ + phase_t reading_phase; + REPMGR_IOVECS iovecs; + + u_int8_t msg_type; + u_int8_t msg_hdr_buf[__REPMGR_MSG_HDR_SIZE]; + + union { + REPMGR_MESSAGE *rep_message; + struct { + DBT cntrl, rec; + } repmgr_msg; + } input; + + /* + * Output: usually we just simply write messages right in line, in the + * send() function's thread. But if TCP doesn't have enough network + * buffer space for us when we first try it, we instead allocate some + * memory, and copy the message, and then send it as space becomes + * available in our main select() thread. In some cases, if the queue + * gets too long we wait until it's drained, and then append to it. 
+ * This condition variable's associated mutex is the normal per-repmgr + * db_rep->mutex, because that mutex is always held anyway whenever the + * output queue is consulted. + */ + OUT_Q_HEADER outbound_queue; + int out_queue_length; + cond_var_t drained; + + /* =-=-=-=-= app-channel stuff =-=-=-=-= */ + waiter_t response_waiters; + + /* + * Array of info about pending responses to requests. This info is here + * (rather than on the stack of the thread calling send_request()) + * because it provides an easy way to allocate available numbers for + * message tags, and also so that we can easily find the right info when + * we get the tag back in the msg header of the response. + */ + REPMGR_RESPONSE *responses; + u_int32_t aresp; /* Array size. */ + u_int32_t cur_resp; /* Index of response currently reading. */ + + /* =-=-=-=-= for normal repmgr connections =-=-=-=-= */ + /* + * Generally on a REP_CONNECTION type, we have an associated EID (which + * is an index into the sites array, by the way). When we initiate the + * connection ("outgoing"), we know from the start what the EID is; the + * connection struct is linked from the site struct. On the other hand, + * when we receive an incoming connection, we don't know at first what + * site it may be associated with (or even whether it's an + * APP_CONNECTION or REP_CONNECTION, for that matter). During that + * initial uncertain time, the eid is -1. Also, when a connection + * becomes defunct, but the conn struct hasn't yet been destroyed, the + * eid also becomes -1. + * + * The eid should be -1 if and only if the connection is on the orphans + * list. + */ + int eid; + +}; + +#define IS_READY_STATE(s) ((s) == CONN_READY || (s) == CONN_CONGESTED) + +#ifdef HAVE_GETADDRINFO +typedef struct addrinfo ADDRINFO; +typedef struct sockaddr_storage ACCEPT_ADDR; +#else +typedef struct sockaddr_in ACCEPT_ADDR; +/* + * Some windows platforms have getaddrinfo (Windows XP), some don't. 
We don't + * support conditional compilation in our Windows build, so we always use our + * own getaddrinfo implementation. Rename everything so that we don't collide + * with the system libraries. + */ +#undef AI_PASSIVE +#define AI_PASSIVE 0x01 +#undef AI_CANONNAME +#define AI_CANONNAME 0x02 +#undef AI_NUMERICHOST +#define AI_NUMERICHOST 0x04 + +typedef struct __addrinfo { + int ai_flags; /* AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST */ + int ai_family; /* PF_xxx */ + int ai_socktype; /* SOCK_xxx */ + int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */ + size_t ai_addrlen; /* length of ai_addr */ + char *ai_canonname; /* canonical name for nodename */ + struct sockaddr *ai_addr; /* binary address */ + struct __addrinfo *ai_next; /* next structure in linked list */ +} ADDRINFO; +#endif /* HAVE_GETADDRINFO */ + +/* + * Unprocessed network address configuration. + */ +typedef struct { + roff_t host; /* Separately allocated copy of string. */ + u_int16_t port; /* Stored in plain old host-byte-order. */ +} SITEADDR; + +/* + * Site information, as stored in shared region. + */ +typedef struct { + SITEADDR addr; /* Unprocessed network address of site. */ + u_int32_t config; /* Configuration flags: peer, helper, etc. */ + u_int32_t status; /* Group membership status. */ +} SITEINFO; + +/* + * A site address, as stored locally. + */ +typedef struct { + char *host; /* Separately allocated copy of string. */ + u_int16_t port; /* Stored in plain old host-byte-order. */ +} repmgr_netaddr_t; + +/* + * We store site structs in a dynamically allocated, growable array, indexed by + * EID. We allocate EID numbers for all sites simply according to their + * index within this array. 
+ */ +#define SITE_FROM_EID(eid) (&db_rep->sites[eid]) +#define EID_FROM_SITE(s) ((int)((s) - (&db_rep->sites[0]))) +#define IS_VALID_EID(e) ((e) >= 0) +#define IS_KNOWN_REMOTE_SITE(e) ((e) >= 0 && ((e) != db_rep->self_eid) && \ + (((u_int)(e)) < db_rep->site_cnt)) +#define FOR_EACH_REMOTE_SITE_INDEX(i) \ + for ((i) = (db_rep->self_eid == 0 ? 1 : 0); \ + (i) < db_rep->site_cnt; \ + ((int)++(i)) == db_rep->self_eid ? ++(i) : i) + +struct __repmgr_site { + repmgr_netaddr_t net_addr; + + /* + * Group membership status: a copy of the status from the membership + * database, or the out-of-band value 0, meaning that it doesn't exist. + * We keep track of a "non-existent" site because the associated + * host/port network address is promised to be associated with the + * locally known EID for the life of the environment. + */ + u_int32_t membership; /* Status flags from GMDB. */ + u_int32_t config; /* Flags from site->set_config() */ + + /* + * Everything below here is applicable only to remote sites. + */ + DB_LSN max_ack; /* Best ack we've heard from this site. */ + int ack_policy; /* Or 0 if unknown. */ + u_int16_t alignment; /* Requirements for app channel msgs. */ + db_timespec last_rcvd_timestamp; + + /* Contents depends on state. */ + union { + REPMGR_CONNECTION *conn; /* when CONNECTED */ + REPMGR_RETRY *retry; /* when PAUSING */ + /* Unused when CONNECTING. */ + } ref; + + /* + * Subordinate connections (connections from subordinate processes at a + * multi-process site). Note that the SITE_CONNECTED state, and all the + * ref.retry stuff above is irrelevant to subordinate connections. If a + * connection is on this list, it exists; and we never bother trying to + * reconnect lost connections (indeed we can't, for these are always + * incoming-only). + */ + CONNECTION_LIST sub_conns; + REPMGR_RUNNABLE *connector; /* Thread to open a connection. */ + +#define SITE_CONNECTED 1 /* We have a (main) connection. 
*/ +#define SITE_CONNECTING 2 /* Trying to establish (main) connection. */ +#define SITE_IDLE 3 /* Doing nothing. */ +#define SITE_PAUSING 4 /* Waiting til time to retry connecting. */ + int state; + +#define SITE_HAS_PRIO 0x01 /* Set if "electable" flag bit is valid. */ +#define SITE_ELECTABLE 0x02 +#define SITE_TOUCHED 0x04 /* Seen GMDB record during present scan. */ + u_int32_t flags; +}; + +/* + * Flag values for the public DB_SITE handle. + */ +#define DB_SITE_PREOPEN 0x01 /* Provisional EID; may change at env open. */ + +struct __repmgr_response { + DBT dbt; + int ret; + +#define RESP_COMPLETE 0x01 +#define RESP_DUMMY_BUF 0x02 +#define RESP_IN_USE 0x04 +#define RESP_READING 0x08 +#define RESP_THREAD_WAITING 0x10 + u_int32_t flags; +}; + +/* + * Private structure for managing comms "channels." This is separate from + * DB_CHANNEL so as to avoid dragging in other private structures (e.g., + * REPMGR_CONNECTION) into db.h, similar to the relationship between DB_ENV and + * ENV. + */ +struct __channel { + DB_CHANNEL *db_channel; + ENV *env; + + union { + /* For simple, specific-EID channels. */ + REPMGR_CONNECTION *conn; + + /* For EID_MASTER or EID_BROADCAST channels. */ + struct { + mgr_mutex_t *mutex; /* For connection establishment. */ + REPMGR_CONNECTION **array; + u_int32_t cnt; + } conns; + } c; + REPMGR_MESSAGE *msg; /* Incoming channel only; NULL otherwise. */ + int responded; /* Boolean flag. */ + __repmgr_msg_metadata_args *meta; + + /* Used only in send-to-self request case. */ + struct __repmgr_response response; +}; + +/* + * Repmgr keeps track of references to connection information (instances + * of struct __repmgr_connection). There are three kinds of places + * connections may be found: (1) SITE->ref.conn, (2) SITE->sub_conns, and + * (3) db_rep->connections. + * + * 1. SITE->ref.conn points to our connection with the main process running + * at the given site, if such a connection exists. 
We may have initiated + * the connection to the site ourselves, or we may have received it as an + * incoming connection. Once it is established there is very little + * difference between those two cases. + * + * 2. SITE->sub_conns is a list of connections we have with subordinate + * processes running at the given site. There can be any number of these + * connections, one per subordinate process. Note that these connections + * are always incoming: there's no way for us to initiate this kind of + * connection because subordinate processes do not "listen". + * + * 3. The db_rep->connections list contains the references to any + * connections that are not actively associated with any site (we + * sometimes call these "orphans"). There are two times when this can + * be: + * + * a) When we accept an incoming connection, we don't know what site it + * comes from until we read the initial handshake message. + * + * b) When an error occurs on a connection, we first mark it as DEFUNCT + * and stop using it. Then, at a later, well-defined time, we close + * the connection's file descriptor and get rid of the connection + * struct. + * + * In light of the above, we can see that the following describes the + * rules for how connections may be moved among these three kinds of + * "places": + * + * - when we initiate an outgoing connection, we of course know what site + * it's going to be going to, and so we immediately put the pointer to + * the connection struct into SITE->ref.conn + * + * - when we accept an incoming connection, we don't immediately know + * whom it's from, so we have to put it on the orphans list + * (db_rep->connections). + * + * - (incoming, cont.) But as soon as we complete the initial "handshake" + * message exchange, we will know which site it's from and whether it's + * a subordinate or main connection. At that point we remove it from + * db_rep->connections and either point to it by SITE->ref.conn, or add + * it to the SITE->sub_conns list. 
+ * + * - (for any active connection) when an error occurs, we move the + * connection to the orphans list until we have a chance to close it. + */ + +/* + * Repmgr message formats. + * + * Declarative definitions of current message formats appear in repmgr.msg. + * (The s_message/gen_msg.awk utility generates C code.) In general, we send + * the buffers marshaled from those structure formats in the "control" portion + * of a message. + * + * Each message is prefaced by a 9-byte message header (as described in + * repmgr_net.c). Different message types use the two available 32-bit integers + * in different ways, as codified here: + */ +#define REPMGR_HDR1(hdr) ((hdr).word1) +#define REPMGR_HDR2(hdr) ((hdr).word2) + +/* REPMGR_APP_MESSAGE */ +#define APP_MSG_BUFFER_SIZE REPMGR_HDR1 +#define APP_MSG_SEGMENT_COUNT REPMGR_HDR2 + +/* REPMGR_REP_MESSAGE and the other traditional repmgr message types. */ +#define REP_MSG_CONTROL_SIZE REPMGR_HDR1 +#define REP_MSG_REC_SIZE REPMGR_HDR2 + +/* REPMGR_APP_RESPONSE */ +#define APP_RESP_BUFFER_SIZE REPMGR_HDR1 +#define APP_RESP_TAG REPMGR_HDR2 + +/* REPMGR_RESP_ERROR. Note that a zero-length message body is implied. */ +#define RESP_ERROR_CODE REPMGR_HDR1 +#define RESP_ERROR_TAG REPMGR_HDR2 + +/* REPMGR_OWN_MSG */ +#define REPMGR_OWN_BUF_SIZE REPMGR_HDR1 +#define REPMGR_OWN_MSG_TYPE REPMGR_HDR2 + +/* + * Flags for the handshake message. As with repmgr message types, these values + * are transmitted between sites, and must therefore be "frozen" permanently. + * Names are alphabetized here for easy reference, but values reflect historical + * usage. + */ +#define APP_CHANNEL_CONNECTION 0x02 /* Connection used for app channel. */ +#define ELECTABLE_SITE 0x04 +#define REPMGR_SUBORDINATE 0x01 /* This is a subordinate connection. */ + +/* + * Flags for application-message meta-data. 
+ */ +#define REPMGR_MULTI_RESP 0x01 +#define REPMGR_REQUEST_MSG_TYPE 0x02 +#define REPMGR_RESPONSE_LIMIT 0x04 + +/* + * Legacy V1 handshake message format. For compatibility, we send this as part + * of version negotiation upon connection establishment. + */ +typedef struct { + u_int32_t version; + u_int16_t port; + u_int32_t priority; +} DB_REPMGR_V1_HANDSHAKE; + +/* + * Storage formats. + * + * As with message formats, stored formats are defined in repmgr.msg. + */ +/* + * Flags for the Group Membership data portion of a record. Like message type + * codes, these values are frozen across releases, in order to avoid pointless + * churn. + */ +#define SITE_ADDING 0x01 +#define SITE_DELETING 0x02 +#define SITE_PRESENT 0x04 + +/* + * Message types whose processing could take a long time. We're careful to + * avoid using up all our message processing threads on these message types, so + * that we don't starve out the more important rep messages. + */ +#define IS_DEFERRABLE(t) ((t) == REPMGR_OWN_MSG || (t) == REPMGR_APP_MESSAGE) +/* + * When using leases there are times when a thread processing a message + * must block, waiting for leases to be refreshed. But refreshing the + * leases requires another thread to accept the lease grant messages. + */ +#define RESERVED_MSG_TH(env) (IS_USING_LEASES(env) ? 2 : 1) + +#define IS_SUBORDINATE(db_rep) (db_rep->listen_fd == INVALID_SOCKET) + +#define IS_PEER_POLICY(p) ((p) == DB_REPMGR_ACKS_ALL_PEERS || \ + (p) == DB_REPMGR_ACKS_QUORUM || \ + (p) == DB_REPMGR_ACKS_ONE_PEER) + +#define IS_SITE_AVAILABLE(s) ((s)->state == SITE_CONNECTED && \ + (s)->ref.conn->state == CONN_READY) + +#define IS_SITE_HANDSHAKEN(s) ((s)->state == SITE_CONNECTED && \ + IS_READY_STATE((s)->ref.conn->state)) + +/* + * Most of the code in repmgr runs while holding repmgr's main mutex, which + * resides in db_rep->mutex. 
This mutex is owned by a single repmgr process, + * and serializes access to the (large) critical sections among threads in the + * process. Unlike many other mutexes in DB, it is specifically coded as either + * a POSIX threads mutex or a Win32 mutex. Note that although it's a large + * fraction of the code, it's a tiny fraction of the time: repmgr spends most of + * its time in a call to select(), and as well a bit in calls into the Base + * replication API. All of those release the mutex. + * Access to repmgr's shared list of site addresses is protected by + * another mutex: mtx_repmgr. And, when changing space allocation for that site + * list we conform to the convention of acquiring renv->mtx_regenv. These are + * less frequent of course. + * When it's necessary to acquire more than one of these mutexes, the + * ordering priority (or "lock ordering protocol") is: + * db_rep->mutex (first) + * mtx_repmgr (briefly) + * mtx_regenv (last, and most briefly) + * + * There are also mutexes for app message "channels". Each channel has a mutex, + * which is used to serialize any connection re-establishment that may become + * necessary during its lifetime (such as when a master changes). This never + * happens on a simple, specific-EID channel, but in other cases multiple app + * threads could be making send_xxx() calls concurrently, and it would not do to + * have two of them try to re-connect concurrently. + * When re-establishing a connection, the channel lock is held while + * grabbing first the mtx_repmgr, and then the db_rep mutex (but not both + * together). I.e., we have: + * channel->mutex (first) + * [mtx_repmgr (very briefly)] and then [db_rep->mutex (very briefly)] + */ + +#define LOCK_MUTEX(m) do { \ + if (__repmgr_lock_mutex(m) != 0) \ + return (DB_RUNRECOVERY); \ +} while (0) + +#define UNLOCK_MUTEX(m) do { \ + if (__repmgr_unlock_mutex(m) != 0) \ + return (DB_RUNRECOVERY); \ +} while (0) + +/* POSIX/Win32 socket (and other) portability. 
*/ +#ifdef DB_WIN32 +#define WOULDBLOCK WSAEWOULDBLOCK +#undef DB_REPMGR_EAGAIN + +#define net_errno WSAGetLastError() +typedef int socklen_t; +typedef char * sockopt_t; +#define sendsocket(s, buf, len, flags) send((s), (buf), (int)(len), (flags)) + +#define iov_len len +#define iov_base buf + +typedef DWORD threadsync_timeout_t; + +#define REPMGR_INITED(db_rep) (db_rep->signaler != NULL) +#else + +#define INVALID_SOCKET -1 +#define SOCKET_ERROR -1 +#define WOULDBLOCK EWOULDBLOCK +#define DB_REPMGR_EAGAIN EAGAIN + +#define net_errno errno +typedef void * sockopt_t; + +#define sendsocket(s, buf, len, flags) send((s), (buf), (len), (flags)) +#define closesocket(fd) close(fd) + +typedef struct timespec threadsync_timeout_t; + +#define REPMGR_INITED(db_rep) (db_rep->read_pipe >= 0) +#endif + +#define SELECTOR_RUNNING(db_rep) ((db_rep)->selector != NULL) + +/* + * Generic definition of some action to be performed on each connection, in the + * form of a call-back function. + */ +typedef int (*CONNECTION_ACTION) __P((ENV *, REPMGR_CONNECTION *, void *)); + +/* + * Generic predicate to test a condition that a thread is waiting for. + */ +typedef int (*PREDICATE) __P((ENV *, void *)); + +#include "dbinc_auto/repmgr_ext.h" + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_REPMGR_H_ */ diff --git a/src/dbinc/shqueue.h b/src/dbinc/shqueue.h new file mode 100644 index 00000000..7c2a15da --- /dev/null +++ b/src/dbinc/shqueue.h @@ -0,0 +1,410 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_SHQUEUE_H_ +#define _DB_SHQUEUE_H_ + +/* + * This file defines three types of data structures: chains, lists and + * tail queues similarly to the include file <sys/queue.h>. + * + * The difference is that this set of macros can be used for structures that + * reside in shared memory that may be mapped at different addresses in each + * process. 
In most cases, the macros for shared structures exactly mirror + * the normal macros, although the macro calls require an additional type + * parameter, only used by the HEAD and ENTRY macros of the standard macros. + * + * Since we use relative offsets of type ssize_t rather than pointers, 0 + * (aka NULL) is a valid offset and cannot be used to indicate the end + * of a list. Therefore, we use -1 to indicate end of list. + * + * The macros ending in "P" return pointers without checking for end or + * beginning of lists, the others check for end of list and evaluate to + * either a pointer or NULL. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define SH_PTR_TO_OFF(src, dest) \ + ((db_ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src)))) + +#define SH_OFF_TO_PTR(base, off, type) \ + ((type *) (((u_int8_t *)(base)) + (db_ssize_t) (off))) + + +/* + * Shared memory chain definitions. + */ +#define SH_CHAIN_ENTRY \ +struct { \ + db_ssize_t sce_next; /* relative offset to next element */ \ + db_ssize_t sce_prev; /* relative offset of prev element */ \ +} + +#define SH_CHAIN_INIT(elm, field) \ + (elm)->field.sce_next = (elm)->field.sce_prev = -1 + +#define SH_CHAIN_HASNEXT(elm, field) ((elm)->field.sce_next != -1) +#define SH_CHAIN_NEXTP(elm, field, type) \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.sce_next)) +#define SH_CHAIN_NEXT(elm, field, type) (SH_CHAIN_HASNEXT(elm, field) ? \ + SH_CHAIN_NEXTP(elm, field, type) : (struct type *)NULL) + +#define SH_CHAIN_HASPREV(elm, field) ((elm)->field.sce_prev != -1) +#define SH_CHAIN_PREVP(elm, field, type) \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.sce_prev)) +#define SH_CHAIN_PREV(elm, field, type) (SH_CHAIN_HASPREV(elm, field) ? 
\ + SH_CHAIN_PREVP(elm, field, type) : (struct type *)NULL) + +#define SH_CHAIN_SINGLETON(elm, field) \ + (!(SH_CHAIN_HASNEXT(elm, field) || SH_CHAIN_HASPREV(elm, field))) + +#define SH_CHAIN_INSERT_AFTER(listelm, elm, field, type) do { \ + struct type *__next = SH_CHAIN_NEXT(listelm, field, type); \ + if (__next != NULL) { \ + (elm)->field.sce_next = SH_PTR_TO_OFF(elm, __next); \ + __next->field.sce_prev = SH_PTR_TO_OFF(__next, elm); \ + } else \ + (elm)->field.sce_next = -1; \ + (elm)->field.sce_prev = SH_PTR_TO_OFF(elm, listelm); \ + (listelm)->field.sce_next = SH_PTR_TO_OFF(listelm, elm); \ +} while (0) + +#define SH_CHAIN_INSERT_BEFORE(listelm, elm, field, type) do { \ + struct type *__prev = SH_CHAIN_PREV(listelm, field, type); \ + if (__prev != NULL) { \ + (elm)->field.sce_prev = SH_PTR_TO_OFF(elm, __prev); \ + __prev->field.sce_next = SH_PTR_TO_OFF(__prev, elm); \ + } else \ + (elm)->field.sce_prev = -1; \ + (elm)->field.sce_next = SH_PTR_TO_OFF(elm, listelm); \ + (listelm)->field.sce_prev = SH_PTR_TO_OFF(listelm, elm); \ +} while (0) + +#define SH_CHAIN_REMOVE(elm, field, type) do { \ + struct type *__prev = SH_CHAIN_PREV(elm, field, type); \ + struct type *__next = SH_CHAIN_NEXT(elm, field, type); \ + if (__next != NULL) \ + __next->field.sce_prev = (__prev == NULL) ? -1 : \ + SH_PTR_TO_OFF(__next, __prev); \ + if (__prev != NULL) \ + __prev->field.sce_next = (__next == NULL) ? -1 : \ + SH_PTR_TO_OFF(__prev, __next); \ + SH_CHAIN_INIT(elm, field); \ +} while (0) + +/* + * Shared memory list definitions. + */ +#define SH_LIST_HEAD(name) \ +struct name { \ + db_ssize_t slh_first; /* first element */ \ +} + +#define SH_LIST_HEAD_INITIALIZER(head) \ + { -1 } + +#define SH_LIST_ENTRY \ +struct { \ + db_ssize_t sle_next; /* relative offset to next element */ \ + db_ssize_t sle_prev; /* relative offset of prev element */ \ +} + +/* + * Shared memory list functions. 
+ */ +#define SH_LIST_EMPTY(head) \ + ((head)->slh_first == -1) + +#define SH_LIST_FIRSTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)) + +#define SH_LIST_FIRST(head, type) \ + (SH_LIST_EMPTY(head) ? NULL : \ + ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))) + +#define SH_LIST_NEXTP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)) + +#define SH_LIST_NEXT(elm, field, type) \ + ((elm)->field.sle_next == -1 ? NULL : \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))) + + /* + *__SH_LIST_PREV_OFF is private API. It calculates the address of + * the elm->field.sle_next member of a SH_LIST structure. All offsets + * between elements are relative to that point in SH_LIST structures. + */ +#define __SH_LIST_PREV_OFF(elm, field) \ + ((db_ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev)) + +#define SH_LIST_PREV(elm, field, type) \ + (struct type *)((db_ssize_t)(elm) - (*__SH_LIST_PREV_OFF(elm, field))) + +#define SH_LIST_FOREACH(var, head, field, type) \ + for ((var) = SH_LIST_FIRST((head), type); \ + (var) != NULL; \ + (var) = SH_LIST_NEXT((var), field, type)) + +/* + * Given correct A.next: B.prev = SH_LIST_NEXT_TO_PREV(A) + * in a list [A, B] + * The prev value is always the offset from an element to its preceding + * element's next location, not the beginning of the structure. To get + * to the beginning of an element structure in memory given an element + * do the following: + * A = B - (B.prev + (&B.next - B)) + * Take the element's next pointer and calculate what the corresponding + * Prev pointer should be -- basically it is the negation plus the offset + * of the next field in the structure. + */ +#define SH_LIST_NEXT_TO_PREV(elm, field) \ + (((elm)->field.sle_next == -1 ? 
0 : -(elm)->field.sle_next) + \ + SH_PTR_TO_OFF(elm, &(elm)->field.sle_next)) + +#define SH_LIST_INIT(head) (head)->slh_first = -1 + +#define SH_LIST_INSERT_BEFORE(head, listelm, elm, field, type) do { \ + if (listelm == SH_LIST_FIRST(head, type)) { \ + SH_LIST_INSERT_HEAD(head, elm, field, type); \ + } else { \ + (elm)->field.sle_next = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV( \ + SH_LIST_PREV((listelm), field, type), field) + \ + (elm)->field.sle_next; \ + (SH_LIST_PREV(listelm, field, type))->field.sle_next = \ + (SH_PTR_TO_OFF((SH_LIST_PREV(listelm, field, \ + type)), elm)); \ + (listelm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(elm, field); \ + } \ +} while (0) + +#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do { \ + if ((listelm)->field.sle_next != -1) { \ + (elm)->field.sle_next = SH_PTR_TO_OFF(elm, \ + SH_LIST_NEXTP(listelm, field, type)); \ + SH_LIST_NEXTP(listelm, field, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_LIST_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->slh_first != -1) { \ + (elm)->field.sle_next = \ + (head)->slh_first - SH_PTR_TO_OFF(head, elm); \ + SH_LIST_FIRSTP(head, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (head)->slh_first = SH_PTR_TO_OFF(head, elm); \ + (elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \ +} while (0) + +#define SH_LIST_REMOVE(elm, field, type) do { \ + if ((elm)->field.sle_next != -1) { \ + SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \ + (elm)->field.sle_prev - (elm)->field.sle_next; \ + *__SH_LIST_PREV_OFF(elm, field) += (elm)->field.sle_next;\ + } else \ + *__SH_LIST_PREV_OFF(elm, field) = -1; \ +} while (0) + +#define SH_LIST_REMOVE_HEAD(head, field, type) do 
{ \ + if (!SH_LIST_EMPTY(head)) { \ + SH_LIST_REMOVE(SH_LIST_FIRSTP(head, type), field, type);\ + } \ +} while (0) + +/* + * Shared memory tail queue definitions. + */ +#define SH_TAILQ_HEAD(name) \ +struct name { \ + db_ssize_t stqh_first; /* relative offset of first element */ \ + db_ssize_t stqh_last; /* relative offset of last's next */ \ +} + +#define SH_TAILQ_HEAD_INITIALIZER(head) \ + { -1, 0 } + +#define SH_TAILQ_ENTRY \ +struct { \ + db_ssize_t stqe_next; /* relative offset of next element */ \ + db_ssize_t stqe_prev; /* relative offset of prev's next */ \ +} + +/* + * Shared memory tail queue functions. + */ + +#define SH_TAILQ_EMPTY(head) \ + ((head)->stqh_first == -1) + +#define SH_TAILQ_FIRSTP(head, type) \ + ((struct type *)((u_int8_t *)(head) + (head)->stqh_first)) + +#define SH_TAILQ_FIRST(head, type) \ + (SH_TAILQ_EMPTY(head) ? NULL : SH_TAILQ_FIRSTP(head, type)) + +#define SH_TAILQ_NEXTP(elm, field, type) \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next)) + +#define SH_TAILQ_NEXT(elm, field, type) \ + ((elm)->field.stqe_next == -1 ? NULL : \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next))) + + /* + * __SH_TAILQ_PREV_OFF is private API. It calculates the address of + * the elm->field.stqe_next member of a SH_TAILQ structure. All + * offsets between elements are relative to that point in SH_TAILQ + * structures. + */ +#define __SH_TAILQ_PREV_OFF(elm, field) \ + ((db_ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.stqe_prev)) + +#define SH_TAILQ_PREVP(elm, field, type) \ + (struct type *)((db_ssize_t)elm - (*__SH_TAILQ_PREV_OFF(elm, field))) + +#define SH_TAILQ_PREV(head, elm, field, type) \ + (((elm) == SH_TAILQ_FIRST(head, type)) ? NULL : \ + (struct type *)((db_ssize_t)elm - (*__SH_TAILQ_PREV_OFF(elm, field)))) + + /* + * __SH_TAILQ_LAST_OFF is private API. It calculates the address of + * the stqe_next member of a SH_TAILQ structure in the last element + * of this list. 
All offsets between elements are relative to that + * point in SH_TAILQ structures. + */ +#define __SH_TAILQ_LAST_OFF(head) \ + ((db_ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last)) + +#define SH_TAILQ_LASTP(head, field, type) \ + ((struct type *)((db_ssize_t)(head) + \ + ((db_ssize_t)((head)->stqh_last) - \ + ((db_ssize_t)SH_PTR_TO_OFF(SH_TAILQ_FIRST(head, type), \ + &(SH_TAILQ_FIRSTP(head, type)->field.stqe_next)))))) + +#define SH_TAILQ_LAST(head, field, type) \ + (SH_TAILQ_EMPTY(head) ? NULL : SH_TAILQ_LASTP(head, field, type)) + +/* + * Given correct A.next: B.prev = SH_TAILQ_NEXT_TO_PREV(A) + * in a list [A, B] + * The prev value is always the offset from an element to its preceding + * element's next location, not the beginning of the structure. To get + * to the beginning of an element structure in memory given an element + * do the following: + * A = B - (B.prev + (&B.next - B)) + */ +#define SH_TAILQ_NEXT_TO_PREV(elm, field) \ + (((elm)->field.stqe_next == -1 ? 0 : \ + (-(elm)->field.stqe_next) + \ + SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next))) + +#define SH_TAILQ_FOREACH(var, head, field, type) \ + for ((var) = SH_TAILQ_FIRST((head), type); \ + (var) != NULL; \ + (var) = SH_TAILQ_NEXT((var), field, type)) + +#define SH_TAILQ_FOREACH_REVERSE(var, head, field, type) \ + for ((var) = SH_TAILQ_LAST((head), field, type); \ + (var) != NULL; \ + (var) = SH_TAILQ_PREV((head), (var), field, type)) + +#define SH_TAILQ_INIT(head) { \ + (head)->stqh_first = -1; \ + (head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first); \ +} + +#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->stqh_first != -1) { \ + (elm)->field.stqe_next = \ + (head)->stqh_first - SH_PTR_TO_OFF(head, elm); \ + SH_TAILQ_FIRSTP(head, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \ + (elm)->field.stqe_next = -1; \ + } \ + (head)->stqh_first = SH_PTR_TO_OFF(head, 
elm); \ + (elm)->field.stqe_prev = \ + SH_PTR_TO_OFF(elm, &(head)->stqh_first); \ +} while (0) + +#define SH_TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.stqe_next = -1; \ + (elm)->field.stqe_prev = \ + -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last; \ + if ((head)->stqh_last == \ + SH_PTR_TO_OFF((head), &(head)->stqh_first)) \ + (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \ + else \ + *__SH_TAILQ_LAST_OFF(head) = -(head)->stqh_last + \ + SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) + \ + SH_PTR_TO_OFF(head, elm); \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &((elm)->field.stqe_next)); \ +} while (0) + +#define SH_TAILQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \ + if (listelm == SH_TAILQ_FIRST(head, type)) { \ + SH_TAILQ_INSERT_HEAD(head, elm, field, type); \ + } else { \ + (elm)->field.stqe_next = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV( \ + SH_TAILQ_PREVP((listelm), field, type), field) + \ + (elm)->field.stqe_next; \ + (SH_TAILQ_PREVP(listelm, field, type))->field.stqe_next =\ + (SH_PTR_TO_OFF((SH_TAILQ_PREVP(listelm, field, type)), \ + elm)); \ + (listelm)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } \ +} while (0) + +#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do { \ + if ((listelm)->field.stqe_next != -1) { \ + (elm)->field.stqe_next = (listelm)->field.stqe_next - \ + SH_PTR_TO_OFF(listelm, elm); \ + SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (elm)->field.stqe_next = -1; \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \ + } \ + (listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_TAILQ_REMOVE(head, elm, field, type) do { \ + if ((elm)->field.stqe_next != -1) { \ + SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev = \ + (elm)->field.stqe_prev + \ + 
SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm, \ + field, type), elm); \ + *__SH_TAILQ_PREV_OFF(elm, field) += (elm)->field.stqe_next;\ + } else { \ + (head)->stqh_last = (elm)->field.stqe_prev + \ + SH_PTR_TO_OFF(head, elm); \ + *__SH_TAILQ_PREV_OFF(elm, field) = -1; \ + } \ +} while (0) + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_SHQUEUE_H_ */ diff --git a/src/dbinc/tcl_db.h b/src/dbinc/tcl_db.h new file mode 100644 index 00000000..754b31d5 --- /dev/null +++ b/src/dbinc/tcl_db.h @@ -0,0 +1,316 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef _DB_TCL_DB_H_ +#define _DB_TCL_DB_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define MSG_SIZE 100 /* Message size */ + +enum INFOTYPE { + I_AUX, I_DB, I_DBC, I_ENV, I_LOCK, I_LOGC, I_MP, I_NDBM, I_PG, I_SEQ, I_TXN}; + +#define MAX_ID 8 /* Maximum number of sub-id's we need */ +#define DBTCL_PREP 64 /* Size of txn_recover preplist */ + +#define DBTCL_DBM 1 +#define DBTCL_NDBM 2 + +#define DBTCL_GETCLOCK 0 +#define DBTCL_GETLIMIT 1 +#define DBTCL_GETREQ 2 + +#define DBTCL_MUT_ALIGN 0 +#define DBTCL_MUT_INCR 1 +#define DBTCL_MUT_INIT 2 +#define DBTCL_MUT_MAX 3 +#define DBTCL_MUT_TAS 4 + +/* + * Data structure to record information about events that have occurred. Tcl + * command "env event_info" can retrieve the information. For now, we record + * only one occurrence per event type; "env event_info -clear" can be used to + * reset the info. + * + * Besides the bit flag that records the fact that an event type occurred, some + * event types have associated "info" and we record that here too. When new + * event types are invented that have associated info, we should add a field + * here to record that info as well, so that it can be returned to the script + * with the "env event_info" results. + */ +typedef struct dbtcl_event_info { + u_int32_t events; /* Bit flag on for each event fired. 
*/ + int panic_error; + int newmaster_eid; + int added_eid; + int removed_eid; + pid_t attached_process; + int connected_eid; + DB_REPMGR_CONN_ERR conn_broken_info; + DB_REPMGR_CONN_ERR conn_failed_try_info; + DB_LSN sync_point; +} DBTCL_EVENT_INFO; + +/* + * Why use a home grown package over the Tcl_Hash functions? + * + * We could have implemented the stuff below without maintaining our + * own list manipulation, efficiently hashing it with the available + * Tcl functions (Tcl_CreateHashEntry, Tcl_GetHashValue, etc). I chose + * not to do so for these reasons: + * + * We still need the information below. Using the hashing only removes + * us from needing the next/prev pointers. We still need the structure + * itself because we need more than one value associated with a widget. + * We need to keep track of parent pointers for sub-widgets (like cursors) + * so we can correctly close. We need to keep track of individual widget's + * id counters for any sub-widgets they may have. We need to be able to + * associate the name/client data outside the scope of the widget. + * + * So, is it better to use the hashing rather than + * the linear list we have now? I decided against it for the simple reason + * that to access the structure would require two calls. The first is + * Tcl_FindHashEntry(table, key) and then, once we have the entry, we'd + * have to do Tcl_GetHashValue(entry) to get the pointer of the structure. + * + * I believe the number of simultaneous DB widgets in existence at one time + * is not going to be that large (more than several dozen) such that + * linearly searching the list is not going to impact performance in a + * noticeable way. Should performance be impacted due to the size of the + * info list, then perhaps it is time to revisit this decision. 
+ */ +typedef struct dbtcl_info { + LIST_ENTRY(dbtcl_info) entries; + Tcl_Interp *i_interp; + char *i_name; + enum INFOTYPE i_type; + union infop { + DB *dbp; + DBC *dbcp; + DB_ENV *envp; + DB_LOCK *lock; + DB_LOGC *logc; + DB_MPOOLFILE *mp; + DB_TXN *txnp; + void *anyp; + } un; + union data { + int anydata; + db_pgno_t pgno; /* For I_MP. */ + u_int32_t lockid; /* For I_LOCK. */ + DBTCL_EVENT_INFO *event_info; /* For I_ENV. */ + DB_TXN_TOKEN *commit_token; /* For I_TXN. */ + } und; + union data2 { + int anydata; + int pagesz; /* For I_MP. */ + DB_COMPACT *c_data; /* For I_DB. */ + db_mutex_t mutex; /* Protects event_info (I_ENV). */ + } und2; + DBT i_lockobj; + FILE *i_err; + char *i_errpfx; + FILE *i_msg; + + /* Callbacks--Tcl_Objs containing proc names */ + Tcl_Obj *i_compare; + Tcl_Obj *i_dupcompare; + Tcl_Obj *i_foreign_call; + Tcl_Obj *i_hashproc; + Tcl_Obj *i_isalive; + Tcl_Obj *i_part_callback; + Tcl_Obj *i_rep_send; + Tcl_Obj *i_second_call; + + /* Environment ID for the i_rep_send callback. */ + Tcl_Obj *i_rep_eid; + + struct dbtcl_info *i_parent; + int i_otherid[MAX_ID]; + + /* Heap dbs have an associated recno db, and secondary db. 
*/ + DB *hrdbp; + DB *hsdbp; +} DBTCL_INFO; + +#define i_anyp un.anyp +#define i_dbp un.dbp +#define i_dbcp un.dbcp +#define i_envp un.envp +#define i_lock un.lock +#define i_logc un.logc +#define i_mp un.mp +#define i_pagep un.anyp +#define i_txnp un.txnp + +#define i_data und.anydata +#define i_pgno und.pgno +#define i_locker und.lockid +#define i_event_info und.event_info +#define i_commit_token und.commit_token +#define i_data2 und2.anydata +#define i_pgsz und2.pagesz +#define i_cdata und2.c_data +#define i_mutex und2.mutex + +#define i_envtxnid i_otherid[0] +#define i_envmpid i_otherid[1] +#define i_envlockid i_otherid[2] +#define i_envlogcid i_otherid[3] + +#define i_mppgid i_otherid[0] + +#define i_dbdbcid i_otherid[0] + +extern int __debug_on, __debug_print, __debug_stop, __debug_test; + +typedef struct dbtcl_global { + LIST_HEAD(infohead, dbtcl_info) g_infohead; +} DBTCL_GLOBAL; +#define __db_infohead __dbtcl_global.g_infohead + +extern DBTCL_GLOBAL __dbtcl_global; + +/* + * Tcl_NewStringObj takes an "int" length argument, when the typical use is to + * call it with a size_t length (for example, returned by strlen). Tcl is in + * the wrong, but that doesn't help us much -- cast the argument. + */ +#define NewStringObj(a, b) \ + Tcl_NewStringObj((a), (int)(b)) + +#define NAME_TO_DB(name) (DB *)_NameToPtr((name)) +#define NAME_TO_DBC(name) (DBC *)_NameToPtr((name)) +#define NAME_TO_ENV(name) (DB_ENV *)_NameToPtr((name)) +#define NAME_TO_LOCK(name) (DB_LOCK *)_NameToPtr((name)) +#define NAME_TO_MP(name) (DB_MPOOLFILE *)_NameToPtr((name)) +#define NAME_TO_TXN(name) (DB_TXN *)_NameToPtr((name)) +#define NAME_TO_SEQUENCE(name) (DB_SEQUENCE *)_NameToPtr((name)) + +/* + * MAKE_STAT_LIST appends a {name value} pair to a result list that MUST be + * called 'res' that is a Tcl_Obj * in the local function. This macro also + * assumes a label "error" to go to in the event of a Tcl error. 
For stat + * functions this will typically go before the "free" function to free the + * stat structure returned by DB. + */ +#define MAKE_STAT_LIST(s, v) do { \ + result = _SetListElemInt(interp, res, (s), (long)(v)); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +#define MAKE_WSTAT_LIST(s, v) do { \ + result = _SetListElemWideInt(interp, res, (s), (int64_t)(v)); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * MAKE_STAT_LSN appends a {name {LSNfile LSNoffset}} pair to a result list + * that MUST be called 'res' that is a Tcl_Obj * in the local + * function. This macro also assumes a label "error" to go to + * in the event of a Tcl error. For stat functions this will + * typically go before the "free" function to free the stat structure + * returned by DB. + */ +#define MAKE_STAT_LSN(s, lsn) do { \ + myobjc = 2; \ + myobjv[0] = Tcl_NewLongObj((long)(lsn)->file); \ + myobjv[1] = Tcl_NewLongObj((long)(lsn)->offset); \ + lsnlist = Tcl_NewListObj(myobjc, myobjv); \ + myobjc = 2; \ + myobjv[0] = Tcl_NewStringObj((s), (int)strlen(s)); \ + myobjv[1] = lsnlist; \ + thislist = Tcl_NewListObj(myobjc, myobjv); \ + result = Tcl_ListObjAppendElement(interp, res, thislist); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * MAKE_STAT_STRLIST appends a {name string} pair to a result list + * that MUST be called 'res' that is a Tcl_Obj * in the local + * function. This macro also assumes a label "error" to go to + * in the event of a Tcl error. For stat functions this will + * typically go before the "free" function to free the stat structure + * returned by DB. + */ +#define MAKE_STAT_STRLIST(s,s1) do { \ + result = _SetListElem(interp, res, (s), (u_int32_t)strlen(s), \ + (s1), (u_int32_t)strlen(s1)); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * MAKE_SITE_LIST appends a {eid host port status} tuple to a result list + * that MUST be called 'res' that is a Tcl_Obj * in the local function. 
+ * This macro also assumes a label "error" to go to in the event of a Tcl + * error. + */ +#define MAKE_SITE_LIST(e, h, p, s, pr) do { \ + myobjc = 5; \ + myobjv[0] = Tcl_NewIntObj(e); \ + myobjv[1] = Tcl_NewStringObj((h), (int)strlen(h)); \ + myobjv[2] = Tcl_NewIntObj((int)p); \ + myobjv[3] = Tcl_NewStringObj((s), (int)strlen(s)); \ + myobjv[4] = Tcl_NewStringObj((pr), (int)strlen(pr)); \ + thislist = Tcl_NewListObj(myobjc, myobjv); \ + result = Tcl_ListObjAppendElement(interp, res, thislist); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * FLAG_CHECK checks that the given flag is not set yet. + * If it is, it sets up an error message. + */ +#define FLAG_CHECK(flag) do { \ + if ((flag) != 0) { \ + Tcl_SetResult(interp, \ + " Only 1 policy can be specified.\n", \ + TCL_STATIC); \ + result = TCL_ERROR; \ + break; \ + } \ +} while (0) + +/* + * FLAG_CHECK2 checks that the given flag is not set yet or is + * only set to the given allowed value. + * If it is, it sets up an error message. + */ +#define FLAG_CHECK2(flag, val) do { \ + if (((flag) & ~(val)) != 0) { \ + Tcl_SetResult(interp, \ + " Only 1 policy can be specified.\n", \ + TCL_STATIC); \ + result = TCL_ERROR; \ + break; \ + } \ +} while (0) + +/* + * IS_HELP checks whether the arg we bombed on is -?, which is a help option. + * If it is, we return TCL_OK (but leave the result set to whatever + * Tcl_GetIndexFromObj says, which lists all the valid options. Otherwise + * return TCL_ERROR. + */ +#define IS_HELP(s) \ + (strcmp(Tcl_GetStringFromObj(s,NULL), "-?") == 0) ? TCL_OK : TCL_ERROR + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/tcl_ext.h" +#endif /* !_DB_TCL_DB_H_ */ diff --git a/src/dbinc/txn.h b/src/dbinc/txn.h new file mode 100644 index 00000000..36546377 --- /dev/null +++ b/src/dbinc/txn.h @@ -0,0 +1,287 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef _DB_TXN_H_ +#define _DB_TXN_H_ + +#include "dbinc/xa.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Operation parameters to the delayed commit processing code. */ +typedef enum { + TXN_CLOSE, /* Close a DB handle whose close had failed. */ + TXN_REMOVE, /* Remove a file. */ + TXN_TRADE, /* Trade lockers. */ + TXN_TRADED /* Already traded; downgrade lock. */ +} TXN_EVENT_T; + +struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION; +struct __db_txn_stat_int; +typedef struct __db_txn_stat_int DB_TXN_STAT_INT; +struct __txn_logrec; typedef struct __txn_logrec DB_TXNLOGREC; + +/* + * !!! + * TXN_MINIMUM = (DB_LOCK_MAXID + 1) but this makes compilers complain. + */ +#define TXN_MINIMUM 0x80000000 +#define TXN_MAXIMUM 0xffffffff /* Maximum number of txn ids. */ +#define TXN_INVALID 0 /* Invalid transaction ID. */ + +#define DEF_MAX_TXNS 100 /* Default max transactions. */ +#define TXN_NSLOTS 4 /* Initial slots to hold DB refs */ + +#define TXN_PRIORITY_DEFAULT DB_LOCK_DEFPRIORITY + +/* + * This structure must contain the same fields as the __db_txn_stat struct + * except for any pointer fields that are filled in only when the struct is + * being populated for output through the API. + */ +DB_ALIGN8 struct __db_txn_stat_int { /* SHARED */ + u_int32_t st_nrestores; /* number of restored transactions + after recovery. 
*/ +#ifndef __TEST_DB_NO_STATISTICS + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_inittxns; /* initial txns allocated */ + u_int32_t st_maxtxns; /* maximum txns possible */ + uintmax_t st_naborts; /* number of aborted transactions */ + uintmax_t st_nbegins; /* number of begun transactions */ + uintmax_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + u_int32_t st_nsnapshot; /* number of snapshot transactions */ + u_int32_t st_maxnactive; /* maximum active transactions */ + u_int32_t st_maxnsnapshot; /* maximum snapshot transactions */ + uintmax_t st_region_wait; /* Region lock granted after wait. */ + uintmax_t st_region_nowait; /* Region lock granted without wait. */ + roff_t st_regsize; /* Region size. */ +#endif +}; + +/* + * Internal data maintained in shared memory for each transaction. + */ +typedef struct __txn_detail { + u_int32_t txnid; /* current transaction id + used to link free list also */ + pid_t pid; /* Process owning txn */ + db_threadid_t tid; /* Thread owning txn */ + + DB_LSN last_lsn; /* Last LSN written for this txn. */ + DB_LSN begin_lsn; /* LSN of begin record. */ + roff_t parent; /* Offset of transaction's parent. */ + roff_t name; /* Offset of txn name. */ + + u_int32_t nlog_dbs; /* Number of databases used. */ + u_int32_t nlog_slots; /* Number of allocated slots. */ + roff_t log_dbs; /* Databases used. */ + + DB_LSN read_lsn; /* Read LSN for MVCC. */ + DB_LSN visible_lsn; /* LSN at which this transaction's + changes are visible. */ + db_mutex_t mvcc_mtx; /* Version mutex. */ + u_int32_t mvcc_ref; /* Number of buffers created by this + transaction still in cache. */ + + u_int32_t priority; /* Deadlock resolution priority. */ + + SH_TAILQ_HEAD(__tdkids) kids; /* Linked list of child txn detail. 
*/ + SH_TAILQ_ENTRY klinks; + + /* TXN_{ABORTED, COMMITTED PREPARED, RUNNING} */ + u_int32_t status; /* status of the transaction */ + +#define TXN_DTL_COLLECTED 0x01 /* collected during txn_recover */ +#define TXN_DTL_RESTORED 0x02 /* prepared txn restored */ +#define TXN_DTL_INMEMORY 0x04 /* uses in memory logs */ +#define TXN_DTL_SNAPSHOT 0x08 /* On the list of snapshot txns. */ +#define TXN_DTL_NOWAIT 0x10 /* Don't block on locks. */ + u_int32_t flags; + + SH_TAILQ_ENTRY links; /* active/free/snapshot list */ + + u_int32_t xa_ref; /* XA: reference count; number + of DB_TXNs reffing this struct */ + /* TXN_XA_{ACTIVE, DEADLOCKED, IDLE, PREPARED, ROLLEDBACK} */ + u_int32_t xa_br_status; /* status of XA branch */ + u_int8_t gid[DB_GID_SIZE]; /* global transaction id */ + u_int32_t bqual; /* bqual_length from XID */ + u_int32_t gtrid; /* gtrid_length from XID */ + int32_t format; /* XA format */ + roff_t slots[TXN_NSLOTS]; /* Initial DB slot allocation. */ +} TXN_DETAIL; + +/* + * DB_TXNMGR -- + * The transaction manager encapsulates the transaction system. + */ +struct __db_txnmgr { + /* + * These fields need to be protected for multi-threaded support. + * + * Lock list of active transactions (including the content of each + * TXN_DETAIL structure on the list). + */ + db_mutex_t mutex; + /* List of active transactions. */ + TAILQ_HEAD(_chain, __db_txn) txn_chain; + + u_int32_t n_discards; /* Number of txns discarded. */ + + /* These fields are never updated after creation, so not protected. */ + ENV *env; /* Environment. */ + REGINFO reginfo; /* Region information. */ +}; + +/* Macros to lock/unlock the transaction region as a whole. 
*/ +#define TXN_SYSTEM_LOCK(env) \ + MUTEX_LOCK(env, ((DB_TXNREGION *) \ + (env)->tx_handle->reginfo.primary)->mtx_region) +#define TXN_SYSTEM_UNLOCK(env) \ + MUTEX_UNLOCK(env, ((DB_TXNREGION *) \ + (env)->tx_handle->reginfo.primary)->mtx_region) + +/* + * DB_TXNREGION -- + * The primary transaction data structure in the shared memory region. + */ +struct __db_txnregion { /* SHARED */ + db_mutex_t mtx_region; /* Region mutex. */ + + u_int32_t inittxns; /* initial number of active TXNs */ + u_int32_t curtxns; /* current number of active TXNs */ + u_int32_t maxtxns; /* maximum number of active TXNs */ + u_int32_t last_txnid; /* last transaction id given out */ + u_int32_t cur_maxid; /* current max unused id. */ + + db_mutex_t mtx_ckp; /* Single thread checkpoints. */ + DB_LSN last_ckp; /* lsn of the last checkpoint */ + time_t time_ckp; /* time of last checkpoint */ + + DB_TXN_STAT_INT stat; /* Statistics for txns. */ + + u_int32_t n_bulk_txn; /* Num. bulk txns in progress. */ + u_int32_t n_hotbackup; /* Num. of outstanding backup notices.*/ + +#define TXN_IN_RECOVERY 0x01 /* environment is being recovered */ + u_int32_t flags; + /* active TXN list */ + SH_TAILQ_HEAD(__active) active_txn; + SH_TAILQ_HEAD(__mvcc) mvcc_txn; +}; + +/* + * DB_COMMIT_INFO -- + * Meta-data uniquely describing a transaction commit across a replication + * group. + */ +struct __db_commit_info { + u_int32_t version; /* Stored format version. */ + u_int32_t gen; /* Replication master generation. */ + u_int32_t envid; /* Unique env ID of master. */ + DB_LSN lsn; /* LSN of commit log record. */ +}; + +/* + * DB_TXNLOGREC -- + * An in-memory, linked-list copy of a log record. + */ +struct __txn_logrec { + STAILQ_ENTRY(__txn_logrec) links;/* Linked list. */ + + u_int8_t data[1]; /* Log record. */ +}; + +/* + * Log record types. Note that these are *not* alphabetical. This is + * intentional so that we don't change the meaning of values between + * software upgrades. 
+ * + * EXPECTED, UNEXPECTED, IGNORE, and OK are used in the txnlist functions. + * Here is an explanation of how the statuses are used. + * + * TXN_OK + * BEGIN records for transactions found on the txnlist during + * OPENFILES (BEGIN records are those with a prev_lsn of 0,0) + * + * TXN_COMMIT + * Transaction committed and should be rolled forward. + * + * TXN_ABORT + * This transaction's changes must be undone. Either there was + * never a prepare or commit record for this transaction OR there + * was a commit, but we are recovering to a timestamp or particular + * LSN and that point is before this transaction's commit. + * + * TXN_PREPARE + * Prepare record, but no commit record is in the log. + * + * TXN_IGNORE + * Generic meaning is that this transaction should not be + * processed during later recovery passes. We use it in a + * number of different manners: + * + * 1. We never saw its BEGIN record. Therefore, the logs have + * been reclaimed and we *know* that this transaction doesn't + * need to be aborted, because in order for it to be + * reclaimed, there must have been a subsequent checkpoint + * (and any dirty pages for this transaction made it to + * disk). + * + * 2. This is a child transaction that created a database. + * For some reason, we don't want to recreate that database + * (i.e., it already exists or some other database created + * after it exists). + * + * 3. During recovery open of subdatabases, if the master check fails, + * we use a TXN_IGNORE on the create of the subdb in the nested + * transaction. + * + * 4. During a remove, the file with the name being removed isn't + * the file for which we are recovering a remove. + * + * TXN_EXPECTED + * After a successful open during recovery, we update the + * transaction's status to TXN_EXPECTED. The open was done + * in the parent, but in the open log record, we record the + * child transaction's ID if we also did a create. 
When there + * is a valid ID in that field, we use it and mark the child's + * status as TXN_EXPECTED (indicating that we don't need to redo + * a create for this file). + * + * When recovering a remove, if we don't find or can't open + * the file, the child (which does the remove) gets marked + * EXPECTED (indicating that we don't need to redo the remove). + * + * TXN_UNEXPECTED + * During recovery, we attempted an open that should have succeeded + * and we got ENOENT, so like with the EXPECTED case, we indicate + * in the child that we got the UNEXPECTED return so that we do redo + * the creating/deleting operation. + * + */ +#define TXN_OK 0 +#define TXN_COMMIT 1 +#define TXN_PREPARE 2 +#define TXN_ABORT 3 +#define TXN_IGNORE 4 +#define TXN_EXPECTED 5 +#define TXN_UNEXPECTED 6 + +#if defined(__cplusplus) +} +#endif + +#include "dbinc_auto/txn_auto.h" +#include "dbinc_auto/txn_ext.h" +#endif /* !_DB_TXN_H_ */ diff --git a/src/dbinc/win_db.h b/src/dbinc/win_db.h new file mode 100644 index 00000000..497aaa07 --- /dev/null +++ b/src/dbinc/win_db.h @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * The following provides the information necessary to build Berkeley + * DB on native Windows, and other Windows environments such as MinGW. + */ + +/* + * Berkeley DB requires at least Windows 2000, tell Visual Studio of the + * requirement. + */ +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0500 +#endif + +#ifndef DB_WINCE +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#endif /* DB_WINCE */ + +#include +#include +#include +#include +#include + +/* + * To build Tcl interface libraries, the include path must be configured to + * use the directory containing , usually the include directory in + * the Tcl distribution. 
+ */ +#ifdef DB_TCL_SUPPORT +#include +#endif + +#define WIN32_LEAN_AND_MEAN +#include +#include +#ifndef DB_WINCE +#include +#endif + +#ifdef HAVE_GETADDRINFO +/* + * Need explicit includes for IPv6 support on Windows. Both are necessary to + * ensure that pre WinXP versions have an implementation of the getaddrinfo API. + */ +#include +#include +#endif + +/* + * Microsoft's C runtime library has fsync, getcwd, getpid, snprintf and + * vsnprintf, but under different names. + */ +#define fsync _commit + +#ifndef DB_WINCE +#define getcwd(buf, size) _getcwd(buf, size) +#endif +#define getpid GetCurrentProcessId +#define snprintf _snprintf +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define vsnprintf _vsnprintf + +#define h_errno WSAGetLastError() + +/* + * Win32 does not have getopt. + * + * The externs are here, instead of using db_config.h and clib_port.h, because + * that approach changes function names to BDB specific names, and the example + * programs use getopt and can't use BDB specific names. + */ +#if defined(__cplusplus) +extern "C" { +#endif +extern int getopt(int, char * const *, const char *); +#if defined(__cplusplus) +} +#endif + +/* + * Microsoft's compiler _doesn't_ define __STDC__ unless you invoke it with + * arguments turning OFF all vendor extensions. Even more unfortunately, if + * we do that, it fails to parse windows.h!!!!! So, we define __STDC__ here, + * after windows.h comes in. Note: the compiler knows we've defined it, and + * starts enforcing strict ANSI compliance from this point on. 
+ */ +#ifndef __STDC__ +#define __STDC__ 1 +#endif + +#ifdef _UNICODE +#define TO_TSTRING(dbenv, s, ts, ret) do { \ + int __len = (int)strlen(s) + 1; \ + ts = NULL; \ + if ((ret = __os_malloc((dbenv), \ + __len * sizeof(_TCHAR), &(ts))) == 0 && \ + MultiByteToWideChar(CP_UTF8, 0, \ + (s), -1, (ts), __len) == 0) \ + ret = __os_posix_err(__os_get_syserr()); \ + } while (0) + +#define FROM_TSTRING(dbenv, ts, s, ret) { \ + int __len = WideCharToMultiByte(CP_UTF8, 0, ts, -1, \ + NULL, 0, NULL, NULL); \ + s = NULL; \ + if ((ret = __os_malloc((dbenv), __len, &(s))) == 0 && \ + WideCharToMultiByte(CP_UTF8, 0, \ + (ts), -1, (s), __len, NULL, NULL) == 0) \ + ret = __os_posix_err(__os_get_syserr()); \ + } while (0) + +#define FREE_STRING(dbenv, s) do { \ + if ((s) != NULL) { \ + __os_free((dbenv), (s)); \ + (s) = NULL; \ + } \ + } while (0) + +#else +#define TO_TSTRING(dbenv, s, ts, ret) (ret) = 0, (ts) = (_TCHAR *)(s) +#define FROM_TSTRING(dbenv, ts, s, ret) (ret) = 0, (s) = (char *)(ts) +#define FREE_STRING(dbenv, ts) +#endif + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE ((HANDLE)-1) +#endif + +#ifndef INVALID_FILE_ATTRIBUTES +#define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif + +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif diff --git a/src/dbinc/xa.h b/src/dbinc/xa.h new file mode 100644 index 00000000..726104c3 --- /dev/null +++ b/src/dbinc/xa.h @@ -0,0 +1,183 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +/* + * Start of xa.h header + * + * Define a symbol to prevent multiple inclusions of this header file + */ +#ifndef _DB_XA_H_ +#define _DB_XA_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Transaction branch identification: XID and NULLXID: + */ +#define XIDDATASIZE 128 /* size in bytes */ +#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ +#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ + +struct xid_t { + long formatID; /* format identifier */ + long gtrid_length; /* value from 1 through 64 */ + long bqual_length; /* value from 1 through 64 */ + char data[XIDDATASIZE]; +}; +typedef struct xid_t XID; +/* + * A value of -1 in formatID means that the XID is null. + */ + +/* + * Declarations of routines by which RMs call TMs: + */ +extern int ax_reg __P((int, XID *, long)); +extern int ax_unreg __P((int, long)); + +/* + * XA Switch Data Structure + */ +#define RMNAMESZ 32 /* length of resource manager name, */ + /* including the null terminator */ +#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */ + /* strings, including the null + terminator */ +struct xa_switch_t { + char name[RMNAMESZ]; /* name of resource manager */ + long flags; /* resource manager specific options */ + long version; /* must be 0 */ + int (*xa_open_entry) /* xa_open function pointer */ + __P((char *, int, long)); + int (*xa_close_entry) /* xa_close function pointer */ + __P((char *, int, long)); + int (*xa_start_entry) /* xa_start function pointer */ + __P((XID *, int, long)); + int (*xa_end_entry) /* xa_end function pointer */ + __P((XID *, int, long)); + int (*xa_rollback_entry) /* xa_rollback function pointer */ + __P((XID *, int, long)); + int (*xa_prepare_entry) /* xa_prepare function pointer */ + __P((XID *, int, long)); + int (*xa_commit_entry) /* xa_commit function pointer */ + __P((XID *, int, long)); + int (*xa_recover_entry) /* xa_recover function pointer */ + __P((XID *, long, int, long)); + int 
(*xa_forget_entry) /* xa_forget function pointer */ + __P((XID *, int, long)); + int (*xa_complete_entry) /* xa_complete function pointer */ + __P((int *, int *, int, long)); +}; + +/* + * Flag definitions for the RM switch + */ +#define TMNOFLAGS 0x00000000L /* no resource manager features + selected */ +#define TMREGISTER 0x00000001L /* resource manager dynamically + registers */ +#define TMNOMIGRATE 0x00000002L /* resource manager does not support + association migration */ +#define TMUSEASYNC 0x00000004L /* resource manager supports + asynchronous operations */ +/* + * Flag definitions for xa_ and ax_ routines + */ +/* use TMNOFLAGGS, defined above, when not specifying other flags */ +#define TMASYNC 0x80000000L /* perform routine asynchronously */ +#define TMONEPHASE 0x40000000L /* caller is using one-phase commit + optimisation */ +#define TMFAIL 0x20000000L /* dissociates caller and marks + transaction branch rollback-only */ +#define TMNOWAIT 0x10000000L /* return if blocking condition + exists */ +#define TMRESUME 0x08000000L /* caller is resuming association with + suspended transaction branch */ +#define TMSUCCESS 0x04000000L /* dissociate caller from transaction + branch */ +#define TMSUSPEND 0x02000000L /* caller is suspending, not ending, + association */ +#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */ +#define TMENDRSCAN 0x00800000L /* end a recovery scan */ +#define TMMULTIPLE 0x00400000L /* wait for any asynchronous + operation */ +#define TMJOIN 0x00200000L /* caller is joining existing + transaction branch */ +#define TMMIGRATE 0x00100000L /* caller intends to perform + migration */ + +/* + * ax_() return codes (transaction manager reports to resource manager) + */ +#define TM_JOIN 2 /* caller is joining existing + transaction branch */ +#define TM_RESUME 1 /* caller is resuming association with + suspended transaction branch */ +#define TM_OK 0 /* normal execution */ +#define TMER_TMERR -1 /* an error occurred in the transaction + 
manager */ +#define TMER_INVAL -2 /* invalid arguments were given */ +#define TMER_PROTO -3 /* routine invoked in an improper + context */ + +/* + * xa_() return codes (resource manager reports to transaction manager) + */ +#define XA_RBBASE 100 /* The inclusive lower bound of the + rollback codes */ +#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an + unspecified reason */ +#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a + communication failure */ +#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */ +#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the + integrity of the resources was + detected */ +#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the + transaction branch for a reason not + on this list */ +#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the + resource manager */ +#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took too long */ +#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */ +#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the + rollback codes */ +#define XA_NOMIGRATE 9 /* resumption must occur where + suspension occurred */ +#define XA_HEURHAZ 8 /* the transaction branch may have + been heuristically completed */ +#define XA_HEURCOM 7 /* the transaction branch has been + heuristically committed */ +#define XA_HEURRB 6 /* the transaction branch has been + heuristically rolled back */ +#define XA_HEURMIX 5 /* the transaction branch has been + heuristically committed and rolled + back */ +#define XA_RETRY 4 /* routine returned with no effect and + may be re-issued */ +#define XA_RDONLY 3 /* the transaction branch was read-only + and has been committed */ +#define XA_OK 0 /* normal execution */ +#define XAER_ASYNC -2 /* asynchronous operation already + outstanding */ +#define XAER_RMERR -3 /* a resource manager error occurred in + the transaction branch */ +#define XAER_NOTA -4 /* the XID is not valid */ 
+#define XAER_INVAL -5 /* invalid arguments were given */ +#define XAER_PROTO -6 /* routine invoked in an improper + context */ +#define XAER_RMFAIL -7 /* resource manager unavailable */ +#define XAER_DUPID -8 /* the XID already exists */ +#define XAER_OUTSIDE -9 /* resource manager doing work outside + transaction */ + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_XA_H_ */ diff --git a/src/dbinc_auto/api_flags.in b/src/dbinc_auto/api_flags.in new file mode 100644 index 00000000..3768c73f --- /dev/null +++ b/src/dbinc_auto/api_flags.in @@ -0,0 +1,220 @@ +/* DO NOT EDIT: automatically built by dist/s_apiflags. */ +#define DB_AGGRESSIVE 0x00000001 +#define DB_ARCH_ABS 0x00000001 +#define DB_ARCH_DATA 0x00000002 +#define DB_ARCH_LOG 0x00000004 +#define DB_ARCH_REMOVE 0x00000008 +#define DB_AUTO_COMMIT 0x00000100 +#define DB_BOOTSTRAP_HELPER 0x00000001 +#define DB_CDB_ALLDB 0x00000040 +#define DB_CHKSUM 0x00000008 +#define DB_CKP_INTERNAL 0x00000002 +#define DB_CREATE 0x00000001 +#define DB_CURSOR_BULK 0x00000001 +#define DB_CURSOR_TRANSIENT 0x00000004 +#define DB_CXX_NO_EXCEPTIONS 0x00000002 +#define DB_DATABASE_LOCKING 0x00000080 +#define DB_DIRECT 0x00000010 +#define DB_DIRECT_DB 0x00000200 +#define DB_DSYNC_DB 0x00000400 +#define DB_DUP 0x00000010 +#define DB_DUPSORT 0x00000004 +#define DB_DURABLE_UNKNOWN 0x00000020 +#define DB_ENCRYPT 0x00000001 +#define DB_ENCRYPT_AES 0x00000001 +#define DB_EXCL 0x00000040 +#define DB_EXTENT 0x00000040 +#define DB_FAILCHK 0x00000020 +#define DB_FAILCHK_ISALIVE 0x00000040 +#define DB_FAST_STAT 0x00000001 +#define DB_FCNTL_LOCKING 0x00000800 +#define DB_FLUSH 0x00000001 +#define DB_FORCE 0x00000001 +#define DB_FORCESYNC 0x00000001 +#define DB_FOREIGN_ABORT 0x00000001 +#define DB_FOREIGN_CASCADE 0x00000002 +#define DB_FOREIGN_NULLIFY 0x00000004 +#define DB_FREELIST_ONLY 0x00000001 +#define DB_FREE_SPACE 0x00000002 +#define DB_GROUP_CREATOR 0x00000002 +#define DB_HOTBACKUP_IN_PROGRESS 0x00000800 +#define DB_IGNORE_LEASE 
0x00001000 +#define DB_IMMUTABLE_KEY 0x00000002 +#define DB_INIT_CDB 0x00000080 +#define DB_INIT_LOCK 0x00000100 +#define DB_INIT_LOG 0x00000200 +#define DB_INIT_MPOOL 0x00000400 +#define DB_INIT_MUTEX 0x00000800 +#define DB_INIT_REP 0x00001000 +#define DB_INIT_TXN 0x00002000 +#define DB_INORDER 0x00000020 +#define DB_INTERNAL_DB 0x00001000 +#define DB_JOIN_NOSORT 0x00000001 +#define DB_LEGACY 0x00000004 +#define DB_LOCAL_SITE 0x00000008 +#define DB_LOCKDOWN 0x00004000 +#define DB_LOCK_CHECK 0x00000001 +#define DB_LOCK_NOWAIT 0x00000002 +#define DB_LOCK_RECORD 0x00000004 +#define DB_LOCK_SET_TIMEOUT 0x00000008 +#define DB_LOCK_SWITCH 0x00000010 +#define DB_LOCK_UPGRADE 0x00000020 +#define DB_LOG_AUTO_REMOVE 0x00000001 +#define DB_LOG_CHKPNT 0x00000002 +#define DB_LOG_COMMIT 0x00000004 +#define DB_LOG_DIRECT 0x00000002 +#define DB_LOG_DSYNC 0x00000004 +#define DB_LOG_IN_MEMORY 0x00000008 +#define DB_LOG_NOCOPY 0x00000008 +#define DB_LOG_NOT_DURABLE 0x00000010 +#define DB_LOG_NO_DATA 0x00000004 +#define DB_LOG_VERIFY_CAF 0x00000001 +#define DB_LOG_VERIFY_DBFILE 0x00000002 +#define DB_LOG_VERIFY_ERR 0x00000004 +#define DB_LOG_VERIFY_FORWARD 0x00000008 +#define DB_LOG_VERIFY_INTERR 0x00000010 +#define DB_LOG_VERIFY_PARTIAL 0x00000020 +#define DB_LOG_VERIFY_VERBOSE 0x00000040 +#define DB_LOG_VERIFY_WARNING 0x00000080 +#define DB_LOG_WRNOSYNC 0x00000020 +#define DB_LOG_ZERO 0x00000010 +#define DB_MPOOL_CREATE 0x00000001 +#define DB_MPOOL_DIRTY 0x00000002 +#define DB_MPOOL_DISCARD 0x00000001 +#define DB_MPOOL_EDIT 0x00000004 +#define DB_MPOOL_FREE 0x00000008 +#define DB_MPOOL_LAST 0x00000010 +#define DB_MPOOL_NEW 0x00000020 +#define DB_MPOOL_NOFILE 0x00000001 +#define DB_MPOOL_NOLOCK 0x00000002 +#define DB_MPOOL_TRY 0x00000040 +#define DB_MPOOL_UNLINK 0x00000002 +#define DB_MULTIPLE 0x00000800 +#define DB_MULTIPLE_KEY 0x00004000 +#define DB_MULTIVERSION 0x00000004 +#define DB_MUTEX_ALLOCATED 0x00000001 +#define DB_MUTEX_LOCKED 0x00000002 +#define DB_MUTEX_LOGICAL_LOCK 
0x00000004 +#define DB_MUTEX_PROCESS_ONLY 0x00000008 +#define DB_MUTEX_SELF_BLOCK 0x00000010 +#define DB_MUTEX_SHARED 0x00000020 +#define DB_NOERROR 0x00002000 +#define DB_NOFLUSH 0x00001000 +#define DB_NOLOCKING 0x00002000 +#define DB_NOMMAP 0x00000008 +#define DB_NOORDERCHK 0x00000002 +#define DB_NOPANIC 0x00004000 +#define DB_NOSYNC 0x00000001 +#define DB_NO_AUTO_COMMIT 0x00004000 +#define DB_NO_CHECKPOINT 0x00008000 +#define DB_ODDFILESIZE 0x00000080 +#define DB_ORDERCHKONLY 0x00000004 +#define DB_OVERWRITE 0x00008000 +#define DB_PANIC_ENVIRONMENT 0x00010000 +#define DB_PRINTABLE 0x00000008 +#define DB_PRIVATE 0x00010000 +#define DB_PR_PAGE 0x00000010 +#define DB_PR_RECOVERYTEST 0x00000020 +#define DB_RDONLY 0x00000400 +#define DB_RDWRMASTER 0x00008000 +#define DB_READ_COMMITTED 0x00000400 +#define DB_READ_UNCOMMITTED 0x00000200 +#define DB_RECNUM 0x00000040 +#define DB_RECOVER 0x00000002 +#define DB_RECOVER_FATAL 0x00020000 +#define DB_REGION_INIT 0x00020000 +#define DB_REGISTER 0x00040000 +#define DB_RENUMBER 0x00000080 +#define DB_REPMGR_CONF_2SITE_STRICT 0x00000001 +#define DB_REPMGR_CONF_ELECTIONS 0x00000002 +#define DB_REPMGR_NEED_RESPONSE 0x00000001 +#define DB_REPMGR_PEER 0x00000010 +#define DB_REP_ANYWHERE 0x00000001 +#define DB_REP_CLIENT 0x00000001 +#define DB_REP_CONF_AUTOINIT 0x00000004 +#define DB_REP_CONF_AUTOROLLBACK 0x00000008 +#define DB_REP_CONF_BULK 0x00000010 +#define DB_REP_CONF_DELAYCLIENT 0x00000020 +#define DB_REP_CONF_INMEM 0x00000040 +#define DB_REP_CONF_LEASE 0x00000080 +#define DB_REP_CONF_NOWAIT 0x00000100 +#define DB_REP_ELECTION 0x00000004 +#define DB_REP_MASTER 0x00000002 +#define DB_REP_NOBUFFER 0x00000002 +#define DB_REP_PERMANENT 0x00000004 +#define DB_REP_REREQUEST 0x00000008 +#define DB_REVSPLITOFF 0x00000100 +#define DB_RMW 0x00002000 +#define DB_SALVAGE 0x00000040 +#define DB_SA_SKIPFIRSTKEY 0x00000080 +#define DB_SA_UNKNOWNKEY 0x00000100 +#define DB_SEQ_DEC 0x00000001 +#define DB_SEQ_INC 0x00000002 +#define 
DB_SEQ_RANGE_SET 0x00000004 +#define DB_SEQ_WRAP 0x00000008 +#define DB_SEQ_WRAPPED 0x00000010 +#define DB_SET_LOCK_TIMEOUT 0x00000001 +#define DB_SET_REG_TIMEOUT 0x00000004 +#define DB_SET_TXN_NOW 0x00000008 +#define DB_SET_TXN_TIMEOUT 0x00000002 +#define DB_SHALLOW_DUP 0x00000100 +#define DB_SNAPSHOT 0x00000200 +#define DB_STAT_ALL 0x00000004 +#define DB_STAT_ALLOC 0x00000008 +#define DB_STAT_CLEAR 0x00000001 +#define DB_STAT_LOCK_CONF 0x00000010 +#define DB_STAT_LOCK_LOCKERS 0x00000020 +#define DB_STAT_LOCK_OBJECTS 0x00000040 +#define DB_STAT_LOCK_PARAMS 0x00000080 +#define DB_STAT_MEMP_HASH 0x00000010 +#define DB_STAT_MEMP_NOERROR 0x00000020 +#define DB_STAT_SUBSYSTEM 0x00000002 +#define DB_STAT_SUMMARY 0x00000010 +#define DB_ST_DUPOK 0x00000200 +#define DB_ST_DUPSET 0x00000400 +#define DB_ST_DUPSORT 0x00000800 +#define DB_ST_IS_RECNO 0x00001000 +#define DB_ST_OVFL_LEAF 0x00002000 +#define DB_ST_RECNUM 0x00004000 +#define DB_ST_RELEN 0x00008000 +#define DB_ST_TOPLEVEL 0x00010000 +#define DB_SYSTEM_MEM 0x00080000 +#define DB_THREAD 0x00000010 +#define DB_TIME_NOTGRANTED 0x00040000 +#define DB_TRUNCATE 0x00010000 +#define DB_TXN_BULK 0x00000008 +#define DB_TXN_FAMILY 0x00000040 +#define DB_TXN_NOSYNC 0x00000001 +#define DB_TXN_NOT_DURABLE 0x00000002 +#define DB_TXN_NOWAIT 0x00000002 +#define DB_TXN_SNAPSHOT 0x00000010 +#define DB_TXN_SYNC 0x00000004 +#define DB_TXN_WAIT 0x00000080 +#define DB_TXN_WRITE_NOSYNC 0x00000020 +#define DB_UNREF 0x00020000 +#define DB_UPGRADE 0x00000001 +#define DB_USE_ENVIRON 0x00000004 +#define DB_USE_ENVIRON_ROOT 0x00000008 +#define DB_VERB_DEADLOCK 0x00000001 +#define DB_VERB_FILEOPS 0x00000002 +#define DB_VERB_FILEOPS_ALL 0x00000004 +#define DB_VERB_RECOVERY 0x00000008 +#define DB_VERB_REGISTER 0x00000010 +#define DB_VERB_REPLICATION 0x00000020 +#define DB_VERB_REPMGR_CONNFAIL 0x00000040 +#define DB_VERB_REPMGR_MISC 0x00000080 +#define DB_VERB_REP_ELECT 0x00000100 +#define DB_VERB_REP_LEASE 0x00000200 +#define DB_VERB_REP_MISC 
0x00000400 +#define DB_VERB_REP_MSGS 0x00000800 +#define DB_VERB_REP_SYNC 0x00001000 +#define DB_VERB_REP_SYSTEM 0x00002000 +#define DB_VERB_REP_TEST 0x00004000 +#define DB_VERB_WAITSFOR 0x00008000 +#define DB_VERIFY 0x00000002 +#define DB_VERIFY_PARTITION 0x00040000 +#define DB_WRITECURSOR 0x00000008 +#define DB_WRITELOCK 0x00000020 +#define DB_WRITEOPEN 0x00020000 +#define DB_XA_CREATE 0x00000001 +#define DB_YIELDCPU 0x00080000 diff --git a/src/dbinc_auto/btree_auto.h b/src/dbinc_auto/btree_auto.h new file mode 100644 index 00000000..e57551c7 --- /dev/null +++ b/src/dbinc_auto/btree_auto.h @@ -0,0 +1,456 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef __bam_AUTO_H +#define __bam_AUTO_H +#include "dbinc/log.h" +#define DB___bam_split 62 +typedef struct ___bam_split_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t opflags; + db_pgno_t left; + DB_LSN llsn; + db_pgno_t right; + DB_LSN rlsn; + u_int32_t indx; + db_pgno_t npgno; + DB_LSN nlsn; + db_pgno_t ppgno; + DB_LSN plsn; + u_int32_t pindx; + DBT pg; + DBT pentry; + DBT rentry; +} __bam_split_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_split_desc[]; +static inline int +__bam_split_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, u_int32_t opflags, db_pgno_t left, DB_LSN * llsn, db_pgno_t right, + DB_LSN * rlsn, u_int32_t indx, db_pgno_t npgno, DB_LSN * nlsn, db_pgno_t ppgno, + DB_LSN * plsn, u_int32_t pindx, const DBT *pg, const DBT *pentry, const DBT *rentry) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_split, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*llsn) + sizeof(u_int32_t) + sizeof(*rlsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*nlsn) + + sizeof(u_int32_t) + sizeof(*plsn) + sizeof(u_int32_t) + + LOG_DBT_SIZE(pg) + LOG_DBT_SIZE(pentry) + LOG_DBT_SIZE(rentry), + __bam_split_desc, opflags, left, 
llsn, right, rlsn, indx, npgno, + nlsn, ppgno, plsn, pindx, pg, pentry, rentry)); +} + +static inline int __bam_split_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_split_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_split_desc, sizeof(__bam_split_args), (void**)arg)); +} +#define DB___bam_split_48 62 +typedef struct ___bam_split_48_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t left; + DB_LSN llsn; + db_pgno_t right; + DB_LSN rlsn; + u_int32_t indx; + db_pgno_t npgno; + DB_LSN nlsn; + db_pgno_t ppgno; + DB_LSN plsn; + u_int32_t pindx; + DBT pg; + DBT pentry; + DBT rentry; + u_int32_t opflags; +} __bam_split_48_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_split_48_desc[]; +static inline int __bam_split_48_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_split_48_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_split_48_desc, sizeof(__bam_split_48_args), (void**)arg)); +} +#define DB___bam_split_42 62 +typedef struct ___bam_split_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t left; + DB_LSN llsn; + db_pgno_t right; + DB_LSN rlsn; + u_int32_t indx; + db_pgno_t npgno; + DB_LSN nlsn; + db_pgno_t root_pgno; + DBT pg; + u_int32_t opflags; +} __bam_split_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_split_42_desc[]; +static inline int __bam_split_42_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_split_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_split_42_desc, sizeof(__bam_split_42_args), (void**)arg)); +} +#define DB___bam_rsplit 63 +typedef struct ___bam_rsplit_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT pgdbt; + db_pgno_t root_pgno; + db_pgno_t nrec; + DBT rootent; + DB_LSN rootlsn; +} __bam_rsplit_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_rsplit_desc[]; +static 
inline int +__bam_rsplit_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, const DBT *pgdbt, db_pgno_t root_pgno, db_pgno_t nrec, + const DBT *rootent, DB_LSN * rootlsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_rsplit, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(pgdbt) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(rootent) + + sizeof(*rootlsn), + __bam_rsplit_desc, pgno, pgdbt, root_pgno, nrec, rootent, rootlsn)); +} + +static inline int __bam_rsplit_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_rsplit_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_rsplit_desc, sizeof(__bam_rsplit_args), (void**)arg)); +} +#define DB___bam_adj 55 +typedef struct ___bam_adj_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + u_int32_t indx_copy; + u_int32_t is_insert; +} __bam_adj_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_adj_desc[]; +static inline int +__bam_adj_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx, u_int32_t indx_copy, + u_int32_t is_insert) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_adj, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __bam_adj_desc, pgno, lsn, indx, indx_copy, is_insert)); +} + +static inline int __bam_adj_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_adj_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_adj_desc, sizeof(__bam_adj_args), (void**)arg)); +} +#define DB___bam_cadjust 56 +typedef struct ___bam_cadjust_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + 
db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + int32_t adjust; + u_int32_t opflags; +} __bam_cadjust_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_cadjust_desc[]; +static inline int +__bam_cadjust_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx, int32_t adjust, + u_int32_t opflags) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_cadjust, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __bam_cadjust_desc, pgno, lsn, indx, adjust, opflags)); +} + +static inline int __bam_cadjust_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_cadjust_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_cadjust_desc, sizeof(__bam_cadjust_args), (void**)arg)); +} +#define DB___bam_cdel 57 +typedef struct ___bam_cdel_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; +} __bam_cdel_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_cdel_desc[]; +static inline int +__bam_cdel_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_cdel, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t), + __bam_cdel_desc, pgno, lsn, indx)); +} + +static inline int __bam_cdel_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_cdel_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_cdel_desc, sizeof(__bam_cdel_args), (void**)arg)); +} +#define DB___bam_repl 58 +typedef struct ___bam_repl_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t 
indx; + u_int32_t isdeleted; + DBT orig; + DBT repl; + u_int32_t prefix; + u_int32_t suffix; +} __bam_repl_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_repl_desc[]; +static inline int +__bam_repl_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx, u_int32_t isdeleted, + const DBT *orig, const DBT *repl, u_int32_t prefix, u_int32_t suffix) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_repl, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(orig) + + LOG_DBT_SIZE(repl) + sizeof(u_int32_t) + sizeof(u_int32_t), + __bam_repl_desc, pgno, lsn, indx, isdeleted, orig, repl, prefix, + suffix)); +} + +static inline int __bam_repl_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_repl_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_repl_desc, sizeof(__bam_repl_args), (void**)arg)); +} +#define DB___bam_irep 67 +typedef struct ___bam_irep_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + u_int32_t ptype; + DBT hdr; + DBT data; + DBT old; +} __bam_irep_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_irep_desc[]; +static inline int +__bam_irep_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx, u_int32_t ptype, + const DBT *hdr, const DBT *data, const DBT *old) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_irep, 1, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(hdr) + + LOG_DBT_SIZE(data) + LOG_DBT_SIZE(old), + __bam_irep_desc, pgno, lsn, indx, ptype, hdr, data, old)); +} + +static inline int __bam_irep_read(ENV *env, + DB **dbpp, 
void *td, void *data, __bam_irep_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_irep_desc, sizeof(__bam_irep_args), (void**)arg)); +} +#define DB___bam_root 59 +typedef struct ___bam_root_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t meta_pgno; + db_pgno_t root_pgno; + DB_LSN meta_lsn; +} __bam_root_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_root_desc[]; +static inline int +__bam_root_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t meta_pgno, db_pgno_t root_pgno, DB_LSN * meta_lsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_root, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*meta_lsn), + __bam_root_desc, meta_pgno, root_pgno, meta_lsn)); +} + +static inline int __bam_root_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_root_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_root_desc, sizeof(__bam_root_args), (void**)arg)); +} +#define DB___bam_curadj 64 +typedef struct ___bam_curadj_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_ca_mode mode; + db_pgno_t from_pgno; + db_pgno_t to_pgno; + db_pgno_t left_pgno; + u_int32_t first_indx; + u_int32_t from_indx; + u_int32_t to_indx; +} __bam_curadj_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_curadj_desc[]; +static inline int +__bam_curadj_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_ca_mode mode, db_pgno_t from_pgno, db_pgno_t to_pgno, db_pgno_t left_pgno, + u_int32_t first_indx, u_int32_t from_indx, u_int32_t to_indx) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_curadj, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + 
sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __bam_curadj_desc, mode, from_pgno, to_pgno, left_pgno, first_indx, from_indx, to_indx)); +} + +static inline int __bam_curadj_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_curadj_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_curadj_desc, sizeof(__bam_curadj_args), (void**)arg)); +} +#define DB___bam_rcuradj 65 +typedef struct ___bam_rcuradj_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + ca_recno_arg mode; + db_pgno_t root; + db_recno_t recno; + u_int32_t order; +} __bam_rcuradj_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_rcuradj_desc[]; +static inline int +__bam_rcuradj_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, ca_recno_arg mode, db_pgno_t root, db_recno_t recno, u_int32_t order) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___bam_rcuradj, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __bam_rcuradj_desc, mode, root, recno, order)); +} + +static inline int __bam_rcuradj_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_rcuradj_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_rcuradj_desc, sizeof(__bam_rcuradj_args), (void**)arg)); +} +#define DB___bam_relink_43 147 +typedef struct ___bam_relink_43_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t prev; + DB_LSN lsn_prev; + db_pgno_t next; + DB_LSN lsn_next; +} __bam_relink_43_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_relink_43_desc[]; +static inline int __bam_relink_43_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_relink_43_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_relink_43_desc, sizeof(__bam_relink_43_args), (void**)arg)); 
+} +#define DB___bam_merge_44 148 +typedef struct ___bam_merge_44_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t npgno; + DB_LSN nlsn; + DBT hdr; + DBT data; + DBT ind; +} __bam_merge_44_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __bam_merge_44_desc[]; +static inline int __bam_merge_44_read(ENV *env, + DB **dbpp, void *td, void *data, __bam_merge_44_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __bam_merge_44_desc, sizeof(__bam_merge_44_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/btree_ext.h b/src/dbinc_auto/btree_ext.h new file mode 100644 index 00000000..c90f5b80 --- /dev/null +++ b/src/dbinc_auto/btree_ext.h @@ -0,0 +1,147 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _btree_ext_h_ +#define _btree_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __bam_compact_int __P((DBC *, DBT *, DBT *, u_int32_t, int *, DB_COMPACT *, int *)); +int __bam_compact_opd __P((DBC *, db_pgno_t, PAGE **, u_int32_t, DB_COMPACT *, int *)); +int __bam_truncate_ipages __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *)); +int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t, int (*)(DB *, const DBT *, const DBT *), int *)); +int __bam_defcmp __P((DB *, const DBT *, const DBT *)); +size_t __bam_defpfx __P((DB *, const DBT *, const DBT *)); +int __bam_compress_dupcmp __P((DB *, const DBT *, const DBT *)); +int __bam_defcompress __P((DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *)); +int __bam_defdecompress __P((DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)); +int __bamc_compress_get __P((DBC *, DBT *, DBT *, u_int32_t)); +int __bamc_compress_put __P((DBC *, DBT *, DBT *, u_int32_t)); +int __bamc_compress_del __P((DBC *, u_int32_t)); +int __bamc_compress_bulk_del __P((DBC *, DBT *, u_int32_t)); +int __bamc_compress_count __P((DBC *, db_recno_t *)); +int __bamc_compress_cmp __P((DBC *, DBC *, int 
*)); +int __bamc_compress_dup __P((DBC *, DBC *, u_int32_t)); +int __bam_compress_salvage __P((DB *, VRFY_DBINFO *, void *, int (*)(void *, const void *), DBT *, DBT *)); +int __bam_compress_count __P((DBC *, u_int32_t *, u_int32_t *)); +int __bam_pgin __P((DB *, db_pgno_t, void *, DBT *)); +int __bam_pgout __P((DB *, db_pgno_t, void *, DBT *)); +int __bam_mswap __P((ENV *, PAGE *)); +int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int, u_int32_t *)); +int __ram_ca_delete __P((DB *, db_pgno_t, u_int32_t *)); +int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int)); +int __bam_ca_dup __P((DBC *, u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); +int __bam_ca_undodup __P((DB *, u_int32_t, db_pgno_t, u_int32_t, u_int32_t)); +int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t)); +int __bam_ca_split __P((DBC *, db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int)); +int __bam_ca_undosplit __P((DB *, db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t)); +int __bamc_init __P((DBC *, DBTYPE)); +int __bamc_refresh __P((DBC *)); +int __bamc_cmp __P((DBC *, DBC *, int *)); +int __bamc_count __P((DBC *, db_recno_t *)); +int __bamc_dup __P((DBC *, DBC *, u_int32_t)); +int __bam_bulk_overflow __P((DBC *, u_int32_t, db_pgno_t, u_int8_t *)); +int __bam_bulk_duplicates __P((DBC *, db_pgno_t, u_int8_t *, int32_t *, int32_t **, u_int8_t **, u_int32_t *, int)); +int __bamc_rget __P((DBC *, DBT *)); +int __bam_opd_exists __P((DBC *, db_pgno_t)); +int __bam_ditem __P((DBC *, PAGE *, u_int32_t)); +int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int)); +int __bam_dpages __P((DBC *, int, int)); +int __bam_pupdate __P((DBC *, PAGE *)); +int __bam_db_create __P((DB *)); +int __bam_db_close __P((DB *)); +void __bam_map_flags __P((DB *, u_int32_t *, u_int32_t *)); +int __bam_set_flags __P((DB *, u_int32_t *flagsp)); +int __bam_set_bt_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *))); +int __bam_set_bt_compress __P((DB *, int (*)(DB *, const DBT *, const DBT *, const 
DBT *, const DBT *, DBT *), int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *))); +int __bam_get_bt_minkey __P((DB *, u_int32_t *)); +void __bam_copy_config __P((DB *, DB*, u_int32_t)); +void __ram_map_flags __P((DB *, u_int32_t *, u_int32_t *)); +int __ram_set_flags __P((DB *, u_int32_t *flagsp)); +int __ram_get_re_len __P((DB *, u_int32_t *)); +int __ram_get_re_pad __P((DB *, int *)); +int __bam_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, db_pgno_t, u_int32_t)); +int __bam_metachk __P((DB *, const char *, BTMETA *)); +int __bam_read_root __P((DB *, DB_THREAD_INFO *, DB_TXN *, db_pgno_t, u_int32_t)); +int __bam_new_file __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); +int __bam_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *)); +int __bam_iitem __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t)); +int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *, u_int32_t)); +int __bam_ritem_nolog __P((DBC *, PAGE *, u_int32_t, DBT *, DBT *, u_int32_t)); +int __bam_irep __P((DBC *, PAGE *, u_int32_t, DBT *, DBT *)); +int __bam_split_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_48_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_adj_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cadjust_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cdel_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_repl_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_irep_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_root_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_curadj_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rcuradj_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_merge_44_recover 
__P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_relink_43_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_reclaim __P((DB *, DB_THREAD_INFO *, DB_TXN *, u_int32_t)); +int __bam_truncate __P((DBC *, u_int32_t *)); +int __ram_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, db_pgno_t, u_int32_t)); +int __ram_append __P((DBC *, DBT *, DBT *)); +int __ramc_del __P((DBC *, u_int32_t)); +int __ramc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +int __ramc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +int __ram_ca __P((DBC *, ca_recno_arg, int *)); +int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int)); +int __ram_writeback __P((DB *)); +int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); +int __bam_adjust __P((DBC *, int32_t)); +int __bam_nrecs __P((DBC *, db_recno_t *)); +db_recno_t __bam_total __P((DB *, PAGE *)); +int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *)); +int __bam_search __P((DBC *, db_pgno_t, const DBT *, u_int32_t, int, db_recno_t *, int *)); +int __bam_stkrel __P((DBC *, u_int32_t)); +int __bam_stkgrow __P((ENV *, BTREE_CURSOR *)); +int __bam_split __P((DBC *, void *, db_pgno_t *)); +int __bam_broot __P((DBC *, PAGE *, u_int32_t, PAGE *, PAGE *)); +int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *)); +int __bam_pinsert __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int)); +int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); +int __bam_stat __P((DBC *, void *, u_int32_t)); +int __bam_stat_print __P((DBC *, u_int32_t)); +int __bam_stat_callback __P((DBC *, PAGE *, void *, int *)); +void __bam_print_cursor __P((DBC *)); +int __bam_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __bam_traverse __P((DBC *, db_lockmode_t, db_pgno_t, int (*)(DBC *, PAGE *, void *, int *), void *)); +int __bam_30_btreemeta __P((DB *, char *, u_int8_t *)); +int __bam_31_btreemeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int 
__bam_31_lbtree __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, db_pgno_t, u_int32_t)); +int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, DB_THREAD_INFO *, PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t)); +int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, void *, u_int32_t)); +int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); +int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, PAGE *, void *, int (*)(void *, const void *), DBT *, u_int32_t)); +int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, DBT *, void *, int (*)(void *, const void *), u_int32_t)); +int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, u_int32_t, DB *)); +int __bam_init_recover __P((ENV *, DB_DISTAB *)); +int __bam_split_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_48_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_adj_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cadjust_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cdel_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_repl_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_irep_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_root_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_curadj_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rcuradj_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_relink_43_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int 
__bam_merge_44_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_init_print __P((ENV *, DB_DISTAB *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_btree_ext_h_ */ diff --git a/src/dbinc_auto/clib_ext.h b/src/dbinc_auto/clib_ext.h new file mode 100644 index 00000000..c53be48c --- /dev/null +++ b/src/dbinc_auto/clib_ext.h @@ -0,0 +1,113 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _clib_ext_h_ +#define _clib_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifndef HAVE_ATOI +int atoi __P((const char *)); +#endif +#ifndef HAVE_ATOL +long atol __P((const char *)); +#endif +#ifndef HAVE_BSEARCH +void *bsearch __P((const void *, const void *, size_t, size_t, int (*)(const void *, const void *))); +#endif +#ifndef HAVE_GETCWD +char *getcwd __P((char *, size_t)); +#endif +#ifndef HAVE_GETOPT +int getopt __P((int, char * const *, const char *)); +#endif +#ifndef HAVE_ISALPHA +int isalpha __P((int)); +#endif +#ifndef HAVE_ISDIGIT +int isdigit __P((int)); +#endif +#ifndef HAVE_ISPRINT +int isprint __P((int)); +#endif +#ifndef HAVE_ISSPACE +int isspace __P((int)); +#endif +#ifndef HAVE_MEMCMP +int memcmp __P((const void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMCPY +void *memcpy __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMMOVE +void *memmove __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_PRINTF +int printf __P((const char *, ...)); +#endif +#ifndef HAVE_PRINTF +int fprintf __P((FILE *, const char *, ...)); +#endif +#ifndef HAVE_PRINTF +int vfprintf __P((FILE *, const char *, va_list)); +#endif +#ifndef HAVE_QSORT +void qsort __P((void *, size_t, size_t, int(*)(const void *, const void *))); +#endif +#ifndef HAVE_RAISE +int raise __P((int)); +#endif +#ifndef HAVE_RAND +int rand __P((void)); +void srand __P((unsigned int)); +#endif +#ifndef HAVE_SNPRINTF +int snprintf __P((char *, size_t, const char *, ...)); +#endif +#ifndef HAVE_VSNPRINTF +int vsnprintf __P((char *, size_t, 
const char *, va_list)); +#endif +#ifndef HAVE_STRCASECMP +int strcasecmp __P((const char *, const char *)); +#endif +#ifndef HAVE_STRCASECMP +int strncasecmp __P((const char *, const char *, size_t)); +#endif +#ifndef HAVE_STRCAT +char *strcat __P((char *, const char *)); +#endif +#ifndef HAVE_STRCHR +char *strchr __P((const char *, int)); +#endif +#ifndef HAVE_STRDUP +char *strdup __P((const char *)); +#endif +#ifndef HAVE_STRERROR +char *strerror __P((int)); +#endif +#ifndef HAVE_STRNCAT +char *strncat __P((char *, const char *, size_t)); +#endif +#ifndef HAVE_STRNCMP +int strncmp __P((const char *, const char *, size_t)); +#endif +#ifndef HAVE_STRRCHR +char *strrchr __P((const char *, int)); +#endif +#ifndef HAVE_STRSEP +char *strsep __P((char **, const char *)); +#endif +#ifndef HAVE_STRTOL +long strtol __P((const char *, char **, int)); +#endif +#ifndef HAVE_STRTOUL +unsigned long strtoul __P((const char *, char **, int)); +#endif +#ifndef HAVE_TIME +time_t time __P((time_t *)); +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_clib_ext_h_ */ diff --git a/src/dbinc_auto/common_ext.h b/src/dbinc_auto/common_ext.h new file mode 100644 index 00000000..ac16e9db --- /dev/null +++ b/src/dbinc_auto/common_ext.h @@ -0,0 +1,75 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _common_ext_h_ +#define _common_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +void __clock_set_expires __P((ENV *, db_timespec *, db_timeout_t)); +int __clock_expired __P((ENV *, db_timespec *, db_timespec *)); +int __crypto_region_init __P((ENV *)); +int __db_isbigendian __P((void)); +int __db_byteorder __P((ENV *, int)); +u_int32_t __db_compress_count_int __P((u_int64_t)); +int __db_compress_int __P((u_int8_t *, u_int64_t)); +u_int32_t __db_decompress_count_int __P((const u_int8_t *)); +int __db_decompress_int __P((const u_int8_t *, u_int64_t *)); +int __db_decompress_int32 __P((const u_int8_t *, u_int32_t *)); +int __db_fchk __P((ENV *, const char *, u_int32_t, u_int32_t)); +int __db_fcchk __P((ENV *, const char *, u_int32_t, u_int32_t, u_int32_t)); +int __db_ferr __P((const ENV *, const char *, int)); +int __db_fnl __P((const ENV *, const char *)); +int __db_pgerr __P((DB *, db_pgno_t, int)); +int __db_pgfmt __P((ENV *, db_pgno_t)); +#ifdef DIAGNOSTIC +void __db_assert __P((ENV *, const char *, const char *, int)); +#endif +int __env_panic_msg __P((ENV *)); +int __env_panic __P((ENV *, int)); +char *__db_unknown_error __P((int)); +void __db_syserr __P((const ENV *, int, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +void __db_err __P((const ENV *, int, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +void __db_errx __P((const ENV *, const char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3))); +void __db_errcall __P((const DB_ENV *, int, db_error_set_t, const char *, va_list)); +void __db_errfile __P((const DB_ENV *, int, db_error_set_t, const char *, va_list)); +void __db_msgadd __P((ENV *, DB_MSGBUF *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +void __db_msgadd_ap __P((ENV *, DB_MSGBUF *, const char *, va_list)); +void __db_msg __P((const ENV *, const char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3))); +void __db_repmsg __P((const ENV *, const 
char *, ...)) __attribute__ ((__format__ (__printf__, 2, 3))); +int __db_unknown_flag __P((ENV *, char *, u_int32_t)); +int __db_unknown_type __P((ENV *, char *, DBTYPE)); +int __db_unknown_path __P((ENV *, char *)); +int __db_check_txn __P((DB *, DB_TXN *, DB_LOCKER *, int)); +int __db_txn_deadlock_err __P((ENV *, DB_TXN *)); +int __db_not_txn_env __P((ENV *)); +int __db_rec_toobig __P((ENV *, u_int32_t, u_int32_t)); +int __db_rec_repl __P((ENV *, u_int32_t, u_int32_t)); +int __dbc_logging __P((DBC *)); +int __db_check_lsn __P((ENV *, DB_LSN *, DB_LSN *)); +int __db_rdonly __P((const ENV *, const char *)); +int __db_space_err __P((const DB *)); +int __db_failed __P((const ENV *, const char *, pid_t, db_threadid_t)); +int __db_getlong __P((DB_ENV *, const char *, char *, long, long, long *)); +int __db_getulong __P((DB_ENV *, const char *, char *, u_long, u_long, u_long *)); +void __db_idspace __P((u_int32_t *, int, u_int32_t *, u_int32_t *)); +u_int32_t __db_log2 __P((u_int32_t)); +u_int32_t __db_tablesize __P((u_int32_t)); +void __db_hashinit __P((void *, u_int32_t)); +int __dbt_usercopy __P((ENV *, DBT *)); +void __dbt_userfree __P((ENV *, DBT *, DBT *, DBT *)); +int __db_mkpath __P((ENV *, const char *)); +u_int32_t __db_openflags __P((int)); +int __db_util_arg __P((char *, char *, int *, char ***)); +int __db_util_cache __P((DB *, u_int32_t *, int *)); +int __db_util_logset __P((const char *, char *)); +void __db_util_siginit __P((void)); +int __db_util_interrupted __P((void)); +void __db_util_sigresend __P((void)); +int __db_zero_fill __P((ENV *, DB_FH *)); +int __db_zero_extend __P((ENV *, DB_FH *, db_pgno_t, db_pgno_t, u_int32_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_common_ext_h_ */ diff --git a/src/dbinc_auto/crdel_auto.h b/src/dbinc_auto/crdel_auto.h new file mode 100644 index 00000000..86a60549 --- /dev/null +++ b/src/dbinc_auto/crdel_auto.h @@ -0,0 +1,127 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef __crdel_AUTO_H +#define __crdel_AUTO_H +#include "dbinc/log.h" +#define DB___crdel_metasub 142 +typedef struct ___crdel_metasub_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT page; + DB_LSN lsn; +} __crdel_metasub_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __crdel_metasub_desc[]; +static inline int +__crdel_metasub_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, const DBT *page, DB_LSN * lsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___crdel_metasub, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(page) + + sizeof(*lsn), + __crdel_metasub_desc, pgno, page, lsn)); +} + +static inline int __crdel_metasub_read(ENV *env, + DB **dbpp, void *td, void *data, __crdel_metasub_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __crdel_metasub_desc, sizeof(__crdel_metasub_args), (void**)arg)); +} +#define DB___crdel_inmem_create 138 +typedef struct ___crdel_inmem_create_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DBT name; + DBT fid; + u_int32_t pgsize; +} __crdel_inmem_create_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __crdel_inmem_create_desc[]; +static inline int +__crdel_inmem_create_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + int32_t fileid, const DBT *name, const DBT *fid, u_int32_t pgsize) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___crdel_inmem_create, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(fid) + + sizeof(u_int32_t), + __crdel_inmem_create_desc, + fileid, name, fid, pgsize)); +} + +static inline int __crdel_inmem_create_read(ENV *env, + void *data, __crdel_inmem_create_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __crdel_inmem_create_desc, 
sizeof(__crdel_inmem_create_args), (void**)arg)); +} +#define DB___crdel_inmem_rename 139 +typedef struct ___crdel_inmem_rename_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT oldname; + DBT newname; + DBT fid; +} __crdel_inmem_rename_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __crdel_inmem_rename_desc[]; +static inline int +__crdel_inmem_rename_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *oldname, const DBT *newname, const DBT *fid) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___crdel_inmem_rename, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(oldname) + LOG_DBT_SIZE(newname) + LOG_DBT_SIZE(fid), + __crdel_inmem_rename_desc, + oldname, newname, fid)); +} + +static inline int __crdel_inmem_rename_read(ENV *env, + void *data, __crdel_inmem_rename_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __crdel_inmem_rename_desc, sizeof(__crdel_inmem_rename_args), (void**)arg)); +} +#define DB___crdel_inmem_remove 140 +typedef struct ___crdel_inmem_remove_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + DBT fid; +} __crdel_inmem_remove_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __crdel_inmem_remove_desc[]; +static inline int +__crdel_inmem_remove_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *name, const DBT *fid) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___crdel_inmem_remove, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(fid), + __crdel_inmem_remove_desc, + name, fid)); +} + +static inline int __crdel_inmem_remove_read(ENV *env, + void *data, __crdel_inmem_remove_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __crdel_inmem_remove_desc, sizeof(__crdel_inmem_remove_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/crypto_ext.h b/src/dbinc_auto/crypto_ext.h new 
file mode 100644 index 00000000..cd7113d7 --- /dev/null +++ b/src/dbinc_auto/crypto_ext.h @@ -0,0 +1,38 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _crypto_ext_h_ +#define _crypto_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __aes_setup __P((ENV *, DB_CIPHER *)); +u_int __aes_adj_size __P((size_t)); +int __aes_close __P((ENV *, void *)); +int __aes_decrypt __P((ENV *, void *, void *, u_int8_t *, size_t)); +int __aes_encrypt __P((ENV *, void *, void *, u_int8_t *, size_t)); +int __aes_init __P((ENV *, DB_CIPHER *)); +int __crypto_env_close __P((ENV *)); +int __crypto_env_refresh __P((ENV *)); +int __crypto_algsetup __P((ENV *, DB_CIPHER *, u_int32_t, int)); +int __crypto_decrypt_meta __P((ENV *, DB *, u_int8_t *, int)); +int __crypto_set_passwd __P((ENV *, ENV *)); +int __db_generate_iv __P((ENV *, u_int32_t *)); +int __db_rijndaelKeySetupEnc __P((u32 *, const u8 *, int)); +int __db_rijndaelKeySetupDec __P((u32 *, const u8 *, int)); +void __db_rijndaelEncrypt __P((u32 *, int, const u8 *, u8 *)); +void __db_rijndaelDecrypt __P((u32 *, int, const u8 *, u8 *)); +void __db_rijndaelEncryptRound __P((const u32 *, int, u8 *, int)); +void __db_rijndaelDecryptRound __P((const u32 *, int, u8 *, int)); +int __db_makeKey __P((keyInstance *, int, int, char *)); +int __db_cipherInit __P((cipherInstance *, int, char *)); +int __db_blockEncrypt __P((cipherInstance *, keyInstance *, u_int8_t *, size_t, u_int8_t *)); +int __db_padEncrypt __P((cipherInstance *, keyInstance *, u_int8_t *, int, u_int8_t *)); +int __db_blockDecrypt __P((cipherInstance *, keyInstance *, u_int8_t *, size_t, u_int8_t *)); +int __db_padDecrypt __P((cipherInstance *, keyInstance *, u_int8_t *, int, u_int8_t *)); +int __db_cipherUpdateRounds __P((cipherInstance *, keyInstance *, u_int8_t *, int, u_int8_t *, int)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_crypto_ext_h_ */ diff --git a/src/dbinc_auto/db_auto.h b/src/dbinc_auto/db_auto.h new file mode 
100644 index 00000000..04e2f465 --- /dev/null +++ b/src/dbinc_auto/db_auto.h @@ -0,0 +1,666 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef __db_AUTO_H +#define __db_AUTO_H +#include "dbinc/log.h" +#define DB___db_addrem 41 +typedef struct ___db_addrem_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t nbytes; + DBT hdr; + DBT dbt; + DB_LSN pagelsn; +} __db_addrem_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_addrem_desc[]; +static inline int +__db_addrem_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_pgno_t pgno, u_int32_t indx, u_int32_t nbytes, + const DBT *hdr, const DBT *dbt, DB_LSN * pagelsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_addrem, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(hdr) + + LOG_DBT_SIZE(dbt) + sizeof(*pagelsn), + __db_addrem_desc, + opcode, pgno, indx, nbytes, hdr, dbt, pagelsn)); +} + +static inline int __db_addrem_read(ENV *env, + DB **dbpp, void *td, void *data, __db_addrem_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_addrem_desc, sizeof(__db_addrem_args), (void**)arg)); +} +#define DB___db_addrem_42 41 +typedef struct ___db_addrem_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t nbytes; + DBT hdr; + DBT dbt; + DB_LSN pagelsn; +} __db_addrem_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_addrem_42_desc[]; +static inline int __db_addrem_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_addrem_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_addrem_42_desc, sizeof(__db_addrem_42_args), (void**)arg)); +} +#define DB___db_big 
43 +typedef struct ___db_big_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + DBT dbt; + DB_LSN pagelsn; + DB_LSN prevlsn; + DB_LSN nextlsn; +} __db_big_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_big_desc[]; +static inline int +__db_big_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_pgno_t pgno, db_pgno_t prev_pgno, db_pgno_t next_pgno, + const DBT *dbt, DB_LSN * pagelsn, DB_LSN * prevlsn, DB_LSN * nextlsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_big, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(dbt) + + sizeof(*pagelsn) + sizeof(*prevlsn) + sizeof(*nextlsn), + __db_big_desc, + opcode, pgno, prev_pgno, next_pgno, dbt, pagelsn, prevlsn, + nextlsn)); +} + +static inline int __db_big_read(ENV *env, + DB **dbpp, void *td, void *data, __db_big_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_big_desc, sizeof(__db_big_args), (void**)arg)); +} +#define DB___db_big_42 43 +typedef struct ___db_big_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + DBT dbt; + DB_LSN pagelsn; + DB_LSN prevlsn; + DB_LSN nextlsn; +} __db_big_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_big_42_desc[]; +static inline int __db_big_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_big_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_big_42_desc, sizeof(__db_big_42_args), (void**)arg)); +} +#define DB___db_ovref 44 +typedef struct ___db_ovref_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + int32_t adjust; + DB_LSN lsn; +} 
__db_ovref_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_ovref_desc[]; +static inline int +__db_ovref_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, int32_t adjust, DB_LSN * lsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_ovref, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*lsn), + __db_ovref_desc, pgno, adjust, lsn)); +} + +static inline int __db_ovref_read(ENV *env, + DB **dbpp, void *td, void *data, __db_ovref_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_ovref_desc, sizeof(__db_ovref_args), (void**)arg)); +} +#define DB___db_relink_42 45 +typedef struct ___db_relink_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t prev; + DB_LSN lsn_prev; + db_pgno_t next; + DB_LSN lsn_next; +} __db_relink_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_relink_42_desc[]; +static inline int __db_relink_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_relink_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_relink_42_desc, sizeof(__db_relink_42_args), (void**)arg)); +} +#define DB___db_debug 47 +typedef struct ___db_debug_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT op; + int32_t fileid; + DBT key; + DBT data; + u_int32_t arg_flags; +} __db_debug_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_debug_desc[]; +static inline int +__db_debug_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *op, int32_t fileid, const DBT *key, const DBT *data, u_int32_t arg_flags) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___db_debug, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(op) + sizeof(u_int32_t) + LOG_DBT_SIZE(key) + + LOG_DBT_SIZE(data) + 
sizeof(u_int32_t), + __db_debug_desc, + op, fileid, key, data, arg_flags)); +} + +static inline int __db_debug_read(ENV *env, + void *data, __db_debug_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __db_debug_desc, sizeof(__db_debug_args), (void**)arg)); +} +#define DB___db_noop 48 +typedef struct ___db_noop_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN prevlsn; +} __db_noop_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_noop_desc[]; +static inline int +__db_noop_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * prevlsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_noop, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*prevlsn), + __db_noop_desc, pgno, prevlsn)); +} + +static inline int __db_noop_read(ENV *env, + DB **dbpp, void *td, void *data, __db_noop_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_noop_desc, sizeof(__db_noop_args), (void**)arg)); +} +#define DB___db_pg_alloc_42 49 +typedef struct ___db_pg_alloc_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DB_LSN page_lsn; + db_pgno_t pgno; + u_int32_t ptype; + db_pgno_t next; +} __db_pg_alloc_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_alloc_42_desc[]; +static inline int __db_pg_alloc_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_alloc_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_alloc_42_desc, sizeof(__db_pg_alloc_42_args), (void**)arg)); +} +#define DB___db_pg_alloc 49 +typedef struct ___db_pg_alloc_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DB_LSN page_lsn; + db_pgno_t pgno; + u_int32_t ptype; + db_pgno_t next; + 
db_pgno_t last_pgno; +} __db_pg_alloc_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_alloc_desc[]; +static inline int +__db_pg_alloc_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * meta_lsn, db_pgno_t meta_pgno, DB_LSN * page_lsn, db_pgno_t pgno, + u_int32_t ptype, db_pgno_t next, db_pgno_t last_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_pg_alloc, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*meta_lsn) + sizeof(u_int32_t) + + sizeof(*page_lsn) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __db_pg_alloc_desc, meta_lsn, meta_pgno, page_lsn, pgno, ptype, next, last_pgno)); +} + +static inline int __db_pg_alloc_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_alloc_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_alloc_desc, sizeof(__db_pg_alloc_args), (void**)arg)); +} +#define DB___db_pg_free_42 50 +typedef struct ___db_pg_free_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DBT header; + db_pgno_t next; +} __db_pg_free_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_free_42_desc[]; +static inline int __db_pg_free_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_free_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_free_42_desc, sizeof(__db_pg_free_42_args), (void**)arg)); +} +#define DB___db_pg_free 50 +typedef struct ___db_pg_free_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DBT header; + db_pgno_t next; + db_pgno_t last_pgno; +} __db_pg_free_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_free_desc[]; +static inline int +__db_pg_free_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, 
DB_LSN * meta_lsn, db_pgno_t meta_pgno, const DBT *header, + db_pgno_t next, db_pgno_t last_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_pg_free, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*meta_lsn) + + sizeof(u_int32_t) + LOG_DBT_SIZE(header) + sizeof(u_int32_t) + + sizeof(u_int32_t), + __db_pg_free_desc, pgno, meta_lsn, meta_pgno, header, next, last_pgno)); +} + +static inline int __db_pg_free_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_free_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_free_desc, sizeof(__db_pg_free_args), (void**)arg)); +} +#define DB___db_cksum 51 +typedef struct ___db_cksum_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; +} __db_cksum_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_cksum_desc[]; +static inline int +__db_cksum_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___db_cksum, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN), + __db_cksum_desc)); +} + +static inline int __db_cksum_read(ENV *env, + void *data, __db_cksum_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __db_cksum_desc, sizeof(__db_cksum_args), (void**)arg)); +} +#define DB___db_pg_freedata_42 52 +typedef struct ___db_pg_freedata_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DBT header; + db_pgno_t next; + DBT data; +} __db_pg_freedata_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_freedata_42_desc[]; +static inline int __db_pg_freedata_42_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_freedata_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_freedata_42_desc, sizeof(__db_pg_freedata_42_args), (void**)arg)); +} 
+#define DB___db_pg_freedata 52 +typedef struct ___db_pg_freedata_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + DBT header; + db_pgno_t next; + db_pgno_t last_pgno; + DBT data; +} __db_pg_freedata_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_freedata_desc[]; +static inline int +__db_pg_freedata_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * meta_lsn, db_pgno_t meta_pgno, const DBT *header, + db_pgno_t next, db_pgno_t last_pgno, const DBT *data) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_pg_freedata, 1, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*meta_lsn) + + sizeof(u_int32_t) + LOG_DBT_SIZE(header) + sizeof(u_int32_t) + + sizeof(u_int32_t) + LOG_DBT_SIZE(data), + __db_pg_freedata_desc, pgno, meta_lsn, meta_pgno, header, next, last_pgno, data)); +} + +static inline int __db_pg_freedata_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_freedata_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_freedata_desc, sizeof(__db_pg_freedata_args), (void**)arg)); +} +#define DB___db_pg_init 60 +typedef struct ___db_pg_init_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT header; + DBT data; +} __db_pg_init_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_init_desc[]; +static inline int +__db_pg_init_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, const DBT *header, const DBT *data) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_pg_init, 1, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(header) + + LOG_DBT_SIZE(data), + __db_pg_init_desc, pgno, header, data)); +} + +static inline int __db_pg_init_read(ENV *env, 
+ DB **dbpp, void *td, void *data, __db_pg_init_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_init_desc, sizeof(__db_pg_init_args), (void**)arg)); +} +#define DB___db_pg_sort_44 61 +typedef struct ___db_pg_sort_44_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t meta; + DB_LSN meta_lsn; + db_pgno_t last_free; + DB_LSN last_lsn; + db_pgno_t last_pgno; + DBT list; +} __db_pg_sort_44_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_sort_44_desc[]; +static inline int __db_pg_sort_44_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_sort_44_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_sort_44_desc, sizeof(__db_pg_sort_44_args), (void**)arg)); +} +#define DB___db_pg_trunc 66 +typedef struct ___db_pg_trunc_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t meta; + DB_LSN meta_lsn; + db_pgno_t last_free; + DB_LSN last_lsn; + db_pgno_t next_free; + db_pgno_t last_pgno; + DBT list; +} __db_pg_trunc_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pg_trunc_desc[]; +static inline int +__db_pg_trunc_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t meta, DB_LSN * meta_lsn, db_pgno_t last_free, DB_LSN * last_lsn, + db_pgno_t next_free, db_pgno_t last_pgno, const DBT *list) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_pg_trunc, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*meta_lsn) + + sizeof(u_int32_t) + sizeof(*last_lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + LOG_DBT_SIZE(list), + __db_pg_trunc_desc, meta, meta_lsn, last_free, last_lsn, next_free, last_pgno, list)); +} + +static inline int __db_pg_trunc_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pg_trunc_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pg_trunc_desc, 
sizeof(__db_pg_trunc_args), (void**)arg)); +} +#define DB___db_realloc 36 +typedef struct ___db_realloc_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN page_lsn; + db_pgno_t next_free; + u_int32_t ptype; + DBT list; +} __db_realloc_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_realloc_desc[]; +static inline int +__db_realloc_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t prev_pgno, DB_LSN * page_lsn, db_pgno_t next_free, u_int32_t ptype, + const DBT *list) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_realloc, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*page_lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(list), + __db_realloc_desc, prev_pgno, page_lsn, next_free, ptype, list)); +} + +static inline int __db_realloc_read(ENV *env, + DB **dbpp, void *td, void *data, __db_realloc_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_realloc_desc, sizeof(__db_realloc_args), (void**)arg)); +} +#define DB___db_relink 147 +typedef struct ___db_relink_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + db_pgno_t new_pgno; + db_pgno_t prev_pgno; + DB_LSN lsn_prev; + db_pgno_t next_pgno; + DB_LSN lsn_next; +} __db_relink_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_relink_desc[]; +static inline int +__db_relink_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, db_pgno_t new_pgno, db_pgno_t prev_pgno, DB_LSN * lsn_prev, + db_pgno_t next_pgno, DB_LSN * lsn_next) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_relink, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(*lsn_prev) + sizeof(u_int32_t) + + sizeof(*lsn_next), + 
__db_relink_desc, pgno, new_pgno, prev_pgno, lsn_prev, next_pgno, lsn_next)); +} + +static inline int __db_relink_read(ENV *env, + DB **dbpp, void *td, void *data, __db_relink_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_relink_desc, sizeof(__db_relink_args), (void**)arg)); +} +#define DB___db_merge 148 +typedef struct ___db_merge_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t npgno; + DB_LSN nlsn; + DBT hdr; + DBT data; + int32_t pg_copy; +} __db_merge_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_merge_desc[]; +static inline int +__db_merge_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, db_pgno_t npgno, DB_LSN * nlsn, + const DBT *hdr, const DBT *data, int32_t pg_copy) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___db_merge, 1, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(*nlsn) + LOG_DBT_SIZE(hdr) + + LOG_DBT_SIZE(data) + sizeof(u_int32_t), + __db_merge_desc, pgno, lsn, npgno, nlsn, hdr, data, pg_copy)); +} + +static inline int __db_merge_read(ENV *env, + DB **dbpp, void *td, void *data, __db_merge_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_merge_desc, sizeof(__db_merge_args), (void**)arg)); +} +#define DB___db_pgno 149 +typedef struct ___db_pgno_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + db_pgno_t opgno; + db_pgno_t npgno; +} __db_pgno_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __db_pgno_desc[]; +static inline int +__db_pgno_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * lsn, u_int32_t indx, db_pgno_t opgno, + db_pgno_t npgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, 
DB___db_pgno, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __db_pgno_desc, pgno, lsn, indx, opgno, npgno)); +} + +static inline int __db_pgno_read(ENV *env, + DB **dbpp, void *td, void *data, __db_pgno_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __db_pgno_desc, sizeof(__db_pgno_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/db_ext.h b/src/dbinc_auto/db_ext.h new file mode 100644 index 00000000..0ffefe31 --- /dev/null +++ b/src/dbinc_auto/db_ext.h @@ -0,0 +1,342 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _db_ext_h_ +#define _db_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __crdel_init_recover __P((ENV *, DB_DISTAB *)); +int __crdel_metasub_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_create_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_rename_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_remove_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_init_print __P((ENV *, DB_DISTAB *)); +int __crdel_metasub_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_create_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_rename_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_remove_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_master_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, u_int32_t, int, DB **)); +int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *, const char *, DBTYPE, mu_action, const char *, u_int32_t)); +int __env_dbreg_setup __P((DB *, DB_TXN *, const char *, const char *, u_int32_t)); +int __env_setup __P((DB *, DB_TXN *, const char *, const char *, u_int32_t, u_int32_t)); +int __env_mpool __P((DB *, 
const char *, u_int32_t)); +int __db_close __P((DB *, DB_TXN *, u_int32_t)); +int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); +int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); +int __db_walk_cursors __P((DB *, DBC *, int (*) __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)), u_int32_t *, db_pgno_t, u_int32_t, void *)); +int __db_backup_name __P((ENV *, const char *, DB_TXN *, char **)); +#ifdef CONFIG_TEST +int __db_testcopy __P((ENV *, DB *, const char *)); +#endif +int __db_testdocopy __P((ENV *, const char *)); +int __db_cursor_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **)); +int __db_put __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_del __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, u_int32_t)); +int __db_sync __P((DB *)); +int __db_associate __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB *, int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); +int __db_secondary_close __P((DB *, u_int32_t)); +int __db_associate_foreign __P((DB *, DB *, int (*)(DB *, const DBT *, DBT *, const DBT *, int *), u_int32_t)); +int __db_init_recover __P((ENV *, DB_DISTAB *)); +int __db_addrem_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_addrem_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_ovref_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_debug_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_noop_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_free_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, 
void *)); +int __db_pg_free_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_cksum_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_init_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_sort_44_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_trunc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_realloc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_merge_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pgno_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_init_print __P((ENV *, DB_DISTAB *)); +int __dbc_close __P((DBC *)); +int __dbc_destroy __P((DBC *)); +int __dbc_cmp __P((DBC *, DBC *, int *)); +int __dbc_count __P((DBC *, db_recno_t *)); +int __dbc_del __P((DBC *, u_int32_t)); +int __dbc_idel __P((DBC *, u_int32_t)); +#ifdef HAVE_COMPRESSION +int __dbc_bulk_del __P((DBC *, DBT *, u_int32_t)); +#endif +int __dbc_dup __P((DBC *, DBC **, u_int32_t)); +int __dbc_idup __P((DBC *, DBC **, u_int32_t)); +int __dbc_newopd __P((DBC *, db_pgno_t, DBC *, DBC **)); +int __dbc_get __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbc_iget __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbc_put __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbc_iput __P((DBC *, DBT *, DBT *, u_int32_t)); +int __db_duperr __P((DB *, u_int32_t)); +int __dbc_cleanup __P((DBC *, DBC *, int)); +int __dbc_secondary_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbc_pget __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); +int __dbc_del_primary __P((DBC *)); +int __db_s_first __P((DB *, DB **)); +int __db_s_next __P((DB **, DB_TXN *)); +int __db_s_done __P((DB *, DB_TXN *)); +int __db_buildpartial __P((DB *, DBT *, DBT *, DBT *)); 
+u_int32_t __db_partsize __P((u_int32_t, DBT *)); +#ifdef DIAGNOSTIC +void __db_check_skeyset __P((DB *, DBT *)); +#endif +int __cdsgroup_begin __P((ENV *, DB_TXN **)); +int __cdsgroup_begin_pp __P((DB_ENV *, DB_TXN **)); +int __db_compact_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); +int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int)); +int __db_truncate_overflow __P((DBC *, db_pgno_t, PAGE **, DB_COMPACT *)); +int __db_truncate_root __P((DBC *, PAGE *, u_int32_t, db_pgno_t *, u_int32_t)); +int __db_find_free __P((DBC *, u_int32_t, u_int32_t, db_pgno_t, db_pgno_t *)); +int __db_relink __P((DBC *, PAGE *, PAGE *, db_pgno_t)); +int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *)); +int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __db_decrypt_pg __P((ENV *, DB *, PAGE *)); +int __db_encrypt_and_checksum_pg __P((ENV *, DB *, PAGE *)); +void __db_metaswap __P((PAGE *)); +int __db_byteswap __P((DB *, db_pgno_t, PAGE *, size_t, int)); +int __db_pageswap __P((ENV *, DB *, void *, size_t, DBT *, int)); +void __db_recordswap __P((u_int32_t, u_int32_t, void *, void *, u_int32_t)); +int __db_dispatch __P((ENV *, DB_DISTAB *, DBT *, DB_LSN *, db_recops, void *)); +int __db_add_recovery __P((DB_ENV *, DB_DISTAB *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t)); +int __db_add_recovery_int __P((ENV *, DB_DISTAB *, int (*)(ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t)); +int __db_txnlist_init __P((ENV *, DB_THREAD_INFO *, u_int32_t, u_int32_t, DB_LSN *, DB_TXNHEAD **)); +int __db_txnlist_add __P((ENV *, DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *)); +int __db_txnlist_remove __P((ENV *, DB_TXNHEAD *, u_int32_t)); +void __db_txnlist_ckp __P((ENV *, DB_TXNHEAD *, DB_LSN *)); +void __db_txnlist_end __P((ENV *, DB_TXNHEAD *)); +int __db_txnlist_find __P((ENV *, DB_TXNHEAD *, u_int32_t, u_int32_t *)); +int 
__db_txnlist_update __P((ENV *, DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *, u_int32_t *, int)); +int __db_txnlist_gen __P((ENV *, DB_TXNHEAD *, int, u_int32_t, u_int32_t)); +int __db_txnlist_lsnadd __P((ENV *, DB_TXNHEAD *, DB_LSN *)); +int __db_txnlist_lsnget __P((ENV *, DB_TXNHEAD *, DB_LSN *, u_int32_t)); +int __db_txnlist_lsninit __P((ENV *, DB_TXNHEAD *, DB_LSN *)); +void __db_txnlist_print __P((DB_TXNHEAD *)); +int __db_ditem_nolog __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +int __db_pitem_nolog __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); +int __db_pitem __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); +int __db_associate_pp __P((DB *, DB_TXN *, DB *, int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); +int __db_close_pp __P((DB *, u_int32_t)); +int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t)); +int __db_cursor __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t)); +int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __db_fd_pp __P((DB *, int *)); +int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_get __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t)); +int __db_key_range_pp __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __db_open_pp __P((DB *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int)); +int __db_pget_pp __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); +int __db_pget __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); +int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_compact_pp __P((DB *, DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); +int __db_associate_foreign_pp __P((DB *, DB *, int (*)(DB *, const DBT *, DBT *, const DBT *, int *), u_int32_t)); +int __db_sync_pp __P((DB *, u_int32_t)); 
+int __dbc_close_pp __P((DBC *)); +int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t)); +int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t)); +int __dbc_del_pp __P((DBC *, u_int32_t)); +int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t)); +int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t)); +int __db_secondary_close_pp __P((DB *, u_int32_t)); +int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); +int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t)); +int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **)); +int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int)); +int __db_join __P((DB *, DBC **, DBC **, u_int32_t)); +int __db_join_close __P((DBC *)); +int __db_secondary_corrupt __P((DB *)); +int __db_new __P((DBC *, u_int32_t, DB_LOCK *, PAGE **)); +int __db_free __P((DBC *, PAGE *, u_int32_t)); +#ifdef HAVE_FTRUNCATE +void __db_freelist_pos __P((db_pgno_t, db_pgno_t *, u_int32_t, u_int32_t *)); +#endif +void __db_freelist_sort __P((db_pglist_t *, u_int32_t)); +#ifdef HAVE_FTRUNCATE +int __db_pg_truncate __P((DBC *, DB_TXN *, db_pglist_t *, DB_COMPACT *, u_int32_t *, db_pgno_t , db_pgno_t *, DB_LSN *, int)); +#endif +#ifdef HAVE_FTRUNCATE +int __db_free_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, DB_COMPACT *, db_pglist_t **, u_int32_t *, db_pgno_t *)); +#endif +int __db_lprint __P((DBC *)); +int __db_lget __P((DBC *, int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *)); +#ifdef DIAGNOSTIC +int __db_haslock __P((ENV *, DB_LOCKER *, DB_MPOOLFILE *, db_pgno_t, db_lockmode_t, u_int32_t)); +#endif +#ifdef DIAGNOSTIC +int __db_has_pagelock __P((ENV *, DB_LOCKER *, DB_MPOOLFILE *, PAGE *, db_lockmode_t)); +#endif +int __db_lput __P((DBC *, DB_LOCK *)); +int __db_create_internal __P((DB **, ENV *, u_int32_t)); +int __dbh_am_chk __P((DB *, u_int32_t)); +int __db_get_flags __P((DB *, u_int32_t *)); +int __db_set_flags __P((DB *, u_int32_t)); +int __db_get_lorder 
__P((DB *, int *)); +int __db_set_lorder __P((DB *, int)); +int __db_set_pagesize __P((DB *, u_int32_t)); +int __db_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int, db_pgno_t)); +int __db_get_open_flags __P((DB *, u_int32_t *)); +int __db_new_file __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); +int __db_init_subdb __P((DB *, DB *, const char *, DB_THREAD_INFO *, DB_TXN *)); +int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t)); +int __db_meta_setup __P((ENV *, DB *, const char *, DBMETA *, u_int32_t, u_int32_t)); +int __db_reopen __P((DBC *)); +int __db_goff __P((DBC *, DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *)); +int __db_poff __P((DBC *, const DBT *, db_pgno_t *)); +int __db_ovref __P((DBC *, db_pgno_t)); +int __db_doff __P((DBC *, db_pgno_t)); +int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t, int (*)(DB *, const DBT *, const DBT *), int *)); +int __db_coff __P((DBC *, const DBT *, const DBT *, int (*)(DB *, const DBT *, const DBT *), int *)); +int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __db_vrfy_ovfl_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t)); +int __db_safe_goff __P((DB *, VRFY_DBINFO *, db_pgno_t, DBT *, void *, u_int32_t *, u_int32_t)); +void __db_loadme __P((void)); +int __db_dumptree __P((DB *, DB_TXN *, char *, char *, db_pgno_t, db_pgno_t)); +const FN * __db_get_flags_fn __P((void)); +int __db_prnpage __P((DB *, DB_TXN *, db_pgno_t)); +int __db_prpage __P((DB *, PAGE *, u_int32_t)); +const char * __db_lockmode_to_string __P((db_lockmode_t)); +int __db_dumptree __P((DB *, DB_TXN *, char *, char *, db_pgno_t, db_pgno_t)); +const FN * __db_get_flags_fn __P((void)); +int __db_prpage_int __P((ENV *, DB_MSGBUF *, DB *, char *, PAGE *, u_int32_t, u_int8_t *, u_int32_t)); +void __db_prbytes __P((ENV *, DB_MSGBUF *, u_int8_t *, u_int32_t)); +void __db_prflags __P((ENV *, DB_MSGBUF *, u_int32_t, const FN *, 
const char *, const char *)); +const char *__db_pagetype_to_string __P((u_int32_t)); +int __db_dump_pp __P((DB *, const char *, int (*)(void *, const void *), void *, int, int)); +int __db_dump __P((DB *, const char *, int (*)(void *, const void *), void *, int, int)); +int __db_prdbt __P((DBT *, int, const char *, void *, int (*)(void *, const void *), int, int)); +int __db_prheader __P((DB *, const char *, int, int, void *, int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t)); +int __db_prfooter __P((void *, int (*)(void *, const void *))); +int __db_pr_callback __P((void *, const void *)); +const char * __db_dbtype_to_string __P((DBTYPE)); +int __db_addrem_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_addrem_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_ovref_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_debug_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_noop_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_free_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_cksum_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_init_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_trunc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_realloc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_sort_44_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_free_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_42_recover __P((ENV *, DBT 
*, DB_LSN *, db_recops, void *)); +int __db_relink_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_merge_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pgno_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +void __db_pglist_swap __P((u_int32_t, void *)); +void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *)); +int __db_traverse_big __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *, void *, int *), void *)); +int __db_reclaim_callback __P((DBC *, PAGE *, void *, int *)); +int __db_truncate_callback __P((DBC *, PAGE *, void *, int *)); +int __env_dbremove_pp __P((DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t)); +int __db_remove_pp __P((DB *, const char *, const char *, u_int32_t)); +int __db_remove __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t)); +int __db_remove_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t)); +int __db_inmem_remove __P((DB *, DB_TXN *, const char *)); +int __env_dbrename_pp __P((DB_ENV *, DB_TXN *, const char *, const char *, const char *, u_int32_t)); +int __db_rename_pp __P((DB *, const char *, const char *, const char *, u_int32_t)); +int __db_rename_int __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, const char *, u_int32_t)); +int __db_ret __P((DBC *, PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); +int __db_retcopy __P((ENV *, DBT *, void *, u_int32_t, void **, u_int32_t *)); +int __env_fileid_reset_pp __P((DB_ENV *, const char *, u_int32_t)); +int __env_fileid_reset __P((ENV *, DB_THREAD_INFO *, const char *, int)); +int __env_lsn_reset_pp __P((DB_ENV *, const char *, u_int32_t)); +int __db_lsn_reset __P((DB_MPOOLFILE *, DB_THREAD_INFO *)); +int __db_compare_both __P((DB *, const DBT *, const DBT *, const DBT *, const DBT *)); +int __db_sort_multiple __P((DB *, DBT *, DBT *, u_int32_t)); +int __db_stat_pp __P((DB *, 
DB_TXN *, void *, u_int32_t)); +int __db_stat_print_pp __P((DB *, u_int32_t)); +int __db_stat_print __P((DB *, DB_THREAD_INFO *, u_int32_t)); +int __db_truncate_pp __P((DB *, DB_TXN *, u_int32_t *, u_int32_t)); +int __db_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, u_int32_t *)); +int __db_upgrade_pp __P((DB *, const char *, u_int32_t)); +int __db_upgrade __P((DB *, const char *, u_int32_t)); +int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *)); +int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *)); +int __db_verify_pp __P((DB *, const char *, const char *, FILE *, u_int32_t)); +int __db_verify_internal __P((DB *, const char *, const char *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_verify __P((DB *, DB_THREAD_INFO *, const char *, const char *, void *, int (*)(void *, const void *), void *, void *, u_int32_t)); +int __db_vrfy_common __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __db_vrfy_datapage __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __db_vrfy_meta __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t)); +void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *)); +int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_vrfy_inpitem __P((DB *, PAGE *, db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *)); +int __db_vrfy_duptype __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); +int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t, DBT *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_vrfy_dbinfo_create __P((ENV *, DB_THREAD_INFO *, u_int32_t, VRFY_DBINFO **)); +int __db_vrfy_dbinfo_destroy __P((ENV *, VRFY_DBINFO *)); +int __db_vrfy_getpageinfo __P((VRFY_DBINFO *, db_pgno_t, VRFY_PAGEINFO **)); +int __db_vrfy_putpageinfo __P((ENV *, VRFY_DBINFO *, 
VRFY_PAGEINFO *)); +int __db_vrfy_pgset __P((ENV *, DB_THREAD_INFO *, u_int32_t, DB **)); +int __db_vrfy_pgset_get __P((DB *, DB_THREAD_INFO *, DB_TXN *, db_pgno_t, int *)); +int __db_vrfy_pgset_inc __P((DB *, DB_THREAD_INFO *, DB_TXN *, db_pgno_t)); +int __db_vrfy_pgset_next __P((DBC *, db_pgno_t *)); +int __db_vrfy_childcursor __P((VRFY_DBINFO *, DBC **)); +int __db_vrfy_childput __P((VRFY_DBINFO *, db_pgno_t, VRFY_CHILDINFO *)); +int __db_vrfy_ccset __P((DBC *, db_pgno_t, VRFY_CHILDINFO **)); +int __db_vrfy_ccnext __P((DBC *, VRFY_CHILDINFO **)); +int __db_vrfy_ccclose __P((DBC *)); +int __db_salvage_init __P((VRFY_DBINFO *)); +int __db_salvage_destroy __P((VRFY_DBINFO *)); +int __db_salvage_getnext __P((VRFY_DBINFO *, DBC **, db_pgno_t *, u_int32_t *, int)); +int __db_salvage_isdone __P((VRFY_DBINFO *, db_pgno_t)); +int __db_salvage_markdone __P((VRFY_DBINFO *, db_pgno_t)); +int __db_salvage_markneeded __P((VRFY_DBINFO *, db_pgno_t, u_int32_t)); +int __db_vrfy_prdbt __P((DBT *, int, const char *, void *, int (*)(void *, const void *), int, int, VRFY_DBINFO *)); +int __partition_init __P((DB *, u_int32_t)); +int __partition_set __P((DB *, u_int32_t, DBT *, u_int32_t (*callback)(DB *, DBT *key))); +int __partition_set_dirs __P((DB *, const char **)); +int __partition_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, DBTYPE, u_int32_t, int, int)); +int __partition_get_callback __P((DB *, u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); +int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); +int __partition_get_dirs __P((DB *, const char ***)); +int __partc_init __P((DBC *)); +int __partc_get __P((DBC*, DBT *, DBT *, u_int32_t)); +int __partition_close __P((DB *, DB_TXN *, u_int32_t)); +int __partition_sync __P((DB *)); +int __partition_stat __P((DBC *, void *, u_int32_t)); +int __part_truncate __P((DBC *, u_int32_t *)); +int __part_compact __P((DB *, DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); +int 
__part_lsn_reset __P((DB *, DB_THREAD_INFO *)); +int __part_fileid_reset __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); +int __part_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __part_remove __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t)); +int __part_rename __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, const char *)); +int __part_verify __P((DB *, VRFY_DBINFO *, const char *, void *, int (*)(void *, const void *), u_int32_t)); +int __part_testdocopy __P((DB *, const char *)); +int __db_no_partition __P((ENV *)); +int __partition_set __P((DB *, u_int32_t, DBT *, u_int32_t (*callback)(DB *, DBT *key))); +int __partition_get_callback __P((DB *, u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); +int __partition_get_dirs __P((DB *, const char ***)); +int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); +int __partition_init __P((DB *, u_int32_t)); +int __part_fileid_reset __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); +int __partition_set_dirs __P((DB *, const char **)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_db_ext_h_ */ diff --git a/src/dbinc_auto/dbreg_auto.h b/src/dbinc_auto/dbreg_auto.h new file mode 100644 index 00000000..63ad0cd3 --- /dev/null +++ b/src/dbinc_auto/dbreg_auto.h @@ -0,0 +1,43 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef __dbreg_AUTO_H +#define __dbreg_AUTO_H +#include "dbinc/log.h" +#define DB___dbreg_register 2 +typedef struct ___dbreg_register_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT name; + DBT uid; + int32_t fileid; + DBTYPE ftype; + db_pgno_t meta_pgno; + u_int32_t id; +} __dbreg_register_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __dbreg_register_desc[]; +static inline int +__dbreg_register_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, const DBT *name, const DBT *uid, int32_t fileid, DBTYPE ftype, + db_pgno_t meta_pgno, u_int32_t id) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___dbreg_register, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(uid) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t), + __dbreg_register_desc, + opcode, name, uid, fileid, ftype, meta_pgno, id)); +} + +static inline int __dbreg_register_read(ENV *env, + void *data, __dbreg_register_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __dbreg_register_desc, sizeof(__dbreg_register_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/dbreg_ext.h b/src/dbinc_auto/dbreg_ext.h new file mode 100644 index 00000000..0f495c33 --- /dev/null +++ b/src/dbinc_auto/dbreg_ext.h @@ -0,0 +1,46 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _dbreg_ext_h_ +#define _dbreg_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __dbreg_setup __P((DB *, const char *, const char *, u_int32_t)); +int __dbreg_teardown __P((DB *)); +int __dbreg_teardown_int __P((ENV *, FNAME *)); +int __dbreg_new_id __P((DB *, DB_TXN *)); +int __dbreg_get_id __P((DB *, DB_TXN *, int32_t *)); +int __dbreg_assign_id __P((DB *, int32_t, int)); +int __dbreg_revoke_id __P((DB *, int, int32_t)); +int __dbreg_revoke_id_int __P((ENV *, FNAME *, int, int, int32_t)); +int __dbreg_close_id __P((DB *, DB_TXN *, u_int32_t)); +int __dbreg_close_id_int __P((ENV *, FNAME *, u_int32_t, int)); +int __dbreg_failchk __P((ENV *)); +int __dbreg_log_close __P((ENV *, FNAME *, DB_TXN *, u_int32_t)); +int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int)); +int __dbreg_init_recover __P((ENV *, DB_DISTAB *)); +int __dbreg_register_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __dbreg_init_print __P((ENV *, DB_DISTAB *)); +int __dbreg_register_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __dbreg_stat_print __P((ENV *, u_int32_t)); +void __dbreg_print_fname __P((ENV *, FNAME *)); +int __dbreg_add_dbentry __P((ENV *, DB_LOG *, DB *, int32_t)); +int __dbreg_rem_dbentry __P((DB_LOG *, int32_t)); +int __dbreg_log_files __P((ENV *, u_int32_t)); +int __dbreg_log_nofiles __P((ENV *)); +int __dbreg_close_files __P((ENV *, int)); +int __dbreg_close_file __P((ENV *, FNAME *)); +int __dbreg_mark_restored __P((ENV *)); +int __dbreg_invalidate_files __P((ENV *, int)); +int __dbreg_id_to_db __P((ENV *, DB_TXN *, DB **, int32_t, int)); +int __dbreg_id_to_fname __P((DB_LOG *, int32_t, int, FNAME **)); +int __dbreg_fid_to_fname __P((DB_LOG *, u_int8_t *, int, FNAME **)); +int __dbreg_get_name __P((ENV *, u_int8_t *, char **, char **)); +int __dbreg_do_open __P((ENV *, DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE, int32_t, db_pgno_t, void *, u_int32_t, u_int32_t)); +int __dbreg_lazy_id __P((DB *)); + +#if 
defined(__cplusplus) +} +#endif +#endif /* !_dbreg_ext_h_ */ diff --git a/src/dbinc_auto/env_ext.h b/src/dbinc_auto/env_ext.h new file mode 100644 index 00000000..d9b8d669 --- /dev/null +++ b/src/dbinc_auto/env_ext.h @@ -0,0 +1,152 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _env_ext_h_ +#define _env_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +void __env_alloc_init __P((REGINFO *, size_t)); +size_t __env_alloc_overhead __P((void)); +size_t __env_alloc_size __P((size_t)); +int __env_alloc __P((REGINFO *, size_t, void *)); +void __env_alloc_free __P((REGINFO *, void *)); +int __env_alloc_extend __P((REGINFO *, void *, size_t *)); +int __env_region_extend __P((ENV *, REGINFO *)); +uintmax_t __env_elem_size __P((ENV *, void *)); +void * __env_get_chunk __P((REGINFO *, void **, uintmax_t *)); +void __env_alloc_print __P((REGINFO *, u_int32_t)); +int __env_read_db_config __P((ENV *)); +int __config_split __P((char *, char *[])); +int __env_failchk_pp __P((DB_ENV *, u_int32_t)); +int __env_failchk_int __P((DB_ENV *)); +size_t __env_thread_size __P((ENV *, size_t)); +size_t __env_thread_max __P((ENV *)); +int __env_thread_init __P((ENV *, int)); +void __env_thread_destroy __P((ENV *)); +int __env_set_state __P((ENV *, DB_THREAD_INFO **, DB_THREAD_STATE)); +char *__env_thread_id_string __P((DB_ENV *, pid_t, db_threadid_t, char *)); +int __db_file_extend __P((ENV *, DB_FH *, size_t)); +int __db_file_multi_write __P((ENV *, const char *)); +int __db_file_write __P((ENV *, DB_FH *, u_int32_t, u_int32_t, int)); +void __db_env_destroy __P((DB_ENV *)); +int __env_get_alloc __P((DB_ENV *, void *(**)(size_t), void *(**)(void *, size_t), void (**)(void *))); +int __env_set_alloc __P((DB_ENV *, void *(*)(size_t), void *(*)(void *, size_t), void (*)(void *))); +int __env_get_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t *)); +int __env_set_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t)); +int __env_get_memory_max __P((DB_ENV *, 
u_int32_t *, u_int32_t *)); +int __env_set_memory_max __P((DB_ENV *, u_int32_t, u_int32_t)); +int __env_get_encrypt_flags __P((DB_ENV *, u_int32_t *)); +int __env_set_encrypt __P((DB_ENV *, const char *, u_int32_t)); +void __env_map_flags __P((const FLAG_MAP *, u_int, u_int32_t *, u_int32_t *)); +void __env_fetch_flags __P((const FLAG_MAP *, u_int, u_int32_t *, u_int32_t *)); +int __env_set_flags __P((DB_ENV *, u_int32_t, int)); +int __env_set_data_dir __P((DB_ENV *, const char *)); +int __env_add_data_dir __P((DB_ENV *, const char *)); +int __env_set_create_dir __P((DB_ENV *, const char *)); +int __env_set_data_len __P((DB_ENV *, u_int32_t)); +int __env_set_intermediate_dir_mode __P((DB_ENV *, const char *)); +void __env_get_errcall __P((DB_ENV *, void (**)(const DB_ENV *, const char *, const char *))); +void __env_set_errcall __P((DB_ENV *, void (*)(const DB_ENV *, const char *, const char *))); +void __env_get_errfile __P((DB_ENV *, FILE **)); +void __env_set_errfile __P((DB_ENV *, FILE *)); +void __env_get_errpfx __P((DB_ENV *, const char **)); +void __env_set_errpfx __P((DB_ENV *, const char *)); +int __env_set_thread_count __P((DB_ENV *, u_int32_t)); +void __env_get_msgcall __P((DB_ENV *, void (**)(const DB_ENV *, const char *))); +void __env_set_msgcall __P((DB_ENV *, void (*)(const DB_ENV *, const char *))); +void __env_get_msgfile __P((DB_ENV *, FILE **)); +void __env_set_msgfile __P((DB_ENV *, FILE *)); +int __env_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); +int __env_set_shm_key __P((DB_ENV *, long)); +int __env_set_tmp_dir __P((DB_ENV *, const char *)); +int __env_set_verbose __P((DB_ENV *, u_int32_t, int)); +int __db_mi_env __P((ENV *, const char *)); +int __db_mi_open __P((ENV *, const char *, int)); +int __env_not_config __P((ENV *, char *, u_int32_t)); +int __env_set_timeout __P((DB_ENV *, db_timeout_t, u_int32_t)); +int __db_appname __P((ENV *, APPNAME, const char *, const char **, char **)); +int __db_tmp_open __P((ENV *, u_int32_t, 
DB_FH **)); +int __env_open_pp __P((DB_ENV *, const char *, u_int32_t, int)); +int __env_open __P((DB_ENV *, const char *, u_int32_t, int)); +int __env_remove __P((DB_ENV *, const char *, u_int32_t)); +int __env_config __P((DB_ENV *, const char *, u_int32_t *, int)); +int __env_close_pp __P((DB_ENV *, u_int32_t)); +int __env_close __P((DB_ENV *, u_int32_t)); +int __env_refresh __P((DB_ENV *, u_int32_t, int)); +int __env_get_open_flags __P((DB_ENV *, u_int32_t *)); +int __env_attach_regions __P((DB_ENV *, u_int32_t, u_int32_t, int)); +int __db_apprec __P((ENV *, DB_THREAD_INFO *, DB_LSN *, DB_LSN *, int, u_int32_t)); +int __env_openfiles __P((ENV *, DB_LOGC *, void *, DBT *, DB_LSN *, DB_LSN *, double, int)); +int __env_init_rec __P((ENV *, u_int32_t)); +int __env_attach __P((ENV *, u_int32_t *, int, int)); +int __env_turn_on __P((ENV *)); +int __env_turn_off __P((ENV *, u_int32_t)); +void __env_panic_set __P((ENV *, int)); +int __env_ref_increment __P((ENV *)); +int __env_ref_decrement __P((ENV *)); +int __env_detach __P((ENV *, int)); +int __env_remove_env __P((ENV *)); +int __env_region_attach __P((ENV *, REGINFO *, size_t, size_t)); +int __env_region_share __P((ENV *, REGINFO *)); +int __env_region_detach __P((ENV *, REGINFO *, int)); +int __envreg_register __P((ENV *, int *, u_int32_t)); +int __envreg_unregister __P((ENV *, int)); +int __envreg_xunlock __P((ENV *)); +int __envreg_isalive __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); +u_int32_t __env_struct_sig __P((void)); +int __env_stat_print_pp __P((DB_ENV *, u_int32_t)); +void __db_print_fh __P((ENV *, const char *, DB_FH *, u_int32_t)); +void __db_print_fileid __P((ENV *, u_int8_t *, const char *)); +void __db_dl __P((ENV *, const char *, u_long)); +void __db_dl_pct __P((ENV *, const char *, u_long, int, const char *)); +void __db_dlbytes __P((ENV *, const char *, u_long, u_long, u_long)); +void __db_print_reginfo __P((ENV *, REGINFO *, const char *, u_int32_t)); +int __db_stat_not_built __P((ENV *)); 
+#ifndef HAVE_REPLICATION_THREADS +int __repmgr_close __P((ENV *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_get_ack_policy __P((DB_ENV *, int *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_set_ack_policy __P((DB_ENV *, int)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_local_site __P((DB_ENV *, DB_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_start __P((DB_ENV *, int, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_handle_event __P((ENV *, u_int32_t, void *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_channel __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_set_msg_dispatch __P((DB_ENV *, void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_init_recover __P((ENV *, DB_DISTAB *)); +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_env_ext_h_ */ diff --git a/src/dbinc_auto/ext_185_def.in b/src/dbinc_auto/ext_185_def.in new file mode 100644 index 00000000..8da68a8d --- /dev/null +++ b/src/dbinc_auto/ext_185_def.in @@ -0,0 +1,12 @@ + +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _DB_EXT_185_DEF_IN_ +#define _DB_EXT_185_DEF_IN_ + +#ifdef _DB185_INT_H_ +#define __db185_open __db185_open@DB_VERSION_UNIQUE_NAME@ +#else +#define __db185_open __db185_open@DB_VERSION_UNIQUE_NAME@ +#endif + +#endif /* !_DB_EXT_185_DEF_IN_ */ diff --git a/src/dbinc_auto/ext_185_prot.in b/src/dbinc_auto/ext_185_prot.in new file mode 100644 index 00000000..dfd8d3d4 --- /dev/null +++ b/src/dbinc_auto/ext_185_prot.in @@ -0,0 +1,19 @@ + +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _DB_EXT_185_PROT_IN_ +#define _DB_EXT_185_PROT_IN_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef _DB185_INT_H_ +DB185 *__db185_open __P((const char *, int, int, DBTYPE, const void *)); +#else +DB *__db185_open __P((const char *, int, int, DBTYPE, const void *)); +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_EXT_185_PROT_IN_ */ diff --git a/src/dbinc_auto/ext_def.in b/src/dbinc_auto/ext_def.in new file mode 100644 index 00000000..1a56f192 --- /dev/null +++ b/src/dbinc_auto/ext_def.in @@ -0,0 +1,66 @@ + +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _DB_EXT_DEF_IN_ +#define _DB_EXT_DEF_IN_ + +#define db_copy db_copy@DB_VERSION_UNIQUE_NAME@ +#define db_create db_create@DB_VERSION_UNIQUE_NAME@ +#define db_strerror db_strerror@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_assert db_env_set_func_assert@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_close db_env_set_func_close@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_dirfree db_env_set_func_dirfree@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_dirlist db_env_set_func_dirlist@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_exists db_env_set_func_exists@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_free db_env_set_func_free@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_fsync db_env_set_func_fsync@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_ftruncate db_env_set_func_ftruncate@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_ioinfo db_env_set_func_ioinfo@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_malloc db_env_set_func_malloc@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_file_map db_env_set_func_file_map@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_region_map db_env_set_func_region_map@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_pread db_env_set_func_pread@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_pwrite db_env_set_func_pwrite@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_open db_env_set_func_open@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_read db_env_set_func_read@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_realloc db_env_set_func_realloc@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_rename db_env_set_func_rename@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_seek db_env_set_func_seek@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_unlink db_env_set_func_unlink@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_write db_env_set_func_write@DB_VERSION_UNIQUE_NAME@ +#define db_env_set_func_yield db_env_set_func_yield@DB_VERSION_UNIQUE_NAME@ +#define db_env_create 
db_env_create@DB_VERSION_UNIQUE_NAME@ +#define db_version db_version@DB_VERSION_UNIQUE_NAME@ +#define db_full_version db_full_version@DB_VERSION_UNIQUE_NAME@ +#define log_compare log_compare@DB_VERSION_UNIQUE_NAME@ +#if defined(DB_WIN32) && !defined(DB_WINCE) +#define db_env_set_win_security db_env_set_win_security@DB_VERSION_UNIQUE_NAME@ +#endif +#define db_sequence_create db_sequence_create@DB_VERSION_UNIQUE_NAME@ +#if DB_DBM_HSEARCH != 0 +#define __db_ndbm_clearerr __db_ndbm_clearerr@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_close __db_ndbm_close@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_delete __db_ndbm_delete@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_dirfno __db_ndbm_dirfno@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_error __db_ndbm_error@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_fetch __db_ndbm_fetch@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_firstkey __db_ndbm_firstkey@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_nextkey __db_ndbm_nextkey@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_open __db_ndbm_open@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_pagfno __db_ndbm_pagfno@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_rdonly __db_ndbm_rdonly@DB_VERSION_UNIQUE_NAME@ +#define __db_ndbm_store __db_ndbm_store@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_close __db_dbm_close@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_delete __db_dbm_delete@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_fetch __db_dbm_fetch@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_firstkey __db_dbm_firstkey@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_init __db_dbm_init@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_nextkey __db_dbm_nextkey@DB_VERSION_UNIQUE_NAME@ +#define __db_dbm_store __db_dbm_store@DB_VERSION_UNIQUE_NAME@ +#endif +#if DB_DBM_HSEARCH != 0 +#define __db_hcreate __db_hcreate@DB_VERSION_UNIQUE_NAME@ +#define __db_hsearch __db_hsearch@DB_VERSION_UNIQUE_NAME@ +#define __db_hdestroy __db_hdestroy@DB_VERSION_UNIQUE_NAME@ +#endif + +#endif /* !_DB_EXT_DEF_IN_ */ diff --git a/src/dbinc_auto/ext_prot.in 
b/src/dbinc_auto/ext_prot.in new file mode 100644 index 00000000..371e5a3e --- /dev/null +++ b/src/dbinc_auto/ext_prot.in @@ -0,0 +1,73 @@ + +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _DB_EXT_PROT_IN_ +#define _DB_EXT_PROT_IN_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int db_copy __P((DB_ENV *, const char *, const char *, const char *)); +int db_create __P((DB **, DB_ENV *, u_int32_t)); +char *db_strerror __P((int)); +int db_env_set_func_assert __P((void (*)(const char *, const char *, int))); +int db_env_set_func_close __P((int (*)(int))); +int db_env_set_func_dirfree __P((void (*)(char **, int))); +int db_env_set_func_dirlist __P((int (*)(const char *, char ***, int *))); +int db_env_set_func_exists __P((int (*)(const char *, int *))); +int db_env_set_func_free __P((void (*)(void *))); +int db_env_set_func_fsync __P((int (*)(int))); +int db_env_set_func_ftruncate __P((int (*)(int, off_t))); +int db_env_set_func_ioinfo __P((int (*)(const char *, int, u_int32_t *, u_int32_t *, u_int32_t *))); +int db_env_set_func_malloc __P((void *(*)(size_t))); +int db_env_set_func_file_map __P((int (*)(DB_ENV *, char *, size_t, int, void **), int (*)(DB_ENV *, void *))); +int db_env_set_func_region_map __P((int (*)(DB_ENV *, char *, size_t, int *, void **), int (*)(DB_ENV *, void *))); +int db_env_set_func_pread __P((ssize_t (*)(int, void *, size_t, off_t))); +int db_env_set_func_pwrite __P((ssize_t (*)(int, const void *, size_t, off_t))); +int db_env_set_func_open __P((int (*)(const char *, int, ...))); +int db_env_set_func_read __P((ssize_t (*)(int, void *, size_t))); +int db_env_set_func_realloc __P((void *(*)(void *, size_t))); +int db_env_set_func_rename __P((int (*)(const char *, const char *))); +int db_env_set_func_seek __P((int (*)(int, off_t, int))); +int db_env_set_func_unlink __P((int (*)(const char *))); +int db_env_set_func_write __P((ssize_t (*)(int, const void *, size_t))); +int db_env_set_func_yield __P((int (*)(u_long, 
u_long))); +int db_env_create __P((DB_ENV **, u_int32_t)); +char *db_version __P((int *, int *, int *)); +char *db_full_version __P((int *, int *, int *, int *, int *)); +int log_compare __P((const DB_LSN *, const DB_LSN *)); +#if defined(DB_WIN32) && !defined(DB_WINCE) +int db_env_set_win_security __P((SECURITY_ATTRIBUTES *sa)); +#endif +int db_sequence_create __P((DB_SEQUENCE **, DB *, u_int32_t)); +#if DB_DBM_HSEARCH != 0 +int __db_ndbm_clearerr __P((DBM *)); +void __db_ndbm_close __P((DBM *)); +int __db_ndbm_delete __P((DBM *, datum)); +int __db_ndbm_dirfno __P((DBM *)); +int __db_ndbm_error __P((DBM *)); +datum __db_ndbm_fetch __P((DBM *, datum)); +datum __db_ndbm_firstkey __P((DBM *)); +datum __db_ndbm_nextkey __P((DBM *)); +DBM *__db_ndbm_open __P((const char *, int, int)); +int __db_ndbm_pagfno __P((DBM *)); +int __db_ndbm_rdonly __P((DBM *)); +int __db_ndbm_store __P((DBM *, datum, datum, int)); +int __db_dbm_close __P((void)); +int __db_dbm_delete __P((datum)); +datum __db_dbm_fetch __P((datum)); +datum __db_dbm_firstkey __P((void)); +int __db_dbm_init __P((char *)); +datum __db_dbm_nextkey __P((datum)); +int __db_dbm_store __P((datum, datum)); +#endif +#if DB_DBM_HSEARCH != 0 +int __db_hcreate __P((size_t)); +ENTRY *__db_hsearch __P((ENTRY, ACTION)); +void __db_hdestroy __P((void)); +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_EXT_PROT_IN_ */ diff --git a/src/dbinc_auto/fileops_auto.h b/src/dbinc_auto/fileops_auto.h new file mode 100644 index 00000000..59385c88 --- /dev/null +++ b/src/dbinc_auto/fileops_auto.h @@ -0,0 +1,262 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef __fop_AUTO_H +#define __fop_AUTO_H +#include "dbinc/log.h" +#define DB___fop_create_42 143 +typedef struct ___fop_create_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + u_int32_t appname; + u_int32_t mode; +} __fop_create_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_create_42_desc[]; +static inline int __fop_create_42_read(ENV *env, + void *data, __fop_create_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_create_42_desc, sizeof(__fop_create_42_args), (void**)arg)); +} +#define DB___fop_create 143 +typedef struct ___fop_create_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + DBT dirname; + u_int32_t appname; + u_int32_t mode; +} __fop_create_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_create_desc[]; +static inline int +__fop_create_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *name, const DBT *dirname, u_int32_t appname, u_int32_t mode) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_create, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(dirname) + sizeof(u_int32_t) + + sizeof(u_int32_t), + __fop_create_desc, + name, dirname, appname, mode)); +} + +static inline int __fop_create_read(ENV *env, + void *data, __fop_create_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_create_desc, sizeof(__fop_create_args), (void**)arg)); +} +#define DB___fop_remove 144 +typedef struct ___fop_remove_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + DBT fid; + u_int32_t appname; +} __fop_remove_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_remove_desc[]; +static inline int +__fop_remove_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *name, const DBT *fid, u_int32_t appname) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_remove, 0, + 
sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(fid) + sizeof(u_int32_t), + __fop_remove_desc, + name, fid, appname)); +} + +static inline int __fop_remove_read(ENV *env, + void *data, __fop_remove_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_remove_desc, sizeof(__fop_remove_args), (void**)arg)); +} +#define DB___fop_write_42 145 +typedef struct ___fop_write_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + u_int32_t appname; + u_int32_t pgsize; + db_pgno_t pageno; + u_int32_t offset; + DBT page; + u_int32_t flag; +} __fop_write_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_write_42_desc[]; +static inline int __fop_write_42_read(ENV *env, + void *data, __fop_write_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_write_42_desc, sizeof(__fop_write_42_args), (void**)arg)); +} +#define DB___fop_write 145 +typedef struct ___fop_write_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT name; + DBT dirname; + u_int32_t appname; + u_int32_t pgsize; + db_pgno_t pageno; + u_int32_t offset; + DBT page; + u_int32_t flag; +} __fop_write_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_write_desc[]; +static inline int +__fop_write_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *name, const DBT *dirname, u_int32_t appname, u_int32_t pgsize, db_pgno_t pageno, + u_int32_t offset, const DBT *page, u_int32_t flag) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_write, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(name) + LOG_DBT_SIZE(dirname) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + LOG_DBT_SIZE(page) + sizeof(u_int32_t), + __fop_write_desc, + name, dirname, appname, pgsize, pageno, offset, page, flag)); +} + +static inline int __fop_write_read(ENV *env, + void *data, 
__fop_write_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_write_desc, sizeof(__fop_write_args), (void**)arg)); +} +#define DB___fop_rename_42 146 +#define DB___fop_rename_noundo_46 150 +typedef struct ___fop_rename_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT oldname; + DBT newname; + DBT fileid; + u_int32_t appname; +} __fop_rename_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_42_desc[]; +static inline int __fop_rename_42_read(ENV *env, + void *data, __fop_rename_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_rename_42_desc, sizeof(__fop_rename_42_args), (void**)arg)); +} +extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_noundo_46_desc[]; +static inline int __fop_rename_noundo_46_read(ENV *env, + void *data, __fop_rename_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_rename_noundo_46_desc, sizeof(__fop_rename_42_args), (void**)arg)); +} +#define DB___fop_rename 146 +#define DB___fop_rename_noundo 150 +typedef struct ___fop_rename_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT oldname; + DBT newname; + DBT dirname; + DBT fileid; + u_int32_t appname; +} __fop_rename_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_desc[]; +static inline int +__fop_rename_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *oldname, const DBT *newname, const DBT *dirname, const DBT *fileid, u_int32_t appname) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_rename, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(oldname) + LOG_DBT_SIZE(newname) + LOG_DBT_SIZE(dirname) + + LOG_DBT_SIZE(fileid) + sizeof(u_int32_t), + __fop_rename_desc, + oldname, newname, dirname, fileid, appname)); +} + +static inline int __fop_rename_read(ENV *env, + void *data, __fop_rename_args **arg) +{ + *arg = NULL; + return 
(__log_read_record(env, + NULL, NULL, data, __fop_rename_desc, sizeof(__fop_rename_args), (void**)arg)); +} +extern __DB_IMPORT DB_LOG_RECSPEC __fop_rename_noundo_desc[]; +static inline int +__fop_rename_noundo_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *oldname, const DBT *newname, const DBT *dirname, const DBT *fileid, u_int32_t appname) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_rename_noundo, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(oldname) + LOG_DBT_SIZE(newname) + LOG_DBT_SIZE(dirname) + + LOG_DBT_SIZE(fileid) + sizeof(u_int32_t), + __fop_rename_noundo_desc, + oldname, newname, dirname, fileid, appname)); +} + +static inline int __fop_rename_noundo_read(ENV *env, + void *data, __fop_rename_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_rename_noundo_desc, sizeof(__fop_rename_args), (void**)arg)); +} +#define DB___fop_file_remove 141 +typedef struct ___fop_file_remove_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DBT real_fid; + DBT tmp_fid; + DBT name; + u_int32_t appname; + u_int32_t child; +} __fop_file_remove_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __fop_file_remove_desc[]; +static inline int +__fop_file_remove_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + const DBT *real_fid, const DBT *tmp_fid, const DBT *name, u_int32_t appname, u_int32_t child) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___fop_file_remove, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + LOG_DBT_SIZE(real_fid) + LOG_DBT_SIZE(tmp_fid) + LOG_DBT_SIZE(name) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __fop_file_remove_desc, + real_fid, tmp_fid, name, appname, child)); +} + +static inline int __fop_file_remove_read(ENV *env, + void *data, __fop_file_remove_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __fop_file_remove_desc, 
sizeof(__fop_file_remove_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/fileops_ext.h b/src/dbinc_auto/fileops_ext.h new file mode 100644 index 00000000..0aa6c1e1 --- /dev/null +++ b/src/dbinc_auto/fileops_ext.h @@ -0,0 +1,44 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _fileops_ext_h_ +#define _fileops_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __fop_init_recover __P((ENV *, DB_DISTAB *)); +int __fop_create_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_create_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_remove_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_file_remove_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_init_print __P((ENV *, DB_DISTAB *)); +int __fop_create __P((ENV *, DB_TXN *, DB_FH **, const char *, const char **, APPNAME, int, u_int32_t)); +int __fop_remove __P((ENV *, DB_TXN *, u_int8_t *, const char *, const char **, APPNAME, u_int32_t)); +int __fop_write __P((ENV *, DB_TXN *, const char *, const char *, APPNAME, DB_FH *, u_int32_t, db_pgno_t, u_int32_t, void *, u_int32_t, u_int32_t, u_int32_t)); +int __fop_rename __P((ENV *, DB_TXN *, const char *, const char *, const char **, u_int8_t *, APPNAME, int, u_int32_t)); +int __fop_create_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_create_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_remove_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int 
__fop_rename_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_noundo_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_noundo_46_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_file_remove_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_lock_handle __P((ENV *, DB *, DB_LOCKER *, db_lockmode_t, DB_LOCK *, u_int32_t)); +int __fop_file_setup __P((DB *, DB_THREAD_INFO *ip, DB_TXN *, const char *, int, u_int32_t, u_int32_t *)); +int __fop_subdb_setup __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, int, u_int32_t)); +int __fop_remove_setup __P((DB *, DB_TXN *, const char *, u_int32_t)); +int __fop_read_meta __P((ENV *, const char *, u_int8_t *, size_t, DB_FH *, int, size_t *)); +int __fop_dummy __P((DB *, DB_TXN *, const char *, const char *)); +int __fop_dbrename __P((DB *, const char *, const char *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_fileops_ext_h_ */ diff --git a/src/dbinc_auto/hash_auto.h b/src/dbinc_auto/hash_auto.h new file mode 100644 index 00000000..c1dcae91 --- /dev/null +++ b/src/dbinc_auto/hash_auto.h @@ -0,0 +1,484 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef __ham_AUTO_H +#define __ham_AUTO_H +#ifdef HAVE_HASH +#include "dbinc/log.h" +#define DB___ham_insdel 21 +typedef struct ___ham_insdel_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + u_int32_t keytype; + DBT key; + u_int32_t datatype; + DBT data; +} __ham_insdel_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_insdel_desc[]; +static inline int +__ham_insdel_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_pgno_t pgno, u_int32_t ndx, DB_LSN * pagelsn, + u_int32_t keytype, const DBT *key, u_int32_t datatype, const DBT *data) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_insdel, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(*pagelsn) + sizeof(u_int32_t) + + LOG_DBT_SIZE(key) + sizeof(u_int32_t) + LOG_DBT_SIZE(data), + __ham_insdel_desc, + opcode, pgno, ndx, pagelsn, keytype, key, datatype, + data)); +} + +static inline int __ham_insdel_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_insdel_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_insdel_desc, sizeof(__ham_insdel_args), (void**)arg)); +} +#define DB___ham_insdel_42 21 +typedef struct ___ham_insdel_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + DBT key; + DBT data; +} __ham_insdel_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_insdel_42_desc[]; +static inline int __ham_insdel_42_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_insdel_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_insdel_42_desc, sizeof(__ham_insdel_42_args), (void**)arg)); +} +#define DB___ham_newpage 22 +typedef struct ___ham_newpage_args { + 
u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN prevlsn; + db_pgno_t new_pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; +} __ham_newpage_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_newpage_desc[]; +static inline int +__ham_newpage_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_pgno_t prev_pgno, DB_LSN * prevlsn, db_pgno_t new_pgno, + DB_LSN * pagelsn, db_pgno_t next_pgno, DB_LSN * nextlsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_newpage, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*prevlsn) + sizeof(u_int32_t) + sizeof(*pagelsn) + + sizeof(u_int32_t) + sizeof(*nextlsn), + __ham_newpage_desc, + opcode, prev_pgno, prevlsn, new_pgno, pagelsn, next_pgno, nextlsn)); +} + +static inline int __ham_newpage_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_newpage_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_newpage_desc, sizeof(__ham_newpage_args), (void**)arg)); +} +#define DB___ham_splitdata 24 +typedef struct ___ham_splitdata_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t opcode; + db_pgno_t pgno; + DBT pageimage; + DB_LSN pagelsn; +} __ham_splitdata_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_splitdata_desc[]; +static inline int +__ham_splitdata_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, u_int32_t opcode, db_pgno_t pgno, const DBT *pageimage, DB_LSN * pagelsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_splitdata, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + LOG_DBT_SIZE(pageimage) + sizeof(*pagelsn), + __ham_splitdata_desc, opcode, pgno, pageimage, pagelsn)); +} + +static inline 
int __ham_splitdata_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_splitdata_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_splitdata_desc, sizeof(__ham_splitdata_args), (void**)arg)); +} +#define DB___ham_replace 25 +typedef struct ___ham_replace_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + int32_t off; + u_int32_t oldtype; + DBT olditem; + u_int32_t newtype; + DBT newitem; +} __ham_replace_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_replace_desc[]; +static inline int +__ham_replace_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, u_int32_t ndx, DB_LSN * pagelsn, int32_t off, + u_int32_t oldtype, const DBT *olditem, u_int32_t newtype, const DBT *newitem) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_replace, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*pagelsn) + sizeof(u_int32_t) + sizeof(u_int32_t) + + LOG_DBT_SIZE(olditem) + sizeof(u_int32_t) + LOG_DBT_SIZE(newitem), + __ham_replace_desc, pgno, ndx, pagelsn, off, oldtype, olditem, newtype, + newitem)); +} + +static inline int __ham_replace_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_replace_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_replace_desc, sizeof(__ham_replace_args), (void**)arg)); +} +#define DB___ham_replace_42 25 +typedef struct ___ham_replace_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + int32_t off; + DBT olditem; + DBT newitem; + u_int32_t makedup; +} __ham_replace_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_replace_42_desc[]; +static inline int __ham_replace_42_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_replace_42_args **arg) +{ + *arg = NULL; 
+ return (__log_read_record(env, + dbpp, td, data, __ham_replace_42_desc, sizeof(__ham_replace_42_args), (void**)arg)); +} +#define DB___ham_copypage 28 +typedef struct ___ham_copypage_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; + db_pgno_t nnext_pgno; + DB_LSN nnextlsn; + DBT page; +} __ham_copypage_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_copypage_desc[]; +static inline int +__ham_copypage_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, DB_LSN * pagelsn, db_pgno_t next_pgno, DB_LSN * nextlsn, + db_pgno_t nnext_pgno, DB_LSN * nnextlsn, const DBT *page) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_copypage, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*pagelsn) + + sizeof(u_int32_t) + sizeof(*nextlsn) + sizeof(u_int32_t) + + sizeof(*nnextlsn) + LOG_DBT_SIZE(page), + __ham_copypage_desc, pgno, pagelsn, next_pgno, nextlsn, nnext_pgno, nnextlsn, page)); +} + +static inline int __ham_copypage_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_copypage_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_copypage_desc, sizeof(__ham_copypage_args), (void**)arg)); +} +#define DB___ham_metagroup_42 29 +typedef struct ___ham_metagroup_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t bucket; + db_pgno_t mmpgno; + DB_LSN mmetalsn; + db_pgno_t mpgno; + DB_LSN metalsn; + db_pgno_t pgno; + DB_LSN pagelsn; + u_int32_t newalloc; +} __ham_metagroup_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_metagroup_42_desc[]; +static inline int __ham_metagroup_42_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_metagroup_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_metagroup_42_desc, 
sizeof(__ham_metagroup_42_args), (void**)arg)); +} +#define DB___ham_metagroup 29 +typedef struct ___ham_metagroup_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t bucket; + db_pgno_t mmpgno; + DB_LSN mmetalsn; + db_pgno_t mpgno; + DB_LSN metalsn; + db_pgno_t pgno; + DB_LSN pagelsn; + u_int32_t newalloc; + db_pgno_t last_pgno; +} __ham_metagroup_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_metagroup_desc[]; +static inline int +__ham_metagroup_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, u_int32_t bucket, db_pgno_t mmpgno, DB_LSN * mmetalsn, db_pgno_t mpgno, + DB_LSN * metalsn, db_pgno_t pgno, DB_LSN * pagelsn, u_int32_t newalloc, db_pgno_t last_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_metagroup, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*mmetalsn) + sizeof(u_int32_t) + sizeof(*metalsn) + + sizeof(u_int32_t) + sizeof(*pagelsn) + sizeof(u_int32_t) + + sizeof(u_int32_t), + __ham_metagroup_desc, bucket, mmpgno, mmetalsn, mpgno, metalsn, pgno, pagelsn, + newalloc, last_pgno)); +} + +static inline int __ham_metagroup_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_metagroup_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_metagroup_desc, sizeof(__ham_metagroup_args), (void**)arg)); +} +#define DB___ham_groupalloc_42 32 +typedef struct ___ham_groupalloc_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t start_pgno; + u_int32_t num; + db_pgno_t free; +} __ham_groupalloc_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_groupalloc_42_desc[]; +static inline int __ham_groupalloc_42_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_groupalloc_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_groupalloc_42_desc, 
sizeof(__ham_groupalloc_42_args), (void**)arg)); +} +#define DB___ham_groupalloc 32 +typedef struct ___ham_groupalloc_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t start_pgno; + u_int32_t num; + db_pgno_t unused; + db_pgno_t last_pgno; +} __ham_groupalloc_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_groupalloc_desc[]; +static inline int +__ham_groupalloc_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * meta_lsn, db_pgno_t start_pgno, u_int32_t num, db_pgno_t unused, + db_pgno_t last_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_groupalloc, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*meta_lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __ham_groupalloc_desc, meta_lsn, start_pgno, num, unused, last_pgno)); +} + +static inline int __ham_groupalloc_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_groupalloc_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_groupalloc_desc, sizeof(__ham_groupalloc_args), (void**)arg)); +} +#define DB___ham_changeslot 35 +typedef struct ___ham_changeslot_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + u_int32_t slot; + db_pgno_t old; + db_pgno_t new; +} __ham_changeslot_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_changeslot_desc[]; +static inline int +__ham_changeslot_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * meta_lsn, u_int32_t slot, db_pgno_t old, db_pgno_t new) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_changeslot, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*meta_lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __ham_changeslot_desc, meta_lsn, slot, old, new)); +} + +static inline int 
__ham_changeslot_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_changeslot_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_changeslot_desc, sizeof(__ham_changeslot_args), (void**)arg)); +} +#define DB___ham_contract 37 +typedef struct ___ham_contract_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t meta; + DB_LSN meta_lsn; + u_int32_t bucket; + db_pgno_t pgno; +} __ham_contract_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_contract_desc[]; +static inline int +__ham_contract_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t meta, DB_LSN * meta_lsn, u_int32_t bucket, db_pgno_t pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_contract, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(*meta_lsn) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __ham_contract_desc, meta, meta_lsn, bucket, pgno)); +} + +static inline int __ham_contract_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_contract_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_contract_desc, sizeof(__ham_contract_args), (void**)arg)); +} +#define DB___ham_curadj 33 +typedef struct ___ham_curadj_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t len; + u_int32_t dup_off; + int add; + int is_dup; + u_int32_t order; +} __ham_curadj_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_curadj_desc[]; +static inline int +__ham_curadj_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, u_int32_t indx, u_int32_t len, u_int32_t dup_off, + int add, int is_dup, u_int32_t order) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_curadj, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + 
sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __ham_curadj_desc, pgno, indx, len, dup_off, add, is_dup, order)); +} + +static inline int __ham_curadj_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_curadj_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_curadj_desc, sizeof(__ham_curadj_args), (void**)arg)); +} +#define DB___ham_chgpg 34 +typedef struct ___ham_chgpg_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_ham_mode mode; + db_pgno_t old_pgno; + db_pgno_t new_pgno; + u_int32_t old_indx; + u_int32_t new_indx; +} __ham_chgpg_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __ham_chgpg_desc[]; +static inline int +__ham_chgpg_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_ham_mode mode, db_pgno_t old_pgno, db_pgno_t new_pgno, u_int32_t old_indx, + u_int32_t new_indx) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___ham_chgpg, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __ham_chgpg_desc, mode, old_pgno, new_pgno, old_indx, new_indx)); +} + +static inline int __ham_chgpg_read(ENV *env, + DB **dbpp, void *td, void *data, __ham_chgpg_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __ham_chgpg_desc, sizeof(__ham_chgpg_args), (void**)arg)); +} +#endif /* HAVE_HASH */ +#endif diff --git a/src/dbinc_auto/hash_ext.h b/src/dbinc_auto/hash_ext.h new file mode 100644 index 00000000..e83fe817 --- /dev/null +++ b/src/dbinc_auto/hash_ext.h @@ -0,0 +1,129 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _hash_ext_h_ +#define _hash_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __ham_quick_delete __P((DBC *)); +int __hamc_init __P((DBC *)); +int __hamc_count __P((DBC *, db_recno_t *)); +int __hamc_cmp __P((DBC *, DBC *, int *)); +int __hamc_dup __P((DBC *, DBC *)); +int __ham_contract_table __P((DBC *, DB_COMPACT *)); +u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t)); +int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); +int __ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); +int __ham_init_dbt __P((ENV *, DBT *, u_int32_t, void **, u_int32_t *)); +int __hamc_update __P((DBC *, u_int32_t, db_ham_curadj, int)); +int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); +int __ham_init_recover __P((ENV *, DB_DISTAB *)); +int __ham_insdel_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_insdel_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpage_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitdata_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_copypage_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_changeslot_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_contract_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_curadj_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_chgpg_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_init_print __P((ENV *, DB_DISTAB 
*)); +int __ham_compact_int __P((DBC *, DBT *, DBT *, u_int32_t, DB_COMPACT *, int *, u_int32_t)); +int __ham_compact_bucket __P((DBC *, DB_COMPACT *, int *)); +int __ham_compact_hash __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *)); +int __ham_pgin __P((DB *, db_pgno_t, void *, DBT *)); +int __ham_pgout __P((DB *, db_pgno_t, void *, DBT *)); +int __ham_mswap __P((ENV *, void *)); +int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); +int __ham_dup_convert __P((DBC *)); +int __ham_make_dup __P((ENV *, const DBT *, DBT *d, void **, u_int32_t *)); +void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t)); +u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); +int __ham_get_meta __P((DBC *)); +int __ham_release_meta __P((DBC *)); +int __ham_dirty_meta __P((DBC *, u_int32_t)); +int __ham_return_meta __P((DBC *, u_int32_t, DBMETA **)); +int __ham_db_create __P((DB *)); +int __ham_db_close __P((DB *)); +int __ham_get_h_ffactor __P((DB *, u_int32_t *)); +int __ham_set_h_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *))); +int __ham_get_h_nelem __P((DB *, u_int32_t *)); +void __ham_copy_config __P((DB *, DB*, u_int32_t)); +int __ham_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char * name, db_pgno_t, u_int32_t)); +int __ham_metachk __P((DB *, const char *, HMETA *)); +int __ham_new_file __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); +int __ham_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *)); +int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_reset __P((DBC *)); +int __ham_item_init __P((DBC *)); +int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *)); +int 
__ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_insertpair __P((DBC *, PAGE *p, db_indx_t *indxp, const DBT *, const DBT *, u_int32_t, u_int32_t)); +int __ham_getindex __P((DBC *, PAGE *, const DBT *, u_int32_t, int *, db_indx_t *)); +int __ham_verify_sorted_page __P((DBC *, PAGE *)); +int __ham_sort_page_cursor __P((DBC *, PAGE *)); +int __ham_sort_page __P((DBC *, PAGE **, PAGE *)); +int __ham_del_pair __P((DBC *, int, PAGE *)); +int __ham_replpair __P((DBC *, DBT *, u_int32_t)); +void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t, int32_t, u_int32_t, int, DBT *)); +int __ham_merge_pages __P((DBC *, u_int32_t, u_int32_t, DB_COMPACT *)); +int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); +int __ham_add_el __P((DBC *, const DBT *, const DBT *, u_int32_t)); +int __ham_copypair __P((DBC *, PAGE *, u_int32_t, PAGE *, db_indx_t *, int)); +int __ham_add_ovflpage __P((DBC *, PAGE **)); +int __ham_get_cpage __P((DBC *, db_lockmode_t)); +int __ham_next_cpage __P((DBC *, db_pgno_t)); +int __ham_lock_bucket __P((DBC *, db_lockmode_t)); +void __ham_dpair __P((DB *, PAGE *, u_int32_t)); +int __ham_insdel_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_insdel_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpage_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitdata_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_copypage_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_contract_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_changeslot_recover 
__P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_curadj_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_chgpg_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_reclaim __P((DB *, DB_THREAD_INFO *, DB_TXN *txn, u_int32_t)); +int __ham_truncate __P((DBC *, u_int32_t *)); +int __ham_stat __P((DBC *, void *, u_int32_t)); +int __ham_stat_print __P((DBC *, u_int32_t)); +void __ham_print_cursor __P((DBC *)); +int __ham_traverse __P((DBC *, db_lockmode_t, int (*)(DBC *, PAGE *, void *, int *), void *, int)); +int __db_no_hash_am __P((ENV *)); +int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); +int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); +int __ham_31_hashmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_31_hash __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_46_hashmeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_46_hash __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, db_pgno_t, u_int32_t)); +int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); +int __ham_vrfy_hashing __P((DBC *, u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, u_int32_t (*) __P((DB *, const void *, u_int32_t)))); +int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, void *, int (*)(void *, const void *), u_int32_t)); +int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, DB *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_hash_ext_h_ */ diff --git a/src/dbinc_auto/heap_auto.h b/src/dbinc_auto/heap_auto.h new file mode 100644 index 00000000..bf288627 --- /dev/null +++ b/src/dbinc_auto/heap_auto.h @@ -0,0 +1,146 @@ 
+/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef __heap_AUTO_H +#define __heap_AUTO_H +#ifdef HAVE_HEAP +#include "dbinc/log.h" +#define DB___heap_addrem 151 +typedef struct ___heap_addrem_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t nbytes; + DBT hdr; + DBT dbt; + DB_LSN pagelsn; +} __heap_addrem_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __heap_addrem_desc[]; +static inline int +__heap_addrem_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_pgno_t pgno, u_int32_t indx, u_int32_t nbytes, + const DBT *hdr, const DBT *dbt, DB_LSN * pagelsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___heap_addrem, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(hdr) + + LOG_DBT_SIZE(dbt) + sizeof(*pagelsn), + __heap_addrem_desc, + opcode, pgno, indx, nbytes, hdr, dbt, pagelsn)); +} + +static inline int __heap_addrem_read(ENV *env, + DB **dbpp, void *td, void *data, __heap_addrem_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __heap_addrem_desc, sizeof(__heap_addrem_args), (void**)arg)); +} +#define DB___heap_pg_alloc 152 +typedef struct ___heap_pg_alloc_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t meta_pgno; + db_pgno_t pgno; + u_int32_t ptype; + db_pgno_t last_pgno; +} __heap_pg_alloc_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __heap_pg_alloc_desc[]; +static inline int +__heap_pg_alloc_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * meta_lsn, db_pgno_t meta_pgno, db_pgno_t pgno, u_int32_t ptype, + db_pgno_t last_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___heap_pg_alloc, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) 
+ sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*meta_lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __heap_pg_alloc_desc, meta_lsn, meta_pgno, pgno, ptype, last_pgno)); +} + +static inline int __heap_pg_alloc_read(ENV *env, + DB **dbpp, void *td, void *data, __heap_pg_alloc_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __heap_pg_alloc_desc, sizeof(__heap_pg_alloc_args), (void**)arg)); +} +#define DB___heap_trunc_meta 153 +typedef struct ___heap_trunc_meta_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t last_pgno; + u_int32_t key_count; + u_int32_t record_count; + u_int32_t curregion; + u_int32_t nregions; + DB_LSN pagelsn; +} __heap_trunc_meta_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __heap_trunc_meta_desc[]; +static inline int +__heap_trunc_meta_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, u_int32_t last_pgno, u_int32_t key_count, u_int32_t record_count, + u_int32_t curregion, u_int32_t nregions, DB_LSN * pagelsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___heap_trunc_meta, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(*pagelsn), + __heap_trunc_meta_desc, pgno, last_pgno, key_count, record_count, curregion, nregions, pagelsn)); +} + +static inline int __heap_trunc_meta_read(ENV *env, + DB **dbpp, void *td, void *data, __heap_trunc_meta_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __heap_trunc_meta_desc, sizeof(__heap_trunc_meta_args), (void**)arg)); +} +#define DB___heap_trunc_page 154 +typedef struct ___heap_trunc_page_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT old_data; + u_int32_t is_region; + 
DB_LSN pagelsn; +} __heap_trunc_page_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __heap_trunc_page_desc[]; +static inline int +__heap_trunc_page_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_pgno_t pgno, const DBT *old_data, u_int32_t is_region, DB_LSN * pagelsn) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___heap_trunc_page, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(old_data) + + sizeof(u_int32_t) + sizeof(*pagelsn), + __heap_trunc_page_desc, pgno, old_data, is_region, pagelsn)); +} + +static inline int __heap_trunc_page_read(ENV *env, + DB **dbpp, void *td, void *data, __heap_trunc_page_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __heap_trunc_page_desc, sizeof(__heap_trunc_page_args), (void**)arg)); +} +#endif /* HAVE_HEAP */ +#endif diff --git a/src/dbinc_auto/heap_ext.h b/src/dbinc_auto/heap_ext.h new file mode 100644 index 00000000..0fe8f21b --- /dev/null +++ b/src/dbinc_auto/heap_ext.h @@ -0,0 +1,55 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _heap_ext_h_ +#define _heap_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __heapc_init __P((DBC *)); +int __heap_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +int __heap_append __P((DBC *, DBT *, DBT *)); +int __heap_pitem __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); +int __heapc_dup __P((DBC *, DBC *)); +int __heapc_gsplit __P((DBC *, DBT *, void **, u_int32_t *)); +int __heapc_refresh __P((DBC *)); +int __heap_init_recover __P((ENV *, DB_DISTAB *)); +int __heap_addrem_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_meta_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_page_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_init_print __P((ENV *, DB_DISTAB *)); +int __heap_pgin __P((DB *, db_pgno_t, void *, DBT *)); +int __heap_pgout __P((DB *, db_pgno_t, void *, DBT *)); +int __heap_mswap __P((ENV *, PAGE *)); +int __heap_db_create __P((DB *)); +int __heap_db_close __P((DB *)); +int __heap_get_heapsize __P((DB *, u_int32_t *, u_int32_t *)); +int __heap_set_heapsize __P((DB *, u_int32_t, u_int32_t, u_int32_t)); +int __heap_exist __P((void)); +int __heap_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, db_pgno_t, u_int32_t)); +int __heap_metachk __P((DB *, const char *, HEAPMETA *)); +int __heap_read_meta __P((DB *, DB_THREAD_INFO *, DB_TXN *, db_pgno_t, u_int32_t)); +int __heap_new_file __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); +int __heap_create_region __P((DBC *, db_pgno_t)); +int __heap_addrem_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_pg_alloc_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_meta_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_page_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_truncate __P((DBC *, u_int32_t *)); +int 
__heap_stat __P((DBC *, void *, u_int32_t)); +int __heap_stat_print __P((DBC *, u_int32_t)); +void __heap_print_cursor __P((DBC *)); +int __heap_stat_callback __P((DBC *, PAGE *, void *, int *)); +int __heap_traverse __P((DBC *, int (*)(DBC *, PAGE *, void *, int *), void *)); +int __db_no_heap_am __P((ENV *)); +int __heap_vrfy_meta __P((DB *, VRFY_DBINFO *, HEAPMETA *, db_pgno_t, u_int32_t)); +int __heap_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __heap_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t)); +int __heap_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, void *, int (*)(void *, const void *), u_int32_t)); +int __heap_meta2pgset __P((DB *, VRFY_DBINFO *, HEAPMETA *, DB *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_heap_ext_h_ */ diff --git a/src/dbinc_auto/hmac_ext.h b/src/dbinc_auto/hmac_ext.h new file mode 100644 index 00000000..c1371014 --- /dev/null +++ b/src/dbinc_auto/hmac_ext.h @@ -0,0 +1,20 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _hmac_ext_h_ +#define _hmac_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +void __db_chksum __P((void *, u_int8_t *, size_t, u_int8_t *, u_int8_t *)); +void __db_derive_mac __P((u_int8_t *, size_t, u_int8_t *)); +int __db_check_chksum __P((ENV *, void *, DB_CIPHER *, u_int8_t *, void *, size_t, int)); +void __db_SHA1Transform __P((u_int32_t *, unsigned char *)); +void __db_SHA1Init __P((SHA1_CTX *)); +void __db_SHA1Update __P((SHA1_CTX *, unsigned char *, size_t)); +void __db_SHA1Final __P((unsigned char *, SHA1_CTX *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_hmac_ext_h_ */ diff --git a/src/dbinc_auto/int_def.in b/src/dbinc_auto/int_def.in new file mode 100644 index 00000000..98ab0008 --- /dev/null +++ b/src/dbinc_auto/int_def.in @@ -0,0 +1,2239 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _DB_INT_DEF_IN_ +#define _DB_INT_DEF_IN_ + +#define __crdel_metasub_desc __crdel_metasub_desc@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_create_desc __crdel_inmem_create_desc@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_rename_desc __crdel_inmem_rename_desc@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_remove_desc __crdel_inmem_remove_desc@DB_VERSION_UNIQUE_NAME@ +#define __crdel_init_recover __crdel_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __crdel_metasub_print __crdel_metasub_print@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_create_print __crdel_inmem_create_print@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_rename_print __crdel_inmem_rename_print@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_remove_print __crdel_inmem_remove_print@DB_VERSION_UNIQUE_NAME@ +#define __crdel_init_print __crdel_init_print@DB_VERSION_UNIQUE_NAME@ +#define __crdel_metasub_recover __crdel_metasub_recover@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_create_recover __crdel_inmem_create_recover@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_rename_recover __crdel_inmem_rename_recover@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_remove_recover __crdel_inmem_remove_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_master_open __db_master_open@DB_VERSION_UNIQUE_NAME@ +#define __db_master_update __db_master_update@DB_VERSION_UNIQUE_NAME@ +#define __env_dbreg_setup __env_dbreg_setup@DB_VERSION_UNIQUE_NAME@ +#define __env_setup __env_setup@DB_VERSION_UNIQUE_NAME@ +#define __env_mpool __env_mpool@DB_VERSION_UNIQUE_NAME@ +#define __db_close __db_close@DB_VERSION_UNIQUE_NAME@ +#define __db_refresh __db_refresh@DB_VERSION_UNIQUE_NAME@ +#define __db_log_page __db_log_page@DB_VERSION_UNIQUE_NAME@ +#define __db_walk_cursors __db_walk_cursors@DB_VERSION_UNIQUE_NAME@ +#define __db_backup_name __db_backup_name@DB_VERSION_UNIQUE_NAME@ +#ifdef CONFIG_TEST +#define __db_testcopy __db_testcopy@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_testdocopy 
__db_testdocopy@DB_VERSION_UNIQUE_NAME@ +#define __db_cursor_int __db_cursor_int@DB_VERSION_UNIQUE_NAME@ +#define __db_put __db_put@DB_VERSION_UNIQUE_NAME@ +#define __db_del __db_del@DB_VERSION_UNIQUE_NAME@ +#define __db_sync __db_sync@DB_VERSION_UNIQUE_NAME@ +#define __db_associate __db_associate@DB_VERSION_UNIQUE_NAME@ +#define __db_secondary_close __db_secondary_close@DB_VERSION_UNIQUE_NAME@ +#define __db_associate_foreign __db_associate_foreign@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_desc __db_addrem_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_42_desc __db_addrem_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_big_desc __db_big_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_big_42_desc __db_big_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_ovref_desc __db_ovref_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_42_desc __db_relink_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_debug_desc __db_debug_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_noop_desc __db_noop_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_42_desc __db_pg_alloc_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_desc __db_pg_alloc_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_42_desc __db_pg_free_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_desc __db_pg_free_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_cksum_desc __db_cksum_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_42_desc __db_pg_freedata_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_desc __db_pg_freedata_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_init_desc __db_pg_init_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_sort_44_desc __db_pg_sort_44_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_trunc_desc __db_pg_trunc_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_realloc_desc __db_realloc_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_desc __db_relink_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_merge_desc __db_merge_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_pgno_desc 
__db_pgno_desc@DB_VERSION_UNIQUE_NAME@ +#define __db_init_recover __db_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_print __db_addrem_print@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_42_print __db_addrem_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_big_print __db_big_print@DB_VERSION_UNIQUE_NAME@ +#define __db_big_42_print __db_big_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_ovref_print __db_ovref_print@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_42_print __db_relink_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_debug_print __db_debug_print@DB_VERSION_UNIQUE_NAME@ +#define __db_noop_print __db_noop_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_42_print __db_pg_alloc_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_print __db_pg_alloc_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_42_print __db_pg_free_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_print __db_pg_free_print@DB_VERSION_UNIQUE_NAME@ +#define __db_cksum_print __db_cksum_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_42_print __db_pg_freedata_42_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_print __db_pg_freedata_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_init_print __db_pg_init_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_sort_44_print __db_pg_sort_44_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_trunc_print __db_pg_trunc_print@DB_VERSION_UNIQUE_NAME@ +#define __db_realloc_print __db_realloc_print@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_print __db_relink_print@DB_VERSION_UNIQUE_NAME@ +#define __db_merge_print __db_merge_print@DB_VERSION_UNIQUE_NAME@ +#define __db_pgno_print __db_pgno_print@DB_VERSION_UNIQUE_NAME@ +#define __db_init_print __db_init_print@DB_VERSION_UNIQUE_NAME@ +#define __dbc_close __dbc_close@DB_VERSION_UNIQUE_NAME@ +#define __dbc_destroy __dbc_destroy@DB_VERSION_UNIQUE_NAME@ +#define __dbc_cmp __dbc_cmp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_count __dbc_count@DB_VERSION_UNIQUE_NAME@ +#define __dbc_del 
__dbc_del@DB_VERSION_UNIQUE_NAME@ +#define __dbc_idel __dbc_idel@DB_VERSION_UNIQUE_NAME@ +#ifdef HAVE_COMPRESSION +#define __dbc_bulk_del __dbc_bulk_del@DB_VERSION_UNIQUE_NAME@ +#endif +#define __dbc_dup __dbc_dup@DB_VERSION_UNIQUE_NAME@ +#define __dbc_idup __dbc_idup@DB_VERSION_UNIQUE_NAME@ +#define __dbc_newopd __dbc_newopd@DB_VERSION_UNIQUE_NAME@ +#define __dbc_get __dbc_get@DB_VERSION_UNIQUE_NAME@ +#define __dbc_iget __dbc_iget@DB_VERSION_UNIQUE_NAME@ +#define __dbc_put __dbc_put@DB_VERSION_UNIQUE_NAME@ +#define __dbc_iput __dbc_iput@DB_VERSION_UNIQUE_NAME@ +#define __db_duperr __db_duperr@DB_VERSION_UNIQUE_NAME@ +#define __dbc_cleanup __dbc_cleanup@DB_VERSION_UNIQUE_NAME@ +#define __dbc_secondary_get_pp __dbc_secondary_get_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_pget __dbc_pget@DB_VERSION_UNIQUE_NAME@ +#define __dbc_del_primary __dbc_del_primary@DB_VERSION_UNIQUE_NAME@ +#define __db_s_first __db_s_first@DB_VERSION_UNIQUE_NAME@ +#define __db_s_next __db_s_next@DB_VERSION_UNIQUE_NAME@ +#define __db_s_done __db_s_done@DB_VERSION_UNIQUE_NAME@ +#define __db_buildpartial __db_buildpartial@DB_VERSION_UNIQUE_NAME@ +#define __db_partsize __db_partsize@DB_VERSION_UNIQUE_NAME@ +#ifdef DIAGNOSTIC +#define __db_check_skeyset __db_check_skeyset@DB_VERSION_UNIQUE_NAME@ +#endif +#define __cdsgroup_begin __cdsgroup_begin@DB_VERSION_UNIQUE_NAME@ +#define __cdsgroup_begin_pp __cdsgroup_begin_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_compact_int __db_compact_int@DB_VERSION_UNIQUE_NAME@ +#define __db_exchange_page __db_exchange_page@DB_VERSION_UNIQUE_NAME@ +#define __db_truncate_overflow __db_truncate_overflow@DB_VERSION_UNIQUE_NAME@ +#define __db_truncate_root __db_truncate_root@DB_VERSION_UNIQUE_NAME@ +#define __db_find_free __db_find_free@DB_VERSION_UNIQUE_NAME@ +#define __db_relink __db_relink@DB_VERSION_UNIQUE_NAME@ +#define __db_move_metadata __db_move_metadata@DB_VERSION_UNIQUE_NAME@ +#define __db_pgin __db_pgin@DB_VERSION_UNIQUE_NAME@ +#define __db_pgout 
__db_pgout@DB_VERSION_UNIQUE_NAME@ +#define __db_decrypt_pg __db_decrypt_pg@DB_VERSION_UNIQUE_NAME@ +#define __db_encrypt_and_checksum_pg __db_encrypt_and_checksum_pg@DB_VERSION_UNIQUE_NAME@ +#define __db_metaswap __db_metaswap@DB_VERSION_UNIQUE_NAME@ +#define __db_byteswap __db_byteswap@DB_VERSION_UNIQUE_NAME@ +#define __db_pageswap __db_pageswap@DB_VERSION_UNIQUE_NAME@ +#define __db_recordswap __db_recordswap@DB_VERSION_UNIQUE_NAME@ +#define __db_dispatch __db_dispatch@DB_VERSION_UNIQUE_NAME@ +#define __db_add_recovery __db_add_recovery@DB_VERSION_UNIQUE_NAME@ +#define __db_add_recovery_int __db_add_recovery_int@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_init __db_txnlist_init@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_add __db_txnlist_add@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_remove __db_txnlist_remove@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_ckp __db_txnlist_ckp@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_end __db_txnlist_end@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_find __db_txnlist_find@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_update __db_txnlist_update@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_gen __db_txnlist_gen@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_lsnadd __db_txnlist_lsnadd@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_lsnget __db_txnlist_lsnget@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_lsninit __db_txnlist_lsninit@DB_VERSION_UNIQUE_NAME@ +#define __db_txnlist_print __db_txnlist_print@DB_VERSION_UNIQUE_NAME@ +#define __db_ditem_nolog __db_ditem_nolog@DB_VERSION_UNIQUE_NAME@ +#define __db_ditem __db_ditem@DB_VERSION_UNIQUE_NAME@ +#define __db_pitem_nolog __db_pitem_nolog@DB_VERSION_UNIQUE_NAME@ +#define __db_pitem __db_pitem@DB_VERSION_UNIQUE_NAME@ +#define __db_associate_pp __db_associate_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_close_pp __db_close_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_cursor_pp __db_cursor_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_cursor __db_cursor@DB_VERSION_UNIQUE_NAME@ +#define 
__db_del_pp __db_del_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_exists __db_exists@DB_VERSION_UNIQUE_NAME@ +#define __db_fd_pp __db_fd_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_get_pp __db_get_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_get __db_get@DB_VERSION_UNIQUE_NAME@ +#define __db_join_pp __db_join_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_key_range_pp __db_key_range_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_open_pp __db_open_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_pget_pp __db_pget_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_pget __db_pget@DB_VERSION_UNIQUE_NAME@ +#define __db_put_pp __db_put_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_compact_pp __db_compact_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_associate_foreign_pp __db_associate_foreign_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_sync_pp __db_sync_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_close_pp __dbc_close_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_cmp_pp __dbc_cmp_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_count_pp __dbc_count_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_del_pp __dbc_del_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_dup_pp __dbc_dup_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_get_pp __dbc_get_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_get_arg __dbc_get_arg@DB_VERSION_UNIQUE_NAME@ +#define __db_secondary_close_pp __db_secondary_close_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_pget_pp __dbc_pget_pp@DB_VERSION_UNIQUE_NAME@ +#define __dbc_put_pp __dbc_put_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_txn_auto_init __db_txn_auto_init@DB_VERSION_UNIQUE_NAME@ +#define __db_txn_auto_resolve __db_txn_auto_resolve@DB_VERSION_UNIQUE_NAME@ +#define __db_join __db_join@DB_VERSION_UNIQUE_NAME@ +#define __db_join_close __db_join_close@DB_VERSION_UNIQUE_NAME@ +#define __db_secondary_corrupt __db_secondary_corrupt@DB_VERSION_UNIQUE_NAME@ +#define __db_new __db_new@DB_VERSION_UNIQUE_NAME@ +#define __db_free __db_free@DB_VERSION_UNIQUE_NAME@ +#ifdef HAVE_FTRUNCATE +#define __db_freelist_pos __db_freelist_pos@DB_VERSION_UNIQUE_NAME@ 
+#endif +#define __db_freelist_sort __db_freelist_sort@DB_VERSION_UNIQUE_NAME@ +#ifdef HAVE_FTRUNCATE +#define __db_pg_truncate __db_pg_truncate@DB_VERSION_UNIQUE_NAME@ +#endif +#ifdef HAVE_FTRUNCATE +#define __db_free_truncate __db_free_truncate@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_lprint __db_lprint@DB_VERSION_UNIQUE_NAME@ +#define __db_lget __db_lget@DB_VERSION_UNIQUE_NAME@ +#ifdef DIAGNOSTIC +#define __db_haslock __db_haslock@DB_VERSION_UNIQUE_NAME@ +#endif +#ifdef DIAGNOSTIC +#define __db_has_pagelock __db_has_pagelock@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_lput __db_lput@DB_VERSION_UNIQUE_NAME@ +#define __db_create_internal __db_create_internal@DB_VERSION_UNIQUE_NAME@ +#define __dbh_am_chk __dbh_am_chk@DB_VERSION_UNIQUE_NAME@ +#define __db_get_flags __db_get_flags@DB_VERSION_UNIQUE_NAME@ +#define __db_set_flags __db_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __db_get_lorder __db_get_lorder@DB_VERSION_UNIQUE_NAME@ +#define __db_set_lorder __db_set_lorder@DB_VERSION_UNIQUE_NAME@ +#define __db_set_pagesize __db_set_pagesize@DB_VERSION_UNIQUE_NAME@ +#define __db_open __db_open@DB_VERSION_UNIQUE_NAME@ +#define __db_get_open_flags __db_get_open_flags@DB_VERSION_UNIQUE_NAME@ +#define __db_new_file __db_new_file@DB_VERSION_UNIQUE_NAME@ +#define __db_init_subdb __db_init_subdb@DB_VERSION_UNIQUE_NAME@ +#define __db_chk_meta __db_chk_meta@DB_VERSION_UNIQUE_NAME@ +#define __db_meta_setup __db_meta_setup@DB_VERSION_UNIQUE_NAME@ +#define __db_reopen __db_reopen@DB_VERSION_UNIQUE_NAME@ +#define __db_goff __db_goff@DB_VERSION_UNIQUE_NAME@ +#define __db_poff __db_poff@DB_VERSION_UNIQUE_NAME@ +#define __db_ovref __db_ovref@DB_VERSION_UNIQUE_NAME@ +#define __db_doff __db_doff@DB_VERSION_UNIQUE_NAME@ +#define __db_moff __db_moff@DB_VERSION_UNIQUE_NAME@ +#define __db_coff __db_coff@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_overflow __db_vrfy_overflow@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_ovfl_structure __db_vrfy_ovfl_structure@DB_VERSION_UNIQUE_NAME@ 
+#define __db_safe_goff __db_safe_goff@DB_VERSION_UNIQUE_NAME@ +#define __db_loadme __db_loadme@DB_VERSION_UNIQUE_NAME@ +#define __db_dumptree __db_dumptree@DB_VERSION_UNIQUE_NAME@ +#define __db_get_flags_fn __db_get_flags_fn@DB_VERSION_UNIQUE_NAME@ +#define __db_prnpage __db_prnpage@DB_VERSION_UNIQUE_NAME@ +#define __db_prpage __db_prpage@DB_VERSION_UNIQUE_NAME@ +#define __db_lockmode_to_string __db_lockmode_to_string@DB_VERSION_UNIQUE_NAME@ +#define __db_dumptree __db_dumptree@DB_VERSION_UNIQUE_NAME@ +#define __db_get_flags_fn __db_get_flags_fn@DB_VERSION_UNIQUE_NAME@ +#define __db_prpage_int __db_prpage_int@DB_VERSION_UNIQUE_NAME@ +#define __db_prbytes __db_prbytes@DB_VERSION_UNIQUE_NAME@ +#define __db_prflags __db_prflags@DB_VERSION_UNIQUE_NAME@ +#define __db_pagetype_to_string __db_pagetype_to_string@DB_VERSION_UNIQUE_NAME@ +#define __db_dump_pp __db_dump_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_dump __db_dump@DB_VERSION_UNIQUE_NAME@ +#define __db_prdbt __db_prdbt@DB_VERSION_UNIQUE_NAME@ +#define __db_prheader __db_prheader@DB_VERSION_UNIQUE_NAME@ +#define __db_prfooter __db_prfooter@DB_VERSION_UNIQUE_NAME@ +#define __db_pr_callback __db_pr_callback@DB_VERSION_UNIQUE_NAME@ +#define __db_dbtype_to_string __db_dbtype_to_string@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_recover __db_addrem_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_42_recover __db_addrem_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_big_recover __db_big_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_big_42_recover __db_big_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_ovref_recover __db_ovref_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_debug_recover __db_debug_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_noop_recover __db_noop_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_recover __db_pg_alloc_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_recover __db_pg_free_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_recover 
__db_pg_freedata_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_cksum_recover __db_cksum_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_init_recover __db_pg_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_trunc_recover __db_pg_trunc_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_realloc_recover __db_realloc_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_sort_44_recover __db_pg_sort_44_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_42_recover __db_pg_alloc_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_42_recover __db_pg_free_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_42_recover __db_pg_freedata_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_42_recover __db_relink_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_recover __db_relink_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_merge_recover __db_merge_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pgno_recover __db_pgno_recover@DB_VERSION_UNIQUE_NAME@ +#define __db_pglist_swap __db_pglist_swap@DB_VERSION_UNIQUE_NAME@ +#define __db_pglist_print __db_pglist_print@DB_VERSION_UNIQUE_NAME@ +#define __db_traverse_big __db_traverse_big@DB_VERSION_UNIQUE_NAME@ +#define __db_reclaim_callback __db_reclaim_callback@DB_VERSION_UNIQUE_NAME@ +#define __db_truncate_callback __db_truncate_callback@DB_VERSION_UNIQUE_NAME@ +#define __env_dbremove_pp __env_dbremove_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_remove_pp __db_remove_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_remove __db_remove@DB_VERSION_UNIQUE_NAME@ +#define __db_remove_int __db_remove_int@DB_VERSION_UNIQUE_NAME@ +#define __db_inmem_remove __db_inmem_remove@DB_VERSION_UNIQUE_NAME@ +#define __env_dbrename_pp __env_dbrename_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_rename_pp __db_rename_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_rename_int __db_rename_int@DB_VERSION_UNIQUE_NAME@ +#define __db_ret __db_ret@DB_VERSION_UNIQUE_NAME@ +#define __db_retcopy __db_retcopy@DB_VERSION_UNIQUE_NAME@ +#define __env_fileid_reset_pp 
__env_fileid_reset_pp@DB_VERSION_UNIQUE_NAME@ +#define __env_fileid_reset __env_fileid_reset@DB_VERSION_UNIQUE_NAME@ +#define __env_lsn_reset_pp __env_lsn_reset_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_lsn_reset __db_lsn_reset@DB_VERSION_UNIQUE_NAME@ +#define __db_compare_both __db_compare_both@DB_VERSION_UNIQUE_NAME@ +#define __db_sort_multiple __db_sort_multiple@DB_VERSION_UNIQUE_NAME@ +#define __db_stat_pp __db_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_stat_print_pp __db_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_stat_print __db_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __db_truncate_pp __db_truncate_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_truncate __db_truncate@DB_VERSION_UNIQUE_NAME@ +#define __db_upgrade_pp __db_upgrade_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_upgrade __db_upgrade@DB_VERSION_UNIQUE_NAME@ +#define __db_lastpgno __db_lastpgno@DB_VERSION_UNIQUE_NAME@ +#define __db_31_offdup __db_31_offdup@DB_VERSION_UNIQUE_NAME@ +#define __db_verify_pp __db_verify_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_verify_internal __db_verify_internal@DB_VERSION_UNIQUE_NAME@ +#define __db_verify __db_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_common __db_vrfy_common@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_datapage __db_vrfy_datapage@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_meta __db_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_struct_feedback __db_vrfy_struct_feedback@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_pg __db_salvage_pg@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_leaf __db_salvage_leaf@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_inpitem __db_vrfy_inpitem@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_duptype __db_vrfy_duptype@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_duptree __db_salvage_duptree@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_dbinfo_create __db_vrfy_dbinfo_create@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_dbinfo_destroy __db_vrfy_dbinfo_destroy@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_getpageinfo 
__db_vrfy_getpageinfo@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_putpageinfo __db_vrfy_putpageinfo@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_pgset __db_vrfy_pgset@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_pgset_get __db_vrfy_pgset_get@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_pgset_inc __db_vrfy_pgset_inc@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_pgset_next __db_vrfy_pgset_next@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_childcursor __db_vrfy_childcursor@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_childput __db_vrfy_childput@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_ccset __db_vrfy_ccset@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_ccnext __db_vrfy_ccnext@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_ccclose __db_vrfy_ccclose@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_init __db_salvage_init@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_destroy __db_salvage_destroy@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_getnext __db_salvage_getnext@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_isdone __db_salvage_isdone@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_markdone __db_salvage_markdone@DB_VERSION_UNIQUE_NAME@ +#define __db_salvage_markneeded __db_salvage_markneeded@DB_VERSION_UNIQUE_NAME@ +#define __db_vrfy_prdbt __db_vrfy_prdbt@DB_VERSION_UNIQUE_NAME@ +#define __partition_init __partition_init@DB_VERSION_UNIQUE_NAME@ +#define __partition_set __partition_set@DB_VERSION_UNIQUE_NAME@ +#define __partition_set_dirs __partition_set_dirs@DB_VERSION_UNIQUE_NAME@ +#define __partition_open __partition_open@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_callback __partition_get_callback@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_keys __partition_get_keys@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_dirs __partition_get_dirs@DB_VERSION_UNIQUE_NAME@ +#define __partc_init __partc_init@DB_VERSION_UNIQUE_NAME@ +#define __partc_get __partc_get@DB_VERSION_UNIQUE_NAME@ +#define __partition_close __partition_close@DB_VERSION_UNIQUE_NAME@ +#define __partition_sync 
__partition_sync@DB_VERSION_UNIQUE_NAME@ +#define __partition_stat __partition_stat@DB_VERSION_UNIQUE_NAME@ +#define __part_truncate __part_truncate@DB_VERSION_UNIQUE_NAME@ +#define __part_compact __part_compact@DB_VERSION_UNIQUE_NAME@ +#define __part_lsn_reset __part_lsn_reset@DB_VERSION_UNIQUE_NAME@ +#define __part_fileid_reset __part_fileid_reset@DB_VERSION_UNIQUE_NAME@ +#define __part_key_range __part_key_range@DB_VERSION_UNIQUE_NAME@ +#define __part_remove __part_remove@DB_VERSION_UNIQUE_NAME@ +#define __part_rename __part_rename@DB_VERSION_UNIQUE_NAME@ +#define __part_verify __part_verify@DB_VERSION_UNIQUE_NAME@ +#define __part_testdocopy __part_testdocopy@DB_VERSION_UNIQUE_NAME@ +#define __db_no_partition __db_no_partition@DB_VERSION_UNIQUE_NAME@ +#define __partition_set __partition_set@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_callback __partition_get_callback@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_dirs __partition_get_dirs@DB_VERSION_UNIQUE_NAME@ +#define __partition_get_keys __partition_get_keys@DB_VERSION_UNIQUE_NAME@ +#define __partition_init __partition_init@DB_VERSION_UNIQUE_NAME@ +#define __part_fileid_reset __part_fileid_reset@DB_VERSION_UNIQUE_NAME@ +#define __partition_set_dirs __partition_set_dirs@DB_VERSION_UNIQUE_NAME@ +#define __bam_compact_int __bam_compact_int@DB_VERSION_UNIQUE_NAME@ +#define __bam_compact_opd __bam_compact_opd@DB_VERSION_UNIQUE_NAME@ +#define __bam_truncate_ipages __bam_truncate_ipages@DB_VERSION_UNIQUE_NAME@ +#define __bam_cmp __bam_cmp@DB_VERSION_UNIQUE_NAME@ +#define __bam_defcmp __bam_defcmp@DB_VERSION_UNIQUE_NAME@ +#define __bam_defpfx __bam_defpfx@DB_VERSION_UNIQUE_NAME@ +#define __bam_compress_dupcmp __bam_compress_dupcmp@DB_VERSION_UNIQUE_NAME@ +#define __bam_defcompress __bam_defcompress@DB_VERSION_UNIQUE_NAME@ +#define __bam_defdecompress __bam_defdecompress@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_get __bamc_compress_get@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_put 
__bamc_compress_put@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_del __bamc_compress_del@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_bulk_del __bamc_compress_bulk_del@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_count __bamc_compress_count@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_cmp __bamc_compress_cmp@DB_VERSION_UNIQUE_NAME@ +#define __bamc_compress_dup __bamc_compress_dup@DB_VERSION_UNIQUE_NAME@ +#define __bam_compress_salvage __bam_compress_salvage@DB_VERSION_UNIQUE_NAME@ +#define __bam_compress_count __bam_compress_count@DB_VERSION_UNIQUE_NAME@ +#define __bam_pgin __bam_pgin@DB_VERSION_UNIQUE_NAME@ +#define __bam_pgout __bam_pgout@DB_VERSION_UNIQUE_NAME@ +#define __bam_mswap __bam_mswap@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_delete __bam_ca_delete@DB_VERSION_UNIQUE_NAME@ +#define __ram_ca_delete __ram_ca_delete@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_di __bam_ca_di@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_dup __bam_ca_dup@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_undodup __bam_ca_undodup@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_rsplit __bam_ca_rsplit@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_split __bam_ca_split@DB_VERSION_UNIQUE_NAME@ +#define __bam_ca_undosplit __bam_ca_undosplit@DB_VERSION_UNIQUE_NAME@ +#define __bamc_init __bamc_init@DB_VERSION_UNIQUE_NAME@ +#define __bamc_refresh __bamc_refresh@DB_VERSION_UNIQUE_NAME@ +#define __bamc_cmp __bamc_cmp@DB_VERSION_UNIQUE_NAME@ +#define __bamc_count __bamc_count@DB_VERSION_UNIQUE_NAME@ +#define __bamc_dup __bamc_dup@DB_VERSION_UNIQUE_NAME@ +#define __bam_bulk_overflow __bam_bulk_overflow@DB_VERSION_UNIQUE_NAME@ +#define __bam_bulk_duplicates __bam_bulk_duplicates@DB_VERSION_UNIQUE_NAME@ +#define __bamc_rget __bamc_rget@DB_VERSION_UNIQUE_NAME@ +#define __bam_opd_exists __bam_opd_exists@DB_VERSION_UNIQUE_NAME@ +#define __bam_ditem __bam_ditem@DB_VERSION_UNIQUE_NAME@ +#define __bam_adjindx __bam_adjindx@DB_VERSION_UNIQUE_NAME@ +#define __bam_dpages __bam_dpages@DB_VERSION_UNIQUE_NAME@ 
+#define __bam_pupdate __bam_pupdate@DB_VERSION_UNIQUE_NAME@ +#define __bam_db_create __bam_db_create@DB_VERSION_UNIQUE_NAME@ +#define __bam_db_close __bam_db_close@DB_VERSION_UNIQUE_NAME@ +#define __bam_map_flags __bam_map_flags@DB_VERSION_UNIQUE_NAME@ +#define __bam_set_flags __bam_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __bam_set_bt_compare __bam_set_bt_compare@DB_VERSION_UNIQUE_NAME@ +#define __bam_set_bt_compress __bam_set_bt_compress@DB_VERSION_UNIQUE_NAME@ +#define __bam_get_bt_minkey __bam_get_bt_minkey@DB_VERSION_UNIQUE_NAME@ +#define __bam_copy_config __bam_copy_config@DB_VERSION_UNIQUE_NAME@ +#define __ram_map_flags __ram_map_flags@DB_VERSION_UNIQUE_NAME@ +#define __ram_set_flags __ram_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __ram_get_re_len __ram_get_re_len@DB_VERSION_UNIQUE_NAME@ +#define __ram_get_re_pad __ram_get_re_pad@DB_VERSION_UNIQUE_NAME@ +#define __bam_open __bam_open@DB_VERSION_UNIQUE_NAME@ +#define __bam_metachk __bam_metachk@DB_VERSION_UNIQUE_NAME@ +#define __bam_read_root __bam_read_root@DB_VERSION_UNIQUE_NAME@ +#define __bam_new_file __bam_new_file@DB_VERSION_UNIQUE_NAME@ +#define __bam_new_subdb __bam_new_subdb@DB_VERSION_UNIQUE_NAME@ +#define __bam_iitem __bam_iitem@DB_VERSION_UNIQUE_NAME@ +#define __bam_ritem __bam_ritem@DB_VERSION_UNIQUE_NAME@ +#define __bam_ritem_nolog __bam_ritem_nolog@DB_VERSION_UNIQUE_NAME@ +#define __bam_irep __bam_irep@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_recover __bam_split_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_48_recover __bam_split_48_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_42_recover __bam_split_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsplit_recover __bam_rsplit_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_adj_recover __bam_adj_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_cadjust_recover __bam_cadjust_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_cdel_recover __bam_cdel_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_repl_recover 
__bam_repl_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_irep_recover __bam_irep_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_root_recover __bam_root_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_curadj_recover __bam_curadj_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_rcuradj_recover __bam_rcuradj_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_merge_44_recover __bam_merge_44_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_relink_43_recover __bam_relink_43_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_reclaim __bam_reclaim@DB_VERSION_UNIQUE_NAME@ +#define __bam_truncate __bam_truncate@DB_VERSION_UNIQUE_NAME@ +#define __ram_open __ram_open@DB_VERSION_UNIQUE_NAME@ +#define __ram_append __ram_append@DB_VERSION_UNIQUE_NAME@ +#define __ramc_del __ramc_del@DB_VERSION_UNIQUE_NAME@ +#define __ramc_get __ramc_get@DB_VERSION_UNIQUE_NAME@ +#define __ramc_put __ramc_put@DB_VERSION_UNIQUE_NAME@ +#define __ram_ca __ram_ca@DB_VERSION_UNIQUE_NAME@ +#define __ram_getno __ram_getno@DB_VERSION_UNIQUE_NAME@ +#define __ram_writeback __ram_writeback@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsearch __bam_rsearch@DB_VERSION_UNIQUE_NAME@ +#define __bam_adjust __bam_adjust@DB_VERSION_UNIQUE_NAME@ +#define __bam_nrecs __bam_nrecs@DB_VERSION_UNIQUE_NAME@ +#define __bam_total __bam_total@DB_VERSION_UNIQUE_NAME@ +#define __bam_get_root __bam_get_root@DB_VERSION_UNIQUE_NAME@ +#define __bam_search __bam_search@DB_VERSION_UNIQUE_NAME@ +#define __bam_stkrel __bam_stkrel@DB_VERSION_UNIQUE_NAME@ +#define __bam_stkgrow __bam_stkgrow@DB_VERSION_UNIQUE_NAME@ +#define __bam_split __bam_split@DB_VERSION_UNIQUE_NAME@ +#define __bam_broot __bam_broot@DB_VERSION_UNIQUE_NAME@ +#define __ram_root __ram_root@DB_VERSION_UNIQUE_NAME@ +#define __bam_pinsert __bam_pinsert@DB_VERSION_UNIQUE_NAME@ +#define __bam_copy __bam_copy@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat __bam_stat@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat_print __bam_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat_callback 
__bam_stat_callback@DB_VERSION_UNIQUE_NAME@ +#define __bam_print_cursor __bam_print_cursor@DB_VERSION_UNIQUE_NAME@ +#define __bam_key_range __bam_key_range@DB_VERSION_UNIQUE_NAME@ +#define __bam_traverse __bam_traverse@DB_VERSION_UNIQUE_NAME@ +#define __bam_30_btreemeta __bam_30_btreemeta@DB_VERSION_UNIQUE_NAME@ +#define __bam_31_btreemeta __bam_31_btreemeta@DB_VERSION_UNIQUE_NAME@ +#define __bam_31_lbtree __bam_31_lbtree@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_meta __bam_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __ram_vrfy_leaf __ram_vrfy_leaf@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy __bam_vrfy@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_itemorder __bam_vrfy_itemorder@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_structure __bam_vrfy_structure@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_subtree __bam_vrfy_subtree@DB_VERSION_UNIQUE_NAME@ +#define __bam_salvage __bam_salvage@DB_VERSION_UNIQUE_NAME@ +#define __bam_salvage_walkdupint __bam_salvage_walkdupint@DB_VERSION_UNIQUE_NAME@ +#define __bam_meta2pgset __bam_meta2pgset@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_desc __bam_split_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_48_desc __bam_split_48_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_42_desc __bam_split_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsplit_desc __bam_rsplit_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_adj_desc __bam_adj_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_cadjust_desc __bam_cadjust_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_cdel_desc __bam_cdel_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_repl_desc __bam_repl_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_irep_desc __bam_irep_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_root_desc __bam_root_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_curadj_desc __bam_curadj_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_rcuradj_desc __bam_rcuradj_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_relink_43_desc __bam_relink_43_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_merge_44_desc 
__bam_merge_44_desc@DB_VERSION_UNIQUE_NAME@ +#define __bam_init_recover __bam_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_print __bam_split_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_48_print __bam_split_48_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_42_print __bam_split_42_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsplit_print __bam_rsplit_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_adj_print __bam_adj_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_cadjust_print __bam_cadjust_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_cdel_print __bam_cdel_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_repl_print __bam_repl_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_irep_print __bam_irep_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_root_print __bam_root_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_curadj_print __bam_curadj_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_rcuradj_print __bam_rcuradj_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_relink_43_print __bam_relink_43_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_merge_44_print __bam_merge_44_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_init_print __bam_init_print@DB_VERSION_UNIQUE_NAME@ +#ifndef HAVE_ATOI +#define atoi atoi@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_ATOL +#define atol atol@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_BSEARCH +#define bsearch bsearch@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_GETCWD +#define getcwd getcwd@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_GETOPT +#define getopt getopt@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_ISALPHA +#define isalpha isalpha@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_ISDIGIT +#define isdigit isdigit@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_ISPRINT +#define isprint isprint@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_ISSPACE +#define isspace isspace@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_MEMCMP +#define memcmp memcmp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_MEMCPY +#define memcpy 
memcpy@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_MEMMOVE +#define memmove memmove@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_PRINTF +#define printf printf@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_PRINTF +#define fprintf fprintf@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_PRINTF +#define vfprintf vfprintf@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_QSORT +#define qsort qsort@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_RAISE +#define raise raise@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_RAND +#define rand rand@DB_VERSION_UNIQUE_NAME@ +#define srand srand@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_SNPRINTF +#define snprintf snprintf@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_VSNPRINTF +#define vsnprintf vsnprintf@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRCASECMP +#define strcasecmp strcasecmp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRCASECMP +#define strncasecmp strncasecmp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRCAT +#define strcat strcat@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRCHR +#define strchr strchr@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRDUP +#define strdup strdup@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRERROR +#define strerror strerror@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRNCAT +#define strncat strncat@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRNCMP +#define strncmp strncmp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRRCHR +#define strrchr strrchr@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRSEP +#define strsep strsep@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRTOL +#define strtol strtol@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_STRTOUL +#define strtoul strtoul@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_TIME +#define time time@DB_VERSION_UNIQUE_NAME@ +#endif +#define __clock_set_expires __clock_set_expires@DB_VERSION_UNIQUE_NAME@ +#define __clock_expired __clock_expired@DB_VERSION_UNIQUE_NAME@ +#define __crypto_region_init 
__crypto_region_init@DB_VERSION_UNIQUE_NAME@ +#define __db_isbigendian __db_isbigendian@DB_VERSION_UNIQUE_NAME@ +#define __db_byteorder __db_byteorder@DB_VERSION_UNIQUE_NAME@ +#define __db_compress_count_int __db_compress_count_int@DB_VERSION_UNIQUE_NAME@ +#define __db_compress_int __db_compress_int@DB_VERSION_UNIQUE_NAME@ +#define __db_decompress_count_int __db_decompress_count_int@DB_VERSION_UNIQUE_NAME@ +#define __db_decompress_int __db_decompress_int@DB_VERSION_UNIQUE_NAME@ +#define __db_decompress_int32 __db_decompress_int32@DB_VERSION_UNIQUE_NAME@ +#define __db_fchk __db_fchk@DB_VERSION_UNIQUE_NAME@ +#define __db_fcchk __db_fcchk@DB_VERSION_UNIQUE_NAME@ +#define __db_ferr __db_ferr@DB_VERSION_UNIQUE_NAME@ +#define __db_fnl __db_fnl@DB_VERSION_UNIQUE_NAME@ +#define __db_pgerr __db_pgerr@DB_VERSION_UNIQUE_NAME@ +#define __db_pgfmt __db_pgfmt@DB_VERSION_UNIQUE_NAME@ +#ifdef DIAGNOSTIC +#define __db_assert __db_assert@DB_VERSION_UNIQUE_NAME@ +#endif +#define __env_panic_msg __env_panic_msg@DB_VERSION_UNIQUE_NAME@ +#define __env_panic __env_panic@DB_VERSION_UNIQUE_NAME@ +#define __db_unknown_error __db_unknown_error@DB_VERSION_UNIQUE_NAME@ +#define __db_syserr __db_syserr@DB_VERSION_UNIQUE_NAME@ +#define __db_err __db_err@DB_VERSION_UNIQUE_NAME@ +#define __db_errx __db_errx@DB_VERSION_UNIQUE_NAME@ +#define __db_errcall __db_errcall@DB_VERSION_UNIQUE_NAME@ +#define __db_errfile __db_errfile@DB_VERSION_UNIQUE_NAME@ +#define __db_msgadd __db_msgadd@DB_VERSION_UNIQUE_NAME@ +#define __db_msgadd_ap __db_msgadd_ap@DB_VERSION_UNIQUE_NAME@ +#define __db_msg __db_msg@DB_VERSION_UNIQUE_NAME@ +#define __db_repmsg __db_repmsg@DB_VERSION_UNIQUE_NAME@ +#define __db_unknown_flag __db_unknown_flag@DB_VERSION_UNIQUE_NAME@ +#define __db_unknown_type __db_unknown_type@DB_VERSION_UNIQUE_NAME@ +#define __db_unknown_path __db_unknown_path@DB_VERSION_UNIQUE_NAME@ +#define __db_check_txn __db_check_txn@DB_VERSION_UNIQUE_NAME@ +#define __db_txn_deadlock_err 
__db_txn_deadlock_err@DB_VERSION_UNIQUE_NAME@ +#define __db_not_txn_env __db_not_txn_env@DB_VERSION_UNIQUE_NAME@ +#define __db_rec_toobig __db_rec_toobig@DB_VERSION_UNIQUE_NAME@ +#define __db_rec_repl __db_rec_repl@DB_VERSION_UNIQUE_NAME@ +#define __dbc_logging __dbc_logging@DB_VERSION_UNIQUE_NAME@ +#define __db_check_lsn __db_check_lsn@DB_VERSION_UNIQUE_NAME@ +#define __db_rdonly __db_rdonly@DB_VERSION_UNIQUE_NAME@ +#define __db_space_err __db_space_err@DB_VERSION_UNIQUE_NAME@ +#define __db_failed __db_failed@DB_VERSION_UNIQUE_NAME@ +#define __db_getlong __db_getlong@DB_VERSION_UNIQUE_NAME@ +#define __db_getulong __db_getulong@DB_VERSION_UNIQUE_NAME@ +#define __db_idspace __db_idspace@DB_VERSION_UNIQUE_NAME@ +#define __db_log2 __db_log2@DB_VERSION_UNIQUE_NAME@ +#define __db_tablesize __db_tablesize@DB_VERSION_UNIQUE_NAME@ +#define __db_hashinit __db_hashinit@DB_VERSION_UNIQUE_NAME@ +#define __dbt_usercopy __dbt_usercopy@DB_VERSION_UNIQUE_NAME@ +#define __dbt_userfree __dbt_userfree@DB_VERSION_UNIQUE_NAME@ +#define __db_mkpath __db_mkpath@DB_VERSION_UNIQUE_NAME@ +#define __db_openflags __db_openflags@DB_VERSION_UNIQUE_NAME@ +#define __db_util_arg __db_util_arg@DB_VERSION_UNIQUE_NAME@ +#define __db_util_cache __db_util_cache@DB_VERSION_UNIQUE_NAME@ +#define __db_util_logset __db_util_logset@DB_VERSION_UNIQUE_NAME@ +#define __db_util_siginit __db_util_siginit@DB_VERSION_UNIQUE_NAME@ +#define __db_util_interrupted __db_util_interrupted@DB_VERSION_UNIQUE_NAME@ +#define __db_util_sigresend __db_util_sigresend@DB_VERSION_UNIQUE_NAME@ +#define __db_zero_fill __db_zero_fill@DB_VERSION_UNIQUE_NAME@ +#define __db_zero_extend __db_zero_extend@DB_VERSION_UNIQUE_NAME@ +#define __aes_setup __aes_setup@DB_VERSION_UNIQUE_NAME@ +#define __aes_adj_size __aes_adj_size@DB_VERSION_UNIQUE_NAME@ +#define __aes_close __aes_close@DB_VERSION_UNIQUE_NAME@ +#define __aes_decrypt __aes_decrypt@DB_VERSION_UNIQUE_NAME@ +#define __aes_encrypt __aes_encrypt@DB_VERSION_UNIQUE_NAME@ +#define 
__aes_init __aes_init@DB_VERSION_UNIQUE_NAME@ +#define __crypto_env_close __crypto_env_close@DB_VERSION_UNIQUE_NAME@ +#define __crypto_env_refresh __crypto_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __crypto_algsetup __crypto_algsetup@DB_VERSION_UNIQUE_NAME@ +#define __crypto_decrypt_meta __crypto_decrypt_meta@DB_VERSION_UNIQUE_NAME@ +#define __crypto_set_passwd __crypto_set_passwd@DB_VERSION_UNIQUE_NAME@ +#define __db_generate_iv __db_generate_iv@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelKeySetupEnc __db_rijndaelKeySetupEnc@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelKeySetupDec __db_rijndaelKeySetupDec@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelEncrypt __db_rijndaelEncrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelDecrypt __db_rijndaelDecrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelEncryptRound __db_rijndaelEncryptRound@DB_VERSION_UNIQUE_NAME@ +#define __db_rijndaelDecryptRound __db_rijndaelDecryptRound@DB_VERSION_UNIQUE_NAME@ +#define __db_makeKey __db_makeKey@DB_VERSION_UNIQUE_NAME@ +#define __db_cipherInit __db_cipherInit@DB_VERSION_UNIQUE_NAME@ +#define __db_blockEncrypt __db_blockEncrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_padEncrypt __db_padEncrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_blockDecrypt __db_blockDecrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_padDecrypt __db_padDecrypt@DB_VERSION_UNIQUE_NAME@ +#define __db_cipherUpdateRounds __db_cipherUpdateRounds@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_setup __dbreg_setup@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_teardown __dbreg_teardown@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_teardown_int __dbreg_teardown_int@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_new_id __dbreg_new_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_get_id __dbreg_get_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_assign_id __dbreg_assign_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_revoke_id __dbreg_revoke_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_revoke_id_int __dbreg_revoke_id_int@DB_VERSION_UNIQUE_NAME@ +#define 
__dbreg_close_id __dbreg_close_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_close_id_int __dbreg_close_id_int@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_failchk __dbreg_failchk@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_log_close __dbreg_log_close@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_log_id __dbreg_log_id@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_desc __dbreg_register_desc@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_init_recover __dbreg_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_print __dbreg_register_print@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_init_print __dbreg_init_print@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_recover __dbreg_register_recover@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_stat_print __dbreg_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_print_fname __dbreg_print_fname@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_add_dbentry __dbreg_add_dbentry@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_rem_dbentry __dbreg_rem_dbentry@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_log_files __dbreg_log_files@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_log_nofiles __dbreg_log_nofiles@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_close_files __dbreg_close_files@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_close_file __dbreg_close_file@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_mark_restored __dbreg_mark_restored@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_invalidate_files __dbreg_invalidate_files@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_id_to_db __dbreg_id_to_db@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_id_to_fname __dbreg_id_to_fname@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_fid_to_fname __dbreg_fid_to_fname@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_get_name __dbreg_get_name@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_do_open __dbreg_do_open@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_lazy_id __dbreg_lazy_id@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_init __env_alloc_init@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_overhead 
__env_alloc_overhead@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_size __env_alloc_size@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc __env_alloc@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_free __env_alloc_free@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_extend __env_alloc_extend@DB_VERSION_UNIQUE_NAME@ +#define __env_region_extend __env_region_extend@DB_VERSION_UNIQUE_NAME@ +#define __env_elem_size __env_elem_size@DB_VERSION_UNIQUE_NAME@ +#define __env_get_chunk __env_get_chunk@DB_VERSION_UNIQUE_NAME@ +#define __env_alloc_print __env_alloc_print@DB_VERSION_UNIQUE_NAME@ +#define __env_read_db_config __env_read_db_config@DB_VERSION_UNIQUE_NAME@ +#define __config_split __config_split@DB_VERSION_UNIQUE_NAME@ +#define __env_failchk_pp __env_failchk_pp@DB_VERSION_UNIQUE_NAME@ +#define __env_failchk_int __env_failchk_int@DB_VERSION_UNIQUE_NAME@ +#define __env_thread_size __env_thread_size@DB_VERSION_UNIQUE_NAME@ +#define __env_thread_max __env_thread_max@DB_VERSION_UNIQUE_NAME@ +#define __env_thread_init __env_thread_init@DB_VERSION_UNIQUE_NAME@ +#define __env_thread_destroy __env_thread_destroy@DB_VERSION_UNIQUE_NAME@ +#define __env_set_state __env_set_state@DB_VERSION_UNIQUE_NAME@ +#define __env_thread_id_string __env_thread_id_string@DB_VERSION_UNIQUE_NAME@ +#define __db_file_extend __db_file_extend@DB_VERSION_UNIQUE_NAME@ +#define __db_file_multi_write __db_file_multi_write@DB_VERSION_UNIQUE_NAME@ +#define __db_file_write __db_file_write@DB_VERSION_UNIQUE_NAME@ +#define __db_env_destroy __db_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __env_get_alloc __env_get_alloc@DB_VERSION_UNIQUE_NAME@ +#define __env_set_alloc __env_set_alloc@DB_VERSION_UNIQUE_NAME@ +#define __env_get_memory_init __env_get_memory_init@DB_VERSION_UNIQUE_NAME@ +#define __env_set_memory_init __env_set_memory_init@DB_VERSION_UNIQUE_NAME@ +#define __env_get_memory_max __env_get_memory_max@DB_VERSION_UNIQUE_NAME@ +#define __env_set_memory_max __env_set_memory_max@DB_VERSION_UNIQUE_NAME@ +#define 
__env_get_encrypt_flags __env_get_encrypt_flags@DB_VERSION_UNIQUE_NAME@ +#define __env_set_encrypt __env_set_encrypt@DB_VERSION_UNIQUE_NAME@ +#define __env_map_flags __env_map_flags@DB_VERSION_UNIQUE_NAME@ +#define __env_fetch_flags __env_fetch_flags@DB_VERSION_UNIQUE_NAME@ +#define __env_set_flags __env_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __env_set_data_dir __env_set_data_dir@DB_VERSION_UNIQUE_NAME@ +#define __env_add_data_dir __env_add_data_dir@DB_VERSION_UNIQUE_NAME@ +#define __env_set_create_dir __env_set_create_dir@DB_VERSION_UNIQUE_NAME@ +#define __env_set_data_len __env_set_data_len@DB_VERSION_UNIQUE_NAME@ +#define __env_set_intermediate_dir_mode __env_set_intermediate_dir_mode@DB_VERSION_UNIQUE_NAME@ +#define __env_get_errcall __env_get_errcall@DB_VERSION_UNIQUE_NAME@ +#define __env_set_errcall __env_set_errcall@DB_VERSION_UNIQUE_NAME@ +#define __env_get_errfile __env_get_errfile@DB_VERSION_UNIQUE_NAME@ +#define __env_set_errfile __env_set_errfile@DB_VERSION_UNIQUE_NAME@ +#define __env_get_errpfx __env_get_errpfx@DB_VERSION_UNIQUE_NAME@ +#define __env_set_errpfx __env_set_errpfx@DB_VERSION_UNIQUE_NAME@ +#define __env_set_thread_count __env_set_thread_count@DB_VERSION_UNIQUE_NAME@ +#define __env_get_msgcall __env_get_msgcall@DB_VERSION_UNIQUE_NAME@ +#define __env_set_msgcall __env_set_msgcall@DB_VERSION_UNIQUE_NAME@ +#define __env_get_msgfile __env_get_msgfile@DB_VERSION_UNIQUE_NAME@ +#define __env_set_msgfile __env_set_msgfile@DB_VERSION_UNIQUE_NAME@ +#define __env_set_paniccall __env_set_paniccall@DB_VERSION_UNIQUE_NAME@ +#define __env_set_shm_key __env_set_shm_key@DB_VERSION_UNIQUE_NAME@ +#define __env_set_tmp_dir __env_set_tmp_dir@DB_VERSION_UNIQUE_NAME@ +#define __env_set_verbose __env_set_verbose@DB_VERSION_UNIQUE_NAME@ +#define __db_mi_env __db_mi_env@DB_VERSION_UNIQUE_NAME@ +#define __db_mi_open __db_mi_open@DB_VERSION_UNIQUE_NAME@ +#define __env_not_config __env_not_config@DB_VERSION_UNIQUE_NAME@ +#define __env_set_timeout 
__env_set_timeout@DB_VERSION_UNIQUE_NAME@ +#define __db_appname __db_appname@DB_VERSION_UNIQUE_NAME@ +#define __db_tmp_open __db_tmp_open@DB_VERSION_UNIQUE_NAME@ +#define __env_open_pp __env_open_pp@DB_VERSION_UNIQUE_NAME@ +#define __env_open __env_open@DB_VERSION_UNIQUE_NAME@ +#define __env_remove __env_remove@DB_VERSION_UNIQUE_NAME@ +#define __env_config __env_config@DB_VERSION_UNIQUE_NAME@ +#define __env_close_pp __env_close_pp@DB_VERSION_UNIQUE_NAME@ +#define __env_close __env_close@DB_VERSION_UNIQUE_NAME@ +#define __env_refresh __env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __env_get_open_flags __env_get_open_flags@DB_VERSION_UNIQUE_NAME@ +#define __env_attach_regions __env_attach_regions@DB_VERSION_UNIQUE_NAME@ +#define __db_apprec __db_apprec@DB_VERSION_UNIQUE_NAME@ +#define __env_openfiles __env_openfiles@DB_VERSION_UNIQUE_NAME@ +#define __env_init_rec __env_init_rec@DB_VERSION_UNIQUE_NAME@ +#define __env_attach __env_attach@DB_VERSION_UNIQUE_NAME@ +#define __env_turn_on __env_turn_on@DB_VERSION_UNIQUE_NAME@ +#define __env_turn_off __env_turn_off@DB_VERSION_UNIQUE_NAME@ +#define __env_panic_set __env_panic_set@DB_VERSION_UNIQUE_NAME@ +#define __env_ref_increment __env_ref_increment@DB_VERSION_UNIQUE_NAME@ +#define __env_ref_decrement __env_ref_decrement@DB_VERSION_UNIQUE_NAME@ +#define __env_detach __env_detach@DB_VERSION_UNIQUE_NAME@ +#define __env_remove_env __env_remove_env@DB_VERSION_UNIQUE_NAME@ +#define __env_region_attach __env_region_attach@DB_VERSION_UNIQUE_NAME@ +#define __env_region_share __env_region_share@DB_VERSION_UNIQUE_NAME@ +#define __env_region_detach __env_region_detach@DB_VERSION_UNIQUE_NAME@ +#define __envreg_register __envreg_register@DB_VERSION_UNIQUE_NAME@ +#define __envreg_unregister __envreg_unregister@DB_VERSION_UNIQUE_NAME@ +#define __envreg_xunlock __envreg_xunlock@DB_VERSION_UNIQUE_NAME@ +#define __envreg_isalive __envreg_isalive@DB_VERSION_UNIQUE_NAME@ +#define __env_struct_sig __env_struct_sig@DB_VERSION_UNIQUE_NAME@ 
+#define __env_stat_print_pp __env_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __db_print_fh __db_print_fh@DB_VERSION_UNIQUE_NAME@ +#define __db_print_fileid __db_print_fileid@DB_VERSION_UNIQUE_NAME@ +#define __db_dl __db_dl@DB_VERSION_UNIQUE_NAME@ +#define __db_dl_pct __db_dl_pct@DB_VERSION_UNIQUE_NAME@ +#define __db_dlbytes __db_dlbytes@DB_VERSION_UNIQUE_NAME@ +#define __db_print_reginfo __db_print_reginfo@DB_VERSION_UNIQUE_NAME@ +#define __db_stat_not_built __db_stat_not_built@DB_VERSION_UNIQUE_NAME@ +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_close __repmgr_close@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_get_ack_policy __repmgr_get_ack_policy@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site __repmgr_site@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site_by_eid __repmgr_site_by_eid@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_local_site __repmgr_local_site@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_stat_print_pp __repmgr_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_handle_event __repmgr_handle_event@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_channel __repmgr_channel@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_set_msg_dispatch __repmgr_set_msg_dispatch@DB_VERSION_UNIQUE_NAME@ 
+#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_init_recover __repmgr_init_recover@DB_VERSION_UNIQUE_NAME@ +#endif +#define __fop_create_42_desc __fop_create_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_desc __fop_create_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_remove_desc __fop_remove_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_42_desc __fop_write_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_desc __fop_write_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_42_desc __fop_rename_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_noundo_46_desc __fop_rename_noundo_46_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_desc __fop_rename_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_noundo_desc __fop_rename_noundo_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_file_remove_desc __fop_file_remove_desc@DB_VERSION_UNIQUE_NAME@ +#define __fop_init_recover __fop_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_42_print __fop_create_42_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_print __fop_create_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_remove_print __fop_remove_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_42_print __fop_write_42_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_print __fop_write_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_42_print __fop_rename_42_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_print __fop_rename_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_file_remove_print __fop_file_remove_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_init_print __fop_init_print@DB_VERSION_UNIQUE_NAME@ +#define __fop_create __fop_create@DB_VERSION_UNIQUE_NAME@ +#define __fop_remove __fop_remove@DB_VERSION_UNIQUE_NAME@ +#define __fop_write __fop_write@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename __fop_rename@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_recover __fop_create_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_42_recover __fop_create_42_recover@DB_VERSION_UNIQUE_NAME@ 
+#define __fop_remove_recover __fop_remove_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_recover __fop_write_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_42_recover __fop_write_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_recover __fop_rename_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_noundo_recover __fop_rename_noundo_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_42_recover __fop_rename_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_noundo_46_recover __fop_rename_noundo_46_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_file_remove_recover __fop_file_remove_recover@DB_VERSION_UNIQUE_NAME@ +#define __fop_lock_handle __fop_lock_handle@DB_VERSION_UNIQUE_NAME@ +#define __fop_file_setup __fop_file_setup@DB_VERSION_UNIQUE_NAME@ +#define __fop_subdb_setup __fop_subdb_setup@DB_VERSION_UNIQUE_NAME@ +#define __fop_remove_setup __fop_remove_setup@DB_VERSION_UNIQUE_NAME@ +#define __fop_read_meta __fop_read_meta@DB_VERSION_UNIQUE_NAME@ +#define __fop_dummy __fop_dummy@DB_VERSION_UNIQUE_NAME@ +#define __fop_dbrename __fop_dbrename@DB_VERSION_UNIQUE_NAME@ +#define __ham_quick_delete __ham_quick_delete@DB_VERSION_UNIQUE_NAME@ +#define __hamc_init __hamc_init@DB_VERSION_UNIQUE_NAME@ +#define __hamc_count __hamc_count@DB_VERSION_UNIQUE_NAME@ +#define __hamc_cmp __hamc_cmp@DB_VERSION_UNIQUE_NAME@ +#define __hamc_dup __hamc_dup@DB_VERSION_UNIQUE_NAME@ +#define __ham_contract_table __ham_contract_table@DB_VERSION_UNIQUE_NAME@ +#define __ham_call_hash __ham_call_hash@DB_VERSION_UNIQUE_NAME@ +#define __ham_overwrite __ham_overwrite@DB_VERSION_UNIQUE_NAME@ +#define __ham_lookup __ham_lookup@DB_VERSION_UNIQUE_NAME@ +#define __ham_init_dbt __ham_init_dbt@DB_VERSION_UNIQUE_NAME@ +#define __hamc_update __hamc_update@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_clist __ham_get_clist@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_desc __ham_insdel_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_42_desc 
__ham_insdel_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_newpage_desc __ham_newpage_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_splitdata_desc __ham_splitdata_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_desc __ham_replace_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_42_desc __ham_replace_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_copypage_desc __ham_copypage_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_42_desc __ham_metagroup_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_desc __ham_metagroup_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_42_desc __ham_groupalloc_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_desc __ham_groupalloc_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_changeslot_desc __ham_changeslot_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_contract_desc __ham_contract_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_curadj_desc __ham_curadj_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_chgpg_desc __ham_chgpg_desc@DB_VERSION_UNIQUE_NAME@ +#define __ham_init_recover __ham_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_print __ham_insdel_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_42_print __ham_insdel_42_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_newpage_print __ham_newpage_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_splitdata_print __ham_splitdata_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_print __ham_replace_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_42_print __ham_replace_42_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_copypage_print __ham_copypage_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_42_print __ham_metagroup_42_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_print __ham_metagroup_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_42_print __ham_groupalloc_42_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_print __ham_groupalloc_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_changeslot_print 
__ham_changeslot_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_contract_print __ham_contract_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_curadj_print __ham_curadj_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_chgpg_print __ham_chgpg_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_init_print __ham_init_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_compact_int __ham_compact_int@DB_VERSION_UNIQUE_NAME@ +#define __ham_compact_bucket __ham_compact_bucket@DB_VERSION_UNIQUE_NAME@ +#define __ham_compact_hash __ham_compact_hash@DB_VERSION_UNIQUE_NAME@ +#define __ham_pgin __ham_pgin@DB_VERSION_UNIQUE_NAME@ +#define __ham_pgout __ham_pgout@DB_VERSION_UNIQUE_NAME@ +#define __ham_mswap __ham_mswap@DB_VERSION_UNIQUE_NAME@ +#define __ham_add_dup __ham_add_dup@DB_VERSION_UNIQUE_NAME@ +#define __ham_dup_convert __ham_dup_convert@DB_VERSION_UNIQUE_NAME@ +#define __ham_make_dup __ham_make_dup@DB_VERSION_UNIQUE_NAME@ +#define __ham_dsearch __ham_dsearch@DB_VERSION_UNIQUE_NAME@ +#define __ham_func2 __ham_func2@DB_VERSION_UNIQUE_NAME@ +#define __ham_func3 __ham_func3@DB_VERSION_UNIQUE_NAME@ +#define __ham_func4 __ham_func4@DB_VERSION_UNIQUE_NAME@ +#define __ham_func5 __ham_func5@DB_VERSION_UNIQUE_NAME@ +#define __ham_test __ham_test@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_meta __ham_get_meta@DB_VERSION_UNIQUE_NAME@ +#define __ham_release_meta __ham_release_meta@DB_VERSION_UNIQUE_NAME@ +#define __ham_dirty_meta __ham_dirty_meta@DB_VERSION_UNIQUE_NAME@ +#define __ham_return_meta __ham_return_meta@DB_VERSION_UNIQUE_NAME@ +#define __ham_db_create __ham_db_create@DB_VERSION_UNIQUE_NAME@ +#define __ham_db_close __ham_db_close@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_h_ffactor __ham_get_h_ffactor@DB_VERSION_UNIQUE_NAME@ +#define __ham_set_h_compare __ham_set_h_compare@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_h_nelem __ham_get_h_nelem@DB_VERSION_UNIQUE_NAME@ +#define __ham_copy_config __ham_copy_config@DB_VERSION_UNIQUE_NAME@ +#define __ham_open __ham_open@DB_VERSION_UNIQUE_NAME@ +#define 
__ham_metachk __ham_metachk@DB_VERSION_UNIQUE_NAME@ +#define __ham_new_file __ham_new_file@DB_VERSION_UNIQUE_NAME@ +#define __ham_new_subdb __ham_new_subdb@DB_VERSION_UNIQUE_NAME@ +#define __ham_item __ham_item@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_reset __ham_item_reset@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_init __ham_item_init@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_last __ham_item_last@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_first __ham_item_first@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_prev __ham_item_prev@DB_VERSION_UNIQUE_NAME@ +#define __ham_item_next __ham_item_next@DB_VERSION_UNIQUE_NAME@ +#define __ham_insertpair __ham_insertpair@DB_VERSION_UNIQUE_NAME@ +#define __ham_getindex __ham_getindex@DB_VERSION_UNIQUE_NAME@ +#define __ham_verify_sorted_page __ham_verify_sorted_page@DB_VERSION_UNIQUE_NAME@ +#define __ham_sort_page_cursor __ham_sort_page_cursor@DB_VERSION_UNIQUE_NAME@ +#define __ham_sort_page __ham_sort_page@DB_VERSION_UNIQUE_NAME@ +#define __ham_del_pair __ham_del_pair@DB_VERSION_UNIQUE_NAME@ +#define __ham_replpair __ham_replpair@DB_VERSION_UNIQUE_NAME@ +#define __ham_onpage_replace __ham_onpage_replace@DB_VERSION_UNIQUE_NAME@ +#define __ham_merge_pages __ham_merge_pages@DB_VERSION_UNIQUE_NAME@ +#define __ham_split_page __ham_split_page@DB_VERSION_UNIQUE_NAME@ +#define __ham_add_el __ham_add_el@DB_VERSION_UNIQUE_NAME@ +#define __ham_copypair __ham_copypair@DB_VERSION_UNIQUE_NAME@ +#define __ham_add_ovflpage __ham_add_ovflpage@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_cpage __ham_get_cpage@DB_VERSION_UNIQUE_NAME@ +#define __ham_next_cpage __ham_next_cpage@DB_VERSION_UNIQUE_NAME@ +#define __ham_lock_bucket __ham_lock_bucket@DB_VERSION_UNIQUE_NAME@ +#define __ham_dpair __ham_dpair@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_recover __ham_insdel_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_42_recover __ham_insdel_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_newpage_recover 
__ham_newpage_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_recover __ham_replace_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_42_recover __ham_replace_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_splitdata_recover __ham_splitdata_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_copypage_recover __ham_copypage_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_recover __ham_metagroup_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_contract_recover __ham_contract_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_recover __ham_groupalloc_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_changeslot_recover __ham_changeslot_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_curadj_recover __ham_curadj_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_chgpg_recover __ham_chgpg_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_42_recover __ham_metagroup_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_42_recover __ham_groupalloc_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __ham_reclaim __ham_reclaim@DB_VERSION_UNIQUE_NAME@ +#define __ham_truncate __ham_truncate@DB_VERSION_UNIQUE_NAME@ +#define __ham_stat __ham_stat@DB_VERSION_UNIQUE_NAME@ +#define __ham_stat_print __ham_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_print_cursor __ham_print_cursor@DB_VERSION_UNIQUE_NAME@ +#define __ham_traverse __ham_traverse@DB_VERSION_UNIQUE_NAME@ +#define __db_no_hash_am __db_no_hash_am@DB_VERSION_UNIQUE_NAME@ +#define __ham_30_hashmeta __ham_30_hashmeta@DB_VERSION_UNIQUE_NAME@ +#define __ham_30_sizefix __ham_30_sizefix@DB_VERSION_UNIQUE_NAME@ +#define __ham_31_hashmeta __ham_31_hashmeta@DB_VERSION_UNIQUE_NAME@ +#define __ham_31_hash __ham_31_hash@DB_VERSION_UNIQUE_NAME@ +#define __ham_46_hashmeta __ham_46_hashmeta@DB_VERSION_UNIQUE_NAME@ +#define __ham_46_hash __ham_46_hash@DB_VERSION_UNIQUE_NAME@ +#define __ham_vrfy_meta __ham_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __ham_vrfy __ham_vrfy@DB_VERSION_UNIQUE_NAME@ +#define 
__ham_vrfy_structure __ham_vrfy_structure@DB_VERSION_UNIQUE_NAME@ +#define __ham_vrfy_hashing __ham_vrfy_hashing@DB_VERSION_UNIQUE_NAME@ +#define __ham_salvage __ham_salvage@DB_VERSION_UNIQUE_NAME@ +#define __ham_meta2pgset __ham_meta2pgset@DB_VERSION_UNIQUE_NAME@ +#define __heapc_init __heapc_init@DB_VERSION_UNIQUE_NAME@ +#define __heap_ditem __heap_ditem@DB_VERSION_UNIQUE_NAME@ +#define __heap_append __heap_append@DB_VERSION_UNIQUE_NAME@ +#define __heap_pitem __heap_pitem@DB_VERSION_UNIQUE_NAME@ +#define __heapc_dup __heapc_dup@DB_VERSION_UNIQUE_NAME@ +#define __heapc_gsplit __heapc_gsplit@DB_VERSION_UNIQUE_NAME@ +#define __heapc_refresh __heapc_refresh@DB_VERSION_UNIQUE_NAME@ +#define __heap_addrem_desc __heap_addrem_desc@DB_VERSION_UNIQUE_NAME@ +#define __heap_pg_alloc_desc __heap_pg_alloc_desc@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_meta_desc __heap_trunc_meta_desc@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_page_desc __heap_trunc_page_desc@DB_VERSION_UNIQUE_NAME@ +#define __heap_init_recover __heap_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __heap_addrem_print __heap_addrem_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_pg_alloc_print __heap_pg_alloc_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_meta_print __heap_trunc_meta_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_page_print __heap_trunc_page_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_init_print __heap_init_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_pgin __heap_pgin@DB_VERSION_UNIQUE_NAME@ +#define __heap_pgout __heap_pgout@DB_VERSION_UNIQUE_NAME@ +#define __heap_mswap __heap_mswap@DB_VERSION_UNIQUE_NAME@ +#define __heap_db_create __heap_db_create@DB_VERSION_UNIQUE_NAME@ +#define __heap_db_close __heap_db_close@DB_VERSION_UNIQUE_NAME@ +#define __heap_get_heapsize __heap_get_heapsize@DB_VERSION_UNIQUE_NAME@ +#define __heap_set_heapsize __heap_set_heapsize@DB_VERSION_UNIQUE_NAME@ +#define __heap_exist __heap_exist@DB_VERSION_UNIQUE_NAME@ +#define __heap_open 
__heap_open@DB_VERSION_UNIQUE_NAME@ +#define __heap_metachk __heap_metachk@DB_VERSION_UNIQUE_NAME@ +#define __heap_read_meta __heap_read_meta@DB_VERSION_UNIQUE_NAME@ +#define __heap_new_file __heap_new_file@DB_VERSION_UNIQUE_NAME@ +#define __heap_create_region __heap_create_region@DB_VERSION_UNIQUE_NAME@ +#define __heap_addrem_recover __heap_addrem_recover@DB_VERSION_UNIQUE_NAME@ +#define __heap_pg_alloc_recover __heap_pg_alloc_recover@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_meta_recover __heap_trunc_meta_recover@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_page_recover __heap_trunc_page_recover@DB_VERSION_UNIQUE_NAME@ +#define __heap_truncate __heap_truncate@DB_VERSION_UNIQUE_NAME@ +#define __heap_stat __heap_stat@DB_VERSION_UNIQUE_NAME@ +#define __heap_stat_print __heap_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __heap_print_cursor __heap_print_cursor@DB_VERSION_UNIQUE_NAME@ +#define __heap_stat_callback __heap_stat_callback@DB_VERSION_UNIQUE_NAME@ +#define __heap_traverse __heap_traverse@DB_VERSION_UNIQUE_NAME@ +#define __db_no_heap_am __db_no_heap_am@DB_VERSION_UNIQUE_NAME@ +#define __heap_vrfy_meta __heap_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __heap_vrfy __heap_vrfy@DB_VERSION_UNIQUE_NAME@ +#define __heap_vrfy_structure __heap_vrfy_structure@DB_VERSION_UNIQUE_NAME@ +#define __heap_salvage __heap_salvage@DB_VERSION_UNIQUE_NAME@ +#define __heap_meta2pgset __heap_meta2pgset@DB_VERSION_UNIQUE_NAME@ +#define __db_chksum __db_chksum@DB_VERSION_UNIQUE_NAME@ +#define __db_derive_mac __db_derive_mac@DB_VERSION_UNIQUE_NAME@ +#define __db_check_chksum __db_check_chksum@DB_VERSION_UNIQUE_NAME@ +#define __db_SHA1Transform __db_SHA1Transform@DB_VERSION_UNIQUE_NAME@ +#define __db_SHA1Init __db_SHA1Init@DB_VERSION_UNIQUE_NAME@ +#define __db_SHA1Update __db_SHA1Update@DB_VERSION_UNIQUE_NAME@ +#define __db_SHA1Final __db_SHA1Final@DB_VERSION_UNIQUE_NAME@ +#define __lock_vec_pp __lock_vec_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_vec 
__lock_vec@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_pp __lock_get_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_get __lock_get@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_internal __lock_get_internal@DB_VERSION_UNIQUE_NAME@ +#define __lock_put_pp __lock_put_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_put __lock_put@DB_VERSION_UNIQUE_NAME@ +#define __lock_downgrade __lock_downgrade@DB_VERSION_UNIQUE_NAME@ +#define __lock_locker_same_family __lock_locker_same_family@DB_VERSION_UNIQUE_NAME@ +#define __lock_wakeup __lock_wakeup@DB_VERSION_UNIQUE_NAME@ +#define __lock_promote __lock_promote@DB_VERSION_UNIQUE_NAME@ +#define __lock_change __lock_change@DB_VERSION_UNIQUE_NAME@ +#define __lock_detect_pp __lock_detect_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_detect __lock_detect@DB_VERSION_UNIQUE_NAME@ +#define __lock_failchk __lock_failchk@DB_VERSION_UNIQUE_NAME@ +#define __lock_id_pp __lock_id_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_id __lock_id@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_thread_id __lock_set_thread_id@DB_VERSION_UNIQUE_NAME@ +#define __lock_id_free_pp __lock_id_free_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_id_free __lock_id_free@DB_VERSION_UNIQUE_NAME@ +#define __lock_id_set __lock_id_set@DB_VERSION_UNIQUE_NAME@ +#define __lock_getlocker __lock_getlocker@DB_VERSION_UNIQUE_NAME@ +#define __lock_getlocker_int __lock_getlocker_int@DB_VERSION_UNIQUE_NAME@ +#define __lock_addfamilylocker __lock_addfamilylocker@DB_VERSION_UNIQUE_NAME@ +#define __lock_freelocker __lock_freelocker@DB_VERSION_UNIQUE_NAME@ +#define __lock_familyremove __lock_familyremove@DB_VERSION_UNIQUE_NAME@ +#define __lock_fix_list __lock_fix_list@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_list __lock_get_list@DB_VERSION_UNIQUE_NAME@ +#define __lock_list_print __lock_list_print@DB_VERSION_UNIQUE_NAME@ +#define __lock_env_create __lock_env_create@DB_VERSION_UNIQUE_NAME@ +#define __lock_env_destroy __lock_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_conflicts 
__lock_get_lk_conflicts@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_conflicts __lock_set_lk_conflicts@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_detect __lock_get_lk_detect@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_detect __lock_set_lk_detect@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_max_locks __lock_get_lk_max_locks@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_max_locks __lock_set_lk_max_locks@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_max_lockers __lock_get_lk_max_lockers@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_max_lockers __lock_set_lk_max_lockers@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_max_objects __lock_get_lk_max_objects@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_max_objects __lock_set_lk_max_objects@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_partitions __lock_get_lk_partitions@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_partitions __lock_set_lk_partitions@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_tablesize __lock_get_lk_tablesize@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_tablesize __lock_set_lk_tablesize@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_lk_priority __lock_set_lk_priority@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_lk_priority __lock_get_lk_priority@DB_VERSION_UNIQUE_NAME@ +#define __lock_get_env_timeout __lock_get_env_timeout@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_env_timeout __lock_set_env_timeout@DB_VERSION_UNIQUE_NAME@ +#define __lock_open __lock_open@DB_VERSION_UNIQUE_NAME@ +#define __lock_env_refresh __lock_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __lock_region_mutex_count __lock_region_mutex_count@DB_VERSION_UNIQUE_NAME@ +#define __lock_region_mutex_max __lock_region_mutex_max@DB_VERSION_UNIQUE_NAME@ +#define __lock_region_max __lock_region_max@DB_VERSION_UNIQUE_NAME@ +#define __lock_region_size __lock_region_size@DB_VERSION_UNIQUE_NAME@ +#define __lock_stat_pp __lock_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_stat_print_pp __lock_stat_print_pp@DB_VERSION_UNIQUE_NAME@ 
+#define __lock_stat_print __lock_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __lock_printlock __lock_printlock@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_timeout __lock_set_timeout@DB_VERSION_UNIQUE_NAME@ +#define __lock_set_timeout_internal __lock_set_timeout_internal@DB_VERSION_UNIQUE_NAME@ +#define __lock_inherit_timeout __lock_inherit_timeout@DB_VERSION_UNIQUE_NAME@ +#define __lock_ohash __lock_ohash@DB_VERSION_UNIQUE_NAME@ +#define __lock_lhash __lock_lhash@DB_VERSION_UNIQUE_NAME@ +#define __lock_nomem __lock_nomem@DB_VERSION_UNIQUE_NAME@ +#define __log_open __log_open@DB_VERSION_UNIQUE_NAME@ +#define __log_find __log_find@DB_VERSION_UNIQUE_NAME@ +#define __log_valid __log_valid@DB_VERSION_UNIQUE_NAME@ +#define __log_env_refresh __log_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __log_get_cached_ckp_lsn __log_get_cached_ckp_lsn@DB_VERSION_UNIQUE_NAME@ +#define __log_region_mutex_count __log_region_mutex_count@DB_VERSION_UNIQUE_NAME@ +#define __log_region_mutex_max __log_region_mutex_max@DB_VERSION_UNIQUE_NAME@ +#define __log_region_size __log_region_size@DB_VERSION_UNIQUE_NAME@ +#define __log_region_max __log_region_max@DB_VERSION_UNIQUE_NAME@ +#define __log_vtruncate __log_vtruncate@DB_VERSION_UNIQUE_NAME@ +#define __log_is_outdated __log_is_outdated@DB_VERSION_UNIQUE_NAME@ +#define __log_zero __log_zero@DB_VERSION_UNIQUE_NAME@ +#define __log_inmem_lsnoff __log_inmem_lsnoff@DB_VERSION_UNIQUE_NAME@ +#define __log_inmem_newfile __log_inmem_newfile@DB_VERSION_UNIQUE_NAME@ +#define __log_inmem_chkspace __log_inmem_chkspace@DB_VERSION_UNIQUE_NAME@ +#define __log_inmem_copyout __log_inmem_copyout@DB_VERSION_UNIQUE_NAME@ +#define __log_inmem_copyin __log_inmem_copyin@DB_VERSION_UNIQUE_NAME@ +#define __log_set_version __log_set_version@DB_VERSION_UNIQUE_NAME@ +#define __log_get_oldversion __log_get_oldversion@DB_VERSION_UNIQUE_NAME@ +#define __log_archive_pp __log_archive_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_get_stable_lsn 
__log_get_stable_lsn@DB_VERSION_UNIQUE_NAME@ +#define __log_autoremove __log_autoremove@DB_VERSION_UNIQUE_NAME@ +#define __log_check_page_lsn __log_check_page_lsn@DB_VERSION_UNIQUE_NAME@ +#define __log_printf_capi __log_printf_capi@DB_VERSION_UNIQUE_NAME@ +#define __log_printf_pp __log_printf_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_printf __log_printf@DB_VERSION_UNIQUE_NAME@ +#define __log_cursor_pp __log_cursor_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_cursor __log_cursor@DB_VERSION_UNIQUE_NAME@ +#define __logc_close __logc_close@DB_VERSION_UNIQUE_NAME@ +#define __logc_version __logc_version@DB_VERSION_UNIQUE_NAME@ +#define __logc_get __logc_get@DB_VERSION_UNIQUE_NAME@ +#define __log_hdrswap __log_hdrswap@DB_VERSION_UNIQUE_NAME@ +#define __log_persistswap __log_persistswap@DB_VERSION_UNIQUE_NAME@ +#define __log_read_record_pp __log_read_record_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_read_record __log_read_record@DB_VERSION_UNIQUE_NAME@ +#define __log_env_create __log_env_create@DB_VERSION_UNIQUE_NAME@ +#define __log_env_destroy __log_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __log_get_lg_bsize __log_get_lg_bsize@DB_VERSION_UNIQUE_NAME@ +#define __log_set_lg_bsize __log_set_lg_bsize@DB_VERSION_UNIQUE_NAME@ +#define __log_get_lg_filemode __log_get_lg_filemode@DB_VERSION_UNIQUE_NAME@ +#define __log_set_lg_filemode __log_set_lg_filemode@DB_VERSION_UNIQUE_NAME@ +#define __log_get_lg_max __log_get_lg_max@DB_VERSION_UNIQUE_NAME@ +#define __log_set_lg_max __log_set_lg_max@DB_VERSION_UNIQUE_NAME@ +#define __log_get_lg_regionmax __log_get_lg_regionmax@DB_VERSION_UNIQUE_NAME@ +#define __log_set_lg_regionmax __log_set_lg_regionmax@DB_VERSION_UNIQUE_NAME@ +#define __log_get_lg_dir __log_get_lg_dir@DB_VERSION_UNIQUE_NAME@ +#define __log_set_lg_dir __log_set_lg_dir@DB_VERSION_UNIQUE_NAME@ +#define __log_get_flags __log_get_flags@DB_VERSION_UNIQUE_NAME@ +#define __log_set_flags __log_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __log_get_config 
__log_get_config@DB_VERSION_UNIQUE_NAME@ +#define __log_set_config __log_set_config@DB_VERSION_UNIQUE_NAME@ +#define __log_set_config_int __log_set_config_int@DB_VERSION_UNIQUE_NAME@ +#define __log_check_sizes __log_check_sizes@DB_VERSION_UNIQUE_NAME@ +#define __log_print_record __log_print_record@DB_VERSION_UNIQUE_NAME@ +#define __log_put_pp __log_put_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_put __log_put@DB_VERSION_UNIQUE_NAME@ +#define __log_current_lsn_int __log_current_lsn_int@DB_VERSION_UNIQUE_NAME@ +#define __log_current_lsn __log_current_lsn@DB_VERSION_UNIQUE_NAME@ +#define __log_newfile __log_newfile@DB_VERSION_UNIQUE_NAME@ +#define __log_flush_pp __log_flush_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_flush __log_flush@DB_VERSION_UNIQUE_NAME@ +#define __log_flush_int __log_flush_int@DB_VERSION_UNIQUE_NAME@ +#define __log_file_pp __log_file_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_name __log_name@DB_VERSION_UNIQUE_NAME@ +#define __log_rep_put __log_rep_put@DB_VERSION_UNIQUE_NAME@ +#define __log_put_record_pp __log_put_record_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_put_record __log_put_record@DB_VERSION_UNIQUE_NAME@ +#define __log_stat_pp __log_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_stat_print_pp __log_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_stat_print __log_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __log_verify_pp __log_verify_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_verify __log_verify@DB_VERSION_UNIQUE_NAME@ +#define __log_verify_wrap __log_verify_wrap@DB_VERSION_UNIQUE_NAME@ +#define __crdel_init_verify __crdel_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_init_verify __db_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_init_verify __dbreg_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_init_verify __bam_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_init_verify __fop_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_init_verify __ham_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __heap_init_verify 
__heap_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_init_verify __qam_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_init_verify __txn_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_log_verify_global_report __db_log_verify_global_report@DB_VERSION_UNIQUE_NAME@ +#define __crdel_metasub_verify __crdel_metasub_verify@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_create_verify __crdel_inmem_create_verify@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_rename_verify __crdel_inmem_rename_verify@DB_VERSION_UNIQUE_NAME@ +#define __crdel_inmem_remove_verify __crdel_inmem_remove_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_addrem_verify __db_addrem_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_big_verify __db_big_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_ovref_verify __db_ovref_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_42_verify __db_relink_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_debug_verify __db_debug_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_noop_verify __db_noop_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_42_verify __db_pg_alloc_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_alloc_verify __db_pg_alloc_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_42_verify __db_pg_free_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_free_verify __db_pg_free_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_cksum_verify __db_cksum_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_42_verify __db_pg_freedata_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_freedata_verify __db_pg_freedata_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_init_verify __db_pg_init_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_sort_44_verify __db_pg_sort_44_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pg_trunc_verify __db_pg_trunc_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_realloc_verify __db_realloc_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_relink_verify __db_relink_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_merge_verify 
__db_merge_verify@DB_VERSION_UNIQUE_NAME@ +#define __db_pgno_verify __db_pgno_verify@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_verify __dbreg_register_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_verify __bam_split_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_split_42_verify __bam_split_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsplit_verify __bam_rsplit_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_adj_verify __bam_adj_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_irep_verify __bam_irep_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_cadjust_verify __bam_cadjust_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_cdel_verify __bam_cdel_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_repl_verify __bam_repl_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_root_verify __bam_root_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_curadj_verify __bam_curadj_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_rcuradj_verify __bam_rcuradj_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_relink_43_verify __bam_relink_43_verify@DB_VERSION_UNIQUE_NAME@ +#define __bam_merge_44_verify __bam_merge_44_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_42_verify __fop_create_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_create_verify __fop_create_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_remove_verify __fop_remove_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_42_verify __fop_write_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_write_verify __fop_write_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_42_verify __fop_rename_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_rename_verify __fop_rename_verify@DB_VERSION_UNIQUE_NAME@ +#define __fop_file_remove_verify __fop_file_remove_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_insdel_verify __ham_insdel_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_newpage_verify __ham_newpage_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_splitdata_verify __ham_splitdata_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_replace_verify 
__ham_replace_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_copypage_verify __ham_copypage_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_42_verify __ham_metagroup_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_metagroup_verify __ham_metagroup_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_42_verify __ham_groupalloc_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_groupalloc_verify __ham_groupalloc_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_changeslot_verify __ham_changeslot_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_contract_verify __ham_contract_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_curadj_verify __ham_curadj_verify@DB_VERSION_UNIQUE_NAME@ +#define __ham_chgpg_verify __ham_chgpg_verify@DB_VERSION_UNIQUE_NAME@ +#define __heap_addrem_verify __heap_addrem_verify@DB_VERSION_UNIQUE_NAME@ +#define __heap_pg_alloc_verify __heap_pg_alloc_verify@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_meta_verify __heap_trunc_meta_verify@DB_VERSION_UNIQUE_NAME@ +#define __heap_trunc_page_verify __heap_trunc_page_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_incfirst_verify __qam_incfirst_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_mvptr_verify __qam_mvptr_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_del_verify __qam_del_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_add_verify __qam_add_verify@DB_VERSION_UNIQUE_NAME@ +#define __qam_delext_verify __qam_delext_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_42_verify __txn_regop_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_verify __txn_regop_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_42_verify __txn_ckp_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_verify __txn_ckp_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_child_verify __txn_child_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_xa_regop_42_verify __txn_xa_regop_42_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare_verify __txn_prepare_verify@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_verify 
__txn_recycle_verify@DB_VERSION_UNIQUE_NAME@ +#define __create_log_vrfy_info __create_log_vrfy_info@DB_VERSION_UNIQUE_NAME@ +#define __destroy_log_vrfy_info __destroy_log_vrfy_info@DB_VERSION_UNIQUE_NAME@ +#define __put_txn_vrfy_info __put_txn_vrfy_info@DB_VERSION_UNIQUE_NAME@ +#define __get_txn_vrfy_info __get_txn_vrfy_info@DB_VERSION_UNIQUE_NAME@ +#define __add_recycle_lsn_range __add_recycle_lsn_range@DB_VERSION_UNIQUE_NAME@ +#define __iterate_txninfo __iterate_txninfo@DB_VERSION_UNIQUE_NAME@ +#define __rem_last_recycle_lsn __rem_last_recycle_lsn@DB_VERSION_UNIQUE_NAME@ +#define __add_file_updated __add_file_updated@DB_VERSION_UNIQUE_NAME@ +#define __del_file_updated __del_file_updated@DB_VERSION_UNIQUE_NAME@ +#define __clear_fileups __clear_fileups@DB_VERSION_UNIQUE_NAME@ +#define __free_txninfo_stack __free_txninfo_stack@DB_VERSION_UNIQUE_NAME@ +#define __free_txninfo __free_txninfo@DB_VERSION_UNIQUE_NAME@ +#define __put_filereg_info __put_filereg_info@DB_VERSION_UNIQUE_NAME@ +#define __del_filelife __del_filelife@DB_VERSION_UNIQUE_NAME@ +#define __put_filelife __put_filelife@DB_VERSION_UNIQUE_NAME@ +#define __get_filelife __get_filelife@DB_VERSION_UNIQUE_NAME@ +#define __get_filereg_by_dbregid __get_filereg_by_dbregid@DB_VERSION_UNIQUE_NAME@ +#define __add_dbregid __add_dbregid@DB_VERSION_UNIQUE_NAME@ +#define __get_filereg_info __get_filereg_info@DB_VERSION_UNIQUE_NAME@ +#define __free_filereg_info __free_filereg_info@DB_VERSION_UNIQUE_NAME@ +#define __get_ckp_info __get_ckp_info@DB_VERSION_UNIQUE_NAME@ +#define __get_last_ckp_info __get_last_ckp_info@DB_VERSION_UNIQUE_NAME@ +#define __put_ckp_info __put_ckp_info@DB_VERSION_UNIQUE_NAME@ +#define __get_timestamp_info __get_timestamp_info@DB_VERSION_UNIQUE_NAME@ +#define __get_latest_timestamp_info __get_latest_timestamp_info@DB_VERSION_UNIQUE_NAME@ +#define __put_timestamp_info __put_timestamp_info@DB_VERSION_UNIQUE_NAME@ +#define __find_lsnrg_by_timerg __find_lsnrg_by_timerg@DB_VERSION_UNIQUE_NAME@ +#define 
__add_txnrange __add_txnrange@DB_VERSION_UNIQUE_NAME@ +#define __get_aborttxn __get_aborttxn@DB_VERSION_UNIQUE_NAME@ +#define __txn_started __txn_started@DB_VERSION_UNIQUE_NAME@ +#define __set_logvrfy_dbfuid __set_logvrfy_dbfuid@DB_VERSION_UNIQUE_NAME@ +#define __add_page_to_txn __add_page_to_txn@DB_VERSION_UNIQUE_NAME@ +#define __del_txn_pages __del_txn_pages@DB_VERSION_UNIQUE_NAME@ +#define __is_ancestor_txn __is_ancestor_txn@DB_VERSION_UNIQUE_NAME@ +#define __return_txn_pages __return_txn_pages@DB_VERSION_UNIQUE_NAME@ +#define __memp_alloc __memp_alloc@DB_VERSION_UNIQUE_NAME@ +#define __memp_free __memp_free@DB_VERSION_UNIQUE_NAME@ +#define __memp_bhwrite __memp_bhwrite@DB_VERSION_UNIQUE_NAME@ +#define __memp_pgread __memp_pgread@DB_VERSION_UNIQUE_NAME@ +#define __memp_pg __memp_pg@DB_VERSION_UNIQUE_NAME@ +#define __memp_bhfree __memp_bhfree@DB_VERSION_UNIQUE_NAME@ +#define __memp_fget_pp __memp_fget_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fget __memp_fget@DB_VERSION_UNIQUE_NAME@ +#define __memp_fcreate_pp __memp_fcreate_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fcreate __memp_fcreate@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_clear_len __memp_set_clear_len@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_fileid __memp_get_fileid@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_fileid __memp_set_fileid@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_flags __memp_get_flags@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_flags __memp_set_flags@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_ftype __memp_get_ftype@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_ftype __memp_set_ftype@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_lsn_offset __memp_set_lsn_offset@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_pgcookie __memp_get_pgcookie@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_pgcookie __memp_set_pgcookie@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_priority __memp_get_priority@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_last_pgno __memp_get_last_pgno@DB_VERSION_UNIQUE_NAME@ +#define 
__memp_fn __memp_fn@DB_VERSION_UNIQUE_NAME@ +#define __memp_fns __memp_fns@DB_VERSION_UNIQUE_NAME@ +#define __memp_fopen_pp __memp_fopen_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fopen __memp_fopen@DB_VERSION_UNIQUE_NAME@ +#define __memp_fclose_pp __memp_fclose_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fclose __memp_fclose@DB_VERSION_UNIQUE_NAME@ +#define __memp_mf_discard __memp_mf_discard@DB_VERSION_UNIQUE_NAME@ +#define __memp_inmemlist __memp_inmemlist@DB_VERSION_UNIQUE_NAME@ +#define __memp_fput_pp __memp_fput_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fput __memp_fput@DB_VERSION_UNIQUE_NAME@ +#define __memp_unpin_buffers __memp_unpin_buffers@DB_VERSION_UNIQUE_NAME@ +#define __memp_dirty __memp_dirty@DB_VERSION_UNIQUE_NAME@ +#define __memp_shared __memp_shared@DB_VERSION_UNIQUE_NAME@ +#define __memp_env_create __memp_env_create@DB_VERSION_UNIQUE_NAME@ +#define __memp_env_destroy __memp_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_cachesize __memp_get_cachesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_cachesize __memp_set_cachesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_config __memp_set_config@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_config __memp_get_config@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_max_openfd __memp_get_mp_max_openfd@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_max_openfd __memp_set_mp_max_openfd@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_max_write __memp_get_mp_max_write@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_max_write __memp_set_mp_max_write@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_mmapsize __memp_get_mp_mmapsize@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_mmapsize __memp_set_mp_mmapsize@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_pagesize __memp_get_mp_pagesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_pagesize __memp_set_mp_pagesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_tablesize __memp_get_mp_tablesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_tablesize 
__memp_set_mp_tablesize@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_mp_mtxcount __memp_get_mp_mtxcount@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_mp_mtxcount __memp_set_mp_mtxcount@DB_VERSION_UNIQUE_NAME@ +#define __memp_nameop __memp_nameop@DB_VERSION_UNIQUE_NAME@ +#define __memp_ftruncate __memp_ftruncate@DB_VERSION_UNIQUE_NAME@ +#define __memp_alloc_freelist __memp_alloc_freelist@DB_VERSION_UNIQUE_NAME@ +#define __memp_free_freelist __memp_free_freelist@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_freelist __memp_get_freelist@DB_VERSION_UNIQUE_NAME@ +#define __memp_extend_freelist __memp_extend_freelist@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_last_pgno __memp_set_last_pgno@DB_VERSION_UNIQUE_NAME@ +#define __memp_bh_settxn __memp_bh_settxn@DB_VERSION_UNIQUE_NAME@ +#define __memp_skip_curadj __memp_skip_curadj@DB_VERSION_UNIQUE_NAME@ +#define __memp_bh_freeze __memp_bh_freeze@DB_VERSION_UNIQUE_NAME@ +#define __memp_bh_thaw __memp_bh_thaw@DB_VERSION_UNIQUE_NAME@ +#define __memp_open __memp_open@DB_VERSION_UNIQUE_NAME@ +#define __memp_init __memp_init@DB_VERSION_UNIQUE_NAME@ +#define __memp_max_regions __memp_max_regions@DB_VERSION_UNIQUE_NAME@ +#define __memp_region_mutex_count __memp_region_mutex_count@DB_VERSION_UNIQUE_NAME@ +#define __memp_env_refresh __memp_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __memp_register_pp __memp_register_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_register __memp_register@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_bucket __memp_get_bucket@DB_VERSION_UNIQUE_NAME@ +#define __memp_resize __memp_resize@DB_VERSION_UNIQUE_NAME@ +#define __memp_get_cache_max __memp_get_cache_max@DB_VERSION_UNIQUE_NAME@ +#define __memp_set_cache_max __memp_set_cache_max@DB_VERSION_UNIQUE_NAME@ +#define __memp_stat_pp __memp_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_stat_print_pp __memp_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_stat_print __memp_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __memp_stat_hash 
__memp_stat_hash@DB_VERSION_UNIQUE_NAME@ +#define __memp_walk_files __memp_walk_files@DB_VERSION_UNIQUE_NAME@ +#define __memp_discard_all_mpfs __memp_discard_all_mpfs@DB_VERSION_UNIQUE_NAME@ +#define __memp_sync_pp __memp_sync_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_sync __memp_sync@DB_VERSION_UNIQUE_NAME@ +#define __memp_fsync_pp __memp_fsync_pp@DB_VERSION_UNIQUE_NAME@ +#define __memp_fsync __memp_fsync@DB_VERSION_UNIQUE_NAME@ +#define __mp_xxx_fh __mp_xxx_fh@DB_VERSION_UNIQUE_NAME@ +#define __memp_sync_int __memp_sync_int@DB_VERSION_UNIQUE_NAME@ +#define __memp_mf_sync __memp_mf_sync@DB_VERSION_UNIQUE_NAME@ +#define __memp_trickle_pp __memp_trickle_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_alloc __mutex_alloc@DB_VERSION_UNIQUE_NAME@ +#define __mutex_alloc_int __mutex_alloc_int@DB_VERSION_UNIQUE_NAME@ +#define __mutex_free __mutex_free@DB_VERSION_UNIQUE_NAME@ +#define __mutex_free_int __mutex_free_int@DB_VERSION_UNIQUE_NAME@ +#define __mutex_refresh __mutex_refresh@DB_VERSION_UNIQUE_NAME@ +#define __mut_failchk __mut_failchk@DB_VERSION_UNIQUE_NAME@ +#define __db_fcntl_mutex_init __db_fcntl_mutex_init@DB_VERSION_UNIQUE_NAME@ +#define __db_fcntl_mutex_lock __db_fcntl_mutex_lock@DB_VERSION_UNIQUE_NAME@ +#define __db_fcntl_mutex_trylock __db_fcntl_mutex_trylock@DB_VERSION_UNIQUE_NAME@ +#define __db_fcntl_mutex_unlock __db_fcntl_mutex_unlock@DB_VERSION_UNIQUE_NAME@ +#define __db_fcntl_mutex_destroy __db_fcntl_mutex_destroy@DB_VERSION_UNIQUE_NAME@ +#define __mutex_alloc_pp __mutex_alloc_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_free_pp __mutex_free_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_lock_pp __mutex_lock_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_unlock_pp __mutex_unlock_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_get_align __mutex_get_align@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_align __mutex_set_align@DB_VERSION_UNIQUE_NAME@ +#define __mutex_get_increment __mutex_get_increment@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_increment 
__mutex_set_increment@DB_VERSION_UNIQUE_NAME@ +#define __mutex_get_init __mutex_get_init@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_init __mutex_set_init@DB_VERSION_UNIQUE_NAME@ +#define __mutex_get_max __mutex_get_max@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_max __mutex_set_max@DB_VERSION_UNIQUE_NAME@ +#define __mutex_get_tas_spins __mutex_get_tas_spins@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_tas_spins __mutex_set_tas_spins@DB_VERSION_UNIQUE_NAME@ +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +#define __atomic_inc __atomic_inc@DB_VERSION_UNIQUE_NAME@ +#endif +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +#define __atomic_dec __atomic_dec@DB_VERSION_UNIQUE_NAME@ +#endif +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +#define atomic_compare_exchange atomic_compare_exchange@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_pthread_mutex_init __db_pthread_mutex_init@DB_VERSION_UNIQUE_NAME@ +#ifndef HAVE_MUTEX_HYBRID +#define __db_pthread_mutex_lock __db_pthread_mutex_lock@DB_VERSION_UNIQUE_NAME@ +#endif +#if defined(HAVE_SHARED_LATCHES) +#define __db_pthread_mutex_readlock __db_pthread_mutex_readlock@DB_VERSION_UNIQUE_NAME@ +#endif +#ifdef HAVE_MUTEX_HYBRID +#define __db_hybrid_mutex_suspend __db_hybrid_mutex_suspend@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_pthread_mutex_unlock __db_pthread_mutex_unlock@DB_VERSION_UNIQUE_NAME@ +#define __db_pthread_mutex_destroy __db_pthread_mutex_destroy@DB_VERSION_UNIQUE_NAME@ +#define __mutex_open __mutex_open@DB_VERSION_UNIQUE_NAME@ +#define __mutex_env_refresh __mutex_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __mutex_resource_return __mutex_resource_return@DB_VERSION_UNIQUE_NAME@ +#define __mutex_stat_pp __mutex_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_stat_print_pp __mutex_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __mutex_stat_print __mutex_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __mutex_print_debug_single 
__mutex_print_debug_single@DB_VERSION_UNIQUE_NAME@ +#define __mutex_print_debug_stats __mutex_print_debug_stats@DB_VERSION_UNIQUE_NAME@ +#define __mutex_set_wait_info __mutex_set_wait_info@DB_VERSION_UNIQUE_NAME@ +#define __mutex_clear __mutex_clear@DB_VERSION_UNIQUE_NAME@ +#define __db_tas_mutex_init __db_tas_mutex_init@DB_VERSION_UNIQUE_NAME@ +#define __db_tas_mutex_lock __db_tas_mutex_lock@DB_VERSION_UNIQUE_NAME@ +#define __db_tas_mutex_trylock __db_tas_mutex_trylock@DB_VERSION_UNIQUE_NAME@ +#if defined(HAVE_SHARED_LATCHES) +#define __db_tas_mutex_readlock __db_tas_mutex_readlock@DB_VERSION_UNIQUE_NAME@ +#endif +#if defined(HAVE_SHARED_LATCHES) +#define __db_tas_mutex_tryreadlock __db_tas_mutex_tryreadlock@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_tas_mutex_unlock __db_tas_mutex_unlock@DB_VERSION_UNIQUE_NAME@ +#define __db_tas_mutex_destroy __db_tas_mutex_destroy@DB_VERSION_UNIQUE_NAME@ +#define __db_win32_mutex_init __db_win32_mutex_init@DB_VERSION_UNIQUE_NAME@ +#define __db_win32_mutex_lock __db_win32_mutex_lock@DB_VERSION_UNIQUE_NAME@ +#define __db_win32_mutex_trylock __db_win32_mutex_trylock@DB_VERSION_UNIQUE_NAME@ +#if defined(HAVE_SHARED_LATCHES) +#define __db_win32_mutex_readlock __db_win32_mutex_readlock@DB_VERSION_UNIQUE_NAME@ +#endif +#if defined(HAVE_SHARED_LATCHES) +#define __db_win32_mutex_tryreadlock __db_win32_mutex_tryreadlock@DB_VERSION_UNIQUE_NAME@ +#endif +#define __db_win32_mutex_unlock __db_win32_mutex_unlock@DB_VERSION_UNIQUE_NAME@ +#define __db_win32_mutex_destroy __db_win32_mutex_destroy@DB_VERSION_UNIQUE_NAME@ +#define __os_abort __os_abort@DB_VERSION_UNIQUE_NAME@ +#define __os_abspath __os_abspath@DB_VERSION_UNIQUE_NAME@ +#if defined(HAVE_REPLICATION_THREADS) +#define __os_getaddrinfo __os_getaddrinfo@DB_VERSION_UNIQUE_NAME@ +#endif +#if defined(HAVE_REPLICATION_THREADS) +#define __os_freeaddrinfo __os_freeaddrinfo@DB_VERSION_UNIQUE_NAME@ +#endif +#define __os_umalloc __os_umalloc@DB_VERSION_UNIQUE_NAME@ +#define __os_urealloc 
__os_urealloc@DB_VERSION_UNIQUE_NAME@ +#define __os_ufree __os_ufree@DB_VERSION_UNIQUE_NAME@ +#define __os_strdup __os_strdup@DB_VERSION_UNIQUE_NAME@ +#define __os_calloc __os_calloc@DB_VERSION_UNIQUE_NAME@ +#define __os_malloc __os_malloc@DB_VERSION_UNIQUE_NAME@ +#define __os_realloc __os_realloc@DB_VERSION_UNIQUE_NAME@ +#define __os_free __os_free@DB_VERSION_UNIQUE_NAME@ +#define __ua_memcpy __ua_memcpy@DB_VERSION_UNIQUE_NAME@ +#define __os_gettime __os_gettime@DB_VERSION_UNIQUE_NAME@ +#define __os_fs_notzero __os_fs_notzero@DB_VERSION_UNIQUE_NAME@ +#define __os_support_direct_io __os_support_direct_io@DB_VERSION_UNIQUE_NAME@ +#define __os_support_db_register __os_support_db_register@DB_VERSION_UNIQUE_NAME@ +#define __os_support_replication __os_support_replication@DB_VERSION_UNIQUE_NAME@ +#define __os_cpu_count __os_cpu_count@DB_VERSION_UNIQUE_NAME@ +#define __os_ctime __os_ctime@DB_VERSION_UNIQUE_NAME@ +#define __os_dirlist __os_dirlist@DB_VERSION_UNIQUE_NAME@ +#define __os_dirfree __os_dirfree@DB_VERSION_UNIQUE_NAME@ +#define __os_get_errno_ret_zero __os_get_errno_ret_zero@DB_VERSION_UNIQUE_NAME@ +#define __os_get_errno __os_get_errno@DB_VERSION_UNIQUE_NAME@ +#define __os_get_neterr __os_get_neterr@DB_VERSION_UNIQUE_NAME@ +#define __os_get_syserr __os_get_syserr@DB_VERSION_UNIQUE_NAME@ +#define __os_set_errno __os_set_errno@DB_VERSION_UNIQUE_NAME@ +#define __os_strerror __os_strerror@DB_VERSION_UNIQUE_NAME@ +#define __os_posix_err __os_posix_err@DB_VERSION_UNIQUE_NAME@ +#define __os_fileid __os_fileid@DB_VERSION_UNIQUE_NAME@ +#define __os_fdlock __os_fdlock@DB_VERSION_UNIQUE_NAME@ +#define __os_fsync __os_fsync@DB_VERSION_UNIQUE_NAME@ +#define __os_getenv __os_getenv@DB_VERSION_UNIQUE_NAME@ +#define __os_openhandle __os_openhandle@DB_VERSION_UNIQUE_NAME@ +#define __os_closehandle __os_closehandle@DB_VERSION_UNIQUE_NAME@ +#define __os_attach __os_attach@DB_VERSION_UNIQUE_NAME@ +#define __os_detach __os_detach@DB_VERSION_UNIQUE_NAME@ +#define __os_mapfile 
__os_mapfile@DB_VERSION_UNIQUE_NAME@ +#define __os_unmapfile __os_unmapfile@DB_VERSION_UNIQUE_NAME@ +#define __os_mkdir __os_mkdir@DB_VERSION_UNIQUE_NAME@ +#define __os_open __os_open@DB_VERSION_UNIQUE_NAME@ +#define __os_id __os_id@DB_VERSION_UNIQUE_NAME@ +#define __os_rename __os_rename@DB_VERSION_UNIQUE_NAME@ +#define __os_isroot __os_isroot@DB_VERSION_UNIQUE_NAME@ +#define __db_rpath __db_rpath@DB_VERSION_UNIQUE_NAME@ +#define __os_io __os_io@DB_VERSION_UNIQUE_NAME@ +#define __os_read __os_read@DB_VERSION_UNIQUE_NAME@ +#define __os_write __os_write@DB_VERSION_UNIQUE_NAME@ +#define __os_physwrite __os_physwrite@DB_VERSION_UNIQUE_NAME@ +#define __os_seek __os_seek@DB_VERSION_UNIQUE_NAME@ +#define __os_stack __os_stack@DB_VERSION_UNIQUE_NAME@ +#define __os_exists __os_exists@DB_VERSION_UNIQUE_NAME@ +#define __os_ioinfo __os_ioinfo@DB_VERSION_UNIQUE_NAME@ +#define __os_tmpdir __os_tmpdir@DB_VERSION_UNIQUE_NAME@ +#define __os_truncate __os_truncate@DB_VERSION_UNIQUE_NAME@ +#define __os_unique_id __os_unique_id@DB_VERSION_UNIQUE_NAME@ +#define __os_unlink __os_unlink@DB_VERSION_UNIQUE_NAME@ +#define __os_yield __os_yield@DB_VERSION_UNIQUE_NAME@ +#ifdef HAVE_QNX +#define __os_qnx_region_open __os_qnx_region_open@DB_VERSION_UNIQUE_NAME@ +#endif +#define __os_is_winnt __os_is_winnt@DB_VERSION_UNIQUE_NAME@ +#define __os_cpu_count __os_cpu_count@DB_VERSION_UNIQUE_NAME@ +#ifdef HAVE_REPLICATION_THREADS +#define __os_get_neterr __os_get_neterr@DB_VERSION_UNIQUE_NAME@ +#endif +#define __qam_position __qam_position@DB_VERSION_UNIQUE_NAME@ +#define __qam_pitem __qam_pitem@DB_VERSION_UNIQUE_NAME@ +#define __qam_append __qam_append@DB_VERSION_UNIQUE_NAME@ +#define __qamc_dup __qamc_dup@DB_VERSION_UNIQUE_NAME@ +#define __qamc_init __qamc_init@DB_VERSION_UNIQUE_NAME@ +#define __qam_truncate __qam_truncate@DB_VERSION_UNIQUE_NAME@ +#define __qam_delete __qam_delete@DB_VERSION_UNIQUE_NAME@ +#define __qam_incfirst_desc __qam_incfirst_desc@DB_VERSION_UNIQUE_NAME@ +#define 
__qam_mvptr_desc __qam_mvptr_desc@DB_VERSION_UNIQUE_NAME@ +#define __qam_del_desc __qam_del_desc@DB_VERSION_UNIQUE_NAME@ +#define __qam_add_desc __qam_add_desc@DB_VERSION_UNIQUE_NAME@ +#define __qam_delext_desc __qam_delext_desc@DB_VERSION_UNIQUE_NAME@ +#define __qam_init_recover __qam_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_incfirst_print __qam_incfirst_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_mvptr_print __qam_mvptr_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_del_print __qam_del_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_add_print __qam_add_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_delext_print __qam_delext_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_init_print __qam_init_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_mswap __qam_mswap@DB_VERSION_UNIQUE_NAME@ +#define __qam_pgin_out __qam_pgin_out@DB_VERSION_UNIQUE_NAME@ +#define __qam_fprobe __qam_fprobe@DB_VERSION_UNIQUE_NAME@ +#define __qam_fclose __qam_fclose@DB_VERSION_UNIQUE_NAME@ +#define __qam_fremove __qam_fremove@DB_VERSION_UNIQUE_NAME@ +#define __qam_sync __qam_sync@DB_VERSION_UNIQUE_NAME@ +#define __qam_gen_filelist __qam_gen_filelist@DB_VERSION_UNIQUE_NAME@ +#define __qam_extent_names __qam_extent_names@DB_VERSION_UNIQUE_NAME@ +#define __qam_exid __qam_exid@DB_VERSION_UNIQUE_NAME@ +#define __qam_nameop __qam_nameop@DB_VERSION_UNIQUE_NAME@ +#define __qam_lsn_reset __qam_lsn_reset@DB_VERSION_UNIQUE_NAME@ +#define __qam_db_create __qam_db_create@DB_VERSION_UNIQUE_NAME@ +#define __qam_db_close __qam_db_close@DB_VERSION_UNIQUE_NAME@ +#define __qam_get_extentsize __qam_get_extentsize@DB_VERSION_UNIQUE_NAME@ +#define __queue_pageinfo __queue_pageinfo@DB_VERSION_UNIQUE_NAME@ +#define __db_prqueue __db_prqueue@DB_VERSION_UNIQUE_NAME@ +#define __qam_remove __qam_remove@DB_VERSION_UNIQUE_NAME@ +#define __qam_rename __qam_rename@DB_VERSION_UNIQUE_NAME@ +#define __qam_map_flags __qam_map_flags@DB_VERSION_UNIQUE_NAME@ +#define __qam_set_flags __qam_set_flags@DB_VERSION_UNIQUE_NAME@ +#define 
__qam_open __qam_open@DB_VERSION_UNIQUE_NAME@ +#define __qam_set_ext_data __qam_set_ext_data@DB_VERSION_UNIQUE_NAME@ +#define __qam_metachk __qam_metachk@DB_VERSION_UNIQUE_NAME@ +#define __qam_new_file __qam_new_file@DB_VERSION_UNIQUE_NAME@ +#define __qam_incfirst_recover __qam_incfirst_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_mvptr_recover __qam_mvptr_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_del_recover __qam_del_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_delext_recover __qam_delext_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_add_recover __qam_add_recover@DB_VERSION_UNIQUE_NAME@ +#define __qam_stat __qam_stat@DB_VERSION_UNIQUE_NAME@ +#define __qam_stat_print __qam_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __db_no_queue_am __db_no_queue_am@DB_VERSION_UNIQUE_NAME@ +#define __qam_31_qammeta __qam_31_qammeta@DB_VERSION_UNIQUE_NAME@ +#define __qam_32_qammeta __qam_32_qammeta@DB_VERSION_UNIQUE_NAME@ +#define __qam_vrfy_meta __qam_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __qam_meta2pgset __qam_meta2pgset@DB_VERSION_UNIQUE_NAME@ +#define __qam_vrfy_data __qam_vrfy_data@DB_VERSION_UNIQUE_NAME@ +#define __qam_vrfy_structure __qam_vrfy_structure@DB_VERSION_UNIQUE_NAME@ +#define __qam_vrfy_walkqueue __qam_vrfy_walkqueue@DB_VERSION_UNIQUE_NAME@ +#define __qam_salvage __qam_salvage@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_marshal __rep_bulk_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_unmarshal __rep_bulk_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_control_marshal __rep_control_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_control_unmarshal __rep_control_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_egen_marshal __rep_egen_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_egen_unmarshal __rep_egen_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_fileinfo_marshal __rep_fileinfo_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_fileinfo_unmarshal __rep_fileinfo_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_grant_info_marshal 
__rep_grant_info_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_grant_info_unmarshal __rep_grant_info_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_logreq_marshal __rep_logreq_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_logreq_unmarshal __rep_logreq_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_newfile_marshal __rep_newfile_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_newfile_unmarshal __rep_newfile_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_update_marshal __rep_update_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_update_unmarshal __rep_update_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote_info_marshal __rep_vote_info_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote_info_unmarshal __rep_vote_info_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote_info_v5_marshal __rep_vote_info_v5_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote_info_v5_unmarshal __rep_vote_info_v5_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_lsn_hist_key_marshal __rep_lsn_hist_key_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_lsn_hist_key_unmarshal __rep_lsn_hist_key_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_lsn_hist_data_marshal __rep_lsn_hist_data_marshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_lsn_hist_data_unmarshal __rep_lsn_hist_data_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __rep_update_req __rep_update_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_page_req __rep_page_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_update_setup __rep_update_setup@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_page __rep_bulk_page@DB_VERSION_UNIQUE_NAME@ +#define __rep_page __rep_page@DB_VERSION_UNIQUE_NAME@ +#define __rep_init_cleanup __rep_init_cleanup@DB_VERSION_UNIQUE_NAME@ +#define __rep_pggap_req __rep_pggap_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_finfo_alloc __rep_finfo_alloc@DB_VERSION_UNIQUE_NAME@ +#define __rep_remove_init_file __rep_remove_init_file@DB_VERSION_UNIQUE_NAME@ +#define __rep_reset_init __rep_reset_init@DB_VERSION_UNIQUE_NAME@ +#define 
__rep_elect_pp __rep_elect_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_elect_int __rep_elect_int@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote1 __rep_vote1@DB_VERSION_UNIQUE_NAME@ +#define __rep_vote2 __rep_vote2@DB_VERSION_UNIQUE_NAME@ +#define __rep_update_grant __rep_update_grant@DB_VERSION_UNIQUE_NAME@ +#define __rep_islease_granted __rep_islease_granted@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_table_alloc __rep_lease_table_alloc@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_grant __rep_lease_grant@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_check __rep_lease_check@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_refresh __rep_lease_refresh@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_expire __rep_lease_expire@DB_VERSION_UNIQUE_NAME@ +#define __rep_lease_waittime __rep_lease_waittime@DB_VERSION_UNIQUE_NAME@ +#define __rep_allreq __rep_allreq@DB_VERSION_UNIQUE_NAME@ +#define __rep_log __rep_log@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_log __rep_bulk_log@DB_VERSION_UNIQUE_NAME@ +#define __rep_logreq __rep_logreq@DB_VERSION_UNIQUE_NAME@ +#define __rep_loggap_req __rep_loggap_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_logready __rep_logready@DB_VERSION_UNIQUE_NAME@ +#define __rep_env_create __rep_env_create@DB_VERSION_UNIQUE_NAME@ +#define __rep_env_destroy __rep_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_config __rep_get_config@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_config __rep_set_config@DB_VERSION_UNIQUE_NAME@ +#define __rep_start_pp __rep_start_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_start_int __rep_start_int@DB_VERSION_UNIQUE_NAME@ +#define __rep_open_sysdb __rep_open_sysdb@DB_VERSION_UNIQUE_NAME@ +#define __rep_client_dbinit __rep_client_dbinit@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_limit __rep_get_limit@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_limit __rep_set_limit@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_nsites_pp __rep_set_nsites_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_nsites_int __rep_set_nsites_int@DB_VERSION_UNIQUE_NAME@ 
+#define __rep_get_nsites __rep_get_nsites@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_priority __rep_set_priority@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_priority __rep_get_priority@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_timeout __rep_set_timeout@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_timeout __rep_get_timeout@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_request __rep_get_request@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_request __rep_set_request@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_transport_pp __rep_set_transport_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_transport_int __rep_set_transport_int@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_clockskew __rep_get_clockskew@DB_VERSION_UNIQUE_NAME@ +#define __rep_set_clockskew __rep_set_clockskew@DB_VERSION_UNIQUE_NAME@ +#define __rep_flush __rep_flush@DB_VERSION_UNIQUE_NAME@ +#define __rep_sync __rep_sync@DB_VERSION_UNIQUE_NAME@ +#define __rep_txn_applied __rep_txn_applied@DB_VERSION_UNIQUE_NAME@ +#define __rep_process_message_pp __rep_process_message_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_process_message_int __rep_process_message_int@DB_VERSION_UNIQUE_NAME@ +#define __rep_apply __rep_apply@DB_VERSION_UNIQUE_NAME@ +#define __rep_process_txn __rep_process_txn@DB_VERSION_UNIQUE_NAME@ +#define __rep_resend_req __rep_resend_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_check_doreq __rep_check_doreq@DB_VERSION_UNIQUE_NAME@ +#define __rep_check_missing __rep_check_missing@DB_VERSION_UNIQUE_NAME@ +#define __rep_open __rep_open@DB_VERSION_UNIQUE_NAME@ +#define __rep_close_diagfiles __rep_close_diagfiles@DB_VERSION_UNIQUE_NAME@ +#define __rep_env_refresh __rep_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __rep_env_close __rep_env_close@DB_VERSION_UNIQUE_NAME@ +#define __rep_preclose __rep_preclose@DB_VERSION_UNIQUE_NAME@ +#define __rep_closefiles __rep_closefiles@DB_VERSION_UNIQUE_NAME@ +#define __rep_write_egen __rep_write_egen@DB_VERSION_UNIQUE_NAME@ +#define __rep_write_gen 
__rep_write_gen@DB_VERSION_UNIQUE_NAME@ +#define __rep_stat_pp __rep_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_stat_print_pp __rep_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __rep_stat_print __rep_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_message __rep_bulk_message@DB_VERSION_UNIQUE_NAME@ +#define __rep_send_bulk __rep_send_bulk@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_alloc __rep_bulk_alloc@DB_VERSION_UNIQUE_NAME@ +#define __rep_bulk_free __rep_bulk_free@DB_VERSION_UNIQUE_NAME@ +#define __rep_send_message __rep_send_message@DB_VERSION_UNIQUE_NAME@ +#define __rep_new_master __rep_new_master@DB_VERSION_UNIQUE_NAME@ +#define __rep_elect_done __rep_elect_done@DB_VERSION_UNIQUE_NAME@ +#define __env_rep_enter __env_rep_enter@DB_VERSION_UNIQUE_NAME@ +#define __env_db_rep_exit __env_db_rep_exit@DB_VERSION_UNIQUE_NAME@ +#define __db_rep_enter __db_rep_enter@DB_VERSION_UNIQUE_NAME@ +#define __op_handle_enter __op_handle_enter@DB_VERSION_UNIQUE_NAME@ +#define __op_rep_enter __op_rep_enter@DB_VERSION_UNIQUE_NAME@ +#define __op_rep_exit __op_rep_exit@DB_VERSION_UNIQUE_NAME@ +#define __archive_rep_enter __archive_rep_enter@DB_VERSION_UNIQUE_NAME@ +#define __archive_rep_exit __archive_rep_exit@DB_VERSION_UNIQUE_NAME@ +#define __rep_lockout_archive __rep_lockout_archive@DB_VERSION_UNIQUE_NAME@ +#define __rep_lockout_api __rep_lockout_api@DB_VERSION_UNIQUE_NAME@ +#define __rep_take_apilockout __rep_take_apilockout@DB_VERSION_UNIQUE_NAME@ +#define __rep_clear_apilockout __rep_clear_apilockout@DB_VERSION_UNIQUE_NAME@ +#define __rep_lockout_apply __rep_lockout_apply@DB_VERSION_UNIQUE_NAME@ +#define __rep_lockout_msg __rep_lockout_msg@DB_VERSION_UNIQUE_NAME@ +#define __rep_send_throttle __rep_send_throttle@DB_VERSION_UNIQUE_NAME@ +#define __rep_msg_to_old __rep_msg_to_old@DB_VERSION_UNIQUE_NAME@ +#define __rep_msg_from_old __rep_msg_from_old@DB_VERSION_UNIQUE_NAME@ +#define __rep_print_system __rep_print_system@DB_VERSION_UNIQUE_NAME@ +#define __rep_print 
__rep_print@DB_VERSION_UNIQUE_NAME@ +#define __rep_print_message __rep_print_message@DB_VERSION_UNIQUE_NAME@ +#define __rep_fire_event __rep_fire_event@DB_VERSION_UNIQUE_NAME@ +#define __rep_msg __rep_msg@DB_VERSION_UNIQUE_NAME@ +#define __rep_notify_threads __rep_notify_threads@DB_VERSION_UNIQUE_NAME@ +#define __rep_check_goal __rep_check_goal@DB_VERSION_UNIQUE_NAME@ +#define __rep_log_backup __rep_log_backup@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_maxpermlsn __rep_get_maxpermlsn@DB_VERSION_UNIQUE_NAME@ +#define __rep_is_internal_rep_file __rep_is_internal_rep_file@DB_VERSION_UNIQUE_NAME@ +#define __rep_get_datagen __rep_get_datagen@DB_VERSION_UNIQUE_NAME@ +#define __rep_verify __rep_verify@DB_VERSION_UNIQUE_NAME@ +#define __rep_verify_fail __rep_verify_fail@DB_VERSION_UNIQUE_NAME@ +#define __rep_verify_req __rep_verify_req@DB_VERSION_UNIQUE_NAME@ +#define __rep_dorecovery __rep_dorecovery@DB_VERSION_UNIQUE_NAME@ +#define __rep_verify_match __rep_verify_match@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_member_desc __repmgr_member_desc@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_recover __repmgr_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_handshake_marshal __repmgr_handshake_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_handshake_unmarshal __repmgr_handshake_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_v3handshake_marshal __repmgr_v3handshake_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_v3handshake_unmarshal __repmgr_v3handshake_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_v2handshake_marshal __repmgr_v2handshake_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_v2handshake_unmarshal __repmgr_v2handshake_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_parm_refresh_marshal __repmgr_parm_refresh_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_parm_refresh_unmarshal __repmgr_parm_refresh_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_permlsn_marshal __repmgr_permlsn_marshal@DB_VERSION_UNIQUE_NAME@ +#define 
__repmgr_permlsn_unmarshal __repmgr_permlsn_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_version_proposal_marshal __repmgr_version_proposal_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_version_proposal_unmarshal __repmgr_version_proposal_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_version_confirmation_marshal __repmgr_version_confirmation_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_version_confirmation_unmarshal __repmgr_version_confirmation_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_msg_hdr_marshal __repmgr_msg_hdr_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_msg_hdr_unmarshal __repmgr_msg_hdr_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_msg_metadata_marshal __repmgr_msg_metadata_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_msg_metadata_unmarshal __repmgr_msg_metadata_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membership_key_marshal __repmgr_membership_key_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membership_key_unmarshal __repmgr_membership_key_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membership_data_marshal __repmgr_membership_data_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membership_data_unmarshal __repmgr_membership_data_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_member_metadata_marshal __repmgr_member_metadata_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_member_metadata_unmarshal __repmgr_member_metadata_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_gm_fwd_marshal __repmgr_gm_fwd_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_gm_fwd_unmarshal __repmgr_gm_fwd_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membr_vers_marshal __repmgr_membr_vers_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_membr_vers_unmarshal __repmgr_membr_vers_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_info_marshal __repmgr_site_info_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_info_unmarshal __repmgr_site_info_unmarshal@DB_VERSION_UNIQUE_NAME@ 
+#define __repmgr_connect_reject_marshal __repmgr_connect_reject_marshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_connect_reject_unmarshal __repmgr_connect_reject_unmarshal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_member_print __repmgr_member_print@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_print __repmgr_init_print@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_election __repmgr_init_election@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_claim_victory __repmgr_claim_victory@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_turn_on_elections __repmgr_turn_on_elections@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_valid_config __repmgr_valid_config@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_autostart __repmgr_autostart@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_start_selector __repmgr_start_selector@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_close __repmgr_close@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_get_ack_policy __repmgr_get_ack_policy@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_env_create __repmgr_env_create@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_env_destroy __repmgr_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_stop_threads __repmgr_stop_threads@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_local_site __repmgr_local_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_channel __repmgr_channel@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_msg_dispatch __repmgr_set_msg_dispatch@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_msg __repmgr_send_msg@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_request __repmgr_send_request@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_response __repmgr_send_response@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_channel_close __repmgr_channel_close@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_channel_timeout __repmgr_channel_timeout@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_request_inval 
__repmgr_send_request_inval@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_channel_close_inval __repmgr_channel_close_inval@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_channel_timeout_inval __repmgr_channel_timeout_inval@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_join_group __repmgr_join_group@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site __repmgr_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_by_eid __repmgr_site_by_eid@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_get_site_address __repmgr_get_site_address@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_get_eid __repmgr_get_eid@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_get_config __repmgr_get_config@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_config __repmgr_site_config@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_close __repmgr_site_close@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_msg_thread __repmgr_msg_thread@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_err_resp __repmgr_send_err_resp@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_handle_event __repmgr_handle_event@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_update_membership __repmgr_update_membership@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_gm_version __repmgr_set_gm_version@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_setup_gmdb_op __repmgr_setup_gmdb_op@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_cleanup_gmdb_op __repmgr_cleanup_gmdb_op@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_hold_master_role __repmgr_hold_master_role@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_rlse_master_role __repmgr_rlse_master_role@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_sites __repmgr_set_sites@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_connect __repmgr_connect@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send __repmgr_send@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_sync_siteaddr __repmgr_sync_siteaddr@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_broadcast __repmgr_send_broadcast@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_one __repmgr_send_one@DB_VERSION_UNIQUE_NAME@ +#define 
__repmgr_send_many __repmgr_send_many@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_own_msg __repmgr_send_own_msg@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_write_iovecs __repmgr_write_iovecs@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_bust_connection __repmgr_bust_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_disable_connection __repmgr_disable_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_cleanup_defunct __repmgr_cleanup_defunct@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_close_connection __repmgr_close_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_decr_conn_ref __repmgr_decr_conn_ref@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_destroy_conn __repmgr_destroy_conn@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_pack_netaddr __repmgr_pack_netaddr@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_getaddr __repmgr_getaddr@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_listen __repmgr_listen@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_net_close __repmgr_net_close@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_net_destroy __repmgr_net_destroy@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_thread_start __repmgr_thread_start@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_thread_join __repmgr_thread_join@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_nonblock_conn __repmgr_set_nonblock_conn@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_nonblocking __repmgr_set_nonblocking@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_wake_waiters __repmgr_wake_waiters@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_await_cond __repmgr_await_cond@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_await_gmdbop __repmgr_await_gmdbop@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_compute_wait_deadline __repmgr_compute_wait_deadline@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_await_drain __repmgr_await_drain@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_alloc_cond __repmgr_alloc_cond@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_free_cond __repmgr_free_cond@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_env_create_pf 
__repmgr_env_create_pf@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_create_mutex_pf __repmgr_create_mutex_pf@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_destroy_mutex_pf __repmgr_destroy_mutex_pf@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init __repmgr_init@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_deinit __repmgr_deinit@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_waiters __repmgr_init_waiters@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_destroy_waiters __repmgr_destroy_waiters@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_lock_mutex __repmgr_lock_mutex@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_unlock_mutex __repmgr_unlock_mutex@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_signal __repmgr_signal@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_wake_msngers __repmgr_wake_msngers@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_wake_main_thread __repmgr_wake_main_thread@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_writev __repmgr_writev@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_readv __repmgr_readv@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_select_loop __repmgr_select_loop@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_queue_destroy __repmgr_queue_destroy@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_queue_get __repmgr_queue_get@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_queue_put __repmgr_queue_put@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_queue_size __repmgr_queue_size@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_member_recover __repmgr_member_recover@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_select_thread __repmgr_select_thread@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_bow_out __repmgr_bow_out@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_accept __repmgr_accept@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_compute_timeout __repmgr_compute_timeout@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_master_connection __repmgr_master_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_check_timeouts __repmgr_check_timeouts@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_first_try_connections 
__repmgr_first_try_connections@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_v1_handshake __repmgr_send_v1_handshake@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_read_from_site __repmgr_read_from_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_read_conn __repmgr_read_conn@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_prepare_simple_input __repmgr_prepare_simple_input@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_handshake __repmgr_send_handshake@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_find_version_info __repmgr_find_version_info@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_write_some __repmgr_write_some@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_stat_print_pp __repmgr_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_stat_print __repmgr_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@ +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_close __repmgr_close@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_get_ack_policy __repmgr_get_ack_policy@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_set_ack_policy __repmgr_set_ack_policy@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site __repmgr_site@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site_by_eid __repmgr_site_by_eid@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_local_site __repmgr_local_site@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_site_list __repmgr_site_list@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_start __repmgr_start@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_stat_pp __repmgr_stat_pp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_stat_print_pp 
__repmgr_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_handle_event __repmgr_handle_event@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_channel __repmgr_channel@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_set_msg_dispatch __repmgr_set_msg_dispatch@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_REPLICATION_THREADS +#define __repmgr_init_recover __repmgr_init_recover@DB_VERSION_UNIQUE_NAME@ +#endif +#define __repmgr_schedule_connection_attempt __repmgr_schedule_connection_attempt@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_reset_for_reading __repmgr_reset_for_reading@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_new_connection __repmgr_new_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_new_site __repmgr_new_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_create_mutex __repmgr_create_mutex@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_destroy_mutex __repmgr_destroy_mutex@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_cleanup_netaddr __repmgr_cleanup_netaddr@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_iovec_init __repmgr_iovec_init@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_add_buffer __repmgr_add_buffer@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_add_dbt __repmgr_add_dbt@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_update_consumed __repmgr_update_consumed@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_prepare_my_addr __repmgr_prepare_my_addr@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_get_nsites __repmgr_get_nsites@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_thread_failure __repmgr_thread_failure@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_format_eid_loc __repmgr_format_eid_loc@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_format_site_loc __repmgr_format_site_loc@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_format_addr_loc __repmgr_format_addr_loc@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_repstart __repmgr_repstart@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_become_master 
__repmgr_become_master@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_each_connection __repmgr_each_connection@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_open __repmgr_open@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_join __repmgr_join@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_env_refresh __repmgr_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_share_netaddrs __repmgr_share_netaddrs@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_copy_in_added_sites __repmgr_copy_in_added_sites@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_new_sites __repmgr_init_new_sites@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_failchk __repmgr_failchk@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_master_is_known __repmgr_master_is_known@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_stable_lsn __repmgr_stable_lsn@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_send_sync_msg __repmgr_send_sync_msg@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_marshal_member_list __repmgr_marshal_member_list@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_refresh_membership __repmgr_refresh_membership@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_reload_gmdb __repmgr_reload_gmdb@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_gmdb_version_cmp __repmgr_gmdb_version_cmp@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_save __repmgr_init_save@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_init_restore __repmgr_init_restore@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_defer_op __repmgr_defer_op@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_fire_conn_err_event __repmgr_fire_conn_err_event@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_print_conn_err __repmgr_print_conn_err@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_become_client __repmgr_become_client@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_lookup_site __repmgr_lookup_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_find_site __repmgr_find_site@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_set_membership __repmgr_set_membership@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_bcast_parm_refresh 
__repmgr_bcast_parm_refresh@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_chg_prio __repmgr_chg_prio@DB_VERSION_UNIQUE_NAME@ +#define __repmgr_bcast_own_msg __repmgr_bcast_own_msg@DB_VERSION_UNIQUE_NAME@ +#define __seq_stat __seq_stat@DB_VERSION_UNIQUE_NAME@ +#define __seq_stat_print __seq_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __db_get_seq_flags_fn __db_get_seq_flags_fn@DB_VERSION_UNIQUE_NAME@ +#define __db_get_seq_flags_fn __db_get_seq_flags_fn@DB_VERSION_UNIQUE_NAME@ +#define bdb_HCommand bdb_HCommand@DB_VERSION_UNIQUE_NAME@ +#if DB_DBM_HSEARCH != 0 +#define bdb_NdbmOpen bdb_NdbmOpen@DB_VERSION_UNIQUE_NAME@ +#endif +#if DB_DBM_HSEARCH != 0 +#define bdb_DbmCommand bdb_DbmCommand@DB_VERSION_UNIQUE_NAME@ +#endif +#define ndbm_Cmd ndbm_Cmd@DB_VERSION_UNIQUE_NAME@ +#define _DbInfoDelete _DbInfoDelete@DB_VERSION_UNIQUE_NAME@ +#define db_Cmd db_Cmd@DB_VERSION_UNIQUE_NAME@ +#define tcl_CompactStat tcl_CompactStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_rep_send tcl_rep_send@DB_VERSION_UNIQUE_NAME@ +#define dbc_Cmd dbc_Cmd@DB_VERSION_UNIQUE_NAME@ +#define env_Cmd env_Cmd@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvRemove tcl_EnvRemove@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvIdReset tcl_EnvIdReset@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvLsnReset tcl_EnvLsnReset@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvVerbose tcl_EnvVerbose@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvAttr tcl_EnvAttr@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvSetFlags tcl_EnvSetFlags@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvTest tcl_EnvTest@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvGetEncryptFlags tcl_EnvGetEncryptFlags@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvSetErrfile tcl_EnvSetErrfile@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvSetMsgfile tcl_EnvSetMsgfile@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvSetErrpfx tcl_EnvSetErrpfx@DB_VERSION_UNIQUE_NAME@ +#define tcl_EnvStatPrint tcl_EnvStatPrint@DB_VERSION_UNIQUE_NAME@ +#define _NewInfo _NewInfo@DB_VERSION_UNIQUE_NAME@ +#define _NameToPtr _NameToPtr@DB_VERSION_UNIQUE_NAME@ 
+#define _PtrToInfo _PtrToInfo@DB_VERSION_UNIQUE_NAME@ +#define _NameToInfo _NameToInfo@DB_VERSION_UNIQUE_NAME@ +#define _SetInfoData _SetInfoData@DB_VERSION_UNIQUE_NAME@ +#define _DeleteInfo _DeleteInfo@DB_VERSION_UNIQUE_NAME@ +#define _SetListElem _SetListElem@DB_VERSION_UNIQUE_NAME@ +#define _SetListElemInt _SetListElemInt@DB_VERSION_UNIQUE_NAME@ +#define _SetListElemWideInt _SetListElemWideInt@DB_VERSION_UNIQUE_NAME@ +#define _SetListRecnoElem _SetListRecnoElem@DB_VERSION_UNIQUE_NAME@ +#define _SetListHeapElem _SetListHeapElem@DB_VERSION_UNIQUE_NAME@ +#define _Set3DBTList _Set3DBTList@DB_VERSION_UNIQUE_NAME@ +#define _SetMultiList _SetMultiList@DB_VERSION_UNIQUE_NAME@ +#define _GetGlobPrefix _GetGlobPrefix@DB_VERSION_UNIQUE_NAME@ +#define _ReturnSetup _ReturnSetup@DB_VERSION_UNIQUE_NAME@ +#define _ErrorSetup _ErrorSetup@DB_VERSION_UNIQUE_NAME@ +#define _ErrorFunc _ErrorFunc@DB_VERSION_UNIQUE_NAME@ +#ifdef CONFIG_TEST +#define _EventFunc _EventFunc@DB_VERSION_UNIQUE_NAME@ +#endif +#define _GetLsn _GetLsn@DB_VERSION_UNIQUE_NAME@ +#define _GetRid _GetRid@DB_VERSION_UNIQUE_NAME@ +#define _GetUInt32 _GetUInt32@DB_VERSION_UNIQUE_NAME@ +#define _GetFlagsList _GetFlagsList@DB_VERSION_UNIQUE_NAME@ +#define _debug_check _debug_check@DB_VERSION_UNIQUE_NAME@ +#define _CopyObjBytes _CopyObjBytes@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockDetect tcl_LockDetect@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockGet tcl_LockGet@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockStat tcl_LockStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockStatPrint tcl_LockStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockTimeout tcl_LockTimeout@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockVec tcl_LockVec@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogArchive tcl_LogArchive@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogCompare tcl_LogCompare@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogFile tcl_LogFile@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogFlush tcl_LogFlush@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogGet tcl_LogGet@DB_VERSION_UNIQUE_NAME@ 
+#define tcl_LogPut tcl_LogPut@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogStat tcl_LogStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogStatPrint tcl_LogStatPrint@DB_VERSION_UNIQUE_NAME@ +#define logc_Cmd logc_Cmd@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogConfig tcl_LogConfig@DB_VERSION_UNIQUE_NAME@ +#define tcl_LogGetConfig tcl_LogGetConfig@DB_VERSION_UNIQUE_NAME@ +#define _MpInfoDelete _MpInfoDelete@DB_VERSION_UNIQUE_NAME@ +#define tcl_MpSync tcl_MpSync@DB_VERSION_UNIQUE_NAME@ +#define tcl_MpTrickle tcl_MpTrickle@DB_VERSION_UNIQUE_NAME@ +#define tcl_Mp tcl_Mp@DB_VERSION_UNIQUE_NAME@ +#define tcl_MpStat tcl_MpStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_MpStatPrint tcl_MpStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_Mutex tcl_Mutex@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutFree tcl_MutFree@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutGet tcl_MutGet@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutLock tcl_MutLock@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutSet tcl_MutSet@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutStat tcl_MutStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutStatPrint tcl_MutStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_MutUnlock tcl_MutUnlock@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepConfig tcl_RepConfig@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepGetTwo tcl_RepGetTwo@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepGetConfig tcl_RepGetConfig@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepGetTimeout tcl_RepGetTimeout@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepGetAckPolicy tcl_RepGetAckPolicy@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepGetLocalSite tcl_RepGetLocalSite@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepElect tcl_RepElect@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepFlush tcl_RepFlush@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepSync tcl_RepSync@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepLease tcl_RepLease@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepInmemFiles tcl_RepInmemFiles@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepLimit tcl_RepLimit@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepNSites tcl_RepNSites@DB_VERSION_UNIQUE_NAME@ +#define 
tcl_RepRequest tcl_RepRequest@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepNoarchiveTimeout tcl_RepNoarchiveTimeout@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepTransport tcl_RepTransport@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepStart tcl_RepStart@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepProcessMessage tcl_RepProcessMessage@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepStat tcl_RepStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepStatPrint tcl_RepStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepMgr tcl_RepMgr@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepMgrSiteList tcl_RepMgrSiteList@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepMgrStat tcl_RepMgrStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepMgrStatPrint tcl_RepMgrStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_RepApplied tcl_RepApplied@DB_VERSION_UNIQUE_NAME@ +#define seq_Cmd seq_Cmd@DB_VERSION_UNIQUE_NAME@ +#define _TxnInfoDelete _TxnInfoDelete@DB_VERSION_UNIQUE_NAME@ +#define tcl_TxnCheckpoint tcl_TxnCheckpoint@DB_VERSION_UNIQUE_NAME@ +#define tcl_Txn tcl_Txn@DB_VERSION_UNIQUE_NAME@ +#define tcl_CDSGroup tcl_CDSGroup@DB_VERSION_UNIQUE_NAME@ +#define tcl_TxnStat tcl_TxnStat@DB_VERSION_UNIQUE_NAME@ +#define tcl_TxnStatPrint tcl_TxnStatPrint@DB_VERSION_UNIQUE_NAME@ +#define tcl_TxnTimeout tcl_TxnTimeout@DB_VERSION_UNIQUE_NAME@ +#define tcl_TxnRecover tcl_TxnRecover@DB_VERSION_UNIQUE_NAME@ +#define bdb_RandCommand bdb_RandCommand@DB_VERSION_UNIQUE_NAME@ +#define tcl_LockMutex tcl_LockMutex@DB_VERSION_UNIQUE_NAME@ +#define tcl_UnlockMutex tcl_UnlockMutex@DB_VERSION_UNIQUE_NAME@ +#define __txn_begin_pp __txn_begin_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_begin __txn_begin@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_id __txn_recycle_id@DB_VERSION_UNIQUE_NAME@ +#define __txn_continue __txn_continue@DB_VERSION_UNIQUE_NAME@ +#define __txn_commit __txn_commit@DB_VERSION_UNIQUE_NAME@ +#define __txn_abort __txn_abort@DB_VERSION_UNIQUE_NAME@ +#define __txn_discard_int __txn_discard_int@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare 
__txn_prepare@DB_VERSION_UNIQUE_NAME@ +#define __txn_id __txn_id@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_name __txn_get_name@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_name __txn_set_name@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_priority __txn_get_priority@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_priority __txn_set_priority@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_timeout __txn_set_timeout@DB_VERSION_UNIQUE_NAME@ +#define __txn_activekids __txn_activekids@DB_VERSION_UNIQUE_NAME@ +#define __txn_force_abort __txn_force_abort@DB_VERSION_UNIQUE_NAME@ +#define __txn_preclose __txn_preclose@DB_VERSION_UNIQUE_NAME@ +#define __txn_reset __txn_reset@DB_VERSION_UNIQUE_NAME@ +#define __txn_applied_pp __txn_applied_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_42_desc __txn_regop_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_desc __txn_regop_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_42_desc __txn_ckp_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_desc __txn_ckp_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_child_desc __txn_child_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_xa_regop_42_desc __txn_xa_regop_42_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare_desc __txn_prepare_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_desc __txn_recycle_desc@DB_VERSION_UNIQUE_NAME@ +#define __txn_init_recover __txn_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_42_print __txn_regop_42_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_print __txn_regop_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_42_print __txn_ckp_42_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_print __txn_ckp_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_child_print __txn_child_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_xa_regop_42_print __txn_xa_regop_42_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare_print __txn_prepare_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_print __txn_recycle_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_init_print 
__txn_init_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_checkpoint_pp __txn_checkpoint_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_checkpoint __txn_checkpoint@DB_VERSION_UNIQUE_NAME@ +#define __txn_getactive __txn_getactive@DB_VERSION_UNIQUE_NAME@ +#define __txn_getckp __txn_getckp@DB_VERSION_UNIQUE_NAME@ +#define __txn_updateckp __txn_updateckp@DB_VERSION_UNIQUE_NAME@ +#define __txn_failchk __txn_failchk@DB_VERSION_UNIQUE_NAME@ +#define __txn_env_create __txn_env_create@DB_VERSION_UNIQUE_NAME@ +#define __txn_env_destroy __txn_env_destroy@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_tx_max __txn_get_tx_max@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_tx_max __txn_set_tx_max@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_tx_timestamp __txn_get_tx_timestamp@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_tx_timestamp __txn_set_tx_timestamp@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_recover __txn_regop_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare_recover __txn_prepare_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_recover __txn_ckp_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_child_recover __txn_child_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_restore_txn __txn_restore_txn@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_recover __txn_recycle_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_regop_42_recover __txn_regop_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_ckp_42_recover __txn_ckp_42_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_recover_pp __txn_recover_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_recover __txn_recover@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_prepared __txn_get_prepared@DB_VERSION_UNIQUE_NAME@ +#define __txn_openfiles __txn_openfiles@DB_VERSION_UNIQUE_NAME@ +#define __txn_open __txn_open@DB_VERSION_UNIQUE_NAME@ +#define __txn_findlastckp __txn_findlastckp@DB_VERSION_UNIQUE_NAME@ +#define __txn_env_refresh __txn_env_refresh@DB_VERSION_UNIQUE_NAME@ +#define __txn_region_mutex_count __txn_region_mutex_count@DB_VERSION_UNIQUE_NAME@ +#define 
__txn_region_mutex_max __txn_region_mutex_max@DB_VERSION_UNIQUE_NAME@ +#define __txn_region_size __txn_region_size@DB_VERSION_UNIQUE_NAME@ +#define __txn_region_max __txn_region_max@DB_VERSION_UNIQUE_NAME@ +#define __txn_id_set __txn_id_set@DB_VERSION_UNIQUE_NAME@ +#define __txn_oldest_reader __txn_oldest_reader@DB_VERSION_UNIQUE_NAME@ +#define __txn_add_buffer __txn_add_buffer@DB_VERSION_UNIQUE_NAME@ +#define __txn_remove_buffer __txn_remove_buffer@DB_VERSION_UNIQUE_NAME@ +#define __txn_stat_pp __txn_stat_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_stat_print_pp __txn_stat_print_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_stat_print __txn_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_closeevent __txn_closeevent@DB_VERSION_UNIQUE_NAME@ +#define __txn_remevent __txn_remevent@DB_VERSION_UNIQUE_NAME@ +#define __txn_remrem __txn_remrem@DB_VERSION_UNIQUE_NAME@ +#define __txn_lockevent __txn_lockevent@DB_VERSION_UNIQUE_NAME@ +#define __txn_remlock __txn_remlock@DB_VERSION_UNIQUE_NAME@ +#define __txn_doevents __txn_doevents@DB_VERSION_UNIQUE_NAME@ +#define __txn_record_fname __txn_record_fname@DB_VERSION_UNIQUE_NAME@ +#define __txn_dref_fname __txn_dref_fname@DB_VERSION_UNIQUE_NAME@ +#define __txn_reset_fe_watermarks __txn_reset_fe_watermarks@DB_VERSION_UNIQUE_NAME@ +#define __txn_remove_fe_watermark __txn_remove_fe_watermark@DB_VERSION_UNIQUE_NAME@ +#define __txn_add_fe_watermark __txn_add_fe_watermark@DB_VERSION_UNIQUE_NAME@ +#define __txn_flush_fe_files __txn_flush_fe_files@DB_VERSION_UNIQUE_NAME@ +#define __txn_pg_above_fe_watermark __txn_pg_above_fe_watermark@DB_VERSION_UNIQUE_NAME@ +#define __db_rmid_to_env __db_rmid_to_env@DB_VERSION_UNIQUE_NAME@ +#define __db_xid_to_txn __db_xid_to_txn@DB_VERSION_UNIQUE_NAME@ +#define __db_map_rmid __db_map_rmid@DB_VERSION_UNIQUE_NAME@ +#define __db_unmap_rmid __db_unmap_rmid@DB_VERSION_UNIQUE_NAME@ +#define __db_unmap_xid __db_unmap_xid@DB_VERSION_UNIQUE_NAME@ +#define __db_global_values __db_global_values@DB_VERSION_UNIQUE_NAME@ 
+#define __repmgr_guesstimated_max __repmgr_guesstimated_max@DB_VERSION_UNIQUE_NAME@ +#define db_xa_switch db_xa_switch@DB_VERSION_UNIQUE_NAME@ + +#endif /* !_DB_INT_DEF_IN_ */ diff --git a/src/dbinc_auto/lock_ext.h b/src/dbinc_auto/lock_ext.h new file mode 100644 index 00000000..d5981e18 --- /dev/null +++ b/src/dbinc_auto/lock_ext.h @@ -0,0 +1,78 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _lock_ext_h_ +#define _lock_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __lock_vec_pp __P((DB_ENV *, u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); +int __lock_vec __P((ENV *, DB_LOCKER *, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); +int __lock_get_pp __P((DB_ENV *, u_int32_t, u_int32_t, DBT *, db_lockmode_t, DB_LOCK *)); +int __lock_get __P((ENV *, DB_LOCKER *, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); +int __lock_get_internal __P((DB_LOCKTAB *, DB_LOCKER *, u_int32_t, const DBT *, db_lockmode_t, db_timeout_t, DB_LOCK *)); +int __lock_put_pp __P((DB_ENV *, DB_LOCK *)); +int __lock_put __P((ENV *, DB_LOCK *)); +int __lock_downgrade __P((ENV *, DB_LOCK *, db_lockmode_t, u_int32_t)); +int __lock_locker_same_family __P((ENV *, DB_LOCKER *, DB_LOCKER *, int *)); +int __lock_wakeup __P((ENV *, const DBT *)); +int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, int *, u_int32_t)); +int __lock_change __P((ENV *, DB_LOCK *, DB_LOCK *)); +int __lock_detect_pp __P((DB_ENV *, u_int32_t, u_int32_t, int *)); +int __lock_detect __P((ENV *, u_int32_t, int *)); +int __lock_failchk __P((ENV *)); +int __lock_id_pp __P((DB_ENV *, u_int32_t *)); +int __lock_id __P((ENV *, u_int32_t *, DB_LOCKER **)); +void __lock_set_thread_id __P((void *, pid_t, db_threadid_t)); +int __lock_id_free_pp __P((DB_ENV *, u_int32_t)); +int __lock_id_free __P((ENV *, DB_LOCKER *)); +int __lock_id_set __P((ENV *, u_int32_t, u_int32_t)); +int __lock_getlocker __P((DB_LOCKTAB *, u_int32_t, int, DB_LOCKER **)); +int __lock_getlocker_int __P((DB_LOCKTAB *, 
u_int32_t, int, DB_LOCKER **)); +int __lock_addfamilylocker __P((ENV *, u_int32_t, u_int32_t, u_int32_t)); +int __lock_freelocker __P((DB_LOCKTAB *, DB_LOCKER *)); +int __lock_familyremove __P((DB_LOCKTAB *, DB_LOCKER *)); +int __lock_fix_list __P((ENV *, DBT *, u_int32_t)); +int __lock_get_list __P((ENV *, DB_LOCKER *, u_int32_t, db_lockmode_t, DBT *)); +void __lock_list_print __P((ENV *, DB_MSGBUF *, DBT *)); +int __lock_env_create __P((DB_ENV *)); +void __lock_env_destroy __P((DB_ENV *)); +int __lock_get_lk_conflicts __P((DB_ENV *, const u_int8_t **, int *)); +int __lock_set_lk_conflicts __P((DB_ENV *, u_int8_t *, int)); +int __lock_get_lk_detect __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_detect __P((DB_ENV *, u_int32_t)); +int __lock_get_lk_max_locks __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_max_locks __P((DB_ENV *, u_int32_t)); +int __lock_get_lk_max_lockers __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_max_lockers __P((DB_ENV *, u_int32_t)); +int __lock_get_lk_max_objects __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_max_objects __P((DB_ENV *, u_int32_t)); +int __lock_get_lk_partitions __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_partitions __P((DB_ENV *, u_int32_t)); +int __lock_get_lk_tablesize __P((DB_ENV *, u_int32_t *)); +int __lock_set_lk_tablesize __P((DB_ENV *, u_int32_t)); +int __lock_set_lk_priority __P((DB_ENV *, u_int32_t, u_int32_t)); +int __lock_get_lk_priority __P((DB_ENV *, u_int32_t, u_int32_t *)); +int __lock_get_env_timeout __P((DB_ENV *, db_timeout_t *, u_int32_t)); +int __lock_set_env_timeout __P((DB_ENV *, db_timeout_t, u_int32_t)); +int __lock_open __P((ENV *)); +int __lock_env_refresh __P((ENV *)); +u_int32_t __lock_region_mutex_count __P((ENV *)); +u_int32_t __lock_region_mutex_max __P((ENV *)); +size_t __lock_region_max __P((ENV *)); +size_t __lock_region_size __P((ENV *, size_t)); +int __lock_stat_pp __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t)); +int __lock_stat_print_pp __P((DB_ENV *, u_int32_t)); +int 
__lock_stat_print __P((ENV *, u_int32_t)); +void __lock_printlock __P((DB_LOCKTAB *, DB_MSGBUF *mbp, struct __db_lock *, int)); +int __lock_set_timeout __P((ENV *, DB_LOCKER *, db_timeout_t, u_int32_t)); +int __lock_set_timeout_internal __P((ENV *, DB_LOCKER *, db_timeout_t, u_int32_t)); +int __lock_inherit_timeout __P((ENV *, DB_LOCKER *, DB_LOCKER *)); +u_int32_t __lock_ohash __P((const DBT *)); +u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); +int __lock_nomem __P((ENV *, const char *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_lock_ext_h_ */ diff --git a/src/dbinc_auto/log_ext.h b/src/dbinc_auto/log_ext.h new file mode 100644 index 00000000..9f6e09ab --- /dev/null +++ b/src/dbinc_auto/log_ext.h @@ -0,0 +1,207 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _log_ext_h_ +#define _log_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __log_open __P((ENV *)); +int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *)); +int __log_valid __P((DB_LOG *, u_int32_t, int, DB_FH **, u_int32_t, logfile_validity *, u_int32_t *)); +int __log_env_refresh __P((ENV *)); +int __log_get_cached_ckp_lsn __P((ENV *, DB_LSN *)); +u_int32_t __log_region_mutex_count __P((ENV *)); +u_int32_t __log_region_mutex_max __P((ENV *)); +size_t __log_region_size __P((ENV *)); +size_t __log_region_max __P((ENV *)); +int __log_vtruncate __P((ENV *, DB_LSN *, DB_LSN *, DB_LSN *)); +int __log_is_outdated __P((ENV *, u_int32_t, int *)); +int __log_zero __P((ENV *, DB_LSN *)); +int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *)); +int __log_inmem_newfile __P((DB_LOG *, u_int32_t)); +int __log_inmem_chkspace __P((DB_LOG *, size_t)); +void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t)); +void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t)); +void __log_set_version __P((ENV *, u_int32_t)); +int __log_get_oldversion __P((ENV *, u_int32_t *)); +int __log_archive_pp __P((DB_ENV *, char **[], u_int32_t)); +int 
__log_get_stable_lsn __P((ENV *, DB_LSN *, int)); +void __log_autoremove __P((ENV *)); +int __log_check_page_lsn __P((ENV *, DB *, DB_LSN *)); +int __log_printf_capi __P((DB_ENV *, DB_TXN *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +int __log_printf_pp __P((DB_ENV *, DB_TXN *, const char *, va_list)); +int __log_printf __P((ENV *, DB_TXN *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t)); +int __log_cursor __P((ENV *, DB_LOGC **)); +int __logc_close __P((DB_LOGC *)); +int __logc_version __P((DB_LOGC *, u_int32_t *)); +int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); +void __log_hdrswap __P((HDR *, int)); +void __log_persistswap __P((LOGP *)); +int __log_read_record_pp __P((DB_ENV *, DB **, void *, void *, DB_LOG_RECSPEC *, u_int32_t, void **)); +int __log_read_record __P((ENV *, DB **, void *, void *, DB_LOG_RECSPEC *, u_int32_t, void **)); +int __log_env_create __P((DB_ENV *)); +void __log_env_destroy __P((DB_ENV *)); +int __log_get_lg_bsize __P((DB_ENV *, u_int32_t *)); +int __log_set_lg_bsize __P((DB_ENV *, u_int32_t)); +int __log_get_lg_filemode __P((DB_ENV *, int *)); +int __log_set_lg_filemode __P((DB_ENV *, int)); +int __log_get_lg_max __P((DB_ENV *, u_int32_t *)); +int __log_set_lg_max __P((DB_ENV *, u_int32_t)); +int __log_get_lg_regionmax __P((DB_ENV *, u_int32_t *)); +int __log_set_lg_regionmax __P((DB_ENV *, u_int32_t)); +int __log_get_lg_dir __P((DB_ENV *, const char **)); +int __log_set_lg_dir __P((DB_ENV *, const char *)); +void __log_get_flags __P((DB_ENV *, u_int32_t *)); +void __log_set_flags __P((ENV *, u_int32_t, int)); +int __log_get_config __P((DB_ENV *, u_int32_t, int *)); +int __log_set_config __P((DB_ENV *, u_int32_t, int)); +int __log_set_config_int __P((DB_ENV *, u_int32_t, int, int)); +int __log_check_sizes __P((ENV *, u_int32_t, u_int32_t)); +int __log_print_record __P((ENV *, DBT *, DB_LSN *, char *, DB_LOG_RECSPEC 
*, void *)); +int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); +int __log_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t)); +int __log_current_lsn_int __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *)); +int __log_current_lsn __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *)); +int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t, u_int32_t)); +int __log_flush_pp __P((DB_ENV *, const DB_LSN *)); +int __log_flush __P((ENV *, const DB_LSN *)); +int __log_flush_int __P((DB_LOG *, const DB_LSN *, int)); +int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t)); +int __log_name __P((DB_LOG *, u_int32_t, char **, DB_FH **, u_int32_t)); +int __log_rep_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t)); +int __log_put_record_pp __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, ...)); +int __log_put_record __P((ENV *, DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, ...)); +int __log_stat_pp __P((DB_ENV *, DB_LOG_STAT **, u_int32_t)); +int __log_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __log_stat_print __P((ENV *, u_int32_t)); +int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); +int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *, DB_THREAD_INFO *)); +int __log_verify_wrap __P((ENV *, const char *, u_int32_t, const char *, const char *, time_t, time_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t, int, int)); +int __crdel_init_verify __P((ENV *, DB_DISTAB *)); +int __db_init_verify __P((ENV *, DB_DISTAB *)); +int __dbreg_init_verify __P((ENV *, DB_DISTAB *)); +int __bam_init_verify __P((ENV *, DB_DISTAB *)); +int __fop_init_verify __P((ENV *, DB_DISTAB *)); +int __ham_init_verify __P((ENV *, DB_DISTAB *)); +int __heap_init_verify __P((ENV *, DB_DISTAB *)); +int __qam_init_verify __P((ENV *, DB_DISTAB *)); +int __txn_init_verify __P((ENV *, DB_DISTAB *)); +void __db_log_verify_global_report __P((const DB_LOG_VRFY_INFO *)); +int 
__crdel_metasub_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_create_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_rename_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_inmem_remove_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_addrem_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_ovref_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_debug_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_noop_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_free_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_free_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_cksum_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_freedata_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_init_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_sort_44_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pg_trunc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_realloc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_merge_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_pgno_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __dbreg_register_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int 
__bam_split_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_adj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_irep_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cadjust_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cdel_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_repl_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_root_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_curadj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rcuradj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_relink_43_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_merge_44_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_create_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_create_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_write_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_insdel_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpage_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitdata_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_copypage_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_verify 
__P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_changeslot_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_contract_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_curadj_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_chgpg_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_addrem_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_meta_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __heap_trunc_page_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_incfirst_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_mvptr_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_del_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_add_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delext_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_regop_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_regop_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_xa_regop_42_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_prepare_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_recycle_verify __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __create_log_vrfy_info __P((const DB_LOG_VERIFY_CONFIG *, DB_LOG_VRFY_INFO **, DB_THREAD_INFO *)); +int __destroy_log_vrfy_info __P((DB_LOG_VRFY_INFO *)); +int __put_txn_vrfy_info __P((const 
DB_LOG_VRFY_INFO *, const VRFY_TXN_INFO *)); +int __get_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, u_int32_t, VRFY_TXN_INFO **)); +int __add_recycle_lsn_range __P((DB_LOG_VRFY_INFO *, const DB_LSN *, u_int32_t, u_int32_t)); +int __iterate_txninfo __P((DB_LOG_VRFY_INFO *, u_int32_t, u_int32_t, TXNINFO_HANDLER, void *)); +int __rem_last_recycle_lsn __P((VRFY_TXN_INFO *)); +int __add_file_updated __P((VRFY_TXN_INFO *, const DBT *, int32_t)); +int __del_file_updated __P((VRFY_TXN_INFO *, const DBT *)); +int __clear_fileups __P((VRFY_TXN_INFO *)); +int __free_txninfo_stack __P((VRFY_TXN_INFO *)); +int __free_txninfo __P((VRFY_TXN_INFO *)); +int __put_filereg_info __P((const DB_LOG_VRFY_INFO *, const VRFY_FILEREG_INFO *)); +int __del_filelife __P((const DB_LOG_VRFY_INFO *, int32_t)); +int __put_filelife __P((const DB_LOG_VRFY_INFO *, VRFY_FILELIFE *)); +int __get_filelife __P((const DB_LOG_VRFY_INFO *, int32_t, VRFY_FILELIFE **)); +int __get_filereg_by_dbregid __P((const DB_LOG_VRFY_INFO *, int32_t, VRFY_FILEREG_INFO **)); +int __add_dbregid __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *, int32_t, u_int32_t, DB_LSN, DBTYPE, db_pgno_t, int *)); +int __get_filereg_info __P((const DB_LOG_VRFY_INFO *, const DBT *, VRFY_FILEREG_INFO **)); +int __free_filereg_info __P((VRFY_FILEREG_INFO *)); +int __get_ckp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN, VRFY_CKP_INFO **)); +int __get_last_ckp_info __P((const DB_LOG_VRFY_INFO *, VRFY_CKP_INFO **)); +int __put_ckp_info __P((const DB_LOG_VRFY_INFO *, const VRFY_CKP_INFO *)); +int __get_timestamp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN, VRFY_TIMESTAMP_INFO **)); +int __get_latest_timestamp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN, VRFY_TIMESTAMP_INFO **)); +int __put_timestamp_info __P((const DB_LOG_VRFY_INFO *, const VRFY_TIMESTAMP_INFO *)); +int __find_lsnrg_by_timerg __P((DB_LOG_VRFY_INFO *, time_t, time_t, DB_LSN *, DB_LSN *)); +int __add_txnrange __P((DB_LOG_VRFY_INFO *, u_int32_t, DB_LSN, int32_t, int)); +int __get_aborttxn 
__P((DB_LOG_VRFY_INFO *, DB_LSN)); +int __txn_started __P((DB_LOG_VRFY_INFO *, DB_LSN, u_int32_t, int *)); +int __set_logvrfy_dbfuid __P((DB_LOG_VRFY_INFO *)); +int __add_page_to_txn __P((DB_LOG_VRFY_INFO *, int32_t, db_pgno_t, u_int32_t, u_int32_t *, int *)); +int __del_txn_pages __P((DB_LOG_VRFY_INFO *, u_int32_t)); +int __is_ancestor_txn __P((DB_LOG_VRFY_INFO *, u_int32_t, u_int32_t, DB_LSN, int *)); +int __return_txn_pages __P((DB_LOG_VRFY_INFO *, u_int32_t, u_int32_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_log_ext_h_ */ diff --git a/src/dbinc_auto/mp_ext.h b/src/dbinc_auto/mp_ext.h new file mode 100644 index 00000000..ef2be17f --- /dev/null +++ b/src/dbinc_auto/mp_ext.h @@ -0,0 +1,102 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _mp_ext_h_ +#define _mp_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __memp_alloc __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, size_t, roff_t *, void *)); +void __memp_free __P((REGINFO *, void *)); +int __memp_bhwrite __P((DB_MPOOL *, DB_MPOOL_HASH *, MPOOLFILE *, BH *, int)); +int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); +int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int)); +int __memp_bhfree __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t)); +int __memp_fget_pp __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *)); +int __memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, void *)); +int __memp_fcreate_pp __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t)); +int __memp_fcreate __P((ENV *, DB_MPOOLFILE **)); +int __memp_set_clear_len __P((DB_MPOOLFILE *, u_int32_t)); +int __memp_get_fileid __P((DB_MPOOLFILE *, u_int8_t *)); +int __memp_set_fileid __P((DB_MPOOLFILE *, u_int8_t *)); +int __memp_get_flags __P((DB_MPOOLFILE *, u_int32_t *)); +int __memp_set_flags __P((DB_MPOOLFILE *, u_int32_t, int)); +int __memp_get_ftype __P((DB_MPOOLFILE *, int *)); +int __memp_set_ftype __P((DB_MPOOLFILE *, int)); +int 
__memp_set_lsn_offset __P((DB_MPOOLFILE *, int32_t)); +int __memp_get_pgcookie __P((DB_MPOOLFILE *, DBT *)); +int __memp_set_pgcookie __P((DB_MPOOLFILE *, DBT *)); +int __memp_get_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *)); +int __memp_get_last_pgno __P((DB_MPOOLFILE *, db_pgno_t *)); +char * __memp_fn __P((DB_MPOOLFILE *)); +char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); +int __memp_fopen_pp __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t)); +int __memp_fopen __P((DB_MPOOLFILE *, MPOOLFILE *, const char *, const char **, u_int32_t, int, size_t)); +int __memp_fclose_pp __P((DB_MPOOLFILE *, u_int32_t)); +int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t)); +int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *, int)); +int __memp_inmemlist __P((ENV *, char ***, int *)); +int __memp_fput_pp __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t)); +int __memp_fput __P((DB_MPOOLFILE *, DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY)); +int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *)); +int __memp_dirty __P((DB_MPOOLFILE *, void *, DB_THREAD_INFO *, DB_TXN *, DB_CACHE_PRIORITY, u_int32_t)); +int __memp_shared __P((DB_MPOOLFILE *, void *)); +int __memp_env_create __P((DB_ENV *)); +void __memp_env_destroy __P((DB_ENV *)); +int __memp_get_cachesize __P((DB_ENV *, u_int32_t *, u_int32_t *, int *)); +int __memp_set_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int)); +int __memp_set_config __P((DB_ENV *, u_int32_t, int)); +int __memp_get_config __P((DB_ENV *, u_int32_t, int *)); +int __memp_get_mp_max_openfd __P((DB_ENV *, int *)); +int __memp_set_mp_max_openfd __P((DB_ENV *, int)); +int __memp_get_mp_max_write __P((DB_ENV *, int *, db_timeout_t *)); +int __memp_set_mp_max_write __P((DB_ENV *, int, db_timeout_t)); +int __memp_get_mp_mmapsize __P((DB_ENV *, size_t *)); +int __memp_set_mp_mmapsize __P((DB_ENV *, size_t)); +int __memp_get_mp_pagesize __P((DB_ENV *, u_int32_t *)); +int __memp_set_mp_pagesize __P((DB_ENV *, u_int32_t)); +int 
__memp_get_mp_tablesize __P((DB_ENV *, u_int32_t *)); +int __memp_set_mp_tablesize __P((DB_ENV *, u_int32_t)); +int __memp_get_mp_mtxcount __P((DB_ENV *, u_int32_t *)); +int __memp_set_mp_mtxcount __P((DB_ENV *, u_int32_t)); +int __memp_nameop __P((ENV *, u_int8_t *, const char *, const char *, const char *, int)); +int __memp_ftruncate __P((DB_MPOOLFILE *, DB_TXN *, DB_THREAD_INFO *, db_pgno_t, u_int32_t)); +int __memp_alloc_freelist __P((DB_MPOOLFILE *, u_int32_t, db_pgno_t **)); +int __memp_free_freelist __P((DB_MPOOLFILE *)); +int __memp_get_freelist __P(( DB_MPOOLFILE *, u_int32_t *, db_pgno_t **)); +int __memp_extend_freelist __P(( DB_MPOOLFILE *, u_int32_t , db_pgno_t **)); +int __memp_set_last_pgno __P((DB_MPOOLFILE *, db_pgno_t)); +int __memp_bh_settxn __P((DB_MPOOL *, MPOOLFILE *mfp, BH *, void *)); +int __memp_skip_curadj __P((DBC *, db_pgno_t)); +int __memp_bh_freeze __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *, BH *, int *)); +int __memp_bh_thaw __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *, BH *, BH *)); +int __memp_open __P((ENV *, int)); +int __memp_init __P((ENV *, DB_MPOOL *, u_int, u_int32_t, u_int)); +u_int32_t __memp_max_regions __P((ENV *)); +u_int32_t __memp_region_mutex_count __P((ENV *)); +int __memp_env_refresh __P((ENV *)); +int __memp_register_pp __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t, void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); +int __memp_register __P((ENV *, int, int (*)(DB_ENV *, db_pgno_t, void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); +int __memp_get_bucket __P((ENV *, MPOOLFILE *, db_pgno_t, REGINFO **, DB_MPOOL_HASH **, u_int32_t *)); +int __memp_resize __P((DB_MPOOL *, u_int32_t, u_int32_t)); +int __memp_get_cache_max __P((DB_ENV *, u_int32_t *, u_int32_t *)); +int __memp_set_cache_max __P((DB_ENV *, u_int32_t, u_int32_t)); +int __memp_stat_pp __P((DB_ENV *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t)); +int __memp_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __memp_stat_print 
__P((ENV *, u_int32_t)); +void __memp_stat_hash __P((REGINFO *, MPOOL *, u_int32_t *)); +int __memp_walk_files __P((ENV *, MPOOL *, int (*) __P((ENV *, MPOOLFILE *, void *, u_int32_t *, u_int32_t)), void *, u_int32_t *, u_int32_t)); +int __memp_discard_all_mpfs __P((ENV *, MPOOL *)); +int __memp_sync_pp __P((DB_ENV *, DB_LSN *)); +int __memp_sync __P((ENV *, u_int32_t, DB_LSN *)); +int __memp_fsync_pp __P((DB_MPOOLFILE *)); +int __memp_fsync __P((DB_MPOOLFILE *)); +int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **)); +int __memp_sync_int __P((ENV *, DB_MPOOLFILE *, u_int32_t, u_int32_t, u_int32_t *, int *)); +int __memp_mf_sync __P((DB_MPOOL *, MPOOLFILE *, int)); +int __memp_trickle_pp __P((DB_ENV *, int, int *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_mp_ext_h_ */ diff --git a/src/dbinc_auto/mutex_ext.h b/src/dbinc_auto/mutex_ext.h new file mode 100644 index 00000000..1a2a1b2b --- /dev/null +++ b/src/dbinc_auto/mutex_ext.h @@ -0,0 +1,91 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _mutex_ext_h_ +#define _mutex_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __mutex_alloc __P((ENV *, int, u_int32_t, db_mutex_t *)); +int __mutex_alloc_int __P((ENV *, int, int, u_int32_t, db_mutex_t *)); +int __mutex_free __P((ENV *, db_mutex_t *)); +int __mutex_free_int __P((ENV *, int, db_mutex_t *)); +int __mutex_refresh __P((ENV *, db_mutex_t)); +int __mut_failchk __P((ENV *)); +int __db_fcntl_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); +int __db_fcntl_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); +int __db_fcntl_mutex_trylock __P((ENV *, db_mutex_t)); +int __db_fcntl_mutex_unlock __P((ENV *, db_mutex_t)); +int __db_fcntl_mutex_destroy __P((ENV *, db_mutex_t)); +int __mutex_alloc_pp __P((DB_ENV *, u_int32_t, db_mutex_t *)); +int __mutex_free_pp __P((DB_ENV *, db_mutex_t)); +int __mutex_lock_pp __P((DB_ENV *, db_mutex_t)); +int __mutex_unlock_pp __P((DB_ENV *, db_mutex_t)); +int __mutex_get_align __P((DB_ENV *, u_int32_t *)); +int __mutex_set_align __P((DB_ENV *, u_int32_t)); +int __mutex_get_increment __P((DB_ENV *, u_int32_t *)); +int __mutex_set_increment __P((DB_ENV *, u_int32_t)); +int __mutex_get_init __P((DB_ENV *, u_int32_t *)); +int __mutex_set_init __P((DB_ENV *, u_int32_t)); +int __mutex_get_max __P((DB_ENV *, u_int32_t *)); +int __mutex_set_max __P((DB_ENV *, u_int32_t)); +int __mutex_get_tas_spins __P((DB_ENV *, u_int32_t *)); +int __mutex_set_tas_spins __P((DB_ENV *, u_int32_t)); +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +atomic_value_t __atomic_inc __P((ENV *, db_atomic_t *)); +#endif +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +atomic_value_t __atomic_dec __P((ENV *, db_atomic_t *)); +#endif +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +int atomic_compare_exchange __P((ENV *, db_atomic_t *, atomic_value_t, atomic_value_t)); +#endif +int __db_pthread_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); +#ifndef HAVE_MUTEX_HYBRID +int 
__db_pthread_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); +#endif +#if defined(HAVE_SHARED_LATCHES) +int __db_pthread_mutex_readlock __P((ENV *, db_mutex_t)); +#endif +#ifdef HAVE_MUTEX_HYBRID +int __db_hybrid_mutex_suspend __P((ENV *, db_mutex_t, db_timespec *, int)); +#endif +int __db_pthread_mutex_unlock __P((ENV *, db_mutex_t)); +int __db_pthread_mutex_destroy __P((ENV *, db_mutex_t)); +int __mutex_open __P((ENV *, int)); +int __mutex_env_refresh __P((ENV *)); +void __mutex_resource_return __P((ENV *, REGINFO *)); +int __mutex_stat_pp __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t)); +int __mutex_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __mutex_stat_print __P((ENV *, u_int32_t)); +void __mutex_print_debug_single __P((ENV *, const char *, db_mutex_t, u_int32_t)); +void __mutex_print_debug_stats __P((ENV *, DB_MSGBUF *, db_mutex_t, u_int32_t)); +void __mutex_set_wait_info __P((ENV *, db_mutex_t, uintmax_t *, uintmax_t *)); +void __mutex_clear __P((ENV *, db_mutex_t)); +int __db_tas_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); +int __db_tas_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); +int __db_tas_mutex_trylock __P((ENV *, db_mutex_t)); +#if defined(HAVE_SHARED_LATCHES) +int __db_tas_mutex_readlock __P((ENV *, db_mutex_t)); +#endif +#if defined(HAVE_SHARED_LATCHES) +int __db_tas_mutex_tryreadlock __P((ENV *, db_mutex_t)); +#endif +int __db_tas_mutex_unlock __P((ENV *, db_mutex_t)); +int __db_tas_mutex_destroy __P((ENV *, db_mutex_t)); +int __db_win32_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); +int __db_win32_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); +int __db_win32_mutex_trylock __P((ENV *, db_mutex_t)); +#if defined(HAVE_SHARED_LATCHES) +int __db_win32_mutex_readlock __P((ENV *, db_mutex_t)); +#endif +#if defined(HAVE_SHARED_LATCHES) +int __db_win32_mutex_tryreadlock __P((ENV *, db_mutex_t)); +#endif +int __db_win32_mutex_unlock __P((ENV *, db_mutex_t)); +int __db_win32_mutex_destroy __P((ENV *, db_mutex_t)); + +#if 
defined(__cplusplus) +} +#endif +#endif /* !_mutex_ext_h_ */ diff --git a/src/dbinc_auto/os_ext.h b/src/dbinc_auto/os_ext.h new file mode 100644 index 00000000..dd63bf9c --- /dev/null +++ b/src/dbinc_auto/os_ext.h @@ -0,0 +1,83 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _os_ext_h_ +#define _os_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +void __os_abort __P((ENV *)); +int __os_abspath __P((const char *)); +#if defined(HAVE_REPLICATION_THREADS) +int __os_getaddrinfo __P((ENV *, const char *, u_int, const char *, const ADDRINFO *, ADDRINFO **)); +#endif +#if defined(HAVE_REPLICATION_THREADS) +void __os_freeaddrinfo __P((ENV *, ADDRINFO *)); +#endif +int __os_umalloc __P((ENV *, size_t, void *)); +int __os_urealloc __P((ENV *, size_t, void *)); +void __os_ufree __P((ENV *, void *)); +int __os_strdup __P((ENV *, const char *, void *)); +int __os_calloc __P((ENV *, size_t, size_t, void *)); +int __os_malloc __P((ENV *, size_t, void *)); +int __os_realloc __P((ENV *, size_t, void *)); +void __os_free __P((ENV *, void *)); +void *__ua_memcpy __P((void *, const void *, size_t)); +void __os_gettime __P((ENV *, db_timespec *, int)); +int __os_fs_notzero __P((void)); +int __os_support_direct_io __P((void)); +int __os_support_db_register __P((void)); +int __os_support_replication __P((void)); +u_int32_t __os_cpu_count __P((void)); +char *__os_ctime __P((const time_t *, char *)); +int __os_dirlist __P((ENV *, const char *, int, char ***, int *)); +void __os_dirfree __P((ENV *, char **, int)); +int __os_get_errno_ret_zero __P((void)); +int __os_get_errno __P((void)); +int __os_get_neterr __P((void)); +int __os_get_syserr __P((void)); +void __os_set_errno __P((int)); +char *__os_strerror __P((int, char *, size_t)); +int __os_posix_err __P((int)); +int __os_fileid __P((ENV *, const char *, int, u_int8_t *)); +int __os_fdlock __P((ENV *, DB_FH *, off_t, int, int)); +int __os_fsync __P((ENV *, DB_FH *)); +int __os_getenv __P((ENV *, const 
char *, char **, size_t)); +int __os_openhandle __P((ENV *, const char *, int, int, DB_FH **)); +int __os_closehandle __P((ENV *, DB_FH *)); +int __os_attach __P((ENV *, REGINFO *, REGION *)); +int __os_detach __P((ENV *, REGINFO *, int)); +int __os_mapfile __P((ENV *, char *, DB_FH *, size_t, int, void **)); +int __os_unmapfile __P((ENV *, void *, size_t)); +int __os_mkdir __P((ENV *, const char *, int)); +int __os_open __P((ENV *, const char *, u_int32_t, u_int32_t, int, DB_FH **)); +void __os_id __P((DB_ENV *, pid_t *, db_threadid_t*)); +int __os_rename __P((ENV *, const char *, const char *, u_int32_t)); +int __os_isroot __P((void)); +char *__db_rpath __P((const char *)); +int __os_io __P((ENV *, int, DB_FH *, db_pgno_t, u_int32_t, u_int32_t, u_int32_t, u_int8_t *, size_t *)); +int __os_read __P((ENV *, DB_FH *, void *, size_t, size_t *)); +int __os_write __P((ENV *, DB_FH *, void *, size_t, size_t *)); +int __os_physwrite __P((ENV *, DB_FH *, void *, size_t, size_t *)); +int __os_seek __P((ENV *, DB_FH *, db_pgno_t, u_int32_t, off_t)); +void __os_stack __P((ENV *)); +int __os_exists __P((ENV *, const char *, int *)); +int __os_ioinfo __P((ENV *, const char *, DB_FH *, u_int32_t *, u_int32_t *, u_int32_t *)); +int __os_tmpdir __P((ENV *, u_int32_t)); +int __os_truncate __P((ENV *, DB_FH *, db_pgno_t, u_int32_t)); +void __os_unique_id __P((ENV *, u_int32_t *)); +int __os_unlink __P((ENV *, const char *, int)); +void __os_yield __P((ENV *, u_long, u_long)); +#ifdef HAVE_QNX +int __os_qnx_region_open __P((ENV *, const char *, int, int, DB_FH **)); +#endif +int __os_is_winnt __P((void)); +u_int32_t __os_cpu_count __P((void)); +#ifdef HAVE_REPLICATION_THREADS +int __os_get_neterr __P((void)); +#endif + +#if defined(__cplusplus) +} +#endif +#endif /* !_os_ext_h_ */ diff --git a/src/dbinc_auto/qam_auto.h b/src/dbinc_auto/qam_auto.h new file mode 100644 index 00000000..fe7c2437 --- /dev/null +++ b/src/dbinc_auto/qam_auto.h @@ -0,0 +1,174 @@ +/* Do not edit: 
automatically built by gen_rec.awk. */ + +#ifndef __qam_AUTO_H +#define __qam_AUTO_H +#ifdef HAVE_QUEUE +#include "dbinc/log.h" +#define DB___qam_incfirst 84 +typedef struct ___qam_incfirst_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + db_recno_t recno; + db_pgno_t meta_pgno; +} __qam_incfirst_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __qam_incfirst_desc[]; +static inline int +__qam_incfirst_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, db_recno_t recno, db_pgno_t meta_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___qam_incfirst, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t), + __qam_incfirst_desc, recno, meta_pgno)); +} + +static inline int __qam_incfirst_read(ENV *env, + DB **dbpp, void *td, void *data, __qam_incfirst_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __qam_incfirst_desc, sizeof(__qam_incfirst_args), (void**)arg)); +} +#define DB___qam_mvptr 85 +typedef struct ___qam_mvptr_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_recno_t old_first; + db_recno_t new_first; + db_recno_t old_cur; + db_recno_t new_cur; + DB_LSN metalsn; + db_pgno_t meta_pgno; +} __qam_mvptr_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __qam_mvptr_desc[]; +static inline int +__qam_mvptr_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, db_recno_t old_first, db_recno_t new_first, db_recno_t old_cur, + db_recno_t new_cur, DB_LSN * metalsn, db_pgno_t meta_pgno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___qam_mvptr, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + sizeof(*metalsn) + sizeof(u_int32_t), + __qam_mvptr_desc, + opcode, old_first, 
new_first, old_cur, new_cur, metalsn, meta_pgno)); +} + +static inline int __qam_mvptr_read(ENV *env, + DB **dbpp, void *td, void *data, __qam_mvptr_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __qam_mvptr_desc, sizeof(__qam_mvptr_args), (void**)arg)); +} +#define DB___qam_del 79 +typedef struct ___qam_del_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; +} __qam_del_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __qam_del_desc[]; +static inline int +__qam_del_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * lsn, db_pgno_t pgno, u_int32_t indx, db_recno_t recno) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___qam_del, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __qam_del_desc, lsn, pgno, indx, recno)); +} + +static inline int __qam_del_read(ENV *env, + DB **dbpp, void *td, void *data, __qam_del_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __qam_del_desc, sizeof(__qam_del_args), (void**)arg)); +} +#define DB___qam_add 80 +typedef struct ___qam_add_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; + DBT data; + u_int32_t vflag; + DBT olddata; +} __qam_add_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __qam_add_desc[]; +static inline int +__qam_add_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * lsn, db_pgno_t pgno, u_int32_t indx, db_recno_t recno, + const DBT *data, u_int32_t vflag, const DBT *olddata) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___qam_add, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*lsn) + sizeof(u_int32_t) + + 
sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(data) + + sizeof(u_int32_t) + LOG_DBT_SIZE(olddata), + __qam_add_desc, lsn, pgno, indx, recno, data, vflag, olddata)); +} + +static inline int __qam_add_read(ENV *env, + DB **dbpp, void *td, void *data, __qam_add_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __qam_add_desc, sizeof(__qam_add_args), (void**)arg)); +} +#define DB___qam_delext 83 +typedef struct ___qam_delext_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; + DBT data; +} __qam_delext_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __qam_delext_desc[]; +static inline int +__qam_delext_log(DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, DB_LSN * lsn, db_pgno_t pgno, u_int32_t indx, db_recno_t recno, + const DBT *data) +{ + return (__log_put_record((dbp)->env, dbp, txnp, ret_lsnp, + flags, DB___qam_delext, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*lsn) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + LOG_DBT_SIZE(data), + __qam_delext_desc, lsn, pgno, indx, recno, data)); +} + +static inline int __qam_delext_read(ENV *env, + DB **dbpp, void *td, void *data, __qam_delext_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + dbpp, td, data, __qam_delext_desc, sizeof(__qam_delext_args), (void**)arg)); +} +#endif /* HAVE_QUEUE */ +#endif diff --git a/src/dbinc_auto/qam_ext.h b/src/dbinc_auto/qam_ext.h new file mode 100644 index 00000000..384bc988 --- /dev/null +++ b/src/dbinc_auto/qam_ext.h @@ -0,0 +1,67 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _qam_ext_h_ +#define _qam_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __qam_position __P((DBC *, db_recno_t *, u_int32_t, int *)); +int __qam_pitem __P((DBC *, QPAGE *, u_int32_t, db_recno_t, DBT *)); +int __qam_append __P((DBC *, DBT *, DBT *)); +int __qamc_dup __P((DBC *, DBC *)); +int __qamc_init __P((DBC *)); +int __qam_truncate __P((DBC *, u_int32_t *)); +int __qam_delete __P((DBC *, DBT *, u_int32_t)); +int __qam_init_recover __P((ENV *, DB_DISTAB *)); +int __qam_incfirst_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_mvptr_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_del_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_add_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delext_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_init_print __P((ENV *, DB_DISTAB *)); +int __qam_mswap __P((ENV *, PAGE *)); +int __qam_pgin_out __P((ENV *, db_pgno_t, void *, DBT *)); +int __qam_fprobe __P((DBC *, db_pgno_t, void *, qam_probe_mode, DB_CACHE_PRIORITY, u_int32_t)); +int __qam_fclose __P((DB *, db_pgno_t)); +int __qam_fremove __P((DB *, db_pgno_t)); +int __qam_sync __P((DB *)); +int __qam_gen_filelist __P((DB *, DB_THREAD_INFO *, QUEUE_FILELIST **)); +int __qam_extent_names __P((ENV *, char *, char ***)); +void __qam_exid __P((DB *, u_int8_t *, u_int32_t)); +int __qam_nameop __P((DB *, DB_TXN *, const char *, qam_name_op)); +int __qam_lsn_reset __P((DB *, DB_THREAD_INFO *)); +int __qam_db_create __P((DB *)); +int __qam_db_close __P((DB *, u_int32_t)); +int __qam_get_extentsize __P((DB *, u_int32_t *)); +int __queue_pageinfo __P((DB *, db_pgno_t *, db_pgno_t *, int *, int, u_int32_t)); +int __db_prqueue __P((DB *, u_int32_t)); +int __qam_remove __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t)); +int __qam_rename __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, const char *, const char *)); +void __qam_map_flags 
__P((DB *, u_int32_t *, u_int32_t *)); +int __qam_set_flags __P((DB *, u_int32_t *flagsp)); +int __qam_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, const char *, db_pgno_t, int, u_int32_t)); +int __qam_set_ext_data __P((DB*, const char *)); +int __qam_metachk __P((DB *, const char *, QMETA *)); +int __qam_new_file __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); +int __qam_incfirst_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_mvptr_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_del_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delext_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_add_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_stat __P((DBC *, void *, u_int32_t)); +int __qam_stat_print __P((DBC *, u_int32_t)); +int __db_no_queue_am __P((ENV *)); +int __qam_31_qammeta __P((DB *, char *, u_int8_t *)); +int __qam_32_qammeta __P((DB *, char *, u_int8_t *)); +int __qam_vrfy_meta __P((DB *, VRFY_DBINFO *, QMETA *, db_pgno_t, u_int32_t)); +int __qam_meta2pgset __P((DB *, VRFY_DBINFO *, DB *)); +int __qam_vrfy_data __P((DB *, VRFY_DBINFO *, QPAGE *, db_pgno_t, u_int32_t)); +int __qam_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t)); +int __qam_vrfy_walkqueue __P((DB *, VRFY_DBINFO *, void *, int (*)(void *, const void *), u_int32_t)); +int __qam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, void *, int (*)(void *, const void *), u_int32_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_qam_ext_h_ */ diff --git a/src/dbinc_auto/rep_automsg.h b/src/dbinc_auto/rep_automsg.h new file mode 100644 index 00000000..bddf8158 --- /dev/null +++ b/src/dbinc_auto/rep_automsg.h @@ -0,0 +1,106 @@ +/* Do not edit: automatically built by gen_msg.awk. 
*/ + +#ifndef __rep_AUTOMSG_H +#define __rep_AUTOMSG_H + +/* + * Message sizes are simply the sum of field sizes (not + * counting variable size parts, when DBTs are present), + * and may be different from struct sizes due to padding. + */ +#define __REP_BULK_SIZE 16 +typedef struct ___rep_bulk_args { + u_int32_t len; + DB_LSN lsn; + DBT bulkdata; +} __rep_bulk_args; + +#define __REP_CONTROL_SIZE 36 +typedef struct ___rep_control_args { + u_int32_t rep_version; + u_int32_t log_version; + DB_LSN lsn; + u_int32_t rectype; + u_int32_t gen; + u_int32_t msg_sec; + u_int32_t msg_nsec; + u_int32_t flags; +} __rep_control_args; + +#define __REP_EGEN_SIZE 4 +typedef struct ___rep_egen_args { + u_int32_t egen; +} __rep_egen_args; + +#define __REP_FILEINFO_SIZE 36 +typedef struct ___rep_fileinfo_args { + u_int32_t pgsize; + db_pgno_t pgno; + db_pgno_t max_pgno; + u_int32_t filenum; + u_int32_t finfo_flags; + u_int32_t type; + u_int32_t db_flags; + DBT uid; + DBT info; +} __rep_fileinfo_args; + +#define __REP_GRANT_INFO_SIZE 8 +typedef struct ___rep_grant_info_args { + u_int32_t msg_sec; + u_int32_t msg_nsec; +} __rep_grant_info_args; + +#define __REP_LOGREQ_SIZE 8 +typedef struct ___rep_logreq_args { + DB_LSN endlsn; +} __rep_logreq_args; + +#define __REP_NEWFILE_SIZE 4 +typedef struct ___rep_newfile_args { + u_int32_t version; +} __rep_newfile_args; + +#define __REP_UPDATE_SIZE 16 +typedef struct ___rep_update_args { + DB_LSN first_lsn; + u_int32_t first_vers; + u_int32_t num_files; +} __rep_update_args; + +#define __REP_VOTE_INFO_SIZE 28 +typedef struct ___rep_vote_info_args { + u_int32_t egen; + u_int32_t nsites; + u_int32_t nvotes; + u_int32_t priority; + u_int32_t spare_pri; + u_int32_t tiebreaker; + u_int32_t data_gen; +} __rep_vote_info_args; + +#define __REP_VOTE_INFO_V5_SIZE 20 +typedef struct ___rep_vote_info_v5_args { + u_int32_t egen; + u_int32_t nsites; + u_int32_t nvotes; + u_int32_t priority; + u_int32_t tiebreaker; +} __rep_vote_info_v5_args; + +#define 
__REP_LSN_HIST_KEY_SIZE 8 +typedef struct ___rep_lsn_hist_key_args { + u_int32_t version; + u_int32_t gen; +} __rep_lsn_hist_key_args; + +#define __REP_LSN_HIST_DATA_SIZE 20 +typedef struct ___rep_lsn_hist_data_args { + u_int32_t envid; + DB_LSN lsn; + u_int32_t hist_sec; + u_int32_t hist_nsec; +} __rep_lsn_hist_data_args; + +#define __REP_MAXMSG_SIZE 36 +#endif diff --git a/src/dbinc_auto/rep_ext.h b/src/dbinc_auto/rep_ext.h new file mode 100644 index 00000000..4cb1612a --- /dev/null +++ b/src/dbinc_auto/rep_ext.h @@ -0,0 +1,149 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _rep_ext_h_ +#define _rep_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __rep_bulk_marshal __P((ENV *, __rep_bulk_args *, u_int8_t *, size_t, size_t *)); +int __rep_bulk_unmarshal __P((ENV *, __rep_bulk_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_control_marshal __P((ENV *, __rep_control_args *, u_int8_t *, size_t, size_t *)); +int __rep_control_unmarshal __P((ENV *, __rep_control_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_egen_marshal __P((ENV *, __rep_egen_args *, u_int8_t *, size_t, size_t *)); +int __rep_egen_unmarshal __P((ENV *, __rep_egen_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_fileinfo_marshal __P((ENV *, u_int32_t, __rep_fileinfo_args *, u_int8_t *, size_t, size_t *)); +int __rep_fileinfo_unmarshal __P((ENV *, u_int32_t, __rep_fileinfo_args **, u_int8_t *, size_t, u_int8_t **)); +int __rep_grant_info_marshal __P((ENV *, __rep_grant_info_args *, u_int8_t *, size_t, size_t *)); +int __rep_grant_info_unmarshal __P((ENV *, __rep_grant_info_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_logreq_marshal __P((ENV *, __rep_logreq_args *, u_int8_t *, size_t, size_t *)); +int __rep_logreq_unmarshal __P((ENV *, __rep_logreq_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_newfile_marshal __P((ENV *, __rep_newfile_args *, u_int8_t *, size_t, size_t *)); +int __rep_newfile_unmarshal __P((ENV *, 
__rep_newfile_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_update_marshal __P((ENV *, u_int32_t, __rep_update_args *, u_int8_t *, size_t, size_t *)); +int __rep_update_unmarshal __P((ENV *, u_int32_t, __rep_update_args **, u_int8_t *, size_t, u_int8_t **)); +int __rep_vote_info_marshal __P((ENV *, __rep_vote_info_args *, u_int8_t *, size_t, size_t *)); +int __rep_vote_info_unmarshal __P((ENV *, __rep_vote_info_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_vote_info_v5_marshal __P((ENV *, __rep_vote_info_v5_args *, u_int8_t *, size_t, size_t *)); +int __rep_vote_info_v5_unmarshal __P((ENV *, __rep_vote_info_v5_args *, u_int8_t *, size_t, u_int8_t **)); +void __rep_lsn_hist_key_marshal __P((ENV *, __rep_lsn_hist_key_args *, u_int8_t *)); +int __rep_lsn_hist_key_unmarshal __P((ENV *, __rep_lsn_hist_key_args *, u_int8_t *, size_t, u_int8_t **)); +void __rep_lsn_hist_data_marshal __P((ENV *, __rep_lsn_hist_data_args *, u_int8_t *)); +int __rep_lsn_hist_data_unmarshal __P((ENV *, __rep_lsn_hist_data_args *, u_int8_t *, size_t, u_int8_t **)); +int __rep_update_req __P((ENV *, __rep_control_args *)); +int __rep_page_req __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); +int __rep_update_setup __P((ENV *, int, __rep_control_args *, DBT *, time_t, DB_LSN *)); +int __rep_bulk_page __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); +int __rep_page __P((ENV *, DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); +int __rep_init_cleanup __P((ENV *, REP *, int)); +int __rep_pggap_req __P((ENV *, REP *, __rep_fileinfo_args *, u_int32_t)); +int __rep_finfo_alloc __P((ENV *, __rep_fileinfo_args *, __rep_fileinfo_args **)); +int __rep_remove_init_file __P((ENV *)); +int __rep_reset_init __P((ENV *)); +int __rep_elect_pp __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); +int __rep_elect_int __P((ENV *, u_int32_t, u_int32_t, u_int32_t)); +int __rep_vote1 __P((ENV *, __rep_control_args *, DBT *, int)); +int __rep_vote2 __P((ENV *, 
__rep_control_args *, DBT *, int)); +int __rep_update_grant __P((ENV *, db_timespec *)); +int __rep_islease_granted __P((ENV *)); +int __rep_lease_table_alloc __P((ENV *, u_int32_t)); +int __rep_lease_grant __P((ENV *, __rep_control_args *, DBT *, int)); +int __rep_lease_check __P((ENV *, int)); +int __rep_lease_refresh __P((ENV *)); +int __rep_lease_expire __P((ENV *)); +db_timeout_t __rep_lease_waittime __P((ENV *)); +int __rep_allreq __P((ENV *, __rep_control_args *, int)); +int __rep_log __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, DBT *, int, time_t, DB_LSN *)); +int __rep_bulk_log __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, DBT *, time_t, DB_LSN *)); +int __rep_logreq __P((ENV *, __rep_control_args *, DBT *, int)); +int __rep_loggap_req __P((ENV *, REP *, DB_LSN *, u_int32_t)); +int __rep_logready __P((ENV *, REP *, time_t, DB_LSN *)); +int __rep_env_create __P((DB_ENV *)); +void __rep_env_destroy __P((DB_ENV *)); +int __rep_get_config __P((DB_ENV *, u_int32_t, int *)); +int __rep_set_config __P((DB_ENV *, u_int32_t, int)); +int __rep_start_pp __P((DB_ENV *, DBT *, u_int32_t)); +int __rep_start_int __P((ENV *, DBT *, u_int32_t)); +int __rep_open_sysdb __P((ENV *, DB_THREAD_INFO *, DB_TXN *, const char *, u_int32_t, DB **)); +int __rep_client_dbinit __P((ENV *, int, repdb_t)); +int __rep_get_limit __P((DB_ENV *, u_int32_t *, u_int32_t *)); +int __rep_set_limit __P((DB_ENV *, u_int32_t, u_int32_t)); +int __rep_set_nsites_pp __P((DB_ENV *, u_int32_t)); +int __rep_set_nsites_int __P((ENV *, u_int32_t)); +int __rep_get_nsites __P((DB_ENV *, u_int32_t *)); +int __rep_set_priority __P((DB_ENV *, u_int32_t)); +int __rep_get_priority __P((DB_ENV *, u_int32_t *)); +int __rep_set_timeout __P((DB_ENV *, int, db_timeout_t)); +int __rep_get_timeout __P((DB_ENV *, int, db_timeout_t *)); +int __rep_get_request __P((DB_ENV *, db_timeout_t *, db_timeout_t *)); +int __rep_set_request __P((DB_ENV *, db_timeout_t, db_timeout_t)); +int __rep_set_transport_pp 
__P((DB_ENV *, int, int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t))); +int __rep_set_transport_int __P((ENV *, int, int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t))); +int __rep_get_clockskew __P((DB_ENV *, u_int32_t *, u_int32_t *)); +int __rep_set_clockskew __P((DB_ENV *, u_int32_t, u_int32_t)); +int __rep_flush __P((DB_ENV *)); +int __rep_sync __P((DB_ENV *, u_int32_t)); +int __rep_txn_applied __P((ENV *, DB_THREAD_INFO *, DB_COMMIT_INFO *, db_timeout_t)); +int __rep_process_message_pp __P((DB_ENV *, DBT *, DBT *, int, DB_LSN *)); +int __rep_process_message_int __P((ENV *, DBT *, DBT *, int, DB_LSN *)); +int __rep_apply __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, DBT *, DB_LSN *, int *, DB_LSN *)); +int __rep_process_txn __P((ENV *, DBT *)); +int __rep_resend_req __P((ENV *, int)); +int __rep_check_doreq __P((ENV *, REP *)); +int __rep_check_missing __P((ENV *, u_int32_t, DB_LSN *)); +int __rep_open __P((ENV *)); +int __rep_close_diagfiles __P((ENV *)); +int __rep_env_refresh __P((ENV *)); +int __rep_env_close __P((ENV *)); +int __rep_preclose __P((ENV *)); +int __rep_closefiles __P((ENV *)); +int __rep_write_egen __P((ENV *, REP *, u_int32_t)); +int __rep_write_gen __P((ENV *, REP *, u_int32_t)); +int __rep_stat_pp __P((DB_ENV *, DB_REP_STAT **, u_int32_t)); +int __rep_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __rep_stat_print __P((ENV *, u_int32_t)); +int __rep_bulk_message __P((ENV *, REP_BULK *, REP_THROTTLE *, DB_LSN *, const DBT *, u_int32_t)); +int __rep_send_bulk __P((ENV *, REP_BULK *, u_int32_t)); +int __rep_bulk_alloc __P((ENV *, REP_BULK *, int, uintptr_t *, u_int32_t *, u_int32_t)); +int __rep_bulk_free __P((ENV *, REP_BULK *, u_int32_t)); +int __rep_send_message __P((ENV *, int, u_int32_t, DB_LSN *, const DBT *, u_int32_t, u_int32_t)); +int __rep_new_master __P((ENV *, __rep_control_args *, int)); +void __rep_elect_done __P((ENV *, REP *)); +int __env_rep_enter __P((ENV *, int)); 
+int __env_db_rep_exit __P((ENV *)); +int __db_rep_enter __P((DB *, int, int, int)); +int __op_handle_enter __P((ENV *)); +int __op_rep_enter __P((ENV *, int, int)); +int __op_rep_exit __P((ENV *)); +int __archive_rep_enter __P((ENV *)); +int __archive_rep_exit __P((ENV *)); +int __rep_lockout_archive __P((ENV *, REP *)); +int __rep_lockout_api __P((ENV *, REP *)); +int __rep_take_apilockout __P((ENV *)); +int __rep_clear_apilockout __P((ENV *)); +int __rep_lockout_apply __P((ENV *, REP *, u_int32_t)); +int __rep_lockout_msg __P((ENV *, REP *, u_int32_t)); +int __rep_send_throttle __P((ENV *, int, REP_THROTTLE *, u_int32_t, u_int32_t)); +u_int32_t __rep_msg_to_old __P((u_int32_t, u_int32_t)); +u_int32_t __rep_msg_from_old __P((u_int32_t, u_int32_t)); +int __rep_print_system __P((ENV *, u_int32_t, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +int __rep_print __P((ENV *, u_int32_t, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); +void __rep_print_message __P((ENV *, int, __rep_control_args *, char *, u_int32_t)); +void __rep_fire_event __P((ENV *, u_int32_t, void *)); +void __rep_msg __P((const ENV *, const char *)); +int __rep_notify_threads __P((ENV *, rep_waitreason_t)); +int __rep_check_goal __P((ENV *, struct rep_waitgoal *)); +int __rep_log_backup __P((ENV *, DB_LOGC *, DB_LSN *, u_int32_t)); +int __rep_get_maxpermlsn __P((ENV *, DB_LSN *)); +int __rep_is_internal_rep_file __P((char *)); +int __rep_get_datagen __P((ENV *, u_int32_t *)); +int __rep_verify __P((ENV *, __rep_control_args *, DBT *, int, time_t)); +int __rep_verify_fail __P((ENV *, __rep_control_args *)); +int __rep_verify_req __P((ENV *, __rep_control_args *, int)); +int __rep_dorecovery __P((ENV *, DB_LSN *, DB_LSN *)); +int __rep_verify_match __P((ENV *, DB_LSN *, time_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_rep_ext_h_ */ diff --git a/src/dbinc_auto/repmgr_auto.h b/src/dbinc_auto/repmgr_auto.h new file mode 100644 index 
00000000..5e9f386d --- /dev/null +++ b/src/dbinc_auto/repmgr_auto.h @@ -0,0 +1,41 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef __repmgr_AUTO_H +#define __repmgr_AUTO_H +#ifdef HAVE_REPLICATION_THREADS +#include "dbinc/log.h" +#define DB___repmgr_member 200 +typedef struct ___repmgr_member_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t version; + u_int32_t prev_status; + u_int32_t status; + DBT host; + u_int32_t port; +} __repmgr_member_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __repmgr_member_desc[]; +static inline int +__repmgr_member_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t version, u_int32_t prev_status, u_int32_t status, const DBT *host, u_int32_t port) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___repmgr_member, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + LOG_DBT_SIZE(host) + sizeof(u_int32_t), + __repmgr_member_desc, + version, prev_status, status, host, port)); +} + +static inline int __repmgr_member_read(ENV *env, + void *data, __repmgr_member_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __repmgr_member_desc, sizeof(__repmgr_member_args), (void**)arg)); +} +#endif /* HAVE_REPLICATION_THREADS */ +#endif diff --git a/src/dbinc_auto/repmgr_automsg.h b/src/dbinc_auto/repmgr_automsg.h new file mode 100644 index 00000000..1b2b928c --- /dev/null +++ b/src/dbinc_auto/repmgr_automsg.h @@ -0,0 +1,113 @@ +/* Do not edit: automatically built by gen_msg.awk. */ + +#ifndef __repmgr_AUTOMSG_H +#define __repmgr_AUTOMSG_H + +/* + * Message sizes are simply the sum of field sizes (not + * counting variable size parts, when DBTs are present), + * and may be different from struct sizes due to padding. 
+ */ +#define __REPMGR_HANDSHAKE_SIZE 12 +typedef struct ___repmgr_handshake_args { + u_int16_t port; + u_int16_t alignment; + u_int32_t ack_policy; + u_int32_t flags; +} __repmgr_handshake_args; + +#define __REPMGR_V3HANDSHAKE_SIZE 10 +typedef struct ___repmgr_v3handshake_args { + u_int16_t port; + u_int32_t priority; + u_int32_t flags; +} __repmgr_v3handshake_args; + +#define __REPMGR_V2HANDSHAKE_SIZE 6 +typedef struct ___repmgr_v2handshake_args { + u_int16_t port; + u_int32_t priority; +} __repmgr_v2handshake_args; + +#define __REPMGR_PARM_REFRESH_SIZE 8 +typedef struct ___repmgr_parm_refresh_args { + u_int32_t ack_policy; + u_int32_t flags; +} __repmgr_parm_refresh_args; + +#define __REPMGR_PERMLSN_SIZE 12 +typedef struct ___repmgr_permlsn_args { + u_int32_t generation; + DB_LSN lsn; +} __repmgr_permlsn_args; + +#define __REPMGR_VERSION_PROPOSAL_SIZE 8 +typedef struct ___repmgr_version_proposal_args { + u_int32_t min; + u_int32_t max; +} __repmgr_version_proposal_args; + +#define __REPMGR_VERSION_CONFIRMATION_SIZE 4 +typedef struct ___repmgr_version_confirmation_args { + u_int32_t version; +} __repmgr_version_confirmation_args; + +#define __REPMGR_MSG_HDR_SIZE 9 +typedef struct ___repmgr_msg_hdr_args { + u_int8_t type; + u_int32_t word1; + u_int32_t word2; +} __repmgr_msg_hdr_args; + +#define __REPMGR_MSG_METADATA_SIZE 12 +typedef struct ___repmgr_msg_metadata_args { + u_int32_t tag; + u_int32_t limit; + u_int32_t flags; +} __repmgr_msg_metadata_args; + +#define __REPMGR_MEMBERSHIP_KEY_SIZE 6 +typedef struct ___repmgr_membership_key_args { + DBT host; + u_int16_t port; +} __repmgr_membership_key_args; + +#define __REPMGR_MEMBERSHIP_DATA_SIZE 4 +typedef struct ___repmgr_membership_data_args { + u_int32_t flags; +} __repmgr_membership_data_args; + +#define __REPMGR_MEMBER_METADATA_SIZE 8 +typedef struct ___repmgr_member_metadata_args { + u_int32_t format; + u_int32_t version; +} __repmgr_member_metadata_args; + +#define __REPMGR_GM_FWD_SIZE 10 +typedef struct 
___repmgr_gm_fwd_args { + DBT host; + u_int16_t port; + u_int32_t gen; +} __repmgr_gm_fwd_args; + +#define __REPMGR_MEMBR_VERS_SIZE 8 +typedef struct ___repmgr_membr_vers_args { + u_int32_t version; + u_int32_t gen; +} __repmgr_membr_vers_args; + +#define __REPMGR_SITE_INFO_SIZE 10 +typedef struct ___repmgr_site_info_args { + DBT host; + u_int16_t port; + u_int32_t flags; +} __repmgr_site_info_args; + +#define __REPMGR_CONNECT_REJECT_SIZE 8 +typedef struct ___repmgr_connect_reject_args { + u_int32_t version; + u_int32_t gen; +} __repmgr_connect_reject_args; + +#define __REPMGR_MAXMSG_SIZE 12 +#endif diff --git a/src/dbinc_auto/repmgr_ext.h b/src/dbinc_auto/repmgr_ext.h new file mode 100644 index 00000000..49163a8a --- /dev/null +++ b/src/dbinc_auto/repmgr_ext.h @@ -0,0 +1,246 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _repmgr_ext_h_ +#define _repmgr_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __repmgr_init_recover __P((ENV *, DB_DISTAB *)); +void __repmgr_handshake_marshal __P((ENV *, __repmgr_handshake_args *, u_int8_t *)); +int __repmgr_handshake_unmarshal __P((ENV *, __repmgr_handshake_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_v3handshake_marshal __P((ENV *, __repmgr_v3handshake_args *, u_int8_t *)); +int __repmgr_v3handshake_unmarshal __P((ENV *, __repmgr_v3handshake_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_v2handshake_marshal __P((ENV *, __repmgr_v2handshake_args *, u_int8_t *)); +int __repmgr_v2handshake_unmarshal __P((ENV *, __repmgr_v2handshake_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_parm_refresh_marshal __P((ENV *, __repmgr_parm_refresh_args *, u_int8_t *)); +int __repmgr_parm_refresh_unmarshal __P((ENV *, __repmgr_parm_refresh_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_permlsn_marshal __P((ENV *, __repmgr_permlsn_args *, u_int8_t *)); +int __repmgr_permlsn_unmarshal __P((ENV *, __repmgr_permlsn_args *, u_int8_t *, size_t, u_int8_t **)); +void 
__repmgr_version_proposal_marshal __P((ENV *, __repmgr_version_proposal_args *, u_int8_t *)); +int __repmgr_version_proposal_unmarshal __P((ENV *, __repmgr_version_proposal_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_version_confirmation_marshal __P((ENV *, __repmgr_version_confirmation_args *, u_int8_t *)); +int __repmgr_version_confirmation_unmarshal __P((ENV *, __repmgr_version_confirmation_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_msg_hdr_marshal __P((ENV *, __repmgr_msg_hdr_args *, u_int8_t *)); +int __repmgr_msg_hdr_unmarshal __P((ENV *, __repmgr_msg_hdr_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_msg_metadata_marshal __P((ENV *, __repmgr_msg_metadata_args *, u_int8_t *)); +int __repmgr_msg_metadata_unmarshal __P((ENV *, __repmgr_msg_metadata_args *, u_int8_t *, size_t, u_int8_t **)); +int __repmgr_membership_key_marshal __P((ENV *, __repmgr_membership_key_args *, u_int8_t *, size_t, size_t *)); +int __repmgr_membership_key_unmarshal __P((ENV *, __repmgr_membership_key_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_membership_data_marshal __P((ENV *, __repmgr_membership_data_args *, u_int8_t *)); +int __repmgr_membership_data_unmarshal __P((ENV *, __repmgr_membership_data_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_member_metadata_marshal __P((ENV *, __repmgr_member_metadata_args *, u_int8_t *)); +int __repmgr_member_metadata_unmarshal __P((ENV *, __repmgr_member_metadata_args *, u_int8_t *, size_t, u_int8_t **)); +int __repmgr_gm_fwd_marshal __P((ENV *, __repmgr_gm_fwd_args *, u_int8_t *, size_t, size_t *)); +int __repmgr_gm_fwd_unmarshal __P((ENV *, __repmgr_gm_fwd_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_membr_vers_marshal __P((ENV *, __repmgr_membr_vers_args *, u_int8_t *)); +int __repmgr_membr_vers_unmarshal __P((ENV *, __repmgr_membr_vers_args *, u_int8_t *, size_t, u_int8_t **)); +int __repmgr_site_info_marshal __P((ENV *, __repmgr_site_info_args *, u_int8_t *, 
size_t, size_t *)); +int __repmgr_site_info_unmarshal __P((ENV *, __repmgr_site_info_args *, u_int8_t *, size_t, u_int8_t **)); +void __repmgr_connect_reject_marshal __P((ENV *, __repmgr_connect_reject_args *, u_int8_t *)); +int __repmgr_connect_reject_unmarshal __P((ENV *, __repmgr_connect_reject_args *, u_int8_t *, size_t, u_int8_t **)); +int __repmgr_member_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __repmgr_init_print __P((ENV *, DB_DISTAB *)); +int __repmgr_init_election __P((ENV *, u_int32_t)); +int __repmgr_claim_victory __P((ENV *)); +int __repmgr_turn_on_elections __P((ENV *)); +int __repmgr_start __P((DB_ENV *, int, u_int32_t)); +int __repmgr_valid_config __P((ENV *, u_int32_t)); +int __repmgr_autostart __P((ENV *)); +int __repmgr_start_selector __P((ENV *)); +int __repmgr_close __P((ENV *)); +int __repmgr_set_ack_policy __P((DB_ENV *, int)); +int __repmgr_get_ack_policy __P((DB_ENV *, int *)); +int __repmgr_env_create __P((ENV *, DB_REP *)); +void __repmgr_env_destroy __P((ENV *, DB_REP *)); +int __repmgr_stop_threads __P((ENV *)); +int __repmgr_local_site __P((DB_ENV *, DB_SITE **)); +int __repmgr_channel __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); +int __repmgr_set_msg_dispatch __P((DB_ENV *, void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), u_int32_t)); +int __repmgr_send_msg __P((DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +int __repmgr_send_request __P((DB_CHANNEL *, DBT *, u_int32_t, DBT *, db_timeout_t, u_int32_t)); +int __repmgr_send_response __P((DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +int __repmgr_channel_close __P((DB_CHANNEL *, u_int32_t)); +int __repmgr_channel_timeout __P((DB_CHANNEL *, db_timeout_t)); +int __repmgr_send_request_inval __P((DB_CHANNEL *, DBT *, u_int32_t, DBT *, db_timeout_t, u_int32_t)); +int __repmgr_channel_close_inval __P((DB_CHANNEL *, u_int32_t)); +int __repmgr_channel_timeout_inval __P((DB_CHANNEL *, db_timeout_t)); +int __repmgr_join_group __P((ENV *)); +int __repmgr_site 
__P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t)); +int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **)); +int __repmgr_get_site_address __P((DB_SITE *, const char **, u_int *)); +int __repmgr_get_eid __P((DB_SITE *, int *)); +int __repmgr_get_config __P((DB_SITE *, u_int32_t, u_int32_t *)); +int __repmgr_site_config __P((DB_SITE *, u_int32_t, u_int32_t)); +int __repmgr_site_close __P((DB_SITE *)); +void *__repmgr_msg_thread __P((void *)); +int __repmgr_send_err_resp __P((ENV *, CHANNEL *, int)); +int __repmgr_handle_event __P((ENV *, u_int32_t, void *)); +int __repmgr_update_membership __P((ENV *, DB_THREAD_INFO *, int, u_int32_t)); +int __repmgr_set_gm_version __P((ENV *, DB_THREAD_INFO *, DB_TXN *, u_int32_t)); +int __repmgr_setup_gmdb_op __P((ENV *, DB_THREAD_INFO *, DB_TXN **, u_int32_t)); +int __repmgr_cleanup_gmdb_op __P((ENV *, int)); +int __repmgr_hold_master_role __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_rlse_master_role __P((ENV *)); +void __repmgr_set_sites __P((ENV *)); +int __repmgr_connect __P((ENV *, repmgr_netaddr_t *, REPMGR_CONNECTION **, int *)); +int __repmgr_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); +int __repmgr_sync_siteaddr __P((ENV *)); +int __repmgr_send_broadcast __P((ENV *, u_int, const DBT *, const DBT *, u_int *, u_int *)); +int __repmgr_send_one __P((ENV *, REPMGR_CONNECTION *, u_int, const DBT *, const DBT *, db_timeout_t)); +int __repmgr_send_many __P((ENV *, REPMGR_CONNECTION *, REPMGR_IOVECS *, db_timeout_t)); +int __repmgr_send_own_msg __P((ENV *, REPMGR_CONNECTION *, u_int32_t, u_int8_t *, u_int32_t)); +int __repmgr_write_iovecs __P((ENV *, REPMGR_CONNECTION *, REPMGR_IOVECS *, size_t *)); +int __repmgr_bust_connection __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_disable_connection __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_cleanup_defunct __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_close_connection __P((ENV *, REPMGR_CONNECTION *)); +int 
__repmgr_decr_conn_ref __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_destroy_conn __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_pack_netaddr __P((ENV *, const char *, u_int, repmgr_netaddr_t *)); +int __repmgr_getaddr __P((ENV *, const char *, u_int, int, ADDRINFO **)); +int __repmgr_listen __P((ENV *)); +int __repmgr_net_close __P((ENV *)); +void __repmgr_net_destroy __P((ENV *, DB_REP *)); +int __repmgr_thread_start __P((ENV *, REPMGR_RUNNABLE *)); +int __repmgr_thread_join __P((REPMGR_RUNNABLE *)); +int __repmgr_set_nonblock_conn __P((REPMGR_CONNECTION *)); +int __repmgr_set_nonblocking __P((socket_t)); +int __repmgr_wake_waiters __P((ENV *, waiter_t *)); +int __repmgr_await_cond __P((ENV *, PREDICATE, void *, db_timeout_t, waiter_t *)); +int __repmgr_await_gmdbop __P((ENV *)); +void __repmgr_compute_wait_deadline __P((ENV*, struct timespec *, db_timeout_t)); +int __repmgr_await_drain __P((ENV *, REPMGR_CONNECTION *, db_timeout_t)); +int __repmgr_alloc_cond __P((cond_var_t *)); +int __repmgr_free_cond __P((cond_var_t *)); +void __repmgr_env_create_pf __P((DB_REP *)); +int __repmgr_create_mutex_pf __P((mgr_mutex_t *)); +int __repmgr_destroy_mutex_pf __P((mgr_mutex_t *)); +int __repmgr_init __P((ENV *)); +int __repmgr_deinit __P((ENV *)); +int __repmgr_init_waiters __P((ENV *, waiter_t *)); +int __repmgr_destroy_waiters __P((ENV *, waiter_t *)); +int __repmgr_lock_mutex __P((mgr_mutex_t *)); +int __repmgr_unlock_mutex __P((mgr_mutex_t *)); +int __repmgr_signal __P((cond_var_t *)); +int __repmgr_wake_msngers __P((ENV*, u_int)); +int __repmgr_wake_main_thread __P((ENV*)); +int __repmgr_writev __P((socket_t, db_iovec_t *, int, size_t *)); +int __repmgr_readv __P((socket_t, db_iovec_t *, int, size_t *)); +int __repmgr_select_loop __P((ENV *)); +int __repmgr_queue_destroy __P((ENV *)); +int __repmgr_queue_get __P((ENV *, REPMGR_MESSAGE **, REPMGR_RUNNABLE *)); +int __repmgr_queue_put __P((ENV *, REPMGR_MESSAGE *)); +int __repmgr_queue_size __P((ENV *)); +int 
__repmgr_member_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +void *__repmgr_select_thread __P((void *)); +int __repmgr_bow_out __P((ENV *)); +int __repmgr_accept __P((ENV *)); +int __repmgr_compute_timeout __P((ENV *, db_timespec *)); +REPMGR_CONNECTION *__repmgr_master_connection __P((ENV *)); +int __repmgr_check_timeouts __P((ENV *)); +int __repmgr_first_try_connections __P((ENV *)); +int __repmgr_send_v1_handshake __P((ENV *, REPMGR_CONNECTION *, void *, size_t)); +int __repmgr_read_from_site __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_read_conn __P((REPMGR_CONNECTION *)); +int __repmgr_prepare_simple_input __P((ENV *, REPMGR_CONNECTION *, __repmgr_msg_hdr_args *)); +int __repmgr_send_handshake __P((ENV *, REPMGR_CONNECTION *, void *, size_t, u_int32_t)); +int __repmgr_find_version_info __P((ENV *, REPMGR_CONNECTION *, DBT *)); +int __repmgr_write_some __P((ENV *, REPMGR_CONNECTION *)); +int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); +int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __repmgr_stat_print __P((ENV *, u_int32_t)); +int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_close __P((ENV *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_get_ack_policy __P((DB_ENV *, int *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_set_ack_policy __P((DB_ENV *, int)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_local_site __P((DB_ENV *, DB_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_start __P((DB_ENV *, int, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int 
__repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_handle_event __P((ENV *, u_int32_t, void *)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_channel __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_set_msg_dispatch __P((DB_ENV *, void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), u_int32_t)); +#endif +#ifndef HAVE_REPLICATION_THREADS +int __repmgr_init_recover __P((ENV *, DB_DISTAB *)); +#endif +int __repmgr_schedule_connection_attempt __P((ENV *, u_int, int)); +void __repmgr_reset_for_reading __P((REPMGR_CONNECTION *)); +int __repmgr_new_connection __P((ENV *, REPMGR_CONNECTION **, socket_t, int)); +int __repmgr_new_site __P((ENV *, REPMGR_SITE**, const char *, u_int)); +int __repmgr_create_mutex __P((ENV *, mgr_mutex_t **)); +int __repmgr_destroy_mutex __P((ENV *, mgr_mutex_t *)); +void __repmgr_cleanup_netaddr __P((ENV *, repmgr_netaddr_t *)); +void __repmgr_iovec_init __P((REPMGR_IOVECS *)); +void __repmgr_add_buffer __P((REPMGR_IOVECS *, void *, size_t)); +void __repmgr_add_dbt __P((REPMGR_IOVECS *, const DBT *)); +int __repmgr_update_consumed __P((REPMGR_IOVECS *, size_t)); +int __repmgr_prepare_my_addr __P((ENV *, DBT *)); +int __repmgr_get_nsites __P((ENV *, u_int32_t *)); +int __repmgr_thread_failure __P((ENV *, int)); +char *__repmgr_format_eid_loc __P((DB_REP *, REPMGR_CONNECTION *, char *)); +char *__repmgr_format_site_loc __P((REPMGR_SITE *, char *)); +char *__repmgr_format_addr_loc __P((repmgr_netaddr_t *, char *)); +int __repmgr_repstart __P((ENV *, u_int32_t)); +int __repmgr_become_master __P((ENV *)); +int __repmgr_each_connection __P((ENV *, CONNECTION_ACTION, void *, int)); +int __repmgr_open __P((ENV *, void *)); +int __repmgr_join __P((ENV *, void *)); +int __repmgr_env_refresh __P((ENV *env)); +int 
__repmgr_share_netaddrs __P((ENV *, void *, u_int, u_int)); +int __repmgr_copy_in_added_sites __P((ENV *)); +int __repmgr_init_new_sites __P((ENV *, u_int, u_int)); +int __repmgr_failchk __P((ENV *)); +int __repmgr_master_is_known __P((ENV *)); +int __repmgr_stable_lsn __P((ENV *, DB_LSN *)); +int __repmgr_send_sync_msg __P((ENV *, REPMGR_CONNECTION *, u_int32_t, u_int8_t *, u_int32_t)); +int __repmgr_marshal_member_list __P((ENV *, u_int8_t **, size_t *)); +int __repmgr_refresh_membership __P((ENV *, u_int8_t *, size_t)); +int __repmgr_reload_gmdb __P((ENV *)); +int __repmgr_gmdb_version_cmp __P((ENV *, u_int32_t, u_int32_t)); +int __repmgr_init_save __P((ENV *, DBT *)); +int __repmgr_init_restore __P((ENV *, DBT *)); +int __repmgr_defer_op __P((ENV *, u_int32_t)); +void __repmgr_fire_conn_err_event __P((ENV *, REPMGR_CONNECTION *, int)); +void __repmgr_print_conn_err __P((ENV *, repmgr_netaddr_t *, int)); +int __repmgr_become_client __P((ENV *)); +REPMGR_SITE *__repmgr_lookup_site __P((ENV *, const char *, u_int)); +int __repmgr_find_site __P((ENV *, const char *, u_int, int *)); +int __repmgr_set_membership __P((ENV *, const char *, u_int, u_int32_t)); +int __repmgr_bcast_parm_refresh __P((ENV *)); +int __repmgr_chg_prio __P((ENV *, u_int32_t, u_int32_t)); +int __repmgr_bcast_own_msg __P((ENV *, u_int32_t, u_int8_t *, size_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_repmgr_ext_h_ */ diff --git a/src/dbinc_auto/sequence_ext.h b/src/dbinc_auto/sequence_ext.h new file mode 100644 index 00000000..a2c114cf --- /dev/null +++ b/src/dbinc_auto/sequence_ext.h @@ -0,0 +1,17 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _sequence_ext_h_ +#define _sequence_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __seq_stat __P((DB_SEQUENCE *, DB_SEQUENCE_STAT **, u_int32_t)); +int __seq_stat_print __P((DB_SEQUENCE *, u_int32_t)); +const FN * __db_get_seq_flags_fn __P((void)); +const FN * __db_get_seq_flags_fn __P((void)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_sequence_ext_h_ */ diff --git a/src/dbinc_auto/tcl_ext.h b/src/dbinc_auto/tcl_ext.h new file mode 100644 index 00000000..c532f770 --- /dev/null +++ b/src/dbinc_auto/tcl_ext.h @@ -0,0 +1,133 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _tcl_ext_h_ +#define _tcl_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int bdb_HCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +#if DB_DBM_HSEARCH != 0 +int bdb_NdbmOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBM **)); +#endif +#if DB_DBM_HSEARCH != 0 +int bdb_DbmCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*, int, DBM *)); +#endif +int ndbm_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +void _DbInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); +int db_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_CompactStat __P((Tcl_Interp *, DBTCL_INFO *)); +int tcl_rep_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); +int dbc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int env_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_EnvRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_EnvIdReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_EnvLsnReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_EnvVerbose __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *)); +int tcl_EnvAttr __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_EnvSetFlags __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *)); +int tcl_EnvTest __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int 
tcl_EnvGetEncryptFlags __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +void tcl_EnvSetErrfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *)); +void tcl_EnvSetMsgfile __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *)); +int tcl_EnvSetErrpfx __P((Tcl_Interp *, DB_ENV *, DBTCL_INFO *, char *)); +int tcl_EnvStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +DBTCL_INFO *_NewInfo __P((Tcl_Interp *, void *, char *, enum INFOTYPE)); +void *_NameToPtr __P((CONST char *)); +DBTCL_INFO *_PtrToInfo __P((CONST void *)); +DBTCL_INFO *_NameToInfo __P((CONST char *)); +void _SetInfoData __P((DBTCL_INFO *, void *)); +void _DeleteInfo __P((DBTCL_INFO *)); +int _SetListElem __P((Tcl_Interp *, Tcl_Obj *, void *, u_int32_t, void *, u_int32_t)); +int _SetListElemInt __P((Tcl_Interp *, Tcl_Obj *, void *, long)); +int _SetListElemWideInt __P((Tcl_Interp *, Tcl_Obj *, void *, int64_t)); +int _SetListRecnoElem __P((Tcl_Interp *, Tcl_Obj *, db_recno_t, u_char *, u_int32_t)); +int _SetListHeapElem __P((Tcl_Interp *, Tcl_Obj *, DB_HEAP_RID, u_char *, u_int32_t)); +int _Set3DBTList __P((Tcl_Interp *, Tcl_Obj *, DBT *, int, DBT *, int, DBT *)); +int _SetMultiList __P((Tcl_Interp *, Tcl_Obj *, DBT *, DBT*, DBTYPE, u_int32_t)); +int _GetGlobPrefix __P((char *, char **)); +int _ReturnSetup __P((Tcl_Interp *, int, int, char *)); +int _ErrorSetup __P((Tcl_Interp *, int, char *)); +void _ErrorFunc __P((const DB_ENV *, CONST char *, const char *)); +#ifdef CONFIG_TEST +void _EventFunc __P((DB_ENV *, u_int32_t, void *)); +#endif +int _GetLsn __P((Tcl_Interp *, Tcl_Obj *, DB_LSN *)); +int _GetRid __P((Tcl_Interp *, Tcl_Obj *, DB_HEAP_RID *)); +int _GetUInt32 __P((Tcl_Interp *, Tcl_Obj *, u_int32_t *)); +Tcl_Obj *_GetFlagsList __P((Tcl_Interp *, u_int32_t, const FN *)); +void _debug_check __P((void)); +int _CopyObjBytes __P((Tcl_Interp *, Tcl_Obj *obj, void *, u_int32_t *, int *)); +int tcl_LockDetect __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockGet 
__P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockTimeout __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockVec __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogArchive __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogCompare __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_LogFile __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogFlush __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogGet __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogPut __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int logc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_LogConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, Tcl_Obj *)); +int tcl_LogGetConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); +void _MpInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); +int tcl_MpSync __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MpTrickle __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_Mp __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int tcl_MpStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MpStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_Mutex __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MutFree __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MutGet __P((Tcl_Interp *, DB_ENV *, int)); +int tcl_MutLock __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MutSet __P((Tcl_Interp *, Tcl_Obj *, DB_ENV *, int)); +int tcl_MutStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MutStatPrint __P((Tcl_Interp *, int, Tcl_Obj * 
CONST*, DB_ENV *)); +int tcl_MutUnlock __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_RepConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); +int tcl_RepGetTwo __P((Tcl_Interp *, DB_ENV *, int)); +int tcl_RepGetConfig __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); +int tcl_RepGetTimeout __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *)); +int tcl_RepGetAckPolicy __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepGetLocalSite __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepElect __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepFlush __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepSync __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepLease __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepInmemFiles __P((Tcl_Interp *, DB_ENV *)); +int tcl_RepLimit __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepNSites __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepRequest __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepNoarchiveTimeout __P((Tcl_Interp *, DB_ENV *)); +int tcl_RepTransport __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *, DBTCL_INFO *)); +int tcl_RepStart __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepProcessMessage __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepStat __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_RepMgr __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepMgrSiteList __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepMgrStat __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int tcl_RepMgrStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_RepApplied __P((Tcl_Interp *, int, Tcl_Obj * CONST *, DB_ENV *)); +int seq_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +void _TxnInfoDelete 
__P((Tcl_Interp *, DBTCL_INFO *)); +int tcl_TxnCheckpoint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_Txn __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int tcl_CDSGroup __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int tcl_TxnStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_TxnStatPrint __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_TxnTimeout __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_TxnRecover __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int bdb_RandCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_LockMutex __P((DB_ENV *, db_mutex_t)); +int tcl_UnlockMutex __P((DB_ENV *, db_mutex_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_tcl_ext_h_ */ diff --git a/src/dbinc_auto/txn_auto.h b/src/dbinc_auto/txn_auto.h new file mode 100644 index 00000000..48cb066d --- /dev/null +++ b/src/dbinc_auto/txn_auto.h @@ -0,0 +1,220 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef __txn_AUTO_H +#define __txn_AUTO_H +#include "dbinc/log.h" +#define DB___txn_regop_42 10 +typedef struct ___txn_regop_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t timestamp; + DBT locks; +} __txn_regop_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_regop_42_desc[]; +static inline int __txn_regop_42_read(ENV *env, + void *data, __txn_regop_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_regop_42_desc, sizeof(__txn_regop_42_args), (void**)arg)); +} +#define DB___txn_regop 10 +typedef struct ___txn_regop_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t timestamp; + u_int32_t envid; + DBT locks; +} __txn_regop_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_regop_desc[]; +static inline int +__txn_regop_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, int32_t timestamp, u_int32_t envid, const DBT *locks) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___txn_regop, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int32_t) + + LOG_DBT_SIZE(locks), + __txn_regop_desc, + opcode, timestamp, envid, locks)); +} + +static inline int __txn_regop_read(ENV *env, + void *data, __txn_regop_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_regop_desc, sizeof(__txn_regop_args), (void**)arg)); +} +#define DB___txn_ckp_42 11 +typedef struct ___txn_ckp_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DB_LSN ckp_lsn; + DB_LSN last_ckp; + int32_t timestamp; + u_int32_t rep_gen; +} __txn_ckp_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_ckp_42_desc[]; +static inline int __txn_ckp_42_read(ENV *env, + void *data, __txn_ckp_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_ckp_42_desc, sizeof(__txn_ckp_42_args), 
(void**)arg)); +} +#define DB___txn_ckp 11 +typedef struct ___txn_ckp_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + DB_LSN ckp_lsn; + DB_LSN last_ckp; + int32_t timestamp; + u_int32_t envid; + u_int32_t spare; +} __txn_ckp_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_ckp_desc[]; +static inline int +__txn_ckp_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + DB_LSN * ckp_lsn, DB_LSN * last_ckp, int32_t timestamp, u_int32_t envid, u_int32_t spare) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___txn_ckp, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(*ckp_lsn) + sizeof(*last_ckp) + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __txn_ckp_desc, + ckp_lsn, last_ckp, timestamp, envid, spare)); +} + +static inline int __txn_ckp_read(ENV *env, + void *data, __txn_ckp_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_ckp_desc, sizeof(__txn_ckp_args), (void**)arg)); +} +#define DB___txn_child 12 +typedef struct ___txn_child_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t child; + DB_LSN c_lsn; +} __txn_child_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_child_desc[]; +static inline int +__txn_child_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t child, DB_LSN * c_lsn) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___txn_child, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(*c_lsn), + __txn_child_desc, + child, c_lsn)); +} + +static inline int __txn_child_read(ENV *env, + void *data, __txn_child_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_child_desc, sizeof(__txn_child_args), (void**)arg)); +} +#define DB___txn_xa_regop_42 13 +typedef struct ___txn_xa_regop_42_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT xid; + int32_t formatID; + 
u_int32_t gtrid; + u_int32_t bqual; + DB_LSN begin_lsn; + DBT locks; +} __txn_xa_regop_42_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_xa_regop_42_desc[]; +static inline int __txn_xa_regop_42_read(ENV *env, + void *data, __txn_xa_regop_42_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_xa_regop_42_desc, sizeof(__txn_xa_regop_42_args), (void**)arg)); +} +#define DB___txn_prepare 13 +typedef struct ___txn_prepare_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT gid; + DB_LSN begin_lsn; + DBT locks; +} __txn_prepare_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_prepare_desc[]; +static inline int +__txn_prepare_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t opcode, const DBT *gid, DB_LSN * begin_lsn, const DBT *locks) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___txn_prepare, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + LOG_DBT_SIZE(gid) + sizeof(*begin_lsn) + + LOG_DBT_SIZE(locks), + __txn_prepare_desc, + opcode, gid, begin_lsn, locks)); +} + +static inline int __txn_prepare_read(ENV *env, + void *data, __txn_prepare_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_prepare_desc, sizeof(__txn_prepare_args), (void**)arg)); +} +#define DB___txn_recycle 14 +typedef struct ___txn_recycle_args { + u_int32_t type; + DB_TXN *txnp; + DB_LSN prev_lsn; + u_int32_t min; + u_int32_t max; +} __txn_recycle_args; + +extern __DB_IMPORT DB_LOG_RECSPEC __txn_recycle_desc[]; +static inline int +__txn_recycle_log(ENV *env, DB_TXN *txnp, DB_LSN *ret_lsnp, u_int32_t flags, + u_int32_t min, u_int32_t max) +{ + return (__log_put_record(env, NULL, txnp, ret_lsnp, + flags, DB___txn_recycle, 0, + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN) + + sizeof(u_int32_t) + sizeof(u_int32_t), + __txn_recycle_desc, + min, max)); +} + +static inline int __txn_recycle_read(ENV *env, + 
void *data, __txn_recycle_args **arg) +{ + *arg = NULL; + return (__log_read_record(env, + NULL, NULL, data, __txn_recycle_desc, sizeof(__txn_recycle_args), (void**)arg)); +} +#endif diff --git a/src/dbinc_auto/txn_ext.h b/src/dbinc_auto/txn_ext.h new file mode 100644 index 00000000..7c21455f --- /dev/null +++ b/src/dbinc_auto/txn_ext.h @@ -0,0 +1,93 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _txn_ext_h_ +#define _txn_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); +int __txn_begin __P((ENV *, DB_THREAD_INFO *, DB_TXN *, DB_TXN **, u_int32_t)); +int __txn_recycle_id __P((ENV *, int)); +int __txn_continue __P((ENV *, DB_TXN *, TXN_DETAIL *, DB_THREAD_INFO *, int)); +int __txn_commit __P((DB_TXN *, u_int32_t)); +int __txn_abort __P((DB_TXN *)); +int __txn_discard_int __P((DB_TXN *, u_int32_t flags)); +int __txn_prepare __P((DB_TXN *, u_int8_t *)); +u_int32_t __txn_id __P((DB_TXN *)); +int __txn_get_name __P((DB_TXN *, const char **)); +int __txn_set_name __P((DB_TXN *, const char *)); +int __txn_get_priority __P((DB_TXN *, u_int32_t *)); +int __txn_set_priority __P((DB_TXN *, u_int32_t)); +int __txn_set_timeout __P((DB_TXN *, db_timeout_t, u_int32_t)); +int __txn_activekids __P((ENV *, u_int32_t, DB_TXN *)); +int __txn_force_abort __P((ENV *, u_int8_t *)); +int __txn_preclose __P((ENV *)); +int __txn_reset __P((ENV *)); +int __txn_applied_pp __P((DB_ENV *, DB_TXN_TOKEN *, db_timeout_t, u_int32_t)); +int __txn_init_recover __P((ENV *, DB_DISTAB *)); +int __txn_regop_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_regop_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_42_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_xa_regop_42_print __P((ENV *, DBT *, 
DB_LSN *, db_recops, void *)); +int __txn_prepare_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_recycle_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_init_print __P((ENV *, DB_DISTAB *)); +int __txn_checkpoint_pp __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); +int __txn_checkpoint __P((ENV *, u_int32_t, u_int32_t, u_int32_t)); +int __txn_getactive __P((ENV *, DB_LSN *)); +int __txn_getckp __P((ENV *, DB_LSN *)); +int __txn_updateckp __P((ENV *, DB_LSN *)); +int __txn_failchk __P((ENV *)); +int __txn_env_create __P((DB_ENV *)); +void __txn_env_destroy __P((DB_ENV *)); +int __txn_get_tx_max __P((DB_ENV *, u_int32_t *)); +int __txn_set_tx_max __P((DB_ENV *, u_int32_t)); +int __txn_get_tx_timestamp __P((DB_ENV *, time_t *)); +int __txn_set_tx_timestamp __P((DB_ENV *, time_t *)); +int __txn_regop_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_prepare_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_restore_txn __P((ENV *, DB_LSN *, __txn_prepare_args *)); +int __txn_recycle_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_regop_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_42_recover __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_recover_pp __P((DB_ENV *, DB_PREPLIST *, long, long *, u_int32_t)); +int __txn_recover __P((ENV *, DB_PREPLIST *, long, long *, u_int32_t)); +int __txn_get_prepared __P((ENV *, XID *, DB_PREPLIST *, long, long *, u_int32_t)); +int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int)); +int __txn_open __P((ENV *)); +int __txn_findlastckp __P((ENV *, DB_LSN *, DB_LSN *)); +int __txn_env_refresh __P((ENV *)); +u_int32_t __txn_region_mutex_count __P((ENV *)); +u_int32_t __txn_region_mutex_max __P((ENV *)); +size_t __txn_region_size __P((ENV *)); 
+size_t __txn_region_max __P((ENV *)); +int __txn_id_set __P((ENV *, u_int32_t, u_int32_t)); +int __txn_oldest_reader __P((ENV *, DB_LSN *)); +int __txn_add_buffer __P((ENV *, TXN_DETAIL *)); +int __txn_remove_buffer __P((ENV *, TXN_DETAIL *, db_mutex_t)); +int __txn_stat_pp __P((DB_ENV *, DB_TXN_STAT **, u_int32_t)); +int __txn_stat_print_pp __P((DB_ENV *, u_int32_t)); +int __txn_stat_print __P((ENV *, u_int32_t)); +int __txn_closeevent __P((ENV *, DB_TXN *, DB *)); +int __txn_remevent __P((ENV *, DB_TXN *, const char *, u_int8_t *, int)); +void __txn_remrem __P((ENV *, DB_TXN *, const char *)); +int __txn_lockevent __P((ENV *, DB_TXN *, DB *, DB_LOCK *, DB_LOCKER *)); +void __txn_remlock __P((ENV *, DB_TXN *, DB_LOCK *, DB_LOCKER *)); +int __txn_doevents __P((ENV *, DB_TXN *, int, int)); +int __txn_record_fname __P((ENV *, DB_TXN *, FNAME *)); +int __txn_dref_fname __P((ENV *, DB_TXN *)); +void __txn_reset_fe_watermarks __P((DB_TXN *)); +void __txn_remove_fe_watermark __P((DB_TXN *,DB *)); +void __txn_add_fe_watermark __P((DB_TXN *, DB *, db_pgno_t)); +int __txn_flush_fe_files __P((DB_TXN *)); +int __txn_pg_above_fe_watermark __P((DB_TXN*, MPOOLFILE*, db_pgno_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_txn_ext_h_ */ diff --git a/src/dbinc_auto/xa_ext.h b/src/dbinc_auto/xa_ext.h new file mode 100644 index 00000000..47a167f9 --- /dev/null +++ b/src/dbinc_auto/xa_ext.h @@ -0,0 +1,18 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _xa_ext_h_ +#define _xa_ext_h_ + +#if defined(__cplusplus) +extern "C" { +#endif + +int __db_rmid_to_env __P((int, ENV **)); +int __db_xid_to_txn __P((ENV *, XID *, TXN_DETAIL **)); +void __db_map_rmid __P((int, ENV *)); +int __db_unmap_rmid __P((int)); +void __db_unmap_xid __P((ENV *, XID *, size_t)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_xa_ext_h_ */ diff --git a/src/dbreg/dbreg.c b/src/dbreg/dbreg.c new file mode 100644 index 00000000..0a8ed4fe --- /dev/null +++ b/src/dbreg/dbreg.c @@ -0,0 +1,1004 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" +#include "dbinc/db_am.h" + +static int __dbreg_push_id __P((ENV *, int32_t)); +static int __dbreg_pop_id __P((ENV *, int32_t *)); +static int __dbreg_pluck_id __P((ENV *, int32_t)); + +/* + * The dbreg subsystem, as its name implies, registers database handles so + * that we can associate log messages with them without logging a filename + * or a full, unique DB ID. Instead, we assign each dbp an int32_t which is + * easy and cheap to log, and use this subsystem to map back and forth. + * + * Overview of how dbreg ids are managed: + * + * OPEN + * dbreg_setup (Creates FNAME struct.) + * dbreg_new_id (Assigns new ID to dbp and logs it. May be postponed + * until we attempt to log something else using that dbp, if the dbp + * was opened on a replication client.) + * + * CLOSE + * dbreg_close_id (Logs closure of dbp/revocation of ID.) + * dbreg_revoke_id (As name implies, revokes ID.) + * dbreg_teardown (Destroys FNAME.) + * + * RECOVERY + * dbreg_setup + * dbreg_assign_id (Assigns a particular ID we have in the log to a dbp.) 
+ * + * sometimes: dbreg_revoke_id; dbreg_teardown + * other times: normal close path + * + * A note about locking: + * + * FNAME structures are referenced only by their corresponding dbp's + * until they have a valid id. + * + * Once they have a valid id, they must get linked into the log + * region list so they can get logged on checkpoints. + * + * An FNAME that may/does have a valid id must be accessed under + * protection of the mtx_filelist, with the following exception: + * + * We don't want to have to grab the mtx_filelist on every log + * record, and it should be safe not to do so when we're just + * looking at the id, because once allocated, the id should + * not change under a handle until the handle is closed. + * + * If a handle is closed during an attempt by another thread to + * log with it, well, the application doing the close deserves to + * go down in flames and a lot else is about to fail anyway. + * + * When in the course of logging we encounter an invalid id + * and go to allocate it lazily, we *do* need to check again + * after grabbing the mutex, because it's possible to race with + * another thread that has also decided that it needs to allocate + * a id lazily. + * + * See SR #5623 for further discussion of the new dbreg design. + */ + +/* + * __dbreg_setup -- + * Allocate and initialize an FNAME structure. The FNAME structures + * live in the log shared region and map one-to-one with open database handles. + * When the handle needs to be logged, the FNAME should have a valid fid + * allocated. If the handle currently isn't logged, it still has an FNAME + * entry. If we later discover that the handle needs to be logged, we can + * allocate a id for it later. (This happens when the handle is on a + * replication client that later becomes a master.) 
+ * + * PUBLIC: int __dbreg_setup __P((DB *, const char *, const char *, u_int32_t)); + */ +int +__dbreg_setup(dbp, fname, dname, create_txnid) + DB *dbp; + const char *fname, *dname; + u_int32_t create_txnid; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; +#ifdef HAVE_STATISTICS + LOG *lp; +#endif + REGINFO *infop; + int ret; + size_t len; + void *p; + + env = dbp->env; + dblp = env->lg_handle; + infop = &dblp->reginfo; + + fnp = NULL; + p = NULL; + + /* Allocate an FNAME and, if necessary, a buffer for the name itself. */ + LOG_SYSTEM_LOCK(env); + if ((ret = __env_alloc(infop, sizeof(FNAME), &fnp)) != 0) + goto err; + +#ifdef HAVE_STATISTICS + lp = dblp->reginfo.primary; + if (++lp->stat.st_nfileid > lp->stat.st_maxnfileid) + lp->stat.st_maxnfileid = lp->stat.st_nfileid; +#endif + + memset(fnp, 0, sizeof(FNAME)); + if (fname == NULL) + fnp->fname_off = INVALID_ROFF; + else { + len = strlen(fname) + 1; + if ((ret = __env_alloc(infop, len, &p)) != 0) + goto err; + fnp->fname_off = R_OFFSET(infop, p); + memcpy(p, fname, len); + } + if (dname == NULL) + fnp->dname_off = INVALID_ROFF; + else { + len = strlen(dname) + 1; + if ((ret = __env_alloc(infop, len, &p)) != 0) + goto err; + fnp->dname_off = R_OFFSET(infop, p); + memcpy(p, dname, len); + } + LOG_SYSTEM_UNLOCK(env); + + /* + * Fill in all the remaining info that we'll need later to register + * the file, if we use it for logging. 
+ */ + fnp->id = fnp->old_id = DB_LOGFILEID_INVALID; + fnp->s_type = dbp->type; + memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN); + fnp->meta_pgno = dbp->meta_pgno; + fnp->create_txnid = create_txnid; + dbp->dbenv->thread_id(dbp->dbenv, &fnp->pid, NULL); + + if (F_ISSET(dbp, DB_AM_INMEM)) + F_SET(fnp, DB_FNAME_INMEM); + if (F_ISSET(dbp, DB_AM_RECOVER)) + F_SET(fnp, DB_FNAME_RECOVER); + /* + * The DB is BIGENDed if its bytes are swapped XOR + * the machine is bigended + */ + if ((F_ISSET(dbp, DB_AM_SWAP) != 0) ^ + (F_ISSET(env, ENV_LITTLEENDIAN) == 0)) + F_SET(fnp, DBREG_BIGEND); + if (F_ISSET(dbp, DB_AM_CHKSUM)) + F_SET(fnp, DBREG_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) + F_SET(fnp, DBREG_ENCRYPT); + fnp->txn_ref = 1; + fnp->mutex = dbp->mutex; + + dbp->log_filename = fnp; + + return (0); + +err: LOG_SYSTEM_UNLOCK(env); + if (ret == ENOMEM) + __db_errx(env, DB_STR("1501", + "Logging region out of memory; you may need to increase its size")); + + return (ret); +} + +/* + * __dbreg_teardown -- + * Destroy a DB handle's FNAME struct. This is only called when closing + * the DB. + * + * PUBLIC: int __dbreg_teardown __P((DB *)); + */ +int +__dbreg_teardown(dbp) + DB *dbp; +{ + int ret; + + /* + * We may not have an FNAME if we were never opened. This is not an + * error. + */ + if (dbp->log_filename == NULL) + return (0); + + ret = __dbreg_teardown_int(dbp->env, dbp->log_filename); + + /* We freed the copy of the mutex from the FNAME. */ + dbp->log_filename = NULL; + dbp->mutex = MUTEX_INVALID; + + return (ret); +} + +/* + * __dbreg_teardown_int -- + * Destroy an FNAME struct. 
+ * + * PUBLIC: int __dbreg_teardown_int __P((ENV *, FNAME *)); + */ +int +__dbreg_teardown_int(env, fnp) + ENV *env; + FNAME *fnp; +{ + DB_LOG *dblp; +#ifdef HAVE_STATISTICS + LOG *lp; +#endif + REGINFO *infop; + int ret; + + if (F_ISSET(fnp, DB_FNAME_NOTLOGGED)) + return (0); + dblp = env->lg_handle; + infop = &dblp->reginfo; +#ifdef HAVE_STATISTICS + lp = dblp->reginfo.primary; +#endif + + DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID); + ret = __mutex_free(env, &fnp->mutex); + + LOG_SYSTEM_LOCK(env); + if (fnp->fname_off != INVALID_ROFF) + __env_alloc_free(infop, R_ADDR(infop, fnp->fname_off)); + if (fnp->dname_off != INVALID_ROFF) + __env_alloc_free(infop, R_ADDR(infop, fnp->dname_off)); + __env_alloc_free(infop, fnp); + STAT(lp->stat.st_nfileid--); + LOG_SYSTEM_UNLOCK(env); + + return (ret); +} + +/* + * __dbreg_new_id -- + * Get an unused dbreg id to this database handle. + * Used as a wrapper to acquire the mutex and + * only set the id on success. + * + * PUBLIC: int __dbreg_new_id __P((DB *, DB_TXN *)); + */ +int +__dbreg_new_id(dbp, txn) + DB *dbp; + DB_TXN *txn; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* The mtx_filelist protects the FNAME list and id management. */ + MUTEX_LOCK(env, lp->mtx_filelist); + if (fnp->id != DB_LOGFILEID_INVALID) { + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); + } + if ((ret = __dbreg_get_id(dbp, txn, &id)) == 0) + fnp->id = id; + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_get_id -- + * Assign an unused dbreg id to this database handle. + * Assume the caller holds the mtx_filelist locked. Assume the + * caller will set the fnp->id field with the id we return. 
+ * + * PUBLIC: int __dbreg_get_id __P((DB *, DB_TXN *, int32_t *)); + */ +int +__dbreg_get_id(dbp, txn, idp) + DB *dbp; + DB_TXN *txn; + int32_t *idp; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* + * It's possible that after deciding we needed to call this function, + * someone else allocated an ID before we grabbed the lock. Check + * to make sure there was no race and we have something useful to do. + */ + /* Get an unused ID from the free list. */ + if ((ret = __dbreg_pop_id(env, &id)) != 0) + goto err; + + /* If no ID was found, allocate a new one. */ + if (id == DB_LOGFILEID_INVALID) + id = lp->fid_max++; + + /* If the file is durable (i.e., not, not-durable), mark it as such. */ + if (!F_ISSET(dbp, DB_AM_NOT_DURABLE)) + F_SET(fnp, DB_FNAME_DURABLE); + + /* Hook the FNAME into the list of open files. */ + SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname); + + /* + * Log the registry. We should only request a new ID in situations + * where logging is reasonable. + */ + DB_ASSERT(env, !F_ISSET(dbp, DB_AM_RECOVER)); + + if ((ret = __dbreg_log_id(dbp, txn, id, 0)) != 0) + goto err; + + /* + * Once we log the create_txnid, we need to make sure we never + * log it again (as might happen if this is a replication client + * that later upgrades to a master). + */ + fnp->create_txnid = TXN_INVALID; + + DB_ASSERT(env, dbp->type == fnp->s_type); + DB_ASSERT(env, dbp->meta_pgno == fnp->meta_pgno); + + if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0) + goto err; + /* + * If we have a successful call, set the ID. Otherwise + * we have to revoke it and remove it from all the lists + * it has been added to, and return an invalid id. 
+ */ +err: + if (ret != 0 && id != DB_LOGFILEID_INVALID) { + (void)__dbreg_revoke_id(dbp, 1, id); + id = DB_LOGFILEID_INVALID; + } + *idp = id; + return (ret); +} + +/* + * __dbreg_assign_id -- + * Assign a particular dbreg id to this database handle. + * + * PUBLIC: int __dbreg_assign_id __P((DB *, int32_t, int)); + */ +int +__dbreg_assign_id(dbp, id, deleted) + DB *dbp; + int32_t id; + int deleted; +{ + DB *close_dbp; + DB_LOG *dblp; + ENV *env; + FNAME *close_fnp, *fnp; + LOG *lp; + int ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + close_dbp = NULL; + close_fnp = NULL; + + /* The mtx_filelist protects the FNAME list and id management. */ + MUTEX_LOCK(env, lp->mtx_filelist); + + /* We should only call this on DB handles that have no ID. */ + DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID); + + /* + * Make sure there isn't already a file open with this ID. There can + * be in recovery, if we're recovering across a point where an ID got + * reused. + */ + if (__dbreg_id_to_fname(dblp, id, 1, &close_fnp) == 0) { + /* + * We want to save off any dbp we have open with this id. We + * can't safely close it now, because we hold the mtx_filelist, + * but we should be able to rely on it being open in this + * process, and we're running recovery, so no other thread + * should muck with it if we just put off closing it until + * we're ready to return. + * + * Once we have the dbp, revoke its id; we're about to + * reuse it. + */ + ret = __dbreg_id_to_db(env, NULL, &close_dbp, id, 0); + if (ret == ENOENT) { + ret = 0; + goto cont; + } else if (ret != 0) + goto err; + + if ((ret = __dbreg_revoke_id(close_dbp, 1, + DB_LOGFILEID_INVALID)) != 0) + goto err; + } + + /* + * Remove this ID from the free list, if it's there, and make sure + * we don't allocate it anew. 
+ */ +cont: if ((ret = __dbreg_pluck_id(env, id)) != 0) + goto err; + if (id >= lp->fid_max) + lp->fid_max = id + 1; + + /* Now go ahead and assign the id to our dbp. */ + fnp->id = id; + /* If the file is durable (i.e., not, not-durable), mark it as such. */ + if (!F_ISSET(dbp, DB_AM_NOT_DURABLE)) + F_SET(fnp, DB_FNAME_DURABLE); + SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname); + + /* + * If we get an error adding the dbentry, revoke the id. + * We void the return value since we want to retain and + * return the original error in ret anyway. + */ + if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0) + (void)__dbreg_revoke_id(dbp, 1, id); + else + dblp->dbentry[id].deleted = deleted; + +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + + /* There's nothing useful that our caller can do if this close fails. */ + if (close_dbp != NULL) + (void)__db_close(close_dbp, NULL, DB_NOSYNC); + + return (ret); +} + +/* + * __dbreg_revoke_id -- + * Take a log id away from a dbp, in preparation for closing it, + * but without logging the close. + * + * PUBLIC: int __dbreg_revoke_id __P((DB *, int, int32_t)); + */ +int +__dbreg_revoke_id(dbp, have_lock, force_id) + DB *dbp; + int have_lock; + int32_t force_id; +{ + DB_REP *db_rep; + ENV *env; + int push; + + env = dbp->env; + + /* + * If we are not in recovery but the file was opened for a recovery + * operation, then this process aborted a transaction for another + * process and the id may still be in use, so don't reuse this id. + * If our fid generation in replication has changed, this fid + * should not be reused + */ + db_rep = env->rep_handle; + push = (!F_ISSET(dbp, DB_AM_RECOVER) || IS_RECOVERING(env)) && + (!REP_ON(env) || ((REP *)db_rep->region)->gen == dbp->fid_gen); + + return (__dbreg_revoke_id_int(dbp->env, + dbp->log_filename, have_lock, push, force_id)); +} +/* + * __dbreg_revoke_id_int -- + * Revoke a log, in preparation for closing it, but without logging + * the close. 
+ * + * PUBLIC: int __dbreg_revoke_id_int + * PUBLIC: __P((ENV *, FNAME *, int, int, int32_t)); + */ +int +__dbreg_revoke_id_int(env, fnp, have_lock, push, force_id) + ENV *env; + FNAME *fnp; + int have_lock, push; + int32_t force_id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t id; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ret = 0; + + /* If we lack an ID, this is a null-op. */ + if (fnp == NULL) + return (0); + + /* + * If we have a force_id, we had an error after allocating + * the id, and putting it on the fq list, but before we + * finished setting up fnp. So, if we have a force_id use it. + */ + if (force_id != DB_LOGFILEID_INVALID) + id = force_id; + else if (fnp->id == DB_LOGFILEID_INVALID) { + if (fnp->old_id == DB_LOGFILEID_INVALID) + return (0); + id = fnp->old_id; + } else + id = fnp->id; + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + + fnp->id = DB_LOGFILEID_INVALID; + fnp->old_id = DB_LOGFILEID_INVALID; + + /* Remove the FNAME from the list of open files. */ + SH_TAILQ_REMOVE(&lp->fq, fnp, q, __fname); + + /* + * This FNAME may be for a DBP which is already closed. Its ID may + * still be in use by an aborting transaction. If not, + * remove this id from the dbentry table and push it onto the + * free list. + */ + if ((ret = __dbreg_rem_dbentry(dblp, id)) == 0 && push) + ret = __dbreg_push_id(env, id); + + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_close_id -- + * Take a dbreg id away from a dbp that we're closing, and log + * the unregistry if the refcount goes to 0. + * + * PUBLIC: int __dbreg_close_id __P((DB *, DB_TXN *, u_int32_t)); + */ +int +__dbreg_close_id(dbp, txn, op) + DB *dbp; + DB_TXN *txn; + u_int32_t op; +{ + DB_LOG *dblp; + ENV *env; + FNAME *fnp; + LOG *lp; + int ret, t_ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* If we lack an ID, this is a null-op. 
*/ + if (fnp == NULL) + return (0); + + if (fnp->id == DB_LOGFILEID_INVALID) { + ret = __dbreg_revoke_id(dbp, 0, DB_LOGFILEID_INVALID); + goto done; + } + + /* + * If we are the last reference to this db then we need to log it + * as closed. Otherwise the last transaction will do the logging. + * Remove the DBP from the db entry table since it can nolonger + * be used. If we abort it will have to be reopened. + */ + ret = 0; + DB_ASSERT(env, fnp->txn_ref > 0); + if (fnp->txn_ref > 1) { + MUTEX_LOCK(env, dbp->mutex); + if (fnp->txn_ref > 1) { + if ((t_ret = __dbreg_rem_dbentry( + env->lg_handle, fnp->id)) != 0 && ret == 0) + ret = t_ret; + + /* + * The DB handle has been closed in the logging system. + * Transactions may still have a ref to this name. + * Mark it so that if recovery reopens the file id + * the transaction will not close the wrong handle. + */ + F_SET(fnp, DB_FNAME_CLOSED); + fnp->txn_ref--; + MUTEX_UNLOCK(env, dbp->mutex); + /* The mutex now lives only in the FNAME. */ + dbp->mutex = MUTEX_INVALID; + dbp->log_filename = NULL; + goto no_log; + } + } + MUTEX_LOCK(env, lp->mtx_filelist); + + if ((ret = __dbreg_log_close(env, fnp, txn, op)) != 0) + goto err; + ret = __dbreg_revoke_id(dbp, 1, DB_LOGFILEID_INVALID); + +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + +done: if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0) + ret = t_ret; +no_log: + return (ret); +} +/* + * __dbreg_close_id_int -- + * Close down a dbreg id and log the unregistry. This is called only + * when a transaction has the last ref to the fname. 
+ * + * PUBLIC: int __dbreg_close_id_int __P((ENV *, FNAME *, u_int32_t, int)); + */ +int +__dbreg_close_id_int(env, fnp, op, locked) + ENV *env; + FNAME *fnp; + u_int32_t op; + int locked; +{ + DB_LOG *dblp; + LOG *lp; + int ret, t_ret; + + DB_ASSERT(env, fnp->txn_ref == 1); + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (fnp->id == DB_LOGFILEID_INVALID) + return (__dbreg_revoke_id_int(env, + fnp, locked, 1, DB_LOGFILEID_INVALID)); + + if (F_ISSET(fnp, DB_FNAME_RECOVER)) + return (__dbreg_close_file(env, fnp)); + /* + * If log_close fails then it will mark the name DB_FNAME_NOTLOGGED + * and the id must persist. + */ + if (!locked) + MUTEX_LOCK(env, lp->mtx_filelist); + if ((ret = __dbreg_log_close(env, fnp, NULL, op)) != 0) + goto err; + + ret = __dbreg_revoke_id_int(env, fnp, 1, 1, DB_LOGFILEID_INVALID); + +err: if (!locked) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + if ((t_ret = __dbreg_teardown_int(env, fnp)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __dbreg_failchk -- + * + * Look for entries that belong to dead processes and either close them + * out or, if there are pending transactions, just remove the mutex which + * will get discarded later. 
+ * + * PUBLIC: int __dbreg_failchk __P((ENV *)); + */ +int +__dbreg_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_LOG *dblp; + FNAME *fnp, *nnp; + LOG *lp; + int ret, t_ret; + char buf[DB_THREADID_STRLEN]; + + if ((dblp = env->lg_handle) == NULL) + return (0); + + lp = dblp->reginfo.primary; + dbenv = env->dbenv; + ret = 0; + + MUTEX_LOCK(env, lp->mtx_filelist); + for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); fnp != NULL; fnp = nnp) { + nnp = SH_TAILQ_NEXT(fnp, q, __fname); + if (dbenv->is_alive(dbenv, fnp->pid, 0, DB_MUTEX_PROCESS_ONLY)) + continue; + MUTEX_LOCK(env, fnp->mutex); + __db_msg(env, DB_STR_A("1502", + "Freeing log information for process: %s, (ref %lu)", + "%s %lu"), + dbenv->thread_id_string(dbenv, fnp->pid, 0, buf), + (u_long)fnp->txn_ref); + if (fnp->txn_ref > 1 || F_ISSET(fnp, DB_FNAME_CLOSED)) { + if (!F_ISSET(fnp, DB_FNAME_CLOSED)) { + fnp->txn_ref--; + F_SET(fnp, DB_FNAME_CLOSED); + } + MUTEX_UNLOCK(env, fnp->mutex); + fnp->mutex = MUTEX_INVALID; + fnp->pid = 0; + } else { + F_SET(fnp, DB_FNAME_CLOSED); + if ((t_ret = __dbreg_close_id_int(env, + fnp, DBREG_CLOSE, 1)) && ret == 0) + ret = t_ret; + } + } + + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} +/* + * __dbreg_log_close -- + * + * Log a close of a database. Called when closing a file or when a + * replication client is becoming a master. That closes all the + * files it previously had open. + * + * Assumes caller holds the lp->mutex_filelist lock already. 
+ * + * PUBLIC: int __dbreg_log_close __P((ENV *, FNAME *, + * PUBLIC: DB_TXN *, u_int32_t)); + */ +int +__dbreg_log_close(env, fnp, txn, op) + ENV *env; + FNAME *fnp; + DB_TXN *txn; + u_int32_t op; +{ + DBT fid_dbt, r_name, *dbtp; + DB_LOG *dblp; + DB_LSN r_unused; + int ret; + + dblp = env->lg_handle; + ret = 0; + + if (fnp->fname_off == INVALID_ROFF) + dbtp = NULL; + else { + memset(&r_name, 0, sizeof(r_name)); + r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1; + dbtp = &r_name; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + if ((ret = __dbreg_register_log(env, txn, &r_unused, + F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE, + op, dbtp, &fid_dbt, fnp->id, + fnp->s_type, fnp->meta_pgno, TXN_INVALID)) != 0) { + /* + * We are trying to close, but the log write failed. + * Unfortunately, close needs to plow forward, because + * the application can't do anything with the handle. + * Make the entry in the shared memory region so that + * when we close the environment, we know that this + * happened. Also, make sure we remove this from the + * per-process table, so that we don't try to close it + * later. + */ + F_SET(fnp, DB_FNAME_NOTLOGGED); + (void)__dbreg_rem_dbentry(dblp, fnp->id); + } + return (ret); +} + +/* + * __dbreg_push_id and __dbreg_pop_id -- + * Dbreg ids from closed files are kept on a stack in shared memory + * for recycling. (We want to reuse them as much as possible because each + * process keeps open files in an array by ID.) Push them to the stack and + * pop them from it, managing memory as appropriate. + * + * The stack is protected by the mtx_filelist, and both functions assume it + * is already locked. 
+ */ +static int +__dbreg_push_id(env, id) + ENV *env; + int32_t id; +{ + DB_LOG *dblp; + LOG *lp; + REGINFO *infop; + int32_t *stack, *newstack; + int ret; + + dblp = env->lg_handle; + infop = &dblp->reginfo; + lp = infop->primary; + + if (id == lp->fid_max - 1) { + lp->fid_max--; + return (0); + } + + /* Check if we have room on the stack. */ + if (lp->free_fid_stack == INVALID_ROFF || + lp->free_fids_alloced <= lp->free_fids + 1) { + LOG_SYSTEM_LOCK(env); + if ((ret = __env_alloc(infop, + (lp->free_fids_alloced + 20) * sizeof(u_int32_t), + &newstack)) != 0) { + LOG_SYSTEM_UNLOCK(env); + return (ret); + } + + if (lp->free_fid_stack != INVALID_ROFF) { + stack = R_ADDR(infop, lp->free_fid_stack); + memcpy(newstack, stack, + lp->free_fids_alloced * sizeof(u_int32_t)); + __env_alloc_free(infop, stack); + } + lp->free_fid_stack = R_OFFSET(infop, newstack); + lp->free_fids_alloced += 20; + LOG_SYSTEM_UNLOCK(env); + } + + stack = R_ADDR(infop, lp->free_fid_stack); + stack[lp->free_fids++] = id; + return (0); +} + +static int +__dbreg_pop_id(env, id) + ENV *env; + int32_t *id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t *stack; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* Do we have anything to pop? */ + if (lp->free_fid_stack != INVALID_ROFF && lp->free_fids > 0) { + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + *id = stack[--lp->free_fids]; + } else + *id = DB_LOGFILEID_INVALID; + + return (0); +} + +/* + * __dbreg_pluck_id -- + * Remove a particular dbreg id from the stack of free ids. This is + * used when we open a file, as in recovery, with a specific ID that might + * be on the stack. + * + * Returns success whether or not the particular id was found, and like + * push and pop, assumes that the mtx_filelist is locked. 
+ */ +static int +__dbreg_pluck_id(env, id) + ENV *env; + int32_t id; +{ + DB_LOG *dblp; + LOG *lp; + int32_t *stack; + u_int i; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (id >= lp->fid_max) + return (0); + + /* Do we have anything to look at? */ + if (lp->free_fid_stack != INVALID_ROFF) { + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + for (i = 0; i < lp->free_fids; i++) + if (id == stack[i]) { + /* + * Found it. Overwrite it with the top + * id (which may harmlessly be itself), + * and shorten the stack by one. + */ + stack[i] = stack[lp->free_fids - 1]; + lp->free_fids--; + return (0); + } + } + + return (0); +} + +/* + * __dbreg_log_id -- + * Used for in-memory named files. They are created in mpool and + * are given id's early in the open process so that we can read and + * create pages in the mpool for the files. However, at the time that + * the mpf is created, the file may not be fully created and/or its + * meta-data may not be fully known, so we can't do a full dbregister. + * This is a routine exported that will log a complete dbregister + * record that will allow for both recovery and replication. + * + * PUBLIC: int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int)); + */ +int +__dbreg_log_id(dbp, txn, id, needlock) + DB *dbp; + DB_TXN *txn; + int32_t id; + int needlock; +{ + DBT fid_dbt, r_name; + DB_LOG *dblp; + DB_LSN unused; + ENV *env; + FNAME *fnp; + LOG *lp; + u_int32_t op; + int i, ret; + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* + * Verify that the fnp has been initialized, by seeing if it + * has any non-zero bytes in it. + */ + for (i = 0; i < DB_FILE_ID_LEN; i++) + if (fnp->ufid[i] != 0) + break; + if (i == DB_FILE_ID_LEN) + memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN); + + if (fnp->s_type == DB_UNKNOWN) + fnp->s_type = dbp->type; + + /* + * Log the registry. We should only request a new ID in situations + * where logging is reasonable. 
+ */ + memset(&fid_dbt, 0, sizeof(fid_dbt)); + memset(&r_name, 0, sizeof(r_name)); + + if (needlock) + MUTEX_LOCK(env, lp->mtx_filelist); + + if (fnp->fname_off != INVALID_ROFF) { + r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1; + } + + fid_dbt.data = dbp->fileid; + fid_dbt.size = DB_FILE_ID_LEN; + + op = !F_ISSET(dbp, DB_AM_OPEN_CALLED) ? DBREG_PREOPEN : + (F_ISSET(dbp, DB_AM_INMEM) ? DBREG_REOPEN : DBREG_OPEN); + ret = __dbreg_register_log(env, txn, &unused, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0, + op | F_ISSET(fnp, DB_FNAME_DBREG_MASK), + r_name.size == 0 ? NULL : &r_name, &fid_dbt, id, + fnp->s_type, fnp->meta_pgno, fnp->create_txnid); + + if (needlock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} diff --git a/src/dbreg/dbreg.src b/src/dbreg/dbreg.src new file mode 100644 index 00000000..f51f1379 --- /dev/null +++ b/src/dbreg/dbreg.src @@ -0,0 +1,37 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __dbreg + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * Used for registering name/id translations at open or close. 
+ * opcode: register or unregister + * name: file name + * fileid: unique file id + * ftype: file type + * ftype: database type + * id: transaction id of the subtransaction that created the fs object + */ +BEGIN register 42 2 +DBOP opcode u_int32_t lu +DBT name DBT s +DBT uid DBT s +ARG fileid int32_t ld +ARG ftype DBTYPE lx +ARG meta_pgno db_pgno_t lu +ARG id u_int32_t lx +END diff --git a/src/dbreg/dbreg_auto.c b/src/dbreg/dbreg_auto.c new file mode 100644 index 00000000..a26e5527 --- /dev/null +++ b/src/dbreg/dbreg_auto.c @@ -0,0 +1,35 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __dbreg_register_desc[] = { + {LOGREC_DBOP, SSZ(__dbreg_register_args, opcode), "opcode", ""}, + {LOGREC_DBT, SSZ(__dbreg_register_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__dbreg_register_args, uid), "uid", ""}, + {LOGREC_ARG, SSZ(__dbreg_register_args, fileid), "fileid", "%ld"}, + {LOGREC_ARG, SSZ(__dbreg_register_args, ftype), "ftype", "%lx"}, + {LOGREC_ARG, SSZ(__dbreg_register_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__dbreg_register_args, id), "id", "%lx"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __dbreg_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_recover, DB___dbreg_register)) != 0) + return (ret); + return (0); +} diff --git a/src/dbreg/dbreg_autop.c b/src/dbreg/dbreg_autop.c new file mode 100644 index 00000000..ea43addd --- /dev/null +++ b/src/dbreg/dbreg_autop.c @@ -0,0 +1,43 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __dbreg_register_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__dbreg_register_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__dbreg_register", __dbreg_register_desc, info)); +} + +/* + * PUBLIC: int __dbreg_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_print, DB___dbreg_register)) != 0) + return (ret); + return (0); +} diff --git a/src/dbreg/dbreg_rec.c b/src/dbreg/dbreg_rec.c new file mode 100644 index 00000000..aae6851f --- /dev/null +++ b/src/dbreg/dbreg_rec.c @@ -0,0 +1,392 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +static int __dbreg_open_file __P((ENV *, + DB_TXN *, __dbreg_register_args *, void *)); +/* + * PUBLIC: int __dbreg_register_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__dbreg_register_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __dbreg_register_args *argp; + DB_ENTRY *dbe; + DB_LOG *dblp; + DB *dbp; + u_int32_t opcode, status; + int do_close, do_open, do_rem, ret, t_ret; + + dblp = env->lg_handle; + dbp = NULL; + +#ifdef DEBUG_RECOVER + REC_PRINT(__dbreg_register_print); +#endif + do_open = do_close = 0; + if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0) + goto out; + + opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK); + switch (opcode) { + case DBREG_REOPEN: + case DBREG_PREOPEN: + case DBREG_OPEN: + /* + * In general, we redo the open on REDO 
and abort on UNDO. + * However, a reopen is a second instance of an open of + * in-memory files and we don't want to close them yet + * on abort, so just skip that here. + */ + if ((DB_REDO(op) || + op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES)) + do_open = 1; + else if (opcode != DBREG_REOPEN) + do_close = 1; + break; + case DBREG_CLOSE: + if (DB_UNDO(op)) + do_open = 1; + else + do_close = 1; + break; + case DBREG_RCLOSE: + /* + * DBREG_RCLOSE was generated by recover because a file was + * left open. The POPENFILES pass, which is run to open + * files to abort prepared transactions, may not include the + * open for this file so we open it here. Note that a normal + * CLOSE is not legal before the prepared transaction is + * committed or aborted. + */ + if (DB_UNDO(op) || op == DB_TXN_POPENFILES) + do_open = 1; + else + do_close = 1; + break; + case DBREG_CHKPNT: + if (DB_UNDO(op) || + op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES) + do_open = 1; + break; + default: + ret = __db_unknown_path(env, "__dbreg_register_recover"); + goto out; + } + + if (do_open) { + /* + * We must open the db even if the meta page is not + * yet written as we may be creating subdatabase. + */ + if (op == DB_TXN_OPENFILES && opcode != DBREG_CHKPNT) + F_SET(dblp, DBLOG_FORCE_OPEN); + + /* + * During an abort or an open pass to recover prepared txns, + * we need to make sure that we use the same locker id on the + * open. We pass the txnid along to ensure this. + */ + ret = __dbreg_open_file(env, + op == DB_TXN_ABORT || op == DB_TXN_POPENFILES ? + argp->txnp : NULL, argp, info); + if (ret == DB_PAGE_NOTFOUND && argp->meta_pgno != PGNO_BASE_MD) + ret = ENOENT; + if (ret == ENOENT || ret == EINVAL) { + /* + * If this is an OPEN while rolling forward, it's + * possible that the file was recreated since last + * time we got here. In that case, we've got deleted + * set and probably shouldn't, so we need to check + * for that case and possibly retry. 
+ */ + if (DB_REDO(op) && argp->txnp != 0 && + dblp->dbentry[argp->fileid].deleted) { + dblp->dbentry[argp->fileid].deleted = 0; + ret = + __dbreg_open_file(env, NULL, argp, info); + if (ret == DB_PAGE_NOTFOUND && + argp->meta_pgno != PGNO_BASE_MD) + ret = ENOENT; + } + /* + * We treat ENOENT as OK since it's possible that + * the file was renamed or deleted. + * All other errors, we return. + */ + if (ret == ENOENT) + ret = 0; + } + F_CLR(dblp, DBLOG_FORCE_OPEN); + } + + if (do_close) { + /* + * If we are undoing an open, or redoing a close, + * then we need to close the file. If we are simply + * revoking then we just need to grab the DBP and revoke + * the log id. + * + * If the file is deleted, then we can just ignore this close. + * Otherwise, we should usually have a valid dbp we should + * close or whose reference count should be decremented. + * However, if we shut down without closing a file, we may, in + * fact, not have the file open, and that's OK. + */ + do_rem = 0; + MUTEX_LOCK(env, dblp->mtx_dbreg); + if (argp->fileid < dblp->dbentry_cnt) { + /* + * Typically, closes should match an open which means + * that if this is a close, there should be a valid + * entry in the dbentry table when we get here, + * however there are exceptions. 1. If this is an + * OPENFILES pass, then we may have started from + * a log file other than the first, and the + * corresponding open appears in an earlier file. + * 2. If we are undoing an open on an abort or + * recovery, it's possible that we failed after + * the log record, but before we actually entered + * a handle here. + * 3. If we aborted an open, then we wrote a non-txnal + * RCLOSE into the log. During the forward pass, the + * file won't be open, and that's OK. + */ + dbe = &dblp->dbentry[argp->fileid]; + if (dbe->dbp == NULL && !dbe->deleted) { + /* No valid entry here. Nothing to do. */ + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + goto done; + } + + /* We have either an open entry or a deleted entry. 
*/ + if ((dbp = dbe->dbp) != NULL) { + /* + * If we're a replication client, it's + * possible to get here with a dbp that + * the user opened, but which we later + * assigned a fileid to. Be sure that + * we only close dbps that we opened in + * the recovery code or that were opened + * inside a currently aborting transaction + * but not by the recovery code. + */ + do_rem = F_ISSET(dbp, DB_AM_RECOVER) ? + op != DB_TXN_ABORT : op == DB_TXN_ABORT; + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + } else if (dbe->deleted) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + if ((ret = __dbreg_rem_dbentry( + dblp, argp->fileid)) != 0) + goto out; + } + } else + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + /* + * During recovery, all files are closed. On an abort, we only + * close the file if we opened it during the abort + * (DB_AM_RECOVER set), otherwise we simply do a __db_refresh. + * For the close case, if remove or rename has closed the file, + * don't request a sync, because a NULL mpf would be a problem. + * + * If we are undoing a create we'd better discard any buffers + * from the memory pool. We identify creates because the + * argp->id field contains the transaction containing the file + * create; if that id is invalid, we are not creating. + * + * On the backward pass, we need to "undo" opens even if the + * transaction in which they appeared committed, because we have + * already undone the corresponding close. In that case, the + * id will be valid, but we do not want to discard buffers. 
+ */ + if (do_rem && dbp != NULL) { + if (argp->id != TXN_INVALID) { + if ((ret = __db_txnlist_find(env, + info, argp->txnp->txnid, &status)) + != DB_NOTFOUND && ret != 0) + goto out; + if (ret == DB_NOTFOUND || status != TXN_COMMIT) + F_SET(dbp, DB_AM_DISCARD); + ret = 0; + } + + if (op == DB_TXN_ABORT) { + if ((t_ret = __db_refresh(dbp, + NULL, DB_NOSYNC, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + } else { + if ((t_ret = __db_close( + dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } + } + } +done: if (ret == 0) + *lsnp = argp->prev_lsn; +out: if (argp != NULL) + __os_free(env, argp); + return (ret); +} + +/* + * __dbreg_open_file -- + * Called during log_register recovery. Make sure that we have an + * entry in the dbentry table for this ndx. Returns 0 on success, + * non-zero on error. + */ +static int +__dbreg_open_file(env, txn, argp, info) + ENV *env; + DB_TXN *txn; + __dbreg_register_args *argp; + void *info; +{ + DB *dbp; + DB_ENTRY *dbe; + DB_LOG *dblp; + u_int32_t id, opcode, status; + int ret; + + dblp = env->lg_handle; + opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK); + + /* + * When we're opening, we have to check that the name we are opening + * is what we expect. If it's not, then we close the old file and + * open the new one. + */ + MUTEX_LOCK(env, dblp->mtx_dbreg); + if (argp->fileid != DB_LOGFILEID_INVALID && + argp->fileid < dblp->dbentry_cnt) + dbe = &dblp->dbentry[argp->fileid]; + else + dbe = NULL; + + if (dbe != NULL) { + if (dbe->deleted) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ENOENT); + } + + /* + * At the end of OPENFILES, we may have a file open. If this + * is a reopen, then we will always close and reopen. If the + * open was part of a committed transaction, so it doesn't + * get undone. However, if the fileid was previously used, + * we'll see a close that may need to get undone. There are + * three ways we can detect this. 
1) the meta-pgno in the + * current file does not match that of the open file, 2) the + * file uid of the current file does not match that of the + * previously opened file, 3) the current file is unnamed, in + * which case it should never be opened during recovery. + * It is also possible that the db open previously failed + * because the file was missing. Check the DB_AM_OPEN_CALLED + * bit and try to open it again. + */ + if ((dbp = dbe->dbp) != NULL) { + if (opcode == DBREG_REOPEN || + !F_ISSET(dbp, DB_AM_OPEN_CALLED) || + dbp->meta_pgno != argp->meta_pgno || + argp->name.size == 0 || + memcmp(dbp->fileid, argp->uid.data, + DB_FILE_ID_LEN) != 0) { + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + (void)__dbreg_revoke_id(dbp, 0, + DB_LOGFILEID_INVALID); + if (F_ISSET(dbp, DB_AM_RECOVER)) + (void)__db_close(dbp, NULL, DB_NOSYNC); + goto reopen; + } + + DB_ASSERT(env, dbe->dbp == dbp); + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + /* + * This is a successful open. We need to record that + * in the txnlist so that we know how to handle the + * subtransaction that created the file system object. + */ + if (argp != NULL && argp->id != TXN_INVALID && + (ret = __db_txnlist_update(env, info, + argp->id, TXN_EXPECTED, NULL, &status, 1)) != 0) + return (ret); + return (0); + } + } + + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + +reopen: + /* + * We never re-open temporary files. Temp files are only useful during + * aborts in which case the dbp was entered when the file was + * registered. During recovery, we treat temp files as properly deleted + * files, allowing the open to fail and not reporting any errors when + * recovery fails to get a valid dbp from __dbreg_id_to_db. + */ + if (argp->name.size == 0) { + (void)__dbreg_add_dbentry(env, dblp, NULL, argp->fileid); + return (ENOENT); + } + + /* + * We are about to pass a recovery txn pointer into the main library. + * We need to make sure that any accessed fields are set appropriately. 
+ */ + if (txn != NULL) { + id = txn->txnid; + memset(txn, 0, sizeof(DB_TXN)); + txn->txnid = id; + txn->mgrp = env->tx_handle; + } + + return (__dbreg_do_open(env, + txn, dblp, argp->uid.data, argp->name.data, argp->ftype, + argp->fileid, argp->meta_pgno, info, argp->id, opcode)); +} diff --git a/src/dbreg/dbreg_stat.c b/src/dbreg/dbreg_stat.c new file mode 100644 index 00000000..4d2f28a4 --- /dev/null +++ b/src/dbreg/dbreg_stat.c @@ -0,0 +1,140 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +#ifdef HAVE_STATISTICS +static int __dbreg_print_all __P((ENV *, u_int32_t)); + +/* + * __dbreg_stat_print -- + * Print the dbreg statistics. + * + * PUBLIC: int __dbreg_stat_print __P((ENV *, u_int32_t)); + */ +int +__dbreg_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + int ret; + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __dbreg_print_all(env, flags)) != 0) + return (ret); + + return (0); +} + +/* + * __dbreg_print_fname -- + * Display the contents of an FNAME structure. 
+ * + * PUBLIC: void __dbreg_print_fname __P((ENV *, FNAME *)); + */ +void +__dbreg_print_fname(env, fnp) + ENV *env; + FNAME *fnp; +{ + static const FN fn[] = { + { DB_FNAME_DURABLE, "DB_FNAME_DURABLE" }, + { DB_FNAME_NOTLOGGED, "DB_FNAME_NOTLOGGED" }, + { DB_FNAME_CLOSED, "DB_FNAME_CLOSED" }, + { DB_FNAME_RECOVER, "DB_FNAME_RECOVER" }, + { 0, NULL } + }; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB handle FNAME contents:"); + STAT_LONG("log ID", fnp->id); + STAT_ULONG("Meta pgno", fnp->meta_pgno); + __db_print_fileid(env, fnp->ufid, "\tFile ID"); + STAT_ULONG("create txn", fnp->create_txnid); + STAT_ULONG("refcount", fnp->txn_ref); + __db_prflags(env, NULL, fnp->flags, fn, NULL, "\tFlags"); +} + +/* + * __dbreg_print_all -- + * Display the ENV's list of files. + */ +static int +__dbreg_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + DB *dbp; + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + int32_t *stack; + int del, first; + u_int32_t i; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + __db_msg(env, "LOG FNAME list:"); + __mutex_print_debug_single( + env, "File name mutex", lp->mtx_filelist, flags); + + STAT_LONG("Fid max", lp->fid_max); + STAT_LONG("Log buffer size", lp->buffer_size); + + MUTEX_LOCK(env, lp->mtx_filelist); + first = 1; + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + if (first) { + first = 0; + __db_msg(env, + "ID\tName\t\tType\tPgno\tPid\tTxnid\tFlags\tRef\tDBP-info"); + } + dbp = fnp->id >= dblp->dbentry_cnt ? NULL : + dblp->dbentry[fnp->id].dbp; + del = fnp->id >= dblp->dbentry_cnt ? 0 : + dblp->dbentry[fnp->id].deleted; + __db_msg(env, + "%ld\t%-8s%s%-8s%s\t%lu\t%lu\t%lx\t%lx\t%lx\t%s", + (long)fnp->id, + fnp->fname_off == INVALID_ROFF ? + "" : (char *)R_ADDR(&dblp->reginfo, fnp->fname_off), + fnp->dname_off == INVALID_ROFF ? "" : ":", + fnp->dname_off == INVALID_ROFF ? 
+ "" : (char *)R_ADDR(&dblp->reginfo, fnp->dname_off), + __db_dbtype_to_string(fnp->s_type), + (u_long)fnp->meta_pgno, (u_long)fnp->pid, + (u_long)fnp->create_txnid, (u_long)fnp->flags, + (u_long)fnp->txn_ref, + dbp == NULL ? "No DBP" : "DBP"); + if (dbp != NULL) + __db_msg(env, " (%d %lx %lx)", del, P_TO_ULONG(dbp), + (u_long)(dbp == NULL ? 0 : dbp->flags)); + } + MUTEX_UNLOCK(env, lp->mtx_filelist); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "LOG region list of free IDs."); + if (lp->free_fid_stack == INVALID_ROFF) + __db_msg(env, "Free id stack is empty."); + else { + STAT_ULONG("Free id array size", lp->free_fids_alloced); + STAT_ULONG("Number of ids on the free stack", lp->free_fids); + stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack); + for (i = 0; i < lp->free_fids; i++) + STAT_LONG("fid", stack[i]); + } + + return (0); +} +#endif diff --git a/src/dbreg/dbreg_util.c b/src/dbreg/dbreg_util.c new file mode 100644 index 00000000..6bbfc3f8 --- /dev/null +++ b/src/dbreg/dbreg_util.c @@ -0,0 +1,838 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/fop.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __dbreg_check_master __P((ENV *, u_int8_t *, char *)); + +/* + * __dbreg_add_dbentry -- + * Adds a DB entry to the dbreg DB entry table. + * + * PUBLIC: int __dbreg_add_dbentry __P((ENV *, DB_LOG *, DB *, int32_t)); + */ +int +__dbreg_add_dbentry(env, dblp, dbp, ndx) + ENV *env; + DB_LOG *dblp; + DB *dbp; + int32_t ndx; +{ + int32_t i; + int ret; + + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + + /* + * Check if we need to grow the table. Note, ndx is 0-based (the + * index into the DB entry table) an dbentry_cnt is 1-based, the + * number of available slots. 
+ */ + if (dblp->dbentry_cnt <= ndx) { + if ((ret = __os_realloc(env, + (size_t)(ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY), + &dblp->dbentry)) != 0) + goto err; + + /* Initialize the new entries. */ + for (i = dblp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) { + dblp->dbentry[i].dbp = NULL; + dblp->dbentry[i].deleted = 0; + } + dblp->dbentry_cnt = i; + } + + DB_ASSERT(env, dblp->dbentry[ndx].dbp == NULL); + dblp->dbentry[ndx].deleted = dbp == NULL; + dblp->dbentry[ndx].dbp = dbp; + +err: MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_rem_dbentry + * Remove an entry from the DB entry table. + * + * PUBLIC: int __dbreg_rem_dbentry __P((DB_LOG *, int32_t)); + */ +int +__dbreg_rem_dbentry(dblp, ndx) + DB_LOG *dblp; + int32_t ndx; +{ + MUTEX_LOCK(dblp->env, dblp->mtx_dbreg); + if (dblp->dbentry_cnt > ndx) { + dblp->dbentry[ndx].dbp = NULL; + dblp->dbentry[ndx].deleted = 0; + } + MUTEX_UNLOCK(dblp->env, dblp->mtx_dbreg); + + return (0); +} + +/* + * __dbreg_log_files -- + * Put a DBREG_CHKPNT/CLOSE log record for each open database. + * + * PUBLIC: int __dbreg_log_files __P((ENV *, u_int32_t)); + */ +int +__dbreg_log_files(env, opcode) + ENV *env; + u_int32_t opcode; +{ + DBT *dbtp, fid_dbt, t; + DB_LOG *dblp; + DB_LSN r_unused; + FNAME *fnp; + LOG *lp; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ret = 0; + + MUTEX_LOCK(env, lp->mtx_filelist); + + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + /* This id was revoked by a switch in replication master. */ + if (fnp->id == DB_LOGFILEID_INVALID) + continue; + if (fnp->fname_off == INVALID_ROFF) + dbtp = NULL; + else { + memset(&t, 0, sizeof(t)); + t.data = R_ADDR(&dblp->reginfo, fnp->fname_off); + t.size = (u_int32_t)strlen(t.data) + 1; + dbtp = &t; + } + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = fnp->ufid; + fid_dbt.size = DB_FILE_ID_LEN; + /* + * Output DBREG_CHKPNT records which will be processed during + * the OPENFILES pass of recovery. 
At the end of recovery we + * want to output the files that were open so a future recovery + * run will have the correct files open during a backward pass. + * For this we output DBREG_RCLOSE records so the files will be + * closed on the forward pass. + */ + if ((ret = __dbreg_register_log(env, NULL, &r_unused, + F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE, + opcode | F_ISSET(fnp, DB_FNAME_DBREG_MASK), + dbtp, &fid_dbt, fnp->id, fnp->s_type, fnp->meta_pgno, + TXN_INVALID)) != 0) + break; + } + + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} + +/* + * __dbreg_log_nofiles -- + * + * PUBLIC: int __dbreg_log_nofiles __P((ENV *)); + */ +int +__dbreg_log_nofiles(env) + ENV *env; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + return (SH_TAILQ_EMPTY(&lp->fq)); +} +/* + * __dbreg_close_files -- + * Remove the id's of open files and actually close those + * files that were opened by the recovery daemon. We sync the + * file, unless its mpf pointer has been NULLed by a db_remove or + * db_rename. We may not have flushed the log_register record that + * closes the file. + * + * PUBLIC: int __dbreg_close_files __P((ENV *, int)); + */ +int +__dbreg_close_files(env, do_restored) + ENV *env; + int do_restored; +{ + DB *dbp; + DB_LOG *dblp; + int ret, t_ret; + int32_t i; + + /* If we haven't initialized logging, we have nothing to do. */ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + for (i = 0; i < dblp->dbentry_cnt; i++) { + /* + * We only want to close dbps that recovery opened. Any + * dbps that weren't opened by recovery but show up here + * are about to be unconditionally removed from the table. + * Before doing so, we need to revoke their log fileids + * so that we don't end up leaving around FNAME entries + * for dbps that shouldn't have them. 
+ */ + if ((dbp = dblp->dbentry[i].dbp) != NULL) { + /* + * It's unsafe to call DB->close or revoke_id + * while holding the thread lock, because + * we'll call __dbreg_rem_dbentry and grab it again. + * + * Just drop it. Since dbreg ids go monotonically + * upward, concurrent opens should be safe, and the + * user should have no business closing files while + * we're in this loop anyway--we're in the process of + * making all outstanding dbps invalid. + */ + /* + * If we only want to close those FNAMES marked + * as restored, check now. + */ + if (do_restored && + !F_ISSET(dbp->log_filename, DB_FNAME_RESTORED)) + continue; + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + if (F_ISSET(dbp, DB_AM_RECOVER)) + t_ret = __db_close(dbp, + NULL, dbp->mpf == NULL ? DB_NOSYNC : 0); + else + t_ret = __dbreg_revoke_id( + dbp, 0, DB_LOGFILEID_INVALID); + if (ret == 0) + ret = t_ret; + MUTEX_LOCK(env, dblp->mtx_dbreg); + } + + dblp->dbentry[i].deleted = 0; + dblp->dbentry[i].dbp = NULL; + } + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_close_file -- + * Close a database file opened by recovery. + * PUBLIC: int __dbreg_close_file __P((ENV *, FNAME *)); + */ +int +__dbreg_close_file(env, fnp) + ENV *env; + FNAME *fnp; +{ + DB *dbp; + DB_LOG *dblp; + + dblp = env->lg_handle; + + dbp = dblp->dbentry[fnp->id].dbp; + if (dbp == NULL) + return (0); + DB_ASSERT(env, dbp->log_filename == fnp); + DB_ASSERT(env, F_ISSET(dbp, DB_AM_RECOVER)); + return (__db_close(dbp, NULL, DB_NOSYNC)); +} + +/* + * __dbreg_mark_restored -- + * Mark files when we change replication roles and there are outstanding + * prepared txns that may use these files. These will be invalidated later + * when all outstanding prepared txns are resolved. + * + * PUBLIC: int __dbreg_mark_restored __P((ENV *)); + */ +int +__dbreg_mark_restored(env) + ENV *env; +{ + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + + /* If we haven't initialized logging, we have nothing to do. 
*/ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (fnp->id != DB_LOGFILEID_INVALID) + F_SET(fnp, DB_FNAME_RESTORED); + + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); +} + +/* + * __dbreg_invalidate_files -- + * Invalidate files when we change replication roles. Save the + * id so that another process will be able to clean up the information + * when it notices. + * + * PUBLIC: int __dbreg_invalidate_files __P((ENV *, int)); + */ +int +__dbreg_invalidate_files(env, do_restored) + ENV *env; + int do_restored; +{ + DB_LOG *dblp; + FNAME *fnp; + LOG *lp; + int ret; + + /* If we haven't initialized logging, we have nothing to do. */ + if (!LOGGING_ON(env)) + return (0); + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ret = 0; + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) { + /* + * Normally, skip any file with DB_FNAME_RESTORED + * set. If do_restored is set, only invalidate + * those files with the flag set and skip all others. + */ + if (F_ISSET(fnp, DB_FNAME_RESTORED) && !do_restored) + continue; + if (!F_ISSET(fnp, DB_FNAME_RESTORED) && do_restored) + continue; + if (fnp->id != DB_LOGFILEID_INVALID) { + if ((ret = __dbreg_log_close(env, + fnp, NULL, DBREG_RCLOSE)) != 0) + goto err; + fnp->old_id = fnp->id; + fnp->id = DB_LOGFILEID_INVALID; + } + } +err: MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} + +/* + * __dbreg_id_to_db -- + * Return the DB corresponding to the specified dbreg id. 
+ * + * PUBLIC: int __dbreg_id_to_db __P((ENV *, DB_TXN *, DB **, int32_t, int)); + */ +int +__dbreg_id_to_db(env, txn, dbpp, ndx, tryopen) + ENV *env; + DB_TXN *txn; + DB **dbpp; + int32_t ndx; + int tryopen; +{ + DB_LOG *dblp; + FNAME *fname; + int ret; + char *name; + + dblp = env->lg_handle; + ret = 0; + + MUTEX_LOCK(env, dblp->mtx_dbreg); + + /* + * We take a final parameter that indicates whether we should attempt + * to open the file if no mapping is found. During recovery, the + * recovery routines all want to try to open the file (and this is + * called from __dbreg_id_to_db), however, if we have a multi-process + * environment where some processes may not have the files open, + * then we also get called from __dbreg_assign_id and it's OK if + * there is no mapping. + * + * Under failchk, a process different than the one issuing DB + * operations may abort a transaction. In this case, the "recovery" + * routines are run by a process that does not necessarily have the + * file open, so we we must open the file explicitly. + */ + if (ndx >= dblp->dbentry_cnt || + (!dblp->dbentry[ndx].deleted && dblp->dbentry[ndx].dbp == NULL)) { + if (!tryopen || F_ISSET(dblp, DBLOG_RECOVER)) { + ret = ENOENT; + goto err; + } + + /* + * __dbreg_id_to_fname acquires the mtx_filelist mutex, which + * we can't safely acquire while we hold the thread lock. We + * no longer need it anyway--the dbentry table didn't have what + * we needed. + */ + MUTEX_UNLOCK(env, dblp->mtx_dbreg); + + if (__dbreg_id_to_fname(dblp, ndx, 0, &fname) != 0) + /* + * With transactional opens, we may actually have + * closed this file in the transaction in which + * case this will fail too. Then it's up to the + * caller to reopen the file. + */ + return (ENOENT); + + /* + * Note that we're relying on fname not to change, even though + * we released the mutex that protects it (mtx_filelist) inside + * __dbreg_id_to_fname. 
This should be a safe assumption, the + * other process that has the file open shouldn't be closing it + * while we're trying to abort. + */ + name = fname->fname_off == INVALID_ROFF ? + NULL : R_ADDR(&dblp->reginfo, fname->fname_off); + + /* + * At this point, we are not holding the thread lock, so exit + * directly instead of going through the exit code at the + * bottom. If the __dbreg_do_open succeeded, then we don't need + * to do any of the remaining error checking at the end of this + * routine. + * If TXN_INVALID is passed then no txnlist is needed. + */ + if ((ret = __dbreg_do_open(env, txn, dblp, + fname->ufid, name, fname->s_type, ndx, fname->meta_pgno, + NULL, TXN_INVALID, F_ISSET(fname, DB_FNAME_INMEM) ? + DBREG_REOPEN : DBREG_OPEN)) != 0) + return (ret); + + *dbpp = dblp->dbentry[ndx].dbp; + return (*dbpp == NULL ? DB_DELETED : 0); + } + + /* + * Return DB_DELETED if the file has been deleted (it's not an error). + */ + if (dblp->dbentry[ndx].deleted) { + ret = DB_DELETED; + goto err; + } + + /* It's an error if we don't have a corresponding writable DB. */ + if ((*dbpp = dblp->dbentry[ndx].dbp) == NULL) + ret = ENOENT; + else + /* + * If we are in recovery, then set that the file has + * been written. It is possible to run recovery, + * find all the pages in their post update state + * in the OS buffer pool, put a checkpoint in the log + * and then crash the system without forcing the pages + * to disk. If this is an in-memory file, we may not have + * an mpf yet. + */ + if ((*dbpp)->mpf != NULL && (*dbpp)->mpf->mfp != NULL) + (*dbpp)->mpf->mfp->file_written = 1; + +err: MUTEX_UNLOCK(env, dblp->mtx_dbreg); + return (ret); +} + +/* + * __dbreg_id_to_fname -- + * Traverse the shared-memory region looking for the entry that + * matches the passed dbreg id. Returns 0 on success; -1 on error. 
+ * + * PUBLIC: int __dbreg_id_to_fname __P((DB_LOG *, int32_t, int, FNAME **)); + */ +int +__dbreg_id_to_fname(dblp, id, have_lock, fnamep) + DB_LOG *dblp; + int32_t id; + int have_lock; + FNAME **fnamep; +{ + ENV *env; + FNAME *fnp; + LOG *lp; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + ret = -1; + + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (fnp->id == id) { + *fnamep = fnp; + ret = 0; + break; + } + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} +/* + * __dbreg_fid_to_fname -- + * Traverse the shared-memory region looking for the entry that + * matches the passed file unique id. Returns 0 on success; -1 on error. + * + * PUBLIC: int __dbreg_fid_to_fname __P((DB_LOG *, u_int8_t *, int, FNAME **)); + */ +int +__dbreg_fid_to_fname(dblp, fid, have_lock, fnamep) + DB_LOG *dblp; + u_int8_t *fid; + int have_lock; + FNAME **fnamep; +{ + ENV *env; + FNAME *fnp; + LOG *lp; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + ret = -1; + + if (!have_lock) + MUTEX_LOCK(env, lp->mtx_filelist); + SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) + if (memcmp(fnp->ufid, fid, DB_FILE_ID_LEN) == 0) { + *fnamep = fnp; + ret = 0; + break; + } + if (!have_lock) + MUTEX_UNLOCK(env, lp->mtx_filelist); + + return (ret); +} + +/* + * __dbreg_get_name + * + * Interface to get name of registered files. This is mainly diagnostic + * and the name passed could be transient unless there is something + * ensuring that the file cannot be closed. + * + * PUBLIC: int __dbreg_get_name __P((ENV *, u_int8_t *, char **, char **)); + */ +int +__dbreg_get_name(env, fid, fnamep, dnamep) + ENV *env; + u_int8_t *fid; + char **fnamep, **dnamep; +{ + DB_LOG *dblp; + FNAME *fnp; + + dblp = env->lg_handle; + + if (dblp != NULL && __dbreg_fid_to_fname(dblp, fid, 0, &fnp) == 0) { + *fnamep = fnp->fname_off == INVALID_ROFF ? 
+ NULL : R_ADDR(&dblp->reginfo, fnp->fname_off); + *dnamep = fnp->dname_off == INVALID_ROFF ? + NULL : R_ADDR(&dblp->reginfo, fnp->dname_off); + return (0); + } + + *fnamep = *dnamep = NULL; + return (-1); +} + +/* + * __dbreg_do_open -- + * Open files referenced in the log. This is the part of the open that + * is not protected by the thread mutex. + * PUBLIC: int __dbreg_do_open __P((ENV *, + * PUBLIC: DB_TXN *, DB_LOG *, u_int8_t *, char *, DBTYPE, + * PUBLIC: int32_t, db_pgno_t, void *, u_int32_t, u_int32_t)); + */ +int +__dbreg_do_open(env, + txn, lp, uid, name, ftype, ndx, meta_pgno, info, id, opcode) + ENV *env; + DB_TXN *txn; + DB_LOG *lp; + u_int8_t *uid; + char *name; + DBTYPE ftype; + int32_t ndx; + db_pgno_t meta_pgno; + void *info; + u_int32_t id, opcode; +{ + DB *dbp; + u_int32_t cstat, ret_stat; + int ret, t_ret, try_inmem; + char *dname, *fname; + + cstat = TXN_EXPECTED; + fname = name; + dname = NULL; + try_inmem = 0; + +retry_inmem: + if ((ret = __db_create_internal(&dbp, lp->env, 0)) != 0) + return (ret); + + /* + * We can open files under a number of different scenarios. + * First, we can open a file during a normal txn_abort, if that file + * was opened and closed during the transaction (as is the master + * database of a sub-database). + * Second, we might be aborting a transaction in a process other than + * the one that did it (failchk). + * Third, we might be in recovery. + * In case 3, there is no locking, so there is no issue. + * In cases 1 and 2, we are guaranteed to already hold any locks + * that we need, since we're still in the same transaction, so by + * setting DB_AM_RECOVER, we guarantee that we don't log and that + * we don't try to acquire locks on behalf of a different locker id. 
+ */ + F_SET(dbp, DB_AM_RECOVER); + if (meta_pgno != PGNO_BASE_MD) { + memcpy(dbp->fileid, uid, DB_FILE_ID_LEN); + dbp->meta_pgno = meta_pgno; + } + + if (opcode == DBREG_PREOPEN) { + dbp->type = ftype; + if ((ret = __dbreg_setup(dbp, name, NULL, id)) != 0) + goto err; + MAKE_INMEM(dbp); + goto skip_open; + } + + if (opcode == DBREG_REOPEN || try_inmem) { + MAKE_INMEM(dbp); + fname = NULL; + dname = name; + } + + if ((ret = __db_open(dbp, NULL, txn, fname, dname, ftype, + DB_DURABLE_UNKNOWN | DB_ODDFILESIZE, + DB_MODE_600, meta_pgno)) == 0) { +skip_open: + /* + * Verify that we are opening the same file that we were + * referring to when we wrote this log record. + */ + if ((meta_pgno != PGNO_BASE_MD && + __dbreg_check_master(env, uid, name) != 0) || + memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) + cstat = TXN_UNEXPECTED; + else + cstat = TXN_EXPECTED; + + /* Assign the specific dbreg id to this dbp. */ + if ((ret = __dbreg_assign_id(dbp, ndx, 0)) != 0) + goto err; + + /* + * Record the newly-opened file in the transaction so it closed + * when the transaction ends. Decrement the reference count + * because there will be no explicit close for this handle and + * we want it to be closed when the transaction ends. + */ + if (txn != NULL && (ret = + __txn_record_fname(env, txn, dbp->log_filename)) != 0) + goto err; + --dbp->log_filename->txn_ref; + + /* + * If we successfully opened this file, then we need to + * convey that information to the txnlist so that we + * know how to handle the subtransaction that created + * the file system object. + */ + if (id != TXN_INVALID) + ret = __db_txnlist_update(env, + info, id, cstat, NULL, &ret_stat, 1); + +err: if (cstat == TXN_UNEXPECTED) + goto not_right; + return (ret); + } else if (ret == ENOENT) { + /* + * If the open failed with ENOENT, retry it as a named in-mem + * database. Some record types do not distinguish between a + * named in-memory database and one on-disk. 
Therefore, an + * internal init via replication that is trying to open and + * access this as a named in-mem database will not find it + * on-disk, and we need to try to open it in-memory too. + * But don't do this for [P]REOPEN, since we're already + * handling those cases specially, above. + */ + if (try_inmem == 0 && + opcode != DBREG_PREOPEN && opcode != DBREG_REOPEN) { + if ((ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0) + return (ret); + try_inmem = 1; + goto retry_inmem; + } else if (try_inmem != 0) + CLR_INMEM(dbp); + + /* + * If it exists neither on disk nor in memory + * record that the open failed in the txnlist. + */ + if (id != TXN_INVALID && (ret = __db_txnlist_update(env, + info, id, TXN_UNEXPECTED, NULL, &ret_stat, 1)) != 0) + goto not_right; + + /* + * If this is file is missing then we may have crashed + * without writing the corresponding close, record + * the open so recovery will write a close record + * with its checkpoint. If this is a backward pass then + * we are closing a non-existent file and need to mark + * it as deleted. + */ + if (dbp->log_filename == NULL && + (ret = __dbreg_setup(dbp, name, NULL, id)) != 0) + return (ret); + ret = __dbreg_assign_id(dbp, ndx, 1); + return (ret); + } +not_right: + if ((t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0) + return (ret == 0 ? t_ret : ret); + + /* Add this file as deleted. 
*/ + if ((t_ret = __dbreg_add_dbentry(env, lp, NULL, ndx)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__dbreg_check_master(env, uid, name) + ENV *env; + u_int8_t *uid; + char *name; +{ + DB *dbp; + int ret; + + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + F_SET(dbp, DB_AM_RECOVER); + ret = __db_open(dbp, NULL, NULL, + name, NULL, DB_BTREE, 0, DB_MODE_600, PGNO_BASE_MD); + + if (ret == 0 && memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) + ret = EINVAL; + + (void)__db_close(dbp, NULL, 0); + return (ret); +} + +/* + * __dbreg_lazy_id -- + * When a replication client gets upgraded to being a replication master, + * it may have database handles open that have not been assigned an ID, but + * which have become legal to use for logging. + * + * This function lazily allocates a new ID for such a function, in a + * new transaction created for the purpose. We need to do this in a new + * transaction because we definitely wish to commit the dbreg_register, but + * at this point we have no way of knowing whether the log record that incited + * us to call this will be part of a committed transaction. + * + * We first revoke any old id this handle may have had. That can happen + * if a master becomes a client and then becomes a master again and + * there are other processes with valid open handles to this env. + * + * PUBLIC: int __dbreg_lazy_id __P((DB *)); + */ +int +__dbreg_lazy_id(dbp) + DB *dbp; +{ + DB_LOG *dblp; + DB_TXN *txn; + ENV *env; + FNAME *fnp; + LOG *lp; + int32_t id; + int ret; + + env = dbp->env; + + DB_ASSERT(env, IS_REP_MASTER(env) || F_ISSET(dbp, DB_AM_NOT_DURABLE)); + + env = dbp->env; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + fnp = dbp->log_filename; + + /* The mtx_filelist protects the FNAME list and id management. 
*/ + MUTEX_LOCK(env, lp->mtx_filelist); + if (fnp->id != DB_LOGFILEID_INVALID) { + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (0); + } + id = DB_LOGFILEID_INVALID; + /* + * When we became master we moved the fnp->id to old_id in + * every FNAME structure that was open. If our id was changed, + * we need to revoke and give back that id. + */ + if (fnp->old_id != DB_LOGFILEID_INVALID && + (ret = __dbreg_revoke_id(dbp, 1, DB_LOGFILEID_INVALID)) != 0) + goto err; + if ((ret = __txn_begin(env, NULL, NULL, &txn, DB_IGNORE_LEASE)) != 0) + goto err; + + if ((ret = __dbreg_get_id(dbp, txn, &id)) != 0) { + (void)__txn_abort(txn); + goto err; + } + + if ((ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0) + goto err; + + /* + * All DB related logging routines check the id value *without* + * holding the mtx_filelist to know whether we need to call + * dbreg_lazy_id to begin with. We must set the ID after a + * *successful* commit so that there is no possibility of a second + * modification call finding a valid ID in the dbp before the + * dbreg_register and commit records are in the log. + * If there was an error, then we call __dbreg_revoke_id to + * remove the entry from the lists. + */ + fnp->id = id; +err: + if (ret != 0 && id != DB_LOGFILEID_INVALID) + (void)__dbreg_revoke_id(dbp, 1, id); + MUTEX_UNLOCK(env, lp->mtx_filelist); + return (ret); +} diff --git a/src/env/env_alloc.c b/src/env/env_alloc.c new file mode 100644 index 00000000..4749e844 --- /dev/null +++ b/src/env/env_alloc.c @@ -0,0 +1,757 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * Implement shared memory region allocation. The initial list is a single + * memory "chunk" which is carved up as memory is requested. Chunks are + * coalesced when free'd. 
We maintain two types of linked-lists: a list of + * all chunks sorted by address, and a set of lists with free chunks sorted + * by size. + * + * The ALLOC_LAYOUT structure is the governing structure for the allocator. + * + * The ALLOC_ELEMENT structure is the structure that describes any single + * chunk of memory, and is immediately followed by the user's memory. + * + * The internal memory chunks are always aligned to a uintmax_t boundary so + * we don't drop core accessing the fields of the ALLOC_ELEMENT structure. + * + * The memory chunks returned to the user are aligned to a uintmax_t boundary. + * This is enforced by terminating the ALLOC_ELEMENT structure with a uintmax_t + * field as that immediately precedes the user's memory. Any caller needing + * more than uintmax_t alignment is responsible for doing alignment themselves. + */ + +typedef SH_TAILQ_HEAD(__sizeq) SIZEQ_HEAD; + +typedef struct __alloc_layout { + SH_TAILQ_HEAD(__addrq) addrq; /* Sorted by address */ + + /* + * A perfect Berkeley DB application does little allocation because + * most things are allocated on startup and never free'd. This is + * true even for the cache, because we don't free and re-allocate + * the memory associated with a cache buffer when swapping a page + * in memory for a page on disk -- unless the page is changing size. + * The latter problem is why we have multiple size queues. If the + * application's working set fits in cache, it's not a problem. If + * the application's working set doesn't fit in cache, but all of + * the databases have the same size pages, it's still not a problem. + * If the application's working set doesn't fit in cache, and its + * databases have different page sizes, we can end up walking a lot + * of 512B chunk allocations looking for an available 64KB chunk. + * + * So, we keep a set of queues, where we expect to find a chunk of + * roughly the right size at the front of the list. 
The first queue + * is chunks <= 1024, the second is <= 2048, and so on. With 11 + * queues, we have separate queues for chunks up to 1MB. + */ +#define DB_SIZE_Q_COUNT 11 + SIZEQ_HEAD sizeq[DB_SIZE_Q_COUNT]; /* Sorted by size */ +#ifdef HAVE_STATISTICS + u_int32_t pow2_size[DB_SIZE_Q_COUNT]; +#endif + +#ifdef HAVE_STATISTICS + u_int32_t success; /* Successful allocations */ + u_int32_t failure; /* Failed allocations */ + u_int32_t freed; /* Free calls */ + u_int32_t longest; /* Longest chain walked */ +#endif + uintmax_t unused; /* Guarantee alignment */ +} ALLOC_LAYOUT; + +typedef struct __alloc_element { + SH_TAILQ_ENTRY addrq; /* List by address */ + SH_TAILQ_ENTRY sizeq; /* List by size */ + + /* + * The "len" field is the total length of the chunk, not the size + * available to the caller. Use a uintmax_t to guarantee that the + * size of this struct will be aligned correctly. + */ + uintmax_t len; /* Chunk length */ + + /* + * The "ulen" field is the length returned to the caller. + * + * Set to 0 if the chunk is not currently in use. + */ + uintmax_t ulen; /* User's length */ +} ALLOC_ELEMENT; + +/* + * If the chunk can be split into two pieces, with the fragment holding at + * least 64 bytes of memory, we divide the chunk into two parts. + */ +#define SHALLOC_FRAGMENT (sizeof(ALLOC_ELEMENT) + 64) + +/* Macro to find the appropriate queue for a specific size chunk. */ +#undef SET_QUEUE_FOR_SIZE +#define SET_QUEUE_FOR_SIZE(head, q, i, len) do { \ + for (i = 0; i < DB_SIZE_Q_COUNT; ++i) { \ + q = &(head)->sizeq[i]; \ + if ((len) <= (u_int64_t)1024 << i) \ + break; \ + } \ +} while (0) + +static void __env_size_insert __P((ALLOC_LAYOUT *, ALLOC_ELEMENT *)); + +/* + * __env_alloc_init -- + * Initialize the area as one large chunk. 
+ * + * PUBLIC: void __env_alloc_init __P((REGINFO *, size_t)); + */ +void +__env_alloc_init(infop, size) + REGINFO *infop; + size_t size; +{ + ALLOC_ELEMENT *elp; + ALLOC_LAYOUT *head; + ENV *env; + u_int i; + + env = infop->env; + + /* No initialization needed for heap memory regions. */ + if (F_ISSET(env, ENV_PRIVATE)) + return; + + /* + * The first chunk of memory is the ALLOC_LAYOUT structure. + */ + head = infop->head; + memset(head, 0, sizeof(*head)); + SH_TAILQ_INIT(&head->addrq); + for (i = 0; i < DB_SIZE_Q_COUNT; ++i) + SH_TAILQ_INIT(&head->sizeq[i]); + COMPQUIET(head->unused, 0); + + /* + * The rest of the memory is the first available chunk. + */ + elp = (ALLOC_ELEMENT *)((u_int8_t *)head + sizeof(ALLOC_LAYOUT)); + elp->len = size - sizeof(ALLOC_LAYOUT); + elp->ulen = 0; + + SH_TAILQ_INSERT_HEAD(&head->addrq, elp, addrq, __alloc_element); + SH_TAILQ_INSERT_HEAD( + &head->sizeq[DB_SIZE_Q_COUNT - 1], elp, sizeq, __alloc_element); +} + +/* + * The length, the ALLOC_ELEMENT structure and an optional guard byte, + * rounded up to standard alignment. + */ +#ifdef DIAGNOSTIC +#define DB_ALLOC_SIZE(len) \ + (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT) + 1, sizeof(uintmax_t)) +#else +#define DB_ALLOC_SIZE(len) \ + (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT), sizeof(uintmax_t)) +#endif + +/* + * __env_alloc_overhead -- + * Return the overhead needed for an allocation. + * + * PUBLIC: size_t __env_alloc_overhead __P((void)); + */ +size_t +__env_alloc_overhead() +{ + return (sizeof(ALLOC_ELEMENT)); +} + +/* + * __env_alloc_size -- + * Return the space needed for an allocation, including alignment. + * + * PUBLIC: size_t __env_alloc_size __P((size_t)); + */ +size_t +__env_alloc_size(len) + size_t len; +{ + return (DB_ALLOC_SIZE(len)); +} + +/* + * __env_alloc -- + * Allocate space from the shared region. 
+ * + * PUBLIC: int __env_alloc __P((REGINFO *, size_t, void *)); + */ +int +__env_alloc(infop, len, retp) + REGINFO *infop; + size_t len; + void *retp; +{ + SIZEQ_HEAD *q; + ALLOC_ELEMENT *elp, *frag, *elp_tmp; + ALLOC_LAYOUT *head; + ENV *env; + REGION_MEM *mem; + REGINFO *envinfop; + size_t total_len; + u_int8_t *p; + u_int i; + int ret; +#ifdef HAVE_STATISTICS + u_int32_t st_search; +#endif + env = infop->env; + *(void **)retp = NULL; +#ifdef HAVE_MUTEX_SUPPORT + MUTEX_REQUIRED(env, infop->mtx_alloc); +#endif + + PERFMON3(env, mpool, env_alloc, len, infop->id, infop->type); + /* + * In a heap-backed environment, we call malloc for additional space. + * (Malloc must return memory correctly aligned for our use.) + * + * In a heap-backed environment, memory is laid out as follows: + * + * { uintmax_t total-length } { user-memory } { guard-byte } + */ + if (F_ISSET(env, ENV_PRIVATE)) { + /* + * If we are shared then we must track the allocation + * in the main environment region. + */ + if (F_ISSET(infop, REGION_SHARED)) + envinfop = env->reginfo; + else + envinfop = infop; + /* + * We need an additional uintmax_t to hold the length (and + * keep the buffer aligned on 32-bit systems). + */ + len += sizeof(uintmax_t); + if (F_ISSET(infop, REGION_TRACKED)) + len += sizeof(REGION_MEM); + +#ifdef DIAGNOSTIC + /* Plus one byte for the guard byte. */ + ++len; +#endif + /* Check if we're over the limit. */ + if (envinfop->max_alloc != 0 && + envinfop->allocated + len > envinfop->max_alloc) + return (ENOMEM); + + /* Allocate the space. 
*/ + if ((ret = __os_malloc(env, len, &p)) != 0) + return (ret); + infop->allocated += len; + if (infop != envinfop) + envinfop->allocated += len; + + *(uintmax_t *)p = len; +#ifdef DIAGNOSTIC + p[len - 1] = GUARD_BYTE; +#endif + if (F_ISSET(infop, REGION_TRACKED)) { + mem = (REGION_MEM *)(p + sizeof(uintmax_t)); + mem->next = infop->mem; + infop->mem = mem; + p += sizeof(mem); + } + *(void **)retp = p + sizeof(uintmax_t); + return (0); + } + + head = infop->head; + total_len = DB_ALLOC_SIZE(len); + + /* Find the first size queue that could satisfy the request. */ + COMPQUIET(q, NULL); +#ifdef HAVE_MMAP_EXTEND +retry: +#endif + SET_QUEUE_FOR_SIZE(head, q, i, total_len); + +#ifdef HAVE_STATISTICS + if (i >= DB_SIZE_Q_COUNT) + i = DB_SIZE_Q_COUNT - 1; + ++head->pow2_size[i]; /* Note the size of the request. */ +#endif + + /* + * Search this queue, and, if necessary, queues larger than this queue, + * looking for a chunk we can use. + */ + STAT(st_search = 0); + for (elp = NULL;; ++q) { + SH_TAILQ_FOREACH(elp_tmp, q, sizeq, __alloc_element) { + STAT(++st_search); + + /* + * Chunks are sorted from largest to smallest -- if + * this chunk is less than what we need, no chunk + * further down the list will be large enough. + */ + if (elp_tmp->len < total_len) + break; + + /* + * This chunk will do... maybe there's a better one, + * but this one will do. + */ + elp = elp_tmp; + + /* + * We might have many chunks of the same size. Stop + * looking if we won't fragment memory by picking the + * current one. + */ + if (elp_tmp->len - total_len <= SHALLOC_FRAGMENT) + break; + } + if (elp != NULL || ++i >= DB_SIZE_Q_COUNT) + break; + } + +#ifdef HAVE_STATISTICS + if (head->longest < st_search) { + head->longest = st_search; + STAT_PERFMON3(env, + mpool, longest_search, len, infop->id, st_search); + } +#endif + + /* + * If we don't find an element of the right size, try to extend + * the region, if not then we are done. 
+ */ + if (elp == NULL) { + ret = ENOMEM; +#ifdef HAVE_MMAP_EXTEND + if (infop->rp->size < infop->rp->max && + (ret = __env_region_extend(env, infop)) == 0) + goto retry; +#endif + STAT_INC_VERB(env, mpool, fail, head->failure, len, infop->id); + return (ret); + } + STAT_INC_VERB(env, mpool, alloc, head->success, len, infop->id); + + /* Pull the chunk off of the size queue. */ + SH_TAILQ_REMOVE(q, elp, sizeq, __alloc_element); + + if (elp->len - total_len > SHALLOC_FRAGMENT) { + frag = (ALLOC_ELEMENT *)((u_int8_t *)elp + total_len); + frag->len = elp->len - total_len; + frag->ulen = 0; + + elp->len = total_len; + + /* The fragment follows the chunk on the address queue. */ + SH_TAILQ_INSERT_AFTER( + &head->addrq, elp, frag, addrq, __alloc_element); + + /* Insert the frag into the correct size queue. */ + __env_size_insert(head, frag); + } + + p = (u_int8_t *)elp + sizeof(ALLOC_ELEMENT); + elp->ulen = len; +#ifdef DIAGNOSTIC + p[len] = GUARD_BYTE; +#endif + *(void **)retp = p; + + return (0); +} + +/* + * __env_alloc_free -- + * Free space into the shared region. + * + * PUBLIC: void __env_alloc_free __P((REGINFO *, void *)); + */ +void +__env_alloc_free(infop, ptr) + REGINFO *infop; + void *ptr; +{ + ALLOC_ELEMENT *elp, *elp_tmp; + ALLOC_LAYOUT *head; + ENV *env; + SIZEQ_HEAD *q; + size_t len; + u_int8_t i, *p; + + env = infop->env; + + /* In a private region, we call free. */ + if (F_ISSET(env, ENV_PRIVATE)) { + /* Find the start of the memory chunk and its length. */ + p = (u_int8_t *)((uintmax_t *)ptr - 1); + len = (size_t)*(uintmax_t *)p; + + infop->allocated -= len; + if (F_ISSET(infop, REGION_SHARED)) + env->reginfo->allocated -= len; + +#ifdef DIAGNOSTIC + /* Check the guard byte. */ + DB_ASSERT(env, p[len - 1] == GUARD_BYTE); + + /* Trash the memory chunk. 
*/ + memset(p, CLEAR_BYTE, len); +#endif + __os_free(env, p); + return; + } + +#ifdef HAVE_MUTEX_SUPPORT + MUTEX_REQUIRED(env, infop->mtx_alloc); +#endif + + head = infop->head; + + p = ptr; + elp = (ALLOC_ELEMENT *)(p - sizeof(ALLOC_ELEMENT)); + + STAT_INC_VERB(env, mpool, free, head->freed, elp->ulen, infop->id); + +#ifdef DIAGNOSTIC + /* Check the guard byte. */ + DB_ASSERT(env, p[elp->ulen] == GUARD_BYTE); + + /* Trash the memory chunk. */ + memset(p, CLEAR_BYTE, (size_t)elp->len - sizeof(ALLOC_ELEMENT)); +#endif + + /* Mark the memory as no longer in use. */ + elp->ulen = 0; + + /* + * Try and merge this chunk with chunks on either side of it. Two + * chunks can be merged if they're contiguous and not in use. + */ + if ((elp_tmp = + SH_TAILQ_PREV(&head->addrq, elp, addrq, __alloc_element)) != NULL && + elp_tmp->ulen == 0 && + (u_int8_t *)elp_tmp + elp_tmp->len == (u_int8_t *)elp) { + /* + * If we're merging the entry into a previous entry, remove the + * current entry from the addr queue and the previous entry from + * its size queue, and merge. + */ + SH_TAILQ_REMOVE(&head->addrq, elp, addrq, __alloc_element); + SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len); + SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element); + + elp_tmp->len += elp->len; + elp = elp_tmp; + } + if ((elp_tmp = SH_TAILQ_NEXT(elp, addrq, __alloc_element)) != NULL && + elp_tmp->ulen == 0 && + (u_int8_t *)elp + elp->len == (u_int8_t *)elp_tmp) { + /* + * If we're merging the current entry into a subsequent entry, + * remove the subsequent entry from the addr and size queues + * and merge. + */ + SH_TAILQ_REMOVE(&head->addrq, elp_tmp, addrq, __alloc_element); + SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len); + SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element); + + elp->len += elp_tmp->len; + } + + /* Insert in the correct place in the size queues. */ + __env_size_insert(head, elp); +} + +/* + * __env_alloc_extend -- + * Extend a previously allocated chunk at the end of a region. 
+ * + * PUBLIC: int __env_alloc_extend __P((REGINFO *, void *, size_t *)); + */ +int +__env_alloc_extend(infop, ptr, lenp) + REGINFO *infop; + void *ptr; + size_t *lenp; +{ + ALLOC_ELEMENT *elp, *elp_tmp; + ALLOC_LAYOUT *head; + ENV *env; + SIZEQ_HEAD *q; + size_t len, tlen; + u_int8_t i, *p; + int ret; + + env = infop->env; + + DB_ASSERT(env, !F_ISSET(env, ENV_PRIVATE)); + +#ifdef HAVE_MUTEX_SUPPORT + MUTEX_REQUIRED(env, infop->mtx_alloc); +#endif + + head = infop->head; + + p = ptr; + len = *lenp; + elp = (ALLOC_ELEMENT *)(p - sizeof(ALLOC_ELEMENT)); +#ifdef DIAGNOSTIC + /* Check the guard byte. */ + DB_ASSERT(env, p[elp->ulen] == GUARD_BYTE); +#endif + + /* See if there is anything left in the region. */ +again: if ((elp_tmp = SH_TAILQ_NEXT(elp, addrq, __alloc_element)) != NULL && + elp_tmp->ulen == 0 && + (u_int8_t *)elp + elp->len == (u_int8_t *)elp_tmp) { + /* + * If we're merging the current entry into a subsequent entry, + * remove the subsequent entry from the addr and size queues + * and merge. + */ + SH_TAILQ_REMOVE(&head->addrq, elp_tmp, addrq, __alloc_element); + SET_QUEUE_FOR_SIZE(head, q, i, elp_tmp->len); + SH_TAILQ_REMOVE(q, elp_tmp, sizeq, __alloc_element); + if (elp_tmp->len < len + SHALLOC_FRAGMENT) { + elp->len += elp_tmp->len; + if (elp_tmp->len < len) + len -= (size_t)elp_tmp->len; + else + len = 0; + } else { + tlen = (size_t)elp_tmp->len; + elp_tmp = (ALLOC_ELEMENT *) ((u_int8_t *)elp_tmp + len); + elp_tmp->len = tlen - len; + elp_tmp->ulen = 0; + elp->len += len; + len = 0; + + /* The fragment follows the on the address queue. */ + SH_TAILQ_INSERT_AFTER( + &head->addrq, elp, elp_tmp, addrq, __alloc_element); + + /* Insert the frag into the correct size queue. 
*/ + __env_size_insert(head, elp_tmp); + } + } else if (elp_tmp != NULL) { + __db_errx(env, DB_STR("1583", "block not at end of region")); + return (__env_panic(env, EINVAL)); + } + if (len == 0) + goto done; + + if ((ret = __env_region_extend(env, infop)) != 0) { + if (ret != ENOMEM) + return (ret); + goto done; + } + goto again; + +done: elp->ulen = elp->len - sizeof(ALLOC_ELEMENT); +#ifdef DIAGNOSTIC + elp->ulen -= sizeof(uintmax_t); + /* There was room for the guarrd byte in the chunk that came in. */ + p[elp->ulen] = GUARD_BYTE; +#endif + *lenp -= len; + infop->allocated += *lenp; + if (F_ISSET(infop, REGION_SHARED)) + env->reginfo->allocated += *lenp; + return (0); +} + +/* + * __env_size_insert -- + * Insert into the correct place in the size queues. + */ +static void +__env_size_insert(head, elp) + ALLOC_LAYOUT *head; + ALLOC_ELEMENT *elp; +{ + SIZEQ_HEAD *q; + ALLOC_ELEMENT *elp_tmp; + u_int i; + + /* Find the appropriate queue for the chunk. */ + SET_QUEUE_FOR_SIZE(head, q, i, elp->len); + + /* Find the correct slot in the size queue. */ + SH_TAILQ_FOREACH(elp_tmp, q, sizeq, __alloc_element) + if (elp->len >= elp_tmp->len) + break; + if (elp_tmp == NULL) + SH_TAILQ_INSERT_TAIL(q, elp, sizeq); + else + SH_TAILQ_INSERT_BEFORE(q, elp_tmp, elp, sizeq, __alloc_element); +} + +/* + * __env_region_extend -- + * Extend a region. 
+ * + * PUBLIC: int __env_region_extend __P((ENV *, REGINFO *)); + */ +int +__env_region_extend(env, infop) + ENV *env; + REGINFO *infop; +{ + ALLOC_ELEMENT *elp; + REGION *rp; + int ret; + + DB_ASSERT(env, !F_ISSET(env, ENV_PRIVATE)); + + ret = 0; + rp = infop->rp; + if (rp->size >= rp->max) + return (ENOMEM); + elp = (ALLOC_ELEMENT *)((u_int8_t *)infop->addr + rp->size); + if (rp->size + rp->alloc > rp->max) + rp->alloc = rp->max - rp->size; + rp->size += rp->alloc; + rp->size = (size_t)ALIGNP_INC(rp->size, sizeof(size_t)); + if (infop->fhp && + (ret = __db_file_extend(env, infop->fhp, rp->size)) != 0) + return (ret); + elp->len = rp->alloc; + elp->ulen = 0; +#ifdef DIAGNOSTIC + *(u_int8_t *)(elp+1) = GUARD_BYTE; +#endif + + SH_TAILQ_INSERT_TAIL(&((ALLOC_LAYOUT *)infop->head)->addrq, elp, addrq); + __env_alloc_free(infop, elp + 1); + if (rp->alloc < MEGABYTE) + rp->alloc += rp->size; + if (rp->alloc > MEGABYTE) + rp->alloc = MEGABYTE; + return (ret); +} + +/* + * __env_elem_size -- + * Return the size of an allocated element. + * PUBLIC: uintmax_t __env_elem_size __P((ENV *, void *)); + */ +uintmax_t +__env_elem_size(env, p) + ENV *env; + void *p; +{ + ALLOC_ELEMENT *elp; + uintmax_t size; + + if (F_ISSET(env, ENV_PRIVATE)) { + size = *((uintmax_t *)p - 1); + size -= sizeof(uintmax_t); + } else { + elp = (ALLOC_ELEMENT *)((u_int8_t *)p - sizeof(ALLOC_ELEMENT)); + size = elp->ulen; + } + return (size); +} + +/* + * __env_get_chunk -- + * Return the next chunk allocated in a private region. 
+ * PUBLIC: void * __env_get_chunk __P((REGINFO *, void **, uintmax_t *)); + */ +void * +__env_get_chunk(infop, nextp, sizep) + REGINFO *infop; + void **nextp; + uintmax_t *sizep; +{ + REGION_MEM *mem; + + if (infop->mem == NULL) + return (NULL); + if (*nextp == NULL) + *nextp = infop->mem; + mem = *(REGION_MEM **)nextp; + *nextp = mem->next; + + *sizep = __env_elem_size(infop->env, mem); + *sizep -= sizeof(*mem); + + return ((void *)(mem + 1)); +} + +#ifdef HAVE_STATISTICS +/* + * __env_alloc_print -- + * Display the lists of memory chunks. + * + * PUBLIC: void __env_alloc_print __P((REGINFO *, u_int32_t)); + */ +void +__env_alloc_print(infop, flags) + REGINFO *infop; + u_int32_t flags; +{ + ALLOC_ELEMENT *elp; + ALLOC_LAYOUT *head; + ENV *env; + u_int i; + + env = infop->env; + head = infop->head; + + if (F_ISSET(env, ENV_PRIVATE)) + return; + + __db_msg(env, + "Region allocations: %lu allocations, %lu failures, %lu frees, %lu longest", + (u_long)head->success, (u_long)head->failure, (u_long)head->freed, + (u_long)head->longest); + + if (!LF_ISSET(DB_STAT_ALL)) + return; + + __db_msg(env, "%s", "Allocations by power-of-two sizes:"); + for (i = 0; i < DB_SIZE_Q_COUNT; ++i) + __db_msg(env, "%3dKB\t%lu", + (1024 << i) / 1024, (u_long)head->pow2_size[i]); + + if (!LF_ISSET(DB_STAT_ALLOC)) + return; + /* + * We don't normally display the list of address/chunk pairs, a few + * thousand lines of output is too voluminous for even DB_STAT_ALL. 
+ */ + __db_msg(env, + "Allocation list by address, offset: {chunk length, user length}"); + SH_TAILQ_FOREACH(elp, &head->addrq, addrq, __alloc_element) + __db_msg(env, "\t%#lx, %lu {%lu, %lu}", + P_TO_ULONG(elp), (u_long)R_OFFSET(infop, elp), + (u_long)elp->len, (u_long)elp->ulen); + + __db_msg(env, "Allocation free list by size: KB {chunk length}"); + for (i = 0; i < DB_SIZE_Q_COUNT; ++i) { + __db_msg(env, "%3dKB", (1024 << i) / 1024); + SH_TAILQ_FOREACH(elp, &head->sizeq[i], sizeq, __alloc_element) + __db_msg(env, + "\t%#lx {%lu}", P_TO_ULONG(elp), (u_long)elp->len); + } +} +#endif diff --git a/src/env/env_config.c b/src/env/env_config.c new file mode 100644 index 00000000..f0f4f60b --- /dev/null +++ b/src/env/env_config.c @@ -0,0 +1,744 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __config_parse __P((ENV *, char *, int)); + +/* + * __env_read_db_config -- + * Read the DB_CONFIG file. + * + * PUBLIC: int __env_read_db_config __P((ENV *)); + */ +int +__env_read_db_config(env) + ENV *env; +{ + FILE *fp; + int lc, ret; + char *p, buf[256]; + + /* Parse the config file. 
*/ + p = NULL; + if ((ret = __db_appname(env, + DB_APP_NONE, "DB_CONFIG", NULL, &p)) != 0) + return (ret); + if (p == NULL) + fp = NULL; + else { + fp = fopen(p, "r"); + __os_free(env, p); + } + + if (fp == NULL) + return (0); + + for (lc = 1; fgets(buf, sizeof(buf), fp) != NULL; ++lc) { + if ((p = strchr(buf, '\n')) == NULL) + p = buf + strlen(buf); + if (p > buf && p[-1] == '\r') + --p; + *p = '\0'; + for (p = buf; *p != '\0' && isspace((int)*p); ++p) + ; + if (*p == '\0' || *p == '#') + continue; + + if ((ret = __config_parse(env, p, lc)) != 0) + break; + } + (void)fclose(fp); + + return (ret); +} + +#undef CONFIG_GET_INT +#define CONFIG_GET_INT(s, vp) do { \ + int __ret; \ + if ((__ret = \ + __db_getlong(env->dbenv, NULL, s, 0, INT_MAX, vp)) != 0) \ + return (__ret); \ +} while (0) +#undef CONFIG_GET_LONG +#define CONFIG_GET_LONG(s, vp) do { \ + int __ret; \ + if ((__ret = \ + __db_getlong(env->dbenv, NULL, s, 0, LONG_MAX, vp)) != 0) \ + return (__ret); \ +} while (0) +#undef CONFIG_GET_UINT +#define CONFIG_GET_UINT(s, vp) do { \ + int __ret; \ + if ((__ret = \ + __db_getulong(env->dbenv, NULL, s, 0, UINT_MAX, vp)) != 0) \ + return (__ret); \ +} while (0) +#undef CONFIG_INT +#define CONFIG_INT(s, f) do { \ + if (strcasecmp(s, argv[0]) == 0) { \ + long __v; \ + if (nf != 2) \ + goto format; \ + CONFIG_GET_INT(argv[1], &__v); \ + return (f(env->dbenv, (int)__v)); \ + } \ +} while (0) +#undef CONFIG_GET_UINT32 +#define CONFIG_GET_UINT32(s, vp) do { \ + if (__db_getulong(env->dbenv, NULL, s, 0, UINT32_MAX, vp) != 0) \ + return (EINVAL); \ +} while (0) +#undef CONFIG_UINT32 +#define CONFIG_UINT32(s, f) do { \ + if (strcasecmp(s, argv[0]) == 0) { \ + u_long __v; \ + if (nf != 2) \ + goto format; \ + CONFIG_GET_UINT32(argv[1], &__v); \ + return (f(env->dbenv, (u_int32_t)__v)); \ + } \ +} while (0) + +#undef CONFIG_SLOTS +#define CONFIG_SLOTS 10 + +/* + * __config_parse -- + * Parse a single NAME VALUE pair. 
+ */ +static int +__config_parse(env, s, lc) + ENV *env; + char *s; + int lc; +{ + DB_ENV *dbenv; + DB_SITE *site; + u_long uv1, uv2; + u_int32_t flags; + long lv1, lv2; + u_int port; + int i, nf, onoff, bad, ret, t_ret; + char *argv[CONFIG_SLOTS]; + DB_MEM_CONFIG mem_conf; + + bad = 0; + dbenv = env->dbenv; + /* Split the line by white-space. */ + if ((nf = __config_split(s, argv)) < 2) { +format: __db_errx(env, DB_STR_A("1584", + "line %d: %s: incorrect name-value pair", "%d %s"), + lc, argv[0]); + return (EINVAL); + } + + if (strcasecmp(argv[0], "set_memory_max") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + return (__env_set_memory_max( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + if (strcasecmp(argv[0], "set_memory_init") == 0) { + if (nf != 3) + goto format; + if (strcasecmp(argv[1], "DB_MEM_LOCK") == 0) + mem_conf = DB_MEM_LOCK; + else if (strcasecmp(argv[1], "DB_MEM_LOCKER") == 0) + mem_conf = DB_MEM_LOCKER; + else if (strcasecmp(argv[1], "DB_MEM_LOCKOBJECT") == 0) + mem_conf = DB_MEM_LOCKOBJECT; + else if (strcasecmp(argv[1], "DB_MEM_TRANSACTION") == 0) + mem_conf = DB_MEM_TRANSACTION; + else if (strcasecmp(argv[1], "DB_MEM_THREAD") == 0) + mem_conf = DB_MEM_THREAD; + else if (strcasecmp(argv[1], "DB_MEM_LOGID") == 0) + mem_conf = DB_MEM_LOGID; + else + goto format; + CONFIG_GET_UINT32(argv[2], &uv2); + return (__env_set_memory_init(dbenv, mem_conf, (u_int32_t)uv2)); + } + + CONFIG_UINT32("mutex_set_align", __mutex_set_align); + CONFIG_UINT32("mutex_set_increment", __mutex_set_increment); + CONFIG_UINT32("mutex_set_init", __mutex_set_init); + CONFIG_UINT32("mutex_set_max", __mutex_set_max); + CONFIG_UINT32("mutex_set_tas_spins", __mutex_set_tas_spins); + + if (strcasecmp(argv[0], "rep_set_clockskew") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + return (__rep_set_clockskew( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + 
+ if (strcasecmp(argv[0], "rep_set_config") == 0) { + if (nf != 2 && nf != 3) + goto format; + onoff = 1; + if (nf == 3) { + if (strcasecmp(argv[2], "off") == 0) + onoff = 0; + else if (strcasecmp(argv[2], "on") != 0) + goto format; + } + if (strcasecmp(argv[1], "db_rep_conf_autoinit") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_AUTOINIT, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_autorollback") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_AUTOROLLBACK, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_bulk") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_BULK, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_delayclient") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_DELAYCLIENT, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_inmem") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_INMEM, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_lease") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_LEASE, onoff)); + if (strcasecmp(argv[1], "db_rep_conf_nowait") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_NOWAIT, onoff)); + if (strcasecmp(argv[1], "db_repmgr_conf_2site_strict") == 0) + return (__rep_set_config(dbenv, + DB_REPMGR_CONF_2SITE_STRICT, onoff)); + if (strcasecmp(argv[1], "db_repmgr_conf_elections") == 0) + return (__rep_set_config(dbenv, + DB_REPMGR_CONF_ELECTIONS, onoff)); + goto format; + } + + if (strcasecmp(argv[0], "rep_set_limit") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + return (__rep_set_limit( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + + if (strcasecmp(argv[0], "rep_set_nsites") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__rep_set_nsites_pp( + dbenv, (u_int32_t)uv1)); + } + + if (strcasecmp(argv[0], "rep_set_priority") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__rep_set_priority( + dbenv, (u_int32_t)uv1)); + } + + if (strcasecmp(argv[0], 
"rep_set_request") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + return (__rep_set_request( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + + if (strcasecmp(argv[0], "rep_set_timeout") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[2], &uv2); + if (strcasecmp(argv[1], "db_rep_ack_timeout") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_ACK_TIMEOUT, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_checkpoint_delay") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_CHECKPOINT_DELAY, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_connection_retry") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_CONNECTION_RETRY, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_election_timeout") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_ELECTION_TIMEOUT, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_election_retry") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_ELECTION_RETRY, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_full_election_timeout") == 0) + return (__rep_set_timeout(dbenv, + DB_REP_FULL_ELECTION_TIMEOUT, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_heartbeat_monitor") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_HEARTBEAT_MONITOR, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_heartbeat_send") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_HEARTBEAT_SEND, (u_int32_t)uv2)); + if (strcasecmp(argv[1], "db_rep_lease_timeout") == 0) + return (__rep_set_timeout( + dbenv, DB_REP_LEASE_TIMEOUT, (u_int32_t)uv2)); + goto format; + } + + if (strcasecmp(argv[0], "repmgr_set_ack_policy") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "db_repmgr_acks_all") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_ALL)); + if (strcasecmp(argv[1], "db_repmgr_acks_all_available") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_ALL_AVAILABLE)); + if (strcasecmp(argv[1], "db_repmgr_acks_all_peers") == 
0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_ALL_PEERS)); + if (strcasecmp(argv[1], "db_repmgr_acks_none") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_NONE)); + if (strcasecmp(argv[1], "db_repmgr_acks_one") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_ONE)); + if (strcasecmp(argv[1], "db_repmgr_acks_one_peer") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_ONE_PEER)); + if (strcasecmp(argv[1], "db_repmgr_acks_quorum") == 0) + return (__repmgr_set_ack_policy( + dbenv, DB_REPMGR_ACKS_QUORUM)); + goto format; + } + + /* + * Configure name/value pairs of config information for a site (local or + * remote). + * + * repmgr_site host port [which value] ... + */ + if (strcasecmp(argv[0], "repmgr_site") == 0) { + if (nf < 3 || (nf % 2) == 0) + goto format; + CONFIG_GET_UINT(argv[2], &uv2); + port = (u_int)uv2; + + if ((ret = __repmgr_site(dbenv, argv[1], port, &site, 0)) != 0) + return (ret); +#ifdef HAVE_REPLICATION_THREADS + for (i = 3; i < nf; i += 2) { + if (strcasecmp(argv[i], "db_bootstrap_helper") == 0) + uv1 = DB_BOOTSTRAP_HELPER; + else if (strcasecmp(argv[i], "db_group_creator") == 0) + uv1 = DB_GROUP_CREATOR; + else if (strcasecmp(argv[i], "db_legacy") == 0) + uv1 = DB_LEGACY; + else if (strcasecmp(argv[i], "db_local_site") == 0) + uv1 = DB_LOCAL_SITE; + else if (strcasecmp(argv[i], "db_repmgr_peer") == 0) + uv1 = DB_REPMGR_PEER; + else { + bad = 1; + break; + } + + if (strcasecmp(argv[i + 1], "on") == 0) + uv2 = 1; + else if (strcasecmp(argv[i + 1], "off") == 0) + uv2 = 0; + else + CONFIG_GET_UINT32(argv[i + 1], &uv2); + if ((ret = __repmgr_site_config(site, + (u_int32_t)uv1, (u_int32_t)uv2)) != 0) + break; + } + if ((t_ret = __repmgr_site_close(site)) != 0 && ret == 0) + ret = t_ret; + if (bad) + goto format; +#else + /* If repmgr not built, __repmgr_site() returns DB_OPNOTSUP. 
*/ + COMPQUIET(i, 0); + COMPQUIET(t_ret, 0); + DB_ASSERT(env, 0); +#endif + return (ret); + } + + if (strcasecmp(argv[0], "set_cachesize") == 0) { + if (nf != 4) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + CONFIG_GET_INT(argv[3], &lv1); + return (__memp_set_cachesize( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2, (int)lv1)); + } + + if (strcasecmp(argv[0], "set_cache_max") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + return (__memp_set_cache_max( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + + if (strcasecmp(argv[0], "set_data_dir") == 0 || + strcasecmp(argv[0], "db_data_dir") == 0) { /* Compatibility. */ + if (nf != 2) + goto format; + return (__env_set_data_dir(dbenv, argv[1])); + } + + if (strcasecmp(argv[0], "add_data_dir") == 0) { + if (nf != 2) + goto format; + return (__env_add_data_dir(dbenv, argv[1])); + } + if (strcasecmp(argv[0], "set_create_dir") == 0) { + if (nf != 2) + goto format; + return (__env_set_create_dir(dbenv, argv[1])); + } + + /* Compatibility */ + if (strcasecmp(argv[0], "set_intermediate_dir") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + if (lv1 <= 0) + goto format; + env->dir_mode = (int)lv1; + return (0); + } + if (strcasecmp(argv[0], "set_intermediate_dir_mode") == 0) { + if (nf != 2) + goto format; + return (__env_set_intermediate_dir_mode(dbenv, argv[1])); + } + + if (strcasecmp(argv[0], "set_flags") == 0) { + if (nf != 2 && nf != 3) + goto format; + onoff = 1; + if (nf == 3) { + if (strcasecmp(argv[2], "off") == 0) + onoff = 0; + else if (strcasecmp(argv[2], "on") != 0) + goto format; + } + if (strcasecmp(argv[1], "db_auto_commit") == 0) + return (__env_set_flags(dbenv, DB_AUTO_COMMIT, onoff)); + if (strcasecmp(argv[1], "db_cdb_alldb") == 0) + return (__env_set_flags(dbenv, DB_CDB_ALLDB, onoff)); + if (strcasecmp(argv[1], "db_direct_db") == 0) + return (__env_set_flags(dbenv, DB_DIRECT_DB, 
onoff)); + if (strcasecmp(argv[1], "db_dsync_db") == 0) + return (__env_set_flags(dbenv, DB_DSYNC_DB, onoff)); + if (strcasecmp(argv[1], "db_multiversion") == 0) + return (__env_set_flags(dbenv, DB_MULTIVERSION, onoff)); + if (strcasecmp(argv[1], "db_nolocking") == 0) + return (__env_set_flags(dbenv, DB_NOLOCKING, onoff)); + if (strcasecmp(argv[1], "db_nommap") == 0) + return (__env_set_flags(dbenv, DB_NOMMAP, onoff)); + if (strcasecmp(argv[1], "db_nopanic") == 0) + return (__env_set_flags(dbenv, DB_NOPANIC, onoff)); + if (strcasecmp(argv[1], "db_overwrite") == 0) + return (__env_set_flags(dbenv, DB_OVERWRITE, onoff)); + if (strcasecmp(argv[1], "db_region_init") == 0) + return (__env_set_flags(dbenv, DB_REGION_INIT, onoff)); + if (strcasecmp(argv[1], "db_time_notgranted") == 0) + return ( + __env_set_flags(dbenv, DB_TIME_NOTGRANTED, onoff)); + if (strcasecmp(argv[1], "db_txn_nosync") == 0) + return (__env_set_flags(dbenv, DB_TXN_NOSYNC, onoff)); + if (strcasecmp(argv[1], "db_txn_nowait") == 0) + return (__env_set_flags(dbenv, DB_TXN_NOWAIT, onoff)); + if (strcasecmp(argv[1], "db_txn_snapshot") == 0) + return (__env_set_flags(dbenv, DB_TXN_SNAPSHOT, onoff)); + if (strcasecmp(argv[1], "db_txn_write_nosync") == 0) + return ( + __env_set_flags(dbenv, DB_TXN_WRITE_NOSYNC, onoff)); + if (strcasecmp(argv[1], "db_yieldcpu") == 0) + return (__env_set_flags(dbenv, DB_YIELDCPU, onoff)); + if (strcasecmp(argv[1], "db_log_inmemory") == 0) + return ( + __log_set_config(dbenv, DB_LOG_IN_MEMORY, onoff)); + if (strcasecmp(argv[1], "db_direct_log") == 0) + return (__log_set_config(dbenv, DB_LOG_DIRECT, onoff)); + if (strcasecmp(argv[1], "db_dsync_log") == 0) + return (__log_set_config(dbenv, DB_LOG_DSYNC, onoff)); + if (strcasecmp(argv[1], "db_log_autoremove") == 0) + return ( + __log_set_config(dbenv, DB_LOG_AUTO_REMOVE, onoff)); + goto format; + } + if (strcasecmp(argv[0], "log_set_config") == 0) { + if (nf != 2 && nf != 3) + goto format; + onoff = 1; + if (nf == 3) { + if 
(strcasecmp(argv[2], "off") == 0) + onoff = 0; + else if (strcasecmp(argv[2], "on") != 0) + goto format; + } + if (strcasecmp(argv[1], "db_log_auto_remove") == 0) + return ( + __log_set_config(dbenv, DB_LOG_AUTO_REMOVE, onoff)); + if (strcasecmp(argv[1], "db_log_direct") == 0) + return (__log_set_config(dbenv, DB_LOG_DIRECT, onoff)); + if (strcasecmp(argv[1], "db_log_dsync") == 0) + return (__log_set_config(dbenv, DB_LOG_DSYNC, onoff)); + if (strcasecmp(argv[1], "db_log_in_memory") == 0) + return ( + __log_set_config(dbenv, DB_LOG_IN_MEMORY, onoff)); + if (strcasecmp(argv[1], "db_log_zero") == 0) + return (__log_set_config(dbenv, DB_LOG_ZERO, onoff)); + goto format; + } + + CONFIG_UINT32("set_data_len", __env_set_data_len); + CONFIG_UINT32("set_lg_bsize", __log_set_lg_bsize); + CONFIG_INT("set_lg_filemode", __log_set_lg_filemode); + CONFIG_UINT32("set_lg_max", __log_set_lg_max); + CONFIG_UINT32("set_lg_regionmax", __log_set_lg_regionmax); + + if (strcasecmp(argv[0], "set_lg_dir") == 0 || + strcasecmp(argv[0], "db_log_dir") == 0) { /* Compatibility. 
*/ + if (nf != 2) + goto format; + return (__log_set_lg_dir(dbenv, argv[1])); + } + + if (strcasecmp(argv[0], "set_lk_detect") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "db_lock_default") == 0) + flags = DB_LOCK_DEFAULT; + else if (strcasecmp(argv[1], "db_lock_expire") == 0) + flags = DB_LOCK_EXPIRE; + else if (strcasecmp(argv[1], "db_lock_maxlocks") == 0) + flags = DB_LOCK_MAXLOCKS; + else if (strcasecmp(argv[1], "db_lock_maxwrite") == 0) + flags = DB_LOCK_MAXWRITE; + else if (strcasecmp(argv[1], "db_lock_minlocks") == 0) + flags = DB_LOCK_MINLOCKS; + else if (strcasecmp(argv[1], "db_lock_minwrite") == 0) + flags = DB_LOCK_MINWRITE; + else if (strcasecmp(argv[1], "db_lock_oldest") == 0) + flags = DB_LOCK_OLDEST; + else if (strcasecmp(argv[1], "db_lock_random") == 0) + flags = DB_LOCK_RANDOM; + else if (strcasecmp(argv[1], "db_lock_youngest") == 0) + flags = DB_LOCK_YOUNGEST; + else + goto format; + return (__lock_set_lk_detect(dbenv, flags)); + } + + CONFIG_UINT32("set_lk_max_locks", __lock_set_lk_max_locks); + CONFIG_UINT32("set_lk_max_lockers", __lock_set_lk_max_lockers); + CONFIG_UINT32("set_lk_max_objects", __lock_set_lk_max_objects); + CONFIG_UINT32("set_lk_partitions", __lock_set_lk_partitions); + CONFIG_UINT32("set_lk_tablesize", __lock_set_lk_tablesize); + + if (strcasecmp(argv[0], "set_lock_timeout") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)uv1, DB_SET_LOCK_TIMEOUT)); + } + + CONFIG_INT("set_mp_max_openfd", __memp_set_mp_max_openfd); + CONFIG_UINT32("set_mp_mtxcount", __memp_set_mp_mtxcount); + CONFIG_UINT32("set_mp_pagesize", __memp_set_mp_pagesize); + + if (strcasecmp(argv[0], "set_mp_max_write") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + CONFIG_GET_INT(argv[2], &lv2); + return (__memp_set_mp_max_write( + dbenv, (int)lv1, (db_timeout_t)lv2)); + } + + CONFIG_UINT32("set_mp_mmapsize", __memp_set_mp_mmapsize); + + if 
(strcasecmp(argv[0], "set_open_flags") == 0) { + if (nf != 2 && nf != 3) + goto format; + onoff = 1; + if (nf == 3) { + if (strcasecmp(argv[2], "off") == 0) + onoff = 0; + else if (strcasecmp(argv[2], "on") != 0) + goto format; + } + if (strcasecmp(argv[1], "db_init_rep") == 0) { + if (onoff == 1) + FLD_SET(env->open_flags, DB_INIT_REP); + else + FLD_CLR(env->open_flags, DB_INIT_REP); + return (0); + } else if (strcasecmp(argv[1], "db_private") == 0) { + if (onoff == 1) + FLD_SET(env->open_flags, DB_PRIVATE); + else + FLD_CLR(env->open_flags, DB_PRIVATE); + return (0); + } else if (strcasecmp(argv[1], "db_register") == 0) { + if (onoff == 1) + FLD_SET(env->open_flags, DB_REGISTER); + else + FLD_CLR(env->open_flags, DB_REGISTER); + return (0); + } else if (strcasecmp(argv[1], "db_thread") == 0) { + if (onoff == 1) + FLD_SET(env->open_flags, DB_THREAD); + else + FLD_CLR(env->open_flags, DB_THREAD); + return (0); + } else + goto format; + } + + if (strcasecmp(argv[0], "set_region_init") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + if (lv1 != 0 && lv1 != 1) + goto format; + return (__env_set_flags( + dbenv, DB_REGION_INIT, lv1 == 0 ? 0 : 1)); + } + + if (strcasecmp(argv[0], "set_reg_timeout") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__env_set_timeout( + dbenv, (u_int32_t)uv1, DB_SET_REG_TIMEOUT)); + } + + if (strcasecmp(argv[0], "set_shm_key") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_LONG(argv[1], &lv1); + return (__env_set_shm_key(dbenv, lv1)); + } + + /* + * The set_tas_spins method has been replaced by mutex_set_tas_spins. + * The set_tas_spins argv[0] remains for DB_CONFIG compatibility. 
+ */ + CONFIG_UINT32("set_tas_spins", __mutex_set_tas_spins); + + if (strcasecmp(argv[0], "set_tmp_dir") == 0 || + strcasecmp(argv[0], "db_tmp_dir") == 0) { /* Compatibility.*/ + if (nf != 2) + goto format; + return (__env_set_tmp_dir(dbenv, argv[1])); + } + + CONFIG_UINT32("set_thread_count", __env_set_thread_count); + CONFIG_UINT32("set_tx_max", __txn_set_tx_max); + + if (strcasecmp(argv[0], "set_txn_timeout") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)uv1, DB_SET_TXN_TIMEOUT)); + } + + if (strcasecmp(argv[0], "set_verbose") == 0) { + if (nf != 2 && nf != 3) + goto format; + onoff = 1; + if (nf == 3) { + if (strcasecmp(argv[2], "off") == 0) + onoff = 0; + else if (strcasecmp(argv[2], "on") != 0) + goto format; + } + if (strcasecmp(argv[1], "db_verb_deadlock") == 0) + flags = DB_VERB_DEADLOCK; + else if (strcasecmp(argv[1], "db_verb_fileops") == 0) + flags = DB_VERB_FILEOPS; + else if (strcasecmp(argv[1], "db_verb_fileops_all") == 0) + flags = DB_VERB_FILEOPS_ALL; + else if (strcasecmp(argv[1], "db_verb_recovery") == 0) + flags = DB_VERB_RECOVERY; + else if (strcasecmp(argv[1], "db_verb_register") == 0) + flags = DB_VERB_REGISTER; + else if (strcasecmp(argv[1], "db_verb_replication") == 0) + flags = DB_VERB_REPLICATION; + else if (strcasecmp(argv[1], "db_verb_rep_elect") == 0) + flags = DB_VERB_REP_ELECT; + else if (strcasecmp(argv[1], "db_verb_rep_lease") == 0) + flags = DB_VERB_REP_LEASE; + else if (strcasecmp(argv[1], "db_verb_rep_misc") == 0) + flags = DB_VERB_REP_MISC; + else if (strcasecmp(argv[1], "db_verb_rep_msgs") == 0) + flags = DB_VERB_REP_MSGS; + else if (strcasecmp(argv[1], "db_verb_rep_sync") == 0) + flags = DB_VERB_REP_SYNC; + else if (strcasecmp(argv[1], "db_verb_rep_system") == 0) + flags = DB_VERB_REP_SYSTEM; + else if (strcasecmp(argv[1], "db_verb_rep_test") == 0) + flags = DB_VERB_REP_TEST; + else if (strcasecmp(argv[1], "db_verb_repmgr_connfail") == 0) + 
flags = DB_VERB_REPMGR_CONNFAIL; + else if (strcasecmp(argv[1], "db_verb_repmgr_misc") == 0) + flags = DB_VERB_REPMGR_MISC; + else if (strcasecmp(argv[1], "db_verb_waitsfor") == 0) + flags = DB_VERB_WAITSFOR; + else + goto format; + return (__env_set_verbose(dbenv, flags, onoff)); + } + + __db_errx(env, DB_STR_A("1585", + "unrecognized name-value pair: %s", "%s"), s); + return (EINVAL); +} + +/* + * __config_split -- + * Split lines into white-space separated fields, returning the count of + * fields. + * + * PUBLIC: int __config_split __P((char *, char *[])); + */ +int +__config_split(input, argv) + char *input, *argv[CONFIG_SLOTS]; +{ + int count; + char **ap; + + for (count = 0, ap = argv; (*ap = strsep(&input, " \t\n")) != NULL;) + if (**ap != '\0') { + ++count; + if (++ap == &argv[CONFIG_SLOTS - 1]) { + *ap = NULL; + break; + } + } + return (count); +} diff --git a/src/env/env_failchk.c b/src/env/env_failchk.c new file mode 100644 index 00000000..7f965c1d --- /dev/null +++ b/src/env/env_failchk.c @@ -0,0 +1,551 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#ifndef HAVE_SIMPLE_THREAD_TYPE +#include "dbinc/db_page.h" +#include "dbinc/hash.h" /* Needed for call to __ham_func5. */ +#endif +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __env_in_api __P((ENV *)); +static void __env_clear_state __P((ENV *)); + +/* + * __env_failchk_pp -- + * ENV->failchk pre/post processing. + * + * PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t)); + */ +int +__env_failchk_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->failchk"); + + /* + * ENV->failchk requires self and is-alive functions. 
We + * have a default self function, but no is-alive function. + */ + if (!ALIVE_ON(env)) { + __db_errx(env, DB_STR("1503", + "DB_ENV->failchk requires DB_ENV->is_alive be configured")); + return (EINVAL); + } + + if (flags != 0) + return (__db_ferr(env, "DB_ENV->failchk", 0)); + + ENV_ENTER(env, ip); + FAILCHK_THREAD(env, ip); /* mark as failchk thread */ + ret = __env_failchk_int(dbenv); + ENV_LEAVE(env, ip); + return (ret); +} +/* + * __env_failchk_int -- + * Process the subsystem failchk routines + * + * PUBLIC: int __env_failchk_int __P((DB_ENV *)); + */ +int +__env_failchk_int(dbenv) + DB_ENV *dbenv; +{ + ENV *env; + int ret; + + env = dbenv->env; + F_SET(dbenv, DB_ENV_FAILCHK); + + /* + * We check for dead threads in the API first as this would be likely + * to hang other things we try later, like locks and transactions. + */ + if ((ret = __env_in_api(env)) != 0) + goto err; + + if (LOCKING_ON(env) && (ret = __lock_failchk(env)) != 0) + goto err; + + if (TXN_ON(env) && + ((ret = __txn_failchk(env)) != 0 || + (ret = __dbreg_failchk(env)) != 0)) + goto err; + +#ifdef HAVE_REPLICATION_THREADS + if (REP_ON(env) && (ret = __repmgr_failchk(env)) != 0) + goto err; +#endif + + /* Mark any dead blocked threads as dead. */ + __env_clear_state(env); + +#ifdef HAVE_MUTEX_SUPPORT + ret = __mut_failchk(env); +#endif + +err: F_CLR(dbenv, DB_ENV_FAILCHK); + return (ret); +} + +/* + * __env_thread_size -- + * Initial amount of memory for thread info blocks. 
+ * PUBLIC: size_t __env_thread_size __P((ENV *, size_t)); + */ +size_t +__env_thread_size(env, other_alloc) + ENV *env; + size_t other_alloc; +{ + DB_ENV *dbenv; + size_t size; + u_int32_t max; + + dbenv = env->dbenv; + size = 0; + + max = dbenv->thr_max; + if (dbenv->thr_init != 0) { + size = + dbenv->thr_init * __env_alloc_size(sizeof(DB_THREAD_INFO)); + if (max < dbenv->thr_init) + max = dbenv->thr_init; + } else if (max == 0 && ALIVE_ON(env)) { + if ((max = dbenv->tx_init) == 0) { + /* + * They want thread tracking, but don't say how much. + * Arbitrarily assume 1/10 of the remaining memory + * or at least 100. We just use this to size + * the hash table. + */ + if (dbenv->memory_max != 0) + max = (u_int32_t) + (((dbenv->memory_max - other_alloc) / 10) / + sizeof(DB_THREAD_INFO)); + if (max < 100) + max = 100; + } + } + /* + * Set the number of buckets to be 1/8th the number of + * thread control blocks. This is rather arbitrary. + */ + dbenv->thr_max = max; + if (max != 0) + size += __env_alloc_size(sizeof(DB_HASHTAB) * + __db_tablesize(max / 8)); + return (size); +} + +/* + * __env_thread_max -- + * Return the amount of extra memory to hold thread information. + * PUBLIC: size_t __env_thread_max __P((ENV *)); + */ +size_t +__env_thread_max(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t size; + + dbenv = env->dbenv; + + /* + * Allocate space for thread info blocks. Max is only advisory, + * so we allocate 25% more. + */ + if (dbenv->thr_max > dbenv->thr_init) { + size = dbenv->thr_max - dbenv->thr_init; + size += size / 4; + } else { + dbenv->thr_max = dbenv->thr_init; + size = dbenv->thr_init / 4; + } + + size = size * __env_alloc_size(sizeof(DB_THREAD_INFO)); + return (size); +} + +/* + * __env_thread_init -- + * Initialize the thread control block table. 
+ * + * PUBLIC: int __env_thread_init __P((ENV *, int)); + */ +int +__env_thread_init(env, during_creation) + ENV *env; + int during_creation; +{ + DB_ENV *dbenv; + DB_HASHTAB *htab; + REGENV *renv; + REGINFO *infop; + THREAD_INFO *thread; + int ret; + + dbenv = env->dbenv; + infop = env->reginfo; + renv = infop->primary; + + if (renv->thread_off == INVALID_ROFF) { + if (dbenv->thr_max == 0) { + env->thr_hashtab = NULL; + if (ALIVE_ON(env)) { + __db_errx(env, DB_STR("1504", + "is_alive method specified but no thread region allocated")); + return (EINVAL); + } + return (0); + } + + if (!during_creation) { + __db_errx(env, DB_STR("1505", +"thread table must be allocated when the database environment is created")); + return (EINVAL); + } + + if ((ret = + __env_alloc(infop, sizeof(THREAD_INFO), &thread)) != 0) { + __db_err(env, ret, DB_STR("1506", + "unable to allocate a thread status block")); + return (ret); + } + memset(thread, 0, sizeof(*thread)); + renv->thread_off = R_OFFSET(infop, thread); + thread->thr_nbucket = __db_tablesize(dbenv->thr_max / 8); + if ((ret = __env_alloc(infop, + thread->thr_nbucket * sizeof(DB_HASHTAB), &htab)) != 0) + return (ret); + thread->thr_hashoff = R_OFFSET(infop, htab); + __db_hashinit(htab, thread->thr_nbucket); + thread->thr_max = dbenv->thr_max; + thread->thr_init = dbenv->thr_init; + } else { + thread = R_ADDR(infop, renv->thread_off); + htab = R_ADDR(infop, thread->thr_hashoff); + } + + env->thr_hashtab = htab; + env->thr_nbucket = thread->thr_nbucket; + dbenv->thr_max = thread->thr_max; + dbenv->thr_init = thread->thr_init; + return (0); +} + +/* + * __env_thread_destroy -- + * Destroy the thread control block table. 
+ * + * PUBLIC: void __env_thread_destroy __P((ENV *)); + */ +void +__env_thread_destroy(env) + ENV *env; +{ + DB_HASHTAB *htab; + DB_THREAD_INFO *ip, *np; + REGENV *renv; + REGINFO *infop; + THREAD_INFO *thread; + u_int32_t i; + + infop = env->reginfo; + renv = infop->primary; + if (renv->thread_off == INVALID_ROFF) + return; + + thread = R_ADDR(infop, renv->thread_off); + if ((htab = env->thr_hashtab) != NULL) { + for (i = 0; i < env->thr_nbucket; i++) { + ip = SH_TAILQ_FIRST(&htab[i], __db_thread_info); + for (; ip != NULL; ip = np) { + np = SH_TAILQ_NEXT(ip, + dbth_links, __db_thread_info); + __env_alloc_free(infop, ip); + } + } + __env_alloc_free(infop, htab); + } + + __env_alloc_free(infop, thread); + return; +} + +/* + * __env_in_api -- + * Look for threads which died in the api and complain. + * If no threads died but there are blocked threads unpin + * any buffers they may have locked. + */ +static int +__env_in_api(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_HASHTAB *htab; + DB_THREAD_INFO *ip; + REGENV *renv; + REGINFO *infop; + THREAD_INFO *thread; + u_int32_t i; + int unpin, ret; + + if ((htab = env->thr_hashtab) == NULL) + return (EINVAL); + + dbenv = env->dbenv; + infop = env->reginfo; + renv = infop->primary; + thread = R_ADDR(infop, renv->thread_off); + unpin = 0; + + for (i = 0; i < env->thr_nbucket; i++) + SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) { + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE || + (ip->dbth_state == THREAD_OUT && + thread->thr_count < thread->thr_max)) + continue; + if (dbenv->is_alive( + dbenv, ip->dbth_pid, ip->dbth_tid, 0)) + continue; + if (ip->dbth_state == THREAD_BLOCKED) { + ip->dbth_state = THREAD_BLOCKED_DEAD; + unpin = 1; + continue; + } + if (ip->dbth_state == THREAD_OUT) { + ip->dbth_state = THREAD_SLOT_NOT_IN_USE; + continue; + } + return (__db_failed(env, DB_STR("1507", + "Thread died in Berkeley DB library"), + ip->dbth_pid, ip->dbth_tid)); + } + + if (unpin == 0) + return (0); + + for (i = 0; i 
< env->thr_nbucket; i++) + SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) + if (ip->dbth_state == THREAD_BLOCKED_DEAD && + (ret = __memp_unpin_buffers(env, ip)) != 0) + return (ret); + + return (0); +} + +/* + * __env_clear_state -- + * Look for threads which died while blocked and clear them. + */ +static void +__env_clear_state(env) + ENV *env; +{ + DB_HASHTAB *htab; + DB_THREAD_INFO *ip; + u_int32_t i; + + htab = env->thr_hashtab; + for (i = 0; i < env->thr_nbucket; i++) + SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) + if (ip->dbth_state == THREAD_BLOCKED_DEAD) + ip->dbth_state = THREAD_SLOT_NOT_IN_USE; +} + +struct __db_threadid { + pid_t pid; + db_threadid_t tid; +}; + +/* + * PUBLIC: int __env_set_state __P((ENV *, DB_THREAD_INFO **, DB_THREAD_STATE)); + */ +int +__env_set_state(env, ipp, state) + ENV *env; + DB_THREAD_INFO **ipp; + DB_THREAD_STATE state; +{ + struct __db_threadid id; + DB_ENV *dbenv; + DB_HASHTAB *htab; + DB_THREAD_INFO *ip; + REGENV *renv; + REGINFO *infop; + THREAD_INFO *thread; + u_int32_t indx; + int ret; + + dbenv = env->dbenv; + htab = env->thr_hashtab; + + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) { + *ipp = NULL; + return (0); + } + dbenv->thread_id(dbenv, &id.pid, &id.tid); + + /* + * Hashing of thread ids. This is simple but could be replaced with + * something more expensive if needed. + */ +#ifdef HAVE_SIMPLE_THREAD_TYPE + /* + * A thread ID may be a pointer, so explicitly cast to a pointer of + * the appropriate size before doing the bitwise XOR. 
+ */ + indx = (u_int32_t)((uintptr_t)id.pid ^ (uintptr_t)id.tid); +#else + indx = __ham_func5(NULL, &id.tid, sizeof(id.tid)); +#endif + indx %= env->thr_nbucket; + SH_TAILQ_FOREACH(ip, &htab[indx], dbth_links, __db_thread_info) { +#ifdef HAVE_SIMPLE_THREAD_TYPE + if (id.pid == ip->dbth_pid && id.tid == ip->dbth_tid) + break; +#else + if (memcmp(&id.pid, &ip->dbth_pid, sizeof(id.pid)) != 0) + continue; + if (memcmp(&id.tid, &ip->dbth_tid, sizeof(id.tid)) != 0) + continue; + break; +#endif + } + + /* + * If ipp is not null, return the thread control block if found. + * Check to ensure the thread of control has been registered. + */ + if (state == THREAD_VERIFY) { + DB_ASSERT(env, ip != NULL && ip->dbth_state != THREAD_OUT); + if (ipp != NULL) { + if (ip == NULL) /* The control block wasnt found */ + return (EINVAL); + *ipp = ip; + } + return (0); + } + + *ipp = NULL; + ret = 0; + if (ip == NULL) { + infop = env->reginfo; + renv = infop->primary; + thread = R_ADDR(infop, renv->thread_off); + MUTEX_LOCK(env, renv->mtx_regenv); + + /* + * If we are passed the specified max, try to reclaim one from + * our queue. If failcheck has marked the slot not in use, we + * can take it, otherwise we must call is_alive before freeing + * it. + */ + if (thread->thr_count >= thread->thr_max) { + SH_TAILQ_FOREACH( + ip, &htab[indx], dbth_links, __db_thread_info) + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE || + (ip->dbth_state == THREAD_OUT && + ALIVE_ON(env) && !dbenv->is_alive( + dbenv, ip->dbth_pid, ip->dbth_tid, 0))) + break; + + if (ip != NULL) { + DB_ASSERT(env, ip->dbth_pincount == 0); + goto init; + } + } + + thread->thr_count++; + if ((ret = __env_alloc(infop, + sizeof(DB_THREAD_INFO), &ip)) == 0) { + memset(ip, 0, sizeof(*ip)); + /* + * This assumes we can link atomically since we do + * no locking here. We never use the backpointer + * so we only need to be able to write an offset + * atomically. 
+ */ + SH_TAILQ_INSERT_HEAD( + &htab[indx], ip, dbth_links, __db_thread_info); + ip->dbth_pincount = 0; + ip->dbth_pinmax = PINMAX; + ip->dbth_pinlist = R_OFFSET(infop, ip->dbth_pinarray); + +init: ip->dbth_pid = id.pid; + ip->dbth_tid = id.tid; + ip->dbth_state = state; + SH_TAILQ_INIT(&ip->dbth_xatxn); + } + MUTEX_UNLOCK(env, renv->mtx_regenv); + } else + ip->dbth_state = state; + *ipp = ip; + + DB_ASSERT(env, ret == 0); + if (ret != 0) + __db_errx(env, DB_STR("1508", + "Unable to allocate thread control block")); + return (ret); +} + +/* + * __env_thread_id_string -- + * Convert a thread id to a string. + * + * PUBLIC: char *__env_thread_id_string + * PUBLIC: __P((DB_ENV *, pid_t, db_threadid_t, char *)); + */ +char * +__env_thread_id_string(dbenv, pid, tid, buf) + DB_ENV *dbenv; + pid_t pid; + db_threadid_t tid; + char *buf; +{ +#ifdef HAVE_SIMPLE_THREAD_TYPE +#ifdef UINT64_FMT + char fmt[20]; + + snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT); + snprintf(buf, + DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid); +#else + snprintf(buf, DB_THREADID_STRLEN, "%lu/%lu", (u_long)pid, (u_long)tid); +#endif +#else +#ifdef UINT64_FMT + char fmt[20]; + + snprintf(fmt, sizeof(fmt), "%s/TID", UINT64_FMT); + snprintf(buf, DB_THREADID_STRLEN, fmt, (u_int64_t)pid); +#else + snprintf(buf, DB_THREADID_STRLEN, "%lu/TID", (u_long)pid); +#endif +#endif + COMPQUIET(dbenv, NULL); + COMPQUIET(*(u_int8_t *)&tid, 0); + + return (buf); +} diff --git a/src/env/env_file.c b/src/env/env_file.c new file mode 100644 index 00000000..cda15cd0 --- /dev/null +++ b/src/env/env_file.c @@ -0,0 +1,128 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_file_extend -- + * Initialize a regular file by writing the last page of the file. 
+ * + * PUBLIC: int __db_file_extend __P((ENV *, DB_FH *, size_t)); + */ +int +__db_file_extend(env, fhp, size) + ENV *env; + DB_FH *fhp; + size_t size; +{ + db_pgno_t pages; + size_t nw; + u_int32_t relative; + int ret; + char buf; + + buf = '\0'; + /* + * Extend the file by writing the last page. If the region is >4Gb, + * increment may be larger than the maximum possible seek "relative" + * argument, as it's an unsigned 32-bit value. Break the offset into + * pages of 1MB each so we don't overflow -- (2^20 * 2^32 is bigger + * than any memory I expect to see for awhile). + */ + pages = (db_pgno_t)((size - sizeof(buf)) / MEGABYTE); + relative = (u_int32_t)((size - sizeof(buf)) % MEGABYTE); + if ((ret = __os_seek(env, fhp, pages, MEGABYTE, relative)) == 0) + ret = __os_write(env, fhp, &buf, sizeof(buf), &nw); + + return (ret); +} + +/* + * __db_file_multi_write -- + * Overwrite a file with multiple passes to corrupt the data. + * + * PUBLIC: int __db_file_multi_write __P((ENV *, const char *)); + */ +int +__db_file_multi_write(env, path) + ENV *env; + const char *path; +{ + DB_FH *fhp; + u_int32_t mbytes, bytes; + int ret; + + if ((ret = __os_open(env, path, 0, DB_OSO_REGION, 0, &fhp)) == 0 && + (ret = __os_ioinfo(env, path, fhp, &mbytes, &bytes, NULL)) == 0) { + /* + * !!! + * Overwrite a regular file with alternating 0xff, 0x00 and 0xff + * byte patterns. Implies a fixed-block filesystem, journaling + * or logging filesystems will require operating system support. + */ + if ((ret = + __db_file_write(env, fhp, mbytes, bytes, 255)) != 0) + goto err; + if ((ret = + __db_file_write(env, fhp, mbytes, bytes, 0)) != 0) + goto err; + if ((ret = + __db_file_write(env, fhp, mbytes, bytes, 255)) != 0) + goto err; + } else + __db_err(env, ret, "%s", path); + +err: if (fhp != NULL) + (void)__os_closehandle(env, fhp); + return (ret); +} + +/* + * __db_file_write -- + * A single pass over the file, writing the specified byte pattern. 
+ * + * PUBLIC: int __db_file_write __P((ENV *, + * PUBLIC: DB_FH *, u_int32_t, u_int32_t, int)); + */ +int +__db_file_write(env, fhp, mbytes, bytes, pattern) + ENV *env; + DB_FH *fhp; + int pattern; + u_int32_t mbytes, bytes; +{ + size_t len, nw; + int i, ret; + char *buf; + +#undef FILE_WRITE_IO_SIZE +#define FILE_WRITE_IO_SIZE (64 * 1024) + if ((ret = __os_malloc(env, FILE_WRITE_IO_SIZE, &buf)) != 0) + return (ret); + memset(buf, pattern, FILE_WRITE_IO_SIZE); + + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + for (; mbytes > 0; --mbytes) + for (i = MEGABYTE / FILE_WRITE_IO_SIZE; i > 0; --i) + if ((ret = __os_write( + env, fhp, buf, FILE_WRITE_IO_SIZE, &nw)) != 0) + goto err; + for (; bytes > 0; bytes -= (u_int32_t)len) { + len = bytes < FILE_WRITE_IO_SIZE ? bytes : FILE_WRITE_IO_SIZE; + if ((ret = __os_write(env, fhp, buf, len, &nw)) != 0) + goto err; + } + + ret = __os_fsync(env, fhp); + +err: __os_free(env, buf); + return (ret); +} diff --git a/src/env/env_globals.c b/src/env/env_globals.c new file mode 100644 index 00000000..be83bbd3 --- /dev/null +++ b/src/env/env_globals.c @@ -0,0 +1,65 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * A structure with static initialization values for all of the global fields + * used by Berkeley DB. + * See dbinc/globals.h for the structure definition. 
+ */ +DB_GLOBALS __db_global_values = { +#ifdef HAVE_VXWORKS + 0, /* VxWorks: db_global_init */ + NULL, /* VxWorks: db_global_lock */ +#endif +#ifdef DB_WIN32 +#ifndef DB_WINCE + { 0 }, /* SECURITY_DESCRIPTOR win_default_sec_desc */ + { 0 }, /* SECURITY_ATTRIBUTES win_default_sec_attr */ +#endif + NULL, /* SECURITY_ATTRIBUTES *win_sec_attr */ +#endif + { NULL, NULL }, /* XA env list */ + + "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=", /* db_line */ + { 0 }, /* error_buf */ + 0, /* uid_init */ + 0, /* rand_next */ + 0, /* fid_serial */ + 0, /* db_errno */ + 0, /* num_active_pids */ + 0, /* size_active_pids */ + NULL, /* active_pids */ + NULL, /* j_assert */ + NULL, /* j_close */ + NULL, /* j_dirfree */ + NULL, /* j_dirlist */ + NULL, /* j_exists*/ + NULL, /* j_free */ + NULL, /* j_fsync */ + NULL, /* j_ftruncate */ + NULL, /* j_ioinfo */ + NULL, /* j_malloc */ + NULL, /* j_file_map */ + NULL, /* j_file_unmap */ + NULL, /* j_open */ + NULL, /* j_pread */ + NULL, /* j_pwrite */ + NULL, /* j_read */ + NULL, /* j_realloc */ + NULL, /* j_region_map */ + NULL, /* j_region_unmap */ + NULL, /* j_rename */ + NULL, /* j_seek */ + NULL, /* j_unlink */ + NULL, /* j_write */ + NULL /* j_yield */ +}; diff --git a/src/env/env_method.c b/src/env/env_method.c new file mode 100644 index 00000000..77e981ad --- /dev/null +++ b/src/env/env_method.c @@ -0,0 +1,1846 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id: env_method.c,v dabaaeb7d839 2010/08/03 17:28:53 mike $ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __db_env_init __P((DB_ENV *)); +static void __env_err __P((const DB_ENV *, int, const char *, ...)); +static void __env_errx __P((const DB_ENV *, const char *, ...)); +static int __env_get_create_dir __P((DB_ENV *, const char **)); +static int __env_get_data_dirs __P((DB_ENV *, const char ***)); +static int __env_get_data_len __P((DB_ENV *, u_int32_t *)); +static int __env_get_flags __P((DB_ENV *, u_int32_t *)); +static int __env_get_home __P((DB_ENV *, const char **)); +static int __env_get_intermediate_dir_mode __P((DB_ENV *, const char **)); +static int __env_get_shm_key __P((DB_ENV *, long *)); +static int __env_get_thread_count __P((DB_ENV *, u_int32_t *)); +static int __env_get_thread_id_fn __P((DB_ENV *, + void (**)(DB_ENV *, pid_t *, db_threadid_t *))); +static int __env_get_thread_id_string_fn __P((DB_ENV *, + char * (**)(DB_ENV *, pid_t, db_threadid_t, char *))); +static int __env_get_timeout __P((DB_ENV *, db_timeout_t *, u_int32_t)); +static int __env_get_tmp_dir __P((DB_ENV *, const char **)); +static int __env_get_verbose __P((DB_ENV *, u_int32_t, int *)); +static int __env_get_app_dispatch + __P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops))); +static int __env_set_app_dispatch + __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); +static int __env_set_event_notify + __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *))); +static int __env_get_feedback __P((DB_ENV *, void (**)(DB_ENV *, int, int))); +static int __env_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int))); +static int __env_get_isalive __P((DB_ENV *, + int (**)(DB_ENV *, pid_t, db_threadid_t, u_int32_t))); +static int __env_set_isalive __P((DB_ENV 
*, + int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t))); +static int __env_set_thread_id __P((DB_ENV *, void (*)(DB_ENV *, + pid_t *, db_threadid_t *))); +static int __env_set_thread_id_string __P((DB_ENV *, + char * (*)(DB_ENV *, pid_t, db_threadid_t, char *))); + +/* + * db_env_create -- + * DB_ENV constructor. + * + * EXTERN: int db_env_create __P((DB_ENV **, u_int32_t)); + */ +int +db_env_create(dbenvpp, flags) + DB_ENV **dbenvpp; + u_int32_t flags; +{ + DB_ENV *dbenv; + ENV *env; + int ret; + + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + * + * !!! + * We can't call the flags-checking routines, we don't have an + * environment yet. + */ + if (flags != 0) + return (EINVAL); + + /* Allocate the DB_ENV and ENV structures -- we always have both. */ + if ((ret = __os_calloc(NULL, 1, sizeof(DB_ENV), &dbenv)) != 0) + return (ret); + if ((ret = __os_calloc(NULL, 1, sizeof(ENV), &env)) != 0) + goto err; + dbenv->env = env; + env->dbenv = dbenv; + + if ((ret = __db_env_init(dbenv)) != 0 || + (ret = __lock_env_create(dbenv)) != 0 || + (ret = __log_env_create(dbenv)) != 0 || + (ret = __memp_env_create(dbenv)) != 0 || +#ifdef HAVE_REPLICATION + (ret = __rep_env_create(dbenv)) != 0 || +#endif + (ret = __txn_env_create(dbenv))) + goto err; + + *dbenvpp = dbenv; + return (0); + +err: __db_env_destroy(dbenv); + return (ret); +} + +/* + * __db_env_destroy -- + * DB_ENV destructor. + * + * PUBLIC: void __db_env_destroy __P((DB_ENV *)); + */ +void +__db_env_destroy(dbenv) + DB_ENV *dbenv; +{ + __lock_env_destroy(dbenv); + __log_env_destroy(dbenv); + __memp_env_destroy(dbenv); +#ifdef HAVE_REPLICATION + __rep_env_destroy(dbenv); +#endif + __txn_env_destroy(dbenv); + + /* + * Discard the underlying ENV structure. 
+ * + * XXX + * This is wrong, but can't be fixed until we finish the work of + * splitting up the DB_ENV and ENV structures so that we don't + * touch anything in the ENV as part of the above calls to subsystem + * DB_ENV cleanup routines. + */ + memset(dbenv->env, CLEAR_BYTE, sizeof(ENV)); + __os_free(NULL, dbenv->env); + + memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); + __os_free(NULL, dbenv); +} + +/* + * __db_env_init -- + * Initialize a DB_ENV structure. + */ +static int +__db_env_init(dbenv) + DB_ENV *dbenv; +{ + ENV *env; + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + * + * Initialize the method handles. + */ + /* DB_ENV PUBLIC HANDLE LIST BEGIN */ + dbenv->add_data_dir = __env_add_data_dir; + dbenv->cdsgroup_begin = __cdsgroup_begin_pp; + dbenv->close = __env_close_pp; + dbenv->dbremove = __env_dbremove_pp; + dbenv->dbrename = __env_dbrename_pp; + dbenv->err = __env_err; + dbenv->errx = __env_errx; + dbenv->failchk = __env_failchk_pp; + dbenv->fileid_reset = __env_fileid_reset_pp; + dbenv->get_alloc = __env_get_alloc; + dbenv->get_app_dispatch = __env_get_app_dispatch; + dbenv->get_cache_max = __memp_get_cache_max; + dbenv->get_cachesize = __memp_get_cachesize; + dbenv->get_create_dir = __env_get_create_dir; + dbenv->get_data_dirs = __env_get_data_dirs; + dbenv->get_data_len = __env_get_data_len; + dbenv->get_encrypt_flags = __env_get_encrypt_flags; + dbenv->get_errcall = __env_get_errcall; + dbenv->get_errfile = __env_get_errfile; + dbenv->get_errpfx = __env_get_errpfx; + dbenv->get_feedback = __env_get_feedback; + dbenv->get_flags = __env_get_flags; + dbenv->get_home = __env_get_home; + dbenv->get_intermediate_dir_mode = __env_get_intermediate_dir_mode; + dbenv->get_isalive = __env_get_isalive; + dbenv->get_lg_bsize = __log_get_lg_bsize; + dbenv->get_lg_dir = __log_get_lg_dir; + 
dbenv->get_lg_filemode = __log_get_lg_filemode; + dbenv->get_lg_max = __log_get_lg_max; + dbenv->get_lg_regionmax = __log_get_lg_regionmax; + dbenv->get_lk_conflicts = __lock_get_lk_conflicts; + dbenv->get_lk_detect = __lock_get_lk_detect; + dbenv->get_lk_max_lockers = __lock_get_lk_max_lockers; + dbenv->get_lk_max_locks = __lock_get_lk_max_locks; + dbenv->get_lk_max_objects = __lock_get_lk_max_objects; + dbenv->get_lk_partitions = __lock_get_lk_partitions; + dbenv->get_lk_priority = __lock_get_lk_priority; + dbenv->get_lk_tablesize = __lock_get_lk_tablesize; + dbenv->get_memory_init = __env_get_memory_init; + dbenv->get_memory_max = __env_get_memory_max; + dbenv->get_mp_max_openfd = __memp_get_mp_max_openfd; + dbenv->get_mp_max_write = __memp_get_mp_max_write; + dbenv->get_mp_mmapsize = __memp_get_mp_mmapsize; + dbenv->get_mp_mtxcount = __memp_get_mp_mtxcount; + dbenv->get_mp_pagesize = __memp_get_mp_pagesize; + dbenv->get_mp_tablesize = __memp_get_mp_tablesize; + dbenv->get_msgcall = __env_get_msgcall; + dbenv->get_msgfile = __env_get_msgfile; + dbenv->get_open_flags = __env_get_open_flags; + dbenv->get_shm_key = __env_get_shm_key; + dbenv->get_thread_count = __env_get_thread_count; + dbenv->get_thread_id_fn = __env_get_thread_id_fn; + dbenv->get_thread_id_string_fn = __env_get_thread_id_string_fn; + dbenv->get_timeout = __env_get_timeout; + dbenv->get_tmp_dir = __env_get_tmp_dir; + dbenv->get_tx_max = __txn_get_tx_max; + dbenv->get_tx_timestamp = __txn_get_tx_timestamp; + dbenv->get_verbose = __env_get_verbose; + dbenv->is_bigendian = __db_isbigendian; + dbenv->lock_detect = __lock_detect_pp; + dbenv->lock_get = __lock_get_pp; + dbenv->lock_id = __lock_id_pp; + dbenv->lock_id_free = __lock_id_free_pp; + dbenv->lock_put = __lock_put_pp; + dbenv->lock_stat = __lock_stat_pp; + dbenv->lock_stat_print = __lock_stat_print_pp; + dbenv->lock_vec = __lock_vec_pp; + dbenv->log_archive = __log_archive_pp; + dbenv->log_cursor = __log_cursor_pp; + dbenv->log_file = 
__log_file_pp; + dbenv->log_flush = __log_flush_pp; + dbenv->log_get_config = __log_get_config; + dbenv->log_printf = __log_printf_capi; + dbenv->log_put = __log_put_pp; + dbenv->log_put_record = __log_put_record_pp; + dbenv->log_read_record = __log_read_record_pp; + dbenv->log_set_config = __log_set_config; + dbenv->log_stat = __log_stat_pp; + dbenv->log_stat_print = __log_stat_print_pp; + dbenv->log_verify = __log_verify_pp; + dbenv->lsn_reset = __env_lsn_reset_pp; + dbenv->memp_fcreate = __memp_fcreate_pp; + dbenv->memp_register = __memp_register_pp; + dbenv->memp_stat = __memp_stat_pp; + dbenv->memp_stat_print = __memp_stat_print_pp; + dbenv->memp_sync = __memp_sync_pp; + dbenv->memp_trickle = __memp_trickle_pp; + dbenv->mutex_alloc = __mutex_alloc_pp; + dbenv->mutex_free = __mutex_free_pp; + dbenv->mutex_get_align = __mutex_get_align; + dbenv->mutex_get_increment = __mutex_get_increment; + dbenv->mutex_get_init = __mutex_get_init; + dbenv->mutex_get_max = __mutex_get_max; + dbenv->mutex_get_tas_spins = __mutex_get_tas_spins; + dbenv->mutex_lock = __mutex_lock_pp; + dbenv->mutex_set_align = __mutex_set_align; + dbenv->mutex_set_increment = __mutex_set_increment; + dbenv->mutex_set_init = __mutex_set_init; + dbenv->mutex_set_max = __mutex_set_max; + dbenv->mutex_set_tas_spins = __mutex_set_tas_spins; + dbenv->mutex_stat = __mutex_stat_pp; + dbenv->mutex_stat_print = __mutex_stat_print_pp; + dbenv->mutex_unlock = __mutex_unlock_pp; + dbenv->open = __env_open_pp; + dbenv->remove = __env_remove; + dbenv->rep_elect = __rep_elect_pp; + dbenv->rep_flush = __rep_flush; + dbenv->rep_get_clockskew = __rep_get_clockskew; + dbenv->rep_get_config = __rep_get_config; + dbenv->rep_get_limit = __rep_get_limit; + dbenv->rep_get_nsites = __rep_get_nsites; + dbenv->rep_get_priority = __rep_get_priority; + dbenv->rep_get_request = __rep_get_request; + dbenv->rep_get_timeout = __rep_get_timeout; + dbenv->rep_process_message = __rep_process_message_pp; + dbenv->rep_set_clockskew = 
__rep_set_clockskew; + dbenv->rep_set_config = __rep_set_config; + dbenv->rep_set_limit = __rep_set_limit; + dbenv->rep_set_nsites = __rep_set_nsites_pp; + dbenv->rep_set_priority = __rep_set_priority; + dbenv->rep_set_request = __rep_set_request; + dbenv->rep_set_timeout = __rep_set_timeout; + dbenv->rep_set_transport = __rep_set_transport_pp; + dbenv->rep_start = __rep_start_pp; + dbenv->rep_stat = __rep_stat_pp; + dbenv->rep_stat_print = __rep_stat_print_pp; + dbenv->rep_sync = __rep_sync; + dbenv->repmgr_channel = __repmgr_channel; + dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy; + dbenv->repmgr_local_site = __repmgr_local_site; + dbenv->repmgr_msg_dispatch = __repmgr_set_msg_dispatch; + dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy; + dbenv->repmgr_site = __repmgr_site; + dbenv->repmgr_site_by_eid = __repmgr_site_by_eid; + dbenv->repmgr_site_list = __repmgr_site_list; + dbenv->repmgr_start = __repmgr_start; + dbenv->repmgr_stat = __repmgr_stat_pp; + dbenv->repmgr_stat_print = __repmgr_stat_print_pp; + dbenv->set_alloc = __env_set_alloc; + dbenv->set_app_dispatch = __env_set_app_dispatch; + dbenv->set_cache_max = __memp_set_cache_max; + dbenv->set_cachesize = __memp_set_cachesize; + dbenv->set_create_dir = __env_set_create_dir; + dbenv->set_data_dir = __env_set_data_dir; + dbenv->set_data_len = __env_set_data_len; + dbenv->set_encrypt = __env_set_encrypt; + dbenv->set_errcall = __env_set_errcall; + dbenv->set_errfile = __env_set_errfile; + dbenv->set_errpfx = __env_set_errpfx; + dbenv->set_event_notify = __env_set_event_notify; + dbenv->set_feedback = __env_set_feedback; + dbenv->set_flags = __env_set_flags; + dbenv->set_intermediate_dir_mode = __env_set_intermediate_dir_mode; + dbenv->set_isalive = __env_set_isalive; + dbenv->set_lg_bsize = __log_set_lg_bsize; + dbenv->set_lg_dir = __log_set_lg_dir; + dbenv->set_lg_filemode = __log_set_lg_filemode; + dbenv->set_lg_max = __log_set_lg_max; + dbenv->set_lg_regionmax = __log_set_lg_regionmax; + 
dbenv->set_lk_conflicts = __lock_set_lk_conflicts; + dbenv->set_lk_detect = __lock_set_lk_detect; + dbenv->set_lk_max_lockers = __lock_set_lk_max_lockers; + dbenv->set_lk_max_locks = __lock_set_lk_max_locks; + dbenv->set_lk_max_objects = __lock_set_lk_max_objects; + dbenv->set_lk_partitions = __lock_set_lk_partitions; + dbenv->set_lk_priority = __lock_set_lk_priority; + dbenv->set_lk_tablesize = __lock_set_lk_tablesize; + dbenv->set_memory_init = __env_set_memory_init; + dbenv->set_memory_max = __env_set_memory_max; + dbenv->set_mp_max_openfd = __memp_set_mp_max_openfd; + dbenv->set_mp_max_write = __memp_set_mp_max_write; + dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize; + dbenv->set_mp_mtxcount = __memp_set_mp_mtxcount; + dbenv->set_mp_pagesize = __memp_set_mp_pagesize; + dbenv->set_mp_tablesize = __memp_set_mp_tablesize; + dbenv->set_msgcall = __env_set_msgcall; + dbenv->set_msgfile = __env_set_msgfile; + dbenv->set_paniccall = __env_set_paniccall; + dbenv->set_shm_key = __env_set_shm_key; + dbenv->set_thread_count = __env_set_thread_count; + dbenv->set_thread_id = __env_set_thread_id; + dbenv->set_thread_id_string = __env_set_thread_id_string; + dbenv->set_timeout = __env_set_timeout; + dbenv->set_tmp_dir = __env_set_tmp_dir; + dbenv->set_tx_max = __txn_set_tx_max; + dbenv->set_tx_timestamp = __txn_set_tx_timestamp; + dbenv->set_verbose = __env_set_verbose; + dbenv->stat_print = __env_stat_print_pp; + dbenv->txn_applied = __txn_applied_pp; + dbenv->txn_begin = __txn_begin_pp; + dbenv->txn_checkpoint = __txn_checkpoint_pp; + dbenv->txn_recover = __txn_recover_pp; + dbenv->txn_stat = __txn_stat_pp; + dbenv->txn_stat_print = __txn_stat_print_pp; + /* DB_ENV PUBLIC HANDLE LIST END */ + + /* DB_ENV PRIVATE HANDLE LIST BEGIN */ + dbenv->prdbt = __db_prdbt; + /* DB_ENV PRIVATE HANDLE LIST END */ + + dbenv->shm_key = INVALID_REGION_SEGID; + dbenv->thread_id = __os_id; + dbenv->thread_id_string = __env_thread_id_string; + + env = dbenv->env; + __os_id(NULL, 
&env->pid_cache, NULL); + + env->db_ref = 0; + env->log_verify_wrap = __log_verify_wrap; + env->data_len = ENV_DEF_DATA_LEN; + TAILQ_INIT(&env->fdlist); + + if (!__db_isbigendian()) + F_SET(env, ENV_LITTLEENDIAN); + F_SET(env, ENV_NO_OUTPUT_SET); + + return (0); +} + +/* + * __env_err -- + * DbEnv.err method. + */ +static void +#ifdef STDC_HEADERS +__env_err(const DB_ENV *dbenv, int error, const char *fmt, ...) +#else +__env_err(dbenv, error, fmt, va_alist) + const DB_ENV *dbenv; + int error; + const char *fmt; + va_dcl +#endif +{ + /* Message with error string, to stderr by default. */ + DB_REAL_ERR(dbenv, error, DB_ERROR_SET, 1, fmt); +} + +/* + * __env_errx -- + * DbEnv.errx method. + */ +static void +#ifdef STDC_HEADERS +__env_errx(const DB_ENV *dbenv, const char *fmt, ...) +#else +__env_errx(dbenv, fmt, va_alist) + const DB_ENV *dbenv; + const char *fmt; + va_dcl +#endif +{ + /* Message without error string, to stderr by default. */ + DB_REAL_ERR(dbenv, 0, DB_ERROR_NOT_SET, 1, fmt); +} + +static int +__env_get_home(dbenv, homep) + DB_ENV *dbenv; + const char **homep; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->get_home"); + *homep = env->db_home; + + return (0); +} + +/* + * __env_get_alloc -- + * {DB_ENV,DB}->get_alloc. + * + * PUBLIC: int __env_get_alloc __P((DB_ENV *, void *(**)(size_t), + * PUBLIC: void *(**)(void *, size_t), void (**)(void *))); + */ +int +__env_get_alloc(dbenv, mal_funcp, real_funcp, free_funcp) + DB_ENV *dbenv; + void *(**mal_funcp) __P((size_t)); + void *(**real_funcp) __P((void *, size_t)); + void (**free_funcp) __P((void *)); +{ + + if (mal_funcp != NULL) + *mal_funcp = dbenv->db_malloc; + if (real_funcp != NULL) + *real_funcp = dbenv->db_realloc; + if (free_funcp != NULL) + *free_funcp = dbenv->db_free; + return (0); +} + +/* + * __env_set_alloc -- + * {DB_ENV,DB}->set_alloc. 
+ * + * PUBLIC: int __env_set_alloc __P((DB_ENV *, void *(*)(size_t), + * PUBLIC: void *(*)(void *, size_t), void (*)(void *))); + */ +int +__env_set_alloc(dbenv, mal_func, real_func, free_func) + DB_ENV *dbenv; + void *(*mal_func) __P((size_t)); + void *(*real_func) __P((void *, size_t)); + void (*free_func) __P((void *)); +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_alloc"); + + dbenv->db_malloc = mal_func; + dbenv->db_realloc = real_func; + dbenv->db_free = free_func; + return (0); +} +/* + * __env_get_memory_init -- + * DB_ENV->get_memory_init. + * + * PUBLIC: int __env_get_memory_init __P((DB_ENV *, + * PUBLIC: DB_MEM_CONFIG, u_int32_t *)); + */ +int +__env_get_memory_init(dbenv, type, countp) + DB_ENV *dbenv; + DB_MEM_CONFIG type; + u_int32_t *countp; +{ + ENV *env; + + env = dbenv->env; + + switch (type) { + case DB_MEM_LOCK: + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK); + if (LOCKING_ON(env)) + *countp = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_initlocks; + else + *countp = dbenv->lk_init; + break; + case DB_MEM_LOCKOBJECT: + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK); + if (LOCKING_ON(env)) + *countp = ((DB_LOCKREGION *) env-> + lk_handle->reginfo.primary)->stat.st_initobjects; + else + *countp = dbenv->lk_init_objects; + break; + case DB_MEM_LOCKER: + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_memory_init", DB_INIT_LOCK); + if (LOCKING_ON(env)) + *countp = ((DB_LOCKREGION *) env-> + lk_handle->reginfo.primary)->stat.st_initlockers; + else + *countp = dbenv->lk_init_lockers; + break; + case DB_MEM_LOGID: + ENV_NOT_CONFIGURED(env, + env->lg_handle, "DB_ENV->get_memory_init", DB_INIT_LOG); + + if (LOGGING_ON(env)) + *countp = ((LOG *)env->lg_handle-> + reginfo.primary)->stat.st_fileid_init; + else + *countp = dbenv->lg_fileid_init; + break; + case DB_MEM_TRANSACTION: + ENV_NOT_CONFIGURED(env, + env->tx_handle, 
"DB_ENV->memory_init", DB_INIT_TXN); + + if (TXN_ON(env)) + *countp = ((DB_TXNREGION *) + env->tx_handle->reginfo.primary)->inittxns; + else + *countp = dbenv->tx_init; + break; + case DB_MEM_THREAD: + /* We always update thr_init when joining an env. */ + *countp = dbenv->thr_init; + break; + } + + return (0); +} + +/* + * __env_set_memory_init -- + * DB_ENV->set_memory_init. + * + * PUBLIC: int __env_set_memory_init __P((DB_ENV *, DB_MEM_CONFIG, u_int32_t)); + */ +int +__env_set_memory_init(dbenv, type, count) + DB_ENV *dbenv; + DB_MEM_CONFIG type; + u_int32_t count; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_memory_init"); + switch (type) { + case DB_MEM_LOCK: + dbenv->lk_init = count; + break; + case DB_MEM_LOCKOBJECT: + dbenv->lk_init_objects = count; + break; + case DB_MEM_LOCKER: + dbenv->lk_init_lockers = count; + break; + case DB_MEM_LOGID: + dbenv->lg_fileid_init = count; + break; + case DB_MEM_TRANSACTION: + dbenv->tx_init = count; + break; + case DB_MEM_THREAD: + dbenv->thr_init = count; + break; + } + + return (0); +} +/* + * __env_get_memory_max -- + * DB_ENV->get_memory_max. + * + * PUBLIC: int __env_get_memory_max __P((DB_ENV *, u_int32_t *, u_int32_t *)); + */ +int +__env_get_memory_max(dbenv, gbytes, bytes) + DB_ENV *dbenv; + u_int32_t *gbytes, *bytes; +{ + ENV *env; + env = dbenv->env; + + if (F_ISSET(env, ENV_OPEN_CALLED)) { + *gbytes = (u_int32_t)(env->reginfo->rp->max / GIGABYTE); + *bytes = (u_int32_t)(env->reginfo->rp->max % GIGABYTE); + } else { + *gbytes = (u_int32_t)(dbenv->memory_max / GIGABYTE); + *bytes = (u_int32_t)(dbenv->memory_max % GIGABYTE); + } + return (0); +} + +/* + * __env_set_memory_max -- + * DB_ENV->set_memory_max. 
+ * + * PUBLIC: int __env_set_memory_max __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__env_set_memory_max(dbenv, gbytes, bytes) + DB_ENV *dbenv; + u_int32_t gbytes, bytes; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_memory_max"); + + /* + * If they are asking for 4GB exactly on a 32 bit platform, they + * really meant 4GB - 1. Give it to them. + */ + if (sizeof(roff_t) == 4 && gbytes == 4 && bytes == 0) { + --gbytes; + bytes = GIGABYTE - 1; + } + /* + * Make sure they wouldn't overflow the memory_max field on a + * 32 bit architecture. + */ + if (sizeof(roff_t) == 4 && gbytes >= 4) { + __db_errx(env, DB_STR("1588", + "Maximum memory size too large: maximum is 4GB")); + return (EINVAL); + } + dbenv->memory_max = (roff_t)((gbytes * GIGABYTE) + bytes); + return (0); +} + +/* + * __env_get_app_dispatch -- + * Get the transaction abort recover function. + */ +static int +__env_get_app_dispatch(dbenv, app_dispatchp) + DB_ENV *dbenv; + int (**app_dispatchp) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +{ + + if (app_dispatchp != NULL) + *app_dispatchp = dbenv->app_dispatch; + return (0); +} + +/* + * __env_set_app_dispatch -- + * Set the transaction abort recover function. + */ +static int +__env_set_app_dispatch(dbenv, app_dispatch) + DB_ENV *dbenv; + int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_app_dispatch"); + + dbenv->app_dispatch = app_dispatch; + return (0); +} + +/* + * __env_get_encrypt_flags -- + * {DB_ENV,DB}->get_encrypt_flags. 
 *
 * PUBLIC: int __env_get_encrypt_flags __P((DB_ENV *, u_int32_t *));
 */
int
__env_get_encrypt_flags(dbenv, flagsp)
	DB_ENV *dbenv;
	u_int32_t *flagsp;
{
#ifdef HAVE_CRYPTO
	DB_CIPHER *db_cipher;
#endif
	ENV *env;

	env = dbenv->env;

#ifdef HAVE_CRYPTO
	/* Report AES only when a cipher is configured and is AES. */
	db_cipher = env->crypto_handle;
	if (db_cipher != NULL && db_cipher->alg == CIPHER_AES)
		*flagsp = DB_ENCRYPT_AES;
	else
		*flagsp = 0;
	return (0);
#else
	COMPQUIET(flagsp, 0);
	__db_errx(env, DB_STR("1555",
	    "library build did not include support for cryptography"));
	return (DB_OPNOTSUP);
#endif
}

/*
 * __env_set_encrypt --
 *	DB_ENV->set_encrypt.
 *
 * PUBLIC: int __env_set_encrypt __P((DB_ENV *, const char *, u_int32_t));
 */
int
__env_set_encrypt(dbenv, passwd, flags)
	DB_ENV *dbenv;
	const char *passwd;
	u_int32_t flags;
{
#ifdef HAVE_CRYPTO
	DB_THREAD_INFO *ip;
	DB_CIPHER *db_cipher;
	ENV *env;
	int ret;

	env = dbenv->env;

	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_encrypt");
#define	OK_CRYPTO_FLAGS	(DB_ENCRYPT_AES)

	/* flags == 0 selects CIPHER_ANY below; anything else must be AES. */
	if (flags != 0 && LF_ISSET(~OK_CRYPTO_FLAGS))
		return (__db_ferr(env, "DB_ENV->set_encrypt", 0));

	if (passwd == NULL || strlen(passwd) == 0) {
		__db_errx(env, DB_STR("1556",
		    "Empty password specified to set_encrypt"));
		return (EINVAL);
	}
	ENV_ENTER(env, ip);
	/* Allocate the cipher handle on first use, reuse it afterwards. */
	if (!CRYPTO_ON(env)) {
		if ((ret = __os_calloc(env, 1, sizeof(DB_CIPHER), &db_cipher))
		    != 0)
			goto err;
		env->crypto_handle = db_cipher;
	} else
		db_cipher = env->crypto_handle;

	/* Replace any previously configured password. */
	if (dbenv->passwd != NULL)
		__os_free(env, dbenv->passwd);
	if ((ret = __os_strdup(env, passwd, &dbenv->passwd)) != 0) {
		__os_free(env, db_cipher);
		goto err;
	}
	/*
	 * We're going to need this often enough to keep around
	 */
	dbenv->passwd_len = strlen(dbenv->passwd) + 1;
	/*
	 * The MAC key is for checksumming, and is separate from
	 * the algorithm.  So initialize it here, even if they
	 * are using CIPHER_ANY.
	 */
	__db_derive_mac(
	    (u_int8_t *)dbenv->passwd, dbenv->passwd_len, db_cipher->mac_key);
	switch (flags) {
	case 0:
		F_SET(db_cipher, CIPHER_ANY);
		break;
	case DB_ENCRYPT_AES:
		if ((ret =
		    __crypto_algsetup(env, db_cipher, CIPHER_AES, 0)) != 0)
			goto err1;
		break;
	default:			/* Impossible. */
		break;
	}
	ENV_LEAVE(env, ip);
	return (0);

	/* Unwind: free both the copied password and the cipher handle. */
err1:
	__os_free(env, dbenv->passwd);
	__os_free(env, db_cipher);
	env->crypto_handle = NULL;
err:
	ENV_LEAVE(env, ip);
	return (ret);
#else
	COMPQUIET(passwd, NULL);
	COMPQUIET(flags, 0);

	__db_errx(dbenv->env, DB_STR("1557",
	    "library build did not include support for cryptography"));
	return (DB_OPNOTSUP);
#endif
}

/*
 * Map of external (DB_*) to internal (DB_ENV_*) flag bits, consumed by
 * __env_map_flags/__env_fetch_flags below.
 */
#ifndef HAVE_BREW
static
#endif
const FLAG_MAP EnvMap[] = {
	{ DB_AUTO_COMMIT,		DB_ENV_AUTO_COMMIT },
	{ DB_CDB_ALLDB,			DB_ENV_CDB_ALLDB },
	{ DB_DATABASE_LOCKING,		DB_ENV_DATABASE_LOCKING },
	{ DB_DIRECT_DB,			DB_ENV_DIRECT_DB },
	{ DB_DSYNC_DB,			DB_ENV_DSYNC_DB },
	{ DB_HOTBACKUP_IN_PROGRESS,	DB_ENV_HOTBACKUP },
	{ DB_MULTIVERSION,		DB_ENV_MULTIVERSION },
	{ DB_NOFLUSH,			DB_ENV_NOFLUSH },
	{ DB_NOLOCKING,			DB_ENV_NOLOCKING },
	{ DB_NOMMAP,			DB_ENV_NOMMAP },
	{ DB_NOPANIC,			DB_ENV_NOPANIC },
	{ DB_OVERWRITE,			DB_ENV_OVERWRITE },
	{ DB_REGION_INIT,		DB_ENV_REGION_INIT },
	{ DB_TIME_NOTGRANTED,		DB_ENV_TIME_NOTGRANTED },
	{ DB_TXN_NOSYNC,		DB_ENV_TXN_NOSYNC },
	{ DB_TXN_NOWAIT,		DB_ENV_TXN_NOWAIT },
	{ DB_TXN_SNAPSHOT,		DB_ENV_TXN_SNAPSHOT },
	{ DB_TXN_WRITE_NOSYNC,		DB_ENV_TXN_WRITE_NOSYNC },
	{ DB_YIELDCPU,			DB_ENV_YIELDCPU }
};

/*
 * __env_map_flags -- map from external to internal flags.
+ * PUBLIC: void __env_map_flags __P((const FLAG_MAP *, + * PUBLIC: u_int, u_int32_t *, u_int32_t *)); + */ +void +__env_map_flags(flagmap, mapsize, inflagsp, outflagsp) + const FLAG_MAP *flagmap; + u_int mapsize; + u_int32_t *inflagsp, *outflagsp; +{ + + const FLAG_MAP *fmp; + u_int i; + + for (i = 0, fmp = flagmap; + i < mapsize / sizeof(flagmap[0]); ++i, ++fmp) + if (FLD_ISSET(*inflagsp, fmp->inflag)) { + FLD_SET(*outflagsp, fmp->outflag); + FLD_CLR(*inflagsp, fmp->inflag); + if (*inflagsp == 0) + break; + } +} + +/* + * __env_fetch_flags -- map from internal to external flags. + * PUBLIC: void __env_fetch_flags __P((const FLAG_MAP *, + * PUBLIC: u_int, u_int32_t *, u_int32_t *)); + */ +void +__env_fetch_flags(flagmap, mapsize, inflagsp, outflagsp) + const FLAG_MAP *flagmap; + u_int mapsize; + u_int32_t *inflagsp, *outflagsp; +{ + const FLAG_MAP *fmp; + u_int32_t i; + + *outflagsp = 0; + for (i = 0, fmp = flagmap; + i < mapsize / sizeof(flagmap[0]); ++i, ++fmp) + if (FLD_ISSET(*inflagsp, fmp->outflag)) + FLD_SET(*outflagsp, fmp->inflag); +} + +static int +__env_get_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ + ENV *env; + DB_THREAD_INFO *ip; + + __env_fetch_flags(EnvMap, sizeof(EnvMap), &dbenv->flags, flagsp); + + env = dbenv->env; + /* Some flags are persisted in the regions. */ + if (env->reginfo != NULL && + ((REGENV *)env->reginfo->primary)->panic != 0) + FLD_SET(*flagsp, DB_PANIC_ENVIRONMENT); + + /* If the hotbackup counter is positive, set the flag indicating so. */ + if (TXN_ON(env)) { + ENV_ENTER(env, ip); + TXN_SYSTEM_LOCK(env); + if (((DB_TXNREGION *) + env->tx_handle->reginfo.primary)->n_hotbackup > 0) + FLD_SET(*flagsp, DB_HOTBACKUP_IN_PROGRESS); + TXN_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } + + return (0); +} + +/* + * __env_set_flags -- + * DB_ENV->set_flags. 
 *
 * PUBLIC: int __env_set_flags __P((DB_ENV *, u_int32_t, int));
 */
int
__env_set_flags(dbenv, flags, on)
	DB_ENV *dbenv;
	u_int32_t flags;
	int on;
{
	ENV *env;
	DB_TXNREGION *tenv;
	DB_THREAD_INFO *ip;
	u_int32_t mapped_flags;
	int mem_on, needs_checkpoint, ret;

	env = dbenv->env;

#define	OK_FLAGS							\
	(DB_AUTO_COMMIT | DB_CDB_ALLDB | DB_DATABASE_LOCKING |		\
	DB_DIRECT_DB | DB_DSYNC_DB | DB_MULTIVERSION |			\
	DB_NOLOCKING | DB_NOMMAP | DB_NOPANIC | DB_OVERWRITE |		\
	DB_PANIC_ENVIRONMENT | DB_REGION_INIT |				\
	DB_TIME_NOTGRANTED | DB_TXN_NOSYNC | DB_TXN_NOWAIT |		\
	DB_TXN_SNAPSHOT | DB_TXN_WRITE_NOSYNC | DB_YIELDCPU |		\
	DB_HOTBACKUP_IN_PROGRESS | DB_NOFLUSH)

	if (LF_ISSET(~OK_FLAGS))
		return (__db_ferr(env, "DB_ENV->set_flags", 0));
	if (on) {
		/* NOSYNC and WRITE_NOSYNC may not both be turned on. */
		if ((ret = __db_fcchk(env, "DB_ENV->set_flags",
		    flags, DB_TXN_NOSYNC, DB_TXN_WRITE_NOSYNC)) != 0)
			return (ret);
		if (LF_ISSET(DB_DIRECT_DB) && __os_support_direct_io() == 0) {
			__db_errx(env,
	"DB_ENV->set_flags: direct I/O either not configured or not supported");
			return (EINVAL);
		}
	}

	/* Some flags may only change before/after DB_ENV->open. */
	if (LF_ISSET(DB_CDB_ALLDB))
		ENV_ILLEGAL_AFTER_OPEN(env,
		    "DB_ENV->set_flags: DB_CDB_ALLDB");
	if (LF_ISSET(DB_PANIC_ENVIRONMENT)) {
		ENV_ILLEGAL_BEFORE_OPEN(env,
		    "DB_ENV->set_flags: DB_PANIC_ENVIRONMENT");
		if (on) {
			__db_errx(env, DB_STR("1558",
			    "Environment panic set"));
			(void)__env_panic(env, DB_RUNRECOVERY);
		} else
			__env_panic_set(env, 0);
	}
	if (LF_ISSET(DB_REGION_INIT))
		ENV_ILLEGAL_AFTER_OPEN(env,
		    "DB_ENV->set_flags: DB_REGION_INIT");

	/*
	 * DB_LOG_IN_MEMORY, DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC are
	 * mutually incompatible.  If we're setting one of them, clear all
	 * current settings.  If the environment is open, check to see that
	 * logging is not in memory.
	 */
	if (on && LF_ISSET(DB_TXN_NOSYNC | DB_TXN_WRITE_NOSYNC)) {
		F_CLR(dbenv, DB_ENV_TXN_NOSYNC | DB_ENV_TXN_WRITE_NOSYNC);
		if (!F_ISSET(env, ENV_OPEN_CALLED)) {
			if ((ret =
			    __log_set_config(dbenv, DB_LOG_IN_MEMORY, 0)) != 0)
				return (ret);
		} else if (LOGGING_ON(env)) {
			if ((ret = __log_get_config(dbenv,
			    DB_LOG_IN_MEMORY, &mem_on)) != 0)
				return (ret);
			if (mem_on == 1) {
				__db_errx(env, DB_STR("1559",
				    "DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC"
				    " may not be used with DB_LOG_IN_MEMORY"));
				return (EINVAL);
			}
		}
	}

	/*
	 * Settings of DB_HOTBACKUP_IN_PROGRESS are reference-counted
	 * in REGENV.
	 */
	if (LF_ISSET(DB_HOTBACKUP_IN_PROGRESS)) {
		/* You can't take a hot backup without transactions. */
		ENV_REQUIRES_CONFIG(env, env->tx_handle,
		    "DB_ENV->set_flags: DB_HOTBACKUP_IN_PROGRESS",
		    DB_INIT_TXN);

		tenv = (DB_TXNREGION *)env->tx_handle->reginfo.primary;
		needs_checkpoint = 0;
		ENV_ENTER(env, ip);
		TXN_SYSTEM_LOCK(env);
		if (on) {
			tenv->n_hotbackup++;
			/* Bulk transactions in flight force a checkpoint. */
			if (tenv->n_bulk_txn > 0)
				needs_checkpoint = 1;
		} else {
			if (tenv->n_hotbackup == 0)
				needs_checkpoint = -1; /* signal count error */
			else
				tenv->n_hotbackup--;
		}
		TXN_SYSTEM_UNLOCK(env);
		ENV_LEAVE(env, ip);

		/* Diagnose the underflow outside the region lock. */
		if (needs_checkpoint == -1) {
			__db_errx(env, DB_STR("1560",
			    "Attempt to decrement hotbackup counter past zero"));
			return (EINVAL);
		}

		if (needs_checkpoint && (ret = __txn_checkpoint(env, 0, 0, 0)))
			return (ret);
	}

	/* Finally, translate the external bits into DB_ENV_* bits. */
	mapped_flags = 0;
	__env_map_flags(EnvMap, sizeof(EnvMap), &flags, &mapped_flags);
	if (on)
		F_SET(dbenv, mapped_flags);
	else
		F_CLR(dbenv, mapped_flags);

	return (0);
}

/*
 * __env_get_data_dirs --
 *	DB_ENV->get_data_dirs: return the NULL-terminated directory array.
 */
static int
__env_get_data_dirs(dbenv, dirpp)
	DB_ENV *dbenv;
	const char ***dirpp;
{
	*dirpp = (const char **)dbenv->db_data_dir;
	return (0);
}

/*
 * __env_set_data_dir --
 *	DB_ENV->set_data_dir.
 *
 * PUBLIC: int __env_set_data_dir __P((DB_ENV *, const char *));
 */
int
__env_set_data_dir(dbenv, dir)
	DB_ENV *dbenv;
	const char *dir;
{
	int ret;

	if ((ret = __env_add_data_dir(dbenv, dir)) != 0)
		return (ret);

	/* The first directory set also becomes the create directory. */
	if (dbenv->data_next == 1)
		return (__env_set_create_dir(dbenv, dir));

	return (0);
}

/*
 * __env_add_data_dir --
 *	DB_ENV->add_data_dir.
 *
 * PUBLIC: int __env_add_data_dir __P((DB_ENV *, const char *));
 */
int
__env_add_data_dir(dbenv, dir)
	DB_ENV *dbenv;
	const char *dir;
{
	ENV *env;
	int ret;

	env = dbenv->env;
	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->add_data_dir");

	/*
	 * The array is NULL-terminated so it can be returned by get_data_dirs
	 * without a length.
	 */

#define	DATA_INIT_CNT	20	/* Start with 20 data slots. */
	if (dbenv->db_data_dir == NULL) {
		if ((ret = __os_calloc(env, DATA_INIT_CNT,
		    sizeof(char **), &dbenv->db_data_dir)) != 0)
			return (ret);
		dbenv->data_cnt = DATA_INIT_CNT;
	} else if (dbenv->data_next == dbenv->data_cnt - 2) {
		/* Double when one usable slot (before the NULL) remains. */
		dbenv->data_cnt *= 2;
		if ((ret = __os_realloc(env,
		    (u_int)dbenv->data_cnt * sizeof(char **),
		    &dbenv->db_data_dir)) != 0)
			return (ret);
	}

	/* Append a copy of dir and keep the array NULL-terminated. */
	ret = __os_strdup(env,
	    dir, &dbenv->db_data_dir[dbenv->data_next++]);
	dbenv->db_data_dir[dbenv->data_next] = NULL;
	return (ret);
}

/*
 * __env_set_create_dir --
 *	DB_ENV->set_create_dir.
 *	The list of directories cannot change after opening the env and setting
 *	a pointer must be atomic so we do not need to mutex here even if multiple
 *	threads are using the DB_ENV handle.
 *
 * PUBLIC: int __env_set_create_dir __P((DB_ENV *, const char *));
 */
int
__env_set_create_dir(dbenv, dir)
	DB_ENV *dbenv;
	const char *dir;
{
	ENV *env;
	int i;

	env = dbenv->env;

	/* The create directory must already be in the data-dir list. */
	for (i = 0; i < dbenv->data_next; i++)
		if (strcmp(dir, dbenv->db_data_dir[i]) == 0)
			break;

	if (i == dbenv->data_next) {
		__db_errx(env, DB_STR_A("1561",
		    "Directory %s not in environment list.", "%s"), dir);
		return (EINVAL);
	}

	/* Point at the list's copy, not the caller's string. */
	dbenv->db_create_dir = dbenv->db_data_dir[i];
	return (0);
}

/*
 * __env_get_create_dir --
 *	DB_ENV->get_create_dir.
 */
static int
__env_get_create_dir(dbenv, dirp)
	DB_ENV *dbenv;
	const char **dirp;
{
	*dirp = dbenv->db_create_dir;
	return (0);
}

/*
 * __env_get_intermediate_dir_mode --
 *	DB_ENV->get_intermediate_dir_mode.
 */
static int
__env_get_intermediate_dir_mode(dbenv, modep)
	DB_ENV *dbenv;
	const char **modep;
{
	*modep = dbenv->intermediate_dir_mode;
	return (0);
}

/*
 * __env_set_data_len --
 *	DB_ENV->set_data_len.
 *
 * PUBLIC: int __env_set_data_len __P((DB_ENV *, u_int32_t));
 */
int
__env_set_data_len(dbenv, data_len)
	DB_ENV *dbenv;
	u_int32_t data_len;
{
	dbenv->env->data_len = data_len;
	return (0);
}

/*
 * __env_get_data_len --
 *	DB_ENV->get_data_len.
 */
static int
__env_get_data_len(dbenv, data_lenp)
	DB_ENV *dbenv;
	u_int32_t *data_lenp;
{
	*data_lenp = dbenv->env->data_len;
	return (0);
}

/*
 * __env_set_intermediate_dir_mode --
 *	DB_ENV->set_intermediate_dir_mode.
 *
 * PUBLIC: int __env_set_intermediate_dir_mode __P((DB_ENV *, const char *));
 */
int
__env_set_intermediate_dir_mode(dbenv, mode)
	DB_ENV *dbenv;
	const char *mode;
{
	ENV *env;
	u_int t;
	int ret;

	env = dbenv->env;

	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_intermediate_dir_mode");

	/*
	 * Parse one character of an "rwxrwxrwx" string: the expected
	 * character sets the permission bit, '-' skips it, anything
	 * else is a format error.
	 */
#define	__SETMODE(offset, valid_ch, mask) {				\
	if (mode[offset] == (valid_ch))					\
		t |= (mask);						\
	else if (mode[offset] != '-')					\
		goto format_err;					\
}
	t = 0;
	__SETMODE(0, 'r', S_IRUSR);
	__SETMODE(1, 'w', S_IWUSR);
	__SETMODE(2, 'x', S_IXUSR);
	__SETMODE(3, 'r', S_IRGRP);
	__SETMODE(4, 'w', S_IWGRP);
	__SETMODE(5, 'x', S_IXGRP);
	__SETMODE(6, 'r', S_IROTH);
	__SETMODE(7, 'w', S_IWOTH);
	__SETMODE(8, 'x', S_IXOTH);
	if (mode[9] != '\0' || t == 0) {
		/*
		 * We disallow modes of 0 -- we use 0 to decide the application
		 * never configured intermediate directory permissions, and we
		 * shouldn't create intermediate directories.  Besides, setting
		 * the permissions to 0 makes no sense.
		 */
format_err:	__db_errx(env,
	    "DB_ENV->set_intermediate_dir_mode: illegal mode \"%s\"", mode);
		return (EINVAL);
	}

	/* Keep a private copy of the string for get_intermediate_dir_mode. */
	if (dbenv->intermediate_dir_mode != NULL)
		__os_free(env, dbenv->intermediate_dir_mode);
	if ((ret = __os_strdup(env, mode, &dbenv->intermediate_dir_mode)) != 0)
		return (ret);

	env->dir_mode = (int)t;
	return (0);
}

/*
 * __env_get_errcall --
 *	{DB_ENV,DB}->get_errcall.
 *
 * PUBLIC: void __env_get_errcall __P((DB_ENV *,
 * PUBLIC:		void (**)(const DB_ENV *, const char *, const char *)));
 */
void
__env_get_errcall(dbenv, errcallp)
	DB_ENV *dbenv;
	void (**errcallp) __P((const DB_ENV *, const char *, const char *));
{
	*errcallp = dbenv->db_errcall;
}

/*
 * __env_set_errcall --
 *	{DB_ENV,DB}->set_errcall.
 *
 * PUBLIC: void __env_set_errcall __P((DB_ENV *,
 * PUBLIC:		void (*)(const DB_ENV *, const char *, const char *)));
 */
void
__env_set_errcall(dbenv, errcall)
	DB_ENV *dbenv;
	void (*errcall) __P((const DB_ENV *, const char *, const char *));
{
	ENV *env;

	env = dbenv->env;

	/* The application has now configured error output explicitly. */
	F_CLR(env, ENV_NO_OUTPUT_SET);
	dbenv->db_errcall = errcall;
}

/*
 * __env_get_errfile --
 *	{DB_ENV,DB}->get_errfile.
 *
 * PUBLIC: void __env_get_errfile __P((DB_ENV *, FILE **));
 */
void
__env_get_errfile(dbenv, errfilep)
	DB_ENV *dbenv;
	FILE **errfilep;
{
	*errfilep = dbenv->db_errfile;
}

/*
 * __env_set_errfile --
 *	{DB_ENV,DB}->set_errfile.
 *
 * PUBLIC: void __env_set_errfile __P((DB_ENV *, FILE *));
 */
void
__env_set_errfile(dbenv, errfile)
	DB_ENV *dbenv;
	FILE *errfile;
{
	ENV *env;

	env = dbenv->env;

	/* The application has now configured error output explicitly. */
	F_CLR(env, ENV_NO_OUTPUT_SET);
	dbenv->db_errfile = errfile;
}

/*
 * __env_get_errpfx --
 *	{DB_ENV,DB}->get_errpfx.
 *
 * PUBLIC: void __env_get_errpfx __P((DB_ENV *, const char **));
 */
void
__env_get_errpfx(dbenv, errpfxp)
	DB_ENV *dbenv;
	const char **errpfxp;
{
	*errpfxp = dbenv->db_errpfx;
}

/*
 * __env_set_errpfx --
 *	{DB_ENV,DB}->set_errpfx.
 *
 * PUBLIC: void __env_set_errpfx __P((DB_ENV *, const char *));
 */
void
__env_set_errpfx(dbenv, errpfx)
	DB_ENV *dbenv;
	const char *errpfx;
{
	/* The prefix is not copied; the caller's string must stay valid. */
	dbenv->db_errpfx = errpfx;
}

/*
 * __env_get_feedback --
 *	DB_ENV->get_feedback.
 */
static int
__env_get_feedback(dbenv, feedbackp)
	DB_ENV *dbenv;
	void (**feedbackp) __P((DB_ENV *, int, int));
{
	if (feedbackp != NULL)
		*feedbackp = dbenv->db_feedback;
	return (0);
}

/*
 * __env_set_feedback --
 *	DB_ENV->set_feedback.
 */
static int
__env_set_feedback(dbenv, feedback)
	DB_ENV *dbenv;
	void (*feedback) __P((DB_ENV *, int, int));
{
	dbenv->db_feedback = feedback;
	return (0);
}

/*
 * __env_get_thread_id_fn --
 *	DB_ENV->get_thread_id_fn
 */
static int
__env_get_thread_id_fn(dbenv, idp)
	DB_ENV *dbenv;
	void (**idp) __P((DB_ENV *, pid_t *, db_threadid_t *));
{
	if (idp != NULL)
		*idp = dbenv->thread_id;
	return (0);
}

/*
 * __env_set_thread_id --
 *	DB_ENV->set_thread_id
 */
static int
__env_set_thread_id(dbenv, id)
	DB_ENV *dbenv;
	void (*id) __P((DB_ENV *, pid_t *, db_threadid_t *));
{
	dbenv->thread_id = id;
	return (0);
}

/*
 * __env_get_threadid_string_fn --
 *	DB_ENV->get_threadid_string_fn
 */
static int
__env_get_thread_id_string_fn(dbenv, thread_id_stringp)
	DB_ENV *dbenv;
	char *(**thread_id_stringp)
	    __P((DB_ENV *, pid_t, db_threadid_t, char *));
{
	if (thread_id_stringp != NULL)
		*thread_id_stringp = dbenv->thread_id_string;
	return (0);
}

/*
 * __env_set_threadid_string --
 *	DB_ENV->set_threadid_string
 */
static int
__env_set_thread_id_string(dbenv, thread_id_string)
	DB_ENV *dbenv;
	char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *));
{
	dbenv->thread_id_string = thread_id_string;
	return (0);
}

/*
 * __env_get_isalive --
 *	DB_ENV->get_isalive
 */
static int
__env_get_isalive(dbenv, is_alivep)
	DB_ENV *dbenv;
	int (**is_alivep) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
{
	ENV *env;

	env = dbenv->env;

	/* An is_alive function is useless without a thread region. */
	if (F_ISSET(env, ENV_OPEN_CALLED) && env->thr_nbucket == 0) {
		__db_errx(env, DB_STR("1562",
		    "is_alive method specified but no thread region allocated"));
		return (EINVAL);
	}
	if (is_alivep != NULL)
		*is_alivep = dbenv->is_alive;
	return (0);
}

/*
 * __env_set_isalive --
 *	DB_ENV->set_isalive
 */
static int
__env_set_isalive(dbenv, is_alive)
	DB_ENV *dbenv;
	int (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
{
	ENV *env;

	env = dbenv->env;

	/* An is_alive function is useless without a thread region. */
	if (F_ISSET(env, ENV_OPEN_CALLED) && env->thr_nbucket == 0) {
		__db_errx(env, DB_STR("1563",
		    "is_alive method specified but no thread region allocated"));
		return (EINVAL);
	}
	dbenv->is_alive = is_alive;
	return (0);
}

/*
 * __env_get_thread_count --
 *	DB_ENV->get_thread_count
 */
static int
__env_get_thread_count(dbenv, countp)
	DB_ENV *dbenv;
	u_int32_t *countp;
{
	*countp = dbenv->thr_max;
	return (0);
}

/*
 * __env_set_thread_count --
 *	DB_ENV->set_thread_count
 *
 * PUBLIC: int __env_set_thread_count __P((DB_ENV *, u_int32_t));
 */
int
__env_set_thread_count(dbenv, count)
	DB_ENV *dbenv;
	u_int32_t count;
{
	ENV *env;

	env = dbenv->env;

	/* Sizes the thread region, so it must be set pre-open. */
	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_thread_count");
	dbenv->thr_max = count;

	return (0);
}

/*
 * __env_get_msgcall --
 *	{DB_ENV,DB}->get_msgcall.
 *
 * PUBLIC: void __env_get_msgcall
 * PUBLIC:     __P((DB_ENV *, void (**)(const DB_ENV *, const char *)));
 */
void
__env_get_msgcall(dbenv, msgcallp)
	DB_ENV *dbenv;
	void (**msgcallp) __P((const DB_ENV *, const char *));
{
	if (msgcallp != NULL)
		*msgcallp = dbenv->db_msgcall;
}

/*
 * __env_set_msgcall --
 *	{DB_ENV,DB}->set_msgcall.
 *
 * PUBLIC: void __env_set_msgcall
 * PUBLIC:     __P((DB_ENV *, void (*)(const DB_ENV *, const char *)));
 */
void
__env_set_msgcall(dbenv, msgcall)
	DB_ENV *dbenv;
	void (*msgcall) __P((const DB_ENV *, const char *));
{
	dbenv->db_msgcall = msgcall;
}

/*
 * __env_get_msgfile --
 *	{DB_ENV,DB}->get_msgfile.
 *
 * PUBLIC: void __env_get_msgfile __P((DB_ENV *, FILE **));
 */
void
__env_get_msgfile(dbenv, msgfilep)
	DB_ENV *dbenv;
	FILE **msgfilep;
{
	*msgfilep = dbenv->db_msgfile;
}

/*
 * __env_set_msgfile --
 *	{DB_ENV,DB}->set_msgfile.
 *
 * PUBLIC: void __env_set_msgfile __P((DB_ENV *, FILE *));
 */
void
__env_set_msgfile(dbenv, msgfile)
	DB_ENV *dbenv;
	FILE *msgfile;
{
	dbenv->db_msgfile = msgfile;
}

/*
 * __env_set_paniccall --
 *	{DB_ENV,DB}->set_paniccall.
 *
 * PUBLIC: int __env_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int)));
 */
int
__env_set_paniccall(dbenv, paniccall)
	DB_ENV *dbenv;
	void (*paniccall) __P((DB_ENV *, int));
{
	dbenv->db_paniccall = paniccall;
	return (0);
}

/*
 * __env_set_event_notify --
 *	DB_ENV->set_event_notify.
 */
static int
__env_set_event_notify(dbenv, event_func)
	DB_ENV *dbenv;
	void (*event_func) __P((DB_ENV *, u_int32_t, void *));
{
	dbenv->db_event_func = event_func;
	return (0);
}

/*
 * __env_get_shm_key --
 *	DB_ENV->get_shm_key.
 */
static int
__env_get_shm_key(dbenv, shm_keyp)
	DB_ENV *dbenv;
	long *shm_keyp;			/* !!!: really a key_t *. */
{
	*shm_keyp = dbenv->shm_key;
	return (0);
}

/*
 * __env_set_shm_key --
 *	DB_ENV->set_shm_key.
 *
 * PUBLIC: int __env_set_shm_key __P((DB_ENV *, long));
 */
int
__env_set_shm_key(dbenv, shm_key)
	DB_ENV *dbenv;
	long shm_key;			/* !!!: really a key_t. */
{
	ENV *env;

	env = dbenv->env;

	/* Selects the region backing store, so it must be set pre-open. */
	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_shm_key");

	dbenv->shm_key = shm_key;
	return (0);
}

/*
 * __env_get_tmp_dir --
 *	DB_ENV->get_tmp_dir.
 */
static int
__env_get_tmp_dir(dbenv, dirp)
	DB_ENV *dbenv;
	const char **dirp;
{
	*dirp = dbenv->db_tmp_dir;
	return (0);
}

/*
 * __env_set_tmp_dir --
 *	DB_ENV->set_tmp_dir.
 *
 * PUBLIC: int __env_set_tmp_dir __P((DB_ENV *, const char *));
 */
int
__env_set_tmp_dir(dbenv, dir)
	DB_ENV *dbenv;
	const char *dir;
{
	ENV *env;

	env = dbenv->env;

	/* Replace any previous setting with a private copy of dir. */
	if (dbenv->db_tmp_dir != NULL)
		__os_free(env, dbenv->db_tmp_dir);
	return (__os_strdup(env, dir, &dbenv->db_tmp_dir));
}

/*
 * __env_get_verbose --
 *	DB_ENV->get_verbose.
 */
static int
__env_get_verbose(dbenv, which, onoffp)
	DB_ENV *dbenv;
	u_int32_t which;
	int *onoffp;
{
	/* The switch only validates "which"; unknown values are rejected. */
	switch (which) {
	case DB_VERB_DEADLOCK:
	case DB_VERB_FILEOPS:
	case DB_VERB_FILEOPS_ALL:
	case DB_VERB_RECOVERY:
	case DB_VERB_REGISTER:
	case DB_VERB_REPLICATION:
	case DB_VERB_REP_ELECT:
	case DB_VERB_REP_LEASE:
	case DB_VERB_REP_MISC:
	case DB_VERB_REP_MSGS:
	case DB_VERB_REP_SYNC:
	case DB_VERB_REP_SYSTEM:
	case DB_VERB_REP_TEST:
	case DB_VERB_REPMGR_CONNFAIL:
	case DB_VERB_REPMGR_MISC:
	case DB_VERB_WAITSFOR:
		*onoffp = FLD_ISSET(dbenv->verbose, which) ? 1 : 0;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * __env_set_verbose --
 *	DB_ENV->set_verbose.
 *
 * PUBLIC: int __env_set_verbose __P((DB_ENV *, u_int32_t, int));
 */
int
__env_set_verbose(dbenv, which, on)
	DB_ENV *dbenv;
	u_int32_t which;
	int on;
{
	switch (which) {
	case DB_VERB_DEADLOCK:
	case DB_VERB_FILEOPS:
	case DB_VERB_FILEOPS_ALL:
	case DB_VERB_RECOVERY:
	case DB_VERB_REGISTER:
	case DB_VERB_REPLICATION:
	case DB_VERB_REP_ELECT:
	case DB_VERB_REP_LEASE:
	case DB_VERB_REP_MISC:
	case DB_VERB_REP_MSGS:
	case DB_VERB_REP_SYNC:
	case DB_VERB_REP_SYSTEM:
	case DB_VERB_REP_TEST:
	case DB_VERB_REPMGR_CONNFAIL:
	case DB_VERB_REPMGR_MISC:
	case DB_VERB_WAITSFOR:
		if (on)
			FLD_SET(dbenv->verbose, which);
		else
			FLD_CLR(dbenv->verbose, which);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * __db_mi_env --
 *	Method illegally called with public environment.
 *
 * PUBLIC: int __db_mi_env __P((ENV *, const char *));
 */
int
__db_mi_env(env, name)
	ENV *env;
	const char *name;
{
	__db_errx(env, DB_STR_A("1564",
	    "%s: method not permitted when environment specified", "%s"),
	    name);
	return (EINVAL);
}

/*
 * __db_mi_open --
 *	Method illegally called after open.
 *
 * PUBLIC: int __db_mi_open __P((ENV *, const char *, int));
 */
int
__db_mi_open(env, name, after)
	ENV *env;
	const char *name;
	int after;
{
	__db_errx(env, DB_STR_A("1565",
	    "%s: method not permitted %s handle's open method", "%s %s"),
	    name, after ? DB_STR_P("after") : DB_STR_P("before"));
	return (EINVAL);
}

/*
 * __env_not_config --
 *	Method or function called without required configuration.
 *
 * PUBLIC: int __env_not_config __P((ENV *, char *, u_int32_t));
 */
int
__env_not_config(env, i, flags)
	ENV *env;
	char *i;		/* Name of the calling interface. */
	u_int32_t flags;	/* The missing DB_INIT_* subsystem flag. */
{
	char *sub;
	int is_sub;

	is_sub = 1;

	switch (flags) {
	case DB_INIT_CDB:
		sub = "DB_INIT_CDB";
		is_sub = 0;	/* A flag name, not a subsystem name. */
		break;
	case DB_INIT_LOCK:
		sub = "locking";
		break;
	case DB_INIT_LOG:
		sub = "logging";
		break;
	case DB_INIT_MPOOL:
		sub = "memory pool";
		break;
	case DB_INIT_MUTEX:
		sub = "mutex";
		break;
	case DB_INIT_REP:
		sub = "replication";
		break;
	case DB_INIT_TXN:
		sub = "transaction";
		break;
	default:
		sub = "";
		break;
	}

	if (is_sub) {
		__db_errx(env, DB_STR_A("1566",
	"%s interface requires an environment configured for the %s subsystem",
		    "%s %s"), i, sub);
	} else {
		__db_errx(env, DB_STR_A("1587",
		    "%s interface requires an environment configured with %s",
		    "%s %s"), i, sub);
	}

	return (EINVAL);
}

/*
 * __env_get_timeout --
 *	DB_ENV->get_timeout
 */
static int
__env_get_timeout(dbenv, timeoutp, flags)
	DB_ENV *dbenv;
	db_timeout_t *timeoutp;
	u_int32_t flags;
{
	int ret;

	ret = 0;
	/* The registry timeout lives in the handle; all others in the
	 * lock subsystem. */
	if (flags == DB_SET_REG_TIMEOUT) {
		*timeoutp = dbenv->envreg_timeout;
	} else
		ret = __lock_get_env_timeout(dbenv, timeoutp, flags);
	return (ret);
}

/*
 * __env_set_timeout --
 *	DB_ENV->set_timeout
 *
 * PUBLIC: int __env_set_timeout __P((DB_ENV *, db_timeout_t, u_int32_t));
 */
int
__env_set_timeout(dbenv, timeout, flags)
	DB_ENV *dbenv;
	db_timeout_t timeout;
	u_int32_t flags;
{
	int ret;

	ret = 0;
	if (flags == DB_SET_REG_TIMEOUT)
		dbenv->envreg_timeout = timeout;
	else
		ret = __lock_set_env_timeout(dbenv, timeout, flags);
	return (ret);
}
diff --git a/src/env/env_name.c b/src/env/env_name.c
new file mode 100644
index 00000000..a57dc275
--- /dev/null
+++ b/src/env/env_name.c
@@ -0,0 +1,281 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"

static int __db_fullpath
    __P((ENV *, const char *, const char *, int, int, char **));

/*
 * Append one path component to the buffer; relies on the enclosing
 * function's locals: p (cursor), str (buffer), len and slash.
 */
#define	DB_ADDSTR(add) {						\
	/*								\
	 * The string might be NULL or zero-length, and the p[-1]	\
	 * might indirect to before the beginning of our buffer.	\
	 */								\
	if ((add) != NULL && (add)[0] != '\0') {			\
		/* If leading slash, start over. */			\
		if (__os_abspath(add)) {				\
			p = str;					\
			slash = 0;					\
		}							\
		/* Append to the current string. */			\
		len = strlen(add);					\
		if (slash)						\
			*p++ = PATH_SEPARATOR[0];			\
		memcpy(p, add, len);					\
		p += len;						\
		slash = strchr(PATH_SEPARATOR, p[-1]) == NULL;		\
	}								\
}

/*
 * __db_fullpath --
 *	Constructs a path name relative to the environment home, and optionally
 * checks whether the file or directory exist.  On success *namep (if
 * non-NULL) receives an allocated string the caller must free.
 */
static int
__db_fullpath(env, dir, file, check_file, check_dir, namep)
	ENV *env;
	const char *dir;
	const char *file;
	int check_file;
	int check_dir;
	char **namep;
{
	size_t len;
	const char *home;
	char *p, *str;
	int isdir, ret, slash;

	/* All paths are relative to the environment home. */
	home = (env == NULL) ? NULL : env->db_home;

	/* One extra byte per component covers separators and the NUL. */
	len =
	    (home == NULL ? 0 : strlen(home) + 1) +
	    (dir == NULL ? 0 : strlen(dir) + 1) +
	    (file == NULL ? 0 : strlen(file) + 1);

	if ((ret = __os_malloc(env, len, &str)) != 0)
		return (ret);

	slash = 0;
	p = str;
	DB_ADDSTR(home);
	DB_ADDSTR(dir);
	*p = '\0';
	if (check_dir && (__os_exists(env, str, &isdir) != 0 || !isdir)) {
		__os_free(env, str);
		return (ENOENT);
	}
	DB_ADDSTR(file);
	*p = '\0';

	/*
	 * If we're opening a data file, see if it exists.  If not, keep
	 * trying.
	 */
	if (check_file && __os_exists(env, str, NULL) != 0) {
		__os_free(env, str);
		return (ENOENT);
	}

	if (namep == NULL)
		__os_free(env, str);
	else
		*namep = str;
	return (0);
}

/*
 * Try one candidate directory; returns from the enclosing function
 * unless the result was ENOENT, in which case the search continues.
 */
#define	DB_CHECKFILE(file, dir, check_file, check_dir, namep, ret_dir) do { \
	ret = __db_fullpath(env, dir, file,				\
	    check_file, check_dir, namep);				\
	if (ret == 0 && (ret_dir) != NULL)				\
		*(ret_dir) = (dir);					\
	if (ret != ENOENT)						\
		return (ret);						\
} while (0)

/*
 * __db_appname --
 *	Given an optional DB environment, directory and file name and type
 * of call, build a path based on the ENV->open rules, and return
 * it in allocated space.  Dirp can be used to specify a data directory
 * to use.  If not and one is used then drip will contain a pointer
 * to the directory name.
 *
 * PUBLIC: int __db_appname __P((ENV *, APPNAME,
 * PUBLIC:    const char *, const char **, char **));
 */
int
__db_appname(env, appname, file, dirp, namep)
	ENV *env;
	APPNAME appname;
	const char *file;
	const char **dirp;
	char **namep;
{
	DB_ENV *dbenv;
	char **ddp;
	const char *dir;
	int ret;

	dbenv = env->dbenv;
	dir = NULL;

	if (namep != NULL)
		*namep = NULL;

	/*
	 * Absolute path names are never modified.  If the file is an absolute
	 * path, we're done.
	 */
	if (file != NULL && __os_abspath(file))
		return (__os_strdup(env, file, namep));

	/*
	 * DB_APP_NONE:
	 *	DB_HOME/file
	 * DB_APP_DATA:
	 *	DB_HOME/DB_DATA_DIR/file
	 * DB_APP_LOG:
	 *	DB_HOME/DB_LOG_DIR/file
	 * DB_APP_TMP:
	 *	DB_HOME/DB_TMP_DIR/<create>
	 */
	switch (appname) {
	case DB_APP_NONE:
		break;
	case DB_APP_RECOVER:
	case DB_APP_DATA:
		/*
		 * First, step through the data_dir entries, if any, looking
		 * for the file.
		 */
		if (dbenv != NULL && dbenv->db_data_dir != NULL)
			for (ddp = dbenv->db_data_dir; *ddp != NULL; ddp++)
				DB_CHECKFILE(file, *ddp, 1, 0, namep, dirp);

		/* Second, look in the environment home directory. */
		DB_CHECKFILE(file, NULL, 1, 0, namep, dirp);

		/*
		 * Otherwise, we're going to create.  Use the specified
		 * directory unless we're in recovery and it doesn't exist.
		 */
		if (dirp != NULL && *dirp != NULL)
			DB_CHECKFILE(file, *dirp, 0,
			    appname == DB_APP_RECOVER, namep, dirp);

		/* Finally, use the create directory, if set. */
		if (dbenv != NULL && dbenv->db_create_dir != NULL)
			dir = dbenv->db_create_dir;
		break;
	case DB_APP_LOG:
		if (dbenv != NULL)
			dir = dbenv->db_log_dir;
		break;
	case DB_APP_TMP:
		if (dbenv != NULL)
			dir = dbenv->db_tmp_dir;
		break;
	}

	/*
	 * Construct the full path.  For temporary files, it is an error if the
	 * directory does not exist: if it doesn't, checking whether millions
	 * of temporary files exist inside it takes a *very* long time.
	 */
	DB_CHECKFILE(file, dir, 0, appname == DB_APP_TMP, namep, dirp);

	return (ret);
}

/*
 * __db_tmp_open --
 *	Create a temporary file.
 *
 * PUBLIC: int __db_tmp_open __P((ENV *, u_int32_t, DB_FH **));
 */
int
__db_tmp_open(env, oflags, fhpp)
	ENV *env;
	u_int32_t oflags;
	DB_FH **fhpp;
{
	pid_t pid;
	int filenum, i, ipid, ret;
	char *path;
	char *firstx, *trv;

	DB_ASSERT(env, fhpp != NULL);
	*fhpp = NULL;

#define	DB_TRAIL	"BDBXXXXX"
	/*
	 * On failure __db_appname has already stored NULL into path (it
	 * NULLs *namep on entry), so the free at "done" is benign --
	 * assumes __os_free accepts NULL; confirm against os_alloc.c.
	 */
	if ((ret = __db_appname(env, DB_APP_TMP, DB_TRAIL, NULL, &path)) != 0)
		goto done;

	/* Replace the X's with the process ID (in decimal). */
	__os_id(env->dbenv, &pid, NULL);
	ipid = (int)pid;
	if (ipid < 0)
		ipid = -ipid;
	for (trv = path + strlen(path); *--trv == 'X'; ipid /= 10)
		*trv = '0' + (u_char)(ipid % 10);
	firstx = trv + 1;

	/* Loop, trying to open a file. */
	for (filenum = 1;; filenum++) {
		if ((ret = __os_open(env, path, 0,
		    oflags | DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP,
		    DB_MODE_600, fhpp)) == 0) {
			ret = 0;
			goto done;
		}

		/*
		 * !!!:
		 * If we don't get an EEXIST error, then there's something
		 * seriously wrong.  Unfortunately, if the implementation
		 * doesn't return EEXIST for O_CREAT and O_EXCL regardless
		 * of other possible errors, we've lost.
		 */
		if (ret != EEXIST) {
			__db_err(env, ret, DB_STR_A("1586",
			    "temporary open: %s", "%s"), path);
			goto done;
		}

		/*
		 * Generate temporary file names in a backwards-compatible way.
		 * If pid == 12345, the result is:
		 *	/DB12345 (tried above, the first time through).
		 *	/DBa2345 ...  /DBz2345
		 *	/DBaa345 ...  /DBaz345
		 *	/DBba345, and so on.
		 *
		 * XXX
		 * This algorithm is O(n**2) -- that is, creating 100 temporary
		 * files requires 5,000 opens, creating 1000 files requires
		 * 500,000.  If applications open a lot of temporary files, we
		 * could improve performance by switching to timestamp-based
		 * file names.
		 */
		/* First pass only bounds-checks the digit positions. */
		for (i = filenum, trv = firstx; i > 0; i = (i - 1) / 26)
			if (*trv++ == '\0') {
				ret = EINVAL;
				goto done;
			}

		for (i = filenum; i > 0; i = (i - 1) / 26)
			*--trv = 'a' + ((i - 1) % 26);
	}
done:
	__os_free(env, path);
	return (ret);
}
diff --git a/src/env/env_open.c b/src/env/env_open.c
new file mode 100644
index 00000000..a9e4cdc5
--- /dev/null
+++ b/src/env/env_open.c
@@ -0,0 +1,1234 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"

static int __env_open_arg __P((DB_ENV *, u_int32_t));
static int __file_handle_cleanup __P((ENV *));

/*
 * db_version --
 *	Return legacy version information, including DB Major Version,
 *	DB Minor Version, and DB Patch/Build numbers.
 *
 * EXTERN: char *db_version __P((int *, int *, int *));
 */
char *
db_version(majverp, minverp, patchp)
	int *majverp, *minverp, *patchp;
{
	if (majverp != NULL)
		*majverp = DB_VERSION_MAJOR;
	if (minverp != NULL)
		*minverp = DB_VERSION_MINOR;
	if (patchp != NULL)
		*patchp = DB_VERSION_PATCH;
	return ((char *)DB_VERSION_STRING);
}

/*
 * db_full_version --
 *	Return complete version information, including Oracle Family,
 *	Oracle Release, DB Major Version, DB Minor Version, and DB
 *	Patch/Build numbers.
+ * + * EXTERN: char *db_full_version __P((int *, int *, int *, int *, int *)); + */ +char * +db_full_version(familyp, releasep, majverp, minverp, patchp) + int *familyp, *releasep, *majverp, *minverp, *patchp; +{ + if (familyp != NULL) + *familyp = DB_VERSION_FAMILY; + if (releasep != NULL) + *releasep = DB_VERSION_RELEASE; + if (majverp != NULL) + *majverp = DB_VERSION_MAJOR; + if (minverp != NULL) + *minverp = DB_VERSION_MINOR; + if (patchp != NULL) + *patchp = DB_VERSION_PATCH; + return ((char *)DB_VERSION_FULL_STRING); +} + +/* + * __env_open_pp -- + * DB_ENV->open pre/post processing. + * + * PUBLIC: int __env_open_pp __P((DB_ENV *, const char *, u_int32_t, int)); + */ +int +__env_open_pp(dbenv, db_home, flags, mode) + DB_ENV *dbenv; + const char *db_home; + u_int32_t flags; + int mode; +{ + ENV *env; + int ret; + + env = dbenv->env; + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->open"); + +#undef OKFLAGS +#define OKFLAGS \ + (DB_CREATE | DB_FAILCHK | DB_FAILCHK_ISALIVE | DB_INIT_CDB | \ + DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_REP | \ + DB_INIT_TXN | DB_LOCKDOWN | DB_NO_CHECKPOINT | DB_PRIVATE | \ + DB_RECOVER | DB_RECOVER_FATAL | DB_REGISTER | DB_SYSTEM_MEM | \ + DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) +#undef OKFLAGS_CDB +#define OKFLAGS_CDB \ + (DB_CREATE | DB_INIT_CDB | DB_INIT_MPOOL | DB_LOCKDOWN | \ + DB_PRIVATE | DB_SYSTEM_MEM | DB_THREAD | \ + DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) + + if ((ret = __db_fchk(env, "DB_ENV->open", flags, OKFLAGS)) != 0) + return (ret); + if ((ret = __db_fcchk( + env, "DB_ENV->open", flags, DB_INIT_CDB, ~OKFLAGS_CDB)) != 0) + return (ret); + +#if defined(HAVE_MIXED_SIZE_ADDRESSING) && (SIZEOF_CHAR_P == 8) + if (F_ISSET(env, DB_PRIVATE)) { + __db_errx(env, DB_STR("1589", "DB_PRIVATE is not " + "supported by 64-bit applications in " + "mixed-size-addressing mode")); + return (EINVAL); + } +#endif + + return (__env_open(dbenv, db_home, flags, mode)); +} + +/* + * __env_open -- + * DB_ENV->open. 
+ * + * PUBLIC: int __env_open __P((DB_ENV *, const char *, u_int32_t, int)); + */ +int +__env_open(dbenv, db_home, flags, mode) + DB_ENV *dbenv; + const char *db_home; + u_int32_t flags; + int mode; +{ + DB_THREAD_INFO *ip; + ENV *env; + u_int32_t orig_flags; + int register_recovery, ret, t_ret; + + ip = NULL; + env = dbenv->env; + register_recovery = 0; + + /* Initial configuration. */ + if ((ret = __env_config(dbenv, db_home, &flags, mode)) != 0) + return (ret); + + /* + * Save the DB_ENV handle's configuration flags as set by user-called + * configuration methods and the environment directory's DB_CONFIG + * file. If we use this DB_ENV structure to recover the existing + * environment or to remove an environment we created after failure, + * we'll restore the DB_ENV flags to these values. + */ + orig_flags = dbenv->flags; + + /* Check open flags. */ + if ((ret = __env_open_arg(dbenv, flags)) != 0) + return (ret); + + /* + * If we're going to register with the environment, that's the first + * thing we do. + */ + if (LF_ISSET(DB_REGISTER)) { + /* + * Through the SQL interface (btree.c) we set + * DB_FAILCHK_ISALIVE. When set, we want to run failchk + * if a recovery is needed. Set up the infrastructure to run + * it. SQL applications have no way to specify the thread + * count or an isalive, so force it here. Failchk is run + * inside of register code. + */ + if (LF_ISSET(DB_FAILCHK_ISALIVE)) { + (void)__env_set_thread_count(dbenv, 50); + dbenv->is_alive = __envreg_isalive; + } + + if ((ret = + __envreg_register(env, ®ister_recovery, flags)) != 0) + goto err; + if (register_recovery) { + if (!LF_ISSET(DB_RECOVER)) { + __db_errx(env, DB_STR("1567", + "The DB_RECOVER flag was not specified, and recovery is needed")); + ret = DB_RUNRECOVERY; + goto err; + } + } else + LF_CLR(DB_RECOVER); + } + + /* + * If we're doing recovery, destroy the environment so that we create + * all the regions from scratch. 
The major concern I have is if the + * application stomps the environment with a rogue pointer. We have + * no way of detecting that, and we could be forced into a situation + * where we start up and then crash, repeatedly. + * + * We do not check any flags like DB_PRIVATE before calling remove. + * We don't care if the current environment was private or not, we + * want to remove files left over for any reason, from any session. + */ + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) +#ifdef HAVE_REPLICATION + if ((ret = __rep_reset_init(env)) != 0 || + (ret = __env_remove_env(env)) != 0 || +#else + if ((ret = __env_remove_env(env)) != 0 || +#endif + (ret = __env_refresh(dbenv, orig_flags, 0)) != 0) + goto err; + + if ((ret = __env_attach_regions(dbenv, flags, orig_flags, 1)) != 0) + goto err; + + /* + * After attached to env, run failchk if not doing register + * recovery. Not providing this option with the DB_FAILCHK_ISALIVE + * flag. + */ + if (LF_ISSET(DB_FAILCHK) && !register_recovery) { + ENV_ENTER(env, ip); + if ((ret = __env_failchk_int(dbenv)) != 0) + goto err; + ENV_LEAVE(env, ip); + } + +err: if (ret != 0) + (void)__env_refresh(dbenv, orig_flags, 0); + + if (register_recovery) { + /* + * If recovery succeeded, release our exclusive lock, other + * processes can now proceed. + * + * If recovery failed, unregister now and let another process + * clean up. + */ + if (ret == 0 && (t_ret = __envreg_xunlock(env)) != 0) + ret = t_ret; + if (ret != 0) + (void)__envreg_unregister(env, 1); + } + + return (ret); +} + +/* + * __env_open_arg -- + * DB_ENV->open flags checking. 
+ */ +static int +__env_open_arg(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbenv->env; + ret = 0; + + if (LF_ISSET(DB_REGISTER)) { + if (!__os_support_db_register()) { + __db_errx(env, DB_STR("1568", + "Berkeley DB library does not support DB_REGISTER on this system")); + return (EINVAL); + } + if ((ret = __db_fcchk(env, "DB_ENV->open", flags, + DB_PRIVATE, DB_REGISTER | DB_SYSTEM_MEM)) != 0) + return (ret); + if (LF_ISSET(DB_CREATE) && !LF_ISSET(DB_INIT_TXN)) { + __db_errx(env, DB_STR("1569", + "registration requires transaction support")); + return (EINVAL); + } + } + /* + * Only check for flags compatible with DB_INIT_REP when creating + * since otherwise it'll be ignored anyway. + */ + if (LF_ISSET(DB_INIT_REP) && LF_ISSET(DB_CREATE)) { + if (!__os_support_replication()) { + __db_errx(env, DB_STR("1570", + "Berkeley DB library does not support replication on this system")); + return (EINVAL); + } + if (!LF_ISSET(DB_INIT_LOCK)) { + __db_errx(env, DB_STR("1571", + "replication requires locking support")); + return (EINVAL); + } + if (!LF_ISSET(DB_INIT_TXN)) { + __db_errx(env, DB_STR("1572", + "replication requires transaction support")); + return (EINVAL); + } + } + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) { + if ((ret = __db_fcchk(env, + "DB_ENV->open", flags, DB_RECOVER, DB_RECOVER_FATAL)) != 0) + return (ret); + if ((ret = __db_fcchk(env, + "DB_ENV->open", flags, DB_REGISTER, DB_RECOVER_FATAL)) != 0) + return (ret); + if (!LF_ISSET(DB_CREATE)) { + __db_errx(env, DB_STR("1573", + "recovery requires the create flag")); + return (EINVAL); + } + if (!LF_ISSET(DB_INIT_TXN)) { + __db_errx(env, DB_STR("1574", + "recovery requires transaction support")); + return (EINVAL); + } + } + if (LF_ISSET(DB_FAILCHK)) { + if (!ALIVE_ON(env)) { + __db_errx(env, DB_STR("1575", + "DB_FAILCHK requires DB_ENV->is_alive be configured")); + return (EINVAL); + } + if (dbenv->thr_max == 0) { + __db_errx(env, DB_STR("1576", + "DB_FAILCHK 
requires DB_ENV->set_thread_count be configured")); + return (EINVAL); + } + } + +#ifdef HAVE_MUTEX_THREAD_ONLY + /* + * Currently we support one kind of mutex that is intra-process only, + * POSIX 1003.1 pthreads, because a variety of systems don't support + * the full pthreads API, and our only alternative is test-and-set. + */ + if (!LF_ISSET(DB_PRIVATE)) { + __db_errx(env, DB_STR("1577", + "Berkeley DB library configured to support only private environments")); + return (EINVAL); + } +#endif + +#ifdef HAVE_MUTEX_FCNTL + /* + * !!! + * We need a file descriptor for fcntl(2) locking. We use the file + * handle from the REGENV file for this purpose. + * + * Since we may be using shared memory regions, e.g., shmget(2), and + * not a mapped-in regular file, the backing file may be only a few + * bytes in length. So, this depends on the ability to call fcntl to + * lock file offsets much larger than the actual physical file. I + * think that's safe -- besides, very few systems actually need this + * kind of support, SunOS is the only one still in wide use of which + * I'm aware. + * + * The error case is if an application lacks spinlocks and wants to be + * threaded. That doesn't work because fcntl will lock the underlying + * process, including all its threads. + */ + if (F_ISSET(env, ENV_THREAD)) { + __db_errx(env, DB_STR("1578", + "architecture lacks fast mutexes: applications cannot be threaded")); + return (EINVAL); + } +#endif + return (ret); +} + +/* + * __env_remove -- + * DB_ENV->remove. + * + * PUBLIC: int __env_remove __P((DB_ENV *, const char *, u_int32_t)); + */ +int +__env_remove(dbenv, db_home, flags) + DB_ENV *dbenv; + const char *db_home; + u_int32_t flags; +{ + ENV *env; + int ret, t_ret; + + env = dbenv->env; + +#undef OKFLAGS +#define OKFLAGS \ + (DB_FORCE | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) + + /* Validate arguments. 
*/ + if ((ret = __db_fchk(env, "DB_ENV->remove", flags, OKFLAGS)) != 0) + return (ret); + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->remove"); + + if ((ret = __env_config(dbenv, db_home, &flags, 0)) != 0) + return (ret); + + /* + * Turn the environment off -- if the environment is corrupted, this + * could fail. Ignore any error if we're forcing the question. + */ + if ((ret = __env_turn_off(env, flags)) == 0 || LF_ISSET(DB_FORCE)) + ret = __env_remove_env(env); + + if ((t_ret = __env_close(dbenv, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __env_config -- + * Argument-based initialization. + * + * PUBLIC: int __env_config __P((DB_ENV *, const char *, u_int32_t *, int)); + */ +int +__env_config(dbenv, db_home, flagsp, mode) + DB_ENV *dbenv; + const char *db_home; + u_int32_t *flagsp; + int mode; +{ + ENV *env; + int ret; + u_int32_t flags; + char *home, home_buf[DB_MAXPATHLEN]; + + env = dbenv->env; + flags = *flagsp; + + /* + * Set the database home. + * + * Use db_home by default, this allows utilities to reasonably + * override the environment either explicitly or by using a -h + * option. Otherwise, use the environment if it's permitted + * and initialized. + */ + home = (char *)db_home; + if (home == NULL && (LF_ISSET(DB_USE_ENVIRON) || + (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot()))) { + home = home_buf; + if ((ret = __os_getenv( + env, "DB_HOME", &home, sizeof(home_buf))) != 0) + return (ret); + /* + * home set to NULL if __os_getenv failed to find DB_HOME. + */ + } + if (home != NULL) { + if (env->db_home != NULL) + __os_free(env, env->db_home); + if ((ret = __os_strdup(env, home, &env->db_home)) != 0) + return (ret); + } + + /* Save a copy of the DB_ENV->open method flags. */ + env->open_flags = flags; + + /* Default permissions are read-write for both owner and group. */ + env->db_mode = mode == 0 ? DB_MODE_660 : mode; + + /* Read the DB_CONFIG file. 
*/ + if ((ret = __env_read_db_config(env)) != 0) + return (ret); + + /* + * Update the DB_ENV->open method flags. The copy of the flags might + * have been changed during reading DB_CONFIG file. + */ + flags = env->open_flags; + + /* + * If no temporary directory path was specified in the config file, + * choose one. + */ + if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(env, flags)) != 0) + return (ret); + + *flagsp = flags; + return (0); +} + +/* + * __env_close_pp -- + * DB_ENV->close pre/post processor. + * + * PUBLIC: int __env_close_pp __P((DB_ENV *, u_int32_t)); + */ +int +__env_close_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_check, ret, t_ret; + u_int32_t close_flags, flags_orig; + + env = dbenv->env; + ret = 0; + close_flags = flags_orig = 0; + + /* + * Validate arguments, but as a DB_ENV handle destructor, we can't + * fail. + */ + if (flags != 0 && flags != DB_FORCESYNC && + (t_ret = __db_ferr(env, "DB_ENV->close", 0)) != 0 && ret == 0) + ret = t_ret; + +#define DBENV_FORCESYNC 0x00000001 +#define DBENV_CLOSE_REPCHECK 0x00000010 + if (flags == DB_FORCESYNC) + close_flags |= DBENV_FORCESYNC; + + /* + * If the environment has panic'd, all we do is try and discard + * the important resources. + */ + if (PANIC_ISSET(env)) { + /* clean up from registry file */ + if (dbenv->registry != NULL) { + /* + * Temporarily set no panic so we do not trigger the + * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwr + * thus allowing the unregister to happen correctly. + */ + flags_orig = F_ISSET(dbenv, DB_ENV_NOPANIC); + F_SET(dbenv, DB_ENV_NOPANIC); + (void)__envreg_unregister(env, 0); + dbenv->registry = NULL; + if (!flags_orig) + F_CLR(dbenv, DB_ENV_NOPANIC); + } + + /* Close all underlying threads and sockets. */ + if (IS_ENV_REPLICATED(env)) + (void)__repmgr_close(env); + + /* Close all underlying file handles. 
*/ + (void)__file_handle_cleanup(env); + + PANIC_CHECK(env); + } + + ENV_ENTER(env, ip); + + rep_check = IS_ENV_REPLICATED(env) ? 1 : 0; + if (rep_check) { +#ifdef HAVE_REPLICATION_THREADS + /* + * Shut down Replication Manager threads first of all. This + * must be done before __env_rep_enter to avoid a deadlock that + * could occur if repmgr's background threads try to do a rep + * operation that needs __rep_lockout. + */ + if ((t_ret = __repmgr_close(env)) != 0 && ret == 0) + ret = t_ret; +#endif + if ((t_ret = __env_rep_enter(env, 0)) != 0 && ret == 0) + ret = t_ret; + } + + if (rep_check) + close_flags |= DBENV_CLOSE_REPCHECK; + if ((t_ret = __env_close(dbenv, close_flags)) != 0 && ret == 0) + ret = t_ret; + + /* Don't ENV_LEAVE as we have already detached from the region. */ + return (ret); +} + +/* + * __env_close -- + * DB_ENV->close. + * + * PUBLIC: int __env_close __P((DB_ENV *, u_int32_t)); + */ +int +__env_close(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB *dbp; + ENV *env; + int ret, rep_check, t_ret; + char **p; + u_int32_t close_flags; + + env = dbenv->env; + ret = 0; + close_flags = LF_ISSET(DBENV_FORCESYNC) ? 0 : DB_NOSYNC; + rep_check = LF_ISSET(DBENV_CLOSE_REPCHECK); + + /* + * Check to see if we were in the middle of restoring transactions and + * need to close the open files. + */ + if (TXN_ON(env) && (t_ret = __txn_preclose(env)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_REPLICATION + if ((t_ret = __rep_env_close(env)) != 0 && ret == 0) + ret = t_ret; +#endif + + /* + * Close all databases opened in this environment after the rep region + * is closed. Rep region's internal database is already closed now. + */ + while ((dbp = TAILQ_FIRST(&env->dblist)) != NULL) { + /* + * Note down and ignore the error code. Since we can't do + * anything about the dbp handle anyway if the close + * operation fails. But we want to return the error to the + * caller. This is how this function takes care of various + * close operation errors. 
+ */ + if (dbp->alt_close != NULL) + t_ret = dbp->alt_close(dbp, close_flags); + else + t_ret = __db_close(dbp, NULL, close_flags); + if (t_ret != 0 && ret == 0) + ret = t_ret; + } + + /* + * Detach from the regions and undo the allocations done by + * DB_ENV->open. + */ + if ((t_ret = __env_refresh(dbenv, 0, rep_check)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_CRYPTO + /* + * Crypto comes last, because higher level close functions need + * cryptography. + */ + if ((t_ret = __crypto_env_close(env)) != 0 && ret == 0) + ret = t_ret; +#endif + + /* If we're registered, clean up. */ + if (dbenv->registry != NULL) { + (void)__envreg_unregister(env, 0); + dbenv->registry = NULL; + } + + /* Check we've closed all underlying file handles. */ + if ((t_ret = __file_handle_cleanup(env)) != 0 && ret == 0) + ret = t_ret; + + /* Release any string-based configuration parameters we've copied. */ + if (dbenv->db_log_dir != NULL) + __os_free(env, dbenv->db_log_dir); + dbenv->db_log_dir = NULL; + if (dbenv->db_tmp_dir != NULL) + __os_free(env, dbenv->db_tmp_dir); + dbenv->db_tmp_dir = NULL; + if (dbenv->db_data_dir != NULL) { + for (p = dbenv->db_data_dir; *p != NULL; ++p) + __os_free(env, *p); + __os_free(env, dbenv->db_data_dir); + dbenv->db_data_dir = NULL; + dbenv->data_next = 0; + } + if (dbenv->intermediate_dir_mode != NULL) + __os_free(env, dbenv->intermediate_dir_mode); + if (env->db_home != NULL) { + __os_free(env, env->db_home); + env->db_home = NULL; + } + + /* Discard the structure. */ + __db_env_destroy(dbenv); + + return (ret); +} + +/* + * __env_refresh -- + * Refresh the DB_ENV structure. 
+ * PUBLIC: int __env_refresh __P((DB_ENV *, u_int32_t, int)); + */ +int +__env_refresh(dbenv, orig_flags, rep_check) + DB_ENV *dbenv; + u_int32_t orig_flags; + int rep_check; +{ + DB *ldbp; + DB_THREAD_INFO *ip; + ENV *env; + int ret, t_ret; + + env = dbenv->env; + ret = 0; + + /* + * Release resources allocated by DB_ENV->open, and return it to the + * state it was in just before __env_open was called. (This means + * state set by pre-open configuration functions must be preserved.) + * + * Refresh subsystems, in the reverse order they were opened (txn + * must be first, it may want to discard locks and flush the log). + * + * !!! + * Note that these functions, like all of __env_refresh, only undo + * the effects of __env_open. Functions that undo work done by + * db_env_create or by a configuration function should go in + * __env_close. + */ + if (TXN_ON(env) && + (t_ret = __txn_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; + + if (LOGGING_ON(env) && + (t_ret = __log_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; + + /* + * Locking should come after logging, because closing log results + * in files closing which may require locks being released. + */ + if (LOCKING_ON(env)) { + if (!F_ISSET(env, ENV_THREAD) && + env->env_lref != NULL && (t_ret = + __lock_id_free(env, env->env_lref)) != 0 && ret == 0) + ret = t_ret; + env->env_lref = NULL; + + if ((t_ret = __lock_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; + } + + /* Discard the DB_ENV, ENV handle mutexes. */ + if ((t_ret = __mutex_free(env, &dbenv->mtx_db_env)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, &env->mtx_env)) != 0 && ret == 0) + ret = t_ret; + + /* + * Discard DB list and its mutex. + * Discard the MT mutex. + * + * !!! + * This must be done after we close the log region, because we close + * database handles and so acquire this mutex when we close log file + * handles. 
+ */ + if (env->db_ref != 0) { + __db_errx(env, DB_STR("1579", + "Database handles still open at environment close")); + TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) + __db_errx(env, DB_STR_A("1580", + "Open database handle: %s%s%s", "%s %s %s"), + ldbp->fname == NULL ? "unnamed" : ldbp->fname, + ldbp->dname == NULL ? "" : "/", + ldbp->dname == NULL ? "" : ldbp->dname); + if (ret == 0) + ret = EINVAL; + } + TAILQ_INIT(&env->dblist); + if ((t_ret = __mutex_free(env, &env->mtx_dblist)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, &env->mtx_mt)) != 0 && ret == 0) + ret = t_ret; + + if (env->mt != NULL) { + __os_free(env, env->mt); + env->mt = NULL; + } + + if (MPOOL_ON(env)) { + /* + * If it's a private environment, flush the contents to disk. + * Recovery would have put everything back together, but it's + * faster and cleaner to flush instead. + * + * Ignore application max-write configuration, we're shutting + * down. + */ + if (F_ISSET(env, ENV_PRIVATE) && + !F_ISSET(dbenv, DB_ENV_NOFLUSH) && + (t_ret = __memp_sync_int(env, NULL, 0, + DB_SYNC_CACHE | DB_SYNC_SUPPRESS_WRITE, NULL, NULL)) != 0 && + ret == 0) + ret = t_ret; + + if ((t_ret = __memp_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; + } + + /* + * If we're included in a shared replication handle count, this + * is our last chance to decrement that count. + * + * !!! + * We can't afford to do anything dangerous after we decrement the + * handle count, of course, as replication may be proceeding with + * client recovery. However, since we're discarding the regions + * as soon as we drop the handle count, there's little opportunity + * to do harm. + */ + if (rep_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + /* + * Refresh the replication region. + * + * Must come after we call __env_db_rep_exit above. 
+ */ + if (REP_ON(env) && (t_ret = __rep_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_CRYPTO + /* + * Crypto comes last, because higher level close functions need + * cryptography. + */ + if (env->reginfo != NULL && + (t_ret = __crypto_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; +#endif + + /* + * Mark the thread as out of the env before we get rid of the handles + * needed to do so. + */ + if (env->thr_hashtab != NULL && + (t_ret = __env_set_state(env, &ip, THREAD_OUT)) != 0 && ret == 0) + ret = t_ret; + + /* + * We are about to detach from the mutex region. This is the last + * chance we have to acquire/destroy a mutex -- acquire/destroy the + * mutex and release our reference. + * + * !!! + * There are two DbEnv methods that care about environment reference + * counts: DbEnv.close and DbEnv.remove. The DbEnv.close method is + * not a problem because it only decrements the reference count and + * no actual resources are discarded -- lots of threads of control + * can call DbEnv.close at the same time, and regardless of racing + * on the reference count mutex, we wouldn't have a problem. Since + * the DbEnv.remove method actually discards resources, we can have + * a problem. + * + * If we decrement the reference count to 0 here, go to sleep, and + * the DbEnv.remove method is called, by the time we run again, the + * underlying shared regions could have been removed. That's fine, + * except we might actually need the regions to resolve outstanding + * operations in the various subsystems, and if we don't have hard + * OS references to the regions, we could get screwed. Of course, + * we should have hard OS references to everything we need, but just + * in case, we put off decrementing the reference count as long as + * possible. 
+ */ + if ((t_ret = __env_ref_decrement(env)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_MUTEX_SUPPORT + if (MUTEX_ON(env) && + (t_ret = __mutex_env_refresh(env)) != 0 && ret == 0) + ret = t_ret; +#endif + /* Free memory for thread tracking. */ + if (env->reginfo != NULL) { + if (F_ISSET(env, ENV_PRIVATE)) { + __env_thread_destroy(env); + t_ret = __env_detach(env, 1); + } else + t_ret = __env_detach(env, 0); + + if (t_ret != 0 && ret == 0) + ret = t_ret; + + /* + * !!! + * Don't free env->reginfo or set the reference to NULL, + * that was done by __env_detach(). + */ + } + + if (env->recover_dtab.int_dispatch != NULL) { + __os_free(env, env->recover_dtab.int_dispatch); + env->recover_dtab.int_size = 0; + env->recover_dtab.int_dispatch = NULL; + } + if (env->recover_dtab.ext_dispatch != NULL) { + __os_free(env, env->recover_dtab.ext_dispatch); + env->recover_dtab.ext_size = 0; + env->recover_dtab.ext_dispatch = NULL; + } + + dbenv->flags = orig_flags; + + return (ret); +} + +/* + * __file_handle_cleanup -- + * Close any underlying open file handles so we don't leak system + * resources. + */ +static int +__file_handle_cleanup(env) + ENV *env; +{ + DB_FH *fhp; + + if (TAILQ_FIRST(&env->fdlist) == NULL) + return (0); + + __db_errx(env, DB_STR("1581", + "File handles still open at environment close")); + while ((fhp = TAILQ_FIRST(&env->fdlist)) != NULL) { + __db_errx(env, DB_STR_A("1582", "Open file handle: %s", "%s"), + fhp->name); + (void)__os_closehandle(env, fhp); + } + return (EINVAL); +} + +/* + * __env_get_open_flags + * DbEnv.get_open_flags method. 
+ * + * PUBLIC: int __env_get_open_flags __P((DB_ENV *, u_int32_t *)); + */ +int +__env_get_open_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->get_open_flags"); + + *flagsp = env->open_flags; + return (0); +} +/* + * __env_attach_regions -- + * Perform attaches to env and required regions (subsystems) + * + * PUBLIC: int __env_attach_regions __P((DB_ENV *, u_int32_t, u_int32_t, int)); + */ +int +__env_attach_regions(dbenv, flags, orig_flags, retry_ok) + DB_ENV *dbenv; + u_int32_t flags; + u_int32_t orig_flags; + int retry_ok; +{ + DB_THREAD_INFO *ip; + ENV *env; + REGINFO *infop; + u_int32_t init_flags; + int create_ok, rep_check, ret; + + ip = NULL; + env = dbenv->env; + rep_check = 0; + + /* Convert the DB_ENV->open flags to internal flags. */ + create_ok = LF_ISSET(DB_CREATE) ? 1 : 0; + if (LF_ISSET(DB_LOCKDOWN)) + F_SET(env, ENV_LOCKDOWN); + if (LF_ISSET(DB_PRIVATE)) + F_SET(env, ENV_PRIVATE); + if (LF_ISSET(DB_RECOVER_FATAL)) + F_SET(env, ENV_RECOVER_FATAL); + if (LF_ISSET(DB_SYSTEM_MEM)) + F_SET(env, ENV_SYSTEM_MEM); + if (LF_ISSET(DB_THREAD)) + F_SET(env, ENV_THREAD); + + /* + * Create/join the environment. We pass in the flags of interest to + * a thread subsequently joining an environment we create. If we're + * not the ones to create the environment, our flags will be updated + * to match the existing environment. 
+ */ + init_flags = 0; + if (LF_ISSET(DB_INIT_CDB)) + FLD_SET(init_flags, DB_INITENV_CDB); + if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) + FLD_SET(init_flags, DB_INITENV_CDB_ALLDB); + if (LF_ISSET(DB_INIT_LOCK)) + FLD_SET(init_flags, DB_INITENV_LOCK); + if (LF_ISSET(DB_INIT_LOG)) + FLD_SET(init_flags, DB_INITENV_LOG); + if (LF_ISSET(DB_INIT_MPOOL)) + FLD_SET(init_flags, DB_INITENV_MPOOL); + if (LF_ISSET(DB_INIT_REP)) + FLD_SET(init_flags, DB_INITENV_REP); + if (LF_ISSET(DB_INIT_TXN)) + FLD_SET(init_flags, DB_INITENV_TXN); + if ((ret = __env_attach(env, &init_flags, create_ok, retry_ok)) != 0) + goto err; + + /* + * __env_attach will return the saved init_flags field, which contains + * the DB_INIT_* flags used when the environment was created. + * + * We may be joining an environment -- reset our flags to match the + * ones in the environment. + */ + if (FLD_ISSET(init_flags, DB_INITENV_CDB)) + LF_SET(DB_INIT_CDB); + if (FLD_ISSET(init_flags, DB_INITENV_LOCK)) + LF_SET(DB_INIT_LOCK); + if (FLD_ISSET(init_flags, DB_INITENV_LOG)) + LF_SET(DB_INIT_LOG); + if (FLD_ISSET(init_flags, DB_INITENV_MPOOL)) + LF_SET(DB_INIT_MPOOL); + if (FLD_ISSET(init_flags, DB_INITENV_REP)) + LF_SET(DB_INIT_REP); + if (FLD_ISSET(init_flags, DB_INITENV_TXN)) + LF_SET(DB_INIT_TXN); + if (FLD_ISSET(init_flags, DB_INITENV_CDB_ALLDB) && + (ret = __env_set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0) + goto err; + + /* Initialize for CDB product. */ + if (LF_ISSET(DB_INIT_CDB)) { + LF_SET(DB_INIT_LOCK); + F_SET(env, ENV_CDB); + } + + /* + * Update the flags to match the database environment. The application + * may have specified flags of 0 to join the environment, and this line + * replaces that value with the flags corresponding to the existing, + * underlying set of subsystems. This means the DbEnv.get_open_flags + * method returns the flags to open the existing environment instead of + * the specific flags passed to the DbEnv.open method. 
+ */ + env->open_flags = flags; + + /* + * The DB_ENV structure has now been initialized. Turn off further + * use of the DB_ENV structure and most initialization methods, we're + * about to act on the values we currently have. + */ + F_SET(env, ENV_OPEN_CALLED); + + infop = env->reginfo; + +#ifdef HAVE_MUTEX_SUPPORT + /* + * Initialize the mutex regions first before ENV_ENTER(). + * Mutexes need to be 'on' when attaching to an existing env + * in order to safely allocate the thread tracking info. + */ + if ((ret = __mutex_open(env, create_ok)) != 0) + goto err; + /* The MUTEX_REQUIRED() in __env_alloc() expects this to be set. */ + infop->mtx_alloc = ((REGENV *)infop->primary)->mtx_regenv; +#endif + /* + * Initialize thread tracking and enter the API. + */ + if ((ret = + __env_thread_init(env, F_ISSET(infop, REGION_CREATE) ? 1 : 0)) != 0) + goto err; + + ENV_ENTER(env, ip); + + /* + * Initialize the subsystems. + */ + /* + * We can now acquire/create mutexes: increment the region's reference + * count. + */ + if ((ret = __env_ref_increment(env)) != 0) + goto err; + + /* + * Initialize the handle mutexes. + */ + if ((ret = __mutex_alloc(env, + MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_db_env)) != 0 || + (ret = __mutex_alloc(env, + MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &env->mtx_env)) != 0) + goto err; + + /* + * Initialize the replication area next, so that we can lock out this + * call if we're currently running recovery for replication. + */ + if (LF_ISSET(DB_INIT_REP) && (ret = __rep_open(env)) != 0) + goto err; + + rep_check = IS_ENV_REPLICATED(env) ? 1 : 0; + if (rep_check && (ret = __env_rep_enter(env, 0)) != 0) + goto err; + + if (LF_ISSET(DB_INIT_MPOOL)) { + if ((ret = __memp_open(env, create_ok)) != 0) + goto err; + + /* + * BDB does do cache I/O during recovery and when starting up + * replication. If creating a new environment, then suppress + * any application max-write configuration. 
+ */ + if (create_ok) + (void)__memp_set_config( + dbenv, DB_MEMP_SUPPRESS_WRITE, 1); + + /* + * Initialize the DB list and its mutex. If the mpool is + * not initialized, we can't ever open a DB handle, which + * is why this code lives here. + */ + TAILQ_INIT(&env->dblist); + if ((ret = __mutex_alloc(env, MTX_ENV_DBLIST, + DB_MUTEX_PROCESS_ONLY, &env->mtx_dblist)) != 0) + goto err; + + /* Register DB's pgin/pgout functions. */ + if ((ret = __memp_register( + env, DB_FTYPE_SET, __db_pgin, __db_pgout)) != 0) + goto err; + } + + /* + * Initialize the ciphering area prior to any running of recovery so + * that we can initialize the keys, etc. before recovery, including + * the MT mutex. + * + * !!! + * This must be after the mpool init, but before the log initialization + * because log_open may attempt to run log_recover during its open. + */ + if (LF_ISSET(DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN) && + (ret = __crypto_region_init(env)) != 0) + goto err; + if ((ret = __mutex_alloc( + env, MTX_TWISTER, DB_MUTEX_PROCESS_ONLY, &env->mtx_mt)) != 0) + goto err; + + /* + * Transactions imply logging but do not imply locking. While almost + * all applications want both locking and logging, it would not be + * unreasonable for a single threaded process to want transactions for + * atomicity guarantees, but not necessarily need concurrency. + */ + if (LF_ISSET(DB_INIT_LOG | DB_INIT_TXN)) + if ((ret = __log_open(env)) != 0) + goto err; + if (LF_ISSET(DB_INIT_LOCK)) + if ((ret = __lock_open(env)) != 0) + goto err; + + if (LF_ISSET(DB_INIT_TXN)) { + if ((ret = __txn_open(env)) != 0) + goto err; + + /* + * If the application is running with transactions, initialize + * the function tables. + */ + if ((ret = __env_init_rec(env, + ((LOG *)env->lg_handle->reginfo.primary)->persist.version)) + != 0) + goto err; + } + + /* Perform recovery for any previous run. 
*/ + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && + (ret = __db_apprec(env, ip, NULL, NULL, 1, + LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL | DB_NO_CHECKPOINT))) != 0) + goto err; + + /* + * If we've created the regions, are running with transactions, and did + * not just run recovery, we need to log the fact that the transaction + * IDs got reset. + * + * If we ran recovery, there may be prepared-but-not-yet-committed + * transactions that need to be resolved. Recovery resets the minimum + * transaction ID and logs the reset if that's appropriate, so we + * don't need to do anything here in the recover case. + */ + if (TXN_ON(env) && + !FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) && + F_ISSET(infop, REGION_CREATE) && + !LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && + (ret = __txn_reset(env)) != 0) + goto err; + + /* The database environment is ready for business. */ + if ((ret = __env_turn_on(env)) != 0) + goto err; + + if (rep_check) + ret = __env_db_rep_exit(env); + + /* Turn any application-specific max-write configuration back on. */ + if (LF_ISSET(DB_INIT_MPOOL)) + (void)__memp_set_config(dbenv, DB_MEMP_SUPPRESS_WRITE, 0); + +err: if (ret == 0) + ENV_LEAVE(env, ip); + else { + /* + * If we fail after creating regions, panic and remove them. + * + * !!! + * No need to call __env_db_rep_exit, that work is done by the + * calls to __env_refresh. + */ + infop = env->reginfo; + if (infop != NULL && F_ISSET(infop, REGION_CREATE)) { + ret = __env_panic(env, ret); + + /* Refresh the DB_ENV so can use it to call remove. 
*/ + (void)__env_refresh(dbenv, orig_flags, rep_check); + (void)__env_remove_env(env); + (void)__env_refresh(dbenv, orig_flags, 0); + } else + (void)__env_refresh(dbenv, orig_flags, rep_check); + /* clear the fact that the region had been opened */ + F_CLR(env, ENV_OPEN_CALLED); + } + + return (ret); +} diff --git a/src/env/env_recover.c b/src/env/env_recover.c new file mode 100644 index 00000000..a8df6464 --- /dev/null +++ b/src/env/env_recover.c @@ -0,0 +1,1093 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/fop.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +static int __db_log_corrupt __P((ENV *, DB_LSN *)); +static int __env_init_rec_42 __P((ENV *)); +static int __env_init_rec_43 __P((ENV *)); +static int __env_init_rec_46 __P((ENV *)); +static int __env_init_rec_47 __P((ENV *)); +static int __env_init_rec_48 __P((ENV *)); +static int __log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *)); + +static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); +static int __log_backup __P((ENV *, DB_LOGC *, DB_LSN *, DB_LSN*)); + +/* + * __db_apprec -- + * Perform recovery. If max_lsn is non-NULL, then we are trying + * to synchronize this system up with another system that has a max + * LSN of max_lsn, so we need to roll back sufficiently far for that + * to work. See __log_backup for details. 
+ * + * PUBLIC: int __db_apprec __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_LSN *, DB_LSN *, int, u_int32_t)); + */ +int +__db_apprec(env, ip, max_lsn, trunclsn, update, flags) + ENV *env; + DB_THREAD_INFO *ip; + DB_LSN *max_lsn, *trunclsn; + int update; + u_int32_t flags; +{ + DBT data; + DB_ENV *dbenv; + DB_LOGC *logc; + DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn, tlsn; + DB_LSN *vtrunc_ckp, *vtrunc_lsn; + DB_TXNHEAD *txninfo; + DB_TXNREGION *region; + REGENV *renv; + REGINFO *infop; + __txn_ckp_args *ckp_args; + time_t now, tlow; + double nfiles; + u_int32_t hi_txn, log_size, txnid; + int32_t low; + int all_recovered, progress, rectype, ret, t_ret; + char *p, *pass; + char t1[CTIME_BUFLEN], t2[CTIME_BUFLEN], time_buf[CTIME_BUFLEN]; + + COMPQUIET(nfiles, (double)0.001); + + dbenv = env->dbenv; + logc = NULL; + ckp_args = NULL; + hi_txn = TXN_MAXIMUM; + txninfo = NULL; + pass = DB_STR_P("initial"); + ZERO_LSN(lsn); + + /* + * XXX + * Get the log size. No locking required because we're single-threaded + * during recovery. + */ + log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size; + + /* + * If we need to, update the env handle timestamp. + */ + if (update && REP_ON(env)) { + infop = env->reginfo; + renv = infop->primary; + (void)time(&renv->rep_timestamp); + } + + /* Set in-recovery flags. */ + F_SET(env->lg_handle, DBLOG_RECOVER); + region = env->tx_handle->reginfo.primary; + F_SET(region, TXN_IN_RECOVERY); + + /* Allocate a cursor for the log. */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + + /* + * If the user is specifying recovery to a particular point in time + * or to a particular LSN, find the point to start recovery from. 
+ */ + ZERO_LSN(lowlsn); + if (max_lsn != NULL) { + if ((ret = __log_backup(env, logc, max_lsn, &lowlsn)) != 0) + goto err; + } else if (dbenv->tx_timestamp != 0) { + if ((ret = __log_earliest(env, logc, &low, &lowlsn)) != 0) + goto err; + if ((int32_t)dbenv->tx_timestamp < low) { + t1[sizeof(t1) - 1] = '\0'; + (void)strncpy(t1, __os_ctime( + &dbenv->tx_timestamp, time_buf), sizeof(t1) - 1); + if ((p = strchr(t1, '\n')) != NULL) + *p = '\0'; + + t2[sizeof(t2) - 1] = '\0'; + tlow = (time_t)low; + (void)strncpy(t2, __os_ctime( + &tlow, time_buf), sizeof(t2) - 1); + if ((p = strchr(t2, '\n')) != NULL) + *p = '\0'; + + __db_errx(env, DB_STR_A("1509", + "Invalid recovery timestamp %s; earliest time is %s", + "%s %s"), t1, t2); + ret = EINVAL; + goto err; + } + } + + /* + * Recovery is done in three passes: + * Pass #0: + * We need to find the position from which we will open files. + * We need to open files beginning with the earlier of the + * most recent checkpoint LSN and a checkpoint LSN before the + * recovery timestamp, if specified. We need to be before the + * most recent checkpoint LSN because we are going to collect + * information about which transactions were begun before we + * start rolling forward. Those that were should never be undone + * because queue cannot use LSNs to determine what operations can + * safely be aborted and it cannot rollback operations in + * transactions for which there may be records not processed + * during recovery. We need to consider earlier points in time + * in case we are recovering to a particular timestamp. + * + * Pass #1: + * Read forward through the log from the position found in pass 0 + * opening and closing files, and recording transactions for which + * we've seen their first record (the transaction's prev_lsn is + * 0,0). At the end of this pass, we know all transactions for + * which we've seen begins and we have the "current" set of files + * open. 
+ * + * Pass #2: + * Read backward through the log undoing any uncompleted TXNs. + * There are four cases: + * 1. If doing catastrophic recovery, we read to the + * beginning of the log + * 2. If we are doing normal reovery, then we have to roll + * back to the most recent checkpoint LSN. + * 3. If we are recovering to a point in time, then we have + * to roll back to the checkpoint whose ckp_lsn is earlier + * than the specified time. __log_earliest will figure + * this out for us. + * 4. If we are recovering back to a particular LSN, then + * we have to roll back to the checkpoint whose ckp_lsn + * is earlier than the max_lsn. __log_backup will figure + * that out for us. + * In case 2, "uncompleted TXNs" include all those who committed + * after the user's specified timestamp. + * + * Pass #3: + * Read forward through the log from the LSN found in pass #2, + * redoing any committed TXNs (which committed after any user- + * specified rollback point). During this pass, checkpoint + * file information is ignored, and file openings and closings + * are redone. + * + * ckp_lsn -- lsn of the last checkpoint or the first in the log. + * first_lsn -- the lsn where the forward passes begin. + * last_lsn -- the last lsn in the log, used for feedback + * lowlsn -- the lsn we are rolling back to, if we are recovering + * to a point in time. + * lsn -- temporary use lsn. + * stop_lsn -- the point at which forward roll should stop + */ + + /* + * Find out the last lsn, so that we can estimate how far along we + * are in recovery. This will help us determine how much log there + * is between the first LSN that we're going to be working with and + * the last one. We assume that each of the three phases takes the + * same amount of time (a false assumption) and then use the %-age + * of the amount of log traversed to figure out how much of the + * pass we've accomplished. + * + * If we can't find any log records, we're kind of done. 
+ */ +#ifdef UMRW + ZERO_LSN(last_lsn); +#endif + memset(&data, 0, sizeof(data)); + /* + * Pass #0 + * Find the LSN from which we begin OPENFILES. + * + * If this is a catastrophic recovery, or if no checkpoint exists + * in the log, the LSN is the first LSN in the log. + * + * Otherwise, it is the minimum of (1) the LSN in the last checkpoint + * and (2) the LSN in the checkpoint before any specified recovery + * timestamp or max_lsn. + */ + /* + * Get the first LSN in the log; it's an initial default + * even if this is not a catastrophic recovery. + */ + if ((ret = __logc_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_errx(env, DB_STR("1510", + "First log record not found")); + goto err; + } + first_lsn = ckp_lsn; + + if (!LF_ISSET(DB_RECOVER_FATAL)) { + if ((ret = __txn_getckp(env, &ckp_lsn)) == 0 && + (ret = __logc_get(logc, &ckp_lsn, &data, DB_SET)) == 0) { + /* We have a recent checkpoint. This is LSN (1). */ + if ((ret = __txn_ckp_read(env, + data.data, &ckp_args)) != 0) { + __db_errx(env, DB_STR_A("1511", + "Invalid checkpoint record at [%ld][%ld]", + "%ld %ld"), (u_long)ckp_lsn.file, + (u_long)ckp_lsn.offset); + goto err; + } + first_lsn = ckp_args->ckp_lsn; + __os_free(env, ckp_args); + } + + /* + * If LSN (2) exists, use it if it's before LSN (1). + * (If LSN (1) doesn't exist, first_lsn is the + * beginning of the log, so will "win" this check.) + * + * XXX + * In the recovery-to-a-timestamp case, lowlsn is chosen by + * __log_earliest, and is the checkpoint LSN of the + * *earliest* checkpoint in the unreclaimed log. I + * (krinsky) believe that we could optimize this by looking + * instead for the LSN of the *latest* checkpoint before + * the timestamp of interest, but I'm not sure that this + * is worth doing right now. 
(We have to look for lowlsn + * and low anyway, to make sure the requested timestamp is + * somewhere in the logs we have, and all that's required + * is that we pick *some* checkpoint after the beginning of + * the logs and before the timestamp. + */ + if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) && + LOG_COMPARE(&lowlsn, &first_lsn) < 0) { + first_lsn = lowlsn; + } + } + + if ((ret = __logc_get(logc, &last_lsn, &data, DB_LAST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_errx(env, DB_STR("1512", + "Last log record not found")); + goto err; + } + + rectype = 0; + txnid = 0; + do { + if (LOG_COMPARE(&lsn, &first_lsn) == 0) + break; + /* check if we have a recycle record. */ + if (rectype != DB___txn_recycle) + LOGCOPY_32(env, &rectype, data.data); + /* txnid is after rectype, which is a u_int32. */ + LOGCOPY_32(env, &txnid, + (u_int8_t *)data.data + sizeof(u_int32_t)); + + if (txnid != 0) + break; + } while ((ret = __logc_get(logc, &lsn, &data, DB_PREV)) == 0); + + /* + * There are no transactions, so there is nothing to do unless + * we're recovering to an LSN. If we are, we need to proceed since + * we'll still need to do a vtruncate based on information we haven't + * yet collected. + */ + if (ret == DB_NOTFOUND) + ret = 0; + else if (ret != 0) + goto err; + + hi_txn = txnid; + + /* Get the record at first_lsn. */ + if ((ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) { + __db_errx(env, DB_STR_A("1513", + "Checkpoint LSN record [%ld][%ld] not found", "%ld %ld"), + (u_long)first_lsn.file, (u_long)first_lsn.offset); + goto err; + } + + if (dbenv->db_feedback != NULL) { + if (last_lsn.file == first_lsn.file) + nfiles = (double) + (last_lsn.offset - first_lsn.offset) / log_size; + else + nfiles = (double)(last_lsn.file - first_lsn.file) + + (double)((log_size - first_lsn.offset) + + last_lsn.offset) / log_size; + /* We are going to divide by nfiles; make sure it isn't 0. 
*/ + if (nfiles < 0.001) + nfiles = 0.001; + } + + /* Find a low txnid. */ + ret = 0; + if (hi_txn != 0) do { + /* txnid is after rectype, which is a u_int32. */ + LOGCOPY_32(env, &txnid, + (u_int8_t *)data.data + sizeof(u_int32_t)); + + if (txnid != 0) + break; + } while ((ret = __logc_get(logc, &lsn, &data, DB_NEXT)) == 0); + + /* + * There are no transactions and we're not recovering to an LSN (see + * above), so there is nothing to do. + */ + if (ret == DB_NOTFOUND) { + if (LOG_COMPARE(&lsn, &last_lsn) != 0) + ret = __db_log_corrupt(env, &lsn); + else + ret = 0; + } + + /* Reset to the first lsn. */ + if (ret != 0 || + (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) + goto err; + + /* Initialize the transaction list. */ + if ((ret = __db_txnlist_init(env, ip, + txnid, hi_txn, max_lsn, &txninfo)) != 0) + goto err; + + /* + * Pass #1 + * Run forward through the log starting at the first relevant lsn. + */ + if ((ret = __env_openfiles(env, logc, + txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0) + goto err; + + /* If there were no transactions, then we can bail out early. */ + if (hi_txn == 0 && max_lsn == NULL) { + lsn = last_lsn; + goto done; + } + + /* + * Pass #2. + * + * We used first_lsn to tell us how far back we need to recover, + * use it here. 
+ */ + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, DB_STR_A("1514", + "Recovery starting from [%lu][%lu]", "%lu %lu"), + (u_long)first_lsn.file, (u_long)first_lsn.offset); + + pass = DB_STR_P("backward"); + for (ret = __logc_get(logc, &lsn, &data, DB_LAST); + ret == 0 && LOG_COMPARE(&lsn, &first_lsn) >= 0; + ret = __logc_get(logc, &lsn, &data, DB_PREV)) { + if (dbenv->db_feedback != NULL) { + progress = 34 + (int)(33 * (__lsn_diff(&first_lsn, + &last_lsn, &lsn, log_size, 0) / nfiles)); + dbenv->db_feedback(dbenv, DB_RECOVER, progress); + } + + tlsn = lsn; + ret = __db_dispatch(env, &env->recover_dtab, + &data, &tlsn, DB_TXN_BACKWARD_ROLL, txninfo); + if (ret != 0) { + if (ret != DB_TXN_CKP) + goto msgerr; + else + ret = 0; + } + } + if (ret == DB_NOTFOUND) { + if (LOG_COMPARE(&lsn, &first_lsn) > 0) + ret = __db_log_corrupt(env, &lsn); + else + ret = 0; + } + if (ret != 0) + goto err; + + /* + * Pass #3. If we are recovering to a timestamp or to an LSN, + * we need to make sure that we don't roll-forward beyond that + * point because there may be non-transactional operations (e.g., + * closes that would fail). The last_lsn variable is used for + * feedback calculations, but use it to set an initial stopping + * point for the forward pass, and then reset appropriately to + * derive a real stop_lsn that tells how far the forward pass + * should go. 
+ */ + pass = DB_STR_P("forward"); + stop_lsn = last_lsn; + if (max_lsn != NULL || dbenv->tx_timestamp != 0) + stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn; + + for (ret = __logc_get(logc, &lsn, &data, DB_NEXT); + ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) { + if (dbenv->db_feedback != NULL) { + progress = 67 + (int)(33 * (__lsn_diff(&first_lsn, + &last_lsn, &lsn, log_size, 1) / nfiles)); + dbenv->db_feedback(dbenv, DB_RECOVER, progress); + } + + tlsn = lsn; + ret = __db_dispatch(env, &env->recover_dtab, + &data, &tlsn, DB_TXN_FORWARD_ROLL, txninfo); + if (ret != 0) { + if (ret != DB_TXN_CKP) + goto msgerr; + else + ret = 0; + } + /* + * If we are recovering to a timestamp or an LSN, + * we need to make sure that we don't try to roll + * forward beyond the soon-to-be end of log. + */ + if (LOG_COMPARE(&lsn, &stop_lsn) >= 0) + break; + + } + if (ret == DB_NOTFOUND) + ret = __db_log_corrupt(env, &lsn); + if (ret != 0) + goto err; + + if (max_lsn == NULL) + region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid; + +done: + /* We are going to truncate, so we'd best close the cursor. */ + if (logc != NULL) { + if ((ret = __logc_close(logc)) != 0) + goto err; + logc = NULL; + } + /* + * Also flush the cache before truncating the log. It's recovery, + * ignore any application max-write configuration. + */ + if ((ret = __memp_sync_int(env, + NULL, 0, DB_SYNC_CACHE | DB_SYNC_SUPPRESS_WRITE, NULL, NULL)) != 0) + goto err; + if (dbenv->tx_timestamp != 0) { + /* Run recovery up to this timestamp. */ + region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; + vtrunc_lsn = &((DB_TXNHEAD *)txninfo)->maxlsn; + vtrunc_ckp = &((DB_TXNHEAD *)txninfo)->ckplsn; + } else if (max_lsn != NULL) { + /* This is a HA client syncing to the master. 
*/ + if (!IS_ZERO_LSN(((DB_TXNHEAD *)txninfo)->ckplsn)) + region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; + else if ((ret = + __txn_findlastckp(env, ®ion->last_ckp, max_lsn)) != 0) + goto err; + vtrunc_lsn = max_lsn; + vtrunc_ckp = &((DB_TXNHEAD *)txninfo)->ckplsn; + } else { + /* + * The usual case: we recovered the whole (valid) log; clear + * out any partial record after the recovery point. + */ + vtrunc_lsn = &lsn; + vtrunc_ckp = ®ion->last_ckp; + } + if ((ret = __log_vtruncate(env, vtrunc_lsn, vtrunc_ckp, trunclsn)) != 0) + goto err; + + /* If we had no txns, figure out if we need a checkpoint. */ + if (hi_txn == 0 && __dbreg_log_nofiles(env)) + LF_SET(DB_NO_CHECKPOINT); + /* + * Usually we close all files at the end of recovery, unless there are + * prepared transactions or errors in the checkpoint. + */ + all_recovered = region->stat.st_nrestores == 0; + /* + * Log a checkpoint here so subsequent recoveries can skip what's been + * done; this is unnecessary for HA rep clients, as they do not write + * log records. + */ + if (max_lsn == NULL && !LF_ISSET(DB_NO_CHECKPOINT) && + (ret = __txn_checkpoint(env, + 0, 0, DB_CKP_INTERNAL | DB_FORCE)) != 0) { + /* + * If there was no space for the checkpoint or flushng db + * pages we can still bring the environment up, if only for + * read-only access. We must not close the open files because a + * subsequent recovery might still need to redo this portion + * of the log [#18590]. + */ + if (max_lsn == NULL && ret == ENOSPC) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, DB_STR_A("1515", + "Recovery continuing after non-fatal checkpoint error: %s", + "%s"), db_strerror(ret)); + all_recovered = 0; + } + else + goto err; + } + + if (all_recovered ) { + /* Close all the db files that are open. 
*/ + if ((ret = __dbreg_close_files(env, 0)) != 0) + goto err; + } else { + if ((ret = __dbreg_mark_restored(env)) != 0) + goto err; + F_SET(env->lg_handle, DBLOG_OPENFILES); + } + + if (max_lsn != NULL) { + /* + * Now we need to open files that should be open in order for + * client processing to continue. However, since we've + * truncated the log, we need to recompute from where the + * openfiles pass should begin. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + if ((ret = + __logc_get(logc, &first_lsn, &data, DB_FIRST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_errx(env, DB_STR("1516", + "First log record not found")); + goto err; + } + if ((ret = __txn_getckp(env, &first_lsn)) == 0 && + (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) == 0) { + /* We have a recent checkpoint. This is LSN (1). */ + if ((ret = __txn_ckp_read(env, + data.data, &ckp_args)) != 0) { + __db_errx(env, DB_STR_A("1517", + "Invalid checkpoint record at [%ld][%ld]", + "%ld %ld"), (u_long)first_lsn.file, + (u_long)first_lsn.offset); + goto err; + } + first_lsn = ckp_args->ckp_lsn; + __os_free(env, ckp_args); + } + if ((ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) + goto err; + if ((ret = __env_openfiles(env, logc, + txninfo, &data, &first_lsn, max_lsn, nfiles, 1)) != 0) + goto err; + } else if (all_recovered) { + /* + * If there are no transactions that need resolution, whether + * because they are prepared or because recovery will need to + * process them, we need to reset the transaction ID space and + * log this fact. 
+ */ + if ((rectype != DB___txn_recycle || hi_txn != 0) && + (ret = __txn_reset(env)) != 0) + goto err; + } else { + if ((ret = __txn_recycle_id(env, 0)) != 0) + goto err; + } + + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) { + (void)time(&now); + __db_msg(env, DB_STR_A("1518", + "Recovery complete at %.24s", "%.24s"), + __os_ctime(&now, time_buf)); + __db_msg(env, DB_STR_A("1519", + "Maximum transaction ID %lx recovery checkpoint [%lu][%lu]", + "%lx %lu %lu"), (u_long)(txninfo == NULL ? + TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid), + (u_long)region->last_ckp.file, + (u_long)region->last_ckp.offset); + } + + if (0) { +msgerr: __db_errx(env, DB_STR_A("1520", + "Recovery function for LSN %lu %lu failed on %s pass", + "%lu %lu %s"), (u_long)lsn.file, (u_long)lsn.offset, pass); + } + +err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + + if (txninfo != NULL) + __db_txnlist_end(env, txninfo); + + dbenv->tx_timestamp = 0; + + F_CLR(env->lg_handle, DBLOG_RECOVER); + F_CLR(region, TXN_IN_RECOVERY); + + return (ret); +} + +/* + * Figure out how many logfiles we have processed. If we are moving + * forward (is_forward != 0), then we're computing current - low. If + * we are moving backward, we are computing high - current. max is + * the number of bytes per logfile. + */ +static double +__lsn_diff(low, high, current, max, is_forward) + DB_LSN *low, *high, *current; + u_int32_t max; + int is_forward; +{ + double nf; + + /* + * There are three cases in each direction. If you are in the + * same file, then all you need worry about is the difference in + * offsets. If you are in different files, then either your offsets + * put you either more or less than the integral difference in the + * number of files -- we need to handle both of these. 
+ */ + if (is_forward) { + if (current->file == low->file) + nf = (double)(current->offset - low->offset) / max; + else if (current->offset < low->offset) + nf = (double)((current->file - low->file) - 1) + + (double)((max - low->offset) + current->offset) / + max; + else + nf = (double)(current->file - low->file) + + (double)(current->offset - low->offset) / max; + } else { + if (current->file == high->file) + nf = (double)(high->offset - current->offset) / max; + else if (current->offset > high->offset) + nf = (double)((high->file - current->file) - 1) + + (double) + ((max - current->offset) + high->offset) / max; + else + nf = (double)(high->file - current->file) + + (double)(high->offset - current->offset) / max; + } + return (nf); +} + +/* + * __log_backup -- + * + * This is used to find the earliest log record to process when a client + * is trying to sync up with a master whose max LSN is less than this + * client's max lsn; we want to roll back everything after that. + * + * Find the latest checkpoint whose ckp_lsn is less than the max lsn. + */ +static int +__log_backup(env, logc, max_lsn, start_lsn) + ENV *env; + DB_LOGC *logc; + DB_LSN *max_lsn, *start_lsn; +{ + DBT data; + DB_LSN lsn; + __txn_ckp_args *ckp_args; + int ret; + + memset(&data, 0, sizeof(data)); + ckp_args = NULL; + + if ((ret = __txn_getckp(env, &lsn)) != 0) + goto err; + while ((ret = __logc_get(logc, &lsn, &data, DB_SET)) == 0) { + if ((ret = __txn_ckp_read(env, data.data, &ckp_args)) != 0) + return (ret); + /* + * Follow checkpoints through the log until + * we find one with a ckp_lsn less than + * or equal max_lsn. + */ + if (LOG_COMPARE(&ckp_args->ckp_lsn, max_lsn) <= 0) { + *start_lsn = ckp_args->ckp_lsn; + break; + } + + lsn = ckp_args->last_ckp; + /* + * If there are no more checkpoints behind us, we're + * done. Break with DB_NOTFOUND. 
+ */ + if (IS_ZERO_LSN(lsn)) { + ret = DB_NOTFOUND; + break; + } + __os_free(env, ckp_args); + ckp_args = NULL; + } + + if (ckp_args != NULL) + __os_free(env, ckp_args); + /* + * If we walked back through all the checkpoints, + * set the cursor on the first log record. + */ +err: if (IS_ZERO_LSN(*start_lsn) && (ret == 0 || ret == DB_NOTFOUND)) + ret = __logc_get(logc, start_lsn, &data, DB_FIRST); + return (ret); +} + +/* + * __log_earliest -- + * + * Return the earliest recovery point for the log files present. The + * earliest recovery time is the time stamp of the first checkpoint record + * whose checkpoint LSN is greater than the first LSN we process. + */ +static int +__log_earliest(env, logc, lowtime, lowlsn) + ENV *env; + DB_LOGC *logc; + int32_t *lowtime; + DB_LSN *lowlsn; +{ + __txn_ckp_args *ckpargs; + DB_LSN first_lsn, lsn; + DBT data; + u_int32_t rectype; + int cmp, ret; + + memset(&data, 0, sizeof(data)); + + /* + * Read forward through the log looking for the first checkpoint + * record whose ckp_lsn is greater than first_lsn. + */ + for (ret = __logc_get(logc, &first_lsn, &data, DB_FIRST); + ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) { + LOGCOPY_32(env, &rectype, data.data); + if (rectype != DB___txn_ckp) + continue; + if ((ret = + __txn_ckp_read(env, data.data, &ckpargs)) == 0) { + cmp = LOG_COMPARE(&ckpargs->ckp_lsn, &first_lsn); + *lowlsn = ckpargs->ckp_lsn; + *lowtime = ckpargs->timestamp; + + __os_free(env, ckpargs); + if (cmp >= 0) + break; + } + } + + return (ret); +} + +/* + * __env_openfiles -- + * Perform the pass of recovery that opens files. This is used + * both during regular recovery and an initial call to txn_recover (since + * we need files open in order to abort prepared, but not yet committed + * transactions). + * + * See the comments in db_apprec for a detailed description of the + * various recovery passes. 
+ * + * If we are not doing feedback processing (i.e., we are doing txn_recover + * processing and in_recovery is zero), then last_lsn can be NULL. + * + * PUBLIC: int __env_openfiles __P((ENV *, + * PUBLIC: DB_LOGC *, void *, DBT *, DB_LSN *, DB_LSN *, double, int)); + */ +int +__env_openfiles(env, logc, txninfo, + data, open_lsn, last_lsn, nfiles, in_recovery) + ENV *env; + DB_LOGC *logc; + void *txninfo; + DBT *data; + DB_LSN *open_lsn, *last_lsn; + double nfiles; + int in_recovery; +{ + DB_ENV *dbenv; + DB_LSN lsn, tlsn; + u_int32_t log_size; + int progress, ret; + + dbenv = env->dbenv; + + /* + * XXX + * Get the log size. No locking required because we're single-threaded + * during recovery. + */ + log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size; + + lsn = *open_lsn; + for (;;) { + if (in_recovery && dbenv->db_feedback != NULL) { + DB_ASSERT(env, last_lsn != NULL); + progress = (int)(33 * (__lsn_diff(open_lsn, + last_lsn, &lsn, log_size, 1) / nfiles)); + dbenv->db_feedback(dbenv, DB_RECOVER, progress); + } + + tlsn = lsn; + ret = __db_dispatch(env, &env->recover_dtab, data, &tlsn, + in_recovery ? 
DB_TXN_OPENFILES : DB_TXN_POPENFILES, + txninfo); + if (ret != 0 && ret != DB_TXN_CKP) { + __db_errx(env, DB_STR_A("1521", + "Recovery function for LSN %lu %lu failed", + "%lu %lu"), (u_long)lsn.file, (u_long)lsn.offset); + break; + } + if ((ret = __logc_get(logc, &lsn, data, DB_NEXT)) != 0) { + if (ret == DB_NOTFOUND) { + if (last_lsn != NULL && + LOG_COMPARE(&lsn, last_lsn) != 0) + ret = __db_log_corrupt(env, &lsn); + else + ret = 0; + } + break; + } + } + + return (ret); +} + +static int +__db_log_corrupt(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + __db_errx(env, DB_STR_A("1522", + "Log file corrupt at LSN: [%lu][%lu]", "%lu %lu"), + (u_long)lsnp->file, (u_long)lsnp->offset); + return (EINVAL); +} + +/* + * __env_init_rec -- + * + * PUBLIC: int __env_init_rec __P((ENV *, u_int32_t)); + */ +int +__env_init_rec(env, version) + ENV *env; + u_int32_t version; +{ + int ret; + + /* + * We need to prime the recovery table with the current recovery + * functions. Then we overwrite only specific entries based on + * each previous version we support. 
+ */ + if ((ret = __bam_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __crdel_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __db_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __dbreg_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __fop_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __ham_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __heap_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __qam_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __repmgr_init_recover(env, &env->recover_dtab)) != 0) + goto err; + if ((ret = __txn_init_recover(env, &env->recover_dtab)) != 0) + goto err; + + /* + * After installing all the current recovery routines, we want to + * override them with older versions if we are reading a down rev + * log (from a downrev replication master). If a log record is + * changed then we must use the previous version for all older + * logs. If a record is changed in multiple revisions then the + * oldest revision that applies must be used. Therefore we override + * the recovery functions in reverse log version order. + */ + /* + * DB_LOGVERSION_52 is a strict superset of DB_LOGVERSION_50. + * So, only check > DB_LOGVERSION_48p2. If/When log records are + * altered, the condition below will need to change. + */ + if (version > DB_LOGVERSION_48p2) + goto done; + if ((ret = __env_init_rec_48(env)) != 0) + goto err; + /* + * Patch 2 added __db_pg_trunc but did not replace any log records + * so we want to override the same functions as in the original release. + */ + if (version >= DB_LOGVERSION_48) + goto done; + if ((ret = __env_init_rec_47(env)) != 0) + goto err; + if (version == DB_LOGVERSION_47) + goto done; + if ((ret = __env_init_rec_46(env)) != 0) + goto err; + /* + * There are no log record/recovery differences between 4.4 and 4.5. 
+ * The log version changed due to checksum. There are no log recovery + * differences between 4.5 and 4.6. The name of the rep_gen in + * txn_checkpoint changed (to spare, since we don't use it anymore). + */ + if (version >= DB_LOGVERSION_44) + goto done; + if ((ret = __env_init_rec_43(env)) != 0) + goto err; + if (version == DB_LOGVERSION_43) + goto done; + if (version != DB_LOGVERSION_42) { + __db_errx(env, DB_STR_A("1523", "Unknown version %lu", + "%lu"), (u_long)version); + ret = EINVAL; + goto err; + } + ret = __env_init_rec_42(env); + +done: +err: return (ret); +} + +static int +__env_init_rec_42(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_relink_42_recover, DB___db_relink_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_pg_alloc_42_recover, DB___db_pg_alloc_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_pg_free_42_recover, DB___db_pg_free_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_pg_freedata_42_recover, DB___db_pg_freedata_42)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __ham_metagroup_42_recover, DB___ham_metagroup_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __ham_groupalloc_42_recover, DB___ham_groupalloc_42)) != 0) + goto err; +#endif + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __txn_ckp_42_recover, DB___txn_ckp_42)) != 0) + goto err; +err: + return (ret); +} + +static int +__env_init_rec_43(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __bam_relink_43_recover, DB___bam_relink_43)) != 0) + goto err; + /* + * We want to use the 4.2-based txn_regop record. 
+ */ + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __txn_regop_42_recover, DB___txn_regop_42)) != 0) + goto err; +err: + return (ret); +} + +static int +__env_init_rec_46(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __bam_merge_44_recover, DB___bam_merge_44)) != 0) + goto err; + +err: return (ret); +} + +static int +__env_init_rec_47(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __bam_split_42_recover, DB___bam_split_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_pg_sort_44_recover, DB___db_pg_sort_44)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_create_42_recover, DB___fop_create_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_write_42_recover, DB___fop_write_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_rename_42_recover, DB___fop_rename_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_rename_noundo_46_recover, DB___fop_rename_noundo_46)) != 0) + goto err; + +err: + return (ret); +} + +static int +__env_init_rec_48(env) + ENV *env; +{ + int ret; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_pg_sort_44_recover, DB___db_pg_sort_44)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_addrem_42_recover, DB___db_addrem_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __db_big_42_recover, DB___db_big_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __bam_split_48_recover, DB___bam_split_48)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __ham_insdel_42_recover, DB___ham_insdel_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + 
__ham_replace_42_recover, DB___ham_replace_42)) != 0) + goto err; +#endif +err: + return (ret); +} diff --git a/src/env/env_region.c b/src/env/env_region.c new file mode 100644 index 00000000..c0be73fe --- /dev/null +++ b/src/env/env_region.c @@ -0,0 +1,1466 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +static int __env_des_get __P((ENV *, REGINFO *, REGINFO *, REGION **)); +static int __env_faultmem __P((ENV *, void *, size_t, int)); +static int __env_sys_attach __P((ENV *, REGINFO *, REGION *)); +static int __env_sys_detach __P((ENV *, REGINFO *, int)); +static void __env_des_destroy __P((ENV *, REGION *)); +static void __env_remove_file __P((ENV *)); + +/* + * __env_attach + * Join/create the environment + * + * PUBLIC: int __env_attach __P((ENV *, u_int32_t *, int, int)); + */ +int +__env_attach(env, init_flagsp, create_ok, retry_ok) + ENV *env; + u_int32_t *init_flagsp; + int create_ok, retry_ok; +{ + DB_ENV *dbenv; + REGENV rbuf, *renv; + REGENV_REF ref; + REGINFO *infop; + REGION *rp, tregion; + size_t max, nrw, size; + long segid; + u_int32_t bytes, i, mbytes, nregions, signature; + u_int retry_cnt; + int majver, minver, patchver, ret; + char buf[sizeof(DB_REGION_FMT) + 20]; + + /* Initialization */ + dbenv = env->dbenv; + retry_cnt = 0; + signature = __env_struct_sig(); + + /* Repeated initialization. */ +loop: renv = NULL; + + /* Set up the ENV's REG_INFO structure. */ + if ((ret = __os_calloc(env, 1, sizeof(REGINFO), &infop)) != 0) + return (ret); + infop->env = env; + infop->type = REGION_TYPE_ENV; + infop->id = REGION_ID_ENV; + infop->flags = REGION_JOIN_OK; + if (create_ok) + F_SET(infop, REGION_CREATE_OK); + + /* Build the region name. 
*/ + if (F_ISSET(env, ENV_PRIVATE)) + ret = __os_strdup(env, "process-private", &infop->name); + else { + (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV); + ret = __db_appname(env, DB_APP_NONE, buf, NULL, &infop->name); + } + if (ret != 0) + goto err; + + /* + * We have to single-thread the creation of the REGENV region. Once + * it exists, we can serialize using region mutexes, but until then + * we have to be the only player in the game. + * + * If this is a private environment, we are only called once and there + * are no possible race conditions. + * + * If this is a public environment, we use the filesystem to ensure + * the creation of the environment file is single-threaded. + * + * If the application has specified their own mapping functions, try + * and create the region. The application will have to let us know if + * it's actually a creation or not, and we'll have to fall-back to a + * join if it's not a create. + */ + if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL) + goto creation; + + /* + * Try to create the file, if we have the authority. We have to ensure + * that multiple threads/processes attempting to simultaneously create + * the file are properly ordered. Open using the O_CREAT and O_EXCL + * flags so that multiple attempts to create the region will return + * failure in all but one. POSIX 1003.1 requires that EEXIST be the + * errno return value -- I sure hope they're right. + */ + if (create_ok) { + if ((ret = __os_open(env, infop->name, 0, + DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_REGION, + env->db_mode, &env->lockfhp)) == 0) + goto creation; + if (ret != EEXIST) { + __db_err(env, ret, "%s", infop->name); + goto err; + } + } + + /* The region must exist, it's not okay to recreate it. */ + F_CLR(infop, REGION_CREATE_OK); + + /* + * If we couldn't create the file, try and open it. (If that fails, + * we're done.) 
+ */ + if ((ret = __os_open( + env, infop->name, 0, DB_OSO_REGION, 0, &env->lockfhp)) != 0) + goto err; + + /* + * !!! + * The region may be in system memory not backed by the filesystem + * (more specifically, not backed by this file), and we're joining + * it. In that case, the process that created it will have written + * out a REGENV_REF structure as its only contents. We read that + * structure before we do anything further, e.g., we can't just map + * that file in and then figure out what's going on. + * + * All of this noise is because some systems don't have a coherent VM + * and buffer cache, and what's worse, when you mix operations on the + * VM and buffer cache, half the time you hang the system. + * + * If the file is the size of an REGENV_REF structure, then we know + * the real region is in some other memory. (The only way you get a + * file that size is to deliberately write it, as it's smaller than + * any possible disk sector created by writing a file or mapping the + * file into memory.) In which case, retrieve the structure from the + * file and use it to acquire the referenced memory. + * + * If the structure is larger than a REGENV_REF structure, then this + * file is backing the shared memory region, and we just map it into + * memory. + * + * And yes, this makes me want to take somebody and kill them. (I + * digress -- but you have no freakin' idea. This is unbelievably + * stupid and gross, and I've probably spent six months of my life, + * now, trying to make different versions of it work.) + */ + if ((ret = __os_ioinfo(env, infop->name, + env->lockfhp, &mbytes, &bytes, NULL)) != 0) { + __db_err(env, ret, "%s", infop->name); + goto err; + } + + /* + * !!! + * A size_t is OK -- regions get mapped into memory, and so can't + * be larger than a size_t. 
+ */ + size = mbytes * MEGABYTE + bytes; + + /* + * If the size is less than the size of a REGENV_REF structure, the + * region (or, possibly, the REGENV_REF structure) has not yet been + * completely written. Shouldn't be possible, but there's no reason + * not to wait awhile and try again. + * + * If the region is precisely the size of a ref, then we don't + * have the region here, just the meta-data, which implies that + * that we are using SYSTEM V shared memory (SYSTEM_MEM). However, + * if the flags say that we are using SYSTEM_MEM and the region is + * bigger than the ref, something bad has happened -- we are storing + * something in the region file other than meta-data and that + * shouldn't happen. + */ + if (size < sizeof(ref)) + goto retry; + else { + + if (size == sizeof(ref)) + F_SET(env, ENV_SYSTEM_MEM); + else if (F_ISSET(env, ENV_SYSTEM_MEM)) { + ret = EINVAL; + __db_err(env, ret, DB_STR_A("1535", + "%s: existing environment not created in system memory", + "%s"), infop->name); + goto err; + } else { + if ((ret = __os_read(env, env->lockfhp, &rbuf, + sizeof(rbuf), &nrw)) != 0 || + nrw < (size_t)sizeof(rbuf) || + (ret = __os_seek(env, + env->lockfhp, 0, 0, rbuf.region_off)) != 0) { + __db_err(env, ret, DB_STR_A("1536", + "%s: unable to read region info", "%s"), + infop->name); + goto err; + } + } + + if ((ret = __os_read(env, env->lockfhp, &ref, + sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) { + if (ret == 0) + ret = EIO; + __db_err(env, ret, DB_STR_A("1537", + "%s: unable to read system-memory information", + "%s"), infop->name); + goto err; + } + size = ref.size; + max = ref.max; + segid = ref.segid; + } + +#ifndef HAVE_MUTEX_FCNTL + /* + * If we're not doing fcntl locking, we can close the file handle. We + * no longer need it and the less contact between the buffer cache and + * the VM, the better. + */ + (void)__os_closehandle(env, env->lockfhp); + env->lockfhp = NULL; +#endif + + /* Call the region join routine to acquire the region. 
*/ + memset(&tregion, 0, sizeof(tregion)); + tregion.size = (roff_t)size; + tregion.max = (roff_t)max; + tregion.segid = segid; + if ((ret = __env_sys_attach(env, infop, &tregion)) != 0) + goto err; + +user_map_functions: + /* + * The environment's REGENV structure has to live at offset 0 instead + * of the usual alloc information. Set the primary reference and + * correct the "head" value to reference the alloc region. + */ + infop->primary = infop->addr; + infop->head = (u_int8_t *)infop->addr + sizeof(REGENV); + renv = infop->primary; + + /* + * Make sure the region matches our build. Special case a region + * that's all nul bytes, just treat it like any other corruption. + */ + if (renv->majver != DB_VERSION_MAJOR || + renv->minver != DB_VERSION_MINOR) { + if (renv->majver != 0 || renv->minver != 0) { + __db_errx(env, DB_STR_A("1538", + "Program version %d.%d doesn't match environment version %d.%d", + "%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR, + renv->majver, renv->minver); + ret = DB_VERSION_MISMATCH; + } else + ret = EINVAL; + goto err; + } + if (renv->signature != signature) { + __db_errx(env, DB_STR("1539", + "Build signature doesn't match environment")); + ret = DB_VERSION_MISMATCH; + goto err; + } + + /* + * Check if the environment has had a catastrophic failure. + * + * Check the magic number to ensure the region is initialized. If the + * magic number isn't set, the lock may not have been initialized, and + * an attempt to use it could lead to random behavior. + * + * The panic and magic values aren't protected by any lock, so we never + * use them in any check that's more complex than set/not-set. + * + * !!! + * I'd rather play permissions games using the underlying file, but I + * can't because Windows/NT filesystems won't open files mode 0. 
+ */ + if (renv->panic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { + ret = __env_panic_msg(env); + goto err; + } + if (renv->magic != DB_REGION_MAGIC) + goto retry; + + /* + * Get a reference to the underlying REGION information for this + * environment. + */ + if ((ret = __env_des_get(env, infop, infop, &rp)) != 0 || rp == NULL) + goto find_err; + infop->rp = rp; + + /* + * There's still a possibility for inconsistent data. When we acquired + * the size of the region and attached to it, it might have still been + * growing as part of its creation. We can detect this by checking the + * size we originally found against the region's current size. (The + * region's current size has to be final, the creator finished growing + * it before setting the magic number in the region.) + * + * !!! + * Skip this test when the application specified its own map functions. + * The size of the region is essentially unknown in that case: some + * other process asked the application's map function for some bytes, + * but we were never told the final size of the region. We could get + * a size back from the map function, but for all we know, our process' + * map function only knows how to join regions, it has no clue how big + * those regions are. + */ + if (DB_GLOBAL(j_region_map) == NULL && rp->size != size) + goto retry; + + /* + * Check our callers configuration flags, it's an error to configure + * incompatible or additional subsystems in an existing environment. + * Return the total set of flags to the caller so they initialize the + * correct set of subsystems. + */ + if (init_flagsp != NULL) { + FLD_CLR(*init_flagsp, renv->init_flags); + if (*init_flagsp != 0) { + __db_errx(env, DB_STR("1540", + "configured environment flags incompatible with existing environment")); + ret = EINVAL; + goto err; + } + *init_flagsp = renv->init_flags; + } + + /* + * Fault the pages into memory. Note, do this AFTER releasing the + * lock, because we're only reading the pages, not writing them. 
+ */ + (void)__env_faultmem(env, infop->primary, rp->size, 0); + + /* Everything looks good, we're done. */ + env->reginfo = infop; + return (0); + +creation: + /* Create the environment region. */ + F_SET(infop, REGION_CREATE); + + /* + * Allocate room for REGION structures plus overhead. + */ + memset(&tregion, 0, sizeof(tregion)); + nregions = __memp_max_regions(env) + 5; + size = nregions * sizeof(REGION); + size += dbenv->passwd_len; + size += (dbenv->thr_max + dbenv->thr_max / 4) * + __env_alloc_size(sizeof(DB_THREAD_INFO)); + /* Space for replication buffer. */ + if (init_flagsp != NULL && FLD_ISSET(*init_flagsp, DB_INITENV_REP)) + size += MEGABYTE; + size += __txn_region_size(env); + size += __log_region_size(env); + size += __env_thread_size(env, size); + size += __lock_region_size(env, size); + + tregion.size = (roff_t)size; + tregion.segid = INVALID_REGION_SEGID; + + if ((tregion.max = dbenv->memory_max) == 0) { + /* Add some slop. */ + size += 16 * 1024; + tregion.max = (roff_t)size; + + tregion.max += (roff_t)__lock_region_max(env); + tregion.max += (roff_t)__txn_region_max(env); + tregion.max += (roff_t)__log_region_max(env); + tregion.max += (roff_t)__env_thread_max(env); + } else if (tregion.size > tregion.max) { + __db_errx(env, DB_STR_A("1542", + "Minimum environment memory size %ld is bigger than spcified max %ld.", + "%ld %ld"), (u_long)tregion.size, (u_long)tregion.max); + ret = EINVAL; + goto err; + } else if (F_ISSET(env, ENV_PRIVATE)) + infop->max_alloc = dbenv->memory_max; + + if ((ret = __env_sys_attach(env, infop, &tregion)) != 0) + goto err; + + /* + * If the application has specified its own mapping functions, we don't + * know until we get here if we are creating the region or not. The + * way we find out is underlying functions clear the REGION_CREATE flag. + */ + if (!F_ISSET(infop, REGION_CREATE)) + goto user_map_functions; + + /* + * Fault the pages into memory. 
Note, do this BEFORE we initialize + * anything, because we're writing the pages, not just reading them. + */ + (void)__env_faultmem(env, infop->addr, tregion.size, 1); + + /* + * The first object in the region is the REGENV structure. This is + * different from the other regions, and, from everything else in + * this region, where all objects are allocated from the pool, i.e., + * there aren't any fixed locations. The remaining space is made + * available for later allocation. + * + * The allocation space must be size_t aligned, because that's what + * the initialization routine is going to store there. To make sure + * that happens, the REGENV structure was padded with a final size_t. + * No other region needs to worry about it because all of them treat + * the entire region as allocation space. + * + * Set the primary reference and correct the "head" value to reference + * the alloc region. + */ + infop->primary = infop->addr; + infop->head = (u_int8_t *)infop->addr + sizeof(REGENV); + __env_alloc_init(infop, tregion.size - sizeof(REGENV)); + + /* + * Initialize the rest of the REGENV structure. (Don't set the magic + * number to the correct value, that would validate the environment). + */ + renv = infop->primary; + renv->magic = 0; + renv->panic = 0; + + (void)db_version(&majver, &minver, &patchver); + renv->majver = (u_int32_t)majver; + renv->minver = (u_int32_t)minver; + renv->patchver = (u_int32_t)patchver; + renv->signature = signature; + + (void)time(&renv->timestamp); + __os_unique_id(env, &renv->envid); + + /* + * Initialize init_flags to store the flags that any other environment + * handle that uses DB_JOINENV to join this environment will need. + */ + renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp; + + /* + * Set up the region array. 
We use an array rather than a linked list + * as we have to traverse this list after failure in some cases, and + * we don't want to infinitely loop should the application fail while + * we're manipulating the list. + */ + renv->region_cnt = nregions; + if ((ret = __env_alloc(infop, nregions * sizeof(REGION), &rp)) != 0) { + __db_err(env, ret, DB_STR("1543", + "unable to create new master region array")); + goto err; + } + renv->region_off = R_OFFSET(infop, rp); + for (i = 0; i < nregions; ++i, ++rp) + rp->id = INVALID_REGION_ID; + + renv->cipher_off = renv->thread_off = renv->rep_off = INVALID_ROFF; + renv->flags = 0; + renv->op_timestamp = renv->rep_timestamp = 0; + renv->mtx_regenv = MUTEX_INVALID; + renv->reg_panic = 0; + + /* + * Get the underlying REGION structure for this environment. Note, + * we created the underlying OS region before we acquired the REGION + * structure, which is backwards from the normal procedure. Update + * the REGION structure. + */ + if ((ret = __env_des_get(env, infop, infop, &rp)) != 0) { +find_err: __db_errx(env, DB_STR_A("1544", + "%s: unable to find environment", "%s"), infop->name); + if (ret == 0) + ret = EINVAL; + goto err; + } + infop->rp = rp; + rp->alloc = rp->size = tregion.size; + rp->max = tregion.max; + rp->segid = tregion.segid; + + /* + * !!! + * If we create an environment where regions are public and in system + * memory, we have to inform processes joining the environment how to + * attach to the shared memory segment. So, we write the shared memory + * identifier into the file, to be read by those other processes. + * + * XXX + * This is really OS-layer information, but I can't see any easy way + * to move it down there without passing down information that it has + * no right to know, e.g., that this is the one-and-only REGENV region + * and not some other random region. 
+ */ + if (tregion.segid != INVALID_REGION_SEGID) { + ref.size = tregion.size; + ref.segid = tregion.segid; + ref.max = tregion.max; + if ((ret = __os_write( + env, env->lockfhp, &ref, sizeof(ref), &nrw)) != 0) { + __db_err(env, ret, DB_STR_A("1545", + "%s: unable to write out public environment ID", + "%s"), infop->name); + goto err; + } + } + +#ifndef HAVE_MUTEX_FCNTL + /* + * If we're not doing fcntl locking, we can close the file handle. We + * no longer need it and the less contact between the buffer cache and + * the VM, the better. + */ + if (env->lockfhp != NULL) { + (void)__os_closehandle(env, env->lockfhp); + env->lockfhp = NULL; + } +#endif + + /* Everything looks good, we're done. */ + env->reginfo = infop; + return (0); + +err: +retry: /* Close any open file handle. */ + if (env->lockfhp != NULL) { + (void)__os_closehandle(env, env->lockfhp); + env->lockfhp = NULL; + } + + /* + * If we joined or created the region, detach from it. If we created + * it, destroy it. Note, there's a path in the above code where we're + * using a temporary REGION structure because we haven't yet allocated + * the real one. In that case the region address (addr) will be filled + * in, but the REGION pointer (rp) won't. Fix it. + */ + if (infop->addr != NULL) { + if (infop->rp == NULL) + infop->rp = &tregion; + + (void)__env_sys_detach(env, + infop, F_ISSET(infop, REGION_CREATE)); + } + + /* Free the allocated name and/or REGINFO structure. */ + if (infop->name != NULL) + __os_free(env, infop->name); + __os_free(env, infop); + + /* If we had a temporary error, wait awhile and try again. */ + if (ret == 0) { + if (!retry_ok || ++retry_cnt > 3) { + __db_errx(env, DB_STR("1546", + "unable to join the environment")); + ret = EAGAIN; + } else { + __os_yield(env, retry_cnt * 3, 0); + goto loop; + } + } + + return (ret); +} + +/* + * __env_turn_on -- + * Turn on the created environment. 
+ * + * PUBLIC: int __env_turn_on __P((ENV *)); + */ +int +__env_turn_on(env) + ENV *env; +{ + REGENV *renv; + REGINFO *infop; + + infop = env->reginfo; + renv = infop->primary; + + /* If we didn't create the region, there's no need for further work. */ + if (!F_ISSET(infop, REGION_CREATE)) + return (0); + + /* + * Validate the file. All other threads of control are waiting + * on this value to be written -- "Let slip the hounds of war!" + */ + renv->magic = DB_REGION_MAGIC; + + return (0); +} + +/* + * __env_turn_off -- + * Turn off the environment. + * + * PUBLIC: int __env_turn_off __P((ENV *, u_int32_t)); + */ +int +__env_turn_off(env, flags) + ENV *env; + u_int32_t flags; +{ + REGENV *renv; + REGINFO *infop; + int ret, t_ret; + + ret = 0; + + /* + * Connect to the environment: If we can't join the environment, we + * guess it's because it doesn't exist and we're done. + * + * If the environment exists, attach and lock the environment. + */ + if (__env_attach(env, NULL, 0, 1) != 0) + return (0); + + infop = env->reginfo; + renv = infop->primary; + + MUTEX_LOCK(env, renv->mtx_regenv); + + /* + * If the environment is in use, we're done unless we're forcing the + * issue or the environment has panic'd. (If the environment panic'd, + * the thread holding the reference count may not have cleaned up, so + * we clean up. It's possible the application didn't plan on removing + * the environment in this particular call, but panic'd environments + * aren't useful to anyone.) + * + * Otherwise, panic the environment and overwrite the magic number so + * any thread of control attempting to connect (or racing with us) will + * back off and retry, or just die. + */ + if (renv->refcnt > 0 && !LF_ISSET(DB_FORCE) && !renv->panic) + ret = EBUSY; + else + renv->panic = 1; + + /* + * Unlock the environment (nobody should need this lock because + * we've poisoned the pool) and detach from the environment. 
+ */ + MUTEX_UNLOCK(env, renv->mtx_regenv); + + if ((t_ret = __env_detach(env, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __env_panic_set -- + * Set/clear unrecoverable error. + * + * PUBLIC: void __env_panic_set __P((ENV *, int)); + */ +void +__env_panic_set(env, on) + ENV *env; + int on; +{ + if (env != NULL && env->reginfo != NULL) + ((REGENV *)env->reginfo->primary)->panic = on ? 1 : 0; +} + +/* + * __env_ref_increment -- + * Increment the environment's reference count. + * + * PUBLIC: int __env_ref_increment __P((ENV *)); + */ +int +__env_ref_increment(env) + ENV *env; +{ + REGENV *renv; + REGINFO *infop; + int ret; + + infop = env->reginfo; + renv = infop->primary; + + /* If we're creating the primary region, allocate a mutex. */ + if (F_ISSET(infop, REGION_CREATE)) { + if ((ret = __mutex_alloc( + env, MTX_ENV_REGION, 0, &renv->mtx_regenv)) != 0) + return (ret); + renv->refcnt = 1; + } else { + /* Lock the environment, increment the reference, unlock. */ + MUTEX_LOCK(env, renv->mtx_regenv); + ++renv->refcnt; + MUTEX_UNLOCK(env, renv->mtx_regenv); + } + + F_SET(env, ENV_REF_COUNTED); + return (0); +} + +/* + * __env_ref_decrement -- + * Decrement the environment's reference count. + * + * PUBLIC: int __env_ref_decrement __P((ENV *)); + */ +int +__env_ref_decrement(env) + ENV *env; +{ + REGENV *renv; + REGINFO *infop; + + /* Be cautious -- we may not have an environment. */ + if ((infop = env->reginfo) == NULL) + return (0); + + renv = infop->primary; + + /* Even if we have an environment, may not have reference counted it. */ + if (F_ISSET(env, ENV_REF_COUNTED)) { + /* Lock the environment, decrement the reference, unlock. */ + MUTEX_LOCK(env, renv->mtx_regenv); + if (renv->refcnt == 0) + __db_errx(env, DB_STR("1547", + "environment reference count went negative")); + else + --renv->refcnt; + MUTEX_UNLOCK(env, renv->mtx_regenv); + + F_CLR(env, ENV_REF_COUNTED); + } + + /* If a private environment, we're done with the mutex, destroy it. 
*/ + return (F_ISSET(env, ENV_PRIVATE) ? + __mutex_free(env, &renv->mtx_regenv) : 0); +} + +/* + * __env_detach -- + * Detach from the environment. + * + * PUBLIC: int __env_detach __P((ENV *, int)); + */ +int +__env_detach(env, destroy) + ENV *env; + int destroy; +{ + REGENV *renv; + REGINFO *infop; + REGION rp; + int ret, t_ret; + + infop = env->reginfo; + renv = infop->primary; + ret = 0; + + /* Close the locking file handle. */ + if (env->lockfhp != NULL) { + if ((t_ret = + __os_closehandle(env, env->lockfhp)) != 0 && ret == 0) + ret = t_ret; + env->lockfhp = NULL; + } + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (destroy) { + /* + * Free the REGION array. + * + * The actual underlying region structure is allocated from the + * primary shared region, and we're about to free it. Save a + * copy on our stack for the REGINFO to reference when it calls + * down into the OS layer to release the shared memory segment. + */ + rp = *infop->rp; + infop->rp = &rp; + + if (renv->region_off != INVALID_ROFF) + __env_alloc_free( + infop, R_ADDR(infop, renv->region_off)); + } + + /* + * Set the ENV->reginfo field to NULL. BDB uses the ENV->reginfo + * field to decide if the underlying region can be accessed or needs + * cleanup. We're about to destroy what it references, so it needs to + * be cleared. + */ + env->reginfo = NULL; + env->thr_hashtab = NULL; + + if ((t_ret = __env_sys_detach(env, infop, destroy)) != 0 && ret == 0) + ret = t_ret; + if (infop->name != NULL) + __os_free(env, infop->name); + + /* Discard the ENV->reginfo field's memory. */ + __os_free(env, infop); + + return (ret); +} + +/* + * __env_remove_env -- + * Remove an environment. 
+ * + * PUBLIC: int __env_remove_env __P((ENV *)); + */ +int +__env_remove_env(env) + ENV *env; +{ + DB_ENV *dbenv; + REGENV *renv; + REGINFO *infop, reginfo; + REGION *rp; + u_int32_t flags_orig, i; + + dbenv = env->dbenv; + + /* + * We do not want to hang on a mutex request, nor do we care about + * panics. + */ + flags_orig = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); + F_SET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); + + /* + * This routine has to walk a nasty line between not looking into the + * environment (which may be corrupted after an app or system crash), + * and removing everything that needs removing. + * + * Connect to the environment: If we can't join the environment, we + * guess it's because it doesn't exist. Remove the underlying files, + * at least. + */ + if (__env_attach(env, NULL, 0, 0) != 0) + goto remfiles; + + infop = env->reginfo; + renv = infop->primary; + + /* + * Kill the environment, if it's not already dead. + */ + renv->panic = 1; + + /* + * Walk the array of regions. Connect to each region and disconnect + * with the destroy flag set. This shouldn't cause any problems, even + * if the region is corrupted, because we never look inside the region + * (with the single exception of mutex regions on systems where we have + * to return resources to the underlying system). + */ + for (rp = R_ADDR(infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID || rp->type == REGION_TYPE_ENV) + continue; + /* + * !!! + * The REGION_CREATE_OK flag is set for Windows/95 -- regions + * are zero'd out when the last reference to the region goes + * away, in which case the underlying OS region code requires + * callers be prepared to create the region in order to join it. + */ + memset(®info, 0, sizeof(reginfo)); + reginfo.id = rp->id; + reginfo.flags = REGION_CREATE_OK; + + /* + * If we get here and can't attach and/or detach to the + * region, it's a mess. 
Ignore errors, there's nothing + * we can do about them. + */ + if (__env_region_attach(env, ®info, 0, 0) != 0) + continue; + +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + /* + * If destroying the mutex region, return any system + * resources to the system. + */ + if (reginfo.type == REGION_TYPE_MUTEX) + __mutex_resource_return(env, ®info); +#endif + (void)__env_region_detach(env, ®info, 1); + } + + /* Detach from the environment's primary region. */ + (void)__env_detach(env, 1); + +remfiles: + /* + * Walk the list of files in the directory, unlinking files in the + * Berkeley DB name space. + */ + __env_remove_file(env); + + F_CLR(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); + F_SET(dbenv, flags_orig); + + return (0); +} + +/* + * __env_remove_file -- + * Discard any region files in the filesystem. + */ +static void +__env_remove_file(env) + ENV *env; +{ + int cnt, fcnt, lastrm, ret; + const char *dir; + char saved_char, *p, **names, *path, buf[sizeof(DB_REGION_FMT) + 20]; + + /* Get the full path of a file in the environment. */ + (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV); + if ((ret = __db_appname(env, + DB_APP_NONE, buf, NULL, &path)) != 0) + return; + + /* Get the parent directory for the environment. */ + if ((p = __db_rpath(path)) == NULL) { + p = path; + saved_char = *p; + + dir = PATH_DOT; + } else { + saved_char = *p; + *p = '\0'; + + dir = path; + } + + /* Get the list of file names. */ + if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) + __db_err(env, ret, "%s", dir); + + /* Restore the path, and free it. */ + *p = saved_char; + __os_free(env, path); + + if (ret != 0) + return; + + /* + * Remove files from the region directory. + */ + for (lastrm = -1, cnt = fcnt; --cnt >= 0;) { + /* Skip anything outside our name space. */ + if (strncmp(names[cnt], + DB_REGION_PREFIX, sizeof(DB_REGION_PREFIX) - 1)) + continue; + + /* Skip queue extent files. 
*/ + if (strncmp(names[cnt], "__dbq.", 6) == 0) + continue; + if (strncmp(names[cnt], "__dbp.", 6) == 0) + continue; + + /* Skip registry files. */ + if (strncmp(names[cnt], "__db.register", 13) == 0) + continue; + + /* Skip replication files. */ + if (strncmp(names[cnt], "__db.rep", 8) == 0) + continue; + + /* + * Remove the primary environment region last, because it's + * the key to this whole mess. + */ + if (strcmp(names[cnt], DB_REGION_ENV) == 0) { + lastrm = cnt; + continue; + } + + /* Remove the file. */ + if (__db_appname(env, + DB_APP_NONE, names[cnt], NULL, &path) == 0) { + /* + * Overwrite region files. Temporary files would have + * been maintained in encrypted format, so there's no + * reason to overwrite them. This is not an exact + * check on the file being a region file, but it's + * not likely to be wrong, and the worst thing that can + * happen is we overwrite a file that didn't need to be + * overwritten. + */ + (void)__os_unlink(env, path, 1); + __os_free(env, path); + } + } + + if (lastrm != -1) + if (__db_appname(env, + DB_APP_NONE, names[lastrm], NULL, &path) == 0) { + (void)__os_unlink(env, path, 1); + __os_free(env, path); + } + __os_dirfree(env, names, fcnt); +} + +/* + * __env_region_attach + * Join/create a region. + * + * PUBLIC: int __env_region_attach __P((ENV *, REGINFO *, size_t, size_t)); + */ +int +__env_region_attach(env, infop, init, max) + ENV *env; + REGINFO *infop; + size_t init, max; +{ + REGION *rp; + int ret; + char buf[sizeof(DB_REGION_FMT) + 20]; + + /* + * Find or create a REGION structure for this region. If we create + * it, the REGION_CREATE flag will be set in the infop structure. + */ + F_CLR(infop, REGION_CREATE); + if ((ret = __env_des_get(env, env->reginfo, infop, &rp)) != 0) + return (ret); + infop->env = env; + infop->rp = rp; + infop->type = rp->type; + infop->id = rp->id; + + /* + * __env_des_get may have created the region and reset the create + * flag. If we're creating the region, set the desired size. 
+ */ + if (F_ISSET(infop, REGION_CREATE)) { + rp->alloc = rp->size = (roff_t)init; + rp->max = (roff_t)max; + } + + /* Join/create the underlying region. */ + (void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id); + if ((ret = __db_appname(env, + DB_APP_NONE, buf, NULL, &infop->name)) != 0) + goto err; + if ((ret = __env_sys_attach(env, infop, rp)) != 0) + goto err; + + /* + * Fault the pages into memory. Note, do this BEFORE we initialize + * anything because we're writing pages in created regions, not just + * reading them. + */ + (void)__env_faultmem(env, + infop->addr, rp->size, F_ISSET(infop, REGION_CREATE)); + + /* + * !!! + * The underlying layer may have just decided that we are going + * to create the region. There are various system issues that + * can result in a useless region that requires re-initialization. + * + * If we created the region, initialize it for allocation. + */ + if (F_ISSET(infop, REGION_CREATE)) + __env_alloc_init(infop, rp->size); + + return (0); + +err: /* Discard the underlying region. */ + if (infop->addr != NULL) + (void)__env_sys_detach(env, + infop, F_ISSET(infop, REGION_CREATE)); + infop->rp = NULL; + infop->id = INVALID_REGION_ID; + + /* Discard the REGION structure if we created it. */ + if (F_ISSET(infop, REGION_CREATE)) { + __env_des_destroy(env, rp); + F_CLR(infop, REGION_CREATE); + } + + return (ret); +} + +/* + * __env_region_share + * Share the primary region. 
+ * + * PUBLIC: int __env_region_share __P((ENV *, REGINFO *)); + */ +int +__env_region_share(env, infop) + ENV *env; + REGINFO *infop; +{ + REGINFO *envinfo; + REGION *rp; + + envinfo = env->reginfo; + rp = envinfo->rp; + F_SET(infop, F_ISSET(envinfo, REGION_CREATE) | REGION_SHARED); + infop->addr = envinfo->addr; + infop->head = envinfo->head; + + infop->env = env; + infop->rp = rp; + infop->name = envinfo->name; + infop->fhp = envinfo->fhp; + infop->type = rp->type; + infop->id = rp->id; + + return (0); +} + +/* + * __env_region_detach -- + * Detach from a region. + * + * PUBLIC: int __env_region_detach __P((ENV *, REGINFO *, int)); + */ +int +__env_region_detach(env, infop, destroy) + ENV *env; + REGINFO *infop; + int destroy; +{ + REGION *rp; + REGION_MEM *mem, *next; + int ret; + + if (F_ISSET(env, ENV_PRIVATE)) + destroy = 1; + else if (F_ISSET(infop, REGION_SHARED)) + return (0); + + rp = infop->rp; + + /* + * When discarding the regions as we shut down a database environment, + * discard any allocated shared memory segments. This is the last time + * we use them, and db_region_destroy is the last region-specific call + * we make. + */ + if (F_ISSET(env, ENV_PRIVATE) && infop->primary != NULL) { + for (mem = infop->mem; mem != NULL; mem = next) { + next = mem->next; + __env_alloc_free(infop, mem); + } + __env_alloc_free(infop, infop->primary); + } + + if (F_ISSET(infop, REGION_SHARED)) + return (0); + + /* Detach from the underlying OS region. */ + ret = __env_sys_detach(env, infop, destroy); + + /* If we destroyed the region, discard the REGION structure. */ + if (destroy) + __env_des_destroy(env, rp); + + /* Destroy the structure. */ + if (infop->name != NULL) + __os_free(env, infop->name); + + return (ret); +} + +/* + * __env_sys_attach -- + * Prep and call the underlying OS attach function. 
+ */ +static int +__env_sys_attach(env, infop, rp) + ENV *env; + REGINFO *infop; + REGION *rp; +{ + int ret; + + /* + * All regions are created on 8K boundaries out of sheer paranoia, + * so we don't make some underlying VM unhappy. Make sure we don't + * overflow or underflow. + */ +#define OS_VMPAGESIZE (8 * 1024) +#define OS_VMROUNDOFF(i) { \ + if ((i) + OS_VMPAGESIZE - 1 > (i)) \ + (i) += OS_VMPAGESIZE - 1; \ + (i) -= (i) % OS_VMPAGESIZE; \ +} + if (F_ISSET(infop, REGION_CREATE)) { + OS_VMROUNDOFF(rp->size); + OS_VMROUNDOFF(rp->max); + } + +#ifdef DB_REGIONSIZE_MAX + /* Some architectures have hard limits on the maximum region size. */ + if (rp->size > DB_REGIONSIZE_MAX) { + __db_errx(env, DB_STR_A("1548", + "region size %lu is too large; maximum is %lu", "%lu %lu"), + (u_long)rp->size, (u_long)DB_REGIONSIZE_MAX); + return (EINVAL); + } + if (rp->max > DB_REGIONSIZE_MAX) { + __db_errx(env, DB_STR_A("1549", + "region max %lu is too large; maximum is %lu", "%lu %lu"), + (u_long)rp->max, (u_long)DB_REGIONSIZE_MAX); + return (EINVAL); + } +#endif + + /* + * If a region is private, malloc the memory. + * + * !!! + * If this fails because the region is too large to malloc, mmap(2) + * using the MAP_ANON or MAP_ANONYMOUS flags would be an alternative. + * I don't know of any architectures (yet!) where malloc is a problem. + */ + if (F_ISSET(env, ENV_PRIVATE)) { +#if defined(HAVE_MUTEX_HPPA_MSEM_INIT) + /* + * !!! + * There exist spinlocks that don't work in malloc memory, e.g., + * the HP/UX msemaphore interface. If we don't have locks that + * will work in malloc memory, we better not be private or not + * be threaded. 
+ */ + if (F_ISSET(env, ENV_THREAD)) { + __db_errx(env, DB_STR("1550", +"architecture does not support locks inside process-local (malloc) memory")); + __db_errx(env, DB_STR("1551", + "application may not specify both DB_PRIVATE and DB_THREAD")); + return (EINVAL); + } +#endif + if ((ret = __os_malloc( + env, sizeof(REGENV), &infop->addr)) != 0) + return (ret); + + } else { +#if !defined(HAVE_MMAP_EXTEND) + /* Extend any disk file to its full size before mapping it. */ + rp->size = rp->max; +#endif + if ((ret = __os_attach(env, infop, rp)) != 0) + return (ret); + } + + /* Set the start of the allocation region. */ + infop->head = infop->addr; + + /* + * We require that the memory is aligned to fit the largest integral + * type. Otherwise, multiple processes mapping the same shared region + * would have to memcpy every value before reading it. + */ + if (infop->addr != ALIGNP_INC(infop->addr, sizeof(uintmax_t))) { + __db_errx(env, DB_STR("1552", + "region memory was not correctly aligned")); + (void)__env_sys_detach(env, infop, + F_ISSET(infop, REGION_CREATE)); + return (EINVAL); + } + + return (0); +} + +/* + * __env_sys_detach -- + * Prep and call the underlying OS detach function. + */ +static int +__env_sys_detach(env, infop, destroy) + ENV *env; + REGINFO *infop; + int destroy; +{ + + /* If a region is private, free the memory. */ + if (F_ISSET(env, ENV_PRIVATE)) { + __os_free(env, infop->addr); + return (0); + } + + return (__os_detach(env, infop, destroy)); +} + +/* + * __env_des_get -- + * Return a reference to the shared information for a REGION, + * optionally creating a new entry. 
+ */ +static int +__env_des_get(env, env_infop, infop, rpp) + ENV *env; + REGINFO *env_infop, *infop; + REGION **rpp; +{ + REGENV *renv; + REGION *rp, *empty_slot, *first_type; + u_int32_t i, maxid; + + *rpp = NULL; + renv = env_infop->primary; + + /* + * If the caller wants to join a region, walk through the existing + * regions looking for a matching ID (if ID specified) or matching + * type (if type specified). If we return based on a matching type + * return the "primary" region, that is, the first region that was + * created of this type. + * + * Track the first empty slot and maximum region ID for new region + * allocation. + * + * MaxID starts at REGION_ID_ENV, the ID of the primary environment. + */ + maxid = REGION_ID_ENV; + empty_slot = first_type = NULL; + for (rp = R_ADDR(env_infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID) { + if (empty_slot == NULL) + empty_slot = rp; + continue; + } + if (infop->id != INVALID_REGION_ID) { + if (infop->id == rp->id) + break; + continue; + } + if (infop->type == rp->type && + F_ISSET(infop, REGION_JOIN_OK) && + (first_type == NULL || first_type->id > rp->id)) + first_type = rp; + + if (rp->id > maxid) + maxid = rp->id; + } + + /* If we found a matching ID (or a matching type), return it. */ + if (i >= renv->region_cnt) + rp = first_type; + if (rp != NULL) { + *rpp = rp; + return (0); + } + + /* + * If we didn't find a region and we don't have permission to create + * the region, fail. The caller generates any error message. + */ + if (!F_ISSET(infop, REGION_CREATE_OK)) + return (ENOENT); + + /* + * If we didn't find a region and don't have room to create the region + * fail with an error message, there's a sizing problem. + */ + if (empty_slot == NULL) { + __db_errx(env, DB_STR("1553", + "no room remaining for additional REGIONs")); + return (ENOENT); + } + + /* + * Initialize a REGION structure for the caller. 
If id was set, use + * that value, otherwise we use the next available ID. + */ + memset(empty_slot, 0, sizeof(REGION)); + empty_slot->segid = INVALID_REGION_SEGID; + + /* + * Set the type and ID; if no region ID was specified, + * allocate one. + */ + empty_slot->type = infop->type; + empty_slot->id = infop->id == INVALID_REGION_ID ? maxid + 1 : infop->id; + + F_SET(infop, REGION_CREATE); + + *rpp = empty_slot; + return (0); +} + +/* + * __env_des_destroy -- + * Destroy a reference to a REGION. + */ +static void +__env_des_destroy(env, rp) + ENV *env; + REGION *rp; +{ + COMPQUIET(env, NULL); + + rp->id = INVALID_REGION_ID; +} + +/* + * __env_faultmem -- + * Fault the region into memory. + */ +static int +__env_faultmem(env, addr, size, created) + ENV *env; + void *addr; + size_t size; + int created; +{ + int ret; + u_int8_t *p, *t; + + /* Ignore heap regions. */ + if (F_ISSET(env, ENV_PRIVATE)) + return (0); + + /* + * It's sometimes significantly faster to page-fault in all of the + * region's pages before we run the application, as we see nasty + * side-effects when we page-fault while holding various locks, i.e., + * the lock takes a long time to acquire because of the underlying + * page fault, and the other threads convoy behind the lock holder. + * + * If we created the region, we write a non-zero value so that the + * system can't cheat. If we're just joining the region, we can + * only read the value and try to confuse the compiler sufficiently + * that it doesn't figure out that we're never really using it. + * + * Touch every page (assuming pages are 512B, the smallest VM page + * size used in any general purpose processor). 
+ */ + ret = 0; + if (F_ISSET(env->dbenv, DB_ENV_REGION_INIT)) { + if (created) + for (p = addr, + t = (u_int8_t *)addr + size; p < t; p += 512) + p[0] = 0xdb; + else + for (p = addr, + t = (u_int8_t *)addr + size; p < t; p += 512) + ret |= p[0]; + } + + return (ret); +} diff --git a/src/env/env_register.c b/src/env/env_register.c new file mode 100644 index 00000000..b51884b5 --- /dev/null +++ b/src/env/env_register.c @@ -0,0 +1,730 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#define REGISTER_FILE "__db.register" + +#define PID_EMPTY "X 0\n" /* Unused PID entry */ +#define PID_FMT "%24lu\n" /* PID entry format */ + /* Unused PID test */ +#define PID_ISEMPTY(p) (memcmp(p, PID_EMPTY, PID_LEN) == 0) +#define PID_LEN (25) /* PID entry length */ + +#define REGISTRY_LOCK(env, pos, nowait) \ + __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 1, nowait) +#define REGISTRY_UNLOCK(env, pos) \ + __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 0, 0) +#define REGISTRY_EXCL_LOCK(env, nowait) \ + REGISTRY_LOCK(env, 1, nowait) +#define REGISTRY_EXCL_UNLOCK(env) \ + REGISTRY_UNLOCK(env, 1) + +static int __envreg_add __P((ENV *, int *, u_int32_t)); +static int __envreg_pid_compare __P((const void *, const void *)); +static int __envreg_create_active_pid __P((ENV *, char *)); + +/* + * Support for portable, multi-process database environment locking, based on + * the Subversion SR (#11511). + * + * The registry feature is configured by specifying the DB_REGISTER flag to the + * DbEnv.open method. If DB_REGISTER is specified, DB opens the registry file + * in the database environment home directory. 
The registry file is formatted + * as follows: + * + * 12345 # process ID slot 1 + * X # empty slot + * 12346 # process ID slot 2 + * X # empty slot + * 12347 # process ID slot 3 + * 12348 # process ID slot 4 + * X 12349 # empty slot + * X # empty slot + * + * All lines are fixed-length. All lines are process ID slots. Empty slots + * are marked with leading non-digit characters. + * + * To modify the file, you get an exclusive lock on the first byte of the file. + * + * While holding any DbEnv handle, each process has an exclusive lock on the + * first byte of a process ID slot. There is a restriction on having more + * than one DbEnv handle open at a time, because Berkeley DB uses per-process + * locking to implement this feature, that is, a process may never have more + * than a single slot locked. + * + * This work requires that if a process dies or the system crashes, locks held + * by the dying processes will be dropped. (We can't use system shared + * memory-backed or filesystem-backed locks because they're persistent when a + * process dies.) On POSIX systems, we use fcntl(2) locks; on Win32 we have + * LockFileEx/UnlockFile, except for Win/9X and Win/ME which have to loop on + * Lockfile/UnlockFile. + * + * We could implement the same solution with flock locking instead of fcntl, + * but flock would require a separate file for each process of control (and + * probably each DbEnv handle) in the database environment, which is fairly + * ugly. + * + * Whenever a process opens a new DbEnv handle, it walks the registry file and + * verifies it CANNOT acquire the lock for any non-empty slot. If a lock for + * a non-empty slot is available, we know a process died holding an open handle, + * and recovery needs to be run. + * + * It's possible to get corruption in the registry file. If a write system + * call fails after partially completing, there can be corrupted entries in + * the registry file, or a partial entry at the end of the file. This is OK. 
+ * A corrupted entry will be flagged as a non-empty line during the registry + * file walk. Since the line was corrupted by process failure, no process will + * hold a lock on the slot, which will lead to recovery being run. + * + * There can still be processes running in the environment when we recover it, + * and, in fact, there can still be processes running in the old environment + * after we're up and running in a new one. This is safe because performing + * recovery panics (and removes) the existing environment, so the window of + * vulnerability is small. Further, we check the panic flag in the DB API + * methods, when waking from spinning on a mutex, and whenever we're about to + * write to disk). The only window of corruption is if the write check of the + * panic were to complete, the region subsequently be recovered, and then the + * write continues. That's very, very unlikely to happen. This vulnerability + * already exists in Berkeley DB, too, the registry code doesn't make it any + * worse than it already is. + * + * The only way to avoid that window entirely is to ensure that all processes + * in the Berkeley DB environment exit before we run recovery. Applications + * can do that if they maintain their own process registry outside of Berkeley + * DB, but it's a little more difficult to do here. The obvious approach is + * to send signals to any process using the database environment as soon as we + * decide to run recovery, but there are problems with that approach: we might + * not have permission to send signals to the process, the process might have + * signal handlers installed, the cookie stored might not be the same as kill's + * argument, we may not be able to reliably tell if the process died, and there + * are probably other problems. However, if we can send a signal, it reduces + * the window, and so we include the code here. To configure it, turn on the + * DB_ENVREG_KILL_ALL #define. 
+ */ +#define DB_ENVREG_KILL_ALL 0 + +/* + * __envreg_register -- + * Register a ENV handle. + * + * PUBLIC: int __envreg_register __P((ENV *, int *, u_int32_t)); + */ +int +__envreg_register(env, need_recoveryp, flags) + ENV *env; + int *need_recoveryp; + u_int32_t flags; +{ + DB_ENV *dbenv; + pid_t pid; + u_int32_t bytes, mbytes; + int ret; + char *pp; + + *need_recoveryp = 0; + + dbenv = env->dbenv; + dbenv->thread_id(dbenv, &pid, NULL); + pp = NULL; + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1524", + "%lu: register environment", "%lu"), (u_long)pid); + + /* Build the path name and open the registry file. */ + if ((ret = __db_appname(env, + DB_APP_NONE, REGISTER_FILE, NULL, &pp)) != 0) + goto err; + if ((ret = __os_open(env, pp, 0, + DB_OSO_CREATE, DB_MODE_660, &dbenv->registry)) != 0) + goto err; + + /* + * Wait for an exclusive lock on the file. + * + * !!! + * We're locking bytes that don't yet exist, but that's OK as far as + * I know. + */ + if ((ret = REGISTRY_EXCL_LOCK(env, 0)) != 0) + goto err; + + /* + * If the file size is 0, initialize the file. + * + * Run recovery if we create the file, that means we can clean up the + * system by removing the registry file and restarting the application. + */ + if ((ret = __os_ioinfo( + env, pp, dbenv->registry, &mbytes, &bytes, NULL)) != 0) + goto err; + if (mbytes == 0 && bytes == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1525", + "%lu: creating %s", "%lu %s"), (u_long)pid, pp); + *need_recoveryp = 1; + } + + /* Register this process. */ + if ((ret = __envreg_add(env, need_recoveryp, flags)) != 0) + goto err; + + /* + * Release our exclusive lock if we don't need to run recovery. If + * we need to run recovery, ENV->open will call back into register + * code once recovery has completed. + */ + if (*need_recoveryp == 0 && (ret = REGISTRY_EXCL_UNLOCK(env)) != 0) + goto err; + + if (0) { +err: *need_recoveryp = 0; + + /* + * !!! 
+ * Closing the file handle must release all of our locks. + */ + if (dbenv->registry != NULL) + (void)__os_closehandle(env, dbenv->registry); + dbenv->registry = NULL; + } + + if (pp != NULL) + __os_free(env, pp); + + return (ret); +} + +/* + * __envreg_add -- + * Add the process' pid to the register. + */ +static int +__envreg_add(env, need_recoveryp, flags) + ENV *env; + int *need_recoveryp; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_THREAD_INFO *ip; + REGENV * renv; + REGINFO *infop; + pid_t pid; + off_t end, pos, dead; + size_t nr, nw; + u_int lcnt; + u_int32_t bytes, mbytes, orig_flags; + int need_recovery, ret, t_ret; + char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10]; + + dbenv = env->dbenv; + need_recovery = 0; + COMPQUIET(dead, 0); + COMPQUIET(p, NULL); + ip = NULL; + + /* Get a copy of our process ID. */ + dbenv->thread_id(dbenv, &pid, NULL); + snprintf(pid_buf, sizeof(pid_buf), PID_FMT, (u_long)pid); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1526", + "%lu: adding self to registry", "%lu"), (u_long)pid); + +#if DB_ENVREG_KILL_ALL + if (0) { +kill_all: /* + * A second pass through the file, this time killing any + * processes still running. + */ + if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + } +#endif + + /* + * Read the file. Skip empty slots, and check that a lock is held + * for any allocated slots. An allocated slot which we can lock + * indicates a process died holding a handle and recovery needs to + * be run. + */ + for (lcnt = 0;; ++lcnt) { + if ((ret = __os_read( + env, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + if (nr == 0) + break; + + /* + * A partial record at the end of the file is possible if a + * previously un-registered process was interrupted while + * registering. 
+ */ + if (nr != PID_LEN) { + need_recovery = 1; + break; + } + + if (PID_ISEMPTY(buf)) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1527", + "%02u: EMPTY", "%02u"), lcnt); + continue; + } + + /* + * !!! + * DB_REGISTER is implemented using per-process locking, only + * a single ENV handle may be open per process. Enforce + * that restriction. + */ + if (memcmp(buf, pid_buf, PID_LEN) == 0) { + __db_errx(env, DB_STR("1528", +"DB_REGISTER limits processes to one open DB_ENV handle per environment")); + return (EINVAL); + } + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) { + for (p = buf; *p == ' ';) + ++p; + buf[nr - 1] = '\0'; + } + +#if DB_ENVREG_KILL_ALL + if (need_recovery) { + pid = (pid_t)strtoul(buf, NULL, 10); + (void)kill(pid, SIGKILL); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1529", + "%02u: %s: KILLED", "%02u %s"), lcnt, p); + continue; + } +#endif + pos = (off_t)lcnt * PID_LEN; + if (REGISTRY_LOCK(env, pos, 1) == 0) { + if ((ret = REGISTRY_UNLOCK(env, pos)) != 0) + return (ret); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1530", + "%02u: %s: FAILED", "%02u %s"), lcnt, p); + + need_recovery = 1; + dead = pos; +#if DB_ENVREG_KILL_ALL + goto kill_all; +#else + break; +#endif + } else + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1531", + "%02u: %s: LOCKED", "%02u %s"), lcnt, p); + } + + /* + * If we have to perform recovery... + * + * Mark all slots empty. Registry ignores empty slots we can't lock, + * so it doesn't matter if any of the processes are in the middle of + * exiting Berkeley DB -- they'll discard their lock when they exit. 
+ */ + if (need_recovery) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, "%lu: recovery required", (u_long)pid); + + if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, + "%lu: performing failchk", (u_long)pid); + + if (LF_ISSET(DB_FAILCHK_ISALIVE)) + if ((ret = __envreg_create_active_pid( + env, pid_buf)) != 0) + goto sig_proc; + + /* The environment will already exist, so we do not + * want DB_CREATE set, nor do we want any recovery at + * this point. No need to put values back as flags is + * passed in by value. Save original dbenv flags in + * case we need to recover/remove existing environment. + * Set DB_ENV_FAILCHK before attach to help ensure we + * dont block on a mutex held by the dead process. + */ + LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL); + orig_flags = dbenv->flags; + F_SET(dbenv, DB_ENV_FAILCHK); + /* Attach to environment and subsystems. */ + if ((ret = __env_attach_regions( + dbenv, flags, orig_flags, 0)) != 0) + goto sig_proc; + if ((t_ret = + __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 && + ret == 0) + ret = t_ret; + if ((t_ret = + __env_failchk_int(dbenv)) != 0 && ret == 0) + ret = t_ret; + + /* Free active pid array if used. */ + if (LF_ISSET(DB_FAILCHK_ISALIVE)) { + DB_GLOBAL(num_active_pids) = 0; + DB_GLOBAL(size_active_pids) = 0; + __os_free( env, DB_GLOBAL(active_pids)); + } + + /* Detach from environment and deregister thread. */ + if ((t_ret = + __env_refresh(dbenv, orig_flags, 0)) != 0 && + ret == 0) + ret = t_ret; + if (ret == 0) { + if ((ret = __os_seek(env, dbenv->registry, + 0, 0,(u_int32_t)dead)) != 0 || + (ret = __os_write(env, dbenv->registry, + PID_EMPTY, PID_LEN, &nw)) != 0) + return (ret); + need_recovery = 0; + goto add; + } + + } + /* If we can't attach, then we cannot set DB_REGISTER panic. 
 */ +sig_proc: if (__env_attach(env, NULL, 0, 0) == 0) { + infop = env->reginfo; + renv = infop->primary; + /* Indicate DB_REGISTER panic. Also, set environment + * panic as this is the panic trigger mechanism in + * the code that everything looks for. + */ + renv->reg_panic = 1; + renv->panic = 1; + (void)__env_detach(env, 0); + } + + /* Wait for processes to see the panic and leave. */ + __os_yield(env, 0, dbenv->envreg_timeout); + + /* FIGURE out how big the file is. */ + if ((ret = __os_ioinfo( + env, NULL, dbenv->registry, &mbytes, &bytes, NULL)) != 0) + return (ret); + end = (off_t)mbytes * MEGABYTE + bytes; + + /* + * Seek to the beginning of the file and overwrite slots to + * the end of the file. + * + * It's possible for there to be a partial entry at the end of + * the file if a process died when trying to register. If so, + * correct for it and overwrite it as well. + */ + if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + for (lcnt = 0; lcnt < ((u_int)end / PID_LEN + + ((u_int)end % PID_LEN == 0 ? 0 : 1)); ++lcnt) { + + if ((ret = __os_read( + env, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + + pos = (off_t)lcnt * PID_LEN; + /* do not notify on dead process */ + if (pos != dead) { + pid = (pid_t)strtoul(buf, NULL, 10); + DB_EVENT(env, DB_EVENT_REG_ALIVE, &pid); + } + + if ((ret = __os_seek(env, + dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 || + (ret = __os_write(env, + dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0) + return (ret); + } + /* wait one last time to get everyone out */ + __os_yield(env, 0, dbenv->envreg_timeout); + } + + /* + * Seek to the first process slot and add ourselves to the first empty + * slot we can lock. 
+ */ +add: if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + for (lcnt = 0;; ++lcnt) { + if ((ret = __os_read( + env, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + if (nr == PID_LEN && !PID_ISEMPTY(buf)) + continue; + pos = (off_t)lcnt * PID_LEN; + if (REGISTRY_LOCK(env, pos, 1) == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1532", + "%lu: locking slot %02u at offset %lu", + "%lu %02u %lu"), (u_long)pid, lcnt, + (u_long)pos); + + if ((ret = __os_seek(env, + dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 || + (ret = __os_write(env, + dbenv->registry, pid_buf, PID_LEN, &nw)) != 0) + return (ret); + dbenv->registry_off = (u_int32_t)pos; + break; + } + } + + if (need_recovery) + *need_recoveryp = 1; + + return (ret); +} + +/* + * __envreg_unregister -- + * Unregister a ENV handle. + * + * PUBLIC: int __envreg_unregister __P((ENV *, int)); + */ +int +__envreg_unregister(env, recovery_failed) + ENV *env; + int recovery_failed; +{ + DB_ENV *dbenv; + size_t nw; + int ret, t_ret; + + dbenv = env->dbenv; + ret = 0; + + /* + * If recovery failed, we want to drop our locks and return, but still + * make sure any subsequent process doesn't decide everything is just + * fine and try to get into the database environment. In the case of + * an error, discard our locks, but leave our slot filled-in. + */ + if (recovery_failed) + goto err; + + /* + * Why isn't an exclusive lock necessary to discard a ENV handle? + * + * We mark our process ID slot empty before we discard the process slot + * lock, and threads of control reviewing the register file ignore any + * slots which they can't lock. + */ + if ((ret = __os_seek(env, + dbenv->registry, 0, 0, dbenv->registry_off)) != 0 || + (ret = __os_write( + env, dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0) + goto err; + + /* + * !!! + * This code assumes that closing the file descriptor discards all + * held locks. + * + * !!! 
+ * There is an ordering problem here -- in the case of a process that + * failed in recovery, we're unlocking both the exclusive lock and our + * slot lock. If the OS unlocked the exclusive lock and then allowed + * another thread of control to acquire the exclusive lock before + * also releasing our slot lock, we could race. That can't happen, I + * don't think. + */ +err: if ((t_ret = + __os_closehandle(env, dbenv->registry)) != 0 && ret == 0) + ret = t_ret; + + dbenv->registry = NULL; + return (ret); +} + +/* + * __envreg_xunlock -- + * Discard the exclusive lock held by the ENV handle. + * + * PUBLIC: int __envreg_xunlock __P((ENV *)); + */ +int +__envreg_xunlock(env) + ENV *env; +{ + DB_ENV *dbenv; + pid_t pid; + int ret; + + dbenv = env->dbenv; + dbenv->thread_id(dbenv, &pid, NULL); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR_A("1533", + "%lu: recovery completed, unlocking", "%lu"), (u_long)pid); + + if ((ret = REGISTRY_EXCL_UNLOCK(env)) == 0) + return (ret); + + __db_err(env, ret, DB_STR_A("1534", + "%s: exclusive file unlock", "%s"), REGISTER_FILE); + return (__env_panic(env, ret)); +} + +/* + * __envreg_pid_compare -- + * Compare routine for qsort and bsearch calls. + * returns neg if key is less than membr, 0 if equal and + * pos if key is greater than membr. + */ +static int +__envreg_pid_compare(key, membr) + const void *key; + const void *membr; +{ + return ( *(pid_t*)key - *(pid_t*)membr ); +} + +/* + * __envreg_isalive -- + * Default isalive function that uses contents of an array of active pids + * gotten from the db_register file to determine if process is still + * alive. 
+ * + * PUBLIC: int __envreg_isalive + * PUBLIC: __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); + */ +int +__envreg_isalive(dbenv, pid, tid, flags ) + DB_ENV *dbenv; + pid_t pid; + db_threadid_t tid; + u_int32_t flags; +{ + /* in this case we really do not care about tid, simply for lint */ + COMPQUIET(tid, 0); + + /* if is not an expected value then return early */ + if (!((flags == 0) || (flags == DB_MUTEX_PROCESS_ONLY))) + return (EINVAL); + + if (DB_GLOBAL(active_pids) == NULL || + DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL) + return (0); + /* + * bsearch returns a pointer to an entry in active_pids if a match + * is found on pid, else no match found it returns NULL. This + * routine will return a 1 if a match is found, else a 0. + */ + if (bsearch(&pid, DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids), + sizeof(pid_t), __envreg_pid_compare)) + return 1; + + return (0); +} + +/* + * __envreg_create_active_pid -- + * Create array of pids, if need more room in array then double size. + * Only add active pids from DB_REGISTER file into array. + */ +static int +__envreg_create_active_pid(env, my_pid) + ENV *env; + char *my_pid; +{ + DB_ENV *dbenv; + char buf[PID_LEN + 10]; + int ret; + off_t pos; + pid_t pid, *tmparray; + size_t tmpsize, nr; + u_int lcnt; + + dbenv = env->dbenv; + pos = 0; + ret = 0; + + /* + * Walk through DB_REGISTER file, we grab pid entries that are locked + * as those represent processes that are still alive. Ignore empty + * slots, or those that are unlocked. + */ + if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + for (lcnt = 0;; ++lcnt) { + if ((ret = __os_read( + env, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + + /* all done is read nothing, or get a partial record */ + if (nr == 0 || nr != PID_LEN) + break; + if (PID_ISEMPTY(buf)) + continue; + + pos = (off_t)lcnt * PID_LEN; + if (REGISTRY_LOCK(env, pos, 1) == 0) { + /* got lock, so process died. 
 Do not add to array */ + if ((ret = REGISTRY_UNLOCK(env, pos)) != 0) + return (ret); + } else { + /* first, check to make sure we have room in array */ + if (DB_GLOBAL(num_active_pids) + 1 > + DB_GLOBAL(size_active_pids)) { + tmpsize = + DB_GLOBAL(size_active_pids) * sizeof(pid_t); + + /* start with 512, then double if must grow */ + tmpsize = tmpsize>0 ? tmpsize*2 : 512; + if ((ret = __os_malloc + (env, tmpsize, &tmparray )) != 0) + return (ret); + + /* if array exists, then copy and free */ + if (DB_GLOBAL(active_pids)) { + memcpy( tmparray, + DB_GLOBAL(active_pids), + DB_GLOBAL(num_active_pids) * + sizeof(pid_t)); + __os_free( env, DB_GLOBAL(active_pids)); + } + + DB_GLOBAL(active_pids) = tmparray; + DB_GLOBAL(size_active_pids) = tmpsize; + + /* + * The process getting here has not been added + * to the DB_REGISTER file yet, so include it + * as the first item in array + */ + if (DB_GLOBAL(num_active_pids) == 0) { + pid = (pid_t)strtoul(my_pid, NULL, 10); + DB_GLOBAL(active_pids) + [DB_GLOBAL(num_active_pids)++] = pid; + } + } + + /* insert into array */ + pid = (pid_t)strtoul(buf, NULL, 10); + DB_GLOBAL(active_pids) + [DB_GLOBAL(num_active_pids)++] = pid; + + } + + } + + /* lets sort the array to allow for binary search in isalive func */ + qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids), + sizeof(pid_t), __envreg_pid_compare); + return (ret); +} diff --git a/src/env/env_sig.c b/src/env/env_sig.c new file mode 100644 index 00000000..d8ab7f10 --- /dev/null +++ b/src/env/env_sig.c @@ -0,0 +1,200 @@ +/*- + * DO NOT EDIT: automatically built by dist/s_sig. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/crypto.h" +#include "dbinc/db_join.h" +#include "dbinc/db_verify.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/log_verify.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * For a pure 32bit/64bit environment, we check all structures and calculate a + * signature. For compatible environment, we only check the structures in + * shared memory. + */ +#ifdef HAVE_MIXED_SIZE_ADDRESSING +#define __STRUCTURE_COUNT 41 +#else +#define __STRUCTURE_COUNT (41 + 103) +#endif + +/* + * __env_struct_sig -- + * Compute signature of structures. + * + * PUBLIC: u_int32_t __env_struct_sig __P((void)); + */ +u_int32_t +__env_struct_sig() +{ + u_short t[__STRUCTURE_COUNT + 5]; + u_int i; + + i = 0; +#define __ADD(s) (t[i++] = sizeof(struct s)) + +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__db_mutex_stat); +#endif + __ADD(__db_lock_stat); + __ADD(__db_lock_hstat); + __ADD(__db_lock_pstat); + __ADD(__db_ilock); + __ADD(__db_lock_u); + __ADD(__db_lsn); + __ADD(__db_log_stat); + __ADD(__db_mpool_stat); + __ADD(__db_rep_stat); + __ADD(__db_repmgr_stat); + __ADD(__db_seq_stat); + __ADD(__db_bt_stat); + __ADD(__db_h_stat); + __ADD(__db_heap_stat); + __ADD(__db_qam_stat); + __ADD(__db_thread_info); + __ADD(__db_lockregion); + __ADD(__sh_dbt); + __ADD(__db_lockobj); + __ADD(__db_locker); + __ADD(__db_lockpart); + __ADD(__db_lock); + __ADD(__log); + __ADD(__mpool); + __ADD(__db_mpool_fstat_int); + __ADD(__mpoolfile); + __ADD(__bh); +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__db_mutexregion); +#endif +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__db_mutex_t); +#endif + __ADD(__db_reg_env); + __ADD(__db_region); + __ADD(__rep); + __ADD(__db_txn_stat_int); + __ADD(__db_txnregion); + +#ifndef HAVE_MIXED_SIZE_ADDRESSING + __ADD(__db_dbt); + __ADD(__db_lockreq); + 
__ADD(__db_log_cursor); + __ADD(__log_rec_spec); + __ADD(__db_mpoolfile); + __ADD(__db_mpool_fstat); + __ADD(__db_txn); + __ADD(__kids); + __ADD(__my_cursors); + __ADD(__femfs); + __ADD(__db_preplist); + __ADD(__db_txn_active); + __ADD(__db_txn_stat); + __ADD(__db_txn_token); + __ADD(__db_repmgr_site); + __ADD(__db_repmgr_conn_err); + __ADD(__db_seq_record); + __ADD(__db_sequence); + __ADD(__db); + __ADD(__cq_fq); + __ADD(__cq_aq); + __ADD(__cq_jq); + __ADD(__db_heap_rid); + __ADD(__dbc); + __ADD(__key_range); + __ADD(__db_compact); + __ADD(__db_env); + __ADD(__db_distab); + __ADD(__db_logvrfy_config); + __ADD(__db_channel); + __ADD(__db_site); + __ADD(__fn); + __ADD(__db_msgbuf); + __ADD(__pin_list); + __ADD(__env_thread_info); + __ADD(__flag_map); + __ADD(__env); + __ADD(__dbc_internal); + __ADD(__dbpginfo); + __ADD(__epg); + __ADD(__cursor); + __ADD(__btree); + __ADD(__db_cipher); + __ADD(__db_foreign_info); + __ADD(__db_txnhead); + __ADD(__db_txnlist); + __ADD(__join_cursor); + __ADD(__pg_chksum); + __ADD(__pg_crypto); + __ADD(__heaphdr); + __ADD(__heaphdrsplt); + __ADD(__pglist); + __ADD(__vrfy_dbinfo); + __ADD(__vrfy_pageinfo); + __ADD(__vrfy_childinfo); + __ADD(__db_globals); + __ADD(__envq); + __ADD(__heap); + __ADD(__heap_cursor); + __ADD(__db_locktab); + __ADD(__db_entry); + __ADD(__fname); + __ADD(__db_log); + __ADD(__hdr); + __ADD(__log_persist); + __ADD(__db_commit); + __ADD(__db_filestart); + __ADD(__log_rec_hdr); + __ADD(__db_log_verify_info); + __ADD(__txn_verify_info); + __ADD(__lv_filereg_info); + __ADD(__lv_filelife); + __ADD(__lv_ckp_info); + __ADD(__lv_timestamp_info); + __ADD(__lv_txnrange); + __ADD(__add_recycle_params); + __ADD(__ckp_verify_params); + __ADD(__db_mpool); + __ADD(__db_mpreg); + __ADD(__db_mpool_hash); + __ADD(__bh_frozen_p); + __ADD(__bh_frozen_a); +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__db_mutexmgr); +#endif + __ADD(__fh_t); + __ADD(__db_partition); + __ADD(__part_internal); + __ADD(__qcursor); + __ADD(__mpfarray); + 
__ADD(__qmpf); + __ADD(__queue); + __ADD(__qam_filelist); + __ADD(__db_reg_env_ref); + __ADD(__db_region_mem_t); + __ADD(__db_reginfo_t); + __ADD(__rep_waiter); + __ADD(__db_rep); + __ADD(__rep_lease_entry); + __ADD(__txn_detail); + __ADD(__db_txnmgr); + __ADD(__db_commit_info); + __ADD(__txn_logrec); +#endif + + return (__ham_func5(NULL, t, i * sizeof(t[0]))); +} diff --git a/src/env/env_stat.c b/src/env/env_stat.c new file mode 100644 index 00000000..31dbeda3 --- /dev/null +++ b/src/env/env_stat.c @@ -0,0 +1,878 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +#ifdef HAVE_STATISTICS +static int __env_print_all __P((ENV *, u_int32_t)); +static int __env_print_dbenv_all __P((ENV *, u_int32_t)); +static int __env_print_env_all __P((ENV *, u_int32_t)); +static int __env_print_fh __P((ENV *)); +static int __env_print_stats __P((ENV *, u_int32_t)); +static int __env_print_thread __P((ENV *)); +static int __env_stat_print __P((ENV *, u_int32_t)); +static char *__env_thread_state_print __P((DB_THREAD_STATE)); +static const char * + __reg_type __P((reg_type_t)); + +/* + * __env_stat_print_pp -- + * ENV->stat_print pre/post processor. 
+ * + * PUBLIC: int __env_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__env_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->stat_print"); + + if ((ret = __db_fchk(env, "DB_ENV->stat_print", + flags, DB_STAT_ALL | DB_STAT_ALLOC | + DB_STAT_CLEAR | DB_STAT_SUBSYSTEM)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__env_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __env_stat_print -- + * ENV->stat_print method. + */ +static int +__env_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + time_t now; + int ret; + char time_buf[CTIME_BUFLEN]; + + (void)time(&now); + __db_msg(env, "%.24s\tLocal time", __os_ctime(&now, time_buf)); + + if ((ret = __env_print_stats(env, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __env_print_all(env, flags)) != 0) + return (ret); + + if ((ret = __env_print_thread(env)) != 0) + return (ret); + + if ((ret = __env_print_fh(env)) != 0) + return (ret); + + if (!LF_ISSET(DB_STAT_SUBSYSTEM)) + return (0); + + if (LOGGING_ON(env)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __log_stat_print(env, flags)) != 0) + return (ret); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __dbreg_stat_print(env, flags)) != 0) + return (ret); + } + + if (LOCKING_ON(env)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __lock_stat_print(env, flags)) != 0) + return (ret); + } + + if (MPOOL_ON(env)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __memp_stat_print(env, flags)) != 0) + return (ret); + } + + if (REP_ON(env)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __rep_stat_print(env, flags)) != 0) + return (ret); +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_stat_print(env, flags)) != 0) + return (ret); +#endif + } + + if (TXN_ON(env)) { + __db_msg(env, "%s", 
DB_GLOBAL(db_line)); + if ((ret = __txn_stat_print(env, flags)) != 0) + return (ret); + } + +#ifdef HAVE_MUTEX_SUPPORT + /* + * Dump the mutexes last. If DB_STAT_CLEAR is set this will + * clear out the mutex counters and we want to see them in + * the context of the other subsystems first. + */ + if (MUTEX_ON(env)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + if ((ret = __mutex_stat_print(env, flags)) != 0) + return (ret); + } +#endif + + return (0); +} + +/* + * __env_print_stats -- + * Display the default environment statistics. + * + */ +static int +__env_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + REGENV *renv; + REGINFO *infop; + char time_buf[CTIME_BUFLEN]; + + infop = env->reginfo; + renv = infop->primary; + + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Default database environment information:"); + } + STAT_HEX("Magic number", renv->magic); + STAT_LONG("Panic value", renv->panic); + __db_msg(env, "%d.%d.%d\tEnvironment version", + renv->majver, renv->minver, renv->patchver); + STAT_LONG("Btree version", DB_BTREEVERSION); + STAT_LONG("Hash version", DB_HASHVERSION); + STAT_LONG("Lock version", DB_LOCKVERSION); + STAT_LONG("Log version", DB_LOGVERSION); + STAT_LONG("Queue version", DB_QAMVERSION); + STAT_LONG("Sequence version", DB_SEQUENCE_VERSION); + STAT_LONG("Txn version", DB_TXNVERSION); + __db_msg(env, + "%.24s\tCreation time", __os_ctime(&renv->timestamp, time_buf)); + STAT_HEX("Environment ID", renv->envid); + __mutex_print_debug_single(env, + "Primary region allocation and reference count mutex", + renv->mtx_regenv, flags); + STAT_LONG("References", renv->refcnt); + __db_dlbytes(env, "Current region size", + (u_long)0, (u_long)0, (u_long)infop->rp->size); + __db_dlbytes(env, "Maximum region size", + (u_long)0, (u_long)0, (u_long)infop->rp->max); + + return (0); +} + +/* + * __env_print_all -- + * Display the debugging environment statistics. 
+ */ +static int +__env_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + int ret, t_ret; + + /* + * There are two structures -- DB_ENV and ENV. + */ + ret = __env_print_dbenv_all(env, flags); + if ((t_ret = __env_print_env_all(env, flags)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __env_print_dbenv_all -- + * Display the debugging environment statistics. + */ +static int +__env_print_dbenv_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN db_env_fn[] = { + { DB_ENV_AUTO_COMMIT, "DB_ENV_AUTO_COMMIT" }, + { DB_ENV_CDB_ALLDB, "DB_ENV_CDB_ALLDB" }, + { DB_ENV_DIRECT_DB, "DB_ENV_DIRECT_DB" }, + { DB_ENV_DSYNC_DB, "DB_ENV_DSYNC_DB" }, + { DB_ENV_MULTIVERSION, "DB_ENV_MULTIVERSION" }, + { DB_ENV_NOLOCKING, "DB_ENV_NOLOCKING" }, + { DB_ENV_NOMMAP, "DB_ENV_NOMMAP" }, + { DB_ENV_NOPANIC, "DB_ENV_NOPANIC" }, + { DB_ENV_OVERWRITE, "DB_ENV_OVERWRITE" }, + { DB_ENV_REGION_INIT, "DB_ENV_REGION_INIT" }, + { DB_ENV_TIME_NOTGRANTED, "DB_ENV_TIME_NOTGRANTED" }, + { DB_ENV_TXN_NOSYNC, "DB_ENV_TXN_NOSYNC" }, + { DB_ENV_TXN_NOWAIT, "DB_ENV_TXN_NOWAIT" }, + { DB_ENV_TXN_SNAPSHOT, "DB_ENV_TXN_SNAPSHOT" }, + { DB_ENV_TXN_WRITE_NOSYNC, "DB_ENV_TXN_WRITE_NOSYNC" }, + { DB_ENV_YIELDCPU, "DB_ENV_YIELDCPU" }, + { 0, NULL } + }; + static const FN vfn[] = { + { DB_VERB_DEADLOCK, "DB_VERB_DEADLOCK" }, + { DB_VERB_FILEOPS, "DB_VERB_FILEOPS" }, + { DB_VERB_FILEOPS_ALL, "DB_VERB_FILEOPS_ALL" }, + { DB_VERB_RECOVERY, "DB_VERB_RECOVERY" }, + { DB_VERB_REGISTER, "DB_VERB_REGISTER" }, + { DB_VERB_REPLICATION, "DB_VERB_REPLICATION" }, + { DB_VERB_REP_ELECT, "DB_VERB_REP_ELECT" }, + { DB_VERB_REP_LEASE, "DB_VERB_REP_LEASE" }, + { DB_VERB_REP_MISC, "DB_VERB_REP_MISC" }, + { DB_VERB_REP_MSGS, "DB_VERB_REP_MSGS" }, + { DB_VERB_REP_SYNC, "DB_VERB_REP_SYNC" }, + { DB_VERB_REP_SYSTEM, "DB_VERB_REP_SYSTEM" }, + { DB_VERB_REP_TEST, "DB_VERB_REP_TEST" }, + { DB_VERB_REPMGR_CONNFAIL, "DB_VERB_REPMGR_CONNFAIL" }, + { DB_VERB_REPMGR_MISC, "DB_VERB_REPMGR_MISC" }, + { 
DB_VERB_WAITSFOR, "DB_VERB_WAITSFOR" }, + { 0, NULL } + }; + DB_ENV *dbenv; + DB_MSGBUF mb; + char **p; + + dbenv = env->dbenv; + DB_MSGBUF_INIT(&mb); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + STAT_POINTER("ENV", dbenv->env); + __mutex_print_debug_single( + env, "DB_ENV handle mutex", dbenv->mtx_db_env, flags); + STAT_ISSET("Errcall", dbenv->db_errcall); + STAT_ISSET("Errfile", dbenv->db_errfile); + STAT_STRING("Errpfx", dbenv->db_errpfx); + STAT_ISSET("Msgfile", dbenv->db_msgfile); + STAT_ISSET("Msgcall", dbenv->db_msgcall); + + STAT_ISSET("AppDispatch", dbenv->app_dispatch); + STAT_ISSET("Event", dbenv->db_event_func); + STAT_ISSET("Feedback", dbenv->db_feedback); + STAT_ISSET("Free", dbenv->db_free); + STAT_ISSET("Panic", dbenv->db_paniccall); + STAT_ISSET("Malloc", dbenv->db_malloc); + STAT_ISSET("Realloc", dbenv->db_realloc); + STAT_ISSET("IsAlive", dbenv->is_alive); + STAT_ISSET("ThreadId", dbenv->thread_id); + STAT_ISSET("ThreadIdString", dbenv->thread_id_string); + + STAT_STRING("Log dir", dbenv->db_log_dir); + STAT_STRING("Tmp dir", dbenv->db_tmp_dir); + if (dbenv->db_data_dir == NULL) + STAT_ISSET("Data dir", dbenv->db_data_dir); + else { + for (p = dbenv->db_data_dir; *p != NULL; ++p) + __db_msgadd(env, &mb, "%s\tData dir", *p); + DB_MSGBUF_FLUSH(env, &mb); + } + + STAT_STRING( + "Intermediate directory mode", dbenv->intermediate_dir_mode); + + STAT_LONG("Shared memory key", dbenv->shm_key); + + STAT_ISSET("Password", dbenv->passwd); + + STAT_ISSET("App private", dbenv->app_private); + STAT_ISSET("Api1 internal", dbenv->api1_internal); + STAT_ISSET("Api2 internal", dbenv->api2_internal); + + __db_prflags(env, NULL, dbenv->verbose, vfn, NULL, "\tVerbose flags"); + + STAT_ULONG("Mutex align", dbenv->mutex_align); + STAT_ULONG("Mutex cnt", dbenv->mutex_cnt); + STAT_ULONG("Mutex inc", dbenv->mutex_inc); + STAT_ULONG("Mutex tas spins", dbenv->mutex_tas_spins); + + STAT_ISSET("Lock conflicts", dbenv->lk_conflicts); + STAT_LONG("Lock modes", 
dbenv->lk_modes); + STAT_ULONG("Lock detect", dbenv->lk_detect); + STAT_ULONG("Lock init", dbenv->lk_init); + STAT_ULONG("Lock init lockers", dbenv->lk_init_lockers); + STAT_ULONG("Lock init objects", dbenv->lk_init_objects); + STAT_ULONG("Lock max", dbenv->lk_max); + STAT_ULONG("Lock max lockers", dbenv->lk_max_lockers); + STAT_ULONG("Lock max objects", dbenv->lk_max_objects); + STAT_ULONG("Lock partitions", dbenv->lk_partitions); + STAT_ULONG("Lock object hash table size", dbenv->object_t_size); + STAT_ULONG("Lock timeout", dbenv->lk_timeout); + + STAT_ULONG("Log bsize", dbenv->lg_bsize); + STAT_FMT("Log file mode", "%#o", int, dbenv->lg_filemode); + STAT_ULONG("Log region max", dbenv->lg_regionmax); + STAT_ULONG("Log size", dbenv->lg_size); + + STAT_ULONG("Cache GB", dbenv->mp_gbytes); + STAT_ULONG("Cache B", dbenv->mp_bytes); + STAT_ULONG("Cache max GB", dbenv->mp_max_gbytes); + STAT_ULONG("Cache max B", dbenv->mp_max_bytes); + STAT_ULONG("Cache mmap size", dbenv->mp_mmapsize); + STAT_ULONG("Cache max open fd", dbenv->mp_maxopenfd); + STAT_ULONG("Cache max write", dbenv->mp_maxwrite); + STAT_ULONG("Cache number", dbenv->mp_ncache); + STAT_ULONG("Cache max write sleep", dbenv->mp_maxwrite_sleep); + + STAT_ULONG("Txn init", dbenv->tx_init); + STAT_ULONG("Txn max", dbenv->tx_max); + STAT_ULONG("Txn timestamp", dbenv->tx_timestamp); + STAT_ULONG("Txn timeout", dbenv->tx_timeout); + + STAT_ULONG("Thread count", dbenv->thr_max); + + STAT_ISSET("Registry", dbenv->registry); + STAT_ULONG("Registry offset", dbenv->registry_off); + STAT_ULONG("Registry timeout", dbenv->envreg_timeout); + + __db_prflags(env, + NULL, dbenv->flags, db_env_fn, NULL, "\tPublic environment flags"); + + return (0); +} + +/* + * __env_print_env_all -- + * Display the debugging environment statistics. 
+ */ +static int +__env_print_env_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN env_fn[] = { + { ENV_CDB, "ENV_CDB" }, + { ENV_DBLOCAL, "ENV_DBLOCAL" }, + { ENV_LOCKDOWN, "ENV_LOCKDOWN" }, + { ENV_NO_OUTPUT_SET, "ENV_NO_OUTPUT_SET" }, + { ENV_OPEN_CALLED, "ENV_OPEN_CALLED" }, + { ENV_PRIVATE, "ENV_PRIVATE" }, + { ENV_RECOVER_FATAL, "ENV_RECOVER_FATAL" }, + { ENV_REF_COUNTED, "ENV_REF_COUNTED" }, + { ENV_SYSTEM_MEM, "ENV_SYSTEM_MEM" }, + { ENV_THREAD, "ENV_THREAD" }, + { 0, NULL } + }; + static const FN ofn[] = { + { DB_CREATE, "DB_CREATE" }, + { DB_FORCE, "DB_FORCE" }, + { DB_INIT_CDB, "DB_INIT_CDB" }, + { DB_INIT_LOCK, "DB_INIT_LOCK" }, + { DB_INIT_LOG, "DB_INIT_LOG" }, + { DB_INIT_MPOOL, "DB_INIT_MPOOL" }, + { DB_INIT_REP, "DB_INIT_REP" }, + { DB_INIT_TXN, "DB_INIT_TXN" }, + { DB_LOCKDOWN, "DB_LOCKDOWN" }, + { DB_NOMMAP, "DB_NOMMAP" }, + { DB_PRIVATE, "DB_PRIVATE" }, + { DB_RDONLY, "DB_RDONLY" }, + { DB_RECOVER, "DB_RECOVER" }, + { DB_RECOVER_FATAL, "DB_RECOVER_FATAL" }, + { DB_SYSTEM_MEM, "DB_SYSTEM_MEM" }, + { DB_THREAD, "DB_THREAD" }, + { DB_TRUNCATE, "DB_TRUNCATE" }, + { DB_TXN_NOSYNC, "DB_TXN_NOSYNC" }, + { DB_USE_ENVIRON, "DB_USE_ENVIRON" }, + { DB_USE_ENVIRON_ROOT, "DB_USE_ENVIRON_ROOT" }, + { 0, NULL } + }; + static const FN regenvfn[] = { + { DB_REGENV_REPLOCKED, "DB_REGENV_REPLOCKED" }, + { 0, NULL } + }; + REGENV *renv; + REGINFO *infop; + REGION *rp; + u_int32_t i; + char time_buf[CTIME_BUFLEN]; + + infop = env->reginfo; + renv = infop->primary; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + STAT_POINTER("DB_ENV", env->dbenv); + __mutex_print_debug_single( + env, "ENV handle mutex", env->mtx_env, flags); + + STAT_STRING("Home", env->db_home); + __db_prflags(env, NULL, env->open_flags, ofn, NULL, "\tOpen flags"); + STAT_FMT("Mode", "%#o", int, env->db_mode); + + STAT_ULONG("Pid cache", env->pid_cache); + + STAT_ISSET("Lockfhp", env->lockfhp); + + STAT_ISSET("Locker", env->env_lref); + + STAT_ISSET("Internal recovery table", 
env->recover_dtab.int_dispatch); + STAT_ULONG("Number of recovery table slots", + env->recover_dtab.int_size); + STAT_ISSET("External recovery table", env->recover_dtab.ext_dispatch); + STAT_ULONG("Number of recovery table slots", + env->recover_dtab.ext_size); + + STAT_ULONG("Thread hash buckets", env->thr_nbucket); + STAT_ISSET("Thread hash table", env->thr_hashtab); + + __mutex_print_debug_single( + env, "ENV list of DB handles mutex", env->mtx_dblist, flags); + STAT_LONG("DB reference count", env->db_ref); + + __mutex_print_debug_single(env, "MT mutex", env->mtx_mt, flags); + + STAT_ISSET("Crypto handle", env->crypto_handle); + STAT_ISSET("Lock handle", env->lk_handle); + STAT_ISSET("Log handle", env->lg_handle); + STAT_ISSET("Cache handle", env->mp_handle); + STAT_ISSET("Mutex handle", env->mutex_handle); + STAT_ISSET("Replication handle", env->rep_handle); + STAT_ISSET("Txn handle", env->tx_handle); + + STAT_ISSET("User copy", env->dbt_usercopy); + + STAT_LONG("Test abort", env->test_abort); + STAT_LONG("Test check", env->test_check); + STAT_LONG("Test copy", env->test_copy); + + __db_prflags(env, + NULL, env->flags, env_fn, NULL, "\tPrivate environment flags"); + + __db_print_reginfo(env, infop, "Primary", flags); + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Per region database environment information:"); + for (rp = R_ADDR(infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID) + continue; + __db_msg(env, "%s Region:", __reg_type(rp->type)); + STAT_LONG("Region ID", rp->id); + STAT_LONG("Segment ID", rp->segid); + __db_dlbytes(env, + "Size", (u_long)0, (u_long)0, (u_long)rp->size); + } + __db_prflags(env, + NULL, renv->init_flags, ofn, NULL, "\tInitialization flags"); + STAT_ULONG("Region slots", renv->region_cnt); + __db_prflags(env, + NULL, renv->flags, regenvfn, NULL, "\tReplication flags"); + __db_msg(env, "%.24s\tOperation timestamp", + renv->op_timestamp == 0 ? 
+ "!Set" : __os_ctime(&renv->op_timestamp, time_buf)); + __db_msg(env, "%.24s\tReplication timestamp", + renv->rep_timestamp == 0 ? + "!Set" : __os_ctime(&renv->rep_timestamp, time_buf)); + + return (0); +} + +static char * +__env_thread_state_print(state) + DB_THREAD_STATE state; +{ + switch (state) { + case THREAD_ACTIVE: + return ("active"); + case THREAD_BLOCKED: + return ("blocked"); + case THREAD_BLOCKED_DEAD: + return ("blocked and dead"); + case THREAD_OUT: + return ("out"); + default: + return ("unknown"); + } + /* NOTREACHED */ +} + +/* + * __env_print_thread -- + * Display the thread block state. + */ +static int +__env_print_thread(env) + ENV *env; +{ + BH *bhp; + DB_ENV *dbenv; + DB_HASHTAB *htab; + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + PIN_LIST *list, *lp; + REGENV *renv; + REGINFO *infop; + THREAD_INFO *thread; + u_int32_t i; + char buf[DB_THREADID_STRLEN]; + + dbenv = env->dbenv; + + /* The thread table may not be configured. */ + if ((htab = env->thr_hashtab) == NULL) + return (0); + + dbmp = env->mp_handle; + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Thread tracking information"); + + /* Dump out the info we have on thread tracking. */ + infop = env->reginfo; + renv = infop->primary; + thread = R_ADDR(infop, renv->thread_off); + STAT_ULONG("Thread blocks allocated", thread->thr_count); + STAT_ULONG("Thread allocation threshold", thread->thr_max); + STAT_ULONG("Thread hash buckets", thread->thr_nbucket); + + /* Dump out the info we have on active threads. 
*/ + __db_msg(env, "Thread status blocks:"); + for (i = 0; i < env->thr_nbucket; i++) + SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) { + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE) + continue; + __db_msg(env, "\tprocess/thread %s: %s", + dbenv->thread_id_string( + dbenv, ip->dbth_pid, ip->dbth_tid, buf), + __env_thread_state_print(ip->dbth_state)); + list = R_ADDR(env->reginfo, ip->dbth_pinlist); + for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) { + if (lp->b_ref == INVALID_ROFF) + continue; + bhp = R_ADDR( + &dbmp->reginfo[lp->region], lp->b_ref); + __db_msg(env, + "\t\tpins: %lu", (u_long)bhp->pgno); + } + } + return (0); +} + +/* + * __env_print_fh -- + * Display statistics for all handles open in this environment. + */ +static int +__env_print_fh(env) + ENV *env; +{ + DB_FH *fhp; + + if (TAILQ_FIRST(&env->fdlist) == NULL) + return (0); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Environment file handle information"); + + MUTEX_LOCK(env, env->mtx_env); + + TAILQ_FOREACH(fhp, &env->fdlist, q) + __db_print_fh(env, NULL, fhp, 0); + + MUTEX_UNLOCK(env, env->mtx_env); + + return (0); +} + +/* + * __db_print_fh -- + * Print out a file handle. 
+ * + * PUBLIC: void __db_print_fh __P((ENV *, const char *, DB_FH *, u_int32_t)); + */ +void +__db_print_fh(env, tag, fh, flags) + ENV *env; + const char *tag; + DB_FH *fh; + u_int32_t flags; +{ + static const FN fn[] = { + { DB_FH_NOSYNC, "DB_FH_NOSYNC" }, + { DB_FH_OPENED, "DB_FH_OPENED" }, + { DB_FH_UNLINK, "DB_FH_UNLINK" }, + { 0, NULL } + }; + + if (fh == NULL) { + STAT_ISSET(tag, fh); + return; + } + + STAT_STRING("file-handle.file name", fh->name); + + __mutex_print_debug_single( + env, "file-handle.mutex", fh->mtx_fh, flags); + + STAT_LONG("file-handle.reference count", fh->ref); + STAT_LONG("file-handle.file descriptor", fh->fd); + + STAT_ULONG("file-handle.page number", fh->pgno); + STAT_ULONG("file-handle.page size", fh->pgsize); + STAT_ULONG("file-handle.page offset", fh->offset); + + STAT_ULONG("file-handle.seek count", fh->seek_count); + STAT_ULONG("file-handle.read count", fh->read_count); + STAT_ULONG("file-handle.write count", fh->write_count); + + __db_prflags(env, NULL, fh->flags, fn, NULL, "\tfile-handle.flags"); +} + +/* + * __db_print_fileid -- + * Print out a file ID. + * + * PUBLIC: void __db_print_fileid __P((ENV *, u_int8_t *, const char *)); + */ +void +__db_print_fileid(env, id, suffix) + ENV *env; + u_int8_t *id; + const char *suffix; +{ + DB_MSGBUF mb; + int i; + + if (id == NULL) { + STAT_ISSET("ID", id); + return; + } + + DB_MSGBUF_INIT(&mb); + for (i = 0; i < DB_FILE_ID_LEN; ++i, ++id) { + __db_msgadd(env, &mb, "%x", (u_int)*id); + if (i < DB_FILE_ID_LEN - 1) + __db_msgadd(env, &mb, " "); + } + if (suffix != NULL) + __db_msgadd(env, &mb, "%s", suffix); + DB_MSGBUF_FLUSH(env, &mb); +} + +/* + * __db_dl -- + * Display a big value. + * + * PUBLIC: void __db_dl __P((ENV *, const char *, u_long)); + */ +void +__db_dl(env, msg, value) + ENV *env; + const char *msg; + u_long value; +{ + /* + * Two formats: if less than 10 million, display as the number, if + * greater than 10 million display as ###M. 
+ */ + if (value < 10000000) + __db_msg(env, "%lu\t%s", value, msg); + else + __db_msg(env, "%luM\t%s (%lu)", value / 1000000, msg, value); +} + +/* + * __db_dl_pct -- + * Display a big value, and related percentage. + * + * PUBLIC: void __db_dl_pct + * PUBLIC: __P((ENV *, const char *, u_long, int, const char *)); + */ +void +__db_dl_pct(env, msg, value, pct, tag) + ENV *env; + const char *msg, *tag; + u_long value; + int pct; +{ + DB_MSGBUF mb; + + DB_MSGBUF_INIT(&mb); + + /* + * Two formats: if less than 10 million, display as the number, if + * greater than 10 million, round it off and display as ###M. + */ + if (value < 10000000) + __db_msgadd(env, &mb, "%lu\t%s", value, msg); + else + __db_msgadd(env, + &mb, "%luM\t%s", (value + 500000) / 1000000, msg); + if (tag == NULL) + __db_msgadd(env, &mb, " (%d%%)", pct); + else + __db_msgadd(env, &mb, " (%d%% %s)", pct, tag); + + DB_MSGBUF_FLUSH(env, &mb); +} + +/* + * __db_dlbytes -- + * Display a big number of bytes. + * + * PUBLIC: void __db_dlbytes + * PUBLIC: __P((ENV *, const char *, u_long, u_long, u_long)); + */ +void +__db_dlbytes(env, msg, gbytes, mbytes, bytes) + ENV *env; + const char *msg; + u_long gbytes, mbytes, bytes; +{ + DB_MSGBUF mb; + const char *sep; + + DB_MSGBUF_INIT(&mb); + + /* Normalize the values. 
*/ + while (bytes >= MEGABYTE) { + ++mbytes; + bytes -= MEGABYTE; + } + while (mbytes >= GIGABYTE / MEGABYTE) { + ++gbytes; + mbytes -= GIGABYTE / MEGABYTE; + } + + if (gbytes == 0 && mbytes == 0 && bytes == 0) + __db_msgadd(env, &mb, "0"); + else { + sep = ""; + if (gbytes > 0) { + __db_msgadd(env, &mb, "%luGB", gbytes); + sep = " "; + } + if (mbytes > 0) { + __db_msgadd(env, &mb, "%s%luMB", sep, mbytes); + sep = " "; + } + if (bytes >= 1024) { + __db_msgadd(env, &mb, "%s%luKB", sep, bytes / 1024); + bytes %= 1024; + sep = " "; + } + if (bytes > 0) + __db_msgadd(env, &mb, "%s%luB", sep, bytes); + } + + __db_msgadd(env, &mb, "\t%s", msg); + + DB_MSGBUF_FLUSH(env, &mb); +} + +/* + * __db_print_reginfo -- + * Print out underlying shared region information. + * + * PUBLIC: void __db_print_reginfo + * PUBLIC: __P((ENV *, REGINFO *, const char *, u_int32_t)); + */ +void +__db_print_reginfo(env, infop, s, flags) + ENV *env; + REGINFO *infop; + const char *s; + u_int32_t flags; +{ + static const FN fn[] = { + { REGION_CREATE, "REGION_CREATE" }, + { REGION_CREATE_OK, "REGION_CREATE_OK" }, + { REGION_JOIN_OK, "REGION_JOIN_OK" }, + { REGION_SHARED, "REGION_SHARED" }, + { 0, NULL } + }; + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "%s REGINFO information:", s); + STAT_STRING("Region type", __reg_type(infop->type)); + STAT_ULONG("Region ID", infop->id); + STAT_STRING("Region name", infop->name); + STAT_POINTER("Region address", infop->addr); + STAT_POINTER("Region allocation head", infop->head); + STAT_POINTER("Region primary address", infop->primary); + STAT_ULONG("Region maximum allocation", infop->max_alloc); + STAT_ULONG("Region allocated", infop->allocated); + __env_alloc_print(infop, flags); + + __db_prflags(env, NULL, infop->flags, fn, NULL, "\tRegion flags"); +} + +/* + * __reg_type -- + * Return the region type string. 
+ */ +static const char * +__reg_type(t) + reg_type_t t; +{ + switch (t) { + case REGION_TYPE_ENV: + return ("Environment"); + case REGION_TYPE_LOCK: + return ("Lock"); + case REGION_TYPE_LOG: + return ("Log"); + case REGION_TYPE_MPOOL: + return ("Mpool"); + case REGION_TYPE_MUTEX: + return ("Mutex"); + case REGION_TYPE_TXN: + return ("Transaction"); + case INVALID_REGION_TYPE: + return ("Invalid"); + } + return ("Unknown"); +} + +#else /* !HAVE_STATISTICS */ + +/* + * __db_stat_not_built -- + * Common error routine when library not built with statistics. + * + * PUBLIC: int __db_stat_not_built __P((ENV *)); + */ +int +__db_stat_not_built(env) + ENV *env; +{ + __db_errx(env, DB_STR("1554", + "Library build did not include statistics support")); + return (DB_OPNOTSUP); +} + +int +__env_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/fileops/fileops.src b/src/fileops/fileops.src new file mode 100644 index 00000000..843f6135 --- /dev/null +++ b/src/fileops/fileops.src @@ -0,0 +1,137 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __fop + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE #include "dbinc/fop.h" +INCLUDE + +/* + * create -- create a file system object. + * + * name: name in the file system + * appname: indicates if the name needs to go through __db_appname + * mode: file system mode + */ +BEGIN_COMPAT create 42 143 +DBT name DBT s +ARG appname u_int32_t lu +ARG mode u_int32_t o +END + +BEGIN create 48 143 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +ARG mode u_int32_t o +END + +/* + * remove -- remove a file system object. 
+ * + * name: name in the file system + * appname: indicates if the name needs to go through __db_appname + */ +BEGIN remove 42 144 +DBT name DBT s +DBT fid DBT s +ARG appname u_int32_t lu +END + +/* + * write: log the writing of data into an object. + * + * name: file containing the page. + * appname: indicates if the name needs to go through __db_appname + * pgsize: page size. + * pageno: page number in the file. + * offset: offset on the page. + * page: the actual meta-data page. + * flag: non-0 indicates that this is a tempfile, so we needn't undo + * these modifications (we'll toss the file). + */ +BEGIN_COMPAT write 42 145 +DBT name DBT s +ARG appname u_int32_t lu +ARG pgsize u_int32_t lu +ARG pageno db_pgno_t lu +ARG offset u_int32_t lu +DBT page DBT s +ARG flag u_int32_t lu +END + +BEGIN write 48 145 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +ARG pgsize u_int32_t lu +ARG pageno db_pgno_t lu +ARG offset u_int32_t lu +DBT page DBT s +ARG flag u_int32_t lu +END + +/* + * rename: move a file from one name to another. + * The appname value indicates if this is a path name that should be used + * directly (i.e., no interpretation) or if it is a pathname that should + * be interpreted via calls to __db_appname. The fileid is the 20-byte + * DB fileid of the file being renamed. We need to check it on recovery + * so that we don't inadvertently overwrite good files. + * + * There are two variants of this log record: one that must be both done + * and undone and one that is not undone (used for renaming tmp files, see + * SR #15119) + * + * These two record types use the same structure, read, and print functions, + * but have different recovery functions. 
+ */ +BEGIN_COMPAT rename 42 146 +DUPLICATE rename_noundo 46 150 +DBT oldname DBT s +DBT newname DBT s +DBT fileid DBT s +ARG appname u_int32_t lu +END + +BEGIN rename 48 146 +DUPLICATE rename_noundo 46 150 +DBT oldname DBT s +DBT newname DBT s +DBT dirname DBT s +DBT fileid DBT s +ARG appname u_int32_t lu +END + +/* + * File removal record. This is a DB-level log record that indicates + * we've just completed some form of file removal. The purpose of this + * log record is to logically identify the particular instance of the + * named file so that during recovery, in deciding if we should roll-forward + * a remove or a rename, we can make sure that we don't roll one forward and + * delete or overwrite the wrong file. + * real_fid: The 20-byte unique file identifier of the original file being + * removed. + * tmp_fid: The unique fid of the tmp file that is removed. + * name: The pre- __db_appname name of the file + * child: The transaction that removed or renamed the file. + */ + +BEGIN file_remove 42 141 +DBT real_fid DBT s +DBT tmp_fid DBT s +DBT name DBT s +ARG appname u_int32_t lu +ARG child u_int32_t lx +END diff --git a/src/fileops/fileops_auto.c b/src/fileops/fileops_auto.c new file mode 100644 index 00000000..0db619a5 --- /dev/null +++ b/src/fileops/fileops_auto.c @@ -0,0 +1,118 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" +#include "dbinc/fop.h" + +DB_LOG_RECSPEC __fop_create_42_desc[] = { + {LOGREC_DBT, SSZ(__fop_create_42_args, name), "name", ""}, + {LOGREC_ARG, SSZ(__fop_create_42_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_create_42_args, mode), "mode", "%o"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_create_desc[] = { + {LOGREC_DBT, SSZ(__fop_create_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_create_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_create_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_create_args, mode), "mode", "%o"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_remove_desc[] = { + {LOGREC_DBT, SSZ(__fop_remove_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_remove_args, fid), "fid", ""}, + {LOGREC_ARG, SSZ(__fop_remove_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_write_42_desc[] = { + {LOGREC_DBT, SSZ(__fop_write_42_args, name), "name", ""}, + {LOGREC_ARG, SSZ(__fop_write_42_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_42_args, pgsize), "pgsize", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_42_args, pageno), "pageno", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_42_args, offset), "offset", "%lu"}, + {LOGREC_DBT, SSZ(__fop_write_42_args, page), "page", ""}, + {LOGREC_ARG, SSZ(__fop_write_42_args, flag), "flag", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_write_desc[] = { + {LOGREC_DBT, SSZ(__fop_write_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_write_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_write_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_args, pgsize), "pgsize", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_args, pageno), "pageno", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_args, offset), "offset", "%lu"}, + {LOGREC_DBT, 
SSZ(__fop_write_args, page), "page", ""}, + {LOGREC_ARG, SSZ(__fop_write_args, flag), "flag", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_rename_42_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_42_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_42_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_42_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_rename_noundo_46_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_42_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_42_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_42_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_rename_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, dirname), "dirname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_rename_noundo_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, dirname), "dirname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_file_remove_desc[] = { + {LOGREC_DBT, SSZ(__fop_file_remove_args, real_fid), "real_fid", ""}, + {LOGREC_DBT, SSZ(__fop_file_remove_args, tmp_fid), "tmp_fid", ""}, + {LOGREC_DBT, SSZ(__fop_file_remove_args, name), "name", ""}, + {LOGREC_ARG, SSZ(__fop_file_remove_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_file_remove_args, child), "child", 
"%lx"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __fop_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__fop_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_create_recover, DB___fop_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_remove_recover, DB___fop_remove)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_recover, DB___fop_write)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_recover, DB___fop_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_noundo_recover, DB___fop_rename_noundo)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_file_remove_recover, DB___fop_file_remove)) != 0) + return (ret); + return (0); +} diff --git a/src/fileops/fileops_autop.c b/src/fileops/fileops_autop.c new file mode 100644 index 00000000..6e271a17 --- /dev/null +++ b/src/fileops/fileops_autop.c @@ -0,0 +1,177 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" +#include "dbinc/fop.h" + +/* + * PUBLIC: int __fop_create_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_create_42", __fop_create_42_desc, info)); +} + +/* + * PUBLIC: int __fop_create_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_create", __fop_create_desc, info)); +} + +/* + * PUBLIC: int __fop_remove_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_remove_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_remove", __fop_remove_desc, info)); +} + +/* + * PUBLIC: int __fop_write_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_write_42", __fop_write_42_desc, info)); +} + +/* + * PUBLIC: int __fop_write_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + 
return (__log_print_record(env, dbtp, lsnp, "__fop_write", __fop_write_desc, info)); +} + +/* + * PUBLIC: int __fop_rename_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_rename_42", __fop_rename_42_desc, info)); +} + +/* + * PUBLIC: int __fop_rename_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_rename", __fop_rename_desc, info)); +} + +/* + * PUBLIC: int __fop_file_remove_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_file_remove_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_file_remove", __fop_file_remove_desc, info)); +} + +/* + * PUBLIC: int __fop_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__fop_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_create_print, DB___fop_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_remove_print, DB___fop_remove)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_print, DB___fop_write)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_print, DB___fop_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_print, DB___fop_rename_noundo)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, 
dtabp, + __fop_file_remove_print, DB___fop_file_remove)) != 0) + return (ret); + return (0); +} diff --git a/src/fileops/fop_basic.c b/src/fileops/fop_basic.c new file mode 100644 index 00000000..0de5b788 --- /dev/null +++ b/src/fileops/fop_basic.c @@ -0,0 +1,318 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/fop.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#include "dbinc/db_am.h" + +/* + * The transactional guarantees Berkeley DB provides for file + * system level operations (database physical file create, delete, + * rename) are based on our understanding of current file system + * semantics; a system that does not provide these semantics and + * guarantees could be in danger. + * + * First, as in standard database changes, fsync and fdatasync must + * work: when applied to the log file, the records written into the + * log must be transferred to stable storage. + * + * Second, it must not be possible for the log file to be removed + * without previous file system level operations being flushed to + * stable storage. Berkeley DB applications write log records + * describing file system operations into the log, then perform the + * file system operation, then commit the enclosing transaction + * (which flushes the log file to stable storage). Subsequently, + * a database environment checkpoint may make it possible for the + * application to remove the log file containing the record of the + * file system operation. DB's transactional guarantees for file + * system operations require the log file removal not succeed until + * all previous filesystem operations have been flushed to stable + * storage. 
In other words, the flush of the log file, or the + * removal of the log file, must block until all previous + * filesystem operations have been flushed to stable storage. This + * semantic is not, as far as we know, required by any existing + * standards document, but we have never seen a filesystem where + * it does not apply. + */ + +/* + * __fop_create -- + * Create a (transactionally protected) file system object. This is used + * to create DB files now, potentially blobs, queue extents and anything + * else you wish to store in a file system object. + * + * PUBLIC: int __fop_create __P((ENV *, DB_TXN *, + * PUBLIC: DB_FH **, const char *, const char **, APPNAME, int, u_int32_t)); + */ +int +__fop_create(env, txn, fhpp, name, dirp, appname, mode, flags) + ENV *env; + DB_TXN *txn; + DB_FH **fhpp; + const char *name, **dirp; + APPNAME appname; + int mode; + u_int32_t flags; +{ + DBT data, dirdata; + DB_FH *fhp; + DB_LSN lsn; + int ret; + char *real_name; + + real_name = NULL; + fhp = NULL; + + if ((ret = __db_appname(env, appname, name, dirp, &real_name)) != 0) + return (ret); + + if (mode == 0) + mode = DB_MODE_600; + + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) { + DB_INIT_DBT(data, name, strlen(name) + 1); + if (dirp != NULL && *dirp != NULL) + DB_INIT_DBT(dirdata, *dirp, strlen(*dirp) + 1); + else + memset(&dirdata, 0, sizeof(dirdata)); + if ((ret = __fop_create_log(env, txn, &lsn, + flags | DB_FLUSH, + &data, &dirdata, (u_int32_t)appname, (u_int32_t)mode)) != 0) + goto err; + } + + DB_ENV_TEST_RECOVERY(env, DB_TEST_POSTLOG, ret, name); + + if (fhpp == NULL) + fhpp = &fhp; + ret = __os_open( + env, real_name, 0, DB_OSO_CREATE | DB_OSO_EXCL, mode, fhpp); + +err: +DB_TEST_RECOVERY_LABEL + if (fhpp == &fhp && fhp != NULL) + (void)__os_closehandle(env, fhp); + if (real_name != NULL) + __os_free(env, real_name); + return (ret); +} + +/* + * __fop_remove -- + * Remove a file system object. 
+ * + * PUBLIC: int __fop_remove __P((ENV *, DB_TXN *, + * PUBLIC: u_int8_t *, const char *, const char **, APPNAME, u_int32_t)); + */ +int +__fop_remove(env, txn, fileid, name, dirp, appname, flags) + ENV *env; + DB_TXN *txn; + u_int8_t *fileid; + const char *name, **dirp; + APPNAME appname; + u_int32_t flags; +{ + DBT fdbt, ndbt; + DB_LSN lsn; + char *real_name; + int ret; + + real_name = NULL; + + if ((ret = __db_appname(env, appname, name, dirp, &real_name)) != 0) + goto err; + + if (!IS_REAL_TXN(txn)) { + if (fileid != NULL && (ret = __memp_nameop( + env, fileid, NULL, real_name, NULL, 0)) != 0) + goto err; + } else { + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) { + memset(&fdbt, 0, sizeof(ndbt)); + fdbt.data = fileid; + fdbt.size = fileid == NULL ? 0 : DB_FILE_ID_LEN; + DB_INIT_DBT(ndbt, name, strlen(name) + 1); + if ((ret = __fop_remove_log(env, txn, &lsn, + flags, &ndbt, &fdbt, (u_int32_t)appname)) != 0) + goto err; + } + ret = __txn_remevent(env, txn, real_name, fileid, 0); + } + +err: if (real_name != NULL) + __os_free(env, real_name); + return (ret); +} + +/* + * __fop_write + * + * Write "size" bytes from "buf" to file "name" beginning at offset "off." + * If the file is open, supply a handle in fhp. Istmp indicate if this is + * an operation that needs to be undone in the face of failure (i.e., if + * this is a write to a temporary file, we're simply going to remove the + * file, so don't worry about undoing the write). + * + * Currently, we *only* use this with istmp true. If we need more general + * handling, then we'll have to zero out regions on abort (and possibly + * log the before image of the data in the log record). 
+ * + * PUBLIC: int __fop_write __P((ENV *, DB_TXN *, + * PUBLIC: const char *, const char *, APPNAME, DB_FH *, u_int32_t, + * PUBLIC: db_pgno_t, u_int32_t, void *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__fop_write(env, txn, + name, dirname, appname, fhp, pgsize, pageno, off, buf, size, istmp, flags) + ENV *env; + DB_TXN *txn; + const char *name, *dirname; + APPNAME appname; + DB_FH *fhp; + u_int32_t pgsize; + db_pgno_t pageno; + u_int32_t off; + void *buf; + u_int32_t size, istmp, flags; +{ + DBT data, namedbt, dirdbt; + DB_LSN lsn; + size_t nbytes; + int local_open, ret, t_ret; + char *real_name; + + DB_ASSERT(env, istmp != 0); + + ret = local_open = 0; + real_name = NULL; + + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) { + memset(&data, 0, sizeof(data)); + data.data = buf; + data.size = size; + DB_INIT_DBT(namedbt, name, strlen(name) + 1); + if (dirname != NULL) + DB_INIT_DBT(dirdbt, dirname, strlen(dirname) + 1); + else + memset(&dirdbt, 0, sizeof(dirdbt)); + if ((ret = __fop_write_log(env, txn, + &lsn, flags, &namedbt, &dirdbt, (u_int32_t)appname, + pgsize, pageno, off, &data, istmp)) != 0) + goto err; + } + + if (fhp == NULL) { + /* File isn't open; we need to reopen it. */ + if ((ret = __db_appname(env, + appname, name, &dirname, &real_name)) != 0) + return (ret); + + if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) + goto err; + local_open = 1; + } + + /* Seek to offset. */ + if ((ret = __os_seek(env, fhp, pageno, pgsize, off)) != 0) + goto err; + + /* Now do the write. */ + if ((ret = __os_write(env, fhp, buf, size, &nbytes)) != 0) + goto err; + +err: if (local_open && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + + if (real_name != NULL) + __os_free(env, real_name); + return (ret); +} + +/* + * __fop_rename -- + * Change a file's name. 
+ * + * PUBLIC: int __fop_rename __P((ENV *, DB_TXN *, const char *, const char *, + * PUBLIC: const char **, u_int8_t *, APPNAME, int, u_int32_t)); + */ +int +__fop_rename(env, txn, oldname, newname, dirp, fid, appname, with_undo, flags) + ENV *env; + DB_TXN *txn; + const char *oldname; + const char *newname; + const char **dirp; + u_int8_t *fid; + APPNAME appname; + int with_undo; + u_int32_t flags; +{ + DBT fiddbt, dir, new, old; + DB_LSN lsn; + int ret; + char *n, *o; + + o = n = NULL; + if ((ret = __db_appname(env, appname, oldname, dirp, &o)) != 0) + goto err; + if ((ret = __db_appname(env, appname, newname, dirp, &n)) != 0) + goto err; + + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) { + DB_INIT_DBT(old, oldname, strlen(oldname) + 1); + DB_INIT_DBT(new, newname, strlen(newname) + 1); + if (dirp != NULL && *dirp != NULL) + DB_INIT_DBT(dir, *dirp, strlen(*dirp) + 1); + else + memset(&dir, 0, sizeof(dir)); + memset(&fiddbt, 0, sizeof(fiddbt)); + fiddbt.data = fid; + fiddbt.size = DB_FILE_ID_LEN; + if (with_undo) + ret = __fop_rename_log(env, + txn, &lsn, flags | DB_FLUSH, + &old, &new, &dir, &fiddbt, (u_int32_t)appname); + else + ret = __fop_rename_noundo_log(env, + txn, &lsn, flags | DB_FLUSH, + &old, &new, &dir, &fiddbt, (u_int32_t)appname); + if (ret != 0) + goto err; + } + + ret = __memp_nameop(env, fid, newname, o, n, 0); + +err: if (o != NULL) + __os_free(env, o); + if (n != NULL) + __os_free(env, n); + return (ret); +} diff --git a/src/fileops/fop_rec.c b/src/fileops/fop_rec.c new file mode 100644 index 00000000..dcbd022d --- /dev/null +++ b/src/fileops/fop_rec.c @@ -0,0 +1,697 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/fop.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __fop_rename_recover_int + __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); +static int __fop_rename_42_recover_int + __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); + +/* + * The transactional guarantees Berkeley DB provides for file + * system level operations (database physical file create, delete, + * rename) are based on our understanding of current file system + * semantics; a system that does not provide these semantics and + * guarantees could be in danger. + * + * First, as in standard database changes, fsync and fdatasync must + * work: when applied to the log file, the records written into the + * log must be transferred to stable storage. + * + * Second, it must not be possible for the log file to be removed + * without previous file system level operations being flushed to + * stable storage. Berkeley DB applications write log records + * describing file system operations into the log, then perform the + * file system operation, then commit the enclosing transaction + * (which flushes the log file to stable storage). Subsequently, + * a database environment checkpoint may make it possible for the + * application to remove the log file containing the record of the + * file system operation. DB's transactional guarantees for file + * system operations require the log file removal not succeed until + * all previous filesystem operations have been flushed to stable + * storage. In other words, the flush of the log file, or the + * removal of the log file, must block until all previous + * filesystem operations have been flushed to stable storage. This + * semantic is not, as far as we know, required by any existing + * standards document, but we have never seen a filesystem where + * it does not apply. 
+ */ + +/* + * __fop_create_recover -- + * Recovery function for create. + * + * PUBLIC: int __fop_create_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_create_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_create_args *argp; + DB_FH *fhp; + DBMETA *meta; + u_int8_t mbuf[DBMETASIZE]; + int ret; + char *real_name; + const char *dirname; + + COMPQUIET(info, NULL); + + real_name = NULL; + REC_PRINT(__fop_create_print); + REC_NOOP_INTRO(__fop_create_read); + meta = (DBMETA *)mbuf; + + if (argp->dirname.size == 0) + dirname = NULL; + else + dirname = (const char *)argp->dirname.data; + + if ((ret = __db_appname(env, (APPNAME)argp->appname == DB_APP_DATA ? + DB_APP_RECOVER : (APPNAME)argp->appname, + (const char *)argp->name.data, &dirname, &real_name)) != 0) + goto out; + + if (DB_UNDO(op)) { + /* + * If the file was opened in mpool, we must mark it as + * dead via nameop which will also unlink the file. + */ + if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { + if (__fop_read_meta(env, + real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, 1) == 0) { + if ((ret = __memp_nameop(env, + meta->uid, NULL, real_name, NULL, 0)) != 0) + goto out; + } else { + (void)__os_closehandle(env, fhp); + goto do_unlink; + } + (void)__os_closehandle(env, fhp); + } else +do_unlink: (void)__os_unlink(env, real_name, 0); + } else if (DB_REDO(op)) { + if ((ret = __os_open(env, real_name, 0, + DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) + (void)__os_closehandle(env, fhp); + else + goto out; + } + + *lsnp = argp->prev_lsn; + +out: if (real_name != NULL) + __os_free(env, real_name); + + REC_NOOP_CLOSE; +} + +/* + * __fop_create_42_recover -- + * Recovery function for create. 
+ * + * PUBLIC: int __fop_create_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_create_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_create_args *argp; + DB_FH *fhp; + DBMETA *meta; + u_int8_t mbuf[DBMETASIZE]; + int ret; + char *real_name; + + COMPQUIET(info, NULL); + + real_name = NULL; + REC_PRINT(__fop_create_print); + REC_NOOP_INTRO(__fop_create_read); + meta = (DBMETA *)mbuf; + + if ((ret = __db_appname(env, (APPNAME)argp->appname, + (const char *)argp->name.data, NULL, &real_name)) != 0) + goto out; + + if (DB_UNDO(op)) { + /* + * If the file was opened in mpool, we must mark it as + * dead via nameop which will also unlink the file. + */ + if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { + if (__fop_read_meta(env, + real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, 1) == 0) { + if ((ret = __memp_nameop(env, + meta->uid, NULL, real_name, NULL, 0)) != 0) + goto out; + } else + goto do_unlink; + (void)__os_closehandle(env, fhp); + } else +do_unlink: (void)__os_unlink(env, real_name, 0); + } else if (DB_REDO(op)) { + if ((ret = __os_open(env, real_name, 0, + DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) + (void)__os_closehandle(env, fhp); + else + goto out; + } + + *lsnp = argp->prev_lsn; + +out: if (real_name != NULL) + __os_free(env, real_name); + + REC_NOOP_CLOSE; +} + +/* + * __fop_remove_recover -- + * Recovery function for remove. 
+ * + * PUBLIC: int __fop_remove_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_remove_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_remove_args *argp; + int ret; + char *real_name; + + COMPQUIET(info, NULL); + + real_name = NULL; + REC_PRINT(__fop_remove_print); + REC_NOOP_INTRO(__fop_remove_read); + + if ((ret = __db_appname(env, (APPNAME)argp->appname, + (const char *)argp->name.data, NULL, &real_name)) != 0) + goto out; + + /* Its ok if the file is not there. */ + if (DB_REDO(op)) + (void)__memp_nameop(env, + (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0); + + *lsnp = argp->prev_lsn; +out: if (real_name != NULL) + __os_free(env, real_name); + REC_NOOP_CLOSE; +} + +/* + * __fop_write_recover -- + * Recovery function for writechunk. + * + * PUBLIC: int __fop_write_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_args *argp; + int ret; + + COMPQUIET(info, NULL); + + REC_PRINT(__fop_write_print); + REC_NOOP_INTRO(__fop_write_read); + + ret = 0; + if (DB_UNDO(op)) + DB_ASSERT(env, argp->flag != 0); + else if (DB_REDO(op)) + ret = __fop_write(env, + argp->txnp, argp->name.data, + argp->dirname.size == 0 ? NULL : argp->dirname.data, + (APPNAME)argp->appname == DB_APP_DATA ? DB_APP_RECOVER : + (APPNAME)argp->appname, + NULL, argp->pgsize, argp->pageno, argp->offset, + argp->page.data, argp->page.size, argp->flag, 0); + + if (ret == 0) + *lsnp = argp->prev_lsn; + REC_NOOP_CLOSE; +} + +/* + * __fop_write_42_recover -- + * Recovery function for writechunk. 
+ * + * PUBLIC: int __fop_write_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_args *argp; + int ret; + + COMPQUIET(info, NULL); + + REC_PRINT(__fop_write_print); + REC_NOOP_INTRO(__fop_write_read); + + ret = 0; + if (DB_UNDO(op)) + DB_ASSERT(env, argp->flag != 0); + else if (DB_REDO(op)) + ret = __fop_write(env, + argp->txnp, argp->name.data, NULL, (APPNAME)argp->appname, + NULL, argp->pgsize, argp->pageno, argp->offset, + argp->page.data, argp->page.size, argp->flag, 0); + + if (ret == 0) + *lsnp = argp->prev_lsn; + REC_NOOP_CLOSE; +} + +/* + * __fop_rename_recover -- + * Recovery functions for rename. There are two variants that + * both use the same utility function. Had we known about this on day + * one, we would have simply added a parameter. However, since we need + * to retain old records for backward compatibility (online-upgrade) + * wrapping the two seems like the right solution. 
+ * + * PUBLIC: int __fop_rename_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * PUBLIC: int __fop_rename_noundo_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_rename_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 1)); +} + +int +__fop_rename_noundo_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 0)); +} + +static int +__fop_rename_recover_int(env, dbtp, lsnp, op, info, undo) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; + int undo; +{ + __fop_rename_args *argp; + APPNAME appname; + DB_FH *fhp; + DBMETA *meta; + u_int8_t *fileid, mbuf[DBMETASIZE]; + int ret; + char *real_new, *real_old, *src; + const char *dirname; + + COMPQUIET(info, NULL); + + fhp = NULL; + meta = (DBMETA *)&mbuf[0]; + ret = 0; + real_new = real_old = NULL; + + REC_PRINT(__fop_rename_print); + REC_NOOP_INTRO(__fop_rename_read); + fileid = argp->fileid.data; + + if (argp->dirname.size == 0) + dirname = NULL; + else + dirname = (const char *)argp->dirname.data; + + if ((APPNAME)argp->appname == DB_APP_DATA) + appname = DB_APP_RECOVER; + else + appname = (APPNAME)argp->appname; + + if ((ret = __db_appname(env, appname, (const char *)argp->newname.data, + &dirname, &real_new)) != 0) + goto out; + if ((ret = __db_appname(env, appname, (const char *)argp->oldname.data, + &dirname, &real_old)) != 0) + goto out; + + /* + * Verify that we are manipulating the correct file. We should always + * be OK on an ABORT or an APPLY, but during recovery, we have to + * check. + */ + if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { + src = DB_UNDO(op) ? 
real_new : real_old; + /* + * Interpret any error as meaning that the file either doesn't + * exist, doesn't have a meta-data page, or is in some other + * way, shape or form, incorrect, so that we should not restore + * it. + */ + if (__os_open(env, src, 0, 0, 0, &fhp) != 0) + goto done; + if (__fop_read_meta(env, + src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) + goto done; + if (__db_chk_meta(env, NULL, meta, 1) != 0) + goto done; + if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) + goto done; + (void)__os_closehandle(env, fhp); + fhp = NULL; + if (DB_REDO(op)) { + /* + * Check to see if the target file exists. If it + * does and it does not have the proper id then + * it is a later version. We just remove the source + * file since the state of the world is beyond this + * point. + */ + if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && + __fop_read_meta(env, src, mbuf, + DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, 1) == 0 && + memcmp(argp->fileid.data, + meta->uid, DB_FILE_ID_LEN) != 0) { + (void)__memp_nameop(env, + fileid, NULL, real_old, NULL, 0); + goto done; + } + } + } + + if (undo && DB_UNDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->oldname.data, real_new, real_old, 0); + if (DB_REDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->newname.data, real_old, real_new, 0); + +done: *lsnp = argp->prev_lsn; +out: if (real_new != NULL) + __os_free(env, real_new); + if (real_old != NULL) + __os_free(env, real_old); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + + REC_NOOP_CLOSE; +} +/* + * __fop_rename_42_recover -- + * Recovery functions for rename. There are two variants that + * both use the same utility function. Had we known about this on day + * one, we would have simply added a parameter. However, since we need + * to retain old records for backward compatibility (online-upgrade) + * wrapping the two seems like the right solution. 
+ * + * PUBLIC: int __fop_rename_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * PUBLIC: int __fop_rename_noundo_46_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_rename_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, 1)); +} + +int +__fop_rename_noundo_46_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, 0)); +} + +static int +__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; + int undo; +{ + __fop_rename_args *argp; + DB_FH *fhp; + DBMETA *meta; + u_int8_t *fileid, mbuf[DBMETASIZE]; + int ret; + char *real_new, *real_old, *src; + + COMPQUIET(info, NULL); + + fhp = NULL; + meta = (DBMETA *)&mbuf[0]; + ret = 0; + real_new = real_old = NULL; + + REC_PRINT(__fop_rename_print); + REC_NOOP_INTRO(__fop_rename_read); + fileid = argp->fileid.data; + + if ((ret = __db_appname(env, (APPNAME)argp->appname, + (const char *)argp->newname.data, NULL, &real_new)) != 0) + goto out; + if ((ret = __db_appname(env, (APPNAME)argp->appname, + (const char *)argp->oldname.data, NULL, &real_old)) != 0) + goto out; + + /* + * Verify that we are manipulating the correct file. We should always + * be OK on an ABORT or an APPLY, but during recovery, we have to + * check. + */ + if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { + src = DB_UNDO(op) ? real_new : real_old; + /* + * Interpret any error as meaning that the file either doesn't + * exist, doesn't have a meta-data page, or is in some other + * way, shape or form, incorrect, so that we should not restore + * it. 
+ */ + if (__os_open(env, src, 0, 0, 0, &fhp) != 0) + goto done; + if (__fop_read_meta(env, + src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) + goto done; + if (__db_chk_meta(env, NULL, meta, 1) != 0) + goto done; + if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) + goto done; + (void)__os_closehandle(env, fhp); + fhp = NULL; + if (DB_REDO(op)) { + /* + * Check to see if the target file exists. If it + * does and it does not have the proper id then + * it is a later version. We just remove the source + * file since the state of the world is beyond this + * point. + */ + if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && + __fop_read_meta(env, src, mbuf, + DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, 1) == 0 && + memcmp(argp->fileid.data, + meta->uid, DB_FILE_ID_LEN) != 0) { + (void)__memp_nameop(env, + fileid, NULL, real_old, NULL, 0); + goto done; + } + } + } + + if (undo && DB_UNDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->oldname.data, real_new, real_old, 0); + if (DB_REDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->newname.data, real_old, real_new, 0); + +done: *lsnp = argp->prev_lsn; +out: if (real_new != NULL) + __os_free(env, real_new); + if (real_old != NULL) + __os_free(env, real_old); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + + REC_NOOP_CLOSE; +} + +/* + * __fop_file_remove_recover -- + * Recovery function for file_remove. On the REDO pass, we need to + * make sure no one recreated the file while we weren't looking. On an + * undo pass must check if the file we are interested in is the one that + * exists and then set the status of the child transaction depending on + * what we find out. 
+ * + * PUBLIC: int __fop_file_remove_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_file_remove_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_file_remove_args *argp; + DBMETA *meta; + DB_FH *fhp; + size_t len; + u_int8_t mbuf[DBMETASIZE]; + u_int32_t cstat, ret_stat; + int is_real, is_tmp, ret; + char *real_name; + + fhp = NULL; + meta = (DBMETA *)&mbuf[0]; + is_real = is_tmp = 0; + real_name = NULL; + REC_PRINT(__fop_file_remove_print); + REC_NOOP_INTRO(__fop_file_remove_read); + + /* + * This record is only interesting on the backward, forward, and + * apply phases. + */ + if (op != DB_TXN_BACKWARD_ROLL && + op != DB_TXN_FORWARD_ROLL && op != DB_TXN_APPLY) + goto done; + + if ((ret = __db_appname(env, (APPNAME)argp->appname, + argp->name.data, NULL, &real_name)) != 0) + goto out; + + /* Verify that we are manipulating the correct file. */ + len = 0; + if (__os_open(env, real_name, 0, 0, 0, &fhp) != 0 || + (ret = __fop_read_meta(env, real_name, + mbuf, DBMETASIZE, fhp, 1, &len)) != 0) { + /* + * If len is non-zero, then the file exists and has something + * in it, but that something isn't a full meta-data page, so + * this is very bad. Bail out! + */ + if (len != 0) + goto out; + + /* File does not exist. */ + cstat = TXN_EXPECTED; + } else { + /* + * We can ignore errors here since we'll simply fail the + * checks below and assume this is the wrong file. + */ + (void)__db_chk_meta(env, NULL, meta, 1); + is_real = + memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; + is_tmp = + memcmp(argp->tmp_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; + + if (!is_real && !is_tmp) + /* File exists, but isn't what we were removing. */ + cstat = TXN_IGNORE; + else + /* File exists and is the one that we were removing. 
*/ + cstat = TXN_COMMIT; + } + if (fhp != NULL) { + (void)__os_closehandle(env, fhp); + fhp = NULL; + } + + if (DB_UNDO(op)) { + /* On the backward pass, we leave a note for the child txn. */ + if ((ret = __db_txnlist_update(env, + info, argp->child, cstat, NULL, &ret_stat, 1)) != 0) + goto out; + } else if (DB_REDO(op)) { + /* + * On the forward pass, check if someone recreated the + * file while we weren't looking. + */ + if (cstat == TXN_COMMIT) + (void)__memp_nameop(env, + is_real ? argp->real_fid.data : argp->tmp_fid.data, + NULL, real_name, NULL, 0); + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (real_name != NULL) + __os_free(env, real_name); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + REC_NOOP_CLOSE; +} diff --git a/src/fileops/fop_util.c b/src/fileops/fop_util.c new file mode 100644 index 00000000..aeb00249 --- /dev/null +++ b/src/fileops/fop_util.c @@ -0,0 +1,1770 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/hash.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __fop_set_pgsize __P((DB *, DB_FH *, const char *)); +static int __fop_inmem_create __P((DB *, const char *, DB_TXN *, u_int32_t)); +static int __fop_inmem_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *)); +static int __fop_inmem_read_meta __P((DB *, DB_TXN *, const char *, u_int32_t)); +static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *, + const char *, const char *, const char *, DB_LOCKER *)); +static int __fop_ondisk_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *)); +static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *, + const char *, const char *, const char *, DB_LOCKER *)); + +/* + * Acquire the environment meta-data lock. 
The parameters are the + * environment (ENV), the locker id to use in acquiring the lock (ID) + * and a pointer to a DB_LOCK. + * + * !!! + * Turn off locking for Critical Path. The application must do its own + * synchronization of open/create. Two threads creating and opening a + * file at the same time may have unpredictable results. + */ +#ifdef CRITICALPATH_10266 +#define GET_ENVLOCK(ENV, ID, L) (0) +#else +#define GET_ENVLOCK(ENV, ID, L) do { \ + DBT __dbt; \ + u_int32_t __lockval; \ + \ + if (LOCKING_ON((ENV))) { \ + __lockval = 1; \ + __dbt.data = &__lockval; \ + __dbt.size = sizeof(__lockval); \ + if ((ret = __lock_get((ENV), (ID), \ + 0, &__dbt, DB_LOCK_WRITE, (L))) != 0) \ + goto err; \ + } \ +} while (0) +#endif + +#define RESET_MPF(D, F) do { \ + (void)__memp_fclose((D)->mpf, (F)); \ + (D)->mpf = NULL; \ + F_CLR((D), DB_AM_OPEN_CALLED); \ + if ((ret = __memp_fcreate((D)->env, &(D)->mpf)) != 0) \ + goto err; \ +} while (0) + +/* + * If we open a file handle and our caller is doing fcntl(2) locking, + * we can't close the handle because that would discard the caller's + * lock. Save it until we close or refresh the DB handle. + */ +#define CLOSE_HANDLE(D, F) { \ + if ((F) != NULL) { \ + if (LF_ISSET(DB_FCNTL_LOCKING)) \ + (D)->saved_open_fhp = (F); \ + else if ((t_ret = \ + __os_closehandle((D)->env, (F))) != 0) { \ + if (ret == 0) \ + ret = t_ret; \ + goto err; \ + } \ + (F) = NULL; \ + } \ +} + +/* + * __fop_lock_handle -- + * + * Get the handle lock for a database. If the envlock is specified, do this + * as a lock_vec call that releases the environment lock before acquiring the + * handle lock. 
+ * + * PUBLIC: int __fop_lock_handle __P((ENV *, + * PUBLIC: DB *, DB_LOCKER *, db_lockmode_t, DB_LOCK *, u_int32_t)); + * + */ +int +__fop_lock_handle(env, dbp, locker, mode, elockp, flags) + ENV *env; + DB *dbp; + DB_LOCKER *locker; + db_lockmode_t mode; + DB_LOCK *elockp; + u_int32_t flags; +{ + DBT fileobj; + DB_LOCKREQ reqs[2], *ereq; + DB_LOCK_ILOCK lock_desc; + int ret; + + if (!LOCKING_ON(env) || + F_ISSET(dbp, DB_AM_COMPENSATE | DB_AM_RECOVER)) + return (0); + + /* + * If we are in recovery, the only locking we should be + * doing is on the global environment. + */ + if (IS_RECOVERING(env)) + return (elockp == NULL ? 0 : __ENV_LPUT(env, *elockp)); + + memcpy(lock_desc.fileid, dbp->fileid, DB_FILE_ID_LEN); + lock_desc.pgno = dbp->meta_pgno; + lock_desc.type = DB_HANDLE_LOCK; + + memset(&fileobj, 0, sizeof(fileobj)); + fileobj.data = &lock_desc; + fileobj.size = sizeof(lock_desc); + DB_TEST_SUBLOCKS(env, flags); + if (elockp == NULL) + ret = __lock_get(env, locker, + flags, &fileobj, mode, &dbp->handle_lock); + else { + reqs[0].op = DB_LOCK_PUT; + reqs[0].lock = *elockp; + reqs[1].op = DB_LOCK_GET; + reqs[1].mode = mode; + reqs[1].obj = &fileobj; + reqs[1].timeout = 0; + if ((ret = __lock_vec(env, + locker, flags, reqs, 2, &ereq)) == 0) { + dbp->handle_lock = reqs[1].lock; + if (elockp != &dbp->handle_lock) + LOCK_INIT(*elockp); + } else if (ereq != reqs) + LOCK_INIT(*elockp); + } + + dbp->cur_locker = locker; + return (ret); +} + +/* + * __fop_file_setup -- + * + * Perform all the needed checking and locking to open up or create a + * file. + * + * There's a reason we don't push this code down into the buffer cache. + * The problem is that there's no information external to the file that + * we can use as a unique ID. UNIX has dev/inode pairs, but they are + * not necessarily unique after reboot, if the file was mounted via NFS. + * Windows has similar problems, as the FAT filesystem doesn't maintain + * dev/inode numbers across reboot. 
So, we must get something from the + * file we can use to ensure that, even after a reboot, the file we're + * joining in the cache is the right file for us to join. The solution + * we use is to maintain a file ID that's stored in the database, and + * that's why we have to open and read the file before calling into the + * buffer cache or obtaining a lock (we use this unique fileid to lock + * as well as to identify like files in the cache). + * + * There are a couple of idiosyncrasies that this code must support, in + * particular, DB_TRUNCATE and DB_FCNTL_LOCKING. First, we disallow + * DB_TRUNCATE in the presence of transactions, since opening a file with + * O_TRUNC will result in data being lost in an unrecoverable fashion. + * We also disallow DB_TRUNCATE if locking is enabled, because even in + * the presence of locking, we cannot avoid race conditions, so allowing + * DB_TRUNCATE with locking would be misleading. See SR [#7345] for more + * details. + * + * However, if you are running with neither locking nor transactions, then + * you can specify DB_TRUNCATE, and if you do so, we will truncate the file + * regardless of its contents. + * + * FCNTL locking introduces another set of complications. First, the only + * reason we support the DB_FCNTL_LOCKING flag is for historic compatibility + * with programs like Sendmail and Postfix. In these cases, the caller may + * already have a lock on the file; we need to make sure that any file handles + * we open remain open, because if we were to close them, the lock held by the + * caller would go away. Furthermore, Sendmail and/or Postfix need the ability + * to create databases in empty files. So, when you're doing FCNTL locking, + * it's reasonable that you are trying to create a database into a 0-length + * file and we allow it, while under normal conditions, we do not create + * databases if the files already exist and are not Berkeley DB files. 
+ * + * PUBLIC: int __fop_file_setup __P((DB *, DB_THREAD_INFO *ip, + * PUBLIC: DB_TXN *, const char *, int, u_int32_t, u_int32_t *)); + */ +int +__fop_file_setup(dbp, ip, txn, name, mode, flags, retidp) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + int mode; + u_int32_t flags, *retidp; +{ + DBTYPE save_type; + DB_FH *fhp; + DB_LOCK elock; + DB_LOCKER *locker; + DB_TXN *stxn; + ENV *env; + size_t len; + APPNAME aflags; + u_int32_t dflags, oflags; + u_int8_t mbuf[DBMETASIZE]; + int created_locker, create_ok, ret, retries, t_ret, tmp_created; + int truncating, was_inval; + char *real_name, *real_tmpname, *tmpname; + + *retidp = TXN_INVALID; + + env = dbp->env; + fhp = NULL; + LOCK_INIT(elock); + stxn = NULL; + created_locker = tmp_created = truncating = was_inval = 0; + real_name = real_tmpname = tmpname = NULL; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + aflags = LF_ISSET(DB_INTERNAL_DB) ? DB_APP_NONE : DB_APP_DATA; + LF_CLR(DB_INTERNAL_DB); + + ret = 0; + retries = 0; + save_type = dbp->type; + + /* + * Get a lockerid for this handle. There are paths through queue + * rename and remove where this dbp already has a locker, so make + * sure we don't clobber it and conflict. + */ + if (LOCKING_ON(env) && + !F_ISSET(dbp, DB_AM_COMPENSATE) && + !F_ISSET(dbp, DB_AM_RECOVER) && + dbp->locker == DB_LOCK_INVALIDID) { + if ((ret = __lock_id(env, NULL, &dbp->locker)) != 0) + goto err; + created_locker = 1; + } + LOCK_INIT(dbp->handle_lock); + + if (txn != NULL && dbp->locker != NULL && F_ISSET(txn, TXN_INFAMILY)) { + if ((ret = __lock_addfamilylocker(env, + txn->txnid, dbp->locker->id, 1)) != 0) + goto err; + txn = NULL; + } + + locker = txn == NULL ? dbp->locker : txn->locker; + + oflags = 0; + if (F_ISSET(dbp, DB_AM_INMEM)) + real_name = (char *)name; + else { + /* Get the real backing file name. 
*/ + if ((ret = __db_appname(env, + aflags, name, &dbp->dirname, &real_name)) != 0) + goto err; + + /* Fill in the default file mode. */ + if (mode == 0) + mode = DB_MODE_660; + + if (LF_ISSET(DB_RDONLY)) + oflags |= DB_OSO_RDONLY; + if (LF_ISSET(DB_TRUNCATE)) + oflags |= DB_OSO_TRUNC; + } + + retries = 0; + create_ok = LF_ISSET(DB_CREATE); + LF_CLR(DB_CREATE); + +retry: + /* + * If we cannot create the file, only retry a few times. We + * think we might be in a race with another create, but it could + * be that the backup filename exists (that is, is left over from + * a previous crash). + */ + if (++retries > DB_RETRY) { + __db_errx(env, DB_STR_A("0002", + "__fop_file_setup: Retry limit (%d) exceeded", "%d"), + DB_RETRY); + goto err; + } + if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER)) + GET_ENVLOCK(env, locker, &elock); + if (name == NULL) + ret = ENOENT; + else if (F_ISSET(dbp, DB_AM_INMEM)) { + ret = __env_mpool(dbp, name, flags); + /* + * We are using __env_open as a check for existence. + * However, __env_mpool does an actual open and there + * are scenarios where the object exists, but cannot be + * opened, because our settings don't match those internally. + * We need to check for that explicitly. We'll need the + * mpool open to read the meta-data page, so we're going to + * have to temporarily turn this dbp into an UNKNOWN one. + */ + if (ret == EINVAL) { + was_inval = 1; + save_type = dbp->type; + dbp->type = DB_UNKNOWN; + ret = __env_mpool(dbp, name, flags); + dbp->type = save_type; + } + } else + ret = __os_exists(env, real_name, NULL); + + if (ret == 0) { + /* + * If the file exists, there are 5 possible cases: + * 1. DB_EXCL was specified so this is an error, unless + * this is a file left around after a rename and we + * are in the same transaction. This gets decomposed + * into several subcases, because we check for various + * errors before we know we're in rename. + * 2. 
We are truncating, and it doesn't matter what kind + * of file it is, we should open/create it. + * 3. It is 0-length, we are not doing transactions (i.e., + * we are sendmail), we should open/create into it. + * -- on-disk files only! + * 4. Is it a Berkeley DB file and we should simply open it. + * 5. It is not a BDB file and we should return an error. + */ + + /* Open file (if there is one). */ +reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = + __os_open(env, real_name, 0, oflags, 0, &fhp)) != 0) + goto err; + + /* Case 2: DB_TRUNCATE: we must do the creation in place. */ + if (LF_ISSET(DB_TRUNCATE)) { + if (LF_ISSET(DB_EXCL)) { + /* Case 1a: DB_EXCL and DB_TRUNCATE. */ + ret = EEXIST; + goto err; + } + tmpname = (char *)name; + goto creat2; + } + + /* Cases 1,3-5: we need to read the meta-data page. */ + if (F_ISSET(dbp, DB_AM_INMEM)) { + if (LOGGING_ON(env) && (ret = __env_dbreg_setup(dbp, + txn, NULL, name, TXN_INVALID)) != 0) + return (ret); + ret = __fop_inmem_read_meta(dbp, txn, name, flags); + } else { + ret = __fop_read_meta(env, real_name, mbuf, + sizeof(mbuf), fhp, + LF_ISSET(DB_NOERROR) || + (LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL) ? 1 : 0, + &len); + + /* Case 3: 0-length, no txns. */ + if (ret != 0 && len == 0 && txn == NULL) { + if (LF_ISSET(DB_EXCL)) { + /* + * Case 1b: DB_EXCL and + * 0-length file exists. + */ + ret = EEXIST; + goto err; + } + tmpname = (char *)name; + if (create_ok) + goto creat2; + goto done; + } + + /* Case 4: This is a valid file. */ + if (ret == 0) + ret = __db_meta_setup(env, dbp, real_name, + (DBMETA *)mbuf, flags, DB_CHK_META); + + } + + /* Case 5: Invalid file. */ + if (ret != 0) + goto err; + + /* Now, get our handle lock. 
*/ + if ((ret = __fop_lock_handle(env, + dbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) == 0) { + if ((ret = __ENV_LPUT(env, elock)) != 0) + goto err; + } else if (ret != DB_LOCK_NOTGRANTED || + (txn != NULL && F_ISSET(txn, TXN_NOWAIT))) + goto err; + else { + PERFMON3(env, + race, fop_file_setup, (char *) name, ret, flags); + /* + * We were unable to acquire the handle lock without + * blocking. The fact that we are blocking might mean + * that someone else is trying to delete the file. + * Since some platforms cannot delete files while they + * are open (Windows), we are going to have to close + * the file. This would be a problem if we were doing + * FCNTL locking, because our closing the handle would + * release the FCNTL locks. Fortunately, if we are + * doing FCNTL locking, then we should never fail to + * acquire our handle lock, so we should never get here. + * We assert it here to make sure we aren't destroying + * any application level FCNTL semantics. + */ + DB_ASSERT(env, !LF_ISSET(DB_FCNTL_LOCKING)); + if (!F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __os_closehandle(env, fhp)) != 0) + goto err; + fhp = NULL; + } + if ((ret = __fop_lock_handle(env, + dbp, locker, DB_LOCK_READ, &elock, 0)) != 0) { + if (F_ISSET(dbp, DB_AM_INMEM)) + RESET_MPF(dbp, 0); + goto err; + } + + /* + * If we had to wait, we might be waiting on a + * dummy file used in create/destroy of a database. + * To be sure we have the correct information we + * try again. + */ + if ((ret = __db_refresh(dbp, + txn, DB_NOSYNC, NULL, 1)) != 0) + goto err; + if ((ret = + __ENV_LPUT(env, dbp->handle_lock)) != 0) { + LOCK_INIT(dbp->handle_lock); + goto err; + } + goto retry; + + } + + /* If we got here, then we have the handle lock. */ + + /* + * Check for a file in the midst of a rename. 
If we find that + * the file is in the midst of a rename, it must be the case + * that it is in our current transaction (else we would still + * be blocking), so we can continue along and create a new file + * with the same name. In that case, we have to close the file + * handle because we reuse it below. This is a case where + * a 'was_inval' above is OK. + */ + if (F_ISSET(dbp, DB_AM_IN_RENAME)) { + was_inval = 0; + if (create_ok) { + if (F_ISSET(dbp, DB_AM_INMEM)) { + RESET_MPF(dbp, DB_MPOOL_DISCARD); + } else if ((ret = + __os_closehandle(env, fhp)) != 0) + goto err; + LF_SET(DB_CREATE); + goto create; + } else { + ret = ENOENT; + goto err; + } + } + + /* If we get here, a was_inval is bad. */ + if (was_inval) { + ret = EINVAL; + goto err; + } + + /* + * Now, case 1: check for DB_EXCL, because the file that exists + * is not in the middle of a rename, so we have an error. This + * is a weird case, but we need to make sure that we don't + * continue to hold the handle lock, since technically, we + * should not have been allowed to open it. + */ + if (LF_ISSET(DB_EXCL)) { + ret = __ENV_LPUT(env, dbp->handle_lock); + LOCK_INIT(dbp->handle_lock); + if (ret == 0) + ret = EEXIST; + goto err; + } + goto done; + } + + /* File does not exist. */ +#ifdef HAVE_VXWORKS + /* + * VxWorks can return file-system specific error codes if the + * file does not exist, not ENOENT. + */ + if (!create_ok) +#else + if (!create_ok || ret != ENOENT) +#endif + goto err; + LF_SET(DB_CREATE); + /* + * If we were trying to open a non-existent master database + * readonly clear that here. + */ + LF_CLR(DB_RDONLY); + F_CLR(dbp, DB_AM_RDONLY); + ret = 0; + + /* + * We need to create file, which means that we need to set up the file, + * the fileid and the locks. Then we need to call the appropriate + * routines to create meta-data pages. For in-memory files, we retain + * the environment lock, while for on-disk files, we drop the env lock + * and create into a temporary. 
+ */ + if (!F_ISSET(dbp, DB_AM_INMEM) && + (ret = __ENV_LPUT(env, elock)) != 0) + goto err; + +create: if (txn != NULL && IS_REP_CLIENT(env) && + !F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + __db_errx(env, DB_STR("0003", + "Transactional create on replication client disallowed")); + ret = EINVAL; + goto err; + } + + if (F_ISSET(dbp, DB_AM_INMEM)) { + if (LOGGING_ON(env) && (ret = + __env_dbreg_setup(dbp, txn, NULL, name, TXN_INVALID)) != 0) + return (ret); + if ((ret = __fop_inmem_create(dbp, name, txn, flags)) != 0) + return (ret); + } else { + if ((ret = __db_backup_name(env, name, txn, &tmpname)) != 0) + goto err; + if (TXN_ON(env) && txn != NULL && + (ret = __txn_begin(env, NULL, txn, &stxn, 0)) != 0) + goto err; + if ((ret = __fop_create(env, stxn, &fhp, + tmpname, &dbp->dirname, aflags, mode, dflags)) != 0) { + /* + * If no transactions, there is a race on creating the + * backup file, as the backup file name is the same for + * all processes. Wait for the other process to finish + * with the name. + */ + if (!TXN_ON(env) && ret == EEXIST) { + PERFMON3(env, + race, fop_file_setup, tmpname, ret, flags); + __os_free(env, tmpname); + tmpname = NULL; + __os_yield(env, 1, 0); + goto retry; + } + goto err; + } + tmp_created = 1; + } + +creat2: if (!F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __db_appname(env, + aflags, tmpname, &dbp->dirname, &real_tmpname)) != 0) + goto err; + + /* Set the pagesize if it isn't yet set. */ + if (dbp->pgsize == 0 && + (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0) + goto errmsg; + + /* Construct a file_id. */ + if ((ret = + __os_fileid(env, real_tmpname, 1, dbp->fileid)) != 0) + goto errmsg; + } + + if ((ret = __db_new_file(dbp, ip, + F_ISSET(dbp, DB_AM_INMEM) ? txn : stxn, fhp, tmpname)) != 0) + goto err; + + /* Output the REOPEN record after we create. 
*/ + if (F_ISSET(dbp, DB_AM_INMEM) && dbp->log_filename != NULL && (ret = + __dbreg_log_id(dbp, txn, dbp->log_filename->id, 0)) != 0) + return (ret); + + /* + * We need to close the handle here on platforms where remove and + * rename fail if a handle is open (including Windows). + */ + CLOSE_HANDLE(dbp, fhp); + + /* + * Now move the file into place unless we are creating in place (because + * we created a database in a file that started out 0-length). If + * this is an in-memory file, we may or may not hold the environment + * lock depending on how we got here. + */ + if (!F_ISSET(dbp, DB_AM_COMPENSATE) && + !F_ISSET(dbp, DB_AM_RECOVER) && !LOCK_ISSET(elock)) + GET_ENVLOCK(env, locker, &elock); + + if (F_ISSET(dbp, DB_AM_IN_RENAME)) { + F_CLR(dbp, DB_AM_IN_RENAME); + __txn_remrem(env, txn, real_name); + } else if (name == tmpname) { + /* We created it in place. */ + } else if (!F_ISSET(dbp, DB_AM_INMEM) && + __os_exists(env, real_name, NULL) == 0) { + /* + * Someone managed to create the file; remove our temp + * and try to open the file that now exists. 
+ */ + (void)__fop_remove(env, NULL, + dbp->fileid, tmpname, &dbp->dirname, aflags, dflags); + (void)__ENV_LPUT(env, dbp->handle_lock); + LOCK_INIT(dbp->handle_lock); + + if (stxn != NULL) { + ret = __txn_abort(stxn); + stxn = NULL; + } + if (ret != 0) + goto err; + goto reopen; + } + + if (name != NULL && (ret = __fop_lock_handle(env, + dbp, locker, DB_LOCK_WRITE, NULL, NOWAIT_FLAG(txn))) != 0) + goto err; + if (tmpname != NULL && + tmpname != name && (ret = __fop_rename(env, stxn, tmpname, + name, &dbp->dirname, dbp->fileid, aflags, 1, dflags)) != 0) + goto err; + if ((ret = __ENV_LPUT(env, elock)) != 0) + goto err; + + if (stxn != NULL) { + *retidp = stxn->txnid; + ret = __txn_commit(stxn, 0); + stxn = NULL; + } else + *retidp = TXN_INVALID; + + if (ret != 0) + goto err; + + F_SET(dbp, DB_AM_CREATED); + + if (0) { +errmsg: __db_err(env, ret, "%s", name); + +err: CLOSE_HANDLE(dbp, fhp); + if (stxn != NULL) + (void)__txn_abort(stxn); + if (tmp_created && txn == NULL) + (void)__fop_remove(env, + NULL, NULL, tmpname, NULL, aflags, dflags); + if (txn == NULL) + (void)__ENV_LPUT(env, dbp->handle_lock); + (void)__ENV_LPUT(env, elock); + if (created_locker) { + (void)__lock_id_free(env, dbp->locker); + dbp->locker = NULL; + } + } + +done: /* + * There are cases where real_name and tmpname take on the + * exact same string, so we need to make sure that we do not + * free twice. + */ + if (!truncating && tmpname != NULL && tmpname != name) + __os_free(env, tmpname); + if (real_name != name && real_name != NULL) + __os_free(env, real_name); + if (real_tmpname != NULL) + __os_free(env, real_tmpname); + CLOSE_HANDLE(dbp, fhp); + + return (ret); +} + +/* + * __fop_set_pgsize -- + * Set the page size based on file information. 
+ */ +static int +__fop_set_pgsize(dbp, fhp, name) + DB *dbp; + DB_FH *fhp; + const char *name; +{ + ENV *env; + u_int32_t iopsize; + int ret; + + env = dbp->env; + + /* + * Use the filesystem's optimum I/O size as the pagesize if a pagesize + * not specified. Some filesystems have 64K as their optimum I/O size, + * but as that results in fairly large default caches, we limit the + * default pagesize to 16K. + */ + if ((ret = __os_ioinfo(env, name, fhp, NULL, NULL, &iopsize)) != 0) { + __db_err(env, ret, "%s", name); + return (ret); + } + if (iopsize < 512) + iopsize = 512; + if (iopsize > 16 * 1024) + iopsize = 16 * 1024; + + /* + * Sheer paranoia, but we don't want anything that's not a power-of-2 + * (we rely on that for alignment of various types on the pages), and + * we want a multiple of the sector size as well. If the value + * we got out of __os_ioinfo looks bad, use a default instead. + */ + if (!IS_VALID_PAGESIZE(iopsize)) + iopsize = DB_DEF_IOSIZE; + + dbp->pgsize = iopsize; + F_SET(dbp, DB_AM_PGDEF); + + return (0); +} + +/* + * __fop_subdb_setup -- + * + * Subdb setup is significantly simpler than file setup. In terms of + * locking, for the duration of the operation/transaction, the locks on + * the meta-data page will suffice to protect us from simultaneous operations + * on the sub-database. Before we complete the operation though, we'll get a + * handle lock on the subdatabase so that on one else can try to remove it + * while we've got it open. We use an object that looks like the meta-data + * page lock with a different type (DB_HANDLE_LOCK) for the long-term handle. + * locks. 
+ * + * PUBLIC: int __fop_subdb_setup __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: const char *, const char *, int, u_int32_t)); + */ +int +__fop_subdb_setup(dbp, ip, txn, mname, name, mode, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *mname, *name; + int mode; + u_int32_t flags; +{ + DB *mdbp; + ENV *env; + db_lockmode_t lkmode; + u_int32_t mflags; + int ret, t_ret; + + mdbp = NULL; + env = dbp->env; + + mflags = flags | DB_RDONLY; +retry: if ((ret = __db_master_open(dbp, + ip, txn, mname, mflags, mode, &mdbp)) != 0) + return (ret); + /* + * If we created this file, then we need to set the DISCARD flag so + * that if we fail in the middle of this routine, we discard from the + * mpool any pages that we just created. + */ + if (F_ISSET(mdbp, DB_AM_CREATED)) + F_SET(mdbp, DB_AM_DISCARD); + + /* + * We are going to close this instance of the master, so we can + * steal its handle instead of reopening a handle on the database. + */ + if (LF_ISSET(DB_FCNTL_LOCKING)) { + dbp->saved_open_fhp = mdbp->saved_open_fhp; + mdbp->saved_open_fhp = NULL; + } + + /* Copy the pagesize and set the sub-database flag. */ + dbp->pgsize = mdbp->pgsize; + F_SET(dbp, DB_AM_SUBDB); + + if (name != NULL && (ret = __db_master_update(mdbp, dbp, + ip, txn, name, dbp->type, MU_OPEN, NULL, flags)) != 0) { + if (ret == EBADF && F_ISSET(mdbp, DB_AM_RDONLY)) { + /* We need to reopen the master R/W to do the create. */ + if ((ret = __db_close(mdbp, txn, 0)) != 0) + goto err; + FLD_CLR(mflags, DB_RDONLY); + goto retry; + } + goto err; + } + + /* + * Hijack the master's locker ID as well, so that our locks don't + * conflict with the master's. Since we're closing the master, + * that locker would just have been freed anyway. Once we've gotten + * the locker id, we need to acquire the handle lock for this + * subdatabase. 
+ */ + dbp->locker = mdbp->locker; + mdbp->locker = NULL; + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, mname); + + /* + * We copy our fileid from our master so that we all open + * the same file in mpool. We'll use the meta-pgno to lock + * so that we end up with different handle locks. + */ + + memcpy(dbp->fileid, mdbp->fileid, DB_FILE_ID_LEN); + lkmode = F_ISSET(dbp, DB_AM_CREATED) || LF_ISSET(DB_WRITEOPEN) ? + DB_LOCK_WRITE : DB_LOCK_READ; + if ((ret = __fop_lock_handle(env, dbp, + txn == NULL ? dbp->locker : txn->locker, lkmode, NULL, + NOWAIT_FLAG(txn))) != 0) + goto err; + + if ((ret = __db_init_subdb(mdbp, dbp, name, ip, txn)) != 0) { + /* + * If there was no transaction and we created this database, + * then we need to undo the update of the master database. + */ + if (F_ISSET(dbp, DB_AM_CREATED) && txn == NULL) + (void)__db_master_update(mdbp, dbp, + ip, txn, name, dbp->type, MU_REMOVE, NULL, 0); + F_CLR(dbp, DB_AM_CREATED); + goto err; + } + + /* + * XXX + * This should have been done at the top of this routine. The problem + * is that __db_init_subdb() uses "standard" routines to process the + * meta-data page and set information in the DB handle based on it. + * Those routines have to deal with swapped pages and will normally set + * the DB_AM_SWAP flag. However, we use the master's metadata page and + * that has already been swapped, so they get the is-swapped test wrong. + */ + F_CLR(dbp, DB_AM_SWAP); + F_SET(dbp, F_ISSET(mdbp, DB_AM_SWAP)); + + /* + * In the file create case, these happen in separate places so we have + * two different tests. They end up in the same place for subdbs, but + * for compatibility with file testing, we put them both here anyway. + */ + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, mname); + DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, mname); + + /* + * File exists and we have the appropriate locks; we should now + * process a normal open. 
+ */ + if (F_ISSET(mdbp, DB_AM_CREATED)) { + F_SET(dbp, DB_AM_CREATED_MSTR); + F_CLR(mdbp, DB_AM_DISCARD); + } + + if (0) { +err: +DB_TEST_RECOVERY_LABEL + if (txn == NULL) + (void)__ENV_LPUT(env, dbp->handle_lock); + } + + /* + * The master's handle lock is under the control of the + * subdb (it acquired the master's locker). We want to + * keep the master's handle lock so that no one can remove + * the file while the subdb is open. If we register the + * trade event and then invalidate the copy of the lock + * in the master's handle, that will accomplish this. However, + * before we register this event, we'd better remove any + * events that we've already registered for the master. + */ + if (!F_ISSET(dbp, DB_AM_RECOVER) && IS_REAL_TXN(txn)) { + /* Unregister old master events. */ + __txn_remlock(env, + txn, &mdbp->handle_lock, DB_LOCK_INVALIDID); + + /* Now register the new event. */ + if ((t_ret = __txn_lockevent(env, txn, dbp, + &mdbp->handle_lock, dbp->locker == NULL ? + mdbp->locker : dbp->locker)) != 0 && ret == 0) + ret = t_ret; + } + LOCK_INIT(mdbp->handle_lock); + + /* + * If the master was created, we need to sync so that the metadata + * page is correct on disk for recovery, since it isn't read through + * mpool. If we're opening a subdb in an existing file, we can skip + * the sync. + */ + if ((t_ret = __db_close(mdbp, txn, + F_ISSET(dbp, DB_AM_CREATED_MSTR) ? 0 : DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __fop_remove_setup -- + * Open handle appropriately and lock for removal of a database file. + * + * PUBLIC: int __fop_remove_setup __P((DB *, + * PUBLIC: DB_TXN *, const char *, u_int32_t)); + */ +int +__fop_remove_setup(dbp, txn, name, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int32_t flags; +{ + DB_FH *fhp; + DB_LOCK elock; + ENV *env; + u_int8_t mbuf[DBMETASIZE]; + int ret; + + COMPQUIET(flags, 0); + + env = dbp->env; + + LOCK_INIT(elock); + fhp = NULL; + ret = 0; + + /* Create locker if necessary. 
*/ +retry: if (LOCKING_ON(env)) { + if (IS_REAL_TXN(txn)) + dbp->locker = txn->locker; + else if (dbp->locker == DB_LOCK_INVALIDID) { + if ((ret = __lock_id(env, NULL, &dbp->locker)) != 0) + goto err; + if (txn != NULL && F_ISSET(txn, TXN_INFAMILY) && + (ret = __lock_addfamilylocker(env, + txn->txnid, dbp->locker->id, 1)) != 0) + goto err; + } + } + + /* + * We are about to open a file handle and then possibly close it. + * We cannot close handles if we are doing FCNTL locking. However, + * there is no way to pass the FCNTL flag into this routine via the + * user API. The only way we can get in here and be doing FCNTL + * locking is if we are trying to clean up an open that was called + * with FCNTL locking. In that case, the save_fhp should already be + * set. So, we use that field to tell us if we need to make sure + * that we shouldn't close the handle. + */ + fhp = dbp->saved_open_fhp; + DB_ASSERT(env, LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL); + + /* + * Lock environment to protect file open. That will enable us to + * read the meta-data page and get the fileid so that we can lock + * the handle. + */ + GET_ENVLOCK(env, dbp->locker, &elock); + + /* Open database. */ + if (F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __env_mpool(dbp, name, flags)) == 0) + ret = __os_strdup(env, name, &dbp->dname); + } else if (fhp == NULL) + ret = __os_open(env, name, 0, DB_OSO_RDONLY, 0, &fhp); + if (ret != 0) + goto err; + + /* Get meta-data */ + if (F_ISSET(dbp, DB_AM_INMEM)) + ret = __fop_inmem_read_meta(dbp, txn, name, flags); + else if ((ret = __fop_read_meta(env, + name, mbuf, sizeof(mbuf), fhp, 0, NULL)) == 0) + ret = __db_meta_setup(env, dbp, + name, (DBMETA *)mbuf, flags, DB_CHK_META | DB_CHK_NOLSN); + if (ret != 0) + goto err; + + /* + * Now, get the handle lock. We first try with NOWAIT, because if + * we have to wait, we're going to have to close the file and reopen + * it, so that if there is someone else removing it, our open doesn't + * prevent that. 
+ */ + if ((ret = __fop_lock_handle(env, + dbp, dbp->locker, DB_LOCK_WRITE, NULL, DB_LOCK_NOWAIT)) != 0) { + /* + * Close the file, block on the lock, clean up the dbp, and + * then start all over again. + */ + if (!F_ISSET(dbp, DB_AM_INMEM) && !LF_ISSET(DB_FCNTL_LOCKING)) { + (void)__os_closehandle(env, fhp); + fhp = NULL; + } + if (ret != DB_LOCK_NOTGRANTED || + (txn != NULL && F_ISSET(txn, TXN_NOWAIT))) + goto err; + else if ((ret = __fop_lock_handle(env, + dbp, dbp->locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + + if (F_ISSET(dbp, DB_AM_INMEM)) { + (void)__lock_put(env, &dbp->handle_lock); + (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 1); + } else { + if (txn != NULL) + dbp->locker = NULL; + (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 0); + } + goto retry; + } else if ((ret = __ENV_LPUT(env, elock)) != 0) + goto err; + else if (F_ISSET(dbp, DB_AM_IN_RENAME)) + ret = ENOENT; + + if (0) { +err: (void)__ENV_LPUT(env, elock); + } + if (fhp != NULL && !LF_ISSET(DB_FCNTL_LOCKING)) + (void)__os_closehandle(env, fhp); + /* + * If this is a real file and we are going to proceed with the removal, + * then we need to make sure that we don't leave any pages around in the + * mpool since the file is closed and will be reopened again before + * access. However, this might be an in-memory file, in which case + * we will handle the discard from the mpool later as it's the "real" + * removal of the database. + */ + if (ret == 0 && !F_ISSET(dbp, DB_AM_INMEM)) + F_SET(dbp, DB_AM_DISCARD); + return (ret); +} + +/* + * __fop_read_meta -- + * Read the meta-data page from a file and return it in buf. 
+ * + * PUBLIC: int __fop_read_meta __P((ENV *, const char *, + * PUBLIC: u_int8_t *, size_t, DB_FH *, int, size_t *)); + */ +int +__fop_read_meta(env, name, buf, size, fhp, errok, nbytesp) + ENV *env; + const char *name; + u_int8_t *buf; + size_t size; + DB_FH *fhp; + int errok; + size_t *nbytesp; +{ + size_t nr; + int ret; + + /* + * Our caller wants to know the number of bytes read, even if we + * return an error. + */ + if (nbytesp != NULL) + *nbytesp = 0; + + nr = 0; + ret = __os_read(env, fhp, buf, size, &nr); + if (nbytesp != NULL) + *nbytesp = nr; + + if (ret != 0) { + if (!errok) + __db_err(env, ret, "%s", name); + goto err; + } + + if (nr != size) { + if (!errok) + __db_errx(env, DB_STR_A("0004", + "fop_read_meta: %s: unexpected file type or format", + "%s"), name); + ret = EINVAL; + } + +err: + return (ret); +} + +/* + * __fop_dummy -- + * This implements the creation and name swapping of dummy files that + * we use for remove and rename (remove is simply a rename with a delayed + * remove). + * + * PUBLIC: int __fop_dummy __P((DB *, + * PUBLIC: DB_TXN *, const char *, const char *)); + */ +int +__fop_dummy(dbp, txn, old, new) + DB *dbp; + DB_TXN *txn; + const char *old, *new; +{ + DB *tmpdbp; + DB_TXN *stxn; + ENV *env; + char *back; + int ret, t_ret; + u_int8_t mbuf[DBMETASIZE]; + + env = dbp->env; + back = NULL; + stxn = NULL; + tmpdbp = NULL; + + DB_ASSERT(env, txn != NULL); + + /* + * Begin sub transaction to encapsulate the rename. Note that we + * expect the inmem_swap calls to complete the sub-transaction, + * aborting on error and committing on success. + */ + if (TXN_ON(env) && + (ret = __txn_begin(env, NULL, txn, &stxn, 0)) != 0) + goto err; + + /* We need to create a dummy file as a place holder. */ + if ((ret = __db_backup_name(env, new, stxn, &back)) != 0) + goto err; + /* Create a dummy dbp handle. 
*/ + if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0) + goto err; + if (F_ISSET(dbp, DB_AM_NOT_DURABLE) && + (ret = __db_set_flags(tmpdbp, DB_TXN_NOT_DURABLE)) != 0) + goto err; + memset(mbuf, 0, sizeof(mbuf)); + ret = F_ISSET(dbp, DB_AM_INMEM) ? + __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) : + __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf); + + if (ret != 0) + goto err; + + ret = F_ISSET(dbp, DB_AM_INMEM) ? + __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker) : + __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker); + stxn = NULL; + if (ret != 0) + goto err; + +err: if (stxn != NULL) + (void)__txn_abort(stxn); + if (tmpdbp != NULL && + (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + if (back != NULL) + __os_free(env, back); + return (ret); +} + +/* + * __fop_dbrename -- + * Do the appropriate file locking and file system operations + * to effect a dbrename in the absence of transactions (__fop_dummy + * and the subsequent calls in __db_rename do the work for the + * transactional case). + * + * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *)); + */ +int +__fop_dbrename(dbp, old, new) + DB *dbp; + const char *old, *new; +{ + DB_LOCK elock; + ENV *env; + char *real_new, *real_old; + int ret, t_ret; + + env = dbp->env; + real_new = NULL; + real_old = NULL; + LOCK_INIT(elock); + + if (F_ISSET(dbp, DB_AM_INMEM)) { + real_new = (char *)new; + real_old = (char *)old; + } else { + /* Get full names. */ + if ((ret = __db_appname(env, + DB_APP_DATA, old, &dbp->dirname, &real_old)) != 0) + goto err; + + if ((ret = __db_appname(env, + DB_APP_DATA, new, &dbp->dirname, &real_new)) != 0) + goto err; + } + + /* + * It is an error to rename a file over one that already exists, + * as that wouldn't be transaction-safe. We check explicitly + * for ondisk files, but it's done memp_nameop for in-memory ones. + */ + GET_ENVLOCK(env, dbp->locker, &elock); + ret = F_ISSET(dbp, DB_AM_INMEM) ? 
ENOENT : + __os_exists(env, real_new, NULL); + + if (ret == 0) { + ret = EEXIST; + __db_errx(env, DB_STR_A("0005", + "rename: file %s exists", "%s"), real_new); + goto err; + } + + ret = __memp_nameop(env, + dbp->fileid, new, real_old, real_new, F_ISSET(dbp, DB_AM_INMEM)); + +err: if ((t_ret = __ENV_LPUT(env, elock)) != 0 && ret == 0) + ret = t_ret; + if (!F_ISSET(dbp, DB_AM_INMEM) && real_old != NULL) + __os_free(env, real_old); + if (!F_ISSET(dbp, DB_AM_INMEM) && real_new != NULL) + __os_free(env, real_new); + return (ret); +} + +static int +__fop_inmem_create(dbp, name, txn, flags) + DB *dbp; + const char *name; + DB_TXN *txn; + u_int32_t flags; +{ + DBT fid_dbt, name_dbt; + DB_LSN lsn; + ENV *env; + int ret; + int32_t lfid; + u_int32_t dflags, *p32; + + env = dbp->env; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + + MAKE_INMEM(dbp); + + /* Set the pagesize if it isn't yet set. */ + if (dbp->pgsize == 0) + dbp->pgsize = DB_DEF_IOSIZE; + + /* + * Construct a file_id. + * + * If this file has no name, then we only need a fileid for locking. + * If this file has a name, we need the fileid both for locking and + * matching in the memory pool. So, with unnamed in-memory databases, + * use a lock_id. For named in-memory files, we need to find a value + * that we can use to uniquely identify a name/fid pair. We use a + * combination of a unique id (__os_unique_id) and a hash of the + * original name. + */ + if (name == NULL) { + if (LOCKING_ON(env) && (ret = + __lock_id(env, (u_int32_t *)dbp->fileid, NULL)) != 0) + goto err; + } else { + p32 = (u_int32_t *)(&dbp->fileid[0]); + __os_unique_id(env, p32); + p32++; + (void)strncpy( + (char *)p32, name, DB_FILE_ID_LEN - sizeof(u_int32_t)); + dbp->preserve_fid = 1; + + if (DBENV_LOGGING(env) && +#if !defined(DEBUG_WOP) && !defined(DIAGNOSTIC) + txn != NULL && +#endif + dbp->log_filename != NULL) + memcpy(dbp->log_filename->ufid, + dbp->fileid, DB_FILE_ID_LEN); + } + + /* Now, set the fileid. 
*/ + if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0) + goto err; + + if ((ret = __env_mpool(dbp, name, flags)) != 0) + goto err; + + if (DBENV_LOGGING(env) && +#if !defined(DEBUG_WOP) + txn != NULL && +#endif + name != NULL) { + DB_INIT_DBT(name_dbt, name, strlen(name) + 1); + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = dbp->fileid; + fid_dbt.size = DB_FILE_ID_LEN; + lfid = dbp->log_filename == NULL ? + DB_LOGFILEID_INVALID : dbp->log_filename->id; + if ((ret = __crdel_inmem_create_log(env, txn, + &lsn, dflags, lfid, &name_dbt, &fid_dbt, dbp->pgsize)) != 0) + goto err; + } + + F_SET(dbp, DB_AM_CREATED); + +err: + return (ret); +} + +static int +__fop_inmem_read_meta(dbp, txn, name, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int32_t flags; +{ + DBMETA *metap; + DB_THREAD_INFO *ip; + db_pgno_t pgno; + int ret, t_ret; + + if (txn == NULL) + ENV_GET_THREAD_INFO(dbp->env, ip); + else + ip = txn->thread_info; + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(dbp->mpf, &pgno, ip, txn, 0, &metap)) != 0) + return (ret); + ret = __db_meta_setup(dbp->env, dbp, name, metap, flags, DB_CHK_META); + + if ((t_ret = + __memp_fput(dbp->mpf, ip, metap, dbp->priority)) && ret == 0) + ret = t_ret; + + return (ret); +} + +static int +__fop_ondisk_dummy(dbp, txn, name, mbuf) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int8_t *mbuf; +{ + ENV *env; + int ret; + char *realname; + u_int32_t dflags; + + realname = NULL; + env = dbp->env; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? 
DB_LOG_NOT_DURABLE : 0; + + if ((ret = __db_appname(env, + DB_APP_DATA, name, &dbp->dirname, &realname)) != 0) + goto err; + + if ((ret = __fop_create(env, + txn, NULL, name, &dbp->dirname, DB_APP_DATA, 0, dflags)) != 0) + goto err; + + if ((ret = + __os_fileid(env, realname, 1, ((DBMETA *)mbuf)->uid)) != 0) + goto err; + + ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC; + if ((ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0) + goto err; + + memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN); + +err: if (realname != NULL) + __os_free(env, realname); + + return (ret); +} + +static int +__fop_inmem_dummy(dbp, txn, name, mbuf) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int8_t *mbuf; +{ + DBMETA *metap; + DB_THREAD_INFO *ip; + db_pgno_t pgno; + int ret, t_ret; + + if ((ret = __fop_inmem_create(dbp, name, txn, DB_CREATE)) != 0) + return (ret); + if (txn == NULL) + ENV_GET_THREAD_INFO(dbp->env, ip); + else + ip = txn->thread_info; + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(dbp->mpf, &pgno, ip, txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &metap)) != 0) + return (ret); + /* Check file existed. */ + if (metap->magic != 0) + ret = EEXIST; + else + metap->magic = DB_RENAMEMAGIC; + + /* Copy the fileid onto the meta-data page. */ + memcpy(metap->uid, dbp->fileid, DB_FILE_ID_LEN); + + if ((t_ret = __memp_fput(dbp->mpf, ip, metap, + ret == 0 ? 
dbp->priority : DB_PRIORITY_VERY_LOW)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0) + goto err; + + ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC; + +err: return (ret); +} + +static int +__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker) + DB *dbp, *tmpdbp; + DB_TXN *txn; + const char *old, *new, *back; + DB_LOCKER *locker; +{ + DBT fiddbt, namedbt, tmpdbt; + DB_FH *fhp; + DB_LOCK elock; + DB_LSN lsn; + DB_TXN *parent; + ENV *env; + u_int8_t mbuf[DBMETASIZE]; + u_int32_t child_txnid, dflags; + int ret, t_ret; + char *realold, *realnew; + + env = dbp->env; + DB_ASSERT(env, txn != NULL); + DB_ASSERT(env, old != NULL); + + realold = realnew = NULL; + LOCK_INIT(elock); + fhp = NULL; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + + if ((ret = __db_appname(env, + DB_APP_DATA, new, &dbp->dirname, &realnew)) != 0) + goto err; + + /* Now, lock the name space while we initialize this file. */ +retry: GET_ENVLOCK(env, locker, &elock); + if (__os_exists(env, realnew, NULL) == 0) { + /* + * It is possible that the only reason this file exists is + * because we've done a previous rename of it and we have + * left a placeholder here. We need to check for that case + * and allow this rename to succeed if that's the case. + */ + if ((ret = __os_open(env, realnew, 0, 0, 0, &fhp)) != 0) + goto err; + if ((ret = __fop_read_meta(env, + realnew, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0 || + (ret = __db_meta_setup(env, + tmpdbp, realnew, (DBMETA *)mbuf, 0, DB_CHK_META)) != 0) { + ret = EEXIST; + goto err; + } + ret = __os_closehandle(env, fhp); + fhp = NULL; + if (ret != 0) + goto err; + + /* + * Now, try to acquire the handle lock. If the handle is locked + * by our current, transaction, then we'll get it and life is + * good. + * + * Alternately, it's not locked at all, we'll get the lock, but + * we will realize it exists and consider this an error. 
+ * + * However, if it's held by another transaction, then there + * could be two different scenarios: 1) the file is in the + * midst of being created or deleted and when that transaction + * is over, we might be able to proceed. 2) the file is open + * and exists and we should report an error. In order to + * distinguish these two cases, we do the following. First, we + * try to acquire a READLOCK. If the handle is in the midst of + * being created, then we'll block because a writelock is held. + * In that case, we should request a blocking write, and when we + * get the lock, we should then go back and check to see if the + * object exists and start all over again. + * + * If we got the READLOCK, then either no one is holding the + * lock or someone has an open handle and the fact that the file + * exists is problematic. So, in this case, we request the + * WRITELOCK non-blocking -- if it succeeds, we're golden. If + * it fails, then the file exists and we return EEXIST. + */ + if ((ret = __fop_lock_handle(env, + tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) { + /* + * Someone holds a write-lock. Wait for the write-lock + * and after we get it, release it and start over. + */ + if ((ret = __fop_lock_handle(env, tmpdbp, + locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + if ((ret = + __lock_put(env, &tmpdbp->handle_lock)) != 0) + goto err; + if ((ret = __db_refresh(tmpdbp, NULL, 0, NULL, 0)) != 0) + goto err; + goto retry; + } + + /* We got the read lock; try to upgrade it. */ + ret = __fop_lock_handle(env, + tmpdbp, locker, DB_LOCK_WRITE, + NULL, DB_LOCK_UPGRADE | DB_LOCK_NOWAIT); + if (ret != 0) { + /* + * We did not get the writelock, so someone + * has the handle open. This is an error. + */ + (void)__lock_put(env, &tmpdbp->handle_lock); + ret = EEXIST; + } else if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) + /* We got the lock and are renaming it. */ + ret = 0; + else { /* We got the lock, but the file exists. 
*/ + (void)__lock_put(env, &tmpdbp->handle_lock); + ret = EEXIST; + } + if (ret != 0) + goto err; + } + + /* + * While we have the namespace locked, do the renames and then + * swap for the handle lock. + */ + if ((ret = __fop_rename(env, txn, + old, new, &dbp->dirname, dbp->fileid, DB_APP_DATA, 1, dflags)) != 0) + goto err; + if ((ret = __fop_rename(env, txn, back, old, + &dbp->dirname, tmpdbp->fileid, DB_APP_DATA, 0, dflags)) != 0) + goto err; + if ((ret = __fop_lock_handle(env, + tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0) + goto err; + + /* + * We just acquired a transactional lock on the tmp handle. + * We need to null out the tmp handle's lock so that it + * doesn't create problems for us in the close path. + */ + LOCK_INIT(tmpdbp->handle_lock); + + /* Commit the child. */ + child_txnid = txn->txnid; + parent = txn->parent; + ret = __txn_commit(txn, 0); + txn = NULL; + + /* + * If the new name is available because it was previously renamed + * remove it from the remove list. + */ + if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) + __txn_remrem(env, parent, realnew); + + /* Now log the child information in the parent. */ + memset(&fiddbt, 0, sizeof(fiddbt)); + fiddbt.data = dbp->fileid; + fiddbt.size = DB_FILE_ID_LEN; + memset(&tmpdbt, 0, sizeof(fiddbt)); + tmpdbt.data = tmpdbp->fileid; + tmpdbt.size = DB_FILE_ID_LEN; + DB_INIT_DBT(namedbt, old, strlen(old) + 1); + if ((t_ret = __fop_file_remove_log(env, + parent, &lsn, dflags, &fiddbt, &tmpdbt, &namedbt, + (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0) + ret = t_ret; + + /* This is a delayed delete of the dummy file. */ + if ((ret = __db_appname(env, + DB_APP_DATA, old, &dbp->dirname, &realold)) != 0) + goto err; + + if ((ret = __txn_remevent(env, parent, realold, NULL, 0)) != 0) + goto err; + +err: if (txn != NULL) /* Ret must already be set, so void abort. 
*/ + (void)__txn_abort(txn); + + (void)__ENV_LPUT(env, elock); + + if (fhp != NULL && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + + if (realnew != NULL) + __os_free(env, realnew); + if (realold != NULL) + __os_free(env, realold); + return (ret); +} + +static int +__fop_inmem_swap(olddbp, backdbp, txn, old, new, back, locker) + DB *olddbp, *backdbp; + DB_TXN *txn; + const char *old, *new, *back; + DB_LOCKER *locker; +{ + DB *tmpdbp; + DBT fid_dbt, n1_dbt, n2_dbt; + DB_LOCK elock; + DB_LSN lsn; + DB_TXN *parent; + ENV *env; + int ret, t_ret; + + env = olddbp->env; + parent = txn->parent; +retry: LOCK_INIT(elock); + if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0) + return (ret); + MAKE_INMEM(tmpdbp); + + GET_ENVLOCK(env, locker, &elock); + if ((ret = __env_mpool(tmpdbp, new, 0)) == 0) { + /* + * It is possible that the only reason this database exists is + * because we've done a previous rename of it and we have + * left a placeholder here. We need to check for that case + * and allow this rename to succeed if that's the case. + */ + + if ((ret = __fop_inmem_read_meta(tmpdbp, txn, new, 0)) != 0) { + ret = EEXIST; + goto err; + } + + /* + * Now, try to acquire the handle lock. If it's from our txn, + * then we'll get the lock. If it's not, then someone else has + * it locked. See the comments in __fop_ondisk_swap for + * details. + */ + if ((ret = __fop_lock_handle(env, + tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) { + /* + * Someone holds a writelock. Try for the WRITELOCK + * and after we get it, retry. + */ + if ((ret = __fop_lock_handle(env, tmpdbp, + locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + + /* We have the write lock; release it and start over. 
*/ + (void)__lock_put(env, &tmpdbp->handle_lock); + (void)__db_close(tmpdbp, NULL, DB_NOSYNC); + (void)__ENV_LPUT(env, elock); + goto retry; + } else { + (void)__lock_put(env, &tmpdbp->handle_lock); + if (!F_ISSET(tmpdbp, DB_AM_IN_RENAME)) + ret = EEXIST; + } + if (ret != 0) + goto err; + } + + /* Log the renames. */ + if (LOGGING_ON(env) +#ifndef DEBUG_WOP + && txn != NULL +#endif + ) { + /* Rename old to new. */ + DB_INIT_DBT(fid_dbt, olddbp->fileid, DB_FILE_ID_LEN); + DB_INIT_DBT(n1_dbt, old, strlen(old) + 1); + DB_INIT_DBT(n2_dbt, new, strlen(new) + 1); + if ((ret = __crdel_inmem_rename_log( + env, txn, &lsn, 0, &n1_dbt, &n2_dbt, &fid_dbt)) != 0) + goto err; + + /* Rename back to old */ + fid_dbt.data = backdbp->fileid; + DB_SET_DBT(n2_dbt, back, strlen(back) + 1); + if ((ret = __crdel_inmem_rename_log( + env, txn, &lsn, 0, &n2_dbt, &n1_dbt, &fid_dbt)) != 0) + goto err; + } + + /* + * While we have the namespace locked, do the renames and then + * swap for the handle lock. If we ran into a file in the midst + * of rename, then we need to delete it first, else nameop is + * going to consider it an error. + */ + if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) { + if ((ret = __memp_nameop(env, + tmpdbp->fileid, NULL, new, NULL, 1)) != 0) + goto err; + __txn_remrem(env, parent, new); + } + + if ((ret = __memp_nameop( + env, olddbp->fileid, new, old, new, 1)) != 0) + goto err; + if ((ret = __memp_nameop( + env, backdbp->fileid, old, back, old, 1)) != 0) + goto err; + + if ((ret = __fop_lock_handle(env, + tmpdbp, locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + + /* + * We just acquired a transactional lock on the tmp handle. + * We need to null out the tmp handle's lock so that it + * doesn't create problems for us in the close path. + */ + LOCK_INIT(tmpdbp->handle_lock); + + DB_ASSERT(env, txn != NULL); + + /* Commit the child. 
*/ + ret = __txn_commit(txn, 0); + txn = NULL; + + if ((ret = __db_inmem_remove(backdbp, parent, old)) != 0) + goto err; + +err: (void)__ENV_LPUT(env, elock); + + if (txn != NULL) + (void)__txn_abort(txn); + + if ((t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} diff --git a/src/hash/hash.c b/src/hash/hash.c new file mode 100644 index 00000000..fd0a9964 --- /dev/null +++ b/src/hash/hash.c @@ -0,0 +1,2340 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" + +static int __ham_bulk __P((DBC *, DBT *, u_int32_t)); +static int __hamc_close __P((DBC *, db_pgno_t, int *)); +static int __hamc_del __P((DBC *, u_int32_t)); +static int __hamc_destroy __P((DBC *)); +static int __hamc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __hamc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __hamc_writelock __P((DBC *)); +static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); +static int __ham_expand_table __P((DBC *)); +static int __hamc_update_getorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __hamc_update_setorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __ham_get_clist_func + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); + +/* + * __ham_quick_delete -- + * This function is called by __db_del when the appropriate conditions + * are met, and it performs the delete in the optimized way. 
+ * + * PUBLIC: int __ham_quick_delete __P((DBC *)); + */ +int +__ham_quick_delete(dbc) + DBC *dbc; +{ + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + int ret, t_ret; + + /* + * When performing a DB->del operation not involving secondary indices + * and not removing an off-page duplicate tree, we can speed things up + * substantially by removing the entire duplicate set, if any is + * present, in one operation, rather than by conjuring up and deleting + * each of the items individually. (All are stored in one big HKEYDATA + * structure.) We don't bother to distinguish on-page duplicate sets + * from single, non-dup items; they're deleted in exactly the same way. + * + * The cursor should be set to the first item in the duplicate set, or + * to the sole key/data pair when the key does not have a duplicate set, + * before the function is called. + * + * We do not need to call CDB_LOCKING_INIT, __db_del calls here with + * a write cursor. + * + * Assert we're initialized, but not to an off-page duplicate. + * Assert we're not using secondary indices. + */ + DB_ASSERT(dbc->env, IS_INITIALIZED(dbc)); + DB_ASSERT(dbc->env, dbc->internal->opd == NULL); + DB_ASSERT(dbc->env, !F_ISSET(dbc->dbp, DB_AM_SECONDARY)); + DB_ASSERT(dbc->env, !DB_IS_PRIMARY(dbc->dbp)); + + hcp = (HASH_CURSOR *)dbc->internal; + mpf = dbc->dbp->mpf; + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + + if ((ret = __hamc_writelock(dbc)) == 0) { + ret = __ham_del_pair(dbc, 0, NULL); + /* + * If a page was retreived during the delete, put it now. We + * can't rely on the callers cursor close to do that, since bulk + * delete operations keep the cursor open across deletes. 
+ */ + if (hcp->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + hcp->page = NULL; + } + } + + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* ****************** CURSORS ********************************** */ +/* + * __hamc_init -- + * Initialize the hash-specific portion of a cursor. + * + * PUBLIC: int __hamc_init __P((DBC *)); + */ +int +__hamc_init(dbc) + DBC *dbc; +{ + ENV *env; + HASH_CURSOR *new_curs; + int ret; + + env = dbc->env; + if ((ret = __os_calloc(env, + 1, sizeof(struct cursor_t), &new_curs)) != 0) + return (ret); + if ((ret = __os_malloc(env, + dbc->dbp->pgsize, &new_curs->split_buf)) != 0) { + __os_free(env, new_curs); + return (ret); + } + + dbc->internal = (DBC_INTERNAL *) new_curs; + dbc->close = dbc->c_close = __dbc_close_pp; + dbc->cmp = __dbc_cmp_pp; + dbc->count = dbc->c_count = __dbc_count_pp; + dbc->del = dbc->c_del = __dbc_del_pp; + dbc->dup = dbc->c_dup = __dbc_dup_pp; + dbc->get = dbc->c_get = __dbc_get_pp; + dbc->pget = dbc->c_pget = __dbc_pget_pp; + dbc->put = dbc->c_put = __dbc_put_pp; + dbc->am_bulk = __ham_bulk; + dbc->am_close = __hamc_close; + dbc->am_del = __hamc_del; + dbc->am_destroy = __hamc_destroy; + dbc->am_get = __hamc_get; + dbc->am_put = __hamc_put; + dbc->am_writelock = __hamc_writelock; + + return (__ham_item_init(dbc)); +} + +/* + * __hamc_close -- + * Close down the cursor from a single use. + */ +static int +__hamc_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + HKEYDATA *dp; + db_lockmode_t lock_mode; + int doroot, gotmeta, ret, t_ret; + + COMPQUIET(rmroot, 0); + mpf = dbc->dbp->mpf; + doroot = gotmeta = ret = 0; + hcp = (HASH_CURSOR *) dbc->internal; + + /* Check for off page dups. 
*/ + if (dbc->internal->opd != NULL) { + if ((ret = __ham_get_meta(dbc)) != 0) + goto done; + gotmeta = 1; + lock_mode = DB_LOCK_READ; + + /* To support dirty reads we must reget the write lock. */ + if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && + F_ISSET((BTREE_CURSOR *) + dbc->internal->opd->internal, C_DELETED)) + lock_mode = DB_LOCK_WRITE; + + if ((ret = __ham_get_cpage(dbc, lock_mode)) != 0) + goto out; + dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx); + + /* If it's not a dup we aborted before we changed it. */ + if (HPAGE_PTYPE(dp) == H_OFFDUP) + memcpy(&root_pgno, + HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); + else + root_pgno = PGNO_INVALID; + + if ((ret = + hcp->opd->am_close(hcp->opd, root_pgno, &doroot)) != 0) + goto out; + if (doroot != 0) { + if ((ret = __memp_dirty(mpf, &hcp->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto out; + if ((ret = __ham_del_pair(dbc, 0, NULL)) != 0) + goto out; + } + } + +out: if (ret != 0) + F_SET(dbc, DBC_ERROR); + if (hcp->page != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +done: if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __hamc_destroy -- + * Cleanup the access method private part of a cursor. + */ +static int +__hamc_destroy(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->split_buf != NULL) + __os_free(dbc->env, hcp->split_buf); + __os_free(dbc->env, hcp); + + return (0); +} + +/* + * __hamc_count -- + * Return a count of on-page duplicates. 
+ * + * PUBLIC: int __hamc_count __P((DBC *, db_recno_t *)); + */ +int +__hamc_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + db_indx_t len; + db_recno_t recno; + int ret, t_ret; + u_int8_t *p, *pend; + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + recno = 0; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) + return (ret); + if (hcp->indx >= NUM_ENT(hcp->page)) { + *recnop = 0; + goto err; + } + + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { + case H_KEYDATA: + case H_OFFPAGE: + recno = 1; + break; + case H_DUPLICATE: + p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); + pend = p + + LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); + for (; p < pend; recno++) { + /* p may be odd, so copy rather than just dereffing */ + memcpy(&len, p, sizeof(db_indx_t)); + p += 2 * sizeof(db_indx_t) + len; + } + + break; + default: + ret = __db_pgfmt(dbp->env, hcp->pgno); + goto err; + } + + *recnop = recno; + +err: if ((t_ret = __memp_fput(mpf, + dbc->thread_info, hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + hcp->page = NULL; + return (ret); +} + +/* + * __hamc_cmp -- + * Compare two hash cursors for equality. + * + * This function is only called with two cursors that point to the same item. + * It distinguishes two cases: + * * Cursors pointing to different items in the same on-page duplicate set. + * * Cursors pointing to the same item, with different DELETED flags. + * + * PUBLIC: int __hamc_cmp __P((DBC *, DBC *, int *)); + */ +int +__hamc_cmp(dbc, other_dbc, result) + DBC *dbc, *other_dbc; + int *result; +{ + ENV *env; + HASH_CURSOR *hcp, *ohcp; + + env = dbc->env; + hcp = (HASH_CURSOR *)dbc->internal; + ohcp = (HASH_CURSOR *)other_dbc->internal; + + DB_ASSERT (env, hcp->pgno == ohcp->pgno); + DB_ASSERT (env, hcp->indx == ohcp->indx); + + /* Only compare the duplicate offsets if this is a duplicate item. 
*/ + if ((F_ISSET(hcp, H_ISDUP) && hcp->dup_off != ohcp->dup_off) || + F_ISSET(hcp, H_DELETED) != F_ISSET(ohcp, H_DELETED)) + *result = 1; + else + *result = 0; + return (0); +} + +static int +__hamc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DBT repldbt; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + int ret, t_ret; + + COMPQUIET(flags, 0); + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) + goto out; + + /* Off-page duplicates. */ + if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) + goto out; + + DB_ASSERT(dbp->env, IS_DIRTY(hcp->page)); + + if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */ + if (hcp->dup_off == 0 && + DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx)) + ret = __ham_del_pair(dbc, 0, NULL); + else { + repldbt.flags = 0; + F_SET(&repldbt, DB_DBT_PARTIAL); + repldbt.doff = hcp->dup_off; + repldbt.dlen = DUP_SIZE(hcp->dup_len); + repldbt.size = 0; + repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, + hcp->indx)); + if ((ret = + __ham_replpair(dbc, &repldbt, H_DUPLICATE)) == 0) { + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); + /* + * Clear any cached streaming information. 
+ */ + hcp->stream_start_pgno = PGNO_INVALID; + ret = __hamc_update(dbc, DUP_SIZE(hcp->dup_len), + DB_HAM_CURADJ_DEL, 1); + } + } + } else /* Not a duplicate */ + ret = __ham_del_pair(dbc, 0, NULL); + +out: if (hcp->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + hcp->page = NULL; + } + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __hamc_dup -- + * Duplicate a hash cursor, such that the new one holds appropriate + * locks for the position of the original. + * + * PUBLIC: int __hamc_dup __P((DBC *, DBC *)); + */ +int +__hamc_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + HASH_CURSOR *orig, *new; + + orig = (HASH_CURSOR *)orig_dbc->internal; + new = (HASH_CURSOR *)new_dbc->internal; + + new->bucket = orig->bucket; + new->lbucket = orig->lbucket; + new->dup_off = orig->dup_off; + new->dup_len = orig->dup_len; + new->dup_tlen = orig->dup_tlen; + + if (F_ISSET(orig, H_DELETED)) + F_SET(new, H_DELETED); + if (F_ISSET(orig, H_ISDUP)) + F_SET(new, H_ISDUP); + + return (0); +} + +static int +__hamc_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + ENV *env; + HASH_CURSOR *hcp; + db_lockmode_t lock_type; + int ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + + /* Clear OR'd in additional bits so we can check for flag equality. 
*/ + if (F_ISSET(dbc, DBC_RMW)) + lock_type = DB_LOCK_WRITE; + else + lock_type = DB_LOCK_READ; + + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + hcp->seek_size = 0; + + ret = 0; + switch (flags) { + case DB_PREV_DUP: + F_SET(hcp, H_DUPONLY); + goto prev; + case DB_PREV_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_PREV: + if (IS_INITIALIZED(dbc)) { +prev: ret = __ham_item_prev(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_LAST: + ret = __ham_item_last(dbc, lock_type, pgnop); + break; + case DB_NEXT_DUP: + case DB_GET_BOTHC: + /* cgetchk has already determined that the cursor is set. */ + F_SET(hcp, H_DUPONLY); + goto next; + case DB_NEXT_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_NEXT: + if (IS_INITIALIZED(dbc)) { +next: ret = __ham_item_next(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + ret = __ham_item_first(dbc, lock_type, pgnop); + break; + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); + break; + case DB_CURRENT: + /* cgetchk has already determined that the cursor is set. */ + if (F_ISSET(hcp, H_DELETED)) { + ret = DB_KEYEMPTY; + goto err; + } + + ret = __ham_item(dbc, lock_type, pgnop); + break; + default: + ret = __db_unknown_flag(env, "__hamc_get", flags); + break; + } + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + for (;;) { + if (ret != 0 && ret != DB_NOTFOUND) + goto err; + else if (F_ISSET(hcp, H_OK)) { + if (*pgnop == PGNO_INVALID) + ret = __ham_dup_return(dbc, data, flags); + break; + } else if (!F_ISSET(hcp, H_NOMORE)) { + __db_errx(env, DB_STR("1130", + "H_NOMORE returned to __hamc_get")); + ret = EINVAL; + break; + } + + /* + * Ran out of entries in a bucket; change buckets. 
+ */ + switch (flags) { + case DB_LAST: + case DB_PREV: + case DB_PREV_DUP: + case DB_PREV_NODUP: + ret = __memp_fput(mpf, + dbc->thread_info, hcp->page, dbc->priority); + hcp->page = NULL; + if (hcp->bucket == 0) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + F_CLR(hcp, H_ISDUP); + hcp->bucket--; + hcp->indx = NDX_INVALID; + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (ret == 0) + ret = __ham_item_prev(dbc, lock_type, pgnop); + break; + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + ret = __memp_fput(mpf, + dbc->thread_info, hcp->page, dbc->priority); + hcp->page = NULL; + hcp->indx = NDX_INVALID; + hcp->bucket++; + F_CLR(hcp, H_ISDUP); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (hcp->bucket > hcp->hdr->max_bucket) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + if (ret == 0) + ret = __ham_item_next(dbc, lock_type, pgnop); + break; + case DB_GET_BOTH: + case DB_GET_BOTHC: + case DB_GET_BOTH_RANGE: + case DB_NEXT_DUP: + case DB_SET: + case DB_SET_RANGE: + /* Key not found. */ + ret = DB_NOTFOUND; + goto err; + case DB_CURRENT: + /* + * This should only happen if you are doing deletes and + * reading with concurrent threads and not doing proper + * locking. We return the same error code as we would + * if the cursor were deleted. + */ + ret = DB_KEYEMPTY; + goto err; + default: + DB_ASSERT(env, 0); + } + } + +err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + F_CLR(hcp, H_DUPONLY); + F_CLR(hcp, H_NEXT_NODUP); + + return (ret); +} + +/* + * __ham_bulk -- Return bulk data from a hash table. 
+ */ +static int +__ham_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *cp; + PAGE *pg; + db_indx_t dup_len, dup_off, dup_tlen, indx, *inp; + db_lockmode_t lock_mode; + db_pgno_t pgno; + int32_t *endp, *offp, *saveoff; + u_int32_t key_off, key_size, pagesize, size, space; + u_int8_t *dbuf, *dp, *hk, *np, *tmp; + int is_dup, is_key; + int need_pg, next_key, no_dup, ret, t_ret; + + ret = 0; + key_off = 0; + dup_len = dup_off = dup_tlen = 0; + size = 0; + dbp = dbc->dbp; + pagesize = dbp->pgsize; + mpf = dbp->mpf; + cp = (HASH_CURSOR *)dbc->internal; + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; + dbuf = data->data; + np = dp = dbuf; + + /* Keep track of space that is left. There is an termination entry */ + space = data->ulen; + space -= sizeof(*offp); + + /* Build the offset/size table from the end up. */ + endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; + + key_size = 0; + lock_mode = F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE: DB_LOCK_READ; + +next_pg: + need_pg = 1; + indx = cp->indx; + pg = cp->page; + inp = P_INP(dbp, pg); + + do { + if (is_key) { + hk = H_PAIRKEY(dbp, pg, indx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) { + memcpy(&key_size, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + size = key_size; + if (key_size > space) + goto get_key_space; + if ((ret = __bam_bulk_overflow( + dbc, key_size, pgno, np)) != 0) + return (ret); + space -= key_size; + key_off = (u_int32_t)(np - dbuf); + np += key_size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +get_key_space: + if (offp == endp) { + data->size = (u_int32_t) + DB_ALIGN(size + + pagesize, 1024); + return + (DB_BUFFER_SMALL); + } + goto back_up; + } + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + key_size = LEN_HKEY(dbp, pg, pagesize, indx); + key_off = ((inp[indx] - HOFFSET(pg)) + + (u_int32_t)(dp - dbuf)) + + SSZA(HKEYDATA, data); + } + } + + hk = H_PAIRDATA(dbp, pg, indx); + switch (HPAGE_PTYPE(hk)) { + case H_DUPLICATE: + case H_KEYDATA: + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +back_up: + if (indx != 0) { + indx -= 2; + /* XXX + * It's not clear that this is + * the right way to fix this, + * but here goes. + * If we are backing up onto a + * duplicate, then we need to + * position ourselves at the + * end of the duplicate set. + * We probably need to make + * this work for H_OFFDUP too. + * It might be worth making a + * dummy cursor and calling + * __ham_item_prev. 
+ */ + tmp = H_PAIRDATA(dbp, pg, indx); + if (HPAGE_PTYPE(tmp) == + H_DUPLICATE) { + dup_off = dup_tlen = + LEN_HDATA(dbp, pg, + pagesize, indx + 1); + memcpy(&dup_len, + HKEYDATA_DATA(tmp), + sizeof(db_indx_t)); + } else { + is_dup = 0; + dup_len = 0; + dup_off = 0; + dup_tlen = 0; + F_CLR(cp, H_ISDUP); + } + goto get_space; + } + /* indx == 0 */ + cp->dup_len = dup_len; + cp->dup_off = dup_off; + cp->dup_tlen = dup_tlen; + if ((ret = __ham_item_prev(dbc, + lock_mode, &pgno)) != 0) { + if (ret != DB_NOTFOUND) + return (ret); + if ((ret = __memp_fput(mpf, + dbc->thread_info, cp->page, + dbc->priority)) != 0) + return (ret); + cp->page = NULL; + if (cp->bucket == 0) { + cp->indx = indx = + NDX_INVALID; + goto get_space; + } + if ((ret = + __ham_get_meta(dbc)) != 0) + return (ret); + + cp->bucket--; + cp->pgno = BUCKET_TO_PAGE(cp, + cp->bucket); + cp->indx = NDX_INVALID; + if ((ret = __ham_release_meta( + dbc)) != 0) + return (ret); + /* + * Not an error to get + * DB_NOTFOUND, we're just at + * the beginning of the db. + */ + if ((ret = __ham_item_prev(dbc, + lock_mode, &pgno)) != 0) { + if (ret != DB_NOTFOUND) + return (ret); + else + ret = 0; + } + } + indx = cp->indx; +get_space: + /* + * See if we put any data in the buffer. + */ + if (offp >= endp || + F_ISSET(dbc, DBC_TRANSIENT)) { + data->size = (u_int32_t) + DB_ALIGN(size + + data->ulen - space, 1024); + return (DB_BUFFER_SMALL); + } + /* + * Don't continue; we're all out + * of space, even though we're + * returning success. + */ + next_key = 0; + break; + } + memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + + /* + * We're about to crack the offset(s) and length(s) + * out of an H_KEYDATA or H_DUPLICATE item. + * There are three cases: + * 1. We were moved into a duplicate set by + * the standard hash cursor code. Respect + * the dup_off and dup_tlen we were given. + * 2. We stumbled upon a duplicate set while + * walking the page on our own. 
We need to + * recognize it as a dup and set dup_off and + * dup_tlen. + * 3. The current item is not a dup. + */ + if (F_ISSET(cp, H_ISDUP)) { + /* Case 1 */ + is_dup = 1; + dup_len = cp->dup_len; + dup_off = cp->dup_off; + dup_tlen = cp->dup_tlen; + } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) { + /* Case 2 */ + is_dup = 1; + /* + * If we run out of memory and bail, + * make sure the fact we're in a dup set + * isn't ignored later. + */ + F_SET(cp, H_ISDUP); + dup_off = 0; + memcpy(&dup_len, + HKEYDATA_DATA(hk), sizeof(db_indx_t)); + dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx); + } else { + /* Case 3 */ + is_dup = 0; + dup_len = 0; + dup_off = 0; + dup_tlen = 0; + } + + do { + space -= (is_key ? 4 : 2) * sizeof(*offp); + size += (is_key ? 4 : 2) * sizeof(*offp); + /* + * Since space is an unsigned, if we happen + * to wrap, then this comparison will turn out + * to be true. XXX Wouldn't it be better to + * simply check above that space is greater than + * the value we're about to subtract??? 
+ */ + if (space > data->ulen) { + if (!is_dup || dup_off == 0) + goto back_up; + dup_off -= (db_indx_t) + DUP_SIZE((u_int32_t)offp[1]); + goto get_space; + } + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } + if (is_dup) { + *offp-- = (int32_t)( + ((inp[indx + 1] - HOFFSET(pg)) + + dp - dbuf) + SSZA(HKEYDATA, data) + + dup_off + sizeof(db_indx_t)); + memcpy(&dup_len, + HKEYDATA_DATA(hk) + dup_off, + sizeof(db_indx_t)); + dup_off += DUP_SIZE(dup_len); + *offp-- = dup_len; + } else { + *offp-- = (int32_t)( + ((inp[indx + 1] - HOFFSET(pg)) + + dp - dbuf) + SSZA(HKEYDATA, data)); + *offp-- = LEN_HDATA(dbp, pg, + pagesize, indx); + } + } while (is_dup && dup_off < dup_tlen && no_dup == 0); + F_CLR(cp, H_ISDUP); + break; + case H_OFFDUP: + memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + space -= 2 * sizeof(*offp); + if (space > data->ulen) + goto back_up; + + if (is_key) { + space -= 2 * sizeof(*offp); + if (space > data->ulen) + goto back_up; + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } + saveoff = offp; + if ((ret = __bam_bulk_duplicates(dbc, + pgno, dbuf, is_key ? offp + 2 : NULL, + &offp, &np, &space, no_dup)) != 0) { + if (ret == DB_BUFFER_SMALL) { + size = space; + space = 0; + if (is_key && saveoff == offp) { + offp += 2; + goto back_up; + } + goto get_space; + } + return (ret); + } + break; + case H_OFFPAGE: + space -= (is_key ? 4 : 2) * sizeof(*offp); + if (space > data->ulen) + goto back_up; + + memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if (size > space) + goto back_up; + + if ((ret = + __bam_bulk_overflow(dbc, size, pgno, np)) != 0) + return (ret); + + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } + + *offp-- = (int32_t)(np - dbuf); + *offp-- = (int32_t)size; + + np += size; + space -= size; + break; + default: + /* Do nothing. 
*/ + break; + } + } while (next_key && (indx += 2) < NUM_ENT(pg)); + + cp->indx = indx; + cp->dup_len = dup_len; + cp->dup_off = dup_off; + cp->dup_tlen = dup_tlen; + + /* If we are off the page then try to the next page. */ + if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { + if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + if ((ret = __memp_fput(dbc->dbp->mpf, + dbc->thread_info, cp->page, dbc->priority)) != 0) + return (ret); + cp->page = NULL; + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + + cp->bucket++; + if (cp->bucket > cp->hdr->max_bucket) { + /* + * Restore cursor to its previous state. We're past + * the last item in the last bucket, so the next + * DBC->get(DB_NEXT) will return DB_NOTFOUND. + */ + cp->bucket--; + ret = DB_NOTFOUND; + } else { + /* + * Start on the next bucket. + * + * Note that if this new bucket happens to be empty, + * but there's another non-empty bucket after it, + * we'll return early. This is a rare case, and we + * don't guarantee any particular number of keys + * returned on each call, so just let the next call + * to bulk get move forward by yet another bucket. + */ + cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket); + cp->indx = NDX_INVALID; + F_CLR(cp, H_ISDUP); + ret = __ham_item_next(dbc, lock_mode, &pgno); + } + + if ((t_ret = __ham_release_meta(dbc)) != 0) + return (t_ret); + if (ret == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + } + *offp = -1; + return (0); +} + +static int +__hamc_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DBT tmp_val, *myval; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + u_int32_t nbytes; + int ret, t_ret; + + /* + * The compiler doesn't realize that we only use this when ret is + * equal to 0 and that if ret is equal to 0, that we must have set + * myval. So, we initialize it here to shut the compiler up. 
+ */ + COMPQUIET(myval, NULL); + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED) && flags != DB_KEYFIRST && + flags != DB_KEYLAST && flags != DB_OVERWRITE_DUP) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + case DB_NODUPDATA: + case DB_NOOVERWRITE: + case DB_OVERWRITE_DUP: + nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + if ((ret = __ham_lookup(dbc, + key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { + if (hcp->seek_found_page != PGNO_INVALID && + hcp->seek_found_page != hcp->pgno) { + if ((ret = __memp_fput(mpf, dbc->thread_info, + hcp->page, dbc->priority)) != 0) + goto err2; + hcp->page = NULL; + hcp->pgno = hcp->seek_found_page; + hcp->indx = NDX_INVALID; + } + + if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { + /* + * A partial put, but the key does not exist + * and we are not beginning the write at 0. + * We must create a data item padded up to doff + * and then write the new bytes represented by + * val. 
+ */ + if ((ret = __ham_init_dbt(dbp->env, &tmp_val, + data->size + data->doff, + &dbc->my_rdata.data, + &dbc->my_rdata.ulen)) != 0) + goto err2; + + memset(tmp_val.data, 0, data->doff); + memcpy((u_int8_t *)tmp_val.data + + data->doff, data->data, data->size); + myval = &tmp_val; + } else + myval = (DBT *)data; + + ret = __ham_add_el(dbc, key, myval, H_KEYDATA); + goto done; + } else if (ret == 0 && flags == DB_NOOVERWRITE && + !F_ISSET(hcp, H_DELETED)) { + if (*pgnop == PGNO_INVALID) + ret = DB_KEYEXIST; + else + ret = __bam_opd_exists(dbc, *pgnop); + if (ret != 0) + goto done; + } + break; + case DB_BEFORE: + case DB_AFTER: + case DB_CURRENT: + ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop); + break; + default: + ret = __db_unknown_flag(dbp->env, "__hamc_put", flags); + break; + } + + /* + * Invalidate any insert index found so they are not reused + * in future inserts. + */ + hcp->seek_found_page = PGNO_INVALID; + hcp->seek_found_indx = NDX_INVALID; + + if (*pgnop == PGNO_INVALID && ret == 0) { + if ((ret = __memp_dirty(mpf, &hcp->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto done; + if (flags == DB_CURRENT || + (!(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK)) && + (flags == DB_KEYFIRST || flags == DB_KEYLAST || + flags == DB_NODUPDATA || flags == DB_OVERWRITE_DUP))) + ret = __ham_overwrite(dbc, data, flags); + else + ret = __ham_add_dup(dbc, data, flags, pgnop); + } + +done: if (hcp->page != NULL) { + if ((t_ret = __memp_fput(mpf, dbc->thread_info, + hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (t_ret == 0) + hcp->page = NULL; + } + + if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = __ham_expand_table(dbc); + F_CLR(hcp, H_EXPAND); + /* If we are out of space, ignore the error. 
*/ + if (ret == ENOSPC && dbc->txn == NULL) + ret = 0; + } else if (ret == 0 && F_ISSET(hcp, H_CONTRACT)) { + if (!F_ISSET(dbp, DB_AM_REVSPLITOFF)) + ret = __ham_contract_table(dbc, NULL); + F_CLR(hcp, H_CONTRACT); + } + +err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +err1: return (ret); +} + +/********************************* UTILITIES ************************/ + +/* + * __ham_contract_table -- remove the last bucket. + * PUBLIC: int __ham_contract_table __P((DBC *, DB_COMPACT *)); + */ +int +__ham_contract_table(dbc, c_data) + DBC *dbc; + DB_COMPACT *c_data; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + HMETA *hdr; + PAGE *h; + db_pgno_t maxpgno, stoppgno; + int drop_segment, ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + h = NULL; + if ((ret = __ham_dirty_meta(dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; + hdr = hcp->hdr; + + if ((ret = __ham_merge_pages(dbc, + hdr->max_bucket & hdr->low_mask, hdr->max_bucket, c_data)) != 0) + return (ret); + + maxpgno = BUCKET_TO_PAGE(hcp, hdr->max_bucket); + drop_segment = hdr->max_bucket == (hdr->low_mask + 1); + + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_contract_log(dbp, dbc->txn, &LSN(hdr), + 0, PGNO(hdr), &LSN(hdr), hdr->max_bucket, maxpgno)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(hdr)); + + hdr->max_bucket--; + /* + * If we are dropping a segment then adjust the spares table and masks + * and free the pages in that segment. 
+ */ + if (drop_segment) { + LOCK_CHECK_OFF(dbc->thread_info); + hdr->spares[__db_log2(hdr->max_bucket + 1) + 1] = PGNO_INVALID; + hdr->high_mask = hdr->low_mask; + hdr->low_mask >>= 1; + stoppgno = maxpgno + hdr->max_bucket + 1; + do { + if ((ret = __memp_fget(mpf, &maxpgno, + dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) + break; + if ((ret = __db_free(dbc, h, 0)) != 0) + break; + ret = 0; + } while (++maxpgno < stoppgno); + LOCK_CHECK_ON(dbc->thread_info); + } + +err: return (ret); +} + +/* + * __ham_expand_table -- + */ +static int +__ham_expand_table(dbc) + DBC *dbc; +{ + DB *dbp; + DBMETA *mmeta; + DB_LOCK metalock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + PAGE *h; + db_pgno_t pgno, mpgno; + u_int32_t logn, newalloc, new_bucket, old_bucket; + int got_meta, new_double, ret, t_ret; + + LOCK_CHECK_OFF(dbc->thread_info); + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_dirty_meta(dbc, 0)) != 0) + return (ret); + + LOCK_INIT(metalock); + mmeta = (DBMETA *) hcp->hdr; + mpgno = mmeta->pgno; + h = NULL; + newalloc = 0; + got_meta = 0; + + /* + * If the split point is about to increase, make sure that we + * have enough extra pages. The calculation here is weird. + * We'd like to do this after we've upped max_bucket, but it's + * too late then because we've logged the meta-data split. What + * we'll do between then and now is increment max bucket and then + * see what the log of one greater than that is; here we have to + * look at the log of max + 2. VERY NASTY STUFF. + * + * We figure out what we need to do, then we log it, then request + * the pages from mpool. We don't want to fail after extending + * the file. + * + * If the page we are about to split into has already been allocated, + * then we simply need to get it to get its LSN. If it hasn't yet + * been allocated, then we know it's LSN (0,0). 
+ */ + + new_bucket = hcp->hdr->max_bucket + 1; + old_bucket = new_bucket & hcp->hdr->low_mask; + + new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask; + logn = __db_log2(new_bucket); + + if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) { + /* Page exists; get it so we can get its LSN */ + pgno = BUCKET_TO_PAGE(hcp, new_bucket); + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) + goto err; + lsn = h->lsn; + } else { + /* Get the master meta-data page to do allocation. */ + if (F_ISSET(dbp, DB_AM_SUBDB)) { + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &mpgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &mmeta)) != 0) + goto err; + got_meta = 1; + } + pgno = mmeta->last_pgno + 1; + ZERO_LSN(lsn); + newalloc = 1; + } + + /* Log the meta-data split first. */ + if (DBC_LOGGING(dbc)) { + /* + * We always log the page number of the first page of + * the allocation group. However, the LSN that we log + * is either the LSN on the first page (if we did not + * do the actual allocation here) or the LSN on the last + * page of the unit (if we did do the allocation here). + */ + if ((ret = __ham_metagroup_log(dbp, dbc->txn, + &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn, + hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn, + pgno, &lsn, newalloc, mmeta->last_pgno)) != 0) + goto err; + } else + LSN_NOT_LOGGED(lsn); + + hcp->hdr->dbmeta.lsn = lsn; + + if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) { + /* + * We need to begin a new doubling and we have not allocated + * any pages yet. Read the last page in and initialize it to + * make the allocation contiguous. The pgno we calculated + * above is the first page allocated. The entry in spares is + * that page number minus any buckets already allocated (it + * simplifies bucket to page transaction). 
After we've set + * that, we calculate the last pgno. + */ + + pgno += hcp->hdr->max_bucket; + + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) + goto err; + + hcp->hdr->spares[logn + 1] = + (pgno - new_bucket) - hcp->hdr->max_bucket; + mmeta->last_pgno = pgno; + mmeta->lsn = lsn; + + P_INIT(h, dbp->pgsize, + pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + } + + /* Write out whatever page we ended up modifying. */ + h->lsn = lsn; + if ((ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority)) != 0) + goto err; + h = NULL; + + /* + * Update the meta-data page of this hash database. + */ + hcp->hdr->max_bucket = new_bucket; + if (new_double) { + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; + } + +err: if (got_meta) + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, mmeta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (h != NULL) + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Relocate records to the new bucket -- after releasing metapage. */ + if (ret == 0) + ret = __ham_split_page(dbc, old_bucket, new_bucket); + LOCK_CHECK_ON(dbc->thread_info); + + return (ret); +} + +/* + * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t)); + */ +u_int32_t +__ham_call_hash(dbc, k, len) + DBC *dbc; + u_int8_t *k; + u_int32_t len; +{ + DB *dbp; + HASH *hashp; + HASH_CURSOR *hcp; + u_int32_t n, bucket; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + + n = (u_int32_t)(hashp->h_hash(dbp, k, len)); + + bucket = n & hcp->hdr->high_mask; + if (bucket > hcp->hdr->max_bucket) + bucket = bucket & hcp->hdr->low_mask; + return (bucket); +} + +/* + * Check for duplicates, and call __db_ret appropriately. Release + * everything held by the cursor. 
+ */ +static int +__ham_dup_return(dbc, val, flags) + DBC *dbc; + DBT *val; + u_int32_t flags; +{ + DB *dbp; + DBT *myval, tmp_val; + HASH_CURSOR *hcp; + PAGE *pp; + db_indx_t ndx; + db_pgno_t pgno; + u_int32_t off, tlen; + u_int8_t *hk, type; + int cmp, ret; + db_indx_t len; + + /* Check for duplicate and return the first one. */ + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + ndx = H_DATAINDEX(hcp->indx); + type = HPAGE_TYPE(dbp, hcp->page, ndx); + pp = hcp->page; + myval = val; + + /* + * There are 4 cases: + * 1. We are not in duplicate, simply return; the upper layer + * will do the right thing. + * 2. We are looking at keys and stumbled onto a duplicate. + * 3. We are in the middle of a duplicate set. (ISDUP set) + * 4. We need to check for particular data match. + */ + + /* We should never get here with off-page dups. */ + DB_ASSERT(dbp->env, type != H_OFFDUP); + + /* Case 1 */ + if (type != H_DUPLICATE && flags != DB_GET_BOTH && + flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE) + return (0); + + /* + * Here we check for the case where we just stumbled onto a + * duplicate. In this case, we do initialization and then + * let the normal duplicate code handle it. (Case 2) + */ + if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) { + F_SET(hcp, H_ISDUP); + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (flags == DB_LAST || + flags == DB_PREV || flags == DB_PREV_NODUP) { + hcp->dup_off = 0; + do { + memcpy(&len, + HKEYDATA_DATA(hk) + hcp->dup_off, + sizeof(db_indx_t)); + hcp->dup_off += DUP_SIZE(len); + } while (hcp->dup_off < hcp->dup_tlen); + hcp->dup_off -= DUP_SIZE(len); + } else { + memcpy(&len, + HKEYDATA_DATA(hk), sizeof(db_indx_t)); + hcp->dup_off = 0; + } + hcp->dup_len = len; + } + + /* + * If we are retrieving a specific key/data pair, then we + * may need to adjust the cursor before returning data. 
+ * Case 4 + */ + if (flags == DB_GET_BOTH || + flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { + if (F_ISSET(hcp, H_ISDUP)) { + /* + * If we're doing a join, search forward from the + * current position, not the beginning of the dup set. + */ + if (flags == DB_GET_BOTHC) + F_SET(hcp, H_CONTINUE); + + __ham_dsearch(dbc, val, &off, &cmp, flags); + + /* + * This flag is set nowhere else and is safe to + * clear unconditionally. + */ + F_CLR(hcp, H_CONTINUE); + hcp->dup_off = off; + } else { + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (((HKEYDATA *)hk)->type == H_OFFPAGE) { + memcpy(&tlen, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if ((ret = __db_moff(dbc, val, pgno, tlen, + dbp->dup_compare, &cmp)) != 0) + return (ret); + cmp = -cmp; + } else { + /* + * We do not zero tmp_val since the comparison + * routines may only look at data and size. + */ + tmp_val.data = HKEYDATA_DATA(hk); + tmp_val.size = LEN_HDATA(dbp, hcp->page, + dbp->pgsize, hcp->indx); + cmp = dbp->dup_compare == NULL ? + __bam_defcmp(dbp, &tmp_val, val) : + dbp->dup_compare(dbp, &tmp_val, val); + } + + if (cmp > 0 && flags == DB_GET_BOTH_RANGE && + F_ISSET(dbp, DB_AM_DUPSORT)) + cmp = 0; + } + + if (cmp != 0) + return (DB_NOTFOUND); + } + + /* + * If we've already got the data for this value, or we're doing a bulk + * get, we don't want to return the data. + */ + if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY) || + F_ISSET(val, DB_DBT_ISSET)) + return (0); + + /* + * Now, everything is initialized, grab a duplicate if + * necessary. + */ + if (F_ISSET(hcp, H_ISDUP)) { /* Case 3 */ + /* + * Copy the DBT in case we are retrieving into user + * memory and we need the parameters for it. If the + * user requested a partial, then we need to adjust + * the user's parameters to get the partial of the + * duplicate which is itself a partial. 
+ */ + memcpy(&tmp_val, val, sizeof(*val)); + + if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) { + /* + * Take the user's length unless it would go + * beyond the end of the duplicate. + */ + if (tmp_val.doff > hcp->dup_len) + tmp_val.dlen = 0; + else if (tmp_val.dlen + tmp_val.doff > hcp->dup_len) + tmp_val.dlen = hcp->dup_len - tmp_val.doff; + + } else { + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.dlen = hcp->dup_len; + tmp_val.doff = 0; + } + + /* + * Set offset to the appropriate place within the + * current duplicate -- need to take into account + * both the dup_off and the current duplicate's + * length. + */ + tmp_val.doff += hcp->dup_off + sizeof(db_indx_t); + + myval = &tmp_val; + } + + /* + * Finally, if we had a duplicate, pp, ndx, and myval should be + * set appropriately. + */ + if ((ret = __db_ret(dbc, pp, ndx, myval, + &dbc->rdata->data, &dbc->rdata->ulen)) != 0) { + if (ret == DB_BUFFER_SMALL) + val->size = myval->size; + return (ret); + } + + /* + * In case we sent a temporary off to db_ret, set the real + * return values. + */ + val->data = myval->data; + val->size = myval->size; + + F_SET(val, DB_DBT_ISSET); + + return (0); +} + +/* + * Overwrite a record. + * + * PUBLIC: int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); + */ +int +__ham_overwrite(dbc, nval, flags) + DBC *dbc; + DBT *nval; + u_int32_t flags; +{ + DB *dbp; + DBT *myval, tmp_val, tmp_val2; + ENV *env; + HASH_CURSOR *hcp; + void *newrec; + u_int8_t *hk, *p; + u_int32_t len, nondup_size; + db_indx_t newsize; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + hcp = (HASH_CURSOR *)dbc->internal; + if (F_ISSET(hcp, H_ISDUP)) { + /* + * This is an overwrite of a duplicate. We should never + * be off-page at this point. + */ + DB_ASSERT(env, hcp->opd == NULL); + /* On page dups */ + if (F_ISSET(nval, DB_DBT_PARTIAL)) { + /* + * We're going to have to get the current item, then + * construct the record, do any padding and do a + * replace. 
+ */ + memset(&tmp_val, 0, sizeof(tmp_val)); + if ((ret = + __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0) + return (ret); + + /* Figure out new size. */ + nondup_size = tmp_val.size; + newsize = nondup_size; + + /* + * Three cases: + * 1. strictly append (may need to allocate space + * for pad bytes; really gross). + * 2. overwrite some and append. + * 3. strictly overwrite. + */ + if (nval->doff > nondup_size) + newsize += + ((nval->doff - nondup_size) + nval->size); + else if (nval->doff + nval->dlen > nondup_size) + newsize += nval->size - + (nondup_size - nval->doff); + else + newsize += nval->size - nval->dlen; + + /* + * Make sure that the new size doesn't put us over + * the onpage duplicate size in which case we need + * to convert to off-page duplicates. + */ + if (ISBIG(hcp, + (hcp->dup_tlen - nondup_size) + newsize)) { + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + if ((ret = __os_malloc(dbp->env, + DUP_SIZE(newsize), &newrec)) != 0) + return (ret); + memset(&tmp_val2, 0, sizeof(tmp_val2)); + F_SET(&tmp_val2, DB_DBT_PARTIAL); + + /* Construct the record. */ + p = newrec; + /* Initial size. */ + memcpy(p, &newsize, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + + /* First part of original record. */ + len = nval->doff > tmp_val.size + ? tmp_val.size : nval->doff; + memcpy(p, tmp_val.data, len); + p += len; + + if (nval->doff > tmp_val.size) { + /* Padding */ + memset(p, 0, nval->doff - tmp_val.size); + p += nval->doff - tmp_val.size; + } + + /* New bytes */ + memcpy(p, nval->data, nval->size); + p += nval->size; + + /* End of original record (if there is any) */ + if (nval->doff + nval->dlen < tmp_val.size) { + len = (tmp_val.size - nval->doff) - nval->dlen; + memcpy(p, (u_int8_t *)tmp_val.data + + nval->doff + nval->dlen, len); + p += len; + } + + /* Final size. 
*/ + memcpy(p, &newsize, sizeof(db_indx_t)); + + /* + * Make sure that the caller isn't corrupting + * the sort order. + */ + if (dbp->dup_compare != NULL) { + tmp_val2.data = + (u_int8_t *)newrec + sizeof(db_indx_t); + tmp_val2.size = newsize; + if (dbp->dup_compare( + dbp, &tmp_val, &tmp_val2) != 0) { + __os_free(env, newrec); + return (__db_duperr(dbp, flags)); + } + } + + tmp_val2.data = newrec; + tmp_val2.size = DUP_SIZE(newsize); + tmp_val2.doff = hcp->dup_off; + tmp_val2.dlen = DUP_SIZE(hcp->dup_len); + + ret = __ham_replpair(dbc, &tmp_val2, H_DUPLICATE); + __os_free(env, newrec); + + /* Update cursor */ + if (ret != 0) + return (ret); + + if (newsize > nondup_size) { + if ((ret = __hamc_update(dbc, + (newsize - nondup_size), + DB_HAM_CURADJ_ADDMOD, 1)) != 0) + return (ret); + hcp->dup_tlen += (newsize - nondup_size); + } else { + if ((ret = __hamc_update(dbc, + (nondup_size - newsize), + DB_HAM_CURADJ_DELMOD, 1)) != 0) + return (ret); + hcp->dup_tlen -= (nondup_size - newsize); + } + hcp->dup_len = newsize; + return (0); + } else { + /* Check whether we need to convert to off page. */ + if (ISBIG(hcp, + (hcp->dup_tlen - hcp->dup_len) + nval->size)) { + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + /* Make sure we maintain sort order. */ + if (dbp->dup_compare != NULL) { + tmp_val2.data = + HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, + hcp->indx)) + hcp->dup_off + + sizeof(db_indx_t); + tmp_val2.size = hcp->dup_len; + if (dbp->dup_compare( + dbp, nval, &tmp_val2) != 0) { + __db_errx(env, DB_STR("1131", + "Existing data sorts differently from put data")); + return (EINVAL); + } + } + /* Overwriting a complete duplicate. */ + if ((ret = + __ham_make_dup(dbp->env, nval, &tmp_val, + &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) + return (ret); + /* Now fix what we are replacing. 
*/ + tmp_val.doff = hcp->dup_off; + tmp_val.dlen = DUP_SIZE(hcp->dup_len); + + /* Update cursor */ + if (nval->size > hcp->dup_len) { + if ((ret = __hamc_update(dbc, + (nval->size - hcp->dup_len), + DB_HAM_CURADJ_ADDMOD, 1)) != 0) + return (ret); + hcp->dup_tlen += (nval->size - hcp->dup_len); + } else { + if ((ret = __hamc_update(dbc, + (hcp->dup_len - nval->size), + DB_HAM_CURADJ_DELMOD, 1)) != 0) + return (ret); + hcp->dup_tlen -= (hcp->dup_len - nval->size); + } + hcp->dup_len = (db_indx_t)nval->size; + } + myval = &tmp_val; + } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { + /* Put/overwrite */ + memcpy(&tmp_val, nval, sizeof(*nval)); + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.doff = 0; + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) + memcpy(&tmp_val.dlen, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + else + tmp_val.dlen = LEN_HDATA(dbp, hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx); + myval = &tmp_val; + } else + /* Regular partial put */ + myval = nval; + + return (__ham_replpair(dbc, myval, + F_ISSET(hcp, H_ISDUP) ? H_DUPLICATE : H_KEYDATA)); +} + +/* + * Given a key and a cursor, sets the cursor to the page/ndx on which + * the key resides. If the key is found, the cursor H_OK flag is set + * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. + * If the key is not found, the H_OK flag is not set. If the sought + * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields + * are set indicating where an add might take place. If it is 0, + * none of the cursor pointer field are valid. 
+ * PUBLIC: int __ham_lookup __P((DBC *, + * PUBLIC: const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_lookup(dbc, key, sought, mode, pgnop) + DBC *dbc; + const DBT *key; + u_int32_t sought; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t next_pgno; + int match, ret; + u_int8_t *dk; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * Set up cursor so that we're looking for space to add an item + * as we cycle through the pages looking for the key. + */ + if ((ret = __ham_item_reset(dbc)) != 0) + return (ret); + hcp->seek_size = sought; + + hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + /* look though all pages in the bucket for the key */ + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + + *pgnop = PGNO_INVALID; + if (hcp->indx == NDX_INVALID) { + hcp->indx = 0; + F_CLR(hcp, H_ISDUP); + } + while (hcp->pgno != PGNO_INVALID) { + /* Are we looking for space to insert an item. */ + if (hcp->seek_size != 0 && + hcp->seek_found_page == PGNO_INVALID && + hcp->seek_size < P_FREESPACE(dbp, hcp->page)) { + hcp->seek_found_page = hcp->pgno; + hcp->seek_found_indx = NDX_INVALID; + } + + if ((ret = __ham_getindex(dbc, hcp->page, key, + H_KEYDATA, &match, &hcp->indx)) != 0) + return (ret); + + /* + * If this is the first page in the bucket with space for + * inserting the requested item. Store the insert index to + * save having to look it up again later. + */ + if (hcp->seek_found_page == hcp->pgno) + hcp->seek_found_indx = hcp->indx; + + if (match == 0) { + F_SET(hcp, H_OK); + dk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (HPAGE_PTYPE(dk) == H_OFFDUP) + memcpy(pgnop, HOFFDUP_PGNO(dk), + sizeof(db_pgno_t)); + return (0); + } + + /* move the cursor to the next page. 
*/ + if (NEXT_PGNO(hcp->page) == PGNO_INVALID) + break; + next_pgno = NEXT_PGNO(hcp->page); + hcp->indx = 0; + if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) + return (ret); + } + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); +} + +/* + * __ham_init_dbt -- + * Initialize a dbt using some possibly already allocated storage + * for items. + * + * PUBLIC: int __ham_init_dbt __P((ENV *, + * PUBLIC: DBT *, u_int32_t, void **, u_int32_t *)); + */ +int +__ham_init_dbt(env, dbt, size, bufp, sizep) + ENV *env; + DBT *dbt; + u_int32_t size; + void **bufp; + u_int32_t *sizep; +{ + int ret; + + memset(dbt, 0, sizeof(*dbt)); + if (*sizep < size) { + if ((ret = __os_realloc(env, size, bufp)) != 0) { + *sizep = 0; + return (ret); + } + *sizep = size; + } + dbt->data = *bufp; + dbt->size = size; + return (0); +} + +/* + * Adjust the cursor after an insert or delete. The cursor passed is + * the one that was operated upon; we just need to check any of the + * others. + * + * len indicates the length of the item added/deleted + * add indicates if the item indicated by the cursor has just been + * added (add == 1) or deleted (add == 0). + * dup indicates if the addition occurred into a duplicate set. 
+ * + * PUBLIC: int __hamc_update + * PUBLIC: __P((DBC *, u_int32_t, db_ham_curadj, int)); + */ + static int + __hamc_update_getorder(cp, dbc, orderp, pgno, is_dup, args) + DBC *dbc, *cp; + u_int32_t *orderp; + db_pgno_t pgno; + u_int32_t is_dup; + void *args; +{ + HASH_CURSOR *hcp, *lcp; + + COMPQUIET(args, NULL); + COMPQUIET(pgno, 0); + + hcp = (HASH_CURSOR *)dbc->internal; + if (cp == dbc || cp->dbtype != DB_HASH) + return (0); + lcp = (HASH_CURSOR *)cp->internal; + if (F_ISSET(lcp, H_DELETED) && + hcp->pgno == lcp->pgno && + hcp->indx == lcp->indx && + *orderp < lcp->order && + (!is_dup || hcp->dup_off == lcp->dup_off) && + !MVCC_SKIP_CURADJ(cp, lcp->pgno)) + *orderp = lcp->order; + return (0); +} +struct __hamc_update_setorder_args { + int was_mod, was_add; + u_int32_t len, order; + DB_TXN *my_txn; +}; + +static int +__hamc_update_setorder(cp, dbc, foundp, pgno, is_dup, vargs) + DBC *dbc, *cp; + u_int32_t *foundp; + db_pgno_t pgno; + u_int32_t is_dup; + void *vargs; +{ + HASH_CURSOR *hcp, *lcp; + struct __hamc_update_setorder_args *args; + + COMPQUIET(pgno, 0); + + if (cp == dbc || cp->dbtype != DB_HASH) + return (0); + + hcp = (HASH_CURSOR *)dbc->internal; + lcp = (HASH_CURSOR *)cp->internal; + + if (lcp->pgno != hcp->pgno || + lcp->indx == NDX_INVALID || + MVCC_SKIP_CURADJ(cp, lcp->pgno)) + return (0); + + args = vargs; + /* + * We're about to move things out from under this + * cursor. Clear any cached streaming information. + */ + lcp->stream_start_pgno = PGNO_INVALID; + + if (args->my_txn != NULL && cp->txn != args->my_txn) + *foundp = 1; + + if (!is_dup) { + if (args->was_add == 1) { + /* + * This routine is not called to add + * non-dup records which are always put + * at the end. It is only called from + * recovery in this case and the + * cursor will be marked deleted. + * We are "undeleting" so unmark all + * cursors with the same order. 
+ */ + if (lcp->indx == hcp->indx && + F_ISSET(lcp, H_DELETED)) { + if (lcp->order == hcp->order) + F_CLR(lcp, H_DELETED); + else if (lcp->order > + hcp->order) { + + /* + * If we've moved this cursor's + * index, split its order + * number--i.e., decrement it by + * enough so that the lowest + * cursor moved has order 1. + * cp_arg->order is the split + * point, so decrement by it. + */ + lcp->order -= + hcp->order; + lcp->indx += 2; + } + } else if (lcp->indx >= hcp->indx) + lcp->indx += 2; + } else { + if (lcp->indx > hcp->indx) { + lcp->indx -= 2; + if (lcp->indx == hcp->indx && + F_ISSET(lcp, H_DELETED)) + lcp->order += args->order; + } else if (lcp->indx == hcp->indx && + !F_ISSET(lcp, H_DELETED)) { + F_SET(lcp, H_DELETED); + F_CLR(lcp, H_ISDUP); + lcp->order = args->order; + } + } + } else if (lcp->indx == hcp->indx) { + /* + * Handle duplicates. This routine is only + * called for on page dups. Off page dups are + * handled by btree/rtree code. + */ + if (args->was_add == 1) { + lcp->dup_tlen += args->len; + if (lcp->dup_off == hcp->dup_off && + F_ISSET(hcp, H_DELETED) && + F_ISSET(lcp, H_DELETED)) { + /* Abort of a delete. 
*/ + if (lcp->order == hcp->order) + F_CLR(lcp, H_DELETED); + else if (lcp->order > + hcp->order) { + lcp->order -= + (hcp->order -1); + lcp->dup_off += args->len; + } + } else if (lcp->dup_off > + hcp->dup_off || (!args->was_mod && + lcp->dup_off == hcp->dup_off)) + lcp->dup_off += args->len; + } else { + lcp->dup_tlen -= args->len; + if (lcp->dup_off > hcp->dup_off) { + lcp->dup_off -= args->len; + if (lcp->dup_off == + hcp->dup_off && + F_ISSET(lcp, H_DELETED)) + lcp->order += args->order; + } else if (!args->was_mod && + lcp->dup_off == hcp->dup_off && + !F_ISSET(lcp, H_DELETED)) { + F_SET(lcp, H_DELETED); + lcp->order = args->order; + } + } + } + return (0); +} + +int +__hamc_update(dbc, len, operation, is_dup) + DBC *dbc; + u_int32_t len; + db_ham_curadj operation; + int is_dup; +{ + DB *dbp; + DB_LSN lsn; + HASH_CURSOR *hcp; + int ret; + u_int32_t found; + struct __hamc_update_setorder_args args; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * Adjustment will only be logged if this is a subtransaction. + * Only subtransactions can abort and effect their parent + * transactions cursors. + */ + + args.my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + args.len = len; + + switch (operation) { + case DB_HAM_CURADJ_DEL: + args.was_mod = 0; + args.was_add = 0; + break; + case DB_HAM_CURADJ_ADD: + args.was_mod = 0; + args.was_add = 1; + break; + case DB_HAM_CURADJ_DELMOD: + args.was_mod = 1; + args.was_add = 0; + break; + case DB_HAM_CURADJ_ADDMOD: + args.was_mod = 1; + args.was_add = 1; + break; + default: + return (EINVAL); + } + + /* + * Calculate the order of this deleted record. + * This will be one greater than any cursor that is pointing + * at this record and already marked as deleted. 
+ */ + if (args.was_add == 0) { + if ((ret = __db_walk_cursors(dbp, dbc, __hamc_update_getorder, + &args.order, 0, (u_int32_t)is_dup, NULL)) != 0) + return (ret); + args.order++; + hcp->order = args.order; + } + + if ((ret = __db_walk_cursors(dbp, dbc, + __hamc_update_setorder, &found, 0, (u_int32_t)is_dup, &args)) != 0) + return (ret); + + if (found != 0 && DBC_LOGGING(dbc)) { + if ((ret = __ham_curadj_log(dbp, args.my_txn, &lsn, 0, + hcp->pgno, hcp->indx, len, hcp->dup_off, + (int)operation, is_dup, args.order)) != 0) + return (ret); + } + + return (0); +} + +struct __ham_get_clist_args { + u_int nalloc, nused; + DBC **listp; +}; + +static int +__ham_get_clist_func(dbc, my_dbc, countp, pgno, indx, vargs) + DBC *dbc, *my_dbc; + u_int32_t *countp; + db_pgno_t pgno; + u_int32_t indx; + void *vargs; +{ + int ret; + struct __ham_get_clist_args *args; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(countp, NULL); + args = vargs; + /* + * We match if dbc->pgno matches the specified + * pgno, and if either the dbc->indx matches + * or we weren't given an index. + */ + if (dbc->internal->pgno == pgno && + (indx == NDX_INVALID || + dbc->internal->indx == indx) && + !MVCC_SKIP_CURADJ(dbc, pgno)) { + if (args->nused >= args->nalloc) { + args->nalloc += 10; + if ((ret = __os_realloc(dbc->dbp->env, + args->nalloc * sizeof(HASH_CURSOR *), + &args->listp)) != 0) + return (ret); + } + args->listp[args->nused++] = dbc; + } + return (0); +} +/* + * __ham_get_clist -- + * + * Get a list of cursors either on a particular bucket or on a particular + * page and index combination. The former is so that we can update + * cursors on a split. The latter is so we can update cursors when we + * move items off page. 
 *
 * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***));
 */
int
__ham_get_clist(dbp, pgno, indx, listp)
	DB *dbp;
	db_pgno_t pgno;
	u_int32_t indx;		/* NDX_INVALID matches every index on the page. */
	DBC ***listp;		/* Out: NULL-terminated allocated array, or NULL. */
{
	ENV *env;
	int ret;
	u_int32_t count;
	struct __ham_get_clist_args args;

	env = dbp->env;
	args.listp = NULL;
	args.nalloc = args.nused = 0;

	/* Collect the matching cursors into args.listp. */
	if ((ret = __db_walk_cursors(dbp, NULL,
	    __ham_get_clist_func, &count, pgno, indx, &args)) != 0)
		return (ret);
	if (args.listp != NULL) {
		/* Grow by one slot if needed, then NULL-terminate the list. */
		if (args.nused >= args.nalloc) {
			args.nalloc++;
			/*
			 * NOTE(review): element type is DBC *, not
			 * HASH_CURSOR *; sizes are identical (both are
			 * pointers), but sizeof(*args.listp) would be clearer.
			 */
			if ((ret = __os_realloc(env,
			    args.nalloc * sizeof(HASH_CURSOR *),
			    &args.listp)) != 0)
				return (ret);
		}
		args.listp[args.nused] = NULL;
	}
	/* Ownership of the array transfers to the caller (who frees it). */
	*listp = args.listp;
	return (0);
}

/*
 * __hamc_writelock --
 *	Acquire a write lock on the cursor's bucket, releasing the
 *	previously held lock once the write lock has been obtained.
 */
static int
__hamc_writelock(dbc)
	DBC *dbc;
{
	DB_LOCK tmp_lock;
	HASH_CURSOR *hcp;
	int ret;

	/*
	 * All we need do is acquire the lock and let the off-page
	 * dup tree do its thing.
	 */
	if (!STD_LOCKING(dbc))
		return (0);

	hcp = (HASH_CURSOR *)dbc->internal;
	ret = 0;
	/* Nothing to do if we already hold the bucket write lock. */
	if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode != DB_LOCK_WRITE)) {
		tmp_lock = hcp->lock;
		/*
		 * Take the write lock, then drop the old lock -- unless it
		 * is DB_LOCK_WWRITE, which is kept (NOTE(review): presumably
		 * a "was-written" lock that must persist; confirm against
		 * the lock subsystem before changing).
		 */
		if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) == 0 &&
		    tmp_lock.mode != DB_LOCK_WWRITE)
			ret = __LPUT(dbc, tmp_lock);
	}
	return (ret);
}
diff --git a/src/hash/hash.src b/src/hash/hash.src
new file mode 100644
index 00000000..7888532a
--- /dev/null
+++ b/src/hash/hash.src
@@ -0,0 +1,328 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $Id$
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +DBPRIVATE +PREFIX __ham + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/hash.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * HASH-insdel: used for hash to insert/delete a pair of entries onto a master + * page. 
The pair might be regular key/data pairs or they might be the + * structures that refer to off page items, duplicates or offpage duplicates. + * opcode - PUTPAIR/DELPAIR + big masks + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being added (item index) + * pagelsn - lsn on the page before the update + * key - the key being inserted + * data - the data being inserted + */ +BEGIN insdel 50 21 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +OP keytype u_int32_t lu +HDR key DBT s +OP datatype u_int32_t lu +HDR data DBT s +END + +BEGIN_COMPAT insdel 42 21 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +DBT key DBT s +DBT data DBT s +END + +/* + * Used to add and remove overflow pages. + * prev_pgno is the previous page that is going to get modified to + * point to this one. If this is the first page in a chain + * then prev_pgno should be PGNO_INVALID. + * new_pgno is the page being allocated. + * next_pgno is the page that follows this one. On allocation, + * this should be PGNO_INVALID. For deletes, it may exist. + * pagelsn is the old lsn on the page. + */ +BEGIN newpage 42 22 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG prev_pgno db_pgno_t lu +POINTER prevlsn DB_LSN * lu +ARG new_pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +END + +/* + * Splitting requires two types of log messages. The second logs the + * data on the original page. To redo the split, we have to visit the + * new page (pages) and add the items back on the page if they are not + * yet there. 
+ */ +BEGIN splitdata 42 24 +DB fileid int32_t ld +ARG opcode u_int32_t lu +ARG pgno db_pgno_t lu +PGDBT pageimage DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * HASH-replace: is used for hash to handle partial puts that only + * affect a single master page. + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being modified (item index) + * pagelsn - lsn on the page before the update + * off - offset in the old item where the new item is going. + * olditem - DBT that describes the part of the item being replaced. + * newitem - DBT of the new item. + * makedup - this was a replacement that made an item a duplicate. + */ +BEGIN replace 50 25 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +ARG off int32_t ld +OP oldtype u_int32_t lu +HDR olditem DBT s +OP newtype u_int32_t lu +HDR newitem DBT s +END + +BEGIN_COMPAT replace 42 25 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +ARG off int32_t ld +DBT olditem DBT s +DBT newitem DBT s +ARG makedup u_int32_t lu +END + +/* + * Used when we empty the first page in a bucket and there are pages after + * it. The page after it gets copied into the bucket page (since bucket + * pages have to be in fixed locations). + * pgno: the bucket page + * pagelsn: the old LSN on the bucket page + * next_pgno: the page number of the next page + * nnext_pgno: page after next_pgno (may need to change its prev) + * nnextlsn: the LSN of nnext_pgno. + */ +BEGIN copypage 42 28 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +ARG nnext_pgno db_pgno_t lu +POINTER nnextlsn DB_LSN * lu +PGDBT page DBT s +END + +/* + * This record logs the meta-data aspects of a split operation. 
It has enough + * information so that we can record both an individual page allocation as well + * as a group allocation which we do because in sub databases, the pages in + * a hash doubling, must be contiguous. If we do a group allocation, the + * number of pages allocated is bucket + 1, pgno is the page number of the + * first newly allocated bucket. + * + * bucket: Old maximum bucket number. + * mmpgno: Master meta-data page number (0 if same as mpgno). + * mmetalsn: Lsn of the master meta-data page. + * mpgno: Meta-data page number. + * metalsn: Lsn of the meta-data page. + * pgno: Page allocated to bucket + 1 (first newly allocated page) + * pagelsn: Lsn of either the first page allocated (if newalloc == 0) or + * the last page allocated (if newalloc == 1). + * newalloc: 1 indicates that this record did the actual allocation; + * 0 indicates that the pages were already allocated from a + * previous (failed) allocation. + * last_pgno: the last page in the file before this op (4.3+). + */ +BEGIN_COMPAT metagroup 42 29 +DB fileid int32_t ld +ARG bucket u_int32_t lu +ARG mmpgno db_pgno_t lu +POINTER mmetalsn DB_LSN * lu +ARG mpgno db_pgno_t lu +POINTER metalsn DB_LSN * lu +ARG pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG newalloc u_int32_t lu +END + +BEGIN metagroup 43 29 +DB fileid int32_t ld +ARG bucket u_int32_t lu +ARG mmpgno db_pgno_t lu +POINTER mmetalsn DB_LSN * lu +ARG mpgno db_pgno_t lu +POINTER metalsn DB_LSN * lu +ARG pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG newalloc u_int32_t lu +ARG last_pgno db_pgno_t lu +END + +/* + * groupalloc + * + * This is used in conjunction with MPOOL_NEW_GROUP when we are creating + * a new database to make sure that we recreate or reclaim free pages + * when we allocate a chunk of contiguous ones during database creation. 
 *
 * meta_lsn: meta-data lsn
 * start_pgno: starting page number
 * num: number of allocated pages
 * unused: unused, historically the meta-data free list page number
 * last_pgno: the last page in the file before this op (4.3+).
 */
BEGIN_COMPAT groupalloc 42 32
DB	fileid		int32_t		ld
POINTER	meta_lsn	DB_LSN *	lu
ARG	start_pgno	db_pgno_t	lu
ARG	num		u_int32_t	lu
ARG	free		db_pgno_t	lu
END

BEGIN groupalloc 43 32
DB	fileid		int32_t		ld
POINTER	meta_lsn	DB_LSN *	lu
ARG	start_pgno	db_pgno_t	lu
ARG	num		u_int32_t	lu
ARG	unused		db_pgno_t	lu
ARG	last_pgno	db_pgno_t	lu
END

/*
 * Changeslot
 * Change the entry in a spares table slot from the "old" page to the "new"
 * page.
 */
BEGIN changeslot 50 35
DB	fileid		int32_t		ld
POINTER	meta_lsn	DB_LSN *	lu
ARG	slot		u_int32_t	lu
ARG	old		db_pgno_t	lu
ARG	new		db_pgno_t	lu
END

/*
 * Contract
 * Contract the hash table by removing the last "bucket".  "pgno" is the
 * page number for that bucket.
 */
BEGIN contract 50 37
DB	fileid		int32_t		ld
ARG	meta		db_pgno_t	lu
POINTER	meta_lsn	DB_LSN *	lu
ARG	bucket		u_int32_t	lu
ARG	pgno		db_pgno_t	lu
END

/*
 * Records for backing out cursor adjustment.
 * curadj - added or deleted a record or a dup
 *	within a record.
 *	pgno - page that was affected
 *	indx - indx of record affected.
 *	len - if a dup its length.
 *	dup_off - if a dup its offset
 *	add - 1 if add 0 if delete
 *	is_dup - 1 if dup 0 otherwise.
 *	order - order assigned to this deleted record or dup.
 *
 * chgpg - removed a page, move the records to a new page
 *	mode - CHGPG page was deleted or records moved to a new page.
 *	     - SPLIT we split a bucket.
 *	     - DUP we converted to off page duplicates.
 *	old_pgno, new_pgno - old and new page numbers.
 *	old_index, new_index - old and new index numbers, NDX_INVALID if
 *	it affects all records on the page.
+ * For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG, + * and DELLASTPG), we overload old_indx and new_indx to avoid + * needing a new log record type: old_indx stores the only + * indx of interest to these records, and new_indx stores the + * order that's assigned to the lowest deleted record we're + * moving. + */ +BEGIN curadj 42 33 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG len u_int32_t lu +ARG dup_off u_int32_t lu +ARG add int ld +ARG is_dup int ld +ARG order u_int32_t lu +END + +BEGIN chgpg 42 34 +DB fileid int32_t ld +ARG mode db_ham_mode ld +ARG old_pgno db_pgno_t lu +ARG new_pgno db_pgno_t lu +ARG old_indx u_int32_t lu +ARG new_indx u_int32_t lu +END + diff --git a/src/hash/hash_auto.c b/src/hash/hash_auto.c new file mode 100644 index 00000000..4adb6cd9 --- /dev/null +++ b/src/hash/hash_auto.c @@ -0,0 +1,209 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/hash.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __ham_insdel_desc[] = { + {LOGREC_ARG, SSZ(__ham_insdel_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__ham_insdel_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_insdel_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_insdel_args, ndx), "ndx", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_insdel_args, pagelsn), "pagelsn", ""}, + {LOGREC_OP, SSZ(__ham_insdel_args, keytype), "keytype", "%lu"}, + {LOGREC_HDR, SSZ(__ham_insdel_args, key), "key", ""}, + {LOGREC_OP, SSZ(__ham_insdel_args, datatype), "datatype", "%lu"}, + {LOGREC_HDR, SSZ(__ham_insdel_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_insdel_42_desc[] = { + {LOGREC_ARG, SSZ(__ham_insdel_42_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__ham_insdel_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_insdel_42_args, pgno), 
"pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_insdel_42_args, ndx), "ndx", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_insdel_42_args, pagelsn), "pagelsn", ""}, + {LOGREC_DBT, SSZ(__ham_insdel_42_args, key), "key", ""}, + {LOGREC_DBT, SSZ(__ham_insdel_42_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_newpage_desc[] = { + {LOGREC_ARG, SSZ(__ham_newpage_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__ham_newpage_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_newpage_args, prev_pgno), "prev_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_newpage_args, prevlsn), "prevlsn", ""}, + {LOGREC_ARG, SSZ(__ham_newpage_args, new_pgno), "new_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_newpage_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_newpage_args, next_pgno), "next_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_newpage_args, nextlsn), "nextlsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_splitdata_desc[] = { + {LOGREC_DB, SSZ(__ham_splitdata_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_splitdata_args, opcode), "opcode", "%lu"}, + {LOGREC_ARG, SSZ(__ham_splitdata_args, pgno), "pgno", "%lu"}, + {LOGREC_PGDBT, SSZ(__ham_splitdata_args, pageimage), "pageimage", ""}, + {LOGREC_POINTER, SSZ(__ham_splitdata_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_replace_desc[] = { + {LOGREC_DB, SSZ(__ham_replace_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_replace_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_replace_args, ndx), "ndx", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_replace_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_replace_args, off), "off", "%ld"}, + {LOGREC_OP, SSZ(__ham_replace_args, oldtype), "oldtype", "%lu"}, + {LOGREC_HDR, SSZ(__ham_replace_args, olditem), "olditem", ""}, + {LOGREC_OP, SSZ(__ham_replace_args, newtype), "newtype", "%lu"}, + {LOGREC_HDR, SSZ(__ham_replace_args, newitem), "newitem", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC 
__ham_replace_42_desc[] = { + {LOGREC_DB, SSZ(__ham_replace_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_replace_42_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_replace_42_args, ndx), "ndx", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_replace_42_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_replace_42_args, off), "off", "%ld"}, + {LOGREC_DBT, SSZ(__ham_replace_42_args, olditem), "olditem", ""}, + {LOGREC_DBT, SSZ(__ham_replace_42_args, newitem), "newitem", ""}, + {LOGREC_ARG, SSZ(__ham_replace_42_args, makedup), "makedup", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_copypage_desc[] = { + {LOGREC_DB, SSZ(__ham_copypage_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_copypage_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_copypage_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_copypage_args, next_pgno), "next_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_copypage_args, nextlsn), "nextlsn", ""}, + {LOGREC_ARG, SSZ(__ham_copypage_args, nnext_pgno), "nnext_pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_copypage_args, nnextlsn), "nnextlsn", ""}, + {LOGREC_PGDBT, SSZ(__ham_copypage_args, page), "page", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_metagroup_42_desc[] = { + {LOGREC_DB, SSZ(__ham_metagroup_42_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_42_args, bucket), "bucket", "%lu"}, + {LOGREC_ARG, SSZ(__ham_metagroup_42_args, mmpgno), "mmpgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_42_args, mmetalsn), "mmetalsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_42_args, mpgno), "mpgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_42_args, metalsn), "metalsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_42_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_42_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_42_args, newalloc), "newalloc", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_metagroup_desc[] = { + {LOGREC_DB, 
SSZ(__ham_metagroup_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, bucket), "bucket", "%lu"}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, mmpgno), "mmpgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_args, mmetalsn), "mmetalsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, mpgno), "mpgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_args, metalsn), "metalsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, pgno), "pgno", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_metagroup_args, pagelsn), "pagelsn", ""}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, newalloc), "newalloc", "%lu"}, + {LOGREC_ARG, SSZ(__ham_metagroup_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_groupalloc_42_desc[] = { + {LOGREC_DB, SSZ(__ham_groupalloc_42_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__ham_groupalloc_42_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__ham_groupalloc_42_args, start_pgno), "start_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_groupalloc_42_args, num), "num", "%lu"}, + {LOGREC_ARG, SSZ(__ham_groupalloc_42_args, free), "free", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_groupalloc_desc[] = { + {LOGREC_DB, SSZ(__ham_groupalloc_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__ham_groupalloc_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__ham_groupalloc_args, start_pgno), "start_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_groupalloc_args, num), "num", "%lu"}, + {LOGREC_ARG, SSZ(__ham_groupalloc_args, unused), "unused", "%lu"}, + {LOGREC_ARG, SSZ(__ham_groupalloc_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_changeslot_desc[] = { + {LOGREC_DB, SSZ(__ham_changeslot_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__ham_changeslot_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__ham_changeslot_args, slot), "slot", "%lu"}, + {LOGREC_ARG, SSZ(__ham_changeslot_args, old), "old", "%lu"}, + {LOGREC_ARG, SSZ(__ham_changeslot_args, 
new), "new", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_contract_desc[] = { + {LOGREC_DB, SSZ(__ham_contract_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_contract_args, meta), "meta", "%lu"}, + {LOGREC_POINTER, SSZ(__ham_contract_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__ham_contract_args, bucket), "bucket", "%lu"}, + {LOGREC_ARG, SSZ(__ham_contract_args, pgno), "pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_curadj_desc[] = { + {LOGREC_DB, SSZ(__ham_curadj_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_curadj_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, len), "len", "%lu"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, dup_off), "dup_off", "%lu"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, add), "add", "%ld"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, is_dup), "is_dup", "%ld"}, + {LOGREC_ARG, SSZ(__ham_curadj_args, order), "order", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __ham_chgpg_desc[] = { + {LOGREC_DB, SSZ(__ham_chgpg_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__ham_chgpg_args, mode), "mode", "%ld"}, + {LOGREC_ARG, SSZ(__ham_chgpg_args, old_pgno), "old_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_chgpg_args, new_pgno), "new_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__ham_chgpg_args, old_indx), "old_indx", "%lu"}, + {LOGREC_ARG, SSZ(__ham_chgpg_args, new_indx), "new_indx", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __ham_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__ham_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_insdel_recover, DB___ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_newpage_recover, DB___ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_splitdata_recover, DB___ham_splitdata)) != 0) + return (ret); + if ((ret = 
__db_add_recovery_int(env, dtabp, + __ham_replace_recover, DB___ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_copypage_recover, DB___ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_metagroup_recover, DB___ham_metagroup)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_groupalloc_recover, DB___ham_groupalloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_changeslot_recover, DB___ham_changeslot)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_contract_recover, DB___ham_contract)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_curadj_recover, DB___ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_chgpg_recover, DB___ham_chgpg)) != 0) + return (ret); + return (0); +} diff --git a/src/hash/hash_autop.c b/src/hash/hash_autop.c new file mode 100644 index 00000000..f1ef0042 --- /dev/null +++ b/src/hash/hash_autop.c @@ -0,0 +1,314 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#ifdef HAVE_HASH +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/hash.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __ham_insdel_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_insdel_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_insdel", __ham_insdel_desc, info)); +} + +/* + * PUBLIC: int __ham_insdel_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_insdel_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_insdel_42", __ham_insdel_42_desc, info)); +} + +/* + * PUBLIC: int __ham_newpage_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_newpage_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_newpage", __ham_newpage_desc, info)); +} + +/* + * PUBLIC: int __ham_splitdata_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_splitdata_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_splitdata", __ham_splitdata_desc, info)); +} + +/* + * PUBLIC: int __ham_replace_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_replace_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; 
+ void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_replace", __ham_replace_desc, info)); +} + +/* + * PUBLIC: int __ham_replace_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_replace_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_replace_42", __ham_replace_42_desc, info)); +} + +/* + * PUBLIC: int __ham_copypage_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_copypage_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_copypage", __ham_copypage_desc, info)); +} + +/* + * PUBLIC: int __ham_metagroup_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_metagroup_42", __ham_metagroup_42_desc, info)); +} + +/* + * PUBLIC: int __ham_metagroup_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_metagroup", __ham_metagroup_desc, info)); +} + +/* + * PUBLIC: int __ham_groupalloc_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_groupalloc_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, 
DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_groupalloc_42", __ham_groupalloc_42_desc, info)); +} + +/* + * PUBLIC: int __ham_groupalloc_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_groupalloc_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_groupalloc", __ham_groupalloc_desc, info)); +} + +/* + * PUBLIC: int __ham_changeslot_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_changeslot_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_changeslot", __ham_changeslot_desc, info)); +} + +/* + * PUBLIC: int __ham_contract_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_contract_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_contract", __ham_contract_desc, info)); +} + +/* + * PUBLIC: int __ham_curadj_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_curadj_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__ham_curadj", __ham_curadj_desc, info)); +} + +/* + * PUBLIC: int __ham_chgpg_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_chgpg_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, 
"__ham_chgpg", __ham_chgpg_desc, info)); +} + +/* + * PUBLIC: int __ham_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__ham_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_insdel_print, DB___ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_newpage_print, DB___ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_splitdata_print, DB___ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_replace_print, DB___ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_copypage_print, DB___ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_metagroup_print, DB___ham_metagroup)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_groupalloc_print, DB___ham_groupalloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_changeslot_print, DB___ham_changeslot)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_contract_print, DB___ham_contract)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_curadj_print, DB___ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_chgpg_print, DB___ham_chgpg)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_HASH */ diff --git a/src/hash/hash_compact.c b/src/hash/hash_compact.c new file mode 100644 index 00000000..f7f33e9b --- /dev/null +++ b/src/hash/hash_compact.c @@ -0,0 +1,535 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" +#include "dbinc/mp.h" + +static int __ham_copy_data __P((DBC *, PAGE *, DB_COMPACT *, int *)); +static int __ham_truncate_overflow __P((DBC *, + PAGE *, u_int32_t, DB_COMPACT *, int *)); + +/* + * __ham_compact_int -- internal HASH compaction routine. + * + * PUBLIC: int __ham_compact_int __P((DBC *, + * PUBLIC: DBT *, DBT *, u_int32_t, DB_COMPACT *, int *, u_int32_t)); + */ +int +__ham_compact_int(dbc, start, stop, factor, c_data, donep, flags) + DBC *dbc; + DBT *start, *stop; + u_int32_t factor; + DB_COMPACT *c_data; + int *donep; + u_int32_t flags; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + db_pgno_t origpgno, pgno; + int check_trunc, pgs_done, ret, t_ret; + u_int32_t empty_buckets, i, stop_bucket; + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + pgs_done = 0; + empty_buckets = 0; + check_trunc = c_data->compact_truncate != PGNO_INVALID; + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + + if (stop != NULL && stop->size != 0) + stop_bucket = *(u_int32_t *)stop->data; + else + stop_bucket = hcp->hdr->max_bucket; + + if (start != NULL && start->size != 0) + hcp->bucket = *(u_int32_t *)start->data; + else + hcp->bucket = 0; + + for (; hcp->bucket <= stop_bucket && ret == 0; hcp->bucket++) { + /* + * For each bucket first move records toward the head of + * the bucket. + */ + hcp->indx = NDX_INVALID; + F_CLR(hcp, H_ISDUP); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + pgno = PGNO_INVALID; + ret = __ham_item_next(dbc, DB_LOCK_WRITE, &pgno); + + /* + * If the bucket is empty, just note it, otherwise process it. + * If there are any records there must be some in the head + * of the bucket. 
+ */ + if (ret == DB_NOTFOUND ) { + empty_buckets++; + c_data->compact_pages_examine++; + DB_ASSERT(dbp->env, + PREV_PGNO(hcp->page) == PGNO_INVALID && + NEXT_PGNO(hcp->page) == PGNO_INVALID); + goto err; + } else if (ret != 0) + break; + c_data->compact_pages_examine++; + + if (NEXT_PGNO(hcp->page) != PGNO_INVALID) { + if ((ret = + __ham_compact_bucket(dbc, c_data, &pgs_done)) != 0) + goto err; + pgno = PGNO_INVALID; + if ((ret = __ham_item(dbc, DB_LOCK_WRITE, &pgno)) != 0) + goto err; + } + + /* + * Loop through the items in this page in the bucket and process + * overflow records and off page duplicate sets. + */ + while (ret == 0) { + /* Handle off page duplicate trees. */ + if (pgno == PGNO_INVALID) + goto no_opd; + if (check_trunc && + pgno > c_data->compact_truncate) { + c_data->compact_pages_examine++; + /* + * Truncate this page if possible. + * We must update the parent here + * because the page number is + * not aligned. + */ + origpgno = pgno; + if ((ret = __db_truncate_root(dbc, hcp->page, + H_DATAINDEX(hcp->indx), &pgno, 0)) != 0) + break; + if (pgno != origpgno) { + memcpy(HOFFDUP_PGNO(H_PAIRDATA(dbp, + hcp->page, hcp->indx)), + &pgno, sizeof(db_pgno_t)); + pgs_done++; + c_data->compact_pages--; + } + } + /* + * Compact the off page duplicate tree. + */ + if ((ret = __bam_compact_opd(dbc, + pgno, NULL, factor, c_data, &pgs_done)) != 0) + break; + +no_opd: if (check_trunc && HPAGE_PTYPE(H_PAIRDATA( + dbp, hcp->page, hcp->indx)) == H_OFFPAGE) { + /* This is an overflow chain. */ + if ((ret = __ham_truncate_overflow(dbc, + hcp->page, H_DATAINDEX(hcp->indx), + c_data, &pgs_done)) != 0) + break; + } + + /* Check for an overflow key. */ + if (check_trunc && HPAGE_PTYPE(H_PAIRKEY( + dbp, hcp->page, hcp->indx)) == H_OFFPAGE) { + /* This is an overflow chain. 
*/ + if ((ret = __ham_truncate_overflow(dbc, + hcp->page, H_KEYINDEX(hcp->indx), + c_data, &pgs_done)) != 0) + break; + } + + pgno = PGNO_INVALID; + ret = __ham_item_next(dbc, DB_LOCK_WRITE, &pgno); + } + +err: if (hcp->page != NULL && + (t_ret = __memp_fput(mpf, dbc->thread_info, + hcp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (ret == DB_NOTFOUND) + ret = 0; + hcp->page = NULL; + hcp->pgno = pgno = PGNO_INVALID; + /* + * If we are in an auto-transaction and we updated something + * return to the caller to commit this transaction to + * avoid holding locks. Otherwise process the next bucket. + * We can drop the lock if we did not do anything. + * We always must commit the txn if we are in MVCC + * as we have dirtied the hash buckets. + */ + if (ret == 0 && + atomic_read(&dbp->mpf->mfp->multiversion) == 0 && + (pgs_done == 0 || dbc->txn == NULL)) + ret = __LPUT(dbc, hcp->lock); + else if (LF_ISSET(DB_AUTO_COMMIT)) { + if (ret == 0) + hcp->bucket++; + break; + } + } + /* + * If we saw any empty buckets and we are freeing space we + * want to contract the table before dropping the metadata + * page. Wait till we are done with everything else as we + * need to get an exclusive lock on the metadata page. + */ + if (ret == 0 && empty_buckets != 0 && LF_ISSET(DB_FREE_SPACE)) { + for (i = 0; i < empty_buckets && hcp->hdr->max_bucket > 2; i++) + if ((ret = __ham_contract_table(dbc, c_data)) != 0) + break; + } + + if (ret == 0) + ret = __db_retcopy(dbp->env, start, &hcp->bucket, + sizeof(hcp->bucket), &start->data, &start->ulen); + (void)__ham_release_meta(dbc); + c_data->compact_empty_buckets += empty_buckets; + if (hcp->bucket > stop_bucket) + *donep = 1; + return (ret); +} + +/* + * __ham_compact_bucket -- move data to as few pages as possible. 
+ * + * PUBLIC: int __ham_compact_bucket __P((DBC *, DB_COMPACT *, int *)); + */ +int +__ham_compact_bucket(dbc, c_data, pgs_donep) + DBC *dbc; + DB_COMPACT *c_data; + int *pgs_donep; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + PAGE *pg; + db_pgno_t pgno; + int check_trunc, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + mpf = dbp->mpf; + pg = hcp->page; + check_trunc = c_data->compact_truncate != PGNO_INVALID; + ret = 0; + + pgno = hcp->pgno; + do { + if (pg == NULL && (ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0) + break; + if (NEXT_PGNO(pg) == PGNO_INVALID) + break; + /* Sort any unsorted pages before adding to the page. */ + if (TYPE(pg) == P_HASH_UNSORTED) { + if ((ret = __ham_sort_page_cursor(dbc, pg)) != 0) + break; + (*pgs_donep)++; + } + + /* If this is not the head try to move it to a lower page. */ + if (check_trunc && PREV_PGNO(pg) != PGNO_INVALID && + PGNO(pg) > c_data->compact_truncate && + (ret = __db_exchange_page(dbc, &pg, + hcp->page, PGNO_INVALID, DB_EXCH_FREE)) != 0) + break; + if (pgno != PGNO(pg)) + (*pgs_donep)++; + + if ((ret = __ham_copy_data(dbc, pg, c_data, pgs_donep)) != 0) + break; + pgno = NEXT_PGNO(pg); + if (pg != hcp->page && (ret = __memp_fput(mpf, + dbc->thread_info, pg, dbc->priority)) != 0) + break; + pg = NULL; + } while (pgno != PGNO_INVALID); + + if (pg != NULL && pg != hcp->page && + (t_ret = __memp_fput(mpf, dbc->thread_info, pg, dbc->priority)) && + ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __ham_copy_data -- copy as many records as possible from next page + */ +static int +__ham_copy_data(dbc, pg, c_data, pgs_donep) + DBC *dbc; + PAGE *pg; + DB_COMPACT *c_data; + int *pgs_donep; +{ + DB *dbp; + DBC *newdbc; + DBT data, key; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp, *ncp; + PAGE *nextpage; + db_pgno_t origpgno; + int i, nument, records, ret, t_ret; + u_int32_t len; + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR 
*)dbc->internal; + records = 0; + + if ((ret = __dbc_dup(dbc, &newdbc, 0)) != 0) + return (ret); + ncp = (HASH_CURSOR *)newdbc->internal; + ncp->hdr = hcp->hdr; + + /* + * Copy data to the front of the bucket. Loop until either we + * have not replaced the next page or there is no next page. + * If the next page was not removed then it still has data + * on it. + */ + origpgno = PGNO_INVALID; + while (origpgno != NEXT_PGNO(pg) && + (origpgno = NEXT_PGNO(pg)) != PGNO_INVALID) { + + if ((ret = __memp_fget(mpf, &NEXT_PGNO(pg), dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &nextpage)) != 0) + break; + + c_data->compact_pages_examine++; + ncp->page = nextpage; + ncp->pgno = PGNO(nextpage); + ncp->indx = 0; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + nument = NUM_ENT(nextpage); + DB_ASSERT(dbp->env, nument != 0); + for (i = 0; i < nument; i += 2) { + len = LEN_HITEM(dbp, nextpage, dbp->pgsize, 0) + + LEN_HITEM(dbp, nextpage, dbp->pgsize, 1) + + 2 * sizeof(db_indx_t); + if (P_FREESPACE(dbp, pg) < len) + continue; + + if ((ret = + __ham_copypair(dbc, nextpage, 0, pg, NULL, 1)) != 0) + break; + + records++; + if ((ret = __ham_del_pair(newdbc, + HAM_DEL_IGNORE_OFFPAGE, pg)) != 0) + break; + if (!STD_LOCKING(dbc)) { + if ((ret = __ham_dirty_meta(dbc, 0)) != 0) + return (ret); + ++hcp->hdr->nelem; + } + } + /* + * If we moved all the records then __ham_del_pair will + * have deleted the nextpage. + */ + if (records >= nument/2) { + c_data->compact_pages_examine++; + c_data->compact_pages_free++; + COMPACT_TRUNCATE(c_data); + } + if (ncp->page != NULL && + (t_ret = __memp_fput(mpf, dbc->thread_info, + ncp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + ncp->page = NULL; + ncp->pgno = PGNO_INVALID; + } + + /* + * If __ham_del_pair freed a page then we needed to dirty the metapage + * and it could change so we need to copy it back to hcp. 
+ */ + hcp->hdr = ncp->hdr; + ncp->hdr = NULL; + if ((t_ret = __ham_release_meta(newdbc)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __dbc_close(newdbc)) != 0 && ret == 0) + ret = t_ret; + if (records != 0) + (*pgs_donep)++; + return (ret); +} + +/* + * __ham_truncate_overflow -- try to truncate pages from an overflow chain. + */ +static int +__ham_truncate_overflow(dbc, page, indx, c_data, pgs_done) + DBC *dbc; + PAGE *page; + u_int32_t indx; + DB_COMPACT *c_data; + int *pgs_done; +{ + DB *dbp; + db_pgno_t origpgno, pgno; + int ret; + + dbp = dbc->dbp; + memcpy(&pgno, + HOFFPAGE_PGNO(P_ENTRY(dbp, page, indx)), sizeof(db_pgno_t)); + if (pgno > c_data->compact_truncate) { + c_data->compact_pages_examine++; + origpgno = pgno; + if ((ret = __db_truncate_root(dbc, page, indx, &pgno, 0)) != 0) + return (ret); + if (pgno != origpgno) { + memcpy(HOFFPAGE_PGNO(P_ENTRY(dbp, page, indx)), + &pgno, sizeof(db_pgno_t)); + (*pgs_done)++; + c_data->compact_pages--; + } + } + if ((ret = __db_truncate_overflow(dbc, pgno, NULL, c_data)) != 0) + return (ret); + return (0); +} + +#ifdef HAVE_FTRUNCATE +/* + * __ham_compact_hash -- compact the hash table. 
+ * PUBLIC: int __ham_compact_hash __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_COMPACT *)); + */ +int +__ham_compact_hash(dbp, ip, txn, c_data) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_COMPACT *c_data; +{ + DBC *dbc; + DB_LOCK lock; + HASH_CURSOR *hcp; + HMETA *meta; + PAGE *oldpage; + db_pgno_t free_pgno, last_pgno, pgno, start_pgno; + int flags, local_txn, ret, t_ret; + u_int32_t bucket, i; + + local_txn = IS_DB_AUTO_COMMIT(dbp, txn); + oldpage = NULL; + dbc = NULL; + LOCK_INIT(lock); + + if (local_txn && + (ret = __txn_begin(dbp->env, ip, txn, &txn, 0)) != 0) + return (ret); + + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + goto err1; + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0 || + (ret = __ham_dirty_meta(dbc, 0)) != 0) + goto err1; + + meta = hcp->hdr; + + LOCK_CHECK_OFF(ip); + + /* + * Find contiguous lower numbered pages for each hash table segment. + */ + for (i = 1; i <= __db_log2(meta->max_bucket); i++) { + bucket = i == 0 ? 0 : 1 << (i - 1); + start_pgno = meta->spares[i] + bucket; + if ((ret = __db_find_free(dbc, P_HASH, + bucket, start_pgno, &free_pgno)) != 0) { + if (ret != DB_NOTFOUND) + break; + ret = 0; + continue; + } + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_changeslot_log(dbp, + dbc->txn, &LSN(meta), + 0, &LSN(meta), i, start_pgno, free_pgno)) != 0) + break; + } else + LSN_NOT_LOGGED(LSN(meta)); + last_pgno = free_pgno + bucket; + /* + * March through the list swapping pages. If the page is + * empty we just need to free it. If we are just sliding + * things down don't free the pages that will be reused. + * Note that __db_exchange_page returns the new page so + * we must put it. 
+ */ + for (pgno = start_pgno; + pgno < start_pgno + bucket; pgno++, free_pgno++) { + if ((ret = __db_lget(dbc, + LCK_COUPLE, pgno, DB_LOCK_WRITE, 0, &lock)) != 0) + goto err; + if ((ret = __memp_fget(dbp->mpf, &pgno, + dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &oldpage)) != 0) + goto err; + if (NUM_ENT(oldpage) != 0) { + if (pgno < last_pgno) + flags = 0; + else + flags = DB_EXCH_FREE; + if ((ret = __db_exchange_page(dbc, + &oldpage, NULL, free_pgno, flags)) != 0) + goto err; + } else if (pgno >= last_pgno) { + if ((ret = __db_free(dbc, oldpage, 0)) != 0) + goto err; + COMPACT_TRUNCATE(c_data); + oldpage = NULL; + } + if (oldpage != NULL && (ret = __memp_fput(dbp->mpf, + dbc->thread_info, oldpage, dbc->priority)) != 0) + goto err; + ret = 0; + oldpage = NULL; + c_data->compact_pages_examine++; + } + meta->spares[i] = free_pgno - (2 * bucket); + } + if (ret == 0 && F_ISSET(dbp, DB_AM_SUBDB) && + PGNO(hcp->hdr) > c_data->compact_truncate) + ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr, c_data); + +err: if (oldpage != NULL && (t_ret = __memp_fput(dbp->mpf, + dbc->thread_info, oldpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + LOCK_CHECK_ON(ip); +err1: if (dbc != NULL) { + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + } + if (local_txn && (t_ret = (ret == 0 ? + __txn_commit(txn, 0) : __txn_abort(txn))) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} +#endif diff --git a/src/hash/hash_conv.c b/src/hash/hash_conv.c new file mode 100644 index 00000000..2c75538c --- /dev/null +++ b/src/hash/hash_conv.c @@ -0,0 +1,110 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/hash.h" + +/* + * __ham_pgin -- + * Convert host-specific page layout from the host-independent format + * stored on disk. + * + * PUBLIC: int __ham_pgin __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__ham_pgin(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + h = pp; + pginfo = (DB_PGINFO *)cookie->data; + + /* + * The hash access method does blind reads of pages, causing them + * to be created. If the type field isn't set it's one of them, + * initialize the rest of the page and return. + */ + if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) { + P_INIT(pp, (db_indx_t)pginfo->db_pagesize, + pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + return (0); + } + + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + return (h->type == P_HASHMETA ? __ham_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 1)); +} + +/* + * __ham_pgout -- + * Convert host-specific page layout to the host-independent format + * stored on disk. + * + * PUBLIC: int __ham_pgout __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__ham_pgout(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + return (h->type == P_HASHMETA ? __ham_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 0)); +} + +/* + * __ham_mswap -- + * Swap the bytes on the hash metadata page. 
+ * + * PUBLIC: int __ham_mswap __P((ENV *, void *)); + */ +int +__ham_mswap(env, pg) + ENV *env; + void *pg; +{ + u_int8_t *p; + int i; + + COMPQUIET(env, NULL); + + __db_metaswap(pg); + p = (u_int8_t *)pg + sizeof(DBMETA); + + SWAP32(p); /* max_bucket */ + SWAP32(p); /* high_mask */ + SWAP32(p); /* low_mask */ + SWAP32(p); /* ffactor */ + SWAP32(p); /* nelem */ + SWAP32(p); /* h_charkey */ + for (i = 0; i < NCACHED; ++i) + SWAP32(p); /* spares */ + p += 59 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ + return (0); +} diff --git a/src/hash/hash_dup.c b/src/hash/hash_dup.c new file mode 100644 index 00000000..bdf3fd0b --- /dev/null +++ b/src/hash/hash_dup.c @@ -0,0 +1,943 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Manipulation of duplicates for the hash package. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/btree.h" +#include "dbinc/mp.h" + +static int __hamc_chgpg __P((DBC *, + db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); +static int __ham_check_move __P((DBC *, u_int32_t)); +static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t)); +static int __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); +static int __hamc_chgpg_func + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); + +/* + * Called from hash_access to add a duplicate key. nval is the new + * value that we want to add. The flags correspond to the flag values + * to cursor_put indicating where to add the new element. + * There are 4 cases. + * Case 1: The existing duplicate set already resides on a separate page. + * We return and let the common code handle this. + * Case 2: The element is small enough to just be added to the existing set. + * Case 3: The element is large enough to be a big item, so we're going to + * have to push the set onto a new page. 
+ * Case 4: The element is large enough to push the duplicate set onto a + * separate page. + * + * PUBLIC: int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); + */ +int +__ham_add_dup(dbc, nval, flags, pgnop) + DBC *dbc; + DBT *nval; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DBT pval, tmp_val; + DB_MPOOLFILE *mpf; + ENV *env; + HASH_CURSOR *hcp; + u_int32_t add_bytes, new_size; + int cmp, ret; + u_int8_t *hk; + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + DB_ASSERT(env, flags != DB_CURRENT); + + add_bytes = nval->size + + (F_ISSET(nval, DB_DBT_PARTIAL) ? nval->doff : 0); + add_bytes = DUP_SIZE(add_bytes); + + if ((ret = __ham_check_move(dbc, add_bytes)) != 0) + return (ret); + + /* + * Check if resulting duplicate set is going to need to go + * onto a separate duplicate page. If so, convert the + * duplicate set and add the new one. After conversion, + * hcp->dndx is the first free ndx or the index of the + * current pointer into the duplicate set. + */ + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + /* Add the len bytes to the current singleton. */ + if (HPAGE_PTYPE(hk) != H_DUPLICATE) + add_bytes += DUP_SIZE(0); + new_size = + LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + + add_bytes; + + /* + * We convert to off-page duplicates if the item is a big item, + * the addition of the new item will make the set large, or + * if there isn't enough room on this page to add the next item. + */ + if (HPAGE_PTYPE(hk) != H_OFFDUP && + (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || + add_bytes > P_FREESPACE(dbp, hcp->page))) { + + if ((ret = __ham_dup_convert(dbc)) != 0) + return (ret); + return (hcp->opd->am_put(hcp->opd, + NULL, nval, flags, NULL)); + } + + /* There are two separate cases here: on page and off page. 
*/ + if (HPAGE_PTYPE(hk) != H_OFFDUP) { + if (HPAGE_PTYPE(hk) != H_DUPLICATE) { + pval.flags = 0; + pval.data = HKEYDATA_DATA(hk); + pval.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, + hcp->indx); + if ((ret = __ham_make_dup(env, + &pval, &tmp_val, &dbc->my_rdata.data, + &dbc->my_rdata.ulen)) != 0 || (ret = + __ham_replpair(dbc, &tmp_val, H_DUPLICATE)) != 0) + return (ret); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + HPAGE_PTYPE(hk) = H_DUPLICATE; + + /* + * Update the cursor position since we now are in + * duplicates. + */ + F_SET(hcp, H_ISDUP); + hcp->dup_off = 0; + hcp->dup_len = pval.size; + hcp->dup_tlen = DUP_SIZE(hcp->dup_len); + } + + /* Now make the new entry a duplicate. */ + if ((ret = __ham_make_dup(env, nval, + &tmp_val, &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) + return (ret); + + tmp_val.dlen = 0; + switch (flags) { /* On page. */ + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_OVERWRITE_DUP: + if (dbp->dup_compare != NULL) { + __ham_dsearch(dbc, + nval, &tmp_val.doff, &cmp, flags); + + /* + * Duplicate duplicates are not supported w/ + * sorted dups. We can either overwrite or + * return DB_KEYEXIST. + */ + if (cmp == 0) { + if (flags == DB_OVERWRITE_DUP) + return (__ham_overwrite(dbc, + nval, flags)); + return (__db_duperr(dbp, flags)); + } + } else { + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, + dbp->pgsize, hcp->indx); + hcp->dup_len = nval->size; + F_SET(hcp, H_ISDUP); + if (flags == DB_KEYFIRST) + hcp->dup_off = tmp_val.doff = 0; + else + hcp->dup_off = + tmp_val.doff = hcp->dup_tlen; + } + break; + case DB_BEFORE: + tmp_val.doff = hcp->dup_off; + break; + case DB_AFTER: + tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); + break; + default: + return (__db_unknown_path(env, "__ham_add_dup")); + } + + /* Add the duplicate. 
*/ + if ((ret = __memp_dirty(mpf, &hcp->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0 || + (ret = __ham_replpair(dbc, &tmp_val, H_DUPLICATE)) != 0) + return (ret); + + /* Now, update the cursor if necessary. */ + switch (flags) { + case DB_AFTER: + hcp->dup_off += DUP_SIZE(hcp->dup_len); + hcp->dup_len = nval->size; + hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); + break; + case DB_BEFORE: + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NODUPDATA: + case DB_OVERWRITE_DUP: + hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); + hcp->dup_len = nval->size; + break; + default: + return (__db_unknown_path(env, "__ham_add_dup")); + } + ret = __hamc_update(dbc, tmp_val.size, DB_HAM_CURADJ_ADD, 1); + return (ret); + } + + /* + * If we get here, then we're on duplicate pages; set pgnop and + * return so the common code can handle it. + */ + memcpy(pgnop, HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), + sizeof(db_pgno_t)); + + return (ret); +} + +/* + * Convert an on-page set of duplicates to an offpage set of duplicates. + * + * PUBLIC: int __ham_dup_convert __P((DBC *)); + */ +int +__ham_dup_convert(dbc) + DBC *dbc; +{ + BOVERFLOW bo; + DB *dbp; + DBC **hcs; + DBT dbt; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + ENV *env; + HASH_CURSOR *hcp; + HOFFPAGE ho; + PAGE *dp; + db_indx_t i, len, off; + int c, ret, t_ret; + u_int8_t *p, *pend; + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + /* + * Create a new page for the duplicates. + */ + if ((ret = __db_new(dbc, + dbp->dup_compare == NULL ? P_LRECNO : P_LDUP, NULL, &dp)) != 0) + return (ret); + P_INIT(dp, dbp->pgsize, + dp->pgno, PGNO_INVALID, PGNO_INVALID, LEAFLEVEL, TYPE(dp)); + + /* + * Get the list of cursors that may need to be updated. + */ + if ((ret = __ham_get_clist(dbp, + PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0) + goto err; + + /* + * Now put the duplicates onto the new page. 
+ */ + dbt.flags = 0; + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { + case H_KEYDATA: + /* Simple case, one key on page; move it to dup page. */ + dbt.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); + dbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); + ret = __db_pitem(dbc, + dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt); + goto finish; + case H_OFFPAGE: + /* Simple case, one key on page; move it to dup page. */ + memcpy(&ho, P_ENTRY(dbp, hcp->page, H_DATAINDEX(hcp->indx)), + HOFFPAGE_SIZE); + UMRW_SET(bo.unused1); + B_TSET(bo.type, ho.type); + UMRW_SET(bo.unused2); + bo.pgno = ho.pgno; + bo.tlen = ho.tlen; + dbt.size = BOVERFLOW_SIZE; + dbt.data = &bo; + + ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL); +finish: if (ret == 0) { + /* Update any other cursors. */ + if (hcs != NULL && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = __ham_chgpg_log(dbp, dbc->txn, + &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), + PGNO(dp), hcp->indx, 0)) != 0) + break; + } + for (c = 0; hcs != NULL && hcs[c] != NULL; c++) + if ((ret = __ham_dcursor(hcs[c], + PGNO(dp), 0)) != 0) + break; + } + break; + case H_DUPLICATE: + p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); + pend = p + + LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); + + /* + * We need to maintain the duplicate cursor position. + * Keep track of where we are in the duplicate set via + * the offset, and when it matches the one in the cursor, + * set the off-page duplicate cursor index to the current + * index. 
+ */ + for (off = 0, i = 0; p < pend; i++) { + memcpy(&len, p, sizeof(db_indx_t)); + dbt.size = len; + p += sizeof(db_indx_t); + dbt.data = p; + p += len + sizeof(db_indx_t); + if ((ret = __db_pitem(dbc, dp, + i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0) + break; + + /* Update any other cursors */ + if (hcs != NULL && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = __ham_chgpg_log(dbp, dbc->txn, + &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), + PGNO(dp), hcp->indx, i)) != 0) + break; + } + for (c = 0; hcs != NULL && hcs[c] != NULL; c++) + if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off + == off && (ret = __ham_dcursor(hcs[c], + PGNO(dp), i)) != 0) + goto err; + off += len + 2 * sizeof(db_indx_t); + } + break; + default: + ret = __db_pgfmt(env, hcp->pgno); + break; + } + + /* + * Now attach this to the source page in place of the old duplicate + * item. + */ + if (ret == 0) + ret = __memp_dirty(mpf, + &hcp->page, dbc->thread_info, dbc->txn, dbc->priority, 0); + + if (ret == 0) + ret = __ham_move_offpage(dbc, hcp->page, + (u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp)); + +err: if ((t_ret = __memp_fput(mpf, + dbc->thread_info, dp, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (ret == 0) + hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0; + + if (hcs != NULL) + __os_free(env, hcs); + + return (ret); +} + +/* + * __ham_make_dup + * + * Take a regular dbt and make it into a duplicate item with all the partial + * information set appropriately. If the incoming dbt is a partial, assume + * we are creating a new entry and make sure that we do any initial padding. 
+ * + * PUBLIC: int __ham_make_dup __P((ENV *, + * PUBLIC: const DBT *, DBT *d, void **, u_int32_t *)); + */ +int +__ham_make_dup(env, notdup, duplicate, bufp, sizep) + ENV *env; + const DBT *notdup; + DBT *duplicate; + void **bufp; + u_int32_t *sizep; +{ + db_indx_t tsize, item_size; + int ret; + u_int8_t *p; + + item_size = (db_indx_t)notdup->size; + if (F_ISSET(notdup, DB_DBT_PARTIAL)) + item_size += notdup->doff; + + tsize = DUP_SIZE(item_size); + if ((ret = __ham_init_dbt(env, duplicate, tsize, bufp, sizep)) != 0) + return (ret); + + duplicate->dlen = 0; + duplicate->flags = notdup->flags; + F_SET(duplicate, DB_DBT_PARTIAL); + + p = duplicate->data; + memcpy(p, &item_size, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + if (F_ISSET(notdup, DB_DBT_PARTIAL)) { + memset(p, 0, notdup->doff); + p += notdup->doff; + } + memcpy(p, notdup->data, notdup->size); + p += notdup->size; + memcpy(p, &item_size, sizeof(db_indx_t)); + + duplicate->doff = 0; + duplicate->dlen = notdup->size; + + return (0); +} + +/* + * __ham_check_move -- + * + * Check if we can do whatever we need to on this page. If not, + * then we'll have to move the current element to a new page. 
 */
static int
__ham_check_move(dbc, add_len)
	DBC *dbc;		/* Cursor positioned on the candidate pair. */
	u_int32_t add_len;	/* Bytes about to be added to the data item. */
{
	DB *dbp;
	DBT k, d;
	DB_LSN new_lsn;
	DB_MPOOLFILE *mpf;
	HASH_CURSOR *hcp;
	PAGE *new_pagep, *next_pagep;
	db_pgno_t next_pgno;
	u_int32_t data_type, key_type, new_datalen, old_len;
	db_indx_t new_indx;
	u_int8_t *hk;
	int found, match, ret;	/* match: output of __ham_getindex, unused. */

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	hcp = (HASH_CURSOR *)dbc->internal;

	hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
	found = 0;

	/*
	 * If the item is already off page duplicates or an offpage item,
	 * then we know we can do whatever we need to do in-place
	 */
	if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE)
		return (0);

	old_len =
	    LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx));
	new_datalen = (old_len - HKEYDATA_SIZE(0)) + add_len;
	/* Converting a plain item to a duplicate adds the dup framing. */
	if (HPAGE_PTYPE(hk) != H_DUPLICATE)
		new_datalen += DUP_SIZE(0);

	/*
	 * We need to add a new page under two conditions:
	 * 1. The addition makes the total data length cross the BIG
	 *    threshold and the OFFDUP structure won't fit on this page.
	 * 2. The addition does not make the total data cross the
	 *    threshold, but the new data won't fit on the page.
	 * If neither of these is true, then we can return.
	 */
	if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE ||
	    HOFFDUP_SIZE - old_len <= P_FREESPACE(dbp, hcp->page)))
		return (0);

	if (!ISBIG(hcp, new_datalen) &&
	    (new_datalen - old_len) <= P_FREESPACE(dbp, hcp->page))
		return (0);

	/*
	 * If we get here, then we need to move the item to a new page.
	 * Check if there are more pages in the chain.  We now need to
	 * update new_datalen to include the size of both the key and
	 * the data that we need to move.
	 */

	new_datalen = ISBIG(hcp, new_datalen) ?
	    HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
	new_datalen +=
	    LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx));

	/* Walk the overflow chain looking for a page with enough room. */
	new_pagep = NULL;
	next_pagep = hcp->page;
	for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID;
	    next_pgno = NEXT_PGNO(next_pagep)) {
		if (next_pagep != hcp->page && (ret = __memp_fput(mpf,
		    dbc->thread_info, next_pagep, dbc->priority)) != 0)
			return (ret);

		if ((ret = __memp_fget(mpf,
		    &next_pgno, dbc->thread_info, dbc->txn,
		    DB_MPOOL_CREATE, &next_pagep)) != 0)
			return (ret);

		if (P_FREESPACE(dbp, next_pagep) >= new_datalen) {
			found = 1;
			break;
		}
	}

	if (found != 0) {
		/* Found a page with space, dirty it and the original. */
		new_pagep = next_pagep;
		if ((ret = __memp_dirty(mpf, &hcp->page,
		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
			goto err;
		if ((ret = __memp_dirty(mpf, &new_pagep,
		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
			goto err;
	} else {
		if ((ret = __memp_dirty(mpf, &next_pagep,
		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
			goto err;

		/* Add new page at the end of the chain. */
		new_pagep = next_pagep;
		if ((ret = __ham_add_ovflpage(dbc, &new_pagep)) != 0)
			goto err;

		if (next_pagep != hcp->page) {
			if ((ret = __memp_fput(mpf,
			    dbc->thread_info, next_pagep, dbc->priority)) != 0)
				goto err;
			next_pagep = NULL;
			/* Dirty the original page to update it. */
			if ((ret = __memp_dirty(mpf, &hcp->page,
			    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
				goto err;
		}
	}

	/* Copy the item to the new page.
	 */
	if (DBC_LOGGING(dbc)) {
		memset(&k, 0, sizeof(DBT));
		d.flags = 0;
		if (HPAGE_PTYPE(
		    H_PAIRKEY(dbp, hcp->page, hcp->indx)) == H_OFFPAGE) {
			k.data = H_PAIRKEY(dbp, hcp->page, hcp->indx);
			k.size = HOFFPAGE_SIZE;
			key_type = H_OFFPAGE;
		} else {
			k.data =
			    HKEYDATA_DATA(H_PAIRKEY(dbp, hcp->page, hcp->indx));
			k.size =
			    LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx);
			key_type = H_KEYDATA;
		}

		/* Resolve the insert index so it can be written to the log. */
		if ((ret = __ham_getindex(dbc, new_pagep, &k,
		    key_type, &match, &new_indx)) != 0)
			/*
			 * NOTE(review): this early return appears to leave
			 * new_pagep pinned; the __ham_insdel_log failure path
			 * below does an __memp_fput first.  Confirm whether a
			 * put is needed here as well.
			 */
			return (ret);

		if ((data_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) {
			d.data = hk;
			d.size = HOFFPAGE_SIZE;
		} else if (data_type == H_OFFDUP) {
			d.data = hk;
			d.size = HOFFDUP_SIZE;
		} else {
			d.data = HKEYDATA_DATA(hk);
			d.size = LEN_HDATA(dbp,
			    hcp->page, dbp->pgsize, hcp->indx);
		}

		if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn,
		    0, PUTPAIR, PGNO(new_pagep), (u_int32_t)new_indx,
		    &LSN(new_pagep), OP_SET(key_type, new_pagep), &k,
		    OP_SET(data_type, new_pagep), &d)) != 0) {
			(void)__memp_fput(mpf,
			    dbc->thread_info, new_pagep, dbc->priority);
			return (ret);
		}
	} else {
		LSN_NOT_LOGGED(new_lsn);
		/*
		 * Ensure that an invalid index is passed to __ham_copypair, so
		 * it knows to resolve the index. Resolving the insert index
		 * here would require creating a temporary DBT with the key,
		 * and calling __ham_getindex. Let __ham_copypair do the
		 * resolution using the final key DBT.
		 */
		new_indx = NDX_INVALID;
	}

	/* Move lsn onto page. */
	LSN(new_pagep) = new_lsn;	/* Structure assignment. */

	if ((ret = __ham_copypair(dbc, hcp->page,
	    H_KEYINDEX(hcp->indx), new_pagep, &new_indx, 0)) != 0)
		goto err;

	/* Update all cursors that used to point to this item. */
	if ((ret = __hamc_chgpg(dbc, PGNO(hcp->page), H_KEYINDEX(hcp->indx),
	    PGNO(new_pagep), new_indx)) != 0)
		goto err;

	/* Now delete the pair from the current page.
	 */
	if ((ret = __ham_del_pair(dbc, HAM_DEL_NO_RECLAIM, NULL)) != 0)
		goto err;

	/*
	 * __ham_del_pair decremented nelem.  This is incorrect;  we
	 * manually copied the element elsewhere, so the total number
	 * of elements hasn't changed.  Increment it again.
	 *
	 * !!!
	 * Note that we still have the metadata page pinned, and
	 * __ham_del_pair dirtied it, so we don't need to set the dirty
	 * flag again.
	 */
	if (!STD_LOCKING(dbc))
		hcp->hdr->nelem++;

	/* Re-point the cursor at the item's new home. */
	ret = __memp_fput(mpf, dbc->thread_info, hcp->page, dbc->priority);
	hcp->page = new_pagep;
	hcp->pgno = PGNO(hcp->page);
	hcp->indx = new_indx;
	F_SET(hcp, H_EXPAND);
	F_CLR(hcp, H_DELETED);

	return (ret);

err:	if (new_pagep != NULL)
		(void)__memp_fput(mpf,
		    dbc->thread_info, new_pagep, dbc->priority);
	if (next_pagep != NULL &&
	    next_pagep != hcp->page && next_pagep != new_pagep)
		(void)__memp_fput(mpf,
		    dbc->thread_info, next_pagep, dbc->priority);
	return (ret);

}

/*
 * __ham_move_offpage --
 *	Replace an onpage set of duplicates with the OFFDUP structure
 *	that references the duplicate page.
 *
 * XXX
 * This is really just a special case of __onpage_replace; we should
 * probably combine them.
+ * + */ +static int +__ham_move_offpage(dbc, pagep, ndx, pgno) + DBC *dbc; + PAGE *pagep; + u_int32_t ndx; + db_pgno_t pgno; +{ + DB *dbp; + DBT new_dbt; + DBT old_dbt; + HOFFDUP od; + db_indx_t i, *inp; + int32_t difflen; + u_int8_t *src; + int ret; + + dbp = dbc->dbp; + od.type = H_OFFDUP; + UMRW_SET(od.unused[0]); + UMRW_SET(od.unused[1]); + UMRW_SET(od.unused[2]); + od.pgno = pgno; + ret = 0; + + if (DBC_LOGGING(dbc)) { + HKEYDATA *hk; + new_dbt.data = &od; + new_dbt.size = HOFFDUP_SIZE; + hk = (HKEYDATA *)P_ENTRY(dbp, pagep, ndx); + if (hk->type == H_KEYDATA || hk->type == H_DUPLICATE) { + old_dbt.data = hk->data; + old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - + SSZA(HKEYDATA, data); + } else { + old_dbt.data = hk; + old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx); + } + if ((ret = __ham_replace_log(dbp, dbc->txn, &LSN(pagep), 0, + PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, + OP_SET(hk->type, pagep), &old_dbt, + OP_SET(H_OFFDUP, pagep), &new_dbt)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(pagep)); + + /* + * difflen is the difference in the lengths, and so may be negative. + * We know that the difference between two unsigned lengths from a + * database page will fit into an int32_t. + */ + difflen = + (int32_t)LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - + (int32_t)HOFFDUP_SIZE; + if (difflen != 0) { + /* Copy data. */ + inp = P_INP(dbp, pagep); + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + memmove(src + difflen, src, inp[ndx] - HOFFSET(pagep)); + HOFFSET(pagep) += difflen; + + /* Update index table. */ + for (i = ndx; i < NUM_ENT(pagep); i++) + inp[i] += difflen; + } + + /* Now copy the offdup entry onto the page. */ + memcpy(P_ENTRY(dbp, pagep, ndx), &od, HOFFDUP_SIZE); + return (ret); +} + +/* + * __ham_dsearch: + * Locate a particular duplicate in a duplicate set. Make sure that + * we exit with the cursor set appropriately. 
+ * + * PUBLIC: void __ham_dsearch + * PUBLIC: __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t)); + */ +void +__ham_dsearch(dbc, dbt, offp, cmpp, flags) + DBC *dbc; + DBT *dbt; + u_int32_t *offp, flags; + int *cmpp; +{ + DB *dbp; + DBT cur; + HASH_CURSOR *hcp; + db_indx_t i, len; + int (*func) __P((DB *, const DBT *, const DBT *)); + u_int8_t *data; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + func = dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare; + + i = F_ISSET(hcp, H_CONTINUE) ? hcp->dup_off: 0; + data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + i; + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); + len = hcp->dup_len; + while (i < hcp->dup_tlen) { + memcpy(&len, data, sizeof(db_indx_t)); + data += sizeof(db_indx_t); + DB_SET_DBT(cur, data, len); + + /* + * If we find an exact match, we're done. If in a sorted + * duplicate set and the item is larger than our test item, + * we're done. In the latter case, if permitting partial + * matches, it's not a failure. + */ + *cmpp = func(dbp, dbt, &cur); + if (*cmpp == 0) + break; + if (*cmpp < 0 && dbp->dup_compare != NULL) { + if (flags == DB_GET_BOTH_RANGE) + *cmpp = 0; + break; + } + + i += len + 2 * sizeof(db_indx_t); + data += len + sizeof(db_indx_t); + } + + *offp = i; + hcp->dup_off = i; + hcp->dup_len = len; + F_SET(hcp, H_ISDUP); +} + +/* + * __ham_dcursor -- + * + * Create an off page duplicate cursor for this cursor. + */ +static int +__ham_dcursor(dbc, pgno, indx) + DBC *dbc; + db_pgno_t pgno; + u_int32_t indx; +{ + BTREE_CURSOR *dcp; + DB *dbp; + HASH_CURSOR *hcp; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __dbc_newopd(dbc, pgno, hcp->opd, &hcp->opd)) != 0) + return (ret); + + dcp = (BTREE_CURSOR *)hcp->opd->internal; + dcp->pgno = pgno; + dcp->indx = indx; + + if (dbp->dup_compare == NULL) { + /* + * Converting to off-page Recno trees is tricky. 
The
		 * record number for the cursor is the index + 1 (to
		 * convert to 1-based record numbers).
		 */
		dcp->recno = indx + 1;
	}

	/*
	 * Transfer the deleted flag from the top-level cursor to the
	 * created one.
	 */
	if (F_ISSET(hcp, H_DELETED)) {
		F_SET(dcp, C_DELETED);
		F_CLR(hcp, H_DELETED);
	}

	return (0);
}

/* Arguments threaded through __db_walk_cursors to __hamc_chgpg_func. */
struct __hamc_chgpg_args {
	db_pgno_t new_pgno;
	db_indx_t new_index;
	DB_TXN *my_txn;		/* Subtransaction, if any; else NULL. */
};

/*
 * __hamc_chgpg_func --
 *	Per-cursor callback: re-point any live hash cursor at the moved
 *	item's new page/index.  Sets *foundp when a cursor belonging to a
 *	different transaction was adjusted.
 */
static int
__hamc_chgpg_func(cp, my_dbc, foundp, old_pgno, old_index, vargs)
	DBC *cp, *my_dbc;
	u_int32_t *foundp;
	db_pgno_t old_pgno;
	u_int32_t old_index;
	void *vargs;
{
	HASH_CURSOR *hcp;
	struct __hamc_chgpg_args *args;

	if (cp == my_dbc || cp->dbtype != DB_HASH)
		return (0);

	hcp = (HASH_CURSOR *)cp->internal;

	/*
	 * If a cursor is deleted, it doesn't refer to this
	 * item--it just happens to have the same indx, but
	 * it points to a former neighbor.  Don't move it.
	 */
	if (F_ISSET(hcp, H_DELETED))
		return (0);

	args = vargs;

	if (hcp->pgno == old_pgno &&
	    hcp->indx == old_index &&
	    !MVCC_SKIP_CURADJ(cp, old_pgno)) {
		hcp->pgno = args->new_pgno;
		hcp->indx = args->new_index;
		if (args->my_txn != NULL && cp->txn != args->my_txn)
			*foundp = 1;
	}
	return (0);
}

/*
 * __hamc_chgpg --
 *	Adjust the cursors after moving an item to a new page.  We only
 *	move cursors that are pointing at this one item and are not
 *	deleted; since we only touch non-deleted cursors, and since
 *	(by definition) no item existed at the pgno/indx we're moving the
 *	item to, we're guaranteed that all the cursors we affect here or
 *	on abort really do refer to this one item.
 */
static int
__hamc_chgpg(dbc, old_pgno, old_index, new_pgno, new_index)
	DBC *dbc;
	db_pgno_t old_pgno, new_pgno;
	u_int32_t old_index, new_index;
{
	DB *dbp;
	DB_LSN lsn;
	int ret;
	u_int32_t found;
	struct __hamc_chgpg_args args;

	dbp = dbc->dbp;

	args.my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
	args.new_pgno = new_pgno;
	args.new_index = new_index;

	if ((ret = __db_walk_cursors(dbp, dbc,
	    __hamc_chgpg_func, &found, old_pgno, old_index, &args)) != 0)
		return (ret);
	/* Log the adjustment only if another transaction's cursor moved. */
	if (found != 0 && DBC_LOGGING(dbc)) {
		if ((ret = __ham_chgpg_log(dbp,
		    args.my_txn, &lsn, 0, DB_HAM_CHGPG,
		    old_pgno, new_pgno, old_index, new_index)) != 0)
			return (ret);
	}
	return (0);
}
diff --git a/src/hash/hash_func.c b/src/hash/hash_func.c
new file mode 100644
index 00000000..3988e129
--- /dev/null
+++ b/src/hash/hash_func.c
@@ -0,0 +1,240 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993
 *	Margo Seltzer.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +/* + * __ham_func2 -- + * Phong Vo's linear congruential hash. + * + * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); + */ +#define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) + +u_int32_t +__ham_func2(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *e, *k; + u_int32_t h; + u_int8_t c; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + k = key; + e = k + len; + for (h = 0; k != e;) { + c = *k++; + if (!c && k > e) + break; + DCHARHASH(h, c); + } + return (h); +} + +/* + * __ham_func3 -- + * Ozan Yigit's original sdbm hash. + * + * Ugly, but fast. Break the string up into 8 byte units. On the first time + * through the loop get the "leftover bytes" (strlen % 8). On every other + * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this + * saves us 7 cmp & branch instructions. 
+ * + * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func3(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t n, loop; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + if (len == 0) + return (0); + +#define HASHC n = *k++ + 65599 * n + n = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } + return (n); +} + +/* + * __ham_func4 -- + * Chris Torek's hash function. Although this function performs only + * slightly worse than __ham_func5 on strings, it performs horribly on + * numbers. + * + * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func4(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t h, loop; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + if (len == 0) + return (0); + +#define HASH4a h = (h << 5) - h + *k++; +#define HASH4b h = (h << 5) + h + *k++; +#define HASH4 HASH4b + h = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + return (h); +} + +/* + * Fowler/Noll/Vo hash + * + * The basis of the hash algorithm was taken from an idea sent by email to the + * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and + * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) + * later improved on their algorithm. + * + * The magic is in the interesting relationship between the special prime + * 16777619 (2^24 + 403) and 2^32 and 2^8. 
+ * + * This hash produces the fewest collisions of any function that we've seen so + * far, and works well on both numbers and strings. + * + * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_func5(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + const u_int8_t *k, *e; + u_int32_t h; + + if (dbp != NULL) + COMPQUIET(dbp, NULL); + + k = key; + e = k + len; + for (h = 0; k < e; ++k) { + h *= 16777619; + h ^= *k; + } + return (h); +} + +/* + * __ham_test -- + * + * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); + */ +u_int32_t +__ham_test(dbp, key, len) + DB *dbp; + const void *key; + u_int32_t len; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(len, 0); + return ((u_int32_t)*(char *)key); +} diff --git a/src/hash/hash_meta.c b/src/hash/hash_meta.c new file mode 100644 index 00000000..0996dddf --- /dev/null +++ b/src/hash/hash_meta.c @@ -0,0 +1,168 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +/* + * Acquire the meta-data page. 
+ * + * PUBLIC: int __ham_get_meta __P((DBC *)); + */ +int +__ham_get_meta(dbc) + DBC *dbc; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH *hashp; + HASH_CURSOR *hcp; + u_int32_t revision; + int ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + hashp = dbp->h_internal; + hcp = (HASH_CURSOR *)dbc->internal; + +again: + revision = hashp->revision; + if ((ret = __db_lget(dbc, 0, + hashp->meta_pgno, DB_LOCK_READ, 0, &hcp->hlock)) != 0) + return (ret); + + if ((ret = __memp_fget(mpf, &hashp->meta_pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_CREATE, &hcp->hdr)) != 0) { + (void)__LPUT(dbc, hcp->hlock); + return (ret); + } + + if (F_ISSET(dbp, DB_AM_SUBDB) && + (revision != dbp->mpf->mfp->revision || + (TYPE(hcp->hdr) != P_HASHMETA && + !IS_RECOVERING(dbp->env) && !F_ISSET(dbp, DB_AM_RECOVER)))) { + ret = __LPUT(dbc, hcp->hlock); + t_ret = + __memp_fput(mpf, dbc->thread_info, hcp->hdr, dbc->priority); + hcp->hdr = NULL; + if (ret != 0) + return (ret); + if (t_ret != 0) + return (t_ret); + if ((ret = __db_reopen(dbc)) != 0) + return (ret); + goto again; + } + + return (ret); +} + +/* + * Release the meta-data page. + * + * PUBLIC: int __ham_release_meta __P((DBC *)); + */ +int +__ham_release_meta(dbc) + DBC *dbc; +{ + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + int ret; + + mpf = dbc->dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + + if (hcp->hdr != NULL) { + if ((ret = __memp_fput(mpf, + dbc->thread_info, hcp->hdr, dbc->priority)) != 0) + return (ret); + hcp->hdr = NULL; + } + + return (__TLPUT(dbc, hcp->hlock)); +} + +/* + * Mark the meta-data page dirty. 
+ * + * PUBLIC: int __ham_dirty_meta __P((DBC *, u_int32_t)); + */ +int +__ham_dirty_meta(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + HASH *hashp; + HASH_CURSOR *hcp; + int ret; + + if (F_ISSET(dbc, DBC_OPD)) + dbc = dbc->internal->pdbc; + hashp = dbc->dbp->h_internal; + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->hlock.mode == DB_LOCK_WRITE) + return (0); + + mpf = dbc->dbp->mpf; + + if ((ret = __db_lget(dbc, LCK_COUPLE, hashp->meta_pgno, + DB_LOCK_WRITE, DB_LOCK_NOWAIT, &hcp->hlock)) != 0) { + if (ret != DB_LOCK_NOTGRANTED && ret != DB_LOCK_DEADLOCK) + return (ret); + if ((ret = __memp_fput(mpf, + dbc->thread_info, hcp->hdr, dbc->priority)) != 0) + return (ret); + hcp->hdr = NULL; + if ((ret = __db_lget(dbc, LCK_COUPLE, hashp->meta_pgno, + DB_LOCK_WRITE, 0, &hcp->hlock)) != 0) + return (ret); + ret = __memp_fget(mpf, &hashp->meta_pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &hcp->hdr); + return (ret); + } + + return (__memp_dirty(mpf, + &hcp->hdr, dbc->thread_info, dbc->txn, dbc->priority, flags)); +} + +/* + * Return the meta data page if it is saved in the cursor. + * + * PUBLIC: int __ham_return_meta __P((DBC *, u_int32_t, DBMETA **)); + */ + int + __ham_return_meta(dbc, flags, metap) + DBC *dbc; + u_int32_t flags; + DBMETA **metap; +{ + HASH_CURSOR *hcp; + int ret; + + *metap = NULL; + if (F_ISSET(dbc, DBC_OPD)) + dbc = dbc->internal->pdbc; + + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->hdr == NULL || PGNO(hcp->hdr) != PGNO_BASE_MD) + return (0); + + if (LF_ISSET(DB_MPOOL_DIRTY) && + (ret = __ham_dirty_meta(dbc, flags)) != 0) + return (ret); + + *metap = (DBMETA *)hcp->hdr; + return (0); +} diff --git a/src/hash/hash_method.c b/src/hash/hash_method.c new file mode 100644 index 00000000..39c25f55 --- /dev/null +++ b/src/hash/hash_method.c @@ -0,0 +1,250 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +static int __ham_set_h_ffactor __P((DB *, u_int32_t)); +static int __ham_get_h_hash + __P((DB *, u_int32_t(**)(DB *, const void *, u_int32_t))); +static int __ham_set_h_hash + __P((DB *, u_int32_t(*)(DB *, const void *, u_int32_t))); +static int __ham_set_h_nelem __P((DB *, u_int32_t)); + +static int __ham_get_h_compare + __P((DB *, int (**)(DB *, const DBT *, const DBT *))); + +/* + * __ham_db_create -- + * Hash specific initialization of the DB structure. + * + * PUBLIC: int __ham_db_create __P((DB *)); + */ +int +__ham_db_create(dbp) + DB *dbp; +{ + HASH *hashp; + int ret; + + if ((ret = __os_malloc(dbp->env, + sizeof(HASH), &dbp->h_internal)) != 0) + return (ret); + + hashp = dbp->h_internal; + + hashp->h_nelem = 0; /* Defaults. */ + hashp->h_ffactor = 0; + hashp->h_hash = NULL; + hashp->h_compare = NULL; + + dbp->get_h_ffactor = __ham_get_h_ffactor; + dbp->set_h_ffactor = __ham_set_h_ffactor; + dbp->get_h_hash = __ham_get_h_hash; + dbp->set_h_hash = __ham_set_h_hash; + dbp->get_h_compare = __ham_get_h_compare; + dbp->set_h_compare = __ham_set_h_compare; + dbp->get_h_nelem = __ham_get_h_nelem; + dbp->set_h_nelem = __ham_set_h_nelem; + + return (0); +} + +/* + * PUBLIC: int __ham_db_close __P((DB *)); + */ +int +__ham_db_close(dbp) + DB *dbp; +{ + if (dbp->h_internal == NULL) + return (0); + __os_free(dbp->env, dbp->h_internal); + dbp->h_internal = NULL; + return (0); +} + +/* + * __ham_get_h_ffactor -- + * + * PUBLIC: int __ham_get_h_ffactor __P((DB *, u_int32_t *)); + */ +int +__ham_get_h_ffactor(dbp, h_ffactorp) + DB *dbp; + u_int32_t *h_ffactorp; +{ + HASH *hashp; + + hashp = dbp->h_internal; + *h_ffactorp = hashp->h_ffactor; + return (0); +} + +/* + * __ham_set_h_ffactor -- + * Set the fill factor. 
+ */ +static int +__ham_set_h_ffactor(dbp, h_ffactor) + DB *dbp; + u_int32_t h_ffactor; +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_ffactor"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_ffactor = h_ffactor; + return (0); +} + +/* + * __ham_get_h_hash -- + * Get the hash function. + */ +static int +__ham_get_h_hash(dbp, funcp) + DB *dbp; + u_int32_t (**funcp) __P((DB *, const void *, u_int32_t)); +{ + HASH *hashp; + + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + if (funcp != NULL) + *funcp = hashp->h_hash; + return (0); +} + +/* + * __ham_set_h_hash -- + * Set the hash function. + */ +static int +__ham_set_h_hash(dbp, func) + DB *dbp; + u_int32_t (*func) __P((DB *, const void *, u_int32_t)); +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_hash"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_hash = func; + return (0); +} + +/* + * __ham_get_h_compare -- + * Get the comparison function. + */ +static int +__ham_get_h_compare(dbp, funcp) + DB *dbp; + int (**funcp) __P((DB *, const DBT *, const DBT *)); +{ + HASH *t; + + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + t = dbp->h_internal; + if (funcp != NULL) + *funcp = t->h_compare; + + return (0); +} + +/* + * __ham_set_h_compare -- + * Set the comparison function. 
+ * + * PUBLIC: int __ham_set_h_compare + * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + */ +int +__ham_set_h_compare(dbp, func) + DB *dbp; + int (*func) __P((DB *, const DBT *, const DBT *)); +{ + HASH *t; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_compare"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + t = dbp->h_internal; + + t->h_compare = func; + + return (0); +} + +/* + * __db_get_h_nelem -- + * + * PUBLIC: int __ham_get_h_nelem __P((DB *, u_int32_t *)); + */ +int +__ham_get_h_nelem(dbp, h_nelemp) + DB *dbp; + u_int32_t *h_nelemp; +{ + HASH *hashp; + + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + *h_nelemp = hashp->h_nelem; + return (0); +} + +/* + * __ham_set_h_nelem -- + * Set the table size. + */ +static int +__ham_set_h_nelem(dbp, h_nelem) + DB *dbp; + u_int32_t h_nelem; +{ + HASH *hashp; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_nelem"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + hashp = dbp->h_internal; + hashp->h_nelem = h_nelem; + return (0); +} + +/* + * __ham_copy_config + * Copy the configuration of one DB handle to another. + * PUBLIC: void __ham_copy_config __P((DB *, DB*, u_int32_t)); + */ +void +__ham_copy_config(src, dst, nparts) + DB *src, *dst; + u_int32_t nparts; +{ + HASH *s, *d; + + s = src->h_internal; + d = dst->h_internal; + + d->h_ffactor = s->h_ffactor; + d->h_nelem = s->h_nelem / nparts; + d->h_hash = s->h_hash; + d->h_compare = s->h_compare; +} diff --git a/src/hash/hash_open.c b/src/hash/hash_open.c new file mode 100644 index 00000000..0789b28e --- /dev/null +++ b/src/hash/hash_open.c @@ -0,0 +1,584 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/partition.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" + +static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *)); + +/* + * __ham_open -- + * + * PUBLIC: int __ham_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t)); + */ +int +__ham_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + DBC *dbc; + DBMETA *dbmeta; + ENV *env; + HASH *hashp; + HASH_CURSOR *hcp; + int ret, t_ret; + + env = dbp->env; + dbc = NULL; + + /* + * Get a cursor. If DB_CREATE is specified, we may be creating + * pages, and to do that safely in CDB we need a write cursor. + * In STD_LOCKING mode, we'll synchronize using the meta page + * lock instead. + */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, + (LF_ISSET(DB_CREATE) && CDB_LOCKING(env) ? DB_WRITECURSOR : 0) | + (F_ISSET(dbp, DB_AM_RECOVER) ? DB_RECOVER : 0))) != 0) + return (ret); + + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + hashp->meta_pgno = base_pgno; + hashp->revision = dbp->mpf->mfp->revision; + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + /* Initialize the hdr structure. */ + dbmeta = &hcp->hdr->dbmeta; + if (dbmeta->magic == DB_HASHMAGIC) { + /* File exists, verify the data in the header. */ + if (hashp->h_hash == NULL) + hashp->h_hash = dbmeta->version < 5 + ? 
__ham_func4 : __ham_func5; + hashp->h_nelem = hcp->hdr->nelem; + if (F_ISSET(dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(dbmeta, DB_HASH_DUPSORT)) + F_SET(dbp, DB_AM_DUPSORT); + if (F_ISSET(dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + if (PGNO(hcp->hdr) == PGNO_BASE_MD && + !F_ISSET(dbp, DB_AM_RECOVER) && + (txn == NULL || !F_ISSET(txn, TXN_SNAPSHOT)) && (ret = + __memp_set_last_pgno(dbp->mpf, dbmeta->last_pgno)) != 0) + goto err; + } else if (!IS_RECOVERING(env) && !F_ISSET(dbp, DB_AM_RECOVER)) { + __db_errx(env, DB_STR_A("1124", + "%s: Invalid hash meta page %lu", "%s %lu"), + name, (u_long)base_pgno); + ret = EINVAL; + } + + /* Release the meta data page */ + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; +err: if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __ham_metachk -- + * + * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); + */ +int +__ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA *hashm; +{ + ENV *env; + u_int32_t vers; + int ret; + + env = dbp->env; + + /* + * At this point, all we know is that the magic number is for a Hash. + * Check the version, the database may be out of date. + */ + vers = hashm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 4: + case 5: + case 6: + __db_errx(env, DB_STR_A("1125", + "%s: hash version %lu requires a version upgrade", + "%s %lu"), name, (u_long)vers); + return (DB_OLD_VERSION); + case 7: + case 8: + case 9: + break; + default: + __db_errx(env, DB_STR_A("1126", + "%s: unsupported hash version: %lu", "%s %lu"), + name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __ham_mswap(env, (PAGE *)hashm)) != 0) + return (ret); + + /* Check the type. 
 */
	/* The on-disk magic said Hash; reject a mismatched handle type. */
	if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN)
		return (EINVAL);
	dbp->type = DB_HASH;
	DB_ILLEGAL_METHOD(dbp, DB_OK_HASH);

	/*
	 * Check application info against metadata info, and set info, flags,
	 * and type based on metadata info.
	 */
	if ((ret = __db_fchk(env,
	    "DB->open", hashm->dbmeta.flags,
	    DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0)
		return (ret);

	/*
	 * For each flag pair below: the metadata page is authoritative.  A
	 * flag set on disk is propagated to the handle; a flag requested by
	 * the application but absent on disk is an error (EINVAL), since the
	 * database cannot be reconfigured at open time.
	 */
	if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP))
		F_SET(dbp, DB_AM_DUP);
	else
		if (F_ISSET(dbp, DB_AM_DUP)) {
			__db_errx(env, DB_STR_A("1127",
	    "%s: DB_DUP specified to open method but not set in database",
			    "%s"), name);
			return (EINVAL);
		}

	if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB))
		F_SET(dbp, DB_AM_SUBDB);
	else
		if (F_ISSET(dbp, DB_AM_SUBDB)) {
			__db_errx(env, DB_STR_A("1128",
	    "%s: multiple databases specified but not supported in file",
			    "%s"), name);
			return (EINVAL);
		}

	if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) {
		/* Sorted dups on disk: default the comparator if unset. */
		if (dbp->dup_compare == NULL)
			dbp->dup_compare = __bam_defcmp;
	} else
		if (dbp->dup_compare != NULL) {
			__db_errx(env, DB_STR_A("1129",
	    "%s: duplicate sort function specified but not set in database",
			    "%s"), name);
			return (EINVAL);
		}

	/* Set the page size. */
	dbp->pgsize = hashm->dbmeta.pagesize;

	/* Copy the file's ID. */
	memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);

	return (0);
}

/*
 * __ham_init_meta --
 *
 * Initialize a hash meta-data page.  We assume that the meta-data page is
 * contiguous with the initial buckets that we create.  If that turns out
 * to be false, we'll fix it up later.  Return the initial number of buckets
 * allocated.
+ */ +static db_pgno_t +__ham_init_meta(dbp, meta, pgno, lsnp) + DB *dbp; + HMETA *meta; + db_pgno_t pgno; + DB_LSN *lsnp; +{ +#ifdef HAVE_PARTITION + DB_PARTITION *part; +#endif + ENV *env; + HASH *hashp; + db_pgno_t nbuckets; + u_int i, l2; + + env = dbp->env; + hashp = dbp->h_internal; + + if (hashp->h_hash == NULL) + hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; + + if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) { + nbuckets = (hashp->h_nelem - 1) / hashp->h_ffactor + 1; + l2 = __db_log2(nbuckets > 2 ? nbuckets : 2); + } else + l2 = 1; + + /* Now make number of buckets a power of two. */ + nbuckets = (db_pgno_t)(1 << l2); + + memset(meta, 0, sizeof(HMETA)); + meta->dbmeta.lsn = *lsnp; + meta->dbmeta.pgno = pgno; + meta->dbmeta.magic = DB_HASHMAGIC; + meta->dbmeta.version = DB_HASHVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = env->crypto_handle->alg; + DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_HASHMETA; + meta->dbmeta.free = PGNO_INVALID; + meta->dbmeta.last_pgno = pgno; + meta->max_bucket = nbuckets - 1; + meta->high_mask = nbuckets - 1; + meta->low_mask = (nbuckets >> 1) - 1; + meta->ffactor = hashp->h_ffactor; + meta->nelem = hashp->h_nelem; + meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&meta->dbmeta, DB_HASH_DUP); + if (F_ISSET(dbp, DB_AM_SUBDB)) + F_SET(&meta->dbmeta, DB_HASH_SUBDB); + if (dbp->dup_compare != NULL) + F_SET(&meta->dbmeta, DB_HASH_DUPSORT); + +#ifdef HAVE_PARTITION + if ((part = dbp->p_internal) != NULL) { + meta->dbmeta.nparts = part->nparts; + if (F_ISSET(part, PART_CALLBACK)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_PART_CALLBACK); + if (F_ISSET(part, PART_RANGE)) + 
FLD_SET(meta->dbmeta.metaflags, DBMETA_PART_RANGE); + } +#endif + + /* + * Create the first and second buckets pages so that we have the + * page numbers for them and we can store that page number in the + * meta-data header (spares[0]). + */ + meta->spares[0] = pgno + 1; + + /* Fill in the last fields of the meta data page. */ + for (i = 1; i <= l2; i++) + meta->spares[i] = meta->spares[0]; + for (; i < NCACHED; i++) + meta->spares[i] = PGNO_INVALID; + + return (nbuckets); +} + +/* + * __ham_new_file -- + * Create the necessary pages to begin a new database file. If name + * is NULL, then this is an unnamed file, the mpf has been set in the dbp + * and we simply create the pages using mpool. In this case, we don't log + * because we never have to redo an unnamed create and the undo simply + * frees resources. + * + * This code appears more complex than it is because of the two cases (named + * and unnamed). The way to read the code is that for each page being created, + * there are three parts: 1) a "get page" chunk (which either uses malloc'd + * memory or calls __memp_fget), 2) the initialization, and 3) the "put page" + * chunk which either does a fop write or an __memp_fput. + * + * PUBLIC: int __ham_new_file __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); + */ +int +__ham_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + DBT pdbt; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + ENV *env; + HMETA *meta; + PAGE *page; + int ret; + db_pgno_t lpgno; + void *buf; + + env = dbp->env; + mpf = dbp->mpf; + meta = NULL; + page = NULL; + buf = NULL; + + if (F_ISSET(dbp, DB_AM_INMEM)) { + /* Build meta-data page. 
*/ + lpgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &lpgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + LSN_NOT_LOGGED(lsn); + lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->dbmeta.last_pgno = lpgno; + if ((ret = __db_log_page(dbp, + txn, &lsn, meta->dbmeta.pgno, (PAGE *)meta)) != 0) + goto err; + ret = __memp_fput(mpf, ip, meta, dbp->priority); + meta = NULL; + if (ret != 0) + goto err; + + /* Allocate the final hash bucket. */ + if ((ret = __memp_fget(mpf, &lpgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &page)) != 0) + goto err; + P_INIT(page, + dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN_NOT_LOGGED(page->lsn); + if ((ret = + __db_log_page(dbp, txn, &page->lsn, lpgno, page)) != 0) + goto err; + ret = __memp_fput(mpf, ip, page, dbp->priority); + page = NULL; + if (ret != 0) + goto err; + } else { + memset(&pdbt, 0, sizeof(pdbt)); + + /* Build meta-data page. */ + pginfo.db_pagesize = dbp->pgsize; + pginfo.type = dbp->type; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0) + return (ret); + meta = (HMETA *)buf; + LSN_NOT_LOGGED(lsn); + lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->dbmeta.last_pgno = lpgno; + if ((ret = + __db_pgout(env->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + if ((ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, fhp, + dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + meta = NULL; + + /* Allocate the final hash bucket. 
*/ +#ifdef DIAGNOSTIC + memset(buf, 0, dbp->pgsize); +#endif + page = (PAGE *)buf; + P_INIT(page, + dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN_NOT_LOGGED(page->lsn); + if ((ret = __db_pgout(env->dbenv, lpgno, buf, &pdbt)) != 0) + goto err; + if ((ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, fhp, + dbp->pgsize, lpgno, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + page = NULL; + } + +err: if (buf != NULL) + __os_free(env, buf); + else { + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, dbp->priority); + if (page != NULL) + (void)__memp_fput(mpf, ip, page, dbp->priority); + } + return (ret); +} + +/* + * __ham_new_subdb -- + * Create the necessary pages to begin a new subdatabase. + * + * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *)); + */ +int +__ham_new_subdb(mdbp, dbp, ip, txn) + DB *mdbp, *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + DBC *dbc; + DBMETA *mmeta; + DB_LOCK lock, metalock, mmlock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + ENV *env; + HMETA *meta; + PAGE *h; + int i, ret, t_ret; + db_pgno_t lpgno, mpgno; + + env = mdbp->env; + mpf = mdbp->mpf; + dbc = NULL; + meta = NULL; + mmeta = NULL; + LOCK_INIT(lock); + LOCK_INIT(metalock); + LOCK_INIT(mmlock); + + if ((ret = __db_cursor(mdbp, ip, txn, + &dbc, CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0) + return (ret); + + /* Get and lock the new meta data page. */ + if ((ret = __db_lget(dbc, + 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &dbp->meta_pgno, ip, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + + /* Initialize the new meta-data page. */ + lsn = meta->dbmeta.lsn; + lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn); + + /* + * We are about to allocate a set of contiguous buckets (lpgno + * worth). 
We need to get the master meta-data page to figure + * out where these pages are and to allocate them. So, lock and + * get the master meta data page. + */ + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &mpgno, ip, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &mmeta)) != 0) + goto err; + + /* + * Now update the hash meta-data page to reflect where the first + * set of buckets are actually located. + */ + meta->spares[0] = mmeta->last_pgno + 1; + for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++) + meta->spares[i] = meta->spares[0]; + + /* The new meta data page is now complete; log it. */ + if ((ret = __db_log_page(mdbp, + txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) + goto err; + + /* Reflect the group allocation. */ + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) + if ((ret = __ham_groupalloc_log(mdbp, txn, + &LSN(mmeta), 0, &LSN(mmeta), meta->spares[0], + meta->max_bucket + 1, 0, mmeta->last_pgno)) != 0) + goto err; + + /* Release the new meta-data page. */ + if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + + lpgno += mmeta->last_pgno; + + /* Now allocate the final hash bucket. */ + if ((ret = __db_lget(dbc, 0, lpgno, DB_LOCK_WRITE, 0, &lock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &lpgno, ip, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) + goto err; + + mmeta->last_pgno = lpgno; + P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN(h) = LSN(mmeta); + if ((ret = __memp_fput(mpf, ip, h, dbc->priority)) != 0) + goto err; + +err: /* Now put the master-metadata page back. 
*/ + if (mmeta != NULL && (t_ret = __memp_fput(mpf, + ip, mmeta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL && (t_ret = __memp_fput(mpf, + ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL) + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/hash/hash_page.c b/src/hash/hash_page.c new file mode 100644 index 00000000..22590327 --- /dev/null +++ b/src/hash/hash_page.c @@ -0,0 +1,3182 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Page manipulation for hashing package. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __hamc_delpg + __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *)); +static int __ham_getindex_sorted + __P((DBC *, PAGE *, const DBT *, u_int32_t, int *, db_indx_t *)); +static int __ham_getindex_unsorted + __P((DBC *, PAGE *, const DBT *, int *, db_indx_t *)); +static int __hamc_delpg_getorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); +static int __hamc_delpg_setorder + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); + +/* + * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t next_pgno; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED)) { + __db_errx(dbp->env, DB_STR("1132", + "Attempt to return a deleted item")); + return (EINVAL); 
+ } + F_CLR(hcp, H_OK | H_NOMORE); + + /* Check if we need to get a page for this cursor. */ + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + +recheck: + /* Check if we are looking for space in which to insert an item. */ + if (hcp->seek_size != 0 && hcp->seek_found_page == PGNO_INVALID && + hcp->seek_size < P_FREESPACE(dbp, hcp->page)) { + hcp->seek_found_page = hcp->pgno; + hcp->seek_found_indx = NDX_INVALID; + } + + /* Check for off-page duplicates. */ + if (hcp->indx < NUM_ENT(hcp->page) && + HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { + memcpy(pgnop, + HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), + sizeof(db_pgno_t)); + F_SET(hcp, H_OK); + return (0); + } + + /* Check if we need to go on to the next page. */ + if (F_ISSET(hcp, H_ISDUP)) + /* + * ISDUP is set, and offset is at the beginning of the datum. + * We need to grab the length of the datum, then set the datum + * pointer to be the beginning of the datum. + */ + memcpy(&hcp->dup_len, + HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + + hcp->dup_off, sizeof(db_indx_t)); + + if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) { + /* Fetch next page. 
 */
		if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
			/* No chained page: this bucket is exhausted. */
			F_SET(hcp, H_NOMORE);
			return (DB_NOTFOUND);
		}
		next_pgno = NEXT_PGNO(hcp->page);
		hcp->indx = 0;
		if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0)
			return (ret);
		/* Re-run the space/off-page checks on the new page. */
		goto recheck;
	}

	F_SET(hcp, H_OK);
	return (0);
}

/*
 * __ham_item_reset --
 *	Release the page this cursor currently has pinned (if any) and
 *	re-initialize the cursor's position state via __ham_item_init.
 *
 * PUBLIC: int __ham_item_reset __P((DBC *));
 */
int
__ham_item_reset(dbc)
	DBC *dbc;
{
	DB *dbp;
	DB_MPOOLFILE *mpf;
	HASH_CURSOR *hcp;
	int ret, t_ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	hcp = (HASH_CURSOR *)dbc->internal;

	ret = 0;
	if (hcp->page != NULL) {
		/* Return the pinned page to the buffer pool. */
		ret = __memp_fput(mpf,
		    dbc->thread_info, hcp->page, dbc->priority);
		hcp->page = NULL;
	}

	/* Reset position state even if the page release failed. */
	if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __ham_item_init --
 *	Reset a hash cursor's position and duplicate-tracking state to
 *	"no position"; releases the cursor's element lock where the
 *	locking mode allows it.
 *
 * PUBLIC: int __ham_item_init __P((DBC *));
 */
int
__ham_item_init(dbc)
	DBC *dbc;
{
	HASH_CURSOR *hcp;
	int ret;

	hcp = (HASH_CURSOR *)dbc->internal;

	/*
	 * If this cursor still holds any locks, we must release them if
	 * we are not running with transactions.
	 */
	ret = __TLPUT(dbc, hcp->lock);

	/*
	 * The following fields must *not* be initialized here because they
	 * may have meaning across inits.
	 *	hlock, hdr, split_buf, stats
	 */
	hcp->bucket = BUCKET_INVALID;
	hcp->lbucket = BUCKET_INVALID;
	LOCK_INIT(hcp->lock);
	hcp->lock_mode = DB_LOCK_NG;
	hcp->dup_off = 0;
	hcp->dup_len = 0;
	hcp->dup_tlen = 0;
	hcp->seek_size = 0;
	hcp->seek_found_page = PGNO_INVALID;
	hcp->seek_found_indx = NDX_INVALID;
	hcp->flags = 0;

	hcp->pgno = PGNO_INVALID;
	hcp->indx = NDX_INVALID;
	hcp->page = NULL;

	return (ret);
}

/*
 * Returns the last item in a bucket.
 *
 * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *));
 */
int
__ham_item_last(dbc, mode, pgnop)
	DBC *dbc;
	db_lockmode_t mode;
	db_pgno_t *pgnop;
{
	HASH_CURSOR *hcp;
	int ret;

	hcp = (HASH_CURSOR *)dbc->internal;
	/* Start from a clean cursor position. */
	if ((ret = __ham_item_reset(dbc)) != 0)
		return (ret);

	/* Position on the highest bucket and walk backwards from its end. */
	hcp->bucket = hcp->hdr->max_bucket;
	hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
	F_SET(hcp, H_OK);
	return (__ham_item_prev(dbc, mode, pgnop));
}

/*
 * __ham_item_first --
 *	Position the cursor on the first item of the first bucket.
 *
 * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *));
 */
int
__ham_item_first(dbc, mode, pgnop)
	DBC *dbc;
	db_lockmode_t mode;
	db_pgno_t *pgnop;
{
	HASH_CURSOR *hcp;
	int ret;

	hcp = (HASH_CURSOR *)dbc->internal;
	if ((ret = __ham_item_reset(dbc)) != 0)
		return (ret);
	F_SET(hcp, H_OK);
	hcp->bucket = 0;
	hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
	hcp->dup_off = 0;
	return (__ham_item_next(dbc, mode, pgnop));
}

/*
 * __ham_item_prev --
 *	Returns a pointer to key/data pair on a page.  In the case of
 *	bigkeys, just returns the page number and index of the bigkey
 *	pointer pair.
 *
 * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *));
 */
int
__ham_item_prev(dbc, mode, pgnop)
	DBC *dbc;
	db_lockmode_t mode;
	db_pgno_t *pgnop;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	db_pgno_t next_pgno;
	int ret;

	hcp = (HASH_CURSOR *)dbc->internal;
	dbp = dbc->dbp;

	/*
	 * There are 5 cases for backing up in a hash file.
	 * Case 1: In the middle of a page, no duplicates, just dec the index.
	 * Case 2: In the middle of a duplicate set, back up one.
	 * Case 3: At the beginning of a duplicate set, get out of set and
	 *	back up to next key.
	 * Case 4: At the beginning of a page; go to previous page.
	 * Case 5: At the beginning of a bucket; go to prev bucket.
	 */
	F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);

	if ((ret = __ham_get_cpage(dbc, mode)) != 0)
		return (ret);

	/*
	 * First handle the duplicates.
Either you'll get the key here + * or you'll exit the duplicate set and drop into the code below + * to handle backing up through keys. + */ + if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) { + if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == + H_OFFDUP) { + memcpy(pgnop, + HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), + sizeof(db_pgno_t)); + F_SET(hcp, H_OK); + return (0); + } + + /* Duplicates are on-page. */ + if (hcp->dup_off != 0) { + memcpy(&hcp->dup_len, HKEYDATA_DATA( + H_PAIRDATA(dbp, hcp->page, hcp->indx)) + + hcp->dup_off - sizeof(db_indx_t), + sizeof(db_indx_t)); + hcp->dup_off -= + DUP_SIZE(hcp->dup_len); + return (__ham_item(dbc, mode, pgnop)); + } + } + + /* + * If we get here, we are not in a duplicate set, and just need + * to back up the cursor. There are still three cases: + * midpage, beginning of page, beginning of bucket. + */ + + if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else + /* + * We are no longer in a dup set; flag this so the dup code + * will reinitialize should we stumble upon another one. + */ + F_CLR(hcp, H_ISDUP); + + if (hcp->indx == 0) { /* Beginning of page. */ + hcp->pgno = PREV_PGNO(hcp->page); + if (hcp->pgno == PGNO_INVALID) { + /* Beginning of bucket. */ + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); + } else if ((ret = + __ham_next_cpage(dbc, hcp->pgno)) != 0) + return (ret); + else + hcp->indx = NUM_ENT(hcp->page); + } + + /* + * Either we've got the cursor set up to be decremented, or we + * have to find the end of a bucket. + */ + if (hcp->indx == NDX_INVALID) { + DB_ASSERT(dbp->env, hcp->page != NULL); + + hcp->indx = NUM_ENT(hcp->page); + for (next_pgno = NEXT_PGNO(hcp->page); + next_pgno != PGNO_INVALID; + next_pgno = NEXT_PGNO(hcp->page)) { + if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) + return (ret); + hcp->indx = NUM_ENT(hcp->page); + } + + if (hcp->indx == 0) { + /* Bucket was empty. 
*/ + F_SET(hcp, H_NOMORE); + return (DB_NOTFOUND); + } + } + + hcp->indx -= 2; + + return (__ham_item(dbc, mode, pgnop)); +} + +/* + * Sets the cursor to the next key/data pair on a page. + * + * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); + */ +int +__ham_item_next(dbc, mode, pgnop) + DBC *dbc; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + HASH_CURSOR *hcp; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_cpage(dbc, mode)) != 0) + return (ret); + + /* + * Deleted on-page duplicates are a weird case. If we delete the last + * one, then our cursor is at the very end of a duplicate set and + * we actually need to go on to the next key. + */ + if (F_ISSET(hcp, H_DELETED)) { + if (hcp->indx != NDX_INVALID && + F_ISSET(hcp, H_ISDUP) && + HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx)) + == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) { + if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else if (F_ISSET(hcp, H_ISDUP) && + F_ISSET(hcp, H_NEXT_NODUP)) { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + F_CLR(hcp, H_DELETED); + } else if (hcp->indx == NDX_INVALID) { + hcp->indx = 0; + F_CLR(hcp, H_ISDUP); + } else if (F_ISSET(hcp, H_NEXT_NODUP)) { + hcp->indx += 2; + F_CLR(hcp, H_ISDUP); + } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) { + if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >= + hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } + hcp->dup_off += DUP_SIZE(hcp->dup_len); + if (hcp->dup_off >= hcp->dup_tlen) { + F_CLR(hcp, H_ISDUP); + hcp->indx += 2; + } + } else if (F_ISSET(hcp, H_DUPONLY)) { + F_CLR(hcp, H_OK); + F_SET(hcp, H_NOMORE); + return (0); + } else { + hcp->indx += 2; + F_CLR(hcp, H_ISDUP); + } + + ret = 
__ham_item(dbc, mode, pgnop); + return (ret); +} + +/* + * __ham_insertpair -- + * + * Used for adding a pair of elements to a sorted page. We are guaranteed that + * the pair will fit on this page. + * + * indexp will return the point at which we inserted the pair. + * + * We're overloading the meaning of the H_OFFPAGE type here, which is a little + * bit sleazy. When we recover deletes, we have the entire entry instead of + * having only the DBT, so we'll pass type H_OFFPAGE to mean "copy the whole + * entry" as opposed to constructing an H_KEYDATA around it. In the recovery + * case it is assumed that a valid index is passed in, since a lookup using + * the overloaded H_OFFPAGE key will be incorrect. + * + * PUBLIC: int __ham_insertpair __P((DBC *, PAGE *p, + * PUBLIC: db_indx_t *indxp, const DBT *, + * PUBLIC: const DBT *, u_int32_t, u_int32_t)); + */ +int +__ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type) + DBC *dbc; + PAGE *p; + db_indx_t *indxp; + const DBT *key_dbt, *data_dbt; + u_int32_t key_type, data_type; +{ + DB *dbp; + u_int16_t n, indx; + db_indx_t *inp; + u_int32_t ksize, dsize, increase, distance; + u_int8_t *offset; + int i; + + dbp = dbc->dbp; + n = NUM_ENT(p); + inp = P_INP(dbp, p); + ksize = (key_type == H_OFFPAGE) ? + key_dbt->size : HKEYDATA_SIZE(key_dbt->size); + dsize = (data_type == H_OFFPAGE || data_type == H_OFFDUP) ? + data_dbt->size : HKEYDATA_SIZE(data_dbt->size); + increase = ksize + dsize; + + DB_ASSERT(dbp->env, indxp != NULL && *indxp != NDX_INVALID); + DB_ASSERT(dbp->env, + P_FREESPACE(dbp, p) >= dsize + ksize + 2 * sizeof(db_indx_t)); + indx = *indxp; + + /* Special case if the page is empty or inserting at end of page.*/ + if (n == 0 || indx == n) { + inp[indx] = HOFFSET(p) - ksize; + inp[indx+1] = HOFFSET(p) - increase; + } else { + /* + * Shuffle the data elements. 
+ * + * For example, inserting an element that sorts between items + * 2 and 3 on a page: + * The copy starts from the beginning of the second item. + * + * --------------------------- + * |pgheader.. + * |__________________________ + * ||1|2|3|4|... + * |-------------------------- + * | + * |__________________________ + * | ...|4|3|2|1| + * |-------------------------- + * --------------------------- + * + * Becomes: + * + * --------------------------- + * |pgheader.. + * |__________________________ + * ||1|2|2a|3|4|... + * |-------------------------- + * | + * |__________________________ + * | ...|4|3|2a|2|1| + * |-------------------------- + * --------------------------- + * + * Index's 3,4 etc move down the page. + * The data for 3,4,etc moves up the page by sizeof(2a) + * The index pointers in 3,4 etc are updated to point at the + * relocated data. + * It is necessary to move the data (not just adjust the index) + * since the hash format uses consecutive data items to + * dynamically calculate the item size. + * An item in this example is a key/data pair. + */ + offset = (u_int8_t *)p + HOFFSET(p); + if (indx == 0) + distance = dbp->pgsize - HOFFSET(p); + else + distance = (u_int32_t) + (P_ENTRY(dbp, p, indx - 1) - offset); + memmove(offset - increase, offset, distance); + + /* Shuffle the index array */ + memmove(&inp[indx + 2], &inp[indx], + (n - indx) * sizeof(db_indx_t)); + + /* update the index array */ + for (i = indx + 2; i < n + 2; i++) + inp[i] -= increase; + + /* set the new index elements. 
 */
		inp[indx] = (HOFFSET(p) - increase) + distance + dsize;
		inp[indx + 1] = (HOFFSET(p) - increase) + distance;
	}

	HOFFSET(p) -= increase;
	/* insert the new elements */
	if (key_type == H_OFFPAGE)
		/* Recovery path: copy the pre-built entry verbatim. */
		memcpy(P_ENTRY(dbp, p, indx), key_dbt->data, key_dbt->size);
	else
		PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data,
		    key_dbt->size, key_type);
	if (data_type == H_OFFPAGE || data_type == H_OFFDUP)
		memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data,
		    data_dbt->size);
	else
		PUT_HKEYDATA(P_ENTRY(dbp, p, indx+1), data_dbt->data,
		    data_dbt->size, data_type);
	NUM_ENT(p) += 2;

	/*
	 * If debugging a sorted hash page problem, this is a good place to
	 * insert a call to __ham_verify_sorted_page.
	 * It used to be called when diagnostic mode was enabled, but that
	 * causes problems in recovery if a custom comparator was used.
	 */
	return (0);
}

/*
 * __ham_getindex --
 *	Dispatch a key lookup on a hash page to the unsorted (pre-4.6
 *	page format) or sorted search routine.
 *
 * The key_type parameter overloads the entry type to allow for comparison of
 * a key DBT that contains off-page data. A key that is not of type H_OFFPAGE
 * might contain data larger than the page size, since this routine can be
 * called with user-provided DBTs.
 *
 * PUBLIC: int __ham_getindex __P((DBC *,
 * PUBLIC:	PAGE *, const DBT *, u_int32_t, int *, db_indx_t *));
 */
int
__ham_getindex(dbc, p, key, key_type, match, indx)
	DBC *dbc;
	PAGE *p;
	const DBT *key;
	u_int32_t key_type;
	int *match;
	db_indx_t *indx;
{
	/* Since all entries are key/data pairs. */
	DB_ASSERT(dbc->env, NUM_ENT(p)%2 == 0 );

	/* Support pre 4.6 unsorted hash pages. */
	if (p->type == P_HASH_UNSORTED)
		return (__ham_getindex_unsorted(dbc, p, key, match, indx));
	else
		return (__ham_getindex_sorted(dbc,
		    p, key, key_type, match, indx));
}

#undef	min
#define	min(a, b) (((a) < (b)) ? (a) : (b))

/*
 * Perform a linear search of an unsorted (pre 4.6 format) hash page.
 *
 * This routine is never used to generate an index for insertion, because any
 * unsorted page is sorted before we insert.
 *
 * Returns 0 if an exact match is found, with indx set to requested elem.
 * Returns 1 if the item did not exist, indx is set to the last element on the
 * page.
 */
static int
__ham_getindex_unsorted(dbc, p, key, match, indx)
	DBC *dbc;
	PAGE *p;
	const DBT *key;
	int *match;
	db_indx_t *indx;
{
	DB *dbp;
	DBT pg_dbt;
	HASH *t;
	db_pgno_t pgno;
	int i, n, res, ret;
	u_int32_t tlen;
	u_int8_t *hk;

	dbp = dbc->dbp;
	n = NUM_ENT(p);
	t = dbp->h_internal;
	/*
	 * res doubles as the loop's exit condition: it stays non-zero
	 * until a comparison below reports equality.
	 */
	res = 1;

	/* Do a linear search over the page looking for an exact match */
	for (i = 0; i < n; i+=2) {
		hk = H_PAIRKEY(dbp, p, i);
		switch (HPAGE_PTYPE(hk)) {
		case H_OFFPAGE:
			/* extract item length from possibly unaligned DBT */
			memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
			/* Only compare off-page data when the lengths agree. */
			if (tlen == key->size) {
				memcpy(&pgno,
				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
				if ((ret = __db_moff(dbc, key, pgno, tlen,
				    t->h_compare, &res)) != 0)
					return (ret);
			}
			break;
		case H_KEYDATA:
			if (t->h_compare != NULL) {
				DB_INIT_DBT(pg_dbt,
				    HKEYDATA_DATA(hk), key->size);
				if (t->h_compare(
				    dbp, key, &pg_dbt) != 0)
					break;
			} else if (key->size ==
			    LEN_HKEY(dbp, p, dbp->pgsize, i))
				/*
				 * NOTE(review): res keeps its prior value
				 * when the sizes differ; only an exact-size
				 * memcmp here can produce a match.
				 */
				res = memcmp(key->data, HKEYDATA_DATA(hk),
				    key->size);
			break;
		case H_DUPLICATE:
		case H_OFFDUP:
			/*
			 * These are errors because keys are never duplicated.
			 */
			/* FALLTHROUGH */
		default:
			return (__db_pgfmt(dbp->env, PGNO(p)));
		}
		if (res == 0)
			break;
	}
	*indx = i;
	*match = (res == 0 ? 0 : 1);
	return (0);
}

/*
 * Perform a binary search of a sorted hash page for a key.
 * Return 0 if an exact match is found, with indx set to requested elem.
 * Return 1 if the item did not exist, indx will be set to the first element
 * greater than the requested item.
+ */ +static int +__ham_getindex_sorted(dbc, p, key, key_type, match, indxp) + DBC *dbc; + PAGE *p; + const DBT *key; + u_int32_t key_type; + int *match; + db_indx_t *indxp; +{ + DB *dbp; + DBT tmp_dbt; + HASH *t; + HOFFPAGE *offp; + db_indx_t indx; + db_pgno_t off_pgno, koff_pgno; + u_int32_t base, itemlen, lim, off_len; + u_int8_t *entry; + int res, ret; + void *data; + + dbp = dbc->dbp; + DB_ASSERT(dbp->env, p->type == P_HASH ); + + t = dbp->h_internal; + /* Initialize so the return params are correct for empty pages. */ + res = indx = 0; + + /* Do a binary search for the element. */ + DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(p), 2) { + DB_BINARY_SEARCH_INCR(indx, base, lim, 2); + data = HKEYDATA_DATA(H_PAIRKEY(dbp, p, indx)); + /* + * There are 4 cases here: + * 1) Off page key, off page match + * 2) Off page key, on page match + * 3) On page key, off page match + * 4) On page key, on page match + */ + entry = P_ENTRY(dbp, p, indx); + if (*entry == H_OFFPAGE) { + offp = (HOFFPAGE*)P_ENTRY(dbp, p, indx); + (void)__ua_memcpy(&itemlen, HOFFPAGE_TLEN(offp), + sizeof(u_int32_t)); + if (key_type == H_OFFPAGE) { + /* + * Case 1. + * + * If both key and cmp DBTs refer to different + * offpage items, it is necessary to compare + * the content of the entries, in order to be + * able to maintain a valid lexicographic sort + * order. 
+ */ + (void)__ua_memcpy(&koff_pgno, + HOFFPAGE_PGNO(key->data), + sizeof(db_pgno_t)); + (void)__ua_memcpy(&off_pgno, + HOFFPAGE_PGNO(offp), sizeof(db_pgno_t)); + if (koff_pgno == off_pgno) + res = 0; + else { + memset(&tmp_dbt, 0, sizeof(tmp_dbt)); + tmp_dbt.size = HOFFPAGE_SIZE; + tmp_dbt.data = offp; + if ((ret = __db_coff(dbc, key, &tmp_dbt, + t->h_compare, &res)) != 0) + return (ret); + } + } else { + /* Case 2 */ + (void)__ua_memcpy(&off_pgno, + HOFFPAGE_PGNO(offp), sizeof(db_pgno_t)); + if ((ret = __db_moff(dbc, key, off_pgno, + itemlen, t->h_compare, &res)) != 0) + return (ret); + } + } else { + itemlen = LEN_HKEYDATA(dbp, p, dbp->pgsize, indx); + if (key_type == H_OFFPAGE) { + /* Case 3 */ + tmp_dbt.data = data; + tmp_dbt.size = itemlen; + offp = (HOFFPAGE *)key->data; + (void)__ua_memcpy(&off_pgno, + HOFFPAGE_PGNO(offp), sizeof(db_pgno_t)); + (void)__ua_memcpy(&off_len, HOFFPAGE_TLEN(offp), + sizeof(u_int32_t)); + if ((ret = __db_moff(dbc, &tmp_dbt, off_pgno, + off_len, t->h_compare, &res)) != 0) + return (ret); + /* + * Since we switched the key/match parameters + * in the __db_moff call, the result needs to + * be inverted. + */ + res = -res; + } else if (t->h_compare != NULL) { + /* Case 4, with a user comparison func */ + DB_INIT_DBT(tmp_dbt, data, itemlen); + res = t->h_compare(dbp, key, &tmp_dbt); + } else { + /* Case 4, without a user comparison func */ + if ((res = memcmp(key->data, data, + min(key->size, itemlen))) == 0) + res = itemlen > key->size ? 1 : + (itemlen < key->size ? -1 : 0); + } + } + if (res == 0) { + /* Found a match */ + *indxp = indx; + *match = 0; + return (0); + } else if (res > 0) + DB_BINARY_SEARCH_SHIFT_BASE(indx, base, lim, 2); + } + /* + * If no match was found, and the comparison indicates that the + * closest match was lexicographically less than the input key adjust + * the insertion index to be after the index of the closest match. 
+ */ + if (res > 0) + indx += 2; + *indxp = indx; + *match = 1; + return (0); +} + +/* + * PUBLIC: int __ham_verify_sorted_page __P((DBC *, PAGE *)); + * + * The__ham_verify_sorted_page function is used to determine the correctness + * of sorted hash pages. The checks are used by verification, they are + * implemented in the hash code because they are also useful debugging aids. + */ +int +__ham_verify_sorted_page (dbc, p) + DBC *dbc; + PAGE *p; +{ + DB *dbp; + DBT prev_dbt, curr_dbt; + ENV *env; + HASH *t; + db_pgno_t tpgno; + u_int32_t curr_len, prev_len, tlen; + u_int16_t *indxp; + db_indx_t i, n; + int res, ret; + char *prev, *curr; + + /* Validate that next, prev pointers are OK */ + n = NUM_ENT(p); + dbp = dbc->dbp; + DB_ASSERT(dbp->env, n%2 == 0 ); + + env = dbp->env; + t = dbp->h_internal; + + /* Disable verification if a custom comparator is supplied */ + if (t->h_compare != NULL) + return (0); + + /* Iterate through page, ensuring order */ + prev = (char *)HKEYDATA_DATA(H_PAIRKEY(dbp, p, 0)); + prev_len = LEN_HKEYDATA(dbp, p, dbp->pgsize, 0); + for (i = 2; i < n; i+=2) { + curr = (char *)HKEYDATA_DATA(H_PAIRKEY(dbp, p, i)); + curr_len = LEN_HKEYDATA(dbp, p, dbp->pgsize, i); + + if (HPAGE_TYPE(dbp, p, i-2) == H_OFFPAGE && + HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) { + memset(&prev_dbt, 0, sizeof(prev_dbt)); + memset(&curr_dbt, 0, sizeof(curr_dbt)); + prev_dbt.size = curr_dbt.size = HOFFPAGE_SIZE; + prev_dbt.data = H_PAIRKEY(dbp, p, i-2); + curr_dbt.data = H_PAIRKEY(dbp, p, i); + if ((ret = __db_coff(dbc, + &prev_dbt, &curr_dbt, t->h_compare, &res)) != 0) + return (ret); + } else if (HPAGE_TYPE(dbp, p, i-2) == H_OFFPAGE) { + memset(&curr_dbt, 0, sizeof(curr_dbt)); + curr_dbt.size = curr_len; + curr_dbt.data = H_PAIRKEY(dbp, p, i); + memcpy(&tlen, HOFFPAGE_TLEN(H_PAIRKEY(dbp, p, i-2)), + sizeof(u_int32_t)); + memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i-2)), + sizeof(db_pgno_t)); + if ((ret = __db_moff(dbc, + &curr_dbt, tpgno, tlen, t->h_compare, &res)) != 0) 
+ return (ret); + } else if (HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) { + memset(&prev_dbt, 0, sizeof(prev_dbt)); + prev_dbt.size = prev_len; + prev_dbt.data = H_PAIRKEY(dbp, p, i); + memcpy(&tlen, HOFFPAGE_TLEN(H_PAIRKEY(dbp, p, i)), + sizeof(u_int32_t)); + memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i)), + sizeof(db_pgno_t)); + if ((ret = __db_moff(dbc, + &prev_dbt, tpgno, tlen, t->h_compare, &res)) != 0) + return (ret); + } else + res = memcmp(prev, curr, min(curr_len, prev_len)); + + if (res == 0 && curr_len > prev_len) + res = 1; + else if (res == 0 && curr_len < prev_len) + res = -1; + + if (res >= 0) { + __db_msg(env, "key1: %s, key2: %s, len: %lu\n", + (char *)prev, (char *)curr, + (u_long)min(curr_len, prev_len)); + __db_msg(env, "curroffset %lu\n", (u_long)i); + __db_msg(env, "indexes: "); + for (i = 0; i < n; i++) { + indxp = P_INP(dbp, p) + i; + __db_msg(env, "%04X, ", *indxp); + } + __db_msg(env, "\n"); +#ifdef HAVE_STATISTICS + if ((ret = __db_prpage(dbp, p, DB_PR_PAGE)) != 0) + return (ret); +#endif + DB_ASSERT(dbp->env, res < 0); + } + + prev = curr; + prev_len = curr_len; + } + return (0); +} + +/* + * A wrapper for the __ham_sort_page function. Implements logging and cursor + * adjustments associated with sorting a page outside of recovery/upgrade. + * PUBLIC: int __ham_sort_page_cursor __P((DBC *, PAGE *)); + */ +int +__ham_sort_page_cursor(dbc, page) + DBC *dbc; + PAGE *page; +{ + DB *dbp; + DBT page_dbt; + DB_LSN new_lsn; + HASH_CURSOR *hcp; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (DBC_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = page; + if ((ret = __ham_splitdata_log(dbp, dbc->txn, + &new_lsn, 0, SORTPAGE, PGNO(page), + &page_dbt, &LSN(page))) != 0) + return (ret); + } else + LSN_NOT_LOGGED(new_lsn); + /* Move lsn onto page. */ + LSN(page) = new_lsn; /* Structure assignment. */ + + /* + * Invalidate the saved index, it needs to be retrieved + * again once the page is sorted. 
+ */ + hcp->seek_found_indx = NDX_INVALID; + hcp->seek_found_page = PGNO_INVALID; + + return (__ham_sort_page(dbc, &hcp->split_buf, page)); +} + +/* + * PUBLIC: int __ham_sort_page __P((DBC *, PAGE **, PAGE *)); + * + * Convert a page from P_HASH_UNSORTED into the sorted format P_HASH. + * + * All locking and logging is carried out be the caller. A user buffer can + * optionally be passed in to save allocating a page size buffer for sorting. + * This is allows callers to re-use the buffer pre-allocated for page splits + * in the hash cursor. The buffer is optional since no cursor exists when in + * the recovery or upgrade code paths. + */ +int +__ham_sort_page(dbc, tmp_buf, page) + DBC *dbc; + PAGE **tmp_buf; + PAGE *page; +{ + DB *dbp; + PAGE *temp_pagep; + db_indx_t i; + int ret; + + dbp = dbc->dbp; + DB_ASSERT(dbp->env, page->type == P_HASH_UNSORTED); + + ret = 0; + if (tmp_buf != NULL) + temp_pagep = *tmp_buf; + else if ((ret = __os_malloc(dbp->env, dbp->pgsize, &temp_pagep)) != 0) + return (ret); + + memcpy(temp_pagep, page, dbp->pgsize); + + /* Re-initialize the page. 
*/ + P_INIT(page, dbp->pgsize, + page->pgno, page->prev_pgno, page->next_pgno, 0, P_HASH); + + for (i = 0; i < NUM_ENT(temp_pagep); i += 2) + if ((ret = + __ham_copypair(dbc, temp_pagep, i, page, NULL, 0)) != 0) + break; + + if (tmp_buf == NULL) + __os_free(dbp->env, temp_pagep); + + return (ret); +} + +/* + * PUBLIC: int __ham_del_pair __P((DBC *, int, PAGE *)); + */ +int +__ham_del_pair(dbc, flags, ppg) + DBC *dbc; + int flags; + PAGE *ppg; +{ + DB *dbp; + DBT data_dbt, key_dbt; + DB_LSN new_lsn, *n_lsn, tmp_lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + PAGE *n_pagep, *nn_pagep, *p, *p_pagep; + db_ham_mode op; + db_indx_t ndx; + db_pgno_t chg_pgno, pgno, tmp_pgno; + u_int32_t data_type, key_type, order; + int ret, t_ret; + u_int8_t *hk; + + dbp = dbc->dbp; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + n_pagep = p_pagep = nn_pagep = NULL; + ndx = hcp->indx; + + if (hcp->page == NULL && + (ret = __memp_fget(mpf, &hcp->pgno, dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &hcp->page)) != 0) + return (ret); + p = hcp->page; + + /* + * We optimize for the normal case which is when neither the key nor + * the data are large. In this case, we write a single log record + * and do the delete. If either is large, we'll call __big_delete + * to remove the big item and then update the page to remove the + * entry referring to the big item. 
+ */ + if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && + HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) { + memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))), + sizeof(db_pgno_t)); + ret = __db_doff(dbc, pgno); + } else + ret = 0; + + if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && ret == 0) + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) { + case H_OFFPAGE: + memcpy(&pgno, + HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))), + sizeof(db_pgno_t)); + ret = __db_doff(dbc, pgno); + break; + case H_OFFDUP: + case H_DUPLICATE: + /* + * If we delete a pair that is/was a duplicate, then + * we had better clear the flag so that we update the + * cursor appropriately. + */ + F_CLR(hcp, H_ISDUP); + break; + default: + /* No-op */ + break; + } + + if (ret) + return (ret); + + /* Now log the delete off this page. */ + if (DBC_LOGGING(dbc)) { + hk = H_PAIRKEY(dbp, hcp->page, ndx); + if ((key_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) { + key_dbt.data = hk; + key_dbt.size = HOFFPAGE_SIZE; + } else { + key_dbt.data = HKEYDATA_DATA(hk); + key_dbt.size = + LEN_HKEY(dbp, hcp->page, dbp->pgsize, ndx); + } + hk = H_PAIRDATA(dbp, hcp->page, ndx); + if ((data_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) { + data_dbt.data = hk; + data_dbt.size = HOFFPAGE_SIZE; + } else if (data_type == H_OFFDUP) { + data_dbt.data = hk; + data_dbt.size = HOFFDUP_SIZE; + } else { + data_dbt.data = HKEYDATA_DATA(hk); + data_dbt.size = + LEN_HDATA(dbp, hcp->page, dbp->pgsize, ndx); + } + + if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0, + DELPAIR, PGNO(p), (u_int32_t)ndx, &LSN(p), + OP_SET(key_type, p), &key_dbt, + OP_SET(data_type, p), &data_dbt)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(new_lsn); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; + /* Do the delete. */ + __ham_dpair(dbp, p, ndx); + + /* + * Mark item deleted so that we don't try to return it, and + * so that we update the cursor correctly on the next call + * to next. 
+ */ + F_SET(hcp, H_DELETED); + F_CLR(hcp, H_OK); + + /* Clear any cache streaming information. */ + hcp->stream_start_pgno = PGNO_INVALID; + + /* + * If we are locking, we will not maintain this, because it is + * a hot spot. + * + * XXX + * Perhaps we can retain incremental numbers and apply them later. + */ + if (!STD_LOCKING(dbc)) { + if ((ret = __ham_dirty_meta(dbc, 0)) != 0) + return (ret); + --hcp->hdr->nelem; + } + + /* The HAM_DEL_NO_CURSOR flag implies HAM_DEL_NO_RECLAIM. */ + if (LF_ISSET(HAM_DEL_NO_CURSOR)) + return (0); + /* + * Update cursors that are on the page where the delete happened. + */ + if ((ret = __hamc_update(dbc, 0, DB_HAM_CURADJ_DEL, 0)) != 0) + return (ret); + + /* + * If we need to reclaim the page, then check if the page is empty. + * There are two cases. If it's empty and it's not the first page + * in the bucket (i.e., the bucket page) then we can simply remove + * it. If it is the first chain in the bucket, then we need to copy + * the second page into it and remove the second page. + * If its the only page in the bucket we leave it alone. + */ + if (LF_ISSET(HAM_DEL_NO_RECLAIM) || + NUM_ENT(p) != 0 || + (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID)) { + if (NUM_ENT(p) == 0) + F_SET(hcp, H_CONTRACT); + return (0); + } + + if (PREV_PGNO(p) == PGNO_INVALID) { + /* + * First page in chain is empty and we know that there + * are more pages in the chain. + */ + if ((ret = __memp_fget(mpf, + &NEXT_PGNO(p), dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &n_pagep)) != 0) + return (ret); + + if (NEXT_PGNO(n_pagep) != PGNO_INVALID && + (ret = __memp_fget(mpf, &NEXT_PGNO(n_pagep), + dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &nn_pagep)) != 0) + goto err; + + if (DBC_LOGGING(dbc)) { + key_dbt.data = n_pagep; + key_dbt.size = dbp->pgsize; + if ((ret = __ham_copypage_log(dbp, + dbc->txn, &new_lsn, 0, PGNO(p), + &LSN(p), PGNO(n_pagep), &LSN(n_pagep), + NEXT_PGNO(n_pagep), + nn_pagep == NULL ? 
NULL : &LSN(nn_pagep), + &key_dbt)) != 0) + goto err; + } else + LSN_NOT_LOGGED(new_lsn); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; /* Structure assignment. */ + LSN(n_pagep) = new_lsn; + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) + LSN(nn_pagep) = new_lsn; + + if (nn_pagep != NULL) { + PREV_PGNO(nn_pagep) = PGNO(p); + ret = __memp_fput(mpf, + dbc->thread_info, nn_pagep, dbc->priority); + nn_pagep = NULL; + if (ret != 0) + goto err; + } + + tmp_pgno = PGNO(p); + tmp_lsn = LSN(p); + memcpy(p, n_pagep, dbp->pgsize); + PGNO(p) = tmp_pgno; + LSN(p) = tmp_lsn; + PREV_PGNO(p) = PGNO_INVALID; + + /* + * Update cursors to reflect the fact that records + * on the second page have moved to the first page. + */ + if ((ret = __hamc_delpg(dbc, PGNO(n_pagep), + PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0) + goto err; + + /* + * Update the cursor to reflect its new position. + */ + hcp->indx = 0; + hcp->pgno = PGNO(p); + hcp->order += order; + + if ((ret = __db_free(dbc, n_pagep, 0)) != 0) { + n_pagep = NULL; + goto err; + } + } else { + if ((p_pagep = ppg) == NULL && (ret = __memp_fget(mpf, + &PREV_PGNO(p), dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &p_pagep)) != 0) + goto err; + + if (NEXT_PGNO(p) != PGNO_INVALID) { + if ((ret = __memp_fget(mpf, &NEXT_PGNO(p), + dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &n_pagep)) != 0) + goto err; + n_lsn = &LSN(n_pagep); + } else { + n_pagep = NULL; + n_lsn = NULL; + } + + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_newpage_log(dbp, dbc->txn, + &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep), + PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) + goto err; + } else + LSN_NOT_LOGGED(new_lsn); + + /* Move lsn onto page. */ + LSN(p_pagep) = new_lsn; /* Structure assignment. 
*/ + if (n_pagep) + LSN(n_pagep) = new_lsn; + LSN(p) = new_lsn; + + NEXT_PGNO(p_pagep) = NEXT_PGNO(p); + if (n_pagep != NULL) + PREV_PGNO(n_pagep) = PGNO(p_pagep); + + if (NEXT_PGNO(p) == PGNO_INVALID) { + /* + * There is no next page; put the cursor on the + * previous page as if we'd deleted the last item + * on that page, with index after the last valid + * entry. + * + * The deleted flag was set up above. + */ + hcp->pgno = PGNO(p_pagep); + hcp->indx = NUM_ENT(p_pagep); + op = DB_HAM_DELLASTPG; + } else { + /* + * There is a next page, so put the cursor at + * the beginning of it. + */ + hcp->pgno = NEXT_PGNO(p); + hcp->indx = 0; + op = DB_HAM_DELMIDPG; + } + + /* + * Since we are about to delete the cursor page and we have + * just moved the cursor, we need to make sure that the + * old page pointer isn't left hanging around in the cursor. + */ + hcp->page = NULL; + chg_pgno = PGNO(p); + ret = __db_free(dbc, p, 0); + if (ppg == NULL && (t_ret = __memp_fput(mpf, dbc->thread_info, + p_pagep, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (n_pagep != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, n_pagep, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + if ((ret = __hamc_delpg(dbc, + chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0) + return (ret); + hcp->order += order; + } + return (ret); + +err: /* Clean up any pages. */ + if (n_pagep != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, n_pagep, dbc->priority); + if (nn_pagep != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, nn_pagep, dbc->priority); + if (ppg == NULL && p_pagep != NULL) + (void)__memp_fput(mpf, + dbc->thread_info, p_pagep, dbc->priority); + return (ret); +} + +/* + * __ham_replpair -- + * Given the key data indicated by the cursor, replace part/all of it + * according to the fields in the dbt. 
+ * + * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); + */ +int +__ham_replpair(dbc, dbt, newtype) + DBC *dbc; + DBT *dbt; + u_int32_t newtype; +{ + DB *dbp; + DBC **carray, *dbc_n; + DBT old_dbt, tdata, tmp, *new_dbt; + DB_LSN new_lsn; + ENV *env; + HASH_CURSOR *hcp, *cp; + db_indx_t orig_indx; + db_pgno_t off_pgno, orig_pgno; + u_int32_t change; + u_int32_t dup_flag, len, memsize, newlen, oldtype, type; + char tmp_ch; + int beyond_eor, is_big, is_plus, ret, i, found, t_ret; + u_int8_t *beg, *dest, *end, *hk, *src; + void *memp; + + /* + * Most items that were already offpage (ISBIG) were handled before + * we get in here. So, we need only handle cases where the old + * key is on a regular page. That leaves us 6 cases: + * 1. Original data onpage; new data is smaller + * 2. Original data onpage; new data is the same size + * 3. Original data onpage; new data is bigger, but not ISBIG, + * fits on page + * 4. Original data onpage; new data is bigger, but not ISBIG, + * does not fit on page + * 5. Original data onpage; New data is an off-page item. + * 6. Original data was offpage; new item is smaller. + * 7. Original data was offpage; new item is supplied as a partial. + * + * Cases 1-3 are essentially the same (and should be the common case). + * We handle 4-6 as delete and add. 7 is generally a delete and add, + * unless it is an append, when we extend the offpage item, and + * update the HOFFPAGE item on the current page to have the new size + * via a delete/add. + */ + dbp = dbc->dbp; + env = dbp->env; + hcp = (HASH_CURSOR *)dbc->internal; + carray = NULL; + dbc_n = memp = NULL; + found = 0; + new_dbt = NULL; + off_pgno = PGNO_INVALID; + type = 0; + + /* + * We need to compute the number of bytes that we are adding or + * removing from the entry. Normally, we can simply subtract + * the number of bytes we are replacing (dbt->dlen) from the + * number of bytes we are inserting (dbt->size). 
However, if + * we are doing a partial put off the end of a record, then this + * formula doesn't work, because we are essentially adding + * new bytes. + */ + if (dbt->size > dbt->dlen) { + change = dbt->size - dbt->dlen; + is_plus = 1; + } else { + change = dbt->dlen - dbt->size; + is_plus = 0; + } + + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + oldtype = HPAGE_PTYPE(hk); + is_big = oldtype == H_OFFPAGE; + + if (is_big) { + memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&off_pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + } else + len = LEN_HKEYDATA(dbp, hcp->page, + dbp->pgsize, H_DATAINDEX(hcp->indx)); + + beyond_eor = dbt->doff + dbt->dlen > len; + if (beyond_eor) { + /* + * The change is beyond the end of record. If change + * is a positive number, we can simply add the extension + * to it. However, if change is negative, then we need + * to figure out if the extension is larger than the + * negative change. + */ + if (is_plus) + change += dbt->doff + dbt->dlen - len; + else if (dbt->doff + dbt->dlen - len > change) { + /* Extension bigger than change */ + is_plus = 1; + change = (dbt->doff + dbt->dlen - len) - change; + } else /* Extension is smaller than change. */ + change -= (dbt->doff + dbt->dlen - len); + } + + newlen = (is_plus ? len + change : len - change); + if (is_big || beyond_eor || ISBIG(hcp, newlen) || + (is_plus && change > P_FREESPACE(dbp, hcp->page))) { + /* + * If we are in cases 4 or 5 then is_plus will be true. + * If we don't have a transaction then we cannot roll back, + * make sure there is enough room for the new page. + */ + if (is_plus && dbc->txn == NULL && + dbp->mpf->mfp->maxpgno != 0 && + dbp->mpf->mfp->maxpgno == dbp->mpf->mfp->last_pgno) + return (__db_space_err(dbp)); + /* + * Cases 4-6 -- two subcases. + * A. This is not really a partial operation, but an overwrite. + * Simple del and add works. + * B. This is a partial and we need to construct the data that + * we are really inserting (yuck). 
+ * In both cases, we need to grab the key off the page (in + * some cases we could do this outside of this routine; for + * cleanliness we do it here. If you happen to be on a big + * key, this could be a performance hit). + */ + memset(&tmp, 0, sizeof(tmp)); + if ((ret = __db_ret(dbc, hcp->page, H_KEYINDEX(hcp->indx), + &tmp, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) + return (ret); + + /* Preserve duplicate info. */ + dup_flag = F_ISSET(hcp, H_ISDUP); + /* Streaming insert. */ + if (is_big && !dup_flag && !DB_IS_PRIMARY(dbp) && + F_ISSET(dbt, DB_DBT_PARTIAL) && dbt->doff == len) { + /* + * If the cursor has not already cached the last page + * in the offpage chain, we need to walk the chain to + * be sure that the page has been read. + */ + if (hcp->stream_start_pgno != off_pgno || + hcp->stream_off > dbt->doff || dbt->doff > + hcp->stream_off + P_MAXSPACE(dbp, dbp->pgsize)) { + memset(&tdata, 0, sizeof(DBT)); + tdata.doff = dbt->doff - 1; + /* + * Set the length to 1, to force __db_goff + * to do the traversal. + */ + tdata.dlen = tdata.ulen = 1; + tdata.data = &tmp_ch; + tdata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; + + /* + * Read to the last page. It will be cached + * in the cursor. + */ + if ((ret = __db_goff(dbc, &tdata, len, + off_pgno, NULL, NULL)) != 0) + return (ret); + } + /* + * Since this is an append, dlen is irrelevant (there + * are no bytes to overwrite). We need the caller's + * DBT size to end up with the total size of the item. + * From now on, use dlen as the length of the user's + * data that we are going to append. + * Don't futz with the caller's DBT any more than we + * have to in order to send back the size. + */ + tdata = *dbt; + tdata.dlen = dbt->size; + tdata.size = newlen; + new_dbt = &tdata; + F_SET(new_dbt, DB_DBT_STREAMING); + type = H_KEYDATA; + } + + /* + * In cases 4-6, a delete and insert works, but we need to + * track and update any cursors pointing to the item being + * moved. 
+ */ + orig_pgno = PGNO(hcp->page); + orig_indx = hcp->indx; + if ((ret = __ham_get_clist(dbp, + orig_pgno, orig_indx, &carray)) != 0) + goto err; + + if (dbt->doff == 0 && dbt->dlen == len) { + type = (dup_flag ? H_DUPLICATE : H_KEYDATA); + new_dbt = dbt; + } else if (!F_ISSET(dbt, DB_DBT_STREAMING)) { /* Case B */ + type = HPAGE_PTYPE(hk) != H_OFFPAGE ? + HPAGE_PTYPE(hk) : H_KEYDATA; + memset(&tdata, 0, sizeof(tdata)); + memsize = 0; + if ((ret = __db_ret(dbc, hcp->page, + H_DATAINDEX(hcp->indx), &tdata, + &memp, &memsize)) != 0) + goto err; + + /* Now shift old data around to make room for new. */ + if (is_plus) { + if ((ret = __os_realloc(env, + tdata.size + change, &tdata.data)) != 0) + return (ret); + memp = tdata.data; + memsize = tdata.size + change; + memset((u_int8_t *)tdata.data + tdata.size, + 0, change); + } + end = (u_int8_t *)tdata.data + tdata.size; + + src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; + if (src < end && tdata.size > dbt->doff + dbt->dlen) { + len = tdata.size - (dbt->doff + dbt->dlen); + if (is_plus) + dest = src + change; + else + dest = src - change; + memmove(dest, src, len); + } + memcpy((u_int8_t *)tdata.data + dbt->doff, + dbt->data, dbt->size); + if (is_plus) + tdata.size += change; + else + tdata.size -= change; + new_dbt = &tdata; + } + if ((ret = __ham_del_pair(dbc, HAM_DEL_NO_CURSOR | + (F_ISSET(dbt, DB_DBT_STREAMING) ? HAM_DEL_IGNORE_OFFPAGE : + 0), NULL)) != 0) + goto err; + /* + * Save the state of the cursor after the delete, so that we + * can adjust any cursors impacted by the delete. Don't just + * update the cursors now, to avoid ambiguity in reversing the + * adjustments during abort. 
+ */ + if ((ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0) + goto err; + if ((ret = __ham_add_el(dbc, &tmp, new_dbt, type)) != 0) + goto err; + F_SET(hcp, dup_flag); + + /* + * If the delete/insert pair caused the item to be moved + * to another location (which is possible for duplicate sets + * that are moved onto another page in the bucket), then update + * any impacted cursors. + */ + if (((HASH_CURSOR*)dbc_n->internal)->pgno != hcp->pgno || + ((HASH_CURSOR*)dbc_n->internal)->indx != hcp->indx) { + /* + * Set any cursors pointing to items in the moved + * duplicate set to the destination location and reset + * the deleted flag. This can't be done earlier, since + * the insert location is not computed until the actual + * __ham_add_el call is made. + */ + if (carray != NULL) { + for (i = 0; carray[i] != NULL; i++) { + cp = (HASH_CURSOR*)carray[i]->internal; + cp->pgno = hcp->pgno; + cp->indx = hcp->indx; + F_CLR(cp, H_DELETED); + found = 1; + } + /* + * Only log the update once, since the recovery + * code iterates through all open cursors and + * applies the change to all matching cursors. + */ + if (found && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = + __ham_chgpg_log(dbp, + dbc->txn, &new_lsn, 0, + DB_HAM_CHGPG, orig_pgno, hcp->pgno, + orig_indx, hcp->indx)) != 0) + goto err; + } + } + /* + * Update any cursors impacted by the delete. Do this + * after chgpg log so that recovery does not re-bump + * cursors pointing to the deleted item. + */ + ret = __hamc_update(dbc_n, 0, DB_HAM_CURADJ_DEL, 0); + } + +err: if (dbc_n != NULL && (t_ret = __dbc_close(dbc_n)) != 0 && + ret == 0) + ret = t_ret; + if (carray != NULL) + __os_free(env, carray); + if (memp != NULL) + __os_free(env, memp); + return (ret); + } + + /* + * Set up pointer into existing data. Do it before the log + * message so we can use it inside of the log setup. 
+ */ + beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); + beg += dbt->doff; + + /* + * If we are going to have to move bytes at all, figure out + * all the parameters here. Then log the call before moving + * anything around. + */ + if (DBC_LOGGING(dbc)) { + old_dbt.data = beg; + old_dbt.size = dbt->dlen; + if ((ret = __ham_replace_log(dbp, dbc->txn, &new_lsn, + 0, PGNO(hcp->page), + (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page), + (int32_t)dbt->doff, OP_SET(oldtype, hcp->page), + &old_dbt, OP_SET(newtype, hcp->page), dbt)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(new_lsn); + + LSN(hcp->page) = new_lsn; /* Structure assignment. */ + + __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx), + (int32_t)dbt->doff, change, is_plus, dbt); + + return (0); +} + +/* + * Replace data on a page with new data, possibly growing or shrinking what's + * there. This is called on two different occasions. On one (from replpair) + * we are interested in changing only the data. On the other (from recovery) + * we are replacing the entire data (header and all) with a new element. In + * the latter case, the off argument is negative. + * pagep: the page that we're changing + * ndx: page index of the element that is growing/shrinking. + * off: Offset at which we are beginning the replacement. + * change: the number of bytes (+ or -) that the element is growing/shrinking. + * dbt: the new data that gets written at beg. 
+ * + * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t, + * PUBLIC: int32_t, u_int32_t, int, DBT *)); + */ +void +__ham_onpage_replace(dbp, pagep, ndx, off, change, is_plus, dbt) + DB *dbp; + PAGE *pagep; + u_int32_t ndx; + int32_t off; + u_int32_t change; + int is_plus; + DBT *dbt; +{ + db_indx_t i, *inp; + int32_t len; + size_t pgsize; + u_int8_t *src, *dest; + int zero_me; + + pgsize = dbp->pgsize; + inp = P_INP(dbp, pagep); + if (change != 0) { + zero_me = 0; + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + if (off < 0) + len = inp[ndx] - HOFFSET(pagep); + else if ((u_int32_t)off >= + LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) { + len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src); + zero_me = 1; + } else + len = (int32_t)( + (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) - + src); + if (is_plus) + dest = src - change; + else + dest = src + change; + memmove(dest, src, (size_t)len); + if (zero_me) + memset(dest + len, 0, change); + + /* Now update the indices. */ + for (i = ndx; i < NUM_ENT(pagep); i++) { + if (is_plus) + inp[i] -= change; + else + inp[i] += change; + } + if (is_plus) + HOFFSET(pagep) -= change; + else + HOFFSET(pagep) += change; + } + if (off >= 0) + memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off, + dbt->data, dbt->size); + else + memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size); +} + +/* + * __ham_merge_page -- + * Merge pages from one bucket to another. 
+ * PUBLIC: int __ham_merge_pages __P((DBC *,
+ * PUBLIC:     u_int32_t, u_int32_t, DB_COMPACT *));
+ */
+/*
+ * __ham_merge_pages --
+ *	Move every key/data pair in bucket "frombucket" into bucket
+ *	"tobucket", merging records into pages that have room and relinking
+ *	whole pages that do not.  When c_data is non-NULL the caller is
+ *	DB->compact: statistics are updated and pages above
+ *	c_data->compact_truncate are copied rather than relinked so they can
+ *	later be truncated from the file.
+ */
+int
+__ham_merge_pages(dbc, tobucket, frombucket, c_data)
+	DBC *dbc;
+	u_int32_t tobucket, frombucket;
+	DB_COMPACT *c_data;
+{
+	DB *dbp;
+	DBC **carray;
+	DB_LOCK tlock, firstlock;
+	DB_LSN from_lsn;
+	DB_MPOOLFILE *mpf;
+	ENV *env;
+	HASH_CURSOR *hcp, *cp;
+	PAGE *to_pagep, *first_pagep,
+	    *from_pagep, *last_pagep, *next_pagep, *prev_pagep;
+	db_pgno_t to_pgno, first_pgno, from_pgno;
+	u_int32_t len;
+	db_indx_t dest_indx, n, num_ent;
+	int check_trunc, found, i, ret;
+
+	dbp = dbc->dbp;
+	carray = NULL;
+	env = dbp->env;
+	mpf = dbp->mpf;
+	hcp = (HASH_CURSOR *)dbc->internal;
+	hcp->pgno = PGNO_INVALID;
+	to_pagep = first_pagep = NULL;
+	from_pagep = last_pagep = next_pagep = prev_pagep = NULL;
+	from_pgno = PGNO_INVALID;
+	LOCK_INIT(tlock);
+	LOCK_INIT(firstlock);
+
+	/* Only honor the truncation boundary when compacting. */
+	check_trunc =
+	    c_data == NULL ? 0 : c_data->compact_truncate != PGNO_INVALID;
+	to_pgno = BUCKET_TO_PAGE(hcp, tobucket);
+	if ((ret = __db_lget(dbc,
+	    0, to_pgno, DB_LOCK_WRITE, 0, &tlock)) != 0)
+		goto err;
+	if ((ret = __memp_fget(mpf, &to_pgno, dbc->thread_info, dbc->txn,
+	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &to_pagep)) != 0)
+		goto err;
+
+	/* Sort any unsorted pages before adding to the page. */
+	if (to_pagep->type == P_HASH_UNSORTED)
+		if ((ret = __ham_sort_page_cursor(dbc, to_pagep)) != 0)
+			return (ret);
+
+	/* Fetch the first page of the bucket we are getting rid of. */
+	from_pgno = BUCKET_TO_PAGE(hcp, frombucket);
+	if ((ret = __db_lget(dbc,
+	    0, from_pgno, DB_LOCK_WRITE, 0, &firstlock)) != 0)
+		goto err;
+next_page:
+	/*
+	 * from_pagep is the starting point in the bucket at which records
+	 * are moved to the new bucket.
+	 */
+	if (from_pagep == NULL &&
+	    (ret = __memp_fget(mpf, &from_pgno, dbc->thread_info,
+	    dbc->txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &from_pagep)) != 0)
+		goto err;
+	/* Snapshot the cursors pinned to this page so they can be moved. */
+	if ((ret = __ham_get_clist(dbp, from_pgno, NDX_INVALID, &carray)) != 0)
+		goto err;
+
+	hcp->indx = 0;
+	hcp->pgno = from_pgno;
+	hcp->page = from_pagep;
+	num_ent = NUM_ENT(from_pagep);
+	for (n = 0; n < num_ent; n += 2) {
+		/*
+		 * Figure out how many bytes we need on the from
+		 * page to store the key/data pair.
+		 */
+		len = LEN_HITEM(dbp, from_pagep,
+		    dbp->pgsize, H_DATAINDEX(hcp->indx)) +
+		    LEN_HITEM(dbp, from_pagep,
+		    dbp->pgsize, H_KEYINDEX(hcp->indx)) +
+		    2 * sizeof(db_indx_t);
+
+		/*
+		 * Find a page that will fit this data.  We don't go back
+		 * to a page, so we may leave some space if there is a big
+		 * variation in record size.
+		 */
+		while (P_FREESPACE(dbp, to_pagep) < len) {
+			to_pgno = NEXT_PGNO(to_pagep);
+			if (to_pgno == PGNO_INVALID) {
+				/* End of chain: append a new overflow page. */
+				next_pagep = to_pagep;
+				if ((ret =
+				    __ham_add_ovflpage(dbc, &next_pagep)) != 0)
+					goto err;
+				if ((ret = __memp_fput(mpf, dbc->thread_info,
+				    to_pagep, dbc->priority)) != 0)
+					goto err;
+				to_pagep = next_pagep;
+				next_pagep = NULL;
+				if (c_data != NULL &&
+				    c_data->compact_pages_free > 0)
+					c_data->compact_pages_free--;
+				to_pgno = PGNO(to_pagep);
+			} else {
+				if ((ret = __memp_fput(mpf, dbc->thread_info,
+				    to_pagep, dbc->priority)) != 0)
+					goto err;
+				to_pagep = NULL;
+				if ((ret = __memp_fget(mpf,
+				    &to_pgno, dbc->thread_info, dbc->txn,
+				    DB_MPOOL_CREATE | DB_MPOOL_DIRTY,
+				    &to_pagep)) != 0)
+					goto err;
+
+				/*
+				 * Sort any unsorted pages before adding
+				 * to the page.
+				 */
+				if (to_pagep->type == P_HASH_UNSORTED)
+					if ((ret = __ham_sort_page_cursor(dbc,
+					    to_pagep)) != 0)
+						goto err;
+			}
+		}
+		dest_indx = NDX_INVALID;
+		if ((ret = __ham_copypair(dbc,
+		    from_pagep, hcp->indx, to_pagep, &dest_indx, 1)) != 0)
+			goto err;
+
+		/* Update any cursors pointing at the moved item. */
+		if (carray != NULL) {
+			found = 0;
+			for (i = 0; carray[i] != NULL; i++) {
+				cp =
+				    (HASH_CURSOR *)carray[i]->internal;
+				if (cp->pgno == from_pgno &&
+				    cp->indx == n) {
+					cp->pgno = PGNO(to_pagep);
+					cp->indx = dest_indx;
+					cp->bucket = tobucket;
+					found = 1;
+				}
+			}
+			/*
+			 * Only log the update once, since the recovery
+			 * code iterates through all open cursors and
+			 * applies the change to all matching cursors.
+			 */
+			if (found && DBC_LOGGING(dbc) &&
+			    IS_SUBTRANSACTION(dbc->txn)) {
+				if ((ret =
+				    __ham_chgpg_log(dbp,
+				    dbc->txn, &from_lsn, 0,
+				    DB_HAM_SPLIT, from_pgno,
+				    PGNO(to_pagep), n, dest_indx)) != 0)
+					goto err;
+			}
+		}
+		/*
+		 * If this is the head of the bucket, delete the record.
+		 * Otherwise we will just free the page after the loop.
+		 */
+		if (PREV_PGNO(from_pagep) == PGNO_INVALID) {
+			if ((ret = __ham_del_pair(dbc,
+			    HAM_DEL_IGNORE_OFFPAGE | HAM_DEL_NO_CURSOR,
+			    from_pagep)) != 0)
+				goto err;
+			/*
+			 * __ham_del_pair decremented nelem; the pair still
+			 * exists (it moved), so restore the count when we
+			 * maintain it (CDB/no standard locking).
+			 */
+			if (!STD_LOCKING(dbc)) {
+				if ((ret = __ham_dirty_meta(dbc, 0)) != 0)
+					return (ret);
+				++hcp->hdr->nelem;
+			}
+		} else
+			hcp->indx += 2;
+	}
+	/*
+	 * If there are more pages in the bucket then we need to process them.
+	 * First we may remove a page that is empty.  If there is a next
+	 * page then save the previous one for relinking.
+	 */
+	from_pgno = NEXT_PGNO(from_pagep);
+	if (PREV_PGNO(from_pagep) != PGNO_INVALID) {
+		if (DBC_LOGGING(dbc)) {
+			if ((ret = __db_relink_log(dbp, dbc->txn,
+			    &LSN(prev_pagep), 0, PGNO(from_pagep),
+			    PGNO_INVALID, PGNO(prev_pagep),
+			    &LSN(prev_pagep), PGNO_INVALID, NULL)) != 0)
+				goto err;
+		} else
+			LSN_NOT_LOGGED(LSN(prev_pagep));
+
+		NEXT_PGNO(prev_pagep) = PGNO_INVALID;
+
+		/* __db_free consumes the page reference even on error. */
+		if ((ret = __db_free(dbc, from_pagep, 0)) != 0) {
+			from_pagep = NULL;
+			goto err;
+		}
+		if (c_data != NULL)
+			c_data->compact_pages_free++;
+		if ((ret = __memp_fput(mpf,
+		    dbc->thread_info, prev_pagep, dbc->priority)) != 0)
+			goto err;
+		prev_pagep = NULL;
+	} else if (from_pgno != PGNO_INVALID)
+		prev_pagep = from_pagep;
+	else if ((ret = __memp_fput(mpf,
+	    dbc->thread_info, from_pagep, dbc->priority)) != 0)
+		goto err;
+
+	from_pagep = NULL;
+	hcp->page = NULL;
+	if (carray != NULL)
+		__os_free(env, carray);
+	carray = NULL;
+
+	/*
+	 * The head of the bucket has been copied.  Try to figure out
+	 * if we should just relink the following pages or try to merge
+	 * them into existing pages.  This is quick and dirty: if it
+	 * looks like the data will fit on the current "to" page then
+	 * merge it, otherwise just do the linking.
+	 * If this was called from DB->compact it will be better to copy
+	 * the data to lower numbered pages.
+	 */
+	if (check_trunc && from_pgno > c_data->compact_truncate)
+		goto next_page;
+
+	/*
+	 * first_pgno will be the first page of a list that gets
+	 * relinked to the new bucket.  last_pagep will point at the
+	 * last page of the linked list.
+	 */
+	first_pgno = from_pgno;
+	last_pagep = NULL;
+	while (from_pgno != PGNO_INVALID) {
+		if ((ret = __memp_fget(mpf,
+		    &from_pgno, dbc->thread_info, dbc->txn,
+		    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &from_pagep)) != 0)
+			goto err;
+		/* Stop the relink scan if this page's data would fit. */
+		if (P_FREESPACE(dbp, to_pagep) >
+		    (dbp->pgsize - HOFFSET(from_pagep)) +
+		    (NUM_ENT(from_pagep) * sizeof(db_indx_t)))
+			break;
+		if (check_trunc && from_pgno > c_data->compact_truncate)
+			break;
+		from_pgno = NEXT_PGNO(from_pagep);
+		if (last_pagep != NULL && last_pagep != first_pagep &&
+		    (ret = __memp_fput(mpf,
+		    dbc->thread_info, last_pagep, dbc->priority)) != 0)
+			goto err;
+		last_pagep = from_pagep;
+		if (first_pagep == NULL)
+			first_pagep = from_pagep;
+		from_pagep = NULL;
+	}
+
+	/* Link the chain of "full" pages into the "to" bucket. */
+	if (first_pgno != PGNO_INVALID && first_pgno != from_pgno) {
+		DB_ASSERT(dbp->env, first_pagep != NULL);
+		next_pagep = NULL;
+		if (NEXT_PGNO(to_pagep) != PGNO_INVALID && (ret =
+		    __memp_fget(mpf, &NEXT_PGNO(to_pagep), dbc->thread_info,
+		    dbc->txn, DB_MPOOL_DIRTY, &next_pagep)) != 0)
+			goto err;
+
+		if (last_pagep == NULL)
+			last_pagep = first_pagep;
+		DB_ASSERT(dbp->env, last_pagep != NULL);
+		/*
+		 * At the point we have:
+		 *	to_pagep -- the page that we are linking to.
+		 *	first_pagep -- the page that is first in the list.
+		 *	last_pagep -- the page that is the last in the list.
+		 *	prev_pagep -- the page that points at first_pagep.
+		 *	next_pagep -- the next page after the list.
+		 */
+		if (DBC_LOGGING(dbc)) {
+			/* Log every link change before modifying pages. */
+			if ((ret = __db_relink_log(dbp, dbc->txn,
+			    &LSN(to_pagep), 0, NEXT_PGNO(to_pagep),
+			    first_pgno, to_pgno, &LSN(to_pagep),
+			    PGNO_INVALID, NULL)) != 0)
+				goto err;
+			if ((ret = __db_relink_log(dbp, dbc->txn,
+			    &LSN(first_pagep), 0, PREV_PGNO(first_pagep),
+			    to_pgno, PGNO_INVALID, NULL, first_pgno,
+			    &LSN(first_pagep))) != 0)
+				goto err;
+			if (next_pagep != NULL) {
+				if ((ret = __db_relink_log(dbp, dbc->txn,
+				    &LSN(next_pagep), 0, PREV_PGNO(next_pagep),
+				    PGNO(last_pagep), PGNO_INVALID, NULL,
+				    PGNO(next_pagep), &LSN(next_pagep))) != 0)
+					goto err;
+				if ((ret = __db_relink_log(dbp, dbc->txn,
+				    &LSN(last_pagep), 0, NEXT_PGNO(last_pagep),
+				    PGNO(next_pagep), PGNO(last_pagep),
+				    &LSN(last_pagep), PGNO_INVALID, NULL)) != 0)
+					goto err;
+			} else if (NEXT_PGNO(last_pagep) != PGNO_INVALID &&
+			    (ret = __db_relink_log(dbp, dbc->txn,
+			    &LSN(last_pagep), 0, NEXT_PGNO(last_pagep),
+			    PGNO_INVALID, PGNO(last_pagep),
+			    &LSN(last_pagep), PGNO_INVALID, NULL)) != 0)
+				goto err;
+			if (prev_pagep != NULL &&
+			    (ret = __db_relink_log(dbp, dbc->txn,
+			    &LSN(prev_pagep), 0, NEXT_PGNO(prev_pagep),
+			    NEXT_PGNO(last_pagep), PGNO(prev_pagep),
+			    &LSN(prev_pagep), PGNO_INVALID, NULL)) != 0)
+				goto err;
+		} else {
+			LSN_NOT_LOGGED(LSN(to_pagep));
+			LSN_NOT_LOGGED(LSN(first_pagep));
+			LSN_NOT_LOGGED(LSN(last_pagep));
+			/*
+			 * NOTE(review): this marks LSN(to_pagep) a second
+			 * time; presumably LSN(next_pagep) was intended --
+			 * confirm against upstream hash_page.c.
+			 */
+			if (next_pagep != NULL)
+				LSN_NOT_LOGGED(LSN(to_pagep));
+		}
+		/* Now perform the actual relink. */
+		if (prev_pagep != NULL)
+			NEXT_PGNO(prev_pagep) = NEXT_PGNO(last_pagep);
+		NEXT_PGNO(last_pagep) = NEXT_PGNO(to_pagep);
+		NEXT_PGNO(to_pagep) = first_pgno;
+		PREV_PGNO(first_pagep) = to_pgno;
+		if (next_pagep != NULL) {
+			PREV_PGNO(next_pagep) = PGNO(last_pagep);
+			if ((ret = __memp_fput(mpf,
+			    dbc->thread_info, next_pagep, dbc->priority)) != 0)
+				goto err;
+			next_pagep = NULL;
+		}
+		if (last_pagep != first_pagep && (ret = __memp_fput(mpf,
+		    dbc->thread_info, last_pagep, dbc->priority)) != 0)
+			goto err;
+		last_pagep = NULL;
+		if ((ret = __memp_fput(mpf,
+		    dbc->thread_info, first_pagep, dbc->priority)) != 0)
+			goto err;
+		first_pagep = NULL;
+	} else if (last_pagep != NULL && (ret = __memp_fput(mpf,
+	    dbc->thread_info, last_pagep, dbc->priority)) != 0)
+		goto err;
+
+	if (from_pagep == NULL) {
+		from_pagep = first_pagep;
+		first_pagep = NULL;
+	}
+	if (from_pgno != PGNO_INVALID)
+		goto next_page;
+
+	if (prev_pagep != NULL && (ret = __memp_fput(mpf,
+	    dbc->thread_info, prev_pagep, dbc->priority)) != 0)
+		goto err;
+	ret = __memp_fput(mpf, dbc->thread_info, to_pagep, dbc->priority);
+	return (ret);
+
+err:	/* Drop every pinned page exactly once; aliases are compared first. */
+	if (last_pagep != NULL && last_pagep != first_pagep)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, last_pagep, dbc->priority);
+	if (first_pagep != NULL && first_pagep != from_pagep)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, first_pagep, dbc->priority);
+	if (next_pagep != NULL)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, next_pagep, dbc->priority);
+	if (from_pagep != NULL)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, from_pagep, dbc->priority);
+	if (to_pagep != NULL)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, to_pagep, dbc->priority);
+	if (prev_pagep != NULL)
+		(void)__memp_fput(mpf,
+		    dbc->thread_info, prev_pagep, dbc->priority);
+	hcp->page = NULL;
+	(void)__TLPUT(dbc, tlock);
+	(void)__TLPUT(dbc, firstlock);
+	if (carray != NULL)
+		__os_free(env, carray);
+	return (ret);
+}
+
+/*
+ * __ham_split_page --
+ *	Split bucket obucket: rehash each pair in the old bucket and
+ *	distribute it between obucket and the newly created bucket nbucket.
+ *	The whole split is logged page-image at a time (SPLITOLD/SPLITNEW
+ *	records) rather than per pair.
+ *
+ * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
+ */
+int
+__ham_split_page(dbc, obucket, nbucket)
+	DBC *dbc;
+	u_int32_t obucket, nbucket;
+{
+	DB *dbp;
+	DBC **carray, *tmp_dbc;
+	DBT key, page_dbt;
+	DB_LOCK block;
+	DB_LSN new_lsn;
+	DB_MPOOLFILE *mpf;
+	ENV *env;
+	HASH_CURSOR *hcp, *cp;
+	PAGE **pp, *old_pagep, *temp_pagep, *new_pagep, *next_pagep;
+	db_indx_t n, dest_indx;
+	db_pgno_t bucket_pgno, npgno, next_pgno;
+	u_int32_t big_len, len;
+	int found, i, ret, t_ret;
+	void *big_buf;
+
+	dbp = dbc->dbp;
+	carray = NULL;
+	env = dbp->env;
+	mpf = dbp->mpf;
+	hcp = (HASH_CURSOR *)dbc->internal;
+	temp_pagep = old_pagep = new_pagep = NULL;
+	npgno = PGNO_INVALID;
+	LOCK_INIT(block);
+
+	bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
+	if ((ret = __db_lget(dbc,
+	    0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0)
+		goto err;
+	if ((ret = __memp_fget(mpf, &bucket_pgno, dbc->thread_info, dbc->txn,
+	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &old_pagep)) != 0)
+		goto err;
+
+	/* Sort any unsorted pages before doing a hash split. */
+	if (old_pagep->type == P_HASH_UNSORTED)
+		if ((ret = __ham_sort_page_cursor(dbc, old_pagep)) != 0)
+			return (ret);
+
+	/* Properly initialize the new bucket page. */
+	npgno = BUCKET_TO_PAGE(hcp, nbucket);
+	if ((ret = __memp_fget(mpf, &npgno, dbc->thread_info, dbc->txn,
+	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &new_pagep)) != 0)
+		goto err;
+	P_INIT(new_pagep,
+	    dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+
+	/*
+	 * Work from a copy of the old page (the cursor's split buffer) so
+	 * the old page can be re-initialized and refilled in place.
+	 */
+	temp_pagep = hcp->split_buf;
+	memcpy(temp_pagep, old_pagep, dbp->pgsize);
+
+	if (DBC_LOGGING(dbc)) {
+		page_dbt.size = dbp->pgsize;
+		page_dbt.data = old_pagep;
+		if ((ret = __ham_splitdata_log(dbp,
+		    dbc->txn, &new_lsn, 0, SPLITOLD,
+		    PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
+			goto err;
+	} else
+		LSN_NOT_LOGGED(new_lsn);
+
+	LSN(old_pagep) = new_lsn;	/* Structure assignment. */
+
+	P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID,
+	    PGNO_INVALID, 0, P_HASH);
+
+	big_len = 0;
+	big_buf = NULL;
+	memset(&key, 0, sizeof(key));
+	/* Walk every page of the old bucket chain via the copy buffer. */
+	while (temp_pagep != NULL) {
+		if ((ret = __ham_get_clist(dbp,
+		    PGNO(temp_pagep), NDX_INVALID, &carray)) != 0)
+			goto err;
+
+		for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) {
+			if ((ret = __db_ret(dbc, temp_pagep, H_KEYINDEX(n),
+			    &key, &big_buf, &big_len)) != 0)
+				goto err;
+
+			/* Rehash decides which bucket receives the pair. */
+			if (__ham_call_hash(dbc, key.data, key.size) == obucket)
+				pp = &old_pagep;
+			else
+				pp = &new_pagep;
+
+			/*
+			 * Figure out how many bytes we need on the new
+			 * page to store the key/data pair.
+			 */
+			len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
+			    H_DATAINDEX(n)) +
+			    LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
+			    H_KEYINDEX(n)) +
+			    2 * sizeof(db_indx_t);
+
+			if (P_FREESPACE(dbp, *pp) < len) {
+				/* Log the full target page, then extend. */
+				if (DBC_LOGGING(dbc)) {
+					page_dbt.size = dbp->pgsize;
+					page_dbt.data = *pp;
+					if ((ret = __ham_splitdata_log(dbp,
+					    dbc->txn, &new_lsn, 0,
+					    SPLITNEW, PGNO(*pp), &page_dbt,
+					    &LSN(*pp))) != 0)
+						goto err;
+				} else
+					LSN_NOT_LOGGED(new_lsn);
+				LSN(*pp) = new_lsn;
+				next_pagep = *pp;
+				if ((ret =
+				    __ham_add_ovflpage(dbc, &next_pagep)) != 0)
+					goto err;
+				if ((ret = __memp_fput(mpf,
+				    dbc->thread_info, *pp, dbc->priority)) != 0)
+					goto err;
+				*pp = next_pagep;
+			}
+
+			dest_indx = NDX_INVALID;
+			if ((ret = __ham_copypair(dbc, temp_pagep,
+			    H_KEYINDEX(n), *pp, &dest_indx, 0)) != 0)
+				goto err;
+
+			/*
+			 * Update any cursors that were pointing to items
+			 * shuffled because of this insert.
+			 * Use __hamc_update, since the cursor adjustments are
+			 * the same as those required for an insert. The
+			 * overhead of creating a cursor is worthwhile to save
+			 * replicating the adjustment functionality.
+			 * Adjusting shuffled cursors needs to be done prior to
+			 * adjusting any cursors that were pointing to the
+			 * moved item.
+			 * All pages in a bucket are sorted, but the items are
+			 * not sorted across pages within a bucket. This means
+			 * that splitting the first page in a bucket into two
+			 * new buckets won't require any cursor shuffling,
+			 * since all inserts will be appends. Splitting of the
+			 * second etc page from the initial bucket could
+			 * cause an item to be inserted at any location on a
+			 * page (since items already inserted from page 1 of
+			 * the initial bucket may overlap), so only adjust
+			 * cursors for the second etc pages within a bucket.
+			 */
+			if (PGNO(temp_pagep) != bucket_pgno) {
+				if ((ret = __db_cursor_int(dbp,
+				    dbc->thread_info, dbc->txn, dbp->type,
+				    PGNO_INVALID, 0, DB_LOCK_INVALIDID,
+				    &tmp_dbc)) != 0)
+					goto err;
+				hcp = (HASH_CURSOR*)tmp_dbc->internal;
+				hcp->pgno = PGNO(*pp);
+				hcp->indx = dest_indx;
+				hcp->dup_off = 0;
+				hcp->order = 0;
+				if ((ret = __hamc_update(
+				    tmp_dbc, len, DB_HAM_CURADJ_ADD, 0)) != 0)
+					goto err;
+				if ((ret = __dbc_close(tmp_dbc)) != 0)
+					goto err;
+			}
+			/* Update any cursors pointing at the moved item. */
+			if (carray != NULL) {
+				found = 0;
+				for (i = 0; carray[i] != NULL; i++) {
+					cp =
+					    (HASH_CURSOR *)carray[i]->internal;
+					if (cp->pgno == PGNO(temp_pagep) &&
+					    cp->indx == n) {
+						cp->pgno = PGNO(*pp);
+						cp->indx = dest_indx;
+						if (cp->pgno == PGNO(old_pagep))
+							cp->bucket = obucket;
+						else
+							cp->bucket = nbucket;
+						found = 1;
+					}
+				}
+				/*
+				 * Only log the update once, since the recovery
+				 * code iterates through all open cursors and
+				 * applies the change to all matching cursors.
+				 */
+				if (found && DBC_LOGGING(dbc) &&
+				    IS_SUBTRANSACTION(dbc->txn)) {
+					if ((ret =
+					    __ham_chgpg_log(dbp,
+					    dbc->txn, &new_lsn, 0,
+					    DB_HAM_SPLIT, PGNO(temp_pagep),
+					    PGNO(*pp), n, dest_indx)) != 0)
+						goto err;
+				}
+			}
+		}
+		next_pgno = NEXT_PGNO(temp_pagep);
+
+		/* Clear temp_page; if it's a link overflow page, free it. */
+		if (PGNO(temp_pagep) != bucket_pgno && (ret =
+		    __db_free(dbc, temp_pagep, 0)) != 0) {
+			temp_pagep = NULL;
+			goto err;
+		}
+
+		if (next_pgno == PGNO_INVALID)
+			temp_pagep = NULL;
+		else if ((ret = __memp_fget(mpf,
+		    &next_pgno, dbc->thread_info, dbc->txn,
+		    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &temp_pagep)) != 0)
+			goto err;
+
+		if (temp_pagep != NULL) {
+			if (DBC_LOGGING(dbc)) {
+				page_dbt.size = dbp->pgsize;
+				page_dbt.data = temp_pagep;
+				if ((ret = __ham_splitdata_log(dbp,
+				    dbc->txn, &new_lsn, 0,
+				    SPLITOLD, PGNO(temp_pagep),
+				    &page_dbt, &LSN(temp_pagep))) != 0)
+					goto err;
+			} else
+				LSN_NOT_LOGGED(new_lsn);
+			LSN(temp_pagep) = new_lsn;
+		}
+
+		if (carray != NULL)	/* We never knew its size. */
+			__os_free(env, carray);
+		carray = NULL;
+	}
+	if (big_buf != NULL)
+		__os_free(env, big_buf);
+
+	/*
+	 * If the original bucket spanned multiple pages, then we've got
+	 * a pointer to a page that used to be on the bucket chain.  It
+	 * should be deleted.
+	 */
+	if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
+	    (ret = __db_free(dbc, temp_pagep, 0)) != 0) {
+		temp_pagep = NULL;
+		goto err;
+	}
+
+	/*
+	 * Write new buckets out.
+	 */
+	if (DBC_LOGGING(dbc)) {
+		page_dbt.size = dbp->pgsize;
+		page_dbt.data = old_pagep;
+		if ((ret = __ham_splitdata_log(dbp, dbc->txn,
+		    &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt,
+		    &LSN(old_pagep))) != 0)
+			goto err;
+		LSN(old_pagep) = new_lsn;
+
+		page_dbt.data = new_pagep;
+		if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0,
+		    SPLITNEW, PGNO(new_pagep), &page_dbt,
+		    &LSN(new_pagep))) != 0)
+			goto err;
+		LSN(new_pagep) = new_lsn;
+	} else {
+		LSN_NOT_LOGGED(LSN(old_pagep));
+		LSN_NOT_LOGGED(LSN(new_pagep));
+	}
+
+	ret = __memp_fput(mpf, dbc->thread_info, old_pagep, dbc->priority);
+	if ((t_ret = __memp_fput(mpf,
+	    dbc->thread_info, new_pagep, dbc->priority)) != 0 && ret == 0)
+		ret = t_ret;
+
+	if (0) {
+err:		if (old_pagep != NULL)
+			(void)__memp_fput(mpf,
+			    dbc->thread_info, old_pagep, dbc->priority);
+		if (new_pagep != NULL) {
+			/* Leave the new bucket page consistent (empty). */
+			P_INIT(new_pagep, dbp->pgsize,
+			    npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+			(void)__memp_fput(mpf,
+			    dbc->thread_info, new_pagep, dbc->priority);
+		}
+		if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
+			(void)__memp_fput(mpf,
+			    dbc->thread_info, temp_pagep, dbc->priority);
+	}
+	if ((t_ret = __TLPUT(dbc, block)) != 0 && ret == 0)
+		ret = t_ret;
+	if (carray != NULL)	/* We never knew its size. */
+		__os_free(env, carray);
+	return (ret);
+}
+
+/*
+ * Add the given pair to the page.  The page in question may already be
+ * held (i.e. it was already gotten).  If it is, then the page is passed
+ * in via the pagep parameter.  On return, pagep will contain the page
+ * to which we just added something.  This allows us to link overflow
+ * pages and return the new page having correctly put the last page.
+ *
+ * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, u_int32_t));
+ */
+int
+__ham_add_el(dbc, key, val, type)
+	DBC *dbc;
+	const DBT *key, *val;
+	u_int32_t type;
+{
+	const DBT *pkey, *pdata;
+	DB *dbp;
+	DBT key_dbt, data_dbt;
+	DB_LSN new_lsn;
+	DB_MPOOLFILE *mpf;
+	HASH_CURSOR *hcp;
+	HOFFPAGE doff, koff;
+	PAGE *new_pagep;
+	db_pgno_t next_pgno, pgno;
+	u_int32_t data_size, data_type, key_size, key_type;
+	u_int32_t pages, pagespace, pairsize;
+	int do_expand, is_keybig, is_databig, match, ret;
+
+	dbp = dbc->dbp;
+	mpf = dbp->mpf;
+	hcp = (HASH_CURSOR *)dbc->internal;
+	do_expand = 0;
+
+	/* Prefer the page a previous lookup found for this key. */
+	pgno = hcp->seek_found_page != PGNO_INVALID ?
+	    hcp->seek_found_page : hcp->pgno;
+	if (hcp->page == NULL && (ret = __memp_fget(mpf, &pgno,
+	    dbc->thread_info, dbc->txn, DB_MPOOL_CREATE, &hcp->page)) != 0)
+		return (ret);
+
+	/*
+	 * Compute on-page sizes; items over the big-item threshold are
+	 * stored off-page and occupy only an HOFFPAGE stub on the page.
+	 */
+	key_size = HKEYDATA_PSIZE(key->size);
+	data_size = HKEYDATA_PSIZE(val->size);
+	is_keybig = ISBIG(hcp, key->size);
+	is_databig = ISBIG(hcp, val->size);
+	if (is_keybig)
+		key_size = HOFFPAGE_PSIZE;
+	if (is_databig)
+		data_size = HOFFPAGE_PSIZE;
+
+	pairsize = key_size + data_size;
+
+	/* Advance to first page in chain with room for item. */
+	while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) {
+		/*
+		 * This may not be the end of the chain, but the pair may fit
+		 * anyway.  Check if it's a bigpair that fits or a regular
+		 * pair that fits.
+		 */
+		if (P_FREESPACE(dbp, hcp->page) >= pairsize)
+			break;
+		next_pgno = NEXT_PGNO(hcp->page);
+		if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Check if we need to allocate a new page.
+	 */
+	if (P_FREESPACE(dbp, hcp->page) < pairsize) {
+		do_expand = 1;
+		if ((ret = __memp_dirty(mpf, &hcp->page,
+		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
+			return (ret);
+		new_pagep = hcp->page;
+		if ((ret = __ham_add_ovflpage(dbc, &new_pagep)) != 0)
+			return (ret);
+		if ((ret = __memp_fput(mpf,
+		    dbc->thread_info, hcp->page, dbc->priority)) != 0) {
+			(void)__memp_fput(mpf,
+			    dbc->thread_info, new_pagep, dbc->priority);
+			return (ret);
+		}
+		hcp->page = new_pagep;
+		hcp->pgno = PGNO(hcp->page);
+	}
+
+	/*
+	 * If we don't have a transaction then make sure we will not
+	 * run out of file space before updating the key or data.
+	 */
+	if (dbc->txn == NULL &&
+	    dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) {
+		pagespace = P_MAXSPACE(dbp, dbp->pgsize);
+		pages = 0;
+		if (is_databig)
+			pages = ((data_size - 1) / pagespace) + 1;
+		if (is_keybig) {
+			pages += ((key->size - 1) / pagespace) + 1;
+			if (pages >
+			    (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
+				return (__db_space_err(dbp));
+		}
+	}
+
+	if ((ret = __memp_dirty(mpf,
+	    &hcp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
+		return (ret);
+
+	/*
+	 * Update cursor.
+	 */
+	hcp->indx = hcp->seek_found_indx;
+	F_CLR(hcp, H_DELETED);
+	/* Big key: write it off-page and insert an HOFFPAGE reference. */
+	if (is_keybig) {
+		koff.type = H_OFFPAGE;
+		UMRW_SET(koff.unused[0]);
+		UMRW_SET(koff.unused[1]);
+		UMRW_SET(koff.unused[2]);
+		if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0)
+			return (ret);
+		koff.tlen = key->size;
+		key_dbt.data = &koff;
+		key_dbt.size = sizeof(koff);
+		pkey = &key_dbt;
+		key_type = H_OFFPAGE;
+	} else {
+		pkey = key;
+		key_type = H_KEYDATA;
+	}
+
+	/* Big data: same treatment as a big key. */
+	if (is_databig) {
+		doff.type = H_OFFPAGE;
+		UMRW_SET(doff.unused[0]);
+		UMRW_SET(doff.unused[1]);
+		UMRW_SET(doff.unused[2]);
+		if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0)
+			return (ret);
+		doff.tlen = val->size;
+		data_dbt.data = &doff;
+		data_dbt.size = sizeof(doff);
+		pdata = &data_dbt;
+		data_type = H_OFFPAGE;
+	} else {
+		pdata = val;
+		data_type = type;
+	}
+
+	/* Sort any unsorted pages before doing the insert. */
+	if (((PAGE *)hcp->page)->type == P_HASH_UNSORTED)
+		if ((ret = __ham_sort_page_cursor(dbc, hcp->page)) != 0)
+			return (ret);
+
+	/*
+	 * If inserting on the page found initially, then use the saved index.
+	 * If inserting on a different page resolve the index now so it can be
+	 * logged.
+	 * The page might be different, if P_FREESPACE constraint failed (due
+	 * to a partial put that increases the data size).
+	 */
+	if (PGNO(hcp->page) != hcp->seek_found_page) {
+		if ((ret = __ham_getindex(dbc, hcp->page, pkey,
+		    key_type, &match, &hcp->seek_found_indx)) != 0)
+			return (ret);
+		hcp->seek_found_page = PGNO(hcp->page);
+
+		DB_ASSERT(dbp->env, hcp->seek_found_indx <= NUM_ENT(hcp->page));
+	}
+
+	/* Write-ahead log the insert before modifying the page. */
+	if (DBC_LOGGING(dbc)) {
+		if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0,
+		    PUTPAIR, PGNO(hcp->page), (u_int32_t)hcp->seek_found_indx,
+		    &LSN(hcp->page), OP_SET(key_type, hcp->page), pkey,
+		    OP_SET(data_type, hcp->page), pdata)) != 0)
+			return (ret);
+	} else
+		LSN_NOT_LOGGED(new_lsn);
+
+	/* Move lsn onto page. */
+	LSN(hcp->page) = new_lsn;	/* Structure assignment. */
+
+	if ((ret = __ham_insertpair(dbc, hcp->page,
+	    &hcp->seek_found_indx, pkey, pdata, key_type, data_type)) != 0)
+		return (ret);
+
+	/*
+	 * Adjust any cursors that were pointing at items whose indices were
+	 * shuffled due to the insert.
+	 */
+	if ((ret = __hamc_update(dbc, pairsize, DB_HAM_CURADJ_ADD, 0)) != 0)
+		return (ret);
+
+	/*
+	 * For splits, we are going to update item_info's page number
+	 * field, so that we can easily return to the same page the
+	 * next time we come in here.  For other operations, this doesn't
+	 * matter, since this is the last thing that happens before we return
+	 * to the user program.
+	 */
+	hcp->pgno = PGNO(hcp->page);
+	/*
+	 * When moving an item from one page in a bucket to another, due to an
+	 * expanding on page duplicate set, or a partial put that increases the
+	 * size of an item. The destination index needs to be saved so that the
+	 * __ham_replpair code can update any cursors impacted by the move. For
+	 * other operations, this does not matter, since this is the last thing
+	 * that happens before we return to the user program.
+	 */
+	hcp->indx = hcp->seek_found_indx;
+
+	/*
+	 * XXX
+	 * Maybe keep incremental numbers here.
+	 */
+	if (!STD_LOCKING(dbc)) {
+		if ((ret = __ham_dirty_meta(dbc, 0)) != 0)
+			return (ret);
+		hcp->hdr->nelem++;
+	}
+
+	/* Schedule a table expansion if the fill factor was exceeded. */
+	if (do_expand || (hcp->hdr->ffactor != 0 &&
+	    (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor))
+		F_SET(hcp, H_EXPAND);
+	return (0);
+}
+
+/*
+ * Special insert pair call -- copies a key/data pair from one page to
+ * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
+ * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
+ * do not need to log them here.
+ *
+ * dest_indx is an optional parameter, it serves several purposes:
+ * * ignored if NULL
+ * * Used as an insert index if non-null and not NDX_INVALID
+ * * Populated with the insert index if non-null and NDX_INVALID
+ *
+ * PUBLIC: int __ham_copypair __P((DBC *, PAGE *, u_int32_t,
+ * PUBLIC:	PAGE *, db_indx_t *, int));
+ */
+int
+__ham_copypair(dbc, src_page, src_ndx, dest_page, dest_indx, log)
+	DBC *dbc;
+	PAGE *src_page;
+	u_int32_t src_ndx;
+	PAGE *dest_page;
+	db_indx_t *dest_indx;
+	int log;
+{
+	DB *dbp;
+	DBT tkey, tdata;
+	db_indx_t kindx, dindx, dest;
+	u_int32_t ktype, dtype;
+	int match, ret;
+
+	dbp = dbc->dbp;
+	ret = 0;
+	memset(&tkey, 0, sizeof(tkey));
+	memset(&tdata, 0, sizeof(tdata));
+
+	ktype = HPAGE_TYPE(dbp, src_page, H_KEYINDEX(src_ndx));
+	dtype = HPAGE_TYPE(dbp, src_page, H_DATAINDEX(src_ndx));
+	kindx = H_KEYINDEX(src_ndx);
+	dindx = H_DATAINDEX(src_ndx);
+	/*
+	 * Point the DBTs at the source page in place (no copy yet): off-page
+	 * references are copied as the whole on-page stub, regular items as
+	 * their payload only.
+	 */
+	if (ktype == H_OFFPAGE) {
+		tkey.data = P_ENTRY(dbp, src_page, kindx);
+		tkey.size = LEN_HITEM(dbp, src_page, dbp->pgsize, kindx);
+	} else {
+		tkey.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, kindx));
+		tkey.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, kindx);
+	}
+	if (dtype == H_OFFPAGE || dtype == H_OFFDUP) {
+		tdata.data = P_ENTRY(dbp, src_page, dindx);
+		tdata.size = LEN_HITEM(dbp, src_page, dbp->pgsize, dindx);
+	} else {
+		tdata.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, dindx));
+		tdata.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, dindx);
+	}
+	if (dest_indx != NULL)
+		dest = *dest_indx;
+	else
+		dest = NDX_INVALID;
+	if (dest == NDX_INVALID) {
+		if ((ret = __ham_getindex(dbc,
+		    dest_page, &tkey, ktype, &match, &dest)) != 0)
+			return (ret);
+		/* It is an error to insert a duplicate key */
+		DB_ASSERT(dbp->env, match != 0);
+	}
+
+	if (log == 1) {
+		if (DBC_LOGGING(dbc)) {
+			if ((ret = __ham_insdel_log(dbp, dbc->txn,
+			    &LSN(dest_page), 0, PUTPAIR,
+			    PGNO(dest_page), (u_int32_t)dest, &LSN(dest_page),
+			    OP_SET(ktype, dest_page), &tkey,
+			    OP_SET(dtype, dest_page), &tdata)) != 0)
+				return (ret);
+		} else
+			LSN_NOT_LOGGED(LSN(dest_page));
+	}
+
+	if ((ret = __ham_insertpair(dbc, dest_page, &dest,
+	    &tkey, &tdata, ktype, dtype)) != 0)
+		return (ret);
+
+	DB_ASSERT(dbp->env, dtype != H_DUPLICATE ||
+	    HPAGE_TYPE(dbp, dest_page, H_DATAINDEX(dest)) == dtype);
+
+	if (dest_indx != NULL)
+		*dest_indx = dest;
+
+	return (ret);
+}
+
+/*
+ * __ham_add_ovflpage --
+ *	Allocate a new P_HASH overflow page and link it after *pp, which
+ *	must already be pinned dirty.  The caller keeps responsibility for
+ *	putting the original page.
+ *
+ * Returns:
+ *	0 on success: pp points to new page; !0 on error, pp not valid.
+ *
+ * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE **));
+ */
+int
+__ham_add_ovflpage(dbc, pp)
+	DBC *dbc;
+	PAGE **pp;
+{
+	DB *dbp;
+	DB_LSN new_lsn;
+	DB_MPOOLFILE *mpf;
+	PAGE *new_pagep, *pagep;
+	int ret;
+
+	dbp = dbc->dbp;
+	mpf = dbp->mpf;
+	pagep = *pp;
+	*pp = NULL;
+
+	DB_ASSERT(dbp->env, IS_DIRTY(pagep));
+
+	if ((ret = __db_new(dbc, P_HASH, NULL, &new_pagep)) != 0)
+		return (ret);
+
+	/* Log the link before modifying either page. */
+	if (DBC_LOGGING(dbc)) {
+		if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0,
+		    PUTOVFL, PGNO(pagep), &LSN(pagep), PGNO(new_pagep),
+		    &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) {
+			(void)__memp_fput(mpf,
+			    dbc->thread_info, new_pagep, dbc->priority);
+			return (ret);
+		}
+	} else
+		LSN_NOT_LOGGED(new_lsn);
+
+	/* Move lsn onto page. */
+	LSN(pagep) = LSN(new_pagep) = new_lsn;
+	NEXT_PGNO(pagep) = PGNO(new_pagep);
+
+	PREV_PGNO(new_pagep) = PGNO(pagep);
+
+	*pp = new_pagep;
+	return (0);
+}
+
+/*
+ * __ham_get_cpage --
+ *	Acquire the bucket lock appropriate to "mode" (if locking is on)
+ *	and pin the cursor's current page, defaulting to the head of the
+ *	cursor's bucket.
+ *
+ * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
+ */
+int
+__ham_get_cpage(dbc, mode)
+	DBC *dbc;
+	db_lockmode_t mode;
+{
+	DB *dbp;
+	DB_LOCK tmp_lock;
+	DB_MPOOLFILE *mpf;
+	HASH_CURSOR *hcp;
+	int ret;
+
+	dbp = dbc->dbp;
+	mpf = dbp->mpf;
+	hcp = (HASH_CURSOR *)dbc->internal;
+	ret = 0;
+
+	/*
+	 * There are four cases with respect to buckets and locks.
+	 * 1. If there is no lock held, then if we are locking, we should
+	 *    get the lock.
+	 * 2. If there is a lock held, it's for the current bucket, and it's
+	 *    for the right mode, we don't need to do anything.
+	 * 3. If there is a lock held for the current bucket but it's not
+	 *    strong enough, we need to upgrade.
+	 * 4. If there is a lock, but it's for a different bucket, then we need
+	 *    to release the existing lock and get a new lock.
+	 */
+	LOCK_INIT(tmp_lock);
+	if (STD_LOCKING(dbc)) {
+		if (hcp->lbucket != hcp->bucket) {	/* Case 4 */
+			if ((ret = __TLPUT(dbc, hcp->lock)) != 0)
+				return (ret);
+			LOCK_INIT(hcp->lock);
+			hcp->stream_start_pgno = PGNO_INVALID;
+		}
+
+		/*
+		 * See if we have the right lock.  If we are doing
+		 * dirty reads we assume the write lock has been downgraded.
+		 */
+		if ((LOCK_ISSET(hcp->lock) &&
+		    ((hcp->lock_mode == DB_LOCK_READ ||
+		    F_ISSET(dbp, DB_AM_READ_UNCOMMITTED)) &&
+		    mode == DB_LOCK_WRITE))) {
+			/* Case 3: keep the old lock until the new is held. */
+			tmp_lock = hcp->lock;
+			LOCK_INIT(hcp->lock);
+		}
+
+		/* Acquire the lock. */
+		if (!LOCK_ISSET(hcp->lock))
+			/* Cases 1, 3, and 4. */
+			if ((ret = __ham_lock_bucket(dbc, mode)) != 0)
+				return (ret);
+
+		/*
+		 * NOTE(review): ret is always 0 here since a failed
+		 * __ham_lock_bucket returned above; the else branch that
+		 * restores tmp_lock looks unreachable -- confirm against
+		 * upstream before changing.
+		 */
+		if (ret == 0) {
+			hcp->lock_mode = mode;
+			hcp->lbucket = hcp->bucket;
+			/* Case 3: release the original lock. */
+			if ((ret = __ENV_LPUT(dbp->env, tmp_lock)) != 0)
+				return (ret);
+		} else if (LOCK_ISSET(tmp_lock))
+			hcp->lock = tmp_lock;
+	}
+
+	if (ret == 0 && hcp->page == NULL) {
+		if (hcp->pgno == PGNO_INVALID)
+			hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
+		if ((ret = __memp_fget(mpf,
+		    &hcp->pgno, dbc->thread_info, dbc->txn,
+		    (mode == DB_LOCK_WRITE ? DB_MPOOL_DIRTY : 0) |
+		    DB_MPOOL_CREATE, &hcp->page)) != 0)
+			return (ret);
+	}
+	return (0);
+}
+
+/*
+ * Get a new page at the cursor, putting the last page if necessary.
+ * If the flag is set to H_ISDUP, then we are talking about the
+ * duplicate page, not the main page.
+ *
+ * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t));
+ */
+int
+__ham_next_cpage(dbc, pgno)
+	DBC *dbc;
+	db_pgno_t pgno;
+{
+	DB *dbp;
+	DB_MPOOLFILE *mpf;
+	HASH_CURSOR *hcp;
+	PAGE *p;
+	int ret;
+
+	dbp = dbc->dbp;
+	mpf = dbp->mpf;
+	hcp = (HASH_CURSOR *)dbc->internal;
+
+	/* Release the currently pinned page, if any, before moving on. */
+	if (hcp->page != NULL && (ret = __memp_fput(mpf,
+	    dbc->thread_info, hcp->page, dbc->priority)) != 0)
+		return (ret);
+	hcp->stream_start_pgno = PGNO_INVALID;
+	hcp->page = NULL;
+
+	if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
+	    DB_MPOOL_CREATE, &p)) != 0)
+		return (ret);
+
+	/* Reposition the cursor at the start of the new page. */
+	hcp->page = p;
+	hcp->pgno = pgno;
+	hcp->indx = 0;
+
+	return (0);
+}
+
+/*
+ * __ham_lock_bucket --
+ *	Get the lock on a particular bucket.
+ *
+ * PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t));
+ */
+int
+__ham_lock_bucket(dbc, mode)
+	DBC *dbc;
+	db_lockmode_t mode;
+{
+	HASH_CURSOR *hcp;
+	db_pgno_t pgno;
+	int gotmeta, ret;
+
+	hcp = (HASH_CURSOR *)dbc->internal;
+	/*
+	 * BUCKET_TO_PAGE needs the meta page; pin it here only if the
+	 * caller has not already done so, and release it symmetrically.
+	 */
+	gotmeta = hcp->hdr == NULL ? 1 : 0;
+	if (gotmeta)
+		if ((ret = __ham_get_meta(dbc)) != 0)
+			return (ret);
+	pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
+	if (gotmeta)
+		if ((ret = __ham_release_meta(dbc)) != 0)
+			return (ret);
+
+	ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock);
+
+	hcp->lock_mode = mode;
+	return (ret);
+}
+
+/*
+ * __ham_dpair --
+ *	Delete a pair on a page, paying no attention to what the pair
+ *	represents.  The caller is responsible for freeing up duplicates
+ *	or offpage entries that might be referenced by this pair.
+ *
+ *	Recovery assumes that this may be called without the metadata
+ *	page pinned.
+ *
+ * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
+ */
+void
+__ham_dpair(dbp, p, indx)
+	DB *dbp;
+	PAGE *p;
+	u_int32_t indx;
+{
+	db_indx_t delta, n, *inp;
+	u_int8_t *dest, *src;
+
+	inp = P_INP(dbp, p);
+	/*
+	 * Compute "delta", the amount we have to shift all of the
+	 * offsets.  To find the delta, we just need to calculate
+	 * the size of the pair of elements we are removing.
+	 */
+	delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx);
+
+	/*
+	 * The hard case: we want to remove something other than
+	 * the last item on the page.  We need to shift data and
+	 * offsets down.
+	 */
+	if ((db_indx_t)indx != NUM_ENT(p) - 2) {
+		/*
+		 * Move the data: src is the first occupied byte on
+		 * the page. (Length is delta.)
+		 */
+		src = (u_int8_t *)p + HOFFSET(p);
+
+		/*
+		 * Destination is delta bytes beyond src.  This might
+		 * be an overlapping copy, so we have to use memmove.
+		 */
+		dest = src + delta;
+		memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p));
+	}
+
+	/* Adjust page metadata. */
+	HOFFSET(p) = HOFFSET(p) + delta;
+	NUM_ENT(p) = NUM_ENT(p) - 2;
+
+	/* Adjust the offsets. */
+	for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++)
+		inp[n] = inp[n + 2] + delta;
+
+}
+
+/*
+ * __hamc_delpg_getorder --
+ *	Cursor-walk callback: record in *orderp the highest "order" value
+ *	of any deleted hash cursor already sitting at (new_pgno, indx).
+ */
+static int
+__hamc_delpg_getorder(cp, my_dbc, orderp, new_pgno, indx, args)
+	DBC *cp, *my_dbc;
+	u_int32_t *orderp;
+	db_pgno_t new_pgno;
+	u_int32_t indx;
+	void *args;
+{
+	HASH_CURSOR *hcp;
+
+	COMPQUIET(args, NULL);
+
+	if (cp == my_dbc || cp->dbtype != DB_HASH)
+		return (0);
+	hcp = (HASH_CURSOR *)cp->internal;
+	if (hcp->pgno == new_pgno &&
+	    !MVCC_SKIP_CURADJ(cp, new_pgno)) {
+		if (hcp->indx == indx &&
+		    F_ISSET(hcp, H_DELETED) &&
+		    hcp->order > *orderp)
+			*orderp = hcp->order;
+	}
+	return (0);
+}
+
+/* Arguments threaded through __db_walk_cursors to the setorder callback. */
+struct __hamc_delpg_setorder_args {
+	db_pgno_t new_pgno;
+	u_int32_t order;
+	db_ham_mode op;
+	DB_TXN *my_txn;
+};
+
+/*
+ * __hamc_delpg_setorder --
+ *	Cursor-walk callback: move cursors off the emptied page old_pgno
+ *	onto new_pgno, bumping their order values per the deletion mode so
+ *	they do not collide with cursors already on the destination page.
+ */
+static int
+__hamc_delpg_setorder(cp, my_dbc, foundp, old_pgno, indx, vargs)
+	DBC *cp, *my_dbc;
+	u_int32_t *foundp;
+	db_pgno_t old_pgno;
+	u_int32_t indx;
+	void *vargs;
+{
+	HASH_CURSOR *hcp;
+	struct __hamc_delpg_setorder_args *args;
+
+	if (cp == my_dbc || cp->dbtype != DB_HASH)
+		return (0);
+
+	hcp = (HASH_CURSOR *)cp->internal;
+	args = vargs;
+
+	if (hcp->pgno == old_pgno &&
+	    !MVCC_SKIP_CURADJ(cp, old_pgno)) {
+		switch (args->op) {
+		case DB_HAM_DELFIRSTPG:
+			/*
+			 * We're moving all items,
+			 * regardless of index.
+			 */
+			hcp->pgno = args->new_pgno;
+
+			/*
+			 * But we have to be careful of
+			 * the order values.
+			 */
+			if (hcp->indx == indx)
+				hcp->order += args->order;
+			break;
+		case DB_HAM_DELMIDPG:
+			hcp->pgno = args->new_pgno;
+			DB_ASSERT(cp->dbp->env, hcp->indx == 0 &&
+			    F_ISSET(hcp, H_DELETED));
+			hcp->order += args->order;
+			break;
+		case DB_HAM_DELLASTPG:
+			hcp->pgno = args->new_pgno;
+			DB_ASSERT(cp->dbp->env, hcp->indx == 0 &&
+			    F_ISSET(hcp, H_DELETED));
+			hcp->indx = indx;
+			hcp->order += args->order;
+			break;
+		default:
+			return (__db_unknown_path(
+			    cp->dbp->env, "__hamc_delpg"));
+		}
+		/* Report whether any other transaction's cursor moved. */
+		if (args->my_txn != NULL && cp->txn != args->my_txn)
+			*foundp = 1;
+	}
+	return (0);
+}
+
+/*
+ * __hamc_delpg --
+ *
+ * Adjust the cursors after we've emptied a page in a bucket, taking
+ * care that when we move cursors pointing to deleted items, their
+ * orders don't collide with the orders of cursors on the page we move
+ * them to (since after this function is called, cursors with the same
+ * index on the two pages will be otherwise indistinguishable--they'll
+ * all have pgno new_pgno).  There are three cases:
+ *
+ * 1) The emptied page is the first page in the bucket.  In this
+ * case, we've copied all the items from the second page into the
+ * first page, so the first page is new_pgno and the second page is
+ * old_pgno.  new_pgno is empty, but can have deleted cursors
+ * pointing at indx 0, so we need to be careful of the orders
+ * there.  This is DB_HAM_DELFIRSTPG.
+ *
+ * 2) The page is somewhere in the middle of a bucket.  Our caller
+ * can just delete such a page, so it's old_pgno.  old_pgno is
+ * empty, but may have deleted cursors pointing at indx 0, so we
+ * need to be careful of indx 0 when we move those cursors to
+ * new_pgno.  This is DB_HAM_DELMIDPG.
+ *
+ * 3) The page is the last in a bucket.  Again the empty page is
+ * old_pgno, and again it should only have cursors that are deleted
+ * and at indx == 0.  This time, though, there's no next page to
+ * move them to, so we set them to indx == num_ent on the previous
+ * page--and indx == num_ent is the index whose cursors we need to
+ * be careful of.  This is DB_HAM_DELLASTPG.
+ */
+static int
+__hamc_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp)
+	DBC *dbc;
+	db_pgno_t old_pgno, new_pgno;
+	u_int32_t num_ent;
+	db_ham_mode op;
+	u_int32_t *orderp;
+{
+	DB *dbp;
+	DB_LSN lsn;
+	db_indx_t indx;
+	int ret;
+	u_int32_t found;
+	struct __hamc_delpg_setorder_args args;
+
+	/* Which is the worrisome index? */
+	indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0;
+
+	dbp = dbc->dbp;
+
+	/*
+	 * Find the highest order of any cursor our movement
+	 * may collide with.
+	 *
+	 * NOTE(review): args.order and found are not initialized here;
+	 * this assumes __db_walk_cursors zeroes its count argument before
+	 * walking -- confirm against src/db/db_cam.c.
+	 */
+	if ((ret = __db_walk_cursors(dbp, dbc,
+	    __hamc_delpg_getorder, &args.order, new_pgno, indx, NULL)) != 0)
+		return (ret);
+	args.order++;
+
+	args.my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
+	args.op = op;
+	args.new_pgno = new_pgno;
+	if ((ret = __db_walk_cursors(dbp, dbc,
+	    __hamc_delpg_setorder, &found, old_pgno, indx, &args)) != 0)
+		return (ret);
+
+	if (found != 0 && DBC_LOGGING(dbc)) {
+		if ((ret = __ham_chgpg_log(dbp, args.my_txn, &lsn, 0, op,
+		    old_pgno, new_pgno, indx, args.order)) != 0)
+			return (ret);
+	}
+	*orderp = args.order;
+	return (0);
+}
diff --git a/src/hash/hash_rec.c b/src/hash/hash_rec.c
new file mode 100644
index 00000000..b434c8e1
--- /dev/null
+++ b/src/hash/hash_rec.c
@@ -0,0 +1,1895 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/mp.h" + +static int __ham_alloc_pages __P((DBC *, __ham_groupalloc_args *, DB_LSN *)); +static int __ham_alloc_pages_42 + __P((DBC *, __ham_groupalloc_42_args *, DB_LSN *)); +static int __ham_chgpg_recover_func + __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *)); + +/* + * __ham_insdel_recover -- + * + * PUBLIC: int __ham_insdel_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_insdel_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_insdel_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_indx_t dindx; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_read, ip, 1); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + if (ret == DB_PAGE_NOTFOUND) + goto done; + else { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + /* If the page is not here then it was later truncated. */ + if (!IS_ZERO_LSN(argp->pagelsn)) + goto done; + /* + * This page was created by a group allocation and + * the file may not have been extend yet. + * Create the page if necessary. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + + /* + * Two possible things going on: + * redo a delete/undo a put: delete the item from the page. + * redo a put/undo a delete: add the item to the page. 
+ * If we are undoing a delete, then the information logged is the + * entire entry off the page, not just the data of a dbt. In + * this case, we want to copy it back onto the page verbatim. + * We do this by calling __insertpair with the type H_OFFPAGE instead + * of H_KEYDATA. + */ + if ((argp->opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) || + (argp->opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) { + /* + * Need to redo a PUT or undo a delete. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + dindx = (db_indx_t)argp->ndx; + if ((ret = __ham_insertpair(dbc, pagep, &dindx, &argp->key, + &argp->data, OP_MODE_GET(argp->keytype), + OP_MODE_GET(argp->datatype))) != 0) + goto out; + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + } else if ((argp->opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) || + (argp->opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { + /* Need to undo a put or redo a delete. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + __ham_dpair(file_dbp, pagep, argp->ndx); + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + + /* Return the previous LSN. 
*/ +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_insdel_42_recover -- + * + * PUBLIC: int __ham_insdel_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_insdel_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_insdel_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_indx_t dindx; + u_int32_t dtype, ktype, opcode; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_42_read, ip, 1); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + if (ret == DB_PAGE_NOTFOUND) + goto done; + else { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + /* If the page is not here then it was later truncated. */ + if (!IS_ZERO_LSN(argp->pagelsn)) + goto done; + /* + * This page was created by a group allocation and + * the file may not have been extend yet. + * Create the page if necessary. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, + DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + + /* + * Two possible things going on: + * redo a delete/undo a put: delete the item from the page. + * redo a put/undo a delete: add the item to the page. + * If we are undoing a delete, then the information logged is the + * entire entry off the page, not just the data of a dbt. In + * this case, we want to copy it back onto the page verbatim. + * We do this by calling __insertpair with the type H_OFFPAGE instead + * of H_KEYDATA. 
+ */ + opcode = OPCODE_OF(argp->opcode); + if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) || + (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) { + /* + * Need to redo a PUT or undo a delete. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA; + if (PAIR_ISDATADUP(argp->opcode)) + dtype = H_DUPLICATE; + else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode)) + dtype = H_OFFPAGE; + else + dtype = H_KEYDATA; + dindx = (db_indx_t)argp->ndx; + if ((ret = __ham_insertpair(dbc, pagep, &dindx, + &argp->key, &argp->data, ktype, dtype)) != 0) + goto out; + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) || + (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { + /* Need to undo a put or redo a delete. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + __ham_dpair(file_dbp, pagep, argp->ndx); + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + + /* Return the previous LSN. */ +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_newpage_recover -- + * This log message is used when we add/remove overflow pages. This + * message takes care of the pointer chains, not the data on the pages. 
+ * + * PUBLIC: int __ham_newpage_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_newpage_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_newpage_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int change, cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_newpage_print); + REC_INTRO(__ham_newpage_read, ip, 0); + + REC_FGET(mpf, ip, argp->new_pgno, &pagep, ppage); + change = 0; + + /* + * There are potentially three pages we need to check: the one + * that we created/deleted, the one before it and the one after + * it. + */ + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, argp->new_pgno, + argp->prev_pgno, argp->next_pgno, 0, P_HASH); + change = 1; + } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* + * Redo a delete or undo a create new page. All we + * really need to do is change the LSN. + */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + change = 1; + } + + if (change) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + + /* Now do the prev page. 
*/ +ppage: if (argp->prev_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + change = 0; + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->new_pgno; + change = 1; + } else if ((cmp_p == 0 && + DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->next_pgno = argp->next_pgno; + change = 1; + } + + if (change) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; + + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + } + + /* Now time to do the next page */ +npage: if (argp->next_pgno != PGNO_INVALID) { + REC_FGET(mpf, ip, argp->next_pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + change = 0; + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->prev_pgno = argp->new_pgno; + change = 1; + } else if ((cmp_p == 0 && + DB_REDO(op) && argp->opcode == DELOVFL) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + pagep->prev_pgno = argp->prev_pgno; + change = 1; + } + + if (change) + LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; + + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + } +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_replace_recover -- + * This log message refers to partial puts that are local to a single + * page. You can think of them as special cases of the more general + * insdel log message. + * + * PUBLIC: int __ham_replace_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_replace_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_replace_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DBT dbt; + PAGE *pagep; + u_int32_t change; + int cmp_n, cmp_p, is_plus, modified, off, ret; + u_int8_t *hk; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_read, ip, 0); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + memset(&dbt, 0, sizeof(dbt)); + modified = 0; + + /* + * Before we know the direction of the transformation we will + * determine the size differential; then once we know if we are + * redoing or undoing, we'll adjust the sign (is_plus) appropriately. 
+ */ + if (argp->newitem.size > argp->olditem.size) { + change = argp->newitem.size - argp->olditem.size; + is_plus = 1; + } else { + change = argp->olditem.size - argp->newitem.size; + is_plus = 0; + } + /* + * When chaning from a "regular" record to an off page record + * the old record does not contain a header while the new record + * does and is at an offset of -1 relative to the data part of + * the record. We add this to the amount of the change (which is + * an absolute value). If we are undoing then the offset is not + * used in the placement of the data. + */ + off = argp->off; + if (off < 0 && + (OP_MODE_GET(argp->oldtype) == H_DUPLICATE || + OP_MODE_GET(argp->oldtype) == H_KEYDATA)) { + change -= (u_int32_t)off; + if (DB_UNDO(op)) + off = 0; + } + if (cmp_p == 0 && DB_REDO(op)) { + /* Reapply the change as specified. */ + dbt.data = argp->newitem.data; + dbt.size = argp->newitem.size; + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = *lsnp; + /* + * The is_plus flag is set properly to reflect + * newitem.size - olditem.size. + */ + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the already applied change. */ + dbt.data = argp->olditem.data; + dbt.size = argp->olditem.size; + /* + * Invert is_plus to reflect sign of + * olditem.size - newitem.size. 
+ */ + is_plus = !is_plus; + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = argp->pagelsn; + modified = 1; + } + + if (modified) { + __ham_onpage_replace(file_dbp, pagep, + argp->ndx, off, change, is_plus, &dbt); + if (argp->oldtype != argp->newtype) { + hk = P_ENTRY(file_dbp, pagep, argp->ndx); + if (DB_REDO(op)) + HPAGE_PTYPE(hk) = OP_MODE_GET(argp->newtype); + else + HPAGE_PTYPE(hk) = OP_MODE_GET(argp->oldtype); + } + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_replace_42_recover -- + * This log message refers to partial puts that are local to a single + * page. You can think of them as special cases of the more general + * insdel log message. + * + * PUBLIC: int __ham_replace_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_replace_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_replace_42_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DBT dbt; + PAGE *pagep; + u_int32_t change; + int cmp_n, cmp_p, is_plus, modified, ret; + u_int8_t *hk; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_42_read, ip, 0); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + memset(&dbt, 0, sizeof(dbt)); + modified = 0; + + /* + * Before we know the direction of the transformation we will + * determine the size differential; then once we know if we are + * redoing or undoing, we'll adjust the sign (is_plus) appropriately. 
+ */ + if (argp->newitem.size > argp->olditem.size) { + change = argp->newitem.size - argp->olditem.size; + is_plus = 1; + } else { + change = argp->olditem.size - argp->newitem.size; + is_plus = 0; + } + if (cmp_p == 0 && DB_REDO(op)) { + /* Reapply the change as specified. */ + dbt.data = argp->newitem.data; + dbt.size = argp->newitem.size; + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = *lsnp; + /* + * The is_plus flag is set properly to reflect + * newitem.size - olditem.size. + */ + modified = 1; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the already applied change. */ + dbt.data = argp->olditem.data; + dbt.size = argp->olditem.size; + /* + * Invert is_plus to reflect sign of + * olditem.size - newitem.size. + */ + is_plus = !is_plus; + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = argp->pagelsn; + modified = 1; + } + + if (modified) { + __ham_onpage_replace(file_dbp, pagep, + argp->ndx, argp->off, change, is_plus, &dbt); + if (argp->makedup) { + hk = P_ENTRY(file_dbp, pagep, argp->ndx); + if (DB_REDO(op)) + HPAGE_PTYPE(hk) = H_DUPLICATE; + else + HPAGE_PTYPE(hk) = H_KEYDATA; + } + } + + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_splitdata_recover -- + * + * PUBLIC: int __ham_splitdata_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_splitdata_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_splitdata_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_splitdata_print); + REC_INTRO(__ham_splitdata_read, ip, 1); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, 
NULL, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + if (ret == DB_PAGE_NOTFOUND) + goto done; + else { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + /* If the page is not here then it was later truncated. */ + if (!IS_ZERO_LSN(argp->pagelsn)) + goto done; + /* + * This page was created by a group allocation and + * the file may not have been extend yet. + * Create the page if necessary. + */ + if ((ret = __memp_fget(mpf, &argp->pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + /* + * There are three types of log messages here. Two are related + * to an actual page split operation, one for the old page + * and one for the new pages created. The original image in the + * SPLITOLD record is used for undo. The image in the SPLITNEW + * is used for redo. We should never have a case where there is + * a redo operation and the SPLITOLD record is on disk, but not + * the SPLITNEW record. Therefore, we only have work to do when + * redo NEW messages and undo OLD messages, but we have to update + * LSNs in both cases. + * + * The third message is generated when a page is sorted (SORTPAGE). In + * an undo the original image in the SORTPAGE is used. In a redo we + * recreate the sort operation by calling __ham_sort_page. + */ + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (argp->opcode == SPLITNEW) + /* Need to redo the split described. 
*/ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + else if (argp->opcode == SORTPAGE) { + if ((ret = __ham_sort_page(dbc, NULL, pagep)) != 0) + goto out; + } + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + if (argp->opcode == SPLITOLD || argp->opcode == SORTPAGE) { + /* Put back the old image. */ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + } else + P_INIT(pagep, file_dbp->pgsize, argp->pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_copypage_recover -- + * Recovery function for copypage. + * + * PUBLIC: int __ham_copypage_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_copypage_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_copypage_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__ham_copypage_print); + REC_INTRO(__ham_copypage_read, ip, 0); + + /* This is the bucket page. */ + REC_FGET(mpf, ip, argp->pgno, &pagep, donext); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->page.data, argp->page.size); + PGNO(pagep) = argp->pgno; + PREV_PGNO(pagep) = PGNO_INVALID; + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, + argp->next_pgno, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +donext: /* Now fix up the "next" page. */ + REC_FGET(mpf, ip, argp->next_pgno, &pagep, do_nn); + + /* For REDO just update the LSN. For UNDO copy page back. */ + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memcpy(pagep, argp->page.data, argp->page.size); + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + + /* Now fix up the next's next page. */ +do_nn: if (argp->nnext_pgno == PGNO_INVALID) + goto done; + + REC_FGET(mpf, ip, argp->nnext_pgno, &pagep, done); + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nnextlsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nnextlsn); + CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); + + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = argp->pgno; + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + PREV_PGNO(pagep) = argp->next_pgno; + LSN(pagep) = argp->nnextlsn; + } + if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + REC_CLOSE; +} + +/* + * __ham_metagroup_recover -- + * Recovery function for metagroup. + * + * PUBLIC: int __ham_metagroup_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_metagroup_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_metagroup_args *argp; + DB_THREAD_INFO *ip; + HASH_CURSOR *hcp; + DB *file_dbp; + DBMETA *mmeta; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno; + int cmp_n, cmp_p, did_alloc, groupgrow, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + mmeta = NULL; + did_alloc = 0; + REC_PRINT(__ham_metagroup_print); + REC_INTRO(__ham_metagroup_read, ip, 1); + + /* + * This logs the virtual create of pages pgno to pgno + bucket. + * The log record contains: + * bucket: old maximum bucket + * pgno: page number of the new bucket. + * We round up on log calculations, so we can figure out if we are + * about to double the hash table if argp->bucket+1 is a power of 2. + * If it is, then we are allocating an entire doubling of pages, + * otherwise, we are simply allocated one new page. + */ + groupgrow = + (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; + pgno = argp->pgno; + if (argp->newalloc) + pgno += argp->bucket; + + pagep = NULL; + ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep); + + /* If we are undoing, then we don't want to create the page. 
*/ + if (ret != 0 && DB_REDO(op)) + ret = __memp_fget(mpf, + &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep); + else if (ret == DB_PAGE_NOTFOUND) + goto do_meta; + if (ret != 0) { + if (ret != ENOSPC) + goto out; + pgno = 0; + goto do_meta; + } + + /* + * When we get here then either we did not grow the file + * (groupgrow == 0) or we did grow the file and the allocation + * of those new pages succeeded. + */ + did_alloc = groupgrow; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* If this record allocated the pages give them back. */ + if (argp->newalloc) { + if (pagep != NULL && (ret = __memp_fput(mpf, + ip, pagep, DB_PRIORITY_VERY_LOW)) != 0) + goto out; + pagep = NULL; + if ((ret = __memp_ftruncate(mpf, NULL, ip, + argp->pgno, 0)) != 0) + goto out; + } else { + /* + * Otherwise just roll the page back to its + * previous state. + */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + pagep->lsn = argp->pagelsn; + } + } + if (pagep != NULL && + (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + + /* + * If a earlier aborted allocation used one of our pages it may + * be in the wrong state, read all the pages in the group and init + * them to be empty. + */ + if (DB_REDO(op) && argp->newalloc) { + for (pgno = argp->pgno; + pgno < argp->pgno + argp->bucket; pgno++) { + if ((ret = __memp_fget(mpf, + &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + if (IS_ZERO_LSN(LSN(pagep))) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + PGNO_INVALID, PGNO_INVALID, PGNO_INVALID, + 0, P_HASH); + } + if ((ret = + __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + } + } + +do_meta: + /* Now we have to update the meta-data page. 
*/ + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn); + cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn); + CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); + CHECK_ABORT(env, op, cmp_n, &hcp->hdr->dbmeta.lsn, lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the actual updating of bucket counts. */ + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + ++hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = + (argp->bucket + 1) | hcp->hdr->low_mask; + } + hcp->hdr->dbmeta.lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the actual updating of bucket counts. */ + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + hcp->hdr->max_bucket = argp->bucket; + if (groupgrow) { + hcp->hdr->high_mask = argp->bucket; + hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; + } + hcp->hdr->dbmeta.lsn = argp->metalsn; + } + + /* + * Now we need to fix up the spares array. Each entry in the + * spares array indicates the beginning page number for the + * indicated doubling. + */ + if (cmp_p == 0 && did_alloc && !DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = + (argp->pgno - argp->bucket) - 1; + } + if (cmp_n == 0 && groupgrow && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + hcp->hdr->spares[ + __db_log2(argp->bucket + 1) + 1] = PGNO_INVALID; + } + + /* + * Finally, we need to potentially fix up the last_pgno field + * in the master meta-data page (which may or may not be the + * same as the hash header page). 
+ */ + if (argp->mmpgno != argp->mpgno) { + if ((ret = __memp_fget(mpf, + &argp->mmpgno, ip, NULL, DB_MPOOL_EDIT, &mmeta)) != 0) { + if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND) + ret = 0; + goto out; + } + cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn); + cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + mmeta->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + mmeta->lsn = argp->mmetalsn; + } + } else { + mmeta = (DBMETA *)hcp->hdr; + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + } + + if (cmp_n == 0 && DB_UNDO(op)) + mmeta->last_pgno = argp->last_pgno; + else if (cmp_p == 0 && DB_REDO(op) && mmeta->last_pgno < pgno) + mmeta->last_pgno = pgno; + + if (argp->mmpgno != argp->mpgno && + (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0) + goto out; + mmeta = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (mmeta != NULL) + (void)__memp_fput(mpf, ip, mmeta, dbc->priority); + if (dbc != NULL) + (void)__ham_release_meta(dbc); + + REC_CLOSE; +} + +/* + * __ham_contract_recover -- + * Recovery function for contracting a hash table + * + * PUBLIC: int __ham_contract_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_contract_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_contract_args *argp; + DB_THREAD_INFO *ip; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + HASH_CURSOR *hcp; + HMETA *meta; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__ham_contract_print); + REC_INTRO(__ham_contract_read, ip, 1); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto done; + meta = hcp->hdr; + cmp_n = LOG_COMPARE(lsnp, &meta->dbmeta.lsn); + cmp_p = LOG_COMPARE(&meta->dbmeta.lsn, &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &meta->dbmeta.lsn, &argp->meta_lsn); + if (cmp_p 
== 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + meta = hcp->hdr; + meta->max_bucket = argp->bucket - 1; + if (argp->bucket == meta->low_mask + 1) { + meta->spares[ + __db_log2(argp->bucket) + 1] = PGNO_INVALID; + meta->high_mask = meta->low_mask; + meta->low_mask >>= 1; + } + meta->dbmeta.lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + meta = hcp->hdr; + meta->max_bucket = argp->bucket; + if (argp->bucket == meta->high_mask + 1) { + meta->spares[__db_log2(argp->bucket) + 1] = + argp->pgno - argp->bucket; + meta->low_mask = meta->high_mask; + meta->high_mask = meta->max_bucket | meta->low_mask; + } + meta->dbmeta.lsn = argp->meta_lsn; + } + *lsnp = argp->prev_lsn; + +out: ret = __ham_release_meta(dbc); +done: REC_CLOSE; +} + +/* + * __ham_groupalloc_recover -- + * Recover the batch creation of a set of pages for a new database. + * + * PUBLIC: int __ham_groupalloc_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_groupalloc_args *argp; + DB_THREAD_INFO *ip; + DBMETA *mmeta; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + PAGE *pagep; + db_pgno_t pgno; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + mmeta = NULL; + REC_PRINT(__ham_groupalloc_print); + REC_INTRO(__ham_groupalloc_read, ip, 1); + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) { + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else + goto done; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(mmeta)); + cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(mmeta), lsnp); + + /* + * Basically, we used mpool to allocate a chunk of pages. 
+ * We need to either add those to a free list (in the undo + * case) or initialize them (in the redo case). + * + * If we are redoing and this is a hash subdatabase, it's possible + * that the pages were never allocated, so we'd better check for + * that and handle it here. + */ + pgno = argp->start_pgno + argp->num - 1; + if (DB_REDO(op)) { + if ((ret = __ham_alloc_pages(dbc, argp, lsnp)) != 0) + goto out; + if (cmp_p == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta); + LSN(mmeta) = *lsnp; + } + } else if (DB_UNDO(op)) { + /* + * Fetch the last page and determine if it is in + * the post allocation state. + */ + pagep = NULL; + if ((ret = __memp_fget(mpf, &pgno, + ip, NULL, DB_MPOOL_EDIT, &pagep)) == 0) { + if (LOG_COMPARE(&pagep->lsn, lsnp) != 0) { + if ((ret = __memp_fput(mpf, ip, + pagep, DB_PRIORITY_VERY_LOW)) != 0) + goto out; + pagep = NULL; + } + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + /* + * If the last page was allocated then truncate back + * to the first page. + */ + if (pagep != NULL) { + if ((ret = __memp_fput(mpf, ip, + pagep, DB_PRIORITY_VERY_LOW)) != 0) + goto out; + if ((ret = __memp_ftruncate(mpf, NULL, + ip, argp->start_pgno, 0)) != 0) + goto out; + } + + /* + * If we are rolling back the metapage, then make + * sure it reflects the correct last_pgno. + */ + if (cmp_n == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta); + mmeta->last_pgno = argp->last_pgno; + } + pgno = 0; + if (cmp_n == 0) { + REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta); + LSN(mmeta) = argp->meta_lsn; + } + } + + /* + * Set the last page number to the current value. + */ + if (pgno > mmeta->last_pgno) { + REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta); + mmeta->last_pgno = pgno; + } + +done: if (ret == 0) + *lsnp = argp->prev_lsn; + ret = 0; + +out: if (mmeta != NULL) + (void)__memp_fput(mpf, ip, mmeta, file_dbp->priority); + + REC_CLOSE; +} + +/* + * __ham_alloc_pages -- + * + * Called during redo of a file create. 
We create new pages in the file + using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a + __crdel_metasub message. If we manage to crash without the newly written + pages getting to disk (I'm not sure this can happen anywhere except our + test suite?!), then we need to go through and recreate the final pages. + Hash normally has holes in its files and handles them appropriately. + */ +static int +__ham_alloc_pages(dbc, argp, lsnp) + DBC *dbc; + __ham_groupalloc_args *argp; + DB_LSN *lsnp; +{ + DB *file_dbp; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + db_pgno_t pgno; + int ret; + + file_dbp = dbc->dbp; + mpf = file_dbp->mpf; + ip = dbc->thread_info; + + /* Read the last page of the allocation. */ + pgno = argp->start_pgno + argp->num - 1; + + /* If the page exists, and it has been initialized, then we're done. */ + if ((ret = + __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) == 0) { + if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) + goto reinit_page; + return (__memp_fput(mpf, ip, pagep, dbc->priority)); + } + + /* Had to create the page. */ + if ((ret = __memp_fget(mpf, &pgno, + ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) + return (__db_pgerr(dbc->dbp, pgno, ret)); + +reinit_page: + /* Initialize the newly allocated page. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + P_INIT(pagep, dbc->dbp->pgsize, + pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pagep->lsn = *lsnp; + +out: return (__memp_fput(mpf, ip, pagep, dbc->priority)); +} + +/* + * __ham_changeslot_recover -- + * Recovery function for changeslot. + * When we compact a hash database we may change one of the spares slots + * to point at a new block of pages. 
+ * + * PUBLIC: int __ham_changeslot_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_changeslot_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_changeslot_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + HASH_CURSOR *hcp; + HMETA *meta; + u_int32_t bucket; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + + REC_PRINT(__ham_changeslot_print); + REC_INTRO(__ham_changeslot_read, ip, 1); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + meta = hcp->hdr; + cmp_n = log_compare(lsnp, &LSN(meta)); + cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); + + bucket = argp->slot == 0 ? 0 : 1 << (argp->slot - 1); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + meta = hcp->hdr; + meta->spares[argp->slot] = argp->new - bucket; + LSN(meta) = *lsnp; + } else if (cmp_n == 0 && !DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + meta = hcp->hdr; + meta->spares[argp->slot] = argp->old - bucket; + LSN(meta) = argp->meta_lsn; + } + *lsnp = argp->prev_lsn; + ret = __ham_release_meta(dbc); + +done: +out: REC_CLOSE; +} + +/* + * __ham_curadj_recover -- + * Undo cursor adjustments if a subtransaction fails. 
+ * + * PUBLIC: int __ham_curadj_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_curadj_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_curadj_args *argp; + db_ham_curadj mode, hamc_mode; + DB_THREAD_INFO *ip; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + HASH_CURSOR *hcp; + int ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__ham_curadj_print); + REC_INTRO(__ham_curadj_read, ip, 1); + + if (op != DB_TXN_ABORT) + goto done; + + mode = (db_ham_curadj)argp->add; + + /* + * Reverse the logged operation, so that the consequences are reversed + * by the __hamc_update code. + */ + switch (mode) { + case DB_HAM_CURADJ_DEL: + hamc_mode = DB_HAM_CURADJ_ADD; + break; + case DB_HAM_CURADJ_ADD: + hamc_mode = DB_HAM_CURADJ_DEL; + break; + case DB_HAM_CURADJ_ADDMOD: + hamc_mode = DB_HAM_CURADJ_DELMOD; + break; + case DB_HAM_CURADJ_DELMOD: + hamc_mode = DB_HAM_CURADJ_ADDMOD; + break; + default: + __db_errx(env, DB_STR("1122", + "Invalid flag in __ham_curadj_recover")); + ret = EINVAL; + goto out; + } + + /* + * Undo the adjustment by reinitializing the cursor to look like + * the one that was used to do the adjustment, then we invert the + * add so that we undo the adjustment. 
+ */ + hcp = (HASH_CURSOR *)dbc->internal; + hcp->pgno = argp->pgno; + hcp->indx = argp->indx; + hcp->dup_off = argp->dup_off; + hcp->order = argp->order; + if (mode == DB_HAM_CURADJ_DEL) + F_SET(hcp, H_DELETED); + (void)__hamc_update(dbc, argp->len, hamc_mode, argp->is_dup); + +done: *lsnp = argp->prev_lsn; +out: REC_CLOSE; +} + +static int +__ham_chgpg_recover_func(cp, my_dbc, countp, pgno, indx, vargs) + DBC *cp, *my_dbc; + u_int32_t *countp; + db_pgno_t pgno; + u_int32_t indx; + void *vargs; +{ + BTREE_CURSOR *opdcp; + HASH_CURSOR *lcp; + u_int32_t order; + int ret; + __ham_chgpg_args *argp; + + COMPQUIET(my_dbc, NULL); + COMPQUIET(countp, NULL); + COMPQUIET(pgno, 0); + lcp = (HASH_CURSOR *)cp->internal; + argp = vargs; + + /* Overloaded field for DB_HAM_DEL*PG */ + order = argp->new_indx; + + switch (argp->mode) { + case DB_HAM_DELFIRSTPG: + if (lcp->pgno != argp->new_pgno || + MVCC_SKIP_CURADJ(cp, lcp->pgno)) + break; + if (lcp->indx != indx || + !F_ISSET(lcp, H_DELETED) || + lcp->order >= order) { + lcp->pgno = argp->old_pgno; + if (lcp->indx == indx) + lcp->order -= order; + } + break; + case DB_HAM_DELMIDPG: + case DB_HAM_DELLASTPG: + if (lcp->pgno == argp->new_pgno && + lcp->indx == indx && + F_ISSET(lcp, H_DELETED) && + lcp->order >= order && + !MVCC_SKIP_CURADJ(cp, lcp->pgno)) { + lcp->pgno = argp->old_pgno; + lcp->order -= order; + lcp->indx = 0; + } + break; + case DB_HAM_CHGPG: + /* + * If we're doing a CHGPG, we're undoing + * the move of a non-deleted item to a + * new page. Any cursors with the deleted + * flag set do not belong to this item; + * don't touch them. 
+ */ + if (F_ISSET(lcp, H_DELETED)) + break; + /* FALLTHROUGH */ + case DB_HAM_SPLIT: + if (lcp->pgno == argp->new_pgno && + lcp->indx == argp->new_indx && + !MVCC_SKIP_CURADJ(cp, lcp->pgno)) { + lcp->indx = argp->old_indx; + lcp->pgno = argp->old_pgno; + } + break; + case DB_HAM_DUP: + if (lcp->opd == NULL) + break; + opdcp = (BTREE_CURSOR *)lcp->opd->internal; + if (opdcp->pgno != argp->new_pgno || + opdcp->indx != argp->new_indx || + MVCC_SKIP_CURADJ(lcp->opd, opdcp->pgno)) + break; + + if (F_ISSET(opdcp, C_DELETED)) + F_SET(lcp, H_DELETED); + /* + * We can't close a cursor while we have the + * dbp mutex locked, since c_close reacquires + * it. It should be safe to drop the mutex + * here, though, since newly opened cursors + * are put only at the end of the tailq and + * the cursor we're adjusting can't be closed + * under us. + */ + MUTEX_UNLOCK(cp->dbp->env, cp->dbp->mutex); + ret = __dbc_close(lcp->opd); + MUTEX_LOCK(cp->dbp->env, cp->dbp->mutex); + if (ret != 0) + return (ret); + lcp->opd = NULL; + break; + } + return (0); +} +/* + * __ham_chgpg_recover -- + * Undo cursor adjustments if a subtransaction fails. + * + * PUBLIC: int __ham_chgpg_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_chgpg_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_chgpg_args *argp; + DB_THREAD_INFO *ip; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + int ret; + u_int32_t count; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__ham_chgpg_print); + REC_INTRO(__ham_chgpg_read, ip, 0); + + if (op != DB_TXN_ABORT) + goto done; + + ret = __db_walk_cursors(file_dbp, dbc, + __ham_chgpg_recover_func, &count, 0, argp->old_indx, argp); + +done: *lsnp = argp->prev_lsn; +out: REC_CLOSE; +} + +/* + * __ham_metagroup_recover -- + * Recovery function for metagroup. 
+ * + * PUBLIC: int __ham_metagroup_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_metagroup_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_metagroup_42_args *argp; + DB_THREAD_INFO *ip; + HASH_CURSOR *hcp; + DB *file_dbp; + DBMETA *mmeta; + DBC *dbc; + DB_MPOOLFILE *mpf; + PAGE *pagep; + db_pgno_t pgno; + u_int32_t flags; + int cmp_n, cmp_p, did_alloc, groupgrow, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + mmeta = NULL; + did_alloc = 0; + REC_PRINT(__ham_metagroup_42_print); + REC_INTRO(__ham_metagroup_42_read, ip, 1); + + /* + * This logs the virtual create of pages pgno to pgno + bucket + * If HAVE_FTRUNCATE is not supported the mpool page-allocation is not + * transaction protected, we can never undo it. Even in an abort, + * we have to allocate these pages to the hash table if they + * were actually created. In particular, during disaster + * recovery the metapage may be before this point if we + * are rolling backward. If the file has not been extended + * then the metapage could not have been updated. + * The log record contains: + * bucket: old maximum bucket + * pgno: page number of the new bucket. + * We round up on log calculations, so we can figure out if we are + * about to double the hash table if argp->bucket+1 is a power of 2. + * If it is, then we are allocating an entire doubling of pages, + * otherwise, we are simply allocated one new page. 
+ */ + groupgrow = + (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; + pgno = argp->pgno; + if (argp->newalloc) + pgno += argp->bucket; + + flags = 0; + pagep = NULL; + LF_SET(DB_MPOOL_CREATE); + ret = __memp_fget(mpf, &pgno, ip, NULL, flags, &pagep); + + if (ret != 0) { + if (ret != ENOSPC) + goto out; + pgno = 0; + goto do_meta; + } + + /* + * When we get here then either we did not grow the file + * (groupgrow == 0) or we did grow the file and the allocation + * of those new pages succeeded. + */ + did_alloc = groupgrow; + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); + CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + pagep->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* + * Otherwise just roll the page back to its + * previous state. + */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + pagep->lsn = argp->pagelsn; + } + if (pagep != NULL && + (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + +do_meta: + /* Now we have to update the meta-data page. */ + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn); + cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn); + CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the actual updating of bucket counts. */ + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + ++hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = + (argp->bucket + 1) | hcp->hdr->low_mask; + } + hcp->hdr->dbmeta.lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the actual updating of bucket counts. 
*/ + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + hcp->hdr->max_bucket = argp->bucket; + if (groupgrow) { + hcp->hdr->high_mask = argp->bucket; + hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; + } + hcp->hdr->dbmeta.lsn = argp->metalsn; + } + + /* + * Now we need to fix up the spares array. Each entry in the + * spares array indicates the beginning page number for the + * indicated doubling. We need to fill this in whenever the + * spares array is invalid, if we never reclaim pages then + * we have to allocate the pages to the spares array in both + * the redo and undo cases. + */ + if (did_alloc && + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) { + REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr); + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = + (argp->pgno - argp->bucket) - 1; + } + + /* + * Finally, we need to potentially fix up the last_pgno field + * in the master meta-data page (which may or may not be the + * same as the hash header page). + */ + if (argp->mmpgno != argp->mpgno) { + if ((ret = __memp_fget(mpf, &argp->mmpgno, ip, NULL, + DB_MPOOL_EDIT, &mmeta)) != 0) { + if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND) + ret = 0; + goto out; + } + cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn); + cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn); + if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + mmeta->lsn = *lsnp; + } else if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + mmeta->lsn = argp->mmetalsn; + } + } else { + mmeta = (DBMETA *)hcp->hdr; + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + } + + if (mmeta->last_pgno < pgno) + mmeta->last_pgno = pgno; + + if (argp->mmpgno != argp->mpgno && + (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0) + goto out; + mmeta = NULL; + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (mmeta != NULL) + (void)__memp_fput(mpf, ip, mmeta, dbc->priority); + if (dbc != NULL) + (void)__ham_release_meta(dbc); + + REC_CLOSE; +} + +/* + * 
__ham_groupalloc_42_recover -- + * Recover the batch creation of a set of pages for a new database. + * + * PUBLIC: int __ham_groupalloc_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __ham_groupalloc_42_args *argp; + DB_THREAD_INFO *ip; + DBMETA *mmeta; + DB_MPOOLFILE *mpf; + DB *file_dbp; + DBC *dbc; + db_pgno_t pgno; + int cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + mmeta = NULL; + REC_PRINT(__ham_groupalloc_42_print); + REC_INTRO(__ham_groupalloc_42_read, ip, 1); + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) { + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else + goto done; + } + + cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn); + + /* + * Basically, we used mpool to allocate a chunk of pages. + * We need to either add those to a free list (in the undo + * case) or initialize them (in the redo case). + * + * If we are redoing and this is a hash subdatabase, it's possible + * that the pages were never allocated, so we'd better check for + * that and handle it here. + */ + pgno = argp->start_pgno + argp->num - 1; + if (DB_REDO(op)) { + if ((ret = __ham_alloc_pages_42(dbc, argp, lsnp)) != 0) + goto out; + if (cmp_p == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + LSN(mmeta) = *lsnp; + } + } else if (DB_UNDO(op)) { + /* + * We cannot roll back 4.2 style allocations. + */ + __db_errx(env, DB_STR("1123", +"Cannot replicate prepared transactions from master running release 4.2.")); + ret = __env_panic(env, EINVAL); + goto out; + } + + /* + * In both REDO and UNDO, we have grown the file and need to make + * sure that last_pgno is correct. If we HAVE_FTRUNCATE pgno + * will only be valid on REDO. 
+ */ + if (pgno > mmeta->last_pgno) { + REC_DIRTY(mpf, ip, dbc->priority, &mmeta); + mmeta->last_pgno = pgno; + } + +done: if (ret == 0) + *lsnp = argp->prev_lsn; + ret = 0; + +out: if (mmeta != NULL) + (void)__memp_fput(mpf, ip, mmeta, dbc->priority); + + REC_CLOSE; +} + +/* + * __ham_alloc_pages_42 -- + * + * Called during redo of a file create. We create new pages in the file + * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a + * __crdel_metasub message. If we manage to crash without the newly written + * pages getting to disk (I'm not sure this can happen anywhere except our + * test suite?!), then we need to go through and recreate the final pages. + * Hash normally has holes in its files and handles them appropriately. + */ +static int +__ham_alloc_pages_42(dbc, argp, lsnp) + DBC *dbc; + __ham_groupalloc_42_args *argp; + DB_LSN *lsnp; +{ + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + db_pgno_t pgno; + int ret; + + mpf = dbc->dbp->mpf; + ip = dbc->thread_info; + + /* Read the last page of the allocation. */ + pgno = argp->start_pgno + argp->num - 1; + + /* If the page exists, and it has been initialized, then we're done. */ + if ((ret = __memp_fget(mpf, + &pgno, ip, NULL, 0, &pagep)) == 0) { + if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) + goto reinit_page; + if ((ret = __memp_fput(mpf, + ip, pagep, dbc->priority)) != 0) + return (ret); + return (0); + } + + /* Had to create the page. */ + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) != 0) + return (__db_pgerr(dbc->dbp, pgno, ret)); + +reinit_page: + /* Initialize the newly allocated page. 
*/ + P_INIT(pagep, + dbc->dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pagep->lsn = *lsnp; + + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + return (ret); + + return (0); +} diff --git a/src/hash/hash_reclaim.c b/src/hash/hash_reclaim.c new file mode 100644 index 00000000..c88a3695 --- /dev/null +++ b/src/hash/hash_reclaim.c @@ -0,0 +1,98 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +/* + * __ham_reclaim -- + * Reclaim the pages from a subdatabase and return them to the + * parent free list. For now, we link each freed page on the list + * separately. If people really store hash databases in subdatabases + * and do a lot of creates and deletes, this is going to be a problem, + * because hash needs chunks of contiguous storage. We may eventually + * need to go to a model where we maintain the free list with chunks of + * contiguous pages as well. + * + * PUBLIC: int __ham_reclaim __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *txn, u_int32_t)); + */ +int +__ham_reclaim(dbp, ip, txn, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t flags; +{ + DBC *dbc; + HASH_CURSOR *hcp; + int ret; + + /* Open up a cursor that we'll use for traversing. */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + /* Write lock the metapage for deallocations. */ + if ((ret = __ham_dirty_meta(dbc, 0)) != 0) + goto err; + + /* Avoid locking every page, we have the handle locked exclusive. 
*/ + F_SET(dbc, DBC_DONTLOCK); + + if ((ret = __ham_traverse(dbc, DB_LOCK_WRITE, + __db_reclaim_callback, (void *)(uintptr_t)flags, 1)) != 0) + goto err; + if ((ret = __dbc_close(dbc)) != 0) + goto err; + if ((ret = __ham_release_meta(dbc)) != 0) + goto err; + return (0); + +err: if (hcp->hdr != NULL) + (void)__ham_release_meta(dbc); + (void)__dbc_close(dbc); + return (ret); +} + +/* + * __ham_truncate -- + * Reclaim the pages from a subdatabase and return them to the + * parent free list. + * + * PUBLIC: int __ham_truncate __P((DBC *, u_int32_t *)); + */ +int +__ham_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + u_int32_t count; + int ret, t_ret; + + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + + count = 0; + + ret = __ham_traverse(dbc, + DB_LOCK_WRITE, __db_truncate_callback, &count, 1); + + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + if (countp != NULL) + *countp = count; + return (ret); +} diff --git a/src/hash/hash_stat.c b/src/hash/hash_stat.c new file mode 100644 index 00000000..afcaae14 --- /dev/null +++ b/src/hash/hash_stat.c @@ -0,0 +1,518 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/mp.h" + +#ifdef HAVE_STATISTICS +static int __ham_stat_callback __P((DBC *, PAGE *, void *, int *)); + +/* + * __ham_stat -- + * Gather/print the hash statistics + * + * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t)); + */ +int +__ham_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + DB *dbp; + DB_HASH_STAT *sp; + DB_MPOOLFILE *mpf; + ENV *env; + HASH_CURSOR *hcp; + PAGE *h; + db_pgno_t pgno; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + mpf = dbp->mpf; + sp = NULL; + + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + /* Allocate and clear the structure. */ + if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + /* Copy the fields that we have. */ + sp->hash_nkeys = hcp->hdr->dbmeta.key_count; + sp->hash_ndata = hcp->hdr->dbmeta.record_count; + /* + * Don't take the page number from the meta-data page -- that value is + * only maintained in the primary database, we may have been called on + * a subdatabase. + */ + if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0) + goto err; + sp->hash_pagecnt = pgno + 1; + sp->hash_pagesize = dbp->pgsize; + sp->hash_buckets = hcp->hdr->max_bucket + 1; + sp->hash_magic = hcp->hdr->dbmeta.magic; + sp->hash_version = hcp->hdr->dbmeta.version; + sp->hash_metaflags = hcp->hdr->dbmeta.flags; + sp->hash_ffactor = hcp->hdr->ffactor; + + if (flags == DB_FAST_STAT) + goto done; + + /* Walk the free list, counting pages. */ + for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; + pgno != PGNO_INVALID;) { + ++sp->hash_free; + + if ((ret = __memp_fget(mpf, + &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0) + goto err; + + pgno = h->next_pgno; + (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority); + } + + /* Now traverse the rest of the table. 
*/ + sp->hash_nkeys = 0; + sp->hash_ndata = 0; + if ((ret = __ham_traverse(dbc, + DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0) + goto err; + + if (!F_ISSET(dbp, DB_AM_RDONLY)) { + /* + * A transaction is not required for DB->stat, so this update + * can't safely make a copy of the meta page. We have to + * update in place. + */ + if ((ret = __ham_dirty_meta(dbc, + (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0) + goto err; + hcp->hdr->dbmeta.key_count = sp->hash_nkeys; + hcp->hdr->dbmeta.record_count = sp->hash_ndata; + } + +done: if ((ret = __ham_release_meta(dbc)) != 0) + goto err; + + *(DB_HASH_STAT **)spp = sp; + return (0); + +err: if (sp != NULL) + __os_ufree(env, sp); + + if (hcp->hdr != NULL) + (void)__ham_release_meta(dbc); + + return (ret); +} + +/* + * __ham_stat_print -- + * Display hash statistics. + * + * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t)); + */ +int +__ham_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + static const FN fn[] = { + { DB_HASH_DUP, "duplicates" }, + { DB_HASH_SUBDB, "multiple-databases" }, + { DB_HASH_DUPSORT, "sorted duplicates" }, + { 0, NULL } + }; + DB *dbp; + ENV *env; + DB_HASH_STAT *sp; + int lorder, ret; + const char *s; + + dbp = dbc->dbp; + env = dbp->env; + + if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Default Hash database information:"); + } + __db_msg(env, "%lx\tHash magic number", (u_long)sp->hash_magic); + __db_msg(env, + "%lu\tHash version number", (u_long)sp->hash_version); + (void)__db_get_lorder(dbp, &lorder); + switch (lorder) { + case 1234: + s = "Little-endian"; + break; + case 4321: + s = "Big-endian"; + break; + default: + s = "Unrecognized byte order"; + break; + } + __db_msg(env, "%s\tByte order", s); + __db_prflags(env, NULL, sp->hash_metaflags, fn, NULL, "\tFlags"); + __db_dl(env, + "Number of pages in the database", (u_long)sp->hash_pagecnt); + 
__db_dl(env, + "Underlying database page size", (u_long)sp->hash_pagesize); + __db_dl(env, "Specified fill factor", (u_long)sp->hash_ffactor); + __db_dl(env, + "Number of keys in the database", (u_long)sp->hash_nkeys); + __db_dl(env, + "Number of data items in the database", (u_long)sp->hash_ndata); + + __db_dl(env, "Number of hash buckets", (u_long)sp->hash_buckets); + __db_dl_pct(env, "Number of bytes free on bucket pages", + (u_long)sp->hash_bfree, DB_PCT_PG( + sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff"); + + __db_dl(env, + "Number of overflow pages", (u_long)sp->hash_bigpages); + __db_dl_pct(env, "Number of bytes free in overflow pages", + (u_long)sp->hash_big_bfree, DB_PCT_PG( + sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff"); + + __db_dl(env, + "Number of bucket overflow pages", (u_long)sp->hash_overflows); + __db_dl_pct(env, + "Number of bytes free in bucket overflow pages", + (u_long)sp->hash_ovfl_free, DB_PCT_PG( + sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff"); + + __db_dl(env, "Number of duplicate pages", (u_long)sp->hash_dup); + __db_dl_pct(env, "Number of bytes free in duplicate pages", + (u_long)sp->hash_dup_free, DB_PCT_PG( + sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff"); + + __db_dl(env, + "Number of pages on the free list", (u_long)sp->hash_free); + + __os_ufree(env, sp); + + return (0); +} + +static int +__ham_stat_callback(dbc, pagep, cookie, putp) + DBC *dbc; + PAGE *pagep; + void *cookie; + int *putp; +{ + DB *dbp; + DB_BTREE_STAT bstat; + DB_HASH_STAT *sp; + db_indx_t indx, len, off, tlen, top; + u_int8_t *hk; + int ret; + + *putp = 0; + sp = cookie; + dbp = dbc->dbp; + + switch (pagep->type) { + case P_INVALID: + /* + * Hash pages may be wholly zeroed; this is not a bug. + * Obviously such pages have no data, so we can just proceed. 
+ */ + break; + case P_HASH_UNSORTED: + case P_HASH: + /* + * We count the buckets and the overflow pages + * separately and tally their bytes separately + * as well. We need to figure out if this page + * is a bucket. + */ + if (PREV_PGNO(pagep) == PGNO_INVALID) + sp->hash_bfree += P_FREESPACE(dbp, pagep); + else { + sp->hash_overflows++; + sp->hash_ovfl_free += P_FREESPACE(dbp, pagep); + } + top = NUM_ENT(pagep); + /* Correct for on-page duplicates and deleted items. */ + for (indx = 0; indx < top; indx += P_INDX) { + switch (*H_PAIRDATA(dbp, pagep, indx)) { + case H_OFFDUP: + break; + case H_OFFPAGE: + case H_KEYDATA: + sp->hash_ndata++; + break; + case H_DUPLICATE: + tlen = LEN_HDATA(dbp, pagep, 0, indx); + hk = H_PAIRDATA(dbp, pagep, indx); + for (off = 0; off < tlen; + off += len + 2 * sizeof(db_indx_t)) { + sp->hash_ndata++; + memcpy(&len, + HKEYDATA_DATA(hk) + + off, sizeof(db_indx_t)); + } + break; + default: + return (__db_pgfmt(dbp->env, PGNO(pagep))); + } + } + sp->hash_nkeys += H_NUMPAIRS(pagep); + break; + case P_IBTREE: + case P_IRECNO: + case P_LBTREE: + case P_LRECNO: + case P_LDUP: + /* + * These are all btree pages; get a correct + * cookie and call them. Then add appropriate + * fields into our stat structure. + */ + memset(&bstat, 0, sizeof(bstat)); + if ((ret = __bam_stat_callback(dbc, pagep, &bstat, putp)) != 0) + return (ret); + sp->hash_dup++; + sp->hash_dup_free += bstat.bt_leaf_pgfree + + bstat.bt_dup_pgfree + bstat.bt_int_pgfree; + sp->hash_ndata += bstat.bt_ndata; + break; + case P_OVERFLOW: + sp->hash_bigpages++; + sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep); + break; + default: + return (__db_pgfmt(dbp->env, PGNO(pagep))); + } + + return (0); +} + +/* + * __ham_print_cursor -- + * Display the current cursor. 
+ * + * PUBLIC: void __ham_print_cursor __P((DBC *)); + */ +void +__ham_print_cursor(dbc) + DBC *dbc; +{ + static const FN fn[] = { + { H_CONTINUE, "H_CONTINUE" }, + { H_DELETED, "H_DELETED" }, + { H_DUPONLY, "H_DUPONLY" }, + { H_EXPAND, "H_EXPAND" }, + { H_ISDUP, "H_ISDUP" }, + { H_NEXT_NODUP, "H_NEXT_NODUP" }, + { H_NOMORE, "H_NOMORE" }, + { H_OK, "H_OK" }, + { 0, NULL } + }; + ENV *env; + HASH_CURSOR *cp; + + env = dbc->env; + cp = (HASH_CURSOR *)dbc->internal; + + STAT_ULONG("Bucket traversing", cp->bucket); + STAT_ULONG("Bucket locked", cp->lbucket); + STAT_ULONG("Duplicate set offset", cp->dup_off); + STAT_ULONG("Current duplicate length", cp->dup_len); + STAT_ULONG("Total duplicate set length", cp->dup_tlen); + STAT_ULONG("Bytes needed for add", cp->seek_size); + STAT_ULONG("Page on which we can insert", cp->seek_found_page); + STAT_ULONG("Order", cp->order); + __db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags"); +} + +#else /* !HAVE_STATISTICS */ + +int +__ham_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbc->env)); +} +#endif + +/* + * __ham_traverse + * Traverse an entire hash table. We use the callback so that we + * can use this both for stat collection and for deallocation. 
+ * + * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t, + * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *, int)); + */ +int +__ham_traverse(dbc, mode, callback, cookie, look_past_max) + DBC *dbc; + db_lockmode_t mode; + int (*callback) __P((DBC *, PAGE *, void *, int *)); + void *cookie; + int look_past_max; +{ + DB *dbp; + DBC *opd; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + HKEYDATA *hk; + db_pgno_t pgno, opgno; + int did_put, i, ret, t_ret; + u_int32_t bucket, spares_entry; + + dbp = dbc->dbp; + opd = NULL; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; + ret = 0; + + /* + * In a perfect world, we could simply read each page in the file + * and look at its page type to tally the information necessary. + * Unfortunately, the bucket locking that hash tables do to make + * locking easy, makes this a pain in the butt. We have to traverse + * duplicate, overflow and big pages from the bucket so that we + * don't access anything that isn't properly locked. + * + */ + for (bucket = 0;; bucket++) { + /* + * We put the loop exit condition check here, because + * it made for a really vile extended ?: that made SCO's + * compiler drop core. + * + * If look_past_max is not set, we can stop at max_bucket; + * if it is set, we need to include pages that are part of + * the current doubling but beyond the highest bucket we've + * split into, as well as pages from a "future" doubling + * that may have been created within an aborted + * transaction. To do this, keep looping (and incrementing + * bucket) until the corresponding spares array entries + * cease to be defined. 
+ */ + if (look_past_max) { + spares_entry = __db_log2(bucket + 1); + if (spares_entry >= NCACHED || + hcp->hdr->spares[spares_entry] == 0) + break; + } else { + if (bucket > hcp->hdr->max_bucket) + break; + } + + hcp->bucket = bucket; + hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); + for (ret = __ham_get_cpage(dbc, mode); ret == 0; + ret = __ham_next_cpage(dbc, pgno)) { + + /* + * If we are cleaning up pages past the max_bucket, + * then they may be on the free list and have their + * next pointers set, but they should be ignored. In + * fact, we really ought to just skip anybody who is + * not a valid page. + */ + if (TYPE(hcp->page) == P_INVALID) + break; + pgno = NEXT_PGNO(hcp->page); + + /* + * Go through each item on the page checking for + * duplicates (in which case we have to count the + * duplicate pages) or big key/data items (in which + * case we have to count those pages). + */ + for (i = 0; i < NUM_ENT(hcp->page); i++) { + hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i); + switch (HPAGE_PTYPE(hk)) { + case H_OFFDUP: + memcpy(&opgno, HOFFDUP_PGNO(hk), + sizeof(db_pgno_t)); + if ((ret = __dbc_newopd(dbc, + opgno, NULL, &opd)) != 0) + return (ret); + if ((ret = __bam_traverse(opd, + DB_LOCK_READ, opgno, + callback, cookie)) + != 0) + goto err; + if ((ret = __dbc_close(opd)) != 0) + return (ret); + opd = NULL; + break; + case H_OFFPAGE: + /* + * We are about to get a big page + * which will use the same spot that + * the current page uses, so we need + * to restore the current page before + * looking at it again. + */ + memcpy(&opgno, HOFFPAGE_PGNO(hk), + sizeof(db_pgno_t)); + if ((ret = __db_traverse_big(dbc, + opgno, callback, cookie)) != 0) + goto err; + break; + case H_KEYDATA: + case H_DUPLICATE: + break; + default: + ret = __db_unknown_path( + dbp->env, "__ham_traverse"); + goto err; + } + } + + /* Call the callback on main pages. 
*/ + if ((ret = callback(dbc, + hcp->page, cookie, &did_put)) != 0) + goto err; + + if (did_put) + hcp->page = NULL; + if (pgno == PGNO_INVALID) + break; + } + if (ret != 0) + goto err; + + if (hcp->page != NULL) { + if ((ret = __memp_fput(mpf, + dbc->thread_info, hcp->page, dbc->priority)) != 0) + return (ret); + hcp->page = NULL; + } + + } +err: if (opd != NULL && + (t_ret = __dbc_close(opd)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/hash/hash_stub.c b/src/hash/hash_stub.c new file mode 100644 index 00000000..414634cc --- /dev/null +++ b/src/hash/hash_stub.c @@ -0,0 +1,470 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef HAVE_HASH +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +/* + * If the library wasn't compiled with the Hash access method, various + * routines aren't available. Stub them here, returning an appropriate + * error. + */ + +/* + * __db_nohasham -- + * Error when a Berkeley DB build doesn't include the access method. 
+ * + * PUBLIC: int __db_no_hash_am __P((ENV *)); + */ +int +__db_no_hash_am(env) + ENV *env; +{ + __db_errx(env, DB_STR("1133", + "library build did not include support for the Hash access method")); + return (DB_OPNOTSUP); +} + +int +__ham_30_hashmeta(dbp, real_name, obuf) + DB *dbp; + char *real_name; + u_int8_t *obuf; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(obuf, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_30_sizefix(dbp, fhp, realname, metabuf) + DB *dbp; + DB_FH *fhp; + char *realname; + u_int8_t *metabuf; +{ + COMPQUIET(fhp, NULL); + COMPQUIET(realname, NULL); + COMPQUIET(metabuf, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__hamc_cmp(dbc, other_dbc, result) + 
DBC *dbc, *other_dbc; + int *result; +{ + COMPQUIET(other_dbc, NULL); + COMPQUIET(result, NULL); + return (__db_no_hash_am(dbc->env)); +} + +int +__hamc_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + COMPQUIET(recnop, NULL); + return (__db_no_hash_am(dbc->env)); +} + +int +__hamc_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + COMPQUIET(new_dbc, NULL); + return (__db_no_hash_am(orig_dbc->env)); +} + +int +__hamc_init(dbc) + DBC *dbc; +{ + return (__db_no_hash_am(dbc->env)); +} + +int +__ham_db_close(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__ham_db_create(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__ham_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__ham_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *hmeta; + u_int32_t flags; + DB *pgset; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(hmeta, NULL); + COMPQUIET(flags, 0); + COMPQUIET(pgset, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA *hashm; +{ + COMPQUIET(name, NULL); + COMPQUIET(hashm, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_metagroup_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(op, (db_recops)0); + COMPQUIET(info, NULL); + return (__db_no_hash_am(env)); +} + +int +__ham_mswap(env, pg) + ENV *env; + void *pg; +{ + COMPQUIET(pg, NULL); + return (__db_no_hash_am(env)); +} + +int +__ham_groupalloc_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + 
COMPQUIET(op, (db_recops)0); + COMPQUIET(info, NULL); + return (__db_no_hash_am(env)); +} + +int +__ham_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(name, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_new_subdb(mdbp, dbp, ip, txn) + DB *mdbp, *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(ip, NULL); + return (__db_no_hash_am(mdbp->env)); +} + +int +__ham_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(name, NULL); + COMPQUIET(base_pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_pgin(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_pgout(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_hash_am(dbp->env)); +} + +void +__ham_print_cursor(dbc) + DBC *dbc; +{ + (void)__db_no_hash_am(dbc->env); +} + +int +__ham_quick_delete(dbc) + DBC *dbc; +{ + return (__db_no_hash_am(dbc->env)); +} + +int +__ham_reclaim(dbp, ip, txn, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t flags; +{ + COMPQUIET(txn, NULL); + COMPQUIET(ip, NULL); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(h, 
NULL); + COMPQUIET(handle, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbc->env)); +} + +int +__ham_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbc->env)); +} + +int +__ham_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + COMPQUIET(countp, NULL); + return (__db_no_hash_am(dbc->env)); +} + +int +__ham_vrfy(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_vrfy_hashing(dbc, nentries, m, thisbucket, pgno, flags, hfunc) + DBC *dbc; + u_int32_t nentries; + HMETA *m; + u_int32_t thisbucket; + db_pgno_t pgno; + u_int32_t flags; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); +{ + COMPQUIET(nentries, 0); + COMPQUIET(m, NULL); + COMPQUIET(thisbucket, 0); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + COMPQUIET(hfunc, NULL); + return (__db_no_hash_am(dbc->dbp->env)); +} + +int +__ham_vrfy_meta(dbp, vdp, m, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *m; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(m, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t meta_pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(meta_pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_hash_am(dbp->env)); +} +#endif /* !HAVE_HASH */ diff --git a/src/hash/hash_upgrade.c b/src/hash/hash_upgrade.c new file mode 100644 index 00000000..2856c3a7 --- /dev/null +++ b/src/hash/hash_upgrade.c @@ -0,0 +1,323 @@ +/*- + * See the 
file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/db_upgrade.h" + +/* + * __ham_30_hashmeta -- + * Upgrade the database from version 4/5 to version 6. + * + * PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); + */ +int +__ham_30_hashmeta(dbp, real_name, obuf) + DB *dbp; + char *real_name; + u_int8_t *obuf; +{ + ENV *env; + HASHHDR *oldmeta; + HMETA30 newmeta; + u_int32_t *o_spares, *n_spares; + u_int32_t fillf, i, maxb, max_entry, nelem; + int ret; + + env = dbp->env; + memset(&newmeta, 0, sizeof(newmeta)); + + oldmeta = (HASHHDR *)obuf; + + /* + * The first 32 bytes are similar. The only change is the version + * and that we removed the ovfl_point and have the page type now. + */ + + newmeta.dbmeta.lsn = oldmeta->lsn; + newmeta.dbmeta.pgno = oldmeta->pgno; + newmeta.dbmeta.magic = oldmeta->magic; + newmeta.dbmeta.version = 6; + newmeta.dbmeta.pagesize = oldmeta->pagesize; + newmeta.dbmeta.type = P_HASHMETA; + + /* Move flags */ + newmeta.dbmeta.flags = oldmeta->flags; + + /* Copy the free list, which has changed its name but works the same. */ + newmeta.dbmeta.free = oldmeta->last_freed; + + /* Copy: max_bucket, high_mask, low-mask, ffactor, nelem, h_charkey */ + newmeta.max_bucket = oldmeta->max_bucket; + newmeta.high_mask = oldmeta->high_mask; + newmeta.low_mask = oldmeta->low_mask; + newmeta.ffactor = oldmeta->ffactor; + newmeta.nelem = oldmeta->nelem; + newmeta.h_charkey = oldmeta->h_charkey; + + /* + * There was a bug in 2.X versions where the nelem could go negative. + * In general, this is considered "bad." If it does go negative + * (that is, very large and positive), we'll die trying to dump and + * load this database. So, let's see if we can fix it here. 
+ */ + nelem = newmeta.nelem; + fillf = newmeta.ffactor; + maxb = newmeta.max_bucket; + + if ((fillf != 0 && fillf * maxb < 2 * nelem) || + (fillf == 0 && nelem > 0x8000000)) + newmeta.nelem = 0; + + /* + * We now have to convert the spares array. The old spares array + * contained the total number of extra pages allocated prior to + * the bucket that begins the next doubling. The new spares array + * contains the page number of the first bucket in the next doubling + * MINUS the bucket number of that bucket. + */ + o_spares = oldmeta->spares; + n_spares = newmeta.spares; + max_entry = __db_log2(maxb + 1); /* highest spares entry in use */ + n_spares[0] = 1; + for (i = 1; i < NCACHED && i <= max_entry; i++) + n_spares[i] = 1 + o_spares[i - 1]; + + /* Replace the unique ID. */ + if ((ret = __os_fileid(env, real_name, 1, newmeta.dbmeta.uid)) != 0) + return (ret); + + /* Overwrite the original. */ + memcpy(oldmeta, &newmeta, sizeof(newmeta)); + + return (0); +} + +/* + * __ham_30_sizefix -- + * Make sure that all hash pages belonging to the current + * hash doubling are within the bounds of the file. + * + * PUBLIC: int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); + */ +int +__ham_30_sizefix(dbp, fhp, realname, metabuf) + DB *dbp; + DB_FH *fhp; + char *realname; + u_int8_t *metabuf; +{ + u_int8_t buf[DB_MAX_PGSIZE]; + ENV *env; + HMETA30 *meta; + db_pgno_t last_actual, last_desired; + int ret; + size_t nw; + u_int32_t pagesize; + + env = dbp->env; + memset(buf, 0, DB_MAX_PGSIZE); + + meta = (HMETA30 *)metabuf; + pagesize = meta->dbmeta.pagesize; + + /* + * Get the last page number. To do this, we'll need dbp->pgsize + * to be set right, so slam it into place. + */ + dbp->pgsize = pagesize; + if ((ret = __db_lastpgno(dbp, realname, fhp, &last_actual)) != 0) + return (ret); + + /* + * The last bucket in the doubling is equal to high_mask; calculate + * the page number that implies. 
+ */ + last_desired = BS_TO_PAGE(meta->high_mask, meta->spares); + + /* + * If last_desired > last_actual, we need to grow the file. Write + * a zeroed page where last_desired would go. + */ + if (last_desired > last_actual) { + if ((ret = __os_seek( + env, fhp, last_desired, pagesize, 0)) != 0) + return (ret); + if ((ret = __os_write(env, fhp, buf, pagesize, &nw)) != 0) + return (ret); + } + + return (0); +} + +/* + * __ham_31_hashmeta -- + * Upgrade the database from version 6 to version 7. + * + * PUBLIC: int __ham_31_hashmeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HMETA30 *oldmeta; + HMETA31 *newmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + + newmeta = (HMETA31 *)h; + oldmeta = (HMETA30 *)h; + + /* + * Copy the fields down the page. + * The fields may overlap so start at the bottom and use memmove(). + */ + memmove(newmeta->spares, oldmeta->spares, sizeof(oldmeta->spares)); + newmeta->h_charkey = oldmeta->h_charkey; + newmeta->nelem = oldmeta->nelem; + newmeta->ffactor = oldmeta->ffactor; + newmeta->low_mask = oldmeta->low_mask; + newmeta->high_mask = oldmeta->high_mask; + newmeta->max_bucket = oldmeta->max_bucket; + memmove(newmeta->dbmeta.uid, + oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); + newmeta->dbmeta.flags = oldmeta->dbmeta.flags; + newmeta->dbmeta.record_count = 0; + newmeta->dbmeta.key_count = 0; + ZERO_LSN(newmeta->dbmeta.unused3); + + /* Update the version. */ + newmeta->dbmeta.version = 7; + + /* Upgrade the flags. */ + if (LF_ISSET(DB_DUPSORT)) + F_SET(&newmeta->dbmeta, DB_HASH_DUPSORT); + + *dirtyp = 1; + return (0); +} + +/* + * __ham_31_hash -- + * Upgrade the database hash leaf pages. 
+ * + * PUBLIC: int __ham_31_hash + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HKEYDATA *hk; + db_pgno_t pgno, tpgno; + db_indx_t indx; + int ret; + + COMPQUIET(flags, 0); + + ret = 0; + for (indx = 0; indx < NUM_ENT(h); indx += 2) { + hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx); + if (HPAGE_PTYPE(hk) == H_OFFDUP) { + memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); + tpgno = pgno; + if ((ret = __db_31_offdup(dbp, real_name, fhp, + LF_ISSET(DB_DUPSORT) ? 1 : 0, &tpgno)) != 0) + break; + if (pgno != tpgno) { + *dirtyp = 1; + memcpy(HOFFDUP_PGNO(hk), + &tpgno, sizeof(db_pgno_t)); + } + } + } + + return (ret); +} + +/* + * __ham_46_hashmeta -- + * Upgrade the database from version 8 to version 9. + * + * PUBLIC: int __ham_46_hashmeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HMETA33 *newmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + + newmeta = (HMETA33 *)h; + /* Update the version. */ + newmeta->dbmeta.version = 9; + *dirtyp = 1; + + return (0); +} + +/* + * __ham_46_hash -- + * Upgrade the database hash leaf pages. + * From version 8 databases to version 9. + * Involves sorting leaf pages, no format change. 
+ * + * PUBLIC: int __ham_46_hash + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + DBC *dbc; + int ret, t_ret; + + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + + if ((ret = __db_cursor(dbp, NULL, NULL, &dbc, 0)) != 0) + return (ret); + *dirtyp = 1; + ret = __ham_sort_page(dbc, NULL, h); + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} diff --git a/src/hash/hash_verify.c b/src/hash/hash_verify.c new file mode 100644 index 00000000..dde4e676 --- /dev/null +++ b/src/hash/hash_verify.c @@ -0,0 +1,1148 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); +static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, + u_int32_t)); +static int __ham_vrfy_item __P((DB *, + VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); + +/* + * __ham_vrfy_meta -- + * Verify the hash-specific part of a metadata page. + * + * Note that unlike btree, we don't save things off, because we + * will need most everything again to verify each page and the + * amount of state here is significant. 
+ * + * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__ham_vrfy_meta(dbp, vdp, m, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *m; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + HASH *hashp; + VRFY_PAGEINFO *pip; + int i, ret, t_ret, isbad; + u_int32_t pwr, mbucket; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + hashp = dbp->h_internal; + + if (hashp != NULL && hashp->h_hash != NULL) + hfunc = hashp->h_hash; + else + hfunc = __ham_func5; + + /* + * If we haven't already checked the common fields in pagezero, + * check them. + */ + if (!F_ISSET(pip, VRFY_INCOMPLETE) && + (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* h_charkey */ + if (!LF_ISSET(DB_NOORDERCHK)) + if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { + EPRINT((env, DB_STR_A("1096", +"Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set", + "%lu"), (u_long)pgno)); + /* + * Return immediately; this is probably a sign of user + * error rather than database corruption, so we want to + * avoid extraneous errors. + */ + isbad = 1; + goto err; + } + + /* max_bucket must be less than the last pgno. */ + if (m->max_bucket > vdp->last_pgno) { + EPRINT((env, DB_STR_A("1097", + "Page %lu: Impossible max_bucket %lu on meta page", + "%lu %lu"), (u_long)pgno, (u_long)m->max_bucket)); + /* + * Most other fields depend somehow on max_bucket, so + * we just return--there will be lots of extraneous + * errors. + */ + isbad = 1; + goto err; + } + + /* + * max_bucket, high_mask and low_mask: high_mask must be one + * less than the next power of two above max_bucket, and + * low_mask must be one less than the power of two below it. + */ + pwr = (m->max_bucket == 0) ? 
1 : 1 << __db_log2(m->max_bucket + 1); + if (m->high_mask != pwr - 1) { + EPRINT((env, DB_STR_A("1098", + "Page %lu: incorrect high_mask %lu, should be %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)m->high_mask, + (u_long)pwr - 1)); + isbad = 1; + } + pwr >>= 1; + if (m->low_mask != pwr - 1) { + EPRINT((env, DB_STR_A("1099", + "Page %lu: incorrect low_mask %lu, should be %lu", + "%lu %lu %lu"), (u_long)pgno, (u_long)m->low_mask, + (u_long)pwr - 1)); + isbad = 1; + } + + /* ffactor: no check possible. */ + pip->h_ffactor = m->ffactor; + + /* + * nelem: just make sure it's not astronomical for now. This is the + * same check that hash_upgrade does, since there was a bug in 2.X + * which could make nelem go "negative". + */ + if (m->nelem > 0x80000000) { + EPRINT((env, DB_STR_A("1100", + "Page %lu: suspiciously high nelem of %lu", "%lu %lu"), + (u_long)pgno, (u_long)m->nelem)); + isbad = 1; + pip->h_nelem = 0; + } else + pip->h_nelem = m->nelem; + + /* flags */ + if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) + F_SET(pip, VRFY_HAS_DUPS); + if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) + F_SET(pip, VRFY_HAS_DUPSORT); + /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ + + /* spares array */ + for (i = 0; i < NCACHED && m->spares[i] != 0; i++) { + /* + * We set mbucket to the maximum bucket that would use a given + * spares entry; we want to ensure that it's always less + * than last_pgno. + */ + mbucket = (1 << i) - 1; + if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { + EPRINT((env, DB_STR_A("1101", + "Page %lu: spares array entry %d is invalid", + "%lu %d"), (u_long)pgno, i)); + isbad = 1; + } + } + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (LF_ISSET(DB_SALVAGE) && + (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy -- + * Verify hash page. 
+ * + * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__ham_vrfy(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + DBC *dbc; + ENV *env; + VRFY_PAGEINFO *pip; + u_int32_t ent, himark, inpend; + db_indx_t *inp; + int isbad, ret, t_ret; + + env = dbp->env; + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + if (TYPE(h) != P_HASH && TYPE(h) != P_HASH_UNSORTED) { + ret = __db_unknown_path(env, "__ham_vrfy"); + goto err; + } + + /* Verify and save off fields common to all PAGEs. */ + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower + * than the previous one, higher than the current end of the inp array, + * and lower than the page size. + * + * In any case, we return immediately if things are bad, as it would + * be unsafe to proceed. 
+ */ + inp = P_INP(dbp, h); + for (ent = 0, himark = dbp->pgsize, + inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); + ent < NUM_ENT(h); ent++) + if (inp[ent] >= himark) { + EPRINT((env, DB_STR_A("1102", + "Page %lu: item %lu is out of order or nonsensical", + "%lu %lu"), (u_long)pgno, (u_long)ent)); + isbad = 1; + goto err; + } else if (inpend >= himark) { + EPRINT((env, DB_STR_A("1103", + "Page %lu: entries array collided with data", + "%lu"), (u_long)pgno)); + isbad = 1; + goto err; + + } else { + himark = inp[ent]; + inpend += sizeof(db_indx_t); + if ((ret = __ham_vrfy_item( + dbp, vdp, pgno, h, ent, flags)) != 0) + goto err; + } + + if ((ret = __db_cursor_int(dbp, vdp->thread_info, NULL, DB_HASH, + PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) + return (ret); + if (!LF_ISSET(DB_NOORDERCHK) && TYPE(h) == P_HASH && + (ret = __ham_verify_sorted_page(dbc, h)) != 0) + isbad = 1; + +err: if ((t_ret = + __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy_item -- + * Given a hash page and an offset, sanity-check the item itself, + * and save off any overflow items or off-page dup children as necessary. + */ +static int +__ham_vrfy_item(dbp, vdp, pgno, h, i, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + u_int32_t i, flags; +{ + HOFFDUP hod; + HOFFPAGE hop; + VRFY_CHILDINFO child; + VRFY_PAGEINFO *pip; + db_indx_t offset, len, dlen, elen; + int ret, t_ret; + u_int8_t *databuf; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + switch (HPAGE_TYPE(dbp, h, i)) { + case H_KEYDATA: + /* Nothing to do here--everything but the type field is data */ + break; + case H_DUPLICATE: + /* Are we a datum or a key? Better be the former. 
*/ + if (i % 2 == 0) { + EPRINT((dbp->env, DB_STR_A("1104", + "Page %lu: hash key stored as duplicate item %lu", + "%lu %lu"), (u_long)pip->pgno, (u_long)i)); + } + /* + * Dups are encoded as a series within a single HKEYDATA, + * in which each dup is surrounded by a copy of its length + * on either side (so that the series can be walked in either + * direction. We loop through this series and make sure + * each dup is reasonable. + * + * Note that at this point, we've verified item i-1, so + * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). + */ + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); + databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); + for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { + memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); + + /* Make sure the length is plausible. */ + if (offset + DUP_SIZE(dlen) > len) { + EPRINT((dbp->env, DB_STR_A("1105", + "Page %lu: duplicate item %lu has bad length", + "%lu %lu"), (u_long)pip->pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + + /* + * Make sure the second copy of the length is the + * same as the first. + */ + memcpy(&elen, + databuf + offset + dlen + sizeof(db_indx_t), + sizeof(db_indx_t)); + if (elen != dlen) { + EPRINT((dbp->env, DB_STR_A("1106", + "Page %lu: duplicate item %lu has two different lengths", + "%lu %lu"), (u_long)pip->pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + } + F_SET(pip, VRFY_HAS_DUPS); + if (!LF_ISSET(DB_NOORDERCHK) && + __ham_dups_unsorted(dbp, databuf, len)) + F_SET(pip, VRFY_DUPS_UNSORTED); + break; + case H_OFFPAGE: + /* Offpage item. Make sure pgno is sane, save off. 
*/ + memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); + if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || + hop.pgno == PGNO_INVALID) { + EPRINT((dbp->env, DB_STR_A("1107", + "Page %lu: offpage item %lu has bad pgno %lu", + "%lu %lu %lu"), (u_long)pip->pgno, (u_long)i, + (u_long)hop.pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + child.pgno = hop.pgno; + child.type = V_OVERFLOW; + child.tlen = hop.tlen; /* This will get checked later. */ + if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) + goto err; + break; + case H_OFFDUP: + /* Offpage duplicate item. Same drill. */ + memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); + if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || + hod.pgno == PGNO_INVALID) { + EPRINT((dbp->env, DB_STR_A("1108", + "Page %lu: offpage item %lu has bad page number", + "%lu %lu"), (u_long)pip->pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + memset(&child, 0, sizeof(VRFY_CHILDINFO)); + child.pgno = hod.pgno; + child.type = V_DUPLICATE; + if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) + goto err; + F_SET(pip, VRFY_HAS_DUPS); + break; + default: + EPRINT((dbp->env, DB_STR_A("1109", + "Page %lu: item %lu has bad type", "%lu %lu"), + (u_long)pip->pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + break; + } + +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __ham_vrfy_structure -- + * Verify the structure of a hash database. 
+ * + * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: u_int32_t)); + */ +int +__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t meta_pgno; + u_int32_t flags; +{ + DB *pgset; + DB_MPOOLFILE *mpf; + HMETA *m; + PAGE *h; + VRFY_PAGEINFO *pip; + int isbad, p, ret, t_ret; + db_pgno_t pgno; + u_int32_t bucket, spares_entry; + + mpf = dbp->mpf; + pgset = vdp->pgset; + h = NULL; + ret = isbad = 0; + + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, meta_pgno, &p)) != 0) + return (ret); + if (p != 0) { + EPRINT((dbp->env, DB_STR_A("1110", + "Page %lu: Hash meta page referenced twice", "%lu"), + (u_long)meta_pgno)); + return (DB_VERIFY_BAD); + } + if ((ret = __db_vrfy_pgset_inc(pgset, + vdp->thread_info, vdp->txn, meta_pgno)) != 0) + return (ret); + + /* Get the meta page; we'll need it frequently. */ + if ((ret = __memp_fget(mpf, + &meta_pgno, vdp->thread_info, NULL, 0, &m)) != 0) + return (ret); + + /* Loop through bucket by bucket. */ + for (bucket = 0; bucket <= m->max_bucket; bucket++) + if ((ret = + __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + /* + * There may be unused hash pages corresponding to buckets + * that have been allocated but not yet used. These may be + * part of the current doubling above max_bucket, or they may + * correspond to buckets that were used in a transaction + * that then aborted. + * + * Loop through them, as far as the spares array defines them, + * and make sure they're all empty. + * + * Note that this should be safe, since we've already verified + * that the spares array is sane. 
+ */ + for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), + spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { + pgno = BS_TO_PAGE(bucket, m->spares); + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + goto err; + + /* It's okay if these pages are totally zeroed; unmark it. */ + F_CLR(pip, VRFY_IS_ALLZEROES); + + /* It's also OK if this page is simply invalid. */ + if (pip->type == P_INVALID) { + if ((ret = __db_vrfy_putpageinfo(dbp->env, + vdp, pip)) != 0) + goto err; + continue; + } + + if (pip->type != P_HASH && pip->type != P_HASH_UNSORTED) { + EPRINT((dbp->env, DB_STR_A("1111", + "Page %lu: hash bucket %lu maps to non-hash page", + "%lu %lu"), (u_long)pgno, (u_long)bucket)); + isbad = 1; + } else if (pip->entries != 0) { + EPRINT((dbp->env, DB_STR_A("1112", + "Page %lu: non-empty page in unused hash bucket %lu", + "%lu %lu"), (u_long)pgno, (u_long)bucket)); + isbad = 1; + } else { + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, pgno, &p)) != 0) + goto err; + if (p != 0) { + EPRINT((dbp->env, DB_STR_A("1113", + "Page %lu: above max_bucket referenced", + "%lu"), (u_long)pgno)); + isbad = 1; + } else { + if ((ret = __db_vrfy_pgset_inc(pgset, + vdp->thread_info, vdp->txn, pgno)) != 0) + goto err; + if ((ret = __db_vrfy_putpageinfo(dbp->env, + vdp, pip)) != 0) + goto err; + continue; + } + } + + /* If we got here, it's an error. */ + (void)__db_vrfy_putpageinfo(dbp->env, vdp, pip); + goto err; + } + +err: if ((t_ret = __memp_fput(mpf, vdp->thread_info, m, dbp->priority)) != 0) + return (t_ret); + if (h != NULL && + (t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0) + return (t_ret); + return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); +} + +/* + * __ham_vrfy_bucket -- + * Verify a given bucket. 
+ */ +static int +__ham_vrfy_bucket(dbp, vdp, m, bucket, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *m; + u_int32_t bucket, flags; +{ + ENV *env; + HASH *hashp; + VRFY_CHILDINFO *child; + VRFY_PAGEINFO *mip, *pip; + int ret, t_ret, isbad, p; + db_pgno_t pgno, next_pgno; + DBC *cc; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); + + env = dbp->env; + isbad = 0; + pip = NULL; + cc = NULL; + + hashp = dbp->h_internal; + if (hashp != NULL && hashp->h_hash != NULL) + hfunc = hashp->h_hash; + else + hfunc = __ham_func5; + + if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) + return (ret); + + /* Calculate the first pgno for this bucket. */ + pgno = BS_TO_PAGE(bucket, m->spares); + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + goto err; + + /* Make sure we got a plausible page number. */ + if (pgno > vdp->last_pgno || + (pip->type != P_HASH && pip->type != P_HASH_UNSORTED)) { + EPRINT((env, DB_STR_A("1114", + "Page %lu: impossible first page in bucket %lu", "%lu %lu"), + (u_long)pgno, (u_long)bucket)); + /* Unsafe to continue. */ + isbad = 1; + goto err; + } + + if (pip->prev_pgno != PGNO_INVALID) { + EPRINT((env, DB_STR_A("1115", + "Page %lu: first page in hash bucket %lu has a prev_pgno", + "%lu %lu"), (u_long)pgno, (u_long)bucket)); + isbad = 1; + } + + /* + * Set flags for dups and sorted dups. + */ + flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? DB_ST_DUPOK : 0; + flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? DB_ST_DUPSORT : 0; + + /* Loop until we find a fatal bug, or until we run out of pages. */ + for (;;) { + /* Provide feedback on our progress to the application. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_pgset_get(vdp->pgset, + vdp->thread_info, vdp->txn, pgno, &p)) != 0) + goto err; + if (p != 0) { + EPRINT((env, DB_STR_A("1116", + "Page %lu: hash page referenced twice", "%lu"), + (u_long)pgno)); + isbad = 1; + /* Unsafe to continue. 
*/ + goto err; + } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, + vdp->thread_info, vdp->txn, pgno)) != 0) + goto err; + + /* + * Hash pages that nothing has ever hashed to may never + * have actually come into existence, and may appear to be + * entirely zeroed. This is acceptable, and since there's + * no real way for us to know whether this has actually + * occurred, we clear the "wholly zeroed" flag on every + * hash page. A wholly zeroed page, by nature, will appear + * to have no flags set and zero entries, so should + * otherwise verify correctly. + */ + F_CLR(pip, VRFY_IS_ALLZEROES); + + /* If we have dups, our meta page had better know about it. */ + if (F_ISSET(pip, VRFY_HAS_DUPS) && + !F_ISSET(mip, VRFY_HAS_DUPS)) { + EPRINT((env, DB_STR_A("1117", + "Page %lu: duplicates present in non-duplicate database", + "%lu"), (u_long)pgno)); + isbad = 1; + } + + /* + * If the database has sorted dups, this page had better + * not have unsorted ones. + */ + if (F_ISSET(mip, VRFY_HAS_DUPSORT) && + F_ISSET(pip, VRFY_DUPS_UNSORTED)) { + EPRINT((env, DB_STR_A("1118", + "Page %lu: unsorted dups in sorted-dup database", + "%lu"), (u_long)pgno)); + isbad = 1; + } + + /* Walk overflow chains and offpage dup trees. 
*/ + if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) + goto err; + for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; + ret = __db_vrfy_ccnext(cc, &child)) + if (child->type == V_OVERFLOW) { + if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, + child->pgno, child->tlen, + flags | DB_ST_OVFL_LEAF)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } else if (child->type == V_DUPLICATE) { + if ((ret = __db_vrfy_duptype(dbp, + vdp, child->pgno, flags)) != 0) { + isbad = 1; + continue; + } + if ((ret = __bam_vrfy_subtree(dbp, vdp, + child->pgno, NULL, NULL, + flags | DB_ST_RECNUM | DB_ST_DUPSET | DB_ST_TOPLEVEL, + NULL, NULL, NULL)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + } + /* Close the cursor on vdp, open one on dbp */ + if ((ret = __db_vrfy_ccclose(cc)) != 0) + goto err; + if ((ret = __db_cursor_int(dbp, vdp->thread_info, NULL, + DB_HASH, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &cc)) != 0) + goto err; + /* If it's safe to check that things hash properly, do so. */ + if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && + (ret = __ham_vrfy_hashing(cc, pip->entries, + m, bucket, pgno, flags, hfunc)) != 0) { + if (ret == DB_VERIFY_BAD) + isbad = 1; + else + goto err; + } + + next_pgno = pip->next_pgno; + ret = __db_vrfy_putpageinfo(env, vdp, pip); + + pip = NULL; + if (ret != 0) + goto err; + + if (next_pgno == PGNO_INVALID) + break; /* End of the bucket. */ + + /* We already checked this, but just in case... 
*/ + if (!IS_VALID_PGNO(next_pgno)) { + EPRINT((env, DB_STR_A("1119", + "Page %lu: hash page has bad next_pgno", "%lu"), + (u_long)pgno)); + isbad = 1; + goto err; + } + + if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) + goto err; + + if (pip->prev_pgno != pgno) { + EPRINT((env, DB_STR_A("1120", + "Page %lu: hash page has bad prev_pgno", "%lu"), + (u_long)next_pgno)); + isbad = 1; + } + pgno = next_pgno; + } + +err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) + ret = t_ret; + if (mip != NULL && ((t_ret = + __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0) + ret = t_ret; + if (pip != NULL && ((t_ret = + __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0) + ret = t_ret; + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_vrfy_hashing -- + * Verify that all items on a given hash page hash correctly. + * + * PUBLIC: int __ham_vrfy_hashing __P((DBC *, + * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, + * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); + */ +int +__ham_vrfy_hashing(dbc, nentries, m, thisbucket, pgno, flags, hfunc) + DBC *dbc; + u_int32_t nentries; + HMETA *m; + u_int32_t thisbucket; + db_pgno_t pgno; + u_int32_t flags; + u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); +{ + DB *dbp; + DBT dbt; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *h; + db_indx_t i; + int ret, t_ret, isbad; + u_int32_t hval, bucket; + + dbp = dbc->dbp; + mpf = dbp->mpf; + ret = isbad = 0; + + memset(&dbt, 0, sizeof(DBT)); + F_SET(&dbt, DB_DBT_REALLOC); + ENV_GET_THREAD_INFO(dbp->env, ip); + + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) + return (ret); + + for (i = 0; i < nentries; i += 2) { + /* + * We've already verified the page integrity and that of any + * overflow chains linked off it; it is therefore safe to use + * __db_ret. 
It's also not all that much slower, since we have + * to copy every hash item to deal with alignment anyway; we + * can tweak this a bit if this proves to be a bottleneck, + * but for now, take the easy route. + */ + if ((ret = __db_ret(dbc, h, i, &dbt, NULL, NULL)) != 0) + goto err; + hval = hfunc(dbp, dbt.data, dbt.size); + + bucket = hval & m->high_mask; + if (bucket > m->max_bucket) + bucket = bucket & m->low_mask; + + if (bucket != thisbucket) { + EPRINT((dbp->env, DB_STR_A("1121", + "Page %lu: item %lu hashes incorrectly", "%lu %lu"), + (u_long)pgno, (u_long)i)); + isbad = 1; + } + } + +err: if (dbt.data != NULL) + __os_ufree(dbp->env, dbt.data); + if ((t_ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) + return (t_ret); + + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); +} + +/* + * __ham_salvage -- + * Safely dump out anything that looks like a key on an alleged + * hash page. + * + * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBT dbt, key_dbt, unkdbt; + db_pgno_t dpgno; + int ret, err_ret, t_ret; + u_int32_t himark, i, ovfl_bufsz; + u_int8_t *hk, *p; + void *buf, *key_buf; + db_indx_t dlen, len, tlen; + + memset(&dbt, 0, sizeof(DBT)); + dbt.flags = DB_DBT_REALLOC; + + DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); + + err_ret = 0; + + /* + * Allocate a buffer for overflow items. Start at one page; + * __db_safe_goff will realloc as needed. + */ + if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0) + return (ret); + ovfl_bufsz = dbp->pgsize; + + himark = dbp->pgsize; + for (i = 0;; i++) { + /* If we're not aggressive, break when we hit NUM_ENT(h). 
*/ + if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) + break; + + /* + * Verify the current item. If we're beyond NUM_ENT errors are + * expected and ignored. + */ + ret = __db_vrfy_inpitem(dbp, + h, pgno, i, 0, flags, &himark, NULL); + /* If this returned a fatality, it's time to break. */ + if (ret == DB_VERIFY_FATAL) { + if (i >= NUM_ENT(h)) + ret = 0; + break; + } else if (ret != 0 && i >= NUM_ENT(h)) { + /* Not a reportable error, but don't salvage item. */ + ret = 0; + } else if (ret == 0) { + /* Set len to total entry length. */ + len = LEN_HITEM(dbp, h, dbp->pgsize, i); + hk = P_ENTRY(dbp, h, i); + if (len == 0 || len > dbp->pgsize || + (u_int32_t)(hk + len - (u_int8_t *)h) > + dbp->pgsize) { + /* Item is unsafely large; skip it. */ + err_ret = DB_VERIFY_BAD; + continue; + } + switch (HPAGE_PTYPE(hk)) { + case H_KEYDATA: + /* Update len to size of item. */ + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); +keydata: memcpy(buf, HKEYDATA_DATA(hk), len); + dbt.size = len; + dbt.data = buf; + if ((ret = __db_vrfy_prdbt(&dbt, + 0, " ", handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + break; + case H_OFFPAGE: + if (len < HOFFPAGE_SIZE) { + err_ret = DB_VERIFY_BAD; + continue; + } + memcpy(&dpgno, + HOFFPAGE_PGNO(hk), sizeof(dpgno)); + if ((ret = __db_safe_goff(dbp, + vdp, dpgno, &dbt, &buf, + &ovfl_bufsz, flags)) != 0) { + err_ret = ret; + (void)__db_vrfy_prdbt(&unkdbt, 0, " ", + handle, callback, 0, 0, vdp); + /* fallthrough to end of case */ + } else if ((ret = __db_vrfy_prdbt(&dbt, + 0, " ", handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + break; + case H_OFFDUP: + if (len < HOFFDUP_SIZE) { + err_ret = DB_VERIFY_BAD; + continue; + } + memcpy(&dpgno, + HOFFDUP_PGNO(hk), sizeof(dpgno)); + /* UNKNOWN iff pgno is bad or we're a key. 
*/ + if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { + if ((ret = + __db_vrfy_prdbt(&unkdbt, 0, " ", + handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + } else if ((ret = __db_salvage_duptree(dbp, + vdp, dpgno, &dbt, handle, callback, + flags | DB_SA_SKIPFIRSTKEY)) != 0) + err_ret = ret; + break; + case H_DUPLICATE: + /* + * This is an on-page duplicate item, iterate + * over the duplicate set, printing out + * key/data pairs. + */ + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); + /* + * If this item is at an even index it must be + * a key item and it should never be of type + * H_DUPLICATE. If we are in aggressive mode, + * print the item out as a normal key, and let + * the user resolve the discrepancy. + */ + if (i % 2 == 0) { + err_ret = ret; + if (LF_ISSET(DB_AGGRESSIVE)) + goto keydata; + break; + } + + /* + * Check to ensure that the item size is + * greater than the smallest possible on page + * duplicate. + */ + if (len < + HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { + err_ret = DB_VERIFY_BAD; + continue; + } + + /* + * Copy out the key from the dbt, it is still + * present from the previous pass. + */ + memset(&key_dbt, 0, sizeof(key_dbt)); + if ((ret = __os_malloc( + dbp->env, dbt.size, &key_buf)) != 0) + return (ret); + memcpy(key_buf, buf, dbt.size); + key_dbt.data = key_buf; + key_dbt.size = dbt.size; + key_dbt.flags = DB_DBT_USERMEM; + + /* Loop until we hit the total length. */ + for (tlen = 0; tlen + sizeof(db_indx_t) < len; + tlen += dlen + 2 * sizeof(db_indx_t)) { + /* + * Print the key for every duplicate + * item. Except the first dup, since + * the key was already output once by + * the previous iteration. + */ + if (tlen != 0) { + if ((ret = __db_vrfy_prdbt( + &key_dbt, 0, " ", handle, + callback, 0, 0, vdp)) != 0) + err_ret = ret; + } + p = HKEYDATA_DATA(hk) + tlen; + memcpy(&dlen, p, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + /* + * If dlen is too long, print all the + * rest of the dup set in a chunk. 
+ */ + if (dlen + tlen + sizeof(db_indx_t) > + len) { + dlen = len - + (tlen + sizeof(db_indx_t)); + err_ret = DB_VERIFY_BAD; + } + memcpy(buf, p, dlen); + dbt.size = dlen; + dbt.data = buf; + if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", + handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + } + __os_free(dbp->env, key_buf); + break; + default: + if (!LF_ISSET(DB_AGGRESSIVE)) + break; + err_ret = DB_VERIFY_BAD; + break; + } + } + } + + __os_free(dbp->env, buf); + if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) + return (t_ret); + return ((ret == 0 && err_ret != 0) ? err_ret : ret); +} + +/* + * __ham_meta2pgset -- + * Return the set of hash pages corresponding to the given + * known-good meta page. + * + * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, + * PUBLIC: DB *)); + */ +int +__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + HMETA *hmeta; + u_int32_t flags; + DB *pgset; +{ + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *h; + db_pgno_t pgno; + u_int32_t bucket, totpgs; + int ret, val; + + /* + * We don't really need flags, but leave them for consistency with + * __bam_meta2pgset. + */ + COMPQUIET(flags, 0); + ip = vdp->thread_info; + + DB_ASSERT(dbp->env, pgset != NULL); + + mpf = dbp->mpf; + totpgs = 0; + + /* + * Loop through all the buckets, pushing onto pgset the corresponding + * page(s) for each one. + */ + for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { + pgno = BS_TO_PAGE(bucket, hmeta->spares); + + /* + * We know the initial pgno is safe because the spares array has + * been verified. + * + * Safely walk the list of pages in this bucket. + */ + for (;;) { + if ((ret = + __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) + return (ret); + if (TYPE(h) == P_HASH || TYPE(h) == P_HASH_UNSORTED) { + + /* + * Make sure we don't go past the end of + * pgset. 
+ */ + if (++totpgs > vdp->last_pgno) { + (void)__memp_fput(mpf, + ip, h, dbp->priority); + return (DB_VERIFY_BAD); + } + if ((ret = __db_vrfy_pgset_inc(pgset, + vdp->thread_info, vdp->txn, pgno)) != 0) { + (void)__memp_fput(mpf, + ip, h, dbp->priority); + return (ret); + } + + pgno = NEXT_PGNO(h); + } else + pgno = PGNO_INVALID; + + if ((ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) + return (ret); + + /* If the new pgno is wonky, go onto the next bucket. */ + if (!IS_VALID_PGNO(pgno) || + pgno == PGNO_INVALID) + break; + + /* + * If we've touched this page before, we have a cycle; + * go on to the next bucket. + */ + if ((ret = __db_vrfy_pgset_get(pgset, + vdp->thread_info, vdp->txn, pgno, &val)) != 0) + return (ret); + if (val != 0) + break; + } + } + return (0); +} + +/* + * __ham_dups_unsorted -- + * Takes a known-safe hash duplicate set and its total length. + * Returns 1 if there are out-of-order duplicates in this set, + * 0 if there are not. + */ +static int +__ham_dups_unsorted(dbp, buf, len) + DB *dbp; + u_int8_t *buf; + u_int32_t len; +{ + DBT a, b; + db_indx_t offset, dlen; + int (*func) __P((DB *, const DBT *, const DBT *)); + + memset(&a, 0, sizeof(DBT)); + memset(&b, 0, sizeof(DBT)); + + func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; + + /* + * Loop through the dup set until we hit the end or we find + * a pair of dups that's out of order. b is always the current + * dup, a the one before it. + */ + for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { + memcpy(&dlen, buf + offset, sizeof(db_indx_t)); + b.data = buf + offset + sizeof(db_indx_t); + b.size = dlen; + + if (a.data != NULL && func(dbp, &a, &b) > 0) + return (1); + + a.data = b.data; + a.size = b.size; + } + + return (0); +} diff --git a/src/heap/heap.c b/src/heap/heap.c new file mode 100644 index 00000000..3d70bf40 --- /dev/null +++ b/src/heap/heap.c @@ -0,0 +1,2530 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __heap_bulk __P((DBC *, DBT *, u_int32_t)); +static int __heap_getpage __P((DBC *, u_int32_t, u_int8_t *)); +static int __heapc_close __P((DBC *, db_pgno_t, int *)); +static int __heapc_del __P((DBC *, u_int32_t)); +static int __heapc_destroy __P((DBC *)); +static int __heapc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __heapc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __heapc_reloc __P((DBC *, DBT *, DBT *)); +static int __heapc_reloc_partial __P((DBC *, DBT *, DBT *)); +static int __heapc_split __P((DBC *, DBT *, DBT *, int)); + +/* + * Acquire a new page/lock. If we are already holding a page and a lock + * we discard those and get the new ones. In this case we can use + * LCK_COUPLE to save trips to lock manager. If we are not holding a page or + * locks, we just get a new lock and page. Lock release done with a + * transactional lock put. + */ +#undef ACQUIRE +#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, flags, mflags, ret) do { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + if ((pagep) != NULL) { \ + ret = __memp_fput(__mpf, \ + (dbc)->thread_info, pagep, dbc->priority); \ + pagep = NULL; \ + } \ + if ((ret) == 0 && STD_LOCKING(dbc)) \ + ret = __db_lget(dbc, \ + LOCK_ISSET(lock) ? 
LCK_COUPLE : 0, \ + lpgno, mode, flags, &(lock)); \ + if ((ret) == 0) \ + ret = __memp_fget(__mpf, &(fpgno), \ + (dbc)->thread_info, (dbc)->txn, mflags, &(pagep)); \ +} while (0) + +/* Acquire a new page/lock for a heap cursor */ +#undef ACQUIRE_CUR +#define ACQUIRE_CUR(dbc, mode, p, flags, mflags, ret) do { \ + HEAP_CURSOR *__cp = (HEAP_CURSOR *)(dbc)->internal; \ + if (p != __cp->pgno) \ + __cp->pgno = PGNO_INVALID; \ + ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, flags, mflags, ret); \ + if ((ret) == 0) { \ + __cp->pgno = p; \ + __cp->lock_mode = (mode); \ + } \ +} while (0) + +/* Discard the current page/lock for a cursor, indicate txn lock release */ +#undef DISCARD +#define DISCARD(dbc, pagep, lock, tlock, ret) do { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + int __t_ret; \ + __t_ret = 0; \ + if ((pagep) != NULL) { \ + __t_ret = __memp_fput(__mpf, \ + (dbc)->thread_info, pagep, dbc->priority); \ + pagep = NULL; \ + } \ + if (__t_ret != 0 && (ret) == 0) \ + ret = __t_ret; \ + if (tlock == 1) \ + __t_ret = __TLPUT((dbc), lock); \ + else \ + __t_ret = __LPUT((dbc), lock); \ + if (__t_ret != 0 && (ret) == 0) \ + ret = __t_ret; \ +} while (0) + +/* + * __heapc_init -- + * Initialize the access private portion of a cursor + * + * PUBLIC: int __heapc_init __P((DBC *)); + */ +int +__heapc_init(dbc) + DBC *dbc; +{ + ENV *env; + int ret; + + env = dbc->env; + + if (dbc->internal == NULL) + if ((ret = __os_calloc( + env, 1, sizeof(HEAP_CURSOR), &dbc->internal)) != 0) + return (ret); + + /* Initialize methods. 
*/ + dbc->close = dbc->c_close = __dbc_close_pp; + dbc->cmp = __dbc_cmp_pp; + dbc->count = dbc->c_count = __dbc_count_pp; + dbc->del = dbc->c_del = __dbc_del_pp; + dbc->dup = dbc->c_dup = __dbc_dup_pp; + dbc->get = dbc->c_get = __dbc_get_pp; + dbc->pget = dbc->c_pget = __dbc_pget_pp; + dbc->put = dbc->c_put = __dbc_put_pp; + dbc->am_bulk = __heap_bulk; + dbc->am_close = __heapc_close; + dbc->am_del = __heapc_del; + dbc->am_destroy = __heapc_destroy; + dbc->am_get = __heapc_get; + dbc->am_put = __heapc_put; + dbc->am_writelock = NULL; + + return (0); +} + +static int +__heap_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + COMPQUIET(dbc, NULL); + COMPQUIET(data, NULL); + COMPQUIET(flags, 0); + + return (EINVAL); +} + +static int +__heapc_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + DB_MPOOLFILE *mpf; + HEAP_CURSOR *cp; + int ret; + + COMPQUIET(root_pgno, 0); + COMPQUIET(rmroot, 0); + + cp = (HEAP_CURSOR *)dbc->internal; + mpf = dbc->dbp->mpf; + ret = 0; + + /* Release the page/lock held by the cursor. */ + DISCARD(dbc, cp->page, cp->lock, 1, ret); + if (ret == 0 && !LOCK_ISSET(cp->lock)) + cp->lock_mode = DB_LOCK_NG; + + return (ret); +} + +static int +__heapc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DB_HEAP_RID next_rid, orig_rid; + DB_MPOOLFILE *mpf; + DBT hdr_dbt, log_dbt; + HEAP *h; + HEAPHDR *hdr; + HEAPPG *rpage; + HEAP_CURSOR *cp; + db_pgno_t region_pgno; + int oldspacebits, ret, spacebits, t_ret; + u_int16_t data_size, size; + + dbp = dbc->dbp; + mpf = dbp->mpf; + h = dbp->heap_internal; + cp = (HEAP_CURSOR *)dbc->internal; + rpage = NULL; + COMPQUIET(flags, 0); + + /* + * We need to be able to reset the cursor after deleting a record split + * across multiple pages. + */ + orig_rid.pgno = cp->pgno; + orig_rid.indx = cp->indx; + + /* + * This code is always called with a page lock but no page. 
+ */ + DB_ASSERT(dbp->env, cp->page == NULL); + + /* We have a read lock, but need a write lock. */ +start: if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, + LCK_COUPLE, cp->pgno, DB_LOCK_WRITE, 0, &cp->lock)) != 0) + return (ret); + + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &cp->page)) != 0) + return (ret); + + HEAP_CALCSPACEBITS(dbp, HEAP_FREESPACE(dbp, cp->page), oldspacebits); + + hdr = (HEAPHDR *)P_ENTRY(dbp, cp->page, cp->indx); + data_size = DB_ALIGN(hdr->size, sizeof(u_int32_t)); + size = data_size + HEAP_HDRSIZE(hdr); + if (size < sizeof(HEAPSPLITHDR)) + size = sizeof(HEAPSPLITHDR); + if (F_ISSET(hdr, HEAP_RECSPLIT) && !F_ISSET(hdr, HEAP_RECLAST)) { + next_rid.pgno = F_ISSET(hdr, HEAP_RECLAST) ? + PGNO_INVALID : ((HEAPSPLITHDR *)hdr)->nextpg; + next_rid.indx = F_ISSET(hdr, HEAP_RECLAST) ? + PGNO_INVALID : ((HEAPSPLITHDR *)hdr)->nextindx; + } else { + next_rid.pgno = PGNO_INVALID; + next_rid.indx = 0; + } + + /* Log the deletion. */ + if (DBC_LOGGING(dbc)) { + hdr_dbt.data = hdr; + hdr_dbt.size = HEAP_HDRSIZE(hdr); + log_dbt.data = (u_int8_t *)hdr + hdr_dbt.size; + log_dbt.size = data_size; + if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), + 0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx, + size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + if ((ret = __heap_ditem(dbc, cp->page, cp->indx, size)) != 0) + goto err; + + /* + * If the deleted item lived in a region prior to our current, back up + * the current region, giving us a chance to reuse the newly available + * space on the next insert. + */ + region_pgno = HEAP_REGION_PGNO(dbp, cp->pgno); + if (region_pgno < h->curregion) + h->curregion = region_pgno; + + HEAP_CALCSPACEBITS(dbp, HEAP_FREESPACE(dbp, cp->page), spacebits); + + if (spacebits != oldspacebits) { + /* + * Get the region page. 
We never lock the region page, the data + * page lock locks the corresponding bits in the bitmap and + * latching serializes access. + */ + if ((ret = __memp_fget(mpf, ®ion_pgno, + dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) + goto err; + HEAP_SETSPACE(dbp, rpage, + cp->pgno - region_pgno - 1, spacebits); + } + +err: if (rpage != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + rpage = NULL; + + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + cp->page = NULL; + + if (ret == 0 && next_rid.pgno != PGNO_INVALID) { + cp->pgno = next_rid.pgno; + cp->indx = next_rid.indx; + goto start; + } + + cp->pgno = orig_rid.pgno; + cp->indx = orig_rid.indx; + + return (ret); +} + +/* + * __heap_ditem -- + * Remove an item from a page. + * + * PUBLIC: int __heap_ditem + * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t)); + */ +int +__heap_ditem(dbc, pagep, indx, nbytes) + DBC *dbc; + PAGE *pagep; + u_int32_t indx, nbytes; +{ + DB *dbp; + db_indx_t first, i, max, off, *offtbl, span; + u_int8_t *src, *dest; + + dbp = dbc->dbp; + + DB_ASSERT(dbp->env, TYPE(pagep) == P_HEAP); + DB_ASSERT(dbp->env, nbytes == DB_ALIGN(nbytes, sizeof(u_int32_t))); + DB_ASSERT(dbp->env, nbytes >= sizeof(HEAPSPLITHDR)); + + offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, pagep); + off = offtbl[indx]; + /* + * Find the lowest offset on the page, and adjust offsets that are about + * to be moved. If the deleted item is the lowest offset on the page, + + * everything will work, that is not a special case. + */ + max = HEAP_HIGHINDX(pagep); + first = HOFFSET(pagep); + for (i = 0; i <= max; i++) { + if (offtbl[i] < off && offtbl[i] != 0) + offtbl[i] += nbytes; + } + offtbl[indx] = 0; + + /* + * Coalesce free space at the beginning of the page. Shift all the data + * preceding the deleted entry down, overwriting the deleted entry. 
 */
	/*
	 * Close the hole left by the deleted entry: slide the span of bytes
	 * between the lowest entry offset (first) and the deleted entry (off)
	 * up by nbytes.  NOTE(review): assumes entries grow down from the end
	 * of the page, so HOFFSET grows by nbytes -- confirm against layout.
	 */
	src = (u_int8_t *)(pagep) + first;
	dest = src + nbytes;
	span = off - first;
	memmove(dest, src, span);
#ifdef DIAGNOSTIC
	memset(src, CLEAR_BYTE, nbytes);
#endif

	/* Update the page's metadata. */
	NUM_ENT(pagep)--;
	HOFFSET(pagep) += nbytes;
	if (indx < HEAP_FREEINDX(pagep))
		HEAP_FREEINDX(pagep) = indx;
	/* Shrink the high-water index past any now-empty trailing slots. */
	while (HEAP_HIGHINDX(pagep) > 0 && offtbl[HEAP_HIGHINDX(pagep)] == 0)
		HEAP_HIGHINDX(pagep)--;
	if (NUM_ENT(pagep) == 0)
		HEAP_FREEINDX(pagep) = 0;
	else if (HEAP_FREEINDX(pagep) > HEAP_HIGHINDX(pagep) + 1)
		HEAP_FREEINDX(pagep) = HEAP_HIGHINDX(pagep) + 1;

	return (0);
}

/*
 * __heapc_destroy --
 *	Close a heap cursor: free the heap-private cursor structure and
 *	clear the DBC's pointer to it.
 */
static int
__heapc_destroy(dbc)
	DBC *dbc;
{
	HEAP_CURSOR *cp;

	cp = (HEAP_CURSOR *)dbc->internal;
	__os_free(dbc->env, cp);
	dbc->internal = NULL;

	return (0);
}

/*
 * __heapc_get --
 *	Get using a cursor (heap).  The key is a DB_HEAP_RID (page number,
 *	index); on success the RID of the found record is copied back into
 *	the caller's key.  Returns DB_NOTFOUND when no matching record
 *	exists and DB_BUFFER_SMALL when a DB_DBT_USERMEM key is too small
 *	to hold a RID.
 */
static int
__heapc_get(dbc, key, data, flags, pgnop)
	DBC *dbc;
	DBT *key;
	DBT *data;
	u_int32_t flags;
	db_pgno_t *pgnop;
{
	DB *dbp;
	DB_HEAP_RID rid;
	DB_MPOOLFILE *mpf;
	DB_LOCK meta_lock;
	DBT tmp_val;
	HEAP *h;
	HEAPHDR *hdr;
	HEAPMETA *meta;
	HEAPPG *dpage;
	HEAP_CURSOR *cp;
	db_lockmode_t lock_type;
	db_pgno_t pgno;
	int cmp, f_indx, found, getpage, indx, ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	h = dbp->heap_internal;
	cp = (HEAP_CURSOR *)dbc->internal;
	LOCK_INIT(meta_lock);
	COMPQUIET(pgnop, NULL);

	/* A user-supplied key buffer must be able to hold a RID. */
	if (F_ISSET(key, DB_DBT_USERMEM) && key->ulen < DB_HEAP_RID_SZ) {
		key->size = DB_HEAP_RID_SZ;
		return (DB_BUFFER_SMALL);
	}

	/* Check for additional bits for locking. */
	if (F_ISSET(dbc, DBC_RMW))
		lock_type = DB_LOCK_WRITE;
	else
		lock_type = DB_LOCK_READ;

	ret = 0;
	found = getpage = FALSE;
	meta = NULL;
	dpage = NULL;
	switch (flags) {
	case DB_CURRENT:
		/*
		 * Acquire the current page with a read lock unless the user
		 * has asked for a write lock.  Ensure the page and record
		 * still exist.
		 */
		ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret);
		if (ret != 0) {
			if (ret == DB_PAGE_NOTFOUND)
				ret = DB_NOTFOUND;
			goto err;
		}

		if (HEAP_OFFSETTBL(dbp, cp->page)[cp->indx] == 0) {
			ret = DB_NOTFOUND;
			goto err;
		}
		dpage = (HEAPPG *)cp->page;
		hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, cp->indx);
		/* Interior pieces of a split record are not records. */
		if (F_ISSET(hdr, HEAP_RECSPLIT) &&
		    !F_ISSET(hdr, HEAP_RECFIRST)) {
			ret = DB_NOTFOUND;
			goto err;
		}

		break;
	case DB_FIRST:
		/*
		 * The region pages do not distinguish between an empty page
		 * and a page with something on it.  So, we will grab the
		 * first possible data page and look for the lowest index
		 * with data.  If the page is empty we go on to the next
		 * page and look.  If no page, then no records.
		 */
first:		pgno = FIRST_HEAP_DPAGE;
		while (!found) {
			/* Put old lock/page and get the new lock/page. */
			ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
			if (ret != 0) {
				if (ret == DB_PAGE_NOTFOUND)
					ret = DB_NOTFOUND;
				goto err;
			}
			dpage = (HEAPPG *)cp->page;
			/*
			 * The page needs to be a data page with entries on
			 * it.  If the page is good, loop through the offset
			 * table finding the first non-split record or first
			 * piece of a split record, then set up the cursor.
			 */
			if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) {
				for (indx = 0;
				    indx <= HEAP_HIGHINDX(dpage); indx++) {
					if (HEAP_OFFSETTBL(
					    dbp, dpage)[indx] == 0)
						continue;
					hdr = (HEAPHDR *)P_ENTRY(
					    dbp, dpage, indx);
					if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
					    F_ISSET(hdr, HEAP_RECFIRST)) {
						found = TRUE;
						cp->pgno = pgno;
						cp->indx = indx;
						break;
					}
				}
				if (!found)
					pgno++;
			} else
				pgno++;
		}
		break;
	case DB_LAST:
		/*
		 * Grab the metadata page to find the last page, and start
		 * there looking backwards for the record with the highest
		 * index and return that one.
		 */
last:		pgno = PGNO_BASE_MD;
		ACQUIRE(dbc, DB_LOCK_READ,
		    pgno, meta_lock, pgno, meta, 0, 0, ret);
		if (ret != 0)
			goto err;

		pgno = meta->dbmeta.last_pgno;

		/*
		 * It is possible to have another page added while we are
		 * searching backwards for the last record.  No need to block
		 * this case from occurring by keeping the meta page lock.
		 */
		DISCARD(dbc, meta, meta_lock, 1, ret);
		if (ret != 0)
			goto err;

		while (!found) {
			/* Don't look earlier than the first data page. */
			if (pgno < FIRST_HEAP_DPAGE) {
				ret = DB_NOTFOUND;
				goto err;
			}

			/* Put old lock/page and get the new lock/page. */
			ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
			if (ret != 0)
				goto err;
			dpage = (HEAPPG *)cp->page;
			/*
			 * The page needs to be a data page with entries on
			 * it.  If the page is good, search backwards until a
			 * non-split record or the first piece of a split
			 * record is found.
			 */
			if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) {
				for (indx = HEAP_HIGHINDX(dpage);
				    indx >= 0; indx--) {
					if (HEAP_OFFSETTBL(
					    dbp, dpage)[indx] == 0)
						continue;
					hdr = (HEAPHDR *)P_ENTRY(
					    dbp, dpage, indx);
					if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
					    F_ISSET(hdr, HEAP_RECFIRST)) {
						found = TRUE;
						cp->pgno = pgno;
						cp->indx = indx;
						break;
					}
				}
				if (!found)
					pgno--;
			} else
				pgno--;
		}
		break;
	case DB_NEXT_NODUP:
	case DB_NEXT:
		/* If the cursor is not initialized, behave as DB_FIRST. */
		if (dbc->internal->pgno == PGNO_INVALID)
			goto first;

		/*
		 * Acquire the current page with the lock we have already,
		 * unless the user has asked for a write lock.
		 */
		ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret);
		if (ret != 0)
			goto err;
		dpage = (HEAPPG *)cp->page;

		/* At the end of the current page, must get the next page. */
		if (cp->indx >= HEAP_HIGHINDX(dpage))
			getpage = TRUE;

		while (!found) {
			if (getpage) {
				pgno = cp->pgno + 1;

				/* Put current page/lock and get next one. */
				ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
				if (ret != 0) {
					/* Beyond last page? */
					if (ret == DB_PAGE_NOTFOUND)
						ret = DB_NOTFOUND;
					goto err;
				}
				dpage = (HEAPPG *)cp->page;

				/*
				 * If the page is not a data page, or it is a
				 * data page without entries, try again.
				 */
				if (TYPE(dpage) != P_HEAP ||
				    (TYPE(dpage) == P_HEAP &&
				    NUM_ENT(dpage) == 0))
					continue;

				/* When searching, indx gets bumped to 0. */
				cp->indx = -1;
				getpage = FALSE;
			}

			/*
			 * Bump the index and loop through the offset table
			 * finding the first nonzero entry.  If the offset is
			 * for a split record, make sure it's the first piece
			 * of the split record.  HEAP_HIGHINDX always points
			 * to the highest filled entry on the page.
			 */
			cp->indx++;
			for (indx = cp->indx;
			    indx <= HEAP_HIGHINDX(dpage); indx++) {
				if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)
					continue;
				hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
				if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
				    F_ISSET(hdr, HEAP_RECFIRST)) {
					found = TRUE;
					cp->indx = indx;
					break;
				}
			}

			/* Nothing of interest on page, so try next. */
			if (!found)
				getpage = TRUE;
		}
		break;
	case DB_PREV_NODUP:
	case DB_PREV:
		/* If the cursor is not initialized, behave as DB_LAST. */
		if (dbc->internal->pgno == PGNO_INVALID)
			goto last;

		/*
		 * Acquire the current page with the lock we have already,
		 * unless the user has asked for a write lock.
		 */
		ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret);
		if (ret != 0)
			goto err;
		dpage = (HEAPPG *)cp->page;

		/*
		 * Loop through the indexes and find the first used slot.
		 * Check if already at the first slot.
		 */
		for (f_indx = 0; (f_indx <= HEAP_HIGHINDX(dpage)) &&
		    (HEAP_OFFSETTBL(dbp, dpage)[f_indx] == 0); f_indx++)
			;

		/* At the beginning of current page, must get a new page. */
		if (cp->indx == 0 || cp->indx <= f_indx) {
			if (cp->pgno == FIRST_HEAP_DPAGE) {
				ret = DB_NOTFOUND;
				goto err;
			}
			getpage = TRUE;
		}

		while (!found) {
			if (getpage) {
				pgno = cp->pgno - 1;
				/* Do not go past the first page. */
				if (pgno < FIRST_HEAP_DPAGE) {
					ret = DB_NOTFOUND;
					goto err;
				}
				/* Put current page/lock, get prev page. */
				ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);
				if (ret != 0)
					goto err;

				dpage = (HEAPPG *)cp->page;

				/*
				 * If the page is not a data page, or it is a
				 * data page without entries, try again.
				 */
				if (TYPE(dpage) != P_HEAP ||
				    (TYPE(dpage) == P_HEAP &&
				    NUM_ENT(dpage) == 0))
					continue;

				/*
				 * When searching, this gets decremented to
				 * the highest index.
				 */
				cp->indx = HEAP_HIGHINDX(dpage) + 1;
				getpage = FALSE;
			}

			/*
			 * Decrement the index and loop through the offset
			 * table finding the previous nonzero entry.
			 */
			cp->indx--;
			for (indx = cp->indx;
			    indx >= 0; indx--) {
				if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)
					continue;
				hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
				if (!F_ISSET(hdr, HEAP_RECSPLIT) ||
				    F_ISSET(hdr, HEAP_RECFIRST)) {
					found = TRUE;
					cp->indx = indx;
					break;
				}
			}

			/* Nothing of interest on page, so try previous. */
			if (!found)
				getpage = TRUE;
		}
		break;
	case DB_GET_BOTH_RANGE:
	case DB_GET_BOTH:
	case DB_SET_RANGE:
	case DB_SET:
		pgno = ((DB_HEAP_RID *)key->data)->pgno;
		indx = ((DB_HEAP_RID *)key->data)->indx;

		/* First make sure we're trying to get a data page. */
		if (pgno == PGNO_BASE_MD ||
		    pgno == HEAP_REGION_PGNO(dbp, pgno)) {
			ret = DB_NOTFOUND;
			goto err;
		}

		/* Lock the data page and get it. */
		ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret);

		if (ret != 0) {
			if (ret == DB_PAGE_NOTFOUND)
				ret = DB_NOTFOUND;
			goto err;
		}
		dpage = (HEAPPG *)cp->page;

		/* Validate requested index; error if not in range. */
		if ((indx > HEAP_HIGHINDX(dpage)) ||
		    (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)) {
			DISCARD(dbc, cp->page, cp->lock, 0, ret);
			ret = DB_NOTFOUND;
			goto err;
		}
		hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx);
		if (F_ISSET(hdr, HEAP_RECSPLIT) &&
		    !F_ISSET(hdr, HEAP_RECFIRST)) {
			DISCARD(dbc, cp->page, cp->lock, 0, ret);
			ret = DB_NOTFOUND;
			goto err;
		}

		cp->pgno = pgno;
		cp->indx = indx;

		if (flags == DB_GET_BOTH || flags == DB_GET_BOTH_RANGE) {
			memset(&tmp_val, 0, sizeof(DBT));
			/* Does the data match? */
			if (F_ISSET(hdr, HEAP_RECSPLIT)) {
				/* Assemble the split record first. */
				tmp_val.flags = DB_DBT_MALLOC;
				if ((ret = __heapc_gsplit(
				    dbc, &tmp_val, NULL, 0)) != 0)
					goto err;
			} else {
				tmp_val.data =
				    (void *)((u_int8_t *)hdr +
				    sizeof(HEAPHDR));
				tmp_val.size = hdr->size;
			}
			cmp = __bam_defcmp(dbp, &tmp_val, data);
			if (F_ISSET(&tmp_val, DB_DBT_MALLOC))
				__os_ufree(dbp->env, tmp_val.data);
			if (cmp != 0) {
				ret = DB_NOTFOUND;
				goto err;
			}
		}

		break;
	case DB_NEXT_DUP:
	case DB_PREV_DUP:
		/* Heap has no duplicates. */
		ret = DB_NOTFOUND;
		goto err;
	default:
		/* DB_GET_RECNO, DB_JOIN_ITEM, DB_SET_RECNO are invalid. */
		ret = __db_unknown_flag(dbp->env, "__heap_get", flags);
		goto err;

	}

err:	if (ret == 0) {
		/* On success, hand the record's RID back to the caller. */
		if (key != NULL) {
			rid.pgno = cp->pgno;
			rid.indx = cp->indx;
			ret = __db_retcopy(dbp->env, key, &rid,
			    DB_HEAP_RID_SZ, &dbc->rkey->data,
			    &dbc->rkey->ulen);
			F_SET(key, DB_DBT_ISSET);
		}

	} else {
		/* On error, release any page and locks still held. */
		if (meta != NULL)
			(void)__memp_fput(mpf,
			    dbc->thread_info, meta, dbc->priority);
		if (LOCK_ISSET(meta_lock))
			(void)__LPUT(dbc, meta_lock);
		if (LOCK_ISSET(cp->lock))
			(void)__LPUT(dbc, cp->lock);
	}
	return (ret);
}

/*
 * __heapc_reloc_partial --
 *	Move data from a too-full page to a new page.
The old data page must + * be write locked before calling this method. + */ +static int +__heapc_reloc_partial(dbc, key, data) + DBC *dbc; + DBT *key; + DBT *data; +{ + DB *dbp; + DBT hdr_dbt, log_dbt, t_data, t_key; + DB_HEAP_RID last_rid, next_rid; + HEAPHDR *old_hdr; + HEAPSPLITHDR new_hdr; + HEAP_CURSOR *cp; + int add_bytes, is_first, ret; + u_int32_t buflen, data_size, dlen, doff, left, old_size; + u_int32_t remaining, size; + u_int8_t *buf, *olddata; + + dbp = dbc->dbp; + cp = (HEAP_CURSOR *)dbc->internal; + old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx)); + memset(&hdr_dbt, 0, sizeof(DBT)); + memset(&log_dbt, 0, sizeof(DBT)); + buf = NULL; + COMPQUIET(key, NULL); + + /* We only work on partial puts. */ + DB_ASSERT(dbp->env, F_ISSET(data, DB_DBT_PARTIAL)); + + /* + * Start by calculating the data_size, total size of the new record, and + * dlen, the number of bytes we will actually overwrite. Keep a local + * copy of doff, we'll adjust it as we see pieces of the record so that + * it's always relative to the current piece of data. + */ + if (F_ISSET(old_hdr, HEAP_RECSPLIT)) + old_size = ((HEAPSPLITHDR *)old_hdr)->tsize; + else + old_size = old_hdr->size; + doff = data->doff; + if (old_size < doff) { + /* Post-pending */ + dlen = data->dlen; + data_size = doff + data->size; + } else { + if (old_size - doff < data->dlen) + dlen = old_size - doff; + else + dlen = data->dlen; + data_size = old_size - dlen + data->size; + } + + /* + * We don't need a buffer large enough to hold the data_size + * bytes, just one large enough to hold the bytes that will be + * written to an individual page. We'll realloc to the necessary size + * as needed. + */ + buflen = 0; + buf = NULL; + + /* + * We are updating an existing record, which will grow into a split + * record. The strategy is to overwrite the existing record (or each + * piece of the record if the record is already split.) If the new + * record is shorter than the old, delete any extra pieces. 
If the new + * record is longer than the old, use heapc_split() to write the extra + * data. + * + * We start each loop with old_hdr pointed at the header for the old + * record and the necessary page write locked in cp->page. + */ + add_bytes = is_first = 1; + left = data_size; + memset(&t_data, 0, sizeof(DBT)); + remaining = 0; + for (;;) { + /* Figure out if we have a next piece. */ + if (F_ISSET(old_hdr, HEAP_RECSPLIT)) { + next_rid.pgno = ((HEAPSPLITHDR *)old_hdr)->nextpg; + next_rid.indx = ((HEAPSPLITHDR *)old_hdr)->nextindx; + } else { + next_rid.pgno = PGNO_INVALID; + next_rid.indx = 0; + } + + /* + * Before we delete the old data, use it to construct the new + * data. First figure out the size of the new piece, including + * any remaining data from the last piece. + */ + if (doff >= old_hdr->size) + if (F_ISSET(old_hdr, HEAP_RECLAST) || + !F_ISSET(old_hdr, HEAP_RECSPLIT)) { + /* Post-pending. */ + data_size = doff + data->size; + } else { + /* The new piece is just the old piece. */ + data_size = old_hdr->size; + } + else if (doff + dlen > old_hdr->size) + /* + * Some of the to-be-overwritten bytes are on the next + * piece, but we'll append all the new bytes to this + * piece if we haven't already written them. + */ + data_size = doff + (add_bytes ? data->size : 0); + else + data_size = old_hdr->size - + dlen + (add_bytes ? data->size : 0); + data_size += remaining; + + if (data_size > buflen) { + if (__os_realloc(dbp->env, data_size, &buf) != 0) + return (ENOMEM); + buflen = data_size; + } + t_data.data = buf; + + /* + * Adjust past any remaining bytes, they've already been moved + * to the beginning of the buffer. + */ + buf += remaining; + remaining = 0; + + olddata = (u_int8_t *)old_hdr + HEAP_HDRSIZE(old_hdr); + if (doff >= old_hdr->size) { + memcpy(buf, olddata, old_hdr->size); + doff -= old_hdr->size; + if (F_ISSET(old_hdr, HEAP_RECLAST) || + !F_ISSET(old_hdr, HEAP_RECSPLIT)) { + /* Post-pending. 
*/ + buf += old_hdr->size; + memset(buf, '\0', doff); + buf += doff; + memcpy(buf, data->data, data->size); + } + } else { + /* Preserve the first doff bytes. */ + memcpy(buf, olddata, doff); + buf += doff; + olddata += doff; + /* Copy in the new bytes, if needed. */ + if (add_bytes) { + memcpy(buf, data->data, data->size); + buf += data->size; + add_bytes = 0; + } + /* Skip dlen bytes. */ + if (doff + dlen < old_hdr->size) { + olddata += dlen; + memcpy(buf, + olddata, old_hdr->size - doff - dlen); + dlen = 0; + } else + /* + * The data to be removed spills over onto the + * following page(s). Adjust dlen to account + * for the bytes removed from this page. + */ + dlen = doff + dlen - old_hdr->size; + doff = 0; + } + buf = t_data.data; + + /* Delete the old data, after logging it. */ + old_size = DB_ALIGN( + old_hdr->size + HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t)); + if (old_size < sizeof(HEAPSPLITHDR)) + old_size = sizeof(HEAPSPLITHDR); + if (DBC_LOGGING(dbc)) { + hdr_dbt.data = old_hdr; + hdr_dbt.size = HEAP_HDRSIZE(old_hdr); + log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size; + log_dbt.size = DB_ALIGN( + old_hdr->size, sizeof(u_int32_t)); + if ((ret = __heap_addrem_log(dbp, dbc->txn, + &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + (u_int32_t)cp->indx, old_size, + &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + if ((ret = __heap_ditem( + dbc, cp->page, cp->indx, old_size)) != 0) + goto err; + + if (left == 0) + /* + * We've finished writing the new record, we're just + * cleaning up the old record now. + */ + goto next_pg; + + /* Set up the header for the new record. */ + memset(&new_hdr, 0, sizeof(HEAPSPLITHDR)); + new_hdr.std_hdr.flags = HEAP_RECSPLIT; + /* + * If next_rid.pgno == PGNO_INVALID and there's still more data, + * we'll come back and correct the header once we know where the + * next piece lives. 
+ */ + new_hdr.nextpg = next_rid.pgno; + new_hdr.nextindx = next_rid.indx; + /* + * Figure out how much we can fit on the page, rounding down to + * a multiple of 4. If we will have to expand the offset table, + * account for that. It needs to be enough to at least fit the + * split header. + */ + size = HEAP_FREESPACE(dbp, cp->page); + if (NUM_ENT(cp->page) == 0 || + HEAP_FREEINDX(cp->page) > HEAP_HIGHINDX(cp->page)) + size -= sizeof(db_indx_t); + /* Round down to a multiple of 4. */ + size = DB_ALIGN( + size - sizeof(u_int32_t) + 1, sizeof(u_int32_t)); + DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR)); + + /* + * We try to fill the page, but cannot write more than + * t_data.size bytes, that's all we have in-memory. + */ + new_hdr.std_hdr.size = (u_int16_t) + (size - sizeof(HEAPSPLITHDR)); + if (new_hdr.std_hdr.size > data_size) + new_hdr.std_hdr.size = data_size; + if (new_hdr.std_hdr.size >= left) { + new_hdr.std_hdr.size = left; + new_hdr.std_hdr.flags |= HEAP_RECLAST; + new_hdr.nextpg = PGNO_INVALID; + new_hdr.nextindx = 0; + } + if (is_first) { + new_hdr.std_hdr.flags |= HEAP_RECFIRST; + new_hdr.tsize = left; + is_first = 0; + } + + /* Now write the new data to the page. */ + t_data.size = new_hdr.std_hdr.size; + hdr_dbt.data = &new_hdr; + hdr_dbt.size = sizeof(HEAPSPLITHDR); + /* Log the write. */ + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_addrem_log(dbp, + dbc->txn, &LSN(cp->page), 0, + DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx, + size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + if ((ret = __heap_pitem(dbc, + (PAGE *)cp->page, cp->indx, size, &hdr_dbt, &t_data)) != 0) + goto err; + + left -= new_hdr.std_hdr.size; + /* + * If any data couldn't fit on this page, it has to go onto the + * next. Copy it to the front of the buffer and it will be + * preserved in the next loop. 
+ */ + if (new_hdr.std_hdr.size < data_size) { + remaining = data_size - new_hdr.std_hdr.size; + memmove(buf, buf + new_hdr.std_hdr.size, remaining); + } + + /* Get the next page, if any. */ +next_pg: if (next_rid.pgno != PGNO_INVALID) { + ACQUIRE_CUR(dbc, DB_LOCK_WRITE, + next_rid.pgno, 0, DB_MPOOL_DIRTY, ret); + if (ret != 0) + goto err; + cp->indx = next_rid.indx; + old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx)); + DB_ASSERT(dbp->env, HEAP_HIGHINDX(cp->page) <= cp->indx); + DB_ASSERT(dbp->env, F_ISSET(old_hdr, HEAP_RECSPLIT)); + } else { + /* + * Remember the final piece's RID, we may need to update + * the header after writing the rest of the record. + */ + last_rid.pgno = cp->pgno; + last_rid.indx = cp->indx; + /* Discard the page and drop the lock, txn-ally. */ + DISCARD(dbc, cp->page, cp->lock, 1, ret); + if (ret != 0) + goto err; + break; + } + } + + /* + * If there is more work to do, let heapc_split do it. After + * heapc_split returns we need to update nextpg and nextindx in the + * header of the last piece we wrote above. + * + * For logging purposes, we "delete" the old record and then "add" the + * record. This makes redo/undo work as-is, but we won't actually + * delete and re-add the record. 
+ */ + if (left > 0) { + memset(&t_key, 0, sizeof(DBT)); + t_key.size = t_key.ulen = sizeof(DB_HEAP_RID); + t_key.data = &next_rid; + t_key.flags = DB_DBT_USERMEM; + t_data.size = left; + if ((ret = __heapc_split(dbc, &t_key, &t_data, 0)) != 0) + goto err; + + ACQUIRE_CUR(dbc, + DB_LOCK_WRITE, last_rid.pgno, 0, DB_MPOOL_DIRTY, ret); + if (ret != 0) + goto err; + + cp->indx = last_rid.indx; + old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx)); + + if (DBC_LOGGING(dbc)) { + old_size = DB_ALIGN(old_hdr->size + + HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t)); + hdr_dbt.data = old_hdr; + hdr_dbt.size = HEAP_HDRSIZE(old_hdr); + log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size; + log_dbt.size = DB_ALIGN( + old_hdr->size, sizeof(u_int32_t)); + if ((ret = __heap_addrem_log(dbp, dbc->txn, + &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + (u_int32_t)cp->indx, old_size, + &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + ((HEAPSPLITHDR *)old_hdr)->nextpg = next_rid.pgno; + ((HEAPSPLITHDR *)old_hdr)->nextindx = next_rid.indx; + + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_addrem_log(dbp, dbc->txn, + &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno, + (u_int32_t)cp->indx,old_size, + &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + DISCARD(dbc, cp->page, cp->lock, 1, ret); + } + +err: if (buf != NULL) + __os_free(dbp->env, buf); + return (ret); +} + +/* + * __heapc_reloc -- + * Move data from a too-full page to a new page. The old data page must + * be write locked before calling this method. 
 */
static int
__heapc_reloc(dbc, key, data)
	DBC *dbc;
	DBT *key;
	DBT *data;
{
	DB *dbp;
	DBT hdr_dbt, log_dbt, t_data, t_key;
	DB_HEAP_RID last_rid, next_rid;
	HEAPHDR *old_hdr;
	HEAPSPLITHDR new_hdr;
	HEAP_CURSOR *cp;
	int is_first, ret;
	u_int32_t left, old_size, size;

	dbp = dbc->dbp;
	cp = (HEAP_CURSOR *)dbc->internal;
	old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx));
	memset(&hdr_dbt, 0, sizeof(DBT));
	memset(&log_dbt, 0, sizeof(DBT));
	COMPQUIET(key, NULL);

	/*
	 * We are updating an existing record, which will grow into a split
	 * record.  The strategy is to overwrite the existing record (or each
	 * piece of the record if the record is already split.)  If the new
	 * record is shorter than the old, delete any extra pieces.  If the
	 * new record is longer than the old, use heapc_split() to write the
	 * extra data.
	 *
	 * We start each loop with t_data.data positioned to the next byte to
	 * be written, old_hdr pointed at the header for the old record and
	 * the necessary page write locked in cp->page.
	 */
	is_first = 1;
	left = data->size;
	memset(&t_data, 0, sizeof(DBT));
	t_data.data = data->data;
	for (;;) {
		/* Figure out if we have a next piece. */
		if (F_ISSET(old_hdr, HEAP_RECSPLIT)) {
			next_rid.pgno = ((HEAPSPLITHDR *)old_hdr)->nextpg;
			next_rid.indx = ((HEAPSPLITHDR *)old_hdr)->nextindx;
		} else {
			next_rid.pgno = PGNO_INVALID;
			next_rid.indx = 0;
		}

		/* Delete the old data, after logging it. */
		old_size = DB_ALIGN(
		    old_hdr->size + HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t));
		if (old_size < sizeof(HEAPSPLITHDR))
			old_size = sizeof(HEAPSPLITHDR);
		if (DBC_LOGGING(dbc)) {
			hdr_dbt.data = old_hdr;
			hdr_dbt.size = HEAP_HDRSIZE(old_hdr);
			log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size;
			log_dbt.size = DB_ALIGN(
			    old_hdr->size, sizeof(u_int32_t));
			if ((ret = __heap_addrem_log(dbp, dbc->txn,
			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
			    (u_int32_t)cp->indx, old_size,
			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
				goto err;
		} else
			LSN_NOT_LOGGED(LSN(cp->page));
		if ((ret = __heap_ditem(
		    dbc, cp->page, cp->indx, old_size)) != 0)
			goto err;

		if (left == 0)
			/*
			 * We've finished writing the new record, we're just
			 * cleaning up the old record now.
			 */
			goto next_pg;

		/* Set up the header for the new record. */
		memset(&new_hdr, 0, sizeof(HEAPSPLITHDR));
		new_hdr.std_hdr.flags = HEAP_RECSPLIT;
		/* We'll set this later if next_rid.pgno == PGNO_INVALID. */
		new_hdr.nextpg = next_rid.pgno;
		new_hdr.nextindx = next_rid.indx;
		/*
		 * Figure out how much we can fit on the page, rounding down
		 * to a multiple of 4.  If we will have to expand the offset
		 * table, account for that.  It needs to be enough to at
		 * least fit the split header.
		 */
		size = HEAP_FREESPACE(dbp, cp->page);
		if (NUM_ENT(cp->page) == 0 ||
		    HEAP_FREEINDX(cp->page) > HEAP_HIGHINDX(cp->page))
			size -= sizeof(db_indx_t);
		/* Round down to a multiple of 4. */
		size = DB_ALIGN(
		    size - sizeof(u_int32_t) + 1, sizeof(u_int32_t));
		DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR));
		new_hdr.std_hdr.size =
		    (u_int16_t)(size - sizeof(HEAPSPLITHDR));
		if (new_hdr.std_hdr.size >= left) {
			new_hdr.std_hdr.size = left;
			new_hdr.std_hdr.flags |= HEAP_RECLAST;
			new_hdr.nextpg = PGNO_INVALID;
			new_hdr.nextindx = 0;
		}
		if (is_first) {
			new_hdr.std_hdr.flags |= HEAP_RECFIRST;
			new_hdr.tsize = left;
			is_first = 0;
		}

		/* Now write the new data to the page. */
		t_data.size = new_hdr.std_hdr.size;
		hdr_dbt.data = &new_hdr;
		hdr_dbt.size = sizeof(HEAPSPLITHDR);
		/* Log the write. */
		if (DBC_LOGGING(dbc)) {
			if ((ret = __heap_addrem_log(dbp,
			    dbc->txn, &LSN(cp->page), 0,
			    DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx,
			    size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0)
				goto err;
		} else
			LSN_NOT_LOGGED(LSN(cp->page));
		if ((ret = __heap_pitem(dbc,
		    (PAGE *)cp->page, cp->indx, size,
		    &hdr_dbt, &t_data)) != 0)
			goto err;

		left -= new_hdr.std_hdr.size;
		/* Advance past the bytes just written. */
		t_data.data = (u_int8_t *)(t_data.data) +
		    new_hdr.std_hdr.size;

		/* Get the next page, if any. */
next_pg:	if (next_rid.pgno != PGNO_INVALID) {
			ACQUIRE_CUR(dbc, DB_LOCK_WRITE,
			    next_rid.pgno, 0, DB_MPOOL_DIRTY, ret);
			if (ret != 0)
				goto err;
			cp->indx = next_rid.indx;
			old_hdr = (HEAPHDR *)(P_ENTRY(dbp,
			    cp->page, cp->indx));
		} else {
			/*
			 * Remember the final piece's RID, we may need to
			 * update the header after writing the rest of the
			 * record.
			 */
			last_rid.pgno = cp->pgno;
			last_rid.indx = cp->indx;
			/* Discard the page and drop the lock, txn-ally. */
			DISCARD(dbc, cp->page, cp->lock, 1, ret);
			if (ret != 0)
				goto err;
			break;
		}
	}

	/*
	 * If there is more work to do, let heapc_split do it.  After
	 * heapc_split returns we need to update nextpg and nextindx in the
	 * header of the last piece we wrote above.
	 *
	 * For logging purposes, we "delete" the old record and then "add"
	 * the record.  This makes redo/undo work as-is, but we won't
	 * actually delete and re-add the record.
	 */
	if (left > 0) {
		memset(&t_key, 0, sizeof(DBT));
		t_key.size = t_key.ulen = sizeof(DB_HEAP_RID);
		t_key.data = &next_rid;
		t_key.flags = DB_DBT_USERMEM;
		t_data.size = left;
		if ((ret = __heapc_split(dbc, &t_key, &t_data, 0)) != 0)
			goto err;

		ACQUIRE_CUR(dbc,
		    DB_LOCK_WRITE, last_rid.pgno, 0, DB_MPOOL_DIRTY, ret);
		if (ret != 0)
			goto err;

		cp->indx = last_rid.indx;
		old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx));

		if (DBC_LOGGING(dbc)) {
			old_size = DB_ALIGN(old_hdr->size +
			    HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t));
			hdr_dbt.data = old_hdr;
			hdr_dbt.size = HEAP_HDRSIZE(old_hdr);
			log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size;
			log_dbt.size = DB_ALIGN(
			    old_hdr->size, sizeof(u_int32_t));
			if ((ret = __heap_addrem_log(dbp, dbc->txn,
			    &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno,
			    (u_int32_t)cp->indx, old_size,
			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
				goto err;
		} else
			LSN_NOT_LOGGED(LSN(cp->page));

		/* Chain the last piece to the newly-split pieces. */
		((HEAPSPLITHDR *)old_hdr)->nextpg = next_rid.pgno;
		((HEAPSPLITHDR *)old_hdr)->nextindx = next_rid.indx;

		if (DBC_LOGGING(dbc)) {
			if ((ret = __heap_addrem_log(dbp, dbc->txn,
			    &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno,
			    (u_int32_t)cp->indx, old_size,
			    &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
				goto err;
		} else
			LSN_NOT_LOGGED(LSN(cp->page));

		DISCARD(dbc, cp->page, cp->lock, 1, ret);
	}

err:	return (ret);
}

/*
 * __heapc_put --
 *
 * Put using a cursor.  If the given key exists, update the associated data.
 * If the given key does not exist, return an error.
 */
static int
__heapc_put(dbc, key, data, flags, pgnop)
	DBC *dbc;
	DBT *key;
	DBT *data;
	u_int32_t flags;
	db_pgno_t *pgnop;
{
	DB *dbp;
	DBT hdr_dbt, log_dbt, new_data;
	DB_MPOOLFILE *mpf;
	HEAPHDR hdr, *old_hdr;
	HEAP_CURSOR *cp;
	PAGE *rpage;
	db_pgno_t region_pgno;
	int oldspace, ret, space, t_ret;
	u_int32_t data_size, dlen, new_size, old_flags, old_size, tot_size;
	u_int8_t *buf, *olddata, *src, *dest;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	cp = (HEAP_CURSOR *)dbc->internal;
	rpage = NULL;
	buf = dest = src = NULL;
	dlen = 0;

	if (flags != DB_CURRENT) {
		/* We're going to write following the get, so use RMW. */
		old_flags = dbc->flags;
		F_SET(dbc, DBC_RMW);
		ret = __heapc_get(dbc, key, data, DB_SET, pgnop);
		F_CLR(key, DB_DBT_ISSET);
		dbc->flags = old_flags;
		if (ret != 0)
			return (ret);
		else if (flags == DB_NOOVERWRITE)
			return (DB_KEYEXIST);
		if ((ret = __memp_dirty(mpf, &cp->page,
		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
			return (ret);
	} else {
		/* We have a read lock, but need a write lock. */
		if (STD_LOCKING(dbc) && cp->lock_mode != DB_LOCK_WRITE &&
		    (ret = __db_lget(dbc,
		    LCK_COUPLE, cp->pgno, DB_LOCK_WRITE, 0,
		    &cp->lock)) != 0)
			return (ret);

		if ((ret = __memp_fget(mpf, &cp->pgno, dbc->thread_info,
		    dbc->txn, DB_MPOOL_DIRTY, &cp->page)) != 0)
			return (ret);
	}

	/* We've got the page locked and stored in cp->page. */
	HEAP_CALCSPACEBITS(dbp, HEAP_FREESPACE(dbp, cp->page), oldspace);

	/*
	 * Figure out the spacing issue.  There is a very rare corner case
	 * where we don't have enough space on the page to expand the data.
	 * Splitting the record results in a larger header, if the page is
	 * jam packed there might not be room for the larger header.
	 *
	 * hdr->size is the size of the stored data, it doesn't include any
	 * padding.
	 */
	old_hdr = (HEAPHDR *)(P_ENTRY(dbp, cp->page, cp->indx));
	/* Need data.size + header size, 4-byte aligned. */
	old_size =
	    DB_ALIGN(old_hdr->size + HEAP_HDRSIZE(old_hdr),
	    sizeof(u_int32_t));
	if (old_size < sizeof(HEAPSPLITHDR))
		old_size = sizeof(HEAPSPLITHDR);
	if (F_ISSET(data, DB_DBT_PARTIAL)) {
		/* For a partial put, compute the resulting record size. */
		if (F_ISSET(old_hdr, HEAP_RECSPLIT))
			tot_size = ((HEAPSPLITHDR *)old_hdr)->tsize;
		else
			tot_size = old_hdr->size;
		if (tot_size < data->doff) {
			/* Post-pending. */
			dlen = data->dlen;
			data_size = data->doff + data->size;
		} else {
			if (tot_size - data->doff < data->dlen)
				dlen = tot_size - data->doff;
			else
				dlen = data->dlen;
			data_size = tot_size - dlen + data->size;
		}
	} else
		data_size = data->size;
	new_size = DB_ALIGN(
	    data_size + sizeof(HEAPHDR), sizeof(u_int32_t));
	if (new_size < sizeof(HEAPSPLITHDR))
		new_size = sizeof(HEAPSPLITHDR);

	/* Check whether we actually have enough space on this page. */
	if (new_size > old_size &&
	    new_size - old_size > HEAP_FREESPACE(dbp, cp->page)) {
		/*
		 * We've got to split the record, not enough room on the
		 * page.  Splitting the record will remove old_size bytes
		 * and introduce at least sizeof(HEAPSPLITHDR).
		 */
		if (F_ISSET(data, DB_DBT_PARTIAL))
			return (__heapc_reloc_partial(dbc, key, data));
		else
			return (__heapc_reloc(dbc, key, data));
	}

	memset(&new_data, 0, sizeof(DBT));
	new_data.size = data_size;
	if (F_ISSET(data, DB_DBT_PARTIAL)) {
		/*
		 * Before replacing the old data, we need to use it to build
		 * the new data.
		 */
		if ((ret = __os_malloc(dbp->env, data_size, &buf)) != 0)
			goto err;
		new_data.data = buf;

		/*
		 * Preserve data->doff bytes at the start, or all of the old
		 * record plus padding, if post-pending.
		 */
		olddata = (u_int8_t *)old_hdr + sizeof(HEAPHDR);
		if (data->doff > old_hdr->size) {
			memcpy(buf, olddata, old_hdr->size);
			buf += old_hdr->size;
			memset(buf, '\0', data->doff - old_hdr->size);
			buf += data->doff - old_hdr->size;
		} else {
			memcpy(buf, olddata, data->doff);
			buf += data->doff;
		}

		/* Now copy in the user's data. */
		memcpy(buf, data->data, data->size);
		buf += data->size;

		/*
		 * Fill in remaining data from the old record, skipping dlen
		 * bytes.
		 */
		if (data->doff < old_hdr->size) {
			olddata += data->doff + data->dlen;
			memcpy(buf,
			    olddata,
			    old_hdr->size - data->doff - data->dlen);
		}
	} else {
		new_data.data = data->data;
	}

	/*
	 * Do the update by deleting the old record and writing the new
	 * record.  Start by logging the entire operation.
	 */
	memset(&hdr, 0, sizeof(HEAPHDR));
	hdr.size = data_size;
	if (DBC_LOGGING(dbc)) {
		hdr_dbt.data = old_hdr;
		hdr_dbt.size = HEAP_HDRSIZE(old_hdr);
		log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size;
		log_dbt.size = DB_ALIGN(old_hdr->size, sizeof(u_int32_t));
		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
		    0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx,
		    old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0)
			goto err;
		hdr_dbt.data = &hdr;
		hdr_dbt.size = HEAP_HDRSIZE(&hdr);
		if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page),
		    0, DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx,
		    new_size, &hdr_dbt, &new_data, &LSN(cp->page))) != 0)
			goto err;
	} else
		LSN_NOT_LOGGED(LSN(cp->page));

	if ((ret = __heap_ditem(dbc, cp->page, cp->indx, old_size)) != 0)
		goto err;
	hdr_dbt.data = &hdr;
	hdr_dbt.size = HEAP_HDRSIZE(&hdr);
	if ((ret = __heap_pitem(dbc,
	    (PAGE *)cp->page, cp->indx, new_size,
	    &hdr_dbt, &new_data)) != 0)
		goto err;

	/* Check whether we need to update the space bitmap. */
	HEAP_CALCSPACEBITS(dbp, HEAP_FREESPACE(dbp, cp->page), space);

	if (space != oldspace) {
		/* Get the region page with an exclusive latch.
*/ + region_pgno = HEAP_REGION_PGNO(dbp, cp->pgno); + + if ((ret = __memp_fget(mpf, ®ion_pgno, + dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) + goto err; + + HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space); + } + +err: if (rpage != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (F_ISSET(data, DB_DBT_PARTIAL)) + __os_free(dbp->env, new_data.data); + + if (ret != 0 && LOCK_ISSET(cp->lock)) + (void)__TLPUT(dbc, cp->lock); + + return (ret); +} + +/* + * __heap_getpage -- + * Return a page with sufficient free space. The page will be write locked + * and marked dirty. + */ +static int +__heap_getpage(dbc, size, avail) + DBC *dbc; + u_int32_t size; + u_int8_t *avail; +{ + DB *dbp; + DBMETA *meta; + DB_LOCK meta_lock; + DB_LSN meta_lsn; + DB_MPOOLFILE *mpf; + HEAP *h; + HEAPPG *rpage; + HEAP_CURSOR *cp; + db_pgno_t data_pgno, *lkd_pgs, meta_pgno, region_pgno, start_region; + int i, lk_mode, max, p, ret, space, start, t_ret; + + LOCK_INIT(meta_lock); + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (HEAP_CURSOR *)dbc->internal; + h = dbp->heap_internal; + start_region = region_pgno = h->curregion; + max = HEAP_REGION_SIZE(dbp); + i = ret = t_ret = 0; + lkd_pgs = NULL; + + /* + * The algorithm for finding a page: + * + * Look in the space bitmap of the current region page for a data page + * with at least size bytes free. Once we find a page, try to lock it + * and if we get the lock we're done. + * + * Don't wait for a locked region page, just move on to the next region + * page, creating it if it doesn't exist. If the size of the heap + * database is not constrained, just keep creating regions and extending + * the database until we find a page with space. If the database size + * is constrained, loop back to the first region page from the final + * region page. If we wind up making it all the way back to where our + * search began, we need to start waiting for locked region pages. 
If + * we finish another loop through the database waiting for every region + * page, we know there's no room. + */ + + /* + * Figure out the % of the page the data will occupy and translate that + * to the relevant bit-map value we need to look for. + */ + HEAP_CALCSPACEBITS(dbp, size, space); + + /* + * Get the current region page, with a shared latch. On the first loop + * through a fixed size database, we move on to the next region if the + * page is locked. On the second loop, we wait for locked region + * pages. If the database isn't fixed size, we never wait, we'll + * eventually get to use one of the region pages we create. + */ + lk_mode = DB_MPOOL_TRY; +find: while ((ret = __memp_fget(mpf, ®ion_pgno, + dbc->thread_info, NULL, lk_mode, &rpage)) != 0 || + TYPE(rpage) != P_IHEAP) { + if (ret == DB_LOCK_NOTGRANTED) + goto next; + if (ret != 0 && ret != DB_PAGE_NOTFOUND) + return (ret); + /* + * The region page doesn't exist, or hasn't been initialized, + * create it, then try again. If the page exists, we have to + * drop it before initializing the region. + */ + if (ret == 0 && (ret = __memp_fput( + mpf, dbc->thread_info, rpage, dbc->priority)) != 0) + return (ret); + + if ((ret = __heap_create_region(dbc, region_pgno)) != 0) + return (ret); + } + + start = h->curpgindx; + /* + * If this is the last region page in a fixed size db, figure out the + * maximum pgno in the bitmap. + */ + if (region_pgno + max > h->maxpgno) + max = h->maxpgno - region_pgno; + /* + * Look in the bitmap for a page with sufficient free space. We use i + * in a slightly strange way. Because the 2-bits in the bitmap are only + * an estimate, there is a chance the data won't fit on the page we + * choose. In that case, we re-start the process and want to be able to + * resume this loop where we left off. 
+ */ + for (; i < max; i++) { + p = start + i; + if (p >= max) + p -= max; + if ((*avail = HEAP_SPACE(dbp, rpage, p)) > space) + continue; + data_pgno = region_pgno + p + 1; + ACQUIRE_CUR(dbc, + DB_LOCK_WRITE, data_pgno, DB_LOCK_NOWAIT, 0, ret); + /* + * If we have the lock and the page or have the lock and need to + * create the page, we're good. If we don't have the lock, try + * to find different page. + */ + if (ret == 0 || ret == DB_PAGE_NOTFOUND) + break; + else if (ret == DB_LOCK_NOTGRANTED || ret == DB_LOCK_DEADLOCK) { + ret = 0; + continue; + } else + goto err; + } + + /* + * Keep a worst case range of highest used page in the region. + */ + if (i < max && data_pgno > rpage->high_pgno) { + if ((ret = __memp_dirty(mpf, + &rpage, dbc->thread_info, NULL, dbc->priority, 0)) != 0) + goto err; + /* We might have blocked, check again */ + if (data_pgno > rpage->high_pgno) + rpage->high_pgno = data_pgno; + } + + /* Done with the region page, even if we didn't find a page. */ + if ((ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0) { + /* Did not read the data page, so we can release its lock. */ + DISCARD(dbc, cp->page, cp->lock, 0, t_ret); + goto err; + } + rpage = NULL; + + if (i >= max) { + /* + * No free pages on this region page, advance to the next region + * page. If we're at the end of a fixed size heap db, loop + * around to the first region page. There is not currently a + * data page locked. + */ +next: region_pgno += HEAP_REGION_SIZE(dbp) + 1; + + if (region_pgno > h->maxpgno) + region_pgno = FIRST_HEAP_RPAGE; + + if (region_pgno == start_region) { + /* + * We're in a fixed size db and we've looped through all + * region pages. + */ + + if (lk_mode == DB_MPOOL_TRY) { + /* + * We may have missed a region page with room, + * because we didn't wait for locked pages. Try + * another loop, waiting for all pages. + */ + lk_mode = 0; + } else { + /* + * We've seen every region page, because we + * waited for all pages. No room. 
+ */ + ret = DB_HEAP_FULL; + goto err; + } + } + + h->curregion = region_pgno; + h->curpgindx = 0; + i = 0; + goto find; + } + + /* + * At this point we have the page locked. If we have the page, we need + * to mark it dirty. If we don't have the page (or if the page is + * empty) we need to create and initialize it. + */ + if (cp->pgno == PGNO_INVALID || PGNO(cp->page) == PGNO_INVALID) { + /* + * The data page needs to be created and the metadata page needs + * to be updated. Once we get the metadata page, we must not + * jump to err, the metadata page and lock are put back here. + * + * It is possible that the page was created by an aborted txn, + * in which case the page exists but is all zeros. We still + * need to "create" it and log the creation. + * + */ + + meta_pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, LCK_ALWAYS, meta_pgno, + DB_LOCK_WRITE, DB_LOCK_NOWAIT, &meta_lock)) != 0) { + if (ret != DB_LOCK_NOTGRANTED) + goto err; + /* + * We don't want to block while having latched + * a page off the end of file. This could + * get truncated by another thread and we + * will deadlock. + */ + DISCARD(dbc, cp->page, cp->lock, 0, ret); + if (ret != 0) + goto err; + if ((ret = __db_lget(dbc, LCK_ALWAYS, meta_pgno, + DB_LOCK_WRITE, 0, &meta_lock)) != 0) + goto err; + ACQUIRE_CUR(dbc, DB_LOCK_WRITE, data_pgno, 0, 0, ret); + if (ret != 0) + goto err; + /* Check if we lost a race. */ + if (PGNO(cp->page) != PGNO_INVALID) { + if ((ret = __LPUT(dbc, meta_lock)) != 0) + goto err; + goto check; + } + } + + if ((ret = __memp_fget(mpf, &meta_pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + + /* Log the page creation. Can't jump to err if it fails. 
*/ + if (DBC_LOGGING(dbc)) + ret = __heap_pg_alloc_log(dbp, + dbc->txn, &LSN(meta), 0, &LSN(meta), meta_pgno, + data_pgno, (u_int32_t)P_HEAP, meta->last_pgno); + else + LSN_NOT_LOGGED(LSN(meta)); + + /* + * We may have created a page earlier with a larger page number + * check before updating the metadata page. + */ + if (ret == 0 && data_pgno > meta->last_pgno) + meta->last_pgno = data_pgno; + meta_lsn = LSN(meta); + + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + meta = NULL; + if (ret != 0) + goto err; + + /* If the page doesn't actually exist we need to create it. */ + if (cp->pgno == PGNO_INVALID) { + cp->pgno = data_pgno; + if ((ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &cp->page)) != 0) + goto err; + DB_ASSERT(dbp->env, cp->pgno == data_pgno); + } else if ((ret = __memp_dirty(mpf, &cp->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) { + /* Did not read the page, so we can release the lock. */ + DISCARD(dbc, cp->page, cp->lock, 0, t_ret); + goto err; + } + + /* Now that we have the page we initialize it and we're done. */ + P_INIT(cp->page, + dbp->pgsize, cp->pgno, P_INVALID, P_INVALID, 0, P_HEAP); + LSN(cp->page) = meta_lsn; + + if ((t_ret = __TLPUT(dbc, meta_lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + } else { + /* Check whether we actually have enough space on this page. */ +check: if (size + sizeof(db_indx_t) > HEAP_FREESPACE(dbp, cp->page)) { + /* Put back the page and lock, they were never used. */ + DISCARD(dbc, cp->page, cp->lock, 0, ret); + if (ret != 0) + goto err; + + /* Re-start the bitmap check on the next page. */ + i++; + goto find; + } + + if ((ret = __memp_dirty(mpf, &cp->page, + dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) { + /* Did not read the page, so we can release the lock. 
*/ + DISCARD(dbc, cp->page, cp->lock, 0, t_ret); + goto err; + } + } + + h->curpgindx = data_pgno - region_pgno - 1; +err: if (rpage != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __heap_append -- + * Add an item to a heap database. + * + * PUBLIC: int __heap_append + * PUBLIC: __P((DBC *, DBT *, DBT *)); + */ +int +__heap_append(dbc, key, data) + DBC *dbc; + DBT *data, *key; +{ + DB *dbp; + DBT tmp_dbt; + DB_HEAP_RID rid; + DB_MPOOLFILE *mpf; + HEAPPG *rpage; + HEAPHDR hdr; + HEAP_CURSOR *cp; + db_indx_t indx; + db_pgno_t region_pgno; + int ret, space, t_ret; + u_int8_t avail; + u_int32_t data_size; + + dbp = dbc->dbp; + mpf = dbp->mpf; + ret = t_ret = 0; + rpage = NULL; + cp = (HEAP_CURSOR *)dbc->internal; + + /* Need data.size + header size, 4-byte aligned. */ + if (F_ISSET(data, DB_DBT_PARTIAL)) + data_size = DB_ALIGN(data->doff + + data->size + sizeof(HEAPHDR), sizeof(u_int32_t)); + else + data_size = DB_ALIGN( + data->size + sizeof(HEAPHDR), sizeof(u_int32_t)); + + if (data_size >= HEAP_MAXDATASIZE(dbp)) + return (__heapc_split(dbc, key, data, 1)); + else if (data_size < sizeof(HEAPSPLITHDR)) + data_size = sizeof(HEAPSPLITHDR); + + if ((ret = __heap_getpage(dbc, data_size, &avail)) != 0) + goto err; + + indx = HEAP_FREEINDX(cp->page); + memset(&hdr, 0, sizeof(HEAPHDR)); + hdr.size = data->size; + if (F_ISSET(data, DB_DBT_PARTIAL)) + hdr.size += data->doff; + tmp_dbt.data = &hdr; + tmp_dbt.size = sizeof(HEAPHDR); + + /* Log the write. 
*/ + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), + 0, DB_ADD_HEAP, cp->pgno, (u_int32_t)indx, + data_size, &tmp_dbt, data, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + if ((ret = __heap_pitem( + dbc, (PAGE *)cp->page, indx, data_size, &tmp_dbt, data)) != 0) + goto err; + + rid.pgno = cp->pgno; + rid.indx = indx; + cp->indx = indx; + + /* Check whether we need to update the space bitmap. */ + HEAP_CALCSPACEBITS(dbp, HEAP_FREESPACE(dbp, cp->page), space); + + if (space != avail) { + /* Get the region page with an exclusive latch. */ + region_pgno = HEAP_REGION_PGNO(dbp, cp->pgno); + if ((ret = __memp_fget(mpf, ®ion_pgno, + dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) + goto err; + + HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space); + } + +err: if (rpage != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (cp->page != NULL) { + DISCARD(dbc, cp->page, cp->lock, 1, t_ret); + if (ret == 0) + ret = t_ret; + } + + if (ret == 0 && key != NULL) + ret = __db_retcopy(dbp->env, key, + &rid, DB_HEAP_RID_SZ, &dbc->rkey->data, &dbc->rkey->ulen); + + return (ret); +} + +static int +__heapc_split(dbc, key, data, is_first) + DBC *dbc; + DBT *key, *data; + int is_first; +{ + DB *dbp; + DBT hdr_dbt, t_data; + DB_HEAP_RID rid; + DB_MPOOLFILE *mpf; + HEAPPG *rpage; + HEAPSPLITHDR hdrs; + HEAP_CURSOR *cp; + db_indx_t indx; + db_pgno_t region_pgno; + int ret, spacebits, t_ret; + u_int32_t buflen, doff, left, size; + u_int8_t availbits, *buf; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (HEAP_CURSOR *)dbc->internal; + memset(&hdrs, 0, sizeof(HEAPSPLITHDR)); + memset(&t_data, 0, sizeof(DBT)); + hdrs.std_hdr.flags = HEAP_RECSPLIT | HEAP_RECLAST; + + doff = data->doff; + rpage = NULL; + ret = t_ret = 0; + indx = 0; + buf = NULL; + buflen = 0; + + /* + * Write the record to multiple pages, in chunks starting from the end. 
+ * To reconstruct during a get we need the RID of the next chunk, so if + * work our way from back to front during writing we always know the rid + * of the "next" chunk, it's the chunk we just wrote. + */ + t_data.data = (u_int8_t *)data->data + data->size; + left = data->size; + if (F_ISSET(data, DB_DBT_PARTIAL)) { + left += data->doff; + } + hdrs.tsize = left; + while (left > 0) { + size = DB_ALIGN(left + sizeof(HEAPSPLITHDR), sizeof(u_int32_t)); + if (size < sizeof(HEAPSPLITHDR)) + size = sizeof(HEAPSPLITHDR); + + if (size > HEAP_MAXDATASIZE(dbp)) + /* + * Data won't fit on a single page, find one at least + * 33% free. + */ + size = DB_ALIGN(dbp->pgsize / 3, sizeof(u_int32_t)); + else + hdrs.std_hdr.flags |= HEAP_RECFIRST; + + if ((ret = __heap_getpage(dbc, size, &availbits)) != 0) + return (ret); + + /* + * size is the total number of bytes being written to the page. + * The header holds the size of the data being written. + */ + if (F_ISSET(&(hdrs.std_hdr), HEAP_RECFIRST)) { + hdrs.std_hdr.size = left; + /* + * If we're called from heapc_reloc, we are only writing + * a piece of the full record and shouldn't set + * HEAP_RECFIRST. + */ + if (!is_first) + F_CLR(&(hdrs.std_hdr), HEAP_RECFIRST); + } else { + /* + * Figure out how much room is on the page. If we will + * have to expand the offset table, account for that. + */ + size = HEAP_FREESPACE(dbp, cp->page); + if (NUM_ENT(cp->page) == 0 || + HEAP_FREEINDX(cp->page) > HEAP_HIGHINDX(cp->page)) + size -= sizeof(db_indx_t); + /* Round down to a multiple of 4. */ + size = DB_ALIGN( + size - sizeof(u_int32_t) + 1, sizeof(u_int32_t)); + DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR)); + hdrs.std_hdr.size = + (u_int16_t)(size - sizeof(HEAPSPLITHDR)); + } + + /* + * t_data.data points at the end of the data left to write. Now + * that we know how much we're going to write to this page, we + * can adjust the pointer to point at the start of the data to + * be written. 
+ * + * If DB_DBT_PARTIAL is set, once data->data is exhausted, we + * have to pad with data->doff bytes (or as much as can fit on + * this page.) left - doff gives the number of bytes to use + * from data->data. Once that can't fill t_data, we have to + * start padding. + */ + t_data.data = (u_int8_t *)(t_data.data) - hdrs.std_hdr.size; + DB_ASSERT(dbp->env, (F_ISSET(data, DB_DBT_PARTIAL) || + t_data.data >= data->data)); + t_data.size = hdrs.std_hdr.size; + if (F_ISSET(data, DB_DBT_PARTIAL) && t_data.size > left - doff) { + if (buflen < t_data.size) { + if (__os_realloc( + dbp->env, t_data.size, &buf) != 0) + return (ENOMEM); + buflen = t_data.size; + } + /* + * We have to figure out how much data remains. left + * includes doff, so we need (left - doff) bytes from + * data. We also need the amount of padding that can + * fit on the page. That's the amount we can fit on the + * page minus the bytes we're taking from data. + */ + t_data.data = buf; + memset(buf, '\0', t_data.size - left + doff); + buf += t_data.size - left + doff; + memcpy(buf, data->data, left - doff); + doff -= t_data.size - left + doff; + buf = t_data.data; + } + hdr_dbt.data = &hdrs; + hdr_dbt.size = sizeof(HEAPSPLITHDR); + indx = HEAP_FREEINDX(cp->page); + + /* Log the write. */ + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_addrem_log(dbp, + dbc->txn, &LSN(cp->page), 0, + DB_ADD_HEAP, cp->pgno, (u_int32_t)indx, + size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + if ((ret = __heap_pitem(dbc, + (PAGE *)cp->page, indx, size, &hdr_dbt, &t_data)) != 0) + goto err; + F_CLR(&(hdrs.std_hdr), HEAP_RECLAST); + left -= hdrs.std_hdr.size; + + /* + * Save the rid where we just wrote, this is the "next" + * chunk. + */ + hdrs.nextpg = cp->pgno; + hdrs.nextindx = indx; + + /* Check whether we need to update the space bitmap. 
*/ + HEAP_CALCSPACEBITS(dbp, + HEAP_FREESPACE(dbp, cp->page), spacebits); + + if (spacebits != availbits) { + /* Get the region page with an exclusive latch. */ + region_pgno = HEAP_REGION_PGNO(dbp, cp->pgno); + if ((ret = __memp_fget(mpf, ®ion_pgno, + dbc->thread_info, + NULL, DB_MPOOL_DIRTY, &rpage)) != 0) + goto err; + + HEAP_SETSPACE(dbp, + rpage, cp->pgno - region_pgno - 1, spacebits); + ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority); + rpage = NULL; + if (ret != 0) + goto err; + } + + } + + rid.pgno = cp->pgno; + rid.indx = indx; + cp->indx = indx; + +err: if (rpage != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (cp->page != NULL) { + DISCARD(dbc, cp->page, cp->lock, 1, t_ret); + if (ret == 0) + ret = t_ret; + } + if (buf != NULL) + __os_free(dbp->env, buf); + + if (ret == 0 && key != NULL) + ret = __db_retcopy(dbp->env, key, + &rid, DB_HEAP_RID_SZ, &dbc->rkey->data, &dbc->rkey->ulen); + return (ret); +} + +/* + * __heapc_pitem -- + * Put an item on a heap page. Copy all bytes from the header (if any) + * first and then copy from data. + * + * PUBLIC: int __heap_pitem __P((DBC *, + * PUBLIC: PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); + */ +int +__heap_pitem(dbc, pagep, indx, nbytes, hdr, data) + DBC *dbc; + PAGE *pagep; + u_int32_t indx; + u_int32_t nbytes; + DBT *hdr, *data; +{ + DB *dbp; + u_int8_t *buf; + + dbp = dbc->dbp; + + DB_ASSERT(dbp->env, TYPE(pagep) == P_HEAP); + DB_ASSERT(dbp->env, IS_DIRTY(pagep)); + DB_ASSERT(dbp->env, nbytes == DB_ALIGN(nbytes, sizeof(u_int32_t))); + DB_ASSERT(dbp->env, DB_ALIGN(((HEAPHDR *)hdr->data)->size, + sizeof (u_int32_t)) >= data->size); + DB_ASSERT(dbp->env, nbytes >= hdr->size + data->size); + + /* + * We're writing data either as a result of DB->put or as a result of + * undo-ing a delete. If we're undo-ing a delete we just need to write + * the bytes from hdr to the page. 
Otherwise, we need to construct a
+ * heap header, etc.
+ */
+	HEAP_OFFSETTBL(dbp, pagep)[indx] = HOFFSET(pagep) - nbytes;
+	buf = P_ENTRY(dbp, pagep, indx);
+	DB_ASSERT(dbp->env, buf > (u_int8_t*)&HEAP_OFFSETTBL(dbp, pagep)[indx]);
+
+	/* Copy the header (if given) first, then the record data. */
+	if (hdr != NULL) {
+		memcpy(buf, hdr->data, hdr->size);
+		buf += hdr->size;
+	}
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		/* A partial put zero-fills the doff bytes ahead of the data. */
+		memset(buf, 0, data->doff);
+		buf += data->doff;
+	}
+	memcpy(buf, data->data, data->size);
+
+	/*
+	 * Update data page header.  If DEBUG/DIAGNOSTIC is set, the page might
+	 * be filled with 0xdb, so we can't just look for a 0 in the offset
+	 * table.  We used the first available index, so start there and scan
+	 * forward.  If the table is full, the first available index is the
+	 * highest index plus one.
+	 */
+	if (indx > HEAP_HIGHINDX(pagep)) {
+		if (NUM_ENT(pagep) == 0)
+			HEAP_FREEINDX(pagep) = 0;
+		else if (HEAP_FREEINDX(pagep) >= indx) {
+			if (indx > (u_int32_t)HEAP_HIGHINDX(pagep) + 1)
+				HEAP_FREEINDX(pagep) = HEAP_HIGHINDX(pagep) + 1;
+			else
+				HEAP_FREEINDX(pagep) = indx + 1;
+		}
+		/* Zero any offset slots skipped between old and new high. */
+		while (++HEAP_HIGHINDX(pagep) < indx)
+			HEAP_OFFSETTBL(dbp,pagep)[HEAP_HIGHINDX(pagep)] = 0;
+	} else {
+		/* Free index is the first unused slot at or above indx. */
+		for (; indx <= HEAP_HIGHINDX(pagep); indx++)
+			if (HEAP_OFFSETTBL(dbp, pagep)[indx] == 0)
+				break;
+		HEAP_FREEINDX(pagep) = indx;
+	}
+	HOFFSET(pagep) -= nbytes;
+	NUM_ENT(pagep)++;
+
+	return (0);
+}
+
+/*
+ * __heapc_dup --
+ *	Duplicate a heap cursor, such that the new one holds appropriate
+ * locks for the position of the original.
+ *
+ * PUBLIC: int __heapc_dup __P((DBC *, DBC *));
+ */
+int
+__heapc_dup(orig_dbc, new_dbc)
+	DBC *orig_dbc, *new_dbc;
+{
+	HEAP_CURSOR *orig, *new;
+
+	orig = (HEAP_CURSOR *)orig_dbc->internal;
+	new = (HEAP_CURSOR *)new_dbc->internal;
+	/* Only the cursor flags carry state the duplicate needs here. */
+	new->flags = orig->flags;
+	return (0);
+}
+
+/*
+ * __heapc_gsplit --
+ *	Get a heap split record.  The page pointed to by the cursor must
+ * be the first segment of this record.
+ *
+ * PUBLIC: int __heapc_gsplit __P((DBC *,
+ * PUBLIC:     DBT *, void **, u_int32_t *));
+ */
+int
+__heapc_gsplit(dbc, dbt, bpp, bpsz)
+	DBC *dbc;
+	DBT *dbt;
+	void **bpp;
+	u_int32_t *bpsz;
+{
+	DB *dbp;
+	DB_MPOOLFILE *mpf;
+	DB_HEAP_RID rid;
+	DB_LOCK data_lock;
+	HEAP_CURSOR *cp;
+	ENV *env;
+	HEAPPG *dpage;
+	HEAPSPLITHDR *hdr;
+	db_indx_t bytes;
+	u_int32_t curoff, needed, start, tlen;
+	u_int8_t *p, *src;
+	int putpage, ret, t_ret;
+
+	LOCK_INIT(data_lock);
+	dbp = dbc->dbp;
+	env = dbp->env;
+	mpf = dbp->mpf;
+	cp = (HEAP_CURSOR *)dbc->internal;
+	putpage = FALSE;
+	ret = 0;
+
+	/*
+	 * We should have first page, locked already in cursor.  Get the
+	 * record id out of the cursor and set up local variables.
+	 */
+	DB_ASSERT(env, cp->page != NULL);
+	rid.pgno = cp->pgno;
+	rid.indx = cp->indx;
+	dpage = cp->page;
+	hdr = (HEAPSPLITHDR *)P_ENTRY(dbp, dpage, rid.indx);
+	DB_ASSERT(env, hdr->tsize != 0);
+	tlen = hdr->tsize;
+
+	/*
+	 * If we are doing a partial retrieval, figure out how much we are
+	 * actually going to get.
+	 */
+	if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
+		start = dbt->doff;
+		if (start > tlen)
+			needed = 0;
+		else if (dbt->dlen > tlen - start)
+			needed = tlen - start;
+		else
+			needed = dbt->dlen;
+	} else {
+		start = 0;
+		needed = tlen;
+	}
+
+	/*
+	 * If the caller has not requested any data, return success. This
+	 * "early-out" also avoids setting up the streaming optimization when
+	 * no page would be retrieved. If it were removed, the streaming code
+	 * should only initialize when needed is not 0.
+	 */
+	if (needed == 0) {
+		dbt->size = 0;
+		return (0);
+	}
+
+	/*
+	 * Check if the buffer is big enough; if it is not and we are
+	 * allowed to malloc space, then we'll malloc it.  If we are
+	 * not (DB_DBT_USERMEM), then we'll set the dbt and return
+	 * appropriately.
+	 */
+	if (F_ISSET(dbt, DB_DBT_USERCOPY))
+		goto skip_alloc;
+
+	/* Allocate any necessary memory. */
+	if (F_ISSET(dbt, DB_DBT_USERMEM)) {
+		if (needed > dbt->ulen) {
+			dbt->size = needed;
+			return (DB_BUFFER_SMALL);
+		}
+	} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+		if ((ret = __os_umalloc(env, needed, &dbt->data)) != 0)
+			return (ret);
+	} else if (F_ISSET(dbt, DB_DBT_REALLOC)) {
+		if ((ret = __os_urealloc(env, needed, &dbt->data)) != 0)
+			return (ret);
+	} else if (bpsz != NULL && (*bpsz == 0 || *bpsz < needed)) {
+		if ((ret = __os_realloc(env, needed, bpp)) != 0)
+			return (ret);
+		*bpsz = needed;
+		dbt->data = *bpp;
+	} else if (bpp != NULL)
+		dbt->data = *bpp;
+	else {
+		DB_ASSERT(env,
+		    F_ISSET(dbt,
+		    DB_DBT_USERMEM | DB_DBT_MALLOC | DB_DBT_REALLOC) ||
+		    bpsz != NULL || bpp != NULL);
+		return (DB_BUFFER_SMALL);
+	}
+
+skip_alloc:
+	/*
+	 * Go through each of the pieces, copying the data on each one
+	 * into the buffer.  Never copy more than the total data length.
+	 * We are starting off with the page that is currently pointed to by
+	 * the cursor.
+	 */
+	curoff = 0;
+	dbt->size = needed;
+	for (p = dbt->data; needed > 0;) {
+		/* Check if we need any bytes from this page */
+		if (curoff + hdr->std_hdr.size >= start) {
+			bytes = hdr->std_hdr.size;
+			src = (u_int8_t *)hdr +
+			    P_TO_UINT16(sizeof(HEAPSPLITHDR));
+			if (start > curoff) {
+				src += start - curoff;
+				bytes -= start - curoff;
+			}
+			if (bytes > needed)
+				bytes = needed;
+			if (F_ISSET(dbt, DB_DBT_USERCOPY)) {
+				/*
+				 * The offset into the DBT is the total size
+				 * less the amount of data still needed.  Care
+				 * needs to be taken if doing a partial copy
+				 * beginning at an offset other than 0.
+				 */
+				if ((ret = env->dbt_usercopy(
+				    dbt, dbt->size - needed,
+				    src, bytes, DB_USERCOPY_SETDATA)) != 0) {
+					if (putpage)
+						(void)__memp_fput(
+						    mpf, dbc->thread_info,
+						    dpage, dbp->priority);
+
+					return (ret);
+				}
+			} else
+				memcpy(p, src, bytes);
+			p += bytes;
+			needed -= bytes;
+		}
+		curoff += hdr->std_hdr.size;
+
+		/* Find next record piece as long as it exists */
+		if (!F_ISSET((HEAPHDR *)hdr, HEAP_RECLAST)) {
+			rid.pgno = hdr->nextpg;
+			rid.indx = hdr->nextindx;
+
+			/*
+			 * First pass through here, we are using the
+			 * page pointed to by the cursor, and this page
+			 * will get put when the cursor is closed.
+			 * Only pages specifically gotten in this loop
+			 * need to be put back.
+			 */
+			if (putpage) {
+				if ((ret = __memp_fput(mpf, dbc->thread_info,
+				    dpage, dbp->priority) ) != 0)
+					goto err;
+				dpage = NULL;
+				if ((ret = __TLPUT(dbc, data_lock)) != 0)
+					goto err;
+			}
+
+			if ((ret = __db_lget(dbc, 0, rid.pgno,
+			    DB_LOCK_READ, 0, &data_lock)) != 0)
+				goto err;
+			if ((ret = __memp_fget(mpf, &rid.pgno,
+			    dbc->thread_info, dbc->txn, 0, &dpage)) != 0)
+				goto err;
+			hdr = (HEAPSPLITHDR *)P_ENTRY(dbp, dpage, rid.indx);
+			putpage = TRUE;
+
+			/*
+			 * If we have the last piece of this record and we're
+			 * reading the entire record, then what we need should
+			 * equal what is remaining.
+			 */
+			if (F_ISSET((HEAPHDR *)hdr, HEAP_RECLAST) &&
+			    !F_ISSET(dbt, DB_DBT_PARTIAL) &&
+			    (hdr->std_hdr.size != needed)) {
+				ret = __env_panic(env, DB_RUNRECOVERY);
+				goto err;
+			}
+		}
+	}
+
+err:	if (putpage && dpage != NULL && (t_ret = __memp_fput(mpf,
+	    dbc->thread_info, dpage, dbp->priority)) != 0 && ret == 0)
+		ret = t_ret;
+	if ((t_ret = __TLPUT(dbc, data_lock)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+/*
+ * __heapc_refresh --
+ *	do the proper set up for cursor reuse.
+ * + * PUBLIC: int __heapc_refresh __P((DBC *)); + */ +int +__heapc_refresh(dbc) + DBC *dbc; +{ + HEAP_CURSOR *cp; + + cp = (HEAP_CURSOR *)dbc->internal; + + LOCK_INIT(cp->lock); + cp->lock_mode = DB_LOCK_NG; + cp->flags = 0; + + return (0); +} diff --git a/src/heap/heap.src b/src/heap/heap.src new file mode 100644 index 00000000..e0b072a7 --- /dev/null +++ b/src/heap/heap.src @@ -0,0 +1,101 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __heap + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/heap.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * addrem -- Add or remove an entry from a heap db. + * + * opcode: identifies if this is an add or delete. + * fileid: file identifier of the file being modified. + * pgno: page number. + * indx: location at which to insert or delete. + * nbytes: number of bytes added/removed to/from the page. + * hdr: header for the data item. + * dbt: data that is to be added or deleted. + * pagelsn: former lsn of the page. + */ +BEGIN addrem 49 151 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes u_int32_t lu +DBT hdr DBT s +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * pg_alloc: used to record allocating a new page in a heap database. + * + * meta_lsn: the lsn of the metadata page + * meta_pgno the metadata page + * page_lsn: the allocated page's original lsn. + * pgno: the page allocated. + * ptype: the type of the page allocated. + * last_pgno: the last page in the file after this op (4.3+). 
+ */ +BEGIN pg_alloc 49 152 +DB fileid int32_t ld +POINTER meta_lsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +ARG pgno db_pgno_t lu +ARG ptype u_int32_t lu +ARG last_pgno db_pgno_t lu +END + +/* + * trunc_meta -- Used to record truncation of a heap database's meta page + * + * fileid: file identifier of the file being modified. + * pgno: page number. + * last_pgno: value of last_pgno on meta page + * key_count: value of key_count on meta page + * record_count: value of record_count on meta page + * curregion: value of curregion on meta page + * nregions: value of nregions on meta page + * pagelsn: former lsn of the page. + */ +BEGIN trunc_meta 49 153 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG last_pgno u_int32_t lu +ARG key_count u_int32_t lu +ARG record_count u_int32_t lu +ARG curregion u_int32_t lu +ARG nregions u_int32_t lu +POINTER pagelsn DB_LSN * lu +END + +/* + * trunc_page -- Used to record truncation of a heap database's region page + * + * fileid: file identifier of the file being modified. + * pgno: page number. + * old_data: the contents of the page before truncation + * pagelsn: former lsn of the page. + */ +BEGIN trunc_page 49 154 +DB fileid int32_t ld +ARG pgno db_pgno_t lu +DBT old_data DBT s +ARG is_region u_int32_t lu +POINTER pagelsn DB_LSN * lu +END + + diff --git a/src/heap/heap_auto.c b/src/heap/heap_auto.c new file mode 100644 index 00000000..1cb705f4 --- /dev/null +++ b/src/heap/heap_auto.c @@ -0,0 +1,73 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/heap.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __heap_addrem_desc[] = { + {LOGREC_ARG, SSZ(__heap_addrem_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__heap_addrem_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__heap_addrem_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_args, nbytes), "nbytes", "%lu"}, + {LOGREC_DBT, SSZ(__heap_addrem_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__heap_addrem_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__heap_addrem_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __heap_pg_alloc_desc[] = { + {LOGREC_DB, SSZ(__heap_pg_alloc_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__heap_pg_alloc_args, meta_lsn), "meta_lsn", ""}, + {LOGREC_ARG, SSZ(__heap_pg_alloc_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_pg_alloc_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_pg_alloc_args, ptype), "ptype", "%lu"}, + {LOGREC_ARG, SSZ(__heap_pg_alloc_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __heap_trunc_meta_desc[] = { + {LOGREC_DB, SSZ(__heap_trunc_meta_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, last_pgno), "last_pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, key_count), "key_count", "%lu"}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, record_count), "record_count", "%lu"}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, curregion), "curregion", "%lu"}, + {LOGREC_ARG, SSZ(__heap_trunc_meta_args, nregions), "nregions", "%lu"}, + {LOGREC_POINTER, SSZ(__heap_trunc_meta_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __heap_trunc_page_desc[] = { + {LOGREC_DB, SSZ(__heap_trunc_page_args, 
fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__heap_trunc_page_args, pgno), "pgno", "%lu"}, + {LOGREC_DBT, SSZ(__heap_trunc_page_args, old_data), "old_data", ""}, + {LOGREC_ARG, SSZ(__heap_trunc_page_args, is_region), "is_region", "%lu"}, + {LOGREC_POINTER, SSZ(__heap_trunc_page_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __heap_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__heap_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_addrem_recover, DB___heap_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_pg_alloc_recover, DB___heap_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_meta_recover, DB___heap_trunc_meta)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_page_recover, DB___heap_trunc_page)) != 0) + return (ret); + return (0); +} diff --git a/src/heap/heap_autop.c b/src/heap/heap_autop.c new file mode 100644 index 00000000..b767203b --- /dev/null +++ b/src/heap/heap_autop.c @@ -0,0 +1,105 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#ifdef HAVE_HEAP +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/heap.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __heap_addrem_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_addrem_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_addrem", __heap_addrem_desc, info)); +} + +/* + * PUBLIC: int __heap_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_pg_alloc_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_pg_alloc", __heap_pg_alloc_desc, info)); +} + +/* + * PUBLIC: int __heap_trunc_meta_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_trunc_meta_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_trunc_meta", __heap_trunc_meta_desc, info)); +} + +/* + * PUBLIC: int __heap_trunc_page_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_trunc_page_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_trunc_page", __heap_trunc_page_desc, info)); +} + +/* + * PUBLIC: int __heap_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__heap_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_addrem_print, 
DB___heap_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_pg_alloc_print, DB___heap_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_meta_print, DB___heap_trunc_meta)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_page_print, DB___heap_trunc_page)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_HEAP */ diff --git a/src/heap/heap_conv.c b/src/heap/heap_conv.c new file mode 100644 index 00000000..0fb8844e --- /dev/null +++ b/src/heap/heap_conv.c @@ -0,0 +1,92 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/heap.h" + +/* + * __heap_pgin -- + * Convert host-specific page layout from the host-independent format + * stored on disk. + * + * PUBLIC: int __heap_pgin __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__heap_pgin(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + return (TYPE(h) == P_HEAPMETA ? __heap_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 1)); +} + +/* + * __heap_pgout -- + * Convert host-specific page layout from the host-independent format + * stored on disk. + * + * PUBLIC: int __heap_pgout __P((DB *, db_pgno_t, void *, DBT *)); + */ +int +__heap_pgout(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + PAGE *h; + + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + return (TYPE(h) == P_HEAPMETA ? 
__heap_mswap(dbp->env, pp) : + __db_byteswap(dbp, pg, pp, pginfo->db_pagesize, 0)); +} + +/* + * __heap_mswap -- + * Swap the bytes on the heap metadata page. + * + * PUBLIC: int __heap_mswap __P((ENV *, PAGE *)); + */ +int +__heap_mswap(env, pg) + ENV *env; + PAGE *pg; +{ + u_int8_t *p; + + COMPQUIET(env, NULL); + + __db_metaswap(pg); + p = (u_int8_t *)pg + sizeof(DBMETA); + + SWAP32(p); /* curregion */ + SWAP32(p); /* nregions */ + SWAP32(p); /* gbytes */ + SWAP32(p); /* bytes */ + p += 93 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ + + return (0); +} diff --git a/src/heap/heap_method.c b/src/heap/heap_method.c new file mode 100644 index 00000000..39e70298 --- /dev/null +++ b/src/heap/heap_method.c @@ -0,0 +1,118 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" + +/* + * __heap_db_create -- + * Heap specific initialization of the DB structure. + * + * PUBLIC: int __heap_db_create __P((DB *)); + */ +int +__heap_db_create(dbp) + DB *dbp; +{ + HEAP *h; + int ret; + + if ((ret = __os_calloc(dbp->env, 1, sizeof(HEAP), &h)) != 0) + return (ret); + dbp->heap_internal = h; + h->region_size = HEAP_DEFAULT_REGION_MAX; + + dbp->get_heapsize = __heap_get_heapsize; + dbp->set_heapsize = __heap_set_heapsize; + + return (0); +} + +/* + * __heap_db_close -- + * Heap specific discard of the DB structure. + * + * PUBLIC: int __heap_db_close __P((DB *)); + */ +int +__heap_db_close(dbp) + DB *dbp; +{ + HEAP *h; + int ret; + + ret = 0; + if ((h = dbp->heap_internal) == NULL) + return (0); + + __os_free(dbp->env, h); + dbp->heap_internal = NULL; + + return (0); +} + +/* + * __heap_get_heapsize -- + * Get the initial size of the heap. 
+ * + * PUBLIC: int __heap_get_heapsize __P((DB *, u_int32_t *, u_int32_t *)); + */ +int +__heap_get_heapsize(dbp, gbytes, bytes) + DB *dbp; + u_int32_t *gbytes, *bytes; +{ + HEAP *h; + + DB_ILLEGAL_METHOD(dbp, DB_OK_HEAP); + + h = dbp->heap_internal; + *gbytes = h->gbytes; + *bytes = h->bytes; + + return (0); +} + +/* + * __heap_set_heapsize -- + * Set the initial size of the heap. + * + * PUBLIC: int __heap_set_heapsize __P((DB *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__heap_set_heapsize(dbp, gbytes, bytes, flags) + DB *dbp; + u_int32_t gbytes, bytes, flags; +{ + HEAP *h; + + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_heapsize"); + DB_ILLEGAL_METHOD(dbp, DB_OK_HEAP); + + COMPQUIET(flags, 0); + h = dbp->heap_internal; + h->gbytes = gbytes; + h->bytes = bytes; + + return (0); +} + +/* + * __heap_exist -- + * Test to see if heap exists or not, used in Perl interface + * + * PUBLIC: int __heap_exist __P((void)); + */ +int +__heap_exist() +{ + return (1); +} diff --git a/src/heap/heap_open.c b/src/heap/heap_open.c new file mode 100644 index 00000000..f8fb2921 --- /dev/null +++ b/src/heap/heap_open.c @@ -0,0 +1,427 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/fop.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" + +static void __heap_init_meta __P((DB *, HEAPMETA *, db_pgno_t, DB_LSN*)); + +/* + * __heap_open -- + * Open a heap. 
+ * + * PUBLIC: int __heap_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t)); + */ +int +__heap_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + HEAP *h; + db_pgno_t npgs; + int ret; + + h = (HEAP *)dbp->heap_internal; + COMPQUIET(name, NULL); + + ret = __heap_read_meta(dbp, ip, txn, base_pgno, flags); + + if (h->gbytes != 0 || h->bytes != 0) { + /* + * We don't have to worry about rounding with gbytes, as pgsize + * is always a multiple of 2, but we round up if bytes isn't + * a multiple of the page size. + */ + npgs = (db_pgno_t)(h->gbytes * (GIGABYTE / dbp->pgsize)); + npgs += (db_pgno_t)((h->bytes +dbp->pgsize - 1)/ dbp->pgsize); + h->maxpgno = npgs - 1; + if (h->maxpgno < FIRST_HEAP_DPAGE) { + __db_errx(dbp->env, + "requested database size is too small"); + return (EINVAL); + } + } else + /* If not fixed size heap, set maxregion to maximum value */ + h->maxpgno = UINT32_MAX; + + return (ret); +} + +/* + * __heap_metachk -- + * + * PUBLIC: int __heap_metachk __P((DB *, const char *, HEAPMETA *)); + */ +int +__heap_metachk(dbp, name, hm) + DB *dbp; + const char *name; + HEAPMETA *hm; +{ + ENV *env; + HEAP *h; + int ret; + u_int32_t vers; + + env = dbp->env; + h = (HEAP *)dbp->heap_internal; + + /* + * At this point, all we know is that the magic number is for a Heap. + * Check the version, the database may be out of date. + */ + vers = hm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 1: + break; + default: + __db_errx(env, + "%s: unsupported heap version: %lu", name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if needed. */ + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __heap_mswap(env, (PAGE *)hm)) != 0) + return (ret); + + /* Check application info against metadata info. 
*/ + if (h->gbytes != 0 || h->bytes != 0) + if (h->gbytes != hm->gbytes || h->bytes != hm->bytes) { + __db_errx(env, DB_STR_A("1155", + "%s: specified heap size does not match size set in database", + "%s"), name); + return (EINVAL); + } + + /* Set the page size. */ + dbp->pgsize = hm->dbmeta.pagesize; + + /* Copy the file's ID. */ + memcpy(dbp->fileid, hm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); +} + +/* + * __heap_read_meta -- + * Read the meta page and set up the internal structure. + * + * PUBLIC: int __heap_read_meta __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, db_pgno_t, u_int32_t)); + */ +int +__heap_read_meta(dbp, ip, txn, meta_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + db_pgno_t meta_pgno; + u_int32_t flags; +{ + DBC *dbc; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + HEAPMETA *meta; + HEAP *h; + int ret, t_ret; + + COMPQUIET(flags, 0); + + meta = NULL; + h = dbp->heap_internal; + LOCK_INIT(metalock); + mpf = dbp->mpf; + ret = 0; + + /* Get a cursor. */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + return (ret); + + /* Get the metadata page. */ + if ((ret = + __db_lget(dbc, 0, meta_pgno, DB_LOCK_READ, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &meta_pgno, ip, dbc->txn, 0, &meta)) != 0) + goto err; + + /* + * If the magic number is set, the heap has been created. Correct + * any fields that may not be right. Note, all of the local flags + * were set by DB->open. + * + * Otherwise, we'd better be in recovery or abort, in which case the + * metadata page will be created/initialized elsewhere. 
+ */ + if (meta->dbmeta.magic == DB_HEAPMAGIC) { + h->curregion = meta->curregion; + h->curpgindx = 0; + h->gbytes = meta->gbytes; + h->bytes = meta->bytes; + h->region_size = meta->region_size; + + if (PGNO(meta) == PGNO_BASE_MD && !F_ISSET(dbp, DB_AM_RECOVER)) + __memp_set_last_pgno(mpf, meta->dbmeta.last_pgno); + } else { + DB_ASSERT(dbp->env, + IS_RECOVERING(dbp->env) || F_ISSET(dbp, DB_AM_RECOVER)); + } + +err: /* Put the metadata page back. */ + if (meta != NULL && (t_ret = __memp_fput(mpf, + ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __heap_new_file -- + * Create the necessary pages to begin a new database file. + * + * PUBLIC: int __heap_new_file __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); + */ +int +__heap_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + DBT pdbt; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + ENV *env; + HEAPMETA *meta; + HEAPPG *region; + db_pgno_t pgno; + int ret, t_ret; + void *buf; + + env = dbp->env; + mpf = dbp->mpf; + buf = NULL; + + if (F_ISSET(dbp, DB_AM_INMEM)) { + /* Build the meta-data page. */ + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + LSN_NOT_LOGGED(lsn); + __heap_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + ret = __db_log_page(dbp, txn, &lsn, pgno, (PAGE *)meta); + if ((t_ret = + __memp_fput(mpf, ip, meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + meta = NULL; + if (ret != 0) + goto err; + + /* Build the first region page. 
*/ + pgno = 1; + if ((ret = __memp_fget(mpf, &pgno, + ip, txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, ®ion)) != 0) + goto err; + memset(region, 0, dbp->pgsize); + + P_INIT(region, + dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, 0, P_IHEAP); + LSN_NOT_LOGGED(region->lsn); + ret = __db_log_page( + dbp, txn, ®ion->lsn, pgno, (PAGE *)region); + if ((t_ret = __memp_fput( + mpf, ip, region, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + region = NULL; + if (ret != 0) + goto err; + } else { + memset(&pdbt, 0, sizeof(pdbt)); + + /* Build the meta-data page. */ + pginfo.db_pagesize = dbp->pgsize; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pginfo.type = dbp->type; + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + if ((ret = __os_calloc(env, 1, dbp->pgsize, &buf)) != 0) + return (ret); + meta = (HEAPMETA *)buf; + LSN_NOT_LOGGED(lsn); + __heap_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + if ((ret = + __db_pgout(dbp->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + if ((ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, fhp, + dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + meta = NULL; + + /* Build the first region page */ + memset(buf, 0, dbp->pgsize); + region = (HEAPPG *)buf; + P_INIT(region, + dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, 0, P_IHEAP); + LSN_NOT_LOGGED(region->lsn); + if ((ret = + __db_pgout(dbp->dbenv, region->pgno, region, &pdbt)) != 0) + goto err; + if ((ret = + __fop_write(env, txn, name, dbp->dirname, DB_APP_DATA, + fhp, dbp->pgsize, 1, 0, buf, dbp->pgsize, 1, F_ISSET( + dbp, DB_AM_NOT_DURABLE) ? 
DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + region = NULL; + } + +err: if (buf != NULL) + __os_free(env, buf); + return (ret); +} + +/* + * __heap_create_region -- + * Create a region page + * + * PUBLIC: int __heap_create_region __P((DBC *, db_pgno_t)); + */ +int +__heap_create_region(dbc, pgno) + DBC *dbc; + db_pgno_t pgno; +{ + DB *dbp; + DB_LOCK meta_lock; + DB_MPOOLFILE *mpf; + HEAPMETA *meta; + HEAPPG *region; + db_pgno_t meta_pgno; + int ret, t_ret; + + LOCK_INIT(meta_lock); + dbp = dbc->dbp; + mpf = dbp->mpf; + region = NULL; + + /* We may need to update the last page number on the metadata page. */ + meta_pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + LCK_ALWAYS, meta_pgno, DB_LOCK_WRITE, 0, &meta_lock)) != 0) + return (ret); + if ((ret = __memp_fget(mpf, &meta_pgno, + dbc->thread_info, NULL, DB_MPOOL_DIRTY, &meta)) != 0) { + (void)__LPUT(dbc, meta_lock); + return (ret); + } + + ret = __memp_fget(mpf, &pgno, dbc->thread_info, + NULL, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, ®ion); + + if (ret != 0 || region->pgno != 0) + /* + * There's been an error or someone got here before us and + * created the page. Either way, our work here is done. + */ + goto done; + + /* Log the page creation. */ + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_pg_alloc_log(dbp, + dbc->txn, &LSN(meta), 0, &LSN(meta), meta_pgno, + pgno, (u_int32_t)P_IHEAP, meta->dbmeta.last_pgno)) != 0) + goto done; + } else + LSN_NOT_LOGGED(LSN(&meta->dbmeta)); + + memset((void *)region, 0, dbp->pgsize); + P_INIT(region, + dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_IHEAP); + LSN(region) = LSN(&meta->dbmeta); + + /* + * We may have created a page earlier with a larger page number + * check before updating the metadata page. 
+ */ + if (pgno > meta->dbmeta.last_pgno) + meta->dbmeta.last_pgno = pgno; + if (HEAP_REGION_NUM(dbp, pgno) > meta->nregions) + meta->nregions = HEAP_REGION_NUM(dbp, pgno); + +done: if (region != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, region, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); + if ((t_ret = __TLPUT(dbc, meta_lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +static void +__heap_init_meta(dbp, meta, pgno, lsnp) + DB *dbp; + HEAPMETA *meta; + db_pgno_t pgno; + DB_LSN *lsnp; +{ + HEAP *h; + ENV *env; + + env = dbp->env; + h = dbp->heap_internal; + + memset(meta, 0, sizeof(HEAPMETA)); + meta->dbmeta.lsn = *lsnp; + meta->dbmeta.pgno = pgno; + meta->dbmeta.magic = DB_HEAPMAGIC; + meta->dbmeta.version = DB_HEAPVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = env->crypto_handle->alg; + DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_HEAPMETA; + meta->dbmeta.free = PGNO_INVALID; + meta->dbmeta.last_pgno = FIRST_HEAP_RPAGE; + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + meta->gbytes = h->gbytes; + meta->bytes = h->bytes; + if (h->region_size > HEAP_REGION_COUNT(dbp->pgsize)) + h->region_size = HEAP_REGION_COUNT(dbp->pgsize); + meta->region_size = h->region_size; + meta->nregions = 1; + meta->curregion = 1; +} diff --git a/src/heap/heap_rec.c b/src/heap/heap_rec.c new file mode 100644 index 00000000..012a834c --- /dev/null +++ b/src/heap/heap_rec.c @@ -0,0 +1,374 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" + +/* + * __heap_addrem_recover -- + * Recovery function for addrem. + * + * PUBLIC: int __heap_addrem_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_addrem_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep, *regionp; + db_pgno_t region_pgno; + int cmp_n, cmp_p, modified, oldspace, ret, space; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__heap_addrem_print); + REC_INTRO(__heap_addrem_read, ip, 1); + region_pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_HEAP) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_HEAP)) { + /* We are either redo-ing an add or undoing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_pitem(dbc, pagep, + argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0) + goto out; + modified = 1; + } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_HEAP) || + (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_HEAP)) { + /* We are either undoing an add or redo-ing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_ditem( + dbc, pagep, argp->indx, argp->nbytes)) != 0) + goto out; + modified = 1; + } + + if (modified) { + REC_FGET(mpf, ip, region_pgno, ®ionp, done); + if (DB_REDO(op)) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + + /* Update the available space bitmap, if necessary. 
*/ + HEAP_CALCSPACEBITS( + file_dbp, HEAP_FREESPACE(file_dbp, pagep), space); + oldspace = HEAP_SPACE(file_dbp, regionp, + argp->pgno - region_pgno - 1); + if (space != oldspace) { + REC_DIRTY(mpf, ip, dbc->priority, ®ionp); + HEAP_SETSPACE(file_dbp, + regionp, argp->pgno - region_pgno - 1, space); + } + if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0) + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __heap_pg_alloc_recover -- + * Recovery function for pg_alloc. + * + * PUBLIC: int __heap_pg_alloc_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_pg_alloc_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_pg_alloc_args *argp; + DB *file_dbp; + DBC *dbc; + HEAPMETA *meta; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + HEAPPG *pagep; + db_pgno_t pgno; + int cmp_n, cmp_p, ret, trunc; + + ip = ((DB_TXNHEAD *)info)->thread_info; + meta = NULL; + pagep = NULL; + + REC_PRINT(__heap_pg_alloc_print); + REC_INTRO(__heap_pg_alloc_read, ip, 0); + + trunc = 0; + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { + /* The metadata page must always exist on redo. */ + if (DB_REDO(op)) { + ret = __db_pgerr(file_dbp, pgno, ret); + goto out; + } else { + ret = 0; + goto done; + } + } + cmp_n = log_compare(lsnp, &LSN(meta)); + cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); + CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); + CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); + if (cmp_p == 0 && DB_REDO(op)) { + /* Need to redo update described. 
*/ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + LSN(meta) = *lsnp; + if (argp->pgno > meta->dbmeta.last_pgno) + meta->dbmeta.last_pgno = argp->pgno; + if (argp->ptype == P_IHEAP && + HEAP_REGION_NUM(file_dbp, argp->pgno) > meta->nregions) + meta->nregions = HEAP_REGION_NUM(file_dbp, argp->pgno); + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Need to undo update described. */ + REC_DIRTY(mpf, ip, file_dbp->priority, &meta); + LSN(meta) = argp->meta_lsn; + if (meta->dbmeta.last_pgno != argp->last_pgno) { + if (file_dbp->mpf->mfp->last_pgno == + meta->dbmeta.last_pgno) + trunc = 1; + meta->dbmeta.last_pgno = argp->last_pgno; + } + if (argp->ptype == P_IHEAP && + HEAP_REGION_NUM(file_dbp, argp->pgno) == meta->nregions) { + do + meta->nregions--; + while (argp->last_pgno < + (meta->nregions - 1) * HEAP_REGION_SIZE(file_dbp)); + } + } + /* + * Fix up the allocated page. + * If we're undoing and the page doesn't exist, there's nothing to do, + * if the page does exist we simply zero it out. + * Otherwise if we're redoing the operation, we have + * to get the page (creating it if it doesn't exist), and update its + * LSN. 
+ */ + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (DB_UNDO(op)) { + ret = 0; + goto do_meta; + } + if ((ret = __memp_fget(mpf, + &argp->pgno, ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + if (DB_REDO(op) && IS_ZERO_LSN(LSN(pagep))) { + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + P_INIT(pagep, file_dbp->pgsize, + argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype); + LSN(pagep) = *lsnp; + } else if ((cmp_n == 0 || IS_ZERO_LSN(LSN(pagep))) && DB_UNDO(op)) { + if (argp->pgno > meta->dbmeta.last_pgno) { + if (argp->pgno == file_dbp->mpf->mfp->last_pgno) + trunc = 1; + } else if (!IS_ZERO_LSN(LSN(pagep))){ + REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); + memset(pagep, 0, file_dbp->pgsize); + } + } + /* If the page is newly allocated and aborted, give it back. */ + if (pagep != NULL && trunc == 1) { + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + if ((ret = __memp_fget(mpf, + &argp->pgno, ip, NULL, DB_MPOOL_FREE, &pagep)) != 0) + goto out; + } + /* + * Keep the region high_pgno up to date This not logged so we + * always need to check it. 
+ */ + if (DB_REDO(op)) { + if ((ret = __memp_fput(mpf, + ip, pagep, file_dbp->priority)) != 0) + goto out; + pagep = NULL; + pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno); + if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) + goto out; + if (pagep->high_pgno >= argp->pgno) + goto done; + if ((ret = __memp_dirty(mpf, &pagep, ip, NULL, + DB_PRIORITY_UNCHANGED, 0)) != 0) + goto done; + pagep->high_pgno = argp->pgno; + } + +do_meta: + if (trunc == 1 && + (ret = __memp_ftruncate(mpf, NULL, ip, meta->dbmeta.last_pgno + 1, + MP_TRUNC_RECOVER | MP_TRUNC_NOCACHE)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); + if (meta != NULL) + (void)__memp_fput(mpf, ip, meta, file_dbp->priority); + REC_CLOSE; +} + +/* + * __heap_trunc_meta_recover -- + * Recovery function for trunc_meta. + * + * PUBLIC: int __heap_trunc_meta_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_meta_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_trunc_meta_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + HEAPMETA *meta; + PAGE *pagep; + int cmp_n, cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__heap_trunc_meta_print); + REC_INTRO(__heap_trunc_meta_read, ip, 1); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + meta = (HEAPMETA *)pagep; + + if (cmp_n == 0 && DB_UNDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + meta->dbmeta.last_pgno = argp->last_pgno; + meta->dbmeta.key_count = argp->key_count; + meta->dbmeta.record_count = argp->record_count; + meta->curregion = argp->curregion; + meta->nregions = argp->nregions; + LSN(meta) = argp->pagelsn; + } else if (cmp_p == 0 && DB_REDO(op)) { + REC_DIRTY(mpf, ip, dbc->priority, 
&pagep); + /* last_pgno to 1 to account for region page */ + meta->dbmeta.last_pgno = 1; + meta->dbmeta.key_count = 0; + meta->dbmeta.record_count = 0; + meta->curregion = FIRST_HEAP_RPAGE; + meta->nregions = 1; + LSN(meta) = *lsnp; + if ((ret = __memp_ftruncate(mpf, dbc->txn, + ip, PGNO_BASE_MD + 1, MP_TRUNC_NOCACHE)) != 0) + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} + +/* + * __heap_trunc_page_recover -- + * Recovery function for trunc_page. + * + * PUBLIC: int __heap_trunc_page_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_page_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_trunc_page_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int cmp_p, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__heap_trunc_page_print); + REC_INTRO(__heap_trunc_page_read, ip, 1); + + if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { + if (DB_REDO(op)) + goto done; + if ((ret = __memp_fget(mpf, + &argp->pgno, ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { + ret = __db_pgerr(file_dbp, argp->pgno, ret); + goto out; + } + } + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if (DB_UNDO(op) && IS_ZERO_LSN(LSN(pagep))) { + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + memcpy(pagep, argp->old_data.data, argp->old_data.size); + LSN(pagep) = argp->pagelsn; + } else if (cmp_p == 0 && DB_REDO(op)) { + if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) + goto out; + pagep = NULL; + if ((ret = __memp_fget(mpf, &argp->pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_FREE, &pagep)) != 0) + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} diff --git 
a/src/heap/heap_reclaim.c b/src/heap/heap_reclaim.c new file mode 100644 index 00000000..9623974a --- /dev/null +++ b/src/heap/heap_reclaim.c @@ -0,0 +1,151 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +/* + * __heap_truncate -- + * Truncate a database. + * + * PUBLIC: int __heap_truncate __P((DBC *, u_int32_t *)); + */ +int +__heap_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + DB *dbp; + DB_LOCK lock, meta_lock; + DB_MPOOLFILE *mpf; + DBT log_dbt; + HEAPHDR *hdr; + HEAPMETA *meta; + HEAPPG *pg; + db_pgno_t pgno; + int i, ret, t_ret; + u_int32_t count, next_region, region_size; + + LOCK_INIT(lock); + dbp = dbc->dbp; + mpf = dbp->mpf; + count = 0; + next_region = FIRST_HEAP_RPAGE; + region_size = HEAP_REGION_SIZE(dbp); + + /* Traverse the entire database, starting with the metadata pg. 
*/ + pgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &meta_lock)) != 0) + return (ret); + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) { + __TLPUT(dbc, lock); + goto err; + } + + for (;;) { + pgno++; + if ((ret = __db_lget(dbc, + LCK_COUPLE, pgno, DB_LOCK_WRITE, 0, &lock)) != 0) + break; + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0) { + if (ret == DB_PAGE_NOTFOUND) + ret = 0; + break; + } + if (DBC_LOGGING(dbc)) { + memset(&log_dbt, 0, sizeof(DBT)); + log_dbt.data = pg; + log_dbt.size = dbp->pgsize; + if ((ret = __heap_trunc_page_log(dbp, dbc->txn, + &LSN(pg), 0, pgno, + &log_dbt, (pgno == next_region), &LSN(pg))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(pg)); + + if (pgno == next_region) { + next_region += region_size; + } else { + /* + * We can't use pg->entries to calculate the record + * count, because it can include split records. So we + * check the header for each entry and only count + * non-split records and the first piece of split + * records. But if the page is empty, there's no work to + * do. 
+ */ + if (NUM_ENT(pg) != 0) + for (i = 0; i <= HEAP_HIGHINDX(pg); i++) { + if (HEAP_OFFSETTBL(dbp, pg)[i] == 0) + continue; + hdr = (HEAPHDR *)P_ENTRY(dbp, pg, i); + if (!F_ISSET(hdr, HEAP_RECSPLIT) || + F_ISSET(hdr, HEAP_RECFIRST)) + count++; + } + } + if ((ret = __memp_fput(mpf, + dbc->thread_info, pg, dbc->priority)) != 0) + break; + if ((ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, DB_MPOOL_FREE, &pg)) != 0) + break; + } + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + if (countp != NULL && ret == 0) + *countp = count; + + if (DBC_LOGGING(dbc)) { + if ((ret = __heap_trunc_meta_log(dbp, dbc->txn, &LSN(meta), 0, + meta->dbmeta.pgno, meta->dbmeta.last_pgno, + meta->dbmeta.key_count, meta->dbmeta.record_count, + meta->curregion, meta->nregions, &LSN(meta))) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(meta)); + meta->dbmeta.key_count = 0; + meta->dbmeta.record_count = 0; + meta->dbmeta.last_pgno = PGNO_BASE_MD + 1; + meta->curregion = 1; + meta->nregions = 1; + + if ((ret = __memp_ftruncate(mpf, dbc->txn, + dbc->thread_info, PGNO_BASE_MD + 1, MP_TRUNC_NOCACHE)) != 0) + goto err; + + /* Create the first region. 
*/ + pgno = PGNO_BASE_MD + 1; + if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, + dbc->txn, DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pg)) != 0) + goto err; + + memset(pg, 0, dbp->pgsize); + P_INIT(pg, + dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, 0, P_IHEAP); + ret = __db_log_page(dbp, dbc->txn, &pg->lsn, pgno, (PAGE *)pg); + if ((t_ret = __memp_fput( + mpf, dbc->thread_info, pg, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + +err: if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, meta_lock)) && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/heap/heap_stat.c b/src/heap/heap_stat.c new file mode 100644 index 00000000..bcf8980d --- /dev/null +++ b/src/heap/heap_stat.c @@ -0,0 +1,286 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +#ifdef HAVE_STATISTICS +/* + * __heap_stat -- + * Gather/print the heap statistics + * + * PUBLIC: int __heap_stat __P((DBC *, void *, u_int32_t)); + */ +int +__heap_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + DB *dbp; + DB_HEAP_STAT *sp; + DB_LOCK lock, metalock; + DB_MPOOLFILE *mpf; + ENV *env; + HEAPMETA *meta; + db_pgno_t metapgno; + int ret, t_ret, write_meta; + + dbp = dbc->dbp; + env = dbp->env; + + meta = NULL; + LOCK_INIT(metalock); + LOCK_INIT(lock); + mpf = dbp->mpf; + sp = NULL; + ret = t_ret = write_meta = 0; + + /* Allocate and clear the structure. */ + if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + + /* Get the metadata page for the entire database. 
*/ + metapgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, metapgno, DB_LOCK_READ, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &metapgno, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + goto err; + + sp->heap_metaflags = meta->dbmeta.flags; + sp->heap_pagecnt = meta->dbmeta.last_pgno + 1; + sp->heap_pagesize = meta->dbmeta.pagesize; + sp->heap_magic = meta->dbmeta.magic; + sp->heap_version = meta->dbmeta.version; + sp->heap_nregions = meta->nregions; + + if (LF_ISSET(DB_FAST_STAT)) { + sp->heap_nrecs = meta->dbmeta.record_count; + } else { + /* Count the entries in the database. */ + if ((ret = __heap_traverse(dbc, __heap_stat_callback, sp)) != 0) + goto err; + + write_meta = !F_ISSET(dbp, DB_AM_RDONLY) && + (!MULTIVERSION(dbp) || dbc->txn != NULL); + if (write_meta) { + ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority); + meta = NULL; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + if ((ret = __db_lget(dbc, + 0, metapgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &metapgno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + + meta->dbmeta.key_count = sp->heap_nrecs; + meta->dbmeta.record_count = sp->heap_nrecs; + } + } + + *(DB_HEAP_STAT **)spp = sp; + +err: /* Discard metadata page. */ + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0 && sp != NULL) { + __os_ufree(env, sp); + *(DB_BTREE_STAT **)spp = NULL; + } + + return (ret); +} + +/* + * __heap_stat_print -- + * Display heap statistics. 
+ * + * PUBLIC: int __heap_stat_print __P((DBC *, u_int32_t)); + */ +int +__heap_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DB_HEAP_STAT *sp; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + if ((ret = __heap_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Default Heap database information:"); + } + __db_msg(env, "%lx\tHeap magic number", (u_long)sp->heap_magic); + __db_msg(env, "%lu\tHeap version number", (u_long)sp->heap_version); + __db_dl(env, + "Underlying database page size", (u_long)sp->heap_pagesize); + __db_dl(env, + "Number of records in the database", (u_long)sp->heap_nrecs); + __db_dl(env, "Number of database pages", (u_long)sp->heap_pagecnt); + __db_dl(env, "Number of database regions", (u_long)sp->heap_nregions); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __heap_print_cursor -- + * Display the current cursor. + * + * PUBLIC: void __heap_print_cursor __P((DBC *)); + */ +void +__heap_print_cursor(dbc) + DBC *dbc; +{ + COMPQUIET(dbc, NULL); + + return; +} + +/* + * __heap_stat_callback -- + * Statistics callback. + * + * PUBLIC: int __heap_stat_callback __P((DBC *, PAGE *, void *, int *)); + */ +int +__heap_stat_callback(dbc, h, cookie, putp) + DBC *dbc; + PAGE *h; + void *cookie; + int *putp; +{ + DB *dbp; + DB_HEAP_STAT *sp; + HEAPHDR *hdr; + int i; + + dbp = dbc->dbp; + sp = cookie; + *putp = 0; + + switch (TYPE(h)) { + case P_HEAP: + /* + * We can't just use NUM_ENT, otherwise we'd mis-count split + * records. 
+ */ + for (i = 0; i < NUM_ENT(h); i++) { + hdr = (HEAPHDR *)P_ENTRY(dbp, h, i); + if (!F_ISSET(hdr, HEAP_RECSPLIT) || + F_ISSET(hdr, HEAP_RECFIRST)) + sp->heap_nrecs++; + } + break; + case P_HEAPMETA: /* Fallthrough */ + case P_IHEAP: /* Fallthrough */ + default: + break; + } + + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__heap_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbc->env)); +} +#endif + +/* + * __heap_traverse -- + * Walk a Heap database. + * + * PUBLIC: int __heap_traverse __P((DBC *, + * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *)); + */ +int +__heap_traverse(dbc, callback, cookie) + DBC *dbc; + int (*callback)__P((DBC *, PAGE *, void *, int *)); + void *cookie; +{ + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + PAGE *h; + db_pgno_t pgno; + int already_put, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + LOCK_INIT(lock); + pgno = FIRST_HEAP_DPAGE; + + for (;;) { + already_put = 0; + h = NULL; + + if ((ret = __db_lget(dbc, + 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) + break; + if ((ret = __memp_fget(mpf, + &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0) { + if (ret == DB_PAGE_NOTFOUND) + ret = 0; + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + break; + } + + ret = callback(dbc, h, cookie, &already_put); + + if (!already_put && (t_ret = __memp_fput( + mpf, dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0) + break; + pgno++; + } + + return (ret); +} diff --git a/src/heap/heap_stub.c b/src/heap/heap_stub.c new file mode 100644 index 00000000..1cb6fe7b --- /dev/null +++ b/src/heap/heap_stub.c @@ -0,0 +1,311 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id: + */ + +#ifndef HAVE_HEAP +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" + +/* + * If the library wasn't compiled with the Heap access method, various + * routines aren't available. Stub them here, returning an appropriate + * error. + */ + +/* + * __db_no_heap_am -- + * Error when a Berkeley DB build doesn't include the access method. + * + * PUBLIC: int __db_no_heap_am __P((ENV *)); + */ +int +__db_no_heap_am(env) + ENV *env; +{ + __db_errx(env, + "library build did not include support for the Heap access method"); + return (DB_OPNOTSUP); +} + +int +__heap_db_create(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__heap_db_close(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__heap_get_heapsize(dbp, gbytes, bytes) + DB *dbp; + u_int32_t *gbytes, *bytes; +{ + COMPQUIET(gbytes, NULL); + COMPQUIET(bytes, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heapc_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + COMPQUIET(new_dbc, NULL); + return (__db_no_heap_am(orig_dbc->env)); +} + +int +__heapc_gsplit(dbc, dbt, bpp, bpsz) + DBC *dbc; + DBT *dbt; + void **bpp; + u_int32_t *bpsz; +{ + COMPQUIET(dbt, NULL); + COMPQUIET(bpp, NULL); + COMPQUIET(bpsz, NULL); + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_append(dbc, key, data) + DBC *dbc; + DBT *key, *data; +{ + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + return (__db_no_heap_am(dbc->env)); +} + +int +__heapc_init(dbc) + DBC *dbc; +{ + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__heap_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__heap_meta2pgset(dbp, vdp, heapmeta, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + HEAPMETA *heapmeta; + DB *pgset; +{ + COMPQUIET(vdp, 
NULL); + COMPQUIET(heapmeta, NULL); + COMPQUIET(pgset, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_metachk(dbp, name, hm) + DB *dbp; + const char *name; + HEAPMETA *hm; +{ + COMPQUIET(name, NULL); + COMPQUIET(hm, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(name, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_open(dbp, ip, txn, name, base_pgno, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(name, NULL); + COMPQUIET(base_pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_pgin(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_pgout(dbp, pg, pp, cookie) + DB *dbp; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_heap_am(dbp->env)); +} + +void +__heap_print_cursor(dbc) + DBC *dbc; +{ + (void)__db_no_heap_am(dbc->env); +} + +int +__heapc_refresh(dbc) + DBC *dbc; +{ + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(handle, NULL); + COMPQUIET(h, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + 
COMPQUIET(flags, 0); + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + COMPQUIET(countp, NULL); + return (__db_no_heap_am(dbc->env)); +} + +int +__heap_vrfy(dbp, vdbp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdbp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(h, NULL); + COMPQUIET(vdbp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HEAPMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(meta, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_vrfy_structure(dbp, vdp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(flags, 0); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_exist() +{ + return (0); +} +#endif /* !HAVE_HEAP */ diff --git a/src/heap/heap_verify.c b/src/heap/heap_verify.c new file mode 100644 index 00000000..d33a4e76 --- /dev/null +++ b/src/heap/heap_verify.c @@ -0,0 +1,451 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/heap.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" + +static int __heap_safe_gsplit __P((DB *, VRFY_DBINFO *, PAGE *, db_indx_t, + DBT *)); +static int __heap_verify_offset_cmp __P((const void *, const void *)); + +/* + * __heap_vrfy_meta -- + * Verify the heap-specific part of a metadata page. 
+ * + * PUBLIC: int __heap_vrfy_meta __P((DB *, VRFY_DBINFO *, HEAPMETA *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__heap_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + HEAPMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + HEAP *h; + VRFY_PAGEINFO *pip; + db_pgno_t last_pgno, max_pgno, npgs; + int isbad, ret; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + isbad = 0; + /* + * Heap can't be used in subdatabases, so if this isn't set + * something very odd is going on. + */ + if (!F_ISSET(pip, VRFY_INCOMPLETE)) + EPRINT((dbp->env, DB_STR_A("1156", + "Page %lu: Heap databases must be one-per-file", + "%lu"), (u_long)pgno)); + + /* + * Check that nregions is correct. The last page in the database must + * belong to the nregion-th page. + */ + h = (HEAP *)dbp->heap_internal; + h->region_size = meta->region_size; + last_pgno = meta->dbmeta.last_pgno; + if (meta->nregions != HEAP_REGION_NUM(dbp, last_pgno)) { + EPRINT((dbp->env, DB_STR_A("1157", + "Page %lu: Number of heap regions incorrect", + "%lu"), (u_long)pgno)); + isbad = 1; + } + + /* + * Check that last_pgno doesn't surpass the end of a fixed size + * database. + */ + if (meta->gbytes != 0 || meta->bytes != 0) { + /* + * We don't have to worry about rounding with gbytes, as pgsize + * is always a multiple of 2, but we round down if bytes isn't + * a multiple of the page size. + */ + npgs = (db_pgno_t)(meta->gbytes * (GIGABYTE / dbp->pgsize)); + npgs += (db_pgno_t)(meta->bytes / dbp->pgsize); + max_pgno = npgs - 1; + if (last_pgno > max_pgno) { + EPRINT((dbp->env, DB_STR_A("1158", + "Page %lu: last_pgno beyond end of fixed size heap", + "%lu"), (u_long)pgno)); + isbad = 1; + } + } + + if (LF_ISSET(DB_SALVAGE)) + ret = __db_salvage_markdone(vdp, pgno); + + return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); +} + +/* + * __heap_vrfy -- + * Verify a heap data or internal page. 
+ * + * PUBLIC: int __heap_vrfy __P((DB *, + * PUBLIC: VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); + */ +int +__heap_vrfy(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + HEAPHDR *hdr; + int cnt, i, j, ret; + db_indx_t *offsets, *offtbl, end; + + if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) + return (ret); + + if (TYPE(h) == P_IHEAP) + /* Nothing to verify on a region page. */ + return (0); + + offtbl = HEAP_OFFSETTBL(dbp, h); + + if ((ret = __os_malloc(dbp->env, + NUM_ENT(h) * sizeof(db_indx_t), &offsets)) != 0) + return (ret); + + /* + * Build a sorted list of all the offsets in the table. Entries in the + * offset table are not always sorted. While we're here, check that + * flags are sane. + */ + cnt = 0; + for (i = 0; i <= HEAP_HIGHINDX(h); i++) { + if (offtbl[i] == 0) + /* Unused index. */ + continue; + if (cnt >= NUM_ENT(h)) { + /* Unexpected entry in the offset table. */ + EPRINT((dbp->env, DB_STR_A("1159", + "Page %lu: incorrect number of entries in page's offset table", + "%lu"), (u_long)pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + hdr = (HEAPHDR *)P_ENTRY(dbp, h, i); + if (!F_ISSET(hdr, HEAP_RECSPLIT) && + F_ISSET(hdr, HEAP_RECFIRST | HEAP_RECLAST)) { + EPRINT((dbp->env, DB_STR_A("1165", + "Page %lu: record %lu has invalid flags", + "%lu %lu"), (u_long)pgno, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + + offsets[cnt] = offtbl[i]; + cnt++; + } + if (cnt == 0) { + /* Empty page. */ + ret = 0; + goto err; + } + qsort(offsets, cnt, sizeof(db_indx_t), __heap_verify_offset_cmp); + + /* + * Now check that the record at each offset does not overlap the next + * record. We can't use the P_ENTRY macro because we've kept track of + * the offsets, not the indexes. 
+ */ + for (i = 0; i < cnt - 1; i++) { + hdr = (HEAPHDR *)((u_int8_t *)h + offsets[i]); + end = offsets[i] + HEAP_HDRSIZE(hdr) + hdr->size; + if (end > offsets[i+1]) { + /* + * Find the record number for this offset, for the error + * msg. + */ + for (j = 0; j < HEAP_HIGHINDX(h); j++) + if (offtbl[j] == offsets[i]) + break; + EPRINT((dbp->env, DB_STR_A("1160", + "Page %lu: record %lu (length %lu) overlaps next record", + "%lu %lu %lu"), + (u_long)pgno, (u_long)j, (u_long)hdr->size)); + ret = DB_VERIFY_BAD; + } + } + + /* Finally, check that the last record doesn't overflow the page */ + hdr = (HEAPHDR *)((u_int8_t *)h + offsets[i]); + end = offsets[i] + HEAP_HDRSIZE(hdr) + hdr->size; + if (end > dbp->pgsize) { + /* Find the record number for this offset, for the error msg. */ + for (j = 0; j < HEAP_HIGHINDX(h); j++) + if (offtbl[j] == offsets[i]) + break; + EPRINT((dbp->env, DB_STR_A("1161", + "Page %lu: record %lu (length %lu) beyond end of page", + "%lu %lu %lu"), + (u_long)pgno, (u_long)j, (u_long)hdr->size)); + ret = DB_VERIFY_BAD; + } + + err: __os_free(dbp->env, offsets); + return (ret); +} + +static int +__heap_verify_offset_cmp(off1, off2) + const void *off1; + const void *off2; +{ + return (*(db_indx_t *)off1 - *(db_indx_t *)off2); +} + +/* + * __heap_vrfy_structure -- + * Verify the structure of a heap database. 
+ * + * PUBLIC: int __heap_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t)); + */ +int +__heap_vrfy_structure(dbp, vdp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + u_int32_t flags; +{ + VRFY_PAGEINFO *pip; + db_pgno_t i, next_region, high_pgno; + int ret, isbad; + + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0) + return (ret); + + if (pip->type != P_HEAPMETA) { + EPRINT((dbp->env, DB_STR_A("1162", + "Page %lu: heap database has no meta page", "%lu"), + (u_long)PGNO_BASE_MD)); + isbad = 1; + goto err; + } + + if ((ret = __db_vrfy_pgset_inc( + vdp->pgset, vdp->thread_info, vdp->txn, 0)) != 0) + goto err; + + /* + * Not much structure to verify. Just make sure region pages are where + * they're supposed to be. + */ + next_region = FIRST_HEAP_RPAGE; + high_pgno = 0; + for (i = 1; i <= vdp->last_pgno; i++) { + /* Send feedback to the application about our progress. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 || + (ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) + return (ret); + if (i != next_region && + pip->type != P_HEAP && pip->type != P_INVALID) { + EPRINT((dbp->env, DB_STR_A("1163", + "Page %lu: heap database page of incorrect type %lu", + "%lu %lu"), (u_long)i, (u_long)pip->type)); + isbad = 1; + } else if (i == next_region && pip->type != P_IHEAP) { + EPRINT((dbp->env, DB_STR_A("1164", + "Page %lu: heap database missing region page (page type %lu)", + "%lu %lu"), (u_long)i, (u_long)pip->type)); + isbad = 1; + } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, + vdp->thread_info, vdp->txn, i)) != 0) + goto err; + + if (i == next_region) { + high_pgno = pip->prev_pgno; + next_region += HEAP_REGION_SIZE(dbp) + 1; + } else if (pip->type != P_INVALID && i > high_pgno) { + EPRINT((dbp->env, DB_STR_A("1166", + "Page %lu heap database page beyond high page in region", + "%lu"), (u_long) i)); + isbad = 1; + } + } + +err: if ((ret = 
__db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) + return (ret); + return (isbad == 1 ? DB_VERIFY_BAD : 0); +} + +/* + * __heap_salvage -- + * Safely dump out anything that looks like a record on an alleged heap + * data page. + * + * PUBLIC: int __heap_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, + * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__heap_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBT dbt; + HEAPHDR *hdr; + db_indx_t i, *offtbl; + int err_ret, ret, t_ret; + + COMPQUIET(flags, 0); + memset(&dbt, 0, sizeof(DBT)); + + offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, h); + err_ret = ret = t_ret = 0; + + /* + * Walk the page, dumping non-split records and retrieving split records + * when the first piece is encountered, + */ + for (i = 0; i <= HEAP_HIGHINDX(h); i++) { + if (offtbl[i] == 0) + continue; + hdr = (HEAPHDR *)P_ENTRY(dbp, h, i); + if (F_ISSET(hdr, HEAP_RECSPLIT)) { + if (!F_ISSET(hdr, HEAP_RECFIRST)) + continue; + /* + * We don't completely trust hdr->tsize if it's huge, + * gsplit() is able to realloc as needed. + */ + dbt.size = ((HEAPSPLITHDR *)hdr)->tsize; + if (dbt.size > dbp->pgsize * 4) + dbt.size = dbp->pgsize * 4; + if ((ret = + __os_malloc(dbp->env, dbt.size, &dbt.data)) != 0) + goto err; + __heap_safe_gsplit(dbp, vdp, h, i, &dbt); + } else { + dbt.data = (u_int8_t *)hdr + HEAP_HDRSIZE(hdr); + dbt.size = hdr->size; + } + + if ((ret = __db_vrfy_prdbt(&dbt, + 0, " ", handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + if (F_ISSET(hdr, HEAP_RECSPLIT)) + __os_free(dbp->env, dbt.data); + } + +err: if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) + return (t_ret); + return ((ret == 0 && err_ret != 0) ? err_ret : ret); +} + +/* + * __heap_safe_gsplit -- + * Given a page and offset, retrieve a split record. 
+ */ +static int +__heap_safe_gsplit(dbp, vdp, h, i, dbt) + DB *dbp; + VRFY_DBINFO *vdp; + PAGE *h; + db_indx_t i; + DBT *dbt; +{ + DB_MPOOLFILE *mpf; + HEAPSPLITHDR *hdr; + int gotpg, ret, t_ret; + u_int32_t bufsz, reclen; + u_int8_t *buf; + + mpf = dbp->mpf; + buf = dbt->data; + bufsz = dbt->size; + dbt->size = 0; + ret = 0; + + gotpg = 0; + for (;;) { + hdr = (HEAPSPLITHDR *)P_ENTRY(dbp, h, i); + reclen = hdr->std_hdr.size; + /* First copy the data from this page */ + if (dbt->size + reclen > bufsz) { + bufsz = dbt->size + reclen; + if ((ret = __os_realloc( + dbp->env, bufsz, &dbt->data)) != 0) + goto err; + buf = (u_int8_t *)dbt->data + dbt->size; + } + memcpy(buf, (u_int8_t *)hdr + sizeof(HEAPSPLITHDR), reclen); + buf += reclen; + dbt->size += reclen; + + /* If we're not at the end of the record, grab the next page. */ + if (F_ISSET(&hdr->std_hdr, HEAP_RECLAST)) + break; + if (gotpg && (ret = __memp_fput(mpf, + vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) + return (ret); + gotpg = 0; + if ((ret = __memp_fget(mpf, + &hdr->nextpg, vdp->thread_info, NULL, 0, &h)) != 0) + goto err; + gotpg = 1; + i = hdr->nextindx; + } + +err: if (gotpg && (t_ret = __memp_fput( + mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __heap_meta2pgset -- + * Given a known-good meta page, populate pgsetp with the db_pgno_t's + * corresponding to the pages in the heap. This is just all pages in the + * database. 
+ * + * PUBLIC: int __heap_meta2pgset __P((DB *, VRFY_DBINFO *, HEAPMETA *, DB *)); + */ +int +__heap_meta2pgset(dbp, vdp, heapmeta, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + HEAPMETA *heapmeta; + DB *pgset; +{ + db_pgno_t pgno, last; + int ret; + + COMPQUIET(dbp, NULL); + + last = heapmeta->dbmeta.last_pgno; + ret = 0; + + for (pgno = 1; pgno <= last; pgno++) + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, pgno)) != 0) + break; + return (ret); +} diff --git a/src/hmac/hmac.c b/src/hmac/hmac.c new file mode 100644 index 00000000..14df898d --- /dev/null +++ b/src/hmac/hmac.c @@ -0,0 +1,234 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * Some parts of this code originally written by Adam Stubblefield, + * -- astubble@rice.edu. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" /* for hash.h only */ +#include "dbinc/hash.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" + +#define HMAC_OUTPUT_SIZE 20 +#define HMAC_BLOCK_SIZE 64 + +static void __db_hmac __P((u_int8_t *, u_int8_t *, size_t, u_int8_t *)); + +/* + * !!! + * All of these functions use a ctx structure on the stack. The __db_SHA1Init + * call does not initialize the 64-byte buffer portion of it. The + * underlying SHA1 functions will properly pad the buffer if the data length + * is less than 64-bytes, so there isn't a chance of reading uninitialized + * memory. Although it would be cleaner to do a memset(ctx.buffer, 0, 64) + * we do not want to incur that penalty if we don't have to for performance. + */ + +/* + * __db_hmac -- + * Do a hashed MAC. 
+ */ +static void +__db_hmac(k, data, data_len, mac) + u_int8_t *k, *data, *mac; + size_t data_len; +{ + SHA1_CTX ctx; + u_int8_t key[HMAC_BLOCK_SIZE]; + u_int8_t ipad[HMAC_BLOCK_SIZE]; + u_int8_t opad[HMAC_BLOCK_SIZE]; + u_int8_t tmp[HMAC_OUTPUT_SIZE]; + int i; + + memset(key, 0x00, HMAC_BLOCK_SIZE); + memset(ipad, 0x36, HMAC_BLOCK_SIZE); + memset(opad, 0x5C, HMAC_BLOCK_SIZE); + + memcpy(key, k, HMAC_OUTPUT_SIZE); + + for (i = 0; i < HMAC_BLOCK_SIZE; i++) { + ipad[i] ^= key[i]; + opad[i] ^= key[i]; + } + + __db_SHA1Init(&ctx); + __db_SHA1Update(&ctx, ipad, HMAC_BLOCK_SIZE); + __db_SHA1Update(&ctx, data, data_len); + __db_SHA1Final(tmp, &ctx); + __db_SHA1Init(&ctx); + __db_SHA1Update(&ctx, opad, HMAC_BLOCK_SIZE); + __db_SHA1Update(&ctx, tmp, HMAC_OUTPUT_SIZE); + __db_SHA1Final(mac, &ctx); + return; +} + +/* + * __db_chksum -- + * Create a MAC/SHA1 checksum. + * + * PUBLIC: void __db_chksum __P((void *, + * PUBLIC: u_int8_t *, size_t, u_int8_t *, u_int8_t *)); + */ +void +__db_chksum(hdr, data, data_len, mac_key, store) + void *hdr; + u_int8_t *data; + size_t data_len; + u_int8_t *mac_key; + u_int8_t *store; +{ + int sumlen; + u_int32_t hash4; + + /* + * Since the checksum might be on a page of data we are checksumming + * we might be overwriting after checksumming, we zero-out the + * checksum value so that we can have a known value there when + * we verify the checksum. + * If we are passed a log header XOR in prev and len so we have + * some redundancy on these fields. Mostly we need to be sure that + * we detect a race when doing hot backups and reading a live log + * file. 
+ */ + if (mac_key == NULL) + sumlen = sizeof(u_int32_t); + else + sumlen = DB_MAC_KEY; + if (hdr == NULL) + memset(store, 0, sumlen); + else + store = ((HDR*)hdr)->chksum; + if (mac_key == NULL) { + /* Just a hash, no MAC */ + hash4 = __ham_func4(NULL, data, (u_int32_t)data_len); + if (hdr != NULL) + hash4 ^= ((HDR *)hdr)->prev ^ ((HDR *)hdr)->len; + memcpy(store, &hash4, sumlen); + } else { + __db_hmac(mac_key, data, data_len, store); + if (hdr != 0) { + ((int *)store)[0] ^= ((HDR *)hdr)->prev; + ((int *)store)[1] ^= ((HDR *)hdr)->len; + } + } + return; +} +/* + * __db_derive_mac -- + * Create a MAC/SHA1 key. + * + * PUBLIC: void __db_derive_mac __P((u_int8_t *, size_t, u_int8_t *)); + */ +void +__db_derive_mac(passwd, plen, mac_key) + u_int8_t *passwd; + size_t plen; + u_int8_t *mac_key; +{ + SHA1_CTX ctx; + + /* Compute the MAC key. mac_key must be 20 bytes. */ + __db_SHA1Init(&ctx); + __db_SHA1Update(&ctx, passwd, plen); + __db_SHA1Update(&ctx, (u_int8_t *)DB_MAC_MAGIC, strlen(DB_MAC_MAGIC)); + __db_SHA1Update(&ctx, passwd, plen); + __db_SHA1Final(mac_key, &ctx); + + return; +} + +/* + * __db_check_chksum -- + * Verify a checksum. + * + * Return 0 on success, >0 (errno) on error, -1 on checksum mismatch. + * + * PUBLIC: int __db_check_chksum __P((ENV *, + * PUBLIC: void *, DB_CIPHER *, u_int8_t *, void *, size_t, int)); + */ +int +__db_check_chksum(env, hdr, db_cipher, chksum, data, data_len, is_hmac) + ENV *env; + void *hdr; + DB_CIPHER *db_cipher; + u_int8_t *chksum; + void *data; + size_t data_len; + int is_hmac; +{ + int ret; + size_t sum_len; + u_int32_t hash4; + u_int8_t *mac_key, old[DB_MAC_KEY], new[DB_MAC_KEY]; + + /* + * If we are just doing checksumming and not encryption, then checksum + * is 4 bytes. Otherwise, it is DB_MAC_KEY size. Check for illegal + * combinations of crypto/non-crypto checksums. 
+ */ + if (is_hmac == 0) { + if (db_cipher != NULL) { + __db_errx(env, DB_STR("0195", + "Unencrypted checksum with a supplied encryption key")); + return (EINVAL); + } + sum_len = sizeof(u_int32_t); + mac_key = NULL; + } else { + if (db_cipher == NULL) { + __db_errx(env, DB_STR("0196", + "Encrypted checksum: no encryption key specified")); + return (EINVAL); + } + sum_len = DB_MAC_KEY; + mac_key = db_cipher->mac_key; + } + + /* + * !!! + * Since the checksum might be on the page, we need to have known data + * there so that we can generate the same original checksum. We zero + * it out, just like we do in __db_chksum above. + * If there is a log header, XOR the prev and len fields. + */ +retry: + if (hdr == NULL) { + memcpy(old, chksum, sum_len); + memset(chksum, 0, sum_len); + chksum = old; + } + + if (mac_key == NULL) { + /* Just a hash, no MAC */ + hash4 = __ham_func4(NULL, data, (u_int32_t)data_len); + if (hdr != NULL) + LOG_HDR_SUM(0, hdr, &hash4); + ret = memcmp((u_int32_t *)chksum, &hash4, sum_len) ? -1 : 0; + } else { + __db_hmac(mac_key, data, data_len, new); + if (hdr != NULL) + LOG_HDR_SUM(1, hdr, new); + ret = memcmp(chksum, new, sum_len) ? -1 : 0; + } + /* + * !!! + * We might be looking at an old log even with the new + * code. So, if we have a hdr, and the checksum doesn't + * match, try again without a hdr. + */ + if (hdr != NULL && ret != 0) { + hdr = NULL; + goto retry; + } + + return (ret); +} diff --git a/src/hmac/sha1.c b/src/hmac/sha1.c new file mode 100644 index 00000000..76069694 --- /dev/null +++ b/src/hmac/sha1.c @@ -0,0 +1,289 @@ +/* + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/hmac.h" + +/* +SHA-1 in C +By Steve Reid +100% Public Domain + +----------------- +Modified 7/98 +By James H. 
Brown +Still 100% Public Domain + +Corrected a problem which generated improper hash values on 16 bit machines +Routine SHA1Update changed from + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int +len) +to + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned +long len) + +The 'len' parameter was declared an int which works fine on 32 bit machines. +However, on 16 bit machines an int is too small for the shifts being done +against +it. This caused the hash function to generate incorrect values if len was +greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update(). + +Since the file IO in main() reads 16K at a time, any file 8K or larger would +be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million +"a"s). + +I also changed the declaration of variables i & j in SHA1Update to +unsigned long from unsigned int for the same reason. + +These changes should make no difference to any 32 bit implementations since +an +int and a long are the same size in those environments. + +-- +I also corrected a few compiler warnings generated by Borland C. +1. Added #include for exit() prototype +2. Removed unused variable 'j' in SHA1Final +3. Changed exit(0) to return (0) at end of main. + +ALL changes I made can be located by searching for comments containing 'JHB' +----------------- +Modified 8/98 +By Steve Reid +Still 100% public domain + +1- Removed #include and used return () instead of exit() +2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall) +3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net + +----------------- +Modified 4/01 +By Saul Kravitz +Still 100% PD +Modified to run on Compaq Alpha hardware. 
+ +*/ + +/* +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +#define SHA1HANDSOFF + +/* #include */ /* prototype for exit() - JHB */ +/* Using return () instead of exit() - SWR */ + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +#define blk0(i) is_bigendian ? block->l[i] : \ + (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5); \ + w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5); \ + w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5); \ + w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + +#ifdef VERBOSE /* SAK */ +static void __db_SHAPrintContext __P((SHA1_CTX *, char *)); +static void +__db_SHAPrintContext(context, msg) + SHA1_CTX *context; + char *msg; +{ + printf("%s (%d,%d) %x %x %x %x %x\n", + msg, + context->count[0], context->count[1], + context->state[0], + context->state[1], + context->state[2], + context->state[3], + context->state[4]); +} +#endif + +/* Hash a single 512-bit block. This is the core of the algorithm. 
*/ + +/* + * __db_SHA1Transform -- + * + * PUBLIC: void __db_SHA1Transform __P((u_int32_t *, unsigned char *)); + */ +void +__db_SHA1Transform(state, buffer) + u_int32_t *state; + unsigned char *buffer; +{ +u_int32_t a, b, c, d, e; +typedef union { + unsigned char c[64]; + u_int32_t l[16]; +} CHAR64LONG16; +CHAR64LONG16* block; + int is_bigendian; +#ifdef SHA1HANDSOFF + unsigned char workspace[64]; + + block = (CHAR64LONG16*)workspace; + memcpy(block, buffer, 64); +#else + block = (CHAR64LONG16*)buffer; +#endif + is_bigendian = __db_isbigendian(); + + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + 
R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + /* Wipe variables */ + a = b = c = d = e = 0; +} + +/* SHA1Init - Initialize new context */ + +/* + * __db_SHA1Init -- + * Initialize new context + * + * PUBLIC: void __db_SHA1Init __P((SHA1_CTX *)); + */ +void +__db_SHA1Init(context) + SHA1_CTX *context; +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + +/* Run your data through this. */ + +/* + * __db_SHA1Update -- + * Run your data through this. + * + * PUBLIC: void __db_SHA1Update __P((SHA1_CTX *, unsigned char *, + * PUBLIC: size_t)); + */ +void +__db_SHA1Update(context, data, len) + SHA1_CTX *context; + unsigned char *data; + size_t len; +{ +u_int32_t i, j; /* JHB */ + +#ifdef VERBOSE + __db_SHAPrintContext(context, DB_STR_P("before")); +#endif + j = (context->count[0] >> 3) & 63; + if ((context->count[0] += (u_int32_t)len << 3) < (len << 3)) + context->count[1]++; + context->count[1] += (u_int32_t)(len >> 29); + if ((j + len) > 63) { + memcpy(&context->buffer[j], data, (i = 64-j)); + __db_SHA1Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) { + __db_SHA1Transform(context->state, &data[i]); + } + j = 0; + } + else i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +#ifdef VERBOSE + __db_SHAPrintContext(context, DB_STR_P("after ")); +#endif +} + +/* Add padding and return the message digest. */ + +/* + * __db_SHA1Final -- + * Add padding and return the message digest. 
+ * + * PUBLIC: void __db_SHA1Final __P((unsigned char *, SHA1_CTX *)); + */ +void +__db_SHA1Final(digest, context) + unsigned char *digest; + SHA1_CTX *context; +{ +u_int32_t i; /* JHB */ +unsigned char finalcount[8]; + + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } + __db_SHA1Update(context, (unsigned char *)"\200", 1); + while ((context->count[0] & 504) != 448) { + __db_SHA1Update(context, (unsigned char *)"\0", 1); + } + __db_SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() +*/ + for (i = 0; i < 20; i++) { + digest[i] = (unsigned char) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + /* Wipe variables */ + i = 0; /* JHB */ + memset(context->buffer, 0, 64); + memset(context->state, 0, 20); + memset(context->count, 0, 8); + memset(finalcount, 0, 8); /* SWR */ +#ifdef SHA1HANDSOFF /* make SHA1Transform overwrite it's own static vars */ + __db_SHA1Transform(context->state, context->buffer); +#endif +} + +/*************************************************************/ diff --git a/src/lock/Design b/src/lock/Design new file mode 100644 index 00000000..8a52d625 --- /dev/null +++ b/src/lock/Design @@ -0,0 +1,301 @@ +Synchronization in the Locking Subsystem + +This is a document that describes how we implemented fine-grain locking +in the lock manager (that is, locking on a hash bucket level instead of +locking the entire region). We found that the increase in concurrency +was not sufficient to warrant the increase in complexity or the additional +cost of performing each lock operation. Therefore, we don't use this +any more. Should we have to do fine-grain locking in a future release, +this would be a reasonable starting point. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +1. 
Data structures +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +The lock manager maintains 3 different structures: + +Objects (__db_lockobj): + Describes an object that is locked. When used with DB, this consists + of a __db_ilock (a file identifier and a page number). + +Lockers (__db_locker): + Identifies a specific locker ID and maintains the head of a list of + locks held by a locker (for using during transaction commit/abort). + +Locks (__db_lock): + Describes a particular object lock held on behalf of a particular + locker id. + +Objects and Lockers reference Locks. + +These structures are organized via two synchronized hash tables. Each +hash table consists of two physical arrays: the array of actual hash +buckets and an array of mutexes so we can lock individual buckets, rather +than the whole table. + +One hash table contains Objects and the other hash table contains Lockers. +Objects contain two lists of locks, waiters and holders: holders currently +hold a lock on the Object, waiters are lock waiting to be granted. +Lockers are a single linked list that connects the Locks held on behalf +of the specific locker ID. + +In the diagram below: + +Locker ID #1 holds a lock on Object #1 (L1) and Object #2 (L5), and is +waiting on a lock on Object #1 (L3). + +Locker ID #2 holds a lock on Object #1 (L2) and is waiting on a lock for +Object #2 (L7). + +Locker ID #3 is waiting for a lock on Object #2 (L6). + + OBJECT ----------------------- + HASH | | + ----|------------- | + ________ _______ | | ________ | | + | |-->| O1 |--|---|-->| O2 | | | + |_______| |_____| | | |______| V | + | | W H--->L1->L2 W H--->L5 | holders + |_______| | | | | V + | | ------->L3 \ ------->L6------>L7 waiters + |_______| / \ \ + . . / \ \ + . . | \ \ + . . 
| \ ----------- + |_______| | -------------- | + | | ____|____ ___|_____ _|______ + |_______| | | | | | | + | | | LID1 | | LID2 | | LID3 | + |_______| |_______| |_______| |______| + ^ ^ ^ + | | | + ___|________________________|________|___ + LOCKER | | | | | | | | | + HASH | | | | | | | | | + | | | | | | | | | + |____|____|____|____|____|____|____|____| + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +2. Synchronization +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +There are four types of mutexes in the subsystem. + +Object mutexes; + These map one-to-one to each bucket in the Object hash table. + Holding a mutex on an Object bucket secures all the Objects in + that bucket as well as the Lock structures linked from those + Objects. All fields in the Locks EXCEPT the Locker links (the + links that attach Locks by Locker ID) are protected by these + mutexes. + +Locker mutexes: + These map one-to-one to each bucket in the Locker hash table. + Holding a mutex on a Locker bucket secures the Locker structures + and the Locker links in the Locks. + +Memory mutex: + This mutex allows calls to allocate/free memory, i.e. calls to + __db_shalloc and __db_shalloc_free, as well as manipulation of + the Object, Locker and Lock free lists. + +Region mutex: + This mutex is currently only used to protect the locker ids. + It may also be needed later to provide exclusive access to + the region for deadlock detection. + +Creating or removing a Lock requires locking both the Object lock and the +Locker lock (and eventually the shalloc lock to return the item to the +free list). + +The locking hierarchy is as follows: + + The Region mutex may never be acquired after any other mutex. + + The Object mutex may be acquired after the Region mutex. + + The Locker mutex may be acquired after the Region and Object + mutexes. + + The Memory mutex may be acquired after any mutex. 
So, if both an Object mutex and a Locker mutex are going to be acquired,
+ Promote locks for the Object. + + Enter locker_list_removal + Acquire Locker bucket mutex. + If Locker doesn't exist: + Release Locker bucket mutex + Release Object bucket mutex + Return error. + Else if Locker marked as deleted: + dont_release = TRUE + Else + Remove Lock from Locker list. + If Locker has no more locks + Remove Locker from table. + Acquire Memory mutex. + Return Locker to free list + Release Memory mutex + Release Locker bucket mutex. + Exit locker_list_removal + + If (!dont_release) + Acquire Memory mutex + Return Lock to free list + Release Memory mutex + + Exit lock_put_internal + + Release Object bucket mutex + +Release all the Locks on a specific Object (lock_vec, DB_PUT_ALL_OBJ): + + Acquire Object bucket mutex. + + For each lock on the waiter list: + lock_put_internal + For each lock on the holder list: + lock_put_internal + + Release Object bucket mutex. + +Release all the Locks for a specific Locker (lock_vec, DB_PUT_ALL): + + Acquire Locker bucket mutex. + Mark Locker deleted. + Release Locker mutex. + + For each lock on the Locker's list: + Remove from locker's list + (The lock could get put back on the free list in + lock_put and then could get reallocated and the + act of setting its locker links could clobber us.) + Perform "Release a Lock" above: skip locker_list_removal. + + Acquire Locker bucket mutex. + Remove Locker + Release Locker mutex. + + Acquire Memory mutex + Return Locker to free list + Release Memory mutex + +Deadlock detection (lock_detect): + + For each bucket in Object table + Acquire the Object bucket mutex. + create waitsfor + + For each bucket in Object table + Release the Object mutex. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +FAQ: +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Q: Why do you need generation numbers? 
+A: If a lock has been released due to a transaction abort (potentially in a + different process), and then lock is released by a thread of control + unaware of the abort, the lock might have potentially been re-allocated + to a different object. The generation numbers detect this problem. + + Note, we assume that reads/writes of lock generation numbers are atomic, + if they are not, it is theoretically possible that a re-allocated lock + could be mistaken for another lock. + +Q: Why is is safe to walk the Locker list without holding any mutexes at + all? +A: Locks are created with both the Object and Locker bucket mutexes held. + Once created, they removed in two ways: + + a) when a specific Lock is released, in which case, the Object and + Locker bucket mutexes are again held, and + + b) when all Locks for a specific Locker Id is released. + + In case b), the Locker bucket mutex is held while the Locker chain is + marked as "destroyed", which blocks any further access to the Locker + chain. Then, each individual Object bucket mutex is acquired when each + individual Lock is removed. + +Q: What are the implications of doing fine grain locking? + +A: Since we no longer globally lock the entire region, lock_vec will no + longer be atomic. We still execute the items in a lock_vec in order, + so things like lock-coupling still work, but you can't make any + guarantees about atomicity. + +Q: How do I configure for FINE_GRAIN locking? + +A: We currently do not support any automatic configuration for FINE_GRAIN + locking. When we do, will need to document that atomicity discussion + listed above (it is bug-report #553). + +Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. diff --git a/src/lock/lock.c b/src/lock/lock.c new file mode 100644 index 00000000..7a55307e --- /dev/null +++ b/src/lock/lock.c @@ -0,0 +1,2019 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" + +static int __lock_allocobj __P((DB_LOCKTAB *, u_int32_t)); +static int __lock_alloclock __P((DB_LOCKTAB *, u_int32_t)); +static int __lock_freelock __P((DB_LOCKTAB *, + struct __db_lock *, DB_LOCKER *, u_int32_t)); +static int __lock_getobj + __P((DB_LOCKTAB *, const DBT *, u_int32_t, int, DB_LOCKOBJ **)); +static int __lock_get_api __P((ENV *, + u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); +static int __lock_inherit_locks __P ((DB_LOCKTAB *, DB_LOCKER *, u_int32_t)); +static int __lock_same_family __P((DB_LOCKTAB *, DB_LOCKER *, DB_LOCKER *)); +static int __lock_put_internal __P((DB_LOCKTAB *, + struct __db_lock *, u_int32_t, u_int32_t)); +static int __lock_put_nolock __P((ENV *, DB_LOCK *, int *, u_int32_t)); +static int __lock_remove_waiter __P((DB_LOCKTAB *, + DB_LOCKOBJ *, struct __db_lock *, db_status_t)); +static int __lock_trade __P((ENV *, DB_LOCK *, DB_LOCKER *)); +static int __lock_vec_api __P((ENV *, + u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + +static const char __db_lock_invalid[] = "%s: Lock is no longer valid"; +static const char __db_locker_invalid[] = "Locker is not valid"; + +#ifdef DEBUG +extern void __db_loadme (void); +#endif + +/* + * __lock_vec_pp -- + * ENV->lock_vec pre/post processing. + * + * PUBLIC: int __lock_vec_pp __P((DB_ENV *, + * PUBLIC: u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + */ +int +__lock_vec_pp(dbenv, lid, flags, list, nlist, elistp) + DB_ENV *dbenv; + u_int32_t lid, flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_vec", DB_INIT_LOCK); + + /* Validate arguments. 
*/ + if ((ret = __db_fchk(env, + "DB_ENV->lock_vec", flags, DB_LOCK_NOWAIT)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__lock_vec_api(env, lid, flags, list, nlist, elistp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +static int +__lock_vec_api(env, lid, flags, list, nlist, elistp) + ENV *env; + u_int32_t lid, flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + DB_LOCKER *sh_locker; + int ret; + + if ((ret = + __lock_getlocker(env->lk_handle, lid, 0, &sh_locker)) == 0) + ret = __lock_vec(env, sh_locker, flags, list, nlist, elistp); + return (ret); +} + +/* + * __lock_vec -- + * ENV->lock_vec. + * + * Vector lock routine. This function takes a set of operations + * and performs them all at once. In addition, lock_vec provides + * functionality for lock inheritance, releasing all locks for a + * given locker (used during transaction commit/abort), releasing + * all locks on a given object, and generating debugging information. + * + * PUBLIC: int __lock_vec __P((ENV *, + * PUBLIC: DB_LOCKER *, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + */ +int +__lock_vec(env, sh_locker, flags, list, nlist, elistp) + ENV *env; + DB_LOCKER *sh_locker; + u_int32_t flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + struct __db_lock *lp, *next_lock; + DB_LOCK lock; DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DBT *objlist, *np; + u_int32_t ndx; + int did_abort, i, ret, run_dd, upgrade, writes; + + /* Check if locks have been globally turned off. 
*/ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + lt = env->lk_handle; + region = lt->reginfo.primary; + + run_dd = 0; + LOCK_SYSTEM_LOCK(lt, region); + for (i = 0, ret = 0; i < nlist && ret == 0; i++) + switch (list[i].op) { + case DB_LOCK_GET_TIMEOUT: + LF_SET(DB_LOCK_SET_TIMEOUT); + /* FALLTHROUGH */ + case DB_LOCK_GET: + if (IS_RECOVERING(env)) { + LOCK_INIT(list[i].lock); + break; + } + ret = __lock_get_internal(lt, + sh_locker, flags, list[i].obj, + list[i].mode, list[i].timeout, &list[i].lock); + break; + case DB_LOCK_INHERIT: + ret = __lock_inherit_locks(lt, sh_locker, flags); + break; + case DB_LOCK_PUT: + ret = __lock_put_nolock(env, + &list[i].lock, &run_dd, flags); + break; + case DB_LOCK_PUT_ALL: /* Put all locks. */ + case DB_LOCK_PUT_READ: /* Put read locks. */ + case DB_LOCK_UPGRADE_WRITE: + /* Upgrade was_write and put read locks. */ + /* + * Since the locker may hold no + * locks (i.e., you could call abort before you've + * done any work), it's perfectly reasonable for there + * to be no locker; this is not an error. + */ + if (sh_locker == NULL) + /* + * If ret is set, then we'll generate an + * error. If it's not set, we have nothing + * to do. + */ + break; + upgrade = 0; + writes = 1; + if (list[i].op == DB_LOCK_PUT_READ) + writes = 0; + else if (list[i].op == DB_LOCK_UPGRADE_WRITE) { + if (F_ISSET(sh_locker, DB_LOCKER_DIRTY)) + upgrade = 1; + writes = 0; + } + objlist = list[i].obj; + if (objlist != NULL) { + /* + * We know these should be ilocks, + * but they could be something else, + * so allocate room for the size too. + */ + objlist->size = + sh_locker->nwrites * sizeof(DBT); + if ((ret = __os_malloc(env, + objlist->size, &objlist->data)) != 0) + goto up_done; + memset(objlist->data, 0, objlist->size); + np = (DBT *) objlist->data; + } else + np = NULL; + + /* Now traverse the locks, releasing each one. 
*/ + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; lp = next_lock) { + sh_obj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + next_lock = SH_LIST_NEXT(lp, + locker_links, __db_lock); + if (writes == 1 || + lp->mode == DB_LOCK_READ || + lp->mode == DB_LOCK_READ_UNCOMMITTED) { + SH_LIST_REMOVE(lp, + locker_links, __db_lock); + sh_obj = SH_OFF_TO_PTR(lp, + lp->obj, DB_LOCKOBJ); + ndx = sh_obj->indx; + OBJECT_LOCK_NDX(lt, region, ndx); + /* + * We are not letting lock_put_internal + * unlink the lock, so we'll have to + * update counts here. + */ + if (lp->status == DB_LSTAT_HELD) { + DB_ASSERT(env, + sh_locker->nlocks != 0); + sh_locker->nlocks--; + if (IS_WRITELOCK(lp->mode)) + sh_locker->nwrites--; + } + ret = __lock_put_internal(lt, lp, + sh_obj->indx, + DB_LOCK_FREE | DB_LOCK_DOALL); + OBJECT_UNLOCK(lt, region, ndx); + if (ret != 0) + break; + continue; + } + if (objlist != NULL) { + DB_ASSERT(env, (u_int8_t *)np < + (u_int8_t *)objlist->data + + objlist->size); + np->data = SH_DBT_PTR(&sh_obj->lockobj); + np->size = sh_obj->lockobj.size; + np++; + } + } + if (ret != 0) + goto up_done; + + if (objlist != NULL) + if ((ret = __lock_fix_list(env, + objlist, sh_locker->nwrites)) != 0) + goto up_done; + switch (list[i].op) { + case DB_LOCK_UPGRADE_WRITE: + /* + * Upgrade all WWRITE locks to WRITE so + * that we can abort a transaction which + * was supporting dirty readers. + */ + if (upgrade != 1) + goto up_done; + SH_LIST_FOREACH(lp, &sh_locker->heldby, + locker_links, __db_lock) { + if (lp->mode != DB_LOCK_WWRITE) + continue; + lock.off = R_OFFSET(<->reginfo, lp); + lock.gen = lp->gen; + F_SET(sh_locker, DB_LOCKER_INABORT); + if ((ret = __lock_get_internal(lt, + sh_locker, flags | DB_LOCK_UPGRADE, + NULL, DB_LOCK_WRITE, 0, &lock)) !=0) + break; + } + up_done: + /* FALLTHROUGH */ + case DB_LOCK_PUT_READ: + case DB_LOCK_PUT_ALL: + break; + default: + break; + } + break; + case DB_LOCK_PUT_OBJ: + /* Remove all the locks associated with an object. 
*/ + OBJECT_LOCK(lt, region, list[i].obj, ndx); + if ((ret = __lock_getobj(lt, list[i].obj, + ndx, 0, &sh_obj)) != 0 || sh_obj == NULL) { + if (ret == 0) + ret = EINVAL; + OBJECT_UNLOCK(lt, region, ndx); + break; + } + + /* + * Go through both waiters and holders. Don't bother + * to run promotion, because everyone is getting + * released. The processes waiting will still get + * awakened as their waiters are released. + */ + for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); + ret == 0 && lp != NULL; + lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) + ret = __lock_put_internal(lt, lp, ndx, + DB_LOCK_UNLINK | + DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + + /* + * On the last time around, the object will get + * reclaimed by __lock_put_internal, structure the + * loop carefully so we do not get bitten. + */ + for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); + ret == 0 && lp != NULL; + lp = next_lock) { + next_lock = SH_TAILQ_NEXT(lp, links, __db_lock); + ret = __lock_put_internal(lt, lp, ndx, + DB_LOCK_UNLINK | + DB_LOCK_NOPROMOTE | DB_LOCK_DOALL); + } + OBJECT_UNLOCK(lt, region, ndx); + break; + + case DB_LOCK_TIMEOUT: + ret = __lock_set_timeout_internal(env, + sh_locker, 0, DB_SET_TXN_NOW); + break; + + case DB_LOCK_TRADE: + /* + * INTERNAL USE ONLY. + * Change the holder of the lock described in + * list[i].lock to the locker-id specified by + * the locker parameter. + */ + /* + * You had better know what you're doing here. + * We are trading locker-id's on a lock to + * facilitate file locking on open DB handles. + * We do not do any conflict checking on this, + * so heaven help you if you use this flag under + * any other circumstances. 
+ */ + ret = __lock_trade(env, &list[i].lock, sh_locker); + break; +#if defined(DEBUG) && defined(HAVE_STATISTICS) + case DB_LOCK_DUMP: + if (sh_locker == NULL) + break; + + SH_LIST_FOREACH( + lp, &sh_locker->heldby, locker_links, __db_lock) + __lock_printlock(lt, NULL, lp, 1); + break; +#endif + default: + __db_errx(env, DB_STR_A("2035", + "Invalid lock operation: %d", "%d"), list[i].op); + ret = EINVAL; + break; + } + + if (ret == 0 && region->detect != DB_LOCK_NORUN && + (region->need_dd || timespecisset(®ion->next_timeout))) + run_dd = 1; + LOCK_SYSTEM_UNLOCK(lt, region); + + if (run_dd) + (void)__lock_detect(env, region->detect, &did_abort); + + if (ret != 0 && elistp != NULL) + *elistp = &list[i - 1]; + + return (ret); +} + +/* + * __lock_get_pp -- + * ENV->lock_get pre/post processing. + * + * PUBLIC: int __lock_get_pp __P((DB_ENV *, + * PUBLIC: u_int32_t, u_int32_t, DBT *, db_lockmode_t, DB_LOCK *)); + */ +int +__lock_get_pp(dbenv, locker, flags, obj, lock_mode, lock) + DB_ENV *dbenv; + u_int32_t locker, flags; + DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_get", DB_INIT_LOCK); + + /* Validate arguments. 
*/ + if ((ret = __db_fchk(env, "DB_ENV->lock_get", flags, + DB_LOCK_NOWAIT | DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) != 0) + return (ret); + + if ((ret = __dbt_usercopy(env, obj)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__lock_get_api(env, locker, flags, obj, lock_mode, lock)), + 0, ret); + ENV_LEAVE(env, ip); + __dbt_userfree(env, obj, NULL, NULL); + return (ret); +} + +static int +__lock_get_api(env, locker, flags, obj, lock_mode, lock) + ENV *env; + u_int32_t locker, flags; + const DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + int ret; + + COMPQUIET(region, NULL); + + region = env->lk_handle->reginfo.primary; + + LOCK_LOCKERS(env, region); + ret = __lock_getlocker_int(env->lk_handle, locker, 0, &sh_locker); + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_LOCK(env->lk_handle, region); + if (ret == 0) + ret = __lock_get_internal(env->lk_handle, + sh_locker, flags, obj, lock_mode, 0, lock); + LOCK_SYSTEM_UNLOCK(env->lk_handle, region); + return (ret); +} + +/* + * __lock_get -- + * ENV->lock_get. + * + * PUBLIC: int __lock_get __P((ENV *, + * PUBLIC: DB_LOCKER *, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); + */ +int +__lock_get(env, locker, flags, obj, lock_mode, lock) + ENV *env; + DB_LOCKER *locker; + u_int32_t flags; + const DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + DB_LOCKTAB *lt; + int ret; + + lt = env->lk_handle; + + if (IS_RECOVERING(env)) { + LOCK_INIT(*lock); + return (0); + } + + LOCK_SYSTEM_LOCK(lt, (DB_LOCKREGION *)lt->reginfo.primary); + ret = __lock_get_internal(lt, locker, flags, obj, lock_mode, 0, lock); + LOCK_SYSTEM_UNLOCK(lt, (DB_LOCKREGION *)lt->reginfo.primary); + return (ret); +} +/* + * __lock_alloclock -- allocate a lock from another partition. + * We assume we have the partition locked on entry and leave + * it unlocked on success since we will have to retry the lock operation. 
+ * The mutex will still be locked if we are out of space. + */ +static int +__lock_alloclock(lt, part_id) + DB_LOCKTAB *lt; + u_int32_t part_id; +{ +#define FREE_LIST_HEAD free_locks +#define STRUCT_NAME __db_lock +#define CURRENT_COUNT st_locks +#define MAX_COUNT st_maxlocks +#define STEAL_NAME st_locksteals +#define STEAL_EVENT steal + +#ifdef DEBUG + __db_loadme(); +#endif + +#include "lock_alloc.incl" +} + +/* + * __lock_get_internal -- + * All the work for lock_get (and for the GET option of lock_vec) is done + * inside of lock_get_internal. + * + * PUBLIC: int __lock_get_internal __P((DB_LOCKTAB *, DB_LOCKER *, u_int32_t, + * PUBLIC: const DBT *, db_lockmode_t, db_timeout_t, DB_LOCK *)); + */ +int +__lock_get_internal(lt, sh_locker, flags, obj, lock_mode, timeout, lock) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; + u_int32_t flags; + const DBT *obj; + db_lockmode_t lock_mode; + db_timeout_t timeout; + DB_LOCK *lock; +{ + struct __db_lock *newl, *lp; + ENV *env; + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + DB_THREAD_INFO *ip; + u_int32_t ndx, part_id; + int did_abort, ihold, grant_dirty, no_dd, ret, t_ret; + roff_t holder, sh_off; + + /* + * We decide what action to take based on what locks are already held + * and what locks are in the wait queue. + */ + enum { + GRANT, /* Grant the lock. */ + UPGRADE, /* Upgrade the lock. */ + HEAD, /* Wait at head of wait queue. */ + SECOND, /* Wait as the second waiter. */ + TAIL /* Wait at tail of the wait queue. */ + } action; + + env = lt->env; + region = lt->reginfo.primary; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + if (sh_locker == NULL) { + __db_errx(env, DB_STR("2036", "Locker does not exist")); + return (EINVAL); + } + + DB_ASSERT(env, lock_mode == DB_LOCK_WAIT || !LF_ISSET(DB_LOCK_SWITCH)); + + no_dd = ret = 0; + newl = NULL; + sh_obj = NULL; + + /* Check that the lock mode is valid. 
*/ + if (lock_mode >= (db_lockmode_t)region->nmodes) { + __db_errx(env, DB_STR_A("2037", + "DB_ENV->lock_get: invalid lock mode %lu", "%lu"), + (u_long)lock_mode); + return (EINVAL); + } + +again: if (obj == NULL) { + DB_ASSERT(env, LOCK_ISSET(*lock)); + lp = R_ADDR(<->reginfo, lock->off); + DB_ASSERT(env, lock->gen == lp->gen); + sh_obj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + ndx = sh_obj->indx; + OBJECT_LOCK_NDX(lt, region, ndx); + } else { + /* Allocate a shared memory new object. */ + OBJECT_LOCK(lt, region, obj, lock->ndx); + ndx = lock->ndx; + if ((ret = __lock_getobj(lt, + obj, lock->ndx, !LF_ISSET(DB_LOCK_CHECK), &sh_obj)) != 0) + goto err; +#ifdef DIAGNOSTIC + if (sh_obj == NULL) { + ret = ENOENT; + goto err; + } + if (LF_ISSET(DB_LOCK_UPGRADE)) { + DB_ASSERT(env, LOCK_ISSET(*lock)); + lp = R_ADDR(<->reginfo, lock->off); + DB_ASSERT(env, lock->gen == lp->gen); + DB_ASSERT(env, + SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ) == sh_obj); + } +#endif + } + +#ifdef HAVE_STATISTICS + if (LF_ISSET(DB_LOCK_UPGRADE)) + STAT_INC_VERB(env, lock, upgrade, + lt->obj_stat[ndx].st_nupgrade, + (DBT *) obj, sh_locker->id); + else if (!LF_ISSET(DB_LOCK_SWITCH | DB_LOCK_CHECK)) + STAT_INC_VERB(env, lock, request, + lt->obj_stat[ndx].st_nrequests, + (DBT *) obj, sh_locker->id); +#endif + + /* + * Figure out if we can grant this lock or if it should wait. + * By default, we can grant the new lock if it does not conflict with + * anyone on the holders list OR anyone on the waiters list. + * The reason that we don't grant if there's a conflict is that + * this can lead to starvation (a writer waiting on a popularly + * read item will never be granted). The downside of this is that + * a waiting reader can prevent an upgrade from reader to writer, + * which is not uncommon. + * + * There are two exceptions to the no-conflict rule. First, if + * a lock is held by the requesting locker AND the new lock does + * not conflict with any other holders, then we grant the lock. 
+ * The most common place this happens is when the holder has a + * WRITE lock and a READ lock request comes in for the same locker. + * If we do not grant the read lock, then we guarantee deadlock. + * Second, dirty readers are granted if at all possible while + * avoiding starvation, see below. + * + * In case of conflict, we put the new lock on the end of the waiters + * list, unless we are upgrading or this is a dirty reader in which + * case the locker goes at or near the front of the list. + */ + ihold = 0; + grant_dirty = 0; + holder = 0; + + /* + * DB_LOCK_WAIT is is a special case used by the queue + * access method when we want to get an entry which is past + * the end of the queue. With CDB we have a DB_READ_LOCK and + * need to switch it to DB_LOCK_WAIT. Otherwise we insert a + * DB_LOCK_WAIT and and then after releaseing the metadata + * page wait on it and join the waiters queue. This must be + * done as a single operation so that another locker cannot + * get in and fail to wake us up. + */ + if (lock_mode == DB_LOCK_WAIT) + lp = NULL; + else + lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); + + sh_off = R_OFFSET(<->reginfo, sh_locker); + for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + if (sh_off == lp->holder) { + if (lp->mode == lock_mode && + lp->status == DB_LSTAT_HELD) { + if (LF_ISSET(DB_LOCK_UPGRADE)) + goto upgrade; + +#ifdef DIAGNOSTIC + if (LF_ISSET(DB_LOCK_CHECK)) + goto done; +#endif + + /* + * Lock is held, so we can increment the + * reference count and return this lock + * to the caller. We do not count reference + * increments towards the locks held by + * the locker. 
+ */ + lp->refcount++; + lock->off = R_OFFSET(<->reginfo, lp); + lock->gen = lp->gen; + lock->mode = lp->mode; + goto done; + } else { + ihold = 1; + } + } else if (__lock_same_family(lt, + R_ADDR(<->reginfo, lp->holder), sh_locker)) + ihold = 1; + else if (CONFLICTS(lt, region, lp->mode, lock_mode)) + break; + else if (lp->mode == DB_LOCK_READ || + lp->mode == DB_LOCK_WWRITE) { + grant_dirty = 1; + holder = lp->holder; + } + } + +#ifdef DIAGNOSTIC + if (LF_ISSET(DB_LOCK_CHECK)) { + ret = ENOENT; + goto err; + } +#endif + + /* + * If there are conflicting holders we will have to wait. If we + * already hold a lock on this object or are doing an upgrade or + * this is a dirty reader it goes to the head of the queue, everyone + * else to the back. + */ + if (lp != NULL) { + if (ihold || LF_ISSET(DB_LOCK_UPGRADE) || + lock_mode == DB_LOCK_READ_UNCOMMITTED) + action = HEAD; + else + action = TAIL; + } else { + if (LF_ISSET(DB_LOCK_UPGRADE)) + action = UPGRADE; + else if (lock_mode == DB_LOCK_WAIT) + action = TAIL; + else if (ihold) + action = GRANT; + else { + /* + * Look for conflicting waiters. + */ + SH_TAILQ_FOREACH(lp, &sh_obj->waiters, links, __db_lock) + if (lp->holder != sh_off && + CONFLICTS(lt, region, lp->mode, lock_mode)) + break; + + /* + * If there are no conflicting holders or waiters, + * then we grant. Normally when we wait, we + * wait at the end (TAIL). However, the goal of + * DIRTY_READ locks to allow forward progress in the + * face of updating transactions, so we try to allow + * all DIRTY_READ requests to proceed as rapidly + * as possible, so long as we can prevent starvation. + * + * When determining how to queue a DIRTY_READ + * request: + * + * 1. If there is a waiting upgrading writer, + * then we enqueue the dirty reader BEHIND it + * (second in the queue). + * 2. Else, if the current holders are either + * READ or WWRITE, we grant + * 3. Else queue SECOND i.e., behind the first + * waiter. 
+ * + * The end result is that dirty_readers get to run + * so long as other lockers are blocked. Once + * there is a locker which is only waiting on + * dirty readers then they queue up behind that + * locker so that it gets to run. In general + * this locker will be a WRITE which will shortly + * get downgraded to a WWRITE, permitting the + * DIRTY locks to be granted. + */ + if (lp == NULL) + action = GRANT; + else if (grant_dirty && + lock_mode == DB_LOCK_READ_UNCOMMITTED) { + /* + * An upgrade will be at the head of the + * queue. + */ + lp = SH_TAILQ_FIRST( + &sh_obj->waiters, __db_lock); + if (lp->mode == DB_LOCK_WRITE && + lp->holder == holder) + action = SECOND; + else + action = GRANT; + } else if (lock_mode == DB_LOCK_READ_UNCOMMITTED) + action = SECOND; + else + action = TAIL; + } + } + + switch (action) { + case HEAD: + case TAIL: + case SECOND: + if (LF_ISSET(DB_LOCK_NOWAIT) && lock_mode != DB_LOCK_WAIT) { + ret = DB_LOCK_NOTGRANTED; + STAT_INC_VERB(env, lock, nowait_notgranted, + region->stat.st_lock_nowait, + (DBT *) obj, sh_locker->id); + goto err; + } + /* FALLTHROUGH */ + case GRANT: + part_id = LOCK_PART(region, ndx); + /* Allocate a new lock. */ + if ((newl = SH_TAILQ_FIRST( + &FREE_LOCKS(lt, part_id), __db_lock)) == NULL) { + if ((ret = __lock_alloclock(lt, part_id)) != 0) + goto err; + /* Allocation dropped the mutex, start over. */ + OBJECT_UNLOCK(lt, region, ndx); + sh_obj = NULL; + goto again; + } + SH_TAILQ_REMOVE( + &FREE_LOCKS(lt, part_id), newl, links, __db_lock); + +#ifdef HAVE_STATISTICS + /* + * Keep track of the maximum number of locks allocated + * in each partition and the maximum number of locks + * used by any one bucket. 
+ */ + if (++lt->obj_stat[ndx].st_nlocks > + lt->obj_stat[ndx].st_maxnlocks) + lt->obj_stat[ndx].st_maxnlocks = + lt->obj_stat[ndx].st_nlocks; + if (++lt->part_array[part_id].part_stat.st_nlocks > + lt->part_array[part_id].part_stat.st_maxnlocks) + lt->part_array[part_id].part_stat.st_maxnlocks = + lt->part_array[part_id].part_stat.st_nlocks; +#endif + + newl->holder = R_OFFSET(<->reginfo, sh_locker); + newl->refcount = 1; + newl->mode = lock_mode; + newl->obj = (roff_t)SH_PTR_TO_OFF(newl, sh_obj); + newl->indx = sh_obj->indx; + newl->mtx_lock = MUTEX_INVALID; + /* + * Now, insert the lock onto its locker's list. + * If the locker does not currently hold any locks, + * there's no reason to run a deadlock + * detector, save that information. + */ + no_dd = sh_locker->master_locker == INVALID_ROFF && + SH_LIST_FIRST( + &sh_locker->child_locker, __db_locker) == NULL && + SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL; + + SH_LIST_INSERT_HEAD( + &sh_locker->heldby, newl, locker_links, __db_lock); + + break; + + case UPGRADE: +upgrade: lp = R_ADDR(<->reginfo, lock->off); + DB_ASSERT(env, lock->gen == lp->gen); + if (IS_WRITELOCK(lock_mode) && !IS_WRITELOCK(lp->mode)) + sh_locker->nwrites++; + lp->mode = lock_mode; + /* If we are upgrading to a WAIT we must wait. */ + if (lock_mode != DB_LOCK_WAIT) + goto done; + if (lp->status != DB_LSTAT_WAITING) { + /* We have already been granted. 
*/ + MUTEX_LOCK(env, lp->mtx_lock); + newl = lp; + if (lp->status == DB_LSTAT_EXPIRED) + goto expired; + DB_ASSERT(env, lp->status == DB_LSTAT_PENDING); + SH_TAILQ_REMOVE( + &sh_obj->holders, newl, links, __db_lock); + newl->links.stqe_prev = -1; + goto done; + } + COMPQUIET(action, UPGRADE); + } + + switch (action) { + case GRANT: + newl->status = DB_LSTAT_HELD; + SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links); + break; + case UPGRADE: + DB_ASSERT(env, lock_mode == DB_LOCK_WAIT); + /* FALLTHROUGH */ + case HEAD: + case TAIL: + case SECOND: + if ((lp = + SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) == NULL) { + LOCK_DD(env, region); + SH_TAILQ_INSERT_HEAD(®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + UNLOCK_DD(env, region); + } + switch (action) { + case HEAD: + SH_TAILQ_INSERT_HEAD( + &sh_obj->waiters, newl, links, __db_lock); + break; + case SECOND: + SH_TAILQ_INSERT_AFTER( + &sh_obj->waiters, lp, newl, links, __db_lock); + break; + case TAIL: + SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links); + break; + case UPGRADE: + /* The lock is already in the queue. */ + newl = R_ADDR(<->reginfo, lock->off); + break; + default: + DB_ASSERT(env, 0); + } + + /* + * First check to see if this txn has expired. + * If not then see if the lock timeout is past + * the expiration of the txn, if it is, use + * the txn expiration time. lk_expire is passed + * to avoid an extra call to get the time. + */ + if (__clock_expired(env, + &sh_locker->lk_expire, &sh_locker->tx_expire)) { + newl->status = DB_LSTAT_EXPIRED; + sh_locker->lk_expire = sh_locker->tx_expire; + + /* We are done. */ + goto expired; + } + + /* + * If a timeout was specified in this call then it + * takes priority. If a lock timeout has been specified + * for this transaction then use that, otherwise use + * the global timeout value. 
+ */ + if (!LF_ISSET(DB_LOCK_SET_TIMEOUT)) { + if (F_ISSET(sh_locker, DB_LOCKER_TIMEOUT)) + timeout = sh_locker->lk_timeout; + else + timeout = region->lk_timeout; + } + + /* + * For queue we insert the WAIT lock and don't wait on it. + * That way we can unpin the metadata page first and then + * block. + */ + if (lock_mode == DB_LOCK_WAIT && LF_ISSET(DB_LOCK_NOWAIT)) { + newl->mtx_lock = sh_locker->mtx_locker; + newl->status = DB_LSTAT_WAITING; + goto out; + } + + if (timeout != 0) + __clock_set_expires(env, + &sh_locker->lk_expire, timeout); + else + timespecclear(&sh_locker->lk_expire); + + if (timespecisset(&sh_locker->tx_expire) && + (timeout == 0 || __clock_expired(env, + &sh_locker->lk_expire, &sh_locker->tx_expire))) + sh_locker->lk_expire = sh_locker->tx_expire; + if (timespecisset(&sh_locker->lk_expire) && + (!timespecisset(®ion->next_timeout) || + timespeccmp( + ®ion->next_timeout, &sh_locker->lk_expire, >))) + region->next_timeout = sh_locker->lk_expire; + +in_abort: newl->status = DB_LSTAT_WAITING; + newl->mtx_lock = sh_locker->mtx_locker; + STAT(lt->obj_stat[ndx].st_lock_wait++); + /* We are about to block, deadlock detector must run. */ + region->need_dd = 1; + + OBJECT_UNLOCK(lt, region, sh_obj->indx); + + /* If we are switching drop the lock we had. */ + if (LF_ISSET(DB_LOCK_SWITCH) && + (ret = __lock_put_nolock(env, lock, &ihold, 0)) != 0) { + OBJECT_LOCK_NDX(lt, region, sh_obj->indx); + (void)__lock_remove_waiter( + lt, sh_obj, newl, DB_LSTAT_FREE); + goto err; + } + + LOCK_SYSTEM_UNLOCK(lt, region); + + /* + * Before waiting, see if the deadlock detector should run. 
+ */ + if (region->detect != DB_LOCK_NORUN && !no_dd) + (void)__lock_detect(env, region->detect, &did_abort); + + ip = NULL; + if (env->thr_hashtab != NULL && + (ret = __env_set_state(env, &ip, THREAD_BLOCKED)) != 0) { + LOCK_SYSTEM_LOCK(lt, region); + OBJECT_LOCK_NDX(lt, region, ndx); + goto err; + } + + PERFMON2(env, lock, suspend, (DBT *) obj, lock_mode); + MUTEX_LOCK(env, newl->mtx_lock); + PERFMON2(env, lock, resume, (DBT *) obj, lock_mode); + + if (ip != NULL) + ip->dbth_state = THREAD_ACTIVE; + + LOCK_SYSTEM_LOCK(lt, region); + OBJECT_LOCK_NDX(lt, region, ndx); + + /* Turn off lock timeout. */ + if (newl->status != DB_LSTAT_EXPIRED) + timespecclear(&sh_locker->lk_expire); + + switch (newl->status) { + case DB_LSTAT_ABORTED: + /* + * If we raced with the deadlock detector and it + * mistakenly picked this tranaction to abort again + * ignore the abort and request the lock again. + */ + if (F_ISSET(sh_locker, DB_LOCKER_INABORT)) + goto in_abort; + ret = DB_LOCK_DEADLOCK; + goto err; + case DB_LSTAT_EXPIRED: +expired: ret = __lock_put_internal(lt, newl, + ndx, DB_LOCK_UNLINK | DB_LOCK_FREE); + newl = NULL; + if (ret != 0) + goto err; +#ifdef HAVE_STATISTICS + if (timespeccmp( + &sh_locker->lk_expire, &sh_locker->tx_expire, ==)) + STAT_INC(env, lock, txntimeout, + lt->obj_stat[ndx].st_ntxntimeouts, + (DBT *) obj); + else + STAT_INC(env, lock, locktimeout, + lt->obj_stat[ndx].st_nlocktimeouts, + (DBT *) obj); +#endif + ret = DB_LOCK_NOTGRANTED; + timespecclear(&sh_locker->lk_expire); + goto err; + case DB_LSTAT_PENDING: + if (LF_ISSET(DB_LOCK_UPGRADE)) { + /* + * The lock just granted got put on the holders + * list. Since we're upgrading some other lock, + * we've got to remove it here. + */ + SH_TAILQ_REMOVE( + &sh_obj->holders, newl, links, __db_lock); + /* + * Ensure the object is not believed to be on + * the object's lists, if we're traversing by + * locker. 
+ */ + newl->links.stqe_prev = -1; + if (newl->mode == DB_LOCK_WAIT) + goto done; + goto upgrade; + } else + newl->status = DB_LSTAT_HELD; + break; + case DB_LSTAT_FREE: + case DB_LSTAT_HELD: + case DB_LSTAT_WAITING: + default: + __db_errx(env, DB_STR_A("2038", + "Unexpected lock status: %d", "%d"), + (int)newl->status); + ret = __env_panic(env, EINVAL); + goto err; + } + } + +out: lock->off = R_OFFSET(<->reginfo, newl); + lock->gen = newl->gen; + lock->mode = newl->mode; + sh_locker->nlocks++; + if (IS_WRITELOCK(newl->mode)) { + sh_locker->nwrites++; + if (newl->mode == DB_LOCK_WWRITE) + F_SET(sh_locker, DB_LOCKER_DIRTY); + } + + OBJECT_UNLOCK(lt, region, ndx); + return (0); + +err: if (!LF_ISSET(DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) + LOCK_INIT(*lock); + +done: if (newl != NULL && + (t_ret = __lock_freelock(lt, newl, sh_locker, + DB_LOCK_FREE | DB_LOCK_UNLINK)) != 0 && ret == 0) + ret = t_ret; + OBJECT_UNLOCK(lt, region, ndx); + + return (ret); +} + +/* + * __lock_put_pp -- + * ENV->lock_put pre/post processing. + * + * PUBLIC: int __lock_put_pp __P((DB_ENV *, DB_LOCK *)); + */ +int +__lock_put_pp(dbenv, lock) + DB_ENV *dbenv; + DB_LOCK *lock; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_LOCK->lock_put", DB_INIT_LOCK); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__lock_put(env, lock)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_put -- + * + * PUBLIC: int __lock_put __P((ENV *, DB_LOCK *)); + * Internal lock_put interface. + */ +int +__lock_put(env, lock) + ENV *env; + DB_LOCK *lock; +{ + DB_LOCKTAB *lt; + int ret, run_dd; + + if (IS_RECOVERING(env)) + return (0); + + lt = env->lk_handle; + + LOCK_SYSTEM_LOCK(lt, (DB_LOCKREGION *)lt->reginfo.primary); + ret = __lock_put_nolock(env, lock, &run_dd, 0); + LOCK_SYSTEM_UNLOCK(lt, (DB_LOCKREGION *)lt->reginfo.primary); + + /* + * Only run the lock detector if put told us to AND we are running + * in auto-detect mode. 
If we are not running in auto-detect, then + * a call to lock_detect here will 0 the need_dd bit, but will not + * actually abort anything. + */ + if (ret == 0 && run_dd) + (void)__lock_detect(env, + ((DB_LOCKREGION *)lt->reginfo.primary)->detect, NULL); + return (ret); +} + +static int +__lock_put_nolock(env, lock, runp, flags) + ENV *env; + DB_LOCK *lock; + int *runp; + u_int32_t flags; +{ + struct __db_lock *lockp; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + lt = env->lk_handle; + region = lt->reginfo.primary; + + lockp = R_ADDR(<->reginfo, lock->off); + DB_ASSERT(env, lock->gen == lockp->gen); + if (lock->gen != lockp->gen) { + __db_errx(env, __db_lock_invalid, "DB_LOCK->lock_put"); + LOCK_INIT(*lock); + return (EINVAL); + } + + OBJECT_LOCK_NDX(lt, region, lock->ndx); + ret = __lock_put_internal(lt, + lockp, lock->ndx, flags | DB_LOCK_UNLINK | DB_LOCK_FREE); + OBJECT_UNLOCK(lt, region, lock->ndx); + + LOCK_INIT(*lock); + + *runp = 0; + if (ret == 0 && region->detect != DB_LOCK_NORUN && + (region->need_dd || timespecisset(®ion->next_timeout))) + *runp = 1; + + return (ret); +} + +/* + * __lock_downgrade -- + * + * Used to downgrade locks. Currently this is used in three places: 1) by the + * Concurrent Data Store product to downgrade write locks back to iwrite locks + * and 2) to downgrade write-handle locks to read-handle locks at the end of + * an open/create. 3) To downgrade write locks to was_write to support dirty + * reads. 
+ * + * PUBLIC: int __lock_downgrade __P((ENV *, + * PUBLIC: DB_LOCK *, db_lockmode_t, u_int32_t)); + */ +int +__lock_downgrade(env, lock, new_mode, flags) + ENV *env; + DB_LOCK *lock; + db_lockmode_t new_mode; + u_int32_t flags; +{ + struct __db_lock *lockp; + DB_LOCKER *sh_locker; + DB_LOCKOBJ *obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + ret = 0; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + lt = env->lk_handle; + region = lt->reginfo.primary; + + LOCK_SYSTEM_LOCK(lt, region); + + lockp = R_ADDR(<->reginfo, lock->off); + if (lock->gen != lockp->gen) { + __db_errx(env, __db_lock_invalid, "lock_downgrade"); + ret = EINVAL; + goto out; + } + + sh_locker = R_ADDR(<->reginfo, lockp->holder); + + if (IS_WRITELOCK(lockp->mode) && !IS_WRITELOCK(new_mode)) + sh_locker->nwrites--; + + lockp->mode = new_mode; + lock->mode = new_mode; + + /* Get the object associated with this lock. */ + obj = SH_OFF_TO_PTR(lockp, lockp->obj, DB_LOCKOBJ); + OBJECT_LOCK_NDX(lt, region, obj->indx); + STAT(lt->obj_stat[obj->indx].st_ndowngrade++); + ret = __lock_promote(lt, obj, NULL, flags); + OBJECT_UNLOCK(lt, region, obj->indx); + +out: LOCK_SYSTEM_UNLOCK(lt, region); + return (ret); +} + +/* + * __lock_put_internal -- put a lock structure + * We assume that we are called with the proper object locked. + */ +static int +__lock_put_internal(lt, lockp, obj_ndx, flags) + DB_LOCKTAB *lt; + struct __db_lock *lockp; + u_int32_t obj_ndx, flags; +{ + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + ENV *env; + u_int32_t part_id; + int ret, state_changed; + + COMPQUIET(env, NULL); + env = lt->env; + region = lt->reginfo.primary; + ret = state_changed = 0; + + if (!OBJ_LINKS_VALID(lockp)) { + /* + * Someone removed this lock while we were doing a release + * by locker id. We are trying to free this lock, but it's + * already been done; all we need to do is return it to the + * free list. 
+ */ + (void)__lock_freelock(lt, lockp, NULL, DB_LOCK_FREE); + return (0); + } + +#ifdef HAVE_STATISTICS + if (LF_ISSET(DB_LOCK_DOALL)) + lt->obj_stat[obj_ndx].st_nreleases += lockp->refcount; + else + lt->obj_stat[obj_ndx].st_nreleases++; +#endif + + if (!LF_ISSET(DB_LOCK_DOALL) && lockp->refcount > 1) { + lockp->refcount--; + PERFMON2(env, lock, put_reduce_count, + &(SH_OFF_TO_PTR(lockp, lockp->obj, DB_LOCKOBJ))->lockobj, + flags); + return (0); + } + + /* Increment generation number. */ + lockp->gen++; + + /* Get the object associated with this lock. */ + sh_obj = SH_OFF_TO_PTR(lockp, lockp->obj, DB_LOCKOBJ); + + PERFMON2(env, lock, put, &sh_obj->lockobj, flags); + /* + * Remove this lock from its holders/waitlist. Set its status + * to ABORTED. It may get freed below, but if not then the + * waiter has been aborted (it will panic if the lock is + * free). + */ + if (lockp->status != DB_LSTAT_HELD && + lockp->status != DB_LSTAT_PENDING) { + DB_ASSERT(env, lockp != + SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)); + if ((ret = __lock_remove_waiter( + lt, sh_obj, lockp, DB_LSTAT_ABORTED)) != 0) + return (ret); + } else { + DB_ASSERT(env, lockp != + SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)); + SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); + lockp->links.stqe_prev = -1; + } + + if (LF_ISSET(DB_LOCK_NOPROMOTE)) + state_changed = 0; + else if ((ret = __lock_promote(lt, + sh_obj, &state_changed, flags)) != 0) + return (ret); + + /* Check if object should be reclaimed. 
*/ + if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL && + SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + part_id = LOCK_PART(region, obj_ndx); + SH_TAILQ_REMOVE( + <->obj_tab[obj_ndx], sh_obj, links, __db_lockobj); + if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) { + if (region->part_t_size != 1) + LOCK_REGION_LOCK(env); + __env_alloc_free(<->reginfo, + SH_DBT_PTR(&sh_obj->lockobj)); + if (region->part_t_size != 1) + LOCK_REGION_UNLOCK(env); + } + SH_TAILQ_INSERT_HEAD( + &FREE_OBJS(lt, part_id), sh_obj, links, __db_lockobj); + sh_obj->generation++; + STAT(lt->part_array[part_id].part_stat.st_nobjects--); + STAT(lt->obj_stat[obj_ndx].st_nobjects--); + state_changed = 1; + } + + /* Free lock. */ + if (LF_ISSET(DB_LOCK_UNLINK | DB_LOCK_FREE)) + ret = __lock_freelock(lt, lockp, + R_ADDR(<->reginfo, lockp->holder), flags); + + /* + * If we did not promote anyone; we need to run the deadlock + * detector again. + */ + if (state_changed == 0) + region->need_dd = 1; + + return (ret); +} + +/* + * __lock_freelock -- + * Free a lock. Unlink it from its locker if necessary. + * We must hold the object lock. + * + */ +static int +__lock_freelock(lt, lockp, sh_locker, flags) + DB_LOCKTAB *lt; + struct __db_lock *lockp; + DB_LOCKER *sh_locker; + u_int32_t flags; +{ + DB_LOCKREGION *region; + ENV *env; + u_int32_t part_id; + int ret; + + env = lt->env; + region = lt->reginfo.primary; + + if (LF_ISSET(DB_LOCK_UNLINK)) { + SH_LIST_REMOVE(lockp, locker_links, __db_lock); + if (lockp->status == DB_LSTAT_HELD) { + sh_locker->nlocks--; + if (IS_WRITELOCK(lockp->mode)) + sh_locker->nwrites--; + } + } + + if (LF_ISSET(DB_LOCK_FREE)) { + /* + * If the lock is not held we cannot be sure of its mutex + * state so we refresh it. 
+ */ + part_id = LOCK_PART(region, lockp->indx); + if (lockp->mtx_lock != MUTEX_INVALID && + lockp->status != DB_LSTAT_HELD && + lockp->status != DB_LSTAT_EXPIRED) { + if ((ret = __mutex_refresh(env, lockp->mtx_lock)) != 0) + return (ret); + MUTEX_LOCK(env, lockp->mtx_lock); + } + + lockp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD(&FREE_LOCKS(lt, part_id), + lockp, links, __db_lock); + STAT(lt->part_array[part_id].part_stat.st_nlocks--); + STAT(lt->obj_stat[lockp->indx].st_nlocks--); + } + + return (0); +} + +#undef FREE_LIST_HEAD +#undef STRUCT_NAME +#undef CURRENT_COUNT +#undef MAX_COUNT +#undef STEAL_NAME +#undef STEAL_EVENT +/* + * __lock_allocobj -- allocate a object from another partition. + * We assume we have the partition locked on entry and leave + * with the same partition locked on exit. + */ +static int +__lock_allocobj(lt, part_id) + DB_LOCKTAB *lt; + u_int32_t part_id; +{ +#define FREE_LIST_HEAD free_objs +#define STRUCT_NAME __db_lockobj +#define CURRENT_COUNT st_objects +#define MAX_COUNT st_maxobjects +#define STEAL_NAME st_objectsteals +#define STEAL_EVENT object_steal + +#ifdef DEBUG + __db_loadme(); +#endif + +#include "lock_alloc.incl" + +} + +/* + * __lock_getobj -- + * Get an object in the object hash table. The create parameter + * indicates if the object should be created if it doesn't exist in + * the table. + * + * This must be called with the object bucket locked. + */ +static int +__lock_getobj(lt, obj, ndx, create, retp) + DB_LOCKTAB *lt; + const DBT *obj; + u_int32_t ndx; + int create; + DB_LOCKOBJ **retp; +{ + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + ENV *env; + int ret; + void *p; + u_int32_t len, part_id; + + env = lt->env; + region = lt->reginfo.primary; + len = 0; + + /* Look up the object in the hash table. 
*/ +retry: SH_TAILQ_FOREACH(sh_obj, <->obj_tab[ndx], links, __db_lockobj) { + len++; + if (obj->size == sh_obj->lockobj.size && + memcmp(obj->data, + SH_DBT_PTR(&sh_obj->lockobj), obj->size) == 0) + break; + } + + /* + * If we found the object, then we can just return it. If + * we didn't find the object, then we need to create it. + */ + if (sh_obj == NULL && create) { + /* Create new object and then insert it into hash table. */ + part_id = LOCK_PART(region, ndx); + if ((sh_obj = SH_TAILQ_FIRST(&FREE_OBJS( + lt, part_id), __db_lockobj)) == NULL) { + if ((ret = __lock_allocobj(lt, part_id)) == 0) + goto retry; + goto err; + } + + /* + * If we can fit this object in the structure, do so instead + * of alloc-ing space for it. + */ + if (obj->size <= sizeof(sh_obj->objdata)) + p = sh_obj->objdata; + else { + /* + * If we have only one partition, the region is locked. + */ + if (region->part_t_size != 1) + LOCK_REGION_LOCK(env); + ret = __env_alloc(<->reginfo, obj->size, &p); + if (region->part_t_size != 1) + LOCK_REGION_UNLOCK(env); + if (ret != 0) { + __db_errx(env, + "No space for lock object storage"); + goto err; + } + } + + memcpy(p, obj->data, obj->size); + + SH_TAILQ_REMOVE(&FREE_OBJS( + lt, part_id), sh_obj, links, __db_lockobj); +#ifdef HAVE_STATISTICS + /* + * Keep track of both the max number of objects allocated + * per partition and the max number of objects used by + * this bucket. 
+ */ + len++; + if (++lt->obj_stat[ndx].st_nobjects > + lt->obj_stat[ndx].st_maxnobjects) + lt->obj_stat[ndx].st_maxnobjects = + lt->obj_stat[ndx].st_nobjects; + if (++lt->part_array[part_id].part_stat.st_nobjects > + lt->part_array[part_id].part_stat.st_maxnobjects) + lt->part_array[part_id].part_stat.st_maxnobjects = + lt->part_array[part_id].part_stat.st_nobjects; +#endif + + sh_obj->indx = ndx; + SH_TAILQ_INIT(&sh_obj->waiters); + SH_TAILQ_INIT(&sh_obj->holders); + sh_obj->lockobj.size = obj->size; + sh_obj->lockobj.off = + (roff_t)SH_PTR_TO_OFF(&sh_obj->lockobj, p); + SH_TAILQ_INSERT_HEAD( + <->obj_tab[ndx], sh_obj, links, __db_lockobj); + } + +#ifdef HAVE_STATISTICS + if (len > lt->obj_stat[ndx].st_hash_len) + lt->obj_stat[ndx].st_hash_len = len; +#endif + + *retp = sh_obj; + return (0); + +err: return (ret); +} + +/* + * __lock_same_family -- + * Looks for compatible lockers. There are two modes: + * 1) If the lockers 2 belongs to a family transaction, then the locks are + * compatible if the lockers share the same last ancestor. + * 2) Otherwise the lockers are compatible if locker 1 is a parent of + * locker 2. + * Return 1 if the lockers are compatible. + * + * This is used to determine if we should grant locks that appear to conflict, + * but don't because the lock is already held by a compatible locker. + */ +static int +__lock_same_family(lt, sh_locker1, sh_locker2) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker1; + DB_LOCKER *sh_locker2; +{ + while (sh_locker2->parent_locker != INVALID_ROFF) { + sh_locker2 = R_ADDR(<->reginfo, sh_locker2->parent_locker); + if (sh_locker2 == sh_locker1) + return (1); + } + + if (!F_ISSET(sh_locker2, DB_LOCKER_FAMILY_LOCKER)) + return (0); + + /* + * If checking for a family locker situation, compare the last ancestor + * of each locker. 
+ */ + while (sh_locker1->parent_locker != INVALID_ROFF) + sh_locker1 = + R_ADDR(<->reginfo, sh_locker1->parent_locker); + + return (sh_locker1 == sh_locker2); +} + +/* + * __lock_locker_same_family -- + * Determine if "locker" is an ancestor of "child". + * *retp == 1 if so, 0 otherwise. + * + * PUBLIC: int __lock_locker_same_family + * PUBLIC: __P((ENV *, DB_LOCKER *, DB_LOCKER *, int *)); + */ +int +__lock_locker_same_family(env, locker1, locker2, retp) + ENV *env; + DB_LOCKER *locker1; + DB_LOCKER *locker2; + int *retp; +{ + DB_LOCKTAB *lt; + + lt = env->lk_handle; + + /* + * The locker may not exist for this transaction, if not then it has + * no parents. + */ + if (locker1 == NULL) + *retp = 0; + else + *retp = __lock_same_family(lt, locker1, locker2); + return (0); +} + +/* + * __lock_inherit_locks -- + * Called on child commit to merge child's locks with parent's. + */ +static int +__lock_inherit_locks(lt, sh_locker, flags) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; + u_int32_t flags; +{ + DB_LOCKER *sh_parent; + DB_LOCKOBJ *obj; + DB_LOCKREGION *region; + ENV *env; + int ret; + struct __db_lock *hlp, *lp; + roff_t poff; + + env = lt->env; + region = lt->reginfo.primary; + + /* + * Get the committing locker and mark it as deleted. + * This allows us to traverse the locker links without + * worrying that someone else is deleting locks out + * from under us. However, if the locker doesn't + * exist, that just means that the child holds no + * locks, so inheritance is easy! + */ + if (sh_locker == NULL) { + __db_errx(env, __db_locker_invalid); + return (EINVAL); + } + + /* Make sure we are a child transaction. 
*/ + if (sh_locker->parent_locker == INVALID_ROFF) { + __db_errx(env, DB_STR("2039", "Not a child transaction")); + return (EINVAL); + } + sh_parent = R_ADDR(<->reginfo, sh_locker->parent_locker); + + /* + * In order to make it possible for a parent to have + * many, many children who lock the same objects, and + * not require an inordinate number of locks, we try + * to merge the child's locks with its parent's. + */ + poff = R_OFFSET(<->reginfo, sh_parent); + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; + lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { + SH_LIST_REMOVE(lp, locker_links, __db_lock); + + /* See if the parent already has a lock. */ + obj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + OBJECT_LOCK_NDX(lt, region, obj->indx); + SH_TAILQ_FOREACH(hlp, &obj->holders, links, __db_lock) + if (hlp->holder == poff && lp->mode == hlp->mode) + break; + + if (hlp != NULL) { + /* Parent already holds lock. */ + hlp->refcount += lp->refcount; + + /* Remove lock from object list and free it. */ + DB_ASSERT(env, lp->status == DB_LSTAT_HELD); + SH_TAILQ_REMOVE(&obj->holders, lp, links, __db_lock); + (void)__lock_freelock(lt, lp, sh_locker, DB_LOCK_FREE); + } else { + /* Just move lock to parent chains. */ + SH_LIST_INSERT_HEAD(&sh_parent->heldby, + lp, locker_links, __db_lock); + lp->holder = poff; + sh_parent->nlocks++; + if (IS_WRITELOCK(lp->mode)) + sh_parent->nwrites++; + } + + /* + * We may need to promote regardless of whether we simply + * moved the lock to the parent or changed the parent's + * reference count, because there might be a sibling waiting, + * who will now be allowed to make forward progress. + */ + ret = __lock_promote(lt, obj, NULL, flags); + OBJECT_UNLOCK(lt, region, obj->indx); + if (ret != 0) + return (ret); + } + + return (0); +} +/* + * __lock_wakeup -- + * + * Wakeup any waiters on a lock objects. 
+ * + * PUBLIC: int __lock_wakeup __P((ENV *, const DBT *)); + */ +int +__lock_wakeup(env, obj) + ENV *env; + const DBT *obj; +{ + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t ndx; + int ret; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + lt = env->lk_handle; + region = lt->reginfo.primary; + + OBJECT_LOCK(lt, region, obj, ndx); + if ((ret = __lock_getobj(lt, obj, ndx, 0, &sh_obj)) == 0 && + sh_obj != NULL) + ret = __lock_promote(lt, sh_obj, NULL, DB_LOCK_ONEWAITER); + + OBJECT_UNLOCK(lt, region, ndx); + return (ret); +} + +/* + * __lock_promote -- + * + * Look through the waiters and holders lists and decide which (if any) + * locks can be promoted. Promote any that are eligible. + * + * PUBLIC: int __lock_promote + * PUBLIC: __P((DB_LOCKTAB *, DB_LOCKOBJ *, int *, u_int32_t)); + */ +int +__lock_promote(lt, obj, state_changedp, flags) + DB_LOCKTAB *lt; + DB_LOCKOBJ *obj; + int *state_changedp; + u_int32_t flags; +{ + struct __db_lock *lp_w, *lp_h, *next_waiter; + DB_LOCKREGION *region; + int had_waiters, state_changed; + + region = lt->reginfo.primary; + had_waiters = 0; + + /* + * We need to do lock promotion. We also need to determine if we're + * going to need to run the deadlock detector again. If we release + * locks, and there are waiters, but no one gets promoted, then we + * haven't fundamentally changed the lockmgr state, so we may still + * have a deadlock and we have to run again. However, if there were + * no waiters, or we actually promoted someone, then we are OK and we + * don't have to run it immediately. + * + * During promotion, we look for state changes so we can return this + * information to the caller. 
+ */ + + for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock), + state_changed = lp_w == NULL; + lp_w != NULL; + lp_w = next_waiter) { + had_waiters = 1; + next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock); + + /* Waiter may have aborted or expired. */ + if (lp_w->status != DB_LSTAT_WAITING) + continue; + + SH_TAILQ_FOREACH(lp_h, &obj->holders, links, __db_lock) { + if (lp_h->holder != lp_w->holder && + CONFLICTS(lt, region, lp_h->mode, lp_w->mode)) { + if (!__lock_same_family(lt, + R_ADDR(<->reginfo, lp_h->holder), + R_ADDR(<->reginfo, lp_w->holder))) + break; + } + } + if (lp_h != NULL) /* Found a conflict. */ + break; + + /* No conflict, promote the waiting lock. */ + SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock); + lp_w->status = DB_LSTAT_PENDING; + SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links); + + /* Wake up waiter. */ + MUTEX_UNLOCK(lt->env, lp_w->mtx_lock); + state_changed = 1; + if (LF_ISSET(DB_LOCK_ONEWAITER)) + break; + } + + /* + * If this object had waiters and doesn't any more, then we need + * to remove it from the dd_obj list. + */ + if (had_waiters && SH_TAILQ_FIRST(&obj->waiters, __db_lock) == NULL) { + LOCK_DD(lt->env, region); + /* + * Bump the generation when removing an object from the + * queue so that the deadlock detector will retry. + */ + obj->generation++; + SH_TAILQ_REMOVE(®ion->dd_objs, obj, dd_links, __db_lockobj); + UNLOCK_DD(lt->env, region); + } + + if (state_changedp != NULL) + *state_changedp = state_changed; + + return (0); +} + +/* + * __lock_remove_waiter -- + * Any lock on the waitlist has a process waiting for it. Therefore, + * we can't return the lock to the freelist immediately. Instead, we can + * remove the lock from the list of waiters, set the status field of the + * lock, and then let the process waking up return the lock to the + * free list. + * + * This must be called with the Object bucket locked. 
+ */ +static int +__lock_remove_waiter(lt, sh_obj, lockp, status) + DB_LOCKTAB *lt; + DB_LOCKOBJ *sh_obj; + struct __db_lock *lockp; + db_status_t status; +{ + DB_LOCKREGION *region; + int do_wakeup; + + region = lt->reginfo.primary; + + do_wakeup = lockp->status == DB_LSTAT_WAITING; + + SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); + lockp->links.stqe_prev = -1; + lockp->status = status; + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + LOCK_DD(lt->env, region); + sh_obj->generation++; + SH_TAILQ_REMOVE( + ®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + UNLOCK_DD(lt->env, region); + } + + /* + * Wake whoever is waiting on this lock. + */ + if (do_wakeup) + MUTEX_UNLOCK(lt->env, lockp->mtx_lock); + + return (0); +} + +/* + * __lock_trade -- + * + * Trade locker ids on a lock. This is used to reassign file locks from + * a transactional locker id to a long-lived locker id. This should be + * called with the region mutex held. + */ +static int +__lock_trade(env, lock, new_locker) + ENV *env; + DB_LOCK *lock; + DB_LOCKER *new_locker; +{ + struct __db_lock *lp; + DB_LOCKTAB *lt; + int ret; + + lt = env->lk_handle; + lp = R_ADDR(<->reginfo, lock->off); + + /* If the lock is already released, simply return. */ + if (lp->gen != lock->gen) + return (DB_NOTFOUND); + + if (new_locker == NULL) { + __db_errx(env, DB_STR("2040", "Locker does not exist")); + return (EINVAL); + } + + /* Remove the lock from its current locker. */ + if ((ret = __lock_freelock(lt, + lp, R_ADDR(<->reginfo, lp->holder), DB_LOCK_UNLINK)) != 0) + return (ret); + + /* Add lock to its new locker. */ + SH_LIST_INSERT_HEAD(&new_locker->heldby, lp, locker_links, __db_lock); + new_locker->nlocks++; + if (IS_WRITELOCK(lp->mode)) + new_locker->nwrites++; + lp->holder = R_OFFSET(<->reginfo, new_locker); + + return (0); +} + +/* + * __lock_change -- + * + * PUBLIC: int __lock_change __P((ENV *, DB_LOCK *, DB_LOCK *)); + * + * Change a lock to a different object. 
This is used when we move a + * metadata page to change the handle lock. We know that the new lock + * has replaced the old lock so we just delete that lock. + */ +int +__lock_change(env, old_lock, new_lock) + ENV *env; + DB_LOCK *old_lock, *new_lock; +{ + struct __db_lock *lp, *old_lp; + DB_LOCKOBJ *old_obj, *new_obj; + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + u_int32_t old_part, new_part; + int ret; + + lt = env->lk_handle; + region = lt->reginfo.primary; + + old_lp = R_ADDR(<->reginfo, old_lock->off); + DB_ASSERT(env, old_lp->gen == old_lock->gen); + old_obj = SH_OFF_TO_PTR(old_lp, old_lp->obj, DB_LOCKOBJ); + + lp = R_ADDR(<->reginfo, new_lock->off); + DB_ASSERT(env, lp->gen == new_lock->gen); + new_obj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + + /* Don't deadlock on partition mutexes, order the latches. */ + LOCK_SYSTEM_LOCK(lt, region); + old_part = LOCK_PART(region, old_obj->indx); + new_part = LOCK_PART(region, new_obj->indx); + + if (old_part == new_part) + MUTEX_LOCK_PARTITION(lt, region, old_part); + else if (new_obj->indx < old_obj->indx) { + MUTEX_LOCK_PARTITION(lt, region, new_part); + MUTEX_LOCK_PARTITION(lt, region, old_part); + } else { + MUTEX_LOCK_PARTITION(lt, region, old_part); + MUTEX_LOCK_PARTITION(lt, region, new_part); + } + + for (lp = SH_TAILQ_FIRST(&old_obj->waiters, __db_lock); + lp != NULL; + lp = SH_TAILQ_FIRST(&old_obj->waiters, __db_lock)) { + SH_TAILQ_REMOVE(&old_obj->waiters, lp, links, __db_lock); + SH_TAILQ_INSERT_TAIL(&new_obj->waiters, lp, links); + lp->indx = new_obj->indx; + lp->obj = (roff_t)SH_PTR_TO_OFF(lp, new_obj); + } + + for (lp = SH_TAILQ_FIRST(&old_obj->holders, __db_lock); + lp != NULL; + lp = SH_TAILQ_FIRST(&old_obj->holders, __db_lock)) { + SH_TAILQ_REMOVE(&old_obj->holders, lp, links, __db_lock); + if (lp == old_lp) + continue; + SH_TAILQ_INSERT_TAIL(&new_obj->holders, lp, links); + lp->indx = new_obj->indx; + lp->obj = (roff_t)SH_PTR_TO_OFF(lp, new_obj); + } + + /* Put the lock back in and call put so the 
object goes away too. */ + SH_TAILQ_INSERT_TAIL(&old_obj->holders, old_lp, links); + ret = __lock_put_internal(lt, old_lp, old_obj->indx, + DB_LOCK_UNLINK | DB_LOCK_FREE | DB_LOCK_NOPROMOTE); + + MUTEX_UNLOCK_PARTITION(lt, region, new_part); + if (new_part != old_part) + MUTEX_UNLOCK_PARTITION(lt, region, old_part); + LOCK_SYSTEM_UNLOCK(lt, region); + + return (ret); +} diff --git a/src/lock/lock_alloc.incl b/src/lock/lock_alloc.incl new file mode 100644 index 00000000..e14d43b0 --- /dev/null +++ b/src/lock/lock_alloc.incl @@ -0,0 +1,138 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This is a template for allocation in the lock region. The following + * macros must be defined: + * + * FREE_LIST_HEAD -- the name of the head of the free list. + * STRUCT_NAME -- the name of the structure in the free list. + * CURRENT_COUNT -- structure element for count of current objects. + * MAX_COUNT -- structure element for max of current objects. + * STEAL_NAME -- name of stat to track steals. + * STEAL_EVENT -- name of event to track steals. + */ +#define __lock_alloc() /* for ctags */ +{ + struct STRUCT_NAME *sh_thing; + DB_LOCKPART *end_p, *cur_p, *orig_p; + DB_LOCKREGION *region; + int begin, locked; + u_int32_t i, nobjs; + + region = lt->reginfo.primary; + + orig_p = <->part_array[part_id]; + if (region->part_t_size == 1) + goto alloc; +retry: MUTEX_UNLOCK(lt->env, orig_p->mtx_part); + locked = 0; + sh_thing = NULL; + end_p = <->part_array[region->part_t_size]; + /* + * Start looking at the next partition and wrap around. If + * we get back to our partition then raise an error. 
+ */ + begin = 0; + nobjs = 0; + cur_p = orig_p + 1; +again: for (; sh_thing == NULL && cur_p < end_p; cur_p++) { + MUTEX_LOCK(lt->env, cur_p->mtx_part); + if ((sh_thing = SH_TAILQ_FIRST( + &cur_p->FREE_LIST_HEAD, STRUCT_NAME)) != NULL) + SH_TAILQ_REMOVE(&cur_p->FREE_LIST_HEAD, + sh_thing, links, STRUCT_NAME); + MUTEX_UNLOCK(lt->env, cur_p->mtx_part); + } + if (sh_thing != NULL) { + MUTEX_LOCK(lt->env, orig_p->mtx_part); + SH_TAILQ_INSERT_HEAD(&orig_p->FREE_LIST_HEAD, + sh_thing, links, STRUCT_NAME); + STAT_INC_VERB(env, + lock, STEAL_EVENT, orig_p->part_stat.STEAL_NAME, + cur_p - lt->part_array, part_id); + return (0); + } + if (!begin) { + begin = 1; + cur_p = lt->part_array; + end_p = orig_p; + goto again; + } + /* + * Try to get some more space in the region. + */ + LOCK_REGION_LOCK(lt->env); + MUTEX_LOCK(lt->env, orig_p->mtx_part); + locked = 1; + nobjs = 0; + /* check to see if we raced with someone. */ + if ((region->stat.MAX_COUNT == 0 || + region->stat.CURRENT_COUNT < region->stat.MAX_COUNT) && + SH_TAILQ_FIRST(&orig_p->FREE_LIST_HEAD, STRUCT_NAME) == NULL) { + MUTEX_UNLOCK(lt->env, orig_p->mtx_part); +alloc: locked = 0; + sh_thing = NULL; + cur_p = orig_p; + end_p = <->part_array[region->part_t_size]; + nobjs = region->stat.CURRENT_COUNT >> 2; + /* Just in case. */ + if (nobjs == 0) + nobjs = 1; + if (region->stat.MAX_COUNT != 0 && + region->stat.MAX_COUNT < + region->stat.CURRENT_COUNT + nobjs) + nobjs = region->stat.MAX_COUNT - + region->stat.CURRENT_COUNT; + /* + * If the max memory is not sized for max objects, + * allocate as much as possible. 
+ */ + F_SET(<->reginfo, REGION_TRACKED); + while (__env_alloc(<->reginfo, + nobjs * sizeof(struct STRUCT_NAME), &sh_thing) != 0) + if ((nobjs >>= 1) == 0) + break; + F_CLR(<->reginfo, REGION_TRACKED); + region->stat.CURRENT_COUNT += nobjs; + if (region->part_t_size != 1) + LOCK_REGION_UNLOCK(lt->env); + + if (nobjs == 0) + goto err; + + for (i = 0; i < nobjs; i++) { + memset(sh_thing, 0, sizeof (struct STRUCT_NAME)); + if (&cur_p->free_locks == + (struct __flock *)&cur_p->FREE_LIST_HEAD) + ((struct __db_lock *) + sh_thing)->status = DB_LSTAT_FREE; + MUTEX_LOCK(lt->env, cur_p->mtx_part); + SH_TAILQ_INSERT_HEAD(&cur_p->FREE_LIST_HEAD, + sh_thing, links, STRUCT_NAME); + MUTEX_UNLOCK(lt->env, cur_p->mtx_part); + if (region->part_t_size != 1 && ++cur_p == end_p) + cur_p = lt->part_array; + sh_thing++; + } + if (region->part_t_size != 1) + MUTEX_LOCK(lt->env, orig_p->mtx_part); + locked = 1; + } else + LOCK_REGION_UNLOCK(lt->env); + + if (SH_TAILQ_FIRST(&orig_p->FREE_LIST_HEAD, STRUCT_NAME) != NULL) + return (0); + /* Somone stole all the locks! */ + if (nobjs > 0) + goto retry; + +err: if (region->part_t_size != 1 && locked == 0) + MUTEX_LOCK(lt->env, orig_p->mtx_part); + return (__lock_nomem(lt->env, "lock entries")); +} diff --git a/src/lock/lock_deadlock.c b/src/lock/lock_deadlock.c new file mode 100644 index 00000000..4d5c3c8d --- /dev/null +++ b/src/lock/lock_deadlock.c @@ -0,0 +1,1063 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +#define ISSET_MAP(M, N) ((M)[(N) / 32] & (1 << ((N) % 32))) + +#define CLEAR_MAP(M, N) { \ + u_int32_t __i; \ + for (__i = 0; __i < (N); __i++) \ + (M)[__i] = 0; \ +} + +#define SET_MAP(M, B) ((M)[(B) / 32] |= (1 << ((B) % 32))) +#define CLR_MAP(M, B) ((M)[(B) / 32] &= ~((u_int)1 << ((B) % 32))) + +#define OR_MAP(D, S, N) { \ + u_int32_t __i; \ + for (__i = 0; __i < (N); __i++) \ + D[__i] |= S[__i]; \ +} +#define BAD_KILLID 0xffffffff + +typedef struct { + int valid; + int self_wait; + int in_abort; + u_int32_t count; + u_int32_t id; + roff_t last_lock; + roff_t last_obj; + u_int32_t last_ndx; + u_int32_t last_locker_id; + db_pgno_t pgno; + u_int32_t priority; +} locker_info; + +static int __dd_abort __P((ENV *, locker_info *, int *)); +static int __dd_build __P((ENV *, u_int32_t, u_int32_t **, + u_int32_t *, u_int32_t *, locker_info **, int*, int*)); +static int __dd_find __P((ENV *, + u_int32_t *, locker_info *, u_int32_t, u_int32_t, u_int32_t ***)); +static int __dd_isolder __P((u_int32_t, u_int32_t, u_int32_t, u_int32_t)); +static int __dd_verify __P((locker_info *, u_int32_t *, u_int32_t *, + u_int32_t *, u_int32_t, u_int32_t, u_int32_t)); + +#ifdef DIAGNOSTIC +static void __dd_debug + __P((ENV *, locker_info *, u_int32_t *, u_int32_t, u_int32_t)); +#endif + +/* + * __lock_detect_pp -- + * ENV->lock_detect pre/post processing. + * + * PUBLIC: int __lock_detect_pp __P((DB_ENV *, u_int32_t, u_int32_t, int *)); + */ +int +__lock_detect_pp(dbenv, flags, atype, rejectp) + DB_ENV *dbenv; + u_int32_t flags, atype; + int *rejectp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_detect", DB_INIT_LOCK); + + /* Validate arguments. 
*/ + if ((ret = __db_fchk(env, "DB_ENV->lock_detect", flags, 0)) != 0) + return (ret); + switch (atype) { + case DB_LOCK_DEFAULT: + case DB_LOCK_EXPIRE: + case DB_LOCK_MAXLOCKS: + case DB_LOCK_MAXWRITE: + case DB_LOCK_MINLOCKS: + case DB_LOCK_MINWRITE: + case DB_LOCK_OLDEST: + case DB_LOCK_RANDOM: + case DB_LOCK_YOUNGEST: + break; + default: + __db_errx(env, DB_STR("2048", + "DB_ENV->lock_detect: unknown deadlock detection mode specified")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__lock_detect(env, atype, rejectp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_detect -- + * ENV->lock_detect. + * + * PUBLIC: int __lock_detect __P((ENV *, u_int32_t, int *)); + */ +int +__lock_detect(env, atype, rejectp) + ENV *env; + u_int32_t atype; + int *rejectp; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + db_timespec now; + locker_info *idmap; + u_int32_t *bitmap, *copymap, **deadp, **deadlist, *tmpmap; + u_int32_t i, cid, keeper, killid, limit, nalloc, nlockers; + u_int32_t lock_max, txn_max; + int pri_set, ret, status; + + /* + * If this environment is a replication client, then we must use the + * MINWRITE detection discipline. + */ + if (IS_REP_CLIENT(env)) + atype = DB_LOCK_MINWRITE; + + copymap = tmpmap = NULL; + deadlist = NULL; + + lt = env->lk_handle; + if (rejectp != NULL) + *rejectp = 0; + + /* Check if a detector run is necessary. */ + + /* Make a pass only if auto-detect would run. */ + region = lt->reginfo.primary; + + timespecclear(&now); + if (region->need_dd == 0 && + (!timespecisset(®ion->next_timeout) || + !__clock_expired(env, &now, ®ion->next_timeout))) { + return (0); + } + if (region->need_dd == 0) + atype = DB_LOCK_EXPIRE; + + /* Reset need_dd, so we know we've run the detector. */ + region->need_dd = 0; + + /* Build the waits-for bitmap. 
*/ + ret = __dd_build(env, + atype, &bitmap, &nlockers, &nalloc, &idmap, rejectp, &pri_set); + lock_max = region->stat.st_cur_maxid; + if (ret != 0 || atype == DB_LOCK_EXPIRE) + return (ret); + + /* If there are no lockers, there are no deadlocks. */ + if (nlockers == 0) + return (0); + +#ifdef DIAGNOSTIC + if (FLD_ISSET(env->dbenv->verbose, DB_VERB_WAITSFOR)) + __dd_debug(env, idmap, bitmap, nlockers, nalloc); +#endif + + /* Now duplicate the bitmaps so we can verify deadlock participants. */ + if ((ret = __os_calloc(env, (size_t)nlockers, + sizeof(u_int32_t) * nalloc, ©map)) != 0) + goto err; + memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc); + + if ((ret = __os_calloc(env, sizeof(u_int32_t), nalloc, &tmpmap)) != 0) + goto err; + + /* Find a deadlock. */ + if ((ret = + __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadlist)) != 0) + return (ret); + + /* + * We need the cur_maxid from the txn region as well. In order + * to avoid tricky synchronization between the lock and txn + * regions, we simply unlock the lock region and then lock the + * txn region. This introduces a small window during which the + * transaction system could then wrap. We're willing to return + * the wrong answer for "oldest" or "youngest" in those rare + * circumstances. + */ + if (TXN_ON(env)) { + TXN_SYSTEM_LOCK(env); + txn_max = ((DB_TXNREGION *) + env->tx_handle->reginfo.primary)->cur_maxid; + TXN_SYSTEM_UNLOCK(env); + } else + txn_max = TXN_MAXIMUM; + + killid = BAD_KILLID; + for (deadp = deadlist; *deadp != NULL; deadp++) { + if (rejectp != NULL) + ++*rejectp; + killid = (u_int32_t)(*deadp - bitmap) / nalloc; + limit = killid; + + /* + * There are cases in which our general algorithm will + * fail. Returning 1 from verify indicates that the + * particular locker is not only involved in a deadlock, + * but that killing him will allow others to make forward + * progress. 
Unfortunately, there are cases where we need + * to abort someone, but killing them will not necessarily + * ensure forward progress (imagine N readers all trying to + * acquire a write lock). + * killid is only set to lockers that pass the db_verify test. + * keeper will hold the best candidate even if it does + * not pass db_verify. Once we fill in killid then we do + * not need a keeper, but we keep updating it anyway. + */ + + keeper = idmap[killid].in_abort == 0 ? killid : BAD_KILLID; + if (keeper == BAD_KILLID || + __dd_verify(idmap, *deadp, + tmpmap, copymap, nlockers, nalloc, keeper) == 0) + killid = BAD_KILLID; + + if (!pri_set && killid != BAD_KILLID && + (atype == DB_LOCK_DEFAULT || atype == DB_LOCK_RANDOM)) + goto dokill; + + /* + * Start with the id that we know is deadlocked, then examine + * all other set bits and see if any are a better candidate + * for abortion and they are genuinely part of the deadlock. + * The definition of "best": + * MAXLOCKS: maximum count + * MAXWRITE: maximum write count + * MINLOCKS: minimum count + * MINWRITE: minimum write count + * OLDEST: smallest id + * YOUNGEST: largest id + */ + for (i = (limit + 1) % nlockers; + i != limit; + i = (i + 1) % nlockers) { + if (!ISSET_MAP(*deadp, i) || idmap[i].in_abort) + continue; + + /* + * Determine if we have a verified candidate + * in killid, if not then compare with the + * non-verified candidate in keeper. + */ + if (killid == BAD_KILLID) { + if (keeper == BAD_KILLID) + goto use_next; + else + cid = keeper; + } else + cid = killid; + + if (idmap[i].priority > idmap[cid].priority) + continue; + if (idmap[i].priority < idmap[cid].priority) + goto use_next; + + /* Equal priorities, break ties using atype. 
*/ + switch (atype) { + case DB_LOCK_OLDEST: + if (__dd_isolder(idmap[cid].id, + idmap[i].id, lock_max, txn_max)) + continue; + break; + case DB_LOCK_YOUNGEST: + if (__dd_isolder(idmap[i].id, + idmap[cid].id, lock_max, txn_max)) + continue; + break; + case DB_LOCK_MAXLOCKS: + if (idmap[i].count < idmap[cid].count) + continue; + break; + case DB_LOCK_MAXWRITE: + if (idmap[i].count < idmap[cid].count) + continue; + break; + case DB_LOCK_MINLOCKS: + case DB_LOCK_MINWRITE: + if (idmap[i].count > idmap[cid].count) + continue; + break; + case DB_LOCK_DEFAULT: + case DB_LOCK_RANDOM: + continue; + + default: + killid = BAD_KILLID; + ret = EINVAL; + goto dokill; + } + +use_next: keeper = i; + if (__dd_verify(idmap, *deadp, + tmpmap, copymap, nlockers, nalloc, i)) + killid = i; + } + +dokill: if (killid == BAD_KILLID) { + if (keeper == BAD_KILLID) + continue; + else { + /* + * Removing a single locker will not + * break the deadlock, signal to run + * detection again. + */ + region->need_dd = 1; + killid = keeper; + } + } + + /* Kill the locker with lockid idmap[killid]. */ + if ((ret = __dd_abort(env, &idmap[killid], &status)) != 0) + break; + + /* + * It's possible that the lock was already aborted; this isn't + * necessarily a problem, so do not treat it as an error. If + * the txn was aborting and deadlocked trying to upgrade + * a was_write lock, the detector should be run again or + * the deadlock might persist. 
+ */ + if (status != 0) { + if (status != DB_ALREADY_ABORTED) + __db_errx(env, DB_STR_A("2049", + "warning: unable to abort locker %lx", + "%lx"), (u_long)idmap[killid].id); + else + region->need_dd = 1; + } else if (FLD_ISSET(env->dbenv->verbose, DB_VERB_DEADLOCK)) + __db_msg(env, DB_STR_A("2050", "Aborting locker %lx", + "%lx"), (u_long)idmap[killid].id); + } +err: if (copymap != NULL) + __os_free(env, copymap); + if (deadlist != NULL) + __os_free(env, deadlist); + if (tmpmap != NULL) + __os_free(env, tmpmap); + __os_free(env, bitmap); + __os_free(env, idmap); + + return (ret); +} + +/* + * ======================================================================== + * Utilities + */ + +#define DD_INVALID_ID ((u_int32_t) -1) + +/* + * __dd_build -- + * Build the lock dependency bit maps. + * Notes on syncronization: + * LOCK_SYSTEM_LOCK is used to hold objects locked when we have + * a single partition. + * LOCK_LOCKERS is held while we are walking the lockers list and + * to single thread the use of lockerp->dd_id. + * LOCK_DD protects the DD list of objects. + */ + +static int +__dd_build(env, atype, bmp, nlockers, allocp, idmap, rejectp, pri_set) + ENV *env; + u_int32_t atype, **bmp, *nlockers, *allocp; + locker_info **idmap; + int *pri_set, *rejectp; +{ + struct __db_lock *lp; + DB_LOCKER *lip, *lockerp, *child; + DB_LOCKOBJ *op, *lo, *np; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + locker_info *id_array; + db_timespec now, min_timeout; + u_int32_t *bitmap, count, dd; + u_int32_t *entryp, gen, id, indx, ndx, nentries, *tmpmap; + u_int8_t *pptr; + int is_first, ret; + + COMPQUIET(indx, 0); + lt = env->lk_handle; + region = lt->reginfo.primary; + timespecclear(&now); + timespecclear(&min_timeout); + + /* + * While we always check for expired timeouts, if we are called with + * DB_LOCK_EXPIRE, then we are only checking for timeouts (i.e., not + * doing deadlock detection at all). 
If we aren't doing real deadlock + * detection, then we can skip a significant, amount of the processing. + * In particular we do not build the conflict array and our caller + * needs to expect this. + */ + LOCK_SYSTEM_LOCK(lt, region); + if (atype == DB_LOCK_EXPIRE) { +skip: LOCK_DD(env, region); + op = SH_TAILQ_FIRST(®ion->dd_objs, __db_lockobj); + for (; op != NULL; op = np) { + indx = op->indx; + gen = op->generation; + UNLOCK_DD(env, region); + OBJECT_LOCK_NDX(lt, region, indx); + if (op->generation != gen) { + OBJECT_UNLOCK(lt, region, indx); + goto skip; + } + SH_TAILQ_FOREACH(lp, &op->waiters, links, __db_lock) { + lockerp = (DB_LOCKER *) + R_ADDR(<->reginfo, lp->holder); + if (lp->status == DB_LSTAT_WAITING) { + if (__clock_expired(env, + &now, &lockerp->lk_expire)) { + lp->status = DB_LSTAT_EXPIRED; + MUTEX_UNLOCK( + env, lp->mtx_lock); + if (rejectp != NULL) + ++*rejectp; + continue; + } + if (timespecisset( + &lockerp->lk_expire) && + (!timespecisset(&min_timeout) || + timespeccmp(&min_timeout, + &lockerp->lk_expire, >))) + min_timeout = + lockerp->lk_expire; + } + } + LOCK_DD(env, region); + np = SH_TAILQ_NEXT(op, dd_links, __db_lockobj); + OBJECT_UNLOCK(lt, region, indx); + } + UNLOCK_DD(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + goto done; + } + + /* + * Allocate after locking the region + * to make sure the structures are large enough. + */ + LOCK_LOCKERS(env, region); + count = region->nlockers; + if (count == 0) { + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + *nlockers = 0; + return (0); + } + + if (FLD_ISSET(env->dbenv->verbose, DB_VERB_DEADLOCK)) + __db_msg(env, DB_STR_A("2051", "%lu lockers", + "%lu"), (u_long)count); + + nentries = (u_int32_t)DB_ALIGN(count, 32) / 32; + + /* Allocate enough space for a count by count bitmap matrix. 
*/ + if ((ret = __os_calloc(env, (size_t)count, + sizeof(u_int32_t) * nentries, &bitmap)) != 0) { + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + return (ret); + } + + if ((ret = __os_calloc(env, + sizeof(u_int32_t), nentries, &tmpmap)) != 0) { + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + __os_free(env, bitmap); + return (ret); + } + + if ((ret = __os_calloc(env, + (size_t)count, sizeof(locker_info), &id_array)) != 0) { + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + __os_free(env, bitmap); + __os_free(env, tmpmap); + return (ret); + } + + /* + * First we go through and assign each locker a deadlock detector id. + */ + id = 0; + *pri_set = 0; + SH_TAILQ_FOREACH(lip, ®ion->lockers, ulinks, __db_locker) { + if (lip->master_locker == INVALID_ROFF) { + DB_ASSERT(env, id < count); + lip->dd_id = id++; + id_array[lip->dd_id].id = lip->id; + id_array[lip->dd_id].priority = lip->priority; + if (lip->dd_id > 0 && + id_array[lip->dd_id-1].priority != lip->priority) + *pri_set = 1; + + switch (atype) { + case DB_LOCK_MINLOCKS: + case DB_LOCK_MAXLOCKS: + id_array[lip->dd_id].count = lip->nlocks; + break; + case DB_LOCK_MINWRITE: + case DB_LOCK_MAXWRITE: + id_array[lip->dd_id].count = lip->nwrites; + break; + default: + break; + } + } else + lip->dd_id = DD_INVALID_ID; + + } + + /* + * We only need consider objects that have waiters, so we use + * the list of objects with waiters (dd_objs) instead of traversing + * the entire hash table. For each object, we traverse the waiters + * list and add an entry in the waitsfor matrix for each waiter/holder + * combination. We don't want to lock from the DD mutex to the + * hash mutex, so we drop deadlock mutex and get the hash mutex. Then + * check to see if the object has changed. Once we have the object + * locked then locks cannot be remove and lockers cannot go away. + */ + if (0) { + /* If an object has changed state, start over. 
*/ +again: memset(bitmap, 0, count * sizeof(u_int32_t) * nentries); + } + LOCK_DD(env, region); + op = SH_TAILQ_FIRST(®ion->dd_objs, __db_lockobj); + for (; op != NULL; op = np) { + indx = op->indx; + gen = op->generation; + UNLOCK_DD(env, region); + + OBJECT_LOCK_NDX(lt, region, indx); + if (gen != op->generation) { + OBJECT_UNLOCK(lt, region, indx); + goto again; + } + + /* + * First we go through and create a bit map that + * represents all the holders of this object. + */ + + CLEAR_MAP(tmpmap, nentries); + SH_TAILQ_FOREACH(lp, &op->holders, links, __db_lock) { + lockerp = (DB_LOCKER *)R_ADDR(<->reginfo, lp->holder); + + if (lockerp->dd_id == DD_INVALID_ID) { + /* + * If the locker was not here when we started, + * then it was not deadlocked at that time. + */ + if (lockerp->master_locker == INVALID_ROFF) + continue; + dd = ((DB_LOCKER *)R_ADDR(<->reginfo, + lockerp->master_locker))->dd_id; + if (dd == DD_INVALID_ID) + continue; + lockerp->dd_id = dd; + switch (atype) { + case DB_LOCK_MINLOCKS: + case DB_LOCK_MAXLOCKS: + id_array[dd].count += lockerp->nlocks; + break; + case DB_LOCK_MINWRITE: + case DB_LOCK_MAXWRITE: + id_array[dd].count += lockerp->nwrites; + break; + default: + break; + } + + } else + dd = lockerp->dd_id; + id_array[dd].valid = 1; + + /* + * If the holder has already been aborted, then + * we should ignore it for now. + */ + if (lp->status == DB_LSTAT_HELD) + SET_MAP(tmpmap, dd); + } + + /* + * Next, for each waiter, we set its row in the matrix + * equal to the map of holders we set up above. 
+ */ + for (is_first = 1, + lp = SH_TAILQ_FIRST(&op->waiters, __db_lock); + lp != NULL; + is_first = 0, + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { + lockerp = (DB_LOCKER *)R_ADDR(<->reginfo, lp->holder); + if (lp->status == DB_LSTAT_WAITING) { + if (__clock_expired(env, + &now, &lockerp->lk_expire)) { + lp->status = DB_LSTAT_EXPIRED; + MUTEX_UNLOCK(env, lp->mtx_lock); + if (rejectp != NULL) + ++*rejectp; + continue; + } + if (timespecisset(&lockerp->lk_expire) && + (!timespecisset(&min_timeout) || + timespeccmp( + &min_timeout, &lockerp->lk_expire, >))) + min_timeout = lockerp->lk_expire; + } + + if (lockerp->dd_id == DD_INVALID_ID) { + dd = ((DB_LOCKER *)R_ADDR(<->reginfo, + lockerp->master_locker))->dd_id; + lockerp->dd_id = dd; + switch (atype) { + case DB_LOCK_MINLOCKS: + case DB_LOCK_MAXLOCKS: + id_array[dd].count += lockerp->nlocks; + break; + case DB_LOCK_MINWRITE: + case DB_LOCK_MAXWRITE: + id_array[dd].count += lockerp->nwrites; + break; + default: + break; + } + } else + dd = lockerp->dd_id; + id_array[dd].valid = 1; + + /* + * If the transaction is pending abortion, then + * ignore it on this iteration. + */ + if (lp->status != DB_LSTAT_WAITING) + continue; + + entryp = bitmap + (nentries * dd); + OR_MAP(entryp, tmpmap, nentries); + /* + * If this is the first waiter on the queue, + * then we remove the waitsfor relationship + * with oneself. However, if it's anywhere + * else on the queue, then we have to keep + * it and we have an automatic deadlock. + */ + if (is_first) { + if (ISSET_MAP(entryp, dd)) + id_array[dd].self_wait = 1; + CLR_MAP(entryp, dd); + } + } + LOCK_DD(env, region); + np = SH_TAILQ_NEXT(op, dd_links, __db_lockobj); + OBJECT_UNLOCK(lt, region, indx); + } + UNLOCK_DD(env, region); + + /* + * Now for each locker, record its last lock and set abort status. + * We need to look at the heldby list carefully. We have the LOCKERS + * locked so they cannot go away. 
The lock at the head of the + * list can be removed by locking the object it points at. + * Since lock memory is not freed if we get a lock we can look + * at it safely but SH_LIST_FIRST is not atomic, so we check that + * the list has not gone empty during that macro. We check abort + * status after building the bit maps so that we will not detect + * a blocked transaction without noting that it is already aborting. + */ + for (id = 0; id < count; id++) { + if (!id_array[id].valid) + continue; + if ((ret = __lock_getlocker_int(lt, + id_array[id].id, 0, &lockerp)) != 0 || lockerp == NULL) + continue; + + /* + * If this is a master transaction, try to + * find one of its children's locks first, + * as they are probably more recent. + */ + child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker); + if (child != NULL) { + do { +c_retry: lp = SH_LIST_FIRST(&child->heldby, __db_lock); + if (SH_LIST_EMPTY(&child->heldby) || lp == NULL) + goto c_next; + + if (F_ISSET(child, DB_LOCKER_INABORT)) + id_array[id].in_abort = 1; + ndx = lp->indx; + OBJECT_LOCK_NDX(lt, region, ndx); + if (lp != SH_LIST_FIRST( + &child->heldby, __db_lock) || + ndx != lp->indx) { + OBJECT_UNLOCK(lt, region, ndx); + goto c_retry; + } + + if (lp != NULL && + lp->status == DB_LSTAT_WAITING) { + id_array[id].last_locker_id = child->id; + goto get_lock; + } else { + OBJECT_UNLOCK(lt, region, ndx); + } +c_next: child = SH_LIST_NEXT( + child, child_link, __db_locker); + } while (child != NULL); + } + +l_retry: lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock); + if (!SH_LIST_EMPTY(&lockerp->heldby) && lp != NULL) { + ndx = lp->indx; + OBJECT_LOCK_NDX(lt, region, ndx); + if (lp != SH_LIST_FIRST(&lockerp->heldby, __db_lock) || + lp->indx != ndx) { + OBJECT_UNLOCK(lt, region, ndx); + goto l_retry; + } + id_array[id].last_locker_id = lockerp->id; +get_lock: id_array[id].last_lock = R_OFFSET(<->reginfo, lp); + id_array[id].last_obj = lp->obj; + lo = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + 
id_array[id].last_ndx = lo->indx; + pptr = SH_DBT_PTR(&lo->lockobj); + if (lo->lockobj.size >= sizeof(db_pgno_t)) + memcpy(&id_array[id].pgno, + pptr, sizeof(db_pgno_t)); + else + id_array[id].pgno = 0; + OBJECT_UNLOCK(lt, region, ndx); + } + if (F_ISSET(lockerp, DB_LOCKER_INABORT)) + id_array[id].in_abort = 1; + } + UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + + /* + * Now we can release everything except the bitmap matrix that we + * created. + */ + *nlockers = id; + *idmap = id_array; + *bmp = bitmap; + *allocp = nentries; + __os_free(env, tmpmap); +done: if (timespecisset(®ion->next_timeout)) + region->next_timeout = min_timeout; + return (0); +} + +static int +__dd_find(env, bmp, idmap, nlockers, nalloc, deadp) + ENV *env; + u_int32_t *bmp, nlockers, nalloc; + locker_info *idmap; + u_int32_t ***deadp; +{ + u_int32_t i, j, k, *mymap, *tmpmap, **retp; + u_int ndead, ndeadalloc; + int ret; + +#undef INITIAL_DEAD_ALLOC +#define INITIAL_DEAD_ALLOC 8 + + ndeadalloc = INITIAL_DEAD_ALLOC; + ndead = 0; + if ((ret = __os_malloc(env, + ndeadalloc * sizeof(u_int32_t *), &retp)) != 0) + return (ret); + + /* + * For each locker, OR in the bits from the lockers on which that + * locker is waiting. + */ + for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nalloc) { + if (!idmap[i].valid) + continue; + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(mymap, j)) + continue; + + /* Find the map for this bit. */ + tmpmap = bmp + (nalloc * j); + OR_MAP(mymap, tmpmap, nalloc); + if (!ISSET_MAP(mymap, i)) + continue; + + /* Make sure we leave room for NULL. */ + if (ndead + 2 >= ndeadalloc) { + ndeadalloc <<= 1; + /* + * If the alloc fails, then simply return the + * deadlocks that we already have. + */ + if (__os_realloc(env, + ndeadalloc * sizeof(u_int32_t *), + &retp) != 0) { + retp[ndead] = NULL; + *deadp = retp; + return (0); + } + } + retp[ndead++] = mymap; + + /* Mark all participants in this deadlock invalid. 
*/ + for (k = 0; k < nlockers; k++) + if (ISSET_MAP(mymap, k)) + idmap[k].valid = 0; + break; + } + } + retp[ndead] = NULL; + *deadp = retp; + return (0); +} + +static int +__dd_abort(env, info, statusp) + ENV *env; + locker_info *info; + int *statusp; +{ + struct __db_lock *lockp; + DB_LOCKER *lockerp; + DB_LOCKOBJ *sh_obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + *statusp = 0; + + lt = env->lk_handle; + region = lt->reginfo.primary; + ret = 0; + + /* We must lock so this locker cannot go away while we abort it. */ + LOCK_SYSTEM_LOCK(lt, region); + LOCK_LOCKERS(env, region); + + /* + * Get the locker. If it's gone or was aborted while we were + * detecting, return that. + */ + if ((ret = __lock_getlocker_int(lt, + info->last_locker_id, 0, &lockerp)) != 0) + goto err; + if (lockerp == NULL || F_ISSET(lockerp, DB_LOCKER_INABORT)) { + *statusp = DB_ALREADY_ABORTED; + goto err; + } + + /* + * Find the locker's last lock. It is possible for this lock to have + * been freed, either though a timeout or another detector run. + * First lock the lock object so it is stable. + */ + + OBJECT_LOCK_NDX(lt, region, info->last_ndx); + if ((lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock)) == NULL) { + *statusp = DB_ALREADY_ABORTED; + goto done; + } + if (R_OFFSET(<->reginfo, lockp) != info->last_lock || + lockp->holder != R_OFFSET(<->reginfo, lockerp) || + F_ISSET(lockerp, DB_LOCKER_INABORT) || + lockp->obj != info->last_obj || lockp->status != DB_LSTAT_WAITING) { + *statusp = DB_ALREADY_ABORTED; + goto done; + } + + sh_obj = SH_OFF_TO_PTR(lockp, lockp->obj, DB_LOCKOBJ); + + STAT_INC_VERB(env, lock, deadlock, + region->stat.st_ndeadlocks, lockerp->id, &sh_obj->lockobj); + /* Abort lock, take it off list, and wake up this lock. 
*/ + lockp->status = DB_LSTAT_ABORTED; + SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); + + /* + * Either the waiters list is now empty, in which case we remove + * it from dd_objs, or it is not empty, in which case we need to + * do promotion. + */ + if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + LOCK_DD(env, region); + SH_TAILQ_REMOVE(®ion->dd_objs, + sh_obj, dd_links, __db_lockobj); + UNLOCK_DD(env, region); + } else + ret = __lock_promote(lt, sh_obj, NULL, 0); + MUTEX_UNLOCK(env, lockp->mtx_lock); + +done: OBJECT_UNLOCK(lt, region, info->last_ndx); +err: UNLOCK_LOCKERS(env, region); + LOCK_SYSTEM_UNLOCK(lt, region); + return (ret); +} + +#ifdef DIAGNOSTIC +static void +__dd_debug(env, idmap, bitmap, nlockers, nalloc) + ENV *env; + locker_info *idmap; + u_int32_t *bitmap, nlockers, nalloc; +{ + DB_MSGBUF mb; + u_int32_t i, j, *mymap; + + DB_MSGBUF_INIT(&mb); + + __db_msg(env, "Waitsfor array\nWaiter:\tWaiting on:"); + for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nalloc) { + if (!idmap[i].valid) + continue; + + __db_msgadd(env, &mb, /* Waiter. */ + "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno); + for (j = 0; j < nlockers; j++) + if (ISSET_MAP(mymap, j)) + __db_msgadd(env, + &mb, " %lx", (u_long)idmap[j].id); + __db_msgadd(env, &mb, " %lu", (u_long)idmap[i].last_lock); + DB_MSGBUF_FLUSH(env, &mb); + } +} +#endif + +/* + * Given a bitmap that contains a deadlock, verify that the bit + * specified in the which parameter indicates a transaction that + * is actually deadlocked. Return 1 if really deadlocked, 0 otherwise. + * deadmap -- the array that identified the deadlock. + * tmpmap -- a copy of the initial bitmaps from the dd_build phase. + * origmap -- a temporary bit map into which we can OR things. + * nlockers -- the number of actual lockers under consideration. + * nalloc -- the number of words allocated for the bitmap. + * which -- the locker in question. 
+ */ +static int +__dd_verify(idmap, deadmap, tmpmap, origmap, nlockers, nalloc, which) + locker_info *idmap; + u_int32_t *deadmap, *tmpmap, *origmap; + u_int32_t nlockers, nalloc, which; +{ + u_int32_t *tmap; + u_int32_t j; + int count; + + memset(tmpmap, 0, sizeof(u_int32_t) * nalloc); + + /* + * In order for "which" to be actively involved in + * the deadlock, removing him from the evaluation + * must remove the deadlock. So, we OR together everyone + * except which; if all the participants still have their + * bits set, then the deadlock persists and which does + * not participate. If the deadlock does not persist + * then "which" does participate. + */ + count = 0; + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(deadmap, j) || j == which) + continue; + + /* Find the map for this bit. */ + tmap = origmap + (nalloc * j); + + /* + * We special case the first waiter who is also a holder, so + * we don't automatically call that a deadlock. However, if + * it really is a deadlock, we need the bit set now so that + * we treat the first waiter like other waiters. + */ + if (idmap[j].self_wait) + SET_MAP(tmap, j); + OR_MAP(tmpmap, tmap, nalloc); + count++; + } + + if (count == 1) + return (1); + + /* + * Now check the resulting map and see whether + * all participants still have their bit set. + */ + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(deadmap, j) || j == which) + continue; + if (!ISSET_MAP(tmpmap, j)) + return (1); + } + return (0); +} + +/* + * __dd_isolder -- + * + * Figure out the relative age of two lockers. We make all lockers + * older than all transactions, because that's how it's worked + * historically (because lockers are lower ids). + */ +static int +__dd_isolder(a, b, lock_max, txn_max) + u_int32_t a, b; + u_int32_t lock_max, txn_max; +{ + u_int32_t max; + + /* Check for comparing lock-id and txnid. 
*/ + if (a <= DB_LOCK_MAXID && b > DB_LOCK_MAXID) + return (1); + if (b <= DB_LOCK_MAXID && a > DB_LOCK_MAXID) + return (0); + + /* In the same space; figure out which one. */ + max = txn_max; + if (a <= DB_LOCK_MAXID) + max = lock_max; + + /* + * We can't get a 100% correct ordering, because we don't know + * where the current interval started and if there were older + * lockers outside the interval. We do the best we can. + */ + + /* + * Check for a wrapped case with ids above max. + */ + if (a > max && b < max) + return (1); + if (b > max && a < max) + return (0); + + return (a < b); +} diff --git a/src/lock/lock_failchk.c b/src/lock/lock_failchk.c new file mode 100644 index 00000000..b494dffc --- /dev/null +++ b/src/lock/lock_failchk.c @@ -0,0 +1,114 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" + +/* + * __lock_failchk -- + * Check for locks held by dead threads of control and release + * read locks. If any write locks were held by dead non-trasnactional + * lockers then we must abort and run recovery. Otherwise we release + * read locks for lockers owned by dead threads. Write locks for + * dead transactional lockers will be freed when we abort the transaction. + * + * PUBLIC: int __lock_failchk __P((ENV *)); + */ +int +__lock_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_LOCKER *lip; + DB_LOCKREGION *lrp; + DB_LOCKREQ request; + DB_LOCKTAB *lt; + u_int32_t i; + int ret; + char buf[DB_THREADID_STRLEN]; + + dbenv = env->dbenv; + lt = env->lk_handle; + lrp = lt->reginfo.primary; + +retry: LOCK_LOCKERS(env, lrp); + + ret = 0; + for (i = 0; i < lrp->locker_t_size; i++) + SH_TAILQ_FOREACH(lip, <->locker_tab[i], links, __db_locker) { + /* + * If the locker is transactional, we can ignore it if + * it has no read locks or has no locks at all. 
Check + * the heldby list rather then nlocks since a lock may + * be PENDING. __txn_failchk aborts any transactional + * lockers. Non-transactional lockers progress to + * is_alive test. + */ + if ((lip->id >= TXN_MINIMUM) && + (SH_LIST_EMPTY(&lip->heldby) || + lip->nlocks == lip->nwrites)) + continue; + + /* If the locker is still alive, it's not a problem. */ + if (dbenv->is_alive(dbenv, lip->pid, lip->tid, + F_ISSET(lip, DB_LOCKER_HANDLE_LOCKER) ? + DB_MUTEX_PROCESS_ONLY : 0)) + continue; + + /* + * We can only deal with read locks. If a + * non-transactional locker holds write locks we + * have to assume a Berkeley DB operation was + * interrupted with only 1-of-N pages modified. + */ + if (lip->id < TXN_MINIMUM && lip->nwrites != 0) { + ret = __db_failed(env, DB_STR_A("2052", + "locker has write locks", ""), + lip->pid, lip->tid); + break; + } + + /* + * Discard the locker and its read locks. + */ + if (!SH_LIST_EMPTY(&lip->heldby)) { + __db_msg(env, DB_STR_A("2053", + "Freeing read locks for locker %#lx: %s", + "%#lx %s"), (u_long)lip->id, + dbenv->thread_id_string( + dbenv, lip->pid, lip->tid, buf)); + UNLOCK_LOCKERS(env, lrp); + memset(&request, 0, sizeof(request)); + request.op = DB_LOCK_PUT_READ; + if ((ret = __lock_vec(env, + lip, 0, &request, 1, NULL)) != 0) + return (ret); + } + else + UNLOCK_LOCKERS(env, lrp); + + /* + * This locker is most likely referenced by a cursor + * which is owned by a dead thread. Normally the + * cursor would be available for other threads + * but we assume the dead thread will never release + * it. + */ + if (lip->id < TXN_MINIMUM && + (ret = __lock_freelocker(lt, lip)) != 0) + return (ret); + goto retry; + } + + UNLOCK_LOCKERS(env, lrp); + + return (ret); +} diff --git a/src/lock/lock_id.c b/src/lock/lock_id.c new file mode 100644 index 00000000..3527380e --- /dev/null +++ b/src/lock/lock_id.c @@ -0,0 +1,572 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" + +static int __lock_freelocker_int + __P((DB_LOCKTAB *, DB_LOCKREGION *, DB_LOCKER *, int)); + +/* + * __lock_id_pp -- + * ENV->lock_id pre/post processing. + * + * PUBLIC: int __lock_id_pp __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_id_pp(dbenv, idp) + DB_ENV *dbenv; + u_int32_t *idp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_id", DB_INIT_LOCK); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__lock_id(env, idp, NULL)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_id -- + * ENV->lock_id. + * + * PUBLIC: int __lock_id __P((ENV *, u_int32_t *, DB_LOCKER **)); + */ +int +__lock_id(env, idp, lkp) + ENV *env; + u_int32_t *idp; + DB_LOCKER **lkp; +{ + DB_LOCKER *lk; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + u_int32_t id, *ids; + int nids, ret; + + lk = NULL; + lt = env->lk_handle; + region = lt->reginfo.primary; + id = DB_LOCK_INVALIDID; + ret = 0; + + id = DB_LOCK_INVALIDID; + lk = NULL; + + LOCK_LOCKERS(env, region); + + /* + * Allocate a new lock id. If we wrap around then we find the minimum + * currently in use and make sure we can stay below that. This code is + * similar to code in __txn_begin_int for recovering txn ids. + * + * Our current valid range can span the maximum valid value, so check + * for it and wrap manually. 
+ */ + if (region->lock_id == DB_LOCK_MAXID && + region->cur_maxid != DB_LOCK_MAXID) + region->lock_id = DB_LOCK_INVALIDID; + if (region->lock_id == region->cur_maxid) { + if ((ret = __os_malloc(env, + sizeof(u_int32_t) * region->nlockers, &ids)) != 0) + goto err; + nids = 0; + SH_TAILQ_FOREACH(lk, ®ion->lockers, ulinks, __db_locker) + ids[nids++] = lk->id; + region->lock_id = DB_LOCK_INVALIDID; + region->cur_maxid = DB_LOCK_MAXID; + if (nids != 0) + __db_idspace(ids, nids, + ®ion->lock_id, ®ion->cur_maxid); + __os_free(env, ids); + } + id = ++region->lock_id; + + /* Allocate a locker for this id. */ + ret = __lock_getlocker_int(lt, id, 1, &lk); + +err: UNLOCK_LOCKERS(env, region); + + if (idp != NULL) + *idp = id; + if (lkp != NULL) + *lkp = lk; + + return (ret); +} + +/* + * __lock_set_thread_id -- + * Set the thread_id in an existing locker. + * PUBLIC: void __lock_set_thread_id __P((void *, pid_t, db_threadid_t)); + */ +void +__lock_set_thread_id(lref_arg, pid, tid) + void *lref_arg; + pid_t pid; + db_threadid_t tid; +{ + DB_LOCKER *lref; + + lref = lref_arg; + lref->pid = pid; + lref->tid = tid; +} + +/* + * __lock_id_free_pp -- + * ENV->lock_id_free pre/post processing. + * + * PUBLIC: int __lock_id_free_pp __P((DB_ENV *, u_int32_t)); + */ +int +__lock_id_free_pp(dbenv, id) + DB_ENV *dbenv; + u_int32_t id; +{ + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_id_free", DB_INIT_LOCK); + + ENV_ENTER(env, ip); + + /* Check for replication block. 
*/ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __env_rep_enter(env, 0)) != 0) { + handle_check = 0; + goto err; + } + + lt = env->lk_handle; + region = lt->reginfo.primary; + + LOCK_LOCKERS(env, region); + if ((ret = + __lock_getlocker_int(env->lk_handle, id, 0, &sh_locker)) == 0) { + if (sh_locker != NULL) + ret = __lock_freelocker_int(lt, region, sh_locker, 1); + else { + __db_errx(env, DB_STR_A("2045", + "Unknown locker id: %lx", "%lx"), (u_long)id); + ret = EINVAL; + } + } + UNLOCK_LOCKERS(env, region); + + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_id_free -- + * Free a locker id. + * + * PUBLIC: int __lock_id_free __P((ENV *, DB_LOCKER *)); + */ +int +__lock_id_free(env, sh_locker) + ENV *env; + DB_LOCKER *sh_locker; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + lt = env->lk_handle; + region = lt->reginfo.primary; + ret = 0; + + if (sh_locker->nlocks != 0) { + __db_errx(env, DB_STR("2046", + "Locker still has locks")); + ret = EINVAL; + goto err; + } + + LOCK_LOCKERS(env, region); + ret = __lock_freelocker_int(lt, region, sh_locker, 1); + UNLOCK_LOCKERS(env, region); + +err: return (ret); +} + +/* + * __lock_id_set -- + * Set the current locker ID and current maximum unused ID (for + * testing purposes only). + * + * PUBLIC: int __lock_id_set __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__lock_id_set(env, cur_id, max_id) + ENV *env; + u_int32_t cur_id, max_id; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "lock_id_set", DB_INIT_LOCK); + + lt = env->lk_handle; + region = lt->reginfo.primary; + region->lock_id = cur_id; + region->cur_maxid = max_id; + + return (0); +} + +/* + * __lock_getlocker -- + * Get a locker in the locker hash table. The create parameter + * indicates if the locker should be created if it doesn't exist in + * the table. 
+ * + * This must be called with the locker mutex lock if create == 1. + * + * PUBLIC: int __lock_getlocker __P((DB_LOCKTAB *, + * PUBLIC: u_int32_t, int, DB_LOCKER **)); + * PUBLIC: int __lock_getlocker_int __P((DB_LOCKTAB *, + * PUBLIC: u_int32_t, int, DB_LOCKER **)); + */ +int +__lock_getlocker(lt, locker, create, retp) + DB_LOCKTAB *lt; + u_int32_t locker; + int create; + DB_LOCKER **retp; +{ + DB_LOCKREGION *region; + ENV *env; + int ret; + + COMPQUIET(region, NULL); + env = lt->env; + region = lt->reginfo.primary; + + LOCK_LOCKERS(env, region); + ret = __lock_getlocker_int(lt, locker, create, retp); + UNLOCK_LOCKERS(env, region); + + return (ret); +} + +int +__lock_getlocker_int(lt, locker, create, retp) + DB_LOCKTAB *lt; + u_int32_t locker; + int create; + DB_LOCKER **retp; +{ + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + DB_THREAD_INFO *ip; + ENV *env; + db_mutex_t mutex; + u_int32_t i, indx, nlockers; + int ret; + + env = lt->env; + region = lt->reginfo.primary; + + LOCKER_HASH(lt, region, locker, indx); + + /* + * If we find the locker, then we can just return it. If we don't find + * the locker, then we need to create it. + */ + SH_TAILQ_FOREACH(sh_locker, <->locker_tab[indx], links, __db_locker) + if (sh_locker->id == locker) + break; + if (sh_locker == NULL && create) { + nlockers = 0; + /* Create new locker and then insert it into hash table. */ + if ((ret = __mutex_alloc(env, MTX_LOGICAL_LOCK, + DB_MUTEX_LOGICAL_LOCK | DB_MUTEX_SELF_BLOCK, + &mutex)) != 0) + return (ret); + else + MUTEX_LOCK(env, mutex); + if ((sh_locker = SH_TAILQ_FIRST( + ®ion->free_lockers, __db_locker)) == NULL) { + nlockers = region->stat.st_lockers >> 2; + /* Just in case. */ + if (nlockers == 0) + nlockers = 1; + if (region->stat.st_maxlockers != 0 && + region->stat.st_maxlockers < + region->stat.st_lockers + nlockers) + nlockers = region->stat.st_maxlockers - + region->stat.st_lockers; + /* + * Don't hold lockers when getting the region, + * we could deadlock. 
When creating a locker + * there is no race since the id allocation + * is syncrhonized. + */ + UNLOCK_LOCKERS(env, region); + LOCK_REGION_LOCK(env); + /* + * If the max memory is not sized for max objects, + * allocate as much as possible. + */ + F_SET(<->reginfo, REGION_TRACKED); + while (__env_alloc(<->reginfo, nlockers * + sizeof(struct __db_locker), &sh_locker) != 0) + if ((nlockers >> 1) == 0) + break; + F_CLR(<->reginfo, REGION_TRACKED); + LOCK_REGION_UNLOCK(lt->env); + LOCK_LOCKERS(env, region); + for (i = 0; i < nlockers; i++) { + SH_TAILQ_INSERT_HEAD(®ion->free_lockers, + sh_locker, links, __db_locker); + sh_locker++; + } + if (nlockers == 0) + return (__lock_nomem(env, "locker entries")); + region->stat.st_lockers += nlockers; + sh_locker = SH_TAILQ_FIRST( + ®ion->free_lockers, __db_locker); + } + SH_TAILQ_REMOVE( + ®ion->free_lockers, sh_locker, links, __db_locker); + ++region->nlockers; +#ifdef HAVE_STATISTICS + STAT_PERFMON2(env, lock, nlockers, region->nlockers, locker); + if (region->nlockers > region->stat.st_maxnlockers) + STAT_SET(env, lock, maxnlockers, + region->stat.st_maxnlockers, + region->nlockers, locker); +#endif + sh_locker->id = locker; + env->dbenv->thread_id( + env->dbenv, &sh_locker->pid, &sh_locker->tid); + sh_locker->mtx_locker = mutex; + sh_locker->dd_id = 0; + sh_locker->master_locker = INVALID_ROFF; + sh_locker->parent_locker = INVALID_ROFF; + SH_LIST_INIT(&sh_locker->child_locker); + sh_locker->flags = 0; + SH_LIST_INIT(&sh_locker->heldby); + sh_locker->nlocks = 0; + sh_locker->nwrites = 0; + sh_locker->priority = DB_LOCK_DEFPRIORITY; + sh_locker->lk_timeout = 0; + timespecclear(&sh_locker->tx_expire); + timespecclear(&sh_locker->lk_expire); + + SH_TAILQ_INSERT_HEAD( + <->locker_tab[indx], sh_locker, links, __db_locker); + SH_TAILQ_INSERT_HEAD(®ion->lockers, + sh_locker, ulinks, __db_locker); + ENV_GET_THREAD_INFO(env, ip); +#ifdef DIAGNOSTIC + if (ip != NULL) + ip->dbth_locker = R_OFFSET(<->reginfo, sh_locker); +#endif + } + + 
*retp = sh_locker; + return (0); +} + +/* + * __lock_addfamilylocker + * Put a locker entry in for a child transaction. + * + * PUBLIC: int __lock_addfamilylocker __P((ENV *, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t)); + */ +int +__lock_addfamilylocker(env, pid, id, is_family) + ENV *env; + u_int32_t pid, id, is_family; +{ + DB_LOCKER *lockerp, *mlockerp; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int ret; + + COMPQUIET(region, NULL); + lt = env->lk_handle; + region = lt->reginfo.primary; + LOCK_LOCKERS(env, region); + + /* get/create the parent locker info */ + if ((ret = __lock_getlocker_int(lt, pid, 1, &mlockerp)) != 0) + goto err; + + /* + * We assume that only one thread can manipulate + * a single transaction family. + * Therefore the master locker cannot go away while + * we manipulate it, nor can another child in the + * family be created at the same time. + */ + if ((ret = __lock_getlocker_int(lt, id, 1, &lockerp)) != 0) + goto err; + + /* Point to our parent. */ + lockerp->parent_locker = R_OFFSET(<->reginfo, mlockerp); + + /* See if this locker is the family master. */ + if (mlockerp->master_locker == INVALID_ROFF) + lockerp->master_locker = R_OFFSET(<->reginfo, mlockerp); + else { + lockerp->master_locker = mlockerp->master_locker; + mlockerp = R_ADDR(<->reginfo, mlockerp->master_locker); + } + + /* + * Set the family locker flag, so it is possible to distinguish + * between locks held by subtransactions and those with compatible + * lockers. + */ + if (is_family) + F_SET(mlockerp, DB_LOCKER_FAMILY_LOCKER); + + /* + * Link the child at the head of the master's list. + * The guess is when looking for deadlock that + * the most recent child is the one thats blocked. + */ + SH_LIST_INSERT_HEAD( + &mlockerp->child_locker, lockerp, child_link, __db_locker); + +err: UNLOCK_LOCKERS(env, region); + + return (ret); +} + +/* + * __lock_freelocker_int + * Common code for deleting a locker; must be called with the + * locker bucket locked. 
+ */ +static int +__lock_freelocker_int(lt, region, sh_locker, reallyfree) + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + DB_LOCKER *sh_locker; + int reallyfree; +{ + ENV *env; + u_int32_t indx; + int ret; + + env = lt->env; + + if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock) != NULL) { + __db_errx(env, DB_STR("2047", + "Freeing locker with locks")); + return (EINVAL); + } + + /* If this is part of a family, we must fix up its links. */ + if (sh_locker->master_locker != INVALID_ROFF) { + SH_LIST_REMOVE(sh_locker, child_link, __db_locker); + sh_locker->master_locker = INVALID_ROFF; + } + + if (reallyfree) { + LOCKER_HASH(lt, region, sh_locker->id, indx); + SH_TAILQ_REMOVE(<->locker_tab[indx], sh_locker, + links, __db_locker); + if (sh_locker->mtx_locker != MUTEX_INVALID && + (ret = __mutex_free(env, &sh_locker->mtx_locker)) != 0) + return (ret); + SH_TAILQ_INSERT_HEAD(®ion->free_lockers, sh_locker, + links, __db_locker); + SH_TAILQ_REMOVE(®ion->lockers, sh_locker, + ulinks, __db_locker); + region->nlockers--; + STAT_PERFMON2(env, + lock, nlockers, region->nlockers, sh_locker->id); + } + + return (0); +} + +/* + * __lock_freelocker + * Remove a locker its family from the hash table. + * + * This must be called without the locker bucket locked. + * + * PUBLIC: int __lock_freelocker __P((DB_LOCKTAB *, DB_LOCKER *)); + */ +int +__lock_freelocker(lt, sh_locker) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; +{ + DB_LOCKREGION *region; + ENV *env; + int ret; + + region = lt->reginfo.primary; + env = lt->env; + + if (sh_locker == NULL) + return (0); + + LOCK_LOCKERS(env, region); + ret = __lock_freelocker_int(lt, region, sh_locker, 1); + UNLOCK_LOCKERS(env, region); + + return (ret); +} + +/* + * __lock_familyremove + * Remove a locker from its family. + * + * This must be called without the locker bucket locked. 
+ * + * PUBLIC: int __lock_familyremove __P((DB_LOCKTAB *, DB_LOCKER *)); + */ +int +__lock_familyremove(lt, sh_locker) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; +{ + DB_LOCKREGION *region; + ENV *env; + int ret; + + region = lt->reginfo.primary; + env = lt->env; + + LOCK_LOCKERS(env, region); + ret = __lock_freelocker_int(lt, region, sh_locker, 0); + UNLOCK_LOCKERS(env, region); + + return (ret); +} diff --git a/src/lock/lock_list.c b/src/lock/lock_list.c new file mode 100644 index 00000000..c644ffe7 --- /dev/null +++ b/src/lock/lock_list.c @@ -0,0 +1,365 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" + +static int __lock_sort_cmp __P((const void *, const void *)); + +/* + * Lock list routines. + * The list is composed of a 32-bit count of locks followed by + * each lock. A lock is represented by a 16-bit page-count, a lock + * object and a page list. A lock object consists of a 16-bit size + * and the object itself. In a pseudo BNF notation, you get: + * + * LIST = COUNT32 LOCK* + * LOCK = COUNT16 LOCKOBJ PAGELIST + * LOCKOBJ = COUNT16 OBJ + * PAGELIST = COUNT32* + * + * (Recall that X* means "0 or more X's") + * + * In most cases, the OBJ is a struct __db_ilock and the page list is + * a series of (32-bit) page numbers that should get written into the + * pgno field of the __db_ilock. So, the actual number of pages locked + * is the number of items in the PAGELIST plus 1. If this is an application- + * specific lock, then we cannot interpret obj and the pagelist must + * be empty. 
+ * + * Consider a lock list for: File A, pages 1&2, File B pages 3-5, Applock + * This would be represented as: + * 5 1 [fid=A;page=1] 2 2 [fid=B;page=3] 4 5 0 APPLOCK + * ------------------ -------------------- --------- + * LOCK for file A LOCK for file B application-specific lock + */ + +#define MAX_PGNOS 0xffff + +/* + * These macros are bigger than one might expect because some compilers say a + * cast does not return an lvalue, so constructs like *(u_int32_t*)dp = count; + * generate warnings. + */ +#define RET_SIZE(size, count) ((size) + \ + sizeof(u_int32_t) + (count) * 2 * sizeof(u_int16_t)) + +#define PUT_COUNT(dp, count) do { u_int32_t __c = (count); \ + LOGCOPY_32(env, dp, &__c); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int32_t); \ + } while (0) +#define PUT_PCOUNT(dp, count) do { u_int16_t __c = (count); \ + LOGCOPY_16(env, dp, &__c); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int16_t); \ + } while (0) +#define PUT_SIZE(dp, size) do { u_int16_t __s = (size); \ + LOGCOPY_16(env, dp, &__s); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int16_t); \ + } while (0) +#define PUT_PGNO(dp, pgno) do { db_pgno_t __pg = (pgno); \ + LOGCOPY_32(env, dp, &__pg); \ + dp = (u_int8_t *)dp + \ + sizeof(db_pgno_t); \ + } while (0) +#define COPY_OBJ(dp, obj) do { \ + memcpy(dp, \ + (obj)->data, (obj)->size); \ + dp = (u_int8_t *)dp + \ + DB_ALIGN((obj)->size, \ + sizeof(u_int32_t)); \ + } while (0) +#define GET_COUNT(dp, count) do { LOGCOPY_32(env, &count, dp); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int32_t); \ + } while (0) +#define GET_PCOUNT(dp, count) do { LOGCOPY_16(env, &count, dp); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int16_t); \ + } while (0) +#define GET_SIZE(dp, size) do { LOGCOPY_16(env, &size, dp); \ + dp = (u_int8_t *)dp + \ + sizeof(u_int16_t); \ + } while (0) +#define GET_PGNO(dp, pgno) do { LOGCOPY_32(env, &pgno, dp); \ + dp = (u_int8_t *)dp + \ + sizeof(db_pgno_t); \ + } while (0) + +/* + * __lock_fix_list -- + * + * PUBLIC: int __lock_fix_list __P((ENV *, DBT *, 
u_int32_t)); + */ +int +__lock_fix_list(env, list_dbt, nlocks) + ENV *env; + DBT *list_dbt; + u_int32_t nlocks; +{ + DBT *obj; + DB_LOCK_ILOCK *lock, *plock; + u_int32_t i, j, nfid, npgno, size; + u_int8_t *data, *dp; + int ret; + + if ((size = list_dbt->size) == 0) + return (0); + + obj = (DBT *)list_dbt->data; + + /* + * If necessary sort the list of locks so that locks on the same fileid + * are together. We do not sort 1 or 2 locks because by definition if + * there are locks on the same fileid they will be together. The sort + * will also move any locks that do not look like page locks to the end + * of the list so we can stop looking for locks we can combine when we + * hit one. + */ + switch (nlocks) { + case 1: + size = RET_SIZE(obj->size, 1); + if ((ret = __os_malloc(env, size, &data)) != 0) + return (ret); + + dp = data; + PUT_COUNT(dp, 1); + PUT_PCOUNT(dp, 0); + PUT_SIZE(dp, obj->size); + COPY_OBJ(dp, obj); + break; + default: + /* Sort so that all locks with same fileid are together. */ + qsort(list_dbt->data, nlocks, sizeof(DBT), __lock_sort_cmp); + /* FALLTHROUGH */ + case 2: + nfid = npgno = 0; + i = 0; + if (obj->size != sizeof(DB_LOCK_ILOCK)) + goto not_ilock; + + nfid = 1; + plock = (DB_LOCK_ILOCK *)obj->data; + + /* We use ulen to keep track of the number of pages. */ + j = 0; + obj[0].ulen = 0; + for (i = 1; i < nlocks; i++) { + if (obj[i].size != sizeof(DB_LOCK_ILOCK)) + break; + lock = (DB_LOCK_ILOCK *)obj[i].data; + if (obj[j].ulen < MAX_PGNOS && + lock->type == plock->type && + memcmp(lock->fileid, + plock->fileid, DB_FILE_ID_LEN) == 0) { + obj[j].ulen++; + npgno++; + } else { + nfid++; + plock = lock; + j = i; + obj[j].ulen = 0; + } + } + +not_ilock: size = nfid * sizeof(DB_LOCK_ILOCK); + size += npgno * sizeof(db_pgno_t); + /* Add the number of nonstandard locks and get their size. 
*/ + nfid += nlocks - i; + for (; i < nlocks; i++) { + size += obj[i].size; + obj[i].ulen = 0; + } + + size = RET_SIZE(size, nfid); + if ((ret = __os_malloc(env, size, &data)) != 0) + return (ret); + + dp = data; + PUT_COUNT(dp, nfid); + + for (i = 0; i < nlocks; i = j) { + PUT_PCOUNT(dp, obj[i].ulen); + PUT_SIZE(dp, obj[i].size); + COPY_OBJ(dp, &obj[i]); + lock = (DB_LOCK_ILOCK *)obj[i].data; + for (j = i + 1; j <= i + obj[i].ulen; j++) { + lock = (DB_LOCK_ILOCK *)obj[j].data; + PUT_PGNO(dp, lock->pgno); + } + } + } + + __os_free(env, list_dbt->data); + + list_dbt->data = data; + list_dbt->size = size; + + return (0); +} + +/* + * PUBLIC: int __lock_get_list __P((ENV *, DB_LOCKER *, u_int32_t, + * PUBLIC: db_lockmode_t, DBT *)); + */ +int +__lock_get_list(env, locker, flags, lock_mode, list) + ENV *env; + DB_LOCKER *locker; + u_int32_t flags; + db_lockmode_t lock_mode; + DBT *list; +{ + DBT obj_dbt; + DB_LOCK ret_lock; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LOCK_ILOCK *lock; + db_pgno_t save_pgno; + u_int16_t npgno, size; + u_int32_t i, nlocks; + int ret; + void *data, *dp; + + if (list->size == 0) + return (0); + ret = 0; + data = NULL; + + lt = env->lk_handle; + dp = list->data; + + /* + * There is no assurance log records will be aligned. If not, then + * copy the data to an aligned region so the rest of the code does + * not have to worry about it. 
+ */ + if ((uintptr_t)dp != DB_ALIGN((uintptr_t)dp, sizeof(u_int32_t))) { + if ((ret = __os_malloc(env, list->size, &data)) != 0) + return (ret); + memcpy(data, list->data, list->size); + dp = data; + } + + region = lt->reginfo.primary; + LOCK_SYSTEM_LOCK(lt, region); + GET_COUNT(dp, nlocks); + + for (i = 0; i < nlocks; i++) { + GET_PCOUNT(dp, npgno); + GET_SIZE(dp, size); + lock = (DB_LOCK_ILOCK *) dp; + save_pgno = lock->pgno; + obj_dbt.data = dp; + obj_dbt.size = size; + dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t)); + do { + if ((ret = __lock_get_internal(lt, locker, + flags, &obj_dbt, lock_mode, 0, &ret_lock)) != 0) { + lock->pgno = save_pgno; + goto err; + } + if (npgno != 0) + GET_PGNO(dp, lock->pgno); + } while (npgno-- != 0); + lock->pgno = save_pgno; + } + +err: LOCK_SYSTEM_UNLOCK(lt, region); + if (data != NULL) + __os_free(env, data); + return (ret); +} + +#define UINT32_CMP(A, B) ((A) == (B) ? 0 : ((A) > (B) ? 1 : -1)) +static int +__lock_sort_cmp(a, b) + const void *a, *b; +{ + const DBT *d1, *d2; + DB_LOCK_ILOCK *l1, *l2; + + d1 = a; + d2 = b; + + /* Force all non-standard locks to sort at end. 
*/ + if (d1->size != sizeof(DB_LOCK_ILOCK)) { + if (d2->size != sizeof(DB_LOCK_ILOCK)) + return (UINT32_CMP(d1->size, d2->size)); + else + return (1); + } else if (d2->size != sizeof(DB_LOCK_ILOCK)) + return (-1); + + l1 = d1->data; + l2 = d2->data; + if (l1->type != l2->type) + return (UINT32_CMP(l1->type, l2->type)); + return (memcmp(l1->fileid, l2->fileid, DB_FILE_ID_LEN)); +} + +/* + * PUBLIC: void __lock_list_print __P((ENV *, DB_MSGBUF *, DBT *)); + */ +void +__lock_list_print(env, mbp, list) + ENV *env; + DB_MSGBUF *mbp; + DBT *list; +{ + DB_LOCK_ILOCK *lock; + db_pgno_t pgno; + u_int16_t npgno, size; + u_int32_t i, nlocks; + u_int8_t *fidp; + char *fname, *dname, *p, namebuf[26]; + void *dp; + + if (list->size == 0) + return; + dp = list->data; + + GET_COUNT(dp, nlocks); + + for (i = 0; i < nlocks; i++) { + GET_PCOUNT(dp, npgno); + GET_SIZE(dp, size); + lock = (DB_LOCK_ILOCK *) dp; + fidp = lock->fileid; + (void)__dbreg_get_name(env, fidp, &fname, &dname); + __db_msgadd(env, mbp, "\t"); + if (fname == NULL && dname == NULL) + __db_msgadd(env, mbp, "(%lx %lx %lx %lx %lx)", + (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2], + (u_long)fidp[3], (u_long)fidp[4]); + else { + if (fname != NULL && dname != NULL) { + (void)snprintf(namebuf, sizeof(namebuf), + "%14s.%-10s", fname, dname); + p = namebuf; + } else if (fname != NULL) + p = fname; + else + p = dname; + __db_msgadd(env, mbp, "%-25s", p); + } + dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t)); + LOGCOPY_32(env, &pgno, &lock->pgno); + do { + __db_msgadd(env, mbp, " %d", pgno); + if (npgno != 0) + GET_PGNO(dp, pgno); + } while (npgno-- != 0); + __db_msgadd(env, mbp, "\n"); + } +} diff --git a/src/lock/lock_method.c b/src/lock/lock_method.c new file mode 100644 index 00000000..451b5e1c --- /dev/null +++ b/src/lock/lock_method.c @@ -0,0 +1,630 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" + +/* + * __lock_env_create -- + * Lock specific creation of the DB_ENV structure. + * + * PUBLIC: int __lock_env_create __P((DB_ENV *)); + */ +int +__lock_env_create(dbenv) + DB_ENV *dbenv; +{ + u_int32_t cpu; + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + */ + dbenv->lk_init = 0; + dbenv->lk_init_lockers = 0; + dbenv->lk_init_objects = 0; + + /* + * Default to 10 partitions per cpu. This seems to be near + * the point of diminishing returns on Xeon type processors. + * Cpu count often returns the number of hyper threads and if + * there is only one CPU you probably do not want to run partitions. + */ + cpu = __os_cpu_count(); + dbenv->lk_partitions = cpu > 1 ? 10 * cpu : 1; + + return (0); +} + +/* + * __lock_env_destroy -- + * Lock specific destruction of the DB_ENV structure. + * + * PUBLIC: void __lock_env_destroy __P((DB_ENV *)); + */ +void +__lock_env_destroy(dbenv) + DB_ENV *dbenv; +{ + ENV *env; + + env = dbenv->env; + + if (dbenv->lk_conflicts != NULL) { + __os_free(env, dbenv->lk_conflicts); + dbenv->lk_conflicts = NULL; + } +} + +/* + * __lock_get_lk_conflicts + * Get the conflicts matrix. + * + * PUBLIC: int __lock_get_lk_conflicts + * PUBLIC: __P((DB_ENV *, const u_int8_t **, int *)); + */ +int +__lock_get_lk_conflicts(dbenv, lk_conflictsp, lk_modesp) + DB_ENV *dbenv; + const u_int8_t **lk_conflictsp; + int *lk_modesp; +{ + DB_LOCKTAB *lt; + ENV *env; + + env = dbenv->env; + lt = env->lk_handle; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_conflicts", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. 
*/ + if (lk_conflictsp != NULL) + *lk_conflictsp = lt->conflicts; + if (lk_modesp != NULL) + *lk_modesp = ((DB_LOCKREGION *) + (lt->reginfo.primary))->nmodes; + } else { + if (lk_conflictsp != NULL) + *lk_conflictsp = dbenv->lk_conflicts; + if (lk_modesp != NULL) + *lk_modesp = dbenv->lk_modes; + } + return (0); +} + +/* + * __lock_set_lk_conflicts + * Set the conflicts matrix. + * + * PUBLIC: int __lock_set_lk_conflicts __P((DB_ENV *, u_int8_t *, int)); + */ +int +__lock_set_lk_conflicts(dbenv, lk_conflicts, lk_modes) + DB_ENV *dbenv; + u_int8_t *lk_conflicts; + int lk_modes; +{ + ENV *env; + int ret; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_conflicts"); + + if (dbenv->lk_conflicts != NULL) { + __os_free(env, dbenv->lk_conflicts); + dbenv->lk_conflicts = NULL; + } + if ((ret = __os_malloc(env, + (size_t)(lk_modes * lk_modes), &dbenv->lk_conflicts)) != 0) + return (ret); + memcpy( + dbenv->lk_conflicts, lk_conflicts, (size_t)(lk_modes * lk_modes)); + dbenv->lk_modes = lk_modes; + + return (0); +} + +/* + * PUBLIC: int __lock_get_lk_detect __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_detect(dbenv, lk_detectp) + DB_ENV *dbenv; + u_int32_t *lk_detectp; +{ + DB_LOCKTAB *lt; + DB_THREAD_INFO *ip; + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_detect", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + lt = env->lk_handle; + ENV_ENTER(env, ip); + LOCK_REGION_LOCK(env); + *lk_detectp = ((DB_LOCKREGION *)lt->reginfo.primary)->detect; + LOCK_REGION_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + *lk_detectp = dbenv->lk_detect; + return (0); +} + +/* + * __lock_set_lk_detect + * DB_ENV->set_lk_detect. 
+ * + * PUBLIC: int __lock_set_lk_detect __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_detect(dbenv, lk_detect) + DB_ENV *dbenv; + u_int32_t lk_detect; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->set_lk_detect", DB_INIT_LOCK); + + switch (lk_detect) { + case DB_LOCK_DEFAULT: + case DB_LOCK_EXPIRE: + case DB_LOCK_MAXLOCKS: + case DB_LOCK_MAXWRITE: + case DB_LOCK_MINLOCKS: + case DB_LOCK_MINWRITE: + case DB_LOCK_OLDEST: + case DB_LOCK_RANDOM: + case DB_LOCK_YOUNGEST: + break; + default: + __db_errx(env, DB_STR("2043", + "DB_ENV->set_lk_detect: unknown deadlock detection mode specified")); + return (EINVAL); + } + + ret = 0; + if (LOCKING_ON(env)) { + ENV_ENTER(env, ip); + + lt = env->lk_handle; + region = lt->reginfo.primary; + LOCK_REGION_LOCK(env); + /* + * Check for incompatible automatic deadlock detection requests. + * There are scenarios where changing the detector configuration + * is reasonable, but we disallow them guessing it is likely to + * be an application error. + * + * We allow applications to turn on the lock detector, and we + * ignore attempts to set it to the default or current value. 
+ */ + if (region->detect != DB_LOCK_NORUN && + lk_detect != DB_LOCK_DEFAULT && + region->detect != lk_detect) { + __db_errx(env, DB_STR("2044", + "DB_ENV->set_lk_detect: incompatible deadlock detector mode")); + ret = EINVAL; + } else + if (region->detect == DB_LOCK_NORUN) + region->detect = lk_detect; + LOCK_REGION_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + dbenv->lk_detect = lk_detect; + + return (ret); +} + +/* + * PUBLIC: int __lock_get_lk_max_locks __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_max_locks(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_maxlocks", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lk_maxp = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_maxlocks; + } else + *lk_maxp = dbenv->lk_max; + return (0); +} + +/* + * __lock_set_lk_max_locks + * DB_ENV->set_lk_max_locks. + * + * PUBLIC: int __lock_set_lk_max_locks __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_locks(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_max_locks"); + + dbenv->lk_max = lk_max; + return (0); +} + +/* + * PUBLIC: int __lock_get_lk_max_lockers __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_max_lockers(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_max_lockers", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lk_maxp = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_maxlockers; + } else + *lk_maxp = dbenv->lk_max_lockers; + return (0); +} + +/* + * __lock_set_lk_max_lockers + * DB_ENV->set_lk_max_lockers. 
+ * + * PUBLIC: int __lock_set_lk_max_lockers __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_lockers(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_max_lockers"); + + dbenv->lk_max_lockers = lk_max; + return (0); +} + +/* + * PUBLIC: int __lock_get_lk_max_objects __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_max_objects(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_max_objects", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lk_maxp = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_maxobjects; + } else + *lk_maxp = dbenv->lk_max_objects; + return (0); +} + +/* + * __lock_set_lk_max_objects + * DB_ENV->set_lk_max_objects. + * + * PUBLIC: int __lock_set_lk_max_objects __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_max_objects(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_max_objects"); + + dbenv->lk_max_objects = lk_max; + return (0); +} +/* + * PUBLIC: int __lock_get_lk_partitions __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_partitions(dbenv, lk_partitionp) + DB_ENV *dbenv; + u_int32_t *lk_partitionp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_partitions", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lk_partitionp = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_partitions; + } else + *lk_partitionp = dbenv->lk_partitions; + return (0); +} + +/* + * __lock_set_lk_partitions + * DB_ENV->set_lk_partitions. 
+ * + * PUBLIC: int __lock_set_lk_partitions __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_partitions(dbenv, lk_partitions) + DB_ENV *dbenv; + u_int32_t lk_partitions; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_partitions"); + + dbenv->lk_partitions = lk_partitions; + return (0); +} +/* + * PUBLIC: int __lock_get_lk_tablesize __P((DB_ENV *, u_int32_t *)); + */ +int +__lock_get_lk_tablesize(dbenv, lk_tablesizep) + DB_ENV *dbenv; + u_int32_t *lk_tablesizep; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_lk_tablesize", DB_INIT_LOCK); + + if (LOCKING_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *lk_tablesizep = ((DB_LOCKREGION *) + env->lk_handle->reginfo.primary)->stat.st_tablesize; + } else + *lk_tablesizep = dbenv->object_t_size; + return (0); +} + +/* + * __lock_set_lk_tablesize + * DB_ENV->set_lk_tablesize. + * + * PUBLIC: int __lock_set_lk_tablesize __P((DB_ENV *, u_int32_t)); + */ +int +__lock_set_lk_tablesize(dbenv, lk_tablesize) + DB_ENV *dbenv; + u_int32_t lk_tablesize; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lk_tablesize"); + + dbenv->object_t_size = lk_tablesize; + return (0); +} + +/* + * __lock_set_lk_priority -- + * Set a locker's priority. + * + * PUBLIC: int __lock_set_lk_priority __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__lock_set_lk_priority(dbenv, lockid, priority) + DB_ENV *dbenv; + u_int32_t lockid, priority; +{ + DB_LOCKER *locker; + ENV *env; + int ret; + + env = dbenv->env; + + if (!LOCKING_ON(env)) + return (EINVAL); + + if ((ret = __lock_getlocker(env->lk_handle, lockid, 0, &locker)) == 0) + locker->priority = priority; + return (ret); +} + +/* + * __lock_get_lk_priority -- + * Get a locker's priority. 
+ * + * PUBLIC: int __lock_get_lk_priority __P((DB_ENV *, u_int32_t, u_int32_t *)); + */ +int +__lock_get_lk_priority(dbenv, lockid, priorityp) + DB_ENV *dbenv; + u_int32_t lockid, *priorityp; +{ + DB_LOCKER *locker; + ENV *env; + int ret; + + env = dbenv->env; + + if (!LOCKING_ON(env)) + return (EINVAL); + + if ((ret = __lock_getlocker(env->lk_handle, lockid, 0, &locker)) == 0) + *priorityp = locker->priority; + return ret; +} + +/* + * PUBLIC: int __lock_get_env_timeout + * PUBLIC: __P((DB_ENV *, db_timeout_t *, u_int32_t)); + */ +int +__lock_get_env_timeout(dbenv, timeoutp, flag) + DB_ENV *dbenv; + db_timeout_t *timeoutp; + u_int32_t flag; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->get_env_timeout", DB_INIT_LOCK); + + ret = 0; + if (LOCKING_ON(env)) { + lt = env->lk_handle; + region = lt->reginfo.primary; + ENV_ENTER(env, ip); + LOCK_REGION_LOCK(env); + switch (flag) { + case DB_SET_LOCK_TIMEOUT: + *timeoutp = region->lk_timeout; + break; + case DB_SET_TXN_TIMEOUT: + *timeoutp = region->tx_timeout; + break; + default: + ret = 1; + break; + } + LOCK_REGION_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + switch (flag) { + case DB_SET_LOCK_TIMEOUT: + *timeoutp = dbenv->lk_timeout; + break; + case DB_SET_TXN_TIMEOUT: + *timeoutp = dbenv->tx_timeout; + break; + default: + ret = 1; + break; + } + + if (ret) + ret = __db_ferr(env, "DB_ENV->get_timeout", 0); + + return (ret); +} + +/* + * __lock_set_env_timeout + * DB_ENV->set_lock_timeout. 
+ * + * PUBLIC: int __lock_set_env_timeout __P((DB_ENV *, db_timeout_t, u_int32_t)); + */ +int +__lock_set_env_timeout(dbenv, timeout, flags) + DB_ENV *dbenv; + db_timeout_t timeout; + u_int32_t flags; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->lk_handle, "DB_ENV->set_env_timeout", DB_INIT_LOCK); + + ret = 0; + if (LOCKING_ON(env)) { + lt = env->lk_handle; + region = lt->reginfo.primary; + ENV_ENTER(env, ip); + LOCK_REGION_LOCK(env); + switch (flags) { + case DB_SET_LOCK_TIMEOUT: + region->lk_timeout = timeout; + break; + case DB_SET_TXN_TIMEOUT: + region->tx_timeout = timeout; + break; + default: + ret = 1; + break; + } + LOCK_REGION_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + switch (flags) { + case DB_SET_LOCK_TIMEOUT: + dbenv->lk_timeout = timeout; + break; + case DB_SET_TXN_TIMEOUT: + dbenv->tx_timeout = timeout; + break; + default: + ret = 1; + break; + } + + if (ret) + ret = __db_ferr(env, "DB_ENV->set_timeout", 0); + + return (ret); +} diff --git a/src/lock/lock_region.c b/src/lock/lock_region.c new file mode 100644 index 00000000..1b58d835 --- /dev/null +++ b/src/lock/lock_region.c @@ -0,0 +1,578 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" + +static int __lock_region_init __P((ENV *, DB_LOCKTAB *)); + +/* + * The conflict arrays are set up such that the row is the lock you are + * holding and the column is the lock that is desired. 
+ */ +#define DB_LOCK_RIW_N 9 +static const u_int8_t db_riw_conflicts[] = { +/* N R W WT IW IR RIW DR WW */ +/* N */ 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* R */ 0, 0, 1, 0, 1, 0, 1, 0, 1, +/* W */ 0, 1, 1, 1, 1, 1, 1, 1, 1, +/* WT */ 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* IW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, +/* IR */ 0, 0, 1, 0, 0, 0, 0, 0, 1, +/* RIW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, +/* DR */ 0, 0, 1, 0, 1, 0, 1, 0, 0, +/* WW */ 0, 1, 1, 0, 1, 1, 1, 0, 1 +}; + +/* + * This conflict array is used for concurrent db access (CDB). It uses + * the same locks as the db_riw_conflicts array, but adds an IW mode to + * be used for write cursors. + */ +#define DB_LOCK_CDB_N 5 +static const u_int8_t db_cdb_conflicts[] = { + /* N R W WT IW */ + /* N */ 0, 0, 0, 0, 0, + /* R */ 0, 0, 1, 0, 0, + /* W */ 0, 1, 1, 1, 1, + /* WT */ 0, 0, 0, 0, 0, + /* IW */ 0, 0, 1, 0, 1 +}; + +/* + * __lock_open -- + * Internal version of lock_open: only called from ENV->open. + * + * PUBLIC: int __lock_open __P((ENV *)); + */ +int +__lock_open(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + int region_locked, ret; + + dbenv = env->dbenv; + region_locked = 0; + + /* Create the lock table structure. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_LOCKTAB), <)) != 0) + return (ret); + lt->env = env; + + /* Join/create the lock region. */ + if ((ret = __env_region_share(env, <->reginfo)) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(<->reginfo, REGION_CREATE)) + if ((ret = __lock_region_init(env, lt)) != 0) + goto err; + + /* Set the local addresses. */ + region = lt->reginfo.primary = + R_ADDR(<->reginfo, ((REGENV *)env->reginfo->primary)->lt_primary); + + /* Set remaining pointers into region. 
*/ + lt->conflicts = R_ADDR(<->reginfo, region->conf_off); + lt->obj_tab = R_ADDR(<->reginfo, region->obj_off); +#ifdef HAVE_STATISTICS + lt->obj_stat = R_ADDR(<->reginfo, region->stat_off); +#endif + lt->part_array = R_ADDR(<->reginfo, region->part_off); + lt->locker_tab = R_ADDR(<->reginfo, region->locker_off); + + env->lk_handle = lt; + lt->reginfo.mtx_alloc = region->mtx_region; + + LOCK_REGION_LOCK(env); + region_locked = 1; + + if (dbenv->lk_detect != DB_LOCK_NORUN) { + /* + * Check for incompatible automatic deadlock detection requests. + * There are scenarios where changing the detector configuration + * is reasonable, but we disallow them guessing it is likely to + * be an application error. + * + * We allow applications to turn on the lock detector, and we + * ignore attempts to set it to the default or current value. + */ + if (region->detect != DB_LOCK_NORUN && + dbenv->lk_detect != DB_LOCK_DEFAULT && + region->detect != dbenv->lk_detect) { + __db_errx(env, DB_STR("2041", + "lock_open: incompatible deadlock detector mode")); + ret = EINVAL; + goto err; + } + if (region->detect == DB_LOCK_NORUN) + region->detect = dbenv->lk_detect; + } + + /* + * A process joining the region may have reset the lock and transaction + * timeouts. + */ + if (dbenv->lk_timeout != 0) + region->lk_timeout = dbenv->lk_timeout; + if (dbenv->tx_timeout != 0) + region->tx_timeout = dbenv->tx_timeout; + + LOCK_REGION_UNLOCK(env); + region_locked = 0; + + return (0); + +err: if (lt->reginfo.addr != NULL) { + if (region_locked) + LOCK_REGION_UNLOCK(env); + (void)__env_region_detach(env, <->reginfo, 0); + } + env->lk_handle = NULL; + + __os_free(env, lt); + return (ret); +} + +/* + * __lock_region_init -- + * Initialize the lock region. 
+ */ +static int +__lock_region_init(env, lt) + ENV *env; + DB_LOCKTAB *lt; +{ + const u_int8_t *lk_conflicts; + struct __db_lock *lp; + DB_ENV *dbenv; + DB_LOCKER *lidp; + DB_LOCKOBJ *op; + DB_LOCKREGION *region; + DB_LOCKPART *part; + u_int32_t extra_locks, extra_objects, i, j, max; + u_int8_t *addr; + int lk_modes, ret; + + dbenv = env->dbenv; + + if ((ret = __env_alloc(<->reginfo, + sizeof(DB_LOCKREGION), <->reginfo.primary)) != 0) + goto mem_err; + ((REGENV *)env->reginfo->primary)->lt_primary = + R_OFFSET(<->reginfo, lt->reginfo.primary); + region = lt->reginfo.primary; + memset(region, 0, sizeof(*region)); + + /* We share the region so we need the same mutex. */ + region->mtx_region = ((REGENV *)env->reginfo->primary)->mtx_regenv; + + /* Select a conflict matrix if none specified. */ + if (dbenv->lk_modes == 0) + if (CDB_LOCKING(env)) { + lk_modes = DB_LOCK_CDB_N; + lk_conflicts = db_cdb_conflicts; + } else { + lk_modes = DB_LOCK_RIW_N; + lk_conflicts = db_riw_conflicts; + } + else { + lk_modes = dbenv->lk_modes; + lk_conflicts = dbenv->lk_conflicts; + } + + region->need_dd = 0; + timespecclear(®ion->next_timeout); + region->detect = DB_LOCK_NORUN; + region->lk_timeout = dbenv->lk_timeout; + region->tx_timeout = dbenv->tx_timeout; + region->locker_t_size = dbenv->locker_t_size; + region->object_t_size = dbenv->object_t_size; + region->part_t_size = dbenv->lk_partitions; + region->lock_id = 0; + region->cur_maxid = DB_LOCK_MAXID; + region->nmodes = lk_modes; + memset(®ion->stat, 0, sizeof(region->stat)); + region->stat.st_maxlocks = dbenv->lk_max; + region->stat.st_maxlockers = dbenv->lk_max_lockers; + region->stat.st_maxobjects = dbenv->lk_max_objects; + region->stat.st_initlocks = region->stat.st_locks = dbenv->lk_init; + region->stat.st_initlockers = + region->stat.st_lockers = dbenv->lk_init_lockers; + region->stat.st_initobjects = + region->stat.st_objects = dbenv->lk_init_objects; + region->stat.st_partitions = dbenv->lk_partitions; + 
region->stat.st_tablesize = dbenv->object_t_size; + + /* Allocate room for the conflict matrix and initialize it. */ + if ((ret = __env_alloc( + <->reginfo, (size_t)(lk_modes * lk_modes), &addr)) != 0) + goto mem_err; + memcpy(addr, lk_conflicts, (size_t)(lk_modes * lk_modes)); + region->conf_off = R_OFFSET(<->reginfo, addr); + + /* Allocate room for the object hash table and initialize it. */ + if ((ret = __env_alloc(<->reginfo, + region->object_t_size * sizeof(DB_HASHTAB), &addr)) != 0) + goto mem_err; + __db_hashinit(addr, region->object_t_size); + region->obj_off = R_OFFSET(<->reginfo, addr); + +#ifdef HAVE_STATISTICS + /* Allocate room for the object hash stats table and initialize it. */ + if ((ret = __env_alloc(<->reginfo, + region->object_t_size * sizeof(DB_LOCK_HSTAT), &addr)) != 0) + goto mem_err; + memset(addr, 0, region->object_t_size * sizeof(DB_LOCK_HSTAT)); + region->stat_off = R_OFFSET(<->reginfo, addr); +#endif + + /* Allocate room for the partition table and initialize its mutexes. */ + if ((ret = __env_alloc(<->reginfo, + region->part_t_size * sizeof(DB_LOCKPART), &part)) != 0) + goto mem_err; + memset(part, 0, region->part_t_size * sizeof(DB_LOCKPART)); + region->part_off = R_OFFSET(<->reginfo, part); + for (i = 0; i < region->part_t_size; i++) { + if ((ret = __mutex_alloc( + env, MTX_LOCK_REGION, 0, &part[i].mtx_part)) != 0) + return (ret); + } + if ((ret = __mutex_alloc( + env, MTX_LOCK_REGION, 0, ®ion->mtx_dd)) != 0) + return (ret); + + if ((ret = __mutex_alloc( + env, MTX_LOCK_REGION, 0, ®ion->mtx_lockers)) != 0) + return (ret); + + /* Allocate room for the locker hash table and initialize it. */ + if ((ret = __env_alloc(<->reginfo, + region->locker_t_size * sizeof(DB_HASHTAB), &addr)) != 0) + goto mem_err; + __db_hashinit(addr, region->locker_t_size); + region->locker_off = R_OFFSET(<->reginfo, addr); + + SH_TAILQ_INIT(®ion->dd_objs); + + /* + * If the locks and objects don't divide evenly, spread them around. 
+ */ + extra_locks = region->stat.st_locks - + ((region->stat.st_locks / region->part_t_size) * + region->part_t_size); + extra_objects = region->stat.st_objects - + ((region->stat.st_objects / region->part_t_size) * + region->part_t_size); + for (j = 0; j < region->part_t_size; j++) { + /* Initialize locks onto a free list. */ + SH_TAILQ_INIT(&part[j].free_locks); + max = region->stat.st_locks / region->part_t_size; + if (extra_locks > 0) { + max++; + extra_locks--; + } + + if ((ret = + __env_alloc(<->reginfo, + sizeof(struct __db_lock) * max, + &lp)) != 0) + goto mem_err; + part[j].lock_mem_off = R_OFFSET(<->reginfo, lp); + for (i = 0; i < max; ++i) { + memset(lp, 0, sizeof(*lp)); + lp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD( + &part[j].free_locks, lp, links, __db_lock); + ++lp; + } + + /* Initialize objects onto a free list. */ + max = region->stat.st_objects / region->part_t_size; + if (extra_objects > 0) { + max++; + extra_objects--; + } + SH_TAILQ_INIT(&part[j].free_objs); + + if ((ret = + __env_alloc(<->reginfo, + sizeof(DB_LOCKOBJ) * max, + &op)) != 0) + goto mem_err; + part[j].lockobj_mem_off = R_OFFSET(<->reginfo, op); + for (i = 0; i < max; ++i) { + memset(op, 0, sizeof(*op)); + SH_TAILQ_INSERT_HEAD( + &part[j].free_objs, op, links, __db_lockobj); + ++op; + } + } + + /* Initialize lockers onto a free list. */ + SH_TAILQ_INIT(®ion->lockers); + SH_TAILQ_INIT(®ion->free_lockers); + if ((ret = + __env_alloc(<->reginfo, + sizeof(DB_LOCKER) * region->stat.st_lockers, + &lidp)) != 0) + goto mem_err; + + region->locker_mem_off = R_OFFSET(<->reginfo, lidp); + for (i = 0; i < region->stat.st_lockers; ++i) { + SH_TAILQ_INSERT_HEAD( + ®ion->free_lockers, lidp, links, __db_locker); + ++lidp; + } + return (0); +mem_err: __db_errx(env, DB_STR("2042", + "unable to allocate memory for the lock table")); + return (ret); + } + +/* + * __lock_env_refresh -- + * Clean up after the lock system on a close or failed open. 
+ * + * PUBLIC: int __lock_env_refresh __P((ENV *)); + */ +int +__lock_env_refresh(env) + ENV *env; +{ + DB_LOCKREGION *lr; + DB_LOCKTAB *lt; + REGINFO *reginfo; + u_int32_t j; + int ret; + + lt = env->lk_handle; + reginfo = <->reginfo; + lr = reginfo->primary; + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (F_ISSET(env, ENV_PRIVATE)) { + reginfo->mtx_alloc = MUTEX_INVALID; + /* Discard the conflict matrix. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lr->conf_off)); + + /* Discard the object hash table. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lr->obj_off)); + + /* Discard the locker hash table. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lr->locker_off)); + + /* Discard the object hash stat table. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lr->stat_off)); + for (j = 0; j < lr->part_t_size; j++) { + SH_TAILQ_INIT(&FREE_OBJS(lt, j)); + SH_TAILQ_INIT(&FREE_LOCKS(lt, j)); + __env_alloc_free(reginfo, + R_ADDR(reginfo, + lt->part_array[j].lock_mem_off)); + __env_alloc_free(reginfo, + R_ADDR(reginfo, + lt->part_array[j].lockobj_mem_off)); + } + + /* Discard the object partition array. */ + __env_alloc_free(reginfo, R_ADDR(reginfo, lr->part_off)); + SH_TAILQ_INIT(&lr->free_lockers); + __env_alloc_free(reginfo, + R_ADDR(reginfo, lr->locker_mem_off)); + } + + /* Detach from the region. */ + ret = __env_region_detach(env, reginfo, 0); + + /* Discard DB_LOCKTAB. */ + __os_free(env, lt); + env->lk_handle = NULL; + + return (ret); +} + +/* + * __lock_region_mutex_count -- + * Return the number of mutexes the lock region will need. + * + * PUBLIC: u_int32_t __lock_region_mutex_count __P((ENV *)); + */ +u_int32_t +__lock_region_mutex_count(env) + ENV *env; +{ + DB_ENV *dbenv; + + dbenv = env->dbenv; + + /* + * We need one mutex per locker for it to block on. 
+ */ + return (dbenv->lk_init_lockers + dbenv->lk_partitions + 3); +} +/* + * __lock_region_mutex_max -- + * Return the number of additional mutexes the lock region will need. + * + * PUBLIC: u_int32_t __lock_region_mutex_max __P((ENV *)); + */ +u_int32_t +__lock_region_mutex_max(env) + ENV *env; +{ + DB_ENV *dbenv; + u_int32_t count; + + dbenv = env->dbenv; + + /* + * For backward compatibility, ensure enough mutexes. + * These might actually get used by other things. + */ + if ((count = dbenv->lk_max_lockers) == 0) + count = DB_LOCK_DEFAULT_N; + if (count > dbenv->lk_init_lockers) + return (count - dbenv->lk_init_lockers); + else + return (0); +} + +/* + * __lock_region_max -- + * Return the amount of extra memory to allocate for locking information. + * PUBLIC: size_t __lock_region_max __P((ENV *)); + */ +size_t +__lock_region_max(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t retval; + u_int32_t count; + + dbenv = env->dbenv; + + retval = 0; + if ((count = dbenv->lk_max) == 0) + count = DB_LOCK_DEFAULT_N; + if (count > dbenv->lk_init) + retval += __env_alloc_size(sizeof(struct __db_lock)) * + (count - dbenv->lk_init); + if ((count = dbenv->lk_max_objects) == 0) + count = DB_LOCK_DEFAULT_N; + if (count > dbenv->lk_init_objects) + retval += __env_alloc_size(sizeof(DB_LOCKOBJ)) * + (count - dbenv->lk_init_objects); + if ((count = dbenv->lk_max_lockers) == 0) + count = DB_LOCK_DEFAULT_N; + if (count > dbenv->lk_init_lockers) + retval += __env_alloc_size(sizeof(DB_LOCKER)) * + (count - dbenv->lk_init_lockers); + + /* And we keep getting this wrong, let's be generous. */ + retval += retval / 4; + + return (retval); +} + +/* + * __lock_region_size -- + * Return the inital region size. 
+ * PUBLIC: size_t __lock_region_size __P((ENV *, size_t)); + */ +size_t +__lock_region_size(env, other_alloc) + ENV *env; + size_t other_alloc; +{ + DB_ENV *dbenv; + size_t retval; + u_int32_t count; + + dbenv = env->dbenv; + + /* Make sure there is at least 5 objects and locks per partition. */ + if (dbenv->lk_init_objects < dbenv->lk_partitions * 5) + dbenv->lk_init_objects = dbenv->lk_partitions * 5; + if (dbenv->lk_init < dbenv->lk_partitions * 5) + dbenv->lk_init = dbenv->lk_partitions * 5; + /* + * Figure out how much space we're going to need. This list should + * map one-to-one with the __env_alloc calls in __lock_region_init. + */ + retval = 0; + retval += __env_alloc_size(sizeof(DB_LOCKREGION)); + retval += __env_alloc_size((size_t)(dbenv->lk_modes * dbenv->lk_modes)); + /* + * Try to figure out the size of the locker hash table. + */ + if (dbenv->lk_max_lockers != 0) + dbenv->locker_t_size = __db_tablesize(dbenv->lk_max_lockers); + else if (dbenv->tx_max != 0) + dbenv->locker_t_size = __db_tablesize(dbenv->tx_max); + else { + if (dbenv->memory_max != 0) + count = (u_int32_t) + (((dbenv->memory_max - other_alloc) / 10) / + sizeof(DB_LOCKER)); + else + count = DB_LOCK_DEFAULT_N / 10; + if (count < dbenv->lk_init_lockers) + count = dbenv->lk_init_lockers; + dbenv->locker_t_size = __db_tablesize(count); + } + retval += __env_alloc_size(dbenv->locker_t_size * (sizeof(DB_HASHTAB))); + retval += __env_alloc_size(sizeof(DB_LOCKER)) * dbenv->lk_init_lockers; + retval += __env_alloc_size(sizeof(struct __db_lock) * dbenv->lk_init); + other_alloc += retval; + /* + * We want to allocate a object hash table that is big enough to + * avoid many collisions, but not too big for starters. Arbitrarily + * pick the point 2/3s of the way to the max size. If the max + * is not stated then guess that objects will fill 1/2 the memory. + * Failing to know how much memory there might we just wind up + * using the default value. 
If this winds up being less thatn + * the init value then we just make the table fit the init value. + */ + if ((count = dbenv->lk_max_objects) == 0) { + if (dbenv->memory_max != 0) + count = (u_int32_t)( + ((dbenv->memory_max - other_alloc) / 2) + / sizeof(DB_LOCKOBJ)); + else + count = DB_LOCK_DEFAULT_N; + if (count < dbenv->lk_init_objects) + count = dbenv->lk_init_objects; + } + count *= 2; + count += dbenv->lk_init_objects; + count /= 3; + if (dbenv->object_t_size == 0) + dbenv->object_t_size = __db_tablesize(count); + retval += __env_alloc_size( + __db_tablesize(dbenv->object_t_size) * (sizeof(DB_HASHTAB))); +#ifdef HAVE_STATISTICS + retval += __env_alloc_size( + __db_tablesize(dbenv->object_t_size) * (sizeof(DB_LOCK_HSTAT))); +#endif + retval += + __env_alloc_size(dbenv->lk_partitions * (sizeof(DB_LOCKPART))); + retval += __env_alloc_size(sizeof(DB_LOCKOBJ) * dbenv->lk_init_objects); + + return (retval); +} diff --git a/src/lock/lock_stat.c b/src/lock/lock_stat.c new file mode 100644 index 00000000..80680266 --- /dev/null +++ b/src/lock/lock_stat.c @@ -0,0 +1,770 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_STATISTICS +static int __lock_dump_locker + __P((ENV *, DB_MSGBUF *, DB_LOCKTAB *, DB_LOCKER *)); +static int __lock_dump_object __P((DB_LOCKTAB *, DB_MSGBUF *, DB_LOCKOBJ *)); +static int __lock_print_all __P((ENV *, u_int32_t)); +static int __lock_print_stats __P((ENV *, u_int32_t)); +static void __lock_print_header __P((ENV *)); +static int __lock_stat __P((ENV *, DB_LOCK_STAT **, u_int32_t)); + +/* + * __lock_stat_pp -- + * ENV->lock_stat pre/post processing. 
+ * + * PUBLIC: int __lock_stat_pp __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t)); + */ +int +__lock_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOCK_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_stat", DB_INIT_LOCK); + + if ((ret = __db_fchk(env, + "DB_ENV->lock_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__lock_stat(env, statp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_stat -- + * ENV->lock_stat. + */ +static int +__lock_stat(env, statp, flags) + ENV *env; + DB_LOCK_STAT **statp; + u_int32_t flags; +{ + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LOCK_STAT *stats, tmp; + DB_LOCK_HSTAT htmp; + DB_LOCK_PSTAT ptmp; + int ret; + u_int32_t i; + uintmax_t tmp_wait, tmp_nowait; + + *statp = NULL; + lt = env->lk_handle; + + if ((ret = __os_umalloc(env, sizeof(*stats), &stats)) != 0) + return (ret); + + /* Copy out the global statistics. 
*/ + LOCK_REGION_LOCK(env); + + region = lt->reginfo.primary; + memcpy(stats, ®ion->stat, sizeof(*stats)); + stats->st_locktimeout = region->lk_timeout; + stats->st_txntimeout = region->tx_timeout; + stats->st_id = region->lock_id; + stats->st_cur_maxid = region->cur_maxid; + stats->st_nlockers = region->nlockers; + stats->st_nmodes = region->nmodes; + + for (i = 0; i < region->object_t_size; i++) { + stats->st_nrequests += lt->obj_stat[i].st_nrequests; + stats->st_nreleases += lt->obj_stat[i].st_nreleases; + stats->st_nupgrade += lt->obj_stat[i].st_nupgrade; + stats->st_ndowngrade += lt->obj_stat[i].st_ndowngrade; + stats->st_lock_wait += lt->obj_stat[i].st_lock_wait; + stats->st_lock_nowait += lt->obj_stat[i].st_lock_nowait; + stats->st_nlocktimeouts += lt->obj_stat[i].st_nlocktimeouts; + stats->st_ntxntimeouts += lt->obj_stat[i].st_ntxntimeouts; + if (stats->st_maxhlocks < lt->obj_stat[i].st_maxnlocks) + stats->st_maxhlocks = lt->obj_stat[i].st_maxnlocks; + if (stats->st_maxhobjects < lt->obj_stat[i].st_maxnobjects) + stats->st_maxhobjects = lt->obj_stat[i].st_maxnobjects; + if (stats->st_hash_len < lt->obj_stat[i].st_hash_len) + stats->st_hash_len = lt->obj_stat[i].st_hash_len; + if (LF_ISSET(DB_STAT_CLEAR)) { + htmp = lt->obj_stat[i]; + memset(<->obj_stat[i], 0, sizeof(lt->obj_stat[i])); + lt->obj_stat[i].st_nlocks = htmp.st_nlocks; + lt->obj_stat[i].st_maxnlocks = htmp.st_nlocks; + lt->obj_stat[i].st_nobjects = htmp.st_nobjects; + lt->obj_stat[i].st_maxnobjects = htmp.st_nobjects; + + } + } + + for (i = 0; i < region->part_t_size; i++) { + stats->st_nlocks += lt->part_array[i].part_stat.st_nlocks; + stats->st_maxnlocks += + lt->part_array[i].part_stat.st_maxnlocks; + stats->st_nobjects += lt->part_array[i].part_stat.st_nobjects; + stats->st_maxnobjects += + lt->part_array[i].part_stat.st_maxnobjects; + stats->st_locksteals += + lt->part_array[i].part_stat.st_locksteals; + if (stats->st_maxlsteals < + lt->part_array[i].part_stat.st_locksteals) + 
stats->st_maxlsteals = + lt->part_array[i].part_stat.st_locksteals; + stats->st_objectsteals += + lt->part_array[i].part_stat.st_objectsteals; + if (stats->st_maxosteals < + lt->part_array[i].part_stat.st_objectsteals) + stats->st_maxosteals = + lt->part_array[i].part_stat.st_objectsteals; + __mutex_set_wait_info(env, + lt->part_array[i].mtx_part, &tmp_wait, &tmp_nowait); + stats->st_part_nowait += tmp_nowait; + stats->st_part_wait += tmp_wait; + if (tmp_wait > stats->st_part_max_wait) { + stats->st_part_max_nowait = tmp_nowait; + stats->st_part_max_wait = tmp_wait; + } + + if (LF_ISSET(DB_STAT_CLEAR)) { + ptmp = lt->part_array[i].part_stat; + memset(<->part_array[i].part_stat, + 0, sizeof(lt->part_array[i].part_stat)); + lt->part_array[i].part_stat.st_nlocks = + ptmp.st_nlocks; + lt->part_array[i].part_stat.st_maxnlocks = + ptmp.st_nlocks; + lt->part_array[i].part_stat.st_nobjects = + ptmp.st_nobjects; + lt->part_array[i].part_stat.st_maxnobjects = + ptmp.st_nobjects; + } + } + + __mutex_set_wait_info(env, region->mtx_region, + &stats->st_region_wait, &stats->st_region_nowait); + __mutex_set_wait_info(env, region->mtx_dd, + &stats->st_objs_wait, &stats->st_objs_nowait); + __mutex_set_wait_info(env, region->mtx_lockers, + &stats->st_lockers_wait, &stats->st_lockers_nowait); + stats->st_regsize = lt->reginfo.rp->size; + if (LF_ISSET(DB_STAT_CLEAR)) { + tmp = region->stat; + memset(®ion->stat, 0, sizeof(region->stat)); + if (!LF_ISSET(DB_STAT_SUBSYSTEM)) { + __mutex_clear(env, region->mtx_region); + __mutex_clear(env, region->mtx_dd); + __mutex_clear(env, region->mtx_lockers); + for (i = 0; i < region->part_t_size; i++) + __mutex_clear(env, lt->part_array[i].mtx_part); + } + + region->stat.st_maxlocks = tmp.st_maxlocks; + region->stat.st_maxlockers = tmp.st_maxlockers; + region->stat.st_maxobjects = tmp.st_maxobjects; + region->stat.st_nlocks = + region->stat.st_maxnlocks = tmp.st_nlocks; + region->stat.st_maxnlockers = region->nlockers; + region->stat.st_nobjects = 
+ region->stat.st_maxnobjects = tmp.st_nobjects; + region->stat.st_partitions = tmp.st_partitions; + region->stat.st_tablesize = tmp.st_tablesize; + } + + LOCK_REGION_UNLOCK(env); + + *statp = stats; + return (0); +} + +/* + * __lock_stat_print_pp -- + * ENV->lock_stat_print pre/post processing. + * + * PUBLIC: int __lock_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__lock_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lk_handle, "DB_ENV->lock_stat_print", DB_INIT_LOCK); + +#define DB_STAT_LOCK_FLAGS \ + (DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR | DB_STAT_LOCK_CONF |\ + DB_STAT_LOCK_LOCKERS | DB_STAT_LOCK_OBJECTS | DB_STAT_LOCK_PARAMS) + if ((ret = __db_fchk(env, "DB_ENV->lock_stat_print", + flags, DB_STAT_CLEAR | DB_STAT_LOCK_FLAGS)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__lock_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __lock_stat_print -- + * ENV->lock_stat_print method. + * + * PUBLIC: int __lock_stat_print __P((ENV *, u_int32_t)); + */ +int +__lock_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __lock_print_stats(env, orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_LOCK_CONF | DB_STAT_LOCK_LOCKERS | + DB_STAT_LOCK_OBJECTS | DB_STAT_LOCK_PARAMS) && + (ret = __lock_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __lock_print_stats -- + * Display default lock region statistics. 
+ */ +static int +__lock_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_LOCK_STAT *sp; + int ret; + +#ifdef LOCK_DIAGNOSTIC + DB_LOCKTAB *lt; + DB_LOCKREGION *region; + u_int32_t i; + u_int32_t wait, nowait; + + lt = env->lk_handle; + region = lt->reginfo.primary; + + for (i = 0; i < region->object_t_size; i++) { + if (lt->obj_stat[i].st_hash_len == 0) + continue; + __db_dl(env, + "Hash bucket", (u_long)i); + __db_dl(env, "Partition", (u_long)LOCK_PART(region, i)); + __mutex_set_wait_info(env, + lt->part_array[LOCK_PART(region, i)].mtx_part, + &wait, &nowait); + __db_dl_pct(env, + "The number of partition mutex requests that required waiting", + (u_long)wait, DB_PCT(wait, wait + nowait), NULL); + __db_dl(env, + "Maximum hash bucket length", + (u_long)lt->obj_stat[i].st_hash_len); + __db_dl(env, + "Total number of locks requested", + (u_long)lt->obj_stat[i].st_nrequests); + __db_dl(env, + "Total number of locks released", + (u_long)lt->obj_stat[i].st_nreleases); + __db_dl(env, + "Total number of locks upgraded", + (u_long)lt->obj_stat[i].st_nupgrade); + __db_dl(env, + "Total number of locks downgraded", + (u_long)lt->obj_stat[i].st_ndowngrade); + __db_dl(env, + "Lock requests not available due to conflicts, for which we waited", + (u_long)lt->obj_stat[i].st_lock_wait); + __db_dl(env, + "Lock requests not available due to conflicts, for which we did not wait", + (u_long)lt->obj_stat[i].st_lock_nowait); + __db_dl(env, "Number of locks that have timed out", + (u_long)lt->obj_stat[i].st_nlocktimeouts); + __db_dl(env, "Number of transactions that have timed out", + (u_long)lt->obj_stat[i].st_ntxntimeouts); + } +#endif + if ((ret = __lock_stat(env, &sp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default locking region information:"); + __db_dl(env, "Last allocated locker ID", (u_long)sp->st_id); + __db_msg(env, "%#lx\tCurrent maximum unused locker ID", + (u_long)sp->st_cur_maxid); + __db_dl(env, "Number of lock modes", 
(u_long)sp->st_nmodes); + __db_dl(env, + "Initial number of locks allocated", (u_long)sp->st_initlocks); + __db_dl(env, + "Initial number of lockers allocated", (u_long)sp->st_initlockers); + __db_dl(env, "Initial number of lock objects allocated", + (u_long)sp->st_initobjects); + __db_dl(env, + "Maximum number of locks possible", (u_long)sp->st_maxlocks); + __db_dl(env, + "Maximum number of lockers possible", (u_long)sp->st_maxlockers); + __db_dl(env, "Maximum number of lock objects possible", + (u_long)sp->st_maxobjects); + __db_dl(env, + "Current number of locks allocated", (u_long)sp->st_locks); + __db_dl(env, + "Current number of lockers allocated", (u_long)sp->st_lockers); + __db_dl(env, "Current number of lock objects allocated", + (u_long)sp->st_objects); + __db_dl(env, "Number of lock object partitions", + (u_long)sp->st_partitions); + __db_dl(env, "Size of object hash table", + (u_long)sp->st_tablesize); + __db_dl(env, "Number of current locks", (u_long)sp->st_nlocks); + __db_dl(env, "Maximum number of locks at any one time", + (u_long)sp->st_maxnlocks); + __db_dl(env, "Maximum number of locks in any one bucket", + (u_long)sp->st_maxhlocks); + __db_dl(env, "Maximum number of locks stolen by for an empty partition", + (u_long)sp->st_locksteals); + __db_dl(env, "Maximum number of locks stolen for any one partition", + (u_long)sp->st_maxlsteals); + __db_dl(env, "Number of current lockers", (u_long)sp->st_nlockers); + __db_dl(env, "Maximum number of lockers at any one time", + (u_long)sp->st_maxnlockers); + __db_dl(env, + "Number of current lock objects", (u_long)sp->st_nobjects); + __db_dl(env, "Maximum number of lock objects at any one time", + (u_long)sp->st_maxnobjects); + __db_dl(env, "Maximum number of lock objects in any one bucket", + (u_long)sp->st_maxhobjects); + __db_dl(env, + "Maximum number of objects stolen by for an empty partition", + (u_long)sp->st_objectsteals); + __db_dl(env, "Maximum number of objects stolen for any one partition", + 
(u_long)sp->st_maxosteals); + __db_dl(env, + "Total number of locks requested", (u_long)sp->st_nrequests); + __db_dl(env, + "Total number of locks released", (u_long)sp->st_nreleases); + __db_dl(env, + "Total number of locks upgraded", (u_long)sp->st_nupgrade); + __db_dl(env, + "Total number of locks downgraded", (u_long)sp->st_ndowngrade); + __db_dl(env, + "Lock requests not available due to conflicts, for which we waited", + (u_long)sp->st_lock_wait); + __db_dl(env, + "Lock requests not available due to conflicts, for which we did not wait", + (u_long)sp->st_lock_nowait); + __db_dl(env, "Number of deadlocks", (u_long)sp->st_ndeadlocks); + __db_dl(env, "Lock timeout value", (u_long)sp->st_locktimeout); + __db_dl(env, "Number of locks that have timed out", + (u_long)sp->st_nlocktimeouts); + __db_dl(env, + "Transaction timeout value", (u_long)sp->st_txntimeout); + __db_dl(env, "Number of transactions that have timed out", + (u_long)sp->st_ntxntimeouts); + + __db_dlbytes(env, "Region size", + (u_long)0, (u_long)0, (u_long)sp->st_regsize); + __db_dl_pct(env, + "The number of partition locks that required waiting", + (u_long)sp->st_part_wait, DB_PCT( + sp->st_part_wait, sp->st_part_wait + sp->st_part_nowait), NULL); + __db_dl_pct(env, + "The maximum number of times any partition lock was waited for", + (u_long)sp->st_part_max_wait, DB_PCT(sp->st_part_max_wait, + sp->st_part_max_wait + sp->st_part_max_nowait), NULL); + __db_dl_pct(env, + "The number of object queue operations that required waiting", + (u_long)sp->st_objs_wait, DB_PCT(sp->st_objs_wait, + sp->st_objs_wait + sp->st_objs_nowait), NULL); + __db_dl_pct(env, + "The number of locker allocations that required waiting", + (u_long)sp->st_lockers_wait, DB_PCT(sp->st_lockers_wait, + sp->st_lockers_wait + sp->st_lockers_nowait), NULL); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait, + sp->st_region_wait + sp->st_region_nowait), NULL); + 
__db_dl(env, "Maximum hash bucket length", + (u_long)sp->st_hash_len); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __lock_print_all -- + * Display debugging lock region statistics. + */ +static int +__lock_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_LOCKER *lip; + DB_LOCKOBJ *op; + DB_LOCKREGION *lrp; + DB_LOCKTAB *lt; + DB_MSGBUF mb; + int i, j; + u_int32_t k; + + lt = env->lk_handle; + lrp = lt->reginfo.primary; + DB_MSGBUF_INIT(&mb); + + LOCK_REGION_LOCK(env); + __db_print_reginfo(env, <->reginfo, "Lock", flags); + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_LOCK_PARAMS)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Lock region parameters:"); + __mutex_print_debug_single(env, + "Lock region region mutex", lrp->mtx_region, flags); + STAT_ULONG("locker table size", lrp->locker_t_size); + STAT_ULONG("object table size", lrp->object_t_size); + STAT_ULONG("obj_off", lrp->obj_off); + STAT_ULONG("locker_off", lrp->locker_off); + STAT_ULONG("need_dd", lrp->need_dd); + if (timespecisset(&lrp->next_timeout)) { +#ifdef HAVE_STRFTIME + time_t t = (time_t)lrp->next_timeout.tv_sec; + char tbuf[64]; + if (strftime(tbuf, sizeof(tbuf), + "%m-%d-%H:%M:%S", localtime(&t)) != 0) + __db_msg(env, "next_timeout: %s.%09lu", + tbuf, (u_long)lrp->next_timeout.tv_nsec); + else +#endif + __db_msg(env, "next_timeout: %lu.%09lu", + (u_long)lrp->next_timeout.tv_sec, + (u_long)lrp->next_timeout.tv_nsec); + } + } + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_LOCK_CONF)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Lock conflict matrix:"); + for (i = 0; i < lrp->stat.st_nmodes; i++) { + for (j = 0; j < lrp->stat.st_nmodes; j++) + __db_msgadd(env, &mb, "%lu\t", (u_long) + lt->conflicts[i * lrp->stat.st_nmodes + j]); + DB_MSGBUF_FLUSH(env, &mb); + } + } + LOCK_REGION_UNLOCK(env); + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_LOCK_LOCKERS)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Locks grouped by lockers:"); + __lock_print_header(env); + 
LOCK_LOCKERS(env, lrp); + for (k = 0; k < lrp->locker_t_size; k++) + SH_TAILQ_FOREACH( + lip, <->locker_tab[k], links, __db_locker) + (void)__lock_dump_locker(env, &mb, lt, lip); + UNLOCK_LOCKERS(env, lrp); + } + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_LOCK_OBJECTS)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Locks grouped by object:"); + __lock_print_header(env); + for (k = 0; k < lrp->object_t_size; k++) { + OBJECT_LOCK_NDX(lt, lrp, k); + SH_TAILQ_FOREACH( + op, <->obj_tab[k], links, __db_lockobj) { + (void)__lock_dump_object(lt, &mb, op); + __db_msg(env, "%s", ""); + } + OBJECT_UNLOCK(lt, lrp, k); + } + } + + return (0); +} + +static int +__lock_dump_locker(env, mbp, lt, lip) + ENV *env; + DB_MSGBUF *mbp; + DB_LOCKTAB *lt; + DB_LOCKER *lip; +{ + DB_LOCKREGION *lrp; + struct __db_lock *lp; + char buf[DB_THREADID_STRLEN]; + u_int32_t ndx; + + lrp = lt->reginfo.primary; + + __db_msgadd(env, + mbp, "%8lx dd=%2ld locks held %-4d write locks %-4d pid/thread %s", + (u_long)lip->id, (long)lip->dd_id, lip->nlocks, lip->nwrites, + env->dbenv->thread_id_string(env->dbenv, lip->pid, lip->tid, buf)); + __db_msgadd(env, mbp, + " flags %-4x priority %-10u", lip->flags, lip->priority); + + if (timespecisset(&lip->tx_expire)) { +#ifdef HAVE_STRFTIME + time_t t = (time_t)lip->tx_expire.tv_sec; + char tbuf[64]; + if (strftime(tbuf, sizeof(tbuf), + "%m-%d-%H:%M:%S", localtime(&t)) != 0) + __db_msgadd(env, mbp, "expires %s.%09lu", + tbuf, (u_long)lip->tx_expire.tv_nsec); + else +#endif + __db_msgadd(env, mbp, "expires %lu.%09lu", + (u_long)lip->tx_expire.tv_sec, + (u_long)lip->tx_expire.tv_nsec); + } + if (F_ISSET(lip, DB_LOCKER_TIMEOUT)) + __db_msgadd( + env, mbp, " lk timeout %lu", (u_long)lip->lk_timeout); + if (timespecisset(&lip->lk_expire)) { +#ifdef HAVE_STRFTIME + time_t t = (time_t)lip->lk_expire.tv_sec; + char tbuf[64]; + if (strftime(tbuf, + sizeof(tbuf), "%m-%d-%H:%M:%S", localtime(&t)) != 0) + __db_msgadd(env, mbp, " lk expires %s.%09lu", + tbuf, 
(u_long)lip->lk_expire.tv_nsec); + else +#endif + __db_msgadd(env, mbp, " lk expires %lu.%09lu", + (u_long)lip->lk_expire.tv_sec, + (u_long)lip->lk_expire.tv_nsec); + } + DB_MSGBUF_FLUSH(env, mbp); + + /* + * We need some care here since the list may change while we + * look. + */ +retry: SH_LIST_FOREACH(lp, &lip->heldby, locker_links, __db_lock) { + if (!SH_LIST_EMPTY(&lip->heldby) && lp != NULL) { + ndx = lp->indx; + OBJECT_LOCK_NDX(lt, lrp, ndx); + if (lp->indx == ndx) + __lock_printlock(lt, mbp, lp, 1); + else { + OBJECT_UNLOCK(lt, lrp, ndx); + goto retry; + } + OBJECT_UNLOCK(lt, lrp, ndx); + } + } + return (0); +} + +static int +__lock_dump_object(lt, mbp, op) + DB_LOCKTAB *lt; + DB_MSGBUF *mbp; + DB_LOCKOBJ *op; +{ + struct __db_lock *lp; + + SH_TAILQ_FOREACH(lp, &op->holders, links, __db_lock) + __lock_printlock(lt, mbp, lp, 1); + SH_TAILQ_FOREACH(lp, &op->waiters, links, __db_lock) + __lock_printlock(lt, mbp, lp, 1); + return (0); +} + +/* + * __lock_print_header -- + */ +static void +__lock_print_header(env) + ENV *env; +{ + __db_msg(env, "%-8s %-10s%-4s %-7s %s", + "Locker", "Mode", + "Count", "Status", "----------------- Object ---------------"); +} + +/* + * __lock_printlock -- + * + * PUBLIC: void __lock_printlock + * PUBLIC: __P((DB_LOCKTAB *, DB_MSGBUF *mbp, struct __db_lock *, int)); + */ +void +__lock_printlock(lt, mbp, lp, ispgno) + DB_LOCKTAB *lt; + DB_MSGBUF *mbp; + struct __db_lock *lp; + int ispgno; +{ + DB_LOCKOBJ *lockobj; + DB_MSGBUF mb; + ENV *env; + db_pgno_t pgno; + u_int32_t *fidp, type; + u_int8_t *ptr; + char *fname, *dname, *p, namebuf[26]; + const char *mode, *status; + + env = lt->env; + + if (mbp == NULL) { + DB_MSGBUF_INIT(&mb); + mbp = &mb; + } + + switch (lp->mode) { + case DB_LOCK_IREAD: + mode = "IREAD"; + break; + case DB_LOCK_IWR: + mode = "IWR"; + break; + case DB_LOCK_IWRITE: + mode = "IWRITE"; + break; + case DB_LOCK_NG: + mode = "NG"; + break; + case DB_LOCK_READ: + mode = "READ"; + break; + case 
DB_LOCK_READ_UNCOMMITTED: + mode = "READ_UNCOMMITTED"; + break; + case DB_LOCK_WRITE: + mode = "WRITE"; + break; + case DB_LOCK_WWRITE: + mode = "WAS_WRITE"; + break; + case DB_LOCK_WAIT: + mode = "WAIT"; + break; + default: + mode = "UNKNOWN"; + break; + } + switch (lp->status) { + case DB_LSTAT_ABORTED: + status = "ABORT"; + break; + case DB_LSTAT_EXPIRED: + status = "EXPIRED"; + break; + case DB_LSTAT_FREE: + status = "FREE"; + break; + case DB_LSTAT_HELD: + status = "HELD"; + break; + case DB_LSTAT_PENDING: + status = "PENDING"; + break; + case DB_LSTAT_WAITING: + status = "WAIT"; + break; + default: + status = "UNKNOWN"; + break; + } + __db_msgadd(env, mbp, "%8lx %-10s %4lu %-7s ", + (u_long)((DB_LOCKER *)R_ADDR(<->reginfo, lp->holder))->id, + mode, (u_long)lp->refcount, status); + + lockobj = SH_OFF_TO_PTR(lp, lp->obj, DB_LOCKOBJ); + ptr = SH_DBT_PTR(&lockobj->lockobj); + if (ispgno && lockobj->lockobj.size == sizeof(struct __db_ilock)) { + /* Assume this is a DBT lock. */ + memcpy(&pgno, ptr, sizeof(db_pgno_t)); + fidp = (u_int32_t *)(ptr + sizeof(db_pgno_t)); + type = *(u_int32_t *)(ptr + sizeof(db_pgno_t) + DB_FILE_ID_LEN); + (void)__dbreg_get_name( + lt->env, (u_int8_t *)fidp, &fname, &dname); + if (fname == NULL && dname == NULL) + __db_msgadd(env, mbp, "(%lx %lx %lx %lx %lx) ", + (u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2], + (u_long)fidp[3], (u_long)fidp[4]); + else { + if (fname != NULL && dname != NULL) { + (void)snprintf(namebuf, sizeof(namebuf), + "%14s:%-10s", fname, dname); + p = namebuf; + } else if (fname != NULL) + p = fname; + else + p = dname; + __db_msgadd(env, mbp, "%-25s ", p); + } + __db_msgadd(env, mbp, "%-7s %7lu", + type == DB_PAGE_LOCK ? "page" : + type == DB_RECORD_LOCK ? "record" : + type == DB_DATABASE_LOCK ? 
"database" : "handle", + (u_long)pgno); + } else { + __db_msgadd(env, mbp, "0x%lx ", + (u_long)R_OFFSET(<->reginfo, lockobj)); + __db_prbytes(env, mbp, ptr, lockobj->lockobj.size); + } + DB_MSGBUF_FLUSH(env, mbp); +} + +#else /* !HAVE_STATISTICS */ + +int +__lock_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOCK_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__lock_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/lock/lock_stub.c b/src/lock/lock_stub.c new file mode 100644 index 00000000..e3043076 --- /dev/null +++ b/src/lock/lock_stub.c @@ -0,0 +1,632 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" + +/* + * If the library wasn't compiled with locking support, various routines + * aren't available. Stub them here, returning an appropriate error. + */ +static int __db_nolocking __P((ENV *)); + +/* + * __db_nolocking -- + * Error when a Berkeley DB build doesn't include the locking subsystem. 
+ */ +static int +__db_nolocking(env) + ENV *env; +{ + __db_errx(env, DB_STR("2054", + "library build did not include support for locking")); + return (DB_OPNOTSUP); +} + +int +__lock_env_create(dbenv) + DB_ENV *dbenv; +{ + COMPQUIET(dbenv, 0); + return (0); +} + +void +__lock_env_destroy(dbenv) + DB_ENV *dbenv; +{ + COMPQUIET(dbenv, 0); +} + +int +__lock_get_lk_conflicts(dbenv, lk_conflictsp, lk_modesp) + DB_ENV *dbenv; + const u_int8_t **lk_conflictsp; + int *lk_modesp; +{ + COMPQUIET(lk_conflictsp, NULL); + COMPQUIET(lk_modesp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_detect(dbenv, lk_detectp) + DB_ENV *dbenv; + u_int32_t *lk_detectp; +{ + COMPQUIET(lk_detectp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_init_lockers(dbenv, lk_initp) + DB_ENV *dbenv; + u_int32_t *lk_initp; +{ + COMPQUIET(lk_initp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_init_locks(dbenv, lk_initp) + DB_ENV *dbenv; + u_int32_t *lk_initp; +{ + COMPQUIET(lk_initp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_init_objects(dbenv, lk_initp) + DB_ENV *dbenv; + u_int32_t *lk_initp; +{ + COMPQUIET(lk_initp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_max_lockers(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + COMPQUIET(lk_maxp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_max_locks(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + COMPQUIET(lk_maxp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_max_objects(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + COMPQUIET(lk_maxp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_partitions(dbenv, lk_maxp) + DB_ENV *dbenv; + u_int32_t *lk_maxp; +{ + COMPQUIET(lk_maxp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_tablesize(dbenv, lk_tablesizep) + DB_ENV *dbenv; + u_int32_t *lk_tablesizep; +{ + 
COMPQUIET(lk_tablesizep, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_tablesize(dbenv, lk_tablesize) + DB_ENV *dbenv; + u_int32_t lk_tablesize; +{ + COMPQUIET(lk_tablesize, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_lk_priority(dbenv, lockid, priorityp) + DB_ENV *dbenv; + u_int32_t lockid, *priorityp; +{ + COMPQUIET(lockid, 0); + COMPQUIET(priorityp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_priority(dbenv, lockid, priority) + DB_ENV *dbenv; + u_int32_t lockid, priority; +{ + COMPQUIET(lockid, 0); + COMPQUIET(priority, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_env_timeout(dbenv, timeoutp, flag) + DB_ENV *dbenv; + db_timeout_t *timeoutp; + u_int32_t flag; +{ + COMPQUIET(timeoutp, NULL); + COMPQUIET(flag, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_detect_pp(dbenv, flags, atype, abortp) + DB_ENV *dbenv; + u_int32_t flags, atype; + int *abortp; +{ + COMPQUIET(flags, 0); + COMPQUIET(atype, 0); + COMPQUIET(abortp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_get_pp(dbenv, locker, flags, obj, lock_mode, lock) + DB_ENV *dbenv; + u_int32_t locker, flags; + DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + COMPQUIET(locker, 0); + COMPQUIET(flags, 0); + COMPQUIET(obj, NULL); + COMPQUIET(lock_mode, 0); + COMPQUIET(lock, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_id_pp(dbenv, idp) + DB_ENV *dbenv; + u_int32_t *idp; +{ + COMPQUIET(idp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_id_free_pp(dbenv, id) + DB_ENV *dbenv; + u_int32_t id; +{ + COMPQUIET(id, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_put_pp(dbenv, lock) + DB_ENV *dbenv; + DB_LOCK *lock; +{ + COMPQUIET(lock, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOCK_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + return 
(__db_nolocking(dbenv->env)); +} + +int +__lock_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_vec_pp(dbenv, locker, flags, list, nlist, elistp) + DB_ENV *dbenv; + u_int32_t locker, flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + COMPQUIET(locker, 0); + COMPQUIET(flags, 0); + COMPQUIET(list, NULL); + COMPQUIET(nlist, 0); + COMPQUIET(elistp, NULL); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_conflicts(dbenv, lk_conflicts, lk_modes) + DB_ENV *dbenv; + u_int8_t *lk_conflicts; + int lk_modes; +{ + COMPQUIET(lk_conflicts, NULL); + COMPQUIET(lk_modes, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_detect(dbenv, lk_detect) + DB_ENV *dbenv; + u_int32_t lk_detect; +{ + COMPQUIET(lk_detect, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_max_locks(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + COMPQUIET(lk_max, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_max_lockers(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + COMPQUIET(lk_max, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_max_objects(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + COMPQUIET(lk_max, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_lk_partitions(dbenv, lk_max) + DB_ENV *dbenv; + u_int32_t lk_max; +{ + COMPQUIET(lk_max, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_set_env_timeout(dbenv, timeout, flags) + DB_ENV *dbenv; + db_timeout_t timeout; + u_int32_t flags; +{ + COMPQUIET(timeout, 0); + COMPQUIET(flags, 0); + return (__db_nolocking(dbenv->env)); +} + +int +__lock_open(env) + ENV *env; +{ + return (__db_nolocking(env)); +} + +u_int32_t +__lock_region_mutex_count(env) + ENV *env; +{ + return (__db_nolocking(env)); +} + +u_int32_t +__lock_region_mutex_max(env) + ENV *env; +{ + return (__db_nolocking(env)); +} + +size_t 
+__lock_region_max(env) + ENV *env; +{ + return (0); +} + +size_t +__lock_region_size(env, other_alloc) + ENV *env; + size_t other_alloc; +{ + COMPQUIET(other_alloc, 0); + return (0); +} + +int +__lock_id_free(env, sh_locker) + ENV *env; + DB_LOCKER *sh_locker; +{ + COMPQUIET(env, NULL); + COMPQUIET(sh_locker, 0); + return (0); +} + +int +__lock_env_refresh(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__lock_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(flags, 0); + return (0); +} + +int +__lock_put(env, lock) + ENV *env; + DB_LOCK *lock; +{ + COMPQUIET(env, NULL); + COMPQUIET(lock, NULL); + return (0); +} + +int +__lock_vec(env, sh_locker, flags, list, nlist, elistp) + ENV *env; + DB_LOCKER *sh_locker; + u_int32_t flags; + int nlist; + DB_LOCKREQ *list, **elistp; +{ + COMPQUIET(env, NULL); + COMPQUIET(sh_locker, 0); + COMPQUIET(flags, 0); + COMPQUIET(list, NULL); + COMPQUIET(nlist, 0); + COMPQUIET(elistp, NULL); + return (0); +} + +int +__lock_get(env, locker, flags, obj, lock_mode, lock) + ENV *env; + DB_LOCKER *locker; + u_int32_t flags; + const DBT *obj; + db_lockmode_t lock_mode; + DB_LOCK *lock; +{ + COMPQUIET(env, NULL); + COMPQUIET(locker, NULL); + COMPQUIET(flags, 0); + COMPQUIET(obj, NULL); + COMPQUIET(lock_mode, 0); + COMPQUIET(lock, NULL); + return (0); +} + +int +__lock_id(env, idp, lkp) + ENV *env; + u_int32_t *idp; + DB_LOCKER **lkp; +{ + COMPQUIET(env, NULL); + COMPQUIET(idp, NULL); + COMPQUIET(lkp, NULL); + return (0); +} + +int +__lock_inherit_timeout(env, parent, locker) + ENV *env; + DB_LOCKER *parent, *locker; +{ + COMPQUIET(env, NULL); + COMPQUIET(parent, NULL); + COMPQUIET(locker, NULL); + return (0); +} + +int +__lock_set_timeout(env, locker, timeout, op) + ENV *env; + DB_LOCKER *locker; + db_timeout_t timeout; + u_int32_t op; +{ + COMPQUIET(env, NULL); + COMPQUIET(locker, NULL); + COMPQUIET(timeout, 0); + COMPQUIET(op, 0); + return (0); +} + +int 
+__lock_addfamilylocker(env, pid, id, is_family) + ENV *env; + u_int32_t pid, id, is_family; +{ + COMPQUIET(env, NULL); + COMPQUIET(pid, 0); + COMPQUIET(id, 0); + COMPQUIET(is_family, 0); + return (0); +} + +int +__lock_freelocker(lt, sh_locker) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; +{ + COMPQUIET(lt, NULL); + COMPQUIET(sh_locker, NULL); + return (0); +} + +int +__lock_familyremove(lt, sh_locker) + DB_LOCKTAB *lt; + DB_LOCKER *sh_locker; +{ + COMPQUIET(lt, NULL); + COMPQUIET(sh_locker, NULL); + return (0); +} + +int +__lock_downgrade(env, lock, new_mode, flags) + ENV *env; + DB_LOCK *lock; + db_lockmode_t new_mode; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(lock, NULL); + COMPQUIET(new_mode, 0); + COMPQUIET(flags, 0); + return (0); +} + +int +__lock_locker_same_family(env, locker1, locker2, retp) + ENV *env; + DB_LOCKER *locker1; + DB_LOCKER *locker2; + int *retp; +{ + COMPQUIET(env, NULL); + COMPQUIET(locker1, NULL); + COMPQUIET(locker2, NULL); + + *retp = 1; + return (0); +} + +void +__lock_set_thread_id(lref, pid, tid) + void *lref; + pid_t pid; + db_threadid_t tid; +{ + COMPQUIET(lref, NULL); + COMPQUIET(pid, 0); + COMPQUIET(tid, 0); +} + +int +__lock_failchk(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__lock_get_list(env, locker, flags, lock_mode, list) + ENV *env; + DB_LOCKER *locker; + u_int32_t flags; + db_lockmode_t lock_mode; + DBT *list; +{ + COMPQUIET(env, NULL); + COMPQUIET(locker, NULL); + COMPQUIET(flags, 0); + COMPQUIET(lock_mode, 0); + COMPQUIET(list, NULL); + return (0); +} + +void +__lock_list_print(env, mbp, list) + ENV *env; + DB_MSGBUF *mbp; + DBT *list; +{ + COMPQUIET(env, NULL); + COMPQUIET(list, NULL); +} + +int +__lock_getlocker(lt, locker, create, retp) + DB_LOCKTAB *lt; + u_int32_t locker; + int create; + DB_LOCKER **retp; +{ + COMPQUIET(locker, 0); + COMPQUIET(create, 0); + COMPQUIET(retp, NULL); + return (__db_nolocking(lt->env)); +} + +int +__lock_id_set(env, cur_id, max_id) + ENV *env; + 
u_int32_t cur_id, max_id;
+{
+	COMPQUIET(env, NULL);
+	COMPQUIET(cur_id, 0);
+	COMPQUIET(max_id, 0);
+	return (0);
+}
+
+int
+__lock_wakeup(env, obj)
+	ENV *env;
+	const DBT *obj;
+{
+	COMPQUIET(obj, NULL);
+	return (__db_nolocking(env));
+}
+
+int
+__lock_change(env, old_lock, new_lock)
+	ENV *env;
+	DB_LOCK *old_lock, *new_lock;
+{
+	COMPQUIET(env, NULL);
+	COMPQUIET(old_lock, NULL);
+	COMPQUIET(new_lock, NULL);
+	return (0);
+}
diff --git a/src/lock/lock_timer.c b/src/lock/lock_timer.c
new file mode 100644
index 00000000..a1c673df
--- /dev/null
+++ b/src/lock/lock_timer.c
@@ -0,0 +1,128 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/lock.h"
+
+/*
+ * __lock_set_timeout --
+ *	Set timeout values in shared memory.
+ *
+ * This is called from the transaction system. We either set the time that
+ * this transaction expires or the amount of time a lock for this transaction
+ * is permitted to wait.
+ *
+ * PUBLIC: int __lock_set_timeout __P((ENV *,
+ * PUBLIC:     DB_LOCKER *, db_timeout_t, u_int32_t));
+ */
+int
+__lock_set_timeout(env, locker, timeout, op)
+	ENV *env;
+	DB_LOCKER *locker;
+	db_timeout_t timeout;
+	u_int32_t op;
+{
+	int ret;
+
+	if (locker == NULL)
+		return (0);
+	LOCK_REGION_LOCK(env);
+	ret = __lock_set_timeout_internal(env, locker, timeout, op);
+	LOCK_REGION_UNLOCK(env);
+	return (ret);
+}
+
+/*
+ * __lock_set_timeout_internal
+ *		-- set timeout values in shared memory.
+ *
+ * This is the internal version called from the lock system. We either set
+ * the time that this transaction expires or the amount of time that a lock
+ * for this transaction is permitted to wait.
+ * + * PUBLIC: int __lock_set_timeout_internal + * PUBLIC: __P((ENV *, DB_LOCKER *, db_timeout_t, u_int32_t)); + */ +int +__lock_set_timeout_internal(env, sh_locker, timeout, op) + ENV *env; + DB_LOCKER *sh_locker; + db_timeout_t timeout; + u_int32_t op; +{ + DB_LOCKREGION *region; + region = env->lk_handle->reginfo.primary; + + if (op == DB_SET_TXN_TIMEOUT) { + if (timeout == 0) + timespecclear(&sh_locker->tx_expire); + else + __clock_set_expires(env, + &sh_locker->tx_expire, timeout); + } else if (op == DB_SET_LOCK_TIMEOUT) { + sh_locker->lk_timeout = timeout; + F_SET(sh_locker, DB_LOCKER_TIMEOUT); + } else if (op == DB_SET_TXN_NOW) { + timespecclear(&sh_locker->tx_expire); + __clock_set_expires(env, &sh_locker->tx_expire, 0); + sh_locker->lk_expire = sh_locker->tx_expire; + if (!timespecisset(®ion->next_timeout) || + timespeccmp( + ®ion->next_timeout, &sh_locker->lk_expire, >)) + region->next_timeout = sh_locker->lk_expire; + } else + return (EINVAL); + + return (0); +} + +/* + * __lock_inherit_timeout + * -- inherit timeout values from parent locker. + * This is called from the transaction system. This will + * return EINVAL if the parent does not exist or did not + * have a current txn timeout set. + * + * PUBLIC: int __lock_inherit_timeout __P((ENV *, DB_LOCKER *, DB_LOCKER *)); + */ +int +__lock_inherit_timeout(env, parent, locker) + ENV *env; + DB_LOCKER *parent, *locker; +{ + int ret; + + ret = 0; + LOCK_REGION_LOCK(env); + + /* + * If the parent is not there yet, thats ok. If it + * does not have any timouts set, then avoid creating + * the child locker at this point. 
+ */
+	if (parent == NULL ||
+	    !(timespecisset(&parent->tx_expire) ||
+	    F_ISSET(parent, DB_LOCKER_TIMEOUT))) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	locker->tx_expire = parent->tx_expire;
+
+	if (F_ISSET(parent, DB_LOCKER_TIMEOUT)) {
+		locker->lk_timeout = parent->lk_timeout;
+		F_SET(locker, DB_LOCKER_TIMEOUT);
+		if (!timespecisset(&parent->tx_expire))
+			ret = EINVAL;
+	}
+
+err:	LOCK_REGION_UNLOCK(env);
+	return (ret);
+}
diff --git a/src/lock/lock_util.c b/src/lock/lock_util.c
new file mode 100644
index 00000000..bb1d775d
--- /dev/null
+++ b/src/lock/lock_util.c
@@ -0,0 +1,98 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
+#include "dbinc/lock.h"
+
+/*
+ * The next two functions are the hash functions used to store objects in the
+ * lock hash tables. They are hashing the same items, but one (__lock_ohash)
+ * takes a DBT (used for hashing a parameter passed from the user) and the
+ * other (__lock_lhash) takes a DB_LOCKOBJ (used for hashing something that is
+ * already in the lock manager). In both cases, we have a special check to
+ * fast path the case where we think we are doing a hash on a DB page/fileid
+ * pair. If the size is right, then we do the fast hash.
+ *
+ * We know that DB uses DB_LOCK_ILOCK types for its lock objects. The first
+ * four bytes are the 4-byte page number and the next DB_FILE_ID_LEN bytes
+ * are a unique file id, where the first 4 bytes on UNIX systems are the file
+ * inode number, and the first 4 bytes on Windows systems are the FileIndexLow
+ * bytes. This is followed by a random number. The inode values tend
+ * to increment fairly slowly and are not good for hashing. So, we use
+ * the XOR of the page number and the four bytes of the file id random
+ * number to produce a 32-bit hash value.
+ * + * We have no particular reason to believe that this algorithm will produce + * a good hash, but we want a fast hash more than we want a good one, when + * we're coming through this code path. + */ +#define FAST_HASH(P) { \ + u_int32_t __h; \ + u_int8_t *__cp, *__hp; \ + __hp = (u_int8_t *)&__h; \ + __cp = (u_int8_t *)(P); \ + __hp[0] = __cp[0] ^ __cp[12]; \ + __hp[1] = __cp[1] ^ __cp[13]; \ + __hp[2] = __cp[2] ^ __cp[14]; \ + __hp[3] = __cp[3] ^ __cp[15]; \ + return (__h); \ +} + +/* + * __lock_ohash -- + * + * PUBLIC: u_int32_t __lock_ohash __P((const DBT *)); + */ +u_int32_t +__lock_ohash(dbt) + const DBT *dbt; +{ + if (dbt->size == sizeof(DB_LOCK_ILOCK)) + FAST_HASH(dbt->data); + + return (__ham_func5(NULL, dbt->data, dbt->size)); +} + +/* + * __lock_lhash -- + * + * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); + */ +u_int32_t +__lock_lhash(lock_obj) + DB_LOCKOBJ *lock_obj; +{ + void *obj_data; + + obj_data = SH_DBT_PTR(&lock_obj->lockobj); + + if (lock_obj->lockobj.size == sizeof(DB_LOCK_ILOCK)) + FAST_HASH(obj_data); + + return (__ham_func5(NULL, obj_data, lock_obj->lockobj.size)); +} + +/* + * __lock_nomem -- + * Report a lack of some resource. + * + * PUBLIC: int __lock_nomem __P((ENV *, const char *)); + */ +int +__lock_nomem(env, res) + ENV *env; + const char *res; +{ + __db_errx(env, DB_STR_A("2055", "Lock table is out of available %s", + "%s"), res); + return (ENOMEM); +} diff --git a/src/log/log.c b/src/log/log.c new file mode 100644 index 00000000..2d71165c --- /dev/null +++ b/src/log/log.c @@ -0,0 +1,1685 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
#include "dbinc/db_page.h"
#include "dbinc/db_am.h"

static int __log_init __P((ENV *, DB_LOG *));
static int __log_recover __P((DB_LOG *));

/*
 * __log_open --
 *	Internal version of log_open: only called from ENV->open.
 *
 *	Creates the per-process DB_LOG handle, joins (or creates and
 *	initializes) the shared log region, and, on region creation,
 *	runs log recovery to find the end of the log.
 *
 * PUBLIC: int __log_open __P((ENV *));
 */
int
__log_open(env)
	ENV *env;
{
	DB_ENV *dbenv;
	DB_LOG *dblp;
	LOG *lp;
	u_int8_t *bulk;
	int region_locked, ret;

	dbenv = env->dbenv;
	region_locked = 0;

	/* Create/initialize the DB_LOG structure. */
	if ((ret = __os_calloc(env, 1, sizeof(DB_LOG), &dblp)) != 0)
		return (ret);
	dblp->env = env;

	/* Join/create the log region. */
	if ((ret = __env_region_share(env, &dblp->reginfo)) != 0)
		goto err;

	/* If we created the region, initialize it. */
	if (F_ISSET(&dblp->reginfo, REGION_CREATE))
		if ((ret = __log_init(env, dblp)) != 0)
			goto err;

	/* Set the local addresses. */
	lp = dblp->reginfo.primary = R_ADDR(&dblp->reginfo,
	    ((REGENV *)env->reginfo->primary)->lg_primary);
	dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);

	/*
	 * If the region is threaded, we have to lock the DBREG list, and we
	 * need to allocate a mutex for that purpose.
	 */
	if ((ret = __mutex_alloc(env,
	    MTX_LOG_REGION, DB_MUTEX_PROCESS_ONLY, &dblp->mtx_dbreg)) != 0)
		goto err;

	/*
	 * Set the handle -- we may be about to run recovery, which allocates
	 * log cursors.  Log cursors require logging be already configured,
	 * and the handle being set is what demonstrates that.
	 *
	 * If we created the region, run recovery.  If that fails, make sure
	 * we reset the log handle before cleaning up, otherwise we will try
	 * and clean up again in the mainline ENV initialization code.
	 */
	env->lg_handle = dblp;

	if (F_ISSET(&dblp->reginfo, REGION_CREATE)) {
		/*
		 * We first take the log file size from the environment, if
		 * specified.  If that wasn't set, default it.  Regardless,
		 * recovery may set it from the persistent information in a
		 * log file header.
		 */
		if (lp->log_size == 0)
			lp->log_size =
			    FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ?
			    LG_MAX_INMEM : LG_MAX_DEFAULT;

		if ((ret = __log_recover(dblp)) != 0)
			goto err;

		/*
		 * If the next log file size hasn't been set yet, default it
		 * to the current log file size.
		 */
		if (lp->log_nsize == 0)
			lp->log_nsize = lp->log_size;

		/*
		 * If we haven't written any log files, write the first one
		 * so that checkpoint gets a valid ckp_lsn value.
		 */
		if (IS_INIT_LSN(lp->lsn) &&
		    (ret = __log_newfile(dblp, NULL, 0, 0)) != 0)
			goto err;

		/*
		 * Initialize replication's next-expected LSN value
		 * and replication's bulk buffer.  In __env_open, we
		 * always create/open the replication region before
		 * the log region so we're assured that our rep_handle
		 * is valid at this point, if replication is being used.
		 */
		lp->ready_lsn = lp->lsn;
		if (IS_ENV_REPLICATED(env)) {
			if ((ret =
			    __env_alloc(&dblp->reginfo, MEGABYTE, &bulk)) != 0)
				goto err;
			lp->bulk_buf = R_OFFSET(&dblp->reginfo, bulk);
			lp->bulk_len = MEGABYTE;
			lp->bulk_off = 0;
			lp->wait_ts = env->rep_handle->request_gap;
			__os_gettime(env, &lp->rcvd_ts, 1);
		} else {
			lp->bulk_buf = INVALID_ROFF;
			lp->bulk_len = 0;
			lp->bulk_off = 0;
		}
	} else {
		/*
		 * A process joining the region may have reset the log file
		 * size, too.  If so, it only affects the next log file we
		 * create.  We need to check that the size is reasonable given
		 * the buffer size in the region.
		 */
		LOG_SYSTEM_LOCK(env);
		region_locked = 1;

		if (dbenv->lg_size != 0) {
			if ((ret =
			    __log_check_sizes(env, dbenv->lg_size, 0)) != 0)
				goto err;

			lp->log_nsize = dbenv->lg_size;
		}

		LOG_SYSTEM_UNLOCK(env);
		region_locked = 0;

		/*
		 * NOTE(review): this failure returns directly instead of
		 * jumping to "err" -- presumably intentional because
		 * env->lg_handle is already set, so the mainline ENV
		 * cleanup path (not the local one) tears down the handle;
		 * confirm against __env_open's error handling.
		 */
		if (dbenv->lg_flags != 0 && (ret =
		    __log_set_config_int(dbenv, dbenv->lg_flags, 1, 0)) != 0)
			return (ret);
	}
	dblp->reginfo.mtx_alloc = lp->mtx_region;

	return (0);

	/*
	 * Error cleanup: unlock the region if we hold it, detach, clear the
	 * handle so the mainline code doesn't clean up a second time, and
	 * discard the local allocations.
	 */
err:	if (dblp->reginfo.addr != NULL) {
		if (region_locked)
			LOG_SYSTEM_UNLOCK(env);
		(void)__env_region_detach(env, &dblp->reginfo, 0);
	}
	env->lg_handle = NULL;

	(void)__mutex_free(env, &dblp->mtx_dbreg);
	__os_free(env, dblp);

	return (ret);
}

/*
 * __log_init --
 *	Initialize a log region in shared memory.
 *
 *	Allocates and zeroes the LOG primary structure, the log buffer and
 *	the region mutexes, and seeds the persistent log-file header fields.
 *	Called only when this process created the region (REGION_CREATE).
 */
static int
__log_init(env, dblp)
	ENV *env;
	DB_LOG *dblp;
{
	DB_ENV *dbenv;
	LOG *lp;
	int ret;
	void *p;

	dbenv = env->dbenv;

	/*
	 * This is the first point where we can validate the buffer size,
	 * because we know all three settings have been configured (file size,
	 * buffer size and the in-memory flag).
	 */
	if ((ret =
	    __log_check_sizes(env, dbenv->lg_size, dbenv->lg_bsize)) != 0)
		return (ret);

	if ((ret = __env_alloc(&dblp->reginfo,
	    sizeof(*lp), &dblp->reginfo.primary)) != 0)
		goto mem_err;

	((REGENV *)env->reginfo->primary)->lg_primary =
	    R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);

	lp = dblp->reginfo.primary;
	memset(lp, 0, sizeof(*lp));

	/* We share the region so we need the same mutex. */
	lp->mtx_region = ((REGENV *)env->reginfo->primary)->mtx_regenv;

	lp->fid_max = 0;
	SH_TAILQ_INIT(&lp->fq);
	lp->free_fid_stack = INVALID_ROFF;
	lp->free_fids = lp->free_fids_alloced = 0;

	/* Initialize LOG LSNs. */
	INIT_LSN(lp->lsn);
	INIT_LSN(lp->t_lsn);

	/*
	 * It's possible to be waiting for an LSN of [1][0], if a replication
	 * client gets the first log record out of order.  An LSN of [0][0]
	 * signifies that we're not waiting.
	 */
	ZERO_LSN(lp->waiting_lsn);

	/*
	 * Log makes note of the fact that it ran into a checkpoint on
	 * startup if it did so, as a recovery optimization.  A zero
	 * LSN signifies that it hasn't found one [yet].
	 */
	ZERO_LSN(lp->cached_ckp_lsn);

	if ((ret =
	    __mutex_alloc(env, MTX_LOG_FILENAME, 0, &lp->mtx_filelist)) != 0)
		return (ret);
	if ((ret = __mutex_alloc(env, MTX_LOG_FLUSH, 0, &lp->mtx_flush)) != 0)
		return (ret);

	/* Initialize the buffer. */
	if ((ret = __env_alloc(&dblp->reginfo, dbenv->lg_bsize, &p)) != 0) {
mem_err:	__db_errx(env, DB_STR("2524",
		    "unable to allocate log region memory"));
		return (ret);
	}
	lp->regionmax = dbenv->lg_regionmax;
	lp->buffer_off = R_OFFSET(&dblp->reginfo, p);
	lp->buffer_size = dbenv->lg_bsize;
	lp->filemode = dbenv->lg_filemode;
	lp->log_size = lp->log_nsize = dbenv->lg_size;
	lp->stat.st_fileid_init = dbenv->lg_fileid_init;

	/* Initialize the commit Queue. */
	SH_TAILQ_INIT(&lp->free_commits);
	SH_TAILQ_INIT(&lp->commits);
	lp->ncommit = 0;

	/* Initialize the logfiles list for in-memory logs. */
	SH_TAILQ_INIT(&lp->logfiles);
	SH_TAILQ_INIT(&lp->free_logfiles);

	/*
	 * Fill in the log's persistent header.  Don't fill in the log file
	 * sizes, as they may change at any time and so have to be filled in
	 * as each log file is created.
	 */
	lp->persist.magic = DB_LOGMAGIC;
	/*
	 * Don't use __log_set_version because env->dblp isn't set up yet.
	 */
	lp->persist.version = DB_LOGVERSION;
	lp->persist.notused = 0;
	env->lg_handle = dblp;

	/* Migrate persistent flags from the ENV into the region. */
	if (dbenv->lg_flags != 0 &&
	    (ret = __log_set_config_int(dbenv, dbenv->lg_flags, 1, 1)) != 0)
		return (ret);

	(void)time(&lp->timestamp);
	return (0);
}

/*
 * __log_recover --
 *	Recover a log.
 */
static int
__log_recover(dblp)
	DB_LOG *dblp;
{
	DBT dbt;
	DB_ENV *dbenv;
	DB_LOGC *logc;
	DB_LSN lsn;
	ENV *env;
	LOG *lp;
	u_int32_t cnt, rectype;
	int ret;
	logfile_validity status;

	env = dblp->env;
	dbenv = env->dbenv;
	logc = NULL;
	lp = dblp->reginfo.primary;

	/*
	 * Find a log file.  If none exist, we simply return, leaving
	 * everything initialized to a new log.
	 */
	if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
		return (ret);
	if (cnt == 0) {
		if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
			__db_msg(env, DB_STR("2525", "No log files found"));
		return (0);
	}

	/*
	 * If the last file is an old, unreadable version, start a new
	 * file.  Don't bother finding the end of the last log file;
	 * we assume that it's valid in its entirety, since the user
	 * should have shut down cleanly or run recovery before upgrading.
	 */
	if (status == DB_LV_OLD_UNREADABLE) {
		lp->lsn.file = lp->s_lsn.file = cnt + 1;
		lp->lsn.offset = lp->s_lsn.offset = 0;
		goto skipsearch;
	}
	DB_ASSERT(env,
	    (status == DB_LV_NORMAL || status == DB_LV_OLD_READABLE));

	/*
	 * We have the last useful log file and we've loaded any persistent
	 * information.  Set the end point of the log past the end of the last
	 * file.  Read the last file, looking for the last checkpoint and
	 * the log's end.
	 */
	lp->lsn.file = cnt + 1;
	lp->lsn.offset = 0;
	lsn.file = cnt;
	lsn.offset = 0;

	/*
	 * Allocate a cursor and set it to the first record.  This shouldn't
	 * fail, leave error messages on.
	 */
	if ((ret = __log_cursor(env, &logc)) != 0)
		return (ret);
	F_SET(logc, DB_LOG_LOCKED);
	memset(&dbt, 0, sizeof(dbt));
	if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0)
		goto err;

	/*
	 * Read to the end of the file.  This may fail at some point, so
	 * turn off error messages.
	 */
	F_SET(logc, DB_LOG_SILENT_ERR);
	while (__logc_get(logc, &lsn, &dbt, DB_NEXT) == 0) {
		if (dbt.size < sizeof(u_int32_t))
			continue;
		LOGCOPY_32(env, &rectype, dbt.data);
		if (rectype == DB___txn_ckp)
			/*
			 * If we happen to run into a checkpoint, cache its
			 * LSN so that the transaction system doesn't have
			 * to walk this log file again looking for it.
			 */
			lp->cached_ckp_lsn = lsn;
	}
	F_CLR(logc, DB_LOG_SILENT_ERR);

	/*
	 * We now know where the end of the log is.  Set the first LSN that
	 * we want to return to an application and the LSN of the last known
	 * record on disk.
	 */
	lp->lsn = lsn;
	lp->s_lsn = lsn;
	lp->lsn.offset += logc->len;
	lp->s_lsn.offset += logc->len;

	/* Set up the current buffer information, too. */
	lp->len = logc->len;
	lp->a_off = 0;
	lp->b_off = 0;
	lp->w_off = lp->lsn.offset;

skipsearch:
	if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
		__db_msg(env, DB_STR_A("2526",
		    "Finding last valid log LSN: file: %lu offset %lu",
		    "%lu %lu"), (u_long)lp->lsn.file, (u_long)lp->lsn.offset);

err:	if (logc != NULL)
		(void)__logc_close(logc);

	return (ret);
}

/*
 * __log_find --
 *	Try to find a log file.  If find_first is set, valp will contain
 * the number of the first readable log file, else it will contain the number
 * of the last log file (which may be too old to read).
 *
 * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *));
 */
int
__log_find(dblp, find_first, valp, statusp)
	DB_LOG *dblp;
	int find_first;
	u_int32_t *valp;
	logfile_validity *statusp;
{
	ENV *env;
	LOG *lp;
	logfile_validity logval_status, status;
	struct __db_filestart *filestart;
	u_int32_t clv, logval;
	int cnt, fcnt, ret;
	const char *dir;
	char *c, **names, *p, *q;

	env = dblp->env;
	lp = dblp->reginfo.primary;
	logval_status = status = DB_LV_NONEXISTENT;

	/* Return a value of 0 as the log file number on failure. */
	*valp = 0;

	/* In-memory logs: answer from the filestart list, no directory scan. */
	if (lp->db_log_inmemory) {
		filestart = find_first ?
		    SH_TAILQ_FIRST(&lp->logfiles, __db_filestart) :
		    SH_TAILQ_LAST(&lp->logfiles, links, __db_filestart);
		if (filestart != NULL) {
			*valp = filestart->file;
			logval_status = DB_LV_NORMAL;
		}
		*statusp = logval_status;
		return (0);
	}

	/* Find the directory name. */
	if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) {
		__os_free(env, p);
		return (ret);
	}
	if ((q = __db_rpath(p)) == NULL)
		dir = PATH_DOT;
	else {
		*q = '\0';
		dir = p;
	}

	/* Get the list of file names. */
retry:	if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) {
		__db_err(env, ret, "%s", dir);
		__os_free(env, p);
		return (ret);
	}

	/* Search for a valid log file name. */
	for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
		if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0)
			continue;

		/*
		 * Names of the form log\.[0-9]* are reserved for DB.  Other
		 * names sharing LFPREFIX, such as "log.db", are legal.
		 */
		for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++)
			if (!isdigit((int)*c))
				break;
		if (*c != '\0')
			continue;

		/*
		 * Use atol, not atoi; if an "int" is 16-bits, the largest
		 * log file name won't fit.
		 */
		clv = (u_int32_t)atol(names[cnt] + (sizeof(LFPREFIX) - 1));

		/*
		 * If searching for the first log file, we want to return the
		 * oldest log file we can read, or, if no readable log files
		 * exist, the newest log file we can't read (the crossover
		 * point between the old and new versions of the log file).
		 *
		 * If we're searching for the last log file, we want to return
		 * the newest log file, period.
		 *
		 * Readable log files should never precede unreadable log
		 * files, that would mean the admin seriously screwed up.
		 */
		if (find_first) {
			if (logval != 0 &&
			    status != DB_LV_OLD_UNREADABLE && clv > logval)
				continue;
		} else
			if (logval != 0 && clv < logval)
				continue;

		if ((ret = __log_valid(dblp, clv, 1, NULL, 0,
		    &status, NULL)) != 0) {
			/*
			 * If we have raced with removal of a log file since
			 * the call to __os_dirlist, it may no longer exist.
			 * In that case, just go on to the next one.  If we're
			 * at the end of the list, all of the log files we saw
			 * initially are gone and we need to get the list again.
			 */
			if (ret == ENOENT) {
				ret = 0;
				if (cnt == 0) {
					__os_dirfree(env, names, fcnt);
					goto retry;
				}
				continue;
			}
			__db_err(env, ret, DB_STR_A("2527",
			    "Invalid log file: %s", "%s"), names[cnt]);
			goto err;
		}
		switch (status) {
		case DB_LV_NONEXISTENT:
			/* __log_valid never returns DB_LV_NONEXISTENT. */
			DB_ASSERT(env, 0);
			break;
		case DB_LV_INCOMPLETE:
			/*
			 * The last log file may not have been initialized --
			 * it's possible to create a log file but not write
			 * anything to it.  If performing recovery (that is,
			 * if find_first isn't set), ignore the file, it's
			 * not interesting.  If we're searching for the first
			 * log record, return the file (assuming we don't find
			 * something better), as the "real" first log record
			 * is likely to be in the log buffer, and we want to
			 * set the file LSN for our return.
			 */
			if (find_first)
				goto found;
			break;
		case DB_LV_OLD_UNREADABLE:
			/*
			 * If we're searching for the first log file, then we
			 * only want this file if we don't yet have a file or
			 * already have an unreadable file and this one is
			 * newer than that one.  If we're searching for the
			 * last log file, we always want this file because we
			 * wouldn't be here if it wasn't newer than our current
			 * choice.
			 */
			if (!find_first || logval == 0 ||
			    (status == DB_LV_OLD_UNREADABLE && clv > logval))
				goto found;
			break;
		case DB_LV_NORMAL:
		case DB_LV_OLD_READABLE:
found:			logval = clv;
			logval_status = status;
			break;
		}
	}

	*valp = logval;

err:	__os_dirfree(env, names, fcnt);
	__os_free(env, p);
	*statusp = logval_status;

	return (ret);
}

/*
 * log_valid --
 *	Validate a log file.  Returns an error code in the event of
 *	a fatal flaw in the specified log file;  returns success with
 *	a code indicating the currentness and completeness of the specified
 *	log file if it is not unexpectedly flawed (that is, if it's perfectly
 *	normal, if it's zero-length, or if it's an old version).
 *
 * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int,
 * PUBLIC:     DB_FH **, u_int32_t, logfile_validity *, u_int32_t *));
 */
int
__log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp)
	DB_LOG *dblp;
	u_int32_t number;
	int set_persist;
	DB_FH **fhpp;
	u_int32_t flags;
	logfile_validity *statusp;
	u_int32_t *versionp;
{
	DB_CIPHER *db_cipher;
	DB_FH *fhp;
	ENV *env;
	HDR *hdr;
	LOG *lp;
	LOGP *persist;
	logfile_validity status;
	size_t hdrsize, nr, recsize;
	int is_hmac, ret;
	u_int8_t *tmp;
	char *fname;

	env = dblp->env;
	db_cipher = env->crypto_handle;
	fhp = NULL;
	persist = NULL;
	status = DB_LV_NORMAL;
	tmp = NULL;

	/* Return the file handle to our caller, on request */
	if (fhpp != NULL)
		*fhpp = NULL;

	if (flags == 0)
		flags = DB_OSO_RDONLY | DB_OSO_SEQ;
	/* Try to open the log file. */
	if ((ret = __log_name(dblp, number, &fname, &fhp, flags)) != 0) {
		__os_free(env, fname);
		return (ret);
	}

	hdrsize = HDR_NORMAL_SZ;
	is_hmac = 0;
	recsize = sizeof(LOGP);
	if (CRYPTO_ON(env)) {
		hdrsize = HDR_CRYPTO_SZ;
		recsize = sizeof(LOGP);
		recsize += db_cipher->adj_size(recsize);
		is_hmac = 1;
	}
	if ((ret = __os_calloc(env, 1, recsize + hdrsize, &tmp)) != 0)
		goto err;

	hdr = (HDR *)tmp;
	persist = (LOGP *)(tmp + hdrsize);

	/*
	 * Try to read the header.  This can fail if the log is truncated, or
	 * if we find a preallocated log file where the header has not yet been
	 * written, so we need to check whether the header is zero-filled.
	 */
	if ((ret = __os_read(env, fhp, tmp, recsize + hdrsize, &nr)) != 0 ||
	    nr != recsize + hdrsize ||
	    (hdr->len == 0 && persist->magic == 0 && persist->log_size == 0)) {
		if (ret == 0)
			status = DB_LV_INCOMPLETE;
		else
			/*
			 * The error was a fatal read error, not just an
			 * incompletely initialized log file.
			 */
			__db_err(env, ret, DB_STR_A("2528",
			    "ignoring log file: %s", "%s"), fname);
		goto err;
	}

	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));

	/*
	 * Now we have to validate the persistent record.  We have
	 * several scenarios we have to deal with:
	 *
	 * 1. User has crypto turned on:
	 *	- They're reading an old, unencrypted log file
	 *	  .  We will fail the record size match check below.
	 *	- They're reading a current, unencrypted log file
	 *	  .  We will fail the record size match check below.
	 *	- They're reading an old, encrypted log file [NOT YET]
	 *	  .  After decryption we'll fail the version check.  [NOT YET]
	 *	- They're reading a current, encrypted log file
	 *	  .  We should proceed as usual.
	 * 2. User has crypto turned off:
	 *	- They're reading an old, unencrypted log file
	 *	  .  We will fail the version check.
	 *	- They're reading a current, unencrypted log file
	 *	  .  We should proceed as usual.
	 *	- They're reading an old, encrypted log file [NOT YET]
	 *	  .  We'll fail the magic number check (it is encrypted).
	 *	- They're reading a current, encrypted log file
	 *	  .  We'll fail the magic number check (it is encrypted).
	 */
	if (CRYPTO_ON(env)) {
		/*
		 * If we are trying to decrypt an unencrypted log
		 * we can only detect that by having an unreasonable
		 * data length for our persistent data.
		 */
		if ((hdr->len - hdrsize) != sizeof(LOGP)) {
			__db_errx(env, "log record size mismatch");
			/*
			 * NOTE(review): ret is still 0 on this path, so the
			 * caller sees success with status DB_LV_NORMAL --
			 * confirm whether this should set ret/status before
			 * jumping to err.
			 */
			goto err;
		}
		/* Check the checksum and decrypt. */
#ifdef HAVE_LOG_CHECKSUM
		if ((ret = __db_check_chksum(env, hdr, db_cipher,
		    &hdr->chksum[0], (u_int8_t *)persist,
		    hdr->len - hdrsize, is_hmac)) != 0) {
			__db_errx(env, "log record checksum mismatch");
			goto err;
		}
#endif

		if ((ret = db_cipher->decrypt(env, db_cipher->data,
		    &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0)
			goto err;
	}

	/* Swap the header, if necessary. */
	if (LOG_SWAPPED(env)) {
		/*
		 * If the magic number is not byte-swapped, we're looking at an
		 * old log that we can no longer read.
		 */
		if (persist->magic == DB_LOGMAGIC) {
			__db_errx(env, DB_STR_A("2529",
			    "Ignoring log file: %s historic byte order",
			    "%s"), fname);
			status = DB_LV_OLD_UNREADABLE;
			goto err;
		}

		__log_persistswap(persist);
	}

	/* Validate the header. */
	if (persist->magic != DB_LOGMAGIC) {
		__db_errx(env, DB_STR_A("2530",
		    "Ignoring log file: %s: magic number %lx, not %lx",
		    "%s %lx %lx"), fname,
		    (u_long)persist->magic, (u_long)DB_LOGMAGIC);
		ret = EINVAL;
		goto err;
	}

	/*
	 * Set our status code to indicate whether the log file belongs to an
	 * unreadable or readable old version; leave it alone if and only if
	 * the log file version is the current one.
	 */
	if (persist->version > DB_LOGVERSION) {
		/* This is a fatal error--the log file is newer than DB. */
		__db_errx(env, DB_STR_A("2531",
		    "Unacceptable log file %s: unsupported log version %lu",
		    "%s %lu"), fname, (u_long)persist->version);
		ret = EINVAL;
		goto err;
	} else if (persist->version < DB_LOGOLDVER) {
		status = DB_LV_OLD_UNREADABLE;
		/* This is a non-fatal error, but give some feedback. */
		__db_errx(env, DB_STR_A("2532",
		    "Skipping log file %s: historic log version %lu", "%s %lu"),
		    fname, (u_long)persist->version);
		/*
		 * We don't want to set persistent info based on an unreadable
		 * region, so jump to "err".
		 */
		goto err;
	} else if (persist->version < DB_LOGVERSION)
		status = DB_LV_OLD_READABLE;

	/*
	 * Only if we have a current log do we verify the checksum.  We could
	 * not check the checksum before checking the magic and version because
	 * old log headers put the length and checksum in a different location.
	 * The checksum was calculated with the swapped byte order, so we need
	 * to check it with the same bytes.
	 */
	if (!CRYPTO_ON(env)) {
		if (LOG_SWAPPED(env))
			__log_persistswap(persist);
#ifdef HAVE_LOG_CHECKSUM
		if ((ret = __db_check_chksum(env,
		    hdr, db_cipher, &hdr->chksum[0], (u_int8_t *)persist,
		    hdr->len - hdrsize, is_hmac)) != 0) {
			__db_errx(env, DB_STR("2533",
			    "log record checksum mismatch"));
			goto err;
		}
#endif

		if (LOG_SWAPPED(env))
			__log_persistswap(persist);
	}

	/*
	 * If the log is readable so far and we're doing system initialization,
	 * set the region's persistent information based on the headers.
	 *
	 * Override the current log file size.
	 */
	if (set_persist) {
		lp = dblp->reginfo.primary;
		lp->log_size = persist->log_size;
		lp->persist.version = persist->version;
	}
	if (versionp != NULL)
		*versionp = persist->version;

err:	if (fname != NULL)
		__os_free(env, fname);
	if (ret == 0 && fhpp != NULL)
		*fhpp = fhp;
	else
		/* Must close on error or if we only used it locally. */
		(void)__os_closehandle(env, fhp);
	if (tmp != NULL)
		__os_free(env, tmp);

	if (statusp != NULL)
		*statusp = status;

	return (ret);
}

/*
 * __log_env_refresh --
 *	Clean up after the log system on a close or failed open.
 *
 * PUBLIC: int __log_env_refresh __P((ENV *));
 */
int
__log_env_refresh(env)
	ENV *env;
{
	DB_LOG *dblp;
	LOG *lp;
	REGINFO *reginfo;
	struct __fname *fnp;
	struct __db_commit *commit;
	struct __db_filestart *filestart;
	int ret, t_ret;

	dblp = env->lg_handle;
	reginfo = &dblp->reginfo;
	lp = reginfo->primary;
	ret = 0;

	/*
	 * Flush the log if it's private -- there's no Berkeley DB guarantee
	 * that this gets done, but in case the application has forgotten to
	 * flush for durability, it's the polite thing to do.
	 */
	if (F_ISSET(env, ENV_PRIVATE) &&
	    (t_ret = __log_flush(env, NULL)) != 0 && ret == 0)
		ret = t_ret;

	if ((t_ret = __dbreg_close_files(env, 0)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * After we close the files, check for any unlogged closes left in
	 * the shared memory queue.  If we find any, try to log it, otherwise
	 * return the error.  We cannot say the environment was closed
	 * cleanly.
	 */
	MUTEX_LOCK(env, lp->mtx_filelist);
	SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname)
		if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) &&
		    (t_ret = __dbreg_close_id_int(
		    env, fnp, DBREG_CLOSE, 1)) != 0)
			ret = t_ret;
	MUTEX_UNLOCK(env, lp->mtx_filelist);

	/*
	 * If a private region, return the memory to the heap.  Not needed for
	 * filesystem-backed or system shared memory regions, that memory isn't
	 * owned by any particular process.
	 */
	if (F_ISSET(env, ENV_PRIVATE)) {
		reginfo->mtx_alloc = MUTEX_INVALID;
		/* Discard the flush mutex. */
		if ((t_ret =
		    __mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0)
			ret = t_ret;

		/* Discard the buffer. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off));

		/* Discard stack of free file IDs. */
		if (lp->free_fid_stack != INVALID_ROFF)
			__env_alloc_free(reginfo,
			    R_ADDR(reginfo, lp->free_fid_stack));

		/* Discard the list of in-memory log file markers. */
		while ((filestart = SH_TAILQ_FIRST(&lp->logfiles,
		    __db_filestart)) != NULL) {
			SH_TAILQ_REMOVE(&lp->logfiles, filestart, links,
			    __db_filestart);
			__env_alloc_free(reginfo, filestart);
		}

		while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles,
		    __db_filestart)) != NULL) {
			SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links,
			    __db_filestart);
			__env_alloc_free(reginfo, filestart);
		}

		/* Discard commit queue elements. */
		while ((commit = SH_TAILQ_FIRST(&lp->free_commits,
		    __db_commit)) != NULL) {
			SH_TAILQ_REMOVE(&lp->free_commits, commit, links,
			    __db_commit);
			__env_alloc_free(reginfo, commit);
		}

		/* Discard replication bulk buffer. */
		if (lp->bulk_buf != INVALID_ROFF) {
			__env_alloc_free(reginfo,
			    R_ADDR(reginfo, lp->bulk_buf));
			lp->bulk_buf = INVALID_ROFF;
		}
	}

	/* Discard the per-thread DBREG mutex. */
	if ((t_ret = __mutex_free(env, &dblp->mtx_dbreg)) != 0 && ret == 0)
		ret = t_ret;

	/* Detach from the region. */
	if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0)
		ret = t_ret;

	/* Close open files, release allocated memory. */
	if (dblp->lfhp != NULL) {
		if ((t_ret =
		    __os_closehandle(env, dblp->lfhp)) != 0 && ret == 0)
			ret = t_ret;
		dblp->lfhp = NULL;
	}
	if (dblp->dbentry != NULL)
		__os_free(env, dblp->dbentry);

	__os_free(env, dblp);

	env->lg_handle = NULL;
	return (ret);
}

/*
 * __log_get_cached_ckp_lsn --
 *	Retrieve any last checkpoint LSN that we may have found on startup.
 *
 * PUBLIC: int __log_get_cached_ckp_lsn __P((ENV *, DB_LSN *));
 */
int
__log_get_cached_ckp_lsn(env, ckp_lsnp)
	ENV *env;
	DB_LSN *ckp_lsnp;
{
	DB_LOG *dblp;
	LOG *lp;

	dblp = env->lg_handle;
	lp = (LOG *)dblp->reginfo.primary;

	/* Read the cached value under the region lock. */
	LOG_SYSTEM_LOCK(env);
	*ckp_lsnp = lp->cached_ckp_lsn;
	LOG_SYSTEM_UNLOCK(env);

	return (0);
}

/*
 * __log_region_mutex_count --
 *	Return the number of mutexes the log region will need.
 *
 * PUBLIC: u_int32_t __log_region_mutex_count __P((ENV *));
 */
u_int32_t
__log_region_mutex_count(env)
	ENV *env;
{
	/*
	 * We need a few assorted mutexes, and one per transaction waiting
	 * on the group commit list.  We can't know how many that will be,
	 * but it should be bounded by the maximum active transactions.
	 */
	return (env->dbenv->tx_init + 5);
}

/*
 * __log_region_mutex_max --
 *	Return the number of additional mutexes the log region will need.
 *
 * PUBLIC: u_int32_t __log_region_mutex_max __P((ENV *));
 */
u_int32_t
__log_region_mutex_max(env)
	ENV *env;
{
	DB_ENV *dbenv;
	u_int32_t count;

	dbenv = env->dbenv;

	if ((count = dbenv->tx_max) == 0)
		count = DEF_MAX_TXNS;
	/* Nothing extra needed if the initial allocation already covers it. */
	if (count < dbenv->tx_init)
		return (0);
	return (count - dbenv->tx_init);
}

/*
 * __log_region_size --
 *	Return the amount of space needed for the log region.
 *	Make the region large enough to hold txn_max transaction
 *	detail structures plus some space to hold thread handles
 *	and the beginning of the alloc region and anything we
 *	need for mutex system resource recording.
 * PUBLIC: size_t __log_region_size __P((ENV *));
 */
size_t
__log_region_size(env)
	ENV *env;
{
	DB_ENV *dbenv;
	size_t s;

	dbenv = env->dbenv;

	/* Set the default buffer size, if not otherwise configured. */
	if (dbenv->lg_bsize == 0)
		dbenv->lg_bsize = FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ?
		    LG_BSIZE_INMEM : LG_BSIZE_DEFAULT;

	s = dbenv->lg_bsize;
	/* Allocate the initial fileid allocation, plus some path name space. */
	s += dbenv->lg_fileid_init * __env_alloc_size((sizeof(FNAME)) + 16);

	return (s);
}
/*
 * __log_region_max --
 *	Return the amount of extra memory to allocate for logging information.
 * PUBLIC: size_t __log_region_max __P((ENV *));
 */
size_t
__log_region_max(env)
	ENV *env;
{

	DB_ENV *dbenv;
	size_t s;

	dbenv = env->dbenv;
	if (dbenv->lg_fileid_init == 0) {
		if ((s = dbenv->lg_regionmax) == 0)
			s = LG_BASE_REGION_SIZE;
	} else if ((s = dbenv->lg_regionmax) != 0 &&
	    s < dbenv->lg_fileid_init * (__env_alloc_size(sizeof(FNAME)) + 16))
		s = 0;
	else if (s != 0)
		s -= dbenv->lg_fileid_init *
		    (__env_alloc_size(sizeof(FNAME)) + 16);

	return (s);
}

/*
 * __log_vtruncate
 *	This is a virtual truncate.  We set up the log indicators to
 * make everyone believe that the given record is the last one in the
 * log.  Returns with the next valid LSN (i.e., the LSN of the next
 * record to be written).  This is used in replication to discard records
 * in the log file that do not agree with the master.
 *
 * PUBLIC: int __log_vtruncate __P((ENV *, DB_LSN *, DB_LSN *, DB_LSN *));
 */
int
__log_vtruncate(env, lsn, ckplsn, trunclsn)
	ENV *env;
	DB_LSN *lsn, *ckplsn, *trunclsn;
{
	DBT log_dbt;
	DB_LOG *dblp;
	DB_LOGC *logc;
	LOG *lp;
	u_int32_t bytes, len;
	size_t offset;
	int ret, t_ret;

	/* Need to find out the length of this soon-to-be-last record. */
	if ((ret = __log_cursor(env, &logc)) != 0)
		return (ret);
	memset(&log_dbt, 0, sizeof(log_dbt));
	ret = __logc_get(logc, lsn, &log_dbt, DB_SET);
	len = logc->len;
	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
		ret = t_ret;
	if (ret != 0)
		return (ret);

	/* Now do the truncate. */
	dblp = env->lg_handle;
	lp = (LOG *)dblp->reginfo.primary;

	LOG_SYSTEM_LOCK(env);

	/*
	 * Flush the log so we can simply initialize the in-memory buffer
	 * after the truncate.
	 */
	if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
		goto err;

	lp->lsn = *lsn;
	lp->len = len;
	lp->lsn.offset += lp->len;

	offset = lp->b_off;
	if (lp->db_log_inmemory && (ret =
	    __log_inmem_lsnoff(dblp, &lp->lsn, &offset)) != 0) {
		lp->b_off = (db_size_t)offset;
		goto err;
	}
	lp->b_off = (db_size_t)offset;

	/*
	 * I am going to assume that the number of bytes written since
	 * the last checkpoint doesn't exceed a 32-bit number.
	 */
	DB_ASSERT(env, lp->lsn.file >= ckplsn->file);
	bytes = 0;
	if (ckplsn->file != lp->lsn.file) {
		bytes = lp->log_size - ckplsn->offset;
		if (lp->lsn.file > ckplsn->file + 1)
			bytes += lp->log_size *
			    ((lp->lsn.file - ckplsn->file) - 1);
		bytes += lp->lsn.offset;
	} else
		bytes = lp->lsn.offset - ckplsn->offset;

	lp->stat.st_wc_mbytes += bytes / MEGABYTE;
	lp->stat.st_wc_bytes += bytes % MEGABYTE;

	/*
	 * If the synced lsn is greater than our new end of log, reset it
	 * to our current end of log.
	 */
	MUTEX_LOCK(env, lp->mtx_flush);
	if (LOG_COMPARE(&lp->s_lsn, lsn) > 0)
		lp->s_lsn = lp->lsn;
	MUTEX_UNLOCK(env, lp->mtx_flush);

	/* Initialize the in-region buffer to a pristine state. */
	ZERO_LSN(lp->f_lsn);
	lp->w_off = lp->lsn.offset;

	if (trunclsn != NULL)
		*trunclsn = lp->lsn;

	/* Truncate the log to the new point. */
	if ((ret = __log_zero(env, &lp->lsn)) != 0)
		goto err;

err:	LOG_SYSTEM_UNLOCK(env);
	return (ret);
}

/*
 * __log_is_outdated --
 *	Used by the replication system to identify if a client's logs are too
 * old.
 *
 * PUBLIC: int __log_is_outdated __P((ENV *, u_int32_t, int *));
 */
int
__log_is_outdated(env, fnum, outdatedp)
	ENV *env;
	u_int32_t fnum;
	int *outdatedp;
{
	DB_LOG *dblp;
	LOG *lp;
	char *name;
	int ret;
	u_int32_t cfile;
	struct __db_filestart *filestart;

	dblp = env->lg_handle;

	/*
	 * The log represented by env is compared to the file number passed
	 * in fnum.  If the log file fnum does not exist and is lower-numbered
	 * than the current logs, return *outdatedp non-zero, else we return 0.
	 */
	if (FLD_ISSET(env->dbenv->lg_flags, DB_LOG_IN_MEMORY)) {
		LOG_SYSTEM_LOCK(env);
		lp = (LOG *)dblp->reginfo.primary;
		filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
		*outdatedp = filestart == NULL ? 0 : (fnum < filestart->file);
		LOG_SYSTEM_UNLOCK(env);
		return (0);
	}

	*outdatedp = 0;
	if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) {
		__os_free(env, name);
		return (ret);
	}

	/* If the file exists, we're just fine. */
	if (__os_exists(env, name, NULL) == 0)
		goto out;

	/*
	 * It didn't exist, decide if the file number is too big or
	 * too little.  If it's too little, then we need to indicate
	 * that the LSN is outdated.
	 */
	LOG_SYSTEM_LOCK(env);
	lp = (LOG *)dblp->reginfo.primary;
	cfile = lp->lsn.file;
	LOG_SYSTEM_UNLOCK(env);

	if (cfile > fnum)
		*outdatedp = 1;
out:	__os_free(env, name);
	return (ret);
}

/*
 * __log_zero --
 *	Zero out the tail of a log after a truncate.
 *
 * PUBLIC: int __log_zero __P((ENV *, DB_LSN *));
 */
int
__log_zero(env, from_lsn)
	ENV *env;
	DB_LSN *from_lsn;
{
	DB_FH *fhp;
	DB_LOG *dblp;
	LOG *lp;
	struct __db_filestart *filestart, *nextstart;
	size_t nbytes, len, nw;
	u_int32_t fn, mbytes, bytes;
	u_int8_t buf[4096];
	int ret;
	char *fname;

	dblp = env->lg_handle;
	lp = (LOG *)dblp->reginfo.primary;
	DB_ASSERT(env, LOG_COMPARE(from_lsn, &lp->lsn) <= 0);
	if (LOG_COMPARE(from_lsn, &lp->lsn) > 0) {
		__db_errx(env, DB_STR("2534",
		    "Warning: truncating to point beyond end of log"));
		return (0);
	}

	if (lp->db_log_inmemory) {
		/*
		 * Remove the files that are invalidated by this truncate.
		 */
		for (filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
		    filestart != NULL; filestart = nextstart) {
			nextstart = SH_TAILQ_NEXT(filestart,
			    links, __db_filestart);
			if (filestart->file > from_lsn->file) {
				SH_TAILQ_REMOVE(&lp->logfiles,
				    filestart, links, __db_filestart);
				SH_TAILQ_INSERT_HEAD(&lp->free_logfiles,
				    filestart, links, __db_filestart);
			}
		}

		return (0);
	}

	/* Close any open file handles so unlinks don't fail. */
	if (dblp->lfhp != NULL) {
		(void)__os_closehandle(env, dblp->lfhp);
		dblp->lfhp = NULL;
	}

	/* Throw away any extra log files that we have around. */
	for (fn = from_lsn->file + 1;; fn++) {
		if (__log_name(dblp, fn, &fname, &fhp, DB_OSO_RDONLY) != 0) {
			__os_free(env, fname);
			break;
		}
		(void)__os_closehandle(env, fhp);
		(void)time(&lp->timestamp);
		ret = __os_unlink(env, fname, 0);
		__os_free(env, fname);
		if (ret != 0)
			return (ret);
	}

	/* We removed some log files; have to 0 to end of file. */
	if ((ret =
	    __log_name(dblp, from_lsn->file, &fname, &dblp->lfhp, 0)) != 0) {
		__os_free(env, fname);
		return (ret);
	}
	__os_free(env, fname);
	if ((ret = __os_ioinfo(env,
	    NULL, dblp->lfhp, &mbytes, &bytes, NULL)) != 0)
		goto err;
	DB_ASSERT(env, (mbytes * MEGABYTE + bytes) >= from_lsn->offset);
	len = (mbytes * MEGABYTE + bytes) - from_lsn->offset;

	memset(buf, 0, sizeof(buf));

	/* Initialize the write position. */
	if ((ret = __os_seek(env, dblp->lfhp, 0, 0, from_lsn->offset)) != 0)
		goto err;

	/*
	 * NOTE(review): the returned byte count nw is not checked against
	 * nbytes, so a short write would be silently ignored -- presumably
	 * __os_write guarantees full writes on success; confirm.
	 */
	while (len > 0) {
		nbytes = len > sizeof(buf) ? sizeof(buf) : len;
		if ((ret =
		    __os_write(env, dblp->lfhp, buf, nbytes, &nw)) != 0)
			goto err;
		len -= nbytes;
	}

err:	(void)__os_closehandle(env, dblp->lfhp);
	dblp->lfhp = NULL;

	return (ret);
}

/*
 * __log_inmem_lsnoff --
 *	Find the offset in the buffer of a given LSN.
 *
 * PUBLIC: int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *));
 */
int
__log_inmem_lsnoff(dblp, lsnp, offsetp)
	DB_LOG *dblp;
	DB_LSN *lsnp;
	size_t *offsetp;
{
	LOG *lp;
	struct __db_filestart *filestart;

	lp = (LOG *)dblp->reginfo.primary;

	/*
	 * Walk the in-memory file list looking for the file containing the
	 * LSN; the buffer offset is the file's start offset plus the LSN
	 * offset, modulo the (ring) buffer size.
	 */
	SH_TAILQ_FOREACH(filestart, &lp->logfiles, links, __db_filestart)
		if (filestart->file == lsnp->file) {
			*offsetp = (u_int32_t)
			    (filestart->b_off + lsnp->offset) % lp->buffer_size;
			return (0);
		}

	return (DB_NOTFOUND);
}

/*
 * __log_inmem_newfile --
 *	Records the offset of the beginning of a new file in the in-memory
 *	buffer.
 *
 * PUBLIC: int __log_inmem_newfile __P((DB_LOG *, u_int32_t));
 */
int
__log_inmem_newfile(dblp, file)
	DB_LOG *dblp;
	u_int32_t file;
{
	HDR hdr;
	LOG *lp;
	struct __db_filestart *filestart;
	int ret;
#ifdef DIAGNOSTIC
	struct __db_filestart *first, *last;
#endif

	lp = (LOG *)dblp->reginfo.primary;

	/*
	 * If the log buffer is empty, reuse the filestart entry.
	 */
	filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
	if (filestart != NULL &&
	    RINGBUF_LEN(lp, filestart->b_off, lp->b_off) <=
	    sizeof(HDR) + sizeof(LOGP)) {
		filestart->file = file;
		filestart->b_off = lp->b_off;
		return (0);
	}

	/*
	 * We write an empty header at the end of every in-memory log file.
	 * This is used during cursor traversal to indicate when to switch the
	 * LSN to the next file.
	 */
	if (file > 1) {
		memset(&hdr, 0, sizeof(HDR));
		__log_inmem_copyin(dblp, lp->b_off, &hdr, sizeof(HDR));
		lp->b_off = (lp->b_off + sizeof(HDR)) % lp->buffer_size;
	}

	/* Reuse a free filestart entry if one exists, else allocate one. */
	filestart = SH_TAILQ_FIRST(&lp->free_logfiles, __db_filestart);
	if (filestart == NULL) {
		if ((ret = __env_alloc(&dblp->reginfo,
		    sizeof(struct __db_filestart), &filestart)) != 0)
			return (ret);
		memset(filestart, 0, sizeof(*filestart));
	} else
		SH_TAILQ_REMOVE(&lp->free_logfiles, filestart,
		    links, __db_filestart);

	filestart->file = file;
	filestart->b_off = lp->b_off;

#ifdef DIAGNOSTIC
	first = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
	last = SH_TAILQ_LAST(&(lp)->logfiles, links, __db_filestart);

	/* Check that we don't wrap. */
	DB_ASSERT(dblp->env, !first || first == last ||
	    RINGBUF_LEN(lp, first->b_off, lp->b_off) ==
	    RINGBUF_LEN(lp, first->b_off, last->b_off) +
	    RINGBUF_LEN(lp, last->b_off, lp->b_off));
#endif

	SH_TAILQ_INSERT_TAIL(&lp->logfiles, filestart, links);
	return (0);
}

/*
 * __log_inmem_chkspace --
 *	Ensure that the requested amount of space is available in the buffer,
 *	and invalidate the region.
 *	Note: assumes that the region lock is held on entry.
+ * + * PUBLIC: int __log_inmem_chkspace __P((DB_LOG *, size_t)); + */ +int +__log_inmem_chkspace(dblp, len) + DB_LOG *dblp; + size_t len; +{ + DB_LSN active_lsn, old_active_lsn; + ENV *env; + LOG *lp; + struct __db_filestart *filestart; + size_t offset; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, lp->db_log_inmemory); + + /* + * Allow room for an extra header so that we don't need to check for + * space when switching files. + */ + len += sizeof(HDR); + + /* + * If transactions are enabled and we're about to fill available space, + * update the active LSN and recheck. If transactions aren't enabled, + * don't even bother checking: in that case we can always overwrite old + * log records, because we're never going to abort. + */ + while (TXN_ON(env) && + RINGBUF_LEN(lp, lp->b_off, lp->a_off) <= len) { + old_active_lsn = lp->active_lsn; + active_lsn = lp->lsn; + + /* + * Drop the log region lock so we don't hold it while + * taking the transaction region lock. + */ + LOG_SYSTEM_UNLOCK(env); + ret = __txn_getactive(env, &active_lsn); + LOG_SYSTEM_LOCK(env); + if (ret != 0) + return (ret); + active_lsn.offset = 0; + + /* If we didn't make any progress, give up. */ + if (LOG_COMPARE(&active_lsn, &old_active_lsn) == 0) { + __db_errx(env, DB_STR("2535", +"In-memory log buffer is full (an active transaction spans the buffer)")); + return (DB_LOG_BUFFER_FULL); + } + + /* Make sure we're moving the region LSN forwards. */ + if (LOG_COMPARE(&active_lsn, &lp->active_lsn) > 0) { + lp->active_lsn = active_lsn; + offset = lp->a_off; + (void)__log_inmem_lsnoff(dblp, &active_lsn, &offset); + lp->a_off = (db_size_t)offset; + } + } + + /* + * Remove the first file if it is invalidated by this write. + * Log records can't be bigger than a file, so we only need to + * check the first file. 
+ */ + filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); + if (filestart != NULL && + RINGBUF_LEN(lp, lp->b_off, filestart->b_off) <= len) { + SH_TAILQ_REMOVE(&lp->logfiles, filestart, + links, __db_filestart); + SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart, + links, __db_filestart); + lp->f_lsn.file = filestart->file + 1; + } + + return (0); +} + +/* + * __log_inmem_copyout -- + * Copies the given number of bytes from the buffer -- no checking. + * Note: assumes that the region lock is held on entry. + * + * PUBLIC: void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t)); + */ +void +__log_inmem_copyout(dblp, offset, buf, size) + DB_LOG *dblp; + size_t offset; + void *buf; + size_t size; +{ + LOG *lp; + size_t nbytes; + + lp = (LOG *)dblp->reginfo.primary; + nbytes = (offset + size < lp->buffer_size) ? + size : lp->buffer_size - offset; + memcpy(buf, dblp->bufp + offset, nbytes); + if (nbytes < size) + memcpy((u_int8_t *)buf + nbytes, dblp->bufp, size - nbytes); +} + +/* + * __log_inmem_copyin -- + * Copies the given number of bytes into the buffer -- no checking. + * Note: assumes that the region lock is held on entry. + * + * PUBLIC: void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t)); + */ +void +__log_inmem_copyin(dblp, offset, buf, size) + DB_LOG *dblp; + size_t offset; + void *buf; + size_t size; +{ + LOG *lp; + size_t nbytes; + + lp = (LOG *)dblp->reginfo.primary; + nbytes = (offset + size < lp->buffer_size) ? + size : lp->buffer_size - offset; + memcpy(dblp->bufp + offset, buf, nbytes); + if (nbytes < size) + memcpy(dblp->bufp, (u_int8_t *)buf + nbytes, size - nbytes); +} + +/* + * __log_set_version -- + * Sets the current version of the log subsystem to the given version. + * Essentially this modifies the lp->persist.version field in the + * shared memory region. Called when region is initially created + * and when replication is starting up or finds a new master. 
+ * + * PUBLIC: void __log_set_version __P((ENV *, u_int32_t)); + */ +void +__log_set_version(env, newver) + ENV *env; + u_int32_t newver; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + /* + * We should be able to update this atomically without locking. + */ + lp->persist.version = newver; +} + +/* + * __log_get_oldversion -- + * Returns the last version of log that this environment was working + * with. Since there could be several versions of log files, if + * the user upgraded and didn't log archive, we check the version + * of the first log file, compare it to the last log file. If those + * are different, then there is an older log existing, and we then + * walk backward in the log files looking for the version of the + * most recent older log file. + * + * PUBLIC: int __log_get_oldversion __P((ENV *, u_int32_t *)); + */ +int +__log_get_oldversion(env, ver) + ENV *env; + u_int32_t *ver; +{ + DBT rec; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn; + LOG *lp; + u_int32_t firstfnum, fnum, lastver, oldver; + int ret, t_ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + logc = NULL; + ret = 0; + oldver = DB_LOGVERSION; + /* + * If we're in-memory logs we're always the current version. + */ + if (lp->db_log_inmemory) { + *ver = oldver; + return (0); + } + memset(&rec, 0, sizeof(rec)); + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + /* + * Get the version numbers of the first and last log files. + */ + if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) { + /* + * If there is no log file, we'll get DB_NOTFOUND. + * If we get that, set the version to the current. 
+ */ + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + firstfnum = lsn.file; + if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0) + goto err; + if ((ret = __log_valid(dblp, firstfnum, 0, NULL, 0, + NULL, &oldver)) != 0) + goto err; + /* + * If the first and last LSN are in the same file, then we + * already have the version in oldver. Return it. + */ + if (firstfnum == lsn.file) + goto err; + + /* + * Otherwise they're in different files and we call __log_valid + * to get the version numbers in both files. + */ + if ((ret = __log_valid(dblp, lsn.file, 0, NULL, 0, + NULL, &lastver)) != 0) + goto err; + /* + * If the version numbers are different, walk backward getting + * the version of each log file until we find one that is + * different than the last. + */ + if (oldver != lastver) { + for (fnum = lsn.file - 1; fnum >= firstfnum; fnum--) { + if ((ret = __log_valid(dblp, fnum, 0, NULL, 0, + NULL, &oldver)) != 0) + goto err; + if (oldver != lastver) + break; + } + } +err: if (logc != NULL && ((t_ret = __logc_close(logc)) != 0) && ret == 0) + ret = t_ret; + if (ret == 0 && ver != NULL) + *ver = oldver; + return (ret); +} diff --git a/src/log/log_archive.c b/src/log/log_archive.c new file mode 100644 index 00000000..61c8596c --- /dev/null +++ b/src/log/log_archive.c @@ -0,0 +1,643 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/log.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __absname __P((ENV *, char *, char *, char **)); +static int __build_data __P((ENV *, char *, char ***)); +static int __cmpfunc __P((const void *, const void *)); +static int __log_archive __P((ENV *, char **[], u_int32_t)); +static int __usermem __P((ENV *, char ***)); + +/* + * __log_archive_pp -- + * ENV->log_archive pre/post processing. 
+ * + * PUBLIC: int __log_archive_pp __P((DB_ENV *, char **[], u_int32_t)); + */ +int +__log_archive_pp(dbenv, listp, flags) + DB_ENV *dbenv; + char ***listp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_archive", DB_INIT_LOG); + +#undef OKFLAGS +#define OKFLAGS (DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG | DB_ARCH_REMOVE) + if (flags != 0) { + if ((ret = __db_fchk( + env, "DB_ENV->log_archive", flags, OKFLAGS)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "DB_ENV->log_archive", + flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "DB_ENV->log_archive", + flags, DB_ARCH_REMOVE, + DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)) != 0) + return (ret); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_archive(env, listp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_archive -- + * ENV->log_archive. Internal. + */ +static int +__log_archive(env, listp, flags) + ENV *env; + char ***listp; + u_int32_t flags; +{ + DBT rec; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN stable_lsn; + LOG *lp; + u_int array_size, n; + u_int32_t fnum; + int handle_check, ret, t_ret; + char **array, **arrayp, *name, *p, *pref; +#ifdef HAVE_GETCWD + char path[DB_MAXPATHLEN]; +#endif + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + array = NULL; + name = NULL; + ret = 0; + COMPQUIET(fnum, 0); + + if (flags != DB_ARCH_REMOVE) + *listp = NULL; + + /* There are no log files if logs are in memory. */ + if (lp->db_log_inmemory) { + LF_CLR(~DB_ARCH_DATA); + if (flags == 0) + return (0); + } + + /* + * Check if the user wants the list of log files to remove and we're + * at a bad time in replication initialization. + */ + handle_check = 0; + if (!LF_ISSET(DB_ARCH_DATA) && + !LF_ISSET(DB_ARCH_LOG)) { + /* + * If we're locked out, just return success. No files + * can be archived right now. 
Any other error pass back + * to the caller. + */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __archive_rep_enter(env)) != 0) { + if (ret == DB_REP_LOCKOUT) + ret = 0; + return (ret); + } + } + + /* + * Prepend the original absolute pathname if the user wants an + * absolute path to the database environment directory. + */ +#ifdef HAVE_GETCWD + if (LF_ISSET(DB_ARCH_ABS)) { + /* + * XXX + * Can't trust getcwd(3) to set a valid errno, so don't display + * one unless we know it's good. It's likely a permissions + * problem: use something bland and useless in the default + * return value, so we don't send somebody off in the wrong + * direction. + */ + __os_set_errno(0); + if (getcwd(path, sizeof(path)) == NULL) { + ret = __os_get_errno(); + __db_err(env, ret, DB_STR("2570", + "no absolute path for the current directory")); + goto err; + } + pref = path; + } else +#endif + pref = NULL; + + LF_CLR(DB_ARCH_ABS); + switch (flags) { + case DB_ARCH_DATA: + ret = __build_data(env, pref, listp); + goto err; + case DB_ARCH_LOG: + memset(&rec, 0, sizeof(rec)); + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; +#ifdef UMRW + ZERO_LSN(stable_lsn); +#endif + ret = __logc_get(logc, &stable_lsn, &rec, DB_LAST); + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + fnum = stable_lsn.file; + break; + case DB_ARCH_REMOVE: + __log_autoremove(env); + goto err; + case 0: + + ret = __log_get_stable_lsn(env, &stable_lsn, 1); + /* + * A return of DB_NOTFOUND means the checkpoint LSN + * is before the beginning of the log files we have. + * This is not an error; it just means we're done. + */ + if (ret != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + /* Remove any log files before the last stable LSN. */ + fnum = stable_lsn.file - 1; + break; + default: + ret = __db_unknown_path(env, "__log_archive"); + goto err; + } + +#define LIST_INCREMENT 64 + /* Get some initial space. 
*/ + array_size = 64; + if ((ret = __os_malloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto err; + array[0] = NULL; + + /* Build an array of the file names. */ + for (n = 0; fnum > 0; --fnum) { + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) { + __os_free(env, name); + goto err; + } + if (__os_exists(env, name, NULL) != 0) { + __os_free(env, name); + name = NULL; + if (LF_ISSET(DB_ARCH_LOG) && fnum == stable_lsn.file) + continue; + break; + } + + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto err; + } + + if (pref != NULL) { + if ((ret = + __absname(env, pref, name, &array[n])) != 0) + goto err; + __os_free(env, name); + } else if ((p = __db_rpath(name)) != NULL) { + if ((ret = __os_strdup(env, p + 1, &array[n])) != 0) + goto err; + __os_free(env, name); + } else + array[n] = name; + + name = NULL; + array[++n] = NULL; + } + + /* If there's nothing to return, we're done. */ + if (n == 0) + goto err; + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* Rework the memory. */ + if ((ret = __usermem(env, &array)) != 0) + goto err; + + if (listp != NULL) + *listp = array; + + if (0) { +err: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_free(env, *arrayp); + __os_free(env, array); + } + if (name != NULL) + __os_free(env, name); + } + if (handle_check && (t_ret = __archive_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __log_get_stable_lsn -- + * Get the stable lsn based on where checkpoints are. 
+ * + * PUBLIC: int __log_get_stable_lsn __P((ENV *, DB_LSN *, int)); + */ +int +__log_get_stable_lsn(env, stable_lsn, group_wide) + ENV *env; + DB_LSN *stable_lsn; + int group_wide; +{ + DBT rec; + DB_LOGC *logc; + LOG *lp; + __txn_ckp_args *ckp_args; + int ret, t_ret; + + lp = env->lg_handle->reginfo.primary; + + ret = 0; + memset(&rec, 0, sizeof(rec)); + if (!TXN_ON(env)) { + if ((ret = __log_get_cached_ckp_lsn(env, stable_lsn)) != 0) + goto err; + /* + * No need to check for a return value of DB_NOTFOUND; + * __txn_findlastckp returns 0 if no checkpoint record + * is found. Instead of checking the return value, we + * check to see if the return LSN has been filled in. + */ + if (IS_ZERO_LSN(*stable_lsn) && (ret = + __txn_findlastckp(env, stable_lsn, NULL)) != 0) + goto err; + /* + * If the LSN has not been filled in return DB_NOTFOUND + * so that the caller knows it may be done. + */ + if (IS_ZERO_LSN(*stable_lsn)) { + ret = DB_NOTFOUND; + goto err; + } + } else if ((ret = __txn_getckp(env, stable_lsn)) != 0) + goto err; + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + /* + * Read checkpoint records until we find one that is on disk, + * then copy the ckp_lsn to the stable_lsn; + */ + while ((ret = __logc_get(logc, stable_lsn, &rec, DB_SET)) == 0 && + (ret = __txn_ckp_read(env, rec.data, &ckp_args)) == 0) { + if (stable_lsn->file < lp->s_lsn.file || + (stable_lsn->file == lp->s_lsn.file && + stable_lsn->offset < lp->s_lsn.offset)) { + *stable_lsn = ckp_args->ckp_lsn; + __os_free(env, ckp_args); + break; + } + *stable_lsn = ckp_args->last_ckp; + __os_free(env, ckp_args); + } + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; +#ifdef HAVE_REPLICATION_THREADS + /* + * If we have RepMgr, get the minimum group-aware LSN. 
+ */ + if (group_wide && ret == 0 && REP_ON(env) && APP_IS_REPMGR(env) && + (t_ret = __repmgr_stable_lsn(env, stable_lsn)) != 0) + ret = t_ret; +#else + COMPQUIET(group_wide, 0); +#endif +err: + return (ret); +} + +/* + * __log_autoremove -- + * Delete any non-essential log files. + * + * PUBLIC: void __log_autoremove __P((ENV *)); + */ +void +__log_autoremove(env) + ENV *env; +{ + int ret; + char **begin, **list; + + /* + * Complain if there's an error, but don't return the error to our + * caller. Auto-remove is done when writing a log record, and we + * don't want to fail a write, which could fail the corresponding + * committing transaction, for a permissions error. + */ + if ((ret = __log_archive(env, &list, DB_ARCH_ABS)) != 0) { + if (ret != DB_NOTFOUND) + __db_err(env, ret, DB_STR("2571", + "log file auto-remove")); + return; + } + + /* Remove the files. */ + if (list != NULL) { + for (begin = list; *list != NULL; ++list) + (void)__os_unlink(env, *list, 0); + __os_ufree(env, begin); + } +} + +/* + * __build_data -- + * Build a list of datafiles for return. + */ +static int +__build_data(env, pref, listp) + ENV *env; + char *pref, ***listp; +{ + DBT rec; + DB_LOGC *logc; + DB_LSN lsn; + __dbreg_register_args *argp; + u_int array_size, last, n, nxt; + u_int32_t rectype; + int ret, t_ret; + char **array, **arrayp, **list, **lp, *p, *real_name; + + /* Get some initial space. 
*/ + array_size = 64; + if ((ret = __os_malloc(env, + sizeof(char *) * array_size, &array)) != 0) + return (ret); + array[0] = NULL; + + memset(&rec, 0, sizeof(rec)); + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + for (n = 0; (ret = __logc_get(logc, &lsn, &rec, DB_PREV)) == 0;) { + if (rec.size < sizeof(rectype)) { + ret = EINVAL; + __db_errx(env, DB_STR("2572", + "DB_ENV->log_archive: bad log record")); + break; + } + + LOGCOPY_32(env, &rectype, rec.data); + if (rectype != DB___dbreg_register) + continue; + if ((ret = + __dbreg_register_read(env, rec.data, &argp)) != 0) { + ret = EINVAL; + __db_errx(env, DB_STR("2573", + "DB_ENV->log_archive: unable to read log record")); + break; + } + + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * array_size, &array)) != 0) + goto free_continue; + } + + if ((ret = __os_strdup(env, + argp->name.data, &array[n++])) != 0) + goto free_continue; + array[n] = NULL; + + if (argp->ftype == DB_QUEUE) { + if ((ret = __qam_extent_names(env, + argp->name.data, &list)) != 0) + goto q_err; + for (lp = list; + lp != NULL && *lp != NULL; lp++) { + if (n >= array_size - 2) { + array_size += LIST_INCREMENT; + if ((ret = __os_realloc(env, + sizeof(char *) * + array_size, &array)) != 0) + goto q_err; + } + if ((ret = + __os_strdup(env, *lp, &array[n++])) != 0) + goto q_err; + array[n] = NULL; + } +q_err: if (list != NULL) + __os_free(env, list); + } +free_continue: __os_free(env, argp); + if (ret != 0) + break; + } + if (ret == DB_NOTFOUND) + ret = 0; + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err1; + + /* If there's nothing to return, we're done. */ + if (n == 0) { + ret = 0; + *listp = NULL; + goto err1; + } + + /* Sort the list. */ + qsort(array, (size_t)n, sizeof(char *), __cmpfunc); + + /* + * Build the real pathnames, discarding nonexistent files and + * duplicates. 
+ */ + for (last = nxt = 0; nxt < n;) { + /* + * Discard duplicates. Last is the next slot we're going + * to return to the user, nxt is the next slot that we're + * going to consider. + */ + if (last != nxt) { + array[last] = array[nxt]; + array[nxt] = NULL; + } + for (++nxt; nxt < n && + strcmp(array[last], array[nxt]) == 0; ++nxt) { + __os_free(env, array[nxt]); + array[nxt] = NULL; + } + + /* Get the real name. */ + if ((ret = __db_appname(env, + DB_APP_DATA, array[last], NULL, &real_name)) != 0) + goto err2; + + /* If the file doesn't exist, ignore it. */ + if (__os_exists(env, real_name, NULL) != 0) { + __os_free(env, real_name); + __os_free(env, array[last]); + array[last] = NULL; + continue; + } + + /* Rework the name as requested by the user. */ + __os_free(env, array[last]); + array[last] = NULL; + if (pref != NULL) { + ret = __absname(env, pref, real_name, &array[last]); + __os_free(env, real_name); + if (ret != 0) + goto err2; + } else if ((p = __db_rpath(real_name)) != NULL) { + ret = __os_strdup(env, p + 1, &array[last]); + __os_free(env, real_name); + if (ret != 0) + goto err2; + } else + array[last] = real_name; + ++last; + } + + /* NULL-terminate the list. */ + array[last] = NULL; + + /* Rework the memory. */ + if ((ret = __usermem(env, &array)) != 0) + goto err1; + + *listp = array; + return (0); + +err2: /* + * XXX + * We've possibly inserted NULLs into the array list, so clean up a + * bit so that the other error processing works. + */ + if (array != NULL) + for (; nxt < n; ++nxt) + __os_free(env, array[nxt]); + /* FALLTHROUGH */ + +err1: if (array != NULL) { + for (arrayp = array; *arrayp != NULL; ++arrayp) + __os_free(env, *arrayp); + __os_free(env, array); + } + return (ret); +} + +/* + * __absname -- + * Return an absolute path name for the file. 
+ */ +static int +__absname(env, pref, name, newnamep) + ENV *env; + char *pref, *name, **newnamep; +{ + size_t l_pref, l_name; + int isabspath, ret; + char *newname; + + l_name = strlen(name); + isabspath = __os_abspath(name); + l_pref = isabspath ? 0 : strlen(pref); + + /* Malloc space for concatenating the two. */ + if ((ret = __os_malloc(env, + l_pref + l_name + 2, &newname)) != 0) + return (ret); + *newnamep = newname; + + /* Build the name. If `name' is an absolute path, ignore any prefix. */ + if (!isabspath) { + memcpy(newname, pref, l_pref); + if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL) + newname[l_pref++] = PATH_SEPARATOR[0]; + } + memcpy(newname + l_pref, name, l_name + 1); + + return (0); +} + +/* + * __usermem -- + * Create a single chunk of memory that holds the returned information. + * If the user has their own malloc routine, use it. + */ +static int +__usermem(env, listp) + ENV *env; + char ***listp; +{ + size_t len; + int ret; + char **array, **arrayp, **orig, *strp; + + /* Find out how much space we need. */ + for (len = 0, orig = *listp; *orig != NULL; ++orig) + len += sizeof(char *) + strlen(*orig) + 1; + len += sizeof(char *); + + /* Allocate it and set up the pointers. */ + if ((ret = __os_umalloc(env, len, &array)) != 0) + return (ret); + + strp = (char *)(array + (orig - *listp) + 1); + + /* Copy the original information into the new memory. */ + for (orig = *listp, arrayp = array; *orig != NULL; ++orig, ++arrayp) { + len = strlen(*orig); + memcpy(strp, *orig, len + 1); + *arrayp = strp; + strp += len + 1; + + __os_free(env, *orig); + } + + /* NULL-terminate the list. 
*/ + *arrayp = NULL; + + __os_free(env, *listp); + *listp = array; + + return (0); +} + +static int +__cmpfunc(p1, p2) + const void *p1, *p2; +{ + return (strcmp(*((char * const *)p1), *((char * const *)p2))); +} diff --git a/src/log/log_compare.c b/src/log/log_compare.c new file mode 100644 index 00000000..4ae6b595 --- /dev/null +++ b/src/log/log_compare.c @@ -0,0 +1,66 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" + +/* + * log_compare -- + * Compare two LSN's; return 1, 0, -1 if first is >, == or < second. + * + * EXTERN: int log_compare __P((const DB_LSN *, const DB_LSN *)); + */ +int +log_compare(lsn0, lsn1) + const DB_LSN *lsn0, *lsn1; +{ + return (LOG_COMPARE(lsn0, lsn1)); +} + +/* + * __log_check_page_lsn -- + * Panic if the page's lsn in past the end of the current log. + * + * PUBLIC: int __log_check_page_lsn __P((ENV *, DB *, DB_LSN *)); + */ +int +__log_check_page_lsn(env, dbp, lsnp) + ENV *env; + DB *dbp; + DB_LSN *lsnp; +{ + LOG *lp; + int ret; + + lp = env->lg_handle->reginfo.primary; + LOG_SYSTEM_LOCK(env); + + ret = LOG_COMPARE(lsnp, &lp->lsn); + + LOG_SYSTEM_UNLOCK(env); + + if (ret < 0) + return (0); + + __db_errx(env, DB_STR_A("2506", + "file %s has LSN %lu/%lu, past end of log at %lu/%lu", + "%s %lu %lu %lu %lu"), + dbp == NULL || + dbp->fname == NULL ? 
DB_STR_P("unknown") : dbp->fname, + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lp->lsn.file, (u_long)lp->lsn.offset); + __db_errx(env, DB_STR("2507", + "Commonly caused by moving a database from one database environment")); + __db_errx(env, DB_STR("2508", + "to another without clearing the database LSNs, or by removing all of")); + __db_errx(env, DB_STR("2509", + "the log files from a database environment")); + return (EINVAL); +} diff --git a/src/log/log_debug.c b/src/log/log_debug.c new file mode 100644 index 00000000..55afbe8e --- /dev/null +++ b/src/log/log_debug.c @@ -0,0 +1,146 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +static int __log_printf_int __P((ENV *, DB_TXN *, const char *, va_list)); + +/* + * __log_printf_capi -- + * Write a printf-style format string into the DB log. + * + * PUBLIC: int __log_printf_capi __P((DB_ENV *, DB_TXN *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__log_printf_capi(DB_ENV *dbenv, DB_TXN *txnid, const char *fmt, ...) +#else +__log_printf_capi(dbenv, txnid, fmt, va_alist) + DB_ENV *dbenv; + DB_TXN *txnid; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __log_printf_pp(dbenv, txnid, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __log_printf_pp -- + * Handle the arguments and call an internal routine to do the work. + * + * The reason this routine isn't just folded into __log_printf_capi + * is because the C++ API has to call a C API routine, and you can + * only pass variadic arguments to a single routine. 
+ * + * PUBLIC: int __log_printf_pp + * PUBLIC: __P((DB_ENV *, DB_TXN *, const char *, va_list)); + */ +int +__log_printf_pp(dbenv, txnid, fmt, ap) + DB_ENV *dbenv; + DB_TXN *txnid; + const char *fmt; + va_list ap; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_printf", DB_INIT_LOG); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_printf_int(env, txnid, fmt, ap)), 0, ret); + va_end(ap); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_printf -- + * Write a printf-style format string into the DB log. + * + * PUBLIC: int __log_printf __P((ENV *, DB_TXN *, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__log_printf(ENV *env, DB_TXN *txnid, const char *fmt, ...) +#else +__log_printf(env, txnid, fmt, va_alist) + ENV *env; + DB_TXN *txnid; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __log_printf_int(env, txnid, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __log_printf_int -- + * Write a printf-style format string into the DB log (internal). + */ +static int +__log_printf_int(env, txnid, fmt, ap) + ENV *env; + DB_TXN *txnid; + const char *fmt; + va_list ap; +{ + DBT opdbt, msgdbt; + DB_LSN lsn; + char __logbuf[2048]; /* !!!: END OF THE STACK DON'T TRUST SPRINTF. 
*/ + + if (!DBENV_LOGGING(env)) { + __db_errx(env, DB_STR("2510", + "Logging not currently permitted")); + return (EAGAIN); + } + + memset(&opdbt, 0, sizeof(opdbt)); + opdbt.data = "DIAGNOSTIC"; + opdbt.size = sizeof("DIAGNOSTIC") - 1; + + memset(&msgdbt, 0, sizeof(msgdbt)); + msgdbt.data = __logbuf; + msgdbt.size = (u_int32_t)vsnprintf(__logbuf, sizeof(__logbuf), fmt, ap); + + return (__db_debug_log( + env, txnid, &lsn, 0, &opdbt, -1, &msgdbt, NULL, 0)); +} diff --git a/src/log/log_get.c b/src/log/log_get.c new file mode 100644 index 00000000..872f8b73 --- /dev/null +++ b/src/log/log_get.c @@ -0,0 +1,1615 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" +#include "dbinc/hash.h" + +typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK; + +static int __logc_close_pp __P((DB_LOGC *, u_int32_t)); +static int __logc_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); +static int __logc_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); +static int __logc_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *)); +static int __logc_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **)); +static int __logc_inregion __P((DB_LOGC *, + DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *)); +static int __logc_io __P((DB_LOGC *, + u_int32_t, u_int32_t, void *, size_t *, int *)); +static int __logc_ondisk __P((DB_LOGC *, + DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *)); +static int __logc_set_maxrec __P((DB_LOGC *, char *)); +static int __logc_shortread __P((DB_LOGC *, DB_LSN *, int)); +static int __logc_version_pp __P((DB_LOGC *, u_int32_t *, u_int32_t)); + +/* + * __log_cursor_pp -- + * ENV->log_cursor + * + * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t)); + */ +int 
+__log_cursor_pp(dbenv, logcp, flags) + DB_ENV *dbenv; + DB_LOGC **logcp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG); + + /* Validate arguments. */ + if ((ret = __db_fchk(env, "DB_ENV->log_cursor", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_cursor(env, logcp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_cursor -- + * Create a log cursor. + * + * PUBLIC: int __log_cursor __P((ENV *, DB_LOGC **)); + */ +int +__log_cursor(env, logcp) + ENV *env; + DB_LOGC **logcp; +{ + DB_LOGC *logc; + int ret; + + *logcp = NULL; + + /* Allocate memory for the cursor. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_LOGC), &logc)) != 0) + return (ret); + + logc->bp_size = LG_CURSOR_BUF_SIZE; + /* + * Set this to something positive. + */ + logc->bp_maxrec = MEGABYTE; + if ((ret = __os_malloc(env, logc->bp_size, &logc->bp)) != 0) { + __os_free(env, logc); + return (ret); + } + + logc->env = env; + logc->close = __logc_close_pp; + logc->get = __logc_get_pp; + logc->version = __logc_version_pp; + + *logcp = logc; + return (0); +} + +/* + * __logc_close_pp -- + * DB_LOGC->close pre/post processing. + */ +static int +__logc_close_pp(logc, flags) + DB_LOGC *logc; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = logc->env; + + if ((ret = __db_fchk(env, "DB_LOGC->close", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__logc_close(logc)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __logc_close -- + * DB_LOGC->close. 
+ * + * PUBLIC: int __logc_close __P((DB_LOGC *)); + */ +int +__logc_close(logc) + DB_LOGC *logc; +{ + ENV *env; + + env = logc->env; + + if (logc->fhp != NULL) { + (void)__os_closehandle(env, logc->fhp); + logc->fhp = NULL; + } + + if (logc->dbt.data != NULL) + __os_free(env, logc->dbt.data); + + __os_free(env, logc->bp); + __os_free(env, logc); + + return (0); +} + +/* + * __logc_version_pp -- + * DB_LOGC->version. + */ +static int +__logc_version_pp(logc, versionp, flags) + DB_LOGC *logc; + u_int32_t *versionp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = logc->env; + + if ((ret = __db_fchk(env, "DB_LOGC->version", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__logc_version(logc, versionp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __logc_version -- + * DB_LOGC->version. + * + * PUBLIC: int __logc_version __P((DB_LOGC *, u_int32_t *)); + */ +int +__logc_version(logc, versionp) + DB_LOGC *logc; + u_int32_t *versionp; +{ + DBT hdrdbt; + DB_LOGC *plogc; + DB_LSN plsn; + ENV *env; + LOGP *persist; + int ret, t_ret; + + env = logc->env; + if (IS_ZERO_LSN(logc->lsn)) { + __db_errx(env, DB_STR("2574", "DB_LOGC->get: unset cursor")); + return (EINVAL); + } + ret = 0; + /* + * Check if the persist info we have is for the same file + * as the current cursor position. If we already have the + * information, then we're done. If not, we open a new + * log cursor and get the header. + * + * Since most users walk forward through the log when + * using this feature (i.e. printlog) we're likely to + * have the information we need. 
+	 */
+	if (logc->lsn.file != logc->p_lsn.file) {
+		/*
+		 * Read the persistent header record at offset 0 of the
+		 * cursor's current file via a temporary cursor, so the
+		 * caller's cursor position is undisturbed.
+		 */
+		if ((ret = __log_cursor(env, &plogc)) != 0)
+			return (ret);
+		plsn.file = logc->lsn.file;
+		plsn.offset = 0;
+		plogc->lsn = plsn;
+		memset(&hdrdbt, 0, sizeof(DBT));
+		if ((ret = __logc_get_int(plogc,
+		    &plsn, &hdrdbt, DB_SET)) == 0) {
+			persist = (LOGP *)hdrdbt.data;
+			if (LOG_SWAPPED(env))
+				__log_persistswap(persist);
+			/* Cache the result for subsequent calls. */
+			logc->p_lsn = logc->lsn;
+			logc->p_version = persist->version;
+		}
+		/* Preserve the first error, but always close the cursor. */
+		if ((t_ret = __logc_close(plogc)) != 0 && ret == 0)
+			ret = t_ret;
+	}
+	/* Return the version. */
+	if (ret == 0)
+		*versionp = logc->p_version;
+	return (ret);
+}
+
+/*
+ * __logc_get_pp --
+ *	DB_LOGC->get pre/post processing.
+ */
+static int
+__logc_get_pp(logc, alsn, dbt, flags)
+	DB_LOGC *logc;
+	DB_LSN *alsn;
+	DBT *dbt;
+	u_int32_t flags;
+{
+	DB_THREAD_INFO *ip;
+	ENV *env;
+	int ret;
+
+	env = logc->env;
+
+	/* Validate arguments. */
+	switch (flags) {
+	case DB_CURRENT:
+	case DB_FIRST:
+	case DB_LAST:
+	case DB_NEXT:
+	case DB_PREV:
+		break;
+	case DB_SET:
+		/* DB_SET is the only flag that reads the caller's LSN. */
+		if (IS_ZERO_LSN(*alsn)) {
+			__db_errx(env, DB_STR_A("2575",
+			    "DB_LOGC->get: invalid LSN: %lu/%lu", "%lu %lu"),
+			    (u_long)alsn->file, (u_long)alsn->offset);
+			return (EINVAL);
+		}
+		break;
+	default:
+		return (__db_ferr(env, "DB_LOGC->get", 1));
+	}
+
+	ENV_ENTER(env, ip);
+	REPLICATION_WRAP(env, (__logc_get(logc, alsn, dbt, flags)), 0, ret);
+	ENV_LEAVE(env, ip);
+	return (ret);
+}
+
+/*
+ * __logc_get --
+ *	DB_LOGC->get.
+ *
+ * PUBLIC: int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
+ */
+int
+__logc_get(logc, alsn, dbt, flags)
+	DB_LOGC *logc;
+	DB_LSN *alsn;
+	DBT *dbt;
+	u_int32_t flags;
+{
+	DB_LSN saved_lsn;
+	ENV *env;
+	LOGP *persist;
+	int ret;
+
+	env = logc->env;
+
+	/*
+	 * On error, we take care not to overwrite the caller's LSN.  This
+	 * is because callers looking for the end of the log loop using the
+	 * DB_NEXT flag, and expect to take the last successful lsn out of
+	 * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND.
+	 *
+	 * !!!
+	 * This line is often flagged an uninitialized memory read during a
+	 * Purify or similar tool run, as the application didn't initialize
+	 * *alsn.  If the application isn't setting the DB_SET flag, there is
+	 * no reason it should have initialized *alsn, but we can't know that
+	 * and we want to make sure we never overwrite whatever the application
+	 * put in there.
+	 */
+	saved_lsn = *alsn;
+	/*
+	 * If we get one of the log's header records as a result of doing a
+	 * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log
+	 * file header records aren't useful to applications.
+	 */
+	if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
+		*alsn = saved_lsn;
+		return (ret);
+	}
+	/*
+	 * The DBT was populated by the call to __logc_get_int, copy the data
+	 * out of DB_DBT_USERMEM space if it is there.
+	 *
+	 * NOTE(review): this early return skips the __dbt_userfree call at
+	 * err below -- presumably safe because the usercopy failed, but
+	 * worth confirming against the __dbt_usercopy contract.
+	 */
+	if ((ret = __dbt_usercopy(env, dbt)) != 0)
+		return (ret);
+
+	if (alsn->offset == 0 && (flags == DB_FIRST ||
+	    flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) {
+		/* Convert the positioning flags into a direction to move. */
+		switch (flags) {
+		case DB_FIRST:
+			flags = DB_NEXT;
+			break;
+		case DB_LAST:
+			flags = DB_PREV;
+			break;
+		case DB_NEXT:
+		case DB_PREV:
+		default:
+			break;
+		}
+		/*
+		 * If we're walking the log and we find a persist header
+		 * then store so that we may use it later if needed.
+		 */
+		persist = (LOGP *)dbt->data;
+		if (LOG_SWAPPED(env))
+			__log_persistswap(persist);
+		logc->p_lsn = *alsn;
+		logc->p_version = persist->version;
+		if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+			__os_free(env, dbt->data);
+			dbt->data = NULL;
+		}
+		/* Retry past the header record. */
+		if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) {
+			*alsn = saved_lsn;
+			goto err;
+		}
+	}
+
+err:	__dbt_userfree(env, dbt, NULL, NULL);
+	return (ret);
+}
+
+/*
+ * __logc_get_int --
+ *	Get a log record; internal version.
+ */
+static int
+__logc_get_int(logc, alsn, dbt, flags)
+	DB_LOGC *logc;
+	DB_LSN *alsn;
+	DBT *dbt;
+	u_int32_t flags;
+{
+	DB_CIPHER *db_cipher;
+	DB_LOG *dblp;
+	DB_LSN last_lsn, nlsn;
+	ENV *env;
+	HDR hdr;
+	LOG *lp;
+	RLOCK rlock;
+	logfile_validity status;
+	u_int32_t cnt, logfsz, orig_flags;
+	u_int8_t *rp;
+	int eof, is_hmac, need_cksum, ret;
+	size_t blen;
+#ifdef HAVE_LOG_CHECKSUM
+	u_int32_t i, logtype, version;
+	char chksumbuf[256];
+	u_int8_t ch;
+#endif
+
+	env = logc->env;
+	db_cipher = env->crypto_handle;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+	is_hmac = 0;
+	orig_flags = flags; /* flags may be altered later. */
+	blen = 0;
+	logfsz = lp->persist.log_size;
+
+	/*
+	 * We don't acquire the log region lock until we need it, and we
+	 * release it as soon as we're done.
+	 */
+	rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;
+
+#ifdef HAVE_LOG_CHECKSUM
+	/*
+	 * Re-entered from the checksum-mismatch path below when log
+	 * verification (DBLOG_VERIFYING) decides to skip a bad record.
+	 */
+nextrec:
+#endif
+	nlsn = logc->lsn;
+	switch (flags) {
+	case DB_NEXT:				/* Next log record. */
+		if (!IS_ZERO_LSN(nlsn)) {
+			/* Increment the cursor by the cursor record size. */
+			nlsn.offset += logc->len;
+			break;
+		}
+		/* An unpositioned cursor starts at the beginning. */
+		flags = DB_FIRST;
+		/* FALLTHROUGH */
+	case DB_FIRST:				/* First log record. */
+		/* Find the first log file. */
+		if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
+			goto err;
+
+		/*
+		 * DB_LV_INCOMPLETE:
+		 *	Theoretically, the log file we want could be created
+		 *	but not yet written, the "first" log record must be
+		 *	in the log buffer.
+		 * DB_LV_NORMAL:
+		 * DB_LV_OLD_READABLE:
+		 *	We found a log file we can read.
+		 * DB_LV_NONEXISTENT:
+		 *	No log files exist, the "first" log record must be in
+		 *	the log buffer.
+		 * DB_LV_OLD_UNREADABLE:
+		 *	No readable log files exist, we're at the cross-over
+		 *	point between two versions.  The "first" log record
+		 *	must be in the log buffer.
+		 */
+		switch (status) {
+		case DB_LV_INCOMPLETE:
+			DB_ASSERT(env, lp->lsn.file == cnt);
+			/* FALLTHROUGH */
+		case DB_LV_NORMAL:
+		case DB_LV_OLD_READABLE:
+			nlsn.file = cnt;
+			break;
+		case DB_LV_NONEXISTENT:
+			nlsn.file = 1;
+			DB_ASSERT(env, lp->lsn.file == nlsn.file);
+			break;
+		case DB_LV_OLD_UNREADABLE:
+			nlsn.file = cnt + 1;
+			DB_ASSERT(env, lp->lsn.file == nlsn.file);
+			break;
+		}
+		nlsn.offset = 0;
+		break;
+	case DB_CURRENT:			/* Current log record. */
+		break;
+	case DB_PREV:				/* Previous log record. */
+		if (!IS_ZERO_LSN(nlsn)) {
+			/* If at start-of-file, move to the previous file. */
+			if (nlsn.offset == 0) {
+				if (nlsn.file == 1) {
+					ret = DB_NOTFOUND;
+					goto err;
+				}
+				/*
+				 * On-disk logs: the previous file must exist
+				 * and be readable, or there's nothing there.
+				 */
+				if ((!lp->db_log_inmemory &&
+				    (__log_valid(dblp, nlsn.file - 1, 0, NULL,
+				    0, &status, NULL) != 0 ||
+				    (status != DB_LV_NORMAL &&
+				    status != DB_LV_OLD_READABLE)))) {
+					ret = DB_NOTFOUND;
+					goto err;
+				}
+
+				--nlsn.file;
+			}
+			nlsn.offset = logc->prev;
+			break;
+		}
+		/* FALLTHROUGH */
+	case DB_LAST:				/* Last log record. */
+		/* Need the region lock to read the current end of log. */
+		if (rlock == L_NONE) {
+			rlock = L_ACQUIRED;
+			LOG_SYSTEM_LOCK(env);
+		}
+		nlsn.file = lp->lsn.file;
+		nlsn.offset = lp->lsn.offset - lp->len;
+		break;
+	case DB_SET:				/* Set log record. */
+		nlsn = *alsn;
+		break;
+	default:
+		ret = __db_unknown_path(env, "__logc_get_int");
+		goto err;
+	}
+
+	if (0) {				/* Move to the next file. */
+next_file:	++nlsn.file;
+		nlsn.offset = 0;
+	}
+
+	/*
+	 * The above switch statement should have set nlsn to the lsn of
+	 * the requested record.
+	 */
+
+	/* Header size (and HMAC use) depend on whether crypto is enabled. */
+	if (CRYPTO_ON(env)) {
+		hdr.size = HDR_CRYPTO_SZ;
+		is_hmac = 1;
+	} else {
+		hdr.size = HDR_NORMAL_SZ;
+		is_hmac = 0;
+	}
+
+	/*
+	 * Check to see if the record is in the cursor's buffer -- if so,
+	 * we'll need to checksum it.
+	 */
+	if ((ret = __logc_incursor(logc, &nlsn, &hdr, &rp)) != 0)
+		goto err;
+	if (rp != NULL)
+		goto cksum;
+
+	/*
+	 * Look to see if we're moving backward in the log with the last record
+	 * coming from the disk -- it means the record can't be in the region's
+	 * buffer.  Else, check the region's buffer.
+	 *
+	 * If the record isn't in the region's buffer, then either logs are
+	 * in-memory, and we're done, or we're going to have to read the
+	 * record from disk.  We want to make a point of not reading past the
+	 * end of the logical log (after recovery, there may be data after the
+	 * end of the logical log, not to mention the log file may have been
+	 * pre-allocated).  So, zero out last_lsn, and initialize it inside
+	 * __logc_inregion -- if it's still zero when we check it in
+	 * __logc_ondisk, that's OK, it just means the logical end of the log
+	 * isn't an issue for this request.
+	 */
+	ZERO_LSN(last_lsn);
+	if (!F_ISSET(logc, DB_LOG_DISK) ||
+	    LOG_COMPARE(&nlsn, &logc->lsn) > 0) {
+		F_CLR(logc, DB_LOG_DISK);
+
+		if ((ret = __logc_inregion(logc,
+		    &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0)
+			goto err;
+		if (rp != NULL) {
+			/*
+			 * If we read the entire record from the in-memory log
+			 * buffer, we don't need to checksum it, nor do we need
+			 * to worry about vtruncate issues.
+			 */
+			if (need_cksum)
+				goto cksum;
+			goto from_memory;
+		}
+		if (lp->db_log_inmemory)
+			goto nohdr;
+	}
+
+	/*
+	 * We have to read from an on-disk file to retrieve the record.
+	 * If we ever can't retrieve the record at offset 0, we're done,
+	 * return EOF/DB_NOTFOUND.
+	 *
+	 * Discard the region lock if we're still holding it, the on-disk
+	 * reading routines don't need it.
+	 */
+	if (rlock == L_ACQUIRED) {
+		rlock = L_NONE;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+	if ((ret = __logc_ondisk(
+	    logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
+		goto err;
+
+	/*
+	 * If we got a 0-length record, that means we're in the midst of some
+	 * bytes that got 0'd as the result of a vtruncate.  In that case or at
+	 * the end of a file, with DB_NEXT we're going to have to retry.
+	 */
+	if (eof || hdr.len == 0) {
+nohdr:		switch (flags) {
+		case DB_LAST:
+		case DB_PREV:
+			/*
+			 * We should never get here.  If we recover a log
+			 * file with 0's at the end, we'll treat the 0'd
+			 * headers as the end of log and ignore them.  If
+			 * we're reading backwards from another file, then
+			 * the first record in that new file should have its
+			 * prev field set correctly.
+			 */
+			__db_errx(env, DB_STR("2576",
+	    "Encountered zero length records while traversing backwards"));
+			ret = __env_panic(env, DB_RUNRECOVERY);
+			goto err;
+		case DB_FIRST:
+		case DB_NEXT:
+			/*
+			 * Zero'd records always indicate the end of a file,
+			 * but only go to the next file once.
+			 */
+			if (nlsn.offset != 0)
+				goto next_file;
+			/* FALLTHROUGH */
+		case DB_SET:
+		default:
+			ret = DB_NOTFOUND;
+			goto err;
+		}
+	}
+
+	/* Remember that the last record came from disk. */
+	F_SET(logc, DB_LOG_DISK);
+
+cksum:	/*
+	 * Discard the region lock if we're still holding it.  (The path to
+	 * get here is we acquired the region lock because of the caller's
+	 * flag argument, but we found the record in the in-memory or cursor
+	 * buffers.  Improbable, but it's easy to avoid.)
+	 */
+	if (rlock == L_ACQUIRED) {
+		rlock = L_NONE;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+#ifdef HAVE_LOG_CHECKSUM
+	/*
+	 * Checksum: there are two types of errors -- a configuration error
+	 * or a checksum mismatch.  The former is always bad.  The latter is
+	 * OK if we're searching for the end of the log, and very, very bad
+	 * if we're reading random log records.
+	 */
+	if ((ret = __db_check_chksum(env, &hdr, db_cipher,
+	    hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
+		/*
+		 * We may be dealing with a version that does not
+		 * checksum the header.  Try again without the header.
+		 * Set the cursor to the LSN we are trying to look at.
+		 */
+		last_lsn = logc->lsn;
+		logc->lsn = nlsn;
+		if (__logc_version(logc, &version) == 0 &&
+		    version < DB_LOGCHKSUM &&
+		    __db_check_chksum(env, NULL, db_cipher, hdr.chksum,
+		    rp + hdr.size, hdr.len - hdr.size, is_hmac) == 0) {
+			logc->lsn = last_lsn;
+			goto from_memory;
+		}
+
+		/*
+		 * If we are iterating logs during log verification and basic
+		 * header info is correct, we can skip the failed log record
+		 * and goto next one.
+		 */
+		if (F_ISSET(logc->env->lg_handle, DBLOG_VERIFYING) &&
+		    (orig_flags == DB_FIRST || orig_flags == DB_LAST ||
+		    orig_flags == DB_PREV || orig_flags == DB_NEXT) &&
+		    hdr.size > 0 && hdr.len > hdr.size && hdr.len < logfsz &&
+		    (((flags == DB_FIRST || flags == DB_NEXT) &&
+		    hdr.prev == last_lsn.offset) ||
+		    ((flags == DB_PREV || flags == DB_LAST) &&
+		    last_lsn.offset - hdr.len == nlsn.offset))) {
+
+			flags = orig_flags;
+
+			logc->lsn = nlsn;
+			logc->len = hdr.len;
+			logc->prev = hdr.prev;
+
+			if (flags == DB_LAST)
+				flags = DB_PREV;
+			else if (flags == DB_FIRST)
+				flags = DB_NEXT;
+
+			/*
+			 * Format the bad checksum for the diagnostic
+			 * message, printable bytes as characters, the
+			 * rest in hex.
+			 */
+			memset(chksumbuf, 0, 256);
+			blen = 0;
+			for (i = 0; i < DB_MAC_KEY && blen < 256; i++) {
+				ch = hdr.chksum[i];
+				blen = strlen(chksumbuf);
+				snprintf(chksumbuf + blen, 255 - blen,
+				    isprint(ch) ||
+				    ch == 0x0a ? "%c" : "%#x ", ch);
+			}
+			/* Type field is always the first one in the record. */
+			memcpy(&logtype, rp + hdr.size, sizeof(logtype));
+			__db_errx(env, DB_STR_A("2577",
+			    "DB_LOGC->get: log record LSN %lu/%lu: "
+			    "checksum mismatch, hdr.chksum: %s, hdr.prev: %u, "
+			    "hdr.len: %u, log type: %u. Skipping it and "
+			    "continuing with the %s one",
+			    "%lu %lu %s %u %u %u %s"),
+			    (u_long)nlsn.file, (u_long)nlsn.offset, chksumbuf,
+			    hdr.prev, hdr.len, logtype, flags == DB_NEXT ?
+			    DB_STR_P("next") : DB_STR_P("previous"));
+			goto nextrec;
+		}
+
+		if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
+			if (ret == -1)
+				ret = EIO;
+		} else if (ret == -1) {
+			__db_errx(env, DB_STR_A("2578",
+		    "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
+			    "%lu %lu"), (u_long)nlsn.file, (u_long)nlsn.offset);
+			__db_errx(env, DB_STR("2579",
+		    "DB_LOGC->get: catastrophic recovery may be required"));
+			ret = __env_panic(env, DB_RUNRECOVERY);
+		}
+		/* Restore the cursor position saved above. */
+		logc->lsn = last_lsn;
+		goto err;
+	}
+#endif
+
+from_memory:
+	/*
+	 * Discard the region lock if we're still holding it.  (The path to
+	 * get here is we acquired the region lock because of the caller's
+	 * flag argument, but we found the record in the in-memory or cursor
+	 * buffers.  Improbable, but it's easy to avoid.)
+	 */
+	if (rlock == L_ACQUIRED) {
+		rlock = L_NONE;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+
+	/* Copy the record into the user's DBT. */
+	if ((ret = __db_retcopy(env, dbt, rp + hdr.size,
+	    (u_int32_t)(hdr.len - hdr.size),
+	    &logc->dbt.data, &logc->dbt.ulen)) != 0)
+		goto err;
+
+	if (CRYPTO_ON(env)) {
+		if ((ret = db_cipher->decrypt(env, db_cipher->data,
+		    hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
+			ret = EAGAIN;
+			goto err;
+		}
+		/*
+		 * Return the original log record size to the user,
+		 * even though we've allocated more than that, possibly.
+		 * The log record is decrypted in the user dbt, not in
+		 * the buffer, so we must do this here after decryption,
+		 * not adjust the len passed to the __db_retcopy call.
+		 */
+		dbt->size = hdr.orig_size;
+	}
+
+	/* Update the cursor and the returned LSN. */
+	*alsn = nlsn;
+	logc->lsn = nlsn;
+	logc->len = hdr.len;
+	logc->prev = hdr.prev;
+
+err:	if (rlock == L_ACQUIRED)
+		LOG_SYSTEM_UNLOCK(env);
+
+	return (ret);
+}
+
+/*
+ * __logc_incursor --
+ *	Check to see if the requested record is in the cursor's buffer.
+ */
+static int
+__logc_incursor(logc, lsn, hdr, pp)
+	DB_LOGC *logc;
+	DB_LSN *lsn;
+	HDR *hdr;
+	u_int8_t **pp;
+{
+	ENV *env;
+	u_int8_t *p;
+	int eof;
+
+	env = logc->env;
+	/* *pp stays NULL unless the whole record is in the buffer. */
+	*pp = NULL;
+
+	/*
+	 * Test to see if the requested LSN could be part of the cursor's
+	 * buffer.
+	 *
+	 * The record must be part of the same file as the cursor's buffer.
+	 * The record must start at a byte offset equal to or greater than
+	 * the cursor buffer.
+	 * The record must not start at a byte offset after the cursor
+	 * buffer's end.
+	 */
+	if (logc->bp_lsn.file != lsn->file)
+		return (0);
+	if (logc->bp_lsn.offset > lsn->offset)
+		return (0);
+	if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
+		return (0);
+
+	/*
+	 * Read the record's header and check if the record is entirely held
+	 * in the buffer.  If the record is not entirely held, get it again.
+	 * (The only advantage in having part of the record locally is that
+	 * we might avoid a system call because we already have the HDR in
+	 * memory.)
+	 *
+	 * If the header check fails for any reason, it must be because the
+	 * LSN is bogus.  Fail hard.
+	 */
+	p = logc->bp + (lsn->offset - logc->bp_lsn.offset);
+	memcpy(hdr, p, hdr->size);
+	if (LOG_SWAPPED(env))
+		__log_hdrswap(hdr, CRYPTO_ON(env));
+	if (__logc_hdrchk(logc, lsn, hdr, &eof))
+		return (DB_NOTFOUND);
+	if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len)
+		return (0);
+
+	*pp = p;				/* Success. */
+
+	return (0);
+}
+
+/*
+ * __logc_inregion --
+ *	Check to see if the requested record is in the region's buffer.
+ */
+static int
+__logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
+	DB_LOGC *logc;
+	DB_LSN *lsn, *last_lsn;
+	RLOCK *rlockp;
+	HDR *hdr;
+	u_int8_t **pp;
+	int *need_cksump;
+{
+	DB_LOG *dblp;
+	ENV *env;
+	LOG *lp;
+	size_t b_region, len, nr;
+	u_int32_t b_disk;
+	int eof, ret;
+	u_int8_t *p;
+
+	env = logc->env;
+	dblp = env->lg_handle;
+	lp = env->lg_handle->reginfo.primary;
+
+	ret = 0;
+	b_region = 0;
+	*pp = NULL;
+	*need_cksump = 0;
+
+	/* If we haven't yet acquired the log region lock, do so. */
+	if (*rlockp == L_NONE) {
+		*rlockp = L_ACQUIRED;
+		LOG_SYSTEM_LOCK(env);
+	}
+
+	/*
+	 * The routines to read from disk must avoid reading past the logical
+	 * end of the log, so pass that information back to it.
+	 *
+	 * Since they're reading directly from the disk, they must also avoid
+	 * reading past the offset we've written out.  If the log was
+	 * truncated, it's possible that there are zeroes or garbage on
+	 * disk after this offset, and the logical end of the log can
+	 * come later than this point if the log buffer isn't empty.
+	 */
+	*last_lsn = lp->lsn;
+	if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off)
+		last_lsn->offset = lp->w_off;
+
+	/*
+	 * Test to see if the requested LSN could be part of the region's
+	 * buffer.
+	 *
+	 * During recovery, we read the log files getting the information to
+	 * initialize the region.  In that case, the region's lsn field will
+	 * not yet have been filled in, use only the disk.
+	 *
+	 * The record must not start at a byte offset after the region buffer's
+	 * end, since that means the request is for a record after the end of
+	 * the log.  Do this test even if the region's buffer is empty -- after
+	 * recovery, the log files may continue past the declared end-of-log,
+	 * and the disk reading routine will incorrectly attempt to read the
+	 * remainder of the log.
+	 *
+	 * Otherwise, test to see if the region's buffer actually has what we
+	 * want:
+	 *
+	 *	The buffer must have some useful content.
+	 *	The record must be in the same file as the region's buffer and
+	 *	must start at a byte offset equal to or greater than the
+	 *	region's buffer.
+	 */
+	if (IS_ZERO_LSN(lp->lsn))
+		return (0);
+	if (LOG_COMPARE(lsn, &lp->lsn) >= 0)
+		return (DB_NOTFOUND);
+	else if (lp->db_log_inmemory) {
+		if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
+			return (ret);
+	} else if (lp->b_off == 0 || LOG_COMPARE(lsn, &lp->f_lsn) < 0)
+		return (0);
+
+	/*
+	 * The current contents of the cursor's buffer will be useless for a
+	 * future call, we're about to overwrite it -- trash it rather than
+	 * try and make it look correct.
+	 */
+	logc->bp_rlen = 0;
+
+	/*
+	 * If the requested LSN is greater than the region buffer's first
+	 * byte, we know the entire record is in the buffer on a good LSN.
+	 *
+	 * If we're given a bad LSN, the "entire" record might not be in
+	 * our buffer in order to fail at the chksum.  __logc_hdrchk made
+	 * sure our dest buffer fits, via bp_maxrec, but we also need to
+	 * make sure we don't run off the end of this buffer, the src.
+	 *
+	 * There is one case where the header check can fail: on a scan through
+	 * in-memory logs, when we reach the end of a file we can read an empty
+	 * header.  In that case, it's safe to return zero, here: it will be
+	 * caught in our caller.  Otherwise, the LSN is bogus.  Fail hard.
+	 */
+	if (lp->db_log_inmemory || LOG_COMPARE(lsn, &lp->f_lsn) > 0) {
+		if (!lp->db_log_inmemory)
+			b_region = lsn->offset - lp->w_off;
+		__log_inmem_copyout(dblp, b_region, hdr, hdr->size);
+		if (LOG_SWAPPED(env))
+			__log_hdrswap(hdr, CRYPTO_ON(env));
+		if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0)
+			return (DB_NOTFOUND);
+		if (eof)
+			return (0);
+		/* The full record must fit in the region buffer, too. */
+		if (lp->db_log_inmemory) {
+			if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
+				return (DB_NOTFOUND);
+		} else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
+			return (DB_NOTFOUND);
+		/* Grow the cursor's buffer if the record won't fit. */
+		if (logc->bp_size <= hdr->len) {
+			len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
+			if ((ret =
+			    __os_realloc(logc->env, len, &logc->bp)) != 0)
+				return (ret);
+			logc->bp_size = (u_int32_t)len;
+		}
+		__log_inmem_copyout(dblp, b_region, logc->bp, hdr->len);
+		*pp = logc->bp;
+		return (0);
+	}
+
+	DB_ASSERT(env, !lp->db_log_inmemory);
+
+	/*
+	 * There's a partial record, that is, the requested record starts
+	 * in a log file and finishes in the region buffer.  We have to
+	 * find out how many bytes of the record are in the region buffer
+	 * so we can copy them out into the cursor buffer.  First, check
+	 * to see if the requested record is the only record in the region
+	 * buffer, in which case we should copy the entire region buffer.
+	 *
+	 * Else, walk back through the region's buffer to find the first LSN
+	 * after the record that crosses the buffer boundary -- we can detect
+	 * that LSN, because its "prev" field will reference the record we
+	 * want.  The bytes we need to copy from the region buffer are the
+	 * bytes up to the record we find.  The bytes we'll need to allocate
+	 * to hold the log record are the bytes between the two offsets.
+	 */
+	b_disk = lp->w_off - lsn->offset;
+	if (lp->b_off <= lp->len)
+		b_region = (u_int32_t)lp->b_off;
+	else
+		for (p = dblp->bufp + (lp->b_off - lp->len);;) {
+			memcpy(hdr, p, hdr->size);
+			if (LOG_SWAPPED(env))
+				__log_hdrswap(hdr, CRYPTO_ON(env));
+			if (hdr->prev == lsn->offset) {
+				b_region = (u_int32_t)(p - dblp->bufp);
+				break;
+			}
+			p = dblp->bufp + (hdr->prev - lp->w_off);
+		}
+
+	/*
+	 * If we don't have enough room for the record, we have to allocate
+	 * space.  We have to do it while holding the region lock, which is
+	 * truly annoying, but there's no way around it.  This call is why
+	 * we allocate cursor buffer space when allocating the cursor instead
+	 * of waiting.
+	 */
+	if (logc->bp_size <= b_region + b_disk) {
+		len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128);
+		if ((ret = __os_realloc(logc->env, len, &logc->bp)) != 0)
+			return (ret);
+		logc->bp_size = (u_int32_t)len;
+	}
+
+	/* Copy the region's bytes to the end of the cursor's buffer. */
+	p = (logc->bp + logc->bp_size) - b_region;
+	memcpy(p, dblp->bufp, b_region);
+
+	/* Release the region lock: the disk read below doesn't need it. */
+	if (*rlockp == L_ACQUIRED) {
+		*rlockp = L_NONE;
+		LOG_SYSTEM_UNLOCK(env);
+	}
+
+	/*
+	 * Read the rest of the information from disk.  Neither short reads
+	 * or EOF are acceptable, the bytes we want had better be there.
+	 */
+	if (b_disk != 0) {
+		p -= b_disk;
+		nr = b_disk;
+		if ((ret = __logc_io(
+		    logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
+			return (ret);
+		if (nr < b_disk)
+			return (__logc_shortread(logc, lsn, 0));
+
+		/* We read bytes from the disk, we'll need to checksum them. */
+		*need_cksump = 1;
+	}
+
+	/* Copy the header information into the caller's structure. */
+	memcpy(hdr, p, hdr->size);
+	if (LOG_SWAPPED(env))
+		__log_hdrswap(hdr, CRYPTO_ON(env));
+
+	*pp = p;
+	return (0);
+}
+
+/*
+ * __log_hdrswap --
+ *	Swap the bytes in a log header from machines with different endianness.
+ *
+ * PUBLIC: void __log_hdrswap __P((HDR *, int));
+ */
+void
+__log_hdrswap(hdr, is_hmac)
+	HDR *hdr;
+	int is_hmac;
+{
+	M_32_SWAP(hdr->prev);
+	M_32_SWAP(hdr->len);
+	/* HMAC checksums are raw bytes; only plain checksums are swapped. */
+	if (!is_hmac)
+		P_32_SWAP(hdr->chksum);
+}
+
+/*
+ * __log_persistswap --
+ *	Swap the bytes in a log file persistent header from machines with
+ *	different endianness.
+ *
+ * PUBLIC: void __log_persistswap __P((LOGP *));
+ */
+void
+__log_persistswap(persist)
+	LOGP *persist;
+{
+	M_32_SWAP(persist->magic);
+	M_32_SWAP(persist->version);
+	M_32_SWAP(persist->log_size);
+	M_32_SWAP(persist->notused);
+}
+
+/*
+ * __logc_ondisk --
+ *	Read a record off disk.
+ */
+static int
+__logc_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
+	DB_LOGC *logc;
+	DB_LSN *lsn, *last_lsn;
+	u_int32_t flags;
+	int *eofp;
+	HDR *hdr;
+	u_int8_t **pp;
+{
+	ENV *env;
+	size_t len, nr;
+	u_int32_t offset;
+	int ret;
+
+	env = logc->env;
+	*eofp = 0;
+
+	/* Read the record's header at the requested LSN. */
+	nr = hdr->size;
+	if ((ret =
+	    __logc_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0)
+		return (ret);
+	if (*eofp)
+		return (0);
+
+	if (LOG_SWAPPED(env))
+		__log_hdrswap(hdr, CRYPTO_ON(env));
+
+	/*
+	 * If the read was successful, but we can't read a full header, assume
+	 * we've hit EOF.  We can't check that the header has been partially
+	 * zeroed out, but it's unlikely that this is caused by a write failure
+	 * since the header is written as a single write call and it's less
+	 * than sector.
+	 */
+	if (nr < hdr->size) {
+		*eofp = 1;
+		return (0);
+	}
+
+	/* Check the HDR. */
+	if ((ret = __logc_hdrchk(logc, lsn, hdr, eofp)) != 0)
+		return (ret);
+	if (*eofp)
+		return (0);
+
+	/*
+	 * Regardless of how we return, the previous contents of the cursor's
+	 * buffer are useless -- trash it.
+	 */
+	logc->bp_rlen = 0;
+
+	/*
+	 * Otherwise, we now (finally!) know how big the record is.  (Maybe
+	 * we should have just stuck the length of the record into the LSN!?)
+	 * Make sure we have enough space.
+	 */
+	if (logc->bp_size <= hdr->len) {
+		len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
+		if ((ret = __os_realloc(env, len, &logc->bp)) != 0)
+			return (ret);
+		logc->bp_size = (u_int32_t)len;
+	}
+
+	/*
+	 * If we're moving forward in the log file, read this record in at the
+	 * beginning of the buffer.  Otherwise, read this record in at the end
+	 * of the buffer, making sure we don't try and read before the start
+	 * of the file.  (We prefer positioning at the end because transaction
+	 * aborts use DB_SET to move backward through the log and we might get
+	 * lucky.)
+	 *
+	 * Read a buffer's worth, without reading past the logical EOF.  The
+	 * last_lsn may be a zero LSN, but that's OK, the test works anyway.
+	 */
+	if (flags == DB_FIRST || flags == DB_NEXT)
+		offset = lsn->offset;
+	else if (lsn->offset + hdr->len < logc->bp_size)
+		offset = 0;
+	else
+		offset = (lsn->offset + hdr->len) - logc->bp_size;
+
+	nr = logc->bp_size;
+	if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset)
+		nr = last_lsn->offset - offset;
+
+	if ((ret =
+	    __logc_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0)
+		return (ret);
+
+	/*
+	 * We should have at least gotten the bytes up-to-and-including the
+	 * record we're reading.
+	 */
+	if (nr < (lsn->offset + hdr->len) - offset)
+		return (__logc_shortread(logc, lsn, 1));
+
+	/*
+	 * Set up the return information.
+	 *
+	 * !!!
+	 * No need to set the bp_lsn.file field, __logc_io set it for us.
+	 */
+	logc->bp_rlen = (u_int32_t)nr;
+	logc->bp_lsn.offset = offset;
+
+	*pp = logc->bp + (lsn->offset - offset);
+
+	return (0);
+}
+
+/*
+ * __logc_hdrchk --
+ *
+ * Check for corrupted HDRs before we use them to allocate memory or find
+ * records.
+ *
+ * If the log files were pre-allocated, a zero-filled HDR structure is the
+ * logical file end.  However, we can see buffers filled with 0's during
+ * recovery, too (because multiple log buffers were written asynchronously,
+ * and one made it to disk before a different one that logically precedes
+ * it in the log file.
+ *
+ * Check for impossibly large records.  The malloc should fail later, but we
+ * have customers that run mallocs that treat all allocation failures as fatal
+ * errors.
+ *
+ * Note that none of this is necessarily something awful happening.  We let
+ * the application hand us any LSN they want, and it could be a pointer into
+ * the middle of a log record, there's no way to tell.
+ */
+static int
+__logc_hdrchk(logc, lsn, hdr, eofp)
+	DB_LOGC *logc;
+	DB_LSN *lsn;
+	HDR *hdr;
+	int *eofp;
+{
+	ENV *env;
+	int ret;
+
+	env = logc->env;
+
+	/*
+	 * Check EOF before we do any other processing.
+	 */
+	if (eofp != NULL) {
+		/* An all-zero header is the "virtual" end-of-file marker. */
+		if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) {
+			*eofp = 1;
+			return (0);
+		}
+		*eofp = 0;
+	}
+
+	/*
+	 * Sanity check the log record's size.
+	 * We must check it after "virtual" EOF above.
+	 */
+	if (hdr->len <= hdr->size)
+		goto err;
+
+	/*
+	 * If the cursor's max-record value isn't yet set, it means we aren't
+	 * reading these records from a log file and no check is necessary.
+	 */
+	if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) {
+		/*
+		 * If we fail the check, there's the pathological case that
+		 * we're reading the last file, it's growing, and our initial
+		 * check information was wrong.  Get it again, to be sure.
+		 */
+		if ((ret = __logc_set_maxrec(logc, NULL)) != 0) {
+			__db_err(env, ret, "DB_LOGC->get");
+			return (ret);
+		}
+		if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec)
+			goto err;
+	}
+	return (0);
+
+err:	if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+		__db_errx(env, DB_STR_A("2580",
+		    "DB_LOGC->get: LSN %lu/%lu: invalid log record header",
+		    "%lu %lu"), (u_long)lsn->file, (u_long)lsn->offset);
+	return (EIO);
+}
+
+/*
+ * __logc_io --
+ *	Read records from a log file.
+ */
+static int
+__logc_io(logc, fnum, offset, p, nrp, eofp)
+	DB_LOGC *logc;
+	u_int32_t fnum, offset;
+	void *p;
+	size_t *nrp;
+	int *eofp;
+{
+	DB_LOG *dblp;
+	ENV *env;
+	LOG *lp;
+	int ret;
+	char *np;
+
+	env = logc->env;
+	dblp = env->lg_handle;
+	lp = dblp->reginfo.primary;
+
+	/*
+	 * If we've switched files, discard the current file handle and acquire
+	 * a new one.
+	 */
+	if (logc->fhp != NULL && logc->bp_lsn.file != fnum) {
+		ret = __os_closehandle(env, logc->fhp);
+		logc->fhp = NULL;
+		logc->bp_lsn.file = 0;
+
+		if (ret != 0)
+			return (ret);
+	}
+	if (logc->fhp == NULL) {
+		/* np is the log file's name, allocated by __log_name. */
+		if ((ret = __log_name(dblp, fnum,
+		    &np, &logc->fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
+			/*
+			 * If we're allowed to return EOF, assume that's the
+			 * problem, set the EOF status flag and return 0.
+			 */
+			if (eofp != NULL) {
+				*eofp = 1;
+				ret = 0;
+			} else if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+				__db_err(env, ret, "DB_LOGC->get: %s",
+				    np == NULL ? "__log_name failed" : np);
+			__os_free(env, np);
+			return (ret);
+		}
+
+		/* Re-bound the maximum record size for the new file. */
+		if ((ret = __logc_set_maxrec(logc, np)) != 0) {
+			__db_err(env, ret, "DB_LOGC->get: %s", np);
+			__os_free(env, np);
+			return (ret);
+		}
+		__os_free(env, np);
+
+		logc->bp_lsn.file = fnum;
+	}
+
+	STAT_INC(env, log, read, lp->stat.st_rcount, fnum);
+	/* Seek to the record's offset and read the data. */
+	if ((ret = __os_io(env, DB_IO_READ,
+	    logc->fhp, 0, 0, offset, (u_int32_t)*nrp, p, nrp)) != 0) {
+		if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
+			__db_err(env, ret, DB_STR_A("2581",
+			    "DB_LOGC->get: LSN: %lu/%lu: read", "%lu %lu"),
+			    (u_long)fnum, (u_long)offset);
+		return (ret);
+	}
+
+	return (0);
+}
+
+/*
+ * __logc_shortread --
+ *	Read was short -- return a consistent error message and error.
 */
static int
__logc_shortread(logc, lsn, check_silent)
	DB_LOGC *logc;
	DB_LSN *lsn;
	int check_silent;
{
	/* Emit the message unless the caller asked us to honor silent mode. */
	if (!check_silent || !F_ISSET(logc, DB_LOG_SILENT_ERR))
		__db_errx(logc->env, DB_STR_A("2582",
		    "DB_LOGC->get: LSN: %lu/%lu: short read", "%lu %lu"),
		    (u_long)lsn->file, (u_long)lsn->offset);
	return (EIO);
}

/*
 * __logc_set_maxrec --
 *	Bound the maximum log record size in a log file.
 */
static int
__logc_set_maxrec(logc, np)
	DB_LOGC *logc;
	char *np;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	u_int32_t mbytes, bytes;
	int ret;

	env = logc->env;
	dblp = env->lg_handle;

	/*
	 * We don't want to try and allocate huge chunks of memory because
	 * applications with error-checking malloc's often consider that a
	 * hard failure.  If we're about to look at a corrupted record with
	 * a bizarre size, we need to know before trying to allocate space
	 * to hold it.  We could read the persistent data at the beginning
	 * of the file but that's hard -- we may have to decrypt it, checksum
	 * it and so on.  Stat the file instead.
	 */
	if (logc->fhp != NULL) {
		if ((ret = __os_ioinfo(env, np, logc->fhp,
		    &mbytes, &bytes, NULL)) != 0)
			return (ret);
		/* No record can be larger than the whole file. */
		if (logc->bp_maxrec < (mbytes * MEGABYTE + bytes))
			logc->bp_maxrec = mbytes * MEGABYTE + bytes;
	}

	/*
	 * If reading from the log file currently being written, we could get
	 * an incorrect size, that is, if the cursor was opened on the file
	 * when it had only a few hundred bytes, and then the cursor used to
	 * move forward in the file, after more log records were written, the
	 * original stat value would be wrong.  Use the maximum of the current
	 * log file size and the size of the buffer -- that should represent
	 * the max of any log record currently in the file.
	 *
	 * The log buffer size is set when the environment is opened and never
	 * changed, we don't need a lock on it.
	 */
	lp = dblp->reginfo.primary;
	if (logc->bp_maxrec < lp->buffer_size)
		logc->bp_maxrec = lp->buffer_size;

	return (0);
}

/*
 * __log_read_record_pp --
 *	DB_ENV->log_read_record pre/post processing: allocates the argument
 *	buffer, wraps the call for replication and frees the buffer on error.
 *
 * PUBLIC: int __log_read_record_pp __P((DB_ENV *, DB **, void *, void *,
 * PUBLIC:     DB_LOG_RECSPEC *, u_int32_t, void **));
 */
int
__log_read_record_pp(dbenv, dbpp, td, recbuf, spec, size, argpp)
	DB_ENV *dbenv;
	DB **dbpp;
	void *td;
	void *recbuf;
	DB_LOG_RECSPEC *spec;
	u_int32_t size;
	void **argpp;
{
	DB_THREAD_INFO *ip;
	int ret;

	ENV_REQUIRES_CONFIG(dbenv->env,
	    dbenv->env->lg_handle, "DB_ENV->log_read_record", DB_INIT_LOG);

	*argpp = NULL;
	ENV_ENTER(dbenv->env, ip);
	/* Arg struct and trailing DB_TXN are a single user-memory allocation. */
	if ((ret = __os_umalloc(dbenv->env, size + sizeof(DB_TXN), argpp)) != 0)
		goto done;
	REPLICATION_WRAP(dbenv->env, (__log_read_record(dbenv->env, dbpp,
	    td, recbuf, spec, size, argpp)), 0, ret);
	if (ret != 0) {
		/* On failure the caller gets nothing back; free the buffer. */
		__os_ufree(dbenv->env, *argpp);
		*argpp = NULL;
	}
done:	ENV_LEAVE(dbenv->env, ip);
	return (ret);
}

/*
 * __log_read_record --
 *	Unmarshal a log record described by 'spec' from 'recbuf' into a
 *	freshly-populated argument structure returned through 'argpp'.
 *
 * PUBLIC: int __log_read_record __P((ENV *, DB **, void *, void *,
 * PUBLIC:     DB_LOG_RECSPEC *, u_int32_t, void **));
 */
int
__log_read_record(env, dbpp, td, recbuf, spec, size, argpp)
	ENV *env;
	DB **dbpp;
	void *td;
	void *recbuf;
	DB_LOG_RECSPEC *spec;
	u_int32_t size;
	void **argpp;
{
	DB_LOG_RECSPEC *sp, *np;
	DB_TXN *txnp;
	LOG *lp;
	PAGE *hdrstart;
	u_int32_t hdrsize, op, uinttmp;
	u_int8_t *ap, *bp;
	int has_data, ret, downrev;

	COMPQUIET(has_data, 0);
	COMPQUIET(hdrsize, 0);
	COMPQUIET(hdrstart, NULL);
	COMPQUIET(op, 0);
	ap = *argpp;
	/*
	 * Allocate space for the arg structure and a transaction
	 * structure which will immediately follow it.
	 */
	if (ap == NULL &&
	    (ret = __os_malloc(env, size + sizeof(DB_TXN), &ap)) != 0)
		return (ret);
	txnp = (DB_TXN *)(ap + size);
	memset(txnp, 0, sizeof(DB_TXN));
	txnp->td = td;
	lp = env->lg_handle->reginfo.primary;
	/* Pre-5.0 logs need the old byte-swapping rules. */
	downrev = lp->persist.version < DB_LOGVERSION_50;

	bp = recbuf;

	/*
	 * The first three fields are always the same in every arg
	 * struct so we know their offsets.
	 */
	/* type */
	LOGCOPY_32(env, ap + SSZ(LOG_REC_HEADER, type), bp);
	bp += sizeof(u_int32_t);

	/* txnp */
	LOGCOPY_32(env, &txnp->txnid, bp);
	*(DB_TXN **)(ap + SSZ(LOG_REC_HEADER, txnp)) = txnp;
	bp += sizeof(txnp->txnid);

	/* Previous LSN */
	LOGCOPY_TOLSN(env,
	    (DB_LSN *)(ap + SSZ(LOG_REC_HEADER, prev_lsn)), bp);
	bp += sizeof(DB_LSN);

	ret = 0;
	/* Walk the record spec, decoding one field per entry. */
	for (sp = spec; sp->type != LOGREC_Done; sp++) {
		switch (sp->type) {
		case LOGREC_DB:
			/* File id: also resolve to an open DB handle. */
			LOGCOPY_32(env, &uinttmp, bp);
			*(u_int32_t*)(ap + sp->offset) = uinttmp;
			bp += sizeof(uinttmp);
			if (dbpp != NULL) {
				*dbpp = NULL;
				ret = __dbreg_id_to_db(env,
				    txnp, dbpp, (int32_t)uinttmp, 1);
			}
			break;

		case LOGREC_ARG:
		case LOGREC_TIME:
		case LOGREC_DBOP:
			LOGCOPY_32(env, ap + sp->offset, bp);
			bp += sizeof(uinttmp);
			break;
		case LOGREC_OP:
			/* Remember the opcode; needed for record swapping. */
			LOGCOPY_32(env, &op, bp);
			*(u_int32_t *)(ap + sp->offset) = op;
			bp += sizeof(uinttmp);
			break;
		case LOGREC_DBT:
		case LOGREC_PGLIST:
		case LOGREC_LOCKS:
		case LOGREC_HDR:
		case LOGREC_DATA:
		case LOGREC_PGDBT:
		case LOGREC_PGDDBT:
			/* Variable-length field: size word, then bytes in place. */
			memset(ap + sp->offset, 0, sizeof(DBT));
			LOGCOPY_32(env, &uinttmp, bp);
			*(u_int32_t*)
			    (ap + sp->offset + SSZ(DBT, size)) = uinttmp;
			bp += sizeof(u_int32_t);
			*(void **)(ap + sp->offset + SSZ(DBT, data)) = bp;

			/* Process fields that need to be byte swapped. */
			switch (sp->type) {
			case LOGREC_DBT:
			case LOGREC_PGLIST:
			case LOGREC_LOCKS:
				break;
			case LOGREC_HDR:
				if (uinttmp == 0)
					break;
				/* Look ahead for a paired DATA field. */
				has_data = 0;
				for (np = sp + 1; np->type != LOGREC_Done; np++)
					if (np->type == LOGREC_DATA) {
						has_data = 1;
						break;
					}
				hdrstart = (PAGE *)bp;
				hdrsize = uinttmp;
				if (has_data == 1)
					break;
				/* FALLTHROUGH */
			case LOGREC_DATA:
				if (downrev ? LOG_SWAPPED(env) :
				    (dbpp != NULL && *dbpp != NULL &&
				    F_ISSET(*dbpp, DB_AM_SWAP)))
					__db_recordswap(op, hdrsize,
					    hdrstart, has_data ?
					    ap + sp->offset : NULL, 1);
				break;
			case LOGREC_PGDBT:
				/* Look ahead for a paired page-data field. */
				has_data = 0;
				for (np = sp + 1; np->type != LOGREC_Done; np++)
					if (np->type == LOGREC_PGDDBT) {
						has_data = 1;
						break;
					}

				hdrstart = (PAGE *)bp;
				hdrsize = uinttmp;
				if (has_data == 1)
					break;
				/* FALLTHROUGH */
			case LOGREC_PGDDBT:
				if (dbpp != NULL && *dbpp != NULL &&
				    (downrev ? LOG_SWAPPED(env) :
				    F_ISSET(*dbpp, DB_AM_SWAP)) &&
				    (ret = __db_pageswap(env, *dbpp, hdrstart,
				    hdrsize, has_data == 0 ? NULL :
				    (DBT *)(ap + sp->offset), 1)) != 0)
					return (ret);
				break;
			default:
				DB_ASSERT(env, sp->type != sp->type);
			}

			bp += uinttmp;
			break;

		case LOGREC_POINTER:
			LOGCOPY_TOLSN(env, (DB_LSN *)(ap + sp->offset), bp);
			bp += sizeof(DB_LSN);
			break;

		default:
			DB_ASSERT(env, sp->type != sp->type);
		}
	}

	*argpp = ap;
	return (ret);
}
diff --git a/src/log/log_method.c b/src/log/log_method.c
new file mode 100644
index 00000000..2b81b03f
--- /dev/null
+++ b/src/log/log_method.c
@@ -0,0 +1,533 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1999, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/log.h"

/*
 * __log_env_create --
 *	Log specific initialization of the DB_ENV structure.
 *
 * PUBLIC: int __log_env_create __P((DB_ENV *));
 */
int
__log_env_create(dbenv)
	DB_ENV *dbenv;
{
	/*
	 * !!!
	 * Our caller has not yet had the opportunity to reset the panic
	 * state or turn off mutex locking, and so we can neither check
	 * the panic state or acquire a mutex in the DB_ENV create path.
	 */
	dbenv->lg_bsize = 0;
	dbenv->lg_regionmax = 0;

	return (0);
}

/*
 * __log_env_destroy --
 *	Log specific destruction of the DB_ENV structure.
 *
 * PUBLIC: void __log_env_destroy __P((DB_ENV *));
 */
void
__log_env_destroy(dbenv)
	DB_ENV *dbenv;
{
	COMPQUIET(dbenv, NULL);
}

/*
 * __log_get_lg_bsize --
 *	DB_ENV->get_lg_bsize: return the log buffer size.
 *
 * PUBLIC: int __log_get_lg_bsize __P((DB_ENV *, u_int32_t *));
 */
int
__log_get_lg_bsize(dbenv, lg_bsizep)
	DB_ENV *dbenv;
	u_int32_t *lg_bsizep;
{
	ENV *env;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->get_lg_bsize", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		/* Cannot be set after open, no lock required to read. */
		*lg_bsizep =
		    ((LOG *)env->lg_handle->reginfo.primary)->buffer_size;
	} else
		*lg_bsizep = dbenv->lg_bsize;
	return (0);
}

/*
 * __log_set_lg_bsize --
 *	DB_ENV->set_lg_bsize.
 *
 * PUBLIC: int __log_set_lg_bsize __P((DB_ENV *, u_int32_t));
 */
int
__log_set_lg_bsize(dbenv, lg_bsize)
	DB_ENV *dbenv;
	u_int32_t lg_bsize;
{
	ENV *env;

	env = dbenv->env;

	/* Buffer size is fixed at environment open time. */
	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lg_bsize");

	dbenv->lg_bsize = lg_bsize;
	return (0);
}

/*
 * __log_get_lg_filemode --
 *	DB_ENV->get_lg_filemode: return the log file creation mode.
 *
 * PUBLIC: int __log_get_lg_filemode __P((DB_ENV *, int *));
 */
int
__log_get_lg_filemode(dbenv, lg_modep)
	DB_ENV *dbenv;
	int *lg_modep;
{
	DB_LOG *dblp;
	DB_THREAD_INFO *ip;
	ENV *env;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->get_lg_filemode", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		/* Mutable after open: read under the log region lock. */
		dblp = env->lg_handle;
		ENV_ENTER(env, ip);
		LOG_SYSTEM_LOCK(env);
		*lg_modep = ((LOG *)dblp->reginfo.primary)->filemode;
		LOG_SYSTEM_UNLOCK(env);
		ENV_LEAVE(env, ip);
	} else
		*lg_modep = dbenv->lg_filemode;

	return (0);
}

/*
 * __log_set_lg_filemode --
 *	DB_ENV->set_lg_filemode.
 *
 * PUBLIC: int __log_set_lg_filemode __P((DB_ENV *, int));
 */
int
__log_set_lg_filemode(dbenv, lg_mode)
	DB_ENV *dbenv;
	int lg_mode;
{
	DB_LOG *dblp;
	DB_THREAD_INFO *ip;
	ENV *env;
	LOG *lp;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->set_lg_filemode", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		/* Update the shared region copy under the log lock. */
		dblp = env->lg_handle;
		lp = dblp->reginfo.primary;
		ENV_ENTER(env, ip);
		LOG_SYSTEM_LOCK(env);
		lp->filemode = lg_mode;
		LOG_SYSTEM_UNLOCK(env);
		ENV_LEAVE(env, ip);
	} else
		dbenv->lg_filemode = lg_mode;

	return (0);
}

/*
 * __log_get_lg_max --
 *	DB_ENV->get_lg_max: return the maximum log file size.
 *
 * PUBLIC: int __log_get_lg_max __P((DB_ENV *, u_int32_t *));
 */
int
__log_get_lg_max(dbenv, lg_maxp)
	DB_ENV *dbenv;
	u_int32_t *lg_maxp;
{
	DB_LOG *dblp;
	DB_THREAD_INFO *ip;
	ENV *env;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->get_lg_max", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		dblp = env->lg_handle;
		ENV_ENTER(env, ip);
		LOG_SYSTEM_LOCK(env);
		*lg_maxp = ((LOG *)dblp->reginfo.primary)->log_nsize;
		LOG_SYSTEM_UNLOCK(env);
		ENV_LEAVE(env, ip);
	} else
		*lg_maxp = dbenv->lg_size;

	return (0);
}

/*
 * __log_set_lg_max --
 *	DB_ENV->set_lg_max.
 *
 * PUBLIC: int __log_set_lg_max __P((DB_ENV *, u_int32_t));
 */
int
__log_set_lg_max(dbenv, lg_max)
	DB_ENV *dbenv;
	u_int32_t lg_max;
{
	DB_LOG *dblp;
	DB_THREAD_INFO *ip;
	ENV *env;
	LOG *lp;
	int ret;

	env = dbenv->env;
	ret = 0;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->set_lg_max", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		dblp = env->lg_handle;
		lp = dblp->reginfo.primary;
		ENV_ENTER(env, ip);
		/* Validate against the (fixed) buffer size before storing. */
		if ((ret = __log_check_sizes(env, lg_max, 0)) == 0) {
			LOG_SYSTEM_LOCK(env);
			lp->log_nsize = lg_max;
			LOG_SYSTEM_UNLOCK(env);
		}
		ENV_LEAVE(env, ip);
	} else
		dbenv->lg_size = lg_max;

	return (ret);
}

/*
 * __log_get_lg_regionmax --
 *	DB_ENV->get_lg_regionmax: return the log region size.
 *
 * PUBLIC: int __log_get_lg_regionmax __P((DB_ENV *, u_int32_t *));
 */
int
__log_get_lg_regionmax(dbenv, lg_regionmaxp)
	DB_ENV *dbenv;
	u_int32_t *lg_regionmaxp;
{
	ENV *env;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->lg_handle, "DB_ENV->get_lg_regionmax", DB_INIT_LOG);

	if (LOGGING_ON(env)) {
		/* Cannot be set after open, no lock required to read. */
		*lg_regionmaxp =
		    ((LOG *)env->lg_handle->reginfo.primary)->regionmax;
	} else
		*lg_regionmaxp = dbenv->lg_regionmax;
	return (0);
}

/*
 * __log_set_lg_regionmax --
 *	DB_ENV->set_lg_regionmax.
 *
 * PUBLIC: int __log_set_lg_regionmax __P((DB_ENV *, u_int32_t));
 */
int
__log_set_lg_regionmax(dbenv, lg_regionmax)
	DB_ENV *dbenv;
	u_int32_t lg_regionmax;
{
	ENV *env;

	env = dbenv->env;

	ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_lg_regionmax");

	/* Let's not be silly.  Reject regions below the built-in minimum. */
	if (lg_regionmax != 0 && lg_regionmax < LG_BASE_REGION_SIZE) {
		__db_errx(env, DB_STR_A("2569",
		    "log region size must be >= %d",
		    "%d"), LG_BASE_REGION_SIZE);
		return (EINVAL);
	}

	dbenv->lg_regionmax = lg_regionmax;
	return (0);
}

/*
 * __log_get_lg_dir --
 *	DB_ENV->get_lg_dir: return the configured log directory.
 *
 * PUBLIC: int __log_get_lg_dir __P((DB_ENV *, const char **));
 */
int
__log_get_lg_dir(dbenv, dirp)
	DB_ENV *dbenv;
	const char **dirp;
{
	*dirp = dbenv->db_log_dir;
	return (0);
}

/*
 * __log_set_lg_dir --
 *	DB_ENV->set_lg_dir.
 *
 * PUBLIC: int __log_set_lg_dir __P((DB_ENV *, const char *));
 */
int
__log_set_lg_dir(dbenv, dir)
	DB_ENV *dbenv;
	const char *dir;
{
	ENV *env;

	env = dbenv->env;

	/* Replace any previously-configured directory string. */
	if (dbenv->db_log_dir != NULL)
		__os_free(env, dbenv->db_log_dir);
	return (__os_strdup(env, dir, &dbenv->db_log_dir));
}

/*
 * __log_get_flags --
 *	DB_ENV->get_flags.  Merges the region's current auto-remove and
 *	in-memory settings into *flagsp; no-op if logging isn't configured.
 *
 * PUBLIC: void __log_get_flags __P((DB_ENV *, u_int32_t *));
 */
void
__log_get_flags(dbenv, flagsp)
	DB_ENV *dbenv;
	u_int32_t *flagsp;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	u_int32_t flags;

	env = dbenv->env;

	if ((dblp = env->lg_handle) == NULL)
		return;

	lp = dblp->reginfo.primary;

	flags = *flagsp;
	if (lp->db_log_autoremove)
		LF_SET(DB_LOG_AUTO_REMOVE);
	else
		LF_CLR(DB_LOG_AUTO_REMOVE);
	if (lp->db_log_inmemory)
		LF_SET(DB_LOG_IN_MEMORY);
	else
		LF_CLR(DB_LOG_IN_MEMORY);
	*flagsp = flags;
}

/*
 * __log_set_flags --
 *	DB_ENV->set_flags.  Pushes auto-remove/in-memory settings into the
 *	shared log region; no-op if logging isn't configured.
 *
 * PUBLIC: void __log_set_flags __P((ENV *, u_int32_t, int));
 */
void
__log_set_flags(env, flags, on)
	ENV *env;
	u_int32_t flags;
	int on;
{
	DB_LOG *dblp;
	LOG *lp;

	if ((dblp = env->lg_handle) == NULL)
		return;

	lp = dblp->reginfo.primary;

	if (LF_ISSET(DB_LOG_AUTO_REMOVE))
		lp->db_log_autoremove = on ? 1 : 0;
	if (LF_ISSET(DB_LOG_IN_MEMORY))
		lp->db_log_inmemory = on ? 1 : 0;
}

/*
 * List of flags we can handle here.  DB_LOG_INMEMORY must be
 * processed before creating the region, leave it out for now.
 */
#undef	OK_FLAGS
#define	OK_FLAGS \
    (DB_LOG_AUTO_REMOVE | DB_LOG_DIRECT | \
    DB_LOG_DSYNC | DB_LOG_IN_MEMORY | DB_LOG_ZERO)
/* Mapping from public DB_LOG_* flags to internal DBLOG_* handle flags. */
static const FLAG_MAP LogMap[] = {
	{ DB_LOG_AUTO_REMOVE,	DBLOG_AUTOREMOVE},
	{ DB_LOG_DIRECT,	DBLOG_DIRECT},
	{ DB_LOG_DSYNC,		DBLOG_DSYNC},
	{ DB_LOG_IN_MEMORY,	DBLOG_INMEMORY},
	{ DB_LOG_ZERO,		DBLOG_ZERO}
};
/*
 * __log_get_config --
 *	Configure the logging subsystem.
 *
 * PUBLIC: int __log_get_config __P((DB_ENV *, u_int32_t, int *));
 */
int
__log_get_config(dbenv, which, onp)
	DB_ENV *dbenv;
	u_int32_t which;
	int *onp;
{
	ENV *env;
	DB_LOG *dblp;
	u_int32_t flags;

	env = dbenv->env;
	if (FLD_ISSET(which, ~OK_FLAGS))
		return (__db_ferr(env, "DB_ENV->log_get_config", 0));
	dblp = env->lg_handle;
	ENV_REQUIRES_CONFIG(env, dblp, "DB_ENV->log_get_config", DB_INIT_LOG);

	/* Collect handle flags, then overlay region-resident settings. */
	__env_fetch_flags(LogMap, sizeof(LogMap), &dblp->flags, &flags);
	__log_get_flags(dbenv, &flags);
	if (LF_ISSET(which))
		*onp = 1;
	else
		*onp = 0;

	return (0);
}

/*
 * __log_set_config --
 *	Configure the logging subsystem.
 *
 * PUBLIC: int __log_set_config __P((DB_ENV *, u_int32_t, int));
 */
int
__log_set_config(dbenv, flags, on)
	DB_ENV *dbenv;
	u_int32_t flags;
	int on;
{
	return (__log_set_config_int(dbenv, flags, on, 0));
}
/*
 * __log_set_config_int --
 *	Configure the logging subsystem.
 *
 * PUBLIC: int __log_set_config_int __P((DB_ENV *, u_int32_t, int, int));
 */
int
__log_set_config_int(dbenv, flags, on, in_open)
	DB_ENV *dbenv;
	u_int32_t flags;
	int on;
	int in_open;
{
	ENV *env;
	DB_LOG *dblp;
	u_int32_t mapped_flags;

	env = dbenv->env;
	dblp = env->lg_handle;
	if (FLD_ISSET(flags, ~OK_FLAGS))
		return (__db_ferr(env, "DB_ENV->log_set_config", 0));
	ENV_NOT_CONFIGURED(env, dblp, "DB_ENV->log_set_config", DB_INIT_LOG);
	if (LF_ISSET(DB_LOG_DIRECT) && __os_support_direct_io() == 0) {
		__db_errx(env,
"DB_ENV->log_set_config: direct I/O either not configured or not supported");
		return (EINVAL);
	}

	if (LOGGING_ON(env)) {
		/* In-memory logging can't be turned on once the env is open. */
		if (!in_open && LF_ISSET(DB_LOG_IN_MEMORY) &&
		    ((LOG *)dblp->reginfo.primary)->db_log_inmemory == 0)
			ENV_ILLEGAL_AFTER_OPEN(env,
			    "DB_ENV->log_set_config: DB_LOG_IN_MEMORY");
		__log_set_flags(env, flags, on);
		mapped_flags = 0;
		__env_map_flags(LogMap, sizeof(LogMap), &flags, &mapped_flags);
		if (on)
			F_SET(dblp, mapped_flags);
		else
			F_CLR(dblp, mapped_flags);
	} else {
		/*
		 * DB_LOG_IN_MEMORY, DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC
		 * are mutually incompatible.  If we're setting one of them,
		 * clear all current settings.
		 */
		if (on && LF_ISSET(DB_LOG_IN_MEMORY))
			F_CLR(dbenv,
			    DB_ENV_TXN_NOSYNC | DB_ENV_TXN_WRITE_NOSYNC);

		if (on)
			FLD_SET(dbenv->lg_flags, flags);
		else
			FLD_CLR(dbenv->lg_flags, flags);
	}

	return (0);
}

/*
 * __log_check_sizes --
 *	Makes sure that the log file size and log buffer size are compatible.
 *
 * PUBLIC: int __log_check_sizes __P((ENV *, u_int32_t, u_int32_t));
 */
int
__log_check_sizes(env, lg_max, lg_bsize)
	ENV *env;
	u_int32_t lg_max;
	u_int32_t lg_bsize;
{
	DB_ENV *dbenv;
	LOG *lp;
	int inmem;

	dbenv = env->dbenv;

	if (LOGGING_ON(env)) {
		lp = env->lg_handle->reginfo.primary;
		inmem = lp->db_log_inmemory;
		lg_bsize = lp->buffer_size;
	} else
		inmem = (FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) != 0);

	if (inmem) {
		/* Substitute the in-memory defaults for unset values. */
		if (lg_bsize == 0)
			lg_bsize = LG_BSIZE_INMEM;
		if (lg_max == 0)
			lg_max = LG_MAX_INMEM;

		/* The in-memory buffer must strictly exceed the file size. */
		if (lg_bsize <= lg_max) {
			__db_errx(env,
	    "in-memory log buffer must be larger than the log file size");
			return (EINVAL);
		}
	}

	return (0);
}
diff --git a/src/log/log_print.c b/src/log/log_print.c
new file mode 100644
index 00000000..b7da72c8
--- /dev/null
+++ b/src/log/log_print.c
@@ -0,0 +1,366 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" + +static int __log_print_dbregister __P((ENV *, DBT *, DB_LOG *)); + +/* + * PUBLIC: int __log_print_record __P((ENV *, + * PUBLIC: DBT *, DB_LSN *, char *, DB_LOG_RECSPEC *, void *)); + */ +int +__log_print_record(env, recbuf, lsnp, name, spec, info) + ENV *env; + DBT *recbuf; + DB_LSN *lsnp; + char *name; + DB_LOG_RECSPEC *spec; + void *info; +{ + DB *dbp; + DBT dbt; + DB_LOG_RECSPEC *sp, *np; + DB_LOG *dblp; + DB_LSN prev_lsn; + DB_MSGBUF msgbuf; + LOG *lp; + PAGE *hdrstart, *hdrtmp; + int32_t inttmp; + u_int32_t hdrsize, op, uinttmp; + u_int32_t type, txnid; + u_int8_t *bp, *datatmp; + int has_data, ret, downrev; + struct tm *lt; + time_t timeval; + char time_buf[CTIME_BUFLEN], *s; + const char *hdrname; + + COMPQUIET(hdrstart, NULL); + COMPQUIET(hdrname, NULL); + COMPQUIET(hdrsize, 0); + COMPQUIET(has_data, 0); + COMPQUIET(op, 0); + + bp = recbuf->data; + dblp = info; + dbp = NULL; + lp = env->lg_handle->reginfo.primary; + downrev = lp->persist.version < DB_LOGVERSION_50; + DB_MSGBUF_INIT(&msgbuf); + + /* + * The first three fields are always the same in every arg + * struct so we know their offsets. + */ + /* type */ + LOGCOPY_32(env, &type, bp); + bp += sizeof(u_int32_t); + + /* txnp */ + LOGCOPY_32(env, &txnid, bp); + bp += sizeof(txnid); + + /* Previous LSN */ + LOGCOPY_TOLSN(env,&prev_lsn, bp); + bp += sizeof(DB_LSN); + __db_msgadd(env, &msgbuf, + "[%lu][%lu]%s%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, + name, (type & DB_debug_FLAG) ? 
"_debug" : "", + (u_long)type, + (u_long)txnid, + (u_long)prev_lsn.file, (u_long)prev_lsn.offset); + + for (sp = spec; sp->type != LOGREC_Done; sp++) { + switch (sp->type) { + case LOGREC_OP: + LOGCOPY_32(env, &op, bp); + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + __db_msgadd(env, &msgbuf, sp->fmt, OP_MODE_GET(op)); + __db_msgadd(env, &msgbuf, " ptype: %s\n", + __db_pagetype_to_string(OP_PAGE_GET(op))); + bp += sizeof(uinttmp); + break; + case LOGREC_DB: + LOGCOPY_32(env, &inttmp, bp); + __db_msgadd(env, &msgbuf, "\t%s: %lu\n", + sp->name, (unsigned long)inttmp); + bp += sizeof(inttmp); + if (dblp != NULL && inttmp < dblp->dbentry_cnt) + dbp = dblp->dbentry[inttmp].dbp; + break; + + case LOGREC_DBOP: + /* Special op for dbreg_register records. */ + if (dblp != NULL && (ret = + __log_print_dbregister(env, recbuf, dblp)) != 0) + return (ret); + LOGCOPY_32(env, &uinttmp, bp); + switch (FLD_ISSET(uinttmp, DBREG_OP_MASK)) { + case DBREG_CHKPNT: + s = "CHKPNT"; + break; + case DBREG_CLOSE: + s = "CLOSE"; + break; + case DBREG_OPEN: + s = "OPEN"; + break; + case DBREG_PREOPEN: + s = "PREOPEN"; + break; + case DBREG_RCLOSE: + s = "RCLOSE"; + break; + case DBREG_REOPEN: + s = "REOPEN"; + break; + default: + s = "UNKNOWN"; + break; + } + __db_msgadd(env, &msgbuf, "\t%s: %s %lx\n", sp->name, + s, (unsigned long)(uinttmp & ~DBREG_OP_MASK)); + bp += sizeof(uinttmp); + break; + case LOGREC_ARG: + LOGCOPY_32(env, &uinttmp, bp); + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + __db_msgadd(env, &msgbuf, sp->fmt, uinttmp); + __db_msgadd(env, &msgbuf, "\n"); + bp += sizeof(uinttmp); + break; + case LOGREC_TIME: + /* time_t is long but we only store 32 bits. 
*/ + LOGCOPY_32(env, &uinttmp, bp); + timeval = uinttmp; + lt = localtime(&timeval); + __db_msgadd(env, &msgbuf, + "\t%s: %ld (%.24s, 20%02lu%02lu%02lu%02lu%02lu.%02lu)\n", + sp->name, (long)timeval, + __os_ctime(&timeval, time_buf), + (u_long)lt->tm_year - 100, (u_long)lt->tm_mon+1, + (u_long)lt->tm_mday, (u_long)lt->tm_hour, + (u_long)lt->tm_min, (u_long)lt->tm_sec); + bp += sizeof(uinttmp); + break; + case LOGREC_PGDBT: + case LOGREC_PGDDBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + case LOGREC_HDR: + case LOGREC_DATA: + case LOGREC_DBT: + LOGCOPY_32(env, &uinttmp, bp); + bp += sizeof(u_int32_t); + switch (sp->type) { + case LOGREC_HDR: + if (uinttmp == 0) + break; + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_DATA) { + has_data = 1; + break; + } + + hdrstart = (PAGE*)bp; + hdrsize = uinttmp; + hdrname = sp->name; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_DATA: + if (downrev ? LOG_SWAPPED(env) : + (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP))) + __db_recordswap(op, hdrsize, hdrstart, + (has_data && uinttmp != 0) ? + bp : NULL, 1); + __db_msgadd(env, &msgbuf, "\t%s: ", hdrname); + __db_prbytes(env, &msgbuf, + (u_int8_t *)hdrstart, hdrsize); + if (has_data == 0 || uinttmp == 0) + break; + /* FALLTHROUGH */ + default: + __db_msgadd(env, &msgbuf, "\t%s: ", sp->name); + pr_data: + __db_prbytes(env, &msgbuf, bp, uinttmp); + has_data = 0; + break; + case LOGREC_PGDBT: + has_data = 0; + for (np = sp + 1; np->type != LOGREC_Done; np++) + if (np->type == LOGREC_PGDDBT) { + has_data = 1; + break; + } + + hdrstart = (PAGE*)bp; + hdrsize = uinttmp; + if (has_data == 1) + break; + /* FALLTHROUGH */ + case LOGREC_PGDDBT: + DB_ASSERT(env, hdrstart != NULL); + if (dbp != NULL && (downrev ? LOG_SWAPPED(env) : + F_ISSET(dbp, DB_AM_SWAP))) { + dbt.data = bp; + dbt.size = uinttmp; + if ((ret = __db_pageswap(env, dbp, + hdrstart, hdrsize, has_data == 0 ? 
+ NULL : &dbt, 1)) != 0) + return (ret); + } + if (downrev) + goto pr_data; + if (ALIGNP_INC(hdrstart, + sizeof(u_int32_t)) != hdrstart) { + if ((ret = __os_malloc(env, + hdrsize, &hdrtmp)) != 0) + return (ret); + memcpy(hdrtmp, hdrstart, hdrsize); + } else + hdrtmp = hdrstart; + if (has_data == 1 && ALIGNP_INC(bp, + sizeof(u_int32_t)) != bp) { + if ((ret = __os_malloc(env, + uinttmp, &datatmp)) != 0) + return (ret); + memcpy(datatmp, bp, uinttmp); + } else if (has_data == 1) + datatmp = bp; + else + datatmp = NULL; + if ((ret = __db_prpage_int(env, &msgbuf, + dbp, "\t", hdrtmp, + uinttmp, datatmp, DB_PR_PAGE)) != 0) + return (ret); + has_data = 0; + if (hdrtmp != hdrstart) + __os_free(env, hdrtmp); + if (datatmp != bp && datatmp != NULL) + __os_free(env, datatmp); + break; + case LOGREC_PGLIST: + dbt.data = bp; + dbt.size = uinttmp; + __db_pglist_print(env, &msgbuf, &dbt); + break; + case LOGREC_LOCKS: + dbt.data = bp; + dbt.size = uinttmp; + __lock_list_print(env, &msgbuf, &dbt); + break; + } + bp += uinttmp; + break; + + case LOGREC_POINTER: + LOGCOPY_TOLSN(env, &prev_lsn, bp); + __db_msgadd(env, &msgbuf, + "\t%s: [%lu][%lu]\n", sp->name, + (u_long)prev_lsn.file, (u_long)prev_lsn.offset); + bp += sizeof(DB_LSN); + break; + case LOGREC_Done: + DB_ASSERT(env, sp->type != LOGREC_Done); + } + } + if (msgbuf.buf != NULL) + DB_MSGBUF_FLUSH(env, &msgbuf); + else + __db_msg(env, "%s", ""); + return (0); +} + +/* + * __log_print_dbregister -- + * So that we can properly swap and print information from databases + * we generate dummy DB handles here. These are real handles that are never + * opened but their fileid, meta_pgno and some flags are set properly. + * This code uses parallel structures to those in the dbregister code. + * The DB_LOG handle passed in must NOT be the real environment handle + * since this would confuse actual running transactions if printing is + * done while the environment is active. 
+ */ +static int +__log_print_dbregister(env, recbuf, dblp) + ENV *env; + DBT *recbuf; + DB_LOG *dblp; +{ + __dbreg_register_args *argp; + DB *dbp; + DB_ENTRY *dbe; + int ret; + + if ((ret = __dbreg_register_read(env, recbuf->data, &argp)) != 0) + return (ret); + + if (dblp->dbentry_cnt <= argp->fileid && + (ret = __dbreg_add_dbentry(env, dblp, NULL, argp->fileid)) != 0) + goto err; + dbe = &dblp->dbentry[argp->fileid]; + dbp = dbe->dbp; + + switch (FLD_ISSET(argp->opcode, DBREG_OP_MASK)) { + case DBREG_CHKPNT: + case DBREG_OPEN: + case DBREG_REOPEN: + if (dbp != NULL) { + if (memcmp(dbp->fileid, + argp->uid.data, DB_FILE_ID_LEN) == 0 && + dbp->meta_pgno == argp->meta_pgno) + goto done; + if ((__db_close(dbp, NULL, DB_NOSYNC)) != 0) + goto err; + dbe->dbp = dbp = NULL; + } + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + memcpy(dbp->fileid, argp->uid.data, DB_FILE_ID_LEN); + dbp->meta_pgno = argp->meta_pgno; + F_SET(dbp, DB_AM_RECOVER); + /* + * We need to swap bytes if we are on a BIGEND machine XOR + * we have a BIGEND database. + */ + if ((F_ISSET(env, ENV_LITTLEENDIAN) == 0) ^ + (FLD_ISSET(argp->opcode, DBREG_BIGEND) != 0)) + F_SET(dbp, DB_AM_SWAP); + if (FLD_ISSET(argp->opcode, DBREG_CHKSUM)) + F_SET(dbp, DB_AM_CHKSUM); + if (FLD_ISSET(argp->opcode, DBREG_ENCRYPT)) + F_SET(dbp, DB_AM_ENCRYPT); + dbe->dbp = dbp; + break; + case DBREG_CLOSE: + case DBREG_RCLOSE: + if (dbp == NULL) + goto err; + if ((__db_close(dbp, NULL, DB_NOSYNC)) != 0) + goto err; + dbe->dbp = dbp = NULL; + break; + case DBREG_PREOPEN: + break; + default: + DB_ASSERT(env, argp->opcode != argp->opcode); + } +done: +err: + __os_free(env, argp); + return (ret); +} diff --git a/src/log/log_put.c b/src/log/log_put.c new file mode 100644 index 00000000..357fc40b --- /dev/null +++ b/src/log/log_put.c @@ -0,0 +1,2054 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
#include "dbinc/db_page.h"
#include "dbinc_auto/db_ext.h"

static int __log_encrypt_record __P((ENV *, DBT *, HDR *, u_int32_t));
static int __log_file __P((ENV *, const DB_LSN *, char *, size_t));
static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
static int __log_flush_commit __P((ENV *, const DB_LSN *, u_int32_t));
static int __log_newfh __P((DB_LOG *, int));
static int __log_put_next __P((ENV *,
    DB_LSN *, const DBT *, HDR *, DB_LSN *));
static int __log_put_record_int __P((ENV *, DB *, DB_TXN *, DB_LSN *,
    u_int32_t, u_int32_t, u_int32_t, u_int32_t, DB_LOG_RECSPEC *, va_list));
static int __log_putr __P((DB_LOG *,
    DB_LSN *, const DBT *, u_int32_t, HDR *));
static int __log_write __P((DB_LOG *, void *, u_int32_t));

/*
 * __log_put_pp --
 *	ENV->log_put pre/post processing.
 *
 * PUBLIC: int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put_pp(dbenv, lsnp, udbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsnp;
	const DBT *udbt;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret;

	env = dbenv->env;

	ENV_REQUIRES_CONFIG(env,
	    env->lg_handle, "DB_ENV->log_put", DB_INIT_LOG);

	/* Validate arguments: check for allowed flags. */
	if ((ret = __db_fchk(env, "DB_ENV->log_put", flags,
	    DB_LOG_CHKPNT | DB_LOG_COMMIT |
	    DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC)) != 0)
		return (ret);

	/* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */
	if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH))
		return (__db_ferr(env, "DB_ENV->log_put", 1));

	/* Replication clients should never write log records. */
	if (IS_REP_CLIENT(env)) {
		__db_errx(env, DB_STR("2511",
		    "DB_ENV->log_put is illegal on replication clients"));
		return (EINVAL);
	}

	ENV_ENTER(env, ip);
	REPLICATION_WRAP(env, (__log_put(env, lsnp, udbt, flags)), 0, ret);
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __log_put --
 *	ENV->log_put.
 *
 * PUBLIC: int __log_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put(env, lsnp, udbt, flags)
	ENV *env;
	DB_LSN *lsnp;
	const DBT *udbt;
	u_int32_t flags;
{
	DBT *dbt, t;
	DB_CIPHER *db_cipher;
	DB_LOG *dblp;
	DB_LSN lsn, old_lsn;
	DB_REP *db_rep;
	HDR hdr;
	LOG *lp;
	REP *rep;
	int lock_held, need_free, ret;
	u_int8_t *key;

	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	db_cipher = env->crypto_handle;
	db_rep = env->rep_handle;
	if (db_rep != NULL)
		rep = db_rep->region;
	else
		rep = NULL;

	/* Work on a local copy so the user's DBT is never modified. */
	dbt = &t;
	t = *udbt;
	lock_held = need_free = 0;
	ZERO_LSN(old_lsn);
	hdr.len = hdr.prev = 0;

	/*
	 * In general, if we are not a rep application, but are sharing a master
	 * rep env, we should not be writing log records.  However, we can allow
	 * a non-replication-aware process to join a pre-existing repmgr
	 * environment, if env handle meets repmgr's DB_THREAD requirement.
	 */

	if (IS_REP_MASTER(env) && db_rep->send == NULL) {
#ifdef HAVE_REPLICATION_THREADS
		if (F_ISSET(env, ENV_THREAD) && APP_IS_REPMGR(env)) {
			if ((ret = __repmgr_autostart(env)) != 0)
				return (ret);
		} else
#endif
		{
#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP)
			__db_errx(env, DB_STR("2512",
			    "Non-replication DB_ENV handle attempting "
			    "to modify a replicated environment"));
			return (EINVAL);
#endif
		}
	}
	DB_ASSERT(env, !IS_REP_CLIENT(env));

	/*
	 * If we are coming from the logging code, we use an internal flag,
	 * DB_LOG_NOCOPY, because we know we can overwrite/encrypt the log
	 * record in place.  Otherwise, if a user called log_put then we
	 * must copy it to new memory so that we know we can write it.
	 *
	 * We also must copy it to new memory if we are a replication master
	 * so that we retain an unencrypted copy of the log record to send
	 * to clients.
	 */
	if (!LF_ISSET(DB_LOG_NOCOPY) || IS_REP_MASTER(env)) {
		if (CRYPTO_ON(env))
			t.size += db_cipher->adj_size(udbt->size);
		if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0)
			goto err;
		need_free = 1;
		memcpy(t.data, udbt->data, udbt->size);
	}
	if ((ret = __log_encrypt_record(env, dbt, &hdr, udbt->size)) != 0)
		goto err;
	if (CRYPTO_ON(env))
		key = db_cipher->mac_key;
	else
		key = NULL;
#ifdef HAVE_LOG_CHECKSUM
	__db_chksum(&hdr, dbt->data, dbt->size, key, hdr.chksum);
#endif

	LOG_SYSTEM_LOCK(env);
	lock_held = 1;

	/* old_lsn is set non-zero if this put switched log files. */
	if ((ret = __log_put_next(env, &lsn, dbt, &hdr, &old_lsn)) != 0)
		goto panic_check;

	/*
	 * Assign the return LSN before dropping the region lock.  Necessary
	 * in case the lsn is a begin_lsn from a TXN_DETAIL structure passed in
	 * by the logging routines.  We use atomic 32-bit operations because
	 * during commit this will be a TXN_DETAIL visible_lsn field, and MVCC
	 * relies on reading the fields atomically.
	 */
	lsnp->file = lsn.file;
	lsnp->offset = lsn.offset;

#ifdef HAVE_REPLICATION
	if (IS_REP_MASTER(env)) {
		__rep_newfile_args nf_args;
		DBT newfiledbt;
		REP_BULK bulk;
		size_t len;
		u_int32_t ctlflags;
		u_int8_t buf[__REP_NEWFILE_SIZE];

		/*
		 * Replication masters need to drop the lock to send messages,
		 * but want to drop and reacquire it a minimal number of times.
		 */
		ctlflags = LF_ISSET(DB_LOG_COMMIT | DB_LOG_CHKPNT) ?
		    REPCTL_PERM : 0;
		LOG_SYSTEM_UNLOCK(env);
		lock_held = 0;
		if (LF_ISSET(DB_FLUSH))
			ctlflags |= REPCTL_FLUSH;

		/*
		 * If we changed files and we're in a replicated environment,
		 * we need to inform our clients now that we've dropped the
		 * region lock.
		 *
		 * Note that a failed NEWFILE send is a dropped message that
		 * our client can handle, so we can ignore it.  It's possible
		 * that the record we already put is a commit, so we don't just
		 * want to return failure.
		 */
		if (!IS_ZERO_LSN(old_lsn)) {
			memset(&newfiledbt, 0, sizeof(newfiledbt));
			nf_args.version = lp->persist.version;
			(void)__rep_newfile_marshal(env, &nf_args,
			    buf, __REP_NEWFILE_SIZE, &len);
			DB_INIT_DBT(newfiledbt, buf, len);
			(void)__rep_send_message(env, DB_EID_BROADCAST,
			    REP_NEWFILE, &old_lsn, &newfiledbt, 0, 0);
		}

		/*
		 * If we're doing bulk processing put it in the bulk buffer.
		 */
		ret = 0;
		if (FLD_ISSET(rep->config, REP_C_BULK)) {
			/*
			 * Bulk could have been turned on by another process.
			 * If so, set the address into the bulk region now.
			 */
			if (db_rep->bulk == NULL)
				db_rep->bulk = R_ADDR(&dblp->reginfo,
				    lp->bulk_buf);
			memset(&bulk, 0, sizeof(bulk));
			bulk.addr = db_rep->bulk;
			bulk.offp = &lp->bulk_off;
			bulk.len = lp->bulk_len;
			bulk.lsn = lsn;
			bulk.type = REP_BULK_LOG;
			bulk.eid = DB_EID_BROADCAST;
			bulk.flagsp = &lp->bulk_flags;
			ret = __rep_bulk_message(env, &bulk, NULL,
			    &lsn, udbt, ctlflags);
		}
		if (!FLD_ISSET(rep->config, REP_C_BULK) ||
		    ret == DB_REP_BULKOVF) {
			/*
			 * Then send the log record itself on to our clients.
			 */
			/*
			 * !!!
			 * In the crypto case, we MUST send the udbt, not the
			 * now-encrypted dbt.  Clients have no way to decrypt
			 * without the header.
			 */
			ret = __rep_send_message(env, DB_EID_BROADCAST,
			    REP_LOG, &lsn, udbt, ctlflags, 0);
		}
		if (FLD_ISSET(ctlflags, REPCTL_PERM)) {
			LOG_SYSTEM_LOCK(env);
#ifdef HAVE_STATISTICS
			if (IS_USING_LEASES(env))
				rep->stat.st_lease_sends++;
#endif
			/*
			 * Keep track of our last PERM lsn.  Set this on a
			 * master under the log lock.  When using leases, if
			 * we set max_perm_lsn too early (before the send)
			 * then we hit a lot of false invalid lease checks
			 * which all try to refresh and hurt performance.
			 */
			if (LOG_COMPARE(&lp->max_perm_lsn, &lsn) < 0)
				lp->max_perm_lsn = lsn;
			LOG_SYSTEM_UNLOCK(env);
		}
		/*
		 * If the send fails and we're a commit or checkpoint,
		 * there's nothing we can do; the record's in the log.
		 * Flush it, even if we're running with TXN_NOSYNC,
		 * on the grounds that it should be in durable
		 * form somewhere.
		 */
		if (ret != 0 && FLD_ISSET(ctlflags, REPCTL_PERM))
			LF_SET(DB_FLUSH);
		/*
		 * We ignore send failures so reset 'ret' to 0 here.
		 * We needed to check special return values from
		 * bulk transfer and errors from either bulk or normal
		 * message sending need flushing on perm records.  But
		 * otherwise we need to ignore it and reset it now.
		 */
		ret = 0;
	}
#endif

	/*
	 * If needed, do a flush.  Note that failures at this point
	 * are only permissible if we know we haven't written a commit
	 * record; __log_flush_commit is responsible for enforcing this.
	 *
	 * If a flush is not needed, see if WRITE_NOSYNC was set and we
	 * need to write out the log buffer.
	 */
	if (LF_ISSET(DB_FLUSH | DB_LOG_WRNOSYNC)) {
		if (!lock_held) {
			LOG_SYSTEM_LOCK(env);
			lock_held = 1;
		}
		if ((ret = __log_flush_commit(env, &lsn, flags)) != 0)
			goto panic_check;
	}

	/*
	 * If flushed a checkpoint record, reset the "bytes since the last
	 * checkpoint" counters.
	 */
	if (LF_ISSET(DB_LOG_CHKPNT))
		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;

	/* Increment count of records added to the log. */
	STAT(++lp->stat.st_record);

	if (0) {
panic_check:	/*
		 * Writing log records cannot fail if we're a replication
		 * master.  The reason is that once we send the record to
		 * replication clients, the transaction can no longer
		 * abort, otherwise the master would be out of sync with
		 * the rest of the replication group.  Panic the system.
		 */
		if (ret != 0 && IS_REP_MASTER(env))
			ret = __env_panic(env, ret);
	}

err:	if (lock_held)
		LOG_SYSTEM_UNLOCK(env);
	if (need_free)
		__os_free(env, dbt->data);

	/*
	 * If auto-remove is set and we switched files, remove unnecessary
	 * log files.
	 */
	if (ret == 0 && !IS_ZERO_LSN(old_lsn) && lp->db_log_autoremove)
		__log_autoremove(env);

	return (ret);
}

/*
 * __log_current_lsn_int --
 *	internal operations of __log_current_lsn
 *
 * PUBLIC: int __log_current_lsn_int
 * PUBLIC:     __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *));
 */
int
__log_current_lsn_int(env, lsnp, mbytesp, bytesp)
	ENV *env;
	DB_LSN *lsnp;
	u_int32_t *mbytesp, *bytesp;
{
	DB_LOG *dblp;
	LOG *lp;

	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;

	LOG_SYSTEM_LOCK(env);

	/*
	 * We need the LSN of the last entry in the log.
	 *
	 * Typically, it's easy to get the last written LSN, you simply look
	 * at the current log pointer and back up the number of bytes of the
	 * last log record.  However, if the last thing we did was write the
	 * log header of a new log file, then, this doesn't work, so we return
	 * the first log record that will be written in this new file.
	 */
	*lsnp = lp->lsn;
	if (lp->lsn.offset > lp->len)
		lsnp->offset -= lp->len;

	/*
	 * Since we're holding the log region lock, return the bytes put into
	 * the log since the last checkpoint, transaction checkpoint needs it.
	 *
	 * We add the current buffer offset so as to count bytes that have not
	 * yet been written, but are sitting in the log buffer.
	 */
	if (mbytesp != NULL) {
		*mbytesp = lp->stat.st_wc_mbytes;
		*bytesp = (u_int32_t)(lp->stat.st_wc_bytes + lp->b_off);
	}

	LOG_SYSTEM_UNLOCK(env);

	return (0);
}

/*
 * __log_current_lsn --
 *	Return the current LSN.
+ * + * PUBLIC: int __log_current_lsn + * PUBLIC: __P((ENV *, DB_LSN *, u_int32_t *, u_int32_t *)); + */ +int +__log_current_lsn(env, lsnp, mbytesp, bytesp) + ENV *env; + DB_LSN *lsnp; + u_int32_t *mbytesp, *bytesp; +{ + DB_THREAD_INFO *ip; + int ret; + + ret = 0; + ENV_ENTER(env, ip); + ret = __log_current_lsn_int(env, lsnp, mbytesp, bytesp); + ENV_LEAVE(env, ip); + + return ret; +} + +/* + * __log_put_next -- + * Put the given record as the next in the log, wherever that may + * turn out to be. + */ +static int +__log_put_next(env, lsn, dbt, hdr, old_lsnp) + ENV *env; + DB_LSN *lsn; + const DBT *dbt; + HDR *hdr; + DB_LSN *old_lsnp; +{ + DB_LOG *dblp; + DB_LSN old_lsn; + LOG *lp; + int adv_file, newfile, ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* + * Save a copy of lp->lsn before we might decide to switch log + * files and change it. If we do switch log files, and we're + * doing replication, we'll need to tell our clients about the + * switch, and they need to receive a NEWFILE message + * with this "would-be" LSN in order to know they're not + * missing any log records. + */ + old_lsn = lp->lsn; + newfile = 0; + adv_file = 0; + /* + * If our current log is at an older version and we want to write + * a record then we need to advance the log. + */ + if (lp->persist.version != DB_LOGVERSION) { + __log_set_version(env, DB_LOGVERSION); + adv_file = 1; + } + + /* + * If this information won't fit in the file, or if we're a + * replication client environment and have been told to do so, + * swap files. 
+ */ + if (adv_file || lp->lsn.offset == 0 || + lp->lsn.offset + hdr->size + dbt->size > lp->log_size) { + if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) { + __db_errx(env, DB_STR_A("2513", + "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)", + "%lu %lu"), + (u_long)hdr->size + sizeof(LOGP) + dbt->size, + (u_long)lp->log_size); + return (EINVAL); + } + + if ((ret = __log_newfile(dblp, NULL, 0, 0)) != 0) + return (ret); + + /* + * Flag that we switched files, in case we're a master + * and need to send this information to our clients. + * We postpone doing the actual send until we can + * safely release the log region lock and are doing so + * anyway. + */ + newfile = 1; + } + + /* If we switched log files, let our caller know where. */ + if (newfile) + *old_lsnp = old_lsn; + + /* Actually put the record. */ + return (__log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len, hdr)); +} + +/* + * __log_flush_commit -- + * Flush a record. + */ +static int +__log_flush_commit(env, lsnp, flags) + ENV *env; + const DB_LSN *lsnp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LSN flush_lsn; + HDR hdr; + LOG *lp; + int ret, t_ret; + size_t nr, nw; + u_int8_t *buffer; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + flush_lsn = *lsnp; + + ret = 0; + + /* + * DB_FLUSH: + * Flush a record for which the DB_FLUSH flag to log_put was set. + * + * DB_LOG_WRNOSYNC: + * If there's anything in the current log buffer, write it out. + */ + if (LF_ISSET(DB_FLUSH)) + ret = __log_flush_int(dblp, &flush_lsn, 1); + else if (!lp->db_log_inmemory && lp->b_off != 0) + if ((ret = __log_write(dblp, + dblp->bufp, (u_int32_t)lp->b_off)) == 0) + lp->b_off = 0; + + /* + * If a flush supporting a transaction commit fails, we must abort the + * transaction. (If we aren't doing a commit, return the failure; if + * if the commit we care about made it to disk successfully, we just + * ignore the failure, because there's no way to undo the commit.) 
+ */ + if (ret == 0 || !LF_ISSET(DB_LOG_COMMIT)) + return (ret); + + if (LF_ISSET(DB_FLUSH) ? + flush_lsn.file != lp->s_lsn.file || + flush_lsn.offset < lp->s_lsn.offset : + flush_lsn.file != lp->lsn.file || flush_lsn.offset < lp->w_off) + return (0); + + if (IS_REP_MASTER(env)) { + __db_err(env, ret, DB_STR("2514", + "Write failed on MASTER commit.")); + return (__env_panic(env, ret)); + } + + /* + * Else, make sure that the commit record does not get out after we + * abort the transaction. Do this by overwriting the commit record + * in the buffer. (Note that other commits in this buffer will wait + * until a successful write happens, we do not wake them.) We point + * at the right part of the buffer and write an abort record over the + * commit. We must then try and flush the buffer again, since the + * interesting part of the buffer may have actually made it out to + * disk before there was a failure, we can't know for sure. + */ + if (flush_lsn.offset > lp->w_off) { + if ((t_ret = __txn_force_abort(env, + dblp->bufp + flush_lsn.offset - lp->w_off)) != 0) + return (__env_panic(env, t_ret)); + } else { + /* + * The buffer was written, but its not on disk, we + * must read it back and force things from a commit + * state to an abort state. Lots of things could fail + * here and we will be left with a commit record but + * a panic return. + */ + if ( + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, &hdr, + HDR_NORMAL_SZ, &nr)) != 0 || nr != HDR_NORMAL_SZ) + return (__env_panic(env, t_ret == 0 ? 
EIO : t_ret)); + if (LOG_SWAPPED(env)) + __log_hdrswap(&hdr, CRYPTO_ON(env)); + if ((t_ret = __os_malloc(env, hdr.len, &buffer)) != 0 || + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, buffer, + hdr.len, &nr)) != 0 || nr != hdr.len || + (t_ret = __txn_force_abort(env, buffer)) != 0 || + (t_ret = __os_seek(env, + dblp->lfhp, 0, 0, flush_lsn.offset)) != 0 || + (t_ret = __os_write(env, dblp->lfhp, buffer, + nr, &nw)) != 0 || nw != nr) + return (__env_panic(env, t_ret == 0 ? EIO : t_ret)); + __os_free(env, buffer); + } + /* + * Try to flush the log again, if the disk just bounced then we + * want to be sure it does not go away again before we write the + * abort record. + */ + (void)__log_flush_int(dblp, &flush_lsn, 0); + + return (ret); +} + +/* + * __log_newfile -- + * Initialize and switch to a new log file. (Note that this is + * called both when no log yet exists and when we fill a log file.) + * + * PUBLIC: int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t, u_int32_t)); + */ +int +__log_newfile(dblp, lsnp, logfile, version) + DB_LOG *dblp; + DB_LSN *lsnp; + u_int32_t logfile; + u_int32_t version; +{ + DBT t; + DB_CIPHER *db_cipher; + DB_LSN lsn; + ENV *env; + HDR hdr; + LOG *lp; + LOGP *tpersist; + int need_free, ret; + u_int32_t lastoff; + size_t tsize; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* + * If we're not specifying a specific log file number and we're + * not at the beginning of a file already, start a new one. + */ + if (logfile == 0 && lp->lsn.offset != 0) { + /* + * Flush the log so this file is out and can be closed. We + * cannot release the region lock here because we need to + * protect the end of the file while we switch. In + * particular, a thread with a smaller record than ours + * could detect that there is space in the log. 
Even + * blocking that event by declaring the file full would + * require all threads to wait here so that the lsn.file + * can be moved ahead after the flush completes. This + * probably can be changed if we had an lsn for the + * previous file and one for the current, but it does not + * seem like this would get much more throughput, if any. + */ + if ((ret = __log_flush_int(dblp, NULL, 0)) != 0) + return (ret); + + /* + * Save the last known offset from the previous file, we'll + * need it to initialize the persistent header information. + */ + lastoff = lp->lsn.offset; + + /* Point the current LSN to the new file. */ + ++lp->lsn.file; + lp->lsn.offset = 0; + + /* Reset the file write offset. */ + lp->w_off = 0; + } else + lastoff = 0; + + /* + * Replication may require we reset the log file name space entirely. + * In that case we also force a file switch so that replication can + * clean up old files. + */ + if (logfile != 0) { + lp->lsn.file = logfile; + lp->lsn.offset = 0; + lp->w_off = 0; + if (lp->db_log_inmemory) { + lsn = lp->lsn; + (void)__log_zero(env, &lsn); + } else { + lp->s_lsn = lp->lsn; + if ((ret = __log_newfh(dblp, 1)) != 0) + return (ret); + } + } + + DB_ASSERT(env, lp->db_log_inmemory || lp->b_off == 0); + if (lp->db_log_inmemory && + (ret = __log_inmem_newfile(dblp, lp->lsn.file)) != 0) + return (ret); + + /* + * Insert persistent information as the first record in every file. + * Note that the previous length is wrong for the very first record + * of the log, but that's okay, we check for it during retrieval. + */ + memset(&t, 0, sizeof(t)); + memset(&hdr, 0, sizeof(HDR)); + + need_free = 0; + tsize = sizeof(LOGP); + db_cipher = env->crypto_handle; + if (CRYPTO_ON(env)) + tsize += db_cipher->adj_size(tsize); + if ((ret = __os_calloc(env, 1, tsize, &tpersist)) != 0) + return (ret); + need_free = 1; + /* + * If we're told what version to make this file, then we + * need to be at that version. Update here. 
+ */ + if (version != 0) { + __log_set_version(env, version); + if ((ret = __env_init_rec(env, version)) != 0) + goto err; + } + lp->persist.log_size = lp->log_size = lp->log_nsize; + memcpy(tpersist, &lp->persist, sizeof(LOGP)); + DB_SET_DBT(t, tpersist, tsize); + if (LOG_SWAPPED(env)) + __log_persistswap(tpersist); + + if ((ret = + __log_encrypt_record(env, &t, &hdr, (u_int32_t)tsize)) != 0) + goto err; +#ifdef HAVE_LOG_CHECKSUM + if (lp->persist.version != DB_LOGVERSION) + __db_chksum(NULL, t.data, t.size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, hdr.chksum); + else + __db_chksum(&hdr, t.data, t.size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, hdr.chksum); +#endif + + if ((ret = __log_putr(dblp, &lsn, + &t, lastoff == 0 ? 0 : lastoff - lp->len, &hdr)) != 0) + goto err; + + /* Update the LSN information returned to the caller. */ + if (lsnp != NULL) + *lsnp = lp->lsn; + +err: if (need_free) + __os_free(env, tpersist); + return (ret); +} + +/* + * __log_putr -- + * Actually put a record into the log. + */ +static int +__log_putr(dblp, lsn, dbt, prev, h) + DB_LOG *dblp; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t prev; + HDR *h; +{ + DB_CIPHER *db_cipher; + DB_LSN f_lsn; + ENV *env; + HDR tmp, *hdr; + LOG *lp; + int ret, t_ret; + db_size_t b_off; + size_t nr; + u_int32_t w_off; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* + * If we weren't given a header, use a local one. + */ + db_cipher = env->crypto_handle; + if (h == NULL) { + hdr = &tmp; + memset(hdr, 0, sizeof(HDR)); + if (CRYPTO_ON(env)) + hdr->size = HDR_CRYPTO_SZ; + else + hdr->size = HDR_NORMAL_SZ; + } else + hdr = h; + + /* Save our position in case we fail. */ + b_off = lp->b_off; + w_off = lp->w_off; + f_lsn = lp->f_lsn; + + /* + * Initialize the header. If we just switched files, lsn.offset will + * be 0, and what we really want is the offset of the previous record + * in the previous file. Fortunately, prev holds the value we want. 
+ */ + hdr->prev = prev; + hdr->len = (u_int32_t)hdr->size + dbt->size; + + /* + * If we were passed in a nonzero checksum, our caller calculated + * the checksum before acquiring the log mutex, as an optimization. + * + * If our caller calculated a real checksum of 0, we'll needlessly + * recalculate it. C'est la vie; there's no out-of-bounds value + * here. + */ + if (hdr->chksum[0] == 0) { +#ifdef HAVE_LOG_CHECKSUM + if (lp->persist.version != DB_LOGVERSION) + __db_chksum(NULL, dbt->data, dbt->size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, + hdr->chksum); + else + __db_chksum(hdr, dbt->data, dbt->size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, + hdr->chksum); +#endif + } else if (lp->persist.version == DB_LOGVERSION) { + /* + * We need to correct for prev and len since they are not + * set before here. + */ + LOG_HDR_SUM(CRYPTO_ON(env), hdr, hdr->chksum); + } + + if (lp->db_log_inmemory && (ret = __log_inmem_chkspace(dblp, + (u_int32_t)hdr->size + dbt->size)) != 0) + goto err; + + /* + * The offset into the log file at this point is the LSN where + * we're about to put this record, and is the LSN the caller wants. + */ + *lsn = lp->lsn; + + nr = hdr->size; + if (LOG_SWAPPED(env)) + __log_hdrswap(hdr, CRYPTO_ON(env)); + + /* nr can't overflow a 32 bit value - header size is internal. */ + ret = __log_fill(dblp, lsn, hdr, (u_int32_t)nr); + + if (LOG_SWAPPED(env)) + __log_hdrswap(hdr, CRYPTO_ON(env)); + + if (ret != 0) + goto err; + + if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) + goto err; + + lp->len = (u_int32_t)(hdr->size + dbt->size); + lp->lsn.offset += lp->len; + return (0); +err: + /* + * If we wrote more than one buffer before failing, get the + * first one back. The extra buffers will fail the checksums + * and be ignored. 
+ */ + if (w_off + lp->buffer_size < lp->w_off) { + DB_ASSERT(env, !lp->db_log_inmemory); + if ((t_ret = __os_seek(env, dblp->lfhp, 0, 0, w_off)) != 0 || + (t_ret = __os_read(env, dblp->lfhp, dblp->bufp, + b_off, &nr)) != 0) + return (__env_panic(env, t_ret)); + if (nr != b_off) { + __db_errx(env, DB_STR("2515", + "Short read while restoring log")); + return (__env_panic(env, EIO)); + } + } + + /* Reset to where we started. */ + lp->w_off = w_off; + lp->b_off = b_off; + lp->f_lsn = f_lsn; + + return (ret); +} + +/* + * __log_flush_pp -- + * ENV->log_flush pre/post processing. + * + * PUBLIC: int __log_flush_pp __P((DB_ENV *, const DB_LSN *)); + */ +int +__log_flush_pp(dbenv, lsn) + DB_ENV *dbenv; + const DB_LSN *lsn; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_flush(env, lsn)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * See if we need to wait. s_lsn is not locked so some care is needed. + * The sync point can only move forward. The lsnp->file cannot be + * greater than the s_lsn.file. If the file we want is in the past + * we are done. If the file numbers are the same check the offset. + * This all assumes we can read an 32-bit quantity in one state or + * the other, not in transition. 
+ */ +#define ALREADY_FLUSHED(lp, lsnp) \ + (((lp)->s_lsn.file > (lsnp)->file) || \ + ((lp)->s_lsn.file == (lsnp)->file && \ + (lp)->s_lsn.offset > (lsnp)->offset)) + +/* + * __log_flush -- + * ENV->log_flush + * + * PUBLIC: int __log_flush __P((ENV *, const DB_LSN *)); + */ +int +__log_flush(env, lsn) + ENV *env; + const DB_LSN *lsn; +{ + DB_LOG *dblp; + LOG *lp; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (lsn != NULL && ALREADY_FLUSHED(lp, lsn)) + return (0); + LOG_SYSTEM_LOCK(env); + ret = __log_flush_int(dblp, lsn, 1); + LOG_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __log_flush_int -- + * Write all records less than or equal to the specified LSN; internal + * version. + * + * PUBLIC: int __log_flush_int __P((DB_LOG *, const DB_LSN *, int)); + */ +int +__log_flush_int(dblp, lsnp, release) + DB_LOG *dblp; + const DB_LSN *lsnp; + int release; +{ + struct __db_commit *commit; + ENV *env; + DB_LSN flush_lsn, f_lsn; + LOG *lp; + size_t b_off; + u_int32_t ncommit, w_off; + int do_flush, first, ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + ncommit = 0; + ret = 0; + + if (lp->db_log_inmemory) { + lp->s_lsn = lp->lsn; + STAT(++lp->stat.st_scount); + return (0); + } + + /* + * If no LSN specified, flush the entire log by setting the flush LSN + * to the last LSN written in the log. Otherwise, check that the LSN + * isn't a non-existent record for the log. 
+ */ + if (lsnp == NULL) { + flush_lsn.file = lp->lsn.file; + flush_lsn.offset = lp->lsn.offset - lp->len; + } else if (lsnp->file > lp->lsn.file || + (lsnp->file == lp->lsn.file && + lsnp->offset > lp->lsn.offset - lp->len)) { + __db_errx(env, DB_STR_A("2516", + "DB_ENV->log_flush: LSN of %lu/%lu past current end-of-log of %lu/%lu", + "%lu %lu %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)lp->lsn.file, + (u_long)lp->lsn.offset); + __db_errx(env, DB_STR("2517", + "Database environment corrupt; the wrong log files may " + "have been removed or incompatible database files " + "imported from another environment")); + return (__env_panic(env, DB_RUNRECOVERY)); + } else { + if (ALREADY_FLUSHED(lp, lsnp)) + return (0); + flush_lsn = *lsnp; + } + + /* + * If a flush is in progress and we're allowed to do so, drop + * the region lock and block waiting for the next flush. + */ + if (release && lp->in_flush != 0) { + if ((commit = SH_TAILQ_FIRST( + &lp->free_commits, __db_commit)) == NULL) { + if ((ret = __env_alloc(&dblp->reginfo, + sizeof(struct __db_commit), &commit)) != 0) + goto flush; + memset(commit, 0, sizeof(*commit)); + if ((ret = __mutex_alloc(env, MTX_TXN_COMMIT, + DB_MUTEX_SELF_BLOCK, &commit->mtx_txnwait)) != 0) { + __env_alloc_free(&dblp->reginfo, commit); + return (ret); + } + MUTEX_LOCK(env, commit->mtx_txnwait); + } else + SH_TAILQ_REMOVE( + &lp->free_commits, commit, links, __db_commit); + + lp->ncommit++; + + /* + * Flushes may be requested out of LSN order; be + * sure we only move lp->t_lsn forward. + */ + if (LOG_COMPARE(&lp->t_lsn, &flush_lsn) < 0) + lp->t_lsn = flush_lsn; + + commit->lsn = flush_lsn; + SH_TAILQ_INSERT_HEAD( + &lp->commits, commit, links, __db_commit); + LOG_SYSTEM_UNLOCK(env); + /* Wait here for the in-progress flush to finish. */ + MUTEX_LOCK(env, commit->mtx_txnwait); + LOG_SYSTEM_LOCK(env); + + lp->ncommit--; + /* + * Grab the flag before freeing the struct to see if + * we need to flush the log to commit. 
If so, + * use the maximal lsn for any committing thread. + */ + do_flush = F_ISSET(commit, DB_COMMIT_FLUSH); + F_CLR(commit, DB_COMMIT_FLUSH); + SH_TAILQ_INSERT_HEAD( + &lp->free_commits, commit, links, __db_commit); + if (do_flush) { + lp->in_flush--; + flush_lsn = lp->t_lsn; + } else + return (0); + } + + /* + * Protect flushing with its own mutex so we can release + * the region lock except during file switches. + */ +flush: MUTEX_LOCK(env, lp->mtx_flush); + + /* + * If the LSN is less than or equal to the last-sync'd LSN, we're done. + * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte + * after the byte we absolutely know was written to disk, so the test + * is <, not <=. + */ + if (flush_lsn.file < lp->s_lsn.file || + (flush_lsn.file == lp->s_lsn.file && + flush_lsn.offset < lp->s_lsn.offset)) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + /* + * We may need to write the current buffer. We have to write the + * current buffer if the flush LSN is greater than or equal to the + * buffer's starting LSN. + * + * Otherwise, it's still possible that this thread may never have + * written to this log file. Acquire a file descriptor if we don't + * already have one. + */ + if (lp->b_off != 0 && LOG_COMPARE(&flush_lsn, &lp->f_lsn) >= 0) { + if ((ret = __log_write(dblp, + dblp->bufp, (u_int32_t)lp->b_off)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + lp->b_off = 0; + } else if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file) + if ((ret = __log_newfh(dblp, 0)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + goto done; + } + + /* + * We are going to flush, release the region. + * First get the current state of the buffer since + * another write may come in, but we may not flush it. + */ + b_off = lp->b_off; + w_off = lp->w_off; + f_lsn = lp->f_lsn; + lp->in_flush++; + if (release) + LOG_SYSTEM_UNLOCK(env); + + /* Sync all writes to disk. 
*/ + if ((ret = __os_fsync(env, dblp->lfhp)) != 0) { + MUTEX_UNLOCK(env, lp->mtx_flush); + if (release) + LOG_SYSTEM_LOCK(env); + lp->in_flush--; + goto done; + } + + /* + * Set the last-synced LSN. + * This value must be set to the LSN past the last complete + * record that has been flushed. This is at least the first + * lsn, f_lsn. If the buffer is empty, b_off == 0, then + * we can move up to write point since the first lsn is not + * set for the new buffer. + */ + lp->s_lsn = f_lsn; + if (b_off == 0) + lp->s_lsn.offset = w_off; + + MUTEX_UNLOCK(env, lp->mtx_flush); + if (release) + LOG_SYSTEM_LOCK(env); + + lp->in_flush--; + STAT(++lp->stat.st_scount); + + /* + * How many flush calls (usually commits) did this call actually sync? + * At least one, if it got here. + */ + ncommit = 1; +done: + if (lp->ncommit != 0) { + first = 1; + SH_TAILQ_FOREACH(commit, &lp->commits, links, __db_commit) + if (LOG_COMPARE(&lp->s_lsn, &commit->lsn) > 0) { + MUTEX_UNLOCK(env, commit->mtx_txnwait); + SH_TAILQ_REMOVE( + &lp->commits, commit, links, __db_commit); + ncommit++; + } else if (first == 1) { + F_SET(commit, DB_COMMIT_FLUSH); + MUTEX_UNLOCK(env, commit->mtx_txnwait); + SH_TAILQ_REMOVE( + &lp->commits, commit, links, __db_commit); + /* + * This thread will wake and flush. + * If another thread commits and flushes + * first we will waste a trip trough the + * mutex. + */ + lp->in_flush++; + first = 0; + } + } +#ifdef HAVE_STATISTICS + if (lp->stat.st_maxcommitperflush < ncommit) + lp->stat.st_maxcommitperflush = ncommit; + if (lp->stat.st_mincommitperflush > ncommit || + lp->stat.st_mincommitperflush == 0) + lp->stat.st_mincommitperflush = ncommit; +#endif + + return (ret); +} + +/* + * __log_fill -- + * Write information into the log. 
+ */ +static int +__log_fill(dblp, lsn, addr, len) + DB_LOG *dblp; + DB_LSN *lsn; + void *addr; + u_int32_t len; +{ + LOG *lp; + u_int32_t bsize, nrec; + size_t nw, remain; + int ret; + + lp = dblp->reginfo.primary; + bsize = lp->buffer_size; + + if (lp->db_log_inmemory) { + __log_inmem_copyin(dblp, lp->b_off, addr, len); + lp->b_off = (lp->b_off + len) % lp->buffer_size; + return (0); + } + + while (len > 0) { /* Copy out the data. */ + /* + * If we're beginning a new buffer, note the user LSN to which + * the first byte of the buffer belongs. We have to know this + * when flushing the buffer so that we know if the in-memory + * buffer needs to be flushed. + */ + if (lp->b_off == 0) + lp->f_lsn = *lsn; + + /* + * If we're on a buffer boundary and the data is big enough, + * copy as many records as we can directly from the data. + */ + if (lp->b_off == 0 && len >= bsize) { + nrec = len / bsize; + if ((ret = __log_write(dblp, addr, nrec * bsize)) != 0) + return (ret); + addr = (u_int8_t *)addr + nrec * bsize; + len -= nrec * bsize; + STAT(++lp->stat.st_wcount_fill); + continue; + } + + /* Figure out how many bytes we can copy this time. */ + remain = bsize - lp->b_off; + nw = remain > len ? len : remain; + memcpy(dblp->bufp + lp->b_off, addr, nw); + addr = (u_int8_t *)addr + nw; + len -= (u_int32_t)nw; + lp->b_off += (u_int32_t)nw; + + /* If we fill the buffer, flush it. */ + if (lp->b_off == bsize) { + if ((ret = __log_write(dblp, dblp->bufp, bsize)) != 0) + return (ret); + lp->b_off = 0; + STAT(++lp->stat.st_wcount_fill); + } + } + return (0); +} + +/* + * __log_write -- + * Write the log buffer to disk. + */ +static int +__log_write(dblp, addr, len) + DB_LOG *dblp; + void *addr; + u_int32_t len; +{ + ENV *env; + LOG *lp; + size_t nw; + int ret; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, !lp->db_log_inmemory); + + /* + * If we haven't opened the log file yet or the current one has + * changed, acquire a new log file. 
We are creating the file if we're + * about to write to the start of it, in other words, if the write + * offset is zero. + */ + if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file || + dblp->lf_timestamp != lp->timestamp) + if ((ret = __log_newfh(dblp, lp->w_off == 0)) != 0) + return (ret); + + /* + * If we're writing the first block in a log file on a filesystem that + * guarantees unwritten blocks are zero-filled, we set the size of the + * file in advance. This increases sync performance on some systems, + * because they don't need to update metadata on every sync. + * + * Ignore any error -- we may have run out of disk space, but that's no + * reason to quit. + */ +#ifdef HAVE_FILESYSTEM_NOTZERO + if (lp->w_off == 0 && !__os_fs_notzero()) { +#else + if (lp->w_off == 0) { +#endif + (void)__db_file_extend(env, dblp->lfhp, lp->log_size); + if (F_ISSET(dblp, DBLOG_ZERO)) + (void)__db_zero_extend(env, dblp->lfhp, + 0, lp->log_size/lp->buffer_size, lp->buffer_size); + + } + + /* + * Seek to the offset in the file (someone may have written it + * since we last did). + */ + if ((ret = __os_io(env, DB_IO_WRITE, + dblp->lfhp, 0, 0, lp->w_off, len, addr, &nw)) != 0) + return (ret); + + /* Reset the buffer offset and update the seek offset. */ + lp->w_off += len; + + /* Update written statistics. */ + if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { + lp->stat.st_wc_bytes -= MEGABYTE; + ++lp->stat.st_wc_mbytes; + } +#ifdef HAVE_STATISTICS + if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { + lp->stat.st_w_bytes -= MEGABYTE; + ++lp->stat.st_w_mbytes; + } + ++lp->stat.st_wcount; +#endif + + return (0); +} + +/* + * __log_file_pp -- + * ENV->log_file pre/post processing. 
+ * + * PUBLIC: int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t)); + */ +int +__log_file_pp(dbenv, lsn, namep, len) + DB_ENV *dbenv; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret, set; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_file", DB_INIT_LOG); + + if ((ret = __log_get_config(dbenv, DB_LOG_IN_MEMORY, &set)) != 0) + return (ret); + if (set) { + __db_errx(env, DB_STR("2518", + "DB_ENV->log_file is illegal with in-memory logs")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_file(env, lsn, namep, len)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_file -- + * ENV->log_file. + */ +static int +__log_file(env, lsn, namep, len) + ENV *env; + const DB_LSN *lsn; + char *namep; + size_t len; +{ + DB_LOG *dblp; + int ret; + char *name; + + dblp = env->lg_handle; + LOG_SYSTEM_LOCK(env); + ret = __log_name(dblp, lsn->file, &name, NULL, 0); + LOG_SYSTEM_UNLOCK(env); + if (ret != 0) + return (ret); + + /* Check to make sure there's enough room and copy the name. */ + if (len < strlen(name) + 1) { + *namep = '\0'; + __db_errx(env, DB_STR("2519", + "DB_ENV->log_file: name buffer is too short")); + return (EINVAL); + } + (void)strcpy(namep, name); + __os_free(env, name); + + return (0); +} + +/* + * __log_newfh -- + * Acquire a file handle for the current log file. + */ +static int +__log_newfh(dblp, create) + DB_LOG *dblp; + int create; +{ + ENV *env; + LOG *lp; + u_int32_t flags; + int ret; + logfile_validity status; + + env = dblp->env; + lp = dblp->reginfo.primary; + + /* Close any previous file descriptor. */ + if (dblp->lfhp != NULL) { + (void)__os_closehandle(env, dblp->lfhp); + dblp->lfhp = NULL; + } + + flags = DB_OSO_SEQ | + (create ? DB_OSO_CREATE : 0) | + (F_ISSET(dblp, DBLOG_DIRECT) ? DB_OSO_DIRECT : 0) | + (F_ISSET(dblp, DBLOG_DSYNC) ? DB_OSO_DSYNC : 0); + + /* Get the path of the new file and open it. 
*/ + dblp->lfname = lp->lsn.file; + if ((ret = __log_valid(dblp, dblp->lfname, 0, &dblp->lfhp, + flags, &status, NULL)) != 0) + __db_err(env, ret, + "DB_ENV->log_newfh: %lu", (u_long)lp->lsn.file); + else if (status != DB_LV_NORMAL && status != DB_LV_INCOMPLETE && + status != DB_LV_OLD_READABLE) + ret = DB_NOTFOUND; + + return (ret); +} + +/* + * __log_name -- + * Return the log name for a particular file, and optionally open it. + * + * PUBLIC: int __log_name __P((DB_LOG *, + * PUBLIC: u_int32_t, char **, DB_FH **, u_int32_t)); + */ +int +__log_name(dblp, filenumber, namep, fhpp, flags) + DB_LOG *dblp; + u_int32_t filenumber, flags; + char **namep; + DB_FH **fhpp; +{ + ENV *env; + LOG *lp; + int mode, ret; + char *oname; + char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20]; + + env = dblp->env; + lp = dblp->reginfo.primary; + + DB_ASSERT(env, !lp->db_log_inmemory); + + /* + * !!! + * The semantics of this routine are bizarre. + * + * The reason for all of this is that we need a place where we can + * intercept requests for log files, and, if appropriate, check for + * both the old-style and new-style log file names. The trick is + * that all callers of this routine that are opening the log file + * read-only want to use an old-style file name if they can't find + * a match using a new-style name. The only down-side is that some + * callers may check for the old-style when they really don't need + * to, but that shouldn't mess up anything, and we only check for + * the old-style name when we've already failed to find a new-style + * one. + * + * Create a new-style file name, and if we're not going to open the + * file, return regardless. + */ + (void)snprintf(new, sizeof(new), LFNAME, filenumber); + if ((ret = __db_appname(env, + DB_APP_LOG, new, NULL, namep)) != 0 || fhpp == NULL) + return (ret); + + /* The application may have specified an absolute file mode. 
*/ + if (lp->filemode == 0) + mode = env->db_mode; + else { + LF_SET(DB_OSO_ABSMODE); + mode = lp->filemode; + } + + /* Open the new-style file -- if we succeed, we're done. */ + dblp->lf_timestamp = lp->timestamp; + if ((ret = __os_open(env, *namep, 0, flags, mode, fhpp)) == 0) + return (0); + + /* + * If the open failed for reason other than the file + * not being there, complain loudly, the wrong user + * probably started up the application. + */ + if (ret != ENOENT) { + __db_err(env, ret, DB_STR_A("2520", + "%s: log file unreadable", "%s"), *namep); + return (__env_panic(env, ret)); + } + + /* + * The open failed... if the DB_RDONLY flag isn't set, we're done, + * the caller isn't interested in old-style files. + */ + if (!LF_ISSET(DB_OSO_RDONLY)) { + __db_err(env, ret, DB_STR_A("2521", + "%s: log file open failed", "%s"), *namep); + return (__env_panic(env, ret)); + } + + /* Create an old-style file name. */ + (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber); + if ((ret = __db_appname(env, + DB_APP_LOG, old, NULL, &oname)) != 0) + goto err; + + /* + * Open the old-style file -- if we succeed, we're done. Free the + * space allocated for the new-style name and return the old-style + * name to the caller. + */ + if ((ret = __os_open(env, oname, 0, flags, mode, fhpp)) == 0) { + __os_free(env, *namep); + *namep = oname; + return (0); + } + + /* + * Couldn't find either style of name -- return the new-style name + * for the caller's error message. If it's an old-style name that's + * actually missing we're going to confuse the user with the error + * message, but that implies that not only were we looking for an + * old-style name, but we expected it to exist and we weren't just + * looking for any log file. That's not a likely error. + */ +err: __os_free(env, oname); + return (ret); +} + +/* + * __log_rep_put -- + * Short-circuit way for replication clients to put records into the + * log. 
Replication clients' logs need to be laid out exactly as their masters' + * are, so we let replication take responsibility for when the log gets + * flushed, when log switches files, etc. This is just a thin PUBLIC wrapper + * for __log_putr with a slightly prettier interface. + * + * Note that the REP->mtx_clientdb should be held when this is called. + * Note that we acquire the log region mutex while holding mtx_clientdb. + * + * PUBLIC: int __log_rep_put __P((ENV *, DB_LSN *, const DBT *, u_int32_t)); + */ +int +__log_rep_put(env, lsnp, rec, flags) + ENV *env; + DB_LSN *lsnp; + const DBT *rec; + u_int32_t flags; +{ + DBT *dbt, t; + DB_CIPHER *db_cipher; + DB_LOG *dblp; + HDR hdr; + LOG *lp; + int need_free, ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + memset(&hdr, 0, sizeof(HDR)); + t = *rec; + dbt = &t; + need_free = 0; + db_cipher = env->crypto_handle; + if (CRYPTO_ON(env)) + t.size += db_cipher->adj_size(rec->size); + if ((ret = __os_calloc(env, 1, t.size, &t.data)) != 0) + goto err; + need_free = 1; + memcpy(t.data, rec->data, rec->size); + + if ((ret = __log_encrypt_record(env, dbt, &hdr, rec->size)) != 0) + goto err; +#ifdef HAVE_LOG_CHECKSUM + __db_chksum(&hdr, t.data, t.size, + (CRYPTO_ON(env)) ? db_cipher->mac_key : NULL, hdr.chksum); +#endif + + DB_ASSERT(env, LOG_COMPARE(lsnp, &lp->lsn) == 0); + ret = __log_putr(dblp, lsnp, dbt, lp->lsn.offset - lp->len, &hdr); +err: + /* + * !!! Assume caller holds REP->mtx_clientdb to modify ready_lsn. + */ + lp->ready_lsn = lp->lsn; + + if (LF_ISSET(DB_LOG_CHKPNT)) + lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; + + /* Increment count of records added to the log. 
*/ + STAT(++lp->stat.st_record); + LOG_SYSTEM_UNLOCK(env); + if (need_free) + __os_free(env, t.data); + return (ret); +} + +static int +__log_encrypt_record(env, dbt, hdr, orig) + ENV *env; + DBT *dbt; + HDR *hdr; + u_int32_t orig; +{ + DB_CIPHER *db_cipher; + int ret; + + if (CRYPTO_ON(env)) { + db_cipher = env->crypto_handle; + hdr->size = HDR_CRYPTO_SZ; + hdr->orig_size = orig; + if ((ret = db_cipher->encrypt(env, db_cipher->data, + hdr->iv, dbt->data, dbt->size)) != 0) + return (ret); + } else { + hdr->size = HDR_NORMAL_SZ; + } + return (0); +} +/* + * __log_put_record_pp -- + * DB_ENV->log_put_record pre/post processing. + * + * PUBLIC: int __log_put_record_pp __P((DB_ENV *, DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t, + * PUBLIC: DB_LOG_RECSPEC *, ...)); + */ +#ifdef STDC_HEADERS +int +__log_put_record_pp(DB_ENV *dbenv, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, ...) +#else +int +__log_put_record_pp(dbenv, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, + spec, va_alist) + DB_ENV *dbenv; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t rectype; + u_int32_t has_data; + u_int32_t size; + DB_LOG_RECSPEC *spec; + va_dcl +#endif +{ + DB_THREAD_INFO *ip; + ENV *env; + va_list argp; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_put_record", DB_INIT_LOG); + + /* Validate arguments: check for allowed flags. */ + if ((ret = __db_fchk(env, "DB_ENV->log_put_record", flags, + DB_LOG_CHKPNT | DB_LOG_COMMIT | + DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_WRNOSYNC)) != 0) + return (ret); + + /* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */ + if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH)) + return (__db_ferr(env, "DB_ENV->log_put_record", 1)); + + /* Replication clients should never write log records. 
*/ + if (IS_REP_CLIENT(env)) { + __db_errx(env, DB_STR("2522", + "DB_ENV->log_put is illegal on replication clients")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + va_start(argp, spec); + REPLICATION_WRAP(env, (__log_put_record_int(env, dbp, + txnp, ret_lsnp, flags, rectype, has_data, size, spec, argp)), + 0, ret); + va_end(argp); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * PUBLIC: int __log_put_record __P((ENV *, DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t, + * PUBLIC: DB_LOG_RECSPEC *, ...)); + */ +#ifdef STDC_HEADERS +int +__log_put_record(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, ...) +#else +int +__log_put_record(env, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, spec, va_alist); + ENV *env; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t rectype; + u_int32_t has_data; + u_int32_t size; + DB_LOG_RECSPEC *spec; + va_dcl +#endif +{ + va_list argp; + int ret; + + va_start(argp, spec); + ret = __log_put_record_int(env, dbp, txnp, ret_lsnp, flags, + rectype, has_data, size, spec, argp); + va_end(argp); + return (ret); +} + +#ifdef STDC_HEADERS +static int +__log_put_record_int(ENV *env, DB *dbp, DB_TXN *txnp, DB_LSN *ret_lsnp, + u_int32_t flags, u_int32_t rectype, u_int32_t has_data, u_int32_t size, + DB_LOG_RECSPEC *spec, va_list argp) +#else +int +__log_put_record_int(env, dbp, txnp, ret_lsnp, + flags, rectype, has_data, size, spec, argp); + ENV *env; + DB *dbp; + DB_TXN *txnp; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t has_data; + u_int32_t size; + u_int32_t rectype; + DB_LOG_RECSPEC *spec; + va_list argp; +#endif +{ + DBT *data, *dbt, *header, logrec; + DB_LOG_RECSPEC *sp; + DB_LSN *lsnp, lsn, null_lsn, *pagelsn, *rlsnp; + DB_TXNLOGREC *lr; + LOG *lp; + PAGE *pghdrstart; + u_int32_t hdrsize, op, zero, uinttmp, txn_num; + u_int npad; + u_int8_t *bp; + int 
is_durable, ret; + void *hdrstart; + + COMPQUIET(lr, NULL); + COMPQUIET(hdrsize, 0); + COMPQUIET(op, 0); + COMPQUIET(hdrstart, NULL); + COMPQUIET(pghdrstart, NULL); + COMPQUIET(header, NULL); + + /* + * rlsnp will be stored into while holding the log system lock. + * If this is a commit record then ret_lsnp will be the address of + * the transaction detail visible_lsn field. If not then this + * may be the lsn of a page and we do not want to set it if + * the log_put fails after writing the record (due to an I/O error). + */ + if (LF_ISSET(DB_LOG_COMMIT)) + rlsnp = ret_lsnp; + else + rlsnp = &lsn; + npad = 0; + ret = 0; + data = NULL; + + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + (dbp != NULL && F_ISSET(dbp, DB_AM_NOT_DURABLE))) { + if (txnp == NULL) + return (0); + is_durable = 0; + } else + is_durable = 1; + + if (txnp == NULL) { + txn_num = 0; + lsnp = &null_lsn; + null_lsn.file = null_lsn.offset = 0; + } else { + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(env, rectype, txnp)) != 0) + return (ret); + /* + * We need to assign begin_lsn while holding region mutex. + * That assignment is done inside the DbEnv->log_put call, + * so pass in the appropriate memory location to be filled + * in by the log_put code. 
+ */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; + } + + if (dbp != NULL) { + DB_ASSERT(env, dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + } + + logrec.size = size; + + if (CRYPTO_ON(env)) { + npad = env->crypto_handle->adj_size(logrec.size); + logrec.size += npad; + } + + if (is_durable || txnp == NULL) { + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) + return (ret); + } else { + if ((ret = __os_malloc(env, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + if ((ret = + __os_malloc(env, logrec.size, &logrec.data)) != 0) { + __os_free(env, lr); + return (ret); + } +#else + logrec.data = lr->data; +#endif + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); + + bp = logrec.data; + + LOGCOPY_32(env, bp, &rectype); + bp += sizeof(rectype); + + LOGCOPY_32(env, bp, &txn_num); + bp += sizeof(txn_num); + + LOGCOPY_FROMLSN(env, bp, lsnp); + bp += sizeof(DB_LSN); + + zero = 0; + lp = env->lg_handle->reginfo.primary; + for (sp = spec; sp->type != LOGREC_Done; sp++) { + switch (sp->type) { + case LOGREC_DB: + /* This is not in the varargs. 
*/ + uinttmp = (u_int32_t)dbp->log_filename->id; + LOGCOPY_32(env, bp, &uinttmp); + bp += sizeof(uinttmp); + break; + + case LOGREC_ARG: + case LOGREC_TIME: + case LOGREC_DBOP: + uinttmp = va_arg(argp, u_int32_t); + LOGCOPY_32(env, bp, &uinttmp); + bp += sizeof(uinttmp); + break; + case LOGREC_OP: + op = va_arg(argp, u_int32_t); + LOGCOPY_32(env, bp, &op); + bp += sizeof(uinttmp); + break; + case LOGREC_DBT: + case LOGREC_PGLIST: + case LOGREC_LOCKS: + case LOGREC_HDR: + case LOGREC_DATA: + dbt = va_arg(argp, DBT *); + if (dbt == NULL) { + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &dbt->size); + bp += sizeof(dbt->size); + memcpy(bp, dbt->data, dbt->size); + } + /* Process fields that need to be byte swapped. */ + if (dbp != NULL && F_ISSET(dbp, DB_AM_SWAP)) { + if (sp->type == LOGREC_HDR && + dbt != NULL && has_data == 0) + __db_recordswap(op, + dbt->size, bp, NULL, 0); + else if (sp->type == LOGREC_HDR) { + hdrstart = bp; + hdrsize = dbt == NULL ? 0 : dbt->size; + } else if (sp->type == LOGREC_DATA) { + __db_recordswap(op, + hdrsize, hdrstart, bp, 0); + has_data = 0; + } + } + if (dbt != NULL) + bp += dbt->size; + + break; + /* + * Page header and data -- we assume that the header + * is listed first and the data follows sometime later. + * There should be only one header/data pair per record. 
+ */ + case LOGREC_PGDBT: + header = va_arg(argp, DBT *); + if (header == NULL) { + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + LOGCOPY_32(env, bp, &header->size); + bp += sizeof(header->size); + pghdrstart = (PAGE *)bp; + memcpy(bp, header->data, header->size); + if (has_data == 0 && + F_ISSET(dbp, DB_AM_SWAP) && + (ret = __db_pageswap( + env, dbp, pghdrstart, (size_t)header->size, + NULL, 0)) != 0) + return (ret); + bp += header->size; + } + break; + + case LOGREC_PGDDBT: + data = va_arg(argp, DBT *); + if (data == NULL) { + zero = 0; + LOGCOPY_32(env, bp, &zero); + bp += sizeof(u_int32_t); + } else { + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __db_pageswap(env, dbp, pghdrstart, + (size_t)header->size, (DBT *)data, 0)) != 0) + return (ret); + LOGCOPY_32(env, bp, &data->size); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + if (F_ISSET(dbp, DB_AM_SWAP) && + F_ISSET(data, DB_DBT_APPMALLOC)) + __os_free(env, data->data); + bp += data->size; + } + break; + case LOGREC_POINTER: + pagelsn = va_arg(argp, DB_LSN *); + if (pagelsn != NULL) { + if (txnp != NULL) { + if (LOG_COMPARE(pagelsn, + &lp->lsn) >= 0 && (ret = + __log_check_page_lsn(env, + dbp, pagelsn)) != 0) + return (ret); + } + LOGCOPY_FROMLSN(env, bp, pagelsn); + } else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + break; + + default: + DB_ASSERT(env, sp->type != sp->type); + } + } + + DB_ASSERT(env, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + + if (is_durable || txnp == NULL) { + if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, + flags | DB_LOG_NOCOPY)) == 0) { + if (txnp != NULL) + *lsnp = *rlsnp; + *ret_lsnp = *rlsnp; + } + } else { + ret = 0; +#ifdef DIAGNOSTIC + /* + * Set the debug bit if we are going to log non-durable + * transactions so they will be ignored by recovery. 
+ */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + LOGCOPY_32(env, logrec.data, &rectype); + + if (!IS_REP_CLIENT(env) && !lp->db_log_inmemory) + ret = __log_put(env, + rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); +#endif + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); + LSN_NOT_LOGGED(*ret_lsnp); + } + +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__db_addrem_print(env, + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); +#endif + +#ifdef DIAGNOSTIC + __os_free(env, logrec.data); +#else + if (is_durable || txnp == NULL) + __os_free(env, logrec.data); +#endif + return (ret); +} diff --git a/src/log/log_stat.c b/src/log/log_stat.c new file mode 100644 index 00000000..74f4c5aa --- /dev/null +++ b/src/log/log_stat.c @@ -0,0 +1,336 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_STATISTICS +static int __log_print_all __P((ENV *, u_int32_t)); +static int __log_print_stats __P((ENV *, u_int32_t)); +static int __log_stat __P((ENV *, DB_LOG_STAT **, u_int32_t)); + +/* + * __log_stat_pp -- + * DB_ENV->log_stat pre/post processing. + * + * PUBLIC: int __log_stat_pp __P((DB_ENV *, DB_LOG_STAT **, u_int32_t)); + */ +int +__log_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_stat", DB_INIT_LOG); + + if ((ret = __db_fchk(env, + "DB_ENV->log_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_stat(env, statp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_stat -- + * DB_ENV->log_stat. 
+ */ +static int +__log_stat(env, statp, flags) + ENV *env; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LOG_STAT *stats; + LOG *lp; + int ret; + + *statp = NULL; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if ((ret = __os_umalloc(env, sizeof(DB_LOG_STAT), &stats)) != 0) + return (ret); + + /* Copy out the global statistics. */ + LOG_SYSTEM_LOCK(env); + *stats = lp->stat; + if (LF_ISSET(DB_STAT_CLEAR)) + memset(&lp->stat, 0, sizeof(lp->stat)); + + stats->st_magic = lp->persist.magic; + stats->st_version = lp->persist.version; + stats->st_mode = lp->filemode; + stats->st_lg_bsize = lp->buffer_size; + stats->st_lg_size = lp->log_nsize; + + __mutex_set_wait_info(env, lp->mtx_region, + &stats->st_region_wait, &stats->st_region_nowait); + if (LF_ISSET(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM) == DB_STAT_CLEAR) + __mutex_clear(env, lp->mtx_region); + stats->st_regsize = dblp->reginfo.rp->size; + + stats->st_cur_file = lp->lsn.file; + stats->st_cur_offset = lp->lsn.offset; + stats->st_disk_file = lp->s_lsn.file; + stats->st_disk_offset = lp->s_lsn.offset; + + LOG_SYSTEM_UNLOCK(env); + + *statp = stats; + return (0); +} + +/* + * __log_stat_print_pp -- + * DB_ENV->log_stat_print pre/post processing. + * + * PUBLIC: int __log_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__log_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->log_stat_print", DB_INIT_LOG); + + if ((ret = __db_fchk(env, "DB_ENV->log_stat_print", + flags, DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__log_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __log_stat_print -- + * DB_ENV->log_stat_print method. 
+ * + * PUBLIC: int __log_stat_print __P((ENV *, u_int32_t)); + */ +int +__log_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __log_print_stats(env, orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __log_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __log_print_stats -- + * Display default log region statistics. + */ +static int +__log_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_LOG_STAT *sp; + int ret; + + if ((ret = __log_stat(env, &sp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default logging region information:"); + STAT_HEX("Log magic number", sp->st_magic); + STAT_ULONG("Log version number", sp->st_version); + __db_dlbytes(env, "Log record cache size", + (u_long)0, (u_long)0, (u_long)sp->st_lg_bsize); + __db_msg(env, "%#o\tLog file mode", sp->st_mode); + if (sp->st_lg_size % MEGABYTE == 0) + __db_msg(env, "%luMb\tCurrent log file size", + (u_long)sp->st_lg_size / MEGABYTE); + else if (sp->st_lg_size % 1024 == 0) + __db_msg(env, "%luKb\tCurrent log file size", + (u_long)sp->st_lg_size / 1024); + else + __db_msg(env, "%lu\tCurrent log file size", + (u_long)sp->st_lg_size); + __db_dl(env, "Initial fileid allocation", (u_long)sp->st_fileid_init); + __db_dl(env, "Current fileids in use", (u_long)sp->st_nfileid); + __db_dl(env, "Maximum fileids used", (u_long)sp->st_maxnfileid); + __db_dl(env, "Records entered into the log", (u_long)sp->st_record); + __db_dlbytes(env, "Log bytes written", + (u_long)0, (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes); + __db_dlbytes(env, "Log bytes written since last checkpoint", + (u_long)0, (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes); + __db_dl(env, "Total log file I/O writes", (u_long)sp->st_wcount); + __db_dl(env, 
"Total log file I/O writes due to overflow", + (u_long)sp->st_wcount_fill); + __db_dl(env, "Total log file flushes", (u_long)sp->st_scount); + __db_dl(env, "Total log file I/O reads", (u_long)sp->st_rcount); + STAT_ULONG("Current log file number", sp->st_cur_file); + STAT_ULONG("Current log file offset", sp->st_cur_offset); + STAT_ULONG("On-disk log file number", sp->st_disk_file); + STAT_ULONG("On-disk log file offset", sp->st_disk_offset); + + __db_dl(env, + "Maximum commits in a log flush", (u_long)sp->st_maxcommitperflush); + __db_dl(env, + "Minimum commits in a log flush", (u_long)sp->st_mincommitperflush); + + __db_dlbytes(env, "Region size", + (u_long)0, (u_long)0, (u_long)sp->st_regsize); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait, + sp->st_region_wait + sp->st_region_nowait), NULL); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __log_print_all -- + * Display debugging log region statistics. 
+ */ +static int +__log_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN fn[] = { + { DBLOG_RECOVER, "DBLOG_RECOVER" }, + { DBLOG_FORCE_OPEN, "DBLOG_FORCE_OPEN" }, + { DBLOG_AUTOREMOVE, "DBLOG_AUTOREMOVE"}, + { DBLOG_DIRECT, "DBLOG_DIRECT"}, + { DBLOG_DSYNC, "DBLOG_DSYNC"}, + { DBLOG_FORCE_OPEN, "DBLOG_FORCE_OPEN"}, + { DBLOG_INMEMORY, "DBLOG_INMEMORY"}, + { DBLOG_OPENFILES, "DBLOG_OPENFILES"}, + { DBLOG_RECOVER, "DBLOG_RECOVER"}, + { DBLOG_ZERO, "DBLOG_ZERO"}, + { 0, NULL } + }; + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + LOG_SYSTEM_LOCK(env); + + __db_print_reginfo(env, &dblp->reginfo, "Log", flags); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_LOG handle information:"); + __mutex_print_debug_single( + env, "DB_LOG handle mutex", dblp->mtx_dbreg, flags); + STAT_ULONG("Log file name", dblp->lfname); + __db_print_fh(env, "Log file handle", dblp->lfhp, flags); + __db_prflags(env, NULL, dblp->flags, fn, NULL, "\tFlags"); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "LOG handle information:"); + __mutex_print_debug_single( + env, "LOG region mutex", lp->mtx_region, flags); + __mutex_print_debug_single( + env, "File name list mutex", lp->mtx_filelist, flags); + + STAT_HEX("persist.magic", lp->persist.magic); + STAT_ULONG("persist.version", lp->persist.version); + __db_dlbytes(env, + "persist.log_size", (u_long)0, (u_long)0, lp->persist.log_size); + STAT_FMT("log file permissions mode", "%#lo", u_long, lp->filemode); + STAT_LSN("current file offset LSN", &lp->lsn); + STAT_LSN("first buffer byte LSN", &lp->lsn); + STAT_ULONG("current buffer offset", lp->b_off); + STAT_ULONG("current file write offset", lp->w_off); + STAT_ULONG("length of last record", lp->len); + STAT_LONG("log flush in progress", lp->in_flush); + __mutex_print_debug_single( + env, "Log flush mutex", lp->mtx_flush, flags); + + STAT_LSN("last sync LSN", &lp->s_lsn); + + /* + * Don't display the 
replication fields here, they're displayed as part + * of the replication statistics. + */ + + STAT_LSN("cached checkpoint LSN", &lp->cached_ckp_lsn); + + __db_dlbytes(env, + "log buffer size", (u_long)0, (u_long)0, lp->buffer_size); + __db_dlbytes(env, + "log file size", (u_long)0, (u_long)0, lp->log_size); + __db_dlbytes(env, + "next log file size", (u_long)0, (u_long)0, lp->log_nsize); + + STAT_ULONG("transactions waiting to commit", lp->ncommit); + STAT_LSN("LSN of first commit", &lp->t_lsn); + + LOG_SYSTEM_UNLOCK(env); + + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__log_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__log_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/log/log_verify.c b/src/log/log_verify.c new file mode 100644 index 00000000..41b78219 --- /dev/null +++ b/src/log/log_verify.c @@ -0,0 +1,437 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#include "dbinc/log_verify.h" + +#define FIRST_OFFSET(env) \ + (sizeof(LOGP) + (CRYPTO_ON(env) ? 
HDR_CRYPTO_SZ : HDR_NORMAL_SZ)) + +static int __env_init_verify __P((ENV *, u_int32_t, DB_DISTAB *)); + +/* + * PUBLIC: int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); + */ +int +__log_verify_pp(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + int lsnrg, ret, timerg; + DB_THREAD_INFO *ip; + const char *phome; + + lsnrg = ret = timerg = 0; + phome = NULL; + + if (!IS_ZERO_LSN(lvconfig->start_lsn) || + !IS_ZERO_LSN(lvconfig->end_lsn)) + lsnrg = 1; + if (lvconfig->start_time != 0 || lvconfig->end_time != 0) + timerg = 1; + + if ((!IS_ZERO_LSN(lvconfig->start_lsn) && lvconfig->start_time != 0) || + (!IS_ZERO_LSN(lvconfig->end_lsn) && lvconfig->end_time != 0) || + (lsnrg && timerg)) { + __db_errx(dbenv->env, DB_STR("2501", + "Set either an lsn range or a time range to verify logs " + "in the range, don't mix time and lsn.")); + ret = EINVAL; + goto err; + } + phome = dbenv->env->db_home; + if (phome != NULL && lvconfig->temp_envhome != NULL && + strcmp(phome, lvconfig->temp_envhome) == 0) { + __db_errx(dbenv->env, + "Environment home for log verification internal use " + "overlaps with that of the environment to verify."); + ret = EINVAL; + goto err; + } + + ENV_ENTER(dbenv->env, ip); + ret = __log_verify(dbenv, lvconfig, ip); + ENV_LEAVE(dbenv->env, ip); +err: return (ret); +} + +/* + * PUBLIC: int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *, + * PUBLIC: DB_THREAD_INFO *)); + */ +int +__log_verify(dbenv, lvconfig, ip) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; + DB_THREAD_INFO *ip; +{ + + u_int32_t logcflag, max_fileno; + DB_LOGC *logc; + ENV *env; + DBT data; + DB_DISTAB dtab; + DB_LSN key, start, start2, stop, stop2, verslsn; + u_int32_t newversion, version; + int cmp, fwdscroll, goprev, ret, tret; + time_t starttime, endtime; + const char *okmsg; + DB_LOG_VRFY_INFO *logvrfy_hdl; + + okmsg = NULL; + fwdscroll = 1; + max_fileno = (u_int32_t)-1; + goprev = 0; + env = dbenv->env; + logc = NULL; 
+ memset(&dtab, 0, sizeof(dtab)); + memset(&data, 0, sizeof(data)); + version = newversion = 0; + ZERO_LSN(verslsn); + memset(&start, 0, sizeof(DB_LSN)); + memset(&start2, 0, sizeof(DB_LSN)); + memset(&stop, 0, sizeof(DB_LSN)); + memset(&stop2, 0, sizeof(DB_LSN)); + memset(&key, 0, sizeof(DB_LSN)); + memset(&verslsn, 0, sizeof(DB_LSN)); + + start = lvconfig->start_lsn; + stop = lvconfig->end_lsn; + starttime = lvconfig->start_time; + endtime = lvconfig->end_time; + + if ((ret = __create_log_vrfy_info(lvconfig, &logvrfy_hdl, ip)) != 0) + goto err; + logvrfy_hdl->lv_config = lvconfig; + if (lvconfig->continue_after_fail) + F_SET(logvrfy_hdl, DB_LOG_VERIFY_CAF); + if (lvconfig->verbose) + F_SET(logvrfy_hdl, DB_LOG_VERIFY_VERBOSE); + + /* Allocate a log cursor. */ + if ((ret = __log_cursor(dbenv->env, &logc)) != 0) { + __db_err(dbenv->env, ret, "DB_ENV->log_cursor"); + goto err; + } + /* Ignore failed chksum and go on with next one. */ + F_SET(logc->env->lg_handle, DBLOG_VERIFYING); + + /* Only scan the range that we want to verify. */ + if (fwdscroll) { + if (IS_ZERO_LSN(stop)) { + logcflag = DB_LAST; + key.file = key.offset = 0; + } else { + key = stop; + logcflag = DB_SET; + } + logvrfy_hdl->flags |= DB_LOG_VERIFY_FORWARD; + goto startscroll; + } + +vrfyscroll: + + /* + * Initialize version to 0 so that we get the + * correct version right away. + */ + version = 0; + ZERO_LSN(verslsn); + + /* + * In the log verification config struct, start_lsn and end_lsn have + * higher priority than start_time and end_time, and you can specify + * either lsn or time to start/stop verification. 
+ */ + if (starttime != 0 || endtime != 0) { + if ((ret = __find_lsnrg_by_timerg(logvrfy_hdl, + starttime, endtime, &start2, &stop2)) != 0) + goto err; + ((DB_LOG_VERIFY_CONFIG *)lvconfig)->start_lsn = start = start2; + ((DB_LOG_VERIFY_CONFIG *)lvconfig)->end_lsn = stop = stop2; + } + + if (IS_ZERO_LSN(start)) { + logcflag = DB_FIRST; + key.file = key.offset = 0; + } else { + key = start; + logcflag = DB_SET; + F_SET(logvrfy_hdl, DB_LOG_VERIFY_PARTIAL); + } + goprev = 0; + + /* + * So far we only support verifying a specific db file. The config's + * dbfile must be prefixed with the data directory if it's not in + * environment home directory. + */ + if (lvconfig->dbfile != NULL) { + F_SET(logvrfy_hdl, + DB_LOG_VERIFY_DBFILE | DB_LOG_VERIFY_PARTIAL); + if ((ret = __set_logvrfy_dbfuid(logvrfy_hdl)) != 0) + goto err; + } + +startscroll: + + memset(&data, 0, sizeof(data)); + + for (;;) { + + /* + * We may have reached beyond the range we're verifying. + */ + if (!fwdscroll && !IS_ZERO_LSN(stop)) { + cmp = LOG_COMPARE(&key, &stop); + if (cmp > 0) + break; + } + if (fwdscroll && !IS_ZERO_LSN(start)) { + cmp = LOG_COMPARE(&key, &start); + if (cmp < 0) + break; + } + + ret = __logc_get(logc, &key, &data, logcflag); + if (ret != 0) { + if (ret == DB_NOTFOUND) { + /* We may not start from the first log file. */ + if (logcflag == DB_PREV && key.file > 1) + F_SET(logvrfy_hdl, + DB_LOG_VERIFY_PARTIAL); + break; + } + __db_err(dbenv->env, ret, "DB_LOGC->get"); + /* + * When go beyond valid lsn range, we may get other + * error values than DB_NOTFOUND. + */ + goto out; + } + + if (logcflag == DB_SET) { + if (goprev) + logcflag = DB_PREV; + else + logcflag = DB_NEXT; + } else if (logcflag == DB_LAST) { + logcflag = DB_PREV; + max_fileno = key.file; + } else if (logcflag == DB_FIRST) + logcflag = DB_NEXT; + + if (key.file != verslsn.file) { + /* + * If our log file changed, we need to see if the + * version of the log file changed as well. + * If it changed, reset the print table. 
+ */ + if ((ret = __logc_version(logc, &newversion)) != 0) { + __db_err(dbenv->env, ret, "DB_LOGC->version"); + goto err; + } + if (version != newversion) { + version = newversion; + if (!IS_LOG_VRFY_SUPPORTED(version)) { + __db_msg(dbenv->env, DB_STR_A("2502", + "[%lu][%lu] Unsupported version of log file, " + "log file number: %u, log file version: %u, " + "supported log version: %u.", + "%lu %lu %u %u %u"), + (u_long)key.file, + (u_long)key.offset, + key.file, version, DB_LOGVERSION); + if (logcflag == DB_NEXT) { + key.file += 1; + if (key.file > max_fileno) + break; + /* + * Txns don't span log versions, no need to + * set DB_LOG_VERIFY_PARTIAL here. + */ + } else { + goprev = 1; + key.file -= 1; + if (key.file == 0) + break; + } + key.offset = FIRST_OFFSET(env); + logcflag = DB_SET; + continue; + } + if ((ret = __env_init_verify(env, version, + &dtab)) != 0) { + __db_err(dbenv->env, ret, + DB_STR("2503", + "callback: initialization")); + goto err; + } + } + verslsn = key; + } + + ret = __db_dispatch(dbenv->env, &dtab, &data, &key, + DB_TXN_LOG_VERIFY, logvrfy_hdl); + + if (!fwdscroll && ret != 0) { + if (!F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_CAF)) { + __db_err(dbenv->env, ret, + "[%lu][%lu] __db_dispatch", + (u_long)key.file, (u_long)key.offset); + goto err; + } else + F_SET(logvrfy_hdl, DB_LOG_VERIFY_ERR); + } + } + + if (fwdscroll) { + fwdscroll = 0; + F_CLR(logvrfy_hdl, DB_LOG_VERIFY_FORWARD); + goto vrfyscroll; + } +out: + /* + * When we arrive here ret can be 0 or errors returned by DB_LOGC->get, + * all which we have already handled. So we clear ret. + */ + ret = 0; + + /* If continuing after fail, we can complete the entire log. */ + if (F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_ERR) || + F_ISSET(logvrfy_hdl, DB_LOG_VERIFY_INTERR)) + ret = DB_LOG_VERIFY_BAD; + /* + * This function can be called when the environment is alive, so + * there can be active transactions. 
+ */ + __db_log_verify_global_report(logvrfy_hdl); + if (ret == DB_LOG_VERIFY_BAD) + okmsg = DB_STR_P("FAILED"); + else { + DB_ASSERT(dbenv->env, ret == 0); + okmsg = DB_STR_P("SUCCEEDED"); + } + + __db_msg(dbenv->env, DB_STR_A("2504", + "Log verification ended and %s.", "%s"), okmsg); + +err: + if (logc != NULL) + (void)__logc_close(logc); + if ((tret = __destroy_log_vrfy_info(logvrfy_hdl)) != 0 && ret == 0) + ret = tret; + if (dtab.int_dispatch) + __os_free(dbenv->env, dtab.int_dispatch); + if (dtab.ext_dispatch) + __os_free(dbenv->env, dtab.ext_dispatch); + + return (ret); +} + +/* + * __env_init_verify-- + */ +static int +__env_init_verify(env, version, dtabp) + ENV *env; + u_int32_t version; + DB_DISTAB *dtabp; +{ + int ret; + + /* + * We need to prime the print table with the current print + * functions. Then we overwrite only specific entries based on + * each previous version we support. + */ + if ((ret = __bam_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __crdel_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __db_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __dbreg_init_verify(env, dtabp)) != 0) + goto err; + if ((ret = __fop_init_verify(env, dtabp)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = __ham_init_verify(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_HEAP + if ((ret = __heap_init_verify(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_QUEUE + if ((ret = __qam_init_verify(env, dtabp)) != 0) + goto err; +#endif + if ((ret = __txn_init_verify(env, dtabp)) != 0) + goto err; + + switch (version) { + case DB_LOGVERSION: + ret = 0; + break; + + default: + __db_errx(env, DB_STR_A("2505", "Not supported version %lu", + "%lu"), (u_long)version); + ret = EINVAL; + break; + } +err: return (ret); +} + +/* + * __log_verify_wrap -- + * Wrapper function for APIs of other languages, like java/c# and + * script languages. It's much easier to implement the swig layer + * when we split up the C structure. 
+ * + * PUBLIC: int __log_verify_wrap __P((ENV *, const char *, u_int32_t, + * PUBLIC: const char *, const char *, time_t, time_t, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, int, int)); + */ +int +__log_verify_wrap(env, envhome, cachesize, dbfile, dbname, + stime, etime, stfile, stoffset, efile, eoffset, caf, verbose) + ENV *env; + const char *envhome, *dbfile, *dbname; + time_t stime, etime; + u_int32_t cachesize, stfile, stoffset, efile, eoffset; + int caf, verbose; +{ + DB_LOG_VERIFY_CONFIG cfg; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cachesize = cachesize; + cfg.temp_envhome = envhome; + cfg.dbfile = dbfile; + cfg.dbname = dbname; + cfg.start_time = stime; + cfg.end_time = etime; + cfg.start_lsn.file = stfile; + cfg.start_lsn.offset = stoffset; + cfg.end_lsn.file = efile; + cfg.end_lsn.offset = eoffset; + cfg.continue_after_fail = caf; + cfg.verbose = verbose; + + return __log_verify_pp(env->dbenv, &cfg); +} diff --git a/src/log/log_verify_auto.c b/src/log/log_verify_auto.c new file mode 100644 index 00000000..08bc5d64 --- /dev/null +++ b/src/log/log_verify_auto.c @@ -0,0 +1,318 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/fop.h" + +/* + * PUBLIC: int __crdel_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__crdel_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_metasub_verify, DB___crdel_metasub)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_create_verify, DB___crdel_inmem_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_rename_verify, DB___crdel_inmem_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __crdel_inmem_remove_verify, DB___crdel_inmem_remove)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __db_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__db_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __db_addrem_verify, DB___db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_big_verify, DB___db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_ovref_verify, DB___db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_debug_verify, DB___db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_noop_verify, DB___db_noop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_alloc_verify, DB___db_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_free_verify, DB___db_pg_free)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_cksum_verify, DB___db_cksum)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, 
dtabp, + __db_pg_freedata_verify, DB___db_pg_freedata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_init_verify, DB___db_pg_init)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_trunc_verify, DB___db_pg_trunc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_realloc_verify, DB___db_realloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_relink_verify, DB___db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_merge_verify, DB___db_merge)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pgno_verify, DB___db_pgno)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __dbreg_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__dbreg_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __dbreg_register_verify, DB___dbreg_register)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __bam_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__bam_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_verify, DB___bam_split)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rsplit_verify, DB___bam_rsplit)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_adj_verify, DB___bam_adj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cadjust_verify, DB___bam_cadjust)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_cdel_verify, DB___bam_cdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_repl_verify, DB___bam_repl)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_root_verify, DB___bam_root)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + 
__bam_curadj_verify, DB___bam_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_rcuradj_verify, DB___bam_rcuradj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_irep_verify, DB___bam_irep)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __fop_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__fop_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_create_verify, DB___fop_create)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_remove_verify, DB___fop_remove)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_verify, DB___fop_write)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_verify, DB___fop_rename)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_verify, DB___fop_rename_noundo)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_file_remove_verify, DB___fop_file_remove)) != 0) + return (ret); + return (0); +} + +#ifdef HAVE_HASH +/* + * PUBLIC: int __ham_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__ham_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_insdel_verify, DB___ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_newpage_verify, DB___ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_splitdata_verify, DB___ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_replace_verify, DB___ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_copypage_verify, DB___ham_copypage)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_metagroup_verify, DB___ham_metagroup)) != 0) + return (ret); + 
if ((ret = __db_add_recovery_int(env, dtabp, + __ham_groupalloc_verify, DB___ham_groupalloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_changeslot_verify, DB___ham_changeslot)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_contract_verify, DB___ham_contract)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_curadj_verify, DB___ham_curadj)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_chgpg_verify, DB___ham_chgpg)) != 0) + return (ret); + return (0); +} + +#endif /* HAVE_HASH */ +#ifdef HAVE_HEAP +/* + * PUBLIC: int __heap_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__heap_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_addrem_verify, DB___heap_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_pg_alloc_verify, DB___heap_pg_alloc)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_meta_verify, DB___heap_trunc_meta)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __heap_trunc_page_verify, DB___heap_trunc_page)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_HEAP */ +#ifdef HAVE_QUEUE +/* + * PUBLIC: int __qam_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__qam_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_incfirst_verify, DB___qam_incfirst)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_mvptr_verify, DB___qam_mvptr)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_del_verify, DB___qam_del)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_add_verify, DB___qam_add)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_delext_verify, DB___qam_delext)) != 0) + return 
(ret); + return (0); +} + +#endif /* HAVE_QUEUE */ +/* + * PUBLIC: int __txn_init_verify __P((ENV *, DB_DISTAB *)); + */ +int +__txn_init_verify(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_regop_verify, DB___txn_regop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_ckp_verify, DB___txn_ckp)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_child_verify, DB___txn_child)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_prepare_verify, DB___txn_prepare)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_recycle_verify, DB___txn_recycle)) != 0) + return (ret); + return (0); +} diff --git a/src/log/log_verify_int.c b/src/log/log_verify_int.c new file mode 100644 index 00000000..2df21418 --- /dev/null +++ b/src/log/log_verify_int.c @@ -0,0 +1,4343 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This file contains verification functions for all types of log records, + * one for each type. We can't make this automated like the log_type_print/read + * functions because there are no consistent handling. Each type of log records + * have unique ways to verify, and unique information to extract. + * + * In each verification function, we first call the log_type_read function + * to get the log_type_args structure, then extract information according to + * the type of log. The log types can be made into different categories, each + * of which have similar types of information. 
+ * + * For example, txn_regop and txn_ckp types both have timestamps, and we + * want to maintain (timestamp,lsn) mapping, so we will have a on_timestamp + * function, and call it in txn_regop_verify and txn_ckp_verify functions, + * and in the two functions we may call other on_*** functions to extract and + * verify other information. + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#include "dbinc/heap.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#include "dbinc/log_verify.h" + +static int __log_vrfy_proc __P((DB_LOG_VRFY_INFO *, DB_LSN, DB_LSN, + u_int32_t, DB_TXN *, int32_t, int *)); +static int __lv_ckp_vrfy_handler __P((DB_LOG_VRFY_INFO *, + VRFY_TXN_INFO *, void *)); +static const char *__lv_dbreg_str __P((u_int32_t)); +static int __lv_dbregid_to_dbtype __P((DB_LOG_VRFY_INFO *, int32_t, DBTYPE *)); +static int __lv_dbt_str __P((const DBT *, char **)); +static const char *__lv_dbtype_str __P((DBTYPE)); +static u_int32_t __lv_first_offset __P((ENV *)); +static int __lv_new_logfile_vrfy __P((DB_LOG_VRFY_INFO *, const DB_LSN *)); +static int __lv_log_fwdscr_oncmt __P((DB_LOG_VRFY_INFO *, DB_LSN, + u_int32_t, u_int32_t, int32_t)); +static int __lv_log_fwdscr_onrec __P((DB_LOG_VRFY_INFO *, + u_int32_t, u_int32_t, DB_LSN, DB_LSN)); +static int __lv_log_mismatch __P((DB_LOG_VRFY_INFO *, DB_LSN, DBTYPE, DBTYPE)); +static int __lv_on_bam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_ham_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_heap_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_new_txn __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + const DB_TXN *, u_int32_t, int32_t, const DBT *)); +static int __lv_on_nontxn_update __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + u_int32_t, u_int32_t, int32_t)); +static int __lv_on_page_update __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t, + db_pgno_t, DB_TXN *, 
int *)); +static int __lv_on_qam_log __P((DB_LOG_VRFY_INFO *, DB_LSN, int32_t)); +static int __lv_on_timestamp __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + int32_t, u_int32_t)); +static int __lv_on_txn_aborted __P((DB_LOG_VRFY_INFO *)); +static int __lv_on_txn_logrec __P((DB_LOG_VRFY_INFO *, const DB_LSN *, + const DB_LSN *, const DB_TXN *, u_int32_t, int32_t)); +static int __lv_vrfy_for_dbfile __P((DB_LOG_VRFY_INFO *, int32_t, int *)); + +/* General error handlers, called when a check fails. */ +#define ON_ERROR(lvh, errv) do { \ + (lvh)->flags |= (errv); \ + if (F_ISSET((lvh), DB_LOG_VERIFY_CAF)) \ + ret = 0;/* Ignore the error and continue. */ \ + goto err; \ +} while (0) + +/* Used by logs of unsupported types. */ +#define ON_NOT_SUPPORTED(env, lvh, lsn, ltype) do { \ + __db_errx((env), DB_STR_A("2536", \ + "[%lu][%lu] Not supported type of log record %u.", \ + "%lu %lu %u"), (u_long)((lsn).file), (u_long)((lsn).offset),\ + (ltype)); \ + (lvh)->unknown_logrec_cnt++; \ + goto err; \ +} while (0) + +#define SKIP_FORWARD_CHK(type) ((type) != DB___txn_regop && \ + (type) != DB___txn_ckp && (type) != DB___fop_rename && \ + (type) != DB___txn_child) + +#define NOTCOMMIT(type) ((type) != DB___txn_regop && \ + (type) != DB___txn_child) + +#define LOG_VRFY_PROC(lvh, lsn, argp, fileid) do { \ + int __lv_log_vrfy_proc_step = 0; \ + if ((ret = __log_vrfy_proc((lvh), (lsn), (argp)->prev_lsn, \ + (argp)->type, (argp)->txnp, (fileid), \ + &__lv_log_vrfy_proc_step)) != 0) \ + goto err; \ + if (__lv_log_vrfy_proc_step == 1) \ + goto out; \ + else if (__lv_log_vrfy_proc_step == -1) \ + goto err; \ + else \ + DB_ASSERT(lvh->dbenv->env, \ + __lv_log_vrfy_proc_step == 0); \ +} while (0) + +/* Log record handlers used by log types involving page updates. 
*/ +#define ON_PAGE_UPDATE(lvh, lsn, argp, pgno) do { \ + int __lv_onpgupdate_res; \ + if ((ret = __lv_on_page_update((lvh), (lsn), (argp)->fileid, \ + (pgno), (argp)->txnp, &__lv_onpgupdate_res)) != 0) \ + goto err; \ + if (__lv_onpgupdate_res == 1) \ + goto out; \ + else if (__lv_onpgupdate_res == -1) \ + goto err; \ + else \ + DB_ASSERT(lvh->dbenv->env, __lv_onpgupdate_res == 0); \ +} while (0) + +static int +__lv_on_page_update(lvh, lsn, fileid, pgno, txnp, step) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; + db_pgno_t pgno; + DB_TXN *txnp; + int *step; +{ + u_int32_t otxn, txnid; + int res, ret; + + txnid = txnp->txnid; + res = ret = 0; + + if ((ret = __add_page_to_txn(lvh, fileid, pgno, + txnid, &otxn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res != -1) {/* No access violation, we are done. */ + *step = 0; + goto out; + } + /* + * It's OK for a child txn to update its parent's page, but not OK + * for a parent txn to update its active child's pages. We can't + * detect the child's abort, so we may false alarm that a parent txn + * is updating its child's pages. + */ + if ((ret = __is_ancestor_txn(lvh, otxn, txnid, lsn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res) {/* The txnid is updating its parent otxn's pages. */ + *step = 0; + goto out; + } + if ((ret = __is_ancestor_txn(lvh, txnid, otxn, lsn, &res)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + if (res) {/* The txnid is updating its active child otxn's pages. */ + __db_errx(lvh->dbenv->env, DB_STR_A("2537", + "[%lu][%lu] [WARNING] Parent txn %lx is updating its " + "active child txn %lx's pages, or %lx aborted.", + "%lu %lu %lx %lx %lx"), (u_long)lsn.file, + (u_long)lsn.offset, (u_long)txnid, + (u_long)otxn, (u_long)otxn); + *step = 0; + goto out; + } + /* + * It's likely that the two txns are parent-child and the child + * aborted, but from the log we can't figure out this fact. 
+ */ + __db_errx(lvh->dbenv->env, DB_STR_A("2538", + "[%lu][%lu] [WARNING] Txn %lx is updating txn %lx's pages.", + "%lu %lu %lx %lx"), (u_long)lsn.file, (u_long)lsn.offset, + (u_long)txnid, (u_long)otxn); + *step = 0; +out: +err: + return (ret); +} + +/* + * This macro is put in all types of verify functions where a db file is + * updated, but no page number/lock involved. + */ +#define ON_PAGE_UPDATE4 + +/* + * General log record handler used by all log verify functions. + */ +static int +__log_vrfy_proc(lvh, lsn, prev_lsn, type, txnp, fileid, step) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn, prev_lsn; + u_int32_t type; /* Log record type. */ + DB_TXN *txnp; + int32_t fileid; + int *step; +{ + int dovrfy, ret; + + dovrfy = 1; + ret = 0; + /* + * step is used to tell if go on with the rest of the caller, or + * goto err/out. + * 0: go on after this function; 1: goto out; -1: goto err. + */ + *step = 0; + + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + /* Commits are not abort/beginnings. */ + if (NOTCOMMIT(type) && ((ret = __lv_log_fwdscr_onrec( + lvh, txnp->txnid, type, prev_lsn, lsn)) != 0)) + goto err; + if (SKIP_FORWARD_CHK(type)) + goto out; + } else {/* Verifying */ + if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) + __db_errx(lvh->dbenv->env, DB_STR_A("2539", + "[%lu][%lu] Verifying log record of type %s", + "%lu %lu %s"), (u_long)lsn.file, + (u_long)lsn.offset, LOGTYPE_NAME(lvh, type)); + /* + * If verifying a log range and we've passed the initial part + * which may have partial txns, remove the PARTIAL bit. + */ + if (F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL) && + LOG_COMPARE(&lsn, &(lvh->valid_lsn)) >= 0) { + lvh->valid_lsn.offset = lvh->valid_lsn.file = 0; + F_CLR(lvh, DB_LOG_VERIFY_PARTIAL); + } + + if ((ret = __lv_new_logfile_vrfy(lvh, &lsn)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + /* If only verify a db file, ignore logs about other dbs. 
*/ + if (F_ISSET(lvh, DB_LOG_VERIFY_DBFILE) && fileid != + INVAL_DBREGID && (ret = __lv_vrfy_for_dbfile(lvh, + fileid, &dovrfy)) != 0) + goto err; + if (!dovrfy) + goto out; + if (lvh->aborted_txnid != 0 && + ((ret = __lv_on_txn_aborted(lvh)) != 0)) + goto err; + if ((ret = __get_aborttxn(lvh, lsn)) != 0) + goto err; + if (txnp->txnid >= TXN_MINIMUM) { + if ((ret = __lv_on_txn_logrec(lvh, &lsn, &(prev_lsn), + txnp, type, fileid)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } else {/* Non-txnal updates. */ + if ((ret = __lv_on_nontxn_update(lvh, &lsn, + txnp->txnid, type, fileid)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + } + if (0) { +out: + *step = 1; + } + if (0) { +err: + *step = -1; + } + return (ret); +} + +/* Log record handlers used by log types for each access method. */ +static int +__lv_on_bam_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_BTREE && dbtype != DB_RECNO && dbtype != DB_HASH) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_BTREE); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_ham_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_HASH) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HASH); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_heap_log(lvh, lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_HEAP) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_HEAP); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +static int +__lv_on_qam_log(lvh, 
lsn, fileid) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + int32_t fileid; +{ + int ret; + DBTYPE dbtype; + if ((ret = __lv_dbregid_to_dbtype(lvh, fileid, &dbtype)) == 0 && + dbtype != DB_QUEUE) + ret = __lv_log_mismatch(lvh, lsn, dbtype, DB_QUEUE); + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + return (ret); +} + +/* Catch commits and store into lvinfo->txnrngs database. */ +static int +__lv_log_fwdscr_oncmt(lvinfo, lsn, txnid, ptxnid, timestamp) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + u_int32_t txnid, ptxnid; + int32_t timestamp; +{ + int ret; + struct __lv_txnrange tr; + DBT key, data; + + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + tr.txnid = txnid; + tr.end = lsn; + tr.when_commit = timestamp; + tr.ptxnid = ptxnid; + key.data = &(txnid); + key.size = sizeof(txnid); + data.data = &tr; + data.size = sizeof(tr); + if ((ret = __db_put(lvinfo->txnrngs, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; +err: + return (ret); +} + +/* Catch aborts and txn beginnings and store into lvinfo->txnrngs database. */ +static int +__lv_log_fwdscr_onrec(lvinfo, txnid, lrtype, prevlsn, lsn) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid, lrtype; + DB_LSN prevlsn, lsn; +{ + int doput, ret, ret2, tret; + u_int32_t putflag; + struct __lv_txnrange tr, *ptr; + DBC *csr; + DBT key, key2, data, data2; + + /* Ignore non-txnal log records. */ + if (txnid < TXN_MINIMUM) + return (0); + + /* Not used for now, but may be used later. Pass lint checks. */ + COMPQUIET(lrtype ,0); + putflag = 0; + doput = ret = ret2 = 0; + csr = NULL; + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&key2, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + tr.txnid = txnid; + tr.when_commit = 0;/* This is not a __txn_regop record. 
*/ + + if ((ret = __db_cursor(lvinfo->txnrngs, lvinfo->ip, + NULL, &csr, 0)) != 0) + goto err; + /* + * If the txnid is first seen here or reused later, it's aborted + * after this log record; if this log record is the 1st one of a txn, + * we have the beginning of the txn; otherwise the log record is one + * of the actions taken within the txn, and we don't do anything. + */ + if ((ret = __dbc_get(csr, &key, &data, DB_SET)) != 0 && + ret != DB_NOTFOUND) + goto err; + + ptr = (struct __lv_txnrange *)data.data; + if (ret == DB_NOTFOUND || !IS_ZERO_LSN(ptr->begin)) { + tr.end = lsn; + data.data = &tr; + data.size = sizeof(tr); + doput = 1; + key2.data = &lsn; + key2.size = sizeof(lsn); + data2.data = &(tr.txnid); + data2.size = sizeof(tr.txnid); + putflag = DB_KEYFIRST; + if ((ret2 = __db_put(lvinfo->txnaborts, lvinfo->ip, NULL, + &key2, &data2, 0)) != 0) { + ret = ret2; + goto err; + } + } else if (ret == 0 && IS_ZERO_LSN(prevlsn)) {/* The beginning of txn.*/ + /* The begin field must be [0, 0]. */ + DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(ptr->begin)); + ptr->begin = lsn; + putflag = DB_CURRENT; + doput = 1; + } + + if (doput && (ret = __dbc_put(csr, &key, &data, putflag)) != 0) + goto err; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +/* + * Return 0 from dovrfy if verifying logs for a specified db file, and fileid + * is not the one we want; Otherwise return 1 from dovrfy. If DB operations + * failed, the error is returned. 
+ */ +static int +__lv_vrfy_for_dbfile(lvh, fileid, dovrfy) + DB_LOG_VRFY_INFO *lvh; + int32_t fileid; + int *dovrfy; +{ + u_int8_t tmpuid[DB_FILE_ID_LEN]; + VRFY_FILEREG_INFO *fregp; + u_int32_t i; + int ret, tret; + DBT tgtkey; + + ret = tret = 0; + *dovrfy = 0; + fregp = NULL; + memset(tmpuid, 0, sizeof(u_int8_t) * DB_FILE_ID_LEN); + memset(&tgtkey, 0, sizeof(tgtkey)); + tgtkey.data = lvh->target_dbid; + tgtkey.size = DB_FILE_ID_LEN; + ret = __get_filereg_info(lvh, &tgtkey, &fregp); + + /* + * If the target db file is not seen yet, we don't verify any file, + * and it does not mean anything wrong. + */ + if (ret == DB_NOTFOUND) { + ret = 0; + goto out; + } + if (ret != 0) + goto err; + + for (i = 0; i < fregp->regcnt; i++) + if (fregp->dbregids[i] == fileid) { + *dovrfy = 1; + goto out; + } +out: +err: + if (fregp != NULL && + (tret = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +static int +__lv_log_mismatch(lvh, lsn, dbtype, exp_dbtype) + DB_LOG_VRFY_INFO *lvh; + DB_LSN lsn; + DBTYPE dbtype, exp_dbtype; +{ + int ret; + + __db_errx(lvh->dbenv->env, DB_STR_A("2540", + "[%lu][%lu] Log record type does not match related database type, " + "current database type: %s, expected database type according to " + "the log record type: %s.", "%lu %lu %s %s"), + (u_long)lsn.file, (u_long)lsn.offset, __lv_dbtype_str(dbtype), + __lv_dbtype_str(exp_dbtype)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); +err: + return (ret); +} + +static int +__lv_dbregid_to_dbtype(lvh, id, ptype) + DB_LOG_VRFY_INFO *lvh; + int32_t id; + DBTYPE *ptype; +{ + int ret; + VRFY_FILELIFE *pflife; + + ret = 0; + pflife = NULL; + + if ((ret = __get_filelife(lvh, id, &pflife)) != 0) + goto err; + *ptype = pflife->dbtype; +err: + if (pflife != NULL) + __os_free(lvh->dbenv->env, pflife); + + return (ret); +} + +/* + * __db_log_verify_global_report -- + * Report statistics data in DB_LOG_VRFY_INFO handle. 
+ * + * PUBLIC: void __db_log_verify_global_report __P((const DB_LOG_VRFY_INFO *)); + */ +void __db_log_verify_global_report (lvinfo) + const DB_LOG_VRFY_INFO *lvinfo; +{ + u_int32_t i, nltype; + + __db_msg(lvinfo->dbenv->env, + "Number of active transactions: %u;", lvinfo->ntxn_active); + __db_msg(lvinfo->dbenv->env, + "Number of committed transactions: %u;", lvinfo->ntxn_commit); + __db_msg(lvinfo->dbenv->env, + "Number of aborted transactions: %u;", lvinfo->ntxn_abort); + __db_msg(lvinfo->dbenv->env, + "Number of prepared transactions: %u;", lvinfo->ntxn_prep); + __db_msg(lvinfo->dbenv->env, + "Total number of checkpoint: %u;", lvinfo->nckp); + __db_msg(lvinfo->dbenv->env, + "Total number of non-transactional updates: %u;", + lvinfo->non_txnup_cnt); + __db_msg(lvinfo->dbenv->env, + "Total number of unknown log records: %u;", + lvinfo->unknown_logrec_cnt); + __db_msg(lvinfo->dbenv->env, + "Total number of app-specific log record: %u;", + lvinfo->external_logrec_cnt); + __db_msg(lvinfo->dbenv->env, + "The number of each type of log record:"); + + for (i = 0; i < 256; i++) { + nltype = lvinfo->lrtypes[i]; + if (LOGTYPE_NAME(lvinfo, i) != NULL) + __db_msg(lvinfo->dbenv->env, "\n\t%s : %u;", + LOGTYPE_NAME(lvinfo, i), nltype); + } +} + +/* + * PUBLIC: int __crdel_metasub_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__crdel_metasub_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_metasub_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __crdel_metasub_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_create_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, 
db_recops, void *)); + */ +int +__crdel_inmem_create_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_create_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_rename_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_rename_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_rename_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __crdel_inmem_remove_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__crdel_inmem_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __crdel_inmem_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_addrem_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_addrem_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_addrem_args *argp; + 
DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_big_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_big_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_big_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_big_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_ovref_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_ovref_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_ovref_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_ovref_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_relink_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_relink_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if 
((ret = + __db_relink_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_debug_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_debug_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_debug_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_noop_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_noop_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_noop_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_noop_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_alloc_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_alloc_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_alloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + 
__os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_alloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_alloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_free_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_free_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_free_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_free_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_free_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_free_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_free_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int 
__db_cksum_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_cksum_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_cksum_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_freedata_42_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__db_pg_freedata_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_freedata_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_freedata_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_freedata_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_freedata_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_freedata_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_freedata_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_init_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_init_verify(env, 
dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_init_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_init_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_sort_44_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_sort_44_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_sort_44_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_sort_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pg_trunc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pg_trunc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pg_trunc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pg_trunc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. 
*/ +out: +err: + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_realloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_realloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_realloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_realloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_relink_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_relink_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_relink_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_relink_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_merge_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_merge_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_merge_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_merge_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __db_pgno_verify 
__P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__db_pgno_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __db_pgno_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __db_pgno_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +static const char * +__lv_dbreg_str(op) + u_int32_t op; +{ + const char *p; + + switch (op) { + case DBREG_CHKPNT: + p = "DBREG_CHKPNT"; + break; + case DBREG_RCLOSE: + p = "DBREG_RCLOSE"; + break; + case DBREG_CLOSE: + p = "DBREG_CLOSE"; + break; + case DBREG_OPEN: + p = "DBREG_OPEN"; + break; + case DBREG_PREOPEN: + p = "DBREG_PREOPEN"; + break; + case DBREG_REOPEN: + p = "DBREG_REOPEN"; + break; + default: + p = DB_STR_P("Unknown dbreg op code"); + break; + } + + return (p); +} + +static int +__lv_dbt_str(dbt, str) + const DBT *dbt; + char **str; +{ + char *p, *q; + u_int32_t buflen, bufsz, i; + int ret; + + ret = 0; + p = q = NULL; + buflen = bufsz = i = 0; + bufsz = sizeof(char) * dbt->size * 2; + + if ((ret = __os_malloc(NULL, bufsz, &p)) != 0) + goto err; + q = (char *)dbt->data; + + memset(p, 0, bufsz); + /* + * Each unprintable character takes up several bytes, so beware of + * memory access violation. + */ + for (i = 0; i < dbt->size && buflen < bufsz; i++) { + buflen = (u_int32_t)strlen(p); + snprintf(p + buflen, bufsz - (buflen + 1), + isprint(q[i]) || q[i] == 0x0a ? 
"%c" : "%x", q[i]); + } + *str = p; +err: + return (ret); +} + +static const char * +__lv_dbtype_str(dbtype) + DBTYPE dbtype; +{ + char *p; + + switch (dbtype) { + case DB_BTREE: + p = "DB_BTREE"; + break; + case DB_HASH: + p = "DB_HASH"; + break; + case DB_RECNO: + p = "DB_RECNO"; + break; + case DB_QUEUE: + p = "DB_QUEUE"; + break; + default: + p = DB_STR_P("Unknown db type"); + break; + } + + return (p); +} + +/* + * PUBLIC: int __dbreg_register_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__dbreg_register_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __dbreg_register_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_FILEREG_INFO *fregp, freg; + VRFY_FILELIFE *pflife, flife; + int checklife, rmv_dblife, ret, ret2; + u_int32_t opcode; + char *puid; + const char *dbfname; + + dbfname = NULL; + checklife = 1; + opcode = 0; + ret = ret2 = rmv_dblife = 0; + puid = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + fregp = NULL; + pflife = NULL; + memset(&flife, 0, sizeof(flife)); + memset(&freg, 0, sizeof(freg)); + + if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0) + return (ret); + + opcode = FLD_ISSET(argp->opcode, DBREG_OP_MASK); + dbfname = argp->name.size == 0 ? "(null)" : (char *)(argp->name.data); + /* + * We don't call LOG_VRFY_PROC macro here, so we have to copy the code + * snippet in __log_vrfy_proc here. 
+ */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid, + argp->type, argp->prev_lsn, *lsnp)) != 0) + goto err; + goto out; + } + if (lvh->aborted_txnid != 0 && (ret = __lv_on_txn_aborted(lvh)) != 0) + goto err; + + if ((ret = __get_filereg_info(lvh, &(argp->uid), &fregp)) != 0 && + ret != DB_NOTFOUND) + goto err; + + /* + * When DBREG_CLOSE, we should remove the fileuid-filename mapping + * from filereg because the file can be opened again with a different + * fileuid after being closed. + */ + if (ret == 0 && IS_DBREG_CLOSE(opcode)) { + if ((ret = __db_del(lvh->fileregs, lvh->ip, NULL, + &(argp->uid), 0)) != 0) + goto err; + } + + /* + * If this db file is seen for the 1st time, store filereg and + * filelife info. Since we will do an end-to-begin scan before the + * verification, we will be able to get the record but its regcnt + * is 0 since we didn't know any dbregid yet. + */ + if (ret == DB_NOTFOUND || fregp->regcnt == 0) { + /* Store filereg info unless it's a CLOSE. */ + freg.fileid = argp->uid; + if (!IS_DBREG_CLOSE(opcode)) { + freg.regcnt = 1; + freg.dbregids = &(argp->fileid); + } else { + freg.regcnt = 0; + freg.dbregids = NULL; + } + if (ret == DB_NOTFOUND) { + /* + * If the db file is an in-memory db file, we can arrive + * here because there is no __fop_rename log for it; + * if the __fop_rename log record is out of the log range we + * verify, we will also arrive here. + */ + if ((ret = __os_malloc(env, argp->name.size + 1, + &(freg.fname))) != 0) + goto err; + memset(freg.fname, 0, + sizeof(char) * (argp->name.size + 1)); + (void)strncpy(freg.fname, + (const char *)(argp->name.data), argp->name.size); + } else /* We already have the name. 
*/ + if ((ret = __os_strdup(env, + fregp->fname, &(freg.fname))) != 0) + goto err; + + if (!IS_DBREG_OPEN(opcode) && + !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + /* It's likely that the DBREG_OPEN is not seen.*/ + __db_msg(env, DB_STR_A("2541", + "[%lu][%lu] Suspicious dbreg operation: %s, the " + "database file %s's register in log region does " + "not begin with an open operation.", + "%lu %lu %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, + __lv_dbreg_str(opcode), dbfname); + } + + /* + * PREOPEN is only generated when opening an in-memory db. + * Because we need to log the fileid we're allocating, but we + * don't have all the details yet, we are preopening the + * database and will actually complete the open later. So + * PREOPEN is not a real open, and the log should be ignored + * in log_verify. + * If fileuid is in a CLOSE operation there is no need to + * record it. + */ + if ((opcode != DBREG_PREOPEN) && !IS_DBREG_CLOSE(opcode) && + (ret = __put_filereg_info(lvh, &freg)) != 0) + goto err; + + /* Store filelife info unless it's a CLOSE dbreg operation. */ + if (!IS_DBREG_CLOSE(opcode)) { + flife.lifetime = opcode; + flife.dbregid = argp->fileid; + flife.lsn = *lsnp; + flife.dbtype = argp->ftype; + flife.meta_pgno = argp->meta_pgno; + memcpy(flife.fileid, argp->uid.data, argp->uid.size); + if ((ret = __put_filelife(lvh, &flife)) != 0) + goto err; + } + /* on_txn_logrec relies on the freg info in db first. */ + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + goto out; + } + + /* + * Add dbregid if it's new, and store the file register info; or + * remove dbregid from fregp if we are closing the file. + */ + if ((ret = __add_dbregid(lvh, fregp, argp->fileid, + opcode, *lsnp, argp->ftype, argp->meta_pgno, &ret2)) != 0) + goto err; + ret = ret2; + if (ret != 0 && ret != 1 && ret != 2 && ret != -1) + goto err;/* DB operation error. */ + if (ret != 0) { + /* Newly seen dbregid does not need to check life. 
*/ + if (ret == 1) + checklife = 0; + else if (ret == -1) + rmv_dblife = 1;/* The dbreg file id is closed. */ + else if (ret == 2) { + __db_errx(env, DB_STR_A("2542", + "[%lu][%lu] Wrong dbreg operation " + "sequence, opening %s for id %d which is already " + "open.", "%lu %lu %s %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + dbfname, argp->fileid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + if (!rmv_dblife && (ret = __put_filereg_info(lvh, fregp)) != 0) + goto err; + } + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + if (!checklife) + goto out; + + /* + * Verify the database type does not change, and the lifetime of a + * db file follow an open/chkpnt->[chkpnt]->close order. + * A VRFY_FILELIFE record is removed from db on DBREG_CLOSE, + * and inserted into db on DBREG_OPEN. + */ + if (!IS_DBREG_OPEN(opcode) && + (ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0) { + if (ret == DB_NOTFOUND) { + if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + __db_errx(env, DB_STR_A("2543", + "[%lu][%lu] Wrong dbreg operation sequence," + "file %s with id %d is first seen of " + "status: %s", "%lu %lu %s %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + dbfname, argp->fileid, + __lv_dbreg_str(opcode)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } else + ret = 0; + } + goto err; + } + + /* Can't go on verifying without pflife. 
*/ + if (pflife == NULL) + goto out; + if (argp->ftype != pflife->dbtype) { + if ((ret = __lv_dbt_str(&(argp->uid), &puid)) != 0) + goto err; + __db_errx(env, DB_STR_A("2544", + "[%lu][%lu] The dbtype of database file %s with uid %s " + " and id %d has changed from %s to %s.", + "%lu %lu %s %s %d %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, dbfname, puid, + pflife->dbregid, __lv_dbtype_str(pflife->dbtype), + __lv_dbtype_str(argp->ftype)); + + __os_free(env, puid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + if ((IS_DBREG_CLOSE(opcode) && + (pflife->lifetime != DBREG_CHKPNT) && + !IS_DBREG_OPEN(pflife->lifetime))) { + __db_errx(env, DB_STR_A("2545", + "[%lu][%lu] Wrong dbreg operation sequence for file %s " + "with id %d, current status: %s, new status: %s", + "%lu %lu %s %d %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, dbfname, pflife->dbregid, + __lv_dbreg_str(pflife->lifetime), + __lv_dbreg_str(opcode)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + pflife->lifetime = opcode; + pflife->lsn = *lsnp; + if ((!rmv_dblife && (ret = __put_filelife(lvh, pflife)) != 0) || + ((rmv_dblife || IS_DBREG_CLOSE(opcode)) && + ((ret = __del_filelife(lvh, argp->fileid)) != 0))) + goto err; + +out: + /* There may be something to do here in future. 
*/ +err: + __os_free(env, argp); + if (fregp != NULL && + (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = ret2; + if (freg.fname != NULL) + __os_free(env, freg.fname); + if (pflife != NULL) + __os_free(env, pflife); + + return (ret); +} + +/* + * PUBLIC: int __bam_split_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_split_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_split_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->left); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->right); + /* Parent page lock is always released before __bam_page returns. */ + + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_split_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_split_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_split_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_split_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_rsplit_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rsplit_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_rsplit_args *argp; + 
DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_rsplit_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_adj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_adj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_adj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_adj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_irep_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_irep_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_irep_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_irep_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_cadjust_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cadjust_verify(env, dbtp, lsnp, notused2, lvhp) 
+ ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_cadjust_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_cadjust_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_cdel_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_cdel_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_cdel_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_cdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_repl_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_repl_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_repl_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_repl_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_root_verify __P((ENV *, 
DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_root_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_root_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_root_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __bam_curadj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_curadj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_curadj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __bam_rcuradj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_rcuradj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_rcuradj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_rcuradj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_bam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __bam_relink_43_verify __P((ENV *, DBT *, DB_LSN *, 
+ * PUBLIC: db_recops, void *)); + */ +int +__bam_relink_43_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_relink_43_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_relink_43_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __bam_merge_44_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__bam_merge_44_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __bam_merge_44_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __bam_merge_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_create_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_create_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_create_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_create_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_verify(env, dbtp, lsnp, 
notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_create_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_create_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_remove_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_write_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_write_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_write_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __fop_write_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_write_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + 
+ if ((ret = __fop_write_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __fop_rename_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_rename_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_rename_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_rename_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_rename_args *argp; + DB_LOG_VRFY_INFO *lvh; + char *buf; + int ret; + size_t buflen; + VRFY_FILEREG_INFO freg, *fregp; + + memset(&freg, 0, sizeof(freg)); + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + buf = NULL; + + if ((ret = __fop_rename_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + /* + * Since we get the fname-fuid map when iterating from end to + * beginning, we only store the latest file name, that's the + * name supposed to be used currently. So if the fileid is + * already stored, and we see it again here, it means the db + * file was renamed and we already have its latest name. + * + * Store the dbfile path (dir/fname) in case there are db + * files with same name in different data directories. 
+ */ + if (__get_filereg_info(lvh, &(argp->fileid), &fregp) == 0) { + if (fregp != NULL && + (ret = __free_filereg_info(fregp)) != 0) + goto err; + goto out; + } + freg.fileid = argp->fileid; + if ((ret = __os_malloc(env, buflen = argp->dirname.size + + argp->newname.size + 2, &buf)) != 0) + goto err; + snprintf(buf, buflen, "%s/%s", (char *)argp->dirname.data, + (char *)argp->newname.data); + freg.fname = buf; + /* Store the dbfilename<-->dbfileid map. */ + if ((ret = __put_filereg_info(lvh, &freg)) != 0) + goto err; + } +out: + +err: + if (buf != NULL) + __os_free(lvh->dbenv->env, buf); + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __fop_file_remove_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_file_remove_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __fop_file_remove_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __fop_file_remove_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +#ifdef HAVE_HASH +/* + * PUBLIC: int __ham_insdel_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_insdel_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_insdel_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_insdel_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int 
__ham_newpage_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_newpage_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_newpage_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_newpage_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __ham_splitdata_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_splitdata_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_splitdata_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_splitdata_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_replace_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_replace_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_replace_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_replace_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if 
((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_copypage_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_copypage_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_copypage_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_copypage_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __ham_metagroup_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_metagroup_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_metagroup_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_metagroup_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_metagroup_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_metagroup_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_metagroup_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + 
LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __ham_groupalloc_42_verify __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__ham_groupalloc_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_groupalloc_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_groupalloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_groupalloc_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_groupalloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_groupalloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_FILELIFE *pflife; + int ret; + + ret = 0; + pflife = NULL; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_groupalloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + + /* + * The __ham_groupalloc record is only generated when creating the + * hash sub database so it will always be on the master database's + * fileid. 
+ */ + + if ((ret = __get_filelife(lvh, argp->fileid, &pflife)) != 0) + goto err; + + if (pflife->meta_pgno != PGNO_BASE_MD) { + __db_errx(lvh->dbenv->env, DB_STR_A("2546", + "[%lu][%lu] __ham_groupalloc should apply only to the " + "master database with meta page number 0, current meta " + "page number is %d.", "%lu %lu %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + pflife->meta_pgno); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + +out: + +err: + if (pflife != NULL) + __os_free(lvh->dbenv->env, pflife); + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __ham_changeslot_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_changeslot_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_changeslot_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_changeslot_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. 
*/ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_contract_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_contract_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_contract_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_contract_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __ham_curadj_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_curadj_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_curadj_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_curadj_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __ham_chgpg_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__ham_chgpg_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __ham_chgpg_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __ham_chgpg_read(env, NULL, NULL, 
dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE4 /* No pages are locked by txns. */ + if ((ret = __lv_on_ham_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + return (ret); +} +#endif + +#ifdef HAVE_HEAP +/* + * PUBLIC: int __heap_addrem_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_addrem_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __heap_pg_alloc_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_pg_alloc_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_pg_alloc_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __heap_trunc_meta_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_meta_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + 
__heap_trunc_meta_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_trunc_meta_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* + * PUBLIC: int __heap_trunc_page_verify + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_trunc_page_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __heap_trunc_page_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __heap_trunc_page_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + ON_PAGE_UPDATE(lvh, *lsnp, argp, argp->pgno); + if ((ret = __lv_on_heap_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; +out: + +err: + __os_free(env, argp); + return (ret); +} +#endif + +#ifdef HAVE_QUEUE +/* + * PUBLIC: int __qam_incfirst_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_incfirst_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_incfirst_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_incfirst_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __qam_mvptr_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int 
+__qam_mvptr_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_mvptr_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_mvptr_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __qam_del_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_del_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_del_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_del_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __qam_add_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_add_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_add_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_add_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, argp->fileid); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __qam_delext_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_delext_verify(env, dbtp, lsnp, 
notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __qam_delext_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = + __qam_delext_read(env, NULL, NULL, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __lv_on_qam_log(lvh, *lsnp, argp->fileid)) != 0) + goto err; + +out: + +err: + + __os_free(env, argp); + + return (ret); +} +#endif + +/* + * PUBLIC: int __txn_regop_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_regop_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_regop_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_regop_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret, ret2, started; + VRFY_TXN_INFO *ptvi, *pptvi; + VRFY_TIMESTAMP_INFO tsinfo; + + ptvi = pptvi = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ret = ret2 = started = 0; + + if ((ret = __txn_regop_read(env, dbtp->data, &argp)) != 0) + return (ret); + + /* + * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise + * this txn will be taken as an aborted txn. 
+ */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_oncmt(lvh, *lsnp, + argp->txnp->txnid, 0, argp->timestamp)) != 0) + goto err; + + tsinfo.lsn = *lsnp; + tsinfo.timestamp = argp->timestamp; + tsinfo.logtype = argp->type; + if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0) + goto err; + goto out; /* We are done. */ + } + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __del_txn_pages(lvh, argp->txnp->txnid)) != 0 && + ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + if ((ret = __lv_on_timestamp(lvh, lsnp, argp->timestamp, + DB___txn_regop)) != 0) + goto err; + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + (ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0 && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2547", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + DB_ASSERT(env, ptvi->ptxnid == 0); + + /* + * This log record is only logged when committing an outermost txn, + * child txn commits are logged in __txn_child_log. + */ + if (ptvi->ptxnid == 0) { + if (ptvi->status == TXN_STAT_PREPARE) + lvh->ntxn_prep--; + else if (ptvi->status == TXN_STAT_ACTIVE) + lvh->ntxn_active--; + lvh->ntxn_commit++; + } + ptvi->status = TXN_STAT_COMMIT; + DB_ASSERT(env, IS_ZERO_LSN(ptvi->last_lsn)); + ptvi->last_lsn = *lsnp; + if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0) + goto err; + + /* Report txn stats. 
*/ + if (F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) + __db_msg(env, DB_STR_A("2548", + "[%lu][%lu] The number of active, committed and aborted " + "child txns of txn %lx: %u, %u, %u.", + "%lu %lu %lx %u %u %u"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)ptvi->txnid, + ptvi->nchild_active, ptvi->nchild_commit, + ptvi->nchild_abort); +out: +err: + + if (pptvi != NULL && (ret2 = __free_txninfo(pptvi)) != 0 && ret == 0) + ret = ret2; + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_ckp_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_ckp_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_ckp_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_ckp_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_ckp_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_CKP_INFO *lastckp, ckpinfo; + int ret; + struct __ckp_verify_params cvp; + VRFY_TIMESTAMP_INFO tsinfo; + char timebuf[CTIME_BUFLEN]; + time_t ckp_time, lastckp_time; + + lastckp = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + memset(&ckpinfo, 0, sizeof(ckpinfo)); + memset(&cvp, 0, sizeof(cvp)); + + if ((ret = __txn_ckp_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + tsinfo.lsn = 
*lsnp; + tsinfo.timestamp = argp->timestamp; + tsinfo.logtype = argp->type; + /* + * Store the first ckp_lsn, or the least one greater than the + * starting point. There will be no partial txns after + * valid_lsn. + */ + if (!(!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + LOG_COMPARE(&(lvh->lv_config->start_lsn), + &(argp->ckp_lsn)) > 0)) + lvh->valid_lsn = argp->ckp_lsn; + if ((ret = __put_timestamp_info(lvh, &tsinfo)) != 0) + goto err; + goto out;/* We are done, exit. */ + } + lvh->nckp++; + ckp_time = (time_t)argp->timestamp; + __db_msg(env, DB_STR_A("2549", + "[%lu][%lu] Checkpoint record, ckp_lsn: [%lu][%lu], " + "timestamp: %s. Total checkpoint: %u", + "%lu %lu %lu %lu %s %u"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->ckp_lsn.file, + (u_long)argp->ckp_lsn.offset, + __os_ctime(&ckp_time, timebuf), lvh->nckp); + + if ((ret = __lv_on_timestamp(lvh, lsnp, + argp->timestamp, DB___txn_ckp)) != 0) + goto err; + if (((ret = __get_last_ckp_info(lvh, &lastckp)) != 0) && + ret != DB_NOTFOUND) + return (ret); + if (ret == DB_NOTFOUND) + goto cont; + + if (LOG_COMPARE(&(argp->last_ckp), &(lastckp->lsn)) != 0) { + __db_errx(env, DB_STR_A("2550", + "[%lu][%lu] Last known checkpoint [%lu][%lu] not equal " + "to last_ckp :[%lu][%lu]. Some checkpoint log records " + "may be missing.", "%lu %lu %lu %lu %lu %lu"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lastckp->lsn.file, (u_long)lastckp->lsn.offset, + (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + /* + * Checkpoint are generally not performed quite often, so we see this + * as an error, but in txn commits we see it as a warning. 
+ */ + lastckp_time = (time_t)lastckp->timestamp; + if (argp->timestamp < lastckp->timestamp) { + __db_errx(env, DB_STR_A("2551", + "[%lu][%lu] Last known checkpoint [%lu, %lu] has a " + "timestamp %s smaller than this checkpoint timestamp %s.", + "%lu %lu %lu %lu %s %s"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)lastckp->lsn.file, + (u_long)lastckp->lsn.offset, + __os_ctime(&lastckp_time, timebuf), + __os_ctime(&ckp_time, timebuf)); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + +cont: + cvp.env = env; + cvp.lsn = *lsnp; + cvp.ckp_lsn = argp->ckp_lsn; + + /* + * Verify that all active txn's first lsn is greater than + * argp->ckp_lsn. + */ + if ((ret = __iterate_txninfo(lvh, 0, 0, + __lv_ckp_vrfy_handler, &cvp)) != 0) + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + ckpinfo.timestamp = argp->timestamp; + ckpinfo.lsn = *lsnp; + ckpinfo.ckplsn = argp->ckp_lsn; + + if ((ret = __put_ckp_info(lvh, &ckpinfo)) != 0) + goto err; +out: +err: + if (argp) + __os_free(env, argp); + if (lastckp) + __os_free(env, lastckp); + return (ret); +} + +static int +__lv_ckp_vrfy_handler(lvinfo, txninfop, param) + DB_LOG_VRFY_INFO *lvinfo; + VRFY_TXN_INFO *txninfop; + void *param; +{ + struct __ckp_verify_params *cvp; + int ret; + + ret = 0; + cvp = (struct __ckp_verify_params *)param; + /* ckp_lsn should be less than any active txn's first lsn. */ + if (txninfop->status == TXN_STAT_ACTIVE && LOG_COMPARE(&(cvp->ckp_lsn), + &(txninfop->first_lsn)) >= 0) { + __db_errx(cvp->env, DB_STR_A("2552", + "[%lu][%lu] ckp log's ckp_lsn [%lu][%lu] greater than " + "active txn %lx 's first lsn [%lu][%lu]", + "%lu %lu %lu %lu %lx %lu %lu"), + (u_long)cvp->lsn.file, (u_long)cvp->lsn.offset, + (u_long)cvp->ckp_lsn.file, (u_long)cvp->ckp_lsn.offset, + (u_long)txninfop->txnid, + (u_long)txninfop->first_lsn.file, + (u_long)txninfop->first_lsn.offset); + lvinfo->flags |= DB_LOG_VERIFY_ERR; + if (!F_ISSET(lvinfo, DB_LOG_VERIFY_CAF)) + /* Stop the iteration. 
*/ + ret = DB_LOG_VERIFY_BAD; + } + + return (ret); +} + +/* + * PUBLIC: int __txn_child_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_child_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_child_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_TXN_INFO *ptvi, *ptvi2; + int ret, ret2, started; + + /* + * This function is called when a txn T0's child txn T1 commits. Before + * this log record we don't know T0 and T1's relationship. This means + * we never know the T0 has an active child txn T1, all child txns + * we know are committed. + */ + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ptvi = ptvi2 = NULL; + ret = ret2 = started = 0; + + if ((ret = __txn_child_read(env, dbtp->data, &argp)) != 0) + return (ret); + + /* + * The __lv_log_fwdscr_oncmt call must precede LOG_VRFY_PROC otherwise + * this txn will be taken as an aborted txn. + */ + if (F_ISSET(lvh, DB_LOG_VERIFY_FORWARD)) { + if ((ret = __lv_log_fwdscr_oncmt(lvh, argp->c_lsn, argp->child, + argp->txnp->txnid, 0)) != 0) + goto err; + if ((ret = __lv_log_fwdscr_onrec(lvh, argp->txnp->txnid, + argp->type, argp->prev_lsn, *lsnp)) != 0) + goto err; + goto out;/* We are done. */ + } + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + if ((ret = __return_txn_pages(lvh, argp->child, + argp->txnp->txnid)) != 0 && ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + + /* Update parent txn info. 
*/ + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2553", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + ptvi->nchild_commit++; + /* + * The start of this child txn caused lvh->ntxn_active to be + * incremented unnecessarily, so decrement it. + */ + lvh->ntxn_active--; + if (ptvi->status != TXN_STAT_ACTIVE) { + __db_errx(lvh->dbenv->env, DB_STR_A("2554", + "[%lu][%lu] Parent txn %lx ended " + "before child txn %lx ends.", "%lu %lu %lx %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + if ((ret = __put_txn_vrfy_info(lvh, ptvi)) != 0) + goto err; + + /* Update child txn info. 
*/ + if ((ret = __get_txn_vrfy_info(lvh, argp->child, &ptvi2)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->child, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2555", + "[%lu][%lu] Can not find an active " + "transaction's information, txnid: %lx.", + "%lu %lu %lx"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi2 == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + if (ptvi2->status != TXN_STAT_ACTIVE) { + __db_errx(lvh->dbenv->env, DB_STR_A("2556", + "[%lu][%lu] Txn %lx ended before it commits.", + "%lu %lu %lx"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->child); + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + ptvi2->status = TXN_STAT_COMMIT; + if ((ret = __put_txn_vrfy_info(lvh, ptvi2)) != 0) + goto err; +out: +err: + __os_free(env, argp); + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + if (ptvi2 != NULL && (ret2 = __free_txninfo(ptvi2)) != 0 && ret == 0) + ret = ret2; + + return (ret); +} + +/* + * PUBLIC: int __txn_xa_regop_42_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_xa_regop_42_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_xa_regop_42_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_xa_regop_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + ON_NOT_SUPPORTED(env, lvh, *lsnp, argp->type); + /* LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); */ +err: + __os_free(env, argp); + + return (ret); +} + +/* + * 
PUBLIC: int __txn_prepare_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_prepare_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_prepare_args *argp; + DB_LOG_VRFY_INFO *lvh; + VRFY_TXN_INFO *ptvi; + int ret, ret2, started; + + ret = ret2 = started = 0; + ptvi = NULL; + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + + if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + if ((ret = __get_txn_vrfy_info(lvh, argp->txnp->txnid, &ptvi)) != 0 && + ret != DB_NOTFOUND) + goto err; + + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && + ((ret2 = __txn_started(lvh, lvh->lv_config->start_lsn, + argp->txnp->txnid, &started)) == 0) && started != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2; + __db_errx(lvh->dbenv->env, DB_STR_A("2557", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)argp->txnp->txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + + } + DB_ASSERT(env, + (IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status != TXN_STAT_PREPARE) || + (!IS_ZERO_LSN(ptvi->prep_lsn) && ptvi->status == TXN_STAT_PREPARE)); + + lvh->ntxn_prep++; + lvh->ntxn_active--; + + if (!IS_ZERO_LSN(ptvi->prep_lsn)) {/* Prepared more than once. 
*/ + + __db_errx(lvh->dbenv->env, DB_STR_A("2558", + "[%lu][%lu] Multiple txn_prepare log record for " + "transaction %lx, previous prepare lsn: [%lu, %lu].", + "%lu %lu %lx %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)argp->txnp->txnid, + (u_long)ptvi->prep_lsn.file, (u_long)ptvi->prep_lsn.offset); + } else { + ptvi->prep_lsn = *lsnp; + ptvi->status = TXN_STAT_PREPARE; + } + ret = __put_txn_vrfy_info(lvh, ptvi); +out: +err: + __os_free(env, argp); + if (ptvi != NULL && (ret2 = __free_txninfo(ptvi)) != 0 && ret == 0) + ret = ret2; + return (ret); +} + +/* + * PUBLIC: int __txn_recycle_verify __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_recycle_verify(env, dbtp, lsnp, notused2, lvhp) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *lvhp; +{ + __txn_recycle_args *argp; + DB_LOG_VRFY_INFO *lvh; + int ret; + + notused2 = DB_TXN_LOG_VERIFY; + lvh = (DB_LOG_VRFY_INFO *)lvhp; + ret = 0; + + if ((ret = __txn_recycle_read(env, dbtp->data, &argp)) != 0) + return (ret); + + LOG_VRFY_PROC(lvh, *lsnp, argp, INVAL_DBREGID); + + /* Add recycle info for all txns whose ID is in the [min, max] range. */ + ret = __add_recycle_lsn_range(lvh, lsnp, argp->min, argp->max); + +out: + +err: + + __os_free(env, argp); + return (ret); +} + +/* Handle log types having timestamps, so far only __txn_ckp and __txn_regop. 
*/ +static int +__lv_on_timestamp(lvh, lsn, timestamp, logtype) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsn; + int32_t timestamp; + u_int32_t logtype; +{ + VRFY_TIMESTAMP_INFO *ltsinfo; + int ret; + + ltsinfo = NULL; + ret = 0; + if ((ret = __get_latest_timestamp_info(lvh, *lsn, <sinfo)) == 0) { + DB_ASSERT(lvh->dbenv->env, ltsinfo != NULL); + if (ltsinfo->timestamp >= timestamp && + F_ISSET(lvh, DB_LOG_VERIFY_VERBOSE)) { + __db_errx(lvh->dbenv->env, DB_STR_A("2559", + "[%lu][%lu] [WARNING] This log record of type %s " + "does not have a greater time stamp than " + "[%lu, %lu] of type %s", "%lu %lu %s %lu %lu %s"), + (u_long)lsn->file, (u_long)lsn->offset, + LOGTYPE_NAME(lvh, logtype), + (u_long)ltsinfo->lsn.file, + (u_long)ltsinfo->lsn.offset, + LOGTYPE_NAME(lvh, ltsinfo->logtype)); + lvh->flags |= DB_LOG_VERIFY_WARNING; + } + } + if (ltsinfo != NULL) + __os_free(lvh->dbenv->env, ltsinfo); + if (ret == DB_NOTFOUND) + ret = 0; + + return (ret); +} + +/* + * Called whenever the log record belongs to a transaction. + */ +static int +__lv_on_txn_logrec(lvh, lsnp, prev_lsnp, txnp, type, dbregid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + const DB_LSN *prev_lsnp; + const DB_TXN *txnp; + u_int32_t type; + int32_t dbregid; +{ + DBT fid; + VRFY_TXN_INFO *pvti; + u_int32_t txnid; + VRFY_FILEREG_INFO *fregp; + int ret, ret2, started; + + ret = ret2 = started = 0; + pvti = NULL; + fregp = NULL; + lvh->lrtypes[type]++;/* Increment per-type log record count. */ + txnid = txnp->txnid; + memset(&fid, 0, sizeof(fid)); + + if (dbregid == INVAL_DBREGID) + goto cont; + if ((ret = __get_filereg_by_dbregid(lvh, dbregid, &fregp)) != 0) { + if (ret == DB_NOTFOUND) { + /* + * It's likely that we are verifying a subset of logs + * and the DBREG_OPEN is outside the range. 
+ */ + if (!F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + __db_msg(lvh->dbenv->env, DB_STR_A("2560", + "[%lu][%lu] Transaction %lx is updating a " + "db file %d not registered.", + "%lu %lu %lx %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)txnp->txnid, dbregid); + goto cont; + } else + goto err; + } + + fid = fregp->fileid; +cont: + if (IS_ZERO_LSN(*prev_lsnp) && + (ret = __lv_on_new_txn(lvh, lsnp, txnp, type, dbregid, &fid)) != 0) + goto err; + + if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 && + ret != DB_NOTFOUND) + goto err; + + /* If can't find the txn, there is an internal error. */ + if (ret == DB_NOTFOUND && !F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) { + /* + * If verifying from middle, it's expected that txns begun + * before start are not found. + */ + if (!IS_ZERO_LSN(lvh->lv_config->start_lsn) && ((ret2 = + __txn_started(lvh, lvh->lv_config->start_lsn, txnid, + &started)) == 0) && started != 0) { + ret = 0; + goto out;/* We are done. */ + } + if (ret2 != 0) + ret = ret2; + + __db_errx(lvh->dbenv->env, DB_STR_A("2561", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid); + ON_ERROR(lvh, DB_LOG_VERIFY_INTERR); + } + + /* Can't proceed without the txn info. */ + if (pvti == NULL) { + if (ret == DB_NOTFOUND && F_ISSET(lvh, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + } + + /* Check if prev lsn is wrong, and some log records may be missing. 
*/ + if (!IS_ZERO_LSN(*prev_lsnp) && + LOG_COMPARE(prev_lsnp, &(pvti->cur_lsn)) != 0) { + __db_errx(lvh->dbenv->env, DB_STR_A("2562", + "[%lu][%lu] Previous record for transaction %lx is " + "[%lu][%lu] and prev_lsn is [%lu][%lu].", + "%lu %lu %lx %lu %lu %lu %lu"), (u_long)lsnp->file, + (u_long)lsnp->offset, (u_long)pvti->txnid, + (u_long)pvti->cur_lsn.file, (u_long)pvti->cur_lsn.offset, + (u_long)prev_lsnp->file, (u_long)prev_lsnp->offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + /* + * After the txn is prepared, the only valid log record for this txn + * is the commit record. + */ + if (pvti->status == TXN_STAT_PREPARE && type != DB___txn_regop) { + __db_errx(lvh->dbenv->env, DB_STR_A("2563", + "[%lu][%lu] Update action is performed in a " + "prepared transaction %lx.", "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)txnid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + pvti->cur_lsn = *lsnp; + pvti->flags = txnp->flags; + if (dbregid != INVAL_DBREGID && fid.size > 0 && + (ret = __add_file_updated(pvti, &fid, dbregid)) != 0) + goto err; + if ((ret = __put_txn_vrfy_info(lvh, pvti)) != 0) + goto err; +out: +err: + if (pvti != NULL && (ret2 = __free_txninfo(pvti)) != 0 && ret == 0) + ret = ret2; + if (fregp != NULL && + (ret2 = __free_filereg_info(fregp)) != 0 && ret == 0) + ret = ret2; + return (ret); +} + +/* + * Called whenever a new transaction is started, including child transactions. + */ +static int +__lv_on_new_txn (lvh, lsnp, txnp, type, dbregid, fid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + const DB_TXN *txnp; + u_int32_t type; + int32_t dbregid; + const DBT *fid; +{ + VRFY_TXN_INFO vti, *pvti, *vtip; + int ret, tret; + u_int32_t txnid; + ENV *env; + + ret = tret = 0; + txnid = txnp->txnid; + pvti = NULL; + memset(&vti, 0, sizeof(vti)); + vti.txnid = txnid; + env = lvh->dbenv->env; + /* Log record type, may be used later. Pass lint checks. 
*/ + COMPQUIET(type, 0); + + /* + * It's possible that the new txn is a child txn, we will decrement + * this value in __txn_child_verify when we realize this, because + * this value only records the number of outermost active txns. + */ + lvh->ntxn_active++; + + if ((ret = __get_txn_vrfy_info(lvh, txnid, &pvti)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND) + vtip = &vti; + else {/* The txnid is reused, may be illegal. */ + vtip = pvti; + /* + * If this txn id was recycled, this use is legal. A legal + * recyclable txnid is immediately not recycleable after + * it's recycled here. And it's impossible for vtip->status + * to be TXN_STAT_ACTIVE, since we have made it TXN_STAT_ABORT + * when we detected this txn id recycle just now. + */ + if (vtip->num_recycle > 0 && LOG_COMPARE(&(vtip->recycle_lsns + [vtip->num_recycle - 1]), lsnp) < 0) { + DB_ASSERT(env, vtip->status != TXN_STAT_ACTIVE); + if ((ret = __rem_last_recycle_lsn(vtip)) != 0) + goto err; + if ((ret = __clear_fileups(vtip)) != 0) + goto err; + + vtip->status = 0; + ZERO_LSN(vtip->prep_lsn); + ZERO_LSN(vtip->last_lsn); + + vtip->nchild_active = 0; + vtip->nchild_commit = 0; + vtip->nchild_abort = 0; + /* + * We may goto the else branch if this txn has child txns + * before any updates done on its behalf. So we should + * exclude this possiblilty to conclude a failed verification. + */ + } else if (vtip->nchild_active + vtip->nchild_commit + + vtip->nchild_abort == 0) { + __db_errx(lvh->dbenv->env, DB_STR_A("2564", + "[%lu][%lu] Transaction id %lx reused without " + "being recycled with a __txn_recycle.", + "%lu %lu %lx"), + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)txnid); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + } + + vtip->first_lsn = *lsnp; + vtip->cur_lsn = *lsnp; + vtip->flags = txnp->flags; + + /* + * It's possible that the first log rec does not update any file, + * like the __txn_child type of record. 
+ */ + if (fid->size > 0 && (ret = + __add_file_updated(vtip, fid, dbregid)) != 0) + goto err; + if ((ret = __put_txn_vrfy_info(lvh, vtip)) != 0) + goto err; + +err: + if (pvti != NULL && (tret = __free_txninfo(pvti)) != 0 && ret == 0) + ret = tret; + if ((tret = __free_txninfo_stack(&vti)) != 0 && ret == 0) + ret = tret; + + return (ret); +} + +/* Called when we detect that a new log file is used. */ +static int +__lv_new_logfile_vrfy(lvh, lsnp) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; +{ + int ret; + + ret = 0; + if (IS_ZERO_LSN(lvh->last_lsn) || lvh->last_lsn.file == lsnp->file) { + lvh->last_lsn = *lsnp; + return (0); + } + + /* + * If file number changed, it must have been incremented, + * and the offset is 0. + * */ + if (lsnp->file - lvh->last_lsn.file != 1 || lsnp->offset != + __lv_first_offset(lvh->dbenv->env)) { + __db_errx(lvh->dbenv->env, + "[%lu][%lu] Last log record verified ([%lu][%lu]) is not " + "immidiately before the current log record.", + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)lvh->last_lsn.file, (u_long)lvh->last_lsn.offset); + ret = DB_LOG_VERIFY_BAD; + ON_ERROR(lvh, DB_LOG_VERIFY_ERR); + } + + lvh->last_lsn = *lsnp; +err: + return (ret); +} + +static u_int32_t +__lv_first_offset(env) + ENV *env; +{ + u_int32_t sz; + + if (CRYPTO_ON(env)) + sz = HDR_CRYPTO_SZ; + else + sz = HDR_NORMAL_SZ; + + sz += sizeof(LOGP); + + return sz; +} + +/* Called when we see a non-transactional update log record. 
*/ +static int +__lv_on_nontxn_update(lvh, lsnp, txnid, logtype, fileid) + DB_LOG_VRFY_INFO *lvh; + const DB_LSN *lsnp; + u_int32_t txnid, logtype; + int32_t fileid; +{ + lvh->lrtypes[logtype]++; + COMPQUIET(txnid, 0); + if (fileid != INVAL_DBREGID) { + lvh->non_txnup_cnt++; + __db_msg(lvh->dbenv->env, DB_STR_A("2565", + "[%lu][%lu] Non-transactional update, " + "log type: %u, fileid: %d.", "%lu %lu %u %d"), + (u_long)lsnp->file, (u_long)lsnp->offset, logtype, fileid); + } + + return (0); +} + +static int +__lv_on_txn_aborted(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + int ret, ret2, sres; + VRFY_TXN_INFO *ptvi; + u_int32_t abtid; + DB_LSN lsn, slsn; + + ret = ret2 = sres = 0; + abtid = lvinfo->aborted_txnid; + lsn = lvinfo->aborted_txnlsn; + slsn = lvinfo->lv_config->start_lsn; + ptvi = NULL; + + if ((ret = __del_txn_pages(lvinfo, lvinfo->aborted_txnid)) != 0 && + ret != DB_NOTFOUND) + goto err;/* Some txns may have updated no pages. */ + ret = __get_txn_vrfy_info(lvinfo, lvinfo->aborted_txnid, &ptvi); + if (ret == DB_NOTFOUND && !F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) { + /* + * If verifying from slsn and the txn abtid started before + * slsn, it's expected that we can't find the txn. + */ + if (!IS_ZERO_LSN(slsn) && (ret2 = __txn_started(lvinfo, slsn, + abtid, &sres)) == 0 && sres != 0) { + ret = 0; + goto err; + } + if (ret2 != 0) + ret = ret2;/* Use the same error msg below. */ + __db_errx(lvinfo->dbenv->env, DB_STR_A("2566", + "[%lu][%lu] Can not find an active transaction's " + "information, txnid: %lx.", "%lu %lu %lx"), + (u_long)lsn.file, (u_long)lsn.offset, + (u_long)lvinfo->aborted_txnid); + ON_ERROR(lvinfo, DB_LOG_VERIFY_INTERR); + } + if (ptvi == NULL) { + if (ret == DB_NOTFOUND && + F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) + ret = 0; + goto out; + } + ptvi->status = TXN_STAT_ABORT; + lvinfo->ntxn_abort++; + lvinfo->ntxn_active--; + /* Report txn stats. 
*/ + if (F_ISSET(lvinfo, DB_LOG_VERIFY_VERBOSE)) { + __db_msg(lvinfo->dbenv->env, DB_STR_A("2567", + "[%lu][%lu] Txn %lx aborted after this log record.", + "%lu %lu %lx"), (u_long)lvinfo->aborted_txnlsn.file, + (u_long)lvinfo->aborted_txnlsn.offset, (u_long)ptvi->txnid); + __db_msg(lvinfo->dbenv->env, DB_STR_A("2568", + "\tThe number of active, committed and aborted child txns " + "of txn %lx: %u, %u, %u.", "%lx %u %u %u"), + (u_long)ptvi->txnid, ptvi->nchild_active, + ptvi->nchild_commit, ptvi->nchild_abort); + } + lvinfo->aborted_txnid = 0; + lvinfo->aborted_txnlsn.file = lvinfo->aborted_txnlsn.offset = 0; + if ((ret = __put_txn_vrfy_info(lvinfo, ptvi)) != 0) + goto err; + if ((ret = __free_txninfo(ptvi)) != 0) + goto err; +out: +err: + return (ret); +} diff --git a/src/log/log_verify_stub.c b/src/log/log_verify_stub.c new file mode 100644 index 00000000..32ceb49f --- /dev/null +++ b/src/log/log_verify_stub.c @@ -0,0 +1,79 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef HAVE_VERIFY + +#include "db_config.h" +#include "db_int.h" + +static int __db_log_novrfy __P((ENV *)); +int __log_verify_pp __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); +int __log_verify __P((DB_ENV *, const DB_LOG_VERIFY_CONFIG *)); +int __log_verify_wrap __P((ENV *env, const char *, u_int32_t, const char *, + const char *, time_t, time_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t, + int, int)); + +/* + * __db_log_novrfy -- + * Error when a Berkeley DB build doesn't include the access method. + */ +static int +__db_log_novrfy(env) + ENV *env; +{ + __db_errx(env, DB_STR("2523", + "library build did not include support for log verification")); + return (DB_OPNOTSUP); +} + +int +__log_verify_pp(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + COMPQUIET(lvconfig, NULL); + + /* The dbenv is intact, callers should properly take care of it. 
*/ + return (__db_log_novrfy(dbenv->env)); +} + +int +__log_verify(dbenv, lvconfig) + DB_ENV *dbenv; + const DB_LOG_VERIFY_CONFIG *lvconfig; +{ + COMPQUIET(lvconfig, NULL); + + return (__db_log_novrfy(dbenv->env)); +} + +int +__log_verify_wrap(env, envhome, cachesize, dbfile, dbname, + stime, etime, stfile, stoffset, efile, eoffset, caf, verbose) + ENV *env; + const char *envhome, *dbfile, *dbname; + time_t stime, etime; + u_int32_t cachesize, stfile, stoffset, efile, eoffset; + int caf, verbose; +{ + COMPQUIET(envhome, NULL); + COMPQUIET(dbfile, NULL); + COMPQUIET(dbname, NULL); + COMPQUIET(stime, 0); + COMPQUIET(etime, 0); + COMPQUIET(cachesize, 0); + COMPQUIET(stfile, 0); + COMPQUIET(stoffset, 0); + COMPQUIET(efile, 0); + COMPQUIET(eoffset, 0); + COMPQUIET(caf, 0); + COMPQUIET(verbose, 0); + return (__db_log_novrfy(env)); +} + +#endif /* !HAVE_VERIFY */ diff --git a/src/log/log_verify_util.c b/src/log/log_verify_util.c new file mode 100644 index 00000000..2cadd755 --- /dev/null +++ b/src/log/log_verify_util.c @@ -0,0 +1,2233 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * This file contains helper functions like data structure and in-memory db + * management, which are used to store various log verification information. 
+ */ +#include "db_config.h" +#include "db_int.h" + +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#include "dbinc/fop.h" + +#include "dbinc/log_verify.h" + +#define BDBOP(op) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + goto err; \ + } \ +} while (0) + +#define BDBOP2(dbenv, op, funct) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + __db_err(dbenv->env, ret, "\n%s", funct); \ + return (ret); \ + } \ +} while (0) + +#define BDBOP3(dbenv, op, excpt, funct) do { \ + ret = (op); \ + if (ret != 0) { \ + __lv_on_bdbop_err(ret); \ + if (ret != excpt) { \ + __db_err(dbenv->env, ret, "\n%s", funct); \ + return (ret); \ + } \ + } \ +} while (0) + +typedef int (*btcmp_funct)(DB *, const DBT *, const DBT *); +typedef int (*dupcmp_funct)(DB *, const DBT *, const DBT *); + +static int __lv_add_recycle_handler __P(( + DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *)); +static int __lv_add_recycle_lsn __P((VRFY_TXN_INFO *, const DB_LSN *)); +static size_t __lv_dbt_arrsz __P((const DBT *, u_int32_t)); +static int __lv_fidpgno_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_i32_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_lsn_cmp __P((DB *, const DBT *, const DBT *)); +static void __lv_on_bdbop_err __P((int)); +static int __lv_open_db __P((DB_ENV *, DB **, DB_THREAD_INFO *, + const char *, int, btcmp_funct, u_int32_t, dupcmp_funct)); +static int __lv_pack_filereg __P((const VRFY_FILEREG_INFO *, DBT *)); +static int __lv_pack_txn_vrfy_info __P(( + const VRFY_TXN_INFO *, DBT *, DBT *data)); +static int __lv_seccbk_fname __P((DB *, const DBT *, const DBT *, DBT *)); +static int __lv_seccbk_lsn __P((DB *, const DBT *, const DBT *, DBT *)); +static int __lv_seccbk_txnpg __P((DB *, const DBT *, const DBT *, DBT *)); +static void __lv_setup_logtype_names 
__P((DB_LOG_VRFY_INFO *lvinfo)); +static int __lv_txnrgns_lsn_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_ui32_cmp __P((DB *, const DBT *, const DBT *)); +static int __lv_unpack_txn_vrfy_info __P((VRFY_TXN_INFO **, const DBT *)); +static int __lv_unpack_filereg __P((const DBT *, VRFY_FILEREG_INFO **)); + +static void __lv_on_bdbop_err(ret) + int ret; +{ + /* Pass lint checks. We need the ret and this function for debugging. */ + COMPQUIET(ret, 0); +} + +/* + * __create_log_vrfy_info -- + * Initialize and return a log verification handle to be used throughout + * a verification process. + * + * PUBLIC: int __create_log_vrfy_info __P((const DB_LOG_VERIFY_CONFIG *, + * PUBLIC: DB_LOG_VRFY_INFO **, DB_THREAD_INFO *)); + */ +int +__create_log_vrfy_info(cfg, lvinfopp, ip) + const DB_LOG_VERIFY_CONFIG *cfg; + DB_LOG_VRFY_INFO **lvinfopp; + DB_THREAD_INFO *ip; +{ + const char *envhome; + int inmem, ret; + u_int32_t cachesz, envflags; + const char *dbf1, *dbf2, *dbf3, *dbf4, *dbf5, *dbf6, *dbf7, *dbf8, + *dbf9, *dbf10, *dbf11; + DB_LOG_VRFY_INFO *lvinfop; + + dbf1 = "__db_log_vrfy_txninfo.db"; + dbf2 = "__db_log_vrfy_fileregs.db"; + dbf3 = "__db_log_vrfy_pgtxn.db"; + dbf4 = "__db_log_vrfy_lsntime.db"; + dbf5 = "__db_log_vrfy_timelsn.db"; + dbf6 = "__db_log_vrfy_ckps.db"; + dbf7 = "__db_log_vrfy_dbregids.db"; + dbf8 = "__db_log_vrfy_fnameuid.db"; + dbf9 = "__db_log_vrfy_timerange.db"; + dbf10 = "__db_log_vrfy_txnaborts.db"; + dbf11 = "__db_log_vrfy_txnpg.db"; + + envhome = cfg->temp_envhome; + lvinfop = NULL; + cachesz = cfg->cachesize; + if (cachesz== 0) + cachesz = 1024 * 1024 * 256; + + BDBOP(__os_malloc(NULL, sizeof(DB_LOG_VRFY_INFO), &lvinfop)); + memset(lvinfop, 0, sizeof(DB_LOG_VRFY_INFO)); + lvinfop->ip = ip; + __lv_setup_logtype_names(lvinfop); + /* Avoid the VERIFY_PARTIAL bit being cleared if no ckp_lsn exists. 
*/ + lvinfop->valid_lsn.file = lvinfop->valid_lsn.offset = (u_int32_t)-1; + + /* + * The envhome parameter determines if we will use an in-memory + * environment and databases. + */ + if (envhome == NULL) { + envflags = DB_PRIVATE; + inmem = 1; + } else { + envflags = 0; + inmem = 0; + } + + /* Create log verify internal database environment. */ + BDBOP(db_env_create(&lvinfop->dbenv, 0)); + BDBOP(__memp_set_cachesize(lvinfop->dbenv, 0, cachesz, 1)); + /* + * Log verification internal db environment should be accessed + * single-threaded. No transaction semantics needed. + */ + BDBOP(__env_open(lvinfop->dbenv, envhome, + envflags | DB_CREATE | DB_INIT_MPOOL, 0666)); + + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txninfo, ip, dbf1, + inmem, __lv_ui32_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fileregs, ip, dbf2, + inmem, NULL, 0, NULL)); + + /* No dup allowed, always overwrite data with same key. */ + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->dbregids, ip, dbf7, + inmem, __lv_i32_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->pgtxn, ip, dbf3, + inmem, __lv_fidpgno_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnpg, ip, dbf11, + inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_fidpgno_cmp)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->lsntime, ip, dbf4, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->timelsn, ip, dbf5, + inmem, __lv_i32_cmp, DB_DUP | DB_DUPSORT, __lv_lsn_cmp)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnaborts, ip, dbf10, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->ckps, ip, dbf6, + inmem, __lv_lsn_cmp, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->fnameuid, ip, dbf8, + inmem, NULL, 0, NULL)); + BDBOP(__lv_open_db(lvinfop->dbenv, &lvinfop->txnrngs, ip, dbf9, + inmem, __lv_ui32_cmp, DB_DUP | DB_DUPSORT, __lv_txnrgns_lsn_cmp)); + + BDBOP(__db_associate(lvinfop->lsntime, ip, NULL, + lvinfop->timelsn, 
__lv_seccbk_lsn, DB_CREATE)); + BDBOP(__db_associate(lvinfop->fileregs, ip, NULL, + lvinfop->fnameuid, __lv_seccbk_fname, DB_CREATE)); + BDBOP(__db_associate(lvinfop->pgtxn, ip, NULL, + lvinfop->txnpg, __lv_seccbk_txnpg, DB_CREATE)); + + *lvinfopp = lvinfop; + + return (0); +err: + if (lvinfop->dbenv && ret != 0) + __db_err(lvinfop->dbenv->env, ret, "__create_log_vrfy_info"); + (void)__destroy_log_vrfy_info(lvinfop); + + return (ret); +} + +/* + * __destroy_log_vrfy_info -- + * Destroy and free a log verification handle. + * + * PUBLIC: int __destroy_log_vrfy_info __P((DB_LOG_VRFY_INFO *)); + */ +int +__destroy_log_vrfy_info(lvinfop) + DB_LOG_VRFY_INFO *lvinfop; +{ + int ret; + + ret = 0; + if (lvinfop == NULL) + return (0); + + if (lvinfop->txnaborts != NULL && + (ret = __db_close(lvinfop->txnaborts, NULL, 0)) != 0) + goto err; + if (lvinfop->txninfo != NULL && + (ret = __db_close(lvinfop->txninfo, NULL, 0)) != 0) + goto err; + if (lvinfop->dbregids != NULL && + (ret = __db_close(lvinfop->dbregids, NULL, 0)) != 0) + goto err; + if (lvinfop->fileregs != NULL && + (ret = __db_close(lvinfop->fileregs, NULL, 0)) != 0) + goto err; + if (lvinfop->pgtxn != NULL && + (ret = __db_close(lvinfop->pgtxn, NULL, 0)) != 0) + goto err; + if (lvinfop->lsntime != NULL && + (ret = __db_close(lvinfop->lsntime, NULL, 0)) != 0) + goto err; + if (lvinfop->ckps != NULL && + (ret = __db_close(lvinfop->ckps, NULL, 0)) != 0) + goto err; + if (lvinfop->txnrngs != NULL && + (ret = __db_close(lvinfop->txnrngs, NULL, 0)) != 0) + goto err; + if (lvinfop->fnameuid != NULL && + (ret = __db_close(lvinfop->fnameuid, NULL, 0)) != 0) + goto err; + if (lvinfop->timelsn != NULL && + (ret = __db_close(lvinfop->timelsn, NULL, 0)) != 0) + goto err; + if (lvinfop->txnpg != NULL && + (ret = __db_close(lvinfop->txnpg, NULL, 0)) != 0) + goto err; + if (lvinfop->dbenv != NULL && + (ret = __env_close(lvinfop->dbenv, 0)) != 0) + goto err; +err: + __os_free(NULL, lvinfop); + + return (ret); +} + +/* Seocndary 
index callback function for DB_LOG_VRFY_INFO->timelsn. */ +static int +__lv_seccbk_fname(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + int ret, tret; + VRFY_FILEREG_INFO *freg; + char *buf; + size_t buflen, slen; + + ret = tret = 0; + COMPQUIET(key, NULL); + if ((ret = __lv_unpack_filereg(data, &freg)) != 0) + goto out; + if (freg->fname == NULL || (slen = strlen(freg->fname)) == 0) { + ret = DB_DONOTINDEX; + goto out; + } + + buflen = (slen + 1) * sizeof(char); + if ((ret = __os_umalloc(secdb->dbenv->env, buflen, &buf)) != 0) + goto out; + (void)strcpy(buf, freg->fname); + result->size = (u_int32_t)buflen; + result->flags |= DB_DBT_APPMALLOC; + result->data = buf; +out: + if (freg != NULL && (tret = __free_filereg_info(freg)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* Seocndary index callback function for DB_LOG_VRFY_INFO->txnpg. */ +static int +__lv_seccbk_txnpg(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + COMPQUIET(key, NULL); + COMPQUIET(secdb, NULL); + /* Txnid is the secondary key, and it's all the data dbt has. */ + result->data = data->data; + result->size = data->size; + + return (0); +} + +/* Seocndary index callback function for DB_LOG_VRFY_INFO->timelsn. */ +static int +__lv_seccbk_lsn(secdb, key, data, result) + DB *secdb; + const DBT *key; + const DBT *data; + DBT *result; +{ + VRFY_TIMESTAMP_INFO *lvti; + + COMPQUIET(key, NULL); + COMPQUIET(secdb, NULL); + + lvti = (VRFY_TIMESTAMP_INFO *)data->data; + result->data = &(lvti->timestamp); + result->size = sizeof(lvti->timestamp); + + return (0); +} + +/* + * Open a BTREE database handle, optionally set the btree compare function + * and flags if any. 
+ */ +static int +__lv_open_db(dbenv, dbpp, ip, name, inmem, cmpf, sflags, dupcmpf) + DB_ENV *dbenv; + DB **dbpp; + const char *name; + int inmem; + btcmp_funct cmpf; + u_int32_t sflags; + dupcmp_funct dupcmpf; + DB_THREAD_INFO *ip; +{ + int ret; + const char *dbfname, *dbname; + DB *dbp; + + dbp = NULL; + ret = 0; + if (inmem) { + dbfname = NULL; + dbname = name; + } else { + dbfname = name; + dbname = NULL; + } + + BDBOP(db_create(&dbp, dbenv, 0)); + + if (cmpf != NULL) + BDBOP(__bam_set_bt_compare(dbp, cmpf)); + if (dupcmpf != NULL) + dbp->dup_compare = dupcmpf; + if (sflags != 0) + BDBOP(__db_set_flags(dbp, sflags)); + /* No concurrency needed, a big page size reduces overflow pages. */ + BDBOP(__db_set_pagesize(dbp, 16 * 1024)); + + BDBOP(__db_open(dbp, ip, NULL, dbfname, dbname, DB_BTREE, DB_CREATE, + 0666, PGNO_BASE_MD)); + + *dbpp = dbp; + + return (0); +err: + if (dbenv != NULL && ret != 0) + __db_err(dbenv->env, ret, "__lv_open_db"); + if (dbp != NULL) + (void)__db_close(dbp, NULL, 0); + + return (ret); +} + +/* Btree compare function for a [fileid, pgno] key. */ +static int +__lv_fidpgno_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + db_pgno_t pgno1, pgno2; + int ret; + size_t len; + + COMPQUIET(db, NULL); + len = DB_FILE_ID_LEN; + ret = memcmp(dbt1->data, dbt2->data, len); + if (ret == 0) { + memcpy(&pgno1, (u_int8_t *)dbt1->data + len, + sizeof(pgno1)); + memcpy(&pgno2, (u_int8_t *)dbt2->data + len, + sizeof(pgno2)); + ret = NUMCMP(pgno1, pgno2); + } + + return (ret); +} + +/* Btree compare function for a int32_t type of key. */ +static int +__lv_i32_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + int32_t k1, k2; + + COMPQUIET(db, NULL); + memcpy(&k1, dbt1->data, sizeof(k1)); + memcpy(&k2, dbt2->data, sizeof(k2)); + + return (NUMCMP(k1, k2)); +} + +/* Btree compare function for a u_int32_t type of key. 
*/ +static int +__lv_ui32_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + u_int32_t k1, k2; + + COMPQUIET(db, NULL); + memcpy(&k1, dbt1->data, sizeof(k1)); + memcpy(&k2, dbt2->data, sizeof(k2)); + + return (NUMCMP(k1, k2)); +} + +/* Btree compare function for a DB_LSN type of key. */ +static int +__lv_lsn_cmp(db, dbt1, dbt2) + DB *db; + const DBT *dbt1; + const DBT *dbt2; +{ + DB_LSN lsn1, lsn2; + + DB_ASSERT(db->env, dbt1->size == sizeof(DB_LSN)); + DB_ASSERT(db->env, dbt2->size == sizeof(DB_LSN)); + memcpy(&lsn1, dbt1->data, sizeof(DB_LSN)); + memcpy(&lsn2, dbt2->data, sizeof(DB_LSN)); + + return (LOG_COMPARE(&lsn1, &lsn2)); +} + +/* + * Structure management routines. We keep each structure on a + * consecutive memory chunk. + * + * The get functions will allocate memory via __os_malloc, and callers + * should free the memory after use. The update functions for VRFY_TXN_INFO + * and VRFY_FILEREG_INFO may realloc the structure. + */ + +/* + * PUBLIC: int __put_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_TXN_INFO *)); + */ +int +__put_txn_vrfy_info (lvinfo, txninfop) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_TXN_INFO *txninfop; +{ + int ret; + DBT key, data; + + ret = __lv_pack_txn_vrfy_info(txninfop, &key, &data); + DB_ASSERT(lvinfo->dbenv->env, ret == 0); + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->txninfo, lvinfo->ip, NULL, + &key, &data, 0), "__put_txn_vrfy_info"); + __os_free(lvinfo->dbenv->env, data.data); + + return (0); +} + +/* Construct a key and data DBT from the structure. 
*/ +static int +__lv_pack_txn_vrfy_info(txninfop, key, data) + const VRFY_TXN_INFO *txninfop; + DBT *key, *data; +{ + int ret; + char *buf, *p; + size_t bufsz, len; + u_int32_t i; + DBT *pdbt; + + memset(key, 0, sizeof(DBT)); + memset(data, 0, sizeof(DBT)); + ret = 0; + bufsz = TXN_VERIFY_INFO_TOTSIZE(*txninfop); + + if ((ret = __os_malloc(NULL, bufsz, &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + memcpy(buf, txninfop, TXN_VERIFY_INFO_FIXSIZE); + p = buf + TXN_VERIFY_INFO_FIXSIZE; + memcpy(p, txninfop->recycle_lsns, len = sizeof(DB_LSN) * + txninfop->num_recycle); + p += len; + + for (i = 0; i < txninfop->filenum; i++) { + + pdbt = &(txninfop->fileups[i]); + memcpy(p, &(pdbt->size), sizeof(pdbt->size)); + p += sizeof(pdbt->size); + memcpy(p, pdbt->data, pdbt->size); + p += pdbt->size; + } + + key->data = (void *)&txninfop->txnid; + key->size = sizeof(txninfop->txnid); + data->data = buf; + data->size = (u_int32_t)bufsz; + data->flags |= DB_DBT_MALLOC; +err: + return (ret); +} + +/* Calculate a DBT array's total number of bytes to store. */ +static size_t +__lv_dbt_arrsz(arr, arrlen) + const DBT *arr; + u_int32_t arrlen; +{ + u_int32_t i; + size_t sz; + + sz = 0; + + /* For each DBT object, store its size and its data bytes. */ + for (i = 0; i < arrlen; i++) + sz += arr[i].size + sizeof(arr[i].size); + + return sz; +} + +/* + * __get_txn_vrfy_info -- + * Get a VRFY_TXN_INFO object from db by txnid. Callers should free the + * object by calling __free_txninfo. 
+ * + * PUBLIC: int __get_txn_vrfy_info __P((const DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: VRFY_TXN_INFO **)); + */ +int +__get_txn_vrfy_info (lvinfo, txnid, txninfopp) + const DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; + VRFY_TXN_INFO **txninfopp; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->txninfo, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_txn_vrfy_info"); + + if (ret != DB_NOTFOUND) + ret = __lv_unpack_txn_vrfy_info(txninfopp, &data); + + return (ret); +} + +/* Construct a structure from a DBT. */ +static int +__lv_unpack_txn_vrfy_info(txninfopp, data) + VRFY_TXN_INFO **txninfopp; + const DBT *data; +{ + size_t bufsz; + VRFY_TXN_INFO *buf, *txninfop; + DB_LSN *lsns, *p; + u_int32_t i, sz; + char *pb, *q; + int ret; + + ret = 0; + i = sz = 0; + lsns = p = NULL; + pb = q = NULL; + txninfop = (VRFY_TXN_INFO *)data->data; + lsns = (DB_LSN *)((char *)data->data + TXN_VERIFY_INFO_FIXSIZE); + pb = (char *)lsns + txninfop->num_recycle * sizeof(DB_LSN); + + if ((ret = __os_malloc(NULL, bufsz = sizeof(VRFY_TXN_INFO), &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + memcpy(buf, data->data, TXN_VERIFY_INFO_FIXSIZE); + + if (txninfop->num_recycle != 0) { + if ((ret = __os_malloc(NULL, + txninfop->num_recycle * sizeof(DB_LSN), &p)) != 0) + goto err; + memcpy(p, lsns, txninfop->num_recycle * sizeof(DB_LSN)); + buf->recycle_lsns = p; + } + + if (txninfop->filenum != 0) { + if ((ret = __os_malloc(NULL, + txninfop->filenum * sizeof(DBT), &q)) != 0) + goto err; + memset(q, 0, txninfop->filenum * sizeof(DBT)); + buf->fileups = (DBT *)q; + for (i = 0; i < txninfop->filenum; i++) { + memcpy(&sz, pb, sizeof(sz)); + pb += sizeof(sz); + if ((ret = __os_malloc(NULL, sz, &q)) != 0) + goto err; + memcpy(q, pb, sz); + pb += sz; + + buf->fileups[i].data = q; + buf->fileups[i].size = sz; + } + } + + *txninfopp = buf; +err: + 
return (ret); +} + +static int +__lv_add_recycle_lsn (txninfop, lsn) + VRFY_TXN_INFO *txninfop; + const DB_LSN *lsn; +{ + int ret; + + ret = 0; + txninfop->num_recycle++; + if ((ret = __os_realloc(NULL, txninfop->num_recycle * sizeof(DB_LSN), + &(txninfop->recycle_lsns))) != 0) + goto err; + txninfop->recycle_lsns[txninfop->num_recycle - 1] = *lsn; +err: + return (ret); +} + +/* + * __add_recycle_lsn_range -- + * Add recycle info for each txn within the recycled txnid range. + * + * PUBLIC: int __add_recycle_lsn_range __P((DB_LOG_VRFY_INFO *, + * PUBLIC: const DB_LSN *, u_int32_t, u_int32_t)); + */ +int +__add_recycle_lsn_range(lvinfo, lsn, min, max) + DB_LOG_VRFY_INFO *lvinfo; + const DB_LSN *lsn; + u_int32_t min, max; +{ + DBC *csr; + int ret, tret; + u_int32_t i; + DBT key2, data2; + struct __add_recycle_params param; + + csr = NULL; + ret = tret = 0; + memset(&key2, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + memset(¶m, 0, sizeof(param)); + + if ((ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_TXN_INFO *) * + (param.ti2ul = 1024), &(param.ti2u))) != 0) + goto err; + param.ti2ui = 0; + param.recycle_lsn = *lsn; + param.min = min; + param.max = max; + + /* Iterate the specified range and process each transaction. */ + if ((ret = __iterate_txninfo(lvinfo, min, max, __lv_add_recycle_handler, + ¶m)) != 0) + goto err; + + /* + * Save updated txninfo structures. We can't do so in the above + * iteration, so we have to save them here. + */ + BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK)); + + for (i = 0; i < param.ti2ui; i++) { + ret = __lv_pack_txn_vrfy_info(param.ti2u[i], &key2, &data2); + DB_ASSERT(lvinfo->dbenv->env, ret == 0); + BDBOP(__dbc_put(csr, &key2, &data2, DB_KEYLAST)); + /* + * key2.data refers to param.ti2u[i]'s memory, data2.data is + * freed by DB since we set DB_DBT_MALLOC. 
+ */ + if ((ret = __free_txninfo(param.ti2u[i])) != 0) + goto err; + } + +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + __os_free(lvinfo->dbenv->env, param.ti2u); + if (ret != 0) + __db_err(lvinfo->dbenv->env, ret, + "__add_recycle_lsn_range"); + + return (ret); +} + +/* + * __iterate_txninfo -- + * Iterate throught the transaction info database as fast as possible, + * and process each key/data pair using a callback handler. Break the + * iteration if the handler returns non-zero values. + * + * PUBLIC: int __iterate_txninfo __P((DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: u_int32_t, TXNINFO_HANDLER, void *)); + */ +int +__iterate_txninfo(lvinfo, min, max, handler, param) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t min, max; + TXNINFO_HANDLER handler; + void *param; +{ + ENV *env; + VRFY_TXN_INFO *txninfop; + int ret, tret; + u_int32_t bufsz, pgsz, txnid; + size_t retkl, retdl; + char *btbuf; + u_int8_t *retk, *retd; + DBT key, data, data2; + DBC *csr; + void *p; + + csr = NULL; + env = lvinfo->dbenv->env; + txninfop = NULL; + ret = tret = 0; + txnid = 0; + retkl = retdl = 0; + bufsz = 64 * 1024; + btbuf = NULL; + retk = retd = NULL; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + + pgsz = lvinfo->txninfo->pgsize; + DB_ASSERT(env, ret == 0); + + if (bufsz % pgsz != 0) + bufsz = pgsz * (bufsz / pgsz); + + if ((ret = __os_malloc(env, bufsz, &btbuf)) != 0) + goto err; + + BDBOP(__db_cursor(lvinfo->txninfo, lvinfo->ip, NULL, &csr, DBC_BULK)); + + /* + * Use bulk retrieval to scan the database as fast as possible. + */ + data.data = btbuf; + data.ulen = bufsz; + data.flags |= DB_DBT_USERMEM; + + for (ret = __dbc_get(csr, &key, &data, DB_FIRST | DB_MULTIPLE_KEY) ;; + ret = __dbc_get(csr, &key, &data, DB_NEXT | DB_MULTIPLE_KEY)) { + switch (ret) { + case 0: + break; + case DB_NOTFOUND: + goto out; + /* No break statement allowed by lint here. 
*/ + case DB_BUFFER_SMALL: + if ((ret = __os_realloc(lvinfo->dbenv->env, + bufsz *= 2, &btbuf)) != 0) + goto out; + data.ulen = bufsz; + data.data = btbuf; + continue;/* Continue the for-loop. */ + /* No break statement allowed by lint here. */ + default: + goto err; + } + + /* + * Do bulk get. Some txninfo objects may be updated by the + * handler, but we can't store them immediately in the same + * loop because we wouldn't be able to continue the bulk get + * using the same cursor; and we can't use another cursor + * otherwise we may self-block. In the handler we need to + * store the updated objects and store them to db when we get + * out of this loop. + */ + DB_MULTIPLE_INIT(p, &data); + while (1) { + DB_MULTIPLE_KEY_NEXT(p, &data, + retk, retkl, retd, retdl); + if (p == NULL) + break; + DB_ASSERT(env, retkl == sizeof(txnid) && retk != NULL); + memcpy(&txnid, retk, retkl); + /* + * Process it if txnid in range or no range specified. + * The range must be a closed one. + */ + if ((min != 0 && txnid >= min && max != 0 && + txnid <= max) || (min == 0 && max == 0)) { + data2.data = retd; + data2.size = (u_int32_t)retdl; + + if ((ret = __lv_unpack_txn_vrfy_info( + &txninfop, &data2)) != 0) + goto out; + if ((ret = handler(lvinfo, txninfop, + param)) != 0) + /* Stop the iteration on error. */ + goto out; + } + } + + } +out: + if (ret == DB_NOTFOUND) + ret = 0; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + __os_free(lvinfo->dbenv->env, btbuf); + return (ret); +} + +/* Txninfo iteration handler to add recycle info for affected txns. */ +static int +__lv_add_recycle_handler(lvinfo, txninfop, params) + DB_LOG_VRFY_INFO *lvinfo; + VRFY_TXN_INFO *txninfop; + void *params; +{ + int ret; + struct __add_recycle_params *param; + + ret = 0; + param = (struct __add_recycle_params *)params; + + /* + * If the txnid is reused, update its recycle info and note it for + * later update, otherwise free the txninfop structure. 
+	 */
+	if (txninfop->txnid < param->min || txninfop->txnid > param->max) {
+		ret = __free_txninfo(txninfop);
+		return (ret);
+	}
+
+	ret = __lv_add_recycle_lsn(txninfop, &(param->recycle_lsn));
+
+	if (ret != 0)
+		goto err;
+	/*
+	 * Below is one way to tell if a txn is aborted without doing another
+	 * backward pass of the log. However if the txn id is not in the
+	 * chosen recycled txn id range, we can't tell, until all the log
+	 * records are passed --- the remaining active txns are the aborted
+	 * txns.
+	 * No longer needed since we did another backward pass of the log
+	 * and have all the txn lifetimes.
+	if (txninfop->status == TXN_STAT_ACTIVE)
+		__on_txn_abort(lvinfo, txninfop);
+	*/
+	if (txninfop->status == TXN_STAT_PREPARE) {
+		__db_errx(lvinfo->dbenv->env,
+		    "[ERROR] Transaction with ID %u is prepared and not "
+		    "committed, but its ID is recycled by log record [%u, %u].",
+		    txninfop->txnid, param->recycle_lsn.file,
+		    param->recycle_lsn.offset);
+	}
+	/* Note down to store later. */
+	param->ti2u[(param->ti2ui)++] = txninfop;
+	if (param->ti2ui == param->ti2ul)
+		BDBOP(__os_realloc(lvinfo->dbenv->env,
+		    sizeof(VRFY_TXN_INFO *) * (param->ti2ul *= 2),
+		    &(param->ti2u)));
+err:
+	return (ret);
+
+}
+/*
+ * PUBLIC: int __rem_last_recycle_lsn __P((VRFY_TXN_INFO *));
+ */
+int
+__rem_last_recycle_lsn(txninfop)
+	VRFY_TXN_INFO *txninfop;
+{
+	int ret;
+
+	ret = 0;
+	if (txninfop->num_recycle == 0)
+		return (0);
+	txninfop->num_recycle--;
+	if (txninfop->num_recycle > 0)
+		BDBOP(__os_realloc(NULL, txninfop->num_recycle * sizeof(DB_LSN),
+		    &(txninfop->recycle_lsns)));
+	else {
+		__os_free(NULL, txninfop->recycle_lsns);
+		txninfop->recycle_lsns = NULL;
+	}
+err:
+	return (ret);
+
+}
+
+/*
+ * __add_file_updated --
+ *	Add a file's dbregid and uid to the updating txn if it's not yet
+ *	recorded.
+ * + * PUBLIC: int __add_file_updated __P((VRFY_TXN_INFO *, const DBT *, int32_t)); + */ +int +__add_file_updated (txninfop, fileid, dbregid) + VRFY_TXN_INFO *txninfop; + const DBT *fileid; + int32_t dbregid; +{ + int ret; + DBT *pdbt, *p; + u_int32_t found, i; + + ret = 0; + p = pdbt = NULL; + + for (found = 0, i = 0; i < txninfop->filenum; i++) { + p = &(txninfop->fileups[i]); + if (p->size == fileid->size && + memcmp(p->data, fileid->data, p->size) == 0) { + found = 1; + break; + } + } + + if (found) + return (0); + + /* Add file's uid into the array, deep copy from fileid. */ + txninfop->filenum++; + if ((ret = __os_realloc(NULL, txninfop->filenum * + sizeof(DBT), &(txninfop->fileups))) != 0) + goto err; + + pdbt = &(txninfop->fileups[txninfop->filenum - 1]); + memset(pdbt, 0, sizeof(DBT)); + if ((ret = __os_malloc(NULL, + pdbt->size = fileid->size, &(pdbt->data))) != 0) + goto err; + memcpy(pdbt->data, fileid->data, fileid->size); + + /* Add file dbregid into the array. */ + BDBOP(__os_realloc(NULL, txninfop->filenum * + sizeof(int32_t), &(txninfop->dbregid))); + txninfop->dbregid[txninfop->filenum - 1] = dbregid; +err: + return (ret); +} + +/* + * PUBLIC: int __del_file_updated __P((VRFY_TXN_INFO *, const DBT *)); + */ +int +__del_file_updated (txninfop, fileid) + VRFY_TXN_INFO *txninfop; + const DBT *fileid; +{ + u_int32_t found, i; + int ret; + DBT *p; + void *pdbtdata; + + ret = 0; + + if (txninfop->filenum == 0) + return (0); + + /* + * If the array has an element identical to fileid, remove it. fileid + * itself is intact after this function call. 
+ */ + for (found = 0, i = 0, pdbtdata = NULL; i < txninfop->filenum; i++) { + p = &(txninfop->fileups[i]); + if (p->size == fileid->size && + memcmp(p->data, fileid->data, p->size) == 0) { + pdbtdata = p->data; + if (txninfop->filenum > 1) { + memmove(txninfop->fileups + i, txninfop-> + fileups + i + 1, sizeof(DBT) * (txninfop-> + filenum - (i + 1))); + memmove(txninfop->dbregid + i, txninfop-> + dbregid + i + 1, sizeof(int32_t) * + (txninfop->filenum - (i + 1))); + } else { + __os_free(NULL, txninfop->fileups); + __os_free(NULL, txninfop->dbregid); + txninfop->fileups = NULL; + txninfop->dbregid = NULL; + } + found = 1; + break; + } + } + + if (found) { + txninfop->filenum--; + if (txninfop->filenum) { + BDBOP(__os_realloc(NULL, sizeof(DBT) * + txninfop->filenum, &(txninfop->fileups))); + BDBOP(__os_realloc(NULL, sizeof(int32_t) * + txninfop->filenum, &(txninfop->dbregid))); + } + __os_free(NULL, pdbtdata); + } +err: + return (ret); +} + +/* + * PUBLIC: int __clear_fileups __P((VRFY_TXN_INFO *)); + */ +int +__clear_fileups(txninfop) + VRFY_TXN_INFO *txninfop; +{ + u_int32_t i; + + for (i = 0; i < txninfop->filenum; i++) + __os_free(NULL, txninfop->fileups[i].data); + + __os_free(NULL, txninfop->fileups); + __os_free(NULL, txninfop->dbregid); + txninfop->fileups = NULL; + txninfop->dbregid = NULL; + txninfop->filenum = 0; + + return (0); +} + +/* + * __free_txninfo_stack -- + * The object is on stack, only free its internal memory, not itself. 
+ * PUBLIC: int __free_txninfo_stack __P((VRFY_TXN_INFO *)); + */ +int +__free_txninfo_stack (p) + VRFY_TXN_INFO *p; +{ + u_int32_t i; + + if (p == NULL) + return (0); + + if (p->fileups != NULL) { + for (i = 0; i < p->filenum; i++) + __os_free(NULL, p->fileups[i].data); + __os_free(NULL, p->fileups); + } + + if (p->dbregid != NULL) + __os_free(NULL, p->dbregid); + + if (p->recycle_lsns != NULL) + __os_free(NULL, p->recycle_lsns); + + return (0); +} +/* + * PUBLIC: int __free_txninfo __P((VRFY_TXN_INFO *)); + */ +int +__free_txninfo(p) + VRFY_TXN_INFO *p; +{ + (void)__free_txninfo_stack(p); + __os_free(NULL, p); + + return (0); +} + +/* Construct a key and data DBT from the structure. */ +static int +__lv_pack_filereg(freginfo, data) + const VRFY_FILEREG_INFO *freginfo; + DBT *data; +{ + char *buf, *p; + size_t bufsz, offset; + int ret; + + ret = 0; + if ((ret = __os_malloc(NULL, + bufsz = FILE_REG_INFO_TOTSIZE(*freginfo), &buf)) != 0) + goto err; + memset(buf, 0, bufsz); + + memcpy(buf, freginfo, FILE_REG_INFO_FIXSIZE); + p = buf + FILE_REG_INFO_FIXSIZE; + + offset = sizeof(int32_t) * freginfo->regcnt; + memcpy(p, freginfo->dbregids, offset); + p += offset; + + memcpy(p, &(freginfo->fileid.size), sizeof(freginfo->fileid.size)); + p += sizeof(freginfo->fileid.size); + memcpy(p, freginfo->fileid.data, freginfo->fileid.size); + p += freginfo->fileid.size; + (void)strcpy(p, freginfo->fname); + + data->data = buf; + data->size = (u_int32_t)bufsz; +err: + return (ret); +} + +/* + * PUBLIC: int __put_filereg_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_FILEREG_INFO *)); + */ +int __put_filereg_info (lvinfo, freginfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_FILEREG_INFO *freginfo; +{ + + int ret; + DBT data; + + memset(&data, 0, sizeof(DBT)); + + if ((ret = __lv_pack_filereg(freginfo, &data)) != 0) + goto err; + + /* + * We store dbregid-filereg map into dbregids.db, but we can't make + * dbregids.db the sec db of fileregs.db, because dbregid is only 
+ * valid when a db file is open, we want to delete data with same + * key in dbregids.db, but we want to keep all filereg_info data in + * fileregs.db to track all db file lifetime and status. + * + * Consequently we will store dbregid-file_uid in dbregs.db, so that we + * can delete dbregid when the db handle is closed, and we can + * use the dbregid to get the currently open db file's uid. + */ + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->fileregs, lvinfo->ip, NULL, + (DBT *)&(freginfo->fileid), &data, 0), "__put_filereg_info"); + +err: + if (data.data != NULL) + __os_free(lvinfo->dbenv->env, data.data); + + return (ret); +} + +/* + * PUBLIC: int __del_filelife __P((const DB_LOG_VRFY_INFO *, int32_t)); + */ +int +__del_filelife(lvinfo, dbregid) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; +{ + int ret; + DBT key; + + memset(&key, 0, sizeof(DBT)); + key.data = &(dbregid); + key.size = sizeof(dbregid); + + if ((ret = __db_del(lvinfo->dbregids, lvinfo->ip, NULL, + &key, 0)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __put_filelife __P((const DB_LOG_VRFY_INFO *, VRFY_FILELIFE *)); + */ +int +__put_filelife (lvinfo, pflife) + const DB_LOG_VRFY_INFO *lvinfo; + VRFY_FILELIFE *pflife; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &(pflife->dbregid); + key.size = sizeof(pflife->dbregid); + data.data = pflife; + data.size = sizeof(VRFY_FILELIFE); + + if ((ret = __db_put(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __get_filelife __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, VRFY_FILELIFE **)); + */ +int +__get_filelife (lvinfo, dbregid, flifepp) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + VRFY_FILELIFE **flifepp; +{ + int ret; + DBT key, data; + VRFY_FILELIFE *flifep; + + ret = 0; + flifep = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &dbregid; 
+ key.size = sizeof(dbregid); + if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) + goto err; + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_FILELIFE), &flifep)) != 0) + goto err; + DB_ASSERT(lvinfo->dbenv->env, flifep != NULL); + memcpy(flifep, data.data, sizeof(VRFY_FILELIFE)); + *flifepp = flifep; +err: + return (ret); +} + +/* + * PUBLIC: int __get_filereg_by_dbregid __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, VRFY_FILEREG_INFO **)); + */ +int +__get_filereg_by_dbregid(lvinfo, dbregid, freginfopp) + const DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + VRFY_FILEREG_INFO **freginfopp; +{ + int ret; + DBT key, data; + char uid[DB_FILE_ID_LEN]; + VRFY_FILELIFE *pflife; + + memset(&data, 0, sizeof(DBT)); + memset(&key, 0, sizeof(DBT)); + key.data = &dbregid; + key.size = sizeof(dbregid); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid"); + if (ret == DB_NOTFOUND) + goto err; + + /* Use the file-uid as key to retrieve from fileregs.db. 
*/ + pflife = (VRFY_FILELIFE *)data.data; + memcpy((void *)uid, (void *)pflife->fileid, key.size = DB_FILE_ID_LEN); + + key.data = (void *)uid; + memset(&data, 0, sizeof(DBT)); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_filereg_by_dbregid"); + if (ret == DB_NOTFOUND) + goto err; + if ((ret = __lv_unpack_filereg(&data, freginfopp)) != 0) + goto err; + +err: + return (ret); +} + +/* + * PUBLIC: int __add_dbregid __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *, + * PUBLIC: int32_t, u_int32_t, DB_LSN, DBTYPE, db_pgno_t, int *)); + */ +int +__add_dbregid(lvh, freg, dbregid, opcode, lsn, dbtype, meta_pgno, addp) + DB_LOG_VRFY_INFO *lvh; + VRFY_FILEREG_INFO *freg; + int32_t dbregid; + u_int32_t opcode; + DB_LSN lsn; + DBTYPE dbtype; + db_pgno_t meta_pgno; + int *addp; +{ + int inarray, ret, tret; + u_int32_t i, j; + VRFY_FILELIFE flife; + + inarray = ret = tret = 0; + for (i = 0; i < freg->regcnt; i++) { + if (freg->dbregids[i] == dbregid) { + if (!IS_DBREG_CLOSE(opcode)) { + /* Opening an open dbreg id. */ + if (IS_DBREG_OPEN(opcode) && + opcode != DBREG_CHKPNT) { + tret = 2; + goto err; + } + tret = 0; + inarray = 1; + } else + /* Found the dbregid; gonna remove it. */ + tret = -1; + break; + } + } + + if (IS_DBREG_OPEN(opcode)) + tret = 1;/* dbregid not in the array, gonna add 1. */ + + /* + * Remove closed dbregid. dbregid can be recycled, not unique to a db + * file, it's dynamically allocated for each db handle. + */ + if (tret == -1) { + for (j = i; j < freg->regcnt - 1; j++) + freg->dbregids[j] = freg->dbregids[j + 1]; + freg->regcnt--; + BDBOP(__os_realloc(lvh->dbenv->env, + sizeof(int32_t) * freg->regcnt, &(freg->dbregids))); + /* Don't remove dbregid life info from dbregids db. 
*/ + } else if (tret == 1) { + if (!inarray) { + freg->regcnt++; + BDBOP(__os_realloc(lvh->dbenv->env, + sizeof(int32_t) * freg->regcnt, &(freg->dbregids))); + freg->dbregids[freg->regcnt - 1] = dbregid; + } + flife.dbregid = dbregid; + memcpy(flife.fileid, freg->fileid.data, freg->fileid.size); + flife.lifetime = opcode; + flife.dbtype = dbtype; + flife.lsn = lsn; + flife.meta_pgno = meta_pgno; + if ((ret = __put_filelife(lvh, &flife)) != 0) + goto err; + } + +err: + *addp = tret; + return (ret); + +} + +/* + * PUBLIC: int __get_filereg_info __P((const DB_LOG_VRFY_INFO *, const DBT *, + * PUBLIC: VRFY_FILEREG_INFO **)); + */ +int +__get_filereg_info (lvinfo, fuid, freginfopp) + const DB_LOG_VRFY_INFO *lvinfo; + const DBT *fuid; + VRFY_FILEREG_INFO **freginfopp; +{ + int ret; + DBT data; + + memset(&data, 0, sizeof(DBT)); + + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->fileregs, lvinfo->ip, NULL, + (DBT *)fuid, &data, 0), DB_NOTFOUND, "__get_filereg_info"); + if (ret == DB_NOTFOUND) + goto err; + if ((ret = __lv_unpack_filereg(&data, freginfopp)) != 0) + goto err; + +err: + return (ret); +} + +static int +__lv_unpack_filereg(data, freginfopp) + const DBT *data; + VRFY_FILEREG_INFO **freginfopp; +{ + char *p, *q; + u_int32_t fidsz, arrsz; + VRFY_FILEREG_INFO *buf; + int ret; + + ret = 0; + p = q = NULL; + fidsz = arrsz = 0; + buf = NULL; + + if ((ret = __os_malloc(NULL, sizeof(VRFY_FILEREG_INFO), &buf)) != 0) + goto err; + memset(buf, 0, sizeof(VRFY_FILEREG_INFO)); + + memcpy(buf, data->data, FILE_REG_INFO_FIXSIZE); + *freginfopp = (VRFY_FILEREG_INFO *)buf; + p = ((char *)(data->data)) + FILE_REG_INFO_FIXSIZE; + + if ((ret = __os_malloc(NULL, arrsz = (*freginfopp)->regcnt * + sizeof(int32_t), &((*freginfopp)->dbregids))) != 0) + goto err; + memcpy((*freginfopp)->dbregids, p, arrsz); + p += arrsz; + + memcpy(&fidsz, p, sizeof(fidsz)); + p += sizeof(fidsz); + if ((ret = __os_malloc(NULL, fidsz, &q)) != 0) + goto err; + memcpy(q, p, fidsz); + (*freginfopp)->fileid.data = q; 
+ (*freginfopp)->fileid.size = fidsz; + p += fidsz; + + if ((ret = __os_malloc(NULL, sizeof(char) * (strlen(p) + 1), &q)) != 0) + goto err; + (void)strcpy(q, p); + + (*freginfopp)->fname = q; +err: + return (ret); +} + +/* + * PUBLIC: int __free_filereg_info __P((VRFY_FILEREG_INFO *)); + */ +int +__free_filereg_info(p) + VRFY_FILEREG_INFO *p; +{ + if (p == NULL) + return (0); + if (p ->fname != NULL) + __os_free(NULL, (void *)(p->fname)); + if (p->fileid.data != NULL) + __os_free(NULL, p->fileid.data); + if (p->dbregids != NULL) + __os_free(NULL, p->dbregids); + __os_free(NULL, p); + + return (0); +} + +/* + * PUBLIC: int __get_ckp_info __P((const DB_LOG_VRFY_INFO *, DB_LSN, + * PUBLIC: VRFY_CKP_INFO **)); + */ +int +__get_ckp_info (lvinfo, lsn, ckpinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_CKP_INFO **ckpinfopp; +{ + int ret; + DBT key, data; + VRFY_CKP_INFO *ckpinfo; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &lsn; + key.size = sizeof(DB_LSN); + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->ckps, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_ckp_info"); + + if (ret == DB_NOTFOUND) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_CKP_INFO), &ckpinfo)) != 0) + goto err; + memcpy(ckpinfo, data.data, sizeof(VRFY_CKP_INFO)); + *ckpinfopp = ckpinfo; +err: + return (ret); + +} + +/* + * PUBLIC: int __get_last_ckp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: VRFY_CKP_INFO **)); + */ +int +__get_last_ckp_info (lvinfo, ckpinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + VRFY_CKP_INFO **ckpinfopp; +{ + int ret, tret; + DBT key, data; + VRFY_CKP_INFO *ckpinfo; + DBC *csr; + + csr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + BDBOP(__db_cursor(lvinfo->ckps, lvinfo->ip, NULL, &csr, 0)); + if ((ret = __dbc_get(csr, &key, &data, DB_LAST)) != 0) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_CKP_INFO), &ckpinfo)) != 0) + goto err; + 
DB_ASSERT(lvinfo->dbenv->env, sizeof(VRFY_CKP_INFO) == data.size); + memcpy(ckpinfo, data.data, sizeof(VRFY_CKP_INFO)); + *ckpinfopp = ckpinfo; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + if (ret != 0 && ret != DB_NOTFOUND) + __db_err(lvinfo->dbenv->env, ret, "__get_last_ckp_info"); + return (ret); +} + +/* + * PUBLIC: int __put_ckp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_CKP_INFO *)); + */ +int __put_ckp_info (lvinfo, ckpinfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_CKP_INFO *ckpinfo; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = (void *)&ckpinfo->lsn; + key.size = sizeof(DB_LSN); + data.data = (void *)ckpinfo; + data.size = sizeof(VRFY_CKP_INFO); + + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->ckps, lvinfo->ip, + NULL, &key, &data, 0), "__put_ckp_info"); + return (0); +} + +/* + * PUBLIC: int __get_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, VRFY_TIMESTAMP_INFO **)); + */ +int __get_timestamp_info (lvinfo, lsn, tsinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_TIMESTAMP_INFO **tsinfopp; +{ + int ret; + DBT key, data; + VRFY_TIMESTAMP_INFO *tsinfo; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &lsn; + key.size = sizeof(DB_LSN); + BDBOP3(lvinfo->dbenv, __db_get(lvinfo->lsntime, lvinfo->ip, NULL, + &key, &data, 0), DB_NOTFOUND, "__get_timestamp_info"); + + if (ret == DB_NOTFOUND) + goto err; + + if ((ret = __os_malloc(lvinfo->dbenv->env, + sizeof(VRFY_TIMESTAMP_INFO), &tsinfo)) != 0) + goto err; + + memcpy(tsinfo, data.data, sizeof(VRFY_TIMESTAMP_INFO)); + *tsinfopp = tsinfo; +err: + return (ret); +} + +/* + * __get_latest_timestamp_info -- + * Get latest timestamp info before lsn. 
+ * PUBLIC: int __get_latest_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, VRFY_TIMESTAMP_INFO **)); + */ +int __get_latest_timestamp_info(lvinfo, lsn, tsinfopp) + const DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + VRFY_TIMESTAMP_INFO **tsinfopp; +{ + int ret, tret; + DBT key, data; + VRFY_TIMESTAMP_INFO *tsinfo; + DBC *csr; + + csr = NULL; + ret = tret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &lsn; + key.size = sizeof(lsn); + BDBOP(__db_cursor(lvinfo->lsntime, lvinfo->ip, NULL, &csr, 0)); + + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + BDBOP(__dbc_get(csr, &key, &data, DB_PREV)); + + if ((ret = __os_malloc(lvinfo->dbenv->env, sizeof(VRFY_TIMESTAMP_INFO), + &tsinfo)) != 0) + goto err; + + memcpy(tsinfo, data.data, sizeof(VRFY_TIMESTAMP_INFO)); + *tsinfopp = tsinfo; + +err: + if (ret != 0 && ret != DB_NOTFOUND) + __db_err(lvinfo->dbenv->env, + ret, "__get_latest_timestamp_info"); + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __put_timestamp_info __P((const DB_LOG_VRFY_INFO *, + * PUBLIC: const VRFY_TIMESTAMP_INFO *)); + */ +int __put_timestamp_info (lvinfo, tsinfo) + const DB_LOG_VRFY_INFO *lvinfo; + const VRFY_TIMESTAMP_INFO *tsinfo; +{ + int ret; + DBT key, data; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = (void *)&(tsinfo->lsn); + key.size = sizeof(DB_LSN); + data.data = (void *)tsinfo; + data.size = sizeof(VRFY_TIMESTAMP_INFO); + BDBOP2(lvinfo->dbenv, __db_put(lvinfo->lsntime, lvinfo->ip, NULL, + &key, &data, 0), "__put_timestamp_info"); + + return (0); +} + +static int +__lv_txnrgns_lsn_cmp (db, d1, d2) + DB *db; + const DBT *d1, *d2; +{ + struct __lv_txnrange r1, r2; + + DB_ASSERT(db->env, d1->size == sizeof(r1)); + DB_ASSERT(db->env, d2->size == sizeof(r2)); + memcpy(&r1, d1->data, d1->size); + memcpy(&r2, d2->data, d2->size); + + return (LOG_COMPARE(&(r1.end), &(r2.end))); +} + +/* + * 
__find_lsnrg_by_timerg -- + * Find the lsn closed interval [beginlsn, endlsn] so that the + * corresponding timestamp interval fully contains interval [begin, end]. + * PUBLIC: int __find_lsnrg_by_timerg __P((DB_LOG_VRFY_INFO *, + * PUBLIC: time_t, time_t, DB_LSN *, DB_LSN *)); + */ +int +__find_lsnrg_by_timerg(lvinfo, begin, end, startlsn, endlsn) + DB_LOG_VRFY_INFO *lvinfo; + time_t begin, end; + DB_LSN *startlsn, *endlsn; +{ + int ret, tret; + DBC *csr; + struct __lv_timestamp_info *t1, *t2; + DBT key, data; + + ret = tret = 0; + csr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + BDBOP(__db_cursor(lvinfo->timelsn, lvinfo->ip, NULL, &csr, 0)); + + /* + * We want a lsn range that completely contains [begin, end], so + * try move 1 record prev when getting the startlsn. + */ + key.data = &begin; + key.size = sizeof(begin); + BDBOP(__dbc_get(csr, &key, &data, DB_SET_RANGE)); + if ((ret = __dbc_get(csr, &key, &data, DB_PREV)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND)/* begin is smaller than the smallest key. */ + startlsn->file = startlsn->offset = 0;/* beginning. */ + else { + t1 = (struct __lv_timestamp_info *)data.data; + *startlsn = t1->lsn; + } + + /* + * Move to the last key/data pair of the duplicate set to get the + * biggest lsn having end as timestamp. + */ + key.data = &end; + key.size = sizeof(end); + if ((ret = __dbc_get(csr, &key, &data, DB_SET_RANGE)) != 0 && + ret != DB_NOTFOUND) + goto err; + if (ret == DB_NOTFOUND) { + endlsn->file = endlsn->offset = (u_int32_t)-1;/* Biggest lsn. */ + ret = 0; + goto err; /* We are done. */ + } + + /* + * Go to the biggest lsn of the dup set, if the key is the last one, + * go to the last one. 
+ */ + if ((ret = __dbc_get(csr, &key, &data, DB_NEXT_NODUP)) != 0 && + ret != DB_NOTFOUND) + goto err; + + if (ret == DB_NOTFOUND) + BDBOP(__dbc_get(csr, &key, &data, DB_LAST)); + else + BDBOP(__dbc_get(csr, &key, &data, DB_PREV)); + + t2 = (struct __lv_timestamp_info *)data.data; + *endlsn = t2->lsn; +err: + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __add_txnrange __P((DB_LOG_VRFY_INFO *, u_int32_t, + * PUBLIC: DB_LSN, int32_t, int)); + */ +int __add_txnrange (lvinfo, txnid, lsn, when, ishead) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; + DB_LSN lsn; + int32_t when; + int ishead; /* Whether it's the 1st log of the txn. */ +{ + int ret, tret; + DBC *csr; + struct __lv_txnrange tr, *ptr; + DBT key, data; + + csr = NULL; + ret = 0; + ptr = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&tr, 0, sizeof(tr)); + + key.data = &txnid; + key.size = sizeof(txnid); + tr.txnid = txnid; + BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0)); + /* + * Note that we will backward play the logs to gather such information. + */ + if (!ishead) { + tr.end = lsn; + tr.when_commit = when; + data.data = &tr; + data.size = sizeof(tr); + BDBOP(__dbc_put(csr, &key, &data, DB_KEYFIRST)); + } else { + /* + * Dup data sorted by lsn, and we are backward playing logs, + * so the 1st record should be the one we want. + */ + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + ptr = (struct __lv_txnrange *)data.data; + DB_ASSERT(lvinfo->dbenv->env, IS_ZERO_LSN(ptr->begin)); + ptr->begin = lsn; + BDBOP(__dbc_put(csr, &key, &data, DB_CURRENT)); + } + +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * __get_aborttxn -- + * If lsn is the last log of an aborted txn T, T's txnid is + * returned via the log verify handle. 
+ * + * PUBLIC: int __get_aborttxn __P((DB_LOG_VRFY_INFO *, DB_LSN)); + */ +int +__get_aborttxn(lvinfo, lsn) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; +{ + int ret, tret; + u_int32_t txnid; + DBC *csr; + DBT key, data; + + csr = NULL; + txnid = 0; + ret = tret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = &lsn; + key.size = sizeof(lsn); + BDBOP(__db_cursor(lvinfo->txnaborts, lvinfo->ip, NULL, &csr, 0)); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + memcpy(&txnid, data.data, data.size); + /* + * The lsn is the last op of an aborted txn, call __on_txnabort + * before processing next log record. + */ + lvinfo->aborted_txnid = txnid; + lvinfo->aborted_txnlsn = lsn; + +err: + /* It's OK if can't find it. */ + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * __txn_started -- + * Whether txnid is started before lsn and ended after lsn. + * + * PUBLIC: int __txn_started __P((DB_LOG_VRFY_INFO *, + * PUBLIC: DB_LSN, u_int32_t, int *)); + */ +int +__txn_started(lvinfo, lsn, txnid, res) + DB_LOG_VRFY_INFO *lvinfo; + DB_LSN lsn; + u_int32_t txnid; + int *res; +{ + int ret, tret; + DBC *csr; + DBT key, data; + struct __lv_txnrange *ptr, tr; + + ret = *res = 0; + csr = NULL; + memset(&tr, 0, sizeof(tr)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP(__db_cursor(lvinfo->txnrngs, lvinfo->ip, NULL, &csr, 0)); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + for (;ret == 0; ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) { + ptr = (struct __lv_txnrange *)data.data; + if (LOG_COMPARE(&lsn, &(ptr->begin)) > 0 && + LOG_COMPARE(&lsn, &(ptr->end)) <= 0) { + *res = 1; + break; + } + } +err: + if (ret == DB_NOTFOUND) + ret = 0;/* It's OK if can't find it. 
*/ + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __set_logvrfy_dbfuid __P((DB_LOG_VRFY_INFO *)); + */ +int +__set_logvrfy_dbfuid(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + int ret; + const char *p; + DBT key, data; + size_t buflen; + + p = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* So far we only support verifying a specific db file. */ + p = lvinfo->lv_config->dbfile; + buflen = sizeof(char) * (strlen(p) + 1); + key.data = (char *)p; + key.size = (u_int32_t)buflen; + + BDBOP2(lvinfo->dbenv, __db_get(lvinfo->fnameuid, lvinfo->ip, NULL, + &key, &data, 0), "__set_logvrfy_dbfuid"); + + memcpy(lvinfo->target_dbid, data.data, DB_FILE_ID_LEN); + + return (ret); +} + +/* + * __add_page_to_txn -- + * Try adding a page to a txn, result brings back if really added(0/1) + * or if there is an access violation(-1). + * PUBLIC: int __add_page_to_txn __P((DB_LOG_VRFY_INFO *, + * PUBLIC: int32_t, db_pgno_t, u_int32_t, u_int32_t *, int *)); + */ +int +__add_page_to_txn (lvinfo, dbregid, pgno, txnid, otxn, result) + DB_LOG_VRFY_INFO *lvinfo; + int32_t dbregid; + db_pgno_t pgno; + u_int32_t txnid, *otxn; + int *result; +{ + int ret; + u_int8_t *buf; + DBT key, data; + size_t buflen; + u_int32_t txnid2; + VRFY_FILELIFE *pff; + + if (txnid < TXN_MINIMUM) { + *result = 0; + return (0); + } + buf = NULL; + ret = 0; + txnid2 = 0; + pff = NULL; + buflen = sizeof(u_int8_t) * DB_FILE_ID_LEN + sizeof(db_pgno_t); + BDBOP(__os_malloc(lvinfo->dbenv->env, buflen, &buf)); + memset(buf, 0, buflen); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + /* + * We use the file uid as key because a single db file can have + * multiple dbregid at the same time, and we may neglect the fact + * that the same db file is being updated by multiple txns if we use + * dbregid as key. 
+ */ + key.data = &dbregid; + key.size = sizeof(dbregid); + if ((ret = __db_get(lvinfo->dbregids, lvinfo->ip, NULL, + &key, &data, 0)) != 0) { + if (ret == DB_NOTFOUND) { + if (F_ISSET(lvinfo, DB_LOG_VERIFY_PARTIAL)) { + ret = 0; + goto out; + } else + F_SET(lvinfo, DB_LOG_VERIFY_INTERR); + } + goto err; + } + pff = (VRFY_FILELIFE *)data.data; + memcpy(buf, pff->fileid, DB_FILE_ID_LEN); + memcpy(buf + DB_FILE_ID_LEN, (u_int8_t *)&pgno, sizeof(pgno)); + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.data = buf; + key.size = (u_int32_t)buflen; + if ((ret = __db_get(lvinfo->pgtxn, lvinfo->ip, NULL, + &key, &data, 0)) != 0) { + if (ret == DB_NOTFOUND) { + data.data = &txnid; + data.size = sizeof(txnid); + BDBOP(__db_put(lvinfo->pgtxn, lvinfo->ip, NULL, &key, + &data, 0)); + *result = 1; + ret = 0;/* This is not an error. */ + } + goto err; + } + DB_ASSERT(lvinfo->dbenv->env, data.size == sizeof(txnid2)); + memcpy(&txnid2, data.data, data.size); + if (txnid == txnid2)/* The same txn already has the page. */ + *result = 0; + else {/* Txn txnid is updating pages still held by txnid2. */ + *result = -1; + *otxn = txnid2; + } +out: + /* result is set to -1 on violation, 0 if already has it, 1 if added. */ +err: + if (buf != NULL) + __os_free(lvinfo->dbenv->env, buf); + return (ret); +} + +/* + * PUBLIC: int __del_txn_pages __P((DB_LOG_VRFY_INFO *, u_int32_t)); + */ +int +__del_txn_pages(lvinfo, txnid) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t txnid; +{ + int ret; + DBT key; + + ret = 0; + memset(&key, 0, sizeof(DBT)); + key.data = &txnid; + key.size = sizeof(txnid); + + BDBOP(__db_del(lvinfo->txnpg, lvinfo->ip, NULL, &key, 0)); + +err: + return (ret); +} + +/* + * __is_ancestor_txn -- + * Tells via res if ptxnid is txnid's parent txn at the moment of lsn. 
+ * + * PUBLIC: int __is_ancestor_txn __P((DB_LOG_VRFY_INFO *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN, int *)); + */ +int +__is_ancestor_txn (lvinfo, ptxnid, txnid, lsn, res) + DB_LOG_VRFY_INFO *lvinfo; + u_int32_t ptxnid, txnid; + DB_LSN lsn; + int *res; +{ + u_int32_t ptid; + int ret, tret; + DBC *csr; + DB *pdb; + DBT key, data; + struct __lv_txnrange tr; + + ret = 0; + ptid = txnid; + csr = NULL; + pdb = lvinfo->txnrngs; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + *res = 0; + BDBOP(__db_cursor(pdb, lvinfo->ip, NULL, &csr, 0)); + + /* See if ptxnid is an ancestor of txnid. */ + do { + key.data = &ptid; + key.size = sizeof(ptid); + BDBOP(__dbc_get(csr, &key, &data, DB_SET)); + /* A txnid maybe reused, we want the range having lsn in it. */ + for (;ret == 0; + ret = __dbc_get(csr, &key, &data, DB_NEXT_DUP)) { + DB_ASSERT(pdb->env, sizeof(tr) == data.size); + memcpy(&tr, data.data, data.size); + if (tr.ptxnid > 0 && + LOG_COMPARE(&lsn, &(tr.begin)) >= 0 && + LOG_COMPARE(&lsn, &(tr.end)) <= 0) + break; + } + + if (tr.ptxnid == ptxnid) { + *res = 1; + goto out; + } else + ptid = tr.ptxnid; + + } while (ptid != 0); +out: + +err: + if (ret == DB_NOTFOUND) + ret = 0; + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +/* + * PUBLIC: int __return_txn_pages __P((DB_LOG_VRFY_INFO *, + * PUBLIC: u_int32_t, u_int32_t)); + */ +int __return_txn_pages(lvh, ctxn, ptxn) + DB_LOG_VRFY_INFO *lvh; + u_int32_t ctxn, ptxn; +{ + int ret, tret; + DBC *csr; + DB *pdb, *sdb; + DBT key, key2, data, data2; + char buf[DB_FILE_ID_LEN + sizeof(db_pgno_t)]; + + ret = tret = 0; + csr = NULL; + sdb = lvh->txnpg; + pdb = lvh->pgtxn; + memset(&key, 0, sizeof(DBT)); + memset(&key2, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&data2, 0, sizeof(DBT)); + + BDBOP(__db_cursor(sdb, lvh->ip, NULL, &csr, 0)); + key.data = &ctxn; + key.size = sizeof(ctxn); + key2.data = &ptxn; + key2.size = sizeof(ptxn); + data2.data = 
buf; + data2.ulen = DB_FILE_ID_LEN + sizeof(db_pgno_t); + data2.flags = DB_DBT_USERMEM; + + for (ret = __dbc_pget(csr, &key, &data2, &data, DB_SET); ret == 0; + ret = __dbc_pget(csr, &key, &data2, &data, DB_NEXT_DUP)) + BDBOP(__db_put(pdb, lvh->ip, NULL, &data2, &key2, 0)); + if ((ret = __del_txn_pages(lvh, ctxn)) != 0 && ret != DB_NOTFOUND) + goto err; +err: + if (csr != NULL && (tret = __dbc_close(csr)) != 0 && ret == 0) + ret = tret; + return (ret); +} + +#define ADD_ITEM(lvh, logtype) ((lvh)->logtype_names[(logtype)] = (#logtype)) +static void +__lv_setup_logtype_names(lvinfo) + DB_LOG_VRFY_INFO *lvinfo; +{ + ADD_ITEM(lvinfo, DB___bam_irep); + ADD_ITEM(lvinfo, DB___bam_split_42); + ADD_ITEM(lvinfo, DB___bam_split); + ADD_ITEM(lvinfo, DB___bam_rsplit); + ADD_ITEM(lvinfo, DB___bam_adj); + ADD_ITEM(lvinfo, DB___bam_cadjust); + ADD_ITEM(lvinfo, DB___bam_cdel); + ADD_ITEM(lvinfo, DB___bam_repl); + ADD_ITEM(lvinfo, DB___bam_root); + ADD_ITEM(lvinfo, DB___bam_curadj); + ADD_ITEM(lvinfo, DB___bam_rcuradj); + ADD_ITEM(lvinfo, DB___bam_relink_43); + ADD_ITEM(lvinfo, DB___bam_merge_44); + ADD_ITEM(lvinfo, DB___crdel_metasub); + ADD_ITEM(lvinfo, DB___crdel_inmem_create); + ADD_ITEM(lvinfo, DB___crdel_inmem_rename); + ADD_ITEM(lvinfo, DB___crdel_inmem_remove); + ADD_ITEM(lvinfo, DB___dbreg_register); + ADD_ITEM(lvinfo, DB___db_addrem); + ADD_ITEM(lvinfo, DB___db_big); + ADD_ITEM(lvinfo, DB___db_ovref); + ADD_ITEM(lvinfo, DB___db_relink_42); + ADD_ITEM(lvinfo, DB___db_debug); + ADD_ITEM(lvinfo, DB___db_noop); + ADD_ITEM(lvinfo, DB___db_pg_alloc_42); + ADD_ITEM(lvinfo, DB___db_pg_alloc); + ADD_ITEM(lvinfo, DB___db_pg_free_42); + ADD_ITEM(lvinfo, DB___db_pg_free); + ADD_ITEM(lvinfo, DB___db_cksum); + ADD_ITEM(lvinfo, DB___db_pg_freedata_42); + ADD_ITEM(lvinfo, DB___db_pg_freedata); + ADD_ITEM(lvinfo, DB___db_pg_init); + ADD_ITEM(lvinfo, DB___db_pg_sort_44); + ADD_ITEM(lvinfo, DB___db_pg_trunc); + ADD_ITEM(lvinfo, DB___db_realloc); + ADD_ITEM(lvinfo, DB___db_relink); + 
ADD_ITEM(lvinfo, DB___db_merge); + ADD_ITEM(lvinfo, DB___db_pgno); +#ifdef HAVE_HASH + ADD_ITEM(lvinfo, DB___ham_insdel); + ADD_ITEM(lvinfo, DB___ham_newpage); + ADD_ITEM(lvinfo, DB___ham_splitdata); + ADD_ITEM(lvinfo, DB___ham_replace); + ADD_ITEM(lvinfo, DB___ham_copypage); + ADD_ITEM(lvinfo, DB___ham_metagroup_42); + ADD_ITEM(lvinfo, DB___ham_metagroup); + ADD_ITEM(lvinfo, DB___ham_groupalloc_42); + ADD_ITEM(lvinfo, DB___ham_groupalloc); + ADD_ITEM(lvinfo, DB___ham_changeslot); + ADD_ITEM(lvinfo, DB___ham_contract); + ADD_ITEM(lvinfo, DB___ham_curadj); + ADD_ITEM(lvinfo, DB___ham_chgpg); +#endif +#ifdef HAVE_QUEUE + ADD_ITEM(lvinfo, DB___qam_incfirst); + ADD_ITEM(lvinfo, DB___qam_mvptr); + ADD_ITEM(lvinfo, DB___qam_del); + ADD_ITEM(lvinfo, DB___qam_add); + ADD_ITEM(lvinfo, DB___qam_delext); +#endif + ADD_ITEM(lvinfo, DB___txn_regop_42); + ADD_ITEM(lvinfo, DB___txn_regop); + ADD_ITEM(lvinfo, DB___txn_ckp_42); + ADD_ITEM(lvinfo, DB___txn_ckp); + ADD_ITEM(lvinfo, DB___txn_child); + ADD_ITEM(lvinfo, DB___txn_xa_regop_42); + ADD_ITEM(lvinfo, DB___txn_prepare); + ADD_ITEM(lvinfo, DB___txn_recycle); + ADD_ITEM(lvinfo, DB___fop_create_42); + ADD_ITEM(lvinfo, DB___fop_create); + ADD_ITEM(lvinfo, DB___fop_remove); + ADD_ITEM(lvinfo, DB___fop_write_42); + ADD_ITEM(lvinfo, DB___fop_write); + ADD_ITEM(lvinfo, DB___fop_rename_42); + ADD_ITEM(lvinfo, DB___fop_rename_noundo_46); + ADD_ITEM(lvinfo, DB___fop_rename); + ADD_ITEM(lvinfo, DB___fop_rename_noundo); + ADD_ITEM(lvinfo, DB___fop_file_remove); +} diff --git a/src/mp/mp_alloc.c b/src/mp/mp_alloc.c new file mode 100644 index 00000000..e3441986 --- /dev/null +++ b/src/mp/mp_alloc.c @@ -0,0 +1,699 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +/* + * This configuration parameter limits the number of hash buckets which + * __memp_alloc() searches through while excluding buffers with a 'high' + * priority. + */ +#if !defined(MPOOL_ALLOC_SEARCH_LIMIT) +#define MPOOL_ALLOC_SEARCH_LIMIT 500 +#endif + +/* + * __memp_alloc -- + * Allocate some space from a cache region. + * + * PUBLIC: int __memp_alloc __P((DB_MPOOL *, + * PUBLIC: REGINFO *, MPOOLFILE *, size_t, roff_t *, void *)); + */ +int +__memp_alloc(dbmp, infop, mfp, len, offsetp, retp) + DB_MPOOL *dbmp; + REGINFO *infop; + MPOOLFILE *mfp; + size_t len; + roff_t *offsetp; + void *retp; +{ + BH *bhp, *current_bhp, *mvcc_bhp, *oldest_bhp; + BH_FROZEN_PAGE *frozen_bhp; + DB_LSN vlsn; + DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_saved, *hp_tmp; + ENV *env; + MPOOL *c_mp; + MPOOLFILE *bh_mfp; + size_t freed_space; + u_int32_t buckets, bucket_priority, buffers, cache_reduction; + u_int32_t high_priority, priority; + u_int32_t priority_saved, put_counter, lru_generation, total_buckets; + int aggressive, alloc_freeze, b_lock, giveup, got_oldest; + int h_locked, need_free, obsolete, ret, write_error; + u_int8_t *endp; + void *p; + + env = dbmp->env; + c_mp = infop->primary; + dbht = R_ADDR(infop, c_mp->htab); + hp_end = &dbht[c_mp->htab_buckets]; + hp_saved = NULL; + priority_saved = 0; + write_error = 0; + + buckets = buffers = put_counter = total_buckets = 0; + aggressive = alloc_freeze = giveup = got_oldest = h_locked = 0; + + /* + * If we're allocating a buffer, and the one we're discarding is the + * same size, we don't want to waste the time to re-integrate it into + * the shared memory free list. If the DB_MPOOLFILE argument isn't + * NULL, we'll compare the underlying page sizes of the two buffers + * before free-ing and re-allocating buffers. 
+ */ + if (mfp != NULL) { + len = SSZA(BH, buf) + mfp->pagesize; + /* Add space for alignment padding for MVCC diagnostics. */ + MVCC_BHSIZE(mfp, len); + } + + STAT_INC(env, mpool, nallocs, c_mp->stat.st_alloc, len); + + MPOOL_REGION_LOCK(env, infop); + + /* + * First we try to allocate from free memory. If that fails, scan the + * buffer pool to find buffers with low priorities. We consider small + * sets of hash buckets each time to limit the amount of work needing + * to be done. This approximates LRU, but not very well. We either + * find a buffer of the same size to use, or we will free 3 times what + * we need in the hopes it will coalesce into a contiguous chunk of the + * right size. In the latter case we branch back here and try again. + */ +alloc: if ((ret = __env_alloc(infop, len, &p)) == 0) { + if (mfp != NULL) { + /* + * For MVCC diagnostics, align the pointer so that the + * buffer starts on a page boundary. + */ + MVCC_BHALIGN(p); + bhp = (BH *)p; + + if ((ret = __mutex_alloc(env, MTX_MPOOL_BH, + DB_MUTEX_SHARED, &bhp->mtx_buf)) != 0) { + MVCC_BHUNALIGN(bhp); + __env_alloc_free(infop, bhp); + goto search; + } + c_mp->pages++; + } + MPOOL_REGION_UNLOCK(env, infop); +found: if (offsetp != NULL) + *offsetp = R_OFFSET(infop, p); + *(void **)retp = p; + + /* + * Update the search statistics. + * + * We're not holding the region locked here, these statistics + * can't be trusted. 
+ */ +#ifdef HAVE_STATISTICS + total_buckets += buckets; + if (total_buckets != 0) { + if (total_buckets > c_mp->stat.st_alloc_max_buckets) + STAT_SET(env, mpool, alloc_max_buckets, + c_mp->stat.st_alloc_max_buckets, + total_buckets, infop->id); + STAT_ADJUST(env, mpool, alloc_buckets, + c_mp->stat.st_alloc_buckets, + total_buckets, infop->id); + } + if (buffers != 0) { + if (buffers > c_mp->stat.st_alloc_max_pages) + STAT_SET(env, mpool, alloc_max_pages, + c_mp->stat.st_alloc_max_pages, + buffers, infop->id); + STAT_ADJUST(env, mpool, alloc_pages, + c_mp->stat.st_alloc_pages, buffers, infop->id); + } +#endif + return (0); + } else if (giveup || c_mp->pages == 0) { + MPOOL_REGION_UNLOCK(env, infop); + + __db_errx(env, DB_STR("3017", + "unable to allocate space from the buffer cache")); + return ((ret == ENOMEM && write_error != 0) ? EIO : ret); + } + +search: + /* + * Anything newer than 1/10th of the buffer pool is ignored during the + * first MPOOL_SEARCH_ALLOC_LIMIT buckets worth of allocation. + */ + cache_reduction = c_mp->pages / 10; + high_priority = aggressive ? MPOOL_LRU_MAX : + c_mp->lru_priority - cache_reduction; + lru_generation = c_mp->lru_generation; + + ret = 0; + + /* + * We re-attempt the allocation every time we've freed 3 times what + * we need. Reset our free-space counter. + */ + freed_space = 0; + total_buckets += buckets; + buckets = 0; + + /* + * Walk the hash buckets and find the next two with potentially useful + * buffers. Free the buffer with the lowest priority from the buckets' + * chains. + */ + for (;;) { + /* All pages have been freed, make one last try */ + if (c_mp->pages == 0) + goto alloc; + + /* Check for wrap around. */ + hp = &dbht[c_mp->last_checked++]; + if (hp >= hp_end) { + c_mp->last_checked = 0; + hp = &dbht[c_mp->last_checked++]; + } + + /* + * The failure mode is when there are too many buffers we can't + * write or there's not enough memory in the system to support + * the number of pinned buffers. 
+ * + * Get aggressive if we've reviewed the entire cache without + * freeing the needed space. (The code resets "aggressive" + * when we free any space.) Aggressive means: + * + * a: set a flag to attempt to flush high priority buffers as + * well as other buffers. + * b: look at a buffer in every hash bucket rather than choose + * the more preferable of two. + * c: start to think about giving up. + * + * If we get here three or more times, sync the mpool to force + * out queue extent pages. While we might not have enough + * space for what we want and flushing is expensive, why not? + * Then sleep for a second, hopefully someone else will run and + * free up some memory. + * + * Always try to allocate memory too, in case some other thread + * returns its memory to the region. + * + * We don't have any way to know an allocation has no way to + * succeed. Fail if no pages are returned to the cache after + * we've been trying for a relatively long time. + * + * !!! + * This test ignores pathological cases like no buffers in the + * system -- we check for that early on, so it isn't possible. + */ + if (buckets++ == c_mp->htab_buckets) { + if (freed_space > 0) + goto alloc; + MPOOL_REGION_UNLOCK(env, infop); + + aggressive++; + /* + * Once aggressive, we consider all buffers. By setting + * this to MPOOL_LRU_MAX, we'll still select a victim + * even if all buffers have the highest normal priority. + */ + high_priority = MPOOL_LRU_MAX; + PERFMON4(env, mpool, alloc_wrap, + len, infop->id, aggressive, c_mp->put_counter); + switch (aggressive) { + case 1: + break; + case 2: + put_counter = c_mp->put_counter; + break; + case 3: + case 4: + case 5: + case 6: + (void)__memp_sync_int( + env, NULL, 0, DB_SYNC_ALLOC, NULL, NULL); + + __os_yield(env, 1, 0); + break; + default: + aggressive = 1; + if (put_counter == c_mp->put_counter) + giveup = 1; + break; + } + + MPOOL_REGION_LOCK(env, infop); + goto alloc; + } + + /* + * Skip empty buckets. 
+ * + * We can check for empty buckets before locking the hash + * bucket as we only care if the pointer is zero or non-zero. + */ + if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL) + continue; + + /* Set aggressive if we have already searched for too long. */ + if (aggressive == 0 && buckets >= MPOOL_ALLOC_SEARCH_LIMIT) { + aggressive = 1; + /* Once aggressive, we consider all buffers. */ + high_priority = MPOOL_LRU_MAX; + } + + /* Unlock the region and lock the hash bucket. */ + MPOOL_REGION_UNLOCK(env, infop); + MUTEX_READLOCK(env, hp->mtx_hash); + h_locked = 1; + b_lock = 0; + + /* + * Find a buffer we can use. + * + * We use the lowest-LRU singleton buffer if we find one and + * it's better than the result of another hash bucket we've + * reviewed. We do not use a buffer which has a priority + * greater than high_priority unless we are being aggressive. + * + * With MVCC buffers, the situation is more complicated: we + * don't want to free a buffer out of the middle of an MVCC + * chain, since that requires I/O. So, walk the buffers, + * looking for an obsolete buffer at the end of an MVCC chain. + * Once a buffer becomes obsolete, its LRU priority is + * irrelevant because that version can never be accessed again. + * + * If we don't find any obsolete MVCC buffers, we will get + * aggressive, and in that case consider the lowest priority + * buffer within a chain. + * + * Ignore referenced buffers, we can't get rid of them. + */ +retry_search: bhp = NULL; + bucket_priority = high_priority; + obsolete = 0; + SH_TAILQ_FOREACH(current_bhp, &hp->hash_bucket, hq, __bh) { + /* + * First, do the standard LRU check for singletons. + * We can use the buffer if it is unreferenced, has a + * priority that isn't too high (unless we are + * aggressive), and is better than the best candidate + * we have found so far in this bucket. 
+ */ + if (SH_CHAIN_SINGLETON(current_bhp, vc)) { + if (BH_REFCOUNT(current_bhp) == 0 && + bucket_priority > current_bhp->priority) { + bucket_priority = current_bhp->priority; + if (bhp != NULL) + atomic_dec(env, &bhp->ref); + bhp = current_bhp; + atomic_inc(env, &bhp->ref); + } + continue; + } + + /* + * For MVCC buffers, walk through the chain. If we are + * aggressive, choose the best candidate from within + * the chain for freezing. + */ + for (mvcc_bhp = oldest_bhp = current_bhp; + mvcc_bhp != NULL; + oldest_bhp = mvcc_bhp, + mvcc_bhp = SH_CHAIN_PREV(mvcc_bhp, vc, __bh)) { + DB_ASSERT(env, mvcc_bhp != + SH_CHAIN_PREV(mvcc_bhp, vc, __bh)); + if (aggressive > 1 && + BH_REFCOUNT(mvcc_bhp) == 0 && + !F_ISSET(mvcc_bhp, BH_FROZEN) && + (bhp == NULL || + bhp->priority > mvcc_bhp->priority)) { + if (bhp != NULL) + atomic_dec(env, &bhp->ref); + bhp = mvcc_bhp; + atomic_inc(env, &bhp->ref); + } + } + + /* + * oldest_bhp is the last buffer on the MVCC chain, and + * an obsolete buffer at the end of the MVCC chain gets + * used without further search. + * + * If the buffer isn't obsolete with respect to the + * cached old reader LSN, recalculate the oldest reader + * LSN and check again. + */ + if (BH_REFCOUNT(oldest_bhp) != 0) + continue; + +retry_obsolete: if (BH_OBSOLETE(oldest_bhp, hp->old_reader, vlsn)) { + obsolete = 1; + if (bhp != NULL) + atomic_dec(env, &bhp->ref); + bhp = oldest_bhp; + atomic_inc(env, &bhp->ref); + goto this_buffer; + } + if (!got_oldest) { + if ((ret = __txn_oldest_reader( + env, &hp->old_reader)) != 0) + return (ret); + got_oldest = 1; + goto retry_obsolete; + } + } + + /* + * bhp is either NULL or the best candidate buffer. + * We'll use the chosen buffer only if we have compared its + * priority against one chosen from another hash bucket. + */ + if (bhp == NULL) + goto next_hb; + + priority = bhp->priority; + + /* + * Compare two hash buckets and select the one with the lower + * priority. 
Performance testing showed looking at two improves + * the LRU-ness and looking at more only does a little better. + */ + if (hp_saved == NULL) { + hp_saved = hp; + priority_saved = priority; + goto next_hb; + } + + /* + * If the buffer we just found is a better choice than our + * previous choice, use it. + * + * If the previous choice was better, pretend we're moving + * from this hash bucket to the previous one and re-do the + * search. + * + * We don't worry about simply swapping between two buckets + * because that could only happen if a buffer was removed + * from the chain, or its priority updated. If a buffer + * is removed from the chain, some other thread has managed + * to discard a buffer, so we're moving forward. Updating + * a buffer's priority will make it a high-priority buffer, + * so we'll ignore it when we search again, and so we will + * eventually zero in on a buffer to use, or we'll decide + * there are no buffers we can use. + * + * If there's only a single hash bucket with buffers, we'll + * search the bucket once, choose a buffer, walk the entire + * list of buckets and search it again. In the case of a + * system that's busy, it's possible to imagine a case where + * we'd loop for a long while. For that reason, and because + * the test is easy, we special case and test for it. + */ + if (priority > priority_saved && hp != hp_saved) { + MUTEX_UNLOCK(env, hp->mtx_hash); + hp_tmp = hp_saved; + hp_saved = hp; + hp = hp_tmp; + priority_saved = priority; + MUTEX_READLOCK(env, hp->mtx_hash); + h_locked = 1; + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + atomic_dec(env, &bhp->ref); + goto retry_search; + } + + /* + * If another thread has called __memp_reset_lru() while we were + * looking for this buffer, it is possible that we've picked a + * poor choice for a victim. If so toss it and start over. 
+ */ + if (lru_generation != c_mp->lru_generation) { + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + atomic_dec(env, &bhp->ref); + MUTEX_UNLOCK(env, hp->mtx_hash); + MPOOL_REGION_LOCK(env, infop); + hp_saved = NULL; + goto search; + } + +this_buffer: buffers++; + + /* + * Discard any previously remembered hash bucket, we've got + * a winner. + */ + hp_saved = NULL; + + /* Drop the hash mutex and lock the buffer exclusively. */ + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + + /* Don't bother trying to latch a busy buffer. */ + if (BH_REFCOUNT(bhp) > 1) + goto next_hb; + + /* We cannot block as the caller is probably holding locks. */ + if ((ret = MUTEX_TRYLOCK(env, bhp->mtx_buf)) != 0) { + if (ret != DB_LOCK_NOTGRANTED) + return (ret); + goto next_hb; + } + F_SET(bhp, BH_EXCLUSIVE); + b_lock = 1; + + /* Someone may have grabbed it while we got the lock. */ + if (BH_REFCOUNT(bhp) != 1) + goto next_hb; + + /* Find the associated MPOOLFILE. */ + bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + + /* If the page is dirty, write it. */ + ret = 0; + if (F_ISSET(bhp, BH_DIRTY)) { + DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0); + ret = __memp_bhwrite(dbmp, hp, bh_mfp, bhp, 0); + DB_ASSERT(env, atomic_read(&bhp->ref) > 0); + + /* + * If a write fails for any reason, we can't proceed. + * + * If there's a write error and we're having problems + * finding something to allocate, avoid selecting this + * buffer again by maximizing its priority. + */ + if (ret != 0) { + if (ret != EPERM) { + write_error++; + __db_errx(env, DB_STR_A("3018", + "%s: unwritable page %d remaining in the cache after error %d", + "%s %d %d"), + __memp_fns(dbmp, bh_mfp), + bhp->pgno, ret); + } + bhp->priority = MPOOL_LRU_REDZONE; + + goto next_hb; + } + + STAT_INC(env, mpool, + dirty_eviction, c_mp->stat.st_rw_evict, infop->id); + + } + else + STAT_INC(env, mpool, + clean_eviction, c_mp->stat.st_ro_evict, infop->id); + + /* + * Freeze this buffer, if necessary. 
That is, if the buffer is + * part of an MVCC chain and could be required by a reader. + */ + if (SH_CHAIN_HASPREV(bhp, vc) || + (SH_CHAIN_HASNEXT(bhp, vc) && !obsolete)) { + if (!aggressive || + F_ISSET(bhp, BH_DIRTY | BH_FROZEN)) + goto next_hb; + ret = __memp_bh_freeze( + dbmp, infop, hp, bhp, &alloc_freeze); + if (ret == EIO) + write_error++; + if (ret == EBUSY || ret == EIO || + ret == ENOMEM || ret == ENOSPC) { + ret = 0; + goto next_hb; + } else if (ret != 0) { + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + atomic_dec(env, &bhp->ref); + DB_ASSERT(env, b_lock); + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + DB_ASSERT(env, !h_locked); + return (ret); + } + } + + MUTEX_LOCK(env, hp->mtx_hash); + h_locked = 1; + + /* + * We released the hash bucket lock while doing I/O, so another + * thread may have acquired this buffer and incremented the ref + * count or dirtied the buffer or installed a new version after + * we wrote it, in which case we can't have it. + */ + if (BH_REFCOUNT(bhp) != 1 || F_ISSET(bhp, BH_DIRTY) || + (SH_CHAIN_HASNEXT(bhp, vc) && + SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off != bhp->td_off && + !BH_OBSOLETE(bhp, hp->old_reader, vlsn))) + goto next_hb; + + /* + * If the buffer is frozen, thaw it and look for another one + * we can use. (Calling __memp_bh_freeze above will not + * mark bhp BH_FROZEN.) + */ + if (F_ISSET(bhp, BH_FROZEN)) { + DB_ASSERT(env, obsolete || SH_CHAIN_SINGLETON(bhp, vc)); + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + if (!F_ISSET(bhp, BH_THAWED)) { + /* + * This call releases the hash bucket mutex. + * We're going to retry the search, so we need + * to re-lock it. 
+ */ + if ((ret = __memp_bh_thaw(dbmp, + infop, hp, bhp, NULL)) != 0) + return (ret); + MUTEX_READLOCK(env, hp->mtx_hash); + } else { + need_free = (atomic_dec(env, &bhp->ref) == 0); + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + if (need_free) { + MPOOL_REGION_LOCK(env, infop); + SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, + bhp, hq); + MPOOL_REGION_UNLOCK(env, infop); + } + } + bhp = NULL; + b_lock = alloc_freeze = 0; + goto retry_search; + } + + /* + * If we need some empty buffer headers for freezing, turn the + * buffer we've found into frozen headers and put them on the + * free list. Only reset alloc_freeze if we've actually + * allocated some frozen buffer headers. + */ + if (alloc_freeze) { + if ((ret = __memp_bhfree(dbmp, + infop, bh_mfp, hp, bhp, 0)) != 0) + return (ret); + b_lock = 0; + h_locked = 0; + + MVCC_MPROTECT(bhp->buf, bh_mfp->pagesize, + PROT_READ | PROT_WRITE | PROT_EXEC); + + MPOOL_REGION_LOCK(env, infop); + SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen, + (BH_FROZEN_ALLOC *)bhp, links); + frozen_bhp = (BH_FROZEN_PAGE *) + ((BH_FROZEN_ALLOC *)bhp + 1); + endp = (u_int8_t *)bhp->buf + bh_mfp->pagesize; + while ((u_int8_t *)(frozen_bhp + 1) < endp) { + frozen_bhp->header.mtx_buf = MUTEX_INVALID; + SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, + (BH *)frozen_bhp, hq); + frozen_bhp++; + } + MPOOL_REGION_UNLOCK(env, infop); + + alloc_freeze = 0; + MUTEX_READLOCK(env, hp->mtx_hash); + h_locked = 1; + goto retry_search; + } + + /* + * Check to see if the buffer is the size we're looking for. + * If so, we can simply reuse it. Otherwise, free the buffer + * and its space and keep looking. 
+ */ + if (mfp != NULL && mfp->pagesize == bh_mfp->pagesize) { + if ((ret = __memp_bhfree(dbmp, + infop, bh_mfp, hp, bhp, 0)) != 0) + return (ret); + p = bhp; + goto found; + } + + freed_space += sizeof(*bhp) + bh_mfp->pagesize; + if ((ret = + __memp_bhfree(dbmp, infop, + bh_mfp, hp, bhp, BH_FREE_FREEMEM)) != 0) + return (ret); + + /* Reset "aggressive" and "write_error" if we free any space. */ + if (aggressive > 1) + aggressive = 1; + write_error = 0; + + /* + * Unlock this buffer and re-acquire the region lock. If + * we're reaching here as a result of calling memp_bhfree, the + * buffer lock has already been discarded. + */ + if (0) { +next_hb: if (bhp != NULL) { + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + atomic_dec(env, &bhp->ref); + if (b_lock) { + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + } + } + if (h_locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + } + MPOOL_REGION_LOCK(env, infop); + + /* + * Retry the allocation as soon as we've freed up sufficient + * space. We're likely to have to coalesce of memory to + * satisfy the request, don't try until it's likely (possible?) + * we'll succeed. + */ + if (freed_space >= 3 * len) + goto alloc; + } + /* NOTREACHED */ +} + +/* + * __memp_free -- + * Free some space from a cache region. + * + * PUBLIC: void __memp_free __P((REGINFO *, void *)); + */ +void +__memp_free(infop, buf) + REGINFO *infop; + void *buf; +{ + __env_alloc_free(infop, buf); +} diff --git a/src/mp/mp_bh.c b/src/mp/mp_bh.c new file mode 100644 index 00000000..19dfcbed --- /dev/null +++ b/src/mp/mp_bh.c @@ -0,0 +1,646 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" /* Required for diagnostic code. 
*/ +#include "dbinc/mp.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +static int __memp_pgwrite + __P((ENV *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *)); + +/* + * __memp_bhwrite -- + * Write the page associated with a given buffer header. + * + * PUBLIC: int __memp_bhwrite __P((DB_MPOOL *, + * PUBLIC: DB_MPOOL_HASH *, MPOOLFILE *, BH *, int)); + */ +int +__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents) + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOLFILE *mfp; + BH *bhp; + int open_extents; +{ + DB_MPOOLFILE *dbmfp; + DB_MPREG *mpreg; + ENV *env; + int ret; + + env = dbmp->env; + + /* + * If the file has been removed or is a closed temporary file, we're + * done -- the page-write function knows how to handle the fact that + * we don't have (or need!) any real file descriptor information. + */ + if (mfp->deadfile) + return (__memp_pgwrite(env, NULL, hp, bhp)); + + /* + * Walk the process' DB_MPOOLFILE list and find a file descriptor for + * the file. We also check that the descriptor is open for writing. + */ + MUTEX_LOCK(env, dbmp->mutex); + TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q) + if (dbmfp->mfp == mfp && !F_ISSET(dbmfp, MP_READONLY)) { + ++dbmfp->ref; + break; + } + MUTEX_UNLOCK(env, dbmp->mutex); + + if (dbmfp != NULL) { + /* + * Temporary files may not have been created. We only handle + * temporary files in this path, because only the process that + * created a temporary file will ever flush buffers to it. + */ + if (dbmfp->fhp == NULL) { + /* We may not be allowed to create backing files. */ + if (mfp->no_backing_file) { + --dbmfp->ref; + return (EPERM); + } + + MUTEX_LOCK(env, dbmp->mutex); + if (dbmfp->fhp == NULL) { + ret = __db_tmp_open(env, + F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? 
+ DB_OSO_DIRECT : 0, &dbmfp->fhp); + } else + ret = 0; + MUTEX_UNLOCK(env, dbmp->mutex); + if (ret != 0) { + __db_errx(env, DB_STR("3014", + "unable to create temporary backing file")); + --dbmfp->ref; + return (ret); + } + } + + goto pgwrite; + } + + /* + * There's no file handle for this file in our process. + * + * !!! + * It's the caller's choice if we're going to open extent files. + */ + if (!open_extents && F_ISSET(mfp, MP_EXTENT)) + return (EPERM); + + /* + * !!! + * Don't try to attach to temporary files. There are two problems in + * trying to do that. First, if we have different privileges than the + * process that "owns" the temporary file, we might create the backing + * disk file such that the owning process couldn't read/write its own + * buffers, e.g., memp_trickle running as root creating a file owned + * as root, mode 600. Second, if the temporary file has already been + * created, we don't have any way of finding out what its real name is, + * and, even if we did, it was already unlinked (so that it won't be + * left if the process dies horribly). This decision causes a problem, + * however: if the temporary file consumes the entire buffer cache, + * and the owner doesn't flush the buffers to disk, we could end up + * with resource starvation, and the memp_trickle thread couldn't do + * anything about it. That's a pretty unlikely scenario, though. + * + * Note we should never get here when the temporary file in question + * has already been closed in another process, in which case it should + * be marked dead. + */ + if (F_ISSET(mfp, MP_TEMP) || mfp->no_backing_file) + return (EPERM); + + /* + * It's not a page from a file we've opened. If the file requires + * application-specific input/output processing, see if this process + * has ever registered information as to how to write this type of + * file. If not, there's nothing we can do. 
+ */ + if (mfp->ftype != 0 && mfp->ftype != DB_FTYPE_SET) { + MUTEX_LOCK(env, dbmp->mutex); + LIST_FOREACH(mpreg, &dbmp->dbregq, q) + if (mpreg->ftype == mfp->ftype) + break; + MUTEX_UNLOCK(env, dbmp->mutex); + if (mpreg == NULL) + return (EPERM); + } + + /* + * Try and open the file, specifying the known underlying shared area. + * + * !!! + * There's no negative cache, so we may repeatedly try and open files + * that we have previously tried (and failed) to open. + */ + if ((ret = __memp_fcreate(env, &dbmfp)) != 0) + return (ret); + if ((ret = __memp_fopen(dbmfp, mfp, + NULL, NULL, DB_DURABLE_UNKNOWN, 0, mfp->pagesize)) != 0) { + (void)__memp_fclose(dbmfp, 0); + + /* + * Ignore any error if the file is marked dead, assume the file + * was removed from under us. + */ + if (!mfp->deadfile) + return (ret); + + dbmfp = NULL; + } + +pgwrite: + MVCC_MPROTECT(bhp->buf, mfp->pagesize, + PROT_READ | PROT_WRITE | PROT_EXEC); + ret = __memp_pgwrite(env, dbmfp, hp, bhp); + if (dbmfp == NULL) + return (ret); + + /* + * Discard our reference, and, if we're the last reference, make sure + * the file eventually gets closed. + */ + MUTEX_LOCK(env, dbmp->mutex); + if (dbmfp->ref == 1) + F_SET(dbmfp, MP_FLUSH); + else + --dbmfp->ref; + MUTEX_UNLOCK(env, dbmp->mutex); + + return (ret); +} + +/* + * __memp_pgread -- + * Read a page from a file. + * + * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); + */ +int +__memp_pgread(dbmfp, bhp, can_create) + DB_MPOOLFILE *dbmfp; + BH *bhp; + int can_create; +{ + ENV *env; + MPOOLFILE *mfp; + size_t len, nr; + u_int32_t pagesize; + int ret; + + env = dbmfp->env; + mfp = dbmfp->mfp; + pagesize = mfp->pagesize; + + /* We should never be called with a dirty or unlocked buffer. */ + DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY_CREATE | BH_FROZEN)); + DB_ASSERT(env, can_create || + F_ISSET(bhp, BH_TRASH) || !F_ISSET(bhp, BH_DIRTY)); + DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE)); + + /* Mark the buffer as in transistion. 
*/ + F_SET(bhp, BH_TRASH); + + /* + * Temporary files may not yet have been created. We don't create + * them now, we create them when the pages have to be flushed. + */ + nr = 0; + if (dbmfp->fhp != NULL) { + PERFMON3(env, mpool, read, __memp_fn(dbmfp), bhp->pgno, bhp); + if ((ret = __os_io(env, DB_IO_READ, dbmfp->fhp, + bhp->pgno, pagesize, 0, pagesize, bhp->buf, &nr)) != 0) + goto err; + } + + /* + * The page may not exist; if it doesn't, nr may well be 0, but we + * expect the underlying OS calls not to return an error code in + * this case. + */ + if (nr < pagesize) { + /* + * Don't output error messages for short reads. In particular, + * DB recovery processing may request pages never written to + * disk or for which only some part have been written to disk, + * in which case we won't find the page. The caller must know + * how to handle the error. + */ + if (!can_create) { + ret = DB_PAGE_NOTFOUND; + goto err; + } + + /* Clear any bytes that need to be cleared. */ + len = mfp->clear_len == DB_CLEARLEN_NOTSET ? + pagesize : mfp->clear_len; + memset(bhp->buf, 0, len); + +#if defined(DIAGNOSTIC) || defined(UMRW) + /* + * If we're running in diagnostic mode, corrupt any bytes on + * the page that are unknown quantities for the caller. + */ + if (len < pagesize) + memset(bhp->buf + len, CLEAR_BYTE, pagesize - len); +#endif + STAT_INC_VERB(env, mpool, page_create, + mfp->stat.st_page_create, __memp_fn(dbmfp), bhp->pgno); + } else + STAT_INC_VERB(env, mpool, page_in, + mfp->stat.st_page_in, __memp_fn(dbmfp), bhp->pgno); + + /* Call any pgin function. */ + ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1); + + /* + * If no errors occurred, the data is now valid, clear the BH_TRASH + * flag. + */ + if (ret == 0) + F_CLR(bhp, BH_TRASH); +err: return (ret); +} + +/* + * __memp_pgwrite -- + * Write a page to a file. 
+ */ +static int +__memp_pgwrite(env, dbmfp, hp, bhp) + ENV *env; + DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; + BH *bhp; +{ + DB_LSN lsn; + MPOOLFILE *mfp; + size_t nw; + int ret; + void * buf; + + /* + * Since writing does not require exclusive access, another thread + * could have already written this buffer. + */ + if (!F_ISSET(bhp, BH_DIRTY)) + return (0); + + mfp = dbmfp == NULL ? NULL : dbmfp->mfp; + ret = 0; + buf = NULL; + + /* We should never be called with a frozen or trashed buffer. */ + DB_ASSERT(env, !F_ISSET(bhp, BH_FROZEN | BH_TRASH)); + + /* + * It's possible that the underlying file doesn't exist, either + * because of an outright removal or because it was a temporary + * file that's been closed. + * + * !!! + * Once we pass this point, we know that dbmfp and mfp aren't NULL, + * and that we have a valid file reference. + */ + if (mfp == NULL || mfp->deadfile) + goto file_dead; + + /* + * If the page is in a file for which we have LSN information, we have + * to ensure the appropriate log records are on disk. + */ + if (LOGGING_ON(env) && mfp->lsn_off != DB_LSN_OFF_NOTSET && + !IS_CLIENT_PGRECOVER(env)) { + memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN)); + if (!IS_NOT_LOGGED_LSN(lsn) && + (ret = __log_flush(env, &lsn)) != 0) + goto err; + } + +#ifdef DIAGNOSTIC + /* + * Verify write-ahead logging semantics. + * + * !!! + * Two special cases. There is a single field on the meta-data page, + * the last-page-number-in-the-file field, for which we do not log + * changes. If the page was originally created in a database that + * didn't have logging turned on, we can see a page marked dirty but + * for which no corresponding log record has been written. However, + * the only way that a page can be created for which there isn't a + * previous log record and valid LSN is when the page was created + * without logging turned on, and so we check for that special-case + * LSN value. 
+ * + * Second, when a client is reading database pages from a master + * during an internal backup, we may get pages modified after + * the current end-of-log. + */ + if (LOGGING_ON(env) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf)) && + !IS_CLIENT_PGRECOVER(env)) { + /* + * There is a potential race here. If we are in the midst of + * switching log files, it's possible we could test against the + * old file and the new offset in the log region's LSN. If we + * fail the first test, acquire the log mutex and check again. + */ + DB_LOG *dblp; + LOG *lp; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (!lp->db_log_inmemory && + LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) <= 0) { + MUTEX_LOCK(env, lp->mtx_flush); + DB_ASSERT(env, F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || + LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) > 0); + MUTEX_UNLOCK(env, lp->mtx_flush); + } + } +#endif + + /* + * Call any pgout function. If we have the page exclusive then + * we are going to reuse it otherwise make a copy of the page so + * that others can continue looking at the page while we write it. + */ + buf = bhp->buf; + if (mfp->ftype != 0) { + if (F_ISSET(bhp, BH_EXCLUSIVE)) + F_SET(bhp, BH_TRASH); + else { + if ((ret = __os_malloc(env, mfp->pagesize, &buf)) != 0) + goto err; + memcpy(buf, bhp->buf, mfp->pagesize); + } + if ((ret = __memp_pg(dbmfp, bhp->pgno, buf, 0)) != 0) + goto err; + } + + PERFMON3(env, mpool, write, __memp_fn(dbmfp), bhp->pgno, bhp); + /* Write the page. 
*/ + if ((ret = __os_io(env, DB_IO_WRITE, dbmfp->fhp, bhp->pgno, + mfp->pagesize, 0, mfp->pagesize, buf, &nw)) != 0) { + __db_errx(env, DB_STR_A("3015", + "%s: write failed for page %lu", "%s %lu"), + __memp_fn(dbmfp), (u_long)bhp->pgno); + goto err; + } + STAT_INC_VERB(env, mpool, page_out, + mfp->stat.st_page_out, __memp_fn(dbmfp), bhp->pgno); + if (bhp->pgno > mfp->last_flushed_pgno) { + MUTEX_LOCK(env, mfp->mutex); + if (bhp->pgno > mfp->last_flushed_pgno) + mfp->last_flushed_pgno = bhp->pgno; + MUTEX_UNLOCK(env, mfp->mutex); + } + +err: +file_dead: + if (buf != NULL && buf != bhp->buf) + __os_free(env, buf); + /* + * !!! + * Once we pass this point, dbmfp and mfp may be NULL, we may not have + * a valid file reference. + */ + + /* + * Update the hash bucket statistics, reset the flags. If we were + * successful, the page is no longer dirty. Someone else may have + * also written the page so we need to latch the hash bucket here + * to get the accounting correct. Since we have the buffer + * shared it cannot be marked dirty again till we release it. + * This is the only place we update the flags field only holding + * a shared latch. + */ + if (F_ISSET(bhp, BH_DIRTY | BH_TRASH)) { + MUTEX_LOCK(env, hp->mtx_hash); + DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc)); + if (ret == 0 && F_ISSET(bhp, BH_DIRTY)) { + F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE); + DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0); + atomic_dec(env, &hp->hash_page_dirty); + } + + /* put the page back if necessary. */ + if ((ret != 0 || BH_REFCOUNT(bhp) > 1) && + F_ISSET(bhp, BH_TRASH)) { + ret = __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1); + F_CLR(bhp, BH_TRASH); + } + MUTEX_UNLOCK(env, hp->mtx_hash); + } + + return (ret); +} + +/* + * __memp_pg -- + * Call the pgin/pgout routine. 
+ * + * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int)); + */ +int +__memp_pg(dbmfp, pgno, buf, is_pgin) + DB_MPOOLFILE *dbmfp; + db_pgno_t pgno; + void *buf; + int is_pgin; +{ + DBT dbt, *dbtp; + DB_MPOOL *dbmp; + DB_MPREG *mpreg; + ENV *env; + MPOOLFILE *mfp; + int ftype, ret; + + env = dbmfp->env; + dbmp = env->mp_handle; + mfp = dbmfp->mfp; + + if ((ftype = mfp->ftype) == DB_FTYPE_SET) + mpreg = dbmp->pg_inout; + else { + MUTEX_LOCK(env, dbmp->mutex); + LIST_FOREACH(mpreg, &dbmp->dbregq, q) + if (ftype == mpreg->ftype) + break; + MUTEX_UNLOCK(env, dbmp->mutex); + } + if (mpreg == NULL) + return (0); + + if (mfp->pgcookie_len == 0) + dbtp = NULL; + else { + DB_SET_DBT(dbt, R_ADDR( + dbmp->reginfo, mfp->pgcookie_off), mfp->pgcookie_len); + dbtp = &dbt; + } + + if (is_pgin) { + if (mpreg->pgin != NULL && (ret = + mpreg->pgin(env->dbenv, pgno, buf, dbtp)) != 0) + goto err; + } else + if (mpreg->pgout != NULL && (ret = + mpreg->pgout(env->dbenv, pgno, buf, dbtp)) != 0) + goto err; + + return (0); + +err: __db_errx(env, DB_STR_A("3016", + "%s: %s failed for page %lu", "%s %s %lu"), __memp_fn(dbmfp), + is_pgin ? DB_STR_P("pgin") : DB_STR_P("pgout"), (u_long)pgno); + return (ret); +} + +/* + * __memp_bhfree -- + * Free a bucket header and its referenced data. + * + * PUBLIC: int __memp_bhfree __P((DB_MPOOL *, + * PUBLIC: REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t)); + */ +int +__memp_bhfree(dbmp, infop, mfp, hp, bhp, flags) + DB_MPOOL *dbmp; + REGINFO *infop; + MPOOLFILE *mfp; + DB_MPOOL_HASH *hp; + BH *bhp; + u_int32_t flags; +{ + ENV *env; +#ifdef DIAGNOSTIC + DB_LSN vlsn; +#endif + BH *prev_bhp; + MPOOL *c_mp; + int ret, t_ret; +#ifdef DIAG_MVCC + size_t pagesize; +#endif + + ret = 0; + + /* + * Assumes the hash bucket is locked and the MPOOL is not. 
+ */ + env = dbmp->env; +#ifdef DIAG_MVCC + if (mfp != NULL) + pagesize = mfp->pagesize; +#endif + + DB_ASSERT(env, LF_ISSET(BH_FREE_UNLOCKED) || + (hp != NULL && MUTEX_IS_OWNED(env, hp->mtx_hash))); + DB_ASSERT(env, BH_REFCOUNT(bhp) == 1 && + !F_ISSET(bhp, BH_DIRTY | BH_FROZEN)); + DB_ASSERT(env, LF_ISSET(BH_FREE_UNLOCKED) || + SH_CHAIN_SINGLETON(bhp, vc) || (SH_CHAIN_HASNEXT(bhp, vc) && + (SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off || + bhp->td_off == INVALID_ROFF || + IS_MAX_LSN(*VISIBLE_LSN(env, bhp)) || + BH_OBSOLETE(bhp, hp->old_reader, vlsn)))); + + PERFMON3(env, mpool, evict, __memp_fns(dbmp, mfp), bhp->pgno, bhp); + + /* + * Delete the buffer header from the hash bucket queue or the + * version chain. + */ + if (hp == NULL) + goto no_hp; + prev_bhp = SH_CHAIN_PREV(bhp, vc, __bh); + if (!SH_CHAIN_HASNEXT(bhp, vc)) { + if (prev_bhp != NULL) + SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, + bhp, prev_bhp, hq, __bh); + SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh); + } + SH_CHAIN_REMOVE(bhp, vc, __bh); + + /* + * Remove the reference to this buffer from the transaction that + * created it, if any. When the BH_FREE_UNLOCKED flag is set, we're + * discarding the environment, so the transaction region is already + * gone. + */ + if (bhp->td_off != INVALID_ROFF && !LF_ISSET(BH_FREE_UNLOCKED)) { + ret = __txn_remove_buffer( + env, BH_OWNER(env, bhp), hp->mtx_hash); + bhp->td_off = INVALID_ROFF; + } + + /* + * We're going to use the memory for something else -- it had better be + * accessible. + */ +no_hp: if (mfp != NULL) + MVCC_MPROTECT(bhp->buf, + pagesize, PROT_READ | PROT_WRITE | PROT_EXEC); + + /* + * Discard the hash bucket's mutex, it's no longer needed, and + * we don't want to be holding it when acquiring other locks. + */ + if (!LF_ISSET(BH_FREE_UNLOCKED)) + MUTEX_UNLOCK(env, hp->mtx_hash); + + /* + * If we're only removing this header from the chain for reuse, we're + * done. 
+ */ + if (LF_ISSET(BH_FREE_REUSE)) + return (ret); + + /* + * If we're not reusing the buffer immediately, free the buffer for + * real. + */ + if (!LF_ISSET(BH_FREE_UNLOCKED)) + MUTEX_UNLOCK(env, bhp->mtx_buf); + if (LF_ISSET(BH_FREE_FREEMEM)) { + if ((ret = __mutex_free(env, &bhp->mtx_buf)) != 0) + return (ret); + MPOOL_REGION_LOCK(env, infop); + + MVCC_BHUNALIGN(bhp); + __memp_free(infop, bhp); + c_mp = infop->primary; + c_mp->pages--; + + MPOOL_REGION_UNLOCK(env, infop); + } + + if (mfp == NULL) + return (ret); + + /* + * Decrement the reference count of the underlying MPOOLFILE. + * If this is its last reference, remove it. + */ + MUTEX_LOCK(env, mfp->mutex); + if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0) { + if ((t_ret = __memp_mf_discard(dbmp, mfp, 0)) != 0 && ret == 0) + ret = t_ret; + } else + MUTEX_UNLOCK(env, mfp->mutex); + + return (ret); +} diff --git a/src/mp/mp_fget.c b/src/mp/mp_fget.c new file mode 100644 index 00000000..5d95607e --- /dev/null +++ b/src/mp/mp_fget.c @@ -0,0 +1,1222 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#ifdef DIAGNOSTIC +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#endif + +/* + * __memp_fget_pp -- + * DB_MPOOLFILE->get pre/post processing. + * + * PUBLIC: int __memp_fget_pp + * PUBLIC: __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *)); + */ +int +__memp_fget_pp(dbmfp, pgnoaddr, txnp, flags, addrp) + DB_MPOOLFILE *dbmfp; + db_pgno_t *pgnoaddr; + DB_TXN *txnp; + u_int32_t flags; + void *addrp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_blocked, ret; + + env = dbmfp->env; + + MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->get"); + + /* + * Validate arguments. + * + * !!! 
+ * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly + * files here, and create non-existent pages in readonly files if the + * flags are set, later. The reason is that the hash access method + * wants to get empty pages that don't really exist in readonly files. + * The only alternative is for hash to write the last "bucket" all the + * time, which we don't want to do because one of our big goals in life + * is to keep database files small. It's sleazy as hell, but we catch + * any attempt to actually write the file in memp_fput(). + */ +#undef OKFLAGS +#define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_DIRTY | \ + DB_MPOOL_EDIT | DB_MPOOL_LAST | DB_MPOOL_NEW) + if (flags != 0) { + if ((ret = __db_fchk(env, "memp_fget", flags, OKFLAGS)) != 0) + return (ret); + + switch (FLD_CLR(flags, DB_MPOOL_DIRTY | DB_MPOOL_EDIT)) { + case DB_MPOOL_CREATE: + case DB_MPOOL_LAST: + case DB_MPOOL_NEW: + case 0: + break; + default: + return (__db_ferr(env, "memp_fget", 1)); + } + } + + ENV_ENTER(env, ip); + + rep_blocked = 0; + if (txnp == NULL && IS_ENV_REPLICATED(env)) { + if ((ret = __op_rep_enter(env, 0, 1)) != 0) + goto err; + rep_blocked = 1; + } + ret = __memp_fget(dbmfp, pgnoaddr, ip, txnp, flags, addrp); + /* + * We only decrement the count in op_rep_exit if the operation fails. + * Otherwise the count will be decremented when the page is no longer + * pinned in memp_fput. + */ + if (ret != 0 && rep_blocked) + (void)__op_rep_exit(env); + + /* Similarly if an app has a page pinned it is ACTIVE. */ +err: if (ret != 0) + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __memp_fget -- + * Get a page from the file. 
+ * + * PUBLIC: int __memp_fget __P((DB_MPOOLFILE *, + * PUBLIC: db_pgno_t *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, void *)); + */ +int +__memp_fget(dbmfp, pgnoaddr, ip, txn, flags, addrp) + DB_MPOOLFILE *dbmfp; + db_pgno_t *pgnoaddr; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t flags; + void *addrp; +{ + enum { FIRST_FOUND, FIRST_MISS, SECOND_FOUND, SECOND_MISS } state; + BH *alloc_bhp, *bhp, *oldest_bhp; + ENV *env; + DB_LSN *read_lsnp, vlsn; + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOL *c_mp; + MPOOLFILE *mfp; + PIN_LIST *list, *lp; + REGENV *renv; + REGINFO *infop, *t_infop, *reginfo; + TXN_DETAIL *td; + roff_t list_off, mf_offset; + u_int32_t bucket, pinmax, st_hsearch; + int b_incr, b_lock, h_locked, dirty, extending; + int makecopy, mvcc, need_free, ret; +#ifdef DIAGNOSTIC + DB_LOCKTAB *lt; + DB_LOCKER *locker; +#endif + + *(void **)addrp = NULL; + COMPQUIET(c_mp, NULL); + COMPQUIET(infop, NULL); + + env = dbmfp->env; + dbmp = env->mp_handle; + + mfp = dbmfp->mfp; + mvcc = atomic_read(&mfp->multiversion) && (txn != NULL); + mf_offset = R_OFFSET(dbmp->reginfo, mfp); + alloc_bhp = bhp = oldest_bhp = NULL; + read_lsnp = NULL; + td = NULL; + hp = NULL; + b_incr = b_lock = h_locked = extending = makecopy = ret = 0; + + if (LF_ISSET(DB_MPOOL_DIRTY)) { + if (F_ISSET(dbmfp, MP_READONLY)) { + __db_errx(env, DB_STR_A("3021", + "%s: dirty flag set for readonly file page", + "%s"), __memp_fn(dbmfp)); + return (EINVAL); + } + if ((ret = __db_fcchk(env, "DB_MPOOLFILE->get", + flags, DB_MPOOL_DIRTY, DB_MPOOL_EDIT)) != 0) + return (ret); + } + + dirty = LF_ISSET(DB_MPOOL_DIRTY | DB_MPOOL_EDIT | DB_MPOOL_FREE); + LF_CLR(DB_MPOOL_DIRTY | DB_MPOOL_EDIT); + + /* + * If the transaction is being used to update a multiversion database + * for the first time, set the read LSN. In addition, if this is an + * update, allocate a mutex. If no transaction has been supplied, that + * will be caught later, when we know whether one is required. 
+ */ + if (mvcc && txn != NULL && txn->td != NULL) { + /* We're only interested in the ultimate parent transaction. */ + while (txn->parent != NULL) + txn = txn->parent; + td = (TXN_DETAIL *)txn->td; + if (F_ISSET(txn, TXN_SNAPSHOT)) { + read_lsnp = &td->read_lsn; + if (IS_MAX_LSN(*read_lsnp) && + (ret = __log_current_lsn_int(env, read_lsnp, + NULL, NULL)) != 0) + return (ret); + } + if ((dirty || LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW)) && + td->mvcc_mtx == MUTEX_INVALID && (ret = + __mutex_alloc(env, MTX_TXN_MVCC, 0, &td->mvcc_mtx)) != 0) + return (ret); + } + + switch (flags) { + case DB_MPOOL_LAST: + /* Get the last page number in the file. */ + MUTEX_LOCK(env, mfp->mutex); + *pgnoaddr = mfp->last_pgno; + MUTEX_UNLOCK(env, mfp->mutex); + break; + case DB_MPOOL_NEW: + /* + * If always creating a page, skip the first search + * of the hash bucket. + */ + goto newpg; + case DB_MPOOL_CREATE: + default: + break; + } + + /* + * If mmap'ing the file and the page is not past the end of the file, + * just return a pointer. We can't use R_ADDR here: this is an offset + * into an mmap'd file, not a shared region, and doesn't change for + * private environments. + * + * The page may be past the end of the file, so check the page number + * argument against the original length of the file. If we previously + * returned pages past the original end of the file, last_pgno will + * have been updated to match the "new" end of the file, and checking + * against it would return pointers past the end of the mmap'd region. + * + * If another process has opened the file for writing since we mmap'd + * it, we will start playing the game by their rules, i.e. everything + * goes through the cache. All pages previously returned will be safe, + * as long as the correct locking protocol was observed. + * + * We don't discard the map because we don't know when all of the + * pages will have been discarded from the process' address space. 
+ * It would be possible to do so by reference counting the open + * pages from the mmap, but it's unclear to me that it's worth it. + */ + if (dbmfp->addr != NULL && + F_ISSET(mfp, MP_CAN_MMAP) && *pgnoaddr <= mfp->orig_last_pgno) { + *(void **)addrp = (u_int8_t *)dbmfp->addr + + (*pgnoaddr * mfp->pagesize); + STAT_INC_VERB(env, + mpool, map, mfp->stat.st_map, __memp_fn(dbmfp), *pgnoaddr); + return (0); + } + + /* + * Determine the cache and hash bucket where this page lives and get + * local pointers to them. Reset on each pass through this code, the + * page number can change. + */ + MP_GET_BUCKET(env, mfp, *pgnoaddr, &infop, hp, bucket, ret); + if (ret != 0) + return (ret); + c_mp = infop->primary; + + if (0) { + /* if we search again, get an exclusive lock. */ +retry: MUTEX_LOCK(env, hp->mtx_hash); + } + + /* Search the hash chain for the page. */ + st_hsearch = 0; + h_locked = 1; + SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) { + ++st_hsearch; + if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset) + continue; + + /* Snapshot reads -- get the version visible at read_lsn. */ + if (read_lsnp != NULL) { + while (bhp != NULL && + !BH_OWNED_BY(env, bhp, txn) && + !BH_VISIBLE(env, bhp, read_lsnp, vlsn)) + bhp = SH_CHAIN_PREV(bhp, vc, __bh); + + /* + * We can get a null bhp if we are looking for a + * page that was created after the transaction was + * started so its not visible (i.e. page added to + * the BTREE in a subsequent txn). + */ + if (bhp == NULL) { + ret = DB_PAGE_NOTFOUND; + goto err; + } + } + + makecopy = mvcc && dirty && !BH_OWNED_BY(env, bhp, txn); + + /* + * Increment the reference count. This signals that the + * buffer may not be discarded. We must drop the hash + * mutex before we lock the buffer mutex. 
+ */ + if (BH_REFCOUNT(bhp) == UINT16_MAX) { + __db_errx(env, DB_STR_A("3022", + "%s: page %lu: reference count overflow", + "%s %lu"), __memp_fn(dbmfp), (u_long)bhp->pgno); + ret = __env_panic(env, EINVAL); + goto err; + } + atomic_inc(env, &bhp->ref); + b_incr = 1; + + /* + * Lock the buffer. If the page is being read in or modified it + * will be exclusively locked and we will block. + */ + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + if (dirty || extending || makecopy || F_ISSET(bhp, BH_FROZEN)) { +xlatch: if (LF_ISSET(DB_MPOOL_TRY)) { + if ((ret = + MUTEX_TRYLOCK(env, bhp->mtx_buf)) != 0) + goto err; + } else + MUTEX_LOCK(env, bhp->mtx_buf); + F_SET(bhp, BH_EXCLUSIVE); + } else if (LF_ISSET(DB_MPOOL_TRY)) { + if ((ret = MUTEX_TRY_READLOCK(env, bhp->mtx_buf)) != 0) + goto err; + } else + MUTEX_READLOCK(env, bhp->mtx_buf); + +#ifdef HAVE_SHARED_LATCHES + /* + * If buffer is still in transit once we have a shared latch, + * upgrade to an exclusive latch. + */ + if (F_ISSET(bhp, BH_FREED | BH_TRASH) && + !F_ISSET(bhp, BH_EXCLUSIVE)) { + MUTEX_UNLOCK(env, bhp->mtx_buf); + goto xlatch; + } +#else + F_SET(bhp, BH_EXCLUSIVE); +#endif + b_lock = 1; + + /* + * If the buffer was frozen before we waited for any I/O to + * complete and is still frozen, we will need to thaw it. + * Otherwise, it was thawed while we waited, and we need to + * search again. + */ + if (F_ISSET(bhp, BH_THAWED)) { +thawed: need_free = (atomic_dec(env, &bhp->ref) == 0); + b_incr = 0; + MUTEX_UNLOCK(env, bhp->mtx_buf); + b_lock = 0; + if (need_free) { + MPOOL_REGION_LOCK(env, infop); + SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, + bhp, hq); + MPOOL_REGION_UNLOCK(env, infop); + } + bhp = NULL; + goto retry; + } + + /* + * If the buffer we wanted was frozen or thawed while we + * waited, we need to start again. That is indicated by + * a new buffer header in the version chain owned by the same + * transaction as the one we pinned. 
+ * + * Also, if we're doing an unversioned read on a multiversion + * file, another thread may have dirtied this buffer while we + * swapped from the hash bucket lock to the buffer lock. + */ + if (SH_CHAIN_HASNEXT(bhp, vc) && + (SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off || + (!dirty && read_lsnp == NULL))) { + DB_ASSERT(env, b_incr && BH_REFCOUNT(bhp) != 0); + atomic_dec(env, &bhp->ref); + b_incr = 0; + MUTEX_UNLOCK(env, bhp->mtx_buf); + b_lock = 0; + bhp = NULL; + goto retry; + } else if (dirty && SH_CHAIN_HASNEXT(bhp, vc)) { + ret = DB_LOCK_DEADLOCK; + goto err; + } else if (F_ISSET(bhp, BH_FREED) && flags != DB_MPOOL_CREATE && + flags != DB_MPOOL_NEW && flags != DB_MPOOL_FREE) { + ret = DB_PAGE_NOTFOUND; + goto err; + } + + /* Is it worthwhile to publish oh-so-frequent cache hits? */ + STAT_INC_VERB(env, mpool, hit, + mfp->stat.st_cache_hit, __memp_fn(dbmfp), *pgnoaddr); + break; + } + +#ifdef HAVE_STATISTICS + /* + * Update the hash bucket search statistics -- do now because our next + * search may be for a different bucket. Are these too frequent also? 
+ */ + STAT_INC_VERB(env, mpool, hash_search, + c_mp->stat.st_hash_searches, __memp_fn(dbmfp), *pgnoaddr); + if (st_hsearch > c_mp->stat.st_hash_longest) + STAT_SET_VERB(env, mpool, hash_longest, + c_mp->stat.st_hash_longest, + st_hsearch, __memp_fn(dbmfp), *pgnoaddr); + STAT_ADJUST_VERB(env, mpool, hash_examined, c_mp->stat.st_hash_searches, + st_hsearch, __memp_fn(dbmfp), *pgnoaddr); +#endif + + /* + * There are 4 possible paths to this location: + * + * FIRST_MISS: + * Didn't find the page in the hash bucket on our first pass: + * bhp == NULL, alloc_bhp == NULL + * + * FIRST_FOUND: + * Found the page in the hash bucket on our first pass: + * bhp != NULL, alloc_bhp == NULL + * + * SECOND_FOUND: + * Didn't find the page in the hash bucket on the first pass, + * allocated space, and found the page in the hash bucket on + * our second pass: + * bhp != NULL, alloc_bhp != NULL + * + * SECOND_MISS: + * Didn't find the page in the hash bucket on the first pass, + * allocated space, and didn't find the page in the hash bucket + * on our second pass: + * bhp == NULL, alloc_bhp != NULL + */ + state = bhp == NULL ? + (alloc_bhp == NULL ? FIRST_MISS : SECOND_MISS) : + (alloc_bhp == NULL ? FIRST_FOUND : SECOND_FOUND); + + switch (state) { + case FIRST_FOUND: + /* + * If we are to free the buffer, then this had better be the + * only reference. If so, just free the buffer. If not, + * complain and get out. + */ + if (flags == DB_MPOOL_FREE) { +freebuf: MUTEX_LOCK(env, hp->mtx_hash); + h_locked = 1; + if (F_ISSET(bhp, BH_DIRTY)) { + F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE); + DB_ASSERT(env, + atomic_read(&hp->hash_page_dirty) > 0); + atomic_dec(env, &hp->hash_page_dirty); + } + + /* + * If the buffer we found is already freed, we're done. + * If the ref count is not 1 then someone may be + * peeking at the buffer. We cannot free it until they + * determine that it is not what they want. Clear the + * buffer so that waiting threads get an empty page. 
+ */ + if (F_ISSET(bhp, BH_FREED)) + goto done; + else if (BH_REFCOUNT(bhp) != 1 || + !SH_CHAIN_SINGLETON(bhp, vc)) { + /* + * Create an empty page in the chain for + * subsequent gets. Otherwise, a thread that + * re-creates this page while it is still in + * cache will see stale data. + */ + F_SET(bhp, BH_FREED); + F_CLR(bhp, BH_TRASH); + } else if (F_ISSET(bhp, BH_FROZEN)) { + /* + * Freeing a singleton frozen buffer: just free + * it. This call will release the hash bucket + * mutex. + */ + ret = + __memp_bh_thaw(dbmp, infop, hp, bhp, NULL); + bhp = NULL; + b_incr = b_lock = h_locked = 0; + } else { + ret = __memp_bhfree(dbmp, infop, mfp, + hp, bhp, BH_FREE_FREEMEM); + bhp = NULL; + b_incr = b_lock = h_locked = 0; + } + goto done; + } else if (F_ISSET(bhp, BH_FREED | BH_TRASH)) { +revive: DB_ASSERT(env, F_ISSET(bhp, BH_TRASH) || + flags == DB_MPOOL_CREATE || flags == DB_MPOOL_NEW); + if (F_ISSET(bhp, BH_FREED)) + makecopy = makecopy || + (mvcc && !BH_OWNED_BY(env, bhp, txn)) || + F_ISSET(bhp, BH_FROZEN); + if (flags == DB_MPOOL_CREATE) { + MUTEX_LOCK(env, mfp->mutex); + if (*pgnoaddr > mfp->last_pgno) + mfp->last_pgno = *pgnoaddr; + MUTEX_UNLOCK(env, mfp->mutex); + } + } + if (mvcc) { + /* + * With multiversion databases, we might need to + * allocate a new buffer into which we can copy the one + * that we found. In that case, check the last buffer + * in the chain to see whether we can reuse an obsolete + * buffer. + * + * To provide snapshot isolation, we need to make sure + * that we've seen a buffer older than the oldest + * snapshot read LSN. 
+ */ +reuse: if ((makecopy || F_ISSET(bhp, BH_FROZEN)) && + !h_locked) { + MUTEX_LOCK(env, hp->mtx_hash); + h_locked = 1; + } + if ((makecopy || F_ISSET(bhp, BH_FROZEN)) && + SH_CHAIN_HASPREV(bhp, vc)) { + oldest_bhp = SH_CHAIN_PREVP(bhp, vc, __bh); + while (SH_CHAIN_HASPREV(oldest_bhp, vc)) + oldest_bhp = SH_CHAIN_PREVP( + oldest_bhp, vc, __bh); + + if (BH_REFCOUNT(oldest_bhp) == 0 && + !BH_OBSOLETE( + oldest_bhp, hp->old_reader, vlsn) && + (ret = __txn_oldest_reader(env, + &hp->old_reader)) != 0) + goto err; + + if (BH_OBSOLETE( + oldest_bhp, hp->old_reader, vlsn) && + BH_REFCOUNT(oldest_bhp) == 0) { + DB_ASSERT(env, + !F_ISSET(oldest_bhp, BH_DIRTY)); + atomic_inc(env, &oldest_bhp->ref); + if (F_ISSET(oldest_bhp, BH_FROZEN)) { + /* + * This call will release the + * hash bucket mutex. + */ + ret = __memp_bh_thaw(dbmp, + infop, hp, oldest_bhp, + NULL); + h_locked = 0; + if (ret != 0) + goto err; + goto reuse; + } + if ((ret = __memp_bhfree(dbmp, + infop, mfp, hp, oldest_bhp, + BH_FREE_REUSE)) != 0) + goto err; + alloc_bhp = oldest_bhp; + h_locked = 0; + } + + DB_ASSERT(env, alloc_bhp == NULL || + !F_ISSET(alloc_bhp, BH_FROZEN)); + } + } + + /* We found the buffer or we're ready to copy -- we're done. */ + if (!(makecopy || F_ISSET(bhp, BH_FROZEN)) || alloc_bhp != NULL) + break; + + /* FALLTHROUGH */ + case FIRST_MISS: + /* + * We didn't find the buffer in our first check. Figure out + * if the page exists, and allocate structures so we can add + * the page to the buffer pool. + */ + if (h_locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + + /* + * The buffer is not in the pool, so we don't need to free it. + */ + if (LF_ISSET(DB_MPOOL_FREE) && + (bhp == NULL || F_ISSET(bhp, BH_FREED) || !makecopy)) + goto done; + + if (bhp != NULL) + goto alloc; + +newpg: /* + * If DB_MPOOL_NEW is set, we have to allocate a page number. + * If neither DB_MPOOL_CREATE or DB_MPOOL_NEW is set, then + * it's an error to try and get a page past the end of file. 
+ */ + DB_ASSERT(env, !h_locked); + MUTEX_LOCK(env, mfp->mutex); + switch (flags) { + case DB_MPOOL_NEW: + extending = 1; + if (mfp->maxpgno != 0 && + mfp->last_pgno >= mfp->maxpgno) { + __db_errx(env, DB_STR_A("3023", + "%s: file limited to %lu pages", "%s %lu"), + __memp_fn(dbmfp), (u_long)mfp->maxpgno); + ret = ENOSPC; + } else + *pgnoaddr = mfp->last_pgno + 1; + break; + case DB_MPOOL_CREATE: + if (mfp->maxpgno != 0 && *pgnoaddr > mfp->maxpgno) { + __db_errx(env, DB_STR_A("3024", + "%s: file limited to %lu pages", "%s %lu"), + __memp_fn(dbmfp), (u_long)mfp->maxpgno); + ret = ENOSPC; + } else if (!extending) + extending = *pgnoaddr > mfp->last_pgno; + break; + default: + ret = *pgnoaddr > mfp->last_pgno ? DB_PAGE_NOTFOUND : 0; + break; + } + MUTEX_UNLOCK(env, mfp->mutex); + if (ret != 0) + goto err; + + /* + * !!! + * In the DB_MPOOL_NEW code path, hp, infop and c_mp have + * not yet been initialized. + */ + if (hp == NULL) { + MP_GET_BUCKET(env, + mfp, *pgnoaddr, &infop, hp, bucket, ret); + if (ret != 0) + goto err; + MUTEX_UNLOCK(env, hp->mtx_hash); + c_mp = infop->primary; + } + +alloc: /* Allocate a new buffer header and data space. */ + if (alloc_bhp == NULL && (ret = + __memp_alloc(dbmp, infop, mfp, 0, NULL, &alloc_bhp)) != 0) + goto err; + + /* Initialize enough so we can call __memp_bhfree. */ + alloc_bhp->flags = 0; + atomic_init(&alloc_bhp->ref, 1); +#ifdef DIAGNOSTIC + if ((uintptr_t)alloc_bhp->buf & (sizeof(size_t) - 1)) { + __db_errx(env, DB_STR("3025", + "DB_MPOOLFILE->get: buffer data is NOT size_t aligned")); + ret = __env_panic(env, EINVAL); + goto err; + } +#endif + + /* + * If we're doing copy-on-write, we will already have the + * buffer header. In that case, we don't need to search again. + */ + if (bhp != NULL) + break; + + /* + * If we are extending the file, we'll need the mfp lock + * again. 
+ */ + if (extending) + MUTEX_LOCK(env, mfp->mutex); + + /* + * DB_MPOOL_NEW does not guarantee you a page unreferenced by + * any other thread of control. (That guarantee is interesting + * for DB_MPOOL_NEW, unlike DB_MPOOL_CREATE, because the caller + * did not specify the page number, and so, may reasonably not + * have any way to lock the page outside of mpool.) Regardless, + * if we allocate the page, and some other thread of control + * requests the page by number, we will not detect that and the + * thread of control that allocated using DB_MPOOL_NEW may not + * have a chance to initialize the page. (Note: we *could* + * detect this case if we set a flag in the buffer header which + * guaranteed that no gets of the page would succeed until the + * reference count went to 0, that is, until the creating page + * put the page.) What we do guarantee is that if two threads + * of control are both doing DB_MPOOL_NEW calls, they won't + * collide, that is, they won't both get the same page. + * + * There's a possibility that another thread allocated the page + * we were planning to allocate while we were off doing buffer + * allocation. We can do that by making sure the page number + * we were going to use is still available. If it's not, then + * we check to see if the next available page number hashes to + * the same mpool region as the old one -- if it does, we can + * continue, otherwise, we have to start over. + */ + if (flags == DB_MPOOL_NEW && *pgnoaddr != mfp->last_pgno + 1) { + *pgnoaddr = mfp->last_pgno + 1; + MP_GET_REGION(dbmfp, *pgnoaddr, &t_infop, ret); + if (ret != 0) + goto err; + if (t_infop != infop) { + /* + * flags == DB_MPOOL_NEW, so extending is set + * and we're holding the mfp locked. + */ + MUTEX_UNLOCK(env, mfp->mutex); + hp = NULL; + goto newpg; + } + } + + /* + * We released the mfp lock, so another thread might have + * extended the file. Update the last_pgno and initialize + * the file, as necessary, if we extended the file. 
+ */ + if (extending) { + if (*pgnoaddr > mfp->last_pgno) + mfp->last_pgno = *pgnoaddr; + else + extending = 0; + MUTEX_UNLOCK(env, mfp->mutex); + if (ret != 0) + goto err; + } + goto retry; + case SECOND_FOUND: + /* + * We allocated buffer space for the requested page, but then + * found the page in the buffer cache on our second check. + * That's OK -- we can use the page we found in the pool, + * unless DB_MPOOL_NEW is set. If we're about to copy-on-write, + * this is exactly the situation we want. + * + * For multiversion files, we may have left some pages in cache + * beyond the end of a file after truncating. In that case, we + * would get to here with extending set. If so, we need to + * insert the new page in the version chain similar to when + * we copy on write. + */ + if (F_ISSET(bhp, BH_FREED) && + (flags == DB_MPOOL_NEW || flags == DB_MPOOL_CREATE)) + goto revive; + else if (flags == DB_MPOOL_FREE) + goto freebuf; + else if (makecopy || F_ISSET(bhp, BH_FROZEN)) + break; + + /* + * We can't use the page we found in the pool if DB_MPOOL_NEW + * was set. (For details, see the above comment beginning + * "DB_MPOOL_NEW does not guarantee you a page unreferenced by + * any other thread of control".) If DB_MPOOL_NEW is set, we + * release our pin on this particular buffer, and try to get + * another one. + */ + if (flags == DB_MPOOL_NEW) { + DB_ASSERT(env, b_incr && BH_REFCOUNT(bhp) != 0); + atomic_dec(env, &bhp->ref); + b_incr = 0; + if (F_ISSET(bhp, BH_EXCLUSIVE)) + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + b_lock = 0; + bhp = NULL; + hp = NULL; + goto newpg; + } + + break; + case SECOND_MISS: + /* + * We allocated buffer space for the requested page, and found + * the page still missing on our second pass through the buffer + * cache. Instantiate the page. 
+ */ + DB_ASSERT(env, alloc_bhp != NULL); + bhp = alloc_bhp; + alloc_bhp = NULL; + + /* + * Initialize all the BH and hash bucket fields so we can call + * __memp_bhfree if an error occurs. + * + * Append the buffer to the tail of the bucket list. + */ + bhp->priority = MPOOL_LRU_REDZONE; + bhp->pgno = *pgnoaddr; + bhp->mf_offset = mf_offset; + bhp->bucket = bucket; + bhp->region = (int)(infop - dbmp->reginfo); + bhp->td_off = INVALID_ROFF; + SH_CHAIN_INIT(bhp, vc); + bhp->flags = 0; + + /* + * Reference the buffer and lock exclusive. We either + * need to read the buffer or create it from scratch + * and don't want anyone looking at it till we do. + */ + MUTEX_LOCK(env, bhp->mtx_buf); + b_lock = 1; + F_SET(bhp, BH_EXCLUSIVE); + b_incr = 1; + + /* We created a new page, it starts dirty. */ + if (extending) { + atomic_inc(env, &hp->hash_page_dirty); + F_SET(bhp, BH_DIRTY | BH_DIRTY_CREATE); + } + + MUTEX_REQUIRED(env, hp->mtx_hash); + SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, bhp, hq, __bh); + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + + /* + * If we created the page, zero it out. If we didn't create + * the page, read from the backing file. + * + * !!! + * DB_MPOOL_NEW doesn't call the pgin function. + * + * If DB_MPOOL_CREATE is used, then the application's pgin + * function has to be able to handle pages of 0's -- if it + * uses DB_MPOOL_NEW, it can detect all of its page creates, + * and not bother. + * + * If we're running in diagnostic mode, smash any bytes on the + * page that are unknown quantities for the caller. + * + * Otherwise, read the page into memory, optionally creating it + * if DB_MPOOL_CREATE is set. + */ + if (extending) { + MVCC_MPROTECT(bhp->buf, mfp->pagesize, + PROT_READ | PROT_WRITE); + memset(bhp->buf, 0, + (mfp->clear_len == DB_CLEARLEN_NOTSET) ? 
+ mfp->pagesize : mfp->clear_len); +#if defined(DIAGNOSTIC) || defined(UMRW) + if (mfp->clear_len != DB_CLEARLEN_NOTSET) + memset(bhp->buf + mfp->clear_len, CLEAR_BYTE, + mfp->pagesize - mfp->clear_len); +#endif + + if (flags == DB_MPOOL_CREATE && mfp->ftype != 0 && + (ret = __memp_pg(dbmfp, + bhp->pgno, bhp->buf, 1)) != 0) + goto err; + + STAT_INC_VERB(env, mpool, page_create, + mfp->stat.st_page_create, + __memp_fn(dbmfp), *pgnoaddr); + } else { + F_SET(bhp, BH_TRASH); + STAT_INC_VERB(env, mpool, miss, mfp->stat.st_cache_miss, + __memp_fn(dbmfp), *pgnoaddr); + } + + makecopy = mvcc && dirty && !extending; + + /* Increment buffer count referenced by MPOOLFILE. */ + MUTEX_LOCK(env, mfp->mutex); + ++mfp->block_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + } + + DB_ASSERT(env, bhp != NULL && BH_REFCOUNT(bhp) != 0 && b_lock); + DB_ASSERT(env, !F_ISSET(bhp, BH_FROZEN) || !F_ISSET(bhp, BH_FREED) || + makecopy); + + /* We've got a buffer header we're re-instantiating. */ + if (F_ISSET(bhp, BH_FROZEN) && !F_ISSET(bhp, BH_FREED)) { + if (alloc_bhp == NULL) + goto reuse; + + /* + * To thaw the buffer, we must hold the hash bucket mutex, + * and the call to __memp_bh_thaw will release it. + */ + if (h_locked == 0) + MUTEX_LOCK(env, hp->mtx_hash); + h_locked = 1; + + /* + * If the empty buffer has been filled in the meantime, don't + * overwrite it. + */ + if (F_ISSET(bhp, BH_THAWED)) { + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + goto thawed; + } + + ret = __memp_bh_thaw(dbmp, infop, hp, bhp, alloc_bhp); + bhp = NULL; + b_lock = h_locked = 0; + if (ret != 0) + goto err; + bhp = alloc_bhp; + alloc_bhp = NULL; + MUTEX_REQUIRED(env, bhp->mtx_buf); + b_incr = b_lock = 1; + } + + /* + * BH_TRASH -- + * The buffer we found may need to be filled from the disk. + * + * It's possible for the read function to fail, which means we fail + * as well. Discard the buffer on failure unless another thread + * is waiting on our I/O to complete. 
It's OK to leave the buffer + * around, as the waiting thread will see the BH_TRASH flag set, + * and will also attempt to discard it. If there's a waiter, + * we need to decrement our reference count. + */ + if (F_ISSET(bhp, BH_TRASH) && + flags != DB_MPOOL_FREE && !F_ISSET(bhp, BH_FREED)) { + MVCC_MPROTECT(bhp->buf, mfp->pagesize, + PROT_READ | PROT_WRITE); + if ((ret = __memp_pgread(dbmfp, + bhp, LF_ISSET(DB_MPOOL_CREATE) ? 1 : 0)) != 0) + goto err; + DB_ASSERT(env, read_lsnp != NULL || !SH_CHAIN_HASNEXT(bhp, vc)); + } + + /* Copy-on-write. */ + if (makecopy) { + /* + * If we read a page from disk that we want to modify, we now + * need to make copy, so we now need to allocate another buffer + * to hold the new copy. + */ + if (alloc_bhp == NULL) + goto reuse; + + DB_ASSERT(env, bhp != NULL && alloc_bhp != bhp); + DB_ASSERT(env, bhp->td_off == INVALID_ROFF || + !IS_MAX_LSN(*VISIBLE_LSN(env, bhp)) || + (F_ISSET(bhp, BH_FREED) && F_ISSET(bhp, BH_FROZEN))); + DB_ASSERT(env, txn != NULL || + (F_ISSET(bhp, BH_FROZEN) && F_ISSET(bhp, BH_FREED))); + DB_ASSERT(env, (extending || flags == DB_MPOOL_FREE || + F_ISSET(bhp, BH_FREED)) || + !F_ISSET(bhp, BH_FROZEN | BH_TRASH)); + MUTEX_REQUIRED(env, bhp->mtx_buf); + + if (BH_REFCOUNT(bhp) == 1) + MVCC_MPROTECT(bhp->buf, mfp->pagesize, + PROT_READ); + + atomic_init(&alloc_bhp->ref, 1); + MUTEX_LOCK(env, alloc_bhp->mtx_buf); + alloc_bhp->priority = bhp->priority; + alloc_bhp->pgno = bhp->pgno; + alloc_bhp->bucket = bhp->bucket; + alloc_bhp->region = bhp->region; + alloc_bhp->mf_offset = bhp->mf_offset; + alloc_bhp->td_off = INVALID_ROFF; + if (txn == NULL) { + DB_ASSERT(env, + F_ISSET(bhp, BH_FROZEN) && F_ISSET(bhp, BH_FREED)); + if (bhp->td_off != INVALID_ROFF && (ret = + __memp_bh_settxn(dbmp, mfp, alloc_bhp, + BH_OWNER(env, bhp))) != 0) + goto err; + } else if ((ret = + __memp_bh_settxn(dbmp, mfp, alloc_bhp, td)) != 0) + goto err; + MVCC_MPROTECT(alloc_bhp->buf, mfp->pagesize, + PROT_READ | PROT_WRITE); + if (extending || + 
F_ISSET(bhp, BH_FREED) || flags == DB_MPOOL_FREE) { + memset(alloc_bhp->buf, 0, + (mfp->clear_len == DB_CLEARLEN_NOTSET) ? + mfp->pagesize : mfp->clear_len); +#if defined(DIAGNOSTIC) || defined(UMRW) + if (mfp->clear_len != DB_CLEARLEN_NOTSET) + memset(alloc_bhp->buf + mfp->clear_len, + CLEAR_BYTE, + mfp->pagesize - mfp->clear_len); +#endif + if (mfp->ftype != 0 && (ret = __memp_pg(dbmfp, + alloc_bhp->pgno, alloc_bhp->buf, 1)) != 0) + goto err; + } else + memcpy(alloc_bhp->buf, bhp->buf, mfp->pagesize); + MVCC_MPROTECT(alloc_bhp->buf, mfp->pagesize, 0); + + if (h_locked == 0) + MUTEX_LOCK(env, hp->mtx_hash); + MUTEX_REQUIRED(env, hp->mtx_hash); + h_locked = 1; + + alloc_bhp->flags = BH_EXCLUSIVE | + ((flags == DB_MPOOL_FREE) ? BH_FREED : + F_ISSET(bhp, BH_DIRTY | BH_DIRTY_CREATE)); + DB_ASSERT(env, flags != DB_MPOOL_FREE || + !F_ISSET(bhp, BH_DIRTY)); + F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE); + DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc)); + SH_CHAIN_INSERT_AFTER(bhp, alloc_bhp, vc, __bh); + SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, + bhp, alloc_bhp, hq, __bh); + SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh); + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + DB_ASSERT(env, b_incr && BH_REFCOUNT(bhp) > 0); + if (atomic_dec(env, &bhp->ref) == 0) { + bhp->priority = c_mp->lru_priority; + MVCC_MPROTECT(bhp->buf, mfp->pagesize, 0); + } + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + + bhp = alloc_bhp; + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0); + b_incr = 1; + MUTEX_REQUIRED(env, bhp->mtx_buf); + b_lock = 1; + + if (alloc_bhp != oldest_bhp) { + MUTEX_LOCK(env, mfp->mutex); + ++mfp->block_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + } + + alloc_bhp = NULL; + } else if (mvcc && extending && + (ret = __memp_bh_settxn(dbmp, mfp, bhp, td)) != 0) + goto err; + + if (flags == DB_MPOOL_FREE) { + DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc)); + /* If we have created an empty buffer, it is not returned. 
*/ + if (!F_ISSET(bhp, BH_FREED)) + goto freebuf; + goto done; + } + + /* + * Free the allocated memory, we no longer need it. + */ + if (alloc_bhp != NULL) { + if ((ret = __memp_bhfree(dbmp, infop, NULL, + NULL, alloc_bhp, BH_FREE_FREEMEM | BH_FREE_UNLOCKED)) != 0) + goto err; + alloc_bhp = NULL; + } + + if (dirty || extending || + (F_ISSET(bhp, BH_FREED) && + (flags == DB_MPOOL_CREATE || flags == DB_MPOOL_NEW))) { + MUTEX_REQUIRED(env, bhp->mtx_buf); + if (F_ISSET(bhp, BH_FREED)) { + DB_ASSERT(env, bhp->pgno <= mfp->last_pgno); + memset(bhp->buf, 0, + (mfp->clear_len == DB_CLEARLEN_NOTSET) ? + mfp->pagesize : mfp->clear_len); + F_CLR(bhp, BH_FREED); + if (mfp->ftype != 0 && (ret = + __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1)) != 0) + goto err; + } + if (!F_ISSET(bhp, BH_DIRTY)) { +#ifdef DIAGNOSTIC + MUTEX_LOCK(env, hp->mtx_hash); +#endif + DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc)); + atomic_inc(env, &hp->hash_page_dirty); + F_SET(bhp, BH_DIRTY); +#ifdef DIAGNOSTIC + MUTEX_UNLOCK(env, hp->mtx_hash); +#endif + } + } else if (F_ISSET(bhp, BH_EXCLUSIVE)) { + F_CLR(bhp, BH_EXCLUSIVE); +#ifdef HAVE_SHARED_LATCHES + MUTEX_UNLOCK(env, bhp->mtx_buf); + MUTEX_READLOCK(env, bhp->mtx_buf); + /* + * If another thread has dirtied the page while we + * switched locks, we have to go through it all again. + */ + if (SH_CHAIN_HASNEXT(bhp, vc) && read_lsnp == NULL) { + atomic_dec(env, &bhp->ref); + b_incr = 0; + MUTEX_UNLOCK(env, bhp->mtx_buf); + b_lock = 0; + bhp = NULL; + goto retry; + } +#endif + } + + MVCC_MPROTECT(bhp->buf, mfp->pagesize, PROT_READ | + (dirty || extending || F_ISSET(bhp, BH_DIRTY) ? 
+ PROT_WRITE : 0)); + +#ifdef DIAGNOSTIC + MUTEX_LOCK(env, hp->mtx_hash); + { + BH *next_bhp = SH_CHAIN_NEXT(bhp, vc, __bh); + + DB_ASSERT(env, !atomic_read(&mfp->multiversion) || read_lsnp != NULL || + next_bhp == NULL); + DB_ASSERT(env, !mvcc || read_lsnp == NULL || + bhp->td_off == INVALID_ROFF || BH_OWNED_BY(env, bhp, txn) || + (BH_VISIBLE(env, bhp, read_lsnp, vlsn) && + (next_bhp == NULL || F_ISSET(next_bhp, BH_FROZEN) || + (next_bhp->td_off != INVALID_ROFF && + (BH_OWNER(env, next_bhp)->status != TXN_COMMITTED || + IS_ZERO_LSN(BH_OWNER(env, next_bhp)->last_lsn) || + !BH_VISIBLE(env, next_bhp, read_lsnp, vlsn)))))); + } + MUTEX_UNLOCK(env, hp->mtx_hash); +#endif + + /* + * Record this pin for this thread. Holding the page pinned + * without recording the pin is ok since we do not recover from + * a death from within the library itself. + */ + if (ip != NULL) { + reginfo = env->reginfo; + if (ip->dbth_pincount == ip->dbth_pinmax) { + pinmax = ip->dbth_pinmax; + renv = reginfo->primary; + MUTEX_LOCK(env, renv->mtx_regenv); + if ((ret = __env_alloc(reginfo, + 2 * pinmax * sizeof(PIN_LIST), &list)) != 0) { + MUTEX_UNLOCK(env, renv->mtx_regenv); + goto err; + } + + memcpy(list, R_ADDR(reginfo, ip->dbth_pinlist), + pinmax * sizeof(PIN_LIST)); + memset(&list[pinmax], 0, pinmax * sizeof(PIN_LIST)); + list_off = R_OFFSET(reginfo, list); + list = R_ADDR(reginfo, ip->dbth_pinlist); + ip->dbth_pinmax = 2 * pinmax; + ip->dbth_pinlist = list_off; + if (list != ip->dbth_pinarray) + __env_alloc_free(reginfo, list); + MUTEX_UNLOCK(env, renv->mtx_regenv); + } + list = R_ADDR(reginfo, ip->dbth_pinlist); + for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) + if (lp->b_ref == INVALID_ROFF) + break; + + ip->dbth_pincount++; + lp->b_ref = R_OFFSET(infop, bhp); + lp->region = (int)(infop - dbmp->reginfo); +#ifdef DIAGNOSTIC + if (dirty && ip->dbth_locker != INVALID_ROFF && + ip->dbth_check_off == 0) { + lt = env->lk_handle; + locker = (DB_LOCKER *) + (R_ADDR(<->reginfo, 
ip->dbth_locker)); + DB_ASSERT(env, __db_has_pagelock(env, locker, dbmfp, + (PAGE*)bhp->buf, DB_LOCK_WRITE) == 0); + } +#endif + + } + /* + * During recovery we can read past the end of the file. Also + * last_pgno is not versioned, so if this is an older version + * that is ok as well. + */ + DB_ASSERT(env, IS_RECOVERING(env) || + bhp->pgno <= mfp->last_pgno || !SH_CHAIN_SINGLETON(bhp, vc)); + +#ifdef DIAGNOSTIC + /* Update the file's pinned reference count. */ + MPOOL_SYSTEM_LOCK(env); + ++dbmfp->pinref; + MPOOL_SYSTEM_UNLOCK(env); + + /* + * We want to switch threads as often as possible, and at awkward + * times. Yield every time we get a new page to ensure contention. + */ + if (F_ISSET(env->dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + + DB_ASSERT(env, alloc_bhp == NULL); + DB_ASSERT(env, !(dirty || extending) || + atomic_read(&hp->hash_page_dirty) > 0); + DB_ASSERT(env, BH_REFCOUNT(bhp) > 0 && + !F_ISSET(bhp, BH_FREED | BH_FROZEN | BH_TRASH)); + + *(void **)addrp = bhp->buf; + return (0); + +done: +err: /* + * We should only get to here with ret == 0 if freeing a buffer. + * In that case, check that it has in fact been freed. + */ + DB_ASSERT(env, ret != 0 || flags != DB_MPOOL_FREE || bhp == NULL || + (F_ISSET(bhp, BH_FREED) && !SH_CHAIN_HASNEXT(bhp, vc))); + + if (bhp != NULL) { + if (b_incr) + atomic_dec(env, &bhp->ref); + if (b_lock) { + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + } + } + + if (h_locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + + /* If alloc_bhp is set, free the memory. */ + if (alloc_bhp != NULL) + (void)__memp_bhfree(dbmp, infop, NULL, + NULL, alloc_bhp, BH_FREE_FREEMEM | BH_FREE_UNLOCKED); + + return (ret); +} diff --git a/src/mp/mp_fmethod.c b/src/mp/mp_fmethod.c new file mode 100644 index 00000000..0773d470 --- /dev/null +++ b/src/mp/mp_fmethod.c @@ -0,0 +1,587 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" + +static int __memp_get_clear_len __P((DB_MPOOLFILE *, u_int32_t *)); +static int __memp_get_lsn_offset __P((DB_MPOOLFILE *, int32_t *)); +static int __memp_get_maxsize __P((DB_MPOOLFILE *, u_int32_t *, u_int32_t *)); +static int __memp_set_maxsize __P((DB_MPOOLFILE *, u_int32_t, u_int32_t)); +static int __memp_set_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY)); +static int __memp_get_last_pgno_pp __P((DB_MPOOLFILE *, db_pgno_t *)); + +/* + * __memp_fcreate_pp -- + * ENV->memp_fcreate pre/post processing. + * + * PUBLIC: int __memp_fcreate_pp __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t)); + */ +int +__memp_fcreate_pp(dbenv, retp, flags) + DB_ENV *dbenv; + DB_MPOOLFILE **retp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + /* Validate arguments. */ + if ((ret = __db_fchk(env, "DB_ENV->memp_fcreate", flags, 0)) != 0) + return (ret); + + if (REP_ON(env)) { + __db_errx(env, DB_STR("3029", +"DB_ENV->memp_fcreate: method not permitted when replication is configured")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + ret = __memp_fcreate(env, retp); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_fcreate -- + * ENV->memp_fcreate. + * + * PUBLIC: int __memp_fcreate __P((ENV *, DB_MPOOLFILE **)); + */ +int +__memp_fcreate(env, retp) + ENV *env; + DB_MPOOLFILE **retp; +{ + DB_MPOOLFILE *dbmfp; + int ret; + + /* Allocate and initialize the per-process structure. 
*/ + if ((ret = __os_calloc(env, 1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0) + return (ret); + + dbmfp->ref = 1; + dbmfp->lsn_offset = DB_LSN_OFF_NOTSET; + dbmfp->env = env; + dbmfp->mfp = INVALID_ROFF; + + dbmfp->close = __memp_fclose_pp; + dbmfp->get = __memp_fget_pp; + dbmfp->get_clear_len = __memp_get_clear_len; + dbmfp->get_fileid = __memp_get_fileid; + dbmfp->get_flags = __memp_get_flags; + dbmfp->get_ftype = __memp_get_ftype; + dbmfp->get_last_pgno = __memp_get_last_pgno_pp; + dbmfp->get_lsn_offset = __memp_get_lsn_offset; + dbmfp->get_maxsize = __memp_get_maxsize; + dbmfp->get_pgcookie = __memp_get_pgcookie; + dbmfp->get_priority = __memp_get_priority; + dbmfp->open = __memp_fopen_pp; + dbmfp->put = __memp_fput_pp; + dbmfp->set_clear_len = __memp_set_clear_len; + dbmfp->set_fileid = __memp_set_fileid; + dbmfp->set_flags = __memp_set_flags; + dbmfp->set_ftype = __memp_set_ftype; + dbmfp->set_lsn_offset = __memp_set_lsn_offset; + dbmfp->set_maxsize = __memp_set_maxsize; + dbmfp->set_pgcookie = __memp_set_pgcookie; + dbmfp->set_priority = __memp_set_priority; + dbmfp->sync = __memp_fsync_pp; + + *retp = dbmfp; + return (0); +} + +/* + * __memp_get_clear_len -- + * Get the clear length. + */ +static int +__memp_get_clear_len(dbmfp, clear_lenp) + DB_MPOOLFILE *dbmfp; + u_int32_t *clear_lenp; +{ + *clear_lenp = dbmfp->clear_len; + return (0); +} + +/* + * __memp_set_clear_len -- + * DB_MPOOLFILE->set_clear_len. + * + * PUBLIC: int __memp_set_clear_len __P((DB_MPOOLFILE *, u_int32_t)); + */ +int +__memp_set_clear_len(dbmfp, clear_len) + DB_MPOOLFILE *dbmfp; + u_int32_t clear_len; +{ + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_clear_len"); + + dbmfp->clear_len = clear_len; + return (0); +} + +/* + * __memp_get_fileid -- + * DB_MPOOLFILE->get_fileid. 
+ * + * PUBLIC: int __memp_get_fileid __P((DB_MPOOLFILE *, u_int8_t *)); + */ +int +__memp_get_fileid(dbmfp, fileid) + DB_MPOOLFILE *dbmfp; + u_int8_t *fileid; +{ + if (!F_ISSET(dbmfp, MP_FILEID_SET)) { + __db_errx(dbmfp->env, DB_STR("3030", + "get_fileid: file ID not set")); + return (EINVAL); + } + + memcpy(fileid, dbmfp->fileid, DB_FILE_ID_LEN); + return (0); +} + +/* + * __memp_set_fileid -- + * DB_MPOOLFILE->set_fileid. + * + * PUBLIC: int __memp_set_fileid __P((DB_MPOOLFILE *, u_int8_t *)); + */ +int +__memp_set_fileid(dbmfp, fileid) + DB_MPOOLFILE *dbmfp; + u_int8_t *fileid; +{ + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_fileid"); + + memcpy(dbmfp->fileid, fileid, DB_FILE_ID_LEN); + F_SET(dbmfp, MP_FILEID_SET); + + return (0); +} + +/* + * __memp_get_flags -- + * Get the DB_MPOOLFILE flags; + * + * PUBLIC: int __memp_get_flags __P((DB_MPOOLFILE *, u_int32_t *)); + */ +int +__memp_get_flags(dbmfp, flagsp) + DB_MPOOLFILE *dbmfp; + u_int32_t *flagsp; +{ + MPOOLFILE *mfp; + + mfp = dbmfp->mfp; + + *flagsp = 0; + + if (mfp == NULL) + *flagsp = FLD_ISSET(dbmfp->config_flags, + DB_MPOOL_NOFILE | DB_MPOOL_UNLINK); + else { + if (mfp->no_backing_file) + FLD_SET(*flagsp, DB_MPOOL_NOFILE); + if (mfp->unlink_on_close) + FLD_SET(*flagsp, DB_MPOOL_UNLINK); + } + return (0); +} + +/* + * __memp_set_flags -- + * Set the DB_MPOOLFILE flags; + * + * PUBLIC: int __memp_set_flags __P((DB_MPOOLFILE *, u_int32_t, int)); + */ +int +__memp_set_flags(dbmfp, flags, onoff) + DB_MPOOLFILE *dbmfp; + u_int32_t flags; + int onoff; +{ + ENV *env; + MPOOLFILE *mfp; + int ret; + + env = dbmfp->env; + mfp = dbmfp->mfp; + + switch (flags) { + case DB_MPOOL_NOFILE: + if (mfp == NULL) + if (onoff) + FLD_SET(dbmfp->config_flags, DB_MPOOL_NOFILE); + else + FLD_CLR(dbmfp->config_flags, DB_MPOOL_NOFILE); + else + mfp->no_backing_file = onoff; + break; + case DB_MPOOL_UNLINK: + if (mfp == NULL) + if (onoff) + FLD_SET(dbmfp->config_flags, DB_MPOOL_UNLINK); + else + FLD_CLR(dbmfp->config_flags, 
DB_MPOOL_UNLINK); + else + mfp->unlink_on_close = onoff; + break; + default: + if ((ret = __db_fchk(env, "DB_MPOOLFILE->set_flags", + flags, DB_MPOOL_NOFILE | DB_MPOOL_UNLINK)) != 0) + return (ret); + break; + } + return (0); +} + +/* + * __memp_get_ftype -- + * Get the file type (as registered). + * + * PUBLIC: int __memp_get_ftype __P((DB_MPOOLFILE *, int *)); + */ +int +__memp_get_ftype(dbmfp, ftypep) + DB_MPOOLFILE *dbmfp; + int *ftypep; +{ + *ftypep = dbmfp->ftype; + return (0); +} + +/* + * __memp_set_ftype -- + * DB_MPOOLFILE->set_ftype. + * + * PUBLIC: int __memp_set_ftype __P((DB_MPOOLFILE *, int)); + */ +int +__memp_set_ftype(dbmfp, ftype) + DB_MPOOLFILE *dbmfp; + int ftype; +{ + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_ftype"); + + dbmfp->ftype = ftype; + return (0); +} + +/* + * __memp_get_lsn_offset -- + * Get the page's LSN offset. + */ +static int +__memp_get_lsn_offset(dbmfp, lsn_offsetp) + DB_MPOOLFILE *dbmfp; + int32_t *lsn_offsetp; +{ + *lsn_offsetp = dbmfp->lsn_offset; + return (0); +} + +/* + * __memp_set_lsn_offset -- + * Set the page's LSN offset. + * + * PUBLIC: int __memp_set_lsn_offset __P((DB_MPOOLFILE *, int32_t)); + */ +int +__memp_set_lsn_offset(dbmfp, lsn_offset) + DB_MPOOLFILE *dbmfp; + int32_t lsn_offset; +{ + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_lsn_offset"); + + dbmfp->lsn_offset = lsn_offset; + return (0); +} + +/* + * __memp_get_maxsize -- + * Get the file's maximum size. 
+ */ +static int +__memp_get_maxsize(dbmfp, gbytesp, bytesp) + DB_MPOOLFILE *dbmfp; + u_int32_t *gbytesp, *bytesp; +{ + DB_THREAD_INFO *ip; + ENV *env; + MPOOLFILE *mfp; + + if ((mfp = dbmfp->mfp) == NULL) { + *gbytesp = dbmfp->gbytes; + *bytesp = dbmfp->bytes; + } else { + env = dbmfp->env; + ENV_ENTER(env, ip); + + MUTEX_LOCK(env, mfp->mutex); + *gbytesp = (u_int32_t) + (mfp->maxpgno / (GIGABYTE / mfp->pagesize)); + *bytesp = (u_int32_t) + ((mfp->maxpgno % (GIGABYTE / mfp->pagesize)) * + mfp->pagesize); + MUTEX_UNLOCK(env, mfp->mutex); + + ENV_LEAVE(env, ip); + } + + return (0); +} + +/* + * __memp_set_maxsize -- + * Set the file's maximum size. + */ +static int +__memp_set_maxsize(dbmfp, gbytes, bytes) + DB_MPOOLFILE *dbmfp; + u_int32_t gbytes, bytes; +{ + DB_THREAD_INFO *ip; + ENV *env; + MPOOLFILE *mfp; + + if ((mfp = dbmfp->mfp) == NULL) { + dbmfp->gbytes = gbytes; + dbmfp->bytes = bytes; + } else { + env = dbmfp->env; + ENV_ENTER(env, ip); + + MUTEX_LOCK(env, mfp->mutex); + mfp->maxpgno = (db_pgno_t) + (gbytes * (GIGABYTE / mfp->pagesize)); + mfp->maxpgno += (db_pgno_t) + ((bytes + mfp->pagesize - 1) / mfp->pagesize); + MUTEX_UNLOCK(env, mfp->mutex); + + ENV_LEAVE(env, ip); + } + + return (0); +} + +/* + * __memp_get_pgcookie -- + * Get the pgin/pgout cookie. + * + * PUBLIC: int __memp_get_pgcookie __P((DB_MPOOLFILE *, DBT *)); + */ +int +__memp_get_pgcookie(dbmfp, pgcookie) + DB_MPOOLFILE *dbmfp; + DBT *pgcookie; +{ + if (dbmfp->pgcookie == NULL) { + pgcookie->size = 0; + pgcookie->data = ""; + } else + memcpy(pgcookie, dbmfp->pgcookie, sizeof(DBT)); + return (0); +} + +/* + * __memp_set_pgcookie -- + * Set the pgin/pgout cookie. 
+ * + * PUBLIC: int __memp_set_pgcookie __P((DB_MPOOLFILE *, DBT *)); + */ +int +__memp_set_pgcookie(dbmfp, pgcookie) + DB_MPOOLFILE *dbmfp; + DBT *pgcookie; +{ + DBT *cookie; + ENV *env; + int ret; + + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "DB_MPOOLFILE->set_pgcookie"); + env = dbmfp->env; + + if ((ret = __os_calloc(env, 1, sizeof(*cookie), &cookie)) != 0) + return (ret); + if ((ret = __os_malloc(env, pgcookie->size, &cookie->data)) != 0) { + __os_free(env, cookie); + return (ret); + } + + memcpy(cookie->data, pgcookie->data, pgcookie->size); + cookie->size = pgcookie->size; + + dbmfp->pgcookie = cookie; + return (0); +} + +/* + * __memp_get_priority -- + * Set the cache priority for pages from this file. + * + * PUBLIC: int __memp_get_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *)); + */ +int +__memp_get_priority(dbmfp, priorityp) + DB_MPOOLFILE *dbmfp; + DB_CACHE_PRIORITY *priorityp; +{ + switch (dbmfp->priority) { + case MPOOL_PRI_VERY_LOW: + *priorityp = DB_PRIORITY_VERY_LOW; + break; + case MPOOL_PRI_LOW: + *priorityp = DB_PRIORITY_LOW; + break; + case MPOOL_PRI_DEFAULT: + *priorityp = DB_PRIORITY_DEFAULT; + break; + case MPOOL_PRI_HIGH: + *priorityp = DB_PRIORITY_HIGH; + break; + case MPOOL_PRI_VERY_HIGH: + *priorityp = DB_PRIORITY_VERY_HIGH; + break; + default: + __db_errx(dbmfp->env, DB_STR_A("3031", + "DB_MPOOLFILE->get_priority: unknown priority value: %d", + "%d"), dbmfp->priority); + return (EINVAL); + } + + return (0); +} + +/* + * __memp_set_priority -- + * Set the cache priority for pages from this file. 
+ */ +static int +__memp_set_priority(dbmfp, priority) + DB_MPOOLFILE *dbmfp; + DB_CACHE_PRIORITY priority; +{ + switch (priority) { + case DB_PRIORITY_VERY_LOW: + dbmfp->priority = MPOOL_PRI_VERY_LOW; + break; + case DB_PRIORITY_LOW: + dbmfp->priority = MPOOL_PRI_LOW; + break; + case DB_PRIORITY_DEFAULT: + dbmfp->priority = MPOOL_PRI_DEFAULT; + break; + case DB_PRIORITY_HIGH: + dbmfp->priority = MPOOL_PRI_HIGH; + break; + case DB_PRIORITY_VERY_HIGH: + dbmfp->priority = MPOOL_PRI_VERY_HIGH; + break; + default: + __db_errx(dbmfp->env, DB_STR_A("3032", + "DB_MPOOLFILE->set_priority: unknown priority value: %d", + "%d"), priority); + return (EINVAL); + } + + /* Update the underlying file if we've already opened it. */ + if (dbmfp->mfp != NULL) + dbmfp->mfp->priority = dbmfp->priority; + + return (0); +} + +/* + * __memp_get_last_pgno -- + * Return the page number of the last page in the file. + * + * !!! + * The method is undocumented, but the handle is exported, users occasionally + * ask for it. + * + * PUBLIC: int __memp_get_last_pgno __P((DB_MPOOLFILE *, db_pgno_t *)); + */ +int +__memp_get_last_pgno(dbmfp, pgnoaddr) + DB_MPOOLFILE *dbmfp; + db_pgno_t *pgnoaddr; +{ + ENV *env; + MPOOLFILE *mfp; + + env = dbmfp->env; + mfp = dbmfp->mfp; + + MUTEX_LOCK(env, mfp->mutex); + *pgnoaddr = mfp->last_pgno; + MUTEX_UNLOCK(env, mfp->mutex); + + return (0); +} + +/* + * __memp_get_last_pgno_pp -- + * pre/post processing for __memp_get_last_pgno. + * + */ +static int +__memp_get_last_pgno_pp(dbmfp, pgnoaddr) + DB_MPOOLFILE *dbmfp; + db_pgno_t *pgnoaddr; +{ + DB_THREAD_INFO *ip; + int ret; + + ret = 0; + ENV_ENTER(dbmfp->env, ip); + + ret = __memp_get_last_pgno(dbmfp, pgnoaddr); + + ENV_LEAVE(dbmfp->env, ip); + return (ret); +} + +/* + * __memp_fn -- + * On errors we print whatever is available as the file name. 
+ * + * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *)); + */ +char * +__memp_fn(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + return (__memp_fns(dbmfp->env->mp_handle, dbmfp->mfp)); +} + +/* + * __memp_fns -- + * On errors we print whatever is available as the file name. + * + * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); + * + */ +char * +__memp_fns(dbmp, mfp) + DB_MPOOL *dbmp; + MPOOLFILE *mfp; +{ + if (mfp == NULL || mfp->path_off == 0) + return ((char *)"unknown"); + + return ((char *)R_ADDR(dbmp->reginfo, mfp->path_off)); +} diff --git a/src/mp/mp_fopen.c b/src/mp/mp_fopen.c new file mode 100644 index 00000000..282025c1 --- /dev/null +++ b/src/mp/mp_fopen.c @@ -0,0 +1,1103 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +static int __memp_mpf_alloc __P((DB_MPOOL *, + DB_MPOOLFILE *, const char *, u_int32_t, u_int32_t, MPOOLFILE **)); +static int __memp_mpf_find __P((ENV *, + DB_MPOOLFILE *, DB_MPOOL_HASH *, const char *, u_int32_t, MPOOLFILE **)); + +/* + * __memp_fopen_pp -- + * DB_MPOOLFILE->open pre/post processing. + * + * PUBLIC: int __memp_fopen_pp + * PUBLIC: __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t)); + */ +int +__memp_fopen_pp(dbmfp, path, flags, mode, pagesize) + DB_MPOOLFILE *dbmfp; + const char *path; + u_int32_t flags; + int mode; + size_t pagesize; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbmfp->env; + + /* Validate arguments. */ + if ((ret = __db_fchk(env, "DB_MPOOLFILE->open", flags, + DB_CREATE | DB_DIRECT | DB_EXTENT | DB_MULTIVERSION | + DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE)) != 0) + return (ret); + + /* + * Require a non-zero, power-of-two pagesize, smaller than the + * clear length. 
+ */ + if (pagesize == 0 || !POWER_OF_TWO(pagesize)) { + __db_errx(env, DB_STR("3033", + "DB_MPOOLFILE->open: page sizes must be a power-of-2")); + return (EINVAL); + } + if (dbmfp->clear_len > pagesize) { + __db_errx(env, DB_STR("3034", + "DB_MPOOLFILE->open: clear length larger than page size")); + return (EINVAL); + } + + /* Read-only checks, and local flag. */ + if (LF_ISSET(DB_RDONLY) && path == NULL) { + __db_errx(env, DB_STR("3035", + "DB_MPOOLFILE->open: temporary files can't be readonly")); + return (EINVAL); + } + + if (LF_ISSET(DB_MULTIVERSION) && !TXN_ON(env)) { + __db_errx(env, DB_STR("3036", + "DB_MPOOLFILE->open: DB_MULTIVERSION requires transactions")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__memp_fopen(dbmfp, NULL, + path, NULL, flags, mode, pagesize)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_fopen -- + * DB_MPOOLFILE->open. + * + * PUBLIC: int __memp_fopen __P((DB_MPOOLFILE *, MPOOLFILE *, + * PUBLIC: const char *, const char **, u_int32_t, int, size_t)); + */ +int +__memp_fopen(dbmfp, mfp, path, dirp, flags, mode, pgsize) + DB_MPOOLFILE *dbmfp; + MPOOLFILE *mfp; + const char *path; + const char **dirp; + u_int32_t flags; + int mode; + size_t pgsize; +{ + DB_ENV *dbenv; + DB_MPOOL *dbmp; + DB_MPOOLFILE *tmp_dbmfp; + DB_MPOOL_HASH *hp; + ENV *env; + MPOOL *mp; + MPOOLFILE *alloc_mfp; + size_t maxmap; + db_pgno_t last_pgno; + u_int32_t bucket, mbytes, bytes, oflags, pagesize; + int refinc, ret; + char *rpath; + + /* If this handle is already open, return. */ + if (F_ISSET(dbmfp, MP_OPEN_CALLED)) + return (0); + + env = dbmfp->env; + dbmp = env->mp_handle; + dbenv = env->dbenv; + mp = dbmp->reginfo[0].primary; + alloc_mfp = NULL; + mbytes = bytes = 0; + refinc = ret = 0; + rpath = NULL; + + /* + * We're keeping the page size as a size_t in the public API, but + * it's a u_int32_t everywhere internally. 
+ */ + pagesize = (u_int32_t)pgsize; + + /* + * We're called internally with a specified mfp, in which case the + * path is NULL, but we'll get the path from the underlying region + * information. Otherwise, if the path is NULL, it's a temporary + * file -- we know we can't join any existing files, and we'll delay + * the open until we actually need to write the file. All temporary + * files will go into the first hash bucket. + */ + DB_ASSERT(env, mfp == NULL || path == NULL); + + bucket = 0; + hp = R_ADDR(dbmp->reginfo, mp->ftab); + if (mfp == NULL) { + if (path == NULL) + goto alloc; + + /* + * Hash to the proper file table entry and walk it. + * + * The fileID is a filesystem unique number (e.g., a + * UNIX dev/inode pair) plus a timestamp. If files are + * removed and created in less than a second, the fileID + * can be repeated. The problem with repetition happens + * when the file that previously had the fileID value still + * has pages in the pool, since we don't want to use them + * to satisfy requests for the new file. Because the + * DB_TRUNCATE flag reuses the dev/inode pair, repeated + * opens with that flag set guarantees matching fileIDs + * when the machine can open a file and then re-open + * with truncate within a second. For this reason, we + * pass that flag down, and, if we find a matching entry, + * we ensure that it's never found again, and we create + * a new entry for the current request. + */ + + if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) + bucket = FNBUCKET(path, strlen(path)); + else + bucket = FNBUCKET(dbmfp->fileid, DB_FILE_ID_LEN); + hp += bucket; + + /* + * If we are passed a FILEID find the MPOOLFILE and inc + * its ref count. That way it cannot go away while we + * open it. 
+ */ + if (F_ISSET(dbmfp, MP_FILEID_SET)) { + MUTEX_LOCK(env, hp->mtx_hash); + ret = + __memp_mpf_find(env, dbmfp, hp, path, flags,&mfp); + MUTEX_UNLOCK(env, hp->mtx_hash); + if (ret != 0) + goto err; + if (mfp != NULL) + refinc = 1; + } + } else { + /* + * Deadfile can only be set if mpf_cnt goes to zero (or if we + * failed creating the file DB_AM_DISCARD). Increment the ref + * count so the file cannot become dead and be unlinked. + */ + MUTEX_LOCK(env, mfp->mutex); + if (!mfp->deadfile) { + ++mfp->mpf_cnt; + refinc = 1; + } + MUTEX_UNLOCK(env, mfp->mutex); + + /* + * Test one last time to see if the file is dead -- it may have + * been removed. This happens when a checkpoint trying to open + * the file to flush a buffer races with the Db::remove method. + * The error will be ignored, so don't output an error message. + */ + if (mfp->deadfile) + return (EINVAL); + } + + /* + * Share the underlying file descriptor if that's possible. + */ + if (mfp != NULL && !FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) { + MUTEX_LOCK(env, dbmp->mutex); + TAILQ_FOREACH(tmp_dbmfp, &dbmp->dbmfq, q) + if (mfp == tmp_dbmfp->mfp && + (F_ISSET(dbmfp, MP_READONLY) || + !F_ISSET(tmp_dbmfp, MP_READONLY))) { + ++tmp_dbmfp->fhp->ref; + dbmfp->fhp = tmp_dbmfp->fhp; + dbmfp->addr = tmp_dbmfp->addr; + break; + } + MUTEX_UNLOCK(env, dbmp->mutex); + if (dbmfp->fhp != NULL) + goto have_mfp; + } + + /* + * If there's no backing file, we can join existing files in the cache, + * but there's nothing to read from disk. + */ + if (!FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) { + /* Convert MP open flags to DB OS-layer open flags. */ + oflags = 0; + if (LF_ISSET(DB_CREATE)) + oflags |= DB_OSO_CREATE; + if (LF_ISSET(DB_DIRECT)) + oflags |= DB_OSO_DIRECT; + if (LF_ISSET(DB_RDONLY)) { + F_SET(dbmfp, MP_READONLY); + oflags |= DB_OSO_RDONLY; + } + + /* + * XXX + * A grievous layering violation, the DB_DSYNC_DB flag + * was left in the ENV structure and not driven through + * the cache API. 
This needs to be fixed when the general + * API configuration is fixed. + */ + if (F_ISSET(env->dbenv, DB_ENV_DSYNC_DB)) + oflags |= DB_OSO_DSYNC; + + /* + * Get the real name for this file and open it. + * + * Supply a page size so os_open can decide whether to + * turn buffering off if the DB_DIRECT_DB flag is set. + * + * Acquire the region lock if we're using a path from + * an underlying MPOOLFILE -- there's a race in accessing + * the path name stored in the region, __memp_nameop may + * be simultaneously renaming the file. + */ + if (mfp != NULL) { + MPOOL_SYSTEM_LOCK(env); + path = R_ADDR(dbmp->reginfo, mfp->path_off); + } + if ((ret = __db_appname(env, + DB_APP_DATA, path, dirp, &rpath)) == 0) + ret = __os_open(env, rpath, + (u_int32_t)pagesize, oflags, mode, &dbmfp->fhp); + if (mfp != NULL) + MPOOL_SYSTEM_UNLOCK(env); + if (ret != 0) + goto err; + + /* + * Cache file handles are shared, and have mutexes to + * protect the underlying file handle across seek and + * read/write calls. + */ + dbmfp->fhp->ref = 1; + if ((ret = __mutex_alloc(env, MTX_MPOOL_FH, + DB_MUTEX_PROCESS_ONLY, &dbmfp->fhp->mtx_fh)) != 0) + goto err; + + /* Figure out the file's size. */ + if ((ret = __os_ioinfo( + env, rpath, dbmfp->fhp, &mbytes, &bytes, NULL)) != 0) { + __db_err(env, ret, "%s", rpath); + goto err; + } + + /* + * Don't permit files that aren't a multiple of the pagesize, + * and find the number of the last page in the file, all the + * time being careful not to overflow 32 bits. + * + * During verify or recovery, we might have to cope with a + * truncated file; if the file size is not a multiple of the + * page size, round down to a page, we'll take care of the + * partial page outside the mpool system. 
+ */ + DB_ASSERT(env, pagesize != 0); + if (bytes % pagesize != 0) { + if (LF_ISSET(DB_ODDFILESIZE)) + bytes -= (u_int32_t)(bytes % pagesize); + else { + __db_errx(env, DB_STR_A("3037", + "%s: file size not a multiple of the pagesize", "%s"), + rpath); + ret = EINVAL; + goto err; + } + } + + /* + * Get the file id if we weren't given one. Generated file id's + * don't use timestamps, otherwise there'd be no chance of any + * other process joining the party. Don't bother looking for + * this id in the hash table, its new. + */ + if (mfp == NULL && !F_ISSET(dbmfp, MP_FILEID_SET)) { + if ((ret = + __os_fileid(env, rpath, 0, dbmfp->fileid)) != 0) + goto err; + F_SET(dbmfp, MP_FILEID_SET); + goto alloc; + } + } + + if (mfp != NULL) + goto have_mfp; + + /* + * We can race with another process opening the same file when + * we allocate the mpoolfile structure. We will come back + * here and check the hash table again to see if it has appeared. + * For most files this is not a problem, since the name is locked + * at a higher layer but QUEUE extent files are not locked. + */ +check: MUTEX_LOCK(env, hp->mtx_hash); + if ((ret = __memp_mpf_find(env, dbmfp, hp, path, flags, &mfp) != 0)) + goto err; + + if (alloc_mfp != NULL && mfp == NULL) { + mfp = alloc_mfp; + alloc_mfp = NULL; + SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, mfp, q, __mpoolfile); + } else if (mfp != NULL) { + /* + * Some things about a file cannot be changed: the clear length, + * page size, or LSN location. However, if this is an attempt + * to open a named in-memory file, we may not yet have that + * information. so accept uninitialized entries. + * + * The file type can change if the application's pre- and post- + * processing needs change. For example, an application that + * created a hash subdatabase in a database that was previously + * all btree. + * + * !!! + * We do not check to see if the pgcookie information changed, + * or update it if it is. 
+ */ + if ((dbmfp->clear_len != DB_CLEARLEN_NOTSET && + mfp->clear_len != DB_CLEARLEN_NOTSET && + dbmfp->clear_len != mfp->clear_len) || + (pagesize != 0 && pagesize != mfp->pagesize) || + (dbmfp->lsn_offset != DB_LSN_OFF_NOTSET && + mfp->lsn_off != DB_LSN_OFF_NOTSET && + dbmfp->lsn_offset != mfp->lsn_off)) { + __db_errx(env, DB_STR_A("3038", + "%s: clear length, page size or LSN location changed", + "%s"), path); + MUTEX_UNLOCK(env, hp->mtx_hash); + ret = EINVAL; + goto err; + } + } + + MUTEX_UNLOCK(env, hp->mtx_hash); + if (alloc_mfp != NULL) { + MUTEX_LOCK(env, alloc_mfp->mutex); + if ((ret = __memp_mf_discard(dbmp, alloc_mfp, 0)) != 0) + goto err; + } + + if (mfp == NULL) { + /* + * If we didn't find the file and this is an in-memory file, + * then the create flag should be set. + */ + if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE) && + !LF_ISSET(DB_CREATE)) { + ret = ENOENT; + goto err; + } + +alloc: /* + * Get the file ID if we weren't given one. Generated file + * ID's don't use timestamps, otherwise there'd be no + * chance of any other process joining the party. + */ + if (path != NULL && + !FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE) && + !F_ISSET(dbmfp, MP_FILEID_SET) && (ret = + __os_fileid(env, rpath, 0, dbmfp->fileid)) != 0) + goto err; + + if ((ret = __memp_mpf_alloc(dbmp, + dbmfp, path, pagesize, flags, &alloc_mfp)) != 0) + goto err; + + /* + * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a + * page get, we have to increment the last page in the file. + * Figure it out and save it away. + * + * Note correction: page numbers are zero-based, not 1-based. + */ + DB_ASSERT(env, pagesize != 0); + last_pgno = (db_pgno_t)(mbytes * (MEGABYTE / pagesize)); + last_pgno += (db_pgno_t)(bytes / pagesize); + if (last_pgno != 0) + --last_pgno; + + alloc_mfp->last_flushed_pgno = alloc_mfp->orig_last_pgno = + alloc_mfp->last_pgno = last_pgno; + + alloc_mfp->bucket = bucket; + + /* Go back and see if someone else has opened the file. 
*/ + if (path != NULL) + goto check; + + mfp = alloc_mfp; + /* This is a temp, noone else can see it, put it at the end. */ + MUTEX_LOCK(env, hp->mtx_hash); + SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, mfp, q); + MUTEX_UNLOCK(env, hp->mtx_hash); + } +have_mfp: + /* + * We need to verify that all handles open a file either durable or not + * durable. This needs to be cross process and cross sub-databases, so + * mpool is the place to do it. + */ + if (!LF_ISSET(DB_DURABLE_UNKNOWN | DB_RDONLY)) { + if (F_ISSET(mfp, MP_DURABLE_UNKNOWN)) { + if (LF_ISSET(DB_TXN_NOT_DURABLE)) + F_SET(mfp, MP_NOT_DURABLE); + F_CLR(mfp, MP_DURABLE_UNKNOWN); + } else if (!LF_ISSET(DB_TXN_NOT_DURABLE) != + !F_ISSET(mfp, MP_NOT_DURABLE)) { + __db_errx(env, DB_STR("3039", + "Cannot open DURABLE and NOT DURABLE handles in the same file")); + ret = EINVAL; + goto err; + } + } + + if (LF_ISSET(DB_MULTIVERSION)) { + atomic_inc(env, &mfp->multiversion); + F_SET(dbmfp, MP_MULTIVERSION); + } + + /* + * All paths to here have initialized the mfp variable to reference + * the selected (or allocated) MPOOLFILE. + */ + dbmfp->mfp = mfp; + + /* + * Check to see if we can mmap the file. If a file: + * + isn't temporary + * + is read-only + * + doesn't require any pgin/pgout support + * + the DB_NOMMAP flag wasn't set (in either the file open or + * the environment in which it was opened) + * + and is less than mp_mmapsize bytes in size + * + * we can mmap it instead of reading/writing buffers. Don't do error + * checking based on the mmap call failure. We want to do normal I/O + * on the file if the reason we failed was because the file was on an + * NFS mounted partition, and we can fail in buffer I/O just as easily + * as here. + * + * We'd like to test to see if the file is too big to mmap. 
Since we + * don't know what size or type off_t's or size_t's are, or the largest + * unsigned integral type is, or what random insanity the local C + * compiler will perpetrate, doing the comparison in a portable way is + * flatly impossible. Hope that mmap fails if the file is too large. + */ +#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 MB. */ + if (F_ISSET(mfp, MP_CAN_MMAP) && dbmfp->addr == NULL) { + maxmap = dbenv->mp_mmapsize == 0 ? + DB_MAXMMAPSIZE : dbenv->mp_mmapsize; + if (path == NULL || + FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) + F_CLR(mfp, MP_CAN_MMAP); + else if (!F_ISSET(dbmfp, MP_READONLY)) + F_CLR(mfp, MP_CAN_MMAP); + else if (dbmfp->ftype != 0) + F_CLR(mfp, MP_CAN_MMAP); + else if (LF_ISSET(DB_NOMMAP) || F_ISSET(dbenv, DB_ENV_NOMMAP)) + F_CLR(mfp, MP_CAN_MMAP); + else { + MPOOL_SYSTEM_LOCK(env); + maxmap = mp->mp_mmapsize == 0 ? + DB_MAXMMAPSIZE : mp->mp_mmapsize; + MPOOL_SYSTEM_UNLOCK(env); + if (mbytes > maxmap / MEGABYTE || + (mbytes == maxmap / MEGABYTE && + bytes >= maxmap % MEGABYTE)) + F_CLR(mfp, MP_CAN_MMAP); + } + + dbmfp->addr = NULL; + if (F_ISSET(mfp, MP_CAN_MMAP)) { + dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; + if (__os_mapfile(env, rpath, + dbmfp->fhp, dbmfp->len, 1, &dbmfp->addr) != 0) { + dbmfp->addr = NULL; + F_CLR(mfp, MP_CAN_MMAP); + } + } + } + + F_SET(dbmfp, MP_OPEN_CALLED); + + /* + * Add the file to the process' list of DB_MPOOLFILEs. + */ + MUTEX_LOCK(env, dbmp->mutex); + TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); + MUTEX_UNLOCK(env, dbmp->mutex); + + if (0) { +err: if (refinc) { + /* + * If mpf_cnt goes to zero here and unlink_on_close is + * set, then we missed the last close, but there was an + * error trying to open the file, so we probably cannot + * unlink it anyway. + */ + MUTEX_LOCK(env, mfp->mutex); + --mfp->mpf_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + } + + } + if (rpath != NULL) + __os_free(env, rpath); + return (ret); +} + +/* + * __memp_mpf_find -- + * Search a hash bucket for a MPOOLFILE. 
 */
static int
__memp_mpf_find(env, dbmfp, hp, path, flags, mfpp)
	ENV *env;
	DB_MPOOLFILE *dbmfp;
	DB_MPOOL_HASH *hp;
	const char *path;
	u_int32_t flags;
	MPOOLFILE **mfpp;
{
	DB_MPOOL *dbmp;
	MPOOLFILE *mfp;

	dbmp = env->mp_handle;

	SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) {
		/* Skip dead files and temporary files. */
		if (mfp->deadfile || F_ISSET(mfp, MP_TEMP))
			continue;

		/*
		 * Any remaining DB_MPOOL_NOFILE databases are in-memory
		 * named databases and need only match other in-memory
		 * databases with the same name.
		 */
		if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) {
			if (!mfp->no_backing_file)
				continue;

			if (strcmp(path, R_ADDR(dbmp->reginfo, mfp->path_off)))
				continue;

			/*
			 * We matched an in-memory file; grab the fileid if
			 * it is set in the region, but not in the dbmfp.
			 */
			if (!F_ISSET(dbmfp, MP_FILEID_SET))
				(void)__memp_set_fileid(dbmfp,
				    R_ADDR(dbmp->reginfo, mfp->fileid_off));
		} else
			if (memcmp(dbmfp->fileid, R_ADDR(dbmp->reginfo,
			    mfp->fileid_off), DB_FILE_ID_LEN) != 0)
				continue;

		/*
		 * If the file is being truncated, remove it from the system
		 * and create a new entry.
		 *
		 * !!!
		 * We should be able to set mfp to NULL and break out of the
		 * loop, but I like the idea of checking all the entries.
		 */
		if (LF_ISSET(DB_TRUNCATE)) {
			MUTEX_LOCK(env, mfp->mutex);
			mfp->deadfile = 1;
			MUTEX_UNLOCK(env, mfp->mutex);
			continue;
		}

		/*
		 * Check to see if this file has died while we waited.
		 *
		 * We normally don't lock the deadfile field when we read it as
		 * we only care if the field is zero or non-zero.  We do lock
		 * on read when searching for a matching MPOOLFILE so that two
		 * threads of control don't race between setting the deadfile
		 * bit and incrementing the reference count, that is, a thread
		 * of control decrementing the reference count and then setting
		 * deadfile because the reference count is 0 blocks us finding
		 * the file without knowing it's about to be marked dead.
		 */
		MUTEX_LOCK(env, mfp->mutex);
		if (mfp->deadfile) {
			MUTEX_UNLOCK(env, mfp->mutex);
			continue;
		}
		/* On a match we return with mpf_cnt already incremented. */
		++mfp->mpf_cnt;
		MUTEX_UNLOCK(env, mfp->mutex);

		/* Initialize any fields that are not yet set. */
		if (dbmfp->ftype != 0)
			mfp->ftype = dbmfp->ftype;
		if (dbmfp->clear_len != DB_CLEARLEN_NOTSET)
			mfp->clear_len = dbmfp->clear_len;
		if (dbmfp->lsn_offset != -1)
			mfp->lsn_off = dbmfp->lsn_offset;

		break;
	}

	/* NULL when no live matching entry was found; always returns 0. */
	*mfpp = mfp;
	return (0);
}

/*
 * __memp_mpf_alloc --
 *	Allocate and initialize a new MPOOLFILE in the mpool shared region.
 */
static int
__memp_mpf_alloc(dbmp, dbmfp, path, pagesize, flags, retmfp)
	DB_MPOOL *dbmp;
	DB_MPOOLFILE *dbmfp;
	const char *path;
	u_int32_t pagesize;
	u_int32_t flags;
	MPOOLFILE **retmfp;
{
	ENV *env;
	MPOOLFILE *mfp;
	int ret;
	void *p;

	env = dbmp->env;
	ret = 0;
	/* Allocate and initialize a new MPOOLFILE. */
	if ((ret = __memp_alloc(dbmp,
	    dbmp->reginfo, NULL, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
		goto err;
	memset(mfp, 0, sizeof(MPOOLFILE));
	mfp->mpf_cnt = 1;
	mfp->ftype = dbmfp->ftype;
	mfp->pagesize = pagesize;
	mfp->lsn_off = dbmfp->lsn_offset;
	mfp->clear_len = dbmfp->clear_len;
	mfp->priority = dbmfp->priority;
	if (dbmfp->gbytes != 0 || dbmfp->bytes != 0) {
		/* Convert the configured size limit to a page count. */
		mfp->maxpgno = (db_pgno_t)
		    (dbmfp->gbytes * (GIGABYTE / mfp->pagesize));
		mfp->maxpgno += (db_pgno_t)
		    ((dbmfp->bytes + mfp->pagesize - 1) /
		    mfp->pagesize);
	}
	if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
		mfp->no_backing_file = 1;
	if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_UNLINK))
		mfp->unlink_on_close = 1;

	F_SET(mfp, MP_CAN_MMAP);
	if (F_ISSET(env->dbenv, DB_ENV_DATABASE_LOCKING))
		F_SET(mfp, MP_DATABASE_LOCKING);
	if (LF_ISSET(DB_DIRECT))
		F_SET(mfp, MP_DIRECT);
	if (LF_ISSET(DB_DURABLE_UNKNOWN | DB_RDONLY))
		F_SET(mfp, MP_DURABLE_UNKNOWN);
	if (LF_ISSET(DB_EXTENT))
		F_SET(mfp, MP_EXTENT);
	if (LF_ISSET(DB_TXN_NOT_DURABLE))
		F_SET(mfp, MP_NOT_DURABLE);

	/*
	 * An in-memory database with no name is a temp file.  Named
	 * in-memory databases get an artificially bumped reference
	 * count so they don't disappear on close; they need a remove
	 * to make them disappear.
	 */
	if (path == NULL)
		F_SET(mfp, MP_TEMP);
	else if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
		mfp->mpf_cnt++;

	/* Copy the file identification string into shared memory. */
	if (F_ISSET(dbmfp, MP_FILEID_SET)) {
		if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
		    NULL, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
			goto err;
		memcpy(p, dbmfp->fileid, DB_FILE_ID_LEN);
	}

	/* Copy the file path into shared memory. */
	if (path != NULL) {
		if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
		    NULL, strlen(path) + 1, &mfp->path_off, &p)) != 0)
			goto err;
		memcpy(p, path, strlen(path) + 1);
	}

	/* Copy the page cookie into shared memory. */
	if (dbmfp->pgcookie == NULL || dbmfp->pgcookie->size == 0) {
		mfp->pgcookie_len = 0;
		mfp->pgcookie_off = 0;
	} else {
		if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
		    NULL, dbmfp->pgcookie->size,
		    &mfp->pgcookie_off, &p)) != 0)
			goto err;
		memcpy(p,
		    dbmfp->pgcookie->data, dbmfp->pgcookie->size);
		mfp->pgcookie_len = dbmfp->pgcookie->size;
	}

	if ((ret = __mutex_alloc(env,
	    MTX_MPOOLFILE_HANDLE, 0, &mfp->mutex)) != 0)
		goto err;
	*retmfp = mfp;

	/*
	 * NOTE(review): on the error path the partially built MPOOLFILE and
	 * its region allocations are not freed here -- presumably reclaimed
	 * elsewhere (e.g. region teardown); confirm against callers.
	 */
err:	return (ret);
}

/*
 * __memp_fclose_pp --
 *	DB_MPOOLFILE->close pre/post processing.
 *
 * PUBLIC: int __memp_fclose_pp __P((DB_MPOOLFILE *, u_int32_t));
 */
int
__memp_fclose_pp(dbmfp, flags)
	DB_MPOOLFILE *dbmfp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret;

	env = dbmfp->env;

	/*
	 * Validate arguments, but as a handle destructor, we can't fail.
	 */
	if (flags != 0)
		(void)__db_ferr(env, "DB_MPOOLFILE->close", 0);

	ENV_ENTER(env, ip);
	REPLICATION_WRAP(env, (__memp_fclose(dbmfp, 0)), 0, ret);
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __memp_fclose --
 *	DB_MPOOLFILE->close.
 *
 * PUBLIC: int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t));
 */
int
__memp_fclose(dbmfp, flags)
	DB_MPOOLFILE *dbmfp;
	u_int32_t flags;
{
	DB_MPOOL *dbmp;
	ENV *env;
	MPOOLFILE *mfp;
	char *rpath;
	u_int32_t ref;
	int deleted, ret, t_ret;

	env = dbmfp->env;
	dbmp = env->mp_handle;
	ret = 0;

	/*
	 * Remove the DB_MPOOLFILE from the process' list.
	 *
	 * It's possible the underlying mpool cache may never have been
	 * created.  In that case, all we have is a structure, discard it.
	 *
	 * It's possible the DB_MPOOLFILE was never added to the DB_MPOOLFILE
	 * file list, check the MP_OPEN_CALLED flag to be sure.
	 */
	if (dbmp == NULL)
		goto done;

	MUTEX_LOCK(env, dbmp->mutex);

	DB_ASSERT(env, dbmfp->ref >= 1);
	if ((ref = --dbmfp->ref) == 0 && F_ISSET(dbmfp, MP_OPEN_CALLED))
		TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);

	/*
	 * Decrement the file descriptor's ref count -- if we're the last ref,
	 * we'll discard the file descriptor.  (If other handles still hold a
	 * reference, NULL our pointer so we don't close it below.)
	 */
	if (ref == 0 && dbmfp->fhp != NULL && --dbmfp->fhp->ref > 0)
		dbmfp->fhp = NULL;
	MUTEX_UNLOCK(env, dbmp->mutex);
	if (ref != 0)
		return (0);

	/* Complain if pinned blocks never returned. */
	if (dbmfp->pinref != 0) {
		__db_errx(env, DB_STR_A("3040",
		    "%s: close: %lu blocks left pinned", "%s %lu"),
		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
		ret = __env_panic(env, DB_RUNRECOVERY);
	}

	/* Discard any mmap information. */
	if (dbmfp->addr != NULL &&
	    (ret = __os_unmapfile(env, dbmfp->addr, dbmfp->len)) != 0)
		__db_err(env, ret, "%s", __memp_fn(dbmfp));

	/*
	 * Close the file and discard the descriptor structure; temporary
	 * files may not yet have been created.
	 */
	if (dbmfp->fhp != NULL) {
		if ((t_ret =
		    __mutex_free(env, &dbmfp->fhp->mtx_fh)) != 0 && ret == 0)
			ret = t_ret;
		if ((t_ret = __os_closehandle(env, dbmfp->fhp)) != 0) {
			__db_err(env, t_ret, "%s", __memp_fn(dbmfp));
			if (ret == 0)
				ret = t_ret;
		}
		dbmfp->fhp = NULL;
	}

	/*
	 * Discard our reference on the underlying MPOOLFILE, and close it
	 * if it's no longer useful to anyone.  It's possible the open of the
	 * file never happened or wasn't successful, in which case, mpf will
	 * be NULL and MP_OPEN_CALLED will not be set.
	 */
	mfp = dbmfp->mfp;
	DB_ASSERT(env,
	    (F_ISSET(dbmfp, MP_OPEN_CALLED) && mfp != NULL) ||
	    (!F_ISSET(dbmfp, MP_OPEN_CALLED) && mfp == NULL));
	if (!F_ISSET(dbmfp, MP_OPEN_CALLED))
		goto done;

	/*
	 * If it's a temp file, all outstanding references belong to unflushed
	 * buffers.  (A temp file can only be referenced by one DB_MPOOLFILE).
	 * We don't care about preserving any of those buffers, so mark the
	 * MPOOLFILE as dead so that even the dirty ones just get discarded
	 * when we try to flush them.
	 */
	deleted = 0;
	if (!LF_ISSET(DB_MPOOL_NOLOCK))
		MUTEX_LOCK(env, mfp->mutex);
	if (F_ISSET(dbmfp, MP_MULTIVERSION))
		atomic_dec(env, &mfp->multiversion);
	if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) {
		if (LF_ISSET(DB_MPOOL_DISCARD) ||
		    F_ISSET(mfp, MP_TEMP) || mfp->unlink_on_close) {
			mfp->deadfile = 1;
		}
		if (mfp->unlink_on_close) {
			if ((t_ret = __db_appname(dbmp->env, DB_APP_DATA,
			    R_ADDR(dbmp->reginfo, mfp->path_off), NULL,
			    &rpath)) != 0 && ret == 0)
				ret = t_ret;
			if (t_ret == 0) {
				if ((t_ret = __os_unlink(
				    dbmp->env, rpath, 0)) != 0 && ret == 0)
					ret = t_ret;
				__os_free(env, rpath);
			}
		}
		if (mfp->mpf_cnt == 0) {
			F_CLR(mfp, MP_NOT_DURABLE);
			F_SET(mfp, MP_DURABLE_UNKNOWN);
		}
		if (mfp->block_cnt == 0) {
			/*
			 * We should never discard this mp file if our caller
			 * is holding the lock on it.  See comment in
			 * __memp_sync_file.
			 */
			DB_ASSERT(env, !LF_ISSET(DB_MPOOL_NOLOCK));
			/* __memp_mf_discard consumes the MPOOLFILE mutex. */
			if ((t_ret =
			    __memp_mf_discard(dbmp, mfp, 0)) != 0 && ret == 0)
				ret = t_ret;
			deleted = 1;
		}
	}
	if (!deleted && !LF_ISSET(DB_MPOOL_NOLOCK))
		MUTEX_UNLOCK(env, mfp->mutex);

done:	/* Discard the DB_MPOOLFILE structure. */
	if (dbmfp->pgcookie != NULL) {
		__os_free(env, dbmfp->pgcookie->data);
		__os_free(env, dbmfp->pgcookie);
	}
	__os_free(env, dbmfp);

	return (ret);
}

/*
 * __memp_mf_discard --
 *	Discard an MPOOLFILE.
 *
 * PUBLIC: int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *, int));
 */
int
__memp_mf_discard(dbmp, mfp, hp_locked)
	DB_MPOOL *dbmp;
	MPOOLFILE *mfp;
	int hp_locked;
{
	DB_MPOOL_HASH *hp;
	ENV *env;
#ifdef HAVE_STATISTICS
	DB_MPOOL_STAT *sp;
#endif
	MPOOL *mp;
	int need_sync, ret, t_ret;

	env = dbmp->env;
	mp = dbmp->reginfo[0].primary;
	hp = R_ADDR(dbmp->reginfo, mp->ftab);
	hp += mfp->bucket;
	ret = 0;

	/*
	 * Expects caller to be holding the MPOOLFILE mutex.
	 *
	 * When discarding a file, we have to flush writes from it to disk.
	 * The scenario is that dirty buffers from this file need to be
	 * flushed to satisfy a future checkpoint, but when the checkpoint
	 * calls mpool sync, the sync code won't know anything about them.
	 * Ignore files not written, discarded, or only temporary.
	 */
	need_sync = mfp->file_written && !mfp->deadfile &&
	    !F_ISSET(mfp, MP_TEMP) && !mfp->no_backing_file;

	/*
	 * We have to release the MPOOLFILE mutex before acquiring the region
	 * mutex so we don't deadlock.  Make sure nobody ever looks at this
	 * structure again.
	 */
	mfp->deadfile = 1;

	/* Discard the mutex we're holding and return it to the pool. */
	MUTEX_UNLOCK(env, mfp->mutex);
	if ((t_ret = __mutex_free(env, &mfp->mutex)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * Lock the bucket and delete from the list of MPOOLFILEs.
	 * If this function is called by __memp_discard_all_mpfs,
	 * the MPOOLFILE hash bucket is already locked.
	 */
	if (!hp_locked)
		MUTEX_LOCK(env, hp->mtx_hash);
	SH_TAILQ_REMOVE(&hp->hash_bucket, mfp, q, __mpoolfile);
	if (!hp_locked)
		MUTEX_UNLOCK(env, hp->mtx_hash);

	/* Lock the region and collect stats and free the space. */
	MPOOL_SYSTEM_LOCK(env);
	if (need_sync &&
	    (t_ret = __memp_mf_sync(dbmp, mfp, 0)) != 0 && ret == 0)
		ret = t_ret;

#ifdef HAVE_STATISTICS
	/* Copy the statistics into the region. */
	sp = &mp->stat;
	sp->st_cache_hit += mfp->stat.st_cache_hit;
	sp->st_cache_miss += mfp->stat.st_cache_miss;
	sp->st_map += mfp->stat.st_map;
	sp->st_page_create += mfp->stat.st_page_create;
	sp->st_page_in += mfp->stat.st_page_in;
	sp->st_page_out += mfp->stat.st_page_out;
#endif

	/* Free the space. */
	if (mfp->path_off != 0)
		__memp_free(&dbmp->reginfo[0],
		    R_ADDR(dbmp->reginfo, mfp->path_off));
	if (mfp->fileid_off != 0)
		__memp_free(&dbmp->reginfo[0],
		    R_ADDR(dbmp->reginfo, mfp->fileid_off));
	if (mfp->pgcookie_off != 0)
		__memp_free(&dbmp->reginfo[0],
		    R_ADDR(dbmp->reginfo, mfp->pgcookie_off));
	__memp_free(&dbmp->reginfo[0], mfp);

	MPOOL_SYSTEM_UNLOCK(env);

	return (ret);
}

/*
 * __memp_inmemlist --
 *	Return a list of the named in-memory databases.
 *
 * PUBLIC: int __memp_inmemlist __P((ENV *, char ***, int *));
 */
int
__memp_inmemlist(env, namesp, cntp)
	ENV *env;
	char ***namesp;
	int *cntp;
{
	DB_MPOOL *dbmp;
	DB_MPOOL_HASH *hp;
	MPOOL *mp;
	MPOOLFILE *mfp;
	int arraysz, cnt, i, ret;
	char **names;

	names = NULL;
	dbmp = env->mp_handle;
	mp = dbmp->reginfo[0].primary;
	hp = R_ADDR(dbmp->reginfo, mp->ftab);

	arraysz = cnt = 0;
	for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) {
		MUTEX_LOCK(env, hp->mtx_hash);
		SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) {
			/* Skip dead files and temporary files. */
			if (mfp->deadfile || F_ISSET(mfp, MP_TEMP))
				continue;

			/*
			 * Skip entries that have a backing file; we only
			 * want named in-memory databases.
			 */
			if (!mfp->no_backing_file)
				continue;

			/* We found one.  Grow the array in chunks of 100. */
			if (cnt >= arraysz) {
				arraysz += 100;
				if ((ret = __os_realloc(env,
				    (u_int)arraysz * sizeof(names[0]),
				    &names)) != 0)
					goto nomem;
			}
			if ((ret = __os_strdup(env,
			    R_ADDR(dbmp->reginfo, mfp->path_off),
			    &names[cnt])) != 0)
				goto nomem;

			cnt++;
		}
		MUTEX_UNLOCK(env, hp->mtx_hash);
	}
	*namesp = names;
	*cntp = cnt;
	return (0);

nomem:	MUTEX_UNLOCK(env, hp->mtx_hash);
	if (names != NULL) {
		while (--cnt >= 0)
			__os_free(env, names[cnt]);
		__os_free(env, names);
	}

	/* Make sure we don't return any garbage. */
	*cntp = 0;
	*namesp = NULL;
	return (ret);
}
diff --git a/src/mp/mp_fput.c b/src/mp/mp_fput.c
new file mode 100644
index 00000000..03ed2c3a
--- /dev/null
+++ b/src/mp/mp_fput.c
@@ -0,0 +1,374 @@
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"

static int __memp_reset_lru __P((ENV *, REGINFO *));

/*
 * __memp_fput_pp --
 *	DB_MPOOLFILE->put pre/post processing.
 *
 * PUBLIC: int __memp_fput_pp
 * PUBLIC:     __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
 */
int
__memp_fput_pp(dbmfp, pgaddr, priority, flags)
	DB_MPOOLFILE *dbmfp;
	void *pgaddr;
	DB_CACHE_PRIORITY priority;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret, t_ret;

	env = dbmfp->env;

	/* No flags are currently accepted by DB_MPOOLFILE->put. */
	if (flags != 0)
		return (__db_ferr(env, "DB_MPOOLFILE->put", 0));

	MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->put");

	ENV_ENTER(env, ip);

	ret = __memp_fput(dbmfp, ip, pgaddr, priority);
	if (IS_ENV_REPLICATED(env) &&
	    (t_ret = __op_rep_exit(env)) != 0 && ret == 0)
		ret = t_ret;

	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __memp_fput --
 *	DB_MPOOLFILE->put.
 *
 * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *,
 * PUBLIC:     DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY));
 */
int
__memp_fput(dbmfp, ip, pgaddr, priority)
	DB_MPOOLFILE *dbmfp;
	DB_THREAD_INFO *ip;
	void *pgaddr;
	DB_CACHE_PRIORITY priority;
{
	BH *bhp;
	DB_ENV *dbenv;
	DB_MPOOL *dbmp;
	DB_MPOOL_HASH *hp;
	ENV *env;
	MPOOL *c_mp;
	MPOOLFILE *mfp;
	PIN_LIST *list, *lp;
	REGINFO *infop, *reginfo;
	roff_t b_ref;
	int region;
	int adjust, pfactor, ret, t_ret;
	char buf[DB_THREADID_STRLEN];

	env = dbmfp->env;
	dbenv = env->dbenv;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;
	/* Recover the buffer header preceding the returned page address. */
	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
	ret = 0;

	/*
	 * If this is marked dummy, we are using it to unpin a buffer for
	 * another thread.
	 */
	if (F_ISSET(dbmfp, MP_DUMMY))
		goto unpin;

	/*
	 * If we're mapping the file, there's nothing to do.  Because we can
	 * stop mapping the file at any time, we have to check on each buffer
	 * to see if the address we gave the application was part of the map
	 * region.
	 */
	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
	    (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
		return (0);

	DB_ASSERT(env, IS_RECOVERING(env) || bhp->pgno <= mfp->last_pgno ||
	    F_ISSET(bhp, BH_FREED) || !SH_CHAIN_SINGLETON(bhp, vc));
#ifdef DIAGNOSTIC
	/*
	 * Decrement the per-file pinned buffer count (mapped pages aren't
	 * counted).
	 */
	MPOOL_SYSTEM_LOCK(env);
	if (dbmfp->pinref == 0) {
		MPOOL_SYSTEM_UNLOCK(env);
		__db_errx(env, DB_STR_A("3011",
		    "%s: more pages returned than retrieved", "%s"),
		    __memp_fn(dbmfp));
		return (__env_panic(env, EACCES));
	}
	--dbmfp->pinref;
	MPOOL_SYSTEM_UNLOCK(env);
#endif

unpin:
	infop = &dbmp->reginfo[bhp->region];
	c_mp = infop->primary;
	hp = R_ADDR(infop, c_mp->htab);
	hp = &hp[bhp->bucket];

	/*
	 * Check for a reference count going to zero.  This can happen if the
	 * application returns a page twice.
	 */
	if (atomic_read(&bhp->ref) == 0) {
		__db_errx(env, DB_STR_A("3012",
		    "%s: page %lu: unpinned page returned", "%s %lu"),
		    __memp_fn(dbmfp), (u_long)bhp->pgno);
		DB_ASSERT(env, atomic_read(&bhp->ref) != 0);
		return (__env_panic(env, EACCES));
	}

	/* Note the activity so allocation won't decide to quit. */
	++c_mp->put_counter;

	/* Remove this buffer from the thread's pin list, if tracked. */
	if (ip != NULL) {
		reginfo = env->reginfo;
		list = R_ADDR(reginfo, ip->dbth_pinlist);
		region = (int)(infop - dbmp->reginfo);
		b_ref = R_OFFSET(infop, bhp);
		for (lp = list; lp < &list[ip->dbth_pinmax]; lp++)
			if (lp->b_ref == b_ref && lp->region == region)
				break;

		if (lp == &list[ip->dbth_pinmax]) {
			__db_errx(env, DB_STR_A("3013",
		    "__memp_fput: pinned buffer not found for thread %s",
			    "%s"), dbenv->thread_id_string(dbenv,
			    ip->dbth_pid, ip->dbth_tid, buf));
			return (__env_panic(env, EINVAL));
		}

		lp->b_ref = INVALID_ROFF;
		ip->dbth_pincount--;
	}

	/*
	 * Mark the file dirty.
	 */
	if (F_ISSET(bhp, BH_EXCLUSIVE) && F_ISSET(bhp, BH_DIRTY)) {
		DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
		mfp->file_written = 1;
	}

	/*
	 * If more than one reference to the page we're done.  Ignore the
	 * discard flags (for now) and leave the buffer's priority alone.
	 * We are doing this a little early as the remaining ref may or
	 * may not be a write behind.  If it is we set the priority
	 * here, if not it will get set again later.  We might race
	 * and miss setting the priority which would leave it wrong
	 * for a while.
	 */
	DB_ASSERT(env, atomic_read(&bhp->ref) != 0);
	if (atomic_dec(env, &bhp->ref) > 1 || (atomic_read(&bhp->ref) == 1 &&
	    !F_ISSET(bhp, BH_DIRTY))) {
		/*
		 * __memp_pgwrite only has a shared lock while it clears
		 * the BH_DIRTY bit. If we only have a shared latch then
		 * we can't touch the flags bits.
		 */
		if (F_ISSET(bhp, BH_EXCLUSIVE))
			F_CLR(bhp, BH_EXCLUSIVE);
		MUTEX_UNLOCK(env, bhp->mtx_buf);
		return (0);
	}

	/* The buffer should not be accessed again. */
#ifdef DIAG_MVCC
	MUTEX_LOCK(env, hp->mtx_hash);
	if (BH_REFCOUNT(bhp) == 0)
		MVCC_MPROTECT(bhp->buf, mfp->pagesize, 0);
	MUTEX_UNLOCK(env, hp->mtx_hash);
#endif

	/* Update priority values. */
	if (priority == DB_PRIORITY_VERY_LOW ||
	    mfp->priority == MPOOL_PRI_VERY_LOW)
		bhp->priority = 0;
	else {
		/*
		 * We don't lock the LRU priority or the pages field, if
		 * we get garbage (which won't happen on a 32-bit machine), it
		 * only means a buffer has the wrong priority.
		 */
		bhp->priority = c_mp->lru_priority;

		switch (priority) {
		default:
		case DB_PRIORITY_UNCHANGED:
			pfactor = mfp->priority;
			break;
		case DB_PRIORITY_VERY_LOW:
			pfactor = MPOOL_PRI_VERY_LOW;
			break;
		case DB_PRIORITY_LOW:
			pfactor = MPOOL_PRI_LOW;
			break;
		case DB_PRIORITY_DEFAULT:
			pfactor = MPOOL_PRI_DEFAULT;
			break;
		case DB_PRIORITY_HIGH:
			pfactor = MPOOL_PRI_HIGH;
			break;
		case DB_PRIORITY_VERY_HIGH:
			pfactor = MPOOL_PRI_VERY_HIGH;
			break;
		}

		/* Boost the base priority by a fraction of the cache size. */
		adjust = 0;
		if (pfactor != 0)
			adjust = (int)c_mp->pages / pfactor;

		if (F_ISSET(bhp, BH_DIRTY))
			adjust += (int)c_mp->pages / MPOOL_PRI_DIRTY;

		if (adjust > 0) {
			if (MPOOL_LRU_REDZONE - bhp->priority >=
			    (u_int32_t)adjust)
				bhp->priority += adjust;
		} else if (adjust < 0)
			if (bhp->priority > (u_int32_t)-adjust)
				bhp->priority += adjust;
	}

	/*
	 * __memp_pgwrite only has a shared lock while it clears the
	 * BH_DIRTY bit. If we only have a shared latch then we can't
	 * touch the flags bits.
	 */
	if (F_ISSET(bhp, BH_EXCLUSIVE))
		F_CLR(bhp, BH_EXCLUSIVE);
	MUTEX_UNLOCK(env, bhp->mtx_buf);

	/*
	 * On every buffer put we update the cache lru priority and check
	 * for wraparound.  The increment doesn't need to be atomic: occasional
	 * lost increments are okay; __memp_reset_lru handles race conditions.
	 */
	if (++c_mp->lru_priority >= MPOOL_LRU_REDZONE &&
	    (t_ret = __memp_reset_lru(env, infop)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __memp_reset_lru --
 *	Reset the cache LRU priority when it reaches the upper limit.
 */
static int
__memp_reset_lru(env, infop)
	ENV *env;
	REGINFO *infop;
{
	BH *bhp, *tbhp;
	DB_MPOOL_HASH *hp;
	MPOOL *c_mp;
	u_int32_t bucket;
	int reset;

	/*
	 * Update the priority so all future allocations will start at the
	 * bottom.  Lock this cache region to ensure that exactly one thread
	 * will reset this cache's buffers.
	 */
	c_mp = infop->primary;
	MPOOL_REGION_LOCK(env, infop);
	reset = c_mp->lru_priority >= MPOOL_LRU_DECREMENT;
	if (reset) {
		c_mp->lru_priority -= MPOOL_LRU_DECREMENT;
		c_mp->lru_generation++;
	}
	MPOOL_REGION_UNLOCK(env, infop);

	if (!reset)
		return (0);

	/* Reduce the priority of every buffer in this cache region. */
	for (hp = R_ADDR(infop, c_mp->htab),
	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
		/*
		 * Skip empty buckets.
		 *
		 * We can check for empty buckets before locking as we
		 * only care if the pointer is zero or non-zero.
		 */
		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
			continue;

		MUTEX_LOCK(env, hp->mtx_hash);
		SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
			/* Walk the buffer's version chain as well. */
			for (tbhp = bhp; tbhp != NULL;
			    tbhp = SH_CHAIN_PREV(tbhp, vc, __bh)) {
				if (tbhp->priority > MPOOL_LRU_DECREMENT)
					tbhp->priority -= MPOOL_LRU_DECREMENT;
				else
					tbhp->priority = 0;
			}
		}
		MUTEX_UNLOCK(env, hp->mtx_hash);
	}

	COMPQUIET(env, NULL);
	return (0);
}

/*
 * __memp_unpin_buffers --
 *	Unpin buffers pinned by a thread.
+ * + * PUBLIC: int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *)); + */ +int +__memp_unpin_buffers(env, ip) + ENV *env; + DB_THREAD_INFO *ip; +{ + BH *bhp; + DB_MPOOL *dbmp; + DB_MPOOLFILE dbmf; + PIN_LIST *list, *lp; + REGINFO *rinfop, *reginfo; + int ret; + + memset(&dbmf, 0, sizeof(dbmf)); + dbmf.env = env; + dbmf.flags = MP_DUMMY; + dbmp = env->mp_handle; + reginfo = env->reginfo; + + list = R_ADDR(reginfo, ip->dbth_pinlist); + for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) { + if (lp->b_ref == INVALID_ROFF) + continue; + rinfop = &dbmp->reginfo[lp->region]; + bhp = R_ADDR(rinfop, lp->b_ref); + dbmf.mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + if ((ret = __memp_fput(&dbmf, ip, + (u_int8_t *)bhp + SSZA(BH, buf), + DB_PRIORITY_UNCHANGED)) != 0) + return (ret); + } + return (0); +} diff --git a/src/mp/mp_fset.c b/src/mp/mp_fset.c new file mode 100644 index 00000000..3482dafa --- /dev/null +++ b/src/mp/mp_fset.c @@ -0,0 +1,170 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +/* + * __memp_dirty -- + * Upgrade a page from a read-only to a writable pointer. 
+ * + * PUBLIC: int __memp_dirty __P((DB_MPOOLFILE *, void *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_CACHE_PRIORITY, u_int32_t)); + */ +int +__memp_dirty(dbmfp, addrp, ip, txn, priority, flags) + DB_MPOOLFILE *dbmfp; + void *addrp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_CACHE_PRIORITY priority; + u_int32_t flags; +{ + BH *bhp; + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + DB_TXN *ancestor; + ENV *env; + MPOOL *c_mp; +#ifdef DIAG_MVCC + MPOOLFILE *mfp; +#endif + REGINFO *infop; + int mvcc, ret; + db_pgno_t pgno; + void *pgaddr; + + env = dbmfp->env; + dbmp = env->mp_handle; + mvcc = atomic_read(&dbmfp->mfp->multiversion); + + /* Convert the page address to a buffer header. */ + pgaddr = *(void **)addrp; + bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); + pgno = bhp->pgno; + + /* If we have it exclusively then its already dirty. */ + if (F_ISSET(bhp, BH_EXCLUSIVE)) { + DB_ASSERT(env, F_ISSET(bhp, BH_DIRTY)); + return (0); + } + + if (flags == 0) + flags = DB_MPOOL_DIRTY; + DB_ASSERT(env, flags == DB_MPOOL_DIRTY || flags == DB_MPOOL_EDIT); + + if (F_ISSET(dbmfp, MP_READONLY)) { + __db_errx(env, DB_STR_A("3008", + "%s: dirty flag set for readonly file page", "%s"), + __memp_fn(dbmfp)); + return (EACCES); + } + + for (ancestor = txn; + ancestor != NULL && ancestor->parent != NULL; + ancestor = ancestor->parent) + ; + + if (mvcc && txn != NULL && flags == DB_MPOOL_DIRTY && + (!BH_OWNED_BY(env, bhp, ancestor) || SH_CHAIN_HASNEXT(bhp, vc))) { + atomic_inc(env, &bhp->ref); + *(void **)addrp = NULL; + if ((ret = __memp_fput(dbmfp, ip, pgaddr, priority)) != 0) { + __db_errx(env, DB_STR_A("3009", + "%s: error releasing a read-only page", "%s"), + __memp_fn(dbmfp)); + atomic_dec(env, &bhp->ref); + return (ret); + } + if ((ret = __memp_fget(dbmfp, + &pgno, ip, txn, flags, addrp)) != 0) { + if (ret != DB_LOCK_DEADLOCK) + __db_errx(env, DB_STR_A("3010", + "%s: error getting a page for writing", + "%s"), __memp_fn(dbmfp)); + atomic_dec(env, &bhp->ref); + return (ret); + } + 
atomic_dec(env, &bhp->ref); + + /* + * If the MVCC handle count hasn't changed, we should get a + * different version of the page. + */ + DB_ASSERT(env, *(void **)addrp != pgaddr || + mvcc != atomic_read(&dbmfp->mfp->multiversion)); + + pgaddr = *(void **)addrp; + bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); + DB_ASSERT(env, pgno == bhp->pgno); + return (0); + } + + infop = &dbmp->reginfo[bhp->region]; + c_mp = infop->primary; + hp = R_ADDR(infop, c_mp->htab); + hp = &hp[bhp->bucket]; + + /* Drop the shared latch and get an exclusive. We have the buf ref'ed.*/ + MUTEX_UNLOCK(env, bhp->mtx_buf); + MUTEX_LOCK(env, bhp->mtx_buf); + DB_ASSERT(env, !F_ISSET(bhp, BH_EXCLUSIVE)); + F_SET(bhp, BH_EXCLUSIVE); + + /* Set/clear the page bits. */ + if (!F_ISSET(bhp, BH_DIRTY)) { +#ifdef DIAGNOSTIC + MUTEX_LOCK(env, hp->mtx_hash); +#endif + atomic_inc(env, &hp->hash_page_dirty); + F_SET(bhp, BH_DIRTY); +#ifdef DIAGNOSTIC + MUTEX_UNLOCK(env, hp->mtx_hash); +#endif + } + +#ifdef DIAG_MVCC + mfp = R_ADDR(env->mp_handle->reginfo, bhp->mf_offset); + MVCC_MPROTECT(bhp->buf, mfp->pagesize, PROT_READ | PROT_WRITE); +#endif + DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY) || + atomic_read(&hp->hash_page_dirty) != 0); + return (0); +} + +/* + * __memp_shared -- + * Downgrade a page from exlusively held to shared. + * + * PUBLIC: int __memp_shared __P((DB_MPOOLFILE *, void *)); + */ +int +__memp_shared(dbmfp, pgaddr) + DB_MPOOLFILE *dbmfp; + void *pgaddr; +{ + BH *bhp; + ENV *env; + + env = dbmfp->env; + /* Convert the page address to a buffer header. 
*/ + bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); + + if (F_ISSET(bhp, BH_DIRTY)) + dbmfp->mfp->file_written = 1; + DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE)); + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + MUTEX_READLOCK(env, bhp->mtx_buf); + + return (0); +} diff --git a/src/mp/mp_method.c b/src/mp/mp_method.c new file mode 100644 index 00000000..7d6a55d5 --- /dev/null +++ b/src/mp/mp_method.c @@ -0,0 +1,1091 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +/* + * __memp_env_create -- + * Mpool specific creation of the DB_ENV structure. + * + * PUBLIC: int __memp_env_create __P((DB_ENV *)); + */ +int +__memp_env_create(dbenv) + DB_ENV *dbenv; +{ + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + * + * We default to 32 8K pages. We don't default to a flat 256K, because + * we want to include the size of the buffer header which can vary + * from system to system. + */ + dbenv->mp_bytes = + 32 * ((8 * 1024) + sizeof(BH)) + 37 * sizeof(DB_MPOOL_HASH); + dbenv->mp_ncache = 1; + + return (0); +} + +/* + * __memp_env_destroy -- + * Mpool specific destruction of the DB_ENV structure. + * + * PUBLIC: void __memp_env_destroy __P((DB_ENV *)); + */ +void +__memp_env_destroy(dbenv) + DB_ENV *dbenv; +{ + COMPQUIET(dbenv, NULL); +} + +/* + * __memp_get_cachesize -- + * {DB_ENV,DB}->get_cachesize. 
+ * + * PUBLIC: int __memp_get_cachesize + * PUBLIC: __P((DB_ENV *, u_int32_t *, u_int32_t *, int *)); + */ +int +__memp_get_cachesize(dbenv, gbytesp, bytesp, ncachep) + DB_ENV *dbenv; + u_int32_t *gbytesp, *bytesp; + int *ncachep; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_cachesize", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + if (gbytesp != NULL) + *gbytesp = mp->gbytes; + if (bytesp != NULL) + *bytesp = mp->bytes; + if (ncachep != NULL) + *ncachep = (int)mp->nreg; + } else { + if (gbytesp != NULL) + *gbytesp = dbenv->mp_gbytes; + if (bytesp != NULL) + *bytesp = dbenv->mp_bytes; + if (ncachep != NULL) + *ncachep = (int)dbenv->mp_ncache; + } + + return (0); +} + +/* + * __memp_set_cachesize -- + * {DB_ENV,DB}->set_cachesize. + * + * PUBLIC: int __memp_set_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int)); + */ +int +__memp_set_cachesize(dbenv, gbytes, bytes, arg_ncache) + DB_ENV *dbenv; + u_int32_t gbytes, bytes; + int arg_ncache; +{ + ENV *env; + DB_THREAD_INFO *ip; + u_int ncache; + int ret; + + env = dbenv->env; + ret = 0; + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->set_cachesize", DB_INIT_MPOOL); + + /* Normalize the cache count. */ + ncache = arg_ncache <= 0 ? 1 : (u_int)arg_ncache; + + /* + * You can only store 4GB-1 in an unsigned 32-bit value, so correct for + * applications that specify 4GB cache sizes -- we know what they meant. + */ + if (sizeof(roff_t) == 4 && gbytes / ncache == 4 && bytes == 0) { + --gbytes; + bytes = GIGABYTE - 1; + } else { + gbytes += bytes / GIGABYTE; + bytes %= GIGABYTE; + } + + /* + * !!! + * With 32-bit region offsets, individual cache regions must be smaller + * than 4GB. Also, cache sizes larger than 10TB would cause 32-bit + * wrapping in the calculation of the number of hash buckets. See + * __memp_open for details. 
+ */ + if (!F_ISSET(env, ENV_OPEN_CALLED)) { + if (sizeof(roff_t) <= 4 && gbytes / ncache >= 4) { + __db_errx(env, DB_STR("3003", + "individual cache size too large: maximum is 4GB")); + return (EINVAL); + } + if (gbytes / ncache > 10000) { + __db_errx(env, DB_STR("3004", + "individual cache size too large: maximum is 10TB")); + return (EINVAL); + } + } + + /* + * If the application requested less than 500Mb, increase the cachesize + * by 25% and factor in the size of the hash buckets to account for our + * overhead. (I'm guessing caches over 500Mb are specifically sized, + * that is, it's a large server and the application actually knows how + * much memory is available. We only document the 25% overhead number, + * not the hash buckets, but I don't see a reason to confuse the issue, + * it shouldn't matter to an application.) + * + * There is a minimum cache size, regardless. + */ + if (gbytes == 0) { + if (bytes < 500 * MEGABYTE) + bytes += (bytes / 4) + 37 * sizeof(DB_MPOOL_HASH); + if (bytes / ncache < DB_CACHESIZE_MIN) + bytes = ncache * DB_CACHESIZE_MIN; + } + + if (F_ISSET(env, ENV_OPEN_CALLED)) { + ENV_ENTER(env, ip); + ret = __memp_resize(env->mp_handle, gbytes, bytes); + ENV_LEAVE(env, ip); + return ret; + } + + dbenv->mp_gbytes = gbytes; + dbenv->mp_bytes = bytes; + dbenv->mp_ncache = ncache; + + return (0); +} + +/* + * __memp_set_config -- + * Set the cache subsystem configuration. 
+ * + * PUBLIC: int __memp_set_config __P((DB_ENV *, u_int32_t, int)); + */ +int +__memp_set_config(dbenv, which, on) + DB_ENV *dbenv; + u_int32_t which; + int on; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->memp_set_config", DB_INIT_MPOOL); + + switch (which) { + case DB_MEMP_SUPPRESS_WRITE: + case DB_MEMP_SYNC_INTERRUPT: + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + if (on) + FLD_SET(mp->config_flags, which); + else + FLD_CLR(mp->config_flags, which); + } + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * __memp_get_config -- + * Return the cache subsystem configuration. + * + * PUBLIC: int __memp_get_config __P((DB_ENV *, u_int32_t, int *)); + */ +int +__memp_get_config(dbenv, which, onp) + DB_ENV *dbenv; + u_int32_t which; + int *onp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "DB_ENV->memp_get_config", DB_INIT_MPOOL); + + switch (which) { + case DB_MEMP_SUPPRESS_WRITE: + case DB_MEMP_SYNC_INTERRUPT: + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + *onp = FLD_ISSET(mp->config_flags, which) ? 
1 : 0; + } else + *onp = 0; + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_max_openfd __P((DB_ENV *, int *)); + */ +int +__memp_get_mp_max_openfd(dbenv, maxopenfdp) + DB_ENV *dbenv; + int *maxopenfdp; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_openfd", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + *maxopenfdp = mp->mp_maxopenfd; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + *maxopenfdp = dbenv->mp_maxopenfd; + return (0); +} + +/* + * __memp_set_mp_max_openfd -- + * Set the maximum number of open fd's when flushing the cache. + * PUBLIC: int __memp_set_mp_max_openfd __P((DB_ENV *, int)); + */ +int +__memp_set_mp_max_openfd(dbenv, maxopenfd) + DB_ENV *dbenv; + int maxopenfd; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->set_mp_max_openfd", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + mp->mp_maxopenfd = maxopenfd; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + dbenv->mp_maxopenfd = maxopenfd; + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_max_write __P((DB_ENV *, int *, db_timeout_t *)); + */ +int +__memp_get_mp_max_write(dbenv, maxwritep, maxwrite_sleepp) + DB_ENV *dbenv; + int *maxwritep; + db_timeout_t *maxwrite_sleepp; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_write", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + *maxwritep = mp->mp_maxwrite; + 
*maxwrite_sleepp = mp->mp_maxwrite_sleep; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + *maxwritep = dbenv->mp_maxwrite; + *maxwrite_sleepp = dbenv->mp_maxwrite_sleep; + } + return (0); +} + +/* + * __memp_set_mp_max_write -- + * Set the maximum continuous I/O count. + * + * PUBLIC: int __memp_set_mp_max_write __P((DB_ENV *, int, db_timeout_t)); + */ +int +__memp_set_mp_max_write(dbenv, maxwrite, maxwrite_sleep) + DB_ENV *dbenv; + int maxwrite; + db_timeout_t maxwrite_sleep; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_write", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + mp->mp_maxwrite = maxwrite; + mp->mp_maxwrite_sleep = maxwrite_sleep; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + dbenv->mp_maxwrite = maxwrite; + dbenv->mp_maxwrite_sleep = maxwrite_sleep; + } + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_mmapsize __P((DB_ENV *, size_t *)); + */ +int +__memp_get_mp_mmapsize(dbenv, mp_mmapsizep) + DB_ENV *dbenv; + size_t *mp_mmapsizep; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + *mp_mmapsizep = mp->mp_mmapsize; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + *mp_mmapsizep = dbenv->mp_mmapsize; + return (0); +} + +/* + * __memp_set_mp_mmapsize -- + * DB_ENV->set_mp_mmapsize. 
+ * + * PUBLIC: int __memp_set_mp_mmapsize __P((DB_ENV *, size_t)); + */ +int +__memp_set_mp_mmapsize(dbenv, mp_mmapsize) + DB_ENV *dbenv; + size_t mp_mmapsize; +{ + DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->set_mp_max_mmapsize", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ENV_ENTER(env, ip); + MPOOL_SYSTEM_LOCK(env); + /* + * We need to cast here because size_t and db_size_t can be + * different on a 64 bit build, when building in 32 bit + * compatibility mode. The cast is safe, because we check for + * overflow when the fields are assigned. + */ + mp->mp_mmapsize = (db_size_t)mp_mmapsize; + MPOOL_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else + dbenv->mp_mmapsize = (db_size_t)mp_mmapsize; + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_pagesize __P((DB_ENV *, u_int32_t *)); + */ +int +__memp_get_mp_pagesize(dbenv, mp_pagesizep) + DB_ENV *dbenv; + u_int32_t *mp_pagesizep; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_pagesize", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + *mp_pagesizep = mp->pagesize; + } else { + *mp_pagesizep = dbenv->mp_pagesize; + } + return (0); +} + +/* + * __memp_set_mp_pagesize -- + * DB_ENV->set_mp_pagesize. 
+ * + * PUBLIC: int __memp_set_mp_pagesize __P((DB_ENV *, u_int32_t)); + */ +int +__memp_set_mp_pagesize(dbenv, mp_pagesize) + DB_ENV *dbenv; + u_int32_t mp_pagesize; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_pagesize"); + + dbenv->mp_pagesize = mp_pagesize; + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_tablesize __P((DB_ENV *, u_int32_t *)); + */ +int +__memp_get_mp_tablesize(dbenv, mp_tablesizep) + DB_ENV *dbenv; + u_int32_t *mp_tablesizep; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_tablesize", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + *mp_tablesizep = mp->htab_buckets; + } else + *mp_tablesizep = dbenv->mp_tablesize; + return (0); +} + +/* + * __memp_set_mp_tablesize -- + * DB_ENV->set_mp_tablesize. 
+ * + * PUBLIC: int __memp_set_mp_tablesize __P((DB_ENV *, u_int32_t)); + */ +int +__memp_set_mp_tablesize(dbenv, mp_tablesize) + DB_ENV *dbenv; + u_int32_t mp_tablesize; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_tablesize"); + + dbenv->mp_tablesize = mp_tablesize; + return (0); +} + +/* + * PUBLIC: int __memp_get_mp_mtxcount __P((DB_ENV *, u_int32_t *)); + */ +int +__memp_get_mp_mtxcount(dbenv, mp_mtxcountp) + DB_ENV *dbenv; + u_int32_t *mp_mtxcountp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_mtxcount", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + *mp_mtxcountp = mp->htab_mutexes; + } else + *mp_mtxcountp = dbenv->mp_mtxcount; + return (0); +} + +/* + * __memp_set_mp_mtxcount -- + * DB_ENV->set_mp_mtxcount. + * + * PUBLIC: int __memp_set_mp_mtxcount __P((DB_ENV *, u_int32_t)); + */ +int +__memp_set_mp_mtxcount(dbenv, mp_mtxcount) + DB_ENV *dbenv; + u_int32_t mp_mtxcount; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_mtxcount"); + + dbenv->mp_mtxcount = mp_mtxcount; + return (0); +} + +/* + * __memp_nameop + * Remove or rename a file in the pool. + * + * PUBLIC: int __memp_nameop __P((ENV *, + * PUBLIC: u_int8_t *, const char *, const char *, const char *, int)); + * + * XXX + * Undocumented interface: DB private. 
+ */ +int +__memp_nameop(env, fileid, newname, fullold, fullnew, inmem) + ENV *env; + u_int8_t *fileid; + const char *newname, *fullold, *fullnew; + int inmem; +{ + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp, *nhp; + MPOOL *mp; + MPOOLFILE *mfp; + roff_t newname_off; + u_int32_t bucket; + int locked, ret; + size_t nlen; + void *p; + +#undef op_is_remove +#define op_is_remove (newname == NULL) + + COMPQUIET(bucket, 0); + COMPQUIET(hp, NULL); + COMPQUIET(newname_off, 0); + COMPQUIET(nlen, 0); + + dbmp = NULL; + mfp = NULL; + nhp = NULL; + p = NULL; + locked = ret = 0; + + if (!MPOOL_ON(env)) + goto fsop; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + hp = R_ADDR(dbmp->reginfo, mp->ftab); + + if (!op_is_remove) { + nlen = strlen(newname); + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, nlen + 1, &newname_off, &p)) != 0) + return (ret); + memcpy(p, newname, nlen + 1); + } + + /* + * Remove or rename a file that the mpool might know about. We assume + * that the fop layer has the file locked for exclusive access, so we + * don't worry about locking except for the mpool mutexes. Checkpoint + * can happen at any time, independent of file locking, so we have to + * do the actual unlink or rename system call while holding + * all affected buckets locked. + * + * If this is a rename and this is a memory file then we need + * to make sure that the new name does not exist. Since we + * are locking two buckets lock them in ascending order. 
+ */ + if (inmem) { + DB_ASSERT(env, fullold != NULL); + hp += FNBUCKET(fullold, strlen(fullold)); + if (!op_is_remove) { + bucket = FNBUCKET(newname, nlen); + nhp = R_ADDR(dbmp->reginfo, mp->ftab); + nhp += bucket; + } + } else + hp += FNBUCKET(fileid, DB_FILE_ID_LEN); + + if (nhp != NULL && nhp < hp) + MUTEX_LOCK(env, nhp->mtx_hash); + MUTEX_LOCK(env, hp->mtx_hash); + if (nhp != NULL && nhp > hp) + MUTEX_LOCK(env, nhp->mtx_hash); + locked = 1; + + if (!op_is_remove && inmem) { + SH_TAILQ_FOREACH(mfp, &nhp->hash_bucket, q, __mpoolfile) + if (!mfp->deadfile && + mfp->no_backing_file && strcmp(newname, + R_ADDR(dbmp->reginfo, mfp->path_off)) == 0) + break; + if (mfp != NULL) { + ret = EEXIST; + goto err; + } + } + + /* + * Find the file -- if mpool doesn't know about this file, that may + * not be an error. + */ + SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) { + /* Ignore non-active files. */ + if (mfp->deadfile || F_ISSET(mfp, MP_TEMP)) + continue; + + /* Try to match on fileid. */ + if (memcmp(fileid, R_ADDR( + dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN) != 0) + continue; + + break; + } + + if (mfp == NULL) { + if (inmem) { + ret = ENOENT; + goto err; + } + goto fsop; + } + + if (op_is_remove) { + MUTEX_LOCK(env, mfp->mutex); + /* + * In-memory dbs have an artificially incremented ref count so + * they do not get reclaimed as long as they exist. Since we + * are now deleting the database, we need to dec that count. + */ + if (mfp->no_backing_file) + mfp->mpf_cnt--; + mfp->deadfile = 1; + MUTEX_UNLOCK(env, mfp->mutex); + } else { + /* + * Else, it's a rename. We've allocated memory for the new + * name. Swap it with the old one. If it's in memory we + * need to move it the right bucket. 
+ */ + p = R_ADDR(dbmp->reginfo, mfp->path_off); + mfp->path_off = newname_off; + + if (inmem && hp != nhp) { + DB_ASSERT(env, nhp != NULL); + SH_TAILQ_REMOVE(&hp->hash_bucket, mfp, q, __mpoolfile); + mfp->bucket = bucket; + SH_TAILQ_INSERT_TAIL(&nhp->hash_bucket, mfp, q); + } + } + +fsop: /* + * If this is a real file, then mfp could be NULL, because + * mpool isn't turned on, and we still need to do the file ops. + */ + if (mfp == NULL || !mfp->no_backing_file) { + if (op_is_remove) { + /* + * !!! + * Replication may ask us to unlink a file that's been + * renamed. Don't complain if it doesn't exist. + */ + if ((ret = __os_unlink(env, fullold, 0)) == ENOENT) + ret = 0; + } else { + /* + * Defensive only, fullnew should never be + * NULL. + */ + DB_ASSERT(env, fullnew != NULL); + if (fullnew == NULL) { + ret = EINVAL; + goto err; + } + ret = __os_rename(env, fullold, fullnew, 1); + } + } + + /* Delete the memory we no longer need. */ +err: if (p != NULL) { + MPOOL_REGION_LOCK(env, &dbmp->reginfo[0]); + __memp_free(&dbmp->reginfo[0], p); + MPOOL_REGION_UNLOCK(env, &dbmp->reginfo[0]); + } + + /* If we have buckets locked, unlock them when done moving files. */ + if (locked == 1) { + MUTEX_UNLOCK(env, hp->mtx_hash); + if (nhp != NULL && nhp != hp) + MUTEX_UNLOCK(env, nhp->mtx_hash); + } + return (ret); +} + +/* + * __memp_ftruncate __ + * Truncate the file. 
+ * + * PUBLIC: int __memp_ftruncate __P((DB_MPOOLFILE *, DB_TXN *, + * PUBLIC: DB_THREAD_INFO *, db_pgno_t, u_int32_t)); + */ +int +__memp_ftruncate(dbmfp, txn, ip, pgno, flags) + DB_MPOOLFILE *dbmfp; + DB_TXN *txn; + DB_THREAD_INFO *ip; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + MPOOLFILE *mfp; + void *pagep; + db_pgno_t last_pgno, pg; + int ret; + + env = dbmfp->env; + mfp = dbmfp->mfp; + ret = 0; + + MUTEX_LOCK(env, mfp->mutex); + last_pgno = mfp->last_pgno; + MUTEX_UNLOCK(env, mfp->mutex); + + if (pgno > last_pgno) { + if (LF_ISSET(MP_TRUNC_RECOVER)) + return (0); + __db_errx(env, DB_STR("3005", + "Truncate beyond the end of file")); + return (EINVAL); + } + + pg = pgno; + if (!LF_ISSET(MP_TRUNC_NOCACHE)) + do { + if (mfp->block_cnt == 0) + break; + if ((ret = __memp_fget(dbmfp, &pg, + ip, txn, DB_MPOOL_FREE, &pagep)) != 0) + return (ret); + } while (pg++ < last_pgno); + + /* + * If we are aborting an extend of a file, the call to __os_truncate + * could extend the file if the new page(s) had not yet been + * written to disk. We do not want to extend the file to pages + * whose log records are not yet flushed [#14031]. In addition if + * we are out of disk space we can generate an error [#12743]. + */ + MUTEX_LOCK(env, mfp->mutex); + if (!F_ISSET(mfp, MP_TEMP) && + !mfp->no_backing_file && pgno <= mfp->last_flushed_pgno) +#ifdef HAVE_FTRUNCATE + ret = __os_truncate(env, + dbmfp->fhp, pgno, mfp->pagesize); +#else + ret = __db_zero_extend(env, + dbmfp->fhp, pgno, mfp->last_pgno, mfp->pagesize); +#endif + + /* + * This set could race with another thread of control that extending + * the file. It's not a problem because we should have the page + * locked at a higher level of the system. 
+ */ + if (ret == 0) { + mfp->last_pgno = pgno - 1; + if (mfp->last_flushed_pgno > mfp->last_pgno) + mfp->last_flushed_pgno = mfp->last_pgno; + } + MUTEX_UNLOCK(env, mfp->mutex); + + return (ret); +} + +#ifdef HAVE_FTRUNCATE +/* + * Support routines for maintaining a sorted freelist while we try to rearrange + * and truncate the file. + */ + +/* + * __memp_alloc_freelist -- + * Allocate mpool space for the freelist. + * + * PUBLIC: int __memp_alloc_freelist __P((DB_MPOOLFILE *, + * PUBLIC: u_int32_t, db_pgno_t **)); + */ +int +__memp_alloc_freelist(dbmfp, nelems, listp) + DB_MPOOLFILE *dbmfp; + u_int32_t nelems; + db_pgno_t **listp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOLFILE *mfp; + void *retp; + int ret; + + env = dbmfp->env; + dbmp = env->mp_handle; + mfp = dbmfp->mfp; + + *listp = NULL; + + /* + * These fields are protected because the database layer + * has the metapage locked while manipulating them. + */ + mfp->free_ref++; + if (mfp->free_size != 0) + return (EBUSY); + + /* Allocate at least a few slots. */ + mfp->free_cnt = nelems; + if (nelems == 0) + nelems = 50; + + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, nelems * sizeof(db_pgno_t), &mfp->free_list, &retp)) != 0) + return (ret); + + mfp->free_size = nelems * sizeof(db_pgno_t); + *listp = retp; + return (0); +} + +/* + * __memp_free_freelist -- + * Free the list. + * + * PUBLIC: int __memp_free_freelist __P((DB_MPOOLFILE *)); + */ +int +__memp_free_freelist(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOLFILE *mfp; + + env = dbmfp->env; + dbmp = env->mp_handle; + mfp = dbmfp->mfp; + + DB_ASSERT(env, mfp->free_ref > 0); + if (--mfp->free_ref > 0) + return (0); + + DB_ASSERT(env, mfp->free_size != 0); + + MPOOL_SYSTEM_LOCK(env); + __memp_free(dbmp->reginfo, R_ADDR(dbmp->reginfo, mfp->free_list)); + MPOOL_SYSTEM_UNLOCK(env); + + mfp->free_cnt = 0; + mfp->free_list = 0; + mfp->free_size = 0; + return (0); +} + +/* + * __memp_get_freelst -- + * Return current list. 
+ * + * PUBLIC: int __memp_get_freelist __P(( + * PUBLIC: DB_MPOOLFILE *, u_int32_t *, db_pgno_t **)); + */ +int +__memp_get_freelist(dbmfp, nelemp, listp) + DB_MPOOLFILE *dbmfp; + u_int32_t *nelemp; + db_pgno_t **listp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOLFILE *mfp; + + env = dbmfp->env; + dbmp = env->mp_handle; + mfp = dbmfp->mfp; + + if (mfp->free_size == 0) { + *nelemp = 0; + *listp = NULL; + } else { + *nelemp = mfp->free_cnt; + *listp = R_ADDR(dbmp->reginfo, mfp->free_list); + } + + return (0); +} + +/* + * __memp_extend_freelist -- + * Extend the list. + * + * PUBLIC: int __memp_extend_freelist __P(( + * PUBLIC: DB_MPOOLFILE *, u_int32_t , db_pgno_t **)); + */ +int +__memp_extend_freelist(dbmfp, count, listp) + DB_MPOOLFILE *dbmfp; + u_int32_t count; + db_pgno_t **listp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOLFILE *mfp; + int ret; + size_t size; + void *retp; + + env = dbmfp->env; + dbmp = env->mp_handle; + mfp = dbmfp->mfp; + + if (mfp->free_size == 0) + return (EINVAL); + + if (count * sizeof(db_pgno_t) > mfp->free_size) { + size = (size_t)DB_ALIGN(count * sizeof(db_pgno_t), 512); +#ifdef HAVE_MIXED_SIZE_ADDRESSING + if (size >= 0xFFFFFFFF) { + __db_errx(env, DB_STR("3006", + "Can't get the required free size while" + "operating in mixed-size-addressing mode")); + return EINVAL; + } +#endif + *listp = R_ADDR(dbmp->reginfo, mfp->free_list); + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, size, &mfp->free_list, &retp)) != 0) + return (ret); + mfp->free_size = (db_size_t)size; + + memcpy(retp, *listp, mfp->free_cnt * sizeof(db_pgno_t)); + + MPOOL_SYSTEM_LOCK(env); + __memp_free(dbmp->reginfo, *listp); + MPOOL_SYSTEM_UNLOCK(env); + } + + mfp->free_cnt = count; + *listp = R_ADDR(dbmp->reginfo, mfp->free_list); + + return (0); +} +#endif + +/* + * __memp_set_last_pgno -- set the last page of the file + * + * PUBLIC: int __memp_set_last_pgno __P((DB_MPOOLFILE *, db_pgno_t)); + */ +int +__memp_set_last_pgno(dbmfp, pgno) + DB_MPOOLFILE *dbmfp; + 
db_pgno_t pgno; +{ + MPOOLFILE *mfp; + + mfp = dbmfp->mfp; + + if (mfp->mpf_cnt == 1) { + MUTEX_LOCK(dbmfp->env, mfp->mutex); + if (mfp->mpf_cnt == 1) + dbmfp->mfp->last_pgno = pgno; + MUTEX_UNLOCK(dbmfp->env, mfp->mutex); + } + return (0); +} diff --git a/src/mp/mp_mvcc.c b/src/mp/mp_mvcc.c new file mode 100644 index 00000000..1633ccc3 --- /dev/null +++ b/src/mp/mp_mvcc.c @@ -0,0 +1,636 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __pgno_cmp __P((const void *, const void *)); + +/* + * __memp_bh_settxn -- + * Set the transaction that owns the given buffer. + * + * PUBLIC: int __memp_bh_settxn __P((DB_MPOOL *, MPOOLFILE *mfp, BH *, void *)); + */ +int +__memp_bh_settxn(dbmp, mfp, bhp, vtd) + DB_MPOOL *dbmp; + MPOOLFILE *mfp; + BH *bhp; + void *vtd; +{ + ENV *env; + TXN_DETAIL *td; + + env = dbmp->env; + td = (TXN_DETAIL *)vtd; + + if (td == NULL) { + __db_errx(env, DB_STR_A("3002", + "%s: non-transactional update to a multiversion file", + "%s"), __memp_fns(dbmp, mfp)); + return (EINVAL); + } + + if (bhp->td_off != INVALID_ROFF) { + DB_ASSERT(env, BH_OWNER(env, bhp) == td); + return (0); + } + + bhp->td_off = R_OFFSET(&env->tx_handle->reginfo, td); + return (__txn_add_buffer(env, td)); +} + +/* + * __memp_skip_curadj -- + * Indicate whether a cursor adjustment can be skipped for a snapshot + * cursor. 
+ * + * PUBLIC: int __memp_skip_curadj __P((DBC *, db_pgno_t)); + */ +int +__memp_skip_curadj(dbc, pgno) + DBC * dbc; + db_pgno_t pgno; +{ + BH *bhp; + DB_MPOOL *dbmp; + DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; + DB_TXN *txn; + ENV *env; + MPOOLFILE *mfp; + REGINFO *infop; + roff_t mf_offset; + int ret, skip; + u_int32_t bucket; + + env = dbc->env; + dbmp = env->mp_handle; + dbmfp = dbc->dbp->mpf; + mfp = dbmfp->mfp; + mf_offset = R_OFFSET(dbmp->reginfo, mfp); + skip = 0; + + for (txn = dbc->txn; txn->parent != NULL; txn = txn->parent) + ; + + /* + * Determine the cache and hash bucket where this page lives and get + * local pointers to them. Reset on each pass through this code, the + * page number can change. + */ + MP_GET_BUCKET(env, mfp, pgno, &infop, hp, bucket, ret); + if (ret != 0) { + /* Panic: there is no way to return the error. */ + (void)__env_panic(env, ret); + return (0); + } + + SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) { + if (bhp->pgno != pgno || bhp->mf_offset != mf_offset) + continue; + + if (!BH_OWNED_BY(env, bhp, txn)) + skip = 1; + break; + } + MUTEX_UNLOCK(env, hp->mtx_hash); + + return (skip); +} + +#define DB_FREEZER_MAGIC 0x06102002 + +/* + * __memp_bh_freeze -- + * Save a buffer to temporary storage in case it is needed later by + * a snapshot transaction. This function should be called with the buffer + * locked and will exit with it locked. A BH_FROZEN buffer header is + * allocated to represent the frozen data in mpool. 
+ * + * PUBLIC: int __memp_bh_freeze __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *, + * PUBLIC: BH *, int *)); + */ +int +__memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp) + DB_MPOOL *dbmp; + REGINFO *infop; + DB_MPOOL_HASH *hp; + BH *bhp; + int *need_frozenp; +{ + BH *frozen_bhp; + BH_FROZEN_ALLOC *frozen_alloc; + DB_FH *fhp; + ENV *env; + MPOOL *c_mp; + MPOOLFILE *mfp; + db_mutex_t mutex; + db_pgno_t maxpgno, newpgno, nextfree; + size_t nio; + int created, h_locked, ret, t_ret; + u_int32_t magic, nbucket, ncache, pagesize; + char filename[100], *real_name; + + env = dbmp->env; + c_mp = infop->primary; + created = h_locked = ret = 0; + /* Find the associated MPOOLFILE. */ + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + pagesize = mfp->pagesize; + real_name = NULL; + fhp = NULL; + + MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE); + + MPOOL_REGION_LOCK(env, infop); + frozen_bhp = SH_TAILQ_FIRST(&c_mp->free_frozen, __bh); + if (frozen_bhp != NULL) { + SH_TAILQ_REMOVE(&c_mp->free_frozen, frozen_bhp, hq, __bh); + *need_frozenp = SH_TAILQ_EMPTY(&c_mp->free_frozen); + } else { + *need_frozenp = 1; + + /* There might be a small amount of unallocated space. */ + if (__env_alloc(infop, + sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE), + &frozen_alloc) == 0) { + frozen_bhp = (BH *)(frozen_alloc + 1); + frozen_bhp->mtx_buf = MUTEX_INVALID; + SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen, + frozen_alloc, links); + } + } + MPOOL_REGION_UNLOCK(env, infop); + + /* + * If we can't get a frozen buffer header, return ENOMEM immediately: + * we don't want to call __memp_alloc recursively. __memp_alloc will + * turn the next free page it finds into frozen buffer headers. + */ + if (frozen_bhp == NULL) { + ret = ENOMEM; + goto err; + } + + /* + * For now, keep things simple and have one file per page size per + * hash bucket. This improves concurrency but can mean lots of files + * if there is lots of freezing. 
+ */ + ncache = (u_int32_t)(infop - dbmp->reginfo); + nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab)); + snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK", + (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024); + + if ((ret = __db_appname(env, + DB_APP_NONE, filename, NULL, &real_name)) != 0) + goto err; + + MUTEX_LOCK(env, hp->mtx_hash); + h_locked = 1; + DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE) && !F_ISSET(bhp, BH_FROZEN)); + + if (BH_REFCOUNT(bhp) > 1 || F_ISSET(bhp, BH_DIRTY)) { + ret = EBUSY; + goto err; + } + + if ((ret = __os_open(env, real_name, pagesize, + DB_OSO_CREATE | DB_OSO_EXCL, env->db_mode, &fhp)) == 0) { + /* We're creating the file -- initialize the metadata page. */ + created = 1; + magic = DB_FREEZER_MAGIC; + maxpgno = newpgno = 0; + if ((ret = __os_write(env, fhp, + &magic, sizeof(u_int32_t), &nio)) != 0 || + (ret = __os_write(env, fhp, + &newpgno, sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_write(env, fhp, + &maxpgno, sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto err; + } else if (ret == EEXIST) + ret = __os_open(env, + real_name, pagesize, 0, env->db_mode, &fhp); + if (ret != 0) + goto err; + if ((ret = __os_read(env, fhp, + &magic, sizeof(u_int32_t), &nio)) != 0 || + (ret = __os_read(env, fhp, + &newpgno, sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_read(env, fhp, + &maxpgno, sizeof(db_pgno_t), &nio)) != 0) + goto err; + if (magic != DB_FREEZER_MAGIC) { + ret = EINVAL; + goto err; + } + if (newpgno == 0) { + newpgno = ++maxpgno; + if ((ret = __os_seek(env, + fhp, 0, 0, sizeof(u_int32_t) + sizeof(db_pgno_t))) != 0 || + (ret = __os_write(env, fhp, &maxpgno, sizeof(db_pgno_t), + &nio)) != 0) + goto err; + } else { + if ((ret = __os_seek(env, fhp, newpgno, pagesize, 0)) != 0 || + (ret = __os_read(env, fhp, &nextfree, sizeof(db_pgno_t), + &nio)) != 0) + goto err; + if ((ret = + __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 || + (ret = __os_write(env, 
fhp, &nextfree, sizeof(db_pgno_t), + &nio)) != 0) + goto err; + } + + /* Write the buffer to the allocated page. */ + if ((ret = __os_io(env, DB_IO_WRITE, fhp, newpgno, pagesize, 0, + pagesize, bhp->buf, &nio)) != 0) + goto err; + + ret = __os_closehandle(env, fhp); + fhp = NULL; + if (ret != 0) + goto err; + + /* + * Set up the frozen_bhp with the freezer page number. The original + * buffer header is about to be freed, so transfer resources to the + * frozen header here. + */ + mutex = frozen_bhp->mtx_buf; +#ifdef DIAG_MVCC + memcpy(frozen_bhp, bhp, SSZ(BH, align_off)); +#else + memcpy(frozen_bhp, bhp, SSZA(BH, buf)); +#endif + atomic_init(&frozen_bhp->ref, 0); + if (mutex != MUTEX_INVALID) + frozen_bhp->mtx_buf = mutex; + else if ((ret = __mutex_alloc(env, MTX_MPOOL_BH, + DB_MUTEX_SHARED, &frozen_bhp->mtx_buf)) != 0) + goto err; + F_SET(frozen_bhp, BH_FROZEN); + F_CLR(frozen_bhp, BH_EXCLUSIVE); + ((BH_FROZEN_PAGE *)frozen_bhp)->spgno = newpgno; + + /* + * We're about to add the frozen buffer header to the version chain, so + * we have temporarily created another buffer for the owning + * transaction. + */ + if (frozen_bhp->td_off != INVALID_ROFF && + (ret = __txn_add_buffer(env, BH_OWNER(env, frozen_bhp))) != 0) { + (void)__env_panic(env, ret); + goto err; + } + + STAT_INC(env, mpool, freeze, hp->hash_frozen, bhp->pgno); + + /* + * Add the frozen buffer to the version chain and update the hash + * bucket if this is the head revision. The original buffer will be + * freed by __memp_alloc calling __memp_bhfree (assuming no other + * thread has blocked waiting for it while we were freezing). + */ + SH_CHAIN_INSERT_AFTER(bhp, frozen_bhp, vc, __bh); + if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) { + SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, + bhp, frozen_bhp, hq, __bh); + SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh); + } + MUTEX_UNLOCK(env, hp->mtx_hash); + h_locked = 0; + + /* + * Increment the file's block count -- freeing the original buffer will + * decrement it. 
+ */ + MUTEX_LOCK(env, mfp->mutex); + ++mfp->block_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + + if (0) { +err: if (fhp != NULL && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + if (created) { + DB_ASSERT(env, h_locked); + if ((t_ret = __os_unlink(env, real_name, 0)) != 0 && + ret == 0) + ret = t_ret; + } + if (h_locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + if (ret == 0) + ret = EIO; + if (frozen_bhp != NULL) { + MPOOL_REGION_LOCK(env, infop); + SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, + frozen_bhp, hq); + MPOOL_REGION_UNLOCK(env, infop); + } + } + if (real_name != NULL) + __os_free(env, real_name); + if (ret != 0 && ret != EBUSY && ret != ENOMEM) + __db_err(env, ret, "__memp_bh_freeze"); + + return (ret); +} + +static int +__pgno_cmp(a, b) + const void *a, *b; +{ + db_pgno_t *ap, *bp; + + ap = (db_pgno_t *)a; + bp = (db_pgno_t *)b; + + return (int)(*ap - *bp); +} + +/* + * __memp_bh_thaw -- + * Free a buffer header in temporary storage. Optionally restore the + * buffer (if alloc_bhp != NULL). This function should be + * called with the hash bucket locked and will return with it unlocked. 
+ * + * PUBLIC: int __memp_bh_thaw __P((DB_MPOOL *, REGINFO *, + * PUBLIC: DB_MPOOL_HASH *, BH *, BH *)); + */ +int +__memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp) + DB_MPOOL *dbmp; + REGINFO *infop; + DB_MPOOL_HASH *hp; + BH *frozen_bhp, *alloc_bhp; +{ + DB_FH *fhp; + ENV *env; +#ifdef DIAGNOSTIC + DB_LSN vlsn; +#endif + MPOOL *c_mp; + MPOOLFILE *mfp; + db_mutex_t mutex; + db_pgno_t *freelist, *ppgno, freepgno, maxpgno, spgno; + size_t nio; + u_int32_t listsize, magic, nbucket, ncache, ntrunc, nfree, pagesize; +#ifdef HAVE_FTRUNCATE + int i; +#endif + int h_locked, needfree, ret, t_ret; + char filename[100], *real_name; + + env = dbmp->env; + fhp = NULL; + c_mp = infop->primary; + mfp = R_ADDR(dbmp->reginfo, frozen_bhp->mf_offset); + freelist = NULL; + pagesize = mfp->pagesize; + ret = 0; + real_name = NULL; + + MUTEX_REQUIRED(env, hp->mtx_hash); + DB_ASSERT(env, F_ISSET(frozen_bhp, BH_EXCLUSIVE) || alloc_bhp == NULL); + h_locked = 1; + + DB_ASSERT(env, F_ISSET(frozen_bhp, BH_FROZEN) && + !F_ISSET(frozen_bhp, BH_THAWED)); + DB_ASSERT(env, alloc_bhp != NULL || + SH_CHAIN_SINGLETON(frozen_bhp, vc) || + (SH_CHAIN_HASNEXT(frozen_bhp, vc) && + BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn))); + DB_ASSERT(env, alloc_bhp == NULL || !F_ISSET(alloc_bhp, BH_FROZEN)); + + spgno = ((BH_FROZEN_PAGE *)frozen_bhp)->spgno; + + if (alloc_bhp != NULL) { + mutex = alloc_bhp->mtx_buf; +#ifdef DIAG_MVCC + memcpy(alloc_bhp, frozen_bhp, SSZ(BH, align_off)); +#else + memcpy(alloc_bhp, frozen_bhp, SSZA(BH, buf)); +#endif + alloc_bhp->mtx_buf = mutex; + MUTEX_LOCK(env, alloc_bhp->mtx_buf); + atomic_init(&alloc_bhp->ref, 1); + F_CLR(alloc_bhp, BH_FROZEN); + } + + /* + * For now, keep things simple and have one file per page size per + * hash bucket. This improves concurrency but can mean lots of files + * if there is lots of freezing. 
+ */ + ncache = (u_int32_t)(infop - dbmp->reginfo); + nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab)); + snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK", + (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024); + + if ((ret = __db_appname(env, + DB_APP_NONE, filename, NULL, &real_name)) != 0) + goto err; + if ((ret = __os_open(env, + real_name, pagesize, 0, env->db_mode, &fhp)) != 0) + goto err; + + /* + * Read the first free page number -- we're about to free the page + * after we we read it. + */ + if ((ret = __os_read(env, fhp, &magic, sizeof(u_int32_t), &nio)) != 0 || + (ret = + __os_read(env, fhp, &freepgno, sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_read(env, fhp, &maxpgno, sizeof(db_pgno_t), &nio)) != 0) + goto err; + + if (magic != DB_FREEZER_MAGIC) { + ret = EINVAL; + goto err; + } + + /* Read the buffer from the frozen page. */ + if (alloc_bhp != NULL) { + DB_ASSERT(env, !F_ISSET(frozen_bhp, BH_FREED)); + if ((ret = __os_io(env, DB_IO_READ, fhp, + spgno, pagesize, 0, pagesize, alloc_bhp->buf, &nio)) != 0) + goto err; + } + + /* + * Free the page from the file. If it's the last page, truncate. + * Otherwise, update free page linked list. 
+ */ + needfree = 1; + if (spgno == maxpgno) { + listsize = 100; + if ((ret = __os_malloc(env, + listsize * sizeof(db_pgno_t), &freelist)) != 0) + goto err; + nfree = 0; + while (freepgno != 0) { + if (nfree == listsize - 1) { + listsize *= 2; + if ((ret = __os_realloc(env, + listsize * sizeof(db_pgno_t), + &freelist)) != 0) + goto err; + } + freelist[nfree++] = freepgno; + if ((ret = __os_seek(env, fhp, + freepgno, pagesize, 0)) != 0 || + (ret = __os_read(env, fhp, &freepgno, + sizeof(db_pgno_t), &nio)) != 0) + goto err; + } + freelist[nfree++] = spgno; + qsort(freelist, nfree, sizeof(db_pgno_t), __pgno_cmp); + for (ppgno = &freelist[nfree - 1]; ppgno > freelist; ppgno--) + if (*(ppgno - 1) != *ppgno - 1) + break; + ntrunc = (u_int32_t)(&freelist[nfree] - ppgno); + if (ntrunc == (u_int32_t)maxpgno) { + needfree = 0; + ret = __os_closehandle(env, fhp); + fhp = NULL; + if (ret != 0 || + (ret = __os_unlink(env, real_name, 0)) != 0) + goto err; + } +#ifdef HAVE_FTRUNCATE + else { + maxpgno -= (db_pgno_t)ntrunc; + if ((ret = __os_truncate(env, fhp, + maxpgno + 1, pagesize)) != 0) + goto err; + + /* Fix up the linked list */ + freelist[nfree - ntrunc] = 0; + if ((ret = __os_seek(env, fhp, + 0, 0, sizeof(u_int32_t))) != 0 || + (ret = __os_write(env, fhp, &freelist[0], + sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_write(env, fhp, &maxpgno, + sizeof(db_pgno_t), &nio)) != 0) + goto err; + + for (i = 0; i < (int)(nfree - ntrunc); i++) + if ((ret = __os_seek(env, + fhp, freelist[i], pagesize, 0)) != 0 || + (ret = __os_write(env, fhp, + &freelist[i + 1], sizeof(db_pgno_t), + &nio)) != 0) + goto err; + needfree = 0; + } +#endif + } + if (needfree) { + if ((ret = __os_seek(env, fhp, spgno, pagesize, 0)) != 0 || + (ret = __os_write(env, fhp, + &freepgno, sizeof(db_pgno_t), &nio)) != 0 || + (ret = __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 || + (ret = __os_write(env, fhp, + &spgno, sizeof(db_pgno_t), &nio)) != 0) + goto err; + + ret = __os_closehandle(env, fhp); + fhp 
= NULL; + if (ret != 0) + goto err; + } + + /* + * Add the thawed buffer (if any) to the version chain. We can't + * do this any earlier, because we can't guarantee that another thread + * won't be waiting for it, which means we can't clean up if there are + * errors reading from the freezer. We can't do it any later, because + * we're about to free frozen_bhp, and without it we would need to do + * another cache lookup to find out where the new page should live. + */ + MUTEX_REQUIRED(env, hp->mtx_hash); + if (alloc_bhp != NULL) { + alloc_bhp->priority = c_mp->lru_priority; + + SH_CHAIN_INSERT_AFTER(frozen_bhp, alloc_bhp, vc, __bh); + if (!SH_CHAIN_HASNEXT(alloc_bhp, vc)) { + SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp, + alloc_bhp, hq, __bh); + SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh); + } + } else if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) { + if (SH_CHAIN_HASPREV(frozen_bhp, vc)) + SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp, + SH_CHAIN_PREV(frozen_bhp, vc, __bh), hq, __bh); + SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh); + } + SH_CHAIN_REMOVE(frozen_bhp, vc, __bh); + + if (alloc_bhp == NULL && frozen_bhp->td_off != INVALID_ROFF && + (ret = __txn_remove_buffer(env, + BH_OWNER(env, frozen_bhp), MUTEX_INVALID)) != 0) { + (void)__env_panic(env, ret); + goto err; + } + frozen_bhp->td_off = INVALID_ROFF; + + /* + * If other threads are waiting for this buffer as well, they will have + * incremented the reference count and will be waiting on the mutex. + * For that reason, we can't unconditionally free the memory here. 
+ */ + needfree = (atomic_dec(env, &frozen_bhp->ref) == 0); + if (!needfree) + F_SET(frozen_bhp, BH_THAWED); + MUTEX_UNLOCK(env, hp->mtx_hash); + if (F_ISSET(frozen_bhp, BH_EXCLUSIVE)) + MUTEX_UNLOCK(env, frozen_bhp->mtx_buf); + h_locked = 0; + if (needfree) { + MPOOL_REGION_LOCK(env, infop); + SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, frozen_bhp, hq); + MPOOL_REGION_UNLOCK(env, infop); + } + +#ifdef HAVE_STATISTICS + if (alloc_bhp != NULL) + STAT_INC_VERB(env, mpool, thaw, + hp->hash_thawed, __memp_fns(dbmp, mfp), frozen_bhp->pgno); + else + STAT_INC_VERB(env, mpool, free_frozen, hp->hash_frozen_freed, + __memp_fns(dbmp, mfp), frozen_bhp->pgno); +#endif + + if (0) { +err: if (h_locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + if (ret == 0) + ret = EIO; + } + if (real_name != NULL) + __os_free(env, real_name); + if (freelist != NULL) + __os_free(env, freelist); + if (fhp != NULL && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + __db_err(env, ret, "__memp_bh_thaw"); + + return (ret); +} diff --git a/src/mp/mp_region.c b/src/mp/mp_region.c new file mode 100644 index 00000000..65d43a2f --- /dev/null +++ b/src/mp/mp_region.c @@ -0,0 +1,620 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" + +static int __memp_init_config __P((ENV *, MPOOL *)); +static void __memp_region_size __P((ENV *, roff_t *, u_int32_t *)); + +#define MPOOL_DEFAULT_PAGESIZE (4 * 1024) + +/* + * __memp_open -- + * Internal version of memp_open: only called from ENV->open. 
+ * + * PUBLIC: int __memp_open __P((ENV *, int)); + */ +int +__memp_open(env, create_ok) + ENV *env; + int create_ok; +{ + DB_ENV *dbenv; + DB_MPOOL *dbmp; + MPOOL *mp, *mp_i; + REGINFO reginfo; + roff_t cache_size, max_size, reg_size; + u_int i, max_nreg; + u_int32_t htab_buckets, *regids; + int ret; + + dbenv = env->dbenv; + cache_size = 0; + + /* Calculate the region size and hash bucket count. */ + __memp_region_size(env, &max_size, &htab_buckets); + + /* Create and initialize the DB_MPOOL structure. */ + if ((ret = __os_calloc(env, 1, sizeof(*dbmp), &dbmp)) != 0) + return (ret); + LIST_INIT(&dbmp->dbregq); + TAILQ_INIT(&dbmp->dbmfq); + dbmp->env = env; + + /* Join/create the first mpool region. */ + memset(®info, 0, sizeof(REGINFO)); + reginfo.env = env; + reginfo.type = REGION_TYPE_MPOOL; + reginfo.id = INVALID_REGION_ID; + reginfo.flags = REGION_JOIN_OK; + + /* Calculate the minimum allocation. */ + reg_size = sizeof(MPOOL); + reg_size += MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH); + reg_size += htab_buckets * sizeof(DB_MPOOL_HASH); + reg_size += (dbenv->mp_pagesize == 0 ? + MPOOL_DEFAULT_PAGESIZE : dbenv->mp_pagesize) * 10; + if (reg_size > max_size) + reg_size = max_size; + + if (create_ok) + F_SET(®info, REGION_CREATE_OK); + if ((ret = __env_region_attach(env, ®info, reg_size, max_size)) != 0) + goto err; + cache_size = reginfo.rp->max; + if (F_ISSET(env, ENV_PRIVATE)) + reginfo.max_alloc = reginfo.rp->max; + + /* + * If we created the region, initialize it. Create or join any + * additional regions. + */ + if (F_ISSET(®info, REGION_CREATE)) { + /* + * We define how many regions there are going to be, allocate + * the REGINFO structures and create them. Make sure we don't + * clear the wrong entries on error. + */ + max_nreg = __memp_max_regions(env); + if ((ret = __os_calloc(env, + max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0) + goto err; + /* Make sure we don't clear the wrong entries on error. 
*/ + dbmp->reginfo[0] = reginfo; + for (i = 1; i < max_nreg; ++i) + dbmp->reginfo[i].id = INVALID_REGION_ID; + + /* Initialize the first region. */ + if ((ret = __memp_init(env, dbmp, + 0, htab_buckets, max_nreg)) != 0) + goto err; + + /* + * Create/initialize remaining regions and copy their IDs into + * the first region. + */ + mp = R_ADDR(dbmp->reginfo, dbmp->reginfo[0].rp->primary); + regids = R_ADDR(dbmp->reginfo, mp->regids); + regids[0] = dbmp->reginfo[0].id; + for (i = 1; i < dbenv->mp_ncache; ++i) { + dbmp->reginfo[i].env = env; + dbmp->reginfo[i].type = REGION_TYPE_MPOOL; + dbmp->reginfo[i].id = INVALID_REGION_ID; + dbmp->reginfo[i].flags = REGION_CREATE_OK; + if ((ret = __env_region_attach( + env, &dbmp->reginfo[i], reg_size, max_size)) != 0) + goto err; + if (F_ISSET(env, ENV_PRIVATE)) + dbmp->reginfo[i].max_alloc = max_size; + cache_size += dbmp->reginfo[i].rp->max; + if ((ret = __memp_init(env, dbmp, + i, htab_buckets, max_nreg)) != 0) + goto err; + + regids[i] = dbmp->reginfo[i].id; + } + mp->gbytes = (u_int32_t) (cache_size / GIGABYTE); + mp->bytes = (u_int32_t) (cache_size % GIGABYTE); + } else { + /* + * Determine how many regions there are going to be, allocate + * the REGINFO structures and fill in local copies of that + * information. + */ + mp = R_ADDR(®info, reginfo.rp->primary); + dbenv->mp_ncache = mp->nreg; + if ((ret = __os_calloc(env, + mp->max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0) + goto err; + /* Make sure we don't clear the wrong entries on error. */ + for (i = 0; i < dbenv->mp_ncache; ++i) + dbmp->reginfo[i].id = INVALID_REGION_ID; + dbmp->reginfo[0] = reginfo; + + /* Join remaining regions. 
*/ + regids = R_ADDR(dbmp->reginfo, mp->regids); + for (i = 1; i < dbenv->mp_ncache; ++i) { + dbmp->reginfo[i].env = env; + dbmp->reginfo[i].type = REGION_TYPE_MPOOL; + dbmp->reginfo[i].id = regids[i]; + dbmp->reginfo[i].flags = REGION_JOIN_OK; + if ((ret = __env_region_attach( + env, &dbmp->reginfo[i], 0, 0)) != 0) + goto err; + } + } + + /* Set the local addresses for the regions. */ + for (i = 0; i < dbenv->mp_ncache; ++i) { + mp_i = dbmp->reginfo[i].primary = + R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary); + dbmp->reginfo[i].mtx_alloc = mp_i->mtx_region; + } + + /* If the region is threaded, allocate a mutex to lock the handles. */ + if ((ret = __mutex_alloc(env, + MTX_MPOOL_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbmp->mutex)) != 0) + goto err; + + env->mp_handle = dbmp; + + /* A process joining the region may reset the mpool configuration. */ + if ((ret = __memp_init_config(env, mp)) != 0) + return (ret); + + return (0); + +err: env->mp_handle = NULL; + if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) { + for (i = 0; i < dbenv->mp_ncache; ++i) + if (dbmp->reginfo[i].id != INVALID_REGION_ID) + (void)__env_region_detach( + env, &dbmp->reginfo[i], 0); + __os_free(env, dbmp->reginfo); + } + + (void)__mutex_free(env, &dbmp->mutex); + __os_free(env, dbmp); + return (ret); +} + +/* + * __memp_init -- + * Initialize a MPOOL structure in shared memory. 
+ * + * PUBLIC: int __memp_init + * PUBLIC: __P((ENV *, DB_MPOOL *, u_int, u_int32_t, u_int)); + */ +int +__memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg) + ENV *env; + DB_MPOOL *dbmp; + u_int reginfo_off, max_nreg; + u_int32_t htab_buckets; +{ + BH *frozen_bhp; + BH_FROZEN_ALLOC *frozen; + DB_ENV *dbenv; + DB_MPOOL_HASH *htab, *hp; + MPOOL *mp, *main_mp; + REGINFO *infop; + db_mutex_t mtx_base, mtx_discard, mtx_prev; + u_int32_t i; + int ret; + void *p; + + dbenv = env->dbenv; + + infop = &dbmp->reginfo[reginfo_off]; + if ((ret = __env_alloc(infop, sizeof(MPOOL), &infop->primary)) != 0) + goto mem_err; + infop->rp->primary = R_OFFSET(infop, infop->primary); + mp = infop->primary; + memset(mp, 0, sizeof(*mp)); + + if ((ret = + __mutex_alloc(env, MTX_MPOOL_REGION, 0, &mp->mtx_region)) != 0) + return (ret); + + if (reginfo_off == 0) { + ZERO_LSN(mp->lsn); + + mp->nreg = dbenv->mp_ncache; + mp->max_nreg = max_nreg; + if ((ret = __env_alloc(&dbmp->reginfo[0], + max_nreg * sizeof(u_int32_t), &p)) != 0) + goto mem_err; + mp->regids = R_OFFSET(dbmp->reginfo, p); + mp->nbuckets = dbenv->mp_ncache * htab_buckets; + + /* Allocate file table space and initialize it. */ + if ((ret = __env_alloc(infop, + MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH), &htab)) != 0) + goto mem_err; + mp->ftab = R_OFFSET(infop, htab); + for (i = 0; i < MPOOL_FILE_BUCKETS; i++) { + if ((ret = __mutex_alloc(env, + MTX_MPOOL_FILE_BUCKET, 0, &htab[i].mtx_hash)) != 0) + return (ret); + SH_TAILQ_INIT(&htab[i].hash_bucket); + atomic_init(&htab[i].hash_page_dirty, 0); + } + + /* + * Allocate all of the hash bucket mutexes up front. We do + * this so that we don't need to free and reallocate mutexes as + * the cache is resized. 
+ */ + mtx_base = mtx_prev = MUTEX_INVALID; + if (!MUTEX_ON(env) || F_ISSET(env, ENV_PRIVATE)) + goto no_prealloc; + for (i = 0; i < mp->max_nreg * dbenv->mp_mtxcount; i++) { + if ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET, + DB_MUTEX_SHARED, &mtx_discard)) != 0) + return (ret); + if (i == 0) + mtx_base = mtx_discard; + else + DB_ASSERT(env, mtx_base == MUTEX_INVALID || + mtx_discard == mtx_prev + 1); + mtx_prev = mtx_discard; + } + } else { + main_mp = dbmp->reginfo[0].primary; + htab = R_ADDR(&dbmp->reginfo[0], main_mp->htab); + mtx_base = htab[0].mtx_hash; + } + + /* + * We preallocated all of the mutexes in a block, so for regions after + * the first, we skip mutexes in use in earlier regions. Each region + * has the same number of buckets + */ +no_prealloc: + if (MUTEX_ON(env)) + mtx_base += reginfo_off * dbenv->mp_mtxcount; + + /* Allocate hash table space and initialize it. */ + if ((ret = __env_alloc(infop, + htab_buckets * sizeof(DB_MPOOL_HASH), &htab)) != 0) + goto mem_err; + mp->htab = R_OFFSET(infop, htab); + for (i = 0; i < htab_buckets; i++) { + hp = &htab[i]; + if (!MUTEX_ON(env) || dbenv->mp_mtxcount == 0) + hp->mtx_hash = MUTEX_INVALID; + else if (F_ISSET(env, ENV_PRIVATE)) { + if (i >= dbenv->mp_mtxcount) + hp->mtx_hash = + htab[i % dbenv->mp_mtxcount].mtx_hash; + else if + ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET, + DB_MUTEX_SHARED, &hp->mtx_hash)) != 0) + return (ret); + } else + hp->mtx_hash = mtx_base + (i % dbenv->mp_mtxcount); + SH_TAILQ_INIT(&hp->hash_bucket); + atomic_init(&hp->hash_page_dirty, 0); +#ifdef HAVE_STATISTICS + hp->hash_io_wait = 0; + hp->hash_frozen = hp->hash_thawed = hp->hash_frozen_freed = 0; +#endif + hp->flags = 0; + ZERO_LSN(hp->old_reader); + } + mp->htab_buckets = htab_buckets; + mp->htab_mutexes = dbenv->mp_mtxcount; + mp->pagesize = dbenv->mp_pagesize == 0 ? 
+ MPOOL_DEFAULT_PAGESIZE : dbenv->mp_pagesize; + + SH_TAILQ_INIT(&mp->free_frozen); + SH_TAILQ_INIT(&mp->alloc_frozen); + + /* + * Pre-allocate one frozen buffer header. This avoids situations where + * the cache becomes full of pages and we don't even have the 28 bytes + * (or so) available to allocate a frozen buffer header. + */ + if ((ret = __env_alloc(infop, + sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE), &frozen)) != 0) + goto mem_err; + SH_TAILQ_INSERT_TAIL(&mp->alloc_frozen, frozen, links); + frozen_bhp = (BH *)(frozen + 1); + frozen_bhp->mtx_buf = MUTEX_INVALID; + SH_TAILQ_INSERT_TAIL(&mp->free_frozen, frozen_bhp, hq); + + /* + * Only the environment creator knows the total cache size, + * fill in those fields now. + */ + mp->gbytes = dbenv->mp_gbytes; + mp->bytes = dbenv->mp_bytes; + infop->mtx_alloc = mp->mtx_region; + return (0); + +mem_err:__db_errx(env, DB_STR("3026", + "Unable to allocate memory for mpool region")); + return (ret); +} + +/* + * PUBLIC: u_int32_t __memp_max_regions __P((ENV *)); + */ +u_int32_t +__memp_max_regions(env) + ENV *env; +{ + DB_ENV *dbenv; + roff_t reg_size, max_size; + size_t max_nreg; + + dbenv = env->dbenv; + + if (dbenv->mp_max_gbytes == 0 && dbenv->mp_max_bytes == 0) + return (dbenv->mp_ncache); + __memp_region_size(env, ®_size, NULL); + max_size = + (roff_t)dbenv->mp_max_gbytes * GIGABYTE + dbenv->mp_max_bytes; + max_nreg = (max_size + reg_size / 2) / reg_size; + + /* Sanity check that the number of regions fits in 32 bits. */ + DB_ASSERT(env, max_nreg == (u_int32_t)max_nreg); + + if (max_nreg <= dbenv->mp_ncache) + max_nreg = dbenv->mp_ncache; + return ((u_int32_t)max_nreg); +} + +/* + * __memp_region_size -- + * Size the region and figure out how many hash buckets we'll have. 
+ */ +static void +__memp_region_size(env, reg_sizep, htab_bucketsp) + ENV *env; + roff_t *reg_sizep; + u_int32_t *htab_bucketsp; +{ + DB_ENV *dbenv; + roff_t reg_size, cache_size; + u_int32_t pgsize; + + dbenv = env->dbenv; + + /* + * Figure out how big each cache region is. Cast an operand to roff_t + * so we do 64-bit arithmetic as appropriate. + */ + cache_size = (roff_t)dbenv->mp_gbytes * GIGABYTE + dbenv->mp_bytes; + reg_size = cache_size / dbenv->mp_ncache; + if (reg_sizep != NULL) + *reg_sizep = reg_size; + + /* + * Figure out how many hash buckets each region will have. Assume we + * want to keep the hash chains with under 3 pages on each chain. We + * don't know the pagesize in advance, and it may differ for different + * files. Use a pagesize of 4K for the calculation -- we walk these + * chains a lot, they must be kept short. We use 2.5 as this maintains + * compatibility with previous releases. + * + * XXX + * Cache sizes larger than 10TB would cause 32-bit wrapping in the + * calculation of the number of hash buckets. This probably isn't + * something we need to worry about right now, but is checked when the + * cache size is set. + */ + if (htab_bucketsp != NULL) { + if (dbenv->mp_tablesize != 0) + *htab_bucketsp = __db_tablesize(dbenv->mp_tablesize); + else { + if ((pgsize = dbenv->mp_pagesize) == 0) + pgsize = MPOOL_DEFAULT_PAGESIZE; + *htab_bucketsp = __db_tablesize( + (u_int32_t)(reg_size / (2.5 * pgsize))); + } + } + +} + +/* + * __memp_region_mutex_count -- + * Return the number of mutexes the mpool region will need. 
+ * + * PUBLIC: u_int32_t __memp_region_mutex_count __P((ENV *)); + */ +u_int32_t +__memp_region_mutex_count(env) + ENV *env; +{ + DB_ENV *dbenv; + u_int32_t htab_buckets; + roff_t reg_size; + u_int32_t max_region, num_per_cache, pgsize; + + dbenv = env->dbenv; + + __memp_region_size(env, ®_size, &htab_buckets); + if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION)) + pgsize = sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE); + if ((pgsize = dbenv->mp_pagesize) == 0) + pgsize = MPOOL_DEFAULT_PAGESIZE; + max_region = __memp_max_regions(env); + + /* + * We need a couple of mutexes for the region itself, one for each + * file handle (MPOOLFILE) the application allocates, one for each + * of the MPOOL_FILE_BUCKETS, and each cache has one mutex per + * hash bucket. We then need one mutex per page in the cache, + * the worst case is really big if the pages are 512 bytes. + */ + if (dbenv->mp_mtxcount != 0) + htab_buckets = dbenv->mp_mtxcount; + else + dbenv->mp_mtxcount = htab_buckets; + num_per_cache = htab_buckets + (u_int32_t)(reg_size / pgsize); + return ((max_region * num_per_cache) + 50 + MPOOL_FILE_BUCKETS); +} + +/* + * __memp_init_config -- + * Initialize shared configuration information. + */ +static int +__memp_init_config(env, mp) + ENV *env; + MPOOL *mp; +{ + DB_ENV *dbenv; + + dbenv = env->dbenv; + + MPOOL_SYSTEM_LOCK(env); + if (dbenv->mp_mmapsize != 0) + mp->mp_mmapsize = (db_size_t)dbenv->mp_mmapsize; + if (dbenv->mp_maxopenfd != 0) + mp->mp_maxopenfd = dbenv->mp_maxopenfd; + if (dbenv->mp_maxwrite != 0) + mp->mp_maxwrite = dbenv->mp_maxwrite; + if (dbenv->mp_maxwrite_sleep != 0) + mp->mp_maxwrite_sleep = dbenv->mp_maxwrite_sleep; + MPOOL_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __memp_env_refresh -- + * Clean up after the mpool system on a close or failed open. 
+ * + * PUBLIC: int __memp_env_refresh __P((ENV *)); + */ +int +__memp_env_refresh(env) + ENV *env; +{ + BH *bhp; + BH_FROZEN_ALLOC *frozen_alloc; + DB_MPOOL *dbmp; + DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; + DB_MPREG *mpreg; + MPOOL *mp, *c_mp; + REGINFO *infop; + u_int32_t bucket, i, nreg; + int ret, t_ret; + + ret = 0; + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + nreg = mp->nreg; + hp = R_ADDR(&dbmp->reginfo[0], mp->htab); + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (!F_ISSET(env, ENV_PRIVATE)) + goto not_priv; + + /* Discard buffers. */ + for (i = 0; i < nreg; ++i) { + infop = &dbmp->reginfo[i]; + c_mp = infop->primary; + for (hp = R_ADDR(infop, c_mp->htab), bucket = 0; + bucket < c_mp->htab_buckets; ++hp, ++bucket) { + while ((bhp = SH_TAILQ_FIRST( + &hp->hash_bucket, __bh)) != NULL) + if (F_ISSET(bhp, BH_FROZEN)) + SH_TAILQ_REMOVE( + &hp->hash_bucket, bhp, + hq, __bh); + else { + if (F_ISSET(bhp, BH_DIRTY)) { + atomic_dec(env, + &hp->hash_page_dirty); + F_CLR(bhp, + BH_DIRTY | BH_DIRTY_CREATE); + } + atomic_inc(env, &bhp->ref); + if ((t_ret = __memp_bhfree(dbmp, infop, + R_ADDR(dbmp->reginfo, + bhp->mf_offset), hp, bhp, + BH_FREE_FREEMEM | + BH_FREE_UNLOCKED)) != 0 && ret == 0) + ret = t_ret; + } + } + MPOOL_REGION_LOCK(env, infop); + while ((frozen_alloc = SH_TAILQ_FIRST( + &c_mp->alloc_frozen, __bh_frozen_a)) != NULL) { + SH_TAILQ_REMOVE(&c_mp->alloc_frozen, frozen_alloc, + links, __bh_frozen_a); + __env_alloc_free(infop, frozen_alloc); + } + MPOOL_REGION_UNLOCK(env, infop); + } + +not_priv: + /* Discard DB_MPOOLFILEs. */ + while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL) + if ((t_ret = __memp_fclose(dbmfp, 0)) != 0 && ret == 0) + ret = t_ret; + + /* Discard DB_MPREGs. 
*/ + if (dbmp->pg_inout != NULL) + __os_free(env, dbmp->pg_inout); + while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) { + LIST_REMOVE(mpreg, q); + __os_free(env, mpreg); + } + + /* Discard the DB_MPOOL thread mutex. */ + if ((t_ret = __mutex_free(env, &dbmp->mutex)) != 0 && ret == 0) + ret = t_ret; + + if (F_ISSET(env, ENV_PRIVATE)) { + /* Discard REGION IDs. */ + infop = &dbmp->reginfo[0]; + infop->mtx_alloc = MUTEX_INVALID; + __memp_free(infop, R_ADDR(infop, mp->regids)); + + /* Discard all the MPOOLFILEs. */ + if ((t_ret = __memp_discard_all_mpfs(env, mp)) != 0 && ret == 0) + ret = t_ret; + /* Discard the File table. */ + __memp_free(infop, R_ADDR(infop, mp->ftab)); + + /* Discard Hash tables. */ + for (i = 0; i < nreg; ++i) { + infop = &dbmp->reginfo[i]; + c_mp = infop->primary; + infop->mtx_alloc = MUTEX_INVALID; + __memp_free(infop, R_ADDR(infop, c_mp->htab)); + } + } + + /* Detach from the region. */ + for (i = 0; i < nreg; ++i) { + infop = &dbmp->reginfo[i]; + if ((t_ret = + __env_region_detach(env, infop, 0)) != 0 && ret == 0) + ret = t_ret; + } + + /* Discard DB_MPOOL. */ + __os_free(env, dbmp->reginfo); + __os_free(env, dbmp); + + env->mp_handle = NULL; + return (ret); +} diff --git a/src/mp/mp_register.c b/src/mp/mp_register.c new file mode 100644 index 00000000..76781546 --- /dev/null +++ b/src/mp/mp_register.c @@ -0,0 +1,116 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" + +/* + * memp_register_pp -- + * ENV->memp_register pre/post processing. 
+ * + * PUBLIC: int __memp_register_pp __P((DB_ENV *, int, + * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *), + * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); + */ +int +__memp_register_pp(dbenv, ftype, pgin, pgout) + DB_ENV *dbenv; + int ftype; + int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *)); + int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *)); +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "DB_ENV->memp_register", DB_INIT_MPOOL); + + if (REP_ON(env)) { + __db_errx(env, DB_STR_A("3001", + "%smethod not permitted when replication is configured", + "%s"), "DB_ENV->memp_register: "); + return (EINVAL); + } + + ENV_ENTER(env, ip); + ret = __memp_register(env, ftype, pgin, pgout); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * memp_register -- + * ENV->memp_register. + * + * PUBLIC: int __memp_register __P((ENV *, int, + * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *), + * PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); + */ +int +__memp_register(env, ftype, pgin, pgout) + ENV *env; + int ftype; + int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *)); + int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *)); +{ + DB_MPOOL *dbmp; + DB_MPREG *mpreg; + int ret; + + dbmp = env->mp_handle; + + /* + * We keep the DB pgin/pgout functions outside of the linked list + * to avoid locking/unlocking the linked list on every page I/O. + * + * The Berkeley DB I/O conversion functions are registered when the + * environment is first created, so there's no need for locking here. + */ + if (ftype == DB_FTYPE_SET) { + if (dbmp->pg_inout != NULL) + return (0); + if ((ret = + __os_malloc(env, sizeof(DB_MPREG), &dbmp->pg_inout)) != 0) + return (ret); + dbmp->pg_inout->ftype = ftype; + dbmp->pg_inout->pgin = pgin; + dbmp->pg_inout->pgout = pgout; + return (0); + } + + /* + * The item may already have been registered. 
If already registered, + * just update the entry, although it's probably unchanged. + */ + MUTEX_LOCK(env, dbmp->mutex); + LIST_FOREACH(mpreg, &dbmp->dbregq, q) + if (mpreg->ftype == ftype) { + mpreg->pgin = pgin; + mpreg->pgout = pgout; + break; + } + + if (mpreg == NULL) { /* New entry. */ + if ((ret = __os_malloc(env, sizeof(DB_MPREG), &mpreg)) != 0) + return (ret); + mpreg->ftype = ftype; + mpreg->pgin = pgin; + mpreg->pgout = pgout; + + LIST_INSERT_HEAD(&dbmp->dbregq, mpreg, q); + } + MUTEX_UNLOCK(env, dbmp->mutex); + + return (0); +} diff --git a/src/mp/mp_resize.c b/src/mp/mp_resize.c new file mode 100644 index 00000000..727ae020 --- /dev/null +++ b/src/mp/mp_resize.c @@ -0,0 +1,605 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __memp_add_bucket __P((DB_MPOOL *)); +static int __memp_add_region __P((DB_MPOOL *)); +static int __memp_map_regions __P((DB_MPOOL *)); +static int __memp_merge_buckets + __P((DB_MPOOL *, u_int32_t, u_int32_t, u_int32_t)); +static int __memp_remove_bucket __P((DB_MPOOL *)); +static int __memp_remove_region __P((DB_MPOOL *)); + +/* + * PUBLIC: int __memp_get_bucket __P((ENV *, MPOOLFILE *, + * PUBLIC: db_pgno_t, REGINFO **, DB_MPOOL_HASH **, u_int32_t *)); + */ +int +__memp_get_bucket(env, mfp, pgno, infopp, hpp, bucketp) + ENV *env; + MPOOLFILE *mfp; + db_pgno_t pgno; + REGINFO **infopp; + DB_MPOOL_HASH **hpp; + u_int32_t *bucketp; +{ + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOL *c_mp, *mp; + REGINFO *infop; + roff_t mf_offset; + u_int32_t bucket, nbuckets, new_bucket, new_nbuckets, region; + u_int32_t *regids; + int ret; + + dbmp = env->mp_handle; + mf_offset = R_OFFSET(dbmp->reginfo, mfp); + mp = dbmp->reginfo[0].primary; + ret = 0; + + for (;;) { + nbuckets = mp->nbuckets; + MP_BUCKET(mf_offset, pgno, 
nbuckets, bucket); + + /* + * Once we work out which region we are looking in, we have to + * check that we have that region mapped, and that the version + * we have matches the ID in the main mpool region. Otherwise + * we have to go and map in any regions that don't match and + * retry. + */ + region = NREGION(mp, bucket); + regids = R_ADDR(dbmp->reginfo, mp->regids); + + for (;;) { + infop = *infopp = &dbmp->reginfo[region]; + c_mp = infop->primary; + + /* If we have the correct region mapped, we're done. */ + if (c_mp != NULL && regids[region] == infop->id) + break; + if ((ret = __memp_map_regions(dbmp)) != 0) + return (ret); + } + + /* If our caller wants the hash bucket, lock it here. */ + if (hpp != NULL) { + hp = R_ADDR(infop, c_mp->htab); + hp = &hp[bucket - region * mp->htab_buckets]; + + MUTEX_READLOCK(env, hp->mtx_hash); + + /* + * Check that we still have the correct region mapped. + */ + if (regids[region] != infop->id) { + MUTEX_UNLOCK(env, hp->mtx_hash); + continue; + } + + /* + * Now that the bucket is locked, we need to check that + * the cache has not been resized while we waited. 
+ */ + new_nbuckets = mp->nbuckets; + if (nbuckets != new_nbuckets) { + MP_BUCKET(mf_offset, pgno, new_nbuckets, + new_bucket); + + if (new_bucket != bucket) { + MUTEX_UNLOCK(env, hp->mtx_hash); + continue; + } + } + + *hpp = hp; + } + + break; + } + + if (bucketp != NULL) + *bucketp = bucket - region * mp->htab_buckets; + return (ret); +} + +static int +__memp_merge_buckets(dbmp, new_nbuckets, old_bucket, new_bucket) + DB_MPOOL *dbmp; + u_int32_t new_nbuckets, old_bucket, new_bucket; +{ + BH *alloc_bhp, *bhp, *current_bhp, *new_bhp, *next_bhp; + DB_LSN vlsn; + DB_MPOOL_HASH *new_hp, *old_hp; + ENV *env; + MPOOL *mp, *new_mp, *old_mp; + MPOOLFILE *mfp; + REGINFO *new_infop, *old_infop; + u_int32_t bucket, high_mask, new_region, old_region; + int ret; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + new_bhp = NULL; + ret = 0; + + MP_MASK(new_nbuckets, high_mask); + + old_region = NREGION(mp, old_bucket); + old_infop = &dbmp->reginfo[old_region]; + old_mp = old_infop->primary; + old_hp = R_ADDR(old_infop, old_mp->htab); + old_hp = &old_hp[old_bucket - old_region * mp->htab_buckets]; + + new_region = NREGION(mp, new_bucket); + new_infop = &dbmp->reginfo[new_region]; + new_mp = new_infop->primary; + new_hp = R_ADDR(new_infop, new_mp->htab); + new_hp = &new_hp[new_bucket - new_region * mp->htab_buckets]; + + /* + * Before merging, we need to check that there are no old buffers left + * in the target hash bucket after a previous split. + */ +free_old: + MUTEX_LOCK(env, new_hp->mtx_hash); + SH_TAILQ_FOREACH(bhp, &new_hp->hash_bucket, hq, __bh) { + MP_BUCKET(bhp->mf_offset, bhp->pgno, mp->nbuckets, bucket); + + if (bucket != new_bucket) { + /* + * There is no way that an old buffer can be locked + * after a split, since everyone will look for it in + * the new hash bucket. 
+ */ + DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY) && + atomic_read(&bhp->ref) == 0); + atomic_inc(env, &bhp->ref); + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + if ((ret = __memp_bhfree(dbmp, new_infop, + mfp, new_hp, bhp, BH_FREE_FREEMEM)) != 0) { + MUTEX_UNLOCK(env, new_hp->mtx_hash); + return (ret); + } + + /* + * The free has modified the list of buffers and + * dropped the mutex. We need to start again. + */ + goto free_old; + } + } + MUTEX_UNLOCK(env, new_hp->mtx_hash); + + /* + * Before we begin, make sure that all of the buffers we care about are + * not in use and not frozen. We do this because we can't drop the old + * hash bucket mutex once we start moving buffers around. + */ +retry: MUTEX_LOCK(env, old_hp->mtx_hash); + SH_TAILQ_FOREACH(bhp, &old_hp->hash_bucket, hq, __bh) { + MP_HASH_BUCKET(MP_HASH(bhp->mf_offset, bhp->pgno), + new_nbuckets, high_mask, bucket); + + if (bucket == new_bucket && atomic_read(&bhp->ref) != 0) { + MUTEX_UNLOCK(env, old_hp->mtx_hash); + __os_yield(env, 0, 0); + goto retry; + } else if (bucket == new_bucket && F_ISSET(bhp, BH_FROZEN)) { + atomic_inc(env, &bhp->ref); + /* + * We need to drop the hash bucket mutex to avoid + * self-blocking when we allocate a new buffer. + */ + MUTEX_UNLOCK(env, old_hp->mtx_hash); + MUTEX_LOCK(env, bhp->mtx_buf); + F_SET(bhp, BH_EXCLUSIVE); + if (BH_OBSOLETE(bhp, old_hp->old_reader, vlsn)) + alloc_bhp = NULL; + else { + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + if ((ret = __memp_alloc(dbmp, + old_infop, mfp, 0, NULL, &alloc_bhp)) != 0) + goto err; + } + /* + * But we need to lock the hash bucket again before + * thawing the buffer. The call to __memp_bh_thaw + * will unlock the hash bucket mutex. 
+ */ + MUTEX_LOCK(env, old_hp->mtx_hash); + if (F_ISSET(bhp, BH_THAWED)) { + ret = __memp_bhfree(dbmp, old_infop, NULL, NULL, + alloc_bhp, + BH_FREE_FREEMEM | BH_FREE_UNLOCKED); + } else + ret = __memp_bh_thaw(dbmp, + old_infop, old_hp, bhp, alloc_bhp); + + /* + * We've dropped the mutex in order to thaw, so we need + * to go back to the beginning and check that all of + * the buffers we care about are still unlocked and + * unreferenced. + */ +err: atomic_dec(env, &bhp->ref); + F_CLR(bhp, BH_EXCLUSIVE); + MUTEX_UNLOCK(env, bhp->mtx_buf); + if (ret != 0) + return (ret); + goto retry; + } + } + + /* + * We now know that all of the buffers we care about are unlocked and + * unreferenced. Go ahead and copy them. + */ + SH_TAILQ_FOREACH(bhp, &old_hp->hash_bucket, hq, __bh) { + MP_HASH_BUCKET(MP_HASH(bhp->mf_offset, bhp->pgno), + new_nbuckets, high_mask, bucket); + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + + /* + * We ignore buffers that don't hash to the new bucket. We + * could also ignore clean buffers which are not part of a + * multiversion chain as long as they have a backing file. + */ + if (bucket != new_bucket || (!F_ISSET(bhp, BH_DIRTY) && + SH_CHAIN_SINGLETON(bhp, vc) && !mfp->no_backing_file)) + continue; + + for (current_bhp = bhp, next_bhp = NULL; + current_bhp != NULL; + current_bhp = SH_CHAIN_PREV(current_bhp, vc, __bh), + next_bhp = alloc_bhp) { + /* Allocate in the new region. */ + if ((ret = __memp_alloc(dbmp, + new_infop, mfp, 0, NULL, &alloc_bhp)) != 0) + break; + + alloc_bhp->ref = current_bhp->ref; + alloc_bhp->priority = current_bhp->priority; + alloc_bhp->pgno = current_bhp->pgno; + alloc_bhp->mf_offset = current_bhp->mf_offset; + alloc_bhp->flags = current_bhp->flags; + alloc_bhp->td_off = current_bhp->td_off; + + /* + * We've duplicated the buffer, so now we need to + * update reference counts, including the counts in the + * per-MPOOLFILE and the transaction detail (for MVCC + * buffers). 
+ */ + MUTEX_LOCK(env, mfp->mutex); + ++mfp->block_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + + if (alloc_bhp->td_off != INVALID_ROFF && + (ret = __txn_add_buffer(env, + R_ADDR(&env->tx_handle->reginfo, + alloc_bhp->td_off))) != 0) + break; + + memcpy(alloc_bhp->buf, bhp->buf, mfp->pagesize); + + /* + * We build up the MVCC chain first, then insert the + * head (stored in new_bhp) once. + */ + if (next_bhp == NULL) { + SH_CHAIN_INIT(alloc_bhp, vc); + new_bhp = alloc_bhp; + } else + SH_CHAIN_INSERT_BEFORE( + next_bhp, alloc_bhp, vc, __bh); + } + + DB_ASSERT(env, new_hp->mtx_hash != old_hp->mtx_hash); + MUTEX_LOCK(env, new_hp->mtx_hash); + SH_TAILQ_INSERT_TAIL(&new_hp->hash_bucket, new_bhp, hq); + if (F_ISSET(new_bhp, BH_DIRTY)) + atomic_inc(env, &new_hp->hash_page_dirty); + + if (F_ISSET(bhp, BH_DIRTY)) { + F_CLR(bhp, BH_DIRTY); + atomic_dec(env, &old_hp->hash_page_dirty); + } + MUTEX_UNLOCK(env, new_hp->mtx_hash); + } + + if (ret == 0) + mp->nbuckets = new_nbuckets; + MUTEX_UNLOCK(env, old_hp->mtx_hash); + + return (ret); +} + +static int +__memp_add_bucket(dbmp) + DB_MPOOL *dbmp; +{ + ENV *env; + MPOOL *mp; + u_int32_t high_mask, new_bucket, old_bucket; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + + new_bucket = mp->nbuckets; + /* We should always be adding buckets to the last region. */ + DB_ASSERT(env, NREGION(mp, new_bucket) == mp->nreg - 1); + MP_MASK(mp->nbuckets, high_mask); + old_bucket = new_bucket & (high_mask >> 1); + + /* + * With fixed-sized regions, the new region is always smaller than the + * existing total cache size, so buffers always need to be copied. If + * we implement variable region sizes, it's possible that we will be + * splitting a hash bucket in the new region. Catch that here. 
+ */ + DB_ASSERT(env, NREGION(mp, old_bucket) != NREGION(mp, new_bucket)); + + return (__memp_merge_buckets(dbmp, mp->nbuckets + 1, + old_bucket, new_bucket)); +} + +static int +__memp_add_region(dbmp) + DB_MPOOL *dbmp; +{ + ENV *env; + MPOOL *mp; + REGINFO *infop; + int ret; + roff_t cache_size, reg_size; + u_int i; + u_int32_t *regids; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + cache_size = (roff_t)mp->gbytes * GIGABYTE + mp->bytes; + + /* All cache regions are the same size. */ + reg_size = dbmp->reginfo[0].rp->size; + ret = 0; + + infop = &dbmp->reginfo[mp->nreg]; + infop->env = env; + infop->type = REGION_TYPE_MPOOL; + infop->id = INVALID_REGION_ID; + infop->flags = REGION_CREATE_OK; + if ((ret = __env_region_attach(env, infop, reg_size, reg_size)) != 0) + return (ret); + if ((ret = __memp_init(env, + dbmp, mp->nreg, mp->htab_buckets, mp->max_nreg)) != 0) + return (ret); + cache_size += reg_size; + mp->gbytes = (u_int32_t)(cache_size / GIGABYTE); + mp->bytes = (u_int32_t)(cache_size % GIGABYTE); + regids = R_ADDR(dbmp->reginfo, mp->regids); + regids[mp->nreg++] = infop->id; + + for (i = 0; i < mp->htab_buckets; i++) + if ((ret = __memp_add_bucket(dbmp)) != 0) + break; + + return (ret); +} + +static int +__memp_remove_bucket(dbmp) + DB_MPOOL *dbmp; +{ + ENV *env; + MPOOL *mp; + u_int32_t high_mask, new_bucket, old_bucket; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + + old_bucket = mp->nbuckets - 1; + + /* We should always be removing buckets from the last region. 
*/ + DB_ASSERT(env, NREGION(mp, old_bucket) == mp->nreg - 1); + MP_MASK(mp->nbuckets - 1, high_mask); + new_bucket = old_bucket & (high_mask >> 1); + + return (__memp_merge_buckets(dbmp, mp->nbuckets - 1, + old_bucket, new_bucket)); +} + +static int +__memp_remove_region(dbmp) + DB_MPOOL *dbmp; +{ + DB_MPOOL_HASH *hp; + ENV *env; + MPOOL *mp; + REGINFO *infop; + int ret; + roff_t cache_size, reg_size; + u_int i; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + reg_size = dbmp->reginfo[0].rp->size; + cache_size = (roff_t)mp->gbytes * GIGABYTE + mp->bytes; + ret = 0; + + if (mp->nreg == 1) { + __db_errx(env, DB_STR("3019", + "cannot remove the last cache")); + return (EINVAL); + } + + for (i = 0; i < mp->htab_buckets; i++) + if ((ret = __memp_remove_bucket(dbmp)) != 0) + return (ret); + + /* Detach from the region then destroy it. */ + infop = &dbmp->reginfo[mp->nreg]; + if (F_ISSET(env, ENV_PRIVATE)) { + hp = R_ADDR(infop, ((MPOOL*)infop->primary)->htab); + for (i = 0; i < env->dbenv->mp_mtxcount; i++) + if ((ret = __mutex_free(env, &hp[i].mtx_hash)) != 0) + return (ret); + } + + ret = __env_region_detach(env, infop, 1); + if (ret == 0) { + mp->nreg--; + cache_size -= reg_size; + mp->gbytes = (u_int32_t)(cache_size / GIGABYTE); + mp->bytes = (u_int32_t)(cache_size % GIGABYTE); + } + + return (ret); +} + +static int +__memp_map_regions(dbmp) + DB_MPOOL *dbmp; +{ + ENV *env; + MPOOL *mp; + int ret; + u_int i; + u_int32_t *regids; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + regids = R_ADDR(dbmp->reginfo, mp->regids); + ret = 0; + + for (i = 1; i < mp->nreg; ++i) { + if (dbmp->reginfo[i].primary != NULL && + dbmp->reginfo[i].id == regids[i]) + continue; + + if (dbmp->reginfo[i].primary != NULL) + ret = __env_region_detach(env, &dbmp->reginfo[i], 0); + + dbmp->reginfo[i].env = env; + dbmp->reginfo[i].type = REGION_TYPE_MPOOL; + dbmp->reginfo[i].id = regids[i]; + dbmp->reginfo[i].flags = REGION_JOIN_OK; + if ((ret = + __env_region_attach(env, 
&dbmp->reginfo[i], 0, 0)) != 0) + return (ret); + dbmp->reginfo[i].primary = R_ADDR(&dbmp->reginfo[i], + dbmp->reginfo[i].rp->primary); + } + + for (; i < mp->max_nreg; i++) + if (dbmp->reginfo[i].primary != NULL && + (ret = __env_region_detach(env, + &dbmp->reginfo[i], 0)) != 0) + break; + + return (ret); +} + +/* + * PUBLIC: int __memp_resize __P((DB_MPOOL *, u_int32_t, u_int32_t)); + */ +int +__memp_resize(dbmp, gbytes, bytes) + DB_MPOOL *dbmp; + u_int32_t gbytes, bytes; +{ + ENV *env; + MPOOL *mp; + int ret; + u_int32_t ncache; + roff_t reg_size, total_size; + + env = dbmp->env; + mp = dbmp->reginfo[0].primary; + reg_size = dbmp->reginfo[0].rp->size; + total_size = (roff_t)gbytes * GIGABYTE + bytes; + ncache = (u_int32_t)((total_size + reg_size / 2) / reg_size); + + if (ncache < 1) + ncache = 1; + else if (ncache > mp->max_nreg) { + __db_errx(env, DB_STR_A("3020", + "cannot resize to %lu cache regions: maximum is %lu", + "%lu %lu"), (u_long)ncache, (u_long)mp->max_nreg); + return (EINVAL); + } + + ret = 0; + MUTEX_LOCK(env, mp->mtx_resize); + while (mp->nreg != ncache) + if ((ret = (mp->nreg < ncache ? + __memp_add_region(dbmp) : + __memp_remove_region(dbmp))) != 0) + break; + MUTEX_UNLOCK(env, mp->mtx_resize); + + return (ret); +} + +/* + * PUBLIC: int __memp_get_cache_max __P((DB_ENV *, u_int32_t *, u_int32_t *)); + */ +int +__memp_get_cache_max(dbenv, max_gbytesp, max_bytesp) + DB_ENV *dbenv; + u_int32_t *max_gbytesp, *max_bytesp; +{ + DB_MPOOL *dbmp; + ENV *env; + MPOOL *mp; + roff_t reg_size, max_size; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->mp_handle, "DB_ENV->get_mp_max_ncache", DB_INIT_MPOOL); + + if (MPOOL_ON(env)) { + /* Cannot be set after open, no lock required to read. 
*/ + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + reg_size = dbmp->reginfo[0].rp->size; + max_size = mp->max_nreg * reg_size; + *max_gbytesp = (u_int32_t)(max_size / GIGABYTE); + *max_bytesp = (u_int32_t)(max_size % GIGABYTE); + } else { + *max_gbytesp = dbenv->mp_max_gbytes; + *max_bytesp = dbenv->mp_max_bytes; + } + + return (0); +} + +/* + * PUBLIC: int __memp_set_cache_max __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__memp_set_cache_max(dbenv, max_gbytes, max_bytes) + DB_ENV *dbenv; + u_int32_t max_gbytes, max_bytes; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_cache_max"); + dbenv->mp_max_gbytes = max_gbytes; + dbenv->mp_max_bytes = max_bytes; + + return (0); +} diff --git a/src/mp/mp_stat.c b/src/mp/mp_stat.c new file mode 100644 index 00000000..b3679994 --- /dev/null +++ b/src/mp/mp_stat.c @@ -0,0 +1,900 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +#ifdef HAVE_STATISTICS +static void __memp_print_bh __P((ENV *, + DB_MPOOL *, const char *, BH *, roff_t *)); +static int __memp_print_all __P((ENV *, u_int32_t)); +static int __memp_print_stats __P((ENV *, u_int32_t)); +static int __memp_print_hash __P((ENV *, + DB_MPOOL *, REGINFO *, roff_t *, u_int32_t)); +static int __memp_stat __P((ENV *, + DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t)); +static void __memp_stat_wait + __P((ENV *, REGINFO *, MPOOL *, DB_MPOOL_STAT *, u_int32_t)); +static int __memp_file_stats __P((ENV *, + MPOOLFILE *, void *, u_int32_t *, u_int32_t)); +static int __memp_count_files __P((ENV *, + MPOOLFILE *, void *, u_int32_t *, u_int32_t)); +static int __memp_get_files __P((ENV *, + MPOOLFILE *, void *, u_int32_t *, u_int32_t)); +static int __memp_print_files __P((ENV *, + MPOOLFILE *, void *, u_int32_t *, u_int32_t)); + +/* + * __memp_stat_pp -- + * DB_ENV->memp_stat pre/post processing. 
+ * + * PUBLIC: int __memp_stat_pp + * PUBLIC: __P((DB_ENV *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t)); + */ +int +__memp_stat_pp(dbenv, gspp, fspp, flags) + DB_ENV *dbenv; + DB_MPOOL_STAT **gspp; + DB_MPOOL_FSTAT ***fspp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "DB_ENV->memp_stat", DB_INIT_MPOOL); + + if ((ret = __db_fchk(env, + "DB_ENV->memp_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__memp_stat(env, gspp, fspp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_stat -- + * ENV->memp_stat + */ +static int +__memp_stat(env, gspp, fspp, flags) + ENV *env; + DB_MPOOL_STAT **gspp; + DB_MPOOL_FSTAT ***fspp; + u_int32_t flags; +{ + DB_MPOOL *dbmp; + DB_MPOOL_FSTAT **tfsp; + DB_MPOOL_STAT *sp; + MPOOL *c_mp, *mp; + size_t len; + int ret; + u_int32_t i; + uintmax_t tmp_wait, tmp_nowait; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + + /* Global statistics. */ + if (gspp != NULL) { + *gspp = NULL; + + if ((ret = __os_umalloc(env, sizeof(**gspp), gspp)) != 0) + return (ret); + memset(*gspp, 0, sizeof(**gspp)); + sp = *gspp; + + /* + * Initialization and information that is not maintained on + * a per-cache basis. Note that configuration information + * may be modified at any time, and so we have to lock. 
+ */ + sp->st_gbytes = mp->gbytes; + sp->st_bytes = mp->bytes; + sp->st_pagesize = mp->pagesize; + sp->st_ncache = mp->nreg; + sp->st_max_ncache = mp->max_nreg; + sp->st_regsize = dbmp->reginfo[0].rp->size; + sp->st_regmax = dbmp->reginfo[0].rp->max; + sp->st_sync_interrupted = mp->stat.st_sync_interrupted; + + MPOOL_SYSTEM_LOCK(env); + sp->st_mmapsize = mp->mp_mmapsize; + sp->st_maxopenfd = mp->mp_maxopenfd; + sp->st_maxwrite = mp->mp_maxwrite; + sp->st_maxwrite_sleep = mp->mp_maxwrite_sleep; + MPOOL_SYSTEM_UNLOCK(env); + + /* Walk the cache list and accumulate the global information. */ + for (i = 0; i < mp->nreg; ++i) { + c_mp = dbmp->reginfo[i].primary; + + sp->st_map += c_mp->stat.st_map; + sp->st_cache_hit += c_mp->stat.st_cache_hit; + sp->st_cache_miss += c_mp->stat.st_cache_miss; + sp->st_page_create += c_mp->stat.st_page_create; + sp->st_page_in += c_mp->stat.st_page_in; + sp->st_page_out += c_mp->stat.st_page_out; + sp->st_ro_evict += c_mp->stat.st_ro_evict; + sp->st_rw_evict += c_mp->stat.st_rw_evict; + sp->st_page_trickle += c_mp->stat.st_page_trickle; + sp->st_pages += c_mp->pages; + /* + * st_page_dirty calculated by __memp_stat_hash + * st_page_clean calculated here + */ + __memp_stat_hash( + &dbmp->reginfo[i], c_mp, &sp->st_page_dirty); + sp->st_page_clean = sp->st_pages - sp->st_page_dirty; + sp->st_hash_buckets += c_mp->htab_buckets; + sp->st_hash_mutexes += c_mp->htab_mutexes; + sp->st_hash_searches += c_mp->stat.st_hash_searches; + sp->st_hash_longest += c_mp->stat.st_hash_longest; + sp->st_hash_examined += c_mp->stat.st_hash_examined; + /* + * st_hash_nowait calculated by __memp_stat_wait + * st_hash_wait + */ + __memp_stat_wait( + env, &dbmp->reginfo[i], c_mp, sp, flags); + __mutex_set_wait_info(env, + c_mp->mtx_region, &tmp_wait, &tmp_nowait); + sp->st_region_nowait += tmp_nowait; + sp->st_region_wait += tmp_wait; + sp->st_alloc += c_mp->stat.st_alloc; + sp->st_alloc_buckets += c_mp->stat.st_alloc_buckets; + if (sp->st_alloc_max_buckets < + 
c_mp->stat.st_alloc_max_buckets) + sp->st_alloc_max_buckets = + c_mp->stat.st_alloc_max_buckets; + sp->st_alloc_pages += c_mp->stat.st_alloc_pages; + if (sp->st_alloc_max_pages < + c_mp->stat.st_alloc_max_pages) + sp->st_alloc_max_pages = + c_mp->stat.st_alloc_max_pages; + + if (LF_ISSET(DB_STAT_CLEAR)) { + if (!LF_ISSET(DB_STAT_SUBSYSTEM)) + __mutex_clear(env, c_mp->mtx_region); + + memset(&c_mp->stat, 0, sizeof(c_mp->stat)); + } + } + + /* + * We have duplicate statistics fields in per-file structures + * and the cache. The counters are only incremented in the + * per-file structures, except if a file is flushed from the + * mpool, at which time we copy its information into the cache + * statistics. We added the cache information above, now we + * add the per-file information. + */ + if ((ret = __memp_walk_files(env, mp, __memp_file_stats, + sp, NULL, fspp == NULL ? LF_ISSET(DB_STAT_CLEAR) : 0)) != 0) + return (ret); + } + + /* Per-file statistics. */ + if (fspp != NULL) { + *fspp = NULL; + + /* Count the MPOOLFILE structures. */ + i = 0; + len = 0; + if ((ret = __memp_walk_files(env, + mp, __memp_count_files, &len, &i, flags)) != 0) + return (ret); + + if (i == 0) + return (0); + len += sizeof(DB_MPOOL_FSTAT *); /* Trailing NULL */ + + /* Allocate space */ + if ((ret = __os_umalloc(env, len, fspp)) != 0) + return (ret); + + tfsp = *fspp; + *tfsp = NULL; + + /* + * Files may have been opened since we counted, don't walk + * off the end of the allocated space. 
+ */ + if ((ret = __memp_walk_files(env, + mp, __memp_get_files, &tfsp, &i, flags)) != 0) + return (ret); + + *++tfsp = NULL; + } + + return (0); +} + +static int +__memp_file_stats(env, mfp, argp, countp, flags) + ENV *env; + MPOOLFILE *mfp; + void *argp; + u_int32_t *countp; + u_int32_t flags; +{ + DB_MPOOL_STAT *sp; + + COMPQUIET(env, NULL); + COMPQUIET(countp, NULL); + + sp = argp; + + sp->st_map += mfp->stat.st_map; + sp->st_cache_hit += mfp->stat.st_cache_hit; + sp->st_cache_miss += mfp->stat.st_cache_miss; + sp->st_page_create += mfp->stat.st_page_create; + sp->st_page_in += mfp->stat.st_page_in; + sp->st_page_out += mfp->stat.st_page_out; + if (LF_ISSET(DB_STAT_CLEAR)) + memset(&mfp->stat, 0, sizeof(mfp->stat)); + + return (0); +} + +static int +__memp_count_files(env, mfp, argp, countp, flags) + ENV *env; + MPOOLFILE *mfp; + void *argp; + u_int32_t *countp; + u_int32_t flags; +{ + DB_MPOOL *dbmp; + size_t len; + + COMPQUIET(flags, 0); + dbmp = env->mp_handle; + len = *(size_t *)argp; + + (*countp)++; + len += sizeof(DB_MPOOL_FSTAT *) + + sizeof(DB_MPOOL_FSTAT) + strlen(__memp_fns(dbmp, mfp)) + 1; + + *(size_t *)argp = len; + return (0); +} + +/* + * __memp_get_files -- + * get file specific statistics + * + * Build each individual entry. We assume that an array of pointers are + * aligned correctly to be followed by an array of structures, which should + * be safe (in this particular case, the first element of the structure + * is a pointer, so we're doubly safe). The array is followed by space + * for the text file names. + */ +static int +__memp_get_files(env, mfp, argp, countp, flags) + ENV *env; + MPOOLFILE *mfp; + void *argp; + u_int32_t *countp; + u_int32_t flags; +{ + DB_MPOOL *dbmp; + DB_MPOOL_FSTAT **tfsp, *tstruct; + char *name, *tname; + size_t nlen; + + if (*countp == 0) + return (0); + + dbmp = env->mp_handle; + tfsp = *(DB_MPOOL_FSTAT ***)argp; + + if (*tfsp == NULL) { + /* Add 1 to count because we need to skip over the NULL. 
*/ + tstruct = (DB_MPOOL_FSTAT *)(tfsp + *countp + 1); + tname = (char *)(tstruct + *countp); + *tfsp = tstruct; + } else { + tstruct = *tfsp + 1; + tname = (*tfsp)->file_name + strlen((*tfsp)->file_name) + 1; + *++tfsp = tstruct; + } + + name = __memp_fns(dbmp, mfp); + nlen = strlen(name) + 1; + memcpy(tname, name, nlen); + memcpy(tstruct, &mfp->stat, sizeof(mfp->stat)); + tstruct->file_name = tname; + + /* Grab the pagesize from the mfp. */ + tstruct->st_pagesize = mfp->pagesize; + + *(DB_MPOOL_FSTAT ***)argp = tfsp; + (*countp)--; + + if (LF_ISSET(DB_STAT_CLEAR)) + memset(&mfp->stat, 0, sizeof(mfp->stat)); + + return (0); +} + +/* + * __memp_stat_print_pp -- + * ENV->memp_stat_print pre/post processing. + * + * PUBLIC: int __memp_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__memp_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "DB_ENV->memp_stat_print", DB_INIT_MPOOL); + +#define DB_STAT_MEMP_FLAGS \ + (DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR | DB_STAT_MEMP_HASH) + if ((ret = __db_fchk(env, + "DB_ENV->memp_stat_print", flags, DB_STAT_MEMP_FLAGS)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__memp_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +#define FMAP_ENTRIES 200 /* Files we map. */ + +/* + * __memp_stat_print -- + * ENV->memp_stat_print method. + * + * PUBLIC: int __memp_stat_print __P((ENV *, u_int32_t)); + */ +int +__memp_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __memp_print_stats(env, + LF_ISSET(DB_STAT_ALL) ? 
flags : orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_MEMP_HASH) && + (ret = __memp_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __memp_print_stats -- + * Display default mpool region statistics. + */ +static int +__memp_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_MPOOL_FSTAT **fsp, **tfsp; + DB_MPOOL_STAT *gsp; + int ret; + + if ((ret = __memp_stat(env, &gsp, &fsp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default cache region information:"); + __db_dlbytes(env, "Total cache size", + (u_long)gsp->st_gbytes, (u_long)0, (u_long)gsp->st_bytes); + __db_dl(env, "Number of caches", (u_long)gsp->st_ncache); + __db_dl(env, "Maximum number of caches", (u_long)gsp->st_max_ncache); + __db_dlbytes(env, "Pool individual cache size", + (u_long)0, (u_long)0, (u_long)gsp->st_regsize); + __db_dlbytes(env, "Pool individual cache max", + (u_long)0, (u_long)0, (u_long)gsp->st_regmax); + __db_dlbytes(env, "Maximum memory-mapped file size", + (u_long)0, (u_long)0, (u_long)gsp->st_mmapsize); + STAT_LONG("Maximum open file descriptors", gsp->st_maxopenfd); + STAT_LONG("Maximum sequential buffer writes", gsp->st_maxwrite); + STAT_LONG("Sleep after writing maximum sequential buffers", + gsp->st_maxwrite_sleep); + __db_dl(env, + "Requested pages mapped into the process' address space", + (u_long)gsp->st_map); + __db_dl_pct(env, "Requested pages found in the cache", + (u_long)gsp->st_cache_hit, DB_PCT( + gsp->st_cache_hit, gsp->st_cache_hit + gsp->st_cache_miss), NULL); + __db_dl(env, "Requested pages not found in the cache", + (u_long)gsp->st_cache_miss); + __db_dl(env, + "Pages created in the cache", (u_long)gsp->st_page_create); + __db_dl(env, "Pages read into the cache", (u_long)gsp->st_page_in); + __db_dl(env, "Pages written from the cache to the backing file", + (u_long)gsp->st_page_out); + __db_dl(env, "Clean pages forced from the cache", + 
(u_long)gsp->st_ro_evict); + __db_dl(env, "Dirty pages forced from the cache", + (u_long)gsp->st_rw_evict); + __db_dl(env, "Dirty pages written by trickle-sync thread", + (u_long)gsp->st_page_trickle); + __db_dl(env, "Current total page count", + (u_long)gsp->st_pages); + __db_dl(env, "Current clean page count", + (u_long)gsp->st_page_clean); + __db_dl(env, "Current dirty page count", + (u_long)gsp->st_page_dirty); + __db_dl(env, "Number of hash buckets used for page location", + (u_long)gsp->st_hash_buckets); + __db_dl(env, "Number of mutexes for the hash buckets", + (u_long)gsp->st_hash_mutexes); + __db_dl(env, "Assumed page size used", + (u_long)gsp->st_pagesize); + __db_dl(env, + "Total number of times hash chains searched for a page", + (u_long)gsp->st_hash_searches); + __db_dl(env, "The longest hash chain searched for a page", + (u_long)gsp->st_hash_longest); + __db_dl(env, + "Total number of hash chain entries checked for page", + (u_long)gsp->st_hash_examined); + __db_dl_pct(env, + "The number of hash bucket locks that required waiting", + (u_long)gsp->st_hash_wait, DB_PCT( + gsp->st_hash_wait, gsp->st_hash_wait + gsp->st_hash_nowait), NULL); + __db_dl_pct(env, + "The maximum number of times any hash bucket lock was waited for", + (u_long)gsp->st_hash_max_wait, DB_PCT(gsp->st_hash_max_wait, + gsp->st_hash_max_wait + gsp->st_hash_max_nowait), NULL); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)gsp->st_region_wait, DB_PCT(gsp->st_region_wait, + gsp->st_region_wait + gsp->st_region_nowait), NULL); + __db_dl(env, "The number of buffers frozen", + (u_long)gsp->st_mvcc_frozen); + __db_dl(env, "The number of buffers thawed", + (u_long)gsp->st_mvcc_thawed); + __db_dl(env, "The number of frozen buffers freed", + (u_long)gsp->st_mvcc_freed); + __db_dl(env, "The number of page allocations", (u_long)gsp->st_alloc); + __db_dl(env, + "The number of hash buckets examined during allocations", + (u_long)gsp->st_alloc_buckets); + 
__db_dl(env, + "The maximum number of hash buckets examined for an allocation", + (u_long)gsp->st_alloc_max_buckets); + __db_dl(env, "The number of pages examined during allocations", + (u_long)gsp->st_alloc_pages); + __db_dl(env, "The max number of pages examined for an allocation", + (u_long)gsp->st_alloc_max_pages); + __db_dl(env, "Threads waited on page I/O", (u_long)gsp->st_io_wait); + __db_dl(env, "The number of times a sync is interrupted", + (u_long)gsp->st_sync_interrupted); + + for (tfsp = fsp; fsp != NULL && *tfsp != NULL; ++tfsp) { + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Pool File: %s", (*tfsp)->file_name); + __db_dl(env, "Page size", (u_long)(*tfsp)->st_pagesize); + __db_dl(env, + "Requested pages mapped into the process' address space", + (u_long)(*tfsp)->st_map); + __db_dl_pct(env, "Requested pages found in the cache", + (u_long)(*tfsp)->st_cache_hit, DB_PCT((*tfsp)->st_cache_hit, + (*tfsp)->st_cache_hit + (*tfsp)->st_cache_miss), NULL); + __db_dl(env, "Requested pages not found in the cache", + (u_long)(*tfsp)->st_cache_miss); + __db_dl(env, "Pages created in the cache", + (u_long)(*tfsp)->st_page_create); + __db_dl(env, "Pages read into the cache", + (u_long)(*tfsp)->st_page_in); + __db_dl(env, + "Pages written from the cache to the backing file", + (u_long)(*tfsp)->st_page_out); + } + + __os_ufree(env, fsp); + __os_ufree(env, gsp); + return (0); +} + +/* + * __memp_print_all -- + * Display debugging mpool region statistics. 
+ */ +static int +__memp_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN cfn[] = { + { DB_MPOOL_NOFILE, "DB_MPOOL_NOFILE" }, + { DB_MPOOL_UNLINK, "DB_MPOOL_UNLINK" }, + { 0, NULL } + }; + DB_MPOOL *dbmp; + DB_MPOOLFILE *dbmfp; + MPOOL *mp; + roff_t fmap[FMAP_ENTRIES + 1]; + u_int32_t i, cnt; + int ret; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + ret = 0; + + MPOOL_SYSTEM_LOCK(env); + + __db_print_reginfo(env, dbmp->reginfo, "Mpool", flags); + __db_msg(env, "%s", DB_GLOBAL(db_line)); + + __db_msg(env, "MPOOL structure:"); + __mutex_print_debug_single( + env, "MPOOL region mutex", mp->mtx_region, flags); + STAT_LSN("Maximum checkpoint LSN", &mp->lsn); + STAT_ULONG("Hash table entries", mp->htab_buckets); + STAT_ULONG("Hash table mutexes", mp->htab_mutexes); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_MPOOL handle information:"); + __mutex_print_debug_single( + env, "DB_MPOOL handle mutex", dbmp->mutex, flags); + STAT_ULONG("Underlying cache regions", mp->nreg); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_MPOOLFILE structures:"); + for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq); + dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) { + __db_msg(env, "File #%lu: %s: per-process, %s", + (u_long)cnt + 1, __memp_fn(dbmfp), + F_ISSET(dbmfp, MP_READONLY) ? 
"readonly" : "read/write"); + STAT_ULONG("Reference count", dbmfp->ref); + STAT_ULONG("Pinned block reference count", dbmfp->ref); + STAT_ULONG("Clear length", dbmfp->clear_len); + __db_print_fileid(env, dbmfp->fileid, "\tID"); + STAT_ULONG("File type", dbmfp->ftype); + STAT_ULONG("LSN offset", dbmfp->lsn_offset); + STAT_ULONG("Max gbytes", dbmfp->gbytes); + STAT_ULONG("Max bytes", dbmfp->bytes); + STAT_ULONG("Cache priority", dbmfp->priority); + STAT_POINTER("mmap address", dbmfp->addr); + STAT_ULONG("mmap length", dbmfp->len); + __db_prflags(env, NULL, dbmfp->flags, cfn, NULL, "\tFlags"); + __db_print_fh(env, "File handle", dbmfp->fhp, flags); + } + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "MPOOLFILE structures:"); + cnt = 0; + ret = __memp_walk_files(env, mp, __memp_print_files, fmap, &cnt, flags); + MPOOL_SYSTEM_UNLOCK(env); + if (ret != 0) + return (ret); + + if (cnt < FMAP_ENTRIES) + fmap[cnt] = INVALID_ROFF; + else + fmap[FMAP_ENTRIES] = INVALID_ROFF; + + /* Dump the individual caches. 
*/ + for (i = 0; i < mp->nreg; ++i) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Cache #%d:", i + 1); + if (i > 0) + __env_alloc_print(&dbmp->reginfo[i], flags); + if ((ret = __memp_print_hash( + env, dbmp, &dbmp->reginfo[i], fmap, flags)) != 0) + break; + } + + return (ret); +} + +static int +__memp_print_files(env, mfp, argp, countp, flags) + ENV *env; + MPOOLFILE *mfp; + void *argp; + u_int32_t *countp; + u_int32_t flags; +{ + roff_t *fmap; + DB_MPOOL *dbmp; + u_int32_t mfp_flags; + static const FN fn[] = { + { MP_CAN_MMAP, "MP_CAN_MMAP" }, + { MP_DIRECT, "MP_DIRECT" }, + { MP_EXTENT, "MP_EXTENT" }, + { MP_FAKE_DEADFILE, "deadfile" }, + { MP_FAKE_FILEWRITTEN, "file written" }, + { MP_FAKE_NB, "no backing file" }, + { MP_FAKE_UOC, "unlink on close" }, + { MP_NOT_DURABLE, "not durable" }, + { MP_TEMP, "MP_TEMP" }, + { 0, NULL } + }; + + dbmp = env->mp_handle; + fmap = argp; + + __db_msg(env, "File #%d: %s", *countp + 1, __memp_fns(dbmp, mfp)); + __mutex_print_debug_single(env, "Mutex", mfp->mutex, flags); + + MUTEX_LOCK(env, mfp->mutex); + STAT_ULONG("Revision count", mfp->revision); + STAT_ULONG("Reference count", mfp->mpf_cnt); + STAT_ULONG("Block count", mfp->block_cnt); + STAT_ULONG("Last page number", mfp->last_pgno); + STAT_ULONG("Original last page number", mfp->orig_last_pgno); + STAT_ULONG("Maximum page number", mfp->maxpgno); + STAT_LONG("Type", mfp->ftype); + STAT_LONG("Priority", mfp->priority); + STAT_LONG("Page's LSN offset", mfp->lsn_off); + STAT_LONG("Page's clear length", mfp->clear_len); + + __db_print_fileid(env, + R_ADDR(dbmp->reginfo, mfp->fileid_off), "\tID"); + + mfp_flags = 0; + if (mfp->deadfile) + FLD_SET(mfp_flags, MP_FAKE_DEADFILE); + if (mfp->file_written) + FLD_SET(mfp_flags, MP_FAKE_FILEWRITTEN); + if (mfp->no_backing_file) + FLD_SET(mfp_flags, MP_FAKE_NB); + if (mfp->unlink_on_close) + FLD_SET(mfp_flags, MP_FAKE_UOC); + __db_prflags(env, NULL, mfp_flags, fn, NULL, "\tFlags"); + + if (*countp < FMAP_ENTRIES) + 
fmap[*countp] = R_OFFSET(dbmp->reginfo, mfp); + (*countp)++; + MUTEX_UNLOCK(env, mfp->mutex); + return (0); +} + +/* + * __memp_print_hash -- + * Display hash bucket statistics for a cache. + */ +static int +__memp_print_hash(env, dbmp, reginfo, fmap, flags) + ENV *env; + DB_MPOOL *dbmp; + REGINFO *reginfo; + roff_t *fmap; + u_int32_t flags; +{ + BH *bhp, *vbhp; + DB_MPOOL_HASH *hp; + DB_MSGBUF mb; + MPOOL *c_mp; + u_int32_t bucket; + + c_mp = reginfo->primary; + DB_MSGBUF_INIT(&mb); + STAT_ULONG("Hash table last-checked", c_mp->last_checked); + STAT_ULONG("Hash table LRU priority", c_mp->lru_priority); + STAT_ULONG("Hash table LRU generation", c_mp->lru_generation); + STAT_ULONG("Put counter", c_mp->put_counter); + + /* Display the hash table list of BH's. */ + __db_msg(env, + "BH hash table (%lu hash slots)", (u_long)c_mp->htab_buckets); + __db_msg(env, "bucket #: priority, I/O wait, [mutex]"); + __db_msg(env, "\tpageno, file, ref, LSN, address, priority, flags"); + + for (hp = R_ADDR(reginfo, c_mp->htab), + bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) { + MUTEX_READLOCK(env, hp->mtx_hash); + if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) != NULL) { + __db_msgadd(env, &mb, + "bucket %lu: %lu (%lu dirty)", + (u_long)bucket, (u_long)hp->hash_io_wait, + (u_long)atomic_read(&hp->hash_page_dirty)); + if (hp->hash_frozen != 0) + __db_msgadd(env, &mb, "(MVCC %lu/%lu/%lu) ", + (u_long)hp->hash_frozen, + (u_long)hp->hash_thawed, + (u_long)hp->hash_frozen_freed); + __mutex_print_debug_stats( + env, &mb, hp->mtx_hash, flags); + DB_MSGBUF_FLUSH(env, &mb); + } + for (; bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { + __memp_print_bh(env, dbmp, NULL, bhp, fmap); + + /* Print the version chain, if it exists. 
*/ + for (vbhp = SH_CHAIN_PREV(bhp, vc, __bh); + vbhp != NULL; + vbhp = SH_CHAIN_PREV(vbhp, vc, __bh)) { + __memp_print_bh(env, dbmp, + " next:\t", vbhp, fmap); + } + } + MUTEX_UNLOCK(env, hp->mtx_hash); + } + + return (0); +} + +/* + * __memp_print_bh -- + * Display a BH structure. + */ +static void +__memp_print_bh(env, dbmp, prefix, bhp, fmap) + ENV *env; + DB_MPOOL *dbmp; + const char *prefix; + BH *bhp; + roff_t *fmap; +{ + static const FN fn[] = { + { BH_CALLPGIN, "callpgin" }, + { BH_DIRTY, "dirty" }, + { BH_DIRTY_CREATE, "created" }, + { BH_DISCARD, "discard" }, + { BH_EXCLUSIVE, "exclusive" }, + { BH_FREED, "freed" }, + { BH_FROZEN, "frozen" }, + { BH_TRASH, "trash" }, + { BH_THAWED, "thawed" }, + { 0, NULL } + }; + DB_MSGBUF mb; + int i; + + DB_MSGBUF_INIT(&mb); + + if (prefix != NULL) + __db_msgadd(env, &mb, "%s", prefix); + else + __db_msgadd(env, &mb, "\t"); + + for (i = 0; i < FMAP_ENTRIES; ++i) + if (fmap[i] == INVALID_ROFF || fmap[i] == bhp->mf_offset) + break; + + if (fmap[i] == INVALID_ROFF) + __db_msgadd(env, &mb, "%5lu, %lu, ", + (u_long)bhp->pgno, (u_long)bhp->mf_offset); + else + __db_msgadd( + env, &mb, "%5lu, #%d, ", (u_long)bhp->pgno, i + 1); + + __db_msgadd(env, &mb, "%2lu, %lu/%lu", (u_long)atomic_read(&bhp->ref), + F_ISSET(bhp, BH_FROZEN) ? 0 : (u_long)LSN(bhp->buf).file, + F_ISSET(bhp, BH_FROZEN) ? 0 : (u_long)LSN(bhp->buf).offset); + if (bhp->td_off != INVALID_ROFF) + __db_msgadd(env, &mb, " (@%lu/%lu 0x%x)", + (u_long)VISIBLE_LSN(env, bhp)->file, + (u_long)VISIBLE_LSN(env, bhp)->offset, + BH_OWNER(env, bhp)->txnid); + __db_msgadd(env, &mb, ", %#08lx, %lu", + (u_long)R_OFFSET(dbmp->reginfo, bhp), (u_long)bhp->priority); + __db_prflags(env, &mb, bhp->flags, fn, " (", ")"); + DB_MSGBUF_FLUSH(env, &mb); +} + +/* + * __memp_stat_wait -- + * Total hash bucket wait stats into the region. 
+ */ +static void +__memp_stat_wait(env, reginfo, mp, mstat, flags) + ENV *env; + REGINFO *reginfo; + MPOOL *mp; + DB_MPOOL_STAT *mstat; + u_int32_t flags; +{ + DB_MPOOL_HASH *hp; + u_int32_t i; + uintmax_t tmp_nowait, tmp_wait; + + mstat->st_hash_max_wait = 0; + hp = R_ADDR(reginfo, mp->htab); + for (i = 0; i < mp->htab_buckets; i++, hp++) { + __mutex_set_wait_info( + env, hp->mtx_hash, &tmp_wait, &tmp_nowait); + mstat->st_hash_nowait += tmp_nowait; + mstat->st_hash_wait += tmp_wait; + if (tmp_wait > mstat->st_hash_max_wait) { + mstat->st_hash_max_wait = tmp_wait; + mstat->st_hash_max_nowait = tmp_nowait; + } + if (LF_ISSET(DB_STAT_CLEAR | + DB_STAT_SUBSYSTEM) == DB_STAT_CLEAR) + __mutex_clear(env, hp->mtx_hash); + + mstat->st_io_wait += hp->hash_io_wait; + mstat->st_mvcc_frozen += hp->hash_frozen; + mstat->st_mvcc_thawed += hp->hash_thawed; + mstat->st_mvcc_freed += hp->hash_frozen_freed; + if (LF_ISSET(DB_STAT_CLEAR)) { + hp->hash_io_wait = 0; + hp->hash_frozen = 0; + hp->hash_thawed = 0; + hp->hash_frozen_freed = 0; + } + } +} + +#else /* !HAVE_STATISTICS */ + +int +__memp_stat_pp(dbenv, gspp, fspp, flags) + DB_ENV *dbenv; + DB_MPOOL_STAT **gspp; + DB_MPOOL_FSTAT ***fspp; + u_int32_t flags; +{ + COMPQUIET(gspp, NULL); + COMPQUIET(fspp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__memp_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif + +/* + * __memp_stat_hash -- + * Total hash bucket stats (other than mutex wait) into the region. 
+ * + * PUBLIC: void __memp_stat_hash __P((REGINFO *, MPOOL *, u_int32_t *)); + */ +void +__memp_stat_hash(reginfo, mp, dirtyp) + REGINFO *reginfo; + MPOOL *mp; + u_int32_t *dirtyp; +{ + DB_MPOOL_HASH *hp; + u_int32_t dirty, i; + + hp = R_ADDR(reginfo, mp->htab); + for (i = 0, dirty = 0; i < mp->htab_buckets; i++, hp++) + dirty += (u_int32_t)atomic_read(&hp->hash_page_dirty); + *dirtyp = dirty; +} diff --git a/src/mp/mp_sync.c b/src/mp/mp_sync.c new file mode 100644 index 00000000..2e197d58 --- /dev/null +++ b/src/mp/mp_sync.c @@ -0,0 +1,956 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" + +typedef struct { + DB_MPOOL_HASH *track_hp; /* Hash bucket. */ + + roff_t track_off; /* Page file offset. */ + db_pgno_t track_pgno; /* Page number. 
*/ +} BH_TRACK; + +static int __bhcmp __P((const void *, const void *)); +static int __memp_close_flush_files __P((ENV *, int)); +static int __memp_sync_files __P((ENV *)); +static int __memp_sync_file __P((ENV *, + MPOOLFILE *, void *, u_int32_t *, u_int32_t)); + +/* + * __memp_walk_files -- + * PUBLIC: int __memp_walk_files __P((ENV *, MPOOL *, + * PUBLIC: int (*) __P((ENV *, MPOOLFILE *, void *, + * PUBLIC: u_int32_t *, u_int32_t)), void *, u_int32_t *, u_int32_t)); + */ +int +__memp_walk_files(env, mp, func, arg, countp, flags) + ENV *env; + MPOOL *mp; + int (*func)__P((ENV *, MPOOLFILE *, void *, u_int32_t *, u_int32_t)); + void *arg; + u_int32_t *countp; + u_int32_t flags; +{ + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOLFILE *mfp; + int i, ret, t_ret; + + dbmp = env->mp_handle; + ret = 0; + + hp = R_ADDR(dbmp->reginfo, mp->ftab); + for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) { + MUTEX_LOCK(env, hp->mtx_hash); + SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) { + if ((t_ret = func(env, + mfp, arg, countp, flags)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0 && !LF_ISSET(DB_STAT_MEMP_NOERROR)) + break; + } + MUTEX_UNLOCK(env, hp->mtx_hash); + if (ret != 0 && !LF_ISSET(DB_STAT_MEMP_NOERROR)) + break; + } + return (ret); +} + +/* + * __memp_discard_all_mpfs -- + * Force discard all mpoolfiles. When closing a private environment, we + * always want to discard all mpoolfiles to avoid memory leak. 
+ * + * PUBLIC: int __memp_discard_all_mpfs __P((ENV *, MPOOL *)); + */ +int +__memp_discard_all_mpfs (env, mp) + ENV *env; + MPOOL *mp; +{ + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOLFILE *mfp; + int i, ret, t_ret; + + ret = t_ret = 0; + mfp = NULL; + hp = NULL; + dbmp = env->mp_handle; + + hp = R_ADDR(dbmp->reginfo, mp->ftab); + for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) { + MUTEX_LOCK(env, hp->mtx_hash); + while ((mfp = SH_TAILQ_FIRST( + &hp->hash_bucket, __mpoolfile)) != NULL) { + MUTEX_LOCK(env, mfp->mutex); + if ((t_ret = __memp_mf_discard(dbmp, mfp, 1)) != 0 && + ret == 0) + ret = t_ret; + } + MUTEX_UNLOCK(env, hp->mtx_hash); + } + return (ret); +} + +/* + * __memp_sync_pp -- + * ENV->memp_sync pre/post processing. + * + * PUBLIC: int __memp_sync_pp __P((DB_ENV *, DB_LSN *)); + */ +int +__memp_sync_pp(dbenv, lsnp) + DB_ENV *dbenv; + DB_LSN *lsnp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "memp_sync", DB_INIT_MPOOL); + + /* + * If no LSN is provided, flush the entire cache (reasonable usage + * even if there's no log subsystem configured). + */ + if (lsnp != NULL) + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "memp_sync", DB_INIT_LOG); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__memp_sync(env, DB_SYNC_CACHE, lsnp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_sync -- + * ENV->memp_sync. + * + * PUBLIC: int __memp_sync __P((ENV *, u_int32_t, DB_LSN *)); + */ +int +__memp_sync(env, flags, lsnp) + ENV *env; + u_int32_t flags; + DB_LSN *lsnp; +{ + DB_MPOOL *dbmp; + MPOOL *mp; + int interrupted, ret; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + + /* If we've flushed to the requested LSN, return that information. 
*/ + if (lsnp != NULL) { + MPOOL_SYSTEM_LOCK(env); + if (LOG_COMPARE(lsnp, &mp->lsn) <= 0) { + *lsnp = mp->lsn; + + MPOOL_SYSTEM_UNLOCK(env); + return (0); + } + MPOOL_SYSTEM_UNLOCK(env); + } + + if ((ret = + __memp_sync_int(env, NULL, 0, flags, NULL, &interrupted)) != 0) + return (ret); + + if (!interrupted && lsnp != NULL) { + MPOOL_SYSTEM_LOCK(env); + if (LOG_COMPARE(lsnp, &mp->lsn) > 0) + mp->lsn = *lsnp; + MPOOL_SYSTEM_UNLOCK(env); + } + + return (0); +} + +/* + * __memp_fsync_pp -- + * DB_MPOOLFILE->sync pre/post processing. + * + * PUBLIC: int __memp_fsync_pp __P((DB_MPOOLFILE *)); + */ +int +__memp_fsync_pp(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbmfp->env; + + MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->sync"); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__memp_fsync(dbmfp)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_fsync -- + * DB_MPOOLFILE->sync. + * + * PUBLIC: int __memp_fsync __P((DB_MPOOLFILE *)); + */ +int +__memp_fsync(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + MPOOLFILE *mfp; + + mfp = dbmfp->mfp; + + /* + * If this handle doesn't have a file descriptor that's open for + * writing, or if the file is a temporary, or if the file hasn't + * been written since it was flushed, there's no reason to proceed + * further. + */ + if (F_ISSET(dbmfp, MP_READONLY)) + return (0); + + if (F_ISSET(dbmfp->mfp, MP_TEMP) || dbmfp->mfp->no_backing_file) + return (0); + + if (mfp->file_written == 0) + return (0); + + return (__memp_sync_int( + dbmfp->env, dbmfp, 0, DB_SYNC_FILE, NULL, NULL)); +} + +/* + * __mp_xxx_fh -- + * Return a file descriptor for DB 1.85 compatibility locking. + * + * PUBLIC: int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **)); + */ +int +__mp_xxx_fh(dbmfp, fhp) + DB_MPOOLFILE *dbmfp; + DB_FH **fhp; +{ + int ret; + + /* + * This is a truly spectacular layering violation, intended ONLY to + * support compatibility for the DB 1.85 DB->fd call. 
+ * + * Sync the database file to disk, creating the file as necessary. + * + * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3). + * The MP_READONLY test isn't interesting because we will either + * already have a file descriptor (we opened the database file for + * reading) or we aren't readonly (we created the database which + * requires write privileges). The MP_TEMP test isn't interesting + * because we want to write to the backing file regardless so that + * we get a file descriptor to return. + */ + if ((*fhp = dbmfp->fhp) != NULL) + return (0); + + if ((ret = __memp_sync_int( + dbmfp->env, dbmfp, 0, DB_SYNC_FILE, NULL, NULL)) == 0) + *fhp = dbmfp->fhp; + return (ret); +} + +/* + * __memp_sync_int -- + * Mpool sync internal function. + * + * PUBLIC: int __memp_sync_int __P((ENV *, + * PUBLIC: DB_MPOOLFILE *, u_int32_t, u_int32_t, u_int32_t *, int *)); + */ +int +__memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) + ENV *env; + DB_MPOOLFILE *dbmfp; + u_int32_t trickle_max, flags, *wrote_totalp; + int *interruptedp; +{ + BH *bhp; + BH_TRACK *bharray; + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOL *c_mp, *mp; + MPOOLFILE *mfp; + db_mutex_t mutex; + roff_t last_mf_offset; + u_int32_t ar_cnt, ar_max, i, n_cache, remaining, wrote_total; + int32_t wrote_cnt; + int dirty, filecnt, maxopenfd, required_write, ret, t_ret; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + last_mf_offset = INVALID_ROFF; + filecnt = wrote_total = 0; + + if (wrote_totalp != NULL) + *wrote_totalp = 0; + if (interruptedp != NULL) + *interruptedp = 0; + + /* + * If we're flushing the cache, it's a checkpoint or we're flushing a + * specific file, we really have to write the blocks and we have to + * confirm they made it to disk. Otherwise, we can skip a block if + * it's hard to get. + */ + required_write = LF_ISSET(DB_SYNC_CACHE | + DB_SYNC_CHECKPOINT | DB_SYNC_FILE | DB_SYNC_QUEUE_EXTENT); + + /* Get shared configuration information. 
*/ + MPOOL_SYSTEM_LOCK(env); + maxopenfd = mp->mp_maxopenfd; + MPOOL_SYSTEM_UNLOCK(env); + + /* Assume one dirty page per bucket. */ + ar_max = mp->nreg * mp->htab_buckets; + if ((ret = + __os_malloc(env, ar_max * sizeof(BH_TRACK), &bharray)) != 0) + return (ret); + + /* + * Walk each cache's list of buffers and mark all dirty buffers to be + * written and all dirty buffers to be potentially written, depending + * on our flags. + */ + for (ar_cnt = 0, n_cache = 0; n_cache < mp->nreg; ++n_cache) { + c_mp = dbmp->reginfo[n_cache].primary; + + hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab); + for (i = 0; i < c_mp->htab_buckets; i++, hp++) { + /* + * We can check for empty buckets before locking as + * we only care if the pointer is zero or non-zero. + * We can ignore empty or clean buckets because we + * only need write buffers that were dirty before + * we started. + */ +#ifdef DIAGNOSTIC + if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL) +#else + if (atomic_read(&hp->hash_page_dirty) == 0) +#endif + continue; + + dirty = 0; + MUTEX_LOCK(env, hp->mtx_hash); + SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) { + /* Always ignore clean pages. */ + if (!F_ISSET(bhp, BH_DIRTY)) + continue; + + dirty++; + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + + /* + * Ignore in-memory files, unless the file is + * specifically being flushed. + */ + if (mfp->no_backing_file) + continue; + if (!LF_ISSET(DB_SYNC_FILE) && + F_ISSET(mfp, MP_TEMP)) + continue; + + /* + * Ignore files that aren't involved in DB's + * transactional operations during checkpoints. + */ + if (LF_ISSET(DB_SYNC_CHECKPOINT) && + mfp->lsn_off == DB_LSN_OFF_NOTSET) + continue; + + /* + * Ignore files that aren't Queue extent files + * if we're flushing a Queue file with extents. + */ + if (LF_ISSET(DB_SYNC_QUEUE_EXTENT) && + !F_ISSET(mfp, MP_EXTENT)) + continue; + + /* + * If we're flushing a specific file, see if + * this page is from that file. 
+ */ + if (dbmfp != NULL && mfp != dbmfp->mfp) + continue; + + /* Track the buffer, we want it. */ + bharray[ar_cnt].track_hp = hp; + bharray[ar_cnt].track_pgno = bhp->pgno; + bharray[ar_cnt].track_off = bhp->mf_offset; + ar_cnt++; + + /* + * If we run out of space, double and continue. + * Don't stop at trickle_max, we want to sort + * as large a sample set as possible in order + * to minimize disk seeks. + */ + if (ar_cnt >= ar_max) { + if ((ret = __os_realloc(env, + (ar_max * 2) * sizeof(BH_TRACK), + &bharray)) != 0) + break; + ar_max *= 2; + } + } + + if (ret != 0) + goto err; + /* + * We are only checking this in diagnostic mode + * since it requires extra latching to keep the count + * in sync with the number of bits counted. + */ + DB_ASSERT(env, + dirty == (int)atomic_read(&hp->hash_page_dirty)); + MUTEX_UNLOCK(env, hp->mtx_hash); + + /* Check if the call has been interrupted. */ + if (LF_ISSET(DB_SYNC_INTERRUPT_OK) && FLD_ISSET( + mp->config_flags, DB_MEMP_SYNC_INTERRUPT)) { + STAT(++mp->stat.st_sync_interrupted); + if (interruptedp != NULL) + *interruptedp = 1; + goto err; + } + } + } + + /* If there no buffers to write, we're done. */ + if (ar_cnt == 0) + goto done; + + /* + * Write the buffers in file/page order, trying to reduce seeks by the + * filesystem and, when pages are smaller than filesystem block sizes, + * reduce the actual number of writes. + */ + if (ar_cnt > 1) + qsort(bharray, ar_cnt, sizeof(BH_TRACK), __bhcmp); + + /* + * If we're trickling buffers, only write enough to reach the correct + * percentage. + */ + if (LF_ISSET(DB_SYNC_TRICKLE) && ar_cnt > trickle_max) + ar_cnt = trickle_max; + + /* + * Flush the log. We have to ensure the log records reflecting the + * changes on the database pages we're writing have already made it + * to disk. 
We still have to check the log each time we write a page + * (because pages we are about to write may be modified after we have + * flushed the log), but in general this will at least avoid any I/O + * on the log's part. + */ + if (LOGGING_ON(env) && (ret = __log_flush(env, NULL)) != 0) + goto err; + + /* + * Walk the array, writing buffers. When we write a buffer, we NULL + * out its hash bucket pointer so we don't process a slot more than + * once. + */ + for (i = wrote_cnt = 0, remaining = ar_cnt; remaining > 0; ++i) { + if (i >= ar_cnt) { + i = 0; + __os_yield(env, 1, 0); + } + if ((hp = bharray[i].track_hp) == NULL) + continue; + + /* Lock the hash bucket and find the buffer. */ + mutex = hp->mtx_hash; + MUTEX_READLOCK(env, mutex); + SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) + if (bhp->pgno == bharray[i].track_pgno && + bhp->mf_offset == bharray[i].track_off) + break; + + /* + * If we can't find the buffer we're done, somebody else had + * to have written it. + * + * If the buffer isn't dirty, we're done, there's no work + * needed. + */ + if (bhp == NULL || !F_ISSET(bhp, BH_DIRTY)) { + MUTEX_UNLOCK(env, mutex); + --remaining; + bharray[i].track_hp = NULL; + continue; + } + + /* + * If the buffer is locked by another thread, ignore it, we'll + * come back to it. + */ + if (F_ISSET(bhp, BH_EXCLUSIVE)) { + MUTEX_UNLOCK(env, mutex); + if (!required_write) { + --remaining; + bharray[i].track_hp = NULL; + } + continue; + } + + /* Pin the buffer into memory. */ + atomic_inc(env, &bhp->ref); + MUTEX_UNLOCK(env, mutex); + MUTEX_READLOCK(env, bhp->mtx_buf); + DB_ASSERT(env, !F_ISSET(bhp, BH_EXCLUSIVE)); + + /* + * When swapping the hash bucket mutex for the buffer mutex, + * we may have raced with an MVCC update. In that case, we + * no longer have the most recent version, and need to retry + * (the buffer header we have pinned will no longer be marked + * dirty, so we can't just write it). 
+ */ + if (SH_CHAIN_HASNEXT(bhp, vc)) { + atomic_dec(env, &bhp->ref); + MUTEX_UNLOCK(env, bhp->mtx_buf); + continue; + } + + /* we will dispose of this buffer. */ + --remaining; + bharray[i].track_hp = NULL; + + /* + * If we've switched files, check to see if we're configured + * to close file descriptors. + */ + if (maxopenfd != 0 && bhp->mf_offset != last_mf_offset) { + if (++filecnt >= maxopenfd) { + filecnt = 0; + if ((t_ret = __memp_close_flush_files( + env, 1)) != 0 && ret == 0) + ret = t_ret; + } + last_mf_offset = bhp->mf_offset; + } + + /* + * If the buffer is dirty, we write it. We only try to + * write the buffer once. + */ + if (F_ISSET(bhp, BH_DIRTY)) { + mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + if ((t_ret = + __memp_bhwrite(dbmp, hp, mfp, bhp, 1)) == 0) { + ++wrote_cnt; + ++wrote_total; + } else { + if (ret == 0) + ret = t_ret; + __db_errx(env, DB_STR_A("3027", + "%s: unable to flush page: %lu", "%s %lu"), + __memp_fns(dbmp, mfp), (u_long)bhp->pgno); + + } + } + + /* Discard our buffer reference. */ + DB_ASSERT(env, atomic_read(&bhp->ref) > 0); + atomic_dec(env, &bhp->ref); + MUTEX_UNLOCK(env, bhp->mtx_buf); + + /* Check if the call has been interrupted. */ + if (LF_ISSET(DB_SYNC_INTERRUPT_OK) && + FLD_ISSET(mp->config_flags, DB_MEMP_SYNC_INTERRUPT)) { + STAT(++mp->stat.st_sync_interrupted); + if (interruptedp != NULL) + *interruptedp = 1; + goto err; + } + + /* + * Sleep after some number of writes to avoid disk saturation. + * Don't cache the max writes value, an application shutting + * down might reset the value in order to do a fast flush or + * checkpoint. + */ + if (!LF_ISSET(DB_SYNC_SUPPRESS_WRITE) && + !FLD_ISSET(mp->config_flags, DB_MEMP_SUPPRESS_WRITE) && + mp->mp_maxwrite != 0 && wrote_cnt >= mp->mp_maxwrite) { + wrote_cnt = 0; + __os_yield(env, 0, (u_long)mp->mp_maxwrite_sleep); + } + } + +done: /* + * If a write is required, we have to force the pages to disk. 
We + * don't do this as we go along because we want to give the OS as + * much time as possible to lazily flush, and because we have to flush + * files that might not even have had dirty buffers in the cache, so + * we have to walk the files list. + */ + if (ret == 0 && required_write) { + if (dbmfp == NULL) + ret = __memp_sync_files(env); + else + ret = __os_fsync(env, dbmfp->fhp); + } + + /* If we've opened files to flush pages, close them. */ + if ((t_ret = __memp_close_flush_files(env, 0)) != 0 && ret == 0) + ret = t_ret; + +err: __os_free(env, bharray); + if (wrote_totalp != NULL) + *wrote_totalp = wrote_total; + + return (ret); +} + +static int +__memp_sync_file(env, mfp, argp, countp, flags) + ENV *env; + MPOOLFILE *mfp; + void *argp; + u_int32_t *countp; + u_int32_t flags; +{ + DB_MPOOL *dbmp; + DB_MPOOLFILE *dbmfp; + int ret, t_ret; + + COMPQUIET(countp, NULL); + COMPQUIET(flags, 0); + + if (!mfp->file_written || mfp->no_backing_file || + mfp->deadfile || F_ISSET(mfp, MP_TEMP)) + return (0); + /* + * Pin the MPOOLFILE structure into memory, and release the + * region mutex allowing us to walk the linked list. We'll + * re-acquire that mutex to move to the next entry in the list. + * + * This works because we only need to flush current entries, + * we don't care about new entries being added, and the linked + * list is never re-ordered, a single pass is sufficient. It + * requires MPOOLFILE structures removed before we get to them + * be flushed to disk, but that's nothing new, they could have + * been removed while checkpoint was running, too. + * + * Once we have the MPOOLFILE lock, re-check the MPOOLFILE is + * not being discarded. (A thread removing the MPOOLFILE + * will: hold the MPOOLFILE mutex, set deadfile, drop the + * MPOOLFILE mutex and then acquire the region MUTEX to walk + * the linked list and remove the MPOOLFILE structure. Make + * sure the MPOOLFILE wasn't marked dead while we waited for + * the mutex. 
+ */ + MUTEX_LOCK(env, mfp->mutex); + if (!mfp->file_written || mfp->deadfile) { + MUTEX_UNLOCK(env, mfp->mutex); + return (0); + } + ++mfp->mpf_cnt; + MUTEX_UNLOCK(env, mfp->mutex); + + /* + * Look for an already open, writable handle (fsync doesn't + * work on read-only Windows handles). + */ + dbmp = env->mp_handle; + MUTEX_LOCK(env, dbmp->mutex); + TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q) { + if (dbmfp->mfp != mfp || F_ISSET(dbmfp, MP_READONLY)) + continue; + /* + * We don't want to hold the mutex while calling sync. + * Increment the DB_MPOOLFILE handle ref count to pin + * it into memory. + */ + ++dbmfp->ref; + break; + } + MUTEX_UNLOCK(env, dbmp->mutex); + + /* If we don't find a handle we can use, open one. */ + if (dbmfp == NULL) { + if ((ret = __memp_mf_sync(dbmp, mfp, 1)) != 0) { + __db_err(env, ret, DB_STR_A("3028", + "%s: unable to flush", "%s"), (char *) + R_ADDR(dbmp->reginfo, mfp->path_off)); + } + } else + ret = __os_fsync(env, dbmfp->fhp); + + /* + * Re-acquire the MPOOLFILE mutex, we need it to modify the + * reference count. + */ + MUTEX_LOCK(env, mfp->mutex); + + /* + * If we wrote the file and there are no other references (or there + * is a single reference, and it's the one we opened to write + * buffers during checkpoint), clear the file_written flag. We + * do this so that applications opening thousands of files don't + * loop here opening and flushing those files during checkpoint. + * + * The danger here is if a buffer were to be written as part of + * a checkpoint, and then not be flushed to disk. This cannot + * happen because we only clear file_written when there are no + * other users of the MPOOLFILE in the system, and, as we hold + * the region lock, no possibility of another thread of control + * racing with us to open a MPOOLFILE. 
+ */ + if (mfp->mpf_cnt == 1 || (mfp->mpf_cnt == 2 && + dbmfp != NULL && F_ISSET(dbmfp, MP_FLUSH))) { + mfp->file_written = 0; + + /* + * We may be the last reference for a MPOOLFILE, as we + * weren't holding the MPOOLFILE mutex when flushing + * it's buffers to disk. If we can discard it, set + * a flag to schedule a clean-out pass. (Not likely, + * I mean, what are the chances that there aren't any + * buffers in the pool? Regardless, it might happen.) + */ + if (mfp->mpf_cnt == 1 && mfp->block_cnt == 0) + *(int *)argp = 1; + } + + /* + * If we found the file we must close it in case we are the last + * reference to the dbmfp. NOTE: since we have incremented + * mfp->mpf_cnt this cannot be the last reference to the mfp. + * This is important since we are called with the hash bucket + * locked. The mfp will get freed via the cleanup pass. + */ + if (dbmfp != NULL && + (t_ret = __memp_fclose(dbmfp, DB_MPOOL_NOLOCK)) != 0 && ret == 0) + ret = t_ret; + + --mfp->mpf_cnt; + + /* Unlock the MPOOLFILE. */ + MUTEX_UNLOCK(env, mfp->mutex); + return (ret); +} + +/* + * __memp_sync_files -- + * Sync all the files in the environment, open or not. + */ +static int +__memp_sync_files(env) + ENV *env; +{ + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOL *mp; + MPOOLFILE *mfp, *next_mfp; + int i, need_discard_pass, ret; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + need_discard_pass = ret = 0; + + ret = __memp_walk_files(env, + mp, __memp_sync_file, &need_discard_pass, 0, DB_STAT_MEMP_NOERROR); + + /* + * We may need to do a last pass through the MPOOLFILE list -- if we + * were the last reference to an MPOOLFILE, we need to clean it out. 
+ */ + if (!need_discard_pass) + return (ret); + + hp = R_ADDR(dbmp->reginfo, mp->ftab); + for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) { +retry: MUTEX_LOCK(env, hp->mtx_hash); + for (mfp = SH_TAILQ_FIRST(&hp->hash_bucket, + __mpoolfile); mfp != NULL; mfp = next_mfp) { + next_mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile); + /* + * Do a fast check -- we can check for zero/non-zero + * without a mutex on the MPOOLFILE. If likely to + * succeed, lock the MPOOLFILE down and look for real. + */ + if (mfp->deadfile || + mfp->block_cnt != 0 || mfp->mpf_cnt != 0) + continue; + + MUTEX_LOCK(env, mfp->mutex); + if (!mfp->deadfile && + mfp->block_cnt == 0 && mfp->mpf_cnt == 0) { + MUTEX_UNLOCK(env, hp->mtx_hash); + (void)__memp_mf_discard(dbmp, mfp, 0); + goto retry; + } else + MUTEX_UNLOCK(env, mfp->mutex); + } + MUTEX_UNLOCK(env, hp->mtx_hash); + } + return (ret); +} + +/* + * __memp_mf_sync -- + * Flush an MPOOLFILE, when no currently open handle is available. + * + * PUBLIC: int __memp_mf_sync __P((DB_MPOOL *, MPOOLFILE *, int)); + */ +int +__memp_mf_sync(dbmp, mfp, locked) + DB_MPOOL *dbmp; + MPOOLFILE *mfp; + int locked; +{ + DB_FH *fhp; + DB_MPOOL_HASH *hp; + ENV *env; + MPOOL *mp; + int ret, t_ret; + char *rpath; + + COMPQUIET(hp, NULL); + env = dbmp->env; + + /* + * We need to be holding the hash lock: we're using the path name + * and __memp_nameop might try and rename the file. 
+ */ + if (!locked) { + mp = dbmp->reginfo[0].primary; + hp = R_ADDR(dbmp->reginfo, mp->ftab); + hp += FNBUCKET( + R_ADDR(dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN); + MUTEX_LOCK(env, hp->mtx_hash); + } + + if ((ret = __db_appname(env, DB_APP_DATA, + R_ADDR(dbmp->reginfo, mfp->path_off), NULL, &rpath)) == 0) { + if ((ret = __os_open(env, rpath, 0, 0, 0, &fhp)) == 0) { + ret = __os_fsync(env, fhp); + if ((t_ret = + __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + } + __os_free(env, rpath); + } + + if (!locked) + MUTEX_UNLOCK(env, hp->mtx_hash); + + return (ret); +} + +/* + * __memp_close_flush_files -- + * Close files opened only to flush buffers. + */ +static int +__memp_close_flush_files(env, dosync) + ENV *env; + int dosync; +{ + DB_MPOOL *dbmp; + DB_MPOOLFILE *dbmfp; + MPOOLFILE *mfp; + int ret; + + dbmp = env->mp_handle; + + /* + * The routine exists because we must close files opened by sync to + * flush buffers. There are two cases: first, extent files have to + * be closed so they may be removed when empty. Second, regular + * files have to be closed so we don't run out of descriptors (for + * example, an application partitioning its data into databases + * based on timestamps, so there's a continually increasing set of + * files). + * + * We mark files opened in the __memp_bhwrite() function with the + * MP_FLUSH flag. Here we walk through our file descriptor list, + * and, if a file was opened by __memp_bhwrite(), we close it. + */ +retry: MUTEX_LOCK(env, dbmp->mutex); + TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q) + if (F_ISSET(dbmfp, MP_FLUSH)) { + F_CLR(dbmfp, MP_FLUSH); + MUTEX_UNLOCK(env, dbmp->mutex); + if (dosync) { + /* + * If we have the only open handle on the file, + * clear the dirty flag so we don't re-open and + * sync it again when discarding the MPOOLFILE + * structure. Clear the flag before the sync + * so can't race with a thread writing the file. 
+ */ + mfp = dbmfp->mfp; + if (mfp->mpf_cnt == 1) { + MUTEX_LOCK(env, mfp->mutex); + if (mfp->mpf_cnt == 1) + mfp->file_written = 0; + MUTEX_UNLOCK(env, mfp->mutex); + } + if ((ret = __os_fsync(env, dbmfp->fhp)) != 0) + return (ret); + } + if ((ret = __memp_fclose(dbmfp, 0)) != 0) + return (ret); + goto retry; + } + MUTEX_UNLOCK(env, dbmp->mutex); + + return (0); +} + +static int +__bhcmp(p1, p2) + const void *p1, *p2; +{ + BH_TRACK *bhp1, *bhp2; + + bhp1 = (BH_TRACK *)p1; + bhp2 = (BH_TRACK *)p2; + + /* Sort by file (shared memory pool offset). */ + if (bhp1->track_off < bhp2->track_off) + return (-1); + if (bhp1->track_off > bhp2->track_off) + return (1); + + /* + * !!! + * Defend against badly written quicksort code calling the comparison + * function with two identical pointers (e.g., WATCOM C++ (Power++)). + */ + if (bhp1->track_pgno < bhp2->track_pgno) + return (-1); + if (bhp1->track_pgno > bhp2->track_pgno) + return (1); + return (0); +} diff --git a/src/mp/mp_trickle.c b/src/mp/mp_trickle.c new file mode 100644 index 00000000..abb4519e --- /dev/null +++ b/src/mp/mp_trickle.c @@ -0,0 +1,112 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" + +static int __memp_trickle __P((ENV *, int, int *)); + +/* + * __memp_trickle_pp -- + * ENV->memp_trickle pre/post processing. + * + * PUBLIC: int __memp_trickle_pp __P((DB_ENV *, int, int *)); + */ +int +__memp_trickle_pp(dbenv, pct, nwrotep) + DB_ENV *dbenv; + int pct, *nwrotep; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mp_handle, "memp_trickle", DB_INIT_MPOOL); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__memp_trickle(env, pct, nwrotep)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __memp_trickle -- + * ENV->memp_trickle. 
+ */ +static int +__memp_trickle(env, pct, nwrotep) + ENV *env; + int pct, *nwrotep; +{ + DB_MPOOL *dbmp; + MPOOL *c_mp, *mp; + u_int32_t clean, dirty, i, need_clean, total, dtmp, wrote; + int ret; + + dbmp = env->mp_handle; + mp = dbmp->reginfo[0].primary; + + if (nwrotep != NULL) + *nwrotep = 0; + + if (pct < 1 || pct > 100) { + __db_errx(env, DB_STR_A("3007", + "DB_ENV->memp_trickle: %d: percent must be between 1 and 100", + "%d"), pct); + return (EINVAL); + } + + /* + * Loop through the caches counting total/dirty buffers. + * + * XXX + * Using hash_page_dirty is our only choice at the moment, but it's not + * as correct as we might like in the presence of pools having more + * than one page size, as a free 512B buffer may not be equivalent to + * having a free 8KB buffer. + */ + for (ret = 0, i = dirty = total = 0; i < mp->nreg; ++i) { + c_mp = dbmp->reginfo[i].primary; + total += c_mp->pages; + __memp_stat_hash(&dbmp->reginfo[i], c_mp, &dtmp); + dirty += dtmp; + } + + /* + * If there are sufficient clean buffers, no buffers or no dirty + * buffers, we're done. + */ + if (total == 0 || dirty == 0) + return (0); + + /* + * The total number of pages is an exact number, but the dirty page + * count can change while we're walking the hash buckets, and it's + * even possible the dirty page count ends up larger than the total + * number of pages. + */ + clean = total > dirty ? 
total - dirty : 0; + need_clean = (total * (u_int)pct) / 100; + if (clean >= need_clean) + return (0); + + need_clean -= clean; + ret = __memp_sync_int(env, NULL, + need_clean, DB_SYNC_TRICKLE | DB_SYNC_INTERRUPT_OK, &wrote, NULL); + STAT((mp->stat.st_page_trickle += wrote)); + if (nwrotep != NULL) + *nwrotep = (int)wrote; + + return (ret); +} diff --git a/src/mutex/README b/src/mutex/README new file mode 100644 index 00000000..6e95c5fd --- /dev/null +++ b/src/mutex/README @@ -0,0 +1,110 @@ +# $Id$ + +Note: this only applies to locking using test-and-set and fcntl calls, +pthreads were added after this was written. + +Resource locking routines: lock based on a DB_MUTEX. All this gunk +(including trying to make assembly code portable), is necessary because +System V semaphores require system calls for uncontested locks and we +don't want to make two system calls per resource lock. + +First, this is how it works. The DB_MUTEX structure contains a resource +test-and-set lock (tsl), a file offset, a pid for debugging and statistics +information. + +If HAVE_MUTEX_FCNTL is NOT defined (that is, we know how to do +test-and-sets for this compiler/architecture combination), we try and +lock the resource tsl some number of times (based on the number of +processors). If we can't acquire the mutex that way, we use a system +call to sleep for 1ms, 2ms, 4ms, etc. (The time is bounded at 10ms for +mutexes backing logical locks and 25 ms for data structures, just in +case.) Using the timer backoff means that there are two assumptions: +that mutexes are held for brief periods (never over system calls or I/O) +and mutexes are not hotly contested. + +If HAVE_MUTEX_FCNTL is defined, we use a file descriptor to do byte +locking on a file at a specified offset. In this case, ALL of the +locking is done in the kernel. Because file descriptors are allocated +per process, we have to provide the file descriptor as part of the lock +call. 
We still have to do timer backoff because we need to be able to +block ourselves, that is, the lock manager causes processes to wait by +having the process acquire a mutex and then attempting to re-acquire the +mutex. There's no way to use kernel locking to block yourself, that is, +if you hold a lock and attempt to re-acquire it, the attempt will +succeed. + +Next, let's talk about why it doesn't work the way a reasonable person +would think it should work. + +Ideally, we'd have the ability to try to lock the resource tsl, and if +that fails, increment a counter of waiting processes, then block in the +kernel until the tsl is released. The process holding the resource tsl +would see the wait counter when it went to release the resource tsl, and +would wake any waiting processes up after releasing the lock. This would +actually require both another tsl (call it the mutex tsl) and +synchronization between the call that blocks in the kernel and the actual +resource tsl. The mutex tsl would be used to protect accesses to the +DB_MUTEX itself. Locking the mutex tsl would be done by a busy loop, +which is safe because processes would never block holding that tsl (all +they would do is try to obtain the resource tsl and set/check the wait +count). The problem in this model is that the blocking call into the +kernel requires a blocking semaphore, i.e. one whose normal state is +locked. + +The only portable forms of locking under UNIX are fcntl(2) on a file +descriptor/offset, and System V semaphores. Neither of these locking +methods are sufficient to solve the problem. + +The problem with fcntl locking is that only the process that obtained the +lock can release it. Remember, we want the normal state of the kernel +semaphore to be locked. 
So, if the creator of the DB_MUTEX were to +initialize the lock to "locked", then a second process locks the resource +tsl, and then a third process needs to block, waiting for the resource +tsl, when the second process wants to wake up the third process, it can't +because it's not the holder of the lock! For the second process to be +the holder of the lock, we would have to make a system call per +uncontested lock, which is what we were trying to get away from in the +first place. + +There are some hybrid schemes, such as signaling the holder of the lock, +or using a different blocking offset depending on which process is +holding the lock, but it gets complicated fairly quickly. I'm open to +suggestions, but I'm not holding my breath. + +Regardless, we use this form of locking when we don't have any other +choice, because it doesn't have the limitations found in System V +semaphores, and because the normal state of the kernel object in that +case is unlocked, so the process releasing the lock is also the holder +of the lock. + +The System V semaphore design has a number of other limitations that make +it inappropriate for this task. Namely: + +First, the semaphore key name space is separate from the file system name +space (although there exist methods for using file names to create +semaphore keys). If we use a well-known key, there's no reason to believe +that any particular key will not already be in use, either by another +instance of the DB application or some other application, in which case +the DB application will fail. If we create a key, then we have to use a +file system name to rendezvous and pass around the key. + +Second, System V semaphores traditionally have compile-time, system-wide +limits on the number of semaphore keys that you can have. Typically, that +number is far too low for any practical purpose. 
Since the semaphores
+permit more than a single slot per semaphore key, we could try and get
+around that limit by using multiple slots, but that means that the file
+that we're using for rendezvous is going to have to contain slot
+information as well as semaphore key information, and we're going to be
+reading/writing it on every db_mutex_t init or destroy operation. Anyhow,
+similar compile-time, system-wide limits on the numbers of slots per
+semaphore key kick in, and you're right back where you started.
+
+My fantasy is that once POSIX.1 standard mutexes are in wide-spread use,
+we can switch to them. My guess is that it won't happen, because the
+POSIX semaphores are only required to work for threads within a process,
+and not independent processes.
+
+Note: there are races in the statistics code, but since it's just that,
+I didn't bother fixing them. (The fix requires a mutex tsl, so, when/if
+this code is fixed to do rational locking (see above), then change the
+statistics update code to acquire/release the mutex tsl.)
diff --git a/src/mutex/mut_alloc.c b/src/mutex/mut_alloc.c
new file mode 100644
index 00000000..a15deab2
--- /dev/null
+++ b/src/mutex/mut_alloc.c
@@ -0,0 +1,291 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $Id$
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __mutex_alloc --
+ *	Allocate a mutex from the mutex region.
+ *
+ * PUBLIC: int __mutex_alloc __P((ENV *, int, u_int32_t, db_mutex_t *));
+ */
+int
+__mutex_alloc(env, alloc_id, flags, indxp)
+	ENV *env;
+	int alloc_id;
+	u_int32_t flags;
+	db_mutex_t *indxp;
+{
+	/* The caller may depend on us to initialize.
*/ + *indxp = MUTEX_INVALID; + + /* + * If this is not an application lock, and we've turned off locking, + * or the ENV handle isn't thread-safe, and this is a thread lock + * or the environment isn't multi-process by definition, there's no + * need to mutex at all. + */ + if (alloc_id != MTX_APPLICATION && alloc_id != MTX_MUTEX_TEST && + (F_ISSET(env->dbenv, DB_ENV_NOLOCKING) || + (!F_ISSET(env, ENV_THREAD) && + (LF_ISSET(DB_MUTEX_PROCESS_ONLY) || + F_ISSET(env, ENV_PRIVATE))))) + return (0); + + /* Private environments never share mutexes. */ + if (F_ISSET(env, ENV_PRIVATE)) + LF_SET(DB_MUTEX_PROCESS_ONLY); + + /* + * If we have a region in which to allocate the mutexes, lock it and + * do the allocation. + */ + if (!MUTEX_ON(env)) { + __db_errx(env, DB_STR("2033", + "Mutex allocated before mutex region.")); + return (__env_panic(env, EINVAL)); + } + return (__mutex_alloc_int(env, 1, alloc_id, flags, indxp)); +} + +/* + * __mutex_alloc_int -- + * Internal routine to allocate a mutex. + * + * PUBLIC: int __mutex_alloc_int + * PUBLIC: __P((ENV *, int, int, u_int32_t, db_mutex_t *)); + */ +int +__mutex_alloc_int(env, locksys, alloc_id, flags, indxp) + ENV *env; + int locksys, alloc_id; + u_int32_t flags; + db_mutex_t *indxp; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + db_mutex_t i; + size_t len; + u_int32_t cnt; + int ret; + + dbenv = env->dbenv; + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + ret = 0; + + /* + * If we're not initializing the mutex region, then lock the region to + * allocate new mutexes. Drop the lock before initializing the mutex, + * mutex initialization may require a system call. 
+ */ + if (locksys) + MUTEX_SYSTEM_LOCK(env); + + if (mtxregion->mutex_next == MUTEX_INVALID) { + if (mtxregion->stat.st_mutex_max != 0 && + mtxregion->stat.st_mutex_cnt >= + mtxregion->stat.st_mutex_max) { +nomem: __db_errx(env, DB_STR("2034", + "unable to allocate memory for mutex; resize mutex region")); + if (locksys) + MUTEX_SYSTEM_UNLOCK(env); + return (ret == 0 ? ENOMEM : ret); + } + cnt = mtxregion->stat.st_mutex_cnt / 2; + if (cnt < 8) + cnt = 8; + if (mtxregion->stat.st_mutex_max != 0 && + mtxregion->stat.st_mutex_cnt + cnt > + mtxregion->stat.st_mutex_max) + cnt = mtxregion->stat.st_mutex_max - + mtxregion->stat.st_mutex_cnt; + if (F_ISSET(env, ENV_PRIVATE)) { + F_SET(&mtxmgr->reginfo, REGION_TRACKED); + while (__env_alloc(&mtxmgr->reginfo, + (cnt * mtxregion->mutex_size) + + mtxregion->stat.st_mutex_align, &i) != 0) + if ((cnt >> 1) == 0) + break; + F_CLR(&mtxmgr->reginfo, REGION_TRACKED); + i = (db_mutex_t)ALIGNP_INC(i, + mtxregion->stat.st_mutex_align); + } else { + len = cnt * mtxregion->mutex_size; + if ((ret = __env_alloc_extend(&mtxmgr->reginfo, + R_ADDR(&mtxmgr->reginfo, + mtxregion->mutex_off_alloc), &len)) != 0) + goto nomem; + cnt = (u_int32_t)(len / mtxregion->mutex_size); + i = mtxregion->stat.st_mutex_cnt + 1; + } + if (cnt == 0) + goto nomem; + mutexp = MUTEXP_SET(env, i); + mtxregion->stat.st_mutex_free = cnt; + mtxregion->mutex_next = i; + mtxregion->stat.st_mutex_cnt += cnt; + while (--cnt > 0) { + mutexp->flags = 0; + if (F_ISSET(env, ENV_PRIVATE)) + mutexp->mutex_next_link = + (uintptr_t)(mutexp + 1); + else + mutexp->mutex_next_link = ++i; + mutexp++; + } + mutexp->flags = 0; + mutexp->mutex_next_link = MUTEX_INVALID; + } + + *indxp = mtxregion->mutex_next; + mutexp = MUTEXP_SET(env, *indxp); + DB_ASSERT(env, + ((uintptr_t)mutexp & (dbenv->mutex_align - 1)) == 0); + mtxregion->mutex_next = mutexp->mutex_next_link; + + --mtxregion->stat.st_mutex_free; + ++mtxregion->stat.st_mutex_inuse; + if (mtxregion->stat.st_mutex_inuse > 
mtxregion->stat.st_mutex_inuse_max) + mtxregion->stat.st_mutex_inuse_max = + mtxregion->stat.st_mutex_inuse; + if (locksys) + MUTEX_SYSTEM_UNLOCK(env); + + /* Initialize the mutex. */ + memset(mutexp, 0, sizeof(*mutexp)); + F_SET(mutexp, DB_MUTEX_ALLOCATED | + LF_ISSET(DB_MUTEX_LOGICAL_LOCK | + DB_MUTEX_PROCESS_ONLY | DB_MUTEX_SHARED)); + + /* + * If the mutex is associated with a single process, set the process + * ID. If the application ever calls DbEnv::failchk, we'll need the + * process ID to know if the mutex is still in use. + */ + if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) + dbenv->thread_id(dbenv, &mutexp->pid, NULL); + +#ifdef HAVE_STATISTICS + mutexp->alloc_id = alloc_id; +#else + COMPQUIET(alloc_id, 0); +#endif + + if ((ret = __mutex_init(env, *indxp, flags)) != 0) + (void)__mutex_free_int(env, locksys, indxp); + + return (ret); +} + +/* + * __mutex_free -- + * Free a mutex. + * + * PUBLIC: int __mutex_free __P((ENV *, db_mutex_t *)); + */ +int +__mutex_free(env, indxp) + ENV *env; + db_mutex_t *indxp; +{ + /* + * There is no explicit ordering in how the regions are cleaned up + * up and/or discarded when an environment is destroyed (either a + * private environment is closed or a public environment is removed). + * The way we deal with mutexes is to clean up all remaining mutexes + * when we close the mutex environment (because we have to be able to + * do that anyway, after a crash), which means we don't have to deal + * with region cleanup ordering on normal environment destruction. + * All that said, what it really means is we can get here without a + * mpool region. It's OK, the mutex has been, or will be, destroyed. + * + * If the mutex has never been configured, we're done. + */ + if (!MUTEX_ON(env) || *indxp == MUTEX_INVALID) + return (0); + + return (__mutex_free_int(env, 1, indxp)); +} + +/* + * __mutex_free_int -- + * Internal routine to free a mutex. 
+ * + * PUBLIC: int __mutex_free_int __P((ENV *, int, db_mutex_t *)); + */ +int +__mutex_free_int(env, locksys, indxp) + ENV *env; + int locksys; + db_mutex_t *indxp; +{ + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + db_mutex_t mutex; + int ret; + + mutex = *indxp; + *indxp = MUTEX_INVALID; + + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + mutexp = MUTEXP_SET(env, mutex); + + DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_ALLOCATED)); + F_CLR(mutexp, DB_MUTEX_ALLOCATED); + + ret = __mutex_destroy(env, mutex); + + if (locksys) + MUTEX_SYSTEM_LOCK(env); + + /* Link the mutex on the head of the free list. */ + mutexp->mutex_next_link = mtxregion->mutex_next; + mtxregion->mutex_next = mutex; + ++mtxregion->stat.st_mutex_free; + --mtxregion->stat.st_mutex_inuse; + + if (locksys) + MUTEX_SYSTEM_UNLOCK(env); + + return (ret); +} + +/* + * __mutex_refresh -- + * Reinitialize a mutex, if we are not sure of its state. + * + * PUBLIC: int __mutex_refresh __P((ENV *, db_mutex_t)); + */ +int +__mutex_refresh(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_MUTEX *mutexp; + u_int32_t flags; + int ret; + + mutexp = MUTEXP_SET(env, mutex); + flags = mutexp->flags; + if ((ret = __mutex_destroy(env, mutex)) == 0) { + memset(mutexp, 0, sizeof(*mutexp)); + F_SET(mutexp, DB_MUTEX_ALLOCATED | + LF_ISSET(DB_MUTEX_LOGICAL_LOCK | + DB_MUTEX_PROCESS_ONLY | DB_MUTEX_SHARED)); + LF_CLR(DB_MUTEX_LOCKED); + ret = __mutex_init(env, mutex, flags); + } + return (ret); +} diff --git a/src/mutex/mut_failchk.c b/src/mutex/mut_failchk.c new file mode 100644 index 00000000..8529ad9e --- /dev/null +++ b/src/mutex/mut_failchk.c @@ -0,0 +1,73 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __mut_failchk -- + * Check for mutexes held by dead processes. 
+ * + * PUBLIC: int __mut_failchk __P((ENV *)); + */ +int +__mut_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + db_mutex_t i; + int ret; + char buf[DB_THREADID_STRLEN]; + + if (F_ISSET(env, ENV_PRIVATE)) + return (0); + + dbenv = env->dbenv; + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + ret = 0; + + MUTEX_SYSTEM_LOCK(env); + for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i, ++mutexp) { + mutexp = MUTEXP_SET(env, i); + + /* + * We're looking for per-process mutexes where the process + * has died. + */ + if (!F_ISSET(mutexp, DB_MUTEX_ALLOCATED) || + !F_ISSET(mutexp, DB_MUTEX_PROCESS_ONLY)) + continue; + + /* + * The thread that allocated the mutex may have exited, but + * we cannot reclaim the mutex if the process is still alive. + */ + if (dbenv->is_alive( + dbenv, mutexp->pid, 0, DB_MUTEX_PROCESS_ONLY)) + continue; + + __db_msg(env, DB_STR_A("2017", + "Freeing mutex for process: %s", "%s"), + dbenv->thread_id_string(dbenv, mutexp->pid, 0, buf)); + + /* Unlock and free the mutex. */ + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + MUTEX_UNLOCK(env, i); + + if ((ret = __mutex_free_int(env, 0, &i)) != 0) + break; + } + MUTEX_SYSTEM_UNLOCK(env); + + return (ret); +} diff --git a/src/mutex/mut_fcntl.c b/src/mutex/mut_fcntl.c new file mode 100644 index 00000000..e22282f5 --- /dev/null +++ b/src/mutex/mut_fcntl.c @@ -0,0 +1,248 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +static inline int __db_fcntl_mutex_lock_int + __P((ENV *, db_mutex_t, db_timeout_t, int)); + +/* + * __db_fcntl_mutex_init -- + * Initialize a fcntl mutex. 
+ * + * PUBLIC: int __db_fcntl_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); + */ +int +__db_fcntl_mutex_init(env, mutex, flags) + ENV *env; + db_mutex_t mutex; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(mutex, MUTEX_INVALID); + COMPQUIET(flags, 0); + + return (0); +} + +/* + * __db_fcntl_mutex_lock_int + * Internal function to lock a mutex, blocking only when requested + */ +inline int +__db_fcntl_mutex_lock_int(env, mutex, timeout, wait) + ENV *env; + db_mutex_t mutex; + db_timeout_t timeout; + int wait; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_THREAD_INFO *ip; + struct flock k_lock; + int locked, ms, ret; + db_timespec now, timespec; + db_timeout_t time_left; + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + + CHECK_MTX_THREAD(env, mutexp); + +#ifdef HAVE_STATISTICS + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + ++mutexp->mutex_set_wait; + else + ++mutexp->mutex_set_nowait; +#endif + + /* Initialize the lock. */ + k_lock.l_whence = SEEK_SET; + k_lock.l_start = mutex; + k_lock.l_len = 1; + + if (timeout != 0) { + timespecclear(×pec); + __clock_set_expires(env, ×pec, timeout); + } + + /* + * Only check the thread state once, by initializing the thread + * control block pointer to null. If it is not the failchk + * thread, then ip will have a valid value subsequent times + * in the loop. + */ + ip = NULL; + + for (locked = 0;;) { + /* + * Wait for the lock to become available; wait 1ms initially, + * up to 1 second. 
+ */ + for (ms = 1; F_ISSET(mutexp, DB_MUTEX_LOCKED);) { + if (F_ISSET(dbenv, DB_ENV_FAILCHK) && + ip == NULL && dbenv->is_alive(dbenv, + mutexp->pid, mutexp->tid, 0) == 0) { + ret = __env_set_state(env, &ip, THREAD_VERIFY); + if (ret != 0 || + ip->dbth_state == THREAD_FAILCHK) + return (DB_RUNRECOVERY); + } + if (!wait) + return (DB_LOCK_NOTGRANTED); + if (timeout != 0) { + timespecclear(&now); + if (__clock_expired(env, &now, ×pec)) + return (DB_TIMEOUT); + DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0); + time_left = timeout - time_left; + if (ms * US_PER_MS > time_left) + ms = time_left / US_PER_MS; + } + __os_yield(NULL, 0, ms * US_PER_MS); + if ((ms <<= 1) > MS_PER_SEC) + ms = MS_PER_SEC; + } + + /* Acquire an exclusive kernel lock on the byte. */ + k_lock.l_type = F_WRLCK; + if (fcntl(env->lockfhp->fd, F_SETLKW, &k_lock)) + goto err; + + /* If the resource is still available, it's ours. */ + if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + locked = 1; + + F_SET(mutexp, DB_MUTEX_LOCKED); + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); + } + + /* Release the kernel lock. */ + k_lock.l_type = F_UNLCK; + if (fcntl(env->lockfhp->fd, F_SETLK, &k_lock)) + goto err; + + /* + * If we got the resource lock we're done. + * + * !!! + * We can't check to see if the lock is ours, because we may + * be trying to block ourselves in the lock manager, and so + * the holder of the lock that's preventing us from getting + * the lock may be us! (Seriously.) + */ + if (locked) + break; + } + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield every time + * we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + return (0); + +err: ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("2019", "fcntl lock failed")); + return (__env_panic(env, __os_posix_err(ret))); +} + +/* + * __db_fcntl_mutex_lock + * Lock a mutex, blocking if necessary. 
+ * + * PUBLIC: int __db_fcntl_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); + */ +int +__db_fcntl_mutex_lock(env, mutex, timeout) + ENV *env; + db_mutex_t mutex; + db_timeout_t timeout; +{ + return (__db_fcntl_mutex_lock_int(env, mutex, timeout, 1)); +} + +/* + * __db_fcntl_mutex_trylock + * Try to lock a mutex, without blocking when it is busy. + * + * PUBLIC: int __db_fcntl_mutex_trylock __P((ENV *, db_mutex_t)); + */ +int +__db_fcntl_mutex_trylock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_fcntl_mutex_lock_int(env, mutex, 0, 0)); +} + +/* + * __db_fcntl_mutex_unlock -- + * Release a mutex. + * + * PUBLIC: int __db_fcntl_mutex_unlock __P((ENV *, db_mutex_t)); + */ +int +__db_fcntl_mutex_unlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + +#ifdef DIAGNOSTIC + if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + __db_errx(env, DB_STR("2020", + "fcntl unlock failed: lock already unlocked")); + return (__env_panic(env, EACCES)); + } +#endif + + /* + * Release the resource. We don't have to acquire any locks because + * processes trying to acquire the lock are waiting for the flag to + * go to 0. Once that happens the waiters will serialize acquiring + * an exclusive kernel lock before locking the mutex. + */ + F_CLR(mutexp, DB_MUTEX_LOCKED); + + return (0); +} + +/* + * __db_fcntl_mutex_destroy -- + * Destroy a mutex. + * + * PUBLIC: int __db_fcntl_mutex_destroy __P((ENV *, db_mutex_t)); + */ +int +__db_fcntl_mutex_destroy(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + COMPQUIET(env, NULL); + COMPQUIET(mutex, MUTEX_INVALID); + + return (0); +} diff --git a/src/mutex/mut_method.c b/src/mutex/mut_method.c new file mode 100644 index 00000000..dda4b67b --- /dev/null +++ b/src/mutex/mut_method.c @@ -0,0 +1,482 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __mutex_alloc_pp -- + * Allocate a mutex, application method. + * + * PUBLIC: int __mutex_alloc_pp __P((DB_ENV *, u_int32_t, db_mutex_t *)); + */ +int +__mutex_alloc_pp(dbenv, flags, indxp) + DB_ENV *dbenv; + u_int32_t flags; + db_mutex_t *indxp; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + if ((ret = __db_fchk(env, "DB_ENV->mutex_alloc", + flags, DB_MUTEX_PROCESS_ONLY | DB_MUTEX_SELF_BLOCK)) != 0) + return (ret); + + ENV_ENTER(env, ip); + ret = __mutex_alloc(env, MTX_APPLICATION, flags, indxp); + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __mutex_free_pp -- + * Destroy a mutex, application method. + * + * PUBLIC: int __mutex_free_pp __P((DB_ENV *, db_mutex_t)); + */ +int +__mutex_free_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + if (indx == MUTEX_INVALID) + return (EINVAL); + + /* + * Internally Berkeley DB passes around the db_mutex_t address on + * free, because we want to make absolutely sure the slot gets + * overwritten with MUTEX_INVALID. We don't export MUTEX_INVALID, + * so we don't export that part of the API, either. + */ + ENV_ENTER(env, ip); + ret = __mutex_free(env, &indx); + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __mutex_lock -- + * Lock a mutex, application method. + * + * PUBLIC: int __mutex_lock_pp __P((DB_ENV *, db_mutex_t)); + */ +int +__mutex_lock_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + if (indx == MUTEX_INVALID) + return (EINVAL); + + ENV_ENTER(env, ip); + ret = __mutex_lock(env, indx); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __mutex_unlock -- + * Unlock a mutex, application method. 
+ * + * PUBLIC: int __mutex_unlock_pp __P((DB_ENV *, db_mutex_t)); + */ +int +__mutex_unlock_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + if (indx == MUTEX_INVALID) + return (EINVAL); + + ENV_ENTER(env, ip); + ret = __mutex_unlock(env, indx); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __mutex_get_align -- + * DB_ENV->mutex_get_align. + * + * PUBLIC: int __mutex_get_align __P((DB_ENV *, u_int32_t *)); + */ +int +__mutex_get_align(dbenv, alignp) + DB_ENV *dbenv; + u_int32_t *alignp; +{ + ENV *env; + + env = dbenv->env; + + if (MUTEX_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *alignp = ((DB_MUTEXREGION *) + env->mutex_handle->reginfo.primary)->stat.st_mutex_align; + } else + *alignp = dbenv->mutex_align; + return (0); +} + +/* + * __mutex_set_align -- + * DB_ENV->mutex_set_align. + * + * PUBLIC: int __mutex_set_align __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_set_align(dbenv, align) + DB_ENV *dbenv; + u_int32_t align; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mutex_align"); + + if (align == 0 || !POWER_OF_TWO(align)) { + __db_errx(env, DB_STR("2018", +"DB_ENV->mutex_set_align: alignment value must be a non-zero power-of-two")); + return (EINVAL); + } + + dbenv->mutex_align = align; + return (0); +} + +/* + * __mutex_get_increment -- + * DB_ENV->mutex_get_increment. + * + * PUBLIC: int __mutex_get_increment __P((DB_ENV *, u_int32_t *)); + */ +int +__mutex_get_increment(dbenv, incrementp) + DB_ENV *dbenv; + u_int32_t *incrementp; +{ + /* + * We don't maintain the increment in the region (it just makes + * no sense). Return whatever we have configured on this handle, + * nobody is ever going to notice. + */ + *incrementp = dbenv->mutex_inc; + return (0); +} + +/* + * __mutex_set_increment -- + * DB_ENV->mutex_set_increment. 
+ * + * PUBLIC: int __mutex_set_increment __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_set_increment(dbenv, increment) + DB_ENV *dbenv; + u_int32_t increment; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mutex_increment"); + + dbenv->mutex_cnt = 0; + dbenv->mutex_inc = increment; + return (0); +} + +/* + * __mutex_get_init -- + * DB_ENV->mutex_get_init. + * + * PUBLIC: int __mutex_get_init __P((DB_ENV *, u_int32_t *)); + */ +int +__mutex_get_init(dbenv, initp) + DB_ENV *dbenv; + u_int32_t *initp; +{ + ENV *env; + + env = dbenv->env; + + if (MUTEX_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *initp = ((DB_MUTEXREGION *) + env->mutex_handle->reginfo.primary)->stat.st_mutex_init; + } else + *initp = dbenv->mutex_cnt; + return (0); +} + +/* + * __mutex_set_init -- + * DB_ENV->mutex_set_init. + * + * PUBLIC: int __mutex_set_init __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_set_init(dbenv, init) + DB_ENV *dbenv; + u_int32_t init; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mutex_init"); + + dbenv->mutex_cnt = init; + dbenv->mutex_inc = 0; + return (0); +} + +/* + * __mutex_get_max -- + * DB_ENV->mutex_get_max. + * + * PUBLIC: int __mutex_get_max __P((DB_ENV *, u_int32_t *)); + */ +int +__mutex_get_max(dbenv, maxp) + DB_ENV *dbenv; + u_int32_t *maxp; +{ + ENV *env; + + env = dbenv->env; + + if (MUTEX_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *maxp = ((DB_MUTEXREGION *) + env->mutex_handle->reginfo.primary)->stat.st_mutex_max; + } else + *maxp = dbenv->mutex_max; + return (0); +} + +/* + * __mutex_set_max -- + * DB_ENV->mutex_set_max. 
+ * + * PUBLIC: int __mutex_set_max __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_set_max(dbenv, max) + DB_ENV *dbenv; + u_int32_t max; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mutex_max"); + + dbenv->mutex_max = max; + dbenv->mutex_inc = 0; + return (0); +} + +/* + * __mutex_get_tas_spins -- + * DB_ENV->mutex_get_tas_spins. + * + * PUBLIC: int __mutex_get_tas_spins __P((DB_ENV *, u_int32_t *)); + */ +int +__mutex_get_tas_spins(dbenv, tas_spinsp) + DB_ENV *dbenv; + u_int32_t *tas_spinsp; +{ + ENV *env; + + env = dbenv->env; + + if (MUTEX_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *tas_spinsp = ((DB_MUTEXREGION *)env-> + mutex_handle->reginfo.primary)->stat.st_mutex_tas_spins; + } else + *tas_spinsp = dbenv->mutex_tas_spins; + return (0); +} + +/* + * __mutex_set_tas_spins -- + * DB_ENV->mutex_set_tas_spins. + * + * PUBLIC: int __mutex_set_tas_spins __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_set_tas_spins(dbenv, tas_spins) + DB_ENV *dbenv; + u_int32_t tas_spins; +{ + ENV *env; + + env = dbenv->env; + + /* + * Bound the value -- less than 1 makes no sense, greater than 1M + * makes no sense. + */ + if (tas_spins == 0) + tas_spins = 1; + else if (tas_spins > 1000000) + tas_spins = 1000000; + + /* + * There's a theoretical race here, but I'm not interested in locking + * the test-and-set spin count. The worst possibility is a thread + * reads out a bad spin count and spins until it gets the lock, but + * that's awfully unlikely. + */ + if (MUTEX_ON(env)) + ((DB_MUTEXREGION *)env->mutex_handle + ->reginfo.primary)->stat.st_mutex_tas_spins = tas_spins; + else + dbenv->mutex_tas_spins = tas_spins; + return (0); +} + +#if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) +/* + * Provide atomic operations for platforms which have mutexes yet do not have + * native atomic operations configured. They are emulated by protected the + * operation with a mutex. 
The address of the atomic value selects which + * mutex to use. + */ +/* + * atomic_get_mutex - + * Map an address to the mutex to use to atomically modify it + */ +static inline db_mutex_t atomic_get_mutex(env, v) + ENV *env; + db_atomic_t *v; +{ + u_int index; + DB_MUTEXREGION *mtxreg; + + if (!MUTEX_ON(env)) + return (MUTEX_INVALID); + index = (u_int)(((uintptr_t) (v)) >> 6) % MAX_ATOMIC_MUTEXES; + mtxreg = (DB_MUTEXREGION *)env->mutex_handle->reginfo.primary; + return (mtxreg->mtx_atomic[index]); +} + +/* + * __atomic_inc + * Use a mutex to provide an atomic increment function + * + * PUBLIC: #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) + * PUBLIC: atomic_value_t __atomic_inc __P((ENV *, db_atomic_t *)); + * PUBLIC: #endif + */ +atomic_value_t +__atomic_inc(env, v) + ENV *env; + db_atomic_t *v; +{ + db_mutex_t mtx; + int ret; + + mtx = atomic_get_mutex(env, v); + MUTEX_LOCK(env, mtx); + ret = ++v->value; + MUTEX_UNLOCK(env, mtx); + + return (ret); +} + +/* + * __atomic_dec + * Use a mutex to provide an atomic decrement function + * + * PUBLIC: #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) + * PUBLIC: atomic_value_t __atomic_dec __P((ENV *, db_atomic_t *)); + * PUBLIC: #endif + */ +atomic_value_t +__atomic_dec(env, v) + ENV *env; + db_atomic_t *v; +{ + db_mutex_t mtx; + int ret; + + mtx = atomic_get_mutex(env, v); + MUTEX_LOCK(env, mtx); + ret = --v->value; + MUTEX_UNLOCK(env, mtx); + + return (ret); +} + +/* + * atomic_compare_exchange + * Use a mutex to provide an atomic decrement function + * + * PUBLIC: #if !defined(HAVE_ATOMIC_SUPPORT) && defined(HAVE_MUTEX_SUPPORT) + * PUBLIC: int atomic_compare_exchange + * PUBLIC: __P((ENV *, db_atomic_t *, atomic_value_t, atomic_value_t)); + * PUBLIC: #endif + * Returns 1 if the *v was equal to oldval, else 0 + * + * Side Effect: + * Sets the value to newval if and only if returning 1 + */ +int +atomic_compare_exchange(env, v, oldval, newval) + ENV *env; + db_atomic_t *v; + 
atomic_value_t oldval; + atomic_value_t newval; +{ + db_mutex_t mtx; + int ret; + + if (atomic_read(v) != oldval) + return (0); + + mtx = atomic_get_mutex(env, v); + MUTEX_LOCK(env, mtx); + ret = atomic_read(v) == oldval; + if (ret) + atomic_init(v, newval); + MUTEX_UNLOCK(env, mtx); + + return (ret); +} +#endif diff --git a/src/mutex/mut_pthread.c b/src/mutex/mut_pthread.c new file mode 100644 index 00000000..944e26cb --- /dev/null +++ b/src/mutex/mut_pthread.c @@ -0,0 +1,769 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" + +/* + * This is where we load in architecture/compiler specific mutex code. + */ +#define LOAD_ACTUAL_MUTEX_CODE + +#ifdef HAVE_MUTEX_SOLARIS_LWP +#define pthread_cond_destroy(x) 0 +#define pthread_cond_signal _lwp_cond_signal +#define pthread_cond_broadcast _lwp_cond_broadcast +#define pthread_cond_wait _lwp_cond_wait +#define pthread_mutex_destroy(x) 0 +#define pthread_mutex_lock _lwp_mutex_lock +#define pthread_mutex_trylock _lwp_mutex_trylock +#define pthread_mutex_unlock _lwp_mutex_unlock +#endif +#ifdef HAVE_MUTEX_UI_THREADS +#define pthread_cond_destroy(x) cond_destroy +#define pthread_cond_broadcast cond_broadcast +#define pthread_cond_wait cond_wait +#define pthread_mutex_destroy mutex_destroy +#define pthread_mutex_lock mutex_lock +#define pthread_mutex_trylock mutex_trylock +#define pthread_mutex_unlock mutex_unlock +#endif + +/* + * According to HP-UX engineers contacted by Netscape, + * pthread_mutex_unlock() will occasionally return EFAULT for no good reason + * on mutexes in shared memory regions, and the correct caller behavior + * is to try again. Do so, up to EFAULT_RETRY_ATTEMPTS consecutive times. + * Note that we don't bother to restrict this to HP-UX; + * it should be harmless elsewhere. 
[#2471] + */ +#define EFAULT_RETRY_ATTEMPTS 5 +#define RETRY_ON_EFAULT(func_invocation, ret) do { \ + int i; \ + i = EFAULT_RETRY_ATTEMPTS; \ + do { \ + RET_SET((func_invocation), ret); \ + } while (ret == EFAULT && --i > 0); \ +} while (0) + +/* + * IBM's MVS pthread mutex implementation returns -1 and sets errno rather than + * returning errno itself. As -1 is not a valid errno value, assume functions + * returning -1 have set errno. If they haven't, return a random error value. + */ +#define RET_SET(f, ret) do { \ + if (((ret) = (f)) == -1 && ((ret) = errno) == 0) \ + (ret) = EAGAIN; \ +} while (0) + +/* + * __db_pthread_mutex_init -- + * Initialize a pthread mutex: either a native one or + * just the mutex for block/wakeup of a hybrid test-and-set mutex + * + * + * PUBLIC: int __db_pthread_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); + */ +int +__db_pthread_mutex_init(env, mutex, flags) + ENV *env; + db_mutex_t mutex; + u_int32_t flags; +{ + DB_MUTEX *mutexp; + int ret; + + mutexp = MUTEXP_SET(env, mutex); + ret = 0; + +#ifndef HAVE_MUTEX_HYBRID + /* Can't have self-blocking shared latches. 
*/ + DB_ASSERT(env, !LF_ISSET(DB_MUTEX_SELF_BLOCK) || + !LF_ISSET(DB_MUTEX_SHARED)); +#endif + +#ifdef HAVE_MUTEX_PTHREADS + { +#ifndef HAVE_MUTEX_THREAD_ONLY + pthread_condattr_t condattr; + pthread_mutexattr_t mutexattr; +#endif + pthread_condattr_t *condattrp = NULL; + pthread_mutexattr_t *mutexattrp = NULL; + +#ifndef HAVE_MUTEX_HYBRID + if (LF_ISSET(DB_MUTEX_SHARED)) { +#if defined(HAVE_SHARED_LATCHES) + pthread_rwlockattr_t rwlockattr, *rwlockattrp = NULL; +#ifndef HAVE_MUTEX_THREAD_ONLY + if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) { + RET_SET((pthread_rwlockattr_init(&rwlockattr)), ret); + if (ret != 0) + goto err; + RET_SET((pthread_rwlockattr_setpshared( + &rwlockattr, PTHREAD_PROCESS_SHARED)), ret); + rwlockattrp = &rwlockattr; + } +#endif + + if (ret == 0) + RET_SET((pthread_rwlock_init(&mutexp->u.rwlock, + rwlockattrp)), ret); + if (rwlockattrp != NULL) + (void)pthread_rwlockattr_destroy(rwlockattrp); + + F_SET(mutexp, DB_MUTEX_SHARED); + /* For rwlocks, we're done - cannot use the mutex or cond */ + goto err; +#endif + } +#endif +#ifndef HAVE_MUTEX_THREAD_ONLY + if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) { + RET_SET((pthread_mutexattr_init(&mutexattr)), ret); + if (ret != 0) + goto err; + RET_SET((pthread_mutexattr_setpshared( + &mutexattr, PTHREAD_PROCESS_SHARED)), ret); + mutexattrp = &mutexattr; + } +#endif + + if (ret == 0) + RET_SET( + (pthread_mutex_init(&mutexp->u.m.mutex, mutexattrp)), ret); + + if (mutexattrp != NULL) + (void)pthread_mutexattr_destroy(mutexattrp); + if (ret != 0) + goto err; + if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) { +#ifndef HAVE_MUTEX_THREAD_ONLY + if (!LF_ISSET(DB_MUTEX_PROCESS_ONLY)) { + RET_SET((pthread_condattr_init(&condattr)), ret); + if (ret != 0) + goto err; + + condattrp = &condattr; + RET_SET((pthread_condattr_setpshared( + &condattr, PTHREAD_PROCESS_SHARED)), ret); + } +#endif + + if (ret == 0) + RET_SET((pthread_cond_init( + &mutexp->u.m.cond, condattrp)), ret); + + F_SET(mutexp, DB_MUTEX_SELF_BLOCK); + if (condattrp != NULL) 
+ (void)pthread_condattr_destroy(condattrp); + } + + } +#endif +#ifdef HAVE_MUTEX_SOLARIS_LWP + /* + * XXX + * Gcc complains about missing braces in the static initializations of + * lwp_cond_t and lwp_mutex_t structures because the structures contain + * sub-structures/unions and the Solaris include file that defines the + * initialization values doesn't have surrounding braces. There's not + * much we can do. + */ + if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) { + static lwp_mutex_t mi = DEFAULTMUTEX; + + mutexp->mutex = mi; + } else { + static lwp_mutex_t mi = SHAREDMUTEX; + + mutexp->mutex = mi; + } + if (LF_ISSET(DB_MUTEX_SELF_BLOCK)) { + if (LF_ISSET(DB_MUTEX_PROCESS_ONLY)) { + static lwp_cond_t ci = DEFAULTCV; + + mutexp->cond = ci; + } else { + static lwp_cond_t ci = SHAREDCV; + + mutexp->cond = ci; + } + F_SET(mutexp, DB_MUTEX_SELF_BLOCK); + } +#endif +#ifdef HAVE_MUTEX_UI_THREADS + { + int type; + + type = LF_ISSET(DB_MUTEX_PROCESS_ONLY) ? USYNC_THREAD : USYNC_PROCESS; + + ret = mutex_init(&mutexp->mutex, type, NULL); + if (ret == 0 && LF_ISSET(DB_MUTEX_SELF_BLOCK)) { + ret = cond_init(&mutexp->cond, type, NULL); + + F_SET(mutexp, DB_MUTEX_SELF_BLOCK); + }} +#endif + +err: if (ret != 0) { + __db_err(env, ret, DB_STR("2021", + "unable to initialize mutex")); + } + return (ret); +} + +/* + * __db_pthread_mutex_prep + * Prepare to use a pthread-based DB_MUTEX. + * + * This exclusively locks a DB_MUTEX's pthread_mutex_t or pthread_rwlock_t, + * before locking, unlocking, or waiting for the DB mutex to be become + * available in the requested mode (exclusive == 1, shared == 0). + * + * Test for failchk concerns here too, to avoid hanging on a dead pid/tid. 
+ */ +inline static int +__db_pthread_mutex_prep(env, mutex, mutexp, exclusive) + ENV *env; + db_mutex_t mutex; + DB_MUTEX *mutexp; + int exclusive; +{ + DB_ENV *dbenv; + DB_THREAD_INFO *ip; + int ret; + + dbenv = env->dbenv; + PERFMON4(env, + mutex, suspend, mutex, exclusive, mutexp->alloc_id, mutexp); + if (F_ISSET(dbenv, DB_ENV_FAILCHK)) { + for (;;) { + RET_SET_PTHREAD_TRYLOCK(mutexp, ret); + if (ret != EBUSY) + break; + if (dbenv->is_alive(dbenv, + mutexp->pid, mutexp->tid, 0) == 0) { + ret = __env_set_state(env, &ip, THREAD_VERIFY); + if (ret != 0 || + ip->dbth_state == THREAD_FAILCHK) { + ret = DB_RUNRECOVERY; + } else { + /* + * Some thread other than the true + * FAILCHK thread in this process is + * asking for the mutex held by the + * dead process/thread. We will block + * here until someone else does the + * cleanup. Same behavior as if we + * hadn't gone down the 'if + * DB_ENV_FAILCHK' path to start with. + */ + RET_SET_PTHREAD_LOCK(mutexp, ret); + break; + } + } + } + } else + RET_SET_PTHREAD_LOCK(mutexp, ret); + + PERFMON4(env, + mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp); + COMPQUIET(mutex, 0); + COMPQUIET(exclusive, 0); + return (ret); +} + +/* + * __db_pthread_mutex_condwait + * Perform a pthread condition wait for a DB_MUTEX. + * + * This will be a timed wait when a timespec has been specified. EINTR and + * spurious ETIME* values are mapped to 0, and hence success. The + * mutexp->u.m.mutex must be locked upon entry. When returning a success + * or timeout status it will have been locked again. + * + * Returns: + * 0 if it is safe to retry to get the mutex + * DB_TIMEOUT if the timeout exceeded + * a fatal error. The mutexp->u.m.mutex has been unlocked. 
+ */ +inline static int +__db_pthread_mutex_condwait(env, mutex, mutexp, timespec) + ENV *env; + db_mutex_t mutex; + DB_MUTEX *mutexp; + db_timespec *timespec; +{ + int ret; + +#ifdef MUTEX_DIAG + printf("condwait %ld %x wait busy %x count %d\n", + mutex, pthread_self(), MUTEXP_BUSY_FIELD(mutexp), mutexp->wait); +#endif + PERFMON4(env, mutex, suspend, mutex, TRUE, mutexp->alloc_id, mutexp); + + if (timespec != NULL) { + RET_SET((pthread_cond_timedwait(&mutexp->u.m.cond, + &mutexp->u.m.mutex, (struct timespec *) timespec)), ret); + if (ret == ETIMEDOUT) { + ret = DB_TIMEOUT; + goto ret; + } + } else + RET_SET((pthread_cond_wait(&mutexp->u.m.cond, + &mutexp->u.m.mutex)), ret); +#ifdef MUTEX_DIAG + printf("condwait %ld %x wait returns %d busy %x\n", + mutex, pthread_self(), ret, MUTEXP_BUSY_FIELD(mutexp)); +#endif + /* + * !!! + * Solaris bug workaround: pthread_cond_wait() sometimes returns ETIME + * -- out of sheer paranoia, check both ETIME and ETIMEDOUT. We + * believe this happens when the application uses SIGALRM for some + * purpose, e.g., the C library sleep call, and Solaris delivers the + * signal to the wrong LWP. + */ + if (ret != 0) { + if (ret == ETIMEDOUT || +#ifdef ETIME + ret == ETIME || +#endif + ret == EINTR) + ret = 0; + else + /* Failure, caller shouldn't condwait again. */ + (void)pthread_mutex_unlock(&mutexp->u.m.mutex); + } + +ret: + PERFMON4(env, mutex, resume, mutex, TRUE, mutexp->alloc_id, mutexp); + + COMPQUIET(mutex, 0); + COMPQUIET(env, 0); + return (ret); +} + +#ifndef HAVE_MUTEX_HYBRID +/* + * __db_pthread_mutex_lock + * Lock on a mutex, blocking if necessary. + * Timeouts are supported only for self-blocking mutexes. + * + * Self-blocking shared latches are not supported. 
+ * + * PUBLIC: #ifndef HAVE_MUTEX_HYBRID + * PUBLIC: int __db_pthread_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t)); + * PUBLIC: #endif + */ +int +__db_pthread_mutex_lock(env, mutex, timeout) + ENV *env; + db_mutex_t mutex; + db_timeout_t timeout; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + db_timespec timespec; + int ret, t_ret; + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + t_ret = 0; + mutexp = MUTEXP_SET(env, mutex); + + CHECK_MTX_THREAD(env, mutexp); + +#if defined(HAVE_STATISTICS) + /* + * We want to know which mutexes are contentious, but don't want to + * do an interlocked test here -- that's slower when the underlying + * system has adaptive mutexes and can perform optimizations like + * spinning only if the thread holding the mutex is actually running + * on a CPU. Make a guess, using a normal load instruction. + */ + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + STAT_INC(env, mutex, set_wait, mutexp->mutex_set_wait, mutex); + else + STAT_INC(env, + mutex, set_nowait, mutexp->mutex_set_nowait, mutex); +#endif + + /* Single-thread the next block, except during the possible condwait. */ + if ((ret = __db_pthread_mutex_prep(env, mutex, mutexp, TRUE)) != 0) + goto err; + + if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) { + if (timeout != 0) + timespecclear(×pec); + while (MUTEXP_IS_BUSY(mutexp)) { + /* Set expiration timer upon first need. */ + if (timeout != 0 && !timespecisset(×pec)) { + timespecclear(×pec); + __clock_set_expires(env, ×pec, timeout); + } + t_ret = __db_pthread_mutex_condwait(env, + mutex, mutexp, timeout == 0 ? NULL : ×pec); + if (t_ret != 0) { + if (t_ret == DB_TIMEOUT) + goto out; + ret = t_ret; + goto err; + } + } + + F_SET(mutexp, DB_MUTEX_LOCKED); + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); +out: + /* #2471: HP-UX can sporadically return EFAULT. 
See above */ + RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret); + if (ret != 0) + goto err; + } else { +#ifdef DIAGNOSTIC + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + char buf[DB_THREADID_STRLEN]; + (void)dbenv->thread_id_string(dbenv, + mutexp->pid, mutexp->tid, buf); + __db_errx(env, DB_STR_A("2022", + "pthread lock failed: lock currently in use: pid/tid: %s", + "%s"), buf); + ret = EINVAL; + goto err; + } +#endif + F_SET(mutexp, DB_MUTEX_LOCKED); + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); + } + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield every time + * we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + return (t_ret); + +err: + __db_err(env, ret, DB_STR("2023", "pthread lock failed")); + return (__env_panic(env, ret)); +} +#endif + +#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID) +/* + * __db_pthread_mutex_readlock + * Take a shared lock on a mutex, blocking if necessary. + * + * PUBLIC: #if defined(HAVE_SHARED_LATCHES) + * PUBLIC: int __db_pthread_mutex_readlock __P((ENV *, db_mutex_t)); + * PUBLIC: #endif + */ +int +__db_pthread_mutex_readlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + int ret; + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED)); + + CHECK_MTX_THREAD(env, mutexp); + +#if defined(HAVE_STATISTICS) + /* + * We want to know which mutexes are contentious, but don't want to + * do an interlocked test here -- that's slower when the underlying + * system has adaptive mutexes and can perform optimizations like + * spinning only if the thread holding the mutex is actually running + * on a CPU. Make a guess, using a normal load instruction. 
+ */ + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + STAT_INC(env, + mutex, set_rd_wait, mutexp->mutex_set_rd_wait, mutex); + else + STAT_INC(env, + mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex); +#endif + + PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp); + RET_SET((pthread_rwlock_rdlock(&mutexp->u.rwlock)), ret); + PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp); + DB_ASSERT(env, !F_ISSET(mutexp, DB_MUTEX_LOCKED)); + if (ret != 0) + goto err; + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield every time + * we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + return (0); + +err: __db_err(env, ret, DB_STR("2024", "pthread readlock failed")); + return (__env_panic(env, ret)); +} +#endif + +#ifdef HAVE_MUTEX_HYBRID +/* + * __db_hybrid_mutex_suspend + * Suspend this thread until the mutex is free enough to give the caller a + * good chance of getting the mutex in the requested exclusivity mode. + * + * The major difference between this and the old __db_pthread_mutex_lock() + * is the additional 'exclusive' parameter. + * + * PUBLIC: #ifdef HAVE_MUTEX_HYBRID + * PUBLIC: int __db_hybrid_mutex_suspend + * PUBLIC: __P((ENV *, db_mutex_t, db_timespec *, int)); + * PUBLIC: #endif + */ +int +__db_hybrid_mutex_suspend(env, mutex, timespec, exclusive) + ENV *env; + db_mutex_t mutex; + db_timespec *timespec; + int exclusive; +{ + DB_MUTEX *mutexp; + int ret, t_ret; + + t_ret = 0; + mutexp = MUTEXP_SET(env, mutex); + + if (!exclusive) + DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED)); + DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)); + + if ((ret = __db_pthread_mutex_prep(env, mutex, mutexp, exclusive)) != 0) + goto err; + + /* + * Since this is only for hybrid mutexes the pthread mutex + * is only used to wait after spinning on the TAS mutex. + * Set the wait flag before checking to see if the mutex + * is still locked. 
The holder will clear DB_MUTEX_LOCKED + * before checking the wait counter. + */ + mutexp->wait++; + MUTEX_MEMBAR(mutexp->wait); + while (exclusive ? MUTEXP_IS_BUSY(mutexp) : + atomic_read(&mutexp->sharecount) == MUTEX_SHARE_ISEXCLUSIVE) { + t_ret = __db_pthread_mutex_condwait(env, mutex, mutexp, timespec); + if (t_ret != 0) { + if (t_ret == DB_TIMEOUT) + break; + ret = t_ret; + goto err; + } + MUTEX_MEMBAR(mutexp->flags); + } + + mutexp->wait--; + + /* #2471: HP-UX can sporadically return EFAULT. See above */ + RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret); + if (ret != 0) + goto err; + + PERFMON4(env, + mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp); + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield every time + * we get a mutex to ensure contention. + */ + if (F_ISSET(env->dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + return (t_ret); + +err: + PERFMON4(env, + mutex, resume, mutex, exclusive, mutexp->alloc_id, mutexp); + __db_err(env, ret, "pthread suspend failed"); + return (__env_panic(env, ret)); +} +#endif + +/* + * __db_pthread_mutex_unlock -- + * Release a mutex, or, if hybrid, wake a thread up from a suspend. 
+ * + * PUBLIC: int __db_pthread_mutex_unlock __P((ENV *, db_mutex_t)); + */ +int +__db_pthread_mutex_unlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + int ret; +#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID) + int waiters; +#endif + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); +#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID) + waiters = mutexp->wait; +#endif + +#if !defined(HAVE_MUTEX_HYBRID) && defined(DIAGNOSTIC) + if (!F_ISSET(mutexp, DB_MUTEX_LOCKED | DB_MUTEX_SHARED)) { + __db_errx(env, DB_STR("2025", + "pthread unlock failed: lock already unlocked")); + return (__env_panic(env, EACCES)); + } +#endif + if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) { + ret = __db_pthread_mutex_prep(env, mutex, mutexp, TRUE); + if (ret != 0) + goto err; + +#ifdef HAVE_MUTEX_HYBRID + STAT_INC(env, + mutex, hybrid_wakeup, mutexp->hybrid_wakeup, mutex); +#else + F_CLR(mutexp, DB_MUTEX_LOCKED); /* nop if DB_MUTEX_SHARED */ +#endif + + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) + RET_SET( + (pthread_cond_broadcast(&mutexp->u.m.cond)), ret); + else + RET_SET((pthread_cond_signal(&mutexp->u.m.cond)), ret); + if (ret != 0) + goto err; + } else { +#ifndef HAVE_MUTEX_HYBRID + F_CLR(mutexp, DB_MUTEX_LOCKED); +#endif + } + + /* See comment above; workaround for [#2471]. 
*/ +#if defined(HAVE_SHARED_LATCHES) && !defined(HAVE_MUTEX_HYBRID) + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) + RETRY_ON_EFAULT(pthread_rwlock_unlock(&mutexp->u.rwlock), ret); + else +#endif + RETRY_ON_EFAULT(pthread_mutex_unlock(&mutexp->u.m.mutex), ret); + +err: if (ret != 0) { + __db_err(env, ret, "pthread unlock failed"); + return (__env_panic(env, ret)); + } +#if defined(MUTEX_DIAG) && defined(HAVE_MUTEX_HYBRID) + if (!MUTEXP_IS_BUSY(mutexp) && mutexp->wait != 0) + printf("unlock %ld %x busy %x waiters %d/%d\n", + mutex, pthread_self(), ret, + MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait); +#endif + return (ret); +} + +/* + * __db_pthread_mutex_destroy -- + * Destroy a mutex. + * If it is a native shared latch (not hybrid) then + * destroy only one half of the rwlock/mutex&cond union, + * depending whether it was allocated as shared + * + * PUBLIC: int __db_pthread_mutex_destroy __P((ENV *, db_mutex_t)); + */ +int +__db_pthread_mutex_destroy(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_MUTEX *mutexp; + DB_THREAD_INFO *ip; + int ret, t_ret, failchk_thread; + + if (!MUTEX_ON(env)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + + ret = 0; + failchk_thread = FALSE; + /* Get information to determine if we are really the failchk thread. */ + if (F_ISSET(env->dbenv, DB_ENV_FAILCHK)) { + ret = __env_set_state(env, &ip, THREAD_VERIFY); + if (ip != NULL && ip->dbth_state == THREAD_FAILCHK) + failchk_thread = TRUE; + } + +#ifndef HAVE_MUTEX_HYBRID + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) { +#if defined(HAVE_SHARED_LATCHES) + /* + * If there were dead processes waiting on the condition + * we may not be able to destroy it. Let failchk thread skip + * this, unless destroy is required. + * XXX What operating system resources might this leak? 
+ */ +#ifdef HAVE_PTHREAD_RWLOCK_REINIT_OKAY + if (!failchk_thread) +#endif + RET_SET( + (pthread_rwlock_destroy(&mutexp->u.rwlock)), ret); + /* For rwlocks, we're done - must not destroy rest of union */ + return (ret); +#endif + } +#endif + if (F_ISSET(mutexp, DB_MUTEX_SELF_BLOCK)) { + /* + * If there were dead processes waiting on the condition + * we may not be able to destroy it. Let failchk thread + * skip this, unless destroy is required. + */ +#ifdef HAVE_PTHREAD_COND_REINIT_OKAY + if (!failchk_thread) +#endif + RET_SET((pthread_cond_destroy(&mutexp->u.m.cond)), ret); + if (ret != 0) + __db_err(env, ret, DB_STR("2026", + "unable to destroy cond")); + } + RET_SET((pthread_mutex_destroy(&mutexp->u.m.mutex)), t_ret); + if (t_ret != 0 && !failchk_thread) { + __db_err(env, t_ret, DB_STR("2027", + "unable to destroy mutex")); + if (ret == 0) + ret = t_ret; + } + return (ret); +} diff --git a/src/mutex/mut_region.c b/src/mutex/mut_region.c new file mode 100644 index 00000000..4e022973 --- /dev/null +++ b/src/mutex/mut_region.c @@ -0,0 +1,469 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static db_size_t __mutex_align_size __P((ENV *)); +static int __mutex_region_init __P((ENV *, DB_MUTEXMGR *)); +static size_t __mutex_region_size __P((ENV *)); +static size_t __mutex_region_max __P((ENV *)); + +/* + * __mutex_open -- + * Open a mutex region. 
+ * + * PUBLIC: int __mutex_open __P((ENV *, int)); + */ +int +__mutex_open(env, create_ok) + ENV *env; + int create_ok; +{ + DB_ENV *dbenv; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + size_t size; + u_int32_t cpu_count; + int ret; +#ifndef HAVE_ATOMIC_SUPPORT + u_int i; +#endif + + dbenv = env->dbenv; + if (dbenv->mutex_max == 0 && + dbenv->mutex_cnt == 0 && dbenv->mutex_inc == 0 && + F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE) + return (0); + + /* + * Initialize the ENV handle information if not already initialized. + * + * Align mutexes on the byte boundaries specified by the application. + */ + if (dbenv->mutex_align == 0) + dbenv->mutex_align = MUTEX_ALIGN; + if (dbenv->mutex_tas_spins == 0) { + cpu_count = __os_cpu_count(); + if ((ret = __mutex_set_tas_spins(dbenv, cpu_count == 1 ? + cpu_count : cpu_count * MUTEX_SPINS_PER_PROCESSOR)) != 0) + return (ret); + } + + /* + * If the user didn't set an absolute value on the number of mutexes + * we'll need, figure it out. We're conservative in our allocation, + * we need mutexes for DB handles, group-commit queues and other things + * applications allocate at run-time. The application may have kicked + * up our count to allocate its own mutexes, add that in. + */ + if (dbenv->mutex_cnt == 0 && + F_ISSET(env, ENV_PRIVATE | ENV_THREAD) != ENV_PRIVATE) + dbenv->mutex_cnt = + __lock_region_mutex_count(env) + + __log_region_mutex_count(env) + + __memp_region_mutex_count(env) + + __txn_region_mutex_count(env); + + if (dbenv->mutex_max != 0 && dbenv->mutex_cnt > dbenv->mutex_max) + dbenv->mutex_cnt = dbenv->mutex_max; + + /* Create/initialize the mutex manager structure. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_MUTEXMGR), &mtxmgr)) != 0) + return (ret); + + /* Join/create the mutex region. 
*/ + mtxmgr->reginfo.env = env; + mtxmgr->reginfo.type = REGION_TYPE_MUTEX; + mtxmgr->reginfo.id = INVALID_REGION_ID; + mtxmgr->reginfo.flags = REGION_JOIN_OK; + size = __mutex_region_size(env); + if (create_ok) + F_SET(&mtxmgr->reginfo, REGION_CREATE_OK); + if ((ret = __env_region_attach(env, + &mtxmgr->reginfo, size, size + __mutex_region_max(env))) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(&mtxmgr->reginfo, REGION_CREATE)) + if ((ret = __mutex_region_init(env, mtxmgr)) != 0) + goto err; + + /* Set the local addresses. */ + mtxregion = mtxmgr->reginfo.primary = + R_ADDR(&mtxmgr->reginfo, mtxmgr->reginfo.rp->primary); + mtxmgr->mutex_array = R_ADDR(&mtxmgr->reginfo, mtxregion->mutex_off); + + env->mutex_handle = mtxmgr; + +#ifndef HAVE_ATOMIC_SUPPORT + /* If necessary allocate the atomic emulation mutexes. */ + if (F_ISSET(&mtxmgr->reginfo, REGION_CREATE)) + for (i = 0; i != MAX_ATOMIC_MUTEXES; i++) + if ((ret = __mutex_alloc_int( + env, 0, MTX_ATOMIC_EMULATION, + 0, &mtxregion->mtx_atomic[i])) != 0) + return (ret); +#endif + + return (0); + +err: env->mutex_handle = NULL; + if (mtxmgr->reginfo.addr != NULL) + (void)__env_region_detach(env, &mtxmgr->reginfo, 0); + + __os_free(env, mtxmgr); + return (ret); +} + +/* + * __mutex_region_init -- + * Initialize a mutex region in shared memory. 
+ */ +static int +__mutex_region_init(env, mtxmgr) + ENV *env; + DB_MUTEXMGR *mtxmgr; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_MUTEXREGION *mtxregion; + db_mutex_t mutex; + int ret; + void *mutex_array; + + dbenv = env->dbenv; + + COMPQUIET(mutexp, NULL); + + if ((ret = __env_alloc(&mtxmgr->reginfo, + sizeof(DB_MUTEXREGION), &mtxmgr->reginfo.primary)) != 0) { + __db_errx(env, DB_STR("2013", + "Unable to allocate memory for the mutex region")); + return (ret); + } + mtxmgr->reginfo.rp->primary = + R_OFFSET(&mtxmgr->reginfo, mtxmgr->reginfo.primary); + mtxregion = mtxmgr->reginfo.primary; + memset(mtxregion, 0, sizeof(*mtxregion)); + + mtxregion->mutex_size = __mutex_align_size(env); + + mtxregion->stat.st_mutex_align = dbenv->mutex_align; + if (dbenv->mutex_cnt == 0) + dbenv->mutex_cnt = 1; + mtxregion->stat.st_mutex_init = + mtxregion->stat.st_mutex_cnt = dbenv->mutex_cnt; + mtxregion->stat.st_mutex_max = dbenv->mutex_max; + if (mtxregion->stat.st_mutex_max != 0) + mtxregion->stat.st_mutex_max += dbenv->mutex_inc; + mtxregion->stat.st_mutex_tas_spins = dbenv->mutex_tas_spins; + + /* + * Get a chunk of memory to be used for the mutexes themselves. Each + * piece of the memory must be properly aligned, and that alignment + * may be more restrictive than the memory alignment returned by the + * underlying allocation code. We already know how much memory each + * mutex in the array will take up, but we need to offset the first + * mutex in the array so the array begins properly aligned. + * + * The OOB mutex (MUTEX_INVALID) is 0. To make this work, we ignore + * the first allocated slot when we build the free list. We have to + * correct the count by 1 here, though, otherwise our counter will be + * off by 1. 
+ */ + if ((ret = __env_alloc(&mtxmgr->reginfo, + mtxregion->stat.st_mutex_align + + (mtxregion->stat.st_mutex_cnt + 1) * mtxregion->mutex_size, + &mutex_array)) != 0) { + __db_errx(env, DB_STR("2014", + "Unable to allocate memory for mutexes from the region")); + return (ret); + } + + mtxregion->mutex_off_alloc = R_OFFSET(&mtxmgr->reginfo, mutex_array); + mutex_array = ALIGNP_INC(mutex_array, mtxregion->stat.st_mutex_align); + mtxregion->mutex_off = R_OFFSET(&mtxmgr->reginfo, mutex_array); + mtxmgr->mutex_array = mutex_array; + + /* + * Put the mutexes on a free list and clear the allocated flag. + * + * The OOB mutex (MUTEX_INVALID) is 0, skip it. + * + * The comparison is <, not <=, because we're looking ahead one + * in each link. + */ + env->mutex_handle = mtxmgr; + if (F_ISSET(env, ENV_PRIVATE)) { + mutexp = (DB_MUTEX *)mutex_array; + mutexp++; + mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align); + mtxregion->mutex_next = (db_mutex_t)mutexp; + } else { + mtxregion->mutex_next = 1; + mutexp = MUTEXP_SET(env, 1); + } + for (mutex = 1; mutex < mtxregion->stat.st_mutex_cnt; ++mutex) { + mutexp->flags = 0; + if (F_ISSET(env, ENV_PRIVATE)) + mutexp->mutex_next_link = (db_mutex_t)(mutexp + 1); + else + mutexp->mutex_next_link = mutex + 1; + mutexp++; + mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align); + } + mutexp->flags = 0; + mutexp->mutex_next_link = MUTEX_INVALID; + mtxregion->stat.st_mutex_free = mtxregion->stat.st_mutex_cnt; + mtxregion->stat.st_mutex_inuse = mtxregion->stat.st_mutex_inuse_max = 0; + if ((ret = __mutex_alloc(env, MTX_MUTEX_REGION, 0, &mutex)) != 0) + return (ret); + mtxmgr->reginfo.mtx_alloc = mtxregion->mtx_region = mutex; + + /* + * This is the first place we can test mutexes and we need to + * know if they're working. (They CAN fail, for example on + * SunOS, when using fcntl(2) for locking and using an + * in-memory filesystem as the database environment directory. 
+ * But you knew that, I'm sure -- it probably wasn't worth + * mentioning.) + */ + mutex = MUTEX_INVALID; + if ((ret = + __mutex_alloc(env, MTX_MUTEX_TEST, 0, &mutex) != 0) || + (ret = __mutex_lock(env, mutex)) != 0 || + (ret = __mutex_unlock(env, mutex)) != 0 || + (ret = __mutex_trylock(env, mutex)) != 0 || + (ret = __mutex_unlock(env, mutex)) != 0 || + (ret = __mutex_free(env, &mutex)) != 0) { + __db_errx(env, DB_STR("2015", + "Unable to acquire/release a mutex; check configuration")); + return (ret); + } +#ifdef HAVE_SHARED_LATCHES + if ((ret = + __mutex_alloc(env, + MTX_MUTEX_TEST, DB_MUTEX_SHARED, &mutex) != 0) || + (ret = __mutex_lock(env, mutex)) != 0 || + (ret = __mutex_tryrdlock(env, mutex)) != DB_LOCK_NOTGRANTED || + (ret = __mutex_unlock(env, mutex)) != 0 || + (ret = __mutex_rdlock(env, mutex)) != 0 || + (ret = __mutex_rdlock(env, mutex)) != 0 || + (ret = __mutex_unlock(env, mutex)) != 0 || + (ret = __mutex_unlock(env, mutex)) != 0 || + (ret = __mutex_free(env, &mutex)) != 0) { + __db_errx(env, DB_STR("2016", + "Unable to acquire/release a shared latch; check configuration")); + return (ret); + } +#endif + + return (0); +} + +/* + * __mutex_env_refresh -- + * Clean up after the mutex region on a close or failed open. + * + * PUBLIC: int __mutex_env_refresh __P((ENV *)); + */ +int +__mutex_env_refresh(env) + ENV *env; +{ + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + REGINFO *reginfo; + int ret; + + mtxmgr = env->mutex_handle; + reginfo = &mtxmgr->reginfo; + mtxregion = mtxmgr->reginfo.primary; + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (F_ISSET(env, ENV_PRIVATE)) { + reginfo->mtx_alloc = MUTEX_INVALID; + +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + /* + * If destroying the mutex region, return any system resources + * to the system. 
+ */ + __mutex_resource_return(env, reginfo); +#endif + /* Discard the mutex array. */ + __env_alloc_free( + reginfo, R_ADDR(reginfo, mtxregion->mutex_off_alloc)); + } + + /* Detach from the region. */ + ret = __env_region_detach(env, reginfo, 0); + + __os_free(env, mtxmgr); + + env->mutex_handle = NULL; + + return (ret); +} + +/* + * __mutex_align_size -- + * Return how much memory each mutex will take up if an array of them + * are to be properly aligned, individually, within the array. + */ +static db_size_t +__mutex_align_size(env) + ENV *env; +{ + DB_ENV *dbenv; + + dbenv = env->dbenv; + + return ((db_size_t)DB_ALIGN(sizeof(DB_MUTEX), dbenv->mutex_align)); +} + +/* + * __mutex_region_size -- + * Return the amount of space needed for the mutex region. + */ +static size_t +__mutex_region_size(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t s; + + dbenv = env->dbenv; + + s = sizeof(DB_MUTEXMGR) + 1024; + + /* We discard one mutex for the OOB slot. */ + s += __env_alloc_size( + (dbenv->mutex_cnt + 1) *__mutex_align_size(env)); + + return (s); +} + +/* + * __mutex_region_max -- + * Return the amount of space needed to reach the maximum size. + */ +static size_t +__mutex_region_max(env) + ENV *env; +{ + DB_ENV *dbenv; + u_int32_t max; + + dbenv = env->dbenv; + + if ((max = dbenv->mutex_max) == 0) { + if (F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE) + max = dbenv->mutex_inc + 1; + else + max = __lock_region_mutex_max(env) + + __txn_region_mutex_max(env) + + __log_region_mutex_max(env) + + dbenv->mutex_inc + 100; + } else if (max <= dbenv->mutex_cnt) + return (0); + else + max -= dbenv->mutex_cnt; + + return ( __env_alloc_size(max * __mutex_align_size(env))); +} + +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES +/* + * __mutex_resource_return + * Return any system-allocated mutex resources to the system. 
+ * + * PUBLIC: void __mutex_resource_return __P((ENV *, REGINFO *)); + */ +void +__mutex_resource_return(env, infop) + ENV *env; + REGINFO *infop; +{ + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr, mtxmgr_st; + DB_MUTEXREGION *mtxregion; + db_mutex_t i, indx; + void *orig_handle, *chunk; + uintmax_t size; + + /* + * This routine is called in two cases: when discarding the regions + * from a previous Berkeley DB run, during recovery, and two, when + * discarding regions as we shut down the database environment. + * + * Walk the list of mutexes and destroy any live ones. + * + * This is just like joining a region -- the REGINFO we're handed is + * the same as the one returned by __env_region_attach(), all we have + * to do is fill in the links. + * + * !!! + * The region may be corrupted, of course. We're safe because the + * only things we look at are things that are initialized when the + * region is created, and never modified after that. + */ + memset(&mtxmgr_st, 0, sizeof(mtxmgr_st)); + mtxmgr = &mtxmgr_st; + mtxmgr->reginfo = *infop; + mtxregion = mtxmgr->reginfo.primary = + R_ADDR(&mtxmgr->reginfo, mtxmgr->reginfo.rp->primary); + mtxmgr->mutex_array = R_ADDR(&mtxmgr->reginfo, mtxregion->mutex_off); + + /* + * This is a little strange, but the mutex_handle is what all of the + * underlying mutex routines will use to determine if they should do + * any work and to find their information. Save/restore the handle + * around the work loop. + * + * The OOB mutex (MUTEX_INVALID) is 0, skip it. 
+ */ + orig_handle = env->mutex_handle; + env->mutex_handle = mtxmgr; + if (F_ISSET(env, ENV_PRIVATE)) { + mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1; + chunk = NULL; + size = __env_elem_size(env, + (void *)mtxregion->mutex_off_alloc); + size -= sizeof(*mutexp); + } else + mutexp = MUTEXP_SET(env, 1); + for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) { + if (F_ISSET(env, ENV_PRIVATE)) + indx = (db_mutex_t)mutexp; + else + indx = i; + if (F_ISSET(mutexp, DB_MUTEX_ALLOCATED)) + (void)__mutex_destroy(env, indx); + mutexp++; + if (F_ISSET(env, ENV_PRIVATE) && + (size -= sizeof(*mutexp)) < sizeof(*mutexp)) { + mutexp = __env_get_chunk(&mtxmgr->reginfo, + &chunk, &size); + mutexp = ALIGNP_INC(mutexp, + mtxregion->stat.st_mutex_align); + } + } + env->mutex_handle = orig_handle; +} +#endif diff --git a/src/mutex/mut_stat.c b/src/mutex/mut_stat.c new file mode 100644 index 00000000..84bf5707 --- /dev/null +++ b/src/mutex/mut_stat.c @@ -0,0 +1,580 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_STATISTICS +static int __mutex_print_all __P((ENV *, u_int32_t)); +static const char *__mutex_print_id __P((int)); +static int __mutex_print_stats __P((ENV *, u_int32_t)); +static void __mutex_print_summary __P((ENV *)); +static int __mutex_stat __P((ENV *, DB_MUTEX_STAT **, u_int32_t)); + +/* + * __mutex_stat_pp -- + * ENV->mutex_stat pre/post processing. 
+ * + * PUBLIC: int __mutex_stat_pp __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t)); + */ +int +__mutex_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_MUTEX_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mutex_handle, "DB_ENV->mutex_stat", DB_INIT_MUTEX); + + if ((ret = __db_fchk(env, + "DB_ENV->mutex_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__mutex_stat(env, statp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __mutex_stat -- + * ENV->mutex_stat. + */ +static int +__mutex_stat(env, statp, flags) + ENV *env; + DB_MUTEX_STAT **statp; + u_int32_t flags; +{ + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + DB_MUTEX_STAT *stats; + int ret; + + *statp = NULL; + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + + if ((ret = __os_umalloc(env, sizeof(DB_MUTEX_STAT), &stats)) != 0) + return (ret); + + MUTEX_SYSTEM_LOCK(env); + + /* + * Most fields are maintained in the underlying region structure. + * Region size and region mutex are not. + */ + *stats = mtxregion->stat; + stats->st_regsize = mtxmgr->reginfo.rp->size; + stats->st_regmax = mtxmgr->reginfo.rp->max; + __mutex_set_wait_info(env, mtxregion->mtx_region, + &stats->st_region_wait, &stats->st_region_nowait); + if (LF_ISSET(DB_STAT_CLEAR)) + __mutex_clear(env, mtxregion->mtx_region); + + MUTEX_SYSTEM_UNLOCK(env); + + *statp = stats; + return (0); +} + +/* + * __mutex_stat_print_pp -- + * ENV->mutex_stat_print pre/post processing. 
+ * + * PUBLIC: int __mutex_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__mutex_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->mutex_handle, "DB_ENV->mutex_stat_print", DB_INIT_MUTEX); + + if ((ret = __db_fchk(env, "DB_ENV->mutex_stat_print", + flags, DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__mutex_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __mutex_stat_print + * ENV->mutex_stat_print method. + * + * PUBLIC: int __mutex_stat_print __P((ENV *, u_int32_t)); + */ +int +__mutex_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __mutex_print_stats(env, orig_flags); + __mutex_print_summary(env); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL)) + ret = __mutex_print_all(env, orig_flags); + + return (0); +} + +static void +__mutex_print_summary(env) + ENV *env; +{ + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + void *chunk; + db_mutex_t i; + u_int32_t counts[MTX_MAX_ENTRY + 2]; + uintmax_t size; + int alloc_id; + + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + memset(counts, 0, sizeof(counts)); + size = 0; + + if (F_ISSET(env, ENV_PRIVATE)) { + mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1; + chunk = NULL; + size = __env_elem_size(env, + ROFF_TO_P(mtxregion->mutex_off_alloc)); + size -= sizeof(*mutexp); + } else + mutexp = MUTEXP_SET(env, 1); + for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) { + if (!F_ISSET(mutexp, DB_MUTEX_ALLOCATED)) + counts[0]++; + else if (mutexp->alloc_id > MTX_MAX_ENTRY) + counts[MTX_MAX_ENTRY + 1]++; + else + counts[mutexp->alloc_id]++; + + mutexp++; + if 
(F_ISSET(env, ENV_PRIVATE) && + (size -= sizeof(*mutexp)) < sizeof(*mutexp)) { + mutexp = + __env_get_chunk(&mtxmgr->reginfo, &chunk, &size); + mutexp = + ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align); + } + } + __db_msg(env, "Mutex counts"); + __db_msg(env, "%d\tUnallocated", counts[0]); + for (alloc_id = 1; alloc_id <= MTX_TXN_REGION + 1; alloc_id++) + if (counts[alloc_id] != 0) + __db_msg(env, "%lu\t%s", + (u_long)counts[alloc_id], + __mutex_print_id(alloc_id)); + +} + +/* + * __mutex_print_stats -- + * Display default mutex region statistics. + */ +static int +__mutex_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_MUTEX_STAT *sp; + int ret; + + if ((ret = __mutex_stat(env, &sp, LF_ISSET(DB_STAT_CLEAR))) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default mutex region information:"); + + __db_dlbytes(env, "Mutex region size", + (u_long)0, (u_long)0, (u_long)sp->st_regsize); + __db_dlbytes(env, "Mutex region max size", + (u_long)0, (u_long)0, (u_long)sp->st_regmax); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait, + sp->st_region_wait + sp->st_region_nowait), NULL); + STAT_ULONG("Mutex alignment", sp->st_mutex_align); + STAT_ULONG("Mutex test-and-set spins", sp->st_mutex_tas_spins); + STAT_ULONG("Mutex initial count", sp->st_mutex_init); + STAT_ULONG("Mutex total count", sp->st_mutex_cnt); + STAT_ULONG("Mutex max count", sp->st_mutex_max); + STAT_ULONG("Mutex free count", sp->st_mutex_free); + STAT_ULONG("Mutex in-use count", sp->st_mutex_inuse); + STAT_ULONG("Mutex maximum in-use count", sp->st_mutex_inuse_max); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __mutex_print_all -- + * Display debugging mutex region statistics. 
+ */ +static int +__mutex_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN fn[] = { + { DB_MUTEX_ALLOCATED, "alloc" }, + { DB_MUTEX_LOCKED, "locked" }, + { DB_MUTEX_LOGICAL_LOCK, "logical" }, + { DB_MUTEX_PROCESS_ONLY, "process-private" }, + { DB_MUTEX_SELF_BLOCK, "self-block" }, + { 0, NULL } + }; + DB_MSGBUF mb, *mbp; + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + db_mutex_t i; + uintmax_t size; + void *chunk; + + DB_MSGBUF_INIT(&mb); + mbp = &mb; + + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + + __db_print_reginfo(env, &mtxmgr->reginfo, "Mutex", flags); + __db_msg(env, "%s", DB_GLOBAL(db_line)); + + __db_msg(env, "DB_MUTEXREGION structure:"); + __mutex_print_debug_single(env, + "DB_MUTEXREGION region mutex", mtxregion->mtx_region, flags); + STAT_ULONG("Size of the aligned mutex", mtxregion->mutex_size); + STAT_ULONG("Next free mutex", mtxregion->mutex_next); + + /* + * The OOB mutex (MUTEX_INVALID) is 0, skip it. + * + * We're not holding the mutex region lock, so we're racing threads of + * control allocating mutexes. That's OK, it just means we display or + * clear statistics while mutexes are moving. + */ + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "mutex\twait/nowait, pct wait, holder, flags"); + size = 0; + if (F_ISSET(env, ENV_PRIVATE)) { + mutexp = (DB_MUTEX *)mtxmgr->mutex_array + 1; + chunk = NULL; + size = __env_elem_size(env, + ROFF_TO_P(mtxregion->mutex_off_alloc)); + size -= sizeof(*mutexp); + } else + mutexp = MUTEXP_SET(env, 1); + for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) { + if (!F_ISSET(mutexp, DB_MUTEX_ALLOCATED)) + continue; + + __db_msgadd(env, mbp, "%5lu\t", (u_long)i); + + __mutex_print_debug_stats(env, mbp, + F_ISSET(env, ENV_PRIVATE) ? 
(db_mutex_t)mutexp : i, flags); + + if (mutexp->alloc_id != 0) + __db_msgadd(env, + mbp, ", %s", __mutex_print_id(mutexp->alloc_id)); + + __db_prflags(env, mbp, mutexp->flags, fn, " (", ")"); + + DB_MSGBUF_FLUSH(env, mbp); + + mutexp++; + if (F_ISSET(env, ENV_PRIVATE) && + (size -= sizeof(*mutexp)) < sizeof(*mutexp)) { + mutexp = + __env_get_chunk(&mtxmgr->reginfo, &chunk, &size); + mutexp = + ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align); + } + } + + return (0); +} + +/* + * __mutex_print_debug_single -- + * Print mutex internal debugging statistics for a single mutex on a + * single output line. + * + * PUBLIC: void __mutex_print_debug_single + * PUBLIC: __P((ENV *, const char *, db_mutex_t, u_int32_t)); + */ +void +__mutex_print_debug_single(env, tag, mutex, flags) + ENV *env; + const char *tag; + db_mutex_t mutex; + u_int32_t flags; +{ + DB_MSGBUF mb, *mbp; + + DB_MSGBUF_INIT(&mb); + mbp = &mb; + + if (LF_ISSET(DB_STAT_SUBSYSTEM)) + LF_CLR(DB_STAT_CLEAR); + __db_msgadd(env, mbp, "%lu\t%s ", (u_long)mutex, tag); + __mutex_print_debug_stats(env, mbp, mutex, flags); + DB_MSGBUF_FLUSH(env, mbp); +} + +/* + * __mutex_print_debug_stats -- + * Print mutex internal debugging statistics, that is, the statistics + * in the [] square brackets. 
+ * + * PUBLIC: void __mutex_print_debug_stats + * PUBLIC: __P((ENV *, DB_MSGBUF *, db_mutex_t, u_int32_t)); + */ +void +__mutex_print_debug_stats(env, mbp, mutex, flags) + ENV *env; + DB_MSGBUF *mbp; + db_mutex_t mutex; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + u_long value; + char buf[DB_THREADID_STRLEN]; +#if defined(HAVE_SHARED_LATCHES) && (defined(HAVE_MUTEX_HYBRID) || \ + !defined(HAVE_MUTEX_PTHREADS)) + int sharecount; +#endif + + if (mutex == MUTEX_INVALID) { + __db_msgadd(env, mbp, "[!Set]"); + return; + } + + dbenv = env->dbenv; + mutexp = MUTEXP_SET(env, mutex); + + __db_msgadd(env, mbp, "["); + if ((value = mutexp->mutex_set_wait) < 10000000) + __db_msgadd(env, mbp, "%lu", value); + else + __db_msgadd(env, mbp, "%luM", value / 1000000); + if ((value = mutexp->mutex_set_nowait) < 10000000) + __db_msgadd(env, mbp, "/%lu", value); + else + __db_msgadd(env, mbp, "/%luM", value / 1000000); + + __db_msgadd(env, mbp, " %d%% ", + DB_PCT(mutexp->mutex_set_wait, + mutexp->mutex_set_wait + mutexp->mutex_set_nowait)); + +#if defined(HAVE_SHARED_LATCHES) + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) { + __db_msgadd(env, mbp, " rd "); + if ((value = mutexp->mutex_set_rd_wait) < 10000000) + __db_msgadd(env, mbp, "%lu", value); + else + __db_msgadd(env, mbp, "%luM", value / 1000000); + if ((value = mutexp->mutex_set_rd_nowait) < 10000000) + __db_msgadd(env, mbp, "/%lu", value); + else + __db_msgadd(env, mbp, "/%luM", value / 1000000); + __db_msgadd(env, mbp, " %d%% ", + DB_PCT(mutexp->mutex_set_rd_wait, + mutexp->mutex_set_rd_wait + mutexp->mutex_set_rd_nowait)); + } +#endif + + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + __db_msgadd(env, mbp, "%s]", + dbenv->thread_id_string(dbenv, + mutexp->pid, mutexp->tid, buf)); + /* Pthreads-based shared latches do not expose the share count. 
*/ +#if defined(HAVE_SHARED_LATCHES) && (defined(HAVE_MUTEX_HYBRID) || \ + !defined(HAVE_MUTEX_PTHREADS)) + else if (F_ISSET(mutexp, DB_MUTEX_SHARED) && + (sharecount = atomic_read(&mutexp->sharecount)) != 0) { + if (sharecount == 1) + __db_msgadd(env, mbp, "1 reader"); + else + __db_msgadd(env, mbp, "%d readers", sharecount); + /* Show the thread which last acquired the latch. */ + __db_msgadd(env, mbp, " %s]", + dbenv->thread_id_string(dbenv, + mutexp->pid, mutexp->tid, buf)); + } +#endif + else + __db_msgadd(env, mbp, "!Own]"); + +#ifdef HAVE_MUTEX_HYBRID + if (mutexp->hybrid_wait != 0 || mutexp->hybrid_wakeup != 0) + __db_msgadd(env, mbp, " ", + mutexp->hybrid_wait, mutexp->hybrid_wakeup); +#endif + + if (LF_ISSET(DB_STAT_CLEAR)) + __mutex_clear(env, mutex); +} + +static const char * +__mutex_print_id(alloc_id) + int alloc_id; +{ + switch (alloc_id) { + case MTX_APPLICATION: return ("application allocated"); + case MTX_ATOMIC_EMULATION: return ("atomic emulation"); + case MTX_DB_HANDLE: return ("db handle"); + case MTX_ENV_DBLIST: return ("env dblist"); + case MTX_ENV_HANDLE: return ("env handle"); + case MTX_ENV_REGION: return ("env region"); + case MTX_LOCK_REGION: return ("lock region"); + case MTX_LOGICAL_LOCK: return ("logical lock"); + case MTX_LOG_FILENAME: return ("log filename"); + case MTX_LOG_FLUSH: return ("log flush"); + case MTX_LOG_HANDLE: return ("log handle"); + case MTX_LOG_REGION: return ("log region"); + case MTX_MPOOLFILE_HANDLE: return ("mpoolfile handle"); + case MTX_MPOOL_BH: return ("mpool buffer"); + case MTX_MPOOL_FH: return ("mpool filehandle"); + case MTX_MPOOL_FILE_BUCKET: return ("mpool file bucket"); + case MTX_MPOOL_HANDLE: return ("mpool handle"); + case MTX_MPOOL_HASH_BUCKET: return ("mpool hash bucket"); + case MTX_MPOOL_REGION: return ("mpool region"); + case MTX_MUTEX_REGION: return ("mutex region"); + case MTX_MUTEX_TEST: return ("mutex test"); + case MTX_REPMGR: return ("replication manager"); + case MTX_REP_CHKPT: return 
("replication checkpoint"); + case MTX_REP_DATABASE: return ("replication database"); + case MTX_REP_DIAG: return ("replication diagnostics"); + case MTX_REP_EVENT: return ("replication event"); + case MTX_REP_REGION: return ("replication region"); + case MTX_REP_START: return ("replication role config"); + case MTX_REP_WAITER: return ("replication txn apply"); + case MTX_SEQUENCE: return ("sequence"); + case MTX_TWISTER: return ("twister"); + case MTX_TCL_EVENTS: return ("Tcl events"); + case MTX_TXN_ACTIVE: return ("txn active list"); + case MTX_TXN_CHKPT: return ("transaction checkpoint"); + case MTX_TXN_COMMIT: return ("txn commit"); + case MTX_TXN_MVCC: return ("txn mvcc"); + case MTX_TXN_REGION: return ("txn region"); + default: return ("unknown mutex type"); + /* NOTREACHED */ + } +} + +/* + * __mutex_set_wait_info -- + * Return mutex statistics. + * + * PUBLIC: void __mutex_set_wait_info + * PUBLIC: __P((ENV *, db_mutex_t, uintmax_t *, uintmax_t *)); + */ +void +__mutex_set_wait_info(env, mutex, waitp, nowaitp) + ENV *env; + db_mutex_t mutex; + uintmax_t *waitp, *nowaitp; +{ + DB_MUTEX *mutexp; + + if (mutex == MUTEX_INVALID) { + *waitp = 0; + *nowaitp = 0; + return; + } + mutexp = MUTEXP_SET(env, mutex); + + *waitp = mutexp->mutex_set_wait; + *nowaitp = mutexp->mutex_set_nowait; +} + +/* + * __mutex_clear -- + * Clear mutex statistics. 
+ * + * PUBLIC: void __mutex_clear __P((ENV *, db_mutex_t)); + */ +void +__mutex_clear(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_MUTEX *mutexp; + + if (!MUTEX_ON(env)) + return; + + mutexp = MUTEXP_SET(env, mutex); + + mutexp->mutex_set_wait = mutexp->mutex_set_nowait = 0; +#ifdef HAVE_SHARED_LATCHES + mutexp->mutex_set_rd_wait = mutexp->mutex_set_rd_nowait = 0; +#endif +#ifdef HAVE_MUTEX_HYBRID + mutexp->hybrid_wait = mutexp->hybrid_wakeup = 0; +#endif +} + +#else /* !HAVE_STATISTICS */ + +int +__mutex_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_MUTEX_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__mutex_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/mutex/mut_stub.c b/src/mutex/mut_stub.c new file mode 100644 index 00000000..6b345502 --- /dev/null +++ b/src/mutex/mut_stub.c @@ -0,0 +1,252 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef HAVE_MUTEX_SUPPORT +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +/* + * If the library wasn't compiled with mutex support, various routines + * aren't available. Stub them here, returning an appropriate error. + */ +static int __db_nomutex __P((ENV *)); + +/* + * __db_nomutex -- + * Error when a Berkeley DB build doesn't include mutexes. 
+ */ +static int +__db_nomutex(env) + ENV *env; +{ + __db_errx(env, DB_STR("2001", + "library build did not include support for mutexes")); + return (DB_OPNOTSUP); +} + +int +__mutex_alloc_pp(dbenv, flags, indxp) + DB_ENV *dbenv; + u_int32_t flags; + db_mutex_t *indxp; +{ + COMPQUIET(flags, 0); + COMPQUIET(indxp, NULL); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_alloc(env, alloc_id, flags, indxp) + ENV *env; + int alloc_id; + u_int32_t flags; + db_mutex_t *indxp; +{ + COMPQUIET(env, NULL); + COMPQUIET(alloc_id, 0); + COMPQUIET(flags, 0); + *indxp = MUTEX_INVALID; + return (0); +} + +void +__mutex_clear(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + COMPQUIET(env, NULL); + COMPQUIET(mutex, MUTEX_INVALID); +} + +int +__mutex_free_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + COMPQUIET(indx, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_free(env, indxp) + ENV *env; + db_mutex_t *indxp; +{ + COMPQUIET(env, NULL); + *indxp = MUTEX_INVALID; + return (0); +} + +int +__mutex_get_align(dbenv, alignp) + DB_ENV *dbenv; + u_int32_t *alignp; +{ + COMPQUIET(alignp, NULL); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_get_increment(dbenv, incrementp) + DB_ENV *dbenv; + u_int32_t *incrementp; +{ + COMPQUIET(incrementp, NULL); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_get_max(dbenv, maxp) + DB_ENV *dbenv; + u_int32_t *maxp; +{ + COMPQUIET(maxp, NULL); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_get_tas_spins(dbenv, tas_spinsp) + DB_ENV *dbenv; + u_int32_t *tas_spinsp; +{ + COMPQUIET(tas_spinsp, NULL); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_lock_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + COMPQUIET(indx, 0); + return (__db_nomutex(dbenv->env)); +} + +void +__mutex_print_debug_single(env, tag, mutex, flags) + ENV *env; + const char *tag; + db_mutex_t mutex; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(tag, NULL); + COMPQUIET(mutex, MUTEX_INVALID); + COMPQUIET(flags, 
0); +} + +void +__mutex_print_debug_stats(env, mbp, mutex, flags) + ENV *env; + DB_MSGBUF *mbp; + db_mutex_t mutex; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(mbp, NULL); + COMPQUIET(mutex, MUTEX_INVALID); + COMPQUIET(flags, 0); +} + +int +__mutex_set_align(dbenv, align) + DB_ENV *dbenv; + u_int32_t align; +{ + COMPQUIET(align, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_set_increment(dbenv, increment) + DB_ENV *dbenv; + u_int32_t increment; +{ + COMPQUIET(increment, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_get_init(dbenv, initp) + DB_ENV *dbenv; + u_int32_t *initp; +{ + COMPQUIET(initp, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_set_init(dbenv, init) + DB_ENV *dbenv; + u_int32_t init; +{ + COMPQUIET(init, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_set_max(dbenv, max) + DB_ENV *dbenv; + u_int32_t max; +{ + COMPQUIET(max, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_set_tas_spins(dbenv, tas_spins) + DB_ENV *dbenv; + u_int32_t tas_spins; +{ + COMPQUIET(tas_spins, 0); + return (__db_nomutex(dbenv->env)); +} + +void +__mutex_set_wait_info(env, mutex, waitp, nowaitp) + ENV *env; + db_mutex_t mutex; + uintmax_t *waitp, *nowaitp; +{ + COMPQUIET(env, NULL); + COMPQUIET(mutex, MUTEX_INVALID); + *waitp = *nowaitp = 0; +} + +int +__mutex_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_MUTEX_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_nomutex(dbenv->env)); +} + +int +__mutex_unlock_pp(dbenv, indx) + DB_ENV *dbenv; + db_mutex_t indx; +{ + COMPQUIET(indx, 0); + return (__db_nomutex(dbenv->env)); +} +#endif /* !HAVE_MUTEX_SUPPORT */ diff --git a/src/mutex/mut_tas.c b/src/mutex/mut_tas.c new file mode 100644 index 00000000..cab86510 --- /dev/null +++ b/src/mutex/mut_tas.c @@ -0,0 
+1,608 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" + +static inline int __db_tas_mutex_lock_int + __P((ENV *, db_mutex_t, db_timeout_t, int)); +static inline int __db_tas_mutex_readlock_int __P((ENV *, db_mutex_t, int)); + +/* + * __db_tas_mutex_init -- + * Initialize a test-and-set mutex. + * + * PUBLIC: int __db_tas_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); + */ +int +__db_tas_mutex_init(env, mutex, flags) + ENV *env; + db_mutex_t mutex; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + int ret; + +#ifndef HAVE_MUTEX_HYBRID + COMPQUIET(flags, 0); +#endif + + dbenv = env->dbenv; + mutexp = MUTEXP_SET(env, mutex); + + /* Check alignment. */ + if (((uintptr_t)mutexp & (dbenv->mutex_align - 1)) != 0) { + __db_errx(env, DB_STR("2028", + "TAS: mutex not appropriately aligned")); + return (EINVAL); + } + +#ifdef HAVE_SHARED_LATCHES + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) + atomic_init(&mutexp->sharecount, 0); + else +#endif + if (MUTEX_INIT(&mutexp->tas)) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("2029", + "TAS: mutex initialize")); + return (__os_posix_err(ret)); + } +#ifdef HAVE_MUTEX_HYBRID + if ((ret = __db_pthread_mutex_init(env, + mutex, flags | DB_MUTEX_SELF_BLOCK)) != 0) + return (ret); +#endif + return (0); +} + +/* + * __db_tas_mutex_lock_int + * Internal function to lock a mutex, or just try to lock it without waiting + */ +inline static int +__db_tas_mutex_lock_int(env, mutex, timeout, nowait) + ENV *env; + db_mutex_t mutex; + db_timeout_t timeout; + int nowait; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + DB_THREAD_INFO *ip; + db_timespec now, timespec; + u_int32_t nspins; + int ret; +#ifdef HAVE_MUTEX_HYBRID + const u_long micros = 0; +#else + u_long micros, max_micros; + 
db_timeout_t time_left; +#endif + + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + mutexp = MUTEXP_SET(env, mutex); + + CHECK_MTX_THREAD(env, mutexp); + +#ifdef HAVE_STATISTICS + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + STAT_INC(env, mutex, set_wait, mutexp->mutex_set_wait, mutex); + else + STAT_INC(env, + mutex, set_nowait, mutexp->mutex_set_nowait, mutex); +#endif + +#ifndef HAVE_MUTEX_HYBRID + /* + * Wait 1ms initially, up to 10ms for mutexes backing logical database + * locks, and up to 25 ms for mutual exclusion data structure mutexes. + * SR: #7675 + */ + micros = 1000; + max_micros = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10000 : 25000; +#endif + + /* Clear the ending timespec so it'll be initialed upon first need. */ + if (timeout != 0) + timespecclear(×pec); + + /* + * Only check the thread state once, by initializing the thread + * control block pointer to null. If it is not the failchk + * thread, then ip will have a valid value subsequent times + * in the loop. + */ + ip = NULL; + +loop: /* Attempt to acquire the resource for N spins. */ + for (nspins = + mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) { +#ifdef HAVE_MUTEX_S390_CC_ASSEMBLY + tsl_t zero; + + zero = 0; +#endif + +#ifdef HAVE_MUTEX_HPPA_MSEM_INIT + relock: +#endif + /* + * Avoid interlocked instructions until they're likely to + * succeed by first checking whether it is held + */ + if (MUTEXP_IS_BUSY(mutexp) || !MUTEXP_ACQUIRE(mutexp)) { + if (F_ISSET(dbenv, DB_ENV_FAILCHK) && + ip == NULL && dbenv->is_alive(dbenv, + mutexp->pid, mutexp->tid, 0) == 0) { + ret = __env_set_state(env, &ip, THREAD_VERIFY); + if (ret != 0 || + ip->dbth_state == THREAD_FAILCHK) + return (DB_RUNRECOVERY); + } + if (nowait) + return (DB_LOCK_NOTGRANTED); + /* + * Some systems (notably those with newer Intel CPUs) + * need a small pause here. 
[#6975] + */ + MUTEX_PAUSE + continue; + } + + MEMBAR_ENTER(); + +#ifdef HAVE_MUTEX_HPPA_MSEM_INIT + /* + * HP semaphores are unlocked automatically when a holding + * process exits. If the mutex appears to be locked + * (F_ISSET(DB_MUTEX_LOCKED)) but we got here, assume this + * has happened. Set the pid and tid into the mutex and + * lock again. (The default state of the mutexes used to + * block in __lock_get_internal is locked, so exiting with + * a locked mutex is reasonable behavior for a process that + * happened to initialize or use one of them.) + */ + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); + goto relock; + } + /* + * If we make it here, the mutex isn't locked, the diagnostic + * won't fire, and we were really unlocked by someone calling + * the DB mutex unlock function. + */ +#endif +#ifdef DIAGNOSTIC + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + char buf[DB_THREADID_STRLEN]; + __db_errx(env, DB_STR_A("2030", + "TAS lock failed: lock %ld currently in use: ID: %s", + "%ld %s"), (long)mutex, + dbenv->thread_id_string(dbenv, + mutexp->pid, mutexp->tid, buf)); + return (__env_panic(env, EACCES)); + } +#endif + F_SET(mutexp, DB_MUTEX_LOCKED); + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield + * every time we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + return (0); + } + + /* + * We need to wait for the lock to become available. + * Possibly setup timeouts if this is the first wait, or + * check expiration times for the second and subsequent waits. + */ + if (timeout != 0) { + /* Set the expiration time if this is the first sleep . 
*/
+		if (!timespecisset(&timespec))
+			__clock_set_expires(env, &timespec, timeout);
+		else {
+			timespecclear(&now);
+			if (__clock_expired(env, &now, &timespec))
+				return (DB_TIMEOUT);
+#ifndef HAVE_MUTEX_HYBRID
+			timespecsub(&now, &timespec);
+			DB_TIMESPEC_TO_TIMEOUT(time_left, &now, 0);
+			time_left = timeout - time_left;
+			if (micros > time_left)
+				micros = time_left;
+#endif
+		}
+	}
+
+	/*
+	 * This yields for a while for tas mutexes, and just gives up the
+	 * processor for hybrid mutexes.
+	 * By yielding here we can get the other thread to give up the
+	 * mutex before calling the more expensive library mutex call.
+	 * Tests have shown this to be a big win when there is contention.
+	 */
+	PERFMON4(env, mutex, suspend, mutex, TRUE, mutexp->alloc_id, mutexp);
+	__os_yield(env, 0, micros);
+	PERFMON4(env, mutex, resume, mutex, TRUE, mutexp->alloc_id, mutexp);
+
+#if defined(HAVE_MUTEX_HYBRID)
+	if (!MUTEXP_IS_BUSY(mutexp))
+		goto loop;
+	/* Wait until the mutex can be obtained exclusively or it times out. */
+	if ((ret = __db_hybrid_mutex_suspend(env,
+	    mutex, timeout == 0 ? NULL : &timespec, TRUE)) != 0)
+		return (ret);
+#else
+	if ((micros <<= 1) > max_micros)
+		micros = max_micros;
+#endif
+
+	/*
+	 * We're spinning. The environment might be hung, and somebody else
+	 * has already recovered it. The first thing recovery does is panic
+	 * the environment. Check to see if we're never going to get this
+	 * mutex.
+	 */
+	PANIC_CHECK(env);
+
+	goto loop;
+}
+
+/*
+ * __db_tas_mutex_lock
+ *	Lock on a mutex, blocking if necessary.
+ *
+ * PUBLIC: int __db_tas_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
+ */
+int
+__db_tas_mutex_lock(env, mutex, timeout)
+	ENV *env;
+	db_mutex_t mutex;
+	db_timeout_t timeout;
+{
+	return (__db_tas_mutex_lock_int(env, mutex, timeout, 0));
+}
+
+/*
+ * __db_tas_mutex_trylock
+ *	Try to exclusively lock a mutex without ever blocking - ever!
+ *
+ * Returns 0 on success,
+ *	DB_LOCK_NOTGRANTED on timeout
+ *	Possibly DB_RUNRECOVERY if DB_ENV_FAILCHK or panic. 
+ * + * This will work for DB_MUTEX_SHARED, though it always tries + * for exclusive access. + * + * PUBLIC: int __db_tas_mutex_trylock __P((ENV *, db_mutex_t)); + */ +int +__db_tas_mutex_trylock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_tas_mutex_lock_int(env, mutex, 0, 1)); +} + +#if defined(HAVE_SHARED_LATCHES) +/* + * __db_tas_mutex_readlock_int + * Internal function to get a shared lock on a latch, blocking if necessary. + * + */ +static inline int +__db_tas_mutex_readlock_int(env, mutex, nowait) + ENV *env; + db_mutex_t mutex; + int nowait; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + DB_THREAD_INFO *ip; + int lock; + u_int32_t nspins; + int ret; +#ifndef HAVE_MUTEX_HYBRID + u_long micros, max_micros; +#endif + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mtxmgr = env->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + mutexp = MUTEXP_SET(env, mutex); + + CHECK_MTX_THREAD(env, mutexp); + + DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED)); +#ifdef HAVE_STATISTICS + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) + STAT_INC(env, + mutex, set_rd_wait, mutexp->mutex_set_rd_wait, mutex); + else + STAT_INC(env, + mutex, set_rd_nowait, mutexp->mutex_set_rd_nowait, mutex); +#endif + +#ifndef HAVE_MUTEX_HYBRID + /* + * Wait 1ms initially, up to 10ms for mutexes backing logical database + * locks, and up to 25 ms for mutual exclusion data structure mutexes. + * SR: #7675 + */ + micros = 1000; + max_micros = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10000 : 25000; +#endif + +loop: /* Attempt to acquire the resource for N spins. */ + for (nspins = + mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) { + lock = atomic_read(&mutexp->sharecount); + if (lock == MUTEX_SHARE_ISEXCLUSIVE || + !atomic_compare_exchange(env, + &mutexp->sharecount, lock, lock + 1)) { + /* + * Some systems (notably those with newer Intel CPUs) + * need a small pause here. 
[#6975]
+			 */
+			MUTEX_PAUSE
+			continue;
+		}
+
+		MEMBAR_ENTER();
+		/* For shared latches the threadid is the last requestor's id.
+		 */
+		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
+
+		return (0);
+	}
+
+	/*
+	 * Waiting for the latch must be avoided when it could allow a
+	 * 'failchk'ing thread to hang.
+	 */
+	if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
+	    dbenv->is_alive(dbenv, mutexp->pid, mutexp->tid, 0) == 0) {
+		ret = __env_set_state(env, &ip, THREAD_VERIFY);
+		if (ret != 0 || ip->dbth_state == THREAD_FAILCHK)
+			return (DB_RUNRECOVERY);
+	}
+
+	/*
+	 * It is possible to spin out when the latch is just shared, due to
+	 * many threads or interrupts interfering with the compare&exchange.
+	 * Avoid spurious DB_LOCK_NOTGRANTED returns by retrying.
+	 */
+	if (nowait) {
+		if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
+			goto loop;
+		return (DB_LOCK_NOTGRANTED);
+	}
+
+	/* Wait for the lock to become available. */
+#ifdef HAVE_MUTEX_HYBRID
+	/*
+	 * By yielding here we can get the other thread to give up the
+	 * mutex before calling the more expensive library mutex call.
+	 * Tests have shown this to be a big win when there is contention.
+	 */
+	PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
+	__os_yield(env, 0, 0);
+	PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp);
+	if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
+		goto loop;
+	/* Wait until the mutex is no longer exclusively locked. */
+	if ((ret = __db_hybrid_mutex_suspend(env, mutex, NULL, FALSE)) != 0)
+		return (ret);
+#else
+	PERFMON4(env, mutex, suspend, mutex, FALSE, mutexp->alloc_id, mutexp);
+	__os_yield(env, 0, micros);
+	PERFMON4(env, mutex, resume, mutex, FALSE, mutexp->alloc_id, mutexp);
+	if ((micros <<= 1) > max_micros)
+		micros = max_micros;
+#endif
+
+	/*
+	 * We're spinning. The environment might be hung, and somebody else
+	 * has already recovered it. The first thing recovery does is panic
+	 * the environment. 
Check to see if we're never going to get this + * mutex. + */ + PANIC_CHECK(env); + + goto loop; +} + +/* + * __db_tas_mutex_readlock + * Get a shared lock on a latch, waiting if necessary. + * + * PUBLIC: #if defined(HAVE_SHARED_LATCHES) + * PUBLIC: int __db_tas_mutex_readlock __P((ENV *, db_mutex_t)); + * PUBLIC: #endif + */ +int +__db_tas_mutex_readlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_tas_mutex_readlock_int(env, mutex, 0)); +} + +/* + * __db_tas_mutex_tryreadlock + * Try to get a shared lock on a latch; don't wait when busy. + * + * PUBLIC: #if defined(HAVE_SHARED_LATCHES) + * PUBLIC: int __db_tas_mutex_tryreadlock __P((ENV *, db_mutex_t)); + * PUBLIC: #endif + */ +int +__db_tas_mutex_tryreadlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_tas_mutex_readlock_int(env, mutex, 1)); +} +#endif + +/* + * __db_tas_mutex_unlock -- + * Release a mutex. + * + * PUBLIC: int __db_tas_mutex_unlock __P((ENV *, db_mutex_t)); + * + * Hybrid shared latch wakeup + * When an exclusive requester waits for the last shared holder to + * release, it increments mutexp->wait and pthread_cond_wait()'s. The + * last shared unlock calls __db_pthread_mutex_unlock() to wake it. 
+ */ +int +__db_tas_mutex_unlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; +#ifdef HAVE_MUTEX_HYBRID + int ret; +#ifdef MUTEX_DIAG + int waiters; +#endif +#endif +#ifdef HAVE_SHARED_LATCHES + int sharecount; +#endif + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); +#if defined(HAVE_MUTEX_HYBRID) && defined(MUTEX_DIAG) + waiters = mutexp->wait; +#endif + +#if defined(DIAGNOSTIC) +#if defined(HAVE_SHARED_LATCHES) + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) { + if (atomic_read(&mutexp->sharecount) == 0) { + __db_errx(env, DB_STR_A("2031", + "shared unlock %ld already unlocked", "%ld"), + (long)mutex); + return (__env_panic(env, EACCES)); + } + } else +#endif + if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + __db_errx(env, DB_STR_A("2032", + "unlock %ld already unlocked", "%ld"), (long)mutex); + return (__env_panic(env, EACCES)); + } +#endif + +#ifdef HAVE_SHARED_LATCHES + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) { + sharecount = atomic_read(&mutexp->sharecount); + /*MUTEX_MEMBAR(mutexp->sharecount);*/ /* XXX why? */ + if (sharecount == MUTEX_SHARE_ISEXCLUSIVE) { + F_CLR(mutexp, DB_MUTEX_LOCKED); + /* Flush flag update before zeroing count */ + MEMBAR_EXIT(); + atomic_init(&mutexp->sharecount, 0); + } else { + DB_ASSERT(env, sharecount > 0); + MEMBAR_EXIT(); + sharecount = atomic_dec(env, &mutexp->sharecount); + DB_ASSERT(env, sharecount >= 0); + if (sharecount > 0) + return (0); + } + } else +#endif + { + F_CLR(mutexp, DB_MUTEX_LOCKED); + MUTEX_UNSET(&mutexp->tas); + } + +#ifdef HAVE_MUTEX_HYBRID +#ifdef DIAGNOSTIC + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + + /* Prevent the load of wait from being hoisted before MUTEX_UNSET */ + MUTEX_MEMBAR(mutexp->flags); + if (mutexp->wait && + (ret = __db_pthread_mutex_unlock(env, mutex)) != 0) + return (ret); + +#ifdef MUTEX_DIAG + if (mutexp->wait) + printf("tas_unlock %ld %x waiters! 
busy %x waiters %d/%d\n", + mutex, pthread_self(), + MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait); +#endif +#endif + + return (0); +} + +/* + * __db_tas_mutex_destroy -- + * Destroy a mutex. + * + * PUBLIC: int __db_tas_mutex_destroy __P((ENV *, db_mutex_t)); + */ +int +__db_tas_mutex_destroy(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_MUTEX *mutexp; +#ifdef HAVE_MUTEX_HYBRID + int ret; +#endif + + if (!MUTEX_ON(env)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + + MUTEX_DESTROY(&mutexp->tas); + +#ifdef HAVE_MUTEX_HYBRID + if ((ret = __db_pthread_mutex_destroy(env, mutex)) != 0) + return (ret); +#endif + + COMPQUIET(mutexp, NULL); /* MUTEX_DESTROY may not be defined. */ + return (0); +} diff --git a/src/mutex/mut_win32.c b/src/mutex/mut_win32.c new file mode 100644 index 00000000..a4ddcce6 --- /dev/null +++ b/src/mutex/mut_win32.c @@ -0,0 +1,589 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#define LOAD_ACTUAL_MUTEX_CODE +#include "db_int.h" + +#include "dbinc/atomic.h" +/* + * This is where we load in the actual mutex declarations. + */ +#include "dbinc/mutex_int.h" + +/* + * Common code to get an event handle. This is executed whenever a mutex + * blocks, or when unlocking a mutex that a thread is waiting on. We can't + * keep these handles around, since the mutex structure is in shared memory, + * and each process gets its own handle value. + * + * We pass security attributes so that the created event is accessible by all + * users, in case a Windows service is sharing an environment with a local + * process run as a different user. 
+ */ +static _TCHAR hex_digits[] = _T("0123456789abcdef"); + +static __inline int get_handle(env, mutexp, eventp) + ENV *env; + DB_MUTEX *mutexp; + HANDLE *eventp; +{ + _TCHAR idbuf[] = _T("db.m00000000"); + _TCHAR *p = idbuf + 12; + int ret = 0; + u_int32_t id; + + for (id = (mutexp)->id; id != 0; id >>= 4) + *--p = hex_digits[id & 0xf]; + +#ifndef DB_WINCE + if (DB_GLOBAL(win_sec_attr) == NULL) { + InitializeSecurityDescriptor(&DB_GLOBAL(win_default_sec_desc), + SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(&DB_GLOBAL(win_default_sec_desc), + TRUE, 0, FALSE); + DB_GLOBAL(win_default_sec_attr).nLength = + sizeof(SECURITY_ATTRIBUTES); + DB_GLOBAL(win_default_sec_attr).bInheritHandle = FALSE; + DB_GLOBAL(win_default_sec_attr).lpSecurityDescriptor = + &DB_GLOBAL(win_default_sec_desc); + DB_GLOBAL(win_sec_attr) = &DB_GLOBAL(win_default_sec_attr); + } +#endif + + if ((*eventp = CreateEvent(DB_GLOBAL(win_sec_attr), + FALSE, FALSE, idbuf)) == NULL) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("2002", + "Win32 create event failed")); + } + + return (ret); +} + +/* + * __db_win32_mutex_lock_int + * Internal function to lock a win32 mutex + * + * If the wait paramter is 0, this function will return DB_LOCK_NOTGRANTED + * rather than wait. 
+ *
+ */
+static __inline int
+__db_win32_mutex_lock_int(env, mutex, timeout, wait)
+	ENV *env;
+	db_mutex_t mutex;
+	db_timeout_t timeout;
+	int wait;
+{
+	DB_ENV *dbenv;
+	DB_MUTEX *mutexp;
+	DB_MUTEXMGR *mtxmgr;
+	DB_MUTEXREGION *mtxregion;
+	DB_THREAD_INFO *ip;
+	HANDLE event;
+	u_int32_t ms, nspins;
+	db_timespec now, tempspec, timeoutspec;
+	db_timeout_t time_left;
+	int ret;
+#ifdef MUTEX_DIAG
+	LARGE_INTEGER diag_now;
+#endif
+	dbenv = env->dbenv;
+
+	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
+		return (0);
+
+	mtxmgr = env->mutex_handle;
+	mtxregion = mtxmgr->reginfo.primary;
+	mutexp = MUTEXP_SET(env, mutex);
+
+	CHECK_MTX_THREAD(env, mutexp);
+
+	if (timeout != 0) {
+		timespecclear(&timeoutspec);
+		__clock_set_expires(env, &timeoutspec, timeout);
+	}
+
+	/*
+	 * See WINCE_ATOMIC_MAGIC definition for details.
+	 * Use sharecount, because the value just needs to be a db_atomic_t
+	 * memory mapped onto the same page as those being Interlocked*.
+	 */
+	WINCE_ATOMIC_MAGIC(&mutexp->sharecount);
+
+	event = NULL;
+	ms = 50;
+	ret = 0;
+
+	/*
+	 * Only check the thread state once, by initializing the thread
+	 * control block pointer to null. If it is not the failchk
+	 * thread, then ip will have a valid value subsequent times
+	 * in the loop.
+	 */
+	ip = NULL;
+
+loop:	/* Attempt to acquire the mutex mutex_tas_spins times, if waiting. */
+	for (nspins =
+	    mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) {
+		/*
+		 * We can avoid the (expensive) interlocked instructions if
+		 * the mutex is already busy. 
+ */ + if (MUTEXP_IS_BUSY(mutexp) || !MUTEXP_ACQUIRE(mutexp)) { + if (F_ISSET(dbenv, DB_ENV_FAILCHK) && + ip == NULL && dbenv->is_alive(dbenv, + mutexp->pid, mutexp->tid, 0) == 0) { + ret = __env_set_state(env, &ip, THREAD_VERIFY); + if (ret != 0 || + ip->dbth_state == THREAD_FAILCHK) + return (DB_RUNRECOVERY); + } + if (!wait) + return (DB_LOCK_NOTGRANTED); + /* + * Some systems (notably those with newer Intel CPUs) + * need a small pause before retrying. [#6975] + */ + MUTEX_PAUSE + continue; + } + +#ifdef DIAGNOSTIC + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + char buf[DB_THREADID_STRLEN]; + __db_errx(env, DB_STR_A("2003", + "Win32 lock failed: mutex already locked by %s", + "%s"), dbenv->thread_id_string(dbenv, + mutexp->pid, mutexp->tid, buf)); + return (__env_panic(env, EACCES)); + } +#endif + F_SET(mutexp, DB_MUTEX_LOCKED); + dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid); + +#ifdef HAVE_STATISTICS + if (event == NULL) + ++mutexp->mutex_set_nowait; + else + ++mutexp->mutex_set_wait; +#endif + if (event != NULL) { + CloseHandle(event); + InterlockedDecrement(&mutexp->nwaiters); +#ifdef MUTEX_DIAG + if (ret != WAIT_OBJECT_0) { + QueryPerformanceCounter(&diag_now); + printf(DB_STR_A("2004", + "[%I64d]: Lost signal on mutex %p, " + "id %d, ms %d\n", "%I64d %p %d %d"), + diag_now.QuadPart, mutexp, mutexp->id, ms); + } +#endif + } + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield + * every time we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + + return (0); + } + + /* + * Yield the processor; wait 50 ms initially, up to 1 second. This + * loop is needed to work around a race where the signal from the + * unlocking thread gets lost. We start at 50 ms because it's unlikely + * to happen often and we want to avoid wasting CPU. 
+ */ + if (timeout != 0) { + timespecclear(&now); + if (__clock_expired(env, &now, &timeoutspec)) { + if (event != NULL) { + CloseHandle(event); + InterlockedDecrement(&mutexp->nwaiters); + } + return (DB_TIMEOUT); + } + /* Reduce the event wait if the timeout would happen first. */ + tempspec = timeoutspec; + timespecsub(&tempspec, &now); + DB_TIMESPEC_TO_TIMEOUT(time_left, &tempspec, 0); + time_left /= US_PER_MS; + if (ms > time_left) + ms = time_left; + } + if (event == NULL) { +#ifdef MUTEX_DIAG + QueryPerformanceCounter(&diag_now); + printf(DB_STR_A("2005", + "[%I64d]: Waiting on mutex %p, id %d\n", + "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id); +#endif + InterlockedIncrement(&mutexp->nwaiters); + if ((ret = get_handle(env, mutexp, &event)) != 0) + goto err; + } + if ((ret = WaitForSingleObject(event, ms)) == WAIT_FAILED) { + ret = __os_get_syserr(); + goto err; + } + if ((ms <<= 1) > MS_PER_SEC) + ms = MS_PER_SEC; + + PANIC_CHECK(env); + goto loop; + +err: __db_syserr(env, ret, DB_STR("2006", "Win32 lock failed")); + return (__env_panic(env, __os_posix_err(ret))); +} + +/* + * __db_win32_mutex_init -- + * Initialize a Win32 mutex. + * + * PUBLIC: int __db_win32_mutex_init __P((ENV *, db_mutex_t, u_int32_t)); + */ +int +__db_win32_mutex_init(env, mutex, flags) + ENV *env; + db_mutex_t mutex; + u_int32_t flags; +{ + DB_MUTEX *mutexp; + + mutexp = MUTEXP_SET(env, mutex); + mutexp->id = ((getpid() & 0xffff) << 16) ^ P_TO_UINT32(mutexp); + F_SET(mutexp, flags); + + return (0); +} + +/* + * __db_win32_mutex_lock + * Lock on a mutex, blocking if necessary. 
+ *
+ * PUBLIC: int __db_win32_mutex_lock __P((ENV *, db_mutex_t, db_timeout_t));
+ */
+int
+__db_win32_mutex_lock(env, mutex, timeout)
+	ENV *env;
+	db_mutex_t mutex;
+	db_timeout_t timeout;
+{
+	return (__db_win32_mutex_lock_int(env, mutex, timeout, 1));
+}
+
+/*
+ * __db_win32_mutex_trylock
+ *	Try to lock a mutex, returning without waiting if it is busy
+ *
+ * PUBLIC: int __db_win32_mutex_trylock __P((ENV *, db_mutex_t));
+ */
+int
+__db_win32_mutex_trylock(env, mutex)
+	ENV *env;
+	db_mutex_t mutex;
+{
+	return (__db_win32_mutex_lock_int(env, mutex, 0, 0));
+}
+
+#if defined(HAVE_SHARED_LATCHES)
+/*
+ * __db_win32_mutex_readlock_int
+ *	Try to lock a mutex, possibly waiting if requested and necessary.
+ */
+int
+__db_win32_mutex_readlock_int(env, mutex, nowait)
+	ENV *env;
+	db_mutex_t mutex;
+	int nowait;
+{
+	DB_ENV *dbenv;
+	DB_MUTEX *mutexp;
+	DB_MUTEXMGR *mtxmgr;
+	DB_MUTEXREGION *mtxregion;
+	HANDLE event;
+	u_int32_t nspins;
+	int ms, ret;
+	long exch_ret, mtx_val;
+#ifdef MUTEX_DIAG
+	LARGE_INTEGER diag_now;
+#endif
+	dbenv = env->dbenv;
+
+	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
+		return (0);
+
+	mtxmgr = env->mutex_handle;
+	mtxregion = mtxmgr->reginfo.primary;
+	mutexp = MUTEXP_SET(env, mutex);
+
+	CHECK_MTX_THREAD(env, mutexp);
+
+	/*
+	 * See WINCE_ATOMIC_MAGIC definition for details.
+	 * Use sharecount, because the value just needs to be a db_atomic_t
+	 * memory mapped onto the same page as those being Interlocked*.
+	 */
+	WINCE_ATOMIC_MAGIC(&mutexp->sharecount);
+
+	event = NULL;
+	ms = 50;
+	ret = 0;
+	/*
+	 * This needs to be initialized, since if mutexp->tas
+	 * is write locked on the first pass, it needs a value.
+	 */
+	exch_ret = 0;
+
+loop:	/* Attempt to acquire the resource for N spins. */
+	for (nspins =
+	    mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) {
+		/*
+		 * We can avoid the (expensive) interlocked instructions if
+		 * the mutex is already "set". 
+ */ +retry: mtx_val = atomic_read(&mutexp->sharecount); + if (mtx_val == MUTEX_SHARE_ISEXCLUSIVE) { + if (nowait) + return (DB_LOCK_NOTGRANTED); + + continue; + } else if (!atomic_compare_exchange(env, &mutexp->sharecount, + mtx_val, mtx_val + 1)) { + /* + * Some systems (notably those with newer Intel CPUs) + * need a small pause here. [#6975] + */ + MUTEX_PAUSE + goto retry; + } + +#ifdef HAVE_STATISTICS + if (event == NULL) + ++mutexp->mutex_set_rd_nowait; + else + ++mutexp->mutex_set_rd_wait; +#endif + if (event != NULL) { + CloseHandle(event); + InterlockedDecrement(&mutexp->nwaiters); +#ifdef MUTEX_DIAG + if (ret != WAIT_OBJECT_0) { + QueryPerformanceCounter(&diag_now); + printf(DB_STR_A("2007", + "[%I64d]: Lost signal on mutex %p, " + "id %d, ms %d\n", "%I64d %p %d %d"), + diag_now.QuadPart, mutexp, mutexp->id, ms); + } +#endif + } + +#ifdef DIAGNOSTIC + /* + * We want to switch threads as often as possible. Yield + * every time we get a mutex to ensure contention. + */ + if (F_ISSET(dbenv, DB_ENV_YIELDCPU)) + __os_yield(env, 0, 0); +#endif + + return (0); + } + + /* + * Yield the processor; wait 50 ms initially, up to 1 second. This + * loop is needed to work around a race where the signal from the + * unlocking thread gets lost. We start at 50 ms because it's unlikely + * to happen often and we want to avoid wasting CPU. 
+ */ + if (event == NULL) { +#ifdef MUTEX_DIAG + QueryPerformanceCounter(&diag_now); + printf(DB_STR_A("2008", + "[%I64d]: Waiting on mutex %p, id %d\n", + "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id); +#endif + InterlockedIncrement(&mutexp->nwaiters); + if ((ret = get_handle(env, mutexp, &event)) != 0) + goto err; + } + if ((ret = WaitForSingleObject(event, ms)) == WAIT_FAILED) { + ret = __os_get_syserr(); + goto err; + } + if ((ms <<= 1) > MS_PER_SEC) + ms = MS_PER_SEC; + + PANIC_CHECK(env); + goto loop; + +err: __db_syserr(env, ret, DB_STR("2009", + "Win32 read lock failed")); + return (__env_panic(env, __os_posix_err(ret))); +} + +/* + * __db_win32_mutex_readlock + * Get a shared lock on a latch + * + * PUBLIC: #if defined(HAVE_SHARED_LATCHES) + * PUBLIC: int __db_win32_mutex_readlock __P((ENV *, db_mutex_t)); + * PUBLIC: #endif + */ +int +__db_win32_mutex_readlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_win32_mutex_readlock_int(env, mutex, 0)); +} + +/* + * __db_win32_mutex_tryreadlock + * Try to a shared lock on a latch + * + * PUBLIC: #if defined(HAVE_SHARED_LATCHES) + * PUBLIC: int __db_win32_mutex_tryreadlock __P((ENV *, db_mutex_t)); + * PUBLIC: #endif + */ +int +__db_win32_mutex_tryreadlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (__db_win32_mutex_readlock_int(env, mutex, 1)); +} +#endif + +/* + * __db_win32_mutex_unlock -- + * Release a mutex. 
+ * + * PUBLIC: int __db_win32_mutex_unlock __P((ENV *, db_mutex_t)); + */ +int +__db_win32_mutex_unlock(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + DB_ENV *dbenv; + DB_MUTEX *mutexp; + HANDLE event; + int ret; +#ifdef MUTEX_DIAG + LARGE_INTEGER diag_now; +#endif + dbenv = env->dbenv; + + if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + mutexp = MUTEXP_SET(env, mutex); + +#ifdef DIAGNOSTIC + if (!MUTEXP_IS_BUSY(mutexp) || !(F_ISSET(mutexp, DB_MUTEX_SHARED) || + F_ISSET(mutexp, DB_MUTEX_LOCKED))) { + __db_errx(env, DB_STR_A("2010", + "Win32 unlock failed: lock already unlocked: mutex %d busy %d", + "%d %d"), mutex, MUTEXP_BUSY_FIELD(mutexp)); + return (__env_panic(env, EACCES)); + } +#endif + /* + * If we have a shared latch, and a read lock (DB_MUTEX_LOCKED is only + * set for write locks), then decrement the latch. If the readlock is + * still held by other threads, just return. Otherwise go ahead and + * notify any waiting threads. + */ +#ifdef HAVE_SHARED_LATCHES + if (F_ISSET(mutexp, DB_MUTEX_SHARED)) { + if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) { + F_CLR(mutexp, DB_MUTEX_LOCKED); + if ((ret = InterlockedExchange( + (interlocked_val)(&atomic_read( + &mutexp->sharecount)), 0)) != + MUTEX_SHARE_ISEXCLUSIVE) { + ret = DB_RUNRECOVERY; + goto err; + } + } else if (InterlockedDecrement( + (interlocked_val)(&atomic_read(&mutexp->sharecount))) > 0) + return (0); + } else +#endif + { + F_CLR(mutexp, DB_MUTEX_LOCKED); + MUTEX_UNSET(&mutexp->tas); + } + + if (mutexp->nwaiters > 0) { + if ((ret = get_handle(env, mutexp, &event)) != 0) + goto err; + +#ifdef MUTEX_DIAG + QueryPerformanceCounter(&diag_now); + printf(DB_STR_A("2011", + "[%I64d]: Signalling mutex %p, id %d\n", + "%I64d %p %d"), diag_now.QuadPart, mutexp, mutexp->id); +#endif + if (!PulseEvent(event)) { + ret = __os_get_syserr(); + CloseHandle(event); + goto err; + } + + CloseHandle(event); + } + + return (0); + +err: __db_syserr(env, ret, DB_STR("2012", "Win32 unlock failed")); + 
return (__env_panic(env, __os_posix_err(ret))); +} + +/* + * __db_win32_mutex_destroy -- + * Destroy a mutex. + * + * PUBLIC: int __db_win32_mutex_destroy __P((ENV *, db_mutex_t)); + */ +int +__db_win32_mutex_destroy(env, mutex) + ENV *env; + db_mutex_t mutex; +{ + return (0); +} + +#ifndef DB_WINCE +/* + * db_env_set_win_security + * + * Set the SECURITY_ATTRIBUTES to be used by BDB on Windows. + * It should not be called while any BDB mutexes are locked. + * + * EXTERN: #if defined(DB_WIN32) && !defined(DB_WINCE) + * EXTERN: int db_env_set_win_security __P((SECURITY_ATTRIBUTES *sa)); + * EXTERN: #endif + */ +int +db_env_set_win_security(sa) + SECURITY_ATTRIBUTES *sa; +{ + DB_GLOBAL(win_sec_attr) = sa; + return (0); +} +#endif diff --git a/src/mutex/test_mutex.c b/src/mutex/test_mutex.c new file mode 100644 index 00000000..b0aa6418 --- /dev/null +++ b/src/mutex/test_mutex.c @@ -0,0 +1,1051 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * Standalone mutex tester for Berkeley DB mutexes. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef DB_WIN32 +#define MUTEX_THREAD_TEST 1 + +extern int getopt(int, char * const *, const char *); + +typedef HANDLE os_pid_t; +typedef HANDLE os_thread_t; + +#define os_thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define os_thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1) +#define os_thread_self() GetCurrentThreadId() + +#else /* !DB_WIN32 */ + +#include + +typedef pid_t os_pid_t; + +/* + * There's only one mutex implementation that can't support thread-level + * locking: UNIX/fcntl mutexes. 
+ * + * The general Berkeley DB library configuration doesn't look for the POSIX + * pthread functions, with one exception -- pthread_yield. + * + * Use these two facts to decide if we're going to build with or without + * threads. + */ +#if !defined(HAVE_MUTEX_FCNTL) && defined(HAVE_PTHREAD_YIELD) +#define MUTEX_THREAD_TEST 1 + +#include + +typedef pthread_t os_thread_t; + +#define os_thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define os_thread_join(thr, statusp) pthread_join((thr), (statusp)) +#define os_thread_self() pthread_self() +#endif /* HAVE_PTHREAD_YIELD */ +#endif /* !DB_WIN32 */ + +#define OS_BAD_PID ((os_pid_t)-1) + +#define TESTDIR "TESTDIR" /* Working area */ +#define MT_FILE "TESTDIR/mutex.file" +#define MT_FILE_QUIT "TESTDIR/mutex.file.quit" + +/* + * The backing data layout: + * TM[1] per-thread mutex array lock + * TM[nthreads] per-thread mutex array + * TM[maxlocks] per-lock mutex array + */ +typedef struct { + db_mutex_t mutex; /* Mutex. */ + u_long id; /* Holder's ID. */ + u_int wakeme; /* Request to awake. */ +} TM; + +DB_ENV *dbenv; /* Backing environment */ +ENV *env; +size_t len; /* Backing data chunk size. */ + +u_int8_t *gm_addr; /* Global mutex */ +u_int8_t *lm_addr; /* Locker mutexes */ +u_int8_t *tm_addr; /* Thread mutexes */ + +#ifdef MUTEX_THREAD_TEST +os_thread_t *kidsp; /* Locker threads */ +os_thread_t wakep; /* Wakeup thread */ +#endif + +#ifndef HAVE_MMAP +u_int nprocs = 1; /* -p: Processes. */ +u_int nthreads = 20; /* -t: Threads. */ +#elif MUTEX_THREAD_TEST +u_int nprocs = 5; /* -p: Processes. */ +u_int nthreads = 4; /* -t: Threads. */ +#else +u_int nprocs = 20; /* -p: Processes. */ +u_int nthreads = 1; /* -t: Threads. */ +#endif + +u_int maxlocks = 20; /* -l: Backing locks. */ +u_int nlocks = 10000; /* -n: Locks per process. */ +int verbose; /* -v: Verbosity. 
*/ + +const char *progname; + +void data_off(u_int8_t *, DB_FH *); +void data_on(u_int8_t **, u_int8_t **, u_int8_t **, DB_FH **, int); +int locker_start(u_long); +int locker_wait(void); +os_pid_t os_spawn(const char *, char *const[]); +int os_wait(os_pid_t *, u_int); +void *run_lthread(void *); +void *run_wthread(void *); +os_pid_t spawn_proc(u_long, char *, char *); +void tm_env_close(void); +int tm_env_init(void); +void tm_mutex_destroy(void); +void tm_mutex_init(void); +void tm_mutex_stats(void); +int usage(void); +int wakeup_start(u_long); +int wakeup_wait(void); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + enum {LOCKER, WAKEUP, PARENT} rtype; + extern int optind; + extern char *optarg; + os_pid_t wakeup_pid, *pids; + u_long id; + u_int i; + DB_FH *fhp, *map_fhp; + int ch, err; + char *p, *tmpath, cmd[1024]; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + rtype = PARENT; + id = 0; + tmpath = argv[0]; + while ((ch = getopt(argc, argv, "l:n:p:T:t:v")) != EOF) + switch (ch) { + case 'l': + maxlocks = (u_int)atoi(optarg); + break; + case 'n': + nlocks = (u_int)atoi(optarg); + break; + case 'p': + nprocs = (u_int)atoi(optarg); + break; + case 't': + if ((nthreads = (u_int)atoi(optarg)) == 0) + nthreads = 1; +#if !defined(MUTEX_THREAD_TEST) + if (nthreads != 1) { + fprintf(stderr, + "%s: thread support not available or not compiled for this platform.\n", + progname); + return (EXIT_FAILURE); + } +#endif + break; + case 'T': + if (!memcmp(optarg, "locker", sizeof("locker") - 1)) + rtype = LOCKER; + else if ( + !memcmp(optarg, "wakeup", sizeof("wakeup") - 1)) + rtype = WAKEUP; + else + return (usage()); + if ((p = strchr(optarg, '=')) == NULL) + return (usage()); + id = (u_long)atoi(p + 1); + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* + * If we're not running a multi-process test, we should be running + * a 
multi-thread test. + */ + if (nprocs == 1 && nthreads == 1) { + fprintf(stderr, + "%s: running in a single process requires multiple threads\n", + progname); + return (EXIT_FAILURE); + } + + len = sizeof(TM) * (1 + nthreads * nprocs + maxlocks); + + /* + * In the multi-process test, the parent spawns processes that exec + * the original binary, ending up here. Each process joins the DB + * environment separately and then calls the supporting function. + */ + if (rtype == LOCKER || rtype == WAKEUP) { + __os_yield(env, 3, 0); /* Let everyone catch up. */ + /* Initialize random numbers. */ + srand((u_int)time(NULL) % (u_int)getpid()); + + if (tm_env_init() != 0) /* Join the environment. */ + exit(EXIT_FAILURE); + /* Join the backing data. */ + data_on(&gm_addr, &tm_addr, &lm_addr, &map_fhp, 0); + if (verbose) + printf( + "Backing file: global (%#lx), threads (%#lx), locks (%#lx)\n", + (u_long)gm_addr, (u_long)tm_addr, (u_long)lm_addr); + + if ((rtype == LOCKER ? + locker_start(id) : wakeup_start(id)) != 0) + exit(EXIT_FAILURE); + if ((rtype == LOCKER ? locker_wait() : wakeup_wait()) != 0) + exit(EXIT_FAILURE); + + data_off(gm_addr, map_fhp); /* Detach from backing data. */ + + tm_env_close(); /* Detach from environment. */ + + exit(EXIT_SUCCESS); + } + + /* + * The following code is only executed by the original parent process. + * + * Clean up from any previous runs. + */ + snprintf(cmd, sizeof(cmd), "rm -rf %s", TESTDIR); + (void)system(cmd); + snprintf(cmd, sizeof(cmd), "mkdir %s", TESTDIR); + (void)system(cmd); + + printf( + "%s: %u processes, %u threads/process, %u lock requests from %u locks\n", + progname, nprocs, nthreads, nlocks, maxlocks); + printf("%s: backing data %lu bytes\n", progname, (u_long)len); + + if (tm_env_init() != 0) /* Create the environment. */ + exit(EXIT_FAILURE); + /* Create the backing data. 
*/ + data_on(&gm_addr, &tm_addr, &lm_addr, &map_fhp, 1); + if (verbose) + printf( + "backing data: global (%#lx), threads (%#lx), locks (%#lx)\n", + (u_long)gm_addr, (u_long)tm_addr, (u_long)lm_addr); + + tm_mutex_init(); /* Initialize mutexes. */ + + if (nprocs > 1) { /* Run the multi-process test. */ + /* Allocate array of locker process IDs. */ + if ((pids = calloc(nprocs, sizeof(os_pid_t))) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + goto fail; + } + + /* Spawn locker processes and threads. */ + for (i = 0; i < nprocs; ++i) { + if ((pids[i] = + spawn_proc(id, tmpath, "locker")) == OS_BAD_PID) { + fprintf(stderr, + "%s: failed to spawn a locker\n", progname); + goto fail; + } + id += nthreads; + } + + /* Spawn wakeup process/thread. */ + if ((wakeup_pid = + spawn_proc(id, tmpath, "wakeup")) == OS_BAD_PID) { + fprintf(stderr, + "%s: failed to spawn waker\n", progname); + goto fail; + } + ++id; + + /* Wait for all lockers to exit. */ + if ((err = os_wait(pids, nprocs)) != 0) { + fprintf(stderr, "%s: locker wait failed with %d\n", + progname, err); + goto fail; + } + + /* Signal wakeup process to exit. */ + if ((err = __os_open( + env, MT_FILE_QUIT, 0, DB_OSO_CREATE, 0664, &fhp)) != 0) { + fprintf(stderr, + "%s: open %s\n", progname, db_strerror(err)); + goto fail; + } + (void)__os_closehandle(env, fhp); + + /* Wait for wakeup process/thread. */ + if ((err = os_wait(&wakeup_pid, 1)) != 0) { + fprintf(stderr, "%s: %lu: exited %d\n", + progname, (u_long)wakeup_pid, err); + goto fail; + } + } else { /* Run the single-process test. */ + /* Spawn locker threads. */ + if (locker_start(0) != 0) + goto fail; + + /* Spawn wakeup thread. */ + if (wakeup_start(nthreads) != 0) + goto fail; + + /* Wait for all lockers to exit. */ + if (locker_wait() != 0) + goto fail; + + /* Signal wakeup process to exit. 
*/ + if ((err = __os_open( + env, MT_FILE_QUIT, 0, DB_OSO_CREATE, 0664, &fhp)) != 0) { + fprintf(stderr, + "%s: open %s\n", progname, db_strerror(err)); + goto fail; + } + (void)__os_closehandle(env, fhp); + + /* Wait for wakeup thread. */ + if (wakeup_wait() != 0) + goto fail; + } + + tm_mutex_stats(); /* Display run statistics. */ + tm_mutex_destroy(); /* Destroy mutexes. */ + + data_off(gm_addr, map_fhp); /* Detach from backing data. */ + + tm_env_close(); /* Detach from environment. */ + + printf("%s: test succeeded\n", progname); + return (EXIT_SUCCESS); + +fail: printf("%s: FAILED!\n", progname); + return (EXIT_FAILURE); +} + +int +locker_start(id) + u_long id; +{ +#if defined(MUTEX_THREAD_TEST) + u_int i; + int err; + + /* + * Spawn off threads. We have nthreads all locking and going to + * sleep, and one other thread cycling through and waking them up. + */ + if ((kidsp = + (os_thread_t *)calloc(sizeof(os_thread_t), nthreads)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + for (i = 0; i < nthreads; i++) + if ((err = os_thread_create( + &kidsp[i], NULL, run_lthread, (void *)(id + i))) != 0) { + fprintf(stderr, "%s: failed spawning thread: %s\n", + progname, db_strerror(err)); + return (1); + } + return (0); +#else + return (run_lthread((void *)id) == NULL ? 0 : 1); +#endif +} + +int +locker_wait() +{ +#if defined(MUTEX_THREAD_TEST) + u_int i; + void *retp; + + /* Wait for the threads to exit. 
*/ + for (i = 0; i < nthreads; i++) { + (void)os_thread_join(kidsp[i], &retp); + if (retp != NULL) { + fprintf(stderr, + "%s: thread exited with error\n", progname); + return (1); + } + } + free(kidsp); +#endif + return (0); +} + +void * +run_lthread(arg) + void *arg; +{ + TM *gp, *mp, *tp; + u_long id, tid; + u_int lock, nl; + int err, i; + + id = (u_long)arg; +#if defined(MUTEX_THREAD_TEST) + tid = (u_long)os_thread_self(); +#else + tid = 0; +#endif + printf("Locker: ID %03lu (PID: %lu; TID: %lx)\n", + id, (u_long)getpid(), tid); + + gp = (TM *)gm_addr; + tp = (TM *)(tm_addr + id * sizeof(TM)); + + for (nl = nlocks; nl > 0;) { + /* Select and acquire a data lock. */ + lock = (u_int)rand() % maxlocks; + mp = (TM *)(lm_addr + lock * sizeof(TM)); + if (verbose) + printf("%03lu: lock %d (mtx: %lu)\n", + id, lock, (u_long)mp->mutex); + + if ((err = dbenv->mutex_lock(dbenv, mp->mutex)) != 0) { + fprintf(stderr, "%s: %03lu: never got lock %d: %s\n", + progname, id, lock, db_strerror(err)); + return ((void *)1); + } + if (mp->id != 0) { + fprintf(stderr, + "%s: RACE! (%03lu granted lock %d held by %03lu)\n", + progname, id, lock, mp->id); + return ((void *)1); + } + mp->id = id; + + /* + * Pretend to do some work, periodically checking to see if + * we still hold the mutex. + */ + for (i = 0; i < 3; ++i) { + __os_yield(env, 0, (u_long)rand() % 3); + if (mp->id != id) { + fprintf(stderr, + "%s: RACE! (%03lu stole lock %d from %03lu)\n", + progname, mp->id, lock, id); + return ((void *)1); + } + } + + /* + * Test self-blocking and unlocking by other threads/processes: + * + * acquire the global lock + * set our wakeup flag + * release the global lock + * acquire our per-thread lock + * + * The wakeup thread will wake us up. 
+ */ + if ((err = dbenv->mutex_lock(dbenv, gp->mutex)) != 0) { + fprintf(stderr, "%s: %03lu: global lock: %s\n", + progname, id, db_strerror(err)); + return ((void *)1); + } + if (tp->id != 0 && tp->id != id) { + fprintf(stderr, + "%s: %03lu: per-thread mutex isn't mine, owned by %03lu\n", + progname, id, tp->id); + return ((void *)1); + } + tp->id = id; + if (verbose) + printf("%03lu: self-blocking (mtx: %lu)\n", + id, (u_long)tp->mutex); + if (tp->wakeme) { + fprintf(stderr, + "%s: %03lu: wakeup flag incorrectly set\n", + progname, id); + return ((void *)1); + } + tp->wakeme = 1; + if ((err = dbenv->mutex_unlock(dbenv, gp->mutex)) != 0) { + fprintf(stderr, + "%s: %03lu: global unlock: %s\n", + progname, id, db_strerror(err)); + return ((void *)1); + } + if ((err = dbenv->mutex_lock(dbenv, tp->mutex)) != 0) { + fprintf(stderr, "%s: %03lu: per-thread lock: %s\n", + progname, id, db_strerror(err)); + return ((void *)1); + } + /* Time passes... */ + if (tp->wakeme) { + fprintf(stderr, "%s: %03lu: wakeup flag not cleared\n", + progname, id); + return ((void *)1); + } + + if (verbose) + printf("%03lu: release %d (mtx: %lu)\n", + id, lock, (u_long)mp->mutex); + + /* Release the data lock. */ + mp->id = 0; + if ((err = dbenv->mutex_unlock(dbenv, mp->mutex)) != 0) { + fprintf(stderr, + "%s: %03lu: lock release: %s\n", + progname, id, db_strerror(err)); + return ((void *)1); + } + + if (--nl % 1000 == 0) + printf("%03lu: %d\n", id, nl); + } + + return (NULL); +} + +int +wakeup_start(id) + u_long id; +{ +#if defined(MUTEX_THREAD_TEST) + int err; + + /* + * Spawn off wakeup thread. + */ + if ((err = os_thread_create( + &wakep, NULL, run_wthread, (void *)id)) != 0) { + fprintf(stderr, "%s: failed spawning wakeup thread: %s\n", + progname, db_strerror(err)); + return (1); + } + return (0); +#else + return (run_wthread((void *)id) == NULL ? 
0 : 1); +#endif +} + +int +wakeup_wait() +{ +#if defined(MUTEX_THREAD_TEST) + void *retp; + + /* + * A file is created when the wakeup thread is no longer needed. + */ + (void)os_thread_join(wakep, &retp); + if (retp != NULL) { + fprintf(stderr, + "%s: wakeup thread exited with error\n", progname); + return (1); + } +#endif + return (0); +} + +/* + * run_wthread -- + * Thread to wake up other threads that are sleeping. + */ +void * +run_wthread(arg) + void *arg; +{ + TM *gp, *tp; + u_long id, tid; + u_int check_id; + int err, quitcheck; + + id = (u_long)arg; + quitcheck = 0; +#if defined(MUTEX_THREAD_TEST) + tid = (u_long)os_thread_self(); +#else + tid = 0; +#endif + printf("Wakeup: ID %03lu (PID: %lu; TID: %lx)\n", + id, (u_long)getpid(), tid); + + gp = (TM *)gm_addr; + + /* Loop, waking up sleepers and periodically sleeping ourselves. */ + for (check_id = 0;; ++check_id) { + /* Check to see if the locking threads have finished. */ + if (++quitcheck >= 100) { + quitcheck = 0; + if (__os_exists(env, MT_FILE_QUIT, NULL) == 0) + break; + } + + /* Check for ID wraparound. */ + if (check_id == nthreads * nprocs) + check_id = 0; + + /* Check for a thread that needs a wakeup. */ + tp = (TM *)(tm_addr + check_id * sizeof(TM)); + if (!tp->wakeme) + continue; + + if (verbose) { + printf("%03lu: wakeup thread %03lu (mtx: %lu)\n", + id, tp->id, (u_long)tp->mutex); + (void)fflush(stdout); + } + + /* Acquire the global lock. 
*/ + if ((err = dbenv->mutex_lock(dbenv, gp->mutex)) != 0) { + fprintf(stderr, "%s: wakeup: global lock: %s\n", + progname, db_strerror(err)); + return ((void *)1); + } + + tp->wakeme = 0; + if ((err = dbenv->mutex_unlock(dbenv, tp->mutex)) != 0) { + fprintf(stderr, "%s: wakeup: unlock: %s\n", + progname, db_strerror(err)); + return ((void *)1); + } + + if ((err = dbenv->mutex_unlock(dbenv, gp->mutex)) != 0) { + fprintf(stderr, "%s: wakeup: global unlock: %s\n", + progname, db_strerror(err)); + return ((void *)1); + } + + __os_yield(env, 0, (u_long)rand() % 3); + } + return (NULL); +} + +/* + * tm_env_init -- + * Create the backing database environment. + */ +int +tm_env_init() +{ + u_int32_t flags; + int ret; + char *home; + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: %s\n", progname, db_strerror(ret)); + return (1); + } + env = dbenv->env; + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + /* Allocate enough mutexes. */ + if ((ret = dbenv->mutex_set_increment(dbenv, + 1 + nthreads * nprocs + maxlocks)) != 0) { + dbenv->err(dbenv, ret, "dbenv->mutex_set_increment"); + return (1); + } + + flags = DB_CREATE; + if (nprocs == 1) { + home = NULL; + flags |= DB_PRIVATE; + } else + home = TESTDIR; + if (nthreads != 1) + flags |= DB_THREAD; + if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) { + dbenv->err(dbenv, ret, "environment open: %s", home); + return (1); + } + + return (0); +} + +/* + * tm_env_close -- + * Close the backing database environment. + */ +void +tm_env_close() +{ + (void)dbenv->close(dbenv, 0); +} + +/* + * tm_mutex_init -- + * Initialize the mutexes. 
+ */ +void +tm_mutex_init() +{ + TM *mp; + u_int i; + int err; + + if (verbose) + printf("Allocate the global mutex: "); + mp = (TM *)gm_addr; + if ((err = dbenv->mutex_alloc(dbenv, 0, &mp->mutex)) != 0) { + fprintf(stderr, "%s: DB_ENV->mutex_alloc (global): %s\n", + progname, db_strerror(err)); + exit(EXIT_FAILURE); + } + if (verbose) + printf("%lu\n", (u_long)mp->mutex); + + if (verbose) + printf( + "Allocate %d per-thread, self-blocking mutexes: ", + nthreads * nprocs); + for (i = 0; i < nthreads * nprocs; ++i) { + mp = (TM *)(tm_addr + i * sizeof(TM)); + if ((err = dbenv->mutex_alloc( + dbenv, DB_MUTEX_SELF_BLOCK, &mp->mutex)) != 0) { + fprintf(stderr, + "%s: DB_ENV->mutex_alloc (per-thread %d): %s\n", + progname, i, db_strerror(err)); + exit(EXIT_FAILURE); + } + if ((err = dbenv->mutex_lock(dbenv, mp->mutex)) != 0) { + fprintf(stderr, + "%s: DB_ENV->mutex_lock (per-thread %d): %s\n", + progname, i, db_strerror(err)); + exit(EXIT_FAILURE); + } + if (verbose) + printf("%lu ", (u_long)mp->mutex); + } + if (verbose) + printf("\n"); + + if (verbose) + printf("Allocate %d per-lock mutexes: ", maxlocks); + for (i = 0; i < maxlocks; ++i) { + mp = (TM *)(lm_addr + i * sizeof(TM)); + if ((err = dbenv->mutex_alloc(dbenv, 0, &mp->mutex)) != 0) { + fprintf(stderr, + "%s: DB_ENV->mutex_alloc (per-lock: %d): %s\n", + progname, i, db_strerror(err)); + exit(EXIT_FAILURE); + } + if (verbose) + printf("%lu ", (u_long)mp->mutex); + } + if (verbose) + printf("\n"); +} + +/* + * tm_mutex_destroy -- + * Destroy the mutexes. 
+ */ +void +tm_mutex_destroy() +{ + TM *gp, *mp; + u_int i; + int err; + + if (verbose) + printf("Destroy the global mutex.\n"); + gp = (TM *)gm_addr; + if ((err = dbenv->mutex_free(dbenv, gp->mutex)) != 0) { + fprintf(stderr, "%s: DB_ENV->mutex_free (global): %s\n", + progname, db_strerror(err)); + exit(EXIT_FAILURE); + } + + if (verbose) + printf("Destroy the per-thread mutexes.\n"); + for (i = 0; i < nthreads * nprocs; ++i) { + mp = (TM *)(tm_addr + i * sizeof(TM)); + if ((err = dbenv->mutex_free(dbenv, mp->mutex)) != 0) { + fprintf(stderr, + "%s: DB_ENV->mutex_free (per-thread %d): %s\n", + progname, i, db_strerror(err)); + exit(EXIT_FAILURE); + } + } + + if (verbose) + printf("Destroy the per-lock mutexes.\n"); + for (i = 0; i < maxlocks; ++i) { + mp = (TM *)(lm_addr + i * sizeof(TM)); + if ((err = dbenv->mutex_free(dbenv, mp->mutex)) != 0) { + fprintf(stderr, + "%s: DB_ENV->mutex_free (per-lock: %d): %s\n", + progname, i, db_strerror(err)); + exit(EXIT_FAILURE); + } + } +} + +/* + * tm_mutex_stats -- + * Display mutex statistics. + */ +void +tm_mutex_stats() +{ +#ifdef HAVE_STATISTICS + TM *mp; + uintmax_t set_wait, set_nowait; + u_int i; + + printf("Per-lock mutex statistics.\n"); + for (i = 0; i < maxlocks; ++i) { + mp = (TM *)(lm_addr + i * sizeof(TM)); + __mutex_set_wait_info(env, mp->mutex, &set_wait, &set_nowait); + printf("mutex %2d: wait: %lu; no wait %lu\n", i, + (u_long)set_wait, (u_long)set_nowait); + } +#endif +} + +/* + * data_on -- + * Map in or allocate the backing data space. + */ +void +data_on(gm_addrp, tm_addrp, lm_addrp, fhpp, init) + u_int8_t **gm_addrp, **tm_addrp, **lm_addrp; + DB_FH **fhpp; + int init; +{ + DB_FH *fhp; + size_t nwrite; + int err; + void *addr; + + fhp = NULL; + + /* + * In a single process, use heap memory. 
+ */ + if (nprocs == 1) { + if (init) { + if ((err = + __os_calloc(env, (size_t)len, 1, &addr)) != 0) + exit(EXIT_FAILURE); + } else { + fprintf(stderr, + "%s: init should be set for single process call\n", + progname); + exit(EXIT_FAILURE); + } + } else { + if (init) { + if (verbose) + printf("Create the backing file.\n"); + + if ((err = __os_open(env, MT_FILE, 0, + DB_OSO_CREATE | DB_OSO_TRUNC, 0666, &fhp)) == -1) { + fprintf(stderr, "%s: %s: open: %s\n", + progname, MT_FILE, db_strerror(err)); + exit(EXIT_FAILURE); + } + + if ((err = + __os_seek(env, fhp, 0, 0, (u_int32_t)len)) != 0 || + (err = + __os_write(env, fhp, &err, 1, &nwrite)) != 0 || + nwrite != 1) { + fprintf(stderr, "%s: %s: seek/write: %s\n", + progname, MT_FILE, db_strerror(err)); + exit(EXIT_FAILURE); + } + } else + if ((err = __os_open(env, MT_FILE, 0, 0, 0, &fhp)) != 0) + exit(EXIT_FAILURE); + + if ((err = + __os_mapfile(env, MT_FILE, fhp, len, 0, &addr)) != 0) + exit(EXIT_FAILURE); + } + + *gm_addrp = (u_int8_t *)addr; + addr = (u_int8_t *)addr + sizeof(TM); + *tm_addrp = (u_int8_t *)addr; + addr = (u_int8_t *)addr + sizeof(TM) * (nthreads * nprocs); + *lm_addrp = (u_int8_t *)addr; + + if (fhpp != NULL) + *fhpp = fhp; +} + +/* + * data_off -- + * Discard or de-allocate the backing data space. + */ +void +data_off(addr, fhp) + u_int8_t *addr; + DB_FH *fhp; +{ + if (nprocs == 1) + __os_free(env, addr); + else { + if (__os_unmapfile(env, addr, len) != 0) + exit(EXIT_FAILURE); + if (__os_closehandle(env, fhp) != 0) + exit(EXIT_FAILURE); + } +} + +/* + * usage -- + * + */ +int +usage() +{ + fprintf(stderr, "usage: %s %s\n\t%s\n", progname, + "[-v] [-l maxlocks]", + "[-n locks] [-p procs] [-T locker=ID|wakeup=ID] [-t threads]"); + return (EXIT_FAILURE); +} + +/* + * os_wait -- + * Wait for an array of N procs. 
+ */ +int +os_wait(procs, n) + os_pid_t *procs; + u_int n; +{ + u_int i; + int status; +#if defined(DB_WIN32) + DWORD ret; +#endif + + status = 0; + +#if defined(DB_WIN32) + do { + ret = WaitForMultipleObjects(n, procs, FALSE, INFINITE); + i = ret - WAIT_OBJECT_0; + if (i < 0 || i >= n) + return (__os_posix_err(__os_get_syserr())); + + if ((GetExitCodeProcess(procs[i], &ret) == 0) || (ret != 0)) + return (ret); + + /* remove the process handle from the list */ + while (++i < n) + procs[i - 1] = procs[i]; + } while (--n); +#elif !defined(HAVE_VXWORKS) + do { + if (wait(&status) == -1) + return (__os_posix_err(__os_get_syserr())); + + if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0) { + for (i = 0; i < n; i++) + (void)kill(procs[i], SIGKILL); + return (WEXITSTATUS(status)); + } + } while (--n); +#endif + + return (0); +} + +os_pid_t +spawn_proc(id, tmpath, typearg) + u_long id; + char *tmpath, *typearg; +{ + char *const vbuf = verbose ? "-v" : NULL; + char *args[13], lbuf[16], nbuf[16], pbuf[16], tbuf[16], Tbuf[256]; + + args[0] = tmpath; + args[1] = "-l"; + snprintf(lbuf, sizeof(lbuf), "%d", maxlocks); + args[2] = lbuf; + args[3] = "-n"; + snprintf(nbuf, sizeof(nbuf), "%d", nlocks); + args[4] = nbuf; + args[5] = "-p"; + snprintf(pbuf, sizeof(pbuf), "%d", nprocs); + args[6] = pbuf; + args[7] = "-t"; + snprintf(tbuf, sizeof(tbuf), "%d", nthreads); + args[8] = tbuf; + args[9] = "-T"; + snprintf(Tbuf, sizeof(Tbuf), "%s=%lu", typearg, id); + args[10] = Tbuf; + args[11] = vbuf; + args[12] = NULL; + + return (os_spawn(tmpath, args)); +} + +os_pid_t +os_spawn(path, argv) + const char *path; + char *const argv[]; +{ + os_pid_t pid; + int status; + + COMPQUIET(pid, 0); + COMPQUIET(status, 0); + +#ifdef HAVE_VXWORKS + fprintf(stderr, "%s: os_spawn not supported for VxWorks.\n", progname); + return (OS_BAD_PID); +#elif defined(HAVE_QNX) + /* + * For QNX, we cannot fork if we've ever used threads. So + * we'll use their spawn function. 
We use 'spawnl' which + * is NOT a POSIX function. + * + * The return value of spawnl is just what we want depending + * on the value of the 'wait' arg. + */ + return (spawnv(P_NOWAIT, path, argv)); +#elif defined(DB_WIN32) + return (os_pid_t)(_spawnv(P_NOWAIT, path, argv)); +#else + if ((pid = fork()) != 0) { + if (pid == -1) + return (OS_BAD_PID); + return (pid); + } else { + (void)execv(path, argv); + exit(EXIT_FAILURE); + } +#endif +} diff --git a/src/mutex/uts4_cc.s b/src/mutex/uts4_cc.s new file mode 100644 index 00000000..b4b4e358 --- /dev/null +++ b/src/mutex/uts4_cc.s @@ -0,0 +1,26 @@ + / See the file LICENSE for redistribution information. + / + / Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + / + / $Id$ + / + / int uts_lock ( int *p, int i ); + / Update the lock word pointed to by p with the + / value i, using compare-and-swap. + / Returns 0 if update was successful. + / Returns 1 if update failed. + / + entry uts_lock + uts_lock: + using .,r15 + st r2,8(sp) / Save R2 + l r2,64+0(sp) / R2 -> word to update + slr r0, r0 / R0 = current lock value must be 0 + l r1,64+4(sp) / R1 = new lock value + cs r0,r1,0(r2) / Try the update ... + be x / ... Success. Return 0 + la r0,1 / ... Failure. Return 1 + x: / + l r2,8(sp) / Restore R2 + b 2(,r14) / Return to caller + drop r15 diff --git a/src/os/os_abort.c b/src/os/os_abort.c new file mode 100644 index 00000000..191ed819 --- /dev/null +++ b/src/os/os_abort.c @@ -0,0 +1,33 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_abort -- + * + * PUBLIC: void __os_abort __P((ENV *)); + */ +void +__os_abort(env) + ENV *env; +{ + __os_stack(env); /* Try and get a stack trace. */ + +#ifdef HAVE_ABORT + abort(); /* Try and drop core. 
*/ + /* NOTREACHED */ +#endif +#ifdef SIGABRT + (void)raise(SIGABRT); /* Try and drop core. */ +#endif + exit(1); /* Quit anyway. */ + /* NOTREACHED */ +} diff --git a/src/os/os_abs.c b/src/os/os_abs.c new file mode 100644 index 00000000..0b49f8e4 --- /dev/null +++ b/src/os/os_abs.c @@ -0,0 +1,24 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_abspath -- + * Return if a path is an absolute path. + * + * PUBLIC: int __os_abspath __P((const char *)); + */ +int +__os_abspath(path) + const char *path; +{ + return (path[0] == '/'); +} diff --git a/src/os/os_addrinfo.c b/src/os/os_addrinfo.c new file mode 100644 index 00000000..ca30789d --- /dev/null +++ b/src/os/os_addrinfo.c @@ -0,0 +1,179 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_getaddrinfo and __os_freeaddrinfo wrap the getaddrinfo and freeaddrinfo + * calls, as well as the associated platform dependent error handling, mapping + * the error return to a ANSI C/POSIX error return. + */ + +/* + * __os_getaddrinfo -- + * + * PUBLIC: #if defined(HAVE_REPLICATION_THREADS) + * PUBLIC: int __os_getaddrinfo __P((ENV *, const char *, u_int, + * PUBLIC: const char *, const ADDRINFO *, ADDRINFO **)); + * PUBLIC: #endif + */ +int +__os_getaddrinfo(env, nodename, port, servname, hints, res) + ENV *env; + const char *nodename, *servname; + u_int port; + const ADDRINFO *hints; + ADDRINFO **res; +{ +#ifdef HAVE_GETADDRINFO + int ret; + + if ((ret = getaddrinfo(nodename, servname, hints, res)) == 0) + return (0); + + __db_errx(env, DB_STR_A("0153", + "%s(%u): host lookup failed: %s", "%s %u %s"), + nodename == NULL ? 
"" : nodename, port, +#ifdef DB_WIN32 + gai_strerrorA(ret)); +#else + gai_strerror(ret)); +#endif + return (__os_posix_err(ret)); +#else + ADDRINFO *answer; + struct hostent *hostaddr; + struct sockaddr_in sin; + u_int32_t tmpaddr; + int ret; + + COMPQUIET(hints, NULL); + COMPQUIET(servname, NULL); + + /* INADDR_NONE is not defined on Solaris 2.6, 2.7 or 2.8. */ +#ifndef INADDR_NONE +#define INADDR_NONE ((u_long)0xffffffff) +#endif + + /* + * Basic implementation of IPv4 component of getaddrinfo. + * Limited to the functionality used by repmgr. + */ + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + if (nodename) { + if (nodename[0] == '\0') + sin.sin_addr.s_addr = htonl(INADDR_ANY); + else if ((tmpaddr = inet_addr(CHAR_STAR_CAST nodename)) != + INADDR_NONE) { + sin.sin_addr.s_addr = tmpaddr; + } else { + hostaddr = gethostbyname(nodename); + if (hostaddr == NULL) { +#ifdef DB_WIN32 + ret = __os_get_neterr(); + __db_syserr(env, ret, DB_STR_A("0154", + "%s(%u): host lookup failed", "%s %u"), + nodename == NULL ? "" : nodename, port); + return (__os_posix_err(ret)); +#else + /* + * Historic UNIX systems used the h_errno + * global variable to return gethostbyname + * errors. The only function we currently + * use that needs h_errno is gethostbyname, + * so we deal with it here. + * + * hstrerror is not available on Solaris 2.6 + * (it is in libresolv but is a private, + * unexported symbol). + */ +#ifdef HAVE_HSTRERROR + __db_errx(env, DB_STR_A("0155", + "%s(%u): host lookup failed: %s", + "%s %u %s"), + nodename == NULL ? "" : nodename, port, + hstrerror(h_errno)); +#else + __db_errx(env, DB_STR_A("0156", + "%s(%u): host lookup failed: %d", + "%s %u %d"), + nodename == NULL ? 
"" : nodename, port, + h_errno); +#endif + switch (h_errno) { + case HOST_NOT_FOUND: + case NO_DATA: + return (EHOSTUNREACH); + case TRY_AGAIN: + return (EAGAIN); + case NO_RECOVERY: + default: + return (EFAULT); + } + /* NOTREACHED */ +#endif + } + memcpy(&(sin.sin_addr), + hostaddr->h_addr, (size_t)hostaddr->h_length); + } + } else /* No host specified. */ + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons((u_int16_t)port); + + if ((ret = __os_calloc(env, 1, sizeof(ADDRINFO), &answer)) != 0) + return (ret); + if ((ret = __os_malloc(env, sizeof(sin), &answer->ai_addr)) != 0) { + __os_free(env, answer); + return (ret); + } + + answer->ai_family = AF_INET; + answer->ai_protocol = IPPROTO_TCP; + answer->ai_socktype = SOCK_STREAM; + answer->ai_addrlen = sizeof(sin); + memcpy(answer->ai_addr, &sin, sizeof(sin)); + *res = answer; + + return (0); +#endif /* HAVE_GETADDRINFO */ +} + +/* + * __os_freeaddrinfo -- + * + * PUBLIC: #if defined(HAVE_REPLICATION_THREADS) + * PUBLIC: void __os_freeaddrinfo __P((ENV *, ADDRINFO *)); + * PUBLIC: #endif + */ +void +__os_freeaddrinfo(env, ai) + ENV *env; + ADDRINFO *ai; +{ +#ifdef HAVE_GETADDRINFO + COMPQUIET(env, NULL); + + freeaddrinfo(ai); +#else + ADDRINFO *next, *tmpaddr; + + for (next = ai; next != NULL; next = tmpaddr) { + if (next->ai_canonname != NULL) + __os_free(env, next->ai_canonname); + + if (next->ai_addr != NULL) + __os_free(env, next->ai_addr); + + tmpaddr = next->ai_next; + __os_free(env, next); + } +#endif +} diff --git a/src/os/os_alloc.c b/src/os/os_alloc.c new file mode 100644 index 00000000..8f74e138 --- /dev/null +++ b/src/os/os_alloc.c @@ -0,0 +1,464 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef DIAGNOSTIC +static void __os_guard __P((ENV *)); + +typedef union { + size_t size; + uintmax_t align; +} db_allocinfo_t; +#endif + +/* + * !!! + * Correct for systems that return NULL when you allocate 0 bytes of memory. + * There are several places in DB where we allocate the number of bytes held + * by the key/data item, and it can be 0. Correct here so that malloc never + * returns a NULL for that reason (which behavior is permitted by ANSI). We + * could make these calls macros on non-Alpha architectures (that's where we + * saw the problem), but it's probably not worth the autoconf complexity. + * + * !!! + * Correct for systems that don't set errno when malloc and friends fail. + * + * Out of memory. + * We wish to hold the whole sky, + * But we never will. + */ + +/* + * __os_umalloc -- + * Allocate memory to be used by the application. + * + * Use, in order of preference, the allocation function specified to the + * ENV handle, the allocation function specified as a replacement for + * the library malloc, or the library malloc(). + * + * PUBLIC: int __os_umalloc __P((ENV *, size_t, void *)); + */ +int +__os_umalloc(env, size, storep) + ENV *env; + size_t size; + void *storep; +{ + DB_ENV *dbenv; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* Never allocate 0 bytes -- some C libraries don't like it. */ + if (size == 0) + ++size; + + if (dbenv == NULL || dbenv->db_malloc == NULL) { + if (DB_GLOBAL(j_malloc) != NULL) + *(void **)storep = DB_GLOBAL(j_malloc)(size); + else + *(void **)storep = malloc(size); + if (*(void **)storep == NULL) { + /* + * Correct error return, see __os_malloc. 
+ */ + if ((ret = __os_get_errno_ret_zero()) == 0) { + ret = ENOMEM; + __os_set_errno(ENOMEM); + } + __db_err(env, ret, DB_STR_A("0143", "malloc: %lu", + "%lu"), (u_long)size); + return (ret); + } + return (0); + } + + if ((*(void **)storep = dbenv->db_malloc(size)) == NULL) { + __db_errx(env, DB_STR("0144", + "user-specified malloc function returned NULL")); + return (ENOMEM); + } + + return (0); +} + +/* + * __os_urealloc -- + * Allocate memory to be used by the application. + * + * A realloc(3) counterpart to __os_umalloc's malloc(3). + * + * PUBLIC: int __os_urealloc __P((ENV *, size_t, void *)); + */ +int +__os_urealloc(env, size, storep) + ENV *env; + size_t size; + void *storep; +{ + DB_ENV *dbenv; + int ret; + void *ptr; + + dbenv = env == NULL ? NULL : env->dbenv; + ptr = *(void **)storep; + + /* Never allocate 0 bytes -- some C libraries don't like it. */ + if (size == 0) + ++size; + + if (dbenv == NULL || dbenv->db_realloc == NULL) { + if (ptr == NULL) + return (__os_umalloc(env, size, storep)); + + if (DB_GLOBAL(j_realloc) != NULL) + *(void **)storep = DB_GLOBAL(j_realloc)(ptr, size); + else + *(void **)storep = realloc(ptr, size); + if (*(void **)storep == NULL) { + /* + * Correct errno, see __os_realloc. + */ + if ((ret = __os_get_errno_ret_zero()) == 0) { + ret = ENOMEM; + __os_set_errno(ENOMEM); + } + __db_err(env, ret, DB_STR_A("0145", + "realloc: %lu", "%lu"), (u_long)size); + return (ret); + } + return (0); + } + + if ((*(void **)storep = dbenv->db_realloc(ptr, size)) == NULL) { + __db_errx(env, DB_STR("0146", + "User-specified realloc function returned NULL")); + return (ENOMEM); + } + + return (0); +} + +/* + * __os_ufree -- + * Free memory used by the application. + * + * A free(3) counterpart to __os_umalloc's malloc(3). + * + * PUBLIC: void __os_ufree __P((ENV *, void *)); + */ +void +__os_ufree(env, ptr) + ENV *env; + void *ptr; +{ + DB_ENV *dbenv; + + dbenv = env == NULL ? 
NULL : env->dbenv; + + if (dbenv != NULL && dbenv->db_free != NULL) + dbenv->db_free(ptr); + else if (DB_GLOBAL(j_free) != NULL) + DB_GLOBAL(j_free)(ptr); + else + free(ptr); +} + +/* + * __os_strdup -- + * The strdup(3) function for DB. + * + * PUBLIC: int __os_strdup __P((ENV *, const char *, void *)); + */ +int +__os_strdup(env, str, storep) + ENV *env; + const char *str; + void *storep; +{ + size_t size; + int ret; + void *p; + + *(void **)storep = NULL; + + size = strlen(str) + 1; + if ((ret = __os_malloc(env, size, &p)) != 0) + return (ret); + + memcpy(p, str, size); + + *(void **)storep = p; + return (0); +} + +/* + * __os_calloc -- + * The calloc(3) function for DB. + * + * PUBLIC: int __os_calloc __P((ENV *, size_t, size_t, void *)); + */ +int +__os_calloc(env, num, size, storep) + ENV *env; + size_t num, size; + void *storep; +{ + int ret; + + size *= num; + if ((ret = __os_malloc(env, size, storep)) != 0) + return (ret); + + memset(*(void **)storep, 0, size); + + return (0); +} + +/* + * __os_malloc -- + * The malloc(3) function for DB. + * + * PUBLIC: int __os_malloc __P((ENV *, size_t, void *)); + */ +int +__os_malloc(env, size, storep) + ENV *env; + size_t size; + void *storep; +{ + int ret; + void *p; + + *(void **)storep = NULL; + + /* Never allocate 0 bytes -- some C libraries don't like it. */ + if (size == 0) + ++size; + +#ifdef DIAGNOSTIC + /* Add room for size and a guard byte. */ + size += sizeof(db_allocinfo_t) + 1; +#endif + + if (DB_GLOBAL(j_malloc) != NULL) + p = DB_GLOBAL(j_malloc)(size); + else + p = malloc(size); + if (p == NULL) { + /* + * Some C libraries don't correctly set errno when malloc(3) + * fails. We'd like to 0 out errno before calling malloc, + * but it turns out that setting errno is quite expensive on + * Windows/NT in an MT environment. 
+ */ + if ((ret = __os_get_errno_ret_zero()) == 0) { + ret = ENOMEM; + __os_set_errno(ENOMEM); + } + __db_err(env, ret, DB_STR_A("0147", "malloc: %lu", "%lu"), + (u_long)size); + return (ret); + } + +#ifdef DIAGNOSTIC + /* Overwrite memory. */ + memset(p, CLEAR_BYTE, size); + + /* + * Guard bytes: if #DIAGNOSTIC is defined, we allocate an additional + * byte after the memory and set it to a special value that we check + * for when the memory is free'd. + */ + ((u_int8_t *)p)[size - 1] = CLEAR_BYTE; + + ((db_allocinfo_t *)p)->size = size; + p = &((db_allocinfo_t *)p)[1]; +#endif + *(void **)storep = p; + + return (0); +} + +/* + * __os_realloc -- + * The realloc(3) function for DB. + * + * PUBLIC: int __os_realloc __P((ENV *, size_t, void *)); + */ +int +__os_realloc(env, size, storep) + ENV *env; + size_t size; + void *storep; +{ + int ret; + void *p, *ptr; + + ptr = *(void **)storep; + + /* Never allocate 0 bytes -- some C libraries don't like it. */ + if (size == 0) + ++size; + + /* If we haven't yet allocated anything yet, simply call malloc. */ + if (ptr == NULL) + return (__os_malloc(env, size, storep)); + +#ifdef DIAGNOSTIC + /* Add room for size and a guard byte. */ + size += sizeof(db_allocinfo_t) + 1; + + /* Back up to the real beginning */ + ptr = &((db_allocinfo_t *)ptr)[-1]; + + { + size_t s; + + s = ((db_allocinfo_t *)ptr)->size; + if (((u_int8_t *)ptr)[s - 1] != CLEAR_BYTE) + __os_guard(env); + } +#endif + + /* + * Don't overwrite the original pointer, there are places in DB we + * try to continue after realloc fails. + */ + if (DB_GLOBAL(j_realloc) != NULL) + p = DB_GLOBAL(j_realloc)(ptr, size); + else + p = realloc(ptr, size); + if (p == NULL) { + /* + * Some C libraries don't correctly set errno when malloc(3) + * fails. We'd like to 0 out errno before calling malloc, + * but it turns out that setting errno is quite expensive on + * Windows/NT in an MT environment. 
+ */ + if ((ret = __os_get_errno_ret_zero()) == 0) { + ret = ENOMEM; + __os_set_errno(ENOMEM); + } + __db_err(env, ret, DB_STR_A("0148", "realloc: %lu", "%lu"), + (u_long)size); + return (ret); + } +#ifdef DIAGNOSTIC + ((u_int8_t *)p)[size - 1] = CLEAR_BYTE; /* Initialize guard byte. */ + + ((db_allocinfo_t *)p)->size = size; + p = &((db_allocinfo_t *)p)[1]; +#endif + + *(void **)storep = p; + + return (0); +} + +/* + * __os_free -- + * The free(3) function for DB. + * + * PUBLIC: void __os_free __P((ENV *, void *)); + */ +void +__os_free(env, ptr) + ENV *env; + void *ptr; +{ +#ifdef DIAGNOSTIC + size_t size; +#endif + + /* + * ANSI C requires free(NULL) work. Don't depend on the underlying + * library. + */ + if (ptr == NULL) + return; + +#ifdef DIAGNOSTIC + /* + * Check that the guard byte (one past the end of the memory) is + * still CLEAR_BYTE. + */ + ptr = &((db_allocinfo_t *)ptr)[-1]; + size = ((db_allocinfo_t *)ptr)->size; + if (((u_int8_t *)ptr)[size - 1] != CLEAR_BYTE) + __os_guard(env); + + /* Overwrite memory. */ + if (size != 0) + memset(ptr, CLEAR_BYTE, size); +#else + COMPQUIET(env, NULL); +#endif + + if (DB_GLOBAL(j_free) != NULL) + DB_GLOBAL(j_free)(ptr); + else + free(ptr); +} + +#ifdef DIAGNOSTIC +/* + * __os_guard -- + * Complain and abort. + */ +static void +__os_guard(env) + ENV *env; +{ + __db_errx(env, DB_STR("0149", + "Guard byte incorrect during free")); + __os_abort(env); + /* NOTREACHED */ +} +#endif + +/* + * __ua_memcpy -- + * Copy memory to memory without relying on any kind of alignment. + * + * There are places in DB that we have unaligned data, for example, + * when we've stored a structure in a log record as a DBT, and now + * we want to look at it. 
Unfortunately, if you have code like: + * + * struct a { + * int x; + * } *p; + * + * void *func_argument; + * int local; + * + * p = (struct a *)func_argument; + * memcpy(&local, p->x, sizeof(local)); + * + * compilers optimize to use inline instructions requiring alignment, + * and records in the log don't have any particular alignment. (This + * isn't a compiler bug, because it's a structure they're allowed to + * assume alignment.) + * + * Casting the memcpy arguments to (u_int8_t *) appears to work most + * of the time, but we've seen examples where it wasn't sufficient + * and there's nothing in ANSI C that requires that work. + * + * PUBLIC: void *__ua_memcpy __P((void *, const void *, size_t)); + */ +void * +__ua_memcpy(dst, src, len) + void *dst; + const void *src; + size_t len; +{ + return ((void *)memcpy(dst, src, len)); +} diff --git a/src/os/os_clock.c b/src/os/os_clock.c new file mode 100644 index 00000000..9457a516 --- /dev/null +++ b/src/os/os_clock.c @@ -0,0 +1,73 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_gettime -- + * Return the current time-of-day clock in seconds and nanoseconds. 
+ * + * PUBLIC: void __os_gettime __P((ENV *, db_timespec *, int)); + */ +void +__os_gettime(env, tp, monotonic) + ENV *env; + db_timespec *tp; + int monotonic; +{ + const char *sc; + int ret; + +#if defined(HAVE_CLOCK_GETTIME) +#if defined(HAVE_CLOCK_MONOTONIC) + if (monotonic) + RETRY_CHK((clock_gettime( + CLOCK_MONOTONIC, (struct timespec *)tp)), ret); + else +#endif + RETRY_CHK((clock_gettime( + CLOCK_REALTIME, (struct timespec *)tp)), ret); + + RETRY_CHK((clock_gettime(CLOCK_REALTIME, (struct timespec *)tp)), ret); + if (ret != 0) { + sc = "clock_gettime"; + goto err; + } +#elif defined(HAVE_GETTIMEOFDAY) + struct timeval v; + + RETRY_CHK((gettimeofday(&v, NULL)), ret); + if (ret != 0) { + sc = "gettimeofday"; + goto err; + } + + tp->tv_sec = v.tv_sec; + tp->tv_nsec = v.tv_usec * NS_PER_US; +#elif defined(HAVE_TIME) + time_t now; + + RETRY_CHK((time(&now) == (time_t)-1 ? 1 : 0), ret); + if (ret != 0) { + sc = "time"; + goto err; + } + + tp->tv_sec = now; + tp->tv_nsec = 0; +#else + NO AVAILABLE CLOCK IMPLEMENTATION +#endif + COMPQUIET(monotonic, 0); + return; + +err: __db_syserr(env, ret, "%s", sc); + (void)__env_panic(env, __os_posix_err(ret)); +} diff --git a/src/os/os_config.c b/src/os/os_config.c new file mode 100644 index 00000000..7b8f5ee4 --- /dev/null +++ b/src/os/os_config.c @@ -0,0 +1,70 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_fs_notzero -- + * Return 1 if allocated filesystem blocks are not zeroed. + * + * PUBLIC: int __os_fs_notzero __P((void)); + */ +int +__os_fs_notzero() +{ + /* Most filesystems zero out implicitly created pages. */ + return (0); +} + +/* + * __os_support_direct_io -- + * Return 1 if we support direct I/O. 
+ * + * PUBLIC: int __os_support_direct_io __P((void)); + */ +int +__os_support_direct_io() +{ + int ret; + + ret = 0; + +#ifdef HAVE_O_DIRECT + ret = 1; +#endif +#if defined(HAVE_DIRECTIO) && defined(DIRECTIO_ON) + ret = 1; +#endif + return (ret); +} + +/* + * __os_support_db_register -- + * Return 1 if the system supports DB_REGISTER. + * + * PUBLIC: int __os_support_db_register __P((void)); + */ +int +__os_support_db_register() +{ + return (1); +} + +/* + * __os_support_replication -- + * Return 1 if the system supports replication. + * + * PUBLIC: int __os_support_replication __P((void)); + */ +int +__os_support_replication() +{ + return (1); +} diff --git a/src/os/os_cpu.c b/src/os/os_cpu.c new file mode 100644 index 00000000..0bf8429f --- /dev/null +++ b/src/os/os_cpu.c @@ -0,0 +1,47 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#if defined(HAVE_PSTAT_GETDYNAMIC) +#include +#endif +#endif + +/* + * __os_cpu_count -- + * Return the number of CPUs. + * + * PUBLIC: u_int32_t __os_cpu_count __P((void)); + */ +u_int32_t +__os_cpu_count() +{ +#if defined(HAVE_PSTAT_GETDYNAMIC) + /* + * HP/UX. + */ + struct pst_dynamic psd; + + return ((u_int32_t)pstat_getdynamic(&psd, + sizeof(psd), (size_t)1, 0) == -1 ? 1 : psd.psd_proc_cnt); +#elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) + /* + * Solaris, Linux. + */ + long nproc; + + nproc = sysconf(_SC_NPROCESSORS_ONLN); + return ((u_int32_t)(nproc > 1 ? nproc : 1)); +#else + return (1); +#endif +} diff --git a/src/os/os_ctime.c b/src/os/os_ctime.c new file mode 100644 index 00000000..c6652108 --- /dev/null +++ b/src/os/os_ctime.c @@ -0,0 +1,47 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_ctime -- + * Format a time-stamp. + * + * PUBLIC: char *__os_ctime __P((const time_t *, char *)); + */ +char * +__os_ctime(tod, time_buf) + const time_t *tod; + char *time_buf; +{ + time_buf[CTIME_BUFLEN - 1] = '\0'; + + /* + * The ctime_r interface is the POSIX standard, thread-safe version of + * ctime. However, it was implemented in three different ways (with + * and without a buffer length argument, and where the buffer length + * argument was an int vs. a size_t *). Also, you can't depend on a + * return of (char *) from ctime_r, HP-UX 10.XX's version returned an + * int. + */ +#if defined(HAVE_VXWORKS) + { + size_t buflen = CTIME_BUFLEN; + (void)ctime_r(tod, time_buf, &buflen); + } +#elif defined(HAVE_CTIME_R_3ARG) + (void)ctime_r(tod, time_buf, CTIME_BUFLEN); +#elif defined(HAVE_CTIME_R) + (void)ctime_r(tod, time_buf); +#else + (void)strncpy(time_buf, ctime(tod), CTIME_BUFLEN - 1); +#endif + return (time_buf); +} diff --git a/src/os/os_dir.c b/src/os/os_dir.c new file mode 100644 index 00000000..01349f1b --- /dev/null +++ b/src/os/os_dir.c @@ -0,0 +1,140 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#if HAVE_DIRENT_H +# include +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# define NAMLEN(dirent) (dirent)->d_namlen +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +#endif + +#include "db_int.h" + +/* + * __os_dirlist -- + * Return a list of the files in a directory. 
+ * + * PUBLIC: int __os_dirlist __P((ENV *, const char *, int, char ***, int *)); + */ +int +__os_dirlist(env, dir, returndir, namesp, cntp) + ENV *env; + const char *dir; + int returndir, *cntp; + char ***namesp; +{ + DB_ENV *dbenv; + struct dirent *dp; + DIR *dirp; + struct stat sb; + int arraysz, cnt, ret; + char **names, buf[DB_MAXPATHLEN]; + + *namesp = NULL; + *cntp = 0; + + dbenv = env == NULL ? NULL : env->dbenv; + + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0159", + "fileops: directory list %s", "%s"), dir); + + if (DB_GLOBAL(j_dirlist) != NULL) + return (DB_GLOBAL(j_dirlist)(dir, namesp, cntp)); + + if ((dirp = opendir(CHAR_STAR_CAST dir)) == NULL) + return (__os_get_errno()); + names = NULL; + for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL;) { + snprintf(buf, sizeof(buf), "%s/%s", dir, dp->d_name); + + RETRY_CHK(stat(buf, &sb), ret); + if (ret != 0) { + ret = __os_posix_err(ret); + /* Ignore entries that no longer exist. */ + if (ret == ENOENT) + continue; + + goto err; + } + + /* + * We return regular files, and optionally return directories + * (except for dot and dot-dot). + * + * Shared memory files are of a different type on QNX, and we + * return those as well. + */ +#ifdef HAVE_QNX + if (!S_ISREG(sb.st_mode) && !S_TYPEISSHM(&sb)) { +#else + if (!S_ISREG(sb.st_mode)) { +#endif + if (!returndir || !S_ISDIR(sb.st_mode)) + continue; + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' 
&& dp->d_name[2] == '\0'))) + continue; + } + + if (cnt >= arraysz) { + arraysz += 100; + if ((ret = __os_realloc(env, + (u_int)arraysz * sizeof(names[0]), &names)) != 0) + goto err; + } + if ((ret = __os_strdup(env, dp->d_name, &names[cnt])) != 0) + goto err; + cnt++; + } + (void)closedir(dirp); + + *namesp = names; + *cntp = cnt; + return (0); + +err: if (names != NULL) + __os_dirfree(env, names, cnt); + if (dirp != NULL) + (void)closedir(dirp); + return (ret); +} + +/* + * __os_dirfree -- + * Free the list of files. + * + * PUBLIC: void __os_dirfree __P((ENV *, char **, int)); + */ +void +__os_dirfree(env, names, cnt) + ENV *env; + char **names; + int cnt; +{ + if (DB_GLOBAL(j_dirfree) != NULL) + DB_GLOBAL(j_dirfree)(names, cnt); + else { + while (cnt > 0) + __os_free(env, names[--cnt]); + __os_free(env, names); + } +} diff --git a/src/os/os_errno.c b/src/os/os_errno.c new file mode 100644 index 00000000..6884a9a6 --- /dev/null +++ b/src/os/os_errno.c @@ -0,0 +1,129 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_get_errno_ret_zero -- + * Return the last system error, including an error of zero. + * + * PUBLIC: int __os_get_errno_ret_zero __P((void)); + */ +int +__os_get_errno_ret_zero() +{ + /* This routine must be able to return the same value repeatedly. */ + return (errno); +} + +/* + * We've seen cases where system calls failed but errno was never set. For + * that reason, __os_get_errno() and __os_get_syserr set errno to EAGAIN if + * it's not already set, to work around the problem. For obvious reasons, + * we can only call this function if we know an error has occurred, that + * is, we can't test the return for a non-zero value after the get call. + * + * __os_get_errno -- + * Return the last ANSI C "errno" value or EAGAIN if the last error + * is zero. 
+ * + * PUBLIC: int __os_get_errno __P((void)); + */ +int +__os_get_errno() +{ + /* This routine must be able to return the same value repeatedly. */ + return (__os_get_syserr()); +} + +#if 0 +/* + * __os_get_neterr -- + * Return the last network-related error or EAGAIN if the last + * error is zero. + * + * PUBLIC: int __os_get_neterr __P((void)); + */ +int +__os_get_neterr() +{ + /* This routine must be able to return the same value repeatedly. */ + return (__os_get_syserr()); +} +#endif + +/* + * __os_get_syserr -- + * Return the last system error or EAGAIN if the last error is zero. + * + * PUBLIC: int __os_get_syserr __P((void)); + */ +int +__os_get_syserr() +{ + /* This routine must be able to return the same value repeatedly. */ + if (errno == 0) + __os_set_errno(EAGAIN); + return (errno); +} + +/* + * __os_set_errno -- + * Set the value of errno. + * + * PUBLIC: void __os_set_errno __P((int)); + */ +void +__os_set_errno(evalue) + int evalue; +{ + /* + * This routine is called by the compatibility interfaces (DB 1.85, + * dbm and hsearch). Force values > 0, that is, not one of DB 2.X + * and later's public error returns. If something bad has happened, + * default to EFAULT -- a nasty return. Otherwise, default to EINVAL. + * As the compatibility APIs aren't included on Windows, the Windows + * version of this routine doesn't need this behavior. + */ + errno = + evalue >= 0 ? evalue : (evalue == DB_RUNRECOVERY ? EFAULT : EINVAL); +} + +/* + * __os_strerror -- + * Return a string associated with the system error. + * + * PUBLIC: char *__os_strerror __P((int, char *, size_t)); + */ +char * +__os_strerror(error, buf, len) + int error; + char *buf; + size_t len; +{ + /* No translation is needed in the POSIX layer. */ + (void)strncpy(buf, strerror(error), len - 1); + buf[len - 1] = '\0'; + + return (buf); +} + +/* + * __os_posix_err + * Convert a system error to a POSIX error. 
+ * + * PUBLIC: int __os_posix_err __P((int)); + */ +int +__os_posix_err(error) + int error; +{ + return (error); +} diff --git a/src/os/os_fid.c b/src/os/os_fid.c new file mode 100644 index 00000000..2caa1977 --- /dev/null +++ b/src/os/os_fid.c @@ -0,0 +1,135 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_fileid -- + * Return a unique identifier for a file. + * + * PUBLIC: int __os_fileid __P((ENV *, const char *, int, u_int8_t *)); + */ +int +__os_fileid(env, fname, unique_okay, fidp) + ENV *env; + const char *fname; + int unique_okay; + u_int8_t *fidp; +{ + pid_t pid; + size_t i; + u_int32_t tmp; + u_int8_t *p; + +#ifdef HAVE_STAT + struct stat sb; + int ret; + + /* + * The structure of a fileid on a POSIX/UNIX system is: + * + * ino[4] dev[4] unique-ID[4] serial-counter[4] empty[4]. + * + * For real files, which have a backing inode and device, the first + * 8 bytes are filled in and the following bytes are left 0. For + * temporary files, the following 12 bytes are filled in. + * + * Clear the buffer. + */ + memset(fidp, 0, DB_FILE_ID_LEN); + RETRY_CHK((stat(CHAR_STAR_CAST fname, &sb)), ret); + if (ret != 0) { + __db_syserr(env, ret, DB_STR_A("0158", + "stat: %s", "%s"), fname); + return (__os_posix_err(ret)); + } + + /* + * !!! + * Nothing is ever big enough -- on Sparc V9, st_ino, st_dev and the + * time_t types are all 8 bytes. As DB_FILE_ID_LEN is only 20 bytes, + * we convert to a (potentially) smaller fixed-size type and use it. + * + * We don't worry about byte sexing or the actual variable sizes. 
+ * + * When this routine is called from the DB access methods, it's only + * called once -- whatever ID is generated when a database is created + * is stored in the database file's metadata, and that is what is + * saved in the mpool region's information to uniquely identify the + * file. + * + * When called from the mpool layer this routine will be called each + * time a new thread of control wants to share the file, which makes + * things tougher. As far as byte sexing goes, since the mpool region + * lives on a single host, there's no issue of that -- the entire + * region is byte sex dependent. As far as variable sizes go, we make + * the simplifying assumption that 32-bit and 64-bit processes will + * get the same 32-bit values if we truncate any returned 64-bit value + * to a 32-bit value. When we're called from the mpool layer, though, + * we need to be careful not to include anything that isn't + * reproducible for a given file, such as the timestamp or serial + * number. + */ + tmp = (u_int32_t)sb.st_ino; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + + tmp = (u_int32_t)sb.st_dev; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; +#else + /* + * Use the file name. + * + * XXX + * Cast the first argument, the BREW ARM compiler is unhappy if + * we don't. + */ + (void)strncpy((char *)fidp, fname, DB_FILE_ID_LEN); +#endif /* HAVE_STAT */ + + if (unique_okay) { + /* Add in 32-bits of (hopefully) unique number. */ + __os_unique_id(env, &tmp); + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + + /* + * Initialize/increment the serial number we use to help + * avoid fileid collisions. Note we don't bother with + * locking; it's unpleasant to do from down in here, and + * if we race on this no real harm will be done, since the + * finished fileid has so many other components. 
+ * + * We use the bottom 32-bits of the process ID, hoping they + * are more random than the top 32-bits (should we be on a + * machine with 64-bit process IDs). + * + * We increment by 100000 on each call as a simple way of + * randomizing; simply incrementing seems potentially less + * useful if pids are also simply incremented, since this + * is process-local and we may be one of a set of processes + * starting up. 100000 pushes us out of pid space on most + * 32-bit platforms, and has few interesting properties in + * base 2. + */ + if (DB_GLOBAL(fid_serial) == 0) { + __os_id(env->dbenv, &pid, NULL); + DB_GLOBAL(fid_serial) = (u_int32_t)pid; + } else + DB_GLOBAL(fid_serial) += 100000; + + for (p = (u_int8_t *) + &DB_GLOBAL(fid_serial), i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + } + + return (0); +} diff --git a/src/os/os_flock.c b/src/os/os_flock.c new file mode 100644 index 00000000..6f0fef85 --- /dev/null +++ b/src/os/os_flock.c @@ -0,0 +1,64 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_fdlock -- + * Acquire/release a lock on a byte in a file. + * + * PUBLIC: int __os_fdlock __P((ENV *, DB_FH *, off_t, int, int)); + */ +int +__os_fdlock(env, fhp, offset, acquire, nowait) + ENV *env; + DB_FH *fhp; + int acquire, nowait; + off_t offset; +{ +#ifdef HAVE_FCNTL + DB_ENV *dbenv; + struct flock fl; + int ret, t_ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0138", + "fileops: flock %s %s offset %lu", "%s %s %lu"), fhp->name, + acquire ? DB_STR_P("acquire"): DB_STR_P("release"), + (u_long)offset); + + fl.l_start = offset; + fl.l_len = 1; + fl.l_type = acquire ? 
F_WRLCK : F_UNLCK; + fl.l_whence = SEEK_SET; + + RETRY_CHK_EINTR_ONLY( + (fcntl(fhp->fd, nowait ? F_SETLK : F_SETLKW, &fl)), ret); + + if (ret == 0) + return (0); + + if ((t_ret = __os_posix_err(ret)) != EACCES && t_ret != EAGAIN) + __db_syserr(env, ret, DB_STR("0139", "fcntl")); + return (t_ret); +#else + COMPQUIET(fhp, NULL); + COMPQUIET(acquire, 0); + COMPQUIET(nowait, 0); + COMPQUIET(offset, 0); + __db_syserr(env, DB_OPNOTSUP, DB_STR("0140", + "advisory file locking unavailable")); + return (DB_OPNOTSUP); +#endif +} diff --git a/src/os/os_fsync.c b/src/os/os_fsync.c new file mode 100644 index 00000000..7583be01 --- /dev/null +++ b/src/os/os_fsync.c @@ -0,0 +1,104 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_VXWORKS +#include "ioLib.h" + +#define fsync(fd) __vx_fsync(fd) + +int +__vx_fsync(fd) + int fd; +{ + int ret; + + /* + * The results of ioctl are driver dependent. Some will return the + * number of bytes sync'ed. Only if it returns 'ERROR' should we + * flag it. + */ + if ((ret = ioctl(fd, FIOSYNC, 0)) != ERROR) + return (0); + return (ret); +} +#endif + +#ifdef __hp3000s900 +#define fsync(fd) __mpe_fsync(fd) + +int +__mpe_fsync(fd) + int fd; +{ + extern FCONTROL(short, short, void *); + + FCONTROL(_MPE_FILENO(fd), 2, NULL); /* Flush the buffers */ + FCONTROL(_MPE_FILENO(fd), 6, NULL); /* Write the EOF */ + return (0); +} +#endif + +/* + * __os_fsync -- + * Flush a file descriptor. + * + * PUBLIC: int __os_fsync __P((ENV *, DB_FH *)); + */ +int +__os_fsync(env, fhp) + ENV *env; + DB_FH *fhp; +{ + DB_ENV *dbenv; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + + /* + * Do nothing if the file descriptor has been marked as not requiring + * any sync to disk. 
+ */ + if (F_ISSET(fhp, DB_FH_NOSYNC)) + return (0); + + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0150", "fileops: flush %s", "%s"), + fhp->name); + + if (DB_GLOBAL(j_fsync) != NULL) + ret = DB_GLOBAL(j_fsync)(fhp->fd); + else { +#if defined(F_FULLFSYNC) + RETRY_CHK((fcntl(fhp->fd, F_FULLFSYNC, 0)), ret); + /* + * On OS X, F_FULLSYNC only works on HFS+, so we need to fall + * back to regular fsync on other filesystems. + */ + if (ret == ENOTSUP) + RETRY_CHK((fsync(fhp->fd)), ret); +#elif defined(HAVE_QNX) + ret = __qnx_fsync(fhp); +#elif defined(HAVE_FDATASYNC) + RETRY_CHK((fdatasync(fhp->fd)), ret); +#else + RETRY_CHK((fsync(fhp->fd)), ret); +#endif + } + + if (ret != 0) { + __db_syserr(env, ret, DB_STR("0151", "fsync")); + ret = __os_posix_err(ret); + } + return (ret); +} diff --git a/src/os/os_getenv.c b/src/os/os_getenv.c new file mode 100644 index 00000000..d82fa511 --- /dev/null +++ b/src/os/os_getenv.c @@ -0,0 +1,58 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_getenv -- + * Retrieve an environment variable. 
+ * + * PUBLIC: int __os_getenv __P((ENV *, const char *, char **, size_t)); + */ +int +__os_getenv(env, name, bpp, buflen) + ENV *env; + const char *name; + char **bpp; + size_t buflen; +{ + /* + * If we have getenv, there's a value and the buffer is large enough: + * copy value into the pointer, return 0 + * If we have getenv, there's a value and the buffer is too short: + * set pointer to NULL, return EINVAL + * If we have getenv and there's no value: + * set pointer to NULL, return 0 + * If we don't have getenv: + * set pointer to NULL, return 0 + */ +#ifdef HAVE_GETENV + char *p; + + if ((p = getenv(name)) != NULL) { + if (strlen(p) < buflen) { + (void)strcpy(*bpp, p); + return (0); + } + + *bpp = NULL; + __db_errx(env, DB_STR_A("0157", + "%s: buffer too small to hold environment variable %s", + "%s %s"), name, p); + return (EINVAL); + } +#else + COMPQUIET(env, NULL); + COMPQUIET(name, NULL); + COMPQUIET(buflen, 0); +#endif + *bpp = NULL; + return (0); +} diff --git a/src/os/os_handle.c b/src/os/os_handle.c new file mode 100644 index 00000000..eaf08178 --- /dev/null +++ b/src/os/os_handle.c @@ -0,0 +1,243 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_openhandle -- + * Open a file, using POSIX 1003.1 open flags. + * + * PUBLIC: int __os_openhandle + * PUBLIC: __P((ENV *, const char *, int, int, DB_FH **)); + */ +int +__os_openhandle(env, name, flags, mode, fhpp) + ENV *env; + const char *name; + int flags, mode; + DB_FH **fhpp; +{ + DB_FH *fhp; + u_int nrepeat, retries; + int fcntl_flags, ret; +#ifdef HAVE_VXWORKS + int newflags; +#endif + /* + * Allocate the file handle and copy the file name. 
We generally only + * use the name for verbose or error messages, but on systems where we + * can't unlink temporary files immediately, we use the name to unlink + * the temporary file when the file handle is closed. + * + * Lock the ENV handle and insert the new file handle on the list. + */ + if ((ret = __os_calloc(env, 1, sizeof(DB_FH), &fhp)) != 0) + return (ret); + if ((ret = __os_strdup(env, name, &fhp->name)) != 0) + goto err; + if (env != NULL) { + MUTEX_LOCK(env, env->mtx_env); + TAILQ_INSERT_TAIL(&env->fdlist, fhp, q); + MUTEX_UNLOCK(env, env->mtx_env); + F_SET(fhp, DB_FH_ENVLINK); + } + + /* If the application specified an interface, use it. */ + if (DB_GLOBAL(j_open) != NULL) { + if ((fhp->fd = DB_GLOBAL(j_open)(name, flags, mode)) == -1) { + ret = __os_posix_err(__os_get_syserr()); + goto err; + } + goto done; + } + + retries = 0; + for (nrepeat = 1; nrepeat < 4; ++nrepeat) { + ret = 0; +#ifdef HAVE_VXWORKS + /* + * VxWorks does not support O_CREAT on open, you have to use + * creat() instead. (It does not support O_EXCL or O_TRUNC + * either, even though they are defined "for future support".) + * We really want the POSIX behavior that if O_CREAT is set, + * we open if it exists, or create it if it doesn't exist. + * If O_CREAT is specified, single thread and try to open the + * file. If successful, and O_EXCL return EEXIST. If + * unsuccessful call creat and then end single threading. + */ + if (LF_ISSET(O_CREAT)) { + DB_BEGIN_SINGLE_THREAD; + newflags = flags & ~(O_CREAT | O_EXCL); + if ((fhp->fd = open(name, newflags, mode)) != -1) { + /* + * We need to mark the file opened at this + * point so that if we get any error below + * we will properly close the fd we just + * opened on the error path. + */ + F_SET(fhp, DB_FH_OPENED); + if (LF_ISSET(O_EXCL)) { + /* + * If we get here, want O_EXCL create, + * and the file exists. Close and + * return EEXISTS. 
+ */ + DB_END_SINGLE_THREAD; + ret = EEXIST; + goto err; + } + /* + * XXX + * Assume any error means non-existence. + * Unfortunately return values (even for + * non-existence) are driver specific so + * there is no single error we can use to + * verify we truly got the equivalent of + * ENOENT. + */ + } else + fhp->fd = creat(name, newflags); + DB_END_SINGLE_THREAD; + } else + /* FALLTHROUGH */ +#endif +#ifdef __VMS + /* + * !!! + * Open with full sharing on VMS. + * + * We use these flags because they are the ones set by the VMS + * CRTL mmap() call when it opens a file, and we have to be + * able to open files that mmap() has previously opened, e.g., + * when we're joining already existing DB regions. + */ + fhp->fd = open(name, flags, mode, "shr=get,put,upd,del,upi"); +#else + fhp->fd = open(name, flags, mode); +#endif + if (fhp->fd != -1) { + ret = 0; + break; + } + + switch (ret = __os_posix_err(__os_get_syserr())) { + case EMFILE: + case ENFILE: + case ENOSPC: + /* + * If it's a "temporary" error, we retry up to 3 times, + * waiting up to 12 seconds. While it's not a problem + * if we can't open a database, an inability to open a + * log file is cause for serious dismay. + */ + __os_yield(env, nrepeat * 2, 0); + break; + case EAGAIN: + case EBUSY: + case EINTR: + /* + * If an EAGAIN, EBUSY or EINTR, retry immediately for + * DB_RETRY times. + */ + if (++retries < DB_RETRY) + --nrepeat; + break; + default: + /* Open is silent on error. */ + goto err; + } + } + + if (ret == 0) { +#if defined(HAVE_FCNTL_F_SETFD) + /* Deny file descriptor access to any child process. 
*/ + if ((fcntl_flags = fcntl(fhp->fd, F_GETFD)) == -1 || + fcntl(fhp->fd, F_SETFD, fcntl_flags | FD_CLOEXEC) == -1) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("0162", + "fcntl(F_SETFD)")); + ret = __os_posix_err(ret); + goto err; + } +#else + COMPQUIET(fcntl_flags, 0); +#endif + +done: F_SET(fhp, DB_FH_OPENED); + *fhpp = fhp; + return (0); + } + +err: (void)__os_closehandle(env, fhp); + return (ret); +} + +/* + * __os_closehandle -- + * Close a file. + * + * PUBLIC: int __os_closehandle __P((ENV *, DB_FH *)); + */ +int +__os_closehandle(env, fhp) + ENV *env; + DB_FH *fhp; +{ + DB_ENV *dbenv; + int ret; + + ret = 0; + + /* + * If we linked the DB_FH handle into the ENV, it needs to be + * unlinked. + */ + DB_ASSERT(env, env != NULL || !F_ISSET(fhp, DB_FH_ENVLINK)); + + if (env != NULL) { + dbenv = env->dbenv; + if (fhp->name != NULL && FLD_ISSET( + dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0163", + "fileops: close %s", "%s"), fhp->name); + + if (F_ISSET(fhp, DB_FH_ENVLINK)) { + /* + * Lock the ENV handle and remove this file + * handle from the list. + */ + MUTEX_LOCK(env, env->mtx_env); + TAILQ_REMOVE(&env->fdlist, fhp, q); + MUTEX_UNLOCK(env, env->mtx_env); + } + } + + /* Discard any underlying system file reference. */ + if (F_ISSET(fhp, DB_FH_OPENED)) { + if (DB_GLOBAL(j_close) != NULL) + ret = DB_GLOBAL(j_close)(fhp->fd); + else + RETRY_CHK((close(fhp->fd)), ret); + if (ret != 0) { + __db_syserr(env, ret, DB_STR("0164", "close")); + ret = __os_posix_err(ret); + } + } + + /* Unlink the file if we haven't already done so. */ + if (F_ISSET(fhp, DB_FH_UNLINK)) + (void)__os_unlink(env, fhp->name, 0); + + if (fhp->name != NULL) + __os_free(env, fhp->name); + __os_free(env, fhp); + + return (ret); +} diff --git a/src/os/os_map.c b/src/os/os_map.c new file mode 100644 index 00000000..3be8ef11 --- /dev/null +++ b/src/os/os_map.c @@ -0,0 +1,607 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#ifdef HAVE_MMAP +#include +#endif + +#ifdef HAVE_SHMGET +#include +#include +#endif +#endif + +#ifdef HAVE_MMAP +static int __os_map __P((ENV *, char *, DB_FH *, size_t, int, int, void **)); +#endif +#ifdef HAVE_SHMGET +static int __shm_mode __P((ENV *)); +#else +static int __no_system_mem __P((ENV *)); +#endif + +/* + * __os_attach -- + * Create/join a shared memory region. + * + * PUBLIC: int __os_attach __P((ENV *, REGINFO *, REGION *)); + */ +int +__os_attach(env, infop, rp) + ENV *env; + REGINFO *infop; + REGION *rp; +{ + DB_ENV *dbenv; + int create_ok, ret; + + /* + * We pass a DB_ENV handle to the user's replacement map function, + * so there must be a valid handle. + */ + DB_ASSERT(env, env != NULL && env->dbenv != NULL); + dbenv = env->dbenv; + + if (DB_GLOBAL(j_region_map) != NULL) { + /* + * We have to find out if the region is being created. Ask + * the underlying map function, and use the REGINFO structure + * to pass that information back to our caller. + */ + create_ok = F_ISSET(infop, REGION_CREATE) ? 1 : 0; + ret = DB_GLOBAL(j_region_map) + (dbenv, infop->name, rp->max, &create_ok, &infop->addr); + if (create_ok) + F_SET(infop, REGION_CREATE); + else + F_CLR(infop, REGION_CREATE); + return (ret); + } + + if (F_ISSET(env, ENV_SYSTEM_MEM)) { + /* + * If the region is in system memory on UNIX, we use shmget(2). + * + * !!! + * There exist spinlocks that don't work in shmget memory, e.g., + * the HP/UX msemaphore interface. If we don't have locks that + * will work in shmget memory, we better be private and not be + * threaded. If we reach this point, we know we're public, so + * it's an error. 
+ */ +#if defined(HAVE_MUTEX_HPPA_MSEM_INIT) + __db_errx(env, DB_STR("0114", + "architecture does not support locks inside system shared memory")); + return (EINVAL); +#endif +#if defined(HAVE_SHMGET) + { + key_t segid; + int id, mode; + + /* + * We could potentially create based on REGION_CREATE_OK, but + * that's dangerous -- we might get crammed in sideways if + * some of the expected regions exist but others do not. Also, + * if the requested size differs from an existing region's + * actual size, then all sorts of nasty things can happen. + * Basing create solely on REGION_CREATE is much safer -- a + * recovery will get us straightened out. + */ + if (F_ISSET(infop, REGION_CREATE)) { + /* + * The application must give us a base System V IPC key + * value. Adjust that value based on the region's ID, + * and correct so the user's original value appears in + * the ipcs output. + */ + if (dbenv->shm_key == INVALID_REGION_SEGID) { + __db_errx(env, DB_STR("0115", + "no base system shared memory ID specified")); + return (EINVAL); + } + + /* + * !!! + * The BDB API takes a "long" as the base segment ID, + * then adds an unsigned 32-bit value and stores it + * in a key_t. Wrong, admittedly, but not worth an + * API change to fix. + */ + segid = (key_t) + ((u_long)dbenv->shm_key + (infop->id - 1)); + + /* + * If map to an existing region, assume the application + * crashed and we're restarting. Delete the old region + * and re-try. If that fails, return an error, the + * application will have to select a different segment + * ID or clean up some other way. + */ + if ((id = shmget(segid, 0, 0)) != -1) { + (void)shmctl(id, IPC_RMID, NULL); + if ((id = shmget(segid, 0, 0)) != -1) { + __db_errx(env, DB_STR_A("0116", + "shmget: key: %ld: shared system memory region already exists", + "%ld"), (long)segid); + return (EAGAIN); + } + } + + /* + * Map the DbEnv::open method file mode permissions to + * shmget call permissions. 
+ */ + mode = IPC_CREAT | __shm_mode(env); + if ((id = shmget(segid, rp->max, mode)) == -1) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR_A("0117", + "shmget: key: %ld: unable to create shared system memory region", + "%ld"), (long)segid); + return (__os_posix_err(ret)); + } + rp->size = rp->max; + rp->segid = id; + } else + id = rp->segid; + + if ((infop->addr = shmat(id, NULL, 0)) == (void *)-1) { + infop->addr = NULL; + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR_A("0118", + "shmat: id %d: unable to attach to shared system memory region", + "%d"), id); + return (__os_posix_err(ret)); + } + + /* Optionally lock the memory down. */ + if (F_ISSET(env, ENV_LOCKDOWN)) { +#ifdef HAVE_SHMCTL_SHM_LOCK + ret = shmctl( + id, SHM_LOCK, NULL) == 0 ? 0 : __os_get_syserr(); +#else + ret = DB_OPNOTSUP; +#endif + if (ret != 0) { + __db_syserr(env, ret, DB_STR_A("0119", + "shmctl/SHM_LOCK: id %d: unable to lock down shared memory region", + "%d"), id); + return (__os_posix_err(ret)); + } + } + + return (0); + } +#else + return (__no_system_mem(env)); +#endif + } + +#ifdef HAVE_MMAP + { + infop->fhp = NULL; + + /* + * Try to open/create the shared region file. We DO NOT need to ensure + * that multiple threads/processes attempting to simultaneously create + * the region are properly ordered, our caller has already taken care + * of that. + */ + if ((ret = __os_open(env, infop->name, 0, + DB_OSO_REGION | + (F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE : 0), + env->db_mode, &infop->fhp)) != 0) + __db_err(env, ret, "%s", infop->name); + + /* + * If we created the file, grow it before mapping it in. We really want + * to avoid touching the buffer cache after mmap() is called, doing + * anything else confuses the hell out of systems without merged + * VM/buffer cache systems, or, more to the point, *badly* merged + * VM/buffer cache systems. 
+ */ + if (rp->max < rp->size) + rp->max = rp->size; + if (ret == 0 && F_ISSET(infop, REGION_CREATE)) { + if (F_ISSET(dbenv, DB_ENV_REGION_INIT)) + ret = __db_file_write(env, infop->fhp, + rp->size / MEGABYTE, rp->size % MEGABYTE, 0x00); + else + ret = __db_file_extend(env, infop->fhp, rp->size); + } + + /* Map the file in. */ + if (ret == 0) + ret = __os_map(env, + infop->name, infop->fhp, rp->max, 1, 0, &infop->addr); + + if (ret != 0 && infop->fhp != NULL) { + (void)__os_closehandle(env, infop->fhp); + infop->fhp = NULL; + } + + return (ret); + } +#else + COMPQUIET(infop, NULL); + COMPQUIET(rp, NULL); + __db_errx(env, DB_STR("0120", + "architecture lacks mmap(2), shared environments not possible")); + return (DB_OPNOTSUP); +#endif +} + +/* + * __os_detach -- + * Detach from a shared memory region. + * + * PUBLIC: int __os_detach __P((ENV *, REGINFO *, int)); + */ +int +__os_detach(env, infop, destroy) + ENV *env; + REGINFO *infop; + int destroy; +{ + DB_ENV *dbenv; + REGION *rp; + int ret; + + /* + * We pass a DB_ENV handle to the user's replacement unmap function, + * so there must be a valid handle. + */ + DB_ASSERT(env, env != NULL && env->dbenv != NULL); + dbenv = env->dbenv; + + rp = infop->rp; + + /* If the user replaced the unmap call, call through their interface. */ + if (DB_GLOBAL(j_region_unmap) != NULL) + return (DB_GLOBAL(j_region_unmap)(dbenv, infop->addr)); + + if (F_ISSET(env, ENV_SYSTEM_MEM)) { +#ifdef HAVE_SHMGET + int segid; + + /* + * We may be about to remove the memory referenced by rp, + * save the segment ID, and (optionally) wipe the original. 
+ */ + segid = rp->segid; + if (destroy) + rp->segid = INVALID_REGION_SEGID; + + if (shmdt(infop->addr) != 0) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("0121", "shmdt")); + return (__os_posix_err(ret)); + } + + if (destroy && shmctl(segid, IPC_RMID, + NULL) != 0 && (ret = __os_get_syserr()) != EINVAL) { + __db_syserr(env, ret, DB_STR_A("0122", + "shmctl: id %d: unable to delete system shared memory region", + "%d"), segid); + return (__os_posix_err(ret)); + } + + return (0); +#else + return (__no_system_mem(env)); +#endif + } + +#ifdef HAVE_MMAP +#ifdef HAVE_MUNLOCK + if (F_ISSET(env, ENV_LOCKDOWN)) + (void)munlock(infop->addr, rp->max); +#endif + if (infop->fhp != NULL) { + ret = __os_closehandle(env, infop->fhp); + infop->fhp = NULL; + if (ret != 0) + return (ret); + } + + if (munmap(infop->addr, rp->max) != 0) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("0123", "munmap")); + return (__os_posix_err(ret)); + } + + if (destroy && (ret = __os_unlink(env, infop->name, 1)) != 0) + return (ret); + + return (0); +#else + COMPQUIET(destroy, 0); + COMPQUIET(ret, 0); + return (EINVAL); +#endif +} + +/* + * __os_mapfile -- + * Map in a shared memory file. + * + * PUBLIC: int __os_mapfile __P((ENV *, char *, DB_FH *, size_t, int, void **)); + */ +int +__os_mapfile(env, path, fhp, len, is_rdonly, addrp) + ENV *env; + char *path; + DB_FH *fhp; + int is_rdonly; + size_t len; + void **addrp; +{ +#if defined(HAVE_MMAP) && !defined(HAVE_QNX) + DB_ENV *dbenv; + + /* If the user replaced the map call, call through their interface. */ + if (DB_GLOBAL(j_file_map) != NULL) { + /* + * We pass a DB_ENV handle to the user's replacement map + * function, so there must be a valid handle. 
+ */ + DB_ASSERT(env, env != NULL && env->dbenv != NULL); + dbenv = env->dbenv; + + return ( + DB_GLOBAL(j_file_map)(dbenv, path, len, is_rdonly, addrp)); + } + + return (__os_map(env, path, fhp, len, 0, is_rdonly, addrp)); +#else + COMPQUIET(env, NULL); + COMPQUIET(path, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(is_rdonly, 0); + COMPQUIET(len, 0); + COMPQUIET(addrp, NULL); + return (DB_OPNOTSUP); +#endif +} + +/* + * __os_unmapfile -- + * Unmap the shared memory file. + * + * PUBLIC: int __os_unmapfile __P((ENV *, void *, size_t)); + */ +int +__os_unmapfile(env, addr, len) + ENV *env; + void *addr; + size_t len; +{ + DB_ENV *dbenv; + int ret; + + /* + * We pass a DB_ENV handle to the user's replacement unmap function, + * so there must be a valid handle. + */ + DB_ASSERT(env, env != NULL && env->dbenv != NULL); + dbenv = env->dbenv; + + if (FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR("0124", "fileops: munmap")); + + /* If the user replaced the map call, call through their interface. */ + if (DB_GLOBAL(j_file_unmap) != NULL) + return (DB_GLOBAL(j_file_unmap)(dbenv, addr)); + +#ifdef HAVE_MMAP +#ifdef HAVE_MUNLOCK + if (F_ISSET(env, ENV_LOCKDOWN)) + RETRY_CHK((munlock(addr, len)), ret); + /* + * !!! + * The return value is ignored. + */ +#else + COMPQUIET(env, NULL); +#endif + RETRY_CHK((munmap(addr, len)), ret); + ret = __os_posix_err(ret); +#else + COMPQUIET(env, NULL); + ret = EINVAL; +#endif + return (ret); +} + +#ifdef HAVE_MMAP +/* + * __os_map -- + * Call the mmap(2) function. + */ +static int +__os_map(env, path, fhp, len, is_region, is_rdonly, addrp) + ENV *env; + char *path; + DB_FH *fhp; + int is_region, is_rdonly; + size_t len; + void **addrp; +{ + DB_ENV *dbenv; + int flags, prot, ret; + void *p; + + /* + * We pass a DB_ENV handle to the user's replacement map function, + * so there must be a valid handle. 
+ */ + DB_ASSERT(env, env != NULL && env->dbenv != NULL); + dbenv = env->dbenv; + + if (FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0125", "fileops: mmap %s", + "%s"), path); + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + + /* + * If it's read-only, it's private, and if it's not, it's shared. + * Don't bother with an additional parameter. + */ + flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED; + +#ifdef MAP_FILE + /* + * Historically, MAP_FILE was required for mapping regular files, + * even though it was the default. Some systems have it, some + * don't, some that have it set it to 0. + */ + flags |= MAP_FILE; +#endif + + /* + * I know of no systems that implement the flag to tell the system + * that the region contains semaphores, but it's not an unreasonable + * thing to do, and has been part of the design since forever. I + * don't think anyone will object, but don't set it for read-only + * files, it doesn't make sense. + */ +#ifdef MAP_HASSEMAPHORE + if (is_region && !is_rdonly) + flags |= MAP_HASSEMAPHORE; +#else + COMPQUIET(is_region, 0); +#endif + + /* + * FreeBSD: + * Causes data dirtied via this VM map to be flushed to physical media + * only when necessary (usually by the pager) rather then gratuitously. + * Typically this prevents the update daemons from flushing pages + * dirtied through such maps and thus allows efficient sharing of + * memory across unassociated processes using a file-backed shared + * memory map. + */ +#ifdef MAP_NOSYNC + flags |= MAP_NOSYNC; +#endif + + prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE); + + /* + * XXX + * Work around a bug in the VMS V7.1 mmap() implementation. To map + * a file into memory on VMS it needs to be opened in a certain way, + * originally. To get the file opened in that certain way, the VMS + * mmap() closes the file and re-opens it. When it does this, it + * doesn't flush any caches out to disk before closing. 
The problem + * this causes us is that when the memory cache doesn't get written + * out, the file isn't big enough to match the memory chunk and the + * mmap() call fails. This call to fsync() fixes the problem. DEC + * thinks this isn't a bug because of language in XPG5 discussing user + * responsibility for on-disk and in-memory synchronization. + */ +#ifdef VMS + if (__os_fsync(env, fhp) == -1) + return (__os_posix_err(__os_get_syserr())); +#endif + + /* MAP_FAILED was not defined in early mmap implementations. */ +#ifndef MAP_FAILED +#define MAP_FAILED -1 +#endif + if ((p = mmap(NULL, + len, prot, flags, fhp->fd, (off_t)0)) == (void *)MAP_FAILED) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR("0126", "mmap")); + return (__os_posix_err(ret)); + } + + /* + * If it's a region, we want to make sure that the memory isn't paged. + * For example, Solaris will page large mpools because it thinks that + * I/O buffer memory is more important than we are. The mlock system + * call may or may not succeed (mlock is restricted to the super-user + * on some systems). Currently, the only other use of mmap in DB is + * to map read-only databases -- we don't want them paged, either, so + * the call isn't conditional. + */ + if (F_ISSET(env, ENV_LOCKDOWN)) { +#ifdef HAVE_MLOCK + ret = mlock(p, len) == 0 ? 0 : __os_get_syserr(); +#else + ret = DB_OPNOTSUP; +#endif + if (ret != 0) { + __db_syserr(env, ret, DB_STR("0127", "mlock")); + return (__os_posix_err(ret)); + } + } + + *addrp = p; + return (0); +} +#endif + +#ifdef HAVE_SHMGET +#ifndef SHM_R +#define SHM_R 0400 +#endif +#ifndef SHM_W +#define SHM_W 0200 +#endif + +/* + * __shm_mode -- + * Map the DbEnv::open method file mode permissions to shmget call + * permissions. + */ +static int +__shm_mode(env) + ENV *env; +{ + int mode; + + /* Default to r/w owner, r/w group. 
*/ + if (env->db_mode == 0) + return (SHM_R | SHM_W | SHM_R >> 3 | SHM_W >> 3); + + mode = 0; + if (env->db_mode & S_IRUSR) + mode |= SHM_R; + if (env->db_mode & S_IWUSR) + mode |= SHM_W; + if (env->db_mode & S_IRGRP) + mode |= SHM_R >> 3; + if (env->db_mode & S_IWGRP) + mode |= SHM_W >> 3; + if (env->db_mode & S_IROTH) + mode |= SHM_R >> 6; + if (env->db_mode & S_IWOTH) + mode |= SHM_W >> 6; + return (mode); +} +#else +/* + * __no_system_mem -- + * No system memory environments error message. + */ +static int +__no_system_mem(env) + ENV *env; +{ + __db_errx(env, DB_STR("0128", + "architecture doesn't support environments in system memory")); + return (DB_OPNOTSUP); +} +#endif /* HAVE_SHMGET */ diff --git a/src/os/os_mkdir.c b/src/os/os_mkdir.c new file mode 100644 index 00000000..16992e0f --- /dev/null +++ b/src/os/os_mkdir.c @@ -0,0 +1,52 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_mkdir -- + * Create a directory. + * + * PUBLIC: int __os_mkdir __P((ENV *, const char *, int)); + */ +int +__os_mkdir(env, name, mode) + ENV *env; + const char *name; + int mode; +{ + DB_ENV *dbenv; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0129", "fileops: mkdir %s", + "%s"), name); + + /* Make the directory, with paranoid permissions. */ +#if defined(HAVE_VXWORKS) + RETRY_CHK((mkdir(CHAR_STAR_CAST name)), ret); +#else + RETRY_CHK((mkdir(name, DB_MODE_700)), ret); +#endif + if (ret != 0) + return (__os_posix_err(ret)); + + /* Set the absolute permissions, if specified. 
*/ +#if !defined(HAVE_VXWORKS) + if (mode != 0) { + RETRY_CHK((chmod(name, mode)), ret); + if (ret != 0) + ret = __os_posix_err(ret); + } +#endif + return (ret); +} diff --git a/src/os/os_open.c b/src/os/os_open.c new file mode 100644 index 00000000..23c5cb88 --- /dev/null +++ b/src/os/os_open.c @@ -0,0 +1,162 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_open -- + * Open a file descriptor (including page size and log size information). + * + * PUBLIC: int __os_open __P((ENV *, + * PUBLIC: const char *, u_int32_t, u_int32_t, int, DB_FH **)); + */ +int +__os_open(env, name, page_size, flags, mode, fhpp) + ENV *env; + const char *name; + u_int32_t page_size, flags; + int mode; + DB_FH **fhpp; +{ + DB_ENV *dbenv; + DB_FH *fhp; + int oflags, ret; + + COMPQUIET(page_size, 0); + + dbenv = env == NULL ? NULL : env->dbenv; + *fhpp = NULL; + oflags = 0; + + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0152", + "fileops: open %s", "%s"), name); + +#undef OKFLAGS +#define OKFLAGS \ + (DB_OSO_ABSMODE | DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_DSYNC |\ + DB_OSO_EXCL | DB_OSO_RDONLY | DB_OSO_REGION | DB_OSO_SEQ | \ + DB_OSO_TEMP | DB_OSO_TRUNC) + if ((ret = __db_fchk(env, "__os_open", flags, OKFLAGS)) != 0) + return (ret); + +#if defined(O_BINARY) + /* + * If there's a binary-mode open flag, set it, we never want any + * kind of translation. Some systems do translations by default, + * e.g., with Cygwin, the default mode for an open() is set by the + * mode of the mount that underlies the file. + */ + oflags |= O_BINARY; +#endif + + /* + * DB requires the POSIX 1003.1 semantic that two files opened at the + * same time with DB_OSO_CREATE/O_CREAT and DB_OSO_EXCL/O_EXCL flags + * set return an EEXIST failure in at least one. 
+ */ + if (LF_ISSET(DB_OSO_CREATE)) + oflags |= O_CREAT; + + if (LF_ISSET(DB_OSO_EXCL)) + oflags |= O_EXCL; + +#ifdef HAVE_O_DIRECT + if (LF_ISSET(DB_OSO_DIRECT)) + oflags |= O_DIRECT; +#endif +#ifdef O_DSYNC + if (LF_ISSET(DB_OSO_DSYNC)) + oflags |= O_DSYNC; +#endif + + if (LF_ISSET(DB_OSO_RDONLY)) + oflags |= O_RDONLY; + else + oflags |= O_RDWR; + + if (LF_ISSET(DB_OSO_TRUNC)) + oflags |= O_TRUNC; + + /* + * Undocumented feature: allow applications to create intermediate + * directories whenever a file is opened. + */ + if (dbenv != NULL && + env->dir_mode != 0 && LF_ISSET(DB_OSO_CREATE) && + (ret = __db_mkpath(env, name)) != 0) + return (ret); + + /* Open the file. */ +#ifdef HAVE_QNX + if (LF_ISSET(DB_OSO_REGION)) + ret = __os_qnx_region_open(env, name, oflags, mode, &fhp); + else +#endif + ret = __os_openhandle(env, name, oflags, mode, &fhp); + if (ret != 0) + return (ret); + + if (LF_ISSET(DB_OSO_REGION)) + F_SET(fhp, DB_FH_REGION); +#ifdef HAVE_FCHMOD + /* + * If the code using Berkeley DB is a library, that code may not be able + * to control the application's umask value. Allow applications to set + * absolute file modes. We can't fix the race between file creation and + * the fchmod call -- we can't modify the process' umask here since the + * process may be multi-threaded and the umask value is per-process, not + * per-thread. + */ + if (LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_ABSMODE)) + (void)fchmod(fhp->fd, mode); +#endif + +#ifdef O_DSYNC + /* + * If we can configure the file descriptor to flush on write, the + * file descriptor does not need to be explicitly sync'd. + */ + if (LF_ISSET(DB_OSO_DSYNC)) + F_SET(fhp, DB_FH_NOSYNC); +#endif + +#if defined(HAVE_DIRECTIO) && defined(DIRECTIO_ON) + /* + * The Solaris C library includes directio, but you have to set special + * compile flags to #define DIRECTIO_ON. Require both in order to call + * directio. 
+ */ + if (LF_ISSET(DB_OSO_DIRECT)) + (void)directio(fhp->fd, DIRECTIO_ON); +#endif + + /* + * Delete any temporary file. + * + * !!! + * There's a race here, where we've created a file and we crash before + * we can unlink it. Temporary files aren't common in DB, regardless, + * it's not a security problem because the file is empty. There's no + * reasonable way to avoid the race (playing signal games isn't worth + * the portability nightmare), so we just live with it. + */ + if (LF_ISSET(DB_OSO_TEMP)) { +#if defined(HAVE_UNLINK_WITH_OPEN_FAILURE) || defined(CONFIG_TEST) + F_SET(fhp, DB_FH_UNLINK); +#else + (void)__os_unlink(env, name, 0); +#endif + } + + *fhpp = fhp; + return (0); +} diff --git a/src/os/os_pid.c b/src/os/os_pid.c new file mode 100644 index 00000000..742534cd --- /dev/null +++ b/src/os/os_pid.c @@ -0,0 +1,57 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_id -- + * Return the current process ID. + * + * PUBLIC: void __os_id __P((DB_ENV *, pid_t *, db_threadid_t*)); + */ +void +__os_id(dbenv, pidp, tidp) + DB_ENV *dbenv; + pid_t *pidp; + db_threadid_t *tidp; +{ + /* + * We can't depend on dbenv not being NULL, this routine is called + * from places where there's no DB_ENV handle. + * + * We cache the pid in the ENV handle, getting the process ID is a + * fairly slow call on lots of systems. + */ + if (pidp != NULL) { + if (dbenv == NULL) { +#if defined(HAVE_VXWORKS) + *pidp = taskIdSelf(); +#else + *pidp = getpid(); +#endif + } else + *pidp = dbenv->env->pid_cache; + } + + if (tidp != NULL) { +#if defined(DB_WIN32) + *tidp = GetCurrentThreadId(); +#elif defined(HAVE_MUTEX_UI_THREADS) + *tidp = thr_self(); +#elif defined(HAVE_PTHREAD_SELF) + *tidp = pthread_self(); +#else + /* + * Default to just getpid. 
+ */ + *tidp = 0; +#endif + } +} diff --git a/src/os/os_rename.c b/src/os/os_rename.c new file mode 100644 index 00000000..e56c32ac --- /dev/null +++ b/src/os/os_rename.c @@ -0,0 +1,53 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_rename -- + * Rename a file. + * + * PUBLIC: int __os_rename __P((ENV *, + * PUBLIC: const char *, const char *, u_int32_t)); + */ +int +__os_rename(env, oldname, newname, silent) + ENV *env; + const char *oldname, *newname; + u_int32_t silent; +{ + DB_ENV *dbenv; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0168", "fileops: rename %s to %s", + "%s %s"), oldname, newname); + + LAST_PANIC_CHECK_BEFORE_IO(env); + + if (DB_GLOBAL(j_rename) != NULL) + ret = DB_GLOBAL(j_rename)(oldname, newname); + else + RETRY_CHK((rename(oldname, newname)), ret); + + /* + * If "silent" is not set, then errors are OK and we should not output + * an error message. + */ + if (ret != 0) { + if (!silent) + __db_syserr(env, ret, DB_STR_A("0169", + "rename %s %s", "%s %s"), oldname, newname); + ret = __os_posix_err(ret); + } + return (ret); +} diff --git a/src/os/os_root.c b/src/os/os_root.c new file mode 100644 index 00000000..b1e48f4d --- /dev/null +++ b/src/os/os_root.c @@ -0,0 +1,27 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_isroot -- + * Return if user has special permissions. 
+ * + * PUBLIC: int __os_isroot __P((void)); + */ +int +__os_isroot() +{ +#ifdef HAVE_GETUID + return (getuid() == 0); +#else + return (0); +#endif +} diff --git a/src/os/os_rpath.c b/src/os/os_rpath.c new file mode 100644 index 00000000..264a82b8 --- /dev/null +++ b/src/os/os_rpath.c @@ -0,0 +1,36 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __db_rpath -- + * Return the last path separator in the path or NULL if none found. + * + * PUBLIC: char *__db_rpath __P((const char *)); + */ +char * +__db_rpath(path) + const char *path; +{ + const char *s, *last; + + s = path; + last = NULL; + if (PATH_SEPARATOR[1] != '\0') { + for (; s[0] != '\0'; ++s) + if (strchr(PATH_SEPARATOR, s[0]) != NULL) + last = s; + } else + for (; s[0] != '\0'; ++s) + if (s[0] == PATH_SEPARATOR[0]) + last = s; + return ((char *)last); +} diff --git a/src/os/os_rw.c b/src/os/os_rw.c new file mode 100644 index 00000000..b786344f --- /dev/null +++ b/src/os/os_rw.c @@ -0,0 +1,291 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_io -- + * Do an I/O. + * + * PUBLIC: int __os_io __P((ENV *, int, DB_FH *, db_pgno_t, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int8_t *, size_t *)); + */ +int +__os_io(env, op, fhp, pgno, pgsize, relative, io_len, buf, niop) + ENV *env; + int op; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize, relative, io_len; + u_int8_t *buf; + size_t *niop; +{ +#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) + DB_ENV *dbenv; + off_t offset; + ssize_t nio; +#endif + int ret; + + /* + * Check for illegal usage. 
+ * + * This routine is used in one of two ways: reading bytes from an + * absolute offset and reading a specific database page. All of + * our absolute offsets are known to fit into a u_int32_t, while + * our database pages might be at offsets larger than a u_int32_t. + */ + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + DB_ASSERT(env, (pgno == 0 && pgsize == 0) || relative == 0); + +#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) + dbenv = env == NULL ? NULL : env->dbenv; + + if ((offset = relative) == 0) + offset = (off_t)pgno * pgsize; + switch (op) { + case DB_IO_READ: + if (DB_GLOBAL(j_read) != NULL) + goto slow; +#if defined(HAVE_STATISTICS) + ++fhp->read_count; +#endif + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0130", + "fileops: read %s: %lu bytes at offset %lu", + "%s %lu %lu"), fhp->name, (u_long)io_len, + (u_long)offset); + + LAST_PANIC_CHECK_BEFORE_IO(env); + nio = DB_GLOBAL(j_pread) != NULL ? + DB_GLOBAL(j_pread)(fhp->fd, buf, io_len, offset) : + pread(fhp->fd, buf, io_len, offset); + break; + case DB_IO_WRITE: + if (DB_GLOBAL(j_write) != NULL) + goto slow; +#ifdef HAVE_FILESYSTEM_NOTZERO + if (__os_fs_notzero()) + goto slow; +#endif +#if defined(HAVE_STATISTICS) + ++fhp->write_count; +#endif + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0131", + "fileops: write %s: %lu bytes at offset %lu", + "%s %lu %lu"), fhp->name, (u_long)io_len, + (u_long)offset); + + LAST_PANIC_CHECK_BEFORE_IO(env); + nio = DB_GLOBAL(j_pwrite) != NULL ? 
+ DB_GLOBAL(j_pwrite)(fhp->fd, buf, io_len, offset) : + pwrite(fhp->fd, buf, io_len, offset); + break; + default: + return (EINVAL); + } + if (nio == (ssize_t)io_len) { + *niop = io_len; + return (0); + } +slow: +#endif + MUTEX_LOCK(env, fhp->mtx_fh); + + if ((ret = __os_seek(env, fhp, pgno, pgsize, relative)) != 0) + goto err; + switch (op) { + case DB_IO_READ: + ret = __os_read(env, fhp, buf, io_len, niop); + break; + case DB_IO_WRITE: + ret = __os_write(env, fhp, buf, io_len, niop); + break; + default: + ret = EINVAL; + break; + } + +err: MUTEX_UNLOCK(env, fhp->mtx_fh); + + return (ret); + +} + +/* + * __os_read -- + * Read from a file handle. + * + * PUBLIC: int __os_read __P((ENV *, DB_FH *, void *, size_t, size_t *)); + */ +int +__os_read(env, fhp, addr, len, nrp) + ENV *env; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nrp; +{ + DB_ENV *dbenv; + size_t offset; + ssize_t nr; + int ret; + u_int8_t *taddr; + + dbenv = env == NULL ? NULL : env->dbenv; + ret = 0; + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + +#if defined(HAVE_STATISTICS) + ++fhp->read_count; +#endif + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0132", + "fileops: read %s: %lu bytes", "%s %lu"), + fhp->name, (u_long)len); + + if (DB_GLOBAL(j_read) != NULL) { + *nrp = len; + LAST_PANIC_CHECK_BEFORE_IO(env); + if (DB_GLOBAL(j_read)(fhp->fd, addr, len) != (ssize_t)len) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR_A("0133", + "read: %#lx, %lu", "%#lx %lu"), + P_TO_ULONG(addr), (u_long)len); + ret = __os_posix_err(ret); + } + return (ret); + } + + for (taddr = addr, offset = 0; + offset < len; taddr += nr, offset += (u_int32_t)nr) { + LAST_PANIC_CHECK_BEFORE_IO(env); + RETRY_CHK(((nr = read(fhp->fd, + CHAR_STAR_CAST taddr, len - offset)) < 0 ? 
1 : 0), ret); + if (nr == 0 || ret != 0) + break; + } + *nrp = (size_t)(taddr - (u_int8_t *)addr); + if (ret != 0) { + __db_syserr(env, ret, DB_STR_A("0134", + "read: %#lx, %lu", "%#lx %lu"), + P_TO_ULONG(taddr), (u_long)len - offset); + ret = __os_posix_err(ret); + } + return (ret); +} + +/* + * __os_write -- + * Write to a file handle. + * + * PUBLIC: int __os_write __P((ENV *, DB_FH *, void *, size_t, size_t *)); + */ +int +__os_write(env, fhp, addr, len, nwp) + ENV *env; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nwp; +{ + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + +#ifdef HAVE_FILESYSTEM_NOTZERO + /* Zero-fill as necessary. */ + if (__os_fs_notzero()) { + int ret; + if ((ret = __db_zero_fill(env, fhp)) != 0) + return (ret); + } +#endif + return (__os_physwrite(env, fhp, addr, len, nwp)); +} + +/* + * __os_physwrite -- + * Physical write to a file handle. + * + * PUBLIC: int __os_physwrite + * PUBLIC: __P((ENV *, DB_FH *, void *, size_t, size_t *)); + */ +int +__os_physwrite(env, fhp, addr, len, nwp) + ENV *env; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nwp; +{ + DB_ENV *dbenv; + size_t offset; + ssize_t nw; + int ret; + u_int8_t *taddr; + + dbenv = env == NULL ? 
NULL : env->dbenv; + ret = 0; + +#if defined(HAVE_STATISTICS) + ++fhp->write_count; +#endif + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0135", + "fileops: write %s: %lu bytes", "%s %lu"), + fhp->name, (u_long)len); + +#if defined(HAVE_FILESYSTEM_NOTZERO) && defined(DIAGNOSTIC) + if (__os_fs_notzero()) { + struct stat sb; + off_t cur_off; + + DB_ASSERT(env, fstat(fhp->fd, &sb) != -1 && + (cur_off = lseek(fhp->fd, (off_t)0, SEEK_CUR)) != -1 && + cur_off <= sb.st_size); + } +#endif + if (DB_GLOBAL(j_write) != NULL) { + *nwp = len; + LAST_PANIC_CHECK_BEFORE_IO(env); + if (DB_GLOBAL(j_write)(fhp->fd, addr, len) != (ssize_t)len) { + ret = __os_get_syserr(); + __db_syserr(env, ret, DB_STR_A("0136", + "write: %#lx, %lu", "%#lx %lu"), + P_TO_ULONG(addr), (u_long)len); + ret = __os_posix_err(ret); + + DB_EVENT(env, DB_EVENT_WRITE_FAILED, NULL); + } + return (ret); + } + + for (taddr = addr, offset = 0; + offset < len; taddr += nw, offset += (u_int32_t)nw) { + LAST_PANIC_CHECK_BEFORE_IO(env); + RETRY_CHK(((nw = write(fhp->fd, + CHAR_STAR_CAST taddr, len - offset)) < 0 ? 1 : 0), ret); + if (ret != 0) + break; + } + *nwp = len; + if (ret != 0) { + __db_syserr(env, ret, DB_STR_A("0137", + "write: %#lx, %lu", "%#lx %lu"), + P_TO_ULONG(taddr), (u_long)len - offset); + ret = __os_posix_err(ret); + + DB_EVENT(env, DB_EVENT_WRITE_FAILED, NULL); + } + return (ret); +} diff --git a/src/os/os_seek.c b/src/os/os_seek.c new file mode 100644 index 00000000..eef9e3d8 --- /dev/null +++ b/src/os/os_seek.c @@ -0,0 +1,66 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_seek -- + * Seek to a page/byte offset in the file. 
+ * + * PUBLIC: int __os_seek __P((ENV *, + * PUBLIC: DB_FH *, db_pgno_t, u_int32_t, off_t)); + */ +int +__os_seek(env, fhp, pgno, pgsize, relative) + ENV *env; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize; + off_t relative; +{ + DB_ENV *dbenv; + off_t offset; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + +#if defined(HAVE_STATISTICS) + ++fhp->seek_count; +#endif + + offset = (off_t)pgsize * pgno + relative; + + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0170", + "fileops: seek %s to %lu", "%s %lu"), + fhp->name, (u_long)offset); + + if (DB_GLOBAL(j_seek) != NULL) + ret = DB_GLOBAL(j_seek)(fhp->fd, offset, SEEK_SET); + else + RETRY_CHK((lseek( + fhp->fd, offset, SEEK_SET) == -1 ? 1 : 0), ret); + + if (ret == 0) { + fhp->pgsize = pgsize; + fhp->pgno = pgno; + fhp->offset = relative; + } else { + __db_syserr(env, ret, DB_STR_A("0171", + "seek: %lu: (%lu * %lu) + %lu", "%lu %lu %lu %lu"), + (u_long)offset, (u_long)pgno, (u_long)pgsize, + (u_long)relative); + ret = __os_posix_err(ret); + } + + return (ret); +} diff --git a/src/os/os_stack.c b/src/os/os_stack.c new file mode 100644 index 00000000..f94f1071 --- /dev/null +++ b/src/os/os_stack.c @@ -0,0 +1,45 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#if defined(HAVE_SYSTEM_INCLUDE_FILES) && defined(HAVE_BACKTRACE) && \ + defined(HAVE_BACKTRACE_SYMBOLS) && defined(HAVE_EXECINFO_H) +#include +#endif + +/* + * __os_stack -- + * Output a stack trace to the message file handle. 
+ * + * PUBLIC: void __os_stack __P((ENV *)); + */ +void +__os_stack(env) + ENV *env; +{ +#if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS) + void *array[200]; + size_t i, size; + char **strings; + + /* + * Solaris and the GNU C library support this interface. Solaris + * has additional interfaces (printstack and walkcontext), I don't + * know if they offer any additional value or not. + */ + size = backtrace(array, sizeof(array) / sizeof(array[0])); + strings = backtrace_symbols(array, size); + + for (i = 0; i < size; ++i) + __db_errx(env, "%s", strings[i]); + free(strings); +#endif + COMPQUIET(env, NULL); +} diff --git a/src/os/os_stat.c b/src/os/os_stat.c new file mode 100644 index 00000000..e22e0163 --- /dev/null +++ b/src/os/os_stat.c @@ -0,0 +1,108 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_exists -- + * Return if the file exists. + * + * PUBLIC: int __os_exists __P((ENV *, const char *, int *)); + */ +int +__os_exists(env, path, isdirp) + ENV *env; + const char *path; + int *isdirp; +{ + DB_ENV *dbenv; + struct stat sb; + int ret; + + dbenv = env == NULL ? 
NULL : env->dbenv; + + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0165", + "fileops: stat %s", "%s"), path); + + if (DB_GLOBAL(j_exists) != NULL) + return (DB_GLOBAL(j_exists)(path, isdirp)); + + RETRY_CHK((stat(CHAR_STAR_CAST path, &sb)), ret); + if (ret != 0) + return (__os_posix_err(ret)); + +#if !defined(S_ISDIR) || defined(STAT_MACROS_BROKEN) +#undef S_ISDIR +#ifdef _S_IFDIR +#define S_ISDIR(m) (_S_IFDIR & (m)) +#else +#define S_ISDIR(m) (((m) & 0170000) == 0040000) +#endif +#endif + if (isdirp != NULL) + *isdirp = S_ISDIR(sb.st_mode); + + return (0); +} + +/* + * __os_ioinfo -- + * Return file size and I/O size; abstracted to make it easier + * to replace. + * + * PUBLIC: int __os_ioinfo __P((ENV *, const char *, + * PUBLIC: DB_FH *, u_int32_t *, u_int32_t *, u_int32_t *)); + */ +int +__os_ioinfo(env, path, fhp, mbytesp, bytesp, iosizep) + ENV *env; + const char *path; + DB_FH *fhp; + u_int32_t *mbytesp, *bytesp, *iosizep; +{ + struct stat sb; + int ret; + + if (DB_GLOBAL(j_ioinfo) != NULL) + return (DB_GLOBAL(j_ioinfo)(path, + fhp->fd, mbytesp, bytesp, iosizep)); + + DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + + RETRY_CHK((fstat(fhp->fd, &sb)), ret); + if (ret != 0) { + __db_syserr(env, ret, DB_STR("0166", "fstat")); + return (__os_posix_err(ret)); + } + + /* Return the size of the file. */ + if (mbytesp != NULL) + *mbytesp = (u_int32_t)(sb.st_size / MEGABYTE); + if (bytesp != NULL) + *bytesp = (u_int32_t)(sb.st_size % MEGABYTE); + + /* + * Return the underlying filesystem I/O size, if available. + * + * XXX + * Check for a 0 size -- the HP MPE/iX architecture has st_blksize, + * but it's always 0. 
+ */ +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + if (iosizep != NULL && (*iosizep = sb.st_blksize) == 0) + *iosizep = DB_DEF_IOSIZE; +#else + if (iosizep != NULL) + *iosizep = DB_DEF_IOSIZE; +#endif + return (0); +} diff --git a/src/os/os_tmpdir.c b/src/os/os_tmpdir.c new file mode 100644 index 00000000..c9b6742e --- /dev/null +++ b/src/os/os_tmpdir.c @@ -0,0 +1,141 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#ifdef macintosh +#include +#endif +#endif + +/* + * __os_tmpdir -- + * Set the temporary directory path. + * + * The order of items in the list structure and the order of checks in + * the environment are documented. + * + * PUBLIC: int __os_tmpdir __P((ENV *, u_int32_t)); + */ +int +__os_tmpdir(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_ENV *dbenv; + int isdir, ret; + char *tdir, tdir_buf[DB_MAXPATHLEN]; + + dbenv = env->dbenv; + + /* Use the environment if it's permitted and initialized. 
*/ + if (LF_ISSET(DB_USE_ENVIRON) || + (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot())) { + /* POSIX: TMPDIR */ + tdir = tdir_buf; + if ((ret = __os_getenv( + env, "TMPDIR", &tdir, sizeof(tdir_buf))) != 0) + return (ret); + if (tdir != NULL && tdir[0] != '\0') + goto found; + + /* + * Windows: TEMP, TMP + */ + tdir = tdir_buf; + if ((ret = __os_getenv( + env, "TEMP", &tdir, sizeof(tdir_buf))) != 0) + return (ret); + if (tdir != NULL && tdir[0] != '\0') + goto found; + + tdir = tdir_buf; + if ((ret = __os_getenv( + env, "TMP", &tdir, sizeof(tdir_buf))) != 0) + return (ret); + if (tdir != NULL && tdir[0] != '\0') + goto found; + + /* Macintosh */ + tdir = tdir_buf; + if ((ret = __os_getenv( + env, "TempFolder", &tdir, sizeof(tdir_buf))) != 0) + return (ret); + + if (tdir != NULL && tdir[0] != '\0') +found: return (__os_strdup(env, tdir, &dbenv->db_tmp_dir)); + } + +#ifdef macintosh + /* Get the path to the temporary folder. */ + {FSSpec spec; + + if (!Special2FSSpec(kTemporaryFolderType, + kOnSystemDisk, 0, &spec)) + return (__os_strdup(env, + FSp2FullPath(&spec), &dbenv->db_tmp_dir)); + } +#endif +#ifdef DB_WIN32 + /* Get the path to the temporary directory. */ + { + _TCHAR tpath[DB_MAXPATHLEN + 1]; + char *path, *eos; + + if (GetTempPath(DB_MAXPATHLEN, tpath) > 2) { + FROM_TSTRING(env, tpath, path, ret); + if (ret != 0) + return (ret); + + eos = path + strlen(path) - 1; + if (*eos == '\\' || *eos == '/') + *eos = '\0'; + if (__os_exists(env, path, &isdir) == 0 && isdir) { + ret = __os_strdup(env, + path, &dbenv->db_tmp_dir); + FREE_STRING(env, path); + return (ret); + } + FREE_STRING(env, path); + } + } +#endif + + /* + * Step through the static list looking for a possibility. + * + * We don't use the obvious data structure because some C compilers + * (and I use the phrase loosely) don't like static data arrays. 
+ */ +#define DB_TEMP_DIRECTORY(n) { \ + char *__p = n; \ + if (__os_exists(env, __p, &isdir) == 0 && isdir != 0) \ + return (__os_strdup(env, __p, &dbenv->db_tmp_dir)); \ + } +#ifdef DB_WIN32 + DB_TEMP_DIRECTORY("/temp"); + DB_TEMP_DIRECTORY("C:/temp"); + DB_TEMP_DIRECTORY("C:/tmp"); +#else + DB_TEMP_DIRECTORY("/var/tmp"); + DB_TEMP_DIRECTORY("/usr/tmp"); + DB_TEMP_DIRECTORY("/tmp"); +#if defined(ANDROID) || defined(DB_ANDROID) + DB_TEMP_DIRECTORY("/cache"); +#endif +#endif + + /* + * If we don't have any other place to store temporary files, store + * them in the current directory. + */ + return (__os_strdup(env, "", &dbenv->db_tmp_dir)); +} diff --git a/src/os/os_truncate.c b/src/os/os_truncate.c new file mode 100644 index 00000000..a37e787c --- /dev/null +++ b/src/os/os_truncate.c @@ -0,0 +1,63 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_truncate -- + * Truncate the file. + * + * PUBLIC: int __os_truncate __P((ENV *, DB_FH *, db_pgno_t, u_int32_t)); + */ +int +__os_truncate(env, fhp, pgno, pgsize) + ENV *env; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize; +{ + DB_ENV *dbenv; + off_t offset; + int ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* + * Truncate a file so that "pgno" is discarded from the end of the + * file. 
+ */ + offset = (off_t)pgsize * pgno; + + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0141", + "fileops: truncate %s to %lu", "%s %lu"), + fhp->name, (u_long)offset); + + LAST_PANIC_CHECK_BEFORE_IO(env); + + if (DB_GLOBAL(j_ftruncate) != NULL) + ret = DB_GLOBAL(j_ftruncate)(fhp->fd, offset); + else { +#ifdef HAVE_FTRUNCATE + RETRY_CHK((ftruncate(fhp->fd, offset)), ret); +#else + ret = DB_OPNOTSUP; +#endif + } + + if (ret != 0) { + __db_syserr(env, ret, DB_STR_A("0142", + "ftruncate: %lu", "%lu"), (u_long)offset); + ret = __os_posix_err(ret); + } + + return (ret); +} diff --git a/src/os/os_uid.c b/src/os/os_uid.c new file mode 100644 index 00000000..5ab6c34c --- /dev/null +++ b/src/os/os_uid.c @@ -0,0 +1,55 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_unique_id -- + * Return a unique 32-bit value. + * + * PUBLIC: void __os_unique_id __P((ENV *, u_int32_t *)); + */ +void +__os_unique_id(env, idp) + ENV *env; + u_int32_t *idp; +{ + DB_ENV *dbenv; + db_timespec v; + pid_t pid; + u_int32_t id; + + *idp = 0; + + dbenv = env == NULL ? NULL : env->dbenv; + + /* + * Our randomized value is comprised of our process ID, the current + * time of day and a stack address, all XOR'd together. + */ + __os_id(dbenv, &pid, NULL); + __os_gettime(env, &v, 1); + + id = (u_int32_t)pid ^ + (u_int32_t)v.tv_sec ^ (u_int32_t)v.tv_nsec ^ P_TO_UINT32(&pid); + + /* + * We could try and find a reasonable random-number generator, but + * that's not all that easy to do. Seed and use srand()/rand(), if + * we can find them. 
+ */ + if (DB_GLOBAL(uid_init) == 0) { + DB_GLOBAL(uid_init) = 1; + srand((u_int)id); + } + id ^= (u_int)rand(); + + *idp = id; +} diff --git a/src/os/os_unlink.c b/src/os/os_unlink.c new file mode 100644 index 00000000..be262fe1 --- /dev/null +++ b/src/os/os_unlink.c @@ -0,0 +1,80 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_unlink -- + * Remove a file. + * + * PUBLIC: int __os_unlink __P((ENV *, const char *, int)); + */ +int +__os_unlink(env, path, overwrite_test) + ENV *env; + const char *path; + int overwrite_test; +{ + DB_ENV *dbenv; + int ret, t_ret; + + dbenv = env == NULL ? NULL : env->dbenv; + + if (dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("0160", "fileops: unlink %s", + "%s"), path); + + /* Optionally overwrite the contents of the file to enhance security. */ + if (dbenv != NULL && overwrite_test && F_ISSET(dbenv, DB_ENV_OVERWRITE)) + (void)__db_file_multi_write(env, path); + + LAST_PANIC_CHECK_BEFORE_IO(env); + + if (DB_GLOBAL(j_unlink) != NULL) + ret = DB_GLOBAL(j_unlink)(path); + else { + RETRY_CHK((unlink(CHAR_STAR_CAST path)), ret); +#ifdef HAVE_QNX + /* + * The file may be a region file created by shm_open, not a + * regular file. Try and delete using unlink, and if that + * fails for an unexpected reason, try a shared memory unlink. + */ + if (ret != 0 && __os_posix_err(ret) != ENOENT) + RETRY_CHK((shm_unlink(path)), ret); +#endif + } + + /* + * !!! + * The results of unlink are file system driver specific on VxWorks. + * In the case of removing a file that did not exist, some, at least, + * return an error, but with an errno of 0, not ENOENT. We do not + * have to test for that explicitly, the RETRY_CHK macro resets "ret" + * to be the errno, and so we'll just slide right on through. 
+ * + * XXX + * We shouldn't be testing for an errno of ENOENT here, but ENOENT + * signals that a file is missing, and we attempt to unlink things + * (such as v. 2.x environment regions, in ENV->remove) that we + * are expecting not to be there. Reporting errors in these cases + * is annoying. + */ + if (ret != 0) { + t_ret = __os_posix_err(ret); + if (t_ret != ENOENT) + __db_syserr(env, ret, DB_STR_A("0161", + "unlink: %s", "%s"), path); + ret = t_ret; + } + + return (ret); +} diff --git a/src/os/os_yield.c b/src/os/os_yield.c new file mode 100644 index 00000000..ee86bd3c --- /dev/null +++ b/src/os/os_yield.c @@ -0,0 +1,95 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#if defined(HAVE_SYSTEM_INCLUDE_FILES) && defined(HAVE_SCHED_YIELD) +#include +#endif + +static void __os_sleep __P((ENV *, u_long, u_long)); + +/* + * __os_yield -- + * Yield the processor, optionally pausing until running again. + * + * PUBLIC: void __os_yield __P((ENV *, u_long, u_long)); + */ +void +__os_yield(env, secs, usecs) + ENV *env; + u_long secs, usecs; /* Seconds and microseconds. */ +{ + /* + * Don't require the values be normalized (some operating systems + * return an error if the usecs argument to select is too large). + */ + for (; usecs >= US_PER_SEC; usecs -= US_PER_SEC) + ++secs; + + if (DB_GLOBAL(j_yield) != NULL) { + (void)DB_GLOBAL(j_yield)(secs, usecs); + return; + } + + /* + * Yield the processor so other processes or threads can run. Use + * the local yield call if not pausing, otherwise call the select + * function. 
+ */ + if (secs != 0 || usecs != 0) + __os_sleep(env, secs, usecs); + else { +#if defined(HAVE_MUTEX_UI_THREADS) + thr_yield(); +#elif defined(HAVE_PTHREAD_YIELD) + pthread_yield(); +#elif defined(HAVE_SCHED_YIELD) + (void)sched_yield(); +#elif defined(HAVE_YIELD) + yield(); +#else + __os_sleep(env, 0, 0); +#endif + } +} + +/* + * __os_sleep -- + * Pause the thread of control. + */ +static void +__os_sleep(env, secs, usecs) + ENV *env; + u_long secs, usecs; /* Seconds and microseconds. */ +{ + struct timeval t; + int ret; + + /* + * Sheer raving paranoia -- don't select for 0 time, in case some + * implementation doesn't yield the processor in that case. + */ + t.tv_sec = (long)secs; + t.tv_usec = (long)usecs + 1; + + /* + * We don't catch interrupts and restart the system call here, unlike + * other Berkeley DB system calls. This may be a user attempting to + * interrupt a sleeping DB utility (for example, db_checkpoint), and + * we want the utility to see the signal and quit. This assumes it's + * always OK for DB to sleep for less time than originally scheduled. + */ + if (select(0, NULL, NULL, NULL, &t) == -1) { + ret = __os_get_syserr(); + if (__os_posix_err(ret) != EINTR) + __db_syserr(env, ret, DB_STR("0167", "select")); + } +} diff --git a/src/qam/qam.c b/src/qam/qam.c new file mode 100644 index 00000000..17911878 --- /dev/null +++ b/src/qam/qam.c @@ -0,0 +1,1760 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" + +static int __qam_bulk __P((DBC *, DBT *, u_int32_t)); +static int __qamc_close __P((DBC *, db_pgno_t, int *)); +static int __qamc_del __P((DBC *, u_int32_t)); +static int __qamc_destroy __P((DBC *)); +static int __qamc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __qamc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __qam_consume __P((DBC *, QMETA *, db_recno_t)); +static int __qam_getno __P((DB *, const DBT *, db_recno_t *)); + +#define DONT_NEED_LOCKS(dbc) ((dbc)->txn == NULL || \ + F_ISSET(dbc, DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED)) + +/* + * __qam_position -- + * Position a queued access method cursor at a record. This returns + * the page locked. *exactp will be set if the record is valid. + * PUBLIC: int __qam_position + * PUBLIC: __P((DBC *, db_recno_t *, u_int32_t, int *)); + */ +int +__qam_position(dbc, recnop, get_mode, exactp) + DBC *dbc; /* open cursor */ + db_recno_t *recnop; /* pointer to recno to find */ + u_int32_t get_mode; /* flags to __memp_fget */ + int *exactp; /* indicate if it was found */ +{ + DB *dbp; + QAMDATA *qp; + QUEUE_CURSOR *cp; + db_pgno_t pg; + int ret; + + dbp = dbc->dbp; + cp = (QUEUE_CURSOR *)dbc->internal; + + /* Fetch the page for this recno. */ + cp->pgno = pg = QAM_RECNO_PAGE(dbp, *recnop); + + cp->page = NULL; + *exactp = 0; + if ((ret = __qam_fget(dbc, &pg, get_mode, &cp->page)) != 0) { + if (!FLD_ISSET(get_mode, DB_MPOOL_CREATE) && + (ret == DB_PAGE_NOTFOUND || ret == ENOENT)) + ret = 0; + return (ret); + } + cp->indx = QAM_RECNO_INDEX(dbp, pg, *recnop); + + if (PGNO(cp->page) == 0) { + /* + * We have read an uninitialized page: set the page number if + * we're creating the page. Otherwise, we know that the record + * doesn't exist yet. 
+ */ + if (!FLD_ISSET(get_mode, DB_MPOOL_CREATE)) { + *exactp = 0; + return (0); + } + DB_ASSERT(dbp->env, FLD_ISSET(get_mode, DB_MPOOL_CREATE)); + PGNO(cp->page) = pg; + TYPE(cp->page) = P_QAMDATA; + } + + qp = QAM_GET_RECORD(dbp, cp->page, cp->indx); + *exactp = F_ISSET(qp, QAM_VALID) ? 1 : 0; + + return (ret); +} + +/* + * __qam_pitem -- + * Put an item on a queue page. Copy the data to the page and set the + * VALID and SET bits. If logging and the record was previously set, + * log that data, otherwise just log the new data. + * + * pagep must be write locked + * + * PUBLIC: int __qam_pitem + * PUBLIC: __P((DBC *, QPAGE *, u_int32_t, db_recno_t, DBT *)); + */ +int +__qam_pitem(dbc, pagep, indx, recno, data) + DBC *dbc; + QPAGE *pagep; + u_int32_t indx; + db_recno_t recno; + DBT *data; +{ + DB *dbp; + DBT olddata, pdata, *datap; + ENV *env; + QAMDATA *qp; + QUEUE *t; + u_int8_t *dest, *p; + int allocated, ret; + + dbp = dbc->dbp; + env = dbp->env; + t = (QUEUE *)dbp->q_internal; + allocated = ret = 0; + + if (data->size > t->re_len) + return (__db_rec_toobig(env, data->size, t->re_len)); + qp = QAM_GET_RECORD(dbp, pagep, indx); + + p = qp->data; + datap = data; + if (F_ISSET(data, DB_DBT_PARTIAL)) { + if (data->doff + data->dlen > t->re_len) { + __db_errx(env, DB_STR_A("1142", +"Record length error: data offset plus length larger than record size of %lu", + "%s %lu"), (u_long)t->re_len); + return (EINVAL); + } + + if (data->size != data->dlen) + return (__db_rec_repl(env, data->size, data->dlen)); + + if (data->size == t->re_len) + goto no_partial; + + /* + * If we are logging, then we have to build the record + * first, otherwise, we can simply drop the change + * directly on the page. After this clause, make + * sure that datap and p are set up correctly so that + * copying datap into p does the right thing. 
+ * + * Note, I am changing this so that if the existing + * record is not valid, we create a complete record + * to log so that both this and the recovery code is simpler. + */ + + if (DBC_LOGGING(dbc) || !F_ISSET(qp, QAM_VALID)) { + datap = &pdata; + memset(datap, 0, sizeof(*datap)); + + if ((ret = __os_malloc(env, + t->re_len, &datap->data)) != 0) + return (ret); + allocated = 1; + datap->size = t->re_len; + + /* + * Construct the record if it's valid, otherwise set it + * all to the pad character. + */ + dest = datap->data; + if (F_ISSET(qp, QAM_VALID)) + memcpy(dest, p, t->re_len); + else + memset(dest, (int)t->re_pad, t->re_len); + + dest += data->doff; + memcpy(dest, data->data, data->size); + } else { + datap = data; + p += data->doff; + } + } + +no_partial: + if (DBC_LOGGING(dbc)) { + olddata.size = 0; + if (F_ISSET(qp, QAM_SET)) { + olddata.data = qp->data; + olddata.size = t->re_len; + } + if ((ret = __qam_add_log(dbp, dbc->txn, &LSN(pagep), + 0, &LSN(pagep), pagep->pgno, + indx, recno, datap, qp->flags, + olddata.size == 0 ? NULL : &olddata)) != 0) + goto err; + } else if (!F_ISSET((dbc), DBC_RECOVER)) + LSN_NOT_LOGGED(LSN(pagep)); + + F_SET(qp, QAM_VALID | QAM_SET); + memcpy(p, datap->data, datap->size); + if (!F_ISSET(data, DB_DBT_PARTIAL)) + memset(p + datap->size, + (int)t->re_pad, t->re_len - datap->size); + +err: if (allocated) + __os_free(env, datap->data); + + return (ret); +} +/* + * __qamc_put + * Cursor put for queued access method. + * BEFORE and AFTER cannot be specified. 
+ */ +static int +__qamc_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + ENV *env; + QMETA *meta; + QUEUE_CURSOR *cp; + db_pgno_t metapg; + db_recno_t new_cur, new_first; + u_int32_t opcode; + int exact, ret, t_ret, writelock; + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + if (pgnop != NULL) + *pgnop = PGNO_INVALID; + + cp = (QUEUE_CURSOR *)dbc->internal; + + switch (flags) { + case DB_KEYFIRST: + case DB_KEYLAST: + case DB_NOOVERWRITE: + case DB_OVERWRITE_DUP: + if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) + return (ret); + /* FALLTHROUGH */ + case DB_CURRENT: + break; + default: + /* The interface shouldn't let anything else through. */ + return (__db_ferr(env, "DBC->put", 0)); + } + + /* Write lock the record. */ + if ((ret = __db_lget(dbc, LCK_COUPLE, + cp->recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &cp->lock)) != 0) + return (ret); + + if ((ret = __qam_position(dbc, &cp->recno, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &exact)) != 0) { + /* We could not get the page, we can release the record lock. */ + (void)__LPUT(dbc, cp->lock); + return (ret); + } + + if (exact != 0 && flags == DB_NOOVERWRITE) + ret = DB_KEYEXIST; + else + /* Put the item on the page. */ + ret = __qam_pitem(dbc, + (QPAGE *)cp->page, cp->indx, cp->recno, data); + + if ((t_ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + cp->lock_mode = DB_LOCK_WRITE; + if (ret != 0) + return (ret); + + /* Unlock the record if not in a transaction. */ + if ((ret = __TLPUT(dbc, cp->lock)) != 0) + return (ret); + + /* We may need to reset the head or tail of the queue. 
*/ + metapg = ((QUEUE *)dbp->q_internal)->q_meta; + + writelock = 0; + if ((ret = __memp_fget(mpf, &metapg, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + return (ret); + + opcode = 0; + new_cur = new_first = 0; + + /* + * If the put address is outside the queue, adjust the head and + * tail of the queue. If the order is inverted we move + * the one which is closer. The first case is when the + * queue is empty, move first and current to where the new + * insert is. + */ + +recheck: + if (meta->first_recno == meta->cur_recno) { + new_first = cp->recno; + new_cur = cp->recno; + QAM_INC_RECNO(new_cur); + opcode |= QAM_SETFIRST; + opcode |= QAM_SETCUR; + } else { + if (QAM_BEFORE_FIRST(meta, cp->recno)) { + new_first = cp->recno; + opcode |= QAM_SETFIRST; + } + + if (QAM_AFTER_CURRENT(meta, cp->recno)) { + new_cur = cp->recno; + QAM_INC_RECNO(new_cur); + opcode |= QAM_SETCUR; + } + } + + if (opcode == 0) + goto done; + + /* Exclusive latch the metadata page. */ + if (writelock == 0 && (ret = __memp_dirty(mpf, &meta, + dbc->thread_info, dbc->txn, dbc->priority, DB_MPOOL_DIRTY)) != 0) + goto done; + if (writelock++ == 0) + goto recheck; + + if (DBC_LOGGING(dbc) && (ret = __qam_mvptr_log(dbp, dbc->txn, + &meta->dbmeta.lsn, 0, opcode, meta->first_recno, + new_first, meta->cur_recno, new_cur, + &meta->dbmeta.lsn, PGNO_BASE_MD)) != 0) + opcode = 0; + + if (opcode & QAM_SETCUR) + meta->cur_recno = new_cur; + if (opcode & QAM_SETFIRST) + meta->first_recno = new_first; + + QAM_WAKEUP(dbc, ret); + +done: if (meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qam_append -- + * Perform a put(DB_APPEND) in queue. 
+ * + * PUBLIC: int __qam_append __P((DBC *, DBT *, DBT *)); + */ +int +__qam_append(dbc, key, data) + DBC *dbc; + DBT *key, *data; +{ + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + QMETA *meta; + QPAGE *page; + QUEUE *qp; + QUEUE_CURSOR *cp; + db_pgno_t pg, metapg; + db_recno_t recno; + int ret, t_ret, waited; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + LOCK_INIT(lock); + + /* Exclusive latch the meta page. */ + metapg = ((QUEUE *)dbp->q_internal)->q_meta; +again: if ((ret = __memp_fget(mpf, &metapg, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + + /* Get the next record number. */ + recno = meta->cur_recno; + QAM_INC_RECNO(meta->cur_recno); + + if (meta->cur_recno == meta->first_recno) { + QAM_DEC_RECNO(meta->cur_recno); + ret = EFBIG; + goto err; + } + + if (QAM_BEFORE_FIRST(meta, recno)) + meta->first_recno = recno; + + /* Lock the record. */ + waited = 0; + ret = __db_lget(dbc, 0, recno, + DB_LOCK_WRITE, DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); + + /* Release the meta page. */ + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + meta = NULL; + /* If we couldn't lock the record try again. */ + if (t_ret == 0 && + (ret == DB_LOCK_NOTGRANTED || ret == DB_LOCK_DEADLOCK)) { + waited = 1; + ret = __db_lget(dbc, 0, recno, + DB_LOCK_WRITE, DB_LOCK_RECORD, &lock); + } + + /* + * The application may modify the data based on the selected record + * number. We always want to call this even if we ultimately end + * up aborting, because we are allocating a record number, regardless. + */ + if (dbc->dbp->db_append_recno != NULL && + (t_ret = dbc->dbp->db_append_recno(dbc->dbp, data, recno)) != 0 && + ret == 0) + ret = t_ret; + + /* + * Capture errors from either the lock couple or the call to + * dbp->db_append_recno. + */ + if (ret != 0) + goto err; + + pg = QAM_RECNO_PAGE(dbp, recno); + + /* Fetch for write the data page. 
*/ + if ((ret = __qam_fget(dbc, &pg, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &page)) != 0) + goto err; + + /* See if this is a new page. */ + if (page->pgno == 0) { + page->pgno = pg; + page->type = P_QAMDATA; + } else if (waited && F_ISSET(QAM_GET_RECORD( + dbp, page, QAM_RECNO_INDEX(dbp, pg, recno)), QAM_VALID)) { + /* The record is in use, try again. */ + if ((ret = __qam_fput(dbc, pg, page, dbc->priority)) != 0) + goto err; + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + goto again; + } + + cp->lock = lock; + cp->lock_mode = DB_LOCK_WRITE; + LOCK_INIT(lock); + + /* Put the item on the page and log it. */ + ret = __qam_pitem(dbc, page, + QAM_RECNO_INDEX(dbp, pg, recno), recno, data); + + if ((t_ret = __qam_fput(dbc, + pg, page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + /* Return the record number to the user. */ + if (ret == 0 && key != NULL) + ret = __db_retcopy(dbp->env, key, + &recno, sizeof(recno), &dbc->rkey->data, &dbc->rkey->ulen); + + /* Position the cursor on this record. */ + cp->recno = recno; + + /* See if we are leaving the extent. */ + qp = (QUEUE *) dbp->q_internal; + if (qp->page_ext != 0 && + (recno % (qp->page_ext * qp->rec_page) == 0 || + recno == UINT32_MAX)) { + if ((ret = __memp_fget(mpf, &metapg, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + goto err; + if (!QAM_AFTER_CURRENT(meta, recno)) + if ((ret = __qam_fclose(dbp, pg)) != 0) + goto err; + } + + QAM_WAKEUP(dbc, ret); + +err: /* Release the meta page. 
*/ + if (meta != NULL && (t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qamc_del -- + * Qam cursor->am_del function + */ +static int +__qamc_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DBT data; + DB_MPOOLFILE *mpf; + PAGE *pagep; + QAMDATA *qp; + QMETA *meta; + QUEUE_CURSOR *cp; + db_pgno_t metapg; + db_recno_t first; + int exact, ret, t_ret; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + + metapg = ((QUEUE *)dbp->q_internal)->q_meta; + + /* Read latch the meta page. */ + if ((ret = __memp_fget(mpf, &metapg, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + return (ret); + + if (QAM_NOT_VALID(meta, cp->recno)) { + ret = DB_NOTFOUND; + goto err; + } + first = meta->first_recno; + + /* Don't hold the meta page long term. */ + if ((ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + + /* Get the record. */ + if ((ret = __db_lget(dbc, LCK_COUPLE, + cp->recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &cp->lock)) != 0) + goto err; + cp->lock_mode = DB_LOCK_WRITE; + + /* Find the record; delete only deletes exact matches. 
*/ + if ((ret = __qam_position(dbc, &cp->recno, + DB_MPOOL_DIRTY, &exact)) != 0) + goto err; + + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + + pagep = cp->page; + qp = QAM_GET_RECORD(dbp, pagep, cp->indx); + + if (DBC_LOGGING(dbc)) { + if (((QUEUE *)dbp->q_internal)->page_ext == 0 || + ((QUEUE *)dbp->q_internal)->re_len == 0) { + if ((ret = __qam_del_log(dbp, + dbc->txn, &LSN(pagep), 0, &LSN(pagep), + pagep->pgno, cp->indx, cp->recno)) != 0) + goto err; + } else { + data.size = ((QUEUE *)dbp->q_internal)->re_len; + data.data = qp->data; + if ((ret = __qam_delext_log(dbp, + dbc->txn, &LSN(pagep), 0, &LSN(pagep), + pagep->pgno, cp->indx, cp->recno, &data)) != 0) + goto err; + } + } else + LSN_NOT_LOGGED(LSN(pagep)); + + F_CLR(qp, QAM_VALID); + if ((ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + goto err; + cp->page = NULL; + + /* + * Other threads cannot move first_recno past + * our position while we have the record locked. + * If it's pointing at the deleted record then get + * the metapage and check again as lower numbered + * record may have been inserted. + */ + if (LF_ISSET(DB_CONSUME) || cp->recno == first) { + if ((ret = __memp_fget(mpf, &metapg, + dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + if (LF_ISSET(DB_CONSUME) || cp->recno == meta->first_recno) + ret = __qam_consume(dbc, meta, RECNO_OOB); + } + +err: if (meta != NULL && (t_ret = __memp_fput(mpf, dbc->thread_info, + meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (cp->page != NULL && + (t_ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + + return (ret); +} + +#ifdef DEBUG_WOP +#define QDEBUG +#endif + +/* + * __qamc_get -- + * Queue DBC->get function. 
+ */ +static int +__qamc_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DBC *dbcdup; + DBT tmp; + DB_LOCK lock, metalock; + DB_MPOOLFILE *mpf; + ENV *env; + PAGE *pg; + QAMDATA *qp; + QMETA *meta; + QUEUE *t; + QUEUE_CURSOR *cp; + db_lockmode_t lock_mode; + db_pgno_t metapno; + db_recno_t first; + int exact, inorder, is_first, ret, t_ret, wait, with_delete; + int retrying; + u_int32_t skip, meta_mode; + + dbp = dbc->dbp; + env = dbp->env; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + LOCK_INIT(lock); + + lock_mode = F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ; + meta_mode = 0; + meta = NULL; + *pgnop = 0; + pg = NULL; + retrying = t_ret = wait = with_delete = 0; + + if (flags == DB_CONSUME_WAIT) { + wait = 1; + flags = DB_CONSUME; + } + if (flags == DB_CONSUME) { + with_delete = 1; + flags = DB_FIRST; + meta_mode = DB_MPOOL_DIRTY; + lock_mode = DB_LOCK_WRITE; + } + inorder = F_ISSET(dbp, DB_AM_INORDER) && with_delete; + + DEBUG_LREAD(dbc, dbc->txn, "qamc_get", + flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); + + /* Make lint and friends happy. */ + is_first = 0; + first = 0; + + t = (QUEUE *)dbp->q_internal; + metapno = t->q_meta; + + /* + * Get the meta page first + */ + if ((ret = __memp_fget(mpf, &metapno, + dbc->thread_info, dbc->txn, meta_mode, &meta)) != 0) + return (ret); + + /* Release any previous lock if not in a transaction. */ + if ((ret = __TLPUT(dbc, cp->lock)) != 0) + goto err; + + skip = 0; +retry: /* Update the record number. */ + switch (flags) { + case DB_CURRENT: + break; + case DB_NEXT_DUP: + case DB_PREV_DUP: + ret = DB_NOTFOUND; + goto err; + /* NOTREACHED */ + case DB_NEXT: + case DB_NEXT_NODUP: + if (cp->recno != RECNO_OOB) { + if (with_delete && !inorder && + QAM_BEFORE_FIRST(meta, cp->recno)) + cp->recno = meta->first_recno; + else + QAM_INC_RECNO(cp->recno); + /* + * Check to see if we are out of data. 
+ */ + if (QAM_AFTER_CURRENT(meta, cp->recno)) { + pg = NULL; + if (!wait) { + ret = DB_NOTFOUND; + goto err; + } + /* + * If we skipped a locked record, go back and + * find it. If we find a locked record again + * wait for it. + */ + if (skip == 1 && + !QAM_AFTER_CURRENT(meta, first)) { + retrying = 1; + cp->recno = first; + goto dolock; + } + flags = DB_FIRST; + + if (CDB_LOCKING(env)) { + /* Drop the metapage before we wait. */ + ret = __memp_fput(mpf, dbc->thread_info, + meta, dbc->priority); + meta = NULL; + if (ret != 0) + goto err; + if ((ret = __lock_get( + env, dbc->locker, + DB_LOCK_SWITCH, &dbc->lock_dbt, + DB_LOCK_WAIT, &dbc->mylock)) != 0) + goto err; + + if ((ret = __lock_get( + env, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, + DB_LOCK_WRITE, &dbc->mylock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &metapno, + dbc->thread_info, + dbc->txn, meta_mode, &meta)) != 0) + goto err; + goto retry; + } + + /* + * Put us in the wait queue, when someone + * adds something they will unlock it. + */ + if ((ret = __db_lget(dbc, + 0, PGNO_INVALID, DB_LOCK_WAIT, + DB_LOCK_NOWAIT, &metalock)) != 0) + goto err; + + /* Drop the metapage before we wait. */ + ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority); + meta = NULL; + if (ret != 0) + goto err; + + /* Upgrade the lock to wait on it. 
*/ + if ((ret = __db_lget(dbc, 0, + PGNO_INVALID, DB_LOCK_WAIT, + DB_LOCK_UPGRADE, &metalock)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + ret = DB_LOCK_NOTGRANTED; + goto err; + } + + if ((ret = __memp_fget(mpf, + &metapno, dbc->thread_info, dbc->txn, + meta_mode, &meta)) != 0) + goto err; + goto retry; + } + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + flags = DB_NEXT; + is_first = 1; + + /* get the first record number */ + cp->recno = first = meta->first_recno; + + break; + case DB_PREV: + case DB_PREV_NODUP: + if (cp->recno != RECNO_OOB) { + if (cp->recno == meta->first_recno || + QAM_BEFORE_FIRST(meta, cp->recno)) { + ret = DB_NOTFOUND; + goto err; + } + QAM_DEC_RECNO(cp->recno); + break; + } + /* FALLTHROUGH */ + case DB_LAST: + if (meta->first_recno == meta->cur_recno) { + ret = DB_NOTFOUND; + goto err; + } + cp->recno = meta->cur_recno; + QAM_DEC_RECNO(cp->recno); + break; + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) + goto err; + break; + default: + ret = __db_unknown_flag(env, "__qamc_get", flags); + goto err; + } + +dolock: if (!with_delete || inorder || retrying) { + if ((ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + } + + /* Lock the record. */ + if (((ret = __db_lget(dbc, LCK_COUPLE, cp->recno, lock_mode, + (with_delete && !inorder && !retrying) ? + DB_LOCK_NOWAIT | DB_LOCK_RECORD : DB_LOCK_RECORD, + &lock)) == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) && + with_delete) { +#ifdef QDEBUG + if (DBC_LOGGING(dbc)) + (void)__log_printf(env, + dbc->txn, "Queue S: %x %d %d", + dbc->locker ? dbc->locker->id : 0, + cp->recno, first); +#endif + skip = 1; + goto retry; + } + + if (ret != 0) + goto err; + + /* + * In the DB_FIRST or DB_LAST cases we must wait and then start over + * since the first/last may have moved while we slept. 
If we are + * reading in order and the first record was not there, we can skip it + * as it must have been aborted was was skipped by a non-queue insert + * or we could not have gotten its lock. If we have the wrong + * record we release our locks and try again. + */ + switch (flags) { + default: + if (inorder) { + if (first != cp->recno) + break; + } else if (with_delete || !is_first) + break; + /* FALLTHROUGH */ + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + case DB_LAST: + if ((ret = __memp_fget(mpf, &metapno, + dbc->thread_info, dbc->txn, meta_mode, &meta)) != 0) + goto lerr; + if ((is_first && cp->recno != meta->first_recno) || + (flags == DB_LAST && cp->recno != meta->cur_recno - 1)) { + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + if (is_first) + flags = DB_FIRST; + goto retry; + } else if (!is_first && flags != DB_LAST) { + if (QAM_BEFORE_FIRST(meta, cp->recno)) { + if (flags == DB_SET_RANGE || + flags == DB_GET_BOTH_RANGE) { + cp->lock = lock; + LOCK_INIT(lock); + goto release_retry; + } + ret = DB_NOTFOUND; + goto lerr; + } + if (QAM_AFTER_CURRENT(meta, cp->recno)) { + ret = DB_NOTFOUND; + goto lerr; + } + } + if ((ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + } + + /* Position the cursor on the record. */ + if ((ret = __qam_position(dbc, &cp->recno, 0, &exact)) != 0) { + /* We cannot get the page, release the record lock. */ + (void)__LPUT(dbc, lock); + goto err; + } + + pg = cp->page; + cp->lock = lock; + cp->lock_mode = lock_mode; + LOCK_INIT(lock); + + if (!exact) { +release_retry: /* Release locks and retry, if possible. */ +#ifdef QDEBUG + if (with_delete && DBC_LOGGING(dbc)) { + (void)__log_printf(dbp->env, dbc->txn, + "Queue E: %x %d %d", + dbc->locker ? 
dbc->locker->id : 0, + cp->recno, first); + } +#endif + if (pg != NULL) + (void)__qam_fput(dbc, cp->pgno, pg, dbc->priority); + cp->page = pg = NULL; + if (with_delete) { + if ((ret = __LPUT(dbc, cp->lock)) != 0) + goto err1; + } else if ((ret = __TLPUT(dbc, cp->lock)) != 0) + goto err1; + + if (meta == NULL && (ret = __memp_fget(mpf, &metapno, + dbc->thread_info, dbc->txn, meta_mode, &meta)) != 0) + goto err1; + /* + * If we don't need locks and we are out of range + * then we can just skip to the FIRST/LAST record + * otherwise we must iterate to lock the records + * and get serializability. + */ + switch (flags) { + case DB_NEXT: + case DB_NEXT_NODUP: + if (!with_delete) + is_first = 0; + else if (first == cp->recno) + /* we have verified that this record is gone. */ + first++; + if (QAM_BEFORE_FIRST(meta, cp->recno) && + DONT_NEED_LOCKS(dbc)) + flags = DB_FIRST; + break; + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + if (QAM_AFTER_CURRENT(meta, cp->recno) && + DONT_NEED_LOCKS(dbc)) + flags = DB_LAST; + else + flags = DB_PREV; + break; + + case DB_GET_BOTH_RANGE: + case DB_SET_RANGE: + if (QAM_BEFORE_FIRST(meta, cp->recno) && + DONT_NEED_LOCKS(dbc)) + flags = DB_FIRST; + else + flags = DB_NEXT; + break; + + default: + /* this is for the SET and GET_BOTH cases */ + ret = DB_KEYEMPTY; + goto err1; + } + retrying = 0; + goto retry; + } + + if (with_delete && cp->recno == first) { + if (meta == NULL && + (ret = __memp_fget(mpf, &metapno, dbc->thread_info, + dbc->txn, DB_MPOOL_DIRTY | DB_MPOOL_TRY, &meta)) != 0) { + if (ret == DB_LOCK_NOTGRANTED) { + first = RECNO_OOB; + ret = 0; + } else + goto err; + } + if (meta != NULL && cp->recno != meta->cur_recno) { + if (DBC_LOGGING(dbc)) { +#ifdef QDEBUG + (void)__log_printf(dbp->env, dbc->txn, + "Queue I: %x %d %d %d", + dbc->locker ? 
dbc->locker->id : 0, + cp->recno, first, meta->cur_recno); +#endif + if ((ret = __qam_incfirst_log(dbp, + dbc->txn, &meta->dbmeta.lsn, 0, + cp->recno, PGNO_BASE_MD)) != 0) + goto err; + } else + LSN_NOT_LOGGED(meta->dbmeta.lsn); + + meta->first_recno = cp->recno; + QAM_INC_RECNO(meta->first_recno); + } + } + if (meta != NULL) { + if ((ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0) + goto err; + meta = NULL; + } + + qp = QAM_GET_RECORD(dbp, pg, cp->indx); + + /* Return the data item. */ + if (flags == DB_GET_BOTH || flags == DB_GET_BOTH_RANGE) { + /* + * Need to compare + */ + tmp.data = qp->data; + tmp.size = t->re_len; + if ((ret = __bam_defcmp(dbp, data, &tmp)) != 0) { + if (flags == DB_GET_BOTH_RANGE) + goto release_retry; + ret = DB_NOTFOUND; + goto err1; + } + } + + /* Return the key if the user didn't give us one. */ + if (key != NULL && !F_ISSET(key, DB_DBT_ISSET)) { + if ((ret = __db_retcopy(dbp->env, + key, &cp->recno, sizeof(cp->recno), + &dbc->rkey->data, &dbc->rkey->ulen)) != 0) + goto err1; + F_SET(key, DB_DBT_ISSET); + } + + if (data != NULL && + !F_ISSET(dbc, DBC_MULTIPLE|DBC_MULTIPLE_KEY) && + !F_ISSET(data, DB_DBT_ISSET)) { + if ((ret = __db_retcopy(dbp->env, data, qp->data, t->re_len, + &dbc->rdata->data, &dbc->rdata->ulen)) != 0) + goto err1; + F_SET(data, DB_DBT_ISSET); + } + + /* Finally, if we are doing DB_CONSUME mark the record. */ + if (with_delete) { + /* + * Assert that we're not a secondary index. Doing a DB_CONSUME + * on a secondary makes very little sense, since one can't + * DB_APPEND there; attempting one should be forbidden by + * the interface. + */ + DB_ASSERT(env, !F_ISSET(dbp, DB_AM_SECONDARY)); + + /* + * If we have any secondary indices, call __dbc_del_primary to + * delete the references to the item we're about to delete. + * + * Note that we work on a duplicated cursor, since the + * __db_ret work has already been done, so it's not safe + * to perform any additional ops on this cursor. 
+ */ + if (DB_IS_PRIMARY(dbp)) { + if ((ret = __dbc_idup(dbc, + &dbcdup, DB_POSITION)) != 0) + goto err1; + + if ((ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + goto err1; + cp->page = NULL; + if (meta != NULL && + (ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0) + goto err1; + meta = NULL; + if ((ret = __dbc_del_primary(dbcdup)) != 0) { + /* + * The __dbc_del_primary return is more + * interesting. + */ + (void)__dbc_close(dbcdup); + goto err1; + } + + if ((ret = __dbc_close(dbcdup)) != 0) + goto err1; + if ((ret = __qam_fget(dbc, + &cp->pgno, DB_MPOOL_DIRTY, &cp->page)) != 0) + goto err; + } else if ((ret = __qam_dirty(dbc, + cp->pgno, &cp->page, dbc->priority)) != 0) + goto err1; + + pg = cp->page; + + if (DBC_LOGGING(dbc)) { + if (t->page_ext == 0 || t->re_len == 0) { + if ((ret = __qam_del_log(dbp, dbc->txn, + &LSN(pg), 0, &LSN(pg), + pg->pgno, cp->indx, cp->recno)) != 0) + goto err1; + } else { + tmp.data = qp->data; + tmp.size = t->re_len; + if ((ret = __qam_delext_log(dbp, + dbc->txn, &LSN(pg), 0, &LSN(pg), + pg->pgno, cp->indx, cp->recno, &tmp)) != 0) + goto err1; + } + } else + LSN_NOT_LOGGED(LSN(pg)); + + F_CLR(qp, QAM_VALID); + if ((ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + goto err; + cp->page = NULL; + + /* + * Clean up the first pointer, need to check two things: + * Are we leaving an page or an extent? + * Is the first pointer is beyond the first one we looked at? + * If we deleted the first record we checked then we moved + * the first pointer properly. + */ + + if (first == cp->recno && (skip = (first % t->rec_page)) != 0) + goto done; + if (meta == NULL && + (ret = __memp_fget(mpf, &metapno, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + goto err; + if (skip && !QAM_BEFORE_FIRST(meta, first)) + goto done; + +#ifdef QDEBUG + if (DBC_LOGGING(dbc)) + (void)__log_printf(env, + dbc->txn, "Queue D: %x %d %d %d", + dbc->locker ? 
dbc->locker->id : 0, + cp->recno, first, meta->first_recno); +#endif + ret = __qam_consume(dbc, meta, first); + } + +err1: if (cp->page != NULL) { + if ((t_ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + cp->page = NULL; + } + if (0) { +lerr: (void)__LPUT(dbc, lock); + } + +done: +err: if (meta) { + /* Release the meta page. */ + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + } + + return ((ret == DB_LOCK_NOTGRANTED && !F_ISSET(env->dbenv, + DB_ENV_TIME_NOTGRANTED)) ? DB_LOCK_DEADLOCK : ret); +} + +/* + * __qam_consume -- try to reset the head of the queue. + * + */ +static int +__qam_consume(dbc, meta, first) + DBC *dbc; + QMETA *meta; + db_recno_t first; +{ + DB *dbp; + DB_LOCK lock, save_lock; + DB_MPOOLFILE *mpf; + QUEUE_CURSOR *cp; + db_indx_t save_indx; + db_pgno_t save_page; + db_recno_t current, save_first, save_recno; + u_int32_t rec_extent; + int exact, ret, t_ret, wrapped; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + ret = 0; + + save_page = cp->pgno; + save_indx = cp->indx; + save_recno = cp->recno; + save_lock = cp->lock; + save_first = first; + + /* + * We call this routine for two reasons: + * 1) to toss pages and extents as we leave them. + * 2) update meta->first_recno. + * We do not need to update first_recno if we deleted + * the first record we tried since we updated it then. + * If we are not going to update meta->first_recno we + * do not need an exclusive latch. + */ + if (first != cp->recno && (ret = __memp_dirty(mpf, + &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) + goto err; + /* + * If we skipped some deleted records, we need to + * reposition on the first one. Get a lock + * in case someone is trying to put it back. 
+ */ + if (first == RECNO_OOB || !QAM_BEFORE_FIRST(meta, first)) + first = meta->first_recno; + + if (first != cp->recno) { + ret = __db_lget(dbc, 0, first, DB_LOCK_READ, + DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); + if (ret == DB_LOCK_NOTGRANTED || ret == DB_LOCK_DEADLOCK) { + ret = 0; + goto done; + } + if (ret != 0) + goto err; + if (cp->page != NULL && (ret = + __qam_fput(dbc, cp->pgno, cp->page, dbc->priority)) != 0) + goto err; + cp->page = NULL; + if ((ret = __qam_position(dbc, &first, 0, &exact)) != 0) { + (void)__LPUT(dbc, lock); + goto err; + } + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + if (exact != 0) + goto done; + } + + current = meta->cur_recno; + wrapped = 0; + if (first > current) + wrapped = 1; + rec_extent = meta->page_ext * meta->rec_page; + + /* Loop until we find a record or hit current */ + for (;;) { + /* + * Check to see if we are moving off the extent + * and remove the extent. + * If we are moving off a page we need to + * get rid of the buffer. + */ + if (rec_extent != 0 && + ((exact = (first % rec_extent == 0)) || + (first % meta->rec_page == 0) || + first == UINT32_MAX)) { +#ifdef QDEBUG + if (DBC_LOGGING(dbc)) + (void)__log_printf(dbp->env, dbc->txn, + "Queue R: %x %d %d %d", + dbc->locker ? dbc->locker->id : 0, + cp->pgno, first, meta->first_recno); +#endif + if (cp->page != NULL && (ret = __qam_fput(dbc, + cp->pgno, cp->page, DB_PRIORITY_VERY_LOW)) != 0) + break; + cp->page = NULL; + + if (exact == 1 && + (ret = __qam_fremove(dbp, cp->pgno)) != 0) + break; + } else if (cp->page != NULL && (ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + break; + cp->page = NULL; + first++; + if (first == RECNO_OOB) { + wrapped = 0; + first++; + } + + /* + * LOOP EXIT when we come move to the current + * pointer. 
+ */ + if (!wrapped && first >= current) + break; + + ret = __db_lget(dbc, 0, first, DB_LOCK_READ, + DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); + if (ret == DB_LOCK_NOTGRANTED || ret == DB_LOCK_DEADLOCK) { + ret = 0; + break; + } + if (ret != 0) + break; + + if ((ret = __qam_position(dbc, &first, 0, &exact)) != 0) { + (void)__LPUT(dbc, lock); + break; + } + if ((ret =__LPUT(dbc, lock)) != 0 || exact) { + if ((t_ret = __qam_fput(dbc, cp->pgno, + cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + break; + } + } + + cp->pgno = save_page; + cp->indx = save_indx; + cp->recno = save_recno; + cp->lock = save_lock; + +done: + /* + * We have advanced as far as we can. + * Advance first_recno to this point. + */ + if (ret == 0 && meta->first_recno != first && save_first != cp->recno) { + if (DBC_LOGGING(dbc)) { +#ifdef QDEBUG + (void)__log_printf(dbp->env, dbc->txn, + "Queue M: %x %d %d %d", + dbc->locker ? dbc->locker->id : 0, + cp->recno, first, meta->first_recno); +#endif + if ((ret = __qam_incfirst_log(dbp, + dbc->txn, &meta->dbmeta.lsn, 0, + first, PGNO_BASE_MD)) != 0) + goto err; + } else + LSN_NOT_LOGGED(meta->dbmeta.lsn); + meta->first_recno = first; + } + +err: + return (ret); +} + +static int +__qam_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + DB_LOCK rlock; + DB_MPOOLFILE *mpf; + PAGE *pg; + QAMDATA *qp; + QMETA *meta; + QUEUE_CURSOR *cp; + db_indx_t indx; + db_lockmode_t lkmode; + db_pgno_t metapno; + u_int32_t *endp, *offp; + u_int32_t pagesize, re_len, recs; + u_int8_t *dbuf, *dp, *np; + int exact, ret, t_ret, valid; + int is_key, need_pg, size, space; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + + lkmode = F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE : DB_LOCK_READ; + + pagesize = dbp->pgsize; + re_len = ((QUEUE *)dbp->q_internal)->re_len; + recs = ((QUEUE *)dbp->q_internal)->rec_page; + metapno = ((QUEUE *)dbp->q_internal)->q_meta; + + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + size = 0; + + dbuf = data->data; + np = dp = dbuf; + + /* Keep track of space that is left. There is an termination entry */ + space = (int)data->ulen; + space -= (int)sizeof(*offp); + + /* Build the offset/size table from the end up. */ + endp = (u_int32_t *)((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; + /* Save the lock on the current position of the cursor. */ + rlock = cp->lock; + LOCK_INIT(cp->lock); + + if ((ret = __memp_fget(mpf, &metapno, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + return (ret); + +next_pg: + /* Wrap around, skipping zero. */ + if (cp->recno == RECNO_OOB) + cp->recno++; + if ((ret = __qam_position(dbc, &cp->recno, 0, &exact)) != 0) + goto done; + + pg = cp->page; + indx = cp->indx; + need_pg = 1; + + do { + /* + * If this page is a nonexistent page at the end of an + * extent, pg may be NULL. A NULL page has no valid records, + * so just keep looping as though qp exists and isn't QAM_VALID; + * calling QAM_GET_RECORD is unsafe. + */ + valid = 0; + + if (pg != NULL) { + if ((ret = __db_lget(dbc, LCK_COUPLE, cp->recno, lkmode, + DB_LOCK_NOWAIT | DB_LOCK_RECORD, &rlock)) != 0) { + if (ret != DB_LOCK_NOTGRANTED && + ret != DB_LOCK_DEADLOCK) + goto done; + /* If we put anything in the buffer return. */ + if (offp != endp) + break; + if ((ret = __memp_fput(mpf, dbc->thread_info, + meta, dbc->priority)) != 0) + goto done; + meta = NULL; + if ((ret = __db_lget(dbc, LCK_COUPLE, cp->recno, + lkmode, DB_LOCK_RECORD, &rlock)) != 0) + goto done; + if ((ret = __memp_fget(mpf, + &metapno, dbc->thread_info, + dbc->txn, 0, &meta)) != 0) + goto done; + } + qp = QAM_GET_RECORD(dbp, pg, indx); + if (F_ISSET(qp, QAM_VALID)) { + valid = 1; + space -= (int) + ((is_key ? 
3 : 2) * sizeof(*offp)); + if (space < 0) + goto get_space; + if (need_pg) { + dp = np; + size = (int)pagesize - QPAGE_SZ(dbp); + if (space < size) { +get_space: + if (offp == endp) { + data->size = (u_int32_t) + DB_ALIGN((u_int32_t) + size + pagesize, + sizeof(u_int32_t)); + ret = DB_BUFFER_SMALL; + break; + } + if (indx != 0) + indx--; + cp->recno--; + space = 0; + break; + } + memcpy(dp, + (u_int8_t *)pg + QPAGE_SZ(dbp), + (u_int)size); + need_pg = 0; + space -= size; + np += size; + } + if (is_key) + *offp-- = cp->recno; + *offp-- = (u_int32_t)((((u_int8_t *)qp - + (u_int8_t *)pg) - QPAGE_SZ(dbp)) + + (dp - dbuf) + SSZA(QAMDATA, data)); + *offp-- = re_len; + } + } + if (!valid && is_key == 0) { + *offp-- = 0; + *offp-- = 0; + } + cp->recno++; + } while (++indx < recs && cp->recno != RECNO_OOB && + !QAM_AFTER_CURRENT(meta, cp->recno)); + + if (cp->page != NULL) { + if ((t_ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + } + + if (ret == 0 && space > 0 && + (indx >= recs || cp->recno == RECNO_OOB) && + !QAM_AFTER_CURRENT(meta, cp->recno)) + goto next_pg; + + /* + * Correct recno in two cases: + * 1) If we just wrapped fetch must start at record 1 not a FIRST. + * 2) We ran out of space exactly at the end of a page. + */ + if (cp->recno == RECNO_OOB || (space == 0 && indx == recs)) + cp->recno--; + + if (is_key == 1) + *offp = RECNO_OOB; + else + *offp = (u_int32_t)-1; + +done: /* Release the meta page. */ + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + cp->lock = rlock; + + return (ret); +} + +/* + * __qamc_close -- + * Close down the cursor from a single use. 
+ */ +static int +__qamc_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + QUEUE_CURSOR *cp; + int ret; + + COMPQUIET(root_pgno, 0); + COMPQUIET(rmroot, NULL); + + cp = (QUEUE_CURSOR *)dbc->internal; + + /* Discard any locks not acquired inside of a transaction. */ + ret = __TLPUT(dbc, cp->lock); + + LOCK_INIT(cp->lock); + cp->page = NULL; + cp->pgno = PGNO_INVALID; + cp->indx = 0; + cp->lock_mode = DB_LOCK_NG; + cp->recno = RECNO_OOB; + cp->flags = 0; + + return (ret); +} + +/* + * __qamc_dup -- + * Duplicate a queue cursor, such that the new one holds appropriate + * locks for the position of the original. + * + * PUBLIC: int __qamc_dup __P((DBC *, DBC *)); + */ +int +__qamc_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + QUEUE_CURSOR *orig, *new; + + orig = (QUEUE_CURSOR *)orig_dbc->internal; + new = (QUEUE_CURSOR *)new_dbc->internal; + + new->recno = orig->recno; + + return (0); +} + +/* + * __qamc_init + * + * PUBLIC: int __qamc_init __P((DBC *)); + */ +int +__qamc_init(dbc) + DBC *dbc; +{ + DB *dbp; + QUEUE_CURSOR *cp; + int ret; + + dbp = dbc->dbp; + + /* Allocate the internal structure. */ + cp = (QUEUE_CURSOR *)dbc->internal; + if (cp == NULL) { + if ((ret = + __os_calloc(dbp->env, 1, sizeof(QUEUE_CURSOR), &cp)) != 0) + return (ret); + dbc->internal = (DBC_INTERNAL *)cp; + } + + /* Initialize methods. 
*/ + dbc->close = dbc->c_close = __dbc_close_pp; + dbc->cmp = __dbc_cmp_pp; + dbc->count = dbc->c_count = __dbc_count_pp; + dbc->del = dbc->c_del = __dbc_del_pp; + dbc->dup = dbc->c_dup = __dbc_dup_pp; + dbc->get = dbc->c_get = __dbc_get_pp; + dbc->pget = dbc->c_pget = __dbc_pget_pp; + dbc->put = dbc->c_put = __dbc_put_pp; + dbc->am_bulk = __qam_bulk; + dbc->am_close = __qamc_close; + dbc->am_del = __qamc_del; + dbc->am_destroy = __qamc_destroy; + dbc->am_get = __qamc_get; + dbc->am_put = __qamc_put; + dbc->am_writelock = NULL; + + return (0); +} + +/* + * __qamc_destroy -- + * Close a single cursor -- internal version. + */ +static int +__qamc_destroy(dbc) + DBC *dbc; +{ + /* Discard the structures. */ + __os_free(dbc->env, dbc->internal); + + return (0); +} + +/* + * __qam_getno -- + * Check the user's record number. + */ +static int +__qam_getno(dbp, key, rep) + DB *dbp; + const DBT *key; + db_recno_t *rep; +{ + /* If passed an empty DBT from Java, key->data may be NULL */ + if (key->size != sizeof(db_recno_t)) { + __db_errx(dbp->env, DB_STR("1143", + "illegal record number size")); + return (EINVAL); + } + + if ((*rep = *(db_recno_t *)key->data) == 0) { + __db_errx(dbp->env, DB_STR("1144", + "illegal record number of 0")); + return (EINVAL); + } + return (0); +} + +/* + * __qam_truncate -- + * Truncate a queue database + * + * PUBLIC: int __qam_truncate __P((DBC *, u_int32_t *)); + */ +int +__qam_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + QMETA *meta; + db_pgno_t metapno; + u_int32_t count; + int ret, t_ret; + + dbp = dbc->dbp; + + /* Walk the queue, counting rows. */ + for (count = 0; + (ret = __qamc_get(dbc, NULL, NULL, DB_CONSUME, &metapno)) == 0;) + count++; + if (ret != DB_NOTFOUND) + return (ret); + + mpf = dbp->mpf; + /* Update the meta page. 
*/ + metapno = ((QUEUE *)dbp->q_internal)->q_meta; + if ((ret = __memp_fget(mpf, &metapno, dbc->thread_info, dbc->txn, + DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + + /* Remove the last extent file. */ + if (meta->cur_recno > 1 && ((QUEUE *)dbp->q_internal)->page_ext != 0) { + if ((ret = __qam_fremove(dbp, + QAM_RECNO_PAGE(dbp, meta->cur_recno - 1))) != 0) + goto err; + } + + if (DBC_LOGGING(dbc)) { + ret = __qam_mvptr_log(dbp, dbc->txn, &meta->dbmeta.lsn, 0, + QAM_SETCUR | QAM_SETFIRST | QAM_TRUNCATE, meta->first_recno, + 1, meta->cur_recno, 1, &meta->dbmeta.lsn, PGNO_BASE_MD); + } else + LSN_NOT_LOGGED(meta->dbmeta.lsn); + if (ret == 0) + meta->first_recno = meta->cur_recno = 1; + +err: if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if (countp != NULL) + *countp = count; + + return (ret); +} + +/* + * __qam_delete -- + * Queue fast delete function. + * + * PUBLIC: int __qam_delete __P((DBC *, DBT *, u_int32_t)); + */ +int +__qam_delete(dbc, key, flags) + DBC *dbc; + DBT *key; + u_int32_t flags; +{ + QUEUE_CURSOR *cp; + int ret; + + cp = (QUEUE_CURSOR *)dbc->internal; + if ((ret = __qam_getno(dbc->dbp, key, &cp->recno)) != 0) + goto err; + + ret = __qamc_del(dbc, flags); + +err: return (ret); +} diff --git a/src/qam/qam.src b/src/qam/qam.src new file mode 100644 index 00000000..03fb912c --- /dev/null +++ b/src/qam/qam.src @@ -0,0 +1,89 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __qam + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/qam.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * incfirst + * Used when we increment first_recno. 
+ */ +BEGIN incfirst 42 84 +DB fileid int32_t ld +ARG recno db_recno_t lu +ARG meta_pgno db_pgno_t lu +END + +/* + * mvptr + * Used when we change one or both of cur_recno and first_recno. + */ +BEGIN mvptr 42 85 +ARG opcode u_int32_t lu +DB fileid int32_t ld +ARG old_first db_recno_t lu +ARG new_first db_recno_t lu +ARG old_cur db_recno_t lu +ARG new_cur db_recno_t lu +POINTER metalsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu +END + + +/* + * del + * Used when we delete a record. + * recno is the record that is being deleted. + */ +BEGIN del 42 79 +DB fileid int32_t ld +POINTER lsn DB_LSN * lu +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG recno db_recno_t lu +END + +/* + * add + * Used when we put a record on a page. + * recno is the record being added. + * data is the record itself. + */ +BEGIN add 42 80 +DB fileid int32_t ld +POINTER lsn DB_LSN * lu +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG recno db_recno_t lu +DBT data DBT s +ARG vflag u_int32_t lu +DBT olddata DBT s +END + +/* + * delext + * Used when we delete a record in extent based queue. + * recno is the record that is being deleted. + */ +BEGIN delext 42 83 +DB fileid int32_t ld +POINTER lsn DB_LSN * lu +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG recno db_recno_t lu +DBT data DBT s +END diff --git a/src/qam/qam_auto.c b/src/qam/qam_auto.c new file mode 100644 index 00000000..604ad3f4 --- /dev/null +++ b/src/qam/qam_auto.c @@ -0,0 +1,83 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __qam_incfirst_desc[] = { + {LOGREC_DB, SSZ(__qam_incfirst_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__qam_incfirst_args, recno), "recno", "%lu"}, + {LOGREC_ARG, SSZ(__qam_incfirst_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __qam_mvptr_desc[] = { + {LOGREC_ARG, SSZ(__qam_mvptr_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__qam_mvptr_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__qam_mvptr_args, old_first), "old_first", "%lu"}, + {LOGREC_ARG, SSZ(__qam_mvptr_args, new_first), "new_first", "%lu"}, + {LOGREC_ARG, SSZ(__qam_mvptr_args, old_cur), "old_cur", "%lu"}, + {LOGREC_ARG, SSZ(__qam_mvptr_args, new_cur), "new_cur", "%lu"}, + {LOGREC_POINTER, SSZ(__qam_mvptr_args, metalsn), "metalsn", ""}, + {LOGREC_ARG, SSZ(__qam_mvptr_args, meta_pgno), "meta_pgno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __qam_del_desc[] = { + {LOGREC_DB, SSZ(__qam_del_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__qam_del_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__qam_del_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__qam_del_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__qam_del_args, recno), "recno", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __qam_add_desc[] = { + {LOGREC_DB, SSZ(__qam_add_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__qam_add_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__qam_add_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__qam_add_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__qam_add_args, recno), "recno", "%lu"}, + {LOGREC_DBT, SSZ(__qam_add_args, data), "data", ""}, + {LOGREC_ARG, SSZ(__qam_add_args, vflag), "vflag", "%lu"}, + {LOGREC_DBT, SSZ(__qam_add_args, olddata), "olddata", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC 
__qam_delext_desc[] = { + {LOGREC_DB, SSZ(__qam_delext_args, fileid), "fileid", ""}, + {LOGREC_POINTER, SSZ(__qam_delext_args, lsn), "lsn", ""}, + {LOGREC_ARG, SSZ(__qam_delext_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__qam_delext_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__qam_delext_args, recno), "recno", "%lu"}, + {LOGREC_DBT, SSZ(__qam_delext_args, data), "data", ""}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __qam_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__qam_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_incfirst_recover, DB___qam_incfirst)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_mvptr_recover, DB___qam_mvptr)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_del_recover, DB___qam_del)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_add_recover, DB___qam_add)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_delext_recover, DB___qam_delext)) != 0) + return (ret); + return (0); +} diff --git a/src/qam/qam_autop.c b/src/qam/qam_autop.c new file mode 100644 index 00000000..123a0a37 --- /dev/null +++ b/src/qam/qam_autop.c @@ -0,0 +1,126 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#ifdef HAVE_QUEUE +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __qam_incfirst_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_incfirst_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__qam_incfirst", __qam_incfirst_desc, info)); +} + +/* + * PUBLIC: int __qam_mvptr_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_mvptr_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__qam_mvptr", __qam_mvptr_desc, info)); +} + +/* + * PUBLIC: int __qam_del_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_del_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__qam_del", __qam_del_desc, info)); +} + +/* + * PUBLIC: int __qam_add_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_add_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__qam_add", __qam_add_desc, info)); +} + +/* + * PUBLIC: int __qam_delext_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_delext_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, 
DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__qam_delext", __qam_delext_desc, info)); +} + +/* + * PUBLIC: int __qam_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__qam_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_incfirst_print, DB___qam_incfirst)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_mvptr_print, DB___qam_mvptr)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_del_print, DB___qam_del)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_add_print, DB___qam_add)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __qam_delext_print, DB___qam_delext)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_QUEUE */ diff --git a/src/qam/qam_conv.c b/src/qam/qam_conv.c new file mode 100644 index 00000000..96209362 --- /dev/null +++ b/src/qam/qam_conv.c @@ -0,0 +1,79 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_am.h" +#include "dbinc/qam.h" + +/* + * __qam_mswap -- + * Swap the bytes on the queue metadata page. + * + * PUBLIC: int __qam_mswap __P((ENV *, PAGE *)); + */ +int +__qam_mswap(env, pg) + ENV *env; + PAGE *pg; +{ + u_int8_t *p; + + COMPQUIET(env, NULL); + + __db_metaswap(pg); + p = (u_int8_t *)pg + sizeof(DBMETA); + + SWAP32(p); /* first_recno */ + SWAP32(p); /* cur_recno */ + SWAP32(p); /* re_len */ + SWAP32(p); /* re_pad */ + SWAP32(p); /* rec_page */ + SWAP32(p); /* page_ext */ + p += 91 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ + + return (0); +} + +/* + * __qam_pgin_out -- + * Convert host-specific page layout to/from the host-independent format + * stored on disk. 
+ * We only need to fix up a few fields in the header + * + * PUBLIC: int __qam_pgin_out __P((ENV *, db_pgno_t, void *, DBT *)); + */ +int +__qam_pgin_out(env, pg, pp, cookie) + ENV *env; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + QPAGE *h; + + COMPQUIET(pg, 0); + pginfo = (DB_PGINFO *)cookie->data; + if (!F_ISSET(pginfo, DB_AM_SWAP)) + return (0); + + h = pp; + if (h->type == P_QAMMETA) + return (__qam_mswap(env, pp)); + + M_32_SWAP(h->lsn.file); + M_32_SWAP(h->lsn.offset); + M_32_SWAP(h->pgno); + + return (0); +} diff --git a/src/qam/qam_files.c b/src/qam/qam_files.c new file mode 100644 index 00000000..b94beef2 --- /dev/null +++ b/src/qam/qam_files.c @@ -0,0 +1,893 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/fop.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" + +#define QAM_EXNAME(Q, I, B, L) \ + snprintf((B), (L), \ + QUEUE_EXTENT, (Q)->dir, PATH_SEPARATOR[0], (Q)->name, (I)) + +/* + * __qam_fprobe -- calculate and open extent + * + * Calculate which extent the page is in, open and create if necessary. 
+ * + * PUBLIC: int __qam_fprobe __P((DBC *, db_pgno_t, + * PUBLIC: void *, qam_probe_mode, DB_CACHE_PRIORITY, u_int32_t)); + */ +int +__qam_fprobe(dbc, pgno, addrp, mode, priority, flags) + DBC *dbc; + db_pgno_t pgno; + void *addrp; + qam_probe_mode mode; + DB_CACHE_PRIORITY priority; + u_int32_t flags; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + ENV *env; + MPFARRAY *array; + QUEUE *qp; + u_int8_t fid[DB_FILE_ID_LEN]; + u_int32_t i, extid, maxext, numext, lflags, offset, oldext, openflags; + char buf[DB_MAXPATHLEN]; + int ftype, less, ret, t_ret; + + dbp = dbc->dbp; + env = dbp->env; + qp = (QUEUE *)dbp->q_internal; + ret = 0; + + if (qp->page_ext == 0) { + mpf = dbp->mpf; + switch (mode) { + case QAM_PROBE_GET: + return (__memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, flags, addrp)); + case QAM_PROBE_PUT: + return (__memp_fput(mpf, + dbc->thread_info, addrp, priority)); + case QAM_PROBE_DIRTY: + return (__memp_dirty(mpf, addrp, + dbc->thread_info, dbc->txn, priority, flags)); + case QAM_PROBE_MPF: + *(DB_MPOOLFILE **)addrp = mpf; + return (0); + } + } + + mpf = NULL; + + /* + * Need to lock long enough to find the mpf or create the file. + * The file cannot go away because we must have a record locked + * in that file. + */ + MUTEX_LOCK(env, dbp->mutex); + extid = QAM_PAGE_EXTENT(dbp, pgno); + + /* Array1 will always be in use if array2 is in use. */ + array = &qp->array1; + if (array->n_extent == 0) { + /* Start with 4 extents */ + array->n_extent = 4; + array->low_extent = extid; + numext = offset = oldext = 0; + less = 0; + goto alloc; + } + +retry: + if (extid < array->low_extent) { + less = 1; + offset = array->low_extent - extid; + } else { + less = 0; + offset = extid - array->low_extent; + } + if (qp->array2.n_extent != 0 && + (extid >= qp->array2.low_extent ? 
+ offset > extid - qp->array2.low_extent : + offset > qp->array2.low_extent - extid)) { + array = &qp->array2; + if (extid < array->low_extent) { + less = 1; + offset = array->low_extent - extid; + } else { + less = 0; + offset = extid - array->low_extent; + } + } + + /* + * Check to see if the requested extent is outside the range of + * extents in the array. This is true by default if there are + * no extents here yet. + */ + if (less == 1 || offset >= array->n_extent) { + oldext = array->n_extent; + numext = (array->hi_extent - array->low_extent) + 1; + if (less == 1 && offset + numext <= array->n_extent) { + /* + * If we can fit this one into the existing array by + * shifting the existing entries then we do not have + * to allocate. + */ + memmove(&array->mpfarray[offset], + array->mpfarray, numext + * sizeof(array->mpfarray[0])); + memset(array->mpfarray, 0, offset + * sizeof(array->mpfarray[0])); + offset = 0; + } else if (less == 0 && offset == array->n_extent && + (mode == QAM_PROBE_GET || mode == QAM_PROBE_PUT) && + array->mpfarray[0].pinref == 0) { + /* + * If this is at the end of the array and the file at + * the beginning has a zero pin count we can close + * the bottom extent and put this one at the end. + */ + mpf = array->mpfarray[0].mpf; + if (mpf != NULL && (ret = __memp_fclose(mpf, 0)) != 0) + goto err; + memmove(&array->mpfarray[0], &array->mpfarray[1], + (array->n_extent - 1) * sizeof(array->mpfarray[0])); + array->low_extent++; + array->hi_extent++; + offset--; + array->mpfarray[offset].mpf = NULL; + array->mpfarray[offset].pinref = 0; + } else { + /* + * See if we have wrapped around the queue. + * If it has then allocate the second array. + * Otherwise just expand the one we are using. 
+ */ + maxext = (u_int32_t) UINT32_MAX + / (qp->page_ext * qp->rec_page); + if (offset >= maxext/2) { + array = &qp->array2; + DB_ASSERT(env, array->n_extent == 0); + oldext = 0; + array->n_extent = 4; + array->low_extent = extid; + offset = 0; + numext = 0; + } else if (array->mpfarray[0].pinref == 0) { + /* + * Check to see if there are extents marked + * for deletion at the beginning of the cache. + * If so close them so they will go away. + */ + for (i = 0; i < array->n_extent; i++) { + if (array->mpfarray[i].pinref != 0) + break; + mpf = array->mpfarray[i].mpf; + if (mpf == NULL) + continue; + (void)__memp_get_flags(mpf, &lflags); + if (!FLD_ISSET(lflags, DB_MPOOL_UNLINK)) + break; + + array->mpfarray[i].mpf = NULL; + if ((ret = __memp_fclose(mpf, 0)) != 0) + goto err; + } + if (i == 0) + goto increase; + memmove(&array->mpfarray[0], + &array->mpfarray[i], + (array->n_extent - i) * + sizeof(array->mpfarray[0])); + memset(&array->mpfarray[array->n_extent - i], + '\0', i * sizeof(array->mpfarray[0])); + array->low_extent += i; + array->hi_extent += i; + goto retry; + } else { + /* + * Increase the size to at least include + * the new one and double it. + */ +increase: array->n_extent += offset; + array->n_extent <<= 2; + } +alloc: if ((ret = __os_realloc(env, + array->n_extent * sizeof(struct __qmpf), + &array->mpfarray)) != 0) + goto err; + + if (less == 1) { + /* + * Move the array up and put the new one + * in the first slot. + */ + memmove(&array->mpfarray[offset], + array->mpfarray, + numext * sizeof(array->mpfarray[0])); + memset(array->mpfarray, 0, + offset * sizeof(array->mpfarray[0])); + memset(&array->mpfarray[numext + offset], 0, + (array->n_extent - (numext + offset)) + * sizeof(array->mpfarray[0])); + offset = 0; + } + else + /* Clear the new part of the array. */ + memset(&array->mpfarray[oldext], 0, + (array->n_extent - oldext) * + sizeof(array->mpfarray[0])); + } + } + + /* Update the low and hi range of saved extents. 
*/ + if (extid < array->low_extent) + array->low_extent = extid; + if (extid > array->hi_extent) + array->hi_extent = extid; + + /* If the extent file is not yet open, open it. */ + if (array->mpfarray[offset].mpf == NULL) { + QAM_EXNAME(qp, extid, buf, sizeof(buf)); + if ((ret = __memp_fcreate( + env, &array->mpfarray[offset].mpf)) != 0) + goto err; + mpf = array->mpfarray[offset].mpf; + (void)__memp_set_lsn_offset(mpf, 0); + (void)__memp_set_pgcookie(mpf, &qp->pgcookie); + (void)__memp_get_ftype(dbp->mpf, &ftype); + (void)__memp_set_ftype(mpf, ftype); + (void)__memp_set_clear_len(mpf, dbp->pgsize); + + /* Set up the fileid for this extent. */ + __qam_exid(dbp, fid, extid); + (void)__memp_set_fileid(mpf, fid); + openflags = DB_EXTENT; + if (LF_ISSET(DB_MPOOL_CREATE)) + openflags |= DB_CREATE; + if (F_ISSET(dbp, DB_AM_RDONLY)) + openflags |= DB_RDONLY; + if (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB)) + openflags |= DB_DIRECT; + if ((ret = __memp_fopen(mpf, NULL, + buf, NULL, openflags, qp->mode, dbp->pgsize)) != 0) { + array->mpfarray[offset].mpf = NULL; + (void)__memp_fclose(mpf, 0); + goto err; + } + } + + /* + * We have found the right file. Update its ref count + * before dropping the dbp mutex so it does not go away. + */ + mpf = array->mpfarray[offset].mpf; + if (mode == QAM_PROBE_GET) + array->mpfarray[offset].pinref++; + + /* + * If we may create the page, then we are writing, + * the file may nolonger be empty after this operation + * so we clear the UNLINK flag. 
+ */ + if (LF_ISSET(DB_MPOOL_CREATE)) + (void)__memp_set_flags(mpf, DB_MPOOL_UNLINK, 0); + +err: + MUTEX_UNLOCK(env, dbp->mutex); + + if (ret == 0) { + pgno--; + pgno %= qp->page_ext; + switch (mode) { + case QAM_PROBE_GET: + ret = __memp_fget(mpf, &pgno, + dbc->thread_info, dbc->txn, flags, addrp); + if (ret == 0) + return (0); + break; + case QAM_PROBE_PUT: + ret = __memp_fput(mpf, + dbc->thread_info, addrp, dbp->priority); + break; + case QAM_PROBE_DIRTY: + return (__memp_dirty(mpf, addrp, + dbc->thread_info, dbc->txn, dbp->priority, flags)); + case QAM_PROBE_MPF: + *(DB_MPOOLFILE **)addrp = mpf; + return (0); + } + + MUTEX_LOCK(env, dbp->mutex); + /* Recalculate because we dropped the lock. */ + offset = extid - array->low_extent; + DB_ASSERT(env, array->mpfarray[offset].pinref > 0); + if (--array->mpfarray[offset].pinref == 0 && + (mode == QAM_PROBE_GET || ret == 0)) { + /* Check to see if this file will be unlinked. */ + (void)__memp_get_flags(mpf, &flags); + if (LF_ISSET(DB_MPOOL_UNLINK)) { + array->mpfarray[offset].mpf = NULL; + if ((t_ret = + __memp_fclose(mpf, 0)) != 0 && ret == 0) + ret = t_ret; + } + } + MUTEX_UNLOCK(env, dbp->mutex); + } + return (ret); +} + +/* + * __qam_fclose -- close an extent. + * + * Calculate which extent the page is in and close it. + * We assume the mpf entry is present. + * + * PUBLIC: int __qam_fclose __P((DB *, db_pgno_t)); + */ +int +__qam_fclose(dbp, pgnoaddr) + DB *dbp; + db_pgno_t pgnoaddr; +{ + DB_MPOOLFILE *mpf; + ENV *env; + MPFARRAY *array; + QUEUE *qp; + u_int32_t extid, offset; + int ret; + + ret = 0; + env = dbp->env; + qp = (QUEUE *)dbp->q_internal; + + MUTEX_LOCK(env, dbp->mutex); + + extid = QAM_PAGE_EXTENT(dbp, pgnoaddr); + array = &qp->array1; + if (array->low_extent > extid || array->hi_extent < extid) + array = &qp->array2; + offset = extid - array->low_extent; + + DB_ASSERT(env, + extid >= array->low_extent && offset < array->n_extent); + + /* If other threads are still using this file, leave it. 
*/ + if (array->mpfarray[offset].pinref != 0) + goto done; + + mpf = array->mpfarray[offset].mpf; + array->mpfarray[offset].mpf = NULL; + ret = __memp_fclose(mpf, 0); + +done: + MUTEX_UNLOCK(env, dbp->mutex); + return (ret); +} + +/* + * __qam_fremove -- remove an extent. + * + * Calculate which extent the page is in and remove it. There is no way + * to remove an extent without probing it first and seeing that is is empty + * so we assume the mpf entry is present. + * + * PUBLIC: int __qam_fremove __P((DB *, db_pgno_t)); + */ +int +__qam_fremove(dbp, pgnoaddr) + DB *dbp; + db_pgno_t pgnoaddr; +{ + DB_MPOOLFILE *mpf; + ENV *env; + MPFARRAY *array; + QUEUE *qp; + u_int32_t extid, offset; + int ret; + + qp = (QUEUE *)dbp->q_internal; + env = dbp->env; + ret = 0; + + MUTEX_LOCK(env, dbp->mutex); + + extid = QAM_PAGE_EXTENT(dbp, pgnoaddr); + array = &qp->array1; + if (array->low_extent > extid || array->hi_extent < extid) + array = &qp->array2; + offset = extid - array->low_extent; + + DB_ASSERT(env, + extid >= array->low_extent && offset < array->n_extent); + + mpf = array->mpfarray[offset].mpf; + /* This extent my already be marked for delete and closed. */ + if (mpf == NULL) + goto err; + + /* + * The log must be flushed before the file is deleted. We depend on + * the log record of the last delete to recreate the file if we crash. + */ + if (LOGGING_ON(env) && (ret = __log_flush(env, NULL)) != 0) + goto err; + + (void)__memp_set_flags(mpf, DB_MPOOL_UNLINK, 1); + /* Someone could be real slow, let them close it down. */ + if (array->mpfarray[offset].pinref != 0) + goto err; + array->mpfarray[offset].mpf = NULL; + if ((ret = __memp_fclose(mpf, 0)) != 0) + goto err; + + /* + * If the file is at the bottom of the array + * shift things down and adjust the end points. 
+ */ + if (offset == 0) { + memmove(array->mpfarray, &array->mpfarray[1], + (array->hi_extent - array->low_extent) + * sizeof(array->mpfarray[0])); + array->mpfarray[ + array->hi_extent - array->low_extent].mpf = NULL; + if (array->low_extent != array->hi_extent) + array->low_extent++; + } else { + if (extid == array->hi_extent) + array->hi_extent--; + } + +err: MUTEX_UNLOCK(env, dbp->mutex); + + return (ret); +} + +/* + * __qam_sync -- + * Flush the database cache. + * + * PUBLIC: int __qam_sync __P((DB *)); + */ +int +__qam_sync(dbp) + DB *dbp; +{ + int ret; + /* + * We can't easily identify the extent files associated with a specific + * Queue file, so flush all Queue extent files. + */ + if ((ret = __memp_fsync(dbp->mpf)) != 0) + return (ret); + if (((QUEUE *)dbp->q_internal)->page_ext != 0) + return (__memp_sync_int( + dbp->env, NULL, 0, DB_SYNC_QUEUE_EXTENT, NULL, NULL)); + return (0); +} + +/* + * __qam_gen_filelist -- generate a list of extent files. + * Another thread may close the handle so this should only + * be used single threaded or with care. + * + * PUBLIC: int __qam_gen_filelist __P((DB *, + * PUBLIC: DB_THREAD_INFO *, QUEUE_FILELIST **)); + */ +int +__qam_gen_filelist(dbp, ip, filelistp) + DB *dbp; + DB_THREAD_INFO *ip; + QUEUE_FILELIST **filelistp; +{ + DBC *dbc; + DB_MPOOLFILE *mpf; + ENV *env; + QMETA *meta; + QUEUE *qp; + size_t extent_cnt; + db_recno_t i, current, first, stop, rec_extent; + QUEUE_FILELIST *fp; + int ret; + + env = dbp->env; + mpf = dbp->mpf; + qp = (QUEUE *)dbp->q_internal; + *filelistp = NULL; + + if (qp->page_ext == 0) + return (0); + + /* This may happen during metapage recovery. */ + if (qp->name == NULL) + return (0); + + /* Find out the first and last record numbers in the database. 
*/ + i = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &i, ip, NULL, 0, &meta)) != 0) + return (ret); + + current = meta->cur_recno; + first = meta->first_recno; + + if ((ret = __memp_fput(mpf, ip, meta, dbp->priority)) != 0) + return (ret); + + /* + * Allocate the extent array. Calculate the worst case number of + * pages and convert that to a count of extents. The count of + * extents has 3 or 4 extra slots: + * roundoff at first (e.g., current record in extent); + * roundoff at current (e.g., first record in extent); + * NULL termination; and + * UINT32_MAX wraparound (the last extent can be small). + */ + rec_extent = qp->rec_page * qp->page_ext; + if (current >= first) + extent_cnt = (current - first) / rec_extent + 3; + else + extent_cnt = + (current + (UINT32_MAX - first)) / rec_extent + 4; + + if (extent_cnt == 0) + return (0); + if ((ret = __os_calloc(env, + extent_cnt, sizeof(QUEUE_FILELIST), filelistp)) != 0) + return (ret); + fp = *filelistp; + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + return (ret); + +again: + if (current >= first) + stop = current; + else + stop = UINT32_MAX; + + /* + * Make sure that first is at the same offset in the extent as stop. + * This guarantees that the stop will be reached in the loop below, + * even if it is the only record in its extent. This calculation is + * safe because first won't move out of its extent. + */ + first -= first % rec_extent; + first += stop % rec_extent; + + for (i = first; i >= first && i <= stop; i += rec_extent) { + if ((ret = __qam_fprobe(dbc, QAM_RECNO_PAGE(dbp, i), + &fp->mpf, QAM_PROBE_MPF, dbp->priority, 0)) != 0) { + if (ret == ENOENT) + continue; + goto err; + } + fp->id = QAM_RECNO_EXTENT(dbp, i); + fp++; + DB_ASSERT(env, (size_t)(fp - *filelistp) < extent_cnt); + } + + if (current < first) { + first = 1; + goto again; + } + +err: (void)__dbc_close(dbc); + return (ret); +} + +/* + * __qam_extent_names -- generate a list of extent files names. 
+ * + * PUBLIC: int __qam_extent_names __P((ENV *, char *, char ***)); + */ +int +__qam_extent_names(env, name, namelistp) + ENV *env; + char *name; + char ***namelistp; +{ + DB *dbp; + DB_THREAD_INFO *ip; + QUEUE *qp; + QUEUE_FILELIST *filelist, *fp; + size_t len; + int cnt, ret, t_ret; + char buf[DB_MAXPATHLEN], **cp, *freep; + + *namelistp = NULL; + filelist = NULL; + ENV_GET_THREAD_INFO(env, ip); + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + if ((ret = __db_open(dbp, ip, + NULL, name, NULL, DB_QUEUE, DB_RDONLY, 0, PGNO_BASE_MD)) != 0) + goto done; + qp = dbp->q_internal; + if (qp->page_ext == 0) + goto done; + + if ((ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0) + goto done; + + if (filelist == NULL) + goto done; + + cnt = 0; + for (fp = filelist; fp->mpf != NULL; fp++) + cnt++; + + /* QUEUE_EXTENT contains extra chars, but add 6 anyway for the int. */ + len = (size_t)cnt * (sizeof(**namelistp) + + strlen(QUEUE_EXTENT) + strlen(qp->dir) + strlen(qp->name) + 6); + + if ((ret = __os_malloc(dbp->env, len, namelistp)) != 0) + goto done; + cp = *namelistp; + freep = (char *)(cp + cnt + 1); + for (fp = filelist; fp->mpf != NULL; fp++) { + QAM_EXNAME(qp, fp->id, buf, sizeof(buf)); + len = strlen(buf); + *cp++ = freep; + (void)strcpy(freep, buf); + freep += len + 1; + } + *cp = NULL; + +done: + if (filelist != NULL) + __os_free(dbp->env, filelist); + if ((t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qam_exid -- + * Generate a fileid for an extent based on the fileid of the main + * file. Since we do not log schema creates/deletes explicitly, the log + * never captures the fileid of an extent file. In order that masters and + * replicas have the same fileids (so they can explicitly delete them), we + * use computed fileids for the extent files of Queue files. 
+ * + * An extent file id retains the low order 12 bytes of the file id and + * overwrites the dev/inode fields, placing a 0 in the inode field, and + * the extent number in the dev field. + * + * PUBLIC: void __qam_exid __P((DB *, u_int8_t *, u_int32_t)); + */ +void +__qam_exid(dbp, fidp, exnum) + DB *dbp; + u_int8_t *fidp; + u_int32_t exnum; +{ + int i; + u_int8_t *p; + + /* Copy the fileid from the master. */ + memcpy(fidp, dbp->fileid, DB_FILE_ID_LEN); + + /* The first four bytes are the inode or the FileIndexLow; 0 it. */ + for (i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = 0; + + /* The next four bytes are the dev/FileIndexHigh; insert the exnum . */ + for (p = (u_int8_t *)&exnum, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; +} + +/* + * __qam_nameop -- + * Remove or rename extent files associated with a particular file. + * This is to remove or rename (both in mpool and the file system) any + * extent files associated with the given dbp. + * This is either called from the QUEUE remove or rename methods or + * when undoing a transaction that created the database. + * + * PUBLIC: int __qam_nameop __P((DB *, DB_TXN *, const char *, qam_name_op)); + */ +int +__qam_nameop(dbp, txn, newname, op) + DB *dbp; + DB_TXN *txn; + const char *newname; + qam_name_op op; +{ + ENV *env; + QUEUE *qp; + size_t exlen, fulllen, len; + u_int8_t fid[DB_FILE_ID_LEN]; + u_int32_t exid; + int cnt, i, ret, t_ret; + char buf[DB_MAXPATHLEN], nbuf[DB_MAXPATHLEN], sepsave; + char *endname, *endpath, *exname, *fullname, **names; + char *ndir, *namep, *new, *cp; + + env = dbp->env; + qp = (QUEUE *)dbp->q_internal; + cnt = ret = t_ret = 0; + namep = exname = fullname = NULL; + names = NULL; + + /* If this isn't a queue with extents, we're done. */ + if (qp->page_ext == 0) + return (0); + + /* + * Generate the list of all queue extents for this file (from the + * file system) and then cycle through removing them and evicting + * from mpool. We have two modes of operation here. 
If we are + * undoing log operations, then do not write log records and try + * to keep going even if we encounter failures in nameop. If we + * are in mainline code, then return as soon as we have a problem. + * Memory allocation errors (__db_appname, __os_malloc) are always + * considered failure. + * + * Set buf to : dir/__dbq.NAME.0 and fullname to HOME/dir/__dbq.NAME.0 + * or, in the case of an absolute path: /dir/__dbq.NAME.0 + */ + QAM_EXNAME(qp, 0, buf, sizeof(buf)); + if ((ret = __db_appname(env, + DB_APP_DATA, buf, &dbp->dirname, &fullname)) != 0) + return (ret); + + /* We should always have a path separator here. */ + if ((endpath = __db_rpath(fullname)) == NULL) { + ret = EINVAL; + goto err; + } + sepsave = *endpath; + *endpath = '\0'; + + /* + * Get the list of all names in the directory and restore the + * path separator. + */ + if ((ret = __os_dirlist(env, fullname, 0, &names, &cnt)) != 0) + goto err; + *endpath = sepsave; + + /* If there aren't any names, don't allocate any space. */ + if (cnt == 0) + goto err; + + /* + * Now, make endpath reference the queue extent names upon which + * we can match. Then we set the end of the path to be the + * beginning of the extent number, and we can compare the bytes + * between endpath and endname (__dbq.NAME.). + */ + endpath++; + endname = strrchr(endpath, '.'); + if (endname == NULL) { + ret = EINVAL; + goto err; + } + ++endname; + *endname = '\0'; + len = strlen(endpath); + fulllen = strlen(fullname); + + /* Allocate space for a full extent name. */ + exlen = fulllen + 20; + if ((ret = __os_malloc(env, exlen, &exname)) != 0) + goto err; + + ndir = new = NULL; + if (newname != NULL) { + if ((ret = __os_strdup(env, newname, &namep)) != 0) + goto err; + ndir = namep; + if ((new = __db_rpath(namep)) != NULL) + *new++ = '\0'; + else { + new = namep; + ndir = PATH_DOT; + } + } + for (i = 0; i < cnt; i++) { + /* Check if this is a queue extent file. 
*/ + if (strncmp(names[i], endpath, len) != 0) + continue; + /* Make sure we have all numbers. foo.db vs. foo.db.0. */ + for (cp = &names[i][len]; *cp != '\0'; cp++) + if (!isdigit((int)*cp)) + break; + if (*cp != '\0') + continue; + + /* + * We have a queue extent file. We need to generate its + * name and its fileid. + */ + exid = (u_int32_t)strtoul(names[i] + len, NULL, 10); + __qam_exid(dbp, fid, exid); + + switch (op) { + case QAM_NAME_DISCARD: + snprintf(exname, exlen, + "%s%s", fullname, names[i] + len); + if ((t_ret = __memp_nameop(dbp->env, + fid, NULL, exname, NULL, + F_ISSET(dbp, DB_AM_INMEM))) != 0 && ret == 0) + ret = t_ret; + break; + + case QAM_NAME_RENAME: + snprintf(nbuf, sizeof(nbuf), QUEUE_EXTENT, + ndir, PATH_SEPARATOR[0], new, exid); + QAM_EXNAME(qp, exid, buf, sizeof(buf)); + if ((ret = __fop_rename(env, + txn, buf, nbuf, &dbp->dirname, fid, DB_APP_DATA, 1, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? + DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + break; + + case QAM_NAME_REMOVE: + QAM_EXNAME(qp, exid, buf, sizeof(buf)); + if ((ret = __fop_remove(env, txn, fid, + buf, &dbp->dirname, + DB_APP_DATA, F_ISSET(dbp, DB_AM_NOT_DURABLE) ? + DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + break; + } + } + +err: if (fullname != NULL) + __os_free(env, fullname); + if (exname != NULL) + __os_free(env, exname); + if (namep != NULL) + __os_free(env, namep); + if (names != NULL) + __os_dirfree(env, names, cnt); + return (ret); +} + +/* + * __qam_lsn_reset -- reset the lsns for extents. 
+ * + * PUBLIC: int __qam_lsn_reset __P((DB *, DB_THREAD_INFO *)); + */ +int +__qam_lsn_reset(dbp, ip) + DB *dbp; + DB_THREAD_INFO *ip; +{ + QUEUE *qp; + QUEUE_FILELIST *filelist, *fp; + int ret; + + qp = dbp->q_internal; + if (qp->page_ext == 0) + return (0); + + if ((ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0) + return (ret); + + if (filelist == NULL) + return (ret); + + for (fp = filelist; fp->mpf != NULL; fp++) + if ((ret = __db_lsn_reset(fp->mpf, ip)) != 0) + break; + + __os_free(dbp->env, filelist); + return (ret); +} diff --git a/src/qam/qam_method.c b/src/qam/qam_method.c new file mode 100644 index 00000000..681cede9 --- /dev/null +++ b/src/qam/qam_method.c @@ -0,0 +1,399 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __qam_rr __P((DB *, DB_THREAD_INFO *, DB_TXN *, + const char *, const char *, const char *, qam_name_op)); +static int __qam_set_extentsize __P((DB *, u_int32_t)); + +/* + * __qam_db_create -- + * Queue specific initialization of the DB structure. + * + * PUBLIC: int __qam_db_create __P((DB *)); + */ +int +__qam_db_create(dbp) + DB *dbp; +{ + QUEUE *t; + int ret; + + /* Allocate and initialize the private queue structure. */ + if ((ret = __os_calloc(dbp->env, 1, sizeof(QUEUE), &t)) != 0) + return (ret); + dbp->q_internal = t; + dbp->get_q_extentsize = __qam_get_extentsize; + dbp->set_q_extentsize = __qam_set_extentsize; + + t->re_pad = ' '; + + return (0); +} + +/* + * __qam_db_close -- + * Queue specific discard of the DB structure. 
+ * + * PUBLIC: int __qam_db_close __P((DB *, u_int32_t)); + */ +int +__qam_db_close(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + MPFARRAY *array; + QUEUE *t; + struct __qmpf *mpfp; + u_int32_t i; + int ret, t_ret; + + ret = 0; + if ((t = dbp->q_internal) == NULL) + return (0); + + array = &t->array1; +again: + mpfp = array->mpfarray; + if (mpfp != NULL) { + for (i = array->low_extent; + i <= array->hi_extent; i++, mpfp++) { + mpf = mpfp->mpf; + mpfp->mpf = NULL; + if (mpf != NULL && (t_ret = __memp_fclose(mpf, + LF_ISSET(DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) + != 0 && ret == 0) + ret = t_ret; + } + __os_free(dbp->env, array->mpfarray); + } + if (t->array2.n_extent != 0) { + array = &t->array2; + array->n_extent = 0; + goto again; + } + + if (LF_ISSET(DB_AM_DISCARD) && + (t_ret = __qam_nameop(dbp, NULL, + NULL, QAM_NAME_DISCARD)) != 0 && ret == 0) + ret = t_ret; + + if (t->path != NULL) + __os_free(dbp->env, t->path); + __os_free(dbp->env, t); + dbp->q_internal = NULL; + + return (ret); +} + +/* + * __qam_get_extentsize -- + * The DB->q_get_extentsize method. + * + * PUBLIC: int __qam_get_extentsize __P((DB *, u_int32_t *)); + */ +int +__qam_get_extentsize(dbp, q_extentsizep) + DB *dbp; + u_int32_t *q_extentsizep; +{ + *q_extentsizep = ((QUEUE*)dbp->q_internal)->page_ext; + return (0); +} + +static int +__qam_set_extentsize(dbp, extentsize) + DB *dbp; + u_int32_t extentsize; +{ + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_extentsize"); + + if (extentsize < 1) { + __db_errx(dbp->env, DB_STR("1140", + "Extent size must be at least 1")); + return (EINVAL); + } + + ((QUEUE*)dbp->q_internal)->page_ext = extentsize; + + return (0); +} + +/* + * __queue_pageinfo - + * Given a dbp, get first/last page information about a queue. 
+ * + * PUBLIC: int __queue_pageinfo __P((DB *, db_pgno_t *, db_pgno_t *, + * PUBLIC: int *, int, u_int32_t)); + */ +int +__queue_pageinfo(dbp, firstp, lastp, emptyp, prpage, flags) + DB *dbp; + db_pgno_t *firstp, *lastp; + int *emptyp; + int prpage; + u_int32_t flags; +{ + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + QMETA *meta; + db_pgno_t first, i, last; + int empty, ret, t_ret; + + mpf = dbp->mpf; + ENV_GET_THREAD_INFO(dbp->env, ip); + + /* Find out the page number of the last page in the database. */ + i = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &i, ip, NULL, 0, &meta)) != 0) + return (ret); + + first = QAM_RECNO_PAGE(dbp, meta->first_recno); + last = QAM_RECNO_PAGE( + dbp, meta->cur_recno == 1 ? 1 : meta->cur_recno - 1); + + empty = meta->cur_recno == meta->first_recno; + if (firstp != NULL) + *firstp = first; + if (lastp != NULL) + *lastp = last; + if (emptyp != NULL) + *emptyp = empty; +#ifdef HAVE_STATISTICS + if (prpage) + ret = __db_prpage(dbp, (PAGE *)meta, flags); +#else + COMPQUIET(prpage, 0); + COMPQUIET(flags, 0); +#endif + + if ((t_ret = __memp_fput(mpf, + ip, meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +#ifdef HAVE_STATISTICS +/* + * __db_prqueue -- + * Print out a queue + * + * PUBLIC: int __db_prqueue __P((DB *, u_int32_t)); + */ +int +__db_prqueue(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DBC *dbc; + DB_THREAD_INFO *ip; + PAGE *h; + db_pgno_t first, i, last, pg_ext, stop; + int empty, ret, t_ret; + + if ((ret = __queue_pageinfo(dbp, &first, &last, &empty, 1, flags)) != 0) + return (ret); + + if (empty || ret != 0) + return (ret); + + ENV_GET_THREAD_INFO(dbp->env, ip); + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + return (ret); + i = first; + if (first > last) + stop = QAM_RECNO_PAGE(dbp, UINT32_MAX); + else + stop = last; + + /* Dump each page. 
*/ + pg_ext = ((QUEUE *)dbp->q_internal)->page_ext; +begin: + for (; i <= stop; ++i) { + if ((ret = __qam_fget(dbc, &i, 0, &h)) != 0) { + if (pg_ext == 0) { + if (ret == DB_PAGE_NOTFOUND && first == last) + ret = 0; + goto err; + } + if (ret == ENOENT || ret == DB_PAGE_NOTFOUND) { + i += (pg_ext - ((i - 1) % pg_ext)) - 1; + ret = 0; + continue; + } + goto err; + } + (void)__db_prpage(dbp, h, flags); + if ((ret = __qam_fput(dbc, i, h, dbp->priority)) != 0) + goto err; + } + + if (first > last) { + i = 1; + stop = last; + first = last; + goto begin; + } + +err: + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} +#endif + +/* + * __qam_remove -- + * Remove method for a Queue. + * + * PUBLIC: int __qam_remove __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: const char *, const char *, u_int32_t)); + */ +int +__qam_remove(dbp, ip, txn, name, subdb, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__qam_rr(dbp, ip, txn, name, subdb, NULL, QAM_NAME_REMOVE)); +} + +/* + * __qam_rename -- + * Rename method for a Queue. + * + * PUBLIC: int __qam_rename __P((DB *, DB_THREAD_INFO *, DB_TXN *, + * PUBLIC: const char *, const char *, const char *)); + */ +int +__qam_rename(dbp, ip, txn, name, subdb, newname) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb, *newname; +{ + return (__qam_rr(dbp, ip, txn, name, subdb, newname, QAM_NAME_RENAME)); +} + +/* + * __qam_rr -- + * Remove/Rename method for a Queue. 
+ */ +static int +__qam_rr(dbp, ip, txn, name, subdb, newname, op) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name, *subdb, *newname; + qam_name_op op; +{ + DB *tmpdbp; + ENV *env; + QUEUE *qp; + int ret, t_ret; + + env = dbp->env; + ret = 0; + + if (subdb != NULL && name != NULL) { + __db_errx(env, DB_STR("1141", + "Queue does not support multiple databases per file")); + return (EINVAL); + } + + /* + * Since regular rename no longer opens the database, we may have + * to do it here. + */ + if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) + tmpdbp = dbp; + else { + if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0) + return (ret); + + /* + * We need to make sure we don't self-deadlock, so give + * this dbp the same locker as the incoming one. + */ + tmpdbp->locker = dbp->locker; + if ((ret = __db_open(tmpdbp, ip, txn, + name, NULL, DB_QUEUE, DB_RDONLY, 0, PGNO_BASE_MD)) != 0) + goto err; + } + + qp = (QUEUE *)tmpdbp->q_internal; + if (qp->page_ext != 0) + ret = __qam_nameop(tmpdbp, txn, newname, op); + + if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { +err: /* + * Since we copied the locker ID from the dbp, we'd better not + * free it here. + */ + tmpdbp->locker = NULL; + + /* We need to remove the lock event we associated with this. */ + if (txn != NULL) + __txn_remlock(env, + txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID); + + if ((t_ret = __db_close(tmpdbp, + txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } + return (ret); +} + +/* + * __qam_map_flags -- + * Map queue-specific flags from public to the internal values. + * + * PUBLIC: void __qam_map_flags __P((DB *, u_int32_t *, u_int32_t *)); + */ +void +__qam_map_flags(dbp, inflagsp, outflagsp) + DB *dbp; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(dbp, NULL); + + if (FLD_ISSET(*inflagsp, DB_INORDER)) { + FLD_SET(*outflagsp, DB_AM_INORDER); + FLD_CLR(*inflagsp, DB_INORDER); + } +} + +/* + * __qam_set_flags -- + * Set queue-specific flags. 
+ * + * PUBLIC: int __qam_set_flags __P((DB *, u_int32_t *flagsp)); + */ +int +__qam_set_flags(dbp, flagsp) + DB *dbp; + u_int32_t *flagsp; +{ + + __qam_map_flags(dbp, flagsp, &dbp->flags); + return (0); +} diff --git a/src/qam/qam_open.c b/src/qam/qam_open.c new file mode 100644 index 00000000..e619fa2f --- /dev/null +++ b/src/qam/qam_open.c @@ -0,0 +1,346 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/fop.h" + +static int __qam_init_meta __P((DB *, QMETA *)); + +/* + * __qam_open + * + * PUBLIC: int __qam_open __P((DB *, DB_THREAD_INFO *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, int, u_int32_t)); + */ +int +__qam_open(dbp, ip, txn, name, base_pgno, mode, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + int mode; + u_int32_t flags; +{ + DBC *dbc; + DB_MPOOLFILE *mpf; + ENV *env; + QMETA *qmeta; + QUEUE *t; + int ret, t_ret; + + env = dbp->env; + mpf = dbp->mpf; + t = dbp->q_internal; + ret = 0; + qmeta = NULL; + + if (name == NULL && t->page_ext != 0) { + __db_errx(env, DB_STR("1134", + "Extent size may not be specified for in-memory queue database")); + return (EINVAL); + } + + if (MULTIVERSION(dbp)) { + __db_errx(env, DB_STR("1135", + "Multiversion queue databases are not supported")); + return (EINVAL); + } + + /* Initialize the remaining fields/methods of the DB. */ + dbp->db_am_remove = __qam_remove; + dbp->db_am_rename = __qam_rename; + + /* + * Get a cursor. If DB_CREATE is specified, we may be creating + * pages, and to do that safely in CDB we need a write cursor. 
+ * In STD_LOCKING mode, we'll synchronize using the meta page + * lock instead. + */ + if ((ret = __db_cursor(dbp, ip, txn, &dbc, + LF_ISSET(DB_CREATE) && CDB_LOCKING(env) ? + DB_WRITECURSOR : 0)) != 0) + return (ret); + + /* + * Get the meta data page. It must exist, because creates of + * files/databases come in through the __qam_new_file interface + * and queue doesn't support subdatabases. + */ + if ((ret = __memp_fget(mpf, &base_pgno, ip, txn, 0, &qmeta)) != 0) + goto err; + + /* If the magic number is incorrect, that's a fatal error. */ + if (qmeta->dbmeta.magic != DB_QAMMAGIC) { + __db_errx(env, DB_STR_A("1136", + "__qam_open: %s: unexpected file type or format", "%s"), + name); + ret = EINVAL; + goto err; + } + + /* Setup information needed to open extents. */ + t->page_ext = qmeta->page_ext; + + if (t->page_ext != 0 && (ret = __qam_set_ext_data(dbp, name)) != 0) + goto err; + + if (mode == 0) + mode = DB_MODE_660; + t->mode = mode; + t->re_pad = (int)qmeta->re_pad; + t->re_len = qmeta->re_len; + t->rec_page = qmeta->rec_page; + + t->q_meta = base_pgno; + t->q_root = base_pgno + 1; + +err: if (qmeta != NULL && (t_ret = + __memp_fput(mpf, ip, qmeta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qam_set_ext_data -- + * Setup DBP data for opening queue extents. 
+ * + * PUBLIC: int __qam_set_ext_data __P((DB*, const char *)); + */ +int +__qam_set_ext_data(dbp, name) + DB *dbp; + const char *name; +{ + QUEUE *t; + int ret; + + t = dbp->q_internal; + t->pginfo.db_pagesize = dbp->pgsize; + t->pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + t->pginfo.type = dbp->type; + t->pgcookie.data = &t->pginfo; + t->pgcookie.size = sizeof(DB_PGINFO); + + if ((ret = __os_strdup(dbp->env, name, &t->path)) != 0) + return (ret); + t->dir = t->path; + if ((t->name = __db_rpath(t->path)) == NULL) { + t->name = t->path; + t->dir = PATH_DOT; + } else + *t->name++ = '\0'; + + return (0); +} + +/* + * __qam_metachk -- + * + * PUBLIC: int __qam_metachk __P((DB *, const char *, QMETA *)); + */ +int +__qam_metachk(dbp, name, qmeta) + DB *dbp; + const char *name; + QMETA *qmeta; +{ + ENV *env; + u_int32_t vers; + int ret; + + env = dbp->env; + ret = 0; + + /* + * At this point, all we know is that the magic number is for a Queue. + * Check the version, the database may be out of date. + */ + vers = qmeta->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 1: + case 2: + __db_errx(env, DB_STR_A("1137", + "%s: queue version %lu requires a version upgrade", + "%s %lu"), name, (u_long)vers); + return (DB_OLD_VERSION); + case 3: + case 4: + break; + default: + __db_errx(env, DB_STR_A("1138", + "%s: unsupported qam version: %lu", "%s %lu"), + name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && + (ret = __qam_mswap(env, (PAGE *)qmeta)) != 0) + return (ret); + + /* Check the type. */ + if (dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN) + return (EINVAL); + dbp->type = DB_QUEUE; + DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE); + + /* Set the page size. */ + dbp->pgsize = qmeta->dbmeta.pagesize; + + /* Copy the file's ID. 
*/ + memcpy(dbp->fileid, qmeta->dbmeta.uid, DB_FILE_ID_LEN); + + /* Set up AM-specific methods that do not require an open. */ + dbp->db_am_rename = __qam_rename; + dbp->db_am_remove = __qam_remove; + + return (ret); +} + +/* + * __qam_init_meta -- + * Initialize the meta-data for a Queue database. + */ +static int +__qam_init_meta(dbp, meta) + DB *dbp; + QMETA *meta; +{ + ENV *env; + QUEUE *t; + + env = dbp->env; + t = dbp->q_internal; + + memset(meta, 0, sizeof(QMETA)); + LSN_NOT_LOGGED(meta->dbmeta.lsn); + meta->dbmeta.pgno = PGNO_BASE_MD; + meta->dbmeta.last_pgno = 0; + meta->dbmeta.magic = DB_QAMMAGIC; + meta->dbmeta.version = DB_QAMVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = env->crypto_handle->alg; + DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_QAMMETA; + meta->re_pad = (u_int32_t)t->re_pad; + meta->re_len = t->re_len; + meta->rec_page = CALC_QAM_RECNO_PER_PAGE(dbp); + meta->cur_recno = 1; + meta->first_recno = 1; + meta->page_ext = t->page_ext; + t->rec_page = meta->rec_page; + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + /* Verify that we can fit at least one record per page. */ + if (QAM_RECNO_PER_PAGE(dbp) < 1) { + __db_errx(env, DB_STR_A("1139", + "Record size of %lu too large for page size of %lu", + "%lu %lu"), (u_long)t->re_len, (u_long)dbp->pgsize); + return (EINVAL); + } + + return (0); +} + +/* + * __qam_new_file -- + * Create the necessary pages to begin a new queue database file. 
+ * + * PUBLIC: int __qam_new_file __P((DB *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); + */ +int +__qam_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + DBT pdbt; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + ENV *env; + QMETA *meta; + db_pgno_t pgno; + int ret, t_ret; + + /* + * Build meta-data page. + * + * This code appears more complex than it is because of the two cases + * (named and unnamed). + * + * For each page being created, there are three parts: 1) a "get page" + * chunk (which either uses malloc'd memory or calls __memp_fget), 2) + * the initialization, and 3) the "put page" chunk which either does a + * fop write or an __memp_fput. + */ + if (F_ISSET(dbp, DB_AM_INMEM)) { + mpf = dbp->mpf; + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(mpf, &pgno, ip, txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) + return (ret); + + if ((ret = __qam_init_meta(dbp, meta)) != 0) + goto err1; + + if ((ret = __db_log_page(dbp, + txn, &meta->dbmeta.lsn, pgno, (PAGE *)meta)) != 0) + goto err1; +err1: if ((t_ret = + __memp_fput(mpf, ip, meta, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + } else { + env = dbp->env; + if ((ret = __os_calloc(env, 1, dbp->pgsize, &meta)) != 0) + return (ret); + + if ((ret = __qam_init_meta(dbp, meta)) != 0) + goto err2; + + pginfo.db_pagesize = dbp->pgsize; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pginfo.type = DB_QUEUE; + DB_SET_DBT(pdbt, &pginfo, sizeof(pginfo)); + if ((ret = + __db_pgout(env->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err2; + ret = __fop_write(env, txn, name, dbp->dirname, + DB_APP_DATA, fhp, dbp->pgsize, 0, 0, meta, dbp->pgsize, 1, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? 
DB_LOG_NOT_DURABLE : 0); + +err2: __os_free(env, meta); + } + + return (ret); +} diff --git a/src/qam/qam_rec.c b/src/qam/qam_rec.c new file mode 100644 index 00000000..c6ab9de6 --- /dev/null +++ b/src/qam/qam_rec.c @@ -0,0 +1,687 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __qam_adjust_first __P((DB *, DBC *, QMETA *, db_recno_t)); + +/* + * LSNs in queue data pages are advisory. They do not have to be accurate + * as all operations are idempotent on records. They should not be rolled + * forward during recovery as committed transaction may obscure updates from + * an incomplete transaction that updates the same page. The incomplete + * transaction may be completed during a later hot backup cycle. + */ + +/* Queue version of REC_DIRTY -- needs to probe the correct file. 
*/ +#define QAM_DIRTY(dbc, pgno, pagep) \ + if ((ret = __qam_dirty((dbc), \ + pgno, pagep, (dbc)->priority)) != 0) { \ + ret = __db_pgerr((dbc)->dbp, (pgno), ret); \ + goto out; \ + } + +static int +__qam_adjust_first(file_dbp, dbc, meta, recno) + DB *file_dbp; + DBC *dbc; + QMETA *meta; + db_recno_t recno; +{ + QUEUE_CURSOR *cp; + u_int32_t rec_ext; + int exact, ret; + + ret = 0; + if (meta->page_ext == 0) + rec_ext = 0; + else + rec_ext = meta->page_ext * meta->rec_page; + cp = (QUEUE_CURSOR *)dbc->internal; + if (meta->first_recno == RECNO_OOB) + meta->first_recno++; + while (meta->first_recno != meta->cur_recno && + !QAM_BEFORE_FIRST(meta, recno)) { + if ((ret = __qam_position(dbc, + &meta->first_recno, 0, &exact)) != 0) + return (ret); + if (cp->page != NULL && (ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + return (ret); + + if (exact == 1) + break; + if (cp->page != NULL && + rec_ext != 0 && meta->first_recno % rec_ext == 0) + if ((ret = + __qam_fremove(file_dbp, cp->pgno)) != 0) + return (ret); + REC_DIRTY(file_dbp->mpf, + dbc->thread_info, dbc->priority, &meta); + QAM_INC_RECNO(meta->first_recno); + } +out: return (ret); +} + +/* + * __qam_incfirst_recover -- + * Recovery function for incfirst. + * + * PUBLIC: int __qam_incfirst_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_incfirst_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_incfirst_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN trunc_lsn; + DB_MPOOLFILE *mpf; + QMETA *meta; + db_pgno_t metapg; + int ret; + + COMPQUIET(meta, NULL); + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__qam_incfirst_print); + REC_INTRO(__qam_incfirst_read, ip, 0); + + metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; + + /* Allocate our own cursor without DB_RECOVER as we need a locker. 
*/ + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0) + goto out; + F_SET(dbc, DBC_RECOVER); + + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, + 0, &meta)) != 0) { + if (DB_REDO(op)) { + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, + DB_MPOOL_CREATE, &meta)) != 0) + goto out; + meta->dbmeta.pgno = metapg; + meta->dbmeta.type = P_QAMMETA; + } else { + *lsnp = argp->prev_lsn; + goto out; + } + } + + /* + * Only move first_recno backwards so we pick up the aborted delete. + * When going forward we need to be careful since + * we may have bumped over a locked record. + */ + if (DB_UNDO(op)) { + if (QAM_BEFORE_FIRST(meta, argp->recno)) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->first_recno = argp->recno; + } + + trunc_lsn = ((DB_TXNHEAD *)info)->trunc_lsn; + /* if we are truncating, update the LSN */ + if (!IS_ZERO_LSN(trunc_lsn) && + LOG_COMPARE(&LSN(meta), &trunc_lsn) > 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + LSN(meta) = trunc_lsn; + } + } else { + if (LOG_COMPARE(&LSN(meta), lsnp) < 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + LSN(meta) = *lsnp; + } + if ((ret = __qam_adjust_first(file_dbp, + dbc, meta, argp->recno + 1)) != 0) + goto err; + } + + ret = __memp_fput(mpf, ip, meta, dbc->priority); + if (ret != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + + if (0) { +err: (void)__memp_fput(mpf, ip, meta, dbc->priority); + } + +out: REC_CLOSE; +} + +/* + * __qam_mvptr_recover -- + * Recovery function for mvptr. 
+ * + * PUBLIC: int __qam_mvptr_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_mvptr_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_mvptr_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_LSN trunc_lsn; + DB_MPOOLFILE *mpf; + QMETA *meta; + QUEUE_CURSOR *cp; + db_pgno_t metapg; + int cmp_n, cmp_p, exact, ret; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__qam_mvptr_print); + REC_INTRO(__qam_mvptr_read, ip, 0); + + /* Allocate our own cursor without DB_RECOVER as we need a locker. */ + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0) + goto out; + F_SET(dbc, DBC_RECOVER); + + metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; + + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, 0, &meta)) != 0) { + if (DB_REDO(op)) { + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, + DB_MPOOL_CREATE, &meta)) != 0) { + goto out; + } + meta->dbmeta.pgno = metapg; + meta->dbmeta.type = P_QAMMETA; + } else { + *lsnp = argp->prev_lsn; + goto out; + } + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); + cmp_p = LOG_COMPARE(&LSN(meta), &argp->metalsn); + + /* + * Under normal circumstances, we never undo a movement of one of + * the pointers. Just move them along regardless of abort/commit. + * When going forward we need to verify that this is really where + * the pointer belongs. A transaction may roll back and reinsert + * a record that was missing at the time of this action. + * + * If we're undoing a truncate, we need to reset the pointers to + * their state before the truncate. 
+ */ + if (DB_UNDO(op)) { + if ((argp->opcode & QAM_TRUNCATE) && cmp_n <= 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->first_recno = argp->old_first; + meta->cur_recno = argp->old_cur; + LSN(meta) = argp->metalsn; + } + /* If the page lsn is beyond the truncate point, move it back */ + trunc_lsn = ((DB_TXNHEAD *)info)->trunc_lsn; + if (!IS_ZERO_LSN(trunc_lsn) && + LOG_COMPARE(&trunc_lsn, &LSN(meta)) < 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + LSN(meta) = argp->metalsn; + } + } else if (op == DB_TXN_APPLY || cmp_p == 0) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + cp = (QUEUE_CURSOR *)dbc->internal; + if ((argp->opcode & QAM_SETFIRST) && + meta->first_recno == argp->old_first) { + if (argp->old_first > argp->new_first) + meta->first_recno = argp->new_first; + else { + if ((ret = __qam_position(dbc, + &meta->first_recno, 0, &exact)) != 0) + goto err; + if (!exact) + meta->first_recno = argp->new_first; + if (cp->page != NULL && + (ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + goto err; + } + } + + if ((argp->opcode & QAM_SETCUR) && + meta->cur_recno == argp->old_cur) { + if (argp->old_cur < argp->new_cur) + meta->cur_recno = argp->new_cur; + else { + if ((ret = __qam_position(dbc, + &meta->cur_recno, 0, &exact)) != 0) + goto err; + if (!exact) + meta->cur_recno = argp->new_cur; + if (cp->page != NULL && + (ret = __qam_fput(dbc, + cp->pgno, cp->page, dbc->priority)) != 0) + goto err; + } + } + + meta->dbmeta.lsn = *lsnp; + } + + if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + + if (0) { +err: (void)__memp_fput(mpf, ip, meta, dbc->priority); + } + +out: REC_CLOSE; +} + +/* + * __qam_del_recover -- + * Recovery function for del. + * Non-extent version or if there is no data (zero len). 
+ * + * PUBLIC: int __qam_del_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_del_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_del_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + QAMDATA *qp; + QMETA *meta; + QPAGE *pagep; + db_pgno_t metapg; + int cmp_n, ret, t_ret; + + COMPQUIET(pagep, NULL); + meta = NULL; + pagep = NULL; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__qam_del_print); + REC_INTRO(__qam_del_read, ip, 0); + + /* Allocate our own cursor without DB_RECOVER as we need a locker. */ + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0) + goto out; + F_SET(dbc, DBC_RECOVER); + + /* Get the meta page before latching the page. */ + metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; + if ((ret = __memp_fget(mpf, &metapg, + ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) + goto err; + + if ((ret = __qam_fget(dbc, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto err; + + if (pagep->pgno == PGNO_INVALID) { + QAM_DIRTY(dbc, argp->pgno, &pagep); + pagep->pgno = argp->pgno; + pagep->type = P_QAMDATA; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + + if (DB_UNDO(op)) { + /* make sure first is behind us */ + if (meta->first_recno == RECNO_OOB || + (QAM_BEFORE_FIRST(meta, argp->recno) && + (meta->first_recno <= meta->cur_recno || + meta->first_recno - + argp->recno < argp->recno - meta->cur_recno))) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->first_recno = argp->recno; + } + + /* Need to undo delete - mark the record as present */ + QAM_DIRTY(dbc, pagep->pgno, &pagep); + qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); + F_SET(qp, QAM_VALID); + + /* + * Move the LSN back to this point; do not move it forward. + * If we're in an abort, because we don't hold a page lock, + * we could foul up a concurrent put. 
Having too late an + * LSN * is harmless in queue except when we're determining + * what we need to roll forward during recovery. [#2588] + */ + if (cmp_n <= 0 && op == DB_TXN_BACKWARD_ROLL) + LSN(pagep) = argp->lsn; + + if (op == DB_TXN_ABORT) + QAM_WAKEUP(dbc, ret); + + } else if (op == DB_TXN_APPLY || (cmp_n > 0 && DB_REDO(op))) { + /* Need to redo delete - clear the valid bit */ + QAM_DIRTY(dbc, pagep->pgno, &pagep); + qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); + F_CLR(qp, QAM_VALID); + + /* + * We only move the LSN forward during replication. + * During recovery we could obscure an update from + * a partially completed transaction while processing + * a hot backup. [#13823] + */ + if (op == DB_TXN_APPLY) + LSN(pagep) = *lsnp; + if ((ret = __qam_fput(dbc, + argp->pgno, pagep, dbc->priority)) != 0) + goto err; + pagep = NULL; + if ((ret = __qam_adjust_first(file_dbp, + dbc, meta, argp->recno)) != 0) + goto err; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +err: if (pagep != NULL && (t_ret = + __qam_fput(dbc, argp->pgno, pagep, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL && (t_ret = + __memp_fput(mpf, ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; +out: REC_CLOSE; +} + +/* + * __qam_delext_recover -- + * Recovery function for del in an extent based queue. + * + * PUBLIC: int __qam_delext_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_delext_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_delext_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + QAMDATA *qp; + QMETA *meta; + QPAGE *pagep; + db_pgno_t metapg; + int cmp_n, ret, t_ret; + + COMPQUIET(pagep, NULL); + meta = NULL; + pagep = NULL; + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__qam_delext_print); + REC_INTRO(__qam_delext_read, ip, 0); + + /* Allocate our own cursor without DB_RECOVER as we need a locker. 
*/ + if ((ret = __db_cursor_int(file_dbp, ip, NULL, + DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0) + goto out; + F_SET(dbc, DBC_RECOVER); + + metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, + DB_MPOOL_EDIT, &meta)) != 0) + goto err; + + if ((ret = __qam_fget(dbc, &argp->pgno, + DB_REDO(op) ? 0 : DB_MPOOL_CREATE, &pagep)) != 0) { + /* + * If we are redoing a delete and the page is not there + * we are done. + */ + if (DB_REDO(op) && (ret == DB_PAGE_NOTFOUND || ret == ENOENT)) + goto done; + goto out; + } + + if (pagep->pgno == PGNO_INVALID) { + QAM_DIRTY(dbc, argp->pgno, &pagep); + pagep->pgno = argp->pgno; + pagep->type = P_QAMDATA; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + + if (DB_UNDO(op)) { + /* make sure first is behind us */ + if (meta->first_recno == RECNO_OOB || + (QAM_BEFORE_FIRST(meta, argp->recno) && + (meta->first_recno <= meta->cur_recno || + meta->first_recno - + argp->recno < argp->recno - meta->cur_recno))) { + meta->first_recno = argp->recno; + } + + QAM_DIRTY(dbc, pagep->pgno, &pagep); + if ((ret = __qam_pitem(dbc, pagep, + argp->indx, argp->recno, &argp->data)) != 0) + goto err; + + /* + * Move the LSN back to this point; do not move it forward. + * If we're in an abort, because we don't hold a page lock, + * we could foul up a concurrent put. Having too late an + * LSN is harmless in queue except when we're determining + * what we need to roll forward during recovery. [#2588] + */ + if (cmp_n <= 0 && op == DB_TXN_BACKWARD_ROLL) + LSN(pagep) = argp->lsn; + + if (op == DB_TXN_ABORT) + QAM_WAKEUP(dbc, ret); + + } else if (op == DB_TXN_APPLY || (cmp_n > 0 && DB_REDO(op))) { + QAM_DIRTY(dbc, pagep->pgno, &pagep); + /* Need to redo delete - clear the valid bit */ + qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); + F_CLR(qp, QAM_VALID); + /* + * We only move the LSN forward during replication. 
+ * During recovery we could obscure an update from + * a partially completed transaction while processing + * a hot backup. [#13823] + */ + if (op == DB_TXN_APPLY) + LSN(pagep) = *lsnp; + if ((ret = __qam_fput(dbc, + argp->pgno, pagep, dbc->priority)) != 0) + goto err; + pagep = NULL; + if ((ret = __qam_adjust_first(file_dbp, + dbc, meta, argp->recno)) != 0) + goto err; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +err: if (pagep != NULL && (t_ret = + __qam_fput(dbc, argp->pgno, pagep, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL && (t_ret = + __memp_fput(mpf, ip, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + +out: REC_CLOSE; +} + +/* + * __qam_add_recover -- + * Recovery function for add. + * + * PUBLIC: int __qam_add_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__qam_add_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __qam_add_args *argp; + DB_THREAD_INFO *ip; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + QAMDATA *qp; + QMETA *meta; + QPAGE *pagep; + db_pgno_t metapg; + int cmp_n, ret; + + COMPQUIET(pagep, NULL); + + ip = ((DB_TXNHEAD *)info)->thread_info; + REC_PRINT(__qam_add_print); + REC_INTRO(__qam_add_read, ip, 1); + + if ((ret = __qam_fget(dbc, &argp->pgno, + DB_UNDO(op) ? 0 : DB_MPOOL_CREATE, &pagep)) != 0) { + /* + * If we are undoing an append and the page is not there + * we are done. + */ + if (DB_UNDO(op) && (ret == DB_PAGE_NOTFOUND || ret == ENOENT)) + goto done; + goto out; + } + + if (pagep->pgno == PGNO_INVALID) { + QAM_DIRTY(dbc, argp->pgno, &pagep); + pagep->pgno = argp->pgno; + pagep->type = P_QAMDATA; + } + + cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); + + if (DB_REDO(op)) { + /* Fix meta-data page. 
*/ + metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; + if ((ret = __memp_fget(mpf, &metapg, ip, NULL, + 0, &meta)) != 0) + goto err; + if (QAM_BEFORE_FIRST(meta, argp->recno)) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->first_recno = argp->recno; + } + if (argp->recno == meta->cur_recno || + QAM_AFTER_CURRENT(meta, argp->recno)) { + REC_DIRTY(mpf, ip, dbc->priority, &meta); + meta->cur_recno = argp->recno + 1; + } + if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0) + goto err; + + /* Now update the actual page if necessary. */ + if (op == DB_TXN_APPLY || cmp_n > 0) { + QAM_DIRTY(dbc, pagep->pgno, &pagep); + /* Need to redo add - put the record on page */ + if ((ret = __qam_pitem(dbc, + pagep, argp->indx, argp->recno, &argp->data)) != 0) + goto err; + /* + * We only move the LSN forward during replication. + * During recovery we could obscure an update from + * a partially completed transaction while processing + * a hot backup. [#13823] + */ + if (op == DB_TXN_APPLY) { + LSN(pagep) = *lsnp; + QAM_WAKEUP(dbc, ret); + } + } + } else if (DB_UNDO(op)) { + /* + * Need to undo add + * If this was an overwrite, put old record back. + * Otherwise just clear the valid bit + */ + if (argp->olddata.size != 0) { + QAM_DIRTY(dbc, pagep->pgno, &pagep); + if ((ret = __qam_pitem(dbc, pagep, + argp->indx, argp->recno, &argp->olddata)) != 0) + goto err; + + if (!(argp->vflag & QAM_VALID)) { + qp = QAM_GET_RECORD( + file_dbp, pagep, argp->indx); + F_CLR(qp, QAM_VALID); + } + } else { + QAM_DIRTY(dbc, pagep->pgno, &pagep); + qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); + qp->flags = 0; + } + + /* + * Move the LSN back to this point; do not move it forward. + * If we're in an abort, because we don't hold a page lock, + * we could foul up a concurrent put. Having too late an + * LSN is harmless in queue except when we're determining + * what we need to roll forward during recovery. 
[#2588] + */ + if (cmp_n <= 0 && op == DB_TXN_BACKWARD_ROLL) + LSN(pagep) = argp->lsn; + } + + if ((ret = __qam_fput(dbc, argp->pgno, pagep, dbc->priority)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; + + if (0) { +err: (void)__qam_fput(dbc, argp->pgno, pagep, dbc->priority); + } + +out: REC_CLOSE; +} diff --git a/src/qam/qam_stat.c b/src/qam/qam_stat.c new file mode 100644 index 00000000..7ee43435 --- /dev/null +++ b/src/qam/qam_stat.c @@ -0,0 +1,255 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" + +#ifdef HAVE_STATISTICS +/* + * __qam_stat -- + * Gather/print the qam statistics + * + * PUBLIC: int __qam_stat __P((DBC *, void *, u_int32_t)); + */ +int +__qam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + DB *dbp; + DB_LOCK lock; + DB_MPOOLFILE *mpf; + DB_QUEUE_STAT *sp; + PAGE *h; + QAMDATA *qp, *ep; + QMETA *meta; + QUEUE *t; + db_indx_t indx; + db_pgno_t first, last, pgno, pg_ext, stop; + u_int32_t re_len; + int ret, t_ret; + + dbp = dbc->dbp; + + LOCK_INIT(lock); + mpf = dbp->mpf; + sp = NULL; + t = dbp->q_internal; + + if (spp == NULL) + return (0); + + /* Allocate and clear the structure. */ + if ((ret = __os_umalloc(dbp->env, sizeof(*sp), &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + + re_len = ((QUEUE *)dbp->q_internal)->re_len; + + /* Determine the last page of the database. 
*/ + if ((ret = __db_lget(dbc, 0, t->q_meta, DB_LOCK_READ, 0, &lock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &t->q_meta, + dbc->thread_info, dbc->txn, 0, &meta)) != 0) + goto err; + + if (flags == DB_FAST_STAT) { + sp->qs_nkeys = meta->dbmeta.key_count; + sp->qs_ndata = meta->dbmeta.record_count; + goto meta_only; + } + + first = QAM_RECNO_PAGE(dbp, meta->first_recno); + last = QAM_RECNO_PAGE(dbp, meta->cur_recno); + + ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + pgno = first; + if (first > last) + stop = QAM_RECNO_PAGE(dbp, UINT32_MAX); + else + stop = last; + + /* Dump each page. */ + pg_ext = ((QUEUE *)dbp->q_internal)->page_ext; +begin: + /* Walk through the pages and count. */ + for (; pgno <= stop; ++pgno) { + if ((ret = + __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) + goto err; + ret = __qam_fget(dbc, &pgno, 0, &h); + if (ret == ENOENT) { + pgno += pg_ext - 1; + continue; + } + if (ret == DB_PAGE_NOTFOUND) { + if (pg_ext == 0) { + if (pgno != stop && first != last) + goto err; + ret = 0; + break; + } + pgno += (pg_ext - ((pgno - 1) % pg_ext)) - 1; + continue; + } + if (ret != 0) + goto err; + + ++sp->qs_pages; + + ep = (QAMDATA *)((u_int8_t *)h + dbp->pgsize - re_len); + for (indx = 0, qp = QAM_GET_RECORD(dbp, h, indx); + qp <= ep; + ++indx, qp = QAM_GET_RECORD(dbp, h, indx)) { + if (F_ISSET(qp, QAM_VALID)) + sp->qs_ndata++; + else + sp->qs_pgfree += re_len; + } + + ret = __qam_fput(dbc, pgno, h, dbc->priority); + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + } + + if ((ret = __LPUT(dbc, lock)) != 0) + goto err; + if (first > last) { + pgno = 1; + stop = last; + first = last; + goto begin; + } + + /* Get the meta-data page. */ + if ((ret = __db_lget(dbc, + 0, t->q_meta, F_ISSET(dbp, DB_AM_RDONLY) ? 
+ DB_LOCK_READ : DB_LOCK_WRITE, 0, &lock)) != 0) + goto err; + if ((ret = __memp_fget(mpf, &t->q_meta, dbc->thread_info, dbc->txn, + F_ISSET(dbp, DB_AM_RDONLY) ? 0 : DB_MPOOL_DIRTY, &meta)) != 0) + goto err; + + if (!F_ISSET(dbp, DB_AM_RDONLY)) + meta->dbmeta.key_count = + meta->dbmeta.record_count = sp->qs_ndata; + sp->qs_nkeys = sp->qs_ndata; + +meta_only: + /* Get the metadata fields. */ + sp->qs_magic = meta->dbmeta.magic; + sp->qs_version = meta->dbmeta.version; + sp->qs_metaflags = meta->dbmeta.flags; + sp->qs_pagesize = meta->dbmeta.pagesize; + sp->qs_extentsize = meta->page_ext; + sp->qs_re_len = meta->re_len; + sp->qs_re_pad = meta->re_pad; + sp->qs_first_recno = meta->first_recno; + sp->qs_cur_recno = meta->cur_recno; + + /* Discard the meta-data page. */ + ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + *(DB_QUEUE_STAT **)spp = sp; + + if (0) { +err: if (sp != NULL) + __os_ufree(dbp->env, sp); + } + + if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qam_stat_print -- + * Display queue statistics. 
+ * + * PUBLIC: int __qam_stat_print __P((DBC *, u_int32_t)); + */ +int +__qam_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DB_QUEUE_STAT *sp; + ENV *env; + int ret; + + dbp = dbc->dbp; + env = dbp->env; + + if ((ret = __qam_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "Default Queue database information:"); + } + __db_msg(env, "%lx\tQueue magic number", (u_long)sp->qs_magic); + __db_msg(env, "%lu\tQueue version number", (u_long)sp->qs_version); + __db_dl(env, "Fixed-length record size", (u_long)sp->qs_re_len); + __db_msg(env, "%#x\tFixed-length record pad", (int)sp->qs_re_pad); + __db_dl(env, + "Underlying database page size", (u_long)sp->qs_pagesize); + __db_dl(env, + "Underlying database extent size", (u_long)sp->qs_extentsize); + __db_dl(env, + "Number of records in the database", (u_long)sp->qs_nkeys); + __db_dl(env, + "Number of data items in the database", (u_long)sp->qs_ndata); + __db_dl(env, "Number of database pages", (u_long)sp->qs_pages); + __db_dl_pct(env, + "Number of bytes free in database pages", + (u_long)sp->qs_pgfree, + DB_PCT_PG(sp->qs_pgfree, sp->qs_pages, sp->qs_pagesize), "ff"); + __db_msg(env, + "%lu\tFirst undeleted record", (u_long)sp->qs_first_recno); + __db_msg(env, + "%lu\tNext available record number", (u_long)sp->qs_cur_recno); + + __os_ufree(env, sp); + + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__qam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbc->env)); +} +#endif diff --git a/src/qam/qam_stub.c b/src/qam/qam_stub.c new file mode 100644 index 00000000..6879961b --- /dev/null +++ b/src/qam/qam_stub.c @@ -0,0 +1,339 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef HAVE_QUEUE +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" + +/* + * If the library wasn't compiled with the Queue access method, various + * routines aren't available. Stub them here, returning an appropriate + * error. + */ + +/* + * __db_no_queue_am -- + * Error when a Berkeley DB build doesn't include the access method. + * + * PUBLIC: int __db_no_queue_am __P((ENV *)); + */ +int +__db_no_queue_am(env) + ENV *env; +{ + __db_errx(env, DB_STR("1145", + "library build did not include support for the Queue access method")); + return (DB_OPNOTSUP); +} + +int +__db_prqueue(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_31_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(buf, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_32_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(buf, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_append(dbc, key, data) + DBC *dbc; + DBT *key, *data; +{ + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + return (__db_no_queue_am(dbc->env)); +} + +int +__qamc_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + COMPQUIET(new_dbc, NULL); + return (__db_no_queue_am(orig_dbc->env)); +} + +int +__qamc_init(dbc) + DBC *dbc; +{ + return (__db_no_queue_am(dbc->env)); +} + +int +__qam_db_close(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(flags, 0); + return (0); +} + +int +__qam_db_create(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__qam_extent_names(env, name, namelistp) + ENV *env; + char *name; + char ***namelistp; +{ + COMPQUIET(name, NULL); + COMPQUIET(namelistp, NULL); + return (__db_no_queue_am(env)); +} + +int +__qam_gen_filelist(dbp, ip, filelistp) + DB 
*dbp; + DB_THREAD_INFO *ip; + QUEUE_FILELIST **filelistp; +{ + COMPQUIET(ip, NULL); + COMPQUIET(filelistp, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__qam_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} + +int +__qam_metachk(dbp, name, qmeta) + DB *dbp; + const char *name; + QMETA *qmeta; +{ + COMPQUIET(name, NULL); + COMPQUIET(qmeta, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_mswap(env, pg) + ENV *env; + PAGE *pg; +{ + COMPQUIET(pg, NULL); + return (__db_no_queue_am(env)); +} + +int +__qam_new_file(dbp, ip, txn, fhp, name) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(name, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_open(dbp, ip, txn, name, base_pgno, mode, flags) + DB *dbp; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + int mode; + u_int32_t flags; +{ + COMPQUIET(ip, NULL); + COMPQUIET(txn, NULL); + COMPQUIET(name, NULL); + COMPQUIET(base_pgno, 0); + COMPQUIET(mode, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_pgin_out(env, pg, pp, cookie) + ENV *env; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_queue_am(env)); +} + +int +__qam_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(h, NULL); + COMPQUIET(handle, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int 
+__qam_set_ext_data(dbp, name) + DB *dbp; + const char *name; +{ + COMPQUIET(name, NULL); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbc->env)); +} + +int +__qam_stat_print(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbc->env)); +} + +int +__qam_sync(dbp) + DB *dbp; +{ + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + COMPQUIET(countp, NULL); + return (__db_no_queue_am(dbc->env)); +} + +int +__qam_vrfy_data(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QPAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(meta, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_vrfy_structure(dbp, vdp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} + +int +__qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(handle, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->env)); +} +#endif /* !HAVE_QUEUE */ diff --git a/src/qam/qam_upgrade.c b/src/qam/qam_upgrade.c new file mode 100644 index 00000000..3ea48238 --- /dev/null +++ b/src/qam/qam_upgrade.c @@ -0,0 +1,101 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_upgrade.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" + +/* + * __qam_31_qammeta -- + * Upgrade the database from version 1 to version 2. + * + * PUBLIC: int __qam_31_qammeta __P((DB *, char *, u_int8_t *)); + */ +int +__qam_31_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + QMETA30 *oldmeta; + QMETA31 *newmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + + newmeta = (QMETA31 *)buf; + oldmeta = (QMETA30 *)buf; + + /* + * Copy the fields to their new locations. + * They may overlap so start at the bottom and use memmove(). + */ + newmeta->rec_page = oldmeta->rec_page; + newmeta->re_pad = oldmeta->re_pad; + newmeta->re_len = oldmeta->re_len; + newmeta->cur_recno = oldmeta->cur_recno; + newmeta->first_recno = oldmeta->first_recno; + newmeta->start = oldmeta->start; + memmove(newmeta->dbmeta.uid, + oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); + newmeta->dbmeta.flags = oldmeta->dbmeta.flags; + newmeta->dbmeta.record_count = 0; + newmeta->dbmeta.key_count = 0; + ZERO_LSN(newmeta->dbmeta.unused3); + + /* Update the version. */ + newmeta->dbmeta.version = 2; + + return (0); +} + +/* + * __qam_32_qammeta -- + * Upgrade the database from version 2 to version 3. + * + * PUBLIC: int __qam_32_qammeta __P((DB *, char *, u_int8_t *)); + */ +int +__qam_32_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + QMETA31 *oldmeta; + QMETA32 *newmeta; + + COMPQUIET(dbp, NULL); + COMPQUIET(real_name, NULL); + + newmeta = (QMETA32 *)buf; + oldmeta = (QMETA31 *)buf; + + /* + * Copy the fields to their new locations. + * We are dropping the first field so move + * from the top. 
+ */ + newmeta->first_recno = oldmeta->first_recno; + newmeta->cur_recno = oldmeta->cur_recno; + newmeta->re_len = oldmeta->re_len; + newmeta->re_pad = oldmeta->re_pad; + newmeta->rec_page = oldmeta->rec_page; + newmeta->page_ext = 0; + /* cur_recno now points to the first free slot. */ + newmeta->cur_recno++; + if (newmeta->first_recno == 0) + newmeta->first_recno = 1; + + /* Update the version. */ + newmeta->dbmeta.version = 3; + + return (0); +} diff --git a/src/qam/qam_verify.c b/src/qam/qam_verify.c new file mode 100644 index 00000000..295f9d83 --- /dev/null +++ b/src/qam/qam_verify.c @@ -0,0 +1,636 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +/* + * __qam_vrfy_meta -- + * Verify the queue-specific part of a metadata page. + * + * PUBLIC: int __qam_vrfy_meta __P((DB *, VRFY_DBINFO *, QMETA *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__qam_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + ENV *env; + QUEUE *qp; + VRFY_PAGEINFO *pip; + db_pgno_t *extents, extid, first, last; + size_t len; + int count, i, isbad, nextents, ret, t_ret; + char *buf, **names; + + COMPQUIET(count, 0); + + env = dbp->env; + qp = (QUEUE *)dbp->q_internal; + extents = NULL; + first = last = 0; + isbad = 0; + buf = NULL; + names = NULL; + + if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) + return (ret); + + /* + * Queue can't be used in subdatabases, so if this isn't set + * something very odd is going on. 
+ */ + if (!F_ISSET(pip, VRFY_INCOMPLETE)) + EPRINT((env, DB_STR_A("1146", + "Page %lu: queue databases must be one-per-file", + "%lu"), (u_long)pgno)); + + /* + * Because the metapage pointers are rolled forward by + * aborting transactions, the extent of the queue may + * extend beyond the allocated pages, so we do + * not check that meta_current is within the allocated + * pages. + */ + + /* + * re_len: If this is bad, we can't safely verify queue data pages, so + * return DB_VERIFY_FATAL + */ + if (DB_ALIGN(meta->re_len + sizeof(QAMDATA) - 1, sizeof(u_int32_t)) * + meta->rec_page + QPAGE_SZ(dbp) > dbp->pgsize) { + EPRINT((env, DB_STR_A("1147", + "Page %lu: queue record length %lu too high for page size and recs/page", + "%lu %lu"), (u_long)pgno, (u_long)meta->re_len)); + ret = DB_VERIFY_FATAL; + goto err; + } else { + /* + * We initialize the Queue internal pointer; we may need + * it when handling extents. It would get set up in open, + * if we called open normally, but we don't. + */ + vdp->re_pad = meta->re_pad; + qp->re_pad = (int)meta->re_pad; + qp->re_len = vdp->re_len = meta->re_len; + qp->rec_page = vdp->rec_page = meta->rec_page; + qp->page_ext = vdp->page_ext = meta->page_ext; + } + + /* + * There's no formal maximum extentsize, and a 0 value represents + * no extents, so there's nothing to verify. + * + * Note that since QUEUE databases can't have subdatabases, it's an + * error to see more than one QUEUE metadata page in a single + * verifier run. Theoretically, this should really be a structure + * rather than a per-page check, but since we're setting qp fields + * here (and have only one qp to set) we raise the alarm now if + * this assumption fails. (We need the qp info to be reasonable + * before we do per-page verification of queue extents.) 
+ */ + if (F_ISSET(vdp, VRFY_QMETA_SET)) { + isbad = 1; + EPRINT((env, DB_STR_A("1148", + "Page %lu: database contains multiple Queue metadata pages", + "%lu"), (u_long)pgno)); + goto err; + } + F_SET(vdp, VRFY_QMETA_SET); + qp->page_ext = meta->page_ext; + dbp->pgsize = meta->dbmeta.pagesize; + qp->q_meta = pgno; + qp->q_root = pgno + 1; + vdp->first_recno = meta->first_recno; + vdp->last_recno = meta->cur_recno; + if (qp->page_ext != 0) { + first = QAM_RECNO_EXTENT(dbp, vdp->first_recno); + last = QAM_RECNO_EXTENT(dbp, vdp->last_recno); + } + + /* + * Look in the data directory to see if there are any extents + * around that are not in the range of the queue. If so, + * then report that and look there if we are salvaging. + */ + + if ((ret = __db_appname(env, + DB_APP_DATA, qp->dir, NULL, &buf)) != 0) + goto err; + if ((ret = __os_dirlist(env, buf, 0, &names, &count)) != 0) + goto err; + __os_free(env, buf); + buf = NULL; + + len = strlen(QUEUE_EXTENT_HEAD) + strlen(qp->name) + 1; + if ((ret = __os_malloc(env, len, &buf)) != 0) + goto err; + len = (size_t)snprintf(buf, len, QUEUE_EXTENT_HEAD, qp->name); + for (i = nextents = 0; i < count; i++) { + if (strncmp(names[i], buf, len) == 0) { + /* Only save extents out of bounds. */ + extid = (db_pgno_t)strtoul(&names[i][len], NULL, 10); + if (qp->page_ext != 0 && + (last > first ? 
+ (extid >= first && extid <= last) : + (extid >= first || extid <= last))) + continue; + if (extents == NULL && (ret = __os_malloc( + env, (size_t)(count - i) * sizeof(extid), + &extents)) != 0) + goto err; + extents[nextents] = extid; + nextents++; + } + } + if (nextents > 0) + __db_errx(env, DB_STR_A("1149", + "Warning: %d extra extent files found", "%d"), nextents); + vdp->nextents = nextents; + vdp->extents = extents; + +err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + if (names != NULL) + __os_dirfree(env, names, count); + if (buf != NULL) + __os_free(env, buf); + if (ret != 0 && extents != NULL) + __os_free(env, extents); + if (LF_ISSET(DB_SALVAGE) && + (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) + ret = t_ret; + return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); +} + +/* + * __qam_meta2pgset -- + * For a given Queue meta page, add all of the db's pages to the pgset. Dealing + * with extents complicates things, as it is possible for there to be gaps in + * the page number sequence (the user could have re-inserted record numbers that + * had been on deleted extents) so we test the existence of each extent before + * adding its pages to the pgset. If there are no extents, just loop from + * first_recno to last_recno. + * + * PUBLIC: int __qam_meta2pgset __P((DB *, VRFY_DBINFO *, DB *)); + */ +int +__qam_meta2pgset(dbp, vdp, pgset) + DB *dbp; + VRFY_DBINFO *vdp; + DB *pgset; +{ + DBC *dbc; + PAGE *h; + db_pgno_t first, last, pgno, pg_ext, stop; + int ret, t_ret; + u_int32_t i; + + ret = 0; + h = NULL; + if (vdp->last_recno <= vdp->first_recno) + return (0); + + pg_ext = vdp->page_ext; + + first = QAM_RECNO_PAGE(dbp, vdp->first_recno); + + /* + * last_recno gives the next recno to be allocated, we want the last + * allocated recno. 
+ */ + last = QAM_RECNO_PAGE(dbp, vdp->last_recno - 1); + + if (first == PGNO_INVALID || last == PGNO_INVALID) + return (DB_VERIFY_BAD); + + pgno = first; + if (first > last) + stop = QAM_RECNO_PAGE(dbp, UINT32_MAX); + else + stop = last; + + /* + * If this db doesn't have extents, just add all page numbers from first + * to last. + */ + if (pg_ext == 0) { + for (pgno = first; pgno <= stop; pgno++) + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, pgno)) != 0) + break; + if (first > last) + for (pgno = 1; pgno <= last; pgno++) + if ((ret = __db_vrfy_pgset_inc(pgset, + vdp->thread_info, vdp->txn, pgno)) != 0) + break; + + return (ret); + } + + if ((ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0) + return (ret); + /* + * Check if we can get the first page of each extent. If we can, then + * add all of that extent's pages to the pgset. If we can't, assume the + * extent doesn't exist and don't add any pages, if we're wrong we'll + * find the pages in __db_vrfy_walkpages. + */ +begin: for (; pgno <= stop; pgno += pg_ext) { + if ((ret = __qam_fget(dbc, &pgno, 0, &h)) != 0) { + if (ret == ENOENT || ret == DB_PAGE_NOTFOUND) { + ret = 0; + continue; + } + goto err; + } + if ((ret = __qam_fput(dbc, pgno, h, dbp->priority)) != 0) + goto err; + + for (i = 0; i < pg_ext && pgno + i <= last; i++) + if ((ret = __db_vrfy_pgset_inc( + pgset, vdp->thread_info, vdp->txn, pgno + i)) != 0) + goto err; + + /* The first recno won't always occur on the first page of the + * extent. Back up to the beginning of the extent before the + * end of the loop so that the increment works correctly. + */ + if (pgno == first) + pgno = pgno % pg_ext + 1; + } + + if (first > last) { + pgno = 1; + first = last; + stop = last; + goto begin; + } + +err: + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __qam_vrfy_data -- + * Verify a queue data page. 
+ * + * PUBLIC: int __qam_vrfy_data __P((DB *, VRFY_DBINFO *, QPAGE *, + * PUBLIC: db_pgno_t, u_int32_t)); + */ +int +__qam_vrfy_data(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QPAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + DB fakedb; + struct __queue fakeq; + QAMDATA *qp; + db_recno_t i; + + /* + * Not much to do here, except make sure that flags are reasonable. + * + * QAM_GET_RECORD assumes a properly initialized q_internal + * structure, however, and we don't have one, so we play + * some gross games to fake it out. + */ + fakedb.q_internal = &fakeq; + fakedb.flags = dbp->flags; + fakeq.re_len = vdp->re_len; + + for (i = 0; i < vdp->rec_page; i++) { + qp = QAM_GET_RECORD(&fakedb, h, i); + if ((u_int8_t *)qp >= (u_int8_t *)h + dbp->pgsize) { + EPRINT((dbp->env, DB_STR_A("1150", + "Page %lu: queue record %lu extends past end of page", + "%lu %lu"), (u_long)pgno, (u_long)i)); + return (DB_VERIFY_BAD); + } + + if (qp->flags & ~(QAM_VALID | QAM_SET)) { + EPRINT((dbp->env, DB_STR_A("1151", + "Page %lu: queue record %lu has bad flags (%#lx)", + "%lu %lu %#lx"), (u_long)pgno, (u_long)i, + (u_long)qp->flags)); + return (DB_VERIFY_BAD); + } + } + + return (0); +} + +/* + * __qam_vrfy_structure -- + * Verify a queue database structure, such as it is. 
+ * + * PUBLIC: int __qam_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t)); + */ +int +__qam_vrfy_structure(dbp, vdp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + u_int32_t flags; +{ + VRFY_PAGEINFO *pip; + db_pgno_t i; + int ret, isbad; + + isbad = 0; + + if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0) + return (ret); + + if (pip->type != P_QAMMETA) { + EPRINT((dbp->env, DB_STR_A("1152", + "Page %lu: queue database has no meta page", "%lu"), + (u_long)PGNO_BASE_MD)); + isbad = 1; + goto err; + } + + if ((ret = __db_vrfy_pgset_inc( + vdp->pgset, vdp->thread_info, vdp->txn, 0)) != 0) + goto err; + + for (i = 1; i <= vdp->last_pgno; i++) { + /* Send feedback to the application about our progress. */ + if (!LF_ISSET(DB_SALVAGE)) + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 || + (ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) + return (ret); + if (!F_ISSET(pip, VRFY_IS_ALLZEROES) && + pip->type != P_QAMDATA) { + EPRINT((dbp->env, DB_STR_A("1153", + "Page %lu: queue database page of incorrect type %lu", + "%lu %lu"), (u_long)i, (u_long)pip->type)); + isbad = 1; + goto err; + } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, + vdp->thread_info, vdp->txn, i)) != 0) + goto err; + } + +err: if ((ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) + return (ret); + return (isbad == 1 ? DB_VERIFY_BAD : 0); +} + +/* + * __qam_vrfy_walkqueue -- + * Do a "walkpages" per-page verification pass over the set of Queue + * extent pages. 
+ * + * PUBLIC: int __qam_vrfy_walkqueue __P((DB *, VRFY_DBINFO *, void *, + * PUBLIC: int (*)(void *, const void *), u_int32_t)); + */ +int +__qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBC *dbc; + ENV *env; + PAGE *h; + QUEUE *qp; + VRFY_PAGEINFO *pip; + db_pgno_t first, i, last, pg_ext, stop; + int isbad, nextents, ret, t_ret; + + COMPQUIET(h, NULL); + + env = dbp->env; + qp = dbp->q_internal; + pip = NULL; + pg_ext = qp->page_ext; + isbad = ret = t_ret = 0; + h = NULL; + + /* If this database has no extents, we've seen all the pages already. */ + if (pg_ext == 0) + return (0); + + first = QAM_RECNO_PAGE(dbp, vdp->first_recno); + last = QAM_RECNO_PAGE(dbp, vdp->last_recno); + + i = first; + if (first > last) + stop = QAM_RECNO_PAGE(dbp, UINT32_MAX); + else + stop = last; + nextents = vdp->nextents; + + /* Verify/salvage each page. */ + if ((ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0) + return (ret); +begin: for (; i <= stop; i++) { + /* + * If DB_SALVAGE is set, we inspect our database of completed + * pages, and skip any we've already printed in the subdb pass. + */ + if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0)) + continue; + if ((t_ret = __qam_fget(dbc, &i, 0, &h)) != 0) { + if (t_ret == ENOENT || t_ret == DB_PAGE_NOTFOUND) { + i += (pg_ext - ((i - 1) % pg_ext)) - 1; + continue; + } + + /* + * If an individual page get fails, keep going iff + * we're salvaging. + */ + if (LF_ISSET(DB_SALVAGE)) { + if (ret == 0) + ret = t_ret; + continue; + } + h = NULL; + ret = t_ret; + goto err; + } + + if (LF_ISSET(DB_SALVAGE)) { + /* + * We pretty much don't want to quit unless a + * bomb hits. May as well return that something + * was screwy, however. 
+ */ + if ((t_ret = __db_salvage_pg(dbp, + vdp, i, h, handle, callback, flags)) != 0) { + if (ret == 0) + ret = t_ret; + isbad = 1; + } + } else { + /* + * If we are not salvaging, and we get any error + * other than DB_VERIFY_BAD, return immediately; + * it may not be safe to proceed. If we get + * DB_VERIFY_BAD, keep going; listing more errors + * may make it easier to diagnose problems and + * determine the magnitude of the corruption. + */ + if ((ret = __db_vrfy_common(dbp, + vdp, h, i, flags)) == DB_VERIFY_BAD) + isbad = 1; + else if (ret != 0) + goto err; + + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) + goto err; + if (F_ISSET(pip, VRFY_IS_ALLZEROES)) + goto put; + if (pip->type != P_QAMDATA) { + EPRINT((env, DB_STR_A("1154", + "Page %lu: queue database page of incorrect type %lu", + "%lu %lu"), (u_long)i, (u_long)pip->type)); + isbad = 1; + goto err; + } + if ((ret = __db_vrfy_pgset_inc(vdp->pgset, + vdp->thread_info, vdp->txn, i)) != 0) + goto err; + if ((ret = __qam_vrfy_data(dbp, vdp, + (QPAGE *)h, i, flags)) == DB_VERIFY_BAD) + isbad = 1; + else if (ret != 0) + goto err; + +put: if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) + goto err1; + pip = NULL; + } + + /* Again, keep going iff we're salvaging. */ + if ((t_ret = __qam_fput(dbc, i, h, dbp->priority)) != 0) { + if (LF_ISSET(DB_SALVAGE)) { + if (ret == 0) + ret = t_ret; + continue; + } + ret = t_ret; + goto err1; + } + } + + if (first > last) { + i = 1; + stop = last; + first = last; + goto begin; + } + + /* + * Now check to see if there were any lingering + * extents and dump their data. 
+ */ + if (LF_ISSET(DB_SALVAGE) && nextents != 0) { + nextents--; + i = 1 + + vdp->extents[nextents] * vdp->page_ext; + stop = i + vdp->page_ext; + goto begin; + } + + if (0) { +err: if (h != NULL && (t_ret = + __qam_fput(dbc, i, h, dbp->priority)) != 0 && ret == 0) + ret = t_ret; + if (pip != NULL && (t_ret = + __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) + ret = t_ret; + } +err1: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); +} + +/* + * __qam_salvage -- + * Safely dump out all recnos and data on a queue page. + * + * PUBLIC: int __qam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__qam_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBT dbt, key; + QAMDATA *qp, *qep; + db_recno_t recno; + int ret, err_ret, t_ret; + u_int32_t pagesize, qlen; + u_int32_t i; + + memset(&dbt, 0, sizeof(DBT)); + memset(&key, 0, sizeof(DBT)); + + err_ret = ret = 0; + + pagesize = (u_int32_t)dbp->mpf->mfp->pagesize; + qlen = ((QUEUE *)dbp->q_internal)->re_len; + dbt.size = qlen; + key.data = &recno; + key.size = sizeof(recno); + recno = (pgno - 1) * QAM_RECNO_PER_PAGE(dbp) + 1; + i = 0; + qep = (QAMDATA *)((u_int8_t *)h + pagesize - qlen); + for (qp = QAM_GET_RECORD(dbp, h, i); qp < qep; + recno++, i++, qp = QAM_GET_RECORD(dbp, h, i)) { + if (F_ISSET(qp, ~(QAM_VALID|QAM_SET))) + continue; + if (!F_ISSET(qp, QAM_SET)) + continue; + + if (!LF_ISSET(DB_AGGRESSIVE) && !F_ISSET(qp, QAM_VALID)) + continue; + + dbt.data = qp->data; + if ((ret = __db_vrfy_prdbt(&key, + 0, " ", handle, callback, 1, 0, vdp)) != 0) + err_ret = ret; + + if ((ret = __db_vrfy_prdbt(&dbt, + 0, " ", handle, callback, 0, 0, vdp)) != 0) + err_ret = ret; + } + + if ((t_ret = 
__db_salvage_markdone(vdp, pgno)) != 0) + return (t_ret); + return ((ret == 0 && err_ret != 0) ? err_ret : ret); +} diff --git a/src/rep/mlease.html b/src/rep/mlease.html new file mode 100644 index 00000000..28bcd50c --- /dev/null +++ b/src/rep/mlease.html @@ -0,0 +1,1198 @@ + + + + + + + Master Lease + + +

+

Master Leases for Berkeley DB

+
+
Susan LoVerso
+sue@sleepycat.com
+Rev 1.1
+2007 Feb 2
+
+


+

+

What are Master Leases?

+A master lease is a mechanism whereby clients grant master-ship rights +to a site and that master, by holding lease rights can provide a  +guarantee of durability to a replication group for a given period of +time.  By granting a lease to a master, +a  client will not participate in an election to elect a new +master until that granted master lease has expired.  By holding a +collection of granted leases, a master will be able to supply +authoritative read requests to applications.  By holding leases a +read operation on a master can guarantee several things to the +application:
+
    +
  1. Authoritative reads: a guarantee that the data being read by the +application is durable and can never be rolled back.
  2. +
  3. Freshness: a guarantee that the data being read by the +application at the master is +not stale.
  4. +
  5. Master viability: a guarantee that a current master with valid +leases will not encounter a duplicate master situation.
    +
  6. +
+

Requirements

+The requirements of DB to support this include:
+
    +
  • After turning them on, users can choose to ignore them in reads +or not.
  • +
  • We are providing read authority on the master only.  A +read on a client is equivalent to a read while ignoring leases.
  • +
  • We guarantee that data committed on a master that has been +read by an application on the +master will not be rolled back.  Data read on a client or +while ignoring leases or data +successfully updated/committed but not read, +may be rolled back.
    +
  • +
  • A master will not return successfully from a read operation +unless it holds a +majority of leases unless leases are ignored.
  • +
  • Master leases will remove the possibility of a current/correct +master being "shot down" by DUPMASTER.  NOTE: Old/Expired +masters may discover a +later master and return DUPMASTER to the application however.
    +
  • +
  • Any send callback failure must result in premature lease +expiration on the master.
    +
  • +
  • Users who change the system clock during master leases void the +guarantee and may get undefined behavior.  We assume time always +runs forward.
    +
  • +
  • Clients are forbidden from participating in elections while they +have an outstanding lease granted to another site.
  • +
  • Clients are forbidden from accepting a new master while they have +an outstanding lease granted to another site.
  • +
  • Clients are forbidden from upgrading themselves to master while +they have an outstanding lease granted to another site.
  • +
  • When asked for a lease grant explicitly by the master, the client +cannot grant the lease to the master unless the LSN in the master's +request has been processed by this client.
    +
  • +
+The requirements of the +application using leases include:
+
    +
  • Users must implement (Base API users on their own, RepMgr users +via configuration) a majority (or larger) ACK policy.
    +
  • +
  • The application must use the election mechanism to decide a master. +It may not simply declare a site master.
  • +
  • The send callback must return an error if the majority ACK policy +is not met for PERM records.
  • +
  • Users must set the number of sites in the group.
  • +
  • Using leases in a replication group is all-or-none.  +Therefore, if a site knows it is using leases, it can assume other +sites are also.
    +
  • +
  • All applications that care about read guarantees must forward or +perform all reads on the master.  Reading on the client means a +read ignoring leases.
  • +
+

There are some open questions +remaining.

+
    +
  • There is one major showstopper issue, see Crashing - Potential +problem near the end of the document.  We need a better solution +than the one shown there (writing to disk every time a lease is +granted). Perhaps just documenting that durability means it must be +flushed to disk before success to avoid that situation?
    +
  • +
  • What about db->join?  Users can call join, but the calls +on the join cursor to get the data would be subject to leases and +therefore protected.  Ok, this is not an open question.
  • +
  • What about other read-like operations?  Clearly +DB->get, DB->pget, DBC->get, +DBC->pget need lease checks.  However, other APIs use +keys.  DB->key_range +provides an estimate only so it shouldn't need lease checks. +DB->stat provides exact counts +to bt_nkeys and bt_ndata fields.  Are those +fields considered authoritative that providing those values implies a +durability guarantee and therefore DB->stat +should be subject to lease verification?  DBC->count +provides a count for +the number of data items associated with a key.  Is this +authoritative information? This is similar to stat - should it be +subject to lease verification?
    +
  • +
  • Do we require master lease checks on write operations?  I +think lease checks are not needed on write operations.  It doesn't +add correctness and adds a lot of complexity (checking leases in put, +del, and cursors, then what about rename, remove, etc).
    +
  • +
  • Do master leases give an iron-clad guarantee of never rolling +back a transaction? No, but it should mean that a committed transaction +can never be read on a master +unless the lease is valid.  A committed transaction on a master +that has never been presented to the application may get rolled back.
    +
  • +
  • Do we need to quarantine or prevent reads on an ex-master until +sync-up is done?  No.  A master that is simply downgraded to +client or crashes and reboots is now a client.  Reading from that +client is the same as saying Ignore Leases.
  • +
  • What about adding and removing sites while leases are +active?  This is SR 14778.  A consistent nsites value +is required by master +leases.    It isn't +clear to me what a master is +supposed to do if the value of nsites gets smaller while leases are +active.  Perhaps it leaves its larger table intact and simply +checks for a smaller number of granted leases?
    +
  • +
  • Can users turn leases off?  No.  There is no planned turn +leases off API.
  • +
  • Clock skew will be a percentage.  However, the smallest, 1%, +is probably rather large for clock skew.  Percentage was chosen +for simplicity and similarity to other APIs.  What granularity is +appropriate here?
  • +
+

API Changes

+The API changes that are visible +to the user are fairly minimal.  +There are a few API calls they need to make to configure master leases +and then there is the API call to turn them on.  There is also a +new flag to existing APIs to allow read operations to ignore leases and +return data that +may be non-durable potentially.
+

Lease Timeout
+

+There is a new timeout the user +must configure for leases called DB_REP_LEASE_TIMEOUT.  +This timeout will be new to +the dbenv->rep_set_timeout method. The DB_REP_LEASE_TIMEOUT +has no default and it is required that the user configure a timeout +before they turn on leases (obviously, this timeout need not be set if +leases will not be used).  That timeout is the amount of time +the lease is valid on the master and how long it is granted +on the client.  This timeout must be the same +value on all sites (like log file size).  The timeout used when +refreshing leases is the DB_REP_ACK_TIMEOUT +for RepMgr application.  For Base API applications, lease +refreshes will use the same mechanism as PERM messages and they +should +have no additional burden.  This timeout is used for lease +refreshment and is the amount of time a reader will wait to refresh +leases before returning failure to the application from a read +operation.
+
+This timeout will be both stored +with its original value, and also +converted to a db_timespec +using the DB_TIMEOUT_TO_TIMESPEC +macro and have the clock skew accounted for and stored in the shared +rep structure:
+
db_timeout_t lease_timeout;
db_timespec lease_duration;
+NOTE:  By sending the lease refresh during DB operations, we are +forcing/assuming that the operation's process has a replication +transport function set.  That is obviously the case for write +operations, but would it be a burden for read processes (on a +master)?  I think mostly not, but if we need leases for +DB->stat then we need to +document it as it is certainly possible for an application to have a +separate or dedicated stat +application or attempt to use db_stat +(which will not work if leases must be checked).
+
+Leases should be checked after the local operation so that we don't +have a window/boundary if we were to check leases first, get +descheduled, then lose our lease and then perform the operation.  +Do the operation, then check leases before returning to the user.
+

Using Leases

+There is a new API that the user must call to tell the system to use +the lease mechanism.  The method must be called before the +application calls dbenv->rep_start +or dbenv->repmgr_start. +This new +method is:
+
+
    dbenv->rep_set_lease(DB_ENV *dbenv, u_int32_t clock_scale_factor, u_int32_t flags)
+
+The clock_scale_factor +parameter is interpreted as a percentage, greater than 100 (to transmit +a floating point number as an integer to the API) that represents the +maximum skew between any two sites' clocks.  That is, a clock_scale_factor of 150 suggests +that the greatest discrepancy between clocks is that one runs 50% +faster than the others.  Both the +master and client sides +compensate for possible clock skew.  The master uses the value to +compensate in case the replica has a slow clock and replicas compensate +in case they have a fast clock.  This scaling factor will need to +be divided by 100 on all sites to truly represent the percentage for +adjustments made to time values.
+
+Assume the slowest replica's clock is a factor of clock_scale_factor +slower than the +fastest clock.  Using that assumption, if the fastest clock goes +from time t1 to t2 in X +seconds, the slowest clock does it in (clock_scale_factor / 100) +* X seconds.
+
+The flags parameter is not +currently used.
+
+When the dbenv->rep_set_lease +method is called, we will set a configuration flag indicating that +leases are turned on:
+#define REP_C_LEASE <value>.  +We will also record the u_int32_t +clock_skew value passed in.  The rep_set_lease method +will not allow +calls after rep_start.  If +multiple calls are made prior to calling rep_start then later +calls will +overwrite the earlier clock skew value. 
+
+We need a new flag to prevent calling rep_set_lease +after rep_start.  The +simplest solution would be to reject the call to +rep_set_lease  +if +REP_F_CLIENT +or REP_F_MASTER is set.  +However that does not work in the cases where a site cleanly closes its +environment and then opens without running recovery.  The +replication state will still be set.  The prevention will be +implemented as:
+
#define REP_F_START_CALLED <some bit value>
+In __rep_start, at the end:
+
if (ret == 0 ) {
REP_SYSTEM_LOCK
F_SET(rep, REP_F_START_CALLED)
REP_SYSTEM_UNLOCK
}
+In __rep_env_refresh, if we +are the last reference closing the env (we already check for that):
+
F_CLR(rep, REP_F_START_CALLED);
+In order to avoid run-time floating point operations +on db_timespec structures, +when a site is declared as a client or master in rep_start we +will pre-compute the +lease duration based on the integer-based clock skew and the +integer-based lease timeout.  A master should set a replica's +lease expiration to the start time of +the sent message + +(lease_timeout / clock_scale_factor) in case the replica has a +slow clock.  Replicas extend their leases to received message +time + (lease_timeout * +clock_scale_factor) in case this replica has a fast clock.  +Therefore, the computation will be as follows if the site is becoming a +master:
+
db_timeout_t tmp;
tmp = (db_timeout_t)((double)rep->lease_timeout / ((double)rep->clock_skew / (double)100));
rep->lease_duration = DB_TIMEOUT_TO_TIMESPEC(&tmp);
+Similarly, on a client the computation is:
+
tmp = (db_timeout_t)((double)rep->lease_timeout * ((double)rep->clock_skew / (double)100));
+When a site changes state, its lease duration will change based on +whether it is becoming a master or client and it will be recomputed +from the original values.  Note that these computations, coupled +with the fact that the lease on the master is computed based on the +master's time that it sent the message means that leases on the master +are more conservatively computed than on the clients.
+
+The dbenv->rep_set_lease +method must be called after dbenv->open, +similar to dbenv->rep_set_config.  +The reason is so that we can check that this is a replication +environment and we have access to the replication shared memory region.
+

Read Operations
+

+Authoritative read operations on the master with leases enabled will +abide by leases by default.  We will provide a flag that allows an +operation on a master to ignore leases.  All read operations +on a client imply +ignoring leases. If an application wants authoritative reads +they must forward the read requests to the master and it is the +application's responsibility to provide the forwarding. +The consensus was that forcing DB_IGNORE_LEASE +on client read operations (with leases enabled, obviously) was too +heavy handed.  Read operations on the client will ignore leases, +but do no special flag checking.
+
+The flag will be called DB_IGNORE_LEASE +and it will be a flag that can be OR'd into the DB access method and +cursor operation values.  It will be similar to the DB_READ_UNCOMMITTED +flag. +
+The methods that will +adhere to leases are:
+
    +
  • Db->get
  • +
  • Db->pget
  • +
  • Dbc->get
  • +
  • Dbc->pget
  • +
+The code that will check leases for a client reading would look +something +like this, if we decide to become heavy-handed:
+
if (IS_REP_CLIENT(dbenv)) {
[get to rep structure]
if (FLD_ISSET(rep->config, REP_C_LEASE) && !LF_ISSET(DB_IGNORE_LEASE)) {
db_err("Read operations must ignore leases or go to master");
ret = EINVAL;
goto err;
}
}
+On the master, the new code to abide by leases is more complex.  +After the call to perform the operation we will check the lease.  +In that checking code, the master will see if it has a valid +lease.  If so, then all is well.  If not, it will try to +refresh the leases.  If that refresh attempt results in leases, +all is well.  If the refresh attempt does not get leases, then the +master cannot respond to the read as an authority and we return an +error.  The new error is called DB_REP_LEASE_EXPIRED.  +The location of the master lease check is down after the internal call +to read the data is successful:
+
if (IS_REP_MASTER(dbenv) && !LF_ISSET(DB_IGNORE_LEASE)) {
[get to rep structure]
if (FLD_ISSET(rep->config, REP_C_LEASE) &&
(ret = __rep_lease_check(dbenv)) != 0) {
/*
* We don't hold the lease.
*/
goto err;
}
}
+See below for the details of __rep_lease_check.
+
+Also note that if leases (or replication) are not configured, then DB_IGNORE_LEASE is a no-op.  It +is ignored (and won't error) if used when leases are not in +effect.  The reason is so that we can generically set that flag in +utility programs like db_dump +that walk the database with a cursor.  Note that db_dump is the only utility that +reads with a cursor.
+

Nsites +and Elections

+The call to dbenv->rep_set_nsites +must be performed before the call to dbenv->rep_start +or dbenv->repmgr_start.  +This document assumes either that SR +14778 gets resolved, or assumes that the value of nsites is +immutable.  The +master and all clients need to know how many sites and leases are in +the group.  Clients need to know for elections.  The master +needs to know for the size of the lease table and to know what value a +majority of the group is. [Until +14778 is resolved, the master lease work must assume nsites is +immutable and will +therefore enforce that this is called before rep_start using +the same mechanism +as rep_set_lease.]
+
+Elections and leases need to agree on the number of sites in the +group.  Therefore, when leases are in effect on clients, all calls +to dbenv->rep_elect must +set the nsites parameter to +0.  The rep_elect code +path will return EINVAL if REP_C_LEASE is set and nsites +is non-0. +

Lease Management

+

Message Changes

+In order for clients to grant leases to the master a new message type +must be added for that purpose.  This will be the REP_LEASE_GRANT +message.  +Granting leases will be a result of applying a DB_REP_PERMANENT +record and therefore we +do not need any additional message in order for a master to request a +lease grant.  The REP_LEASE_GRANT +message will pass a structure as its message DBT:
+
typedef struct __rep_lease_grant {
db_timespec msg_time;
#ifdef DIAGNOSTIC
db_timespec expire_time;
#endif
} REP_GRANT_INFO;
+In the REP_LEASE_GRANT +message, the client is actually giving the master several pieces of +information.  We only need the echoed msg_time in this +structure because +everything else is already sent.  The client is really sending the +master:
+
    +
  • Its EID (parameter to rep_send_message +and rep_process_message)
    +
  • +
  • The PERM LSN this message acknowledged (sent in the control +message)
  • +
  • Unique identifier echoed back to master (msg_time sent in +message as above)
  • +
+On the client, we always maintain the maximum PERM LSN already in lp->max_perm_lsn.  +

Local State Management

+Each client must maintain a db_timespec +timestamp containing the expiration of its granted lease.  This +field will be in the replication shared memory structure:
+
db_timespec grant_expire;
+This timestamp already takes into account the clock skew.  All +new fields must be initialized when the region is created. Whenever we +grant our master lease and want to send the REP_LEASE_GRANT +message, this value +will be updated.  It will be used in the following way: +
db_timespec mytime;
DB_LSN perm_lsn;
DBT lease_dbt;
REP_GRANT_INFO gi;


timespecclear(&mytime);
memset(&lease_dbt, 0, sizeof(lease_dbt));
memset(&gi, 0, sizeof(gi));
__os_gettime(dbenv, &mytime);
timespecadd(&mytime, &rep->lease_duration);
MUTEX_LOCK(rep->clientdb_mutex);
perm_lsn = lp->max_perm_lsn;
MUTEX_UNLOCK(rep->clientdb_mutex);
REP_SYSTEM_LOCK(dbenv);
if (timespeccmp(mytime, rep->grant_expire, >))
rep->grant_expire = mytime;
gi.msg_time = msg->msg_time;
#ifdef DIAGNOSTIC
gi.expire_time = rep->grant_expire;
#endif
lease_dbt.data = &gi;
lease_dbt.size = sizeof(gi);
REP_SYSTEM_UNLOCK(dbenv);
__rep_send_message(dbenv, eid, REP_LEASE_GRANT, &perm_lsn, &lease_dbt, 0, 0);
+This updating of the lease grant will occur in the PERM code +path when we have +successfully applied the permanent record.
+

Maintaining Leases on the +Master/Rep_start

+The master maintains a lease table that it checks when fulfilling a +read request that is subject to leases.  This table is initialized +when a site calls +dbenv->rep_start(DB_MASTER) and the site is undergoing a role +change (i.e. a master making additional calls to dbenv->rep_start(DB_MASTER) +does +not affect an already existing table).
+
+When a non-master site becomes master, it must do two things related to +leases on a role change.  First, a client cannot upgrade to master +while it has an outstanding lease granted to another site.  If a +client attempts to do so, an error, EINVAL, +will be returned.  The only way this should happen is if the +application simply declares a site master, instead of using +elections.  Elections will already wait for leases to expire +before proceeding. (See below.) +
+
+Second, once we are proceeding with becoming a master, the site must +allocate the table it will use to maintain lease information.  +This table will be sized based on nsites +and it will be an array of the following structure:
+
typedef struct {
int eid; /* EID of client site. */
db_timespec start_time; /* Unique time ID client echoes back on grants. */
db_timespec end_time; /* Master's lease expiration time. */
DB_LSN lease_lsn; /* Durable LSN this lease applies to. */
u_int32_t flags; /* Unused for now?? */
} REP_LEASE_ENTRY;
+

Granting Leases

+It is the burden of the application to make sure that all sites in the +group +are using leases, or none are.  Therefore, when a client processes +a PERM +log record that arrived from the master, it will grant its lease +automatically if that record is permanent (i.e. DB_REP_ISPERM +is being returned), +and leases are configured.  A client will not send a +lease grant when it is processing log records (even PERM +ones) it receives from other clients that use client-to-client +synchronization.  The reason is that the master requires a unique +time-of-msg ID (see below) that the client echoes back in its lease +grant and it will not have such an ID from another client.
+
+The master stores a time-of-msg ID in each message and the client +simply echoes it back to the master.  In its lease table, it does +keep the base +time-of-msg for a valid lease.  When REP_LEASE_GRANT +message comes in, +the master does a number of things:
+
    +
  1. Pulls the echoed timespec from the client message, into msg_time.
    +
  2. +
  3. Finds the entry in its lease table for the client's EID.  It +walks the table searching for the ID.  EIDs of DB_EID_INVALID are +illegal.  Either the master will find the entry, or it will find +an empty slot in the table (i.e. it is still populating the table with +leases).
  4. +
  5. If this is a previously unknown site lease, the master +initializes the entry by copying to the eid, start_time, and + lease_lsn fields.  The master +also computes the end_time +based on the adjusted rep->lease_duration.
  6. +
  7. If this is a lease from a previously known site, the master must +perform timespeccmp(&msg_time, +&table[i].start_time, >) and only update the end_time +of the lease when this is +a more recent message.  If it is a more recent message, then we +should update +the lease_lsn to the LSN in +the message.
  8. +
  9. Lease durations are computed taking the clock skew into +account: clients compute them based on the current receive time, while the +master computes them based on the original sending time.  For diagnostic +purposes only, I also plan to send the client's computed expiration time back +to the master.  The +client errs on the side of computing a larger lease expiration time and +the master errs on the side of computing a smaller duration.  +Since both are taking the clock skew +into account, the client's ending expiration time should never be +smaller than +the master's computed expiration time; if it is, their value for clock skew may +not be correct.
    +
  10. +
+Any log records (new or resent) that originate from the master and +result in DB_REP_ISPERM get an +ack.
+
+

Refreshing Leases

+Leases get refreshed when a master receives a REP_LEASE_GRANT +message from a client. There are three pieces to lease +refreshment. 
+

Lazy Lease Refreshing on Read
+

+If the master discovers that leases are +expired during the read operation, it attempts to refresh its +collection of lease grants.  It does this by calling a new +function __rep_lease_refresh.  +This function is very similar to the already-existing function __rep_flush.  +Basically, to +refresh the lease, the master simply needs to resend the last PERM +record to the clients.  The requirements state that when the +application send function returns successfully from sending a PERM +record, the majority of clients have that PERM LSN durable.  We +will have a new public DB error return called DB_REP_LEASE_EXPIRED +that will be +returned back to the caller if the master cannot assert its +authority.  The code will look something like this:
+
/*
* Use lp->max_perm_lsn on the master (currently not used on the master)
* to keep track of the last PERM record written through the logging system.
* need to initialize lp->max_perm_lsn in rep_start on role_chg.
*/
call __rep_send_message on the last PERM record the master wrote, with DB_REP_PERMANENT
if failure
expire leases
return lease expired error to caller
else /* success */
recheck lease table
/*
* We need to recheck the lease table because the client
* lease grant messages may not be processed yet, or got
* lost, or racing with the application's ACK messages or
* whatever.
*/
if we have a majority of valid leases
return success
else
return lease expired error to caller
+

Ongoing Update Refreshment
+

+Second is having the master indicate to +the client it needs to send a lease grant in response to the current +PERM log message.  The problem is +that acknowledgements must contain a master-supplied message timestamp +that the client sends back to the master.  We need to modify the +structure of the  log record messages when leases are configured +so +that when a PERM message is sent, the master sends, and the client +expects, the message timestamp.  There are three fairly +straightforward and different implementations to consider.
+
    +
  1. Adding the timestamp to the REP_CONTROL +structure.  If this option is chosen, then the code trivially +sends back the timestamp in the client's reply.  There is no +special processing done by either side with the message contents.  +So, on a PERM log record, the master will send a non-zero +timestamp.  On a normal log record the timestamp will be zero or +some known invalid value.  If the client sees a non-zero +timestamp, it sends a REP_LEASE_GRANT +with the lp->max_perm_lsn +after applying that log record.  If it is zero, then the client +does nothing different.  The advantage is ease of code.  The +disadvantage is that for mixed version systems, the client is now +dealing with different sized control structures.  We would have to +retain the old control structure so that during a mixed version group +the (upgraded) clients can use, expect and send old control structures +to the master.  This is unfortunate, so let's consider additional +implementations that don't require modifying the control structure.
    +
  2. +
  3. Adding a new REPCTL_LEASE +flag to the list of flags for the control structure, but do not change +the control structure fields.  When a master wants to send a +message that needs a lease ack, it sets the flag.  Additionally, +instead of simply sending a log record DBT as the rec parameter +for replication, we +would send a new structure that had the timestamp first and then the +record (similar to the bulk transfer buffer).  The advantage of +this is that the control structure does not change.  Disadvantages +include more special-cased code in the normal code path where we have +to check the flag.  If the flag is set we have to extract the +timestamp value and massage the incoming data to pass on the real log +record to rep_apply.  On +bulk transfer, we would just add the timestamp into the buffer.  +On normal transfers, it would incur an additional data copy on the +master side.  That is unfortunate.  Additionally, if this +record needs to be stored in the temp db, we need some way to get it +back again later or rep_apply +would have to extract the timestamp out when it processed the record +(either live or from the temp db).
    +
  4. +
  5. Adding a different message type, such as REP_LOG_ACK.  +Similarly to REP_LOG_MORE this message would be a +special-case version of a log record.  We would extract out the +timestamp and then handle as a normal log record.  This +implementation is rejected because it actually would require three new +message types: REP_LOG_ACK, +REP_LOG_ACK_MORE, REP_BULK_LOG_ACK.  That is just too ugly +to contemplate.
  6. +
+[Slight digression: it occurs +to me while writing about #2 and #3 above, that our implementation of +all of the *_MORE messages could really be implemented with a REPCTL_MORE +flag instead of a +separate message type.  We should clean that up and simplify the +messages but not part of master leases. Hmm, taking that thought +process further, we really could get rid of the REP_BULK_* +messages as well if we +added a REPCTL_BULK +flag.  I think we should definitely do it for the *_MORE +messages.  I am not sure we should do it for bulk because the +structure of the incoming data record is vastly different.]
+
+Of these options, I believe that modifying the control structure is the +best alternative.  The handling of the old structure will be very +isolated to code dealing with old versions and is far less complicated +than injecting the timestamp into the log record DBT and doing a data +copy.  Actually, I will likely combine #1 and the flag from #2 +above.  I will have the REPCTL_LEASE +flag that indicates a lease grant reply is expected and have the +timestamp in the control structure.  +Also I will probably add in a spare field or two for future use in the REP_CONTROL +structure.
+

Gap processing

+No matter which implementation we choose for ongoing lease refreshment, +gap processing must be considered.  The code above assumes the +timestamps will be placed on PERM records only.  Normal log +records will not have a timestamp, nor a flag or anything else like +that.  However, any log message can fill a gap on a client and +result in the processing of that normal log record to return DB_REP_ISPERM +because later records +were also processed.
+
+The current implementation should work fine in that case because when +we store the message in the client temp db we store both the control +DBT and the record DBT.  Therefore, when a normal record fills a +gap, the later PERM record, when retrieved will look just like it did +when it arrived.  The client will have access to the LSN, and the +timestamp, etc.  However, it does mean that sending the REP_LEASE_GRANT +message must take +place down in __rep_apply +because that is the only place we have access to the contents of those +stored records with the timestamps.
+
+There are two logical choices to consider for granting the lease when +processing an update.  As we process (either a live record or one +read from the temp db after filling a gap) a PERM message, we send the REP_LEASE_GRANT +message for each +PERM record we successfully apply.  Or, second, we keep track of +the largest timestamp of all PERM records we've processed and at the +end of the function after we've applied all records, we send back a +single lease grant with the max_perm_lsn +and a new max_lease_timestamp +value to the master.  The first is easier to implement, the second +results in possibly slightly fewer messages at the expense of more +bookkeeping on the client.
+
+A third, more complicated option would be to have the message timestamp +on all records, but grants are only sent on the PERM messages.  A +reason to do this is that the later timestamp of a normal log record +would be used as the timestamp sent in the reply and the master would +get a more up to date timestamp value and a longer lease. 
+
+If we change the REP_CONTROL +structure to include the timestamp, we potentially break or at least +need to revisit the gap processing algorithm.  That code assumes +that the control and record elements for the same LSN look the same +each and every time.  The code stores the control DBT as the key and the rec DBT as the data.  We use a +specialized compare function to sort based on the LSN in the control +DBT.  With master leases, the same record for the same LSN, transmitted +multiple times by a master or client, will be different because the +timestamp field will not be the same.  Therefore, the client will +end up with duplicate entries in the temp database for the same +LSN.  Both solutions (adding the timestamp to REP_CONTROL and adding a REPCTL_LEASE flag) can yield +duplicate entries.  The flag would cause the same record from the +master and client to be different as well.
+

Handling Incoming Lease Grants
+

+The third piece of lease management is handling the incoming REP_LEASE_GRANT +message on the +master.  When this message is received, the master must do the +following:
+
REP_SYSTEM_LOCK
msg_timestamp = cntrl->timestamp;
client_lease = __rep_lease_entry(dbenv, client eid)
if (client_lease == NULL)
initial lease for this site, DB_ASSERT there is space in the table
add this to the table if there is space
} else
compare msg_timestamp with client_lease->start_time
if (msg_timestamp is more recent && msg_lsn >= lease LSN)
update entry in table
REP_SYSTEM_UNLOCK
+

Expiring Leases

+Leases can expire in two ways.  First they can expire naturally +due to the passage of time.  When checking leases, if the current +time is later than the lease entry's end_time +then the lease is expired.  Second, they can be forced with a +premature expiration when the application's transport function returns +an error.  In the first case, there is nothing to do, in the +second case we need to manipulate the end_time +so that all future lease checks fail.  Since the lease start_time +is guaranteed to not be in the future we will have a function __rep_lease_expire +that will:
+
REP_SYSTEM_LOCK
for each entry in the lease table
entry->end_time = entry->start_time;
REP_SYSTEM_UNLOCK
+Is there a potential race or problem with prematurely expiring +leases?  Consider an application that enforces an ALL +acknowledgement policy for PERM records in its transport +callback.  There are four clients and three send the PERM ack to +the application.  The callback returns an error to the master DB +code.  The DB code will now prematurely expire its leases.  +However, at approximately the same time the three clients are also +sending their REP_LEASE_GRANT +messages to the master.  There is a race between the master +processing those messages and the thread handling the callback failure +expiring the table.  This is only an issue if the messages arrive +after the table has been expired.
+
+Let's assume all three clients send their grants after the master +expires the table.  If we accept those grants and then a read +occurs the read will succeed since the master has a majority of leases +even though the callback failed earlier.  Is that a problem?  +The lease code is using a majority and the application policy is using +some other value.  It feels like this should be okay since +the data is held by leases on a majority.  Should we consider +having the lease checking threshold be the same as the permanent ack +policy?  That is difficult because Base API users implement +whatever they want and DB does not know what it is.
+

Checking Leases

+When a read operation on the master completes, the last thing we need +to do is verify the master leases.  We've already discussed +refreshing them when they are expired above.  We need two things +for a lease to be valid.  It must be within the timeframe of the +lease grant and the lease must be valid for the last PERM record +LSN.  Here is the logic +for checking the validity of leases in __rep_lease_check:
+
#define MAX_REFRESH_TRIES	3
DB_LSN lease_lsn;
REP_LEASE_ENTRY *entry;
u_int32_t min_leases, valid_leases;
db_timespec cur_time;
int ret, tries;

tries = 0;
retry:
ret = 0;
LOG_SYSTEM_LOCK
lease_lsn = lp->lsn
LOG_SYSTEM_UNLOCK
REP_SYSTEM_LOCK
min_leases = rep->nsites / 2;
__os_gettime(dbenv, &cur_time);
for (entry = head of table, valid_leases = 0; entry != NULL && valid_leases < min_leases; entry++)
if (timespeccmp(&entry->end_time, &cur_time) >= 0 && log_compare(&entry->lease_lsn, &lease_lsn) == 0)
valid_leases++;
REP_SYSTEM_UNLOCK
if (valid_leases < min_leases) {
ret = __rep_lease_refresh(dbenv, ...);
/*
* If we are successful, we need to recheck the leases because
* the lease grant messages may have raced with the PERM
* acknowledgement. Give those messages a chance to arrive.
*/
if (ret == 0) {
if (tries <= MAX_REFRESH_TRIES) {
/*
* If we were successful sending, but not successful in racing the
* message thread, yield the processor so that message
* threads may have a chance to run.
*/
if (tries > 0)
/* __os_sleep instead?? */
__os_yield()
tries++;
goto retry;
} else
ret = DB_REP_LEASE_EXPIRED;
}
}
return (ret);
+If the master has enough valid leases it returns success.  If it +does not have enough, it attempts to refresh them.  This attempt +may fail if sending the PERM record does not receive sufficient +acks.  If we do receive sufficient acknowledgements we may still +find that scheduling of message threads means the master hasn't yet +processed the incoming REP_LEASE_GRANT +messages yet.  We will retry a couple times (possibly +parameterized) if the master discovers that situation. 
+

Elections

+When a client grants a lease to a master, it gives up the right to +participate in an election until that grant expires.  If we are +the master and dbenv->rep_elect +is called, it should return, no matter what, like it does today.  +If we are a client and rep_elect +is called special processing takes place when leases are in +effect.  First, the easy case is if the lease granted by this +client has already expired, then the client goes directly into the +election as normal.  If a valid lease grant is outstanding to a +master, this site cannot participate in an election until that grant +expires.  We have at least two options when a site calls the dbenv->rep_elect +API while +leases are in effect.
+
    +
  1. The simplest coding solution for DB would be simply to refuse to +participate in the election if this site has a current lease granted to +a master.  We would detect this situation and return EINVAL.  +This is correct behavior and trivial to implement.  The +disadvantage of this solution is that the application would then be +responsible for repeatedly attempting an election until the lease grant +expired.
    +
  2. +
  3. The more satisfying solution is for DB to wait the remaining time +for the grant.  If this client hears from the master during that +time the election does not take place and the call to rep_elect +returns with the +information for the current/old master.
  4. +
+

Election Code Changes

+The code changes to support leases in the election code are fairly +isolated.  First if leases are configured, we must verify the nsites +parameter is set to 0.  +Second, in __rep_elect_init +we must not overwrite the value of rep->nsites +for leases because it is controlled by the dbenv->rep_set_nsites +API.  +These changes are small and easy to understand.
+
+The more complicated code will be the client code when it has an +outstanding lease granted.  The client will wait for the current +lease grant to expire before proceeding with the election.  The +client will only do so if it does not hear from the master for the +remainder of the lease grant time.  If the client hears from the +master, it returns and does not begin participating in the +election.  A new election phase, REP_EPHASE0 +will exist so that the call to __rep_wait +can detect if a master responds.  The client, while waiting for +the lease grant to expire, will send a REP_MASTER_REQ +message so that the master will respond with a REP_NEWMASTER +message and thus, +allow the client to know the master exists.  However, it is also +desirable that if the master +replies to the client, the master wants the client to update its lease +grant. 
+
+Recall that the REP_NEWMASTER +message does not result in a lease grant from the client.  The +client responds when it processes a PERM record that has the REPCTL_LEASE +flag set in the message +with its lease grant up to the given LSN.  Therefore, we want the +client's REP_MASTER_REQ to +yield both the discovery of the existing master and have the master +refresh its leases.  The client will also use the REPCTL_LEASE +flag in its REP_MASTER_REQ message to the +master.  This flag will serve as the indicator to the master that +it needs to deal with leases and both send the REP_NEWMASTER +message and refresh +the lease.
+The code will work as follows:
+
if (leases_configured && (my_grant_still_valid || lease_never_granted)) {
if (lease_never_granted)
wait_time = lease_timeout
else
wait_time = grant_expiration - current_time
F_SET(REP_F_EPHASE0);
__rep_send_message(..., REP_MASTER_REQ, ... REPCTL_LEASE);
ret = __rep_wait(..., REP_F_EPHASE0);
if (we found a master)
return
} /* if we don't return, fall out and proceed with election */
+On the master side, the code handling the REP_MASTER_REQ will +do:
+
if (I am master) {
...
__rep_send_message(REP_NEWMASTER...)
if (F_ISSET(rp, REPCTL_LEASE))
__rep_lease_refresh(...)
}
+Other minor implementation details are that __rep_elect_done +must also clear +the REP_F_EPHASE0 flag.  +We also, obviously, need to define REP_F_EPHASE0 +in the list of replication flags.  Note that the client's call to __rep_wait +will return upon +receiving the REP_NEWMASTER +message.  The client will independently refresh its lease when it +receives the log record from the master's call to refresh the lease.
+
+Again, similar to what I suggested above, the code could simply assume +global leases are configured, and instead of having the REPCTL_LEASE +flag at all, the master +assumes that it needs to refresh leases because it has them configured, +not because it is specified in the REP_MASTER_REQ +message it is processing. Right now I don't think every possible +REP_MASTER_REQ message should result in a lease grant request.
+

Elections and Quiescent Systems

+It is possible that a master is slow or the client is close to its +expiration time, or that the master is quiescent and all leases are +currently expired, but nothing much is going on anyway, yet some client +calls __rep_elect at that +time.  In the code above, we will not send the REP_MASTER_REQ +because the lease is +not valid.  The client will simply proceed directly to sending the +REP_VOTE1 message, throwing all +other clients into an election.  The master is still master and +should stay that way.  Currently in response to a vote message, a +master will broadcast out a REP_NEWMASTER +to assert its mastership.  That causes the election to +complete.  However, if desired the master may want to proactively +refresh its leases.  This situation indicates to me that the +master should choose to refresh leases based on configuration, not a +flag sent from the client.  I believe anytime the master asserts +its mastership via sending a REP_NEWMASTER +message that I need to add code to proactively refresh leases at that +time.
+

Other Implementation Details

+

Role Changes
+

+When a site changes its role via a call to rep_start in either +direction, we +must take action when leases are configured.  There are three +types of role changes that all need changes to deal with leases:
+
    +
  1. A master downgrading to a +client. When a master downgrades to a client, it can do so +immediately after it has proactively expired all existing leases it +holds.  This situation is similar to an error from the send +callback, and it effectively cancels all outstanding leases held on +this site.  Note that if this master expires its leases, it does +not have any effect on when the clients' lease grants expire on the +client side.  The clients must still wait their full expected +grant time.
    +
  2. +
  3. A client upgrading to master. +If a client is upgrading to a master but it has an outstanding lease +granted to another site, the code will return an EINVAL +error.  This situation +only arises if the application simply declares this site master.  +If a site wins an election then the election itself should have waited +long enough for the granted lease to expire and this state should not +arise then.
  4. +
  5. A client finding a new master. +When a client discovers a new and different master, via a REP_NEWMASTER +message then the +client cannot accept that new master until its current lease grant +expires.  This situation should only occur when a site declares +itself master without an election and that site's lease grant expires +before this client's grant expires.  However, it is possible +for this situation to arise +with elections also.  If we have 5 sites holding an election and 4 +of those sites have leases expire at about the same time T, and this +site's lease expires at time T+N and the election timeout is < N, +then those 4 sites may hold an election and elect a master without this +site's participation.  A client in this situation must call __rep_wait +with the time remaining +on its lease.  If the lease is expired after waiting the remaining +time, then the client can accept this new master.  If the lease +was refreshed during the waiting period then the client does not accept +this new master and returns.
    +
  6. +
+

DUPMASTER

+A duplicate master situation can occur if an old master becomes +disconnected from the rest of the group, that group elects a new master +and then the partition is resolved.  The requirement for master +leases is that this situation will not cause the newly elected, +rightful master to receive the DB_REP_DUPMASTER +return.  It is okay for the old master to get that return +value.  When a dual master situation exists, the following will +happen:
+
    +
  • On the current master and all +current clients - If the current master receives an update +message or other conflicting message from the old master then that +message will be ignored because the generation number is out of date.
  • +
  • On the old master - If +the old master receives an update message from the current master, or +any other message with a later generation from any site, the new +generation number will trigger this site to return DB_REP_DUPMASTER.  +However, +instead of broadcasting out the REP_DUPMASTER +message to shoot down others as well, this site, if leases are +configured, will call __rep_lease_check +and if they are expired, return the error.  It should be +impossible for us to receive a later generation message and still hold +a majority of master leases.  Something is seriously wrong and we +will DB_ASSERT this situation +cannot happen.
    +
  • +
+

Client to Client Synchronization

+One question to ask is how lease grants interact with client-to-client +synchronization. The only answer is that they do not.  A client +that is sending log records to another client cannot request the +receiving client refresh its lease with the master.  That client +does not have a timestamp it can use for the master and clock skew +makes it meaningless between machines.  Therefore, sites that use +client-to-client synchronization will likely see more lease refreshment +during the read path and leases will be refreshed during live updates +only.  Of course, if a client supplies log records that fill a +gap, and the later log records stored came from the master in a live +update then the client will respond as per the discussion on Gap +Processing above.
+

Interaction Matrix

+If leases are granted (by a client) or held (by a master) what should +the following APIs and messages do?
+
+Other:
+log_archive: Leases do not affect log_archive.  OK.
+dbenv->close: OK.
+crash during lease grant and restart: Potential +problem here.  See discussion below.
+
+Rep Base API method:
+rep_elect: Already discussed above.  Must wait for lease to expire.
+rep_flush: Master only, OK - this will be the basis for refreshing +leases.
+rep_get_*: Not affected by leases.
+rep_process_message: Generally OK.  We'll discuss each message +below.
+rep_set_config: OK.
+rep_set_limit: OK
+rep_set_nsites: Must be called before rep_start +and nsites is immutable until +14778 is resolved.
+rep_set_priority: OK
+rep_set_timeout: OK.  Used to set lease timeout.
+rep_set_transport: OK.
+rep_start(MASTER): Role changes are discussed above.  Make sure +duplicate rep_start calls are no-ops for leases.
+rep_start(CLIENT): Role changes are discussed above.  Make sure +duplicate calls are no-ops for leases.
+rep_stat: OK.
+rep_sync: Should not be able to happen.  Client cannot accept new +master with outstanding lease grant.  Add DB_ASSERT here.
+
+REP_ALIVE: OK.
+REP_ALIVE_REQ: OK.
+REP_ALL_REQ: OK.
+REP_BULK_LOG: OK.  Clients check to send ACK.
+REP_BULK_PAGE: Should never process one with lease granted.  Add +DB_ASSERT.
+REP_DUPMASTER: Should never happen, this is what leases are supposed to +prevent.  See above.
+REP_LOG: OK.  Clients check to send ACK.
+REP_LOG_MORE: OK.  Clients check to send ACK.
+REP_LOG_REQ: OK.
+REP_MASTER_REQ: OK.
+REP_NEWCLIENT: OK.
+REP_NEWFILE: OK.  Clients check to send ACK.
+REP_NEWMASTER: See above.
+REP_NEWSITE: OK.
+REP_PAGE: OK.  Should never process one with lease granted.  +Add DB_ASSERT.
+REP_PAGE_FAIL:  OK.  Should never process one with lease +granted.  Add DB_ASSERT.
+REP_PAGE_MORE:  OK.  Should never process one with lease +granted.  Add DB_ASSERT.
+REP_PAGE_REQ: OK.
+REP_REREQUEST: OK.
+REP_UPDATE: OK.  Should never process one with lease +granted.  Add DB_ASSERT.
+REP_UPDATE_REQ: OK.  This is a master-only message.
+REP_VERIFY: OK.  Should never process one with lease +granted.  Add DB_ASSERT.
+REP_VERIFY_FAIL: OK.  Should never process one with lease +granted.  Add DB_ASSERT.
+REP_VERIFY_REQ: OK.
+REP_VOTE1: OK.  See Election discussion above.  It is +possible to receive one with a lease granted.  Client cannot send +one with an outstanding lease however.
+REP_VOTE2: OK.  See Election discussion above.  It is +possible to receive one with a lease granted.
+
+If the following method or message processing is in progress and a +client wants to grant a lease, what should it do?  Let's examine +what this means.  The client wanting to grant a lease simply means +it is responding to the receipt of a REP_LOG +(or its variants) message and applying a log record.  Therefore, +we need to consider a thread processing a log message racing with these +other actions.
+
+Other:
+log_archive: OK. 
+dbenv->close: User error.  User should not be closing the env +while other threads are using that handle.  Should have no effect +if a 2nd dbenv handle to same env is closed.
+
+Rep Base API method:
+rep_elect: See Election discussion above.  rep_elect +should wait and may grant +lease while election is in progress.
+rep_flush: Should not be called on client.
+rep_get_*: OK.
+rep_process_message: Generally OK.  See handling each message +below.
+rep_set_config: OK.
+rep_set_limit: OK.
+rep_set_nsites: Must be called before rep_start +until 14778 is resolved.
+rep_set_priority: OK.
+rep_set_timeout: OK.
+rep_set_transport: OK.
+rep_start(MASTER): OK, can't happen - already protect racing rep_start +and rep_process_message.
+rep_start(CLIENT): OK, can't happen - already protect racing rep_start +and rep_process_message.
+rep_stat: OK.
+rep_sync: Shouldn't happen because client cannot grant leases during +sync-up.  Incoming log message ignored.
+
+REP_ALIVE: OK.
+REP_ALIVE_REQ: OK.
+REP_ALL_REQ: OK.
+REP_BULK_LOG: OK.
+REP_BULK_PAGE: OK.  Incoming log message ignored during internal +init.
+REP_DUPMASTER: Shouldn't happen.  See DUPMASTER discussion above.
+REP_LOG: OK.
+REP_LOG_MORE: OK.
+REP_LOG_REQ: OK.
+REP_MASTER_REQ: OK.
+REP_NEWCLIENT: OK.
+REP_NEWFILE: OK.
+REP_NEWMASTER: See above.  If a client accepts a new master +because its lease grant expired, then that master sends a message +requesting the lease grant, this client will not process the log record +if it is in sync-up recovery, or it may after the master switch is +complete and the client doesn't need sync-up recovery.  Basically, +just uses existing log record processing/newmaster infrastructure.
+REP_NEWSITE: OK.
+REP_PAGE: OK.  Receiving a log record during internal init PAGE +phase should ignore log record.
+REP_PAGE_FAIL: OK.
+REP_PAGE_MORE: OK.
+REP_PAGE_REQ: OK.
+REP_REREQUEST: OK.
+REP_UPDATE: OK.  Receiving a log record during internal init +should ignore log record.
+REP_UPDATE_REQ: OK - master-only message.
+REP_VERIFY: OK.  Receiving a log record during verify phase +ignores log record.
+REP_VERIFY_FAIL: OK.
+REP_VERIFY_REQ: OK.
+REP_VOTE1: OK.  This client is processing someone else's vote when +the lease request comes in.  That is fine.  We protect our +own election and lease interaction in __rep_elect.
+REP_VOTE2: OK.
+

Crashing - Potential Problem
+

+It appears there is one area where we could have a problem.  I +believe that crashes can cause us to break our guarantee on durability, +authoritative reads and inability to elect duplicate masters.  +Consider this scenario:
+
    +
  1. A master and 4 clients are all up and running.
  2. +
  3. The master commits a txn and all 4 clients refresh their lease +grants at time T.
  4. +
  5. All 4 clients have the txn and log records in the cache.  +None are flushing to disk.
  6. +
  7. All 4 clients have responded to the PERM messages as well as +refreshed their lease with the master.
  8. +
  9. All 4 clients hit the same application coding error and crash +(machine/OS stays up).
  10. +
  11. Master authoritatively reads data in txn from step 2.
  12. +
  13. All 4 clients restart the application and run recovery, thus the +txn from step 2 is lost on all clients because it isn't in any logs.
    +
  14. +
  15. A network partition happens and the master is alone on its side.
  16. +
  17. All 4 clients are on the other side and elect a new master.
  18. +
  19. Partition resolves itself and we have duplicate masters, where +the former master still holds all valid lease grants.
    +
  20. +
+Therefore, we have broken both guarantees.  In step 6 the data is +really not durable and we've given it to the user.  One can argue +that if this is an issue the application better be syncing somewhere if +they really want durability.  However, worse than that is that we +have a legitimate DUPMASTER situation in step 10 where both masters +hold valid leases.  The reason is that all lease knowledge is in +the shared memory and that is lost when the app restarts and runs +recovery.
+
+How can we solve this?  The obvious solution is (ugh, yet another) +durable BDB-owned file with some information in it, such as the current +lease expiration time so that rebooting after a crash leaves the +knowledge that the lease was granted.  However, writing and +syncing every lease grant on every client out to disk is far too +expensive.
+
+A second possible solution is to have clients wait a full lease timeout +before entering an election the first time. This solution solves the +DUPMASTER issue, but not the non-authoritative read.  This +solution naturally falls out of elections and leases really.  If a +client has never granted a lease, it should be considered as having to +wait a full lease timeout before entering an election.  +Applications already know that leases impact elections and this does +not seem so bad as it is only on the first election.
+
+Is it sufficient to document that the authoritative read is only as +authoritative as the durability guarantees they make on the sites that +indicate it is permanent? Yes, I believe this is sufficient.  If +the application says it is permanent and it really isn't, then the +application is at fault.  Believing the application when it +indicates with the PERM response that it is permanent avoids the +authoritative problem. 
+

Upgrade/Mixed Versions

+Clearly leases cannot be used with mixed version sites since masters +running older releases will not have any knowledge of lease +support.  What considerations are needed in the lease code for +mixed versions?
+
+First if the REP_CONTROL +structure changes, we need to maintain and use an old version of the +structure for talking to older clients and masters.  The +implementation of this would be similar to the way we manage for old REP_VOTE_INFO +structures.  +Second any new messages need translation table entries added.  +Third, if we are assuming global leases then clearly any mixed versions +cannot have leases configured, and leases cannot be used in mixed +version groups.  Maintaining two versions of the control structure +is not necessary if we choose a different style of implementation and +don't change the control structure.
+
+However, then how could an old application both run continuously, +upgrade to the new release and take advantage of leases without taking +down the entire application?  I believe it is possible for clients +to be configured for leases but be subject to the master regarding +leases, yet the master code can assume that if it has leases +configured, all client sites do as well.  In several places above +I suggested that a client could make a choice based on either a new REPCTL_LEASE +flag or simply having +leases turned on locally.  If we choose to use the flag, then we +can support leases with mixed versions.  The upgraded clients can +configure leases and they simply will not be granted until the old +master is upgraded and send PERM message with the flag indicating it +wants a lease grant.  The client will not grant a lease until such +time.  The clients, while having the leases configured, will not +grant a lease until told to do so and will simply have an expired +lease.  Then, when the old master finally upgrades, it too can +configure leases and suddenly all sites are using them.  I believe +this should work just fine and I will need to make sure a client's +granting of leases is only in response to the master asking for a +grant.  If the master never asks, then the client has them +configured, but doesn't grant them.
+

Testing

+Clearly any user-facing API changes will need the equivalent reflection +in the Tcl API for testing, under CONFIG_TEST.
+
+I am sure the list of tests will grow but off the top of my head:
+Basic test: have N sites all configure leases, run some,  read on +master, etc.
+Refresh test: Perform update on master, sleep until past expiration, +read on master and make sure leases are refreshed/read successful
+Error test: Test error conditions (reading on client with leases but no +ignore flag, calling after rep_start, etc)
+Read test: Test reading on both client and master both with and without +the IGNORE flag.  Test that data read with the ignore flag can be +rolled back.
+Dupmaster test: Force a DUPMASTER situation and verify that the newer +master cannot get DUPMASTER error.
+Election test: Call election while grant is outstanding and master +exists.
+Call election while grant is outstanding and master does not exist.
+Call election after expiration on a quiescent system with master +existing.
+Run with a group where some members have leases configured and others do +not to make sure we get errors instead of dumping core.
+
+
+
+ + diff --git a/src/rep/rep.msg b/src/rep/rep.msg new file mode 100644 index 00000000..ecd0d68b --- /dev/null +++ b/src/rep/rep.msg @@ -0,0 +1,144 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +DBPRIVATE +PREFIX __rep + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/mp.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * bulk - message for bulk log records or pages + */ +BEGIN_MSG bulk check_length +ARG len u_int32_t +ARG lsn DB_LSN +ARG bulkdata DBT +END + +/* + * control - replication control message + */ +BEGIN_MSG control check_length +ARG rep_version u_int32_t +ARG log_version u_int32_t +ARG lsn DB_LSN +ARG rectype u_int32_t +ARG gen u_int32_t +ARG msg_sec u_int32_t +ARG msg_nsec u_int32_t +ARG flags u_int32_t +END + +/* + * egen data + */ +BEGIN_MSG egen check_length +ARG egen u_int32_t +END + +/* + * file info + */ +BEGIN_MSG fileinfo alloc check_length version +ARG pgsize u_int32_t +ARG pgno db_pgno_t +ARG max_pgno db_pgno_t +ARG filenum u_int32_t +ARG finfo_flags u_int32_t +ARG type u_int32_t +ARG db_flags u_int32_t +ARG uid DBT +ARG info DBT +END + +/* + * grant info - clients send to masters granting a lease. + */ +BEGIN_MSG grant_info check_length +ARG msg_sec u_int32_t +ARG msg_nsec u_int32_t +END + +/* + * We do not need to do anything with LOG record data. + * It is opaque data to us. + */ + +/* + * log request + */ +BEGIN_MSG logreq check_length +ARG endlsn DB_LSN +END + +/* + * We do not need to do anything with NEWCLIENT/NEWSITE cdata dbt. + * It is user data and the app has to do whatever transformation + * it needs to with its own data. 
+ */ +/* + * newfile version + */ +BEGIN_MSG newfile check_length +ARG version u_int32_t +END + +/* + * update - send update information + */ +BEGIN_MSG update alloc check_length version +ARG first_lsn DB_LSN +ARG first_vers u_int32_t +ARG num_files u_int32_t +END + +/* + * vote info. Current version. + */ +BEGIN_MSG vote_info check_length +ARG egen u_int32_t +ARG nsites u_int32_t +ARG nvotes u_int32_t +ARG priority u_int32_t +ARG spare_pri u_int32_t +ARG tiebreaker u_int32_t +ARG data_gen u_int32_t +END +/* + * vote info old version from REPVERSION 5 and earlier. + */ +BEGIN_MSG vote_info_v5 check_length +ARG egen u_int32_t +ARG nsites u_int32_t +ARG nvotes u_int32_t +ARG priority u_int32_t +ARG tiebreaker u_int32_t +END + +/* + * LSN history database - key + */ +BEGIN_MSG lsn_hist_key +ARG version u_int32_t +ARG gen u_int32_t +END + +/* + * LSN history database - data + */ +BEGIN_MSG lsn_hist_data +ARG envid u_int32_t +ARG lsn DB_LSN +ARG hist_sec u_int32_t +ARG hist_nsec u_int32_t +END diff --git a/src/rep/rep_automsg.c b/src/rep/rep_automsg.c new file mode 100644 index 00000000..fbe112c2 --- /dev/null +++ b/src/rep/rep_automsg.c @@ -0,0 +1,827 @@ +/* Do not edit: automatically built by gen_msg.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __rep_bulk_marshal __P((ENV *, __rep_bulk_args *, + * PUBLIC: u_int8_t *, size_t, size_t *)); + */ +int +__rep_bulk_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_bulk_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_BULK_SIZE + + (size_t)argp->bulkdata.size) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->len); + DB_HTONL_COPYOUT(env, bp, argp->lsn.file); + DB_HTONL_COPYOUT(env, bp, argp->lsn.offset); + DB_HTONL_COPYOUT(env, bp, argp->bulkdata.size); + if (argp->bulkdata.size > 0) { + memcpy(bp, argp->bulkdata.data, argp->bulkdata.size); + bp += argp->bulkdata.size; + } + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_bulk_unmarshal __P((ENV *, __rep_bulk_args *, + * PUBLIC: u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_bulk_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_bulk_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + size_t needed; + + needed = __REP_BULK_SIZE; + if (max < needed) + goto too_few; + DB_NTOHL_COPYIN(env, argp->len, bp); + DB_NTOHL_COPYIN(env, argp->lsn.file, bp); + DB_NTOHL_COPYIN(env, argp->lsn.offset, bp); + DB_NTOHL_COPYIN(env, argp->bulkdata.size, bp); + argp->bulkdata.data = bp; + needed += (size_t)argp->bulkdata.size; + if (max < needed) + goto too_few; + bp += argp->bulkdata.size; + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_bulk message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_control_marshal __P((ENV *, __rep_control_args *, + * PUBLIC: u_int8_t *, size_t, size_t *)); + */ +int +__rep_control_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_control_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + 
if (max < __REP_CONTROL_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->rep_version); + DB_HTONL_COPYOUT(env, bp, argp->log_version); + DB_HTONL_COPYOUT(env, bp, argp->lsn.file); + DB_HTONL_COPYOUT(env, bp, argp->lsn.offset); + DB_HTONL_COPYOUT(env, bp, argp->rectype); + DB_HTONL_COPYOUT(env, bp, argp->gen); + DB_HTONL_COPYOUT(env, bp, argp->msg_sec); + DB_HTONL_COPYOUT(env, bp, argp->msg_nsec); + DB_HTONL_COPYOUT(env, bp, argp->flags); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_control_unmarshal __P((ENV *, + * PUBLIC: __rep_control_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_control_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_control_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_CONTROL_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->rep_version, bp); + DB_NTOHL_COPYIN(env, argp->log_version, bp); + DB_NTOHL_COPYIN(env, argp->lsn.file, bp); + DB_NTOHL_COPYIN(env, argp->lsn.offset, bp); + DB_NTOHL_COPYIN(env, argp->rectype, bp); + DB_NTOHL_COPYIN(env, argp->gen, bp); + DB_NTOHL_COPYIN(env, argp->msg_sec, bp); + DB_NTOHL_COPYIN(env, argp->msg_nsec, bp); + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_control message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_egen_marshal __P((ENV *, __rep_egen_args *, + * PUBLIC: u_int8_t *, size_t, size_t *)); + */ +int +__rep_egen_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_egen_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_EGEN_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->egen); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_egen_unmarshal __P((ENV *, __rep_egen_args *, + * PUBLIC: u_int8_t *, size_t, u_int8_t **)); + */ +int 
+__rep_egen_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_egen_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_EGEN_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->egen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_egen message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_fileinfo_marshal __P((ENV *, u_int32_t, + * PUBLIC: __rep_fileinfo_args *, u_int8_t *, size_t, size_t *)); + */ +int +__rep_fileinfo_marshal(env, version, argp, bp, max, lenp) + ENV *env; + u_int32_t version; + __rep_fileinfo_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + int copy_only; + u_int8_t *start; + + if (max < __REP_FILEINFO_SIZE + + (size_t)argp->uid.size + + (size_t)argp->info.size) + return (ENOMEM); + start = bp; + + copy_only = 0; + if (version < DB_REPVERSION_47) + copy_only = 1; + if (copy_only) { + memcpy(bp, &argp->pgsize, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->pgsize); + if (copy_only) { + memcpy(bp, &argp->pgno, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->pgno); + if (copy_only) { + memcpy(bp, &argp->max_pgno, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->max_pgno); + if (copy_only) { + memcpy(bp, &argp->filenum, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->filenum); + if (copy_only) { + memcpy(bp, &argp->finfo_flags, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->finfo_flags); + if (copy_only) { + memcpy(bp, &argp->type, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->type); + if (copy_only) { + memcpy(bp, &argp->db_flags, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->db_flags); + if (copy_only) { + 
memcpy(bp, &argp->uid.size, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->uid.size); + if (argp->uid.size > 0) { + memcpy(bp, argp->uid.data, argp->uid.size); + bp += argp->uid.size; + } + if (copy_only) { + memcpy(bp, &argp->info.size, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->info.size); + if (argp->info.size > 0) { + memcpy(bp, argp->info.data, argp->info.size); + bp += argp->info.size; + } + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_fileinfo_unmarshal __P((ENV *, u_int32_t, + * PUBLIC: __rep_fileinfo_args **, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_fileinfo_unmarshal(env, version, argpp, bp, max, nextp) + ENV *env; + u_int32_t version; + __rep_fileinfo_args **argpp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + size_t needed; + __rep_fileinfo_args *argp; + int ret; + int copy_only; + + needed = __REP_FILEINFO_SIZE; + if (max < needed) + goto too_few; + if ((ret = __os_malloc(env, sizeof(*argp), &argp)) != 0) + return (ret); + + copy_only = 0; + if (version < DB_REPVERSION_47) + copy_only = 1; + if (copy_only) { + memcpy(&argp->pgsize, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->pgsize, bp); + if (copy_only) { + memcpy(&argp->pgno, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->pgno, bp); + if (copy_only) { + memcpy(&argp->max_pgno, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->max_pgno, bp); + if (copy_only) { + memcpy(&argp->filenum, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->filenum, bp); + if (copy_only) { + memcpy(&argp->finfo_flags, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->finfo_flags, bp); + if (copy_only) { + memcpy(&argp->type, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + 
DB_NTOHL_COPYIN(env, argp->type, bp); + if (copy_only) { + memcpy(&argp->db_flags, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->db_flags, bp); + if (copy_only) { + memcpy(&argp->uid.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->uid.size, bp); + argp->uid.data = bp; + needed += (size_t)argp->uid.size; + if (max < needed) + goto too_few; + bp += argp->uid.size; + if (copy_only) { + memcpy(&argp->info.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->info.size, bp); + argp->info.data = bp; + needed += (size_t)argp->info.size; + if (max < needed) + goto too_few; + bp += argp->info.size; + + if (nextp != NULL) + *nextp = bp; + *argpp = argp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_fileinfo message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_grant_info_marshal __P((ENV *, + * PUBLIC: __rep_grant_info_args *, u_int8_t *, size_t, size_t *)); + */ +int +__rep_grant_info_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_grant_info_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_GRANT_INFO_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->msg_sec); + DB_HTONL_COPYOUT(env, bp, argp->msg_nsec); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_grant_info_unmarshal __P((ENV *, + * PUBLIC: __rep_grant_info_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_grant_info_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_grant_info_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_GRANT_INFO_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->msg_sec, bp); + DB_NTOHL_COPYIN(env, argp->msg_nsec, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a 
__rep_grant_info message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_logreq_marshal __P((ENV *, __rep_logreq_args *, + * PUBLIC: u_int8_t *, size_t, size_t *)); + */ +int +__rep_logreq_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_logreq_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_LOGREQ_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->endlsn.file); + DB_HTONL_COPYOUT(env, bp, argp->endlsn.offset); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_logreq_unmarshal __P((ENV *, __rep_logreq_args *, + * PUBLIC: u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_logreq_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_logreq_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_LOGREQ_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->endlsn.file, bp); + DB_NTOHL_COPYIN(env, argp->endlsn.offset, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_logreq message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_newfile_marshal __P((ENV *, __rep_newfile_args *, + * PUBLIC: u_int8_t *, size_t, size_t *)); + */ +int +__rep_newfile_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_newfile_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_NEWFILE_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->version); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_newfile_unmarshal __P((ENV *, + * PUBLIC: __rep_newfile_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_newfile_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_newfile_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_NEWFILE_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->version, bp); + + if (nextp != NULL) + *nextp = bp; 
+ return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_newfile message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_update_marshal __P((ENV *, u_int32_t, + * PUBLIC: __rep_update_args *, u_int8_t *, size_t, size_t *)); + */ +int +__rep_update_marshal(env, version, argp, bp, max, lenp) + ENV *env; + u_int32_t version; + __rep_update_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + int copy_only; + u_int8_t *start; + + if (max < __REP_UPDATE_SIZE) + return (ENOMEM); + start = bp; + + copy_only = 0; + if (version < DB_REPVERSION_47) + copy_only = 1; + if (copy_only) { + memcpy(bp, &argp->first_lsn.file, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + memcpy(bp, &argp->first_lsn.offset, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + DB_HTONL_COPYOUT(env, bp, argp->first_lsn.file); + DB_HTONL_COPYOUT(env, bp, argp->first_lsn.offset); + } + if (copy_only) { + memcpy(bp, &argp->first_vers, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->first_vers); + if (copy_only) { + memcpy(bp, &argp->num_files, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_HTONL_COPYOUT(env, bp, argp->num_files); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_update_unmarshal __P((ENV *, u_int32_t, + * PUBLIC: __rep_update_args **, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_update_unmarshal(env, version, argpp, bp, max, nextp) + ENV *env; + u_int32_t version; + __rep_update_args **argpp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + __rep_update_args *argp; + int ret; + int copy_only; + + if (max < __REP_UPDATE_SIZE) + goto too_few; + if ((ret = __os_malloc(env, sizeof(*argp), &argp)) != 0) + return (ret); + + copy_only = 0; + if (version < DB_REPVERSION_47) + copy_only = 1; + if (copy_only) { + memcpy(&argp->first_lsn.file, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + memcpy(&argp->first_lsn.offset, bp, 
sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + DB_NTOHL_COPYIN(env, argp->first_lsn.file, bp); + DB_NTOHL_COPYIN(env, argp->first_lsn.offset, bp); + } + if (copy_only) { + memcpy(&argp->first_vers, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->first_vers, bp); + if (copy_only) { + memcpy(&argp->num_files, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else + DB_NTOHL_COPYIN(env, argp->num_files, bp); + + if (nextp != NULL) + *nextp = bp; + *argpp = argp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_update message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_vote_info_marshal __P((ENV *, + * PUBLIC: __rep_vote_info_args *, u_int8_t *, size_t, size_t *)); + */ +int +__rep_vote_info_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_vote_info_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_VOTE_INFO_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->egen); + DB_HTONL_COPYOUT(env, bp, argp->nsites); + DB_HTONL_COPYOUT(env, bp, argp->nvotes); + DB_HTONL_COPYOUT(env, bp, argp->priority); + DB_HTONL_COPYOUT(env, bp, argp->spare_pri); + DB_HTONL_COPYOUT(env, bp, argp->tiebreaker); + DB_HTONL_COPYOUT(env, bp, argp->data_gen); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_vote_info_unmarshal __P((ENV *, + * PUBLIC: __rep_vote_info_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_vote_info_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_vote_info_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_VOTE_INFO_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->egen, bp); + DB_NTOHL_COPYIN(env, argp->nsites, bp); + DB_NTOHL_COPYIN(env, argp->nvotes, bp); + DB_NTOHL_COPYIN(env, argp->priority, bp); + DB_NTOHL_COPYIN(env, argp->spare_pri, bp); + DB_NTOHL_COPYIN(env, argp->tiebreaker, bp); + 
DB_NTOHL_COPYIN(env, argp->data_gen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_vote_info message")); + return (EINVAL); +} + +/* + * PUBLIC: int __rep_vote_info_v5_marshal __P((ENV *, + * PUBLIC: __rep_vote_info_v5_args *, u_int8_t *, size_t, size_t *)); + */ +int +__rep_vote_info_v5_marshal(env, argp, bp, max, lenp) + ENV *env; + __rep_vote_info_v5_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REP_VOTE_INFO_V5_SIZE) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->egen); + DB_HTONL_COPYOUT(env, bp, argp->nsites); + DB_HTONL_COPYOUT(env, bp, argp->nvotes); + DB_HTONL_COPYOUT(env, bp, argp->priority); + DB_HTONL_COPYOUT(env, bp, argp->tiebreaker); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __rep_vote_info_v5_unmarshal __P((ENV *, + * PUBLIC: __rep_vote_info_v5_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_vote_info_v5_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_vote_info_v5_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_VOTE_INFO_V5_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->egen, bp); + DB_NTOHL_COPYIN(env, argp->nsites, bp); + DB_NTOHL_COPYIN(env, argp->nvotes, bp); + DB_NTOHL_COPYIN(env, argp->priority, bp); + DB_NTOHL_COPYIN(env, argp->tiebreaker, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_vote_info_v5 message")); + return (EINVAL); +} + +/* + * PUBLIC: void __rep_lsn_hist_key_marshal __P((ENV *, + * PUBLIC: __rep_lsn_hist_key_args *, u_int8_t *)); + */ +void +__rep_lsn_hist_key_marshal(env, argp, bp) + ENV *env; + __rep_lsn_hist_key_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->version); + DB_HTONL_COPYOUT(env, bp, argp->gen); +} + +/* + * PUBLIC: int __rep_lsn_hist_key_unmarshal 
__P((ENV *, + * PUBLIC: __rep_lsn_hist_key_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_lsn_hist_key_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_lsn_hist_key_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_LSN_HIST_KEY_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->version, bp); + DB_NTOHL_COPYIN(env, argp->gen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_lsn_hist_key message")); + return (EINVAL); +} + +/* + * PUBLIC: void __rep_lsn_hist_data_marshal __P((ENV *, + * PUBLIC: __rep_lsn_hist_data_args *, u_int8_t *)); + */ +void +__rep_lsn_hist_data_marshal(env, argp, bp) + ENV *env; + __rep_lsn_hist_data_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->envid); + DB_HTONL_COPYOUT(env, bp, argp->lsn.file); + DB_HTONL_COPYOUT(env, bp, argp->lsn.offset); + DB_HTONL_COPYOUT(env, bp, argp->hist_sec); + DB_HTONL_COPYOUT(env, bp, argp->hist_nsec); +} + +/* + * PUBLIC: int __rep_lsn_hist_data_unmarshal __P((ENV *, + * PUBLIC: __rep_lsn_hist_data_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__rep_lsn_hist_data_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __rep_lsn_hist_data_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REP_LSN_HIST_DATA_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->envid, bp); + DB_NTOHL_COPYIN(env, argp->lsn.file, bp); + DB_NTOHL_COPYIN(env, argp->lsn.offset, bp); + DB_NTOHL_COPYIN(env, argp->hist_sec, bp); + DB_NTOHL_COPYIN(env, argp->hist_nsec, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __rep_lsn_hist_data message")); + return (EINVAL); +} + diff --git a/src/rep/rep_backup.c b/src/rep/rep_backup.c new file mode 100644 index 00000000..a125c90a --- /dev/null +++ b/src/rep/rep_backup.c @@ -0,0 +1,3373 @@ +/*- + * See the file LICENSE 
for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * Context information needed for buffer management during the building of a + * list of database files present in the environment. When fully built, the + * buffer is in the form of an UPDATE message: a (marshaled) update_args, + * followed by some number of (marshaled) fileinfo_args. + * + * Note that the fileinfo for the first file in the list always appears at + * (constant) offset __REP_UPDATE_SIZE in the buffer. + */ +typedef struct { + u_int8_t *buf; /* Buffer base address. */ + u_int32_t size; /* Total allocated buffer size. */ + u_int8_t *fillptr; /* Pointer to first unused space. */ + u_int32_t count; /* Number of entries currently in list. */ + u_int32_t version; /* Rep version of marshaled format. */ +} FILE_LIST_CTX; +#define FIRST_FILE_PTR(buf) ((buf) + __REP_UPDATE_SIZE) + +/* + * Function that performs any desired processing on a single file, as part of + * the traversal of a list of database files, such as with internal init. 
+ */ +typedef int (FILE_WALK_FN) __P((ENV *, __rep_fileinfo_args *, void *)); + +static FILE_WALK_FN __rep_check_uid; +static int __rep_clean_interrupted __P((ENV *)); +static FILE_WALK_FN __rep_cleanup_nimdbs; +static int __rep_filedone __P((ENV *, DB_THREAD_INFO *ip, int, + REP *, __rep_fileinfo_args *, u_int32_t)); +static int __rep_find_dbs __P((ENV *, FILE_LIST_CTX *)); +static FILE_WALK_FN __rep_find_inmem; +static int __rep_get_fileinfo __P((ENV *, const char *, + const char *, __rep_fileinfo_args *, u_int8_t *)); +static int __rep_get_file_list __P((ENV *, + DB_FH *, u_int32_t, u_int32_t *, DBT *)); +static int __rep_is_replicated_db __P((const char *, const char *)); +static int __rep_log_setup __P((ENV *, + REP *, u_int32_t, u_int32_t, DB_LSN *)); +static int __rep_mpf_open __P((ENV *, DB_MPOOLFILE **, + __rep_fileinfo_args *, u_int32_t)); +static int __rep_nextfile __P((ENV *, int, REP *)); +static int __rep_page_gap __P((ENV *, + REP *, __rep_fileinfo_args *, u_int32_t)); +static int __rep_page_sendpages __P((ENV *, DB_THREAD_INFO *, int, + __rep_control_args *, __rep_fileinfo_args *, DB_MPOOLFILE *, DB *)); +static int __rep_queue_filedone __P((ENV *, + DB_THREAD_INFO *, REP *, __rep_fileinfo_args *)); +static int __rep_remove_all __P((ENV *, u_int32_t, DBT *)); +static FILE_WALK_FN __rep_remove_by_list; +static int __rep_remove_by_prefix __P((ENV *, const char *, const char *, + size_t, APPNAME)); +static FILE_WALK_FN __rep_remove_file; +static int __rep_remove_logs __P((ENV *)); +static int __rep_remove_nimdbs __P((ENV *)); +static int __rep_rollback __P((ENV *, DB_LSN *)); +static int __rep_unlink_by_list __P((ENV *, u_int32_t, + u_int8_t *, u_int32_t, u_int32_t)); +static FILE_WALK_FN __rep_unlink_file; +static int __rep_walk_filelist __P((ENV *, u_int32_t, u_int8_t *, + u_int32_t, u_int32_t, FILE_WALK_FN *, void *)); +static int __rep_walk_dir __P((ENV *, const char *, FILE_LIST_CTX*)); +static int __rep_write_page __P((ENV *, + DB_THREAD_INFO *, 
REP *, __rep_fileinfo_args *)); + +/* + * __rep_update_req - + * Process an update_req and send the file information to clients. + * + * PUBLIC: int __rep_update_req __P((ENV *, __rep_control_args *)); + */ +int +__rep_update_req(env, rp) + ENV *env; + __rep_control_args *rp; +{ + DBT updbt, vdbt; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn; + DB_REP *db_rep; + REP *rep; + __rep_update_args u_args; + FILE_LIST_CTX context; + size_t updlen; + u_int32_t flag, version; + int ret, t_ret; + + /* + * Start by allocating 1Meg, which ought to be plenty enough to describe + * all databases in the environment. (If it's not, __rep_walk_dir can + * grow the size.) + * + * The data we send looks like this: + * __rep_update_args + * __rep_fileinfo_args + * __rep_fileinfo_args + * ... + */ + db_rep = env->rep_handle; + rep = db_rep->region; + + REP_SYSTEM_LOCK(env); + if (F_ISSET(rep, REP_F_INUPDREQ)) { + REP_SYSTEM_UNLOCK(env); + return (0); + } + F_SET(rep, REP_F_INUPDREQ); + REP_SYSTEM_UNLOCK(env); + + dblp = env->lg_handle; + logc = NULL; + if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0) + goto err_noalloc; + context.size = MEGABYTE; + context.count = 0; + context.version = rp->rep_version; + + /* Reserve space for the update_args, and fill in file info. */ + context.fillptr = FIRST_FILE_PTR(context.buf); + if ((ret = __rep_find_dbs(env, &context)) != 0) + goto err; + + /* + * Now get our first LSN. We send the lsn of the first + * non-archivable log file. + */ + flag = DB_SET; + if ((ret = __log_get_stable_lsn(env, &lsn, 0)) != 0) { + if (ret != DB_NOTFOUND) + goto err; + /* + * If ret is DB_NOTFOUND then there is no checkpoint + * in this log, that is okay, just start at the beginning. + */ + ret = 0; + flag = DB_FIRST; + } + + /* + * Now get the version number of the log file of that LSN. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + + memset(&vdbt, 0, sizeof(vdbt)); + /* + * Set our log cursor on the LSN we are sending. 
Or + * to the first LSN if we have no stable LSN. + */ + if ((ret = __logc_get(logc, &lsn, &vdbt, flag)) != 0) { + /* + * We could be racing a fresh master starting up. If we + * have no log records, assume an initial LSN and current + * log version. + */ + if (ret != DB_NOTFOUND) + goto err; + INIT_LSN(lsn); + version = DB_LOGVERSION; + } else { + if ((ret = __logc_version(logc, &version)) != 0) + goto err; + } + /* + * Package up the update information. + */ + u_args.first_lsn = lsn; + u_args.first_vers = version; + u_args.num_files = context.count; + if ((ret = __rep_update_marshal(env, rp->rep_version, + &u_args, context.buf, __REP_UPDATE_SIZE, &updlen)) != 0) + goto err; + DB_ASSERT(env, updlen == __REP_UPDATE_SIZE); + + /* + * We have all the file information now. Send it. + */ + DB_INIT_DBT(updbt, context.buf, context.fillptr - context.buf); + + LOG_SYSTEM_LOCK(env); + lsn = ((LOG *)dblp->reginfo.primary)->lsn; + LOG_SYSTEM_UNLOCK(env); + (void)__rep_send_message( + env, DB_EID_BROADCAST, REP_UPDATE, &lsn, &updbt, 0, 0); + +err: __os_free(env, context.buf); +err_noalloc: + if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + REP_SYSTEM_LOCK(env); + F_CLR(rep, REP_F_INUPDREQ); + REP_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __rep_find_dbs - + * Walk through all the named files/databases including those in the + * environment or data_dirs and those that in named and in-memory. We + * need to open them, gather the necessary information and then close + * them. + * + * May be called either while holding REP_SYSTEM_LOCK or without. + */ +static int +__rep_find_dbs(env, context) + ENV *env; + FILE_LIST_CTX *context; +{ + DB_ENV *dbenv; + int ret; + char **ddir, *real_dir; + + dbenv = env->dbenv; + ret = 0; + real_dir = NULL; + + /* + * If we have a data directory, walk it get a list of the + * replicated user databases. 
+ */ + if (dbenv->db_data_dir != NULL) { + for (ddir = dbenv->db_data_dir; *ddir != NULL; ++ddir) { + if ((ret = __db_appname(env, + DB_APP_NONE, *ddir, NULL, &real_dir)) != 0) + break; + if ((ret = __rep_walk_dir(env, + real_dir, context)) != 0) + break; + __os_free(env, real_dir); + real_dir = NULL; + } + } + /* + * Walk the environment directory to get a list of the + * replication subsystem's persistent internal system databases. + * If the application does not have a separate data directory, + * then the walk_dir will return all the user databases as well. + */ + if (ret == 0) + ret = __rep_walk_dir(env, env->db_home, context); + + /* Now, collect any in-memory named databases. */ + if (ret == 0) + ret = __rep_walk_dir(env, NULL, context); + + if (real_dir != NULL) + __os_free(env, real_dir); + return (ret); +} + +/* + * __rep_walk_dir -- + * + * This is the routine that walks a directory and fills in the structures + * that we use to generate messages to the client telling it what + * files are available. If the directory name is NULL, then we should + * walk the list of in-memory named files. + */ +static int +__rep_walk_dir(env, dir, context) + ENV *env; + const char *dir; + FILE_LIST_CTX *context; +{ + __rep_fileinfo_args tmpfp; + size_t avail, len; + int cnt, first_file, i, ret; + u_int8_t uid[DB_FILE_ID_LEN]; + char *file, **names, *subdb; + + if (dir == NULL) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Walk_dir: Getting info for in-memory named files")); + if ((ret = __memp_inmemlist(env, &names, &cnt)) != 0) + return (ret); + } else { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Walk_dir: Getting info for dir: %s", dir)); + if ((ret = __os_dirlist(env, dir, 0, &names, &cnt)) != 0) + return (ret); + } + VPRINT(env, (env, DB_VERB_REP_SYNC, "Walk_dir: Dir %s has %d files", + (dir == NULL) ? 
"INMEM" : dir, cnt)); + first_file = 1; + for (i = 0; i < cnt; i++) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Walk_dir: File %d name: %s", i, names[i])); + if (!__rep_is_replicated_db(names[i], dir)) + continue; + + /* We found a file to process. */ + if (dir == NULL) { + file = NULL; + subdb = names[i]; + } else { + file = names[i]; + subdb = NULL; + } + if ((ret = __rep_get_fileinfo(env, + file, subdb, &tmpfp, uid)) != 0) { + /* + * If we find a file that isn't a database, skip it. + */ + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Walk_dir: File %d %s: returned error %s", + i, names[i], db_strerror(ret))); + ret = 0; + continue; + } + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Walk_dir: File %s at 0x%lx: pgsize %lu, max_pgno %lu", + names[i], P_TO_ULONG(context->fillptr), + (u_long)tmpfp.pgsize, (u_long)tmpfp.max_pgno)); + + /* + * On the first time through the loop, check to see if the file + * we're about to add is already on the list. If it is, it must + * have been added in a previous call, and that means the + * directory we're currently scanning has already been scanned + * before. (This can happen if the user called + * env->set_data_dir() more than once for the same directory.) + * If that's the case, we're done: not only is it a waste of + * time to scan the same directory again, but doing so would + * result in the same files appearing in the list more than + * once. + */ + if (first_file && dir != NULL && + (ret = __rep_walk_filelist(env, context->version, + FIRST_FILE_PTR(context->buf), context->size, + context->count, __rep_check_uid, uid)) != 0) { + if (ret == DB_KEYEXIST) + ret = 0; + goto err; + } + first_file = 0; + + /* + * Finally we know that this file is a suitable database file + * that we haven't yet included on our list. 
+ */ + tmpfp.filenum = context->count++; + + DB_SET_DBT(tmpfp.info, names[i], strlen(names[i]) + 1); + DB_SET_DBT(tmpfp.uid, uid, DB_FILE_ID_LEN); +retry: avail = (size_t)(&context->buf[context->size] - + context->fillptr); + ret = __rep_fileinfo_marshal(env, context->version, + &tmpfp, context->fillptr, avail, &len); + if (ret == ENOMEM) { + /* + * Here, 'len' is the total space in use in the buffer. + */ + len = (size_t)(context->fillptr - context->buf); + context->size *= 2; + + if ((ret = __os_realloc(env, + context->size, &context->buf)) != 0) + goto err; + context->fillptr = context->buf + len; + + /* + * Now that we've reallocated the space, try to + * store it again. + */ + goto retry; + } + /* + * Here, 'len' (still) holds the length of the marshaled + * information about the current file (as filled in by the last + * call to __rep_fileinfo_marshal()). + */ + context->fillptr += len; + } +err: + __os_dirfree(env, names, cnt); + return (ret); +} + +/* + * Returns a boolean to indicate whether a file/database with the given name + * should be included in internal init. + */ +static int +__rep_is_replicated_db(name, dir) + const char *name, *dir; +{ + if (strcmp(name, "DB_CONFIG") == 0 || strcmp(name, "pragma") == 0) + return (0); + if (strncmp(name, LFPREFIX, sizeof(LFPREFIX) - 1) == 0) + return (0); + + /* + * Remaining things that don't have a "__db" prefix are eligible. + */ + if (strncmp(name, DB_REGION_PREFIX, sizeof(DB_REGION_PREFIX) - 1) != 0) + return (1); + + /* Here, we know we have a "__db" name. */ + if (name[sizeof(DB_REGION_PREFIX) - 1] == 'p') + return (1); /* Partition files are eligible. */ + + /* + * Replicated system databases are eligible. When on disk, both DBs are + * sub-databases of a single database file. + */ + if (dir == NULL) { + if (strcmp(name, REPMEMBERSHIP) == 0 || + strcmp(name, REPLSNHIST) == 0) + return (1); + } else { + if (strcmp(name, REPSYSDBNAME) == 0) + return (1); + } + + /* Some other "__db" named file. 
*/ + return (0); +} + +/* + * Check whether the given uid is already present in the list of files being + * built in the context buffer. A return of DB_KEYEXIST means it is. + */ +static int +__rep_check_uid(env, rfp, uid) + ENV *env; + __rep_fileinfo_args *rfp; + void *uid; +{ + int ret; + + ret = 0; + if (memcmp(rfp->uid.data, uid, DB_FILE_ID_LEN) == 0) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Check_uid: Found matching file.")); + ret = DB_KEYEXIST; + } + return (ret); + +} + +static int +__rep_get_fileinfo(env, file, subdb, rfp, uid) + ENV *env; + const char *file, *subdb; + __rep_fileinfo_args *rfp; + u_int8_t *uid; +{ + DB *dbp; + DBC *dbc; + DBMETA *dbmeta; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep; + int lorder, ret, t_ret; + + dbp = NULL; + dbc = NULL; + pagep = NULL; + mpf = NULL; + + ENV_GET_THREAD_INFO(env, ip); + + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + if ((ret = __db_open(dbp, ip, NULL, file, subdb, DB_UNKNOWN, + DB_RDONLY | (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0), + 0, PGNO_BASE_MD)) != 0) + goto err; + + if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) + goto err; + if ((ret = __memp_fget(dbp->mpf, &dbp->meta_pgno, ip, dbc->txn, + 0, &pagep)) != 0) + goto err; + /* + * We have the meta page. Set up our information. + */ + dbmeta = (DBMETA *)pagep; + rfp->pgno = 0; + /* + * Queue is a special-case. We need to set max_pgno to 0 so that + * the client can compute the pages from the meta-data. + */ + if (dbp->type == DB_QUEUE) + rfp->max_pgno = 0; + else + rfp->max_pgno = dbmeta->last_pgno; + rfp->pgsize = dbp->pgsize; + memcpy(uid, dbp->fileid, DB_FILE_ID_LEN); + rfp->type = (u_int32_t)dbp->type; + rfp->db_flags = dbp->flags; + rfp->finfo_flags = 0; + /* + * Send the lorder of this database. 
+ */ + (void)__db_get_lorder(dbp, &lorder); + if (lorder == 1234) + FLD_SET(rfp->finfo_flags, REPINFO_DB_LITTLEENDIAN); + else + FLD_CLR(rfp->finfo_flags, REPINFO_DB_LITTLEENDIAN); + + ret = __memp_fput(dbp->mpf, ip, pagep, dbc->priority); + pagep = NULL; + if (ret != 0) + goto err; +err: + if (pagep != NULL && (t_ret = + __memp_fput(mpf, ip, pagep, dbc->priority)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if (dbp != NULL && (t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_page_req + * Process a page_req and send the page information to the client. + * + * PUBLIC: int __rep_page_req __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); + */ +int +__rep_page_req(env, ip, eid, rp, rec) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + __rep_control_args *rp; + DBT *rec; +{ + __rep_fileinfo_args *msgfp; + DB_MPOOLFILE *mpf; + DB_REP *db_rep; + REP *rep; + int ret, t_ret; + u_int8_t *next; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if ((ret = __rep_fileinfo_unmarshal(env, rp->rep_version, + &msgfp, rec->data, rec->size, &next)) != 0) + return (ret); + + DB_TEST_SET(env->test_abort, DB_TEST_NO_PAGES); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "page_req: file %d page %lu to %lu", + msgfp->filenum, (u_long)msgfp->pgno, (u_long)msgfp->max_pgno)); + + /* + * We need to open the file and then send its pages. + * If we cannot open the file, we send REP_FILE_FAIL. 
+ */ + VPRINT(env, (env, DB_VERB_REP_SYNC, + "page_req: Open %d via mpf_open", msgfp->filenum)); + if ((ret = __rep_mpf_open(env, &mpf, msgfp, 0)) != 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "page_req: Open %d failed", msgfp->filenum)); + if (F_ISSET(rep, REP_F_MASTER)) + (void)__rep_send_message(env, eid, REP_FILE_FAIL, + NULL, rec, 0, 0); + else + ret = DB_NOTFOUND; + goto err; + } + + ret = __rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, NULL); + t_ret = __memp_fclose(mpf, 0); + if (ret == 0 && t_ret != 0) + ret = t_ret; +err: +DB_TEST_RECOVERY_LABEL + __os_free(env, msgfp); + return (ret); +} + +static int +__rep_page_sendpages(env, ip, eid, rp, msgfp, mpf, dbp) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + __rep_control_args *rp; + __rep_fileinfo_args *msgfp; + DB_MPOOLFILE *mpf; + DB *dbp; +{ + DB *qdbp; + DBC *qdbc; + DBT msgdbt; + DB_LOG *dblp; + DB_LSN lsn; + DB_REP *db_rep; + PAGE *pagep; + REP *rep; + REP_BULK bulk; + REP_THROTTLE repth; + db_pgno_t p; + uintptr_t bulkoff; + size_t len, msgsz; + u_int32_t bulkflags, use_bulk; + int opened, ret, t_ret; + u_int8_t *buf; + + dblp = env->lg_handle; + db_rep = env->rep_handle; + rep = db_rep->region; + opened = 0; + t_ret = 0; + qdbp = NULL; + qdbc = NULL; + buf = NULL; + bulk.addr = NULL; + use_bulk = FLD_ISSET(rep->config, REP_C_BULK); + if (msgfp->type == (u_int32_t)DB_QUEUE) { + if (dbp == NULL) { + if ((ret = __db_create_internal(&qdbp, env, 0)) != 0) + goto err; + /* + * We need to check whether this is in-memory so that + * we pass the name correctly as either the file or + * the database name. + */ + if ((ret = __db_open(qdbp, ip, NULL, + FLD_ISSET(msgfp->db_flags, DB_AM_INMEM) ? + NULL : msgfp->info.data, + FLD_ISSET(msgfp->db_flags, DB_AM_INMEM) ? + msgfp->info.data : NULL, + DB_UNKNOWN, + DB_RDONLY | (F_ISSET(env, ENV_THREAD) ? 
DB_THREAD : 0), + 0, PGNO_BASE_MD)) != 0) + goto err; + opened = 1; + } else + qdbp = dbp; + if ((ret = __db_cursor(qdbp, ip, NULL, &qdbc, 0)) != 0) + goto err; + } + msgsz = __REP_FILEINFO_SIZE + DB_FILE_ID_LEN + msgfp->pgsize; + if ((ret = __os_calloc(env, 1, msgsz, &buf)) != 0) + goto err; + memset(&msgdbt, 0, sizeof(msgdbt)); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "sendpages: file %d page %lu to %lu", + msgfp->filenum, (u_long)msgfp->pgno, (u_long)msgfp->max_pgno)); + memset(&repth, 0, sizeof(repth)); + /* + * If we're doing bulk transfer, allocate a bulk buffer to put our + * pages in. We still need to initialize the throttle info + * because if we encounter a page larger than our entire bulk + * buffer, we need to send it as a singleton. + * + * Use a local var so that we don't need to worry if someone else + * turns on/off bulk in the middle of our call here. + */ + if (use_bulk && (ret = __rep_bulk_alloc(env, &bulk, eid, + &bulkoff, &bulkflags, REP_BULK_PAGE)) != 0) + goto err; + REP_SYSTEM_LOCK(env); + repth.gbytes = rep->gbytes; + repth.bytes = rep->bytes; + repth.type = REP_PAGE; + repth.data_dbt = &msgdbt; + REP_SYSTEM_UNLOCK(env); + + for (p = msgfp->pgno; p <= msgfp->max_pgno; p++) { + if (msgfp->type == (u_int32_t)DB_QUEUE && p != 0) { + /* + * If queue returns ENOENT or if queue is not configured + * convert it into PAGE_NOTFOUND. Queue might return + * ENOENT if an entire extent file does not exist in the + * "middle" of the database. 
+ */ +#ifdef HAVE_QUEUE + if ((ret = __qam_fget(qdbc, &p, 0, &pagep)) == ENOENT) +#endif + ret = DB_PAGE_NOTFOUND; + } else + ret = __memp_fget(mpf, &p, ip, NULL, 0, &pagep); + msgfp->pgno = p; + if (ret == DB_PAGE_NOTFOUND) { + if (F_ISSET(rep, REP_F_MASTER)) { + ret = 0; + RPRINT(env, (env, DB_VERB_REP_SYNC, + "sendpages: PAGE_FAIL on page %lu", + (u_long)p)); + if ((ret = __rep_fileinfo_marshal(env, + rp->rep_version, msgfp, buf, + msgsz, &len)) != 0) + goto err; + LOG_SYSTEM_LOCK(env); + lsn = ((LOG *)dblp->reginfo.primary)->lsn; + LOG_SYSTEM_UNLOCK(env); + DB_SET_DBT(msgdbt, buf, len); + (void)__rep_send_message(env, eid, + REP_PAGE_FAIL, &lsn, &msgdbt, 0, 0); + continue; + } else + ret = DB_NOTFOUND; + goto err; + } else if (ret != 0) + goto err; + else + DB_SET_DBT(msgfp->info, pagep, msgfp->pgsize); + len = 0; + /* + * Send along an indication of the byte order of this mpool + * page. Since mpool always keeps pages in the native byte + * order of the local environment, this is simply my + * environment's byte order. + * + * Since pages can be served from a variety of sites when using + * client-to-client synchronization, the receiving client needs + * to know the byte order of each page independently. + */ + if (F_ISSET(env, ENV_LITTLEENDIAN)) + FLD_SET(msgfp->finfo_flags, REPINFO_PG_LITTLEENDIAN); + else + FLD_CLR(msgfp->finfo_flags, REPINFO_PG_LITTLEENDIAN); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "sendpages: %lu, page lsn [%lu][%lu]", (u_long)p, + (u_long)pagep->lsn.file, (u_long)pagep->lsn.offset)); + ret = __rep_fileinfo_marshal(env, rp->rep_version, + msgfp, buf, msgsz, &len); + if (msgfp->type != (u_int32_t)DB_QUEUE || p == 0) + t_ret = __memp_fput(mpf, + ip, pagep, DB_PRIORITY_UNCHANGED); +#ifdef HAVE_QUEUE + else + /* + * We don't need an #else for HAVE_QUEUE here because if + * we're not compiled with queue, then we're guaranteed + * to have set REP_PAGE_FAIL above. 
+ */ + t_ret = __qam_fput(qdbc, p, pagep, qdbp->priority); +#endif + if (t_ret != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err; + + DB_ASSERT(env, len <= msgsz); + DB_SET_DBT(msgdbt, buf, len); + + LOG_SYSTEM_LOCK(env); + repth.lsn = ((LOG *)dblp->reginfo.primary)->lsn; + LOG_SYSTEM_UNLOCK(env); + /* + * If we are configured for bulk, try to send this as a bulk + * request. If not configured, or it is too big for bulk + * then just send normally. + */ + if (use_bulk) + ret = __rep_bulk_message(env, &bulk, &repth, + &repth.lsn, &msgdbt, 0); + if (!use_bulk || ret == DB_REP_BULKOVF) + ret = __rep_send_throttle(env, eid, &repth, 0, 0); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "sendpages: %lu, lsn [%lu][%lu]", (u_long)p, + (u_long)repth.lsn.file, (u_long)repth.lsn.offset)); + /* + * If we have REP_PAGE_MORE we need to break this loop. + * Otherwise, with REP_PAGE, we keep going. + */ + if (repth.type == REP_PAGE_MORE || ret != 0) { + /* Ignore send failure, except to break the loop. */ + if (ret == DB_REP_UNAVAIL) + ret = 0; + break; + } + } + +err: + /* + * We're done, force out whatever remains in the bulk buffer and + * free it. + */ + if (use_bulk && bulk.addr != NULL && + (t_ret = __rep_bulk_free(env, &bulk, 0)) != 0 && ret == 0 && + t_ret != DB_REP_UNAVAIL) + ret = t_ret; + if (qdbc != NULL && (t_ret = __dbc_close(qdbc)) != 0 && ret == 0) + ret = t_ret; + if (opened && (t_ret = __db_close(qdbp, NULL, DB_NOSYNC)) != 0 && + ret == 0) + ret = t_ret; + if (buf != NULL) + __os_free(env, buf); + return (ret); +} + +/* + * __rep_update_setup + * Process and setup with this file information. 
+ * + * PUBLIC: int __rep_update_setup __P((ENV *, int, __rep_control_args *, + * PUBLIC: DBT *, time_t, DB_LSN *)); + */ +int +__rep_update_setup(env, eid, rp, rec, savetime, lsn) + ENV *env; + int eid; + __rep_control_args *rp; + DBT *rec; + time_t savetime; + DB_LSN *lsn; +{ + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGENV *renv; + REGINFO *infop; + REP *rep; + __rep_update_args *rup; + DB_LSN verify_lsn; + int clientdb_locked, *origbuf, ret; + u_int32_t count, size; + u_int8_t *end, *next; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + infop = env->reginfo; + renv = infop->primary; + clientdb_locked = 0; + ret = 0; + + MUTEX_LOCK(env, rep->mtx_clientdb); + verify_lsn = lp->verify_lsn; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + if (rep->sync_state != SYNC_UPDATE || IN_ELECTION(rep)) { + REP_SYSTEM_UNLOCK(env); + return (0); + } + rep->sync_state = SYNC_OFF; + + if ((ret = __rep_update_unmarshal(env, rp->rep_version, + &rup, rec->data, rec->size, &next)) != 0) + return (ret); + DB_ASSERT(env, next == FIRST_FILE_PTR((u_int8_t*)rec->data)); + + /* + * If we're doing an abbreviated internal init, it's because we found a + * sync point but we needed to materialize any NIMDBs. However, if we + * now see that there are no NIMDBs we can just skip to verify_match, + * just as we would have done if we had already loaded the NIMDBs. In + * other words, if there are no NIMDBs, then I can trivially say that + * I've already loaded all of them! The whole abbreviated internal init + * turns out not to have been necessary after all. + */ + if (F_ISSET(rep, REP_F_ABBREVIATED)) { + count = rup->num_files; + end = &((u_int8_t*)rec->data)[rec->size]; + size = (u_int32_t)(end - next); + if ((ret = __rep_walk_filelist(env, rp->rep_version, + next, size, count, __rep_find_inmem, NULL)) == 0) { + /* + * Not found: there are no NIMDBs on the list. 
Revert + * to VERIFY state, so that we can pick up where we left + * off, except that from now on (i.e., future master + * changes) we can skip checking for NIMDBs if we find a + * sync point. + */ + RPRINT(env, (env, DB_VERB_REP_SYNC, + "UPDATE msg reveals no NIMDBs")); + F_SET(rep, REP_F_NIMDBS_LOADED); + rep->sync_state = SYNC_VERIFY; + F_CLR(rep, REP_F_ABBREVIATED); + ret = __rep_notify_threads(env, AWAIT_NIMDB); + + REP_SYSTEM_UNLOCK(env); + if (ret == 0 && (ret = __rep_verify_match(env, + &verify_lsn, savetime)) == DB_REP_WOULDROLLBACK) + *lsn = verify_lsn; + __os_free(env, rup); + return (ret); + } else if (ret != DB_KEYEXIST) + goto err; + } + + /* + * We know we're the first to come in here due to the + * SYNC_UPDATE state. + */ + rep->sync_state = SYNC_PAGE; + /* + * We should not ever be in internal init with a lease granted. + */ + DB_ASSERT(env, + !IS_USING_LEASES(env) || __rep_islease_granted(env) == 0); + + /* + * We do not clear REP_LOCKOUT_* in this code. + * We'll eventually call the normal __rep_verify_match recovery + * code and that will clear all the flags and allow others to + * proceed. We lockout both the messages and API here. + * We lockout messages briefly because we are about to reset + * all our LSNs and we do not want another thread possibly + * using/needing those. We have to lockout the API for + * the duration of internal init. + */ + if ((ret = __rep_lockout_msg(env, rep, 1)) != 0) + goto err; + + if ((ret = __rep_lockout_api(env, rep)) != 0) + goto err; + /* + * We need to update the timestamp and kill any open handles + * on this client. The files are changing completely. 
+ */ + (void)time(&renv->rep_timestamp); + + REP_SYSTEM_UNLOCK(env); + MUTEX_LOCK(env, rep->mtx_clientdb); + __os_gettime(env, &lp->rcvd_ts, 1); + lp->wait_ts = rep->request_gap; + ZERO_LSN(lp->ready_lsn); + ZERO_LSN(lp->verify_lsn); + ZERO_LSN(lp->prev_ckp); + ZERO_LSN(lp->waiting_lsn); + ZERO_LSN(lp->max_wait_lsn); + ZERO_LSN(lp->max_perm_lsn); + if (db_rep->rep_db == NULL) + ret = __rep_client_dbinit(env, 0, REP_DB); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (ret != 0) + goto err_nolock; + + /* + * We need to empty out any old log records that might be in the + * temp database. + */ + ENV_GET_THREAD_INFO(env, ip); + if ((ret = __db_truncate(db_rep->rep_db, ip, NULL, &count)) != 0) + goto err_nolock; + STAT_SET(env, + rep, log_queued, rep->stat.st_log_queued, 0, &lp->ready_lsn); + + REP_SYSTEM_LOCK(env); + if (F_ISSET(rep, REP_F_ABBREVIATED)) { + /* + * For an abbreviated internal init, the place from which we'll + * want to request master's logs after (NIMDB) pages are loaded + * is precisely the sync point we found during VERIFY. We'll + * roll back to there in a moment. + * + * We don't need first_vers, because it's only used with + * __log_newfile, which only happens with non-ABBREVIATED + * internal init. + */ + rep->first_lsn = verify_lsn; + } else { + /* + * We will remove all logs we have so we need to request + * from the master's beginning. 
+ */ + rep->first_lsn = rup->first_lsn; + rep->first_vers = rup->first_vers; + } + rep->last_lsn = rp->lsn; + rep->nfiles = rup->num_files; + + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Update setup for %d files.", rep->nfiles)); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Update setup: First LSN [%lu][%lu].", + (u_long)rep->first_lsn.file, (u_long)rep->first_lsn.offset)); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Update setup: Last LSN [%lu][%lu]", + (u_long)rep->last_lsn.file, (u_long)rep->last_lsn.offset)); + + if (rep->nfiles > 0) { + rep->infoversion = rp->rep_version; + rep->originfolen = rep->infolen = + rec->size - __REP_UPDATE_SIZE; + MUTEX_LOCK(env, renv->mtx_regenv); + ret = __env_alloc(infop, (size_t)rep->infolen, &origbuf); + MUTEX_UNLOCK(env, renv->mtx_regenv); + if (ret != 0) + goto err; + else + rep->originfo_off = R_OFFSET(infop, origbuf); + memcpy(R_ADDR(infop, rep->originfo_off), + FIRST_FILE_PTR((u_int8_t*)rec->data), rep->infolen); + } + + /* + * Clear the decks to make room for the logs and databases that we will + * request as part of this internal init. For a normal, full internal + * init, that means all logs and databases. For an abbreviated internal + * init, it means only the NIMDBs, and only that portion of the log + * after the sync point. + */ + if (F_ISSET(rep, REP_F_ABBREVIATED)) { + /* + * Note that in order to pare the log back to the sync point, we + * can't just crudely hack it off there. We need to make sure + * that pages in regular databases get rolled back to a state + * consistent with that sync point. So we have to do a real + * recovery step. 
+ */ + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Will roll back for abbreviated internal init")); + if ((ret = __rep_rollback(env, &rep->first_lsn)) != 0) { + if (ret == DB_REP_WOULDROLLBACK) { + DB_ASSERT(env, LOG_COMPARE(&rep->first_lsn, + &verify_lsn) == 0); + *lsn = verify_lsn; + } + goto err; + } + ret = __rep_remove_nimdbs(env); + } else + ret = __rep_remove_all(env, rp->rep_version, rec); + if (ret != 0) + goto err; + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + + REP_SYSTEM_UNLOCK(env); + MUTEX_LOCK(env, rep->mtx_clientdb); + clientdb_locked = 1; + REP_SYSTEM_LOCK(env); + rep->curfile = 0; + ret = __rep_nextfile(env, eid, rep); + if (ret != 0) + goto err; + + if (0) { +err_nolock: REP_SYSTEM_LOCK(env); + } + +err: /* + * If we get an error, we cannot leave ourselves in the RECOVER_PAGE + * state because we have no file information. That also means undo'ing + * the rep_lockout. We need to move back to the RECOVER_UPDATE stage. + * In the non-error path, we will have already cleared LOCKOUT_MSG, + * but it doesn't hurt to clear it again. + */ + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + if (ret != 0) { + if (rep->originfo_off != INVALID_ROFF) { + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, + R_ADDR(infop, rep->originfo_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->originfo_off = INVALID_ROFF; + } + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Update_setup: Error: Clear PAGE, set UPDATE again. %s", + db_strerror(ret))); + rep->sync_state = SYNC_UPDATE; + CLR_LOCKOUT_BDB(rep); + } + REP_SYSTEM_UNLOCK(env); + if (clientdb_locked) + MUTEX_UNLOCK(env, rep->mtx_clientdb); + __os_free(env, rup); + return (ret); +} + +static int +__rep_find_inmem(env, rfp, unused) + ENV *env; + __rep_fileinfo_args *rfp; + void *unused; +{ + COMPQUIET(env, NULL); + COMPQUIET(unused, NULL); + + return (FLD_ISSET(rfp->db_flags, DB_AM_INMEM) ? DB_KEYEXIST : 0); +} + +/* + * Removes any currently existing NIMDBs. 
We do this at the beginning of + * abbreviated internal init, when any existing NIMDBs should be intact, so + * walk_dir should produce reliable results. + */ +static int +__rep_remove_nimdbs(env) + ENV *env; +{ + FILE_LIST_CTX context; + int ret; + + if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0) + return (ret); + context.size = MEGABYTE; + context.count = 0; + context.fillptr = context.buf; + context.version = DB_REPVERSION; + + /* NB: "NULL" asks walk_dir to consider only in-memory DBs */ + if ((ret = __rep_walk_dir(env, NULL, &context)) != 0) + goto out; + + if ((ret = __rep_closefiles(env)) != 0) + goto out; + + ret = __rep_walk_filelist(env, context.version, context.buf, + context.size, context.count, __rep_remove_file, NULL); + +out: + __os_free(env, context.buf); + return (ret); +} + +/* + * Removes all existing logs and databases, at the start of internal init. But + * before we do, write a list of the databases onto the init file, so that in + * case we crash in the middle, we'll know how to resume when we restart. + * Finally, also write into the init file the UPDATE message from the master (in + * the "rec" DBT), which includes the (new) list of databases we intend to + * request copies of (again, so that we know what to do if we crash in the + * middle). + * + * For the sake of simplicity, these database lists are in the form of an UPDATE + * message (since we already have the mechanisms in place), even though strictly + * speaking that contains more information than we really need to store. + * + * !!! Must be called with the REP_SYSTEM_LOCK held. 
+ */ +static int +__rep_remove_all(env, msg_version, rec) + ENV *env; + u_int32_t msg_version; + DBT *rec; +{ + FILE_LIST_CTX context; + __rep_update_args u_args; + DB_FH *fhp; + DB_REP *db_rep; +#ifdef HAVE_REPLICATION_THREADS + DBT dbt; +#endif + REP *rep; + size_t cnt, updlen; + u_int32_t bufsz, fvers, mvers, zero; + int ret, t_ret; + char *fname; + + fname = NULL; + fhp = NULL; +#ifdef HAVE_REPLICATION_THREADS + dbt.data = NULL; +#endif + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * 1. Get list of databases currently present at this client, which we + * intend to remove. + */ + if ((ret = __os_calloc(env, 1, MEGABYTE, &context.buf)) != 0) + return (ret); + context.size = MEGABYTE; + context.count = 0; + context.version = DB_REPVERSION; + + /* Reserve space for the marshaled update_args. */ + context.fillptr = FIRST_FILE_PTR(context.buf); + + if ((ret = __rep_find_dbs(env, &context)) != 0) + goto out; + ZERO_LSN(u_args.first_lsn); + u_args.first_vers = 0; + u_args.num_files = context.count; + if ((ret = __rep_update_marshal(env, DB_REPVERSION, + &u_args, context.buf, __REP_UPDATE_SIZE, &updlen)) != 0) + goto out; + DB_ASSERT(env, updlen == __REP_UPDATE_SIZE); + + /* + * 2. Before removing anything, safe-store the database list, so that in + * case we crash before we've removed them all, when we restart we + * can clean up what we were doing. Only write database list to + * file if not running in-memory replication. 
+ * + * The original version of the file contains: + * data1 size (4 bytes) + * data1 + * data2 size (possibly) (4 bytes) + * data2 (possibly) + * + * As of 4.7 the file has the following form: + * 0 (4 bytes - to indicate a new style file) + * file version (4 bytes) + * data1 version (4 bytes) + * data1 size (4 bytes) + * data1 + * data2 version (possibly) (4 bytes) + * data2 size (possibly) (4 bytes) + * data2 (possibly) + */ + if (!FLD_ISSET(rep->config, REP_C_INMEM)) { + if ((ret = __db_appname(env, + DB_APP_NONE, REP_INITNAME, NULL, &fname)) != 0) + goto out; + /* Sanity check that the write size fits into 32 bits. */ + DB_ASSERT(env, (size_t)(context.fillptr - context.buf) == + (u_int32_t)(context.fillptr - context.buf)); + bufsz = (u_int32_t)(context.fillptr - context.buf); + + /* + * (Short writes aren't possible, so we don't have to verify + * 'cnt'.) This first list is generated internally, so it is + * always in the form of the current message version. + */ + zero = 0; + fvers = REP_INITVERSION; + mvers = DB_REPVERSION; + if ((ret = __os_open(env, fname, 0, + DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) != 0 || + (ret = + __os_write(env, fhp, &zero, sizeof(zero), &cnt)) != 0 || + (ret = + __os_write(env, fhp, &fvers, sizeof(fvers), &cnt)) != 0 || + (ret = + __os_write(env, fhp, &mvers, sizeof(mvers), &cnt)) != 0 || + (ret = + __os_write(env, fhp, &bufsz, sizeof(bufsz), &cnt)) != 0 || + (ret = + __os_write(env, fhp, context.buf, bufsz, &cnt)) != 0 || + (ret = __os_fsync(env, fhp)) != 0) { + __db_err(env, ret, "%s", fname); + goto out; + } + } + + /* + * 3. Go ahead and remove logs and databases. The databases get removed + * according to the list we just finished safe-storing. + * + * Clearing NIMDBS_LOADED might not really be necessary, since once + * we've committed to removing all there's no chance of doing an + * abbreviated internal init. This just keeps us honest. 
+ */ + if ((ret = __rep_remove_logs(env)) != 0) + goto out; + if ((ret = __rep_closefiles(env)) != 0) + goto out; + F_CLR(rep, REP_F_NIMDBS_LOADED); + if ((ret = __rep_walk_filelist(env, context.version, + FIRST_FILE_PTR(context.buf), context.size, + context.count, __rep_remove_file, NULL)) != 0) + goto out; + + /* + * 4. Safe-store the (new) list of database files we intend to copy from + * the master (again, so that in case we crash before we're finished + * doing so, we'll have enough information to clean up and start over + * again). This list is the list from the master, so it uses + * the message version. Only write to file if not running + * in-memory replication. + */ + if (!FLD_ISSET(rep->config, REP_C_INMEM)) { + mvers = msg_version; + if ((ret = + __os_write(env, fhp, &mvers, sizeof(mvers), &cnt)) != 0 || + (ret = __os_write(env, fhp, + &rec->size, sizeof(rec->size), &cnt)) != 0 || + (ret = + __os_write(env, fhp, rec->data, rec->size, &cnt)) != 0 || + (ret = __os_fsync(env, fhp)) != 0) { + __db_err(env, ret, "%s", fname); + goto out; + } +#ifdef HAVE_REPLICATION_THREADS + /* Invite repmgr to save any info it needs. */ + if ((ret = __repmgr_init_save(env, &dbt)) != 0) + goto out; + if (dbt.size > 0 && + ((ret = __os_write(env, fhp, + &dbt.size, sizeof(dbt.size), &cnt)) != 0 || + (ret = __os_write(env, fhp, + dbt.data, dbt.size, &cnt)) != 0)) + goto out; +#endif + } + +out: +#ifdef HAVE_REPLICATION_THREADS + if (dbt.data != NULL) + __os_free(env, dbt.data); +#endif + if (fhp != NULL && (t_ret = __os_closehandle(env, fhp)) && ret == 0) + ret = t_ret; + if (fname != NULL) + __os_free(env, fname); + __os_free(env, context.buf); + return (ret); +} + +/* + * __rep_remove_logs - + * Remove our logs to prepare for internal init. 
+ */ +static int +__rep_remove_logs(env) + ENV *env; +{ + DB_LOG *dblp; + DB_LSN lsn; + LOG *lp; + u_int32_t fnum, lastfile; + int ret; + char *name; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ret = 0; + + /* + * Call memp_sync to flush any pages that might be in the log buffers + * and not on disk before we remove files on disk. If there were no + * dirty pages, the log isn't flushed. Yet the log buffers could still + * be dirty: __log_flush should take care of this rare situation. + */ + if ((ret = __memp_sync_int(env, + NULL, 0, DB_SYNC_CACHE | DB_SYNC_INTERRUPT_OK, NULL, NULL)) != 0) + return (ret); + if ((ret = __log_flush(env, NULL)) != 0) + return (ret); + /* + * Forcibly remove existing log files or reset + * the in-memory log space. + */ + if (lp->db_log_inmemory) { + ZERO_LSN(lsn); + if ((ret = __log_zero(env, &lsn)) != 0) + return (ret); + } else { + lastfile = lp->lsn.file; + for (fnum = 1; fnum <= lastfile; fnum++) { + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) + return (ret); + (void)time(&lp->timestamp); + (void)__os_unlink(env, name, 0); + __os_free(env, name); + } + } + return (0); +} + +/* + * Removes a file during internal init. Assumes underlying subsystems are + * active; therefore, this can't be used for internal init crash recovery. + */ +static int +__rep_remove_file(env, rfp, unused) + ENV *env; + __rep_fileinfo_args *rfp; + void *unused; +{ + DB *dbp; +#ifdef HAVE_QUEUE + DB_THREAD_INFO *ip; +#endif + char *name; + int ret, t_ret; + + COMPQUIET(unused, NULL); + dbp = NULL; + name = rfp->info.data; + + /* + * Calling __fop_remove will both purge any matching + * fileid from mpool and unlink it on disk. + */ +#ifdef HAVE_QUEUE + /* + * Handle queue separately. __fop_remove will not + * remove extent files. Use __qam_remove to remove + * extent files that might exist under this name. Note that + * in-memory queue databases can't have extent files. 
+ */ + if (rfp->type == (u_int32_t)DB_QUEUE && + !FLD_ISSET(rfp->db_flags, DB_AM_INMEM)) { + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + + /* + * At present, qam_remove expects the passed-in dbp to have a + * locker allocated, and if not, db_open allocates a locker + * which qam_remove then leaks. + * + * TODO: it would be better to avoid cobbling together this + * sequence of low-level operations, if fileops provided some + * API to allow us to remove a database without write-locking + * its handle. + */ + if ((ret = __lock_id(env, NULL, &dbp->locker)) != 0) + goto out; + + ENV_GET_THREAD_INFO(env, ip); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "QAM: Unlink %s via __qam_remove", name)); + if ((ret = __qam_remove(dbp, ip, NULL, name, NULL, 0)) != 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "qam_remove returned %d", ret)); + goto out; + } + } +#endif + /* + * We call fop_remove even if we've called qam_remove. + * That will only have removed extent files. Now + * we need to deal with the actual file itself. + */ + if (FLD_ISSET(rfp->db_flags, DB_AM_INMEM)) { + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + MAKE_INMEM(dbp); + F_SET(dbp, DB_AM_RECOVER); /* Skirt locking. */ + ret = __db_inmem_remove(dbp, NULL, name); + } else + ret = __fop_remove(env, + NULL, rfp->uid.data, name, NULL, + __rep_is_internal_rep_file(rfp->info.data) ? + DB_APP_NONE : DB_APP_DATA, 0); +#ifdef HAVE_QUEUE +out: +#endif + if (dbp != NULL && + (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_bulk_page + * Process a bulk page message. 
+ * + * PUBLIC: int __rep_bulk_page __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); + */ +int +__rep_bulk_page(env, ip, eid, rp, rec) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + __rep_control_args *rp; + DBT *rec; +{ + __rep_control_args tmprp; + __rep_bulk_args b_args; + int ret; + u_int8_t *p, *ep; + + /* + * We're going to be modifying the rp LSN contents so make + * our own private copy to play with. We need to set the + * rectype to REP_PAGE because we're calling through __rep_page + * to process each page, and lower functions make decisions + * based on the rectypes (for throttling/gap processing) + */ + memcpy(&tmprp, rp, sizeof(tmprp)); + tmprp.rectype = REP_PAGE; + ret = 0; + for (ep = (u_int8_t *)rec->data + rec->size, p = (u_int8_t *)rec->data; + p < ep;) { + /* + * First thing in the buffer is the length. Then the LSN + * of this page, then the page info itself. + */ + if ((ret = __rep_bulk_unmarshal(env, + &b_args, p, rec->size, &p)) != 0) + return (ret); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "rep_bulk_page: Processing LSN [%lu][%lu]", + (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset)); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "rep_bulk_page: p %#lx ep %#lx pgrec data %#lx, size %lu (%#lx)", + P_TO_ULONG(p), P_TO_ULONG(ep), + P_TO_ULONG(b_args.bulkdata.data), + (u_long)b_args.bulkdata.size, + (u_long)b_args.bulkdata.size)); + /* + * Now send the page info DBT to the page processing function. + */ + ret = __rep_page(env, ip, eid, &tmprp, &b_args.bulkdata); + VPRINT(env, (env, DB_VERB_REP_SYNC, + "rep_bulk_page: rep_page ret %d", ret)); + + /* + * If this set of pages is already done just return. + */ + if (ret != 0) { + if (ret == DB_REP_PAGEDONE) + ret = 0; + break; + } + } + return (ret); +} + +/* + * __rep_page + * Process a page message. This processes any page related + * message: REP_PAGE, REP_PAGE_FAIL and REP_PAGE_MORE. 
+ * + * PUBLIC: int __rep_page __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, int, __rep_control_args *, DBT *)); + */ +int +__rep_page(env, ip, eid, rp, rec) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + __rep_control_args *rp; + DBT *rec; +{ + + DB_REP *db_rep; + DBT key, data; + REP *rep; + __rep_fileinfo_args *msgfp; + db_recno_t recno; + int ret; + char *msg; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + + if (rep->sync_state != SYNC_PAGE) + return (DB_REP_PAGEDONE); + + if (rp->rectype == REP_PAGE_FAIL) + msg = "PAGE_FAIL"; + else if (rp->rectype == REP_PAGE_MORE) + msg = "PAGE_MORE"; + else + msg = "PAGE"; + /* + * If we restarted internal init, it is possible to receive + * an old REP_PAGE message, while we're in the current + * stage of recovering pages. Until we have some sort of + * an init generation number, ignore any message that has + * a message LSN that is before this internal init's first_lsn. + */ + if (LOG_COMPARE(&rp->lsn, &rep->first_lsn) < 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "%s: Old page: msg LSN [%lu][%lu] first_lsn [%lu][%lu]", + msg, (u_long)rp->lsn.file, (u_long)rp->lsn.offset, + (u_long)rep->first_lsn.file, + (u_long)rep->first_lsn.offset)); + return (DB_REP_PAGEDONE); + } + if ((ret = __rep_fileinfo_unmarshal(env, rp->rep_version, + &msgfp, rec->data, rec->size, NULL)) != 0) + return (ret); + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + /* + * Check if the world changed. + */ + if (rep->sync_state != SYNC_PAGE) { + ret = DB_REP_PAGEDONE; + goto err; + } + /* + * We should not ever be in internal init with a lease granted. + */ + DB_ASSERT(env, + !IS_USING_LEASES(env) || __rep_islease_granted(env) == 0); + + VPRINT(env, (env, DB_VERB_REP_SYNC, + "%s: Received page %lu from file %d", + msg, (u_long)msgfp->pgno, msgfp->filenum)); + /* + * Check if this page is from the file we're expecting. + * This may be an old or delayed page message. + */ + /* + * !!! 
+ * If we allow dbrename/dbremove on the master while a client + * is updating, then we'd have to verify the file's uid here too. + */ + if (msgfp->filenum != rep->curfile) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Msg file %d != curfile %d", + msgfp->filenum, rep->curfile)); + ret = DB_REP_PAGEDONE; + goto err; + } + /* + * We want to create/open our dbp to the database + * where we'll keep our page information. + */ + if ((ret = __rep_client_dbinit(env, 1, REP_PG)) != 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "%s: Client_dbinit %s", msg, db_strerror(ret))); + goto err; + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + recno = (db_recno_t)(msgfp->pgno + 1); + key.data = &recno; + key.ulen = key.size = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + + /* + * If we already have this page, then we don't want to bother + * rewriting it into the file. Otherwise, any other error + * we want to return. + */ + ret = __db_put(db_rep->file_dbp, ip, NULL, &key, &data, DB_NOOVERWRITE); + if (ret == DB_KEYEXIST) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "%s: Received duplicate page %lu from file %d", + msg, (u_long)msgfp->pgno, msgfp->filenum)); + STAT(rep->stat.st_pg_duplicated++); + PERFMON4(env, rep, pg_duplicated, eid, + msgfp->pgno, msgfp->filenum, rep->stat.st_pg_duplicated); + ret = 0; + goto err; + } + if (ret != 0) + goto err; + + /* + * We put the page in the database file itself. + */ + if (rp->rectype != REP_PAGE_FAIL) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "%s: Write page %lu into mpool", msg, (u_long)msgfp->pgno)); + if ((ret = __rep_write_page(env, ip, rep, msgfp)) != 0) { + /* + * We got an error storing the page, therefore, we need + * remove this page marker from the page database too. + * !!! + * I'm ignoring errors from the delete because we want + * to return the original error. If we cannot write the + * page and we cannot delete the item we just put, + * what should we do? Panic the env and return + * DB_RUNRECOVERY? 
+ */ + (void)__db_del(db_rep->file_dbp, NULL, NULL, &key, 0); + goto err; + } + } + STAT_INC(env, rep, pg_record, rep->stat.st_pg_records, eid); + rep->npages++; + + /* + * Now check the LSN on the page and save it if it is later + * than the one we have. + */ + if (LOG_COMPARE(&rp->lsn, &rep->last_lsn) > 0) + rep->last_lsn = rp->lsn; + + /* + * We've successfully written the page. Now we need to see if + * we're done with this file. __rep_filedone will check if we + * have all the pages expected and if so, set up for the next + * file and send out a page request for the next file's pages. + */ + ret = __rep_filedone(env, ip, eid, rep, msgfp, rp->rectype); + +err: REP_SYSTEM_UNLOCK(env); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + + __os_free(env, msgfp); + return (ret); +} + +/* + * __rep_write_page - + * Write this page into a database. + */ +static int +__rep_write_page(env, ip, rep, msgfp) + ENV *env; + DB_THREAD_INFO *ip; + REP *rep; + __rep_fileinfo_args *msgfp; +{ + DB db; + DBT pgcookie; + DB_MPOOLFILE *mpf; + DB_PGINFO *pginfo; + DB_REP *db_rep; + REGINFO *infop; + __rep_fileinfo_args *rfp; + int ret; + void *dst; + + db_rep = env->rep_handle; + infop = env->reginfo; + rfp = NULL; + + /* + * If this is the first page we're putting in this database, we need + * to create the mpool file. Otherwise call memp_fget to create the + * page in mpool. Then copy the data to the page, and memp_fput the + * page to give it back to mpool. + * + * We need to create the file, removing any existing file and associate + * the correct file ID with the new one. + */ + GET_CURINFO(rep, infop, rfp); + if (db_rep->file_mpf == NULL) { + if (!FLD_ISSET(rfp->db_flags, DB_AM_INMEM)) { + /* + * Recreate the file on disk. We'll be putting + * the data into the file via mpool. System + * databases should go into the environment + * directory, not the data directory. 
+ */ + RPRINT(env, (env, DB_VERB_REP_SYNC, + "rep_write_page: Calling fop_create for %s", + (char *)rfp->info.data)); + if ((ret = __fop_create(env, NULL, NULL, + rfp->info.data, NULL, + __rep_is_internal_rep_file(rfp->info.data) ? + DB_APP_NONE : DB_APP_DATA, env->db_mode, 0)) != 0) + goto err; + } + + if ((ret = + __rep_mpf_open(env, &db_rep->file_mpf, rfp, + FLD_ISSET(rfp->db_flags, DB_AM_INMEM) ? + DB_CREATE : 0)) != 0) + goto err; + } + /* + * Handle queue specially. If we're a QUEUE database, we need to + * use the __qam_fget/put calls. We need to use db_rep->queue_dbc for + * that. That dbp is opened after getting the metapage for the + * queue database. Since the meta-page is always in the queue file, + * we'll use the normal path for that first page. After that we + * can assume the dbp is opened. + */ + if (msgfp->type == (u_int32_t)DB_QUEUE && msgfp->pgno != 0) { +#ifdef HAVE_QUEUE + ret = __qam_fget(db_rep->queue_dbc, &msgfp->pgno, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &dst); +#else + /* + * This always returns an error. + */ + ret = __db_no_queue_am(env); +#endif + } else + ret = __memp_fget(db_rep->file_mpf, &msgfp->pgno, ip, NULL, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &dst); + + if (ret != 0) + goto err; + + /* + * Before writing this page into our local mpool, see if its byte order + * needs to be swapped. When in mpool the page should be in the native + * byte order of our local environment. But the page image we've + * received may be in the opposite order (as indicated in finfo_flags). + */ + if ((F_ISSET(env, ENV_LITTLEENDIAN) && + !FLD_ISSET(msgfp->finfo_flags, REPINFO_PG_LITTLEENDIAN)) || + (!F_ISSET(env, ENV_LITTLEENDIAN) && + FLD_ISSET(msgfp->finfo_flags, REPINFO_PG_LITTLEENDIAN))) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "write_page: Page %d needs to be swapped", msgfp->pgno)); + /* + * Set up a dbp to pass into the swap functions. 
We need + * only a few things: The environment and any special + * dbp flags and some obvious basics like db type and + * pagesize. Those flags were set back in rep_mpf_open + * and are available in the pgcookie set up with the + * mpoolfile associated with this database. + */ + memset(&db, 0, sizeof(db)); + db.env = env; + db.type = (DBTYPE)msgfp->type; + db.pgsize = msgfp->pgsize; + mpf = db_rep->file_mpf; + if ((ret = __memp_get_pgcookie(mpf, &pgcookie)) != 0) + goto err; + pginfo = (DB_PGINFO *)pgcookie.data; + db.flags = pginfo->flags; + if ((ret = __db_pageswap(env, + &db, msgfp->info.data, msgfp->pgsize, NULL, 1)) != 0) + goto err; + } + + memcpy(dst, msgfp->info.data, msgfp->pgsize); +#ifdef HAVE_QUEUE + if (msgfp->type == (u_int32_t)DB_QUEUE && msgfp->pgno != 0) + ret = __qam_fput(db_rep->queue_dbc, + msgfp->pgno, dst, db_rep->queue_dbc->priority); + else +#endif + ret = __memp_fput(db_rep->file_mpf, + ip, dst, db_rep->file_dbp->priority); + +err: return (ret); +} + +/* + * __rep_page_gap - + * After we've put the page into the database, we need to check if + * we have a page gap and whether we need to request pages. + */ +static int +__rep_page_gap(env, rep, msgfp, type) + ENV *env; + REP *rep; + __rep_fileinfo_args *msgfp; + u_int32_t type; +{ + DBC *dbc; + DBT data, key; + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGINFO *infop; + __rep_fileinfo_args *rfp; + db_recno_t recno; + int ret, t_ret; + + db_rep = env->rep_handle; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + infop = env->reginfo; + ret = 0; + dbc = NULL; + + /* + * We've successfully put this page into our file. + * Now we need to account for it and re-request new pages + * if necessary. + */ + /* + * We already hold both the db mutex and rep mutex. + */ + GET_CURINFO(rep, infop, rfp); + + /* + * Make sure we're still talking about the same file. + * If not, we're done here. 
+ */ + if (rfp->filenum != msgfp->filenum) { + ret = DB_REP_PAGEDONE; + goto err; + } + + /* + * We have 3 possible states: + * 1. We receive a page we already have accounted for. + * msg pgno < ready pgno + * 2. We receive a page that is beyond a gap. + * msg pgno > ready pgno + * 3. We receive the page we're expecting next. + * msg pgno == ready pgno + */ + /* + * State 1. This can happen once we put our page record into the + * database, but by the time we acquire the mutex other + * threads have already accounted for this page and moved on. + * We just want to return. + */ + if (msgfp->pgno < rep->ready_pg) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "PAGE_GAP: pgno %lu < ready %lu, waiting %lu", + (u_long)msgfp->pgno, (u_long)rep->ready_pg, + (u_long)rep->waiting_pg)); + goto err; + } + + /* + * State 2. This page is beyond the page we're expecting. + * We need to update waiting_pg if this page is less than + * (earlier) the current waiting_pg. There is nothing + * to do but see if we need to request. + */ + VPRINT(env, (env, DB_VERB_REP_SYNC, + "PAGE_GAP: pgno %lu, max_pg %lu ready %lu, waiting %lu max_wait %lu", + (u_long)msgfp->pgno, (u_long)rfp->max_pgno, (u_long)rep->ready_pg, + (u_long)rep->waiting_pg, (u_long)rep->max_wait_pg)); + if (msgfp->pgno > rep->ready_pg) { + /* + * We receive a page larger than the one we're expecting. + */ + __os_gettime(env, &rep->last_pg_ts, 1); + if (rep->waiting_pg == PGNO_INVALID || + msgfp->pgno < rep->waiting_pg) + rep->waiting_pg = msgfp->pgno; + } else { + /* + * We received the page we're expecting. + */ + rep->ready_pg++; + __os_gettime(env, &lp->rcvd_ts, 1); + if (rep->ready_pg == rep->waiting_pg) { + /* + * If we get here we know we just filled a gap. + * Move the cursor to that place and then walk + * forward looking for the next gap, if it exists. + * Similar to log gaps, if we fill a gap we want to + * request the next gap right away if it has been + * a while since we last received a later page. 
+ */ + lp->rcvd_ts = rep->last_pg_ts; + lp->wait_ts = rep->request_gap; + rep->max_wait_pg = PGNO_INVALID; + /* + * We need to walk the recno database looking for the + * next page we need or expect. + */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ENV_GET_THREAD_INFO(env, ip); + if ((ret = __db_cursor(db_rep->file_dbp, ip, NULL, + &dbc, 0)) != 0) + goto err; + /* + * Set cursor to the first waiting page. + * Page numbers/record numbers are offset by 1. + */ + recno = (db_recno_t)rep->waiting_pg + 1; + key.data = &recno; + key.ulen = key.size = sizeof(db_recno_t); + key.flags = DB_DBT_USERMEM; + /* + * We know that page is there, this should + * find the record. + */ + ret = __dbc_get(dbc, &key, &data, DB_SET); + if (ret != 0) + goto err; + VPRINT(env, (env, DB_VERB_REP_SYNC, + "PAGE_GAP: Set cursor for ready %lu, waiting %lu", + (u_long)rep->ready_pg, (u_long)rep->waiting_pg)); + } + while (ret == 0 && rep->ready_pg == rep->waiting_pg) { + rep->ready_pg++; + ret = __dbc_get(dbc, &key, &data, DB_NEXT); + /* + * If we get to the end of the list, there are no + * more gaps. Reset waiting_pg. + */ + if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) { + rep->waiting_pg = PGNO_INVALID; + VPRINT(env, (env, DB_VERB_REP_SYNC, + "PAGE_GAP: Next cursor No next - ready %lu, waiting %lu", + (u_long)rep->ready_pg, + (u_long)rep->waiting_pg)); + break; + } + /* + * Subtract 1 from waiting_pg because record numbers + * are 1-based and pages are 0-based and we added 1 + * into the page number when we put it into the db. + */ + rep->waiting_pg = *(db_pgno_t *)key.data; + rep->waiting_pg--; + VPRINT(env, (env, DB_VERB_REP_SYNC, + "PAGE_GAP: Next cursor ready %lu, waiting %lu", + (u_long)rep->ready_pg, (u_long)rep->waiting_pg)); + } + } + + /* + * If we filled a gap and now have the entire file, there's + * nothing to do. We're done when ready_pg is > max_pgno + * because ready_pg is larger than the last page we received. 
+ */ + if (rep->ready_pg > rfp->max_pgno) + goto err; + + /* + * Check if we need to ask for more pages. + */ + if ((rep->waiting_pg != PGNO_INVALID && + rep->ready_pg != rep->waiting_pg) || type == REP_PAGE_MORE) { + /* + * We got a page but we may still be waiting for more. + * If we got REP_PAGE_MORE we always want to ask for more. + * We need to set rfp->pgno to the current page number + * we will use to ask for more pages. + */ + if (type == REP_PAGE_MORE) + rfp->pgno = msgfp->pgno; + if ((__rep_check_doreq(env, rep) || type == REP_PAGE_MORE) && + ((ret = __rep_pggap_req(env, rep, rfp, + (type == REP_PAGE_MORE) ? REP_GAP_FORCE : 0)) != 0)) + goto err; + } else { + lp->wait_ts = rep->request_gap; + rep->max_wait_pg = PGNO_INVALID; + } + +err: + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __rep_init_cleanup - + * Clean up internal initialization pieces. + * + * !!! + * Caller must hold client database mutex (mtx_clientdb) and REP_SYSTEM_LOCK. + * + * PUBLIC: int __rep_init_cleanup __P((ENV *, REP *, int)); + */ +int +__rep_init_cleanup(env, rep, force) + ENV *env; + REP *rep; + int force; +{ + DB *queue_dbp; + DB_REP *db_rep; + REGENV *renv; + REGINFO *infop; + int ret, t_ret; + + db_rep = env->rep_handle; + infop = env->reginfo; + renv = infop->primary; + ret = 0; + /* + * 1. Close up the file data pointer we used. + * 2. Close/reset the page database. + * 3. Close/reset the queue database if we're forcing a cleanup. + * 4. Free current file info. + * 5. If we have all files or need to force, free original file info. 
+ */ + if (db_rep->file_mpf != NULL) { + ret = __memp_fclose(db_rep->file_mpf, 0); + db_rep->file_mpf = NULL; + } + if (db_rep->file_dbp != NULL) { + t_ret = __db_close(db_rep->file_dbp, NULL, DB_NOSYNC); + db_rep->file_dbp = NULL; + if (ret == 0) + ret = t_ret; + } + if (force && db_rep->queue_dbc != NULL) { + queue_dbp = db_rep->queue_dbc->dbp; + if ((t_ret = __dbc_close(db_rep->queue_dbc)) != 0 && ret == 0) + ret = t_ret; + db_rep->queue_dbc = NULL; + if ((t_ret = __db_close(queue_dbp, NULL, DB_NOSYNC)) != 0 && + ret == 0) + ret = t_ret; + } + if (rep->curinfo_off != INVALID_ROFF) { + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, R_ADDR(infop, rep->curinfo_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->curinfo_off = INVALID_ROFF; + } + if (IN_INTERNAL_INIT(rep) && force) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "clean up interrupted internal init")); + t_ret = F_ISSET(rep, REP_F_ABBREVIATED) ? + __rep_walk_filelist(env, rep->infoversion, + R_ADDR(infop, rep->originfo_off), rep->originfolen, + rep->nfiles, __rep_cleanup_nimdbs, NULL) : + __rep_clean_interrupted(env); + if (ret == 0) + ret = t_ret; + + if (rep->originfo_off != INVALID_ROFF) { + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, + R_ADDR(infop, rep->originfo_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->originfo_off = INVALID_ROFF; + } + } + + return (ret); +} + +/* + * Remove NIMDBs that may have been fully or partially loaded during an + * abbreviated internal init, when the init gets interrupted. At this point, + * we know that any databases we have processed are listed in originfo. 
+ */ +static int +__rep_cleanup_nimdbs(env, rfp, unused) + ENV *env; + __rep_fileinfo_args *rfp; + void *unused; +{ + DB *dbp; + char *namep; + int ret, t_ret; + + COMPQUIET(unused, NULL); + + ret = 0; + dbp = NULL; + + if (FLD_ISSET(rfp->db_flags, DB_AM_INMEM)) { + namep = rfp->info.data; + + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto out; + MAKE_INMEM(dbp); + F_SET(dbp, DB_AM_RECOVER); /* Skirt locking. */ + + /* + * Some of these "files" (actually NIMDBs) may not exist + * yet, simply because the interrupted abbreviated + * internal init had not yet progressed far enough to + * retrieve them. So ENOENT is an acceptable outcome. + */ + if ((ret = __db_inmem_remove(dbp, NULL, namep)) == ENOENT) + ret = 0; + if ((t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } + +out: + return (ret); +} + +/* + * Clean up files involved in an interrupted internal init. + */ +static int +__rep_clean_interrupted(env) + ENV *env; +{ + REP *rep; + DB_LOG *dblp; + LOG *lp; + REGINFO *infop; + int ret, t_ret; + + rep = env->rep_handle->region; + infop = env->reginfo; + + /* + * 1. logs + * a) remove old log files + * b) set up initial log file #1 + * 2. database files + * 3. the "init file" + * + * Steps 1 and 2 can be attempted independently. Step 1b is + * dependent on successful completion of 1a. + */ + + /* Step 1a. */ + if ((ret = __rep_remove_logs(env)) == 0) { + /* + * Since we have no logs, recover by making it look like + * the case when a new client first starts up, namely we + * have nothing but a fresh log file #1. This is a + * little wasteful, since we may soon remove this log + * file again. But it's insignificant in the context of + * interrupted internal init. + */ + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* Step 1b. */ + ret = __rep_log_setup(env, + rep, 1, DB_LOGVERSION, &lp->ready_lsn); + } + + /* Step 2. 
*/ + if ((t_ret = __rep_walk_filelist(env, rep->infoversion, + R_ADDR(infop, rep->originfo_off), rep->originfolen, + rep->nfiles, __rep_remove_by_list, NULL)) != 0 && ret == 0) + ret = t_ret; + + /* + * Step 3 must not be done if anything fails along the way, because the + * init file's raison d'etre is to show that some files remain to be + * cleaned up. + */ + if (ret == 0) + ret = __rep_remove_init_file(env); + + return (ret); +} + +/* + * __rep_filedone - + * We need to check if we're done with the current file after + * processing the current page. Stat the database to see if + * we have all the pages. If so, we need to clean up/close + * this one, set up for the next one, and ask for its pages, + * or if this is the last file, request the log records and + * move to the REP_RECOVER_LOG state. + */ +static int +__rep_filedone(env, ip, eid, rep, msgfp, type) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + REP *rep; + __rep_fileinfo_args *msgfp; + u_int32_t type; +{ + REGINFO *infop; + __rep_fileinfo_args *rfp; + int ret; + + /* + * We've put our page, now we need to do any gap processing + * that might be needed to re-request pages. + */ + ret = __rep_page_gap(env, rep, msgfp, type); + /* + * The world changed while we were doing gap processing. + * We're done here. + */ + if (ret == DB_REP_PAGEDONE) + return (0); + + infop = env->reginfo; + GET_CURINFO(rep, infop, rfp); + /* + * max_pgno is 0-based and npages is 1-based, so we don't have + * all the pages until npages is > max_pgno. + */ + VPRINT(env, (env, DB_VERB_REP_SYNC, + "FILEDONE: have %lu pages. Need %lu.", + (u_long)rep->npages, (u_long)rfp->max_pgno + 1)); + if (rep->npages <= rfp->max_pgno) + return (0); + + /* + * If we're queue and we think we have all the pages for this file, + * we need to do special queue processing. Queue is handled in + * several stages. 
+ */ + if (rfp->type == (u_int32_t)DB_QUEUE && + ((ret = __rep_queue_filedone(env, ip, rep, rfp)) != + DB_REP_PAGEDONE)) + return (ret); + /* + * We have all the pages for this file. Clean up. + */ + if ((ret = __rep_init_cleanup(env, rep, 0)) != 0) + goto err; + + rep->curfile++; + ret = __rep_nextfile(env, eid, rep); +err: + return (ret); +} + +/* + * Starts requesting pages for the next file in the list (if any), or if not, + * proceeds to the next stage: requesting logs. + * + * !!! + * Must be called with both clientdb_mutex and REP_SYSTEM, though we may drop + * REP_SYSTEM_LOCK momentarily in order to send a LOG_REQ (but not a PAGE_REQ). + */ +static int +__rep_nextfile(env, eid, rep) + ENV *env; + int eid; + REP *rep; +{ + DBT dbt; + __rep_logreq_args lr_args; + DB_LOG *dblp; + LOG *lp; + REGENV *renv; + REGINFO *infop; + __rep_fileinfo_args *curinfo, *rfp; + int *curbuf, ret; + u_int8_t *buf, *info_ptr, lrbuf[__REP_LOGREQ_SIZE], *nextinfo; + size_t len, msgsz; + + infop = env->reginfo; + renv = infop->primary; + rfp = NULL; + + /* + * Always direct the next request to the master (at least nominally), + * regardless of where the current response came from. The application + * can always still redirect it to another client. + */ + if (rep->master_id != DB_EID_INVALID) + eid = rep->master_id; + + while (rep->curfile < rep->nfiles) { + /* Set curinfo to next file and examine it. 
*/ + info_ptr = R_ADDR(infop, + rep->originfo_off + (rep->originfolen - rep->infolen)); + if ((ret = __rep_fileinfo_unmarshal(env, + rep->infoversion, &rfp, info_ptr, + rep->infolen, &nextinfo)) != 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "NEXTINFO: Fileinfo read: %s", db_strerror(ret))); + return (ret); + } + rep->infolen -= (u_int32_t)(nextinfo - info_ptr); + MUTEX_LOCK(env, renv->mtx_regenv); + ret = __env_alloc(infop, sizeof(__rep_fileinfo_args) + + rfp->uid.size + rfp->info.size, &curbuf); + MUTEX_UNLOCK(env, renv->mtx_regenv); + if (ret != 0) { + __os_free(env, rfp); + return (ret); + } + else + rep->curinfo_off = R_OFFSET(infop, curbuf); + /* Copy fileinfo basic structure into curinfo. */ + memcpy(R_ADDR(infop, rep->curinfo_off), + (u_int8_t*)rfp, sizeof(__rep_fileinfo_args)); + /* Set up curinfo pointers to uid and info DBT data. */ + GET_CURINFO(rep, infop, curinfo); + /* Copy uid and info DBT data from originfo buffer. */ + if (rfp->uid.size > 0) + memcpy(curinfo->uid.data, + rfp->uid.data, rfp->uid.size); + if (rfp->info.size > 0) + memcpy(curinfo->info.data, + rfp->info.data, rfp->info.size); + __os_free(env, rfp); + + /* Skip over regular DB's in "abbreviated" internal inits. */ + if (F_ISSET(rep, REP_F_ABBREVIATED) && + !FLD_ISSET(curinfo->db_flags, DB_AM_INMEM)) { + VPRINT(env, (env, DB_VERB_REP_SYNC, + "Skipping file %d in abbreviated internal init", + curinfo->filenum)); + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, + R_ADDR(infop, rep->curinfo_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->curinfo_off = INVALID_ROFF; + rep->curfile++; + continue; + } + + /* Request this file's pages. 
*/ + DB_ASSERT(env, curinfo->pgno == 0); + rep->ready_pg = 0; + rep->npages = 0; + rep->waiting_pg = PGNO_INVALID; + rep->max_wait_pg = PGNO_INVALID; + memset(&dbt, 0, sizeof(dbt)); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Next file %d: pgsize %lu, maxpg %lu", + curinfo->filenum, (u_long)curinfo->pgsize, + (u_long)curinfo->max_pgno)); + msgsz = __REP_FILEINFO_SIZE + + curinfo->uid.size + curinfo->info.size; + if ((ret = __os_calloc(env, 1, msgsz, &buf)) != 0) + return (ret); + if ((ret = __rep_fileinfo_marshal(env, rep->infoversion, + curinfo, buf, msgsz, &len)) != 0) { + __os_free(env, buf); + return (ret); + } + DB_INIT_DBT(dbt, buf, len); + (void)__rep_send_message(env, eid, REP_PAGE_REQ, + NULL, &dbt, 0, DB_REP_ANYWHERE); + __os_free(env, buf); + + return (0); + } + + RPRINT(env, (env, DB_VERB_REP_SYNC, + "NEXTFILE: have %d files. RECOVER_LOG now", rep->nfiles)); + /* + * Move to REP_RECOVER_LOG state. + * Request logs. + */ + /* + * We need to do a sync here so that any later opens + * can find the file and file id. We need to do it + * before we clear SYNC_PAGE so that we do not + * try to flush the log. + */ + if ((ret = __memp_sync_int(env, NULL, 0, + DB_SYNC_CACHE | DB_SYNC_INTERRUPT_OK, NULL, NULL)) != 0) + return (ret); + rep->sync_state = SYNC_LOG; + memset(&dbt, 0, sizeof(dbt)); + lr_args.endlsn = rep->last_lsn; + if ((ret = __rep_logreq_marshal(env, &lr_args, lrbuf, + __REP_LOGREQ_SIZE, &len)) != 0) + return (ret); + DB_INIT_DBT(dbt, lrbuf, len); + + /* + * Get the logging subsystem ready to receive the first log record we + * are going to ask for. In the case of a normal internal init, this is + * pretty simple, since we only deal in whole log files. In the + * ABBREVIATED case we've already taken care of this, back when we + * processed the UPDATE message, because we had to do it by rolling back + * to a sync point at an arbitrary LSN. 
+ */ + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + /* + * Update ready_lsn so that future rerequests and VERIFY_FAILs know + * where to start. + */ + if (!F_ISSET(rep, REP_F_ABBREVIATED) && + (ret = __rep_log_setup(env, rep, + rep->first_lsn.file, rep->first_vers, &lp->ready_lsn)) != 0) + return (ret); + RPRINT(env, (env, DB_VERB_REP_SYNC, + "NEXTFILE: LOG_REQ from LSN [%lu][%lu] to [%lu][%lu]", + (u_long)rep->first_lsn.file, (u_long)rep->first_lsn.offset, + (u_long)rep->last_lsn.file, (u_long)rep->last_lsn.offset)); + REP_SYSTEM_UNLOCK(env); + __os_gettime(env, &lp->rcvd_ts, 1); + lp->wait_ts = rep->request_gap; + (void)__rep_send_message(env, eid, + REP_LOG_REQ, &rep->first_lsn, &dbt, REPCTL_INIT, DB_REP_ANYWHERE); + REP_SYSTEM_LOCK(env); + return (0); +} + +/* + * Run a recovery, for the purpose of rolling back the client environment to a + * specific sync point, in preparation for doing an abbreviated internal init + * (materializing only NIMDBs, when we already have the on-disk DBs). + * + * REP_SYSTEM_LOCK should be held on entry, and will be held on exit, but we + * drop it momentarily during the call. 
+ */ +static int +__rep_rollback(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + DB_THREAD_INFO *ip; + DB_LSN trunclsn; + int ret; + u_int32_t unused; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ENV_GET_THREAD_INFO(env, ip); + + DB_ASSERT(env, FLD_ISSET(rep->lockout_flags, + REP_LOCKOUT_API | REP_LOCKOUT_MSG | REP_LOCKOUT_OP)); + + REP_SYSTEM_UNLOCK(env); + + if ((ret = __rep_dorecovery(env, lsnp, &trunclsn)) != 0) + goto errlock; + + MUTEX_LOCK(env, rep->mtx_clientdb); + lp->ready_lsn = trunclsn; + ZERO_LSN(lp->waiting_lsn); + ZERO_LSN(lp->max_wait_lsn); + lp->max_perm_lsn = *lsnp; + lp->wait_ts = rep->request_gap; + __os_gettime(env, &lp->rcvd_ts, 1); + ZERO_LSN(lp->verify_lsn); + + if (db_rep->rep_db == NULL && + (ret = __rep_client_dbinit(env, 0, REP_DB)) != 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + goto errlock; + } + + F_SET(db_rep->rep_db, DB_AM_RECOVER); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ret = __db_truncate(db_rep->rep_db, ip, NULL, &unused); + MUTEX_LOCK(env, rep->mtx_clientdb); + F_CLR(db_rep->rep_db, DB_AM_RECOVER); + STAT_SET(env, rep, log_queued, rep->stat.st_log_queued, 0, lsnp); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + +errlock: + REP_SYSTEM_LOCK(env); + + return (ret); +} + +/* + * __rep_mpf_open - + * Create and open the mpool file for a database. + * Used by both master and client to bring files into mpool. + */ +static int +__rep_mpf_open(env, mpfp, rfp, flags) + ENV *env; + DB_MPOOLFILE **mpfp; + __rep_fileinfo_args *rfp; + u_int32_t flags; +{ + DB db; + int ret; + + if ((ret = __memp_fcreate(env, mpfp)) != 0) + return (ret); + + /* + * We need a dbp to pass into to __env_mpool. Set up + * only the parts that it needs. 
+ */ + memset(&db, 0, sizeof(db)); + db.env = env; + db.type = (DBTYPE)rfp->type; + db.pgsize = rfp->pgsize; + memcpy(db.fileid, rfp->uid.data, DB_FILE_ID_LEN); + db.flags = rfp->db_flags; + /* We need to make sure the dbp isn't marked open. */ + F_CLR(&db, DB_AM_OPEN_CALLED); + /* + * The byte order of this database may be different from my local native + * byte order. If so, set the swap bit so that the necessary swapping + * will be done during file I/O. + */ + if ((F_ISSET(env, ENV_LITTLEENDIAN) && + !FLD_ISSET(rfp->finfo_flags, REPINFO_DB_LITTLEENDIAN)) || + (!F_ISSET(env, ENV_LITTLEENDIAN) && + FLD_ISSET(rfp->finfo_flags, REPINFO_DB_LITTLEENDIAN))) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "rep_mpf_open: Different endian database. Set swap bit.")); + F_SET(&db, DB_AM_SWAP); + } else + F_CLR(&db, DB_AM_SWAP); + + db.mpf = *mpfp; + if (F_ISSET(&db, DB_AM_INMEM)) + (void)__memp_set_flags(db.mpf, DB_MPOOL_NOFILE, 1); + if ((ret = __env_mpool(&db, rfp->info.data, flags)) != 0) { + (void)__memp_fclose(db.mpf, 0); + *mpfp = NULL; + } + return (ret); +} + +/* + * __rep_pggap_req - + * Request a page gap. Assumes the caller holds the rep_mutex. + * + * PUBLIC: int __rep_pggap_req __P((ENV *, REP *, __rep_fileinfo_args *, + * PUBLIC: u_int32_t)); + */ +int +__rep_pggap_req(env, rep, reqfp, gapflags) + ENV *env; + REP *rep; + __rep_fileinfo_args *reqfp; + u_int32_t gapflags; +{ + DBT max_pg_dbt; + REGINFO *infop; + __rep_fileinfo_args *curinfo, *tmpfp, t; + size_t len, msgsz; + u_int32_t flags; + int alloc, master, ret; + u_int8_t *buf; + + infop = env->reginfo; + ret = 0; + alloc = 0; + /* + * There is a window where we have to set REP_RECOVER_PAGE when + * we receive the update information to transition from getting + * file information to getting page information. However, that + * thread does release and then reacquire mutexes. So, we might + * try re-requesting before the original thread can get curinfo + * setup. If curinfo isn't set up there is nothing to do. 
+ */ + if (rep->curinfo_off == INVALID_ROFF) + return (0); + GET_CURINFO(rep, infop, curinfo); + if (reqfp == NULL) { + if ((ret = __rep_finfo_alloc(env, curinfo, &tmpfp)) != 0) + return (ret); + alloc = 1; + } else { + t = *reqfp; + tmpfp = &t; + } + + /* + * If we've never requested this page, then + * request everything between it and the first + * page we have. If we have requested this page + * then only request this record, not the entire gap. + */ + flags = 0; + memset(&max_pg_dbt, 0, sizeof(max_pg_dbt)); + /* + * If this is a PAGE_MORE and we're forcing then we want to + * force the request to ask for the next page after this one. + */ + if (FLD_ISSET(gapflags, REP_GAP_FORCE)) + tmpfp->pgno++; + else + tmpfp->pgno = rep->ready_pg; + msgsz = __REP_FILEINFO_SIZE + + tmpfp->uid.size + tmpfp->info.size; + if ((ret = __os_calloc(env, 1, msgsz, &buf)) != 0) + goto err; + if (rep->max_wait_pg == PGNO_INVALID || + FLD_ISSET(gapflags, REP_GAP_FORCE | REP_GAP_REREQUEST)) { + /* + * Request the gap - set max to waiting_pg - 1 or if + * there is no waiting_pg, just ask for one. + */ + if (rep->waiting_pg == PGNO_INVALID) { + if (FLD_ISSET(gapflags, + REP_GAP_FORCE | REP_GAP_REREQUEST)) + rep->max_wait_pg = curinfo->max_pgno; + else + rep->max_wait_pg = rep->ready_pg; + } else { + /* + * If we're forcing, and waiting_pg is less than + * the page we want to start this request at, then + * we set max_wait_pg to the max pgno in the file. + */ + if (FLD_ISSET(gapflags, REP_GAP_FORCE) && + rep->waiting_pg < tmpfp->pgno) + rep->max_wait_pg = curinfo->max_pgno; + else + rep->max_wait_pg = rep->waiting_pg - 1; + } + tmpfp->max_pgno = rep->max_wait_pg; + /* + * Gap requests are "new" and can go anywhere. + */ + if (FLD_ISSET(gapflags, REP_GAP_REREQUEST)) + flags = DB_REP_REREQUEST; + else + flags = DB_REP_ANYWHERE; + } else { + /* + * Request 1 page - set max to ready_pg. 
+ */ + rep->max_wait_pg = rep->ready_pg; + tmpfp->max_pgno = rep->ready_pg; + /* + * If we're dropping to singletons, this is a rerequest. + */ + flags = DB_REP_REREQUEST; + } + if ((master = rep->master_id) != DB_EID_INVALID) { + + STAT_INC(env, + rep, pg_request, rep->stat.st_pg_requested, master); + /* + * We need to request the pages, but we need to get the + * new info into rep->finfo. Assert that the sizes never + * change. The only thing this should do is change + * the pgno field. Everything else remains the same. + */ + if ((ret = __rep_fileinfo_marshal(env, rep->infoversion, + tmpfp, buf, msgsz, &len)) == 0) { + DB_INIT_DBT(max_pg_dbt, buf, len); + DB_ASSERT(env, len == max_pg_dbt.size); + (void)__rep_send_message(env, master, + REP_PAGE_REQ, NULL, &max_pg_dbt, 0, flags); + } + } else + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_MASTER_REQ, NULL, NULL, 0, 0); + + __os_free(env, buf); +err: + if (alloc) + __os_free(env, tmpfp); + return (ret); +} + +/* + * __rep_finfo_alloc - + * Allocate and initialize a fileinfo structure. + * + * PUBLIC: int __rep_finfo_alloc __P((ENV *, __rep_fileinfo_args *, + * PUBLIC: __rep_fileinfo_args **)); + */ +int +__rep_finfo_alloc(env, rfpsrc, rfpp) + ENV *env; + __rep_fileinfo_args *rfpsrc, **rfpp; +{ + __rep_fileinfo_args *rfp; + size_t size; + int ret; + void *uidp, *infop; + + /* + * Allocate enough for the structure and the two DBT data areas. + */ + size = sizeof(__rep_fileinfo_args) + rfpsrc->uid.size + + rfpsrc->info.size; + if ((ret = __os_malloc(env, size, &rfp)) != 0) + return (ret); + + /* + * Copy the structure itself, and then set the DBT data pointers + * to their space and copy the data itself as well. 
+ */ + memcpy(rfp, rfpsrc, sizeof(__rep_fileinfo_args)); + uidp = (u_int8_t *)rfp + sizeof(__rep_fileinfo_args); + rfp->uid.data = uidp; + memcpy(uidp, rfpsrc->uid.data, rfpsrc->uid.size); + + infop = (u_int8_t *)uidp + rfpsrc->uid.size; + rfp->info.data = infop; + memcpy(infop, rfpsrc->info.data, rfpsrc->info.size); + *rfpp = rfp; + return (ret); +} + +/* + * __rep_log_setup - + * We know our first LSN and need to reset the log subsystem + * to get our logs set up for the proper file. + */ +static int +__rep_log_setup(env, rep, file, version, lsnp) + ENV *env; + REP *rep; + u_int32_t file; + u_int32_t version; + DB_LSN *lsnp; +{ + DB_LOG *dblp; + DB_LSN lsn; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + LOG *lp; + int ret; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + /* + * Set up the log starting at the file number of the first LSN we + * need to get from the master. + */ + LOG_SYSTEM_LOCK(env); + if ((ret = __log_newfile(dblp, &lsn, file, version)) == 0 && + lsnp != NULL) + *lsnp = lsn; + LOG_SYSTEM_UNLOCK(env); + + /* + * We reset first_lsn to the lp->lsn. We were given the LSN of + * the checkpoint and we now need the LSN for the beginning of + * the file, which __log_newfile conveniently set up for us + * in lp->lsn. + */ + rep->first_lsn = lp->lsn; + TXN_SYSTEM_LOCK(env); + ZERO_LSN(region->last_ckp); + TXN_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __rep_queue_filedone - + * Determine if we're really done getting the pages for a queue file. + * Queue is handled in several steps. + * 1. First we get the meta page only. + * 2. We use the meta-page information to figure out first and last + * page numbers (and if queue wraps, first can be > last. + * 3. If first < last, we do a REP_PAGE_REQ for all pages. + * 4. If first > last, we REP_PAGE_REQ from first -> max page number. + * Then we'll ask for page 1 -> last. 
+ * + * This function can return several things: + * DB_REP_PAGEDONE - if we're done with this file. + * 0 - if we're not done with this file. + * error - if we get an error doing some operations. + * + * This function will open a dbp handle to the queue file. This is needed + * by most of the QAM macros. We'll open it on the first pass through + * here and we'll close it whenever we decide we're done. + */ +static int +__rep_queue_filedone(env, ip, rep, rfp) + ENV *env; + DB_THREAD_INFO *ip; + REP *rep; + __rep_fileinfo_args *rfp; +{ +#ifndef HAVE_QUEUE + COMPQUIET(ip, NULL); + COMPQUIET(rep, NULL); + COMPQUIET(rfp, NULL); + return (__db_no_queue_am(env)); +#else + DB *queue_dbp; + DB_REP *db_rep; + db_pgno_t first, last; + u_int32_t flags; + int empty, ret, t_ret; + + db_rep = env->rep_handle; + ret = 0; + queue_dbp = NULL; + if (db_rep->queue_dbc == NULL) { + /* + * We need to do a sync here so that the open + * can find the file and file id. + */ + if ((ret = __memp_sync_int(env, NULL, 0, + DB_SYNC_CACHE | DB_SYNC_INTERRUPT_OK, NULL, NULL)) != 0) + goto out; + if ((ret = + __db_create_internal(&queue_dbp, env, 0)) != 0) + goto out; + flags = DB_NO_AUTO_COMMIT | + (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0); + /* + * We need to check whether this is in-memory so that we pass + * the name correctly as either the file or the database name. + */ + if ((ret = __db_open(queue_dbp, ip, NULL, + FLD_ISSET(rfp->db_flags, DB_AM_INMEM) ? NULL : + rfp->info.data, + FLD_ISSET(rfp->db_flags, DB_AM_INMEM) ? 
rfp->info.data : + NULL, + DB_QUEUE, flags, 0, PGNO_BASE_MD)) != 0) + goto out; + + if ((ret = __db_cursor(queue_dbp, + ip, NULL, &db_rep->queue_dbc, 0)) != 0) + goto out; + } else + queue_dbp = db_rep->queue_dbc->dbp; + + if ((ret = __queue_pageinfo(queue_dbp, + &first, &last, &empty, 0, 0)) != 0) + goto out; + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Queue fileinfo: first %lu, last %lu, empty %d", + (u_long)first, (u_long)last, empty)); + /* + * We can be at the end of 3 possible states. + * 1. We have received the meta-page and now need to get the + * rest of the pages in the database. + * 2. We have received from first -> max_pgno. We might be done, + * or we might need to ask for wrapped pages. + * 3. We have received all pages in the file. We're done. + */ + if (rfp->max_pgno == 0) { + /* + * We have just received the meta page. Set up the next + * pages to ask for and check if the file is empty. + */ + if (empty) + goto out; + if (first > last) { + rfp->max_pgno = + QAM_RECNO_PAGE(db_rep->queue_dbc->dbp, UINT32_MAX); + } else + rfp->max_pgno = last; + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Queue fileinfo: First req: first %lu, last %lu", + (u_long)first, (u_long)rfp->max_pgno)); + goto req; + } else if (rfp->max_pgno != last) { + /* + * If max_pgno != last that means we're dealing with a + * wrapped situation. Request next batch of pages. + * Set npages to 1 because we already have page 0, the + * meta-page, now we need pages 1-max_pgno. + */ + first = 1; + rfp->max_pgno = last; + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Queue fileinfo: Wrap req: first %lu, last %lu", + (u_long)first, (u_long)last)); +req: + /* + * Since we're simulating a "gap" to resend new PAGE_REQ + * for this file, we need to set waiting page to last + 1 + * so that we'll ask for all from ready_pg -> last. 
+ */ + rep->npages = first; + rep->ready_pg = first; + rep->waiting_pg = rfp->max_pgno + 1; + rep->max_wait_pg = PGNO_INVALID; + ret = __rep_pggap_req(env, rep, rfp, 0); + return (ret); + } + /* + * max_pgno == last + * If we get here, we have all the pages we need. + * Close the dbp and return. + */ +out: + if (db_rep->queue_dbc != NULL && + (t_ret = __dbc_close(db_rep->queue_dbc)) != 0 && ret == 0) + ret = t_ret; + db_rep->queue_dbc = NULL; + + if (queue_dbp != NULL && + (t_ret = __db_close(queue_dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) + ret = DB_REP_PAGEDONE; + return (ret); +#endif +} + +/* + * PUBLIC: int __rep_remove_init_file __P((ENV *)); + */ +int +__rep_remove_init_file(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + int ret; + char *name; + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * If running in-memory replication, return without any file + * operations. + */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) + return (0); + + /* Abbreviated internal init doesn't use an init file. */ + if (F_ISSET(rep, REP_F_ABBREVIATED)) + return (0); + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_INITNAME, NULL, &name)) != 0) + return (ret); + (void)__os_unlink(env, name, 0); + __os_free(env, name); + return (0); +} + +/* + * Checks for the existence of the internal init flag file. If it exists, we + * remove all logs and databases, and then remove the flag file. This is + * intended to force the internal init to start over again, and thus affords + * protection against a client crashing during internal init. This function + * must be called before normal recovery in order to be properly effective. + * + * !!! + * This function should only be called during initial set-up of the environment, + * before various subsystems are initialized. It doesn't rely on the + * subsystems' code having been initialized, and it summarily deletes files "out + * from under" them, which might disturb the subsystems if they were up. 
+ * + * PUBLIC: int __rep_reset_init __P((ENV *)); + */ +int +__rep_reset_init(env) + ENV *env; +{ + DB_FH *fhp; + __rep_update_args *rup; + DBT dbt; + char *allocated_dir, *dir, *init_name; + size_t cnt; + u_int32_t dbtvers, fvers, zero; + u_int8_t *next; + int ret, t_ret; + + allocated_dir = NULL; + rup = NULL; + dbt.data = NULL; + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_INITNAME, NULL, &init_name)) != 0) + return (ret); + + if ((ret = __os_open( + env, init_name, 0, DB_OSO_RDONLY, DB_MODE_600, &fhp)) != 0) { + if (ret == ENOENT) + ret = 0; + goto out; + } + + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Cleaning up interrupted internal init")); + + /* There are a few possibilities: + * 1. no init file, or less than 1 full file list + * 2. exactly one full file list + * 3. more than one, less then a second full file list + * 4. second file list in full + * + * In cases 2 or 4, we need to remove all logs, and then remove files + * according to the (most recent) file list. (In case 1 or 3, we don't + * have to do anything.) + * + * The __rep_get_file_list function takes care of folding these cases + * into two simple outcomes. + * + * As of 4.7, the first 4 bytes are 0. Read the first 4 bytes now. + * If they are non-zero it means we have an old-style init file. + * Otherwise, pass the file version in to rep_get_file_list. + */ + if ((ret = __os_read(env, fhp, &zero, sizeof(zero), &cnt)) != 0) + goto out; + /* + * If we read successfully, but not enough, then unlink the file. + */ + if (cnt != sizeof(zero)) + goto rm; + if (zero != 0) { + /* + * Old style file. We have to set fvers to the 4.6 + * version of the file and also rewind the file so + * that __rep_get_file_list can read out the length itself. 
+ */ + if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) + goto out; + fvers = REP_INITVERSION_46; + } else if ((ret = __os_read(env, + fhp, &fvers, sizeof(fvers), &cnt)) != 0) + goto out; + else if (cnt != sizeof(fvers)) + goto rm; + ret = __rep_get_file_list(env, fhp, fvers, &dbtvers, &dbt); + if ((t_ret = __os_closehandle(env, fhp)) != 0 || ret != 0) { + if (ret == 0) + ret = t_ret; + goto out; + } + if (dbt.data == NULL) { + /* + * The init file did not end with an intact file list. Since we + * never start log/db removal without an intact file list + * sync'ed to the init file, this must mean we don't have any + * partial set of files to clean up. So all we need to do is + * remove the init file. + */ + goto rm; + } + + /* Remove all log files. */ + if (env->dbenv->db_log_dir == NULL) + dir = env->db_home; + else { + if ((ret = __db_appname(env, + DB_APP_NONE, env->dbenv->db_log_dir, NULL, &dir)) != 0) + goto out; + allocated_dir = dir; + } + + if ((ret = __rep_remove_by_prefix(env, + dir, LFPREFIX, sizeof(LFPREFIX)-1, DB_APP_LOG)) != 0) + goto out; + + /* + * Remove databases according to the list, and queue extent files by + * searching them out on a walk through the data_dir's. + */ + if ((ret = __rep_update_unmarshal(env, dbtvers, + &rup, dbt.data, dbt.size, &next)) != 0) + goto out; + if ((ret = __rep_unlink_by_list(env, dbtvers, + next, dbt.size, rup->num_files)) != 0) + goto out; + + /* Here, we've established that the file exists. */ +rm: (void)__os_unlink(env, init_name, 0); +out: if (rup != NULL) + __os_free(env, rup); + if (allocated_dir != NULL) + __os_free(env, allocated_dir); + if (dbt.data != NULL) + __os_free(env, dbt.data); + + __os_free(env, init_name); + return (ret); +} + +/* + * Reads the last fully intact file list from the init file. If the file ends + * with a partial list (or is empty), we're not interested in it. Lack of a + * full file list is indicated by a NULL dbt->data. 
On success, the list is + * returned in allocated space, which becomes the responsibility of the caller. + * + * The file format is a u_int32_t buffer length, in native format, followed by + * the file list itself, in the same format as in an UPDATE message (though + * many parts of it in this case are meaningless). + */ +static int +__rep_get_file_list(env, fhp, fvers, dbtvers, dbt) + ENV *env; + DB_FH *fhp; + u_int32_t fvers; + u_int32_t *dbtvers; + DBT *dbt; +{ +#ifdef HAVE_REPLICATION_THREADS + DBT mgrdbt; +#endif + u_int32_t length, mvers; + size_t cnt; + int i, ret; + + /* At most 2 file lists: old and new. */ + dbt->data = NULL; + mvers = DB_REPVERSION_46; + length = 0; +#ifdef HAVE_REPLICATION_THREADS + mgrdbt.data = NULL; +#endif + for (i = 1; i <= 2; i++) { + if (fvers >= REP_INITVERSION_47) { + if ((ret = __os_read(env, fhp, &mvers, + sizeof(mvers), &cnt)) != 0) + goto err; + if (cnt == 0 && dbt->data != NULL) + break; + if (cnt != sizeof(mvers)) + goto err; + } + if ((ret = __os_read(env, + fhp, &length, sizeof(length), &cnt)) != 0) + goto err; + + /* + * Reaching the end here is fine, if we've been through at least + * once already. + */ + if (cnt == 0 && dbt->data != NULL) + break; + if (cnt != sizeof(length)) + goto err; + + if ((ret = __os_realloc(env, + (size_t)length, &dbt->data)) != 0) + goto err; + + if ((ret = __os_read( + env, fhp, dbt->data, length, &cnt)) != 0 || + cnt != (size_t)length) + goto err; + } + +#ifdef HAVE_REPLICATION_THREADS + if (i == 3) { + if ((ret = __os_read(env, fhp, + &mgrdbt.size, sizeof(mgrdbt.size), &cnt)) != 0) + goto err; + if (cnt == 0) + goto absent; + if (cnt != sizeof(mgrdbt.size)) + goto err; + if ((ret = __os_malloc(env, + (size_t)mgrdbt.size, &mgrdbt.data)) != 0) + goto err; + if ((ret = __os_read(env, fhp, + mgrdbt.data, mgrdbt.size, &cnt)) != 0 && + cnt != (size_t)mgrdbt.size) + goto err; + /* Repmgr takes ownership of the allocated memory. 
*/ + if ((ret = __repmgr_init_restore(env, &mgrdbt)) != 0) + goto err; + } +absent: +#endif + + *dbtvers = mvers; + dbt->size = length; + return (0); + +err: +#ifdef HAVE_REPLICATION_THREADS + if (mgrdbt.data != NULL) + __os_free(env, mgrdbt.data); +#endif + /* + * Note that it's OK to get here with a zero value in 'ret': it means we + * read less than we expected, and dbt->data == NULL indicates to the + * caller that we don't have an intact list. + */ + if (dbt->data != NULL) + __os_free(env, dbt->data); + dbt->data = NULL; + return (ret); +} + +/* + * Removes every file in a given directory that matches a given prefix. Notice + * how similar this is to __rep_walk_dir. + */ +static int +__rep_remove_by_prefix(env, dir, prefix, pref_len, appname) + ENV *env; + const char *dir; + const char *prefix; + size_t pref_len; + APPNAME appname; /* What kind of name. */ +{ + char *namep, **names; + int cnt, i, ret; + + if ((ret = __os_dirlist(env, dir, 0, &names, &cnt)) != 0) + return (ret); + for (i = 0; i < cnt; i++) { + if (strncmp(names[i], prefix, pref_len) == 0) { + if ((ret = __db_appname(env, + appname, names[i], NULL, &namep)) != 0) + goto out; + (void)__os_unlink(env, namep, 0); + __os_free(env, namep); + } + } +out: __os_dirfree(env, names, cnt); + return (ret); +} + +/* + * Removes database files according to the contents of a list. + * + * This function must support removal either during environment creation, or + * when an internal init is reset in the middle. This means it must work + * regardless of whether underlying subsystems are initialized. However, it may + * assume that databases are not open. That means there is no REP! 
+ */ +static int +__rep_unlink_by_list(env, version, files, size, count) + ENV *env; + u_int32_t version; + u_int8_t *files; + u_int32_t size; + u_int32_t count; +{ + DB_ENV *dbenv; + char **ddir, *dir; + int ret; + + dbenv = env->dbenv; + + if ((ret = __rep_walk_filelist(env, version, + files, size, count, __rep_unlink_file, NULL)) != 0) + goto out; + + /* Notice how similar this code is to __rep_find_dbs. */ + if (dbenv->db_data_dir == NULL) + ret = __rep_remove_by_prefix(env, env->db_home, + QUEUE_EXTENT_PREFIX, sizeof(QUEUE_EXTENT_PREFIX) - 1, + DB_APP_DATA); + else { + for (ddir = dbenv->db_data_dir; *ddir != NULL; ++ddir) { + if ((ret = __db_appname(env, + DB_APP_NONE, *ddir, NULL, &dir)) != 0) + break; + ret = __rep_remove_by_prefix(env, dir, + QUEUE_EXTENT_PREFIX, sizeof(QUEUE_EXTENT_PREFIX)-1, + DB_APP_DATA); + __os_free(env, dir); + if (ret != 0) + break; + } + } + +out: + return (ret); +} + +static int +__rep_unlink_file(env, rfp, unused) + ENV *env; + __rep_fileinfo_args *rfp; + void *unused; +{ + char *namep; + int ret; + + COMPQUIET(unused, NULL); + + if ((ret = __db_appname(env, + DB_APP_DATA, rfp->info.data, NULL, &namep)) == 0) { + (void)__os_unlink(env, namep, 0); + __os_free(env, namep); + } + return (ret); +} + +static int +__rep_remove_by_list(env, rfp, unused) + ENV *env; + __rep_fileinfo_args *rfp; + void *unused; +{ + int ret; + + COMPQUIET(unused, NULL); + + if ((ret = __rep_remove_file(env, rfp, NULL)) == ENOENT) { + /* + * If the file already doesn't exist, that's perfectly + * OK. This can easily happen if we're cleaning up an + * interrupted internal init, and we only got part-way + * through the list of files. 
+ */ + ret = 0; + } + return (ret); +} + +static int +__rep_walk_filelist(env, version, files, size, count, fn, arg) + ENV *env; + u_int32_t version; + u_int8_t *files; + u_int32_t size; + u_int32_t count; + FILE_WALK_FN *fn; + void *arg; +{ + __rep_fileinfo_args *rfp; + u_int8_t *next; + int ret; + + ret = 0; + rfp = NULL; + while (count-- > 0) { + if ((ret = __rep_fileinfo_unmarshal(env, version, + &rfp, files, size, &next)) != 0) + break; + size -= (u_int32_t)(next - files); + files = next; + + if ((ret = (*fn)(env, rfp, arg)) != 0) + break; + __os_free(env, rfp); + rfp = NULL; + } + + if (rfp != NULL) + __os_free(env, rfp); + return (ret); +} diff --git a/src/rep/rep_elect.c b/src/rep/rep_elect.c new file mode 100644 index 00000000..fd86cdf0 --- /dev/null +++ b/src/rep/rep_elect.c @@ -0,0 +1,1481 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +/* + * We need to check sites == nsites, not more than half + * like we do in __rep_elect and the VOTE2 code. The + * reason is that we want to process all the incoming votes + * and not short-circuit once we reach more than half. The + * real winner's vote may be in the last half. 
+ */ +#define IS_PHASE1_DONE(rep) \ + ((rep)->sites >= (rep)->nsites && (rep)->winner != DB_EID_INVALID) + +#define I_HAVE_WON(rep, winner) \ + ((rep)->votes >= (rep)->nvotes && winner == (rep)->eid) + +static void __rep_cmp_vote __P((ENV *, REP *, int, DB_LSN *, + u_int32_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t)); +static int __rep_elect_init + __P((ENV *, u_int32_t, u_int32_t, int *, u_int32_t *)); +static int __rep_fire_elected __P((ENV *, REP *, u_int32_t)); +static void __rep_elect_master __P((ENV *, REP *)); +static int __rep_grow_sites __P((ENV *, u_int32_t)); +static void __rep_send_vote __P((ENV *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t, int, + u_int32_t, u_int32_t)); +static int __rep_tally __P((ENV *, REP *, int, u_int32_t *, u_int32_t, int)); +static int __rep_wait __P((ENV *, db_timeout_t *, int, u_int32_t, u_int32_t)); + +/* + * __rep_elect_pp -- + * Called after master failure to hold/participate in an election for + * a new master. + * + * PUBLIC: int __rep_elect_pp + * PUBLIC: __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__rep_elect_pp(dbenv, given_nsites, nvotes, flags) + DB_ENV *dbenv; + u_int32_t given_nsites, nvotes; + u_int32_t flags; +{ + DB_REP *db_rep; + ENV *env; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + ret = 0; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_elect", DB_INIT_REP); + + if (APP_IS_REPMGR(env)) { + __db_errx(env, DB_STR("3527", +"DB_ENV->rep_elect: cannot call from Replication Manager application")); + return (EINVAL); + } + + /* We need a transport function because we send messages. 
*/ + if (db_rep->send == NULL) { + __db_errx(env, DB_STR("3528", + "DB_ENV->rep_elect: must be called after DB_ENV->rep_set_transport")); + return (EINVAL); + } + + if (!IS_REP_STARTED(env)) { + __db_errx(env, DB_STR("3529", + "DB_ENV->rep_elect: must be called after DB_ENV->rep_start")); + return (EINVAL); + } + + if (IS_USING_LEASES(env) && given_nsites != 0) { + __db_errx(env, DB_STR("3530", + "DB_ENV->rep_elect: nsites must be zero if leases configured")); + return (EINVAL); + } + + ret = __rep_elect_int(env, given_nsites, nvotes, flags); + + /* + * The DB_REP_IGNORE return code can be of use to repmgr (which of + * course calls __rep_elect_int directly), but it may be too subtle to be + * useful for (Base API) applications: so preserve the pre-existing API + * behavior for applications by making this look like a 0. + */ + if (ret == DB_REP_IGNORE) + ret = 0; + return (ret); +} + +/* + * __rep_elect_int -- + * Internal processing to hold/participate in an election for + * a new master after master failure. + * + * PUBLIC: int __rep_elect_int + * PUBLIC: __P((ENV *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__rep_elect_int(env, given_nsites, nvotes, flags) + ENV *env; + u_int32_t given_nsites, nvotes; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REP *rep; + int done, elected, in_progress; + int need_req, ret, send_vote, t_ret; + u_int32_t ack, ctlflags, data_gen, egen, nsites; + u_int32_t orig_tally, priority, realpri, repflags, tiebreaker; + db_timeout_t timeout; + + COMPQUIET(flags, 0); + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + elected = 0; + egen = 0; + ret = 0; + + /* + * Specifying 0 for nsites signals us to use the value configured + * previously via rep_set_nsites. Similarly, if the given nvotes is 0, + * it asks us to compute the value representing a simple majority. + */ + nsites = given_nsites == 0 ? 
rep->config_nsites : given_nsites; + ack = nvotes == 0 ? ELECTION_MAJORITY(nsites) : nvotes; + + /* + * XXX + * If users give us less than a majority, they run the risk of + * having a network partition. However, this also allows the + * scenario of master/1 client to elect the client. Allow + * sub-majority values, but give a warning. + */ + if (ack <= (nsites / 2)) { + __db_errx(env, DB_STR_A("3531", + "DB_ENV->rep_elect:WARNING: nvotes (%d) is sub-majority with nsites (%d)", + "%d %d"), nvotes, nsites); + } + + if (nsites < ack) { + __db_errx(env, DB_STR_A("3532", + "DB_ENV->rep_elect: nvotes (%d) is larger than nsites (%d)", + "%d %d"), ack, nsites); + return (EINVAL); + } + + realpri = rep->priority; + + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Start election nsites %d, ack %d, priority %d", + nsites, ack, realpri)); + + /* + * Special case when having an election while running with + * sites of potentially mixed versions. We set a bit indicating + * we're an electable site, but set our priority to 0. + * Old sites will never elect us, with 0 priority, but if all + * we have are new sites, then we can elect the best electable + * site of the group. + * Thus 'priority' is this special, possibly-fake, effective + * priority that we'll use for this election, while 'realpri' is our + * real, configured priority, as retrieved from REP region. + */ + ctlflags = realpri != 0 ? REPCTL_ELECTABLE : 0; + ENV_ENTER(env, ip); + + orig_tally = 0; + /* If we are already master, simply broadcast that fact and return. */ + if (F_ISSET(rep, REP_F_MASTER)) { +master: LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); + if (IS_USING_LEASES(env)) + ret = __rep_lease_refresh(env); + if (ret == 0) + ret = DB_REP_IGNORE; + goto envleave; + } + REP_SYSTEM_LOCK(env); + + /* + * If leases are configured, wait for them to expire, and + * see if we can discover the master while waiting. 
+ */ + if (IS_USING_LEASES(env) && + (timeout = __rep_lease_waittime(env)) != 0) { + FLD_SET(rep->elect_flags, REP_E_PHASE0); + egen = rep->egen; + REP_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_MASTER_REQ, NULL, NULL, 0, 0); + + /* + * The only possible non-zero return from __rep_wait() is a + * panic for a mutex failure. So the state of the PHASE0 flag + * doesn't matter much. If that changes in the future, it is + * still best not to clear the flag after an error, because + * another thread might be in the middle of its PHASE0 wait (and + * not getting an error), so we wouldn't want to cut short its + * wait. If there isn't another concurrent thread, the worst + * that would happen would be that we would leave the flag set, + * until the next time we came through here and completed a + * wait. Note that the code here is the only place where we + * check this flag. + */ + if ((ret = __rep_wait(env, + &timeout, 0, egen, REP_E_PHASE0)) != 0) + goto envleave; + REP_SYSTEM_LOCK(env); + repflags = rep->elect_flags; + FLD_CLR(rep->elect_flags, REP_E_PHASE0); + /* + * If any other thread cleared PHASE0 while we were waiting, + * then we're done. Either we heard from a master, or some + * other thread completed its PHASE0 wait. + * + * Or, we could have waited long enough for our lease grant to + * expire. Check it to make sure. + */ + RPRINT(env, (env, DB_VERB_REP_ELECT, + "after PHASE0 wait, flags 0x%x, elect_flags 0x%x", + rep->flags, rep->elect_flags)); + if (!FLD_ISSET(repflags, REP_E_PHASE0) || + __rep_islease_granted(env) || egen != rep->egen) + goto unlck_lv; + F_SET(rep, REP_F_LEASE_EXPIRED); + } + + /* + * After acquiring the mutex, and possibly waiting for leases to + * expire, without the mutex, we need to recheck our state. It + * may have changed. If we are now master, we're done. 
+ */ + if (F_ISSET(rep, REP_F_MASTER)) { + REP_SYSTEM_UNLOCK(env); + goto master; + } + if ((ret = __rep_elect_init(env, nsites, ack, + &in_progress, &orig_tally)) != 0) + goto unlck_lv; + /* + * If another thread is in the middle of an election we + * just quietly return and not interfere. + */ + if (in_progress) { + ret = DB_REP_IGNORE; + goto unlck_lv; + } + + /* + * Count threads in the guts of rep_elect, so that we only clear + * lockouts when the last thread is finishing. The "guts" start here, + * and do not include the above test where we "quietly return" via + * envleave. + * + * Closely associated with that is the notion that the current thread + * "owns" the right to process the election at the current egen. We set + * the local variable "egen" now to "our" egen; if rep->egen ever + * advances "out from under us" we know it's time to yield to a new + * generation. Our egen value was vetted in __rep_elect_init(), and we + * have not dropped the mutex since then. + * + * Other than occasionally checking that "our" egen still matches the + * current latest rep->egen, there should be no use of rep->egen in this + * function after this point. + */ + rep->elect_th++; + egen = rep->egen; + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Election thread owns egen %lu", (u_long)egen)); + + priority = lp->persist.version != DB_LOGVERSION ? 0 : realpri; +#ifdef CONFIG_TEST + /* + * This allows us to unit test the ELECTABLE flag simply by + * using the priority values. + */ + if (priority > 0 && priority <= 5) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Artificially setting priority 0 (ELECTABLE) for CONFIG_TEST mode")); + DB_ASSERT(env, ctlflags == REPCTL_ELECTABLE); + priority = 0; + } +#endif + __os_gettime(env, &rep->etime, 1); + + /* + * Default to the normal timeout unless the user configured + * a full election timeout and we think we need a full election. 
+ */ + rep->full_elect = 0; + timeout = rep->elect_timeout; + if (!F_ISSET(rep, REP_F_GROUP_ESTD) && rep->full_elect_timeout != 0) { + rep->full_elect = 1; + timeout = rep->full_elect_timeout; + } + + /* + * We need to lockout applying incoming log records during + * the election. We need to use a special rep_lockout_apply + * instead of rep_lockout_msg because we do not want to + * lockout all incoming messages, like other VOTEs! + */ + if ((ret = __rep_lockout_apply(env, rep, 0)) != 0) + goto err_locked; + if ((ret = __rep_lockout_archive(env, rep)) != 0) + goto err_locked; + + /* + * Since the lockout step (above) could have dropped the mutex, we must + * check to see if we still own the right to proceed with the election + * at this egen. + */ + if (rep->egen != egen) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Found egen %lu, abandon my election at egen %lu", + (u_long)rep->egen, (u_long)egen)); + goto err_locked; + } + + /* Generate a randomized tiebreaker value. */ + __os_unique_id(env, &tiebreaker); + + FLD_SET(rep->elect_flags, REP_E_PHASE1); + FLD_CLR(rep->elect_flags, REP_E_TALLY); + /* + * We made sure that leases were expired before starting the + * election, but an existing master may be slow in responding. + * If, during lockout, acquiring mutexes, etc, the client has now + * re-granted its lease, we're done - a master exists. + */ + if (IS_USING_LEASES(env) && + __rep_islease_granted(env)) { + ret = 0; + goto err_locked; + } + + /* + * If we are in the middle of recovering or internal + * init, we participate, but we set our priority to 0 + * and turn off REPCTL_ELECTABLE. Check whether we + * are in an internal init state. If not, + * then that is okay, we can be elected (i.e. we are not + * in an inconsistent state). 
+ */ + INIT_LSN(lsn); + if (ISSET_LOCKOUT_BDB(rep) || IN_INTERNAL_INIT(rep) || + rep->sync_state == SYNC_UPDATE) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Setting priority 0, unelectable, due to internal init/recovery")); + priority = 0; + ctlflags = 0; + data_gen = 0; + } else { + /* + * Use the last commit record as the LSN in the vote. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto err_locked; + /* + * If we've walked back and there are no commit records, + * then reset LSN to INIT_LSN. + */ + if ((ret = __rep_log_backup(env, + logc, &lsn, REP_REC_COMMIT)) == DB_NOTFOUND) { + INIT_LSN(lsn); + ret = 0; + } + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + goto err_locked; + if ((ret = __rep_get_datagen(env, &data_gen)) != 0) + goto err_locked; + } + + /* + * We are about to participate at this egen. We must + * write out the next egen before participating in this one + * so that if we crash we can never participate in this egen + * again. + */ + if ((ret = __rep_write_egen(env, rep, egen + 1)) != 0) + goto err_locked; + + /* Tally our own vote */ + if ((ret = __rep_tally(env, rep, rep->eid, &rep->sites, egen, 1)) + != 0) { + /* + * __rep_tally is telling us that this vote is a duplicate. But + * this is our own vote in this case, and that should be + * impossible for a given egen. + */ + DB_ASSERT(env, ret != DB_REP_IGNORE); + goto err_locked; + } + __rep_cmp_vote(env, rep, rep->eid, &lsn, priority, rep->gen, data_gen, + tiebreaker, ctlflags); + + RPRINT(env, (env, DB_VERB_REP_ELECT, "Beginning an election")); + + /* + * Now send vote, remembering the details in case we need them later in + * order to send out a duplicate VOTE1. We must save the nsites and + * nvotes values that we originally send in the VOTE1 message, separate + * from rep->nsites and rep->nvotes, since the latter can change when we + * receive a VOTE1 from another site. 
+ */ + send_vote = DB_EID_INVALID; + done = IS_PHASE1_DONE(rep); + rep->vote1.lsn = lsn; + rep->vote1.nsites = nsites; + rep->vote1.nvotes = ack; + rep->vote1.priority = priority; + rep->vote1.tiebreaker = tiebreaker; + rep->vote1.ctlflags = ctlflags; + rep->vote1.data_gen = data_gen; + REP_SYSTEM_UNLOCK(env); + + __rep_send_vote(env, &lsn, nsites, ack, priority, tiebreaker, egen, + data_gen, DB_EID_BROADCAST, REP_VOTE1, ctlflags); + DB_ENV_TEST_RECOVERY(env, DB_TEST_ELECTVOTE1, ret, NULL); + if (done) { + REP_SYSTEM_LOCK(env); + goto vote; + } + + ret = __rep_wait(env, &timeout, rep->full_elect, egen, REP_E_PHASE1); + REP_SYSTEM_LOCK(env); + if (ret != 0) + goto err_locked; + if (rep->egen > egen) + /* + * For one reason or another, this election cycle is over; it + * doesn't matter why. + */ + goto out; + + if (FLD_ISSET(rep->elect_flags, REP_E_PHASE2)) { + /* Received enough votes while waiting to move us to phase 2. */ + REP_SYSTEM_UNLOCK(env); + goto phase2; + } + + /* + * If we got here, we haven't heard from everyone, but we've + * run out of time, so it's time to decide if we have enough + * votes to pick a winner and if so, to send out a vote to + * the winner. + */ + if (rep->sites >= rep->nvotes) { +vote: + /* We think we've seen enough to cast a vote. */ + send_vote = rep->winner; + /* + * See if we won. This will make sure we + * don't count ourselves twice if we're racing + * with incoming votes. + */ + if (rep->winner == rep->eid) { + if ((ret =__rep_tally(env, + rep, rep->eid, &rep->votes, egen, 2)) != 0 && + ret != DB_REP_IGNORE) + goto err_locked; + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Counted my vote %d", rep->votes)); + } + FLD_SET(rep->elect_flags, REP_E_PHASE2); + FLD_CLR(rep->elect_flags, REP_E_PHASE1); + } + if (send_vote == DB_EID_INVALID) { + /* We do not have enough votes to elect. 
*/ + if (rep->sites >= rep->nvotes) + __db_errx(env, DB_STR_A("3533", + "No electable site found: recvd %d of %d votes from %d sites", + "%d %d %d"), rep->sites, rep->nvotes, rep->nsites); + else + __db_errx(env, DB_STR_A("3534", + "Not enough votes to elect: recvd %d of %d from %d sites", + "%d %d %d"), rep->sites, rep->nvotes, rep->nsites); + ret = DB_REP_UNAVAIL; + goto err_locked; + } + REP_SYSTEM_UNLOCK(env); + + /* + * We have seen enough vote1's. Now we need to wait + * for all the vote2's. + */ + if (send_vote != rep->eid) { + RPRINT(env, (env, DB_VERB_REP_ELECT, "Sending vote")); + __rep_send_vote(env, NULL, 0, 0, 0, 0, egen, 0, + send_vote, REP_VOTE2, 0); + /* + * If we are NOT the new master we want to send + * our vote to the winner, and wait longer. The + * reason is that the winner may be "behind" us + * in the election waiting and if the master is + * down, the winner will wait the full timeout + * and we want to give the winner enough time to + * process all the votes. Otherwise we could + * incorrectly return DB_REP_UNAVAIL and start a + * new election before the winner can declare + * itself. + */ + timeout = timeout * 2; + } + +phase2: + if (I_HAVE_WON(rep, rep->winner)) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Skipping phase2 wait: already got %d votes", rep->votes)); + REP_SYSTEM_LOCK(env); + goto i_won; + } + ret = __rep_wait(env, &timeout, rep->full_elect, egen, REP_E_PHASE2); + REP_SYSTEM_LOCK(env); + /* + * Since at "err_locked" we're expected to have the lock, it's convenient + * to acquire it before testing "ret" here, since we need it anyway for + * the following stuff. + */ + if (ret != 0) + goto err_locked; + if (rep->egen > egen || !IN_ELECTION(rep)) + goto out; + + /* We must have timed out. 
*/ + ret = DB_REP_UNAVAIL; + + RPRINT(env, (env, DB_VERB_REP_ELECT, + "After phase 2: votes %d, nvotes %d, nsites %d", + rep->votes, rep->nvotes, rep->nsites)); + + if (I_HAVE_WON(rep, rep->winner)) { +i_won: __rep_elect_master(env, rep); + ret = 0; + elected = 1; + } +err_locked: + /* + * If we get here because of a non-election error, then we did not tally + * our vote. In that case we do not want to discard all known election + * info. + */ + if (ret == 0 || ret == DB_REP_UNAVAIL) + __rep_elect_done(env, rep); + else if (orig_tally) + FLD_SET(rep->elect_flags, orig_tally); + +#ifdef CONFIG_TEST + if (0) { +DB_TEST_RECOVERY_LABEL + REP_SYSTEM_LOCK(env); + } +#endif + +out: + /* + * We're leaving, so decrement thread count. If it's still >0 after + * that, another thread has come along to handle a later egen. Only the + * last thread to come through here should clear the lockouts. + */ + need_req = 0; + DB_ASSERT(env, rep->elect_th > 0); + rep->elect_th--; + if (rep->elect_th == 0) { + need_req = F_ISSET(rep, REP_F_SKIPPED_APPLY) && + !I_HAVE_WON(rep, rep->winner); + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_APPLY); + F_CLR(rep, REP_F_SKIPPED_APPLY); + } + /* + * Only clear archiving lockout if the election failed. If + * it succeeded, we keep archiving disabled until we either + * become master or complete synchronization with a master. + */ + if (ret != 0 && rep->elect_th == 0) + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE); + REP_SYSTEM_UNLOCK(env); + /* + * If we skipped any log records, request them now. + */ + if (need_req && (t_ret = __rep_resend_req(env, 0)) != 0 && + (ret == 0 || ret == DB_REP_UNAVAIL || ret == DB_REP_IGNORE)) + ret = t_ret; + + /* Note that "elected" implies ret cannot be DB_REP_UNAVAIL here. */ + if (elected) { + /* + * The only way ret can be non-zero is if __rep_resend_req() + * failed. So we don't have to check for UNAVAIL and IGNORE in + * deciding whether we're overwriting ret, as we did above. 
+ */ + DB_ASSERT(env, ret != DB_REP_UNAVAIL && ret != DB_REP_IGNORE); + if ((t_ret = __rep_fire_elected(env, rep, egen)) != 0 && + ret == 0) + ret = t_ret; + } + + RPRINT(env, (env, DB_VERB_REP_ELECT, + "%s %d, e_th %lu, egen %lu, flag 0x%lx, e_fl 0x%lx, lo_fl 0x%lx", + "Ended election with ", ret, + (u_long) rep->elect_th, (u_long)rep->egen, + (u_long)rep->flags, (u_long)rep->elect_flags, + (u_long)rep->lockout_flags)); + + if (0) { +unlck_lv: REP_SYSTEM_UNLOCK(env); + } +envleave: + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __rep_vote1 -- + * Handle incoming vote1 message on a client. + * + * PUBLIC: int __rep_vote1 __P((ENV *, __rep_control_args *, DBT *, int)); + */ +int +__rep_vote1(env, rp, rec, eid) + ENV *env; + __rep_control_args *rp; + DBT *rec; + int eid; +{ + DBT data_dbt; + DB_LOG *dblp; + DB_LSN lsn; + DB_REP *db_rep; + LOG *lp; + REP *rep; + REP_OLD_VOTE_INFO *ovi; + VOTE1_CONTENT vote1; + __rep_egen_args egen_arg; + __rep_vote_info_v5_args tmpvi5; + __rep_vote_info_args tmpvi, *vi; + u_int32_t egen; + int elected, master, resend, ret; + u_int8_t buf[__REP_MAXMSG_SIZE]; + size_t len; + + COMPQUIET(egen, 0); + + elected = resend = ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (F_ISSET(rep, REP_F_MASTER)) { + RPRINT(env, (env, DB_VERB_REP_ELECT, "Master received vote")); + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); + return (ret); + } + + /* + * In 4.7 we changed to having fixed sized u_int32_t's from + * non-fixed 'int' fields in the vote structure. 
+ */ + if (rp->rep_version < DB_REPVERSION_47) { + ovi = (REP_OLD_VOTE_INFO *)rec->data; + tmpvi.egen = ovi->egen; + tmpvi.nsites = (u_int32_t)ovi->nsites; + tmpvi.nvotes = (u_int32_t)ovi->nvotes; + tmpvi.priority = (u_int32_t)ovi->priority; + tmpvi.tiebreaker = ovi->tiebreaker; + tmpvi.data_gen = 0; + } else if (rp->rep_version < DB_REPVERSION_52) { + if ((ret = __rep_vote_info_v5_unmarshal(env, + &tmpvi5, rec->data, rec->size, NULL)) != 0) + return (ret); + tmpvi.egen = tmpvi5.egen; + tmpvi.nsites = tmpvi5.nsites; + tmpvi.nvotes = tmpvi5.nvotes; + tmpvi.priority = tmpvi5.priority; + tmpvi.tiebreaker = tmpvi5.tiebreaker; + tmpvi.data_gen = 0; + } else + if ((ret = __rep_vote_info_unmarshal(env, + &tmpvi, rec->data, rec->size, NULL)) != 0) + return (ret); + vi = &tmpvi; + REP_SYSTEM_LOCK(env); + + /* + * If we get a vote from a later election gen, we + * clear everything from the current one, and we'll + * start over by tallying it. If we get an old vote, + * send an ALIVE to the old participant. + */ + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Received vote1 egen %lu, egen %lu", + (u_long)vi->egen, (u_long)rep->egen)); + if (vi->egen < rep->egen) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Received old vote %lu, egen %lu, ignoring vote1", + (u_long)vi->egen, (u_long)rep->egen)); + egen_arg.egen = rep->egen; + REP_SYSTEM_UNLOCK(env); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(data_dbt, &egen_arg.egen, + sizeof(egen_arg.egen)); + else { + if ((ret = __rep_egen_marshal(env, + &egen_arg, buf, __REP_EGEN_SIZE, &len)) != 0) + return (ret); + DB_INIT_DBT(data_dbt, buf, len); + } + (void)__rep_send_message(env, + eid, REP_ALIVE, &rp->lsn, &data_dbt, 0, 0); + return (0); + } + if (vi->egen > rep->egen) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Received VOTE1 from egen %lu, my egen %lu", + (u_long)vi->egen, (u_long)rep->egen)); + /* + * Terminate an election that may be in progress at the old + * egen. 
Whether or not there was one, this call will result in + * HOLDELECTION (assuming no unexpected failures crop up). + */ + __rep_elect_done(env, rep); + rep->egen = vi->egen; + } + + /* + * If this site (sender of the VOTE1) is the first to the party, simply + * initialize values from the message. Otherwise, see if the site knows + * about more sites, and/or requires more votes, than we do. + */ + if (!IN_ELECTION_TALLY(rep)) { + FLD_SET(rep->elect_flags, REP_E_TALLY); + rep->nsites = vi->nsites; + rep->nvotes = vi->nvotes; + } else { + if (vi->nsites > rep->nsites) + rep->nsites = vi->nsites; + if (vi->nvotes > rep->nvotes) + rep->nvotes = vi->nvotes; + } + + /* + * Ignore vote1's if we're in phase 2. + */ + if (FLD_ISSET(rep->elect_flags, REP_E_PHASE2)) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "In phase 2, ignoring vote1")); + goto err; + } + + /* + * Record this vote. If we're ignoring it, there's nothing more we need + * to do. + */ + if ((ret = __rep_tally(env, rep, eid, &rep->sites, vi->egen, 1)) != 0) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Tally returned %d, sites %d", ret, rep->sites)); + if (ret == DB_REP_IGNORE) + ret = 0; + goto err; + } + + RPRINT(env, (env, DB_VERB_REP_ELECT, +"Incoming vote: (eid)%d (pri)%lu %s (gen)%lu (egen)%lu (datagen)%lu [%lu,%lu]", + eid, (u_long)vi->priority, + F_ISSET(rp, REPCTL_ELECTABLE) ? 
"ELECTABLE" : "", + (u_long)rp->gen, (u_long)vi->egen, (u_long)vi->data_gen, + (u_long)rp->lsn.file, (u_long)rp->lsn.offset)); + if (rep->sites > 1) + RPRINT(env, (env, DB_VERB_REP_ELECT, +"Existing vote: (eid)%d (pri)%lu (gen)%lu (datagen)%lu (sites)%d [%lu,%lu]", + rep->winner, (u_long)rep->w_priority, + (u_long)rep->w_gen, (u_long)rep->w_datagen, rep->sites, + (u_long)rep->w_lsn.file, + (u_long)rep->w_lsn.offset)); + + __rep_cmp_vote(env, rep, eid, &rp->lsn, vi->priority, + rp->gen, vi->data_gen, vi->tiebreaker, rp->flags); + /* + * If you get a vote and you're not yet "in an election" at the proper + * egen, we've already recorded this vote. But that is all we need to + * do. But if you are in an election, check to see if we ought to send + * an extra VOTE1. We know that the VOTE1 we have received is not a + * duplicate, because of the successful return from __rep_tally(), + * above. + */ + if (IN_ELECTION(rep)) { + /* + * If we're doing a full election, and we're into phase 1 (no + * REP_E_TALLY), then resend, in case the sender of this VOTE1 + * missed our VOTE1. + */ + if (rep->full_elect && + FLD_ISSET((rep)->elect_flags, REP_E_PHASE1)) { + resend = 1; + vote1 = rep->vote1; + egen = rep->egen; + } + } else { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Not in election, but received vote1 0x%x 0x%x", + rep->flags, rep->elect_flags)); + ret = DB_REP_HOLDELECTION; + goto err; + } + + master = rep->winner; + lsn = rep->w_lsn; + if (IS_PHASE1_DONE(rep)) { + RPRINT(env, (env, DB_VERB_REP_ELECT, "Phase1 election done")); + RPRINT(env, (env, DB_VERB_REP_ELECT, "Voting for %d%s", + master, master == rep->eid ? 
"(self)" : "")); + egen = rep->egen; + FLD_SET(rep->elect_flags, REP_E_PHASE2); + FLD_CLR(rep->elect_flags, REP_E_PHASE1); + if (master == rep->eid) { + if ((ret =__rep_tally(env, rep, rep->eid, + &rep->votes, egen, 2)) != 0 && + ret != DB_REP_IGNORE) + goto err; + ret = 0; + RPRINT(env, (env, DB_VERB_REP_ELECT, + "After phase 1 done: counted vote %d of %d", + rep->votes, rep->nvotes)); + if (I_HAVE_WON(rep, rep->winner)) { + __rep_elect_master(env, rep); + elected = 1; + } + goto err; + } + REP_SYSTEM_UNLOCK(env); + + /* Vote for someone else. */ + __rep_send_vote(env, NULL, 0, 0, 0, 0, egen, 0, + master, REP_VOTE2, 0); + } else +err: REP_SYSTEM_UNLOCK(env); + + /* + * Note that if we're elected, there's no need for resending our VOTE1, + * even if we thought it might have been necessary a moment ago. + */ + if (elected) + ret = __rep_fire_elected(env, rep, egen); + else if (resend) + __rep_send_vote(env, + &vote1.lsn, vote1.nsites, vote1.nvotes, vote1.priority, + vote1.tiebreaker, egen, vote1.data_gen, + eid, REP_VOTE1, vote1.ctlflags); + return (ret); +} + +/* + * __rep_vote2 -- + * Handle incoming vote2 message on a client. + * + * PUBLIC: int __rep_vote2 __P((ENV *, __rep_control_args *, DBT *, int)); + */ +int +__rep_vote2(env, rp, rec, eid) + ENV *env; + __rep_control_args *rp; + DBT *rec; + int eid; +{ + DB_LOG *dblp; + DB_LSN lsn; + DB_REP *db_rep; + LOG *lp; + REP *rep; + REP_OLD_VOTE_INFO *ovi; + __rep_vote_info_args tmpvi, *vi; + u_int32_t egen; + int ret; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + RPRINT(env, (env, DB_VERB_REP_ELECT, "We received a vote%s", + F_ISSET(rep, REP_F_MASTER) ? 
" (master)" : "")); + if (F_ISSET(rep, REP_F_MASTER)) { + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + STAT_INC(env, + rep, election_won, rep->stat.st_elections_won, rep->egen); + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); + if (IS_USING_LEASES(env)) + ret = __rep_lease_refresh(env); + return (ret); + } + + REP_SYSTEM_LOCK(env); + egen = rep->egen; + + /* + * We might be the last to the party and we haven't had + * time to tally all the vote1's, but others have and + * decided we're the winner. So, if we're in the process + * of tallying sites, keep the vote so that when our + * election thread catches up we'll have the votes we + * already received. + */ + /* + * In 4.7 we changed to having fixed sized u_int32_t's from + * non-fixed 'int' fields in the vote structure. + */ + if (rp->rep_version < DB_REPVERSION_47) { + ovi = (REP_OLD_VOTE_INFO *)rec->data; + tmpvi.egen = ovi->egen; + tmpvi.nsites = (u_int32_t)ovi->nsites; + tmpvi.nvotes = (u_int32_t)ovi->nvotes; + tmpvi.priority = (u_int32_t)ovi->priority; + tmpvi.tiebreaker = ovi->tiebreaker; + } else + if ((ret = __rep_vote_info_unmarshal(env, + &tmpvi, rec->data, rec->size, NULL)) != 0) + return (ret); + vi = &tmpvi; + if (!IN_ELECTION_TALLY(rep) && vi->egen >= rep->egen) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Not in election gen %lu, at %lu, got vote", + (u_long)vi->egen, (u_long)rep->egen)); + ret = DB_REP_HOLDELECTION; + goto err; + } + + /* + * Record this vote. In a VOTE2, the only valid entry + * in the vote information is the election generation. + * + * There are several things which can go wrong that we + * need to account for: + * 1. If we receive a latent VOTE2 from an earlier election, + * we want to ignore it. + * 2. 
If we receive a VOTE2 from a site from which we never + * received a VOTE1, we want to record it, because we simply + * may be processing messages out of order or its vote1 got lost, + * but that site got all the votes it needed to send it. + * 3. If we have received a duplicate VOTE2 from this election + * from the same site we want to ignore it. + * 4. If this is from the current election and someone is + * really voting for us, then we finally get to record it. + */ + /* + * Case 1. + */ + if (vi->egen != rep->egen) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Bad vote egen %lu. Mine %lu", + (u_long)vi->egen, (u_long)rep->egen)); + ret = 0; + goto err; + } + + /* + * __rep_tally takes care of cases 2, 3 and 4. + */ + if ((ret = __rep_tally(env, rep, eid, &rep->votes, vi->egen, 2)) != 0) { + if (ret == DB_REP_IGNORE) + ret = 0; + goto err; + } + RPRINT(env, (env, DB_VERB_REP_ELECT, "Counted vote %d of %d", + rep->votes, rep->nvotes)); + if (I_HAVE_WON(rep, rep->winner)) { + __rep_elect_master(env, rep); + ret = DB_REP_NEWMASTER; + } + +err: REP_SYSTEM_UNLOCK(env); + if (ret == DB_REP_NEWMASTER) + ret = __rep_fire_elected(env, rep, egen); + return (ret); +} + +/* + * __rep_tally -- + * Handle incoming vote message on a client. This will record either a + * VOTE1 or a VOTE2, depending on the "phase" value the caller passed in. + * + * This function will return: + * 0 if we successfully tally the vote; + * DB_REP_IGNORE if the vote is properly ignored; + * (anything else) in case of an unexpected error. + * + * !!! Caller must hold REP_SYSTEM_LOCK. 
+ */ +static int +__rep_tally(env, rep, eid, countp, egen, phase) + ENV *env; + REP *rep; + int eid; + u_int32_t *countp; + u_int32_t egen; + int phase; +{ + REP_VTALLY *tally, *vtp; + u_int32_t i; + int ret; + + if (rep->nsites > rep->asites && + (ret = __rep_grow_sites(env, rep->nsites)) != 0) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Grow sites returned error %d", ret)); + return (ret); + } + if (phase == 1) + tally = R_ADDR(env->reginfo, rep->tally_off); + else + tally = R_ADDR(env->reginfo, rep->v2tally_off); + vtp = &tally[0]; + for (i = 0; i < *countp;) { + /* + * Ignore votes from earlier elections (i.e. we've heard + * from this site in this election, but its vote from an + * earlier election got delayed and we received it now). + * However, if we happened to hear from an earlier vote + * and we recorded it and we're now hearing from a later + * election we want to keep the updated one. Note that + * updating the entry will not increase the count. + * Also ignore votes that are duplicates. + */ + if (vtp->eid == eid) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Tally found[%d] (%d, %lu), this vote (%d, %lu)", + i, vtp->eid, (u_long)vtp->egen, + eid, (u_long)egen)); + if (vtp->egen >= egen) + return (DB_REP_IGNORE); + else { + vtp->egen = egen; + return (0); + } + } + i++; + vtp = &tally[i]; + } + + /* + * If we get here, we have a new voter we haven't seen before. Tally + * this vote. + */ + RPRINT(env, (env, DB_VERB_REP_ELECT, "Tallying VOTE%d[%d] (%d, %lu)", + phase, i, eid, (u_long)egen)); + + vtp->eid = eid; + vtp->egen = egen; + (*countp)++; + return (0); +} + +/* + * __rep_cmp_vote -- + * Compare incoming vote1 message on a client. Called with the db_rep + * mutex held. 
+ * + */ +static void +__rep_cmp_vote(env, rep, eid, lsnp, priority, gen, data_gen, tiebreaker, flags) + ENV *env; + REP *rep; + int eid; + DB_LSN *lsnp; + u_int32_t priority; + u_int32_t data_gen, flags, gen, tiebreaker; +{ + int cmp, like_pri; + + cmp = LOG_COMPARE(lsnp, &rep->w_lsn); + /* + * If we've seen more than one, compare us to the best so far. + * If we're the first, make ourselves the winner to start. + */ + if (rep->sites > 1 && + (priority != 0 || LF_ISSET(REPCTL_ELECTABLE))) { + /* + * Special case, if we have a mixed version group of sites, + * we set priority to 0, but set the ELECTABLE flag so that + * all sites talking at lower versions can correctly elect. + * If a non-zero priority comes in and current winner is + * zero priority (but was electable), then the non-zero + * site takes precedence no matter what its LSN is. + * + * Then the data_gen determines the winner. The site with + * the more recent generation of data wins. + * + * Then LSN is determinant only if we're comparing + * like-styled version/priorities at the same data_gen. I.e. + * both with 0/ELECTABLE priority or both with non-zero + * priority. Then actual priority value if LSNs + * are equal, then tiebreaker if both are equal. + */ + /* + * Make note if we're comparing the same types of priorities + * that indicate electability or not. We know we are + * electable if we are here. 
+ */ + like_pri = (priority == 0 && rep->w_priority == 0) || + (priority != 0 && rep->w_priority != 0); + + if ((priority != 0 && rep->w_priority == 0) || + (like_pri && data_gen > rep->w_datagen) || + (like_pri && data_gen == rep->w_datagen && cmp > 0) || + (cmp == 0 && (priority > rep->w_priority || + (priority == rep->w_priority && + (tiebreaker > rep->w_tiebreaker))))) { + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Accepting new vote")); + rep->winner = eid; + rep->w_priority = priority; + rep->w_lsn = *lsnp; + rep->w_gen = gen; + rep->w_datagen = data_gen; + rep->w_tiebreaker = tiebreaker; + } + } else if (rep->sites == 1) { + if (priority != 0 || LF_ISSET(REPCTL_ELECTABLE)) { + /* Make ourselves the winner to start. */ + rep->winner = eid; + rep->w_priority = priority; + rep->w_gen = gen; + rep->w_datagen = data_gen; + rep->w_lsn = *lsnp; + rep->w_tiebreaker = tiebreaker; + } else { + rep->winner = DB_EID_INVALID; + rep->w_priority = 0; + rep->w_gen = 0; + rep->w_datagen = 0; + ZERO_LSN(rep->w_lsn); + rep->w_tiebreaker = 0; + } + } +} + +/* + * __rep_elect_init + * Initialize an election. Sets beginp non-zero if the election is + * already in progress; makes it 0 otherwise. Leaves it untouched if we return + * DB_REP_NEWMASTER. + * + * Caller holds the REP_SYSTEM mutex, and relies on us not dropping it. + */ +static int +__rep_elect_init(env, nsites, nvotes, beginp, otally) + ENV *env; + u_int32_t nsites, nvotes; + int *beginp; + u_int32_t *otally; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + ret = 0; + + if (otally != NULL) + *otally = FLD_ISSET(rep->elect_flags, REP_E_TALLY); + + DB_ASSERT(env, rep->spent_egen <= rep->egen); + *beginp = rep->spent_egen == rep->egen; + if (!*beginp) { + /* + * Make sure that we always initialize all the election fields + * before putting ourselves in an election state. That means + * issuing calls that can fail (allocation) before setting all + * the variables. 
+ */ + if (nsites > rep->asites && + (ret = __rep_grow_sites(env, nsites)) != 0) + goto err; + DB_ENV_TEST_RECOVERY(env, DB_TEST_ELECTINIT, ret, NULL); + rep->spent_egen = rep->egen; + + STAT_INC(env, rep, election, rep->stat.st_elections, rep->egen); + + /* + * If we're the first to the party, we simply set initial + * values: pre-existing values would be left over from previous + * election. + */ + if (!IN_ELECTION_TALLY(rep)) { + rep->nsites = nsites; + rep->nvotes = nvotes; + } else { + if (nsites > rep->nsites) + rep->nsites = nsites; + if (nvotes > rep->nvotes) + rep->nvotes = nvotes; + } + } +DB_TEST_RECOVERY_LABEL +err: + return (ret); +} + +/* + * __rep_elect_master + * Set up for new master from election. Must be called with + * the replication region mutex held. + */ +static void +__rep_elect_master(env, rep) + ENV *env; + REP *rep; +{ + if (F_ISSET(rep, REP_F_MASTERELECT | REP_F_MASTER)) { + /* We've been through here already; avoid double counting. */ + return; + } + + F_SET(rep, REP_F_MASTERELECT); + STAT_INC(env, rep, election_won, rep->stat.st_elections_won, rep->egen); + + RPRINT(env, (env, DB_VERB_REP_ELECT, + "Got enough votes to win; election done; (prev) gen %lu", + (u_long)rep->gen)); +} + +static int +__rep_fire_elected(env, rep, egen) + ENV *env; + REP *rep; + u_int32_t egen; +{ + REP_EVENT_LOCK(env); + if (rep->notified_egen < egen) { + __rep_fire_event(env, DB_EVENT_REP_ELECTED, NULL); + rep->notified_egen = egen; + } + REP_EVENT_UNLOCK(env); + return (0); +} + +/* + * Compute a sleep interval. + * + * The user specifies an overall timeout function, but checking is cheap and the + * timeout may be a generous upper bound. So sleep for the smaller of .5s and + * timeout/10. Make sure we sleep at least 1usec if timeout < 10. + */ +#define SLEEPTIME(timeout) \ + ((timeout > 5000000) ? 500000 : ((timeout >= 10) ? timeout / 10 : 1)) + +/* + * __rep_wait -- + * + * Sleep until the indicated phase is over, or the timeout expires. 
The phase + * is over when someone clears the phase flag (in the course of processing an + * incoming message). This could either be a normal progression from one phase + * to the other, or it could be due to receiving a NEWMASTER or an egen change. + * In all cases we simply return 0, and the caller should check the state of the + * world (generally under mutex protection) to decide what to do next. + */ +static int +__rep_wait(env, timeoutp, full_elect, egen, flags) + ENV *env; + db_timeout_t *timeoutp; + int full_elect; + u_int32_t egen, flags; +{ + DB_REP *db_rep; + REP *rep; + int done; + u_int32_t sleeptime, sleeptotal, timeout; + + db_rep = env->rep_handle; + rep = db_rep->region; + done = 0; + + timeout = *timeoutp; + sleeptime = SLEEPTIME(timeout); + sleeptotal = 0; + while (sleeptotal < timeout) { + __os_yield(env, 0, sleeptime); + sleeptotal += sleeptime; + REP_SYSTEM_LOCK(env); + /* + * Check if group membership changed while we were + * sleeping. Specifically we're trying for a full + * election and someone is telling us we're joining + * a previously established replication group. (This is not + * applicable for the phase 0 wait, which uses a completely + * unrelated timeout value.) + */ + if (!LF_ISSET(REP_E_PHASE0) && + full_elect && F_ISSET(rep, REP_F_GROUP_ESTD)) { + *timeoutp = rep->elect_timeout; + timeout = *timeoutp; + if (sleeptotal >= timeout) + done = 1; + else + sleeptime = SLEEPTIME(timeout); + } + + if (egen != rep->egen || !FLD_ISSET(rep->elect_flags, flags)) + done = 1; + REP_SYSTEM_UNLOCK(env); + + if (done) + return (0); + } + return (0); +} + +/* + * __rep_grow_sites -- + * Called to allocate more space in the election tally information. + * Called with the rep mutex held. We need to call the region mutex, so + * we need to make sure that we *never* acquire those mutexes in the + * opposite order. 
+ */ +static int +__rep_grow_sites(env, nsites) + ENV *env; + u_int32_t nsites; +{ + REGENV *renv; + REGINFO *infop; + REP *rep; + int ret, *tally; + u_int32_t nalloc; + + rep = env->rep_handle->region; + + /* + * Allocate either twice the current allocation or nsites, + * whichever is more. + */ + nalloc = 2 * rep->asites; + if (nalloc < nsites) + nalloc = nsites; + + infop = env->reginfo; + renv = infop->primary; + MUTEX_LOCK(env, renv->mtx_regenv); + + /* + * We allocate 2 tally regions, one for tallying VOTE1's and + * one for VOTE2's. Always grow them in tandem, because if we + * get more VOTE1's we'll always expect more VOTE2's then too. + */ + if ((ret = __env_alloc(infop, + (size_t)nalloc * sizeof(REP_VTALLY), &tally)) == 0) { + if (rep->tally_off != INVALID_ROFF) + __env_alloc_free( + infop, R_ADDR(infop, rep->tally_off)); + rep->tally_off = R_OFFSET(infop, tally); + if ((ret = __env_alloc(infop, + (size_t)nalloc * sizeof(REP_VTALLY), &tally)) == 0) { + /* Success */ + if (rep->v2tally_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->v2tally_off)); + rep->v2tally_off = R_OFFSET(infop, tally); + rep->asites = nalloc; + rep->nsites = nsites; + } else { + /* + * We were unable to allocate both. So, we must + * free the first one and reinitialize. If + * v2tally_off is valid, it is from an old + * allocation and we are clearing it all out due + * to the error. + */ + if (rep->v2tally_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->v2tally_off)); + __env_alloc_free(infop, + R_ADDR(infop, rep->tally_off)); + rep->v2tally_off = rep->tally_off = INVALID_ROFF; + rep->asites = 0; + } + } + MUTEX_UNLOCK(env, renv->mtx_regenv); + return (ret); +} + +/* + * __rep_send_vote + * Send this site's vote for the election. 
+ */ +static void +__rep_send_vote(env, lsnp, + nsites, nvotes, pri, tie, egen, data_gen, eid, vtype, flags) + ENV *env; + DB_LSN *lsnp; + int eid; + u_int32_t nsites, nvotes, pri; + u_int32_t flags, egen, data_gen, tie, vtype; +{ + DB_REP *db_rep; + DBT vote_dbt; + REP *rep; + REP_OLD_VOTE_INFO ovi; + __rep_vote_info_args vi; + __rep_vote_info_v5_args vi5; + u_int8_t buf[__REP_VOTE_INFO_SIZE]; + size_t len; + + db_rep = env->rep_handle; + rep = db_rep->region; + + memset(&vi, 0, sizeof(vi)); + memset(&vote_dbt, 0, sizeof(vote_dbt)); + + /* + * In 4.7 we went to fixed sized fields. They may not be + * the same as the sizes in older versions. In 5.2 we + * added the data_gen. + */ + if (rep->version < DB_REPVERSION_47) { + ovi.egen = egen; + ovi.priority = (int) pri; + ovi.nsites = (int) nsites; + ovi.nvotes = (int) nvotes; + ovi.tiebreaker = tie; + DB_INIT_DBT(vote_dbt, &ovi, sizeof(ovi)); + } else if (rep->version < DB_REPVERSION_52) { + vi5.egen = egen; + vi5.priority = pri; + vi5.nsites = nsites; + vi5.nvotes = nvotes; + vi5.tiebreaker = tie; + (void)__rep_vote_info_v5_marshal(env, &vi5, buf, + __REP_VOTE_INFO_SIZE, &len); + DB_INIT_DBT(vote_dbt, buf, len); + } else { + vi.egen = egen; + vi.priority = pri; + vi.nsites = nsites; + vi.nvotes = nvotes; + vi.tiebreaker = tie; + vi.data_gen = data_gen; + (void)__rep_vote_info_marshal(env, &vi, buf, + __REP_VOTE_INFO_SIZE, &len); + DB_INIT_DBT(vote_dbt, buf, len); + } + + (void)__rep_send_message(env, eid, vtype, lsnp, &vote_dbt, flags, 0); +} diff --git a/src/rep/rep_lease.c b/src/rep/rep_lease.c new file mode 100644 index 00000000..5fcf3c68 --- /dev/null +++ b/src/rep/rep_lease.c @@ -0,0 +1,545 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" + +static void __rep_find_entry __P((ENV *, REP *, int, REP_LEASE_ENTRY **)); + +/* + * __rep_update_grant - + * Update a client's lease grant for this perm record + * and send the grant to the master. Caller must + * hold the mtx_clientdb mutex. Timespec given is in + * host local format. + * + * PUBLIC: int __rep_update_grant __P((ENV *, db_timespec *)); + */ +int +__rep_update_grant(env, ts) + ENV *env; + db_timespec *ts; +{ + DBT lease_dbt; + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + __rep_grant_info_args gi; + db_timespec mytime; + u_int8_t buf[__REP_GRANT_INFO_SIZE]; + int master, ret; + size_t len; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + timespecclear(&mytime); + + /* + * Get current time, and add in the (skewed) lease duration + * time to send the grant to the master. + */ + __os_gettime(env, &mytime, 1); + timespecadd(&mytime, &rep->lease_duration); + REP_SYSTEM_LOCK(env); + /* + * If we are in an election, we cannot grant the lease. + * We need to check under the region mutex. + */ + if (IN_ELECTION(rep)) { + REP_SYSTEM_UNLOCK(env); + return (0); + } + if (timespeccmp(&mytime, &rep->grant_expire, >)) + rep->grant_expire = mytime; + F_CLR(rep, REP_F_LEASE_EXPIRED); + REP_SYSTEM_UNLOCK(env); + + /* + * Send the LEASE_GRANT message with the current lease grant + * no matter if we've actually extended the lease or not. + */ + gi.msg_sec = (u_int32_t)ts->tv_sec; + gi.msg_nsec = (u_int32_t)ts->tv_nsec; + + if ((ret = __rep_grant_info_marshal(env, &gi, buf, + __REP_GRANT_INFO_SIZE, &len)) != 0) + return (ret); + DB_INIT_DBT(lease_dbt, buf, len); + /* + * Don't send to the master if this site has zero priority because + * our site cannot count toward the data being safe. 
+ */ + if ((master = rep->master_id) != DB_EID_INVALID && rep->priority > 0) + (void)__rep_send_message(env, master, REP_LEASE_GRANT, + &lp->max_perm_lsn, &lease_dbt, 0, 0); + return (0); +} + +/* + * __rep_islease_granted - + * Return 0 if this client has no outstanding lease granted. + * Return 1 otherwise. + * Caller must hold the REP_SYSTEM (region) mutex, and (rep_elect) relies + * on us not dropping it. + * + * PUBLIC: int __rep_islease_granted __P((ENV *)); + */ +int +__rep_islease_granted(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + db_timespec mytime; + + db_rep = env->rep_handle; + rep = db_rep->region; + /* + * Get current time and compare against our granted lease. + */ + timespecclear(&mytime); + __os_gettime(env, &mytime, 1); + + return (timespeccmp(&mytime, &rep->grant_expire, <=) ? 1 : 0); +} + +/* + * __rep_lease_table_alloc - + * Allocate the lease table on a master. Called with rep mutex + * held. We need to acquire the env region mutex, so we need to + * make sure we never acquire those mutexes in the opposite order. + * + * PUBLIC: int __rep_lease_table_alloc __P((ENV *, u_int32_t)); + */ +int +__rep_lease_table_alloc(env, nsites) + ENV *env; + u_int32_t nsites; +{ + REGENV *renv; + REGINFO *infop; + REP *rep; + REP_LEASE_ENTRY *le, *table; + int *lease, ret; + u_int32_t i; + + rep = env->rep_handle->region; + + infop = env->reginfo; + renv = infop->primary; + MUTEX_LOCK(env, renv->mtx_regenv); + /* + * If we have an old table from some other time, free it and + * allocate ourselves a new one that is known to be for + * the right number of sites. 
+ */ + if (rep->lease_off != INVALID_ROFF) { + __env_alloc_free(infop, + R_ADDR(infop, rep->lease_off)); + rep->lease_off = INVALID_ROFF; + } + ret = __env_alloc(infop, (size_t)nsites * sizeof(REP_LEASE_ENTRY), + &lease); + MUTEX_UNLOCK(env, renv->mtx_regenv); + if (ret != 0) + return (ret); + else + rep->lease_off = R_OFFSET(infop, lease); + table = R_ADDR(infop, rep->lease_off); + for (i = 0; i < nsites; i++) { + le = &table[i]; + le->eid = DB_EID_INVALID; + timespecclear(&le->start_time); + timespecclear(&le->end_time); + ZERO_LSN(le->lease_lsn); + } + return (0); +} + +/* + * __rep_lease_grant - + * Handle incoming REP_LEASE_GRANT message on a master. + * + * PUBLIC: int __rep_lease_grant __P((ENV *, __rep_control_args *, DBT *, int)); + */ +int +__rep_lease_grant(env, rp, rec, eid) + ENV *env; + __rep_control_args *rp; + DBT *rec; + int eid; +{ + DB_REP *db_rep; + REP *rep; + __rep_grant_info_args gi; + REP_LEASE_ENTRY *le; + db_timespec msg_time; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + if ((ret = __rep_grant_info_unmarshal(env, + &gi, rec->data, rec->size, NULL)) != 0) + return (ret); + timespecset(&msg_time, gi.msg_sec, gi.msg_nsec); + le = NULL; + + /* + * Get current time, and add in the (skewed) lease duration + * time to send the grant to the master. + */ + REP_SYSTEM_LOCK(env); + __rep_find_entry(env, rep, eid, &le); + /* + * We either get back this site's entry, or an empty entry + * that we need to initialize. + */ + DB_ASSERT(env, le != NULL); + /* + * Update the entry if it is an empty entry or if the new + * lease grant is a later start time than the current one. 
+ */ + VPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_grant: grant msg time %lu %lu", + (u_long)msg_time.tv_sec, (u_long)msg_time.tv_nsec)); + if (le->eid == DB_EID_INVALID || + timespeccmp(&msg_time, &le->start_time, >)) { + le->eid = eid; + le->start_time = msg_time; + le->end_time = le->start_time; + timespecadd(&le->end_time, &rep->lease_duration); + VPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_grant: eid %d, start %lu %lu, end %lu %lu, duration %lu %lu", + le->eid, (u_long)le->start_time.tv_sec, (u_long)le->start_time.tv_nsec, + (u_long)le->end_time.tv_sec, (u_long)le->end_time.tv_nsec, + (u_long)rep->lease_duration.tv_sec, (u_long)rep->lease_duration.tv_nsec)); + } + /* + * Only update the lease table with a larger LSN value + * than the previous entry. This handles the case of a + * lagging record with a later start time, which is + * sometimes possible when a failed lease check resends + * the last permanent record. + */ + if (LOG_COMPARE(&rp->lsn, &le->lease_lsn) > 0) { + le->lease_lsn = rp->lsn; + VPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_grant: eid %d, lease_lsn [%lu][%lu]", + le->eid, (u_long)le->lease_lsn.file, + (u_long)le->lease_lsn.offset)); + } + REP_SYSTEM_UNLOCK(env); + return (0); +} + +/* + * Find the entry for the given EID. Or the first empty one. + */ +static void +__rep_find_entry(env, rep, eid, lep) + ENV *env; + REP *rep; + int eid; + REP_LEASE_ENTRY **lep; +{ + REGINFO *infop; + REP_LEASE_ENTRY *le, *table; + u_int32_t i; + + infop = env->reginfo; + table = R_ADDR(infop, rep->lease_off); + + for (i = 0; i < rep->config_nsites; i++) { + le = &table[i]; + /* + * Find either the one that matches the client's + * EID or the first empty one. + */ + if (le->eid == eid || le->eid == DB_EID_INVALID) { + *lep = le; + return; + } + } + return; +} + +/* + * __rep_lease_check - + * Return 0 if this master holds valid leases and can confirm + * its mastership. If leases are expired, an attempt is made + * to refresh the leases. 
If that fails, then return the + * DB_REP_LEASE_EXPIRED error to the user. No mutexes held. + * + * PUBLIC: int __rep_lease_check __P((ENV *, int)); + */ +int +__rep_lease_check(env, refresh) + ENV *env; + int refresh; +{ + DB_LOG *dblp; + DB_LSN lease_lsn; + DB_REP *db_rep; + LOG *lp; + REGINFO *infop; + REP *rep; + REP_LEASE_ENTRY *le, *table; + db_timespec curtime; + int max_tries, ret, tries; + u_int32_t i, min_leases, valid_leases; + + infop = env->reginfo; + tries = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + LOG_SYSTEM_LOCK(env); + lease_lsn = lp->max_perm_lsn; + LOG_SYSTEM_UNLOCK(env); +#ifdef HAVE_STATISTICS + rep->stat.st_lease_chk++; +#endif + /* + * Set the maximum number of retries to be 2x the lease timeout + * so that if a site is waiting to sync, it has a chance to do so. + */ + max_tries = (int)(rep->lease_timeout / (LEASE_REFRESH_USEC / 2)); + if (max_tries < LEASE_REFRESH_MIN) + max_tries = LEASE_REFRESH_MIN; +retry: + REP_SYSTEM_LOCK(env); + min_leases = rep->config_nsites / 2; + ret = 0; + __os_gettime(env, &curtime, 1); + VPRINT(env, (env, DB_VERB_REP_LEASE, +"%s %d of %d refresh %d min_leases %lu curtime %lu %lu, maxLSN [%lu][%lu]", + "lease_check: try ", tries, max_tries, refresh, + (u_long)min_leases, (u_long)curtime.tv_sec, + (u_long)curtime.tv_nsec, + (u_long)lease_lsn.file, + (u_long)lease_lsn.offset)); + table = R_ADDR(infop, rep->lease_off); + for (i = 0, valid_leases = 0; + i < rep->config_nsites && valid_leases < min_leases; i++) { + le = &table[i]; + /* + * Count this lease as valid if: + * - It is a valid entry (has an EID). + * - The lease has not expired. + * - The LSN is up to date. 
+ */ + if (le->eid != DB_EID_INVALID) { + VPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_check: valid %lu eid %d, lease_lsn [%lu][%lu]", + (u_long)valid_leases, le->eid, + (u_long)le->lease_lsn.file, + (u_long)le->lease_lsn.offset)); + VPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_check: endtime %lu %lu", + (u_long)le->end_time.tv_sec, + (u_long)le->end_time.tv_nsec)); + } + if (le->eid != DB_EID_INVALID && + timespeccmp(&le->end_time, &curtime, >=) && + LOG_COMPARE(&le->lease_lsn, &lease_lsn) >= 0) + valid_leases++; + } + REP_SYSTEM_UNLOCK(env); + + /* + * Now see if we have enough. + */ + VPRINT(env, (env, DB_VERB_REP_LEASE, "valid %lu, min %lu", + (u_long)valid_leases, (u_long)min_leases)); + if (valid_leases < min_leases) { +#ifdef HAVE_STATISTICS + rep->stat.st_lease_chk_misses++; +#endif + if (!refresh || tries > max_tries) + ret = DB_REP_LEASE_EXPIRED; + else { + /* + * If we are successful, we need to recheck the leases + * because the lease grant messages may have raced with + * the PERM acknowledgement. Give the grant messages + * a chance to arrive and be processed. + */ + if (((tries % 10) == 5 && + (ret = __rep_lease_refresh(env)) == 0) || + (tries % 10) != 5) { + /* + * If we were successful sending, but + * not in racing the message threads, + * then yield the processor so that + * the message threads get a chance + * to run. + */ + if (tries > 0) + __os_yield(env, 0, LEASE_REFRESH_USEC); + tries++; +#ifdef HAVE_STATISTICS + rep->stat.st_lease_chk_refresh++; +#endif + goto retry; + } + } + } + + if (ret == DB_REP_LEASE_EXPIRED) + RPRINT(env, (env, DB_VERB_REP_LEASE, + "lease_check: Expired. Only %lu valid", + (u_long)valid_leases)); + return (ret); +} + +/* + * __rep_lease_refresh - + * Find the last permanent record and send that out so that it + * forces clients to grant their leases. + * + * If there is no permanent record, this function cannot refresh + * leases. 
That should not happen because the master should write + * a checkpoint when it starts, if there is no other perm record. + * + * PUBLIC: int __rep_lease_refresh __P((ENV *)); + */ +int +__rep_lease_refresh(env) + ENV *env; +{ + DBT rec; + DB_LOGC *logc; + DB_LSN lsn; + int ret, t_ret; + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + + memset(&rec, 0, sizeof(rec)); + memset(&lsn, 0, sizeof(lsn)); + /* + * Use __rep_log_backup to find the last PERM record. + */ + if ((ret = __rep_log_backup(env, logc, &lsn, REP_REC_PERM)) != 0) { + /* + * If there is no PERM record, then we get DB_NOTFOUND. + */ + if (ret == DB_NOTFOUND) + ret = 0; + goto err; + } + + if ((ret = __logc_get(logc, &lsn, &rec, DB_CURRENT)) != 0) + goto err; + + (void)__rep_send_message(env, DB_EID_BROADCAST, REP_LOG, &lsn, + &rec, REPCTL_LEASE, 0); + +err: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_lease_expire - + * Proactively expire all leases granted to us. + * Assume the caller holds the REP_SYSTEM (region) mutex. + * + * PUBLIC: int __rep_lease_expire __P((ENV *)); + */ +int +__rep_lease_expire(env) + ENV *env; +{ + DB_REP *db_rep; + REGINFO *infop; + REP *rep; + REP_LEASE_ENTRY *le, *table; + int ret; + u_int32_t i; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + + if (rep->lease_off != INVALID_ROFF) { + table = R_ADDR(infop, rep->lease_off); + /* + * Expire all leases forcibly. We are guaranteed that the + * start_time for all leases are not in the future. Therefore, + * set the end_time to the start_time. + */ + for (i = 0; i < rep->config_nsites; i++) { + le = &table[i]; + le->end_time = le->start_time; + } + } + return (ret); +} + +/* + * __rep_lease_waittime - + * Return the amount of time remaining on a granted lease. + * Assume the caller holds the REP_SYSTEM (region) mutex. 
+ * + * PUBLIC: db_timeout_t __rep_lease_waittime __P((ENV *)); + */ +db_timeout_t +__rep_lease_waittime(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + db_timespec exptime, mytime; + db_timeout_t to; + + db_rep = env->rep_handle; + rep = db_rep->region; + exptime = rep->grant_expire; + to = 0; + /* + * If the lease has never been granted, we must wait a full + * lease timeout because we could be freshly rebooted after + * a crash and a lease could be granted from a previous + * incarnation of this client. However, if the lease has never + * been granted, and this client has already waited a full + * lease timeout, we know our lease cannot be granted and there + * is no need to wait again. + */ + RPRINT(env, (env, DB_VERB_REP_LEASE, + "wait_time: grant_expire %lu %lu lease_to %lu", + (u_long)exptime.tv_sec, (u_long)exptime.tv_nsec, + (u_long)rep->lease_timeout)); + if (!timespecisset(&exptime)) { + if (!F_ISSET(rep, REP_F_LEASE_EXPIRED)) + to = rep->lease_timeout; + } else { + __os_gettime(env, &mytime, 1); + RPRINT(env, (env, DB_VERB_REP_LEASE, + "wait_time: mytime %lu %lu, grant_expire %lu %lu", + (u_long)mytime.tv_sec, (u_long)mytime.tv_nsec, + (u_long)exptime.tv_sec, (u_long)exptime.tv_nsec)); + if (timespeccmp(&mytime, &exptime, <=)) { + /* + * If the current time is before the grant expiration + * compute the difference and return remaining grant + * time. + */ + timespecsub(&exptime, &mytime); + DB_TIMESPEC_TO_TIMEOUT(to, &exptime, 1); + } + } + return (to); +} diff --git a/src/rep/rep_log.c b/src/rep/rep_log.c new file mode 100644 index 00000000..77fca9d5 --- /dev/null +++ b/src/rep/rep_log.c @@ -0,0 +1,1044 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" + +static int __rep_chk_newfile __P((ENV *, DB_LOGC *, REP *, + __rep_control_args *, int)); +static int __rep_log_split __P((ENV *, DB_THREAD_INFO *, + __rep_control_args *, DBT *, DB_LSN *, DB_LSN *)); + +/* + * __rep_allreq -- + * Handle a REP_ALL_REQ message. + * + * PUBLIC: int __rep_allreq __P((ENV *, __rep_control_args *, int)); + */ +int +__rep_allreq(env, rp, eid) + ENV *env; + __rep_control_args *rp; + int eid; +{ + DBT data_dbt, newfiledbt; + DB_LOGC *logc; + DB_LSN log_end, oldfilelsn; + DB_REP *db_rep; + REP *rep; + REP_BULK bulk; + REP_THROTTLE repth; + __rep_newfile_args nf_args; + uintptr_t bulkoff; + u_int32_t bulkflags, end_flag, flags, use_bulk; + int arch_flag, ret, t_ret; + u_int8_t buf[__REP_NEWFILE_SIZE]; + size_t len; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + end_flag = 0; + arch_flag = 0; + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + memset(&data_dbt, 0, sizeof(data_dbt)); + /* + * If we're doing bulk transfer, allocate a bulk buffer to put our + * log records in. We still need to initialize the throttle info + * because if we encounter a log record larger than our entire bulk + * buffer, we need to send it as a singleton and also we want to + * support throttling with bulk. + * + * Use a local var so we don't need to worry if someone else turns + * on/off bulk in the middle of our call. 
+ */ + use_bulk = FLD_ISSET(rep->config, REP_C_BULK); + bulk.addr = NULL; + if (use_bulk && (ret = __rep_bulk_alloc(env, &bulk, eid, + &bulkoff, &bulkflags, REP_BULK_LOG)) != 0) + goto err; + memset(&repth, 0, sizeof(repth)); + REP_SYSTEM_LOCK(env); + if ((ret = __rep_lockout_archive(env, rep)) != 0) { + REP_SYSTEM_UNLOCK(env); + goto err; + } + arch_flag = 1; + repth.gbytes = rep->gbytes; + repth.bytes = rep->bytes; + oldfilelsn = repth.lsn = rp->lsn; + repth.type = REP_LOG; + repth.data_dbt = &data_dbt; + REP_SYSTEM_UNLOCK(env); + + /* + * Get the LSN of the end of the log, so that in our reading loop + * (below), we can recognize when we get there, and set the + * REPCTL_LOG_END flag. + */ + if ((ret = __logc_get(logc, &log_end, &data_dbt, DB_LAST)) != 0) { + if (ret == DB_NOTFOUND && F_ISSET(rep, REP_F_MASTER)) + ret = 0; + goto err; + } + + flags = IS_ZERO_LSN(rp->lsn) || + IS_INIT_LSN(rp->lsn) ? DB_FIRST : DB_SET; + /* + * We get the first item so that a client servicing requests + * can distinguish between not having the records and reaching + * the end of its log. Return the DB_NOTFOUND if the client + * cannot get the record. Return 0 if we finish the loop and + * sent all that we have. + */ + ret = __logc_get(logc, &repth.lsn, &data_dbt, flags); + /* + * If the client is asking for all records + * because it doesn't have any, and our first + * record is not in the first log file, then + * the client is outdated and needs to get a + * VERIFY_FAIL. + */ + if (ret == 0 && repth.lsn.file != 1 && flags == DB_FIRST) { + if (F_ISSET(rep, REP_F_CLIENT)) + ret = DB_NOTFOUND; + else + (void)__rep_send_message(env, eid, + REP_VERIFY_FAIL, &repth.lsn, NULL, 0, 0); + goto err; + } + /* + * If we got DB_NOTFOUND it could be because the LSN we were + * given is at the end of the log file and we need to switch + * log files. Reinitialize and get the current record when we return. 
+ */ + if (ret == DB_NOTFOUND) { + ret = __rep_chk_newfile(env, logc, rep, rp, eid); + /* + * If we still get DB_NOTFOUND the client gave us a + * bad or unknown LSN. Ignore it if we're the master. + * Any other error is returned. + */ + if (ret == 0) + ret = __logc_get(logc, &repth.lsn, + &data_dbt, DB_CURRENT); + if (ret == DB_NOTFOUND && F_ISSET(rep, REP_F_MASTER)) { + ret = 0; + goto err; + } + if (ret != 0) + goto err; + } + + /* + * For singleton log records, we break when we get a REP_LOG_MORE. + * Or if we're not using throttling, or we are using bulk, we stop + * when we reach the end (i.e. ret != 0). + */ + for (end_flag = 0; + ret == 0 && repth.type != REP_LOG_MORE && end_flag == 0; + ret = __logc_get(logc, &repth.lsn, &data_dbt, DB_NEXT)) { + /* + * If we just changed log files, we need to send the + * version of this log file to the client. + */ + if (repth.lsn.file != oldfilelsn.file) { + if ((ret = __logc_version(logc, &nf_args.version)) != 0) + break; + memset(&newfiledbt, 0, sizeof(newfiledbt)); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(newfiledbt, &nf_args.version, + sizeof(nf_args.version)); + else { + if ((ret = __rep_newfile_marshal(env, &nf_args, + buf, __REP_NEWFILE_SIZE, &len)) != 0) + goto err; + DB_INIT_DBT(newfiledbt, buf, len); + } + (void)__rep_send_message(env, + eid, REP_NEWFILE, &oldfilelsn, &newfiledbt, + REPCTL_RESEND, 0); + } + + /* + * Mark the end of the ALL_REQ response to show that the + * receiving client should now be "caught up" with the + * replication group. If we're the master, then our log end is + * certainly authoritative. If we're another client, only if we + * ourselves have reached STARTUPDONE. + */ + end_flag = (LOG_COMPARE(&repth.lsn, &log_end) >= 0 && + (F_ISSET(rep, REP_F_MASTER) || + rep->stat.st_startup_complete)) ? + REPCTL_LOG_END : 0; + /* + * If we are configured for bulk, try to send this as a bulk + * request. If not configured, or it is too big for bulk + * then just send normally. 
+ */ + if (use_bulk) + ret = __rep_bulk_message(env, &bulk, &repth, + &repth.lsn, &data_dbt, (REPCTL_RESEND | end_flag)); + if (!use_bulk || ret == DB_REP_BULKOVF) + ret = __rep_send_throttle(env, + eid, &repth, 0, end_flag); + if (ret != 0) + break; + /* + * If we are about to change files, then we'll need the + * last LSN in the previous file. Save it here. + */ + oldfilelsn = repth.lsn; + oldfilelsn.offset += logc->len; + } + + if (ret == DB_NOTFOUND || ret == DB_REP_UNAVAIL) + ret = 0; + /* + * We're done, force out whatever remains in the bulk buffer and + * free it. + */ +err: + /* + * We could have raced an unlink from an earlier log_archive + * and the user is removing the files themselves, now. If + * we get an error indicating the log file might no longer + * exist, ignore it. + */ + if (ret == ENOENT) + ret = 0; + if (bulk.addr != NULL && (t_ret = __rep_bulk_free(env, &bulk, + (REPCTL_RESEND | end_flag))) != 0 && ret == 0 && + t_ret != DB_REP_UNAVAIL) + ret = t_ret; + if (arch_flag) { + REP_SYSTEM_LOCK(env); + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE); + REP_SYSTEM_UNLOCK(env); + } + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_log -- + * Handle a REP_LOG/REP_LOG_MORE message. 
+ * + * PUBLIC: int __rep_log __P((ENV *, DB_THREAD_INFO *, + * PUBLIC: __rep_control_args *, DBT *, int, time_t, DB_LSN *)); + */ +int +__rep_log(env, ip, rp, rec, eid, savetime, ret_lsnp) + ENV *env; + DB_THREAD_INFO *ip; + __rep_control_args *rp; + DBT *rec; + int eid; + time_t savetime; + DB_LSN *ret_lsnp; +{ + DB_LOG *dblp; + DB_LSN last_lsn, lsn; + DB_REP *db_rep; + LOG *lp; + REP *rep; + int is_dup, master, ret; + u_int32_t gapflags; + + is_dup = ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ret = __rep_apply(env, ip, rp, rec, ret_lsnp, &is_dup, &last_lsn); + switch (ret) { + /* + * We're in an internal backup and we've gotten + * all the log we need to run recovery. Do so now. + */ + case DB_REP_LOGREADY: + if ((ret = + __rep_logready(env, rep, savetime, &last_lsn)) != 0) + goto out; + break; + /* + * If we get any of the "normal" returns, we only process + * LOG_MORE if this is not a duplicate record. If the + * record is a duplicate we don't want to handle LOG_MORE + * and request a multiple data stream (or trigger internal + * initialization) since this could be a very old record + * that no longer exists on the master. + */ + case DB_REP_ISPERM: + case DB_REP_NOTPERM: + case 0: + if (is_dup) + goto out; + else + break; + /* + * Any other return (errors), we're done. + */ + default: + goto out; + } + if (rp->rectype == REP_LOG_MORE) { + master = rep->master_id; + + /* + * Keep the cycle from stalling: In case we got the LOG_MORE out + * of order, before some preceding log records, we want to make + * sure our follow-up request resumes from where the LOG_MORE + * said it should. (If the preceding log records never arrive, + * normal gap processing should take care of asking for them.) + * But if we already have this record and/or more, we need to + * ask to resume from what we need. The upshot is we need the + * max of lp->lsn and the lsn from the message. 
+ */ + MUTEX_LOCK(env, rep->mtx_clientdb); + lsn = lp->ready_lsn; + if (LOG_COMPARE(&rp->lsn, &lsn) > 0) + lsn = rp->lsn; + + /* + * If the master_id is invalid, this means that since + * the last record was sent, somebody declared an + * election and we may not have a master to request + * things of. + * + * This is not an error; when we find a new master, + * we'll re-negotiate where the end of the log is and + * try to bring ourselves up to date again anyway. + */ + if (master == DB_EID_INVALID) { + ret = 0; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + goto out; + } + /* + * If we're waiting for records, set the wait_ts + * high so that we avoid re-requesting too soon and + * end up with multiple data streams. + */ + if (IS_ZERO_LSN(lp->waiting_lsn)) + lp->wait_ts = rep->max_gap; + /* + * If preceding log records were from the master, send the + * request for further log records to the master instead of + * allowing it to default to ANYWHERE. + */ + gapflags = REP_GAP_FORCE; + if (master == eid) + gapflags = gapflags | REP_GAP_REREQUEST; + ret = __rep_loggap_req(env, rep, &lsn, gapflags); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + } +out: + return (ret); +} + +/* + * __rep_bulk_log -- + * Handle a REP_BULK_LOG message. + * + * PUBLIC: int __rep_bulk_log __P((ENV *, DB_THREAD_INFO *, + * PUBLIC: __rep_control_args *, DBT *, time_t, DB_LSN *)); + */ +int +__rep_bulk_log(env, ip, rp, rec, savetime, ret_lsnp) + ENV *env; + DB_THREAD_INFO *ip; + __rep_control_args *rp; + DBT *rec; + time_t savetime; + DB_LSN *ret_lsnp; +{ + DB_LSN last_lsn; + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + ret = __rep_log_split(env, ip, rp, rec, ret_lsnp, &last_lsn); + switch (ret) { + /* + * We're in an internal backup and we've gotten + * all the log we need to run recovery. Do so now. + */ + case DB_REP_LOGREADY: + ret = __rep_logready(env, rep, savetime, &last_lsn); + break; + /* + * Any other return (errors), we're done. 
+ */ + default: + break; + } + return (ret); +} + +/* + * __rep_log_split -- + * - Split a log buffer into individual records. + * + * This is used by a client to process a bulk log message from the + * master and convert it into individual __rep_apply requests. + */ +static int +__rep_log_split(env, ip, rp, rec, ret_lsnp, last_lsnp) + ENV *env; + DB_THREAD_INFO *ip; + __rep_control_args *rp; + DBT *rec; + DB_LSN *ret_lsnp; + DB_LSN *last_lsnp; +{ + DBT logrec; + DB_LSN next_new_lsn, save_lsn, tmp_lsn; + __rep_control_args tmprp; + __rep_bulk_args b_args; + int is_dup, ret, save_ret; + u_int32_t save_flags; + u_int8_t *p, *ep; + + memset(&logrec, 0, sizeof(logrec)); + ZERO_LSN(next_new_lsn); + ZERO_LSN(save_lsn); + ZERO_LSN(tmp_lsn); + /* + * We're going to be modifying the rp LSN contents so make + * our own private copy to play with. + */ + memcpy(&tmprp, rp, sizeof(tmprp)); + /* + * We send the bulk buffer on a PERM record, so often we will have + * DB_LOG_PERM set. However, we only want to mark the last LSN + * we have as a PERM record. So clear it here, and when we're on + * the last record below, set it. The same applies if the sender + * set REPCTL_LOG_END on this message. We want the end of the + * bulk buffer to be marked as the end. + */ + save_flags = F_ISSET(rp, REPCTL_LOG_END | REPCTL_PERM); + F_CLR(&tmprp, REPCTL_LOG_END | REPCTL_PERM); + is_dup = ret = save_ret = 0; + for (ep = (u_int8_t *)rec->data + rec->size, p = (u_int8_t *)rec->data; + p < ep; ) { + /* + * First thing in the buffer is the length. Then the LSN + * of this record, then the record itself. 
+ */ + if (rp->rep_version < DB_REPVERSION_47) { + memcpy(&b_args.len, p, sizeof(b_args.len)); + p += sizeof(b_args.len); + memcpy(&tmprp.lsn, p, sizeof(DB_LSN)); + p += sizeof(DB_LSN); + logrec.data = p; + logrec.size = b_args.len; + p += b_args.len; + } else { + if ((ret = __rep_bulk_unmarshal(env, + &b_args, p, rec->size, &p)) != 0) + return (ret); + tmprp.lsn = b_args.lsn; + logrec.data = b_args.bulkdata.data; + logrec.size = b_args.len; + } + VPRINT(env, (env, DB_VERB_REP_MISC, + "log_rep_split: Processing LSN [%lu][%lu]", + (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset)); + VPRINT(env, (env, DB_VERB_REP_MISC, + "log_rep_split: p %#lx ep %#lx logrec data %#lx, size %lu (%#lx)", + P_TO_ULONG(p), P_TO_ULONG(ep), P_TO_ULONG(logrec.data), + (u_long)logrec.size, (u_long)logrec.size)); + if (p >= ep && save_flags) + F_SET(&tmprp, save_flags); + /* + * A previous call to __rep_apply indicated an earlier + * record is a dup and the next_new_lsn we are waiting for. + * Skip log records until we catch up with next_new_lsn. + */ + if (is_dup && LOG_COMPARE(&tmprp.lsn, &next_new_lsn) < 0) { + VPRINT(env, (env, DB_VERB_REP_MISC, + "log_split: Skip dup LSN [%lu][%lu]", + (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset)); + continue; + } + is_dup = 0; + ret = __rep_apply(env, ip, + &tmprp, &logrec, &tmp_lsn, &is_dup, last_lsnp); + VPRINT(env, (env, DB_VERB_REP_MISC, + "log_split: rep_apply ret %d, dup %d, tmp_lsn [%lu][%lu]", + ret, is_dup, (u_long)tmp_lsn.file, (u_long)tmp_lsn.offset)); + if (is_dup) + next_new_lsn = tmp_lsn; + switch (ret) { + /* + * If we received the pieces we need for running recovery, + * short-circuit because recovery will truncate the log to + * the LSN we want anyway. + */ + case DB_REP_LOGREADY: + goto out; + /* + * If we just handled a special record, retain that information. + */ + case DB_REP_ISPERM: + case DB_REP_NOTPERM: + save_ret = ret; + save_lsn = tmp_lsn; + ret = 0; + break; + /* + * Normal processing, do nothing, just continue. 
+ */ + case 0: + break; + /* + * If we get an error, then stop immediately. + */ + default: + goto out; + } + } +out: + /* + * If we finish processing successfully, set our return values + * based on what we saw. + */ + if (ret == 0) { + ret = save_ret; + *ret_lsnp = save_lsn; + } + return (ret); +} + +/* + * __rep_logreq -- + * Handle a REP_LOG_REQ message. + * + * PUBLIC: int __rep_logreq __P((ENV *, __rep_control_args *, DBT *, int)); + */ +int +__rep_logreq(env, rp, rec, eid) + ENV *env; + __rep_control_args *rp; + DBT *rec; + int eid; +{ + DBT data_dbt, newfiledbt; + DB_LOGC *logc; + DB_LSN firstlsn, lsn, oldfilelsn; + DB_REP *db_rep; + REP *rep; + REP_BULK bulk; + REP_THROTTLE repth; + __rep_logreq_args lr_args; + __rep_newfile_args nf_args; + uintptr_t bulkoff; + u_int32_t bulkflags, use_bulk; + int count, ret, t_ret; + u_int8_t buf[__REP_NEWFILE_SIZE]; + size_t len; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + + /* COMPQUIET_LSN is what this is... */ + ZERO_LSN(lr_args.endlsn); + + if (rec != NULL && rec->size != 0) { + if (rp->rep_version < DB_REPVERSION_47) + lr_args.endlsn = *(DB_LSN *)rec->data; + else if ((ret = __rep_logreq_unmarshal(env, &lr_args, + rec->data, rec->size, NULL)) != 0) + return (ret); + RPRINT(env, (env, DB_VERB_REP_MISC, + "[%lu][%lu]: LOG_REQ max lsn: [%lu][%lu]", + (u_long) rp->lsn.file, (u_long)rp->lsn.offset, + (u_long)lr_args.endlsn.file, + (u_long)lr_args.endlsn.offset)); + } + /* + * There are several different cases here. + * 1. We asked logc_get for a particular LSN and got it. + * 2. We asked logc_get for an LSN and it's not found because it is + * beyond the end of a log file and we need a NEWFILE msg. + * and then the record that was requested. + * 3. We asked logc_get for an LSN and it is already archived. + * 4. We asked logc_get for an LSN and it simply doesn't exist, but + * doesn't meet any of those other criteria, in which case + * it's an error (that should never happen on a master). 
+ * + * If we have a valid LSN and the request has a data_dbt with + * it, the sender is asking for a chunk of log records. + * Then we need to send all records up to the LSN in the data dbt. + */ + memset(&data_dbt, 0, sizeof(data_dbt)); + oldfilelsn = lsn = rp->lsn; + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + REP_SYSTEM_LOCK(env); + if ((ret = __rep_lockout_archive(env, rep)) != 0) { + REP_SYSTEM_UNLOCK(env); + goto err; + } + REP_SYSTEM_UNLOCK(env); + if ((ret = __logc_get(logc, &lsn, &data_dbt, DB_SET)) == 0) { + /* Case 1 */ + (void)__rep_send_message(env, + eid, REP_LOG, &lsn, &data_dbt, REPCTL_RESEND, 0); + oldfilelsn.offset += logc->len; + } else if (ret == DB_NOTFOUND) { + /* + * If logc_get races with log_archive, it might return + * DB_NOTFOUND. We expect there to be some log record + * that is the first one. Loop until we either get + * a log record or some error. Since we only expect + * to get this racing log_archive, bound it to a few + * tries. + */ + count = 0; + do { + ret = __logc_get(logc, &firstlsn, &data_dbt, DB_FIRST); + count++; + } while (ret == DB_NOTFOUND && count < 10); + if (ret != 0) + goto err; + if (LOG_COMPARE(&firstlsn, &rp->lsn) > 0) { + /* Case 3 */ + if (F_ISSET(rep, REP_F_CLIENT)) { + ret = DB_NOTFOUND; + goto err; + } + (void)__rep_send_message(env, eid, + REP_VERIFY_FAIL, &rp->lsn, NULL, 0, 0); + ret = 0; + goto err; + } + ret = __rep_chk_newfile(env, logc, rep, rp, eid); + if (ret == DB_NOTFOUND) { + /* Case 4 */ + /* + * If we still get DB_NOTFOUND the client gave us an + * unknown LSN, perhaps at the end of the log. Ignore + * it if we're the master. Return DB_NOTFOUND if + * we are the client. 
+ */ + if (F_ISSET(rep, REP_F_MASTER)) { + __db_errx(env, DB_STR_A("3501", + "Request for LSN [%lu][%lu] not found", + "%lu %lu"), (u_long)rp->lsn.file, + (u_long)rp->lsn.offset); + ret = 0; + goto err; + } else + ret = DB_NOTFOUND; + } + } + + if (ret != 0) + goto err; + + /* + * If the user requested a gap, send the whole thing, while observing + * the limits from rep_set_limit. + * + * If we're doing bulk transfer, allocate a bulk buffer to put our + * log records in. We still need to initialize the throttle info + * because if we encounter a log record larger than our entire bulk + * buffer, we need to send it as a singleton. + * + * Use a local var so we don't need to worry if someone else turns + * on/off bulk in the middle of our call. + */ + use_bulk = FLD_ISSET(rep->config, REP_C_BULK); + if (use_bulk && (ret = __rep_bulk_alloc(env, &bulk, eid, + &bulkoff, &bulkflags, REP_BULK_LOG)) != 0) + goto err; + memset(&repth, 0, sizeof(repth)); + REP_SYSTEM_LOCK(env); + repth.gbytes = rep->gbytes; + repth.bytes = rep->bytes; + repth.type = REP_LOG; + repth.data_dbt = &data_dbt; + REP_SYSTEM_UNLOCK(env); + while (ret == 0 && rec != NULL && rec->size != 0 && + repth.type == REP_LOG) { + if ((ret = + __logc_get(logc, &repth.lsn, &data_dbt, DB_NEXT)) != 0) { + /* + * If we're a client and we only have part of the gap, + * return DB_NOTFOUND so that we send a REREQUEST + * back to the requester and it can ask for more. 
+ */ + if (ret == DB_NOTFOUND && F_ISSET(rep, REP_F_MASTER)) + ret = 0; + break; + } + if (LOG_COMPARE(&repth.lsn, &lr_args.endlsn) >= 0) + break; + if (repth.lsn.file != oldfilelsn.file) { + if ((ret = __logc_version(logc, &nf_args.version)) != 0) + break; + memset(&newfiledbt, 0, sizeof(newfiledbt)); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(newfiledbt, &nf_args.version, + sizeof(nf_args.version)); + else { + if ((ret = __rep_newfile_marshal(env, &nf_args, + buf, __REP_NEWFILE_SIZE, &len)) != 0) + goto err; + DB_INIT_DBT(newfiledbt, buf, len); + } + (void)__rep_send_message(env, + eid, REP_NEWFILE, &oldfilelsn, &newfiledbt, + REPCTL_RESEND, 0); + } + /* + * If we are configured for bulk, try to send this as a bulk + * request. If not configured, or it is too big for bulk + * then just send normally. + */ + if (use_bulk) + ret = __rep_bulk_message(env, &bulk, &repth, + &repth.lsn, &data_dbt, REPCTL_RESEND); + if (!use_bulk || ret == DB_REP_BULKOVF) + ret = __rep_send_throttle(env, eid, &repth, 0, 0); + if (ret != 0) { + /* Ignore send failure, except to break the loop. */ + if (ret == DB_REP_UNAVAIL) + ret = 0; + break; + } + /* + * If we are about to change files, then we'll need the + * last LSN in the previous file. Save it here. + */ + oldfilelsn = repth.lsn; + oldfilelsn.offset += logc->len; + } + + /* + * We're done, force out whatever remains in the bulk buffer and + * free it. + */ + if (use_bulk && (t_ret = __rep_bulk_free(env, &bulk, + REPCTL_RESEND)) != 0 && ret == 0 && + t_ret != DB_REP_UNAVAIL) + ret = t_ret; +err: + /* + * We could have raced an unlink from an earlier log_archive + * and the user is removing the files themselves, now. If + * we get an error indicating the log file might no longer + * exist, ignore it. 
+ */ + if (ret == ENOENT) + ret = 0; + REP_SYSTEM_LOCK(env); + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE); + REP_SYSTEM_UNLOCK(env); + if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_loggap_req - + * Request a log gap. Assumes the caller holds the REP->mtx_clientdb. + * + * lsnp is the current LSN we're handling. It is used to help decide + * if we ask for a gap or singleton. + * gapflags are flags that may override the algorithm or control the + * processing in some way. + * + * PUBLIC: int __rep_loggap_req __P((ENV *, REP *, DB_LSN *, u_int32_t)); + */ +int +__rep_loggap_req(env, rep, lsnp, gapflags) + ENV *env; + REP *rep; + DB_LSN *lsnp; + u_int32_t gapflags; +{ + DBT max_lsn_dbt, *max_lsn_dbtp; + DB_LOG *dblp; + DB_LSN next_lsn; + LOG *lp; + __rep_logreq_args lr_args; + size_t len; + u_int32_t ctlflags, flags, type; + int master, ret; + u_int8_t buf[__REP_LOGREQ_SIZE]; + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (FLD_ISSET(gapflags, REP_GAP_FORCE)) + next_lsn = *lsnp; + else + next_lsn = lp->ready_lsn; + ctlflags = flags = 0; + type = REP_LOG_REQ; + ret = 0; + + /* + * Check if we need to ask for the gap. + * We ask for the gap if: + * We are forced to with gapflags. + * If max_wait_lsn is ZERO_LSN - we've never asked for + * records before. + * If we asked for a single record and received it. + * + * If we want a gap, but don't have an ending LSN (waiting_lsn) + * send an ALL_REQ. This is primarily used by REP_REREQUEST when + * an ALL_REQ was not able to be fulfilled by another client. + */ + if (FLD_ISSET(gapflags, (REP_GAP_FORCE | REP_GAP_REREQUEST)) || + IS_ZERO_LSN(lp->max_wait_lsn) || + (lsnp != NULL && LOG_COMPARE(lsnp, &lp->max_wait_lsn) == 0)) { + lp->max_wait_lsn = lp->waiting_lsn; + /* + * In SYNC_LOG, make sure max_wait_lsn is set to avoid sending + * an ALL_REQ that could create an unnecessary dual data stream. 
+ */ + if (rep->sync_state == SYNC_LOG && + IS_ZERO_LSN(lp->max_wait_lsn)) + lp->max_wait_lsn = rep->last_lsn; + /* + * If we are forcing a gap, we need to send a max_wait_lsn + * that may be beyond the current gap/waiting_lsn (but + * it may not be). If we cannot determine any future + * waiting LSN, then it should be zero. If we're in + * internal init, it should be our ending LSN. + */ + if (FLD_ISSET(gapflags, REP_GAP_FORCE)) { + if (LOG_COMPARE(&lp->max_wait_lsn, lsnp) <= 0) { + if (rep->sync_state == SYNC_LOG) { + DB_ASSERT(env, LOG_COMPARE(lsnp, + &rep->last_lsn) <= 0); + lp->max_wait_lsn = rep->last_lsn; + } else + ZERO_LSN(lp->max_wait_lsn); + } + } + if (IS_ZERO_LSN(lp->max_wait_lsn)) + type = REP_ALL_REQ; + memset(&max_lsn_dbt, 0, sizeof(max_lsn_dbt)); + lr_args.endlsn = lp->max_wait_lsn; + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(max_lsn_dbt, &lp->max_wait_lsn, + sizeof(DB_LSN)); + else { + if ((ret = __rep_logreq_marshal(env, &lr_args, buf, + __REP_LOGREQ_SIZE, &len)) != 0) + goto err; + DB_INIT_DBT(max_lsn_dbt, buf, len); + } + max_lsn_dbtp = &max_lsn_dbt; + /* + * Gap requests are "new" and can go anywhere, unless + * this is already a re-request. + */ + if (FLD_ISSET(gapflags, REP_GAP_REREQUEST)) + flags = DB_REP_REREQUEST; + else + flags = DB_REP_ANYWHERE; + } else { + max_lsn_dbtp = NULL; + lp->max_wait_lsn = next_lsn; + /* + * If we're dropping to singletons, this is a re-request. + */ + flags = DB_REP_REREQUEST; + } + if ((master = rep->master_id) != DB_EID_INVALID) { + STAT_INC(env, + rep, log_request, rep->stat.st_log_requested, master); + if (rep->sync_state == SYNC_LOG) + ctlflags = REPCTL_INIT; + (void)__rep_send_message(env, master, + type, &next_lsn, max_lsn_dbtp, ctlflags, flags); + } else + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_MASTER_REQ, NULL, NULL, 0, 0); +err: + return (ret); +} + +/* + * __rep_logready - + * Handle getting back REP_LOGREADY. Any call to __rep_apply + * can return it. 
+ * + * PUBLIC: int __rep_logready __P((ENV *, REP *, time_t, DB_LSN *)); + */ +int +__rep_logready(env, rep, savetime, last_lsnp) + ENV *env; + REP *rep; + time_t savetime; + DB_LSN *last_lsnp; +{ + REGENV *renv; + REGINFO *infop; + int ret; + + infop = env->reginfo; + renv = infop->primary; + if ((ret = __log_flush(env, NULL)) != 0) + goto err; + if ((ret = __rep_verify_match(env, last_lsnp, savetime)) != 0) + goto err; + + REP_SYSTEM_LOCK(env); + ZERO_LSN(rep->first_lsn); + + if (rep->originfo_off != INVALID_ROFF) { + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, R_ADDR(infop, rep->originfo_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->originfo_off = INVALID_ROFF; + } + + rep->sync_state = SYNC_OFF; + F_SET(rep, REP_F_NIMDBS_LOADED); + ret = __rep_notify_threads(env, AWAIT_NIMDB); + REP_SYSTEM_UNLOCK(env); + if (ret != 0) + goto err; + + return (0); + +err: + DB_ASSERT(env, ret != DB_REP_WOULDROLLBACK); + __db_errx(env, DB_STR("3502", + "Client initialization failed. Need to manually restore client")); + return (__env_panic(env, ret)); +} + +/* + * __rep_chk_newfile -- + * Determine if getting DB_NOTFOUND is because we're at the + * end of a log file and need to send a NEWFILE message. + * + * This function handles these cases: + * [Case 1 was that we found the record we were looking for - it + * is already handled by the caller.] + * 2. We asked logc_get for an LSN and it's not found because it is + * beyond the end of a log file and we need a NEWFILE msg. + * 3. We asked logc_get for an LSN and it simply doesn't exist, but + * doesn't meet any of those other criteria, in which case + * we return DB_NOTFOUND and the caller decides if it's an error. + * + * This function returns 0 if we had to send a message and the bad + * LSN is dealt with and DB_NOTFOUND if this really is an unknown LSN + * (on a client) and errors if it isn't found on the master. 
+ */ +static int +__rep_chk_newfile(env, logc, rep, rp, eid) + ENV *env; + DB_LOGC *logc; + REP *rep; + __rep_control_args *rp; + int eid; +{ + DBT data_dbt, newfiledbt; + DB_LOG *dblp; + DB_LSN endlsn; + LOG *lp; + __rep_newfile_args nf_args; + int ret; + u_int8_t buf[__REP_NEWFILE_SIZE]; + size_t len; + + ret = 0; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + memset(&data_dbt, 0, sizeof(data_dbt)); + LOG_SYSTEM_LOCK(env); + endlsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + if (endlsn.file > rp->lsn.file) { + /* + * Case 2: + * Need to find the LSN of the last record in + * file lsn.file so that we can send it with + * the NEWFILE call. In order to do that, we + * need to try to get {lsn.file + 1, 0} and + * then backup. + */ + endlsn.file = rp->lsn.file + 1; + endlsn.offset = 0; + if ((ret = __logc_get(logc, + &endlsn, &data_dbt, DB_SET)) != 0 || + (ret = __logc_get(logc, + &endlsn, &data_dbt, DB_PREV)) != 0) { + RPRINT(env, (env, DB_VERB_REP_MISC, + "Unable to get prev of [%lu][%lu]", + (u_long)rp->lsn.file, + (u_long)rp->lsn.offset)); + /* + * We want to push the error back + * to the client so that the client + * does an internal backup. The + * client asked for a log record + * we no longer have and it is + * outdated. + * XXX - This could be optimized by + * having the master perform and + * send a REP_UPDATE message. We + * currently want the client to set + * up its 'update' state prior to + * requesting REP_UPDATE_REQ. + * + * If we're a client servicing a request + * just return DB_NOTFOUND. 
+ */ + if (F_ISSET(rep, REP_F_MASTER)) { + ret = 0; + (void)__rep_send_message(env, eid, + REP_VERIFY_FAIL, &rp->lsn, + NULL, 0, 0); + } else + ret = DB_NOTFOUND; + } else { + endlsn.offset += logc->len; + if ((ret = __logc_version(logc, + &nf_args.version)) == 0) { + memset(&newfiledbt, 0, + sizeof(newfiledbt)); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(newfiledbt, + &nf_args.version, + sizeof(nf_args.version)); + else { + if ((ret = __rep_newfile_marshal(env, + &nf_args, buf, __REP_NEWFILE_SIZE, + &len)) != 0) + return (ret); + DB_INIT_DBT(newfiledbt, buf, len); + } + (void)__rep_send_message(env, eid, + REP_NEWFILE, &endlsn, + &newfiledbt, REPCTL_RESEND, 0); + } + } + } else + ret = DB_NOTFOUND; + + return (ret); +} diff --git a/src/rep/rep_method.c b/src/rep/rep_method.c new file mode 100644 index 00000000..9559711c --- /dev/null +++ b/src/rep/rep_method.c @@ -0,0 +1,3027 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __rep_abort_prepared __P((ENV *)); +static int __rep_await_condition __P((ENV *, + struct rep_waitgoal *, db_timeout_t)); +static int __rep_bt_cmp __P((DB *, const DBT *, const DBT *)); +static int __rep_check_applied __P((ENV *, + DB_THREAD_INFO *, DB_COMMIT_INFO *, struct rep_waitgoal *)); +static void __rep_config_map __P((ENV *, u_int32_t *, u_int32_t *)); +static u_int32_t __rep_conv_vers __P((ENV *, u_int32_t)); +static int __rep_read_lsn_history __P((ENV *, + DB_THREAD_INFO *, DB_TXN **, DBC **, u_int32_t, + __rep_lsn_hist_data_args *, struct rep_waitgoal *, u_int32_t)); +static int __rep_restore_prepared __P((ENV *)); +static int __rep_save_lsn_hist __P((ENV *, DB_THREAD_INFO *, DB_LSN *)); +/* + * __rep_env_create -- + * Replication-specific initialization of the ENV structure. + * + * PUBLIC: int __rep_env_create __P((DB_ENV *)); + */ +int +__rep_env_create(dbenv) + DB_ENV *dbenv; +{ + DB_REP *db_rep; + ENV *env; + int ret; + + env = dbenv->env; + + if ((ret = __os_calloc(env, 1, sizeof(DB_REP), &db_rep)) != 0) + return (ret); + + db_rep->eid = DB_EID_INVALID; + db_rep->bytes = REP_DEFAULT_THROTTLE; + DB_TIMEOUT_TO_TIMESPEC(DB_REP_REQUEST_GAP, &db_rep->request_gap); + DB_TIMEOUT_TO_TIMESPEC(DB_REP_MAX_GAP, &db_rep->max_gap); + db_rep->elect_timeout = 2 * US_PER_SEC; /* 2 seconds */ + db_rep->chkpt_delay = 30 * US_PER_SEC; /* 30 seconds */ + db_rep->my_priority = DB_REP_DEFAULT_PRIORITY; + /* + * Make no clock skew the default. Setting both fields + * to the same non-zero value means no skew. + */ + db_rep->clock_skew = 1; + db_rep->clock_base = 1; + FLD_SET(db_rep->config, REP_C_AUTOINIT); + FLD_SET(db_rep->config, REP_C_AUTOROLLBACK); + + /* + * Turn on system messages by default. 
+ */ + FLD_SET(dbenv->verbose, DB_VERB_REP_SYSTEM); + +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_env_create(env, db_rep)) != 0) { + __os_free(env, db_rep); + return (ret); + } +#endif + + env->rep_handle = db_rep; + return (0); +} + +/* + * __rep_env_destroy -- + * Replication-specific destruction of the ENV structure. + * + * PUBLIC: void __rep_env_destroy __P((DB_ENV *)); + */ +void +__rep_env_destroy(dbenv) + DB_ENV *dbenv; +{ + ENV *env; + + env = dbenv->env; + + if (env->rep_handle != NULL) { +#ifdef HAVE_REPLICATION_THREADS + __repmgr_env_destroy(env, env->rep_handle); +#endif + __os_free(env, env->rep_handle); + env->rep_handle = NULL; + } +} + +/* + * __rep_get_config -- + * Return the replication subsystem configuration. + * + * PUBLIC: int __rep_get_config __P((DB_ENV *, u_int32_t, int *)); + */ +int +__rep_get_config(dbenv, which, onp) + DB_ENV *dbenv; + u_int32_t which; + int *onp; +{ + DB_REP *db_rep; + ENV *env; + REP *rep; + u_int32_t mapped; + + env = dbenv->env; + +#undef OK_FLAGS +#define OK_FLAGS \ + (DB_REP_CONF_AUTOINIT | DB_REP_CONF_AUTOROLLBACK | \ + DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT | DB_REP_CONF_INMEM | \ + DB_REP_CONF_LEASE | DB_REP_CONF_NOWAIT | \ + DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS) + + if (FLD_ISSET(which, ~OK_FLAGS)) + return (__db_ferr(env, "DB_ENV->rep_get_config", 0)); + + db_rep = env->rep_handle; + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_config", DB_INIT_REP); + + mapped = 0; + __rep_config_map(env, &which, &mapped); + if (REP_ON(env)) { + rep = db_rep->region; + if (FLD_ISSET(rep->config, mapped)) + *onp = 1; + else + *onp = 0; + } else { + if (FLD_ISSET(db_rep->config, mapped)) + *onp = 1; + else + *onp = 0; + } + return (0); +} + +/* + * __rep_set_config -- + * Configure the replication subsystem. 
+ * + * PUBLIC: int __rep_set_config __P((DB_ENV *, u_int32_t, int)); + */ +int +__rep_set_config(dbenv, which, on) + DB_ENV *dbenv; + u_int32_t which; + int on; +{ + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + LOG *lp; + REP *rep; + REP_BULK bulk; + u_int32_t mapped, orig; + int ret, t_ret; + + env = dbenv->env; + db_rep = env->rep_handle; + ret = 0; + +#undef OK_FLAGS +#define OK_FLAGS \ + (DB_REP_CONF_AUTOINIT | DB_REP_CONF_AUTOROLLBACK | \ + DB_REP_CONF_BULK | DB_REP_CONF_DELAYCLIENT | DB_REP_CONF_INMEM | \ + DB_REP_CONF_LEASE | DB_REP_CONF_NOWAIT | \ + DB_REPMGR_CONF_2SITE_STRICT | DB_REPMGR_CONF_ELECTIONS) +#define REPMGR_FLAGS (REP_C_2SITE_STRICT | REP_C_ELECTIONS) + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_config", DB_INIT_REP); + + if (FLD_ISSET(which, ~OK_FLAGS)) + return (__db_ferr(env, "DB_ENV->rep_set_config", 0)); + + mapped = 0; + __rep_config_map(env, &which, &mapped); + + if (APP_IS_BASEAPI(env) && FLD_ISSET(mapped, REPMGR_FLAGS)) { + __db_errx(env, DB_STR_A("3548", + "%s cannot configure repmgr settings from base replication application", + "%s"), "DB_ENV->rep_set_config:"); + return (EINVAL); + } + + if (REP_ON(env)) { +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_valid_config(env, mapped)) != 0) + return (ret); +#endif + + ENV_ENTER(env, ip); + + rep = db_rep->region; + /* + * In-memory replication must be called before calling + * env->open. If it is turned on and off before env->open, + * it doesn't matter. Any attempt to turn it on or off after + * env->open is intercepted by this error. + */ + if (FLD_ISSET(mapped, REP_C_INMEM)) { + __db_errx(env, DB_STR_A("3549", +"%s in-memory replication must be configured before DB_ENV->open", + "%s"), "DB_ENV->rep_set_config:"); + ENV_LEAVE(env, ip); + return (EINVAL); + } + /* + * Leases must be turned on before calling rep_start. + * Leases can never be turned off once they're turned on. 
+ */ + if (FLD_ISSET(mapped, REP_C_LEASE)) { + if (F_ISSET(rep, REP_F_START_CALLED)) { + __db_errx(env, DB_STR("3550", + "DB_ENV->rep_set_config: leases must be " + "configured before DB_ENV->rep_start")); + ret = EINVAL; + } + if (on == 0) { + __db_errx(env, DB_STR("3551", + "DB_ENV->rep_set_config: leases cannot be turned off")); + ret = EINVAL; + } + if (ret != 0) { + ENV_LEAVE(env, ip); + return (ret); + } + } + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + orig = rep->config; + if (on) + FLD_SET(rep->config, mapped); + else + FLD_CLR(rep->config, mapped); + + /* + * Bulk transfer requires special processing if it is getting + * toggled. + */ + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (FLD_ISSET(rep->config, REP_C_BULK) && + !FLD_ISSET(orig, REP_C_BULK)) + db_rep->bulk = R_ADDR(&dblp->reginfo, lp->bulk_buf); + REP_SYSTEM_UNLOCK(env); + + /* + * If turning bulk off and it was on, send out whatever is in + * the buffer already. + */ + if (FLD_ISSET(orig, REP_C_BULK) && + !FLD_ISSET(rep->config, REP_C_BULK) && lp->bulk_off != 0) { + memset(&bulk, 0, sizeof(bulk)); + if (db_rep->bulk == NULL) + bulk.addr = + R_ADDR(&dblp->reginfo, lp->bulk_buf); + else + bulk.addr = db_rep->bulk; + bulk.offp = &lp->bulk_off; + bulk.len = lp->bulk_len; + bulk.type = REP_BULK_LOG; + bulk.eid = DB_EID_BROADCAST; + bulk.flagsp = &lp->bulk_flags; + ret = __rep_send_bulk(env, &bulk, 0); + } + MUTEX_UNLOCK(env, rep->mtx_clientdb); + + ENV_LEAVE(env, ip); + +#ifdef HAVE_REPLICATION_THREADS + /* + * If turning ELECTIONS on, and it was off, check whether we + * need to start an election immediately. + */ + if (!FLD_ISSET(orig, REP_C_ELECTIONS) && + FLD_ISSET(rep->config, REP_C_ELECTIONS) && + (t_ret = __repmgr_turn_on_elections(env)) != 0 && ret == 0) + ret = t_ret; +#endif + } else { + if (on) + FLD_SET(db_rep->config, mapped); + else + FLD_CLR(db_rep->config, mapped); + } + /* Configuring 2SITE_STRICT, etc. 
makes this a repmgr application */ + if (ret == 0 && FLD_ISSET(mapped, REPMGR_FLAGS)) + APP_SET_REPMGR(env); + return (ret); +} + +static void +__rep_config_map(env, inflagsp, outflagsp) + ENV *env; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(env, NULL); + + if (FLD_ISSET(*inflagsp, DB_REP_CONF_AUTOINIT)) { + FLD_SET(*outflagsp, REP_C_AUTOINIT); + FLD_CLR(*inflagsp, DB_REP_CONF_AUTOINIT); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_AUTOROLLBACK)) { + FLD_SET(*outflagsp, REP_C_AUTOROLLBACK); + FLD_CLR(*inflagsp, DB_REP_CONF_AUTOROLLBACK); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_BULK)) { + FLD_SET(*outflagsp, REP_C_BULK); + FLD_CLR(*inflagsp, DB_REP_CONF_BULK); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_DELAYCLIENT)) { + FLD_SET(*outflagsp, REP_C_DELAYCLIENT); + FLD_CLR(*inflagsp, DB_REP_CONF_DELAYCLIENT); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_INMEM)) { + FLD_SET(*outflagsp, REP_C_INMEM); + FLD_CLR(*inflagsp, DB_REP_CONF_INMEM); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_LEASE)) { + FLD_SET(*outflagsp, REP_C_LEASE); + FLD_CLR(*inflagsp, DB_REP_CONF_LEASE); + } + if (FLD_ISSET(*inflagsp, DB_REP_CONF_NOWAIT)) { + FLD_SET(*outflagsp, REP_C_NOWAIT); + FLD_CLR(*inflagsp, DB_REP_CONF_NOWAIT); + } + if (FLD_ISSET(*inflagsp, DB_REPMGR_CONF_2SITE_STRICT)) { + FLD_SET(*outflagsp, REP_C_2SITE_STRICT); + FLD_CLR(*inflagsp, DB_REPMGR_CONF_2SITE_STRICT); + } + if (FLD_ISSET(*inflagsp, DB_REPMGR_CONF_ELECTIONS)) { + FLD_SET(*outflagsp, REP_C_ELECTIONS); + FLD_CLR(*inflagsp, DB_REPMGR_CONF_ELECTIONS); + } + DB_ASSERT(env, *inflagsp == 0); +} + +/* + * __rep_start_pp -- + * Become a master or client, and start sending messages to participate + * in the replication environment. Must be called after the environment + * is open. 
+ * + * PUBLIC: int __rep_start_pp __P((DB_ENV *, DBT *, u_int32_t)); + */ +int +__rep_start_pp(dbenv, dbt, flags) + DB_ENV *dbenv; + DBT *dbt; + u_int32_t flags; +{ + DB_REP *db_rep; + ENV *env; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_start", DB_INIT_REP); + + if (APP_IS_REPMGR(env)) { + __db_errx(env, DB_STR("3552", +"DB_ENV->rep_start: cannot call from Replication Manager application")); + return (EINVAL); + } + + switch (LF_ISSET(DB_REP_CLIENT | DB_REP_MASTER)) { + case DB_REP_CLIENT: + case DB_REP_MASTER: + break; + default: + __db_errx(env, DB_STR("3553", + "DB_ENV->rep_start: must specify DB_REP_CLIENT or DB_REP_MASTER")); + return (EINVAL); + } + + /* We need a transport function because we send messages. */ + if (db_rep->send == NULL) { + __db_errx(env, DB_STR("3554", + "DB_ENV->rep_start: must be called after DB_ENV->rep_set_transport")); + return (EINVAL); + } + + return (__rep_start_int(env, dbt, flags)); +} + +/* + * __rep_start_int -- + * Internal processing to become a master or client and start sending + * messages to participate in the replication environment. If this is + * a newly created environment, then this site has likely been in an + * initial, undefined state - neither master nor client. What that means + * is that as a non-client, it can write log records locally (such as + * those generated by recovery) and as a non-master, it does not attempt + * to send those log records elsewhere. + * + * We must protect rep_start_int, which may change the world, with the rest + * of the DB library. Each API interface will count itself as it enters + * the library. Rep_start_int checks the following: + * + * rep->msg_th - this is the count of threads currently in rep_process_message + * rep->handle_cnt - number of threads actively using a dbp in library. + * rep->txn_cnt - number of active txns. 
+ * REP_LOCKOUT_* - Replication flag that indicates that we wish to run + * recovery, and want to prohibit new transactions from entering and cause + * existing ones to return immediately (with a DB_LOCK_DEADLOCK error). + * + * There is also the renv->rep_timestamp which is updated whenever significant + * events (i.e., new masters, log rollback, etc). Upon creation, a handle + * is associated with the current timestamp. Each time a handle enters the + * library it must check if the handle timestamp is the same as the one + * stored in the replication region. This prevents the use of handles on + * clients that reference non-existent files whose creation was backed out + * during a synchronizing recovery. + * + * PUBLIC: int __rep_start_int __P((ENV *, DBT *, u_int32_t)); + */ +int +__rep_start_int(env, dbt, flags) + ENV *env; + DBT *dbt; + u_int32_t flags; +{ + DB *dbp; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn, perm_lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + DB_TXNREGION *region; + LOG *lp; + REGENV *renv; + REGINFO *infop; + REP *rep; + db_timeout_t tmp; + u_int32_t new_gen, oldvers, pending_event, role; + int interrupting, locked, ret, role_chg, start_th, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + renv = infop->primary; + interrupting = locked = 0; + pending_event = DB_EVENT_NO_SUCH_EVENT; + role = LF_ISSET(DB_REP_CLIENT | DB_REP_MASTER); + start_th = 0; + + /* + * If we're using master leases, check that all needed + * setup has been done, including setting the lease timeout. + */ + if (IS_USING_LEASES(env) && rep->lease_timeout == 0) { + __db_errx(env, DB_STR("3555", +"DB_ENV->rep_start: must call DB_ENV->rep_set_timeout for leases first")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + /* Serialize rep_start() calls. */ + MUTEX_LOCK(env, rep->mtx_repstart); + start_th = 1; + + /* + * In order to correctly check log files for old versions, we + * need to flush the logs. 
Serialize log flush to make sure it is + * always done just before the log old version check. Otherwise it + * is possible that another thread in rep_start could write LSN history + * and create a new log file that is not yet fully there for the log + * old version check. + */ + if ((ret = __log_flush(env, NULL)) != 0) + goto out; + + REP_SYSTEM_LOCK(env); + role_chg = (!F_ISSET(rep, REP_F_MASTER) && role == DB_REP_MASTER) || + (!F_ISSET(rep, REP_F_CLIENT) && role == DB_REP_CLIENT); + + /* + * There is no need for lockout if all we're doing is sending a message. + * In fact, lockout could be harmful: the typical use of this "duplicate + * client" style of call is when the application has to poll, seeking + * for a master. If the resulting NEWMASTER message were to arrive when + * we had messages locked out, we would discard it, resulting in further + * delay. + */ + if (role == DB_REP_CLIENT && !role_chg) { + REP_SYSTEM_UNLOCK(env); + if ((ret = __dbt_usercopy(env, dbt)) == 0) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWCLIENT, NULL, dbt, 0, 0); + goto out; + } + + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) { + /* + * There is already someone in msg lockout. Return. + */ + RPRINT(env, (env, DB_VERB_REP_MISC, + "Thread already in msg lockout")); + REP_SYSTEM_UNLOCK(env); + goto out; + } else if ((ret = __rep_lockout_msg(env, rep, 0)) != 0) + goto errunlock; + + /* + * If we are internal init and we try to become master, reject it. + * Our environment databases/logs are in an inconsistent state and + * we cannot become master. + */ + if (IN_INTERNAL_INIT(rep) && role == DB_REP_MASTER) { + __db_errx(env, DB_STR("3556", + "DB_ENV->rep_start: Cannot become master during internal init")); + ret = DB_REP_UNAVAIL; + goto errunlock; + } + + /* + * Wait for any active txns or mpool ops to complete, and + * prevent any new ones from occurring, only if we're + * changing roles. 
+ */ + if (role_chg) { + if ((ret = __rep_lockout_api(env, rep)) != 0) + goto errunlock; + locked = 1; + } + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + if (role == DB_REP_MASTER) { + if (role_chg) { + /* + * If we were previously a client, it's possible we + * could have an interruptible STARTSYNC in progress. + * Interrupt it now, so that it doesn't slow down our + * transition to master, and because its effects aren't + * doing us any good anyway. + */ + (void)__memp_set_config( + env->dbenv, DB_MEMP_SYNC_INTERRUPT, 1); + interrupting = 1; + + /* + * If we're upgrading from having been a client, + * preclose, so that we close our temporary database + * and any files we opened while doing a rep_apply. + * If we don't we can infinitely leak file ids if + * the master crashed with files open (the likely + * case). If we don't close them we can run into + * problems if we try to remove that file or long + * running applications end up with an unbounded + * number of used fileids, each getting written + * on checkpoint. Just close them. + * Then invalidate all files open in the logging + * region. These are files open by other processes + * attached to the environment. They must be + * closed by the other processes when they notice + * the change in role. + */ + if ((ret = __rep_preclose(env)) != 0) + goto errunlock; + + new_gen = rep->gen + 1; + /* + * There could have been any number of failed + * elections, so jump the gen if we need to now. + */ + if (rep->egen > rep->gen) + new_gen = rep->egen; + SET_GEN(new_gen); + /* + * If the "group" has only one site, it's OK to start as + * master without an election. This is how repmgr + * builds up a primordial group, by induction. 
+ */ + if (IS_USING_LEASES(env) && + rep->config_nsites > 1 && + !F_ISSET(rep, REP_F_MASTERELECT)) { + __db_errx(env, DB_STR("3557", +"rep_start: Cannot become master without being elected when using leases.")); + ret = EINVAL; + goto errunlock; + } + if (F_ISSET(rep, REP_F_MASTERELECT)) { + __rep_elect_done(env, rep); + F_CLR(rep, REP_F_MASTERELECT); + } else if (FLD_ISSET(rep->config, REP_C_INMEM)) + /* + * Help detect if application has ignored our + * recommendation against reappointing same + * master after a crash/reboot when running + * in-memory replication. Doing this allows a + * slight chance of two masters at the same + * generation, resulting in client crashes. + */ + RPRINT(env, (env, DB_VERB_REP_MISC, + "Appointed new master while running in-memory replication.")); + if (rep->egen <= rep->gen) + rep->egen = rep->gen + 1; + RPRINT(env, (env, DB_VERB_REP_MISC, + "New master gen %lu, egen %lu", + (u_long)rep->gen, (u_long)rep->egen)); + /* + * If not running in-memory replication, write + * gen file. + */ + if (!FLD_ISSET(rep->config, REP_C_INMEM) && + (ret = __rep_write_gen(env, rep, rep->gen)) != 0) + goto errunlock; + } + /* + * Set lease duration assuming clients have faster clock. + * Master needs to compensate so that clients do not + * expire their grant while the master thinks it is valid. + */ + if (IS_USING_LEASES(env) && + (role_chg || !IS_REP_STARTED(env))) { + /* + * If we have already granted our lease, we + * cannot become master. + */ + if ((ret = __rep_islease_granted(env))) { + __db_errx(env, DB_STR("3558", + "rep_start: Cannot become master with outstanding lease granted.")); + ret = EINVAL; + goto errunlock; + } + /* + * Set max_perm_lsn to last PERM record on master. + */ + if ((ret = __log_cursor(env, &logc)) != 0) + goto errunlock; + ret = __rep_log_backup(env, logc, &perm_lsn, + REP_REC_PERM); + (void)__logc_close(logc); + /* + * If we found a perm LSN use it. Otherwise, if + * no perm LSN exists, initialize. 
+ */ + if (ret == 0) + lp->max_perm_lsn = perm_lsn; + else if (ret == DB_NOTFOUND) + INIT_LSN(lp->max_perm_lsn); + else + goto errunlock; + + /* + * Simply compute the larger ratio for the lease. + */ + tmp = (db_timeout_t)((double)rep->lease_timeout / + ((double)rep->clock_skew / + (double)rep->clock_base)); + DB_TIMEOUT_TO_TIMESPEC(tmp, &rep->lease_duration); + if ((ret = __rep_lease_table_alloc(env, + rep->config_nsites)) != 0) + goto errunlock; + } + rep->master_id = rep->eid; + STAT_INC(env, rep, + master_change, rep->stat.st_master_changes, rep->eid); + +#ifdef DIAGNOSTIC + if (!F_ISSET(rep, REP_F_GROUP_ESTD)) + RPRINT(env, (env, DB_VERB_REP_MISC, + "Establishing group as master.")); +#endif + /* + * When becoming a master, clear the following flags: + * CLIENT: Site is no longer a client. + * ABBREVIATED: Indicates abbreviated internal init, which + * cannot occur on a master. + * MASTERELECT: Indicates that this master is elected + * rather than appointed. If we're changing roles we + * used this flag above for error checks and election + * cleanup. + * SKIPPED_APPLY: Indicates that client apply skipped + * some log records during an election, no longer + * applicable on master. + * DELAY: Indicates user config to delay initial client + * sync with new master, doesn't apply to master. + * LEASE_EXPIRED: Applies to client leases which are + * now defunct on master. + * NEWFILE: Used to delay client apply during newfile + * operation, not applicable to master. + */ + F_CLR(rep, REP_F_CLIENT | REP_F_ABBREVIATED | + REP_F_MASTERELECT | REP_F_SKIPPED_APPLY | REP_F_DELAY | + REP_F_LEASE_EXPIRED | REP_F_NEWFILE); + /* + * When becoming a master, set the following flags: + * MASTER: Indicate that this site is master. + * GROUP_ESTD: Having a master means a that replication + * group exists. + * NIMDBS_LOADED: Inmem dbs are always present on a master. + */ + F_SET(rep, REP_F_MASTER | REP_F_GROUP_ESTD | + REP_F_NIMDBS_LOADED); + /* Master cannot be in internal init. 
*/ + rep->sync_state = SYNC_OFF; + + /* + * We're master. Set the versions to the current ones. + */ + oldvers = lp->persist.version; + /* + * If we're moving forward to the current version, we need + * to force the log file to advance and reset the + * recovery table since it contains pointers to old + * recovery functions. + */ + VPRINT(env, (env, DB_VERB_REP_MISC, + "rep_start: Old log version was %lu", (u_long)oldvers)); + if (lp->persist.version != DB_LOGVERSION) { + if ((ret = __env_init_rec(env, DB_LOGVERSION)) != 0) + goto errunlock; + } + rep->version = DB_REPVERSION; + /* + * When becoming a master, clear the following lockouts: + * ARCHIVE: Used to keep logs while client may be + * inconsistent, not needed on master. + * MSG: We set this above to block message processing while + * becoming a master, can turn messages back on here. + */ + FLD_CLR(rep->lockout_flags, + REP_LOCKOUT_ARCHIVE | REP_LOCKOUT_MSG); + REP_SYSTEM_UNLOCK(env); + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + + /* + * Send the NEWMASTER message first so that clients know + * subsequent messages are coming from the right master. + * We need to perform all actions below no matter what + * regarding errors. + */ + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); + ret = 0; + if (role_chg) { + pending_event = DB_EVENT_REP_MASTER; + /* + * If prepared transactions have not been restored + * look to see if there are any. If there are, + * then mark the open files, otherwise close them. 
+ */ + region = env->tx_handle->reginfo.primary; + if (region->stat.st_nrestores == 0 && + (t_ret = __rep_restore_prepared(env)) != 0 && + ret == 0) + ret = t_ret; + if (region->stat.st_nrestores != 0) { + if ((t_ret = __dbreg_mark_restored(env)) != 0 && + ret == 0) + ret = t_ret; + } else { + ret = __dbreg_invalidate_files(env, 0); + if ((t_ret = __rep_closefiles(env)) != 0 && + ret == 0) + ret = t_ret; + } + + REP_SYSTEM_LOCK(env); + F_SET(rep, REP_F_SYS_DB_OP); + REP_SYSTEM_UNLOCK(env); + if ((t_ret = __txn_recycle_id(env, 0)) != 0 && ret == 0) + ret = t_ret; + + /* + * Write LSN history database, ahead of unlocking the + * API so that clients can always know the heritage of + * any transaction they receive via replication. + */ + if ((t_ret = __rep_save_lsn_hist(env, ip, &lsn)) != 0 && + ret == 0) + ret = t_ret; + + REP_SYSTEM_LOCK(env); + rep->gen_base_lsn = lsn; + rep->master_envid = renv->envid; + F_CLR(rep, REP_F_SYS_DB_OP); + CLR_LOCKOUT_BDB(rep); + locked = 0; + REP_SYSTEM_UNLOCK(env); + (void)__memp_set_config( + env->dbenv, DB_MEMP_SYNC_INTERRUPT, 0); + interrupting = 0; + } + } else { + /* + * Start a non-client as a client. + */ + rep->master_id = DB_EID_INVALID; + /* + * A non-client should not have been participating in an + * election, so most election flags should be off. The TALLY + * flag is an exception because it is set any time we receive + * a VOTE1 and there is no reason to clear and lose it for an + * election that may begin shortly. + */ + DB_ASSERT(env, !FLD_ISSET(rep->elect_flags, ~REP_E_TALLY)); + /* + * A non-client should not have the following client flags + * set and should not be in internal init. 
+ */ + DB_ASSERT(env, !F_ISSET(rep, + REP_F_ABBREVIATED | REP_F_DELAY | REP_F_NEWFILE)); + DB_ASSERT(env, rep->sync_state == SYNC_OFF); + + if ((ret = __log_get_oldversion(env, &oldvers)) != 0) + goto errunlock; + RPRINT(env, (env, DB_VERB_REP_MISC, + "rep_start: Found old version log %d", oldvers)); + if (oldvers >= DB_LOGVERSION_MIN) { + __log_set_version(env, oldvers); + if ((ret = __env_init_rec(env, oldvers)) != 0) + goto errunlock; + oldvers = __rep_conv_vers(env, oldvers); + DB_ASSERT(env, oldvers != DB_REPVERSION_INVALID); + rep->version = oldvers; + } + /* + * When becoming a client, clear the following flags: + * MASTER: Site is no longer a master. + * MASTERELECT: Indicates that a master is elected + * rather than appointed, not applicable on client. + */ + F_CLR(rep, REP_F_MASTER | REP_F_MASTERELECT); + F_SET(rep, REP_F_CLIENT); + + /* + * On a client, compute the lease duration on the + * assumption that the client has a fast clock. + * Expire any existing leases we might have held as + * a master. + */ + if (IS_USING_LEASES(env) && !IS_REP_STARTED(env)) { + if ((ret = __rep_lease_expire(env)) != 0) + goto errunlock; + /* + * Since the master is also compensating on its + * side as well, we're being doubly conservative + * to compensate on the client side. Theoretically, + * this compensation is not necessary, as it is + * effectively doubling the skew compensation. + * But we are making guarantees based on time and + * skews across machines. So we are being extra + * cautious. 
+ */ + tmp = (db_timeout_t)((double)rep->lease_timeout * + ((double)rep->clock_skew / + (double)rep->clock_base)); + DB_TIMEOUT_TO_TIMESPEC(tmp, &rep->lease_duration); + if (rep->lease_off != INVALID_ROFF) { + MUTEX_LOCK(env, renv->mtx_regenv); + __env_alloc_free(infop, + R_ADDR(infop, rep->lease_off)); + MUTEX_UNLOCK(env, renv->mtx_regenv); + rep->lease_off = INVALID_ROFF; + } + } + REP_SYSTEM_UNLOCK(env); + + /* + * Abort any prepared transactions that were restored + * by recovery. We won't be able to create any txns of + * our own until they're resolved, but we can't resolve + * them ourselves; the master has to. If any get + * resolved as commits, we'll redo them when commit + * records come in. Aborts will simply be ignored. + */ + if ((ret = __rep_abort_prepared(env)) != 0) + goto errlock; + + /* + * Since we're changing roles we need to init the db. + */ + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto errlock; + /* + * Ignore errors, because if the file doesn't exist, + * this is perfectly OK. + */ + MUTEX_LOCK(env, rep->mtx_clientdb); + (void)__db_remove(dbp, ip, NULL, REPDBNAME, + NULL, DB_FORCE); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + /* + * Set pending_event after calls that can fail. + */ + pending_event = DB_EVENT_REP_CLIENT; + + REP_SYSTEM_LOCK(env); + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + if (locked) { + CLR_LOCKOUT_BDB(rep); + locked = 0; + } + + if (F_ISSET(env, ENV_PRIVATE)) + /* + * If we think we're a new client, and we have a + * private env, set our gen number down to 0. + * Otherwise, we can restart and think + * we're ready to accept a new record (because our + * gen is okay), but really this client needs to + * sync with the master. + */ + SET_GEN(0); + REP_SYSTEM_UNLOCK(env); + + /* + * Announce ourselves and send out our data. 
+ */ + if ((ret = __dbt_usercopy(env, dbt)) != 0) + goto out; + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWCLIENT, NULL, dbt, 0, 0); + } + + if (0) { + /* + * We have separate labels for errors. If we're returning an + * error before we've set REP_LOCKOUT_MSG, we use 'err'. If + * we are erroring while holding the region mutex, then we use + * 'errunlock' label. If we error without holding the rep + * mutex we must use 'errlock'. + */ +errlock: REP_SYSTEM_LOCK(env); +errunlock: FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + if (locked) + CLR_LOCKOUT_BDB(rep); + if (interrupting) + (void)__memp_set_config( + env->dbenv, DB_MEMP_SYNC_INTERRUPT, 0); + REP_SYSTEM_UNLOCK(env); + } +out: + if (ret == 0) { + REP_SYSTEM_LOCK(env); + F_SET(rep, REP_F_START_CALLED); + REP_SYSTEM_UNLOCK(env); + } + if (pending_event != DB_EVENT_NO_SUCH_EVENT) + __rep_fire_event(env, pending_event, NULL); + if (start_th) + MUTEX_UNLOCK(env, rep->mtx_repstart); + __dbt_userfree(env, dbt, NULL, NULL); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * Write the current generation's base LSN into the history database. + */ +static int +__rep_save_lsn_hist(env, ip, lsnp) + ENV *env; + DB_THREAD_INFO *ip; + DB_LSN *lsnp; +{ + DB_REP *db_rep; + REP *rep; + REGENV *renv; + DB_TXN *txn; + DB *dbp; + DBT key_dbt, data_dbt; + __rep_lsn_hist_key_args key; + __rep_lsn_hist_data_args data; + u_int8_t key_buf[__REP_LSN_HIST_KEY_SIZE]; + u_int8_t data_buf[__REP_LSN_HIST_DATA_SIZE]; + db_timespec now; + int ret, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + renv = env->reginfo->primary; + txn = NULL; + ret = 0; + + if ((ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + return (ret); + + /* + * Use the cached handle to the history database if it is already open. + * Since we're becoming master, we don't expect to need it after this, + * so clear the cached handle and close the database once we've written + * our update. 
+ */ + if ((dbp = db_rep->lsn_db) == NULL && + (ret = __rep_open_sysdb(env, + ip, txn, REPLSNHIST, DB_CREATE, &dbp)) != 0) + goto err; + + key.version = REP_LSN_HISTORY_FMT_VERSION; + key.gen = rep->gen; + __rep_lsn_hist_key_marshal(env, &key, key_buf); + + data.envid = renv->envid; + data.lsn = *lsnp; + __os_gettime(env, &now, 0); + data.hist_sec = (u_int32_t)now.tv_sec; + data.hist_nsec = (u_int32_t)now.tv_nsec; + __rep_lsn_hist_data_marshal(env, &data, data_buf); + + DB_INIT_DBT(key_dbt, key_buf, sizeof(key_buf)); + DB_INIT_DBT(data_dbt, data_buf, sizeof(data_buf)); + + ret = __db_put(dbp, ip, txn, &key_dbt, &data_dbt, 0); +err: + if (dbp != NULL && + (t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + db_rep->lsn_db = NULL; + + DB_ASSERT(env, txn != NULL); + if ((t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * Open existing LSN history database, wherever it may be (on disk or in + * memory). If it doesn't exist, create it only if DB_CREATE is specified by + * our caller. + * + * If we could be sure that all sites in the replication group had matching + * REP_C_INMEM settings (that never changed over time), we could simply look for + * the database in the place where we knew it should be. The code here tries to + * be more flexible/resilient to mis-matching INMEM settings, even though we + * recommend against that. + * PUBLIC: int __rep_open_sysdb __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, const char *, u_int32_t, DB **)); + */ +int +__rep_open_sysdb(env, ip, txn, dbname, flags, dbpp) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN *txn; + const char *dbname; + u_int32_t flags; + DB **dbpp; +{ + DB_REP *db_rep; + REP *rep; + DB *dbp; + char *fname; + u_int32_t myflags; + int ret, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + return (ret); + + myflags = DB_INTERNAL_DB | (F_ISSET(env, ENV_THREAD) ? 
DB_THREAD : 0); + + /* + * First, try opening it as a sub-database within a disk-resident + * database file. (If success, skip to the end.) + */ + if ((ret = __db_open(dbp, ip, txn, + REPSYSDBNAME, dbname, DB_BTREE, myflags, 0, PGNO_BASE_MD)) == 0) + goto found; + if (ret != ENOENT) + goto err; + + /* + * Here, the file was not found. Next, try opening it as an in-memory + * database (after the necessary clean-up). + */ + ret = __db_close(dbp, txn, DB_NOSYNC); + dbp = NULL; + if (ret != 0 || (ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + if ((ret = __db_open(dbp, ip, txn, + NULL, dbname, DB_BTREE, myflags, 0, PGNO_BASE_MD)) == 0) + goto found; + if (ret != ENOENT) + goto err; + + /* + * Here, the database was not found either on disk or in memory. Create + * it, according to our local INMEM setting. + */ + ret = __db_close(dbp, txn, DB_NOSYNC); + dbp = NULL; + if (ret != 0) + goto err; + if (LF_ISSET(DB_CREATE)) { + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + if ((ret = __db_set_pagesize(dbp, REPSYSDBPGSZ)) != 0) + goto err; + FLD_SET(myflags, DB_CREATE); + fname = FLD_ISSET(rep->config, REP_C_INMEM) ? + NULL : REPSYSDBNAME; + if ((ret = __db_open(dbp, ip, txn, fname, + dbname, DB_BTREE, myflags, 0, PGNO_BASE_MD)) == 0) + goto found; + } else + ret = ENOENT; + +err: + if (dbp != NULL && (t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && + (ret == 0 || ret == ENOENT)) + ret = t_ret; + return (ret); + +found: + *dbpp = dbp; + return (0); +} + +/* + * __rep_client_dbinit -- + * + * Initialize the LSN database on the client side. This is called from the + * client initialization code. The startup flag value indicates if + * this is the first thread/process starting up and therefore should create + * the LSN database. This routine must be called once by each process acting + * as a client. + * + * Assumes caller holds appropriate mutex. 
+ * + * PUBLIC: int __rep_client_dbinit __P((ENV *, int, repdb_t)); + */ +int +__rep_client_dbinit(env, startup, which) + ENV *env; + int startup; + repdb_t which; +{ + DB *dbp, **rdbpp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + REP *rep; + int ret, t_ret; + u_int32_t flags; + const char *fname, *name, *subdb; + + db_rep = env->rep_handle; + rep = db_rep->region; + dbp = NULL; + + if (which == REP_DB) { + name = REPDBNAME; + rdbpp = &db_rep->rep_db; + } else { + name = REPPAGENAME; + rdbpp = &db_rep->file_dbp; + } + /* Check if this has already been called on this environment. */ + if (*rdbpp != NULL) + return (0); + + ENV_GET_THREAD_INFO(env, ip); + + /* Set up arguments for __db_remove and __db_open calls. */ + fname = name; + subdb = NULL; + if (FLD_ISSET(rep->config, REP_C_INMEM)) { + fname = NULL; + subdb = name; + } + + if (startup) { + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + /* + * Prevent in-memory database remove from writing to + * non-existent logs. + */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) + (void)__db_set_flags(dbp, DB_TXN_NOT_DURABLE); + /* + * Ignore errors, because if the file doesn't exist, this + * is perfectly OK. + */ + (void)__db_remove(dbp, ip, NULL, fname, subdb, DB_FORCE); + } + + if ((ret = __db_create_internal(&dbp, env, 0)) != 0) + goto err; + if (which == REP_DB && + (ret = __bam_set_bt_compare(dbp, __rep_bt_cmp)) != 0) + goto err; + + /* Don't write log records on the client. */ + if ((ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0) + goto err; + + flags = DB_NO_AUTO_COMMIT | DB_CREATE | DB_INTERNAL_DB | + (F_ISSET(env, ENV_THREAD) ? DB_THREAD : 0); + + if ((ret = __db_open(dbp, ip, NULL, fname, subdb, + (which == REP_DB ? 
DB_BTREE : DB_RECNO), + flags, 0, PGNO_BASE_MD)) != 0) + goto err; + + *rdbpp = dbp; + + if (0) { +err: if (dbp != NULL && + (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + *rdbpp = NULL; + } + + return (ret); +} + +/* + * __rep_bt_cmp -- + * + * Comparison function for the LSN table. We use the entire control + * structure as a key (for simplicity, so we don't have to merge the + * other fields in the control with the data field), but really only + * care about the LSNs. + */ +static int +__rep_bt_cmp(dbp, dbt1, dbt2) + DB *dbp; + const DBT *dbt1, *dbt2; +{ + DB_LSN lsn1, lsn2; + __rep_control_args *rp1, *rp2; + + COMPQUIET(dbp, NULL); + + rp1 = dbt1->data; + rp2 = dbt2->data; + + (void)__ua_memcpy(&lsn1, &rp1->lsn, sizeof(DB_LSN)); + (void)__ua_memcpy(&lsn2, &rp2->lsn, sizeof(DB_LSN)); + + if (lsn1.file > lsn2.file) + return (1); + + if (lsn1.file < lsn2.file) + return (-1); + + if (lsn1.offset > lsn2.offset) + return (1); + + if (lsn1.offset < lsn2.offset) + return (-1); + + return (0); +} + +/* + * __rep_abort_prepared -- + * Abort any prepared transactions that recovery restored. + * + * This is used by clients that have just run recovery, since + * they cannot/should not call txn_recover and handle prepared transactions + * themselves. 
+ */ +static int +__rep_abort_prepared(env) + ENV *env; +{ +#define PREPLISTSIZE 50 + DB_LOG *dblp; + DB_PREPLIST prep[PREPLISTSIZE], *p; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + LOG *lp; + int ret; + long count, i; + u_int32_t op; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + if (region->stat.st_nrestores == 0) + return (0); + + op = DB_FIRST; + do { + if ((ret = __txn_recover(env, + prep, PREPLISTSIZE, &count, op)) != 0) + return (ret); + for (i = 0; i < count; i++) { + p = &prep[i]; + if ((ret = __txn_abort(p->txn)) != 0) + return (ret); + env->rep_handle->region->op_cnt--; + env->rep_handle->region->max_prep_lsn = lp->lsn; + region->stat.st_nrestores--; + } + op = DB_NEXT; + } while (count == PREPLISTSIZE); + + return (0); +} + +/* + * __rep_restore_prepared -- + * Restore to a prepared state any prepared but not yet committed + * transactions. + * + * This performs, in effect, a "mini-recovery"; it is called from + * __rep_start by newly upgraded masters. There may be transactions that an + * old master prepared but did not resolve, which we need to restore to an + * active state. + */ +static int +__rep_restore_prepared(env) + ENV *env; +{ + DBT rec; + DB_LOGC *logc; + DB_LSN ckp_lsn, lsn; + DB_REP *db_rep; + DB_TXNHEAD *txninfo; + REP *rep; + __txn_ckp_args *ckp_args; + __txn_regop_args *regop_args; + __txn_prepare_args *prep_args; + int ret, t_ret; + u_int32_t hi_txn, low_txn, rectype, status, txnid, txnop; + + db_rep = env->rep_handle; + rep = db_rep->region; + if (IS_ZERO_LSN(rep->max_prep_lsn)) { + VPRINT(env, (env, DB_VERB_REP_MISC, + "restore_prep: No prepares. Skip.")); + return (0); + } + txninfo = NULL; + ckp_args = NULL; + prep_args = NULL; + regop_args = NULL; + ZERO_LSN(ckp_lsn); + ZERO_LSN(lsn); + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + + /* + * Get our first LSN to see if the prepared LSN is still + * available. If so, it might be unresolved. 
If not, + * then it is guaranteed to be resolved. + */ + memset(&rec, 0, sizeof(DBT)); + if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) { + __db_errx(env, DB_STR("3559", "First record not found")); + goto err; + } + /* + * If the max_prep_lsn is no longer available, we're sure + * that txn has been resolved. We're done. + */ + if (rep->max_prep_lsn.file < lsn.file) { + VPRINT(env, (env, DB_VERB_REP_MISC, + "restore_prep: Prepare resolved. Skip")); + ZERO_LSN(rep->max_prep_lsn); + goto done; + } + /* + * We need to consider the set of records between the most recent + * checkpoint LSN and the end of the log; any txn in that + * range, and only txns in that range, could still have been + * active, and thus prepared but not yet committed (PBNYC), + * when the old master died. + * + * Find the most recent checkpoint LSN, and get the record there. + * If there is no checkpoint in the log, start off by getting + * the very first record in the log instead. + */ + if ((ret = __txn_getckp(env, &lsn)) == 0) { + if ((ret = __logc_get(logc, &lsn, &rec, DB_SET)) != 0) { + __db_errx(env, DB_STR_A("3560", + "Checkpoint record at LSN [%lu][%lu] not found", + "%lu %lu"), (u_long)lsn.file, (u_long)lsn.offset); + goto err; + } + + if ((ret = __txn_ckp_read( + env, rec.data, &ckp_args)) == 0) { + ckp_lsn = ckp_args->ckp_lsn; + __os_free(env, ckp_args); + } + if (ret != 0) { + __db_errx(env, DB_STR_A("3561", + "Invalid checkpoint record at [%lu][%lu]", + "%lu %lu"), (u_long)lsn.file, (u_long)lsn.offset); + goto err; + } + + if ((ret = __logc_get(logc, &ckp_lsn, &rec, DB_SET)) != 0) { + __db_errx(env, DB_STR_A("3562", + "Checkpoint LSN record [%lu][%lu] not found", + "%lu %lu"), + (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset); + goto err; + } + } else if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) { + if (ret == DB_NOTFOUND) { + /* An empty log means no PBNYC txns. 
*/ + ret = 0; + goto done; + } + __db_errx(env, DB_STR("3563", + "Attempt to get first log record failed")); + goto err; + } + + /* + * We use the same txnlist infrastructure that recovery does; + * it demands an estimate of the high and low txnids for + * initialization. + * + * First, the low txnid. + */ + do { + /* txnid is after rectype, which is a u_int32. */ + LOGCOPY_32(env, &low_txn, + (u_int8_t *)rec.data + sizeof(u_int32_t)); + if (low_txn != 0) + break; + } while ((ret = __logc_get(logc, &lsn, &rec, DB_NEXT)) == 0); + + /* If there are no txns, there are no PBNYC txns. */ + if (ret == DB_NOTFOUND) { + ret = 0; + goto done; + } else if (ret != 0) + goto err; + + /* Now, the high txnid. */ + if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0) { + /* + * Note that DB_NOTFOUND is unacceptable here because we + * had to have looked at some log record to get this far. + */ + __db_errx(env, DB_STR("3564", + "Final log record not found")); + goto err; + } + do { + /* txnid is after rectype, which is a u_int32. */ + LOGCOPY_32(env, &hi_txn, + (u_int8_t *)rec.data + sizeof(u_int32_t)); + if (hi_txn != 0) + break; + } while ((ret = __logc_get(logc, &lsn, &rec, DB_PREV)) == 0); + if (ret == DB_NOTFOUND) { + ret = 0; + goto done; + } else if (ret != 0) + goto err; + + /* We have a high and low txnid. Initialise the txn list. */ + if ((ret = __db_txnlist_init(env, + NULL, low_txn, hi_txn, NULL, &txninfo)) != 0) + goto err; + + /* + * Now, walk backward from the end of the log to ckp_lsn. Any + * prepares that we hit without first hitting a commit or + * abort belong to PBNYC txns, and we need to apply them and + * restore them to a prepared state. + * + * Note that we wind up applying transactions out of order. + * Since all PBNYC txns still held locks on the old master and + * were isolated, this should be safe. 
+ */ + F_SET(env->lg_handle, DBLOG_RECOVER); + for (ret = __logc_get(logc, &lsn, &rec, DB_LAST); + ret == 0 && LOG_COMPARE(&lsn, &ckp_lsn) > 0; + ret = __logc_get(logc, &lsn, &rec, DB_PREV)) { + LOGCOPY_32(env, &rectype, rec.data); + switch (rectype) { + case DB___txn_regop: + /* + * It's a commit or abort--but we don't care + * which! Just add it to the list of txns + * that are resolved. + */ + if ((ret = __txn_regop_read( + env, rec.data, ®op_args)) != 0) + goto err; + txnid = regop_args->txnp->txnid; + txnop = regop_args->opcode; + __os_free(env, regop_args); + + ret = __db_txnlist_find(env, + txninfo, txnid, &status); + if (ret == DB_NOTFOUND) + ret = __db_txnlist_add(env, txninfo, + txnid, txnop, &lsn); + else if (ret != 0) + goto err; + break; + case DB___txn_prepare: + /* + * It's a prepare. If its not aborted and + * we haven't put the txn on our list yet, it + * hasn't been resolved, so apply and restore it. + */ + if ((ret = __txn_prepare_read( + env, rec.data, &prep_args)) != 0) + goto err; + ret = __db_txnlist_find(env, txninfo, + prep_args->txnp->txnid, &status); + if (ret == DB_NOTFOUND) { + if (prep_args->opcode == TXN_ABORT) + ret = __db_txnlist_add(env, txninfo, + prep_args->txnp->txnid, + prep_args->opcode, &lsn); + else if ((ret = + __rep_process_txn(env, &rec)) == 0) { + /* + * We are guaranteed to be single + * threaded here. We need to + * account for this newly + * instantiated txn in the op_cnt + * so that it is counted when it is + * resolved. + */ + rep->op_cnt++; + ret = __txn_restore_txn(env, + &lsn, prep_args); + } + } else if (ret != 0) + goto err; + __os_free(env, prep_args); + break; + default: + continue; + } + } + + /* It's not an error to have hit the beginning of the log. */ + if (ret == DB_NOTFOUND) + ret = 0; + +done: +err: t_ret = __logc_close(logc); + F_CLR(env->lg_handle, DBLOG_RECOVER); + + if (txninfo != NULL) + __db_txnlist_end(env, txninfo); + + return (ret == 0 ? 
t_ret : ret); +} + +/* + * __rep_get_limit -- + * Get the limit on the amount of data that will be sent during a single + * invocation of __rep_process_message. + * + * PUBLIC: int __rep_get_limit __P((DB_ENV *, u_int32_t *, u_int32_t *)); + */ +int +__rep_get_limit(dbenv, gbytesp, bytesp) + DB_ENV *dbenv; + u_int32_t *gbytesp, *bytesp; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_limit", DB_INIT_REP); + + if (REP_ON(env)) { + rep = db_rep->region; + ENV_ENTER(env, ip); + REP_SYSTEM_LOCK(env); + if (gbytesp != NULL) + *gbytesp = rep->gbytes; + if (bytesp != NULL) + *bytesp = rep->bytes; + REP_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + if (gbytesp != NULL) + *gbytesp = db_rep->gbytes; + if (bytesp != NULL) + *bytesp = db_rep->bytes; + } + + return (0); +} + +/* + * __rep_set_limit -- + * Set a limit on the amount of data that will be sent during a single + * invocation of __rep_process_message. 
+ * + * PUBLIC: int __rep_set_limit __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__rep_set_limit(dbenv, gbytes, bytes) + DB_ENV *dbenv; + u_int32_t gbytes, bytes; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_limit", DB_INIT_REP); + + if (bytes > GIGABYTE) { + gbytes += bytes / GIGABYTE; + bytes = bytes % GIGABYTE; + } + + if (REP_ON(env)) { + rep = db_rep->region; + ENV_ENTER(env, ip); + REP_SYSTEM_LOCK(env); + rep->gbytes = gbytes; + rep->bytes = bytes; + REP_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + db_rep->gbytes = gbytes; + db_rep->bytes = bytes; + } + + return (0); +} + +/* + * PUBLIC: int __rep_set_nsites_pp __P((DB_ENV *, u_int32_t)); + */ +int +__rep_set_nsites_pp(dbenv, n) + DB_ENV *dbenv; + u_int32_t n; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_nsites", DB_INIT_REP); + if (APP_IS_REPMGR(env)) { + __db_errx(env, DB_STR("3565", +"DB_ENV->rep_set_nsites: cannot call from Replication Manager application")); + return (EINVAL); + } + if ((ret = __rep_set_nsites_int(env, n)) == 0) + APP_SET_BASEAPI(env); + return (ret); +} + +/* + * PUBLIC: int __rep_set_nsites_int __P((ENV *, u_int32_t)); + */ +int +__rep_set_nsites_int(env, n) + ENV *env; + u_int32_t n; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + + ret = 0; + if (REP_ON(env)) { + rep = db_rep->region; + rep->config_nsites = n; + if (IS_USING_LEASES(env) && + IS_REP_MASTER(env) && IS_REP_STARTED(env)) { + REP_SYSTEM_LOCK(env); + ret = __rep_lease_table_alloc(env, n); + REP_SYSTEM_UNLOCK(env); + } + } else + db_rep->config_nsites = n; + return (ret); +} + +/* + * PUBLIC: int __rep_get_nsites __P((DB_ENV *, u_int32_t *)); + */ +int +__rep_get_nsites(dbenv, n) + DB_ENV *dbenv; + u_int32_t *n; +{ 
+ DB_REP *db_rep; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_nsites", DB_INIT_REP); + + if (APP_IS_REPMGR(env)) + return (__repmgr_get_nsites(env, n)); + if (REP_ON(env)) { + rep = db_rep->region; + *n = rep->config_nsites; + } else + *n = db_rep->config_nsites; + + return (0); +} + +/* + * PUBLIC: int __rep_set_priority __P((DB_ENV *, u_int32_t)); + */ +int +__rep_set_priority(dbenv, priority) + DB_ENV *dbenv; + u_int32_t priority; +{ + DB_REP *db_rep; + ENV *env; + REP *rep; + u_int32_t prev; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_priority", DB_INIT_REP); + + ret = 0; + if (REP_ON(env)) { + rep = db_rep->region; + prev = rep->priority; + rep->priority = priority; +#ifdef HAVE_REPLICATION_THREADS + ret = __repmgr_chg_prio(env, prev, priority); +#endif + } else + db_rep->my_priority = priority; + return (ret); +} + +/* + * PUBLIC: int __rep_get_priority __P((DB_ENV *, u_int32_t *)); + */ +int +__rep_get_priority(dbenv, priority) + DB_ENV *dbenv; + u_int32_t *priority; +{ + DB_REP *db_rep; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_priority", DB_INIT_REP); + + if (REP_ON(env)) { + rep = db_rep->region; + *priority = rep->priority; + } else + *priority = db_rep->my_priority; + return (0); +} + +/* + * PUBLIC: int __rep_set_timeout __P((DB_ENV *, int, db_timeout_t)); + */ +int +__rep_set_timeout(dbenv, which, timeout) + DB_ENV *dbenv; + int which; + db_timeout_t timeout; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + int repmgr_timeout, ret; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + repmgr_timeout = 0; + + if (timeout == 0 && (which == DB_REP_CONNECTION_RETRY || + which == DB_REP_ELECTION_TIMEOUT || which == DB_REP_LEASE_TIMEOUT || 
+ which == DB_REP_ELECTION_RETRY)) { + __db_errx(env, DB_STR("3566", "timeout value must be > 0")); + return (EINVAL); + } + + if (which == DB_REP_ACK_TIMEOUT || which == DB_REP_CONNECTION_RETRY || + which == DB_REP_ELECTION_RETRY || + which == DB_REP_HEARTBEAT_MONITOR || + which == DB_REP_HEARTBEAT_SEND) + repmgr_timeout = 1; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_timeout", DB_INIT_REP); + + if (APP_IS_BASEAPI(env) && repmgr_timeout) { + __db_errx(env, DB_STR_A("3567", +"%scannot set Replication Manager timeout from base replication application", + "%s"), "DB_ENV->rep_set_timeout:"); + return (EINVAL); + } + if (which == DB_REP_LEASE_TIMEOUT && IS_REP_STARTED(env)) { + ret = EINVAL; + __db_errx(env, DB_STR_A("3568", +"%s: lease timeout must be set before DB_ENV->rep_start.", + "%s"), "DB_ENV->rep_set_timeout"); + return (EINVAL); + } + + switch (which) { + case DB_REP_CHECKPOINT_DELAY: + if (REP_ON(env)) + rep->chkpt_delay = timeout; + else + db_rep->chkpt_delay = timeout; + break; + case DB_REP_ELECTION_TIMEOUT: + if (REP_ON(env)) + rep->elect_timeout = timeout; + else + db_rep->elect_timeout = timeout; + break; + case DB_REP_FULL_ELECTION_TIMEOUT: + if (REP_ON(env)) + rep->full_elect_timeout = timeout; + else + db_rep->full_elect_timeout = timeout; + break; + case DB_REP_LEASE_TIMEOUT: + if (REP_ON(env)) + rep->lease_timeout = timeout; + else + db_rep->lease_timeout = timeout; + break; +#ifdef HAVE_REPLICATION_THREADS + case DB_REP_ACK_TIMEOUT: + if (REP_ON(env)) + rep->ack_timeout = timeout; + else + db_rep->ack_timeout = timeout; + break; + case DB_REP_CONNECTION_RETRY: + if (REP_ON(env)) + rep->connection_retry_wait = timeout; + else + db_rep->connection_retry_wait = timeout; + break; + case DB_REP_ELECTION_RETRY: + if (REP_ON(env)) + rep->election_retry_wait = timeout; + else + db_rep->election_retry_wait = timeout; + break; + case DB_REP_HEARTBEAT_MONITOR: + if (REP_ON(env)) + rep->heartbeat_monitor_timeout = timeout; + else + 
db_rep->heartbeat_monitor_timeout = timeout; + break; + case DB_REP_HEARTBEAT_SEND: + if (REP_ON(env)) + rep->heartbeat_frequency = timeout; + else + db_rep->heartbeat_frequency = timeout; + break; +#endif + default: + __db_errx(env, DB_STR("3569", + "Unknown timeout type argument to DB_ENV->rep_set_timeout")); + ret = EINVAL; + } + + /* Setting a repmgr timeout makes this a repmgr application */ + if (ret == 0 && repmgr_timeout) + APP_SET_REPMGR(env); + return (ret); +} + +/* + * PUBLIC: int __rep_get_timeout __P((DB_ENV *, int, db_timeout_t *)); + */ +int +__rep_get_timeout(dbenv, which, timeout) + DB_ENV *dbenv; + int which; + db_timeout_t *timeout; +{ + DB_REP *db_rep; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_timeout", DB_INIT_REP); + + switch (which) { + case DB_REP_CHECKPOINT_DELAY: + *timeout = REP_ON(env) ? + rep->chkpt_delay : db_rep->chkpt_delay; + break; + case DB_REP_ELECTION_TIMEOUT: + *timeout = REP_ON(env) ? + rep->elect_timeout : db_rep->elect_timeout; + break; + case DB_REP_FULL_ELECTION_TIMEOUT: + *timeout = REP_ON(env) ? + rep->full_elect_timeout : db_rep->full_elect_timeout; + break; + case DB_REP_LEASE_TIMEOUT: + *timeout = REP_ON(env) ? + rep->lease_timeout : db_rep->lease_timeout; + break; +#ifdef HAVE_REPLICATION_THREADS + case DB_REP_ACK_TIMEOUT: + *timeout = REP_ON(env) ? + rep->ack_timeout : db_rep->ack_timeout; + break; + case DB_REP_CONNECTION_RETRY: + *timeout = REP_ON(env) ? + rep->connection_retry_wait : db_rep->connection_retry_wait; + break; + case DB_REP_ELECTION_RETRY: + *timeout = REP_ON(env) ? + rep->election_retry_wait : db_rep->election_retry_wait; + break; + case DB_REP_HEARTBEAT_MONITOR: + *timeout = REP_ON(env) ? rep->heartbeat_monitor_timeout : + db_rep->heartbeat_monitor_timeout; + break; + case DB_REP_HEARTBEAT_SEND: + *timeout = REP_ON(env) ? 
+ rep->heartbeat_frequency : db_rep->heartbeat_frequency; + break; +#endif + default: + __db_errx(env, DB_STR("3570", + "unknown timeout type argument to DB_ENV->rep_get_timeout")); + return (EINVAL); + } + + return (0); +} + +/* + * __rep_get_request -- + * Get the minimum and maximum number of log records that we wait + * before retransmitting. + * + * PUBLIC: int __rep_get_request + * PUBLIC: __P((DB_ENV *, db_timeout_t *, db_timeout_t *)); + */ +int +__rep_get_request(dbenv, minp, maxp) + DB_ENV *dbenv; + db_timeout_t *minp, *maxp; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_request", DB_INIT_REP); + + if (REP_ON(env)) { + rep = db_rep->region; + ENV_ENTER(env, ip); + /* + * We acquire the mtx_region or mtx_clientdb mutexes as needed. + */ + REP_SYSTEM_LOCK(env); + if (minp != NULL) + DB_TIMESPEC_TO_TIMEOUT((*minp), &rep->request_gap, 0); + if (maxp != NULL) + DB_TIMESPEC_TO_TIMEOUT((*maxp), &rep->max_gap, 0); + REP_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + if (minp != NULL) + DB_TIMESPEC_TO_TIMEOUT((*minp), + &db_rep->request_gap, 0); + if (maxp != NULL) + DB_TIMESPEC_TO_TIMEOUT((*maxp), &db_rep->max_gap, 0); + } + + return (0); +} + +/* + * __rep_set_request -- + * Set the minimum and maximum number of log records that we wait + * before retransmitting. 
+ * + * PUBLIC: int __rep_set_request __P((DB_ENV *, db_timeout_t, db_timeout_t)); + */ +int +__rep_set_request(dbenv, min, max) + DB_ENV *dbenv; + db_timeout_t min, max; +{ + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + LOG *lp; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_request", DB_INIT_REP); + + if (min == 0 || max < min) { + __db_errx(env, DB_STR("3571", + "DB_ENV->rep_set_request: Invalid min or max values")); + return (EINVAL); + } + if (REP_ON(env)) { + rep = db_rep->region; + ENV_ENTER(env, ip); + /* + * We acquire the mtx_region or mtx_clientdb mutexes as needed. + */ + REP_SYSTEM_LOCK(env); + DB_TIMEOUT_TO_TIMESPEC(min, &rep->request_gap); + DB_TIMEOUT_TO_TIMESPEC(max, &rep->max_gap); + REP_SYSTEM_UNLOCK(env); + + MUTEX_LOCK(env, rep->mtx_clientdb); + dblp = env->lg_handle; + if (dblp != NULL && (lp = dblp->reginfo.primary) != NULL) { + DB_TIMEOUT_TO_TIMESPEC(min, &lp->wait_ts); + } + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ENV_LEAVE(env, ip); + } else { + DB_TIMEOUT_TO_TIMESPEC(min, &db_rep->request_gap); + DB_TIMEOUT_TO_TIMESPEC(max, &db_rep->max_gap); + } + + return (0); +} + +/* + * __rep_set_transport_pp -- + * Set the transport function for replication. 
+ * + * PUBLIC: int __rep_set_transport_pp __P((DB_ENV *, int, + * PUBLIC: int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, + * PUBLIC: int, u_int32_t))); + */ +int +__rep_set_transport_pp(dbenv, eid, f_send) + DB_ENV *dbenv; + int eid; + int (*f_send) __P((DB_ENV *, + const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + ret = 0; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_transport", DB_INIT_REP); + + if (APP_IS_REPMGR(env)) { + __db_errx(env, DB_STR("3572", + "DB_ENV->rep_set_transport: cannot call from " + "Replication Manager application")); + return (EINVAL); + } + + if (f_send == NULL) { + __db_errx(env, DB_STR("3573", + "DB_ENV->rep_set_transport: no send function specified")); + return (EINVAL); + } + + if (eid < 0) { + __db_errx(env, DB_STR("3574", + "DB_ENV->rep_set_transport: eid must be greater than or equal to 0")); + return (EINVAL); + } + + if ((ret = __rep_set_transport_int(env, eid, f_send)) == 0) + /* + * Setting a non-repmgr send function makes this a base API + * application. + */ + APP_SET_BASEAPI(env); + + return (ret); +} + +/* + * __rep_set_transport_int -- + * Set the internal values for the transport function for replication. 
+ * + * PUBLIC: int __rep_set_transport_int __P((ENV *, int, + * PUBLIC: int (*)(DB_ENV *, const DBT *, const DBT *, const DB_LSN *, + * PUBLIC: int, u_int32_t))); + */ +int +__rep_set_transport_int(env, eid, f_send) + ENV *env; + int eid; + int (*f_send) __P((DB_ENV *, + const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); +{ + DB_REP *db_rep; + REP *rep; + + db_rep = env->rep_handle; + db_rep->send = f_send; + if (REP_ON(env)) { + rep = db_rep->region; + rep->eid = eid; + } else + db_rep->eid = eid; + return (0); +} + +/* + * PUBLIC: int __rep_get_clockskew __P((DB_ENV *, u_int32_t *, u_int32_t *)); + */ +int +__rep_get_clockskew(dbenv, fast_clockp, slow_clockp) + DB_ENV *dbenv; + u_int32_t *fast_clockp, *slow_clockp; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_get_clockskew", DB_INIT_REP); + + if (REP_ON(env)) { + rep = db_rep->region; + ENV_ENTER(env, ip); + REP_SYSTEM_LOCK(env); + *fast_clockp = rep->clock_skew; + *slow_clockp = rep->clock_base; + REP_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + *fast_clockp = db_rep->clock_skew; + *slow_clockp = db_rep->clock_base; + } + + return (0); +} + +/* + * PUBLIC: int __rep_set_clockskew __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__rep_set_clockskew(dbenv, fast_clock, slow_clock) + DB_ENV *dbenv; + u_int32_t fast_clock, slow_clock; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + ret = 0; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->rep_set_clockskew", DB_INIT_REP); + + /* + * Check for valid values. The fast clock should be a larger + * number than the slow clock. We use the slow clock value as + * our base for adjustment - therefore, a 2% difference should + * be fast == 102, slow == 100. Check for values being 0. If + * they are, then set them both to 1 internally. 
+ * + * We will use these numbers to compute the larger ratio to be + * most conservative about the user's intention. + */ + if (fast_clock == 0 || slow_clock == 0) { + /* + * If one value is zero, reject if both aren't zero. + */ + if (slow_clock != 0 || fast_clock != 0) { + __db_errx(env, DB_STR("3575", + "DB_ENV->rep_set_clockskew: Zero only valid for " + "when used for both arguments")); + return (EINVAL); + } + fast_clock = 1; + slow_clock = 1; + } + if (fast_clock < slow_clock) { + __db_errx(env, DB_STR("3576", + "DB_ENV->rep_set_clockskew: slow_clock value is " + "larger than fast_clock_value")); + return (EINVAL); + } + if (REP_ON(env)) { + rep = db_rep->region; + if (IS_REP_STARTED(env)) { + __db_errx(env, DB_STR("3577", + "DB_ENV->rep_set_clockskew: must be called before DB_ENV->rep_start")); + return (EINVAL); + } + ENV_ENTER(env, ip); + REP_SYSTEM_LOCK(env); + rep->clock_skew = fast_clock; + rep->clock_base = slow_clock; + REP_SYSTEM_UNLOCK(env); + ENV_LEAVE(env, ip); + } else { + db_rep->clock_skew = fast_clock; + db_rep->clock_base = slow_clock; + } + return (ret); +} + +/* + * __rep_flush -- + * Re-push the last log record to all clients, in case they've lost + * messages and don't know it. + * + * PUBLIC: int __rep_flush __P((DB_ENV *)); + */ +int +__rep_flush(dbenv) + DB_ENV *dbenv; +{ + DBT rec; + DB_LOGC *logc; + DB_LSN lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + int ret, t_ret; + + env = dbenv->env; + db_rep = env->rep_handle; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_flush", DB_INIT_REP); + + if (IS_REP_CLIENT(env)) + return (0); + + /* We need a transport function because we send messages. 
*/ + if (db_rep->send == NULL) { + __db_errx(env, DB_STR("3578", + "DB_ENV->rep_flush: must be called after DB_ENV->rep_set_transport")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + + memset(&rec, 0, sizeof(rec)); + memset(&lsn, 0, sizeof(lsn)); + + if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0) + goto err; + + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_LOG, &lsn, &rec, 0, 0); + +err: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __rep_sync -- + * Force a synchronization to occur between this client and the master. + * This is the other half of configuring DELAYCLIENT. + * + * PUBLIC: int __rep_sync __P((DB_ENV *, u_int32_t)); + */ +int +__rep_sync(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LSN lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + LOG *lp; + REP *rep; + int master, ret; + u_int32_t repflags, type; + + env = dbenv->env; + db_rep = env->rep_handle; + + COMPQUIET(flags, 0); + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_sync", DB_INIT_REP); + + /* We need a transport function because we send messages. */ + if (db_rep->send == NULL) { + __db_errx(env, DB_STR("3579", + "DB_ENV->rep_sync: must be called after DB_ENV->rep_set_transport")); + return (EINVAL); + } + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + rep = db_rep->region; + ret = 0; + + ENV_ENTER(env, ip); + + /* + * Simple cases. If we're not in the DELAY state we have nothing + * to do. If we don't know who the master is, send a MASTER_REQ. 
+ */ + MUTEX_LOCK(env, rep->mtx_clientdb); + lsn = lp->verify_lsn; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + master = rep->master_id; + if (master == DB_EID_INVALID) { + REP_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_MASTER_REQ, NULL, NULL, 0, 0); + goto out; + } + /* + * We want to hold the rep mutex to test and then clear the + * DELAY flag. Racing threads in here could otherwise result + * in dual data streams. + */ + if (!F_ISSET(rep, REP_F_DELAY)) { + REP_SYSTEM_UNLOCK(env); + goto out; + } + + DB_ASSERT(env, + !IS_USING_LEASES(env) || __rep_islease_granted(env) == 0); + + /* + * If we get here, we clear the delay flag and kick off a + * synchronization. From this point forward, we will + * synchronize until the next time the master changes. + */ + F_CLR(rep, REP_F_DELAY); + if (IS_ZERO_LSN(lsn) && !FLD_ISSET(rep->config, REP_C_AUTOINIT)) { + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE); + CLR_RECOVERY_SETTINGS(rep); + ret = DB_REP_JOIN_FAILURE; + REP_SYSTEM_UNLOCK(env); + goto out; + } + REP_SYSTEM_UNLOCK(env); + /* + * When we set REP_F_DELAY, we set verify_lsn to the real verify lsn if + * we need to verify, or we zeroed it out if this is a client that needs + * internal init. So, send the type of message now that + * __rep_new_master delayed sending. 
+ */ + if (IS_ZERO_LSN(lsn)) { + DB_ASSERT(env, rep->sync_state == SYNC_UPDATE); + type = REP_UPDATE_REQ; + repflags = 0; + } else { + DB_ASSERT(env, rep->sync_state == SYNC_VERIFY); + type = REP_VERIFY_REQ; + repflags = DB_REP_ANYWHERE; + } + (void)__rep_send_message(env, master, type, &lsn, NULL, 0, repflags); + +out: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * PUBLIC: int __rep_txn_applied __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_COMMIT_INFO *, db_timeout_t)); + */ +int +__rep_txn_applied(env, ip, commit_info, timeout) + ENV *env; + DB_THREAD_INFO *ip; + DB_COMMIT_INFO *commit_info; + db_timeout_t timeout; +{ + REP *rep; + db_timespec limit, now, t; + db_timeout_t duration; + struct rep_waitgoal reason; + int locked, ret, t_ret; + + if (commit_info->gen == 0) { + __db_errx(env, DB_STR("3580", + "non-replication commit token in replication env")); + return (EINVAL); + } + + rep = env->rep_handle->region; + + VPRINT(env, (env, DB_VERB_REP_MISC, + "checking txn_applied: gen %lu, envid %lu, LSN [%lu][%lu]", + (u_long)commit_info->gen, (u_long)commit_info->envid, + (u_long)commit_info->lsn.file, (u_long)commit_info->lsn.offset)); + locked = 0; + __os_gettime(env, &limit, 1); + TIMESPEC_ADD_DB_TIMEOUT(&limit, timeout); + +retry: + /* + * The checking is done within the scope of the handle count, but if we + * end up having to wait that part is not. If a lockout sequence begins + * while we're waiting, it will wake us up, and we'll come back here to + * try entering the scope again, at which point we'll get an error so + * that we return immediately. + */ + if ((ret = __op_handle_enter(env)) != 0) + goto out; + + ret = __rep_check_applied(env, ip, commit_info, &reason); + t_ret = __env_db_rep_exit(env); + + /* + * Between here and __rep_check_applied() we use DB_TIMEOUT privately to + * mean that the transaction hasn't been applied yet, but it still + * plausibly could be soon; think of it as meaning "not yet". 
So + * DB_TIMEOUT doesn't necessarily mean that DB_TIMEOUT is the ultimate + * return that the application will see. + * + * When we get this "not yet", we check the actual time remaining. If + * the time has expired, then indeed we can simply pass DB_TIMEOUT back + * up to the calling application. But if not, it tells us that we have + * a chance to wait and try again. This is a nice division of labor, + * because it means the lower level functions (__rep_check_applied() and + * below) do not have to mess with any actual time computations, or + * waiting, at all. + */ + if (ret == DB_TIMEOUT && t_ret == 0 && F_ISSET(rep, REP_F_CLIENT)) { + __os_gettime(env, &now, 1); + if (timespeccmp(&now, &limit, <)) { + + /* Compute how much time remains before the limit. */ + t = limit; + timespecsub(&t, &now); + DB_TIMESPEC_TO_TIMEOUT(duration, &t, 1); + + /* + * Wait for whatever __rep_check_applied told us we + * needed to wait for. But first, check the condition + * again under mutex protection, in case there was a + * close race. + */ + if (reason.why == AWAIT_LSN || + reason.why == AWAIT_HISTORY) { + MUTEX_LOCK(env, rep->mtx_clientdb); + locked = 1; + } + REP_SYSTEM_LOCK(env); + ret = __rep_check_goal(env, &reason); + if (locked) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + locked = 0; + } + if (ret == DB_TIMEOUT) { + /* + * The usual case: we haven't reached our goal + * yet, even after checking again while holding + * mutex. + */ + ret = __rep_await_condition(env, + &reason, duration); + + /* + * If it were possible for + * __rep_await_condition() to return DB_TIMEOUT + * that would confuse the outer "if" statement + * here. + */ + DB_ASSERT(env, ret != DB_TIMEOUT); + } + REP_SYSTEM_UNLOCK(env); + if (ret != 0) + goto out; + + /* + * Note that the "reason" that check_applied set, and + * that await_condition waited for, does not necessarily + * represent a final result ready to return to the + * user. 
In some cases there may be a few state changes + * necessary before we are able to determine the final + * result. Thus whenever we complete a successful wait + * we need to cycle back and check the full txn_applied + * question again. + */ + goto retry; + } + } + + if (t_ret != 0 && + (ret == 0 || ret == DB_TIMEOUT || ret == DB_NOTFOUND)) + ret = t_ret; + +out: + return (ret); +} + +/* + * The only non-zero return code from this function is for unexpected errors. + * We normally return 0, regardless of whether the wait terminated because the + * condition was satisfied or the timeout expired. + */ +static int +__rep_await_condition(env, reasonp, duration) + ENV *env; + struct rep_waitgoal *reasonp; + db_timeout_t duration; +{ + REGENV *renv; + REGINFO *infop; + REP *rep; + struct __rep_waiter *waiter; + int ret; + + rep = env->rep_handle->region; + infop = env->reginfo; + renv = infop->primary; + + /* + * Acquire the first lock on the self-blocking mutex when we first + * allocate it. Thereafter when it's on the free list we know that + * first lock has already been taken. + */ + if ((waiter = SH_TAILQ_FIRST(&rep->free_waiters, + __rep_waiter)) == NULL) { + MUTEX_LOCK(env, renv->mtx_regenv); + if ((ret = __env_alloc(env->reginfo, + sizeof(struct __rep_waiter), &waiter)) == 0) { + memset(waiter, 0, sizeof(*waiter)); + if ((ret = __mutex_alloc(env, MTX_REP_WAITER, + DB_MUTEX_SELF_BLOCK, &waiter->mtx_repwait)) != 0) + __env_alloc_free(infop, waiter); + } + MUTEX_UNLOCK(env, renv->mtx_regenv); + if (ret != 0) + return (ret); + + MUTEX_LOCK(env, waiter->mtx_repwait); + } else + SH_TAILQ_REMOVE(&rep->free_waiters, + waiter, links, __rep_waiter); + waiter->flags = 0; + waiter->goal = *reasonp; + SH_TAILQ_INSERT_HEAD(&rep->waiters, + waiter, links, __rep_waiter); + + VPRINT(env, (env, DB_VERB_REP_MISC, + "waiting for condition %d", (int)reasonp->why)); + REP_SYSTEM_UNLOCK(env); + /* Wait here for conditions to become more favorable. 
*/ + MUTEX_WAIT(env, waiter->mtx_repwait, duration); + REP_SYSTEM_LOCK(env); + + if (!F_ISSET(waiter, REP_F_WOKEN)) + SH_TAILQ_REMOVE(&rep->waiters, waiter, links, __rep_waiter); + SH_TAILQ_INSERT_HEAD(&rep->free_waiters, waiter, links, __rep_waiter); + + return (0); +} + +/* + * Check whether the transaction is currently applied. If it is not, but it + * might likely become applied in the future, then return DB_TIMEOUT. It's the + * caller's duty to figure out whether to wait or not in that case. Here we + * only do an immediate check of the current state of affairs. + */ +static int +__rep_check_applied(env, ip, commit_info, reasonp) + ENV *env; + DB_THREAD_INFO *ip; + DB_COMMIT_INFO *commit_info; + struct rep_waitgoal *reasonp; +{ + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + DB_TXN *txn; + DBC *dbc; + __rep_lsn_hist_data_args hist, hist2; + DB_LSN lsn; + u_int32_t gen; + int ret, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + gen = rep->gen; + txn = NULL; + dbc = NULL; + + if (F_ISSET(rep, REP_F_MASTER)) { + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + } else { + MUTEX_LOCK(env, rep->mtx_clientdb); + lsn = lp->max_perm_lsn; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + } + + /* + * The first thing to consider is whether we're in the right gen. + * The token gen either matches our current gen, or is left over from an + * older gen, or in rare circumstances could be from a "future" gen that + * we haven't learned about yet (or that got rolled back). + */ + if (commit_info->gen == gen) { + ret = __rep_read_lsn_history(env, + ip, &txn, &dbc, gen, &hist, reasonp, DB_SET); + if (ret == DB_NOTFOUND) { + /* + * We haven't yet received the LSN history of the + * current generation from the master. Return + * DB_TIMEOUT to tell the caller it needs to wait and + * tell it to wait for the LSN history. 
+ * + * Note that this also helps by eliminating the weird + * period between receiving a new gen (from a NEWMASTER) + * and the subsequent syncing with that new gen. We + * really only want to return success at the current gen + * once we've synced. + */ + ret = DB_TIMEOUT; + reasonp->why = AWAIT_HISTORY; + reasonp->u.lsn = lsn; + } + if (ret != 0) + goto out; + + if (commit_info->envid != hist.envid) { + /* + * Gens match, but envids don't: means there were two + * masters at the same gen, and the txn of interest was + * rolled back. + */ + ret = DB_NOTFOUND; + goto out; + } + + if (LOG_COMPARE(&commit_info->lsn, &lsn) > 0) { + /* + * We haven't yet gotten the LSN of interest, but we can + * expect it soon; so wait for it. + */ + ret = DB_TIMEOUT; + reasonp->why = AWAIT_LSN; + reasonp->u.lsn = commit_info->lsn; + goto out; + } + + if (LOG_COMPARE(&commit_info->lsn, &hist.lsn) >= 0) { + /* + * The LSN of interest is in the past, but within the + * range claimed for this gen. Success! (We have read + * consistency.) + */ + ret = 0; + goto out; + } + + /* + * There must have been a DUPMASTER at some point: the + * description of the txn of interest doesn't match what we see + * in the history available to us now. + */ + ret = DB_NOTFOUND; + + } else if (commit_info->gen < gen || gen == 0) { + /* + * Transaction from an old gen. Read this gen's base LSN, plus + * that of the next higher gen, because we want to check that + * the token LSN is within the close/open range defined by + * [base,next). + */ + ret = __rep_read_lsn_history(env, + ip, &txn, &dbc, commit_info->gen, &hist, reasonp, DB_SET); + t_ret = __rep_read_lsn_history(env, + ip, &txn, &dbc, commit_info->gen, &hist2, reasonp, DB_NEXT); + if (ret == DB_NOTFOUND) { + /* + * If the desired gen is not in our database, it could + * mean either of two things. 1. The whole gen could + * have been rolled back. 2. We could just be really + * far behind on replication. 
Reading ahead to the next + * following gen, which we likely need anyway, helps us + * decide which case to conclude. + */ + if (t_ret == 0) + /* + * Second read succeeded, so "being behind in + * replication" is not a viable reason for + * having failed to find the first read. + * Therefore, the gen must have been rolled + * back, and the proper result is NOTFOUND to + * indicate that. + */ + goto out; + if (t_ret == DB_NOTFOUND) { + /* + * Second read also got a NOTFOUND: we're + * definitely "behind" (we don't even have + * current gen's history). So, waiting is the + * correct result. + */ + ret = DB_TIMEOUT; + reasonp->why = AWAIT_HISTORY; + reasonp->u.lsn = lsn; + goto out; + } + /* + * Here, t_ret is something unexpected, which trumps the + * NOTFOUND returned from the first read. + */ + ret = t_ret; + goto out; + } + if (ret != 0) + goto out; /* Unexpected error, first read. */ + if (commit_info->envid != hist.envid) { + /* + * (We don't need the second read in order to make this + * test.) + * + * We have info for the indicated gen, but the envids + * don't match, meaning the txn was written at a dup + * master and that gen instance was rolled back. + */ + ret = DB_NOTFOUND; + goto out; + } + + /* Examine result of second read. */ + if ((ret = t_ret) == DB_NOTFOUND) { + /* + * We haven't even heard about our current gen yet, so + * it's worth waiting for it. + */ + ret = DB_TIMEOUT; + reasonp->why = AWAIT_HISTORY; + reasonp->u.lsn = lsn; + } else if (ret != 0) + goto out; /* Second read returned unexpeced error. */ + + /* + * We now have the history info for the gen of the txn, and for + * the subsequent gen. All we have to do is see if the LSN is + * in range. + */ + if (LOG_COMPARE(&commit_info->lsn, &hist.lsn) >= 0 && + LOG_COMPARE(&commit_info->lsn, &hist2.lsn) < 0) + ret = 0; + else + ret = DB_NOTFOUND; + } else { + /* + * Token names a future gen. 
If we're a client and the LSN also + * is in the future, then it's possible we just haven't caught + * up yet, so we can wait for it. Otherwise, it must have been + * part of a generation that got lost in a roll-back. + */ + if (F_ISSET(rep, REP_F_CLIENT) && + LOG_COMPARE(&commit_info->lsn, &lsn) > 0) { + reasonp->why = AWAIT_GEN; + reasonp->u.gen = commit_info->gen; + return (DB_TIMEOUT); + } + return (DB_NOTFOUND); + } + +out: + if (dbc != NULL && + (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if (txn != NULL && + (t_ret = __db_txn_auto_resolve(env, txn, 1, ret)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * The txn and dbc handles are owned by caller, though we create them if + * necessary. Caller is responsible for closing them. + */ +static int +__rep_read_lsn_history(env, ip, txn, dbc, gen, gen_infop, reasonp, flags) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN **txn; + DBC **dbc; + u_int32_t gen; + __rep_lsn_hist_data_args *gen_infop; + struct rep_waitgoal *reasonp; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + DB *dbp; + __rep_lsn_hist_key_args key; + u_int8_t key_buf[__REP_LSN_HIST_KEY_SIZE]; + u_int8_t data_buf[__REP_LSN_HIST_DATA_SIZE]; + DBT key_dbt, data_dbt; + u_int32_t desired_gen; + int ret, tries; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + + DB_ASSERT(env, flags == DB_SET || flags == DB_NEXT); + + /* Simply return cached info, if we already have it. */ + desired_gen = flags == DB_SET ? 
gen : gen + 1; + REP_SYSTEM_LOCK(env); + if (rep->gen == desired_gen && !IS_ZERO_LSN(rep->gen_base_lsn)) { + gen_infop->lsn = rep->gen_base_lsn; + gen_infop->envid = rep->master_envid; + goto unlock; + } + REP_SYSTEM_UNLOCK(env); + + tries = 0; +retry: + if (*txn == NULL && + (ret = __txn_begin(env, ip, NULL, txn, 0)) != 0) + return (ret); + + if ((dbp = db_rep->lsn_db) == NULL) { + if ((ret = __rep_open_sysdb(env, + ip, *txn, REPLSNHIST, 0, &dbp)) != 0) { + /* + * If the database isn't there, it could be because it's + * memory-resident, and we haven't yet sync'ed with the + * master to materialize it. (It could make sense to + * include a test for INMEM in this conditional + * expression, if we were sure all sites had matching + * INMEM settings; but since we don't enforce that, + * leaving it out makes for more optimistic behavior.) + */ + if (ret == ENOENT && + !F_ISSET(rep, REP_F_NIMDBS_LOADED | REP_F_MASTER)) { + ret = DB_TIMEOUT; + reasonp->why = AWAIT_NIMDB; + } + goto err; + } + db_rep->lsn_db = dbp; + } + + if (*dbc == NULL && + (ret = __db_cursor(dbp, ip, *txn, dbc, 0)) != 0) + goto err; + + if (flags == DB_SET) { + key.version = REP_LSN_HISTORY_FMT_VERSION; + key.gen = gen; + __rep_lsn_hist_key_marshal(env, &key, key_buf); + } + DB_INIT_DBT(key_dbt, key_buf, __REP_LSN_HIST_KEY_SIZE); + key_dbt.ulen = __REP_LSN_HIST_KEY_SIZE; + F_SET(&key_dbt, DB_DBT_USERMEM); + + memset(&data_dbt, 0, sizeof(data_dbt)); + data_dbt.data = data_buf; + data_dbt.ulen = __REP_LSN_HIST_DATA_SIZE; + F_SET(&data_dbt, DB_DBT_USERMEM); + if ((ret = __dbc_get(*dbc, &key_dbt, &data_dbt, flags)) != 0) { + if ((ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) && + ++tries < 5) { /* Limit of 5 is an arbitrary choice. */ + ret = __dbc_close(*dbc); + *dbc = NULL; + if (ret != 0) + goto err; + ret = __txn_abort(*txn); + *txn = NULL; + if (ret != 0) + goto err; + __os_yield(env, 0, 10000); /* Arbitrary duration. 
*/ + goto retry; + } + goto err; + } + + /* + * In the DB_NEXT case, we don't know what the next gen is. Unmarshal + * the key too, just so that we can check whether it matches the current + * gen, for setting the cache. Note that, interestingly, the caller + * doesn't care what the key is in that case! + */ + if ((ret = __rep_lsn_hist_key_unmarshal(env, + &key, key_buf, __REP_LSN_HIST_KEY_SIZE, NULL)) != 0) + goto err; + ret = __rep_lsn_hist_data_unmarshal(env, + gen_infop, data_buf, __REP_LSN_HIST_DATA_SIZE, NULL); + + REP_SYSTEM_LOCK(env); + if (rep->gen == key.gen) { + rep->gen_base_lsn = gen_infop->lsn; + rep->master_envid = gen_infop->envid; + } +unlock: + REP_SYSTEM_UNLOCK(env); + +err: + return (ret); +} + +/* + * __rep_conv_vers -- + * Convert from a log version to the replication message version + * that release used. + */ +static u_int32_t +__rep_conv_vers(env, log_ver) + ENV *env; + u_int32_t log_ver; +{ + COMPQUIET(env, NULL); + + /* + * We can't use a switch statement, some of the DB_LOGVERSION_XX + * constants are the same + */ + if (log_ver == DB_LOGVERSION) + return (DB_REPVERSION); + /* 5.0 and 5.1 had identical log and rep versions. */ + if (log_ver == DB_LOGVERSION_51) + return (DB_REPVERSION_51); + if (log_ver == DB_LOGVERSION_48p2) + return (DB_REPVERSION_48); + if (log_ver == DB_LOGVERSION_48) + return (DB_REPVERSION_48); + if (log_ver == DB_LOGVERSION_47) + return (DB_REPVERSION_47); + if (log_ver == DB_LOGVERSION_46) + return (DB_REPVERSION_46); + if (log_ver == DB_LOGVERSION_45) + return (DB_REPVERSION_45); + if (log_ver == DB_LOGVERSION_44) + return (DB_REPVERSION_44); + return (DB_REPVERSION_INVALID); +} diff --git a/src/rep/rep_record.c b/src/rep/rep_record.c new file mode 100644 index 00000000..568b60e0 --- /dev/null +++ b/src/rep/rep_record.c @@ -0,0 +1,2582 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __rep_collect_txn __P((ENV *, DB_LSN *, LSN_COLLECTION *)); +static int __rep_do_ckp __P((ENV *, DBT *, __rep_control_args *)); +static int __rep_fire_newmaster __P((ENV *, u_int32_t, int)); +static int __rep_fire_startupdone __P((ENV *, u_int32_t, int)); +static int __rep_getnext __P((ENV *, DB_THREAD_INFO *)); +static int __rep_lsn_cmp __P((const void *, const void *)); +static int __rep_newfile __P((ENV *, __rep_control_args *, DBT *)); +static int __rep_process_rec __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, + DBT *, db_timespec *, DB_LSN *)); +static int __rep_remfirst __P((ENV *, DB_THREAD_INFO *, DBT *, DBT *)); +static int __rep_skip_msg __P((ENV *, REP *, int, u_int32_t)); + +/* Used to consistently designate which messages ought to be received where. */ + +#define MASTER_ONLY(rep, rp) do { \ + if (!F_ISSET(rep, REP_F_MASTER)) { \ + RPRINT(env, (env, DB_VERB_REP_MSGS, \ + "Master record received on client")); \ + REP_PRINT_MESSAGE(env, \ + eid, rp, "rep_process_message", 0); \ + /* Just skip/ignore it. */ \ + ret = 0; \ + goto errlock; \ + } \ +} while (0) + +#define CLIENT_ONLY(rep, rp) do { \ + if (!F_ISSET(rep, REP_F_CLIENT)) { \ + RPRINT(env, (env, DB_VERB_REP_MSGS, \ + "Client record received on master")); \ + /* \ + * Only broadcast DUPMASTER if leases are not \ + * in effect. If I am an old master, using \ + * leases and I get a newer message, my leases \ + * had better all be expired. 
\ + */ \ + if (IS_USING_LEASES(env)) \ + DB_ASSERT(env, \ + __rep_lease_check(env, 0) == \ + DB_REP_LEASE_EXPIRED); \ + else { \ + REP_PRINT_MESSAGE(env, \ + eid, rp, "rep_process_message", 0); \ + (void)__rep_send_message(env, DB_EID_BROADCAST, \ + REP_DUPMASTER, NULL, NULL, 0, 0); \ + } \ + ret = DB_REP_DUPMASTER; \ + goto errlock; \ + } \ +} while (0) + +/* + * If a client is attempting to service a request and its gen is not in + * sync with its database state, it cannot service the request. Currently + * the only way to know this is with the heavy hammer of knowing (or not) + * who the master is. If the master is invalid, force a rerequest. + * If we receive an ALIVE, we update both gen and invalidate the + * master_id. + */ +#define CLIENT_MASTERCHK do { \ + if (F_ISSET(rep, REP_F_CLIENT)) { \ + if (master_id == DB_EID_INVALID) { \ + STAT(rep->stat.st_client_svc_miss++); \ + ret = __rep_skip_msg(env, rep, eid, rp->rectype);\ + goto errlock; \ + } \ + } \ +} while (0) + +/* + * If a client is attempting to service a request it does not have, + * call rep_skip_msg to skip this message and force a rerequest to the + * sender. We don't hold the mutex for the stats and may miscount. + */ +#define CLIENT_REREQ do { \ + if (F_ISSET(rep, REP_F_CLIENT)) { \ + STAT(rep->stat.st_client_svc_req++); \ + if (ret == DB_NOTFOUND) { \ + STAT(rep->stat.st_client_svc_miss++); \ + ret = __rep_skip_msg(env, rep, eid, rp->rectype);\ + } \ + } \ +} while (0) + +#define RECOVERING_SKIP do { \ + if (IS_REP_CLIENT(env) && recovering) { \ + /* Not holding region mutex, may miscount */ \ + STAT(rep->stat.st_msgs_recover++); \ + ret = __rep_skip_msg(env, rep, eid, rp->rectype); \ + goto errlock; \ + } \ +} while (0) + +/* + * If we're recovering the log we only want log records that are in the + * range we need to recover. Otherwise we can end up storing a huge + * number of "new" records, only to truncate the temp database later after + * we run recovery. 
If we are actively delaying a sync-up, we also skip + * all incoming log records until the application requests sync-up. + */ +#define RECOVERING_LOG_SKIP do { \ + if (F_ISSET(rep, REP_F_DELAY) || \ + rep->master_id == DB_EID_INVALID || \ + (recovering && \ + (rep->sync_state != SYNC_LOG || \ + LOG_COMPARE(&rp->lsn, &rep->last_lsn) >= 0))) { \ + /* Not holding region mutex, may miscount */ \ + STAT(rep->stat.st_msgs_recover++); \ + ret = __rep_skip_msg(env, rep, eid, rp->rectype); \ + goto errlock; \ + } \ +} while (0) + +#define ANYSITE(rep) + +/* + * __rep_process_message_pp -- + * + * This routine takes an incoming message and processes it. + * + * control: contains the control fields from the record + * rec: contains the actual record + * eid: the environment id of the sender of the message; + * ret_lsnp: On DB_REP_ISPERM and DB_REP_NOTPERM returns, contains the + * lsn of the maximum permanent or current not permanent log record + * (respectively). + * + * PUBLIC: int __rep_process_message_pp + * PUBLIC: __P((DB_ENV *, DBT *, DBT *, int, DB_LSN *)); + */ +int +__rep_process_message_pp(dbenv, control, rec, eid, ret_lsnp) + DB_ENV *dbenv; + DBT *control, *rec; + int eid; + DB_LSN *ret_lsnp; +{ + ENV *env; + int ret; + + env = dbenv->env; + ret = 0; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_process_message", DB_INIT_REP); + + if (APP_IS_REPMGR(env)) { + __db_errx(env, DB_STR_A("3512", + "%s cannot call from Replication Manager application", + "%s"), "DB_ENV->rep_process_message:"); + return (EINVAL); + } + + /* Control argument must be non-Null. */ + if (control == NULL || control->size == 0) { + __db_errx(env, DB_STR("3513", + "DB_ENV->rep_process_message: control argument must be specified")); + return (EINVAL); + } + + /* + * Make sure site is a master or a client, which implies that + * replication has been started. 
+ */ + if (!IS_REP_MASTER(env) && !IS_REP_CLIENT(env)) { + __db_errx(env, DB_STR("3514", + "Environment not configured as replication master or client")); + return (EINVAL); + } + + if ((ret = __dbt_usercopy(env, control)) != 0 || + (ret = __dbt_usercopy(env, rec)) != 0) { + __dbt_userfree(env, control, rec, NULL); + __db_errx(env, DB_STR("3515", + "DB_ENV->rep_process_message: error retrieving DBT contents")); + return (ret); + } + + ret = __rep_process_message_int(env, control, rec, eid, ret_lsnp); + + __dbt_userfree(env, control, rec, NULL); + return (ret); +} + +/* + * __rep_process_message_int -- + * + * This routine performs the internal steps to process an incoming message. + * + * PUBLIC: int __rep_process_message_int + * PUBLIC: __P((ENV *, DBT *, DBT *, int, DB_LSN *)); + */ +int +__rep_process_message_int(env, control, rec, eid, ret_lsnp) + ENV *env; + DBT *control, *rec; + int eid; + DB_LSN *ret_lsnp; +{ + DBT data_dbt; + DB_LOG *dblp; + DB_LSN last_lsn, lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGENV *renv; + REGINFO *infop; + REP *rep; + REP_46_CONTROL *rp46; + REP_OLD_CONTROL *orp; + __rep_control_args *rp, tmprp; + __rep_egen_args egen_arg; + size_t len; + u_int32_t gen, rep_version; + int cmp, do_sync, lockout, master_id, recovering, ret, t_ret; + time_t savetime; + u_int8_t buf[__REP_MAXMSG_SIZE]; + + ret = 0; + do_sync = 0; + lockout = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + infop = env->reginfo; + renv = infop->primary; + /* + * Casting this to REP_OLD_CONTROL is just kind of stylistic: the + * rep_version field of course has to be in the same offset in all + * versions in order for this to work. + * + * We can look at the rep_version unswapped here because if we're + * talking to an old version, it will always be unswapped. 
If + * we're talking to a new version, the only issue is if it is + * swapped and we take one of the old version conditionals + * incorrectly. The rep_version would need to be very, very + * large for a swapped version to look like a small, older + * version. There is no problem here looking at it unswapped. + */ + rep_version = ((REP_OLD_CONTROL *)control->data)->rep_version; + if (rep_version <= DB_REPVERSION_45) { + orp = (REP_OLD_CONTROL *)control->data; + if (rep_version == DB_REPVERSION_45 && + F_ISSET(orp, REPCTL_INIT_45)) { + F_CLR(orp, REPCTL_INIT_45); + F_SET(orp, REPCTL_INIT); + } + tmprp.rep_version = orp->rep_version; + tmprp.log_version = orp->log_version; + tmprp.lsn = orp->lsn; + tmprp.rectype = orp->rectype; + tmprp.gen = orp->gen; + tmprp.flags = orp->flags; + tmprp.msg_sec = 0; + tmprp.msg_nsec = 0; + } else if (rep_version == DB_REPVERSION_46) { + rp46 = (REP_46_CONTROL *)control->data; + tmprp.rep_version = rp46->rep_version; + tmprp.log_version = rp46->log_version; + tmprp.lsn = rp46->lsn; + tmprp.rectype = rp46->rectype; + tmprp.gen = rp46->gen; + tmprp.flags = rp46->flags; + tmprp.msg_sec = (u_int32_t)rp46->msg_time.tv_sec; + tmprp.msg_nsec = (u_int32_t)rp46->msg_time.tv_nsec; + } else + if ((ret = __rep_control_unmarshal(env, &tmprp, + control->data, control->size, NULL)) != 0) + return (ret); + rp = &tmprp; + if (ret_lsnp != NULL) + ZERO_LSN(*ret_lsnp); + + ENV_ENTER(env, ip); + + REP_PRINT_MESSAGE(env, eid, rp, "rep_process_message", 0); + /* + * Check the version number for both rep and log. If it is + * an old version we support, convert it. Otherwise complain. 
+ */ + if (rp->rep_version < DB_REPVERSION) { + if (rp->rep_version < DB_REPVERSION_MIN) { + __db_errx(env, DB_STR_A("3516", + "unsupported old replication message version %lu, minimum version %d", + "%lu %d"), (u_long)rp->rep_version, + DB_REPVERSION_MIN); + + ret = EINVAL; + goto errlock; + } + VPRINT(env, (env, DB_VERB_REP_MSGS, + "Received record %lu with old rep version %lu", + (u_long)rp->rectype, (u_long)rp->rep_version)); + rp->rectype = __rep_msg_from_old(rp->rep_version, rp->rectype); + DB_ASSERT(env, rp->rectype != REP_INVALID); + /* + * We should have a valid new record type for all the old + * versions. + */ + VPRINT(env, (env, DB_VERB_REP_MSGS, + "Converted to record %lu with old rep version %lu", + (u_long)rp->rectype, (u_long)rp->rep_version)); + } else if (rp->rep_version > DB_REPVERSION) { + __db_errx(env, DB_STR_A("3517", + "unexpected replication message version %lu, expected %d", + "%lu %d"), (u_long)rp->rep_version, DB_REPVERSION); + ret = EINVAL; + goto errlock; + } + + if (rp->log_version < DB_LOGVERSION) { + if (rp->log_version < DB_LOGVERSION_MIN) { + __db_errx(env, DB_STR_A("3518", + "unsupported old replication log version %lu, minimum version %d", + "%lu %d"), (u_long)rp->log_version, + DB_LOGVERSION_MIN); + ret = EINVAL; + goto errlock; + } + VPRINT(env, (env, DB_VERB_REP_MSGS, + "Received record %lu with old log version %lu", + (u_long)rp->rectype, (u_long)rp->log_version)); + } else if (rp->log_version > DB_LOGVERSION) { + __db_errx(env, DB_STR_A("3519", + "unexpected log record version %lu, expected %d", + "%lu %d"), (u_long)rp->log_version, DB_LOGVERSION); + ret = EINVAL; + goto errlock; + } + + /* + * Acquire the replication lock. + */ + REP_SYSTEM_LOCK(env); + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) { + /* + * If we're racing with a thread in rep_start, then + * just ignore the message and return. 
+ */ + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Racing replication msg lockout, ignore message.")); + /* + * Although we're ignoring the message, there are a few + * we need to pay a bit of attention to anyway. All of + * these cases are mutually exclusive. + * 1. If it is a PERM message, we don't want to return 0. + * 2. If it is a NEWSITE message let the app know so it can + * do whatever it needs for connection purposes. + * 3. If it is a c2c request, tell the sender we're not + * going to handle it. + */ + if (F_ISSET(rp, REPCTL_PERM)) + ret = DB_REP_IGNORE; + REP_SYSTEM_UNLOCK(env); + /* + * If this is new site information return DB_REP_NEWSITE so + * that the user can use whatever information may have been + * sent for connections. + */ + if (rp->rectype == REP_NEWSITE) + ret = DB_REP_NEWSITE; + /* + * If another client has sent a c2c request to us, it may be a + * long time before it resends the request (due to its dual data + * streams avoidance heuristic); let it know we can't serve the + * request just now. + */ + if (F_ISSET(rep, REP_F_CLIENT) && REP_MSG_REQ(rp->rectype)) { + STAT(rep->stat.st_client_svc_req++); + STAT(rep->stat.st_client_svc_miss++); + (void)__rep_send_message(env, + eid, REP_REREQUEST, NULL, NULL, 0, 0); + } + goto out; + } + rep->msg_th++; + gen = rep->gen; + master_id = rep->master_id; + recovering = IS_REP_RECOVERING(rep); + savetime = renv->rep_timestamp; + + STAT(rep->stat.st_msgs_processed++); + REP_SYSTEM_UNLOCK(env); + + /* + * Check for lease configuration matching. Leases must be + * configured all or none. If I am a client and I receive a + * message requesting a lease, and I'm not using leases, that + * is an error. 
+ */ + if (!IS_USING_LEASES(env) && + (F_ISSET(rp, REPCTL_LEASE) || rp->rectype == REP_LEASE_GRANT)) { + __db_errx(env, DB_STR("3520", + "Inconsistent lease configuration")); + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Client received lease message and not using leases")); + ret = EINVAL; + ret = __env_panic(env, ret); + goto errlock; + } + + /* + * Check for generation number matching. Ignore any old messages + * except requests that are indicative of a new client that needs + * to get in sync. + */ + if (rp->gen < gen && rp->rectype != REP_ALIVE_REQ && + rp->rectype != REP_NEWCLIENT && rp->rectype != REP_MASTER_REQ && + rp->rectype != REP_DUPMASTER && rp->rectype != REP_VOTE1) { + /* + * We don't hold the rep mutex, and could miscount if we race. + */ + STAT(rep->stat.st_msgs_badgen++); + if (F_ISSET(rp, REPCTL_PERM)) + ret = DB_REP_IGNORE; + goto errlock; + } + + if (rp->gen > gen) { + /* + * If I am a master and am out of date with a lower generation + * number, I am in bad shape and should downgrade. + */ + if (F_ISSET(rep, REP_F_MASTER)) { + STAT(rep->stat.st_dupmasters++); + ret = DB_REP_DUPMASTER; + /* + * Only broadcast DUPMASTER if leases are not + * in effect. If I am an old master, using + * leases and I get a newer message, my leases + * had better all be expired. + */ + if (IS_USING_LEASES(env)) + DB_ASSERT(env, + __rep_lease_check(env, 0) == + DB_REP_LEASE_EXPIRED); + else if (rp->rectype != REP_DUPMASTER) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_DUPMASTER, + NULL, NULL, 0, 0); + goto errlock; + } + + /* + * I am a client and am out of date. If this is an election, + * or a response from the first site I contacted, then I can + * accept the generation number and participate in future + * elections and communication. Otherwise, I need to hear about + * a new master and sync up. 
+ */ + if (rp->rectype == REP_ALIVE || + rp->rectype == REP_VOTE1 || rp->rectype == REP_VOTE2) { + REP_SYSTEM_LOCK(env); + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Updating gen from %lu to %lu", + (u_long)gen, (u_long)rp->gen)); + rep->master_id = DB_EID_INVALID; + gen = rp->gen; + SET_GEN(gen); + /* + * Updating of egen will happen when we process the + * message below for each message type. + */ + REP_SYSTEM_UNLOCK(env); + if (rp->rectype == REP_ALIVE) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_MASTER_REQ, NULL, + NULL, 0, 0); + } else if (rp->rectype != REP_NEWMASTER) { + /* + * Ignore this message, retransmit if needed. + */ + if (__rep_check_doreq(env, rep)) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_MASTER_REQ, + NULL, NULL, 0, 0); + goto errlock; + } + /* + * If you get here, then you're a client and either you're + * in an election or you have a NEWMASTER or an ALIVE message + * whose processing will do the right thing below. + */ + } + + /* + * If the sender is part of an established group, so are we now. + */ + if (F_ISSET(rp, REPCTL_GROUP_ESTD)) { + REP_SYSTEM_LOCK(env); +#ifdef DIAGNOSTIC + if (!F_ISSET(rep, REP_F_GROUP_ESTD)) + RPRINT(env, (env, DB_VERB_REP_MSGS, + "I am now part of an established group")); +#endif + F_SET(rep, REP_F_GROUP_ESTD); + REP_SYSTEM_UNLOCK(env); + } + + /* + * We need to check if we're in recovery and if we are + * then we need to ignore any messages except VERIFY*, VOTE*, + * NEW* and ALIVE_REQ, or backup related messages: UPDATE*, + * PAGE* and FILE*. We need to also accept LOG messages + * if we're copying the log for recovery/backup. + */ + switch (rp->rectype) { + case REP_ALIVE: + /* + * Handle even if we're recovering. 
+ */ + ANYSITE(rep); + if (rp->rep_version < DB_REPVERSION_47) + egen_arg.egen = *(u_int32_t *)rec->data; + else if ((ret = __rep_egen_unmarshal(env, &egen_arg, + rec->data, rec->size, NULL)) != 0) + return (ret); + REP_SYSTEM_LOCK(env); + if (egen_arg.egen > rep->egen) { + /* + * If we're currently working futilely at processing an + * obsolete egen, treat it like an egen update, so that + * we abort the current rep_elect() call and signal the + * application to start a new one. + */ + if (rep->spent_egen == rep->egen) + ret = DB_REP_HOLDELECTION; + + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Received ALIVE egen of %lu, mine %lu", + (u_long)egen_arg.egen, (u_long)rep->egen)); + __rep_elect_done(env, rep); + rep->egen = egen_arg.egen; + } + REP_SYSTEM_UNLOCK(env); + break; + case REP_ALIVE_REQ: + /* + * Handle even if we're recovering. + */ + ANYSITE(rep); + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); +#ifdef CONFIG_TEST + /* + * Send this first, before the ALIVE message because of the + * way the test suite and messaging is done sequentially. + * In some sequences it is possible to get into a situation + * where the test suite cannot get the later NEWMASTER because + * we break out of the messaging loop too early. 
+ */ + if (F_ISSET(rep, REP_F_MASTER)) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); +#endif + REP_SYSTEM_LOCK(env); + egen_arg.egen = rep->egen; + REP_SYSTEM_UNLOCK(env); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(data_dbt, &egen_arg.egen, + sizeof(egen_arg.egen)); + else { + if ((ret = __rep_egen_marshal(env, + &egen_arg, buf, __REP_EGEN_SIZE, &len)) != 0) + goto errlock; + DB_INIT_DBT(data_dbt, buf, len); + } + (void)__rep_send_message(env, + eid, REP_ALIVE, &lsn, &data_dbt, 0, 0); + break; + case REP_ALL_REQ: + RECOVERING_SKIP; + CLIENT_MASTERCHK; + ret = __rep_allreq(env, rp, eid); + CLIENT_REREQ; + break; + case REP_BULK_LOG: + RECOVERING_LOG_SKIP; + CLIENT_ONLY(rep, rp); + ret = __rep_bulk_log(env, ip, rp, rec, savetime, ret_lsnp); + break; + case REP_BULK_PAGE: + /* + * Handle even if we're recovering. + */ + CLIENT_ONLY(rep, rp); + ret = __rep_bulk_page(env, ip, eid, rp, rec); + break; + case REP_DUPMASTER: + /* + * Handle even if we're recovering. + */ + if (F_ISSET(rep, REP_F_MASTER)) + ret = DB_REP_DUPMASTER; + break; +#ifdef NOTYET + case REP_FILE: /* TODO */ + CLIENT_ONLY(rep, rp); + break; + case REP_FILE_REQ: + ret = __rep_send_file(env, rec, eid); + break; +#endif + case REP_FILE_FAIL: + /* + * Handle even if we're recovering. + */ + CLIENT_ONLY(rep, rp); + /* + * Clean up any internal init that was in progress. + */ + if (eid == rep->master_id) { + REP_SYSTEM_LOCK(env); + /* + * If we're already locking out messages, give up. + */ + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) + goto errhlk; + /* + * Lock out other messages to prevent race + * conditions. + */ + if ((ret = + __rep_lockout_msg(env, rep, 1)) != 0) { + goto errhlk; + } + lockout = 1; + /* + * Need mtx_clientdb to safely clean up + * page database in __rep_init_cleanup(). + */ + REP_SYSTEM_UNLOCK(env); + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + /* + * Clean up internal init if one was in progress. 
+ */ + if (ISSET_LOCKOUT_BDB(rep)) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "FILE_FAIL is cleaning up old internal init")); +#ifdef CONFIG_TEST + STAT(rep->stat.st_filefail_cleanups++); +#endif + ret = __rep_init_cleanup(env, rep, DB_FORCE); + F_CLR(rep, REP_F_ABBREVIATED); + CLR_RECOVERY_SETTINGS(rep); + } + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (ret != 0) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "FILE_FAIL error cleaning up internal init: %d", ret)); + goto errhlk; + } + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + lockout = 0; + /* + * Restart internal init, setting UPDATE flag and + * zeroing applicable LSNs. + */ + rep->sync_state = SYNC_UPDATE; + ZERO_LSN(rep->first_lsn); + ZERO_LSN(rep->ckp_lsn); + REP_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, eid, REP_UPDATE_REQ, + NULL, NULL, 0, 0); + } + break; + case REP_LEASE_GRANT: + /* + * Handle even if we're recovering. + */ + MASTER_ONLY(rep, rp); + ret = __rep_lease_grant(env, rp, rec, eid); + break; + case REP_LOG: + case REP_LOG_MORE: + RECOVERING_LOG_SKIP; + CLIENT_ONLY(rep, rp); + ret = __rep_log(env, ip, rp, rec, eid, savetime, ret_lsnp); + break; + case REP_LOG_REQ: + RECOVERING_SKIP; + CLIENT_MASTERCHK; + if (F_ISSET(rp, REPCTL_INIT)) + MASTER_UPDATE(env, renv); + ret = __rep_logreq(env, rp, rec, eid); + CLIENT_REREQ; + break; + case REP_NEWSITE: + /* + * Handle even if we're recovering. + */ + /* We don't hold the rep mutex, and may miscount. */ + STAT(rep->stat.st_newsites++); + + /* This is a rebroadcast; simply tell the application. */ + if (F_ISSET(rep, REP_F_MASTER)) { + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, + eid, REP_NEWMASTER, &lsn, NULL, 0, 0); + } + ret = DB_REP_NEWSITE; + break; + case REP_NEWCLIENT: + /* + * Handle even if we're recovering. 
+ */ + /* + * This message was received and should have resulted in the + * application entering the machine ID in its machine table. + * We respond to this with an ALIVE to send relevant information + * to the new client (if we are a master, we'll send a + * NEWMASTER, so we only need to send the ALIVE if we're a + * client). But first, broadcast the new client's record to + * all the clients. + */ + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWSITE, &rp->lsn, rec, 0, 0); + + ret = DB_REP_NEWSITE; + + if (F_ISSET(rep, REP_F_CLIENT)) { + REP_SYSTEM_LOCK(env); + egen_arg.egen = rep->egen; + + /* + * Clean up any previous master remnants by making + * master_id invalid and cleaning up any internal + * init that was in progress. + */ + if (eid == rep->master_id) { + rep->master_id = DB_EID_INVALID; + + /* + * Already locking out messages, must be + * in sync-up recover or internal init, + * give up. + */ + if (FLD_ISSET(rep->lockout_flags, + REP_LOCKOUT_MSG)) + goto errhlk; + + /* + * Lock out other messages to prevent race + * conditions. + */ + if ((t_ret = + __rep_lockout_msg(env, rep, 1)) != 0) { + ret = t_ret; + goto errhlk; + } + lockout = 1; + + /* + * Need mtx_clientdb to safely clean up + * page database in __rep_init_cleanup(). + */ + REP_SYSTEM_UNLOCK(env); + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + + /* + * Clean up internal init if one was in + * progress. 
+ */ + if (ISSET_LOCKOUT_BDB(rep)) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "NEWCLIENT is cleaning up old internal init for invalid master")); + t_ret = __rep_init_cleanup(env, + rep, DB_FORCE); + F_CLR(rep, REP_F_ABBREVIATED); + CLR_RECOVERY_SETTINGS(rep); + } + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (t_ret != 0) { + ret = t_ret; + RPRINT(env, (env, DB_VERB_REP_MSGS, + "NEWCLIENT error cleaning up internal init for invalid master: %d", ret)); + goto errhlk; + } + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + lockout = 0; + } + REP_SYSTEM_UNLOCK(env); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(data_dbt, &egen_arg.egen, + sizeof(egen_arg.egen)); + else { + if ((ret = __rep_egen_marshal(env, &egen_arg, + buf, __REP_EGEN_SIZE, &len)) != 0) + goto errlock; + DB_INIT_DBT(data_dbt, buf, len); + } + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_ALIVE, &rp->lsn, &data_dbt, 0, 0); + break; + } + /* FALLTHROUGH */ + case REP_MASTER_REQ: + RECOVERING_SKIP; + if (F_ISSET(rep, REP_F_MASTER)) { + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_NEWMASTER, &lsn, NULL, 0, 0); + if (IS_USING_LEASES(env)) + (void)__rep_lease_refresh(env); + } + /* + * If there is no master, then we could get into a state + * where an old client lost the initial ALIVE message and + * is calling an election under an old gen and can + * never get to the current gen. 
+ */ + if (F_ISSET(rep, REP_F_CLIENT) && rp->gen < gen) { + REP_SYSTEM_LOCK(env); + egen_arg.egen = rep->egen; + if (eid == rep->master_id) + rep->master_id = DB_EID_INVALID; + REP_SYSTEM_UNLOCK(env); + if (rep->version < DB_REPVERSION_47) + DB_INIT_DBT(data_dbt, &egen_arg.egen, + sizeof(egen_arg.egen)); + else { + if ((ret = __rep_egen_marshal(env, &egen_arg, + buf, __REP_EGEN_SIZE, &len)) != 0) + goto errlock; + DB_INIT_DBT(data_dbt, buf, len); + } + (void)__rep_send_message(env, eid, + REP_ALIVE, &rp->lsn, &data_dbt, 0, 0); + } + break; + case REP_NEWFILE: + RECOVERING_LOG_SKIP; + CLIENT_ONLY(rep, rp); + ret = __rep_apply(env, + ip, rp, rec, ret_lsnp, NULL, &last_lsn); + if (ret == DB_REP_LOGREADY) + ret = __rep_logready(env, rep, savetime, &last_lsn); + break; + case REP_NEWMASTER: + /* + * Handle even if we're recovering. + */ + ANYSITE(rep); + if (F_ISSET(rep, REP_F_MASTER) && + eid != rep->eid) { + /* We don't hold the rep mutex, and may miscount. */ + STAT(rep->stat.st_dupmasters++); + ret = DB_REP_DUPMASTER; + if (IS_USING_LEASES(env)) + DB_ASSERT(env, + __rep_lease_check(env, 0) == + DB_REP_LEASE_EXPIRED); + else + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_DUPMASTER, + NULL, NULL, 0, 0); + break; + } + if ((ret = + __rep_new_master(env, rp, eid)) == DB_REP_NEWMASTER) + ret = __rep_fire_newmaster(env, rp->gen, eid); + break; + case REP_PAGE: + case REP_PAGE_FAIL: + case REP_PAGE_MORE: + /* + * Handle even if we're recovering. + */ + CLIENT_ONLY(rep, rp); + ret = __rep_page(env, ip, eid, rp, rec); + if (ret == DB_REP_PAGEDONE) + ret = 0; + break; + case REP_PAGE_REQ: + RECOVERING_SKIP; + CLIENT_MASTERCHK; + MASTER_UPDATE(env, renv); + ret = __rep_page_req(env, ip, eid, rp, rec); + CLIENT_REREQ; + break; + case REP_REREQUEST: + /* + * Handle even if we're recovering. Don't do a master + * check. + */ + CLIENT_ONLY(rep, rp); + /* + * Don't hold any mutex, may miscount. 
+ */ + STAT(rep->stat.st_client_rerequests++); + ret = __rep_resend_req(env, 1); + break; + case REP_START_SYNC: + RECOVERING_SKIP; + MUTEX_LOCK(env, rep->mtx_clientdb); + cmp = LOG_COMPARE(&rp->lsn, &lp->ready_lsn); + /* + * The comparison needs to be <= because the LSN in + * the message can be the LSN of the first outstanding + * txn, which may be the LSN immediately after the + * previous commit. The ready_lsn is the LSN of the + * next record expected. In that case, the LSNs + * could be equal and the client has the commit and + * wants to sync. [SR #15338] + */ + if (cmp <= 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + do_sync = 1; + } else { + STAT(rep->stat.st_startsync_delayed++); + /* + * There are cases where keeping the first ckp_lsn + * LSN is advantageous and cases where keeping + * a later LSN is better. If random, earlier + * log records are missing, keeping the later + * LSN seems to be better. That is what we'll + * do for now. + */ + if (LOG_COMPARE(&rp->lsn, &rep->ckp_lsn) > 0) + rep->ckp_lsn = rp->lsn; + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Delayed START_SYNC memp_sync due to missing records.")); + RPRINT(env, (env, DB_VERB_REP_MSGS, + "ready LSN [%lu][%lu], ckp_lsn [%lu][%lu]", + (u_long)lp->ready_lsn.file, (u_long)lp->ready_lsn.offset, + (u_long)rep->ckp_lsn.file, (u_long)rep->ckp_lsn.offset)); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + } + break; + case REP_UPDATE: + /* + * Handle even if we're recovering. + */ + CLIENT_ONLY(rep, rp); + if ((ret = __rep_update_setup(env, + eid, rp, rec, savetime, &lsn)) == DB_REP_WOULDROLLBACK && + ret_lsnp != NULL) { + /* + * Not for a normal internal init. But this could + * happen here if we had to ask for an UPDATE message in + * order to check for materializing NIMDBs; in other + * words, an "abbreviated internal init." + */ + *ret_lsnp = lsn; + } + break; + case REP_UPDATE_REQ: + /* + * Handle even if we're recovering. 
+ */ + MASTER_ONLY(rep, rp); + infop = env->reginfo; + renv = infop->primary; + MASTER_UPDATE(env, renv); + ret = __rep_update_req(env, rp); + break; + case REP_VERIFY: + if (recovering) { + MUTEX_LOCK(env, rep->mtx_clientdb); + cmp = LOG_COMPARE(&lp->verify_lsn, &rp->lsn); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + /* + * If this is not the verify record I want, skip it. + */ + if (cmp != 0) { + ret = __rep_skip_msg( + env, rep, eid, rp->rectype); + break; + } + } + CLIENT_ONLY(rep, rp); + if ((ret = __rep_verify(env, rp, rec, eid, savetime)) == + DB_REP_WOULDROLLBACK && ret_lsnp != NULL) + *ret_lsnp = rp->lsn; + break; + case REP_VERIFY_FAIL: + /* + * Handle even if we're recovering. + */ + CLIENT_ONLY(rep, rp); + ret = __rep_verify_fail(env, rp); + break; + case REP_VERIFY_REQ: + RECOVERING_SKIP; + CLIENT_MASTERCHK; + ret = __rep_verify_req(env, rp, eid); + CLIENT_REREQ; + break; + case REP_VOTE1: + /* + * Handle even if we're recovering. + */ + ret = __rep_vote1(env, rp, rec, eid); + break; + case REP_VOTE2: + /* + * Handle even if we're recovering. + */ + ret = __rep_vote2(env, rp, rec, eid); + break; + default: + __db_errx(env, DB_STR_A("3521", + "DB_ENV->rep_process_message: unknown replication message: type %lu", + "%lu"), (u_long)rp->rectype); + ret = EINVAL; + break; + } + +errlock: + REP_SYSTEM_LOCK(env); +errhlk: if (lockout) + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + rep->msg_th--; + REP_SYSTEM_UNLOCK(env); + if (do_sync) { + MUTEX_LOCK(env, rep->mtx_ckp); + lsn = rp->lsn; + /* + * This is the REP_START_SYNC sync, and so we permit it to be + * interrupted. 
+ */ + ret = __memp_sync( + env, DB_SYNC_CHECKPOINT | DB_SYNC_INTERRUPT_OK, &lsn); + MUTEX_UNLOCK(env, rep->mtx_ckp); + RPRINT(env, (env, DB_VERB_REP_MSGS, + "START_SYNC: Completed sync [%lu][%lu]", + (u_long)lsn.file, (u_long)lsn.offset)); + } +out: + if (ret == 0 && F_ISSET(rp, REPCTL_PERM)) { + if (ret_lsnp != NULL) + *ret_lsnp = rp->lsn; + ret = DB_REP_NOTPERM; + } + __dbt_userfree(env, control, rec, NULL); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __rep_apply -- + * + * Handle incoming log records on a client, applying when possible and + * entering into the bookkeeping table otherwise. This routine manages + * the state of the incoming message stream -- processing records, via + * __rep_process_rec, when possible and enqueuing in the __db.rep.db + * when necessary. As gaps in the stream are filled in, this is where + * we try to process as much as possible from __db.rep.db to catch up. + * + * PUBLIC: int __rep_apply __P((ENV *, DB_THREAD_INFO *, __rep_control_args *, + * PUBLIC: DBT *, DB_LSN *, int *, DB_LSN *)); + */ +int +__rep_apply(env, ip, rp, rec, ret_lsnp, is_dupp, last_lsnp) + ENV *env; + DB_THREAD_INFO *ip; + __rep_control_args *rp; + DBT *rec; + DB_LSN *ret_lsnp; + int *is_dupp; + DB_LSN *last_lsnp; +{ + DB *dbp; + DBT control_dbt, key_dbt; + DBT rec_dbt; + DB_LOG *dblp; + DB_LSN max_lsn, save_lsn; + DB_REP *db_rep; + LOG *lp; + REP *rep; + db_timespec msg_time, max_ts; + u_int32_t gen, rectype; + int cmp, event, master, newfile_seen, ret, set_apply, t_ret; + + COMPQUIET(gen, 0); + COMPQUIET(master, DB_EID_INVALID); + + db_rep = env->rep_handle; + rep = db_rep->region; + event = ret = set_apply = 0; + memset(&control_dbt, 0, sizeof(control_dbt)); + memset(&rec_dbt, 0, sizeof(rec_dbt)); + ZERO_LSN(max_lsn); + timespecclear(&max_ts); + timespecset(&msg_time, rp->msg_sec, rp->msg_nsec); + cmp = -2; /* OOB value that LOG_COMPARE can't return. */ + + dblp = env->lg_handle; + MUTEX_LOCK(env, rep->mtx_clientdb); + /* + * Lazily open the temp db. 
Always set the startup flag to 0 + * because it was initialized from rep_start. + */ + if (db_rep->rep_db == NULL && + (ret = __rep_client_dbinit(env, 0, REP_DB)) != 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + goto out; + } + dbp = db_rep->rep_db; + lp = dblp->reginfo.primary; + newfile_seen = 0; + REP_SYSTEM_LOCK(env); + if (rep->sync_state == SYNC_LOG && + LOG_COMPARE(&lp->ready_lsn, &rep->first_lsn) < 0) + lp->ready_lsn = rep->first_lsn; + cmp = LOG_COMPARE(&rp->lsn, &lp->ready_lsn); + /* + * If we are going to skip or process any message other + * than a duplicate, make note of it if we're in an + * election so that the election can rerequest proactively. + */ + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_APPLY) && cmp >= 0) + F_SET(rep, REP_F_SKIPPED_APPLY); + + /* + * If we're in the middle of processing a NEWFILE, we've dropped + * the mutex and if this matches it is a duplicate record. We + * do not want this call taking the "matching" code below because + * we may then process later records in the temp db and the + * original NEWFILE may not have the log file ready. It will + * process those temp db items when it completes. + */ + if (F_ISSET(rep, REP_F_NEWFILE) && cmp == 0) + cmp = -1; + + if (cmp == 0) { + /* + * If we are in an election (i.e. we've sent a vote + * with an LSN in it), then we drop the next record + * we're expecting. When we find a master, we'll + * either go into sync, or if it was an existing + * master, rerequest this one record (later records + * are accumulating in the temp db). + * + * We can simply return here, and rep_process_message + * will set NOTPERM if necessary for this record. + */ + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_APPLY)) { + /* + * We will simply return now. All special return + * processing should be ignored because the special + * values are just initialized. Variables like + * max_lsn are still 0. + */ + RPRINT(env, (env, DB_VERB_REP_MISC, + "rep_apply: In election. 
Ignoring [%lu][%lu]", + (u_long)rp->lsn.file, (u_long)rp->lsn.offset)); + REP_SYSTEM_UNLOCK(env); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + goto out; + } + rep->apply_th++; + set_apply = 1; + VPRINT(env, (env, DB_VERB_REP_MISC, + "rep_apply: Set apply_th %d", rep->apply_th)); + REP_SYSTEM_UNLOCK(env); + if (rp->rectype == REP_NEWFILE) + newfile_seen = 1; + if ((ret = __rep_process_rec(env, ip, + rp, rec, &max_ts, &max_lsn)) != 0) + goto err; + /* + * If we get the record we are expecting, reset + * the count of records we've received and are applying + * towards the request interval. + */ + __os_gettime(env, &lp->rcvd_ts, 1); + ZERO_LSN(lp->max_wait_lsn); + + /* + * The __rep_remfirst() and __rep_getnext() functions each open, + * use and then close a cursor on the temp db, each time through + * the loop. Although this may seem excessive, it is necessary + * to avoid locking problems with checkpoints. + */ + while (ret == 0 && + LOG_COMPARE(&lp->ready_lsn, &lp->waiting_lsn) == 0) { + /* + * We just filled in a gap in the log record stream. + * Write subsequent records to the log. + */ +gap_check: + if ((ret = __rep_remfirst(env, ip, + &control_dbt, &rec_dbt)) != 0) + goto err; + + rp = (__rep_control_args *)control_dbt.data; + timespecset(&msg_time, rp->msg_sec, rp->msg_nsec); + rec = &rec_dbt; + if (rp->rectype == REP_NEWFILE) + newfile_seen = 1; + if ((ret = __rep_process_rec(env, ip, + rp, rec, &max_ts, &max_lsn)) != 0) + goto err; + + STAT(--rep->stat.st_log_queued); + + /* + * Since we just filled a gap in the log stream, and + * we're writing subsequent records to the log, we want + * to use rcvd_ts and wait_ts so that we will + * request the next gap if we end up with a gap and + * not so recent records in the temp db, but not + * request if recent records are in the temp db and + * likely to arrive on its own shortly. We want to + * avoid requesting the record in that case. Also + * reset max_wait_lsn because the next gap is a + * fresh gap. 
+ */ + lp->rcvd_ts = lp->last_ts; + lp->wait_ts = rep->request_gap; + if ((ret = __rep_getnext(env, ip)) == DB_NOTFOUND) { + __os_gettime(env, &lp->rcvd_ts, 1); + ret = 0; + break; + } else if (ret != 0) + goto err; + } + + /* + * Check if we're at a gap in the table and if so, whether we + * need to ask for any records. + */ + if (!IS_ZERO_LSN(lp->waiting_lsn) && + LOG_COMPARE(&lp->ready_lsn, &lp->waiting_lsn) != 0) { + /* + * We got a record and processed it, but we may + * still be waiting for more records. If we + * filled a gap we keep a count of how many other + * records are in the temp database and if we should + * request the next gap at this time. + */ + if (__rep_check_doreq(env, rep) && (ret = + __rep_loggap_req(env, rep, &rp->lsn, 0)) != 0) + goto err; + } else { + lp->wait_ts = rep->request_gap; + ZERO_LSN(lp->max_wait_lsn); + } + + } else if (cmp > 0) { + /* + * The LSN is higher than the one we were waiting for. + * This record isn't in sequence; add it to the temporary + * database, update waiting_lsn if necessary, and perform + * calculations to determine if we should issue requests + * for new records. + */ + REP_SYSTEM_UNLOCK(env); + memset(&key_dbt, 0, sizeof(key_dbt)); + key_dbt.data = rp; + key_dbt.size = sizeof(*rp); + ret = __db_put(dbp, ip, NULL, &key_dbt, rec, DB_NOOVERWRITE); + if (ret == 0) { + STAT(rep->stat.st_log_queued++); + __os_gettime(env, &lp->last_ts, 1); +#ifdef HAVE_STATISTICS + rep->stat.st_log_queued_total++; + if (rep->stat.st_log_queued_max < + rep->stat.st_log_queued) + rep->stat.st_log_queued_max = + rep->stat.st_log_queued; +#endif + } + + if (ret == DB_KEYEXIST) + ret = 0; + if (ret != 0) + goto done; + + if (IS_ZERO_LSN(lp->waiting_lsn) || + LOG_COMPARE(&rp->lsn, &lp->waiting_lsn) < 0) { + /* + * If this is a new gap, then reset the rcvd_ts so + * that an out-of-order record after an idle period + * does not (likely) immediately rerequest. 
+ */ + if (IS_ZERO_LSN(lp->waiting_lsn)) + __os_gettime(env, &lp->rcvd_ts, 1); + lp->waiting_lsn = rp->lsn; + } + + if (__rep_check_doreq(env, rep) && + (ret = __rep_loggap_req(env, rep, &rp->lsn, 0) != 0)) + goto err; + + /* + * If this is permanent; let the caller know that we have + * not yet written it to disk, but we've accepted it. + */ + if (ret == 0 && F_ISSET(rp, REPCTL_PERM)) { + max_lsn = rp->lsn; + ret = DB_REP_NOTPERM; + } + goto done; + } else { + STAT(rep->stat.st_log_duplicated++); + REP_SYSTEM_UNLOCK(env); + if (is_dupp != NULL) { + *is_dupp = 1; + /* + * Could get overwritten by max_lsn later. + * But max_lsn is guaranteed <= ready_lsn, so + * it would be a more conservative LSN to return. + */ + *ret_lsnp = lp->ready_lsn; + } + LOGCOPY_32(env, &rectype, rec->data); + if (rectype == DB___txn_regop || rectype == DB___txn_ckp) + max_lsn = lp->max_perm_lsn; + /* + * We check REPCTL_LEASE here, because this client may + * have leases configured but the master may not (especially + * in a mixed version group. If the master has leases + * configured, all clients must also. + */ + if (IS_USING_LEASES(env) && + F_ISSET(rp, REPCTL_LEASE) && + timespecisset(&msg_time)) { + if (timespeccmp(&msg_time, &lp->max_lease_ts, >)) + max_ts = msg_time; + else + max_ts = lp->max_lease_ts; + } + goto done; + } + + /* Check if we need to go back into the table. */ + if (ret == 0 && LOG_COMPARE(&lp->ready_lsn, &lp->waiting_lsn) == 0) + goto gap_check; + +done: +err: /* + * In case of a race, to make sure only one thread can get + * DB_REP_LOGREADY, zero out rep->last_lsn to show that we've gotten to + * this point. + */ + REP_SYSTEM_LOCK(env); + if (ret == 0 && + rep->sync_state == SYNC_LOG && + !IS_ZERO_LSN(rep->last_lsn) && + LOG_COMPARE(&lp->ready_lsn, &rep->last_lsn) >= 0) { + *last_lsnp = max_lsn; + ZERO_LSN(rep->last_lsn); + ZERO_LSN(max_lsn); + ret = DB_REP_LOGREADY; + } + /* + * Only decrement if we were actually applying log records. 
+ * We do not care if we processed a dup record or put one + * in the temp db. + */ + if (set_apply) { + rep->apply_th--; + VPRINT(env, (env, DB_VERB_REP_MISC, + "rep_apply: Decrement apply_th %d [%lu][%lu]", + rep->apply_th, (u_long)lp->ready_lsn.file, + (u_long)lp->ready_lsn.offset)); + } + + if (ret == 0 && rep->sync_state != SYNC_LOG && + !IS_ZERO_LSN(max_lsn)) { + if (ret_lsnp != NULL) + *ret_lsnp = max_lsn; + ret = DB_REP_ISPERM; + DB_ASSERT(env, LOG_COMPARE(&max_lsn, &lp->max_perm_lsn) >= 0); + lp->max_perm_lsn = max_lsn; + if ((t_ret = __rep_notify_threads(env, AWAIT_LSN)) != 0) + ret = t_ret; + } + + /* + * Start-up is complete when we process (or have already processed) up + * to the end of the replication group's log. In case we miss that + * message, as a back-up, we also recognize start-up completion when we + * actually process a live log record. Having cmp==0 here (with a good + * "ret" value) implies we actually processed the record. + */ + if ((ret == 0 || ret == DB_REP_ISPERM) && + rep->stat.st_startup_complete == 0 && + rep->sync_state != SYNC_LOG && + ((cmp <= 0 && F_ISSET(rp, REPCTL_LOG_END)) || + (cmp == 0 && !F_ISSET(rp, REPCTL_RESEND)))) { + rep->stat.st_startup_complete = 1; + event = 1; + gen = rep->gen; + master = rep->master_id; + } + REP_SYSTEM_UNLOCK(env); + /* + * If we've processed beyond the needed LSN for a pending + * start sync, start it now. We can compare >= here + * because ready_lsn is the next record we expect. + * Since ckp_lsn can point to the last commit record itself, + * but if it does and ready_lsn == commit (i.e. we haven't + * written the commit yet), we can still start to sync + * because we're guaranteed no additional buffers can + * be dirtied. + */ + if (!IS_ZERO_LSN(rep->ckp_lsn) && + LOG_COMPARE(&lp->ready_lsn, &rep->ckp_lsn) >= 0) { + save_lsn = rep->ckp_lsn; + ZERO_LSN(rep->ckp_lsn); + } else + ZERO_LSN(save_lsn); + + /* + * If this is a perm record, we are using leases, update the lease + * grant. 
We must hold the clientdb mutex. We must not hold + * the region mutex because rep_update_grant will acquire it. + */ + if (ret == DB_REP_ISPERM && IS_USING_LEASES(env) && + timespecisset(&max_ts)) { + if ((t_ret = __rep_update_grant(env, &max_ts)) != 0) + ret = t_ret; + else if (timespeccmp(&max_ts, &lp->max_lease_ts, >)) + lp->max_lease_ts = max_ts; + } + + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (!IS_ZERO_LSN(save_lsn)) { + /* + * Now call memp_sync holding only the ckp mutex. + */ + MUTEX_LOCK(env, rep->mtx_ckp); + RPRINT(env, (env, DB_VERB_REP_MISC, + "Starting delayed __memp_sync call [%lu][%lu]", + (u_long)save_lsn.file, (u_long)save_lsn.offset)); + t_ret = __memp_sync(env, + DB_SYNC_CHECKPOINT | DB_SYNC_INTERRUPT_OK, &save_lsn); + MUTEX_UNLOCK(env, rep->mtx_ckp); + } + if (event) { + RPRINT(env, (env, DB_VERB_REP_MISC, + "Start-up is done [%lu][%lu]", + (u_long)rp->lsn.file, (u_long)rp->lsn.offset)); + + if ((t_ret = __rep_fire_startupdone(env, gen, master)) != 0) { + DB_ASSERT(env, ret == 0 || ret == DB_REP_ISPERM); + /* Failure trumps either of those values. 
*/ + ret = t_ret; + goto out; + } + } + if ((ret == 0 || ret == DB_REP_ISPERM) && + newfile_seen && lp->db_log_autoremove) + __log_autoremove(env); + if (control_dbt.data != NULL) + __os_ufree(env, control_dbt.data); + if (rec_dbt.data != NULL) + __os_ufree(env, rec_dbt.data); + +out: + switch (ret) { + case 0: + break; + case DB_REP_ISPERM: + VPRINT(env, (env, DB_VERB_REP_MSGS, + "Returning ISPERM [%lu][%lu], cmp = %d", + (u_long)max_lsn.file, (u_long)max_lsn.offset, cmp)); + break; + case DB_REP_LOGREADY: + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Returning LOGREADY up to [%lu][%lu], cmp = %d", + (u_long)last_lsnp->file, + (u_long)last_lsnp->offset, cmp)); + break; + case DB_REP_NOTPERM: + if (rep->sync_state != SYNC_LOG && + !IS_ZERO_LSN(max_lsn) && ret_lsnp != NULL) + *ret_lsnp = max_lsn; + + VPRINT(env, (env, DB_VERB_REP_MSGS, + "Returning NOTPERM [%lu][%lu], cmp = %d", + (u_long)max_lsn.file, (u_long)max_lsn.offset, cmp)); + break; + default: + RPRINT(env, (env, DB_VERB_REP_MSGS, + "Returning %d [%lu][%lu], cmp = %d", ret, + (u_long)max_lsn.file, (u_long)max_lsn.offset, cmp)); + break; + } + + return (ret); +} + +/* + * __rep_process_txn -- + * + * This is the routine that actually gets a transaction ready for + * processing. 
 *
 * PUBLIC: int __rep_process_txn __P((ENV *, DBT *));
 */
int
__rep_process_txn(env, rec)
	ENV *env;
	DBT *rec;
{
	DBT data_dbt, *lock_dbt;
	DB_LOCKER *locker;
	DB_LOCKREQ req, *lvp;
	DB_LOGC *logc;
	DB_LSN prev_lsn, *lsnp;
	DB_REP *db_rep;
	DB_THREAD_INFO *ip;
	DB_TXNHEAD *txninfo;
	LSN_COLLECTION lc;
	REP *rep;
	__txn_regop_args *txn_args;
	__txn_regop_42_args *txn42_args;
	__txn_prepare_args *prep_args;
	u_int32_t rectype;
	u_int i;
	int ret, t_ret;

	db_rep = env->rep_handle;
	rep = db_rep->region;
	logc = NULL;
	txn_args = NULL;
	txn42_args = NULL;
	prep_args = NULL;
	txninfo = NULL;

	ENV_ENTER(env, ip);
	memset(&data_dbt, 0, sizeof(data_dbt));
	if (F_ISSET(env, ENV_THREAD))
		F_SET(&data_dbt, DB_DBT_REALLOC);

	/*
	 * There are two phases: First, we have to traverse backwards through
	 * the log records gathering the list of all LSNs in the transaction.
	 * Once we have this information, we can loop through and then apply
	 * it.
	 *
	 * We may be passed a prepare (if we're restoring a prepare on upgrade)
	 * instead of a commit (the common case).  Check which it is and
	 * behave appropriately.
	 */
	LOGCOPY_32(env, &rectype, rec->data);
	memset(&lc, 0, sizeof(lc));
	if (rectype == DB___txn_regop) {
		/*
		 * We're the end of a transaction.  Make sure this is
		 * really a commit and not an abort!
		 *
		 * NOTE(review): the early returns below exit after
		 * ENV_ENTER above without a matching ENV_LEAVE --
		 * confirm whether that is intended here.
		 */
		if (rep->version >= DB_REPVERSION_44) {
			if ((ret = __txn_regop_read(
			    env, rec->data, &txn_args)) != 0)
				return (ret);
			if (txn_args->opcode != TXN_COMMIT) {
				__os_free(env, txn_args);
				return (0);
			}
			prev_lsn = txn_args->prev_lsn;
			lock_dbt = &txn_args->locks;
		} else {
			/* Pre-4.4 log format uses the older regop layout. */
			if ((ret = __txn_regop_42_read(
			    env, rec->data, &txn42_args)) != 0)
				return (ret);
			if (txn42_args->opcode != TXN_COMMIT) {
				__os_free(env, txn42_args);
				return (0);
			}
			prev_lsn = txn42_args->prev_lsn;
			lock_dbt = &txn42_args->locks;
		}
	} else {
		/* We're a prepare. */
		DB_ASSERT(env, rectype == DB___txn_prepare);

		if ((ret = __txn_prepare_read(
		    env, rec->data, &prep_args)) != 0)
			return (ret);
		prev_lsn = prep_args->prev_lsn;
		lock_dbt = &prep_args->locks;
	}

	/* Get locks. */
	if ((ret = __lock_id(env, NULL, &locker)) != 0)
		goto err1;

	/* We are always more important than user transactions. */
	locker->priority = DB_LOCK_MAXPRIORITY;

	if ((ret =
	    __lock_get_list(env, locker, 0, DB_LOCK_WRITE, lock_dbt)) != 0)
		goto err;

	/* Phase 1.  Get a list of the LSNs in this transaction, and sort it. */
	if ((ret = __rep_collect_txn(env, &prev_lsn, &lc)) != 0)
		goto err;
	qsort(lc.array, lc.nlsns, sizeof(DB_LSN), __rep_lsn_cmp);

	/*
	 * The set of records for a transaction may include dbreg_register
	 * records.  Create a txnlist so that they can keep track of file
	 * state between records.
	 */
	if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0)
		goto err;

	/* Phase 2: Apply updates. */
	if ((ret = __log_cursor(env, &logc)) != 0)
		goto err;
	for (lsnp = &lc.array[0], i = 0; i < lc.nlsns; i++, lsnp++) {
		if ((ret = __logc_get(logc, lsnp, &data_dbt, DB_SET)) != 0) {
			__db_errx(env, DB_STR_A("3522",
			    "failed to read the log at [%lu][%lu]", "%lu %lu"),
			    (u_long)lsnp->file, (u_long)lsnp->offset);
			goto err;
		}
		if ((ret = __db_dispatch(env, &env->recover_dtab,
		    &data_dbt, lsnp, DB_TXN_APPLY, txninfo)) != 0) {
			__db_errx(env, DB_STR_A("3523",
			    "transaction failed at [%lu][%lu]", "%lu %lu"),
			    (u_long)lsnp->file, (u_long)lsnp->offset);
			goto err;
		}
	}

	/* Release all the locks acquired above, even on error. */
err:	memset(&req, 0, sizeof(req));
	req.op = DB_LOCK_PUT_ALL;
	if ((t_ret =
	    __lock_vec(env, locker, 0, &req, 1, &lvp)) != 0 && ret == 0)
		ret = t_ret;

	if ((t_ret = __lock_id_free(env, locker)) != 0 && ret == 0)
		ret = t_ret;

err1:	if (txn_args != NULL)
		__os_free(env, txn_args);
	if (txn42_args != NULL)
		__os_free(env, txn42_args);
	if (prep_args != NULL)
		__os_free(env, prep_args);
	if (lc.array != NULL)
		__os_free(env, lc.array);

	if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0)
		ret = t_ret;

	if (txninfo != NULL)
		__db_txnlist_end(env, txninfo);

	if (F_ISSET(&data_dbt, DB_DBT_REALLOC) && data_dbt.data != NULL)
		__os_ufree(env, data_dbt.data);

#ifdef HAVE_STATISTICS
	if (ret == 0)
		/*
		 * We don't hold the rep mutex, and could miscount if we race.
		 */
		rep->stat.st_txns_applied++;
#endif

	return (ret);
}

/*
 * __rep_collect_txn
 *	Recursive function that will let us visit every entry in a transaction
 * chain including all child transactions so that we can then apply
 * the entire transaction family at once.
 */
static int
__rep_collect_txn(env, lsnp, lc)
	ENV *env;
	DB_LSN *lsnp;
	LSN_COLLECTION *lc;
{
	__txn_child_args *argp;
	DB_LOGC *logc;
	DB_LSN c_lsn;
	DBT data;
	u_int32_t rectype;
	u_int nalloc;
	int ret, t_ret;

	memset(&data, 0, sizeof(data));
	F_SET(&data, DB_DBT_REALLOC);

	if ((ret = __log_cursor(env, &logc)) != 0)
		return (ret);

	/* Walk the prev_lsn chain back to the start of the transaction. */
	while (!IS_ZERO_LSN(*lsnp) &&
	    (ret = __logc_get(logc, lsnp, &data, DB_SET)) == 0) {
		LOGCOPY_32(env, &rectype, data.data);
		if (rectype == DB___txn_child) {
			/* Recurse into the child transaction's chain. */
			if ((ret = __txn_child_read(
			    env, data.data, &argp)) != 0)
				goto err;
			c_lsn = argp->c_lsn;
			*lsnp = argp->prev_lsn;
			__os_free(env, argp);
			ret = __rep_collect_txn(env, &c_lsn, lc);
		} else {
			/* Grow the LSN array geometrically as needed. */
			if (lc->nalloc < lc->nlsns + 1) {
				nalloc = lc->nalloc == 0 ? 20 : lc->nalloc * 2;
				if ((ret = __os_realloc(env,
				    nalloc * sizeof(DB_LSN), &lc->array)) != 0)
					goto err;
				lc->nalloc = nalloc;
			}
			lc->array[lc->nlsns++] = *lsnp;

			/*
			 * Explicitly copy the previous lsn.  The record
			 * starts with a u_int32_t record type, a u_int32_t
			 * txn id, and then the DB_LSN (prev_lsn) that we
			 * want.  We copy explicitly because we have no idea
			 * what kind of record this is.
			 */
			LOGCOPY_TOLSN(env, lsnp, (u_int8_t *)data.data +
			    sizeof(u_int32_t) + sizeof(u_int32_t));
		}

		if (ret != 0)
			goto err;
	}
	if (ret != 0)
		__db_errx(env, DB_STR_A("3524",
		    "collect failed at: [%lu][%lu]", "%lu %lu"),
		    (u_long)lsnp->file, (u_long)lsnp->offset);

err:	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
		ret = t_ret;
	if (data.data != NULL)
		__os_ufree(env, data.data);
	return (ret);
}

/*
 * __rep_lsn_cmp --
 *	qsort-type-compatible wrapper for LOG_COMPARE.
 */
static int
__rep_lsn_cmp(lsn1, lsn2)
	const void *lsn1, *lsn2;
{

	return (LOG_COMPARE((DB_LSN *)lsn1, (DB_LSN *)lsn2));
}

/*
 * __rep_newfile --
 *	NEWFILE messages have the LSN of the last record in the previous
 * log file.  When applying a NEWFILE message, make sure we haven't already
 * swapped files.  Assume caller hold mtx_clientdb.
 */
static int
__rep_newfile(env, rp, rec)
	ENV *env;
	__rep_control_args *rp;
	DBT *rec;
{
	DB_LOG *dblp;
	DB_LSN tmplsn;
	DB_REP *db_rep;
	LOG *lp;
	REP *rep;
	__rep_newfile_args nf_args;
	int ret;

	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	db_rep = env->rep_handle;
	rep = db_rep->region;

	/*
	 * If a newfile is already in progress, just ignore.
	 */
	if (F_ISSET(rep, REP_F_NEWFILE))
		return (0);
	if (rp->lsn.file + 1 > lp->ready_lsn.file) {
		/*
		 * Determine the log version for the new file; it may come
		 * from the control message, an old-style payload, or the
		 * marshaled __rep_newfile_args, depending on peer version.
		 */
		if (rec == NULL || rec->size == 0) {
			RPRINT(env, (env, DB_VERB_REP_MISC,
"rep_newfile: Old-style NEWFILE msg.  Use control msg log version: %lu",
			    (u_long) rp->log_version));
			nf_args.version = rp->log_version;
		} else if (rp->rep_version < DB_REPVERSION_47)
			nf_args.version = *(u_int32_t *)rec->data;
		else if ((ret = __rep_newfile_unmarshal(env, &nf_args,
		    rec->data, rec->size, NULL)) != 0)
			return (ret);
		RPRINT(env, (env, DB_VERB_REP_MISC,
		    "rep_newfile: File %lu vers %lu",
		    (u_long)rp->lsn.file + 1, (u_long)nf_args.version));

		/*
		 * We drop the mtx_clientdb mutex during
		 * the file operation, and then reacquire it when
		 * we're done.  We avoid colliding with new incoming
		 * log records because lp->ready_lsn is not getting
		 * updated and there is no real log record at this
		 * ready_lsn.  We avoid colliding with a duplicate
		 * NEWFILE message by setting an in-progress flag.
		 */
		REP_SYSTEM_LOCK(env);
		F_SET(rep, REP_F_NEWFILE);
		REP_SYSTEM_UNLOCK(env);
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
		LOG_SYSTEM_LOCK(env);
		ret = __log_newfile(dblp, &tmplsn, 0, nf_args.version);
		LOG_SYSTEM_UNLOCK(env);
		MUTEX_LOCK(env, rep->mtx_clientdb);
		REP_SYSTEM_LOCK(env);
		F_CLR(rep, REP_F_NEWFILE);
		REP_SYSTEM_UNLOCK(env);
		if (ret == 0)
			lp->ready_lsn = tmplsn;
		return (ret);
	} else
		/* We've already applied this NEWFILE.  Just ignore it. */
		return (0);
}

/*
 * __rep_do_ckp --
 *	Perform the memp_sync necessary for this checkpoint without holding
 * the REP->mtx_clientdb.  Callers of this function must hold
 * REP->mtx_clientdb and must not be holding the region mutex.
 */
static int
__rep_do_ckp(env, rec, rp)
	ENV *env;
	DBT *rec;
	__rep_control_args *rp;
{
	DB_ENV *dbenv;
	__txn_ckp_args *ckp_args;
	DB_LSN ckp_lsn;
	REP *rep;
	int ret;

	dbenv = env->dbenv;

	/* Crack the log record and extract the checkpoint LSN.
 */
	if ((ret = __txn_ckp_read(env, rec->data, &ckp_args)) != 0)
		return (ret);
	ckp_lsn = ckp_args->ckp_lsn;
	__os_free(env, ckp_args);

	rep = env->rep_handle->region;

	/* Drop mtx_clientdb for the duration of the (slow) cache flush. */
	MUTEX_UNLOCK(env, rep->mtx_clientdb);
	DB_TEST_WAIT(env, env->test_check);

	/*
	 * Sync the memory pool.
	 *
	 * This is the real PERM lock record/ckp.  We cannot return ISPERM
	 * if we haven't truly completed the checkpoint, so we don't allow
	 * this call to be interrupted.
	 *
	 * We may be overlapping our log record with an in-progress startsync
	 * of this checkpoint; suppress the max_write settings on any running
	 * cache-flush operation so it completes quickly.
	 */
	(void)__memp_set_config(dbenv, DB_MEMP_SUPPRESS_WRITE, 1);
	MUTEX_LOCK(env, rep->mtx_ckp);
	ret = __memp_sync(env, DB_SYNC_CHECKPOINT, &ckp_lsn);
	MUTEX_UNLOCK(env, rep->mtx_ckp);
	(void)__memp_set_config(dbenv, DB_MEMP_SUPPRESS_WRITE, 0);

	/* Update the last_ckp in the txn region. */
	if (ret == 0)
		ret = __txn_updateckp(env, &rp->lsn);
	else {
		/* A failed checkpoint sync is unrecoverable: panic. */
		__db_errx(env, DB_STR_A("3525",
		    "Error syncing ckp [%lu][%lu]", "%lu %lu"),
		    (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset);
		ret = __env_panic(env, ret);
	}

	/* Reacquire mtx_clientdb for the caller, per our contract. */
	MUTEX_LOCK(env, rep->mtx_clientdb);
	return (ret);
}

/*
 * __rep_remfirst --
 *	Remove the first entry from the __db.rep.db
 */
static int
__rep_remfirst(env, ip, cntrl, rec)
	ENV *env;
	DB_THREAD_INFO *ip;
	DBT *cntrl;
	DBT *rec;
{
	DB *dbp;
	DBC *dbc;
	DB_REP *db_rep;
	int ret, t_ret;

	db_rep = env->rep_handle;
	dbp = db_rep->rep_db;
	if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
		return (ret);

	/* The DBTs need to persist through another call. */
	F_SET(cntrl, DB_DBT_REALLOC);
	F_SET(rec, DB_DBT_REALLOC);
	if ((ret = __dbc_get(dbc, cntrl, rec, DB_RMW | DB_FIRST)) == 0)
		ret = __dbc_del(dbc, 0);
	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __rep_getnext --
 *	Get the next record out of the __db.rep.db table.
 */
static int
__rep_getnext(env, ip)
	ENV *env;
	DB_THREAD_INFO *ip;
{
	DB *dbp;
	DBC *dbc;
	DBT lsn_dbt, nextrec_dbt;
	DB_LOG *dblp;
	DB_REP *db_rep;
	LOG *lp;
	__rep_control_args *rp;
	int ret, t_ret;

	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;

	db_rep = env->rep_handle;
	dbp = db_rep->rep_db;

	if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
		return (ret);

	/*
	 * Update waiting_lsn.  We need to move it
	 * forward to the LSN of the next record
	 * in the queue.
	 *
	 * If the next item in the database is a log
	 * record--the common case--we're not
	 * interested in its contents, just in its LSN.
	 * Optimize by doing a partial get of the data item.
	 */
	memset(&nextrec_dbt, 0, sizeof(nextrec_dbt));
	F_SET(&nextrec_dbt, DB_DBT_PARTIAL);
	nextrec_dbt.ulen = nextrec_dbt.dlen = 0;

	memset(&lsn_dbt, 0, sizeof(lsn_dbt));
	ret = __dbc_get(dbc, &lsn_dbt, &nextrec_dbt, DB_FIRST);
	if (ret != DB_NOTFOUND && ret != 0)
		goto err;

	if (ret == DB_NOTFOUND) {
		ZERO_LSN(lp->waiting_lsn);
		/*
		 * Whether or not the current record is
		 * simple, there's no next one, and
		 * therefore we haven't got anything
		 * else to do right now.  Break out.
		 */
		goto err;
	}
	rp = (__rep_control_args *)lsn_dbt.data;
	lp->waiting_lsn = rp->lsn;

err:	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}

/*
 * __rep_process_rec --
 *
 * Given a record in 'rp', process it.  In the case of a NEWFILE, that means
 * potentially switching files.  In the case of a checkpoint, it means doing
 * the checkpoint, and in other cases, it means simply writing the record
 * into the log.
 */
static int
__rep_process_rec(env, ip, rp, rec, ret_tsp, ret_lsnp)
	ENV *env;
	DB_THREAD_INFO *ip;
	__rep_control_args *rp;
	DBT *rec;
	db_timespec *ret_tsp;
	DB_LSN *ret_lsnp;
{
	DB *dbp;
	DBT control_dbt, key_dbt, rec_dbt;
	DB_LOG *dblp;
	DB_REP *db_rep;
	DB_LOGC *logc;
	LOG *lp;
	REP *rep;
	DB_LSN lsn;
	db_timespec msg_time;
	u_int32_t rectype, txnid;
	int ret, t_ret;

	db_rep = env->rep_handle;
	rep = db_rep->region;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	dbp = db_rep->rep_db;
	ret = 0;

	memset(&rec_dbt, 0, sizeof(rec_dbt));
	if (rp->rectype == REP_NEWFILE) {
		if ((ret = __rep_newfile(env, rp, rec)) != 0)
			return (ret);

		/*
		 * In SYNC_LOG, in case the end-of-log sync point happens to be
		 * right at the file boundary, we need to make sure ret_lsnp
		 * points to a real log record, rather than the "dead space" at
		 * the end of the file that the NEWFILE msg normally points to.
		 */
		if (rep->sync_state == SYNC_LOG) {
			if ((ret = __log_cursor(env, &logc)) != 0)
				return (ret);
			if ((ret = __logc_get(logc,
			    &lsn, &rec_dbt, DB_LAST)) == 0)
				*ret_lsnp = lsn;
			if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
				ret = t_ret;
		}
		return (ret);
	}

	LOGCOPY_32(env, &rectype, rec->data);
	memset(&control_dbt, 0, sizeof(control_dbt));
	timespecset(&msg_time, rp->msg_sec, rp->msg_nsec);

	/*
	 * We write all records except for checkpoint records here.
	 * All non-checkpoint records need to appear in the log before
	 * we take action upon them (i.e., we enforce write-ahead logging).
	 * However, we can't write the checkpoint record here until the
	 * data buffers are actually written to disk, else we are creating
	 * an invalid log -- one that says all data before a certain point
	 * has been written to disk.
	 *
	 * If two threads are both processing the same checkpoint record
	 * (because, for example, it was resent and the original finally
	 * arrived), we handle that below by checking for the existence of
	 * the log record when we add it to the replication database.
	 *
	 * Any log records that arrive while we are processing the checkpoint
	 * are added to the bookkeeping database because ready_lsn is not yet
	 * updated to point after the checkpoint record.
	 */
	if (rectype != DB___txn_ckp || rep->sync_state == SYNC_LOG) {
		if ((ret = __log_rep_put(env, &rp->lsn, rec, 0)) != 0)
			return (ret);
		STAT(rep->stat.st_log_records++);
		if (rep->sync_state == SYNC_LOG) {
			*ret_lsnp = rp->lsn;
			goto out;
		}
	}

	switch (rectype) {
	case DB___dbreg_register:
		/*
		 * DB opens occur in the context of a transaction, so we can
		 * simply handle them when we process the transaction.  Closes,
		 * however, are not transaction-protected, so we have to handle
		 * them here.
		 *
		 * It should be unsafe for the master to do a close of a file
		 * that was opened in an active transaction, so we should be
		 * guaranteed to get the ordering right.
		 *
		 * !!!
		 * The txn ID is the second 4-byte field of the log record.
		 * We should really be calling __dbreg_register_read() and
		 * working from the __dbreg_register_args structure, but this
		 * is considerably faster and the order of the fields won't
		 * change.
		 */
		LOGCOPY_32(env, &txnid,
		    (u_int8_t *)rec->data + sizeof(u_int32_t));
		if (txnid == TXN_INVALID)
			ret = __db_dispatch(env, &env->recover_dtab,
			    rec, &rp->lsn, DB_TXN_APPLY, NULL);
		break;
	case DB___txn_regop:
		/*
		 * If an application is doing app-specific recovery
		 * and acquires locks while applying a transaction,
		 * it can deadlock.  Any other locks held by this
		 * thread should have been discarded in the
		 * __rep_process_txn error path, so if we simply
		 * retry, we should eventually succeed.
		 */
		do {
			ret = 0;
			if (!F_ISSET(db_rep, DBREP_OPENFILES)) {
				ret = __txn_openfiles(env, ip, NULL, 1);
				F_SET(db_rep, DBREP_OPENFILES);
			}
			if (ret == 0)
				ret = __rep_process_txn(env, rec);
		} while (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED);

		/* Now flush the log unless we're running TXN_NOSYNC. */
		if (ret == 0 && !F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC))
			ret = __log_flush(env, NULL);
		if (ret != 0) {
			/* Apply failure is unrecoverable: panic. */
			__db_errx(env, DB_STR_A("3526",
			    "Error processing txn [%lu][%lu]", "%lu %lu"),
			    (u_long)rp->lsn.file, (u_long)rp->lsn.offset);
			ret = __env_panic(env, ret);
		}
		*ret_lsnp = rp->lsn;
		break;
	case DB___txn_prepare:
		ret = __log_flush(env, NULL);
		/*
		 * Save the biggest prepared LSN we've seen.
		 */
		rep->max_prep_lsn = rp->lsn;
		VPRINT(env, (env, DB_VERB_REP_MSGS,
		    "process_rec: prepare at [%lu][%lu]",
		    (u_long)rep->max_prep_lsn.file,
		    (u_long)rep->max_prep_lsn.offset));
		break;
	case DB___txn_ckp:
		/*
		 * We do not want to hold the REP->mtx_clientdb mutex while
		 * syncing the mpool, so if we get a checkpoint record we are
		 * supposed to process, add it to the __db.rep.db, do the
		 * memp_sync and then go back and process it later, when the
		 * sync has finished.  If this record is already in the table,
		 * then some other thread will process it, so simply return
		 * REP_NOTPERM.
		 */
		memset(&key_dbt, 0, sizeof(key_dbt));
		key_dbt.data = rp;
		key_dbt.size = sizeof(*rp);

		/*
		 * We want to put this record into the tmp DB only if
		 * it doesn't exist, so use DB_NOOVERWRITE.
		 */
		ret = __db_put(dbp, ip, NULL, &key_dbt, rec, DB_NOOVERWRITE);
		if (ret == DB_KEYEXIST) {
			if (ret_lsnp != NULL)
				*ret_lsnp = rp->lsn;
			ret = DB_REP_NOTPERM;
		}
		if (ret != 0)
			break;

		/*
		 * Now, do the checkpoint.  Regardless of
		 * whether the checkpoint succeeds or not,
		 * we need to remove the record we just put
		 * in the temporary database.  If the
		 * checkpoint failed, return an error.  We
		 * will act like we never received the
		 * checkpoint.
		 */
		if ((ret = __rep_do_ckp(env, rec, rp)) == 0)
			ret = __log_rep_put(env, &rp->lsn, rec,
			    DB_LOG_CHKPNT);
		if ((t_ret = __rep_remfirst(env, ip,
		    &control_dbt, &rec_dbt)) != 0 && ret == 0)
			ret = t_ret;
		/*
		 * If we're successful putting the log record in the
		 * log, flush it for a checkpoint.
		 */
		if (ret == 0) {
			*ret_lsnp = rp->lsn;
			ret = __log_flush(env, NULL);
			if (ret == 0 && lp->db_log_autoremove)
				__log_autoremove(env);
		}
		break;
	default:
		break;
	}

out:	if (ret == 0 && F_ISSET(rp, REPCTL_PERM))
		*ret_lsnp = rp->lsn;
	if (IS_USING_LEASES(env) &&
	    F_ISSET(rp, REPCTL_LEASE))
		*ret_tsp = msg_time;
	/*
	 * Set ret_lsnp before flushing the log because if the
	 * flush fails, we've still written the record to the
	 * log and the LSN has been entered.
	 */
	if (ret == 0 && F_ISSET(rp, REPCTL_FLUSH))
		ret = __log_flush(env, NULL);
	if (control_dbt.data != NULL)
		__os_ufree(env, control_dbt.data);
	if (rec_dbt.data != NULL)
		__os_ufree(env, rec_dbt.data);

	return (ret);
}

/*
 * __rep_resend_req --
 *	We might have dropped a message, we need to resend our request.
 *	The request we send is dependent on what recovery state we're in.
 *	The caller holds no locks.
 *
 * PUBLIC: int __rep_resend_req __P((ENV *, int));
 */
int
__rep_resend_req(env, rereq)
	ENV *env;
	int rereq;
{
	DB_LOG *dblp;
	DB_LSN lsn, *lsnp;
	DB_REP *db_rep;
	LOG *lp;
	REP *rep;
	int master, ret;
	repsync_t sync_state;
	u_int32_t gapflags, msgtype, repflags, sendflags;

	db_rep = env->rep_handle;
	rep = db_rep->region;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	ret = 0;
	lsnp = NULL;
	msgtype = REP_INVALID;
	sendflags = 0;

	repflags = rep->flags;
	sync_state = rep->sync_state;
	/*
	 * If we are delayed we do not rerequest anything.
	 */
	if (FLD_ISSET(repflags, REP_F_DELAY))
		return (ret);
	gapflags = rereq ?
 REP_GAP_REREQUEST : 0;

	if (sync_state == SYNC_VERIFY) {
		MUTEX_LOCK(env, rep->mtx_clientdb);
		lsn = lp->verify_lsn;
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
		if (!IS_ZERO_LSN(lsn)) {
			msgtype = REP_VERIFY_REQ;
			lsnp = &lsn;
			sendflags = DB_REP_REREQUEST;
		}
	} else if (sync_state == SYNC_UPDATE) {
		/*
		 * UPDATE_REQ only goes to the master.
		 */
		msgtype = REP_UPDATE_REQ;
	} else if (sync_state == SYNC_PAGE) {
		/* Page gaps are rerequested directly by the page code. */
		REP_SYSTEM_LOCK(env);
		ret = __rep_pggap_req(env, rep, NULL, gapflags);
		REP_SYSTEM_UNLOCK(env);
	} else {
		/* Default: rerequest any missing log records. */
		MUTEX_LOCK(env, rep->mtx_clientdb);
		ret = __rep_loggap_req(env, rep, NULL, gapflags);
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
	}

	if (msgtype != REP_INVALID) {
		master = rep->master_id;
		if (master == DB_EID_INVALID)
			(void)__rep_send_message(env,
			    DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0);
		else
			(void)__rep_send_message(env,
			    master, msgtype, lsnp, NULL, 0, sendflags);
	}

	return (ret);
}

/*
 * __rep_check_doreq --
 * PUBLIC: int __rep_check_doreq __P((ENV *, REP *));
 *
 * Check if we need to send another request.  If so, compare with
 * the request limits the user might have set.  This assumes the
 * caller holds the REP->mtx_clientdb mutex.  Returns 1 if a request
 * needs to be made, and 0 if it does not.
 */
int
__rep_check_doreq(env, rep)
	ENV *env;
	REP *rep;
{

	DB_LOG *dblp;
	LOG *lp;
	db_timespec now;
	int req;

	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	__os_gettime(env, &now, 1);
	timespecsub(&now, &lp->rcvd_ts);
	req = timespeccmp(&now, &lp->wait_ts, >=);
	if (req) {
		/*
		 * Add wait_ts to itself to double it (exponential backoff,
		 * capped at the configured rep->max_gap below).
		 */
		timespecadd(&lp->wait_ts, &lp->wait_ts);
		if (timespeccmp(&lp->wait_ts, &rep->max_gap, >))
			lp->wait_ts = rep->max_gap;
		__os_gettime(env, &lp->rcvd_ts, 1);
	}
	return (req);
}

/*
 * __rep_skip_msg -
 *
 *	If we're in recovery we want to skip/ignore the message, but
 *	we also need to see if we need to re-request any retransmissions.
 */
static int
__rep_skip_msg(env, rep, eid, rectype)
	ENV *env;
	REP *rep;
	int eid;
	u_int32_t rectype;
{
	int do_req, ret;

	ret = 0;
	/*
	 * If we have a request message from a client then immediately
	 * send a REP_REREQUEST back to that client since we're skipping it.
	 */
	if (F_ISSET(rep, REP_F_CLIENT) && REP_MSG_REQ(rectype))
		do_req = 1;
	else {
		/* Check for need to retransmit. */
		MUTEX_LOCK(env, rep->mtx_clientdb);
		do_req = __rep_check_doreq(env, rep);
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
	}
	/*
	 * Don't respond to a MASTER_REQ with
	 * a MASTER_REQ or REREQUEST.
	 */
	if (do_req && rectype != REP_MASTER_REQ) {
		/*
		 * There are three cases:
		 * 1. If we don't know who the master is, then send MASTER_REQ.
		 * 2. If the message we're skipping came from the master,
		 * then we need to rerequest.
		 * 3. If the message didn't come from a master (i.e. client
		 * to client), then send a rerequest back to the sender so
		 * the sender can rerequest it elsewhere, if we are a client.
		 */
		if (rep->master_id == DB_EID_INVALID)	/* Case 1. */
			(void)__rep_send_message(env,
			    DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0);
		else if (eid == rep->master_id)		/* Case 2. */
			ret = __rep_resend_req(env, 0);
		else if (F_ISSET(rep, REP_F_CLIENT))	/* Case 3. */
			(void)__rep_send_message(env,
			    eid, REP_REREQUEST, NULL, NULL, 0, 0);
	}
	return (ret);
}

/*
 * __rep_check_missing --
 * PUBLIC: int __rep_check_missing __P((ENV *, u_int32_t, DB_LSN *));
 *
 * Check for and request any missing client information.
+ */ +int +__rep_check_missing(env, gen, master_perm_lsn) + ENV *env; + u_int32_t gen; + DB_LSN *master_perm_lsn; +{ + DB_LOG *dblp; + DB_LSN *end_lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGINFO *infop; + REP *rep; + __rep_fileinfo_args *curinfo; + int do_req, has_log_gap, has_page_gap, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + infop = env->reginfo; + has_log_gap = has_page_gap = ret = 0; + + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + /* + * Check if we are okay to proceed with this operation. If not, + * do not rerequest anything. + */ + if (!F_ISSET(rep, REP_F_CLIENT) || rep->master_id == DB_EID_INVALID || + gen != rep->gen || FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) { + REP_SYSTEM_UNLOCK(env); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + /* + * If this client is out-of-date, ask the master to identify + * itself so that this client will synchronize with the + * master's later generation. + */ + if (gen > rep->gen && __rep_check_doreq(env, rep)) + (void)__rep_send_message(env, + DB_EID_BROADCAST, REP_MASTER_REQ, + NULL, NULL, 0, 0); + goto out; + } + + /* + * Prevent message lockout by counting ourself here. + * Setting rep->msg_th will prevent a major system + * change, such as a role change or running recovery, from + * occuring before sending out any rerequests. + */ + rep->msg_th++; + REP_SYSTEM_UNLOCK(env); + + /* Check that it is time to request missing information. */ + if ((do_req = __rep_check_doreq(env, rep))) { + /* Check for interior or tail page gap. */ + REP_SYSTEM_LOCK(env); + if (rep->sync_state == SYNC_PAGE && + rep->curinfo_off != INVALID_ROFF) { + GET_CURINFO(rep, infop, curinfo); + has_page_gap = + rep->waiting_pg != PGNO_INVALID || + rep->ready_pg <= curinfo->max_pgno; + } + REP_SYSTEM_UNLOCK(env); + } + /* Check for interior or tail log gap. 
*/ + if (do_req && !has_page_gap) { + lp = dblp->reginfo.primary; + /* + * The LOG_COMPARE test is <= because ready_lsn is + * the next LSN we are expecting but we do not have + * it yet. If the needed LSN is at this LSN, it + * means we are missing the last record we need. + */ + if (rep->sync_state == SYNC_LOG) + end_lsn = &rep->last_lsn; + else + end_lsn = master_perm_lsn; + has_log_gap = !IS_ZERO_LSN(lp->waiting_lsn) || + LOG_COMPARE(&lp->ready_lsn, end_lsn) <= 0; + } + MUTEX_UNLOCK(env, rep->mtx_clientdb); + /* + * If it is time to send a request, only do so if we + * have a log gap or a page gap, or we need to resend an + * UPDATE_REQ or VERIFY_REQ, or we are in SYNC_LOG to keep + * requesting to the current known end of the log. + */ + do_req = do_req && (has_log_gap || has_page_gap || + rep->sync_state == SYNC_LOG || + rep->sync_state == SYNC_UPDATE || + rep->sync_state == SYNC_VERIFY); + /* + * Determines request type from current replication + * state and resends request. The request may have + * the DB_REP_ANYWHERE flag enabled if appropriate. + */ + if (do_req) + ret = __rep_resend_req(env, 0); + + REP_SYSTEM_LOCK(env); + rep->msg_th--; + REP_SYSTEM_UNLOCK(env); + +out: ENV_LEAVE(env, ip); + return (ret); +} + +static int +__rep_fire_newmaster(env, gen, master) + ENV *env; + u_int32_t gen; + int master; +{ + DB_REP *db_rep; + REP *rep; + + db_rep = env->rep_handle; + rep = db_rep->region; + + REP_EVENT_LOCK(env); + /* + * The firing of this event should be idempotent with respect to a + * particular generation number. 
+ */ + if (rep->newmaster_event_gen < gen) { + __rep_fire_event(env, DB_EVENT_REP_NEWMASTER, &master); + rep->newmaster_event_gen = gen; + } + REP_EVENT_UNLOCK(env); + return (0); +} + +static int +__rep_fire_startupdone(env, gen, master) + ENV *env; + u_int32_t gen; + int master; +{ + DB_REP *db_rep; + REP *rep; + + db_rep = env->rep_handle; + rep = db_rep->region; + + REP_EVENT_LOCK(env); + /* + * Usually NEWMASTER will already have been fired. But if not, fire + * it here now, to ensure the application receives events in the + * expected order. + */ + if (rep->newmaster_event_gen < gen) { + __rep_fire_event(env, DB_EVENT_REP_NEWMASTER, &master); + rep->newmaster_event_gen = gen; + } + + /* + * Caller already ensures that it only tries to fire STARTUPDONE once + * per generation. If we did not want to rely on that, we could add a + * simple boolean flag (to the set of data protected by the mtx_event). + * The precise meaning of that flag would be "STARTUPDONE has been fired + * for the generation value stored in `newmaster_event_gen'". Then the + * more accurate test here would be simply to check that flag, and fire + * the event (and set the flag) if it were not already set. + */ + if (rep->newmaster_event_gen == gen) + __rep_fire_event(env, DB_EVENT_REP_STARTUPDONE, NULL); + REP_EVENT_UNLOCK(env); + return (0); +} diff --git a/src/rep/rep_region.c b/src/rep/rep_region.c new file mode 100644 index 00000000..0348894b --- /dev/null +++ b/src/rep/rep_region.c @@ -0,0 +1,608 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +static int __rep_egen_init __P((ENV *, REP *)); +static int __rep_gen_init __P((ENV *, REP *)); + +/* + * __rep_open -- + * Initialize the shared memory state for the replication system. 
+ * + * PUBLIC: int __rep_open __P((ENV *)); + */ +int +__rep_open(env) + ENV *env; +{ + DB_REP *db_rep; + REGENV *renv; + REGINFO *infop; + REP *rep; + int i, ret; + char *p; + char fname[sizeof(REP_DIAGNAME) + 3]; + + db_rep = env->rep_handle; + infop = env->reginfo; + renv = infop->primary; + ret = 0; + DB_ASSERT(env, DBREP_DIAG_FILES < 100); + + if (renv->rep_off == INVALID_ROFF) { + /* Must create the region. */ + if ((ret = __env_alloc(infop, sizeof(REP), &rep)) != 0) + return (ret); + memset(rep, 0, sizeof(*rep)); + + /* + * We have the region; fill in the values. Some values may + * have been configured before we open the region, and those + * are taken from the DB_REP structure. + */ + if ((ret = __mutex_alloc( + env, MTX_REP_REGION, 0, &rep->mtx_region)) != 0) + return (ret); + /* + * Because we have no way to prevent deadlocks and cannot log + * changes made to it, we single-thread access to the client + * bookkeeping database. This is suboptimal, but it only gets + * accessed when messages arrive out-of-order, so it should + * stay small and not be used in a high-performance app. 
+ */ + if ((ret = __mutex_alloc( + env, MTX_REP_DATABASE, 0, &rep->mtx_clientdb)) != 0) + return (ret); + + if ((ret = __mutex_alloc( + env, MTX_REP_CHKPT, 0, &rep->mtx_ckp)) != 0) + return (ret); + + if ((ret = __mutex_alloc( + env, MTX_REP_DIAG, 0, &rep->mtx_diag)) != 0) + return (ret); + + if ((ret = __mutex_alloc( + env, MTX_REP_EVENT, 0, &rep->mtx_event)) != 0) + return (ret); + + if ((ret = __mutex_alloc( + env, MTX_REP_START, 0, &rep->mtx_repstart)) != 0) + return (ret); + + rep->diag_off = 0; + rep->diag_index = 0; + rep->newmaster_event_gen = 0; + rep->notified_egen = 0; + rep->curinfo_off = INVALID_ROFF; + rep->lease_off = INVALID_ROFF; + rep->originfo_off = INVALID_ROFF; + rep->tally_off = INVALID_ROFF; + rep->v2tally_off = INVALID_ROFF; + rep->eid = db_rep->eid; + rep->master_id = DB_EID_INVALID; + rep->version = DB_REPVERSION; + + SH_TAILQ_INIT(&rep->waiters); + SH_TAILQ_INIT(&rep->free_waiters); + + rep->config = db_rep->config; + /* + * In-memory replication files must be set before we open + * the env, so we know if it is in memory here. + */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) + FLD_CLR(env->dbenv->verbose, DB_VERB_REP_SYSTEM); + + if ((ret = __rep_gen_init(env, rep)) != 0) + return (ret); + if ((ret = __rep_egen_init(env, rep)) != 0) + return (ret); + rep->gbytes = db_rep->gbytes; + rep->bytes = db_rep->bytes; + rep->request_gap = db_rep->request_gap; + rep->max_gap = db_rep->max_gap; + rep->config_nsites = db_rep->config_nsites; + rep->elect_timeout = db_rep->elect_timeout; + rep->full_elect_timeout = db_rep->full_elect_timeout; + rep->lease_timeout = db_rep->lease_timeout; + rep->clock_skew = db_rep->clock_skew; + rep->clock_base = db_rep->clock_base; + timespecclear(&rep->lease_duration); + timespecclear(&rep->grant_expire); + rep->chkpt_delay = db_rep->chkpt_delay; + rep->priority = db_rep->my_priority; + + if ((ret = __rep_lockout_archive(env, rep)) != 0) + return (ret); + + /* Copy application type flags if set before env open. 
*/ + if (F_ISSET(db_rep, DBREP_APP_REPMGR)) + F_SET(rep, REP_F_APP_REPMGR); + if (F_ISSET(db_rep, DBREP_APP_BASEAPI)) + F_SET(rep, REP_F_APP_BASEAPI); + + /* Initialize encapsulating region. */ + renv->rep_off = R_OFFSET(infop, rep); + (void)time(&renv->rep_timestamp); + renv->op_timestamp = 0; + F_CLR(renv, DB_REGENV_REPLOCKED); + +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_open(env, rep)) != 0) + return (ret); +#endif + } else { + rep = R_ADDR(infop, renv->rep_off); + /* + * Prevent an application type mismatch between a process + * and the environment it is trying to join. + */ + if ((F_ISSET(db_rep, DBREP_APP_REPMGR) && + F_ISSET(rep, REP_F_APP_BASEAPI)) || + (F_ISSET(db_rep, DBREP_APP_BASEAPI) && + F_ISSET(rep, REP_F_APP_REPMGR))) { + __db_errx(env, DB_STR("3535", + "Application type mismatch for a replication " + "process joining the environment")); + return (EINVAL); + } +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_join(env, rep)) != 0) + return (ret); +#endif + } + + db_rep->region = rep; + /* + * Open the diagnostic message files for this env handle. We do + * this no matter if we created the environment or not. + */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) + goto out; + for (i = 0; i < DBREP_DIAG_FILES; i++) { + db_rep->diagfile[i] = NULL; + (void)snprintf(fname, sizeof(fname), REP_DIAGNAME, i); + if ((ret = __db_appname(env, DB_APP_NONE, fname, + NULL, &p)) != 0) + goto err; + ret = __os_open(env, p, 0, DB_OSO_CREATE, DB_MODE_600, + &db_rep->diagfile[i]); + __os_free(env, p); + if (ret != 0) + goto err; + } + +out: + return (0); + +err: + (void)__rep_close_diagfiles(env); + return (ret); +} + +/* + * __rep_close_diagfiles -- + * Close any diag message files that are open. 
+ * + * PUBLIC: int __rep_close_diagfiles __P((ENV *)); + */ +int +__rep_close_diagfiles(env) + ENV *env; +{ + DB_REP *db_rep; + int i, ret, t_ret; + + db_rep = env->rep_handle; + ret = t_ret = 0; + + for (i = 0; i < DBREP_DIAG_FILES; i++) { + if (db_rep->diagfile[i] != NULL && + (t_ret = __os_closehandle(env, db_rep->diagfile[i])) != 0 && + ret == 0) + ret = t_ret; + db_rep->diagfile[i] = NULL; + } + return (ret); +} + +/* + * __rep_env_refresh -- + * Replication-specific refresh of the ENV structure. + * + * PUBLIC: int __rep_env_refresh __P((ENV *)); + */ +int +__rep_env_refresh(env) + ENV *env; +{ + DB_REP *db_rep; + REGENV *renv; + REGINFO *infop; + REP *rep; + struct __rep_waiter *waiter; + int ret, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + renv = infop->primary; + ret = 0; + + /* + * If we are the last reference closing the env, clear our knowledge of + * belonging to a group and that there is a valid handle where + * rep_start had already been called. + */ + if (renv->refcnt == 1) { + F_CLR(rep, REP_F_GROUP_ESTD); + F_CLR(rep, REP_F_START_CALLED); + } + +#ifdef HAVE_REPLICATION_THREADS + ret = __repmgr_env_refresh(env); +#endif + + /* + * If a private region, return the memory to the heap. Not needed for + * filesystem-backed or system shared memory regions, that memory isn't + * owned by any particular process. + */ + if (F_ISSET(env, ENV_PRIVATE)) { + if (rep != NULL) { + ret = __mutex_free(env, &rep->mtx_region); + if ((t_ret = __mutex_free(env, + &rep->mtx_clientdb)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, + &rep->mtx_ckp)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, + &rep->mtx_diag)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, + &rep->mtx_event)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(env, + &rep->mtx_repstart)) != 0 && ret == 0) + ret = t_ret; + + /* Discard commit queue elements. 
*/ + DB_ASSERT(env, SH_TAILQ_EMPTY(&rep->waiters)); + while ((waiter = SH_TAILQ_FIRST(&rep->free_waiters, + __rep_waiter)) != NULL) { + SH_TAILQ_REMOVE(&rep->free_waiters, + waiter, links, __rep_waiter); + __env_alloc_free(env->reginfo, waiter); + } + + if (rep->curinfo_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->curinfo_off)); + if (rep->lease_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->lease_off)); + if (rep->originfo_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->originfo_off)); + if (rep->tally_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->tally_off)); + if (rep->v2tally_off != INVALID_ROFF) + __env_alloc_free(infop, + R_ADDR(infop, rep->v2tally_off)); + } + + if (renv->rep_off != INVALID_ROFF) + __env_alloc_free(infop, R_ADDR(infop, renv->rep_off)); + } + if ((t_ret = __rep_close_diagfiles(env)) != 0 && ret == 0) + ret = t_ret; + + env->rep_handle->region = NULL; + return (ret); +} + +/* + * __rep_close -- + * Shut down all of replication. + * + * PUBLIC: int __rep_env_close __P((ENV *)); + */ +int +__rep_env_close(env) + ENV *env; +{ + int ret, t_ret; + + ret = __rep_preclose(env); + if ((t_ret = __rep_closefiles(env)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __rep_preclose -- + * If we are a client, shut down our client database and send + * any outstanding bulk buffers. + * + * PUBLIC: int __rep_preclose __P((ENV *)); + */ +int +__rep_preclose(env) + ENV *env; +{ + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + DB *dbp; + REP_BULK bulk; + int ret, t_ret; + + ret = 0; + + db_rep = env->rep_handle; + dblp = env->lg_handle; + + /* + * If we have a rep region, we can preclose. Otherwise, return. + * If we're on an error path from env open, we may not have + * a region, even though we have a handle. 
+ */ + if (db_rep == NULL || db_rep->region == NULL) + return (ret); + + if ((dbp = db_rep->lsn_db) != NULL) { + ret = __db_close(dbp, NULL, DB_NOSYNC); + db_rep->lsn_db = NULL; + } + + MUTEX_LOCK(env, db_rep->region->mtx_clientdb); + if (db_rep->rep_db != NULL) { + if ((t_ret = __db_close(db_rep->rep_db, + NULL, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + db_rep->rep_db = NULL; + } + /* + * We could be called early in an env_open error path, so + * only do this if we have a log region set up. + */ + if (dblp == NULL) + goto out; + lp = dblp->reginfo.primary; + /* + * If we have something in the bulk buffer, send anything in it + * if we are able to. + */ + if (lp->bulk_off != 0 && db_rep->send != NULL) { + memset(&bulk, 0, sizeof(bulk)); + bulk.addr = R_ADDR(&dblp->reginfo, lp->bulk_buf); + bulk.offp = &lp->bulk_off; + bulk.len = lp->bulk_len; + bulk.type = REP_BULK_LOG; + bulk.eid = DB_EID_BROADCAST; + bulk.flagsp = &lp->bulk_flags; + /* + * Ignore send errors here. This can be called on the + * env->close path - make a best attempt to send. + */ + (void)__rep_send_bulk(env, &bulk, 0); + } +out: MUTEX_UNLOCK(env, db_rep->region->mtx_clientdb); + return (ret); +} + +/* + * __rep_closefiles -- + * If we were a client and are now a master, close all databases + * we've opened while applying messages as a client. This can + * be called from __env_close and we need to check if the env, + * handles and regions are set up, or not. + * + * PUBLIC: int __rep_closefiles __P((ENV *)); + */ +int +__rep_closefiles(env) + ENV *env; +{ + DB_LOG *dblp; + DB_REP *db_rep; + int ret; + + ret = 0; + + db_rep = env->rep_handle; + dblp = env->lg_handle; + + if (db_rep == NULL || db_rep->region == NULL) + return (ret); + if (dblp == NULL) + return (ret); + if ((ret = __dbreg_close_files(env, 0)) == 0) + F_CLR(db_rep, DBREP_OPENFILES); + + return (ret); +} + +/* + * __rep_egen_init -- + * Initialize the value of egen in the region. 
Called only from + * __rep_region_init, which is guaranteed to be single-threaded + * as we create the rep region. We set the rep->egen field which + * is normally protected by db_rep->region->mutex. + */ +static int +__rep_egen_init(env, rep) + ENV *env; + REP *rep; +{ + DB_FH *fhp; + int ret; + size_t cnt; + char *p; + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_EGENNAME, NULL, &p)) != 0) + return (ret); + /* + * If the file doesn't exist, create it now and initialize with 1. + */ + if (__os_exists(env, p, NULL) != 0) { + rep->egen = rep->gen + 1; + if ((ret = __rep_write_egen(env, rep, rep->egen)) != 0) + goto err; + } else { + /* + * File exists, open it and read in our egen. + */ + if ((ret = __os_open(env, p, 0, + DB_OSO_RDONLY, DB_MODE_600, &fhp)) != 0) + goto err; + if ((ret = __os_read(env, fhp, &rep->egen, sizeof(u_int32_t), + &cnt)) != 0 || cnt != sizeof(u_int32_t)) + goto err1; + RPRINT(env, (env, DB_VERB_REP_MISC, "Read in egen %lu", + (u_long)rep->egen)); +err1: (void)__os_closehandle(env, fhp); + } +err: __os_free(env, p); + return (ret); +} + +/* + * __rep_write_egen -- + * Write out the egen into the env file. + * + * PUBLIC: int __rep_write_egen __P((ENV *, REP *, u_int32_t)); + * + * Caller relies on us not dropping the REP_SYSTEM_LOCK. + */ +int +__rep_write_egen(env, rep, egen) + ENV *env; + REP *rep; + u_int32_t egen; +{ + DB_FH *fhp; + int ret; + size_t cnt; + char *p; + + /* + * If running in-memory replication, return without any file + * operations. 
+ */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) { + return (0); + } + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_EGENNAME, NULL, &p)) != 0) + return (ret); + if ((ret = __os_open( + env, p, 0, DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) == 0) { + if ((ret = __os_write(env, fhp, &egen, sizeof(u_int32_t), + &cnt)) != 0 || ((ret = __os_fsync(env, fhp)) != 0)) + __db_err(env, ret, "%s", p); + (void)__os_closehandle(env, fhp); + } + __os_free(env, p); + return (ret); +} + +/* + * __rep_gen_init -- + * Initialize the value of gen in the region. Called only from + * __rep_region_init, which is guaranteed to be single-threaded + * as we create the rep region. We set the rep->gen field which + * is normally protected by db_rep->region->mutex. + */ +static int +__rep_gen_init(env, rep) + ENV *env; + REP *rep; +{ + DB_FH *fhp; + int ret; + size_t cnt; + char *p; + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_GENNAME, NULL, &p)) != 0) + return (ret); + + if (__os_exists(env, p, NULL) != 0) { + /* + * File doesn't exist, create it now and initialize with 0. + */ + SET_GEN(0); + if ((ret = __rep_write_gen(env, rep, rep->gen)) != 0) + goto err; + } else { + /* + * File exists, open it and read in our gen. + */ + if ((ret = __os_open(env, p, 0, + DB_OSO_RDONLY, DB_MODE_600, &fhp)) != 0) + goto err; + if ((ret = __os_read(env, fhp, &rep->gen, sizeof(u_int32_t), + &cnt)) < 0 || cnt == 0) + goto err1; + RPRINT(env, (env, DB_VERB_REP_MISC, "Read in gen %lu", + (u_long)rep->gen)); +err1: (void)__os_closehandle(env, fhp); + } +err: __os_free(env, p); + return (ret); +} + +/* + * __rep_write_gen -- + * Write out the gen into the env file. + * + * PUBLIC: int __rep_write_gen __P((ENV *, REP *, u_int32_t)); + */ +int +__rep_write_gen(env, rep, gen) + ENV *env; + REP *rep; + u_int32_t gen; +{ + DB_FH *fhp; + int ret; + size_t cnt; + char *p; + + /* + * If running in-memory replication, return without any file + * operations. 
+ */ + if (FLD_ISSET(rep->config, REP_C_INMEM)) { + return (0); + } + + if ((ret = __db_appname(env, + DB_APP_NONE, REP_GENNAME, NULL, &p)) != 0) + return (ret); + if ((ret = __os_open( + env, p, 0, DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) == 0) { + if ((ret = __os_write(env, fhp, &gen, sizeof(u_int32_t), + &cnt)) != 0 || ((ret = __os_fsync(env, fhp)) != 0)) + __db_err(env, ret, "%s", p); + (void)__os_closehandle(env, fhp); + } + __os_free(env, p); + return (ret); +} diff --git a/src/rep/rep_stat.c b/src/rep/rep_stat.c new file mode 100644 index 00000000..c0dbecd0 --- /dev/null +++ b/src/rep/rep_stat.c @@ -0,0 +1,692 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifdef HAVE_STATISTICS +static int __rep_print_all __P((ENV *, u_int32_t)); +static int __rep_print_stats __P((ENV *, u_int32_t)); +static int __rep_stat __P((ENV *, DB_REP_STAT **, u_int32_t)); +static int __rep_stat_summary_print __P((ENV *)); +static const char *__rep_syncstate_to_string __P((repsync_t)); + +/* + * Print the individual statistic for items that appear both in the full and + * the summary replication statistics output. + */ +#define PRINT_LOGQUEUED(sp) do { \ + __db_dl(env, "Number of log records currently queued", \ + (u_long)(sp)->st_log_queued); \ +} while (0) + +#define PRINT_MAXPERMLSN(sp) do { \ + __db_msg(env, "%lu/%lu\t%s", \ + (u_long)(sp)->st_max_perm_lsn.file, \ + (u_long)(sp)->st_max_perm_lsn.offset, \ + (sp)->st_max_perm_lsn.file == 0 ? 
\ + "No maximum permanent LSN" : \ + "Maximum permanent LSN"); \ +} while (0) + +#define PRINT_MSGSRECOVER(sp) do { \ + __db_dl(env, "Number of messages ignored due to pending recovery", \ + (u_long)(sp)->st_msgs_recover); \ +} while (0) + +#define PRINT_MSGSSENDFAILURES(sp) do { \ + __db_dl(env, "Number of failed message sends", \ + (u_long)(sp)->st_msgs_send_failures); \ +} while (0) + +#define PRINT_STARTUPCOMPLETE(sp) do { \ + if ((sp)->st_startup_complete == 0) \ + __db_msg(env, "Startup incomplete"); \ + else \ + __db_msg(env, "Startup complete"); \ +} while (0) + +#define PRINT_STATUS(sp, is_client) do { \ + is_client = 0; \ + switch ((sp)->st_status) { \ + case DB_REP_MASTER: \ + __db_msg(env, \ + "Environment configured as a replication master"); \ + break; \ + case DB_REP_CLIENT: \ + __db_msg(env, \ + "Environment configured as a replication client"); \ + is_client = 1; \ + break; \ + default: \ + __db_msg(env, \ + "Environment not configured for replication"); \ + break; \ + } \ +} while (0) + +/* + * __rep_stat_pp -- + * ENV->rep_stat pre/post processing. + * + * PUBLIC: int __rep_stat_pp __P((DB_ENV *, DB_REP_STAT **, u_int32_t)); + */ +int +__rep_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_REP_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_stat", DB_INIT_REP); + + if ((ret = __db_fchk(env, + "DB_ENV->rep_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + ret = __rep_stat(env, statp, flags); + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __rep_stat -- + * ENV->rep_stat. 
+ */ +static int +__rep_stat(env, statp, flags) + ENV *env; + DB_REP_STAT **statp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_REP *db_rep; + DB_REP_STAT *stats; + LOG *lp; + REP *rep; + u_int32_t startupdone; + uintmax_t queued; + int dolock, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + *statp = NULL; + + /* Allocate a stat struct to return to the user. */ + if ((ret = __os_umalloc(env, sizeof(DB_REP_STAT), &stats)) != 0) + return (ret); + + /* + * Read without holding the lock. If we are in client recovery, we + * copy just the stats struct so we won't block. We only copy out + * those stats that don't require acquiring any mutex. + */ + dolock = IS_REP_RECOVERING(rep) ? 0 : 1; + memcpy(stats, &rep->stat, sizeof(*stats)); + + /* Copy out election stats. */ + if (FLD_ISSET(rep->elect_flags, REP_E_PHASE1)) + stats->st_election_status = 1; + else if (FLD_ISSET(rep->elect_flags, REP_E_PHASE2)) + stats->st_election_status = 2; + + stats->st_election_nsites = rep->sites; + stats->st_election_cur_winner = rep->winner; + stats->st_election_priority = rep->w_priority; + stats->st_election_gen = rep->w_gen; + stats->st_election_datagen = rep->w_datagen; + stats->st_election_lsn = rep->w_lsn; + stats->st_election_votes = rep->votes; + stats->st_election_nvotes = rep->nvotes; + stats->st_election_tiebreaker = rep->w_tiebreaker; + + /* Copy out other info that's protected by the rep mutex. 
*/ + stats->st_env_id = rep->eid; + stats->st_env_priority = rep->priority; + stats->st_nsites = rep->nsites; + stats->st_master = rep->master_id; + stats->st_gen = rep->gen; + stats->st_egen = rep->egen; + + if (F_ISSET(rep, REP_F_MASTER)) + stats->st_status = DB_REP_MASTER; + else if (F_ISSET(rep, REP_F_CLIENT)) + stats->st_status = DB_REP_CLIENT; + else + stats->st_status = 0; + + if (LF_ISSET(DB_STAT_CLEAR)) { + queued = rep->stat.st_log_queued; + startupdone = rep->stat.st_startup_complete; + memset(&rep->stat, 0, sizeof(rep->stat)); + rep->stat.st_log_queued = rep->stat.st_log_queued_total = + rep->stat.st_log_queued_max = queued; + rep->stat.st_startup_complete = startupdone; + } + + /* + * Log-related replication info is stored in the log system and + * protected by the log region lock. + */ + if (dolock) + MUTEX_LOCK(env, rep->mtx_clientdb); + if (F_ISSET(rep, REP_F_CLIENT)) { + stats->st_next_lsn = lp->ready_lsn; + stats->st_waiting_lsn = lp->waiting_lsn; + stats->st_next_pg = rep->ready_pg; + stats->st_waiting_pg = rep->waiting_pg; + stats->st_max_lease_sec = (u_int32_t)lp->max_lease_ts.tv_sec; + stats->st_max_lease_usec = (u_int32_t) + (lp->max_lease_ts.tv_nsec / NS_PER_US); + } else { + if (F_ISSET(rep, REP_F_MASTER)) { + LOG_SYSTEM_LOCK(env); + stats->st_next_lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + } else + ZERO_LSN(stats->st_next_lsn); + ZERO_LSN(stats->st_waiting_lsn); + stats->st_max_lease_sec = 0; + stats->st_max_lease_usec = 0; + } + stats->st_max_perm_lsn = lp->max_perm_lsn; + if (dolock) + MUTEX_UNLOCK(env, rep->mtx_clientdb); + + *statp = stats; + return (0); +} + +/* + * __rep_stat_print_pp -- + * ENV->rep_stat_print pre/post processing. 
+ * + * PUBLIC: int __rep_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__rep_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->rep_stat_print", DB_INIT_REP); + + if ((ret = __db_fchk(env, "DB_ENV->rep_stat_print", + flags, DB_STAT_ALL | DB_STAT_CLEAR | DB_STAT_SUMMARY)) != 0) + return (ret); + + ENV_ENTER(env, ip); + ret = __rep_stat_print(env, flags); + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __rep_stat_print -- + * ENV->rep_stat_print method. + * + * PUBLIC: int __rep_stat_print __P((ENV *, u_int32_t)); + */ +int +__rep_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (LF_ISSET(DB_STAT_SUMMARY)) + return (__rep_stat_summary_print(env)); + + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __rep_print_stats(env, orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __rep_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __rep_print_stats -- + * Print out default statistics. + */ +static int +__rep_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_REP_STAT *sp; + int is_client, ret; + char *p; + + if ((ret = __rep_stat(env, &sp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default replication region information:"); + PRINT_STATUS(sp, is_client); + + __db_msg(env, "%lu/%lu\t%s", + (u_long)sp->st_next_lsn.file, (u_long)sp->st_next_lsn.offset, + is_client ? "Next LSN expected" : "Next LSN to be used"); + __db_msg(env, "%lu/%lu\t%s", + (u_long)sp->st_waiting_lsn.file, (u_long)sp->st_waiting_lsn.offset, + sp->st_waiting_lsn.file == 0 ? 
+ "Not waiting for any missed log records" : + "LSN of first log record we have after missed log records"); + PRINT_MAXPERMLSN(sp); + + __db_dl(env, "Next page number expected", (u_long)sp->st_next_pg); + p = sp->st_waiting_pg == PGNO_INVALID ? + "Not waiting for any missed pages" : + "Page number of first page we have after missed pages"; + __db_msg(env, "%lu\t%s", (u_long)sp->st_waiting_pg, p); + __db_dl(env, + "Number of duplicate master conditions originally detected at this site", + (u_long)sp->st_dupmasters); + if (sp->st_env_id != DB_EID_INVALID) + __db_dl(env, "Current environment ID", (u_long)sp->st_env_id); + else + __db_msg(env, "No current environment ID"); + __db_dl(env, + "Current environment priority", (u_long)sp->st_env_priority); + __db_dl(env, "Current generation number", (u_long)sp->st_gen); + __db_dl(env, + "Election generation number for the current or next election", + (u_long)sp->st_egen); + __db_dl(env, "Number of lease validity checks", + (u_long)sp->st_lease_chk); + __db_dl(env, "Number of invalid lease validity checks", + (u_long)sp->st_lease_chk_misses); + __db_dl(env, + "Number of lease refresh attempts during lease validity checks", + (u_long)sp->st_lease_chk_refresh); + __db_dl(env, "Number of live messages sent while using leases", + (u_long)sp->st_lease_sends); + __db_dl(env, "Number of duplicate log records received", + (u_long)sp->st_log_duplicated); + PRINT_LOGQUEUED(sp); + __db_dl(env, "Maximum number of log records ever queued at once", + (u_long)sp->st_log_queued_max); + __db_dl(env, "Total number of log records queued", + (u_long)sp->st_log_queued_total); + __db_dl(env, + "Number of log records received and appended to the log", + (u_long)sp->st_log_records); + __db_dl(env, "Number of log records missed and requested", + (u_long)sp->st_log_requested); + if (sp->st_master != DB_EID_INVALID) + __db_dl(env, "Current master ID", (u_long)sp->st_master); + else + __db_msg(env, "No current master ID"); + __db_dl(env, "Number of 
times the master has changed", + (u_long)sp->st_master_changes); + __db_dl(env, + "Number of messages received with a bad generation number", + (u_long)sp->st_msgs_badgen); + __db_dl(env, "Number of messages received and processed", + (u_long)sp->st_msgs_processed); + PRINT_MSGSRECOVER(sp); + PRINT_MSGSSENDFAILURES(sp); + __db_dl(env, "Number of messages sent", (u_long)sp->st_msgs_sent); + __db_dl(env, + "Number of new site messages received", (u_long)sp->st_newsites); + __db_dl(env, + "Number of environments used in the last election", + (u_long)(sp)->st_nsites); + __db_dl(env, "Transmission limited", (u_long)sp->st_nthrottles); + __db_dl(env, "Number of outdated conditions detected", + (u_long)sp->st_outdated); + __db_dl(env, "Number of duplicate page records received", + (u_long)sp->st_pg_duplicated); + __db_dl(env, "Number of page records received and added to databases", + (u_long)sp->st_pg_records); + __db_dl(env, "Number of page records missed and requested", + (u_long)sp->st_pg_requested); + PRINT_STARTUPCOMPLETE(sp); + __db_dl(env, + "Number of transactions applied", (u_long)sp->st_txns_applied); + + __db_dl(env, "Number of startsync messages delayed", + (u_long)sp->st_startsync_delayed); + + __db_dl(env, "Number of elections held", (u_long)sp->st_elections); + __db_dl(env, + "Number of elections won", (u_long)sp->st_elections_won); + + if (sp->st_election_status == 0) { + __db_msg(env, "No election in progress"); + if (sp->st_election_sec > 0 || sp->st_election_usec > 0) + __db_msg(env, + "%lu.%.6lu\tDuration of last election (seconds)", + (u_long)sp->st_election_sec, + (u_long)sp->st_election_usec); + } else { + __db_dl(env, "Current election phase", + (u_long)sp->st_election_status); + __db_dl(env, + "Environment ID of the winner of the current or last election", + (u_long)sp->st_election_cur_winner); + __db_dl(env, + "Master generation number of the winner of the current or last election", + (u_long)sp->st_election_gen); + __db_dl(env, + "Master data 
generation number of the winner of the current or last election", + (u_long)sp->st_election_datagen); + __db_msg(env, + "%lu/%lu\tMaximum LSN of the winner of the current or last election", + (u_long)sp->st_election_lsn.file, + (u_long)sp->st_election_lsn.offset); + __db_dl(env, + "Number of sites responding to this site during the current election", + (u_long)sp->st_election_nsites); + __db_dl(env, + "Number of votes required in the current or last election", + (u_long)sp->st_election_nvotes); + __db_dl(env, + "Priority of the winner of the current or last election", + (u_long)sp->st_election_priority); + __db_dl(env, + "Tiebreaker value of the winner of the current or last election", + (u_long)sp->st_election_tiebreaker); + __db_dl(env, + "Number of votes received during the current election", + (u_long)sp->st_election_votes); + } + __db_dl(env, "Number of bulk buffer sends triggered by full buffer", + (u_long)sp->st_bulk_fills); + __db_dl(env, "Number of single records exceeding bulk buffer size", + (u_long)sp->st_bulk_overflows); + __db_dl(env, "Number of records added to a bulk buffer", + (u_long)sp->st_bulk_records); + __db_dl(env, "Number of bulk buffers sent", + (u_long)sp->st_bulk_transfers); + __db_dl(env, "Number of re-request messages received", + (u_long)sp->st_client_rerequests); + __db_dl(env, + "Number of request messages this client failed to process", + (u_long)sp->st_client_svc_miss); + __db_dl(env, "Number of request messages received by this client", + (u_long)sp->st_client_svc_req); + if (sp->st_max_lease_sec > 0 || sp->st_max_lease_usec > 0) + __db_msg(env, + "%lu.%.6lu\tDuration of maximum lease (seconds)", + (u_long)sp->st_max_lease_sec, + (u_long)sp->st_max_lease_usec); + + __os_ufree(env, sp); + + return (0); +} + +/* + * __rep_print_all -- + * Display debugging replication region statistics. 
+ */ +static int +__rep_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN rep_cfn[] = { + { REP_C_2SITE_STRICT, "REP_C_2SITE_STRICT" }, + { REP_C_AUTOINIT, "REP_C_AUTOINIT" }, + { REP_C_AUTOROLLBACK, "REP_C_AUTOROLLBACK" }, + { REP_C_BULK, "REP_C_BULK" }, + { REP_C_DELAYCLIENT, "REP_C_DELAYCLIENT" }, + { REP_C_ELECTIONS, "REP_C_ELECTIONS" }, + { REP_C_INMEM, "REP_C_INMEM" }, + { REP_C_LEASE, "REP_C_LEASE" }, + { REP_C_NOWAIT, "REP_C_NOWAIT" }, + { 0, NULL } + }; + static const FN rep_efn[] = { + { REP_E_PHASE0, "REP_E_PHASE0" }, + { REP_E_PHASE1, "REP_E_PHASE1" }, + { REP_E_PHASE2, "REP_E_PHASE2" }, + { REP_E_TALLY, "REP_E_TALLY" }, + { 0, NULL } + }; + static const FN rep_fn[] = { + { REP_F_ABBREVIATED, "REP_F_ABBREVIATED" }, + { REP_F_APP_BASEAPI, "REP_F_APP_BASEAPI" }, + { REP_F_APP_REPMGR, "REP_F_APP_REPMGR" }, + { REP_F_CLIENT, "REP_F_CLIENT" }, + { REP_F_DELAY, "REP_F_DELAY" }, + { REP_F_GROUP_ESTD, "REP_F_GROUP_ESTD" }, + { REP_F_LEASE_EXPIRED, "REP_F_LEASE_EXPIRED" }, + { REP_F_MASTER, "REP_F_MASTER" }, + { REP_F_MASTERELECT, "REP_F_MASTERELECT" }, + { REP_F_NEWFILE, "REP_F_NEWFILE" }, + { REP_F_NIMDBS_LOADED, "REP_F_NIMDBS_LOADED" }, + { REP_F_SKIPPED_APPLY, "REP_F_SKIPPED_APPLY" }, + { REP_F_START_CALLED, "REP_F_START_CALLED" }, + { 0, NULL } + }; + static const FN rep_lfn[] = { + { REP_LOCKOUT_API, "REP_LOCKOUT_API" }, + { REP_LOCKOUT_APPLY, "REP_LOCKOUT_APPLY" }, + { REP_LOCKOUT_ARCHIVE, "REP_LOCKOUT_ARCHIVE" }, + { REP_LOCKOUT_MSG, "REP_LOCKOUT_MSG" }, + { REP_LOCKOUT_OP, "REP_LOCKOUT_OP" }, + { 0, NULL } + }; + static const FN dbrep_fn[] = { + { DBREP_APP_BASEAPI, "DBREP_APP_BASEAPI" }, + { DBREP_APP_REPMGR, "DBREP_APP_REPMGR" }, + { DBREP_OPENFILES, "DBREP_OPENFILES" }, + { 0, NULL } + }; + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGENV *renv; + REGINFO *infop; + REP *rep; + char time_buf[CTIME_BUFLEN]; + + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + renv = 
infop->primary; + ENV_ENTER(env, ip); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_REP handle information:"); + + if (db_rep->rep_db == NULL) + STAT_ISSET("Bookkeeping database", db_rep->rep_db); + else + (void)__db_stat_print(db_rep->rep_db, ip, flags); + + __db_prflags(env, NULL, db_rep->flags, dbrep_fn, NULL, "\tFlags"); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "REP handle information:"); + __mutex_print_debug_single(env, + "Replication region mutex", rep->mtx_region, flags); + __mutex_print_debug_single(env, + "Bookkeeping database mutex", rep->mtx_clientdb, flags); + + STAT_LONG("Environment ID", rep->eid); + STAT_LONG("Master environment ID", rep->master_id); + STAT_ULONG("Election generation", rep->egen); + STAT_ULONG("Last active egen", rep->spent_egen); + STAT_ULONG("Master generation", rep->gen); + STAT_LONG("Space allocated for sites", rep->asites); + STAT_LONG("Sites in group", rep->nsites); + STAT_LONG("Votes needed for election", rep->nvotes); + STAT_LONG("Priority in election", rep->priority); + __db_dlbytes(env, "Limit on data sent in a single call", + rep->gbytes, (u_long)0, rep->bytes); + STAT_LONG("Request gap seconds", rep->request_gap.tv_sec); + STAT_LONG("Request gap microseconds", + rep->request_gap.tv_nsec / NS_PER_US); + STAT_LONG("Maximum gap seconds", rep->max_gap.tv_sec); + STAT_LONG("Maximum gap microseconds", + rep->max_gap.tv_nsec / NS_PER_US); + + STAT_ULONG("Callers in rep_proc_msg", rep->msg_th); + STAT_ULONG("Callers in rep_elect", rep->elect_th); + STAT_ULONG("Library handle count", rep->handle_cnt); + STAT_ULONG("Multi-step operation count", rep->op_cnt); + __db_msg(env, "%.24s\tRecovery timestamp", + renv->rep_timestamp == 0 ? 
+ "0" : __os_ctime(&renv->rep_timestamp, time_buf)); + + STAT_LONG("Sites heard from", rep->sites); + STAT_LONG("Current winner", rep->winner); + STAT_LONG("Winner priority", rep->w_priority); + STAT_ULONG("Winner generation", rep->w_gen); + STAT_ULONG("Winner data generation", rep->w_datagen); + STAT_LSN("Winner LSN", &rep->w_lsn); + STAT_LONG("Winner tiebreaker", rep->w_tiebreaker); + STAT_LONG("Votes for this site", rep->votes); + + STAT_STRING("Synchronization State", + __rep_syncstate_to_string(rep->sync_state)); + __db_prflags(env, NULL, rep->config, rep_cfn, NULL, + "\tConfig Flags"); + __db_prflags(env, NULL, rep->elect_flags, rep_efn, NULL, + "\tElect Flags"); + __db_prflags(env, NULL, rep->lockout_flags, rep_lfn, + NULL, "\tLockout Flags"); + __db_prflags(env, NULL, rep->flags, rep_fn, NULL, "\tFlags"); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "LOG replication information:"); + MUTEX_LOCK(env, rep->mtx_clientdb); + dblp = env->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + STAT_LSN("First log record after a gap", &lp->waiting_lsn); + STAT_LSN("Maximum permanent LSN processed", &lp->max_perm_lsn); + STAT_LSN("LSN waiting to verify", &lp->verify_lsn); + STAT_LSN("Maximum LSN requested", &lp->max_wait_lsn); + STAT_LONG("Time to wait before requesting seconds", lp->wait_ts.tv_sec); + STAT_LONG("Time to wait before requesting microseconds", + lp->wait_ts.tv_nsec / NS_PER_US); + STAT_LSN("Next LSN expected", &lp->ready_lsn); + STAT_LONG("Maximum lease timestamp seconds", lp->max_lease_ts.tv_sec); + STAT_LONG("Maximum lease timestamp microseconds", + lp->max_lease_ts.tv_nsec / NS_PER_US); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ENV_LEAVE(env, ip); + + return (0); +} + +static const char * +__rep_syncstate_to_string(state) + repsync_t state; +{ + switch (state) { + case SYNC_OFF: + return ("Not Synchronizing"); + case SYNC_LOG: + return ("SYNC_LOG"); + case SYNC_PAGE: + return ("SYNC_PAGE"); + case SYNC_UPDATE: + return ("SYNC_UPDATE"); + 
case SYNC_VERIFY: + return ("SYNC_VERIFY"); + default: + break; + } + return ("UNKNOWN STATE"); +} + +/* + * __rep_stat_summary_print -- + * Print out a brief summary of replication statistics. + */ +static int +__rep_stat_summary_print(env) + ENV *env; +{ + DB_REP *db_rep; + DB_REP_STAT *sp; + REP *rep; + int is_client, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + if ((ret = __rep_stat(env, &sp, 0)) == 0) { + PRINT_STATUS(sp, is_client); + if (is_client) + PRINT_STARTUPCOMPLETE(sp); + PRINT_MAXPERMLSN(sp); + /* + * Use the number of sites that is kept up-to-date most + * frequently. The rep_stat st_nsites is only current + * as of the last election. + */ + __db_dl(env, "Number of environments in the replication group", + (u_long)rep->config_nsites); + PRINT_MSGSSENDFAILURES(sp); + PRINT_MSGSRECOVER(sp); + PRINT_LOGQUEUED(sp); + __os_ufree(env, sp); + } + return (ret); +} + +#else /* !HAVE_STATISTICS */ + +int +__rep_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_REP_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__rep_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/rep/rep_stub.c b/src/rep/rep_stub.c new file mode 100644 index 00000000..440feef2 --- /dev/null +++ b/src/rep/rep_stub.c @@ -0,0 +1,425 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef HAVE_REPLICATION +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +/* + * If the library wasn't compiled with replication support, various routines + * aren't available. Stub them here, returning an appropriate error. 
+ */ +static int __db_norep __P((ENV *)); + +/* + * __db_norep -- + * Error when a Berkeley DB build doesn't include replication support. + */ +static int +__db_norep(env) + ENV *env; +{ + __db_errx(env, DB_STR("3581", + "library build did not include support for replication")); + return (DB_OPNOTSUP); +} + +int +__db_rep_enter(dbp, checkgen, checklock, return_now) + DB *dbp; + int checkgen, checklock, return_now; +{ + COMPQUIET(checkgen, 0); + COMPQUIET(checklock, 0); + COMPQUIET(return_now, 0); + return (__db_norep(dbp->env)); +} + +int +__env_rep_enter(env, checklock) + ENV *env; + int checklock; +{ + COMPQUIET(checklock, 0); + return (__db_norep(env)); +} + +int +__env_db_rep_exit(env) + ENV *env; +{ + return (__db_norep(env)); +} + +int +__op_rep_enter(env, local_nowait, obey_user) + ENV *env; + int local_nowait, obey_user; +{ + COMPQUIET(local_nowait, 0); + COMPQUIET(obey_user, 0); + return (__db_norep(env)); +} + +int +__op_rep_exit(env) + ENV *env; +{ + return (__db_norep(env)); +} + +int +__archive_rep_enter(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__archive_rep_exit(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__rep_bulk_message(env, bulkp, repth, lsnp, dbt, flags) + ENV *env; + REP_BULK *bulkp; + REP_THROTTLE *repth; + DB_LSN *lsnp; + const DBT *dbt; + u_int32_t flags; +{ + COMPQUIET(bulkp, NULL); + COMPQUIET(repth, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(dbt, NULL); + COMPQUIET(flags, 0); + return (__db_norep(env)); +} + +int +__rep_env_refresh(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__rep_elect_pp(dbenv, nsites, nvotes, flags) + DB_ENV *dbenv; + u_int32_t nsites, nvotes; + u_int32_t flags; +{ + COMPQUIET(nsites, 0); + COMPQUIET(nvotes, 0); + COMPQUIET(flags, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_flush(dbenv) + DB_ENV *dbenv; +{ + return (__db_norep(dbenv->env)); +} + +int +__rep_lease_check(env, refresh) + ENV *env; + int refresh; +{ + 
COMPQUIET(refresh, 0); + return (__db_norep(env)); +} + +int +__rep_lease_expire(env) + ENV *env; +{ + return (__db_norep(env)); +} + +void +__rep_msg(env, msg) + const ENV *env; + const char *msg; +{ + COMPQUIET(env, NULL); + COMPQUIET(msg, NULL); + return; +} + +int +__rep_get_clockskew(dbenv, fast_clockp, slow_clockp) + DB_ENV *dbenv; + u_int32_t *fast_clockp, *slow_clockp; +{ + COMPQUIET(fast_clockp, NULL); + COMPQUIET(slow_clockp, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_clockskew(dbenv, fast_clock, slow_clock) + DB_ENV *dbenv; + u_int32_t fast_clock, slow_clock; +{ + COMPQUIET(fast_clock, 0); + COMPQUIET(slow_clock, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_nsites_pp(dbenv, n) + DB_ENV *dbenv; + u_int32_t n; +{ + COMPQUIET(n, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_get_nsites(dbenv, n) + DB_ENV *dbenv; + u_int32_t *n; +{ + COMPQUIET(n, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_priority(dbenv, priority) + DB_ENV *dbenv; + u_int32_t priority; +{ + COMPQUIET(priority, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_get_priority(dbenv, priority) + DB_ENV *dbenv; + u_int32_t *priority; +{ + COMPQUIET(priority, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_timeout(dbenv, which, timeout) + DB_ENV *dbenv; + int which; + db_timeout_t timeout; +{ + COMPQUIET(which, 0); + COMPQUIET(timeout, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_get_timeout(dbenv, which, timeout) + DB_ENV *dbenv; + int which; + db_timeout_t *timeout; +{ + COMPQUIET(which, 0); + COMPQUIET(timeout, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_get_config(dbenv, which, onp) + DB_ENV *dbenv; + u_int32_t which; + int *onp; +{ + COMPQUIET(which, 0); + COMPQUIET(onp, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_config(dbenv, which, on) + DB_ENV *dbenv; + u_int32_t which; + int on; +{ + COMPQUIET(which, 0); + COMPQUIET(on, 0); + return (__db_norep(dbenv->env)); +} 
+ +int +__rep_get_limit(dbenv, gbytesp, bytesp) + DB_ENV *dbenv; + u_int32_t *gbytesp, *bytesp; +{ + COMPQUIET(gbytesp, NULL); + COMPQUIET(bytesp, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_open(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +int +__rep_preclose(env) + ENV *env; +{ + return (__db_norep(env)); +} + +int +__rep_process_message_pp(dbenv, control, rec, eid, ret_lsnp) + DB_ENV *dbenv; + DBT *control, *rec; + int eid; + DB_LSN *ret_lsnp; +{ + COMPQUIET(control, NULL); + COMPQUIET(rec, NULL); + COMPQUIET(eid, 0); + COMPQUIET(ret_lsnp, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_send_message(env, eid, rtype, lsnp, dbtp, logflags, repflags) + ENV *env; + int eid; + u_int32_t rtype; + DB_LSN *lsnp; + const DBT *dbtp; + u_int32_t logflags, repflags; +{ + COMPQUIET(eid, 0); + COMPQUIET(rtype, 0); + COMPQUIET(lsnp, NULL); + COMPQUIET(dbtp, NULL); + COMPQUIET(logflags, 0); + COMPQUIET(repflags, 0); + return (__db_norep(env)); +} + +int +__rep_set_limit(dbenv, gbytes, bytes) + DB_ENV *dbenv; + u_int32_t gbytes, bytes; +{ + COMPQUIET(gbytes, 0); + COMPQUIET(bytes, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_transport_pp(dbenv, eid, f_send) + DB_ENV *dbenv; + int eid; + int (*f_send) __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, + int, u_int32_t)); +{ + COMPQUIET(eid, 0); + COMPQUIET(f_send, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_set_request(dbenv, min, max) + DB_ENV *dbenv; + u_int32_t min, max; +{ + COMPQUIET(min, 0); + COMPQUIET(max, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_get_request(dbenv, minp, maxp) + DB_ENV *dbenv; + u_int32_t *minp, *maxp; +{ + COMPQUIET(minp, NULL); + COMPQUIET(maxp, NULL); + return (__db_norep(dbenv->env)); +} + +int +__rep_start_pp(dbenv, dbt, flags) + DB_ENV *dbenv; + DBT *dbt; + u_int32_t flags; +{ + COMPQUIET(dbt, NULL); + COMPQUIET(flags, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_stat_pp(dbenv, statp, flags) + 
DB_ENV *dbenv; + DB_REP_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_norep(env)); +} + +int +__rep_sync(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_norep(dbenv->env)); +} + +int +__rep_txn_applied(env, ip, commit_info, timeout) + ENV *env; + DB_THREAD_INFO *ip; + DB_COMMIT_INFO *commit_info; + db_timeout_t timeout; +{ + COMPQUIET(ip, 0); + COMPQUIET(commit_info, NULL); + COMPQUIET(timeout, 0); + return (__db_norep(env)); +} +#endif /* !HAVE_REPLICATION */ diff --git a/src/rep/rep_util.c b/src/rep/rep_util.c new file mode 100644 index 00000000..70b76dcc --- /dev/null +++ b/src/rep/rep_util.c @@ -0,0 +1,2705 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +#ifdef REP_DIAGNOSTIC +#include "dbinc/db_page.h" +#include "dbinc/fop.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/qam.h" +#endif + +/* + * rep_util.c: + * Miscellaneous replication-related utility functions, including + * those called by other subsystems. 
+ */ +#define TIMESTAMP_CHECK(env, ts, renv) do { \ + if (renv->op_timestamp != 0 && \ + renv->op_timestamp + DB_REGENV_TIMEOUT < ts) { \ + REP_SYSTEM_LOCK(env); \ + F_CLR(renv, DB_REGENV_REPLOCKED); \ + renv->op_timestamp = 0; \ + REP_SYSTEM_UNLOCK(env); \ + } \ +} while (0) + +static int __rep_lockout_int __P((ENV *, REP *, u_int32_t *, u_int32_t, + const char *, u_int32_t)); +static int __rep_newmaster_empty __P((ENV *, int)); +static int __rep_print_int __P((ENV *, u_int32_t, const char *, va_list)); +#ifdef REP_DIAGNOSTIC +static void __rep_print_logmsg __P((ENV *, const DBT *, DB_LSN *)); +#endif +static int __rep_show_progress __P((ENV *, const char *, int mins)); + +/* + * __rep_bulk_message -- + * This is a wrapper for putting a record into a bulk buffer. Since + * we have different bulk buffers, the caller must hand us the information + * we need to put the record into the correct buffer. All bulk buffers + * are protected by the REP->mtx_clientdb. + * + * PUBLIC: int __rep_bulk_message __P((ENV *, REP_BULK *, REP_THROTTLE *, + * PUBLIC: DB_LSN *, const DBT *, u_int32_t)); + */ +int +__rep_bulk_message(env, bulk, repth, lsn, dbt, flags) + ENV *env; + REP_BULK *bulk; + REP_THROTTLE *repth; + DB_LSN *lsn; + const DBT *dbt; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + __rep_bulk_args b_args; + size_t len; + int ret; + u_int32_t recsize, typemore; + u_int8_t *p; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + + /* + * Figure out the total number of bytes needed for this record. + * !!! The marshalling code includes the given len, but also + * puts its own copy of the dbt->size with the DBT portion of + * the record. Account for that here. + */ + recsize = sizeof(len) + dbt->size + sizeof(DB_LSN) + sizeof(dbt->size); + + /* + * If *this* buffer is actively being transmitted, don't wait, + * just return so that it can be sent as a singleton. 
+ */ + MUTEX_LOCK(env, rep->mtx_clientdb); + if (FLD_ISSET(*(bulk->flagsp), BULK_XMIT)) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (DB_REP_BULKOVF); + } + + /* + * If the record is bigger than the buffer entirely, send the + * current buffer and then return DB_REP_BULKOVF so that this + * record is sent as a singleton. Do we have enough info to + * do that here? XXX + */ + if (recsize > bulk->len) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "bulk_msg: Record %d (0x%x) larger than entire buffer 0x%x", + recsize, recsize, bulk->len)); + STAT(rep->stat.st_bulk_overflows++); + (void)__rep_send_bulk(env, bulk, flags); + /* + * XXX __rep_send_message... + */ + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (DB_REP_BULKOVF); + } + /* + * If this record doesn't fit, send the current buffer. + * Sending the buffer will reset the offset, but we will + * drop the mutex while sending so we need to keep checking + * if we're racing. + */ + while (recsize + *(bulk->offp) > bulk->len) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "bulk_msg: Record %lu (%#lx) doesn't fit. Send %lu (%#lx) now.", + (u_long)recsize, (u_long)recsize, + (u_long)bulk->len, (u_long)bulk->len)); + STAT(rep->stat.st_bulk_fills++); + if ((ret = __rep_send_bulk(env, bulk, flags)) != 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (ret); + } + } + + /* + * If we're using throttling, see if we are at the throttling + * limit before we do any more work here, by checking if the + * call to rep_send_throttle changed the repth->type to the + * *_MORE message type. If the throttling code hits the limit + * then we're done here. 
+ */ + if (bulk->type == REP_BULK_LOG) + typemore = REP_LOG_MORE; + else + typemore = REP_PAGE_MORE; + if (repth != NULL) { + if ((ret = __rep_send_throttle(env, + bulk->eid, repth, REP_THROTTLE_ONLY, flags)) != 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (ret); + } + if (repth->type == typemore) { + VPRINT(env, (env, DB_VERB_REP_MSGS, + "bulk_msg: Record %lu (0x%lx) hit throttle limit.", + (u_long)recsize, (u_long)recsize)); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (ret); + } + } + + /* + * Now we own the buffer, and we know our record fits into it. + * The buffer is structured with the len, LSN and then the record. + * Copy the record into the buffer. Then if we need to, + * send the buffer. + */ + p = bulk->addr + *(bulk->offp); + b_args.len = dbt->size; + b_args.lsn = *lsn; + b_args.bulkdata = *dbt; + /* + * If we're the first record, we need to save the first + * LSN in the bulk structure. + */ + if (*(bulk->offp) == 0) + bulk->lsn = *lsn; + if (rep->version < DB_REPVERSION_47) { + len = 0; + memcpy(p, &dbt->size, sizeof(dbt->size)); + p += sizeof(dbt->size); + memcpy(p, lsn, sizeof(DB_LSN)); + p += sizeof(DB_LSN); + memcpy(p, dbt->data, dbt->size); + p += dbt->size; + } else if ((ret = __rep_bulk_marshal(env, &b_args, p, + bulk->len, &len)) != 0) + goto err; + *(bulk->offp) = (roff_t)(p + len - bulk->addr); + STAT(rep->stat.st_bulk_records++); + /* + * Send the buffer if it is a perm record or a force. + */ + if (LF_ISSET(REPCTL_PERM)) { + VPRINT(env, (env, DB_VERB_REP_MSGS, + "bulk_msg: Send buffer after copy due to PERM")); + ret = __rep_send_bulk(env, bulk, flags); + } +err: + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (ret); + +} + +/* + * __rep_send_bulk -- + * This function transmits the bulk buffer given. It assumes the + * caller holds the REP->mtx_clientdb. We may release it and reacquire + * it during this call. We will return with it held. 
+ * + * PUBLIC: int __rep_send_bulk __P((ENV *, REP_BULK *, u_int32_t)); + */ +int +__rep_send_bulk(env, bulkp, ctlflags) + ENV *env; + REP_BULK *bulkp; + u_int32_t ctlflags; +{ + DBT dbt; + DB_REP *db_rep; + REP *rep; + int ret; + + /* + * If the offset is 0, we're done. There is nothing to send. + */ + if (*(bulkp->offp) == 0) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * Set that this buffer is being actively transmitted. + */ + FLD_SET(*(bulkp->flagsp), BULK_XMIT); + DB_INIT_DBT(dbt, bulkp->addr, *(bulkp->offp)); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + VPRINT(env, (env, DB_VERB_REP_MSGS, + "send_bulk: Send %d (0x%x) bulk buffer bytes", dbt.size, dbt.size)); + + /* + * Unlocked the mutex and now send the message. + */ + STAT(rep->stat.st_bulk_transfers++); + if ((ret = __rep_send_message(env, + bulkp->eid, bulkp->type, &bulkp->lsn, &dbt, ctlflags, 0)) != 0) + ret = DB_REP_UNAVAIL; + + MUTEX_LOCK(env, rep->mtx_clientdb); + /* + * Ready the buffer for further records. + */ + *(bulkp->offp) = 0; + FLD_CLR(*(bulkp->flagsp), BULK_XMIT); + return (ret); +} + +/* + * __rep_bulk_alloc -- + * This function allocates and initializes an internal bulk buffer. + * This is used by the master when fulfilling a request for a chunk of + * log records or a bunch of pages. + * + * PUBLIC: int __rep_bulk_alloc __P((ENV *, REP_BULK *, int, uintptr_t *, + * PUBLIC: u_int32_t *, u_int32_t)); + */ +int +__rep_bulk_alloc(env, bulkp, eid, offp, flagsp, type) + ENV *env; + REP_BULK *bulkp; + int eid; + uintptr_t *offp; + u_int32_t *flagsp, type; +{ + int ret; + + memset(bulkp, 0, sizeof(REP_BULK)); + *offp = *flagsp = 0; + bulkp->len = MEGABYTE; + if ((ret = __os_malloc(env, bulkp->len, &bulkp->addr)) != 0) + return (ret); + + /* + * The cast is safe because offp is an "out" parameter. The value + * of offp is meaningless when calling __rep_bulk_alloc. 
+ */ + bulkp->offp = (roff_t *)offp; + bulkp->type = type; + bulkp->eid = eid; + bulkp->flagsp = flagsp; + return (ret); +} + +/* + * __rep_bulk_free -- + * This function sends the remainder of the bulk buffer and frees it. + * + * PUBLIC: int __rep_bulk_free __P((ENV *, REP_BULK *, u_int32_t)); + */ +int +__rep_bulk_free(env, bulkp, flags) + ENV *env; + REP_BULK *bulkp; + u_int32_t flags; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + + MUTEX_LOCK(env, db_rep->region->mtx_clientdb); + ret = __rep_send_bulk(env, bulkp, flags); + MUTEX_UNLOCK(env, db_rep->region->mtx_clientdb); + __os_free(env, bulkp->addr); + return (ret); +} + +/* + * __rep_send_message -- + * This is a wrapper for sending a message. It takes care of constructing + * the control structure and calling the user's specified send function. + * + * PUBLIC: int __rep_send_message __P((ENV *, int, + * PUBLIC: u_int32_t, DB_LSN *, const DBT *, u_int32_t, u_int32_t)); + */ +int +__rep_send_message(env, eid, rtype, lsnp, dbt, ctlflags, repflags) + ENV *env; + int eid; + u_int32_t rtype; + DB_LSN *lsnp; + const DBT *dbt; + u_int32_t ctlflags, repflags; +{ + DBT cdbt, scrap_dbt; + DB_ENV *dbenv; + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + REP_46_CONTROL cntrl46; + REP_OLD_CONTROL ocntrl; + __rep_control_args cntrl; + db_timespec msg_time; + int ret; + u_int32_t myflags; + u_int8_t buf[__REP_CONTROL_SIZE]; + size_t len; + + dbenv = env->dbenv; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + ret = 0; + +#if defined(DEBUG_ROP) || defined(DEBUG_WOP) + if (db_rep->send == NULL) + return (0); +#endif + + /* Set up control structure. */ + memset(&cntrl, 0, sizeof(cntrl)); + memset(&ocntrl, 0, sizeof(ocntrl)); + memset(&cntrl46, 0, sizeof(cntrl46)); + if (lsnp == NULL) + ZERO_LSN(cntrl.lsn); + else + cntrl.lsn = *lsnp; + /* + * Set the rectype based on the version we need to speak. 
+ */ + if (rep->version == DB_REPVERSION) + cntrl.rectype = rtype; + else if (rep->version < DB_REPVERSION) { + cntrl.rectype = __rep_msg_to_old(rep->version, rtype); + VPRINT(env, (env, DB_VERB_REP_MSGS, + "rep_send_msg: rtype %lu to version %lu record %lu.", + (u_long)rtype, (u_long)rep->version, + (u_long)cntrl.rectype)); + if (cntrl.rectype == REP_INVALID) + return (ret); + } else { + __db_errx(env, DB_STR_A("3503", + "rep_send_message: Unknown rep version %lu, my version %lu", + "%lu %lu"), (u_long)rep->version, (u_long)DB_REPVERSION); + return (__env_panic(env, EINVAL)); + } + cntrl.flags = ctlflags; + cntrl.rep_version = rep->version; + cntrl.log_version = lp->persist.version; + cntrl.gen = rep->gen; + + /* Don't assume the send function will be tolerant of NULL records. */ + if (dbt == NULL) { + memset(&scrap_dbt, 0, sizeof(DBT)); + dbt = &scrap_dbt; + } + + /* + * There are several types of records: commit and checkpoint records + * that affect database durability, regular log records that might + * be buffered on the master before being transmitted, and control + * messages which don't require the guarantees of permanency, but + * should not be buffered. + * + * There are request records that can be sent anywhere, and there + * are rerequest records that the app might want to send to the master. + */ + myflags = repflags; + if (FLD_ISSET(ctlflags, REPCTL_PERM)) { + /* + * When writing to a system database, skip setting the PERMANENT + * flag. We don't care; we don't want to wait; and the + * application shouldn't be distracted/confused in case there is + * a failure. + */ + if (!F_ISSET(rep, REP_F_SYS_DB_OP)) + myflags |= DB_REP_PERMANENT; + } else if (rtype != REP_LOG || FLD_ISSET(ctlflags, REPCTL_RESEND)) + myflags |= DB_REP_NOBUFFER; + + /* + * Let everyone know if we've been in an established group. 
+ */ + if (F_ISSET(rep, REP_F_GROUP_ESTD)) + F_SET(&cntrl, REPCTL_GROUP_ESTD); + + /* + * If we are a master sending a perm record, then set the + * REPCTL_LEASE flag to have the client reply. Also set + * the start time that the client will echo back to us. + * + * !!! If we are a master, using leases, we had better not be + * sending to an older version. + */ + if (IS_REP_MASTER(env) && IS_USING_LEASES(env) && + FLD_ISSET(ctlflags, REPCTL_LEASE | REPCTL_PERM)) { + F_SET(&cntrl, REPCTL_LEASE); + DB_ASSERT(env, rep->version == DB_REPVERSION); + __os_gettime(env, &msg_time, 1); + cntrl.msg_sec = (u_int32_t)msg_time.tv_sec; + cntrl.msg_nsec = (u_int32_t)msg_time.tv_nsec; + } + + REP_PRINT_MESSAGE(env, eid, &cntrl, "rep_send_message", myflags); +#ifdef REP_DIAGNOSTIC + if (FLD_ISSET( + env->dbenv->verbose, DB_VERB_REP_MSGS) && rtype == REP_LOG) + __rep_print_logmsg(env, dbt, lsnp); +#endif + + /* + * If DB_REP_PERMANENT is set, the LSN better be non-zero. + */ + DB_ASSERT(env, !FLD_ISSET(myflags, DB_REP_PERMANENT) || + !IS_ZERO_LSN(cntrl.lsn)); + + /* + * If we're talking to an old version, send an old control structure. 
+ */ + memset(&cdbt, 0, sizeof(cdbt)); + if (rep->version <= DB_REPVERSION_45) { + if (rep->version == DB_REPVERSION_45 && + F_ISSET(&cntrl, REPCTL_INIT)) { + F_CLR(&cntrl, REPCTL_INIT); + F_SET(&cntrl, REPCTL_INIT_45); + } + ocntrl.rep_version = cntrl.rep_version; + ocntrl.log_version = cntrl.log_version; + ocntrl.lsn = cntrl.lsn; + ocntrl.rectype = cntrl.rectype; + ocntrl.gen = cntrl.gen; + ocntrl.flags = cntrl.flags; + cdbt.data = &ocntrl; + cdbt.size = sizeof(ocntrl); + } else if (rep->version == DB_REPVERSION_46) { + cntrl46.rep_version = cntrl.rep_version; + cntrl46.log_version = cntrl.log_version; + cntrl46.lsn = cntrl.lsn; + cntrl46.rectype = cntrl.rectype; + cntrl46.gen = cntrl.gen; + cntrl46.msg_time.tv_sec = (time_t)cntrl.msg_sec; + cntrl46.msg_time.tv_nsec = (long)cntrl.msg_nsec; + cntrl46.flags = cntrl.flags; + cdbt.data = &cntrl46; + cdbt.size = sizeof(cntrl46); + } else { + (void)__rep_control_marshal(env, &cntrl, buf, + __REP_CONTROL_SIZE, &len); + DB_INIT_DBT(cdbt, buf, len); + } + + /* + * We set the LSN above to something valid. Give the master the + * actual LSN so that they can coordinate with permanent records from + * the client if they want to. + * + * !!! Even though we marshalled the control message for transmission, + * give the transport function the real LSN. + */ + ret = db_rep->send(dbenv, &cdbt, dbt, &cntrl.lsn, eid, myflags); + + /* + * We don't hold the rep lock, so this could miscount if we race. + * I don't think it's worth grabbing the mutex for that bit of + * extra accuracy. + */ + if (ret != 0) { + RPRINT(env, (env, DB_VERB_REP_MSGS, + "rep_send_function returned: %d", ret)); +#ifdef HAVE_STATISTICS + rep->stat.st_msgs_send_failures++; + } else + rep->stat.st_msgs_sent++; +#else + } +#endif + return (ret); +} + +#ifdef REP_DIAGNOSTIC +/* + * __rep_print_logmsg -- + * This is a debugging routine for printing out log records that + * we are about to transmit to a client. 
 */
static void
__rep_print_logmsg(env, logdbt, lsnp)
	ENV *env;
	const DBT *logdbt;
	DB_LSN *lsnp;
{
	/*
	 * NOTE(review): "first"/"dtab" are unsynchronized function statics;
	 * presumably this diagnostic path is only exercised single-threaded —
	 * confirm before enabling REP_DIAGNOSTIC in multi-threaded tests.
	 */
	static int first = 1;
	static DB_DISTAB dtab;

	if (first) {
		first = 0;

		/* Register every subsystem's print routine once. */
		(void)__bam_init_print(env, &dtab);
		(void)__crdel_init_print(env, &dtab);
		(void)__db_init_print(env, &dtab);
		(void)__dbreg_init_print(env, &dtab);
		(void)__fop_init_print(env, &dtab);
		(void)__ham_init_print(env, &dtab);
		(void)__qam_init_print(env, &dtab);
		(void)__repmgr_init_print(env, &dtab);
		(void)__txn_init_print(env, &dtab);
	}

	(void)__db_dispatch(
	    env, &dtab, (DBT *)logdbt, lsnp, DB_TXN_PRINT, NULL);
}
#endif

/*
 * __rep_new_master --
 *	Called after a master election to sync back up with a new master.
 * It's possible that we already know of this new master in which case
 * we don't need to do anything.
 *
 * This is written assuming that this message came from the master; we
 * need to enforce that in __rep_process_record, but right now, we have
 * no way to identify the master.
 *
 * Returns DB_REP_NEWMASTER on the normal completion paths; 0 or an
 * error when no master change needs to be reported to the caller.
 *
 * PUBLIC: int __rep_new_master __P((ENV *, __rep_control_args *, int));
 */
int
__rep_new_master(env, cntrl, eid)
	ENV *env;
	__rep_control_args *cntrl;
	int eid;
{
	DBT dbt;
	DB_LOG *dblp;
	DB_LOGC *logc;
	DB_LSN first_lsn, lsn;
	DB_REP *db_rep;
	DB_THREAD_INFO *ip;
	LOG *lp;
	REGENV *renv;
	REGINFO *infop;
	REP *rep;
	db_timeout_t lease_to;
	u_int32_t unused, vers;
	int change, do_req, lockout_msg, ret, t_ret;

	db_rep = env->rep_handle;
	rep = db_rep->region;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	ret = 0;
	logc = NULL;
	lockout_msg = 0;
	REP_SYSTEM_LOCK(env);
	/* A "change" is either a new generation or a different master id. */
	change = rep->gen != cntrl->gen || rep->master_id != eid;
	/*
	 * If we're hearing from a current or new master, then we
	 * want to clear EPHASE0 in case this site is waiting to
	 * hear from the master.
	 */
	FLD_CLR(rep->elect_flags, REP_E_PHASE0);
	if (change) {
		/*
		 * If we are already locking out others, we're either
		 * in the middle of sync-up recovery or internal init
		 * when this newmaster comes in (we also lockout in
		 * rep_start, but we cannot be racing that because we
		 * don't allow rep_proc_msg when rep_start is going on).
		 *
		 * We're about to become the client of a new master. Since we
		 * want to be able to sync with the new master as quickly as
		 * possible, interrupt any STARTSYNC from the old master. The
		 * new master may need to rely on acks from us and the old
		 * STARTSYNC is now irrelevant.
		 *
		 * Note that, conveniently, the "lockout_msg" flag defines the
		 * section of this code path during which both "message lockout"
		 * and "memp sync interrupt" are in effect.
		 */
		if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG))
			goto lckout;

		if ((ret = __rep_lockout_msg(env, rep, 1)) != 0)
			goto errlck;

		(void)__memp_set_config(env->dbenv, DB_MEMP_SYNC_INTERRUPT, 1);
		lockout_msg = 1;
		/*
		 * We must wait any remaining lease time before accepting
		 * this new master. This must be after the lockout above
		 * so that no new message can be processed and re-grant
		 * the lease out from under us.
		 */
		if (IS_USING_LEASES(env) &&
		    ((lease_to = __rep_lease_waittime(env)) != 0)) {
			REP_SYSTEM_UNLOCK(env);
			__os_yield(env, 0, (u_long)lease_to);
			REP_SYSTEM_LOCK(env);
			F_SET(rep, REP_F_LEASE_EXPIRED);
		}

		vers = lp->persist.version;
		if (cntrl->log_version != vers) {
			/*
			 * Set everything up to the lower version. If we're
			 * going to be upgrading to the latest version that
			 * can happen automatically as we process later log
			 * records. We likely want to sync to earlier version.
			 */
			DB_ASSERT(env, vers != 0);
			if (cntrl->log_version < vers)
				vers = cntrl->log_version;
			RPRINT(env, (env, DB_VERB_REP_MISC,
			    "newmaster: Setting log version to %d",vers));
			__log_set_version(env, vers);
			if ((ret = __env_init_rec(env, vers)) != 0)
				goto errlck;
		}

		REP_SYSTEM_UNLOCK(env);

		/* Reset the request timing/LSN bookkeeping for a new master. */
		MUTEX_LOCK(env, rep->mtx_clientdb);
		__os_gettime(env, &lp->rcvd_ts, 1);
		lp->wait_ts = rep->request_gap;
		ZERO_LSN(lp->verify_lsn);
		ZERO_LSN(lp->prev_ckp);
		ZERO_LSN(lp->waiting_lsn);
		ZERO_LSN(lp->max_wait_lsn);
		/*
		 * Open if we need to, in preparation for the truncate
		 * we'll do in a moment.
		 */
		if (db_rep->rep_db == NULL &&
		    (ret = __rep_client_dbinit(env, 0, REP_DB)) != 0) {
			MUTEX_UNLOCK(env, rep->mtx_clientdb);
			goto err;
		}

		/*
		 * If we were in the middle of an internal initialization
		 * and we've discovered a new master instead, clean up
		 * our old internal init information. We need to clean
		 * up any flags and unlock our lockout.
		 */
		REP_SYSTEM_LOCK(env);
		if (ISSET_LOCKOUT_BDB(rep)) {
			ret = __rep_init_cleanup(env, rep, DB_FORCE);
			/*
			 * Note that if an in-progress internal init was indeed
			 * "cleaned up", clearing these flags now will allow the
			 * application to see a completely empty database
			 * environment for a moment (until the master responds
			 * to our ALL_REQ).
			 */
			F_CLR(rep, REP_F_ABBREVIATED);
			CLR_RECOVERY_SETTINGS(rep);
		}
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
		if (ret != 0) {
			/* TODO: consider add'l error recovery steps. */
			goto errlck;
		}
		ENV_GET_THREAD_INFO(env, ip);
		if ((ret = __db_truncate(db_rep->rep_db, ip, NULL, &unused))
		    != 0)
			goto errlck;
		STAT(rep->stat.st_log_queued = 0);

		/*
		 * This needs to be performed under message lockout
		 * if we're actually changing master.
		 */
		__rep_elect_done(env, rep);
		RPRINT(env, (env, DB_VERB_REP_MISC,
		    "Updating gen from %lu to %lu from master %d",
		    (u_long)rep->gen, (u_long)cntrl->gen, eid));
		SET_GEN(cntrl->gen);
		rep->mgen = cntrl->gen;
		if ((ret = __rep_notify_threads(env, AWAIT_GEN)) != 0)
			goto errlck;
		(void)__rep_write_gen(env, rep, rep->gen);
		if (rep->egen <= rep->gen)
			rep->egen = rep->gen + 1;
		rep->master_id = eid;
		STAT(rep->stat.st_master_changes++);
		/*
		 * NOTE(review): deliberately not wrapped in STAT() —
		 * presumably startup_complete is meaningful even in builds
		 * without statistics; confirm against rep.h.
		 */
		rep->stat.st_startup_complete = 0;
		rep->version = cntrl->rep_version;
		RPRINT(env, (env, DB_VERB_REP_MISC,
		    "egen: %lu. rep version %lu",
		    (u_long)rep->egen, (u_long)rep->version));

		/*
		 * If we're delaying client sync-up, we know we have a
		 * new/changed master now, set flag indicating we are
		 * actively delaying.
		 */
		if (FLD_ISSET(rep->config, REP_C_DELAYCLIENT))
			F_SET(rep, REP_F_DELAY);
		if ((ret = __rep_lockout_archive(env, rep)) != 0)
			goto errlck;
		rep->sync_state = SYNC_VERIFY;
		FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG);
		(void)__memp_set_config(env->dbenv, DB_MEMP_SYNC_INTERRUPT, 0);
		lockout_msg = 0;
	} else
		__rep_elect_done(env, rep);
	REP_SYSTEM_UNLOCK(env);

	MUTEX_LOCK(env, rep->mtx_clientdb);
	lsn = lp->ready_lsn;

	if (!change) {
		ret = 0;
		do_req = __rep_check_doreq(env, rep);
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
		/*
		 * If there wasn't a change, we might still have some
		 * catching up or verification to do.
		 */
		if (do_req &&
		    (rep->sync_state != SYNC_OFF ||
		    LOG_COMPARE(&lsn, &cntrl->lsn) < 0)) {
			ret = __rep_resend_req(env, 0);
			if (ret != 0)
				RPRINT(env, (env, DB_VERB_REP_MISC,
				    "resend_req ret is %lu", (u_long)ret));
		}
		/*
		 * If we're not in one of the recovery modes, we need to
		 * clear the ARCHIVE flag. Elections set ARCHIVE
		 * and if we called an election and found the same
		 * master, we need to clear ARCHIVE here.
		 */
		if (rep->sync_state == SYNC_OFF) {
			REP_SYSTEM_LOCK(env);
			FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE);
			REP_SYSTEM_UNLOCK(env);
		}
		return (ret);
	}
	MUTEX_UNLOCK(env, rep->mtx_clientdb);

	/*
	 * If the master changed, we need to start the process of
	 * figuring out what our last valid log record is. However,
	 * if both the master and we agree that the max LSN is 0,0,
	 * then there is no recovery to be done. If we are at 0 and
	 * the master is not, then we just need to request all the log
	 * records from the master.
	 */
	if (IS_INIT_LSN(lsn) || IS_ZERO_LSN(lsn)) {
		if ((ret = __rep_newmaster_empty(env, eid)) != 0)
			goto err;
		goto newmaster_complete;
	}

	memset(&dbt, 0, sizeof(dbt));
	/*
	 * If this client is farther ahead on the log file than the master, see
	 * if there is any overlap in the logs. If not, the client is too
	 * far ahead of the master and the client will start over.
	 */
	if (cntrl->lsn.file < lsn.file) {
		if ((ret = __log_cursor(env, &logc)) != 0)
			goto err;
		ret = __logc_get(logc, &first_lsn, &dbt, DB_FIRST);
		if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
			ret = t_ret;
		if (ret == DB_NOTFOUND)
			goto notfound;
		else if (ret != 0)
			goto err;
		if (cntrl->lsn.file < first_lsn.file)
			goto notfound;
	}
	/* Walk backward to the last PERM record we share with the master. */
	if ((ret = __log_cursor(env, &logc)) != 0)
		goto err;
	ret = __rep_log_backup(env, logc, &lsn, REP_REC_PERM);
	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
		ret = t_ret;
	if (ret == DB_NOTFOUND)
		goto notfound;
	else if (ret != 0)
		goto err;

	/*
	 * Finally, we have a record to ask for.
	 */
	MUTEX_LOCK(env, rep->mtx_clientdb);
	lp->verify_lsn = lsn;
	__os_gettime(env, &lp->rcvd_ts, 1);
	lp->wait_ts = rep->request_gap;
	MUTEX_UNLOCK(env, rep->mtx_clientdb);
	if (!F_ISSET(rep, REP_F_DELAY))
		(void)__rep_send_message(env,
		    eid, REP_VERIFY_REQ, &lsn, NULL, 0, DB_REP_ANYWHERE);
	goto newmaster_complete;

err:	/*
	 * If we failed, we need to clear the flags we may have set above
	 * because we're not going to be setting the verify_lsn.
	 */
	REP_SYSTEM_LOCK(env);
errlck:	if (lockout_msg) {
		FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG);
		(void)__memp_set_config(env->dbenv, DB_MEMP_SYNC_INTERRUPT, 0);
	}
	F_CLR(rep, REP_F_DELAY);
	CLR_RECOVERY_SETTINGS(rep);
lckout:	REP_SYSTEM_UNLOCK(env);
	return (ret);

notfound:
	/*
	 * If we don't have an identification record, we still
	 * might have some log records but we're discarding them
	 * to sync up with the master from the start.
	 * Therefore, truncate our log and treat it as if it
	 * were empty. In-memory logs can't be completely
	 * zeroed using __log_vtruncate, so just zero them out.
	 */
	RPRINT(env, (env, DB_VERB_REP_MISC,
	    "No commit or ckp found. Truncate log."));
	if (lp->db_log_inmemory) {
		ZERO_LSN(lsn);
		ret = __log_zero(env, &lsn);
	} else {
		INIT_LSN(lsn);
		ret = __log_vtruncate(env, &lsn, &lsn, NULL);
	}
	if (ret != 0 && ret != DB_NOTFOUND)
		return (ret);
	infop = env->reginfo;
	renv = infop->primary;
	REP_SYSTEM_LOCK(env);
	(void)time(&renv->rep_timestamp);
	REP_SYSTEM_UNLOCK(env);
	if ((ret = __rep_newmaster_empty(env, eid)) != 0)
		goto err;
newmaster_complete:
	return (DB_REP_NEWMASTER);
}

/*
 * __rep_newmaster_empty
 *	Handle the case of a NEWMASTER message received when we have an empty
 * log.  This requires internal init.  If we can't do that because
 * AUTOINIT off, return JOIN_FAILURE.  If F_DELAY is in effect, don't even
 * consider AUTOINIT yet, because they could change it before rep_sync call.
+ */ +static int +__rep_newmaster_empty(env, eid) + ENV *env; + int eid; +{ + DB_REP *db_rep; + LOG *lp; + REP *rep; + int msg, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + lp = env->lg_handle->reginfo.primary; + msg = ret = 0; + + MUTEX_LOCK(env, rep->mtx_clientdb); + REP_SYSTEM_LOCK(env); + lp->wait_ts = rep->request_gap; + + /* Usual case is to skip to UPDATE state; we may revise this below. */ + rep->sync_state = SYNC_UPDATE; + + if (F_ISSET(rep, REP_F_DELAY)) { + /* + * Having properly set up wait_ts for later, nothing more to + * do now. + */ + } else if (!FLD_ISSET(rep->config, REP_C_AUTOINIT)) { + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE); + CLR_RECOVERY_SETTINGS(rep); + ret = DB_REP_JOIN_FAILURE; + } else { + /* Normal case: not DELAY but AUTOINIT. */ + msg = 1; + } + REP_SYSTEM_UNLOCK(env); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + + if (msg) + (void)__rep_send_message(env, eid, REP_UPDATE_REQ, + NULL, NULL, 0, 0); + return (ret); +} + +/* + * __rep_elect_done + * Clear all election information for this site. Assumes the + * caller hold the region mutex. 
 *
 * PUBLIC: void __rep_elect_done __P((ENV *, REP *));
 */
void
__rep_elect_done(env, rep)
	ENV *env;
	REP *rep;
{
	int inelect;
	db_timespec endtime;

	inelect = IN_ELECTION(rep);
	FLD_CLR(rep->elect_flags, REP_E_PHASE1 | REP_E_PHASE2 | REP_E_TALLY);

	rep->sites = 0;
	rep->votes = 0;
	if (inelect) {
		if (timespecisset(&rep->etime)) {
			/* Record how long the election took. */
			__os_gettime(env, &endtime, 1);
			timespecsub(&endtime, &rep->etime);
#ifdef HAVE_STATISTICS
			rep->stat.st_election_sec = (u_int32_t)endtime.tv_sec;
			rep->stat.st_election_usec = (u_int32_t)
			    (endtime.tv_nsec / NS_PER_US);
#endif
			RPRINT(env, (env, DB_VERB_REP_ELECT,
			    "Election finished in %lu.%09lu sec",
			    (u_long)endtime.tv_sec, (u_long)endtime.tv_nsec));
			timespecclear(&rep->etime);
		}
		/* Finishing an election always advances the election gen. */
		rep->egen++;
	}
	RPRINT(env, (env, DB_VERB_REP_ELECT,
	    "Election done; egen %lu", (u_long)rep->egen));
}

/*
 * __env_rep_enter --
 *
 *	Check if we are in the middle of replication initialization and/or
 * recovery, and if so, disallow operations.  If operations are allowed,
 * increment handle-counts, so that we do not start recovery while we
 * are operating in the library.
 *
 * PUBLIC: int __env_rep_enter __P((ENV *, int));
 */
int
__env_rep_enter(env, checklock)
	ENV *env;
	int checklock;
{
	DB_REP *db_rep;
	REGENV *renv;
	REGINFO *infop;
	REP *rep;
	int cnt, ret;
	time_t timestamp;

	/* Check if locks have been globally turned off. */
	if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING))
		return (0);

	db_rep = env->rep_handle;
	rep = db_rep->region;

	infop = env->reginfo;
	renv = infop->primary;
	if (checklock && F_ISSET(renv, DB_REGENV_REPLOCKED)) {
		(void)time(&timestamp);
		TIMESTAMP_CHECK(env, timestamp, renv);
		/*
		 * Check if we're still locked out after checking
		 * the timestamp.
		 */
		if (F_ISSET(renv, DB_REGENV_REPLOCKED))
			return (EINVAL);
	}

	/* Spin (1-second yields) until the API lockout is lifted. */
	REP_SYSTEM_LOCK(env);
	for (cnt = 0; FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_API);) {
		REP_SYSTEM_UNLOCK(env);
		/*
		 * We're spinning - environment may be hung.  Check if
		 * recovery has been initiated.
		 */
		PANIC_CHECK(env);
		if (FLD_ISSET(rep->config, REP_C_NOWAIT)) {
			__db_errx(env, DB_STR("3504",
			    "Operation locked out. Waiting for replication lockout to complete"));
			return (DB_REP_LOCKOUT);
		}
		__os_yield(env, 1, 0);
		/* Report progress once per minute of waiting. */
		if (++cnt % 60 == 0 &&
		    (ret = __rep_show_progress(env,
		    DB_STR_P("DB_ENV handle"), cnt / 60)) != 0)
			return (ret);
		REP_SYSTEM_LOCK(env);
	}
	rep->handle_cnt++;
	REP_SYSTEM_UNLOCK(env);

	return (0);
}

/*
 * __rep_show_progress --
 *	Print a once-a-minute progress report while a caller is blocked
 * waiting for a replication lockout to complete.  "which" identifies the
 * waiter; "mins" is the number of minutes waited so far.
 */
static int
__rep_show_progress(env, which, mins)
	ENV *env;
	const char *which;
	int mins;
{
	DB_LOG *dblp;
	LOG *lp;
	REP *rep;
	DB_LSN ready_lsn;

	rep = env->rep_handle->region;
	dblp = env->lg_handle;
	/* Logging may not be configured; guard the dereference. */
	lp = dblp == NULL ? NULL : dblp->reginfo.primary;

#define	WAITING_MSG DB_STR_A("3505", \
    "%s waiting %d minutes for replication lockout to complete", "%s %d")
#define	WAITING_ARGS WAITING_MSG, which, mins

	__db_errx(env, WAITING_ARGS);
	RPRINT(env, (env, DB_VERB_REP_SYNC, WAITING_ARGS));

	if (lp == NULL)
		ZERO_LSN(ready_lsn);
	else {
		MUTEX_LOCK(env, rep->mtx_clientdb);
		ready_lsn = lp->ready_lsn;
		MUTEX_UNLOCK(env, rep->mtx_clientdb);
	}
	REP_SYSTEM_LOCK(env);
	switch (rep->sync_state) {
	case SYNC_PAGE:
#define	PAGE_MSG DB_STR_A("3506", \
    "SYNC_PAGE: files %lu/%lu; pages %lu (%lu next)", "%lu %lu %lu %lu")
#define	PAGE_ARGS (u_long)rep->curfile, (u_long)rep->nfiles, \
	(u_long)rep->npages, (u_long)rep->ready_pg
		__db_errx(env, PAGE_MSG, PAGE_ARGS);
		RPRINT(env, (env, DB_VERB_REP_SYNC, PAGE_MSG, PAGE_ARGS));
		break;
	case SYNC_LOG:
#define	LSN_ARG(lsn) (u_long)(lsn).file, (u_long)(lsn).offset
#define	LOG_LSN_ARGS LSN_ARG(ready_lsn), \
    LSN_ARG(rep->first_lsn), LSN_ARG(rep->last_lsn)
/* The SYNC_LOG message has an extra "queued" count with statistics on. */
#ifdef HAVE_STATISTICS
#define	LOG_MSG DB_STR_A("3507", \
    "SYNC_LOG: thru [%lu][%lu] from [%lu][%lu]/[%lu][%lu] (%lu queued)",\
    "%lu %lu %lu %lu %lu %lu %lu")
#define	LOG_ARGS LOG_LSN_ARGS, (u_long)rep->stat.st_log_queued
#else
#define	LOG_MSG DB_STR_A("3508", \
    "SYNC_LOG: thru [%lu][%lu] from [%lu][%lu]/[%lu][%lu]", \
    "%lu %lu %lu %lu %lu %lu")
#define	LOG_ARGS LOG_LSN_ARGS
#endif
		__db_errx(env, LOG_MSG, LOG_ARGS);
		RPRINT(env, (env, DB_VERB_REP_SYNC, LOG_MSG, LOG_ARGS));
		break;
	default:
		RPRINT(env, (env, DB_VERB_REP_SYNC,
		    "sync state %d", (int)rep->sync_state));
		break;
	}
	REP_SYSTEM_UNLOCK(env);
	return (0);
}

/*
 * __env_db_rep_exit --
 *
 *	Decrement handle count upon routine exit.
 *
 * PUBLIC: int __env_db_rep_exit __P((ENV *));
 */
int
__env_db_rep_exit(env)
	ENV *env;
{
	DB_REP *db_rep;
	REP *rep;

	/* Check if locks have been globally turned off. */
	if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING))
		return (0);

	db_rep = env->rep_handle;
	rep = db_rep->region;

	REP_SYSTEM_LOCK(env);
	rep->handle_cnt--;
	REP_SYSTEM_UNLOCK(env);

	return (0);
}

/*
 * __db_rep_enter --
 *	Called in replicated environments to keep track of in-use handles
 * and prevent any concurrent operation during recovery.  If checkgen is
 * non-zero, then we verify that the dbp has the same handle as the env.
 *
 * If return_now is non-zero, we'll return DB_DEADLOCK immediately, else we'll
 * sleep before returning DB_DEADLOCK.  Without the sleep, it is likely
 * the application will immediately try again and could reach a retry
 * limit before replication has a chance to finish.  The sleep increases
 * the probability that an application retry will succeed.
 *
 * Typically calls with txns set return_now so that we return immediately.
 * We want to return immediately because we want the txn to abort ASAP
 * so that the lockout can proceed.
+ * + * PUBLIC: int __db_rep_enter __P((DB *, int, int, int)); + */ +int +__db_rep_enter(dbp, checkgen, checklock, return_now) + DB *dbp; + int checkgen, checklock, return_now; +{ + DB_REP *db_rep; + ENV *env; + REGENV *renv; + REGINFO *infop; + REP *rep; + time_t timestamp; + + env = dbp->env; + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + renv = infop->primary; + + if (checklock && F_ISSET(renv, DB_REGENV_REPLOCKED)) { + (void)time(×tamp); + TIMESTAMP_CHECK(env, timestamp, renv); + /* + * Check if we're still locked out after checking + * the timestamp. + */ + if (F_ISSET(renv, DB_REGENV_REPLOCKED)) + return (EINVAL); + } + REP_SYSTEM_LOCK(env); + /* + * !!! + * Note, we are checking REP_LOCKOUT_OP, but we are + * incrementing rep->handle_cnt. That seems like a mismatch, + * but the intention is to return DEADLOCK to the application + * which will cause them to abort the txn quickly and allow + * the lockout to proceed. + * + * The correctness of doing this depends on the fact that + * lockout of the API always sets REP_LOCKOUT_OP first. + */ + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_OP)) { + REP_SYSTEM_UNLOCK(env); + if (!return_now) + __os_yield(env, 5, 0); + return (DB_LOCK_DEADLOCK); + } + + if (checkgen && dbp->timestamp != renv->rep_timestamp) { + REP_SYSTEM_UNLOCK(env); + return (DB_REP_HANDLE_DEAD); + } + rep->handle_cnt++; + REP_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * Check for permission to increment handle_cnt, and do so if possible. Used in + * cases where we want to count an operation in the context of a transaction, + * but the operation does not involve a DB handle. 
+ * + * PUBLIC: int __op_handle_enter __P((ENV *)); + */ +int +__op_handle_enter(env) + ENV *env; +{ + REP *rep; + int ret; + + rep = env->rep_handle->region; + REP_SYSTEM_LOCK(env); + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_OP)) + ret = DB_LOCK_DEADLOCK; + else { + rep->handle_cnt++; + ret = 0; + } + REP_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __op_rep_enter -- + * + * Check if we are in the middle of replication initialization and/or + * recovery, and if so, disallow new multi-step operations, such as + * transaction and memp gets. If operations are allowed, + * increment the op_cnt, so that we do not start recovery while we have + * active operations. + * + * PUBLIC: int __op_rep_enter __P((ENV *, int, int)); + */ +int +__op_rep_enter(env, local_nowait, obey_user) + ENV *env; + int local_nowait, obey_user; +{ + DB_REP *db_rep; + REP *rep; + int cnt, ret; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + + REP_SYSTEM_LOCK(env); + for (cnt = 0; FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_OP);) { + REP_SYSTEM_UNLOCK(env); + /* + * We're spinning - environment may be hung. Check if + * recovery has been initiated. + */ + PANIC_CHECK(env); + if (local_nowait) + return (DB_REP_LOCKOUT); + if (FLD_ISSET(rep->config, REP_C_NOWAIT) && obey_user) { + __db_errx(env, DB_STR("3509", + "Operation locked out. Waiting for replication lockout to complete")); + return (DB_REP_LOCKOUT); + } + __os_yield(env, 5, 0); + cnt += 5; + if (++cnt % 60 == 0 && + (ret = __rep_show_progress(env, + "__op_rep_enter", cnt / 60)) != 0) + return (ret); + REP_SYSTEM_LOCK(env); + } + rep->op_cnt++; + REP_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __op_rep_exit -- + * + * Decrement op count upon transaction commit/abort/discard or + * memp_fput. 
+ * + * PUBLIC: int __op_rep_exit __P((ENV *)); + */ +int +__op_rep_exit(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(env->dbenv, DB_ENV_NOLOCKING)) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + + REP_SYSTEM_LOCK(env); + DB_ASSERT(env, rep->op_cnt > 0); + rep->op_cnt--; + REP_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __archive_rep_enter + * Used by log_archive to determine if it is okay to remove + * log files. + * + * PUBLIC: int __archive_rep_enter __P((ENV *)); + */ +int +__archive_rep_enter(env) + ENV *env; +{ + DB_REP *db_rep; + REGENV *renv; + REGINFO *infop; + REP *rep; + time_t timestamp; + int ret; + + ret = 0; + infop = env->reginfo; + renv = infop->primary; + + /* + * This is tested before REP_ON below because we always need + * to obey if any replication process has disabled archiving. + * Everything is in the environment region that we need here. + */ + if (F_ISSET(renv, DB_REGENV_REPLOCKED)) { + (void)time(×tamp); + TIMESTAMP_CHECK(env, timestamp, renv); + /* + * Check if we're still locked out after checking + * the timestamp. + */ + if (F_ISSET(renv, DB_REGENV_REPLOCKED)) + return (DB_REP_LOCKOUT); + } + + if (!REP_ON(env)) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + REP_SYSTEM_LOCK(env); + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_ARCHIVE)) + ret = DB_REP_LOCKOUT; + else + rep->arch_th++; + REP_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __archive_rep_exit + * Clean up accounting for log archive threads. 
+ * + * PUBLIC: int __archive_rep_exit __P((ENV *)); + */ +int +__archive_rep_exit(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + + if (!REP_ON(env)) + return (0); + + db_rep = env->rep_handle; + rep = db_rep->region; + REP_SYSTEM_LOCK(env); + rep->arch_th--; + REP_SYSTEM_UNLOCK(env); + return (0); +} + +/* + * __rep_lockout_archive -- + * Coordinate with other threads archiving log files so that + * we can run and know that no log files will be removed out + * from underneath us. + * Assumes the caller holds the region mutex. + * + * PUBLIC: int __rep_lockout_archive __P((ENV *, REP *)); + */ +int +__rep_lockout_archive(env, rep) + ENV *env; + REP *rep; +{ + return (__rep_lockout_int(env, rep, &rep->arch_th, 0, + "arch_th", REP_LOCKOUT_ARCHIVE)); +} + +/* + * __rep_lockout_api -- + * Coordinate with other threads in the library and active txns so + * that we can run single-threaded, for recovery or internal backup. + * Assumes the caller holds the region mutex. + * + * PUBLIC: int __rep_lockout_api __P((ENV *, REP *)); + */ +int +__rep_lockout_api(env, rep) + ENV *env; + REP *rep; +{ + int ret; + + /* + * We must drain long-running operations first. We check + * REP_LOCKOUT_OP in __db_rep_enter in order to allow them + * to abort existing txns quickly. Therefore, we must + * always lockout REP_LOCKOUT_OP first, then REP_LOCKOUT_API. + */ + if ((ret = __rep_lockout_int(env, rep, &rep->op_cnt, 0, + "op_cnt", REP_LOCKOUT_OP)) != 0) + return (ret); + if ((ret = __rep_lockout_int(env, rep, &rep->handle_cnt, 0, + "handle_cnt", REP_LOCKOUT_API)) != 0) + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_OP); + return (ret); +} + +/* + * PUBLIC: int __rep_take_apilockout __P((ENV *)); + * + * For use by repmgr (keep the module boundaries reasonably clean). 
+ */ +int +__rep_take_apilockout(env) + ENV *env; +{ + REP *rep; + int ret; + + rep = env->rep_handle->region; + REP_SYSTEM_LOCK(env); + ret = __rep_lockout_api(env, rep); + REP_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * PUBLIC: int __rep_clear_apilockout __P((ENV *)); + */ +int +__rep_clear_apilockout(env) + ENV *env; +{ + REP *rep; + + rep = env->rep_handle->region; + + REP_SYSTEM_LOCK(env); + CLR_LOCKOUT_BDB(rep); + REP_SYSTEM_UNLOCK(env); + return (0); +} + +/* + * __rep_lockout_apply -- + * Coordinate with other threads processing messages so that + * we can run single-threaded and know that no incoming + * message can apply new log records. + * This call should be short-term covering a specific critical + * operation where we need to make sure no new records change + * the log. Currently used to coordinate with elections. + * Assumes the caller holds the region mutex. + * + * PUBLIC: int __rep_lockout_apply __P((ENV *, REP *, u_int32_t)); + */ +int +__rep_lockout_apply(env, rep, apply_th) + ENV *env; + REP *rep; + u_int32_t apply_th; +{ + return (__rep_lockout_int(env, rep, &rep->apply_th, apply_th, + "apply_th", REP_LOCKOUT_APPLY)); +} + +/* + * __rep_lockout_msg -- + * Coordinate with other threads processing messages so that + * we can run single-threaded and know that no incoming + * message can change the world (i.e., like a NEWMASTER message). + * This call should be short-term covering a specific critical + * operation where we need to make sure no new messages arrive + * in the middle and all message threads are out before we start it. + * Assumes the caller holds the region mutex. 
+ * + * PUBLIC: int __rep_lockout_msg __P((ENV *, REP *, u_int32_t)); + */ +int +__rep_lockout_msg(env, rep, msg_th) + ENV *env; + REP *rep; + u_int32_t msg_th; +{ + return (__rep_lockout_int(env, rep, &rep->msg_th, msg_th, + "msg_th", REP_LOCKOUT_MSG)); +} + +/* + * __rep_lockout_int -- + * Internal common code for locking out and coordinating + * with other areas of the code. + * Assumes the caller holds the region mutex. + * + */ +static int +__rep_lockout_int(env, rep, fieldp, field_val, msg, lockout_flag) + ENV *env; + REP *rep; + u_int32_t *fieldp; + const char *msg; + u_int32_t field_val, lockout_flag; +{ + int ret, wait_cnt; + + FLD_SET(rep->lockout_flags, lockout_flag); + for (wait_cnt = 0; *fieldp > field_val;) { + if ((ret = __rep_notify_threads(env, LOCKOUT)) != 0) + return (ret); + REP_SYSTEM_UNLOCK(env); + /* We're spinning - environment may be hung. Check if + * recovery has been initiated. + */ + PANIC_CHECK(env); + __os_yield(env, 1, 0); +#ifdef DIAGNOSTIC + if (wait_cnt == 5) { + RPRINT(env, (env, DB_VERB_REP_MISC, + "Waiting for %s (%lu) to complete lockout to %lu", + msg, (u_long)*fieldp, (u_long)field_val)); + __db_errx(env, DB_STR_A("3510", +"Waiting for %s (%lu) to complete replication lockout", + "%s %lu"), msg, (u_long)*fieldp); + } + if (++wait_cnt % 60 == 0) + __db_errx(env, DB_STR_A("3511", +"Waiting for %s (%lu) to complete replication lockout for %d minutes", + "%s %lu %d"), msg, (u_long)*fieldp, wait_cnt / 60); +#endif + REP_SYSTEM_LOCK(env); + } + + COMPQUIET(msg, NULL); + return (0); +} + +/* + * __rep_send_throttle - + * Send a record, throttling if necessary. Callers of this function + * will throttle - breaking out of their loop, if the repth->type field + * changes from the normal message type to the *_MORE message type. + * This function will send the normal type unless throttling gets invoked. + * Then it sets the type field and sends the _MORE message. 
 *
 * Throttling is always only relevant in serving requests, so we always send
 * with REPCTL_RESEND.  Additional desired flags can be passed in the ctlflags
 * argument.
 *
 * PUBLIC: int __rep_send_throttle __P((ENV *, int, REP_THROTTLE *,
 * PUBLIC:    u_int32_t, u_int32_t));
 */
int
__rep_send_throttle(env, eid, repth, flags, ctlflags)
	ENV *env;
	int eid;
	REP_THROTTLE *repth;
	u_int32_t ctlflags, flags;
{
	DB_REP *db_rep;
	REP *rep;
	u_int32_t size, typemore;
	int check_limit;

	/* Throttling is enabled iff the caller set a byte budget. */
	check_limit = repth->gbytes != 0 || repth->bytes != 0;
	/*
	 * If we only want to do throttle processing and we don't have it
	 * turned on, return immediately.
	 */
	if (!check_limit && LF_ISSET(REP_THROTTLE_ONLY))
		return (0);

	db_rep = env->rep_handle;
	rep = db_rep->region;
	typemore = 0;
	if (repth->type == REP_LOG)
		typemore = REP_LOG_MORE;
	if (repth->type == REP_PAGE)
		typemore = REP_PAGE_MORE;
	/* Only LOG and PAGE messages have a *_MORE counterpart. */
	DB_ASSERT(env, typemore != 0);

	/*
	 * data_dbt.size is only the size of the log
	 * record;  it doesn't count the size of the
	 * control structure.  Factor that in as well
	 * so we're not off by a lot if our log records
	 * are small.
	 */
	size = repth->data_dbt->size + sizeof(__rep_control_args);
	if (check_limit) {
		/* Convert whole gigabytes into bytes until size fits. */
		while (repth->bytes <= size) {
			if (repth->gbytes > 0) {
				repth->bytes += GIGABYTE;
				--(repth->gbytes);
				continue;
			}
			/*
			 * We don't hold the rep mutex,
			 * and may miscount.
			 */
			STAT(rep->stat.st_nthrottles++);
			repth->type = typemore;
			goto snd;
		}
		repth->bytes -= size;
	}
	/*
	 * Always send if it is typemore, otherwise send only if
	 * REP_THROTTLE_ONLY is not set.
	 *
	 * NOTE: It is the responsibility of the caller to marshal, if
	 * needed, the data_dbt.  This function just sends what it is given.
	 */
snd:	if ((repth->type == typemore || !LF_ISSET(REP_THROTTLE_ONLY)) &&
	    (__rep_send_message(env, eid, repth->type,
	    &repth->lsn, repth->data_dbt, (REPCTL_RESEND | ctlflags), 0) != 0))
		return (DB_REP_UNAVAIL);
	return (0);
}

/*
 * __rep_msg_to_old --
 *	Convert current message numbers to old message numbers.
 *
 * PUBLIC: u_int32_t __rep_msg_to_old __P((u_int32_t, u_int32_t));
 */
u_int32_t
__rep_msg_to_old(version, rectype)
	u_int32_t version, rectype;
{
	/*
	 * We need to convert from current message numbers to old numbers and
	 * we need to convert from old numbers to current numbers.  Offset by
	 * one for more readable code.
	 */
	/*
	 * Everything for version 0 is invalid, there is no version 0.
	 */
	/*
	 * Rows are indexed by the peer's DB_REPVERSION; rows 0-2 are
	 * placeholders for unsupported protocol versions and map every
	 * record type to REP_INVALID.
	 */
	static const u_int32_t table[DB_REPVERSION][REP_MAX_MSG+1] = {
	/* There is no DB_REPVERSION 0. */
	{   REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
	/*
	 * 4.2/DB_REPVERSION 1 no longer supported.
	 */
	{   REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
	/*
	 * 4.3/DB_REPVERSION 2 no longer supported.
	 */
	{   REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID,
	    REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID },
	/*
	 * From 4.7 message number To 4.4/4.5 message number
	 */
	{   REP_INVALID,	/* NO message 0 */
	    1,			/* REP_ALIVE */
	    2,			/* REP_ALIVE_REQ */
	    3,			/* REP_ALL_REQ */
	    4,			/* REP_BULK_LOG */
	    5,			/* REP_BULK_PAGE */
	    6,			/* REP_DUPMASTER */
	    7,			/* REP_FILE */
	    8,			/* REP_FILE_FAIL */
	    9,			/* REP_FILE_REQ */
	    REP_INVALID,	/* REP_LEASE_GRANT */
	    10,			/* REP_LOG */
	    11,			/* REP_LOG_MORE */
	    12,			/* REP_LOG_REQ */
	    13,			/* REP_MASTER_REQ */
	    14,			/* REP_NEWCLIENT */
	    15,			/* REP_NEWFILE */
	    16,			/* REP_NEWMASTER */
	    17,			/* REP_NEWSITE */
	    18,			/* REP_PAGE */
	    19,			/* REP_PAGE_FAIL */
	    20,			/* REP_PAGE_MORE */
	    21,			/* REP_PAGE_REQ */
	    22,			/* REP_REREQUEST */
	    REP_INVALID,	/* REP_START_SYNC */
	    23,			/* REP_UPDATE */
	    24,			/* REP_UPDATE_REQ */
	    25,			/* REP_VERIFY */
	    26,			/* REP_VERIFY_FAIL */
	    27,			/* REP_VERIFY_REQ */
	    28,			/* REP_VOTE1 */
	    29			/* REP_VOTE2 */
	},
	/*
	 * From 4.7 message number To 4.6 message number.  There are
	 * NO message differences between 4.6 and 4.7.  The
	 * control structure changed.
	 */
	{   REP_INVALID,	/* NO message 0 */
	    1,			/* REP_ALIVE */
	    2,			/* REP_ALIVE_REQ */
	    3,			/* REP_ALL_REQ */
	    4,			/* REP_BULK_LOG */
	    5,			/* REP_BULK_PAGE */
	    6,			/* REP_DUPMASTER */
	    7,			/* REP_FILE */
	    8,			/* REP_FILE_FAIL */
	    9,			/* REP_FILE_REQ */
	    10,			/* REP_LEASE_GRANT */
	    11,			/* REP_LOG */
	    12,			/* REP_LOG_MORE */
	    13,			/* REP_LOG_REQ */
	    14,			/* REP_MASTER_REQ */
	    15,			/* REP_NEWCLIENT */
	    16,			/* REP_NEWFILE */
	    17,			/* REP_NEWMASTER */
	    18,			/* REP_NEWSITE */
	    19,			/* REP_PAGE */
	    20,			/* REP_PAGE_FAIL */
	    21,			/* REP_PAGE_MORE */
	    22,			/* REP_PAGE_REQ */
	    23,			/* REP_REREQUEST */
	    24,			/* REP_START_SYNC */
	    25,			/* REP_UPDATE */
	    26,			/* REP_UPDATE_REQ */
	    27,			/* REP_VERIFY */
	    28,			/* REP_VERIFY_FAIL */
	    29,			/* REP_VERIFY_REQ */
	    30,			/* REP_VOTE1 */
	    31			/* REP_VOTE2 */
	},
	/*
	 * From 5.2 message number To 4.7 message number.  There are
	 * NO message differences between 4.7 and 5.2.  The
	 * content of vote1 changed.
	 */
	{   REP_INVALID,	/* NO message 0 */
	    1,			/* REP_ALIVE */
	    2,			/* REP_ALIVE_REQ */
	    3,			/* REP_ALL_REQ */
	    4,			/* REP_BULK_LOG */
	    5,			/* REP_BULK_PAGE */
	    6,			/* REP_DUPMASTER */
	    7,			/* REP_FILE */
	    8,			/* REP_FILE_FAIL */
	    9,			/* REP_FILE_REQ */
	    10,			/* REP_LEASE_GRANT */
	    11,			/* REP_LOG */
	    12,			/* REP_LOG_MORE */
	    13,			/* REP_LOG_REQ */
	    14,			/* REP_MASTER_REQ */
	    15,			/* REP_NEWCLIENT */
	    16,			/* REP_NEWFILE */
	    17,			/* REP_NEWMASTER */
	    18,			/* REP_NEWSITE */
	    19,			/* REP_PAGE */
	    20,			/* REP_PAGE_FAIL */
	    21,			/* REP_PAGE_MORE */
	    22,			/* REP_PAGE_REQ */
	    23,			/* REP_REREQUEST */
	    24,			/* REP_START_SYNC */
	    25,			/* REP_UPDATE */
	    26,			/* REP_UPDATE_REQ */
	    27,			/* REP_VERIFY */
	    28,			/* REP_VERIFY_FAIL */
	    29,			/* REP_VERIFY_REQ */
	    30,			/* REP_VOTE1 */
	    31			/* REP_VOTE2 */
	}
	};
	return (table[version][rectype]);
}

/*
 * __rep_msg_from_old --
 *	Convert old message numbers to current message numbers.
+ * + * PUBLIC: u_int32_t __rep_msg_from_old __P((u_int32_t, u_int32_t)); + */ +u_int32_t +__rep_msg_from_old(version, rectype) + u_int32_t version, rectype; +{ + /* + * We need to convert from current message numbers to old numbers and + * we need to convert from old numbers to current numbers. Offset by + * one for more readable code. + */ + /* + * Everything for version 0 is invalid, there is no version 0. + */ + static const u_int32_t table[DB_REPVERSION][REP_MAX_MSG+1] = { + /* There is no DB_REPVERSION 0. */ + { REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID }, + /* + * 4.2/DB_REPVERSION 1 no longer supported. + */ + { REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID }, + /* + * 4.3/DB_REPVERSION 2 no longer supported. 
+ */ + { REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID, + REP_INVALID, REP_INVALID, REP_INVALID, REP_INVALID }, + /* + * From 4.4/4.5 message number To 4.7 message number + */ + { REP_INVALID, /* NO message 0 */ + 1, /* 1, REP_ALIVE */ + 2, /* 2, REP_ALIVE_REQ */ + 3, /* 3, REP_ALL_REQ */ + 4, /* 4, REP_BULK_LOG */ + 5, /* 5, REP_BULK_PAGE */ + 6, /* 6, REP_DUPMASTER */ + 7, /* 7, REP_FILE */ + 8, /* 8, REP_FILE_FAIL */ + 9, /* 9, REP_FILE_REQ */ + /* 10, REP_LEASE_GRANT doesn't exist */ + 11, /* 10, REP_LOG */ + 12, /* 11, REP_LOG_MORE */ + 13, /* 12, REP_LOG_REQ */ + 14, /* 13, REP_MASTER_REQ */ + 15, /* 14, REP_NEWCLIENT */ + 16, /* 15, REP_NEWFILE */ + 17, /* 16, REP_NEWMASTER */ + 18, /* 17, REP_NEWSITE */ + 19, /* 18, REP_PAGE */ + 20, /* 19, REP_PAGE_FAIL */ + 21, /* 20, REP_PAGE_MORE */ + 22, /* 21, REP_PAGE_REQ */ + 23, /* 22, REP_REREQUEST */ + /* 24, REP_START_SYNC doesn't exist */ + 25, /* 23, REP_UPDATE */ + 26, /* 24, REP_UPDATE_REQ */ + 27, /* 25, REP_VERIFY */ + 28, /* 26, REP_VERIFY_FAIL */ + 29, /* 27, REP_VERIFY_REQ */ + 30, /* 28, REP_VOTE1 */ + 31, /* 29, REP_VOTE2 */ + REP_INVALID, /* 30, 4.4/4.5 no message */ + REP_INVALID /* 31, 4.4/4.5 no message */ + }, + /* + * From 4.6 message number To 4.7 message number. There are + * NO message differences between 4.6 and 4.7. The + * control structure changed. 
+ */ + { REP_INVALID, /* NO message 0 */ + 1, /* 1, REP_ALIVE */ + 2, /* 2, REP_ALIVE_REQ */ + 3, /* 3, REP_ALL_REQ */ + 4, /* 4, REP_BULK_LOG */ + 5, /* 5, REP_BULK_PAGE */ + 6, /* 6, REP_DUPMASTER */ + 7, /* 7, REP_FILE */ + 8, /* 8, REP_FILE_FAIL */ + 9, /* 9, REP_FILE_REQ */ + 10, /* 10, REP_LEASE_GRANT */ + 11, /* 11, REP_LOG */ + 12, /* 12, REP_LOG_MORE */ + 13, /* 13, REP_LOG_REQ */ + 14, /* 14, REP_MASTER_REQ */ + 15, /* 15, REP_NEWCLIENT */ + 16, /* 16, REP_NEWFILE */ + 17, /* 17, REP_NEWMASTER */ + 18, /* 18, REP_NEWSITE */ + 19, /* 19, REP_PAGE */ + 20, /* 20, REP_PAGE_FAIL */ + 21, /* 21, REP_PAGE_MORE */ + 22, /* 22, REP_PAGE_REQ */ + 23, /* 22, REP_REREQUEST */ + 24, /* 24, REP_START_SYNC */ + 25, /* 25, REP_UPDATE */ + 26, /* 26, REP_UPDATE_REQ */ + 27, /* 27, REP_VERIFY */ + 28, /* 28, REP_VERIFY_FAIL */ + 29, /* 29, REP_VERIFY_REQ */ + 30, /* 30, REP_VOTE1 */ + 31 /* 31, REP_VOTE2 */ + }, + /* + * From 4.7 message number To 5.2 message number. There are + * NO message differences between them. The vote1 contents + * changed. 
+ */ + { REP_INVALID, /* NO message 0 */ + 1, /* 1, REP_ALIVE */ + 2, /* 2, REP_ALIVE_REQ */ + 3, /* 3, REP_ALL_REQ */ + 4, /* 4, REP_BULK_LOG */ + 5, /* 5, REP_BULK_PAGE */ + 6, /* 6, REP_DUPMASTER */ + 7, /* 7, REP_FILE */ + 8, /* 8, REP_FILE_FAIL */ + 9, /* 9, REP_FILE_REQ */ + 10, /* 10, REP_LEASE_GRANT */ + 11, /* 11, REP_LOG */ + 12, /* 12, REP_LOG_MORE */ + 13, /* 13, REP_LOG_REQ */ + 14, /* 14, REP_MASTER_REQ */ + 15, /* 15, REP_NEWCLIENT */ + 16, /* 16, REP_NEWFILE */ + 17, /* 17, REP_NEWMASTER */ + 18, /* 18, REP_NEWSITE */ + 19, /* 19, REP_PAGE */ + 20, /* 20, REP_PAGE_FAIL */ + 21, /* 21, REP_PAGE_MORE */ + 22, /* 22, REP_PAGE_REQ */ + 23, /* 22, REP_REREQUEST */ + 24, /* 24, REP_START_SYNC */ + 25, /* 25, REP_UPDATE */ + 26, /* 26, REP_UPDATE_REQ */ + 27, /* 27, REP_VERIFY */ + 28, /* 28, REP_VERIFY_FAIL */ + 29, /* 29, REP_VERIFY_REQ */ + 30, /* 30, REP_VOTE1 */ + 31 /* 31, REP_VOTE2 */ + } + }; + return (table[version][rectype]); +} + +/* + * __rep_print_system -- + * Optionally print a verbose message, including to the system file. + * + * PUBLIC: int __rep_print_system __P((ENV *, u_int32_t, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__rep_print_system(ENV *env, u_int32_t verbose, const char *fmt, ...) +#else +__rep_print_system(env, verbose, fmt, va_alist) + ENV *env; + u_int32_t verbose; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __rep_print_int(env, verbose | DB_VERB_REP_SYSTEM, fmt, ap); + va_end(ap); + return (ret); +} + +/* + * __rep_print -- + * Optionally print a verbose message. + * + * PUBLIC: int __rep_print __P((ENV *, u_int32_t, const char *, ...)) + * PUBLIC: __attribute__ ((__format__ (__printf__, 3, 4))); + */ +int +#ifdef STDC_HEADERS +__rep_print(ENV *env, u_int32_t verbose, const char *fmt, ...) 
+#else +__rep_print(env, verbose, fmt, va_alist) + ENV *env; + u_int32_t verbose; + const char *fmt; + va_dcl +#endif +{ + va_list ap; + int ret; + +#ifdef STDC_HEADERS + va_start(ap, fmt); +#else + va_start(ap); +#endif + ret = __rep_print_int(env, verbose, fmt, ap); + va_end(ap); + return (ret); +} + +/* + * __rep_print_int -- + * Optionally print a verbose message. + * + * NOTE: + * One anomaly is that the messaging functions expect/use/require + * void functions. The use of a mutex in __rep_print_int requires + * a return value. + */ +static int +__rep_print_int(env, verbose, fmt, ap) + ENV *env; + u_int32_t verbose; + const char *fmt; + va_list ap; +{ + DB_MSGBUF mb; + REP *rep; + db_timespec ts; + pid_t pid; + db_threadid_t tid; + int diag_msg; + u_int32_t regular_msg, tmp_verbose; + const char *s; + char buf[DB_THREADID_STRLEN]; + + tmp_verbose = env->dbenv->verbose; + if (FLD_ISSET(tmp_verbose, verbose | DB_VERB_REPLICATION) == 0) + return (0); + DB_MSGBUF_INIT(&mb); + + diag_msg = 0; + if (REP_ON(env)) { + rep = env->rep_handle->region; + /* + * If system diag messages are configured and this message's + * verbose level includes DB_VERB_REP_SYSTEM, this is a diag + * message. This means it will be written to the diagnostic + * message files. + */ + diag_msg = FLD_ISSET(tmp_verbose, DB_VERB_REP_SYSTEM) && + FLD_ISSET(verbose, DB_VERB_REP_SYSTEM) && + !FLD_ISSET(rep->config, REP_C_INMEM); + } else + rep = NULL; + /* + * We need to know if this message should be printed out + * via the regular, user mechanism. + */ + FLD_CLR(tmp_verbose, DB_VERB_REP_SYSTEM); + regular_msg = FLD_ISSET(tmp_verbose, + verbose | DB_VERB_REPLICATION); + + /* + * It is possible we could be called before the env is finished + * getting set up and we want to skip that. 
+ */ + if (diag_msg == 0 && regular_msg == 0) + return (0); + s = NULL; + if (env->dbenv->db_errpfx != NULL) + s = env->dbenv->db_errpfx; + else if (rep != NULL) { + if (F_ISSET(rep, REP_F_CLIENT)) + s = "CLIENT"; + else if (F_ISSET(rep, REP_F_MASTER)) + s = "MASTER"; + } + if (s == NULL) + s = "REP_UNDEF"; + __os_id(env->dbenv, &pid, &tid); + if (diag_msg) + MUTEX_LOCK(env, rep->mtx_diag); + __os_gettime(env, &ts, 1); + __db_msgadd(env, &mb, "[%lu:%lu][%s] %s: ", + (u_long)ts.tv_sec, (u_long)ts.tv_nsec/NS_PER_US, + env->dbenv->thread_id_string(env->dbenv, pid, tid, buf), s); + + __db_msgadd_ap(env, &mb, fmt, ap); + + DB_MSGBUF_REP_FLUSH(env, &mb, diag_msg, regular_msg); + if (diag_msg) + MUTEX_UNLOCK(env, rep->mtx_diag); + return (0); +} + +/* + * PUBLIC: void __rep_print_message + * PUBLIC: __P((ENV *, int, __rep_control_args *, char *, u_int32_t)); + */ +void +__rep_print_message(env, eid, rp, str, flags) + ENV *env; + int eid; + __rep_control_args *rp; + char *str; + u_int32_t flags; +{ + u_int32_t ctlflags, rectype, verbflag; + char ftype[64], *home, *type; + + rectype = rp->rectype; + ctlflags = rp->flags; + verbflag = DB_VERB_REP_MSGS | DB_VERB_REPLICATION; + if (rp->rep_version != DB_REPVERSION) + rectype = __rep_msg_from_old(rp->rep_version, rectype); + switch (rectype) { + case REP_ALIVE: + FLD_SET(verbflag, DB_VERB_REP_ELECT | DB_VERB_REP_MISC); + type = "alive"; + break; + case REP_ALIVE_REQ: + type = "alive_req"; + break; + case REP_ALL_REQ: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "all_req"; + break; + case REP_BULK_LOG: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "bulk_log"; + break; + case REP_BULK_PAGE: + FLD_SET(verbflag, DB_VERB_REP_SYNC); + type = "bulk_page"; + break; + case REP_DUPMASTER: + FLD_SET(verbflag, DB_VERB_REP_SYSTEM); + type = "dupmaster"; + break; + case REP_FILE: + type = "file"; + break; + case REP_FILE_FAIL: + type = "file_fail"; + break; + case REP_FILE_REQ: + type = "file_req"; + break; + case REP_LEASE_GRANT: + 
FLD_SET(verbflag, DB_VERB_REP_LEASE); + type = "lease_grant"; + break; + case REP_LOG: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "log"; + break; + case REP_LOG_MORE: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "log_more"; + break; + case REP_LOG_REQ: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "log_req"; + break; + case REP_MASTER_REQ: + type = "master_req"; + break; + case REP_NEWCLIENT: + FLD_SET(verbflag, DB_VERB_REP_MISC | DB_VERB_REP_SYSTEM); + type = "newclient"; + break; + case REP_NEWFILE: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "newfile"; + break; + case REP_NEWMASTER: + FLD_SET(verbflag, DB_VERB_REP_MISC | DB_VERB_REP_SYSTEM); + type = "newmaster"; + break; + case REP_NEWSITE: + type = "newsite"; + break; + case REP_PAGE: + FLD_SET(verbflag, DB_VERB_REP_SYNC); + type = "page"; + break; + case REP_PAGE_FAIL: + FLD_SET(verbflag, DB_VERB_REP_SYNC); + type = "page_fail"; + break; + case REP_PAGE_MORE: + FLD_SET(verbflag, DB_VERB_REP_SYNC); + type = "page_more"; + break; + case REP_PAGE_REQ: + FLD_SET(verbflag, DB_VERB_REP_SYNC); + type = "page_req"; + break; + case REP_REREQUEST: + type = "rerequest"; + break; + case REP_START_SYNC: + FLD_SET(verbflag, DB_VERB_REP_MISC); + type = "start_sync"; + break; + case REP_UPDATE: + FLD_SET(verbflag, DB_VERB_REP_SYNC | DB_VERB_REP_SYSTEM); + type = "update"; + break; + case REP_UPDATE_REQ: + FLD_SET(verbflag, DB_VERB_REP_SYNC | DB_VERB_REP_SYSTEM); + type = "update_req"; + break; + case REP_VERIFY: + FLD_SET(verbflag, DB_VERB_REP_SYNC | DB_VERB_REP_SYSTEM); + type = "verify"; + break; + case REP_VERIFY_FAIL: + FLD_SET(verbflag, DB_VERB_REP_SYNC | DB_VERB_REP_SYSTEM); + type = "verify_fail"; + break; + case REP_VERIFY_REQ: + FLD_SET(verbflag, DB_VERB_REP_SYNC | DB_VERB_REP_SYSTEM); + type = "verify_req"; + break; + case REP_VOTE1: + FLD_SET(verbflag, DB_VERB_REP_ELECT | DB_VERB_REP_SYSTEM); + type = "vote1"; + break; + case REP_VOTE2: + FLD_SET(verbflag, DB_VERB_REP_ELECT | DB_VERB_REP_SYSTEM); + 
type = "vote2"; + break; + default: + type = "NOTYPE"; + break; + } + + /* + * !!! + * If adding new flags to print out make sure the aggregate + * length cannot overflow the buffer. + */ + ftype[0] = '\0'; + if (LF_ISSET(DB_REP_ANYWHERE)) + (void)strcat(ftype, " any"); /* 4 */ + if (FLD_ISSET(ctlflags, REPCTL_FLUSH)) + (void)strcat(ftype, " flush"); /* 10 */ + /* + * We expect most of the time the messages will indicate + * group membership. Only print if we're not already + * part of a group. + */ + if (!FLD_ISSET(ctlflags, REPCTL_GROUP_ESTD)) + (void)strcat(ftype, " nogroup"); /* 18 */ + if (FLD_ISSET(ctlflags, REPCTL_LEASE)) + (void)strcat(ftype, " lease"); /* 24 */ + if (LF_ISSET(DB_REP_NOBUFFER)) + (void)strcat(ftype, " nobuf"); /* 30 */ + if (FLD_ISSET(ctlflags, REPCTL_PERM)) + (void)strcat(ftype, " perm"); /* 35 */ + if (LF_ISSET(DB_REP_REREQUEST)) + (void)strcat(ftype, " rereq"); /* 41 */ + if (FLD_ISSET(ctlflags, REPCTL_RESEND)) + (void)strcat(ftype, " resend"); /* 48 */ + if (FLD_ISSET(ctlflags, REPCTL_LOG_END)) + (void)strcat(ftype, " logend"); /* 55 */ + + /* + * !!! + * We selectively turned on bits using different verbose settings + * that relate to each message type. Therefore, since the + * DB_VERB_REP_SYSTEM flag is explicitly set above when wanted, + * we *must* use the VPRINT macro here. It will correctly + * handle the messages whether or not the SYSTEM flag is set. + */ + if ((home = env->db_home) == NULL) + home = "NULL"; + VPRINT(env, (env, verbflag, + "%s %s: msgv = %lu logv %lu gen = %lu eid %d, type %s, LSN [%lu][%lu] %s", + home, str, + (u_long)rp->rep_version, (u_long)rp->log_version, (u_long)rp->gen, + eid, type, (u_long)rp->lsn.file, (u_long)rp->lsn.offset, ftype)); + /* + * Make sure the version is close, and not swapped + * here. Check for current version, +/- a little bit. 
+ */ + DB_ASSERT(env, rp->rep_version <= DB_REPVERSION+10); + DB_ASSERT(env, rp->log_version <= DB_LOGVERSION+10); +} + +/* + * PUBLIC: void __rep_fire_event __P((ENV *, u_int32_t, void *)); + */ +void +__rep_fire_event(env, event, info) + ENV *env; + u_int32_t event; + void *info; +{ + int ret; + + /* + * Give repmgr first crack at handling all replication-related events. + * If it can't (or chooses not to) handle the event fully, then pass it + * along to the application. + */ + ret = __repmgr_handle_event(env, event, info); + DB_ASSERT(env, ret == 0 || ret == DB_EVENT_NOT_HANDLED); + + if (ret == DB_EVENT_NOT_HANDLED) + DB_EVENT(env, event, info); +} + +/* + * __rep_msg -- + * Rep system diagnostic messaging routine. + * This function is called from the __db_msg subsystem to + * write out diagnostic messages to replication-owned files. + * + * PUBLIC: void __rep_msg __P((const ENV *, const char *)); + */ +void +__rep_msg(env, msg) + const ENV *env; + const char *msg; +{ + DB_FH *fhp; + DB_REP *db_rep; + REP *rep; + int i; + size_t cnt, nlcnt; + char nl = '\n'; + + if (PANIC_ISSET(env)) + return; + db_rep = env->rep_handle; + rep = db_rep->region; + DB_ASSERT((ENV *)env, !FLD_ISSET(rep->config, REP_C_INMEM)); + /* + * We know the only way we get here is with the mutex locked. So + * we can read, modify and change all the diag related fields. + */ + i = rep->diag_index; + fhp = db_rep->diagfile[i]; + + if (db_rep->diag_off != rep->diag_off) + (void)__os_seek((ENV *)env, fhp, 0, 0, rep->diag_off); + if (__os_write((ENV *)env, fhp, (void *)msg, strlen(msg), &cnt) != 0) + return; + if (__os_write((ENV *)env, fhp, &nl, 1, &nlcnt) != 0) + return; + db_rep->diag_off = rep->diag_off += (cnt + nlcnt); + /* + * If writing this message put us over the file size threshold, + * then we reset to the next file. We don't care if it is + * exactly at the size, some amount over the file size is fine. 
+ */ + if (rep->diag_off >= REP_DIAGSIZE) { + rep->diag_index = (++i % DBREP_DIAG_FILES); + rep->diag_off = 0; + } + return; +} + +/* + * PUBLIC: int __rep_notify_threads __P((ENV *, rep_waitreason_t)); + * + * Caller must hold rep region mutex. In the AWAIT_LSN case, caller must also + * hold mtx_clientdb. + */ +int +__rep_notify_threads(env, wake_reason) + ENV *env; + rep_waitreason_t wake_reason; +{ + REP *rep; + struct __rep_waiter *waiter; + struct rep_waitgoal *goal; + int ret, wake; + + ret = 0; + rep = env->rep_handle->region; + + SH_TAILQ_FOREACH(waiter, &rep->waiters, links, __rep_waiter) { + goal = &waiter->goal; + wake = 0; + if (wake_reason == LOCKOUT) { + F_SET(waiter, REP_F_PENDING_LOCKOUT); + wake = 1; + } else if (wake_reason == goal->why || + (goal->why == AWAIT_HISTORY && wake_reason == AWAIT_LSN)) { + /* + * It's important that we only call __rep_check_goal + * with "goals" that match the wake_reason passed to us + * (modulo the LSN-to-HISTORY equivalence), because the + * caller has ensured that it is holding the appropriate + * mutexes depending on the wake_reason. + */ + if ((ret = __rep_check_goal(env, goal)) == 0) + wake = 1; + else if (ret == DB_TIMEOUT) + ret = 0; + else + goto out; + } + + if (wake) { + MUTEX_UNLOCK(env, waiter->mtx_repwait); + SH_TAILQ_REMOVE(&rep->waiters, + waiter, links, __rep_waiter); + F_SET(waiter, REP_F_WOKEN); + } + } + +out: + return (ret); +} + +/* + * A "wait goal" describes a condition that a thread may be waiting for. + * Evaluate the condition, returning 0 if the condition has been satisfied, and + * DB_TIMEOUT if not. + * + * Caller must hold REP_SYSTEM lock and/or mtx_clientdb as appropriate. + * + * PUBLIC: int __rep_check_goal __P((ENV *, struct rep_waitgoal *)); + */ +int +__rep_check_goal(env, goal) + ENV *env; + struct rep_waitgoal *goal; +{ + REP *rep; + LOG *lp; + int ret; + + rep = env->rep_handle->region; + lp = env->lg_handle->reginfo.primary; + ret = DB_TIMEOUT; /* Pessimistic, to start. 
*/ + + /* + * Note that while AWAIT_LSN and AWAIT_HISTORY look similar, they are + * actually quite different. With AWAIT_LSN, the u.lsn is the LSN of + * the commit of the transaction the caller is waiting for. So we need + * to make sure we have gotten at least that far, thus ">=". + * + * For AWAIT_HISTORY, the u.lsn is simply a copy of whatever the current + * max_perm_lsn was at the time we last checked. So anything if we have + * anything *beyond* that then we should wake up again and check to see + * if we now have the desired history (thus ">"). Thus when we're + * waiting for HISTORY we're going to get woken *at every commit we + * receive*! Fortunately it should be coming as the first transaction + * after the gen change, and waiting for HISTORY should be extremely + * rare anyway. + */ + switch (goal->why) { + case AWAIT_LSN: + /* Have we reached our goal LSN? */ + if (LOG_COMPARE(&lp->max_perm_lsn, &goal->u.lsn) >= 0) + ret = 0; + break; + case AWAIT_HISTORY: + /* + * Have we made any progress whatsoever, beyond where we were at + * the time the waiting thread noted the current LSN? + * When we have to wait for replication of the LSN history + * database, we don't know what LSN it's going to occur at. So + * we have to wake up every time we get a new transaction. + * Fortunately, this should be exceedingly rare, and the number + * of transactions we have to plow through should almost never + * be more than 1. + */ + if (LOG_COMPARE(&lp->max_perm_lsn, &goal->u.lsn) > 0) + ret = 0; + break; + case AWAIT_GEN: + if (rep->gen >= goal->u.gen) + ret = 0; + break; + case AWAIT_NIMDB: + if (F_ISSET(rep, REP_F_NIMDBS_LOADED)) + ret = 0; + break; + default: + DB_ASSERT(env, 0); + } + return (ret); +} + +/* + * __rep_log_backup -- + * + * Walk backwards in the log looking for specific kinds of records. 
+ * + * PUBLIC: int __rep_log_backup __P((ENV *, DB_LOGC *, DB_LSN *, u_int32_t)); + */ +int +__rep_log_backup(env, logc, lsn, match) + ENV *env; + DB_LOGC *logc; + DB_LSN *lsn; + u_int32_t match; +{ + DBT mylog; + u_int32_t rectype; + int ret; + + ret = 0; + memset(&mylog, 0, sizeof(mylog)); + while ((ret = __logc_get(logc, lsn, &mylog, DB_PREV)) == 0) { + LOGCOPY_32(env, &rectype, mylog.data); + /* + * Check the record type against the desired match type(s). + */ + if ((match == REP_REC_COMMIT && + rectype == DB___txn_regop) || + (match == REP_REC_PERM && + (rectype == DB___txn_ckp || rectype == DB___txn_regop))) + break; + } + return (ret); +} + +/* + * __rep_get_maxpermlsn -- + * + * Safely retrieve the current max_perm_lsn value. + * + * PUBLIC: int __rep_get_maxpermlsn __P((ENV *, DB_LSN *)); + */ +int +__rep_get_maxpermlsn(env, max_perm_lsnp) + ENV *env; + DB_LSN *max_perm_lsnp; +{ + DB_LOG *dblp; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REP *rep; + + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_clientdb); + *max_perm_lsnp = lp->max_perm_lsn; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ENV_LEAVE(env, ip); + return (0); +} + +/* + * __rep_is_internal_rep_file -- + * + * Return 1 if filename is an internal replication file; 0 otherwise. + * Works for all internal replication files including internal database + * files. + * + * PUBLIC: int __rep_is_internal_rep_file __P((char *)); + */ +int +__rep_is_internal_rep_file(filename) + char *filename; +{ + return (strncmp(filename, + REPFILEPREFIX, sizeof(REPFILEPREFIX) - 1) == 0 ? 1 : 0); +} + +/* + * Get the last generation number from the LSN history database. 
+ * + * PUBLIC: int __rep_get_datagen __P((ENV *, u_int32_t *)); + */ +int +__rep_get_datagen(env, data_genp) + ENV *env; + u_int32_t *data_genp; +{ + DB_REP *db_rep; + DB_TXN *txn; + DB *dbp; + DBC *dbc; + __rep_lsn_hist_key_args key; + u_int8_t key_buf[__REP_LSN_HIST_KEY_SIZE]; + u_int8_t data_buf[__REP_LSN_HIST_DATA_SIZE]; + DBT key_dbt, data_dbt; + u_int32_t flags; + int ret, t_ret, tries; + + db_rep = env->rep_handle; + ret = 0; + *data_genp = 0; + tries = 0; + flags = DB_LAST; +retry: + if ((ret = __txn_begin(env, NULL, NULL, &txn, DB_IGNORE_LEASE)) != 0) + return (ret); + + if ((dbp = db_rep->lsn_db) == NULL) { + if ((ret = __rep_open_sysdb(env, + NULL, txn, REPLSNHIST, 0, &dbp)) != 0) { + /* + * If the database isn't there, it could be because it's + * memory-resident, and we haven't yet sync'ed with the + * master to materialize it. It could be that this is + * a brand new environment. We have a 0 datagen. + * That is not an error. + */ + ret = 0; + goto out; + } + db_rep->lsn_db = dbp; + } + + if ((ret = __db_cursor(dbp, NULL, txn, &dbc, 0)) != 0) + goto out; + + DB_INIT_DBT(key_dbt, key_buf, __REP_LSN_HIST_KEY_SIZE); + key_dbt.ulen = __REP_LSN_HIST_KEY_SIZE; + F_SET(&key_dbt, DB_DBT_USERMEM); + + memset(&data_dbt, 0, sizeof(data_dbt)); + data_dbt.data = data_buf; + data_dbt.ulen = __REP_LSN_HIST_DATA_SIZE; + F_SET(&data_dbt, DB_DBT_USERMEM); + if ((ret = __dbc_get(dbc, &key_dbt, &data_dbt, flags)) != 0) { + if ((ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) && + ++tries < 5) /* Limit of 5 is an arbitrary choice. */ + ret = 0; + if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __txn_abort(txn)) != 0 && ret == 0) + ret = t_ret; + /* + * If we have any kind of error at this point, bail. + * Otherwise pause and try again. + */ + if (ret != 0) + goto err; + __os_yield(env, 0, 10000); /* Arbitrary duration. 
*/ + goto retry; + } + if ((ret = __dbc_close(dbc)) == 0 && + (ret = __rep_lsn_hist_key_unmarshal(env, + &key, key_buf, __REP_LSN_HIST_KEY_SIZE, NULL)) == 0) + *data_genp = key.gen; +out: + if ((t_ret = __txn_commit(txn, DB_TXN_NOSYNC)) != 0 && ret == 0) + ret = t_ret; +err: + return (ret); +} diff --git a/src/rep/rep_verify.c b/src/rep/rep_verify.c new file mode 100644 index 00000000..a66eb1fd --- /dev/null +++ b/src/rep/rep_verify.c @@ -0,0 +1,751 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +static int __rep_internal_init __P((ENV *, u_int32_t)); + +/* + * __rep_verify -- + * Handle a REP_VERIFY message. + * + * PUBLIC: int __rep_verify __P((ENV *, __rep_control_args *, DBT *, + * PUBLIC: int, time_t)); + */ +int +__rep_verify(env, rp, rec, eid, savetime) + ENV *env; + __rep_control_args *rp; + DBT *rec; + int eid; + time_t savetime; +{ + DBT mylog; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN lsn, prev_ckp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + __txn_ckp_args *ckp_args; + u_int32_t logflag, rectype; + int master, match, ret, t_ret; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* Do nothing if VERIFY is not set. */ + if (rep->sync_state != SYNC_VERIFY) + return (ret); + +#ifdef DIAGNOSTIC + /* + * We should not ever be in internal init with a lease granted. + */ + if (IS_USING_LEASES(env)) { + REP_SYSTEM_LOCK(env); + DB_ASSERT(env, __rep_islease_granted(env) == 0); + REP_SYSTEM_UNLOCK(env); + } +#endif + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + memset(&mylog, 0, sizeof(mylog)); + /* If verify_lsn of ZERO is passed in, get last log. */ + MUTEX_LOCK(env, rep->mtx_clientdb); + logflag = IS_ZERO_LSN(lp->verify_lsn) ? 
DB_LAST : DB_SET; + prev_ckp = lp->prev_ckp; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if ((ret = __logc_get(logc, &rp->lsn, &mylog, logflag)) != 0) + goto out; + match = 0; + if (mylog.size == rec->size && + memcmp(mylog.data, rec->data, rec->size) == 0) + match = 1; + /* + * If we don't have a match, backup to the previous + * identification record and try again. + */ + if (match == 0) { + master = rep->master_id; + /* + * We will eventually roll back over this log record (unless we + * ultimately have to give up and do an internal init). So, if + * it was a checkpoint, make sure we don't end up without any + * checkpoints left in the entire log. + */ + LOGCOPY_32(env, &rectype, mylog.data); + DB_ASSERT(env, ret == 0); + if (!lp->db_log_inmemory && rectype == DB___txn_ckp) { + if ((ret = __txn_ckp_read(env, + mylog.data, &ckp_args)) != 0) + goto out; + lsn = ckp_args->last_ckp; + __os_free(env, ckp_args); + MUTEX_LOCK(env, rep->mtx_clientdb); + lp->prev_ckp = lsn; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (IS_ZERO_LSN(lsn)) { + /* + * No previous checkpoints? The only way this + * is OK is if we have the entire log, all the + * way back to file #1. + */ + if ((ret = __logc_get(logc, + &lsn, &mylog, DB_FIRST)) != 0) + goto out; + if (lsn.file != 1) { + ret = __rep_internal_init(env, 0); + goto out; + } + + /* Restore position of log cursor. 
*/ + if ((ret = __logc_get(logc, + &rp->lsn, &mylog, DB_SET)) != 0) + goto out; + } + } + if ((ret = __rep_log_backup(env, logc, &lsn, + REP_REC_PERM)) == 0) { + MUTEX_LOCK(env, rep->mtx_clientdb); + lp->verify_lsn = lsn; + __os_gettime(env, &lp->rcvd_ts, 1); + lp->wait_ts = rep->request_gap; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (master != DB_EID_INVALID) + eid = master; + (void)__rep_send_message(env, eid, REP_VERIFY_REQ, + &lsn, NULL, 0, DB_REP_ANYWHERE); + } else if (ret == DB_NOTFOUND) { + /* + * We've either run out of records because + * logs have been removed or we've rolled back + * all the way to the beginning. + */ + ret = __rep_internal_init(env, 0); + } + } else { + /* + * We have a match, so we can probably do a simple sync, without + * needing internal init. But first, check for a couple of + * special cases. + */ + + if (!lp->db_log_inmemory && !IS_ZERO_LSN(prev_ckp)) { + /* + * We previously saw a checkpoint, which means we may + * now be about to roll back over it and lose it. Make + * sure we'll end up still having at least one other + * checkpoint. (Note that if the current record -- the + * one we've just matched -- happens to be a checkpoint, + * then it must be the same as the prev_ckp we're now + * about to try reading. Which means we wouldn't really + * have to read it. But checking for that special case + * doesn't seem worth the trouble.) + */ + if ((ret = __logc_get(logc, + &prev_ckp, &mylog, DB_SET)) != 0) { + if (ret == DB_NOTFOUND) + ret = __rep_internal_init(env, 0); + goto out; + } + /* + * We succeeded reading for the prev_ckp, so it's safe + * to fall through to the verify_match. + */ + } + /* + * Mixed version internal init doesn't work with 4.4, so we + * can't load NIMDBs from a very old-version master. So, fib to + * ourselves that they're already loaded, so that we don't try. 
+ */ + if (rep->version == DB_REPVERSION_44) { + REP_SYSTEM_LOCK(env); + F_SET(rep, REP_F_NIMDBS_LOADED); + REP_SYSTEM_UNLOCK(env); + } + if (F_ISSET(rep, REP_F_NIMDBS_LOADED)) + ret = __rep_verify_match(env, &rp->lsn, savetime); + else { + /* + * Even though we found a match, we haven't yet loaded + * any NIMDBs, so we have to do an abbreviated internal + * init. We leave lp->verify_lsn set to the matching + * sync point, in case upon eventual examination of the + * UPDATE message it turns out there are no NIMDBs + * (since we can then skip back to a verify_match + * outcome). + */ + ret = __rep_internal_init(env, REP_F_ABBREVIATED); + } + } + +out: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__rep_internal_init(env, abbrev) + ENV *env; + u_int32_t abbrev; +{ + REP *rep; + int master, ret; + + rep = env->rep_handle->region; + REP_SYSTEM_LOCK(env); +#ifdef HAVE_STATISTICS + if (!abbrev) + rep->stat.st_outdated++; +#endif + + /* + * What we call "abbreviated internal init" is really just NIMDB + * materialization, and we always do that even if AUTOINIT has been + * turned off. + */ + if (!FLD_ISSET(rep->config, REP_C_AUTOINIT) && !abbrev) + ret = DB_REP_JOIN_FAILURE; + else { + rep->sync_state = SYNC_UPDATE; + if (abbrev) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "send UPDATE_REQ, merely to check for NIMDB refresh")); + F_SET(rep, REP_F_ABBREVIATED); + } else + F_CLR(rep, REP_F_ABBREVIATED); + ZERO_LSN(rep->first_lsn); + ZERO_LSN(rep->ckp_lsn); + ret = 0; + } + master = rep->master_id; + REP_SYSTEM_UNLOCK(env); + if (ret == 0 && master != DB_EID_INVALID) + (void)__rep_send_message(env, + master, REP_UPDATE_REQ, NULL, NULL, 0, 0); + return (ret); +} + +/* + * __rep_verify_fail -- + * Handle a REP_VERIFY_FAIL message. 
+ * + * PUBLIC: int __rep_verify_fail __P((ENV *, __rep_control_args *)); + */ +int +__rep_verify_fail(env, rp) + ENV *env; + __rep_control_args *rp; +{ + DB_LOG *dblp; + DB_REP *db_rep; + LOG *lp; + REP *rep; + int clnt_lock_held, lockout, master, ret; + + clnt_lock_held = lockout = 0; + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + + /* + * If we are already in the middle of updating (PAGE or UPDATE state), + * then we ignore this message. + */ + if (rep->sync_state == SYNC_PAGE || rep->sync_state == SYNC_UPDATE) + return (0); + REP_SYSTEM_LOCK(env); + /* + * We should not ever be in internal init with a lease granted. + */ + DB_ASSERT(env, + !IS_USING_LEASES(env) || __rep_islease_granted(env) == 0); + + /* + * Clean up old internal init in progress if: + * REP_C_AUTOINIT is configured and + * we are recovering LOG and this LSN is in the range we need. + */ + if (rep->sync_state == SYNC_LOG && + LOG_COMPARE(&rep->first_lsn, &rp->lsn) <= 0 && + LOG_COMPARE(&rep->last_lsn, &rp->lsn) >= 0) { + /* + * Already locking out messages, give up. + */ + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG)) + goto unlock; + + /* + * Lock out other messages to prevent race conditions. + */ + if ((ret = __rep_lockout_msg(env, rep, 1)) != 0) + goto unlock; + lockout = 1; + + /* + * Clean up internal init if one was in progress. 
+ */ + if (ISSET_LOCKOUT_BDB(rep)) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "VERIFY_FAIL is cleaning up old internal init for missing log")); + if ((ret = + __rep_init_cleanup(env, rep, DB_FORCE)) != 0) { + RPRINT(env, (env, DB_VERB_REP_SYNC, + "VERIFY_FAIL error cleaning up internal init for missing log: %d", ret)); + goto msglck; + } + CLR_RECOVERY_SETTINGS(rep); + } + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); + lockout = 0; + } + + REP_SYSTEM_UNLOCK(env); + MUTEX_LOCK(env, rep->mtx_clientdb); + clnt_lock_held = 1; + REP_SYSTEM_LOCK(env); + /* + * Commence an internal init if: + * We are in VERIFY state and the failing LSN is the one we + * were verifying or + * we're recovering LOG and this LSN is in the range we need or + * we are in normal state (no recovery flags set) and + * the failing LSN is the one we're ready for. + * + * We don't want an old or delayed VERIFY_FAIL message to throw us + * into internal initialization when we shouldn't be. + */ + if ((rep->sync_state == SYNC_VERIFY && + LOG_COMPARE(&rp->lsn, &lp->verify_lsn) == 0) || + (rep->sync_state == SYNC_LOG && + LOG_COMPARE(&rep->first_lsn, &rp->lsn) <= 0 && + LOG_COMPARE(&rep->last_lsn, &rp->lsn) >= 0) || + (rep->sync_state == SYNC_OFF && + LOG_COMPARE(&rp->lsn, &lp->ready_lsn) >= 0)) { + /* + * Update stats. + */ + STAT(rep->stat.st_outdated++); + + /* + * If REP_C_AUTOINIT is turned off, return + * DB_REP_JOIN_FAILURE instead of doing internal init. + */ + if (!FLD_ISSET(rep->config, REP_C_AUTOINIT)) { + ret = DB_REP_JOIN_FAILURE; + goto unlock; + } + + /* + * Do the internal init. + */ + rep->sync_state = SYNC_UPDATE; + ZERO_LSN(rep->first_lsn); + ZERO_LSN(rep->ckp_lsn); + lp->wait_ts = rep->request_gap; + master = rep->master_id; + REP_SYSTEM_UNLOCK(env); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + if (master != DB_EID_INVALID) + (void)__rep_send_message(env, + master, REP_UPDATE_REQ, NULL, NULL, 0, 0); + } else { + /* + * Otherwise ignore this message. 
+ */ +msglck: if (lockout) + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_MSG); +unlock: REP_SYSTEM_UNLOCK(env); + if (clnt_lock_held) + MUTEX_UNLOCK(env, rep->mtx_clientdb); + } + return (ret); +} + +/* + * __rep_verify_req -- + * Handle a REP_VERIFY_REQ message. + * + * PUBLIC: int __rep_verify_req __P((ENV *, __rep_control_args *, int)); + */ +int +__rep_verify_req(env, rp, eid) + ENV *env; + __rep_control_args *rp; + int eid; +{ + DBT *d, data_dbt; + DB_LOGC *logc; + DB_REP *db_rep; + REP *rep; + u_int32_t type; + int old, ret; + + ret = 0; + db_rep = env->rep_handle; + rep = db_rep->region; + + type = REP_VERIFY; + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + d = &data_dbt; + memset(d, 0, sizeof(data_dbt)); + F_SET(logc, DB_LOG_SILENT_ERR); + ret = __logc_get(logc, &rp->lsn, d, DB_SET); + /* + * If the LSN was invalid, then we might get a DB_NOTFOUND + * we might get an EIO, we could get anything. + * If we get a DB_NOTFOUND, then there is a chance that + * the LSN comes before the first file present in which + * case we need to return a fail so that the client can + * perform an internal init or return a REP_JOIN_FAILURE. + * + * If we're a client servicing this request and we get a + * NOTFOUND, return it so the caller can rerequest from + * a better source. 
+ */ + if (ret == DB_NOTFOUND) { + if (F_ISSET(rep, REP_F_CLIENT)) { + (void)__logc_close(logc); + return (DB_NOTFOUND); + } + if (__log_is_outdated(env, rp->lsn.file, &old) == 0 && + old != 0) + type = REP_VERIFY_FAIL; + } + + if (ret != 0) + d = NULL; + + (void)__rep_send_message(env, eid, type, &rp->lsn, d, 0, 0); + return (__logc_close(logc)); +} + +/* + * PUBLIC: int __rep_dorecovery __P((ENV *, DB_LSN *, DB_LSN *)); + */ +int +__rep_dorecovery(env, lsnp, trunclsnp) + ENV *env; + DB_LSN *lsnp, *trunclsnp; +{ + DBT mylog; + DB_LOGC *logc; + DB_LSN last_ckp, lsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + REP *rep; + int ret, rollback, skip_rec, t_ret, update; + u_int32_t rectype, opcode; + __txn_regop_args *txnrec; + __txn_regop_42_args *txn42rec; + + db_rep = env->rep_handle; + rep = db_rep->region; + ENV_GET_THREAD_INFO(env, ip); + + /* Figure out if we are backing out any committed transactions. */ + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + + memset(&mylog, 0, sizeof(mylog)); + if (rep->sync_state == SYNC_LOG) { + /* + * Internal init can never skip recovery. + * Internal init must always update the timestamp and + * force dead handles. + */ + skip_rec = 0; + update = 1; + } else { + skip_rec = 1; + update = 0; + } + rollback = 0; + while (update == 0 && + (ret = __logc_get(logc, &lsn, &mylog, DB_PREV)) == 0 && + LOG_COMPARE(&lsn, lsnp) > 0) { + LOGCOPY_32(env, &rectype, mylog.data); + /* + * Find out if we can skip recovery completely. If we + * are backing up over any record a client usually + * cares about, we must run recovery. + * + * Skipping sync-up recovery can be pretty scary! + * Here's why we can do it: + * If a master downgraded to client and is now running + * sync-up to a new master, that old master must have + * waited for any outstanding txns to resolve before + * becoming a client. Also we are in lockout so there + * can be no other operations right now. 
+ * + * If the client wrote a commit record to the log, but + * was descheduled before processing the txn, and then + * a new master was found, we must've let the txn get + * processed because right now we are the only message + * thread allowed to be running. + */ + DB_ASSERT(env, rep->op_cnt == 0); + DB_ASSERT(env, rep->msg_th == 1); + if (rectype == DB___txn_regop || rectype == DB___txn_ckp || + rectype == DB___dbreg_register) + skip_rec = 0; + if (rectype == DB___txn_regop) { + if (rep->version >= DB_REPVERSION_44) { + if ((ret = __txn_regop_read( + env, mylog.data, &txnrec)) != 0) + goto err; + opcode = txnrec->opcode; + __os_free(env, txnrec); + } else { + if ((ret = __txn_regop_42_read( + env, mylog.data, &txn42rec)) != 0) + goto err; + opcode = txn42rec->opcode; + __os_free(env, txn42rec); + } + if (opcode != TXN_ABORT) { + rollback = 1; + update = 1; + } + } + } + /* + * Handle if the logc_get fails. + */ + if (ret != 0) + goto err; + + /* + * If we successfully run recovery, we've opened all the necessary + * files. We are guaranteed to be single-threaded here, so no mutex + * is necessary. + */ + if (skip_rec) { + if ((ret = __log_get_stable_lsn(env, &last_ckp, 0)) != 0) { + if (ret != DB_NOTFOUND) + goto err; + ZERO_LSN(last_ckp); + } + RPRINT(env, (env, DB_VERB_REP_SYNC, + "Skip sync-up rec. Truncate log to [%lu][%lu], ckp [%lu][%lu]", + (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)last_ckp.file, (u_long)last_ckp.offset)); + ret = __log_vtruncate(env, lsnp, &last_ckp, trunclsnp); + } else { + if (rollback && !FLD_ISSET(rep->config, REP_C_AUTOROLLBACK)) { + ret = DB_REP_WOULDROLLBACK; + goto err; + } + ret = __db_apprec(env, ip, lsnp, trunclsnp, update, 0); + } + + if (ret != 0) + goto err; + F_SET(db_rep, DBREP_OPENFILES); + + /* + * If we've just updated the env handle timestamp, then we would get + * HANDLE_DEAD next time we tried to use our LSN history database. 
So, + * close it here now, to save ourselves the trouble of worrying about it + * later. + */ + if (update && db_rep->lsn_db != NULL) { + ret = __db_close(db_rep->lsn_db, NULL, DB_NOSYNC); + db_rep->lsn_db = NULL; + } + +err: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __rep_verify_match -- + * We have just received a matching log record during verification. + * Figure out if we're going to need to run recovery. If so, wait until + * everything else has exited the library. If not, set up the world + * correctly and move forward. + * + * PUBLIC: int __rep_verify_match __P((ENV *, DB_LSN *, time_t)); + */ +int +__rep_verify_match(env, reclsnp, savetime) + ENV *env; + DB_LSN *reclsnp; + time_t savetime; +{ + DB_LOG *dblp; + DB_LSN trunclsn; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + LOG *lp; + REGENV *renv; + REGINFO *infop; + REP *rep; + int done, event, master, ret; + u_int32_t unused; + + dblp = env->lg_handle; + db_rep = env->rep_handle; + rep = db_rep->region; + lp = dblp->reginfo.primary; + ret = 0; + event = 0; + infop = env->reginfo; + renv = infop->primary; + ENV_GET_THREAD_INFO(env, ip); + + /* + * Check if the savetime is different than our current time stamp. + * If it is, then we're racing with another thread trying to recover + * and we lost. We must give up. + */ + MUTEX_LOCK(env, rep->mtx_clientdb); + done = savetime != renv->rep_timestamp; + if (done) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + return (0); + } + ZERO_LSN(lp->verify_lsn); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + + /* + * Make sure the world hasn't changed while we tried to get + * the lock. If it hasn't then it's time for us to kick all + * operations out of DB and run recovery. + */ + REP_SYSTEM_LOCK(env); + if (FLD_ISSET(rep->lockout_flags, REP_LOCKOUT_MSG) || + (rep->sync_state != SYNC_LOG && + ISSET_LOCKOUT_BDB(rep))) { + /* + * We lost. The world changed and we should do nothing. 
+ */ + STAT(rep->stat.st_msgs_recover++); + goto errunlock; + } + + /* + * Lockout all message threads but ourselves. + */ + if ((ret = __rep_lockout_msg(env, rep, 1)) != 0) + goto errunlock; + + /* + * Lockout the API and wait for operations to complete. + */ + if ((ret = __rep_lockout_api(env, rep)) != 0) + goto errunlock; + + /* OK, everyone is out, we can now run recovery. */ + REP_SYSTEM_UNLOCK(env); + + if ((ret = __rep_dorecovery(env, reclsnp, &trunclsn)) != 0 || + (ret = __rep_remove_init_file(env)) != 0) { + REP_SYSTEM_LOCK(env); + FLD_CLR(rep->lockout_flags, + REP_LOCKOUT_API | REP_LOCKOUT_MSG | REP_LOCKOUT_OP); + goto errunlock; + } + + /* + * The log has been truncated (either directly by us or by __db_apprec) + * We want to make sure we're waiting for the LSN at the new end-of-log, + * not some later point. + */ + MUTEX_LOCK(env, rep->mtx_clientdb); + lp->ready_lsn = trunclsn; + ZERO_LSN(lp->waiting_lsn); + ZERO_LSN(lp->max_wait_lsn); + lp->max_perm_lsn = *reclsnp; + lp->wait_ts = rep->request_gap; + __os_gettime(env, &lp->rcvd_ts, 1); + ZERO_LSN(lp->verify_lsn); + ZERO_LSN(lp->prev_ckp); + + /* + * Discard any log records we have queued; we're about to re-request + * them, and can't trust the ones in the queue. We need to set the + * DB_AM_RECOVER bit in this handle, so that the operation doesn't + * deadlock. 
+ */ + if (db_rep->rep_db == NULL && + (ret = __rep_client_dbinit(env, 0, REP_DB)) != 0) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + goto out; + } + + F_SET(db_rep->rep_db, DB_AM_RECOVER); + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ret = __db_truncate(db_rep->rep_db, ip, NULL, &unused); + MUTEX_LOCK(env, rep->mtx_clientdb); + F_CLR(db_rep->rep_db, DB_AM_RECOVER); + + REP_SYSTEM_LOCK(env); + STAT(rep->stat.st_log_queued = 0); + if (IN_INTERNAL_INIT(rep)) + event = 1; + CLR_RECOVERY_SETTINGS(rep); + FLD_CLR(rep->lockout_flags, REP_LOCKOUT_ARCHIVE | REP_LOCKOUT_MSG); + if (ret != 0) + goto errunlock2; + + /* + * If the master_id is invalid, this means that since + * the last record was sent, something happened to the + * master and we may not have a master to request + * things of. + * + * This is not an error; when we find a new master, + * we'll re-negotiate where the end of the log is and + * try to bring ourselves up to date again anyway. + */ + master = rep->master_id; + REP_SYSTEM_UNLOCK(env); + if (master == DB_EID_INVALID) { + MUTEX_UNLOCK(env, rep->mtx_clientdb); + ret = 0; + } else { + /* + * We're making an ALL_REQ. But now that we've + * cleared the flags, we're likely receiving new + * log records from the master, resulting in a gap + * immediately. So to avoid multiple data streams, + * set the wait_ts value high now to give the master + * a chance to start sending us these records before + * the gap code re-requests the same gap. Wait_recs + * will get reset once we start receiving these + * records. 
+ */ + lp->wait_ts = rep->max_gap; + MUTEX_UNLOCK(env, rep->mtx_clientdb); + (void)__rep_send_message(env, + master, REP_ALL_REQ, reclsnp, NULL, 0, DB_REP_ANYWHERE); + } + if (event) + __rep_fire_event(env, DB_EVENT_REP_INIT_DONE, NULL); + if (0) { +errunlock2: MUTEX_UNLOCK(env, rep->mtx_clientdb); +errunlock: REP_SYSTEM_UNLOCK(env); + } +out: return (ret); +} diff --git a/src/repmgr/repmgr.msg b/src/repmgr/repmgr.msg new file mode 100644 index 00000000..020f2e9c --- /dev/null +++ b/src/repmgr/repmgr.msg @@ -0,0 +1,119 @@ +PREFIX __repmgr + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/db_swap.h" +INCLUDE + +BEGIN_MSG handshake +ARG port u_int16_t +ARG alignment u_int16_t +ARG ack_policy u_int32_t +ARG flags u_int32_t +END + +BEGIN_MSG v3handshake +ARG port u_int16_t +ARG priority u_int32_t +ARG flags u_int32_t +END + +BEGIN_MSG v2handshake +ARG port u_int16_t +ARG priority u_int32_t +END + +BEGIN_MSG parm_refresh +ARG ack_policy u_int32_t +ARG flags u_int32_t +END + +BEGIN_MSG permlsn +ARG generation u_int32_t +ARG lsn DB_LSN +END + +BEGIN_MSG version_proposal +ARG min u_int32_t +ARG max u_int32_t +END + +BEGIN_MSG version_confirmation +ARG version u_int32_t +END + +BEGIN_MSG msg_hdr +ARG type u_int8_t +ARG word1 u_int32_t +ARG word2 u_int32_t +END + +/* Metadata that goes along with user message on a DB_CHANNEL. */ +BEGIN_MSG msg_metadata +ARG tag u_int32_t +ARG limit u_int32_t +ARG flags u_int32_t +END + +/* + * The membership database has a record for each site in the group, plus one + * extra meta-data record. The key of the meta-data record has a zero-length + * host, and a port value of 0. + */ +BEGIN_MSG membership_key check_length +ARG host DBT +ARG port u_int16_t +END + +BEGIN_MSG membership_data +ARG flags u_int32_t +END + +/* + * The "format" identifies the content and layout of the records within the + * membership database (i.e., some of the items defined here in this *.msg + * file). 
It should be incremented when the layouts change in future Berkeley + * DB releases. The "version" counts group changes that the application makes + * by adding or removing sites; thus it varies dynamically thoughout the life of + * a group, during a single release of Berkeley DB. + */ +BEGIN_MSG member_metadata +ARG format u_int32_t +ARG version u_int32_t +END + +/* + * When a new site wants to join a group, it "guesses" that the configured + * "helper" site is the master, and sends the request there. When that guess + * is wrong, the helper site responds with the location of the current master, + * in effect "forwarding" the request. + */ +BEGIN_MSG gm_fwd check_length +ARG host DBT +ARG port u_int16_t +ARG gen u_int32_t +END + +/* Membership list version header: */ +BEGIN_MSG membr_vers +ARG version u_int32_t +ARG gen u_int32_t +END +BEGIN_MSG site_info check_length +ARG host DBT +ARG port u_int16_t +ARG flags u_int32_t +END + +/* + * If site A breaks or rejects a connection from site B, it first + * tries to send B this message containing site A's currently known + * membership DB version. Site B can use this to decide what to do. + * If site B knows of a later version, it should retry the connection + * to site A later, polling at it until site A catches up. However, if + * site B's known version is less, it means that site B is no longer in + * the group, and so instead it should shut down and notify the application. + */ +BEGIN_MSG connect_reject +ARG version u_int32_t +ARG gen u_int32_t +END diff --git a/src/repmgr/repmgr.src b/src/repmgr/repmgr.src new file mode 100644 index 00000000..28f41f42 --- /dev/null +++ b/src/repmgr/repmgr.src @@ -0,0 +1,23 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +DBPRIVATE +PREFIX __repmgr + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc_auto/repmgr_auto.h" +INCLUDE + +BEGIN member 52 200 +ARG version u_int32_t lu +ARG prev_status u_int32_t lu +ARG status u_int32_t lu +DBT host DBT s +ARG port u_int32_t lu +END diff --git a/src/repmgr/repmgr_auto.c b/src/repmgr/repmgr_auto.c new file mode 100644 index 00000000..19eb24d4 --- /dev/null +++ b/src/repmgr/repmgr_auto.c @@ -0,0 +1,32 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc_auto/repmgr_auto.h" + +DB_LOG_RECSPEC __repmgr_member_desc[] = { + {LOGREC_ARG, SSZ(__repmgr_member_args, version), "version", "%lu"}, + {LOGREC_ARG, SSZ(__repmgr_member_args, prev_status), "prev_status", "%lu"}, + {LOGREC_ARG, SSZ(__repmgr_member_args, status), "status", "%lu"}, + {LOGREC_DBT, SSZ(__repmgr_member_args, host), "host", ""}, + {LOGREC_ARG, SSZ(__repmgr_member_args, port), "port", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __repmgr_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__repmgr_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __repmgr_member_recover, DB___repmgr_member)) != 0) + return (ret); + return (0); +} diff --git a/src/repmgr/repmgr_automsg.c b/src/repmgr/repmgr_automsg.c new file mode 100644 index 00000000..8fe4548e --- /dev/null +++ b/src/repmgr/repmgr_automsg.c @@ -0,0 +1,748 @@ +/* Do not edit: automatically built by gen_msg.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_swap.h" + +/* + * PUBLIC: void __repmgr_handshake_marshal __P((ENV *, + * PUBLIC: __repmgr_handshake_args *, u_int8_t *)); + */ +void +__repmgr_handshake_marshal(env, argp, bp) + ENV *env; + __repmgr_handshake_args *argp; + u_int8_t *bp; +{ + DB_HTONS_COPYOUT(env, bp, argp->port); + DB_HTONS_COPYOUT(env, bp, argp->alignment); + DB_HTONL_COPYOUT(env, bp, argp->ack_policy); + DB_HTONL_COPYOUT(env, bp, argp->flags); +} + +/* + * PUBLIC: int __repmgr_handshake_unmarshal __P((ENV *, + * PUBLIC: __repmgr_handshake_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_handshake_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_handshake_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_HANDSHAKE_SIZE) + goto too_few; + DB_NTOHS_COPYIN(env, argp->port, bp); + DB_NTOHS_COPYIN(env, argp->alignment, bp); + DB_NTOHL_COPYIN(env, argp->ack_policy, bp); + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_handshake message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_v3handshake_marshal __P((ENV *, + * PUBLIC: __repmgr_v3handshake_args *, u_int8_t *)); + */ +void +__repmgr_v3handshake_marshal(env, argp, bp) + ENV *env; + __repmgr_v3handshake_args *argp; + u_int8_t *bp; +{ + DB_HTONS_COPYOUT(env, bp, argp->port); + DB_HTONL_COPYOUT(env, bp, argp->priority); + DB_HTONL_COPYOUT(env, bp, argp->flags); +} + +/* + * PUBLIC: int __repmgr_v3handshake_unmarshal __P((ENV *, + * PUBLIC: __repmgr_v3handshake_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_v3handshake_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_v3handshake_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_V3HANDSHAKE_SIZE) + goto too_few; + DB_NTOHS_COPYIN(env, argp->port, bp); + 
DB_NTOHL_COPYIN(env, argp->priority, bp); + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_v3handshake message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_v2handshake_marshal __P((ENV *, + * PUBLIC: __repmgr_v2handshake_args *, u_int8_t *)); + */ +void +__repmgr_v2handshake_marshal(env, argp, bp) + ENV *env; + __repmgr_v2handshake_args *argp; + u_int8_t *bp; +{ + DB_HTONS_COPYOUT(env, bp, argp->port); + DB_HTONL_COPYOUT(env, bp, argp->priority); +} + +/* + * PUBLIC: int __repmgr_v2handshake_unmarshal __P((ENV *, + * PUBLIC: __repmgr_v2handshake_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_v2handshake_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_v2handshake_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_V2HANDSHAKE_SIZE) + goto too_few; + DB_NTOHS_COPYIN(env, argp->port, bp); + DB_NTOHL_COPYIN(env, argp->priority, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_v2handshake message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_parm_refresh_marshal __P((ENV *, + * PUBLIC: __repmgr_parm_refresh_args *, u_int8_t *)); + */ +void +__repmgr_parm_refresh_marshal(env, argp, bp) + ENV *env; + __repmgr_parm_refresh_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->ack_policy); + DB_HTONL_COPYOUT(env, bp, argp->flags); +} + +/* + * PUBLIC: int __repmgr_parm_refresh_unmarshal __P((ENV *, + * PUBLIC: __repmgr_parm_refresh_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_parm_refresh_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_parm_refresh_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_PARM_REFRESH_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->ack_policy, bp); + 
DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_parm_refresh message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_permlsn_marshal __P((ENV *, + * PUBLIC: __repmgr_permlsn_args *, u_int8_t *)); + */ +void +__repmgr_permlsn_marshal(env, argp, bp) + ENV *env; + __repmgr_permlsn_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->generation); + DB_HTONL_COPYOUT(env, bp, argp->lsn.file); + DB_HTONL_COPYOUT(env, bp, argp->lsn.offset); +} + +/* + * PUBLIC: int __repmgr_permlsn_unmarshal __P((ENV *, + * PUBLIC: __repmgr_permlsn_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_permlsn_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_permlsn_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_PERMLSN_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->generation, bp); + DB_NTOHL_COPYIN(env, argp->lsn.file, bp); + DB_NTOHL_COPYIN(env, argp->lsn.offset, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_permlsn message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_version_proposal_marshal __P((ENV *, + * PUBLIC: __repmgr_version_proposal_args *, u_int8_t *)); + */ +void +__repmgr_version_proposal_marshal(env, argp, bp) + ENV *env; + __repmgr_version_proposal_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->min); + DB_HTONL_COPYOUT(env, bp, argp->max); +} + +/* + * PUBLIC: int __repmgr_version_proposal_unmarshal __P((ENV *, + * PUBLIC: __repmgr_version_proposal_args *, u_int8_t *, size_t, + * PUBLIC: u_int8_t **)); + */ +int +__repmgr_version_proposal_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_version_proposal_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_VERSION_PROPOSAL_SIZE) + goto 
too_few; + DB_NTOHL_COPYIN(env, argp->min, bp); + DB_NTOHL_COPYIN(env, argp->max, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_version_proposal message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_version_confirmation_marshal __P((ENV *, + * PUBLIC: __repmgr_version_confirmation_args *, u_int8_t *)); + */ +void +__repmgr_version_confirmation_marshal(env, argp, bp) + ENV *env; + __repmgr_version_confirmation_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->version); +} + +/* + * PUBLIC: int __repmgr_version_confirmation_unmarshal __P((ENV *, + * PUBLIC: __repmgr_version_confirmation_args *, u_int8_t *, size_t, + * PUBLIC: u_int8_t **)); + */ +int +__repmgr_version_confirmation_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_version_confirmation_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_VERSION_CONFIRMATION_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->version, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_version_confirmation message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_msg_hdr_marshal __P((ENV *, + * PUBLIC: __repmgr_msg_hdr_args *, u_int8_t *)); + */ +void +__repmgr_msg_hdr_marshal(env, argp, bp) + ENV *env; + __repmgr_msg_hdr_args *argp; + u_int8_t *bp; +{ + *bp++ = argp->type; + DB_HTONL_COPYOUT(env, bp, argp->word1); + DB_HTONL_COPYOUT(env, bp, argp->word2); +} + +/* + * PUBLIC: int __repmgr_msg_hdr_unmarshal __P((ENV *, + * PUBLIC: __repmgr_msg_hdr_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_msg_hdr_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_msg_hdr_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_MSG_HDR_SIZE) + goto too_few; + argp->type = *bp++; + DB_NTOHL_COPYIN(env, 
argp->word1, bp); + DB_NTOHL_COPYIN(env, argp->word2, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_msg_hdr message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_msg_metadata_marshal __P((ENV *, + * PUBLIC: __repmgr_msg_metadata_args *, u_int8_t *)); + */ +void +__repmgr_msg_metadata_marshal(env, argp, bp) + ENV *env; + __repmgr_msg_metadata_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->tag); + DB_HTONL_COPYOUT(env, bp, argp->limit); + DB_HTONL_COPYOUT(env, bp, argp->flags); +} + +/* + * PUBLIC: int __repmgr_msg_metadata_unmarshal __P((ENV *, + * PUBLIC: __repmgr_msg_metadata_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_msg_metadata_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_msg_metadata_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_MSG_METADATA_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->tag, bp); + DB_NTOHL_COPYIN(env, argp->limit, bp); + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_msg_metadata message")); + return (EINVAL); +} + +/* + * PUBLIC: int __repmgr_membership_key_marshal __P((ENV *, + * PUBLIC: __repmgr_membership_key_args *, u_int8_t *, size_t, size_t *)); + */ +int +__repmgr_membership_key_marshal(env, argp, bp, max, lenp) + ENV *env; + __repmgr_membership_key_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REPMGR_MEMBERSHIP_KEY_SIZE + + (size_t)argp->host.size) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->host.size); + if (argp->host.size > 0) { + memcpy(bp, argp->host.data, argp->host.size); + bp += argp->host.size; + } + DB_HTONS_COPYOUT(env, bp, argp->port); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int 
__repmgr_membership_key_unmarshal __P((ENV *, + * PUBLIC: __repmgr_membership_key_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_membership_key_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_membership_key_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + size_t needed; + + needed = __REPMGR_MEMBERSHIP_KEY_SIZE; + if (max < needed) + goto too_few; + DB_NTOHL_COPYIN(env, argp->host.size, bp); + argp->host.data = bp; + needed += (size_t)argp->host.size; + if (max < needed) + goto too_few; + bp += argp->host.size; + DB_NTOHS_COPYIN(env, argp->port, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_membership_key message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_membership_data_marshal __P((ENV *, + * PUBLIC: __repmgr_membership_data_args *, u_int8_t *)); + */ +void +__repmgr_membership_data_marshal(env, argp, bp) + ENV *env; + __repmgr_membership_data_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->flags); +} + +/* + * PUBLIC: int __repmgr_membership_data_unmarshal __P((ENV *, + * PUBLIC: __repmgr_membership_data_args *, u_int8_t *, size_t, + * PUBLIC: u_int8_t **)); + */ +int +__repmgr_membership_data_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_membership_data_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_MEMBERSHIP_DATA_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_membership_data message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_member_metadata_marshal __P((ENV *, + * PUBLIC: __repmgr_member_metadata_args *, u_int8_t *)); + */ +void +__repmgr_member_metadata_marshal(env, argp, bp) + ENV *env; + __repmgr_member_metadata_args *argp; + u_int8_t *bp; +{ + 
DB_HTONL_COPYOUT(env, bp, argp->format); + DB_HTONL_COPYOUT(env, bp, argp->version); +} + +/* + * PUBLIC: int __repmgr_member_metadata_unmarshal __P((ENV *, + * PUBLIC: __repmgr_member_metadata_args *, u_int8_t *, size_t, + * PUBLIC: u_int8_t **)); + */ +int +__repmgr_member_metadata_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_member_metadata_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_MEMBER_METADATA_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->format, bp); + DB_NTOHL_COPYIN(env, argp->version, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_member_metadata message")); + return (EINVAL); +} + +/* + * PUBLIC: int __repmgr_gm_fwd_marshal __P((ENV *, + * PUBLIC: __repmgr_gm_fwd_args *, u_int8_t *, size_t, size_t *)); + */ +int +__repmgr_gm_fwd_marshal(env, argp, bp, max, lenp) + ENV *env; + __repmgr_gm_fwd_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REPMGR_GM_FWD_SIZE + + (size_t)argp->host.size) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->host.size); + if (argp->host.size > 0) { + memcpy(bp, argp->host.data, argp->host.size); + bp += argp->host.size; + } + DB_HTONS_COPYOUT(env, bp, argp->port); + DB_HTONL_COPYOUT(env, bp, argp->gen); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __repmgr_gm_fwd_unmarshal __P((ENV *, + * PUBLIC: __repmgr_gm_fwd_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_gm_fwd_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_gm_fwd_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + size_t needed; + + needed = __REPMGR_GM_FWD_SIZE; + if (max < needed) + goto too_few; + DB_NTOHL_COPYIN(env, argp->host.size, bp); + argp->host.data = bp; + needed += (size_t)argp->host.size; + if (max < needed) + goto too_few; + bp += argp->host.size; + DB_NTOHS_COPYIN(env, 
argp->port, bp); + DB_NTOHL_COPYIN(env, argp->gen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_gm_fwd message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_membr_vers_marshal __P((ENV *, + * PUBLIC: __repmgr_membr_vers_args *, u_int8_t *)); + */ +void +__repmgr_membr_vers_marshal(env, argp, bp) + ENV *env; + __repmgr_membr_vers_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->version); + DB_HTONL_COPYOUT(env, bp, argp->gen); +} + +/* + * PUBLIC: int __repmgr_membr_vers_unmarshal __P((ENV *, + * PUBLIC: __repmgr_membr_vers_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_membr_vers_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_membr_vers_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_MEMBR_VERS_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->version, bp); + DB_NTOHL_COPYIN(env, argp->gen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_membr_vers message")); + return (EINVAL); +} + +/* + * PUBLIC: int __repmgr_site_info_marshal __P((ENV *, + * PUBLIC: __repmgr_site_info_args *, u_int8_t *, size_t, size_t *)); + */ +int +__repmgr_site_info_marshal(env, argp, bp, max, lenp) + ENV *env; + __repmgr_site_info_args *argp; + u_int8_t *bp; + size_t *lenp, max; +{ + u_int8_t *start; + + if (max < __REPMGR_SITE_INFO_SIZE + + (size_t)argp->host.size) + return (ENOMEM); + start = bp; + + DB_HTONL_COPYOUT(env, bp, argp->host.size); + if (argp->host.size > 0) { + memcpy(bp, argp->host.data, argp->host.size); + bp += argp->host.size; + } + DB_HTONS_COPYOUT(env, bp, argp->port); + DB_HTONL_COPYOUT(env, bp, argp->flags); + + *lenp = (size_t)(bp - start); + return (0); +} + +/* + * PUBLIC: int __repmgr_site_info_unmarshal __P((ENV *, + * PUBLIC: __repmgr_site_info_args *, u_int8_t *, size_t, 
u_int8_t **)); + */ +int +__repmgr_site_info_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_site_info_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + size_t needed; + + needed = __REPMGR_SITE_INFO_SIZE; + if (max < needed) + goto too_few; + DB_NTOHL_COPYIN(env, argp->host.size, bp); + argp->host.data = bp; + needed += (size_t)argp->host.size; + if (max < needed) + goto too_few; + bp += argp->host.size; + DB_NTOHS_COPYIN(env, argp->port, bp); + DB_NTOHL_COPYIN(env, argp->flags, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_site_info message")); + return (EINVAL); +} + +/* + * PUBLIC: void __repmgr_connect_reject_marshal __P((ENV *, + * PUBLIC: __repmgr_connect_reject_args *, u_int8_t *)); + */ +void +__repmgr_connect_reject_marshal(env, argp, bp) + ENV *env; + __repmgr_connect_reject_args *argp; + u_int8_t *bp; +{ + DB_HTONL_COPYOUT(env, bp, argp->version); + DB_HTONL_COPYOUT(env, bp, argp->gen); +} + +/* + * PUBLIC: int __repmgr_connect_reject_unmarshal __P((ENV *, + * PUBLIC: __repmgr_connect_reject_args *, u_int8_t *, size_t, u_int8_t **)); + */ +int +__repmgr_connect_reject_unmarshal(env, argp, bp, max, nextp) + ENV *env; + __repmgr_connect_reject_args *argp; + u_int8_t *bp; + size_t max; + u_int8_t **nextp; +{ + if (max < __REPMGR_CONNECT_REJECT_SIZE) + goto too_few; + DB_NTOHL_COPYIN(env, argp->version, bp); + DB_NTOHL_COPYIN(env, argp->gen, bp); + + if (nextp != NULL) + *nextp = bp; + return (0); + +too_few: + __db_errx(env, DB_STR("3675", + "Not enough input bytes to fill a __repmgr_connect_reject message")); + return (EINVAL); +} + diff --git a/src/repmgr/repmgr_autop.c b/src/repmgr/repmgr_autop.c new file mode 100644 index 00000000..8d7c1974 --- /dev/null +++ b/src/repmgr/repmgr_autop.c @@ -0,0 +1,44 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#ifdef HAVE_REPLICATION_THREADS +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc_auto/repmgr_auto.h" + +/* + * PUBLIC: int __repmgr_member_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__repmgr_member_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__repmgr_member", __repmgr_member_desc, info)); +} + +/* + * PUBLIC: int __repmgr_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__repmgr_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __repmgr_member_print, DB___repmgr_member)) != 0) + return (ret); + return (0); +} +#endif /* HAVE_REPLICATION_THREADS */ diff --git a/src/repmgr/repmgr_elect.c b/src/repmgr/repmgr_elect.c new file mode 100644 index 00000000..4ec77055 --- /dev/null +++ b/src/repmgr/repmgr_elect.c @@ -0,0 +1,550 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +static db_timeout_t __repmgr_compute_response_time __P((ENV *)); +static int __repmgr_elect __P((ENV *, u_int32_t, db_timespec *)); +static int __repmgr_elect_main __P((ENV *, REPMGR_RUNNABLE *)); +static void *__repmgr_elect_thread __P((void *)); +static int send_membership __P((ENV *)); + +/* + * Starts an election thread. + * + * PUBLIC: int __repmgr_init_election __P((ENV *, u_int32_t)); + * + * !!! + * Caller must hold mutex. 
+ */ +int +__repmgr_init_election(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_REP *db_rep; + REPMGR_RUNNABLE *th; + int ret; + u_int i, new_size; + + COMPQUIET(th, NULL); + + db_rep = env->rep_handle; + if (db_rep->finished) { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "ignoring elect thread request %#lx; repmgr is finished", + (u_long)flags)); + return (0); + } + + /* Find an available slot, indexed by 'i'; allocate more if needed. */ + for (i = 0; i < db_rep->aelect_threads; i++) { + th = db_rep->elect_threads[i]; + if (th == NULL) + break; + if (th->finished) { + if ((ret = __repmgr_thread_join(th)) != 0) + return (ret); + /* Reuse the space in a moment. */ + break; + } + } + if (i == db_rep->aelect_threads) { + new_size = db_rep->aelect_threads + 1; + if ((ret = __os_realloc(env, + sizeof(REPMGR_RUNNABLE*) * new_size, + &db_rep->elect_threads)) != 0) + return (ret); + db_rep->aelect_threads = new_size; + STAT(db_rep->region->mstat.st_max_elect_threads = new_size); + th = db_rep->elect_threads[i] = NULL; + } + + if (th == NULL && + (ret = __os_malloc(env, sizeof(REPMGR_RUNNABLE), &th)) != 0) + return (ret); + th->run = __repmgr_elect_thread; + th->args.flags = flags; + + if ((ret = __repmgr_thread_start(env, th)) == 0) + STAT(db_rep->region->mstat.st_elect_threads++); + else { + __os_free(env, th); + th = NULL; + } + db_rep->elect_threads[i] = th; + + return (ret); +} + +static void * +__repmgr_elect_thread(argsp) + void *argsp; +{ + REPMGR_RUNNABLE *th; + ENV *env; + int ret; + + th = argsp; + env = th->env; + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "starting election thread")); + + if ((ret = __repmgr_elect_main(env, th)) != 0) { + __db_err(env, ret, "election thread failed"); + (void)__repmgr_thread_failure(env, ret); + } + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "election thread is exiting")); + th->finished = TRUE; + return (NULL); +} + +static int +__repmgr_elect_main(env, th) + ENV *env; + REPMGR_RUNNABLE *th; +{ + DB_REP *db_rep; + REP *rep; 
+#ifdef DB_WIN32 + DWORD duration; + db_timeout_t t; +#else + struct timespec deadline; +#endif + db_timespec failtime, now, repstart_time, target, wait_til; + db_timeout_t response_time; + u_int32_t flags; + int done_repstart, ret, suppress_election; + enum { ELECTION, REPSTART } action; + + COMPQUIET(action, ELECTION); + + db_rep = env->rep_handle; + rep = db_rep->region; + flags = th->args.flags; + + if (LF_ISSET(ELECT_F_EVENT_NOTIFY)) + DB_EVENT(env, DB_EVENT_REP_MASTER_FAILURE, NULL); + + /* + * As a freshly started thread, lay claim to the title of being + * "preferred". If an older thread is sleeping for retry, when it wakes + * up it will relinquish its role (since there's no need for multiple + * threads to sleep and retry). + */ + LOCK_MUTEX(db_rep->mutex); + db_rep->preferred_elect_thr = th; + UNLOCK_MUTEX(db_rep->mutex); + + /* + * The 'done_repstart' flag keeps track of which was our most recent + * operation (repstart or election), so that we can alternate + * appropriately. There are a few different ways this thread can be + * invoked, and all but one specify some form of immediate election be + * called. The one exception is at initial start-up, where we + * first probe for a master by sending out rep_start(CLIENT) calls. + */ + if (LF_ISSET(ELECT_F_IMMED)) { + /* + * When the election succeeds, we've successfully completed + * everything we need to do. If it fails in an unexpected way, + * we abort all processing as usual. The only time we need to + * stay in here and do some more work is on DB_REP_UNAVAIL, + * in which case we want to wait a while and retry later. + */ + if ((ret = __repmgr_elect(env, flags, &failtime)) == + DB_REP_UNAVAIL) + done_repstart = FALSE; + else + goto out; + } else { + /* + * We didn't really have an election failure, because in this + * case we haven't even done an election yet. 
But the timing + * we want turns out the same: we want to wait for the election + * retry time and then call for an election if nothing else + * interesting happens before then. + */ + __os_gettime(env, &failtime, 1); + + /* + * Although we didn't do a repstart in this thread, we know that + * our caller did one just before creating the thread. + */ + done_repstart = TRUE; + } + + LOCK_MUTEX(db_rep->mutex); + for (;;) { + ret = 0; + + if (db_rep->finished) + goto unlock; + + /* + * If we've become the master (which could happen after an + * election in another election thread), or we find we have a + * working connection to a known master, then we're quite + * content: that's really the essential purpose of this whole + * thread. + */ + if (__repmgr_master_is_known(env)) + goto unlock; + + /* + * When circumstances force us to do an immediate election, we + * may be forced to create multiple threads in order to do so. + * But we certainly don't need multiple threads sleeping, + * alternating and retrying. The "preferred election thread" is + * the one that has the authority and responsibility to + * persevere until our work is done. Note that this role can + * switch from one thread to another, depending on the timing of + * events. In particular, when an election fails the thread + * that got the failure becomes the chosen one that will remain + * to avenge the failure. + */ + if (db_rep->preferred_elect_thr != th) + goto unlock; + + timespecclear(&wait_til); + __os_gettime(env, &now, 1); + + /* + * See if it's time to retry the operation. Normally it's an + * election we're interested in retrying. But we refrain from + * calling for elections if so configured. + */ + suppress_election = LF_ISSET(ELECT_F_STARTUP) ? + db_rep->init_policy == DB_REP_CLIENT : + !FLD_ISSET(rep->config, REP_C_ELECTIONS); + repstart_time = db_rep->repstart_time; + target = suppress_election ? 
repstart_time : failtime; + TIMESPEC_ADD_DB_TIMEOUT(&target, rep->election_retry_wait); + if (timespeccmp(&now, &target, >=)) { + /* + * We've surpassed our target retry time. + * However, elections should generally alternate with + * rep_start calls, so do that if we haven't done one + * since the last election. + */ + action = suppress_election ? REPSTART : + (done_repstart ? ELECTION : REPSTART); + + } else if (db_rep->new_connection) { + /* Seen a recent new connection, let's do rep_start. */ + action = REPSTART; + } else + wait_til = target; + + if (!timespecisset(&wait_til)) { + response_time = __repmgr_compute_response_time(env); + target = repstart_time; + TIMESPEC_ADD_DB_TIMEOUT(&target, response_time); + if (timespeccmp(&now, &target, <)) { + /* We haven't waited long enough. */ + wait_til = target; + } + } + + if (timespecisset(&wait_til)) { +#ifdef DB_WIN32 + timespecsub(&wait_til, &now); + DB_TIMESPEC_TO_TIMEOUT(t, &wait_til, TRUE); + duration = t / US_PER_MS; + if ((ret = SignalObjectAndWait(*db_rep->mutex, + db_rep->check_election, duration, FALSE)) != + WAIT_OBJECT_0 && ret != WAIT_TIMEOUT) + goto out; + + LOCK_MUTEX(db_rep->mutex); + + /* + * Although there could be multiple threads, only the + * "preferred" thread resets the event object. If the + * others tried to do so, the preferred thread might + * miss the wake-up. Another way of saying this is that + * the precise meaning of the check_election event is + * that "there may be some election-thread-related work + * to do, and the correct thread to do it has not yet + * been woken up". 
+ */ + if (ret == WAIT_OBJECT_0 && + db_rep->preferred_elect_thr == th && + !ResetEvent(db_rep->check_election)) { + ret = GetLastError(); + goto unlock; + } +#else + deadline.tv_sec = wait_til.tv_sec; + deadline.tv_nsec = wait_til.tv_nsec; + if ((ret = pthread_cond_timedwait( + &db_rep->check_election, db_rep->mutex, &deadline)) + != ETIMEDOUT && ret != 0) + goto unlock; +#endif + continue; + } + + UNLOCK_MUTEX(db_rep->mutex); + if (action == ELECTION) { + db_rep->new_connection = FALSE; + if ((ret = __repmgr_elect(env, 0, &failtime)) == + DB_REP_UNAVAIL) + done_repstart = FALSE; + else + goto out; + LOCK_MUTEX(db_rep->mutex); + db_rep->preferred_elect_thr = th; + } else { + DB_ASSERT(env, action == REPSTART); + + db_rep->new_connection = FALSE; + if ((ret = __repmgr_repstart(env, DB_REP_CLIENT)) != 0) + goto out; + done_repstart = TRUE; + + LOCK_MUTEX(db_rep->mutex); + __os_gettime(env, &db_rep->repstart_time, 1); + } + } + +#ifdef HAVE_STATISTICS + /* + * We normally don't bother taking a mutex to increment statistics. But + * in this case, since we're incrementing and decrementing in pairs, it + * could be very weird if we were "off somewhat". For example, we could + * get a negative value. And this is not a high-traffic, performance- + * critical path. + * On the other hand, it suffices to take repmgr's (handle-based) + * mutex, rather than the rep mutex which normally protects shared + * memory, since all election thread activity must be occurring in the + * single listener process, under control of one single rep handle. + */ +out: + LOCK_MUTEX(db_rep->mutex); +unlock: + rep->mstat.st_elect_threads--; + UNLOCK_MUTEX(db_rep->mutex); +#else +unlock: + UNLOCK_MUTEX(db_rep->mutex); +out: +#endif + return (ret); +} + +static db_timeout_t +__repmgr_compute_response_time(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + db_timeout_t ato, eto; + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * Avoid crowding operations too close together. 
If we've just recently + * done a rep_start, wait a moment in case there's a master out there, + * to give it a chance to respond with a NEWMASTER message. This is + * particularly an issue at start-up time, when we're likely to have + * several "new connection establishment" events bombarding us with lots + * of rep_start requests in quick successtion. + * + * We don't have a separate user configuration for rep_start response, + * but it's reasonable to expect it to be similar to either the ack + * timeout or the election timeout, whichever is smaller. However, only + * consider the ack timeout if all signs point to it being in use. + */ + ato = rep->ack_timeout; + eto = rep->elect_timeout; + if (ato > 0 && + rep->perm_policy != DB_REPMGR_ACKS_NONE && + rep->priority > 0 && + ato < eto) + return (ato); + + return (eto); +} + +static int +__repmgr_elect(env, flags, failtimep) + ENV *env; + u_int32_t flags; + db_timespec *failtimep; +{ + DB_REP *db_rep; + REP *rep; + u_int32_t invitation, nsites, nvotes; + int ret, t_ret; + + db_rep = env->rep_handle; + nsites = db_rep->region->config_nsites; + DB_ASSERT(env, nsites > 0); + + /* + * With only 2 sites in the group, even a single failure could make it + * impossible to get a majority. So, fudge a little, unless the user + * really wants strict safety. + */ + if (nsites == 2 && + !FLD_ISSET(db_rep->region->config, REP_C_2SITE_STRICT)) + nvotes = 1; + else + nvotes = ELECTION_MAJORITY(nsites); + + if (LF_ISSET(ELECT_F_INVITEE)) { + /* + * We're going to the election party because we were invited by + * another site. Accept the other site's suggested value, if + * it's reasonable. (I.e., the other site may have wanted to do + * a "fast" election after losing contact with the master. If + * so, let's not spoil it by imposing our own full nsites count + * on it.) 
+ */ + rep = db_rep->region; + invitation = rep->nsites; + if (invitation == nsites || invitation == nsites - 1) { + nsites = invitation; + } + } + if (LF_ISSET(ELECT_F_FAST) && nsites > nvotes) { + /* + * If we're doing an election because we noticed that the master + * failed, it's reasonable to expect that the master won't + * participate. By not waiting for its vote, we can probably + * complete the election faster. But note that we shouldn't + * allow this to affect nvotes calculation. + * + * However, if we have 2 sites, and strict majority is turned + * on, now nvotes would be 2, and it doesn't make sense to + * rep_elect to see nsites of 1 in that case. So only decrement + * nsites if it currently exceeds nvotes. + */ + nsites--; + } + /* The rule for leases overrides all of the above. */ + if (IS_USING_LEASES(env)) + nsites = 0; + + switch (ret = __rep_elect_int(env, nsites, nvotes, 0)) { + case DB_REP_UNAVAIL: + __os_gettime(env, failtimep, 1); + DB_EVENT(env, DB_EVENT_REP_ELECTION_FAILED, NULL); + if ((t_ret = send_membership(env)) != 0) + ret = t_ret; + break; + + case 0: + if (db_rep->takeover_pending) + ret = __repmgr_claim_victory(env); + break; + + case DB_REP_IGNORE: + ret = 0; + break; + + default: + __db_err(env, ret, DB_STR("3629", + "unexpected election failure")); + break; + } + return (ret); +} + +/* + * If an election fails with DB_REP_UNAVAIL, it could be because a participating + * site has an obsolete, too-high notion of the group size. (This could happen + * if the site was down/disconnected during removal of some (other) sites.) To + * remedy this, broadcast a current copy of the membership list. Since all + * sites are doing this, and we always ratchet to the most up-to-date version, + * this should bring all sites up to date. We only do this after a failure, + * during what will normally be an idle period anyway, so that we don't slow + * down a first election following the loss of an active master. 
+ */ +static int +send_membership(env) + ENV *env; +{ + DB_REP *db_rep; + u_int8_t *buf; + size_t len; + int ret; + + db_rep = env->rep_handle; + buf = NULL; + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_marshal_member_list(env, &buf, &len)) != 0) + goto out; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Broadcast latest membership list")); + ret = __repmgr_bcast_own_msg(env, REPMGR_SHARING, buf, len); +out: + UNLOCK_MUTEX(db_rep->mutex); + if (buf != NULL) + __os_free(env, buf); + return (ret); +} + +/* + * Becomes master after we've won an election, if we can. + * + * PUBLIC: int __repmgr_claim_victory __P((ENV *)); + */ +int +__repmgr_claim_victory(env) + ENV *env; +{ + int ret; + + env->rep_handle->takeover_pending = FALSE; + if ((ret = __repmgr_become_master(env)) == DB_REP_UNAVAIL) { + ret = 0; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Won election but lost race with DUPMASTER client intent")); + } + return (ret); +} + +/* + * When turning on elections in an already-running system, check to see if we're + * in a state where we need an election (i.e., we would have started one + * previously if elections hadn't been turned off), and if so start one. + * + * PUBLIC: int __repmgr_turn_on_elections __P((ENV *)); + */ +int +__repmgr_turn_on_elections(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + + DB_ASSERT(env, REP_ON(env)); + LOCK_MUTEX(db_rep->mutex); + if (db_rep->selector == NULL || + !FLD_ISSET(rep->config, REP_C_ELECTIONS) || + __repmgr_master_is_known(env)) + goto out; + + ret = __repmgr_init_election(env, ELECT_F_IMMED); + +out: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} diff --git a/src/repmgr/repmgr_method.c b/src/repmgr/repmgr_method.c new file mode 100644 index 00000000..d28ff8f4 --- /dev/null +++ b/src/repmgr/repmgr_method.c @@ -0,0 +1,3045 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" + +/* Context for an API thread waiting for response to a synchronous request. */ +struct response_wait { + REPMGR_CONNECTION *conn; + u_int32_t index; +}; + +static int addr_chk __P((const ENV *, const char *, u_int)); +static void adjust_bulk_response __P((ENV *, DBT *)); +static int bad_callback_method __P((DB_CHANNEL *, const char *)); +static void copy_body __P((u_int8_t *, REPMGR_IOVECS *)); +static int get_shared_netaddr __P((ENV *, int, repmgr_netaddr_t *)); +static int establish_connection __P((ENV *, int, REPMGR_CONNECTION **)); +static int get_channel_connection __P((CHANNEL *, REPMGR_CONNECTION **)); +static int init_dbsite __P((ENV *, int, const char *, u_int, DB_SITE **)); +static int join_group_at_site __P((ENV *, repmgr_netaddr_t *)); +static int kick_blockers __P((ENV *, REPMGR_CONNECTION *, void *)); +static int make_request_conn __P((ENV *, + repmgr_netaddr_t *, REPMGR_CONNECTION **)); +static int set_local_site __P((DB_SITE *, u_int32_t)); +static int read_own_msg __P((ENV *, + REPMGR_CONNECTION *, u_int32_t *, u_int8_t **, size_t *)); +static int refresh_site __P((DB_SITE *)); +static int __repmgr_await_threads __P((ENV *)); +static int __repmgr_build_data_out __P((ENV *, + DBT *, u_int32_t, __repmgr_msg_metadata_args *, REPMGR_IOVECS **iovecsp)); +static int __repmgr_build_msg_out __P((ENV *, + DBT *, u_int32_t, __repmgr_msg_metadata_args *, REPMGR_IOVECS **iovecsp)); +static int repmgr_only __P((ENV *, const char *)); +static int __repmgr_restart __P((ENV *, int, u_int32_t)); +static int __repmgr_remove_site __P((DB_SITE *)); +static int __repmgr_remove_site_pp __P((DB_SITE *)); +static int __repmgr_start_msg_threads __P((ENV *, u_int)); +static int request_self __P((ENV *, DBT *, u_int32_t, DBT *, u_int32_t)); 
+static int response_complete __P((ENV *, void *)); +static int send_msg_conn __P((ENV *, REPMGR_CONNECTION *, DBT *, u_int32_t)); +static int send_msg_self __P((ENV *, REPMGR_IOVECS *, u_int32_t)); +static int site_by_addr __P((ENV *, const char *, u_int, DB_SITE **)); + +/* + * PUBLIC: int __repmgr_start __P((DB_ENV *, int, u_int32_t)); + */ +int +__repmgr_start(dbenv, nthreads, flags) + DB_ENV *dbenv; + int nthreads; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *me, *site; + DB_THREAD_INFO *ip; + ENV *env; + int first, is_listener, locked, min, need_masterseek, ret, start_master; + u_int i, n; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + + switch (flags) { + case 0: + case DB_REP_CLIENT: + case DB_REP_ELECTION: + case DB_REP_MASTER: + break; + default: + __db_errx(env, DB_STR("3635", + "repmgr_start: unrecognized flags parameter value")); + return (EINVAL); + } + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->repmgr_start", DB_INIT_REP); + if (!F_ISSET(env, ENV_THREAD)) { + __db_errx(env, DB_STR("3636", + "Replication Manager needs an environment with DB_THREAD")); + return (EINVAL); + } + + if (APP_IS_BASEAPI(env)) + return (repmgr_only(env, "repmgr_start")); + + /* Check that the required initialization has been done. */ + if (!IS_VALID_EID(db_rep->self_eid)) { + __db_errx(env, DB_STR("3637", + "A local site must be named before calling repmgr_start")); + return (EINVAL); + } + + if (db_rep->finished) { + __db_errx(env, DB_STR("3638", "repmgr is shutting down")); + return (EINVAL); + } + db_rep->init_policy = flags; + if ((ret = __rep_set_transport_int(env, + db_rep->self_eid, __repmgr_send)) != 0) + return (ret); + if (!REPMGR_INITED(db_rep) && (ret = __repmgr_init(env)) != 0) + return (ret); + /* + * As a prerequisite to starting replication, get our list of remote + * sites properly set up. 
Mainly this involves reading the group + * membership database; but alternatively, deciding what to do when it's + * not present (which depends on various conditions). + */ + start_master = (flags == DB_REP_MASTER); + + if (db_rep->restored_list != NULL) { + ret = __repmgr_refresh_membership(env, + db_rep->restored_list, db_rep->restored_list_length); + __os_free(env, db_rep->restored_list); + db_rep->restored_list = NULL; + } else { + ret = __repmgr_reload_gmdb(env); + me = SITE_FROM_EID(db_rep->self_eid); + if (ret == 0) { + if (me->membership != SITE_PRESENT) + /* + * We have a database but the local site is not + * shown as "present" in the group. We must + * have been removed from the group, or perhaps + * we're being created via hot backup. In + * either case the thing to do is to try to + * join. + */ + ret = __repmgr_join_group(env); + } else if (ret == ENOENT) { + ENV_ENTER(env, ip); + if (FLD_ISSET(me->config, DB_GROUP_CREATOR)) + start_master = TRUE; + /* + * LEGACY is inconsistent with CREATOR, but start_master + * could still be true due to "flags" being passed as + * DB_REP_MASTER. In that case, being started as master + * is irrelevant to establishing initial membership + * list: LEGACY always takes precedence if set. 
+ */ + if (FLD_ISSET(me->config, DB_LEGACY)) { + LOCK_MUTEX(db_rep->mutex); + db_rep->membership_version = 1; + db_rep->member_version_gen = 1; + for (n = i = 0; i < db_rep->site_cnt; i++) { + site = SITE_FROM_EID(i); + if (!FLD_ISSET(site->config, DB_LEGACY)) + continue; + if ((ret = __repmgr_set_membership(env, + site->net_addr.host, + site->net_addr.port, + SITE_PRESENT)) != 0) + break; + n++; + } + ret = __rep_set_nsites_int(env, n); + DB_ASSERT(env, ret == 0); + UNLOCK_MUTEX(db_rep->mutex); + } else if (start_master) { + LOCK_MUTEX(db_rep->mutex); + db_rep->membership_version = 1; + db_rep->member_version_gen = 1; + if ((ret = __repmgr_set_membership(env, + me->net_addr.host, me->net_addr.port, + SITE_PRESENT)) == 0) { + ret = __rep_set_nsites_int(env, 1); + DB_ASSERT(env, ret == 0); + } + UNLOCK_MUTEX(db_rep->mutex); + } else + ret = __repmgr_join_group(env); + ENV_LEAVE(env, ip); + } + } + if (ret != 0) + return (ret); + + DB_ASSERT(env, start_master || + SITE_FROM_EID(db_rep->self_eid)->membership == SITE_PRESENT); + + /* + * Figure out the current situation. The current invocation of + * repmgr_start() is either the first one (on the given env handle), or + * a subsequent one. If we've already got a select thread running, then + * this must be a subsequent one. + * + * Then, in case there could be multiple processes, we're either the + * main listener process or a subordinate process. On a "subsequent" + * repmgr_start() call we already have enough information to know which + * it is. Otherwise, negotiate with information in the shared region to + * claim the listener role if possible. + * + * To avoid a race, once we decide we're in the first call, mark the + * handle as started, so that no other thread thinks the same thing. 
+ */ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + if (db_rep->mgr_started) { + first = FALSE; + is_listener = !IS_SUBORDINATE(db_rep); + } else { + first = TRUE; + db_rep->mgr_started = TRUE; + + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_repmgr); + if (rep->listener == 0) { + is_listener = TRUE; + __os_id(dbenv, &rep->listener, NULL); + } else { + is_listener = FALSE; + nthreads = 0; + } + MUTEX_UNLOCK(env, rep->mtx_repmgr); + ENV_LEAVE(env, ip); + + /* + * Since we're the first repmgr_start() call, we will have to + * start threads. Therefore, we require a flags value (to tell + * us how). + */ + if (flags == 0) { + __db_errx(env, DB_STR("3639", + "a non-zero flags value is required for initial repmgr_start() call")); + ret = EINVAL; + goto err; + } + } + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + + if (!first) { + /* + * Subsequent call is allowed when ELECTIONS are turned off, so + * that the application can make its own dynamic role changes. + * It's also allowed in any case, if not trying to change roles + * (flags == 0), in order to change number of message processing + * threads. The __repmgr_restart() function will take care of + * these cases entirely. + */ + if (!is_listener || (flags != 0 && + FLD_ISSET(db_rep->region->config, REP_C_ELECTIONS))) { + __db_errx(env, DB_STR("3640", + "repmgr is already started")); + ret = EINVAL; + } else + ret = __repmgr_restart(env, nthreads, flags); + return (ret); + } + + /* + * The minimum legal number of threads is either 1 or 0, depending upon + * whether we're the main process or a subordinate. + */ + min = is_listener ? 1 : 0; + if (nthreads < min) { + __db_errx(env, DB_STR_A("3641", + "repmgr_start: nthreads parameter must be >= %d", + "%d"), min); + ret = EINVAL; + goto err; + } + + /* + * Ensure at least one more thread (for channel messages and GMDB + * requests) beyond those set aside to avoid starvation of rep + * messages. 
+ * + * Note that it's OK to silently fudge the number here, because the + * documentation says that "[i]n addition to these message processing + * threads, the Replication Manager creates and manages a few of its own + * threads of control." + */ + min = RESERVED_MSG_TH(env) + 1; + if (nthreads < min && is_listener) + nthreads = min; + + if (is_listener) { + if ((ret = __repmgr_listen(env)) != 0) + goto err; + /* + * Make some sort of call to rep_start before starting message + * processing threads, to ensure that incoming messages being + * processed always have a rep context properly configured. + * Note that even if we're starting without recovery, we need a + * rep_start call in case we're using leases. Leases keep track + * of rep_start calls even within an env region lifetime. + */ + if (start_master) { + ret = __repmgr_become_master(env); + /* No other repmgr threads running yet. */ + DB_ASSERT(env, ret != DB_REP_UNAVAIL); + if (ret != 0) + goto err; + need_masterseek = FALSE; + } else { + if ((ret = __repmgr_repstart(env, DB_REP_CLIENT)) != 0) + goto err; + /* + * The repmgr election code starts elections only if + * the DB_REP_ELECTION start flag was specified, but + * it performs other actions to help find a master for + * DB_REP_CLIENT, which is why we need_masterseek for + * both cases. + */ + need_masterseek = TRUE; + } + + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + + /* + * Since these allocated memory blocks are used by other + * threads, we have to be a bit careful about freeing them in + * case of any errors. __repmgr_await_threads (which we call in + * the err: coda below) takes care of that. + * + * Start by allocating enough space for 2 election threads. We + * occasionally need that many; more are possible, but would be + * extremely rare. 
+ */ +#define ELECT_THREADS_ALLOC 2 + + if ((ret = __os_calloc(env, ELECT_THREADS_ALLOC, + sizeof(REPMGR_RUNNABLE *), &db_rep->elect_threads)) != 0) + goto err; + db_rep->aelect_threads = ELECT_THREADS_ALLOC; + STAT(rep->mstat.st_max_elect_threads = ELECT_THREADS_ALLOC); + + if ((ret = __os_calloc(env, (u_int)nthreads, + sizeof(REPMGR_RUNNABLE *), &db_rep->messengers)) != 0) + goto err; + db_rep->athreads = (u_int)nthreads; + + db_rep->nthreads = 0; + if ((ret = + __repmgr_start_msg_threads(env, (u_int)nthreads)) != 0) + goto err; + + if (need_masterseek) { + /* + * The repstart_time field records that time when we + * last issued a rep_start(CLIENT) that sent out a + * NEWCLIENT message. We use it to avoid doing so + * twice in quick succession (to give the master a + * reasonable chance to respond). The rep_start() + * that we just issued above doesn't count, because we + * haven't established any connections yet, and so no + * message could have been sent out. The instant we + * get our first connection set up we want to send out + * our first real NEWCLIENT. + */ + timespecclear(&db_rep->repstart_time); + + if ((ret = __repmgr_init_election(env, + ELECT_F_STARTUP)) != 0) + goto err; + } + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + } + /* All processes (even non-listeners) need a select() thread. */ + if ((ret = __repmgr_start_selector(env)) == 0) + return (is_listener ? 0 : DB_REP_IGNORE); + +err: + /* If we couldn't succeed at everything, undo the parts we did do. 
*/ + if (db_rep->selector != NULL) { + if (!locked) + LOCK_MUTEX(db_rep->mutex); + (void)__repmgr_stop_threads(env); + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + (void)__repmgr_await_threads(env); + } + if (!locked) + LOCK_MUTEX(db_rep->mutex); + (void)__repmgr_net_close(env); + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_valid_config __P((ENV *, u_int32_t)); + */ +int +__repmgr_valid_config(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + ret = 0; + + DB_ASSERT(env, REP_ON(env)); + LOCK_MUTEX(db_rep->mutex); + + /* (Can't check IS_SUBORDINATE if select thread isn't running yet.) */ + if (LF_ISSET(REP_C_ELECTIONS) && + db_rep->selector != NULL && IS_SUBORDINATE(db_rep)) { + __db_errx(env, DB_STR("3642", + "can't configure repmgr elections from subordinate process")); + ret = EINVAL; + } + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * Starts message processing threads. On entry, the actual number of threads + * already active is db_rep->nthreads; the desired number of threads is passed + * as "n". + * + * Caller must hold mutex. + */ +static int +__repmgr_start_msg_threads(env, n) + ENV *env; + u_int n; +{ + DB_REP *db_rep; + REPMGR_RUNNABLE *messenger; + int ret; + + db_rep = env->rep_handle; + DB_ASSERT(env, db_rep->athreads >= n); + while (db_rep->nthreads < n) { + if ((ret = __os_calloc(env, + 1, sizeof(REPMGR_RUNNABLE), &messenger)) != 0) + return (ret); + + messenger->run = __repmgr_msg_thread; + if ((ret = __repmgr_thread_start(env, messenger)) != 0) { + __os_free(env, messenger); + return (ret); + } + db_rep->messengers[db_rep->nthreads++] = messenger; + } + return (0); +} + +/* + * Handles a repmgr_start() call that occurs when repmgr is already running. + * This is allowed (when elections are not in use), to dynamically change + * master/client role. 
It is also allowed (regardless of the ELECTIONS setting) + * to change the number of msg processing threads. + */ +static int +__repmgr_restart(env, nthreads, flags) + ENV *env; + int nthreads; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_RUNNABLE **th; + u_int32_t cur_repflags; + int locked, ret, t_ret; + u_int delta, i, min, nth; + + th = NULL; + locked = FALSE; + + if (flags == DB_REP_ELECTION) { + __db_errx(env, DB_STR("3643", + "subsequent repmgr_start() call may not specify DB_REP_ELECTION")); + return (EINVAL); + } + if (nthreads < 0) { + __db_errx(env, DB_STR("3644", + "repmgr_start: nthreads parameter must be >= 0")); + return (EINVAL); + } + + ret = 0; + db_rep = env->rep_handle; + DB_ASSERT(env, REP_ON(env)); + rep = db_rep->region; + + cur_repflags = F_ISSET(rep, REP_F_MASTER | REP_F_CLIENT); + DB_ASSERT(env, cur_repflags); + if (FLD_ISSET(cur_repflags, REP_F_MASTER) && + flags == DB_REP_CLIENT) + ret = __repmgr_become_client(env); + else if (FLD_ISSET(cur_repflags, REP_F_CLIENT) && + flags == DB_REP_MASTER) + ret = __repmgr_become_master(env); + if (ret != 0) + return (ret); + + if (nthreads == 0) + return (0); + nth = (u_int)nthreads; + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + min = RESERVED_MSG_TH(env) + db_rep->non_rep_th; + if (nth < min) + nth = min; + + if (nth > db_rep->nthreads) { + /* + * To increase the number of threads, first allocate more space, + * unless we already have enough unused space available. + */ + if (db_rep->athreads < nth) { + if ((ret = __os_realloc(env, + sizeof(REPMGR_RUNNABLE *) * nth, + &db_rep->messengers)) != 0) + goto out; + db_rep->athreads = nth; + } + ret = __repmgr_start_msg_threads(env, nth); + } else if (nth < db_rep->nthreads) { + /* + * Remove losers from array, and then wait for each of them. 
We + * have to make an array copy, because we have to drop the mutex + * to wait for the threads to complete, and if we left the real + * array in the handle in the pending state while waiting, + * another thread could come along wanting to make another + * change, and would make a mess. + * The alternative is about as inelegant: we could do these + * one at a time here if we added another field to the handle, + * to keep track of both the actual number of threads and the + * user's desired number of threads. + */ + /* + * Make sure signalling the condition variable works, before + * making a mess of the data structures. Although it may seem a + * little backwards, it doesn't really matter since we're + * holding the mutex. Once we allocate the temp array and grab + * ownership of the loser thread structs, we must continue + * trying (even if errors) so that we definitely free the + * memory. + */ + if ((ret = __repmgr_wake_msngers(env, nth)) != 0) + goto out; + delta = db_rep->nthreads - nth; + if ((ret = __os_calloc(env, (size_t)delta, + sizeof(REPMGR_RUNNABLE *), &th)) != 0) + goto out; + for (i = 0; i < delta; i++) { + th[i] = db_rep->messengers[nth + i]; + th[i]->quit_requested = TRUE; + db_rep->messengers[nth + i] = NULL; + } + db_rep->nthreads = nth; + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + + DB_ASSERT(env, ret == 0); + for (i = 0; i < delta; i++) { + if ((t_ret = __repmgr_thread_join(th[i])) != 0 && + ret == 0) + ret = t_ret; + __os_free(env, th[i]); + } + __os_free(env, th); + } + +out: if (locked) + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_autostart __P((ENV *)); + * + * Preconditions: rep_start() has been called; we're within an ENV_ENTER. 
+ */ +int +__repmgr_autostart(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + DB_ASSERT(env, REP_ON(env)); + LOCK_MUTEX(db_rep->mutex); + + if (REPMGR_INITED(db_rep)) + ret = 0; + else + ret = __repmgr_init(env); + if (ret != 0) + goto out; + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Automatically joining existing repmgr env")); + + /* + * We're only called if we're a master, which means we've had a + * rep_start() call, which means we must have had a previous + * rep_set_transport() call (in the region, in a separate env handle). + * We could therefore get away with simply poking in a pointer to our + * send function; but we need to dig up our EID value anyway, so we + * might as well set it properly. + */ + db_rep->self_eid = rep->eid; + if ((ret = __rep_set_transport_int(env, + db_rep->self_eid, __repmgr_send)) != 0) + goto out; + + if (db_rep->selector == NULL && !db_rep->finished) + ret = __repmgr_start_selector(env); + +out: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_start_selector __P((ENV *)); + */ +int +__repmgr_start_selector(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_RUNNABLE *selector; + int ret; + + db_rep = env->rep_handle; + if ((ret = __os_calloc(env, 1, sizeof(REPMGR_RUNNABLE), &selector)) + != 0) + return (ret); + selector->run = __repmgr_select_thread; + + /* + * In case the select thread ever examines db_rep->selector, set it + * before starting the thread (since once we create it we could be + * racing with it). 
+ */ + db_rep->selector = selector; + if ((ret = __repmgr_thread_start(env, selector)) != 0) { + __db_err(env, ret, DB_STR("3645", + "can't start selector thread")); + __os_free(env, selector); + db_rep->selector = NULL; + return (ret); + } + + return (0); +} + +/* + * PUBLIC: int __repmgr_close __P((ENV *)); + */ +int +__repmgr_close(env) + ENV *env; +{ + DB_REP *db_rep; + int ret, t_ret; + + ret = 0; + db_rep = env->rep_handle; + if (db_rep->selector != NULL) { + if (!db_rep->finished) { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Stopping repmgr threads")); + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_stop_threads(env); + UNLOCK_MUTEX(db_rep->mutex); + } + if ((t_ret = __repmgr_await_threads(env)) != 0 && ret == 0) + ret = t_ret; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Repmgr threads are finished")); + } + + if ((t_ret = __repmgr_net_close(env)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __repmgr_deinit(env)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * PUBLIC: int __repmgr_set_ack_policy __P((DB_ENV *, int)); + */ +int +__repmgr_set_ack_policy(dbenv, policy) + DB_ENV *dbenv; + int policy; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + int ret; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->repmgr_set_ack_policy", DB_INIT_REP); + + if (APP_IS_BASEAPI(env)) + return (repmgr_only(env, "repmgr_set_ack_policy")); + + switch (policy) { + case DB_REPMGR_ACKS_ALL: + case DB_REPMGR_ACKS_ALL_AVAILABLE: + case DB_REPMGR_ACKS_ALL_PEERS: + case DB_REPMGR_ACKS_NONE: + case DB_REPMGR_ACKS_ONE: + case DB_REPMGR_ACKS_ONE_PEER: + case DB_REPMGR_ACKS_QUORUM: + if (REP_ON(env)) { + if (rep->perm_policy != policy) { + rep->perm_policy = policy; + if ((ret = __repmgr_bcast_parm_refresh(env)) + != 0) + return (ret); + } + } else + db_rep->perm_policy = policy; + /* + * Setting an ack policy makes this a replication manager + * application. 
+ */ + APP_SET_REPMGR(env); + return (0); + default: + __db_errx(env, DB_STR("3646", + "unknown ack_policy in DB_ENV->repmgr_set_ack_policy")); + return (EINVAL); + } +} + +/* + * PUBLIC: int __repmgr_get_ack_policy __P((DB_ENV *, int *)); + */ +int +__repmgr_get_ack_policy(dbenv, policy) + DB_ENV *dbenv; + int *policy; +{ + ENV *env; + DB_REP *db_rep; + REP *rep; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + + *policy = REP_ON(env) ? rep->perm_policy : db_rep->perm_policy; + return (0); +} + +/* + * PUBLIC: int __repmgr_env_create __P((ENV *, DB_REP *)); + */ +int +__repmgr_env_create(env, db_rep) + ENV *env; + DB_REP *db_rep; +{ + int ret; + + /* Set some default values. */ + db_rep->ack_timeout = DB_REPMGR_DEFAULT_ACK_TIMEOUT; + db_rep->connection_retry_wait = DB_REPMGR_DEFAULT_CONNECTION_RETRY; + db_rep->election_retry_wait = DB_REPMGR_DEFAULT_ELECTION_RETRY; + db_rep->config_nsites = 0; + db_rep->perm_policy = DB_REPMGR_ACKS_QUORUM; + FLD_SET(db_rep->config, REP_C_ELECTIONS); + FLD_SET(db_rep->config, REP_C_2SITE_STRICT); + + db_rep->self_eid = DB_EID_INVALID; + db_rep->listen_fd = INVALID_SOCKET; + TAILQ_INIT(&db_rep->connections); + TAILQ_INIT(&db_rep->retries); + + db_rep->input_queue.size = 0; + STAILQ_INIT(&db_rep->input_queue.header); + + __repmgr_env_create_pf(db_rep); + ret = __repmgr_create_mutex(env, &db_rep->mutex); + + return (ret); +} + +/* + * PUBLIC: void __repmgr_env_destroy __P((ENV *, DB_REP *)); + */ +void +__repmgr_env_destroy(env, db_rep) + ENV *env; + DB_REP *db_rep; +{ + if (db_rep->restored_list != NULL) + __os_free(env, db_rep->restored_list); + (void)__repmgr_queue_destroy(env); + __repmgr_net_destroy(env, db_rep); + if (db_rep->messengers != NULL) { + __os_free(env, db_rep->messengers); + db_rep->messengers = NULL; + } + if (db_rep->mutex != NULL) { + (void)__repmgr_destroy_mutex(env, db_rep->mutex); + db_rep->mutex = NULL; + } +} + +/* + * PUBLIC: int __repmgr_stop_threads __P((ENV *)); + * + * Caller 
must hold mutex; + */ +int +__repmgr_stop_threads(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + + db_rep->finished = TRUE; + if ((ret = __repmgr_signal(&db_rep->check_election)) != 0) + return (ret); + + /* + * Because we've set "finished", it's enough to wake msg_avail, even on + * Windows. (We don't need to wake per-thread Event Objects here, as we + * did in the case of only wanting to stop a subset of msg threads.) + */ + if ((ret = __repmgr_signal(&db_rep->msg_avail)) != 0) + return (ret); + + if ((ret = __repmgr_each_connection(env, + kick_blockers, NULL, TRUE)) != 0) + return (ret); + + return (__repmgr_wake_main_thread(env)); +} + +static int +kick_blockers(env, conn, unused) + ENV *env; + REPMGR_CONNECTION *conn; + void *unused; +{ + int ret, t_ret; + + COMPQUIET(unused, NULL); + + ret = __repmgr_signal(&conn->drained); + if ((t_ret = __repmgr_wake_waiters(env, + &conn->response_waiters)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * "Joins" all repmgr background threads. + */ +static int +__repmgr_await_threads(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_RUNNABLE *th; + REPMGR_SITE *site; + int ret, t_ret; + u_int i; + + db_rep = env->rep_handle; + ret = 0; + + /* + * First wait for the threads we started explicitly. Then wait for + * those "remote descendent" threads that these first threads may have + * started. This order is important, because, for example, the select + * thread, in its last gasp, may have started yet another new instance + * of a connector thread. + */ + + /* Message processing threads. */ + for (i = 0; + i < db_rep->nthreads && db_rep->messengers[i] != NULL; i++) { + th = db_rep->messengers[i]; + if ((t_ret = __repmgr_thread_join(th)) != 0 && ret == 0) + ret = t_ret; + __os_free(env, th); + } + __os_free(env, db_rep->messengers); + db_rep->messengers = NULL; + + /* The select() loop thread. 
*/ + if (db_rep->selector != NULL) { + if ((t_ret = __repmgr_thread_join(db_rep->selector)) != 0 && + ret == 0) + ret = t_ret; + __os_free(env, db_rep->selector); + db_rep->selector = NULL; + } + + /* Election threads. */ + for (i = 0; i < db_rep->aelect_threads; i++) { + th = db_rep->elect_threads[i]; + if (th != NULL) { + if ((t_ret = __repmgr_thread_join(th)) != 0 && ret == 0) + ret = t_ret; + __os_free(env, th); + } + } + __os_free(env, db_rep->elect_threads); + db_rep->aelect_threads = 0; + + /* Threads opening outgoing socket connections. */ + FOR_EACH_REMOTE_SITE_INDEX(i) { + LOCK_MUTEX(db_rep->mutex); + site = SITE_FROM_EID(i); + th = site->connector; + site->connector = NULL; + UNLOCK_MUTEX(db_rep->mutex); + if (th != NULL) { + if ((t_ret = __repmgr_thread_join(th)) != 0 && ret == 0) + ret = t_ret; + __os_free(env, th); + } + } + + return (ret); +} + +/* + * PUBLIC: int __repmgr_local_site __P((DB_ENV *, DB_SITE **)); + */ +int +__repmgr_local_site(dbenv, sitep) + DB_ENV *dbenv; + DB_SITE **sitep; +{ + DB_REP *db_rep; + ENV *env; + + env = dbenv->env; + db_rep = env->rep_handle; + + if (!IS_VALID_EID(db_rep->self_eid)) + return (DB_NOTFOUND); + return (__repmgr_site_by_eid(dbenv, db_rep->self_eid, sitep)); +} + +static int +addr_chk(env, host, port) + const ENV *env; + const char *host; + u_int port; +{ + if (host == NULL || host[0] == '\0') { + __db_errx(env, DB_STR("3648", + "repmgr_site: a host name is required")); + return (EINVAL); + } + if (port == 0 || port > UINT16_MAX) { + __db_errx(env, DB_STR_A("3649", + "repmgr_site: port out of range [1,%u]", "%u"), UINT16_MAX); + return (EINVAL); + } + return (0); +} + +/* + * PUBLIC: int __repmgr_channel __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); + */ +int +__repmgr_channel(dbenv, eid, dbchannelp, flags) + DB_ENV *dbenv; + int eid; + DB_CHANNEL **dbchannelp; + u_int32_t flags; +{ + ENV *env; + DB_THREAD_INFO *ip; + REP *rep; + DB_REP *db_rep; + DB_CHANNEL *dbchannel; + CHANNEL *channel; + 
REPMGR_CONNECTION *conn; + int cur_eid, master, ret; + + channel = NULL; + dbchannel = NULL; + conn = NULL; + + env = dbenv->env; + if ((ret = __db_fchk(env, "DB_ENV->repmgr_channel", flags, 0)) != 0) + return (ret); + db_rep = env->rep_handle; + rep = db_rep->region; + + if (db_rep->selector == NULL) { + __db_errx(env, DB_STR("3650", + "DB_ENV->repmgr_channel: must be called after DB_ENV->repmgr_start")); + return (EINVAL); + } + /* + * Note that repmgr_start() checks DB_INIT_REP, ENV_THREAD and + * APP_IS_BASEAPI. + */ + if (db_rep->finished) { + __db_errx(env, DB_STR("3651", "repmgr is shutting down")); + return (EINVAL); + } + + if (eid == DB_EID_MASTER) { + if ((master = rep->master_id) == DB_EID_INVALID) + return (DB_REP_UNAVAIL); + cur_eid = master; + } else if (IS_KNOWN_REMOTE_SITE(eid)) + cur_eid = eid; + else { + __db_errx(env, DB_STR_A("3652", + "%d is not a valid remote EID", "%d"), eid); + return (EINVAL); + } + + ENV_ENTER(env, ip); + if ((ret = __os_calloc(env, 1, sizeof(DB_CHANNEL), &dbchannel)) != 0 || + (ret = __os_calloc(env, 1, sizeof(CHANNEL), &channel)) != 0) + goto err; + dbchannel->channel = channel; + channel->db_channel = dbchannel; + channel->env = env; + + /* Preserve EID as passed by the caller (not cur_eid). */ + dbchannel->eid = eid; + dbchannel->timeout = DB_REPMGR_DEFAULT_CHANNEL_TIMEOUT; + + dbchannel->close = __repmgr_channel_close; + dbchannel->send_msg = __repmgr_send_msg; + dbchannel->send_request = __repmgr_send_request; + dbchannel->set_timeout = __repmgr_channel_timeout; + + if (cur_eid != db_rep->self_eid && + (ret = establish_connection(env, cur_eid, &conn)) != 0) + goto err; + + if (IS_VALID_EID(eid)) { + DB_ASSERT(env, conn != NULL); + channel->c.conn = conn; + } else { + /* + * If the configured EID is one of the special ones (MASTER or + * BROADCAST) we need a mutex for dynamic messing with + * connections that could happen later. 
+ */ + if ((ret = __repmgr_create_mutex(env, + &channel->c.conns.mutex)) != 0) + goto err; + + if (conn != NULL) { + /* + * Allocate enough array elements to use cur_eid as an + * index; save the number of slots allocated as "cnt." + */ + if ((ret = __os_calloc(env, + (u_int)cur_eid + 1, sizeof(REPMGR_CONNECTION *), + &channel->c.conns.array)) != 0) + goto err; + channel->c.conns.cnt = (u_int)cur_eid + 1; + channel->c.conns.array[cur_eid] = conn; + } + } + + if (conn != NULL) { + LOCK_MUTEX(db_rep->mutex); + conn->ref_count++; + UNLOCK_MUTEX(db_rep->mutex); + } + + *dbchannelp = dbchannel; + +err: + if (ret != 0) { + if (conn != NULL) + (void)__repmgr_disable_connection(env, conn); + if (channel != NULL) { + if (!IS_VALID_EID(eid) && + channel->c.conns.mutex != NULL) + (void)__repmgr_destroy_mutex(env, + channel->c.conns.mutex); + __os_free(env, channel); + } + if (dbchannel != NULL) + __os_free(env, dbchannel); + } + ENV_LEAVE(env, ip); + return (ret); +} + +static int +get_shared_netaddr(env, eid, netaddr) + ENV *env; + int eid; + repmgr_netaddr_t *netaddr; +{ + DB_REP *db_rep; + REP *rep; + REGINFO *infop; + SITEINFO *base, *p; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + MUTEX_LOCK(env, rep->mtx_repmgr); + + if ((u_int)eid >= rep->site_cnt) { + ret = DB_NOTFOUND; + goto err; + } + DB_ASSERT(env, rep->siteinfo_off != INVALID_ROFF); + + infop = env->reginfo; + base = R_ADDR(infop, rep->siteinfo_off); + p = &base[eid]; + netaddr->host = R_ADDR(infop, p->addr.host); + netaddr->port = p->addr.port; + ret = 0; + +err: + MUTEX_UNLOCK(env, rep->mtx_repmgr); + return (ret); +} + +static int +establish_connection(env, eid, connp) + ENV *env; + int eid; + REPMGR_CONNECTION **connp; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + DBT vi; + repmgr_netaddr_t netaddr; + __repmgr_msg_hdr_args msg_hdr; + __repmgr_version_confirmation_args conf; + int alloc, locked, ret, unused; + + db_rep = env->rep_handle; + alloc = locked = FALSE; + + if ((ret = 
get_shared_netaddr(env, eid, &netaddr)) != 0) + return (ret); + + if ((ret = __repmgr_connect(env, &netaddr, &conn, &unused)) != 0) + return (ret); + conn->type = APP_CONNECTION; + + /* Read a handshake msg, to get version confirmation and parameters. */ + if ((ret = __repmgr_read_conn(conn)) != 0) + goto out; + /* + * We can only get here after having read the full 9 bytes that we + * expect, so this can't fail. + */ + DB_ASSERT(env, conn->reading_phase == SIZES_PHASE); + ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr, + conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL); + DB_ASSERT(env, ret == 0); + __repmgr_iovec_init(&conn->iovecs); + conn->reading_phase = DATA_PHASE; + + if ((ret = __repmgr_prepare_simple_input(env, conn, &msg_hdr)) != 0) + goto out; + alloc = TRUE; + + if ((ret = __repmgr_read_conn(conn)) != 0) + goto out; + + /* + * Analyze the handshake msg, and stash relevant info. + */ + if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0) + goto out; + DB_ASSERT(env, vi.size > 0); + if ((ret = __repmgr_version_confirmation_unmarshal(env, + &conf, vi.data, vi.size, NULL)) != 0) + goto out; + + if (conf.version < CHANNEL_MIN_VERSION) { + ret = DB_REP_UNAVAIL; + goto out; + } + + conn->version = conf.version; + + if ((ret = __repmgr_send_handshake(env, + conn, NULL, 0, APP_CHANNEL_CONNECTION)) != 0) + goto out; + conn->state = CONN_READY; + __repmgr_reset_for_reading(conn); + if ((ret = __repmgr_set_nonblock_conn(conn)) != 0) { + __db_err(env, ret, DB_STR("3653", "set_nonblock channel")); + goto out; + } + + /* + * Turn over the responsibility for reading on this connection to the + * select() thread. + */ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + if ((ret = __repmgr_wake_main_thread(env)) != 0) + goto out; + + /* + * Share this new connection with the select thread, which will + * hereafter own the exclusive right to read input from it. 
Once we get + * past this point, we can't unilaterally close and destroy the + * connection if a retryable connection error happens. Fortunately, + * we're now at the point where everything has succeeded; so there will + * be no more errors. + */ + TAILQ_INSERT_TAIL(&db_rep->connections, conn, entries); + conn->ref_count++; + *connp = conn; + +out: + if (locked) + UNLOCK_MUTEX(db_rep->mutex); + + if (ret != 0) { + /* + * Since we can't have given the connection to the select() + * thread yet, clean-up is as simple as this: + */ + (void)__repmgr_close_connection(env, conn); + (void)__repmgr_destroy_conn(env, conn); + } + + if (alloc) { + DB_ASSERT(env, conn->input.repmgr_msg.cntrl.size > 0); + __os_free(env, conn->input.repmgr_msg.cntrl.data); + DB_ASSERT(env, conn->input.repmgr_msg.rec.size > 0); + __os_free(env, conn->input.repmgr_msg.rec.data); + } + return (ret); +} + +/* + * PUBLIC: int __repmgr_set_msg_dispatch __P((DB_ENV *, + * PUBLIC: void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), + * PUBLIC: u_int32_t)); + */ +int +__repmgr_set_msg_dispatch(dbenv, dispatch, flags) + DB_ENV *dbenv; + void (*dispatch) __P((DB_ENV *, + DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); + u_int32_t flags; +{ + ENV *env; + DB_REP *db_rep; + DB_THREAD_INFO *ip; + int ret; + + env = dbenv->env; + if ((ret = __db_fchk(env, + "DB_ENV->repmgr_msg_dispatch", flags, 0)) != 0) + return (ret); + if (APP_IS_BASEAPI(env)) + return (repmgr_only(env, "repmgr_msg_dispatch")); + + db_rep = env->rep_handle; + db_rep->msg_dispatch = dispatch; + APP_SET_REPMGR(env); + return (0); +} + +/* + * Implementation of DB_CHANNEL->send_msg() method for use in a normal channel + * explicitly created by the message-originator application. 
+ * + * PUBLIC: int __repmgr_send_msg __P((DB_CHANNEL *, + * PUBLIC: DBT *, u_int32_t, u_int32_t)); + */ +int +__repmgr_send_msg(db_channel, msg, nmsg, flags) + DB_CHANNEL *db_channel; + DBT *msg; + u_int32_t nmsg; + u_int32_t flags; +{ + ENV *env; + DB_THREAD_INFO *ip; + CHANNEL *channel; + REPMGR_CONNECTION *conn; + int ret; + + channel = db_channel->channel; + env = channel->env; + if ((ret = __db_fchk(env, + "DB_CHANNEL->send_msg", flags, 0)) != 0) + return (ret); + + ENV_ENTER(env, ip); + if ((ret = get_channel_connection(channel, &conn)) == 0) + ret = send_msg_conn(env, conn, msg, nmsg); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * Sends an async msg on the given connection (or just copies it locally if conn + * is NULL, since that means we're "sending to the master" when we ourselves are + * the master). + */ +static int +send_msg_conn(env, conn, msg, nmsg) + ENV *env; + REPMGR_CONNECTION *conn; + DBT *msg; + u_int32_t nmsg; +{ + DB_REP *db_rep; + REPMGR_IOVECS *iovecs; + __repmgr_msg_metadata_args meta; + int ret; + + db_rep = env->rep_handle; + memset(&meta, 0, sizeof(meta)); + if (conn == NULL) { + /* Sending to DB_EID_MASTER when we ourselves are master. */ + if ((ret = __repmgr_build_data_out(env, + msg, nmsg, &meta, &iovecs)) != 0) + return (ret); + ret = send_msg_self(env, iovecs, nmsg); + } else { + if ((ret = __repmgr_build_msg_out(env, + msg, nmsg, &meta, &iovecs)) != 0) + return (ret); + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_send_many(env, conn, iovecs, 0); + UNLOCK_MUTEX(db_rep->mutex); + } + + __os_free(env, iovecs); + return (ret); +} + +/* + * Simulate sending by simply copying the message into a msg struct to be + * queued. On input, iovecs is ready to "send", with first slot set aside for + * message header. 
+ */ +static int +send_msg_self(env, iovecs, nmsg) + ENV *env; + REPMGR_IOVECS *iovecs; + u_int32_t nmsg; +{ + REPMGR_MESSAGE *msg; + size_t align, bodysize, structsize; + u_int8_t *membase; + int ret; + + align = sizeof(double); + bodysize = iovecs->total_bytes - __REPMGR_MSG_HDR_SIZE; + structsize = (size_t)DB_ALIGN((size_t)(sizeof(REPMGR_MESSAGE) + + nmsg * sizeof(DBT)), align); + if ((ret = __os_malloc(env, structsize + bodysize, &membase)) != 0) + return (ret); + + msg = (void*)membase; + membase += structsize; + + /* + * Build a msg struct that looks like what would be received in the + * usual case. + */ + msg->msg_hdr.type = REPMGR_APP_MESSAGE; + APP_MSG_BUFFER_SIZE(msg->msg_hdr) = (u_int32_t)bodysize; + APP_MSG_SEGMENT_COUNT(msg->msg_hdr) = nmsg; + + msg->v.appmsg.conn = NULL; + + /* + * The "buf" is the message body (as [if] transmitted); i.e., it + * excludes the header (which we've just constructed separately). So, + * skip over slot 0 in the iovecs, which had been reserved for the hdr. + */ + DB_INIT_DBT(msg->v.appmsg.buf, membase, bodysize); + copy_body(membase, iovecs); + + return (__repmgr_queue_put(env, msg)); +} + +/* + * Copies a message body into a single contiguous buffer. The given iovecs is + * assumed to have the first slot reserved for a message header, and we skip + * that part. + */ +static void +copy_body(membase, iovecs) + u_int8_t *membase; + REPMGR_IOVECS *iovecs; +{ + size_t sz; + int i; + + for (i = 1; i < iovecs->count; i++) { + if ((sz = (size_t)iovecs->vectors[i].iov_len) > 0) { + memcpy(membase, iovecs->vectors[i].iov_base, sz); + membase += sz; + } + } +} + +/* + * Gets a connection to be used for sending, either an async message or a + * request. On a DB_EID_MASTER channel this entails checking the current + * master, and possibly opening a new connection if the master has changed. 
+ * Allow an old connection to stay intact, because responses to previous + * requests could still be arriving (though often the connection will have died + * anyway, if the master changed due to failure of the old master). + * + * If the local site is currently master, then for a master channel we return + * (via connp) a NULL pointer. + */ +static int +get_channel_connection(channel, connp) + CHANNEL *channel; + REPMGR_CONNECTION **connp; +{ + ENV *env; + DB_REP *db_rep; + REP *rep; + REPMGR_CONNECTION *conn; + DB_CHANNEL *db_channel; + int eid, ret; + + env = channel->env; + db_rep = env->rep_handle; + rep = db_rep->region; + db_channel = channel->db_channel; + + /* + * On a specific-EID channel it's very simple, because there is only + * ever one connection, which was established when the channel was + * created. + */ + if (db_channel->eid >= 0) { + *connp = channel->c.conn; + return (0); + } + + /* + * For now we only support one connection at a time. When we support + * DB_EID_BROADCAST channels in the future, we will have to loop through + * all connected sites. + */ + DB_ASSERT(env, db_channel->eid == DB_EID_MASTER); + eid = rep->master_id; + if (eid == db_rep->self_eid) { + *connp = NULL; + return (0); + } + if (eid == DB_EID_INVALID) + return (DB_REP_UNAVAIL); + + LOCK_MUTEX(channel->c.conns.mutex); + if ((u_int)eid >= channel->c.conns.cnt) { + /* + * Allocate an array big enough such that `eid' is a valid + * index; initialize the newly allocated (tail) portion. 
+ */ + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Grow master-channel array to accommodate EID %d", eid)); + if ((ret = __os_realloc(env, + sizeof(REPMGR_CONNECTION *) * ((u_int)eid + 1), + &channel->c.conns.array)) != 0) + goto out; + memset(&channel->c.conns.array[channel->c.conns.cnt], + 0, + sizeof(REPMGR_CONNECTION *) * + (((u_int)eid + 1) - channel->c.conns.cnt)); + channel->c.conns.cnt = (u_int)eid + 1; + } + DB_ASSERT(env, (u_int)eid < channel->c.conns.cnt); + + if ((conn = channel->c.conns.array[eid]) == NULL) { + if ((ret = establish_connection(env, eid, &conn)) != 0) + goto out; + + /* + * Even though `conn' is a newly created object, by the time we + * get here it has already been given out to the select() + * thread, so we should hold the mutex while incrementing the + * ref count. + */ + LOCK_MUTEX(db_rep->mutex); + channel->c.conns.array[eid] = conn; + conn->ref_count++; + UNLOCK_MUTEX(db_rep->mutex); + } + + *connp = conn; + ret = 0; +out: + UNLOCK_MUTEX(channel->c.conns.mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_send_request __P((DB_CHANNEL *, + * PUBLIC: DBT *, u_int32_t, DBT *, db_timeout_t, u_int32_t)); + */ +int +__repmgr_send_request(db_channel, request, nrequest, response, timeout, flags) + DB_CHANNEL *db_channel; + DBT *request; + u_int32_t nrequest; + DBT *response; + db_timeout_t timeout; + u_int32_t flags; +{ + ENV *env; + DB_THREAD_INFO *ip; + DB_REP *db_rep; + CHANNEL *channel; + REPMGR_CONNECTION *conn; + REPMGR_IOVECS *iovecs; + REPMGR_RESPONSE *resp; + struct response_wait ctx; + __repmgr_msg_metadata_args meta; + size_t sz; + void *dummy; + u_int32_t i, n; + int ret; + + channel = db_channel->channel; + env = channel->env; + db_rep = env->rep_handle; + + if ((ret = __db_fchk(env, + "DB_CHANNEL->send_request", flags, DB_MULTIPLE)) != 0) + return (ret); + + if (db_channel->eid == DB_EID_BROADCAST) { + __db_errx(env, DB_STR("3654", + "DB_CHANNEL->send_request() not supported on DB_EID_BROADCAST channel")); + return 
(EINVAL); + } + + ENV_ENTER(env, ip); + ret = get_channel_connection(channel, &conn); + ENV_LEAVE(env, ip); + if (ret != 0) + return (ret); + + if (conn == NULL) + return (request_self(env, request, nrequest, response, flags)); + + /* Find an available array slot, or grow the array if necessary. */ + LOCK_MUTEX(db_rep->mutex); + for (i = 0; i < conn->aresp; i++) + if (!(F_ISSET(&conn->responses[i], RESP_IN_USE))) + break; + if (i == conn->aresp) { + n = conn->aresp == 0 ? 1 : conn->aresp * 2; + ret = __os_realloc(env, + sizeof(REPMGR_RESPONSE) * n, &conn->responses); + memset(&conn->responses[i], 0, + sizeof(REPMGR_RESPONSE) * (n - i)); + conn->aresp = n; + } + resp = &conn->responses[i]; + resp->flags = RESP_IN_USE | RESP_THREAD_WAITING; + resp->dbt = *response; + resp->ret = 0; + UNLOCK_MUTEX(db_rep->mutex); + + /* + * The index "i" is stable, but the address in the "resp" pointer could + * change while we drop the mutex, if another thread has to grow the + * allocated array. So we can't use "resp" again until after we set it + * again, from "i", under mutex protection. + */ + + meta.tag = i; + meta.flags = REPMGR_REQUEST_MSG_TYPE | + (LF_ISSET(DB_MULTIPLE) ? REPMGR_MULTI_RESP : 0) | + (F_ISSET(response, DB_DBT_USERMEM) ? REPMGR_RESPONSE_LIMIT : 0); + meta.limit = response->ulen; + + /* + * Build an iovecs structure describing the request message, and then + * send it. + */ + if ((ret = __repmgr_build_msg_out(env, + request, nrequest, &meta, &iovecs)) != 0) { + /* + * Since we haven't sent the message yet, there's no chance the + * select thread has started relying on the REPMGR_RESPONSE, so + * it's easy to deallocate it. + */ + LOCK_MUTEX(db_rep->mutex); + F_CLR(&conn->responses[i], RESP_IN_USE | RESP_THREAD_WAITING); + UNLOCK_MUTEX(db_rep->mutex); + return (ret); + } + + timeout = timeout > 0 ? 
timeout : db_channel->timeout; + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_send_many(env, conn, iovecs, timeout); + if (ret == DB_TIMEOUT) + F_CLR(&conn->responses[i], RESP_IN_USE | RESP_THREAD_WAITING); + UNLOCK_MUTEX(db_rep->mutex); + __os_free(env, iovecs); + if (ret != 0) { + /* + * An error while writing will force the connection to be + * closed, busted, abandoned. Since there could be a few app + * threads waiting, *any* abandoning of a connection will have + * to wake up those threads, with a COMPLETE indication and an + * error code. That's more than we want to tackle here. + */ + return (ret); + } + + /* + * Here, we've successfully sent the request. Once we've gotten this + * far, the select thread owns the REPMGR_RESPONSE slot until it marks + * it complete. + */ + ctx.conn = conn; + ctx.index = i; + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_await_cond(env, + response_complete, &ctx, timeout, &conn->response_waiters); + + resp = &conn->responses[i]; + if (ret == 0) { + DB_ASSERT(env, F_ISSET(resp, RESP_COMPLETE)); + *response = resp->dbt; + if ((ret = resp->ret) == 0 && LF_ISSET(DB_MULTIPLE)) + adjust_bulk_response(env, response); + F_CLR(resp, RESP_IN_USE | RESP_THREAD_WAITING); + + } else { + F_CLR(resp, RESP_THREAD_WAITING); + if (ret == DB_TIMEOUT && F_ISSET(resp, RESP_READING)) { + /* + * The select thread is in the midst of reading the + * response, but we're about to yank the buffer out from + * under it. So, replace it with a dummy buffer. + * (There's no way to abort the reading of a message + * part-way through.) + * + * Notice that whatever buffer the user is getting back, + * including her own in the case of USERMEM, may already + * have some partial data written into it. + * + * We always read responses in just one single chunk, so + * figuring out the needed buffer size is fairly simple. 
+ */ + DB_ASSERT(env, conn->iovecs.offset == 0 && + conn->iovecs.count == 1); + sz = conn->iovecs.vectors[0].iov_len; + + if ((ret = __os_malloc(env, sz, &dummy)) != 0) + goto out; + __repmgr_iovec_init(&conn->iovecs); + DB_INIT_DBT(resp->dbt, dummy, sz); + __repmgr_add_dbt(&conn->iovecs, &resp->dbt); + F_SET(resp, RESP_DUMMY_BUF); + } + } + +out: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +static int +response_complete(env, ctx) + ENV *env; + void *ctx; +{ + REPMGR_CONNECTION *conn; + struct response_wait *rw; + + COMPQUIET(env, NULL); + + rw = ctx; + conn = rw->conn; + return (F_ISSET(&conn->responses[rw->index], RESP_COMPLETE) || + conn->state == CONN_DEFUNCT); +} + +/* + * "Send" a request to ourselves, by invoking the application's call-back + * function directly, in the case where a channel directed to DB_EID_MASTER is + * used on a master. + */ +static int +request_self(env, request, nrequest, response, flags) + ENV *env; + DBT *request; + u_int32_t nrequest; + DBT *response; + u_int32_t flags; +{ + DB_REP *db_rep; + DB_CHANNEL db_channel; + CHANNEL channel; + __repmgr_msg_metadata_args meta; + + db_rep = env->rep_handle; + if (db_rep->msg_dispatch == NULL) { + __db_errx(env, DB_STR("3655", + "No message dispatch call-back function has been configured")); + return (DB_NOSERVER); + } + + db_channel.channel = &channel; + db_channel.send_msg = __repmgr_send_response; + + /* Supply stub functions for methods inapplicable in msg disp func. */ + db_channel.close = __repmgr_channel_close_inval; + db_channel.send_request = __repmgr_send_request_inval; + db_channel.set_timeout = __repmgr_channel_timeout_inval; + + channel.env = env; + channel.c.conn = NULL; + channel.responded = FALSE; + channel.meta = &meta; + channel.response.dbt = *response; + + meta.flags = REPMGR_REQUEST_MSG_TYPE | + (LF_ISSET(DB_MULTIPLE) ? REPMGR_MULTI_RESP : 0) | + (F_ISSET(response, DB_DBT_USERMEM) ? 
REPMGR_RESPONSE_LIMIT : 0); + meta.limit = response->ulen; + + (*db_rep->msg_dispatch)(env->dbenv, + &db_channel, request, nrequest, DB_REPMGR_NEED_RESPONSE); + + if (!channel.responded) { + __db_errx(env, DB_STR("3656", + "Application failed to provide a response")); + return (DB_KEYEMPTY); + } else { + response->data = channel.response.dbt.data; + response->size = channel.response.dbt.size; + if (LF_ISSET(DB_MULTIPLE)) + adjust_bulk_response(env, response); + } + return (0); +} + +static void +adjust_bulk_response(env, response) + ENV *env; + DBT *response; +{ + u_int32_t n, *p; + +#ifndef DIAGNOSTIC + COMPQUIET(env, NULL); +#endif + + /* + * Convert bulk-buffer segment info to host byte-order, and count + * segments. See the definition of DB_MULTIPLE_INIT for a reminder of + * the structure of a bulk buffer. Each segment has both an offset and + * a length, so "n" ends up as the number of u_int32_t words we (might) + * need to shuffle, below. + */ + p = (u_int32_t *)((u_int8_t *)response->data + + response->size - sizeof(u_int32_t)); + for (n = 1; *p != (u_int32_t)-1; p -= 2) { + DB_ASSERT(env, p > (u_int32_t *)response->data); + p[0] = ntohl(p[0]); + p[-1] = ntohl(p[-1]); + n += 2; + } + /* + * The bulk pointers appear at the end of the transmitted response, so + * unless the buffer happened to be exactly the right size we need to + * shuffle them to the end of the buffer. + */ + if (F_ISSET(response, DB_DBT_USERMEM)) + memmove((u_int8_t *)response->data + + response->ulen - n * sizeof(u_int32_t), + p, n * sizeof(u_int32_t)); + else + response->ulen = response->size; +} + +/* + * Implementation of DB_CHANNEL->send_msg() method for use in recipient's msg + * dispatch callback function. 
+ * + * PUBLIC: int __repmgr_send_response __P((DB_CHANNEL *, + * PUBLIC: DBT *, u_int32_t, u_int32_t)); + */ +int +__repmgr_send_response(db_channel, msg, nmsg, flags) + DB_CHANNEL *db_channel; + DBT *msg; + u_int32_t nmsg; + u_int32_t flags; +{ + ENV *env; + DB_REP *db_rep; + CHANNEL *channel; + REPMGR_CONNECTION *conn; + REPMGR_IOVECS iovecs, *iovecsp; + DBT *dbt; + __repmgr_msg_hdr_args msg_hdr; + u_int8_t msg_hdr_buf[__REPMGR_MSG_HDR_SIZE], *msg_hdr_buf_p; + size_t sz; + int alloc, ret; + + COMPQUIET(iovecsp, NULL); + + channel = db_channel->channel; + env = channel->env; + db_rep = env->rep_handle; + conn = channel->c.conn; + + if ((ret = __db_fchk(env, + "DB_CHANNEL->send_msg", flags, 0)) != 0) + return (ret); + + if (!F_ISSET(channel->meta, REPMGR_REQUEST_MSG_TYPE)) + return (send_msg_conn(env, conn, msg, nmsg)); + + if (channel->responded) { + __db_errx(env, DB_STR("3657", + "a response has already been sent")); + return (EINVAL); + } + + alloc = FALSE; + if (F_ISSET(channel->meta, REPMGR_MULTI_RESP)) { + /* + * Originator accepts bulk format: response can be any number of + * segments. + */ + if ((ret = __repmgr_build_data_out(env, + msg, nmsg, NULL, &iovecsp)) != 0) + goto out; + alloc = TRUE; + + /* + * Set buffer pointer to space we "know" build_data_out reserved + * for us. 
+ */ + msg_hdr_buf_p = (u_int8_t *)iovecsp->vectors[0].iov_base; + msg_hdr.type = REPMGR_APP_RESPONSE; + APP_RESP_TAG(msg_hdr) = channel->meta->tag; + APP_RESP_BUFFER_SIZE(msg_hdr) = + (u_int32_t)(iovecsp->total_bytes - __REPMGR_MSG_HDR_SIZE); + __repmgr_msg_hdr_marshal(env, &msg_hdr, msg_hdr_buf_p); + } else if (nmsg > 1) { + __db_errx(env, DB_STR("3658", + "originator does not accept multi-segment response")); + goto small; + } else { + iovecsp = &iovecs; + __repmgr_iovec_init(iovecsp); + msg_hdr.type = REPMGR_APP_RESPONSE; + APP_RESP_TAG(msg_hdr) = channel->meta->tag; + __repmgr_add_buffer(iovecsp, + msg_hdr_buf, __REPMGR_MSG_HDR_SIZE); + if (nmsg == 0) + APP_RESP_BUFFER_SIZE(msg_hdr) = 0; + else if ((APP_RESP_BUFFER_SIZE(msg_hdr) = msg->size) > 0) + __repmgr_add_dbt(iovecsp, msg); + __repmgr_msg_hdr_marshal(env, &msg_hdr, msg_hdr_buf); + } + + if (F_ISSET(channel->meta, REPMGR_RESPONSE_LIMIT) && + (APP_RESP_BUFFER_SIZE(msg_hdr) > channel->meta->limit)) { + __db_errx(env, DB_STR("3659", + "originator's USERMEM buffer too small")); +small: + if (conn == NULL) + channel->response.ret = DB_BUFFER_SMALL; + else + (void)__repmgr_send_err_resp(env, + channel, DB_BUFFER_SMALL); + ret = EINVAL; + } else { + if (conn == NULL) { + sz = APP_RESP_BUFFER_SIZE(msg_hdr); + dbt = &channel->response.dbt; + if (F_ISSET(dbt, DB_DBT_MALLOC)) + ret = __os_umalloc(env, sz, &dbt->data); + else if (F_ISSET(dbt, DB_DBT_REALLOC)) { + if (dbt->data == NULL || dbt->size < sz) + ret = __os_urealloc(env, + sz, &dbt->data); + else + ret = 0; + } + dbt->size = (u_int32_t)sz; + copy_body(dbt->data, iovecsp); + channel->response.ret = 0; + ret = 0; + } else { + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_send_many(env, conn, iovecsp, 0); + UNLOCK_MUTEX(db_rep->mutex); + } + } + +out: + if (alloc) + __os_free(env, iovecsp); + + /* + * Once we've handed the tag back to the originator it becomes + * meaningless, so we can't use it again. Note the fact that we've + * responded, so that we don't try. 
+ */ + channel->responded = TRUE; + + return (ret); +} + +static int +__repmgr_build_msg_out(env, msg, nmsg, meta, iovecsp) + ENV *env; + DBT *msg; + u_int32_t nmsg; + __repmgr_msg_metadata_args *meta; + REPMGR_IOVECS **iovecsp; +{ + REPMGR_IOVECS *iovecs; + __repmgr_msg_hdr_args msg_hdr; + u_int8_t *msg_hdr_buf; + int ret; + + if ((ret = __repmgr_build_data_out(env, msg, nmsg, meta, &iovecs)) != 0) + return (ret); + + /* + * The IOVECS holds the entire message to be transmitted, including the + * 9-byte header. The header contains the length of the remaining part + * of the message. The header buffer area is of course pointed to by + * the first of the io vectors. + */ + msg_hdr_buf = (u_int8_t *)iovecs->vectors[0].iov_base; + msg_hdr.type = REPMGR_APP_MESSAGE; + APP_MSG_BUFFER_SIZE(msg_hdr) = + (u_int32_t)(iovecs->total_bytes - __REPMGR_MSG_HDR_SIZE); + APP_MSG_SEGMENT_COUNT(msg_hdr) = nmsg; + __repmgr_msg_hdr_marshal(env, &msg_hdr, msg_hdr_buf); + + *iovecsp = iovecs; + return (0); +} + +/* + * Allocate and build most of an outgoing message, leaving it up to the caller + * to fill in the header afterwards. + */ +static int +__repmgr_build_data_out(env, msg, nmsg, meta, iovecsp) + ENV *env; + DBT *msg; + u_int32_t nmsg; + __repmgr_msg_metadata_args *meta; + REPMGR_IOVECS **iovecsp; +{ + REPMGR_IOVECS *iovecs; + u_int32_t *bulk_base, *bulk_ptr, i, n; + u_int8_t *membase, *meta_buf, *msg_hdr_buf, *p, *pad; + void *inc_p; + size_t align, bulk_area_sz, memsize, segments, sz, offset; + int ret; + + COMPQUIET(pad, NULL); + + /* + * The actual message as it will be sent on the wire is composed of the + * following parts: + * + * (a) the 9-byte header + * (b) for each msg DBT ('nmsg' of them): + * (b.1) the data itself, and + * (b.2) an alignment pad, if necessary + * (c) trailing section for bulk-style pointers (2 words per segment, + * plus a -1 end-marker) + * (d) message meta-data (optionally) + * + * Note that nmsg could be 0. 
+ */ + + /* First, count how many segments need padding. */ + n = 0; + align = sizeof(double); + for (i = 0; i < nmsg; i++) { + p = msg[i].data; + p = &p[msg[i].size]; + inc_p = ALIGNP_INC(p, align); + if ((u_int8_t *)inc_p > p) + n++; + } + + /* + * Here we allocate memory to hold the actual pieces of the message we + * will send, plus the iovecs structure that points to those pieces. We + * don't include the memory for the user's data (item (b.1) from the + * above explanation), since the user is supplying them directly. Also + * note that we can reuse just one padding buffer even if we need to + * send it (i.e., point to it from an iovec) more than once. + * + * According to the list of message segments explained above, the total + * number of iovec elements we need is (1 + nmsg + n + 1 + f(meta)). + */ + segments = nmsg + n + (meta == NULL ? 2 : 3); + sz = segments > MIN_IOVEC ? REPMGR_IOVECS_ALLOC_SZ(segments) : + sizeof(REPMGR_IOVECS); + + bulk_area_sz = (nmsg * 2 + 1) * sizeof(u_int32_t); + memsize = sz + __REPMGR_MSG_HDR_SIZE + + bulk_area_sz + (n > 0 ? align : 0) + __REPMGR_MSG_METADATA_SIZE; + + if ((ret = __os_malloc(env, memsize, &membase)) != 0) + return (ret); + p = membase; + iovecs = (REPMGR_IOVECS *)p; + p += sz; + bulk_base = (u_int32_t *)p; + p += bulk_area_sz; + if (n > 0) { + pad = p; + memset(pad, 0, align); + p += align; + } + msg_hdr_buf = p; + p += __REPMGR_MSG_HDR_SIZE; + meta_buf = p; + + /* + * The message header appears first (on the wire), so we have to add its + * buffer address to the iovec list first. But we don't actually + * compose the content; that's the responsibility of the caller, after + * we return. + */ + __repmgr_iovec_init(iovecs); + __repmgr_add_buffer(iovecs, msg_hdr_buf, __REPMGR_MSG_HDR_SIZE); + + offset = 0; + bulk_ptr = &bulk_base[2*nmsg + 1]; /* Work backward from the end. 
*/ + for (i = 0; i < nmsg; i++) { + p = msg[i].data; + sz = (size_t)msg[i].size; + + /* + * Format of bulk pointers is similar to the usage of + * DB_MULTIPLE_NEXT, but note that the lengths we pass are of + * course for the actual data itself, not including any + * padding. + */ + *--bulk_ptr = htonl((u_long)offset); + *--bulk_ptr = htonl((u_long)sz); + + __repmgr_add_dbt(iovecs, &msg[i]); + offset += sz; + + p = &p[sz]; + inc_p = ALIGNP_INC(p, align); + if ((u_int8_t *)inc_p > p) { + DB_ASSERT(env, n > 0); + sz = (size_t)((u_int8_t *)inc_p - p); + DB_ASSERT(env, sz <= align); + __repmgr_add_buffer(iovecs, pad, sz); + offset += sz; + } + } + *--bulk_ptr = (u_int32_t)-1; + __repmgr_add_buffer(iovecs, bulk_ptr, bulk_area_sz); + + if (meta != NULL) { + __repmgr_msg_metadata_marshal(env, meta, meta_buf); + __repmgr_add_buffer(iovecs, + meta_buf, __REPMGR_MSG_METADATA_SIZE); + } + + *iovecsp = iovecs; + return (0); +} + +/* + * PUBLIC: int __repmgr_channel_close __P((DB_CHANNEL *, u_int32_t)); + */ +int +__repmgr_channel_close(dbchan, flags) + DB_CHANNEL *dbchan; + u_int32_t flags; +{ + ENV *env; + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + CHANNEL *channel; + u_int32_t i; + int ret, t_ret; + + channel = dbchan->channel; + env = channel->env; + ret = __db_fchk(env, "DB_CHANNEL->close", flags, 0); + db_rep = env->rep_handle; + + /* + * Disable connection(s) (if not already done due to an error having + * occurred previously); release our reference to conn struct(s). 
+ */ + LOCK_MUTEX(db_rep->mutex); + if (dbchan->eid >= 0) { + conn = channel->c.conn; + if (conn->state != CONN_DEFUNCT && + (t_ret = __repmgr_disable_connection(env, conn)) != 0 && + ret == 0) + ret = t_ret; + if ((t_ret = __repmgr_decr_conn_ref(env, conn)) != 0 && + ret == 0) + ret = t_ret; + } else if (channel->c.conns.cnt > 0) { + for (i = 0; i < channel->c.conns.cnt; i++) + if ((conn = channel->c.conns.array[i]) != NULL) { + if (conn->state != CONN_DEFUNCT && + (t_ret = __repmgr_disable_connection(env, + conn)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __repmgr_decr_conn_ref(env, + conn)) != 0 && ret == 0) + ret = t_ret; + } + __os_free(env, channel->c.conns.array); + } + UNLOCK_MUTEX(db_rep->mutex); + + if (!IS_VALID_EID(dbchan->eid) && channel->c.conns.mutex != NULL && + (t_ret = __repmgr_destroy_mutex(env, + channel->c.conns.mutex)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = __repmgr_wake_main_thread(env)) != 0 && ret == 0) + ret = t_ret; + + __os_free(env, channel); + __os_free(env, dbchan); + + return (ret); +} + +/* + * PUBLIC: int __repmgr_channel_timeout __P((DB_CHANNEL *, db_timeout_t)); + */ +int +__repmgr_channel_timeout(chan, timeout) + DB_CHANNEL *chan; + db_timeout_t timeout; +{ + chan->timeout = timeout; + return (0); +} + +/* + * PUBLIC: int __repmgr_send_request_inval __P((DB_CHANNEL *, + * PUBLIC: DBT *, u_int32_t, DBT *, db_timeout_t, u_int32_t)); + */ +int +__repmgr_send_request_inval(dbchan, request, nrequest, response, timeout, flags) + DB_CHANNEL *dbchan; + DBT *request; + u_int32_t nrequest; + DBT *response; + db_timeout_t timeout; + u_int32_t flags; +{ + COMPQUIET(request, NULL); + COMPQUIET(nrequest, 0); + COMPQUIET(response, NULL); + COMPQUIET(timeout, 0); + COMPQUIET(flags, 0); + return (bad_callback_method(dbchan, "send_request")); +} + +/* + * PUBLIC: int __repmgr_channel_close_inval __P((DB_CHANNEL *, u_int32_t)); + */ +int +__repmgr_channel_close_inval(dbchan, flags) + DB_CHANNEL *dbchan; + u_int32_t flags; +{ + 
COMPQUIET(flags, 0); + return (bad_callback_method(dbchan, "close")); +} + +/* + * PUBLIC: int __repmgr_channel_timeout_inval __P((DB_CHANNEL *, db_timeout_t)); + */ +int +__repmgr_channel_timeout_inval(dbchan, timeout) + DB_CHANNEL *dbchan; + db_timeout_t timeout; +{ + COMPQUIET(timeout, 0); + return (bad_callback_method(dbchan, "set_timeout")); +} + +static int +bad_callback_method(chan, method) + DB_CHANNEL *chan; + const char *method; +{ + __db_errx(chan->channel->env, DB_STR_A("3660", + "%s() invalid on DB_CHANNEL supplied to msg dispatch function", + "%s"), method); + return (EINVAL); +} + +static int +repmgr_only(env, method) + ENV *env; + const char *method; +{ + __db_errx(env, DB_STR_A("3661", + "%s: cannot call from base replication application", + "%s"), method); + return (EINVAL); +} + +/* + * Attempts to join the replication group, by finding a remote "helper" site and + * sending a request message to it. + * + * PUBLIC: int __repmgr_join_group __P((ENV *)); + */ +int +__repmgr_join_group(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + repmgr_netaddr_t addr; + u_int i; + int pass, ret; + + db_rep = env->rep_handle; + + /* + * Make two passes through the site list. On the first pass, try + * joining via an existing, fully "present" site whom we've found in the + * membership database. If that is fruitless, on the second pass try + * any site marked as a bootstrap helper. + * + * On the first attempt to join, when we have found no database, the + * first pass will produce nothing. On a later attempt to rejoin after + * having been removed, it's better to give priority to existing + * remaining sites from the database, and only rely on bootstrap helpers + * as a last resort. 
+ * + * pass 0 => present members + * pass 1 => helpers + */ + LOCK_MUTEX(db_rep->mutex); + for (pass = 0; pass <= 1; pass++) { + FOR_EACH_REMOTE_SITE_INDEX(i) { + site = SITE_FROM_EID(i); + if (pass == 0 && site->membership != SITE_PRESENT) + continue; + if (pass == 1 && + !FLD_ISSET(site->config, DB_BOOTSTRAP_HELPER)) + continue; + addr = site->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + if ((ret = join_group_at_site(env, + &addr)) == DB_REP_UNAVAIL) { + LOCK_MUTEX(db_rep->mutex); + continue; + } + return (ret); + } + } + UNLOCK_MUTEX(db_rep->mutex); + return (DB_REP_UNAVAIL); +} + +/* + * Sends a request message to another site, asking for permission to join the + * replication group. Ideally the other site is the master, because only the + * master can grant that request. But since we're not currently part of the + * group, we generally don't know which site is master. If the target site is + * not master, it will respond by telling us who is. + */ +static int +join_group_at_site(env, addrp) + ENV *env; + repmgr_netaddr_t *addrp; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + SITE_STRING_BUFFER addr_buf; + repmgr_netaddr_t addr, myaddr; + __repmgr_gm_fwd_args fwd; + __repmgr_site_info_args site_info; + u_int8_t *p, *response_buf, siteinfo_buf[MAX_MSG_BUF]; + char host_buf[MAXHOSTNAMELEN + 1], *host; + u_int32_t gen, type; + size_t len; + int ret, t_ret; + + db_rep = env->rep_handle; + + LOCK_MUTEX(db_rep->mutex); + myaddr = SITE_FROM_EID(db_rep->self_eid)->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + len = strlen(myaddr.host) + 1; + DB_INIT_DBT(site_info.host, myaddr.host, len); + site_info.port = myaddr.port; + site_info.flags = 0; + ret = __repmgr_site_info_marshal(env, + &site_info, siteinfo_buf, sizeof(siteinfo_buf), &len); + DB_ASSERT(env, ret == 0); + + conn = NULL; + response_buf = NULL; + gen = 0; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "try join request to site %s", + __repmgr_format_addr_loc(addrp, addr_buf))); +retry: + if ((ret = 
make_request_conn(env, addrp, &conn)) != 0) + return (ret); + if ((ret = __repmgr_send_sync_msg(env, conn, + REPMGR_JOIN_REQUEST, siteinfo_buf, (u_int32_t)len)) != 0) + goto err; + + if ((ret = read_own_msg(env, + conn, &type, &response_buf, &len)) != 0) + goto err; + + if (type == REPMGR_GM_FAILURE) { + ret = DB_REP_UNAVAIL; + goto err; + } + if (type == REPMGR_GM_FORWARD) { + /* + * The remote site we thought was master is telling us that some + * other site has become master. Retry with the new master. + * However, in order to avoid an endless cycle, only continue + * retrying as long as the master gen is advancing. + */ + ret = __repmgr_close_connection(env, conn); + if ((t_ret = __repmgr_destroy_conn(env, conn)) != 0 && + ret == 0) + ret = t_ret; + conn = NULL; + if (ret != 0) + goto err; + + ret = __repmgr_gm_fwd_unmarshal(env, &fwd, + response_buf, len, &p); + DB_ASSERT(env, ret == 0); + if (fwd.gen > gen) { + if (fwd.host.size > MAXHOSTNAMELEN + 1) { + ret = DB_REP_UNAVAIL; + goto err; + } + host = fwd.host.data; + host[fwd.host.size-1] = '\0'; /* Just to be sure. 
*/ + (void)strcpy(host_buf, host); + addr.host = host_buf; + addr.port = fwd.port; + addrp = &addr; + gen = fwd.gen; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "will retry join request at forwarded master %s, gen %lu", + __repmgr_format_addr_loc(addrp, addr_buf), + (u_long)gen)); + __os_free(env, response_buf); + response_buf = NULL; + goto retry; + } else { + ret = DB_REP_UNAVAIL; + goto err; + } + } + if (type == REPMGR_JOIN_SUCCESS) + ret = __repmgr_refresh_membership(env, response_buf, len); + else + ret = DB_REP_UNAVAIL; /* Invalid response: protocol violation */ + +err: + if (conn != NULL) { + if ((t_ret = __repmgr_close_connection(env, conn)) != 0 && + ret != 0) + ret = t_ret; + if ((t_ret = __repmgr_destroy_conn(env, conn)) != 0 && + ret != 0) + ret = t_ret; + } + if (response_buf != NULL) + __os_free(env, response_buf); + + return (ret); +} + +/* + * Reads a whole message, when we expect to get a REPMGR_OWN_MSG. + */ +static int +read_own_msg(env, conn, typep, bufp, lenp) + ENV *env; + REPMGR_CONNECTION *conn; + u_int32_t *typep; + u_int8_t **bufp; + size_t *lenp; +{ + __repmgr_msg_hdr_args msg_hdr; + u_int8_t *buf; + u_int32_t type; + size_t size; + int ret; + + __repmgr_reset_for_reading(conn); + if ((ret = __repmgr_read_conn(conn)) != 0) + goto err; + ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr, + conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL); + DB_ASSERT(env, ret == 0); + + if ((conn->msg_type = msg_hdr.type) != REPMGR_OWN_MSG) { + ret = DB_REP_UNAVAIL; /* Protocol violation. 
*/ + goto err; + } + type = REPMGR_OWN_MSG_TYPE(msg_hdr); + if ((size = (size_t)REPMGR_OWN_BUF_SIZE(msg_hdr)) > 0) { + conn->reading_phase = DATA_PHASE; + __repmgr_iovec_init(&conn->iovecs); + + if ((ret = __os_malloc(env, size, &buf)) != 0) + goto err; + conn->input.rep_message = NULL; + + __repmgr_add_buffer(&conn->iovecs, buf, size); + if ((ret = __repmgr_read_conn(conn)) != 0) { + __os_free(env, buf); + goto err; + } + *bufp = buf; + } + + *typep = type; + *lenp = size; + +err: + return (ret); +} + +static int +make_request_conn(env, addr, connp) + ENV *env; + repmgr_netaddr_t *addr; + REPMGR_CONNECTION **connp; +{ + DBT vi; + __repmgr_msg_hdr_args msg_hdr; + __repmgr_version_confirmation_args conf; + REPMGR_CONNECTION *conn; + int alloc, ret, unused; + + alloc = FALSE; + if ((ret = __repmgr_connect(env, addr, &conn, &unused)) != 0) + return (ret); + conn->type = APP_CONNECTION; + + /* Read a handshake msg, to get version confirmation and parameters. */ + if ((ret = __repmgr_read_conn(conn)) != 0) + goto err; + /* + * We can only get here after having read the full 9 bytes that we + * expect, so this can't fail. + */ + DB_ASSERT(env, conn->reading_phase == SIZES_PHASE); + ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr, + conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL); + DB_ASSERT(env, ret == 0); + __repmgr_iovec_init(&conn->iovecs); + conn->reading_phase = DATA_PHASE; + + if ((ret = __repmgr_prepare_simple_input(env, conn, &msg_hdr)) != 0) + goto err; + alloc = TRUE; + + if ((ret = __repmgr_read_conn(conn)) != 0) + goto err; + + /* + * Analyze the handshake msg, and stash relevant info. 
+ */ + if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0) + goto err; + DB_ASSERT(env, vi.size > 0); + if ((ret = __repmgr_version_confirmation_unmarshal(env, + &conf, vi.data, vi.size, NULL)) != 0) + goto err; + + if (conf.version < GM_MIN_VERSION) { + ret = DB_REP_UNAVAIL; + goto err; + } + conn->version = conf.version; + +err: + if (alloc) { + DB_ASSERT(env, conn->input.repmgr_msg.cntrl.size > 0); + __os_free(env, conn->input.repmgr_msg.cntrl.data); + DB_ASSERT(env, conn->input.repmgr_msg.rec.size > 0); + __os_free(env, conn->input.repmgr_msg.rec.data); + } + __repmgr_reset_for_reading(conn); + if (ret == 0) + *connp = conn; + else { + (void)__repmgr_close_connection(env, conn); + (void)__repmgr_destroy_conn(env, conn); + } + return (ret); +} + +/* + * PUBLIC: int __repmgr_site __P((DB_ENV *, + * PUBLIC: const char *, u_int, DB_SITE **, u_int32_t)); + */ +int +__repmgr_site(dbenv, host, port, sitep, flags) + DB_ENV *dbenv; + const char *host; + u_int port; + DB_SITE **sitep; + u_int32_t flags; +{ + int ret; + + if ((ret = __db_fchk(dbenv->env, "repmgr_site", flags, 0)) == 0) + ret = site_by_addr(dbenv->env, host, port, sitep); + + return ret; +} + +static int +site_by_addr(env, host, port, sitep) + ENV *env; + const char *host; + u_int port; + DB_SITE **sitep; +{ + DB_THREAD_INFO *ip; + DB_REP *db_rep; + DB_SITE *dbsite; + REPMGR_SITE *site; + int eid, locked, ret; + + COMPQUIET(ip, NULL); + PANIC_CHECK(env); + db_rep = env->rep_handle; + ENV_NOT_CONFIGURED(env, db_rep->region, "repmgr_site", DB_INIT_REP); + if (APP_IS_BASEAPI(env)) + return (repmgr_only(env, "repmgr_site")); + if ((ret = addr_chk(env, host, port)) != 0) + return (ret); + + if (REP_ON(env)) { + LOCK_MUTEX(db_rep->mutex); + ENV_ENTER(env, ip); + locked = TRUE; + } else + locked = FALSE; + ret = __repmgr_find_site(env, host, port, &eid); + site = SITE_FROM_EID(eid); + /* + * Point to the stable, permanent copy of the host name. 
That's the one + * we want the DB_SITE handle to point to; just like site_by_eid() does. + */ + host = site->net_addr.host; + if (locked) { + ENV_LEAVE(env, ip); + UNLOCK_MUTEX(db_rep->mutex); + } + if (ret != 0) + return (ret); + + if ((ret = init_dbsite(env, eid, host, port, &dbsite)) != 0) + return (ret); + + /* Manipulating a site makes this a replication manager application. */ + APP_SET_REPMGR(env); + *sitep = dbsite; + return (0); +} + +/* + * PUBLIC: int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **)); + */ +int +__repmgr_site_by_eid(dbenv, eid, sitep) + DB_ENV *dbenv; + int eid; + DB_SITE **sitep; +{ + ENV *env; + DB_REP *db_rep; + REPMGR_SITE *site; + DB_SITE *dbsite; + int ret; + + env = dbenv->env; + PANIC_CHECK(env); + db_rep = env->rep_handle; + + if (eid < 0 || eid >= (int)db_rep->site_cnt) + return (DB_NOTFOUND); + site = SITE_FROM_EID(eid); + + if ((ret = init_dbsite(env, eid, + site->net_addr.host, site->net_addr.port, &dbsite)) != 0) + return (ret); + *sitep = dbsite; + return (0); +} + +static int +init_dbsite(env, eid, host, port, sitep) + ENV *env; + int eid; + const char *host; + u_int port; + DB_SITE **sitep; +{ + DB_SITE *dbsite; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(DB_SITE), &dbsite)) != 0) + return (ret); + + dbsite->env = env; + dbsite->eid = eid; + dbsite->host = host; + dbsite->port = port; + dbsite->flags = (REP_ON(env) ? 
0 : DB_SITE_PREOPEN); + + dbsite->get_address = __repmgr_get_site_address; + dbsite->get_config = __repmgr_get_config; + dbsite->get_eid = __repmgr_get_eid; + dbsite->set_config = __repmgr_site_config; + dbsite->remove = __repmgr_remove_site_pp; + dbsite->close = __repmgr_site_close; + + *sitep = dbsite; + return (0); +} + +/* + * PUBLIC: int __repmgr_get_site_address __P((DB_SITE *, + * PUBLIC: const char **, u_int *)); + */ +int +__repmgr_get_site_address(dbsite, hostp, port) + DB_SITE *dbsite; + const char **hostp; + u_int *port; +{ + if (hostp != NULL) + *hostp = dbsite->host; + if (port != NULL) + *port = dbsite->port; + return (0); +} + +/* + * PUBLIC: int __repmgr_get_eid __P((DB_SITE *, int *)); + */ +int +__repmgr_get_eid(dbsite, eidp) + DB_SITE *dbsite; + int *eidp; +{ + int ret; + + if ((ret = refresh_site(dbsite)) != 0) + return (ret); + + if (F_ISSET(dbsite, DB_SITE_PREOPEN)) { + __db_errx(dbsite->env, DB_STR("3662", + "Can't determine EID before env open")); + return (EINVAL); + } + *eidp = dbsite->eid; + return (0); +} + +/* + * PUBLIC: int __repmgr_get_config __P((DB_SITE *, u_int32_t, u_int32_t *)); + */ +int +__repmgr_get_config(dbsite, which, valuep) + DB_SITE *dbsite; + u_int32_t which; + u_int32_t *valuep; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REGINFO *infop; + REP *rep; + REPMGR_SITE *site; + SITEINFO *sites; + int ret; + + env = dbsite->env; + db_rep = env->rep_handle; + + if ((ret = refresh_site(dbsite)) != 0) + return (ret); + LOCK_MUTEX(db_rep->mutex); + site = SITE_FROM_EID(dbsite->eid); + if (REP_ON(env)) { + rep = db_rep->region; + infop = env->reginfo; + + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_repmgr); + sites = R_ADDR(infop, rep->siteinfo_off); + + site->config = sites[dbsite->eid].config; + + MUTEX_UNLOCK(env, rep->mtx_repmgr); + ENV_LEAVE(env, ip); + } + *valuep = FLD_ISSET(site->config, which) ? 
1 : 0; + UNLOCK_MUTEX(db_rep->mutex); + return (0); +} + +/* + * PUBLIC: int __repmgr_site_config __P((DB_SITE *, u_int32_t, u_int32_t)); + */ +int +__repmgr_site_config(dbsite, which, value) + DB_SITE *dbsite; + u_int32_t which; + u_int32_t value; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REGINFO *infop; + REP *rep; + REPMGR_SITE *site; + SITEINFO *sites; + int ret; + + env = dbsite->env; + db_rep = env->rep_handle; + + if ((ret = refresh_site(dbsite)) != 0) + return (ret); + switch (which) { + case DB_BOOTSTRAP_HELPER: + case DB_REPMGR_PEER: + if (dbsite->eid == db_rep->self_eid) { + __db_errx(env, DB_STR("3663", + "Site config value not applicable to local site")); + return (EINVAL); + } + break; + case DB_GROUP_CREATOR: + if (IS_VALID_EID(db_rep->self_eid) && + dbsite->eid != db_rep->self_eid) { + __db_errx(env, DB_STR("3664", + "Site config value not applicable to remote site")); + return (EINVAL); + } + break; + case DB_LEGACY: + /* Applicable to either local or remote site. */ + break; + case DB_LOCAL_SITE: + /* + * This special case needs extra processing, to set the + * "self_eid" index in addition to the flag bit. + */ + if ((ret = set_local_site(dbsite, value)) != 0) + return (ret); + break; + default: + __db_errx(env, + DB_STR("3665", "Unrecognized site config value")); + return (EINVAL); + } + + if (REP_ON(env)) { + rep = db_rep->region; + infop = env->reginfo; + + LOCK_MUTEX(db_rep->mutex); + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_repmgr); + sites = R_ADDR(infop, rep->siteinfo_off); + site = SITE_FROM_EID(dbsite->eid); + + /* + * Make sure we're up to date with shared memory version. After + * env open, we never set private without also updating shared. + * But another process could have set the shared one, so shared + * is always "best." 
+ */ + site->config = sites[dbsite->eid].config; + if (value) + FLD_SET(site->config, which); + else + FLD_CLR(site->config, which); + if (site->config != sites[dbsite->eid].config) { + sites[dbsite->eid].config = site->config; + rep->siteinfo_seq++; + } + MUTEX_UNLOCK(env, rep->mtx_repmgr); + ENV_LEAVE(env, ip); + UNLOCK_MUTEX(db_rep->mutex); + } else { + site = SITE_FROM_EID(dbsite->eid); + if (value) + FLD_SET(site->config, which); + else + FLD_CLR(site->config, which); + } + return (0); +} + +static int +set_local_site(dbsite, value) + DB_SITE *dbsite; + u_int32_t value; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + ENV *env; + REP *rep; + REPMGR_SITE *site; + int locked, ret; + + COMPQUIET(rep, NULL); + COMPQUIET(ip, NULL); + env = dbsite->env; + db_rep = env->rep_handle; + + locked = FALSE; + if (REP_ON(env)) { + rep = db_rep->region; + + LOCK_MUTEX(db_rep->mutex); + ENV_ENTER(env, ip); + MUTEX_LOCK(env, rep->mtx_repmgr); + locked = TRUE; + /* Make sure we're in sync first. */ + if (IS_VALID_EID(rep->self_eid)) + db_rep->self_eid = rep->self_eid; + } + ret = 0; + if (!value && db_rep->self_eid == dbsite->eid) { + __db_errx(env, DB_STR("3666", + "A previously given local site may not be unset")); + ret = EINVAL; + } else if (IS_VALID_EID(db_rep->self_eid) && + db_rep->self_eid != dbsite->eid) { + __db_errx(env, DB_STR("3667", + "A (different) local site has already been set")); + ret = EINVAL; + } else { + site = SITE_FROM_EID(dbsite->eid); + if (FLD_ISSET(site->config, + DB_BOOTSTRAP_HELPER | DB_REPMGR_PEER)) { + __db_errx(env, DB_STR("3668", + "Local site cannot have HELPER or PEER attributes")); + ret = EINVAL; + } + } + if (ret == 0) { + db_rep->self_eid = dbsite->eid; + if (locked) { + rep->self_eid = dbsite->eid; + rep->siteinfo_seq++; + } + } + if (locked) { + MUTEX_UNLOCK(env, rep->mtx_repmgr); + ENV_LEAVE(env, ip); + UNLOCK_MUTEX(db_rep->mutex); + } + return (ret); +} + +/* + * Brings the dbsite's EID up to date, in case it got shuffled around across 
an + * env open. + */ +static int +refresh_site(dbsite) + DB_SITE *dbsite; +{ + DB_REP *db_rep; + ENV *env; + REPMGR_SITE *site; + + env = dbsite->env; + PANIC_CHECK(env); + if (F_ISSET(dbsite, DB_SITE_PREOPEN) && REP_ON(env)) { + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + site = __repmgr_lookup_site(env, dbsite->host, dbsite->port); + DB_ASSERT(env, site != NULL); + dbsite->eid = EID_FROM_SITE(site); + F_CLR(dbsite, DB_SITE_PREOPEN); + UNLOCK_MUTEX(db_rep->mutex); + } + return (0); +} + +static int +__repmgr_remove_site_pp(dbsite) + DB_SITE *dbsite; +{ + int ret, t_ret; + + ret = __repmgr_remove_site(dbsite); + /* + * The remove() method is documented as a destructor, which means that + * absolutely all calls must deallocate the handle, including error + * cases, even mutex failures. + */ + if ((t_ret = __repmgr_site_close(dbsite)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +static int +__repmgr_remove_site(dbsite) + DB_SITE *dbsite; +{ + ENV *env; + DB_REP *db_rep; + REP *rep; + REPMGR_CONNECTION *conn; + repmgr_netaddr_t addr; + __repmgr_site_info_args site_info; + u_int8_t *response_buf, siteinfo_buf[MAX_MSG_BUF]; + size_t len; + u_int32_t type; + int master, ret, t_ret; + + if ((ret = refresh_site(dbsite)) != 0) + return (ret); + env = dbsite->env; + db_rep = env->rep_handle; + rep = db_rep->region; + + if (db_rep->finished || !SELECTOR_RUNNING(db_rep)) { + __db_errx(env, DB_STR("3669", + "repmgr threads are not running")); + return (EINVAL); + } + + if (!IS_VALID_EID((master = rep->master_id))) + return (DB_REP_UNAVAIL); + LOCK_MUTEX(db_rep->mutex); + addr = SITE_FROM_EID(master)->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + + len = strlen(dbsite->host) + 1; + DB_INIT_DBT(site_info.host, dbsite->host, len); + site_info.port = dbsite->port; + site_info.flags = 0; + ret = __repmgr_site_info_marshal(env, + &site_info, siteinfo_buf, sizeof(siteinfo_buf), &len); + DB_ASSERT(env, ret == 0); + + conn = NULL; + response_buf = NULL; + if ((ret = 
make_request_conn(env, &addr, &conn)) != 0) + return (ret); + if ((ret = __repmgr_send_sync_msg(env, conn, + REPMGR_REMOVE_REQUEST, siteinfo_buf, (u_int32_t)len)) != 0) + goto err; + if ((ret = read_own_msg(env, + conn, &type, &response_buf, &len)) != 0) + goto err; + ret = type == REPMGR_REMOVE_SUCCESS ? 0 : DB_REP_UNAVAIL; +err: + if (conn != NULL) { + if ((t_ret = __repmgr_close_connection(env, conn)) != 0 && + ret != 0) + ret = t_ret; + if ((t_ret = __repmgr_destroy_conn(env, conn)) != 0 && + ret != 0) + ret = t_ret; + } + if (response_buf != NULL) + __os_free(env, response_buf); + return (ret); +} + +/* + * PUBLIC: int __repmgr_site_close __P((DB_SITE *)); + */ +int +__repmgr_site_close(dbsite) + DB_SITE *dbsite; +{ + __os_free(dbsite->env, dbsite); + return (0); +} diff --git a/src/repmgr/repmgr_msg.c b/src/repmgr/repmgr_msg.c new file mode 100644 index 00000000..7e8d3331 --- /dev/null +++ b/src/repmgr/repmgr_msg.c @@ -0,0 +1,1638 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" +#include "dbinc_auto/repmgr_auto.h" + +static int dispatch_app_message __P((ENV *, REPMGR_MESSAGE *)); +static int finish_gmdb_update __P((ENV *, + DB_THREAD_INFO *, DBT *, u_int32_t, u_int32_t, __repmgr_member_args *)); +static int incr_gm_version __P((ENV *, DB_THREAD_INFO *, DB_TXN *)); +static void marshal_site_data __P((ENV *, u_int32_t, u_int8_t *, DBT *)); +static void marshal_site_key __P((ENV *, + repmgr_netaddr_t *, u_int8_t *, DBT *, __repmgr_member_args *)); +static int message_loop __P((ENV *, REPMGR_RUNNABLE *)); +static int process_message __P((ENV*, DBT*, DBT*, int)); +static int reject_fwd __P((ENV *, REPMGR_CONNECTION *)); +static int rescind_pending __P((ENV *, + DB_THREAD_INFO *, int, u_int32_t, u_int32_t)); +static int resolve_limbo_int __P((ENV *, DB_THREAD_INFO *)); +static int resolve_limbo_wrapper __P((ENV *, DB_THREAD_INFO *)); +static int send_permlsn __P((ENV *, u_int32_t, DB_LSN *)); +static int send_permlsn_conn __P((ENV *, + REPMGR_CONNECTION *, u_int32_t, DB_LSN *)); +static int serve_join_request __P((ENV *, + DB_THREAD_INFO *, REPMGR_MESSAGE *)); +static int serve_remove_request __P((ENV *, + DB_THREAD_INFO *, REPMGR_MESSAGE *)); +static int serve_repmgr_request __P((ENV *, REPMGR_MESSAGE *)); + +/* + * Map one of the phase-1/provisional membership status values to its + * corresponding ultimate goal status: if "adding", the goal is to be fully + * "present". Otherwise ("deleting") the goal is to not even appear in the + * database at all (0). + */ +#define NEXT_STATUS(s) ((s) == SITE_ADDING ? 
SITE_PRESENT : 0) + +/* + * PUBLIC: void *__repmgr_msg_thread __P((void *)); + */ +void * +__repmgr_msg_thread(argsp) + void *argsp; +{ + REPMGR_RUNNABLE *th; + ENV *env; + int ret; + + th = argsp; + env = th->env; + + if ((ret = message_loop(env, th)) != 0) { + __db_err(env, ret, "message thread failed"); + (void)__repmgr_thread_failure(env, ret); + } + return (NULL); +} + +static int +message_loop(env, th) + ENV *env; + REPMGR_RUNNABLE *th; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_MESSAGE *msg; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + __repmgr_permlsn_args permlsn; + int incremented, ret, t_ret; + u_int32_t membership; + + COMPQUIET(membership, 0); + db_rep = env->rep_handle; + rep = db_rep->region; + LOCK_MUTEX(db_rep->mutex); + while ((ret = __repmgr_queue_get(env, &msg, th)) == 0) { + incremented = FALSE; + if (IS_DEFERRABLE(msg->msg_hdr.type)) { + /* + * Count threads currently processing channel requests + * or GMDB operations, so that we can limit the number + * of them, in order to avoid starving more important + * rep messages. 
+ */ + db_rep->non_rep_th++; + incremented = TRUE; + } + if (msg->msg_hdr.type == REPMGR_REP_MESSAGE) { + site = SITE_FROM_EID(msg->v.repmsg.originating_eid); + membership = site->membership; + } + UNLOCK_MUTEX(db_rep->mutex); + + switch (msg->msg_hdr.type) { + case REPMGR_REP_MESSAGE: + if (membership != SITE_PRESENT) + break; + while ((ret = process_message(env, + &msg->v.repmsg.control, &msg->v.repmsg.rec, + msg->v.repmsg.originating_eid)) == DB_LOCK_DEADLOCK) + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "repmgr deadlock retry")); + break; + case REPMGR_APP_MESSAGE: + ret = dispatch_app_message(env, msg); + conn = msg->v.appmsg.conn; + if (conn != NULL) { + LOCK_MUTEX(db_rep->mutex); + t_ret = __repmgr_decr_conn_ref(env, conn); + UNLOCK_MUTEX(db_rep->mutex); + if (t_ret != 0 && ret == 0) + ret = t_ret; + } + break; + case REPMGR_OWN_MSG: + ret = serve_repmgr_request(env, msg); + break; + case REPMGR_HEARTBEAT: + if ((ret = __repmgr_permlsn_unmarshal(env, + &permlsn, msg->v.repmsg.control.data, + msg->v.repmsg.control.size, NULL)) != 0) + ret = DB_REP_UNAVAIL; + else if (rep->master_id == db_rep->self_eid) { + /* + * If a master receives a heartbeat, there + * may be a dupmaster. Resend latest log + * message to prompt base replication to + * detect it without the need for application + * activity. + */ + ret = __rep_flush(env->dbenv); + } else { + /* + * Use heartbeat message to initiate rerequest + * processing. + */ + ret = __rep_check_missing(env, + permlsn.generation, &permlsn.lsn); + } + break; + default: + ret = __db_unknown_path(env, "message loop"); + break; + } + + __os_free(env, msg); + LOCK_MUTEX(db_rep->mutex); + if (incremented) + db_rep->non_rep_th--; + if (ret != 0) + goto out; + } + /* + * A return of DB_REP_UNAVAIL from __repmgr_queue_get() merely means we + * should finish gracefully. 
+ */ + if (ret == DB_REP_UNAVAIL) + ret = 0; +out: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +static int +dispatch_app_message(env, msg) + ENV *env; + REPMGR_MESSAGE *msg; +{ + DB_REP *db_rep; + DB_CHANNEL db_channel; + CHANNEL channel; + __repmgr_msg_metadata_args meta; + DBT *dbt, *segment; + u_int32_t flags, i, size, *uiptr; + u_int8_t *data; + void *ptr; + int ret; + + COMPQUIET(size, 0); + + db_rep = env->rep_handle; + + db_channel.channel = &channel; + db_channel.send_msg = __repmgr_send_response; + + /* Supply stub functions for methods inapplicable in msg disp func. */ + db_channel.close = __repmgr_channel_close_inval; + db_channel.send_request = __repmgr_send_request_inval; + db_channel.set_timeout = __repmgr_channel_timeout_inval; + + channel.msg = msg; + channel.env = env; + channel.c.conn = msg->v.appmsg.conn; + channel.responded = FALSE; + channel.meta = &meta; + + /* + * The user data is in a form similar to that of a bulk buffer. + * However, there's also our meta-data tacked on to the end of it. + * Fortunately, the meta-data is fixed length, so it's easy to peel it + * off. + * + * The user data "bulk buffer" lacks the usual "-1" end-marker. But + * that's OK, because we already know how many segments there are (from + * the message header). Convert this information into the DBT array + * that we will pass to the user's function. + * + * (See the definition of DB_MULTIPLE_INIT for a reminder of the format + * of a bulk buffer.) 
+ */ + dbt = &msg->v.appmsg.buf; + data = dbt->data; + dbt->size -= __REPMGR_MSG_METADATA_SIZE; + ret = __repmgr_msg_metadata_unmarshal(env, + &meta, &data[dbt->size], __REPMGR_MSG_METADATA_SIZE, NULL); + DB_ASSERT(env, ret == 0); + + dbt->ulen = dbt->size; + DB_MULTIPLE_INIT(ptr, dbt); + for (i = 0; i < APP_MSG_SEGMENT_COUNT(msg->msg_hdr); i++) { + segment = &msg->v.appmsg.segments[i]; + uiptr = ptr; + *uiptr = ntohl(*uiptr); + uiptr[-1] = ntohl(uiptr[-1]); + DB_MULTIPLE_NEXT(ptr, dbt, data, size); + DB_ASSERT(env, data != NULL); + DB_INIT_DBT(*segment, data, size); + } + + flags = F_ISSET(&meta, REPMGR_REQUEST_MSG_TYPE) ? + DB_REPMGR_NEED_RESPONSE : 0; + + if (db_rep->msg_dispatch == NULL) { + __db_errx(env, DB_STR("3670", + "No message dispatch call-back function has been configured")); + if (F_ISSET(channel.meta, REPMGR_REQUEST_MSG_TYPE)) + return (__repmgr_send_err_resp(env, + &channel, DB_NOSERVER)); + else + return (0); + } + + (*db_rep->msg_dispatch)(env->dbenv, + &db_channel, &msg->v.appmsg.segments[0], + APP_MSG_SEGMENT_COUNT(msg->msg_hdr), flags); + + if (F_ISSET(channel.meta, REPMGR_REQUEST_MSG_TYPE) && + !channel.responded) { + __db_errx(env, DB_STR("3671", + "Application failed to provide a response")); + return (__repmgr_send_err_resp(env, &channel, DB_KEYEMPTY)); + } + + return (0); +} + +/* + * PUBLIC: int __repmgr_send_err_resp __P((ENV *, CHANNEL *, int)); + */ +int +__repmgr_send_err_resp(env, channel, err) + ENV *env; + CHANNEL *channel; + int err; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + REPMGR_IOVECS iovecs; + __repmgr_msg_hdr_args msg_hdr; + u_int8_t msg_hdr_buf[__REPMGR_MSG_HDR_SIZE]; + int ret; + + db_rep = env->rep_handle; + msg_hdr.type = REPMGR_RESP_ERROR; + + /* Make it non-negative, so we can send on wire without worry. 
*/ + DB_ASSERT(env, err < 0); + RESP_ERROR_CODE(msg_hdr) = (u_int32_t)(-err); + + RESP_ERROR_TAG(msg_hdr) = channel->meta->tag; + + __repmgr_iovec_init(&iovecs); + __repmgr_msg_hdr_marshal(env, &msg_hdr, msg_hdr_buf); + __repmgr_add_buffer(&iovecs, msg_hdr_buf, __REPMGR_MSG_HDR_SIZE); + + conn = channel->c.conn; + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_send_many(env, conn, &iovecs, 0); + UNLOCK_MUTEX(db_rep->mutex); + + return (ret); +} + +static int +process_message(env, control, rec, eid) + ENV *env; + DBT *control, *rec; + int eid; +{ + DB_LSN lsn; + DB_REP *db_rep; + REP *rep; + int dirty, ret, t_ret; + u_int32_t generation; + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * Save initial generation number, in case it changes in a close race + * with a NEWMASTER. + */ + generation = rep->gen; + + ret = 0; + switch (t_ret = + __rep_process_message_int(env, control, rec, eid, &lsn)) { + case 0: + if (db_rep->takeover_pending) + ret = __repmgr_claim_victory(env); + break; + + case DB_REP_HOLDELECTION: + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_init_election(env, + ELECT_F_IMMED | ELECT_F_INVITEE); + UNLOCK_MUTEX(db_rep->mutex); + break; + + case DB_REP_DUPMASTER: + /* + * Initiate an election if we're configured to be using + * elections, but only if we're *NOT* using leases. When using + * leases, there is never any uncertainty over which site is the + * rightful master, and only the loser gets the DUPMASTER return + * code. + */ + if ((ret = __repmgr_become_client(env)) == 0 && + FLD_ISSET(rep->config, REP_C_LEASE | REP_C_ELECTIONS) + == REP_C_ELECTIONS) { + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_init_election(env, ELECT_F_IMMED); + UNLOCK_MUTEX(db_rep->mutex); + } + DB_EVENT(env, DB_EVENT_REP_DUPMASTER, NULL); + break; + + case DB_REP_ISPERM: +#ifdef CONFIG_TEST + if (env->test_abort == DB_TEST_REPMGR_PERM) + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "ISPERM: Test hook. 
Skip ACK for permlsn [%lu][%lu]", + (u_long)lsn.file, (u_long)lsn.offset)); +#endif + DB_TEST_SET(env->test_abort, DB_TEST_REPMGR_PERM); + ret = send_permlsn(env, generation, &lsn); +DB_TEST_RECOVERY_LABEL + break; + + case DB_LOCK_DEADLOCK: + case DB_REP_IGNORE: + case DB_REP_NEWSITE: + case DB_REP_NOTPERM: + break; + + case DB_REP_JOIN_FAILURE: + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "repmgr fires join failure event")); + DB_EVENT(env, DB_EVENT_REP_JOIN_FAILURE, NULL); + break; + + case DB_REP_WOULDROLLBACK: + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "repmgr fires would-rollback event")); + DB_EVENT(env, DB_EVENT_REP_WOULD_ROLLBACK, &lsn); + break; + + default: + __db_err(env, t_ret, "DB_ENV->rep_process_message"); + ret = t_ret; + } + + if (ret != 0) + goto err; + LOCK_MUTEX(db_rep->mutex); + dirty = db_rep->gmdb_dirty; + db_rep->gmdb_dirty = FALSE; + UNLOCK_MUTEX(db_rep->mutex); + if (dirty) { + if ((ret = __op_rep_enter(env, FALSE, FALSE)) != 0) + goto err; + ret = __repmgr_reload_gmdb(env); + t_ret = __op_rep_exit(env); + if (ret == ENOENT) + ret = 0; + else if (ret == DB_DELETED) + ret = __repmgr_bow_out(env); + if (t_ret != 0 && ret == 0) + ret = t_ret; + } +err: + return (ret); +} + +/* + * Handle replication-related events. Returns only 0 or DB_EVENT_NOT_HANDLED; + * no other error returns are tolerated. + * + * PUBLIC: int __repmgr_handle_event __P((ENV *, u_int32_t, void *)); + */ +int +__repmgr_handle_event(env, event, info) + ENV *env; + u_int32_t event; + void *info; +{ + DB_REP *db_rep; + + db_rep = env->rep_handle; + + if (db_rep->selector == NULL) { + /* Repmgr is not in use, so all events go to application. 
*/ + return (DB_EVENT_NOT_HANDLED); + } + + switch (event) { + case DB_EVENT_REP_ELECTED: + DB_ASSERT(env, info == NULL); + db_rep->takeover_pending = TRUE; + + /* + * The application doesn't really need to see this, because the + * purpose of this event is to tell the winning site that it + * should call rep_start(MASTER), and in repmgr we do that + * automatically. Still, they could conceivably be curious, and + * it doesn't hurt anything to let them know. + */ + break; + case DB_EVENT_REP_INIT_DONE: + db_rep->gmdb_dirty = TRUE; + break; + case DB_EVENT_REP_NEWMASTER: + DB_ASSERT(env, info != NULL); + + /* Application still needs to see this. */ + break; + default: + break; + } + return (DB_EVENT_NOT_HANDLED); +} + +static int +send_permlsn(env, generation, lsn) + ENV *env; + u_int32_t generation; + DB_LSN *lsn; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + int ack, bcast, master, policy, ret; + u_int eid; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + master = rep->master_id; + LOCK_MUTEX(db_rep->mutex); + + /* + * If the file number has changed, send it to everyone, regardless of + * anything else. Otherwise, send it to the master if we know a master, + * and that master's ack policy requires it. + */ + bcast = FALSE; + if (LOG_COMPARE(lsn, &db_rep->perm_lsn) > 0) { + if (lsn->file > db_rep->perm_lsn.file) { + bcast = TRUE; + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "send_permlsn: broadcast [%lu][%lu]", + (u_long)lsn->file, (u_long)lsn->offset)); + } + db_rep->perm_lsn = *lsn; + } + if (IS_KNOWN_REMOTE_SITE(master)) { + site = SITE_FROM_EID(master); + /* + * Use master's ack policy if we know it; use our own if the + * master is too old (down-rev) to have told us its policy. + */ + policy = site->ack_policy > 0 ? 
+ site->ack_policy : rep->perm_policy; + if (policy == DB_REPMGR_ACKS_NONE || + (IS_PEER_POLICY(policy) && rep->priority == 0)) + ack = FALSE; + else + ack = TRUE; + } else { + site = NULL; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "dropping ack with no known master")); + ack = FALSE; + } + + /* + * Send to master first, since we need to send to all its connections. + */ + if (site != NULL && (bcast || ack)) { + if (IS_SITE_AVAILABLE(site) && + (ret = send_permlsn_conn(env, + site->ref.conn, generation, lsn)) != 0) + goto unlock; + TAILQ_FOREACH(conn, &site->sub_conns, entries) { + if ((ret = send_permlsn_conn(env, + conn, generation, lsn)) != 0) + goto unlock; + } + } + if (bcast) { + /* + * Send to everyone except the master (since we've already done + * that, above). + */ + FOR_EACH_REMOTE_SITE_INDEX(eid) { + if ((int)eid == master) + continue; + site = SITE_FROM_EID(eid); + /* + * Send the ack out on primary connection only. + */ + if (site->state == SITE_CONNECTED && + (ret = send_permlsn_conn(env, + site->ref.conn, generation, lsn)) != 0) + goto unlock; + } + } + +unlock: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * Sends a perm LSN message on one connection, if it needs it. + * + * !!! Called with mutex held. 
+ */ +static int +send_permlsn_conn(env, conn, generation, lsn) + ENV *env; + REPMGR_CONNECTION *conn; + u_int32_t generation; + DB_LSN *lsn; +{ + DBT control2, rec2; + __repmgr_permlsn_args permlsn; + u_int8_t buf[__REPMGR_PERMLSN_SIZE]; + int ret; + + ret = 0; + + if (conn->state == CONN_READY) { + DB_ASSERT(env, conn->version > 0); + permlsn.generation = generation; + memcpy(&permlsn.lsn, lsn, sizeof(DB_LSN)); + if (conn->version == 1) { + control2.data = &permlsn; + control2.size = sizeof(permlsn); + } else { + __repmgr_permlsn_marshal(env, &permlsn, buf); + control2.data = buf; + control2.size = __REPMGR_PERMLSN_SIZE; + } + rec2.size = 0; + /* + * It's hard to imagine anyone would care about a lost ack if + * the path to the master is so congested as to need blocking; + * so pass "maxblock" argument as 0. + */ + if ((ret = __repmgr_send_one(env, conn, REPMGR_PERMLSN, + &control2, &rec2, 0)) == DB_REP_UNAVAIL) + ret = __repmgr_bust_connection(env, conn); + } + return (ret); +} + +static int +serve_repmgr_request(env, msg) + ENV *env; + REPMGR_MESSAGE *msg; +{ + DB_THREAD_INFO *ip; + DBT *dbt; + REPMGR_CONNECTION *conn; + int ret, t_ret; + + ENV_ENTER(env, ip); + switch (REPMGR_OWN_MSG_TYPE(msg->msg_hdr)) { + case REPMGR_JOIN_REQUEST: + ret = serve_join_request(env, ip, msg); + break; + case REPMGR_REJOIN: + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "One try at rejoining group automatically")); + if ((ret = __repmgr_join_group(env)) == DB_REP_UNAVAIL) + ret = __repmgr_bow_out(env); + break; + case REPMGR_REMOVE_REQUEST: + ret = serve_remove_request(env, ip, msg); + break; + case REPMGR_RESOLVE_LIMBO: + ret = resolve_limbo_wrapper(env, ip); + break; + case REPMGR_SHARING: + dbt = &msg->v.gmdb_msg.request; + ret = __repmgr_refresh_membership(env, dbt->data, dbt->size); + break; + default: + ret = __db_unknown_path(env, "serve_repmgr_request"); + break; + } + if ((conn = msg->v.gmdb_msg.conn) != NULL) { + if ((t_ret = __repmgr_close_connection(env, conn)) != 0 && + 
ret == 0) + ret = t_ret; + if ((t_ret = __repmgr_decr_conn_ref(env, conn)) != 0 && + ret == 0) + ret = t_ret; + } + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * Attempts to fulfill a remote site's request to join the replication group. + * Only the master can grant this request, so if we've received this request + * when we're not the master, we'll send an appropriate failure message instead. + */ +static int +serve_join_request(env, ip, msg) + ENV *env; + DB_THREAD_INFO *ip; + REPMGR_MESSAGE *msg; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + DBT *dbt; + __repmgr_site_info_args site_info; + u_int8_t *buf; + char *host; + size_t len; + u_int32_t status; + int eid, ret, t_ret; + + db_rep = env->rep_handle; + COMPQUIET(status, 0); + + conn = msg->v.gmdb_msg.conn; + dbt = &msg->v.gmdb_msg.request; + ret = __repmgr_site_info_unmarshal(env, + &site_info, dbt->data, dbt->size, NULL); + + host = site_info.host.data; + host[site_info.host.size - 1] = '\0'; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Request to join group from %s:%u", host, (u_int)site_info.port)); + + if ((ret = __repmgr_hold_master_role(env, conn)) == DB_REP_UNAVAIL) + return (0); + if (ret != 0) + return (ret); + + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_find_site(env, host, site_info.port, &eid)) == 0) { + DB_ASSERT(env, eid != db_rep->self_eid); + status = SITE_FROM_EID(eid)->membership; + } + UNLOCK_MUTEX(db_rep->mutex); + if (ret != 0) + goto err; + + switch (status) { + case 0: + case SITE_ADDING: + ret = __repmgr_update_membership(env, ip, eid, SITE_ADDING); + break; + case SITE_PRESENT: + /* Already in desired state. 
*/ + break; + case SITE_DELETING: + ret = rescind_pending(env, + ip, eid, SITE_DELETING, SITE_PRESENT); + break; + default: + ret = __db_unknown_path(env, "serve_join_request"); + break; + } + if (ret != 0) + goto err; + + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_marshal_member_list(env, &buf, &len); + UNLOCK_MUTEX(db_rep->mutex); + if (ret != 0) + goto err; + ret = __repmgr_send_sync_msg(env, conn, REPMGR_JOIN_SUCCESS, + buf, (u_int32_t)len); + __os_free(env, buf); + +err: + + if ((t_ret = __repmgr_rlse_master_role(env)) != 0 && ret == 0) + ret = t_ret; + + if (ret == DB_REP_UNAVAIL) + ret = __repmgr_send_sync_msg(env, conn, + REPMGR_GM_FAILURE, NULL, 0); + + return (ret); +} + +static int +serve_remove_request(env, ip, msg) + ENV *env; + DB_THREAD_INFO *ip; + REPMGR_MESSAGE *msg; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + DBT *dbt; + __repmgr_site_info_args site_info; + char *host; + u_int32_t status, type; + int eid, ret, t_ret; + + COMPQUIET(status, 0); + db_rep = env->rep_handle; + + conn = msg->v.gmdb_msg.conn; + dbt = &msg->v.gmdb_msg.request; + ret = __repmgr_site_info_unmarshal(env, + &site_info, dbt->data, dbt->size, NULL); + + host = site_info.host.data; + host[site_info.host.size - 1] = '\0'; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Request to remove %s:%u from group", host, (u_int)site_info.port)); + + if ((ret = __repmgr_hold_master_role(env, conn)) == DB_REP_UNAVAIL) + return (0); + if (ret != 0) + return (ret); + + LOCK_MUTEX(db_rep->mutex); + if ((site = __repmgr_lookup_site(env, host, site_info.port)) == NULL) + eid = DB_EID_INVALID; + else { + eid = EID_FROM_SITE(site); + status = site->membership; + } + UNLOCK_MUTEX(db_rep->mutex); + if (eid == DB_EID_INVALID) { + /* Doesn't exist: already been removed. 
*/ + ret = 0; + goto err; + } else if (eid == db_rep->self_eid) { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Reject request to remove current master")); + ret = DB_REP_UNAVAIL; + goto err; + } + + switch (status) { + case 0: + /* Already in desired state. */ + break; + case SITE_ADDING: + ret = rescind_pending(env, ip, eid, SITE_ADDING, 0); + break; + case SITE_PRESENT: + case SITE_DELETING: + ret = __repmgr_update_membership(env, ip, eid, SITE_DELETING); + break; + default: + ret = __db_unknown_path(env, "serve_remove_request"); + break; + } +err: + if ((t_ret = __repmgr_rlse_master_role(env)) != 0 && ret == 0) + ret = t_ret; + switch (ret) { + case 0: + type = REPMGR_REMOVE_SUCCESS; + break; + case DB_REP_UNAVAIL: + type = REPMGR_GM_FAILURE; + break; + default: + return (ret); + } + return (__repmgr_send_sync_msg(env, conn, type, NULL, 0)); +} + +/* + * Runs a limbo resolution on a message processing thread, upon request from the + * send() function when it notices that a user transaction has gotten a perm + * success. (It wouldn't work for the user thread to do it in-line.) + */ +static int +resolve_limbo_wrapper(env, ip) + ENV *env; + DB_THREAD_INFO *ip; +{ + int do_close, ret, t_ret; + + if ((ret = __repmgr_hold_master_role(env, NULL)) == DB_REP_UNAVAIL) + return (0); + if (ret != 0) + return (ret); +retry: + if ((ret = __repmgr_setup_gmdb_op(env, ip, NULL, 0)) != 0) + goto rlse; + + /* + * A limbo resolution request is merely a "best effort" attempt to + * shorten the duration of a pending change. So if it fails for lack of + * acks again, no one really cares. 
+ */ + if ((ret = resolve_limbo_int(env, ip)) == DB_REP_UNAVAIL) { + do_close = FALSE; + ret = 0; + } else + do_close = TRUE; + + if ((t_ret = __repmgr_cleanup_gmdb_op(env, do_close)) != 0 && + ret == 0) + ret = t_ret; + if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) + goto retry; +rlse: + if ((t_ret = __repmgr_rlse_master_role(env)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * Checks for the need to resolve limbo (failure of a previous GMDB update to + * get enough acks), and does it if nec. No-op if none needed. + * + * Must be called within setup_gmdb_op/cleanup_gmdb_op context. + */ +static int +resolve_limbo_int(env, ip) + ENV *env; + DB_THREAD_INFO *ip; +{ + DB_REP *db_rep; + DB_TXN *txn; + REPMGR_SITE *site; + DB_LSN orig_lsn; + DBT key_dbt, data_dbt; + __repmgr_member_args logrec; + repmgr_netaddr_t addr; + u_int32_t orig_status, status; + int eid, locked, ret, t_ret; + u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE]; + u_int8_t key_buf[MAX_MSG_BUF]; + + db_rep = env->rep_handle; + ret = 0; + + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + + /* + * Is there a previous GMDB update failure currently pending? If not, + * there's nothing for us to do. + */ + eid = db_rep->limbo_victim; + if (eid == DB_EID_INVALID) + goto out; + site = SITE_FROM_EID(eid); + addr = site->net_addr; + marshal_site_key(env, &addr, key_buf, &key_dbt, &logrec); + orig_status = site->membership; + if (orig_status == SITE_PRESENT || orig_status == 0) + goto out; + + if (IS_ZERO_LSN(db_rep->limbo_failure)) + goto out; + + /* + * There are potentially two parts: the self-update of the existing + * limbo record, and then the finishing-off if the first is successful. + * We might only have to do the finishing-off, if some arbitrary random + * txn triggered a limbo resolution request on a msg processing thread. + */ + if (LOG_COMPARE(&db_rep->durable_lsn, &db_rep->limbo_failure) > 0) { + /* + * Nice! 
Limbo has been resolved by an arbitrary other txn + * succeeding subsequently. So we don't have to do the + * "self-update" part. + */ + } else { + /* + * Do a self-update, to try to trigger a "durable". Since + * nothing in the database is changing, we need neither an ASL + * hint nor a bump in the version sequence. + */ + orig_lsn = db_rep->limbo_failure; + db_rep->active_gmdb_update = gmdb_primary; + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + + if ((ret = __txn_begin(env, + ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + goto out; + + marshal_site_data(env, orig_status, data_buf, &data_dbt); + + ret = __db_put(db_rep->gmdb, ip, txn, &key_dbt, &data_dbt, 0); + if ((t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && + ret == 0) + ret = t_ret; + if (ret != 0) + goto out; + + /* + * Check to see whether we got another PERM failure. This is + * quite possible in the case where a GMDB request is being + * retried by a requestor, but unlikely if we had a resolution + * via an "arbitrary" txn. + */ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + if (LOG_COMPARE(&db_rep->limbo_failure, &orig_lsn) > 0) { + db_rep->limbo_resolution_needed = TRUE; + ret = DB_REP_UNAVAIL; + goto out; + } + } + DB_ASSERT(env, locked); + + /* + * Here, either we didn't need to do the self-update, or we did it and + * it succeeded. So now we're ready to do the second phase update. + */ + db_rep->limbo_victim = DB_EID_INVALID; + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + status = NEXT_STATUS(orig_status); + if ((ret = finish_gmdb_update(env, + ip, &key_dbt, orig_status, status, &logrec)) != 0) + goto out; + + /* Track modified membership status in our in-memory sites array. */ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + if ((ret = __repmgr_set_membership(env, + addr.host, addr.port, status)) != 0) + goto out; + __repmgr_set_sites(env); + +out: + if (locked) + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * Update a specific record in the Group Membership database. 
The record to be + * updated is implied by "eid"; "pstatus" is the provisional status (ADDING or + * DELETING) to be used in the first phase of the update. The ultimate goal + * status is inferred (ADDING -> PRESENT, or DELETING -> 0). + * + * PUBLIC: int __repmgr_update_membership __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, int, u_int32_t)); + */ +int +__repmgr_update_membership(env, ip, eid, pstatus) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + u_int32_t pstatus; /* Provisional status. */ +{ + DB_REP *db_rep; + REPMGR_SITE *site; + DB_TXN *txn; + DB_LSN lsn, orig_lsn; + DBT key_dbt, data_dbt; + __repmgr_member_args logrec; + repmgr_netaddr_t addr; + u_int32_t orig_status, ult_status; + int do_close, locked, ret, t_ret; + u_int8_t key_buf[MAX_MSG_BUF]; + u_int8_t status_buf[__REPMGR_MEMBERSHIP_DATA_SIZE]; + + DB_ASSERT(env, pstatus == SITE_ADDING || pstatus == SITE_DELETING); + + db_rep = env->rep_handle; + COMPQUIET(orig_status, 0); + COMPQUIET(addr.host, NULL); + COMPQUIET(addr.port, 0); + +retry: + txn = NULL; + locked = FALSE; + DB_ASSERT(env, db_rep->gmdb_busy); + if ((ret = __repmgr_setup_gmdb_op(env, ip, NULL, 0)) != 0) + return (ret); + + /* + * Usually we'll keep the GMDB closed, to conserve resources, since + * changes should be rare. However, if a PERM FAIL puts us in limbo, we + * expect to clean that up as soon as we can; so leave it open for now + * in that case. + */ + do_close = TRUE; + + /* + * Before attempting any fresh updates, resolve any lingering incomplete + * updates from the past (i.e., those that resulted in PERM_FAIL). If + * we can't, then we mustn't proceed with any more updates. Getting an + * additional perm failure would increase the dissonance between the + * effective group size and the number of sites from which we can safely + * accept acks. Besides, if we can't clear the previous failure, + * there's practically no hope that a new update would fare any better. 
+ */ + if ((ret = resolve_limbo_int(env, ip)) != 0) { + if (ret == DB_REP_UNAVAIL) + do_close = FALSE; + goto err; + } + + /* + * If there was a successful limbo resolution, it could have either been + * for some unrelated change, or it could have been the same change our + * caller is now (re-)trying to perform. In the latter case, we have + * nothing more to do -- resolve_limbo() has done it all for us! To + * find out, compare the site's current status with the ultimate goal + * status associated with the provisional status that was passed to us + * as input. + */ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(eid)); + site = SITE_FROM_EID(eid); + if ((orig_status = site->membership) == NEXT_STATUS(pstatus)) + goto err; + addr = site->net_addr; + + /* + * Anticipate modified membership status in our in-memory sites array. + * This forces us into an awkward rescission, below, if our transaction + * suffers a hard failure and must be aborted. But it's necessary + * because of the requirement that, on additions, the quorum computation + * must be based on the incremented nsites value. An alternative might + * possibly be to increment nsites separately from adding the new site + * to the array, or even having a special epicycle at the point where + * send() counts acks (we'd have to make active_gmdb_update richer), but + * those seem even more confusing. + */ + if ((ret = __repmgr_set_membership(env, + addr.host, addr.port, pstatus)) != 0) + goto err; + __repmgr_set_sites(env); + + /* + * Hint to our send() function that we want to know the result of ack + * counting. 
+ */ + orig_lsn = db_rep->limbo_failure; + db_rep->active_gmdb_update = gmdb_primary; + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + + if ((ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + goto err; + marshal_site_key(env, &addr, key_buf, &key_dbt, &logrec); + marshal_site_data(env, pstatus, status_buf, &data_dbt); + if ((ret = __db_put(db_rep->gmdb, + ip, txn, &key_dbt, &data_dbt, 0)) != 0) + goto err; + if ((ret = incr_gm_version(env, ip, txn)) != 0) + goto err; + + /* + * Add some information to the log for this txn. This is an annotation, + * for the sole purpose of enabling the client to notice whenever a + * change has occurred in this database. It has nothing to do with + * local recovery. + */ + ZERO_LSN(lsn); + if ((ret = __repmgr_member_log(env, + txn, &lsn, 0, db_rep->membership_version, + orig_status, pstatus, &logrec.host, logrec.port)) != 0) + goto err; + ret = __txn_commit(txn, 0); + txn = NULL; + if (ret != 0) + goto err; + + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + + if (LOG_COMPARE(&db_rep->limbo_failure, &orig_lsn) > 0) { + /* + * Failure LSN advanced, meaning this update wasn't acked by + * enough clients. + */ + db_rep->limbo_resolution_needed = TRUE; + db_rep->limbo_victim = eid; + ret = DB_REP_UNAVAIL; + do_close = FALSE; + goto err; + } + + /* Now we'll complete the status change. */ + ult_status = NEXT_STATUS(pstatus); + UNLOCK_MUTEX(db_rep->mutex); + locked = FALSE; + + if ((ret = finish_gmdb_update(env, ip, + &key_dbt, pstatus, ult_status, &logrec)) != 0) + goto err; + + /* Track modified membership status in our in-memory sites array. 
*/ + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + ret = __repmgr_set_membership(env, addr.host, addr.port, ult_status); + __repmgr_set_sites(env); + +err: + if (locked) + UNLOCK_MUTEX(db_rep->mutex); + if (txn != NULL) { + DB_ASSERT(env, ret != 0); + (void)__txn_abort(txn); + /* + * We've just aborted the txn which moved the site info from + * orig_status to something else, so restore that value now so + * that we keep in sync. + */ + (void)__repmgr_set_membership(env, + addr.host, addr.port, orig_status); + } + if ((t_ret = __repmgr_cleanup_gmdb_op(env, do_close)) != 0 && + ret == 0) + ret = t_ret; + if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) + goto retry; + return (ret); +} + +/* + * Rescind a partially completed membership DB change, setting the new status to + * the value given. + */ +static int +rescind_pending(env, ip, eid, cur_status, new_status) + ENV *env; + DB_THREAD_INFO *ip; + int eid; + u_int32_t cur_status, new_status; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + DBT key_dbt; + __repmgr_member_args logrec; + repmgr_netaddr_t addr; + u_int8_t key_buf[MAX_MSG_BUF]; + int ret, t_ret; + + db_rep = env->rep_handle; + +retry: + if ((ret = __repmgr_setup_gmdb_op(env, ip, NULL, 0)) != 0) + return (ret); + + LOCK_MUTEX(db_rep->mutex); + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(eid)); + site = SITE_FROM_EID(eid); + addr = site->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + + marshal_site_key(env, &addr, key_buf, &key_dbt, &logrec); + if ((ret = finish_gmdb_update(env, + ip, &key_dbt, cur_status, new_status, &logrec)) != 0) + goto err; + + /* Track modified membership status in our in-memory sites array. 
*/ + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_set_membership(env, addr.host, addr.port, new_status); + __repmgr_set_sites(env); + UNLOCK_MUTEX(db_rep->mutex); + +err: + if ((t_ret = __repmgr_cleanup_gmdb_op(env, TRUE)) != 0 && + ret == 0) + ret = t_ret; + if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) + goto retry; + return (ret); +} + +/* + * Caller must have already taken care of serializing this operation + * (hold_master_role(), setup_gmdb_op()). + */ +static int +incr_gm_version(env, ip, txn) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN *txn; +{ + DB_REP *db_rep; + u_int32_t version; + int ret; + + db_rep = env->rep_handle; + version = db_rep->membership_version + 1; + if ((ret = __repmgr_set_gm_version(env, ip, txn, version)) == 0) + db_rep->membership_version = version; + return (ret); +} + +/* + * PUBLIC: int __repmgr_set_gm_version __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, u_int32_t)); + */ +int +__repmgr_set_gm_version(env, ip, txn, version) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN *txn; + u_int32_t version; +{ + DB_REP *db_rep; + DBT key_dbt, data_dbt; + __repmgr_membership_key_args key; + __repmgr_member_metadata_args metadata; + u_int8_t key_buf[__REPMGR_MEMBERSHIP_KEY_SIZE + 1]; + u_int8_t metadata_buf[__REPMGR_MEMBER_METADATA_SIZE]; + size_t len; + int ret; + + db_rep = env->rep_handle; + + metadata.format = REPMGR_GMDB_FMT_VERSION; + metadata.version = version; + __repmgr_member_metadata_marshal(env, &metadata, metadata_buf); + DB_INIT_DBT(data_dbt, metadata_buf, __REPMGR_MEMBER_METADATA_SIZE); + + DB_INIT_DBT(key.host, NULL, 0); + key.port = 0; + ret = __repmgr_membership_key_marshal(env, + &key, key_buf, sizeof(key_buf), &len); + DB_ASSERT(env, ret == 0); + DB_INIT_DBT(key_dbt, key_buf, len); + + if ((ret = __db_put(db_rep->gmdb, + ip, txn, &key_dbt, &data_dbt, 0)) != 0) + return (ret); + return (0); +} + +/* + * Performs the second phase of a 2-phase membership DB operation: an "adding" + * site becomes fully "present" in 
the group; a "deleting" site is finally + * really deleted. + */ +static int +finish_gmdb_update(env, ip, key_dbt, prev_status, status, logrec) + ENV *env; + DB_THREAD_INFO *ip; + DBT *key_dbt; + u_int32_t prev_status, status; + __repmgr_member_args *logrec; +{ + DB_REP *db_rep; + DB_LSN lsn; + DB_TXN *txn; + DBT data_dbt; + u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE]; + int ret, t_ret; + + db_rep = env->rep_handle; + + db_rep->active_gmdb_update = gmdb_secondary; + if ((ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + return (ret); + + if (status == 0) + ret = __db_del(db_rep->gmdb, ip, txn, key_dbt, 0); + else { + marshal_site_data(env, status, data_buf, &data_dbt); + ret = __db_put(db_rep->gmdb, ip, txn, key_dbt, &data_dbt, 0); + } + if (ret != 0) + goto err; + + if ((ret = incr_gm_version(env, ip, txn)) != 0) + goto err; + + ZERO_LSN(lsn); + if ((ret = __repmgr_member_log(env, + txn, &lsn, 0, db_rep->membership_version, + prev_status, status, &logrec->host, logrec->port)) != 0) + goto err; + +err: + if ((t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * Set up everything we need to update the Group Membership database. This may + * or may not include providing a transaction in which to do the updates + * (depending on whether the caller wants the creation of the database to be in + * the same transaction as the updates). + * + * PUBLIC: int __repmgr_setup_gmdb_op __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN **, u_int32_t)); + */ +int +__repmgr_setup_gmdb_op(env, ip, txnp, flags) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN **txnp; + u_int32_t flags; +{ + DB_REP *db_rep; + DB_TXN *txn; + DB *dbp; + int ret, was_open; + + db_rep = env->rep_handle; + + dbp = NULL; + txn = NULL; + + /* + * If the caller provided a place to return a txn handle, create it and + * perform any open operation as part of that txn. The caller is + * responsible for disposing of the txn. 
Otherwise, only begin a txn if + * we need to do the open and in that case commit it right after the + * open. + */ + DB_ASSERT(env, db_rep->gmdb_busy); + was_open = db_rep->gmdb != NULL; + if ((txnp != NULL || !was_open) && + (ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + goto err; + + if (!was_open) { + DB_ASSERT(env, txn != NULL); + /* + * Opening the membership database is like a secondary GMDB + * operation, in the sense that we don't care how many clients + * ack it, yet we don't want the application to see any perm + * failure events. + */ + DB_ASSERT(env, db_rep->active_gmdb_update == none); + db_rep->active_gmdb_update = gmdb_secondary; + ret = __rep_open_sysdb(env, + ip, txn, REPMEMBERSHIP, flags, &dbp); + if (ret == 0 && txnp == NULL) { + /* The txn was just for the open operation. */ + ret = __txn_commit(txn, 0); + txn = NULL; + } + db_rep->active_gmdb_update = none; + if (ret != 0) + goto err; + } + + /* + * Lock out normal API operations. Again because we need to know that + * if a PERM_FAIL occurs, it was associated with our txn. Also, so that + * we avoid confusing the application with a PERM_FAIL event for our own + * txn. + */ + if ((ret = __rep_take_apilockout(env)) != 0) + goto err; + + /* + * Here, all steps have succeeded. Stash and/or pass back the fruits of + * our labor. + */ + if (!was_open) { + DB_ASSERT(env, dbp != NULL); + db_rep->gmdb = dbp; + } + if (txnp != NULL) { + DB_ASSERT(env, txn != NULL); + *txnp = txn; + } + /* + * In the successful case, a later call to cleanup_gmdb_op will + * ENV_LEAVE. 
+ */ + return (0); + +err: + DB_ASSERT(env, ret != 0); + if (dbp != NULL) + (void)__db_close(dbp, txn, DB_NOSYNC); + if (txn != NULL) + (void)__txn_abort(txn); + return (ret); +} + +/* + * PUBLIC: int __repmgr_cleanup_gmdb_op __P((ENV *, int)); + */ +int +__repmgr_cleanup_gmdb_op(env, do_close) + ENV *env; + int do_close; +{ + DB_REP *db_rep; + int ret, t_ret; + + db_rep = env->rep_handle; + db_rep->active_gmdb_update = none; + ret = __rep_clear_apilockout(env); + + if (do_close && db_rep->gmdb != NULL) { + if ((t_ret = __db_close(db_rep->gmdb, NULL, DB_NOSYNC) != 0) && + ret == 0) + ret = t_ret; + db_rep->gmdb = NULL; + } + return (ret); +} + +/* + * Check whether we're currently master, and if so hold that role so that we can + * perform a Group Membership database operation. After a successful call, the + * caller must call rlse_master_role to release the hold. + * + * If we can't guarantee that we can remain master, send an appropriate failure + * message on the given connection (unless NULL). + * + * We also ensure that only one GMDB operation will take place at time, for a + * couple of reasons: if we get a PERM_FAIL it means the fate of the change is + * indeterminate, so we have to assume the worst. We have to assume the higher + * value of nsites, yet we can't accept ack from the questionable site. If we + * allowed concurrent operations, this could lead to more than one questionable + * site, which would be even worse. Also, when we get a PERM_FAIL we want to + * know which txn failed, and that would be messy if there could be several. + * + * Of course we can't simply take the mutex for the duration, because + * the mutex needs to be available in order to send out the log + * records. 
+ * + * PUBLIC: int __repmgr_hold_master_role __P((ENV *, REPMGR_CONNECTION *)); + */ +int +__repmgr_hold_master_role(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REP *rep; + int ret, t_ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_await_gmdbop(env)) == 0) { + /* + * If we're currently master, but client_intent is set, it means + * that another thread is on the way to becoming master, so we + * can't promise to hold the master role for the caller: we've + * lost a close race. + */ + if (rep->master_id != db_rep->self_eid || + db_rep->client_intent) + ret = DB_REP_UNAVAIL; + else + db_rep->gmdb_busy = TRUE; + } + UNLOCK_MUTEX(db_rep->mutex); + if (conn != NULL && ret == DB_REP_UNAVAIL && + (t_ret = reject_fwd(env, conn)) != 0) + ret = t_ret; + return (ret); +} + +/* + * Releases the "master role" lock once we're finished performing a membership + * DB operation. + * + * PUBLIC: int __repmgr_rlse_master_role __P((ENV *)); + */ +int +__repmgr_rlse_master_role(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + db_rep->gmdb_busy = FALSE; + ret = __repmgr_signal(&db_rep->gmdb_idle); + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * Responds to a membership change request in the case we're not currently + * master. If we know the master, responds with a "forward" message, to tell + * the requestor who is master. Otherwise rejects it outright. 
+ */ +static int +reject_fwd(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REP *rep; + SITE_STRING_BUFFER site_string; + __repmgr_gm_fwd_args fwd; + repmgr_netaddr_t addr; + u_int8_t buf[MAX_MSG_BUF]; + u_int32_t msg_type; + size_t len; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if (IS_KNOWN_REMOTE_SITE(rep->master_id)) { + msg_type = REPMGR_GM_FORWARD; + LOCK_MUTEX(db_rep->mutex); + addr = SITE_FROM_EID(rep->master_id)->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Forwarding request to master %s", + __repmgr_format_addr_loc(&addr, site_string))); + fwd.host.data = addr.host; + fwd.host.size = (u_int32_t)strlen(fwd.host.data) + 1; + fwd.port = addr.port; + fwd.gen = rep->mgen; + ret = __repmgr_gm_fwd_marshal(env, + &fwd, buf, sizeof(buf), &len); + DB_ASSERT(env, ret == 0); + } else { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Rejecting membership request with no known master")); + msg_type = REPMGR_GM_FAILURE; + len = 0; + } + + return (__repmgr_send_sync_msg(env, conn, + msg_type, buf, (u_int32_t)len)); +} + +/* + * The length of "buf" must be at least MAX_GMDB_KEY. 
+ */ +static void +marshal_site_key(env, addr, buf, dbt, logrec) + ENV *env; + repmgr_netaddr_t *addr; + u_int8_t *buf; + DBT *dbt; + __repmgr_member_args *logrec; +{ + __repmgr_membership_key_args key; + size_t len; + int ret; + + DB_INIT_DBT(key.host, addr->host, strlen(addr->host) + 1); + logrec->host = key.host; + key.port = addr->port; + logrec->port = key.port; + ret = __repmgr_membership_key_marshal(env, + &key, buf, MAX_MSG_BUF, &len); + DB_ASSERT(env, ret == 0); + DB_INIT_DBT(*dbt, buf, len); +} + +static void +marshal_site_data(env, status, buf, dbt) + ENV *env; + u_int32_t status; + u_int8_t *buf; + DBT *dbt; +{ + __repmgr_membership_data_args member_status; + + member_status.flags = status; + __repmgr_membership_data_marshal(env, &member_status, buf); + DB_INIT_DBT(*dbt, buf, __REPMGR_MEMBERSHIP_DATA_SIZE); +} + +/* + * PUBLIC: void __repmgr_set_sites __P((ENV *)); + * + * Caller must hold mutex. + */ +void +__repmgr_set_sites(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + u_int32_t n; + u_int i; + + db_rep = env->rep_handle; + + for (i = 0, n = 0; i < db_rep->site_cnt; i++) { + if (db_rep->sites[i].membership > 0) + n++; + } + ret = __rep_set_nsites_int(env, n); + DB_ASSERT(env, ret == 0); +} diff --git a/src/repmgr/repmgr_net.c b/src/repmgr/repmgr_net.c new file mode 100644 index 00000000..24f7c1c3 --- /dev/null +++ b/src/repmgr/repmgr_net.c @@ -0,0 +1,1882 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mp.h" + +/* + * The functions in this module implement a simple wire protocol for + * transmitting messages of various types. Every message consists of a 9-byte + * header followed by a body (though the body could be 0-length). The header is + * the marshaled form of the "msg_hdr" structure defined in repmgr.src. 
The + * interpretation of header fields depends on message type, and is defined in + * repmgr.h. But as a general principle, in all cases there is enough + * information in the header for us to know the total size of the body, and the + * total amount of memory we need to allocate for storing and processing the + * message. + */ + +/* + * In sending a message, we first try to send it in-line, in the sending thread, + * and without first copying the message, by using scatter/gather I/O, using + * iovecs to point to the various pieces of the message. If that all works + * without blocking, that's optimal. + * If we find that, for a particular connection, we can't send without + * blocking, then we must copy the message for sending later in the select() + * thread. In the course of doing that, we might as well "flatten" the message, + * forming one single buffer, to simplify life. Not only that, once we've gone + * to the trouble of doing that, other sites to which we also want to send the + * message (in the case of a broadcast), may as well take advantage of the + * simplified structure also. + * The sending_msg structure below holds it all. Note that this structure, + * and the "flat_msg" structure, are allocated separately, because (1) the + * flat_msg version is usually not needed; and (2) when a flat_msg is needed, it + * will need to live longer than the wrapping sending_msg structure. + * Note that, for the broadcast case, where we're going to use this + * repeatedly, the iovecs is a template that must be copied, since in normal use + * the iovecs pointers and lengths get adjusted after every partial write. + */ +struct sending_msg { + REPMGR_IOVECS *iovecs; + REPMGR_FLAT *fmsg; +}; + +/* Context for a thread waiting for client acks for PERM message. */ +struct repmgr_permanence { + DB_LSN lsn; /* LSN whose ack this thread is waiting for. */ + u_int threshold; /* Number of client acks needed. */ + int policy; /* Ack policy to be used for this txn. 
*/ +}; + +#ifdef CONFIG_TEST +static u_int fake_port __P((ENV *, u_int)); +#endif +static int final_cleanup __P((ENV *, REPMGR_CONNECTION *, void *)); +static int flatten __P((ENV *, struct sending_msg *)); +static int is_permanent __P((ENV *, void *)); +static int __repmgr_finish_connect + __P((ENV *, socket_t s, REPMGR_CONNECTION **)); +static int __repmgr_propose_version __P((ENV *, REPMGR_CONNECTION *)); +static int __repmgr_start_connect __P((ENV*, socket_t *, ADDRINFO *, int *)); +static void setup_sending_msg __P((ENV *, + struct sending_msg *, u_int8_t *, u_int, const DBT *, const DBT *)); +static int __repmgr_send_internal + __P((ENV *, REPMGR_CONNECTION *, struct sending_msg *, db_timeout_t)); +static int enqueue_msg + __P((ENV *, REPMGR_CONNECTION *, struct sending_msg *, size_t)); +static REPMGR_SITE *__repmgr_available_site __P((ENV *, int)); +static REPMGR_SITE *__repmgr_find_available_peer __P((ENV *)); + +/* + * Connects to the given network address, using blocking operations. Any thread + * synchronization is the responsibility of the caller. + * + * PUBLIC: int __repmgr_connect __P((ENV *, + * PUBLIC: repmgr_netaddr_t *, REPMGR_CONNECTION **, int *)); + */ +int +__repmgr_connect(env, netaddr, connp, errp) + ENV *env; + repmgr_netaddr_t *netaddr; + REPMGR_CONNECTION **connp; + int *errp; +{ + REPMGR_CONNECTION *conn; + ADDRINFO *ai0, *ai; + socket_t sock; + int err, ret; + u_int port; + + COMPQUIET(err, 0); +#ifdef CONFIG_TEST + port = fake_port(env, netaddr->port); +#else + port = netaddr->port; +#endif + if ((ret = __repmgr_getaddr(env, netaddr->host, port, 0, &ai0)) != 0) + return (ret); + + /* + * Try each address on the list, until success. Note that if several + * addresses on the list produce retryable error, we can only pass back + * to our caller the last one. 
+ */ + for (ai = ai0; ai != NULL; ai = ai->ai_next) { + switch ((ret = __repmgr_start_connect(env, &sock, ai, &err))) { + case 0: + if ((ret = __repmgr_finish_connect(env, + sock, &conn)) == 0) + *connp = conn; + else + (void)closesocket(sock); + goto out; + case DB_REP_UNAVAIL: + continue; + default: + goto out; + } + } + +out: + __os_freeaddrinfo(env, ai0); + if (ret == DB_REP_UNAVAIL) { + __repmgr_print_conn_err(env, netaddr, err); + *errp = err; + } + return (ret); +} + +static int +__repmgr_start_connect(env, socket_result, ai, err) + ENV *env; + socket_t *socket_result; + ADDRINFO *ai; + int *err; +{ + socket_t s; + int ret; + + if ((s = socket(ai->ai_family, + ai->ai_socktype, ai->ai_protocol)) == SOCKET_ERROR) { + ret = net_errno; + __db_err(env, ret, "create socket"); + return (ret); + } + + if (connect(s, ai->ai_addr, (socklen_t)ai->ai_addrlen) != 0) { + *err = net_errno; + (void)closesocket(s); + return (DB_REP_UNAVAIL); + } + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "connection established")); + + *socket_result = s; + return (0); +} + +static int +__repmgr_finish_connect(env, s, connp) + ENV *env; + socket_t s; + REPMGR_CONNECTION **connp; +{ + REPMGR_CONNECTION *conn; + int ret; + + if ((ret = __repmgr_new_connection(env, &conn, s, CONN_CONNECTED)) != 0) + return (ret); + + if ((ret = __repmgr_propose_version(env, conn)) == 0) + *connp = conn; + else + (void)__repmgr_destroy_conn(env, conn); + return (ret); +} + +static int +__repmgr_propose_version(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + __repmgr_version_proposal_args versions; + repmgr_netaddr_t *my_addr; + size_t hostname_len, rec_length; + u_int8_t *buf, *p; + int ret; + + db_rep = env->rep_handle; + my_addr = &SITE_FROM_EID(db_rep->self_eid)->net_addr; + + /* + * In repmgr wire protocol version 1, a handshake message had a rec part + * that looked like this: + * + * +-----------------+----+ + * | host name ... 
| \0 | + * +-----------------+----+ + * + * To ensure its own sanity, the old repmgr would write a NUL into the + * last byte of a received message, and then use normal C library string + * operations (e.g., strlen, strcpy). + * + * Now, a version proposal has a rec part that looks like this: + * + * +-----------------+----+------------------+------+ + * | host name ... | \0 | extra info ... | \0 | + * +-----------------+----+------------------+------+ + * + * The "extra info" contains the version parameters, in marshaled form. + */ + + hostname_len = strlen(my_addr->host); + rec_length = hostname_len + 1 + + __REPMGR_VERSION_PROPOSAL_SIZE + 1; + if ((ret = __os_malloc(env, rec_length, &buf)) != 0) + goto out; + p = buf; + (void)strcpy((char*)p, my_addr->host); + + p += hostname_len + 1; + versions.min = DB_REPMGR_MIN_VERSION; + versions.max = DB_REPMGR_VERSION; + __repmgr_version_proposal_marshal(env, &versions, p); + + ret = __repmgr_send_v1_handshake(env, conn, buf, rec_length); + __os_free(env, buf); +out: + return (ret); +} + +/* + * __repmgr_send -- + * The send function for DB_ENV->rep_set_transport. + * + * PUBLIC: int __repmgr_send __P((DB_ENV *, const DBT *, const DBT *, + * PUBLIC: const DB_LSN *, int, u_int32_t)); + */ +int +__repmgr_send(dbenv, control, rec, lsnp, eid, flags) + DB_ENV *dbenv; + const DBT *control, *rec; + const DB_LSN *lsnp; + int eid; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + ENV *env; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + struct repmgr_permanence perm; + db_timeout_t maxblock; + u_int32_t available, nclients, needed, npeers_sent, nsites_sent, quorum; + int policy, ret, t_ret; + + env = dbenv->env; + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + COMPQUIET(quorum, 0); + + LOCK_MUTEX(db_rep->mutex); + + /* + * If we're already "finished", we can't send anything. This covers the + * case where a bulk buffer is flushed at env close, or perhaps an + * unexpected __repmgr_thread_failure. 
+ */ + if (db_rep->finished) { + ret = DB_REP_UNAVAIL; + goto out; + } + + /* + * Check whether we need to refresh our site address information with + * more recent updates from shared memory. + */ + if (rep->siteinfo_seq > db_rep->siteinfo_seq && + (ret = __repmgr_sync_siteaddr(env)) != 0) + goto out; + + if (eid == DB_EID_BROADCAST) { + if ((ret = __repmgr_send_broadcast(env, REPMGR_REP_MESSAGE, + control, rec, &nsites_sent, &npeers_sent)) != 0) + goto out; + } else { + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(eid)); + + /* + * Since repmgr's simple c2c implementation doesn't truly manage + * staged synchronization it doesn't work well with master + * leases. So, disable it during the time when a new master may + * be trying to establish its first set of lease grants. + */ + if (IS_USING_LEASES(env) && !rep->stat.st_startup_complete) + LF_CLR(DB_REP_ANYWHERE); + /* + * If this is a request that can be sent anywhere, then see if + * we can send it to our peer (to save load on the master), but + * not if it's a rerequest, 'cuz that likely means we tried this + * already and failed. + */ + if ((flags & (DB_REP_ANYWHERE | DB_REP_REREQUEST)) == + DB_REP_ANYWHERE && + (site = __repmgr_find_available_peer(env)) != NULL) { + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "sending request to peer")); + } else if ((site = __repmgr_available_site(env, eid)) == + NULL) { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "ignoring message sent to unavailable site")); + ret = DB_REP_UNAVAIL; + goto out; + } + + conn = site->ref.conn; + + /* + * In case the connection is clogged up and we have to wait for + * space on the output queue, how long shall we wait? We could + * of course create a new timeout configuration type, so that + * the application could set it directly. But that would start + * to overwhelm the user with too many choices to think about. 
+ * We already have an ACK timeout, which is the user's estimate + * of how long it should take to send a message to the client, + * have it be processed, and return a message back to us. We + * multiply that by the queue size, because that's how many + * messages have to be swallowed up by the client before we're + * able to start sending again (at least to a rough + * approximation). + */ + maxblock = OUT_QUEUE_LIMIT * + (rep->ack_timeout == 0 ? + DB_REPMGR_DEFAULT_ACK_TIMEOUT : rep->ack_timeout); + if ((ret = __repmgr_send_one(env, conn, REPMGR_REP_MESSAGE, + control, rec, maxblock)) == DB_REP_UNAVAIL && + (t_ret = __repmgr_bust_connection(env, conn)) != 0) + ret = t_ret; + if (ret != 0) + goto out; + + nsites_sent = 1; + npeers_sent = F_ISSET(site, SITE_ELECTABLE) ? 1 : 0; + } + + /* + * Right now, nsites and npeers represent the (maximum) number of sites + * we've attempted to begin sending the message to. Of course we + * haven't really received any ack's yet. But since we've only sent to + * nsites/npeers other sites, that's the maximum number of ack's we + * could possibly expect. If even that number fails to satisfy our PERM + * policy, there's no point waiting for something that will never + * happen. + */ + if (LF_ISSET(DB_REP_PERMANENT)) { + /* Adjust so as not to count the local site. */ + nclients = db_rep->region->config_nsites -1; + + /* + * When doing membership DB changes, avoid some impossible + * situations. 
+ */ + policy = rep->perm_policy; + switch (db_rep->active_gmdb_update) { + case gmdb_primary: + if (policy == DB_REPMGR_ACKS_ALL || + policy == DB_REPMGR_ACKS_ALL_PEERS) + policy = DB_REPMGR_ACKS_ALL_AVAILABLE; + else if (policy == DB_REPMGR_ACKS_QUORUM && + nclients == 1) + nclients = 0; + else if ((policy == DB_REPMGR_ACKS_ONE || + policy == DB_REPMGR_ACKS_ONE_PEER) && + nclients == 1) { + nclients = 0; + policy = DB_REPMGR_ACKS_QUORUM; + } + break; + case gmdb_secondary: + policy = DB_REPMGR_ACKS_NONE; + break; + case none: + break; + } + switch (policy) { + case DB_REPMGR_ACKS_NONE: + needed = 0; + COMPQUIET(available, 0); + break; + + case DB_REPMGR_ACKS_ONE: + needed = 1; + available = nsites_sent; + break; + + case DB_REPMGR_ACKS_ALL: + /* Number of sites in the group besides myself. */ + needed = nclients; + available = nsites_sent; + break; + + case DB_REPMGR_ACKS_ONE_PEER: + needed = 1; + available = npeers_sent; + break; + + case DB_REPMGR_ACKS_ALL_PEERS: + /* + * Too hard to figure out "needed", since we're not + * keeping track of how many peers we have; so just skip + * the optimization in this case. + */ + needed = 1; + available = npeers_sent; + break; + + case DB_REPMGR_ACKS_QUORUM: + case DB_REPMGR_ACKS_ALL_AVAILABLE: + /* + * The minimum number of acks necessary to ensure that + * the transaction is durable if an election is held. + * + * Unless instructed otherwise, our special handling for + * 2-site groups means that a client that loses contact + * with the master elects itself master (even though + * that doesn't constitute a majority). In order to + * provide the expected guarantee implied by the + * definition of "quorum" we have to fudge the ack + * calculation in this case: specifically, we need to + * make sure that the client has received it in order + * for us to consider it "perm". Thus, if nclients is + * 1, needed should be 1. 
+ * + * While we're at it, if nclients is 0 (a nascent + * "group" consisting of nothing but a master), surely + * the number of acks we need should be 0. + * + * Note that turning the usual strict behavior back on + * in a 2-site group results in "0" as the number of + * clients needed to ack a txn in order for it to have + * arrived at a quorum. This is the correct result, + * strange as it may seem! This may well mean that in a + * 2-site group the QUORUM policy is rarely the right + * choice. + * + * When a GMDB update adds the second site, force + * "strict" behavior: in that case nsites is 2, but the + * new site is not yet allowed to contribute an ack. + */ + if (nclients > 1 || + FLD_ISSET(db_rep->region->config, + REP_C_2SITE_STRICT) || + db_rep->active_gmdb_update == gmdb_primary) + needed = nclients / 2; + else + needed = nclients; + if (policy == DB_REPMGR_ACKS_ALL_AVAILABLE) { + quorum = needed; + needed = available = nsites_sent; + } else { + available = npeers_sent; + quorum = 0; + } + break; + + default: + COMPQUIET(available, 0); + COMPQUIET(needed, 0); + (void)__db_unknown_path(env, "__repmgr_send"); + break; + } + if (needed == 0) + goto out; + if (available < needed) { + ret = DB_REP_UNAVAIL; + goto out; + } + + /* In ALL_PEERS case, display of "needed" might be confusing. */ + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "will await acknowledgement: need %u", needed)); + perm.lsn = *lsnp; + perm.threshold = needed; + perm.policy = policy; + ret = __repmgr_await_cond(env, is_permanent, + &perm, rep->ack_timeout, &db_rep->ack_waiters); + /* + * If using ACKS_ALL_AVAILABLE and all possible sites acked, + * only return success if we have a quorum minimum available + * to ensure data integrity. 
+ */ + if (ret == 0 && + policy == DB_REPMGR_ACKS_ALL_AVAILABLE && + available < quorum) + ret = DB_REP_UNAVAIL; + } + +out: UNLOCK_MUTEX(db_rep->mutex); + if (LF_ISSET(DB_REP_PERMANENT)) { + if (ret != 0) { + switch (db_rep->active_gmdb_update) { + case none: + /* + * Fire perm-failed event to the application as + * usual; no other bookkeeping needed here. + */ + STAT(db_rep->region->mstat.st_perm_failed++); + DB_EVENT(env, DB_EVENT_REP_PERM_FAILED, NULL); + break; + case gmdb_primary: + /* + * Since this is a membership DB operation, + * refrain from bothering the application about + * it (with an event that it wouldn't be + * expecting), and make a note of the failure so + * we can resolve it later. + */ + db_rep->limbo_failure = *lsnp; + /* FALLTHROUGH */ + case gmdb_secondary: + /* Merely refrain from firing event. */ + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "GMDB perm failure %d at [%lu][%lu]", + (int)db_rep->active_gmdb_update, + (u_long)lsnp->file, (u_long)lsnp->offset)); + break; + } + } else if (db_rep->limbo_resolution_needed) { + /* + * A previous membership DB operation failed, leaving us + * "in limbo", but now some perm operation has completed + * successfully. Since the ack of any txn implies ack + * of all txns that occur before it (in LSN order), we + * now know that the previous failure can be resolved. + * We can't do it here in this thread, so put a request + * on the message processing queue to have it handled + * later. + */ + db_rep->durable_lsn = *lsnp; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "perm success [%lu][%lu] with limbo resolution needed", + (u_long)lsnp->file, (u_long)lsnp->offset)); + db_rep->limbo_resolution_needed = FALSE; + + /* Don't trump ret, even if it's zero. 
*/ + LOCK_MUTEX(db_rep->mutex); + if ((t_ret = __repmgr_defer_op(env, + REPMGR_RESOLVE_LIMBO)) != 0) + __db_err(env, t_ret, "repmgr_defer_op"); + UNLOCK_MUTEX(db_rep->mutex); + } + } + return (ret); +} + +static REPMGR_SITE * +__repmgr_available_site(env, eid) + ENV *env; + int eid; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + + db_rep = env->rep_handle; + site = SITE_FROM_EID(eid); + if (IS_SITE_HANDSHAKEN(site)) + return (site); + return (NULL); +} + +/* + * Synchronize our list of sites with new information that has been added to the + * list in the shared region. + * + * PUBLIC: int __repmgr_sync_siteaddr __P((ENV *)); + */ +int +__repmgr_sync_siteaddr(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + u_int added; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + ret = 0; + + MUTEX_LOCK(env, rep->mtx_repmgr); + + if (!IS_VALID_EID(db_rep->self_eid)) + db_rep->self_eid = rep->self_eid; + + added = db_rep->site_cnt; + if ((ret = __repmgr_copy_in_added_sites(env)) == 0) + ret = __repmgr_init_new_sites(env, added, db_rep->site_cnt); + + MUTEX_UNLOCK(env, rep->mtx_repmgr); + return (ret); +} + +/* + * Sends message to all sites with which we currently have an active + * connection. Sets result parameters according to how many sites we attempted + * to begin sending to, even if we did nothing more than queue it for later + * delivery. + * + * !!! + * Caller must hold env->mutex. 
+ * PUBLIC: int __repmgr_send_broadcast __P((ENV *, u_int, + * PUBLIC: const DBT *, const DBT *, u_int *, u_int *)); + */ +int +__repmgr_send_broadcast(env, type, control, rec, nsitesp, npeersp) + ENV *env; + u_int type; + const DBT *control, *rec; + u_int *nsitesp, *npeersp; +{ + DB_REP *db_rep; + REP *rep; + struct sending_msg msg; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + REPMGR_IOVECS iovecs; + u_int8_t msg_hdr_buf[__REPMGR_MSG_HDR_SIZE]; + u_int eid, nsites, npeers; + int full_member, ret; + + static const u_int version_max_msg_type[] = { + 0, + REPMGR_MAX_V1_MSG_TYPE, + REPMGR_MAX_V2_MSG_TYPE, + REPMGR_MAX_V3_MSG_TYPE, + REPMGR_MAX_V4_MSG_TYPE + }; + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* + * Sending a broadcast is quick, because we allow no blocking. So it + * shouldn't much matter. But just in case, take the timestamp before + * sending, so that if anything we err on the side of keeping clients + * placated (i.e., possibly sending a heartbeat slightly more frequently + * than necessary). + */ + __os_gettime(env, &db_rep->last_bcast, 1); + + msg.iovecs = &iovecs; + setup_sending_msg(env, &msg, msg_hdr_buf, type, control, rec); + nsites = npeers = 0; + + /* Send to (only the main connection with) every site. */ + FOR_EACH_REMOTE_SITE_INDEX(eid) { + if ((site = __repmgr_available_site(env, (int)eid)) == NULL) + continue; + /* + * Exclude non-member sites, unless we're the master, since it's + * useful to keep letting a removed site see updates so that it + * learns of its own removal, and will know to rejoin at its + * next reboot. 
+ */ + if (site->membership == SITE_PRESENT) + full_member = TRUE; + else { + full_member = FALSE; + if (rep->master_id != db_rep->self_eid) + continue; + } + conn = site->ref.conn; + + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(conn->eid) && + conn->version > 0 && + conn->version <= DB_REPMGR_VERSION); + + /* + * Skip if the type of message we're sending is beyond the range + * of known message types for this connection's version. + * + * !!! + * Don't be misled by the apparent generality of this simple + * test. It works currently, because the only kinds of messages + * that we broadcast are REP_MESSAGE and HEARTBEAT. But in the + * future other kinds of messages might require more intricate + * per-connection-version customization (for example, + * per-version message format conversion, addition of new + * fields, etc.). + */ + if (type > version_max_msg_type[conn->version]) + continue; + + /* + * Broadcast messages are either application threads committing + * transactions, or replication status message that we can + * afford to lose. So don't allow blocking for them (pass + * maxblock argument as 0). + */ + if ((ret = __repmgr_send_internal(env, conn, &msg, 0)) == 0) { + if (full_member) { + /* + * Since the purpose of the counting is to + * manage waiting for acks, only count sites we + * send to from which we can reasonably expect + * to get an ack. When a site is not a fully + * "present" member of the group we can't accept + * an ack from it. + */ + nsites++; + if (F_ISSET(site, SITE_ELECTABLE)) + npeers++; + } + } else if (ret == DB_TIMEOUT) { + /* + * Couldn't send because of a full output queue. + * Incrementing counters would be wrong, but it's + * otherwise OK in the sense that the connection isn't + * definitively known to be broken, and rep protocol + * always allows us to drop a message if we have to. 
+ */ + ret = 0; + } else if (ret == DB_REP_UNAVAIL) { + if ((ret = __repmgr_bust_connection(env, conn)) != 0) + return (ret); + } else + return (ret); + } + + *nsitesp = nsites; + *npeersp = npeers; + return (0); +} + +/* + * __repmgr_send_one -- + * Send a message to a site, or if you can't just yet, make a copy of it + * and arrange to have it sent later. 'rec' may be NULL, in which case we send + * a zero length and no data. + * + * !!! + * Note that the mutex should be held through this call. + * It doubles as a synchronizer to make sure that two threads don't + * intersperse writes that are part of two single messages. + * + * PUBLIC: int __repmgr_send_one __P((ENV *, REPMGR_CONNECTION *, + * PUBLIC: u_int, const DBT *, const DBT *, db_timeout_t)); + */ +int +__repmgr_send_one(env, conn, msg_type, control, rec, maxblock) + ENV *env; + REPMGR_CONNECTION *conn; + u_int msg_type; + const DBT *control, *rec; + db_timeout_t maxblock; +{ + struct sending_msg msg; + REPMGR_IOVECS iovecs; + u_int8_t hdr_buf[__REPMGR_MSG_HDR_SIZE]; + int ret; + + msg.iovecs = &iovecs; + setup_sending_msg(env, &msg, hdr_buf, msg_type, control, rec); + if ((ret = + __repmgr_send_internal(env, conn, &msg, maxblock)) == DB_TIMEOUT && + maxblock == 0) + ret = 0; + return (ret); +} + +/* + * PUBLIC: int __repmgr_send_many __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, REPMGR_IOVECS *, db_timeout_t)); + */ +int +__repmgr_send_many(env, conn, iovecs, maxblock) + ENV *env; + REPMGR_CONNECTION *conn; + REPMGR_IOVECS *iovecs; + db_timeout_t maxblock; +{ + struct sending_msg msg; + int ret; + + if (conn->state == CONN_DEFUNCT) + return (DB_REP_UNAVAIL); + msg.iovecs = iovecs; + msg.fmsg = NULL; + if ((ret = + __repmgr_send_internal(env, conn, &msg, maxblock)) == DB_TIMEOUT && + maxblock == 0) + ret = 0; + if (ret != 0 && ret != DB_TIMEOUT) + (void)__repmgr_disable_connection(env, conn); + return (ret); +} + +/* + * PUBLIC: int __repmgr_send_own_msg __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, 
u_int32_t, u_int8_t *, u_int32_t)); + */ +int +__repmgr_send_own_msg(env, conn, type, buf, len) + ENV *env; + REPMGR_CONNECTION *conn; + u_int8_t *buf; + u_int32_t len, type; +{ + REPMGR_IOVECS iovecs; + struct sending_msg msg; + __repmgr_msg_hdr_args msg_hdr; + u_int8_t hdr_buf[__REPMGR_MSG_HDR_SIZE]; + + if (conn->version < OWN_MIN_VERSION) + return (0); + msg_hdr.type = REPMGR_OWN_MSG; + REPMGR_OWN_BUF_SIZE(msg_hdr) = len; + REPMGR_OWN_MSG_TYPE(msg_hdr) = type; + __repmgr_msg_hdr_marshal(env, &msg_hdr, hdr_buf); + + __repmgr_iovec_init(&iovecs); + __repmgr_add_buffer(&iovecs, hdr_buf, __REPMGR_MSG_HDR_SIZE); + if (len > 0) + __repmgr_add_buffer(&iovecs, buf, len); + + msg.iovecs = &iovecs; + msg.fmsg = NULL; + return (__repmgr_send_internal(env, conn, &msg, 0)); +} + +/* + * Attempts a "best effort" to send a message on the given site. If there is an + * excessive backlog of message already queued on the connection, what shall we + * do? If the caller doesn't mind blocking, we'll wait (a limited amount of + * time) for the queue to drain. Otherwise we'll simply drop the message. This + * is always allowed by the replication protocol. But in the case of a + * multi-message response to a request like PAGE_REQ, LOG_REQ or ALL_REQ we + * almost always get a flood of messages that instantly fills our queue, so + * blocking improves performance (by avoiding the need for the client to + * re-request). + */ +static int +__repmgr_send_internal(env, conn, msg, maxblock) + ENV *env; + REPMGR_CONNECTION *conn; + struct sending_msg *msg; + db_timeout_t maxblock; +{ + DB_REP *db_rep; + SITE_STRING_BUFFER buffer; + int ret; + size_t total_written; + + db_rep = env->rep_handle; + + DB_ASSERT(env, conn->state != CONN_DEFUNCT); + if (!STAILQ_EMPTY(&conn->outbound_queue)) { + /* + * Output to this site is currently owned by the select() + * thread, so we can't try sending in-line here. We can only + * queue the msg for later. 
+ */ + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "msg to %s to be queued", + __repmgr_format_eid_loc(db_rep, conn, buffer))); + if (conn->out_queue_length >= OUT_QUEUE_LIMIT && + maxblock > 0 && conn->state != CONN_CONGESTED) { + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "block thread, awaiting output queue space")); + conn->ref_count++; + ret = __repmgr_await_drain(env, conn, maxblock); + conn->ref_count--; + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "drain returned %d (%d,%d)", ret, + db_rep->finished, conn->out_queue_length)); + if (db_rep->finished) + return (DB_TIMEOUT); + if (ret != 0) + return (ret); + if (STAILQ_EMPTY(&conn->outbound_queue)) + goto empty; + } + if (conn->out_queue_length < OUT_QUEUE_LIMIT) + return (enqueue_msg(env, conn, msg, 0)); + else { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "queue limit exceeded")); + STAT(env->rep_handle-> + region->mstat.st_msgs_dropped++); + return (DB_TIMEOUT); + } + } +empty: + if ((ret = __repmgr_write_iovecs(env, + conn, msg->iovecs, &total_written)) == 0) + return (0); + switch (ret) { + case WOULDBLOCK: +#if defined(DB_REPMGR_EAGAIN) && DB_REPMGR_EAGAIN != WOULDBLOCK + case DB_REPMGR_EAGAIN: +#endif + break; + default: +#ifdef EBADF + DB_ASSERT(env, ret != EBADF); +#endif + __repmgr_fire_conn_err_event(env, conn, ret); + STAT(env->rep_handle->region->mstat.st_connection_drop++); + return (DB_REP_UNAVAIL); + } + + VPRINT(env, (env, DB_VERB_REPMGR_MISC, "wrote only %lu bytes to %s", + (u_long)total_written, + __repmgr_format_eid_loc(db_rep, conn, buffer))); + /* + * We can't send any more without blocking: queue (a pointer to) a + * "flattened" copy of the message, so that the select() thread will + * finish sending it later. + */ + if ((ret = enqueue_msg(env, conn, msg, total_written)) != 0) + return (ret); + + STAT(env->rep_handle->region->mstat.st_msgs_queued++); + + /* + * Wake the main select thread so that it can discover that it has + * received ownership of this connection. 
Note that we didn't have to + * do this in the previous case (above), because the non-empty queue + * implies that the select() thread is already managing ownership of + * this connection. + */ + return (__repmgr_wake_main_thread(env)); +} + +/* + * PUBLIC: int __repmgr_write_iovecs __P((ENV *, REPMGR_CONNECTION *, + * PUBLIC: REPMGR_IOVECS *, size_t *)); + */ +int +__repmgr_write_iovecs(env, conn, iovecs, writtenp) + ENV *env; + REPMGR_CONNECTION *conn; + REPMGR_IOVECS *iovecs; + size_t *writtenp; +{ + REPMGR_IOVECS iovec_buf, *v; + size_t nw, sz, total_written; + int ret; + + /* + * Send as much data to the site as we can, without blocking. Keep + * writing as long as we're making some progress. + * + * Make a scratch copy of iovecs for our use, since we destroy it in the + * process of adjusting pointers after each partial I/O. The minimal + * REPMGR_IOVECS struct template is usually enough. But for app + * messages that need more than 3 segments we allocate a separate + * buffer. + */ + if (iovecs->count <= MIN_IOVEC) { + v = &iovec_buf; + sz = sizeof(iovec_buf); + } else { + sz = (size_t)REPMGR_IOVECS_ALLOC_SZ((u_int)iovecs->count); + if ((ret = __os_malloc(env, sz, &v)) != 0) + return (ret); + } + memcpy(v, iovecs, sz); + + total_written = 0; + while ((ret = __repmgr_writev(conn->fd, &v->vectors[v->offset], + v->count-v->offset, &nw)) == 0) { + total_written += nw; + if (__repmgr_update_consumed(v, nw)) /* all written */ + break; + } + *writtenp = total_written; + if (v != &iovec_buf) + __os_free(env, v); + return (ret); +} + +/* + * Count up how many sites have ack'ed the given LSN. Returns TRUE if enough + * sites have ack'ed; FALSE otherwise. + * + * !!! + * Caller must hold the mutex. 
+ */ +static int +is_permanent(env, context) + ENV *env; + void *context; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + struct repmgr_permanence *perm; + u_int eid, nsites, npeers; + int is_perm, has_missing_peer, policy; + + db_rep = env->rep_handle; + perm = context; + policy = perm->policy; + + if (policy == DB_REPMGR_ACKS_NONE) + return (TRUE); + + nsites = npeers = 0; + has_missing_peer = FALSE; + FOR_EACH_REMOTE_SITE_INDEX(eid) { + site = SITE_FROM_EID(eid); + if (site->membership != SITE_PRESENT) + continue; + if (!F_ISSET(site, SITE_HAS_PRIO)) { + /* + * Never connected to this site: since we can't know + * whether it's a peer, assume the worst. + */ + has_missing_peer = TRUE; + continue; + } + + if (LOG_COMPARE(&site->max_ack, &perm->lsn) >= 0) { + nsites++; + if (F_ISSET(site, SITE_ELECTABLE)) + npeers++; + } else { + /* This site hasn't ack'ed the message. */ + if (F_ISSET(site, SITE_ELECTABLE)) + has_missing_peer = TRUE; + } + } + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "checking perm result, %lu, %lu, %d", + (u_long)nsites, (u_long)npeers, has_missing_peer)); + + switch (policy) { + case DB_REPMGR_ACKS_ALL: + case DB_REPMGR_ACKS_ALL_AVAILABLE: + case DB_REPMGR_ACKS_ONE: + is_perm = (nsites >= perm->threshold); + break; + case DB_REPMGR_ACKS_ONE_PEER: + case DB_REPMGR_ACKS_QUORUM: + is_perm = (npeers >= perm->threshold); + break; + case DB_REPMGR_ACKS_ALL_PEERS: + is_perm = !has_missing_peer; + break; + default: + is_perm = FALSE; + (void)__db_unknown_path(env, "is_permanent"); + } + return (is_perm); +} + +/* + * Abandons a connection, to recover from an error. Takes necessary recovery + * action. Note that we don't actually close and clean up the connection here; + * that happens later, in the select() thread main loop. See further + * explanation at function __repmgr_disable_connection(). + * + * PUBLIC: int __repmgr_bust_connection __P((ENV *, REPMGR_CONNECTION *)); + * + * !!! + * Caller holds mutex. 
+ */ +int +__repmgr_bust_connection(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + u_int32_t flags; + int ret, subordinate_conn, eid; + + db_rep = env->rep_handle; + rep = db_rep->region; + ret = 0; + + eid = conn->eid; + if ((ret = __repmgr_disable_connection(env, conn)) != 0) + return (ret); + + /* + * Take any/all appropriate recovery steps, depending on the nature of + * the connection, whom it was with, and our current role. + */ + if (conn->type == REP_CONNECTION && IS_KNOWN_REMOTE_SITE(eid)) { + site = SITE_FROM_EID(eid); + subordinate_conn = (conn != site->ref.conn); + + if (!subordinate_conn && + (ret = __repmgr_schedule_connection_attempt(env, + (u_int)eid, FALSE)) != 0) + return (ret); + + /* + * If the failed connection was the one between us and the + * master, assume that the master may have failed, and call for + * an election. But only do this for the connection to the main + * master process, not a subordinate one. And only do it if + * we're our site's main process, not a subordinate one. And + * skip it if the application has configured us not to do + * elections. + */ + if (!IS_SUBORDINATE(db_rep) && + !subordinate_conn && eid == rep->master_id) { + /* + * Even if we're not doing elections, defer the event + * notification to later execution in the election + * thread. We don't want to fire an event in the select + * thread, and certainly not while holding the mutex. 
+ */ + flags = ELECT_F_EVENT_NOTIFY; + if (FLD_ISSET(db_rep->region->config, REP_C_ELECTIONS)) + LF_SET(ELECT_F_IMMED | ELECT_F_FAST); + else + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Master failure, but no elections")); + + if ((ret = __repmgr_init_election(env, flags)) != 0) + return (ret); + } + + /* + * If we're the master site, and we lose a main connection to a + * client (whether we're the main replication process or a + * subordinate process), then the client is going to have + * trouble receiving live log records from us. So, set the + * temporary log archive block timer, to give the client a + * fighting chance to restart/recover/reconnect. (We don't care + * about the client's subordinate connections to us -- i.e., + * connections with a subordinate process at the client site -- + * because those sites can only be reading, not applying updates + * from us.) + */ + if (!subordinate_conn && rep->master_id == db_rep->self_eid) { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Repmgr: bust connection. Block archive")); + MASTER_UPDATE(env, (REGENV *)env->reginfo->primary); + } + } + return (0); +} + +/* + * Remove a connection from the possibility of any further activity, making sure + * it ends up on the main connections list, so that it will be cleaned up at the + * next opportunity in the select() thread. + * + * Various threads write onto TCP/IP sockets, and an I/O error could occur at + * any time. However, only the dedicated "select()" thread may close the socket + * file descriptor, because under POSIX we have to drop our mutex and then call + * select() as two distinct (non-atomic) operations. + * + * To simplify matters, there is a single place in the select thread where we + * close and clean up after any defunct connection. Even if the I/O error + * happens in the select thread we follow this convention. 
+ * + * When an error occurs, we disable the connection (mark it defunct so that no + * one else will try to use it, and so that the select thread will find it and + * clean it up), and then usually take some additional recovery action: schedule + * a connection retry for later, and possibly call for an election if it was a + * connection to the master. (This happens in the function + * __repmgr_bust_connection.) But sometimes we don't want to do the recovery + * part; just the disabling part. + * + * PUBLIC: int __repmgr_disable_connection __P((ENV *, REPMGR_CONNECTION *)); + */ +int +__repmgr_disable_connection(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + REPMGR_RESPONSE *resp; + u_int32_t i; + int eid, ret, t_ret; + + db_rep = env->rep_handle; + ret = 0; + + conn->state = CONN_DEFUNCT; + if (conn->type == REP_CONNECTION) { + eid = conn->eid; + if (IS_VALID_EID(eid)) { + site = SITE_FROM_EID(eid); + if (conn != site->ref.conn) + /* It's a subordinate connection. */ + TAILQ_REMOVE(&site->sub_conns, conn, entries); + TAILQ_INSERT_TAIL(&db_rep->connections, conn, entries); + conn->ref_count++; + } + conn->eid = -1; + } else if (conn->type == APP_CONNECTION) { + for (i = 0; i < conn->aresp; i++) { + resp = &conn->responses[i]; + if (F_ISSET(resp, RESP_IN_USE) && + F_ISSET(resp, RESP_THREAD_WAITING)) { + F_SET(resp, RESP_COMPLETE); + resp->ret = DB_REP_UNAVAIL; + } + } + ret = __repmgr_wake_waiters(env, &conn->response_waiters); + } + if ((t_ret = __repmgr_signal(&conn->drained)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __repmgr_wake_main_thread(env)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * PUBLIC: int __repmgr_cleanup_defunct __P((ENV *, REPMGR_CONNECTION *)); + * + * Caller should hold mutex, since we remove connection from main list. 
+ */ +int +__repmgr_cleanup_defunct(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + int ret, t_ret; + + db_rep = env->rep_handle; + + ret = __repmgr_close_connection(env, conn); + + TAILQ_REMOVE(&db_rep->connections, conn, entries); + if ((t_ret = __repmgr_decr_conn_ref(env, conn)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * PUBLIC: int __repmgr_close_connection __P((ENV *, REPMGR_CONNECTION *)); + */ +int +__repmgr_close_connection(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + int ret; +#ifdef DB_WIN32 + int t_ret; +#endif + + ret = 0; + if (conn->fd != INVALID_SOCKET && + closesocket(conn->fd) == SOCKET_ERROR) { + ret = net_errno; + __db_err(env, ret, DB_STR("3582", "closing socket")); + } + conn->fd = INVALID_SOCKET; +#ifdef DB_WIN32 + if (conn->event_object != WSA_INVALID_EVENT && + !WSACloseEvent(conn->event_object)) { + t_ret = net_errno; + __db_err(env, t_ret, DB_STR("3583", + "releasing WSA event object")); + if (ret == 0) + ret = t_ret; + } + conn->event_object = WSA_INVALID_EVENT; +#endif + return (ret); +} + +/* + * Decrements a connection's ref count; destroys the connection when the ref + * count reaches zero. + * + * PUBLIC: int __repmgr_decr_conn_ref __P((ENV *, REPMGR_CONNECTION *)); + */ +int +__repmgr_decr_conn_ref(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_ASSERT(env, conn->ref_count > 0); + return (--conn->ref_count > 0 ? 0 : + __repmgr_destroy_conn(env, conn)); +} + +/* + * Destroys a conn struct, by freeing all memory and associated resources. + * (This is a destructor, so it always must run to completion, and of course the + * passed-in object no longer exists upon return.) + * + * PUBLIC: int __repmgr_destroy_conn __P((ENV *, REPMGR_CONNECTION *)); + * + * Caller is responsible for holding mutex if necessary; we make no assumption + * here, since we operate only on the given connection, in isolation. 
(However, + * note that if this conn has messages on its outbound queue, those are shared + * objects, and we decrement the ref count. So in that case the mutex will need + * to be held.) + */ +int +__repmgr_destroy_conn(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + QUEUED_OUTPUT *out; + REPMGR_FLAT *msg; + REPMGR_RESPONSE *resp; + DBT *dbt; + int ret, t_ret; + + ret = 0; + + DB_ASSERT(env, conn->ref_count == 0); + /* + * Deallocate any input and output buffers we may have. + */ + if (conn->reading_phase == DATA_PHASE) { + switch (conn->msg_type) { + case REPMGR_OWN_MSG: + if (conn->input.rep_message == NULL) + break; + /* FALLTHROUGH */ + case REPMGR_APP_MESSAGE: + case REPMGR_HEARTBEAT: + case REPMGR_REP_MESSAGE: + __os_free(env, conn->input.rep_message); + break; + + case REPMGR_APP_RESPONSE: + /* + * DATA_PHASE of an APP_RESPONSE is another way of + * saying there must be a cur_resp, and it must be + * READING. + */ + DB_ASSERT(env, conn->cur_resp < conn->aresp && + conn->responses != NULL); + resp = &conn->responses[conn->cur_resp]; + DB_ASSERT(env, F_ISSET(resp, RESP_READING)); + if (F_ISSET(resp, RESP_DUMMY_BUF)) + __os_free(env, resp->dbt.data); + break; + + case REPMGR_PERMLSN: + case REPMGR_HANDSHAKE: + dbt = &conn->input.repmgr_msg.cntrl; + if (dbt->size > 0) + __os_free(env, dbt->data); + dbt = &conn->input.repmgr_msg.rec; + if (dbt->size > 0) + __os_free(env, dbt->data); + break; + + case REPMGR_RESP_ERROR: + /* + * This type doesn't use a DATA_PHASE, so this should be + * impossible. 
+ */ + default: + ret = __db_unknown_path(env, "destroy_conn"); + } + } + + if (conn->type == APP_CONNECTION && conn->responses != NULL) + __os_free(env, conn->responses); + + if ((t_ret = __repmgr_destroy_waiters(env, + &conn->response_waiters)) != 0 && ret == 0) + ret = t_ret; + + while (!STAILQ_EMPTY(&conn->outbound_queue)) { + out = STAILQ_FIRST(&conn->outbound_queue); + STAILQ_REMOVE_HEAD(&conn->outbound_queue, entries); + msg = out->msg; + if (--msg->ref_count <= 0) + __os_free(env, msg); + __os_free(env, out); + } + if ((t_ret = __repmgr_free_cond(&conn->drained)) != 0 && + ret == 0) + ret = t_ret; + + __os_free(env, conn); + return (ret); +} + +static int +enqueue_msg(env, conn, msg, offset) + ENV *env; + REPMGR_CONNECTION *conn; + struct sending_msg *msg; + size_t offset; +{ + QUEUED_OUTPUT *q_element; + int ret; + + if (msg->fmsg == NULL && ((ret = flatten(env, msg)) != 0)) + return (ret); + if ((ret = __os_malloc(env, sizeof(QUEUED_OUTPUT), &q_element)) != 0) + return (ret); + q_element->msg = msg->fmsg; + msg->fmsg->ref_count++; /* encapsulation would be sweeter */ + q_element->offset = offset; + + /* Put it on the connection's outbound queue. */ + STAILQ_INSERT_TAIL(&conn->outbound_queue, q_element, entries); + conn->out_queue_length++; + return (0); +} + +/* + * Either "control" or "rec" (or both) may be NULL, in which case we treat it + * like a zero-length DBT. + */ +static void +setup_sending_msg(env, msg, hdr_buf, type, control, rec) + ENV *env; + struct sending_msg *msg; + u_int8_t *hdr_buf; + u_int type; + const DBT *control, *rec; +{ + __repmgr_msg_hdr_args msg_hdr; + + /* + * Since we know that the msg hdr is a fixed size, we can add its buffer + * to the iovecs before actually marshaling the content. But the + * add_buffer and add_dbt calls have to be in the right order. 
+ */ + __repmgr_iovec_init(msg->iovecs); + __repmgr_add_buffer(msg->iovecs, hdr_buf, __REPMGR_MSG_HDR_SIZE); + + msg_hdr.type = type; + + if ((REP_MSG_CONTROL_SIZE(msg_hdr) = + (control == NULL ? 0 : control->size)) > 0) + __repmgr_add_dbt(msg->iovecs, control); + + if ((REP_MSG_REC_SIZE(msg_hdr) = (rec == NULL ? 0 : rec->size)) > 0) + __repmgr_add_dbt(msg->iovecs, rec); + + __repmgr_msg_hdr_marshal(env, &msg_hdr, hdr_buf); + msg->fmsg = NULL; +} + +/* + * Convert a message stored as iovec pointers to various pieces, into flattened + * form, by copying all the pieces, and then make the iovec just point to the + * new simplified form. + */ +static int +flatten(env, msg) + ENV *env; + struct sending_msg *msg; +{ + u_int8_t *p; + size_t msg_size; + int i, ret; + + DB_ASSERT(env, msg->fmsg == NULL); + + msg_size = msg->iovecs->total_bytes; + if ((ret = __os_malloc(env, sizeof(*msg->fmsg) + msg_size, + &msg->fmsg)) != 0) + return (ret); + msg->fmsg->length = msg_size; + msg->fmsg->ref_count = 0; + p = &msg->fmsg->data[0]; + + for (i = 0; i < msg->iovecs->count; i++) { + memcpy(p, msg->iovecs->vectors[i].iov_base, + msg->iovecs->vectors[i].iov_len); + p = &p[msg->iovecs->vectors[i].iov_len]; + } + __repmgr_iovec_init(msg->iovecs); + __repmgr_add_buffer(msg->iovecs, &msg->fmsg->data[0], msg_size); + return (0); +} + +/* + * Scan the list of remote sites, returning the first one that is a peer, + * is not the current master, and is available. + */ +static REPMGR_SITE * +__repmgr_find_available_peer(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + u_int i; + + db_rep = env->rep_handle; + rep = db_rep->region; + FOR_EACH_REMOTE_SITE_INDEX(i) { + site = &db_rep->sites[i]; + if (FLD_ISSET(site->config, DB_REPMGR_PEER) && + EID_FROM_SITE(site) != rep->master_id && + IS_SITE_AVAILABLE(site)) + return (site); + } + return (NULL); +} + +/* + * Copy host/port values into the given netaddr struct. 
Allocates memory for + * the copy of the host name, which becomes the responsibility of the caller. + * + * PUBLIC: int __repmgr_pack_netaddr __P((ENV *, const char *, + * PUBLIC: u_int, repmgr_netaddr_t *)); + */ +int +__repmgr_pack_netaddr(env, host, port, addr) + ENV *env; + const char *host; + u_int port; + repmgr_netaddr_t *addr; +{ + int ret; + + DB_ASSERT(env, host != NULL); + + if ((ret = __os_strdup(env, host, &addr->host)) != 0) + return (ret); + addr->port = (u_int16_t)port; + return (0); +} + +/* + * PUBLIC: int __repmgr_getaddr __P((ENV *, + * PUBLIC: const char *, u_int, int, ADDRINFO **)); + */ +int +__repmgr_getaddr(env, host, port, flags, result) + ENV *env; + const char *host; + u_int port; + int flags; /* Matches struct addrinfo declaration. */ + ADDRINFO **result; +{ + ADDRINFO *answer, hints; + char buffer[10]; /* 2**16 fits in 5 digits. */ + + /* + * Ports are really 16-bit unsigned values, but it's too painful to + * push that type through the API. + */ + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = flags; + (void)snprintf(buffer, sizeof(buffer), "%u", port); + + /* + * Although it's generally bad to discard error information, the return + * code from __os_getaddrinfo is undependable. Our callers at least + * would like to be able to distinguish errors in getaddrinfo (which we + * want to consider to be re-tryable), from other failure (e.g., EINVAL, + * above). + */ + if (__os_getaddrinfo(env, host, port, buffer, &hints, &answer) != 0) + return (DB_REP_UNAVAIL); + *result = answer; + + return (0); +} + +/* + * Initialize a socket for listening. Sets a file descriptor for the socket, + * ready for an accept() call in a thread that we're happy to let block. 
+ * + * PUBLIC: int __repmgr_listen __P((ENV *)); + */ +int +__repmgr_listen(env) + ENV *env; +{ + ADDRINFO *ai; + DB_REP *db_rep; + repmgr_netaddr_t *addrp; + char *why; + int sockopt, ret; + socket_t s; + + db_rep = env->rep_handle; + + /* Use OOB value as sentinel to show no socket open. */ + s = INVALID_SOCKET; + + addrp = &SITE_FROM_EID(db_rep->self_eid)->net_addr; + if ((ret = __repmgr_getaddr(env, + addrp->host, addrp->port, AI_PASSIVE, &ai)) != 0) + return (ret); + + /* + * Given the assert is correct, we execute the loop at least once, which + * means 'why' will have been set by the time it's needed. But of + * course lint doesn't know about DB_ASSERT. + */ + COMPQUIET(why, ""); + DB_ASSERT(env, ai != NULL); + for (; ai != NULL; ai = ai->ai_next) { + + if ((s = socket(ai->ai_family, + ai->ai_socktype, ai->ai_protocol)) == INVALID_SOCKET) { + why = DB_STR("3584", "can't create listen socket"); + continue; + } + + /* + * When testing, it's common to kill and restart regularly. On + * some systems, this causes bind to fail with "address in use" + * errors unless this option is set. 
+ */ + sockopt = 1; + if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (sockopt_t)&sockopt, + sizeof(sockopt)) != 0) { + why = DB_STR("3585", + "can't set REUSEADDR socket option"); + break; + } + + if (bind(s, ai->ai_addr, (socklen_t)ai->ai_addrlen) != 0) { + why = DB_STR("3586", + "can't bind socket to listening address"); + (void)closesocket(s); + s = INVALID_SOCKET; + continue; + } + + if (listen(s, 5) != 0) { + why = DB_STR("3587", "listen()"); + break; + } + + if ((ret = __repmgr_set_nonblocking(s)) != 0) { + __db_err(env, ret, DB_STR("3588", + "can't unblock listen socket")); + goto clean; + } + + db_rep->listen_fd = s; + goto out; + } + + ret = net_errno; + __db_err(env, ret, "%s", why); +clean: if (s != INVALID_SOCKET) + (void)closesocket(s); +out: + __os_freeaddrinfo(env, ai); + return (ret); +} + +/* + * PUBLIC: int __repmgr_net_close __P((ENV *)); + */ +int +__repmgr_net_close(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + ret = __repmgr_each_connection(env, final_cleanup, NULL, FALSE); + + if (db_rep->listen_fd != INVALID_SOCKET) { + if (closesocket(db_rep->listen_fd) == SOCKET_ERROR && ret == 0) + ret = net_errno; + db_rep->listen_fd = INVALID_SOCKET; + rep->listener = 0; + } + return (ret); +} + +/* Called only from env->close(), so we know we're single threaded. */ +static int +final_cleanup(env, conn, unused) + ENV *env; + REPMGR_CONNECTION *conn; + void *unused; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + int ret, t_ret; + + COMPQUIET(unused, NULL); + db_rep = env->rep_handle; + + ret = __repmgr_close_connection(env, conn); + /* Remove the connection from whatever list it's on, if any. */ + if (conn->type == REP_CONNECTION && IS_VALID_EID(conn->eid)) { + site = SITE_FROM_EID(conn->eid); + + if (site->state == SITE_CONNECTED && conn == site->ref.conn) { + /* Not on any list, so no need to do anything. 
*/ + } else + TAILQ_REMOVE(&site->sub_conns, conn, entries); + t_ret = __repmgr_destroy_conn(env, conn); + + } else { + TAILQ_REMOVE(&db_rep->connections, conn, entries); + t_ret = __repmgr_decr_conn_ref(env, conn); + } + if (t_ret != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * PUBLIC: void __repmgr_net_destroy __P((ENV *, DB_REP *)); + */ +void +__repmgr_net_destroy(env, db_rep) + ENV *env; + DB_REP *db_rep; +{ + REPMGR_RETRY *retry; + REPMGR_SITE *site; + u_int i; + + if (db_rep->sites == NULL) + return; + + while (!TAILQ_EMPTY(&db_rep->retries)) { + retry = TAILQ_FIRST(&db_rep->retries); + TAILQ_REMOVE(&db_rep->retries, retry, entries); + __os_free(env, retry); + } + + DB_ASSERT(env, TAILQ_EMPTY(&db_rep->connections)); + + for (i = 0; i < db_rep->site_cnt; i++) { + site = &db_rep->sites[i]; + DB_ASSERT(env, TAILQ_EMPTY(&site->sub_conns)); + __repmgr_cleanup_netaddr(env, &site->net_addr); + } + __os_free(env, db_rep->sites); + db_rep->sites = NULL; +} + +#ifdef CONFIG_TEST +/* + * Substitute a fake target port instead of the port actually configured, for + * certain types of testing, if desired. + * + * When a DB_TEST_FAKE_PORT environment variable is present, it names a TCP/IP + * port on which a "port arbiter" service may be running. If it is indeed + * running, we should send it a request to ask it what "fake" port to use in + * place of the given "real" port. (The "real" port is the port normally + * configured, and present in the membership database.) The arbiter is not + * always running for all tests, so if it's not present it simply means we + * should not substitute a fake port. Also, even if it is running, in some + * tests we don't want to substitute a fake port: in that case, the arbiter's + * response could name the same port as the "real" port we sent it. + * + * !!! This is only used for testing. 
+ */ +static u_int +fake_port(env, port) + ENV *env; + u_int port; +{ +#define MIN_PORT 1 +#define MAX_PORT 65535 + ADDRINFO *ai0, *ai; + db_iovec_t iovec; + char *arbiter, buf[100], *end, *p; + socket_t s; + long result; + size_t count; + int ret; + u_int arbiter_port; + + if ((arbiter = getenv("DB_TEST_FAKE_PORT")) == NULL) + return (port); + if (__db_getlong(env->dbenv, "repmgr_net.c:fake_port", + arbiter, MIN_PORT, MAX_PORT, &result) != 0) + return (port); + arbiter_port = (u_int)result; + + /* + * Send a message of the form "{config,Port}" onto a connection to + * arbiter_port. + */ + if ((ret = __repmgr_getaddr(env, + "localhost", arbiter_port, 0, &ai0)) != 0) { + __db_err(env, ret, "fake_port:getaddr"); + return (port); + } + s = INVALID_SOCKET; + for (ai = ai0; ai != NULL; ai = ai->ai_next) { + if ((s = socket(ai->ai_family, + ai->ai_socktype, ai->ai_protocol)) == SOCKET_ERROR) { + ret = net_errno; + s = INVALID_SOCKET; + __db_err(env, ret, "fake_port:socket"); + goto err; + } + /* + * Note that port substitution is used in only a small number of + * tests. When there is no "port arbiter" running, it's not an + * error; it just means we should use the normal configured port + * as is. + */ + if (connect(s, ai->ai_addr, (socklen_t)ai->ai_addrlen) != 0) { + ret = net_errno; + (void)closesocket(s); + s = INVALID_SOCKET; + } + } + if (ret != 0) + goto err; + (void)snprintf(buf, sizeof(buf), "{config,%u}\r\n", port); + iovec.iov_base = buf; + iovec.iov_len = (u_long)strlen(buf); + while ((ret = __repmgr_writev(s, &iovec, 1, &count)) == 0) { + iovec.iov_base = (u_int8_t *)iovec.iov_base + count; + if ((iovec.iov_len -= (u_long)count) == 0) + break; + } + if (ret != 0) { + __db_err(env, ret, "fake_port:writev"); + goto err; + } + + /* The response should be a line telling us what port to use. 
*/ + iovec.iov_base = buf; + iovec.iov_len = sizeof(buf); + p = buf; + while ((ret = __repmgr_readv(s, &iovec, 1, &count)) == 0) { + if (count == 0) { + __db_errx(env, "fake_port: premature EOF"); + goto err; + } + /* Keep reading until we get a line end. */ + for (p = iovec.iov_base, end = &p[count]; p < end; p++) + if (*p == '\r' || *p == '\n') + break; + if (p < end) { + *p = '\0'; + break; + } + iovec.iov_base = (u_int8_t *)iovec.iov_base + count; + iovec.iov_len -= (u_long)count; + DB_ASSERT(env, iovec.iov_len > 0); + } + if (ret != 0) + goto err; + + if (__db_getlong(env->dbenv, "repmgr_net.c:fake_port", + buf, MIN_PORT, MAX_PORT, &result) == 0) + port = (u_int)result; + +err: + /* + * Note that we always return some port value, even if an error happens. + * Since this is just test code: if an error prevented proper fake port + * substitution, it should result in a test failure. + */ + if (s != INVALID_SOCKET) + (void)closesocket(s); + __os_freeaddrinfo(env, ai0); + return (port); +} +#endif diff --git a/src/repmgr/repmgr_posix.c b/src/repmgr/repmgr_posix.c new file mode 100644 index 00000000..70df3fff --- /dev/null +++ b/src/repmgr/repmgr_posix.c @@ -0,0 +1,799 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * Invalid open file descriptor value, that can be used as an out-of-band + * sentinel to mark our signalling pipe as unopened. + */ +#define NO_SUCH_FILE_DESC (-1) + +/* Aggregated control info needed for preparing for select() call. */ +struct io_info { + fd_set *reads, *writes; + int maxfd; +}; + +static int __repmgr_conn_work __P((ENV *, REPMGR_CONNECTION *, void *)); +static int prepare_io __P((ENV *, REPMGR_CONNECTION *, void *)); + +/* + * Starts the thread described in the argument, and stores the resulting thread + * ID therein. 
+ * + * PUBLIC: int __repmgr_thread_start __P((ENV *, REPMGR_RUNNABLE *)); + */ +int +__repmgr_thread_start(env, runnable) + ENV *env; + REPMGR_RUNNABLE *runnable; +{ + pthread_attr_t *attrp; +#if defined(_POSIX_THREAD_ATTR_STACKSIZE) && defined(DB_STACKSIZE) + pthread_attr_t attributes; + size_t size; + int ret; + + attrp = &attributes; + if ((ret = pthread_attr_init(&attributes)) != 0) { + __db_err(env, ret, DB_STR("3630", + "pthread_attr_init in repmgr_thread_start")); + return (ret); + } + + size = DB_STACKSIZE; + +#ifdef PTHREAD_STACK_MIN + if (size < PTHREAD_STACK_MIN) + size = PTHREAD_STACK_MIN; +#endif + if ((ret = pthread_attr_setstacksize(&attributes, size)) != 0) { + __db_err(env, ret, DB_STR("3631", + "pthread_attr_setstacksize in repmgr_thread_start")); + return (ret); + } +#else + attrp = NULL; +#endif + + runnable->finished = FALSE; + runnable->quit_requested = FALSE; + runnable->env = env; + + return (pthread_create(&runnable->thread_id, attrp, + runnable->run, runnable)); +} + +/* + * PUBLIC: int __repmgr_thread_join __P((REPMGR_RUNNABLE *)); + */ +int +__repmgr_thread_join(thread) + REPMGR_RUNNABLE *thread; +{ + return (pthread_join(thread->thread_id, NULL)); +} + +/* + * PUBLIC: int __repmgr_set_nonblock_conn __P((REPMGR_CONNECTION *)); + */ +int +__repmgr_set_nonblock_conn(conn) + REPMGR_CONNECTION *conn; +{ + return (__repmgr_set_nonblocking(conn->fd)); +} + +/* + * PUBLIC: int __repmgr_set_nonblocking __P((socket_t)); + */ +int +__repmgr_set_nonblocking(fd) + socket_t fd; +{ + int flags; + + if ((flags = fcntl(fd, F_GETFL, 0)) < 0) + return (errno); + if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) + return (errno); + return (0); +} + +/* + * PUBLIC: int __repmgr_wake_waiters __P((ENV *, waiter_t *)); + * + * Wake any "waiter" threads (either sending threads waiting for acks, or + * channel users waiting for response to request). + * + * !!! + * Caller must hold the db_rep->mutex, if this thread synchronization is to work + * properly. 
+ */ +int +__repmgr_wake_waiters(env, waiter) + ENV *env; + waiter_t *waiter; +{ + COMPQUIET(env, NULL); + return (pthread_cond_broadcast(waiter)); +} + +/* + * Waits a limited time for a condition to become true. (If the limit is 0 we + * wait forever.) All calls share just the one db_rep->mutex, but use whatever + * waiter_t the caller passes us. + * + * PUBLIC: int __repmgr_await_cond __P((ENV *, + * PUBLIC: PREDICATE, void *, db_timeout_t, waiter_t *)); + */ +int +__repmgr_await_cond(env, pred, ctx, timeout, wait_condition) + ENV *env; + PREDICATE pred; + void *ctx; + db_timeout_t timeout; + waiter_t *wait_condition; +{ + DB_REP *db_rep; + struct timespec deadline; + int ret, timed; + + db_rep = env->rep_handle; + if ((timed = (timeout > 0))) + __repmgr_compute_wait_deadline(env, &deadline, timeout); + else + COMPQUIET(deadline.tv_sec, 0); + + while (!(*pred)(env, ctx)) { + if (timed) + ret = pthread_cond_timedwait(wait_condition, + db_rep->mutex, &deadline); + else + ret = pthread_cond_wait(wait_condition, db_rep->mutex); + if (db_rep->finished) + return (DB_REP_UNAVAIL); + if (ret == ETIMEDOUT) + return (DB_TIMEOUT); + if (ret != 0) + return (ret); + } + return (0); +} + +/* + * Waits for an in-progress membership DB operation (if any) to complete. + * + * PUBLIC: int __repmgr_await_gmdbop __P((ENV *)); + * + * Caller holds mutex; we drop it while waiting. + */ +int +__repmgr_await_gmdbop(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + while (db_rep->gmdb_busy) + if ((ret = pthread_cond_wait(&db_rep->gmdb_idle, + db_rep->mutex)) != 0) + return (ret); + return (0); +} + +/* + * __repmgr_compute_wait_deadline -- + * Computes a deadline time a certain distance into the future. 
+ * + * PUBLIC: void __repmgr_compute_wait_deadline __P((ENV*, + * PUBLIC: struct timespec *, db_timeout_t)); + */ +void +__repmgr_compute_wait_deadline(env, result, wait) + ENV *env; + struct timespec *result; + db_timeout_t wait; +{ + /* + * The result is suitable for the pthread_cond_timewait call. (That + * call uses nano-second resolution; elsewhere we use microseconds.) + * + * Start with "now"; then add the "wait" offset. + * + * A db_timespec is the same as a "struct timespec" so we can pass + * result directly to the underlying Berkeley DB OS routine. + * + * !!! + * We use the system clock for the pthread_cond_timedwait call, but + * that's not optimal on systems with monotonic timers. Instead, + * we should call pthread_condattr_setclock on systems where it and + * monotonic timers are available, and then configure both this call + * and the subsequent pthread_cond_timewait call to use a monotonic + * timer. + */ + __os_gettime(env, (db_timespec *)result, 0); + TIMESPEC_ADD_DB_TIMEOUT(result, wait); +} + +/* + * PUBLIC: int __repmgr_await_drain __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, db_timeout_t)); + * + * Waits for space to become available on the connection's output queue. + * Various ways we can exit: + * + * 1. queue becomes non-full + * 2. exceed time limit + * 3. connection becomes defunct (due to error in another thread) + * 4. repmgr is shutting down + * 5. any unexpected system resource failure + * + * In cases #3 and #5 we return an error code. Caller is responsible for + * distinguishing the remaining cases if desired, though we do help with #2 by + * showing the connection as congested. + * + * !!! + * Caller must hold repmgr->mutex. 
+ */ +int +__repmgr_await_drain(env, conn, timeout) + ENV *env; + REPMGR_CONNECTION *conn; + db_timeout_t timeout; +{ + DB_REP *db_rep; + struct timespec deadline; + int ret; + + db_rep = env->rep_handle; + + __repmgr_compute_wait_deadline(env, &deadline, timeout); + + ret = 0; + while (conn->out_queue_length >= OUT_QUEUE_LIMIT) { + ret = pthread_cond_timedwait(&conn->drained, + db_rep->mutex, &deadline); + switch (ret) { + case 0: + if (db_rep->finished) + goto out; /* #4. */ + /* + * Another thread could have stumbled into an error on + * the socket while we were waiting. + */ + if (conn->state == CONN_DEFUNCT) { + ret = DB_REP_UNAVAIL; /* #3. */ + goto out; + } + break; + case ETIMEDOUT: + conn->state = CONN_CONGESTED; + ret = 0; + goto out; /* #2. */ + default: + goto out; /* #5. */ + } + } + /* #1. */ + +out: + return (ret); +} + +/* + * PUBLIC: int __repmgr_alloc_cond __P((cond_var_t *)); + * + * Initialize a condition variable (in allocated space). + */ +int +__repmgr_alloc_cond(c) + cond_var_t *c; +{ + return (pthread_cond_init(c, NULL)); +} + +/* + * PUBLIC: int __repmgr_free_cond __P((cond_var_t *)); + * + * Clean up a previously initialized condition variable. + */ +int +__repmgr_free_cond(c) + cond_var_t *c; +{ + return (pthread_cond_destroy(c)); +} + +/* + * PUBLIC: void __repmgr_env_create_pf __P((DB_REP *)); + */ +void +__repmgr_env_create_pf(db_rep) + DB_REP *db_rep; +{ + db_rep->read_pipe = db_rep->write_pipe = NO_SUCH_FILE_DESC; +} + +/* + * "Platform"-specific mutex creation function. 
+ * + * PUBLIC: int __repmgr_create_mutex_pf __P((mgr_mutex_t *)); + */ +int +__repmgr_create_mutex_pf(mutex) + mgr_mutex_t *mutex; +{ + return (pthread_mutex_init(mutex, NULL)); +} + +/* + * PUBLIC: int __repmgr_destroy_mutex_pf __P((mgr_mutex_t *)); + */ +int +__repmgr_destroy_mutex_pf(mutex) + mgr_mutex_t *mutex; +{ + return (pthread_mutex_destroy(mutex)); +} + +/* + * PUBLIC: int __repmgr_init __P((ENV *)); + */ +int +__repmgr_init(env) + ENV *env; +{ + DB_REP *db_rep; + struct sigaction sigact; + int ack_inited, elect_inited, file_desc[2], gmdb_inited, queue_inited; + int ret; + + db_rep = env->rep_handle; + + /* + * Make sure we're not ignoring SIGPIPE, 'cuz otherwise we'd be killed + * just for trying to write onto a socket that had been reset. Note + * that we don't undo this in case of a later error, since we document + * that we leave the signal handling state like this, even after env + * close. + */ + if (sigaction(SIGPIPE, NULL, &sigact) == -1) { + ret = errno; + __db_err(env, ret, DB_STR("3632", + "can't access signal handler")); + return (ret); + } + if (sigact.sa_handler == SIG_DFL) { + sigact.sa_handler = SIG_IGN; + sigact.sa_flags = 0; + if (sigaction(SIGPIPE, &sigact, NULL) == -1) { + ret = errno; + __db_err(env, ret, DB_STR("3633", + "can't access signal handler")); + return (ret); + } + } + + ack_inited = elect_inited = gmdb_inited = queue_inited = FALSE; + if ((ret = __repmgr_init_waiters(env, &db_rep->ack_waiters)) != 0) + goto err; + ack_inited = TRUE; + + if ((ret = pthread_cond_init(&db_rep->check_election, NULL)) != 0) + goto err; + elect_inited = TRUE; + + if ((ret = pthread_cond_init(&db_rep->gmdb_idle, NULL)) != 0) + goto err; + gmdb_inited = TRUE; + + if ((ret = pthread_cond_init(&db_rep->msg_avail, NULL)) != 0) + goto err; + queue_inited = TRUE; + + if ((ret = pipe(file_desc)) == -1) { + ret = errno; + goto err; + } + + db_rep->read_pipe = file_desc[0]; + db_rep->write_pipe = file_desc[1]; + return (0); +err: + if (queue_inited) + 
(void)pthread_cond_destroy(&db_rep->msg_avail); + if (gmdb_inited) + (void)pthread_cond_destroy(&db_rep->gmdb_idle); + if (elect_inited) + (void)pthread_cond_destroy(&db_rep->check_election); + if (ack_inited) + (void)__repmgr_destroy_waiters(env, &db_rep->ack_waiters); + db_rep->read_pipe = db_rep->write_pipe = NO_SUCH_FILE_DESC; + + return (ret); +} + +/* + * PUBLIC: int __repmgr_deinit __P((ENV *)); + */ +int +__repmgr_deinit(env) + ENV *env; +{ + DB_REP *db_rep; + int ret, t_ret; + + db_rep = env->rep_handle; + + if (!(REPMGR_INITED(db_rep))) + return (0); + + ret = pthread_cond_destroy(&db_rep->msg_avail); + + if ((t_ret = pthread_cond_destroy(&db_rep->gmdb_idle)) != 0 && + ret == 0) + ret = t_ret; + + if ((t_ret = pthread_cond_destroy(&db_rep->check_election)) != 0 && + ret == 0) + ret = t_ret; + + if ((t_ret = __repmgr_destroy_waiters(env, + &db_rep->ack_waiters)) != 0 && ret == 0) + ret = t_ret; + + if (close(db_rep->read_pipe) == -1 && ret == 0) + ret = errno; + if (close(db_rep->write_pipe) == -1 && ret == 0) + ret = errno; + + db_rep->read_pipe = db_rep->write_pipe = NO_SUCH_FILE_DESC; + return (ret); +} + +/* + * PUBLIC: int __repmgr_init_waiters __P((ENV *, waiter_t *)); + */ +int +__repmgr_init_waiters(env, waiters) + ENV *env; + waiter_t *waiters; +{ + COMPQUIET(env, NULL); + return (pthread_cond_init(waiters, NULL)); +} + +/* + * PUBLIC: int __repmgr_destroy_waiters __P((ENV *, waiter_t *)); + */ +int +__repmgr_destroy_waiters(env, waiters) + ENV *env; + waiter_t *waiters; +{ + COMPQUIET(env, NULL); + return (pthread_cond_destroy(waiters)); +} + +/* + * PUBLIC: int __repmgr_lock_mutex __P((mgr_mutex_t *)); + */ +int +__repmgr_lock_mutex(mutex) + mgr_mutex_t *mutex; +{ + return (pthread_mutex_lock(mutex)); +} + +/* + * PUBLIC: int __repmgr_unlock_mutex __P((mgr_mutex_t *)); + */ +int +__repmgr_unlock_mutex(mutex) + mgr_mutex_t *mutex; +{ + return (pthread_mutex_unlock(mutex)); +} + +/* + * Signals a condition variable. + * + * !!! 
+ * Caller must hold mutex. + * + * PUBLIC: int __repmgr_signal __P((cond_var_t *)); + */ +int +__repmgr_signal(v) + cond_var_t *v; +{ + return (pthread_cond_broadcast(v)); +} + +/* + * Wake repmgr message processing threads, expressly for the purpose of shutting + * some subset of them down. + * + * !!! + * Caller must hold mutex. + * + * PUBLIC: int __repmgr_wake_msngers __P((ENV*, u_int)); + */ +int +__repmgr_wake_msngers(env, n) + ENV *env; + u_int n; +{ + DB_REP *db_rep; + + COMPQUIET(n, 0); + + db_rep = env->rep_handle; + return (__repmgr_signal(&db_rep->msg_avail)); +} + +/* + * PUBLIC: int __repmgr_wake_main_thread __P((ENV*)); + * + * Can be called either with or without the mutex being held. + */ +int +__repmgr_wake_main_thread(env) + ENV *env; +{ + DB_REP *db_rep; + u_int8_t any_value; + + COMPQUIET(any_value, 0); + db_rep = env->rep_handle; + + /* + * It doesn't matter what byte value we write. Just the appearance of a + * byte in the stream is enough to wake up the select() thread reading + * the pipe. + */ + if (write(db_rep->write_pipe, VOID_STAR_CAST &any_value, 1) == -1) + return (errno); + return (0); +} + +/* + * PUBLIC: int __repmgr_writev __P((socket_t, db_iovec_t *, int, size_t *)); + */ +int +__repmgr_writev(fd, iovec, buf_count, byte_count_p) + socket_t fd; + db_iovec_t *iovec; + int buf_count; + size_t *byte_count_p; +{ + int nw, result; + + if ((nw = writev(fd, iovec, buf_count)) == -1) { + /* Why? See note at __repmgr_readv(). */ + result = errno; + DB_ASSERT(NULL, result != 0); + return (result); + } + *byte_count_p = (size_t)nw; + return (0); +} + +/* + * PUBLIC: int __repmgr_readv __P((socket_t, db_iovec_t *, int, size_t *)); + */ +int +__repmgr_readv(fd, iovec, buf_count, byte_count_p) + socket_t fd; + db_iovec_t *iovec; + int buf_count; + size_t *byte_count_p; +{ + int result; + ssize_t nw; + + if ((nw = readv(fd, iovec, buf_count)) == -1) { + /* + * Why bother to assert this obvious "truth"? 
On some systems + * when the library is loaded into a single-threaded Tcl + * configuration the differing errno mechanisms apparently + * conflict, and we occasionally "see" a 0 value here! And that + * turns out to be painful to debug. + */ + result = errno; + DB_ASSERT(NULL, result != 0); + return (result); + } + *byte_count_p = (size_t)nw; + return (0); +} + +/* + * PUBLIC: int __repmgr_select_loop __P((ENV *)); + */ +int +__repmgr_select_loop(env) + ENV *env; +{ + struct timeval select_timeout, *select_timeout_p; + DB_REP *db_rep; + db_timespec timeout; + fd_set reads, writes; + struct io_info io_info; + int ret; + u_int8_t buf[10]; /* arbitrary size */ + + db_rep = env->rep_handle; + /* + * Almost this entire thread operates while holding the mutex. But note + * that it never blocks, except in the call to select() (which is the + * one place we relinquish the mutex). + */ + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_first_try_connections(env)) != 0) + goto out; + for (;;) { + FD_ZERO(&reads); + FD_ZERO(&writes); + + /* + * Figure out which sockets to ask for input and output. It's + * simple for the signalling pipe and listen socket; but depends + * on backlog states for the connections to other sites. + */ + FD_SET((u_int)db_rep->read_pipe, &reads); + io_info.maxfd = db_rep->read_pipe; + + if (!IS_SUBORDINATE(db_rep)) { + FD_SET((u_int)db_rep->listen_fd, &reads); + if (db_rep->listen_fd > io_info.maxfd) + io_info.maxfd = db_rep->listen_fd; + } + + io_info.reads = &reads; + io_info.writes = &writes; + if ((ret = __repmgr_each_connection(env, + prepare_io, &io_info, TRUE)) != 0) + goto out; + + if (__repmgr_compute_timeout(env, &timeout)) { + /* Convert the timespec to a timeval. */ + select_timeout.tv_sec = timeout.tv_sec; + select_timeout.tv_usec = timeout.tv_nsec / NS_PER_US; + select_timeout_p = &select_timeout; + } else { + /* No time-based events, so wait only for I/O. 
*/ + select_timeout_p = NULL; + } + + UNLOCK_MUTEX(db_rep->mutex); + + if ((ret = select(io_info.maxfd + 1, + &reads, &writes, NULL, select_timeout_p)) == -1) { + switch (ret = errno) { + case EINTR: + case EWOULDBLOCK: + LOCK_MUTEX(db_rep->mutex); + continue; /* simply retry */ + default: + __db_err(env, ret, DB_STR("3634", + "select")); + return (ret); + } + } + LOCK_MUTEX(db_rep->mutex); + if (db_rep->finished) { + ret = 0; + goto out; + } + + /* + * Timer expiration events include retrying of lost connections. + * Obviously elements can be added to the connection list there. + */ + if ((ret = __repmgr_check_timeouts(env)) != 0) + goto out; + + if ((ret = __repmgr_each_connection(env, + __repmgr_conn_work, &io_info, TRUE)) != 0) + goto out; + + /* + * Read any bytes in the signalling pipe. Note that we don't + * actually need to do anything with them; they're just there to + * wake us up when necessary. + */ + if (FD_ISSET((u_int)db_rep->read_pipe, &reads) && + read(db_rep->read_pipe, VOID_STAR_CAST buf, + sizeof(buf)) <= 0) { + ret = errno; + goto out; + } + /* + * Obviously elements can be added to the connection list here. + */ + if (!IS_SUBORDINATE(db_rep) && + FD_ISSET((u_int)db_rep->listen_fd, &reads) && + (ret = __repmgr_accept(env)) != 0) + goto out; + } +out: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * Examines a connection to see what sort of I/O to ask for. Clean up defunct + * connections. + */ +static int +prepare_io(env, conn, info_) + ENV *env; + REPMGR_CONNECTION *conn; + void *info_; +{ + struct io_info *info; + + info = info_; + + if (conn->state == CONN_DEFUNCT) + return (__repmgr_cleanup_defunct(env, conn)); + + if (!STAILQ_EMPTY(&conn->outbound_queue)) { + FD_SET((u_int)conn->fd, info->writes); + if (conn->fd > info->maxfd) + info->maxfd = conn->fd; + } + /* + * For now we always accept incoming data. 
If we ever implement some + * kind of flow control, we should override it for fledgling connections + * (!IS_VALID_EID(conn->eid)) -- in other words, allow reading such a + * connection even during flow control duress. + */ + FD_SET((u_int)conn->fd, info->reads); + if (conn->fd > info->maxfd) + info->maxfd = conn->fd; + + return (0); +} + +/* + * Examine a connection, to see what work needs to be done. + */ +static int +__repmgr_conn_work(env, conn, info_) + ENV *env; + REPMGR_CONNECTION *conn; + void *info_; +{ + struct io_info *info; + int ret; + u_int fd; + + ret = 0; + fd = (u_int)conn->fd; + info = info_; + + if (conn->state == CONN_DEFUNCT) + return (0); + + if (FD_ISSET(fd, info->writes)) + ret = __repmgr_write_some(env, conn); + + if (ret == 0 && FD_ISSET(fd, info->reads)) + ret = __repmgr_read_from_site(env, conn); + + if (ret == DB_REP_UNAVAIL) + ret = __repmgr_bust_connection(env, conn); + return (ret); +} diff --git a/src/repmgr/repmgr_queue.c b/src/repmgr/repmgr_queue.c new file mode 100644 index 00000000..de13e725 --- /dev/null +++ b/src/repmgr/repmgr_queue.c @@ -0,0 +1,180 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +static REPMGR_MESSAGE *available_work __P((ENV *)); + +/* + * Deallocates memory used by all messages on the queue. 
+ * + * PUBLIC: int __repmgr_queue_destroy __P((ENV *)); + */ +int +__repmgr_queue_destroy(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_MESSAGE *m; + REPMGR_CONNECTION *conn; + int ret, t_ret; + + db_rep = env->rep_handle; + + ret = 0; + while (!STAILQ_EMPTY(&db_rep->input_queue.header)) { + m = STAILQ_FIRST(&db_rep->input_queue.header); + STAILQ_REMOVE_HEAD(&db_rep->input_queue.header, entries); + if (m->msg_hdr.type == REPMGR_APP_MESSAGE) { + if ((conn = m->v.appmsg.conn) != NULL && + (t_ret = __repmgr_decr_conn_ref(env, conn)) != 0 && + ret == 0) + ret = t_ret; + } + __os_free(env, m); + } + return (ret); +} + +/* + * PUBLIC: int __repmgr_queue_get __P((ENV *, + * PUBLIC: REPMGR_MESSAGE **, REPMGR_RUNNABLE *)); + * + * Get the first input message from the queue and return it to the caller. The + * caller hereby takes responsibility for the entire message buffer, and should + * free it when done. + * + * Caller must hold mutex. + */ +int +__repmgr_queue_get(env, msgp, th) + ENV *env; + REPMGR_MESSAGE **msgp; + REPMGR_RUNNABLE *th; +{ + DB_REP *db_rep; + REPMGR_MESSAGE *m; +#ifdef DB_WIN32 + HANDLE wait_events[2]; +#endif + int ret; + + ret = 0; + db_rep = env->rep_handle; + + while ((m = available_work(env)) == NULL && + !db_rep->finished && !th->quit_requested) { +#ifdef DB_WIN32 + /* + * On Windows, msg_avail means either there's something in the + * queue, or we're all finished. So, reset the event if that is + * not true. 
+ */ + if (STAILQ_EMPTY(&db_rep->input_queue.header) && + !db_rep->finished && + !ResetEvent(db_rep->msg_avail)) { + ret = GetLastError(); + goto err; + } + wait_events[0] = db_rep->msg_avail; + wait_events[1] = th->quit_event; + UNLOCK_MUTEX(db_rep->mutex); + ret = WaitForMultipleObjects(2, wait_events, FALSE, INFINITE); + LOCK_MUTEX(db_rep->mutex); + if (ret == WAIT_FAILED) { + ret = GetLastError(); + goto err; + } + +#else + if ((ret = pthread_cond_wait(&db_rep->msg_avail, + db_rep->mutex)) != 0) + goto err; +#endif + } + if (db_rep->finished || th->quit_requested) + ret = DB_REP_UNAVAIL; + else { + STAILQ_REMOVE(&db_rep->input_queue.header, + m, __repmgr_message, entries); + db_rep->input_queue.size--; + *msgp = m; + } + +err: + return (ret); +} + +/* + * Gets an "available" item of work (i.e., a message) from the input queue. If + * there are plenty of message threads currently available, then we simply + * return the first thing on the queue, regardless of what type of message it + * is. But otherwise skip over any message type that may possibly turn out to + * be "long-running", so that we avoid starving out the important rep message + * processing. + */ +static REPMGR_MESSAGE * +available_work(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_MESSAGE *m; + + db_rep = env->rep_handle; + if (STAILQ_EMPTY(&db_rep->input_queue.header)) + return (NULL); + /* + * The "non_rep_th" field is the dynamically varying count of threads + * currently processing non-replication messages (a.k.a. possibly + * long-running messages, a.k.a. "deferrable"). We always ensure that + * db_rep->nthreads > reserved. + */ + if (db_rep->nthreads > db_rep->non_rep_th + RESERVED_MSG_TH(env)) + return (STAILQ_FIRST(&db_rep->input_queue.header)); + STAILQ_FOREACH(m, &db_rep->input_queue.header, entries) { + if (!IS_DEFERRABLE(m->msg_hdr.type)) + return (m); + } + return (NULL); +} + +/* + * PUBLIC: int __repmgr_queue_put __P((ENV *, REPMGR_MESSAGE *)); + * + * !!! 
+ * Caller must hold repmgr->mutex. + */ +int +__repmgr_queue_put(env, msg) + ENV *env; + REPMGR_MESSAGE *msg; +{ + DB_REP *db_rep; + + db_rep = env->rep_handle; + + STAILQ_INSERT_TAIL(&db_rep->input_queue.header, msg, entries); + db_rep->input_queue.size++; + + return (__repmgr_signal(&db_rep->msg_avail)); +} + +/* + * PUBLIC: int __repmgr_queue_size __P((ENV *)); + * + * !!! + * Caller must hold repmgr->mutex. + */ +int +__repmgr_queue_size(env) + ENV *env; +{ + return (env->rep_handle->input_queue.size); +} diff --git a/src/repmgr/repmgr_rec.c b/src/repmgr/repmgr_rec.c new file mode 100644 index 00000000..41827aff --- /dev/null +++ b/src/repmgr/repmgr_rec.c @@ -0,0 +1,45 @@ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc_auto/repmgr_auto.h" + +/* + * __repmgr_member_recover -- + * Recovery function for member. + * + * PUBLIC: int __repmgr_member_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__repmgr_member_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __repmgr_member_args *argp; + int ret; + + COMPQUIET(info, NULL); + COMPQUIET(op, DB_TXN_APPLY); + + REC_PRINT(__repmgr_member_print); + REC_NOOP_INTRO(__repmgr_member_read); + + /* + * The annotation log record describes the update in enough detail for + * us to be able to optimize our tracking of it at clients sites. + * However, for now we just simply reread the whole (small) database + * each time, since changes happen so seldom (and we need to have the + * code for reading the whole thing anyway, for other cases). 
+ */ + env->rep_handle->gmdb_dirty = TRUE; + + *lsnp = argp->prev_lsn; + ret = 0; + + REC_NOOP_CLOSE; +} diff --git a/src/repmgr/repmgr_sel.c b/src/repmgr/repmgr_sel.c new file mode 100644 index 00000000..18741fca --- /dev/null +++ b/src/repmgr/repmgr_sel.c @@ -0,0 +1,1971 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +typedef int (*HEARTBEAT_ACTION) __P((ENV *)); + +static int accept_handshake __P((ENV *, REPMGR_CONNECTION *, char *)); +static int accept_v1_handshake __P((ENV *, REPMGR_CONNECTION *, char *)); +static int __repmgr_call_election __P((ENV *)); +static int __repmgr_connector_main __P((ENV *, REPMGR_RUNNABLE *)); +static void *__repmgr_connector_thread __P((void *)); +static int dispatch_msgin __P((ENV *, REPMGR_CONNECTION *)); +static int __repmgr_next_timeout __P((ENV *, + db_timespec *, HEARTBEAT_ACTION *)); +static int prepare_input __P((ENV *, REPMGR_CONNECTION *)); +static int process_own_msg __P((ENV *, REPMGR_CONNECTION *)); +static int process_parameters __P((ENV *, + REPMGR_CONNECTION *, char *, u_int, u_int32_t, int, u_int32_t)); +static int read_version_response __P((ENV *, REPMGR_CONNECTION *)); +static int record_permlsn __P((ENV *, REPMGR_CONNECTION *)); +static int __repmgr_retry_connections __P((ENV *)); +static int __repmgr_send_heartbeat __P((ENV *)); +static int send_version_response __P((ENV *, REPMGR_CONNECTION *)); +static int __repmgr_try_one __P((ENV *, u_int)); + +#define ONLY_HANDSHAKE(env, conn) do { \ + if (conn->msg_type != REPMGR_HANDSHAKE) { \ + __db_errx(env, DB_STR_A("3613", \ + "unexpected msg type %d in state %d", "%d %d"), \ + (int)conn->msg_type, conn->state); \ + return (DB_REP_UNAVAIL); \ + } \ +} while (0) + +/* + * PUBLIC: void *__repmgr_select_thread __P((void *)); + */ +void * +__repmgr_select_thread(argsp) + void *argsp; +{ + 
REPMGR_RUNNABLE *args; + ENV *env; + int ret; + + args = argsp; + env = args->env; + + if ((ret = __repmgr_select_loop(env)) == DB_DELETED) + ret = __repmgr_bow_out(env); + if (ret != 0) { + __db_err(env, ret, DB_STR("3614", "select loop failed")); + (void)__repmgr_thread_failure(env, ret); + } + return (NULL); +} + +/* + * PUBLIC: int __repmgr_bow_out __P((ENV *)); + */ +int +__repmgr_bow_out(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "Stopping repmgr threads")); + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + ret = __repmgr_stop_threads(env); + UNLOCK_MUTEX(db_rep->mutex); + DB_EVENT(env, DB_EVENT_REP_LOCAL_SITE_REMOVED, NULL); + return (ret); +} + +/* + * PUBLIC: int __repmgr_accept __P((ENV *)); + */ +int +__repmgr_accept(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + ACCEPT_ADDR siaddr; + socklen_t addrlen; + socket_t s; + int ret; + + db_rep = env->rep_handle; + addrlen = sizeof(siaddr); + if ((s = accept(db_rep->listen_fd, (struct sockaddr *)&siaddr, + &addrlen)) == -1) { + /* + * Some errors are innocuous and so should be ignored. MSDN + * Library documents the Windows ones; the Unix ones are + * advocated in Stevens' UNPv1, section 16.6; and Linux + * Application Development, p. 416. 
+ */ + switch (ret = net_errno) { +#ifdef DB_WIN32 + case WSAECONNRESET: + case WSAEWOULDBLOCK: +#else + case EINTR: + case EWOULDBLOCK: + case ECONNABORTED: + case ENETDOWN: +#ifdef EPROTO + case EPROTO: +#endif + case ENOPROTOOPT: + case EHOSTDOWN: +#ifdef ENONET + case ENONET: +#endif + case EHOSTUNREACH: + case EOPNOTSUPP: + case ENETUNREACH: +#endif + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "accept error %d considered innocuous", ret)); + return (0); + default: + __db_err(env, ret, DB_STR("3615", "accept error")); + return (ret); + } + } + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "accepted a new connection")); + + if ((ret = + __repmgr_new_connection(env, &conn, s, CONN_NEGOTIATE)) != 0) { + (void)closesocket(s); + return (ret); + } + if ((ret = __repmgr_set_nonblock_conn(conn)) != 0) { + __db_err(env, ret, DB_STR("3616", + "can't set nonblock after accept")); + (void)__repmgr_destroy_conn(env, conn); + return (ret); + } + + F_SET(conn, CONN_INCOMING); + + /* + * We don't yet know which site this connection is coming from. So for + * now, put it on the "orphans" list; we'll move it to the appropriate + * site struct later when we discover who we're talking with, and what + * type of connection it is. + */ + conn->eid = -1; + TAILQ_INSERT_TAIL(&db_rep->connections, conn, entries); + conn->ref_count++; + + return (0); +} + +/* + * Computes how long we should wait for input, in other words how long until we + * have to wake up and do something. Returns TRUE if timeout is set; FALSE if + * there is nothing to wait for. + * + * Note that the resulting timeout could be zero; but it can't be negative. + * + * PUBLIC: int __repmgr_compute_timeout __P((ENV *, db_timespec *)); + */ +int +__repmgr_compute_timeout(env, timeout) + ENV *env; + db_timespec *timeout; +{ + DB_REP *db_rep; + REPMGR_RETRY *retry; + db_timespec now, t; + int have_timeout; + + db_rep = env->rep_handle; + + /* + * There are two factors to consider: are heartbeats in use? 
and, do we + * have any sites with broken connections that we ought to retry? + */ + have_timeout = __repmgr_next_timeout(env, &t, NULL); + + /* List items are in order, so we only have to examine the first one. */ + if (!TAILQ_EMPTY(&db_rep->retries)) { + retry = TAILQ_FIRST(&db_rep->retries); + if (have_timeout) { + /* Choose earliest timeout deadline. */ + t = timespeccmp(&retry->time, &t, <) ? retry->time : t; + } else { + t = retry->time; + have_timeout = TRUE; + } + } + + if (have_timeout) { + __os_gettime(env, &now, 1); + if (timespeccmp(&now, &t, >=)) + timespecclear(timeout); + else { + *timeout = t; + timespecsub(timeout, &now); + } + } + + return (have_timeout); +} + +/* + * Figures out the next heartbeat-related thing to be done, and when it should + * be done. The code is factored this way because this computation needs to be + * done both before each select() call, and after (when we're checking for timer + * expiration). + */ +static int +__repmgr_next_timeout(env, deadline, action) + ENV *env; + db_timespec *deadline; + HEARTBEAT_ACTION *action; +{ + DB_REP *db_rep; + REP *rep; + HEARTBEAT_ACTION my_action; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + db_timespec t; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if (rep->master_id == db_rep->self_eid && + rep->heartbeat_frequency > 0) { + t = db_rep->last_bcast; + TIMESPEC_ADD_DB_TIMEOUT(&t, rep->heartbeat_frequency); + my_action = __repmgr_send_heartbeat; + } else if ((conn = __repmgr_master_connection(env)) != NULL && + !IS_SUBORDINATE(db_rep) && + rep->heartbeat_monitor_timeout > 0 && + conn->version >= HEARTBEAT_MIN_VERSION) { + /* + * If we have a working connection to a heartbeat-aware master, + * let's monitor it. Otherwise there's really nothing we can + * do. 
+ */ + site = SITE_FROM_EID(rep->master_id); + t = site->last_rcvd_timestamp; + TIMESPEC_ADD_DB_TIMEOUT(&t, rep->heartbeat_monitor_timeout); + my_action = __repmgr_call_election; + } else + return (FALSE); + + *deadline = t; + if (action != NULL) + *action = my_action; + return (TRUE); +} + +/* + * Sends a heartbeat message. + * + * repmgr also uses the heartbeat facility to manage rerequests. We + * send the master's current generation and max_perm_lsn with the heartbeat + * message to help a client determine whether it has all master transactions. + * When a client receives a heartbeat message, it also checks whether it + * needs to rerequest anything. Note that heartbeats must be enabled for + * this rerequest processing to occur. + */ +static int +__repmgr_send_heartbeat(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + DBT control, rec; + __repmgr_permlsn_args permlsn; + u_int8_t buf[__REPMGR_PERMLSN_SIZE]; + u_int unused1, unused2; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + permlsn.generation = rep->gen; + if ((ret = __rep_get_maxpermlsn(env, &permlsn.lsn)) != 0) + return (ret); + __repmgr_permlsn_marshal(env, &permlsn, buf); + control.data = buf; + control.size = __REPMGR_PERMLSN_SIZE; + + DB_INIT_DBT(rec, NULL, 0); + return (__repmgr_send_broadcast(env, + REPMGR_HEARTBEAT, &control, &rec, &unused1, &unused2)); +} + +/* + * PUBLIC: REPMGR_CONNECTION *__repmgr_master_connection __P((ENV *)); + + */ +REPMGR_CONNECTION * +__repmgr_master_connection(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *master; + int master_id; + + db_rep = env->rep_handle; + rep = db_rep->region; + master_id = rep->master_id; + + if (!IS_KNOWN_REMOTE_SITE(master_id)) + return (NULL); + master = SITE_FROM_EID(master_id); + if (IS_SITE_HANDSHAKEN(master)) + return master->ref.conn; + return (NULL); +} + +static int +__repmgr_call_election(env) + ENV *env; +{ + REPMGR_CONNECTION *conn; + + conn = __repmgr_master_connection(env); + 
DB_ASSERT(env, conn != NULL); + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "heartbeat monitor timeout expired")); + STAT(env->rep_handle->region->mstat.st_connection_drop++); + return (__repmgr_bust_connection(env, conn)); +} + +/* + * PUBLIC: int __repmgr_check_timeouts __P((ENV *)); + * + * !!! + * Assumes caller holds the mutex. + */ +int +__repmgr_check_timeouts(env) + ENV *env; +{ + db_timespec when, now; + HEARTBEAT_ACTION action; + int ret; + + /* + * Figure out the next heartbeat-related thing to be done. Then, if + * it's time to do it, do so. + */ + if (__repmgr_next_timeout(env, &when, &action)) { + __os_gettime(env, &now, 1); + if (timespeccmp(&when, &now, <=) && + (ret = (*action)(env)) != 0) + return (ret); + } + + return (__repmgr_retry_connections(env)); +} + +/* + * Initiates connection attempts for any sites on the idle list whose retry + * times have expired. + */ +static int +__repmgr_retry_connections(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + REPMGR_RETRY *retry; + db_timespec now; + u_int eid; + int ret; + + db_rep = env->rep_handle; + __os_gettime(env, &now, 1); + + while (!TAILQ_EMPTY(&db_rep->retries)) { + retry = TAILQ_FIRST(&db_rep->retries); + if (timespeccmp(&retry->time, &now, >=)) + break; /* since items are in time order */ + + TAILQ_REMOVE(&db_rep->retries, retry, entries); + + eid = retry->eid; + __os_free(env, retry); + site = SITE_FROM_EID(eid); + DB_ASSERT(env, site->state == SITE_PAUSING); + + if (site->membership == SITE_PRESENT) { + if ((ret = __repmgr_try_one(env, eid)) != 0) + return (ret); + } else + site->state = SITE_IDLE; + } + return (0); +} + +/* + * PUBLIC: int __repmgr_first_try_connections __P((ENV *)); + * + * !!! + * Assumes caller holds the mutex. 
+ */ +int +__repmgr_first_try_connections(env) + ENV *env; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + u_int eid; + int ret; + + db_rep = env->rep_handle; + FOR_EACH_REMOTE_SITE_INDEX(eid) { + site = SITE_FROM_EID(eid); + /* + * Normally all sites would be IDLE here. But if a user thread + * triggered an auto-start in a subordinate process, our send() + * function may have found new sites when it sync'ed site + * addresses, and that action causes connection attempts to be + * scheduled (resulting in PAUSING state here, or conceivably + * even CONNECTING or CONNECTED). + */ + if (site->state == SITE_IDLE && + site->membership == SITE_PRESENT && + (ret = __repmgr_try_one(env, eid)) != 0) + return (ret); + } + return (0); +} + +/* + * Starts a thread to open a connection to the site at the given EID. + */ +static int +__repmgr_try_one(env, eid) + ENV *env; + u_int eid; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + REPMGR_RUNNABLE *th; + int ret; + + db_rep = env->rep_handle; + site = SITE_FROM_EID(eid); + th = site->connector; + if (th == NULL) { + if ((ret = __os_malloc(env, sizeof(REPMGR_RUNNABLE), &th)) != 0) + return (ret); + site->connector = th; + } else if (th->finished) { + if ((ret = __repmgr_thread_join(th)) != 0) + return (ret); + } else { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "eid %lu previous connector thread still running; will retry", + (u_long)eid)); + return (__repmgr_schedule_connection_attempt(env, + eid, FALSE)); + } + + site->state = SITE_CONNECTING; + + th->run = __repmgr_connector_thread; + th->args.eid = (int)eid; + if ((ret = __repmgr_thread_start(env, th)) != 0) { + __os_free(env, th); + site->connector = NULL; + } + return (ret); +} + +static void * +__repmgr_connector_thread(argsp) + void *argsp; +{ + REPMGR_RUNNABLE *th; + ENV *env; + int ret; + + th = argsp; + env = th->env; + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "starting connector thread, eid %u", th->args.eid)); + if ((ret = __repmgr_connector_main(env, th)) != 0) { + 
__db_err(env, ret, DB_STR("3617", "connector thread failed")); + (void)__repmgr_thread_failure(env, ret); + } + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "connector thread is exiting")); + + th->finished = TRUE; + return (NULL); +} + +static int +__repmgr_connector_main(env, th) + ENV *env; + REPMGR_RUNNABLE *th; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + REPMGR_CONNECTION *conn; + DB_REPMGR_CONN_ERR info; + repmgr_netaddr_t netaddr; + SITE_STRING_BUFFER site_string; + int err, ret, t_ret; + + db_rep = env->rep_handle; + ret = 0; + + LOCK_MUTEX(db_rep->mutex); + site = SITE_FROM_EID(th->args.eid); + if (site->state != SITE_CONNECTING && db_rep->finished) + goto unlock; + + /* + * Drop the mutex during operations that could block. During those + * times, the site struct could move (if we had to grow the sites + * array), but host wouldn't. + * + * Also, during those times we might receive an incoming connection from + * the site, which would change its state. So, check state each time we + * reacquire the mutex, and quit if the state of the world changed while + * we were away. 
+ */ + netaddr = site->net_addr; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, "connecting to %s", + __repmgr_format_site_loc(site, site_string))); + UNLOCK_MUTEX(db_rep->mutex); + + if ((ret = __repmgr_connect(env, &netaddr, &conn, &err)) == 0) { + DB_EVENT(env, DB_EVENT_REP_CONNECT_ESTD, &th->args.eid); + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_set_nonblock_conn(conn)) != 0) { + __db_err(env, ret, DB_STR("3618", + "set_nonblock in connnect thread")); + goto cleanup; + } + conn->type = REP_CONNECTION; + site = SITE_FROM_EID(th->args.eid); + if (site->state != SITE_CONNECTING || db_rep->finished) + goto cleanup; + + conn->eid = (int)th->args.eid; + site = SITE_FROM_EID(th->args.eid); + site->ref.conn = conn; + site->state = SITE_CONNECTED; + __os_gettime(env, &site->last_rcvd_timestamp, 1); + ret = __repmgr_wake_main_thread(env); + } else if (ret == DB_REP_UNAVAIL) { + /* Retryable error while trying to connect: retry later. */ + info.eid = th->args.eid; + info.error = err; + DB_EVENT(env, DB_EVENT_REP_CONNECT_TRY_FAILED, &info); + STAT(db_rep->region->mstat.st_connect_fail++); + + LOCK_MUTEX(db_rep->mutex); + site = SITE_FROM_EID(th->args.eid); + if (site->state != SITE_CONNECTING || db_rep->finished) { + ret = 0; + goto unlock; + } + ret = __repmgr_schedule_connection_attempt(env, + (u_int)th->args.eid, FALSE); + } else + goto out; + + if (0) { +cleanup: + if ((t_ret = __repmgr_destroy_conn(env, conn)) != 0 && + ret == 0) + ret = t_ret; + } + +unlock: + UNLOCK_MUTEX(db_rep->mutex); +out: + return (ret); +} + +/* + * PUBLIC: int __repmgr_send_v1_handshake __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, void *, size_t)); + */ +int +__repmgr_send_v1_handshake(env, conn, buf, len) + ENV *env; + REPMGR_CONNECTION *conn; + void *buf; + size_t len; +{ + DB_REP *db_rep; + REP *rep; + repmgr_netaddr_t *my_addr; + DB_REPMGR_V1_HANDSHAKE buffer; + DBT cntrl, rec; + + db_rep = env->rep_handle; + rep = db_rep->region; + my_addr = &SITE_FROM_EID(db_rep->self_eid)->net_addr; + 
+ /* + * We're about to send from a structure that has padding holes in it. + * Initializing it keeps Valgrind happy, plus we really shouldn't be + * sending out random garbage anyway (pro forma privacy issue). + */ + memset(&buffer, 0, sizeof(buffer)); + buffer.version = 1; + buffer.priority = htonl(rep->priority); + buffer.port = my_addr->port; + cntrl.data = &buffer; + cntrl.size = sizeof(buffer); + + rec.data = buf; + rec.size = (u_int32_t)len; + + /* + * It would of course be disastrous to block the select() thread, so + * pass the "maxblock" argument as 0. Fortunately blocking should + * never be necessary here, because the hand-shake is always the first + * thing we send. Which is a good thing, because it would be almost as + * disastrous if we allowed ourselves to drop a handshake. + */ + return (__repmgr_send_one(env, + conn, REPMGR_HANDSHAKE, &cntrl, &rec, 0)); +} + +/* + * PUBLIC: int __repmgr_read_from_site __P((ENV *, REPMGR_CONNECTION *)); + * + * !!! + * Caller is assumed to hold repmgr->mutex, 'cuz we call queue_put() from here. + */ +int +__repmgr_read_from_site(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + int ret; + + db_rep = env->rep_handle; + + /* + * Loop, just in case we get EINTR and need to restart the I/O. (All + * other branches return.) + */ + for (;;) { + switch ((ret = __repmgr_read_conn(conn))) { +#ifndef DB_WIN32 + case EINTR: + continue; +#endif + +#if defined(DB_REPMGR_EAGAIN) && DB_REPMGR_EAGAIN != WOULDBLOCK + case DB_REPMGR_EAGAIN: +#endif + case WOULDBLOCK: + return (0); + + case DB_REP_UNAVAIL: + /* Error 0 is understood to mean EOF. */ + __repmgr_fire_conn_err_event(env, conn, 0); + STAT(env->rep_handle-> + region->mstat.st_connection_drop++); + return (DB_REP_UNAVAIL); + + case 0: + if (IS_VALID_EID(conn->eid)) { + site = SITE_FROM_EID(conn->eid); + __os_gettime(env, + &site->last_rcvd_timestamp, 1); + } + return (conn->reading_phase == SIZES_PHASE ? 
			    prepare_input(env, conn) :
			    dispatch_msgin(env, conn));

		default:
#ifdef EBADF
			/* EBADF would mean our own fd bookkeeping is broken. */
			DB_ASSERT(env, ret != EBADF);
#endif
			__repmgr_fire_conn_err_event(env, conn, ret);
			STAT(db_rep->region->mstat.st_connection_drop++);
			return (DB_REP_UNAVAIL);
		}
	}
}

/*
 * Reads in the current input phase, as defined by the connection's IOVECS
 * struct.
 *
 * Returns DB_REP_UNAVAIL for EOF.
 *
 * Makes no assumption about synchronization: it's up to the caller to hold
 * mutex if necessary.
 *
 * PUBLIC: int __repmgr_read_conn __P((REPMGR_CONNECTION *));
 */
int
__repmgr_read_conn(conn)
	REPMGR_CONNECTION *conn;
{
	size_t nr;
	int ret;

	/*
	 * Keep reading pieces as long as we're making some progress, or until
	 * we complete the current read phase as defined in iovecs.
	 */
	for (;;) {
		if ((ret = __repmgr_readv(conn->fd,
		    &conn->iovecs.vectors[conn->iovecs.offset],
		    conn->iovecs.count - conn->iovecs.offset, &nr)) != 0)
			return (ret);

		/* A successful zero-length read means the peer closed (EOF). */
		if (nr == 0)
			return (DB_REP_UNAVAIL);

		if (__repmgr_update_consumed(&conn->iovecs, nr)) {
			/* We've fully read as much as we wanted. */
			return (0);
		}
	}
}

/*
 * Having finished reading the 9-byte message header, figure out what kind of
 * message we're about to receive, and prepare input buffers accordingly.  The
 * header includes enough information for us to figure out how much buffer space
 * we need to allocate (though in some cases we need to do a bit of computation
 * to arrive at the answer).
 *
 * Caller must hold mutex.
+ */ +static int +prepare_input(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ +#define MEM_ALIGN sizeof(double) + DBT *dbt; + __repmgr_msg_hdr_args msg_hdr; + REPMGR_RESPONSE *resp; + u_int32_t control_size, rec_size, size; + size_t memsize, control_offset, rec_offset; + void *membase; + int ret, skip; + + DB_ASSERT(env, conn->reading_phase == SIZES_PHASE); + + /* + * We can only get here after having read the full 9 bytes that we + * expect, so this can't fail. + */ + ret = __repmgr_msg_hdr_unmarshal(env, &msg_hdr, + conn->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE, NULL); + DB_ASSERT(env, ret == 0); + + __repmgr_iovec_init(&conn->iovecs); + skip = FALSE; + + switch ((conn->msg_type = msg_hdr.type)) { + case REPMGR_HEARTBEAT: + /* + * The underlying byte-receiving mechanism will already have + * noted the fact that we got some traffic on this connection, + * which is all that is needed to monitor the heartbeat. But + * we also put the heartbeat message on the message queue so + * that it will perform rerequest processing. + */ + case REPMGR_REP_MESSAGE: + env->rep_handle->seen_repmsg = TRUE; + control_size = REP_MSG_CONTROL_SIZE(msg_hdr); + rec_size = REP_MSG_REC_SIZE(msg_hdr); + if (control_size == 0) { + if (conn->msg_type == REPMGR_HEARTBEAT) { + /* + * Got an old-style heartbeat without payload, + * nothing to do. + */ + skip = TRUE; + break; + } else { + __db_errx(env, DB_STR("3619", + "illegal size for rep msg")); + return (DB_REP_UNAVAIL); + } + } + /* + * Allocate a block of memory large enough to hold a + * DB_REPMGR_MESSAGE wrapper, plus the (one or) two DBT + * data areas that it points to. Start by calculating + * the total memory needed. 
+ */ + memsize = DB_ALIGN(sizeof(REPMGR_MESSAGE), MEM_ALIGN); + control_offset = memsize; + memsize += control_size; + if (rec_size > 0) { + memsize = DB_ALIGN(memsize, MEM_ALIGN); + rec_offset = memsize; + memsize += rec_size; + } else + COMPQUIET(rec_offset, 0); + if ((ret = __os_malloc(env, memsize, &membase)) != 0) + return (ret); + conn->input.rep_message = membase; + conn->input.rep_message->msg_hdr = msg_hdr; + conn->input.rep_message->v.repmsg.originating_eid = conn->eid; + + DB_INIT_DBT(conn->input.rep_message->v.repmsg.control, + (u_int8_t*)membase + control_offset, control_size); + __repmgr_add_dbt(&conn->iovecs, + &conn->input.rep_message->v.repmsg.control); + + if (rec_size > 0) { + DB_INIT_DBT(conn->input.rep_message->v.repmsg.rec, + (rec_size > 0 ? + (u_int8_t*)membase + rec_offset : NULL), + rec_size); + __repmgr_add_dbt(&conn->iovecs, + &conn->input.rep_message->v.repmsg.rec); + } else + DB_INIT_DBT(conn->input.rep_message->v.repmsg.rec, + NULL, 0); + break; + + case REPMGR_APP_MESSAGE: + /* + * We need a buffer big enough to hold the REPMGR_MESSAGE struct + * and the data that we expect to receive on the wire. We must + * extend the struct size for the variable-length DBT array at + * the end. 
+ */ + size = DB_ALIGN((size_t)(sizeof(REPMGR_MESSAGE) + + APP_MSG_SEGMENT_COUNT(msg_hdr) * sizeof(DBT)), + MEM_ALIGN); + memsize = size + APP_MSG_BUFFER_SIZE(msg_hdr); + if ((ret = __os_malloc(env, memsize, &membase)) != 0) + return (ret); + conn->input.rep_message = membase; + conn->input.rep_message->msg_hdr = msg_hdr; + conn->input.rep_message->v.appmsg.conn = conn; + + DB_INIT_DBT(conn->input.rep_message->v.appmsg.buf, + (u_int8_t*)membase + size, + APP_MSG_BUFFER_SIZE(msg_hdr)); + __repmgr_add_dbt(&conn->iovecs, + &conn->input.rep_message->v.appmsg.buf); + break; + + case REPMGR_OWN_MSG: + size = sizeof(REPMGR_MESSAGE) + REPMGR_OWN_BUF_SIZE(msg_hdr); + if ((ret = __os_malloc(env, size, &membase)) != 0) + return (ret); + conn->input.rep_message = membase; + conn->input.rep_message->msg_hdr = msg_hdr; + + /* + * Save "conn" pointer in case this turns out to be a one-shot + * request. If it isn't, it won't matter. + */ + /* + * An OWN msg that arrives in PARAMETERS state has bypassed the + * final handshake, implying that this connection is to be used + * for a one-shot GMDB request. + */ + DB_ASSERT(env, REPMGR_OWN_BUF_SIZE(msg_hdr) > 0); + DB_INIT_DBT(conn->input.rep_message->v.gmdb_msg.request, + (u_int8_t*)membase + sizeof(REPMGR_MESSAGE), + REPMGR_OWN_BUF_SIZE(msg_hdr)); + __repmgr_add_dbt(&conn->iovecs, + &conn->input.rep_message->v.gmdb_msg.request); + break; + + case REPMGR_APP_RESPONSE: + size = APP_RESP_BUFFER_SIZE(msg_hdr); + conn->cur_resp = APP_RESP_TAG(msg_hdr); + DB_ASSERT(env, conn->cur_resp < conn->aresp); + resp = &conn->responses[conn->cur_resp]; + DB_ASSERT(env, F_ISSET(resp, RESP_IN_USE)); + + dbt = &resp->dbt; + + /* + * Prepare to read message body into either the user-supplied + * buffer, or one we allocate here. + */ + ret = 0; + if (!F_ISSET(resp, RESP_THREAD_WAITING)) { + /* Caller already timed out; allocate dummy buffer. 
*/ + if (size > 0) { + memset(dbt, 0, sizeof(*dbt)); + ret = __os_malloc(env, size, &dbt->data); + F_SET(resp, RESP_DUMMY_BUF); + } else + F_CLR(resp, RESP_IN_USE); + } else if (F_ISSET(dbt, DB_DBT_MALLOC)) + ret = __os_umalloc(env, size, &dbt->data); + else if (F_ISSET(dbt, DB_DBT_REALLOC)) { + if (dbt->data == NULL || dbt->size < size) + ret = __os_urealloc(env, size, &dbt->data); + } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { + /* Recipient should have checked size limit. */ + DB_ASSERT(env, size <= dbt->ulen); + } + dbt->size = size; + if (ret != 0) + return (ret); + + if (size > 0) { + __repmgr_add_dbt(&conn->iovecs, dbt); + F_SET(resp, RESP_READING); + } else { + skip = TRUE; + if (F_ISSET(resp, RESP_THREAD_WAITING)) { + F_SET(resp, RESP_COMPLETE); + if ((ret = __repmgr_wake_waiters(env, + &conn->response_waiters)) != 0) + return (ret); + } + } + break; + + case REPMGR_RESP_ERROR: + DB_ASSERT(env, RESP_ERROR_TAG(msg_hdr) < conn->aresp && + conn->responses != NULL); + resp = &conn->responses[RESP_ERROR_TAG(msg_hdr)]; + DB_ASSERT(env, !F_ISSET(resp, RESP_READING)); + if (F_ISSET(resp, RESP_THREAD_WAITING)) { + F_SET(resp, RESP_COMPLETE); + + /* + * DB errors are always negative, but we only send + * unsigned values on the wire. + */ + resp->ret = -((int)RESP_ERROR_CODE(msg_hdr)); + if ((ret = __repmgr_wake_waiters(env, + &conn->response_waiters)) != 0) + return (ret); + } else + F_CLR(resp, RESP_IN_USE); + skip = TRUE; + break; + + case REPMGR_HANDSHAKE: + case REPMGR_PERMLSN: + if ((ret = __repmgr_prepare_simple_input(env, + conn, &msg_hdr)) != 0) + return (ret); + break; + + default: + return (__db_unknown_path(env, "prepare_input")); + } + + if (skip) { + /* + * We can skip the DATA_PHASE, because the current message type + * only has a header, no following data. 
		 */
		__repmgr_reset_for_reading(conn);
	} else
		conn->reading_phase = DATA_PHASE;

	return (0);
}

/*
 * PUBLIC: int __repmgr_prepare_simple_input __P((ENV *,
 * PUBLIC:     REPMGR_CONNECTION *, __repmgr_msg_hdr_args *));
 *
 * Allocates input buffers for the control and rec parts of a simple message,
 * sized from the message header, and registers each non-empty buffer with the
 * connection's iovecs for the upcoming data read.
 */
int
__repmgr_prepare_simple_input(env, conn, msg_hdr)
	ENV *env;
	REPMGR_CONNECTION *conn;
	__repmgr_msg_hdr_args *msg_hdr;
{
	DBT *dbt;
	u_int32_t control_size, rec_size;
	int ret;

	control_size = REP_MSG_CONTROL_SIZE(*msg_hdr);
	rec_size = REP_MSG_REC_SIZE(*msg_hdr);

	dbt = &conn->input.repmgr_msg.cntrl;
	if ((dbt->size = control_size) > 0) {
		if ((ret = __os_malloc(env,
		    dbt->size, &dbt->data)) != 0)
			return (ret);
		__repmgr_add_dbt(&conn->iovecs, dbt);
	}

	dbt = &conn->input.repmgr_msg.rec;
	if ((dbt->size = rec_size) > 0) {
		if ((ret = __os_malloc(env,
		    dbt->size, &dbt->data)) != 0) {
			/* Back out the control buffer so we don't leak it. */
			dbt = &conn->input.repmgr_msg.cntrl;
			if (dbt->size > 0)
				__os_free(env, dbt->data);
			return (ret);
		}
		__repmgr_add_dbt(&conn->iovecs, dbt);
	}
	return (0);
}

/*
 * Processes an incoming message, depending on our current state.
 *
 * Caller must hold mutex.
 */
static int
dispatch_msgin(env, conn)
	ENV *env;
	REPMGR_CONNECTION *conn;
{
	DB_REP *db_rep;
	REPMGR_SITE *site;
	REPMGR_RUNNABLE *th;
	REPMGR_RESPONSE *resp;
	DBT *dbt;
	char *hostname;
	int eid, ret;

	DB_ASSERT(env, conn->reading_phase == DATA_PHASE);
	db_rep = env->rep_handle;

	switch (conn->state) {
	case CONN_CONNECTED:
		/*
		 * In this state, we know we're working with an outgoing
		 * connection.  We've sent a version proposal, and now expect
		 * the response (which could be a dumb old V1 handshake).
		 */
		ONLY_HANDSHAKE(env, conn);

		/*
		 * Here is a good opportunity to clean up this site's connector
		 * thread, because we generally come through here after making
		 * an outgoing connection, yet we're out of the main loop, so we
		 * don't hit this often.
+ */ + eid = conn->eid; + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(conn->eid)); + site = SITE_FROM_EID(eid); + th = site->connector; + if (th != NULL && th->finished) { + if ((ret = __repmgr_thread_join(th)) != 0) + return (ret); + __os_free(env, th); + site->connector = NULL; + } + + if ((ret = read_version_response(env, conn)) != 0) + return (ret); + break; + + case CONN_NEGOTIATE: + /* + * Since we're in this state, we know we're working with an + * incoming connection, and this is the first message we've + * received. So it must be a version negotiation proposal (or a + * legacy V1 handshake). (We'll verify this of course.) + */ + ONLY_HANDSHAKE(env, conn); + if ((ret = send_version_response(env, conn)) != 0) + return (ret); + break; + + case CONN_PARAMETERS: + /* + * We've previously agreed on a (>1) version, so we expect + * either the other side's parameters handshake, or possibly a + * GMDB request on a one-shot, dedicated connection. + */ + switch (conn->msg_type) { + case REPMGR_HANDSHAKE: + dbt = &conn->input.repmgr_msg.rec; + hostname = dbt->data; + hostname[dbt->size-1] = '\0'; + if ((ret = accept_handshake(env, conn, hostname)) != 0) + return (ret); + conn->state = CONN_READY; + break; + case REPMGR_OWN_MSG: + /* + * GM change requests arrive in their own dedicated + * connections, and when they're served the entire + * connection isn't needed any more. So the message + * processing thread will do the entire job of serving + * the request and finishing off the connection; so we + * don't have to read it any more. Note that normally + * whenever we remove a connection from our list we + * decrement the reference count; but we also increment + * it whenever we pass a reference over to the message + * processing threads' queue. So in this case it's a + * wash. 
+ */ + conn->input.rep_message->v.gmdb_msg.conn = conn; + TAILQ_REMOVE(&db_rep->connections, conn, entries); + if ((ret = __repmgr_queue_put(env, + conn->input.rep_message)) != 0) + return (ret); + break; + + default: + __db_errx(env, DB_STR_A("3620", + "unexpected msg type %d in PARAMETERS state", "%d"), + (int)conn->msg_type); + return (DB_REP_UNAVAIL); + } + + break; + + case CONN_READY: + case CONN_CONGESTED: + /* + * We have a complete message, so process it. Acks and + * handshakes get processed here, in line. Regular rep messages + * get posted to a queue, to be handled by a thread from the + * message thread pool. + */ + switch (conn->msg_type) { + case REPMGR_PERMLSN: + if ((ret = record_permlsn(env, conn)) != 0) + return (ret); + break; + + case REPMGR_HEARTBEAT: + case REPMGR_APP_MESSAGE: + case REPMGR_REP_MESSAGE: + if ((ret = __repmgr_queue_put(env, + conn->input.rep_message)) != 0) + return (ret); + /* + * The queue has taken over responsibility for the + * rep_message buffer, and will free it later. + */ + if (conn->msg_type == REPMGR_APP_MESSAGE) + conn->ref_count++; + break; + + case REPMGR_OWN_MSG: + /* + * Since we're in one of the "ready" states we know this + * isn't a one-shot request, so we are not giving + * ownership of this connection over to the message + * thread queue; we're going to keep reading on it + * ourselves. The message thread that processes this + * request has no need for a connection anyway, since + * there is no response that needs to be returned. 
+ */ + conn->input.rep_message->v.gmdb_msg.conn = NULL; + if ((ret = process_own_msg(env, conn)) != 0) + return (ret); + break; + + case REPMGR_APP_RESPONSE: + DB_ASSERT(env, conn->cur_resp < conn->aresp && + conn->responses != NULL); + resp = &conn->responses[conn->cur_resp]; + DB_ASSERT(env, F_ISSET(resp, RESP_READING)); + F_CLR(resp, RESP_READING); + if (F_ISSET(resp, RESP_THREAD_WAITING)) { + F_SET(resp, RESP_COMPLETE); + if ((ret = __repmgr_wake_waiters(env, + &conn->response_waiters)) != 0) + return (ret); + } else { + /* + * If the calling thread is no longer with us, + * yet we're reading, it can only mean we're + * reading into a dummy buffer, so free it now. + */ + DB_ASSERT(env, F_ISSET(resp, RESP_DUMMY_BUF)); + __os_free(env, resp->dbt.data); + F_CLR(resp, RESP_IN_USE); + } + break; + + case REPMGR_RESP_ERROR: + default: + __db_errx(env, DB_STR_A("3621", + "unexpected msg type rcvd in ready state: %d", + "%d"), (int)conn->msg_type); + return (DB_REP_UNAVAIL); + } + break; + + case CONN_DEFUNCT: + break; + + default: + DB_ASSERT(env, FALSE); + } + + switch (conn->msg_type) { + case REPMGR_HANDSHAKE: + case REPMGR_PERMLSN: + dbt = &conn->input.repmgr_msg.cntrl; + if (dbt->size > 0) + __os_free(env, dbt->data); + dbt = &conn->input.repmgr_msg.rec; + if (dbt->size > 0) + __os_free(env, dbt->data); + break; + default: + /* + * Some messages in REPMGR_OWN_MSG format are also handled + */ + break; + } + __repmgr_reset_for_reading(conn); + return (0); +} + +/* + * Process one of repmgr's "own" message types, and one that occurs on a regular + * (not one-shot) connection. + */ +static int +process_own_msg(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + DBT *dbt; + REPMGR_SITE *site; + REPMGR_MESSAGE *msg; + __repmgr_connect_reject_args reject; + __repmgr_parm_refresh_args parms; + int ret; + + ret = 0; + /* + * Set "msg" to point to the message struct. If we do all necessary + * processing here now, leave it set so that it can be freed. 
On the + * other hand, if we pass it off to the message queue for later + * processing by a message thread, we want to avoid freeing the memory + * here, so clear the pointer in such a case. + */ + switch (REPMGR_OWN_MSG_TYPE((msg = conn->input.rep_message)->msg_hdr)) { + case REPMGR_CONNECT_REJECT: + dbt = &msg->v.gmdb_msg.request; + if ((ret = __repmgr_connect_reject_unmarshal(env, + &reject, dbt->data, dbt->size, NULL)) != 0) + return (DB_REP_UNAVAIL); + + /* + * If we're being rejected by someone who has more up-to-date + * membership information than we do, it means we have been + * removed from the group. If we've just gotten started, we can + * make one attempt at automatically rejoining; otherwise we bow + * out gracefully. + */ + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "got rejection msg citing version %lu/%lu", + (u_long)reject.gen, (u_long)reject.version)); + + if (__repmgr_gmdb_version_cmp(env, + reject.gen, reject.version) > 0) { + if (env->rep_handle->seen_repmsg) + ret = DB_DELETED; + else if ((ret = __repmgr_defer_op(env, + REPMGR_REJOIN)) == 0) + ret = DB_REP_UNAVAIL; + } else + ret = DB_REP_UNAVAIL; + DB_ASSERT(env, ret != 0); + return (ret); + + case REPMGR_SHARING: + if ((ret = __repmgr_queue_put(env, msg)) != 0) + return (ret); + /* Show that we no longer own this memory. 
*/ + msg = NULL; + break; + + case REPMGR_PARM_REFRESH: + dbt = &conn->input.rep_message->v.gmdb_msg.request; + if ((ret = __repmgr_parm_refresh_unmarshal(env, + &parms, dbt->data, dbt->size, NULL)) != 0) + return (DB_REP_UNAVAIL); + db_rep = env->rep_handle; + DB_ASSERT(env, conn->type == REP_CONNECTION && + IS_KNOWN_REMOTE_SITE(conn->eid)); + site = SITE_FROM_EID(conn->eid); + site->ack_policy = (int)parms.ack_policy; + if (F_ISSET(&parms, ELECTABLE_SITE)) + F_SET(site, SITE_ELECTABLE); + else + F_CLR(site, SITE_ELECTABLE); + F_SET(site, SITE_HAS_PRIO); + break; + + case REPMGR_GM_FAILURE: + case REPMGR_GM_FORWARD: + case REPMGR_JOIN_REQUEST: + case REPMGR_JOIN_SUCCESS: + case REPMGR_REMOVE_REQUEST: + case REPMGR_RESOLVE_LIMBO: + default: + return (__db_unknown_path(env, "process_own_msg")); + } + /* + * If we haven't given ownership of the msg buffer to another thread, + * free it now. + */ + if (msg != NULL) + __os_free(env, msg); + return (ret); +} + +/* + * Examine and verify the incoming version proposal message, and send an + * appropriate response. + */ +static int +send_version_response(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + __repmgr_version_proposal_args versions; + __repmgr_version_confirmation_args conf; + repmgr_netaddr_t *my_addr; + char *hostname; + u_int8_t buf[__REPMGR_VERSION_CONFIRMATION_SIZE+1]; + DBT vi; + int ret; + + db_rep = env->rep_handle; + my_addr = &SITE_FROM_EID(db_rep->self_eid)->net_addr; + + if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0) + return (ret); + if (vi.size == 0) { + /* No version info, so we must be talking to a v1 site. 
*/ + hostname = conn->input.repmgr_msg.rec.data; + if ((ret = accept_v1_handshake(env, conn, hostname)) != 0) + return (ret); + if ((ret = __repmgr_send_v1_handshake(env, + conn, my_addr->host, strlen(my_addr->host) + 1)) != 0) + return (ret); + conn->state = CONN_READY; + } else { + if ((ret = __repmgr_version_proposal_unmarshal(env, + &versions, vi.data, vi.size, NULL)) != 0) + return (DB_REP_UNAVAIL); + + if (DB_REPMGR_VERSION >= versions.min && + DB_REPMGR_VERSION <= versions.max) + conf.version = DB_REPMGR_VERSION; + else if (versions.max >= DB_REPMGR_MIN_VERSION && + versions.max <= DB_REPMGR_VERSION) + conf.version = versions.max; + else { + /* + * User must have wired up a combination of versions + * exceeding what we said we'd support. + */ + __db_errx(env, DB_STR_A("3622", + "No available version between %lu and %lu", + "%lu %lu"), (u_long)versions.min, + (u_long)versions.max); + return (DB_REP_UNAVAIL); + } + conn->version = conf.version; + + __repmgr_version_confirmation_marshal(env, &conf, buf); + buf[__REPMGR_VERSION_CONFIRMATION_SIZE] = '\0'; + DB_ASSERT(env, !IS_SUBORDINATE(db_rep)); + if ((ret = __repmgr_send_handshake(env, + conn, buf, sizeof(buf), 0)) != 0) + return (ret); + + conn->state = CONN_PARAMETERS; + } + return (ret); +} + +/* + * Sends a version-aware handshake to the remote site, only after we've verified + * that it is indeed version-aware. We can send either v2 or v3 handshake, + * depending on the connection's version. 
+ * + * PUBLIC: int __repmgr_send_handshake __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, void *, size_t, u_int32_t)); + */ +int +__repmgr_send_handshake(env, conn, opt, optlen, flags) + ENV *env; + REPMGR_CONNECTION *conn; + void *opt; + size_t optlen; + u_int32_t flags; +{ + DB_REP *db_rep; + REP *rep; + DBT cntrl, rec; + __repmgr_handshake_args hs; + __repmgr_v2handshake_args v2hs; + __repmgr_v3handshake_args v3hs; + repmgr_netaddr_t *my_addr; + size_t hostname_len, rec_len; + void *buf; + u_int8_t *p; + u_int32_t cntrl_len; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + my_addr = &SITE_FROM_EID(db_rep->self_eid)->net_addr; + + /* + * The cntrl part has various parameters (varies by version). The rec + * part has the host name, followed by whatever optional extra data was + * passed to us. + * + * Version awareness was introduced with protocol version 2 (so version + * 1 is handled elsewhere). + */ + switch (conn->version) { + case 2: + cntrl_len = __REPMGR_V2HANDSHAKE_SIZE; + break; + case 3: + cntrl_len = __REPMGR_V3HANDSHAKE_SIZE; + break; + case 4: + cntrl_len = __REPMGR_HANDSHAKE_SIZE; + break; + default: + return (__db_unknown_path(env, "__repmgr_send_handshake")); + } + hostname_len = strlen(my_addr->host); + rec_len = hostname_len + 1 + + (opt == NULL ? 0 : optlen); + + if ((ret = __os_malloc(env, cntrl_len + rec_len, &buf)) != 0) + return (ret); + + cntrl.data = p = buf; + switch (conn->version) { + case 2: + /* Not allowed to use multi-process feature in v2 group. 
*/ + DB_ASSERT(env, !IS_SUBORDINATE(db_rep)); + v2hs.port = my_addr->port; + v2hs.priority = rep->priority; + __repmgr_v2handshake_marshal(env, &v2hs, p); + break; + case 3: + v3hs.port = my_addr->port; + v3hs.priority = rep->priority; + v3hs.flags = flags; + __repmgr_v3handshake_marshal(env, &v3hs, p); + break; + case 4: + hs.port = my_addr->port; + hs.alignment = MEM_ALIGN; + hs.ack_policy = (u_int32_t)rep->perm_policy; + hs.flags = flags; + if (rep->priority > 0) + F_SET(&hs, ELECTABLE_SITE); + __repmgr_handshake_marshal(env, &hs, p); + break; + default: + DB_ASSERT(env, FALSE); + break; + } + cntrl.size = cntrl_len; + + p = rec.data = &p[cntrl_len]; + (void)strcpy((char*)p, my_addr->host); + p += hostname_len + 1; + if (opt != NULL) { + memcpy(p, opt, optlen); + p += optlen; + } + rec.size = (u_int32_t)(p - (u_int8_t*)rec.data); + + /* Never block on select thread: pass maxblock as 0. */ + ret = __repmgr_send_one(env, + conn, REPMGR_HANDSHAKE, &cntrl, &rec, 0); + __os_free(env, buf); + return (ret); +} + +static int +read_version_response(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + __repmgr_version_confirmation_args conf; + DBT vi; + char *hostname; + u_int32_t flags; + int ret; + + db_rep = env->rep_handle; + + if ((ret = __repmgr_find_version_info(env, conn, &vi)) != 0) + return (ret); + hostname = conn->input.repmgr_msg.rec.data; + if (vi.size == 0) { + if ((ret = accept_v1_handshake(env, conn, hostname)) != 0) + return (ret); + } else { + if ((ret = __repmgr_version_confirmation_unmarshal(env, + &conf, vi.data, vi.size, NULL)) != 0) + return (DB_REP_UNAVAIL); + if (conf.version >= DB_REPMGR_MIN_VERSION && + conf.version <= DB_REPMGR_VERSION) + conn->version = conf.version; + else { + /* + * Remote site "confirmed" a version outside of the + * range we proposed. It should never do that. 
			 */
			__db_errx(env, DB_STR_A("3623",
			    "Can't support confirmed version %lu", "%lu"),
			    (u_long)conf.version);
			return (DB_REP_UNAVAIL);
		}

		if ((ret = accept_handshake(env, conn, hostname)) != 0)
			return (ret);
		flags = IS_SUBORDINATE(db_rep) ? REPMGR_SUBORDINATE : 0;
		if ((ret = __repmgr_send_handshake(env,
		    conn, NULL, 0, flags)) != 0)
			return (ret);
	}
	conn->state = CONN_READY;
	return (ret);
}

/*
 * Examine the rec part of a handshake message to see if it has any version
 * information in it.  This is the magic that lets version-aware sites exchange
 * information, and yet avoids tripping up v1 sites, which don't know how to
 * look for it.
 *
 * PUBLIC: int __repmgr_find_version_info __P((ENV *,
 * PUBLIC:     REPMGR_CONNECTION *, DBT *));
 */
int
__repmgr_find_version_info(env, conn, vi)
	ENV *env;
	REPMGR_CONNECTION *conn;
	DBT *vi;
{
	DBT *dbt;
	char *hostname;
	u_int32_t hostname_len;

	dbt = &conn->input.repmgr_msg.rec;
	if (dbt->size == 0) {
		__db_errx(env, DB_STR("3624",
		    "handshake is missing rec part"));
		return (DB_REP_UNAVAIL);
	}
	hostname = dbt->data;
	/* NUL-terminate in place: this clobbers the final pad byte of rec. */
	hostname[dbt->size-1] = '\0';
	hostname_len = (u_int32_t)strlen(hostname);
	if (hostname_len + 1 == dbt->size) {
		/*
		 * The rec DBT held only the host name.  This is a simple legacy
		 * V1 handshake; it contains no version information.
		 */
		vi->size = 0;
	} else {
		/*
		 * There's more data than just the host name.  The remainder is
		 * available to be treated as a normal byte buffer (and read in
		 * by one of the unmarshal functions).  Note that the remaining
		 * length should not include the padding byte that we have
		 * already clobbered.
		 */
		vi->data = &((u_int8_t *)dbt->data)[hostname_len + 1];
		vi->size = (dbt->size - (hostname_len+1)) - 1;
	}
	return (0);
}

/*
 * Unmarshals a version-specific (v2, v3 or v4) handshake from the control
 * part of the message, and passes the normalized parameters along to
 * process_parameters().  Returns DB_REP_UNAVAIL on a malformed handshake.
 */
static int
accept_handshake(env, conn, hostname)
	ENV *env;
	REPMGR_CONNECTION *conn;
	char *hostname;
{
	__repmgr_handshake_args hs;
	__repmgr_v2handshake_args hs2;
	__repmgr_v3handshake_args hs3;
	u_int port;
	u_int32_t ack, flags;
	int electable;

	switch (conn->version) {
	case 2:
		if (__repmgr_v2handshake_unmarshal(env, &hs2,
		    conn->input.repmgr_msg.cntrl.data,
		    conn->input.repmgr_msg.cntrl.size, NULL) != 0)
			return (DB_REP_UNAVAIL);
		port = hs2.port;
		electable = hs2.priority > 0;
		/* V2 handshakes carry no ack policy and no flags. */
		ack = flags = 0;
		break;
	case 3:
		if (__repmgr_v3handshake_unmarshal(env, &hs3,
		    conn->input.repmgr_msg.cntrl.data,
		    conn->input.repmgr_msg.cntrl.size, NULL) != 0)
			return (DB_REP_UNAVAIL);
		port = hs3.port;
		electable = hs3.priority > 0;
		flags = hs3.flags;
		/* V3 handshakes carry no ack policy. */
		ack = 0;
		break;
	case 4:
		if (__repmgr_handshake_unmarshal(env, &hs,
		    conn->input.repmgr_msg.cntrl.data,
		    conn->input.repmgr_msg.cntrl.size, NULL) != 0)
			return (DB_REP_UNAVAIL);
		port = hs.port;
		electable = F_ISSET(&hs, ELECTABLE_SITE);
		flags = hs.flags;
		ack = hs.ack_policy;
		break;
	default:
		return (__db_unknown_path(env, "accept_handshake"));
	}

	return (process_parameters(env,
	    conn, hostname, port, ack, electable, flags));
}

/*
 * Accepts a legacy V1 handshake: a fixed-layout struct whose priority field
 * arrives in network byte order.  V1 carries no ack policy and no flags.
 */
static int
accept_v1_handshake(env, conn, hostname)
	ENV *env;
	REPMGR_CONNECTION *conn;
	char *hostname;
{
	DB_REPMGR_V1_HANDSHAKE *handshake;
	u_int32_t prio;
	int electable;

	handshake = conn->input.repmgr_msg.cntrl.data;
	if (conn->input.repmgr_msg.cntrl.size != sizeof(*handshake) ||
	    handshake->version != 1) {
		__db_errx(env, DB_STR("3625", "malformed V1 handshake"));
		return (DB_REP_UNAVAIL);
	}

	conn->version = 1;
	prio = ntohl(handshake->priority);
	electable = prio > 0;
	return (process_parameters(env,
	    conn, hostname, handshake->port, 0, electable, 0));
}

/* Caller must hold
mutex. */ +static int +process_parameters(env, conn, host, port, ack, electable, flags) + ENV *env; + REPMGR_CONNECTION *conn; + char *host; + u_int port; + int electable; + u_int32_t ack, flags; +{ + DB_REP *db_rep; + REPMGR_RETRY *retry; + REPMGR_SITE *site; + __repmgr_connect_reject_args reject; + u_int8_t reject_buf[__REPMGR_CONNECT_REJECT_SIZE]; + int eid, ret, sockopt; + + db_rep = env->rep_handle; + + /* Connection state can be used to discern incoming versus outgoing. */ + if (conn->state == CONN_CONNECTED) { + /* + * Since we initiated this as an outgoing connection, we + * obviously already know the host, port and site. We just need + * the other site's electability flag (which we'll grab below, + * after the big "else" clause). + */ + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(conn->eid)); + site = SITE_FROM_EID(conn->eid); + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "handshake from connection to %s:%lu EID %u", + site->net_addr.host, + (u_long)site->net_addr.port, conn->eid)); + } else { + DB_ASSERT(env, conn->state == CONN_NEGOTIATE || + conn->state == CONN_PARAMETERS); + /* + * Incoming connection: until now we haven't known what kind of + * connection we're dealing with (and in the case of a + * REP_CONNECTION, what its EID is); so it must be on the + * "orphans" list. But now that we've received the parameters + * we'll be able to figure all that out. + */ + if (LF_ISSET(APP_CHANNEL_CONNECTION)) { + conn->type = APP_CONNECTION; + return (0); + } else + conn->type = REP_CONNECTION; + + /* + * Now that we've been given the host and port, use them to find + * the site. + */ + if ((site = __repmgr_lookup_site(env, host, port)) != NULL && + site->membership == SITE_PRESENT) { + TAILQ_REMOVE(&db_rep->connections, conn, entries); + conn->ref_count--; + + eid = EID_FROM_SITE(site); + if (LF_ISSET(REPMGR_SUBORDINATE)) { + /* + * Accept it, as a supplementary source of + * input, but nothing else. 
+ */ + TAILQ_INSERT_TAIL(&site->sub_conns, + conn, entries); + conn->eid = eid; + +#ifdef SO_KEEPALIVE + sockopt = 1; + if (setsockopt(conn->fd, SOL_SOCKET, + SO_KEEPALIVE, (sockopt_t)&sockopt, + sizeof(sockopt)) != 0) { + ret = net_errno; + __db_err(env, ret, DB_STR("3626", + "can't set KEEPALIVE socket option")); + return (ret); + } +#endif + } else { + DB_EVENT(env, + DB_EVENT_REP_CONNECT_ESTD, &eid); + switch (site->state) { + case SITE_PAUSING: + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "handshake from paused site %s:%u EID %u", + host, port, eid)); + retry = site->ref.retry; + TAILQ_REMOVE(&db_rep->retries, + retry, entries); + __os_free(env, retry); + break; + case SITE_CONNECTED: + /* + * We got an incoming connection for a + * site we were already connected to; at + * least we thought we were. + */ + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "connection from %s:%u EID %u supersedes existing", + host, port, eid)); + + /* + * No need for site-oriented recovery, + * since we now have a replacement + * connection; so skip bust_connection() + * and call disable_conn() directly. + */ + if ((ret = __repmgr_disable_connection( + env, site->ref.conn)) != 0) + return (ret); + break; + case SITE_CONNECTING: + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "handshake from connecting site %s:%u EID %u", + host, port, eid)); + /* + * Connector thread will give up when it + * sees this site's state change, so we + * don't have to do anything else here. 
+ */ + break; + default: + DB_ASSERT(env, FALSE); + } + conn->eid = eid; + site->state = SITE_CONNECTED; + site->ref.conn = conn; + __os_gettime(env, + &site->last_rcvd_timestamp, 1); + } + } else { + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "rejecting connection from unknown or provisional site %s:%u", + host, port)); + reject.version = db_rep->membership_version; + reject.gen = db_rep->member_version_gen; + __repmgr_connect_reject_marshal(env, + &reject, reject_buf); + + if ((ret = __repmgr_send_own_msg(env, conn, + REPMGR_CONNECT_REJECT, reject_buf, + __REPMGR_CONNECT_REJECT_SIZE)) != 0) + return (ret); + + /* + * Since we haven't set conn->eid, bust_connection will + * not schedule a retry for this "failure", which is + * exactly what we want. + */ + return (DB_REP_UNAVAIL); + } + } + + if (electable) + F_SET(site, SITE_ELECTABLE); + else + F_CLR(site, SITE_ELECTABLE); + F_SET(site, SITE_HAS_PRIO); + site->ack_policy = (int)ack; + + /* + * If we're moping around wishing we knew who the master was, then + * getting in touch with another site might finally provide sufficient + * connectivity to find out. + */ + if (!IS_SUBORDINATE(db_rep) && /* us */ + !__repmgr_master_is_known(env) && + !LF_ISSET(REPMGR_SUBORDINATE)) { /* the remote site */ + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "handshake with no known master to wake election thread")); + db_rep->new_connection = TRUE; + if ((ret = __repmgr_signal(&db_rep->check_election)) != 0) + return (ret); + } + + return (0); +} + +static int +record_permlsn(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + __repmgr_permlsn_args *ackp, ack; + SITE_STRING_BUFFER location; + u_int32_t gen; + int ret; + + db_rep = env->rep_handle; + + DB_ASSERT(env, conn->version > 0 && + IS_READY_STATE(conn->state) && IS_VALID_EID(conn->eid)); + site = SITE_FROM_EID(conn->eid); + + /* + * Extract the LSN. Save it only if it is an improvement over what the + * site has already ack'ed. 
+ */ + if (conn->version == 1) { + ackp = conn->input.repmgr_msg.cntrl.data; + if (conn->input.repmgr_msg.cntrl.size != sizeof(ack) || + conn->input.repmgr_msg.rec.size != 0) { + __db_errx(env, DB_STR("3627", "bad ack msg size")); + return (DB_REP_UNAVAIL); + } + } else { + ackp = &ack; + if ((ret = __repmgr_permlsn_unmarshal(env, ackp, + conn->input.repmgr_msg.cntrl.data, + conn->input.repmgr_msg.cntrl.size, NULL)) != 0) + return (DB_REP_UNAVAIL); + } + + /* Ignore stale acks. */ + gen = db_rep->region->gen; + if (ackp->generation < gen) { + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "ignoring stale ack (%lu<%lu), from %s", + (u_long)ackp->generation, (u_long)gen, + __repmgr_format_site_loc(site, location))); + return (0); + } + VPRINT(env, (env, DB_VERB_REPMGR_MISC, + "got ack [%lu][%lu](%lu) from %s", (u_long)ackp->lsn.file, + (u_long)ackp->lsn.offset, (u_long)ackp->generation, + __repmgr_format_site_loc(site, location))); + + if (ackp->generation == gen && + LOG_COMPARE(&ackp->lsn, &site->max_ack) == 1) { + memcpy(&site->max_ack, &ackp->lsn, sizeof(DB_LSN)); + if ((ret = __repmgr_wake_waiters(env, + &db_rep->ack_waiters)) != 0) + return (ret); + } + return (0); +} + +/* + * PUBLIC: int __repmgr_write_some __P((ENV *, REPMGR_CONNECTION *)); + */ +int +__repmgr_write_some(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + QUEUED_OUTPUT *output; + REPMGR_FLAT *msg; + int bytes, ret; + + while (!STAILQ_EMPTY(&conn->outbound_queue)) { + output = STAILQ_FIRST(&conn->outbound_queue); + msg = output->msg; + if ((bytes = sendsocket(conn->fd, &msg->data[output->offset], + msg->length - output->offset, 0)) == SOCKET_ERROR) { + switch (ret = net_errno) { + case WOULDBLOCK: +#if defined(DB_REPMGR_EAGAIN) && DB_REPMGR_EAGAIN != WOULDBLOCK + case DB_REPMGR_EAGAIN: +#endif + return (0); + default: + __repmgr_fire_conn_err_event(env, conn, ret); + STAT(env->rep_handle-> + region->mstat.st_connection_drop++); + return (DB_REP_UNAVAIL); + } + } + + if ((output->offset += 
(size_t)bytes) >= msg->length) { + STAILQ_REMOVE_HEAD(&conn->outbound_queue, entries); + __os_free(env, output); + conn->out_queue_length--; + if (--msg->ref_count <= 0) + __os_free(env, msg); + + /* + * We've achieved enough movement to free up at least + * one space in the outgoing queue. Wake any message + * threads that may be waiting for space. Leave + * CONGESTED state so that when the queue reaches the + * high-water mark again, the filling thread will be + * allowed to try waiting again. + */ + conn->state = CONN_READY; + if ((ret = __repmgr_signal(&conn->drained)) != 0) + return (ret); + } + } + + return (0); +} diff --git a/src/repmgr/repmgr_stat.c b/src/repmgr/repmgr_stat.c new file mode 100644 index 00000000..8bcc85fa --- /dev/null +++ b/src/repmgr/repmgr_stat.c @@ -0,0 +1,355 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_STATISTICS +static int __repmgr_print_all __P((ENV *, u_int32_t)); +static int __repmgr_print_sites __P((ENV *)); +static int __repmgr_print_stats __P((ENV *, u_int32_t)); +static int __repmgr_stat __P((ENV *, DB_REPMGR_STAT **, u_int32_t)); + +/* + * __repmgr_stat_pp -- + * DB_ENV->repmgr_stat pre/post processing. + * + * PUBLIC: int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); + */ +int +__repmgr_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_REPMGR_STAT **statp; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->repmgr_stat", DB_INIT_REP); + + if ((ret = __db_fchk(env, + "DB_ENV->repmgr_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + return (__repmgr_stat(env, statp, flags)); +} + +/* + * __repmgr_stat -- + * ENV->repmgr_stat. 
+ */ +static int +__repmgr_stat(env, statp, flags) + ENV *env; + DB_REPMGR_STAT **statp; + u_int32_t flags; +{ + DB_REP *db_rep; + DB_REPMGR_STAT *copy, *stats; + uintmax_t tmp; + int ret; + + db_rep = env->rep_handle; + stats = &db_rep->region->mstat; + + *statp = NULL; + + /* Allocate a stat struct to return to the user. */ + if ((ret = __os_umalloc(env, sizeof(DB_REPMGR_STAT), ©)) != 0) + return (ret); + + memcpy(copy, stats, sizeof(*stats)); + if (LF_ISSET(DB_STAT_CLEAR)) { + tmp = stats->st_max_elect_threads; + memset(stats, 0, sizeof(DB_REPMGR_STAT)); + stats->st_max_elect_threads = tmp; + } + + *statp = copy; + return (0); +} + +/* + * __repmgr_stat_print_pp -- + * DB_ENV->repmgr_stat_print pre/post processing. + * + * PUBLIC: int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__repmgr_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG_XX( + env, rep_handle, "DB_ENV->repmgr_stat_print", DB_INIT_REP); + + if ((ret = __db_fchk(env, "DB_ENV->repmgr_stat_print", + flags, DB_STAT_ALL | DB_STAT_CLEAR)) != 0) + return (ret); + + return (__repmgr_stat_print(env, flags)); +} + +/* + * PUBLIC: int __repmgr_stat_print __P((ENV *, u_int32_t)); + */ +int +__repmgr_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + if ((ret = __repmgr_print_stats(env, orig_flags)) == 0) + ret = __repmgr_print_sites(env); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __repmgr_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +static int +__repmgr_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_REPMGR_STAT *sp; + int ret; + + if ((ret = __repmgr_stat(env, &sp, flags)) != 0) + return (ret); + + __db_dl(env, "Number of PERM messages not acknowledged", + 
(u_long)sp->st_perm_failed); + __db_dl(env, "Number of messages queued due to network delay", + (u_long)sp->st_msgs_queued); + __db_dl(env, "Number of messages discarded due to queue length", + (u_long)sp->st_msgs_dropped); + __db_dl(env, "Number of existing connections dropped", + (u_long)sp->st_connection_drop); + __db_dl(env, "Number of failed new connection attempts", + (u_long)sp->st_connect_fail); + __db_dl(env, "Number of currently active election threads", + (u_long)sp->st_elect_threads); + __db_dl(env, "Election threads for which space is reserved", + (u_long)sp->st_max_elect_threads); + + __os_ufree(env, sp); + + return (0); +} + +static int +__repmgr_print_sites(env) + ENV *env; +{ + DB_REPMGR_SITE *list; + DB_MSGBUF mb; + u_int count, i; + int ret; + + if ((ret = __repmgr_site_list(env->dbenv, &count, &list)) != 0) + return (ret); + + if (count == 0) + return (0); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_REPMGR site information:"); + + DB_MSGBUF_INIT(&mb); + for (i = 0; i < count; ++i) { + __db_msgadd(env, &mb, "%s (eid: %d, port: %u", + list[i].host, list[i].eid, list[i].port); + if (list[i].status != 0) + __db_msgadd(env, &mb, ", %sconnected", + list[i].status == DB_REPMGR_CONNECTED ? "" : "dis"); + __db_msgadd(env, &mb, ", %speer", + F_ISSET(&list[i], DB_REPMGR_ISPEER) ? "" : "non-"); + __db_msgadd(env, &mb, ")"); + DB_MSGBUF_FLUSH(env, &mb); + } + + __os_ufree(env, list); + + return (0); +} + +/* + * __repmgr_print_all -- + * Display debugging replication manager statistics. 
+ */ +static int +__repmgr_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + COMPQUIET(env, NULL); + COMPQUIET(flags, 0); + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__repmgr_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_REPMGR_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__repmgr_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} +#endif + +/* + * PUBLIC: int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); + */ +int +__repmgr_site_list(dbenv, countp, listp) + DB_ENV *dbenv; + u_int *countp; + DB_REPMGR_SITE **listp; +{ + DB_REP *db_rep; + REP *rep; + DB_REPMGR_SITE *status; + ENV *env; + DB_THREAD_INFO *ip; + REPMGR_SITE *site; + size_t array_size, total_size; + int eid, locked, ret; + u_int count, i; + char *name; + + env = dbenv->env; + db_rep = env->rep_handle; + ret = 0; + + ENV_NOT_CONFIGURED( + env, db_rep->region, "DB_ENV->repmgr_site_list", DB_INIT_REP); + + if (REP_ON(env)) { + rep = db_rep->region; + LOCK_MUTEX(db_rep->mutex); + locked = TRUE; + + ENV_ENTER(env, ip); + if (rep->siteinfo_seq > db_rep->siteinfo_seq) + ret = __repmgr_sync_siteaddr(env); + ENV_LEAVE(env, ip); + if (ret != 0) + goto err; + } else { + rep = NULL; + locked = FALSE; + } + + /* Initialize for empty list or error return. */ + *countp = 0; + *listp = NULL; + + /* + * First, add up how much memory we need for the host names, excluding + * the local site. + */ + for (i = 0, count = 0, total_size = 0; i < db_rep->site_cnt; i++) { + site = &db_rep->sites[i]; + + if ((int)i == db_rep->self_eid || site->membership == 0) + continue; + + /* Make room for the NUL terminating byte. 
*/ + total_size += strlen(site->net_addr.host) + 1; + count++; + } + if (count == 0) + goto err; + array_size = sizeof(DB_REPMGR_SITE) * count; + total_size += array_size; + + if ((ret = __os_umalloc(env, total_size, &status)) != 0) + goto err; + + /* + * Put the storage for the host names after the array of structs. This + * way, the caller can free the whole thing in one single operation. + */ + name = (char *)((u_int8_t *)status + array_size); + for (eid = 0, i = 0; eid < (int)db_rep->site_cnt; eid++) { + site = &db_rep->sites[eid]; + if (eid == db_rep->self_eid || site->membership == 0) + continue; + + /* If we don't have rep, we can't really know EID yet. */ + status[i].eid = rep ? eid : DB_EID_INVALID; + + status[i].host = name; + (void)strcpy(name, site->net_addr.host); + name += strlen(name) + 1; + + status[i].port = site->net_addr.port; + + status[i].flags = 0; + + if (FLD_ISSET(site->config, DB_REPMGR_PEER)) + F_SET(&status[i], DB_REPMGR_ISPEER); + + /* + * If we haven't started a communications thread, connection + * status is kind of meaningless. This distinction is useful + * for calls from the db_stat utility: it could be useful for + * db_stat to display known sites with EID; but would be + * confusing for it to display "disconnected" if another process + * does indeed have a connection established (db_stat can't know + * that). + */ + status[i].status = db_rep->selector == NULL ? 0 : + (site->state == SITE_CONNECTED && + IS_READY_STATE(site->ref.conn->state) ? + DB_REPMGR_CONNECTED : DB_REPMGR_DISCONNECTED); + i++; + } + + *countp = count; + *listp = status; + +err: if (locked) + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} diff --git a/src/repmgr/repmgr_stub.c b/src/repmgr/repmgr_stub.c new file mode 100644 index 00000000..ab5fc66d --- /dev/null +++ b/src/repmgr/repmgr_stub.c @@ -0,0 +1,262 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#ifndef HAVE_REPLICATION_THREADS +#include "db_config.h" + +#include "db_int.h" + +/* + * If the library wasn't compiled with replication support, various routines + * aren't available. Stub them here, returning an appropriate error. + */ +static int __db_norepmgr __P((DB_ENV *)); + +/* + * __db_norepmgr -- + * Error when a Berkeley DB build doesn't include replication mgr support. + */ +static int +__db_norepmgr(dbenv) + DB_ENV *dbenv; +{ + __db_errx(dbenv->env, DB_STR("3628", + "library build did not include support for the Replication Manager")); + return (DB_OPNOTSUP); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_close __P((ENV *)); + * PUBLIC: #endif + */ +int +__repmgr_close(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_get_ack_policy __P((DB_ENV *, int *)); + * PUBLIC: #endif + */ +int +__repmgr_get_ack_policy(dbenv, policy) + DB_ENV *dbenv; + int *policy; +{ + COMPQUIET(policy, NULL); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_set_ack_policy __P((DB_ENV *, int)); + * PUBLIC: #endif + */ +int +__repmgr_set_ack_policy(dbenv, policy) + DB_ENV *dbenv; + int policy; +{ + COMPQUIET(policy, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_site + * PUBLIC: __P((DB_ENV *, const char *, u_int, DB_SITE **, u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_site(dbenv, host, port, dbsitep, flags) + DB_ENV *dbenv; + const char *host; + u_int port; + DB_SITE **dbsitep; + u_int32_t flags; +{ + COMPQUIET(host, NULL); + COMPQUIET(port, 0); + COMPQUIET(dbsitep, NULL); + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_site_by_eid __P((DB_ENV *, int, DB_SITE **)); + * PUBLIC: #endif + */ +int 
+__repmgr_site_by_eid(dbenv, eid, dbsitep) + DB_ENV *dbenv; + int eid; + DB_SITE **dbsitep; +{ + COMPQUIET(eid, 0); + COMPQUIET(dbsitep, NULL); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_local_site + * PUBLIC: __P((DB_ENV *, DB_SITE **)); + * PUBLIC: #endif + */ +int +__repmgr_local_site(dbenv, dbsitep) + DB_ENV *dbenv; + DB_SITE **dbsitep; +{ + COMPQUIET(dbsitep, NULL); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_site_list __P((DB_ENV *, u_int *, DB_REPMGR_SITE **)); + * PUBLIC: #endif + */ +int +__repmgr_site_list(dbenv, countp, listp) + DB_ENV *dbenv; + u_int *countp; + DB_REPMGR_SITE **listp; +{ + COMPQUIET(countp, NULL); + COMPQUIET(listp, NULL); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_start __P((DB_ENV *, int, u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_start(dbenv, nthreads, flags) + DB_ENV *dbenv; + int nthreads; + u_int32_t flags; +{ + COMPQUIET(nthreads, 0); + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_stat_pp __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_REPMGR_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_stat_print_pp __P((DB_ENV *, u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_handle_event __P((ENV *, u_int32_t, void *)); + * PUBLIC: #endif + */ +int +__repmgr_handle_event(env, event, info) + ENV *env; + 
u_int32_t event; + void *info; +{ + COMPQUIET(env, NULL); + COMPQUIET(event, 0); + COMPQUIET(info, NULL); + + /* + * It's not an error for this function to be called. Replication calls + * this to let repmgr handle events. If repmgr isn't part of the build, + * all replication events should be forwarded to the application. + */ + return (DB_EVENT_NOT_HANDLED); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_channel __P((DB_ENV *, int, DB_CHANNEL **, u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_channel(dbenv, eid, dbchannelp, flags) + DB_ENV *dbenv; + int eid; + DB_CHANNEL **dbchannelp; + u_int32_t flags; +{ + COMPQUIET(eid, 0); + COMPQUIET(dbchannelp, NULL); + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_set_msg_dispatch __P((DB_ENV *, + * PUBLIC: void (*)(DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t), + * PUBLIC: u_int32_t)); + * PUBLIC: #endif + */ +int +__repmgr_set_msg_dispatch(dbenv, dispatch, flags) + DB_ENV *dbenv; + void (*dispatch) __P((DB_ENV *, + DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); + u_int32_t flags; +{ + COMPQUIET(dispatch, NULL); + COMPQUIET(flags, 0); + return (__db_norepmgr(dbenv)); +} + +/* + * PUBLIC: #ifndef HAVE_REPLICATION_THREADS + * PUBLIC: int __repmgr_init_recover __P((ENV *, DB_DISTAB *)); + * PUBLIC: #endif + */ +int +__repmgr_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + COMPQUIET(env, NULL); + COMPQUIET(dtabp, NULL); + return (0); +} +#endif /* !HAVE_REPLICATION_THREADS */ diff --git a/src/repmgr/repmgr_util.c b/src/repmgr/repmgr_util.c new file mode 100644 index 00000000..b67e993d --- /dev/null +++ b/src/repmgr/repmgr_util.c @@ -0,0 +1,2026 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/txn.h" + +#define INITIAL_SITES_ALLOCATION 3 /* Arbitrary guess. */ +#define RETRY_TIME_ADJUST 200000 /* Arbitrary estimate. */ + +static int get_eid __P((ENV *, const char *, u_int, int *)); +static int __repmgr_addrcmp __P((repmgr_netaddr_t *, repmgr_netaddr_t *)); +static int read_gmdb __P((ENV *, DB_THREAD_INFO *, u_int8_t **, size_t *)); + +/* + * Schedules a future attempt to re-establish a connection with the given site. + * Usually, we wait the configured retry_wait period. But if the "immediate" + * parameter is given as TRUE, we'll make the wait time 0, and put the request + * at the _beginning_ of the retry queue. + * + * PUBLIC: int __repmgr_schedule_connection_attempt __P((ENV *, u_int, int)); + * + * !!! + * Caller should hold mutex. + * + * Unless an error occurs, we always attempt to wake the main thread; + * __repmgr_bust_connection relies on this behavior. + */ +int +__repmgr_schedule_connection_attempt(env, eid, immediate) + ENV *env; + u_int eid; + int immediate; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_RETRY *retry, *target; + REPMGR_SITE *site; + db_timespec t; + int cmp, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + if ((ret = __os_malloc(env, sizeof(*retry), &retry)) != 0) + return (ret); + + site = SITE_FROM_EID(eid); + __os_gettime(env, &t, 1); + if (immediate) + TAILQ_INSERT_HEAD(&db_rep->retries, retry, entries); + else { + TIMESPEC_ADD_DB_TIMEOUT(&t, rep->connection_retry_wait); + + /* + * Although it's extremely rare, two sites could be trying to + * connect to each other simultaneously, and each could kill its + * own connection when it received the other's. And this could + * continue, in sync, since configured retry times are usually + * the same. So, perturb one site's retry time by a small + * amount to break the cycle. 
Since each site has its own + * address, it's always possible to decide which is "greater + * than". + * (The mnemonic is that a server conventionally has a + * small well-known port number. And clients have the right to + * connect to servers, not the other way around.) + */ + cmp = __repmgr_addrcmp(&site->net_addr, + &SITE_FROM_EID(db_rep->self_eid)->net_addr); + DB_ASSERT(env, cmp != 0); + if (cmp == 1) + TIMESPEC_ADD_DB_TIMEOUT(&t, RETRY_TIME_ADJUST); + + /* + * Insert the new "retry" on the (time-ordered) list in its + * proper position. To do so, find the list entry ("target") + * with a later time; insert the new entry just before that. + */ + TAILQ_FOREACH(target, &db_rep->retries, entries) { + if (timespeccmp(&target->time, &t, >)) + break; + } + if (target == NULL) + TAILQ_INSERT_TAIL(&db_rep->retries, retry, entries); + else + TAILQ_INSERT_BEFORE(target, retry, entries); + + } + retry->eid = eid; + retry->time = t; + + site->state = SITE_PAUSING; + site->ref.retry = retry; + + return (__repmgr_wake_main_thread(env)); +} + +/* + * Compare two network addresses (lexicographically), and return -1, 0, or 1, as + * the first is less than, equal to, or greater than the second. + */ +static int +__repmgr_addrcmp(addr1, addr2) + repmgr_netaddr_t *addr1, *addr2; +{ + int cmp; + + cmp = strcmp(addr1->host, addr2->host); + if (cmp != 0) + return (cmp); + + if (addr1->port < addr2->port) + return (-1); + else if (addr1->port > addr2->port) + return (1); + return (0); +} + +/* + * Initialize the necessary control structures to begin reading a new input + * message. + * + * PUBLIC: void __repmgr_reset_for_reading __P((REPMGR_CONNECTION *)); + */ +void +__repmgr_reset_for_reading(con) + REPMGR_CONNECTION *con; +{ + con->reading_phase = SIZES_PHASE; + __repmgr_iovec_init(&con->iovecs); + __repmgr_add_buffer(&con->iovecs, + con->msg_hdr_buf, __REPMGR_MSG_HDR_SIZE); +} + +/* + * Constructs a DB_REPMGR_CONNECTION structure. 
+ * + * PUBLIC: int __repmgr_new_connection __P((ENV *, + * PUBLIC: REPMGR_CONNECTION **, socket_t, int)); + */ +int +__repmgr_new_connection(env, connp, s, state) + ENV *env; + REPMGR_CONNECTION **connp; + socket_t s; + int state; +{ + REPMGR_CONNECTION *c; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(REPMGR_CONNECTION), &c)) != 0) + return (ret); + if ((ret = __repmgr_alloc_cond(&c->drained)) != 0) { + __os_free(env, c); + return (ret); + } + if ((ret = __repmgr_init_waiters(env, &c->response_waiters)) != 0) { + (void)__repmgr_free_cond(&c->drained); + __os_free(env, c); + return (ret); + } + + c->fd = s; + c->state = state; + c->type = UNKNOWN_CONN_TYPE; +#ifdef DB_WIN32 + c->event_object = WSA_INVALID_EVENT; +#endif + + STAILQ_INIT(&c->outbound_queue); + c->out_queue_length = 0; + + __repmgr_reset_for_reading(c); + *connp = c; + + return (0); +} + +/* + * PUBLIC: int __repmgr_new_site __P((ENV *, REPMGR_SITE**, + * PUBLIC: const char *, u_int)); + * + * Manipulates the process-local copy of the sites list. So, callers should + * hold the db_rep->mutex (except for single-threaded, pre-open configuration). + */ +int +__repmgr_new_site(env, sitep, host, port) + ENV *env; + REPMGR_SITE **sitep; + const char *host; + u_int port; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site, *sites; + char *p; + u_int i, new_site_max; + int ret; + + db_rep = env->rep_handle; + if (db_rep->site_cnt >= db_rep->site_max) { + new_site_max = db_rep->site_max == 0 ? + INITIAL_SITES_ALLOCATION : db_rep->site_max * 2; + if ((ret = __os_malloc(env, + sizeof(REPMGR_SITE) * new_site_max, &sites)) != 0) + return (ret); + if (db_rep->site_max > 0) { + /* + * For each site in the array, copy the old struct to + * the space allocated for the new struct. But the + * sub_conns list header (and one of the conn structs on + * the list, if any) contain pointers to the address of + * the old list header; so we have to move them + * explicitly. 
If not for that, we could use a simple + * __os_realloc() call. + */ + for (i = 0; i < db_rep->site_cnt; i++) { + sites[i] = db_rep->sites[i]; + TAILQ_INIT(&sites[i].sub_conns); + while (!TAILQ_EMPTY( + &db_rep->sites[i].sub_conns)) { + conn = TAILQ_FIRST( + &db_rep->sites[i].sub_conns); + TAILQ_REMOVE( + &db_rep->sites[i].sub_conns, + conn, entries); + TAILQ_INSERT_TAIL(&sites[i].sub_conns, + conn, entries); + } + } + __os_free(env, db_rep->sites); + } + db_rep->sites = sites; + db_rep->site_max = new_site_max; + } + if ((ret = __os_strdup(env, host, &p)) != 0) { + /* No harm in leaving the increased site_max intact. */ + return (ret); + } + site = &db_rep->sites[db_rep->site_cnt++]; + + site->net_addr.host = p; + site->net_addr.port = (u_int16_t)port; + + ZERO_LSN(site->max_ack); + site->ack_policy = 0; + site->alignment = 0; + site->flags = 0; + timespecclear(&site->last_rcvd_timestamp); + TAILQ_INIT(&site->sub_conns); + site->connector = NULL; + site->state = SITE_IDLE; + + site->membership = 0; + site->config = 0; + + *sitep = site; + return (0); +} + +/* + * PUBLIC: int __repmgr_create_mutex __P((ENV *, mgr_mutex_t **)); + */ +int +__repmgr_create_mutex(env, mtxp) + ENV *env; + mgr_mutex_t **mtxp; +{ + mgr_mutex_t *mtx; + int ret; + + if ((ret = __os_malloc(env, sizeof(mgr_mutex_t), &mtx)) == 0 && + (ret = __repmgr_create_mutex_pf(mtx)) != 0) { + __os_free(env, mtx); + } + if (ret == 0) + *mtxp = mtx; + return (ret); +} + +/* + * PUBLIC: int __repmgr_destroy_mutex __P((ENV *, mgr_mutex_t *)); + */ +int +__repmgr_destroy_mutex(env, mtx) + ENV *env; + mgr_mutex_t *mtx; +{ + int ret; + + ret = __repmgr_destroy_mutex_pf(mtx); + __os_free(env, mtx); + return (ret); +} + +/* + * Kind of like a destructor for a repmgr_netaddr_t: cleans up any subordinate + * allocated memory pointed to by the addr, though it does not free the struct + * itself. 
+ * + * PUBLIC: void __repmgr_cleanup_netaddr __P((ENV *, repmgr_netaddr_t *)); + */ +void +__repmgr_cleanup_netaddr(env, addr) + ENV *env; + repmgr_netaddr_t *addr; +{ + if (addr->host != NULL) { + __os_free(env, addr->host); + addr->host = NULL; + } +} + +/* + * PUBLIC: void __repmgr_iovec_init __P((REPMGR_IOVECS *)); + */ +void +__repmgr_iovec_init(v) + REPMGR_IOVECS *v; +{ + v->offset = v->count = 0; + v->total_bytes = 0; +} + +/* + * PUBLIC: void __repmgr_add_buffer __P((REPMGR_IOVECS *, void *, size_t)); + * + * !!! + * There is no checking for overflow of the vectors[5] array. + */ +void +__repmgr_add_buffer(v, address, length) + REPMGR_IOVECS *v; + void *address; + size_t length; +{ + if (length > 0) { + v->vectors[v->count].iov_base = address; + v->vectors[v->count++].iov_len = (u_long)length; + v->total_bytes += length; + } +} + +/* + * PUBLIC: void __repmgr_add_dbt __P((REPMGR_IOVECS *, const DBT *)); + */ +void +__repmgr_add_dbt(v, dbt) + REPMGR_IOVECS *v; + const DBT *dbt; +{ + if (dbt->size > 0) { + v->vectors[v->count].iov_base = dbt->data; + v->vectors[v->count++].iov_len = dbt->size; + v->total_bytes += dbt->size; + } +} + +/* + * Update a set of iovecs to reflect the number of bytes transferred in an I/O + * operation, so that the iovecs can be used to continue transferring where we + * left off. + * Returns TRUE if the set of buffers is now fully consumed, FALSE if more + * remains. + * + * PUBLIC: int __repmgr_update_consumed __P((REPMGR_IOVECS *, size_t)); + */ +int +__repmgr_update_consumed(v, byte_count) + REPMGR_IOVECS *v; + size_t byte_count; +{ + db_iovec_t *iov; + int i; + + for (i = v->offset; ; i++) { + DB_ASSERT(NULL, i < v->count && byte_count > 0); + iov = &v->vectors[i]; + if (byte_count > iov->iov_len) { + /* + * We've consumed (more than) this vector's worth. + * Adjust count and continue. + */ + byte_count -= iov->iov_len; + } else { + /* + * Adjust length of remaining portion of vector. 
+ * byte_count can never be greater than iov_len, or we + * would not be in this section of the if clause. + */ + iov->iov_len -= (u_int32_t)byte_count; + if (iov->iov_len > 0) { + /* + * Still some left in this vector. Adjust base + * address too, and leave offset pointing here. + */ + iov->iov_base = (void *) + ((u_int8_t *)iov->iov_base + byte_count); + v->offset = i; + } else { + /* + * Consumed exactly to a vector boundary. + * Advance to next vector for next time. + */ + v->offset = i+1; + } + /* + * If offset has reached count, the entire thing is + * consumed. + */ + return (v->offset >= v->count); + } + } +} + +/* + * Builds a buffer containing our network address information, suitable for + * publishing as cdata via a call to rep_start, and sets up the given DBT to + * point to it. The buffer is dynamically allocated memory, and the caller must + * assume responsibility for it. + * + * PUBLIC: int __repmgr_prepare_my_addr __P((ENV *, DBT *)); + */ +int +__repmgr_prepare_my_addr(env, dbt) + ENV *env; + DBT *dbt; +{ + DB_REP *db_rep; + repmgr_netaddr_t addr; + size_t size, hlen; + u_int16_t port_buffer; + u_int8_t *ptr; + int ret; + + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + addr = SITE_FROM_EID(db_rep->self_eid)->net_addr; + UNLOCK_MUTEX(db_rep->mutex); + /* + * The cdata message consists of the 2-byte port number, in network byte + * order, followed by the null-terminated host name string. + */ + port_buffer = htons(addr.port); + size = sizeof(port_buffer) + (hlen = strlen(addr.host) + 1); + if ((ret = __os_malloc(env, size, &ptr)) != 0) + return (ret); + + DB_INIT_DBT(*dbt, ptr, size); + + memcpy(ptr, &port_buffer, sizeof(port_buffer)); + ptr = &ptr[sizeof(port_buffer)]; + memcpy(ptr, addr.host, hlen); + + return (0); +} + +/* + * !!! + * This may only be called after threads have been started, because we don't + * know the answer until we have established group membership (e.g., reading the + * membership database). 
That should be OK, because we only need this + * for starting an election, or counting acks after sending a PERM message. + * + * PUBLIC: int __repmgr_get_nsites __P((ENV *, u_int32_t *)); + */ +int +__repmgr_get_nsites(env, nsitesp) + ENV *env; + u_int32_t *nsitesp; +{ + DB_REP *db_rep; + u_int32_t nsites; + + db_rep = env->rep_handle; + + if ((nsites = db_rep->region->config_nsites) == 0) { + __db_errx(env, DB_STR("3672", + "Nsites unknown before repmgr_start()")); + return (EINVAL); + } + *nsitesp = nsites; + return (0); +} + +/* + * PUBLIC: int __repmgr_thread_failure __P((ENV *, int)); + */ +int +__repmgr_thread_failure(env, why) + ENV *env; + int why; +{ + DB_REP *db_rep; + + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + (void)__repmgr_stop_threads(env); + UNLOCK_MUTEX(db_rep->mutex); + return (__env_panic(env, why)); +} + +/* + * Format a printable representation of a site location, suitable for inclusion + * in an error message. The buffer must be at least as big as + * MAX_SITE_LOC_STRING. + * + * PUBLIC: char *__repmgr_format_eid_loc __P((DB_REP *, + * PUBLIC: REPMGR_CONNECTION *, char *)); + * + * Caller must hold mutex. 
+ */ +char * +__repmgr_format_eid_loc(db_rep, conn, buffer) + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + char *buffer; +{ + int eid; + + if (conn->type == APP_CONNECTION) + snprintf(buffer, + MAX_SITE_LOC_STRING, "(application channel)"); + else if (conn->type == REP_CONNECTION && + IS_VALID_EID(eid = conn->eid)) + (void)__repmgr_format_site_loc(SITE_FROM_EID(eid), buffer); + else + snprintf(buffer, MAX_SITE_LOC_STRING, "(unidentified site)"); + return (buffer); +} + +/* + * PUBLIC: char *__repmgr_format_site_loc __P((REPMGR_SITE *, char *)); + */ +char * +__repmgr_format_site_loc(site, buffer) + REPMGR_SITE *site; + char *buffer; +{ + return (__repmgr_format_addr_loc(&site->net_addr, buffer)); +} + +/* + * PUBLIC: char *__repmgr_format_addr_loc __P((repmgr_netaddr_t *, char *)); + */ +char * +__repmgr_format_addr_loc(addr, buffer) + repmgr_netaddr_t *addr; + char *buffer; +{ + snprintf(buffer, MAX_SITE_LOC_STRING, "site %s:%lu", + addr->host, (u_long)addr->port); + return (buffer); +} + +/* + * PUBLIC: int __repmgr_repstart __P((ENV *, u_int32_t)); + */ +int +__repmgr_repstart(env, flags) + ENV *env; + u_int32_t flags; +{ + DBT my_addr; + int ret; + + /* Include "cdata" in case sending to old-version site. 
*/ + if ((ret = __repmgr_prepare_my_addr(env, &my_addr)) != 0) + return (ret); + ret = __rep_start_int(env, &my_addr, flags); + __os_free(env, my_addr.data); + if (ret != 0) + __db_err(env, ret, DB_STR("3673", "rep_start")); + return (ret); +} + +/* + * PUBLIC: int __repmgr_become_master __P((ENV *)); + */ +int +__repmgr_become_master(env) + ENV *env; +{ + DB_REP *db_rep; + DB_THREAD_INFO *ip; + DB *dbp; + DB_TXN *txn; + REPMGR_SITE *site; + DBT key_dbt, data_dbt; + __repmgr_membership_key_args key; + __repmgr_membership_data_args member_status; + repmgr_netaddr_t addr; + u_int32_t status; + u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE]; + u_int8_t key_buf[MAX_MSG_BUF]; + size_t len; + u_int i; + int ret, t_ret; + + db_rep = env->rep_handle; + dbp = NULL; + txn = NULL; + + /* Examine membership list to see if we have a victim in limbo. */ + LOCK_MUTEX(db_rep->mutex); + ZERO_LSN(db_rep->limbo_failure); + ZERO_LSN(db_rep->durable_lsn); + db_rep->limbo_victim = DB_EID_INVALID; + db_rep->limbo_resolution_needed = FALSE; + FOR_EACH_REMOTE_SITE_INDEX(i) { + site = SITE_FROM_EID(i); + if (site->membership == SITE_ADDING || + site->membership == SITE_DELETING) { + db_rep->limbo_victim = (int)i; + db_rep->limbo_resolution_needed = TRUE; + + /* + * Since there can never be more than one limbo victim, + * when we find one we don't have to continue looking + * for others. + */ + break; + } + } + db_rep->client_intent = FALSE; + UNLOCK_MUTEX(db_rep->mutex); + + if ((ret = __repmgr_repstart(env, DB_REP_MASTER)) != 0) + return (ret); + + if (db_rep->have_gmdb) + return (0); + + db_rep->member_version_gen = db_rep->region->gen; + ENV_ENTER(env, ip); + if ((ret = __repmgr_hold_master_role(env, NULL)) != 0) + goto leave; +retry: + if ((ret = __repmgr_setup_gmdb_op(env, ip, &txn, DB_CREATE)) != 0) + goto err; + + DB_ASSERT(env, txn != NULL); + dbp = db_rep->gmdb; + DB_ASSERT(env, dbp != NULL); + + /* Write the meta-data record. 
*/ + if ((ret = __repmgr_set_gm_version(env, ip, txn, 1)) != 0) + goto err; + + /* Write a record representing each site in the group. */ + for (i = 0; i < db_rep->site_cnt; i++) { + LOCK_MUTEX(db_rep->mutex); + site = SITE_FROM_EID(i); + addr = site->net_addr; + status = site->membership; + UNLOCK_MUTEX(db_rep->mutex); + if (status == 0) + continue; + DB_INIT_DBT(key.host, addr.host, strlen(addr.host) + 1); + key.port = addr.port; + ret = __repmgr_membership_key_marshal(env, + &key, key_buf, sizeof(key_buf), &len); + DB_ASSERT(env, ret == 0); + DB_INIT_DBT(key_dbt, key_buf, len); + member_status.flags = status; + __repmgr_membership_data_marshal(env, &member_status, data_buf); + DB_INIT_DBT(data_dbt, data_buf, __REPMGR_MEMBERSHIP_DATA_SIZE); + if ((ret = __db_put(dbp, ip, txn, &key_dbt, &data_dbt, 0)) != 0) + goto err; + } + +err: + if (txn != NULL) { + if ((t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && + ret == 0) + ret = t_ret; + if ((t_ret = __repmgr_cleanup_gmdb_op(env, TRUE)) != 0 && + ret == 0) + ret = t_ret; + } + if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) + goto retry; + if ((t_ret = __repmgr_rlse_master_role(env)) != 0 && ret == 0) + ret = t_ret; +leave: + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * Visits all the connections we know about, performing the desired action. + * "err_quit" determines whether we give up, or soldier on, in case of an + * error. + * + * PUBLIC: int __repmgr_each_connection __P((ENV *, + * PUBLIC: CONNECTION_ACTION, void *, int)); + * + * !!! + * Caller must hold mutex. 
+ */ +int +__repmgr_each_connection(env, callback, info, err_quit) + ENV *env; + CONNECTION_ACTION callback; + void *info; + int err_quit; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn, *next; + REPMGR_SITE *site; + u_int eid; + int ret, t_ret; + +#define HANDLE_ERROR \ + do { \ + if (err_quit) \ + return (t_ret); \ + if (ret == 0) \ + ret = t_ret; \ + } while (0) + + db_rep = env->rep_handle; + ret = 0; + + /* + * We might have used TAILQ_FOREACH here, except that in some cases we + * need to unlink an element along the way. + */ + for (conn = TAILQ_FIRST(&db_rep->connections); + conn != NULL; + conn = next) { + next = TAILQ_NEXT(conn, entries); + + if ((t_ret = (*callback)(env, conn, info)) != 0) + HANDLE_ERROR; + } + + FOR_EACH_REMOTE_SITE_INDEX(eid) { + site = SITE_FROM_EID(eid); + + if (site->state == SITE_CONNECTED) { + conn = site->ref.conn; + if ((t_ret = (*callback)(env, conn, info)) != 0) + HANDLE_ERROR; + } + + for (conn = TAILQ_FIRST(&site->sub_conns); + conn != NULL; + conn = next) { + next = TAILQ_NEXT(conn, entries); + if ((t_ret = (*callback)(env, conn, info)) != 0) + HANDLE_ERROR; + } + } + + return (0); +} + +/* + * Initialize repmgr's portion of the shared region area. Note that we can't + * simply get the REP* address from the env as we usually do, because at the + * time of this call it hasn't been linked into there yet. + * + * This function is only called during creation of the region. If anything + * fails, our caller will panic and remove the region. So, if we have any + * failure, we don't have to clean up any partial allocation. 
+ * + * PUBLIC: int __repmgr_open __P((ENV *, void *)); + */ +int +__repmgr_open(env, rep_) + ENV *env; + void *rep_; +{ + DB_REP *db_rep; + REP *rep; + int ret; + + db_rep = env->rep_handle; + rep = rep_; + + if ((ret = __mutex_alloc(env, MTX_REPMGR, 0, &rep->mtx_repmgr)) != 0) + return (ret); + + DB_ASSERT(env, rep->siteinfo_seq == 0 && db_rep->siteinfo_seq == 0); + rep->siteinfo_off = INVALID_ROFF; + rep->siteinfo_seq = 0; + if ((ret = __repmgr_share_netaddrs(env, rep, 0, db_rep->site_cnt)) != 0) + return (ret); + + rep->self_eid = db_rep->self_eid; + rep->perm_policy = db_rep->perm_policy; + rep->ack_timeout = db_rep->ack_timeout; + rep->connection_retry_wait = db_rep->connection_retry_wait; + rep->election_retry_wait = db_rep->election_retry_wait; + rep->heartbeat_monitor_timeout = db_rep->heartbeat_monitor_timeout; + rep->heartbeat_frequency = db_rep->heartbeat_frequency; + return (ret); +} + +/* + * Join an existing environment, by setting up our local site info structures + * from shared network address configuration in the region. + * + * As __repmgr_open(), note that we can't simply get the REP* address from the + * env as we usually do, because at the time of this call it hasn't been linked + * into there yet. + * + * PUBLIC: int __repmgr_join __P((ENV *, void *)); + */ +int +__repmgr_join(env, rep_) + ENV *env; + void *rep_; +{ + DB_REP *db_rep; + REGINFO *infop; + REP *rep; + SITEINFO *p; + REPMGR_SITE *site, temp; + repmgr_netaddr_t *addrp; + char *host; + u_int i, j; + int ret; + + db_rep = env->rep_handle; + infop = env->reginfo; + rep = rep_; + ret = 0; + + MUTEX_LOCK(env, rep->mtx_repmgr); + + /* + * Merge local and shared lists of remote sites. Note that the + * placement of entries in the shared array must not change. To + * accomplish the merge, pull in entries from the shared list, into the + * proper position, shuffling not-yet-resolved local entries if + * necessary. Then add any remaining locally known entries to the + * shared list. 
+ */ + i = 0; + if (rep->siteinfo_off != INVALID_ROFF) { + p = R_ADDR(infop, rep->siteinfo_off); + + /* For each address in the shared list ... */ + for (; i < rep->site_cnt; i++) { + host = R_ADDR(infop, p[i].addr.host); + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Site %s:%lu found at EID %u", + host, (u_long)p[i].addr.port, i)); + /* + * Find it in the local list. Everything before 'i' + * already matches the shared list, and is therefore in + * the right place. So we only need to search starting + * from 'i'. When found, local config values will be + * used because they are assumed to be "fresher". But + * membership status is not, since this process hasn't + * been active (running) yet. + */ + for (j = i; j < db_rep->site_cnt; j++) { + site = &db_rep->sites[j]; + addrp = &site->net_addr; + if (strcmp(host, addrp->host) == 0 && + p[i].addr.port == addrp->port) { + p[i].config = site->config; + site->membership = p[i].status; + break; + } + } + + /* + * When not found in local list, copy peer values + * from shared list. + */ + if (j == db_rep->site_cnt) { + if ((ret = __repmgr_new_site(env, + &site, host, p[i].addr.port)) != 0) + goto unlock; + site->config = p[i].config; + site->membership = p[i].status; + } + DB_ASSERT(env, j < db_rep->site_cnt); + + /* Found or added at 'j', but belongs at 'i': swap. */ + if (i != j) { + temp = db_rep->sites[j]; + db_rep->sites[j] = db_rep->sites[i]; + db_rep->sites[i] = temp; + /* + * If we're moving the entry that self_eid + * points to, then adjust self_eid to match. + * For now this is still merely our original, + * in-process pointer; we have yet to make sure + * it matches the one from shared memory. 
+ */ + if (db_rep->self_eid == (int)j) + db_rep->self_eid = (int)i; + } + } + } + if ((ret = __repmgr_share_netaddrs(env, rep, i, db_rep->site_cnt)) != 0) + goto unlock; + if (db_rep->self_eid == DB_EID_INVALID) + db_rep->self_eid = rep->self_eid; + else if (rep->self_eid == DB_EID_INVALID) + rep->self_eid = db_rep->self_eid; + else if (db_rep->self_eid != rep->self_eid) { + __db_errx(env, DB_STR("3674", + "A mismatching local site address has been set in the environment")); + ret = EINVAL; + goto unlock; + } + + db_rep->siteinfo_seq = rep->siteinfo_seq; +unlock: + MUTEX_UNLOCK(env, rep->mtx_repmgr); + return (ret); +} + +/* + * PUBLIC: int __repmgr_env_refresh __P((ENV *env)); + */ +int +__repmgr_env_refresh(env) + ENV *env; +{ + int ret; + + ret = F_ISSET(env, ENV_PRIVATE) ? + __mutex_free(env, &env->rep_handle->region->mtx_repmgr) : 0; + + return (ret); +} + +/* + * Copies new remote site information from the indicated private array slots + * into the shared region. The corresponding shared array slots do not exist + * yet; they must be allocated. + * + * PUBLIC: int __repmgr_share_netaddrs __P((ENV *, void *, u_int, u_int)); + * + * !!! The rep pointer is passed, because it may not yet have been installed + * into the env handle. + * + * !!! Assumes caller holds mtx_repmgr lock. + */ +int +__repmgr_share_netaddrs(env, rep_, start, limit) + ENV *env; + void *rep_; + u_int start, limit; +{ + DB_REP *db_rep; + REP *rep; + REGINFO *infop; + REGENV *renv; + SITEINFO *orig, *shared_array; + char *host, *hostbuf; + size_t sz; + u_int i, n; + int eid, ret, touched; + + db_rep = env->rep_handle; + infop = env->reginfo; + renv = infop->primary; + rep = rep_; + ret = 0; + touched = FALSE; + + MUTEX_LOCK(env, renv->mtx_regenv); + + for (i = start; i < limit; i++) { + if (rep->site_cnt >= rep->site_max) { + /* Table is full, we need more space. 
*/ + if (rep->siteinfo_off == INVALID_ROFF) { + n = INITIAL_SITES_ALLOCATION; + sz = n * sizeof(SITEINFO); + if ((ret = __env_alloc(infop, + sz, &shared_array)) != 0) + goto out; + } else { + n = 2 * rep->site_max; + sz = n * sizeof(SITEINFO); + if ((ret = __env_alloc(infop, + sz, &shared_array)) != 0) + goto out; + orig = R_ADDR(infop, rep->siteinfo_off); + memcpy(shared_array, orig, + sizeof(SITEINFO) * rep->site_cnt); + __env_alloc_free(infop, orig); + } + rep->siteinfo_off = R_OFFSET(infop, shared_array); + rep->site_max = n; + } else + shared_array = R_ADDR(infop, rep->siteinfo_off); + + DB_ASSERT(env, rep->site_cnt < rep->site_max && + rep->siteinfo_off != INVALID_ROFF); + + host = db_rep->sites[i].net_addr.host; + sz = strlen(host) + 1; + if ((ret = __env_alloc(infop, sz, &hostbuf)) != 0) + goto out; + eid = (int)rep->site_cnt++; + (void)strcpy(hostbuf, host); + shared_array[eid].addr.host = R_OFFSET(infop, hostbuf); + shared_array[eid].addr.port = db_rep->sites[i].net_addr.port; + shared_array[eid].config = db_rep->sites[i].config; + shared_array[eid].status = db_rep->sites[i].membership; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "EID %d is assigned for site %s:%lu", + eid, host, (u_long)shared_array[eid].addr.port)); + touched = TRUE; + } + +out: + if (touched) + db_rep->siteinfo_seq = ++rep->siteinfo_seq; + MUTEX_UNLOCK(env, renv->mtx_regenv); + return (ret); +} + +/* + * Copy into our local list any newly added/changed remote site + * configuration information. + * + * !!! Caller must hold db_rep->mutex and mtx_repmgr locks. 
+ * + * PUBLIC: int __repmgr_copy_in_added_sites __P((ENV *)); + */ +int +__repmgr_copy_in_added_sites(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + REGINFO *infop; + SITEINFO *base, *p; + REPMGR_SITE *site; + char *host; + int ret; + u_int i; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if (rep->siteinfo_off == INVALID_ROFF) + goto out; + + infop = env->reginfo; + base = R_ADDR(infop, rep->siteinfo_off); + + /* Create private array slots for new sites. */ + for (i = db_rep->site_cnt; i < rep->site_cnt; i++) { + p = &base[i]; + host = R_ADDR(infop, p->addr.host); + if ((ret = __repmgr_new_site(env, + &site, host, p->addr.port)) != 0) + return (ret); + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Site %s:%lu found at EID %u", + host, (u_long)p->addr.port, i)); + } + + /* Make sure info is up to date for all sites, old and new. */ + for (i = 0; i < db_rep->site_cnt; i++) { + p = &base[i]; + site = SITE_FROM_EID(i); + site->config = p->config; + site->membership = p->status; + } + +out: + /* + * We always make sure our local list has been brought up to date with + * the shared list before adding to the local list (except before env + * open of course). So here there should be nothing on our local list + * not yet in shared memory. + */ + DB_ASSERT(env, db_rep->site_cnt == rep->site_cnt); + db_rep->siteinfo_seq = rep->siteinfo_seq; + return (0); +} + +/* + * Initialize a range of sites newly added to our site list array. Process each + * array entry in the range from <= x < limit. Passing from >= limit is + * allowed, and is effectively a no-op. + * + * PUBLIC: int __repmgr_init_new_sites __P((ENV *, u_int, u_int)); + * + * !!! Assumes caller holds db_rep->mutex. 
+ */ +int +__repmgr_init_new_sites(env, from, limit) + ENV *env; + u_int from, limit; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + u_int i; + int ret; + + db_rep = env->rep_handle; + + if (db_rep->selector == NULL) + return (0); + for (i = from; i < limit; i++) { + site = SITE_FROM_EID(i); + + if (site->membership == SITE_PRESENT && + (ret = __repmgr_schedule_connection_attempt(env, + i, TRUE)) != 0) + return (ret); + } + + return (0); +} + +/* + * PUBLIC: int __repmgr_failchk __P((ENV *)); + */ +int +__repmgr_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_REP *db_rep; + REP *rep; + db_threadid_t unused; + + dbenv = env->dbenv; + db_rep = env->rep_handle; + rep = db_rep->region; + + COMPQUIET(unused, 0); + MUTEX_LOCK(env, rep->mtx_repmgr); + + /* + * Check to see if the main (listener) replication process may have died + * without cleaning up the flag. If so, we only have to clear it, and + * another process should then be able to come along and become the + * listener. So in either case we can return success. + */ + if (rep->listener != 0 && !dbenv->is_alive(dbenv, + rep->listener, unused, DB_MUTEX_PROCESS_ONLY)) + rep->listener = 0; + MUTEX_UNLOCK(env, rep->mtx_repmgr); + + return (0); +} + +/* + * PUBLIC: int __repmgr_master_is_known __P((ENV *)); + */ +int +__repmgr_master_is_known(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + int master; + + db_rep = env->rep_handle; + rep = db_rep->region; + master = rep->master_id; + + /* + * We are the master, or we know of a master and have a healthy + * connection to it. + */ + return (master == db_rep->self_eid || + __repmgr_master_connection(env) != NULL); +} + +/* + * PUBLIC: int __repmgr_stable_lsn __P((ENV *, DB_LSN *)); + * + * This function may be called before any of repmgr's threads have + * been started. This code must not be called before env open. + * Currently that is impossible since its only caller is log_archive + * which itself cannot be called before env_open. 
+ */ +int +__repmgr_stable_lsn(env, stable_lsn) + ENV *env; + DB_LSN *stable_lsn; +{ + DB_LSN min_lsn; + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + u_int eid; + + db_rep = env->rep_handle; + rep = db_rep->region; + + ZERO_LSN(min_lsn); + LOCK_MUTEX(db_rep->mutex); + FOR_EACH_REMOTE_SITE_INDEX(eid) { + site = SITE_FROM_EID(eid); + /* + * Record the smallest ack'ed LSN from all connected sites. + * If we're a client, ignore the master because the master + * does not maintain nor send out its repmgr perm LSN in + * this way. + */ + if ((int)eid == rep->master_id) + continue; + if (IS_SITE_AVAILABLE(site) && + !IS_ZERO_LSN(site->max_ack) && + (IS_ZERO_LSN(min_lsn) || + LOG_COMPARE(&site->max_ack, &min_lsn) < 0)) + min_lsn = site->max_ack; + } + UNLOCK_MUTEX(db_rep->mutex); + if (!IS_ZERO_LSN(min_lsn) && LOG_COMPARE(&min_lsn, stable_lsn) < 0) + *stable_lsn = min_lsn; + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "Repmgr_stable_lsn: Returning stable_lsn[%lu][%lu]", + (u_long)stable_lsn->file, (u_long)stable_lsn->offset)); + return (0); +} + +/* + * PUBLIC: int __repmgr_send_sync_msg __P((ENV *, REPMGR_CONNECTION *, + * PUBLIC: u_int32_t, u_int8_t *, u_int32_t)); + */ +int +__repmgr_send_sync_msg(env, conn, type, buf, len) + ENV *env; + REPMGR_CONNECTION *conn; + u_int8_t *buf; + u_int32_t len, type; +{ + REPMGR_IOVECS iovecs; + __repmgr_msg_hdr_args msg_hdr; + u_int8_t hdr_buf[__REPMGR_MSG_HDR_SIZE]; + size_t unused; + + msg_hdr.type = REPMGR_OWN_MSG; + REPMGR_OWN_BUF_SIZE(msg_hdr) = len; + REPMGR_OWN_MSG_TYPE(msg_hdr) = type; + __repmgr_msg_hdr_marshal(env, &msg_hdr, hdr_buf); + + __repmgr_iovec_init(&iovecs); + __repmgr_add_buffer(&iovecs, hdr_buf, __REPMGR_MSG_HDR_SIZE); + if (len > 0) + __repmgr_add_buffer(&iovecs, buf, len); + + return (__repmgr_write_iovecs(env, conn, &iovecs, &unused)); +} + +/* + * Produce a membership list from the known info currently in memory. 
+ * + * PUBLIC: int __repmgr_marshal_member_list __P((ENV *, u_int8_t **, size_t *)); + * + * Caller must hold mutex. + */ +int +__repmgr_marshal_member_list(env, bufp, lenp) + ENV *env; + u_int8_t **bufp; + size_t *lenp; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + __repmgr_membr_vers_args membr_vers; + __repmgr_site_info_args site_info; + u_int8_t *buf, *p; + size_t bufsize, len; + u_int i; + int ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + /* Compute a (generous) upper bound on needed buffer size. */ + bufsize = __REPMGR_MEMBR_VERS_SIZE + + db_rep->site_cnt * (__REPMGR_SITE_INFO_SIZE + MAXHOSTNAMELEN + 1); + if ((ret = __os_malloc(env, bufsize, &buf)) != 0) + return (ret); + p = buf; + + membr_vers.version = db_rep->membership_version; + membr_vers.gen = rep->gen; + __repmgr_membr_vers_marshal(env, &membr_vers, p); + p += __REPMGR_MEMBR_VERS_SIZE; + + for (i = 0; i < db_rep->site_cnt; i++) { + site = SITE_FROM_EID(i); + if (site->membership == 0) + continue; + + site_info.host.data = site->net_addr.host; + site_info.host.size = (u_int32_t)strlen(site->net_addr.host) + 1; + site_info.port = site->net_addr.port; + site_info.flags = site->membership; + + ret = __repmgr_site_info_marshal(env, + &site_info, p, (size_t)(&buf[bufsize]-p), &len); + DB_ASSERT(env, ret == 0); + p += len; + } + len = (size_t)(p - buf); + + *bufp = buf; + *lenp = len; + DB_ASSERT(env, ret == 0); + return (0); +} + +/* + * Produce a membership list by reading the database. 
+ */ +static int +read_gmdb(env, ip, bufp, lenp) + ENV *env; + DB_THREAD_INFO *ip; + u_int8_t **bufp; + size_t *lenp; +{ + DB_TXN *txn; + DB *dbp; + DBC *dbc; + DBT key_dbt, data_dbt; + __repmgr_membership_key_args key; + __repmgr_membership_data_args member_status; + __repmgr_member_metadata_args metadata; + __repmgr_membr_vers_args membr_vers; + __repmgr_site_info_args site_info; + u_int8_t data_buf[__REPMGR_MEMBERSHIP_DATA_SIZE]; + u_int8_t key_buf[MAX_MSG_BUF]; + u_int8_t metadata_buf[__REPMGR_MEMBER_METADATA_SIZE]; + char *host; + size_t bufsize, len; + u_int8_t *buf, *p; + u_int32_t gen; + int ret, t_ret; + + txn = NULL; + dbp = NULL; + dbc = NULL; + buf = NULL; + COMPQUIET(len, 0); + + if ((ret = __rep_get_datagen(env, &gen)) != 0) + return (ret); + if ((ret = __txn_begin(env, ip, NULL, &txn, DB_IGNORE_LEASE)) != 0) + goto err; + if ((ret = __rep_open_sysdb(env, ip, txn, REPMEMBERSHIP, 0, &dbp)) != 0) + goto err; + if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) + goto err; + + memset(&key_dbt, 0, sizeof(key_dbt)); + key_dbt.data = key_buf; + key_dbt.ulen = sizeof(key_buf); + F_SET(&key_dbt, DB_DBT_USERMEM); + memset(&data_dbt, 0, sizeof(data_dbt)); + data_dbt.data = metadata_buf; + data_dbt.ulen = sizeof(metadata_buf); + F_SET(&data_dbt, DB_DBT_USERMEM); + + /* Get metadata record, make sure key looks right. */ + if ((ret = __dbc_get(dbc, &key_dbt, &data_dbt, DB_NEXT)) != 0) + goto err; + ret = __repmgr_membership_key_unmarshal(env, + &key, key_buf, key_dbt.size, NULL); + DB_ASSERT(env, ret == 0); + DB_ASSERT(env, key.host.size == 0); + DB_ASSERT(env, key.port == 0); + ret = __repmgr_member_metadata_unmarshal(env, + &metadata, metadata_buf, data_dbt.size, NULL); + DB_ASSERT(env, ret == 0); + DB_ASSERT(env, metadata.format == REPMGR_GMDB_FMT_VERSION); + DB_ASSERT(env, metadata.version > 0); + + bufsize = 1000; /* Initial guess. 
*/ + if ((ret = __os_malloc(env, bufsize, &buf)) != 0) + goto err; + membr_vers.version = metadata.version; + membr_vers.gen = gen; + __repmgr_membr_vers_marshal(env, &membr_vers, buf); + p = &buf[__REPMGR_MEMBR_VERS_SIZE]; + + data_dbt.data = data_buf; + data_dbt.ulen = sizeof(data_buf); + while ((ret = __dbc_get(dbc, &key_dbt, &data_dbt, DB_NEXT)) == 0) { + ret = __repmgr_membership_key_unmarshal(env, + &key, key_buf, key_dbt.size, NULL); + DB_ASSERT(env, ret == 0); + DB_ASSERT(env, key.host.size <= MAXHOSTNAMELEN + 1 && + key.host.size > 1); + host = (char*)key.host.data; + DB_ASSERT(env, host[key.host.size-1] == '\0'); + DB_ASSERT(env, key.port > 0); + + ret = __repmgr_membership_data_unmarshal(env, + &member_status, data_buf, data_dbt.size, NULL); + DB_ASSERT(env, ret == 0); + DB_ASSERT(env, member_status.flags != 0); + + site_info.host = key.host; + site_info.port = key.port; + site_info.flags = member_status.flags; + if ((ret = __repmgr_site_info_marshal(env, &site_info, + p, (size_t)(&buf[bufsize]-p), &len)) == ENOMEM) { + bufsize *= 2; + len = (size_t)(p - buf); + if ((ret = __os_realloc(env, bufsize, &buf)) != 0) + goto err; + p = &buf[len]; + ret = __repmgr_site_info_marshal(env, + &site_info, p, (size_t)(&buf[bufsize]-p), &len); + DB_ASSERT(env, ret == 0); + } + p += len; + } + len = (size_t)(p - buf); + if (ret == DB_NOTFOUND) + ret = 0; + +err: + if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) + ret = t_ret; + if (dbp != NULL && + (t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + if (txn != NULL && + (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) { + *bufp = buf; + *lenp = len; + } else if (buf != NULL) + __os_free(env, buf); + return (ret); +} + +/* + * Refresh our sites array from the given membership list. 
+ * + * PUBLIC: int __repmgr_refresh_membership __P((ENV *, + * PUBLIC: u_int8_t *, size_t)); + */ +int +__repmgr_refresh_membership(env, buf, len) + ENV *env; + u_int8_t *buf; + size_t len; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + __repmgr_membr_vers_args membr_vers; + __repmgr_site_info_args site_info; + char *host; + u_int8_t *p; + u_int16_t port; + u_int32_t i, n; + int eid, ret; + + db_rep = env->rep_handle; + + /* + * Membership list consists of membr_vers followed by a number of + * site_info structs. + */ + ret = __repmgr_membr_vers_unmarshal(env, &membr_vers, buf, len, &p); + DB_ASSERT(env, ret == 0); + + if (db_rep->finished) + return (0); + /* Ignore obsolete versions. */ + if (__repmgr_gmdb_version_cmp(env, + membr_vers.gen, membr_vers.version) <= 0) + return (0); + + LOCK_MUTEX(db_rep->mutex); + + db_rep->membership_version = membr_vers.version; + db_rep->member_version_gen = membr_vers.gen; + + for (i = 0; i < db_rep->site_cnt; i++) + F_CLR(SITE_FROM_EID(i), SITE_TOUCHED); + + for (n = 0; p < &buf[len]; ++n) { + ret = __repmgr_site_info_unmarshal(env, + &site_info, p, (size_t)(&buf[len] - p), &p); + DB_ASSERT(env, ret == 0); + + host = site_info.host.data; + DB_ASSERT(env, + (u_int8_t*)site_info.host.data + site_info.host.size <= p); + host[site_info.host.size-1] = '\0'; + port = site_info.port; + + if ((ret = __repmgr_set_membership(env, + host, port, site_info.flags)) != 0) + goto err; + + if ((ret = __repmgr_find_site(env, host, port, &eid)) != 0) + goto err; + F_SET(SITE_FROM_EID(eid), SITE_TOUCHED); + } + ret = __rep_set_nsites_int(env, n); + DB_ASSERT(env, ret == 0); + + /* Scan "touched" flags so as to notice sites that have been removed. 
*/ + for (i = 0; i < db_rep->site_cnt; i++) { + site = SITE_FROM_EID(i); + if (F_ISSET(site, SITE_TOUCHED)) + continue; + host = site->net_addr.host; + port = site->net_addr.port; + if ((ret = __repmgr_set_membership(env, host, port, 0)) != 0) + goto err; + } + +err: + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_reload_gmdb __P((ENV *)); + */ +int +__repmgr_reload_gmdb(env) + ENV *env; +{ + DB_THREAD_INFO *ip; + u_int8_t *buf; + size_t len; + int ret; + + ENV_ENTER(env, ip); + if ((ret = read_gmdb(env, ip, &buf, &len)) == 0) { + env->rep_handle->have_gmdb = TRUE; + ret = __repmgr_refresh_membership(env, buf, len); + __os_free(env, buf); + } + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * Return 1, 0, or -1, as the given gen/version combination is >, =, or < our + * currently known version. + * + * PUBLIC: int __repmgr_gmdb_version_cmp __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__repmgr_gmdb_version_cmp(env, gen, version) + ENV *env; + u_int32_t gen, version; +{ + DB_REP *db_rep; + u_int32_t g, v; + + db_rep = env->rep_handle; + g = db_rep->member_version_gen; + v = db_rep->membership_version; + + if (gen == g) + return (version == v ? 0 : + (version < v ? -1 : 1)); + return (gen < g ? 
-1 : 1); +} + +/* + * PUBLIC: int __repmgr_init_save __P((ENV *, DBT *)); + */ +int +__repmgr_init_save(env, dbt) + ENV *env; + DBT *dbt; +{ + DB_REP *db_rep; + u_int8_t *buf; + size_t len; + int ret; + + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + if (db_rep->site_cnt == 0) { + dbt->data = NULL; + dbt->size = 0; + ret = 0; + } else if ((ret = __repmgr_marshal_member_list(env, &buf, &len)) == 0) { + dbt->data = buf; + dbt->size = (u_int32_t)len; + } + UNLOCK_MUTEX(db_rep->mutex); + + return (ret); +} + +/* + * PUBLIC: int __repmgr_init_restore __P((ENV *, DBT *)); + */ +int +__repmgr_init_restore(env, dbt) + ENV *env; + DBT *dbt; +{ + DB_REP *db_rep; + + db_rep = env->rep_handle; + db_rep->restored_list = dbt->data; + db_rep->restored_list_length = dbt->size; + return (0); +} + +/* + * Generates an internal request for a deferred operation, to be performed on a + * separate thread (conveniently, a message-processing thread). + * + * PUBLIC: int __repmgr_defer_op __P((ENV *, u_int32_t)); + * + * Caller should hold mutex. + */ +int +__repmgr_defer_op(env, op) + ENV *env; + u_int32_t op; +{ + REPMGR_MESSAGE *msg; + int ret; + + /* + * Overload REPMGR_MESSAGE to convey the type of operation being + * requested. For now "op" is all we need; plenty of room for expansion + * if needed in the future. + * + * Leave msg->v.gmdb_msg.conn NULL to show no conn to be cleaned up. 
+ */ + if ((ret = __os_calloc(env, 1, sizeof(*msg), &msg)) != 0) + return (ret); + msg->msg_hdr.type = REPMGR_OWN_MSG; + REPMGR_OWN_MSG_TYPE(msg->msg_hdr) = op; + ret = __repmgr_queue_put(env, msg); + return (ret); +} + +/* + * PUBLIC: void __repmgr_fire_conn_err_event __P((ENV *, + * PUBLIC: REPMGR_CONNECTION *, int)); + */ +void +__repmgr_fire_conn_err_event(env, conn, err) + ENV *env; + REPMGR_CONNECTION *conn; + int err; +{ + DB_REP *db_rep; + DB_REPMGR_CONN_ERR info; + + db_rep = env->rep_handle; + if (conn->type == REP_CONNECTION && IS_VALID_EID(conn->eid)) { + __repmgr_print_conn_err(env, + &SITE_FROM_EID(conn->eid)->net_addr, err); + info.eid = conn->eid; + info.error = err; + DB_EVENT(env, DB_EVENT_REP_CONNECT_BROKEN, &info); + } +} + +/* + * PUBLIC: void __repmgr_print_conn_err __P((ENV *, repmgr_netaddr_t *, int)); + */ +void +__repmgr_print_conn_err(env, netaddr, err) + ENV *env; + repmgr_netaddr_t *netaddr; + int err; +{ + SITE_STRING_BUFFER site_loc_buf; + char msgbuf[200]; /* Arbitrary size. */ + + (void)__repmgr_format_addr_loc(netaddr, site_loc_buf); + /* TCP/IP sockets API convention: 0 indicates "end-of-file". */ + if (err == 0) + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "EOF on connection to %s", site_loc_buf)); + else + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "`%s' (%d) on connection to %s", + __os_strerror(err, msgbuf, sizeof(msgbuf)), + err, site_loc_buf)); +} + +/* + * Change role from master to client, but if a GMDB operation is in progress, + * wait for it to finish first. + * + * PUBLIC: int __repmgr_become_client __P((ENV *)); + */ +int +__repmgr_become_client(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_await_gmdbop(env)) == 0) + db_rep->client_intent = TRUE; + UNLOCK_MUTEX(db_rep->mutex); + return (ret == 0 ? __repmgr_repstart(env, DB_REP_CLIENT) : ret); +} + +/* + * Looks up a site from our local (in-process) list, or returns NULL if not + * found. 
+ * + * PUBLIC: REPMGR_SITE *__repmgr_lookup_site __P((ENV *, const char *, u_int)); + */ +REPMGR_SITE * +__repmgr_lookup_site(env, host, port) + ENV *env; + const char *host; + u_int port; +{ + DB_REP *db_rep; + REPMGR_SITE *site; + u_int i; + + db_rep = env->rep_handle; + for (i = 0; i < db_rep->site_cnt; i++) { + site = &db_rep->sites[i]; + + if (strcmp(site->net_addr.host, host) == 0 && + site->net_addr.port == port) + return (site); + } + + return (NULL); +} + +/* + * Look up a site, or add it if it doesn't already exist. + * + * Caller must hold db_rep mutex and be within ENV_ENTER context, unless this is + * a pre-open call. + * + * PUBLIC: int __repmgr_find_site __P((ENV *, const char *, u_int, int *)); + */ +int +__repmgr_find_site(env, host, port, eidp) + ENV *env; + const char *host; + u_int port; + int *eidp; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + int eid, ret; + + db_rep = env->rep_handle; + ret = 0; + if (REP_ON(env)) { + rep = db_rep->region; + MUTEX_LOCK(env, rep->mtx_repmgr); + ret = get_eid(env, host, port, &eid); + MUTEX_UNLOCK(env, rep->mtx_repmgr); + } else { + if ((site = __repmgr_lookup_site(env, host, port)) == NULL && + (ret = __repmgr_new_site(env, &site, host, port)) != 0) + return (ret); + eid = EID_FROM_SITE(site); + } + if (ret == 0) + *eidp = eid; + return (ret); +} + +/* + * Get the EID of the named remote site, even if it means creating a new entry + * in our table if it doesn't already exist. + * + * Caller must hold both db_rep mutex and mtx_repmgr. + */ +static int +get_eid(env, host, port, eidp) + ENV *env; + const char *host; + u_int port; + int *eidp; +{ + DB_REP *db_rep; + REP *rep; + REPMGR_SITE *site; + int eid, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + + if ((ret = __repmgr_copy_in_added_sites(env)) != 0) + return (ret); + if ((site = __repmgr_lookup_site(env, host, port)) == NULL) { + /* + * Store both locally and in shared region. 
+ */ + if ((ret = __repmgr_new_site(env, &site, host, port)) != 0) + return (ret); + + eid = EID_FROM_SITE(site); + DB_ASSERT(env, (u_int)eid == db_rep->site_cnt - 1); + if ((ret = __repmgr_share_netaddrs(env, + rep, (u_int)eid, db_rep->site_cnt)) == 0) { + /* Show that a change was made. */ + db_rep->siteinfo_seq = ++rep->siteinfo_seq; + } else { + /* + * Rescind the local slot we just added, so that we at + * least keep the two lists in sync. + */ + db_rep->site_cnt--; + __repmgr_cleanup_netaddr(env, &site->net_addr); + } + } else + eid = EID_FROM_SITE(site); + if (ret == 0) + *eidp = eid; + return (ret); +} + +/* + * Sets the named remote site's group membership status to the given value, + * creating it first if it doesn't already exist. Adjusts connections + * accordingly. + * + * PUBLIC: int __repmgr_set_membership __P((ENV *, + * PUBLIC: const char *, u_int, u_int32_t)); + * + * Caller must host db_rep mutex, and be in ENV_ENTER context. + */ +int +__repmgr_set_membership(env, host, port, status) + ENV *env; + const char *host; + u_int port; + u_int32_t status; +{ + DB_REP *db_rep; + REP *rep; + REGINFO *infop; + REPMGR_SITE *site; + SITEINFO *sites; + u_int32_t orig; + int eid, ret; + + db_rep = env->rep_handle; + rep = db_rep->region; + infop = env->reginfo; + + COMPQUIET(orig, 0); + COMPQUIET(site, NULL); + DB_ASSERT(env, REP_ON(env)); + + MUTEX_LOCK(env, rep->mtx_repmgr); + if ((ret = get_eid(env, host, port, &eid)) == 0) { + site = SITE_FROM_EID(eid); + orig = site->membership; + sites = R_ADDR(infop, rep->siteinfo_off); + + RPRINT(env, (env, DB_VERB_REPMGR_MISC, + "set membership for %s:%lu %lu (was %lu)", + host, (u_long)port, (u_long)status, (u_long)orig )); + if (status != sites[eid].status) { + /* + * Show that a change is occurring. + * + * The call to get_eid() might have also bumped the + * sequence number, and since this is all happening + * within a single critical section it would be possible + * to avoid "wasting" a sequence number. 
But it's + * hardly worth the trouble and mental complexity: the + * sequence number counts changes that occur within an + * env region lifetime, so there should be plenty. + * We'll run out of membership DB version numbers long + * before this becomes a problem. + */ + db_rep->siteinfo_seq = ++rep->siteinfo_seq; + } + + /* Set both private and shared copies of the info. */ + site->membership = status; + sites[eid].status = status; + } + MUTEX_UNLOCK(env, rep->mtx_repmgr); + + /* + * If our notion of the site's membership changed, we may need to create + * or kill a connection. + */ + if (ret == 0 && !db_rep->finished && SELECTOR_RUNNING(db_rep)) { + + if (eid == db_rep->self_eid && status != SITE_PRESENT) + ret = DB_DELETED; + else if (orig != SITE_PRESENT && status == SITE_PRESENT && + site->state == SITE_IDLE) { + /* + * Here we might have just joined a group, or we might + * be an existing site and we've just learned of another + * site joining the group. In the former case, we + * certainly want to connect right away; in the later + * case it might be better to wait, because the new site + * probably isn't quite ready to accept our connection. + * But deciding which case we're in here would be messy, + * so for now we just keep it simple and always try + * connecting immediately. The resulting connection + * failure shouldn't hurt anything, because we'll just + * naturally try again later. + */ + ret = __repmgr_schedule_connection_attempt(env, + (u_int)eid, TRUE); + if (eid != db_rep->self_eid) + DB_EVENT(env, DB_EVENT_REP_SITE_ADDED, &eid); + } else if (orig != 0 && status == 0) + DB_EVENT(env, DB_EVENT_REP_SITE_REMOVED, &eid); + + /* + * Callers are responsible for adjusting nsites, even though in + * a way it would make sense to do it here. It's awkward to do + * it here at start-up/join time, when we load up starting from + * an empty array. 
Then we would get rep_set_nsites() + * repeatedly, and when leases were in use that would thrash the + * lease table adjustment. + */ + } + return (ret); +} + +/* + * PUBLIC: int __repmgr_bcast_parm_refresh __P((ENV *)); + */ +int +__repmgr_bcast_parm_refresh(env) + ENV *env; +{ + DB_REP *db_rep; + REP *rep; + __repmgr_parm_refresh_args parms; + u_int8_t buf[__REPMGR_PARM_REFRESH_SIZE]; + int ret; + + DB_ASSERT(env, REP_ON(env)); + db_rep = env->rep_handle; + rep = db_rep->region; + LOCK_MUTEX(db_rep->mutex); + parms.ack_policy = (u_int32_t)rep->perm_policy; + if (rep->priority == 0) + parms.flags = 0; + else + parms.flags = SITE_ELECTABLE; + __repmgr_parm_refresh_marshal(env, &parms, buf); + ret = __repmgr_bcast_own_msg(env, + REPMGR_PARM_REFRESH, buf, __REPMGR_PARM_REFRESH_SIZE); + UNLOCK_MUTEX(db_rep->mutex); + return (ret); +} + +/* + * PUBLIC: int __repmgr_chg_prio __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__repmgr_chg_prio(env, prev, cur) + ENV *env; + u_int32_t prev, cur; +{ + if ((prev == 0 && cur != 0) || + (prev != 0 && cur == 0)) + return (__repmgr_bcast_parm_refresh(env)); + return (0); +} + +/* + * PUBLIC: int __repmgr_bcast_own_msg __P((ENV *, + * PUBLIC: u_int32_t, u_int8_t *, size_t)); + * + * Caller must hold mutex. 
+ */ +int +__repmgr_bcast_own_msg(env, type, buf, len) + ENV *env; + u_int32_t type; + u_int8_t *buf; + size_t len; +{ + DB_REP *db_rep; + REPMGR_CONNECTION *conn; + REPMGR_SITE *site; + int ret; + u_int i; + + db_rep = env->rep_handle; + if (!SELECTOR_RUNNING(db_rep)) + return (0); + FOR_EACH_REMOTE_SITE_INDEX(i) { + site = SITE_FROM_EID(i); + if (site->state != SITE_CONNECTED) + continue; + conn = site->ref.conn; + if (conn->state != CONN_READY) + continue; + if ((ret = __repmgr_send_own_msg(env, + conn, type, buf, (u_int32_t)len)) != 0 && + (ret = __repmgr_bust_connection(env, conn)) != 0) + return (ret); + } + return (0); +} diff --git a/src/repmgr/repmgr_windows.c b/src/repmgr/repmgr_windows.c new file mode 100644 index 00000000..625ba6de --- /dev/null +++ b/src/repmgr/repmgr_windows.c @@ -0,0 +1,835 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* Convert time-out from microseconds to milliseconds, rounding up. */ +#define DB_TIMEOUT_TO_WINDOWS_TIMEOUT(t) (((t) + (US_PER_MS - 1)) / US_PER_MS) + +typedef struct __cond_waiter { + HANDLE event; + PREDICATE pred; + void *ctx; + int next_free; +} COND_WAITER; + +#define WAITER_SLOT_IN_USE(w) ((w)->pred != NULL) + +/* + * Array slots [0:next_avail-1] are initialized, and either in use or on the + * free list. Slots beyond that are virgin territory, whose memory contents + * could be garbage. In particular, note that slots [0:next_avail-1] have a + * Win32 Event Object created for them, which have to be freed when cleaning up + * this data structure. + * + * "first_free" points to a list of not-in-use slots threaded through the first + * section of the array. 
+ */ +struct __cond_waiters_table { + struct __cond_waiter *array; + int size; + int next_avail; + int first_free; +}; + +/* + * Aggregated control info needed for preparing for WSAWaitForMultipleEvents() + * call. + */ +struct io_info { + REPMGR_CONNECTION **connections; + WSAEVENT *events; + DWORD nevents; +}; + +static int allocate_wait_slot __P((ENV *, int *, COND_WAITERS_TABLE *)); +static void free_wait_slot __P((ENV *, int, COND_WAITERS_TABLE *)); +static int handle_completion __P((ENV *, REPMGR_CONNECTION *)); +static int prepare_io __P((ENV *, REPMGR_CONNECTION *, void *)); + +int +__repmgr_thread_start(env, runnable) + ENV *env; + REPMGR_RUNNABLE *runnable; +{ + HANDLE event, thread_id; + + runnable->finished = FALSE; + runnable->quit_requested = FALSE; + runnable->env = env; + + if ((event = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) + return (GetLastError()); + thread_id = CreateThread(NULL, 0, + (LPTHREAD_START_ROUTINE)runnable->run, runnable, 0, NULL); + if (thread_id == NULL) { + CloseHandle(event); + return (GetLastError()); + } + runnable->thread_id = thread_id; + runnable->quit_event = event; + return (0); +} + +int +__repmgr_thread_join(thread) + REPMGR_RUNNABLE *thread; +{ + int ret; + + ret = 0; + if (WaitForSingleObject(thread->thread_id, INFINITE) != WAIT_OBJECT_0) + ret = GetLastError(); + if (!CloseHandle(thread->thread_id) && ret == 0) + ret = GetLastError(); + if (!CloseHandle(thread->quit_event) && ret == 0) + ret = GetLastError(); + + return (ret); +} + +int +__repmgr_set_nonblocking(s) + SOCKET s; +{ + int ret; + u_long onoff; + + onoff = 1; /* any non-zero value */ + if ((ret = ioctlsocket(s, FIONBIO, &onoff)) == SOCKET_ERROR) + return (WSAGetLastError()); + return (0); +} + +int +__repmgr_set_nonblock_conn(conn) + REPMGR_CONNECTION *conn; +{ + int ret; + + if ((ret = __repmgr_set_nonblocking(conn->fd)) != 0) + return (ret); + + if ((conn->event_object = WSACreateEvent()) == WSA_INVALID_EVENT) { + ret = net_errno; + return (ret); 
+ } + return (0); +} + +/* + * !!! + * Caller must hold the repmgr->mutex, if this thread synchronization is to work + * properly. + */ +int +__repmgr_wake_waiters(env, w) + ENV *env; + waiter_t *w; +{ + DB_REP *db_rep; + COND_WAITERS_TABLE *waiters; + COND_WAITER *slot; + int i, ret; + + ret = 0; + db_rep = env->rep_handle; + waiters = *w; + for (i = 0; i < waiters->next_avail; i++) { + slot = &waiters->array[i]; + if (!WAITER_SLOT_IN_USE(slot)) + continue; + if ((*slot->pred)(env, slot->ctx) || db_rep->finished) + if (!SetEvent(slot->event) && ret == 0) + ret = GetLastError(); + } + return (ret); +} + +/* + * !!! + * Caller must hold mutex. + */ +int +__repmgr_await_cond(env, pred, ctx, timeout, waiters_p) + ENV *env; + PREDICATE pred; + void *ctx; + db_timeout_t timeout; + waiter_t *waiters_p; +{ + COND_WAITERS_TABLE *waiters; + COND_WAITER *waiter; + DB_REP *db_rep; + REP *rep; + DWORD ret, win_timeout; + int i; + + db_rep = env->rep_handle; + rep = db_rep->region; + waiters = *waiters_p; + + if ((ret = allocate_wait_slot(env, &i, waiters)) != 0) + goto err; + waiter = &waiters->array[i]; + + win_timeout = timeout > 0 ? + DB_TIMEOUT_TO_WINDOWS_TIMEOUT(timeout) : INFINITE; + waiter->pred = pred; + waiter->ctx = ctx; + if ((ret = SignalObjectAndWait(*db_rep->mutex, + waiter->event, win_timeout, FALSE)) == WAIT_FAILED) { + ret = GetLastError(); + } else if (ret == WAIT_TIMEOUT) + ret = DB_TIMEOUT; + else + DB_ASSERT(env, ret == WAIT_OBJECT_0); + + LOCK_MUTEX(db_rep->mutex); + free_wait_slot(env, i, waiters); + if (db_rep->finished) + ret = DB_REP_UNAVAIL; + +err: + return (ret); +} + +/* + * !!! + * Caller must hold the mutex. + */ +static int +allocate_wait_slot(env, resultp, table) + ENV *env; + int *resultp; + COND_WAITERS_TABLE *table; +{ + COND_WAITER *w; + HANDLE event; + int i, ret; + + if (table->first_free == -1) { + if (table->next_avail >= table->size) { + /* + * Grow the array. 
+ */ + table->size *= 2; + w = table->array; + if ((ret = __os_realloc(env, table->size * sizeof(*w), + &w)) != 0) + return (ret); + table->array = w; + } + if ((event = CreateEvent(NULL, + FALSE, FALSE, NULL)) == NULL) { + /* No need to rescind the memory reallocation. */ + return (GetLastError()); + } + + /* + * Here if, one way or another, we're good to go for using the + * next slot (for the first time). + */ + i = table->next_avail++; + w = &table->array[i]; + w->event = event; + } else { + i = table->first_free; + w = &table->array[i]; + table->first_free = w->next_free; + } + *resultp = i; + return (0); +} + +static void +free_wait_slot(env, slot_index, table) + ENV *env; + int slot_index; + COND_WAITERS_TABLE *table; +{ + DB_REP *db_rep; + COND_WAITER *slot; + + db_rep = env->rep_handle; + slot = &table->array[slot_index]; + + slot->pred = NULL; /* show it's not in use */ + slot->next_free = table->first_free; + table->first_free = slot_index; +} + +int +__repmgr_await_gmdbop(env) + ENV *env; +{ + DB_REP *db_rep; + int ret; + + db_rep = env->rep_handle; + while (db_rep->gmdb_busy) { + if (!ResetEvent(db_rep->gmdb_idle)) + return (GetLastError()); + ret = SignalObjectAndWait(*db_rep->mutex, + db_rep->gmdb_idle, INFINITE, FALSE); + LOCK_MUTEX(db_rep->mutex); + if (ret == WAIT_FAILED) + return (GetLastError()); + DB_ASSERT(env, ret == WAIT_OBJECT_0); + } + return (0); +} + +/* (See requirements described in repmgr_posix.c.) */ +int +__repmgr_await_drain(env, conn, timeout) + ENV *env; + REPMGR_CONNECTION *conn; + db_timeout_t timeout; +{ + DB_REP *db_rep; + db_timespec deadline, delta, now; + db_timeout_t t; + DWORD duration, ret; + int round_up; + + db_rep = env->rep_handle; + + __os_gettime(env, &deadline, 1); + TIMESPEC_ADD_DB_TIMEOUT(&deadline, timeout); + + while (conn->out_queue_length >= OUT_QUEUE_LIMIT) { + if (!ResetEvent(conn->drained)) + return (GetLastError()); + + /* How long until the deadline? 
*/ + __os_gettime(env, &now, 1); + if (timespeccmp(&now, &deadline, >=)) { + conn->state = CONN_CONGESTED; + return (0); + } + delta = deadline; + timespecsub(&delta, &now); + round_up = TRUE; + DB_TIMESPEC_TO_TIMEOUT(t, &delta, round_up); + duration = DB_TIMEOUT_TO_WINDOWS_TIMEOUT(t); + + ret = SignalObjectAndWait(*db_rep->mutex, + conn->drained, duration, FALSE); + LOCK_MUTEX(db_rep->mutex); + if (ret == WAIT_FAILED) + return (GetLastError()); + else if (ret == WAIT_TIMEOUT) { + conn->state = CONN_CONGESTED; + return (0); + } else + DB_ASSERT(env, ret == WAIT_OBJECT_0); + + if (db_rep->finished) + return (0); + if (conn->state == CONN_DEFUNCT) + return (DB_REP_UNAVAIL); + } + return (0); +} + +/* + * Creates a manual reset event, which is usually our best choice when we may + * have multiple threads waiting on a single event. + */ +int +__repmgr_alloc_cond(c) + cond_var_t *c; +{ + HANDLE event; + + if ((event = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) + return (GetLastError()); + *c = event; + return (0); +} + +int +__repmgr_free_cond(c) + cond_var_t *c; +{ + if (CloseHandle(*c)) + return (0); + return (GetLastError()); +} + +void +__repmgr_env_create_pf(db_rep) + DB_REP *db_rep; +{ +} + +int +__repmgr_create_mutex_pf(mutex) + mgr_mutex_t *mutex; +{ + if ((*mutex = CreateMutex(NULL, FALSE, NULL)) == NULL) + return (GetLastError()); + return (0); +} + +int +__repmgr_destroy_mutex_pf(mutex) + mgr_mutex_t *mutex; +{ + return (CloseHandle(*mutex) ? 
0 : GetLastError()); +} + +int +__repmgr_init(env) + ENV *env; +{ + DB_REP *db_rep; + WSADATA wsaData; + int ret; + + db_rep = env->rep_handle; + + if ((ret = WSAStartup(MAKEWORD(2, 2), &wsaData)) != 0) { + __db_err(env, ret, DB_STR("3589", + "unable to initialize Windows networking")); + return (ret); + } + + if ((db_rep->signaler = CreateEvent(NULL, /* security attr */ + FALSE, /* (not) of the manual reset variety */ + FALSE, /* (not) initially signaled */ + NULL)) == NULL) /* name */ + goto geterr; + + if ((db_rep->msg_avail = CreateEvent(NULL, TRUE, FALSE, NULL)) + == NULL) + goto geterr; + + if ((db_rep->check_election = CreateEvent(NULL, TRUE, FALSE, NULL)) + == NULL) + goto geterr; + + if ((db_rep->gmdb_idle = CreateEvent(NULL, TRUE, FALSE, NULL)) + == NULL) + goto geterr; + + if ((ret = __repmgr_init_waiters(env, &db_rep->ack_waiters)) != 0) + goto err; + return (0); + +geterr: + ret = GetLastError(); +err: + if (db_rep->gmdb_idle != NULL) + CloseHandle(db_rep->gmdb_idle); + if (db_rep->check_election != NULL) + CloseHandle(db_rep->check_election); + if (db_rep->msg_avail != NULL) + CloseHandle(db_rep->msg_avail); + if (db_rep->signaler != NULL) + CloseHandle(db_rep->signaler); + db_rep->msg_avail = + db_rep->check_election = + db_rep->gmdb_idle = + db_rep->signaler = NULL; + (void)WSACleanup(); + return (ret); +} + +int +__repmgr_deinit(env) + ENV *env; +{ + DB_REP *db_rep; + int ret, t_ret; + + db_rep = env->rep_handle; + if (!(REPMGR_INITED(db_rep))) + return (0); + + ret = 0; + if (WSACleanup() == SOCKET_ERROR) + ret = WSAGetLastError(); + + if ((t_ret = __repmgr_destroy_waiters(env, &db_rep->ack_waiters)) + != 0 && ret == 0) + ret = t_ret; + + if (!CloseHandle(db_rep->gmdb_idle) && ret == 0) + ret = GetLastError(); + + if (!CloseHandle(db_rep->check_election) && ret == 0) + ret = GetLastError(); + + if (!CloseHandle(db_rep->msg_avail) && ret == 0) + ret = GetLastError(); + + if (!CloseHandle(db_rep->signaler) && ret == 0) + ret = GetLastError(); + 
db_rep->msg_avail = + db_rep->check_election = + db_rep->gmdb_idle = + db_rep->signaler = NULL; + + return (ret); +} + +int +__repmgr_init_waiters(env, waiters) + ENV *env; + waiter_t *waiters; +{ +#define INITIAL_ALLOCATION 5 /* arbitrary size */ + COND_WAITERS_TABLE *table; + int ret; + + table = NULL; + + if ((ret = + __os_calloc(env, 1, sizeof(COND_WAITERS_TABLE), &table)) != 0) + return (ret); + + if ((ret = __os_calloc(env, INITIAL_ALLOCATION, sizeof(COND_WAITER), + &table->array)) != 0) { + __os_free(env, table); + return (ret); + } + + table->size = INITIAL_ALLOCATION; + table->first_free = -1; + table->next_avail = 0; + + /* There's a restaurant joke in there somewhere. */ + *waiters = table; + return (0); +} + +int +__repmgr_destroy_waiters(env, waitersp) + ENV *env; + waiter_t *waitersp; +{ + waiter_t waiters; + int i, ret; + + waiters = *waitersp; + ret = 0; + for (i = 0; i < waiters->next_avail; i++) { + if (!CloseHandle(waiters->array[i].event) && ret == 0) + ret = GetLastError(); + } + __os_free(env, waiters->array); + __os_free(env, waiters); + return (ret); +} + +int +__repmgr_lock_mutex(mutex) + mgr_mutex_t *mutex; +{ + if (WaitForSingleObject(*mutex, INFINITE) == WAIT_OBJECT_0) + return (0); + return (GetLastError()); +} + +int +__repmgr_unlock_mutex(mutex) + mgr_mutex_t *mutex; +{ + if (ReleaseMutex(*mutex)) + return (0); + return (GetLastError()); +} + +int +__repmgr_signal(v) + cond_var_t *v; +{ + return (SetEvent(*v) ? 0 : GetLastError()); +} + +int +__repmgr_wake_msngers(env, n) + ENV *env; + u_int n; +{ + DB_REP *db_rep; + u_int i; + + db_rep = env->rep_handle; + + /* Ask all threads beyond index 'n' to shut down. 
*/ + for (i = n; i< db_rep->nthreads; i++) + if (!SetEvent(db_rep->messengers[i]->quit_event)) + return (GetLastError()); + return (0); +} + +int +__repmgr_wake_main_thread(env) + ENV *env; +{ + if (!SetEvent(env->rep_handle->signaler)) + return (GetLastError()); + return (0); +} + +int +__repmgr_writev(fd, iovec, buf_count, byte_count_p) + socket_t fd; + db_iovec_t *iovec; + int buf_count; + size_t *byte_count_p; +{ + DWORD bytes; + + if (WSASend(fd, iovec, + (DWORD)buf_count, &bytes, 0, NULL, NULL) == SOCKET_ERROR) + return (net_errno); + + *byte_count_p = (size_t)bytes; + return (0); +} + +int +__repmgr_readv(fd, iovec, buf_count, xfr_count_p) + socket_t fd; + db_iovec_t *iovec; + int buf_count; + size_t *xfr_count_p; +{ + DWORD bytes, flags; + + flags = 0; + if (WSARecv(fd, iovec, + (DWORD)buf_count, &bytes, &flags, NULL, NULL) == SOCKET_ERROR) + return (net_errno); + + *xfr_count_p = (size_t)bytes; + return (0); +} + +int +__repmgr_select_loop(env) + ENV *env; +{ + DB_REP *db_rep; + DWORD ret; + DWORD select_timeout; + REPMGR_CONNECTION *connections[WSA_MAXIMUM_WAIT_EVENTS]; + WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS]; + db_timespec timeout; + WSAEVENT listen_event; + WSANETWORKEVENTS net_events; + struct io_info io_info; + int i; + + db_rep = env->rep_handle; + io_info.connections = connections; + io_info.events = events; + + if ((listen_event = WSACreateEvent()) == WSA_INVALID_EVENT) { + __db_err(env, net_errno, DB_STR("3590", + "can't create event for listen socket")); + return (net_errno); + } + if (!IS_SUBORDINATE(db_rep) && + WSAEventSelect(db_rep->listen_fd, listen_event, FD_ACCEPT) == + SOCKET_ERROR) { + ret = net_errno; + __db_err(env, ret, DB_STR("3591", + "can't enable event for listener")); + (void)WSACloseEvent(listen_event); + goto out; + } + + LOCK_MUTEX(db_rep->mutex); + if ((ret = __repmgr_first_try_connections(env)) != 0) + goto unlock; + for (;;) { + /* Start with the two events that we always wait for. 
*/ +#define SIGNALER_INDEX 0 +#define LISTENER_INDEX 1 + events[SIGNALER_INDEX] = db_rep->signaler; + if (IS_SUBORDINATE(db_rep)) + io_info.nevents = 1; + else { + events[LISTENER_INDEX] = listen_event; + io_info.nevents = 2; + } + + if ((ret = __repmgr_each_connection(env, + prepare_io, &io_info, TRUE)) != 0) + goto unlock; + + if (__repmgr_compute_timeout(env, &timeout)) + select_timeout = + (DWORD)(timeout.tv_sec * MS_PER_SEC + + timeout.tv_nsec / NS_PER_MS); + else { + /* No time-based events to wake us up. */ + select_timeout = WSA_INFINITE; + } + + UNLOCK_MUTEX(db_rep->mutex); + ret = WSAWaitForMultipleEvents( + io_info.nevents, events, FALSE, select_timeout, FALSE); + if (db_rep->finished) { + ret = 0; + goto out; + } + LOCK_MUTEX(db_rep->mutex); + + /* + * !!! + * Note that `ret' remains set as the return code from + * WSAWaitForMultipleEvents, above. + */ + if (ret >= WSA_WAIT_EVENT_0 && + ret < WSA_WAIT_EVENT_0 + io_info.nevents) { + if ((i = ret - WSA_WAIT_EVENT_0) == SIGNALER_INDEX) { + /* Another thread woke us. 
*/ + } else if (!IS_SUBORDINATE(db_rep) && + i == LISTENER_INDEX) { + if ((ret = WSAEnumNetworkEvents( + db_rep->listen_fd, listen_event, + &net_events)) == SOCKET_ERROR) { + ret = net_errno; + goto unlock; + } + DB_ASSERT(env, + net_events.lNetworkEvents & FD_ACCEPT); + if ((ret = net_events.iErrorCode[FD_ACCEPT_BIT]) + != 0) + goto unlock; + if ((ret = __repmgr_accept(env)) != 0) + goto unlock; + } else { + if (connections[i]->state != CONN_DEFUNCT && + (ret = handle_completion(env, + connections[i])) != 0) + goto unlock; + } + } else if (ret == WSA_WAIT_TIMEOUT) { + if ((ret = __repmgr_check_timeouts(env)) != 0) + goto unlock; + } else if (ret == WSA_WAIT_FAILED) { + ret = net_errno; + goto unlock; + } + } + +unlock: + UNLOCK_MUTEX(db_rep->mutex); +out: + if (!CloseHandle(listen_event) && ret == 0) + ret = GetLastError(); + return (ret); +} + +static int +prepare_io(env, conn, info_) + ENV *env; + REPMGR_CONNECTION *conn; + void *info_; +{ + struct io_info *info; + long desired_events; + int ret; + + if (conn->state == CONN_DEFUNCT) + return (__repmgr_cleanup_defunct(env, conn)); + + /* + * Note that even if we're suffering flow control, we + * nevertheless still read if we haven't even yet gotten + * a handshake. Why? (1) Handshakes are important; and + * (2) they don't hurt anything flow-control-wise. + */ + info = info_; + + /* + * If we ever implemented flow control, we would have some conditions to + * examine here. But as it is, we always are willing to accept I/O on + * every connection. + * + * We can only handle as many connections as the number of events the + * WSAWaitForMultipleEvents function allows (minus 2, for our overhead: + * the listener and the signaler). 
+ */ + DB_ASSERT(env, info->nevents < WSA_MAXIMUM_WAIT_EVENTS); + info->events[info->nevents] = conn->event_object; + info->connections[info->nevents++] = conn; + + desired_events = FD_READ | FD_CLOSE; + if (!STAILQ_EMPTY(&conn->outbound_queue)) + desired_events |= FD_WRITE; + if (WSAEventSelect(conn->fd, + conn->event_object, desired_events) == SOCKET_ERROR) { + ret = net_errno; + __db_err(env, ret, DB_STR_A("3592", + "can't set event bits 0x%lx", "%lx"), desired_events); + } else + ret = 0; + + return (ret); +} + +static int +handle_completion(env, conn) + ENV *env; + REPMGR_CONNECTION *conn; +{ + int error, ret; + WSANETWORKEVENTS events; + + if ((ret = WSAEnumNetworkEvents(conn->fd, conn->event_object, &events)) + == SOCKET_ERROR) { + error = net_errno; + __db_err(env, error, DB_STR("3593", "EnumNetworkEvents")); + goto report; + } + + /* Check both writing and reading. */ + if (events.lNetworkEvents & FD_CLOSE) { + error = events.iErrorCode[FD_CLOSE_BIT]; + goto report; + } + + if (events.lNetworkEvents & FD_WRITE) { + if (events.iErrorCode[FD_WRITE_BIT] != 0) { + error = events.iErrorCode[FD_WRITE_BIT]; + goto report; + } else if ((ret = + __repmgr_write_some(env, conn)) != 0) + goto err; + } + + if (events.lNetworkEvents & FD_READ) { + if (events.iErrorCode[FD_READ_BIT] != 0) { + error = events.iErrorCode[FD_READ_BIT]; + goto report; + } else if ((ret = + __repmgr_read_from_site(env, conn)) != 0) + goto err; + } + + if (0) { +report: + __repmgr_fire_conn_err_event(env, conn, error); + STAT(env->rep_handle->region->mstat.st_connection_drop++); + ret = DB_REP_UNAVAIL; + } +err: + if (ret == DB_REP_UNAVAIL) + ret = __repmgr_bust_connection(env, conn); + return (ret); +} diff --git a/src/sequence/seq_stat.c b/src/sequence/seq_stat.c new file mode 100644 index 00000000..a26b15a4 --- /dev/null +++ b/src/sequence/seq_stat.c @@ -0,0 +1,275 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. 
All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" +#ifdef HAVE_64BIT_TYPES + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc_auto/sequence_ext.h" + +#ifdef HAVE_STATISTICS +static int __seq_print_all __P((DB_SEQUENCE *, u_int32_t)); +static int __seq_print_stats __P((DB_SEQUENCE *, u_int32_t)); + +/* + * __seq_stat -- + * Get statistics from the sequence. + * + * PUBLIC: int __seq_stat __P((DB_SEQUENCE *, DB_SEQUENCE_STAT **, u_int32_t)); + */ +int +__seq_stat(seq, spp, flags) + DB_SEQUENCE *seq; + DB_SEQUENCE_STAT **spp; + u_int32_t flags; +{ + DB *dbp; + DBT data; + DB_SEQUENCE_STAT *sp; + DB_SEQ_RECORD record; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + dbp = seq->seq_dbp; + env = dbp->env; + + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->stat"); + + switch (flags) { + case DB_STAT_CLEAR: + case DB_STAT_ALL: + case 0: + break; + default: + return (__db_ferr(env, "DB_SEQUENCE->stat", 0)); + } + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { + handle_check = 0; + goto err; + } + + /* Allocate and clear the structure. 
*/ + if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0) + goto err; + memset(sp, 0, sizeof(*sp)); + + if (seq->mtx_seq != MUTEX_INVALID) { + __mutex_set_wait_info( + env, seq->mtx_seq, &sp->st_wait, &sp->st_nowait); + + if (LF_ISSET(DB_STAT_CLEAR)) + __mutex_clear(env, seq->mtx_seq); + } + memset(&data, 0, sizeof(data)); + data.data = &record; + data.ulen = sizeof(record); + data.flags = DB_DBT_USERMEM; +retry: if ((ret = __db_get(dbp, ip, NULL, &seq->seq_key, &data, 0)) != 0) { + if (ret == DB_BUFFER_SMALL && + data.size > sizeof(seq->seq_record)) { + if ((ret = __os_malloc(env, + data.size, &data.data)) != 0) + goto err; + data.ulen = data.size; + goto retry; + } + goto err; + } + + if (data.data != &record) + memcpy(&record, data.data, sizeof(record)); + sp->st_current = record.seq_value; + sp->st_value = seq->seq_record.seq_value; + sp->st_last_value = seq->seq_last_value; + sp->st_min = seq->seq_record.seq_min; + sp->st_max = seq->seq_record.seq_max; + sp->st_cache_size = seq->seq_cache_size; + sp->st_flags = seq->seq_record.flags; + + *spp = sp; + if (data.data != &record) + __os_free(env, data.data); + + /* Release replication block. */ +err: if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __seq_stat_print -- + * Print statistics from the sequence. + * + * PUBLIC: int __seq_stat_print __P((DB_SEQUENCE *, u_int32_t)); + */ +int +__seq_stat_print(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + dbp = seq->seq_dbp; + env = dbp->env; + + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->stat_print"); + + ENV_ENTER(env, ip); + + /* Check for replication block. 
*/ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { + handle_check = 0; + goto err; + } + + if ((ret = __seq_print_stats(seq, flags)) != 0) + goto err; + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __seq_print_all(seq, flags)) != 0) + goto err; + + /* Release replication block. */ +err: if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); + +} + +static const FN __db_seq_flags_fn[] = { + { DB_SEQ_DEC, "decrement" }, + { DB_SEQ_INC, "increment" }, + { DB_SEQ_RANGE_SET, "range set (internal)" }, + { DB_SEQ_WRAP, "wraparound at end" }, + { 0, NULL } +}; + +/* + * __db_get_seq_flags_fn -- + * Return the __db_seq_flags_fn array. + * + * PUBLIC: const FN * __db_get_seq_flags_fn __P((void)); + */ +const FN * +__db_get_seq_flags_fn() +{ + return (__db_seq_flags_fn); +} + +/* + * __seq_print_stats -- + * Display sequence stat structure. + */ +static int +__seq_print_stats(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + DB_SEQUENCE_STAT *sp; + ENV *env; + int ret; + + env = seq->seq_dbp->env; + + if ((ret = __seq_stat(seq, &sp, flags)) != 0) + return (ret); + __db_dl_pct(env, "The number of sequence locks that required waiting", + (u_long)sp->st_wait, + DB_PCT(sp->st_wait, sp->st_wait + sp->st_nowait), NULL); + STAT_FMT("The current sequence value", + INT64_FMT, db_seq_t, sp->st_current); + STAT_FMT("The cached sequence value", + INT64_FMT, db_seq_t, sp->st_value); + STAT_FMT("The last cached sequence value", + INT64_FMT, db_seq_t, sp->st_last_value); + STAT_FMT("The minimum sequence value", + INT64_FMT, db_seq_t, sp->st_min); + STAT_FMT("The maximum sequence value", + INT64_FMT, db_seq_t, sp->st_max); + STAT_ULONG("The cache size", sp->st_cache_size); + __db_prflags(env, NULL, + sp->st_flags, __db_seq_flags_fn, NULL, "\tSequence flags"); + __os_ufree(seq->seq_dbp->env, sp); + return (0); +} + +/* + * __seq_print_all -- + * Display sequence 
debugging information - none for now. + * (The name seems a bit strange, no?) + */ +static int +__seq_print_all(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + COMPQUIET(seq, NULL); + COMPQUIET(flags, 0); + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__seq_stat(seq, statp, flags) + DB_SEQUENCE *seq; + DB_SEQUENCE_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(seq->seq_dbp->env)); +} + +int +__seq_stat_print(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return (__db_stat_not_built(seq->seq_dbp->env)); +} + +/* + * __db_get_seq_flags_fn -- + * Return the __db_seq_flags_fn array. + * + * PUBLIC: const FN * __db_get_seq_flags_fn __P((void)); + */ +const FN * +__db_get_seq_flags_fn() +{ + static const FN __db_seq_flags_fn[] = { + { 0, NULL } + }; + + /* + * !!! + * The Tcl API uses this interface, stub it off. + */ + return (__db_seq_flags_fn); +} +#endif /* !HAVE_STATISTICS */ +#endif /* HAVE_64BIT_TYPES */ diff --git a/src/sequence/sequence.c b/src/sequence/sequence.c new file mode 100644 index 00000000..c73b6535 --- /dev/null +++ b/src/sequence/sequence.c @@ -0,0 +1,1011 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" +#include "dbinc_auto/sequence_ext.h" + +#ifdef HAVE_64BIT_TYPES +/* + * Sequences must be architecture independent but they are stored as user + * data in databases so the code here must handle the byte ordering. We + * store them in little-endian byte ordering. If we are on a big-endian + * machine we swap in and out when we read from the database. seq->seq_rp + * always points to the record in native ordering. 
+ * + * Version 1 always stored things in native format so if we detect this we + * upgrade on the fly and write the record back at open time. + */ +#define SEQ_SWAP(rp) \ + do { \ + M_32_SWAP((rp)->seq_version); \ + M_32_SWAP((rp)->flags); \ + M_64_SWAP((rp)->seq_value); \ + M_64_SWAP((rp)->seq_max); \ + M_64_SWAP((rp)->seq_min); \ + } while (0) + +#define SEQ_SWAP_IN(env, seq) \ + do { \ + if (!F_ISSET((env), ENV_LITTLEENDIAN)) { \ + memcpy(&seq->seq_record, seq->seq_data.data, \ + sizeof(seq->seq_record)); \ + SEQ_SWAP(&seq->seq_record); \ + } \ + } while (0) + +#define SEQ_SWAP_OUT(env, seq) \ + do { \ + if (!F_ISSET((env), ENV_LITTLEENDIAN)) { \ + memcpy(seq->seq_data.data, \ + &seq->seq_record, sizeof(seq->seq_record));\ + SEQ_SWAP((DB_SEQ_RECORD*)seq->seq_data.data); \ + } \ + } while (0) + +static int __seq_chk_cachesize __P((ENV *, int32_t, db_seq_t, db_seq_t)); +static int __seq_close __P((DB_SEQUENCE *, u_int32_t)); +static int __seq_close_pp __P((DB_SEQUENCE *, u_int32_t)); +static int __seq_get + __P((DB_SEQUENCE *, DB_TXN *, int32_t, db_seq_t *, u_int32_t)); +static int __seq_get_cachesize __P((DB_SEQUENCE *, int32_t *)); +static int __seq_get_db __P((DB_SEQUENCE *, DB **)); +static int __seq_get_flags __P((DB_SEQUENCE *, u_int32_t *)); +static int __seq_get_key __P((DB_SEQUENCE *, DBT *)); +static int __seq_get_range __P((DB_SEQUENCE *, db_seq_t *, db_seq_t *)); +static int __seq_initial_value __P((DB_SEQUENCE *, db_seq_t)); +static int __seq_open_pp __P((DB_SEQUENCE *, DB_TXN *, DBT *, u_int32_t)); +static int __seq_remove __P((DB_SEQUENCE *, DB_TXN *, u_int32_t)); +static int __seq_set_cachesize __P((DB_SEQUENCE *, int32_t)); +static int __seq_set_flags __P((DB_SEQUENCE *, u_int32_t)); +static int __seq_set_range __P((DB_SEQUENCE *, db_seq_t, db_seq_t)); +static int __seq_update + __P((DB_SEQUENCE *, DB_THREAD_INFO *, DB_TXN *, int32_t, u_int32_t)); + +/* + * db_sequence_create -- + * DB_SEQUENCE constructor. 
+ * + * EXTERN: int db_sequence_create __P((DB_SEQUENCE **, DB *, u_int32_t)); + */ +int +db_sequence_create(seqp, dbp, flags) + DB_SEQUENCE **seqp; + DB *dbp; + u_int32_t flags; +{ + DB_SEQUENCE *seq; + ENV *env; + int ret; + + env = dbp->env; + + DB_ILLEGAL_BEFORE_OPEN(dbp, "db_sequence_create"); + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + default: + return (__db_ferr(env, "db_sequence_create", 0)); + } + + if (dbp->type == DB_HEAP) { + __db_errx(env, DB_STR("4016", + "Heap databases may not be used with sequences.")); + return (EINVAL); + + } + + /* Allocate the sequence. */ + if ((ret = __os_calloc(env, 1, sizeof(*seq), &seq)) != 0) + return (ret); + + seq->seq_dbp = dbp; + seq->close = __seq_close_pp; + seq->get = __seq_get; + seq->get_cachesize = __seq_get_cachesize; + seq->set_cachesize = __seq_set_cachesize; + seq->get_db = __seq_get_db; + seq->get_flags = __seq_get_flags; + seq->get_key = __seq_get_key; + seq->get_range = __seq_get_range; + seq->initial_value = __seq_initial_value; + seq->open = __seq_open_pp; + seq->remove = __seq_remove; + seq->set_flags = __seq_set_flags; + seq->set_range = __seq_set_range; + seq->stat = __seq_stat; + seq->stat_print = __seq_stat_print; + seq->seq_rp = &seq->seq_record; + *seqp = seq; + + return (0); +} + +/* + * __seq_open -- + * DB_SEQUENCE->open method. + * + */ +static int +__seq_open_pp(seq, txn, keyp, flags) + DB_SEQUENCE *seq; + DB_TXN *txn; + DBT *keyp; + u_int32_t flags; +{ + DB *dbp; + DB_SEQ_RECORD *rp; + DB_THREAD_INFO *ip; + ENV *env; + u_int32_t tflags; + int handle_check, txn_local, ret, t_ret; +#define SEQ_OPEN_FLAGS (DB_CREATE | DB_EXCL | DB_THREAD) + + dbp = seq->seq_dbp; + env = dbp->env; + txn_local = 0; + + STRIP_AUTO_COMMIT(flags); + SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->open"); + + ENV_ENTER(env, ip); + + /* Check for replication block. 
*/ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + if ((ret = __db_fchk(env, + "DB_SEQUENCE->open", flags, SEQ_OPEN_FLAGS)) != 0) + goto err; + + if (keyp->size == 0) { + __db_errx(env, DB_STR("4001", + "Zero length sequence key specified")); + ret = EINVAL; + goto err; + } + + if ((ret = __db_get_flags(dbp, &tflags)) != 0) + goto err; + + /* + * We can let replication clients open sequences, but must + * check later that they do not update them. + */ + if (F_ISSET(dbp, DB_AM_RDONLY)) { + ret = __db_rdonly(dbp->env, "DB_SEQUENCE->open"); + goto err; + } + if (FLD_ISSET(tflags, DB_DUP)) { + __db_errx(env, DB_STR("4002", + "Sequences not supported in databases configured for duplicate data")); + ret = EINVAL; + goto err; + } + + if (LF_ISSET(DB_THREAD)) { + if ((ret = __mutex_alloc(env, + MTX_SEQUENCE, DB_MUTEX_PROCESS_ONLY, &seq->mtx_seq)) != 0) + goto err; + } + + memset(&seq->seq_data, 0, sizeof(DBT)); + if (F_ISSET(env, ENV_LITTLEENDIAN)) { + seq->seq_data.data = &seq->seq_record; + seq->seq_data.flags = DB_DBT_USERMEM; + } else { + if ((ret = __os_umalloc(env, + sizeof(seq->seq_record), &seq->seq_data.data)) != 0) + goto err; + seq->seq_data.flags = DB_DBT_REALLOC; + } + + seq->seq_data.ulen = seq->seq_data.size = sizeof(seq->seq_record); + seq->seq_rp = &seq->seq_record; + + if ((ret = __dbt_usercopy(env, keyp)) != 0) + goto err; + + memset(&seq->seq_key, 0, sizeof(DBT)); + if ((ret = __os_malloc(env, keyp->size, &seq->seq_key.data)) != 0) + goto err; + memcpy(seq->seq_key.data, keyp->data, keyp->size); + seq->seq_key.size = seq->seq_key.ulen = keyp->size; + seq->seq_key.flags = DB_DBT_USERMEM; + +retry: if ((ret = __db_get(dbp, ip, + txn, &seq->seq_key, &seq->seq_data, 0)) != 0) { + if (ret == DB_BUFFER_SMALL && + seq->seq_data.size > sizeof(seq->seq_record)) { + seq->seq_data.flags = DB_DBT_REALLOC; + seq->seq_data.data = NULL; + goto retry; + 
} + if ((ret != DB_NOTFOUND && ret != DB_KEYEMPTY) || + !LF_ISSET(DB_CREATE)) + goto err; + if (IS_REP_CLIENT(env) && + !F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + ret = __db_rdonly(env, "DB_SEQUENCE->open"); + goto err; + } + ret = 0; + + rp = &seq->seq_record; + if (!F_ISSET(rp, DB_SEQ_RANGE_SET)) { + rp->seq_max = INT64_MAX; + rp->seq_min = INT64_MIN; + } + /* INC is the default. */ + if (!F_ISSET(rp, DB_SEQ_DEC)) + F_SET(rp, DB_SEQ_INC); + + rp->seq_version = DB_SEQUENCE_VERSION; + + if (rp->seq_value > rp->seq_max || + rp->seq_value < rp->seq_min) { + __db_errx(env, DB_STR("4003", + "Sequence value out of range")); + ret = EINVAL; + goto err; + } else { + SEQ_SWAP_OUT(env, seq); + /* Create local transaction as necessary. */ + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = + __txn_begin(env, ip, NULL, &txn, 0)) != 0) + goto err; + txn_local = 1; + } + + if ((ret = __db_put(dbp, ip, txn, &seq->seq_key, + &seq->seq_data, DB_NOOVERWRITE)) != 0) { + __db_errx(env, DB_STR("4004", + "Sequence create failed")); + goto err; + } + } + } else if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) { + ret = EEXIST; + goto err; + } else if (seq->seq_data.size < sizeof(seq->seq_record)) { + __db_errx(env, DB_STR("4005", + "Bad sequence record format")); + ret = EINVAL; + goto err; + } + + if (F_ISSET(env, ENV_LITTLEENDIAN)) + seq->seq_rp = seq->seq_data.data; + + /* + * The first release was stored in native mode. + * Check the version number before swapping. 
+ */ + rp = seq->seq_data.data; + if (rp->seq_version == DB_SEQUENCE_OLDVER) { +oldver: if (IS_REP_CLIENT(env) && + !F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + ret = __db_rdonly(env, "DB_SEQUENCE->open"); + goto err; + } + rp->seq_version = DB_SEQUENCE_VERSION; + if (!F_ISSET(env, ENV_LITTLEENDIAN)) { + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = + __txn_begin(env, ip, NULL, &txn, 0)) != 0) + goto err; + txn_local = 1; + goto retry; + } + memcpy(&seq->seq_record, rp, sizeof(seq->seq_record)); + SEQ_SWAP_OUT(env, seq); + } + if ((ret = __db_put(dbp, + ip, txn, &seq->seq_key, &seq->seq_data, 0)) != 0) + goto err; + } + rp = seq->seq_rp; + + SEQ_SWAP_IN(env, seq); + + if (rp->seq_version != DB_SEQUENCE_VERSION) { + /* + * The database may have moved from one type + * of machine to another, check here. + * If we moved from little-end to big-end then + * the swap above will make the version correct. + * If the move was from big to little + * then we need to swap to see if this + * is an old version. + */ + if (rp->seq_version == DB_SEQUENCE_OLDVER) + goto oldver; + M_32_SWAP(rp->seq_version); + if (rp->seq_version == DB_SEQUENCE_OLDVER) { + SEQ_SWAP(rp); + goto oldver; + } + M_32_SWAP(rp->seq_version); + __db_errx(env, DB_STR_A("4006", + "Unsupported sequence version: %d", "%d"), + rp->seq_version); + goto err; + } + + seq->seq_last_value = seq->seq_prev_value = rp->seq_value; + if (F_ISSET(rp, DB_SEQ_INC)) + seq->seq_last_value--; + else + seq->seq_last_value++; + + /* + * It's an error to specify a cache larger than the range of sequences. + */ + if (seq->seq_cache_size != 0 && (ret = __seq_chk_cachesize( + env, seq->seq_cache_size, rp->seq_max, rp->seq_min)) != 0) + goto err; + +err: if (txn_local && + (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) + ret = t_ret; + if (ret != 0) { + __os_free(env, seq->seq_key.data); + seq->seq_key.data = NULL; + } + /* Release replication block. 
*/ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + __dbt_userfree(env, keyp, NULL, NULL); + return (ret); +} + +/* + * __seq_get_cachesize -- + * Accessor for value passed into DB_SEQUENCE->set_cachesize call. + * + */ +static int +__seq_get_cachesize(seq, cachesize) + DB_SEQUENCE *seq; + int32_t *cachesize; +{ + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_cachesize"); + + *cachesize = seq->seq_cache_size; + return (0); +} + +/* + * __seq_set_cachesize -- + * DB_SEQUENCE->set_cachesize. + * + */ +static int +__seq_set_cachesize(seq, cachesize) + DB_SEQUENCE *seq; + int32_t cachesize; +{ + ENV *env; + int ret; + + env = seq->seq_dbp->env; + + if (cachesize < 0) { + __db_errx(env, DB_STR("4007", + "Cache size must be >= 0")); + return (EINVAL); + } + + /* + * It's an error to specify a cache larger than the range of sequences. + */ + if (SEQ_IS_OPEN(seq) && (ret = __seq_chk_cachesize(env, + cachesize, seq->seq_rp->seq_max, seq->seq_rp->seq_min)) != 0) + return (ret); + + seq->seq_cache_size = cachesize; + return (0); +} + +#define SEQ_SET_FLAGS (DB_SEQ_WRAP | DB_SEQ_INC | DB_SEQ_DEC) +/* + * __seq_get_flags -- + * Accessor for flags passed into DB_SEQUENCE->open call + * + */ +static int +__seq_get_flags(seq, flagsp) + DB_SEQUENCE *seq; + u_int32_t *flagsp; +{ + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_flags"); + + *flagsp = F_ISSET(seq->seq_rp, SEQ_SET_FLAGS); + return (0); +} + +/* + * __seq_set_flags -- + * DB_SEQUENCE->set_flags. 
+ * + */ +static int +__seq_set_flags(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + DB_SEQ_RECORD *rp; + ENV *env; + int ret; + + env = seq->seq_dbp->env; + rp = seq->seq_rp; + + SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->set_flags"); + + if ((ret = __db_fchk( + env, "DB_SEQUENCE->set_flags", flags, SEQ_SET_FLAGS)) != 0) + return (ret); + if ((ret = __db_fcchk(env, + "DB_SEQUENCE->set_flags", flags, DB_SEQ_DEC, DB_SEQ_INC)) != 0) + return (ret); + + if (LF_ISSET(DB_SEQ_DEC | DB_SEQ_INC)) + F_CLR(rp, DB_SEQ_DEC | DB_SEQ_INC); + F_SET(rp, flags); + + return (0); +} + +/* + * __seq_initial_value -- + * DB_SEQUENCE->initial_value. + * + */ +static int +__seq_initial_value(seq, value) + DB_SEQUENCE *seq; + db_seq_t value; +{ + DB_SEQ_RECORD *rp; + ENV *env; + + env = seq->seq_dbp->env; + SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->initial_value"); + + rp = seq->seq_rp; + if (F_ISSET(rp, DB_SEQ_RANGE_SET) && + (value > rp->seq_max || value < rp->seq_min)) { + __db_errx(env, DB_STR("4008", + "Sequence value out of range")); + return (EINVAL); + } + + rp->seq_value = value; + + return (0); +} + +/* + * __seq_get_range -- + * Accessor for range passed into DB_SEQUENCE->set_range call + * + */ +static int +__seq_get_range(seq, minp, maxp) + DB_SEQUENCE *seq; + db_seq_t *minp, *maxp; +{ + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_range"); + + *minp = seq->seq_rp->seq_min; + *maxp = seq->seq_rp->seq_max; + return (0); +} + +/* + * __seq_set_range -- + * SEQUENCE->set_range. 
+ * + */ +static int +__seq_set_range(seq, min, max) + DB_SEQUENCE *seq; + db_seq_t min, max; +{ + DB_SEQ_RECORD *rp; + ENV *env; + + env = seq->seq_dbp->env; + SEQ_ILLEGAL_AFTER_OPEN(seq, "DB_SEQUENCE->set_range"); + + rp = seq->seq_rp; + if (min >= max) { + __db_errx(env, DB_STR("4009", + "Minimum sequence value must be less than maximum sequence value")); + return (EINVAL); + } + + rp->seq_min = min; + rp->seq_max = max; + F_SET(rp, DB_SEQ_RANGE_SET); + + return (0); +} + +static int +__seq_update(seq, ip, txn, delta, flags) + DB_SEQUENCE *seq; + DB_THREAD_INFO *ip; + DB_TXN *txn; + int32_t delta; + u_int32_t flags; +{ + DB *dbp; + DBT *data, ldata; + DB_SEQ_RECORD *rp; + ENV *env; + int32_t adjust; + int ret, txn_local, need_mutex; + + dbp = seq->seq_dbp; + env = dbp->env; + need_mutex = 0; + data = &seq->seq_data; + + /* + * Create a local transaction as necessary, check for consistent + * transaction usage, and, if we have no transaction but do have + * locking on, acquire a locker id for the handle lock acquisition. + */ + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = __txn_begin(env, ip, NULL, &txn, flags)) != 0) + return (ret); + txn_local = 1; + } else + txn_local = 0; + + /* Check for consistent transaction usage. */ + if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) + goto err; + /* + * If we are in a global transaction avoid deadlocking on the mutex. + * The write lock on the data will prevent two updaters getting in + * at once. Fetch the data then see if things are what we thought + * they were. 
+ */ + if (txn_local == 0 && txn != NULL) { + MUTEX_UNLOCK(env, seq->mtx_seq); + need_mutex = 1; + data = &ldata; + data->data = NULL; + data->flags = DB_DBT_REALLOC; + } + +retry: if ((ret = __db_get(dbp, ip, + txn, &seq->seq_key, data, DB_RMW)) != 0) { + if (ret == DB_BUFFER_SMALL && + seq->seq_data.size > sizeof(seq->seq_record)) { + data->flags = DB_DBT_REALLOC; + data->data = NULL; + goto retry; + } + goto err; + } + + if (data->size < sizeof(seq->seq_record)) { + __db_errx(env, DB_STR("4010", + "Bad sequence record format")); + ret = EINVAL; + goto err; + } + + /* We have an exclusive lock on the data, see if we raced. */ + if (need_mutex) { + MUTEX_LOCK(env, seq->mtx_seq); + need_mutex = 0; + rp = seq->seq_rp; + /* + * Note that caching must be off if we have global + * transaction so the value we fetch from the database + * is the correct current value. + */ + if (data->size <= seq->seq_data.size) { + memcpy(seq->seq_data.data, data->data, data->size); + __os_ufree(env, data->data); + } else { + seq->seq_data.data = data->data; + seq->seq_data.size = data->size; + } + } + if (F_ISSET(env, ENV_LITTLEENDIAN)) + seq->seq_rp = seq->seq_data.data; + SEQ_SWAP_IN(env, seq); + rp = seq->seq_rp; + + if (F_ISSET(rp, DB_SEQ_WRAPPED)) + goto overflow; + + adjust = delta > seq->seq_cache_size ? delta : seq->seq_cache_size; + + /* + * Check whether this operation will cause the sequence to wrap. + * + * The sequence minimum and maximum values can be INT64_MIN and + * INT64_MAX, so we need to do the test carefully to cope with + * arithmetic overflow. The first part of the test below checks + * whether we will hit the end of the 64-bit range. The second part + * checks whether we hit the end of the sequence. + */ +again: if (F_ISSET(rp, DB_SEQ_INC)) { + if (rp->seq_value + adjust - 1 < rp->seq_value || + rp->seq_value + adjust - 1 > rp->seq_max) { + /* Don't wrap just to fill the cache. 
*/ + if (adjust > delta) { + adjust = delta; + goto again; + } + if (F_ISSET(rp, DB_SEQ_WRAP)) + rp->seq_value = rp->seq_min; + else { +overflow: __db_errx(env, DB_STR("4011", + "Sequence overflow")); + ret = EINVAL; + goto err; + } + } + /* See if we are at the end of the 64 bit range. */ + if (!F_ISSET(rp, DB_SEQ_WRAP) && + rp->seq_value + adjust < rp->seq_value) + F_SET(rp, DB_SEQ_WRAPPED); + } else { + if ((rp->seq_value - adjust) + 1 > rp->seq_value || + (rp->seq_value - adjust) + 1 < rp->seq_min) { + /* Don't wrap just to fill the cache. */ + if (adjust > delta) { + adjust = delta; + goto again; + } + if (F_ISSET(rp, DB_SEQ_WRAP)) + rp->seq_value = rp->seq_max; + else + goto overflow; + } + /* See if we are at the end of the 64 bit range. */ + if (!F_ISSET(rp, DB_SEQ_WRAP) && + rp->seq_value - adjust > rp->seq_value) + F_SET(rp, DB_SEQ_WRAPPED); + adjust = -adjust; + } + + rp->seq_value += adjust; + SEQ_SWAP_OUT(env, seq); + ret = __db_put(dbp, ip, txn, &seq->seq_key, &seq->seq_data, 0); + rp->seq_value -= adjust; + if (ret != 0) { + __db_errx(env, DB_STR("4012", + "Sequence update failed")); + goto err; + } + seq->seq_last_value = rp->seq_value + adjust; + if (F_ISSET(rp, DB_SEQ_INC)) + seq->seq_last_value--; + else + seq->seq_last_value++; + +err: if (need_mutex) { + if (data->data != NULL) + __os_ufree(env, data->data); + MUTEX_LOCK(env, seq->mtx_seq); + } + return (txn_local ? 
__db_txn_auto_resolve( + env, txn, LF_ISSET(DB_TXN_NOSYNC), ret) : ret); +} + +static int +__seq_get(seq, txn, delta, retp, flags) + DB_SEQUENCE *seq; + DB_TXN *txn; + int32_t delta; + db_seq_t *retp; + u_int32_t flags; +{ + DB *dbp; + DB_SEQ_RECORD *rp; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret; + + dbp = seq->seq_dbp; + env = dbp->env; + rp = seq->seq_rp; + ret = 0; + + STRIP_AUTO_COMMIT(flags); + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get"); + + if (delta < 0 || (delta == 0 && !LF_ISSET(DB_CURRENT))) { + __db_errx(env, "Sequence delta must be greater than 0"); + return (EINVAL); + } + + if (seq->seq_cache_size != 0 && txn != NULL) { + __db_errx(env, + "Sequence with non-zero cache may not specify transaction handle"); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) + return (ret); + + MUTEX_LOCK(env, seq->mtx_seq); + + if (handle_check && IS_REP_CLIENT(env) && + !F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + ret = __db_rdonly(env, "DB_SEQUENCE->get"); + goto err; + } + + if (rp->seq_min + delta > rp->seq_max) { + __db_errx(env, DB_STR("4013", "Sequence overflow")); + ret = EINVAL; + goto err; + } + + if (LF_ISSET(DB_CURRENT)) { + *retp = seq->seq_prev_value; + } else if (F_ISSET(rp, DB_SEQ_INC)) { + if (seq->seq_last_value + 1 - rp->seq_value < delta && + (ret = __seq_update(seq, ip, txn, delta, flags)) != 0) + goto err; + + rp = seq->seq_rp; + *retp = rp->seq_value; + seq->seq_prev_value = rp->seq_value; + rp->seq_value += delta; + } else { + if ((rp->seq_value - seq->seq_last_value) + 1 < delta && + (ret = __seq_update(seq, ip, txn, delta, flags)) != 0) + goto err; + + rp = seq->seq_rp; + *retp = rp->seq_value; + seq->seq_prev_value = rp->seq_value; + rp->seq_value -= delta; + } + +err: MUTEX_UNLOCK(env, seq->mtx_seq); + + /* Release replication block. 
*/ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __seq_get_db -- + * Accessor for dbp passed into db_sequence_create call + * + */ +static int +__seq_get_db(seq, dbpp) + DB_SEQUENCE *seq; + DB **dbpp; +{ + *dbpp = seq->seq_dbp; + return (0); +} + +/* + * __seq_get_key -- + * Accessor for key passed into DB_SEQUENCE->open call + * + */ +static int +__seq_get_key(seq, key) + DB_SEQUENCE *seq; + DBT *key; +{ + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->get_key"); + + if (F_ISSET(key, DB_DBT_USERCOPY)) + return (__db_retcopy(seq->seq_dbp->env, key, + seq->seq_key.data, seq->seq_key.size, NULL, 0)); + + key->data = seq->seq_key.data; + key->size = key->ulen = seq->seq_key.size; + key->flags = seq->seq_key.flags; + return (0); +} + +/* + * __seq_close_pp -- + * Close a sequence pre/post processing + * + */ +static int +__seq_close_pp(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + int ret; + + ENV_ENTER(seq->seq_dbp->env, ip); + ret = __seq_close(seq, flags); + ENV_LEAVE(seq->seq_dbp->env, ip); + + return (ret); +} + +/* + * __seq_close -- + * Close a sequence + * + */ +static int +__seq_close(seq, flags) + DB_SEQUENCE *seq; + u_int32_t flags; +{ + ENV *env; + int ret, t_ret; + + ret = 0; + env = seq->seq_dbp->env; + + if (flags != 0) + ret = __db_ferr(env, "DB_SEQUENCE->close", 0); + + if ((t_ret = __mutex_free(env, &seq->mtx_seq)) != 0 && ret == 0) + ret = t_ret; + + if (seq->seq_key.data != NULL) + __os_free(env, seq->seq_key.data); + if (seq->seq_data.data != NULL && + seq->seq_data.data != &seq->seq_record) + __os_ufree(env, seq->seq_data.data); + seq->seq_key.data = NULL; + + memset(seq, CLEAR_BYTE, sizeof(*seq)); + __os_free(env, seq); + + return (ret); +} + +/* + * __seq_remove -- + * Remove a sequence from the database. 
+ */ +static int +__seq_remove(seq, txn, flags) + DB_SEQUENCE *seq; + DB_TXN *txn; + u_int32_t flags; +{ + DB *dbp; + DB_THREAD_INFO *ip; + ENV *env; + int handle_check, ret, t_ret, txn_local; + + dbp = seq->seq_dbp; + env = dbp->env; + txn_local = 0; + + SEQ_ILLEGAL_BEFORE_OPEN(seq, "DB_SEQUENCE->remove"); + + /* + * Flags can only be 0, unless the database has DB_AUTO_COMMIT enabled. + * Then DB_TXN_NOSYNC is allowed. + */ + if (flags != 0 && + (flags != DB_TXN_NOSYNC || !IS_DB_AUTO_COMMIT(dbp, txn))) + return (__db_ferr(env, "DB_SEQUENCE->remove illegal flag", 0)); + + ENV_ENTER(env, ip); + + /* Check for replication block. */ + handle_check = IS_ENV_REPLICATED(env); + if (handle_check && + (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) { + handle_check = 0; + goto err; + } + + /* + * Create a local transaction as necessary, check for consistent + * transaction usage, and, if we have no transaction but do have + * locking on, acquire a locker id for the handle lock acquisition. + */ + if (IS_DB_AUTO_COMMIT(dbp, txn)) { + if ((ret = __txn_begin(env, ip, NULL, &txn, flags)) != 0) + return (ret); + txn_local = 1; + } + + /* Check for consistent transaction usage. */ + if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) + goto err; + + ret = __db_del(dbp, ip, txn, &seq->seq_key, 0); + + if ((t_ret = __seq_close(seq, 0)) != 0 && ret == 0) + ret = t_ret; + + /* Release replication block. */ + if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; +err: if (txn_local && (t_ret = + __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) + ret = t_ret; + + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __seq_chk_cachesize -- + * Validate the cache size vs. the range. + */ +static int +__seq_chk_cachesize(env, cachesize, max, min) + ENV *env; + int32_t cachesize; + db_seq_t max, min; +{ + /* + * It's an error to specify caches larger than the sequence range. 
+ * + * The min and max of the range can be either positive or negative, + * the difference will fit in an unsigned variable of the same type. + * Assume a 2's complement machine, and simply subtract. + */ + if ((u_int32_t)cachesize > (u_int64_t)max - (u_int64_t)min) { + __db_errx(env, DB_STR("4014", + "Number of items to be cached is larger than the sequence range")); + return (EINVAL); + } + return (0); +} + +#else /* !HAVE_64BIT_TYPES */ + +int +db_sequence_create(seqp, dbp, flags) + DB_SEQUENCE **seqp; + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(seqp, NULL); + COMPQUIET(flags, 0); + __db_errx(dbp->env, DB_STR("4015", + "library build did not include support for sequences")); + return (DB_OPNOTSUP); +} +#endif /* HAVE_64BIT_TYPES */ diff --git a/src/txn/txn.c b/src/txn/txn.c new file mode 100644 index 00000000..4139c167 --- /dev/null +++ b/src/txn/txn.c @@ -0,0 +1,2161 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +#define LOG_FLAGS(txn) \ + (DB_LOG_COMMIT | (F_ISSET(txn, TXN_SYNC) ? \ + DB_FLUSH : (F_ISSET(txn, TXN_WRITE_NOSYNC) ? \ + DB_LOG_WRNOSYNC : 0))) + +/* + * __txn_isvalid enumerated types. We cannot simply use the transaction + * statuses, because different statuses need to be handled differently + * depending on the caller. 
+ */ +typedef enum { + TXN_OP_ABORT, + TXN_OP_COMMIT, + TXN_OP_DISCARD, + TXN_OP_PREPARE +} txnop_t; + +static int __txn_abort_pp __P((DB_TXN *)); +static int __txn_applied __P((ENV *, + DB_THREAD_INFO *, DB_COMMIT_INFO *, db_timeout_t)); +static void __txn_build_token __P((DB_TXN *, DB_LSN *)); +static int __txn_begin_int __P((DB_TXN *)); +static int __txn_close_cursors __P((DB_TXN *)); +static int __txn_commit_pp __P((DB_TXN *, u_int32_t)); +static int __txn_discard __P((DB_TXN *, u_int32_t)); +static int __txn_dispatch_undo + __P((ENV *, DB_TXN *, DBT *, DB_LSN *, DB_TXNHEAD *)); +static int __txn_end __P((DB_TXN *, int)); +static int __txn_isvalid __P((const DB_TXN *, txnop_t)); +static int __txn_undo __P((DB_TXN *)); +static int __txn_set_commit_token __P((DB_TXN *txn, DB_TXN_TOKEN *)); +static void __txn_set_txn_lsnp __P((DB_TXN *, DB_LSN **, DB_LSN **)); + +#define TxnAlloc "Unable to allocate a transaction handle" + +/* + * __txn_begin_pp -- + * ENV->txn_begin pre/post processing. 
+ * + * PUBLIC: int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); + */ +int +__txn_begin_pp(dbenv, parent, txnpp, flags) + DB_ENV *dbenv; + DB_TXN *parent, **txnpp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_check, ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, env->tx_handle, "txn_begin", DB_INIT_TXN); + + if ((ret = __db_fchk(env, + "txn_begin", flags, + DB_IGNORE_LEASE |DB_READ_COMMITTED | DB_READ_UNCOMMITTED | + DB_TXN_FAMILY | DB_TXN_NOSYNC | DB_TXN_SNAPSHOT | DB_TXN_SYNC | + DB_TXN_WAIT | DB_TXN_WRITE_NOSYNC | DB_TXN_NOWAIT | + DB_TXN_BULK)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "txn_begin", flags, + DB_TXN_WRITE_NOSYNC | DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0) + return (ret); + if ((ret = __db_fcchk(env, "txn_begin", + flags, DB_TXN_WRITE_NOSYNC, DB_TXN_NOSYNC)) != 0) + return (ret); + if (parent != NULL && LF_ISSET(DB_TXN_FAMILY)) { + __db_errx(env, DB_STR("4521", + "Family transactions cannot have parents")); + return (EINVAL); + } else if (IS_REAL_TXN(parent) && + !F_ISSET(parent, TXN_SNAPSHOT) && LF_ISSET(DB_TXN_SNAPSHOT)) { + __db_errx(env, DB_STR("4522", + "Child transaction snapshot setting must match parent")); + return (EINVAL); + } + + ENV_ENTER(env, ip); + + /* Replication accounts for top-level transactions. */ + rep_check = IS_ENV_REPLICATED(env) && + !IS_REAL_TXN(parent) && !LF_ISSET(DB_TXN_FAMILY); + + if (rep_check && (ret = __op_rep_enter(env, 0, 1)) != 0) + goto err; + + ret = __txn_begin(env, ip, parent, txnpp, flags); + + /* + * We only decrement the count if the operation fails. + * Otherwise the count will be decremented when the + * txn is resolved by txn_commit, txn_abort, etc. + */ + if (ret != 0 && rep_check) + (void)__op_rep_exit(env); + +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_begin -- + * ENV->txn_begin. + * + * This is a wrapper to the actual begin process. We allocate a DB_TXN + * structure for the caller and then call into __txn_begin_int code. 
+ * + * Internally, we use TXN_DETAIL structures, but the DB_TXN structure + * provides access to the transaction ID and the offset in the transaction + * region of the TXN_DETAIL structure. + * + * PUBLIC: int __txn_begin __P((ENV *, + * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_TXN **, u_int32_t)); + */ +int +__txn_begin(env, ip, parent, txnpp, flags) + ENV *env; + DB_THREAD_INFO *ip; + DB_TXN *parent, **txnpp; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_LOCKREGION *region; + DB_TXN *txn; + TXN_DETAIL *ptd, *td; + int ret; + + if (F_ISSET(env, ENV_FORCE_TXN_BULK)) + flags |= DB_TXN_BULK; + + *txnpp = NULL; + if ((ret = __os_calloc(env, 1, sizeof(DB_TXN), &txn)) != 0) { + __db_errx(env, TxnAlloc); + return (ret); + } + + dbenv = env->dbenv; + txn->mgrp = env->tx_handle; + txn->parent = parent; + if (parent != NULL && F_ISSET(parent, TXN_FAMILY)) + parent = NULL; + TAILQ_INIT(&txn->kids); + TAILQ_INIT(&txn->events); + STAILQ_INIT(&txn->logs); + TAILQ_INIT(&txn->my_cursors); + TAILQ_INIT(&txn->femfs); + txn->flags = TXN_MALLOC; + txn->thread_info = + ip != NULL ? ip : (parent != NULL ? parent->thread_info : NULL); + + /* + * Set the sync mode for commit. Any local bits override those + * in the environment. SYNC is the default. 
+ */ + if (LF_ISSET(DB_TXN_SYNC)) + F_SET(txn, TXN_SYNC); + else if (LF_ISSET(DB_TXN_NOSYNC)) + F_SET(txn, TXN_NOSYNC); + else if (LF_ISSET(DB_TXN_WRITE_NOSYNC)) + F_SET(txn, TXN_WRITE_NOSYNC); + else if (F_ISSET(dbenv, DB_ENV_TXN_NOSYNC)) + F_SET(txn, TXN_NOSYNC); + else if (F_ISSET(dbenv, DB_ENV_TXN_WRITE_NOSYNC)) + F_SET(txn, TXN_WRITE_NOSYNC); + else + F_SET(txn, TXN_SYNC); + + if (LF_ISSET(DB_TXN_NOWAIT) || + (F_ISSET(dbenv, DB_ENV_TXN_NOWAIT) && !LF_ISSET(DB_TXN_WAIT))) + F_SET(txn, TXN_NOWAIT); + if (LF_ISSET(DB_READ_COMMITTED)) + F_SET(txn, TXN_READ_COMMITTED); + if (LF_ISSET(DB_READ_UNCOMMITTED)) + F_SET(txn, TXN_READ_UNCOMMITTED); + if (LF_ISSET(DB_TXN_FAMILY)) + F_SET(txn, TXN_FAMILY | TXN_INFAMILY | TXN_READONLY); + if (LF_ISSET(DB_TXN_SNAPSHOT) || F_ISSET(dbenv, DB_ENV_TXN_SNAPSHOT) || + (parent != NULL && F_ISSET(parent, TXN_SNAPSHOT))) + F_SET(txn, TXN_SNAPSHOT); + if (LF_ISSET(DB_IGNORE_LEASE)) + F_SET(txn, TXN_IGNORE_LEASE); + + /* + * We set TXN_BULK only for the outermost transaction. This + * is a temporary limitation; in the future we will allow it + * for nested transactions as well. See #17669 for details. + * + * Also, ignore requests for DB_TXN_BULK if replication is enabled. + */ + if (LF_ISSET(DB_TXN_BULK) && parent == NULL && !REP_ON(txn->mgrp->env)) + F_SET(txn, TXN_BULK); + + if ((ret = __txn_begin_int(txn)) != 0) + goto err; + td = txn->td; + + if (parent != NULL) { + ptd = parent->td; + TAILQ_INSERT_HEAD(&parent->kids, txn, klinks); + SH_TAILQ_INSERT_HEAD(&ptd->kids, td, klinks, __txn_detail); + } + + if (LOCKING_ON(env)) { + region = env->lk_handle->reginfo.primary; + if (parent != NULL) { + ret = __lock_inherit_timeout(env, + parent->locker, txn->locker); + /* No parent locker set yet. */ + if (ret == EINVAL) { + parent = NULL; + ret = 0; + } + if (ret != 0) + goto err; + } + + /* + * Parent is NULL if we have no parent + * or it has no timeouts set. 
+ */ + if (parent == NULL && region->tx_timeout != 0) + if ((ret = __lock_set_timeout(env, txn->locker, + region->tx_timeout, DB_SET_TXN_TIMEOUT)) != 0) + goto err; + } + + *txnpp = txn; + PERFMON2(env, txn, begin, txn->txnid, flags); + return (0); + +err: + __os_free(env, txn); + return (ret); +} + +/* + * __txn_recycle_id -- + * Find a range of useable transaction ids. + * + * PUBLIC: int __txn_recycle_id __P((ENV *, int)); + */ +int +__txn_recycle_id(env, locked) + ENV *env; + int locked; +{ + DB_LSN null_lsn; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *td; + u_int32_t *ids; + int nids, ret; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + if ((ret = __os_malloc(env, + sizeof(u_int32_t) * region->curtxns, &ids)) != 0) { + __db_errx(env, DB_STR("4523", + "Unable to allocate transaction recycle buffer")); + return (ret); + } + nids = 0; + SH_TAILQ_FOREACH(td, ®ion->active_txn, links, __txn_detail) + ids[nids++] = td->txnid; + region->last_txnid = TXN_MINIMUM - 1; + region->cur_maxid = TXN_MAXIMUM; + if (nids != 0) + __db_idspace(ids, nids, + ®ion->last_txnid, ®ion->cur_maxid); + __os_free(env, ids); + + /* + * Check LOGGING_ON rather than DBENV_LOGGING as we want to emit this + * record at the end of recovery. + */ + if (LOGGING_ON(env)) { + if (locked) + TXN_SYSTEM_UNLOCK(env); + ret = __txn_recycle_log(env, NULL, &null_lsn, + 0, region->last_txnid + 1, region->cur_maxid); + /* Make it simple on the caller, if error we hold the lock. */ + if (locked && ret != 0) + TXN_SYSTEM_LOCK(env); + } + + return (ret); +} + +/* + * __txn_begin_int -- + * Normal DB version of txn_begin. 
+ */ +static int +__txn_begin_int(txn) + DB_TXN *txn; +{ + DB_ENV *dbenv; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + ENV *env; + TXN_DETAIL *td; + u_int32_t id; + int inserted, ret; + + mgr = txn->mgrp; + env = mgr->env; + dbenv = env->dbenv; + region = mgr->reginfo.primary; + td = NULL; + inserted = 0; + + TXN_SYSTEM_LOCK(env); + if (!F_ISSET(txn, TXN_COMPENSATE) && F_ISSET(region, TXN_IN_RECOVERY)) { + __db_errx(env, DB_STR("4524", + "operation not permitted during recovery")); + ret = EINVAL; + goto err; + } + + /* + * Allocate a new transaction id. Our current valid range can span + * the maximum valid value, so check for it and wrap manually. + */ + if (region->last_txnid == TXN_MAXIMUM && + region->cur_maxid != TXN_MAXIMUM) + region->last_txnid = TXN_MINIMUM - 1; + + /* Allocate a new transaction detail structure. */ + if ((ret = + __env_alloc(&mgr->reginfo, sizeof(TXN_DETAIL), &td)) != 0) { + __db_errx(env, DB_STR("4525", + "Unable to allocate memory for transaction detail")); + goto err; + } + + id = ++region->last_txnid; + +#ifdef HAVE_STATISTICS + STAT_INC(env, txn, nbegins, region->stat.st_nbegins, id); + STAT_INC(env, txn, nactive, region->stat.st_nactive, id); + if (region->stat.st_nactive > region->stat.st_maxnactive) + STAT_SET(env, txn, maxnactive, + region->stat.st_maxnactive, region->stat.st_nactive, id); +#endif + + td->txnid = id; + dbenv->thread_id(dbenv, &td->pid, &td->tid); + + ZERO_LSN(td->last_lsn); + ZERO_LSN(td->begin_lsn); + SH_TAILQ_INIT(&td->kids); + if (txn->parent != NULL && !F_ISSET(txn->parent, TXN_FAMILY)) + td->parent = R_OFFSET(&mgr->reginfo, txn->parent->td); + else + td->parent = INVALID_ROFF; + td->name = INVALID_ROFF; + MAX_LSN(td->read_lsn); + MAX_LSN(td->visible_lsn); + td->mvcc_ref = 0; + td->mvcc_mtx = MUTEX_INVALID; + td->status = TXN_RUNNING; + td->flags = F_ISSET(txn, TXN_NOWAIT) ? 
TXN_DTL_NOWAIT : 0; + td->nlog_dbs = 0; + td->nlog_slots = TXN_NSLOTS; + td->log_dbs = R_OFFSET(&mgr->reginfo, td->slots); + + /* XA specific fields. */ + td->xa_ref = 1; + td->xa_br_status = TXN_XA_IDLE; + + /* Place transaction on active transaction list. */ + SH_TAILQ_INSERT_HEAD(®ion->active_txn, td, links, __txn_detail); + region->curtxns++; + + /* Increment bulk transaction counter while holding transaction lock. */ + if (F_ISSET(txn, TXN_BULK)) + ((DB_TXNREGION *)env->tx_handle->reginfo.primary)->n_bulk_txn++; + + inserted = 1; + + if (region->last_txnid == region->cur_maxid) { + if ((ret = __txn_recycle_id(env, 1)) != 0) + goto err; + } else + TXN_SYSTEM_UNLOCK(env); + + txn->txnid = id; + txn->td = td; + + /* Allocate a locker for this txn. */ + if (LOCKING_ON(env) && (ret = + __lock_getlocker(env->lk_handle, id, 1, &txn->locker)) != 0) + goto err; + + txn->abort = __txn_abort_pp; + txn->commit = __txn_commit_pp; + txn->discard = __txn_discard; + txn->get_name = __txn_get_name; + txn->get_priority = __txn_get_priority; + txn->id = __txn_id; + txn->prepare = __txn_prepare; + txn->set_commit_token = __txn_set_commit_token; + txn->set_txn_lsnp = __txn_set_txn_lsnp; + txn->set_name = __txn_set_name; + txn->set_priority = __txn_set_priority; + txn->set_timeout = __txn_set_timeout; + + /* We can't call __txn_set_priority until txn->td is set. */ + if (LOCKING_ON(env) && (ret = __txn_set_priority(txn, + txn->parent == NULL ? + TXN_PRIORITY_DEFAULT : txn->parent->locker->priority)) != 0) + goto err; + else + td->priority = 0; + + /* + * If this is a transaction family, we must link the child to the + * maximal grandparent in the lock table for deadlock detection. + */ + if (txn->parent != NULL) { + if (LOCKING_ON(env) && (ret = __lock_addfamilylocker(env, + txn->parent->txnid, txn->txnid, + F_ISSET(txn->parent, TXN_FAMILY))) != 0) + goto err; + + /* + * If the parent is only used to establish compatability, do + * not reference it again. 
+ */ + if (F_ISSET(txn->parent, TXN_FAMILY)) { + txn->parent = NULL; + F_SET(txn, TXN_INFAMILY); + } + } + + if (F_ISSET(txn, TXN_MALLOC)) { + MUTEX_LOCK(env, mgr->mutex); + TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links); + MUTEX_UNLOCK(env, mgr->mutex); + } + + return (0); + +err: if (inserted) { + TXN_SYSTEM_LOCK(env); + SH_TAILQ_REMOVE(®ion->active_txn, td, links, __txn_detail); + region->curtxns--; + if (F_ISSET(txn, TXN_BULK)) + ((DB_TXNREGION *) + env->tx_handle->reginfo.primary)->n_bulk_txn--; + } + if (td != NULL) + __env_alloc_free(&mgr->reginfo, td); + TXN_SYSTEM_UNLOCK(env); + return (ret); +} + +/* + * __txn_continue + * Fill in the fields of the local transaction structure given + * the detail transaction structure. Optionally link transactions + * to transaction manager list. + * + * PUBLIC: int __txn_continue __P((ENV *, + * PUBLIC: DB_TXN *, TXN_DETAIL *, DB_THREAD_INFO *, int)); + */ +int +__txn_continue(env, txn, td, ip, add_to_list) + ENV *env; + DB_TXN *txn; + TXN_DETAIL *td; + DB_THREAD_INFO *ip; + int add_to_list; +{ + DB_LOCKREGION *region; + DB_TXNMGR *mgr; + int ret; + + ret = 0; + + /* + * This code follows the order of the structure definition so it + * is relatively easy to make sure that we are setting everything. + */ + mgr = txn->mgrp = env->tx_handle; + txn->parent = NULL; + txn->thread_info = ip; + txn->txnid = td->txnid; + txn->name = NULL; + txn->td = td; + td->xa_ref++; + + /* This never seems to be used: txn->expire */ + txn->txn_list = NULL; + + TAILQ_INIT(&txn->kids); + TAILQ_INIT(&txn->events); + STAILQ_INIT(&txn->logs); + + /* + * These fields should never persist across different processes as we + * require that cursors be opened/closed within the same service routine + * and we disallow file level operations in XA transactions. + */ + TAILQ_INIT(&txn->my_cursors); + TAILQ_INIT(&txn->femfs); + + /* Put the transaction onto the transaction manager's list. 
*/ + if (add_to_list) { + MUTEX_LOCK(env, mgr->mutex); + TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links); + MUTEX_UNLOCK(env, mgr->mutex); + } + + txn->token_buffer = 0; + txn->cursors = 0; + + txn->abort = __txn_abort_pp; + txn->commit = __txn_commit_pp; + txn->discard = __txn_discard; + txn->get_name = __txn_get_name; + txn->get_priority = __txn_get_priority; + txn->id = __txn_id; + txn->prepare = __txn_prepare; + txn->set_commit_token = __txn_set_commit_token; + txn->set_name = __txn_set_name; + txn->set_priority = __txn_set_priority; + txn->set_timeout = __txn_set_timeout; + txn->set_txn_lsnp = __txn_set_txn_lsnp; + + /* XXX Do we need to explicitly set a SYNC flag here? */ + txn->flags = TXN_MALLOC | + (F_ISSET(td, TXN_DTL_NOWAIT) ? TXN_NOWAIT : 0); + txn->xa_thr_status = TXN_XA_THREAD_NOTA; + + /* + * If this is a restored transaction, we need to propagate that fact + * to the process-local structure. However, if it's not a restored + * transaction, we need to make sure that we have a locker associated + * with this transaction. + */ + if (F_ISSET(td, TXN_DTL_RESTORED)) + F_SET(txn, TXN_RESTORED); + else + if ((ret = __lock_getlocker(env->lk_handle, + txn->txnid, 0, &txn->locker)) == 0) + ret = __txn_set_priority(txn, td->priority); + + if (LOCKING_ON(env)) { + region = env->lk_handle->reginfo.primary; + if (region->tx_timeout != 0 && + (ret = __lock_set_timeout(env, txn->locker, + region->tx_timeout, DB_SET_TXN_TIMEOUT)) != 0) + return (ret); + txn->lock_timeout = region->tx_timeout; + } + + return (ret); +} + +/* + * __txn_commit_pp -- + * Interface routine to TXN->commit. 
+ */ +static int +__txn_commit_pp(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_check, ret, t_ret; + + env = txn->mgrp->env; + rep_check = IS_ENV_REPLICATED(env) && + txn->parent == NULL && IS_REAL_TXN(txn); + + ENV_ENTER(env, ip); + ret = __txn_commit(txn, flags); + if (rep_check && (t_ret = __op_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_commit -- + * Commit a transaction. + * + * PUBLIC: int __txn_commit __P((DB_TXN *, u_int32_t)); + */ +int +__txn_commit(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + DBT list_dbt; + DB_LOCKREQ request; + DB_TXN *kid; + ENV *env; + REGENV *renv; + REGINFO *infop; + TXN_DETAIL *td; + DB_LSN token_lsn; + u_int32_t id; + int ret, t_ret; + + env = txn->mgrp->env; + td = txn->td; + PERFMON2(env, txn, commit, txn->txnid, flags); + + DB_ASSERT(env, txn->xa_thr_status == TXN_XA_THREAD_NOTA || + td->xa_ref == 1); + /* + * A common mistake in Berkeley DB programs is to mis-handle deadlock + * return. If the transaction deadlocked, they want abort, not commit. + */ + if (F_ISSET(txn, TXN_DEADLOCK)) { + ret = __db_txn_deadlock_err(env, txn); + goto err; + } + + /* Close registered cursors before committing. */ + if ((ret = __txn_close_cursors(txn)) != 0) + goto err; + + if ((ret = __txn_isvalid(txn, TXN_OP_COMMIT)) != 0) + return (ret); + + /* + * Check for master leases at the beginning. If we are a master and + * cannot have valid leases now, we error and abort this txn. There + * should always be a perm record in the log because the master updates + * the LSN history system database in rep_start() (with IGNORE_LEASE + * set). 
+ */ + if (txn->parent == NULL && IS_REP_MASTER(env) && + IS_USING_LEASES(env) && !F_ISSET(txn, TXN_IGNORE_LEASE) && + (ret = __rep_lease_check(env, 1)) != 0) { + DB_ASSERT(env, ret != DB_NOTFOUND); + goto err; + } + + infop = env->reginfo; + renv = infop->primary; + /* + * No mutex is needed as envid is read-only once it is set. + */ + id = renv->envid; + + /* + * We clear flags that are incorrect, ignoring any flag errors, and + * default to synchronous operations. By definition, transaction + * handles are dead when we return, and this error should never + * happen, but we don't want to fail in the field 'cause the app is + * specifying the wrong flag for some reason. + */ + if (__db_fchk(env, "DB_TXN->commit", flags, + DB_TXN_NOSYNC | DB_TXN_SYNC | DB_TXN_WRITE_NOSYNC) != 0) + flags = DB_TXN_SYNC; + if (__db_fcchk(env, "DB_TXN->commit", flags, + DB_TXN_SYNC, DB_TXN_NOSYNC | DB_TXN_WRITE_NOSYNC) != 0) + flags = DB_TXN_SYNC; + + if (LF_ISSET(DB_TXN_WRITE_NOSYNC)) { + F_CLR(txn, TXN_SYNC_FLAGS); + F_SET(txn, TXN_WRITE_NOSYNC); + } + if (LF_ISSET(DB_TXN_NOSYNC)) { + F_CLR(txn, TXN_SYNC_FLAGS); + F_SET(txn, TXN_NOSYNC); + } + if (LF_ISSET(DB_TXN_SYNC)) { + F_CLR(txn, TXN_SYNC_FLAGS); + F_SET(txn, TXN_SYNC); + } + + DB_ASSERT(env, F_ISSET(txn, TXN_SYNC_FLAGS)); + + /* + * Commit any unresolved children. If anyone fails to commit, + * then try to abort the rest of the kids and then abort the parent. + * Abort should never fail; if it does, we bail out immediately. + */ + while ((kid = TAILQ_FIRST(&txn->kids)) != NULL) + if ((ret = __txn_commit(kid, flags)) != 0) + while ((kid = TAILQ_FIRST(&txn->kids)) != NULL) + if ((t_ret = __txn_abort(kid)) != 0) + return (__env_panic(env, t_ret)); + + /* + * If there are any log records, write a log record and sync the log, + * else do no log writes. 
If the commit is for a child transaction, + * we do not need to commit the child synchronously since it may still + * abort (if its parent aborts), and otherwise its parent or ultimate + * ancestor will write synchronously. + */ + ZERO_LSN(token_lsn); + if (DBENV_LOGGING(env) && (!IS_ZERO_LSN(td->last_lsn) || + STAILQ_FIRST(&txn->logs) != NULL)) { + if (txn->parent == NULL) { + /* + * We are about to free all the read locks for this + * transaction below. Some of those locks might be + * handle locks which should not be freed, because + * they will be freed when the handle is closed. Check + * the events and preprocess any trades now so we don't + * release the locks below. + */ + if ((ret = + __txn_doevents(env, txn, TXN_COMMIT, 1)) != 0) + goto err; + + memset(&request, 0, sizeof(request)); + if (LOCKING_ON(env)) { + request.op = DB_LOCK_PUT_READ; + if (IS_REP_MASTER(env) && + !IS_ZERO_LSN(td->last_lsn)) { + memset(&list_dbt, 0, sizeof(list_dbt)); + request.obj = &list_dbt; + } + ret = __lock_vec(env, + txn->locker, 0, &request, 1, NULL); + } + + if (ret == 0 && !IS_ZERO_LSN(td->last_lsn)) { + ret = __txn_flush_fe_files(txn); + if (ret == 0) + ret = __txn_regop_log(env, txn, + &td->visible_lsn, LOG_FLAGS(txn), + TXN_COMMIT, + (int32_t)time(NULL), id, + request.obj); + if (ret == 0) + token_lsn = td->last_lsn = + td->visible_lsn; +#ifdef DIAGNOSTIC + if (ret == 0) { + DB_LSN s_lsn; + + DB_ASSERT(env, __log_current_lsn_int( + env, &s_lsn, NULL, NULL) == 0); + DB_ASSERT(env, LOG_COMPARE( + &td->visible_lsn, &s_lsn) <= 0); + COMPQUIET(s_lsn.file, 0); + } +#endif + } + + if (request.obj != NULL && request.obj->data != NULL) + __os_free(env, request.obj->data); + if (ret != 0) + goto err; + } else { + /* Log the commit in the parent! 
*/ + if (!IS_ZERO_LSN(td->last_lsn) && + (ret = __txn_child_log(env, txn->parent, + &((TXN_DETAIL *)txn->parent->td)->last_lsn, + 0, txn->txnid, &td->last_lsn)) != 0) { + goto err; + } + if (STAILQ_FIRST(&txn->logs) != NULL) { + /* + * Put the child first so we back it out first. + * All records are undone in reverse order. + */ + STAILQ_CONCAT(&txn->logs, &txn->parent->logs); + txn->parent->logs = txn->logs; + STAILQ_INIT(&txn->logs); + } + + F_SET(txn->parent, TXN_CHILDCOMMIT); + } + } + if (txn->token_buffer != NULL && ret == 0 && DBENV_LOGGING(env)) + __txn_build_token(txn, &token_lsn); + + if (txn->txn_list != NULL) { + __db_txnlist_end(env, txn->txn_list); + txn->txn_list = NULL; + } + + if (ret != 0) + goto err; + + /* + * Check for master leases at the end of only a normal commit. + * If we're a child, that is not a perm record. If we are a + * master and cannot get valid leases now, something happened + * during the commit. The only thing to do is panic. + */ + if (txn->parent == NULL && IS_REP_MASTER(env) && + IS_USING_LEASES(env) && !F_ISSET(txn, TXN_IGNORE_LEASE) && + (ret = __rep_lease_check(env, 1)) != 0) + return (__env_panic(env, ret)); + + /* + * This is here rather than in __txn_end because __txn_end is + * called too late during abort. So commit and abort each + * call it independently. + */ + __txn_reset_fe_watermarks(txn); + + /* This is OK because __txn_end can only fail with a panic. */ + return (__txn_end(txn, 1)); + +err: /* + * If we are prepared, then we "must" be able to commit. We panic here + * because even though the coordinator might be able to retry it is not + * clear it would know to do that. Otherwise we'll try to abort. If + * that is successful, then we return whatever was in ret (that is, the + * reason we failed). If the abort was unsuccessful, abort probably + * returned DB_RUNRECOVERY and we need to propagate that up. 
+ */ + if (td->status == TXN_PREPARED) + return (__env_panic(env, ret)); + + if ((t_ret = __txn_abort(txn)) != 0) + ret = t_ret; + return (ret); +} + +/* + * __txn_close_cursors + * Close a transaction's registered cursors, all its cursors are + * guaranteed to be closed. + */ +static int +__txn_close_cursors(txn) + DB_TXN *txn; +{ + int ret, tret; + DBC *dbc; + + ret = tret = 0; + dbc = NULL; + + if (txn == NULL) + return (0); + + while ((dbc = TAILQ_FIRST(&txn->my_cursors)) != NULL) { + + DB_ASSERT(dbc->env, txn == dbc->txn); + + /* + * Unregister the cursor from its transaction, regardless + * of return. + */ + TAILQ_REMOVE(&(txn->my_cursors), dbc, txn_cursors); + dbc->txn_cursors.tqe_next = NULL; + dbc->txn_cursors.tqe_prev = NULL; + + /* Removed from the active queue here. */ + if (F_ISSET(dbc, DBC_ACTIVE)) + ret = __dbc_close(dbc); + + dbc->txn = NULL; + + /* We have to close all cursors anyway, so continue on error. */ + if (ret != 0) { + __db_err(dbc->env, ret, "__dbc_close"); + if (tret == 0) + tret = ret; + } + } + txn->my_cursors.tqh_first = NULL; + txn->my_cursors.tqh_last = NULL; + + return (tret);/* Return the first error if any. */ +} + +/* + * __txn_set_commit_token -- + * Store a pointer to user's commit token buffer, for later use. + */ +static int +__txn_set_commit_token(txn, tokenp) + DB_TXN *txn; + DB_TXN_TOKEN *tokenp; +{ + ENV *env; + + env = txn->mgrp->env; + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_TXN->set_commit_token", DB_INIT_LOG); + if (txn->parent != NULL) { + __db_errx(env, DB_STR("4526", + "commit token unavailable for nested txn")); + return (EINVAL); + } + if (IS_REP_CLIENT(env)) { + __db_errx(env, DB_STR("4527", + "may not be called on a replication client")); + return (EINVAL); + } + + txn->token_buffer = tokenp; + +#ifdef DIAGNOSTIC + /* + * Applications may rely on the contents of the token buffer becoming + * valid only after a successful commit(). So it is not strictly + * necessary to initialize the buffer here. 
But in case they get + * confused we initialize it here to a recognizably invalid value. + */ + memset(tokenp, 0, DB_TXN_TOKEN_SIZE); +#endif + + return (0); +} + +/* + * __txn_build_token -- + * Stash a token describing the committing transaction into the buffer + * previously designated by the user. Called only in the case where the user + * has indeed supplied a buffer address. + */ +static void +__txn_build_token(txn, lsnp) + DB_TXN *txn; + DB_LSN *lsnp; +{ + ENV *env; + REGENV *renv; + u_int8_t *bp; + u_int32_t gen, version; + + bp = txn->token_buffer->buf; + env = txn->mgrp->env; + renv = env->reginfo->primary; + + /* Marshal the information into external form. */ + version = REP_COMMIT_TOKEN_FMT_VERSION; + gen = REP_ON(env) ? env->rep_handle->region->gen : 0; + DB_HTONL_COPYOUT(env, bp, version); + DB_HTONL_COPYOUT(env, bp, gen); + DB_HTONL_COPYOUT(env, bp, renv->envid); + DB_HTONL_COPYOUT(env, bp, lsnp->file); + DB_HTONL_COPYOUT(env, bp, lsnp->offset); +} + +/* + * __txn_abort_pp -- + * Interface routine to TXN->abort. + */ +static int +__txn_abort_pp(txn) + DB_TXN *txn; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_check, ret, t_ret; + + env = txn->mgrp->env; + rep_check = IS_ENV_REPLICATED(env) && + txn->parent == NULL && IS_REAL_TXN(txn); + + ENV_ENTER(env, ip); + ret = __txn_abort(txn); + if (rep_check && (t_ret = __op_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_abort -- + * Abort a transaction. + * + * PUBLIC: int __txn_abort __P((DB_TXN *)); + */ +int +__txn_abort(txn) + DB_TXN *txn; +{ + DB_LOCKREQ request; + DB_TXN *kid; + ENV *env; + REGENV *renv; + REGINFO *infop; + TXN_DETAIL *td; + u_int32_t id; + int ret; + + env = txn->mgrp->env; + td = txn->td; + /* + * Do not abort an XA transaction if another process is still using + * it, however make sure that it is aborted when the last process + * tries to abort it. 
+ */ + if (txn->xa_thr_status != TXN_XA_THREAD_NOTA && td->xa_ref > 1) { + td->status = TXN_NEED_ABORT; + return (0); + } + + PERFMON1(env, txn, abort, txn->txnid); + /* + * Close registered cursors before the abort. Even if the call fails, + * all cursors are closed. + */ + if ((ret = __txn_close_cursors(txn)) != 0) + return (__env_panic(env, ret)); + + /* Ensure that abort always fails fatally. */ + if ((ret = __txn_isvalid(txn, TXN_OP_ABORT)) != 0) + return (__env_panic(env, ret)); + + /* + * Clear the watermarks now. Can't do this in __txn_end because + * __db_refresh, called from undo, will free the DB_MPOOLFILEs. + */ + __txn_reset_fe_watermarks(txn); + + /* + * Try to abort any unresolved children. + * + * Abort either succeeds or panics the region. As soon as we + * see any failure, we just get out of here and return the panic + * up. + */ + while ((kid = TAILQ_FIRST(&txn->kids)) != NULL) + if ((ret = __txn_abort(kid)) != 0) + return (ret); + + infop = env->reginfo; + renv = infop->primary; + /* + * No mutex is needed as envid is read-only once it is set. + */ + id = renv->envid; + + /* + * Fast path -- no need to do anything fancy if there were no + * modifications (e.g., log records) for this transaction. + * We still call txn_undo to cleanup the txn_list from our + * children. + */ + if (IS_ZERO_LSN(td->last_lsn) && STAILQ_FIRST(&txn->logs) == NULL) { + if (txn->txn_list == NULL) + goto done; + else + goto undo; + } + + if (LOCKING_ON(env)) { + /* Allocate a locker for this restored txn if necessary. */ + if (txn->locker == NULL && + (ret = __lock_getlocker(env->lk_handle, + txn->txnid, 1, &txn->locker)) != 0) + return (__env_panic(env, ret)); + /* + * We are about to free all the read locks for this transaction + * below. Some of those locks might be handle locks which + * should not be freed, because they will be freed when the + * handle is closed. Check the events and preprocess any + * trades now so that we don't release the locks below. 
+ */ + if ((ret = __txn_doevents(env, txn, TXN_ABORT, 1)) != 0) + return (__env_panic(env, ret)); + + /* Turn off timeouts. */ + if ((ret = __lock_set_timeout(env, + txn->locker, 0, DB_SET_TXN_TIMEOUT)) != 0) + return (__env_panic(env, ret)); + + if ((ret = __lock_set_timeout(env, + txn->locker, 0, DB_SET_LOCK_TIMEOUT)) != 0) + return (__env_panic(env, ret)); + + request.op = DB_LOCK_UPGRADE_WRITE; + request.obj = NULL; + if ((ret = __lock_vec( + env, txn->locker, 0, &request, 1, NULL)) != 0) + return (__env_panic(env, ret)); + } +undo: if ((ret = __txn_undo(txn)) != 0) + return (__env_panic(env, ret)); + + /* + * Normally, we do not need to log aborts. However, if we + * are a distributed transaction (i.e., we have a prepare), + * then we log the abort so we know that this transaction + * was actually completed. + */ +done: if (DBENV_LOGGING(env) && td->status == TXN_PREPARED && + (ret = __txn_regop_log(env, txn, &td->last_lsn, + LOG_FLAGS(txn), TXN_ABORT, (int32_t)time(NULL), id, NULL)) != 0) + return (__env_panic(env, ret)); + + /* __txn_end always panics if it errors, so pass the return along. */ + return (__txn_end(txn, 0)); +} + +/* + * __txn_discard -- + * Interface routine to TXN->discard. + */ +static int +__txn_discard(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int rep_check, ret, t_ret; + + env = txn->mgrp->env; + rep_check = IS_ENV_REPLICATED(env) && + txn->parent == NULL && IS_REAL_TXN(txn); + + ENV_ENTER(env, ip); + ret = __txn_discard_int(txn, flags); + if (rep_check && (t_ret = __op_rep_exit(env)) != 0 && ret == 0) + ret = t_ret; + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_discard -- + * Free the per-process resources associated with this txn handle. 
+ * + * PUBLIC: int __txn_discard_int __P((DB_TXN *, u_int32_t flags)); + */ +int +__txn_discard_int(txn, flags) + DB_TXN *txn; + u_int32_t flags; +{ + DB_TXNMGR *mgr; + ENV *env; + int ret; + + COMPQUIET(flags, 0); + + mgr = txn->mgrp; + env = mgr->env; + + /* Close registered cursors. */ + if ((ret = __txn_close_cursors(txn)) != 0) + return (ret); + + if ((ret = __txn_isvalid(txn, TXN_OP_DISCARD)) != 0) + return (ret); + + /* Should be no children. */ + DB_ASSERT(env, TAILQ_FIRST(&txn->kids) == NULL); + + /* Free the space. */ + MUTEX_LOCK(env, mgr->mutex); + mgr->n_discards++; + if (F_ISSET(txn, TXN_MALLOC)) { + TAILQ_REMOVE(&mgr->txn_chain, txn, links); + } + MUTEX_UNLOCK(env, mgr->mutex); + if (F_ISSET(txn, TXN_MALLOC) && + txn->xa_thr_status != TXN_XA_THREAD_ASSOCIATED) + __os_free(env, txn); + + return (0); +} + +/* + * __txn_prepare -- + * Flush the log so a future commit is guaranteed to succeed. + * + * PUBLIC: int __txn_prepare __P((DB_TXN *, u_int8_t *)); + */ +int +__txn_prepare(txn, gid) + DB_TXN *txn; + u_int8_t *gid; +{ + DBT list_dbt, gid_dbt; + DB_LOCKREQ request; + DB_THREAD_INFO *ip; + DB_TXN *kid; + ENV *env; + TXN_DETAIL *td; + u_int32_t lflags; + int ret; + + env = txn->mgrp->env; + td = txn->td; + PERFMON2(env, txn, prepare, txn->txnid, gid); + DB_ASSERT(env, txn->xa_thr_status == TXN_XA_THREAD_NOTA || + td->xa_ref == 1); + ENV_ENTER(env, ip); + + /* Close registered cursors. */ + if ((ret = __txn_close_cursors(txn)) != 0) + goto err; + + if ((ret = __txn_isvalid(txn, TXN_OP_PREPARE)) != 0) + goto err; + if (F_ISSET(txn, TXN_DEADLOCK)) { + ret = __db_txn_deadlock_err(env, txn); + goto err; + } + + /* Commit any unresolved children. */ + while ((kid = TAILQ_FIRST(&txn->kids)) != NULL) + if ((ret = __txn_commit(kid, DB_TXN_NOSYNC)) != 0) + goto err; + + /* We must set the global transaction ID here. 
*/ + memcpy(td->gid, gid, DB_GID_SIZE); + if ((ret = __txn_doevents(env, txn, TXN_PREPARE, 1)) != 0) + goto err; + memset(&request, 0, sizeof(request)); + if (LOCKING_ON(env)) { + request.op = DB_LOCK_PUT_READ; + if (!IS_ZERO_LSN(td->last_lsn)) { + memset(&list_dbt, 0, sizeof(list_dbt)); + request.obj = &list_dbt; + } + if ((ret = __lock_vec(env, + txn->locker, 0, &request, 1, NULL)) != 0) + goto err; + + } + if (DBENV_LOGGING(env)) { + memset(&gid_dbt, 0, sizeof(gid)); + gid_dbt.data = gid; + gid_dbt.size = DB_GID_SIZE; + lflags = DB_LOG_COMMIT | DB_FLUSH; + if ((ret = __txn_prepare_log(env, + txn, &td->last_lsn, lflags, TXN_PREPARE, + &gid_dbt, &td->begin_lsn, request.obj)) != 0) + __db_err(env, ret, DB_STR("4528", + "DB_TXN->prepare: log_write failed")); + + if (request.obj != NULL && request.obj->data != NULL) + __os_free(env, request.obj->data); + if (ret != 0) + goto err; + + } + + MUTEX_LOCK(env, txn->mgrp->mutex); + td->status = TXN_PREPARED; + MUTEX_UNLOCK(env, txn->mgrp->mutex); +err: ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_id -- + * Return the transaction ID. + * + * PUBLIC: u_int32_t __txn_id __P((DB_TXN *)); + */ +u_int32_t +__txn_id(txn) + DB_TXN *txn; +{ + return (txn->txnid); +} + +/* + * __txn_get_name -- + * Get a descriptive string from a transaction. + * + * PUBLIC: int __txn_get_name __P((DB_TXN *, const char **)); + */ +int +__txn_get_name(txn, namep) + DB_TXN *txn; + const char **namep; +{ + *namep = txn->name; + + return (0); +} + +/* + * __txn_set_name -- + * Set a descriptive string for a transaction. 
+ * + * PUBLIC: int __txn_set_name __P((DB_TXN *, const char *)); + */ +int +__txn_set_name(txn, name) + DB_TXN *txn; + const char *name; +{ + DB_THREAD_INFO *ip; + DB_TXNMGR *mgr; + ENV *env; + TXN_DETAIL *td; + size_t len; + int ret; + char *p; + + mgr = txn->mgrp; + env = mgr->env; + td = txn->td; + len = strlen(name) + 1; + + if ((ret = __os_realloc(env, len, &txn->name)) != 0) + return (ret); + memcpy(txn->name, name, len); + + ENV_ENTER(env, ip); + TXN_SYSTEM_LOCK(env); + if (td->name != INVALID_ROFF) { + __env_alloc_free( + &mgr->reginfo, R_ADDR(&mgr->reginfo, td->name)); + td->name = INVALID_ROFF; + } + if ((ret = __env_alloc(&mgr->reginfo, len, &p)) != 0) { + TXN_SYSTEM_UNLOCK(env); + __db_errx(env, DB_STR("4529", + "Unable to allocate memory for transaction name")); + + __os_free(env, txn->name); + txn->name = NULL; + + ENV_LEAVE(env, ip); + return (ret); + } + TXN_SYSTEM_UNLOCK(env); + td->name = R_OFFSET(&mgr->reginfo, p); + memcpy(p, name, len); + +#ifdef DIAGNOSTIC + /* + * If DIAGNOSTIC is set, map the name into the log so users can track + * operations through the log. 
+ */ + if (DBENV_LOGGING(env)) + (void)__log_printf(env, txn, "transaction %#lx named %s", + (u_long)txn->txnid, name); +#endif + + ENV_LEAVE(env, ip); + return (0); +} + +/* + * __txn_get_priority -- + * Get a transaction's priority level + * PUBLIC: int __txn_get_priority __P((DB_TXN *, u_int32_t *)); + */ +int +__txn_get_priority(txn, priorityp) + DB_TXN *txn; + u_int32_t *priorityp; +{ + if (txn->locker == NULL) + return EINVAL; + + *priorityp = txn->locker->priority; + return (0); +} + +/* + * __txn_set_priority -- + * Assign a transaction a priority level + * PUBLIC: int __txn_set_priority __P((DB_TXN *, u_int32_t)); + */ +int +__txn_set_priority(txn, priority) + DB_TXN *txn; + u_int32_t priority; +{ + if (txn->locker == NULL) + return EINVAL; + + txn->locker->priority = priority; + ((TXN_DETAIL *)txn->td)->priority = priority; + + return (0); +} + +/* + * __txn_set_timeout -- + * ENV->set_txn_timeout. + * PUBLIC: int __txn_set_timeout __P((DB_TXN *, db_timeout_t, u_int32_t)); + */ +int +__txn_set_timeout(txn, timeout, op) + DB_TXN *txn; + db_timeout_t timeout; + u_int32_t op; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = txn->mgrp->env; + + if (op != DB_SET_TXN_TIMEOUT && op != DB_SET_LOCK_TIMEOUT) + return (__db_ferr(env, "DB_TXN->set_timeout", 0)); + + ENV_ENTER(env, ip); + ret = __lock_set_timeout( env, txn->locker, timeout, op); + ENV_LEAVE(txn->mgrp->env, ip); + return (ret); +} + +/* + * __txn_isvalid -- + * Return 0 if the DB_TXN is reasonable, otherwise panic. + */ +static int +__txn_isvalid(txn, op) + const DB_TXN *txn; + txnop_t op; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + ENV *env; + TXN_DETAIL *td; + + mgr = txn->mgrp; + env = mgr->env; + region = mgr->reginfo.primary; + + /* Check for recovery. */ + if (!F_ISSET(txn, TXN_COMPENSATE) && + F_ISSET(region, TXN_IN_RECOVERY)) { + __db_errx(env, DB_STR("4530", + "operation not permitted during recovery")); + goto err; + } + + /* Check for live cursors. 
*/ + if (txn->cursors != 0) { + __db_errx(env, DB_STR("4531", + "transaction has active cursors")); + goto err; + } + + /* Check transaction's state. */ + td = txn->td; + + /* Handle any operation specific checks. */ + switch (op) { + case TXN_OP_DISCARD: + /* + * Since we're just tossing the per-process space; there are + * a lot of problems with the transaction that we can tolerate. + */ + + /* Transaction is already been reused. */ + if (txn->txnid != td->txnid) + return (0); + + /* + * What we've got had better be either a prepared or + * restored transaction. + */ + if (td->status != TXN_PREPARED && + !F_ISSET(td, TXN_DTL_RESTORED)) { + __db_errx(env, DB_STR("4532", + "not a restored transaction")); + return (__env_panic(env, EINVAL)); + } + + return (0); + case TXN_OP_PREPARE: + if (txn->parent != NULL) { + /* + * This is not fatal, because you could imagine an + * application that simply prepares everybody because + * it doesn't distinguish between children and parents. + * I'm not arguing this is good, but I could imagine + * someone doing it. + */ + __db_errx(env, DB_STR("4533", + "Prepare disallowed on child transactions")); + return (EINVAL); + } + break; + case TXN_OP_ABORT: + case TXN_OP_COMMIT: + default: + break; + } + + switch (td->status) { + case TXN_PREPARED: + if (op == TXN_OP_PREPARE) { + __db_errx(env, DB_STR("4534", + "transaction already prepared")); + /* + * Txn_prepare doesn't blow away the user handle, so + * in this case, give the user the opportunity to + * abort or commit. + */ + return (EINVAL); + } + break; + case TXN_RUNNING: + case TXN_NEED_ABORT: + break; + case TXN_ABORTED: + case TXN_COMMITTED: + default: + __db_errx(env, DB_STR_A("4535", + "transaction already %s", "%s"), + td->status == TXN_COMMITTED ? + DB_STR_P("committed") : DB_STR_P("aborted")); + goto err; + } + + return (0); + +err: /* + * If there's a serious problem with the transaction, panic. 
TXN + * handles are dead by definition when we return, and if you use + * a cursor you forgot to close, we have no idea what will happen. + */ + return (__env_panic(env, EINVAL)); +} + +/* + * __txn_end -- + * Internal transaction end routine. + */ +static int +__txn_end(txn, is_commit) + DB_TXN *txn; + int is_commit; +{ + DB_LOCKREQ request; + DB_TXNLOGREC *lr; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + ENV *env; + TXN_DETAIL *ptd, *td; + db_mutex_t mvcc_mtx; + int do_closefiles, ret; + + mgr = txn->mgrp; + env = mgr->env; + region = mgr->reginfo.primary; + do_closefiles = 0; + + /* Process commit events. */ + if ((ret = __txn_doevents(env, + txn, is_commit ? TXN_COMMIT : TXN_ABORT, 0)) != 0) + return (__env_panic(env, ret)); + + /* End the transaction. */ + td = txn->td; + if (td->nlog_dbs != 0 && + (ret = __txn_dref_fname(env, txn)) != 0 && ret != EIO) + return (__env_panic(env, ret)); + + if (td->mvcc_ref != 0 && IS_MAX_LSN(td->visible_lsn)) { + /* + * Some pages were dirtied but nothing was logged. This can + * happen easily if we are aborting, but there are also cases + * in the compact code where pages are dirtied unconditionally + * and then we find out that there is no work to do. + * + * We need to make sure that the versions become visible to + * future transactions. We need to set visible_lsn before + * setting td->status to ensure safe reads of visible_lsn in + * __memp_fget. + */ + if ((ret = __log_current_lsn_int(env, &td->visible_lsn, + NULL, NULL)) != 0) + return (__env_panic(env, ret)); + } + + /* + * Release the locks. + * + * __txn_end cannot return an simple error, we MUST return + * success/failure from commit or abort, ignoring any internal + * errors. So, we panic if something goes wrong. We can't + * deadlock here because we're not acquiring any new locks, + * so DB_LOCK_DEADLOCK is just as fatal as any other error. + */ + if (LOCKING_ON(env)) { + /* Allocate a locker for this restored txn if necessary. 
*/ + if (txn->locker == NULL && + (ret = __lock_getlocker(env->lk_handle, + txn->txnid, 1, &txn->locker)) != 0) + return (__env_panic(env, ret)); + request.op = txn->parent == NULL || + is_commit == 0 ? DB_LOCK_PUT_ALL : DB_LOCK_INHERIT; + request.obj = NULL; + if ((ret = __lock_vec(env, + txn->locker, 0, &request, 1, NULL)) != 0) + return (__env_panic(env, ret)); + } + + TXN_SYSTEM_LOCK(env); + td->status = is_commit ? TXN_COMMITTED : TXN_ABORTED; + SH_TAILQ_REMOVE(®ion->active_txn, td, links, __txn_detail); + region->curtxns--; + if (F_ISSET(td, TXN_DTL_RESTORED)) { + region->stat.st_nrestores--; + do_closefiles = region->stat.st_nrestores == 0; + } + + if (td->name != INVALID_ROFF) { + __env_alloc_free(&mgr->reginfo, + R_ADDR(&mgr->reginfo, td->name)); + td->name = INVALID_ROFF; + } + if (td->nlog_slots != TXN_NSLOTS) + __env_alloc_free(&mgr->reginfo, + R_ADDR(&mgr->reginfo, td->log_dbs)); + + if (txn->parent != NULL) { + ptd = txn->parent->td; + SH_TAILQ_REMOVE(&ptd->kids, td, klinks, __txn_detail); + } else if ((mvcc_mtx = td->mvcc_mtx) != MUTEX_INVALID) { + MUTEX_LOCK(env, mvcc_mtx); + if (td->mvcc_ref != 0) { + SH_TAILQ_INSERT_HEAD(®ion->mvcc_txn, + td, links, __txn_detail); + + /* + * The transaction has been added to the list of + * committed snapshot transactions with active pages. + * It needs to be freed when the last page is evicted. 
+ */ + F_SET(td, TXN_DTL_SNAPSHOT); +#ifdef HAVE_STATISTICS + STAT_INC(env, txn, + nsnapshot, region->stat.st_nsnapshot, txn->txnid); + if (region->stat.st_nsnapshot > + region->stat.st_maxnsnapshot) + STAT_SET(env, txn, maxnsnapshot, + region->stat.st_maxnsnapshot, + region->stat.st_nsnapshot, + txn->txnid); +#endif + td = NULL; + } + MUTEX_UNLOCK(env, mvcc_mtx); + if (td != NULL) + if ((ret = __mutex_free(env, &td->mvcc_mtx)) != 0) + return (__env_panic(env, ret)); + } + + if (td != NULL) + __env_alloc_free(&mgr->reginfo, td); + +#ifdef HAVE_STATISTICS + if (is_commit) + STAT_INC(env, + txn, ncommits, region->stat.st_ncommits, txn->txnid); + else + STAT_INC(env, + txn, naborts, region->stat.st_naborts, txn->txnid); + STAT_DEC(env, txn, nactive, region->stat.st_nactive, txn->txnid); +#endif + + /* Increment bulk transaction counter while holding transaction lock. */ + if (F_ISSET(txn, TXN_BULK)) + ((DB_TXNREGION *)env->tx_handle->reginfo.primary)->n_bulk_txn--; + + TXN_SYSTEM_UNLOCK(env); + + /* + * The transaction cannot get more locks, remove its locker info, + * if any. + */ + if (LOCKING_ON(env) && (ret = + __lock_freelocker(env->lk_handle, txn->locker)) != 0) + return (__env_panic(env, ret)); + if (txn->parent != NULL) + TAILQ_REMOVE(&txn->parent->kids, txn, klinks); + + /* Free the space. */ + while ((lr = STAILQ_FIRST(&txn->logs)) != NULL) { + STAILQ_REMOVE(&txn->logs, lr, __txn_logrec, links); + __os_free(env, lr); + } + if (txn->name != NULL) { + __os_free(env, txn->name); + txn->name = NULL; + } + + /* + * Free the transaction structure if we allocated it and if we are + * not in an XA transaction that will be freed when we exit the XA + * wrapper routines. 
+ */ + if (F_ISSET(txn, TXN_MALLOC) && + txn->xa_thr_status != TXN_XA_THREAD_ASSOCIATED) { + MUTEX_LOCK(env, mgr->mutex); + TAILQ_REMOVE(&mgr->txn_chain, txn, links); + MUTEX_UNLOCK(env, mgr->mutex); + + __os_free(env, txn); + } + + if (do_closefiles) { + /* + * Otherwise, we have resolved the last outstanding prepared + * txn and need to invalidate the fileids that were left + * open for those txns and then close them. + */ + (void)__dbreg_invalidate_files(env, 1); + (void)__dbreg_close_files(env, 1); + if (IS_REP_MASTER(env)) + F_CLR(env->rep_handle, DBREP_OPENFILES); + F_CLR(env->lg_handle, DBLOG_OPENFILES); + mgr->n_discards = 0; + (void)__txn_checkpoint(env, 0, 0, + DB_CKP_INTERNAL | DB_FORCE); + } + + return (0); +} + +static int +__txn_dispatch_undo(env, txn, rdbt, key_lsn, txnlist) + ENV *env; + DB_TXN *txn; + DBT *rdbt; + DB_LSN *key_lsn; + DB_TXNHEAD *txnlist; +{ + int ret; + + txnlist->td = txn->td; + ret = __db_dispatch(env, &env->recover_dtab, + rdbt, key_lsn, DB_TXN_ABORT, txnlist); + if (ret == DB_SURPRISE_KID) { + F_SET(txn, TXN_CHILDCOMMIT); + ret = 0; + } + if (ret == 0 && F_ISSET(txn, TXN_CHILDCOMMIT) && IS_ZERO_LSN(*key_lsn)) + ret = __db_txnlist_lsnget(env, txnlist, key_lsn, 0); + + return (ret); +} + +/* + * __txn_undo -- + * Undo the transaction with id txnid. + */ +static int +__txn_undo(txn) + DB_TXN *txn; +{ + DBT rdbt; + DB_LOGC *logc; + DB_LSN key_lsn; + DB_TXN *ptxn; + DB_TXNHEAD *txnlist; + DB_TXNLOGREC *lr; + DB_TXNMGR *mgr; + ENV *env; + int ret, t_ret; + + mgr = txn->mgrp; + env = mgr->env; + logc = NULL; + txnlist = NULL; + ret = 0; + + if (!LOGGING_ON(env)) + return (0); + + /* + * This is the simplest way to code this, but if the mallocs during + * recovery turn out to be a performance issue, we can do the + * allocation here and use DB_DBT_USERMEM. + */ + memset(&rdbt, 0, sizeof(rdbt)); + + /* + * Allocate a txnlist for children and aborted page allocs. 
+ * We need to associate the list with the maximal parent + * so that aborted pages are recovered when that transaction + * is committed or aborted. + */ + for (ptxn = txn->parent; ptxn != NULL && ptxn->parent != NULL;) + ptxn = ptxn->parent; + + if (ptxn != NULL && ptxn->txn_list != NULL) + txnlist = ptxn->txn_list; + else if (txn->txn_list != NULL) + txnlist = txn->txn_list; + else if ((ret = __db_txnlist_init(env, + txn->thread_info, 0, 0, NULL, &txnlist)) != 0) + return (ret); + else if (ptxn != NULL) + ptxn->txn_list = txnlist; + + /* + * Take log records from the linked list stored in the transaction, + * then from the log. + */ + STAILQ_FOREACH(lr, &txn->logs, links) { + rdbt.data = lr->data; + rdbt.size = 0; + LSN_NOT_LOGGED(key_lsn); + ret = + __txn_dispatch_undo(env, txn, &rdbt, &key_lsn, txnlist); + if (ret != 0) { + __db_err(env, ret, DB_STR("4536", + "DB_TXN->abort: in-memory log undo failed")); + goto err; + } + } + + key_lsn = ((TXN_DETAIL *)txn->td)->last_lsn; + + if (!IS_ZERO_LSN(key_lsn) && + (ret = __log_cursor(env, &logc)) != 0) + goto err; + + while (!IS_ZERO_LSN(key_lsn)) { + /* + * The dispatch routine returns the lsn of the record + * before the current one in the key_lsn argument. + */ + if ((ret = __logc_get(logc, &key_lsn, &rdbt, DB_SET)) == 0) { + ret = __txn_dispatch_undo(env, + txn, &rdbt, &key_lsn, txnlist); + } + + if (ret != 0) { + __db_err(env, ret, DB_STR_A("4537", + "DB_TXN->abort: log undo failed for LSN: %lu %lu", + "%lu %lu"), (u_long)key_lsn.file, + (u_long)key_lsn.offset); + goto err; + } + } + +err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + + if (ptxn == NULL && txnlist != NULL) + __db_txnlist_end(env, txnlist); + return (ret); +} + +/* + * __txn_activekids -- + * Return if this transaction has any active children. 
+ * + * PUBLIC: int __txn_activekids __P((ENV *, u_int32_t, DB_TXN *)); + */ +int +__txn_activekids(env, rectype, txn) + ENV *env; + u_int32_t rectype; + DB_TXN *txn; +{ + /* + * On a child commit, we know that there are children (i.e., the + * committing child at the least. In that case, skip this check. + */ + if (F_ISSET(txn, TXN_COMPENSATE) || rectype == DB___txn_child) + return (0); + + if (TAILQ_FIRST(&txn->kids) != NULL) { + __db_errx(env, DB_STR("4538", + "Child transaction is active")); + return (EPERM); + } + return (0); +} + +/* + * __txn_force_abort -- + * Force an abort record into the log if the commit record + * failed to get to disk. + * + * PUBLIC: int __txn_force_abort __P((ENV *, u_int8_t *)); + */ +int +__txn_force_abort(env, buffer) + ENV *env; + u_int8_t *buffer; +{ + DB_CIPHER *db_cipher; + HDR hdr, *hdrp; + u_int32_t offset, opcode, sum_len; + u_int8_t *bp, *key; + size_t hdrsize, rec_len; + int ret; + + db_cipher = env->crypto_handle; + + /* + * This routine depends on the layout of HDR and the __txn_regop + * record in txn.src. We are passed the beginning of the commit + * record in the log buffer and overwrite the commit with an abort + * and recalculate the checksum. + */ + hdrsize = CRYPTO_ON(env) ? 
HDR_CRYPTO_SZ : HDR_NORMAL_SZ; + + hdrp = (HDR *)buffer; + memcpy(&hdr.prev, buffer + SSZ(HDR, prev), sizeof(hdr.prev)); + memcpy(&hdr.len, buffer + SSZ(HDR, len), sizeof(hdr.len)); + if (LOG_SWAPPED(env)) + __log_hdrswap(&hdr, CRYPTO_ON(env)); + rec_len = hdr.len - hdrsize; + + offset = sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN); + if (CRYPTO_ON(env)) { + key = db_cipher->mac_key; + sum_len = DB_MAC_KEY; + if ((ret = db_cipher->decrypt(env, db_cipher->data, + &hdrp->iv[0], buffer + hdrsize, rec_len)) != 0) + return (__env_panic(env, ret)); + } else { + key = NULL; + sum_len = sizeof(u_int32_t); + } + bp = buffer + hdrsize + offset; + opcode = TXN_ABORT; + LOGCOPY_32(env, bp, &opcode); + + if (CRYPTO_ON(env) && + (ret = db_cipher->encrypt(env, + db_cipher->data, &hdrp->iv[0], buffer + hdrsize, rec_len)) != 0) + return (__env_panic(env, ret)); + +#ifdef HAVE_LOG_CHECKSUM + __db_chksum(&hdr, buffer + hdrsize, rec_len, key, NULL); + if (LOG_SWAPPED(env)) + __log_hdrswap(&hdr, CRYPTO_ON(env)); + memcpy(buffer + SSZA(HDR, chksum), hdr.chksum, sum_len); +#endif + + return (0); +} + +/* + * __txn_preclose -- + * Before we can close an environment, we need to check if we were in the + * middle of taking care of restored transactions. If so, close the files + * we opened. + * + * PUBLIC: int __txn_preclose __P((ENV *)); + */ +int +__txn_preclose(env) + ENV *env; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + int do_closefiles, ret; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + do_closefiles = 0; + + TXN_SYSTEM_LOCK(env); + if (region != NULL && + region->stat.st_nrestores <= mgr->n_discards && + mgr->n_discards != 0) + do_closefiles = 1; + TXN_SYSTEM_UNLOCK(env); + + if (do_closefiles) { + /* + * Set the DBLOG_RECOVER flag while closing these files so they + * do not create additional log records that will confuse future + * recoveries. 
+ */ + F_SET(env->lg_handle, DBLOG_RECOVER); + ret = __dbreg_close_files(env, 0); + F_CLR(env->lg_handle, DBLOG_RECOVER); + } else + ret = 0; + + return (ret); +} + +/* + * __txn_reset -- + * Reset the last txnid to its minimum value, and log the reset. + * + * PUBLIC: int __txn_reset __P((ENV *)); + */ +int +__txn_reset(env) + ENV *env; +{ + DB_LSN scrap; + DB_TXNREGION *region; + + region = env->tx_handle->reginfo.primary; + region->last_txnid = TXN_MINIMUM; + + DB_ASSERT(env, LOGGING_ON(env)); + return (__txn_recycle_log(env, + NULL, &scrap, 0, TXN_MINIMUM, TXN_MAXIMUM)); +} + +/* + * txn_set_txn_lsnp -- + * Set the pointer to the begin_lsn field if that field is zero. + * Set the pointer to the last_lsn field. + */ +static void +__txn_set_txn_lsnp(txn, blsnp, llsnp) + DB_TXN *txn; + DB_LSN **blsnp, **llsnp; +{ + TXN_DETAIL *td; + + td = txn->td; + *llsnp = &td->last_lsn; + + while (txn->parent != NULL) + txn = txn->parent; + + td = txn->td; + if (IS_ZERO_LSN(td->begin_lsn)) + *blsnp = &td->begin_lsn; +} + +/* + * PUBLIC: int __txn_applied_pp __P((DB_ENV *, + * PUBLIC: DB_TXN_TOKEN *, db_timeout_t, u_int32_t)); + */ +int +__txn_applied_pp(dbenv, token, timeout, flags) + DB_ENV *dbenv; + DB_TXN_TOKEN *token; + db_timeout_t timeout; + u_int32_t flags; +{ + ENV *env; + DB_THREAD_INFO *ip; + DB_COMMIT_INFO commit_info; + u_int8_t *bp; + int ret; + + env = dbenv->env; + + if (flags != 0) + return (__db_ferr(env, "DB_ENV->txn_applied", 0)); + + /* Unmarshal the token from its stored form. */ + bp = token->buf; + DB_NTOHL_COPYIN(env, commit_info.version, bp); + DB_ASSERT(env, commit_info.version == REP_COMMIT_TOKEN_FMT_VERSION); + DB_NTOHL_COPYIN(env, commit_info.gen, bp); + DB_NTOHL_COPYIN(env, commit_info.envid, bp); + DB_NTOHL_COPYIN(env, commit_info.lsn.file, bp); + DB_NTOHL_COPYIN(env, commit_info.lsn.offset, bp); + + /* + * Check for a token representing a transaction that committed without + * any log records having been written. 
Ideally an application should + * be smart enough to avoid trying to use a token from such an "empty" + * transaction. But in some cases it might be difficult for them to + * keep track, so we don't really forbid it. + */ + if (IS_ZERO_LSN(commit_info.lsn)) + return (DB_KEYEMPTY); + + ENV_REQUIRES_CONFIG(env, + env->lg_handle, "DB_ENV->txn_applied", DB_INIT_LOG); + + ENV_ENTER(env, ip); + ret = __txn_applied(env, ip, &commit_info, timeout); + ENV_LEAVE(env, ip); + return (ret); +} + +static int +__txn_applied(env, ip, commit_info, timeout) + ENV *env; + DB_THREAD_INFO *ip; + DB_COMMIT_INFO *commit_info; + db_timeout_t timeout; +{ + LOG *lp; + DB_LSN lsn; + REGENV *renv; + + /* + * The lockout protection scope between __op_handle_enter and + * __env_db_rep_exit is handled within __rep_txn_applied, and is not + * needed here since the rest of this function only runs in a + * non-replication env. + */ + if (REP_ON(env)) + return (__rep_txn_applied(env, ip, commit_info, timeout)); + + if (commit_info->gen != 0) { + __db_errx(env, DB_STR("4539", + "replication commit token in non-replication env")); + return (EINVAL); + } + + lp = env->lg_handle->reginfo.primary; + LOG_SYSTEM_LOCK(env); + lsn = lp->lsn; + LOG_SYSTEM_UNLOCK(env); + + renv = env->reginfo->primary; + + if (renv->envid == commit_info->envid && + LOG_COMPARE(&commit_info->lsn, &lsn) <= 0) + return (0); + return (DB_NOTFOUND); +} diff --git a/src/txn/txn.src b/src/txn/txn.src new file mode 100644 index 00000000..526ab260 --- /dev/null +++ b/src/txn/txn.src @@ -0,0 +1,120 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +DBPRIVATE +PREFIX __txn + +INCLUDE #include "db_int.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/lock.h" +INCLUDE #include "dbinc/txn.h" +INCLUDE + +/* + * This is the standard log operation for commit. + * Note that we are using an int32_t for the timestamp. This means that + * in 2039 we will need to deprecate this log record and create one that + * either changes the Epoch or has a 64-bit offset. + * NOTE: The opcode MUST be the first argument in these records, because + * the force_abort code overwrites it with an ABORT should the write to + * the log fail. + * envid: + * Environment ID of this operation (4.4+). + */ +BEGIN_COMPAT regop 42 10 +ARG opcode u_int32_t lu +TIME timestamp int32_t ld +LOCKS locks DBT s +END + +BEGIN regop 44 10 +ARG opcode u_int32_t lu +TIME timestamp int32_t ld +ARG envid u_int32_t lu +LOCKS locks DBT s +END + +/* + * This is the checkpoint record. It contains the lsn that the checkpoint + * guarantees and a pointer to the last checkpoint so we can walk backwards + * by checkpoint. + * + * ckp_lsn: + * The lsn in the log of the most recent point at which all begun + * transactions have been aborted. This is the point for which + * the checkpoint is relevant. + * last_ckp: + * The previous checkpoint. + * timestamp: + * See comment in commit about timestamps. + * envid: + * Environment ID of this checkpoint (4.3+). + * rep_gen: + * Persistent replication generation number (4.2-4.5 only). + * Renamed to 'spare' in 4.6. + */ +BEGIN_COMPAT ckp 42 11 +POINTER ckp_lsn DB_LSN * lu +POINTER last_ckp DB_LSN * lu +TIME timestamp int32_t ld +ARG rep_gen u_int32_t lu +END + +BEGIN ckp 43 11 +POINTER ckp_lsn DB_LSN * lu +POINTER last_ckp DB_LSN * lu +TIME timestamp int32_t ld +ARG envid u_int32_t lu +ARG spare u_int32_t lu +END + +/* + * This is the (new) log operation for a child commit. 
It is + * logged as a record in the PARENT. The child field contains + * the transaction ID of the child committing and the c_lsn is + * the last LSN of the child's log trail. + */ +BEGIN child 42 12 +ARG child u_int32_t lx +POINTER c_lsn DB_LSN * lu +END + + +/* + * This is the standard log operation for prepare. + * NOTE: The opcode MUST be the first argument in these records, because + * the force_abort code overwrites it with an ABORT should the write to + * the log fail. + */ +BEGIN_COMPAT xa_regop 42 13 +ARG opcode u_int32_t lu +DBT xid DBT s +ARG formatID int32_t ld +ARG gtrid u_int32_t lu +ARG bqual u_int32_t lu +POINTER begin_lsn DB_LSN * lu +LOCKS locks DBT s +END + +BEGIN prepare 48 13 +ARG opcode u_int32_t lu +DBT gid DBT s +POINTER begin_lsn DB_LSN * lu +LOCKS locks DBT s +END + +/* + * Log the fact that we are recycling txnids. + */ +BEGIN recycle 42 14 +ARG min u_int32_t lu +ARG max u_int32_t lu +END diff --git a/src/txn/txn_auto.c b/src/txn/txn_auto.c new file mode 100644 index 00000000..926d3653 --- /dev/null +++ b/src/txn/txn_auto.c @@ -0,0 +1,93 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" + +DB_LOG_RECSPEC __txn_regop_42_desc[] = { + {LOGREC_ARG, SSZ(__txn_regop_42_args, opcode), "opcode", "%lu"}, + {LOGREC_TIME, SSZ(__txn_regop_42_args, timestamp), "timestamp", ""}, + {LOGREC_LOCKS, SSZ(__txn_regop_42_args, locks), "locks", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_regop_desc[] = { + {LOGREC_ARG, SSZ(__txn_regop_args, opcode), "opcode", "%lu"}, + {LOGREC_TIME, SSZ(__txn_regop_args, timestamp), "timestamp", ""}, + {LOGREC_ARG, SSZ(__txn_regop_args, envid), "envid", "%lu"}, + {LOGREC_LOCKS, SSZ(__txn_regop_args, locks), "locks", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_ckp_42_desc[] = { + {LOGREC_POINTER, SSZ(__txn_ckp_42_args, ckp_lsn), "ckp_lsn", ""}, + {LOGREC_POINTER, SSZ(__txn_ckp_42_args, last_ckp), "last_ckp", ""}, + {LOGREC_TIME, SSZ(__txn_ckp_42_args, timestamp), "timestamp", ""}, + {LOGREC_ARG, SSZ(__txn_ckp_42_args, rep_gen), "rep_gen", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_ckp_desc[] = { + {LOGREC_POINTER, SSZ(__txn_ckp_args, ckp_lsn), "ckp_lsn", ""}, + {LOGREC_POINTER, SSZ(__txn_ckp_args, last_ckp), "last_ckp", ""}, + {LOGREC_TIME, SSZ(__txn_ckp_args, timestamp), "timestamp", ""}, + {LOGREC_ARG, SSZ(__txn_ckp_args, envid), "envid", "%lu"}, + {LOGREC_ARG, SSZ(__txn_ckp_args, spare), "spare", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_child_desc[] = { + {LOGREC_ARG, SSZ(__txn_child_args, child), "child", "%lx"}, + {LOGREC_POINTER, SSZ(__txn_child_args, c_lsn), "c_lsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_xa_regop_42_desc[] = { + {LOGREC_ARG, SSZ(__txn_xa_regop_42_args, opcode), "opcode", "%lu"}, + {LOGREC_DBT, SSZ(__txn_xa_regop_42_args, xid), "xid", ""}, + {LOGREC_ARG, SSZ(__txn_xa_regop_42_args, formatID), "formatID", "%ld"}, + 
{LOGREC_ARG, SSZ(__txn_xa_regop_42_args, gtrid), "gtrid", "%lu"}, + {LOGREC_ARG, SSZ(__txn_xa_regop_42_args, bqual), "bqual", "%lu"}, + {LOGREC_POINTER, SSZ(__txn_xa_regop_42_args, begin_lsn), "begin_lsn", ""}, + {LOGREC_LOCKS, SSZ(__txn_xa_regop_42_args, locks), "locks", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_prepare_desc[] = { + {LOGREC_ARG, SSZ(__txn_prepare_args, opcode), "opcode", "%lu"}, + {LOGREC_DBT, SSZ(__txn_prepare_args, gid), "gid", ""}, + {LOGREC_POINTER, SSZ(__txn_prepare_args, begin_lsn), "begin_lsn", ""}, + {LOGREC_LOCKS, SSZ(__txn_prepare_args, locks), "locks", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __txn_recycle_desc[] = { + {LOGREC_ARG, SSZ(__txn_recycle_args, min), "min", "%lu"}, + {LOGREC_ARG, SSZ(__txn_recycle_args, max), "max", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +/* + * PUBLIC: int __txn_init_recover __P((ENV *, DB_DISTAB *)); + */ +int +__txn_init_recover(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_regop_recover, DB___txn_regop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_ckp_recover, DB___txn_ckp)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_child_recover, DB___txn_child)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_prepare_recover, DB___txn_prepare)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_recycle_recover, DB___txn_recycle)) != 0) + return (ret); + return (0); +} diff --git a/src/txn/txn_autop.c b/src/txn/txn_autop.c new file mode 100644 index 00000000..0924a401 --- /dev/null +++ b/src/txn/txn_autop.c @@ -0,0 +1,175 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __txn_regop_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_regop_42", __txn_regop_42_desc, info)); +} + +/* + * PUBLIC: int __txn_regop_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_regop_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_regop", __txn_regop_desc, info)); +} + +/* + * PUBLIC: int __txn_ckp_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_ckp_42", __txn_ckp_42_desc, info)); +} + +/* + * PUBLIC: int __txn_ckp_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_ckp_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_ckp", __txn_ckp_desc, info)); +} + +/* + * PUBLIC: int __txn_child_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_child_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + 
return (__log_print_record(env, dbtp, lsnp, "__txn_child", __txn_child_desc, info)); +} + +/* + * PUBLIC: int __txn_xa_regop_42_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_xa_regop_42_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_xa_regop_42", __txn_xa_regop_42_desc, info)); +} + +/* + * PUBLIC: int __txn_prepare_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_prepare_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_prepare", __txn_prepare_desc, info)); +} + +/* + * PUBLIC: int __txn_recycle_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__txn_recycle_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__txn_recycle", __txn_recycle_desc, info)); +} + +/* + * PUBLIC: int __txn_init_print __P((ENV *, DB_DISTAB *)); + */ +int +__txn_init_print(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_regop_print, DB___txn_regop)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_ckp_print, DB___txn_ckp)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_child_print, DB___txn_child)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_prepare_print, DB___txn_prepare)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_recycle_print, DB___txn_recycle)) != 0) + return (ret); + return (0); +} diff --git a/src/txn/txn_chkpt.c 
b/src/txn/txn_chkpt.c new file mode 100644 index 00000000..bffc1801 --- /dev/null +++ b/src/txn/txn_chkpt.c @@ -0,0 +1,410 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +/* + * __txn_checkpoint_pp -- + * ENV->txn_checkpoint pre/post processing. + * + * PUBLIC: int __txn_checkpoint_pp + * PUBLIC: __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__txn_checkpoint_pp(dbenv, kbytes, minutes, flags) + DB_ENV *dbenv; + u_int32_t kbytes, minutes, flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->tx_handle, "txn_checkpoint", DB_INIT_TXN); + + /* + * On a replication client, all transactions are read-only; therefore, + * a checkpoint is a null-op. + * + * We permit txn_checkpoint, instead of just rendering it illegal, + * so that an application can just let a checkpoint thread continue + * to operate as it gets promoted or demoted between being a + * master and a client. + */ + if (IS_REP_CLIENT(env)) + return (0); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__txn_checkpoint(env, kbytes, minutes, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_checkpoint -- + * ENV->txn_checkpoint. + * + * PUBLIC: int __txn_checkpoint + * PUBLIC: __P((ENV *, u_int32_t, u_int32_t, u_int32_t)); + */ +int +__txn_checkpoint(env, kbytes, minutes, flags) + ENV *env; + u_int32_t kbytes, minutes, flags; +{ + DB_LOG *dblp; + DB_LSN ckp_lsn, last_ckp; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + LOG *lp; + REGENV *renv; + REGINFO *infop; + time_t last_ckp_time, now; + u_int32_t bytes, id, logflags, mbytes, op; + int ret; + + ret = 0; + + /* + * A client will only call through here during recovery, + * so just sync the Mpool and go home. We want to be sure + * that since queue meta pages are not rolled back that they + * are clean in the cache prior to any transaction log + * truncation due to syncup. 
+ */ + if (IS_REP_CLIENT(env)) { + if (MPOOL_ON(env) && + (ret = __memp_sync(env, DB_SYNC_CHECKPOINT, NULL)) != 0) { + __db_err(env, ret, DB_STR("4518", + "txn_checkpoint: failed to flush the buffer cache")); + return (ret); + } + return (0); + } + + dblp = env->lg_handle; + lp = dblp->reginfo.primary; + mgr = env->tx_handle; + region = mgr->reginfo.primary; + infop = env->reginfo; + renv = infop->primary; + /* + * No mutex is needed as envid is read-only once it is set. + */ + id = renv->envid; + + MUTEX_LOCK(env, region->mtx_ckp); + /* + * The checkpoint LSN is an LSN such that all transactions begun before + * it are complete. Our first guess (corrected below based on the list + * of active transactions) is the last-written LSN. + */ + if ((ret = __log_current_lsn_int(env, &ckp_lsn, &mbytes, &bytes)) != 0) + goto err; + + if (!LF_ISSET(DB_FORCE)) { + /* Don't checkpoint a quiescent database. */ + if (bytes == 0 && mbytes == 0) + goto err; + + /* + * If either kbytes or minutes is non-zero, then only take the + * checkpoint if more than "minutes" minutes have passed or if + * more than "kbytes" of log data have been written since the + * last checkpoint. + */ + if (kbytes != 0 && + mbytes * 1024 + bytes / 1024 >= (u_int32_t)kbytes) + goto do_ckp; + + if (minutes != 0) { + (void)time(&now); + + TXN_SYSTEM_LOCK(env); + last_ckp_time = region->time_ckp; + TXN_SYSTEM_UNLOCK(env); + + if (now - last_ckp_time >= (time_t)(minutes * 60)) + goto do_ckp; + } + + /* + * If we checked time and data and didn't go to checkpoint, + * we're done. + */ + if (minutes != 0 || kbytes != 0) + goto err; + } + + /* + * We must single thread checkpoints otherwise the chk_lsn may get out + * of order. We need to capture the start of the earliest currently + * active transaction (chk_lsn) and then flush all buffers. While + * doing this we we could then be overtaken by another checkpoint that + * sees a later chk_lsn but competes first. 
An archive process could + * then remove a log this checkpoint depends on. + */ +do_ckp: + if ((ret = __txn_getactive(env, &ckp_lsn)) != 0) + goto err; + + /* + * Checkpoints in replication groups can cause performance problems. + * + * As on the master, checkpoint on the replica requires the cache be + * flushed. The problem occurs when a client has dirty cache pages + * to write when the checkpoint record arrives, and the client's PERM + * response is necessary in order to meet the system's durability + * guarantees. In this case, the master will have to wait until the + * client completes its cache flush and writes the checkpoint record + * before subsequent transactions can be committed. The delay may + * cause transactions to timeout waiting on client response, which + * can cause nasty ripple effects in the system's overall throughput. + * [#15338] + * + * First, we send a start-sync record when the checkpoint starts so + * clients can start flushing their cache in preparation for the + * arrival of the checkpoint record. + */ + if (LOGGING_ON(env) && IS_REP_MASTER(env)) { +#ifdef HAVE_REPLICATION_THREADS + /* + * If repmgr is configured in the shared environment, but no + * send() function configured for this process, assume we have a + * replication-unaware process that wants to automatically + * participate in replication (i.e., sending replication + * messages to clients). + */ + if (env->rep_handle->send == NULL && + F_ISSET(env, ENV_THREAD) && APP_IS_REPMGR(env) && + (ret = __repmgr_autostart(env)) != 0) + goto err; +#endif + if (env->rep_handle->send != NULL) + (void)__rep_send_message(env, DB_EID_BROADCAST, + REP_START_SYNC, &ckp_lsn, NULL, 0, 0); + } + + /* Flush the cache. 
*/ + if (MPOOL_ON(env) && + (ret = __memp_sync_int( + env, NULL, 0, DB_SYNC_CHECKPOINT, NULL, NULL)) != 0) { + __db_err(env, ret, DB_STR("4519", + "txn_checkpoint: failed to flush the buffer cache")); + goto err; + } + + /* + * The client won't have more dirty pages to flush from its cache than + * the master did, but there may be differences between the hardware, + * I/O configuration and workload on the master and the client that + * can result in the client being unable to finish its cache flush as + * fast as the master. A way to avoid the problem is to pause after + * the master completes its checkpoint and before the actual checkpoint + * record is logged, giving the replicas additional time to finish. + * + * !!! + * Currently turned off when testing, because it makes the test suite + * take a long time to run. + */ +#ifndef CONFIG_TEST + if (LOGGING_ON(env) && + IS_REP_MASTER(env) && env->rep_handle->send != NULL && + !LF_ISSET(DB_CKP_INTERNAL) && + env->rep_handle->region->chkpt_delay != 0) + __os_yield(env, 0, env->rep_handle->region->chkpt_delay); +#endif + + /* + * Because we can't be a replication client here, and because + * recovery (somewhat unusually) calls txn_checkpoint and expects + * it to write a log message, LOGGING_ON is the correct macro here. + */ + if (LOGGING_ON(env)) { + TXN_SYSTEM_LOCK(env); + last_ckp = region->last_ckp; + TXN_SYSTEM_UNLOCK(env); + /* + * Put out records for the open files before we log + * the checkpoint. The records are certain to be at + * or after ckp_lsn, but before the checkpoint record + * itself, so they're sure to be included if we start + * recovery from the ckp_lsn contained in this + * checkpoint. + */ + logflags = DB_LOG_CHKPNT; + /* + * If this is a normal checkpoint, log files as checkpoints. + * If we are recovering, only log as DBREG_RCLOSE if + * there are no prepared txns. Otherwise, it should + * stay as DBREG_CHKPNT. 
+ */ + op = DBREG_CHKPNT; + if (!IS_RECOVERING(env)) + logflags |= DB_FLUSH; + else if (region->stat.st_nrestores == 0) + op = DBREG_RCLOSE; + if ((ret = __dbreg_log_files(env, op)) != 0 || + (ret = __txn_ckp_log(env, NULL, &ckp_lsn, logflags, + &ckp_lsn, &last_ckp, (int32_t)time(NULL), id, 0)) != 0) { + __db_err(env, ret, DB_STR_A("4520", + "txn_checkpoint: log failed at LSN [%ld %ld]", + "%ld %ld"), + (long)ckp_lsn.file, (long)ckp_lsn.offset); + goto err; + } + + if ((ret = __txn_updateckp(env, &ckp_lsn)) != 0) + goto err; + } + +err: MUTEX_UNLOCK(env, region->mtx_ckp); + if (ret == 0 && lp->db_log_autoremove) + __log_autoremove(env); + return (ret); +} + +/* + * __txn_getactive -- + * Find the oldest active transaction and figure out its "begin" LSN. + * This is the lowest LSN we can checkpoint, since any record written + * after it may be involved in a transaction and may therefore need + * to be undone in the case of an abort. + * + * We check both the file and offset for 0 since the lsn may be in + * transition. If it is then we don't care about this txn because it + * must be starting after we set the initial value of lsnp in the caller. + * All txns must initalize their begin_lsn before writing to the log. + * + * PUBLIC: int __txn_getactive __P((ENV *, DB_LSN *)); + */ +int +__txn_getactive(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *td; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + TXN_SYSTEM_LOCK(env); + SH_TAILQ_FOREACH(td, ®ion->active_txn, links, __txn_detail) + if (td->begin_lsn.file != 0 && + td->begin_lsn.offset != 0 && + LOG_COMPARE(&td->begin_lsn, lsnp) < 0) + *lsnp = td->begin_lsn; + TXN_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __txn_getckp -- + * Get the LSN of the last transaction checkpoint. 
+ * + * PUBLIC: int __txn_getckp __P((ENV *, DB_LSN *)); + */ +int +__txn_getckp(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + DB_LSN lsn; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + TXN_SYSTEM_LOCK(env); + lsn = region->last_ckp; + TXN_SYSTEM_UNLOCK(env); + + if (IS_ZERO_LSN(lsn)) + return (DB_NOTFOUND); + + *lsnp = lsn; + return (0); +} + +/* + * __txn_updateckp -- + * Update the last_ckp field in the transaction region. This happens + * at the end of a normal checkpoint and also when a replication client + * receives a checkpoint record. + * + * PUBLIC: int __txn_updateckp __P((ENV *, DB_LSN *)); + */ +int +__txn_updateckp(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + /* + * We want to make sure last_ckp only moves forward; since we drop + * locks above and in log_put, it's possible for two calls to + * __txn_ckp_log to finish in a different order from how they were + * called. + */ + TXN_SYSTEM_LOCK(env); + if (LOG_COMPARE(®ion->last_ckp, lsnp) < 0) { + region->last_ckp = *lsnp; + (void)time(®ion->time_ckp); + } + TXN_SYSTEM_UNLOCK(env); + + return (0); +} diff --git a/src/txn/txn_failchk.c b/src/txn/txn_failchk.c new file mode 100644 index 00000000..3f7e98fa --- /dev/null +++ b/src/txn/txn_failchk.c @@ -0,0 +1,101 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/txn.h" + +/* + * __txn_failchk -- + * Check for transactions started by dead threads of control. 
+ * + * PUBLIC: int __txn_failchk __P((ENV *)); + */ +int +__txn_failchk(env) + ENV *env; +{ + DB_ENV *dbenv; + DB_TXN *ktxn, *txn; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *ktd, *td; + db_threadid_t tid; + int ret; + char buf[DB_THREADID_STRLEN]; + pid_t pid; + + mgr = env->tx_handle; + dbenv = env->dbenv; + region = mgr->reginfo.primary; + +retry: TXN_SYSTEM_LOCK(env); + + SH_TAILQ_FOREACH(td, ®ion->active_txn, links, __txn_detail) { + /* + * If this is a child transaction, skip it. + * The parent will take care of it. + */ + if (td->parent != INVALID_ROFF) + continue; + /* + * If the txn is prepared, then it does not matter + * what the state of the thread is. + */ + if (td->status == TXN_PREPARED) + continue; + + /* If the thread is still alive, it's not a problem. */ + if (dbenv->is_alive(dbenv, td->pid, td->tid, 0)) + continue; + + if (F_ISSET(td, TXN_DTL_INMEMORY)) { + TXN_SYSTEM_UNLOCK(env); + return (__db_failed(env, DB_STR("4501", + "Transaction has in memory logs"), + td->pid, td->tid)); + } + + /* Abort the transaction. 
*/ + TXN_SYSTEM_UNLOCK(env); + if ((ret = __os_calloc(env, 1, sizeof(DB_TXN), &txn)) != 0) + return (ret); + if ((ret = __txn_continue(env, txn, td, NULL, 1)) != 0) + return (ret); + SH_TAILQ_FOREACH(ktd, &td->kids, klinks, __txn_detail) { + if (F_ISSET(ktd, TXN_DTL_INMEMORY)) + return (__db_failed(env, DB_STR("4502", + "Transaction has in memory logs"), + td->pid, td->tid)); + if ((ret = + __os_calloc(env, 1, sizeof(DB_TXN), &ktxn)) != 0) + return (ret); + if ((ret = + __txn_continue(env, ktxn, ktd, NULL, 1)) != 0) + return (ret); + ktxn->parent = txn; + ktxn->mgrp = txn->mgrp; + TAILQ_INSERT_HEAD(&txn->kids, ktxn, klinks); + } + pid = td->pid; + tid = td->tid; + (void)dbenv->thread_id_string(dbenv, pid, tid, buf); + __db_msg(env, DB_STR_A("4503", + "Aborting txn %#lx: %s", "%#lx %s"), + (u_long)txn->txnid, buf); + if ((ret = __txn_abort(txn)) != 0) + return (__db_failed(env, DB_STR("4504", + "Transaction abort failed"), pid, tid)); + goto retry; + } + + TXN_SYSTEM_UNLOCK(env); + + return (0); +} diff --git a/src/txn/txn_method.c b/src/txn/txn_method.c new file mode 100644 index 00000000..531dd78d --- /dev/null +++ b/src/txn/txn_method.c @@ -0,0 +1,124 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/txn.h" + +/* + * __txn_env_create -- + * Transaction specific initialization of the DB_ENV structure. + * + * PUBLIC: int __txn_env_create __P((DB_ENV *)); + */ +int +__txn_env_create(dbenv) + DB_ENV *dbenv; +{ + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + */ + dbenv->tx_max = 0; + + return (0); +} + +/* + * __txn_env_destroy -- + * Transaction specific destruction of the DB_ENV structure. 
+ * + * PUBLIC: void __txn_env_destroy __P((DB_ENV *)); + */ +void +__txn_env_destroy(dbenv) + DB_ENV *dbenv; +{ + COMPQUIET(dbenv, NULL); +} + +/* + * PUBLIC: int __txn_get_tx_max __P((DB_ENV *, u_int32_t *)); + */ +int +__txn_get_tx_max(dbenv, tx_maxp) + DB_ENV *dbenv; + u_int32_t *tx_maxp; +{ + ENV *env; + + env = dbenv->env; + + ENV_NOT_CONFIGURED(env, + env->tx_handle, "DB_ENV->get_tx_max", DB_INIT_TXN); + + if (TXN_ON(env)) { + /* Cannot be set after open, no lock required to read. */ + *tx_maxp = ((DB_TXNREGION *) + env->tx_handle->reginfo.primary)->maxtxns; + } else + *tx_maxp = dbenv->tx_max; + return (0); +} + +/* + * __txn_set_tx_max -- + * DB_ENV->set_tx_max. + * + * PUBLIC: int __txn_set_tx_max __P((DB_ENV *, u_int32_t)); + */ +int +__txn_set_tx_max(dbenv, tx_max) + DB_ENV *dbenv; + u_int32_t tx_max; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_tx_max"); + + dbenv->tx_max = tx_max; + return (0); +} + +/* + * PUBLIC: int __txn_get_tx_timestamp __P((DB_ENV *, time_t *)); + */ +int +__txn_get_tx_timestamp(dbenv, timestamp) + DB_ENV *dbenv; + time_t *timestamp; +{ + *timestamp = dbenv->tx_timestamp; + return (0); +} + +/* + * __txn_set_tx_timestamp -- + * Set the transaction recovery timestamp. + * + * PUBLIC: int __txn_set_tx_timestamp __P((DB_ENV *, time_t *)); + */ +int +__txn_set_tx_timestamp(dbenv, timestamp) + DB_ENV *dbenv; + time_t *timestamp; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_tx_timestamp"); + + dbenv->tx_timestamp = *timestamp; + return (0); +} diff --git a/src/txn/txn_rec.c b/src/txn/txn_rec.c new file mode 100644 index 00000000..4658645c --- /dev/null +++ b/src/txn/txn_rec.c @@ -0,0 +1,616 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 1996 + * The President and Fellows of Harvard University. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/lock.h" +#include "dbinc/txn.h" +#include "dbinc/db_am.h" + +/* + * PUBLIC: int __txn_regop_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * These records are only ever written for commits. 
Normally, we redo any + * committed transaction, however if we are doing recovery to a timestamp, then + * we may treat transactions that committed after the timestamp as aborted. + */ +int +__txn_regop_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_regop_args *argp; + DB_TXNHEAD *headp; + int ret; + u_int32_t status; + +#ifdef DEBUG_RECOVER + (void)__txn_regop_print(env, dbtp, lsnp, op, info); +#endif + + if ((ret = __txn_regop_read(env, dbtp->data, &argp)) != 0) + return (ret); + + headp = info; + /* + * We are only ever called during FORWARD_ROLL or BACKWARD_ROLL. + * We check for the former explicitly and the last two clauses + * apply to the BACKWARD_ROLL case. + */ + + if (op == DB_TXN_FORWARD_ROLL) { + /* + * If this was a 2-phase-commit transaction, then it + * might already have been removed from the list, and + * that's OK. Ignore the return code from remove. + */ + if ((ret = __db_txnlist_remove(env, + info, argp->txnp->txnid)) != DB_NOTFOUND && ret != 0) + goto err; + } else if ((env->dbenv->tx_timestamp != 0 && + argp->timestamp > (int32_t)env->dbenv->tx_timestamp) || + (!IS_ZERO_LSN(headp->trunc_lsn) && + LOG_COMPARE(&headp->trunc_lsn, lsnp) < 0)) { + /* + * We failed either the timestamp check or the trunc_lsn check, + * so we treat this as an abort even if it was a commit record. + */ + if ((ret = __db_txnlist_update(env, info, + argp->txnp->txnid, TXN_ABORT, NULL, &status, 1)) != 0) + goto err; + else if (status != TXN_IGNORE && status != TXN_OK) + goto err; + } else { + /* This is a normal commit; mark it appropriately. */ + if ((ret = __db_txnlist_update(env, + info, argp->txnp->txnid, argp->opcode, lsnp, + &status, 0)) == DB_NOTFOUND) { + if ((ret = __db_txnlist_add(env, + info, argp->txnp->txnid, + argp->opcode == TXN_ABORT ? 
+ TXN_IGNORE : argp->opcode, lsnp)) != 0) + goto err; + } else if (ret != 0 || + (status != TXN_IGNORE && status != TXN_OK)) + goto err; + } + + if (ret == 0) + *lsnp = argp->prev_lsn; + + if (0) { +err: __db_errx(env, DB_STR_A("4514", + "txnid %lx commit record found, already on commit list", + "%lx"), (u_long)argp->txnp->txnid); + ret = EINVAL; + } + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_prepare_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * These records are only ever written for prepares. + */ +int +__txn_prepare_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_prepare_args *argp; + DBT *lock_dbt; + DB_TXNHEAD *headp; + DB_LOCKTAB *lt; + u_int32_t status; + int ret; + +#ifdef DEBUG_RECOVER + (void)__txn_prepare_print(env, dbtp, lsnp, op, info); +#endif + + if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0) + return (ret); + + if (argp->opcode != TXN_PREPARE && argp->opcode != TXN_ABORT) { + ret = EINVAL; + goto err; + } + headp = info; + + /* + * The return value here is either a DB_NOTFOUND or it is + * the transaction status from the list. It is not a normal + * error return, so we must make sure that in each of the + * cases below, we overwrite the ret value so we return + * appropriately. + */ + ret = __db_txnlist_find(env, info, argp->txnp->txnid, &status); + + /* + * If we are rolling forward, then an aborted prepare + * indicates that this may be the last record we'll see for + * this transaction ID, so we should remove it from the list. + */ + + if (op == DB_TXN_FORWARD_ROLL) { + if ((ret = __db_txnlist_remove(env, + info, argp->txnp->txnid)) != 0) + goto txn_err; + } else if (op == DB_TXN_BACKWARD_ROLL && status == TXN_PREPARE) { + /* + * On the backward pass, we have four possibilities: + * 1. The transaction is already committed, no-op. + * 2. The transaction is already aborted, no-op. + * 3. 
The prepare failed and was aborted, mark as abort. + * 4. The transaction is neither committed nor aborted. + * Treat this like a commit and roll forward so that + * the transaction can be resurrected in the region. + * We handle cases 3 and 4 here; cases 1 and 2 + * are the final clause below. + */ + if (argp->opcode == TXN_ABORT) { + if ((ret = __db_txnlist_update(env, + info, argp->txnp->txnid, + TXN_ABORT, NULL, &status, 0)) != 0 && + status != TXN_PREPARE) + goto txn_err; + ret = 0; + } + /* + * This is prepared, but not yet committed transaction. We + * need to add it to the transaction list, so that it gets + * rolled forward. We also have to add it to the region's + * internal state so it can be properly aborted or committed + * after recovery (see txn_recover). + */ + else if ((ret = __db_txnlist_remove(env, + info, argp->txnp->txnid)) != 0) { +txn_err: __db_errx(env, + DB_STR_A("4515", + "transaction not in list %lx", "%lx"), + (u_long)argp->txnp->txnid); + ret = DB_NOTFOUND; + } else if (IS_ZERO_LSN(headp->trunc_lsn) || + LOG_COMPARE(&headp->trunc_lsn, lsnp) >= 0) { + if ((ret = __db_txnlist_add(env, + info, argp->txnp->txnid, TXN_COMMIT, lsnp)) == 0) { + /* Re-acquire the locks for this transaction. 
*/ + lock_dbt = &argp->locks; + if (LOCKING_ON(env)) { + lt = env->lk_handle; + if ((ret = __lock_getlocker(lt, + argp->txnp->txnid, 1, + &argp->txnp->locker)) != 0) + goto err; + if ((ret = __lock_get_list(env, + argp->txnp->locker, 0, + DB_LOCK_WRITE, lock_dbt)) != 0) + goto err; + } + + ret = __txn_restore_txn(env, lsnp, argp); + } + } + } else + ret = 0; + + if (ret == 0) + *lsnp = argp->prev_lsn; + +err: __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_ckp_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_ckp_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_ckp_args *argp; + int ret; + +#ifdef DEBUG_RECOVER + __txn_ckp_print(env, dbtp, lsnp, op, info); +#endif + if ((ret = __txn_ckp_read(env, dbtp->data, &argp)) != 0) + return (ret); + + if (op == DB_TXN_BACKWARD_ROLL) + __db_txnlist_ckp(env, info, lsnp); + + *lsnp = argp->last_ckp; + __os_free(env, argp); + return (DB_TXN_CKP); +} + +/* + * __txn_child_recover + * Recover a commit record for a child transaction. + * + * PUBLIC: int __txn_child_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_child_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_child_args *argp; + u_int32_t c_stat, p_stat, tmpstat; + int ret, t_ret; + +#ifdef DEBUG_RECOVER + (void)__txn_child_print(env, dbtp, lsnp, op, info); +#endif + if ((ret = __txn_child_read(env, dbtp->data, &argp)) != 0) + return (ret); + + /* + * This is a record in a PARENT's log trail indicating that a + * child committed. If we are aborting, return the childs last + * record's LSN. If we are in recovery, then if the + * parent is committing, we set ourselves up to commit, else + * we do nothing. 
+ */ + if (op == DB_TXN_ABORT) { + *lsnp = argp->c_lsn; + ret = __db_txnlist_lsnadd(env, info, &argp->prev_lsn); + goto out; + } else if (op == DB_TXN_BACKWARD_ROLL) { + /* Child might exist -- look for it. */ + ret = __db_txnlist_find(env, info, argp->child, &c_stat); + t_ret = + __db_txnlist_find(env, info, argp->txnp->txnid, &p_stat); + if (ret != 0 && ret != DB_NOTFOUND) + goto out; + if (t_ret != 0 && t_ret != DB_NOTFOUND) { + ret = t_ret; + goto out; + } + /* + * If the parent is in state COMMIT or IGNORE, then we apply + * that to the child, else we need to abort the child. + */ + + if (ret == DB_NOTFOUND || + c_stat == TXN_OK || c_stat == TXN_COMMIT) { + if (t_ret == DB_NOTFOUND || + (p_stat != TXN_COMMIT && p_stat != TXN_IGNORE)) + c_stat = TXN_ABORT; + else + c_stat = p_stat; + + if (ret == DB_NOTFOUND) + ret = __db_txnlist_add(env, + info, argp->child, c_stat, NULL); + else + ret = __db_txnlist_update(env, info, + argp->child, c_stat, NULL, &tmpstat, 0); + } else if (c_stat == TXN_EXPECTED) { + /* + * The open after this create succeeded. If the + * parent succeeded, we don't want to redo; if the + * parent aborted, we do want to undo. + */ + switch (p_stat) { + case TXN_COMMIT: + case TXN_IGNORE: + c_stat = TXN_IGNORE; + break; + default: + c_stat = TXN_ABORT; + } + ret = __db_txnlist_update(env, + info, argp->child, c_stat, NULL, &tmpstat, 0); + } else if (c_stat == TXN_UNEXPECTED) { + /* + * The open after this create failed. If the parent + * is rolling forward, we need to roll forward. If + * the parent failed, then we do not want to abort + * (because the file may not be the one in which we + * are interested). + */ + ret = __db_txnlist_update(env, info, argp->child, + p_stat == TXN_COMMIT ? TXN_COMMIT : TXN_IGNORE, + NULL, &tmpstat, 0); + } + } else if (op == DB_TXN_OPENFILES) { + /* + * If we have a partial subtransaction, then the whole + * transaction should be ignored. 
+ */ + if ((ret = __db_txnlist_find(env, + info, argp->child, &c_stat)) == DB_NOTFOUND) + ret = __db_txnlist_update(env, info, + argp->txnp->txnid, TXN_IGNORE, + NULL, &p_stat, 1); + } else if (DB_REDO(op)) { + /* Forward Roll */ + if ((ret = + __db_txnlist_remove(env, info, argp->child)) != 0) + __db_errx(env, DB_STR_A("4516", + "Transaction not in list %x", "%x"), argp->child); + } + + if (ret == 0) + *lsnp = argp->prev_lsn; + +out: __os_free(env, argp); + + return (ret); +} + +/* + * __txn_restore_txn -- + * Using only during XA recovery. If we find any transactions that are + * prepared, but not yet committed, then we need to restore the transaction's + * state into the shared region, because the TM is going to issue an abort + * or commit and we need to respond correctly. + * + * lsnp is the LSN of the returned LSN + * argp is the prepare record (in an appropriate structure) + * + * PUBLIC: int __txn_restore_txn __P((ENV *, DB_LSN *, __txn_prepare_args *)); + */ +int +__txn_restore_txn(env, lsnp, argp) + ENV *env; + DB_LSN *lsnp; + __txn_prepare_args *argp; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *td; + int ret; + + if (argp->gid.size == 0) + return (0); + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + TXN_SYSTEM_LOCK(env); + + /* Allocate a new transaction detail structure. */ + if ((ret = __env_alloc(&mgr->reginfo, sizeof(TXN_DETAIL), &td)) != 0) { + TXN_SYSTEM_UNLOCK(env); + return (ret); + } + + /* Place transaction on active transaction list. 
*/ + SH_TAILQ_INSERT_HEAD(®ion->active_txn, td, links, __txn_detail); + region->curtxns++; + + td->txnid = argp->txnp->txnid; + __os_id(env->dbenv, &td->pid, &td->tid); + td->last_lsn = *lsnp; + td->begin_lsn = argp->begin_lsn; + td->parent = INVALID_ROFF; + td->name = INVALID_ROFF; + SH_TAILQ_INIT(&td->kids); + MAX_LSN(td->read_lsn); + MAX_LSN(td->visible_lsn); + td->mvcc_ref = 0; + td->mvcc_mtx = MUTEX_INVALID; + td->status = TXN_PREPARED; + td->flags = TXN_DTL_RESTORED; + memcpy(td->gid, argp->gid.data, argp->gid.size); + td->nlog_dbs = 0; + td->nlog_slots = TXN_NSLOTS; + td->log_dbs = R_OFFSET(&mgr->reginfo, td->slots); + + region->stat.st_nrestores++; +#ifdef HAVE_STATISTICS + STAT_INC(env, txn, nactive, region->stat.st_nactive, td->txnid); + if (region->stat.st_nactive > region->stat.st_maxnactive) + STAT_SET(env, txn, maxnactive, region->stat.st_maxnactive, + region->stat.st_nactive, td->txnid); +#endif + TXN_SYSTEM_UNLOCK(env); + return (0); +} + +/* + * __txn_recycle_recover -- + * Recovery function for recycle. + * + * PUBLIC: int __txn_recycle_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_recycle_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_recycle_args *argp; + int ret; + +#ifdef DEBUG_RECOVER + (void)__txn_child_print(env, dbtp, lsnp, op, info); +#endif + if ((ret = __txn_recycle_read(env, dbtp->data, &argp)) != 0) + return (ret); + + COMPQUIET(lsnp, NULL); + + if ((ret = __db_txnlist_gen(env, info, + DB_UNDO(op) ? -1 : 1, argp->min, argp->max)) != 0) + return (ret); + + __os_free(env, argp); + + return (0); +} + +/* + * PUBLIC: int __txn_regop_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * These records are only ever written for commits. 
Normally, we redo any + * committed transaction, however if we are doing recovery to a timestamp, then + * we may treat transactions that committed after the timestamp as aborted. + */ +int +__txn_regop_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_regop_42_args *argp; + DB_TXNHEAD *headp; + u_int32_t status; + int ret; + +#ifdef DEBUG_RECOVER + (void)__txn_regop_42_print(env, dbtp, lsnp, op, info); +#endif + + if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + headp = info; + /* + * We are only ever called during FORWARD_ROLL or BACKWARD_ROLL. + * We check for the former explicitly and the last two clauses + * apply to the BACKWARD_ROLL case. + */ + + if (op == DB_TXN_FORWARD_ROLL) { + /* + * If this was a 2-phase-commit transaction, then it + * might already have been removed from the list, and + * that's OK. Ignore the return code from remove. + */ + if ((ret = __db_txnlist_remove(env, + info, argp->txnp->txnid)) != DB_NOTFOUND && ret != 0) + goto err; + } else if ((env->dbenv->tx_timestamp != 0 && + argp->timestamp > (int32_t)env->dbenv->tx_timestamp) || + (!IS_ZERO_LSN(headp->trunc_lsn) && + LOG_COMPARE(&headp->trunc_lsn, lsnp) < 0)) { + /* + * We failed either the timestamp check or the trunc_lsn check, + * so we treat this as an abort even if it was a commit record. + */ + if ((ret = __db_txnlist_update(env, info, + argp->txnp->txnid, TXN_ABORT, NULL, &status, 1)) != 0) + goto err; + else if (status != TXN_IGNORE && status != TXN_OK) + goto err; + } else { + /* This is a normal commit; mark it appropriately. */ + if ((ret = __db_txnlist_update(env, + info, argp->txnp->txnid, argp->opcode, lsnp, + &status, 0)) == DB_NOTFOUND) { + if ((ret = __db_txnlist_add(env, + info, argp->txnp->txnid, + argp->opcode == TXN_ABORT ? 
+ TXN_IGNORE : argp->opcode, lsnp)) != 0) + goto err; + } else if (ret != 0 || + (status != TXN_IGNORE && status != TXN_OK)) + goto err; + } + + if (ret == 0) + *lsnp = argp->prev_lsn; + + if (0) { +err: __db_errx(env, DB_STR_A("4517", + "txnid %lx commit record found, already on commit list", + "%lx"), (u_long)argp->txnp->txnid); + ret = EINVAL; + } + __os_free(env, argp); + + return (ret); +} + +/* + * PUBLIC: int __txn_ckp_42_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__txn_ckp_42_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __txn_ckp_42_args *argp; + int ret; + +#ifdef DEBUG_RECOVER + __txn_ckp_42_print(env, dbtp, lsnp, op, info); +#endif + if ((ret = __txn_ckp_42_read(env, dbtp->data, &argp)) != 0) + return (ret); + + if (op == DB_TXN_BACKWARD_ROLL) + __db_txnlist_ckp(env, info, lsnp); + + *lsnp = argp->last_ckp; + __os_free(env, argp); + return (DB_TXN_CKP); +} diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c new file mode 100644 index 00000000..b1e5c8fa --- /dev/null +++ b/src/txn/txn_recover.c @@ -0,0 +1,317 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc_auto/db_auto.h" +#include "dbinc_auto/crdel_auto.h" +#include "dbinc_auto/db_ext.h" + +/* + * __txn_recover_pp -- + * ENV->txn_recover pre/post processing. 
+ * + * PUBLIC: int __txn_recover_pp __P((DB_ENV *, + * PUBLIC: DB_PREPLIST *, long, long *, u_int32_t)); + */ +int +__txn_recover_pp(dbenv, preplist, count, retp, flags) + DB_ENV *dbenv; + DB_PREPLIST *preplist; + long count, *retp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG( + env, env->tx_handle, "txn_recover", DB_INIT_TXN); + + if (F_ISSET((DB_TXNREGION *)env->tx_handle->reginfo.primary, + TXN_IN_RECOVERY)) { + __db_errx(env, DB_STR("4505", + "operation not permitted while in recovery")); + return (EINVAL); + } + + if (flags != DB_FIRST && flags != DB_NEXT) + return (__db_ferr(env, "DB_ENV->txn_recover", 0)); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, + (__txn_recover(env, preplist, count, retp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_recover -- + * ENV->txn_recover. + * + * PUBLIC: int __txn_recover __P((ENV *, + * PUBLIC: DB_PREPLIST *, long, long *, u_int32_t)); + */ +int +__txn_recover(env, txns, count, retp, flags) + ENV *env; + DB_PREPLIST *txns; + long count, *retp; + u_int32_t flags; +{ + /* + * Public API to retrieve the list of prepared, but not yet committed + * transactions. See __txn_get_prepared for details. This function + * and __db_xa_recover both wrap that one. + */ + return (__txn_get_prepared(env, + NULL, txns, count, retp, flags)); + +} + +/* + * __txn_get_prepared -- + * Returns a list of prepared (and for XA, heuristically completed) + * transactions (less than or equal to the count parameter). One of + * xids or txns must be set to point to an array of the appropriate type. + * The count parameter indicates the number of entries in the xids and/or + * txns array. The retp parameter will be set to indicate the number of + * entries returned in the xids/txns array. Flags indicates the operation, + * one of DB_FIRST or DB_NEXT. 
+ * + * PUBLIC: int __txn_get_prepared __P((ENV *, + * PUBLIC: XID *, DB_PREPLIST *, long, long *, u_int32_t)); + */ +int +__txn_get_prepared(env, xids, txns, count, retp, flags) + ENV *env; + XID *xids; + DB_PREPLIST *txns; + long count; /* This is long for XA compatibility. */ + long *retp; + u_int32_t flags; +{ + DB_LSN min; + DB_PREPLIST *prepp; + DB_THREAD_INFO *ip; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *td; + XID *xidp; + long i; + int restored, ret; + + *retp = 0; + MAX_LSN(min); + prepp = txns; + xidp = xids; + restored = ret = 0; + + /* + * If we are starting a scan, then we traverse the active transaction + * list once making sure that all transactions are marked as not having + * been collected. Then on each pass, we mark the ones we collected + * so that if we cannot collect them all at once, we can finish up + * next time with a continue. + */ + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + /* + * During this pass we need to figure out if we are going to need + * to open files. We need to open files if we've never collected + * before (in which case, none of the COLLECTED bits will be set) + * and the ones that we are collecting are restored (if they aren't + * restored, then we never crashed; just the main server did). + */ + TXN_SYSTEM_LOCK(env); + ENV_GET_THREAD_INFO(env, ip); + + /* Now begin collecting active transactions. */ + for (td = SH_TAILQ_FIRST(®ion->active_txn, __txn_detail); + td != NULL && *retp < count; + td = SH_TAILQ_NEXT(td, links, __txn_detail)) { + if (td->status != TXN_PREPARED || + (flags != DB_FIRST && F_ISSET(td, TXN_DTL_COLLECTED))) + continue; + + if (F_ISSET(td, TXN_DTL_RESTORED)) + restored = 1; + + if (xids != NULL) { + xidp->formatID = td->format; + /* + * XID structure uses longs; use use u_int32_t's as we + * log them to disk. Cast them to make the conversion + * explicit. 
+ */ + xidp->gtrid_length = (long)td->gtrid; + xidp->bqual_length = (long)td->bqual; + memcpy(xidp->data, td->gid, sizeof(td->gid)); + xidp++; + } + + if (txns != NULL) { + if ((ret = __os_calloc(env, + 1, sizeof(DB_TXN), &prepp->txn)) != 0) { + TXN_SYSTEM_UNLOCK(env); + goto err; + } + prepp->txn->td = td; + memcpy(prepp->gid, td->gid, sizeof(td->gid)); + prepp++; + } + + if (!IS_ZERO_LSN(td->begin_lsn) && + LOG_COMPARE(&td->begin_lsn, &min) < 0) + min = td->begin_lsn; + + (*retp)++; + F_SET(td, TXN_DTL_COLLECTED); + } + if (flags == DB_FIRST) + for (; td != NULL; td = SH_TAILQ_NEXT(td, links, __txn_detail)) + F_CLR(td, TXN_DTL_COLLECTED); + TXN_SYSTEM_UNLOCK(env); + + /* + * Now link all the transactions into the transaction manager's list. + */ + if (txns != NULL && *retp != 0) { + MUTEX_LOCK(env, mgr->mutex); + for (i = 0; i < *retp; i++) { + if ((ret = __txn_continue(env, + txns[i].txn, txns[i].txn->td, ip, 0)) != 0) + goto err; + F_SET(txns[i].txn, TXN_MALLOC); + if (F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC)) + F_SET(txns[i].txn, TXN_NOSYNC); + else if (F_ISSET(env->dbenv, DB_ENV_TXN_WRITE_NOSYNC)) + F_SET(txns[i].txn, TXN_WRITE_NOSYNC); + else + F_SET(txns[i].txn, TXN_SYNC); + TAILQ_INSERT_TAIL(&mgr->txn_chain, txns[i].txn, links); + } + MUTEX_UNLOCK(env, mgr->mutex); + + /* + * If we are restoring, update our count of outstanding + * transactions. + */ + if (REP_ON(env)) { + REP_SYSTEM_LOCK(env); + env->rep_handle->region->op_cnt += (u_long)*retp; + REP_SYSTEM_UNLOCK(env); + } + + } + + /* If recovery already opened the files for us, don't do it here. */ + if (restored != 0 && flags == DB_FIRST && + !F_ISSET(env->lg_handle, DBLOG_OPENFILES)) + ret = __txn_openfiles(env, ip, &min, 0); + + if (0) { +err: TXN_SYSTEM_UNLOCK(env); + } + return (ret); +} + +/* + * __txn_openfiles -- + * Call env_openfiles. 
+ * + * PUBLIC: int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int)); + */ +int +__txn_openfiles(env, ip, min, force) + ENV *env; + DB_THREAD_INFO *ip; + DB_LSN *min; + int force; +{ + DBT data; + DB_LOGC *logc; + DB_LSN open_lsn; + DB_TXNHEAD *txninfo; + __txn_ckp_args *ckp_args; + int ret, t_ret; + + /* + * Figure out the last checkpoint before the smallest + * start_lsn in the region. + */ + logc = NULL; + if ((ret = __log_cursor(env, &logc)) != 0) + goto err; + + memset(&data, 0, sizeof(data)); + if ((ret = __txn_getckp(env, &open_lsn)) == 0) + while (!IS_ZERO_LSN(open_lsn) && (ret = + __logc_get(logc, &open_lsn, &data, DB_SET)) == 0 && + (force || + (min != NULL && LOG_COMPARE(min, &open_lsn) < 0))) { + /* Format the log record. */ + if ((ret = __txn_ckp_read( + env, data.data, &ckp_args)) != 0) { + __db_errx(env, DB_STR_A("4506", + "Invalid checkpoint record at [%lu][%lu]", + "%lu %lu"), (u_long)open_lsn.file, + (u_long)open_lsn.offset); + goto err; + } + /* + * If force is set, then we're forcing ourselves + * to go back far enough to open files. + * Use ckp_lsn and then break out of the loop. + */ + open_lsn = force ? ckp_args->ckp_lsn : + ckp_args->last_ckp; + __os_free(env, ckp_args); + if (force) { + if ((ret = __logc_get(logc, &open_lsn, + &data, DB_SET)) != 0) + goto err; + break; + } + } + + /* + * There are several ways by which we may have gotten here. + * - We got a DB_NOTFOUND -- we need to read the first + * log record. + * - We found a checkpoint before min. We're done. + * - We found a checkpoint after min who's last_ckp is 0. We + * need to start at the beginning of the log. + * - We are forcing an openfiles and we have our ckp_lsn. 
+ */ + if ((ret == DB_NOTFOUND || IS_ZERO_LSN(open_lsn)) && (ret = + __logc_get(logc, &open_lsn, &data, DB_FIRST)) != 0) { + __db_errx(env, DB_STR("4507", "No log records")); + goto err; + } + + if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0) + goto err; + ret = __env_openfiles( + env, logc, txninfo, &data, &open_lsn, NULL, (double)0, 0); + if (txninfo != NULL) + __db_txnlist_end(env, txninfo); + +err: + if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/src/txn/txn_region.c b/src/txn/txn_region.c new file mode 100644 index 00000000..b52de86e --- /dev/null +++ b/src/txn/txn_region.c @@ -0,0 +1,518 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +static int __txn_init __P((ENV *, DB_TXNMGR *)); + +/* + * __txn_open -- + * Open a transaction region. + * + * PUBLIC: int __txn_open __P((ENV *)); + */ +int +__txn_open(env) + ENV *env; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + int ret; + + /* Create/initialize the transaction manager structure. */ + if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &mgr)) != 0) + return (ret); + TAILQ_INIT(&mgr->txn_chain); + mgr->env = env; + + /* Join/create the txn region. */ + if ((ret = __env_region_share(env, &mgr->reginfo)) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(&mgr->reginfo, REGION_CREATE)) + if ((ret = __txn_init(env, mgr)) != 0) + goto err; + + /* Set the local addresses. */ + region = mgr->reginfo.primary = + R_ADDR(&mgr->reginfo, + ((REGENV *)env->reginfo->primary)->tx_primary); + + /* If threaded, acquire a mutex to protect the active TXN list. 
*/ + if ((ret = __mutex_alloc( + env, MTX_TXN_ACTIVE, DB_MUTEX_PROCESS_ONLY, &mgr->mutex)) != 0) + goto err; + + mgr->reginfo.mtx_alloc = region->mtx_region; + env->tx_handle = mgr; + return (0); + +err: env->tx_handle = NULL; + if (mgr->reginfo.addr != NULL) + (void)__env_region_detach(env, &mgr->reginfo, 0); + + (void)__mutex_free(env, &mgr->mutex); + __os_free(env, mgr); + return (ret); +} + +/* + * __txn_init -- + * Initialize a transaction region in shared memory. + */ +static int +__txn_init(env, mgr) + ENV *env; + DB_TXNMGR *mgr; +{ + DB_ENV *dbenv; + DB_LSN last_ckp; + DB_TXNREGION *region; + int ret; + + dbenv = env->dbenv; + + /* + * Find the last checkpoint in the log. + */ + ZERO_LSN(last_ckp); + if (LOGGING_ON(env)) { + /* + * The log system has already walked through the last + * file. Get the LSN of a checkpoint it may have found. + */ + if ((ret = __log_get_cached_ckp_lsn(env, &last_ckp)) != 0) + return (ret); + + /* + * If that didn't work, look backwards from the beginning of + * the last log file until we find the last checkpoint. + */ + if (IS_ZERO_LSN(last_ckp) && + (ret = __txn_findlastckp(env, &last_ckp, NULL)) != 0) + return (ret); + } + + if ((ret = __env_alloc(&mgr->reginfo, + sizeof(DB_TXNREGION), &mgr->reginfo.primary)) != 0) { + __db_errx(env, DB_STR("4508", + "Unable to allocate memory for the transaction region")); + return (ret); + } + ((REGENV *)env->reginfo->primary)->tx_primary = + R_OFFSET(&mgr->reginfo, mgr->reginfo.primary); + region = mgr->reginfo.primary; + memset(region, 0, sizeof(*region)); + + /* We share the region so we need the same mutex. 
*/ + region->mtx_region = ((REGENV *)env->reginfo->primary)->mtx_regenv; + mgr->reginfo.mtx_alloc = region->mtx_region; + + region->maxtxns = dbenv->tx_max; + region->inittxns = dbenv->tx_init; + region->last_txnid = TXN_MINIMUM; + region->cur_maxid = TXN_MAXIMUM; + + if ((ret = __mutex_alloc( + env, MTX_TXN_CHKPT, 0, ®ion->mtx_ckp)) != 0) + return (ret); + region->last_ckp = last_ckp; + region->time_ckp = time(NULL); + + memset(®ion->stat, 0, sizeof(region->stat)); +#ifdef HAVE_STATISTICS + region->stat.st_maxtxns = region->maxtxns; + region->stat.st_inittxns = region->inittxns; +#endif + + SH_TAILQ_INIT(®ion->active_txn); + SH_TAILQ_INIT(®ion->mvcc_txn); + return (ret); +} + +/* + * __txn_findlastckp -- + * Find the last checkpoint in the log, walking backwards from the + * max_lsn given or the beginning of the last log file. (The + * log system looked through the last log file when it started up.) + * + * PUBLIC: int __txn_findlastckp __P((ENV *, DB_LSN *, DB_LSN *)); + */ +int +__txn_findlastckp(env, lsnp, max_lsn) + ENV *env; + DB_LSN *lsnp; + DB_LSN *max_lsn; +{ + DBT dbt; + DB_LOGC *logc; + DB_LSN lsn; + int ret, t_ret; + u_int32_t rectype; + + ZERO_LSN(*lsnp); + + if ((ret = __log_cursor(env, &logc)) != 0) + return (ret); + + /* Get the last LSN. */ + memset(&dbt, 0, sizeof(dbt)); + if (max_lsn != NULL) { + lsn = *max_lsn; + if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0) + goto err; + } else { + if ((ret = __logc_get(logc, &lsn, &dbt, DB_LAST)) != 0) + goto err; + /* + * Twiddle the last LSN so it points to the beginning of the + * last file; we know there's no checkpoint after that, since + * the log system already looked there. + */ + lsn.offset = 0; + } + + /* Read backwards, looking for checkpoints. 
*/ + while ((ret = __logc_get(logc, &lsn, &dbt, DB_PREV)) == 0) { + if (dbt.size < sizeof(u_int32_t)) + continue; + LOGCOPY_32(env, &rectype, dbt.data); + if (rectype == DB___txn_ckp) { + *lsnp = lsn; + break; + } + } + +err: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) + ret = t_ret; + + /* + * Not finding a checkpoint is not an error; there may not exist + * one in the log. + */ + return ((ret == 0 || ret == DB_NOTFOUND) ? 0 : ret); +} + +/* + * __txn_env_refresh -- + * Clean up after the transaction system on a close or failed open. + * + * PUBLIC: int __txn_env_refresh __P((ENV *)); + */ +int +__txn_env_refresh(env) + ENV *env; +{ + DB_TXN *txn; + DB_TXNMGR *mgr; + REGINFO *reginfo; + u_int32_t txnid; + int aborted, ret, t_ret; + + ret = 0; + mgr = env->tx_handle; + reginfo = &mgr->reginfo; + + /* + * This function can only be called once per process (i.e., not + * once per thread), so no synchronization is required. + * + * The caller is probably doing something wrong if close is called with + * active transactions. Try and abort any active transactions that are + * not prepared, but it's quite likely the aborts will fail because + * recovery won't find open files. If we can't abort any of the + * unprepared transaction, panic, we have to run recovery to get back + * to a known state. + */ + aborted = 0; + if (TAILQ_FIRST(&mgr->txn_chain) != NULL) { + while ((txn = TAILQ_FIRST(&mgr->txn_chain)) != NULL) { + /* Prepared transactions are OK. 
*/ + txnid = txn->txnid; + if (((TXN_DETAIL *)txn->td)->status == TXN_PREPARED) { + if ((ret = __txn_discard_int(txn, 0)) != 0) { + __db_err(env, ret, DB_STR_A("4509", + "unable to discard txn %#lx", + "%#lx"), (u_long)txnid); + break; + } + continue; + } + aborted = 1; + if ((t_ret = __txn_abort(txn)) != 0) { + __db_err(env, t_ret, DB_STR_A("4510", + "unable to abort transaction %#lx", "%#lx"), + (u_long)txnid); + ret = __env_panic(env, t_ret); + break; + } + } + if (aborted) { + __db_errx(env, DB_STR("4511", + "Error: closing the transaction region with active transactions")); + if (ret == 0) + ret = EINVAL; + } + } + + /* Discard the per-thread lock. */ + if ((t_ret = __mutex_free(env, &mgr->mutex)) != 0 && ret == 0) + ret = t_ret; + + /* Detach from the region. */ + if (F_ISSET(env, ENV_PRIVATE)) + reginfo->mtx_alloc = MUTEX_INVALID; + if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0) + ret = t_ret; + + __os_free(env, mgr); + + env->tx_handle = NULL; + return (ret); +} + +/* + * __txn_region_mutex_count -- + * Return the number of mutexes the txn region will need. + * + * PUBLIC: u_int32_t __txn_region_mutex_count __P((ENV *)); + */ +u_int32_t +__txn_region_mutex_count(env) + ENV *env; +{ + COMPQUIET(env, NULL); + /* + * We need a mutex for DB_TXNMGR structure, two mutexes for + * the DB_TXNREGION structure. + */ + return (1 + 2); +} +/* + * __txn_region_mutex_max -- + * Return the number of additional mutexes the txn region will need. + * + * PUBLIC: u_int32_t __txn_region_mutex_max __P((ENV *)); + */ +u_int32_t +__txn_region_mutex_max(env) + ENV *env; +{ + DB_ENV *dbenv; + u_int32_t count; + + dbenv = env->dbenv; + + if ((count = dbenv->tx_max) == 0) + count = DEF_MAX_TXNS; + /* We may need a mutex for each MVCC txn. */ + return (count > dbenv->tx_init ? count - dbenv->tx_init : 0); +} + +/* + * __txn_region_size -- + * Return the amount of space needed for the txn region. 
+ * PUBLIC: size_t __txn_region_size __P((ENV *)); + */ +size_t +__txn_region_size(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t s; + + dbenv = env->dbenv; + + /* + * Make the region large enough to hold the primary transaction region + * structure, txn_init transaction detail structures, txn_init chunks of + * overhead required by the underlying shared region allocator for each + * chunk of memory, txn_max transaction names, at an average of 20 + * bytes each, and 10KB for safety. + */ + s = sizeof(DB_TXNREGION) + dbenv->tx_init * + (sizeof(TXN_DETAIL) + __env_alloc_overhead() + 20) + 10 * 1024; + return (s); +} + +/* + * __txn_region_max -- + * Return the additional amount of space needed for the txn region. + * PUBLIC: size_t __txn_region_max __P((ENV *)); + */ +size_t +__txn_region_max(env) + ENV *env; +{ + DB_ENV *dbenv; + size_t s; + u_int32_t count; + + dbenv = env->dbenv; + + if ((count = dbenv->tx_max) == 0) + count = DEF_MAX_TXNS; + if (count <= dbenv->tx_init) + return (0); + s = (count - dbenv->tx_init) * + (sizeof(TXN_DETAIL) + __env_alloc_overhead() + 20); + return (s); +} + +/* + * __txn_id_set -- + * Set the current transaction ID and current maximum unused ID (for + * testing purposes only). 
+ * + * PUBLIC: int __txn_id_set __P((ENV *, u_int32_t, u_int32_t)); + */ +int +__txn_id_set(env, cur_txnid, max_txnid) + ENV *env; + u_int32_t cur_txnid, max_txnid; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + int ret; + + ENV_REQUIRES_CONFIG(env, env->tx_handle, "txn_id_set", DB_INIT_TXN); + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + region->last_txnid = cur_txnid; + region->cur_maxid = max_txnid; + + ret = 0; + if (cur_txnid < TXN_MINIMUM) { + __db_errx(env, DB_STR_A("4512", + "Current ID value %lu below minimum", "%lu"), + (u_long)cur_txnid); + ret = EINVAL; + } + if (max_txnid < TXN_MINIMUM) { + __db_errx(env, DB_STR_A("4513", + "Maximum ID value %lu below minimum", "%lu"), + (u_long)max_txnid); + ret = EINVAL; + } + return (ret); +} + +/* + * __txn_oldest_reader -- + * Find the oldest "read LSN" of any active transaction' + * MVCC changes older than this can safely be discarded from the cache. + * + * PUBLIC: int __txn_oldest_reader __P((ENV *, DB_LSN *)); + */ +int +__txn_oldest_reader(env, lsnp) + ENV *env; + DB_LSN *lsnp; +{ + DB_LSN old_lsn; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + TXN_DETAIL *td; + int ret; + + if ((mgr = env->tx_handle) == NULL) + return (0); + region = mgr->reginfo.primary; + + if ((ret = __log_current_lsn_int(env, &old_lsn, NULL, NULL)) != 0) + return (ret); + + TXN_SYSTEM_LOCK(env); + SH_TAILQ_FOREACH(td, ®ion->active_txn, links, __txn_detail) + if (LOG_COMPARE(&td->read_lsn, &old_lsn) < 0) + old_lsn = td->read_lsn; + + *lsnp = old_lsn; + TXN_SYSTEM_UNLOCK(env); + + return (0); +} + +/* + * __txn_add_buffer -- + * Add to the count of buffers created by the given transaction. 
+ * + * PUBLIC: int __txn_add_buffer __P((ENV *, TXN_DETAIL *)); + */ +int +__txn_add_buffer(env, td) + ENV *env; + TXN_DETAIL *td; +{ + DB_ASSERT(env, td != NULL); + + MUTEX_LOCK(env, td->mvcc_mtx); + DB_ASSERT(env, td->mvcc_ref < UINT32_MAX); + ++td->mvcc_ref; + MUTEX_UNLOCK(env, td->mvcc_mtx); + + COMPQUIET(env, NULL); + return (0); +} + +/* + * __txn_remove_buffer -- + * Remove a buffer from a transaction -- free the transaction if necessary. + * + * PUBLIC: int __txn_remove_buffer __P((ENV *, TXN_DETAIL *, db_mutex_t)); + */ +int +__txn_remove_buffer(env, td, hash_mtx) + ENV *env; + TXN_DETAIL *td; + db_mutex_t hash_mtx; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + int need_free, ret; + + DB_ASSERT(env, td != NULL); + ret = 0; + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + MUTEX_LOCK(env, td->mvcc_mtx); + DB_ASSERT(env, td->mvcc_ref > 0); + + /* + * We free the transaction detail here only if this is the last + * reference and td is on the list of committed snapshot transactions + * with active pages. + */ + need_free = (--td->mvcc_ref == 0) && F_ISSET(td, TXN_DTL_SNAPSHOT); + MUTEX_UNLOCK(env, td->mvcc_mtx); + + if (need_free) { + MUTEX_UNLOCK(env, hash_mtx); + + ret = __mutex_free(env, &td->mvcc_mtx); + td->mvcc_mtx = MUTEX_INVALID; + + TXN_SYSTEM_LOCK(env); + SH_TAILQ_REMOVE(®ion->mvcc_txn, td, links, __txn_detail); + STAT_DEC(env, + txn, nsnapshot, region->stat.st_nsnapshot, td->txnid); + __env_alloc_free(&mgr->reginfo, td); + TXN_SYSTEM_UNLOCK(env); + + MUTEX_READLOCK(env, hash_mtx); + } + + return (ret); +} diff --git a/src/txn/txn_stat.c b/src/txn/txn_stat.c new file mode 100644 index 00000000..4d006c9c --- /dev/null +++ b/src/txn/txn_stat.c @@ -0,0 +1,461 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/txn.h" + +#ifdef HAVE_STATISTICS +static int __txn_compare __P((const void *, const void *)); +static int __txn_print_all __P((ENV *, u_int32_t)); +static int __txn_print_stats __P((ENV *, u_int32_t)); +static int __txn_stat __P((ENV *, DB_TXN_STAT **, u_int32_t)); +static char *__txn_status __P((DB_TXN_ACTIVE *)); +static char *__txn_xa_status __P((DB_TXN_ACTIVE *)); +static void __txn_gid __P((ENV *, DB_MSGBUF *, DB_TXN_ACTIVE *)); + +/* + * __txn_stat_pp -- + * DB_ENV->txn_stat pre/post processing. + * + * PUBLIC: int __txn_stat_pp __P((DB_ENV *, DB_TXN_STAT **, u_int32_t)); + */ +int +__txn_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_TXN_STAT **statp; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->tx_handle, "DB_ENV->txn_stat", DB_INIT_TXN); + + if ((ret = __db_fchk(env, + "DB_ENV->txn_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__txn_stat(env, statp, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_stat -- + * ENV->txn_stat. 
+ */ +static int +__txn_stat(env, statp, flags) + ENV *env; + DB_TXN_STAT **statp; + u_int32_t flags; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + DB_TXN_STAT *stats; + TXN_DETAIL *td; + size_t nbytes; + u_int32_t maxtxn, ndx; + int ret; + + *statp = NULL; + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + TXN_SYSTEM_LOCK(env); + maxtxn = region->curtxns; + nbytes = sizeof(DB_TXN_STAT) + sizeof(DB_TXN_ACTIVE) * maxtxn; + if ((ret = __os_umalloc(env, nbytes, &stats)) != 0) { + TXN_SYSTEM_UNLOCK(env); + return (ret); + } + + memcpy(stats, ®ion->stat, sizeof(region->stat)); + stats->st_last_txnid = region->last_txnid; + stats->st_last_ckp = region->last_ckp; + stats->st_time_ckp = region->time_ckp; + stats->st_txnarray = (DB_TXN_ACTIVE *)&stats[1]; + + for (ndx = 0, + td = SH_TAILQ_FIRST(®ion->active_txn, __txn_detail); + td != NULL && ndx < maxtxn; + td = SH_TAILQ_NEXT(td, links, __txn_detail), ++ndx) { + stats->st_txnarray[ndx].txnid = td->txnid; + if (td->parent == INVALID_ROFF) + stats->st_txnarray[ndx].parentid = TXN_INVALID; + else + stats->st_txnarray[ndx].parentid = + ((TXN_DETAIL *)R_ADDR(&mgr->reginfo, + td->parent))->txnid; + stats->st_txnarray[ndx].pid = td->pid; + stats->st_txnarray[ndx].tid = td->tid; + stats->st_txnarray[ndx].lsn = td->begin_lsn; + stats->st_txnarray[ndx].read_lsn = td->read_lsn; + stats->st_txnarray[ndx].mvcc_ref = td->mvcc_ref; + stats->st_txnarray[ndx].status = td->status; + stats->st_txnarray[ndx].xa_status = td->xa_br_status; + stats->st_txnarray[ndx].priority = td->priority; + + if (td->status == TXN_PREPARED) + memcpy(stats->st_txnarray[ndx].gid, + td->gid, sizeof(td->gid)); + if (td->name != INVALID_ROFF) { + (void)strncpy(stats->st_txnarray[ndx].name, + R_ADDR(&mgr->reginfo, td->name), + sizeof(stats->st_txnarray[ndx].name) - 1); + stats->st_txnarray[ndx].name[ + sizeof(stats->st_txnarray[ndx].name) - 1] = '\0'; + } else + stats->st_txnarray[ndx].name[0] = '\0'; + } + + __mutex_set_wait_info(env, region->mtx_region, 
+ &stats->st_region_wait, &stats->st_region_nowait); + stats->st_regsize = (roff_t)mgr->reginfo.rp->size; + if (LF_ISSET(DB_STAT_CLEAR)) { + if (!LF_ISSET(DB_STAT_SUBSYSTEM)) + __mutex_clear(env, region->mtx_region); + memset(®ion->stat, 0, sizeof(region->stat)); + region->stat.st_maxtxns = region->maxtxns; + region->stat.st_inittxns = region->inittxns; + region->stat.st_maxnactive = + region->stat.st_nactive = stats->st_nactive; + region->stat.st_maxnsnapshot = + region->stat.st_nsnapshot = stats->st_nsnapshot; + } + + TXN_SYSTEM_UNLOCK(env); + + *statp = stats; + return (0); +} + +/* + * __txn_stat_print_pp -- + * DB_ENV->txn_stat_print pre/post processing. + * + * PUBLIC: int __txn_stat_print_pp __P((DB_ENV *, u_int32_t)); + */ +int +__txn_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + ENV *env; + int ret; + + env = dbenv->env; + + ENV_REQUIRES_CONFIG(env, + env->tx_handle, "DB_ENV->txn_stat_print", DB_INIT_TXN); + + if ((ret = __db_fchk(env, "DB_ENV->txn_stat_print", + flags, DB_STAT_ALL | DB_STAT_ALLOC | DB_STAT_CLEAR)) != 0) + return (ret); + + ENV_ENTER(env, ip); + REPLICATION_WRAP(env, (__txn_stat_print(env, flags)), 0, ret); + ENV_LEAVE(env, ip); + return (ret); +} + +/* + * __txn_stat_print + * ENV->txn_stat_print method. + * + * PUBLIC: int __txn_stat_print __P((ENV *, u_int32_t)); + */ +int +__txn_stat_print(env, flags) + ENV *env; + u_int32_t flags; +{ + u_int32_t orig_flags; + int ret; + + orig_flags = flags; + LF_CLR(DB_STAT_CLEAR | DB_STAT_SUBSYSTEM); + if (flags == 0 || LF_ISSET(DB_STAT_ALL)) { + ret = __txn_print_stats(env, orig_flags); + if (flags == 0 || ret != 0) + return (ret); + } + + if (LF_ISSET(DB_STAT_ALL) && + (ret = __txn_print_all(env, orig_flags)) != 0) + return (ret); + + return (0); +} + +/* + * __txn_print_stats -- + * Display default transaction region statistics. 
+ */ +static int +__txn_print_stats(env, flags) + ENV *env; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_MSGBUF mb; + DB_TXN_ACTIVE *txn; + DB_TXN_STAT *sp; + u_int32_t i; + int ret; + char buf[DB_THREADID_STRLEN], time_buf[CTIME_BUFLEN]; + + dbenv = env->dbenv; + + if ((ret = __txn_stat(env, &sp, flags)) != 0) + return (ret); + + if (LF_ISSET(DB_STAT_ALL)) + __db_msg(env, "Default transaction region information:"); + __db_msg(env, "%lu/%lu\t%s", + (u_long)sp->st_last_ckp.file, (u_long)sp->st_last_ckp.offset, + sp->st_last_ckp.file == 0 ? + "No checkpoint LSN" : "File/offset for last checkpoint LSN"); + if (sp->st_time_ckp == 0) + __db_msg(env, "0\tNo checkpoint timestamp"); + else + __db_msg(env, "%.24s\tCheckpoint timestamp", + __os_ctime(&sp->st_time_ckp, time_buf)); + __db_msg(env, "%#lx\tLast transaction ID allocated", + (u_long)sp->st_last_txnid); + __db_dl(env, "Maximum number of active transactions configured", + (u_long)sp->st_maxtxns); + __db_dl(env, "Initial number of transactions configured", + (u_long)sp->st_inittxns); + __db_dl(env, "Active transactions", (u_long)sp->st_nactive); + __db_dl(env, + "Maximum active transactions", (u_long)sp->st_maxnactive); + __db_dl(env, + "Number of transactions begun", (u_long)sp->st_nbegins); + __db_dl(env, + "Number of transactions aborted", (u_long)sp->st_naborts); + __db_dl(env, + "Number of transactions committed", (u_long)sp->st_ncommits); + __db_dl(env, "Snapshot transactions", (u_long)sp->st_nsnapshot); + __db_dl(env, "Maximum snapshot transactions", + (u_long)sp->st_maxnsnapshot); + __db_dl(env, + "Number of transactions restored", (u_long)sp->st_nrestores); + + __db_dlbytes(env, "Region size", + (u_long)0, (u_long)0, (u_long)sp->st_regsize); + __db_dl_pct(env, + "The number of region locks that required waiting", + (u_long)sp->st_region_wait, DB_PCT(sp->st_region_wait, + sp->st_region_wait + sp->st_region_nowait), NULL); + + qsort(sp->st_txnarray, + sp->st_nactive, sizeof(sp->st_txnarray[0]), __txn_compare); 
+ __db_msg(env, "Active transactions:"); + DB_MSGBUF_INIT(&mb); + for (i = 0; i < sp->st_nactive; ++i) { + txn = &sp->st_txnarray[i]; + __db_msgadd(env, &mb, "\t%lx: %s; xa_status %s;" + " pid/thread %s; begin LSN: file/offset %lu/%lu", + (u_long)txn->txnid, __txn_status(txn), __txn_xa_status(txn), + dbenv->thread_id_string(dbenv, txn->pid, txn->tid, buf), + (u_long)txn->lsn.file, (u_long)txn->lsn.offset); + if (txn->parentid != 0) + __db_msgadd(env, &mb, + "; parent: %lx", (u_long)txn->parentid); + if (!IS_MAX_LSN(txn->read_lsn)) + __db_msgadd(env, &mb, "; read LSN: %lu/%lu", + (u_long)txn->read_lsn.file, + (u_long)txn->read_lsn.offset); + if (txn->mvcc_ref != 0) + __db_msgadd(env, &mb, + "; mvcc refcount: %lu", (u_long)txn->mvcc_ref); + if (LOCKING_ON(env)) + __db_msgadd(env, &mb, + "; priority: %lu", (u_long)txn->priority); + if (txn->name[0] != '\0') + __db_msgadd(env, &mb, "; \"%s\"", txn->name); + if (txn->status == TXN_PREPARE) + __txn_gid(env, &mb, txn); + DB_MSGBUF_FLUSH(env, &mb); + } + + __os_ufree(env, sp); + + return (0); +} + +/* + * __txn_print_all -- + * Display debugging transaction region statistics. 
+ */ +static int +__txn_print_all(env, flags) + ENV *env; + u_int32_t flags; +{ + static const FN fn[] = { + { TXN_IN_RECOVERY, "TXN_IN_RECOVERY" }, + { 0, NULL } + }; + DB_TXNMGR *mgr; + DB_TXNREGION *region; + char time_buf[CTIME_BUFLEN]; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + TXN_SYSTEM_LOCK(env); + + __db_print_reginfo(env, &mgr->reginfo, "Transaction", flags); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_TXNMGR handle information:"); + __mutex_print_debug_single(env, "DB_TXNMGR mutex", mgr->mutex, flags); + __db_dl(env, + "Number of transactions discarded", (u_long)mgr->n_discards); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + __db_msg(env, "DB_TXNREGION handle information:"); + __mutex_print_debug_single( + env, "DB_TXNREGION region mutex", region->mtx_region, flags); + STAT_ULONG("Maximum number of active txns", region->maxtxns); + STAT_HEX("Last transaction ID allocated", region->last_txnid); + STAT_HEX("Current maximum unused ID", region->cur_maxid); + + __mutex_print_debug_single( + env, "checkpoint mutex", region->mtx_ckp, flags); + STAT_LSN("Last checkpoint LSN", ®ion->last_ckp); + __db_msg(env, + "%.24s\tLast checkpoint timestamp", + region->time_ckp == 0 ? 
"0" : + __os_ctime(®ion->time_ckp, time_buf)); + + __db_prflags(env, NULL, region->flags, fn, NULL, "\tFlags"); + + __db_msg(env, "%s", DB_GLOBAL(db_line)); + TXN_SYSTEM_UNLOCK(env); + + return (0); +} + +static char * +__txn_status(txn) + DB_TXN_ACTIVE *txn; +{ + switch (txn->status) { + case TXN_ABORTED: + return ("aborted"); + case TXN_COMMITTED: + return ("committed"); + case TXN_NEED_ABORT: + return ("need abort"); + case TXN_PREPARED: + return ("prepared"); + case TXN_RUNNING: + return ("running"); + default: + break; + } + return ("unknown state"); +} + +static char * +__txn_xa_status(txn) + DB_TXN_ACTIVE *txn; +{ + switch (txn->xa_status) { + case TXN_XA_ACTIVE: + return ("xa active"); + case TXN_XA_DEADLOCKED: + return ("xa deadlock"); + case TXN_XA_IDLE: + return ("xa idle"); + case TXN_XA_PREPARED: + return ("xa prepared"); + case TXN_XA_ROLLEDBACK: + return ("xa rollback"); + default: + break; + } + return ("no xa state"); +} + +static void +__txn_gid(env, mbp, txn) + ENV *env; + DB_MSGBUF *mbp; + DB_TXN_ACTIVE *txn; +{ + u_int32_t v, *xp; + u_int i; + int cnt; + + __db_msgadd(env, mbp, "\n\tGID:"); + for (cnt = 0, xp = (u_int32_t *)txn->gid, i = 0;;) { + memcpy(&v, xp++, sizeof(u_int32_t)); + __db_msgadd(env, mbp, "%#lx ", (u_long)v); + if ((i += sizeof(u_int32_t)) >= DB_GID_SIZE) + break; + if (++cnt == 4) { + DB_MSGBUF_FLUSH(env, mbp); + __db_msgadd(env, mbp, "\t\t"); + cnt = 0; + } + } +} + +static int +__txn_compare(a1, b1) + const void *a1, *b1; +{ + const DB_TXN_ACTIVE *a, *b; + + a = a1; + b = b1; + + if (a->txnid > b->txnid) + return (1); + if (a->txnid < b->txnid) + return (-1); + return (0); +} + +#else /* !HAVE_STATISTICS */ + +int +__txn_stat_pp(dbenv, statp, flags) + DB_ENV *dbenv; + DB_TXN_STAT **statp; + u_int32_t flags; +{ + COMPQUIET(statp, NULL); + COMPQUIET(flags, 0); + + return (__db_stat_not_built(dbenv->env)); +} + +int +__txn_stat_print_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + COMPQUIET(flags, 0); + + return 
(__db_stat_not_built(dbenv->env)); +} +#endif diff --git a/src/txn/txn_util.c b/src/txn/txn_util.c new file mode 100644 index 00000000..119e8502 --- /dev/null +++ b/src/txn/txn_util.c @@ -0,0 +1,654 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" +#include "dbinc/db_am.h" + +typedef struct __txn_event TXN_EVENT; +struct __txn_event { + TXN_EVENT_T op; + TAILQ_ENTRY(__txn_event) links; + union { + struct { + /* Delayed close. */ + DB *dbp; + } c; + struct { + /* Delayed remove. */ + char *name; + u_int8_t *fileid; + int inmem; + } r; + struct { + /* Lock event. */ + DB_LOCK lock; + DB_LOCKER *locker; + DB *dbp; + } t; + } u; +}; + +#define TXN_TOP_PARENT(txn) do { \ + while (txn->parent != NULL) \ + txn = txn->parent; \ +} while (0) + +static void __clear_fe_watermark __P((DB_TXN *, DB *)); + +/* + * __txn_closeevent -- + * + * Creates a close event that can be added to the [so-called] commit list, so + * that we can redo a failed DB handle close once we've aborted the transaction. + * + * PUBLIC: int __txn_closeevent __P((ENV *, DB_TXN *, DB *)); + */ +int +__txn_closeevent(env, txn, dbp) + ENV *env; + DB_TXN *txn; + DB *dbp; +{ + int ret; + TXN_EVENT *e; + + e = NULL; + if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) + return (ret); + + e->u.c.dbp = dbp; + e->op = TXN_CLOSE; + TXN_TOP_PARENT(txn); + TAILQ_INSERT_TAIL(&txn->events, e, links); + + return (0); +} + +/* + * __txn_remevent -- + * + * Creates a remove event that can be added to the commit list. 
+ * + * PUBLIC: int __txn_remevent __P((ENV *, + * PUBLIC: DB_TXN *, const char *, u_int8_t *, int)); + */ +int +__txn_remevent(env, txn, name, fileid, inmem) + ENV *env; + DB_TXN *txn; + const char *name; + u_int8_t *fileid; + int inmem; +{ + int ret; + TXN_EVENT *e; + + e = NULL; + if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) + return (ret); + + if ((ret = __os_strdup(env, name, &e->u.r.name)) != 0) + goto err; + + if (fileid != NULL) { + if ((ret = __os_calloc(env, + 1, DB_FILE_ID_LEN, &e->u.r.fileid)) != 0) { + __os_free(env, e->u.r.name); + goto err; + } + memcpy(e->u.r.fileid, fileid, DB_FILE_ID_LEN); + } + + e->u.r.inmem = inmem; + e->op = TXN_REMOVE; + TAILQ_INSERT_TAIL(&txn->events, e, links); + + return (0); + +err: __os_free(env, e); + + return (ret); +} + +/* + * __txn_remrem -- + * Remove a remove event because the remove has been superceeded, + * by a create of the same name, for example. + * + * PUBLIC: void __txn_remrem __P((ENV *, DB_TXN *, const char *)); + */ +void +__txn_remrem(env, txn, name) + ENV *env; + DB_TXN *txn; + const char *name; +{ + TXN_EVENT *e, *next_e; + + for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) { + next_e = TAILQ_NEXT(e, links); + if (e->op != TXN_REMOVE || strcmp(name, e->u.r.name) != 0) + continue; + TAILQ_REMOVE(&txn->events, e, links); + __os_free(env, e->u.r.name); + if (e->u.r.fileid != NULL) + __os_free(env, e->u.r.fileid); + __os_free(env, e); + } + + return; +} + +/* + * __txn_lockevent -- + * + * Add a lockevent to the commit-queue. The lock event indicates a locker + * trade. 
 *
 * PUBLIC: int __txn_lockevent __P((ENV *,
 * PUBLIC:     DB_TXN *, DB *, DB_LOCK *, DB_LOCKER *));
 */
int
__txn_lockevent(env, txn, dbp, lock, locker)
    ENV *env;
    DB_TXN *txn;
    DB *dbp;
    DB_LOCK *lock;
    DB_LOCKER *locker;
{
    int ret;
    TXN_EVENT *e;

    /* Nothing to record when locking is disabled. */
    if (!LOCKING_ON(env))
        return (0);

    e = NULL;
    if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0)
        return (ret);

    e->u.t.locker = locker;
    e->u.t.lock = *lock;
    e->u.t.dbp = dbp;
    e->op = TXN_TRADE;
    /* This event goes on the current transaction, not its parent. */
    TAILQ_INSERT_TAIL(&txn->events, e, links);
    dbp->cur_txn = txn;

    return (0);
}

/*
 * __txn_remlock --
 *    Remove a lock event because the locker is going away.  We can remove
 * by lock (using offset) or by locker_id (or by both).
 *
 * PUBLIC: void __txn_remlock __P((ENV *, DB_TXN *, DB_LOCK *, DB_LOCKER *));
 */
void
__txn_remlock(env, txn, lock, locker)
    ENV *env;
    DB_TXN *txn;
    DB_LOCK *lock;
    DB_LOCKER *locker;
{
    TXN_EVENT *e, *next_e;

    /* Walk with a saved next pointer since we free entries as we go. */
    for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) {
        next_e = TAILQ_NEXT(e, links);
        /* Only trade events match; both the lock and locker must differ
         * from the targets for the entry to be kept. */
        if ((e->op != TXN_TRADE && e->op != TXN_TRADED) ||
            (e->u.t.lock.off != lock->off && e->u.t.locker != locker))
            continue;
        TAILQ_REMOVE(&txn->events, e, links);
        __os_free(env, e);
    }

    return;
}

/*
 * __txn_doevents --
 *    Process the list of events associated with a transaction.  On commit,
 * apply the events; on abort, just toss the entries.
 *
 * PUBLIC: int __txn_doevents __P((ENV *, DB_TXN *, int, int));
 */

/*
 * Trade a locker associated with a thread for one that is associated
 * only with the handle.  Mark the locker so failcheck will know.
 */
#define DO_TRADE do {                                                   \
    memset(&req, 0, sizeof(req));                                       \
    req.lock = e->u.t.lock;                                             \
    req.op = DB_LOCK_TRADE;                                             \
    t_ret = __lock_vec(env, txn->parent ?                               \
        txn->parent->locker : e->u.t.locker, 0, &req, 1, NULL);         \
    if (t_ret == 0) {                                                   \
        if (txn->parent != NULL) {                                      \
            e->u.t.dbp->cur_txn = txn->parent;                          \
            e->u.t.dbp->cur_locker = txn->parent->locker;               \
        } else {                                                        \
            e->op = TXN_TRADED;                                         \
            e->u.t.dbp->cur_locker = e->u.t.locker;                     \
            F_SET(e->u.t.dbp->cur_locker,                               \
                DB_LOCKER_HANDLE_LOCKER);                               \
            if (opcode != TXN_PREPARE)                                  \
                e->u.t.dbp->cur_txn = NULL;                             \
        }                                                               \
    } else if (t_ret == DB_NOTFOUND)                                    \
        t_ret = 0;                                                      \
    if (t_ret != 0 && ret == 0)                                         \
        ret = t_ret;                                                    \
} while (0)

int
__txn_doevents(env, txn, opcode, preprocess)
    ENV *env;
    DB_TXN *txn;
    int opcode, preprocess;
{
    DB_LOCKREQ req;
    TXN_EVENT *e, *enext;
    int ret, t_ret;

    ret = 0;

    /*
     * This phase only gets called if we have a phase where we
     * release read locks.  Since not all paths will call this
     * phase, we have to check for it below as well.  So, when
     * we do the trade, we update the opcode of the entry so that
     * we don't try the trade again.
     */
    if (preprocess) {
        for (e = TAILQ_FIRST(&txn->events);
            e != NULL; e = enext) {
            enext = TAILQ_NEXT(e, links);
            /* Only trade read locks here; write locks wait. */
            if (e->op != TXN_TRADE ||
                IS_WRITELOCK(e->u.t.lock.mode))
                continue;
            DO_TRADE;
            if (txn->parent != NULL) {
                /* Hand traded events up to the parent txn. */
                TAILQ_REMOVE(&txn->events, e, links);
                TAILQ_INSERT_HEAD(
                    &txn->parent->events, e, links);
            }
        }
        return (ret);
    }

    /*
     * Prepare should only cause a preprocess, since the transaction
     * isn't over.
     */
    DB_ASSERT(env, opcode != TXN_PREPARE);
    while ((e = TAILQ_FIRST(&txn->events)) != NULL) {
        TAILQ_REMOVE(&txn->events, e, links);
        /*
         * Most deferred events should only happen on
         * commits, not aborts or prepares.  The one exception
         * is a close which gets done on commit and abort, but
         * not prepare.  If we're not doing operations, then we
         * can just go free resources.
         */
        if (opcode == TXN_ABORT && e->op != TXN_CLOSE)
            goto dofree;
        switch (e->op) {
        case TXN_CLOSE:
            if ((t_ret = __db_close(e->u.c.dbp,
                NULL, DB_NOSYNC)) != 0 && ret == 0)
                ret = t_ret;
            break;
        case TXN_REMOVE:
            if (txn->parent != NULL)
                /* Defer the remove to the parent's commit. */
                TAILQ_INSERT_TAIL(
                    &txn->parent->events, e, links);
            else if (e->u.r.fileid != NULL) {
                if ((t_ret = __memp_nameop(env,
                    e->u.r.fileid, NULL, e->u.r.name,
                    NULL, e->u.r.inmem)) != 0 && ret == 0)
                    ret = t_ret;
            } else if ((t_ret =
                __os_unlink(env, e->u.r.name, 0)) != 0 && ret == 0)
                ret = t_ret;
            break;
        case TXN_TRADE:
            DO_TRADE;
            if (txn->parent != NULL) {
                TAILQ_INSERT_HEAD(
                    &txn->parent->events, e, links);
                continue;
            }
            /* Fall through */
        case TXN_TRADED:
            /* Downgrade the lock. */
            if ((t_ret = __lock_downgrade(env,
                &e->u.t.lock, DB_LOCK_READ, 0)) != 0 && ret == 0)
                ret = t_ret;
            break;
        default:
            /* This had better never happen. */
            DB_ASSERT(env, 0);
        }
dofree:
        /* Free resources here. */
        switch (e->op) {
        case TXN_REMOVE:
            if (txn->parent != NULL)
                /* Event was handed to the parent; don't free. */
                continue;
            if (e->u.r.fileid != NULL)
                __os_free(env, e->u.r.fileid);
            __os_free(env, e->u.r.name);
            break;
        case TXN_TRADE:
            if (opcode == TXN_ABORT)
                e->u.t.dbp->cur_txn = NULL;
            break;
        case TXN_CLOSE:
        case TXN_TRADED:
        default:
            break;
        }
        __os_free(env, e);
    }

    return (ret);
}

/*
 * __txn_record_fname --
 *    Record a reference from this transaction to an FNAME, growing the
 * per-txn slot array (doubling) when full.
 *
 * PUBLIC: int __txn_record_fname __P((ENV *, DB_TXN *, FNAME *));
 */
int
__txn_record_fname(env, txn, fname)
    ENV *env;
    DB_TXN *txn;
    FNAME *fname;
{
    DB_LOG *dblp;
    DB_TXNMGR *mgr;
    TXN_DETAIL *td;
    roff_t fname_off;
    roff_t *np, *ldbs;
    u_int32_t i;
    int ret;

    if ((td = txn->td) == NULL)
        return (0);
    mgr = env->tx_handle;
    dblp = env->lg_handle;
    fname_off = R_OFFSET(&dblp->reginfo, fname);

    /* See if we already have a ref to this DB handle. */
    ldbs = R_ADDR(&mgr->reginfo, td->log_dbs);
    for (i = 0, np = ldbs; i < td->nlog_dbs; i++, np++)
        if (*np == fname_off)
            return (0);

    if (td->nlog_slots <= td->nlog_dbs) {
        /* Grow the slot array; doubling keeps amortized cost low. */
        TXN_SYSTEM_LOCK(env);
        if ((ret = __env_alloc(&mgr->reginfo,
            sizeof(roff_t) * (td->nlog_slots << 1), &np)) != 0) {
            TXN_SYSTEM_UNLOCK(env);
            return (ret);
        }

        memcpy(np, ldbs, td->nlog_dbs * sizeof(roff_t));
        /* The initial TXN_NSLOTS array is inline; only free grown ones. */
        if (td->nlog_slots > TXN_NSLOTS)
            __env_alloc_free(&mgr->reginfo, ldbs);

        TXN_SYSTEM_UNLOCK(env);
        td->log_dbs = R_OFFSET(&mgr->reginfo, np);
        ldbs = np;
        td->nlog_slots = td->nlog_slots << 1;
    }

    ldbs[td->nlog_dbs] = fname_off;
    td->nlog_dbs++;
    fname->txn_ref++;

    return (0);
}

/*
 * __txn_dref_fname --
 *    Either pass the fname to our parent txn or decrement the refcount
 *    and close the fileid if it goes to zero.
 *
 * PUBLIC: int __txn_dref_fname __P((ENV *, DB_TXN *));
 */
int
__txn_dref_fname(env, txn)
    ENV *env;
    DB_TXN *txn;
{
    DB_LOG *dblp;
    DB_TXNMGR *mgr;
    FNAME *fname;
    roff_t *np;
    TXN_DETAIL *ptd, *td;
    u_int32_t i;
    int ret;

    td = txn->td;

    if (td->nlog_dbs == 0)
        return (0);

    mgr = env->tx_handle;
    dblp = env->lg_handle;
    ret = 0;

    ptd = txn->parent != NULL ? txn->parent->td : NULL;

    /* Walk the slot array back-to-front. */
    np = R_ADDR(&mgr->reginfo, td->log_dbs);
    np += td->nlog_dbs - 1;
    for (i = 0; i < td->nlog_dbs; i++, np--) {
        fname = R_ADDR(&dblp->reginfo, *np);
        MUTEX_LOCK(env, fname->mutex);
        if (ptd != NULL) {
            /* Parent inherits the reference. */
            ret = __txn_record_fname(env, txn->parent, fname);
            fname->txn_ref--;
            MUTEX_UNLOCK(env, fname->mutex);
        } else if (fname->txn_ref == 1) {
            /* Last reference: close the dbreg id. */
            MUTEX_UNLOCK(env, fname->mutex);
            DB_ASSERT(env, fname->txn_ref != 0);
            ret = __dbreg_close_id_int(
                env, fname, DBREG_CLOSE, 0);
        } else {
            fname->txn_ref--;
            MUTEX_UNLOCK(env, fname->mutex);
        }
        /* EIO on close is tolerated; keep releasing the rest. */
        if (ret != 0 && ret != EIO)
            break;
    }

    return (ret);
}
/*
 * Common removal routine.  This is called only after verifying that
 * the DB_MPOOLFILE is in the list.
 */
static void
__clear_fe_watermark(txn, db)
    DB_TXN *txn;
    DB *db;
{
    MPOOLFILE *mpf;

    /* Reset all file-extension bookkeeping and unlink from the txn. */
    mpf = db->mpf->mfp;
    mpf->fe_watermark = PGNO_INVALID;
    mpf->fe_txnid = 0U;
    mpf->fe_nlws = 0U;
    TAILQ_REMOVE(&txn->femfs, db, felink);
}

/*
 * __txn_reset_fe_watermarks
 * Reset the file extension state of MPOOLFILEs involved in this transaction.
 *
 * PUBLIC: void __txn_reset_fe_watermarks __P((DB_TXN *));
 */
void
__txn_reset_fe_watermarks(txn)
    DB_TXN *txn;
{
    DB *db;

    /* Child transactions must not carry fe watermarks of their own. */
    if (txn->parent) {
        DB_ASSERT(txn->mgrp->env, TAILQ_FIRST(&txn->femfs) == NULL);
    }

    while ((db = TAILQ_FIRST(&txn->femfs)))
        __clear_fe_watermark(txn, db);
}

/*
 * __txn_remove_fe_watermark
 * Remove a watermark from the transaction's list
 *
 * PUBLIC: void __txn_remove_fe_watermark __P((DB_TXN *,DB *));
 */
void
__txn_remove_fe_watermark(txn, db)
    DB_TXN *txn;
    DB *db;
{
    DB *db_tmp;

    /* Watermarks only exist for bulk-loading transactions. */
    if (txn == NULL || !F_ISSET(txn, TXN_BULK))
        return;

    TAILQ_FOREACH(db_tmp, &txn->femfs, felink) {
        if (db_tmp == db) {
            __clear_fe_watermark(txn, db);
            break;
        }
    }
}

/*
 * __txn_add_fe_watermark
 *
 * Add an entry to the transaction's list of
 * file_extension_watermarks, if warranted.  Also, set the watermark
 * page number in the MPOOLFILE.  The metadata lock associated with
 * the mfp must be held when this function is called.
 *
 * PUBLIC: void __txn_add_fe_watermark __P((DB_TXN *, DB *, db_pgno_t));
 */
void
__txn_add_fe_watermark(txn, db, pgno)
    DB_TXN *txn;
    DB *db;
    db_pgno_t pgno;
{
    MPOOLFILE *mfp;

    if (txn == NULL || !F_ISSET(txn, TXN_BULK))
        return;

    mfp = db->mpf->mfp;
    /* If the watermark is already set, there's nothing to do. */
    if (mfp->fe_watermark != PGNO_INVALID) {
#ifdef DIAGNOSTIC
        DB_ASSERT(txn->mgrp->env, mfp->fe_txnid == txn->txnid);
#endif
        return;
    }

    /* We can update MPOOLFILE because the metadata lock is held. */
    mfp->fe_watermark = pgno;
    mfp->fe_txnid = txn->txnid;

    TAILQ_INSERT_TAIL(&txn->femfs, db, felink);
}

/*
 * __txn_flush_fe_files
 *    For every extended file in which a log record write was skipped,
 *    flush the data pages.  This is called during commit.
 *
 * PUBLIC: int __txn_flush_fe_files __P((DB_TXN *));
 */
int
__txn_flush_fe_files(txn)
    DB_TXN *txn;
{
    DB *db;
    ENV *env;
    int ret;

    /*
     * NOTE(review): txn->mgrp is dereferenced before the assertions
     * below check it for NULL, so the asserts cannot fire first --
     * confirm whether the ordering is intentional.
     */
    env = txn->mgrp->env;

    DB_ASSERT(env, txn->mgrp != NULL);
    DB_ASSERT(env, env != NULL);

#ifdef DIAGNOSTIC
    DB_ASSERT(env, txn->parent == NULL);
#endif

    /* Only files that skipped log writes (fe_nlws > 0) need a sync. */
    TAILQ_FOREACH(db, &txn->femfs, felink) {
        if (db->mpf->mfp->fe_nlws > 0 &&
            (ret = __memp_sync_int(env, db->mpf, 0,
            DB_SYNC_FILE, NULL, NULL)))
            return (ret);
    }

    return (0);
}

/*
 * __txn_pg_above_fe_watermark --
 *
 * Test whether there is a file extension watermark for the given
 * database, and, if so, whether the given page number is above the
 * watermark.  If this test returns true, then logging of the page's
 * update can be suppressed when the file extension/bulk loading
 * optimization is in force.
 *
 * PUBLIC: int __txn_pg_above_fe_watermark
 * PUBLIC:     __P((DB_TXN*, MPOOLFILE*, db_pgno_t));
 */
int
__txn_pg_above_fe_watermark(txn, mpf, pgno)
    DB_TXN *txn;
    MPOOLFILE *mpf;
    db_pgno_t pgno;
{
    ENV *env;
    int skip;

    if (txn == NULL || (!F_ISSET(txn, TXN_BULK)) ||
        mpf->fe_watermark == PGNO_INVALID)
        return (0);

    env = txn->mgrp->env;

    /* A hot backup in progress disables the optimization. */
    skip = 0;
    TXN_SYSTEM_LOCK(env);
    if (((DB_TXNREGION *)env->tx_handle->reginfo.primary)->n_hotbackup > 0)
        skip = 1;
    TXN_SYSTEM_UNLOCK(env);
    if (skip)
        return (0);

    /*
     * If the watermark is a valid page number, then the extending
     * transaction should be the current outermost transaction.
     */
    DB_ASSERT(txn->mgrp->env, mpf->fe_txnid == txn->txnid);

    return (mpf->fe_watermark <= pgno);
}
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1998, 2011 Oracle and/or its affiliates.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/txn.h"
#include "dbinc_auto/xa_ext.h"

static void corrupted_env __P((ENV *, int));

static int __xa_get_txn __P((ENV *,
    XID *, TXN_DETAIL *, DB_TXN **, u_long, int));
static void __xa_put_txn __P((ENV *, DB_TXN *));

static int __xa_txn_get_prepared
    __P((ENV *, XID *, DB_PREPLIST *, long, long *, u_int32_t));
static int __xa_thread_enter __P((ENV *, DB_THREAD_INFO **));

static int __db_xa_close __P((char *, int, long));
static int __db_xa_commit __P((XID *, int, long));
static int __db_xa_complete __P((int *, int *, int, long));
static int __db_xa_end __P((XID *, int, long));
static int __db_xa_forget __P((XID *, int, long));
static int __db_xa_open __P((char *, int, long));
static int __db_xa_prepare __P((XID *, int, long));
static int __db_xa_recover __P((XID *, long, int, long));
static int __db_xa_rollback __P((XID *, int, long));
static int __db_xa_start __P((XID *, int, long));

/*
 * Possible flag values:
 *    Dynamic registration    0 => no dynamic registration
 *                TMREGISTER => dynamic registration
 *    Asynchronous operation    0 => no support for asynchrony
 *                TMUSEASYNC => async support
 *    Migration support    0 => migration of transactions across
 *                     threads is possible
 *                TMNOMIGRATE => no migration across threads
 */
const struct xa_switch_t db_xa_switch = {
     "Berkeley DB",        /* name[RMNAMESZ] */
     TMNOMIGRATE,        /* flags */
     0,            /* version */
     __db_xa_open,        /* xa_open_entry */
     __db_xa_close,        /* xa_close_entry */
     __db_xa_start,        /* xa_start_entry */
     __db_xa_end,        /* xa_end_entry */
     __db_xa_rollback,    /* xa_rollback_entry */
     __db_xa_prepare,    /* xa_prepare_entry */
     __db_xa_commit,        /* xa_commit_entry */
     __db_xa_recover,    /* xa_recover_entry */
     __db_xa_forget,        /* xa_forget_entry */
     __db_xa_complete    /* xa_complete_entry */
};

/*
 * __xa_get_txn --
 *    Return a pointer to the current transaction structure for the
 * designated environment.  We take the XA flags so we can specifically
 * test for TMJOIN and TMRESUME.  These are testing for compliance with
 * the XA state machine.  The various cases are:
 *
 * TMRESUME: DB_TXN should already exist for this thread and should be
 *    in state SUSPENDED.  Either error or change state.
 * TMJOIN: DB_TXN should *not* exist, but TXN_DETAIL should -- create
 *    the DB_TXN and __txn_continue it.
 * neither: Neither DB_TXN nor TXN_DETAIL should exist (td should be NULL) --
 *    start transaction.
 *
 * In addition, we use this to retrieve the current txn during __db_xa_end.
 * In this case, the td and the txn should exist and the txn should currently
 * be associated.
 *
 */
static int
__xa_get_txn(env, xid, td, txnp, flags, ending)
    ENV *env;
    XID *xid;
    TXN_DETAIL *td;
    DB_TXN **txnp;
    u_long flags;
    int ending;
{
    DB_ENV *dbenv;
    DB_THREAD_INFO *ip;
    int ret;

    dbenv = env->dbenv;
    COMPQUIET(ip, NULL);
    ENV_ENTER_RET(env, ip, ret);
    if (ret != 0)
        return (XAER_RMFAIL);
    else
        ret = XA_OK;
    DB_ASSERT(env, ip != NULL);
    /* When ending, the thread must currently be associated; otherwise
     * it must not be. */
    if (ending != 0)
        DB_ASSERT(env,
            ip->dbth_xa_status == TXN_XA_THREAD_ASSOCIATED);
    else
        DB_ASSERT(env,
            ip->dbth_xa_status != TXN_XA_THREAD_ASSOCIATED);

    /*
     * Two cases: the transaction should already exist in this
     * environment or it should not.  If it should exist, then
     * we should have found its detail and the JOIN or RESUME
     * flags should have been set.
     */
    if (td == NULL) {
        DB_ASSERT(env, ending == 0);
        if (LF_ISSET(TMJOIN | TMRESUME))
            ret = XAER_NOTA;
        else if ((ret = __txn_begin(env,
            ip, NULL, txnp, DB_TXN_NOWAIT)) != 0) {
            dbenv->err(dbenv, ret, DB_STR("4540",
                "xa_get_txn: transaction begin failed"));
            ret = XAER_RMERR;
        } else {
            SH_TAILQ_INSERT_HEAD(&ip->dbth_xatxn,
                *txnp, xa_links, __db_txn);
            (*txnp)->xa_thr_status = TXN_XA_THREAD_ASSOCIATED;
            ip->dbth_xa_status = TXN_XA_THREAD_ASSOCIATED;

            /* Initialize XA fields in the detail structure. */
            /* XXX Does this need protection of the TXN lock? */
            td = (TXN_DETAIL *)((*txnp)->td);
            memcpy(td->gid, xid->data, XIDDATASIZE);
            td->bqual = (u_int32_t)xid->bqual_length;
            td->gtrid = (u_int32_t)xid->gtrid_length;
            td->format = (int32_t)xid->formatID;
            td->xa_br_status = TXN_XA_ACTIVE;
        }
    } else {
        /* If we get here, the transaction exists. */
        if (ending == 0 && !LF_ISSET(TMRESUME) && !LF_ISSET(TMJOIN)) {
            ret = XAER_DUPID;
            goto out;
        }

        /* Find the thread-local DB_TXN wrapping this detail, if any. */
        SH_TAILQ_FOREACH(*txnp, &ip->dbth_xatxn, xa_links, __db_txn)
            if ((*txnp)->td == td)
                break;

        /* Check that we are not a child transaction. */
        if (td->parent != INVALID_ROFF) {
            dbenv->err(dbenv, EINVAL, DB_STR("4541",
                "xa_get_txn: XA transaction with parent"));
            ret = XAER_RMERR;
            goto out;
        }

        if (*txnp != NULL) {
            if (ending) {
                DB_ASSERT(env, (*txnp)->xa_thr_status ==
                    TXN_XA_THREAD_ASSOCIATED);
                DB_ASSERT(env, (*txnp) ==
                    SH_TAILQ_FIRST(&ip->dbth_xatxn, __db_txn));
            } else if (LF_ISSET(TMRESUME)) {
                /* Resume a suspended branch on this thread. */
                DB_ASSERT(env, (*txnp)->xa_thr_status ==
                    TXN_XA_THREAD_SUSPENDED);
                DB_ASSERT(env, ip->dbth_xa_status ==
                    TXN_XA_THREAD_SUSPENDED);
                (*txnp)->xa_thr_status =
                    TXN_XA_THREAD_ASSOCIATED;
                ip->dbth_xa_status = TXN_XA_THREAD_ASSOCIATED;
                /* Move it to the head of the thread's list. */
                if ((*txnp) !=
                    SH_TAILQ_FIRST(&ip->dbth_xatxn, __db_txn)) {
                    SH_TAILQ_REMOVE(&ip->dbth_xatxn,
                        (*txnp), xa_links, __db_txn);
                    SH_TAILQ_INSERT_HEAD(&ip->dbth_xatxn,
                        (*txnp), xa_links, __db_txn);
                }
                if (td->xa_br_status == TXN_XA_IDLE)
                    td->xa_br_status = TXN_XA_ACTIVE;
            } else
                ret = XAER_PROTO;
        } else {
            if (LF_ISSET(TMRESUME)) {
                dbenv->err(dbenv, EINVAL, DB_STR("4542",
                    "xa_get_txn: transaction does not exist"));
                ret = XAER_PROTO;
            } else if ((ret =
                __os_malloc(env, sizeof(DB_TXN), txnp)) == 0) {
                /* We are joining this branch. */
                ret = __txn_continue(env, *txnp, td, ip, 1);
                /*
                 * NOTE(review): on __txn_continue failure the
                 * txn is still linked and marked associated
                 * below -- confirm this is intended.
                 */
                if (ret != 0) {
                    dbenv->err(dbenv, ret, DB_STR("4543",
                        "xa_get_txn: txn_continue fails"));
                    ret = XAER_RMFAIL;
                }
                ip->dbth_xa_status = TXN_XA_THREAD_ASSOCIATED;
                (*txnp)->xa_thr_status =
                    TXN_XA_THREAD_ASSOCIATED;
                SH_TAILQ_INSERT_HEAD(&ip->dbth_xatxn,
                    (*txnp), xa_links, __db_txn);
                if (td->xa_br_status == TXN_XA_IDLE)
                    td->xa_br_status = TXN_XA_ACTIVE;
            } else {
                dbenv->err(dbenv, ret, DB_STR("4544",
                    "xa_get_txn: os_malloc failed"));
                ret = XAER_RMERR;
            }
        }
    }
out:    ENV_LEAVE(env, ip);
    return (ret);
}

/*
 * Release use of this transaction.
 */
static void
__xa_put_txn(env, txnp)
    ENV *env;
    DB_TXN *txnp;
{
    DB_THREAD_INFO *ip;
    TXN_DETAIL *td;

    ip = txnp->thread_info;
    DB_ASSERT(env, ip != NULL);
    /* Unlink from both the thread's XA list and the manager's chain. */
    SH_TAILQ_REMOVE(&ip->dbth_xatxn, txnp, xa_links, __db_txn);
    TAILQ_REMOVE(&txnp->mgrp->txn_chain, txnp, links);
    td = txnp->td;
    DB_ASSERT(env, td->xa_ref > 0);
    td->xa_ref--;
    __os_free(env, txnp);
    ip->dbth_xa_status = TXN_XA_THREAD_UNASSOCIATED;
}

/*
 * Enter the environment on behalf of an XA call and mark the thread
 * as not currently associated with any branch.
 */
static
int __xa_thread_enter(env, ipp)
    ENV *env;
    DB_THREAD_INFO **ipp;
{
    int ret;
    DB_THREAD_INFO *ip;

    COMPQUIET(ip, NULL);
    ENV_ENTER_RET(env, ip, ret);
    if (ret == 0)
        ip->dbth_xa_status = TXN_XA_THREAD_UNASSOCIATED;
    *ipp = ip;
    return (ret);
}

/*
 * __xa_txn_get_prepared --
 *    Wrap the internal call to __txn_get_prepared so that we can call
 * it from XA. XA routines are not considered to be running "inside" the
 * library, so when they make calls into the library, we need to use interface
 * routines that support replication and failchk.  Since __txn_get_prepared
 * is internal, there is no user API to call, so we use this wrapper routine
 * instead.
 */
static int
__xa_txn_get_prepared(env, xids, txns, count, retp, flags)
    ENV *env;
    XID *xids;
    DB_PREPLIST *txns;
    long count;        /* This is long for XA compatibility. */
    long  *retp;
    u_int32_t flags;
{
    DB_THREAD_INFO *ip;
    int ret;

    ip = NULL;
    ENV_ENTER(env, ip);
    REPLICATION_WRAP(env,
        (__txn_get_prepared(env, xids, txns, count, retp, flags)), 0, ret);
    ENV_LEAVE(env, ip);
    return (ret);
}

#define    XA_FLAGS \
    (DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | \
    DB_INIT_TXN | DB_THREAD | DB_REGISTER | DB_RECOVER)
+ * The xa_info field is instance specific information. We require + * that the value of DB_HOME be passed in xa_info. Since xa_info is the + * only thing that we get to pass to db_env_create, any config information + * will have to be done via a config file instead of via the db_env_create + * call. + */ +static int +__db_xa_open(xa_info, rmid, arg_flags) + char *xa_info; + int rmid; + long arg_flags; +{ + DB_ENV *dbenv; + DB_THREAD_INFO *ip; + ENV *env; + int inmem, ret; + u_long flags; + + flags = (u_long)arg_flags; /* Conversion for bit operations. */ + ret = 0; + + if (LF_ISSET(TMASYNC)) + return (XAER_ASYNC); + if (flags != TMNOFLAGS) + return (XAER_INVAL); + + /* Verify if we already have this environment open. */ + if (__db_rmid_to_env(rmid, &env) == 0) { + env->xa_ref++; + /* Indicate that this thread is in an XA environment. */ + if ((ret = __xa_thread_enter(env, &ip)) == 0) { + DB_ASSERT(env, ip != NULL); + ENV_LEAVE(env, ip); + return (XA_OK); + } else + return (XAER_RMERR); + } + + /* Open a new environment. */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4545", + "xa_open: Failure creating env handle")); + return (XAER_RMERR); + } + if ((ret = dbenv->set_thread_count(dbenv, 25)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4546", + "xa_open: Failure setting thread count")); + goto err; + } + env = dbenv->env; + if ((ret = dbenv->open(dbenv, xa_info, XA_FLAGS, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4547", + "xa_open: Failure opening environment")); + goto err; + } + + /* + * Make sure that the environment is not configured for in-memory + * logging. + */ + if ((ret = dbenv->log_get_config(dbenv, + DB_LOG_IN_MEMORY, &inmem)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4548", + "xa_open: Failure getting log configuration")); + goto err; + } + if (inmem != 0) { + dbenv->err(dbenv, EINVAL, DB_STR("4549", + "xa_open: In-memory logging not allowed in XA environment")); + goto err; + } + + /* Create the mapping. 
/*
 * __db_xa_close --
 *    The close call of the XA protocol.  The only trickiness here
 * is that if there are any active transactions, we must fail.  It is
 * *not* an error to call close on an environment that has already been
 * closed (I am interpreting that to mean it's OK to call close on an
 * environment that has never been opened).
 */
static int
__db_xa_close(xa_info, rmid, arg_flags)
    char *xa_info;
    int rmid;
    long arg_flags;
{
    DB_THREAD_INFO *ip;
    ENV *env;
    int ret, t_ret;
    u_long flags;

    COMPQUIET(xa_info, NULL);
    COMPQUIET(ip, NULL);
    ret = 0;

    flags = (u_long)arg_flags;    /* Conversion for bit operations. */

    if (LF_ISSET(TMASYNC))
        return (XAER_ASYNC);
    if (flags != TMNOFLAGS)
        return (XAER_INVAL);

    /* If the environment is closed, then we're done. */
    if (__db_rmid_to_env(rmid, &env) != 0)
        return (XA_OK);

    /* Check if there are any pending transactions. */
    ENV_ENTER_RET(env, ip, ret);
    /*
     * If the environment is corrupt, then we need to get all threads
     * and processes out of it and run recovery.  There is no particularly
     * clean way to do that, so we'll use a really big hammer and
     * crash the server.
     */
    if (ret == DB_RUNRECOVERY)
        exit(1);
    else if (ret != 0)
        return (XAER_RMFAIL);

    /*
     * If we are calling close without ever having called open, then we
     * don't want to do anything, because if we do, our ref counts would
     * be all wrong.
     */
    if (ip->dbth_xa_status == TXN_XA_THREAD_NOTA) {
        ret = XAER_PROTO;
        goto err;
    }

    /*
     * It is an error for a transaction manager to call xa_close from
     * a thread of control that is associated with a transaction branch.
     */
    if (SH_TAILQ_FIRST(&ip->dbth_xatxn, __db_txn) != NULL) {
        ret = XAER_PROTO;
        goto err;
    }

    if (env->xa_ref > 1) {
        /* Other openers remain: just drop our reference. */
        env->xa_ref--;
        goto err;
    } else {
        /* Destroy the mapping. */
        ret = __db_unmap_rmid(rmid);

        /* Close the environment. */
        t_ret = env->dbenv->close(env->dbenv, 0);

        if (ret != 0 || t_ret != 0)
            ret = XAER_RMERR;
        /* Don't try to leave an environment we just closed. */
        goto out;
    }

err:    ENV_LEAVE(env, ip);
out:    return (ret == 0 ? XA_OK : ret);
}

/*
 * __db_xa_start --
 *    Begin a transaction for the current resource manager.
 */
static int
__db_xa_start(xid, rmid, arg_flags)
    XID *xid;
    int rmid;
    long arg_flags;
{
    DB_ENV *dbenv;
    DB_TXN *txnp;
    ENV *env;
    TXN_DETAIL *td;
    int ret;
    u_long flags;

    flags = (u_long)arg_flags;    /* Conversion for bit operations. */
    ret = 0;

#define    OK_FLAGS    (TMJOIN | TMRESUME | TMNOWAIT | TMASYNC | TMNOFLAGS)
    if (LF_ISSET(~OK_FLAGS))
        return (XAER_INVAL);

    /* JOIN and RESUME are mutually exclusive. */
    if (LF_ISSET(TMJOIN) && LF_ISSET(TMRESUME))
        return (XAER_INVAL);

    if (LF_ISSET(TMASYNC))
        return (XAER_ASYNC);

    if (__db_rmid_to_env(rmid, &env) != 0)
        return (XAER_PROTO);
    dbenv = env->dbenv;

    /* Die if the environment is corrupted. */
    PANIC_CHECK_RET(env, ret);
    if (ret == DB_RUNRECOVERY)
        exit(1);

    /*
     * If td comes back NULL, then we know that we don't have a
     * transaction yet.
     */
    if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) {
        dbenv->err(dbenv, ret, DB_STR("4550",
            "xa_start: failure mapping xid"));
        return (XAER_RMFAIL);
    }

    /*
     * This can't block, so we can ignore TMNOWAIT.
     *
     * Other error conditions: RMERR, OUTSIDE, PROTO, RB*
     */
    if (td != NULL) {
        if (td->xa_br_status == TXN_XA_DEADLOCKED)
            return (XA_RBDEADLOCK);
        if (td->xa_br_status == TXN_XA_ROLLEDBACK)
            return (XA_RBOTHER);
    }
    if ((ret = __xa_get_txn(env, xid, td, &txnp, flags, 0)) != 0)
        return (ret);

    return (XA_OK);
}
+ */ + if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4550", + "xa_start: failure mapping xid")); + return (XAER_RMFAIL); + } + + /* + * This can't block, so we can ignore TMNOWAIT. + * + * Other error conditions: RMERR, OUTSIDE, PROTO, RB* + */ + if (td != NULL) { + if (td->xa_br_status == TXN_XA_DEADLOCKED) + return (XA_RBDEADLOCK); + if (td->xa_br_status == TXN_XA_ROLLEDBACK) + return (XA_RBOTHER); + } + if ((ret = __xa_get_txn(env, xid, td, &txnp, flags, 0)) != 0) + return (ret); + + return (XA_OK); +} + +/* + * __db_xa_end -- + * Disassociate the current transaction from the current process. + */ +static int +__db_xa_end(xid, rmid, arg_flags) + XID *xid; + int rmid; + long arg_flags; +{ + DB_ENV *dbenv; + DB_TXN *txn; + ENV *env; + TXN_DETAIL *td; + int ret; + u_long flags; + + flags = (u_long)arg_flags; /* Convert for bit manipulation. */ + if (flags != TMNOFLAGS && !LF_ISSET(TMSUSPEND | TMSUCCESS | TMFAIL)) + return (XAER_INVAL); + + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + + if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4551", + "xa_end: failure mapping xid")); + return (XAER_RMFAIL); + } + if (td == NULL) + return (XAER_NOTA); + + if ((ret = __xa_get_txn(env, xid, td, &txn, flags, 1)) != 0) + return (ret); + + /* We are ending; make sure there are no open cursors. */ + if (txn->cursors != 0) { + dbenv->err(dbenv, EINVAL, DB_STR("4552", + "xa_end: cannot end with open cursors")); + return (XAER_RMERR); + } + + if (td != txn->td) { + dbenv->err(dbenv, ret, DB_STR("4553", + "xa_end: txn_detail mismatch")); + return (XAER_RMERR); + } + + if (td->xa_br_status == TXN_XA_DEADLOCKED) + return (XA_RBDEADLOCK); + + /* + * This happens if this process timed out, + * and the TMS called __db_xa_rollback + * while this process was holding the txn. + * Need to handle the txn in this process. 
/*
 * If, during a transaction completion operation (commit, abort, prepare)
 * we detect a corrupt environment, we must close and reopen the
 * environment and check if the transaction in question exists.  If it
 * does, then we can complete the operation as requested.  If it does
 * not, then we have to return aborted, because we just recovered the
 * environment, aborting this transaction.
 */
static void
corrupted_env(env, rmid)
    ENV *env;
    int rmid;
{
    DB_ENV *dbenv;
    const char *path;
    char *home;
    int ret;
    ENV *env2;

    COMPQUIET(home, NULL);
    ret = 0;
    dbenv = env->dbenv;
    path = NULL;
    if (dbenv->get_home(dbenv, &path) != 0)
        goto err;
    /* Copy the home path: the dbenv it came from is about to go away. */
    if (path != NULL && (__os_strdup(NULL, path, &home) != 0))
        goto err;
    /*
     * Check that no one else came in and cleaned
     * up the environment before we could.  If they
     * did then just call __db_xa_open to get the
     * new environment.  If they have not then
     * unmap the old handle so no one else can get
     * it.
     */
    if (__db_rmid_to_env(rmid, &env2) == 0) {
        PANIC_CHECK_RET(env2, ret);
        if (ret != 0)
            (void)__db_unmap_rmid(rmid);
    }

    /*
     * If we cannot get the environment then it is
     * corrupted and are currently unable to run recovery.
     * In that case all we can do is crash and restart,
     * and recovery will clean up the lost transaction.
     */
    if ( __db_xa_open(home, rmid, 0) != XA_OK)
        goto err;

    __os_free(NULL, home);
    if (0) {
err:        exit(1);
    }
}

/*
 * __db_xa_prepare --
 *    Sync the log to disk so we can guarantee recoverability.
 */
static int
__db_xa_prepare(xid, rmid, arg_flags)
    XID *xid;
    int rmid;
    long arg_flags;
{
    DB_ENV *dbenv;
    DB_TXN *txnp;
    ENV *env;
    TXN_DETAIL *td;
    int ret;
    u_long flags;

    flags = (u_long)arg_flags;    /* Conversion for bit operations. */
    ret = 0;

    if (LF_ISSET(TMASYNC))
        return (XAER_ASYNC);
    if (flags != TMNOFLAGS)
        return (XAER_INVAL);

    /*
     * We need to know if we've ever called prepare on this.
     * As part of the prepare, we set the xa_status field to
     * reflect that fact that prepare has been called, and if
     * it's ever called again, it's an error.
     */
    if (__db_rmid_to_env(rmid, &env) != 0)
        return (XAER_PROTO);
    dbenv = env->dbenv;

    /*
     * If the environment is corrupted, reopen it or die if that
     * is not possible.
     */
    PANIC_CHECK_RET(env, ret);
    if (ret == DB_RUNRECOVERY) {
        corrupted_env(env, rmid);
        /* corrupted_env remapped the rmid; refresh our handles. */
        if (__db_rmid_to_env(rmid, &env) != 0)
            return (XAER_PROTO);
        dbenv = env->dbenv;
    }

    if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) {
        dbenv->err(dbenv, ret, DB_STR("4555",
            "xa_prepare: failure mapping xid"));
        return (XAER_RMFAIL);
    }
    if (td == NULL) {
        dbenv->err(dbenv, EINVAL, DB_STR("4556",
            "xa_prepare: xid not found"));
        return (XAER_NOTA);
    }

    if (td->xa_br_status == TXN_XA_DEADLOCKED)
        return (XA_RBDEADLOCK);
    if (td->xa_br_status == TXN_XA_ROLLEDBACK)
        return (XA_RBOTHER);

    if (td->xa_br_status != TXN_XA_ACTIVE &&
        td->xa_br_status != TXN_XA_IDLE) {
        dbenv->err(dbenv, EINVAL, DB_STR("4557",
            "xa_prepare: transaction neither active nor idle"));
        return (XAER_PROTO);
    }

    /* Now, fill in the global transaction structure. */
    if ((ret = __xa_get_txn(env, xid, td, &txnp, TMJOIN, 0)) != 0)
        return (ret);

    if ((ret = txnp->prepare(txnp, (u_int8_t *)xid->data)) != 0) {
        dbenv->err(dbenv, ret, DB_STR("4558",
            "xa_prepare: txnp->prepare failed"));
        td->xa_br_status = TXN_XA_IDLE;
        return (XAER_RMERR);
    }
    td->xa_br_status = TXN_XA_PREPARED;

    __xa_put_txn(env, txnp);
    return (XA_OK);
}
*/ + if ((ret = __xa_get_txn(env, xid, td, &txnp, TMJOIN, 0)) != 0) + return (ret); + + if ((ret = txnp->prepare(txnp, (u_int8_t *)xid->data)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4558", + "xa_prepare: txnp->prepare failed")); + td->xa_br_status = TXN_XA_IDLE; + return (XAER_RMERR); + } + td->xa_br_status = TXN_XA_PREPARED; + + __xa_put_txn(env, txnp); + return (XA_OK); +} + +/* + * __db_xa_commit -- + * Commit the transaction + */ +static int +__db_xa_commit(xid, rmid, arg_flags) + XID *xid; + int rmid; + long arg_flags; +{ + DB_ENV *dbenv; + DB_TXN *txnp; + ENV *env; + TXN_DETAIL *td; + int ret; + u_long flags; + + flags = (u_long)arg_flags; /* Conversion for bit operations. */ + ret = 0; + + if (LF_ISSET(TMASYNC)) + return (XAER_ASYNC); +#undef OK_FLAGS +#define OK_FLAGS (TMNOFLAGS | TMNOWAIT | TMONEPHASE) + if (LF_ISSET(~OK_FLAGS)) + return (XAER_INVAL); + + /* + * We need to know if we've ever called prepare on this. + * We can verify this by examining the xa_status field. + */ + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + + /* + * If the environment is corrupted, reopen it or die if that + * is not possible. 
+ */ + PANIC_CHECK_RET(env, ret); + if (ret == DB_RUNRECOVERY) { + corrupted_env(env, rmid); + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + } + + if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4559", + "xa_commit: failure mapping xid")); + return (XAER_RMFAIL); + } + if (td == NULL) { + dbenv->err(dbenv, EINVAL, DB_STR("4560", + "xa_commit: xid not found")); + return (XAER_NOTA); + } + + if (td->xa_br_status == TXN_XA_DEADLOCKED) + return (XA_RBDEADLOCK); + + if (td->xa_br_status == TXN_XA_ROLLEDBACK) + return (XA_RBOTHER); + + if (LF_ISSET(TMONEPHASE) && td->xa_br_status != TXN_XA_IDLE) { + dbenv->err(dbenv, EINVAL, DB_STR("4561", + "xa_commit: commiting transaction active in branch")); + return (XAER_PROTO); + } + + if (!LF_ISSET(TMONEPHASE) && td->xa_br_status != TXN_XA_PREPARED) { + dbenv->err(dbenv, EINVAL, DB_STR("4562", + "xa_commit: attempting to commit unprepared transaction")); + return (XAER_PROTO); + } + + /* Now, fill in the global transaction structure. */ + if ((ret = __xa_get_txn(env, xid, td, &txnp, TMJOIN, 0)) != 0) + return (ret); + + /* + * Because this transaction is curently associated, commit will not free + * the transaction structure, which is good, because we need to do that + * in xa_put_txn below. + */ + if ((ret = txnp->commit(txnp, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4563", + "xa_commit: txnp->commit failed")); + return (XAER_RMERR); + } + + __xa_put_txn(env, txnp); + return (XA_OK); +} + +/* + * __db_xa_recover -- + * Returns a list of prepared and heuristically completed transactions. + * + * The return value is the number of xids placed into the xid array (less + * than or equal to the count parameter). The flags are going to indicate + * whether we are starting a scan or continuing one. 
+ */ +static int +__db_xa_recover(xids, count, rmid, flags) + XID *xids; + long count, flags; + int rmid; +{ + ENV *env; + int ret; + u_int32_t newflags; + long rval; + + /* If the environment is closed, then we're done. */ + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + + if (LF_ISSET(TMSTARTRSCAN)) + newflags = DB_FIRST; + else if (LF_ISSET(TMENDRSCAN)) + newflags = DB_LAST; + else + newflags = DB_NEXT; + + rval = 0; + if ((ret = __xa_txn_get_prepared(env, + xids, NULL, count, &rval, newflags)) != 0) { + env->dbenv->err(env->dbenv, ret, DB_STR("4564", + "xa_recover: txn_get_prepared failed")); + return (XAER_RMERR); + } + + return (rval); +} + +/* + * __db_xa_rollback + * Abort an XA transaction. + */ +static int +__db_xa_rollback(xid, rmid, arg_flags) + XID *xid; + int rmid; + long arg_flags; +{ + DB_ENV *dbenv; + DB_TXN *txnp; + ENV *env; + TXN_DETAIL *td; + int ret; + u_long flags; + + flags = (u_long)arg_flags; /* Conversion for bit operations. */ + ret = 0; + + if (LF_ISSET(TMASYNC)) + return (XAER_ASYNC); + if (flags != TMNOFLAGS) + return (XAER_INVAL); + + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + + /* + * If the environment is corrupted, reopen it or die if that + * is not possible. 
+ */ + PANIC_CHECK_RET(env, ret); + if (ret == DB_RUNRECOVERY) { + corrupted_env(env, rmid); + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + } + + if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4565", + "xa_rollback: failure mapping xid")); + return (XAER_RMFAIL); + } if (td == NULL) { + dbenv->err(dbenv, ret, DB_STR("4566", + "xa_rollback: xid not found")); + return (XAER_NOTA); + } + + if (td->xa_br_status == TXN_XA_DEADLOCKED) + return (XA_RBDEADLOCK); + + if (td->xa_br_status == TXN_XA_ROLLEDBACK) + return (XA_RBOTHER); + + if (td->xa_br_status != TXN_XA_ACTIVE && + td->xa_br_status != TXN_XA_IDLE && + td->xa_br_status != TXN_XA_PREPARED) { + dbenv->err(dbenv, EINVAL, DB_STR_A("4567", + "xa_rollback: transaction in invalid state %d", + "%d"), (int)td->xa_br_status); + return (XAER_PROTO); + } + + /* Now, fill in the global transaction structure. */ + if ((ret = __xa_get_txn(env, xid, td, &txnp, TMJOIN, 0)) != 0) + return (ret); + /* + * Normally abort frees the txnp, but if this is an associated XA + * transaction, then abort will not free it; we do that below. + */ + if ((ret = txnp->abort(txnp)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4568", + "xa_rollback: failure aborting transaction")); + return (XAER_RMERR); + } + + __xa_put_txn(env, txnp); + + return (XA_OK); +} + +/* + * __db_xa_forget -- + * Forget about an XID for a transaction that was heuristically + * completed. Since we do not heuristically complete anything, I + * don't think we have to do anything here, but we should make sure + * that we reclaim the slots in the txnid table. + */ +static int +__db_xa_forget(xid, rmid, arg_flags) + XID *xid; + int rmid; + long arg_flags; +{ + DB_ENV *dbenv; + DB_TXN *txnp; + ENV *env; + TXN_DETAIL *td; + int ret; + u_long flags; + + flags = (u_long)arg_flags; /* Conversion for bit operations. 
*/ + + if (LF_ISSET(TMASYNC)) + return (XAER_ASYNC); + if (flags != TMNOFLAGS) + return (XAER_INVAL); + + if (__db_rmid_to_env(rmid, &env) != 0) + return (XAER_PROTO); + dbenv = env->dbenv; + + /* + * If mapping is gone, then we're done. + */ + if ((ret = __db_xid_to_txn(env, xid, &td)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4569", + "xa_forget: failure mapping xid")); + return (XAER_RMFAIL); + } + if (td == NULL) { + dbenv->err(dbenv, ret, DB_STR("4570", + "xa_forget: xid not found")); + return (XA_OK); + } + + if ((ret = __xa_get_txn(env, xid, td, &txnp, TMJOIN, 0)) != 0) + return (ret); + + if ((ret = txnp->discard(txnp, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR("4571", + "xa_forget: txnp->discard failed")); + return (XAER_RMFAIL); + } + + __xa_put_txn(env, txnp); + return (XA_OK); +} + +/* + * __db_xa_complete -- + * Used to wait for asynchronous operations to complete. Since we're + * not doing asynch, this is an invalid operation. + */ +static int +__db_xa_complete(handle, retval, rmid, flags) + int *handle, *retval, rmid; + long flags; +{ + COMPQUIET(handle, NULL); + COMPQUIET(retval, NULL); + COMPQUIET(rmid, 0); + COMPQUIET(flags, 0); + + return (XAER_INVAL); +} diff --git a/src/xa/xa_map.c b/src/xa/xa_map.c new file mode 100644 index 00000000..4917bf61 --- /dev/null +++ b/src/xa/xa_map.c @@ -0,0 +1,152 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/txn.h" +#include "dbinc_auto/xa_ext.h" + +/* + * This file contains all the mapping information that we need to support + * the DB/XA interface. + */ + +/* + * __db_rmid_to_env + * Return the environment associated with a given XA rmid. 
+ * + * PUBLIC: int __db_rmid_to_env __P((int, ENV **)); + */ +int +__db_rmid_to_env(rmid, envp) + int rmid; + ENV **envp; +{ + ENV *env; + + *envp = NULL; + if (TAILQ_EMPTY(&DB_GLOBAL(envq))) + TAILQ_INIT(&DB_GLOBAL(envq)); + + /* + * When we map an rmid, move that environment to be the first one in + * the list of environments, so we acquire the correct environment + * in DB->open. + */ + for (env = TAILQ_FIRST(&DB_GLOBAL(envq)); + env != NULL; env = TAILQ_NEXT(env, links)) { + if (env->xa_rmid == rmid) { + *envp = env; + if (env != TAILQ_FIRST(&DB_GLOBAL(envq))) { + TAILQ_REMOVE(&DB_GLOBAL(envq), env, links); + TAILQ_INSERT_HEAD(&DB_GLOBAL(envq), env, links); + } + return (0); + } + } + return (1); +} + +/* + * __db_xid_to_txn + * Return the txn that corresponds to this XID. + * + * PUBLIC: int __db_xid_to_txn __P((ENV *, XID *, TXN_DETAIL **)); + */ +int +__db_xid_to_txn(env, xid, tdp) + ENV *env; + XID *xid; + TXN_DETAIL **tdp; +{ + DB_TXNMGR *mgr; + DB_TXNREGION *region; + u_int8_t *gid; + + mgr = env->tx_handle; + region = mgr->reginfo.primary; + + /* + * Search the internal active transaction table to find the + * matching xid. If this is a performance hit, then we + * can create a hash table, but I doubt it's worth it. + */ + TXN_SYSTEM_LOCK(env); + gid = (u_int8_t *)(xid->data); + SH_TAILQ_FOREACH(*tdp, ®ion->active_txn, links, __txn_detail) + if (memcmp(gid, (*tdp)->gid, sizeof((*tdp)->gid)) == 0) + break; + TXN_SYSTEM_UNLOCK(env); + + /* + * This returns an error, because TXN_SYSTEM_{UN}LOCK may return + * an error. + */ + return (0); +} + +/* + * __db_map_rmid + * Create a mapping between the specified rmid and environment. + * + * PUBLIC: void __db_map_rmid __P((int, ENV *)); + */ +void +__db_map_rmid(rmid, env) + int rmid; + ENV *env; +{ + env->xa_rmid = rmid; + TAILQ_INSERT_HEAD(&DB_GLOBAL(envq), env, links); +} + +/* + * __db_unmap_rmid + * Destroy the mapping for the given rmid. 
+ * + * PUBLIC: int __db_unmap_rmid __P((int)); + */ +int +__db_unmap_rmid(rmid) + int rmid; +{ + ENV *e; + + for (e = TAILQ_FIRST(&DB_GLOBAL(envq)); + e->xa_rmid != rmid; + e = TAILQ_NEXT(e, links)) + ; + + if (e == NULL) + return (EINVAL); + + TAILQ_REMOVE(&DB_GLOBAL(envq), e, links); + return (0); +} + +/* + * __db_unmap_xid + * Destroy the mapping for the specified XID. + * + * PUBLIC: void __db_unmap_xid __P((ENV *, XID *, size_t)); + */ + +void +__db_unmap_xid(env, xid, off) + ENV *env; + XID *xid; + size_t off; +{ + TXN_DETAIL *td; + + COMPQUIET(xid, NULL); + + td = R_ADDR(&env->tx_handle->reginfo, off); + memset(td->gid, 0, sizeof(td->gid)); +} diff --git a/test/c/README b/test/c/README new file mode 100644 index 00000000..9205cfa2 --- /dev/null +++ b/test/c/README @@ -0,0 +1,91 @@ + +The C test cases are currently (loosly) based on the CuTest harness. Loosely +because the harness has been heavily modified from the original version. + +There are still a few old test cases in the source tree. Those will be +converted to run within CuTest at some stage. New tests should all work within +the CuTest structure. + +=================== BUILDING ================ + +To build CuTest on POSIX (*nix) run "make cutest" + +To build CuTest on Windows load the test_cutest.vcproj project into the +Berkeley DB solution file, and build the resulting project. It should depend +on the Berkeley DB library (db). + + +=================== RUNNING ================ + +To run CuTest on POSIX (*nix) run: +./cutest +from the build_unix directory. + +To run CuTest on Windows run: + +./test_cutest.exe +from the build_windows/Win32/Debug directory. + +=================== WRITING ================ + +To write a new test case, open the relevant test suite source file under +test/c/suites/ + +Create a new function, with a name that begins with "Test", that returns an int +and takes a CuTest * argument. 
For example:
+
+int TestExampleFunction(CuTest *ct)
+
+
+To write a new test suite (to test a different type of functionality):
+ * Create a source file in test/c/suites with a name beginning with Test.
+   Of course, as with any new source file, it needs to be
+   mentioned in dist/Makefile.in and the corresponding Windows files.
+ * Include "CuTest.h" in that source file
+ * Probably include "test_util.h" in the new source file, to access
+   shared utility functions and macros.
+ * Add any test cases, conforming to the test case guidelines above.
+ * Optionally specify setup and teardown functions for the suite.
+   Named SuiteSetup and SuiteTeardown respectively.
+ * Optionally specify setup and teardown functions for each test case
+   in the suite. Named TestSetup and TestTeardown
+   respectively.
+ * Use the CuAssertXXX macros to validate behavior.
+ * See TestEnvConfig.c for an example of good practice.
+
+
+When adding a new test case or a new test suite, it's necessary to run a
+script to regenerate the test harness wrapper. You'll need a shell script to
+do that. The script is:
+test/c/cutest/gen_tester.sh
+
+It should be run as:
+$ cd test/c/cutest
+$ ./gen_tester.sh > CuTests.c
+
+
+NOTE: A comment on naming.
+
+All tests and suites should be clearly named. Long names are fine, and more
+descriptive is generally better.
+
+A test suite should be named to reflect the component and functionality that
+is being tested.
+An individual test case should be named to identify the specific functionality
+being tested.
+It's best to avoid including SR numbers in test and suite names. Relevant SRs
+should be referenced in comments.
+
+=================== FUTURES ================
+
+ * Add a configuration object to the setup and teardown functions, so they can
+   pass and maintain state.
+ * Fix the naming so that it's more Berkeley DB compatible (not camel case).
+ * Implement the RunTest and RunSuite functionality in test/c/cutest/CuTest.c, + so that it's possible to run parts of the test suite. The format will be: + +./cutest -s TestSuiteName -s TestSuiteName -t TestSuiteName:TestCase + * Implement more shared functionality in test/c/common, move the function + implementations from the header file in there, so shared library builds will + work. + diff --git a/test/c/chk.ctests b/test/c/chk.ctests new file mode 100644 index 00000000..1c76e495 --- /dev/null +++ b/test/c/chk.ctests @@ -0,0 +1,70 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure we can run DB 1.85 code. +d=../../ +b=./tmp_build/ +s=$d/src + +mkdir -p $b + +[ -f $d/LICENSE ] || { + echo 'FAIL: Test must be run from scr directory.' + exit 1 +} + +nocleanup=0 +while [ $# -gt 0 ] +do + case "$1" in + -nocleanup) + nocleanup=1; shift;; + *) + echo "Unrecognized option: $1, ignoring" + shift;; + esac +done + +opts="--enable-compat185 --disable-shared" +echo "Building DB library, this can take a while." +(cd $b && ../../../dist/configure $opts > /dev/null && make libdb.a > /dev/null) || { + echo 'FAIL: unable to build libdb.a' + exit 1 +} + +# if compiling on linux blade server, add -pthread on cc +CINC="-I$b -I$s -I$s/dbinc" +[ `uname` = "Linux" ] && CINC="$CINC -pthread" + +for i in `ls test_*.c`; do + + echo "=== Running $i ===" | tee -a compile.out + if cc -g -Wall $CINC $i $b/libdb.a -o t >> compile.out 2>&1; then + : + else + echo "FAIL: unable to compile test program $i" + exit 1 + fi + + if ./t; then + : + else + echo "FAIL: test program failed" + exit 1 + fi + rm -f ./t +done + +# Cleanup. +# TODO: The test should be consistent, so this cleanup isn't so haphazard. +# Alternatively we could build each test in a sub-dir and cleanup after +# individual runs. 
+rm a.db __db.* output +rm -rf ./TESTDIR + +if [ nocleanup = 0 ]; then + rm -rf compile.out $b +fi + +exit 0 diff --git a/test/c/common/test_util.h b/test/c/common/test_util.h new file mode 100644 index 00000000..b501dc70 --- /dev/null +++ b/test/c/common/test_util.h @@ -0,0 +1,76 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * A header file that defines things that are common to many C test cases. + */ + +#include "db_config.h" + +#include "db_int.h" +#include + +#define TEST_ENV "TESTDIR" +#define TEST_ENVx(x) "TESTDIR#x" + +static int teardown_envdir(const char *dir); + +static int setup_envdir(const char *dir, u_int32_t remove) +{ + int ret; + + if (remove && (ret = teardown_envdir(dir)) != 0) + return (ret); + return (__os_mkdir(NULL, dir, 0755)); +} + +/* + * This might seem complicated, but it works on all platforms. + */ +static int teardown_envdir(const char *dir) +{ + int cnt, i, isdir, ret; + char buf[1024], **names; + + ret = 0; + + /* If the directory doesn't exist, we're done. */ + if (__os_exists(NULL, dir, &isdir) != 0) + return (0); + + /* Get a list of the directory contents. */ + if ((ret = __os_dirlist(NULL, dir, 0, &names, &cnt)) != 0) + return (ret); + + /* Go through the file name list, remove each file in the list */ + for (i = 0; i < cnt; ++i) { + (void)snprintf(buf, sizeof(buf), + "%s%c%s", dir, PATH_SEPARATOR[0], names[i]); + if ((ret = __os_exists(NULL, buf, &isdir)) != 0) + goto file_err; + if (isdir) + teardown_envdir(buf); + if (!isdir && (ret = __os_unlink(NULL, buf, 0)) != 0) { +file_err: fprintf(stderr, "%s: %s\n", + buf, db_strerror(ret)); + break; + } + } + + __os_dirfree(NULL, names, cnt); + + /* + * If we removed the contents of the directory, remove the directory + * itself. 
+ */ + if (i == cnt && (ret = rmdir(dir)) != 0) + fprintf(stderr, + "%s: %s\n", dir, db_strerror(errno)); + return (ret); +} + diff --git a/test/c/cutest/CuTest.c b/test/c/cutest/CuTest.c new file mode 100644 index 00000000..dfaae913 --- /dev/null +++ b/test/c/cutest/CuTest.c @@ -0,0 +1,460 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "CuTest.h" + +/*-------------------------------------------------------------------------* + * Functions added by Berkeley DB, that allow more control over which tests + * are run. They assume a populated TestSuite array called g_suites + * is available globally. + *-------------------------------------------------------------------------*/ + +extern TestSuite g_suites[]; +int RunAllSuites(void) +{ + int failCount, i; + CuString *output; + failCount = 0; + for(i = 0; strlen(g_suites[i].name) != 0; i++) { + printf("Running suite %s\n", g_suites[i].name); + output = CuStringNew(); + failCount += g_suites[i].fn(output); + printf("%s\n", output->buffer); + CuStringDelete(output); + printf("Finished suite %s\n", g_suites[i].name); + } + return (failCount); +} + +int RunSuite(const char * suite) +{ + int failCount, i; + CuString *output; + failCount = 0; + for(i = 0; strlen(g_suites[i].name) != 0; i++) + if (strcmp(g_suites[i].name, suite) == 0) { + output = CuStringNew(); + failCount = g_suites[i].fn(output); + printf("%s\n", output->buffer); + CuStringDelete(output); + break; + } + + return (failCount); +} + +int RunTest(const char * suite, const char *test) +{ + fprintf(stderr, "TODO: Implement RunTest: %s:%s.\n", suite, test); + return (1); +} + +/*-------------------------------------------------------------------------* + * CuStr + *-------------------------------------------------------------------------*/ + +char* CuStrAlloc(int 
size) +{ + char* newStr = (char*) malloc( sizeof(char) * (size) ); + return newStr; +} + +char* CuStrCopy(const char* old) +{ + int len = (int)strlen(old); + char* newStr = CuStrAlloc(len + 1); + if (newStr != NULL) + strcpy(newStr, old); + else + fprintf(stderr, "%s: malloc in CuStrCopy.\n", CU_FAIL_HEADER); + return newStr; +} + +/*-------------------------------------------------------------------------* + * CuString + *-------------------------------------------------------------------------*/ + +void CuStringInit(CuString* str) +{ + str->length = 0; + str->size = STRING_MAX; + str->buffer = (char*) malloc(sizeof(char) * str->size); + str->buffer[0] = '\0'; +} + +CuString* CuStringNew(void) +{ + CuString* str = (CuString*) malloc(sizeof(CuString)); + CuStringInit(str); + return str; +} + +void CuStringDelete(CuString *str) +{ + if (!str) return; + free(str->buffer); + free(str); +} + +int CuStringResize(CuString* str, int newSize) +{ + char *newStr; + newStr = (char*) realloc(str->buffer, sizeof(char) * newSize); + if (newStr != 0) { + str->size = newSize; + str->buffer = newStr; + return (0); + } + return (ENOMEM); +} + +int CuStringAppend(CuString* str, const char* text, int dump) +{ + int length; + + if (text == NULL) { + text = "NULL"; + } + + length = (int)strlen(text); + if ((str->length + length + 1 >= str->size) && + CuStringResize(str, str->length + length + 1 + STRING_INC) != 0) { + if (dump) { + fprintf(stderr, "%s:%s\n%s\n", CU_FAIL_HEADER, + "String append in test framework failed due to" + "malloc failure. Outputting appended text instead.", + text); + } + return (ENOMEM); + } + str->length += length; + strcat(str->buffer, text); + return (0); +} + +int CuStringAppendChar(CuString* str, char ch, int dump) +{ + char text[2]; + text[0] = ch; + text[1] = '\0'; + return (CuStringAppend(str, text, dump)); +} + +int CuStringAppendFormat(CuString* str, int dump, const char* format, ...) 
+{
+	va_list argp;
+	char buf[HUGE_STRING_LEN];
+	va_start(argp, format);
+	/* vsnprintf: vsprintf could overrun buf on a long formatted string. */
+	vsnprintf(buf, sizeof(buf), format, argp);
+	va_end(argp);
+	return (CuStringAppend(str, buf, dump));
+}
+
+int CuStringInsert(CuString* str, const char* text, int pos, int dump)
+{
+	int length = (int)strlen(text);
+	if (pos > str->length)
+		pos = str->length;
+	if ((str->length + length + 1 >= str->size) &&
+	    CuStringResize(str, str->length + length + 1 + STRING_INC) != 0) {
+		if (dump) {
+			/* Trailing space added: literals concatenate. */
+			fprintf(stderr, "%s:%s\n%s\n", CU_FAIL_HEADER,
+			    "String append in test framework failed due to "
+			    "malloc failure. Outputting appended text instead.",
+			    text);
+		}
+		return (ENOMEM);
+	}
+	memmove(str->buffer + pos + length, str->buffer + pos,
+	    (str->length - pos) + 1);
+	str->length += length;
+	memcpy(str->buffer + pos, text, length);
+	return (0);
+}
+
+/*-------------------------------------------------------------------------*
+ * CuTest
+ *-------------------------------------------------------------------------*/
+
+void CuTestInit(CuTest* t, const char* name, TestFunction function, TestSetupFunction setup, TestTeardownFunction teardown)
+{
+	t->name = CuStrCopy(name);
+	t->failed = 0;
+	t->ran = 0;
+	t->message = NULL;
+	t->function = function;
+	t->jumpBuf = NULL;
+	t->TestSetup = setup;
+	t->TestTeardown = teardown;
+}
+
+CuTest* CuTestNew(const char* name, TestFunction function, TestSetupFunction setup, TestTeardownFunction teardown)
+{
+	CuTest* tc = CU_ALLOC(CuTest);
+	if (tc != NULL)
+		CuTestInit(tc, name, function, setup, teardown);
+	else
+		fprintf(stderr, "%s: %s%s\n", CU_FAIL_HEADER,
+		    "Error initializing test case: ", name);
+	return tc;
+}
+
+void CuTestDelete(CuTest *t)
+{
+	if (!t) return;
+	free(t->name);
+	free(t);
+}
+
+void CuTestRun(CuTest* tc)
+{
+	jmp_buf buf;
+	if (tc->TestSetup != NULL)
+		(tc->TestSetup)(tc);
+	tc->jumpBuf = &buf;
+	if (setjmp(buf) == 0)
+	{
+		tc->ran = 1;
+		(tc->function)(tc);
+	}
+	if (tc->TestTeardown != NULL)
+		(tc->TestTeardown)(tc);
+	tc->jumpBuf = 0;
+}
+
+static void CuFailInternal(CuTest* tc, const char* file, int line, CuString* string) +{ + char buf[HUGE_STRING_LEN]; + + sprintf(buf, "%s:%d: ", file, line); + (void)CuStringInsert(string, buf, 0, 1); + + if (tc == NULL) + { + /* + * Output the message now, it's come from overriding + * __db_assert. + * TODO: It'd be nice to somehow map this onto a CuTest, so the + * assert can be effectively trapped. Since Berkeley DB + * doesn't necessarily return error after an assert, so + * a test case can "pass" even after a __db_assert. + * Could trap this output, and map it back to the test + * case, but I want to be careful about allowing multi- + * threaded use at some stage. + */ + fprintf(stderr, "DB internal assert: %s\n", string->buffer); + } else { + tc->failed = 1; + tc->message = string->buffer; + if (tc->jumpBuf != 0) longjmp(*(tc->jumpBuf), 0); + } +} + +void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message) +{ + CuString string; + + CuStringInit(&string); + if (message2 != NULL) + { + CuStringAppend(&string, message2, 1); + CuStringAppend(&string, ": ", 1); + } + CuStringAppend(&string, message, 1); + CuFailInternal(tc, file, line, &string); +} + +void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition) +{ + if (condition) return; + CuFail_Line(tc, file, line, NULL, message); +} + +void CuAssertStrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, + const char* expected, const char* actual) +{ + CuString string; + if ((expected == NULL && actual == NULL) || + (expected != NULL && actual != NULL && + strcmp(expected, actual) == 0)) + { + return; + } + + CuStringInit(&string); + if (message != NULL) + { + CuStringAppend(&string, message, 1); + CuStringAppend(&string, ": ", 1); + } + CuStringAppend(&string, "expected <", 1); + CuStringAppend(&string, expected, 1); + CuStringAppend(&string, "> but was <", 1); + CuStringAppend(&string, actual, 1); + 
CuStringAppend(&string, ">", 1); + CuFailInternal(tc, file, line, &string); +} + +void CuAssertIntEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, + int expected, int actual) +{ + char buf[STRING_MAX]; + if (expected == actual) return; + sprintf(buf, "expected <%d> but was <%d>", expected, actual); + CuFail_Line(tc, file, line, message, buf); +} + +void CuAssertDblEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, + double expected, double actual, double delta) +{ + char buf[STRING_MAX]; + if (fabs(expected - actual) <= delta) return; + sprintf(buf, "expected <%f> but was <%f>", expected, actual); + + CuFail_Line(tc, file, line, message, buf); +} + +void CuAssertPtrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, + void* expected, void* actual) +{ + char buf[STRING_MAX]; + if (expected == actual) return; + sprintf(buf, "expected pointer <0x%p> but was <0x%p>", expected, actual); + CuFail_Line(tc, file, line, message, buf); +} + + +/*-------------------------------------------------------------------------* + * CuSuite + *-------------------------------------------------------------------------*/ + +void CuSuiteInit(CuSuite* testSuite, const char *name, + SuiteSetupFunction setup, SuiteTeardownFunction teardown) +{ + testSuite->name = name; + testSuite->count = 0; + testSuite->failCount = 0; + testSuite->SuiteSetup = setup; + testSuite->SuiteTeardown = teardown; + testSuite->context = NULL; + memset(testSuite->list, 0, sizeof(testSuite->list)); +} + +CuSuite* CuSuiteNew(const char *name, + SuiteSetupFunction setup, SuiteTeardownFunction teardown) +{ + CuSuite* testSuite = CU_ALLOC(CuSuite); + if (testSuite != NULL) + CuSuiteInit(testSuite, name, setup, teardown); + else + fprintf(stderr, "%s: %s%s\n", CU_FAIL_HEADER, + "Error initializing test suite: ", name); + return testSuite; +} + +void CuSuiteDelete(CuSuite *testSuite) +{ + unsigned int n; + for (n=0; n < MAX_TEST_CASES; n++) + { + 
if (testSuite->list[n]) + { + CuTestDelete(testSuite->list[n]); + } + } + free(testSuite); + +} + +void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase) +{ + assert(testSuite->count < MAX_TEST_CASES); + testSuite->list[testSuite->count] = testCase; + testSuite->count++; + testCase->suite = testSuite; +} + +void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2) +{ + int i; + for (i = 0 ; i < testSuite2->count ; ++i) + { + CuTest* testCase = testSuite2->list[i]; + CuSuiteAdd(testSuite, testCase); + } +} + +void CuSuiteRun(CuSuite* testSuite) +{ + int i; + for (i = 0 ; i < testSuite->count ; ++i) + { + CuTest* testCase = testSuite->list[i]; + if (testSuite->SuiteSetup != NULL) + (testSuite->SuiteSetup)(testSuite); + CuTestRun(testCase); + if (testSuite->SuiteTeardown != NULL) + (testSuite->SuiteTeardown)(testSuite); + if (testCase->failed) { testSuite->failCount += 1; } + } +} + +void CuSuiteSummary(CuSuite* testSuite, CuString* summary) +{ + int i; + for (i = 0 ; i < testSuite->count ; ++i) + { + CuTest* testCase = testSuite->list[i]; + CuStringAppend(summary, testCase->failed ? "F" : ".", 1); + } + CuStringAppend(summary, "\n\n", 1); +} + +void CuSuiteDetails(CuSuite* testSuite, CuString* details) +{ + int i; + int failCount = 0; + + if (testSuite->failCount == 0) + { + int passCount = testSuite->count - testSuite->failCount; + const char* testWord = passCount == 1 ? 
"test" : "tests"; + CuStringAppendFormat(details, 1, "OK (%d %s)\n", passCount, testWord); + } + else + { + if (testSuite->failCount == 1) + CuStringAppend(details, "There was 1 failure:\n", 1); + else + CuStringAppendFormat(details, 1, "There were %d failures:\n", testSuite->failCount); + + for (i = 0 ; i < testSuite->count ; ++i) + { + CuTest* testCase = testSuite->list[i]; + if (testCase->failed) + { + failCount++; + CuStringAppendFormat(details, 1, "%d) %s: %s\n", + failCount, testCase->name, testCase->message); + } + } + CuStringAppend(details, "\n!!!FAILURES!!!\n", 1); + + CuStringAppendFormat(details, 1, "Runs: %d ", testSuite->count); + CuStringAppendFormat(details, 1, "Passes: %d ", testSuite->count - testSuite->failCount); + CuStringAppendFormat(details, 1, "Fails: %d\n", testSuite->failCount); + } +} diff --git a/test/c/cutest/CuTest.h b/test/c/cutest/CuTest.h new file mode 100644 index 00000000..7fcab4ad --- /dev/null +++ b/test/c/cutest/CuTest.h @@ -0,0 +1,160 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#ifndef CU_TEST_H +#define CU_TEST_H + +#include +#include + +#define CUTEST_VERSION "CuTest 1.6" + +/* CuString */ + +char* CuStrAlloc(int size); +char* CuStrCopy(const char* old); + +#define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE))) + +#define HUGE_STRING_LEN 8192 +#define STRING_MAX 256 +#define STRING_INC 256 + +#define CU_FAIL_HEADER "INTERNAL TEST HARNESS FAILURE" + +typedef struct +{ + int length; + int size; + char* buffer; +} CuString; + +void CuStringInit(CuString* str); +CuString* CuStringNew(void); +void CuStringRead(CuString* str, const char* path); +int CuStringAppend(CuString* str, const char* text, int dump); +int CuStringAppendChar(CuString* str, char ch, int dump); +int CuStringAppendFormat(CuString* str, int dump, const char* format, ...); +int CuStringInsert(CuString* str, const char* text, int pos, int dump); +int CuStringResize(CuString* str, int newSize); +void CuStringDelete(CuString* str); + +/* CuTest */ + +typedef struct CuSuite CuSuite; +typedef struct CuTest CuTest; + +typedef int (*TestFunction)(CuTest *); +typedef int (*TestSetupFunction)(CuTest *); +typedef int (*TestTeardownFunction)(CuTest *); +typedef int (*SuiteSetupFunction)(CuSuite *); +typedef int (*SuiteTeardownFunction)(CuSuite *); + +struct CuTest +{ + char* name; + TestFunction function; + int (*TestSetup)(CuTest *); + int (*TestTeardown)(CuTest *); + int failed; + int ran; + /* + * TODO: Change message from a const char * to a CuString, so that + * test cases can add content. + */ + const char* message; + CuSuite *suite; + jmp_buf *jumpBuf; + void *context; /* For private use by individual test. 
*/ +}; + +void CuTestInit(CuTest* t, const char* name, TestFunction function, + TestSetupFunction setup, TestTeardownFunction teardown); +CuTest* CuTestNew(const char* name, TestFunction function, + TestSetupFunction setup, TestTeardownFunction teardown); +void CuTestRun(CuTest* tc); +void CuTestDelete(CuTest *t); + +int TestEnvConfigTestSetup(CuTest *); +int TestEnvConfigTestTeardown(CuTest *); + +/* Internal versions of assert functions -- use the public versions */ +void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message); +void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition); +void CuAssertStrEquals_LineMsg(CuTest* tc, + const char* file, int line, const char* message, + const char* expected, const char* actual); +void CuAssertIntEquals_LineMsg(CuTest* tc, + const char* file, int line, const char* message, + int expected, int actual); +void CuAssertDblEquals_LineMsg(CuTest* tc, + const char* file, int line, const char* message, + double expected, double actual, double delta); +void CuAssertPtrEquals_LineMsg(CuTest* tc, + const char* file, int line, const char* message, + void* expected, void* actual); + +/* public assert functions */ + +#define CuFail(tc, ms) CuFail_Line( (tc), __FILE__, __LINE__, NULL, (ms)) +#define CuAssert(tc, ms, cond) CuAssert_Line((tc), __FILE__, __LINE__, (ms), (cond)) +#define CuAssertTrue(tc, cond) CuAssert_Line((tc), __FILE__, __LINE__, "assert failed", (cond)) + +#define CuAssertStrEquals(tc,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) +#define CuAssertStrEquals_Msg(tc,ms,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) +#define CuAssertIntEquals(tc,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) +#define CuAssertIntEquals_Msg(tc,ms,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) +#define CuAssertDblEquals(tc,ex,ac,dl) 
CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac),(dl)) +#define CuAssertDblEquals_Msg(tc,ms,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac),(dl)) +#define CuAssertPtrEquals(tc,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) +#define CuAssertPtrEquals_Msg(tc,ms,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) + +#define CuAssertPtrNotNull(tc,p) CuAssert_Line((tc),__FILE__,__LINE__,"null pointer unexpected",(p != NULL)) +#define CuAssertPtrNotNullMsg(tc,msg,p) CuAssert_Line((tc),__FILE__,__LINE__,(msg),(p != NULL)) + +/* CuSuite */ + +#define MAX_TEST_CASES 1024 + +#define SUITE_ADD_TEST(SUITE,TEST,SETUP_FN, TEARDOWN_FN) \ + CuSuiteAdd(SUITE, CuTestNew(#TEST, TEST, SETUP_FN, TEARDOWN_FN)) + +typedef struct { + char *name; + int (*fn)(CuString*); +} TestSuite; + +struct CuSuite +{ + const char *name; + int count; + CuTest* list[MAX_TEST_CASES]; + int failCount; + + int (*SuiteSetup)(CuSuite *); + int (*SuiteTeardown)(CuSuite *); + void *context; /* For private use by individual suite. */ +}; + + +void CuSuiteInit(CuSuite* testSuite, const char *name, + SuiteSetupFunction setup, SuiteTeardownFunction teardown); +CuSuite* CuSuiteNew(const char *name, + SuiteSetupFunction setup, SuiteTeardownFunction teardown); +void CuSuiteDelete(CuSuite *testSuite); +void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase); +void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2); +void CuSuiteRun(CuSuite* testSuite); +void CuSuiteSummary(CuSuite* testSuite, CuString* summary); +void CuSuiteDetails(CuSuite* testSuite, CuString* details); + +/* Berkeley DB extensions. 
*/ +int RunAllSuites(); +int RunSuite(const char *suite); +int RunTest(const char *suite, const char *test); + +#endif /* CU_TEST_H */ diff --git a/test/c/cutest/CuTests.c b/test/c/cutest/CuTests.c new file mode 100644 index 00000000..0c6d59fa --- /dev/null +++ b/test/c/cutest/CuTests.c @@ -0,0 +1,252 @@ + + +/* This is auto-generated code. Edit at your own peril. */ +#include +#include + +#include "CuTest.h" + +extern int TestChannelSuiteSetup(CuSuite *suite); +extern int TestChannelSuiteTeardown(CuSuite *suite); +extern int TestChannelTestSetup(CuTest *test); +extern int TestChannelTestTeardown(CuTest *test); +extern int TestChannelFeature(CuTest *ct); +extern int TestDbTuner(CuTest *ct); +extern int TestNoEncryptedDb(CuTest *ct); +extern int TestEncryptedDbFlag(CuTest *ct); +extern int TestEncryptedDb(CuTest *ct); +extern int TestEncryptedDbFlagAndDb(CuTest *ct); +extern int TestEnvWithNoEncryption(CuTest *ct); +extern int TestEnvWithEncryptedDbFlag(CuTest *ct); +extern int TestEnvWithEncryptedDb(CuTest *ct); +extern int TestEnvWithEncryptedDbFlagAndDb(CuTest *ct); +extern int TestEncyptedEnv(CuTest *ct); +extern int TestEncyptedEnvWithEncyptedDbFlag(CuTest *ct); +extern int TestEncyptedEnvWithEncyptedDb(CuTest *ct); +extern int TestEncyptedEnvWithEncryptedDbFlagAndDb(CuTest *ct); +extern int TestEnvConfigSuiteSetup(CuSuite *ct); +extern int TestEnvConfigSuiteTeardown(CuSuite *ct); +extern int TestEnvConfigTestSetup(CuTest *ct); +extern int TestEnvConfigTestTeardown(CuTest *ct); +extern int TestSetTxMax(CuTest *ct); +extern int TestSetLogMax(CuTest *ct); +extern int TestSetLogBufferSize(CuTest *ct); +extern int TestSetLogRegionSize(CuTest *ct); +extern int TestGetLockConflicts(CuTest *ct); +extern int TestSetLockDetect(CuTest *ct); +extern int TestLockMaxLocks(CuTest *ct); +extern int TestLockMaxLockers(CuTest *ct); +extern int TestSetLockMaxObjects(CuTest *ct); +extern int TestSetLockTimeout(CuTest *ct); +extern int TestSetTransactionTimeout(CuTest *ct); 
+extern int TestSetCachesize(CuTest *ct); +extern int TestSetThreadCount(CuTest *ct); /* SKIP */ +extern int TestKeyExistErrorReturn(CuTest *ct); +extern int TestPartialSuiteSetup(CuSuite *ct); +extern int TestPartialSuiteTeardown(CuSuite *ct); +extern int TestPartialTestSetup(CuTest *ct); +extern int TestPartialTestTeardown(CuTest *ct); +extern int TestDbPartialGet(CuTest *ct); +extern int TestDbPartialPGet(CuTest *ct); +extern int TestCursorPartialGet(CuTest *ct); +extern int TestCursorPartialPGet(CuTest *ct); +extern int TestQueue(CuTest *ct); + +int RunChannelTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestChannel", + TestChannelSuiteSetup, TestChannelSuiteTeardown); + int count; + + SUITE_ADD_TEST(suite, TestChannelFeature, + TestChannelTestSetup, TestChannelTestTeardown); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunDbTunerTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestDbTuner", + NULL, NULL); + int count; + + SUITE_ADD_TEST(suite, TestDbTuner, + NULL, NULL); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunEncryptionTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestEncryption", + NULL, NULL); + int count; + + SUITE_ADD_TEST(suite, TestNoEncryptedDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncryptedDbFlag, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncryptedDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncryptedDbFlagAndDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEnvWithNoEncryption, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEnvWithEncryptedDbFlag, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEnvWithEncryptedDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEnvWithEncryptedDbFlagAndDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncyptedEnv, + NULL, NULL); + 
SUITE_ADD_TEST(suite, TestEncyptedEnvWithEncyptedDbFlag, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncyptedEnvWithEncyptedDb, + NULL, NULL); + SUITE_ADD_TEST(suite, TestEncyptedEnvWithEncryptedDbFlagAndDb, + NULL, NULL); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunEnvConfigTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestEnvConfig", + TestEnvConfigSuiteSetup, TestEnvConfigSuiteTeardown); + int count; + + SUITE_ADD_TEST(suite, TestSetTxMax, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLogMax, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLogBufferSize, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLogRegionSize, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestGetLockConflicts, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLockDetect, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestLockMaxLocks, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestLockMaxLockers, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLockMaxObjects, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetLockTimeout, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetTransactionTimeout, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + SUITE_ADD_TEST(suite, TestSetCachesize, + TestEnvConfigTestSetup, TestEnvConfigTestTeardown); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunEnvMethodTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestEnvMethod", + NULL, NULL); + int 
count; + + SUITE_ADD_TEST(suite, TestSetThreadCount, + NULL, NULL); /* SKIP */ + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunKeyExistErrorReturnTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestKeyExistErrorReturn", + NULL, NULL); + int count; + + SUITE_ADD_TEST(suite, TestKeyExistErrorReturn, + NULL, NULL); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunPartialTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestPartial", + TestPartialSuiteSetup, TestPartialSuiteTeardown); + int count; + + SUITE_ADD_TEST(suite, TestDbPartialGet, + TestPartialTestSetup, TestPartialTestTeardown); + SUITE_ADD_TEST(suite, TestDbPartialPGet, + TestPartialTestSetup, TestPartialTestTeardown); + SUITE_ADD_TEST(suite, TestCursorPartialGet, + TestPartialTestSetup, TestPartialTestTeardown); + SUITE_ADD_TEST(suite, TestCursorPartialPGet, + TestPartialTestSetup, TestPartialTestTeardown); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +int RunQueueTests(CuString *output) +{ + CuSuite *suite = CuSuiteNew("TestQueue", + NULL, NULL); + int count; + + SUITE_ADD_TEST(suite, TestQueue, + NULL, NULL); + + CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} + +TestSuite g_suites[] = { + { "TestChannel", RunChannelTests }, + { "TestDbTuner", RunDbTunerTests }, + { "TestEncryption", RunEncryptionTests }, + { "TestEnvConfig", RunEnvConfigTests }, + { "TestEnvMethod", RunEnvMethodTests }, + { "TestKeyExistErrorReturn", RunKeyExistErrorReturnTests }, + { "TestPartial", RunPartialTests }, + { "TestQueue", RunQueueTests }, 
+ { "", NULL }, +}; + diff --git a/test/c/cutest/Runner.c b/test/c/cutest/Runner.c new file mode 100644 index 00000000..16fa358c --- /dev/null +++ b/test/c/cutest/Runner.c @@ -0,0 +1,127 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * A simple main function that gives a command line to the CuTest suite. + * It lets users run the entire suites (default), or choose individual + * suite(s), or individual tests. + */ +#include +#include +#include + +#include "CuTest.h" +#include "db.h" + +#ifdef _WIN32 +extern int getopt(int, char * const *, const char *); +#endif + +int append_case(char **cases, int *num_cases, const char *this_case); +int usage(); +void CuTestAssertForDb(const char *msg, const char *file, int line); + +const char *progname; + +int main(int argc, char **argv) +{ +#define MAX_CASES 1000 + extern char *optarg; + extern int optind; + char *suites[MAX_CASES], *tests[MAX_CASES]; + int ch, failed, i, num_suites, num_tests, verbose; + char *test; + + progname = argv[0]; + + num_suites = num_tests = verbose = 0; + while ((ch = getopt(argc, argv, "s:t:v")) != EOF) + switch (ch) { + case 's': + append_case(suites, &num_suites, optarg); + break; + case 't': + append_case(tests, &num_tests, optarg); + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Setup the assert to override the default DB one. 
*/ + db_env_set_func_assert(CuTestAssertForDb); + failed = 0; + if (num_tests == 0 && num_suites == 0) + failed = RunAllSuites(); + else { + for(i = 0; i < num_suites; i++) + failed += RunSuite(suites[i]); + for(i = 0; i < num_tests; i++) { + test = strchr(tests[i], ':'); + if (test == NULL) { + fprintf(stderr, "Invalid test case: %s\n", tests[i]); + continue; + } + /* + * Replace the ':' with NULL, to split the current + * value into two strings. + */ + *test = '\0'; + ++test; + failed += RunTest(tests[i], test); + } + } + while(num_suites != 0) + free(suites[num_suites--]); + while(num_tests != 0) + free(tests[num_tests--]); + if (failed > 0) + return (1); + else + return (0); +} + +int append_case(char **cases, int *pnum_cases, const char *this_case) +{ + int num_cases; + + num_cases = *pnum_cases; + + if (num_cases >= MAX_CASES) + return (1); + + cases[num_cases] = strdup(this_case); + + if (cases[num_cases] == NULL) + return (1); + + ++(*pnum_cases); + return (0); +} + +void CuTestAssertForDb(const char *msg, const char *file, int line) +{ + CuFail_Line(NULL, file, line, NULL, msg); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: %s %s\n", progname, + "[-s suite] [-t test] -v. Multiple test and suite args allowed."); + return (EXIT_FAILURE); +} + diff --git a/test/c/cutest/gen_tester.sh b/test/c/cutest/gen_tester.sh new file mode 100644 index 00000000..05f94bbc --- /dev/null +++ b/test/c/cutest/gen_tester.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +# Auto generate single AllTests file for CuTest. +# Searches through all *.c files in the current directory. +# Prints to stdout. +# Author: Asim Jalis +# Date: 01/08/2003 + +# Edited 2010 Alex G to match DB tree layout. + +if test $# -eq 0 ; then FILES=../suites/*.c ; else FILES=$* ; fi + + +echo ' + +/* This is auto-generated code. Edit at your own peril. 
*/ +#include +#include + +#include "CuTest.h" +' + +NEXT_STRING=`grep -h "int Test" ${FILES} | sed -e 's/ {/;/' -e 's/^/extern /'` +echo "$NEXT_STRING +" + +# Want to turn TestEnvConfig.c into: +# Function name RunEnvConfigTests +# Suite name TestEnvConfig +for f in `ls ${FILES}`; do + SUITE_NAME=`basename $f .c` + RUNNER_NAME=`echo $SUITE_NAME | sed -e 's/Test\(.*\)/Run\1Tests/'` + TEST_STR=`grep "${SUITE_NAME}SuiteSetup" $f` + if [ "$TEST_STR"x != "x" ]; then + SETUP_FN=${SUITE_NAME}SuiteSetup + else + SETUP_FN="NULL" + fi + TEST_STR=`grep "${SUITE_NAME}SuiteTeardown" $f` + if [ "$TEST_STR"x != "x" ]; then + TEARDOWN_FN=${SUITE_NAME}SuiteTeardown + else + TEARDOWN_FN="NULL" + fi + echo \ +"int $RUNNER_NAME(CuString *output) +{ + CuSuite *suite = CuSuiteNew(\"$SUITE_NAME\", + $SETUP_FN, $TEARDOWN_FN); + int count; +" + + TEST_STR=`grep "${SUITE_NAME}TestSetup" $f` + if [ "$TEST_STR"x != "x" ]; then + SETUP_FN=${SUITE_NAME}TestSetup + else + SETUP_FN="NULL" + fi + TEST_STR=`grep "${SUITE_NAME}TestTeardown" $f` + if [ "$TEST_STR"x != "x" ]; then + TEARDOWN_FN=${SUITE_NAME}TestTeardown + else + TEARDOWN_FN="NULL" + fi + NEXT_STRING=`grep -h "int Test" $f | sed -e '/[Suite|Test]Setup/d' -e '/[Suite|Test]Teardown/d' -e 's/^int /\tSUITE_ADD_TEST(suite, /' -e "s/(CuTest \*ct) {/,\n\t $SETUP_FN, $TEARDOWN_FN);/"` + echo \ +"$NEXT_STRING +" + + echo \ +' CuSuiteRun(suite); + CuSuiteSummary(suite, output); + CuSuiteDetails(suite, output); + count = suite->failCount; + CuSuiteDelete(suite); + return (count); +} +' +done + +echo \ +'TestSuite g_suites[] = {' + +for f in `ls ${FILES}`; do + SUITE_NAME=`basename $f .c` + RUNNER_NAME=`echo $SUITE_NAME | sed -e 's/Test\(.*\)/Run\1Tests/'` + echo \ +" { \"$SUITE_NAME\", $RUNNER_NAME }," +done +echo \ +' { "", NULL }, +}; +' + diff --git a/test/c/cutest/license.txt b/test/c/cutest/license.txt new file mode 100644 index 00000000..fd81689d --- /dev/null +++ b/test/c/cutest/license.txt @@ -0,0 +1,38 @@ +NOTE + +The license is based on 
the zlib/libpng license. For more details see +http://www.opensource.org/licenses/zlib-license.html. The intent of the +license is to: + +- keep the license as simple as possible +- encourage the use of CuTest in both free and commercial applications + and libraries +- keep the source code together +- give credit to the CuTest contributors for their work + +If you ship CuTest in source form with your source distribution, the +following license document must be included with it in unaltered form. +If you find CuTest useful we would like to hear about it. + +LICENSE + +Copyright (c) 2003 Asim Jalis + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software in +a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not +be misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. diff --git a/test/c/suites/TestChannel.c b/test/c/suites/TestChannel.c new file mode 100644 index 00000000..59a37747 --- /dev/null +++ b/test/c/suites/TestChannel.c @@ -0,0 +1,1265 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#include +#include +#include +#endif + +#include "../cutest/CuTest.h" +#include "../common/test_util.h" + +#define MAX_SEGS 10 +#define MAX_MSGS 10 + +#undef LOCK_MUTEX +#undef UNLOCK_MUTEX +#ifdef _WIN32 +#define sleep(s) Sleep(1000 * (s)) +typedef HANDLE mutex_t; +#define mutex_init(m) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? \ + 0 : GetLastError()) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 0 : GetLastError()) +#define mutex_destroy(m) (CloseHandle(*(m)) ? 0 : GetLastError()) +typedef HANDLE cond_t; +#define cond_init(c) ((*(c) = CreateEvent(NULL, \ + TRUE, FALSE, NULL)) == NULL ? GetLastError() : 0) +#define cond_wait(c, m) (SignalObjectAndWait(*(m), *(c), INFINITE, FALSE) == WAIT_OBJECT_0 ? \ + 0 : GetLastError()) +#define cond_wake(c) (SetEvent(*(c)) ? 0 : GetLastError()) +#else +typedef pthread_mutex_t mutex_t; +#define mutex_init(m) pthread_mutex_init((m), NULL) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) +#define mutex_destroy(m) pthread_mutex_destroy(m) +typedef pthread_cond_t cond_t; +#define cond_init(c) pthread_cond_init((c), NULL) +#define cond_wait(c, m) pthread_cond_wait((c), (m)) +#define cond_wake(c) pthread_cond_broadcast(c) +#endif + +struct channel_test_globals { + CuTest *test; + mutex_t mtx; + cond_t cond; +}; + +struct report { + int dbt_count; + DBT dbt[MAX_SEGS]; + + int msg_count; + char *msg[MAX_MSGS]; + + int done, ret; +}; + +struct reports { + mutex_t m; + int count; + struct report rpt[2]; +}; + +struct env_info { + struct report *rpt; + struct reports *rpts; + struct channel_test_globals *g; + int startupdone; +}; + +struct msginfo { + DB_ENV *dbenv; + int count; +}; + +typedef int (*PRED) __P((void *)); + +static int await_condition __P((PRED, void *, long)); 
+static int check_dbt_string __P((DBT *, const char *)); +static void clear_rpt __P((DB_ENV *)); +static void clear_rpt_int __P((struct report *)); +static int env_done __P((void *)); +static struct report *get_rpt __P((const DB_ENV *)); +static int fortify __P((DB_ENV *, struct channel_test_globals *)); +static int has_msgs __P((void *)); +static void msg_disp __P((DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +static void msg_disp2 __P((DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +static void msg_disp3 __P((DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +static void msg_disp4 __P((DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +static void msg_disp5 __P((DB_ENV *, DB_CHANNEL *, DBT *, u_int32_t, u_int32_t)); +static int mystrcmp __P((char *, const char *)); +static void notify __P((DB_ENV *, u_int32_t, void *)); +static int is_started __P((void *)); +static void td __P((DB_ENV *)); +static void test_data_init __P((DBT *, char *)); +static void test_zeroes __P((DB_CHANNEL *, DB_ENV *, CuTest *)); +static int two_done __P((void *)); + +#define LOCK_MUTEX(m) do { \ + int __ret; \ + __ret = mutex_lock(m); \ + assert(__ret == 0); \ +} while (0) + +#define UNLOCK_MUTEX(m) do { \ + int __ret; \ + __ret = mutex_unlock(m); \ + assert(__ret == 0); \ +} while (0) + +int TestChannelSuiteSetup(CuSuite *suite) { + return (0); +} + +int TestChannelSuiteTeardown(CuSuite *suite) { + return (0); +} + +int TestChannelTestSetup(CuTest *test) { + struct channel_test_globals *g; + int ret; + + if ((g = calloc(1, sizeof(*g))) == NULL) + return (ENOMEM); + if ((ret = mutex_init(&g->mtx)) != 0) { + free(g); + return (ret); + } + if ((ret = cond_init(&g->cond)) != 0) { + mutex_destroy(&g->mtx); + free(g); + return (ret); + } + g->test = test; + test->context = g; + return (0); +} + +int TestChannelTestTeardown(CuTest *test) { + struct channel_test_globals *g; + int ret; + + g = test->context; + assert(g != NULL); + ret = mutex_destroy(&g->mtx); + 
free(g); + test->context = NULL; + return (ret); +} + +static void +myerrcall(const DB_ENV *dbenv, const char *errpfx, const char *msg) { + struct report *rpt = get_rpt(dbenv); + + assert(rpt->msg_count < MAX_MSGS); + assert((rpt->msg[rpt->msg_count++] = strdup(msg)) != NULL); +} + +static int +fortify(dbenv, g) + DB_ENV *dbenv; + struct channel_test_globals *g; +{ + struct report *rpt; + struct env_info *info; + + if ((info = calloc(1, sizeof(*info))) == NULL) + return (ENOMEM); + if ((rpt = calloc(1, sizeof(*rpt))) == NULL) { + free(info); + return (ENOMEM); + } + info->rpt = rpt; + info->rpts = NULL; + info->g = g; + info->startupdone = 0; + dbenv->app_private = info; + return (0); +} + +static int +setup(envp1, envp2, envp3, g) + DB_ENV **envp1, **envp2, **envp3; + struct channel_test_globals *g; +{ + DB_ENV *dbenv1, *dbenv2, *dbenv3; + DB_SITE *dbsite; + u_int32_t flags; + int ret; + +#define CHECK(call) \ + do { \ + if ((ret = (call)) != 0) { \ + fprintf(stderr, "error %d from %s", ret, #call); \ + goto err; \ + } \ + } while (0); + + dbenv1 = dbenv2 = dbenv3 = NULL; + CHECK(db_env_create(&dbenv1, 0)); + CHECK(fortify(dbenv1, g)); + dbenv1->set_errpfx(dbenv1, "ENV1"); + dbenv1->set_errcall(dbenv1, myerrcall); + flags = DB_INIT_REP | DB_INIT_LOG | DB_INIT_LOCK | DB_INIT_MPOOL | + DB_INIT_TXN | DB_RECOVER | DB_THREAD | DB_CREATE; + setup_envdir("DIR1", 1); + CHECK(dbenv1->open(dbenv1, "DIR1", flags, 0)); + + CHECK(dbenv1->rep_set_config(dbenv1, DB_REPMGR_CONF_ELECTIONS, 0)); + CHECK(dbenv1->repmgr_site(dbenv1, "localhost", 6000, &dbsite, 0)); + CHECK(dbsite->set_config(dbsite, DB_LOCAL_SITE, 1)); + CHECK(dbsite->close(dbsite)); + CHECK(dbenv1->set_event_notify(dbenv1, notify)); + CHECK(dbenv1->repmgr_msg_dispatch(dbenv1, msg_disp, 0)); + CHECK(dbenv1->rep_set_timeout(dbenv1, + DB_REP_CONNECTION_RETRY, 3000000)); + CHECK(dbenv1->repmgr_start(dbenv1, 2, DB_REP_MASTER)); + + CHECK(db_env_create(&dbenv2, 0)); + CHECK(fortify(dbenv2, g)); + 
dbenv2->set_errpfx(dbenv2, "ENV2"); + dbenv2->set_errcall(dbenv2, myerrcall); + setup_envdir("DIR2", 1); + CHECK(dbenv2->open(dbenv2, "DIR2", flags, 0)); + CHECK(dbenv2->rep_set_config(dbenv2, DB_REPMGR_CONF_ELECTIONS, 0)); + + CHECK(dbenv2->repmgr_site(dbenv2, "localhost", 6001, &dbsite, 0)); + CHECK(dbsite->set_config(dbsite, DB_LOCAL_SITE, 1)); + CHECK(dbsite->close(dbsite)); + CHECK(dbenv2->repmgr_site(dbenv2, "localhost", 6000, &dbsite, 0)); + CHECK(dbsite->set_config(dbsite, DB_BOOTSTRAP_HELPER, 1)); + CHECK(dbsite->close(dbsite)); + CHECK(dbenv2->set_event_notify(dbenv2, notify)); + CHECK(dbenv2->rep_set_timeout(dbenv2, + DB_REP_CONNECTION_RETRY, 3000000)); + CHECK(dbenv2->repmgr_start(dbenv2, 2, DB_REP_CLIENT)); + + await_condition(is_started, dbenv2, 60); + if (!is_started(dbenv2)) { + dbenv2->errx(dbenv2, "startup done not achieved in 60 seconds"); + ret = DB_TIMEOUT; + goto err; + } + + CHECK(db_env_create(&dbenv3, 0)); + CHECK(fortify(dbenv3, g)); + dbenv3->set_errpfx(dbenv3, "ENV3"); + dbenv3->set_errcall(dbenv3, myerrcall); + CHECK(dbenv3->repmgr_msg_dispatch(dbenv3, msg_disp2, 0)); + setup_envdir("DIR3", 1); + CHECK(dbenv3->open(dbenv3, "DIR3", flags, 0)); + CHECK(dbenv3->rep_set_config(dbenv3, DB_REPMGR_CONF_ELECTIONS, 0)); + + CHECK(dbenv3->repmgr_site(dbenv3, "localhost", 6002, &dbsite, 0)); + CHECK(dbsite->set_config(dbsite, DB_LOCAL_SITE, 1)); + CHECK(dbsite->close(dbsite)); + CHECK(dbenv3->repmgr_site(dbenv3, "localhost", 6000, &dbsite, 0)); + CHECK(dbsite->set_config(dbsite, DB_BOOTSTRAP_HELPER, 1)); + CHECK(dbsite->close(dbsite)); + CHECK(dbenv3->set_event_notify(dbenv3, notify)); + CHECK(dbenv3->rep_set_timeout(dbenv3, + DB_REP_CONNECTION_RETRY, 3000000)); + CHECK(dbenv3->repmgr_start(dbenv3, 2, DB_REP_CLIENT)); + + await_condition(is_started, dbenv3, 60); + if (!is_started(dbenv3)) { + dbenv3->errx(dbenv3, "startup done not achieved in 60 seconds"); + ret = DB_TIMEOUT; + goto err; + } + + *envp1 = dbenv1; + *envp2 = dbenv2; + *envp3 = 
dbenv3; + return (0); + +err: + if (dbenv3 != NULL) + td(dbenv3); + if (dbenv2 != NULL) + td(dbenv2); + if (dbenv1 != NULL) + td(dbenv1); + return (ret); +} + +static void +td(dbenv) + DB_ENV *dbenv; +{ + struct env_info *info; + + dbenv->set_errcall(dbenv, NULL); + dbenv->set_event_notify(dbenv, NULL); + + info = dbenv->app_private; + dbenv->close(dbenv, 0); + if (info != NULL) { + clear_rpt_int(info->rpt); + free(info->rpt); + free(info); + } +} + +static void +clear_rpt_int(rpt) + struct report *rpt; +{ + int i; + + for (i = 0; i < rpt->dbt_count; i++) + free(rpt->dbt[i].data); + rpt->dbt_count = 0; + + for (i = 0; i < rpt->msg_count; i++) + free(rpt->msg[i]); + rpt->msg_count = 0; + + rpt->done = 0; +} + +static void +clear_rpt(dbenv) + DB_ENV *dbenv; +{ + struct env_info *info; + struct report *rpt; + + info = dbenv->app_private; + rpt = info->rpt; + clear_rpt_int(rpt); +} + +static int +env_done(ctx) + void *ctx; +{ + DB_ENV *dbenv = ctx; + struct report *rpt = get_rpt(dbenv); + + return (rpt->done); +} + +static void +await_done(dbenv) + DB_ENV *dbenv; +{ + await_condition(env_done, dbenv, 60); + assert(env_done(dbenv)); +} + +static int +has_msgs(ctx) + void *ctx; +{ + struct msginfo *inf = ctx; + DB_ENV *dbenv = inf->dbenv; + struct report *rpt = get_rpt(dbenv); + + return (rpt->msg_count == inf->count); +} + +static struct report * +get_rpt(dbenv) + const DB_ENV *dbenv; +{ + struct env_info *info; + + if ((info = dbenv->app_private) == NULL) + return (NULL); + return (info->rpt); +} + +int TestChannelFeature(CuTest *ct) { + DB_ENV *dbenv1, *dbenv2, *dbenv3; + DB_CHANNEL *ch; + DB_REP_STAT *stats; + DB_SITE *dbsite; + DBT dbt, rdbts[10], resp; + struct channel_test_globals *g; + struct report *rpt; + struct reports rpts; + struct msginfo info; + char *p; + void *pointer, *vp, *buffer; + u_int8_t short_buf[4]; + size_t sz; + int done, eid, ret; + +#ifdef _WIN32 + setvbuf(stdout, NULL, _IONBF, 0); +#endif + printf("this is a test for repmgr channels 
feature\n"); + + g = ct->context; + CuAssertTrue(ct, (ret = setup(&dbenv1, &dbenv2, &dbenv3, g) == 0)); + + /* + * For this first section, we're sending to ENV2. TODO: make that more + * clear, without duplicating the hard-coding of the port. + */ + CuAssertTrue(ct, + (ret = dbenv1->repmgr_site(dbenv1, + "localhost", 6001, &dbsite, 0)) == 0); + CuAssertTrue(ct, (ret = dbsite->get_eid(dbsite, &eid)) == 0); + CuAssertTrue(ct, (ret = dbsite->close(dbsite)) == 0); + CuAssertTrue(ct, (ret = dbenv1->repmgr_channel(dbenv1, eid, &ch, 0)) == 0); + + memset(&dbt, 0, sizeof(dbt)); + p = "foobar"; + dbt.data = p; + dbt.size = (u_int32_t)strlen(p) + 1; + memset(&resp, 0, sizeof(resp)); + resp.flags = DB_DBT_MALLOC; + printf("1. send async msg with no msg dispatch in place\n"); + clear_rpt(dbenv2); + CuAssertTrue(ct, (ret = ch->send_msg(ch, &dbt, 1, 0)) == 0); + + /* Wait til dbenv2 has reported 1 msg. */ + info.dbenv = dbenv2; + info.count = 1; + await_condition(has_msgs, &info, 60); + rpt = get_rpt(dbenv2); + CuAssertTrue(ct, rpt->msg_count == 1); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "No message dispatch call-back function has been configured") == 0); + + printf("2. send request with no msg dispatch in place\n"); + clear_rpt(dbenv2); + ret = ch->send_request(ch, &dbt, 1, &resp, 0, 0); + CuAssertTrue(ct, ret == DB_NOSERVER); + if (resp.data != NULL) + free(resp.data); + await_condition(has_msgs, &info, 60); + CuAssertTrue(ct, rpt->msg_count == 1); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "No message dispatch call-back function has been configured") == 0); + + CuAssertTrue(ct, (ret = dbenv2->repmgr_msg_dispatch(dbenv2, msg_disp, 0)) == 0); + + printf("3. 
send request where recip forgot resp\n"); + clear_rpt(dbenv2); + ret = ch->send_request(ch, &dbt, 1, &resp, 0, 0); + CuAssertTrue(ct, ret == DB_KEYEMPTY); + if (resp.data != NULL) + free(resp.data); + await_done(dbenv2); + CuAssertTrue(ct, rpt->msg_count == 1); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "Application failed to provide a response") == 0); + + printf("4. now with dispatch fn installed, send a simple async msg\n"); + clear_rpt(dbenv2); + test_data_init(&dbt, "Mr. Watson -- come here -- I want to see you."); + CuAssertTrue(ct, (ret = ch->send_msg(ch, &dbt, 1, 0)) == 0); + await_done(dbenv2); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], + "Mr. Watson -- come here -- I want to see you."); + CuAssertTrue(ct, rpt->msg_count == 0); + + printf("5. send a multi-seg request\n"); + clear_rpt(dbenv2); + memset(&resp, 0, sizeof(resp)); + resp.flags = DB_DBT_MALLOC; + test_data_init(&rdbts[0], "I wish I were a fish"); + test_data_init(&rdbts[1], "I wish I were a bass"); + test_data_init(&rdbts[2], + "I'd climb up on a slippery rock and slide down on my ... hands and knees"); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 3, &resp, 0, 0)) == 0); + check_dbt_string(&resp, "this is the answer to the request"); + if (resp.data) + free(resp.data); + await_done(dbenv2); + CuAssertTrue(ct, rpt->dbt_count == 3); + check_dbt_string(&rpt->dbt[0], "I wish I were a fish"); + check_dbt_string(&rpt->dbt[1], "I wish I were a bass"); + check_dbt_string(&rpt->dbt[2], + "I'd climb up on a slippery rock and slide down on my ... hands and knees"); + CuAssertTrue(ct, rpt->msg_count == 0); + + test_zeroes(ch, dbenv2, ct); + + printf("7. 
send request with too-small USERMEM buffer\n"); + clear_rpt(dbenv2); + resp.data = short_buf; + resp.ulen = sizeof(short_buf); + resp.flags = DB_DBT_USERMEM; + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 3, &resp, 0, 0)) == DB_BUFFER_SMALL); + await_done(dbenv2); + CuAssertTrue(ct, rpt->msg_count == 1); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "originator's USERMEM buffer too small") == 0); + CuAssertTrue(ct, rpt->ret == EINVAL); + +#define BUFLEN 20000 + buffer = malloc(BUFLEN); + if (buffer == NULL) + return (2); + resp.data = buffer; + resp.ulen = BUFLEN; + resp.flags = DB_DBT_USERMEM; + + printf("8. send USERMEM request without necessary DB_MULTIPLE\n"); + clear_rpt(dbenv2); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 2, &resp, 0, 0)) == DB_BUFFER_SMALL); + await_done(dbenv2); + CuAssertTrue(ct, rpt->msg_count == 1); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "originator does not accept multi-segment response") == 0); + CuAssertTrue(ct, rpt->ret == EINVAL); + + printf("9. 
send USERMEM request with DB_MULTIPLE\n"); + clear_rpt(dbenv2); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 2, &resp, 0, DB_MULTIPLE)) == 0); + DB_MULTIPLE_INIT(pointer, &resp); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, strcmp((char*)vp, "roses are red") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, strcmp((char*)vp, "violets are blue") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, pointer == NULL); + + ch->close(ch, 0); + + + /* ------------------------------- */ + + + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, DB_EID_MASTER, &ch, 0)) == 0); + CuAssertTrue(ct, (ret = dbenv1->repmgr_msg_dispatch(dbenv1, msg_disp2, 0)) == 0); + + // do a request to master + // switch masters + // do a request to new master + printf("(now we try a couple of operations on a master channel)\n"); + + printf("10. send request to original master\n"); + rpt = get_rpt(dbenv1); + clear_rpt(dbenv1); + resp.data = buffer; + resp.ulen = BUFLEN; + resp.flags = DB_DBT_USERMEM; + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 1, &resp, 0, 0)) == 0); + check_dbt_string(&resp, "ENV1"); + await_done(dbenv1); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], "I wish I were a fish"); + + printf("switch master and wait for our client to see the change\n"); + ((struct env_info *)dbenv2->app_private)->startupdone = 0; + CuAssertTrue(ct, (ret = dbenv1->repmgr_start(dbenv1, 0, DB_REP_CLIENT)) == 0); + sleep(1); /* workaround for 19329 */ + for (done = 0; ; ) { + /* + * Become master, and then make sure it really happened. 
+ * Occasionally a race develops, where we're still holding on to + * the msg lockout at env3 at this point, in which case the + * rep_start() call (underlying our repmgr_start() call here) is + * simply dropped on the floor. + */ + CuAssertTrue(ct, (ret = dbenv3->repmgr_start(dbenv3, + 0, DB_REP_MASTER)) == 0); + CuAssertTrue(ct, (ret = dbenv3->rep_stat(dbenv3, + &stats, 0)) == 0); + done = stats->st_status == DB_REP_MASTER; + free(stats); + if (done) + break; + sleep(1); + }; + + /* + * !!! + * Workaround for 19297: wait until verify dance is complete at env2, + * because (just a little bit) later we're going to switch master again, + * to env2. If rep_start(MASTER) at env2 happens while processing + * VERIFY match record, core rep ignores the rep_start() (even though it + * returns 0). + */ + LOCK_MUTEX(&g->mtx); + while (!((struct env_info *)dbenv2->app_private)->startupdone) { + cond_wait(&g->cond, &g->mtx); + } +// TODO: fix these macros so that this ridiculous hack isn't necessary +#ifndef _WIN32 + UNLOCK_MUTEX(&g->mtx); +#endif + + + printf("11. send request which should go to new master (only)\n"); + clear_rpt(dbenv1); + clear_rpt(dbenv3); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 1, &resp, 0, 0)) == 0); + check_dbt_string(&resp, "ENV3"); + rpt = get_rpt(dbenv3); + await_done(dbenv3); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], "I wish I were a fish"); + rpt = get_rpt(dbenv1); + CuAssertTrue(ct, !rpt->done); /* old master shouldn't have recvd anything */ + + printf("switch master again, to ``self''\n"); + CuAssertTrue(ct, (ret = dbenv3->repmgr_start(dbenv3, 0, DB_REP_CLIENT)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_start(dbenv2, 0, DB_REP_MASTER)) == 0); + /* No need to wait for env2 to see that env2 has become master. */ + + clear_rpt(dbenv1); + clear_rpt(dbenv2); + clear_rpt(dbenv3); + printf("12. 
send to self, async\n"); + CuAssertTrue(ct, (ret = ch->send_msg(ch, &dbt, 1, 0)) == 0); + await_done(dbenv2); + rpt = get_rpt(dbenv2); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], + "Mr. Watson -- come here -- I want to see you."); + CuAssertTrue(ct, rpt->msg_count == 0); + printf(" (check that other two sites didn't receive it)\n"); + sleep(1); + CuAssertTrue(ct, !get_rpt(dbenv1)->done); + CuAssertTrue(ct, !get_rpt(dbenv3)->done); + + printf("13. send-to-self request\n"); + clear_rpt(dbenv2); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 2, &resp, 0, DB_MULTIPLE)) == 0); + DB_MULTIPLE_INIT(pointer, &resp); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, strcmp((char*)vp, "roses are red") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, strcmp((char*)vp, "violets are blue") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, pointer == NULL); + + /* + * re-test the 0-length cases, in the send-to-self context (the + * implementation has a bunch of separate code) + */ + test_zeroes(ch, dbenv2, ct); + + ch->close(ch, 0); + + /* ---------------------------------------- */ + + // If you go from env2 to env, we know that it's port 6000. TODO: make this + // more robust by storing it some more structured form. + // + CuAssertTrue(ct, (ret = dbenv1->repmgr_msg_dispatch(dbenv1, msg_disp3, 0)) == 0); + CuAssertTrue(ct, + (ret = dbenv2->repmgr_site(dbenv2, + "localhost", 6000, &dbsite, 0)) == 0); + CuAssertTrue(ct, (ret = dbsite->get_eid(dbsite, &eid)) == 0); + CuAssertTrue(ct, (ret = dbsite->close(dbsite)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, eid, &ch, 0)) == 0); + + printf("14. 
send request to site that has been shut down\n"); + td(dbenv1); + memset(&resp, 0, sizeof(resp)); + resp.flags = DB_DBT_MALLOC; + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 2, &resp, 0, 0)) == + DB_REP_UNAVAIL); + if (resp.data != NULL) + free(resp.data); + + // TODO: a much more interesting case is to have the remote site shut + // down while waiting for the response, because that exercises some + // clean-up code. But I guess that requires running in a couple of + // threads. + + ch->close(ch, 0); + + printf("15. try to connect to a down site\n"); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, eid, &ch, 0)) == DB_REP_UNAVAIL); + + printf("16. try to connect to a non-existent EID\n"); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, 1732, &ch, 0)) == EINVAL); + + printf("17. connect master to self from the start\n"); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, DB_EID_MASTER, &ch, 0)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_msg_dispatch(dbenv2, msg_disp2, 0)) == 0); + rpt = get_rpt(dbenv2); + clear_rpt(dbenv2); + resp.data = buffer; + resp.ulen = BUFLEN; + resp.flags = DB_DBT_USERMEM; + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 1, &resp, 0, 0)) == 0); + check_dbt_string(&resp, "ENV2"); + await_done(dbenv2); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], "I wish I were a fish"); + + ch->close(ch, 0); + + /* + * Send an async message from env2 to env3, at which point env3 will + * reply by returning two async messages back to env2. + */ + printf("18. 
test async replies to (async) messages\n"); + CuAssertTrue(ct, (ret = dbenv3->repmgr_msg_dispatch(dbenv3, msg_disp3, 0)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_msg_dispatch(dbenv2, msg_disp4, 0)) == 0); + CuAssertTrue(ct, + (ret = dbenv2->repmgr_site(dbenv2, + "localhost", 6002, &dbsite, 0)) == 0); + CuAssertTrue(ct, (ret = dbsite->get_eid(dbsite, &eid)) == 0); + CuAssertTrue(ct, (ret = dbsite->close(dbsite)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, eid, &ch, 0)) == 0); + rpt = get_rpt(dbenv3); + clear_rpt(dbenv3); + ((struct env_info *)dbenv2->app_private)->rpts = &rpts; + memset(&rpts, 0, sizeof(rpts)); + mutex_init(&rpts.m); + CuAssertTrue(ct, (ret = ch->send_msg(ch, rdbts, 1, 0)) == 0); + await_done(dbenv3); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, rpt->dbt_count == 1); + check_dbt_string(&rpt->dbt[0], "I wish I were a fish"); + CuAssertTrue(ct, await_condition(two_done, dbenv2, 10)); + CuAssertTrue(ct, rpts.rpt[0].done); + CuAssertTrue(ct, rpts.rpt[0].dbt_count == 1); + check_dbt_string(&rpts.rpt[0].dbt[0], "roses may be pink"); + + CuAssertTrue(ct, rpts.rpt[1].done); + CuAssertTrue(ct, rpts.rpt[1].dbt_count == 1); + check_dbt_string(&rpts.rpt[1].dbt[0], "I think"); + clear_rpt_int(&rpts.rpt[0]); + clear_rpt_int(&rpts.rpt[1]); + + ch->close(ch, 0); + sleep(1); /* wait for "EOF on connection" msg before cleaning, below */ + // This kluge disappears when GM fixes that err msg to become an event + + printf("19. 
test illegal calls from the msg disp function\n"); + clear_rpt(dbenv3); + CuAssertTrue(ct, (ret = dbenv3->repmgr_msg_dispatch(dbenv3, msg_disp5, 0)) == 0); + CuAssertTrue(ct, (ret = dbenv2->repmgr_channel(dbenv2, eid, &ch, 0)) == 0); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 1, &resp, 0, 0)) == 0); + await_done(dbenv3); + rpt = get_rpt(dbenv3); + CuAssertTrue(ct, rpt->ret == EINVAL); + CuAssertTrue(ct, rpt->msg_count == 3); + CuAssertTrue(ct, mystrcmp(rpt->msg[0], + "set_timeout() invalid on DB_CHANNEL supplied to msg dispatch function") == 0); + CuAssertTrue(ct, mystrcmp(rpt->msg[1], + "close() invalid on DB_CHANNEL supplied to msg dispatch function") == 0); + CuAssertTrue(ct, mystrcmp(rpt->msg[2], +"send_request() invalid on DB_CHANNEL supplied to msg dispatch function") == 0); + ch->close(ch, 0); + + free(buffer); + + td(dbenv2); + td(dbenv3); + return (0); +} + +static int +two_done(ctx) + void *ctx; +{ + DB_ENV *dbenv = ctx; + struct reports *rpts = ((struct env_info *)dbenv->app_private)->rpts; + + return (rpts->count == 2 && rpts->rpt[0].done && rpts->rpt[1].done); +} + +/* return 1 ("true") for a match, 0 ("false") otherwise */ +static int +check_dbt_string(dbt, s) + DBT *dbt; + const char *s; +{ + if (dbt->size != strlen(s)) + return (0); + if (dbt->size == 0) + return (1); + return (strcmp((char*)dbt->data, s) == 0); +} + +static int +is_started(ctx) + void *ctx; +{ + DB_ENV *dbenv = ctx; + DB_REP_STAT *st; + u_int32_t ans; + int ret; + + if ((ret = dbenv->rep_stat(dbenv, &st, 0)) != 0) { + dbenv->err(dbenv, ret, "rep_stat"); + return (0); + } + ans = st->st_startup_complete; + free(st); + return (ans); +} + +static int +await_condition(pred, ctx, limit) + PRED pred; + void *ctx; + long limit; +{ +#ifndef _WIN32 + struct timeval t; +#endif + time_t tim; + + tim = time(NULL) + limit; + while (time(NULL) < tim) { + if ((*pred)(ctx)) + return (1); + // sleep 1/10th of a second at a time + // (maybe Windows can use select() too, if include 
Winsock2.h) +#ifdef _WIN32 + Sleep(100); +#else + t.tv_sec = 0; + t.tv_usec = 100000; + select(0, NULL, NULL, NULL, &t); +#endif + } + return (0); +} + + +static void +notify(dbenv, event, unused) + DB_ENV *dbenv; + u_int32_t event; + void *unused; +{ + struct channel_test_globals *g; + struct env_info *info; + + if (event == DB_EVENT_PANIC) { + fprintf(stderr, "BDB panic"); + abort(); + } else if (event == DB_EVENT_REP_STARTUPDONE) { + info = dbenv->app_private; + g = info->g; + LOCK_MUTEX(&g->mtx); + info->startupdone = 1; + cond_wake(&g->cond); + UNLOCK_MUTEX(&g->mtx); + } +} + +static void +msg_disp(dbenv, ch, request, nseg, flags) + DB_ENV *dbenv; + DB_CHANNEL *ch; + DBT *request; + u_int32_t nseg; + u_int32_t flags; +{ + CuTest *ct; + struct report *rpt = get_rpt(dbenv); + DBT answer, mult[3]; + char *p; + size_t sz; + u_int32_t i; + int ret; + + ct = ((struct env_info *)dbenv->app_private)->g->test; + CuAssertTrue(ct, nseg < MAX_SEGS); + for (i = 0; i < nseg; i++) { + if ((sz = (rpt->dbt[rpt->dbt_count].size = request[i].size)) > 0) { + CuAssertTrue(ct, (rpt->dbt[rpt->dbt_count].data = malloc(sz)) != NULL); + memcpy(rpt->dbt[rpt->dbt_count].data, + request[i].data, sz); + } else + rpt->dbt[rpt->dbt_count].data = NULL; + rpt->dbt_count++; + } + + ret = 0; + if (flags & DB_REPMGR_NEED_RESPONSE) { + if (nseg == 2) { + /* Try a multi-segment response. 
*/ + memset(&mult, 0, sizeof(mult)); + p = "roses are red"; + mult[0].data = p; + mult[0].size = (u_int32_t)strlen(p) + 1; + p = "violets are blue"; + mult[1].data = p; + mult[1].size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &mult[0], 2, 0); + } else if (nseg == 1) { + // pretend to ``forget'' to respond + } else if (nseg == 4) { + // send a response of zero segments + ret = ch->send_msg(ch, &answer, 0, 0); + } else if (nseg == 5) { + // send a response with a segment of zero length + memset(&answer, 0, sizeof(answer)); + answer.size = 0; + ret = ch->send_msg(ch, &answer, 1, 0); + + // TODO: we still need to try this with the DB_MULTIPLE approach too + } else if (nseg == 6) { + // patience, ... + /* Try a multi-segment response. */ + memset(&mult, 0, sizeof(mult)); + p = "roses are red"; + mult[0].data = p; + mult[0].size = (u_int32_t)strlen(p) + 1; + p = "violets are blue"; + mult[1].size = 0; + mult[2].data = p; + mult[2].size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &mult[0], 3, 0); + + } else { + memset(&answer, 0, sizeof(answer)); + p = "this is the answer to the request"; + answer.data = p; + answer.size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &answer, 1, 0); + } + } + rpt->ret = ret; + rpt->done = 1; +} + +static void +msg_disp2(dbenv, ch, request, nseg, flags) + DB_ENV *dbenv; + DB_CHANNEL *ch; + DBT *request; + u_int32_t nseg; + u_int32_t flags; +{ + CuTest *ct; + struct report *rpt = get_rpt(dbenv); + DBT answer; + const char *p; + char buf[100]; + size_t sz; + u_int32_t i; + int ret; + + ct = ((struct env_info *)dbenv->app_private)->g->test; + CuAssertTrue(ct, nseg < MAX_SEGS); + for (i = 0; i < nseg; i++) { + if ((sz = (rpt->dbt[rpt->dbt_count].size = request[i].size)) > 0) { + CuAssertTrue(ct, (rpt->dbt[rpt->dbt_count].data = malloc(sz)) != NULL); + memcpy(rpt->dbt[rpt->dbt_count].data, + request[i].data, sz); + } else + rpt->dbt[rpt->dbt_count].data = NULL; + rpt->dbt_count++; + } + + if (flags & 
DB_REPMGR_NEED_RESPONSE) { + memset(&answer, 0, sizeof(answer)); + dbenv->get_errpfx(dbenv, &p); + strncpy(buf, p, sizeof(buf)); + answer.data = buf; + answer.size = (u_int32_t)strlen(p) + 1; + if (answer.size > sizeof(buf)) + answer.size = sizeof(buf); + ret = ch->send_msg(ch, &answer, 1, 0); + } + rpt->ret = ret; + rpt->done = 1; +} + +/* Test async replies to (async) messages. */ +static void +msg_disp3(dbenv, ch, request, nseg, flags) + DB_ENV *dbenv; + DB_CHANNEL *ch; + DBT *request; + u_int32_t nseg; + u_int32_t flags; +{ + CuTest *ct; + struct report *rpt = get_rpt(dbenv); + DBT answer; + char *p; + size_t sz; + u_int32_t i; + int ret; + + ct = ((struct env_info *)dbenv->app_private)->g->test; + CuAssertTrue(ct, nseg < MAX_SEGS); + for (i = 0; i < nseg; i++) { + if ((sz = (rpt->dbt[rpt->dbt_count].size = request[i].size)) > 0) { + CuAssertTrue(ct, (rpt->dbt[rpt->dbt_count].data = malloc(sz)) != NULL); + memcpy(rpt->dbt[rpt->dbt_count].data, + request[i].data, sz); + } else + rpt->dbt[rpt->dbt_count].data = NULL; + rpt->dbt_count++; + } + + ret = 0; + + // TODO: test that multiple calls to send_msg are not allowed on a request. 
+ CuAssertTrue(ct, !(flags & DB_REPMGR_NEED_RESPONSE)); + + memset(&answer, 0, sizeof(answer)); + p = "roses may be pink"; + answer.data = p; + answer.size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &answer, 1, 0); + + if (ret == 0) { + p = "I think"; + answer.data = p; + answer.size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &answer, 1, 0); + } + rpt->ret = ret; + rpt->done = 1; +} +static void +msg_disp4(dbenv, ch, request, nseg, flags) + DB_ENV *dbenv; + DB_CHANNEL *ch; + DBT *request; + u_int32_t nseg; + u_int32_t flags; +{ + CuTest *ct; + struct reports *rpts = ((struct env_info *)dbenv->app_private)->rpts; + struct report *rpt; + size_t sz; + u_int32_t i; + + ct = ((struct env_info *)dbenv->app_private)->g->test; + LOCK_MUTEX(&rpts->m); + rpt = &rpts->rpt[rpts->count++]; + UNLOCK_MUTEX(&rpts->m); + + + CuAssertTrue(ct, !(flags & DB_REPMGR_NEED_RESPONSE)); + CuAssertTrue(ct, nseg < MAX_SEGS); + for (i = 0; i < nseg; i++) { + if ((sz = (rpt->dbt[rpt->dbt_count].size = request[i].size)) > 0) { + CuAssertTrue(ct, (rpt->dbt[rpt->dbt_count].data = malloc(sz)) != NULL); + memcpy(rpt->dbt[rpt->dbt_count].data, + request[i].data, sz); + } else + rpt->dbt[rpt->dbt_count].data = NULL; + rpt->dbt_count++; + } + + rpt->done = 1; +} + +static void +msg_disp5(dbenv, ch, request, nseg, flags) + DB_ENV *dbenv; + DB_CHANNEL *ch; + DBT *request; + u_int32_t nseg; + u_int32_t flags; +{ + struct report *rpt = get_rpt(dbenv); + DBT answer; + u_int8_t buf[100]; + char *p; + int ret; + + memset(&answer, 0, sizeof(answer)); + answer.flags = DB_DBT_USERMEM; + answer.ulen = sizeof(buf); + answer.data = buf; + if ((ret = ch->set_timeout(ch, 45000000)) != EINVAL || + (ret = ch->close(ch, 0)) != EINVAL) + rpt->ret = ret; + else + rpt->ret = ch->send_request(ch, request, nseg, &answer, 0, 0); + + memset(&answer, 0, sizeof(answer)); + p = "roses may be pink"; + answer.data = p; + answer.size = (u_int32_t)strlen(p) + 1; + ret = ch->send_msg(ch, &answer, 1, 0); + + 
rpt->done = 1; +} + +static void +test_data_init(dbt, data) + DBT *dbt; + char *data; +{ + memset(dbt, 0, sizeof(*dbt)); + dbt->data = data; + dbt->size = (u_int32_t)strlen(data) + 1; +} + +static void +test_zeroes(ch, dest, ct) + DB_CHANNEL *ch; + DB_ENV *dest; /* destination env handle */ + CuTest *ct; +{ + DBT resp; + DBT rdbts[6]; + struct report *rpt; + void *pointer, *vp; + size_t sz; + int i, ret; + + memset(&resp, 0, sizeof(resp)); + resp.flags = DB_DBT_MALLOC; + +#define DATA0 "Dear kindly judge, your honor" +#define DATA1 "my parents treat me rough" +#define DATA2 "with all their marijuana" +#define DATA3 "they won't give me a puff" +#define DATA4 "The didn't wanna have me, but somehow I was had" +#define DATA5 "Leapin' lizards, that's why I'm so bad!" + +#undef ADD_TEST_DATA +#define ADD_TEST_DATA(x) do { test_data_init(&rdbts[i++], (x)); } while (0); + + i = 0; + ADD_TEST_DATA(DATA0); + ADD_TEST_DATA(DATA1); + ADD_TEST_DATA(DATA2); + ADD_TEST_DATA(DATA3); + ADD_TEST_DATA(DATA4); + ADD_TEST_DATA(DATA5); + + rpt = get_rpt(dest); + + printf("6. 
send zero-segment request\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 0, &resp, 0, 0)) == 0); + CuAssertTrue(ct, rpt->dbt_count == 0); + CuAssertTrue(ct, rpt->msg_count == 0); + check_dbt_string(&resp, "this is the answer to the request"); + if (resp.data) + free(resp.data); + + printf("6.a) send request with a zero-length segment (now why would anyone want to do that?)\n"); + clear_rpt(dest); + rdbts[1].size = 0; + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 3, &resp, 0, 0)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->dbt_count == 3); + check_dbt_string(&rpt->dbt[0], DATA0); + check_dbt_string(&rpt->dbt[1], ""); + check_dbt_string(&rpt->dbt[2], DATA2); + CuAssertTrue(ct, rpt->msg_count == 0); + check_dbt_string(&resp, "this is the answer to the request"); + if (resp.data) + free(resp.data); + i = 1; ADD_TEST_DATA(DATA1); /* restore perturbed test data */ + + printf("6.b) get a zero-length response\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 4, &resp, 0, 0)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->dbt_count == 4); + CuAssertTrue(ct, rpt->msg_count == 0); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, resp.size == 0); + if (resp.data) + free(resp.data); + + printf("6.c) get a zero-length response (alternate version)\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 5, &resp, 0, 0)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->dbt_count == 5); + CuAssertTrue(ct, rpt->msg_count == 0); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, resp.size == 0); + if (resp.data) + free(resp.data); + + printf("6.d) get a zero-length response (DB_MULTIPLE, zero segments)\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 4, &resp, 0, DB_MULTIPLE)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->msg_count == 0); + CuAssertTrue(ct, rpt->ret == 0); + DB_MULTIPLE_INIT(pointer, &resp); + DB_MULTIPLE_NEXT(pointer, &resp, 
vp, sz); + CuAssertTrue(ct, pointer == NULL); + if (resp.data) + free(resp.data); + + printf("6.e) get a zero-length response (DB_MULTIPLE, a zero-length segment)\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 5, &resp, 0, DB_MULTIPLE)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->msg_count == 0); + CuAssertTrue(ct, rpt->ret == 0); + DB_MULTIPLE_INIT(pointer, &resp); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, sz == 0); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, pointer == NULL); + if (resp.data) + free(resp.data); + + printf("6.f) get a zero-length response (DB_MULTIPLE, a zero-length segment in the middle)\n"); + clear_rpt(dest); + CuAssertTrue(ct, (ret = ch->send_request(ch, rdbts, 6, &resp, 0, DB_MULTIPLE)) == 0); + await_done(dest); + CuAssertTrue(ct, rpt->msg_count == 0); + CuAssertTrue(ct, rpt->ret == 0); + DB_MULTIPLE_INIT(pointer, &resp); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, rpt->ret == 0); + CuAssertTrue(ct, strcmp((char*)vp, "roses are red") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, sz == 0); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, strcmp((char*)vp, "violets are blue") == 0); + CuAssertTrue(ct, sz == strlen((char*)vp) + 1); + DB_MULTIPLE_NEXT(pointer, &resp, vp, sz); + CuAssertTrue(ct, pointer == NULL); + if (resp.data) + free(resp.data); +} + +/* + * Compare, but skip over BDB error msg number at beginning of `actual'. 
+ */ +static int +mystrcmp(actual, expected) + char *actual; + const char *expected; +{ + char *p; + + for (p = actual; *p != '\0' && !isspace(*p); p++) + ; + for (; *p != '\0' && isspace(*p); p++) + ; + return (strcmp(p, expected)); +} diff --git a/test/c/suites/TestDbTuner.c b/test/c/suites/TestDbTuner.c new file mode 100644 index 00000000..56810ca7 --- /dev/null +++ b/test/c/suites/TestDbTuner.c @@ -0,0 +1,249 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * A c unit test program for db_tuner [#18910]. + * + * There are total 8 * 3 * 4 * 2 = 192 test cases, + * which are from: + * 8 possible pagesize (8), + * 3 types of dup (default, DB_DUP, DB_DUPSORT) (3), + * whether key/data item is small or big (4), + * whether the number of data per key is larger than 1 or not (2). + * + * This test program test db_tuner on all above possible btree databases. + */ +#include +#include +#include +#include +#include + +#include "db.h" +#include "CuTest.h" + +#define random(x) (rand() % x) + +int open_db(DB_ENV **, DB **, char *, char *, u_int32_t, int); +int run_test(CuTest *, u_int32_t, int, int, int, int, int); +int store_db(DB *, int, int, int, int); + +const char *progname = "TestDbTuner"; +int total_cases, success_cases; + +int TestDbTuner(CuTest *ct) { + u_int32_t pgsize; + int i, j; + + total_cases = success_cases = 0; + + printf("Test db_tuner on various different kinds of btree, %s\n", + "so it takes some time."); + /* + * Total 8 * 3 * 4 * 2 = 192 test cases, currently not test on + * big key, because producing btree databases with too big key will + * cost too much time. So here are 96 test cases. 
+ * + * If you want test btree databases with big keys, please add follows: + * run_test(ct, pgsize, 50, 16367, 1, 1000, j); + * run_test(ct, pgsize, 50, 16367, 100, 1000, j); + * run_test(ct, pgsize, 16367, 16367, 1, 1000, j); + * run_test(ct, pgsize, 16367, 16367, 100, 1000, j); + */ + for (i = 0; i < 8; i++) { + pgsize = (1 << i) * 512; + for (j = 0; j < 3; j++) { + run_test(ct, pgsize, 50, 0, 1, 1000, j); + run_test(ct, pgsize, 50, 0, 100, 1000, j); + run_test(ct, pgsize, 16367, 0, 1, 1000, j); + run_test(ct, pgsize, 16367, 0, 100, 1000, j); + } + } + + printf("\n\nTESTING db_tuner on %d btree databases:\n", total_cases); + printf(".............................................................." + "\t \t%0.2f%% passed (%d/%d).\n", + (((double)success_cases/total_cases) * 100), + success_cases, total_cases); + + return (EXIT_SUCCESS); +} + +/* + * run_test: + * CuTest *ct: + * u_int32_t pgsize: 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536 + * int nrecs: number of unique key, {1000} + * int mkeys: max-length of key, {0: small; 16367: large for big key} + * int mdatas: max-length of data, {50: small; 16367: large for big data} + * int mdups: max-number of data per key, {1, 200} + * int duptype: {default, DB_DUP, DB_DUPSORT} + */ +int +run_test(ct, pgsize, mdatas, mkeys, mdups, nrecs, duptype) + CuTest *ct; + u_int32_t pgsize; + int mdatas, mkeys, mdups, nrecs, duptype; +{ + DB *dbp; + DB_ENV *dbenv; + char cmd[1000]; + char db_file_name[100], *home; + + home = "TESTDIR"; + total_cases++; + + /* Step 1: Prepare the database based on the parameters. 
*/ + sprintf(db_file_name, "p%d_n%d_k%d_d%d_s%d_D%d.db", + pgsize, nrecs, mkeys, mdatas, mdups, duptype); + + TestEnvConfigTestSetup(ct); + + CuAssert(ct, "open_db", + open_db(&dbenv, &dbp, db_file_name, home, pgsize, duptype) == 0); + + CuAssert(ct, "store_db", + store_db(dbp, nrecs, mkeys, mdatas, mdups) == 0); + + if (dbp != NULL) + CuAssert(ct, "DB->close", dbp->close(dbp, 0) == 0); + + /* Step 2: Test db_tuner utility. */ +#ifdef _WIN32 + sprintf(cmd, "Win32\\Debug\\db_tuner.exe -h %s -d %s -v >/null 2>&1", + home, db_file_name); +#else + sprintf(cmd, "./db_tuner -d %s//%s -v >/dev/null 2>&1", + home, db_file_name); +#endif + system(cmd); + + if (dbenv != NULL) + CuAssert(ct, "DB_ENV->close failed", + dbenv->close(dbenv, 0) == 0); + + TestEnvConfigTestTeardown(ct); + success_cases++; + + return (0); +} + +/* Produce the btree database for the specific test case. */ +int +store_db(dbp, nrecs, mkeys, mdatas, mdups) + DB *dbp; + int nrecs, mkeys, mdatas, mdups; +{ + DBT key, data; + db_recno_t recno; + int i, ret; + int data_sz, key_sz, dup_rec; + char str[37] = "abcdefghijklmnopqrstuvwxyz0123456789"; + char *buf; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + buf = (char *)malloc(sizeof(char) * mdatas); + memset(buf, 0, sizeof(char) * mdatas); + for (recno = 1; recno <= (db_recno_t)nrecs; recno++) { + data_sz = random(mdatas); + + for (i = 0; i < data_sz; ++i) + buf[i] = str[i % 37]; + buf[data_sz] = '\0'; + + key_sz = mkeys < sizeof(recno) ? sizeof(recno) : random(mkeys); + + key.data = &recno; + key.size = key_sz < sizeof(recno) ? 
sizeof(recno) : key_sz; + + dup_rec = random(mdups); + for (i = 0; i <= dup_rec; ++i) { + data.data = buf; + data.size = (u_int32_t)strlen(buf) + i + 1; + + if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) != 0) { + dbp->err(dbp, ret, "DB->put"); + break; + } + } + } + + return (ret); +} + +int +open_db(dbenvp, dbpp, dbname, home, pgsize, duptype) + DB_ENV **dbenvp; + DB **dbpp; + char *dbname, *home; + u_int32_t pgsize; + int duptype; +{ + DB *dbp; + DB_ENV *dbenv; + int ret; + + dbp = NULL; + dbenv = NULL; + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "db_env_create: %s\n", db_strerror(ret)); + return (EXIT_FAILURE); + } + + *dbenvp = dbenv; + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if ((ret = + dbenv->set_cachesize(dbenv, (u_int32_t)0, + (u_int32_t)262144000 * 2, 1)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_cachesize"); + return (EXIT_FAILURE); + } + + if ((ret = dbenv->mutex_set_max(dbenv, 10000)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->mutex_set_max"); + return (EXIT_FAILURE); + } + + if ((ret = dbenv->open(dbenv, home, + DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + return (EXIT_FAILURE); + } + + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + return (EXIT_FAILURE); + } + + *dbpp = dbp; + + if ((ret = dbp->set_pagesize(dbp, pgsize)) != 0) { + dbenv->err(dbenv, ret, "DB->set_pagesize"); + return (EXIT_FAILURE); + } + + if (duptype && (ret = + dbp->set_flags(dbp, duptype == 1 ? 
DB_DUP : DB_DUPSORT)) != 0) { + dbenv->err(dbenv, ret, "DB->set_flags"); + return (EXIT_FAILURE); + } + + if ((ret = + dbp->open(dbp, NULL, dbname, NULL, DB_BTREE, DB_CREATE, 0)) != 0) { + dbenv->err(dbenv, ret, "%s: DB->open", dbname); + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} diff --git a/test/c/suites/TestEncryption.c b/test/c/suites/TestEncryption.c new file mode 100644 index 00000000..2045d690 --- /dev/null +++ b/test/c/suites/TestEncryption.c @@ -0,0 +1,350 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + * + * A c unit test program for encrypting a database. [#19539]. + * + * There are two ways to encrypt a database: + * i) If the database is attached to an opened environment, use + * DB_ENV->set_encrypt() and DB->set_flags(); + * ii) Otherwise, use DB->set_encrypt(). + * + * Note: + * 1) DB->set_encrypt() cannot used in an opened environment, + * no matter whether this environment is encrypted or not. + * 2) Single DB_ENV->set_encrypt() is not enough to encrypt a database + * inside an encrypted environment, DB->set_flag should also be applied to + * this specific database. + * 3) DB->set_flag() should only be called in an encrypted environment. 
+ */ +#include +#include +#include +#include + +#include "db.h" +#include "CuTest.h" + +const char *progname_crypt = "encryption"; +#define DATABASE "encryption.db" +#define PASSWORD "ENCRYPT_KEY" + +typedef struct crypt_config { + int in_env; + int is_env_encrypt; + int is_db_encrypt; + int is_db_flags_encrypt; +} CRYPT_CONFIG; + +int closeDb __P((DB_ENV *, DB *)); +int dbPutGet __P((CuTest *, DB *)); +int encryptTestCase __P((CuTest *, int, int, int, int)); +int initConfig __P((CRYPT_CONFIG *, int, int, int, int)); +int openDb __P((CuTest *, DB **dbp, DB_ENV *, char *, CRYPT_CONFIG *)); +int openEnv __P((CuTest *, DB_ENV **, char *, int)); +int reOpen __P((CuTest *, DB **, char *, CRYPT_CONFIG *)); + +int TestNoEncryptedDb(CuTest *ct) { + CuAssert(ct, "TestNoEncryption", encryptTestCase(ct, 0, 0, 0, 0) == 0); + return (0); +} + +int TestEncryptedDbFlag(CuTest *ct) { + CuAssert(ct, "TestEncryptedDbFlag", + encryptTestCase(ct, 0, 0, 0, 1) == 0); + return (0); +} + +int TestEncryptedDb(CuTest *ct) { + CuAssert(ct, "TestEncryptedDb", encryptTestCase(ct, 0, 0, 1, 0) == 0); + return (0); +} + +int TestEncryptedDbFlagAndDb(CuTest *ct) { + CuAssert(ct, "TestEncryptedDbFlagAndDb", + encryptTestCase(ct, 0, 0, 1, 1) == 0); + return (0); +} + +int TestEnvWithNoEncryption(CuTest *ct) { + CuAssert(ct, "TestEnvWithNoEncryption", + encryptTestCase(ct, 1, 0, 0, 0) == 0); + return (0); +} + +int TestEnvWithEncryptedDbFlag(CuTest *ct) { + CuAssert(ct, "TestEnvWithEncryptedDbFlag", + encryptTestCase(ct, 1, 0, 0, 1) == 0); + return (0); +} + +int TestEnvWithEncryptedDb(CuTest *ct) { + CuAssert(ct, "TestEnvWithEncryptedDb", + encryptTestCase(ct, 1, 0, 1, 0) == 0); + return (0); +} + +int TestEnvWithEncryptedDbFlagAndDb(CuTest *ct) { + CuAssert(ct, "TestEnvWithEncryptedDbFlagAndDb", + encryptTestCase(ct, 1, 0, 1, 1) == 0); + return (0); +} + +int TestEncyptedEnv(CuTest *ct) { + CuAssert(ct, "TestEncyptedEnv", encryptTestCase(ct, 1, 1, 0, 0) == 0); + return (0); +} + +int 
TestEncyptedEnvWithEncyptedDbFlag(CuTest *ct) { + CuAssert(ct, "TestEncyptedEnvWithEncyptedDbFlag", + encryptTestCase(ct, 1, 1, 0, 1) == 0); + return (0); +} + +int TestEncyptedEnvWithEncyptedDb(CuTest *ct) { + CuAssert(ct, "TestEncyptedEnvWithEncyptedDb", + encryptTestCase(ct, 1, 1, 1, 0) == 0); + return (0); +} + +int TestEncyptedEnvWithEncryptedDbFlagAndDb(CuTest *ct) { + CuAssert(ct, "TestEncyptedEnvWithEncryptedDbFlagAndDb", + encryptTestCase(ct, 1, 1, 1, 1) == 0); + return (0); +} + +int initConfig(CRYPT_CONFIG *crypt, int in_env, int is_env_encrypt, + int is_db_encrypt, int is_db_flags_encrypt) +{ + memset(crypt, 0, sizeof(CRYPT_CONFIG)); + crypt->in_env = in_env; + crypt->is_env_encrypt = is_env_encrypt; + crypt->is_db_encrypt = is_db_encrypt; + crypt->is_db_flags_encrypt = is_db_flags_encrypt; + + return (0); +} + +/* + * in_env: whether open the database in an environment + * is_env_encrypt: whether call DB_ENV->set_encrypt() with DB_ENCRYPT_AES + * is_db_encrypt: whether call DB->set_encrypt() with DB_ENCRYPT_AES + * is_db_flags_encrypt: whether call DB->set_flags() with DB_ENCRYPT + */ +int encryptTestCase(CuTest *ct, int in_env, int is_env_encrypt, + int is_db_encrypt, int is_db_flags_encrypt) +{ + CRYPT_CONFIG crypt; + DB_ENV *dbenv; + DB *dbp; + char dbname[100], *home; + int ret; + + dbenv = NULL; + dbp = NULL; + home = "TESTDIR"; + ret = 0; + + TestEnvConfigTestSetup(ct); + initConfig(&crypt, in_env, is_env_encrypt, is_db_encrypt, + is_db_flags_encrypt); + + if (in_env) { + strcpy(dbname, DATABASE); + CuAssert(ct, "Open environment", + openEnv(ct, &dbenv, home, is_env_encrypt) == 0); + } else + sprintf(dbname, ".//%s//%s", home, DATABASE); + + ret = openDb(ct, &dbp, dbenv, dbname, &crypt); + + /* Close the dbenv, dbp handle.*/ + CuAssert(ct, "closeDb", closeDb(dbenv, dbp) == 0); + dbenv = NULL; + dbp = NULL; + + /* Re-open the database and do some operations. 
*/ + if (!ret) { + sprintf(dbname, ".//%s//%s", home, DATABASE); + CuAssert(ct, "Re-open database", + reOpen(ct, &dbp, dbname, &crypt) == 0); + + /* Close the dbenv, dbp handle. */ + CuAssert(ct, "closeDb", closeDb(dbenv, dbp) == 0); + dbenv = NULL; + dbp = NULL; + } + + TestEnvConfigTestTeardown(ct); + + return (0); +} + +int openEnv(CuTest *ct, DB_ENV **dbenvp, char *home, int is_encrypt) +{ + DB_ENV *dbenv; + + dbenv = NULL; + + CuAssert(ct, "db_env_create", db_env_create(&dbenv, 0) == 0); + + *dbenvp = dbenv; + + dbenv->set_errcall(dbenv, NULL); + + if (is_encrypt) + CuAssert(ct, "DB_ENV->set_encrypt:DB_ENCRYPT_AES", + dbenv->set_encrypt(dbenv, PASSWORD, DB_ENCRYPT_AES) == 0); + + CuAssert(ct, "DB_ENV->open", dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0) == 0); + + return (0); +} + +int openDb(CuTest *ct, DB **dbpp, DB_ENV *dbenv, char *dbname, + CRYPT_CONFIG *crypt) +{ + DB *dbp; + int expected_value, ret; + + dbp = NULL; + ret = 0; + + CuAssert(ct, "openDb:db_create:%s", db_create(&dbp, dbenv, 0) == 0); + + *dbpp = dbp; + + dbp->set_errcall(dbp, NULL); + + /* + * DB->set_flag() with DB_ENCRYPT should only be used in + * an encrypted environment. + */ + if (crypt->is_db_flags_encrypt) { + ret = dbp->set_flags(dbp, DB_ENCRYPT); + + if (!crypt->in_env || !crypt->is_env_encrypt) + expected_value = EINVAL; + else + expected_value = 0; + + CuAssert(ct, "openDb:DB->set_flags:DB_ENCRYPT", + ret == expected_value); + } + + /* DB->set_encrypt() cannot be used in an opened environment. 
*/ + if (!ret && crypt->is_db_encrypt) { + ret = dbp->set_encrypt(dbp, PASSWORD, DB_ENCRYPT_AES); + + if (crypt->in_env) + expected_value = EINVAL; + else + expected_value = 0; + + CuAssert(ct, "openDb:DB->set_encrypt:DB_ENCRYPT_AES", + ret == expected_value); + } + + if (!ret) { + CuAssert(ct, "openDb: DB->open", dbp->open(dbp, NULL, + dbname, NULL, DB_BTREE, DB_CREATE, 0) == 0); + CuAssert(ct, "dbPutGet", dbPutGet(ct, dbp) == 0); + } + + return (ret); +} + +/* Re-open the previous database and check the encryption. */ +int reOpen(CuTest *ct, DB **dbpp, char *dbname, CRYPT_CONFIG *crypt) +{ + DB *dbp; + int expected_value, ret; + + dbp = NULL; + expected_value = ret = 0; + + CuAssert(ct, "reOpen: db_create fails", + db_create(&dbp, NULL, 0) == 0); + + *dbpp = dbp; + + dbp->set_errcall(dbp, NULL); + + /* Re-open a database should fail if the database is encrypted.*/ + ret = dbp->open(dbp, NULL, dbname, NULL, DB_UNKNOWN, 0, 0); + + /* + * If call DB_ENV->set_encrypt and DB->set_flag with DB_ENCRYPT, + * the database should be successfully encrypted, hence, re-open + * should be fail. + * + * Or + * + * If call DB->set_encrypt without in an environment, + * then the database can be successfully encrypted, hence, + * re-open should fails and return error code EINVAL. + */ + if ((crypt->in_env && crypt->is_env_encrypt && + crypt->is_db_flags_encrypt) || + (crypt->is_db_encrypt && !crypt->in_env)) + expected_value = EINVAL; + + CuAssert(ct, "reOpen: DB->open", ret == expected_value); + + /* Do some put and get operations if re-open is successful. 
*/ + if (!ret) + CuAssert(ct, "dbPutGet", dbPutGet(ct, dbp) == 0); + + return (0); +} + +int dbPutGet(CuTest *ct, DB *dbp) +{ + DBT key, data; + char buf[1024]; + const char *str = "abcdefghijklmnopqrst"; + int cnt, ret; + size_t len; + + ret = 0; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + srand((int)time(NULL)); + + for (cnt = 1; cnt < 10001; cnt++) { + len = rand() % strlen(str) + 1; + (void)sprintf(buf, "%05d_%*s", cnt, len, str); + + key.data = &cnt; + key.size = sizeof(cnt); + + data.data = buf; + data.size = (u_int32_t)strlen(buf) + 1; + + CuAssert(ct, "DB->put", + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + + return (0); +} + +int closeDb(DB_ENV *dbenv, DB *dbp) +{ + int ret = 0; + + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) + fprintf(stderr, "%s: DB->close: %s", + progname_crypt, db_strerror(ret)); + + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) + fprintf(stderr, "%s: DB_ENV->close: %s", + progname_crypt, db_strerror(ret)); + + return (ret); +} diff --git a/test/c/suites/TestEnvConfig.c b/test/c/suites/TestEnvConfig.c new file mode 100644 index 00000000..2bbf4833 --- /dev/null +++ b/test/c/suites/TestEnvConfig.c @@ -0,0 +1,303 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#include + +#include +#include +#include + +#include "db.h" +#include "CuTest.h" +#include "test_util.h" + +#define ENV { \ + if (dbenv != NULL) \ + CuAssertTrue(ct, dbenv->close(dbenv, 0) == 0); \ + CuAssertTrue(ct, db_env_create(&dbenv, 0) == 0); \ + dbenv->set_errfile(dbenv, stderr); \ +} + +int TestEnvConfigSuiteSetup(CuSuite *ct) { + return (0); +} + +int TestEnvConfigSuiteTeardown(CuSuite *ct) { + return (0); +} + +int TestEnvConfigTestSetup(CuTest *ct) { + setup_envdir(TEST_ENV, 1); + return (0); +} + +int TestEnvConfigTestTeardown(CuTest *ct) { + teardown_envdir(TEST_ENV); + return (0); +} + +int TestSetTxMax(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* tx_max: NOT reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_tx_max(dbenv, 37) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_TXN, 0666) == 0); + CuAssertTrue(ct, dbenv->get_tx_max(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37); + ENV + CuAssertTrue(ct, dbenv->set_tx_max(dbenv, 63) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_tx_max(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37); + return (0); +} + +int TestSetLogMax(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lg_max: reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_lg_max(dbenv, 37 * 1024 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOG, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_max(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37 * 1024 * 1024); + ENV + CuAssertTrue(ct, dbenv->set_lg_max(dbenv, 63 * 1024 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_max(dbenv, &v) == 0); + CuAssertTrue(ct, v == 63 * 1024 * 1024); + return (0); +} + +int TestSetLogBufferSize(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lg_bsize: NOT reset at run-time. 
*/ + ENV + CuAssertTrue(ct, dbenv->set_lg_bsize(dbenv, 37 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOG, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_bsize(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37 * 1024); + ENV + CuAssertTrue(ct, dbenv->set_lg_bsize(dbenv, 63 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_bsize(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37 * 1024); + return (0); +} + +int TestSetLogRegionSize(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lg_regionmax: NOT reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_lg_regionmax(dbenv, 137 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOG, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_regionmax(dbenv, &v) == 0); + CuAssertTrue(ct, v == 137 * 1024); + ENV + CuAssertTrue(ct, dbenv->set_lg_regionmax(dbenv, 163 * 1024) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lg_regionmax(dbenv, &v) == 0); + CuAssertTrue(ct, v == 137 * 1024); + return (0); +} + +int TestGetLockConflicts(CuTest *ct) { + DB_ENV *dbenv; + const u_int8_t *lk_conflicts; + int lk_modes, nmodes; + u_int8_t conflicts[40]; + + dbenv = NULL; + /* lk_get_lk_conflicts: NOT reset at run-time. 
*/ + ENV + memset(conflicts, 'a', sizeof(conflicts)); + nmodes = 6; + CuAssertTrue(ct, + dbenv->set_lk_conflicts(dbenv, conflicts, nmodes) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_lk_conflicts(dbenv, &lk_conflicts, &lk_modes) == 0); + CuAssertTrue(ct, lk_conflicts[0] == 'a'); + CuAssertTrue(ct, lk_modes == 6); + ENV + memset(conflicts, 'b', sizeof(conflicts)); + nmodes = 8; + CuAssertTrue(ct, + dbenv->set_lk_conflicts(dbenv, conflicts, nmodes) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_lk_conflicts(dbenv, &lk_conflicts, &lk_modes) == 0); + CuAssertTrue(ct, lk_conflicts[0] == 'a'); + CuAssertTrue(ct, lk_modes == 6); + + return (0); +} + +int TestSetLockDetect(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lk_detect: NOT reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_lk_detect(dbenv, DB_LOCK_MAXLOCKS) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_detect(dbenv, &v) == 0); + CuAssertTrue(ct, v == DB_LOCK_MAXLOCKS); + ENV + CuAssertTrue(ct, dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_detect(dbenv, &v) == 0); + CuAssertTrue(ct, v == DB_LOCK_MAXLOCKS); + return (0); +} + +int TestLockMaxLocks(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lk_max_locks: NOT reset at run-time. 
*/ + ENV + CuAssertTrue(ct, dbenv->set_lk_max_locks(dbenv, 1037) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_locks(dbenv, &v) == 0); + CuAssertTrue(ct, v == 1037); + ENV + CuAssertTrue(ct, dbenv->set_lk_max_locks(dbenv, 1063) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_locks(dbenv, &v) == 0); + CuAssertTrue(ct, v == 1037); + return (0); +} + +int TestLockMaxLockers(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lk_max_lockers: NOT reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_lk_max_lockers(dbenv, 37) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_lockers(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37); + ENV + CuAssertTrue(ct, dbenv->set_lk_max_lockers(dbenv, 63) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_lockers(dbenv, &v) == 0); + CuAssertTrue(ct, v == 37); + return (0); +} + +int TestSetLockMaxObjects(CuTest *ct) { + DB_ENV *dbenv; + u_int32_t v; + + dbenv = NULL; + /* lk_max_objects: NOT reset at run-time. */ + ENV + CuAssertTrue(ct, dbenv->set_lk_max_objects(dbenv, 1037) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_objects(dbenv, &v) == 0); + CuAssertTrue(ct, v == 1037); + ENV + CuAssertTrue(ct, dbenv->set_lk_max_objects(dbenv, 1063) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_lk_max_objects(dbenv, &v) == 0); + CuAssertTrue(ct, v == 1037); + return (0); +} + +int TestSetLockTimeout(CuTest *ct) { + DB_ENV *dbenv; + db_timeout_t timeout; + + dbenv = NULL; + /* lock timeout: reset at run-time. 
*/ + ENV + CuAssertTrue(ct, + dbenv->set_timeout(dbenv, 37, DB_SET_LOCK_TIMEOUT) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_timeout(dbenv, &timeout, DB_SET_LOCK_TIMEOUT) == 0); + CuAssertTrue(ct, timeout == 37); + ENV + CuAssertTrue(ct, + dbenv->set_timeout(dbenv, 63, DB_SET_LOCK_TIMEOUT) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_timeout(dbenv, &timeout, DB_SET_LOCK_TIMEOUT) == 0); + CuAssertTrue(ct, timeout == 63); + return (0); +} + +int TestSetTransactionTimeout(CuTest *ct) { + DB_ENV *dbenv; + db_timeout_t timeout; + + dbenv = NULL; + /* txn timeout: reset at run-time. */ + ENV + CuAssertTrue(ct, + dbenv->set_timeout(dbenv, 37, DB_SET_TXN_TIMEOUT) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_LOCK, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_timeout(dbenv, &timeout, DB_SET_TXN_TIMEOUT) == 0); + CuAssertTrue(ct, timeout == 37); + ENV + CuAssertTrue(ct, + dbenv->set_timeout(dbenv, 63, DB_SET_TXN_TIMEOUT) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, + dbenv->get_timeout(dbenv, &timeout, DB_SET_TXN_TIMEOUT) == 0); + CuAssertTrue(ct, timeout == 63); + return (0); +} + +int TestSetCachesize(CuTest *ct) { + DB_ENV *dbenv; + int ncache; + u_int32_t a, b; + + dbenv = NULL; + /* cache size: NOT reset at run-time. 
*/ + ENV + CuAssertTrue(ct, dbenv->set_cachesize(dbenv, 1, 131072, 3) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, + TEST_ENV, DB_CREATE | DB_INIT_MPOOL, 0666) == 0); + CuAssertTrue(ct, dbenv->get_cachesize(dbenv, &a, &b, &ncache) == 0); + CuAssertTrue(ct, dbenv->get_cachesize(dbenv, &a, &b, &ncache) == 0); + CuAssertTrue(ct, a == 1 && b == 131072 && ncache == 3); + ENV + CuAssertTrue(ct, dbenv->set_cachesize(dbenv, 2, 262144, 1) == 0); + CuAssertTrue(ct, dbenv->open(dbenv, TEST_ENV, DB_JOINENV, 0666) == 0); + CuAssertTrue(ct, dbenv->get_cachesize(dbenv, &a, &b, &ncache) == 0); + CuAssertTrue(ct, a == 1 && b == 131072 && ncache == 3); + return (0); +} diff --git a/test/c/suites/TestEnvMethod.c b/test/c/suites/TestEnvMethod.c new file mode 100644 index 00000000..adf1c653 --- /dev/null +++ b/test/c/suites/TestEnvMethod.c @@ -0,0 +1,33 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db.h" +#include "CuTest.h" + +int TestSetThreadCount(CuTest *ct) { /* SKIP */ + + DB_ENV *dbenv; + DB *db; + + CuAssert(ct, "db_env_create", db_env_create(&dbenv, 0) == 0); + + dbenv->set_errpfx(dbenv, "TestSetThreadCount"); + CuAssert(ct, "set_thread_count", dbenv->set_thread_count(dbenv, 2) == 0); + CuAssert(ct, "env->open", dbenv->open(dbenv, ".", + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | + DB_INIT_TXN | DB_PRIVATE | DB_THREAD, 0) == 0); + + CuAssert(ct, "db_create", db_create(&db, dbenv, 0) == 0); + CuAssert(ct, "DB->open", db->open( + db, NULL, NULL, "TestSetThreadCount", DB_HASH, DB_CREATE, 0) == 0); + + db->close(db, 0); + dbenv->close(dbenv, 0); + + return (0); +} diff --git a/test/c/suites/TestKeyExistErrorReturn.c b/test/c/suites/TestKeyExistErrorReturn.c new file mode 100644 index 00000000..4b6aa47e --- /dev/null +++ b/test/c/suites/TestKeyExistErrorReturn.c @@ -0,0 +1,273 @@ +/*- + * See the file LICENSE for redistribution 
information. + * + * Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * Test a situation ([#19345]) where a deadlock is returned via + * a secondary index when doing and update. + * + * TestKeyExistErrorReturn.c + */ +#include +#include +#include + +#include "db.h" +#include "CuTest.h" + +#ifdef _WIN32 +#include +#define PATHD '\\' +extern int getopt(int, char * const *, const char *); +extern char *optarg; + +/* Wrap Windows thread API to make it look POSIXey. */ +typedef HANDLE thread_t; +#define thread_create(thrp, attr, func, arg) \ + (((*(thrp) = CreateThread(NULL, 0, \ + (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) +#define thread_join(thr, statusp) \ + ((WaitForSingleObject((thr), INFINITE) == WAIT_OBJECT_0) && \ + ((statusp == NULL) ? 0 : \ + (GetExitCodeThread((thr), (LPDWORD)(statusp)) ? 0 : -1))) + +typedef HANDLE mutex_t; +#define mutex_init(m, attr) \ + (((*(m) = CreateMutex(NULL, FALSE, NULL)) != NULL) ? 0 : -1) +#define mutex_lock(m) \ + ((WaitForSingleObject(*(m), INFINITE) == WAIT_OBJECT_0) ? 0 : -1) +#define mutex_unlock(m) (ReleaseMutex(*(m)) ? 
0 : -1) +#else +#include +#include +#define PATHD '/' + +typedef pthread_t thread_t; +#define thread_create(thrp, attr, func, arg) \ + pthread_create((thrp), (attr), (func), (arg)) +#define thread_join(thr, statusp) pthread_join((thr), (statusp)) + +typedef pthread_mutex_t mutex_t; +#define mutex_init(m, attr) pthread_mutex_init((m), (attr)) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) +#endif + +#define NUMWRITERS 5 + +/* Forward declarations */ +int count_records __P((DB *, DB_TXN *)); +int assoc_callback __P((DB *, const DBT *, const DBT *, DBT *)); +void *writer_thread __P((void *)); + +int global_thread_num; +mutex_t thread_num_lock; + +int TestKeyExistErrorReturn(CuTest *ct) { + DB *pdbp; + DB *sdbp; + DB_ENV *dbenv; + + const char *sec_db_file = "secondary.db"; + const char *pri_db_file = "primary.db"; + const char *env_dir = "TESTDIR"; + int i; + thread_t writer_threads[NUMWRITERS]; + u_int32_t db_flags, env_flags; + + pdbp = sdbp = NULL; + dbenv = NULL; + db_flags = DB_CREATE | DB_AUTO_COMMIT | DB_READ_UNCOMMITTED; + env_flags = DB_CREATE | DB_RECOVER | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_THREAD; + + TestEnvConfigTestSetup(ct); + + CuAssert(ct, "db_env_create", db_env_create(&dbenv, 0) == 0); + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, "TestKeyExistErrorReturn"); + + /* Run deadlock detector on every lock conflict. 
*/ + CuAssert(ct, "dbenv->set_lk_detect", + dbenv->set_lk_detect(dbenv, DB_LOCK_MINWRITE) == 0); + + CuAssert(ct, "dbenv->open", + dbenv->open(dbenv, env_dir, env_flags, 0) == 0); + + CuAssert(ct, "db_create", db_create(&pdbp, dbenv, 0) == 0); + CuAssert(ct, "pdbp->open", pdbp->open(pdbp, NULL, + pri_db_file, NULL, DB_BTREE, db_flags, 0) == 0); + + CuAssert(ct, "db_create", db_create(&sdbp, dbenv, 0) == 0); + CuAssert(ct, "sdbp->set_flags", sdbp->set_flags(sdbp, + DB_DUPSORT) == 0); + CuAssert(ct, "sdbp->open", sdbp->open(sdbp, NULL, sec_db_file, + NULL, DB_BTREE, db_flags, 0) == 0); + + CuAssert(ct, "DB->associate", pdbp->associate(pdbp, NULL, sdbp, + assoc_callback, DB_AUTO_COMMIT) == 0); + + /* Initialize a mutex. Used to help provide thread ids. */ + (void)mutex_init(&thread_num_lock, NULL); + + for (i = 0; i < NUMWRITERS; ++i) + (void)thread_create(&writer_threads[i], NULL, + writer_thread, (void *)pdbp); + + for (i = 0; i < NUMWRITERS; ++i) + (void)thread_join(writer_threads[i], NULL); + + if (sdbp != NULL) + CuAssert(ct, "sdbp->close", sdbp->close(sdbp, 0) == 0); + if (pdbp != NULL) + CuAssert(ct, "pdbp->close", pdbp->close(pdbp, 0) == 0); + if (dbenv != NULL) + CuAssert(ct, "dbenv->close", dbenv->close(dbenv, 0) == 0); + + TestEnvConfigTestTeardown(ct); + + return (EXIT_SUCCESS); +} + +void * +writer_thread(void *args) +{ + DB *dbp; + DB_ENV *dbenv; + DBT key, data; + DB_TXN *txn; + + char *key_strings[] = {"001", "002", "003", "004", "005", + "006", "007", "008", "009", "010"}; + int i, j, payload, ret, thread_num; + int retry_count, max_retries = 20; + + dbp = (DB *)args; + dbenv = dbp->dbenv; + + /* Get the thread number */ + (void)mutex_lock(&thread_num_lock); + global_thread_num++; + thread_num = global_thread_num; + (void)mutex_unlock(&thread_num_lock); + + /* Initialize the random number generator */ + srand(thread_num); + + /* Write 50 times and then quit */ + for (i = 0; i < 50; i++) { + retry_count = 0; /* Used for deadlock retries */ +retry: + 
ret = dbenv->txn_begin(dbenv, NULL, &txn, 0); + if (ret != 0) { + dbenv->err(dbenv, ret, "txn_begin failed"); + return ((void *)EXIT_FAILURE); + } + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + for (j = 0; j < 10; j++) { + /* Set up our key and data DBTs. */ + data.data = key_strings[j]; + data.size = (u_int32_t)strlen(key_strings[j]) + 1; + + payload = rand() + i; + key.data = &payload; + key.size = sizeof(int); + + switch (ret = dbp->put(dbp, txn, &key, &data, + DB_NOOVERWRITE)) { + case 0: + break; + case DB_KEYEXIST: + break; + case DB_LOCK_DEADLOCK: + (void)txn->abort(txn); + if (retry_count < max_retries) { + retry_count++; + goto retry; + } + return ((void *)EXIT_FAILURE); + default: + dbenv->err(dbenv, ret, "db put failed"); + ret = txn->abort(txn); + + if (ret != 0) + dbenv->err(dbenv, ret, + "txn abort failed"); + return ((void *)EXIT_FAILURE); + } + } + + if ((ret = txn->commit(txn, 0)) != 0) { + dbenv->err(dbenv, ret, "txn commit failed"); + return ((void *)EXIT_FAILURE); + } + } + return ((void *)EXIT_SUCCESS); +} + +int +count_records(DB *dbp, DB_TXN *txn) +{ + DBT key, data; + DBC *cursorp; + int count, ret; + + cursorp = NULL; + count = 0; + + /* Get the cursor */ + ret = dbp->cursor(dbp, txn, &cursorp, DB_READ_UNCOMMITTED); + if (ret != 0) { + dbp->err(dbp, ret, "count_records: cursor open failed."); + goto cursor_err; + } + + /* Get the key DBT used for the database read */ + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + do { + ret = cursorp->get(cursorp, &key, &data, DB_NEXT); + switch (ret) { + case 0: + count++; + break; + case DB_NOTFOUND: + break; + default: + dbp->err(dbp, ret, "Count records unspecified error"); + goto cursor_err; + } + } while (ret == 0); + +cursor_err: + if (cursorp != NULL) { + ret = cursorp->close(cursorp); + if (ret != 0) { + dbp->err(dbp, ret, + "count_records: cursor close failed."); + } + } + + return (count); +} + +int +assoc_callback(pdbp, pkey, pdata, skey) + DB *pdbp; + 
const DBT *pkey; + const DBT *pdata; + DBT *skey; +{ + memset(skey, 0, sizeof(DBT)); + skey->data = pdata->data; + skey->size = pdata->size; + + return (EXIT_SUCCESS); +} diff --git a/test/c/suites/TestPartial.c b/test/c/suites/TestPartial.c new file mode 100644 index 00000000..8cd16d93 --- /dev/null +++ b/test/c/suites/TestPartial.c @@ -0,0 +1,380 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +/* + * Test combination of DB_DBT_PARTIAL and flags in DB->get, DB->pget, + * DBcursor->get, DBcursor->pget. + * + * TestPartial.c + */ +#include + +#include +#include +#include + +#include "db.h" +#include "CuTest.h" +#include "test_util.h" + +const char *dbName = "TestPartial.db"; +const char *sdbName = "TestPartialSec.db"; + +int expected = EINVAL; + +/* Test utilities declaration */ +static void CheckDbPartial(CuTest *, DB *, int, u_int32_t, int); +static void CheckCursorPartial(CuTest *, DB *, int, u_int32_t, int); +static int CloseDb(DB *); +static int Getkeystring(DB *, const DBT *, const DBT *, DBT *); +static void OpenDb(CuTest *, DB *, DB **, const char *, DBTYPE, u_int32_t); +static void PopulateDb(CuTest *, DB *, u_int32_t, u_int32_t); + +int TestPartialSuiteSetup(CuSuite *ct) { + return (0); +} + +int TestPartialSuiteTeardown(CuSuite *ct) { + return (0); +} + +int TestPartialTestSetup(CuTest *ct) { + remove(dbName); + remove(sdbName); + return (0); +} + +int TestPartialTestTeardown(CuTest *ct) { + remove(dbName); + remove(sdbName); + return (0); +} + +/* Test cases */ +int TestDbPartialGet(CuTest *ct) { + DB *pdb; + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + PopulateDb(ct, pdb, 100, 1); + CheckDbPartial(ct, pdb, 0, 0, expected); + CheckDbPartial(ct, pdb, 0, DB_GET_BOTH, expected); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, DB_RECNUM); + 
PopulateDb(ct, pdb, 100, 1); + CheckDbPartial(ct, pdb, 0, DB_SET_RECNO, 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_QUEUE, 0); + PopulateDb(ct, pdb, 100, 1); + CheckDbPartial(ct, pdb, 0, DB_CONSUME, 0); + CheckDbPartial(ct, pdb, 0, DB_CONSUME_WAIT, 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + return (0); +} + +int TestDbPartialPGet(CuTest *ct) { + DB *pdb, *sdb; + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + OpenDb(ct, pdb, &sdb, sdbName, DB_BTREE, 0); + PopulateDb(ct, pdb, 100, 1); + CheckDbPartial(ct, sdb, 1, 0, expected); + CheckDbPartial(ct, sdb, 1, DB_GET_BOTH, expected); + CuAssert(ct, "DB->close", CloseDb(sdb) == 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + CuAssert(ct, "remove()", remove(sdbName) == 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + OpenDb(ct, pdb, &sdb, sdbName, DB_BTREE, DB_RECNUM); + PopulateDb(ct, pdb, 100, 1); + CheckDbPartial(ct, sdb, 1, DB_SET_RECNO, 0); + CuAssert(ct, "DB->close", CloseDb(sdb) == 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + CuAssert(ct, "remove()", remove(sdbName) == 0); + + /* DB_CONSUME makes no sense on a secondary index, so no test. 
*/ + + return (0); +} + +int TestCursorPartialGet(CuTest *ct) { + DB *pdb; + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + PopulateDb(ct, pdb, 100, 1); + CheckCursorPartial(ct, pdb, 0, DB_GET_BOTH, expected); + CheckCursorPartial(ct, pdb, 0, DB_GET_BOTH_RANGE, expected); + CheckCursorPartial(ct, pdb, 0, DB_SET, expected); + CheckCursorPartial(ct, pdb, 0, DB_SET_RANGE, 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, DB_RECNUM); + PopulateDb(ct, pdb, 100, 1); + CheckCursorPartial(ct, pdb, 0, DB_GET_RECNO, 0); + CheckCursorPartial(ct, pdb, 0, DB_SET_RECNO, 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, DB_DUP); + PopulateDb(ct, pdb, 100, 3); + CheckCursorPartial(ct, pdb, 0, DB_CURRENT, 0); + CheckCursorPartial(ct, pdb, 0, DB_FIRST, 0); + CheckCursorPartial(ct, pdb, 0, DB_LAST, 0); + CheckCursorPartial(ct, pdb, 0, DB_NEXT, 0); + CheckCursorPartial(ct, pdb, 0, DB_NEXT_DUP, 0); + CheckCursorPartial(ct, pdb, 0, DB_NEXT_NODUP, 0); + CheckCursorPartial(ct, pdb, 0, DB_PREV, 0); + CheckCursorPartial(ct, pdb, 0, DB_PREV_DUP, 0); + CheckCursorPartial(ct, pdb, 0, DB_PREV_NODUP, 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + + return (0); +} + +int TestCursorPartialPGet(CuTest *ct) { + DB *pdb, *sdb; + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + OpenDb(ct, pdb, &sdb, sdbName, DB_BTREE, 0); + PopulateDb(ct, pdb, 100, 1); + CheckCursorPartial(ct, sdb, 1, DB_GET_BOTH, expected); + CheckCursorPartial(ct, sdb, 1, DB_GET_BOTH_RANGE, expected); + CheckCursorPartial(ct, sdb, 1, DB_SET, expected); + CheckCursorPartial(ct, sdb, 1, DB_SET_RANGE, 0); + CuAssert(ct, "DB->close", CloseDb(sdb) == 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + CuAssert(ct, "remove()", remove(sdbName) 
== 0); + + OpenDb(ct, NULL, &pdb, dbName, DB_BTREE, 0); + OpenDb(ct, pdb, &sdb, sdbName, DB_BTREE, DB_RECNUM); + PopulateDb(ct, pdb, 100, 1); + CheckCursorPartial(ct, sdb, 1, DB_SET_RECNO, 0); + CuAssert(ct, "DB->close", CloseDb(sdb) == 0); + CuAssert(ct, "DB->close", CloseDb(pdb) == 0); + CuAssert(ct, "remove()", remove(dbName) == 0); + CuAssert(ct, "remove()", remove(sdbName) == 0); + + return (0); +} + +/* Test utilities */ +static void CheckDbPartial(CuTest *ct, + DB *dbp, int isSec, u_int32_t flags, int value) { + DBT key, data, pkey; + char kstr[10], dstr[14]; + u_int32_t indx = 5, dupindx = 0; + db_recno_t rid = 5; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&pkey, 0, sizeof(DBT)); + memset(kstr, 0, sizeof(char) * 10); + memset(dstr, 0, sizeof(char) * 14); + + key.doff = 2; + key.dlen = 1; + key.flags = DB_DBT_PARTIAL; + if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT || + flags == DB_SET_RECNO) { + if (flags == DB_SET_RECNO) { + key.data = &rid; + key.size = sizeof(db_recno_t); + } + } else if (flags == DB_GET_BOTH || flags == 0) { + memcpy(kstr, &indx, sizeof(u_int32_t)); + memcpy(kstr + 4, "hello", sizeof(char) * 5); + key.data = kstr; + key.size = 10; + if (flags == DB_GET_BOTH) { + if (isSec == 0) { + memcpy(dstr, kstr, sizeof(char) * 9); + memcpy(dstr + 9, &dupindx, sizeof(u_int32_t)); + data.data = dstr; + data.size = 14; + } else { + pkey.data = kstr; + pkey.size = 10; + } + } + } else { + fprintf(stderr, "Invalid test flags: %d\n", flags); + return; + } + + if (isSec == 0) { + CuAssert(ct, "DB->get", + dbp->get(dbp, NULL, &key, &data, flags) == value); + if (value == 0) + CuAssert(ct, "Partial DBT size", key.size == key.dlen); + } else { + CuAssert(ct, "DB->pget", + dbp->pget(dbp, NULL, &key, &pkey, &data, flags) == value); + if (value == 0) + CuAssert(ct, "Partial DBT size", key.size == key.dlen); + + /* Partial pkey is always invalid. 
*/ + pkey.doff = 1; + pkey.dlen = 2; + pkey.flags = DB_DBT_PARTIAL; + CuAssert(ct, "DB->pget", dbp->pget(dbp, NULL, &key, &pkey, + &data, flags) == expected); + } +} + +static void CheckCursorPartial(CuTest *ct, + DB *dbp, int isSec, u_int32_t flags, int value) { + DBT key, data, pkey; + DBC *cursor = NULL; + char kstr[10], dstr[14]; + u_int32_t indx = 5, dupindx = 0; + db_recno_t rid = 5; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&pkey, 0, sizeof(DBT)); + memset(kstr, 0, sizeof(char) * 10); + memcpy(kstr, &indx, sizeof(u_int32_t)); + memcpy(kstr + 4, "hello", sizeof(char) * 5); + memset(dstr, 0, sizeof(char) * 14); + memcpy(dstr, kstr, sizeof(char) * 9); + memcpy(dstr + 9, &dupindx, sizeof(u_int32_t)); + + CuAssert(ct, "DB->cursor", dbp->cursor(dbp, NULL, &cursor, 0) == 0); + + key.doff = 2; + key.dlen = 1; + key.flags = DB_DBT_PARTIAL; + if (flags == DB_SET_RECNO) { + key.data = &rid; + key.size = sizeof(db_recno_t); + } else if (flags == DB_GET_BOTH || flags == DB_GET_BOTH_RANGE || + flags == DB_SET || flags == DB_SET_RANGE) { + key.data = kstr; + key.size = 10; + if (flags == DB_GET_BOTH || flags == DB_GET_BOTH_RANGE) { + if (isSec == 0) { + data.data = dstr; + data.size = 14; + } else { + pkey.data = kstr; + pkey.size = 10; + } + } + } else if (flags == DB_CURRENT || flags == DB_GET_RECNO || + flags == DB_NEXT || flags == DB_NEXT_DUP || + flags == DB_NEXT_NODUP) { + CuAssert(ct, "DBC->get", + cursor->get(cursor, &key, &data, DB_FIRST) == 0); + } else if (flags == DB_PREV || flags == DB_PREV_DUP || + flags == DB_PREV_NODUP) { + CuAssert(ct, "DBC->get", + cursor->get(cursor, &key, &data, DB_LAST) == 0); + } else { + if (flags != DB_FIRST && flags != DB_LAST) { + fprintf(stderr, "Invalid test flags: %d\n", flags); + return; + } + } + + if (isSec == 0) { + CuAssert(ct, "DBC->get", + cursor->get(cursor, &key, &data, flags) == value); + if (value == 0) + CuAssert(ct, "Partial DBT size", key.size == key.dlen); + } else { + 
CuAssert(ct, "DBC->pget", + cursor->pget(cursor, &key, &pkey, &data, flags) == value); + if (value == 0) + CuAssert(ct, "Partial DBT size", key.size == key.dlen); + + /* Partial pkey is always invalid. */ + pkey.doff = 1; + pkey.dlen = 2; + pkey.flags = DB_DBT_PARTIAL; + CuAssert(ct, "DBC->pget", cursor->pget(cursor, &key, &pkey, + &data, flags) == expected); + } + + cursor->close(cursor); +} + +static int CloseDb(DB *dbp) { + return dbp->close(dbp, 0); +} + +static void OpenDb(CuTest *ct, DB *ldbp, DB **dbpp, const char *dbName, + DBTYPE type, u_int32_t openFlags) { + DB *dbp = NULL; + + CuAssert(ct, "db_create", db_create(&dbp, NULL, 0) == 0); + dbp->set_errcall(dbp, NULL); + CuAssert(ct, "DB->set_flags", dbp->set_flags(dbp, openFlags) == 0); + if (type == DB_QUEUE) + CuAssert(ct, "DB->set_re_len", + dbp->set_re_len(dbp, ldbp == NULL ? 14 : 10) == 0); + CuAssert(ct, "DB->open", dbp->open(dbp, NULL, dbName, NULL, type, + DB_CREATE, 0) == 0); + if (ldbp != NULL) + CuAssert(ct, "DB->associate", ldbp->associate(ldbp, NULL, dbp, + Getkeystring, 0) == 0); + *dbpp = dbp; +} + +static int Getkeystring( + DB *secondary, const DBT *pkey, const DBT *pdata, DBT *skey) +{ + memset(skey, 0, sizeof(DBT)); + skey->data = pdata->data; + skey->size = pkey->size; + return (0); +} + +static void PopulateDb(CuTest *ct, DB *db, u_int32_t nkeys, u_int32_t ndups) { + DBT key, data; + char kstr[10], dstr[14]; + DBTYPE type; + u_int32_t i, j, start; + + start = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(kstr, 0, sizeof(char) * 10); + memcpy(kstr + 4, "hello", 5 * sizeof(char)); + memset(dstr, 0, sizeof(char) * 14); + + CuAssert(ct, "DB->get_type", db->get_type(db, &type) == 0); + + for (i = 1; i <= nkeys; i++) { + key.data = kstr; + if (type == DB_BTREE || type == DB_HASH) { + memcpy(kstr, &i, sizeof(u_int32_t)); + key.size = sizeof(char) * 10; + } else { + memcpy(kstr, &i, sizeof(u_int32_t)); + key.size = sizeof(db_recno_t); + } + for (j = 0; j < ndups; 
j++) { + memcpy(dstr, kstr, sizeof(char) * 9); + memcpy(dstr + 9, &j, sizeof(u_int32_t)); + data.data = dstr; + data.size = sizeof(char) * 14; + + CuAssert(ct, "DB->put", + db->put(db, NULL, &key, &data, 0) == 0); + } + } +} diff --git a/test/c/suites/TestQueue.c b/test/c/suites/TestQueue.c new file mode 100644 index 00000000..c6f48331 --- /dev/null +++ b/test/c/suites/TestQueue.c @@ -0,0 +1,845 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + * + * A C test for the queue access method. + * TODO: Make this more consistent with the CuTest harness. + */ + +#include "db.h" +#include "CuTest.h" + +#include + +#include +#include +#include +#include + +#ifndef _WINDOWS +#include +#include +#endif + +#include "dbinc/queue.h" +#include "dbinc/shqueue.h" + +typedef enum { + FORWARD_WALK_FAILED = 1, + FOREACH_WALK_FAILED, + LIST_END_NOT_MARKED_FAILURE, + PREV_WALK_FAILED, + REVERSE_FOREACH_WALK_FAILED, + EXPECTED_HEAD_FAILED +} FAILURE_REASON; + +const char *failure_reason_names[] = { + "", + "walking the list using the _NEXT forward failed", + "walking the list using the _FOREACH macro failed", + "what was expected to be the last element wasn't marked as such", + "walking the list using the _PREV macro failed", + "walking the list using the _REVERSE_FOREACH macro failed", + "expected to be at the head of the list" +}; + +SH_LIST_HEAD(sh_lq); +struct sh_le { + char content; + SH_LIST_ENTRY sh_les; +}; + +/* create a string from the content of a list queue */ +char * +sh_l_as_string(l) + struct sh_lq *l; +{ + static char buf[1024]; + struct sh_le *ele = SH_LIST_FIRST(l, sh_le); + int i = 1; + + buf[0] = '"'; + while (ele != NULL) { + buf[i] = ele->content; + ele = SH_LIST_NEXT(ele, sh_les, sh_le); + if (ele != NULL) + buf[++i] = ' '; + i++; + } + buf[i++] = '"'; + buf[i] = '\0'; + return buf; +} + +/* init a list queue */ +struct sh_lq * +sh_l_init(items) + const char *items; 
+{ + const char *c = items; + struct sh_le *ele = NULL, *last_ele = (struct sh_le*)-1; + struct sh_lq *l = calloc(1, sizeof(struct sh_lq)); + + SH_LIST_INIT(l); + + while (*c != '\0') { + if (c[0] != ' ') { + last_ele = ele; + ele = calloc(1, sizeof(struct sh_le)); + ele->content = c[0]; + if (SH_LIST_EMPTY(l)) + SH_LIST_INSERT_HEAD(l, ele, sh_les, sh_le); + else + SH_LIST_INSERT_AFTER( + last_ele, ele, sh_les, sh_le); + } + c++; + } + return (l); +} + +struct sh_lq * +sh_l_remove_head(l) + struct sh_lq *l; +{ + struct sh_le *ele = SH_LIST_FIRST(l, sh_le); + + SH_LIST_REMOVE_HEAD(l, sh_les, sh_le); + if (ele != NULL) + free(ele); + + return (l); +} + +struct sh_lq * +sh_l_remove_tail(l) + struct sh_lq *l; +{ + struct sh_le *ele = SH_LIST_FIRST(l, sh_le); + + if (SH_LIST_EMPTY(l)) + return (l); + + while (SH_LIST_NEXT(ele, sh_les, sh_le) != NULL) + ele = SH_LIST_NEXT(ele, sh_les, sh_le); + + if (ele) { + SH_LIST_REMOVE(ele, sh_les, sh_le); + free(ele); + } + return (l); +} + +struct sh_lq * +sh_l_remove_item(l, item) + struct sh_lq *l; + const char *item; +{ + struct sh_le *ele = SH_LIST_FIRST(l, sh_le); + + while (ele != NULL) { + if (ele->content == item[0]) + break; + ele = SH_LIST_NEXT(ele, sh_les, sh_le); + } + if (ele) + SH_LIST_REMOVE(ele, sh_les, sh_le); + return (l); +} + +struct sh_lq * +sh_l_insert_head(l, item) + struct sh_lq *l; + const char *item; +{ + struct sh_le *ele = calloc(1, sizeof(struct sh_le)); + + ele->content = item[0]; + SH_LIST_INSERT_HEAD(l, ele, sh_les, sh_le); + + return (l); +} + +struct sh_lq * +sh_l_insert_tail(l, item) + struct sh_lq *l; + const char *item; +{ + struct sh_le *ele = NULL; + struct sh_le *last_ele = SH_LIST_FIRST(l, sh_le); + + if (last_ele != NULL) + while (SH_LIST_NEXT(last_ele, sh_les, sh_le) != NULL) + last_ele = SH_LIST_NEXT(last_ele, sh_les, sh_le); + + if (last_ele == NULL) { + ele = calloc(1, sizeof(struct sh_le)); + ele->content = item[0]; + SH_LIST_INSERT_HEAD(l, ele, sh_les, sh_le); + } else { + ele = 
calloc(1, sizeof(struct sh_le)); + ele->content = item[0]; + SH_LIST_INSERT_AFTER(last_ele, ele, sh_les, sh_le); + } + + return (l); +} + +struct sh_lq * +sh_l_insert_before(l, item, before_item) + struct sh_lq *l; + const char *item; + const char *before_item; +{ + struct sh_le *ele = NULL; + struct sh_le *before_ele = SH_LIST_FIRST(l, sh_le); + + while (before_ele != NULL) { + if (before_ele->content == before_item[0]) + break; + before_ele = SH_LIST_NEXT(before_ele, sh_les, sh_le); + } + if (before_ele != NULL) { + ele = calloc(1, sizeof(struct sh_le)); + ele->content = item[0]; + SH_LIST_INSERT_BEFORE(l, before_ele, ele, sh_les, sh_le); + } + return (l); +} + +struct sh_lq * +sh_l_insert_after(l, item, after_item) + struct sh_lq *l; + const char *item; + const char *after_item; +{ + struct sh_le *ele = NULL; + struct sh_le *after_ele = SH_LIST_FIRST(l, sh_le); + + while (after_ele != NULL) { + if (after_ele->content == after_item[0]) + break; + after_ele = SH_LIST_NEXT(after_ele, sh_les, sh_le); + } + if (after_ele != NULL) { + ele = calloc(1, sizeof(struct sh_le)); + ele->content = item[0]; + SH_LIST_INSERT_AFTER(after_ele, ele, sh_les, sh_le); + } + return (l); +} + +void +sh_l_discard(l) + struct sh_lq *l; +{ + struct sh_le *ele = NULL; + + while ((ele = SH_LIST_FIRST(l, sh_le)) != NULL) { + SH_LIST_REMOVE(ele, sh_les, sh_le); + free(ele); + } + + free(l); +} + +int +sh_l_verify(l, items) + struct sh_lq *l; + const char *items; +{ + const char *c = items; + struct sh_le *ele = NULL, *lele = NULL; + int i = 0, nele = 0; + + while (*c != '\0') { + if (c[0] != ' ') + nele++; + c++; + } + + /* use the FOREACH macro to walk the list */ + c = items; + i = 0; + SH_LIST_FOREACH(ele, l, sh_les, sh_le) { + if (ele->content != c[0]) + return (FOREACH_WALK_FAILED); + i++; + c +=2; + } + if (i != nele) + return (FOREACH_WALK_FAILED); + i = 0; + if (items[0] != '\0') { + /* walk the list forward */ + c = items; + ele = SH_LIST_FIRST(l, sh_le); + while (*c != '\0') { + 
lele = ele; + if (c[0] != ' ') { + if (ele->content != c[0]) + return (FORWARD_WALK_FAILED); + i++; + ele = SH_LIST_NEXT(ele, sh_les, sh_le); + } + c++; + } + ele = lele; + + if (i != nele) + return (FOREACH_WALK_FAILED); + + /* ele should be the last element in the list... */ + /* ... so sle_next should be -1 */ + if (ele->sh_les.sle_next != -1) + return (LIST_END_NOT_MARKED_FAILURE); + + /* and NEXT needs to be NULL */ + if (SH_LIST_NEXT(ele, sh_les, sh_le) != NULL) + return (LIST_END_NOT_MARKED_FAILURE); + + /* + * walk the list backwards using PREV macro, first move c + * back a bit + */ + c--; + i = 0; + while (c >= items) { + if (c[0] != ' ') { + lele = ele; + if (ele->content != c[0]) + return (PREV_WALK_FAILED); + ele = SH_LIST_PREV(ele, sh_les, sh_le); + i++; + } + c--; + } + ele = lele; + + if (i != nele) + return (PREV_WALK_FAILED); + + if (ele != SH_LIST_FIRST(l, sh_le)) + return (EXPECTED_HEAD_FAILED); + } + return (0); +} + +SH_TAILQ_HEAD(sh_tq); +struct sh_te { + char content; + SH_TAILQ_ENTRY sh_tes; +}; + +/* create a string from the content of a list queue */ +char * +sh_t_as_string(l) + struct sh_tq *l; +{ + static char buf[1024]; + struct sh_te *ele = SH_TAILQ_FIRST(l, sh_te); + int i = 1; + + buf[0] = '"'; + while (ele != NULL) { + buf[i] = ele->content; + ele = SH_TAILQ_NEXT(ele, sh_tes, sh_te); + if (ele != NULL) + buf[++i] = ' '; + i++; + } + buf[i++] = '"'; + buf[i] = '\0'; + return (buf); +} + +/* init a tail queue */ +struct sh_tq * +sh_t_init(items) + const char *items; +{ + const char *c = items; + struct sh_te *ele = NULL, *last_ele = (struct sh_te*)-1; + struct sh_tq *l = calloc(1, sizeof(struct sh_tq)); + + SH_TAILQ_INIT(l); + + while (*c != '\0') { + if (c[0] != ' ') { + ele = calloc(1, sizeof(struct sh_te)); + ele->content = c[0]; + + if (SH_TAILQ_EMPTY(l)) + SH_TAILQ_INSERT_HEAD(l, ele, sh_tes, sh_te); + else + SH_TAILQ_INSERT_AFTER( + l, last_ele, ele, sh_tes, sh_te); + last_ele = ele; + } + c++; + } + return (l); +} + +struct 
sh_tq * +sh_t_remove_head(l) + struct sh_tq *l; +{ + struct sh_te *ele = SH_TAILQ_FIRST(l, sh_te); + + if (ele != NULL) + SH_TAILQ_REMOVE(l, ele, sh_tes, sh_te); + + free(ele); + + return (l); +} + +struct sh_tq * +sh_t_remove_tail(l) + struct sh_tq *l; +{ + struct sh_te *ele = SH_TAILQ_FIRST(l, sh_te); + + if (SH_TAILQ_EMPTY(l)) + return (l); + + while (SH_TAILQ_NEXT(ele, sh_tes, sh_te) != NULL) + ele = SH_TAILQ_NEXT(ele, sh_tes, sh_te); + + if (ele != NULL) { + SH_TAILQ_REMOVE(l, ele, sh_tes, sh_te); + free(ele); + } + + return (l); +} + +struct sh_tq * +sh_t_remove_item(l, item) + struct sh_tq *l; + const char *item; +{ + struct sh_te *ele = SH_TAILQ_FIRST(l, sh_te); + + while (ele != NULL) { + if (ele->content == item[0]) + break; + ele = SH_TAILQ_NEXT(ele, sh_tes, sh_te); + } + if (ele != NULL) + SH_TAILQ_REMOVE(l, ele, sh_tes, sh_te); + + return (l); +} + +struct sh_tq * +sh_t_insert_head(l, item) + struct sh_tq *l; + const char *item; +{ + struct sh_te *ele = calloc(1, sizeof(struct sh_te)); + + ele->content = item[0]; + SH_TAILQ_INSERT_HEAD(l, ele, sh_tes, sh_te); + + return (l); +} + +struct sh_tq * +sh_t_insert_tail(l, item) + struct sh_tq *l; + const char *item; +{ + struct sh_te *ele = 0; + ele = calloc(1, sizeof(struct sh_te)); + ele->content = item[0]; + SH_TAILQ_INSERT_TAIL(l, ele, sh_tes); + return l; +} + +struct sh_tq * +sh_t_insert_before(l, item, before_item) + struct sh_tq *l; + const char *item; + const char *before_item; +{ + struct sh_te *ele = NULL; + struct sh_te *before_ele = SH_TAILQ_FIRST(l, sh_te); + + while (before_ele != NULL) { + if (before_ele->content == before_item[0]) + break; + before_ele = SH_TAILQ_NEXT(before_ele, sh_tes, sh_te); + } + + if (before_ele != NULL) { + ele = calloc(1, sizeof(struct sh_te)); + ele->content = item[0]; + SH_TAILQ_INSERT_BEFORE(l, before_ele, ele, sh_tes, sh_te); + } + + return (l); +} + +struct sh_tq * +sh_t_insert_after(l, item, after_item) + struct sh_tq *l; + const char *item; + const char 
*after_item; +{ + struct sh_te *ele = NULL; + struct sh_te *after_ele = SH_TAILQ_FIRST(l, sh_te); + + while (after_ele != NULL) { + if (after_ele->content == after_item[0]) + break; + after_ele = SH_TAILQ_NEXT(after_ele, sh_tes, sh_te); + } + + if (after_ele != NULL) { + ele = calloc(1, sizeof(struct sh_te)); + ele->content = item[0]; + SH_TAILQ_INSERT_AFTER(l, after_ele, ele, sh_tes, sh_te); + } + + return (l); +} + +void +sh_t_discard(l) + struct sh_tq *l; +{ + struct sh_te *ele = NULL; + + while ((ele = SH_TAILQ_FIRST(l, sh_te)) != NULL) { + SH_TAILQ_REMOVE(l, ele, sh_tes, sh_te); + free(ele); + } + free(l); +} + +int +sh_t_verify(l, items) + struct sh_tq *l; + const char *items; +{ + const char *c = items, *b = NULL; + struct sh_te *ele = NULL, *lele = NULL; + int i = 0, nele = 0; + + while (*c != '\0') { + if (c[0] != ' ') + nele++; + c++; + } + + /* use the FOREACH macro to walk the list */ + c = items; + i = 0; + SH_TAILQ_FOREACH(ele, l, sh_tes, sh_te) { + if (ele->content != c[0]) + return (FOREACH_WALK_FAILED); + i++; + c +=2; + } + if (i != nele) + return (FOREACH_WALK_FAILED); + i = 0; + if (items[0] != '\0') { + /* walk the list forward */ + c = items; + ele = SH_TAILQ_FIRST(l, sh_te); + while (*c != '\0') { + lele = ele; + if (c[0] != ' ') { + if (ele->content != c[0]) + return (FORWARD_WALK_FAILED); + i++; + ele = SH_TAILQ_NEXT(ele, sh_tes, sh_te); + } + c++; + } + + if (i != nele) + return (FOREACH_WALK_FAILED); + + if (lele != SH_TAILQ_LAST(l, sh_tes, sh_te)) + return (LIST_END_NOT_MARKED_FAILURE); + ele = lele; + + /* ele should be the last element in the list... */ + /* ... 
so sle_next should be -1 */ + if (ele->sh_tes.stqe_next != -1) + return (LIST_END_NOT_MARKED_FAILURE); + + /* and NEXT needs to be NULL */ + if (SH_TAILQ_NEXT(ele, sh_tes, sh_te) != NULL) + return (LIST_END_NOT_MARKED_FAILURE); + + /* walk the list backwards using SH_LIST_PREV macro */ + c--; + b = c; + i = 0; + while (c >= items) { + if (c[0] != ' ') { + lele = ele; + if (ele->content != c[0]) + return (PREV_WALK_FAILED); + ele = SH_TAILQ_PREV(l, ele, sh_tes, sh_te); + i++; + } + c--; + } + ele = lele; + + if (i != nele) + return (PREV_WALK_FAILED); + + if (ele != SH_TAILQ_FIRST(l, sh_te)) + return (-1); + + /* c should be the last character in the array, walk backwards + from here using FOREACH_REVERSE and check the values again */ + c = b; + i = 0; + ele = SH_TAILQ_LAST(l, sh_tes, sh_te); + SH_TAILQ_FOREACH_REVERSE(ele, l, sh_tes, sh_te) { + if (ele->content != c[0]) + return (REVERSE_FOREACH_WALK_FAILED); + i++; + c -=2; + } + if (i != nele) + return (REVERSE_FOREACH_WALK_FAILED); + } + return (0); +} + +int +sh_t_verify_TAILQ_LAST(l, items) + struct sh_tq *l; + const char *items; +{ + const char *c = items; + struct sh_te *ele = NULL; + + c = items; + while (*c != '\0') { + c++; + } + if (c == items) { + /* items is empty, so last should be NULL */ + if (SH_TAILQ_LAST(l, sh_tes, sh_te) != NULL) + return (-1); + } else { + c--; + ele = SH_TAILQ_LAST(l, sh_tes, sh_te); + if (ele->content != c[0]) + return (-1); + } + return (0); +} + +typedef void *qds_t; +struct { + const char *name; + qds_t *(*f_init)(const char *); + qds_t *(*f_remove_head)(qds_t *); + qds_t *(*f_remove_tail)(qds_t *); + qds_t *(*f_remove_item)(qds_t *, const char *); + qds_t *(*f_insert_head)(qds_t *, const char *); + qds_t *(*f_insert_tail)(qds_t *, const char *); + qds_t *(*f_insert_before)(qds_t *, const char *, const char *); + qds_t *(*f_insert_after)(qds_t *, const char *, const char *); + qds_t *(*f_discard)(qds_t *); + char *(*f_as_string)(qds_t *); + int (*f_verify)(qds_t *, const 
char *); +} qfns[]= { +{ "sh_list", + (qds_t*(*)(const char *))sh_l_init, + (qds_t*(*)(qds_t *))sh_l_remove_head, + (qds_t*(*)(qds_t *))sh_l_remove_tail, + (qds_t*(*)(qds_t *, const char *))sh_l_remove_item, + (qds_t*(*)(qds_t *, const char *))sh_l_insert_head, + (qds_t*(*)(qds_t *, const char *))sh_l_insert_tail, + (qds_t*(*)(qds_t *, const char *, const char *))sh_l_insert_before, + (qds_t*(*)(qds_t *, const char *, const char *))sh_l_insert_after, + (qds_t*(*)(qds_t *))sh_l_discard, + (char *(*)(qds_t *))sh_l_as_string, + (int(*)(qds_t *, const char *))sh_l_verify }, +{ "sh_tailq", + (qds_t*(*)(const char *))sh_t_init, + (qds_t*(*)(qds_t *))sh_t_remove_head, + (qds_t*(*)(qds_t *))sh_t_remove_tail, + (qds_t*(*)(qds_t *, const char *))sh_t_remove_item, + (qds_t*(*)(qds_t *, const char *))sh_t_insert_head, + (qds_t*(*)(qds_t *, const char *))sh_t_insert_tail, + (qds_t*(*)(qds_t *, const char *, const char *))sh_t_insert_before, + (qds_t*(*)(qds_t *, const char *, const char *))sh_t_insert_after, + (qds_t*(*)(qds_t *))sh_t_discard, + (char *(*)(qds_t *))sh_t_as_string, + (int(*)(qds_t *, const char *))sh_t_verify } +}; + +typedef enum { + INSERT_BEFORE, + INSERT_AFTER, + INSERT_HEAD, + INSERT_TAIL, + REMOVE_HEAD, + REMOVE_ITEM, + REMOVE_TAIL, +} OP; + +const char *op_names[] = { + "INSERT_BEFORE", + "INSERT_AFTER", + "INSERT_HEAD", + "INSERT_TAIL", + "REMOVE_HEAD", + "REMOVE_ITEM", + "REMOVE_TAIL" }; + +struct { + char *init; /* initial state. */ + char *final; /* final state. */ + char *elem; /* element to operate on */ + char *insert; /* element to insert */ + OP op; /* operation. 
*/ +} ops[] = { + + /* most operations on a empty list */ + { "", "", NULL, NULL, REMOVE_HEAD }, + { "", "", NULL, NULL, REMOVE_TAIL }, + { "", "A", NULL, "A", INSERT_HEAD }, + { "", "A", NULL, "A", INSERT_TAIL }, + + /* all operations on a one element list */ + { "A", "", NULL, NULL, REMOVE_HEAD }, + { "A", "", NULL, NULL, REMOVE_TAIL }, + { "A", "", "A", NULL, REMOVE_ITEM }, + { "B", "A B", NULL, "A", INSERT_HEAD }, + { "A", "A B", NULL, "B", INSERT_TAIL }, + { "B", "A B", "B", "A", INSERT_BEFORE }, + { "A", "A B", "A", "B", INSERT_AFTER }, + + /* all operations on a two element list */ + { "A B", "B", NULL, NULL, REMOVE_HEAD }, + { "A B", "A", NULL, NULL, REMOVE_TAIL }, + { "A B", "A", "B", NULL, REMOVE_ITEM }, + { "A B", "B", "A", NULL, REMOVE_ITEM }, + { "B C", "A B C", NULL, "A", INSERT_HEAD }, + { "A B", "A B C", NULL, "C", INSERT_TAIL }, + { "B C", "A B C", "B", "A", INSERT_BEFORE }, + { "A C", "A B C", "C", "B", INSERT_BEFORE }, + { "A C", "A B C", "A", "B", INSERT_AFTER }, + { "A C", "A C B", "C", "B", INSERT_AFTER }, + + /* all operations on a three element list */ + + { "A B C", "B C", NULL, NULL, REMOVE_HEAD }, + { "A B C", "A B", NULL, NULL, REMOVE_TAIL }, + { "A B C", "A B", "C", NULL, REMOVE_ITEM }, + { "A B C", "A C", "B", NULL, REMOVE_ITEM }, + { "A B C", "B C", "A", NULL, REMOVE_ITEM }, + { "B C D", "A B C D", NULL, "A", INSERT_HEAD }, + { "A B C", "A B C D", NULL, "D", INSERT_TAIL }, + { "A B C", "X A B C", "A", "X", INSERT_BEFORE }, + { "A B C", "A X B C", "B", "X", INSERT_BEFORE }, + { "A B C", "A B X C", "C", "X", INSERT_BEFORE }, + { "A B C", "A X B C", "A", "X", INSERT_AFTER }, + { "A B C", "A B X C", "B", "X", INSERT_AFTER }, + { "A B C", "A B C X", "C", "X", INSERT_AFTER }, +}; + +int TestQueue(CuTest *ct) { + void *list; + int fc, tc; /* tc is total count, fc is failed count */ + int eval, i, t, result; + + eval = 0; + for (t = 0; t < sizeof(qfns) / sizeof(qfns[0]); ++t) { + fc = tc = 0; + printf("TESTING: %s\n", qfns[t].name); + + for 
(i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) { + list = qfns[t].f_init(ops[i].init); + result = qfns[t].f_verify(list, ops[i].init); + if (result == 0) { + fc++; + putchar('.'); + } else { + putchar('+'); /* + means failed before op */ + printf("\nVerify failed: %s\n", + failure_reason_names[result]); + eval = 1; + } + if (!strcmp("sh_tailq", qfns[t].name)) { + result = + sh_t_verify_TAILQ_LAST(list, ops[i].init); + } +#ifdef VERBOSE + printf("\ncase %d %s in %s init: \"%s\" desired: \"%s\" elem: \"%s\" insert: \"%s\"\n", + i, op_names[ops[i].op], qfns[t].name, + ops[i].init, ops[i].final, + ops[i].elem, ops[i].insert); + fflush(stdout); +#endif + tc++; + switch (ops[i].op) { + case REMOVE_HEAD: + qfns[t].f_remove_head(list); + break; + case REMOVE_TAIL: + qfns[t].f_remove_tail(list); + break; + case REMOVE_ITEM: + qfns[t].f_remove_item(list, ops[i].elem); + break; + case INSERT_HEAD: + qfns[t].f_insert_head(list, ops[i].insert); + break; + case INSERT_TAIL: + qfns[t].f_insert_tail(list, ops[i].insert); + break; + case INSERT_BEFORE: + qfns[t].f_insert_before( + list, ops[i].insert, ops[i].elem); + break; + case INSERT_AFTER: + qfns[t].f_insert_after( + list, ops[i].insert, ops[i].elem); + break; + } + if (!strcmp("sh_tailq", op_names[ops[i].op])) { + result = sh_t_verify_TAILQ_LAST(list, + ops[i].final); + } + if (result == 0) + result = qfns[t].f_verify(list, ops[i].final); + if (result == 0) { + fc++; + putchar('.'); + } else { + putchar('*'); /* * means failed after op */ + printf("\ncase %d %s in %s init: \"%s\" desired: \"%s\" elem: \"%s\" insert: \"%s\" got: %s - %s\n", + i, op_names[ops[i].op], qfns[t].name, + ops[i].init, ops[i].final, + ops[i].elem, ops[i].insert, + qfns[t].f_as_string(list), + failure_reason_names[result]); + fflush(stdout); + eval = 1; + } + + tc++; + qfns[t].f_discard(list); + } + + printf("\t%0.2f%% passed (%d/%d).\n", + (((double)fc/tc) * 100), fc, tc); + } + return (eval); +} diff --git a/test/c/test_api_methods.c 
b/test/c/test_api_methods.c new file mode 100644 index 00000000..fe39bfc8 --- /dev/null +++ b/test/c/test_api_methods.c @@ -0,0 +1,95 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +/* added to get clean compile on linux blade server else pread undefined */ +#ifdef __linux__ +#define __USE_UNIX98 +#endif +#include + +#include "db.h" + +#define E(api, func, name) { \ + if ((ret = api(func)) != 0) { \ + fprintf(stderr, "%s: %s", name, db_strerror(ret)); \ + return (1); \ + } \ +} + +#define F(api, func1, func2, name) { \ + if ((ret = api(func1, func2)) != 0) { \ + fprintf(stderr, "%s: %s", name, db_strerror(ret)); \ + return (1); \ + } \ +} + +void +dirfree(char **namesp, int cnt) +{ return; } +int +dirlist(const char *dir, char ***namesp, int *cntp) +{ return (0); } +int +exists(const char *path, int *isdirp) +{ return (0); } +int +ioinfo(const char *path, + int fd, u_int32_t *mbytesp, u_int32_t *bytesp, u_int32_t *iosizep) +{ return (0); } +int +file_map(DB_ENV *dbenv, char *path, size_t len, int is_readonly, void **addr) +{ return (0); } +int +region_map(DB_ENV *dbenv, char *path, size_t len, int *is_create, void **addr) +{ return (0); } +int +seek(int fd, off_t offset, int whence) +{ return (0); } +int +local_sleep(u_long seconds, u_long microseconds) +{ return (0); } +int +unmap(DB_ENV *dbenv, void *addr) +{ return (0); } + +int +main(int argc, char *argv[]) +{ + int ret; + + E(db_env_set_func_close, close, "close"); + E(db_env_set_func_dirfree, dirfree, "dirfree"); + E(db_env_set_func_dirlist, dirlist, "dirlist"); + E(db_env_set_func_exists, exists, "exists"); + F(db_env_set_func_file_map, file_map, unmap, "file map"); + E(db_env_set_func_free, free, "free"); + E(db_env_set_func_fsync, fsync, "fsync"); + E(db_env_set_func_ftruncate, ftruncate, "ftruncate"); + 
E(db_env_set_func_ioinfo, ioinfo, "ioinfo"); + E(db_env_set_func_malloc, malloc, "malloc"); + E(db_env_set_func_open, open, "open"); + E(db_env_set_func_pread, pread, "pread"); + E(db_env_set_func_pwrite, pwrite, "pwrite"); + E(db_env_set_func_read, read, "read"); + E(db_env_set_func_realloc, realloc, "realloc"); + F(db_env_set_func_region_map, region_map, unmap, "region map"); + E(db_env_set_func_rename, rename, "rename"); + E(db_env_set_func_seek, seek, "seek"); + E(db_env_set_func_unlink, unlink, "unlink"); + E(db_env_set_func_write, write, "write"); + E(db_env_set_func_yield, local_sleep, "sleep/yield"); + + return (0); +} diff --git a/test/c/test_log_verify.c b/test/c/test_log_verify.c new file mode 100644 index 00000000..1088f233 --- /dev/null +++ b/test/c/test_log_verify.c @@ -0,0 +1,265 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "db_config.h" +#include "db_int.h" +#include "db.h" +#include "dbinc/log_verify.h" + +static int put_get_cmp_ckp __P((DB_LOG_VRFY_INFO *, VRFY_CKP_INFO *, + DB_LSN)); + +static int put_get_cmp_ts __P((DB_LOG_VRFY_INFO *, VRFY_TIMESTAMP_INFO *, + DB_LSN)); +static int put_get_cmp_freg __P((DB_LOG_VRFY_INFO *, VRFY_FILEREG_INFO *, + const DBT *)); +static int put_get_cmp __P((DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, u_int32_t)); +static int dbt_cmp __P((const DBT *, const DBT *)); +static int dbtarr_cmp __P((const DBT *, const DBT *, u_int32_t)); +/* + * __rem_last_recycle_lsn , clear_fileups __put_filelife, __del_filelife + * __get_filelife __get_filereg_by_dbregid __add_dbregid __get_last_ckp_info + * __get_latest_timestamp_info _find_lsnrg_by_timerg __add_txnrange + * __get_aborttxn __txn_started __add_page_to_txn __del_txn_pages + */ +int +main(argc, argv) + int argc; + char **argv; +{ + int i, ret; + DB_LOG_VERIFY_CONFIG cfg; + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TXN_INFO txninfo; + VRFY_FILEREG_INFO 
freginfo; + VRFY_CKP_INFO ckpinfo; + VRFY_TIMESTAMP_INFO tsinfo; + DB_LSN rlsn; + char *buf; + u_int32_t bufsz; + DBT fid, *qdbt; + DB_THREAD_INFO *ip; + DB_ENV *dbenv; + + memset(&cfg, 0, sizeof(cfg)); + buf = malloc(bufsz = 2048);// trash bytes to make DBT fileids. + cfg.temp_envhome = NULL; + cfg.cachesize = 8 * 1024 * 1024; + + lvinfop = NULL; + memset(&txninfo, 0, sizeof(txninfo)); + memset(&freginfo, 0, sizeof(freginfo)); + memset(&ckpinfo, 0, sizeof(ckpinfo)); + memset(&tsinfo, 0, sizeof(tsinfo)); + memset(&fid, 0, sizeof(fid)); + db_env_create(&dbenv, 0); + dbenv->open(dbenv, NULL, DB_CREATE | DB_INIT_MPOOL, 0644); + + ENV_ENTER(dbenv->env, ip); + if (__create_log_vrfy_info(&cfg, &lvinfop, ip)) + return -1; + + + txninfo.txnid = 80000001; + rlsn.file = 1; + put_get_cmp(lvinfop, &txninfo, txninfo.txnid); + for (i = 1000; i <= 2000; i += 100) { + rlsn.offset = i; + if (ret = __add_recycle_lsn_range(lvinfop, &rlsn, 80000000, 80000300)) + goto err; + if (ret = put_get_cmp(lvinfop, &txninfo, txninfo.txnid)) + goto err; + if (i % 200) { + fid.data = buf + abs(rand()) % (bufsz / 2); + fid.size = (char *)fid.data - buf; + if (ret = __add_file_updated(&txninfo, &fid, i)) + goto err; + } + if ((i % 200 == 0) && (ret = __del_file_updated(&txninfo, &fid))) + goto err; + if (ret = put_get_cmp(lvinfop, &txninfo, txninfo.txnid)) + goto err; + } + freginfo.fileid = fid; + freginfo.fname = "mydb.db"; + if (ret = put_get_cmp_freg(lvinfop, &freginfo, &freginfo.fileid)) + goto err; + + ckpinfo.lsn.file = 2; + ckpinfo.lsn.offset = 3201; + ckpinfo.ckplsn.file = 2; + ckpinfo.ckplsn.offset = 2824; + if (ret = put_get_cmp_ckp(lvinfop, &ckpinfo, ckpinfo.lsn)) + goto err; + + tsinfo.lsn.file = 1; + tsinfo.lsn.offset = 829013; + tsinfo.timestamp = time(NULL); + tsinfo.logtype = 123; + if (ret = put_get_cmp_ts(lvinfop, &tsinfo, tsinfo.lsn)) + goto err; + +err: + __destroy_log_vrfy_info(lvinfop); + ENV_LEAVE(dbenv->env, ip); + dbenv->close(dbenv, 0); + return ret; +} + +static int 
+put_get_cmp_ckp(lvinfop, ckp, lsn) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_CKP_INFO *ckp; + DB_LSN lsn; +{ + int ret; + VRFY_CKP_INFO *ckppp; + + ckppp = NULL; + if (ret = __put_ckp_info(lvinfop, ckp)) + goto err; + + if (ret = __get_ckp_info(lvinfop, lsn, &ckppp)) + goto err; + if (memcmp(ckp, ckppp, sizeof(VRFY_CKP_INFO))) { + fprintf(stderr, +"\n__get_ckp_info got different ckp info than the one put by __put_ckp_info"); + goto err; + } +err: + if (ckppp) + __os_free(NULL, ckppp); + if (ret) + printf("\nError in put_get_cmp_ckp"); + return ret; +} + +static int +put_get_cmp_ts(lvinfop, ts, lsn) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TIMESTAMP_INFO *ts; + DB_LSN lsn; +{ + int ret; + VRFY_TIMESTAMP_INFO *tsp; + + tsp = NULL; + if (ret = __put_timestamp_info(lvinfop, ts)) + goto err; + + if (ret = __get_timestamp_info(lvinfop, lsn, &tsp)) + goto err; + if (memcmp(ts, tsp, sizeof(VRFY_TIMESTAMP_INFO))) { + fprintf(stderr, +"\n__get_timestamp_info got different timestamp info than the one put by __put_timestamp_info"); + goto err; + } +err: + if (tsp) + __os_free(NULL, tsp); + if (ret) + printf("\nError in put_get_cmp_ts"); + return ret; +} + +static int +put_get_cmp_freg(lvinfop, freg, fid) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_FILEREG_INFO *freg; + const DBT *fid; +{ + int ret; + VRFY_FILEREG_INFO *freginfop; + + freginfop = NULL; + if (ret = __put_filereg_info(lvinfop, freg)) + goto err; + + if (ret = __get_filereg_info(lvinfop, fid, &freginfop)) + goto err; + if (memcmp(freg, freginfop, FILE_REG_INFO_FIXSIZE) || + dbt_cmp(&(freg->fileid), &(freginfop->fileid)) || + strcmp(freg->fname, freginfop->fname)) { + fprintf(stderr, +"\n__get_filereg_info got different filereg info than the one put by __put_filereg_info"); + goto err; + } +err: + + if (freginfop) + __free_filereg_info(freginfop); + if (ret) + printf("\nError in put_get_cmp_freg"); + return ret; +} + + +static int +dbt_cmp(d1, d2) + const DBT *d1; + const DBT *d2; +{ + int ret; + + if (ret = d1->size - d2->size) + 
return ret; + + if (ret = memcmp(d1->data, d2->data, d1->size)) + return ret; + + return 0; +} + +static int +dbtarr_cmp(a1, a2, len) + const DBT *a1; + const DBT *a2; + u_int32_t len; +{ + int i, ret; + + for (i = 0; i < len; i++) { + if (ret = a1[i].size - a2[i].size) + return ret; + if (ret = memcmp(a1[i].data, a2[i].data, a1[i].size)) + return ret; + } + + return 0; +} + +static int +put_get_cmp(lvinfop, txninfo, tid) + DB_LOG_VRFY_INFO *lvinfop; + VRFY_TXN_INFO *txninfo; + u_int32_t tid; +{ + int ret; + VRFY_TXN_INFO *txninfop; + + txninfop = NULL; + if (ret = __put_txn_vrfy_info(lvinfop, txninfo)) + goto err; + + if (ret = __get_txn_vrfy_info(lvinfop, tid, &txninfop)) + goto err; + if (memcmp(txninfo, txninfop, TXN_VERIFY_INFO_FIXSIZE) || + memcmp(txninfo->recycle_lsns, txninfop->recycle_lsns, + sizeof(DB_LSN) * txninfo->num_recycle) || + dbtarr_cmp(txninfo->fileups, txninfop->fileups, + txninfop->filenum)) { + fprintf(stderr, +"\n__get_txn_vrfy_info got different txinfo than the one put by __put_txn_vrfy_info"); + goto err; + } +err: + if (txninfop) + __free_txninfo(txninfop); + if (ret) + printf("\nError in put_get_cmp"); + return ret; +} diff --git a/test/cxx/README b/test/cxx/README new file mode 100644 index 00000000..8671eb99 --- /dev/null +++ b/test/cxx/README @@ -0,0 +1,36 @@ +# $Id$ + +Use the scripts testall or testone to run all, or just one of the C++ +tests. You must be in this directory to run them. For example, + + $ export LIBS="-L/usr/include/BerkeleyDB/lib" + $ export CXXFLAGS="-I/usr/include/BerkeleyDB/include" + $ export LD_LIBRARY_PATH="/usr/include/BerkeleyDB/lib" + $ ./testone TestAppendRecno + $ ./testall + +The scripts will use c++ in your path. Set environment variables $CXX +to override this. It will also honor any $CXXFLAGS and $LIBS +variables that are set, except that -c are silently removed from +$CXXFLAGS (since we do the compilation in one step). 
+ +To run successfully, you will probably need to set $LD_LIBRARY_PATH +to be the directory containing libdb_cxx-X.Y.so + +As an alternative, use the --prefix= option, a la configure +to set the top of the BerkeleyDB install directory. This forces +the proper options to be added to $LIBS, $CXXFLAGS $LD_LIBRARY_PATH. +For example, + + $ ./testone --prefix=/usr/include/BerkeleyDB TestAppendRecno + $ ./testall --prefix=/usr/include/BerkeleyDB + +The test framework is pretty simple. Any .cpp file in this +directory that is not mentioned in the 'ignore' file represents a +test. If the test is not compiled successfully, the compiler output +is left in .compileout . Otherwise, the java program is run in +a clean subdirectory using as input .testin, or if that doesn't +exist, /dev/null. Output and error from the test run are put into +.out, .err . If .testout, .testerr exist, +they are used as reference files and any differences are reported. +If either of the reference files does not exist, /dev/null is used. diff --git a/test/cxx/TestConstruct01.cpp b/test/cxx/TestConstruct01.cpp new file mode 100644 index 00000000..a21f6483 --- /dev/null +++ b/test/cxx/TestConstruct01.cpp @@ -0,0 +1,321 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * Do some regression tests for constructors. + * Run normally (without arguments) it is a simple regression test. + * Run with a numeric argument, it repeats the regression a number + * of times, to try to determine if there are memory leaks. 
+ */ +#include + +#include +#include +#include +#include +#ifndef _MSC_VER +#include +#endif + +#include +#include + +using namespace std; + +#define ERR(a) \ + do { \ + cout << "FAIL: " << (a) << "\n"; sysexit(1); \ + } while (0) + +#define ERR2(a1,a2) \ + do { \ + cout << "FAIL: " << (a1) << ": " << (a2) << "\n"; sysexit(1); \ + } while (0) + +#define ERR3(a1,a2,a3) \ + do { \ + cout << "FAIL: " << (a1) << ": " << (a2) << ": " << (a3) << "\n"; sysexit(1); \ + } while (0) + +#define CHK(a) \ + do { \ + int _ret; \ + if ((_ret = (a)) != 0) { \ + ERR3("DB function " #a " has bad return", _ret, DbEnv::strerror(_ret)); \ + } \ + } while (0) + +#ifdef VERBOSE +#define DEBUGOUT(a) cout << a << "\n" +#else +#define DEBUGOUT(a) +#endif + +#define CONSTRUCT01_DBNAME "construct01.db" +#define CONSTRUCT01_DBDIR "." +#define CONSTRUCT01_DBFULLPATH (CONSTRUCT01_DBDIR "/" CONSTRUCT01_DBNAME) + +int itemcount; // count the number of items in the database + +// A good place to put a breakpoint... +// +void sysexit(int status) +{ + exit(status); +} + +void check_file_removed(const char *name, int fatal) +{ + unlink(name); +#if 0 + if (access(name, 0) == 0) { + if (fatal) + cout << "FAIL: "; + cout << "File \"" << name << "\" still exists after run\n"; + if (fatal) + sysexit(1); + } +#endif +} + +// Check that key/data for 0 - count-1 are already present, +// and write a key/data for count. The key and data are +// both "0123...N" where N == count-1. +// +// For some reason on Windows, we need to open using the full pathname +// of the file when there is no environment, thus the 'has_env' +// variable. +// +void rundb(Db *db, int count, int has_env) +{ + const char *name; + + if (has_env) + name = CONSTRUCT01_DBNAME; + else + name = CONSTRUCT01_DBFULLPATH; + + db->set_error_stream(&cerr); + + // We don't really care about the pagesize, but we do want + // to make sure adjusting Db specific variables works before + // opening the db. 
+ // + CHK(db->set_pagesize(1024)); + CHK(db->open(NULL, name, NULL, DB_BTREE, count ? 0 : DB_CREATE, 0664)); + + // The bit map of keys we've seen + long bitmap = 0; + + // The bit map of keys we expect to see + long expected = (1 << (count+1)) - 1; + + char outbuf[10]; + int i; + for (i=0; iput(0, &key, &data, DB_NOOVERWRITE)); + + // Acquire a cursor for the table. + Dbc *dbcp; + CHK(db->cursor(NULL, &dbcp, 0)); + + // Walk through the table, checking + Dbt readkey; + Dbt readdata; + while (dbcp->get(&readkey, &readdata, DB_NEXT) == 0) { + char *key_string = (char *)readkey.get_data(); + char *data_string = (char *)readdata.get_data(); + DEBUGOUT("Got: " << key_string << ": " << data_string); + int len = strlen(key_string); + long bit = (1 << len); + if (len > count) { + ERR("reread length is bad"); + } + else if (strcmp(data_string, key_string) != 0) { + ERR("key/data don't match"); + } + else if ((bitmap & bit) != 0) { + ERR("key already seen"); + } + else if ((expected & bit) == 0) { + ERR("key was not expected"); + } + else { + bitmap |= bit; + expected &= ~(bit); + for (i=0; iclose()); + CHK(db->close(0)); +} + +void t1(int except_flag) +{ + cout << " Running test 1:\n"; + Db db(0, except_flag); + rundb(&db, itemcount++, 0); + cout << " finished.\n"; +} + +void t2(int except_flag) +{ + cout << " Running test 2:\n"; + Db db(0, except_flag); + rundb(&db, itemcount++, 0); + cout << " finished.\n"; +} + +void t3(int except_flag) +{ + cout << " Running test 3:\n"; + Db db(0, except_flag); + rundb(&db, itemcount++, 0); + cout << " finished.\n"; +} + +void t4(int except_flag) +{ + cout << " Running test 4:\n"; + DbEnv env(except_flag); + CHK(env.open(CONSTRUCT01_DBDIR, DB_CREATE | DB_INIT_MPOOL, 0)); + Db db(&env, 0); + CHK(db.close(0)); + CHK(env.close(0)); + cout << " finished.\n"; +} + +void t5(int except_flag) +{ + cout << " Running test 5:\n"; + DbEnv env(except_flag); + CHK(env.open(CONSTRUCT01_DBDIR, DB_CREATE | DB_INIT_MPOOL, 0)); + Db db(&env, 0); + 
rundb(&db, itemcount++, 1); + // Note we cannot reuse the old Db! + Db anotherdb(&env, 0); + + anotherdb.set_errpfx("test5"); + rundb(&anotherdb, itemcount++, 1); + CHK(env.close(0)); + cout << " finished.\n"; +} + +void t6(int except_flag) +{ + cout << " Running test 6:\n"; + + /* From user [#2939] */ + int err; + + DbEnv* penv = new DbEnv(DB_CXX_NO_EXCEPTIONS); + penv->set_cachesize(0, 32 * 1024, 0); + penv->open(CONSTRUCT01_DBDIR, DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL, 0); + + //LEAK: remove this block and leak disappears + Db* pdb = new Db(penv,0); + if ((err = pdb->close(0)) != 0) { + fprintf(stderr, "Error closing Db: %s\n", db_strerror(err)); + } + delete pdb; + //LEAK: remove this block and leak disappears + + if ((err = penv->close(0)) != 0) { + fprintf(stderr, "Error closing DbEnv: %s\n", db_strerror(err)); + } + delete penv; + + cout << " finished.\n"; +} + +// remove any existing environment or database +void removeall() +{ + { + DbEnv tmpenv(DB_CXX_NO_EXCEPTIONS); + (void)tmpenv.remove(CONSTRUCT01_DBDIR, DB_FORCE); + } + + check_file_removed(CONSTRUCT01_DBFULLPATH, 1); + for (int i=0; i<8; i++) { + char buf[20]; + sprintf(buf, "__db.00%d", i); + check_file_removed(buf, 1); + } +} + +int doall(int except_flag) +{ + itemcount = 0; + try { + // before and after the run, removing any + // old environment/database. 
+ // + removeall(); + t1(except_flag); + t2(except_flag); + t3(except_flag); + t4(except_flag); + t5(except_flag); + t6(except_flag); + + removeall(); + return 0; + } + catch (DbException &dbe) { + ERR2("EXCEPTION RECEIVED", dbe.what()); + } + return 1; +} + +int main(int argc, char *argv[]) +{ + int iterations = 1; + if (argc > 1) { + iterations = atoi(argv[1]); + if (iterations < 0) { + ERR("Usage: construct01 count"); + } + } + for (int i=0; i +#include + +using namespace std; + +int main(int argc, char *argv[]) +{ + try { + DbEnv *dbenv = new DbEnv(0); + DbTxn *dbtxn; + u_int8_t conflicts[10]; + + dbenv->set_error_stream(&cerr); + dbenv->set_timeout(0x90000000, + DB_SET_LOCK_TIMEOUT); + dbenv->set_lg_bsize(0x1000); + dbenv->set_lg_dir("."); + dbenv->set_lg_max(0x10000000); + dbenv->set_lg_regionmax(0x100000); + dbenv->set_lk_conflicts(conflicts, sizeof(conflicts)); + dbenv->set_lk_detect(DB_LOCK_DEFAULT); + dbenv->set_lk_max_lockers(100); + dbenv->set_lk_max_locks(10); + dbenv->set_lk_max_objects(1000); + dbenv->set_mp_mmapsize(0x10000); + + // Need to open the environment so we + // can get a transaction. + // + dbenv->open(".", DB_CREATE | DB_INIT_TXN | + DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL, + 0644); + + dbenv->txn_begin(NULL, &dbtxn, DB_TXN_NOWAIT); + dbtxn->set_timeout(0xA0000000, DB_SET_TXN_TIMEOUT); + dbtxn->abort(); + + dbenv->close(0); + + // We get a db, one for each type. + // That's because once we call (for instance) + // set_bt_minkey, DB 'knows' that this is a + // Btree Db, and it cannot be used to try Hash + // or Recno functions. 
+ // + Db *db_bt = new Db(NULL, 0); + db_bt->set_bt_minkey(100); + db_bt->set_cachesize(0, 0x100000, 0); + db_bt->close(0); + + Db *db_h = new Db(NULL, 0); + db_h->set_h_ffactor(0x10); + db_h->set_h_nelem(100); + db_h->set_lorder(0); + db_h->set_pagesize(0x10000); + db_h->close(0); + + Db *db_re = new Db(NULL, 0); + db_re->set_re_delim('@'); + db_re->set_re_pad(10); + db_re->set_re_source("re.in"); + db_re->close(0); + + Db *db_q = new Db(NULL, 0); + db_q->set_q_extentsize(200); + db_q->close(0); + + } + catch (DbException &dbe) { + cerr << "Db Exception: " << dbe.what() << "\n"; + } + return 0; +} diff --git a/test/cxx/TestKeyRange.cpp b/test/cxx/TestKeyRange.cpp new file mode 100644 index 00000000..c64ff43c --- /dev/null +++ b/test/cxx/TestKeyRange.cpp @@ -0,0 +1,169 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * NOTE: AccessExample changed to test Db.key_range. We made a global + * change of /AccessExample/TestKeyRange/, the only other changes are + * marked with comments that are notated as 'ADDED'. + */ +#include + +#include +#include +#include +#include +#ifndef _MSC_VER +#include +#endif + +#include +#include + +using namespace std; +class TestKeyRange +{ +public: + TestKeyRange(); + void run(); + +private: + static const char FileName[]; + + // no need for copy and assignment + TestKeyRange(const TestKeyRange &); + void operator = (const TestKeyRange &); +}; + +static void usage(); // forward + +int main(int argc, char *argv[]) +{ + if (argc > 1) { + usage(); + } + + // Use a try block just to report any errors. + // An alternate approach to using exceptions is to + // use error models (see DbEnv::set_error_model()) so + // that error codes are returned for all Berkeley DB methods. 
+ // + try { + TestKeyRange app; + app.run(); + return 0; + } + catch (DbException &dbe) { + cerr << "TestKeyRange: " << dbe.what() << "\n"; + return 1; + } +} + +static void usage() +{ + cerr << "usage: TestKeyRange\n"; + exit(1); +} + +const char TestKeyRange::FileName[] = "access.db"; + +TestKeyRange::TestKeyRange() +{ +} + +void TestKeyRange::run() +{ + // Remove the previous database. + (void)unlink(FileName); + + // Create the database object. + // There is no environment for this simple example. + Db db(0, 0); + + db.set_error_stream(&cerr); + db.set_errpfx("TestKeyRange"); + db.set_pagesize(1024); /* Page size: 1K. */ + db.set_cachesize(0, 32 * 1024, 0); + db.open(NULL, FileName, NULL, DB_BTREE, DB_CREATE, 0664); + + // + // Insert records into the database, where the key is the user + // input and the data is the user input in reverse order. + // + char buf[1024]; + char rbuf[1024]; + char *t; + char *p; + int ret; + int len; + Dbt *firstkey = NULL; + char firstbuf[1024]; + + for (;;) { + cout << "input>"; + cout.flush(); + + cin.getline(buf, sizeof(buf)); + if (cin.eof()) + break; + + if ((len = strlen(buf)) <= 0) + continue; + for (t = rbuf, p = buf + (len - 1); p >= buf;) + *t++ = *p--; + *t++ = '\0'; + + Dbt key(buf, len + 1); + Dbt data(rbuf, len + 1); + if (firstkey == NULL) { + strcpy(firstbuf, buf); + firstkey = new Dbt(firstbuf, len + 1); + } + + ret = db.put(0, &key, &data, DB_NOOVERWRITE); + if (ret == DB_KEYEXIST) { + cout << "Key " << buf << " already exists.\n"; + } + cout << "\n"; + } + + // We put a try block around this section of code + // to ensure that our database is properly closed + // in the event of an error. + // + try { + // Acquire a cursor for the table. 
+ Dbc *dbcp; + db.cursor(NULL, &dbcp, 0); + + /*ADDED...*/ + DB_KEY_RANGE range; + memset(&range, 0, sizeof(range)); + + db.key_range(NULL, firstkey, &range, 0); + printf("less: %f\n", range.less); + printf("equal: %f\n", range.equal); + printf("greater: %f\n", range.greater); + /*end ADDED*/ + + Dbt key; + Dbt data; + + // Walk through the table, printing the key/data pairs. + while (dbcp->get(&key, &data, DB_NEXT) == 0) { + char *key_string = (char *)key.get_data(); + char *data_string = (char *)data.get_data(); + cout << key_string << " : " << data_string << "\n"; + } + dbcp->close(); + } + catch (DbException &dbe) { + cerr << "TestKeyRange: " << dbe.what() << "\n"; + } + + db.close(0); +} diff --git a/test/cxx/TestKeyRange.testin b/test/cxx/TestKeyRange.testin new file mode 100644 index 00000000..a2b6bd74 --- /dev/null +++ b/test/cxx/TestKeyRange.testin @@ -0,0 +1,8 @@ +first line is alphabetically somewhere in the middle. +Blah blah +let's have exactly eight lines of input. +stuff +more stuff +and even more stuff +lastly +but not leastly. diff --git a/test/cxx/TestKeyRange.testout b/test/cxx/TestKeyRange.testout new file mode 100644 index 00000000..25b2e1a8 --- /dev/null +++ b/test/cxx/TestKeyRange.testout @@ -0,0 +1,19 @@ +input> +input> +input> +input> +input> +input> +input> +input> +input>less: 0.375000 +equal: 0.125000 +greater: 0.500000 +Blah blah : halb halB +and even more stuff : ffuts erom neve dna +but not leastly. : .yltsael ton tub +first line is alphabetically somewhere in the middle. : .elddim eht ni erehwemos yllacitebahpla si enil tsrif +lastly : yltsal +let's have exactly eight lines of input. : .tupni fo senil thgie yltcaxe evah s'tel +more stuff : ffuts erom +stuff : ffuts diff --git a/test/cxx/TestLogc.cpp b/test/cxx/TestLogc.cpp new file mode 100644 index 00000000..bd9712c6 --- /dev/null +++ b/test/cxx/TestLogc.cpp @@ -0,0 +1,108 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * A basic regression test for the Logc class. + */ + +#include +#include + +using namespace std; + +static void show_dbt(ostream &os, Dbt *dbt) +{ + int i; + int size = dbt->get_size(); + unsigned char *data = (unsigned char *)dbt->get_data(); + + os << "size: " << size << " data: "; + for (i=0; iopen(".", DB_CREATE | DB_INIT_LOG | + DB_INIT_TXN | DB_INIT_MPOOL, 0); + + // Do some database activity to get something into the log. + Db *db1 = new Db(env, 0); + env->txn_begin(NULL, &dbtxn, DB_TXN_NOWAIT); + db1->open(dbtxn, "first.db", NULL, DB_BTREE, DB_CREATE, 0); + Dbt *key = new Dbt((char *)"a", 1); + Dbt *data = new Dbt((char *)"b", 1); + db1->put(dbtxn, key, data, 0); + key->set_data((char *)"c"); + data->set_data((char *)"d"); + db1->put(dbtxn, key, data, 0); + dbtxn->commit(0); + + env->txn_begin(NULL, &dbtxn, DB_TXN_NOWAIT); + key->set_data((char *)"e"); + data->set_data((char *)"f"); + db1->put(dbtxn, key, data, 0); + key->set_data((char *)"g"); + data->set_data((char *)"h"); + db1->put(dbtxn, key, data, 0); + dbtxn->commit(0); + db1->close(0); + + // flush the log + env->log_flush(NULL); + + // Now get a log cursor and walk through. + DbLogc *logc; + + env->log_cursor(&logc, 0); + int ret = 0; + DbLsn lsn; + Dbt *dbt = new Dbt(); + u_int32_t flags = DB_FIRST; + + int count = 0; + while ((ret = logc->get(&lsn, dbt, flags)) == 0) { + + // We ignore the contents of the log record, + // it's not portable. Even the exact count + // is may change when the underlying implementation + // changes, we'll just make sure at the end we saw + // 'enough'. 
+ // + // cout << "logc.get: " << count; + // show_dbt(cout, dbt); + // cout << "\n"; + // + count++; + flags = DB_NEXT; + } + if (ret != DB_NOTFOUND) { + cerr << "*** FAIL: logc.get returned: " + << DbEnv::strerror(ret) << "\n"; + } + logc->close(0); + + // There has to be at *least* 12 log records, + // 2 for each put, plus for commits. + // + if (count < 12) + cerr << "*** FAIL: not enough log records\n"; + + cout << "TestLogc done.\n"; + } + catch (DbException &dbe) { + cerr << "*** FAIL: " << dbe.what() <<"\n"; + } + return 0; +} diff --git a/test/cxx/TestLogc.testout b/test/cxx/TestLogc.testout new file mode 100644 index 00000000..afac3af7 --- /dev/null +++ b/test/cxx/TestLogc.testout @@ -0,0 +1 @@ +TestLogc done. diff --git a/test/cxx/TestMulti.cpp b/test/cxx/TestMulti.cpp new file mode 100644 index 00000000..5b37d1b7 --- /dev/null +++ b/test/cxx/TestMulti.cpp @@ -0,0 +1,207 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. + */ + +#include "db_cxx.h" +#include +using namespace std; + +void test1() +{ + int numberOfKeysToWrite= 10000; + Db db(0,DB_CXX_NO_EXCEPTIONS); + db.set_pagesize(512); + int err= db.open(0, "test1.db", 0, DB_BTREE, DB_CREATE, 0); + { + int i= 0; + Dbt key(&i,sizeof(i)); + Dbt data(&i,sizeof(i)); + for(;iget(&multikey,&multidata,DB_NEXT|DB_MULTIPLE_KEY); + if(err==0) + { + Dbt key, data; + DbMultipleKeyDataIterator i(multidata); + while(err==0 && i.next(key,data)) + { + int actualKey= *((int*)key.get_data()); + int actualData= *((int*)data.get_data()); + if(actualKey!=actualData) + { + std::cout << "Error: key/data mismatch. 
" << actualKey << "!=" << actualData << std::endl; + err= -1; + } + else + { + check[actualKey]++; + } + numberOfKeysRead++; + } + } else if(err!=DB_NOTFOUND) + std::cout << "Error: dbc->get: " << db_strerror(err) << std::endl; + } + if(numberOfKeysRead!=numberOfKeysToWrite) + { + std::cout << "Error: key count mismatch. " << numberOfKeysRead << "!=" << numberOfKeysToWrite << std::endl; + } + for(int n=0;nclose(); + } + + db.close(0); +} + +void test2() +{ + int numberOfKeysToWrite= 10000; + Db db(0,DB_CXX_NO_EXCEPTIONS); + db.set_flags(DB_DUP); + db.set_pagesize(512); + int err= db.open(0, "test2.db", 0, DB_BTREE, DB_CREATE, 0); + + { + int i= 0; + int k= 0; + Dbt key(&k,sizeof(k)); + Dbt data(&i,sizeof(i)); + for(;iget(&multikey,&multidata,DB_NEXT|DB_MULTIPLE); + if(err==0) + { + Dbt data; + DbMultipleDataIterator i(multidata); + while(err==0 && i.next(data)) + { + int actualData= *((int*)data.get_data()); + if(numberOfKeysRead!=actualData) + { + std::cout << "Error: key/data mismatch. " << numberOfKeysRead << "!=" << actualData << std::endl; + err= -1; + } + numberOfKeysRead++; + } + } else if(err!=DB_NOTFOUND) + std::cout << "Error: dbc->get: " << db_strerror(err) << std::endl; + } + if(numberOfKeysRead!=numberOfKeysToWrite) + { + std::cout << "Error: key count mismatch. " << numberOfKeysRead << "!=" << numberOfKeysToWrite << std::endl; + } + dbc->close(); + } + db.close(0); +} + +void test3() +{ + int numberOfKeysToWrite= 10000; + Db db(0,DB_CXX_NO_EXCEPTIONS); + db.set_pagesize(512); + int err= db.open(0, "test3.db", 0, DB_RECNO, DB_CREATE, 0); + + { + int i= 0; + Dbt key; + Dbt data(&i,sizeof(i)); + for(;iget(&multikey,&multidata,DB_NEXT|DB_MULTIPLE_KEY); + if(err==0) + { + u_int32_t recno= 0; + Dbt data; + DbMultipleRecnoDataIterator i(multidata); + while(err==0 && i.next(recno,data)) + { + int actualData= *((int*)data.get_data()); + if(recno!=actualData+1) + { + std::cout << "Error: recno/data mismatch. 
" << recno << "!=" << actualData << "+1" << std::endl; + err= -1; + } + numberOfKeysRead++; + } + } else if(err!=DB_NOTFOUND) + std::cout << "Error: dbc->get: " << db_strerror(err) << std::endl; + } + if(numberOfKeysRead!=numberOfKeysToWrite) + { + std::cout << "Error: key count mismatch. " << numberOfKeysRead << "!=" << numberOfKeysToWrite << std::endl; + } + dbc->close(); + } + + db.close(0); +} + +int main() +{ + test1(); + test2(); + test3(); + return (0); +} + diff --git a/test/cxx/TestSimpleAccess.cpp b/test/cxx/TestSimpleAccess.cpp new file mode 100644 index 00000000..2f49ead8 --- /dev/null +++ b/test/cxx/TestSimpleAccess.cpp @@ -0,0 +1,67 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * Do some regression tests for constructors. + * Run normally (without arguments) it is a simple regression test. + * Run with a numeric argument, it repeats the regression a number + * of times, to try to determine if there are memory leaks. + */ + +#include +#include +using namespace std; + +int main(int argc, char *argv[]) +{ + try { + Db *db = new Db(NULL, 0); + db->open(NULL, "my.db", NULL, DB_BTREE, DB_CREATE, 0644); + + // populate our massive database. + // all our strings include null for convenience. + // Note we have to cast for idiomatic + // usage, since newer gcc requires it. + Dbt *keydbt = new Dbt((char *)"key", 4); + Dbt *datadbt = new Dbt((char *)"data", 5); + db->put(NULL, keydbt, datadbt, 0); + + // Now, retrieve. We could use keydbt over again, + // but that wouldn't be typical in an application. 
+ Dbt *goodkeydbt = new Dbt((char *)"key", 4); + Dbt *badkeydbt = new Dbt((char *)"badkey", 7); + Dbt *resultdbt = new Dbt(); + resultdbt->set_flags(DB_DBT_MALLOC); + + int ret; + + if ((ret = db->get(NULL, goodkeydbt, resultdbt, 0)) != 0) { + cout << "get: " << DbEnv::strerror(ret) << "\n"; + } + else { + char *result = (char *)resultdbt->get_data(); + cout << "got data: " << result << "\n"; + } + + if ((ret = db->get(NULL, badkeydbt, resultdbt, 0)) != 0) { + // We expect this... + cout << "get using bad key: " + << DbEnv::strerror(ret) << "\n"; + } + else { + char *result = (char *)resultdbt->get_data(); + cout << "*** got data using bad key!!: " + << result << "\n"; + } + cout << "finished test\n"; + } + catch (DbException &dbe) { + cerr << "Db Exception: " << dbe.what(); + } + return 0; +} diff --git a/test/cxx/TestSimpleAccess.testout b/test/cxx/TestSimpleAccess.testout new file mode 100644 index 00000000..a7d79f0c --- /dev/null +++ b/test/cxx/TestSimpleAccess.testout @@ -0,0 +1,3 @@ +got data: data +get using bad key: BDB0073 DB_NOTFOUND: No matching key/data pair found +finished test diff --git a/test/cxx/TestTruncate.cpp b/test/cxx/TestTruncate.cpp new file mode 100644 index 00000000..7639f01a --- /dev/null +++ b/test/cxx/TestTruncate.cpp @@ -0,0 +1,84 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * Do some regression tests for constructors. + * Run normally (without arguments) it is a simple regression test. + * Run with a numeric argument, it repeats the regression a number + * of times, to try to determine if there are memory leaks. + */ + +#include +#include +using namespace std; + +int main(int argc, char *argv[]) +{ + try { + Db *db = new Db(NULL, 0); + db->open(NULL, "my.db", NULL, DB_BTREE, DB_CREATE, 0644); + + // populate our massive database. + // all our strings include null for convenience. 
+ // Note we have to cast for idiomatic + // usage, since newer gcc requires it. + Dbt *keydbt = new Dbt((char*)"key", 4); + Dbt *datadbt = new Dbt((char*)"data", 5); + db->put(NULL, keydbt, datadbt, 0); + + // Now, retrieve. We could use keydbt over again, + // but that wouldn't be typical in an application. + Dbt *goodkeydbt = new Dbt((char*)"key", 4); + Dbt *badkeydbt = new Dbt((char*)"badkey", 7); + Dbt *resultdbt = new Dbt(); + resultdbt->set_flags(DB_DBT_MALLOC); + + int ret; + + if ((ret = db->get(NULL, goodkeydbt, resultdbt, 0)) != 0) { + cout << "get: " << DbEnv::strerror(ret) << "\n"; + } + else { + char *result = (char *)resultdbt->get_data(); + cout << "got data: " << result << "\n"; + } + + if ((ret = db->get(NULL, badkeydbt, resultdbt, 0)) != 0) { + // We expect this... + cout << "get using bad key: " + << DbEnv::strerror(ret) << "\n"; + } + else { + char *result = (char *)resultdbt->get_data(); + cout << "*** got data using bad key!!: " + << result << "\n"; + } + + // Now, truncate and make sure that it's really gone. + cout << "truncating data...\n"; + u_int32_t nrecords; + db->truncate(NULL, &nrecords, 0); + cout << "truncate returns " << nrecords << "\n"; + if ((ret = db->get(NULL, goodkeydbt, resultdbt, 0)) != 0) { + // We expect this... + cout << "after truncate get: " + << DbEnv::strerror(ret) << "\n"; + } + else { + char *result = (char *)resultdbt->get_data(); + cout << "got data: " << result << "\n"; + } + + db->close(0); + cout << "finished test\n"; + } + catch (DbException &dbe) { + cerr << "Db Exception: " << dbe.what(); + } + return 0; +} diff --git a/test/cxx/TestTruncate.testout b/test/cxx/TestTruncate.testout new file mode 100644 index 00000000..0e22b9dc --- /dev/null +++ b/test/cxx/TestTruncate.testout @@ -0,0 +1,6 @@ +got data: data +get using bad key: BDB0073 DB_NOTFOUND: No matching key/data pair found +truncating data... 
+truncate returns 1 +after truncate get: BDB0073 DB_NOTFOUND: No matching key/data pair found +finished test diff --git a/test/cxx/chk.cxxtests b/test/cxx/chk.cxxtests new file mode 100644 index 00000000..c277548d --- /dev/null +++ b/test/cxx/chk.cxxtests @@ -0,0 +1,71 @@ +#!/bin/sh - +# +# $Id$ +# +# Check to make sure that regression tests for C++ run. + +TEST_CXX_SRCDIR=../test/cxx # must be a relative directory + +# All paths must be relative to a subdirectory of the build directory +LIBS="-L.. -ldb_cxx" +CXXFLAGS="-I.. -I../../src/dbinc" + +[ `uname` = "Linux" ] && LIBS="$LIBS -lpthread" + +# Test must be run from a local build directory, not from a test +# directory. +[ -f db_config.h ] || { + echo 'FAIL: chk.cxxtests must be run from a local build directory.' + exit 1 +} +[ -d ../src ] || { + echo 'FAIL: chk.cxxtests must be run from a local build directory.' + exit 1 +} +[ -f libdb.a ] || make libdb.a || { + echo 'FAIL: unable to build libdb.a' + exit 1 +} +[ -f libdb_cxx.a ] || make libdb_cxx.a || { + echo 'FAIL: unable to build libdb_cxx.a' + exit 1 +} +CXX=`sed -e '/^CXX=/!d' -e 's/^CXX=//' -e 's/.*mode=compile *//' Makefile` +echo " ====== cxx tests using $CXX" +testnames=`cd $TEST_CXX_SRCDIR; ls *.cpp | sed -e 's/\.cpp$//'` + +for testname in $testnames; do + if grep -x $testname $TEST_CXX_SRCDIR/ignore > /dev/null; then + echo " **** cxx test $testname ignored" + continue + fi + + echo " ==== cxx test $testname" + rm -rf TESTCXX; mkdir TESTCXX + cd ./TESTCXX + testprefix=../$TEST_CXX_SRCDIR/$testname + + ${CXX} ${CXXFLAGS} -o $testname $testprefix.cpp ${LIBS} > $testname.compileout 2>&1 || { + echo "FAIL: compilation of $testname failed, see TESTCXX/$testname.compileout" + exit 1 + } + rm -f $testname.compileout + infile=$testprefix.testin + [ -f $infile ] || infile=/dev/null + goodoutfile=$testprefix.testout + [ -f $goodoutfile ] || goodoutfile=/dev/null + gooderrfile=$testprefix.testerr + [ -f $gooderrfile ] || gooderrfile=/dev/null + ./$testname 
<$infile > $testname.out 2> $testname.err + cmp $testname.out $goodoutfile > /dev/null || { + echo "FAIL: $testname output differs: see $testname.out, $goodoutfile" + exit 1 + } + cmp $testname.err $gooderrfile > /dev/null || { + echo "FAIL: $testname error differs: see $testname.err, $gooderrfile" + exit 1 + } + cd .. +done +rm -rf TESTCXX +exit 0 diff --git a/test/cxx/ignore b/test/cxx/ignore new file mode 100644 index 00000000..bcd98b57 --- /dev/null +++ b/test/cxx/ignore @@ -0,0 +1,4 @@ +# +# $Id$ +# +# A list of tests to ignore diff --git a/test/cxx/testall b/test/cxx/testall new file mode 100644 index 00000000..5d1ceba0 --- /dev/null +++ b/test/cxx/testall @@ -0,0 +1,32 @@ +#!/bin/sh - +# $Id$ +# +# Run all the C++ regression tests + +ecode=0 +prefixarg="" +stdinarg="" +while : +do + case "$1" in + --prefix=* ) + prefixarg="$1"; shift;; + --stdin ) + stdinarg="$1"; shift;; + * ) + break + esac +done +files="`find . -name \*.cpp -print`" +for file in $files; do + name=`echo $file | sed -e 's:^\./::' -e 's/\.cpp$//'` + if grep $name ignore > /dev/null; then + echo " **** cxx test $name ignored" + else + echo " ==== cxx test $name" + if ! sh ./testone $prefixarg $stdinarg $name; then + ecode=1 + fi + fi +done +exit $ecode diff --git a/test/cxx/testone b/test/cxx/testone new file mode 100644 index 00000000..c1fa93b1 --- /dev/null +++ b/test/cxx/testone @@ -0,0 +1,122 @@ +#!/bin/sh - +# $Id$ +# +# Run just one C++ regression test, the single argument +# is the basename of the test, e.g. TestRpcServer + +error() +{ + echo '' >&2 + echo "C++ regression error: $@" >&2 + echo '' >&2 + ecode=1 +} + +# compares the result against the good version, +# reports differences, and removes the result file +# if there are no differences. +# +compare_result() +{ + good="$1" + latest="$2" + if [ ! 
-e "$good" ]; then + echo "Note: $good does not exist" + return + fi + tmpout=/tmp/blddb$$.tmp + diff "$good" "$latest" > $tmpout + if [ -s $tmpout ]; then + nbad=`grep '^[0-9]' $tmpout | wc -l` + error "$good and $latest differ in $nbad places." + else + rm $latest + fi + rm -f $tmpout +} + +ecode=0 +stdinflag=n +gdbflag=n +CXX=${CXX:-c++} +LIBS=${LIBS:-} + +# remove any -c option in the CXXFLAGS +CXXFLAGS="`echo " ${CXXFLAGS} " | sed -e 's/ -c //g'`" + +# determine the prefix of the install tree +prefix="" +while : +do + case "$1" in + --prefix=* ) + prefix="`echo $1 | sed -e 's/--prefix=//'`"; shift + LIBS="-L$prefix/lib -ldb_cxx $LIBS" + CXXFLAGS="-I$prefix/include $CXXFLAGS" + export LD_LIBRARY_PATH="$prefix/lib:$LD_LIBRARY_PATH" + ;; + --stdin ) + stdinflag=y; shift + ;; + --gdb ) + CXXFLAGS="-g $CXXFLAGS" + gdbflag=y; shift + ;; + * ) + break + ;; + esac +done + +if [ "$#" = 0 ]; then + echo 'Usage: testone [ --prefix= | --stdin ] TestName' + exit 1 +fi +name="$1" + +# compile +rm -rf TESTDIR; mkdir TESTDIR +cd ./TESTDIR + +${CXX} ${CXXFLAGS} -o $name ../$name.cpp ${LIBS} > ../$name.compileout 2>&1 +if [ $? != 0 -o -s ../$name.compileout ]; then + error "compilation of $name failed, see $name.compileout" + exit 1 +fi +rm -f ../$name.compileout + +# find input and error file +infile=../$name.testin +if [ ! -f $infile ]; then + infile=/dev/null +fi + +# run and diff results +rm -rf TESTDIR +if [ "$gdbflag" = y ]; then + if [ -s $infile ]; then + echo "Input file is $infile" + fi + gdb ./$name + exit 0 +elif [ "$stdinflag" = y ]; then + ./$name >../$name.out 2>../$name.err +else + ./$name <$infile >../$name.out 2>../$name.err +fi +cd .. + +testerr=$name.testerr +if [ ! -f $testerr ]; then + testerr=/dev/null +fi + +testout=$name.testout +if [ ! 
-f $testout ]; then + testout=/dev/null +fi + +compare_result $testout $name.out +compare_result $testerr $name.err +rm -rf TESTDIR +exit $ecode diff --git a/test/micro/README b/test/micro/README new file mode 100644 index 00000000..2d35358f --- /dev/null +++ b/test/micro/README @@ -0,0 +1,84 @@ +# $Id$ + +A simple framework for core Berkeley DB micro-benchmarks, intended for +two purposes: to certify a port of Berkeley DB to a new platform, and +to provide micro-benchmark information between different Berkeley DB +releases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +To run the tests: + +1. Unpack and build the Berkeley DB releases you want to run against. +(Note that test_micro is only known to work on release 4.0 and later.) + +2. Run the tests: + + % sh test_micro + + By default, tests are run for all of the Berkeley DB builds found in + the current working directory. A build is identified by its + top-level name, and is expected to be of the form: + + db-.. + + and the fact the Berkeley DB library has been built in the standard + location in that directory tree (for example, "build_unix/libdb.a". + Directories with other names and directories without a library will + be ignored. + + You can run a subset of the tests using command-line arguments: + + % sh test_micro 3 # Run test 3 + % sh test_micro 3-5 # Run tests 3-5 + % sh test_micro 3- # Run test 3 to the maximum test + % sh test_micro -3 # Run tests 1-3 + + You can run on a subset of the releases using the MAJOR and MINOR + environment variables: + + % env MAJOR=4 MINOR=2 sh test_micro # Run on 4.2.XX + # Run on 4.1.XX and 4.2.XX + % env MAJOR=4 MINOR='[12]' sh test_micro + +3. If you want to generate the micro-benchmark output build the HTML + page after the run: + + % sh report + + The output of the tests and the web page are created in the directory + RUN.hostname (for example, "RUN.abyssinian"). The tests are numeric + filenames in that directory (for example, "1", "2", "36"). 
The web + page output is in the file "hostname.html". + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +To run different test configurations: + +1. Modify the configuration files in the configs/ directory to run the + tests you want to run. The configuration file configs/run.std is the + tests that are run by the test_micro shell script. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +To add a new test program: + +1. Add a new file to the SOURCE directory, and build it as part of + building the test_micro program. This will require changes to + the test_micro shell script, as well as the standard build rules + for all of the Berkeley DB systems, found in the dist/ directory. + + The file must output the following line on stdout: + + # title + major.minor.patchoperations/second + + For example: + + # 10000 Btree database open/close pairs + 3.0.55 29600.69 + 3.1.17 30438.25 + +2. Modify the file test_micro/test_micro.c to exec your new command + (this should only require changing the cmdlist structure at the top + of that file). + +3. Modify the test_micro configuration files in the configs/ directory + to run your new command. 
diff --git a/test/micro/configs/run.small b/test/micro/configs/run.small new file mode 100644 index 00000000..5f5bbe34 --- /dev/null +++ b/test/micro/configs/run.small @@ -0,0 +1,127 @@ +b_curalloc -c 100 +b_curwalk -c 10 -d 10 -Ss -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -Ss -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -p -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -p -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -pSs -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -pSs -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -ps -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -ps -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -s -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -s -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 100 -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 100 -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 10 -d 10 -s -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -d 10 -s -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -d 10 -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -d 10 -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -p -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -p -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -p -t heap -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -p -t queue -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -p -t recno -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -t btree -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -t hash -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -t heap -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -t queue -w 10 -C 524288 -P 1024 +b_curwalk -c 100 -t recno -w 10 -C 524288 -P 1024 + +b_del -c 100 -t btree -C 524288 +b_del -c 100 -t hash -C 524288 +b_del -c 100 -t queue -C 524288 +b_del -c 100 -t recno -C 524288 +b_del -w -c 100 -t btree -C 524288 +b_del -w -c 100 -t hash -C 524288 +b_del -w -c 100 -t heap -C 524288 +b_del -w -c 100 -t queue -C 524288 +b_del -w -c 100 -t recno -C 524288 + +b_get -c 100 -t btree 
-C 524288 +b_get -c 100 -t hash -C 524288 +b_get -c 100 -t heap -C 524288 +b_get -c 100 -t queue -C 524288 +b_get -c 100 -t recno -C 524288 + +b_inmem -d 32 -k 8 -o 10 -P 1024 -C 524288 bulk +b_inmem -d 32 -k 8 -o 10 -P 1024 -C 524288 txn-sync +b_inmem -d 32 -k 8 -o 10 -P 1024 -C 524288 read +b_inmem -d 32 -k 8 -o 100 -P 1024 -C 524288 txn-nosync +b_inmem -d 32 -k 8 -o 100 -P 1024 -C 524288 txn-read +b_inmem -d 32 -k 8 -o 100 -P 1024 -C 524288 txn-write +b_inmem -d 32 -k 8 -o 100 -P 1024 -C 524288 txn-write-nosync +b_inmem -d 32 -k 8 -o 100 -P 1024 -C 524288 write + +b_load -c 100 -t btree -C 524288 +b_load -c 100 -t hash -C 524288 +b_load -c 100 -t heap -C 524288 +b_load -c 100 -t queue -C 524288 +b_load -c 100 -t recno -C 524288 +b_load -d -c 100 -t btree -C 524288 +b_load -d -c 100 -t hash -C 524288 + +b_open -c 100 -d -t btree +b_open -c 100 -d -t hash +b_open -c 100 -d -t heap +b_open -c 100 -d -t queue +b_open -c 100 -d -t recno +b_open -c 100 -f -t btree +b_open -c 100 -f -t hash +b_open -c 100 -f -t heap +b_open -c 100 -f -t queue +b_open -c 100 -f -t recno +b_open -c 100 -fd -t btree +b_open -c 100 -fd -t hash +b_open -c 100 -fd -t recno +b_open -c 100 -t btree +b_open -c 100 -t hash +b_open -c 100 -t heap +b_open -c 100 -t queue +b_open -c 100 -t recno + +b_put -c 10 -d 64 -t btree -C 524288 +b_put -c 10 -d 64 -t hash -C 524288 +b_put -c 10 -d 64 -t heap -C 524288 +b_put -c 10 -d 64 -t queue -C 524288 +b_put -c 10 -d 64 -t recno -C 524288 +b_put -c 100 -s 10 -t btree -C 524288 +b_put -c 100 -s 10 -t hash -C 524288 +b_put -c 100 -s 10 -t heap -C 524288 +b_put -c 100 -s 10 -t queue -C 524288 +b_put -c 100 -s 10 -t recno -C 524288 +b_put -c 100 -t btree -C 524288 +b_put -c 100 -t hash -C 524288 +b_put -c 100 -t heap -C 524288 +b_put -c 100 -t queue -C 524288 +b_put -c 100 -t recno -C 524288 + +b_recover -c 100 -C 524288 + +b_txn -a -c 100 +b_txn -c 100 + +b_txn_write -a -c 100 +b_txn_write -ar -c 100 +b_txn_write -c 100 +b_txn_write -r -c 100 +b_txn_write 
-rw -c 100 +b_txn_write -w -c 100 + +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w A -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w A -c 100 -g 10 -d 256 -p 1024 -t btree +b_workload -w A -c 100 -g 10 -d 256 -p 1024 -t hash +b_workload -w A -c 100 -g 10 -o -t btree -p 1024 +b_workload -w A -c 100 -g 10 -o -t hash -p 1024 +b_workload -w E -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w E -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w F -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w F -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w G -c 100 -g 10 -d 20 -p 1024 -t btree +b_workload -w G -c 100 -g 10 -d 20 -p 1024 -t hash +b_workload -w H -c 100 -d 20 -p 1024 -t hash diff --git a/test/micro/configs/run.std b/test/micro/configs/run.std new file mode 100644 index 00000000..41060932 --- /dev/null +++ b/test/micro/configs/run.std @@ -0,0 +1,134 @@ +b_curalloc -c 1000000 + +b_curwalk -c 100 -d 1000 -Ss -t btree -w 100 +b_curwalk -c 100 -d 1000 -Ss -t hash -w 100 +b_curwalk -c 100 -d 1000 -p -t btree -w 100 +b_curwalk -c 100 -d 1000 -p -t hash -w 100 +b_curwalk -c 100 -d 1000 -pSs -t btree -w 100 +b_curwalk -c 100 -d 1000 -pSs -t hash -w 100 +b_curwalk -c 100 -d 1000 -ps -t btree -w 100 +b_curwalk -c 100 -d 1000 -ps -t hash -w 100 +b_curwalk -c 100 -d 1000 -s -t btree -w 100 +b_curwalk -c 100 -d 1000 -s -t hash -w 100 +b_curwalk -c 100 -d 1000 -t btree -w 100 +b_curwalk -c 100 -d 1000 -t hash -w 100 +b_curwalk -c 10000 -d 10 -s -t btree -w 100 +b_curwalk -c 10000 -d 10 -s -t hash -w 100 +b_curwalk -c 10000 -d 10 -t btree -w 100 +b_curwalk -c 10000 -d 10 -t hash -w 100 +b_curwalk -c 100000 -p -t btree -w 100 +b_curwalk -c 100000 -p 
-t hash -w 100 +b_curwalk -c 100000 -p -t heap -w 100 +b_curwalk -c 100000 -p -t queue -w 100 +b_curwalk -c 100000 -p -t recno -w 100 +b_curwalk -c 100000 -t btree -w 100 +b_curwalk -c 100000 -t hash -w 100 +b_curwalk -c 100000 -t heap -w 100 +b_curwalk -c 100000 -t queue -w 100 +b_curwalk -c 100000 -t recno -w 100 + +b_del -c 100000 -t btree +b_del -c 100000 -t hash +b_del -c 100000 -t queue +b_del -c 100000 -t recno +b_del -w -c 100000 -t btree +b_del -w -c 100000 -t hash +b_del -w -c 100000 -t heap +b_del -w -c 100000 -t queue +b_del -w -c 100000 -t recno + +b_get -c 1000000 -t btree +b_get -c 1000000 -t hash +b_get -c 1000000 -t heap +b_get -c 1000000 -t queue +b_get -c 1000000 -t recno + +b_inmem -d 32 -k 8 -o 100000 -P 32768 bulk +b_inmem -d 32 -k 8 -o 100000 -P 32768 txn-sync +b_inmem -d 32 -k 8 -o 1000000 -P 32768 read +b_inmem -d 32 -k 8 -o 1000000 -P 32768 txn-nosync +b_inmem -d 32 -k 8 -o 1000000 -P 32768 txn-read +b_inmem -d 32 -k 8 -o 1000000 -P 32768 txn-write +b_inmem -d 32 -k 8 -o 1000000 -P 32768 txn-write-nosync +b_inmem -d 32 -k 8 -o 1000000 -P 32768 write + +b_load -c 1000000 -t btree +b_load -c 100000 -t hash +b_load -c 1000000 -t heap +b_load -c 1000000 -t queue +b_load -c 1000000 -t recno +b_load -d -c 1000000 -t btree +b_load -d -c 1000000 -t hash + +b_open -c 10000 -d -t btree +b_open -c 10000 -d -t hash +b_open -c 10000 -d -t heap +b_open -c 10000 -d -t queue +b_open -c 10000 -d -t recno +b_open -c 10000 -f -t btree +b_open -c 10000 -f -t hash +b_open -c 10000 -f -t heap +b_open -c 10000 -f -t queue +b_open -c 10000 -f -t recno +b_open -c 10000 -fd -t btree +b_open -c 10000 -fd -t hash +b_open -c 10000 -fd -t recno +b_open -c 10000 -t btree +b_open -c 10000 -t hash +b_open -c 10000 -t heap +b_open -c 10000 -t queue +b_open -c 10000 -t recno + +b_put -c 100000 -d 204800 -t btree +b_put -c 100000 -d 204800 -t hash +b_put -c 100000 -d 204800 -t heap +b_put -c 100000 -d 204800 -t recno +b_put -c 1000000 -s 10 -t btree +b_put -c 1000000 -s 10 
-t hash +b_put -c 1000000 -s 10 -t heap +b_put -c 1000000 -s 10 -t queue +b_put -c 1000000 -s 10 -t recno +b_put -c 1000000 -t btree +b_put -c 1000000 -t hash +b_put -c 1000000 -t heap +b_put -c 1000000 -t queue +b_put -c 1000000 -t recno + +b_recover -c 1000000 + +b_txn -a -c 1000000 +b_txn -c 1000000 + +b_txn_write -a -c 100000 +b_txn_write -ar -c 100000 +b_txn_write -c 10000 +b_txn_write -r -c 10000 +b_txn_write -rw -c 100000 +b_txn_write -w -c 100000 + +b_workload -w A -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -w A -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -w A -c 100000 -g 10 -d 20 -p 32768 -t btree +b_workload -w A -c 100000 -g 10 -d 20 -p 32768 -t hash +b_workload -w A -c 100000 -g 10 -d 20 -p 4096 -t btree +b_workload -w A -c 100000 -g 10 -d 20 -p 4096 -t hash +b_workload -w A -c 100000 -g 10 -d 20 -p 8192 -t btree +b_workload -w A -c 100000 -g 10 -d 20 -p 8192 -t hash +b_workload -w A -c 100000 -g 10 -d 256 -p 1024 -t btree +b_workload -w A -c 100000 -g 10 -d 256 -p 1024 -t hash +b_workload -w A -c 100000 -o -t btree +b_workload -w A -c 100000 -o -t hash +b_workload -w E -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -w E -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -w F -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -w F -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -w G -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -w G -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -w H -c 10000000 -d 20 -p 1024 -t hash + +b_latch -c 100000000 +b_latch -c 100000000 -n 1 +b_latch -c 100000000 -n 2 +b_latch -c 100000000 -n 4 +b_latch -c 100000000 -n 8 +b_latch -c 100000000 -n 64 diff --git a/test/micro/configs/run.workload b/test/micro/configs/run.workload new file mode 100644 index 00000000..4d289e23 --- /dev/null +++ b/test/micro/configs/run.workload @@ -0,0 +1,38 @@ +b_workload -m 1 -w A -v -c 100000 -o -t hash +b_workload -m 2 -w A -v -c 100000 -o -t btree +b_workload -m 3 -w A -v -c 100000 -g 10 -d 20 -p 32768 -t hash 
+b_workload -m 4 -w A -v -c 100000 -g 10 -d 20 -p 32768 -t btree +b_workload -m 5 -w A -v -c 100000 -g 10 -d 20 -p 8192 -t hash +b_workload -m 6 -w A -v -c 100000 -g 10 -d 20 -p 8192 -t btree +b_workload -m 7 -w A -v -c 100000 -g 10 -d 20 -p 4096 -t hash +b_workload -m 8 -w A -v -c 100000 -g 10 -d 20 -p 4096 -t btree +b_workload -m 9 -w A -v -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -m 10 -w A -v -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -m 11 -w A -v -c 10000 -g 10 -d 256 -p 1024 -t hash +b_workload -m 12 -w A -v -c 10000 -g 10 -d 256 -p 1024 -t btree +b_workload -m 13 -w F -v -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -m 14 -w F -v -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -m 15 -w G -v -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -m 16 -w G -v -c 100000 -g 10 -d 20 -p 1024 -t btree +# Run put/get(G) with different page sizes, since fill factor +# is interesting when not deleting all of the elements. +b_workload -m 17 -w E -v -c 100000 -g 10 -d 20 -p 32768 -t hash +b_workload -m 18 -w E -v -c 100000 -g 10 -d 20 -p 32768 -t btree +b_workload -m 19 -w E -v -c 100000 -g 10 -d 20 -p 8192 -t hash +b_workload -m 20 -w E -v -c 100000 -g 10 -d 20 -p 8192 -t btree +b_workload -m 21 -w E -v -c 100000 -g 10 -d 20 -p 4096 -t hash +b_workload -m 22 -w E -v -c 100000 -g 10 -d 20 -p 4096 -t btree +b_workload -m 23 -w E -v -c 100000 -g 10 -d 20 -p 1024 -t hash +b_workload -m 24 -w E -v -c 100000 -g 10 -d 20 -p 1024 -t btree +b_workload -m 25 -w E -v -c 10000 -g 10 -d 256 -p 1024 -t hash +b_workload -m 26 -w E -v -c 10000 -g 10 -d 256 -p 1024 -t btree +b_workload -m 27 -w E -v -c 100000 -g 10 -o -d 10 -p 1024 -t hash +b_workload -m 28 -w E -v -c 100000 -g 10 -o -d 10 -p 1024 -t btree +b_workload -m 29 -w A -v -c 10000 -g 10 -d 1024 -p 512 -t hash +b_workload -m 30 -w A -v -c 10000 -g 10 -d 1024 -p 512 -t btree +b_workload -m 31 -w H -v -c 1000000 -d 20 -p 8192 -t hash +b_workload -m 32 -w H -v -c 1000000 -d 20 -p 8192 -t btree +b_workload -m 
33 -w H -v -c 1000000 -d 20 -p 512 -t hash +b_workload -m 34 -w H -v -c 1000000 -d 20 -p 512 -t btree +b_workload -m 35 -w H -v -c 10000000 -d 20 -p 512 -t hash +b_workload -m 36 -w H -v -c 10000000 -d 20 -p 512 -t btree diff --git a/test/micro/report b/test/micro/report new file mode 100644 index 00000000..c7358dbc --- /dev/null +++ b/test/micro/report @@ -0,0 +1,121 @@ +#! /bin/sh +# +# $Id$ + +# Use our pathname to locate the absolute path for our awk scripts. +t=`dirname $0` +h=`(cd $t && echo $PWD)` + +# We need a temporary file, and we need to clean it up after failure. +tmp="$PWD/__t" +trap 'rm -f $tmp; exit 1' 1 2 3 13 15 +trap 'rm -f $tmp; exit 0' 0 + +# header -- +# Output HTML page header. +# $1: directory name. +header() +{ + echo "" + echo "" + machine=`echo $1 | sed 's/.*\.//'` + echo "Berkeley DB test_micro run: $machine" + echo "" + echo "" + echo "

Berkeley DB test_micro run: $machine

" + echo "

`date`

" + test -f UNAME && cat UNAME +} + +# footer -- +# Output HTML page footer. +footer() +{ + echo "" + echo "" +} + +# table -- +# Create a table. +# $1: output file +table() +{ + title="Test $1: `egrep '^#' $1 | sort -u | sed 's/^#[ ]*//'`" + echo "
" + echo "" + echo "" + echo "" + echo "" + echo "" + echo "" + + # You can set the MAJOR and MINOR environment variables to limit + # the BDB releases for which a report is created. + # + # Process the output into a single line per release. + egrep "^${MAJOR:-[0-9][0-9]*}.${MINOR:-*}" $1 | + awk -f $h/report.awk | + sort -n > $tmp + + # Get the release count, and maximum value. + nrel=`wc -l $tmp` + max=`sort -k 2 -n -t ":" < $tmp | tail -1 | awk -F: '{print $2}'` + + # Display the output. + IFS=":" + cat $tmp | while true; do + # release, average, runs, percent, standard deviation + read rel avg runs percent rsd + if test "X$rel" = "X" ; then + break; + fi + + # echo "read: rel $rel, avg $avg, runs $runs, percent $percent, rsd $rsd" > /dev/stderr + + echo "" + echo "" + echo "" + done + echo "
$title
ReleaseOperations/second
$rel
" + echo "" + echo "" + if [ "$max" = "0.00" ];then + t=0 + else + t=`echo "400 * ($avg/($max + $max/10))" | bc -l` + fi + t=`printf %.0f $t` + echo "" + t=`echo "400 - $t" | bc` + echo "" + echo "" + echo "" + if test "X$percent" != "X" -o "X$rsd" != "X"; then + echo -n "" + fi + echo "" + echo "
    
$avg
  ("
+			if test "X$percent" = "X"; then
+				echo -n '***'
+			else
+				echo -n "-$percent"
+			fi
+			if test "X$rsd" != "X"; then
+				echo -n ", $rsd rsd, $runs runs"
+			fi
+			echo ")
" + echo "
" +} + +for i in RUN.*; do + echo "Building $i..." + name=`echo $i | sed 's/RUN.//'` + (cd $i + header $i + for j in `ls [0-9]* | sort -n`; do + table $j + done + footer) > $i/$name.html +done + +exit 0 diff --git a/test/micro/report.awk b/test/micro/report.awk new file mode 100644 index 00000000..6ae33038 --- /dev/null +++ b/test/micro/report.awk @@ -0,0 +1,40 @@ +# $Id$ + +/^[^#]/ { + total[$1] += $2 + sum[$1] += $2 * $2 + ++count[$1]; +} +END { + # Compute the average, find the maximum. + for (i in total) { + avg[i] = total[i] / count[i]; + if (max < avg[i]) + max = avg[i] + } + + for (i in total) { + # Calculate variance by raw score method. + var = (sum[i] - ((total[i] * total[i]) / count[i])) / count[i]; + + # The standard deviation is the square root of the variance. + stdv = sqrt(var); + + # Display the release value, the average score, and run count. + printf("%s:%.2f:%d:", i, avg[i], count[i]); + + # If this run wasn't the fastest, display the percent by which + # this run was slower. + if (max != avg[i]) + printf("%.0f%%", ((max - avg[i]) / max) * 100); + + printf(":"); + + # If there was more than a single run, display the relative + # standard deviation. 
+ if (count[i] > 1) + printf("%.0f%%", stdv * 100 / avg[i]); + + printf("\n"); + } +} diff --git a/test/micro/source/LIST b/test/micro/source/LIST new file mode 100644 index 00000000..8c02b4a4 --- /dev/null +++ b/test/micro/source/LIST @@ -0,0 +1,101 @@ +Test list: + +b_curalloc + Cursor allocation + + usage: b_curalloc [-c count] + + -c Cursor count + +b_curwalk + Walk a cursor through N records + + usage: b_curwalk [-pSs] [-C cachesz] + [-c cnt] [-d dupcnt] [-P pagesz] [-t type] [-w walkcnt] + -C Cache size + -c Record count + -d Duplicate record count + -P Page size + -p Walk backward instead of forward + -S Skip duplicates + -s Sort duplicates + -t Database type (B | H | Q | R) + +b_del + Delete records + + usage: b_del [-w] [-C cachesz] [-c count] [-t type] + + -C Cache size + -c Record count + -t Database type (B | H | Q | R) + -w Delete through cursor + +b_get + Read records + + usage: b_get [-C cachesz] [-c count] [-t type] + + -C Cache size + -c Record count + -t Database type (B | H | Q | R) + +b_load + Insert records + + usage: b_load [-d] [-C cachesz] [-c count] [-t type] + + -C Cache size + -c Record count + -d Use duplicate records + -t Database type (B | H | Q | R) + +b_open + Database open/close + + usage: b_open [-df] [-c count] [-t type] + + -d Open/close a subdatabase + -f Open/close a physical file + -c Open/close count + -t Database type (B | H | Q | R) + +b_put + Overwrite record + + usage: b_put [-C cachesz] + [-c count] [-d bytes] [-s secondaries] [-t type] + + -C Cache size + -c Record count + -d Data size + -s Number of secondaries + -t Database type (B | H | Q | R) + +b_recover + Run recovery + + usage: b_recover [-C cachesz] [-c count] + + -C Cache size + -c Transactions to recover + +b_txn + Abort or commit a transaction containing no operations + + usage: b_txn [-a] [-c count] + + -a Abort rather than commit + -c Transaction count + +b_txn_write + Write/commit transaction + + usage: b_txn_write [-arw] [-c count] + + -a nosync + -c 
Transaction count + -r Configure replication stub callback + -w write-nosync + +b_workload diff --git a/test/micro/source/b_curalloc.c b/test/micro/source/b_curalloc.c new file mode 100644 index 00000000..e454818d --- /dev/null +++ b/test/micro/source/b_curalloc.c @@ -0,0 +1,69 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_curalloc(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBC *curp; + int ch, i, count; + + count = 100000; + while ((ch = getopt(argc, argv, "c:")) != EOF) + switch (ch) { + case 'c': + count = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + dbp->set_errfile(dbp, stderr); + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); +#endif + + /* Allocate a cursor count times. */ + TIMER_START; + for (i = 0; i < count; ++i) { + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &curp, 0) == 0); + DB_BENCH_ASSERT(curp->c_close(curp) == 0); + } + TIMER_STOP; + + printf("# %d cursor allocations\n", count); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_curalloc [-c count]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_curwalk.c b/test/micro/source/b_curwalk.c new file mode 100644 index 00000000..f74d6d6e --- /dev/null +++ b/test/micro/source/b_curwalk.c @@ -0,0 +1,241 @@ +/* + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_curwalk(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBTYPE type; + DBC *dbc; + DBT key, data; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_HEAP_RID rid; +#endif + db_recno_t recno; + u_int32_t cachesize, pagesize, walkflags; + int ch, i, count, dupcount, j; + int prev, ret, skipdupwalk, sorted, walkcount; + char *ts, dbuf[32], kbuf[32]; + + type = DB_BTREE; + cachesize = 10 * MEGABYTE; + pagesize = 16 * 1024; + count = 100000; + dupcount = prev = skipdupwalk = sorted = 0; + walkcount = 1000; + ts = "Btree"; + while ((ch = getopt(argc, argv, "C:c:d:P:pSst:w:")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case 'd': + dupcount = atoi(optarg); + break; + case 'P': + pagesize = (u_int32_t)atoi(optarg); + break; + case 'p': + prev = 1; + break; + case 'S': + skipdupwalk = 1; + break; + case 's': + sorted = 1; + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + fprintf(stderr, + "b_curwalk: Heap is not supported! 
\n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case 'w': + walkcount = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* + * Queue and Recno don't support duplicates. + */ +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (dupcount != 0 && + (type == DB_QUEUE || type == DB_RECNO || type == DB_HEAP)) { + fprintf(stderr, + "b_curwalk: Queue, Recno and Heap don't support duplicates\n"); + return (usage()); + } +#else + if (dupcount != 0 && (type == DB_QUEUE || type == DB_RECNO)) { + fprintf(stderr, + "b_curwalk: Queue and Recno don't support duplicates\n"); + return (usage()); + } +#endif + +#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 +#define DB_PREV_NODUP 0 + /* + * DB_PREV_NODUP wasn't available until after 3.0.55. + * + * For some reason, testing sorted duplicates doesn't work either. + * I don't really care about 3.0.55 any more, just ignore it. + */ + return (0); +#endif + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + DB_BENCH_ASSERT(dbp->set_cachesize(dbp, 0, cachesize, 0) == 0); + DB_BENCH_ASSERT(dbp->set_pagesize(dbp, pagesize) == 0); + dbp->set_errfile(dbp, stderr); + + /* Set record length for Queue. */ + if (type == DB_QUEUE) + DB_BENCH_ASSERT(dbp->set_re_len(dbp, 20) == 0); + + /* Set duplicates flag. */ + if (dupcount != 0) + DB_BENCH_ASSERT( + dbp->set_flags(dbp, sorted ? 
DB_DUPSORT : DB_DUP) == 0); + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbp->open( + dbp, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#endif + + /* Initialize the data. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + /* Insert count in-order key/data pairs. */ + data.data = dbuf; + data.size = 20; + if (type == DB_BTREE || type == DB_HASH) { + key.size = 10; + key.data = kbuf; + for (i = 0; i < count; ++i) { + (void)snprintf(kbuf, sizeof(kbuf), "%010d", i); + for (j = 0; j <= dupcount; ++j) { + (void)snprintf(dbuf, sizeof(dbuf), "%020d", j); + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + } +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + } else if (type == DB_HEAP) { + key.data = &rid; + key.size = sizeof(rid); + for (i = 0; i < count; ++i) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, DB_APPEND) == 0); +#endif + } else { + key.data = &recno; + key.size = sizeof(recno); + for (i = 0, recno = 1; i < count; ++i, ++recno) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + + walkflags = prev ? + (skipdupwalk ? DB_PREV_NODUP : DB_PREV) : + (skipdupwalk ? DB_NEXT_NODUP : DB_NEXT); + + /* Walk the cursor through the tree N times. */ + TIMER_START; + for (i = 0; i < walkcount; ++i) { + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0); + while ((ret = dbc->c_get(dbc, &key, &data, walkflags)) == 0) + ; + DB_BENCH_ASSERT(ret == DB_NOTFOUND); + DB_BENCH_ASSERT(dbc->c_close(dbc) == 0); + } + TIMER_STOP; + + printf("# %d %s %s cursor of %d 10/20 byte key/data items", + walkcount, ts, prev ? + (skipdupwalk ? "DB_PREV_NODUP" : "DB_PREV") : + (skipdupwalk ? 
"DB_NEXT_NODUP" : "DB_NEXT"), + count); + if (dupcount != 0) + printf(" with %d dups", dupcount); + printf("\n"); + + /* + * An "operation" is traversal of a single key/data pair -- not a + * return of the key/data pair, since some versions of this test + * skip duplicate key/data pairs. + * + * Use a "double" so we don't overflow. + */ + TIMER_DISPLAY((double)count * walkcount); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + return (EXIT_SUCCESS); +} + +static int +usage() +{ + (void)fprintf(stderr, "%s\n\t%s\n", + "usage: b_curwalk [-pSs] [-C cachesz]", + "[-c cnt] [-d dupcnt] [-P pagesz] [-t type] [-w walkcnt]"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_del.c b/test/micro/source/b_del.c new file mode 100644 index 00000000..b526e631 --- /dev/null +++ b/test/micro/source/b_del.c @@ -0,0 +1,202 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_del(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBC *dbc; + DBT key, data; + DBTYPE type; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_HEAP_RID rid; +#endif + db_recno_t recno; + u_int32_t cachesize; + int ch, i, count, ret, use_cursor; + char *ts, buf[32]; + + type = DB_BTREE; + cachesize = MEGABYTE; + count = 100000; + use_cursor = 0; + ts = "Btree"; + while ((ch = getopt(argc, argv, "C:c:t:w")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + 
fprintf(stderr, + "b_curwalk: Heap is not supported! \n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case 'w': + use_cursor = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + DB_BENCH_ASSERT(dbp->set_cachesize(dbp, 0, cachesize, 0) == 0); + dbp->set_errfile(dbp, stderr); + +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + /* Need a cursor if using Heap. */ + if (type == DB_HEAP && !use_cursor) { + printf("Heap databases require the -w flag.\n"); + return (-1); + } +#endif + + /* Set record length for Queue. */ + if (type == DB_QUEUE) + DB_BENCH_ASSERT(dbp->set_re_len(dbp, 20) == 0); + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT( + dbp->open(dbp, NULL, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#endif + + /* Initialize the data. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + data.data = "01234567890123456789"; + data.size = 20; + + /* Store a key/data pair. */ + switch (type) { + case DB_BTREE: + case DB_HASH: + key.data = buf; + key.size = 10; + break; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + case DB_HEAP: + key.data = &rid; + key.size = sizeof(rid); + break; +#endif + case DB_QUEUE: + case DB_RECNO: + key.data = &recno; + key.size = sizeof(recno); + break; + case DB_UNKNOWN: + b_util_abort(); + break; + } + + /* Insert count in-order key/data pairs. 
*/ + if (type == DB_BTREE || type == DB_HASH) + for (i = 0; i < count; ++i) { + (void)snprintf(buf, sizeof(buf), "%010d", i); + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + else if (type == DB_HEAP) + for (i = 0; i < count; i++) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, DB_APPEND) == 0); +#endif + else + for (i = 0, recno = 1; i < count; ++i, ++recno) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + + /* Delete the records. */ + TIMER_START; + if (use_cursor) { + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0); + while ((ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0) + DB_BENCH_ASSERT(dbc->c_del(dbc, 0) == 0); + DB_BENCH_ASSERT (ret == DB_NOTFOUND); + } else + if (type == DB_BTREE || type == DB_HASH) + for (i = 0; i < count; ++i) { + (void)snprintf(buf, sizeof(buf), "%010d", i); + DB_BENCH_ASSERT( + dbp->del(dbp, NULL, &key, 0) == 0); + } + else + for (i = 0, recno = 1; i < count; ++i, ++recno) + DB_BENCH_ASSERT( + dbp->del(dbp, NULL, &key, 0) == 0); + + TIMER_STOP; + + printf( + "# %d %s database in-order delete of 10/20 byte key/data pairs using %s\n", + count, ts, use_cursor ? "a cursor" : "the key"); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, + "usage: b_del [-w] [-C cachesz] [-c count] [-t type]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_get.c b/test/micro/source/b_get.c new file mode 100644 index 00000000..30bea8d8 --- /dev/null +++ b/test/micro/source/b_get.c @@ -0,0 +1,188 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +u_int32_t part_callback(dbp, dbt) + DB *dbp; + DBT *dbt; +{ + extern u_int32_t __ham_func2(DB *, const void *, u_int32_t); + return (__ham_func2(dbp, dbt->data, dbt->size)); +} + +int +b_get(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBTYPE type; + DBT key, data; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_HEAP_RID rid; +#endif + db_recno_t recno; + u_int32_t cachesize; + int ch, i, count; + char *ts; + + type = DB_BTREE; + cachesize = MEGABYTE; + count = 100000; + ts = "Btree"; + while ((ch = getopt(argc, argv, "C:c:t:")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + fprintf(stderr, + "b_curwalk: Heap is not supported! \n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + DB_BENCH_ASSERT(dbp->set_cachesize(dbp, 0, cachesize, 0) == 0); + dbp->set_errfile(dbp, stderr); + + /* Set record length for Queue. 
*/ + if (type == DB_QUEUE) + DB_BENCH_ASSERT(dbp->set_re_len(dbp, 10) == 0); +#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 7 && DB_VERSION_PATCH == 30 + if (type == DB_BTREE) { + DBT keys[3]; + + memset(keys, 0, sizeof(keys)); + keys[0].data = "a"; + keys[0].size = 1; + keys[1].data = "b"; + keys[1].size = 1; + keys[2].data = "c"; + keys[2].size = 1; + + DB_BENCH_ASSERT( + dbp->set_partition_keys(dbp, 4, keys, NULL) == 0); + } + + if (type == DB_HASH) { + DB_BENCH_ASSERT( + dbp->set_partition_callback(dbp, 4, part_callback) == 0); + } +#endif + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT( + dbp->open(dbp, NULL, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#endif + + /* Store a key/data pair. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + switch (type) { + case DB_BTREE: + case DB_HASH: + key.data = "aaaaa"; + key.size = 5; + break; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + case DB_HEAP: + key.data = &rid; + key.size = sizeof(rid); + break; +#endif + case DB_QUEUE: + case DB_RECNO: + recno = 1; + key.data = &recno; + key.size = sizeof(recno); + break; + case DB_UNKNOWN: + b_util_abort(); + break; + } + data.data = "bbbbb"; + data.size = 5; + +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_BENCH_ASSERT(dbp->put( + dbp, NULL, &key, &data, type == DB_HEAP ? DB_APPEND : 0) == 0); +#else + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); +#endif + /* Retrieve the key/data pair count times. 
*/ + TIMER_START; + for (i = 0; i < count; ++i) + DB_BENCH_ASSERT(dbp->get(dbp, NULL, &key, &data, 0) == 0); + TIMER_STOP; + + printf("# %d %s database get of cached key/data item\n", count, ts); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, + "usage: b_get [-C cachesz] [-c count] [-t type]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_inmem.c b/test/micro/source/b_inmem.c new file mode 100644 index 00000000..31665298 --- /dev/null +++ b/test/micro/source/b_inmem.c @@ -0,0 +1,426 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "bench.h" + +#if DB_VERSION_MAJOR > 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 0 +/* + * The in-memory tests don't run on early releases of Berkeley DB. + */ +#undef MEGABYTE +#define MEGABYTE (1024 * 1024) + +u_int32_t bulkbufsize = 4 * MEGABYTE; +u_int32_t cachesize = 32 * MEGABYTE; +u_int32_t datasize = 32; +u_int32_t keysize = 8; +u_int32_t logbufsize = 8 * MEGABYTE; +u_int32_t numitems; +u_int32_t pagesize = 32 * 1024; + +FILE *fp; + +static void op_ds __P((u_int, int)); +static void op_ds_bulk __P((u_int, u_int *)); +static void op_tds __P((u_int, int, u_int32_t, u_int32_t)); +static int usage __P((void)); + +static void +op_ds(u_int ops, int update) +{ + DB_ENV *dbenv; + char *letters = "abcdefghijklmnopqrstuvwxuz"; + DB *dbp; + DBT key, data; + char *keybuf, *databuf; + DB_MPOOL_STAT *gsp; + + DB_BENCH_ASSERT((keybuf = malloc(keysize)) != NULL); + DB_BENCH_ASSERT((databuf = malloc(datasize)) != NULL); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = keybuf; + key.size = keysize; + memset(keybuf, 'a', keysize); + + data.data = databuf; + data.size = datasize; + memset(databuf, 'b', datasize); + + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + dbenv = 
dbp->dbenv; + dbp->set_errfile(dbp, stderr); + + DB_BENCH_ASSERT(dbp->set_pagesize(dbp, pagesize) == 0); + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0666) == 0); + + (void)dbenv->memp_stat(dbenv, &gsp, NULL, DB_STAT_CLEAR); + + if (update) { + TIMER_START; + for (; ops > 0; --ops) { + keybuf[(ops % keysize)] = letters[(ops % 26)]; + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + TIMER_STOP; + } else { + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); + TIMER_START; + for (; ops > 0; --ops) + DB_BENCH_ASSERT( + dbp->get(dbp, NULL, &key, &data, 0) == 0); + TIMER_STOP; + } + + if (dbenv->memp_stat(dbenv, &gsp, NULL, 0) == 0) + DB_BENCH_ASSERT(gsp->st_cache_miss == 0); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); +} + +static void +op_ds_bulk(u_int ops, u_int *totalp) +{ + DB_ENV *dbenv; + DB *dbp; + DBC *dbc; + DBT key, data; + u_int32_t len, klen; + u_int i, total; + char *keybuf, *databuf; + void *pointer, *dp, *kp; + DB_MPOOL_STAT *gsp; + + DB_BENCH_ASSERT((keybuf = malloc(keysize)) != NULL); + DB_BENCH_ASSERT((databuf = malloc(bulkbufsize)) != NULL); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = keybuf; + key.size = keysize; + + data.data = databuf; + data.size = datasize; + memset(databuf, 'b', datasize); + + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + dbenv = dbp->dbenv; + dbp->set_errfile(dbp, stderr); + + DB_BENCH_ASSERT(dbp->set_pagesize(dbp, pagesize) == 0); + DB_BENCH_ASSERT(dbp->set_cachesize(dbp, 0, cachesize, 1) == 0); + DB_BENCH_ASSERT( + dbp->open(dbp, NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0666) == 0); + + for (i = 1; i <= numitems; ++i) { + (void)snprintf(keybuf, keysize, "%7d", i); + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + +#if 0 + fp = fopen("before", "w"); + dbp->set_msgfile(dbp, fp); + DB_BENCH_ASSERT (dbp->stat_print(dbp, DB_STAT_ALL) == 0); +#endif + + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0); + + 
data.ulen = bulkbufsize; + data.flags = DB_DBT_USERMEM; + + (void)dbenv->memp_stat(dbenv, &gsp, NULL, DB_STAT_CLEAR); + + TIMER_START; + for (total = 0; ops > 0; --ops) { + DB_BENCH_ASSERT(dbc->c_get( + dbc, &key, &data, DB_FIRST | DB_MULTIPLE_KEY) == 0); + DB_MULTIPLE_INIT(pointer, &data); + while (pointer != NULL) { + DB_MULTIPLE_KEY_NEXT(pointer, &data, kp, klen, dp, len); + if (kp != NULL) + ++total; + } + } + TIMER_STOP; + *totalp = total; + + if (dbenv->memp_stat(dbenv, &gsp, NULL, 0) == 0) + DB_BENCH_ASSERT(gsp->st_cache_miss == 0); + +#if 0 + fp = fopen("before", "w"); + dbp->set_msgfile(dbp, fp); + DB_BENCH_ASSERT (dbp->stat_print(dbp, DB_STAT_ALL) == 0); +#endif + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + COMPQUIET(dp, NULL); + COMPQUIET(klen, 0); + COMPQUIET(len, 0); +} + +static void +op_tds(u_int ops, int update, u_int32_t env_flags, u_int32_t log_flags) +{ + DB *dbp; + DBT key, data; + DB_ENV *dbenv; + DB_MPOOL_STAT *gsp; + DB_TXN *txn; + char *keybuf, *databuf; + + DB_BENCH_ASSERT((keybuf = malloc(keysize)) != NULL); + DB_BENCH_ASSERT((databuf = malloc(datasize)) != NULL); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = keybuf; + key.size = keysize; + memset(keybuf, 'a', keysize); + + data.data = databuf; + data.size = datasize; + memset(databuf, 'b', datasize); + + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + + dbenv->set_errfile(dbenv, stderr); + + /* General environment configuration. */ +#ifdef DB_AUTO_COMMIT + DB_BENCH_ASSERT(dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1) == 0); +#endif + if (env_flags != 0) + DB_BENCH_ASSERT(dbenv->set_flags(dbenv, env_flags, 1) == 0); + + /* Logging configuration. 
*/ + if (log_flags != 0) +#if DB_VERSION_MINOR >= 7 || DB_VERSION_MAJOR > 4 + DB_BENCH_ASSERT( + dbenv->log_set_config(dbenv, log_flags, 1) == 0); +#else + DB_BENCH_ASSERT(dbenv->set_flags(dbenv, log_flags, 1) == 0); +#endif +#ifdef DB_LOG_INMEMORY + if (!(log_flags & DB_LOG_INMEMORY)) +#endif +#ifdef DB_LOG_IN_MEMORY + if (!(log_flags & DB_LOG_IN_MEMORY)) +#endif + DB_BENCH_ASSERT(dbenv->set_lg_max(dbenv, logbufsize * 10) == 0); + DB_BENCH_ASSERT(dbenv->set_lg_bsize(dbenv, logbufsize) == 0); + + DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR", + DB_CREATE | DB_PRIVATE | DB_INIT_LOCK | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN, 0666) == 0); + + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); + DB_BENCH_ASSERT(dbp->set_pagesize(dbp, pagesize) == 0); + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); + + if (update) { + (void)dbenv->memp_stat(dbenv, &gsp, NULL, DB_STAT_CLEAR); + + TIMER_START; + for (; ops > 0; --ops) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + TIMER_STOP; + + if (dbenv->memp_stat(dbenv, &gsp, NULL, 0) == 0) + DB_BENCH_ASSERT(gsp->st_page_out == 0); + } else { + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); + (void)dbenv->memp_stat(dbenv, &gsp, NULL, DB_STAT_CLEAR); + + TIMER_START; + for (; ops > 0; --ops) { + DB_BENCH_ASSERT( + dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT( + dbp->get(dbp, NULL, &key, &data, 0) == 0); + DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); + } + TIMER_STOP; + + if (dbenv->memp_stat(dbenv, &gsp, NULL, 0) == 0) + DB_BENCH_ASSERT(gsp->st_cache_miss == 0); + } + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); +} + +#define DEFAULT_OPS 1000000 + +int +b_inmem(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + u_int ops, total; + int ch; + + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + ops = 0; + while ((ch = getopt(argc, 
argv, "b:C:d:k:l:o:P:")) != EOF) + switch (ch) { + case 'b': + bulkbufsize = (u_int32_t)atoi(optarg); + break; + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'd': + datasize = (u_int)atoi(optarg); + break; + case 'k': + keysize = (u_int)atoi(optarg); + break; + case 'l': + logbufsize = (u_int32_t)atoi(optarg); + break; + case 'o': + ops = (u_int)atoi(optarg); + break; + case 'P': + pagesize = (u_int32_t)atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 1) + return (usage()); + + numitems = (cachesize / (keysize + datasize - 1)) / 2; + + if (strcasecmp(argv[0], "read") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; + op_ds(ops, 0); + printf( + "# %u in-memory Btree database reads of %u/%u byte key/data pairs\n", + ops, keysize, datasize); + } else if (strcasecmp(argv[0], "bulk") == 0) { + if (keysize < 8) { + fprintf(stderr, + "%s: bulk read requires a key size >= 10\n", progname); + return (EXIT_FAILURE); + } + /* + * The ops value is the number of bulk operations, not key get + * operations. Reduce the value so the test doesn't take so + * long, and use the returned number of retrievals as the ops + * value for timing purposes. 
+ */ + if (ops == 0) + ops = 100000; + op_ds_bulk(ops, &total); + ops = total; + printf( + "# %u bulk in-memory Btree database reads of %u/%u byte key/data pairs\n", + ops, keysize, datasize); + } else if (strcasecmp(argv[0], "write") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; + op_ds(ops, 1); + printf( + "# %u in-memory Btree database writes of %u/%u byte key/data pairs\n", + ops, keysize, datasize); + } else if (strcasecmp(argv[0], "txn-read") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; + op_tds(ops, 0, 0, 0); + printf( + "# %u transactional in-memory Btree database reads of %u/%u %s", + ops, keysize, datasize, "byte key/data pairs\n"); + } else if (strcasecmp(argv[0], "txn-write") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; +#if defined(DB_LOG_INMEMORY) || defined(DB_LOG_IN_MEMORY) +#if defined(DB_LOG_INMEMORY) + op_tds(ops, 1, 0, DB_LOG_INMEMORY); +#else + op_tds(ops, 1, 0, DB_LOG_IN_MEMORY); +#endif + printf( + "# %u transactional in-memory logging Btree database writes of %u/%u%s", + ops, keysize, datasize, " byte key/data pairs\n"); +#else + return (EXIT_SUCCESS); +#endif + } else if (strcasecmp(argv[0], "txn-nosync") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; + op_tds(ops, 1, DB_TXN_NOSYNC, 0); + printf( + "# %u transactional nosync logging Btree database writes of %u/%u %s", + ops, keysize, datasize, "byte key/data pairs\n"); + } else if (strcasecmp(argv[0], "txn-write-nosync") == 0) { + if (ops == 0) + ops = DEFAULT_OPS; +#ifdef DB_TXN_WRITE_NOSYNC + op_tds(ops, 1, DB_TXN_WRITE_NOSYNC, 0); + printf( + "# %u transactional OS-write/nosync logging Btree database writes of %u/%u%s", + ops, keysize, datasize, " byte key/data pairs\n"); +#else + return (EXIT_SUCCESS); +#endif + } else if (strcasecmp(argv[0], "txn-sync") == 0) { + /* + * Flushing to disk takes a long time, reduce the number of + * default ops. 
+ */ + if (ops == 0) + ops = 100000; + op_tds(ops, 1, 0, 0); + printf( + "# %u transactional logging Btree database writes of %u/%u %s", + ops, keysize, datasize, "byte key/data pairs\n"); + } else { + fprintf(stderr, "%s: unknown keyword %s\n", progname, argv[0]); + return (EXIT_FAILURE); + } + + TIMER_DISPLAY(ops); + return (EXIT_SUCCESS); +} + +static int +usage() +{ + fprintf(stderr, "usage: %s %s%s%s%s", + progname, "[-b bulkbufsz] [-C cachesz]\n\t", + "[-d datasize] [-k keysize] [-l logbufsz] [-o ops] [-P pagesz]\n\t", + "[read | bulk | write | txn-read |\n\t", + "txn-write | txn-nosync | txn-write-nosync | txn-sync]\n"); + return (EXIT_FAILURE); +} +#else +int +b_inmem(int argc, char *argv[]) +{ + COMPQUIET(argc, 0); + COMPQUIET(argv, NULL); + return (0); +} +#endif diff --git a/test/micro/source/b_latch.c b/test/micro/source/b_latch.c new file mode 100644 index 00000000..b9dd27d9 --- /dev/null +++ b/test/micro/source/b_latch.c @@ -0,0 +1,199 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "bench.h" + +#ifdef _POSIX_THREADS +typedef struct { + pthread_t id; + DB_ENV *dbenv; + int iterations; + db_mutex_t mutex; + int contentions; +} threadinfo_t; + +static void *latch_threadmain __P((void *)); +#endif + +static int time_latches __P((DB_ENV *, db_mutex_t, int)); + +#define LATCH_THREADS_MAX 100 + +/* Return the environment needed for __mutex_lock(), depending on release. + */ +#if DB_VERSION_MAJOR <4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 7 +#define ENV_ARG(dbenv) (dbenv) +#else +#define ENV_ARG(dbenv) ((dbenv)->env) +#endif + +/* + * In the mulithreaded latch test each thread locks and updates this variable. + * It detects contention when the value of this counter changes during the + * mutex lock call. 
+ */ +static int CurrentCounter = 0; +static int latch_usage __P((void)); + +static int +latch_usage() +{ + (void)fprintf(stderr, "usage: b_latch [-c number of %s", + "lock+unlock pairs] [-n number of threads]\n"); + return (EXIT_FAILURE); +} + +/* + * time_latches -- + * Repeat acquire and release of an exclusive latch, counting the + * number of times that 'someone else' got it just as we tried to. + */ +static int time_latches(dbenv, mutex, iterations) + DB_ENV *dbenv; + db_mutex_t mutex; + int iterations; +{ + int contended, i, previous; + + contended = 0; + for (i = 0; i < iterations; ++i) { + previous = CurrentCounter; + DB_BENCH_ASSERT(__mutex_lock(ENV_ARG(dbenv), mutex) == 0); + if (previous != CurrentCounter) + contended++; + CurrentCounter++; + DB_BENCH_ASSERT(__mutex_unlock(ENV_ARG(dbenv), mutex) == 0); + } + return (contended); +} + +#ifdef _POSIX_THREADS +/* + * latch_threadmain -- + * Entry point for multithreaded latching test. + * + * Currently only supported for POSIX threads. + */ +static void * +latch_threadmain(arg) + void *arg; +{ + threadinfo_t *info = arg; + + info->contentions = time_latches(info->dbenv, + info->mutex, info->iterations); + + return ((void *) 0); +} +#endif + +/* + * b_latch -- + * Measure the speed of latching and mutex operations. 
+ * + * + */ +int +b_latch(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + int ch, count, nthreads; +#ifdef _POSIX_THREADS + threadinfo_t threads[LATCH_THREADS_MAX]; + int i, ret; + void *status; +#endif + db_mutex_t mutex; + int contended; + + contended = 0; + count = 1000000; + nthreads = 0; /* Default to running the test without extra threads */ + while ((ch = getopt(argc, argv, "c:n:")) != EOF) + switch (ch) { + case 'c': + count = atoi(optarg); + break; + case 'n': + nthreads = atoi(optarg); + break; + case '?': + default: + return (latch_usage()); + } + argc -= optind; + argv += optind; + if (argc != 0 || count < 1 || nthreads < 0 || + nthreads > LATCH_THREADS_MAX) + return (latch_usage()); +#ifndef _POSIX_THREADS + if (nthreads > 1) { + (void)fprintf(stderr, + "Sorry, support for -n %d: threads not yet available\n", + nthreads); + exit(EXIT_FAILURE); + } +#endif + + /* Create the environment. */ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + NULL, DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE | DB_THREAD, 0666) == 0); +#endif + DB_BENCH_ASSERT(dbenv->mutex_alloc(dbenv, DB_MUTEX_SELF_BLOCK, + &mutex) == 0); +#ifdef _POSIX_THREADS + for (i = 0; i < nthreads; i++) { + threads[i].dbenv = dbenv; + threads[i].mutex = mutex; + threads[i].iterations = + nthreads <= 1 ? count : count / nthreads; + } +#endif + + /* Start and acquire and release a mutex count times. If there's + * posix support and a non-zero number of threads start them. 
+ */ + TIMER_START; +#ifdef _POSIX_THREADS + if (nthreads > 0) { + for (i = 0; i < nthreads; i++) + DB_BENCH_ASSERT(pthread_create(&threads[i].id, + NULL, latch_threadmain, &threads[i]) == 0); + for (i = 0; i < nthreads; i++) { + ret = pthread_join(threads[i].id, &status); + DB_BENCH_ASSERT(ret == 0); + contended += threads[i].contentions; + } + + } else +#endif + contended = time_latches(dbenv, mutex, count); + TIMER_STOP; + + printf("# %d mutex lock-unlock pairs of %d thread%s\n", count, + nthreads, nthreads == 1 ? "" : "s"); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbenv->mutex_free(dbenv, mutex) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + COMPQUIET(contended, 0); + + return (0); +} diff --git a/test/micro/source/b_load.c b/test/micro/source/b_load.c new file mode 100644 index 00000000..0f920a5b --- /dev/null +++ b/test/micro/source/b_load.c @@ -0,0 +1,197 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_load(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBTYPE type; + DBT key, data; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_HEAP_RID rid; +#endif + db_recno_t recno; + u_int32_t cachesize; + int ch, i, count, duplicate; + char *ts, buf[32]; + + type = DB_BTREE; + cachesize = MEGABYTE; + count = 100000; + duplicate = 0; + ts = "Btree"; + while ((ch = getopt(argc, argv, "C:c:dt:")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case 'd': + duplicate = 1; + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + fprintf(stderr, + "b_curwalk: Heap is not supported! \n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Usage. 
*/ +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (duplicate && + (type == DB_QUEUE || type == DB_RECNO || type == DB_HEAP)) { + fprintf(stderr, + "b_load: Queue, Recno and Heap don't support duplicates\n"); + return (usage()); + } +#else + if (duplicate && (type == DB_QUEUE || type == DB_RECNO)) { + fprintf(stderr, + "b_load: Queue an Recno don't support duplicates\n"); + return (usage()); + } +#endif + +#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 + /* + * DB versions prior to 3.1.17 didn't have off-page duplicates, so + * this test can run forever. + */ + if (duplicate) + return (0); +#endif + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, NULL, 0) == 0); + DB_BENCH_ASSERT(dbp->set_cachesize(dbp, 0, cachesize, 0) == 0); + if (duplicate) + DB_BENCH_ASSERT(dbp->set_flags(dbp, DB_DUP) == 0); + dbp->set_errfile(dbp, stderr); + + /* Set record length for Queue. */ + if (type == DB_QUEUE) + DB_BENCH_ASSERT(dbp->set_re_len(dbp, 20) == 0); + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT( + dbp->open(dbp, NULL, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#endif + + /* Initialize the data. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + /* Insert count in-order key/data pairs. 
*/ + TIMER_START; + if (duplicate) { + key.size = 10; + key.data = "01234567890123456789"; + data.data = buf; + data.size = 20; + for (i = 0; i < count; ++i) { + (void)snprintf(buf, sizeof(buf), "%020d", i); + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + } else { + data.data = buf; + data.size = 20; + if (type == DB_BTREE || type == DB_HASH) { + key.size = 10; + key.data = buf; + for (i = 0; i < count; ++i) { + (void)snprintf(buf, sizeof(buf), "%010d", i); + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + } else if (type == DB_HEAP) { + key.data = &rid; + key.size = sizeof(rid); + for (i = 0; i < count; ++i) + DB_BENCH_ASSERT(dbp->put(dbp, + NULL, &key, &data, DB_APPEND) == 0); +#endif + } else { + key.data = &recno; + key.size = sizeof(recno); + for (i = 0, recno = 1; i < count; ++i, ++recno) + DB_BENCH_ASSERT( + dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + } + + TIMER_STOP; + + printf("# %d %s database in-order put of 10/20 byte key/data %sitems\n", + count, ts, duplicate ? "duplicate " : ""); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, + "usage: b_load [-d] [-C cachesz] [-c count] [-t type]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_open.c b/test/micro/source/b_open.c new file mode 100644 index 00000000..c8af0718 --- /dev/null +++ b/test/micro/source/b_open.c @@ -0,0 +1,157 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_open(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB *dbp; + DBTYPE type; + int ch, i, count; + char *fname, *dbname, *ts; + + type = DB_BTREE; + count = 1000; + fname = dbname = NULL; + ts = "Btree"; + while ((ch = getopt(argc, argv, "c:dft:")) != EOF) + switch (ch) { + case 'c': + count = atoi(optarg); + break; + case 'd': + dbname = "dbname"; + break; + case 'f': + fname = "filename"; + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + fprintf(stderr, + "b_curwalk: Heap is not supported! \n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + +#if DB_VERSION_MAJOR < 4 + /* + * Don't run in-memory database tests on versions less than 3, it + * takes forever and eats memory. + */ + if (fname == NULL && dbname == NULL) + return (0); +#endif +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 4 + /* + * Named in-memory databases weren't available until 4.4. + */ + if (fname == NULL && dbname != NULL) + return (0); +#endif + + /* Create the environment. 
*/ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + NULL, DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#endif + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); + +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, fname, dbname, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbp->open( + dbp, fname, dbname, type, DB_CREATE, 0666) == 0); +#endif + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + + /* Open the database count times. */ + TIMER_START; + for (i = 0; i < count; ++i) { + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, fname, dbname, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbp->open( + dbp, fname, dbname, type, DB_CREATE, 0666) == 0); +#endif + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + } + TIMER_STOP; + + printf("# %d %s %sdatabase open/close pairs\n", + count, ts, + fname == NULL ? + (dbname == NULL ? "in-memory " : "named in-memory ") : + (dbname == NULL ? "" : "sub-")); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_open [-df] [-c count] [-t type]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_put.c b/test/micro/source/b_put.c new file mode 100644 index 00000000..596bba42 --- /dev/null +++ b/test/micro/source/b_put.c @@ -0,0 +1,250 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ +#include "bench.h" + +static int usage(void); +static int b_put_secondary(DB *, const DBT *, const DBT *, DBT *); + +int +b_put(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB *dbp, **second; + DBTYPE type; + DBT key, data; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_HEAP_RID rid; +#endif + db_recno_t recno; + u_int32_t cachesize, dsize; + int ch, i, count, secondaries; + char *ts, buf[64]; + + second = NULL; + type = DB_BTREE; + cachesize = MEGABYTE; + dsize = 20; + count = 100000; + secondaries = 0; + ts = "Btree"; + while ((ch = getopt(argc, argv, "C:c:d:s:t:")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case 'd': + dsize = (u_int32_t)atoi(optarg); + break; + case 's': + secondaries = atoi(optarg); + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + ts = "Btree"; + type = DB_BTREE; + break; + case 'H': case 'h': + if (optarg[1] == 'E' || optarg[1] == 'e') { +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + if (b_util_have_heap()) + return (0); + ts = "Heap"; + type = DB_HEAP; +#else + fprintf(stderr, + "b_curwalk: Heap is not supported! \n"); + return (EXIT_SUCCESS); +#endif + } else { + if (b_util_have_hash()) + return (0); + ts = "Hash"; + type = DB_HASH; + } + break; + case 'Q': case 'q': + if (b_util_have_queue()) + return (0); + ts = "Queue"; + type = DB_QUEUE; + break; + case 'R': case 'r': + ts = "Recno"; + type = DB_RECNO; + break; + default: + return (usage()); + } + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + +#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 3 + /* + * Secondaries were added after DB 3.2.9. + */ + if (secondaries) + return (0); +#endif + + /* Create the environment. 
*/ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); + DB_BENCH_ASSERT(dbenv->set_cachesize(dbenv, 0, cachesize, 0) == 0); +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + NULL, DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#endif + + /* + * Create the database. + * Optionally set the record length for Queue. + */ + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); + if (type == DB_QUEUE) + DB_BENCH_ASSERT(dbp->set_re_len(dbp, dsize) == 0); +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT( + dbp->open(dbp, NULL, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, type, DB_CREATE, 0666) == 0); +#endif + + /* Optionally create the secondaries. */ + if (secondaries != 0) { + DB_BENCH_ASSERT((second = + calloc(sizeof(DB *), (size_t)secondaries)) != NULL); + for (i = 0; i < secondaries; ++i) { + DB_BENCH_ASSERT(db_create(&second[i], dbenv, 0) == 0); + (void)snprintf(buf, sizeof(buf), "%d.db", i); +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(second[i]->open(second[i], NULL, + buf, NULL, DB_BTREE, DB_CREATE, 0600) == 0); +#else + DB_BENCH_ASSERT(second[i]->open(second[i], + buf, NULL, DB_BTREE, DB_CREATE, 0600) == 0); +#endif +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + /* + * The DB_TXN argument to Db.associate was added in + * 4.1.25. + */ + DB_BENCH_ASSERT(dbp->associate( + dbp, NULL, second[i], b_put_secondary, 0) == 0); +#else + DB_BENCH_ASSERT(dbp->associate( + dbp, second[i], b_put_secondary, 0) == 0); +#endif + } + } + + /* Store a key/data pair. 
*/ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + switch (type) { + case DB_BTREE: + case DB_HASH: + key.data = "01234567890123456789"; + key.size = 20; + break; +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + case DB_HEAP: + key.data = &rid; + key.size = sizeof(rid); + break; +#endif + case DB_QUEUE: + case DB_RECNO: + recno = 1; + key.data = &recno; + key.size = sizeof(recno); + break; + case DB_UNKNOWN: + b_util_abort(); + break; + } + + data.size = dsize; + DB_BENCH_ASSERT( + (data.data = malloc((size_t)dsize)) != NULL); + + /* Store the key/data pair count times. */ + TIMER_START; + for (i = 0; i < count; ++i) { + /* Change data value so the secondaries are updated. */ + (void)snprintf(data.data, data.size, "%10lu", (u_long)i); +#if DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR >= 2) + DB_BENCH_ASSERT(dbp->put(dbp, + NULL, &key, &data, type == DB_HEAP ? DB_APPEND : 0) == 0); +#else + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); +#endif + } + TIMER_STOP; + + if (type == DB_BTREE || type == DB_HASH) + printf( + "# %d %s database put of 10 byte key, %lu byte data", + count, ts, (u_long)dsize); + else + printf("# %d %s database put of key, %lu byte data", + count, ts, (u_long)dsize); + if (secondaries) + printf(" with %d secondaries", secondaries); + printf("\n"); + TIMER_DISPLAY(count); + + if (second != NULL) { + for (i = 0; i < secondaries; ++i) + DB_BENCH_ASSERT(second[i]->close(second[i], 0) == 0); + free(second); + } + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + return (0); +} + +static int +b_put_secondary(dbp, pkey, pdata, skey) + DB *dbp; + const DBT *pkey, *pdata; + DBT *skey; +{ + skey->data = pdata->data; + skey->size = pdata->size; + + COMPQUIET(dbp, NULL); + COMPQUIET(pkey, NULL); + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_put %s\n", + "[-C cachesz] [-c count] [-d bytes] [-s 
secondaries] [-t type]"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_recover.c b/test/micro/source/b_recover.c new file mode 100644 index 00000000..ca061d98 --- /dev/null +++ b/test/micro/source/b_recover.c @@ -0,0 +1,141 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_recover(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBT key, data; + DB_ENV *dbenv; + DB_TXN *txn; + u_int32_t cachesize; + int ch, i, count; + + /* + * Recover was too slow before release 4.0 that it's not worth + * running the test. + */ +#if DB_VERSION_MAJOR < 4 + return (0); +#endif + cachesize = MEGABYTE; + count = 1000; + while ((ch = getopt(argc, argv, "C:c:")) != EOF) + switch (ch) { + case 'C': + cachesize = (u_int32_t)atoi(optarg); + break; + case 'c': + count = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the environment. */ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); + DB_BENCH_ASSERT(dbenv->set_cachesize(dbenv, 0, cachesize, 0) == 0); + +#define OFLAGS \ + (DB_CREATE | DB_INIT_LOCK | \ + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE) +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, NULL, OFLAGS, 0666) == 0); +#endif +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, OFLAGS, 0666) == 0); +#endif +#if DB_VERSION_MAJOR > 3 || DB_VERSION_MINOR > 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, OFLAGS, 0666) == 0); +#endif + + /* Create the database. 
*/ + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open(dbp, NULL, + TESTFILE, NULL, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); +#endif + + /* Initialize the data. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.size = data.size = 20; + key.data = data.data = "01234567890123456789"; + + /* Start/commit a transaction count times. */ + for (i = 0; i < count; ++i) { +#if DB_VERSION_MAJOR < 4 + DB_BENCH_ASSERT( + txn_begin(dbenv, NULL, &txn, DB_TXN_NOSYNC) == 0); + DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); + DB_BENCH_ASSERT(txn_commit(txn, 0) == 0); +#else + DB_BENCH_ASSERT( + dbenv->txn_begin(dbenv, NULL, &txn, DB_TXN_NOSYNC) == 0); + DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); + DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); +#endif + } + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + /* Create a new DB_ENV handle. */ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); + DB_BENCH_ASSERT( + dbenv->set_cachesize(dbenv, 0, 1048576 /* 1MB */, 0) == 0); + + /* Now run recovery. */ + TIMER_START; +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 + DB_BENCH_ASSERT(dbenv->open( + dbenv, TESTDIR, NULL, OFLAGS | DB_RECOVER, 0666) == 0); +#endif +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 1 + DB_BENCH_ASSERT( + dbenv->open(dbenv, TESTDIR, OFLAGS | DB_RECOVER, 0666) == 0); +#endif +#if DB_VERSION_MAJOR > 3 || DB_VERSION_MINOR > 1 + DB_BENCH_ASSERT( + dbenv->open(dbenv, TESTDIR, OFLAGS | DB_RECOVER, 0666) == 0); +#endif + TIMER_STOP; + + /* + * We divide the time by the number of transactions, so an "operation" + * is the recovery of a single transaction. 
+ */ + printf("# recovery after %d transactions\n", count); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_recover [-C cachesz] [-c count]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_txn.c b/test/micro/source/b_txn.c new file mode 100644 index 00000000..7a4c2914 --- /dev/null +++ b/test/micro/source/b_txn.c @@ -0,0 +1,93 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage(void); + +int +b_txn(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB_TXN *txn; + int tabort, ch, i, count; + + count = 1000; + tabort = 0; + while ((ch = getopt(argc, argv, "ac:")) != EOF) + switch (ch) { + case 'a': + tabort = 1; + break; + case 'c': + count = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the environment. */ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + NULL, DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, + DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE, 0666) == 0); +#endif + + /* Start and commit/abort a transaction count times. 
*/ + TIMER_START; + if (tabort) + for (i = 0; i < count; ++i) { +#if DB_VERSION_MAJOR < 4 + DB_BENCH_ASSERT(txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(txn_abort(txn) == 0); +#else + DB_BENCH_ASSERT( + dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(txn->abort(txn) == 0); +#endif + } + else + for (i = 0; i < count; ++i) { +#if DB_VERSION_MAJOR < 4 + DB_BENCH_ASSERT(txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(txn_commit(txn, 0) == 0); +#else + DB_BENCH_ASSERT( + dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); +#endif + } + TIMER_STOP; + + printf("# %d empty transaction start/%s pairs\n", + count, tabort ? "abort" : "commit"); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_txn [-a] [-c count]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_txn_write.c b/test/micro/source/b_txn_write.c new file mode 100644 index 00000000..eac3851e --- /dev/null +++ b/test/micro/source/b_txn_write.c @@ -0,0 +1,172 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#include "bench.h" + +static int usage __P((void)); + +#ifdef DB_INIT_REP +static int b_txn_write_send __P((DB_ENV *, + const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); + +/* + * b_txn_write_send -- + * A stubbed-out replication message function. 
+ */ +static int +b_txn_write_send(dbenv, control, rec, lsn, eid, flags) + DB_ENV *dbenv; + const DBT *control, *rec; + const DB_LSN *lsn; + int eid; + u_int32_t flags; +{ + COMPQUIET(dbenv, NULL); + COMPQUIET(control, NULL); + COMPQUIET(rec, NULL); + COMPQUIET(lsn, NULL); + COMPQUIET(eid, 0); + COMPQUIET(flags, 0); + return (0); +} +#endif + +int +b_txn_write(int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBT key, data; + DB_ENV *dbenv; + DB_TXN *txn; + u_int32_t flags, oflags; + int ch, i, count, rep_stub; + char *config; + + count = 1000; + oflags = flags = 0; + rep_stub = 0; + config = "synchronous"; + while ((ch = getopt(argc, argv, "ac:rw")) != EOF) + switch (ch) { + case 'a': + config = "nosync"; + flags = DB_TXN_NOSYNC; + break; + case 'c': + count = atoi(optarg); + break; + case 'r': +#ifdef DB_INIT_REP + rep_stub = 1; +#else + exit(0); +#endif + break; + case 'w': + config = "write-nosync"; +#ifdef DB_TXN_WRITE_NOSYNC + flags = DB_TXN_WRITE_NOSYNC; +#else + exit(0); +#endif + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return (usage()); + + /* Create the environment. 
*/ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); + +#ifdef DB_INIT_REP + if (rep_stub) { +#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 5 || DB_VERSION_MAJOR > 4 + DB_BENCH_ASSERT( + dbenv->rep_set_transport(dbenv, 1, b_txn_write_send) == 0); +#else + DB_BENCH_ASSERT( + dbenv->set_rep_transport(dbenv, 1, b_txn_write_send) == 0); +#endif + oflags |= DB_INIT_REP; + } +#endif + oflags |= DB_CREATE | DB_INIT_LOCK | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE; +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 + DB_BENCH_ASSERT( + dbenv->open(dbenv, TESTDIR, NULL, flags | oflags, 0666) == 0); +#endif +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 1 + DB_BENCH_ASSERT( + dbenv->open(dbenv, TESTDIR, flags | oflags, 0666) == 0); +#endif +#if DB_VERSION_MAJOR > 3 || DB_VERSION_MINOR > 1 + if (flags != 0) + DB_BENCH_ASSERT(dbenv->set_flags(dbenv, flags, 1) == 0); + DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, oflags, 0666) == 0); +#endif + +#ifdef DB_INIT_REP + if (rep_stub) + DB_BENCH_ASSERT( + dbenv->rep_start(dbenv, NULL, DB_REP_MASTER) == 0); +#endif + + /* Create the database. */ + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open(dbp, NULL, + TESTFILE, NULL, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, 0666) == 0); +#else + DB_BENCH_ASSERT( + dbp->open(dbp, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); +#endif + + /* Initialize the data. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.size = data.size = 20; + key.data = data.data = "01234567890123456789"; + + /* Start/commit a transaction count times. 
*/ + TIMER_START; + for (i = 0; i < count; ++i) { +#if DB_VERSION_MAJOR < 4 + DB_BENCH_ASSERT(txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); + DB_BENCH_ASSERT(txn_commit(txn, 0) == 0); +#else + DB_BENCH_ASSERT(dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); + DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); + DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); +#endif + } + TIMER_STOP; + + printf("# %d %stransactions write %s commit pairs\n", + count, rep_stub ? "replicated ": "", config); + TIMER_DISPLAY(count); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: b_txn_write [-arw] [-c count]\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_uname.c b/test/micro/source/b_uname.c new file mode 100644 index 00000000..9ed3aa3d --- /dev/null +++ b/test/micro/source/b_uname.c @@ -0,0 +1,147 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "bench.h" + +#define UNAMEFILE "NODENAME" + +static int write_info __P((FILE *)); + +int +b_uname() +{ + FILE *fp; + int ret; + + if ((fp = fopen(UNAMEFILE, "w")) == NULL) + goto file_err; + + ret = write_info(fp); + + if (fclose(fp) != 0) { +file_err: fprintf(stderr, + "%s: %s: %s\n", progname, UNAMEFILE, strerror(errno)); + return (1); + } + + return (ret); +} + +#ifdef DB_WIN32 +static int +write_info(fp) + FILE *fp; +{ + OSVERSIONINFO osver; + SYSTEM_INFO sysinfo; + char *p; + +#ifdef DB_WINCE + p = "WinCE"; +#else + { + DWORD len; + char buf[1024]; + + len = sizeof(buf) - 1; + GetComputerName(buf, &len); + p = buf; + } +#endif + fprintf(fp, "
<p>
%s, ", p); + + GetSystemInfo(&sysinfo); + switch (sysinfo.wProcessorArchitecture) { + case PROCESSOR_ARCHITECTURE_ALPHA: + p = "alpha"; + break; + case PROCESSOR_ARCHITECTURE_INTEL: + p = "x86"; + break; + case PROCESSOR_ARCHITECTURE_MIPS: + p = "mips"; + break; + case PROCESSOR_ARCHITECTURE_PPC: + p = "ppc"; + break; + default: + p = "unknown"; + break; + } + fprintf(fp, "%s
\n", p); + memset(&osver, 0, sizeof(osver)); + osver.dwOSVersionInfoSize = sizeof(osver); + GetVersionEx(&osver); + switch (osver.dwPlatformId) { + case VER_PLATFORM_WIN32_NT: /* NT, Windows 2000 or Windows XP */ + if (osver.dwMajorVersion == 4) + p = "Windows NT4x"; + else if (osver.dwMajorVersion <= 3) + p = "Windows NT3x"; + else if (osver.dwMajorVersion == 5 && osver.dwMinorVersion < 1) + p = "Windows 2000"; + else if (osver.dwMajorVersion >= 5) + p = "Windows XP"; + else + p = "unknown"; + break; + case VER_PLATFORM_WIN32_WINDOWS: /* Win95, Win98 or WinME */ + if ((osver.dwMajorVersion > 4) || + ((osver.dwMajorVersion == 4) && (osver.dwMinorVersion > 0))) { + if (osver.dwMinorVersion >= 90) + p = "Windows ME"; + else + p = "Windows 98"; + } else + p = "Windows 95"; + break; + case VER_PLATFORM_WIN32s: /* Windows 3.x */ + p = "Windows"; + break; + default: + p = "unknown"; + break; + } + fprintf(fp, + "%s, %ld.%02ld", p, osver.dwMajorVersion, osver.dwMinorVersion); + return (0); +} + +#elif defined(HAVE_VXWORKS) +static int +write_info(fp) + FILE *fp; +{ + fprintf(fp, "
<p>
VxWorks"); + return (0); +} + +#else /* POSIX */ +#include + +static int +write_info(fp) + FILE *fp; +{ + struct utsname name; + + if (uname(&name) == 0) + fprintf(fp, "
<p>%s, %s<br>\n%s, %s, %s</p>
\n", name.nodename, + name.machine, name.sysname, name.release, name.version); + else { + /* + * We've seen random failures on some systems, complain and + * skip the call if it fails. + */ + fprintf(stderr, "%s: uname: %s\n", progname, strerror(errno)); + + fprintf(fp, "
<p>
POSIX"); + } + return (0); +} +#endif diff --git a/test/micro/source/b_util.c b/test/micro/source/b_util.c new file mode 100644 index 00000000..6c952bde --- /dev/null +++ b/test/micro/source/b_util.c @@ -0,0 +1,170 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "bench.h" + +static int testdir_remove __P((char *)); + +int +b_util_have_hash() +{ +#if defined(HAVE_HASH) ||\ + DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 2 + return (0); +#else + fprintf(stderr, + "library build did not include support for the Hash access method\n"); + return (1); +#endif +} + +int +b_util_have_heap() +{ +#if defined(HAVE_HEAP) ||\ + DB_VERSION_MAJOR < 5 || DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR < 2 + return (0); +#else + fprintf(stderr, + "library build did not include support for the Heap access method\n"); + return (1); +#endif +} + +int +b_util_have_queue() +{ +#if defined(HAVE_QUEUE) ||\ + DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 2 + return (0); +#else + fprintf(stderr, + "library build did not include support for the Queue access method\n"); + return (1); +#endif +} + +/* + * b_util_dir_setup -- + * Create the test directory. + */ +int +b_util_dir_setup() +{ + int ret; + +#if DB_VERSION_MAJOR > 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 3 + if ((ret = __os_mkdir(NULL, TESTDIR, 0755)) != 0) { +#else + if ((ret = mkdir(TESTDIR, 0755)) != 0) { +#endif + fprintf(stderr, + "%s: %s: %s\n", progname, TESTDIR, db_strerror(ret)); + return (1); + } + return (0); +} + +#if DB_VERSION_MAJOR > 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 4 +#define OS_EXISTS(a, b, c) __os_exists(a, b, c) +#else +#define OS_EXISTS(a, b, c) __os_exists(b, c) +#endif + +/* + * b_util_dir_teardown + * Clean up the test directory. 
+ */ +int +b_util_dir_teardown() +{ + int ret; + + if (OS_EXISTS(NULL, TESTFILE, NULL) == 0 && + (ret = b_util_unlink(TESTFILE)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, TESTFILE, db_strerror(ret)); + return (1); + } + return (testdir_remove(TESTDIR) ? 1 : 0); +} + +/* + * testdir_remove -- + * Remove a directory and all its contents, the "dir" must contain no + * subdirectories, because testdir_remove will not recursively delete + * all subdirectories. + */ +static int +testdir_remove(dir) + char *dir; +{ + int cnt, i, isdir, ret; + char buf[1024], **names; + + ret = 0; + + /* If the directory doesn't exist, we're done. */ + if (OS_EXISTS(NULL, dir, &isdir) != 0) + return (0); + + /* Get a list of the directory contents. */ +#if DB_VERSION_MAJOR > 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 6 + if ((ret = __os_dirlist(NULL, dir, 0, &names, &cnt)) != 0) + return (ret); +#else + if ((ret = __os_dirlist(NULL, dir, &names, &cnt)) != 0) + return (ret); +#endif + /* Go through the file name list, remove each file in the list */ + for (i = 0; i < cnt; ++i) { + (void)snprintf(buf, sizeof(buf), + "%s%c%s", dir, PATH_SEPARATOR[0], names[i]); + if ((ret = OS_EXISTS(NULL, buf, &isdir)) != 0) + goto file_err; + if (!isdir && (ret = b_util_unlink(buf)) != 0) { +file_err: fprintf(stderr, "%s: %s: %s\n", + progname, buf, db_strerror(ret)); + break; + } + } + + __os_dirfree(NULL, names, cnt); + + /* + * If we removed the contents of the directory, remove the directory + * itself. 
+ */ + if (i == cnt && (ret = rmdir(dir)) != 0) + fprintf(stderr, + "%s: %s: %s\n", progname, dir, db_strerror(errno)); + return (ret); +} + +void +b_util_abort() +{ +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 6 + abort(); +#elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 6 + __os_abort(); +#else + __os_abort(NULL); +#endif +} + +int +b_util_unlink(path) + char *path; +{ +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 7 + return (__os_unlink(NULL, path)); +#else + return (__os_unlink(NULL, path, 0)); +#endif +} diff --git a/test/micro/source/b_workload.c b/test/micro/source/b_workload.c new file mode 100644 index 00000000..dc407f78 --- /dev/null +++ b/test/micro/source/b_workload.c @@ -0,0 +1,631 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "bench.h" +#include "b_workload.h" + +static int dump_verbose_stats __P((DB *, CONFIG *)); +static int is_del_workload __P((int)); +static int is_get_workload __P((int)); +static int is_put_workload __P((int)); +static int run_mixed_workload __P((DB *, CONFIG *)); +static int run_std_workload __P((DB *, CONFIG *)); +static int usage __P((void)); +static char *workload_str __P((int)); + +/* + * General TODO list: + * * The workload type. Might work better as a bitmask than the current enum. + * * Improve the verbose stats, so they can be easily parsed. + * * Think about doing automatic btree/hash comparison in here. 
+ */ +int +b_workload(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + CONFIG conf; + DB *dbp; + DB_ENV *dbenv; + int ch, ffactor, ksz; + + dbenv = NULL; + memset(&conf, 0, sizeof(conf)); + conf.seed = 124087; + srand(conf.seed); + + conf.pcount = 100000; + conf.ts = "Btree"; + conf.type = DB_BTREE; + conf.dsize = 20; + conf.presize = 0; + conf.workload = T_PUT_GET_DELETE; + + while ((ch = getopt(argc, argv, "b:c:d:e:g:ik:m:op:r:t:vw:")) != EOF) + switch (ch) { + case 'b': + conf.cachesz = atoi(optarg); + break; + case 'c': + conf.pcount = atoi(optarg); + break; + case 'd': + conf.dsize = atoi(optarg); + break; + case 'e': + conf.cursor_del = atoi(optarg); + break; + case 'g': + conf.gcount = atoi(optarg); + break; + case 'i': + conf.presize = 1; + break; + case 'k': + conf.ksize = atoi(optarg); + break; + case 'm': + conf.message = optarg; + break; + case 'o': + conf.orderedkeys = 1; + break; + case 'p': + conf.pagesz = atoi(optarg); + break; + case 'r': + conf.num_dups = atoi(optarg); + break; + case 't': + switch (optarg[0]) { + case 'B': case 'b': + conf.ts = "Btree"; + conf.type = DB_BTREE; + break; + case 'H': case 'h': + if (b_util_have_hash()) + return (0); + conf.ts = "Hash"; + conf.type = DB_HASH; + break; + default: + return (usage()); + } + break; + case 'v': + conf.verbose = 1; + break; + case 'w': + switch (optarg[0]) { + case 'A': + conf.workload = T_PUT_GET_DELETE; + break; + case 'B': + conf.workload = T_GET; + break; + case 'C': + conf.workload = T_PUT; + break; + case 'D': + conf.workload = T_DELETE; + break; + case 'E': + conf.workload = T_PUT_GET; + break; + case 'F': + conf.workload = T_PUT_DELETE; + break; + case 'G': + conf.workload = T_GET_DELETE; + break; + case 'H': + conf.workload = T_MIXED; + break; + default: + return (usage()); + } + break; + case '?': + default: + fprintf(stderr, "Invalid option: %c\n", ch); + return (usage()); + } + argc -= optind; + argv += optind; + if (argc != 0) + return 
(usage()); + + /* + * Validate the input parameters if specified. + */ + if (conf.pagesz != 0) + DB_BENCH_ASSERT(conf.pagesz >= 512 && conf.pagesz <= 65536 && + ((conf.pagesz & (conf.pagesz - 1)) == 0)); + + if (conf.cachesz != 0) + DB_BENCH_ASSERT(conf.cachesz > 20480); + DB_BENCH_ASSERT(conf.ksize == 0 || conf.orderedkeys == 0); + + /* Create the environment. */ + DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); + dbenv->set_errfile(dbenv, stderr); + if (conf.cachesz != 0) + DB_BENCH_ASSERT( + dbenv->set_cachesize(dbenv, 0, conf.cachesz, 0) == 0); + +#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 + DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR", + NULL, DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR", + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0); +#endif + + DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); + if (conf.pagesz != 0) + DB_BENCH_ASSERT( + dbp->set_pagesize(dbp, conf.pagesz) == 0); + if (conf.presize != 0 && conf.type == DB_HASH) { + ksz = (conf.orderedkeys != 0) ? 
sizeof(u_int32_t) : conf.ksize; + if (ksz == 0) + ksz = 10; + ffactor = (conf.pagesz - 32)/(ksz + conf.dsize + 8); + fprintf(stderr, "ffactor: %d\n", ffactor); + DB_BENCH_ASSERT( + dbp->set_h_ffactor(dbp, ffactor) == 0); + DB_BENCH_ASSERT( + dbp->set_h_nelem(dbp, conf.pcount*10) == 0); + } +#if DB_VERSION_MAJOR > 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) + DB_BENCH_ASSERT(dbp->open( + dbp, NULL, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0); +#else + DB_BENCH_ASSERT(dbp->open( + dbp, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0); +#endif + + if (conf.workload == T_MIXED) + run_mixed_workload(dbp, &conf); + else + run_std_workload(dbp, &conf); + + if (is_put_workload(conf.workload) == 0) + timespecadd(&conf.tot_time, &conf.put_time); + if (is_get_workload(conf.workload) == 0) + timespecadd(&conf.tot_time, &conf.get_time); + if (is_del_workload(conf.workload) == 0) + timespecadd(&conf.tot_time, &conf.del_time); + + /* Ensure data is flushed for following measurements. */ + DB_BENCH_ASSERT(dbp->sync(dbp, 0) == 0); + + if (conf.verbose != 0) + dump_verbose_stats(dbp, &conf); + + DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); + DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); + + /* + * Construct a string for benchmark output. + * + * Insert HTML in-line to make the output prettier -- ugly, but easy. + */ + printf("# workload test: %s: %s
%lu ops", + conf.ts, workload_str(conf.workload), (u_long)conf.pcount); + if (conf.ksize != 0) + printf(", key size: %lu", (u_long)conf.ksize); + if (conf.dsize != 0) + printf(", data size: %lu", (u_long)conf.dsize); + if (conf.pagesz != 0) + printf(", page size: %lu", (u_long)conf.pagesz); + else + printf(", page size: default"); + if (conf.cachesz != 0) + printf(", cache size: %lu", (u_long)conf.cachesz); + else + printf(", cache size: default"); + printf(", %s keys", conf.orderedkeys == 1 ? "ordered" : "unordered"); + printf(", num dups: %lu", (u_long)conf.num_dups); + printf("\n"); + + if (conf.workload != T_MIXED) { + if (conf.message != NULL) + printf("%s %s ", conf.message, conf.ts); + TIME_DISPLAY(conf.pcount, conf.tot_time); + } else + TIMER_DISPLAY(conf.pcount); + + return (0); +} + +/* + * The mixed workload is designed to simulate a somewhat real + * usage scenario. + * NOTES: * rand is used to decide on the current operation. This will + * be repeatable, since the same seed is always used. + * * All added keys are stored in a FIFO queue, this is not very + * space efficient, but is the best way I could come up with to + * insert random key values, and be able to retrieve/delete them. + * * TODO: the workload will currently only work with unordered + * fixed length keys. + */ +#define GET_PROPORTION 90 +#define PUT_PROPORTION 7 +#define DEL_PROPORTION 3 + +static int +run_mixed_workload(dbp, config) + DB *dbp; + CONFIG *config; +{ + DBT key, data; + size_t next_op, i, ioff, inscount; + char kbuf[KBUF_LEN]; + struct bench_q operation_queue; + + /* Having ordered insertion does not make sense here */ + DB_BENCH_ASSERT(config->orderedkeys == 0); + + srand(config->seed); + memset(&operation_queue, 0, sizeof(struct bench_q)); + + ioff = 0; + INIT_KEY(key, config); + memset(&data, 0, sizeof(data)); + DB_BENCH_ASSERT( + (data.data = malloc(data.size = config->dsize)) != NULL); + + /* + * Add an initial sample set of data to the DB. 
+ * This should add some stability, and reduce the likelihood + * of deleting all of the entries in the DB. + */ + inscount = 2 * config->pcount; + if (inscount > 100000) + inscount = 100000; + + for (i = 0; i < inscount; ++i) { + GET_KEY_NEXT(key, config, kbuf, i); + BENCH_Q_TAIL_INSERT(operation_queue, kbuf); + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + + TIMER_START; + for (i = 0; i < config->pcount; ++i) { + next_op = rand()%100; + + if (next_op < GET_PROPORTION ) { + BENCH_Q_POP_PUSH(operation_queue, kbuf); + key.data = kbuf; + key.size = sizeof(kbuf); + dbp->get(dbp, NULL, &key, &data, 0); + } else if (next_op < GET_PROPORTION+PUT_PROPORTION) { + GET_KEY_NEXT(key, config, kbuf, i); + BENCH_Q_TAIL_INSERT(operation_queue, kbuf); + dbp->put(dbp, NULL, &key, &data, 0); + } else { + BENCH_Q_POP(operation_queue, kbuf); + key.data = kbuf; + key.size = sizeof(kbuf); + dbp->del(dbp, NULL, &key, 0); + } + } + TIMER_STOP; + TIMER_GET(config->tot_time); + + return (0); +} + +static int +run_std_workload(dbp, config) + DB *dbp; + CONFIG *config; +{ + DBT key, data; + DBC *dbc; + u_int32_t i; + int ret; + char kbuf[KBUF_LEN]; + + /* Setup a key/data pair. */ + INIT_KEY(key, config); + memset(&data, 0, sizeof(data)); + DB_BENCH_ASSERT( + (data.data = malloc(data.size = config->dsize)) != NULL); + + /* Store the key/data pair count times. */ + TIMER_START; + for (i = 0; i < config->pcount; ++i) { + GET_KEY_NEXT(key, config, kbuf, i); + DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0); + } + TIMER_STOP; + TIMER_GET(config->put_time); + + if (is_get_workload(config->workload) == 0) { + TIMER_START; + for (i = 0; i <= config->gcount; ++i) { + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0); + while ((dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0); + DB_BENCH_ASSERT(dbc->c_close(dbc) == 0); + } + TIMER_STOP; + TIMER_GET(config->get_time); + } + + if (is_del_workload(config->workload) == 0) { + /* reset rand to reproduce key sequence. 
*/ + srand(config->seed); + + TIMER_START; + if (config->cursor_del != 0) { + DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0); + while ( + (ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0) + DB_BENCH_ASSERT(dbc->c_del(dbc, 0) == 0); + DB_BENCH_ASSERT (ret == DB_NOTFOUND); + } else { + INIT_KEY(key, config); + for (i = 0; i < config->pcount; ++i) { + GET_KEY_NEXT(key, config, kbuf, i); + + ret = dbp->del(dbp, NULL, &key, 0); + /* + * Random key generation can cause dups, + * so NOTFOUND result is OK. + */ + if (config->ksize == 0) + DB_BENCH_ASSERT + (ret == 0 || ret == DB_NOTFOUND); + else + DB_BENCH_ASSERT(ret == 0); + } + } + TIMER_STOP; + TIMER_GET(config->del_time); + } + return (0); +} + +static int +dump_verbose_stats(dbp, config) + DB *dbp; + CONFIG *config; +{ +/* + * It would be nice to be able to define stat as _stat on + * Windows, but that substitutes _stat for the db call as well. + */ +#ifdef DB_WIN32 + struct _stat fstat; +#else + struct stat fstat; +#endif + DB_HASH_STAT *hstat; + DB_BTREE_STAT *bstat; + double free_prop; + char path[1024]; + +#ifdef DB_BENCH_INCLUDE_CONFIG_SUMMARY + printf("Completed workload benchmark.\n"); + printf("Configuration summary:\n"); + printf("\tworkload type: %d\n", (int)config->workload); + printf("\tdatabase type: %s\n", config->ts); + if (config->cachesz != 0) + printf("\tcache size: %lu\n", (u_long)config->cachesz); + if (config->pagesz != 0) + printf("\tdatabase page size: %lu\n", (u_long)config->pagesz); + printf("\tput element count: %lu\n", (u_long)config->pcount); + if ( is_get_workload(config->workload) == 0) + printf("\tget element count: %lu\n", (u_long)config->gcount); + if (config->orderedkeys) + printf("\tInserting items in order\n"); + else if (config->ksize == 0) + printf("\tInserting keys with size 10\n"); + else + printf( + "\tInserting keys with size: %lu\n", (u_long)config->ksize); + + printf("\tInserting data elements size: %lu\n", (u_long)config->dsize); + + if 
(is_del_workload(config->workload) == 0) { + if (config->cursor_del) + printf("\tDeleting items using a cursor\n"); + else + printf("\tDeleting items without a cursor\n"); + } +#endif /* DB_BENCH_INCLUDE_CONFIG_SUMMARY */ + + if (is_put_workload(config->workload) == 0) + printf("%s Time spent inserting (%lu) (%s) items: %lu/%lu\n", + config->message[0] == '\0' ? "" : config->message, + (u_long)config->pcount, config->ts, + (u_long)config->put_time.tv_sec, config->put_time.tv_nsec); + + if (is_get_workload(config->workload) == 0) + printf("%s Time spent getting (%lu) (%s) items: %lu/%lu\n", + config->message[0] == '\0' ? "" : config->message, + (u_long)config->pcount * ((config->gcount == 0) ? + 1 : config->gcount), config->ts, + (u_long)config->get_time.tv_sec, config->get_time.tv_nsec); + + if (is_del_workload(config->workload) == 0) + printf("%s Time spent deleting (%lu) (%s) items: %lu/%lu\n", + config->message[0] == '\0' ? "" : config->message, + (u_long)config->pcount, config->ts, + (u_long)config->del_time.tv_sec, config->del_time.tv_nsec); + + (void)snprintf(path, sizeof(path), + "%s%c%s", TESTDIR, PATH_SEPARATOR[0], TESTFILE); +#ifdef DB_WIN32 + if (_stat(path, &fstat) == 0) { +#else + if (stat(path, &fstat) == 0) { +#endif + printf("%s Size of db file (%s): %lu K\n", + config->message[0] == '\0' ? "" : config->message, + config->ts, (u_long)fstat.st_size/1024); + } + + if (config->type == DB_HASH) { +#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2 + DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, NULL, 0) == 0); +#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2 + DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, 0) == 0); +#else + DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &hstat, 0) == 0); +#endif + /* + * Hash fill factor is a bit tricky. Want to include + * both bucket and overflow buckets (not offpage). 
+ */ + free_prop = hstat->hash_pagesize*hstat->hash_buckets; + free_prop += hstat->hash_pagesize*hstat->hash_overflows; + free_prop = + (free_prop - hstat->hash_bfree - hstat->hash_ovfl_free)/ + free_prop; + printf("%s db fill factor (%s): %.2f%%\n", + config->message[0] == '\0' ? "" : config->message, + config->ts, free_prop*100); + free(hstat); + } else { /* Btree */ +#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2 + DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, NULL, 0) == 0); +#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2 + DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, 0) == 0); +#else + DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &bstat, 0) == 0); +#endif + free_prop = bstat->bt_pagesize*bstat->bt_leaf_pg; + free_prop = (free_prop-bstat->bt_leaf_pgfree)/free_prop; + printf("%s db fill factor (%s): %.2f%%\n", + config->message[0] == '\0' ? "" : config->message, + config->ts, free_prop*100); + free(bstat); + } + return (0); +} + +static char * +workload_str(workload) + int workload; +{ + static char buf[128]; + + switch (workload) { + case T_PUT_GET_DELETE: + return ("PUT/GET/DELETE"); + /* NOTREACHED */ + case T_GET: + return ("GET"); + /* NOTREACHED */ + case T_PUT: + return ("PUT"); + /* NOTREACHED */ + case T_DELETE: + return ("DELETE"); + /* NOTREACHED */ + case T_PUT_GET: + return ("PUT/GET"); + /* NOTREACHED */ + case T_PUT_DELETE: + return ("PUT/DELETE"); + /* NOTREACHED */ + case T_GET_DELETE: + return ("GET/DELETE"); + /* NOTREACHED */ + case T_MIXED: + snprintf(buf, sizeof(buf), "MIXED (get: %d, put: %d, del: %d)", + (int)GET_PROPORTION, + (int)PUT_PROPORTION, (int)DEL_PROPORTION); + return (buf); + default: + break; + } + + exit(usage()); + /* NOTREACHED */ +} + +static int +is_get_workload(workload) + int workload; +{ + switch (workload) { + case T_GET: + case T_PUT_GET: + case T_PUT_GET_DELETE: + case T_GET_DELETE: + return 0; + } + return 1; +} + +static int +is_put_workload(workload) + int workload; +{ + 
switch (workload) { + case T_PUT: + case T_PUT_GET: + case T_PUT_GET_DELETE: + case T_PUT_DELETE: + return 0; + } + return 1; +} + +static int +is_del_workload(workload) + int workload; +{ + switch (workload) { + case T_DELETE: + case T_PUT_DELETE: + case T_PUT_GET_DELETE: + case T_GET_DELETE: + return 0; + } + return 1; +} + +static int +usage() +{ + (void)fprintf(stderr, + "usage: b_workload [-b cachesz] [-c count] [-d bytes] [-e]\n"); + (void)fprintf(stderr, + "\t[-g getitrs] [-i] [-k keysize] [-m message] [-o] [-p pagesz]\n"); + (void)fprintf(stderr, "\t[-r dup_count] [-t type] [-w type]\n"); + + (void)fprintf(stderr, "Where:\n"); + (void)fprintf(stderr, "\t-b the size of the DB cache.\n"); + (void)fprintf(stderr, "\t-c the number of elements to be measured.\n"); + (void)fprintf(stderr, "\t-d the size of each data element.\n"); + (void)fprintf(stderr, "\t-e delete entries using a cursor.\n"); + (void)fprintf(stderr, "\t-g number of get cursor traverses.\n"); + (void)fprintf(stderr, "\t-i Pre-init hash DB bucket count.\n"); + (void)fprintf(stderr, "\t-k the size of each key inserted.\n"); + (void)fprintf(stderr, "\t-m message pre-pended to log output.\n"); + (void)fprintf(stderr, "\t-o keys should be ordered for insert.\n"); + (void)fprintf(stderr, "\t-p the page size for the database.\n"); + (void)fprintf(stderr, "\t-r the number of duplicates to insert\n"); + (void)fprintf(stderr, "\t-t type of the underlying database.\n"); + (void)fprintf(stderr, "\t-w the workload to measure, available:\n"); + (void)fprintf(stderr, "\t\tA - PUT_GET_DELETE\n"); + (void)fprintf(stderr, "\t\tB - GET\n"); + (void)fprintf(stderr, "\t\tC - PUT\n"); + (void)fprintf(stderr, "\t\tD - DELETE\n"); + (void)fprintf(stderr, "\t\tE - PUT_GET\n"); + (void)fprintf(stderr, "\t\tF - PUT_DELETE\n"); + (void)fprintf(stderr, "\t\tG - GET_DELETE\n"); + (void)fprintf(stderr, "\t\tH - MIXED\n"); + return (EXIT_FAILURE); +} diff --git a/test/micro/source/b_workload.h b/test/micro/source/b_workload.h 
new file mode 100644 index 00000000..c1c5c8c0 --- /dev/null +++ b/test/micro/source/b_workload.h @@ -0,0 +1,153 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +/* + * Macros to help with initializing/assigning key dbts + */ + +#define KBUF_LEN 12 +#define INIT_KEY(key, config) do { \ + memset(&key, 0, sizeof(key)); \ + if (config->orderedkeys) { \ + key.size = sizeof (u_int32_t); \ + } else if (config->ksize != 0) { \ + DB_BENCH_ASSERT( \ + (key.data = malloc(key.size = config->ksize)) != NULL); \ + } else { \ + key.data = kbuf; \ + key.size = 10; \ + } \ + } while (0) + +#define GET_KEY_NEXT(key, config, kbuf, i) do { \ + size_t tmp_int; \ + if (config->orderedkeys) { \ + /* Will be sorted on little-endian system. */ \ + tmp_int = i; \ + M_32_SWAP(tmp_int); \ + key.data = &tmp_int; \ + } else if (config->ksize == 0) { \ + /* \ + * This will produce duplicate keys. \ + * That is not such a big deal, since we are \ + * using the same seed to srand each time, \ + * the scenario is reproducible. \ + */ \ + (void)snprintf(kbuf, sizeof(kbuf), "%10d", rand()); \ + } else { \ + /* TODO: Not sure of the best approach here. */ \ + (void)snprintf(key.data, config->ksize, "%10lu", (u_long)i); \ + } \ + } while (0) + +/* Taken from dbinc/db_swap.h */ +#undef M_32_SWAP +#define M_32_SWAP(a) { \ + u_int32_t _tmp; \ + _tmp = (u_int32_t)a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[0]; \ +} + +/* + * A singly linked list, that maintains a pointer + * to the start and the end of the queue. 
+ * Should be possible to use a STAILQ, but this seemed easier + */ +typedef struct bench_qentry { + char data[KBUF_LEN]; + struct bench_qentry *next; +}bench_qentry; +typedef struct bench_q { + struct bench_qentry *head; + struct bench_qentry *tail; +} bench_q; +#define BENCH_Q_TAIL_INSERT(queue, buf) do { \ + struct bench_qentry *entry; \ + DB_BENCH_ASSERT( \ + (entry = malloc(sizeof(struct bench_qentry))) != NULL); \ + memcpy(entry->data, buf, sizeof(entry->data)); \ + if (queue.head == NULL) \ + queue.head = queue.tail = entry; \ + else { \ + queue.tail->next = entry; \ + queue.tail = entry; \ + } \ +} while (0) + +#define BENCH_Q_POP(queue, buf) do { \ + struct bench_qentry *popped = queue.head; \ + if (popped == NULL) \ + break; \ + if (queue.head->next == NULL) \ + queue.head = queue.tail = NULL; \ + else \ + queue.head = queue.head->next; \ + memcpy(buf, popped->data, sizeof(buf)); \ + free(popped); \ +} while (0) + +/* + * Retrieve the head of the queue, save the data into user + * buffer, and push the item back onto the end of the list. 
+ * Same functionality as pop/insert, but saves a malloc/free + */ +#define BENCH_Q_POP_PUSH(queue, buf) do { \ + struct bench_qentry *popped = queue.head; \ + if (popped == NULL) \ + break; \ + if (queue.head->next == NULL) \ + queue.head = queue.tail = NULL; \ + else \ + queue.head = queue.head->next; \ + memcpy(buf, popped->data, sizeof(buf)); \ + if (queue.head == NULL) \ + queue.head = queue.tail = popped; \ + else { \ + queue.tail->next = popped; \ + queue.tail = popped; \ + } \ +} while (0) + +typedef enum { + T_PUT, + T_GET, + T_DELETE, + T_PUT_GET, + T_PUT_DELETE, + T_PUT_GET_DELETE, + T_GET_DELETE, + T_MIXED +} test_type; + +typedef struct +{ + u_int32_t ksize; + u_int32_t dsize; + size_t orderedkeys; + size_t num_dups; + u_int32_t pagesz; + u_int32_t cachesz; + u_int32_t pcount; + size_t gcount; + size_t cursor_del; + size_t verbose; + test_type workload; + u_int32_t seed; + size_t presize; + DBTYPE type; + char *ts; + char *message; + /* Fields used to store timing information */ + db_timespec put_time; + db_timespec get_time; + db_timespec del_time; + db_timespec tot_time; +} CONFIG; diff --git a/test/micro/source/bench.h b/test/micro/source/bench.h new file mode 100644 index 00000000..4faa860d --- /dev/null +++ b/test/micro/source/bench.h @@ -0,0 +1,218 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ +#ifndef _BENCH_H_ +#define _BENCH_H_ +#include "db_config.h" + +#include "db_int.h" + +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 5 +/* + * Older releases of Berkeley DB don't include standard include files in + * db_int.h. 
+#ifdef DB_WIN32
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+#include <direct.h>
+#include <sys/timeb.h>
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+#ifdef HAVE_GETRUSAGE
+#include <sys/resource.h>
long tv_nsec; /* nanoseconds */ +} db_timespec; +#endif + +#define timespecadd(vvp, uvp) \ + do { \ + (vvp)->tv_sec += (uvp)->tv_sec; \ + (vvp)->tv_nsec += (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec >= NS_PER_SEC) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_nsec -= NS_PER_SEC; \ + } \ + } while (0) +#define timespecsub(vvp, uvp) \ + do { \ + (vvp)->tv_sec -= (uvp)->tv_sec; \ + (vvp)->tv_nsec -= (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_nsec += NS_PER_SEC; \ + } \ + } while (0) + +#define TIMER_START CLOCK(__start_time) +#define TIMER_STOP CLOCK(__end_time) + +#if defined(HAVE_CLOCK_GETTIME) +#define CLOCK(tm) do { \ + DB_BENCH_ASSERT(clock_gettime( \ + CLOCK_REALTIME, (struct timespec *)&(tm)) == 0); \ +} while (0) +#elif defined(DB_WIN32) +#define CLOCK(tm) do { \ + struct _timeb __now; \ + _ftime(&__now); \ + (tm).tv_sec = __now.time; \ + (tm).tv_nsec = __now.millitm * MS_PER_NS; \ +} while (0) +#else +#define CLOCK(tm) do { \ + struct timeval __tp; \ + DB_BENCH_ASSERT(gettimeofday(&__tp, NULL) == 0); \ + (tm).tv_sec = __tp.tv_sec; \ + (tm).tv_nsec = __tp.tv_usec * NS_PER_US; \ +} while (0) +#endif +#endif /* !DB_TIMEOUT_TO_TIMESPEC */ + +extern db_timespec __start_time, __end_time; + +#define TIMER_GET(tm) do { \ + tm = __end_time; \ + timespecsub(&(tm), &__start_time); \ +} while (0) +#define TIMER_DISPLAY(ops) do { \ + db_timespec __tmp_time; \ + __tmp_time = __end_time; \ + timespecsub(&__tmp_time, &__start_time); \ + TIME_DISPLAY(ops, __tmp_time); \ +} while (0) +#define TIME_DISPLAY(ops, tm) do { \ + double __secs; \ + int __major, __minor, __patch; \ + __secs = (tm).tv_sec + (double)(tm).tv_nsec / NS_PER_SEC; \ + (void)db_version(&__major, &__minor, &__patch); \ + printf("%d.%d.%d\t%.2f\n", __major, __minor, __patch, \ + (__secs == 0) ? 
0.0 : (ops) / __secs); \ +} while (0) + +extern char *progname; /* program name */ + +int b_curalloc __P((int, char *[])); +int b_curwalk __P((int, char *[])); +int b_del __P((int, char *[])); +int b_get __P((int, char *[])); +int b_inmem __P((int, char *[])); +int b_latch __P((int, char *[])); +int b_load __P((int, char *[])); +int b_open __P((int, char *[])); +int b_put __P((int, char *[])); +int b_recover __P((int, char *[])); +int b_txn __P((int, char *[])); +int b_txn_write __P((int, char *[])); +int b_uname __P((void)); +void b_util_abort __P((void)); +int b_util_dir_setup __P((void)); +int b_util_dir_teardown __P((void)); +int b_util_have_hash __P((void)); +int b_util_have_heap __P((void)); +int b_util_have_queue __P((void)); +int b_util_unlink __P((char *)); +int b_workload __P((int, char *[])); +u_int32_t part_callback __P((DB *, DBT *)); + +#endif /* !_BENCH_H_ */ diff --git a/test/micro/source/test_micro.c b/test/micro/source/test_micro.c new file mode 100644 index 00000000..272cf398 --- /dev/null +++ b/test/micro/source/test_micro.c @@ -0,0 +1,211 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+ * + * $Id$ + */ + +#include "bench.h" + +int main __P((int, char *[])); + +static int run __P((char *)); +static int usage __P((void)); + +char *progname; /* program name */ +db_timespec __start_time, __end_time; /* TIMER_START & TIMER_END */ + +static int test_start = 1; /* first test to run */ +static int test_end = 0; /* last test to run */ + +static struct { + char *name; /* command name */ + int (*f)(int, char *[]); /* function */ +} cmdlist[] = { + { "b_curalloc", b_curalloc }, + { "b_curwalk", b_curwalk }, + { "b_del", b_del }, + { "b_get", b_get }, + { "b_inmem", b_inmem }, + { "b_latch", b_latch }, + { "b_load", b_load }, + { "b_open", b_open }, + { "b_put", b_put }, + { "b_recover", b_recover }, + { "b_txn", b_txn }, + { "b_txn_write", b_txn_write }, + { "b_workload", b_workload }, + { NULL, NULL } +}; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + int ch, ret; + char *run_directory, *ifile; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + run_directory = NULL; + ifile = "run.std"; + while ((ch = getopt(argc, argv, "d:e:i:s:")) != EOF) + switch (ch) { + case 'd': + run_directory = optarg; + break; + case 'e': + test_end = atoi(optarg); + break; + case 'i': + ifile = optarg; + break; + case 's': + test_start = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* Run in the target directory. */ + if (run_directory != NULL && chdir(run_directory) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, run_directory, strerror(errno)); + return (1); + } + + /* Clean up any left-over test directory. */ + if (b_util_dir_teardown()) + return (1); + + ret = run(ifile); + + return (ret ? EXIT_FAILURE : EXIT_SUCCESS); +} + +/* + * run -- + * Read a configuration file and run the tests. 
+ */ +static int +run(ifile) + char *ifile; +{ +#ifdef HAVE_GETOPT_OPTRESET + extern int optreset; +#endif + extern int optind; + static int test_cur = 0; + FILE *ifp; + int argc, cmdindx, lineno, ret; + char *p, cmd[1024], path[1024], **argv; + + /* Identify the run. */ + if (b_uname() != 0) + return (1); + + /* Open the list of tests. */ + if ((ifp = fopen(ifile, "r")) == NULL) { + fprintf(stderr, + "%s: %s: %s\n", progname, ifile, strerror(errno)); + return (1); + } + + for (lineno = 1; fgets(cmd, sizeof(cmd), ifp) != NULL; ++lineno) { + /* + * Nul-terminate the command line; check for a trailing \r + * on Windows. + */ + if ((p = strchr(cmd, '\n')) == NULL) { +format_err: fprintf(stderr, "%s: %s: line %d: illegal input\n", + progname, ifile, lineno); + return (1); + } + if (p > cmd && p[-1] == '\r') + --p; + *p = '\0'; + + /* Skip empty lines and comments. */ + if (cmd[0] == '\0' || cmd[0] == '#') + continue; + + /* Optionally limit the test run to specific tests. */ + if (++test_cur < test_start || + (test_end != 0 && test_cur > test_end)) + continue; + + fprintf(stderr, "%d: %s\n", test_cur, cmd); + + /* Find the command. */ + if ((p = strchr(cmd, ' ')) == NULL) + goto format_err; + *p++ = '\0'; + for (cmdindx = 0; cmdlist[cmdindx].name != NULL; ++cmdindx) + if (strcmp(cmd, cmdlist[cmdindx].name) == 0) + break; + if (cmdlist[cmdindx].name == NULL) + goto format_err; + + /* Build argc/argv. */ + if (__db_util_arg(cmd, p, &argc, &argv) != 0) + return (1); + + /* Re-direct output into the test log file. */ + (void)snprintf(path, sizeof(path), "%d", test_cur); + if (freopen(path, "a", stdout) == NULL) { + fprintf(stderr, + "%s: %s: %s\n", progname, path, strerror(errno)); + return (1); + } + + /* + * Each underlying "program" re-parses its arguments -- + * reset getopt. + */ +#ifdef HAVE_GETOPT_OPTRESET + optreset = 1; +#endif + optind = 1; + + /* Prepare the test directory. 
*/ + if (b_util_dir_setup()) + return (1); + + ret = cmdlist[cmdindx].f(argc, argv); + + /* Clean up the test directory. */ + if (b_util_dir_teardown()) + return (1); + + (void)fflush(stdout); + +#if DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1 + __os_free(NULL, argv, 0); +#else + __os_free(NULL, argv); +#endif + if (ret != 0) + return (ret); + } + + return (0); +} + +static int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-d directory] [-e end] [-i input] [-s start]\n", + progname); + return (EXIT_FAILURE); +} diff --git a/test/micro/test_micro b/test/micro/test_micro new file mode 100644 index 00000000..3cd2d0e9 --- /dev/null +++ b/test/micro/test_micro @@ -0,0 +1,171 @@ +#! /bin/sh +# +# $Id$ +LIBS=${LIBS:-"-lpthread"} +WINBUILDDIR="Win32/Release" + +CYGWIN=0 +HOSTOS="`uname -o 2>/dev/null||uname -s 2>/dev/null`" +if test `echo "$HOSTOS"|grep -i cygwin|wc -l` -gt 0;then + CYGWIN=1 +fi + +# build_test_micro_posix +# Build test_micro on a POSIX system. +build_test_micro_posix() +{ + # See if there's a test_micro binary already. + test $clean -eq 0 && test -x test_micro && return 0 + + echo 'Compiling test_micro on posix system...' + rm -f test_micro + CC=${CC:-gcc} + if [ "$CC" = "gcc" ]; then + CC="$CC -O3 -Wall" + else + CC="$CC -O" + fi + $CC -I. -I../src/dbinc -I../src/dbinc_auto -I.. -I../src -I$h/source \ + $SRC -o test_micro ./libdb.a $LIBS || return 1 +} + +# build_test_micro_windows +# Build test_micro on a Windows system. +build_test_micro_windows() +{ + # See if there's a test_micro binary already. + test $clean -eq 0 && test -x test_micro && return 0 + + echo 'Compiling test_micro on windows ...' 
+ rm -f test_micro + + cl /nologo /o test_micro /DDB_WIN32 /G6 /Ox /MD\ + -I./ -I../ -I../src/ -I$h/source/ -I../src/dbinc -I../src/dbinc_auto \ + $SRC $WINSRC ./$WINBUILDDIR/libdb*.lib ./Release/libdb*.lib \ + ws2_32.lib advapi32.lib +} + +# run -- +# $1: args +run() +{ + # You can set the MAJOR and MINOR environment variables to limit + # the BDB releases on which the tests are run. + echo Versions db-${MAJOR:-[3-9]}.${MINOR:-*}.* + for i in db-${MAJOR:-[3-9]}.${MINOR:-*}.*; do + + major=`echo $i|sed "s/db-//g"|cut -d . -f 1` + minor=`echo $i|sed "s/db-//g"|cut -d . -f 2` + if test $major -gt "4";then + WINBUILDDIR="Win32/Release" + elif test $major -lt "4";then + WINBUILDDIR="Release" + elif test "X$minor" = "X" -o "$minor" -lt "8";then + WINBUILDDIR="Release" + else + WINBUILDDIR="Win32/Release" + fi + + if [ -f $i/$variant/libdb.a ] ; then + (cd $i/$variant/ && + build_test_micro_posix || exit 1) + elif [ -f $i/build_windows/${WINBUILDDIR}/libdb??.lib ] ; then + (cd $i/build_windows && + build_test_micro_windows || exit 1) + fi + + echo "$i run begins: `date`" + echo "test_micro $1..." + if [ -f $i/$variant/libdb.a ] ; then + (cd $i/$variant/ && ./test_micro $1 || exit 1) + if [ -f $t/gmon.out ] ; then + mv $t/gmon.out $i/$variant + gprof $i/$variant/.libs/lt-test_micro $i/$variant/gmon.out > $i/$variant/gprof.out + fi + elif [ -f $i/build_windows/${WINBUILDDIR}/libdb??.lib ] ; then + (cd $i/build_windows/ && ./test_micro $1 || exit 1) + fi + echo "$i run ends: `date`" + done +} + +# Get a path to this shellscript. +t=`dirname $0` +h=`(cd $t && pwd)` +if [ "$CYGWIN" = "1" ];then + h="`cygpath -m -a \"$h\"`" +fi +# We may need to re-compile, create a list of our sources. 
+SRC="$h/source/b_curalloc.c $h/source/b_curwalk.c $h/source/b_del.c +$h/source/b_get.c $h/source/b_inmem.c $h/source/b_load.c $h/source/b_latch.c +$h/source/b_open.c $h/source/b_put.c $h/source/b_recover.c +$h/source/b_txn.c $h/source/b_txn_write.c $h/source/b_uname.c +$h/source/b_util.c $h/source/b_workload.c $h/source/test_micro.c +$h/../../src/common/util_arg.c" + +WINSRC="$h/../../src/clib/getopt.c" + +# Process arguments. +clean=0 # Rebuild test_micro +workload=0 # Run workload tests +start_test=0 # Start test +end_test=0 # End test +variant=build_unix +while : + do case "$1" in + -c) # Rebuild test_micro. + clean=1 + shift;; + -w) # Run workload tests + workload=1 + shift;; + [1-9]*-[0-9]*) # Range: -3, 3-, 3-10 + start_test=`echo $1|sed 's/-.*//'` + start_test=${start_test:=1} + end_test=`echo $1|sed 's/.*-//'` + end_test=${end_test:=0} + shift;; + [1-9]*) # Specific test + start_test="$1" + end_test="$1" + shift;; + -v) variant=$2 # get code here, rather than from build_unix + shift; shift;; + *) + break;; + esac +done +test "$#" -ne 0 && { + echo 'usage: test_micro [-cw] [# | #- | -# | #-#]' >& 2 + exit 1 +} + +if test $start_test != 0; then + cmd="$cmd -s $start_test" +fi +if test $end_test != 0; then + cmd="$cmd -e $end_test" +fi + +# Create the run directory, and initialize test_micro's arguments. +t=RUN.`hostname | sed 's/\..*//'` +[ -d $t ] || mkdir $t +if [ "$CYGWIN" = "1" ];then + cmd="$cmd -d `(cd $t && cygpath -m -a \"$PWD\")`" +else + cmd="$cmd -d `(cd $t && pwd)`" +fi +# Set input file. +if test "$workload" -eq 1; then + cmd="$cmd -i $h/configs/run.workload" +else + cmd="$cmd -i $h/configs/run.std" +fi + +# Flush any I/O, just to get as a clean a timing as we can, ignore errors, +# sync is privleged on some systems. 
+(sync && sleep 1 2>&1) > /dev/null + +run "$cmd" + +exit 0 diff --git a/test/tcl/README b/test/tcl/README new file mode 100644 index 00000000..fbcddc09 --- /dev/null +++ b/test/tcl/README @@ -0,0 +1,147 @@ +Rules for the Berkeley DB and Berkeley DB-XML test suites + +1. Test Naming + +The primary script for running Berkeley DB scripts is named +'test.tcl'. The primary script for running DB-XML is named +'xmltest.tcl'. + +Tests are named with a (prefix, test number) combination. The +prefix indicates the type of test (lock, log, xml, etc.). The +prefix 'test' is used for plain vanilla DB testing. Test numbers +are 3 digits long, starting with 001. + +Procedures common to a group of tests, or to all tests, are placed +in files named 'xxxutils.tcl'. At the moment, we have the following +utilities files: + +testutils.tcl Utilities common to all DB tests +reputils.tcl Utilities for replication testing. +siutils.tcl Utilities for secondary index testing. +xmlutils.tcl Utilities for XML testing. + +2. Internal test structure + +Each line within a test should be no more than 80 characters long. + +Each test starts with a section like the following: + +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2010 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test001 +# TEST Small keys/data +# TEST Put/get per key +# TEST Dump file +# TEST Close, reopen +# TEST Dump file +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. + +First we refer to the license and assert copyright, then comes the CVS +header string. The section of lines beginning # TEST is used to +automatically maintain the TESTS file, a listing of all tests and +what they do. Use this section to briefly describe the test's purpose +and structure. 
+ +Next comes the main procedure of the test, which has the same name +as the tcl file. The test should be liberally commented, and also +should use 'puts' to send messages to the output file. + +Sections of a test are identified with letters: test001.a, test001.b, +test001.c. + +Here's some typical output: + + puts "Test$tnum: $method ($args) $nentries equal key/data pairs" + puts "\tTest$tnum.a: put/get loop" + puts "\tTest$tnum.b: dump file" + puts "\tTest$tnum.c: close, open, and dump file" + puts "\tTest$tnum.d: close, open, and dump file in reverse direction" + +The reporting of the current value of the args is particularly +useful, allowing us to say at a glance that "testxxx is failing in +btree" or whatever. Each line of output must begin with the test name. +We use this to separate expected informational output from errors. + +Ancillary procedures follow the main procedure. Procedures used +by more than one test should go into the appropriate XXXutils.tcl +file. + +3. Reporting failures + +Failures in tests are reported with a message starting with the +prefix "FAIL:". Failures in tests are usually caught with the +error_check_good and error_check_bad routines to compare an +actual return value to an expected return value. These routines +take care of putting the "FAIL:" prefix on the message. + +4. Running tests + +Any single test can be run from the tclsh prompt by typing the +name of the test. If it's a test from the 'testxxx' group, you +should also specify the method you'd like to test: + + log001 + test001 btree + +To run one of the 'testxxx' tests for all methods, use the +run_test procedure: + + run_test test001 + +Any group of tests (the subsystems lock, log, test, etc.) can be +run by typing + + r $sub + +where sub is the name of the subsystem. + +For any of the following methods + +run_method +run_secmethod +run_secenv +run_reptest +run_repmethod +run_envmethod +run_recd + +you can type + +run (suffix method start stop). 
+ +For example, to run test010 through test020 in btree using +run_method: + + run method btree 10 20 + +Or the same tests in repmethod: + + run repmethod btree 10 20 + +Notice the missing underbar. + +If you omit the start and stop numbers, you'll get all the tests: + + run method btree + +run_recd is a special case, in that it runs the recdxxx tests; +all the others run the testxxx tests. + +To run the standard test suite, type run_std at the tclsh prompt. +To run all the tests, type run_all. + +If you are running run_std or run_all, you may use the run_parallel +interface to speed things up or to test under conditions of high +system load. Run_parallel creates a list of all tests in the run, +reorders the tests randomly, then runs the tests in a number of +parallel processes. To run run_std in five processes type + + run_parallel 5 run_std diff --git a/test/tcl/TESTS b/test/tcl/TESTS new file mode 100644 index 00000000..350ad620 --- /dev/null +++ b/test/tcl/TESTS @@ -0,0 +1,3405 @@ +# Automatically built by dist/s_test; may require local editing. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +backup + Test of hotbackup functionality. + + Do all the of the following tests with and without + the -c (checkpoint) option; and with and without the + transactional bulk loading optimization. Make sure + that -c and -d (data_dir) are not allowed together. + + (1) Test that plain and simple hotbackup works. + (2) Test with -data_dir (-d). + (3) Test updating an existing hot backup (-u). + (4) Test with absolute path. + (5) Test with DB_CONFIG (-D), setting log_dir (-l) + and data_dir (-d). + (6) DB_CONFIG and update. + (7) Repeat hot backup (non-update) with DB_CONFIG and + existing directories. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +bigfile001 + Create a database greater than 4 GB in size. Close, verify. + Grow the database somewhat. Close, reverify. Lather, rinse, + repeat. Since it will not work on all systems, this test is + not run by default. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +bigfile002 + This one should be faster and not require so much disk space, + although it doesn't test as extensively. Create an mpool file + with 1K pages. Dirty page 6000000. Sync. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Cold-boot a 4-site group. The first two sites start quickly and + initiate an election. The other two sites don't join the election until + the middle of the long full election timeout period. It's important that + the number of sites that start immediately be a sub-majority, because + that's the case that used to have a bug in it [#18456]. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dbm + Historic DBM interface test. Use the first 1000 entries from the + dictionary. Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Then reopen the file, re-retrieve everything. Finally, delete + everything. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +db_reptest + Wrapper to configure and run the db_reptest program. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead001 + Use two different configurations to test deadlock detection among a + variable number of processes. One configuration has the processes + deadlocked in a ring. The other has the processes all deadlocked on + a single resource. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead002 + Same test as dead001, but use "detect on every collision" instead + of separate deadlock detector. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead003 + + Same test as dead002, but explicitly specify DB_LOCK_OLDEST and + DB_LOCK_YOUNGEST. Verify the correct lock was aborted/granted. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead006 + use timeouts rather than the normal dd algorithm. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead007 + Tests for locker and txn id wraparound. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead008 + Run dead001 deadlock test using priorities + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead009 + Run dead002 deadlock test using priorities + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead010 + + Same test as dead003, except the actual youngest and oldest will have + higher priorities. Verify that the oldest/youngest of the lower + priority lockers gets killed. Doesn't apply to 2 procs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +dead011 + Test out the minlocks, maxlocks, and minwrites options + to the deadlock detector when priorities are used. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env001 + Test of env remove interface (formerly env_remove). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env002 + Test of DB_LOG_DIR and env name resolution. + With an environment path specified using -home, and then again + with it specified by the environment variable DB_HOME: + 1) Make sure that the set_lg_dir option is respected + a) as a relative pathname. + b) as an absolute pathname. + 2) Make sure that the DB_LOG_DIR db_config argument is respected, + again as relative and absolute pathnames. + 3) Make sure that if -both- db_config and a file are present, + only the file is respected (see doc/env/naming.html). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env003 + Test DB_TMP_DIR and env name resolution + With an environment path specified using -home, and then again + with it specified by the environment variable DB_HOME: + 1) Make sure that the DB_TMP_DIR config file option is respected + a) as a relative pathname. + b) as an absolute pathname. + 2) Make sure that the -tmp_dir config option is respected, + again as relative and absolute pathnames. + 3) Make sure that if -both- -tmp_dir and a file are present, + only the file is respected (see doc/env/naming.html). 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env004 + Test multiple data directories. Do a bunch of different opens + to make sure that the files are detected in different directories. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env005 + Test that using subsystems without initializing them correctly + returns an error. Cannot test mpool, because it is assumed in + the Tcl code. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env006 + Make sure that all the utilities exist and run. + Test that db_load -r options don't blow up. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env007 + Test DB_CONFIG config file options for berkdb env. + 1) Make sure command line option is respected + 2) Make sure that config file option is respected + 3) Make sure that if -both- DB_CONFIG and the set_ + method is used, only the file is respected. + Then test all known config options. + Also test config options on berkdb open. This isn't + really env testing, but there's no better place to put it. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env008 + Test environments and subdirectories. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env009 + Test calls to all the various stat functions. We have several + sprinkled throughout the test suite, but this will ensure that + we run all of them at least once. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env010 + Run recovery in an empty directory, and then make sure we can still + create a database in that directory. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env011 + Run with region overwrite flag. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env012 + Test DB_REGISTER. + + DB_REGISTER will fail on systems without fcntl. If it + fails, make sure we got the expected DB_OPNOTSUP return. + + Then, the real tests: + For each test, we start a process that opens an env with -register. + + 1. Verify that a 2nd process can enter the existing env with -register. + + 2. 
Kill the 1st process, and verify that the 2nd process can enter + with "-register -recover". + + 3. Kill the 1st process, and verify that the 2nd process cannot + enter with just "-register". + + 4. While the 1st process is still running, a 2nd process enters + with "-register". Kill the 1st process. Verify that a 3rd process + can enter with "-register -recover". Verify that the 3rd process, + entering, causes process 2 to fail with the message DB_RUNRECOVERY. + + 5. We had a bug where recovery was always run with -register + if there were empty slots in the process registry file. Verify + that recovery doesn't automatically run if there is an empty slot. + + 6. Verify process cannot connect when specifying -failchk and an + isalive function has not been declared. + + 7. Verify that a 2nd process can enter the existing env with -register + and -failchk and having specified an isalive function + + 8. Kill the 1st process, and verify that the 2nd process can enter + with "-register -failchk -recover" + + 9. 2nd process enters with "-register -failchk". Kill the 1st process. + 2nd process may get blocked on a mutex held by process one. Verify + 3rd process can enter with "-register -recover -failchk". 3rd process + should run failchk, clear out open txn/log from process 1. It will + enter env without need for any additional recovery. We look for + "Freeing log information .." sentence in the log for 3rd process as + an indication that failchk ran. If DB_RUNRECOVERY were returned + instead it would mean failchk could not recover. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env013 + Test of basic functionality of fileid_reset. + + Create a database in an env. Copy it to a new file within + the same env. Reset the file id and make sure it has changed. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env014 + + Make sure that attempts to open an environment with + incompatible flags (e.g. 
replication without transactions) + fail with the appropriate messages. + + A new thread of control joining an env automatically + initializes the same subsystems as the original env. + Make sure that the attempt to change subsystems when + joining an env fails with the appropriate messages. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env015 + Rename the underlying directory of an env, make sure everything + still works. Test runs with regular named databases and with + in-memory named databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env016 + Replication settings and DB_CONFIG + + Create a DB_CONFIG for various replication settings. Use + rep_stat or getter functions to verify they're set correctly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env017 + Check documented "stat" fields against the fields + returned by the "stat" functions. Make sure they + match, and that none are missing. + These are the stat functions we test: + env log_stat + env lock_stat + env txn_stat + env mutex_stat + env rep_stat + env repmgr_stat + env mpool_stat + db stat + seq stat + db compact_stat + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env018 + Test getters when joining an env. When a second handle is + opened on an existing env, get_open_flags needs to return + the correct flags to the second handle so it knows what sort + of environment it's just joined. + + For several different flags to env_open, open an env. Open + a second handle on the same env, get_open_flags and verify + the flag is returned. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env019 + Test that stats are correctly set and reported when + an env is accessed from a second process. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env020 + Check if the output information for stat_print is expected. 
+ These are the stat_print functions we test: + env stat_print + env lock_stat_print + env log_stat_print + env mpool_stat_print + env mutex_stat_print + env rep_stat_print + env repmgr_stat_print + env txn_stat_print + db stat_print + seq stat_print + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +env021 + Test the operations on a transaction in a CDS environment. + These are the operations we test: + $txn abort + $txn commit + $txn id + $txn prepare + $txn setname name + $txn getname + $txn discard + $txn set_timeout + In these operations, we only support the following: + $txn id + $txn commit + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop001.tcl + Test two file system operations combined in one transaction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop002.tcl + Test file system operations in the presence of bad permissions. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop003 + + Test behavior of create and truncate for compatibility + with sendmail. + 1. DB_TRUNCATE is not allowed with locking or transactions. + 2. Can -create into zero-length existing file. + 3. Can -create into non-zero-length existing file if and + only if DB_TRUNCATE is specified. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop004 + Test of DB->rename(). (formerly test075) + Test that files can be renamed from one directory to another. + Test that files can be renamed using absolute or relative + pathnames. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop005 + Test of DB->remove() + Formerly test080. + Test use of dbremove with and without envs, with absolute + and relative paths, and with subdirectories. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop006 + Test file system operations in multiple simultaneous + transactions. Start one transaction, do a file operation. + Start a second transaction, do a file operation. Abort + or commit txn1, then abort or commit txn2, and check for + appropriate outcome. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop007 + Test file system operations on named in-memory databases. + Combine two ops in one transaction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop008 + Test file system operations on named in-memory databases. + Combine two ops in one transaction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop009 + Test file system operations in child transactions. + Combine two ops in one child transaction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop010 + Test file system operations in child transactions. + Two ops, each in its own child txn. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop011 + Test file system operations in child transactions. + Combine two ops in one child transaction, with in-memory + databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +fop012 + Test file system operations in child transactions. + Two ops, each in its own child txn, with in-memory dbs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +jointest + Test duplicate assisted joins. Executes 1, 2, 3 and 4-way joins + with differing index orders and selectivity. + + We'll test 2-way, 3-way, and 4-way joins and figure that if those + work, everything else does as well. We'll create test databases + called join1.db, join2.db, join3.db, and join4.db. The number on + the database describes the duplication -- duplicates are of the + form 0, N, 2N, 3N, ... where N is the number of the database. + Primary.db is the primary database, and null.db is the database + that has no matching duplicates. + + We should test this on all btrees, all hash, and a combination thereof + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock001 + Make sure that the basic lock tests work. Do some simple gets + and puts for a single locker. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock002 + Exercise basic multi-process aspects of lock. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock003 + Exercise multi-process aspects of lock. Generate a bunch of parallel + testers that try to randomly obtain locks; make sure that the locks + correctly protect corresponding objects. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock004 + Test locker ids wrapping around. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock005 + Check that page locks are being released properly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +lock006 + Test lock_vec interface. We do all the same things that + lock001 does, using lock_vec instead of lock_get and lock_put, + plus a few more things like lock-coupling. + 1. Get and release one at a time. + 2. Release with put_obj (all locks for a given locker/obj). + 3. Release with put_all (all locks for a given locker). + Regularly check lock_stat to verify all locks have been + released. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log001 + Read/write log records. + Test with and without fixed-length, in-memory logging, + and encryption. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log002 + Tests multiple logs + Log truncation + LSN comparison and file functionality. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log003 + Verify that log_flush is flushing records correctly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log004 + Make sure that if we do PREVs on a log, but the beginning of the + log has been truncated, we do the right thing. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log005 + Check that log file sizes can change on the fly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log006 + Test log file auto-remove. + Test normal operation. + Test a long-lived txn. + Test log_archive flags. + Test db_archive flags. + Test turning on later. + Test setting via DB_CONFIG. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log007 + Test of in-memory logging bugs. 
[#11505] + + Test db_printlog with in-memory logs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log008 + Test what happens if a txn_ckp record falls into a + different log file than the DBREG_CKP records generated + by the same checkpoint. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +log009 + Test of logging and getting log file version information. + Each time we cross a log file boundary verify we can + get the version via the log cursor. + Do this both forward and backward. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +memp001 + Randomly updates pages. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +memp002 + Tests multiple processes accessing and modifying the same files. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +memp003 + Test reader-only/writer process combinations; we use the access methods + for testing. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +memp004 + Test that small read-only databases are mapped into memory. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +memp005 + Make sure that db pagesize does not interfere with mpool pagesize. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +mut001 + Exercise the mutex API. + + Allocate, lock, unlock, and free a bunch of mutexes. + Set basic configuration options and check mutex_stat and + the mutex getters for the correct values. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +mut002 + Two-process mutex test. + + Allocate and lock a self-blocking mutex. Start another process. + Try to lock the mutex again -- it will block. + Unlock the mutex from the other process, and the blocked + lock should be obtained. Clean up. + Do another test with a "-process-only" mutex. The second + process should not be able to unlock the mutex. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +mut003 + Try doing mutex operations out of order. Make sure + we get appropriate errors. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +plat001 + + Test of portability of sequences. + + Create and dump a database containing sequences. Save the dump. + This test is used in conjunction with the upgrade tests, which + will compare the saved dump to a locally created dump. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd001 + Per-operation recovery tests for non-duplicate, non-split + messages. Makes sure that we exercise redo, undo, and do-nothing + condition. Any test that appears with the message (change state) + indicates that we've already run the particular test, but we are + running it again so that we can change the state of the data base + to prepare for the next test (this applies to all other recovery + tests as well). + + These are the most basic recovery tests. We do individual recovery + tests for each operation in the access method interface. First we + create a file and capture the state of the database (i.e., we copy + it. Then we run a transaction containing a single operation. In + one test, we abort the transaction and compare the outcome to the + original copy of the file. In the second test, we restore the + original copy of the database and then run recovery and compare + this against the actual database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd002 + Split recovery tests. For every known split log message, makes sure + that we exercise redo, undo, and do-nothing condition. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd003 + Duplicate recovery tests. For every known duplicate log message, + makes sure that we exercise redo, undo, and do-nothing condition. + + Test all the duplicate log messages and recovery operations. We make + sure that we exercise all possible recovery actions: redo, undo, undo + but no fix necessary and redo but no fix necessary. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd004 + Big key test where big key gets elevated to internal page. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd005 + Verify reuse of file ids works on catastrophic recovery. + + Make sure that we can do catastrophic recovery even if we open + files using the same log file id. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd006 + Nested transactions. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd007 + File create/delete tests. + + This is a recovery test for create/delete of databases. We have + hooks in the database so that we can abort the process at various + points and make sure that the transaction doesn't commit. We + then need to recover and make sure the file is correctly existing + or not, as the case may be. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd008 + Test deeply nested transactions and many-child transactions. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd009 + Verify record numbering across split/reverse splits and recovery. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd010 + Test stability of btree duplicates across btree off-page dup splits + and reverse splits and across recovery. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd011 + Verify that recovery to a specific timestamp works. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd012 + Test of log file ID management. [#2288] + Test recovery handling of file opens and closes. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd013 + Test of cursor adjustment on child transaction aborts. [#2373] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd014 + This is a recovery test for create/delete of queue extents. We + then need to recover and make sure the file is correctly existing + or not, as the case may be. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd015 + This is a recovery test for testing lots of prepared txns. + This test is to force the use of txn_recover to call with the + DB_FIRST flag and then DB_NEXT. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd016 + Test recovery after checksum error. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd017 + Test recovery and security. This is basically a watered + down version of recd001 just to verify that encrypted environments + can be recovered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd018 + Test recover of closely interspersed checkpoints and commits. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd019 + Test txn id wrap-around and recovery. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd020 + Test creation of intermediate directories -- an + undocumented, UNIX-only feature. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd021 + Test of failed opens in recovery. + + If a file was deleted through the file system (and not + within Berkeley DB), an error message should appear. + Test for regular files and subdbs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd022 + Test that pages allocated by an aborted subtransaction + within an aborted prepared parent transaction are returned + to the free list after recovery. This exercises + __db_pg_prepare in systems without FTRUNCATE. [#7403] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd023 + Test recover of reverse split. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd024 + Test recovery of streaming partial insert operations. These are + operations that do multiple partial puts that append to an existing + data item (as long as the data item is on an overflow page). + The interesting cases are: + * Simple streaming operations + * Operations that cause the overflow item to flow onto another page. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +recd025 + Basic tests for transaction bulk loading and recovery. + In particular, verify that the tricky hot backup protocol works. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep001 + Replication rename and forced-upgrade test. + + Run rep_test in a replicated master environment. + Verify that the database on the client is correct. + Next, remove the database, close the master, upgrade the + client, reopen the master, and make sure the new master can + correctly run rep_test and propagate it in the other direction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep002 + Basic replication election test. + + Run a modified version of test001 in a replicated master + environment; hold an election among a group of clients to + make sure they select a proper master from amongst themselves, + in various scenarios. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep003 + Repeated shutdown/restart replication test + + Run a quick put test in a replicated master environment; + start up, shut down, and restart client processes, with + and without recovery. To ensure that environment state + is transient, use DB_PRIVATE. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep005 + Replication election test with error handling. + + Run rep_test in a replicated master environment; + hold an election among a group of clients to make sure they select + a proper master from amongst themselves, forcing errors at various + locations in the election path. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep006 + Replication and non-rep env handles. + + Run a modified version of test001 in a replicated master + environment; verify that the database on the client is correct. + Next, create a non-rep env handle to the master env. + Attempt to open the database r/w to force error. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep007 + Replication and bad LSNs + + Run rep_test in a replicated master env. + Close the client. Make additional changes to master. + Close the master. Open the client as the new master. + Make several different changes. 
Open the old master as + the client. Verify periodically that contents are correct. + This test is not appropriate for named in-memory db testing + because the databases are lost when both envs are closed. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep008 + Replication, back up and synchronizing + + Run a modified version of test001 in a replicated master + environment. + Close master and client. + Copy the master log to the client. + Clean the master. + Reopen the master and client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep009 + Replication and DUPMASTERs + Run test001 in a replicated environment. + + Declare one of the clients to also be a master. + Close a client, clean it and then declare it a 2nd master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep010 + Replication and ISPERM + + With consecutive message processing, make sure every + DB_REP_PERMANENT is responded to with an ISPERM when + processed. With gaps in the processing, make sure + every DB_REP_PERMANENT is responded to with an ISPERM + or a NOTPERM. Verify in both cases that the LSN returned + with ISPERM is found in the log. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep011 + Replication: test open handle across an upgrade. + + Open and close test database in master environment. + Update the client. Check client, and leave the handle + to the client open as we close the masterenv and upgrade + the client to master. Reopen the old master as client + and catch up. Test that we can still do a put to the + handle we created on the master while it was still a + client, and then make sure that the change can be + propagated back to the new client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep012 + Replication and dead DB handles. + + Run a modified version of test001 in a replicated master env. + Run in replicated environment with secondary indices too. + Make additional changes to master, but not to the client. 
+ Downgrade the master and upgrade the client with open db handles. + Verify that the roll back on clients gives dead db handles. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep013 + Replication and swapping master/clients with open dbs. + + Run a modified version of test001 in a replicated master env. + Make additional changes to master, but not to the client. + Swap master and client. + Verify that the roll back on clients gives dead db handles. + Rerun the test, turning on client-to-client synchronization. + Swap and verify several times. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep014 + Replication and multiple replication handles. + Test multiple client handles, opening and closing to + make sure we get the right openfiles. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep015 + Locking across multiple pages with replication. + + Open master and client with small pagesize and + generate more than one page and generate off-page + dups on the first page (second key) and last page + (next-to-last key). + Within a single transaction, for each database, open + 2 cursors and delete the first and last entries (this + exercises locks on regular pages). Intermittently + update client during the process. + Within a single transaction, for each database, open + 2 cursors. Walk to the off-page dups and delete one + from each end (this exercises locks on off-page dups). + Intermittently update client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep016 + Replication election test with varying required nvotes. + + Run a modified version of test001 in a replicated master environment; + hold an election among a group of clients to make sure they select + the master with varying required participants. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep017 + Concurrency with checkpoints. + + Verify that we achieve concurrency in the presence of checkpoints. 
+ Here are the checks that we wish to make: + While dbenv1 is handling the checkpoint record: + Subsequent in-order log records are accepted. + Accepted PERM log records get NOTPERM + A subsequent checkpoint gets NOTPERM + After checkpoint completes, next txn returns PERM + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep018 + Replication with dbremove. + + Verify that the attempt to remove a database file + on the master hangs while another process holds a + handle on the client. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep019 + Replication and multiple clients at same LSN. + Have several clients at the same LSN. Run recovery at + different times. Declare a client master and after sync-up + verify all client logs are identical. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep020 + Replication elections - test election generation numbers. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep021 + Replication and multiple environments. + Run similar tests in separate environments, making sure + that some data overlaps. Then, "move" one client env + from one replication group to another and make sure that + we do not get divergent logs. We either match the first + record and end up with identical logs or we get an error. + Verify all client logs are identical if successful. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep022 + Replication elections - test election generation numbers + during simulated network partition. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep023 + Replication using two master handles. + + Open two handles on one master env. Create two + databases, one through each master handle. Process + all messages through the first master handle. Make + sure changes made through both handles are picked + up properly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep024 + Replication page allocation / verify test + + Start a master (site 1) and a client (site 2). 
Master + closes (simulating a crash). Site 2 becomes the master + and site 1 comes back up as a client. Verify database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep025 + Test of DB_REP_JOIN_FAILURE. + + One master, one client. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put one more record to the master. At the next + processing of messages, the client should get JOIN_FAILURE. + Recover with a hot failover. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep026 + Replication elections - simulate a crash after sending + a vote. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep027 + Replication and secondary indexes. + + Set up a secondary index on the master and make sure + it can be accessed from the client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep028 + Replication and non-rep env handles. (Also see rep006.) + + Open second non-rep env on client, and create a db + through this handle. Open the db on master and put + some data. Check whether the non-rep handle keeps + working. Also check if opening the client database + in the non-rep env writes log records. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep029 + Test of internal initialization. + + One master, one client. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put one more record to the master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep030 + Test of internal initialization multiple files and pagesizes. + Hold some databases open on master. + + One master, one client using a data_dir for internal init. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put one more record to the master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep031 + Test of internal initialization and blocked operations. + + One master, one client. + Put one more record to the master. 
+ Test that internal initialization blocks: + log_archive, rename, remove, fileid_reset, lsn_reset. + Sleep 30+ seconds. + Test that blocked operations are now unblocked. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep032 + Test of log gap processing. + + One master, one client. + Run rep_test. + Run rep_test without sending messages to client. + Make sure client missing the messages catches up properly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep033 + Test of internal initialization with rename and remove of dbs. + + One master, one client. + Generate several databases. Replicate to client. + Do some renames and removes, both before and after + closing the client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep034 + Test of STARTUPDONE notification. + + STARTUPDONE can now be recognized without the need for new "live" log + records from the master (under favorable conditions). The response to + the ALL_REQ at the end of synchronization includes an end-of-log marker + that now triggers it. However, the message containing that end marker + could get lost, so live log records still serve as a back-up mechanism. + The end marker may also be set under c2c sync, but only if the serving + client has itself achieved STARTUPDONE. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep035 + Test sync-up recovery in replication. + + We need to fork off 3 child tclsh processes to operate + on Site 3's (client always) home directory: + Process 1 continually calls lock_detect. + Process 2 continually calls txn_checkpoint. + Process 3 continually calls memp_trickle. + Process 4 continually calls log_archive. + Sites 1 and 2 will continually swap being master + (forcing site 3 to continually run sync-up recovery) + New master performs 1 operation, replicates and downgrades. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep036 + Multiple master processes writing to the database. + One process handles all message processing. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep037 + Test of internal initialization and page throttling. + + One master, one client, force page throttling. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put one more record to the master. + Verify page throttling occurred. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep038 + Test of internal initialization and ongoing master updates. + + One master, one client. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put more records on master while initialization is in progress. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep039 + Test of interrupted internal initialization. The + interruption is due to a changed master, or the client crashing, + or both. + + One master, two clients. + Generate several log files. Remove old master log files. + Restart client, optionally having "cleaned" client env dir. Either + way, this has the effect of forcing an internal init. + Interrupt the internal init. + Vary the number of times we process messages to make sure + the interruption occurs at varying stages of the first internal + initialization. + + Run for btree and queue only because of the number of permutations. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep040 + Test of racing rep_start and transactions. + + One master, one client. + Have master in the middle of a transaction. + Call rep_start to make master a client. + Commit the transaction. + Call rep_start to make master the master again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep041 + Turn replication on and off at run-time. + + Start a master with replication OFF (noop transport function). + Run rep_test to advance log files and archive. + Start up client; change master to working transport function. + Now replication is ON. + Do more ops, make sure client is up to date. 
+ Close client, turn replication OFF on master, do more ops. + Repeat from point A. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep042 + Concurrency with updates. + + Verify racing role changes and updates don't result in + pages with LSN 0,1. Set up an environment that is master. + Spawn child process that does a delete, but using the + $env check so that it sleeps in the middle of the call. + Master downgrades and then sleeps as a client so that + child will run. Verify child does not succeed (should + get read-only error) due to role change in the middle of + its call. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep043 + + Constant writes during upgrade/downgrade. + + Three envs take turns being master. Each env + has a child process which does writes all the + time. They will succeed when that env is master + and fail when it is not. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep044 + + Test rollbacks with open file ids. + + We have one master with two handles and one client. + Each time through the main loop, we open a db, write + to the db, and close the db. Each one of these actions + is propagated to the client, or a roll back is forced + by swapping masters. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep045 + + Replication with versions. + + Mimic an application where a database is set up in the + background and then put into a replication group for use. + The "version database" identifies the current live + version, the database against which queries are made. + For example, the version database might say the current + version is 3, and queries would then be sent to db.3. + Version 4 is prepared for use while version 3 is in use. + When version 4 is complete, the version database is updated + to point to version 4 so queries can be directed there. + + This test has a master and two clients. One client swaps + roles with the master, and the other client runs constantly + in another process. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep046 + Replication and basic bulk transfer. + Set bulk transfer replication option. + Run long txns on master and then commit. Process on client + and verify contents. Run a very long txn so that logging + must send the log. Process and verify on client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep047 + Replication and log gap bulk transfers. + Set bulk transfer replication option. + Run test. Start a new client (to test ALL_REQ and bulk). + Run small test again. Clear messages for 1 client. + Run small test again to test LOG_REQ gap processing and bulk. + Process and verify on clients. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep048 + Replication and log gap bulk transfers. + Have two master env handles. Turn bulk on in + one (turns it on for both). Turn it off in the other. + While toggling, send log records from both handles. + Process message and verify master and client match. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep049 + Replication and delay syncing clients - basic test. + + Open and start up a master and two clients. Turn on delay sync + in the delayed client. Change master, add data and process messages. + Verify delayed client does not match. Make additional changes and + update the delayed client. Verify all match. + Add in a fresh delayed client to test delay of ALL_REQ. + Process startup messages and verify fresh client has no database. + Sync and verify fresh client matches. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep050 + Replication and delay syncing clients - change master test. + + Open and start up master and 4 clients. Turn on delay for 3 clients. + Switch masters, add data and verify delayed clients are out of date. + Make additional changes to master. And change masters again. + Sync/update delayed client and verify. The 4th client is a brand + new delayed client added in to test the non-verify path. 
+ + Then test two different things: + 1. Swap master again while clients are still delayed. + 2. Swap master again while sync is proceeding for one client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep051 + Test of compaction with replication. + + Run rep_test in a replicated master environment. + Delete a large number of entries and compact with -freespace. + Propagate the changes to the client and make sure client and + master match. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep052 + Test of replication with NOWAIT. + + One master, one client. After initializing + everything normally, close client and let the + master get ahead -- far enough that the master + no longer has the client's last log file. + Reopen the client and turn on NOWAIT. + Process a few messages to get the client into + recovery mode, and verify that lockout occurs + on a txn API call (txn_begin) and an env API call. + Process all the messages and verify that lockout + is over. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep053 + Replication and basic client-to-client synchronization. + + Open and start up master and 1 client. + Start up a second client later and verify it sync'ed from + the original client, not the master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep054 + Test of internal initialization where a far-behind + client takes over as master. + + One master, two clients. + Run rep_test and process. + Close client 1. + Run rep_test, opening new databases, and processing + messages. Archive as we go so that log files get removed. + Close master and reopen client 1 as master. Process messages. + Verify that new master and client are in sync. + Run rep_test again, adding data to one of the new + named databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep055 + Test of internal initialization and log archiving. + + One master, one client. + Generate several log files. + Remove old master log files and generate several more. 
+ Get list of archivable files from db_archive and restart client. + As client is in the middle of internal init, remove + the log files returned earlier by db_archive. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep058 + + Replication with early databases + + Mimic an application where they create a database before + calling rep_start, thus writing log records on a client + before it is a client. Verify we cannot join repl group. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep060 + Test of normally running clients and internal initialization. + Have a client running normally, but slow/far behind the master. + Then the master checkpoints and archives, causing the client + to suddenly be thrown into internal init. This test tests + that we clean up the old files/pages in mpool and dbreg. + Also test same thing but the app holding an open dbp as well. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep061 + Test of internal initialization multiple files and pagesizes + with page gaps. + + One master, one client. + Generate several log files. + Remove old master log files. + Delete client files and restart client. + Put one more record to the master. + Force some page messages to get dropped. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep062 + Test of internal initialization where client has a different + kind of database than the master. + + Create a master of one type, and let the client catch up. + Close the client. + Remove the database on the master, and create a new + database of the same name but a different type. + Run the master ahead far enough that internal initialization + will be required on the reopen of the client. + Reopen the client and verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep063 + Replication election test with simulated different versions + for each site. This tests that old sites with real priority + trump ELECTABLE sites with zero priority even with greater LSNs. 
+ There is a special case in the code for testing that if the + priority is <= 10, we simulate mixed versions for elections. + + Run a rep_test in a replicated master environment and close; + hold an election among a group of clients to make sure they select + the master with varying LSNs and priorities. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep064 + Replication rename and forced-upgrade test. + + The test verifies that the client correctly + (internally) closes files when upgrading to master. + It does this by having the master have a database + open, then crashing. The client upgrades to master, + and attempts to remove the open database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep065 + Tests replication running with different versions. + This capability is introduced with 4.5. + + Start a replication group of 1 master and N sites, all + running some historical version greater than or equal to 4.4. + Take down a client and bring it up again running current. + Run some upgrades, make sure everything works. + + Each site runs the tcllib of its own version, but uses + the current tcl code (e.g. test.tcl). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep066 + Replication and dead log handles. + + Run rep_test on master and a client. + Simulate client crashes (master continues) until log 2. + Open 2nd master env handle and put something in log and flush. + Downgrade master, restart client as master. + Run rep_test on newmaster until log 2. + New master writes log records, newclient processes records + and 2nd newclient env handle calls log_flush. + New master commits, newclient processes and should succeed. + Make sure 2nd handle detects the old log handle and doesn't + write to a stale handle (if it does, the processing of the + commit will fail). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep067 + Full election timeout test. 
+ + Verify that elections use a separate "full election timeout" (if such + configuration is in use) instead of the normal timeout, when the + replication group is "cold-booted" (all sites starting with recovery). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep068 + Verify replication of dbreg operations does not hang clients. + In a simple replication group, create a database with very + little data. With DB_TXN_NOSYNC the database can be created + at the client even though the log is not flushed. If we crash + and restart, the application of the log starts over again, even + though the database is still there. The application can open + the database before replication tries to re-apply the create. + This causes a hang as replication waits to be able to get a + handle lock. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep069 + Test of internal initialization and elections. + + If a client is in a recovery mode of any kind, it + participates in elections at priority 0 so it can + never be elected master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep070 + Test of startup_done condition with idle master. + + Join a client to an existing master, and verify that + the client detects startup_done even if the master + does not execute any new transactions. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep071 + Test of multiple simultaneous client env handles and + upgrading/downgrading. Tests use of temp db handle + internally. + + Open a master and 2 handles to the same client env. + Run rep_test. + Close master and upgrade client to master using one env handle. + Run rep_test again, and then downgrade back to client. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep072 + Verify that internal init does not leak resources from + the locking subsystem. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep073 + + Test of allowing clients to create and update their own scratch + databases within the environment. Doing so requires the + use of the DB_TXN_NOT_DURABLE flag for those databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep074 + Verify replication withstands send errors processing requests. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep075 + Replication and prepared transactions. + Test having outstanding prepared transactions and simulating + crashing or upgrading or downgrading sites. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep076 + Replication elections - what happens if elected client + does not become master? + + Set up a master and 3 clients. Take down master, run election. + The elected client will ignore the fact that it's been elected, + so we still have 2 clients. + + Run another election, a regular election that allows the winner + to become master, and make sure it goes okay. We do this both + for the client that ignored its election and for the other client. + + This simulates what would happen if, say, we had a temporary + network partition and lost the winner. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep077 + + Replication, recovery and applying log records immediately. + Master and 1 client. Start up both sites. + Close client and run rep_test on the master so that the + log record is the same LSN the client would be expecting. + Reopen client with recovery and verify the client does not + try to apply that "expected" record before it synchronizes + with the master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep078 + + Replication and basic lease test. + Set leases on master and 2 clients. + Do a lease operation and process to all clients. + Read with lease on master. Do another lease operation + and don't process on any client. 
Try to read with + on the master and verify it fails. Process the messages + to the clients and retry the read. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep079 + Replication leases and invalid usage. + + Open a client without leases. Attempt to set leases after rep_start. + Attempt to declare as master without election. + Run an election with an nsites parameter value. + Elect a master with leases. Put some data and send to clients. + Cleanly shutdown master env. Restart without + recovery and verify leases are expired and refreshed. + Add a new client without leases to a group using leases. + Test errors if we cannot get leases before/after txn_commit. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep080 + AUTOINIT off with empty client logs. + + Verify that a fresh client trying to join the group for + the first time observes the setting of DELAY_SYNC and !AUTOINIT + properly. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep081 + Test of internal initialization and missing database files. + + One master, one client, two databases. + Generate several log files. + Remove old master log files. + Start up client. + Remove or replace one master database file while client initialization + is in progress, make sure other master database can keep processing. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep082 + Sending replication requests to correct master site. + + Regression test for a bug [#16592] where a client could send an + UPDATE_REQ to another client instead of the master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep083 + Replication clients must never send VERIFY_FAIL to a c2c request. + + Regression test for a bug [#16592] where a client could send a + VERIFY_FAIL to another client, which is illegal. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep084 + Abbreviated internal init for named in-memory databases (NIMDBs). 
+ + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep085 + Skipping unnecessary abbreviated internal init. + + Make sure that once we've materialized NIMDBs, we don't bother + trying to do it again on subsequent sync without recovery. Make + sure we do probe for the need to materialize NIMDBs, but don't do + any internal init at all if there are no NIMDBs. Note that in order to + do this test we don't even need any NIMDBs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep086 + Interrupted abbreviated internal init. + + Make sure we cleanly remove partially loaded named in-memory + databases (NIMDBs). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep087 + Abbreviated internal init with open file handles. + + Client has open handle to an on-disk DB when abbreviated + internal init starts. Make sure we lock out access, and make sure + it ends up as HANDLE_DEAD. Also, make sure that if there are + no NIMDBs, that we *don't* get HANDLE_DEAD. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep088 + Replication roll-back preserves checkpoint. + + Create a situation where a client has to roll back its + log, discarding some existing transactions, in order to sync + with a new master. + + 1. When the client still has its entire log file history, all + the way back to log file #1, it's OK if the roll-back discards + any/all checkpoints. + 2. When old log files have been archived, if the roll-back would + remove all existing checkpoints it must be forbidden. The log + must always have a checkpoint (or all files back through #1). + The client must do internal init or return JOIN_FAILURE. + 3. (the normal case) Old log files archived, and a checkpoint + still exists in the portion of the log which will remain after + the roll-back: no internal-init/JOIN_FAILURE necessary. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep089 + Test of proper clean-up of mpool during interrupted internal init. 
+ + Have a client in the middle of internal init when a new master + generation comes along, forcing the client to interrupt the internal + init, including doing the clean-up. The client is in the middle of + retrieving database pages, so that we are forced to clean up mpool. + (Regression test for bug 17121) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep090 + Test of AUTO_REMOVE on both master and client sites. + + One master, one client. Set AUTO_REMOVE on the client env. + Generate several log files. + Verify the client has properly removed the log files. + Turn on AUTO_REMOVE on the master and generate more log files. + Confirm both envs have the same log files. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep091 + Read-your-writes consistency. + Write transactions at the master, and then call the txn_applied() + method to see whether the client has received and applied them yet. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep092 + Read-your-writes consistency. + Test events in one thread (process) waking up another sleeping thread, + before a timeout expires. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep093 + Egen changes during election. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep094 + Full election with less than majority initially connected. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep095 + Test of internal initialization use of shared region memory. + + One master, one client. Create a gap that requires internal + initialization. Start the internal initialization in this + parent process and complete it in a separate child process. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep096 + Replication and db_replicate utility. + + Create a master and client environment. Open them. + Start a db_replicate process on each. Create a database on + the master and write some data. Then verify contents. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep097 + + Replication and lease data durability test. + Set leases on master and 2 clients. + Have the original master go down and a client take over. + Have the old master rejoin as client, but go down again. + The other two sites do one txn, while the original master's + LSN extends beyond due to running recovery. + Original Master rejoins while new master fails. Make sure remaining + original site is elected, with the smaller LSN, but with txn data. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rep098 + Test of internal initialization and page deallocation. + + Use one master, one client. + Generate several log files. + Remove old master log files. + Start a client. + After client gets UPDATE file information, delete entries to + remove pages in the database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr001 + Basic repmgr test. + + Run all mix-and-match combinations of the basic_repmgr_test. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr002 + Basic repmgr test. + + Run all combinations of the basic_repmgr_election_test. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr003 + Basic repmgr init test. + + Run all combinations of the basic_repmgr_init_test. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr007 + Basic repmgr client shutdown/restart test. + + Start an appointed master site and two clients. Shut down and + restart each client, processing transactions after each restart. + Verify all expected transactions are replicated. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr009 + repmgr API error test. + + Try a variety of repmgr calls that result in errors. Also + try combinations of repmgr and base replication API calls + that result in errors. + + Run for btree only because access method shouldn't matter. 
+ + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr010 + Acknowledgement policy and timeout test. + + Verify that "quorum" acknowledgement policy succeeds with fewer than + nsites running. Verify that "all" acknowledgement policy results in + ack failures with fewer than nsites running. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr011 + repmgr two site strict majority test. + + Start an appointed master and one client with 2 site strict + majority set. Shut down the master site, wait and verify that + the client site was not elected master. Start up master site + and verify that transactions are processed as expected. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr012 + repmgr heartbeat test. + + Start an appointed master and one client site. Set heartbeat + send and monitor values and process some transactions. Stop + sending heartbeats from master and verify that client sees + a dropped connection. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr013 + Site list test. + + Configure a master and two clients where one client is a peer of + the other and verify resulting site lists. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr017 + repmgr in-memory cache overflow test. + + Start an appointed master site and one client, putting databases, + environment regions, logs and replication files in-memory. Set + very small cachesize and run enough transactions to overflow cache. + Shut down and restart master and client, giving master a larger cache. + Run and verify a small number of transactions. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr018 + Check repmgr stats. 
+ + Start an appointed master and one client. Shut down the client, + run some transactions at the master and verify that there are + acknowledgement failures and one dropped connection. Shut down + and restart client again and verify that there are two dropped + connections. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr023 + Test of JOIN_FAILURE event for repmgr applications. + + Run for btree only because access method shouldn't matter. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr024 + Test of group-wide log archiving awareness. + Verify that log archiving will use the ack from the clients in + its decisions about what log files are allowed to be archived. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr025 + repmgr heartbeat rerequest test. + + Start an appointed master site and one client. Use a test hook + to inhibit PAGE_REQ processing at the master (i.e., "lose" some + messages). + Start a second client that gets stuck in internal init. Wait + long enough to rely on the heartbeat rerequest to request the + missing pages, rescind the test hook and verify that all + data appears on both clients. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr026 + Test of "full election" timeouts. + 1. Cold boot with all sites present. + 2. Cold boot with some sites missing. + 3. Partial-participation election with one client having seen a master, + but another just starting up fresh. + 4. Partial participation, with all participants already having seen a + master. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr027 + Test of "full election" timeouts, where a client starts up and joins the + group during the middle of an election. 
+ + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr028 + Repmgr allows applications to choose master explicitly, instead of + relying on elections. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr029 + Test repmgr group membership: create, join, re-join and remove from + repmgr group and observe changes in group membership database. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr030 + repmgr multiple client-to-client peer test. + + Start an appointed master and three clients. The third client + configures the other two clients as peers and delays client + sync. Add some data and confirm that the third client uses first + client as a peer. Close the master so that the first client now + becomes the master. Add some more data and confirm that the + third client now uses the second client as a peer. + + Run for btree only because access method shouldn't matter. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr032 + The (undocumented) AUTOROLLBACK config feature. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr100 + Basic test of repmgr's multi-process master support. + + Set up a simple 2-site group, create data and replicate it. + Add a second process at the master and have it write some + updates. It does not explicitly start repmgr (nor do any + replication configuration, for that matter). Its first + update triggers initiation of connections, and so it doesn't + get to the client without a log request. But later updates + should go directly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr101 + Repmgr support for multi-process master. + + Start two processes at the master. + Add a client site (not previously known to the master + processes), and make sure + both master processes connect to it. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr102 + Ensuring exactly one listener process. + + Start a repmgr process with a listener. 
+ Start a second process, and see that it does not become the listener. + Shut down the first process (gracefully). Now a second process should + become listener. + Kill the listener process abruptly. Running failchk should show that + recovery is necessary. Run recovery and start a clean listener. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr105 + Repmgr recognition of peer setting, across processes. + + Set up a master and two clients, synchronized with some data. + Add a new client, configured to use c2c sync with one of the original + clients. Check stats to make sure the correct c2c peer was used. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr106 + Simple smoke test for repmgr elections with multi-process envs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr107 + Repmgr combined with replication-unaware process at master. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr108 + Subordinate connections and processes should not trigger elections. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr109 + Test repmgr's internal juggling of peer EID's. + + Set up master and 2 clients, A and B. + Add a third client (C), with two processes. + The first process will be configured to know about A. + The second process will know about B, and set that as peer, + but when it joins the env site B will have to be shuffled + into a later position in the list, because A is already first. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr110 + Multi-process repmgr start-up policies. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr111 + Multi-process repmgr with env open before set local site. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +repmgr112 + Multi-process repmgr ack policies. + + Subordinate processes sending live log records must observe the + ack policy set by the main process. Also, a policy change made by a + subordinate process should be observed by all processes. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rsrc001 + Recno backing file test. Try different patterns of adding + records and making sure that the corresponding file matches. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rsrc002 + Recno backing file test #2: test of set_re_delim. Specify a backing + file with colon-delimited records, and make sure they are correctly + interpreted. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rsrc003 + Recno backing file test. Try different patterns of adding + records and making sure that the corresponding file matches. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +rsrc004 + Recno backing file test for EOF-terminated records. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +scr### + The scr### directories are shell scripts that test a variety of + things, including things about the distribution itself. These + tests won't run on most systems, so don't even try to run them. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb001 Tests mixing db and subdb operations + Tests mixing db and subdb operations + Create a db, add data, try to create a subdb. + Test naming db and subdb with a leading - for correct parsing + Existence check -- test use of -excl with subdbs + + Test non-subdb and subdb operations + Test naming (filenames begin with -) + Test existence (cannot create subdb of same name with -excl) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb002 + Tests basic subdb functionality + Small keys, small data + Put/get per key + Dump file + Close, reopen + Dump file + + Use the first 10,000 entries from the dictionary. + Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + Then repeat using an environment. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb003 + Tests many subdbs + Creates many subdbs and puts a small amount of + data in each (many defaults to 1000) + + Use the first 1000 entries from the dictionary as subdbnames. + Insert each with entry as name of subdatabase and a partial list + as key/data. After all are entered, retrieve all; compare output + to original. Close file, reopen, do retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb004 + Tests large subdb names + subdb name = filecontents, + key = filename, data = filecontents + Put/get per key + Dump file + Dump subdbs, verify data and subdb name match + + Create 1 db with many large subdbs. Use the contents as subdb names. + Take the source files and dbtest executable and enter their names as + the key with their contents as data. After all are entered, retrieve + all; compare output to original. Close file, reopen, do retrieve and + re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb005 + Tests cursor operations in subdbs + Put/get per key + Verify cursor operations work within subdb + Verify cursor operations do not work across subdbs + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb006 + Tests intra-subdb join + + We'll test 2-way, 3-way, and 4-way joins and figure that if those work, + everything else does as well. We'll create test databases called + sub1.db, sub2.db, sub3.db, and sub4.db. The number on the database + describes the duplication -- duplicates are of the form 0, N, 2N, 3N, + ... where N is the number of the database. Primary.db is the primary + database, and sub0.db is the database that has no matching duplicates. + All of these are within a single database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb007 + Tests page size difference errors between subdbs. + If the physical file already exists, we ignore pagesize specifications + on any subsequent -creates. + + 1. 
Create/open a subdb with system default page size. + Create/open a second subdb specifying a different page size. + The create should succeed, but the pagesize of the new db + will be the system default page size. + 2. Create/open a subdb with a specified, non-default page size. + Create/open a second subdb specifying a different page size. + The create should succeed, but the pagesize of the new db + will be the specified page size from the first create. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb008 + Tests explicit setting of lorders for subdatabases -- the + lorder should be ignored. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb009 + Test DB->rename() method for subdbs + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb010 + Test DB->remove() method and DB->truncate() for subdbs + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb011 + Test deleting Subdbs with overflow pages + Create 1 db with many large subdbs. + Test subdatabases with overflow pages. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb012 + Test subdbs with locking and transactions + Tests creating and removing subdbs while handles + are open works correctly, and in the face of txns. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb013 + Tests in-memory subdatabases. + Create an in-memory subdb. Test for persistence after + overflowing the cache. Test for conflicts when we have + two in-memory files. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb014 + Tests mixing in-memory named and in-memory unnamed dbs. + Create a regular in-memory db, add data. + Create a named in-memory db. + Try to create the same named in-memory db again (should fail). + Try to create a different named in-memory db (should succeed). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb015 + Tests basic in-memory named database functionality + Small keys, small data + Put/get per key + + Use the first 10,000 entries from the dictionary. 
+ Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + Then repeat using an environment. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb016 + Creates many in-memory named dbs and puts a small amount of + data in each (many defaults to 100) + + Use the first 100 entries from the dictionary as names. + Insert each with entry as name of subdatabase and a partial list + as key/data. After all are entered, retrieve all; compare output + to original. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb017 + Test DB->rename() for in-memory named databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb018 + Tests join of in-memory named databases. + + We'll test 2-way, 3-way, and 4-way joins and figure that if those work, + everything else does as well. We'll create test databases called + sub1.db, sub2.db, sub3.db, and sub4.db. The number on the database + describes the duplication -- duplicates are of the form 0, N, 2N, 3N, + ... where N is the number of the database. Primary.db is the primary + database, and sub0.db is the database that has no matching duplicates. + All of these are within a single database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb019 + Tests in-memory subdatabases. + Create an in-memory subdb. Test for persistence after + overflowing the cache. Test for conflicts when we have + two in-memory files. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdb020 + Tests in-memory subdatabases. + Create an in-memory subdb with one page size. Close, and + open with a different page size: should fail. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdbtest001 + Tests multiple access methods in one subdb + Open several subdbs, each with a different access method + Small keys, small data + Put/get per key per subdb + Dump file, verify per subdb + Close, reopen per subdb + Dump file, verify per subdb + + Make several subdb's of different access methods all in one DB. + Rotate methods and repeat [#762]. + Use the first 10,000 entries from the dictionary. + Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sdbtest002 + Tests multiple access methods in one subdb access by multiple + processes. + Open several subdbs, each with a different access method + Small keys, small data + Put/get per key per subdb + Fork off several child procs to each delete selected + data from their subdb and then exit + Dump file, verify contents of each subdb is correct + Close, reopen per subdb + Dump file, verify per subdb + + Make several subdb's of different access methods all in one DB. + Fork off some child procs to each manipulate one subdb and when + they are finished, verify the contents of the databases. + Use the first 10,000 entries from the dictionary. + Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sec001 + Test of security interface + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sec002 + Test of security interface and catching errors in the + face of attackers overwriting parts of existing files. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si001 + Secondary index put/delete with lorder test + + Put data in primary db and check that pget on secondary + index finds the right entries. 
Alter the primary in the + following ways, checking for correct data each time: + Overwrite data in primary database. + Delete half of entries through primary. + Delete half of remaining entries through secondary. + Append data (for record-based primaries only). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si002 + Basic cursor-based secondary index put/delete test + + Cursor put data in primary db and check that pget + on secondary index finds the right entries. + Open and use a second cursor to exercise the cursor + comparison API on secondaries. + Overwrite while walking primary, check pget again. + Overwrite while walking secondary (use c_pget), check + pget again. + Cursor delete half of entries through primary, check. + Cursor delete half of remainder through secondary, check. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si003 + si001 with secondaries created and closed mid-test + Basic secondary index put/delete test with secondaries + created mid-test. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si004 + si002 with secondaries created and closed mid-test + Basic cursor-based secondary index put/delete test, with + secondaries created mid-test. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si005 + Basic secondary index put/delete test with transactions + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si006 + + Test -immutable_key interface. + + DB_IMMUTABLE_KEY is an optimization to be used when a + secondary key will not be changed. It does not prevent + a deliberate change to the secondary key, it just does not + propagate that change when it is made to the primary. + This test verifies that a change to the primary is propagated + to the secondary or not as specified by -immutable_key. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si007 + Secondary index put/delete with lorder test + + This test is the same as si001 with the exception + that we create and populate the primary and THEN + create the secondaries and associate them with -create. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +si008 + Secondary index put/delete with lorder test + + This test is the same as si001 except that we + create the secondaries with different byte orders: + one native, one swapped. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sijointest: Secondary index and join test. + This used to be si005.tcl. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +sql001 + Test db_replicate using a simple SQL app. + + Start db_replicate on master side and client side, + and do various operations using dbsql on master side. + After every operation, we will check the records on both sides, + to make sure we get same results from both sides. + Also try an insert operation on client side; it should fail. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test001 + Small keys/data + Put/get per key + Dump file + Close, reopen + Dump file + + Use the first 10,000 entries from the dictionary. + Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test002 + Small keys/medium data + Put/get per key + Dump file + Close, reopen + Dump file + + Use the first 10,000 entries from the dictionary. + Insert each with self as key and a fixed, medium length data string; + retrieve each. After all are entered, retrieve all; compare output + to original. Close file, reopen, do retrieve and re-verify. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test003 + Small keys/large data + Put/get per key + Dump file + Close, reopen + Dump file + + Take the source files and dbtest executable and enter their names + as the key with their contents as data. After all are entered, + retrieve all; compare output to original. Close file, reopen, do + retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test004 + Small keys/medium data + Put/get per key + Sequential (cursor) get/delete + + Check that cursor operations work. Create a database. + Read through the database sequentially using cursors and + delete each element. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test005 + Small keys/medium data + Put/get per key + Close, reopen + Sequential (cursor) get/delete + + Check that cursor operations work. Create a database; close + it and reopen it. Then read through the database sequentially + using cursors and delete each element. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test006 + Small keys/medium data + Put/get per key + Keyed delete and verify + + Keyed delete test. + Create database. + Go through database, deleting all entries by key. + Then do the same for unsorted and sorted dups. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test007 + Small keys/medium data + Put/get per key + Close, reopen + Keyed delete + + Check that delete operations work. Create a database; close + database and reopen it. Then issues delete by key for each + entry. (Test006 plus reopen) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test008 + Small keys/large data + Put/get per key + Loop through keys by steps (which change) + ... delete each key at step + ... add each key back + ... change step + Confirm that overflow pages are getting reused + + Take the source files and dbtest executable and enter their names as + the key with their contents as data. 
+ After all are entered, begin
+ looping through the entries; deleting some pairs and then re-adding them.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test009
+ Small keys/large data
+ Same as test008; close and reopen database
+
+ Check that we reuse overflow pages. Create database with lots of
+ big key/data pairs. Go through and delete and add keys back
+ randomly. Then close the DB and make sure that we have everything
+ we think we should.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test010
+ Duplicate test
+ Small key/data pairs.
+
+ Use the first 10,000 entries from the dictionary.
+ Insert each with self as key and data; add duplicate records for each.
+ After all are entered, retrieve all; verify output.
+ Close file, reopen, do retrieve and re-verify.
+ This does not work for recno
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test011
+ Duplicate test
+ Small key/data pairs.
+ Test DB_KEYFIRST, DB_KEYLAST, DB_BEFORE and DB_AFTER.
+ To test off-page duplicates, run with small pagesize.
+
+ Use the first 10,000 entries from the dictionary.
+ Insert each with self as key and data; add duplicate records for each.
+ Then do some key_first/key_last add_before, add_after operations.
+ This does not work for recno
+
+ To test if dups work when they fall off the main page, run this with
+ a very tiny page size.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test012
+ Large keys/small data
+ Same as test003 except use big keys (source files and
+ executables) and small data (the file/executable names).
+
+ Take the source files and dbtest executable and enter their contents
+ as the key with their names as data. After all are entered, retrieve
+ all; compare output to original. Close file, reopen, do retrieve and
+ re-verify.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test013
+ Partial put test
+ Overwrite entire records using partial puts.
+ Make sure that NOOVERWRITE flag works.
+
+ 1.
Insert 10000 keys and retrieve them (equal key/data pairs). + 2. Attempt to overwrite keys with NO_OVERWRITE set (expect error). + 3. Actually overwrite each one with its datum reversed. + + No partial testing here. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test014 + Exercise partial puts on short data + Run 5 combinations of numbers of characters to replace, + and number of times to increase the size by. + + Partial put test, small data, replacing with same size. The data set + consists of the first nentries of the dictionary. We will insert them + (and retrieve them) as we do in test 1 (equal key/data pairs). Then + we'll try to perform partial puts of some characters at the beginning, + some at the end, and some at the middle. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test015 + Partial put test + Partial put test where the key does not initially exist. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test016 + Partial put test + Partial put where the datum gets shorter as a result of the put. + + Partial put test where partial puts make the record smaller. + Use the first 10,000 entries from the dictionary. + Insert each with self as key and a fixed, medium length data string; + retrieve each. After all are entered, go back and do partial puts, + replacing a random-length string with the key value. + Then verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test017 + Basic offpage duplicate test. + + Run duplicates with small page size so that we test off page duplicates. + Then after we have an off-page database, test with overflow pages too. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test018 + Offpage duplicate test + Key_{first,last,before,after} offpage duplicates. + Run duplicates with small page size so that we test off page + duplicates. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test019 + Partial get test. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test020 + In-Memory database tests. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test021 + Btree range tests. + + Use the first 10,000 entries from the dictionary. + Insert each with self, reversed as key and self as data. + After all are entered, retrieve each using a cursor SET_RANGE, and + getting about 20 keys sequentially after it (in some cases we'll + run out towards the end of the file). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test022 + Test of DB->getbyteswapped(). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test023 + Duplicate test + Exercise deletes and cursor operations within a duplicate set. + Add a key with duplicates (first time on-page, second time off-page) + Number the dups. + Delete dups and make sure that CURRENT/NEXT/PREV work correctly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test024 + Record number retrieval test. + Test the Btree and Record number get-by-number functionality. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test025 + DB_APPEND flag test. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test026 + Small keys/medium data w/duplicates + Put/get per key. + Loop through keys -- delete each key + ... test that cursors delete duplicates correctly + + Keyed delete test through cursor. If ndups is small; this will + test on-page dups; if it's large, it will test off-page dups. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test027 + Off-page duplicate test + Test026 with parameters to force off-page duplicates. + + Check that delete operations work. Create a database; close + database and reopen it. Then issues delete by key for each + entry. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test028 + Cursor delete test + Test put operations after deleting through a cursor. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test029 + Test the Btree and Record number renumbering. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test030 + Test DB_NEXT_DUP Functionality. 
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test031
+ Duplicate sorting functionality
+ Make sure DB_NODUPDATA works.
+
+ Use the first 10,000 entries from the dictionary.
+ Insert each with self as key and "ndups" duplicates
+ For the data field, prepend random five-char strings (see test032)
+ so that we force the duplicate sorting code to do something.
+ Along the way, test that we cannot insert duplicate duplicates
+ using DB_NODUPDATA.
+
+ By setting ndups large, we can make this an off-page test
+ After all are entered, retrieve all; verify output.
+ Close file, reopen, do retrieve and re-verify.
+ This does not work for recno
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test032
+ DB_GET_BOTH, DB_GET_BOTH_RANGE
+
+ Use the first 10,000 entries from the dictionary. Insert each with
+ self as key and "ndups" duplicates. For the data field, prepend the
+ letters of the alphabet in a random order so we force the duplicate
+ sorting code to do something. By setting ndups large, we can make
+ this an off-page test. By setting overflow to be 1, we can make
+ this an overflow test.
+
+ Test the DB_GET_BOTH functionality by retrieving each dup in the file
+ explicitly. Test the DB_GET_BOTH_RANGE functionality by retrieving
+ the unique key prefix (cursor only). Finally test the failure case.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test033
+ DB_GET_BOTH without comparison function
+
+ Use the first 10,000 entries from the dictionary. Insert each with
+ self as key and data; add duplicate records for each. After all are
+ entered, retrieve all and verify output using DB_GET_BOTH (on DB and
+ DBC handles) and DB_GET_BOTH_RANGE (on a DBC handle) on existent and
+ nonexistent keys.
+
+ XXX
+ This does not work for rbtree.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test034
+ test032 with off-page or overflow case with non-duplicates
+ and duplicates.
+
+ DB_GET_BOTH, DB_GET_BOTH_RANGE functionality with off-page
+ or overflow case within non-duplicates and duplicates.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test035
+ Test033 with off-page non-duplicates and duplicates
+ DB_GET_BOTH functionality with off-page non-duplicates
+ and duplicates.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test036
+ Test KEYFIRST and KEYLAST when the key doesn't exist
+ Put nentries key/data pairs (from the dictionary) using a cursor
+ and KEYFIRST and KEYLAST (this tests the case where we use cursor
+ put for non-existent keys).
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test037
+ Test DB_RMW
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test038
+ DB_GET_BOTH, DB_GET_BOTH_RANGE on deleted items
+
+ Use the first 10,000 entries from the dictionary. Insert each with
+ self as key and "ndups" duplicates. For the data field, prepend the
+ letters of the alphabet in a random order so we force the duplicate
+ sorting code to do something. By setting ndups large, we can make
+ this an off-page test
+
+ Test the DB_GET_BOTH and DB_GET_BOTH_RANGE functionality by retrieving
+ each dup in the file explicitly. Then remove each duplicate and try
+ the retrieval again.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test039
+ DB_GET_BOTH/DB_GET_BOTH_RANGE on deleted items without comparison
+ function.
+
+ Use the first 10,000 entries from the dictionary. Insert each with
+ self as key and "ndups" duplicates. For the data field, prepend the
+ letters of the alphabet in a random order so we force the duplicate
+ sorting code to do something. By setting ndups large, we can make
+ this an off-page test.
+
+ Test the DB_GET_BOTH and DB_GET_BOTH_RANGE functionality by retrieving
+ each dup in the file explicitly. Then remove each duplicate and try
+ the retrieval again.
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test040 + Test038 with off-page duplicates + DB_GET_BOTH functionality with off-page duplicates. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test041 + Test039 with off-page duplicates + DB_GET_BOTH functionality with off-page duplicates. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test042 + Concurrent Data Store test (CDB) + + Multiprocess DB test; verify that locking is working for the + concurrent access method product. + + Use the first "nentries" words from the dictionary. Insert each with + self as key and a fixed, medium length data string. Then fire off + multiple processes that bang on the database. Each one should try to + read and write random keys. When they rewrite, they'll append their + pid to the data string (sometimes doing a rewrite sometimes doing a + partial put). Some will use cursors to traverse through a few keys + before finding one to write. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test043 + Recno renumbering and implicit creation test + Test the Record number implicit creation and renumbering options. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test044 + Small system integration tests + Test proper functioning of the checkpoint daemon, + recovery, transactions, etc. + + System integration DB test: verify that locking, recovery, checkpoint, + and all the other utilities basically work. + + The test consists of $nprocs processes operating on $nfiles files. A + transaction consists of adding the same key/data pair to some random + number of these files. We generate a bimodal distribution in key size + with 70% of the keys being small (1-10 characters) and the remaining + 30% of the keys being large (uniform distribution about mean $key_avg). + If we generate a key, we first check to make sure that the key is not + already in the dataset. If it is, we do a lookup. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test045 + Small random tester + Runs a number of random add/delete/retrieve operations. + Tests both successful conditions and error conditions. + + Run the random db tester on the specified access method. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test046 + Overwrite test of small/big key/data with cursor checks. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test047 + DBcursor->c_get get test with SET_RANGE option. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test048 + Cursor stability across Btree splits. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test049 + Cursor operations on uninitialized cursors. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test050 + Overwrite test of small/big key/data with cursor checks for Recno. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test051 + Fixed-length record Recno test. + 0. Test various flags (legal and illegal) to open + 1. Test partial puts where dlen != size (should fail) + 2. Partial puts for existent record -- replaces at beg, mid, and + end of record, as well as full replace + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test052 + Renumbering record Recno test. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test053 + Test of the DB_REVSPLITOFF flag in the Btree and Btree-w-recnum + methods. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test054 + Cursor maintenance during key/data deletion. + + This test checks for cursor maintenance in the presence of deletes. + There are N different scenarios to tests: + 1. No duplicates. Cursor A deletes a key, do a GET for the key. + 2. No duplicates. Cursor is positioned right before key K, Delete K, + do a next on the cursor. + 3. No duplicates. Cursor is positioned on key K, do a regular delete + of K, do a current get on K. + 4. Repeat 3 but do a next instead of current. + 5. Duplicates. 
Cursor A is on the first item of a duplicate set, A + does a delete. Then we do a non-cursor get. + 6. Duplicates. Cursor A is in a duplicate set and deletes the item. + do a delete of the entire Key. Test cursor current. + 7. Continue last test and try cursor next. + 8. Duplicates. Cursor A is in a duplicate set and deletes the item. + Cursor B is in the same duplicate set and deletes a different item. + Verify that the cursor is in the right place. + 9. Cursors A and B are in the place in the same duplicate set. A + deletes its item. Do current on B. + 10. Continue 8 and do a next on B. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test055 + Basic cursor operations. + This test checks basic cursor operations. + There are N different scenarios to tests: + 1. (no dups) Set cursor, retrieve current. + 2. (no dups) Set cursor, retrieve next. + 3. (no dups) Set cursor, retrieve prev. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test056 + Cursor maintenance during deletes. + Check if deleting a key when a cursor is on a duplicate of that + key works. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test057 + Cursor maintenance during key deletes. + 1. Delete a key with a cursor. Add the key back with a regular + put. Make sure the cursor can't get the new item. + 2. Put two cursors on one item. Delete through one cursor, + check that the other sees the change. + 3. Same as 2, with the two cursors on a duplicate. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test058 + Verify that deleting and reading duplicates results in correct ordering. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test059 + Cursor ops work with a partial length of 0. + Make sure that we handle retrieves of zero-length data items correctly. + The following ops, should allow a partial data retrieve of 0-length. 
+ db_get + db_cget FIRST, NEXT, LAST, PREV, CURRENT, SET, SET_RANGE + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test060 + Test of the DB_EXCL flag to DB->open(). + 1) Attempt to open and create a nonexistent database; verify success. + 2) Attempt to reopen it; verify failure. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test061 + Test of txn abort and commit for in-memory databases. + a) Put + abort: verify absence of data + b) Put + commit: verify presence of data + c) Overwrite + abort: verify that data is unchanged + d) Overwrite + commit: verify that data has changed + e) Delete + abort: verify that data is still present + f) Delete + commit: verify that data has been deleted + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test062 + Test of partial puts (using DB_CURRENT) onto duplicate pages. + Insert the first 200 words into the dictionary 200 times each with + self as key and :self as data. Use partial puts to + append self again to data; verify correctness. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test063 + Test of the DB_RDONLY flag to DB->open + Attempt to both DB->put and DBC->c_put into a database + that has been opened DB_RDONLY, and check for failure. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test064 + Test of DB->get_type + Create a database of type specified by method. + Make sure DB->get_type returns the right thing with both a normal + and DB_UNKNOWN open. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test065 + Test of DB->stat, both -DB_FAST_STAT and row + counts with DB->stat -txn. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test066 + Test of cursor overwrites of DB_CURRENT w/ duplicates. + + Make sure a cursor put to DB_CURRENT acts as an overwrite in a + database with duplicates. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test067 + Test of DB_CURRENT partial puts onto almost empty duplicate + pages, with and without DB_DUP_SORT. 
+
+ Test of DB_CURRENT partial puts on almost-empty duplicate pages.
+ This test was written to address the following issue, #2 in the
+ list of issues relating to bug #0820:
+
+ 2. DBcursor->put, DB_CURRENT flag, off-page duplicates, hash and btree:
+ In Btree, the DB_CURRENT overwrite of off-page duplicate records
+ first deletes the record and then puts the new one -- this could
+ be a problem if the removal of the record causes a reverse split.
+ Suggested solution is to acquire a cursor to lock down the current
+ record, put a new record after that record, and then delete using
+ the held cursor.
+
+ It also tests the following, #5 in the same list of issues:
+ 5. DBcursor->put, DB_AFTER/DB_BEFORE/DB_CURRENT flags, DB_DBT_PARTIAL
+ set, duplicate comparison routine specified.
+ The partial change does not change how data items sort, but the
+ record to be put isn't built yet, and that record supplied is the
+ one that's checked for ordering compatibility.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test068
+ Test of DB_BEFORE and DB_AFTER with partial puts.
+ Make sure DB_BEFORE and DB_AFTER work properly with partial puts, and
+ check that they return EINVAL if DB_DUPSORT is set or if DB_DUP is not.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test069
+ Test of DB_CURRENT partial puts without duplicates-- test067 w/
+ small ndups to ensure that partial puts to DB_CURRENT work
+ correctly in the absence of duplicate pages.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test070
+ Test of DB_CONSUME (Four consumers, 1000 items.)
+
+ Fork off six processes, four consumers and two producers.
+ The producers will each put 20000 records into a queue;
+ the consumers will each get 10000.
+ Then, verify that no record was lost or retrieved twice.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+test071
+ Test of DB_CONSUME (One consumer, 10000 items.)
+ This is DB Test 70, with one consumer, one producer, and 10000 items.
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test072 + Test of cursor stability when duplicates are moved off-page. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test073 + Test of cursor stability on duplicate pages. + + Does the following: + a. Initialize things by DB->putting ndups dups and + setting a reference cursor to point to each. + b. c_put ndups dups (and correspondingly expanding + the set of reference cursors) after the last one, making sure + after each step that all the reference cursors still point to + the right item. + c. Ditto, but before the first one. + d. Ditto, but after each one in sequence first to last. + e. Ditto, but after each one in sequence from last to first. + occur relative to the new datum) + f. Ditto for the two sequence tests, only doing a + DBC->c_put(DB_CURRENT) of a larger datum instead of adding a + new one. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test074 + Test of DB_NEXT_NODUP. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test076 + Test creation of many small databases in a single environment. [#1528]. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test077 + Test of DB_GET_RECNO [#1206]. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test078 + Test of DBC->c_count(). [#303] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test079 + Test of deletes in large trees. (test006 w/ sm. pagesize). + + Check that delete operations work in large btrees. 10000 entries + and a pagesize of 512 push this out to a four-level btree, with a + small fraction of the entries going on overflow pages. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test081 + Test off-page duplicates and overflow pages together with + very large keys (key/data as file contents). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test082 + Test of DB_PREV_NODUP (uses test074). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test083 + Test of DB->key_range. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test084 + Basic sanity test (test001) with large (64K) pages. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test085 + Test of cursor behavior when a cursor is pointing to a deleted + btree key which then has duplicates added. [#2473] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test086 + Test of cursor stability across btree splits/rsplits with + subtransaction aborts (a variant of test048). [#2373] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test087 + Test of cursor stability when converting to and modifying + off-page duplicate pages with subtransaction aborts. [#2373] + + Does the following: + a. Initialize things by DB->putting ndups dups and + setting a reference cursor to point to each. Do each put twice, + first aborting, then committing, so we're sure to abort the move + to off-page dups at some point. + b. c_put ndups dups (and correspondingly expanding + the set of reference cursors) after the last one, making sure + after each step that all the reference cursors still point to + the right item. + c. Ditto, but before the first one. + d. Ditto, but after each one in sequence first to last. + e. Ditto, but after each one in sequence from last to first. + occur relative to the new datum) + f. Ditto for the two sequence tests, only doing a + DBC->c_put(DB_CURRENT) of a larger datum instead of adding a + new one. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test088 + Test of cursor stability across btree splits with very + deep trees (a variant of test048). [#2514] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test089 + Concurrent Data Store test (CDB) + + Enhanced CDB testing to test off-page dups, cursor dups and + cursor operations like c_del then c_get. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test090 + Test for functionality near the end of the queue using test001. 
+ +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test091 + Test of DB_CONSUME_WAIT. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test092 + Test of DB_DIRTY_READ [#3395] + + We set up a database with nentries in it. We then open the + database read-only twice. One with dirty reads and one without. + We open the database for writing and update some entries in it. + Then read those new entries via db->get (clean and dirty), and + via cursors (clean and dirty). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test093 + Test set_bt_compare (btree key comparison function) and + set_h_compare (hash key comparison function). + + Open a database with a comparison function specified, + populate, and close, saving a list with that key order as + we do so. Reopen and read in the keys, saving in another + list; the keys should be in the order specified by the + comparison function. Sort the original saved list of keys + using the comparison function, and verify that it matches + the keys as read out of the database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test094 + Test using set_dup_compare. + + Use the first 10,000 entries from the dictionary. + Insert each with self as key and data; retrieve each. + After all are entered, retrieve all; compare output to original. + Close file, reopen, do retrieve and re-verify. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test095 + Bulk get test for methods supporting dups. [#2934] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test096 + Db->truncate test. + For all methods: + Test that truncate empties an existing database. + Test that truncate-write in an aborted txn doesn't + change the original contents. + Test that truncate-write in a committed txn does + overwrite the original contents. + For btree and hash, do the same in a database with offpage dups. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test097 + Open up a large set of database files simultaneously. 
+ Adjust for local file descriptor resource limits. + Then use the first 1000 entries from the dictionary. + Insert each with self as key and a fixed, medium length data string; + retrieve each. After all are entered, retrieve all; compare output + to original. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test098 + Test of DB_GET_RECNO and secondary indices. Open a primary and + a secondary, and do a normal cursor get followed by a get_recno. + (This is a smoke test for "Bug #1" in [#5811].) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test099 + + Test of DB->get and DBC->c_get with set_recno and get_recno. + + Populate a small btree -recnum database. + After all are entered, retrieve each using -recno with DB->get. + Open a cursor and do the same for DBC->c_get with set_recno. + Verify that set_recno sets the record number position properly. + Verify that get_recno returns the correct record numbers. + + Using the same database, open 3 cursors and position one at + the beginning, one in the middle, and one at the end. Delete + by cursor and check that record renumbering is done properly. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test100 + Test for functionality near the end of the queue + using test025 (DB_APPEND). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test101 + Test for functionality near the end of the queue + using test070 (DB_CONSUME). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test102 + Bulk get test for record-based methods. [#2934] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test103 + Test bulk get when record numbers wrap around. + + Load database with items starting before and ending after + the record number wrap around point. Run bulk gets (-multi_key) + with various buffer sizes and verify the contents returned match + the results from a regular cursor get. + + Then delete items to create a sparse database and make sure it + still works. 
Test both -multi and -multi_key since they behave + differently. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test106 + + + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test107 + Test of read-committed (degree 2 isolation). [#8689] + + We set up a database. Open a read-committed transactional cursor and + a regular transactional cursor on it. Position each cursor on one page, + and do a put to a different page. + + Make sure that: + - the put succeeds if we are using degree 2 isolation. + - the put deadlocks within a regular transaction with + a regular cursor. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test109 + + Test of sequences. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test110 + Partial get test with duplicates. + + For hash and btree, create and populate a database + with dups. Randomly selecting offset and length, + retrieve data from each record and make sure we + get what we expect. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test111 + Test database compaction. + + Populate a database. Remove a high proportion of entries. + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Add back some entries, delete more entries (this time by + cursor), dump, compact, and do the before/after check again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test112 + Test database compaction with a deep tree. + + This is a lot like test111, but with a large number of + entries and a small page size to make the tree deep. + To make it simple we use numerical keys all the time. + + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Add back some entries, delete more entries (this time by + cursor), dump, compact, and do the before/after check again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test113 + Test database compaction with duplicates. + + This is essentially test111 with duplicates. 
+ To make it simple we use numerical keys all the time. + + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Add back some entries, delete more entries (this time by + cursor), dump, compact, and do the before/after check again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test114 + Test database compaction with overflows. + + Populate a database. Remove a high proportion of entries. + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Add back some entries, delete more entries (this time by + cursor), dump, compact, and do the before/after check again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test115 + Test database compaction with user-specified btree sort. + + This is essentially test111 with the user-specified sort. + Populate a database. Remove a high proportion of entries. + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Add back some entries, delete more entries (this time by + cursor), dump, compact, and do the before/after check again. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test116 + Test of basic functionality of lsn_reset. + + Create a database in an env. Copy it to a new file within + the same env. Reset the page LSNs. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test117 + Test database compaction with requested fill percent. + + Populate a database. Remove a high proportion of entries. + Dump and save contents. Compact the database, requesting + fill percentages starting at 10% and working our way up to + 100. On each cycle, make sure we still have the same contents. + + Unlike the other compaction tests, this one does not + use -freespace. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test119 + Test behavior when Berkeley DB returns DB_BUFFER_SMALL on a cursor. 
+ + If the user-supplied buffer is not large enough to contain + the returned value, DB returns DB_BUFFER_SMALL. If it does, + check that the cursor does not move -- if it moves, it will + skip items. [#13815] + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test120 + Test of multi-version concurrency control. + + Test basic functionality: a snapshot transaction started + before a regular transaction's put can't see the modification. + A snapshot transaction started after the put can see it. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test121 + Tests of multi-version concurrency control. + + MVCC and cursor adjustment. + Set up a -snapshot cursor and position it in the middle + of a database. + Write to the database, both before and after the cursor, + and verify that it stays on the same position. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test122 + Tests of multi-version concurrency control. + + MVCC and databases that turn multi-version on and off. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test123 + Concurrent Data Store cdsgroup smoke test. + + Open a CDS env with -cdb_alldb. + Start a "txn" with -cdsgroup. + Create two databases in the env, do a cursor put + in both within the same txn. This should succeed. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test124 + + Test db->verify with noorderchk and orderchkonly flags. + + Create a db with a non-standard sort order. Check that + it fails a regular verify and succeeds with -noorderchk. + Do a similar test with a db containing subdbs, one with + the standard order and another with non-standard. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test125 + Test cursor comparison API. + + The cursor comparison API reports whether two cursors within + the same database are at the same position. It does not report + any information about relative position. + + 1. Test two uninitialized cursors (error). + 2. Test one uninitialized cursor, one initialized (error). + 3. 
Test two cursors in different databases (error). + 4. Put two cursors in the same place, test for match. Walk + them back and forth a bit, more matching. + 5. Two cursors in the same spot. Delete through one. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test126 + Test database bulk update for non-duplicate databases. + + Put with -multiple, then with -multiple_key, + and make sure the items in database are what we put. + Later, delete some items with -multiple, then with -multiple_key, + and make sure the correct items are deleted. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test127 + Test database bulk update. + + This is essentially test126 with duplicates. + To make it simple we use numerical keys all the time. + + Put with -multiple, then with -multiple_key, + and make sure the items in database are what we want. + Later, delete some items with -multiple, then with -multiple_key, + and make sure the correct items are deleted. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test128 + Test database bulk update for sub database and duplicate database. + + This is essentially test126 with sub database and secondary database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test129 + Test database bulk update for duplicate sub database. + + This is essentially test127 with sub database. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test130 + Test moving of subdatabase metadata pages. + + Populate num_db sub-database. Open multiple handles on each. + Remove a high proportion of entries. + Dump and save contents. Compact the database, dump again, + and make sure we still have the same contents. + Make sure handles and cursors still work after compaction. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test131 + Test foreign database operations. + Create a foreign db, and put some records into it. + Then associate the foreign db with a secondary db, and + put records into the primary db. 
+ Do operations in the foreign db and check results. + Finally, verify the foreign relation between the foreign db + and secondary db. + Here, we test three different foreign delete constraints: + - DB_FOREIGN_ABORT + - DB_FOREIGN_CASCADE + - DB_FOREIGN_NULLIFY + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test132 + Test foreign database operations on sub databases and + in-memory databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test133 + Test Cursor Cleanup. + Open a primary database and a secondary database, + then open 3 cursors on the secondary database, and + point them at the first item. + Do the following operations in loops: + * The 1st cursor will delete the current item. + * The 2nd cursor will also try to delete the current item. + * Move all the 3 cursors to get the next item and check the returns. + Finally, move the 3rd cursor once. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +test134 + Test cursor cleanup for sub databases. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn001 + Begin, commit, abort testing. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn002 + Verify that read-only transactions do not write log records. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn003 + Test abort/commit/prepare of txns with outstanding child txns. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn004 + Test of wraparound txnids (txn001) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn005 + Test transaction ID wraparound and recovery. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn008 + Test of wraparound txnids (txn002) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn009 + Test of wraparound txnids (txn003) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn010 + Test DB_ENV->txn_checkpoint arguments/flags + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn011 + Test durable and non-durable txns. 
+ Test a mixed env (with both durable and non-durable + dbs), then a purely non-durable env. Make sure commit + and abort work, and that only the log records we + expect are written. + Test that we can't get a durable handle on an open ND + database, or vice versa. Test that all subdb's + must be of the same type (D or ND). + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn012 + Test txn->getname and txn->setname. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn013 + Test of txns used in the wrong environment. + Set up two envs. Start a txn in one env, and attempt to use it + in the other env. Verify we get the appropriate error message. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +txn014 + Test of parent and child txns working on the same database. + A txn that will become a parent create a database. + A txn that will not become a parent creates another database. + Start a child txn of the 1st txn. + Verify that the parent txn is disabled while child is open. + 1. Child reads contents with child handle (should succeed). + 2. Child reads contents with parent handle (should succeed). + Verify that the non-parent txn can read from its database, + and that the child txn cannot. + Return to the child txn. + 3. Child writes with child handle (should succeed). + 4. Child writes with parent handle (should succeed). + + Commit the child, verify that the parent can write again. + Check contents of database with a second child. diff --git a/test/tcl/archive.tcl b/test/tcl/archive.tcl new file mode 100644 index 00000000..48310101 --- /dev/null +++ b/test/tcl/archive.tcl @@ -0,0 +1,255 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# Options are: +# -checkrec +# -maxfilesize +proc archive { { inmem 0 } args } { + global alphabet + source ./include.tcl + + # Set defaults + if { $inmem == 1 } { + set maxbsize [expr 8 * [expr 1024 * 1024]] + set desc "in-memory" + } else { + set maxbsize [expr 8 * 1024] + set desc "on-disk" + } + set maxfile [expr 32 * 1024] + set checkrec 500 + for { set i 0 } { $i < [llength $args] } {incr i} { + switch -regexp -- [lindex $args $i] { + -c.* { incr i; set checkrec [lindex $args $i] } + -d.* { incr i; set testdir [lindex $args $i] } + -m.* { incr i; set maxfile [lindex $args $i] } + default { + puts "FAIL:[timestamp] archive usage" + puts "usage: archive -checkrec \ + -dir -maxfilesize " + return + } + } + } + + # Clean out old log if it existed + puts "Archive: Log archive test (using $desc logging)." + puts "Unlinking log: error message OK" + env_cleanup $testdir + + # Now run the various functionality tests + if { $inmem == 0 } { + set eflags "-create -txn -home $testdir \ + -log_buffer $maxbsize -log_max $maxfile" + } else { + set eflags "-create -txn -home $testdir -log_inmemory \ + -log_buffer $maxbsize -log_max $maxfile" + } + set dbenv [eval {berkdb_env} $eflags] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set logc [$dbenv log_cursor] + error_check_good log_cursor [is_valid_logc $logc $dbenv] TRUE + + # The basic test structure here is that we write a lot of log + # records (enough to fill up 100 log files; each log file it + # small). We start with three txns and open a database in + # each transaction. Then, in a loop, we take periodic + # checkpoints. Between each pair of checkpoints, we end one + # transaction; when no transactions are left, we start up three + # new ones, letting them overlap checkpoints as well. + # + # The pattern that we create is: + # 1. Create TXN1, TXN2, TXN3 and open dbs within the txns. + # 2. Write a bunch of additional log records. + # 3. Checkpoint. + # 4. 
Archive, checking that we list the right files. + # 5. Commit one transaction. + # 6. If no txns left, start 3 new ones. + # 7. Until we've gone through enough records, return to step 2. + + set baserec "1:$alphabet:2:$alphabet:3:$alphabet:4:$alphabet" + puts "\tArchive.a: Writing log records; checkpoint every $checkrec records" + set nrecs $maxfile + set rec 0:$baserec + + # Begin 1st transaction and record current log file. Open + # a database in the transaction; the log file won't be + # removable until the transaction is aborted or committed. + set t1 [$dbenv txn] + error_check_good t1:txn_begin [is_valid_txn $t1 $dbenv] TRUE + + set l1 [lindex [lindex [$logc get -last] 0] 0] + set lsnlist [list $l1] + + set tdb1 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t1 -btree tdb1.db] + error_check_good dbopen [is_valid_db $tdb1] TRUE + + # Do the same for a 2nd and 3rd transaction. + set t2 [$dbenv txn] + error_check_good t2:txn_begin [is_valid_txn $t2 $dbenv] TRUE + set l2 [lindex [lindex [$logc get -last] 0] 0] + lappend lsnlist $l2 + set tdb2 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t2 -btree tdb2.db] + error_check_good dbopen [is_valid_db $tdb2] TRUE + + set t3 [$dbenv txn] + error_check_good t3:txn_begin [is_valid_txn $t3 $dbenv] TRUE + set l3 [lindex [lindex [$logc get -last] 0] 0] + lappend lsnlist $l3 + set tdb3 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t3 -btree tdb3.db] + error_check_good dbopen [is_valid_db $tdb3] TRUE + + # Keep a list of active transactions and databases opened + # within those transactions. + set txnlist [list "$t1 $tdb1" "$t2 $tdb2" "$t3 $tdb3"] + + # Loop through a large number of log records, checkpointing + # and checking db_archive periodically. 
+ for { set i 1 } { $i <= $nrecs } { incr i } { + set rec $i:$baserec + set lsn [$dbenv log_put $rec] + error_check_bad log_put [llength $lsn] 0 + if { [expr $i % $checkrec] == 0 } { + + # Take a checkpoint + $dbenv txn_checkpoint + set ckp_file [lindex [lindex [$logc get -last] 0] 0] + catch { archive_command -h $testdir -a } res_log_full + if { [string first db_archive $res_log_full] == 0 } { + set res_log_full "" + } + catch { archive_command -h $testdir } res_log + if { [string first db_archive $res_log] == 0 } { + set res_log "" + } + catch { archive_command -h $testdir -l } res_alllog + catch { archive_command -h $testdir -a -s } \ + res_data_full + catch { archive_command -h $testdir -s } res_data + + if { $inmem == 0 } { + error_check_good nlogfiles [llength $res_alllog] \ + [lindex [lindex [$logc get -last] 0] 0] + } else { + error_check_good nlogfiles [llength $res_alllog] 0 + } + + error_check_good logs_match [llength $res_log_full] \ + [llength $res_log] + error_check_good data_match [llength $res_data_full] \ + [llength $res_data] + + # Check right number of log files + if { $inmem == 0 } { + set expected [min $ckp_file [expr [lindex $lsnlist 0] - 1]] + error_check_good nlogs [llength $res_log] $expected + } + + # Check that the relative names are a subset of the + # full names + set n 0 + foreach x $res_log { + error_check_bad log_name_match:$res_log \ + [string first $x \ + [lindex $res_log_full $n]] -1 + incr n + } + + set n 0 + foreach x $res_data { + error_check_bad log_name_match:$res_data \ + [string first $x \ + [lindex $res_data_full $n]] -1 + incr n + } + + # Commit a transaction and close the associated db. 
+ set t [lindex [lindex $txnlist 0] 0] + set tdb [lindex [lindex $txnlist 0] 1] + if { [string length $t] != 0 } { + error_check_good txn_commit:$t [$t commit] 0 + error_check_good tdb_close:$tdb [$tdb close] 0 + set txnlist [lrange $txnlist 1 end] + set lsnlist [lrange $lsnlist 1 end] + } + + # If we're down to no transactions, start some new ones. + if { [llength $txnlist] == 0 } { + set t1 [$dbenv txn] + error_check_bad tx_begin $t1 NULL + error_check_good \ + tx_begin [is_substr $t1 $dbenv] 1 + set tdb1 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t1 -btree tdb1.db] + error_check_good dbopen [is_valid_db $tdb1] TRUE + set l1 [lindex [lindex [$logc get -last] 0] 0] + lappend lsnlist $l1 + + set t2 [$dbenv txn] + error_check_bad tx_begin $t2 NULL + error_check_good \ + tx_begin [is_substr $t2 $dbenv] 1 + set tdb2 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t2 -btree tdb2.db] + error_check_good dbopen [is_valid_db $tdb2] TRUE + set l2 [lindex [lindex [$logc get -last] 0] 0] + lappend lsnlist $l2 + + set t3 [$dbenv txn] + error_check_bad tx_begin $t3 NULL + error_check_good \ + tx_begin [is_substr $t3 $dbenv] 1 + set tdb3 [eval {berkdb_open -create -mode 0644} \ + -env $dbenv -txn $t3 -btree tdb3.db] + error_check_good dbopen [is_valid_db $tdb3] TRUE + set l3 [lindex [lindex [$logc get -last] 0] 0] + lappend lsnlist $l3 + + set txnlist [list "$t1 $tdb1" "$t2 $tdb2" "$t3 $tdb3"] + } + } + } + # Commit any transactions still running. + puts "\tArchive.b: Commit any transactions still running." + foreach pair $txnlist { + set t [lindex $pair 0] + set tdb [lindex $pair 1] + error_check_good txn_commit:$t [$t commit] 0 + error_check_good tdb_close:$tdb [$tdb close] 0 + } + + # Close and unlink the file + error_check_good log_cursor_close [$logc close] 0 + reset_env $dbenv +} + +proc archive_command { args } { + source ./include.tcl + + # Catch a list of files output by db_archive. 
+ catch { eval exec $util_path/db_archive $args } output + + if { $is_windows_test == 1 || 1 } { + # On Windows, convert all filenames to use forward slashes. + regsub -all {[\\]} $output / output + } + + # Output the [possibly-transformed] list. + return $output +} + +proc min { a b } { + if {$a < $b} { + return $a + } else { + return $b + } +} diff --git a/test/tcl/backup.tcl b/test/tcl/backup.tcl new file mode 100644 index 00000000..b6f6ab61 --- /dev/null +++ b/test/tcl/backup.tcl @@ -0,0 +1,290 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST backup +# TEST Test of hotbackup functionality. +# TEST +# TEST Do all the of the following tests with and without +# TEST the -c (checkpoint) option; and with and without the +# TEST transactional bulk loading optimization. Make sure +# TEST that -c and -d (data_dir) are not allowed together. +# TEST +# TEST (1) Test that plain and simple hotbackup works. +# TEST (2) Test with -data_dir (-d). +# TEST (3) Test updating an existing hot backup (-u). +# TEST (4) Test with absolute path. +# TEST (5) Test with DB_CONFIG (-D), setting log_dir (-l) +# TEST and data_dir (-d). +# TEST (6) DB_CONFIG and update. +# TEST (7) Repeat hot backup (non-update) with DB_CONFIG and +# TEST existing directories. + +proc backup { {nentries 1000} } { + source ./include.tcl + global util_path + + set omethod "-btree" + set testfile "foo.db" + set backupdir "backup" + + # Set up small logs so we quickly create more than one. 
+ set log_size 20000 + set env_flags " -create -txn -home $testdir -log_max $log_size" + set db_flags " -create $omethod -auto_commit $testfile " + + foreach txnmode { normal bulk } { + if { $txnmode == "bulk" } { + set bmsg "with bulk optimization" + } else { + set bmsg "without bulk optimization" + } + foreach option { checkpoint nocheckpoint } { + if { $option == "checkpoint" } { + set c "c" + set msg "with checkpoint" + } else { + set c "" + set msg "without checkpoint" + } + puts "Backuptest $bmsg $msg." + + env_cleanup $testdir + env_cleanup $backupdir + + set env [eval {berkdb_env} $env_flags] + set db [eval {berkdb_open} -env $env $db_flags] + if { $txnmode == "bulk" } { + set txn [$env txn -txn_bulk] + } else { + set txn [$env txn] + } + populate $db $omethod $txn $nentries 0 0 + $txn commit + + # Backup directory is empty before hot backup. + set files [glob -nocomplain $backupdir/*] + error_check_good no_files [llength $files] 0 + + puts "\tBackuptest.a: Hot backup to directory $backupdir." + if {[catch { eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir } res ] } { + error "FAIL: $res" + } + + set logfiles [glob $backupdir/log*] + error_check_bad found_logs [llength $logfiles] 0 + error_check_good found_db [file exists $backupdir/$testfile] 1 + + # If either checkpoint or bulk is in effect, the copy + # will exactly match the original database. + if { $option == "checkpoint" || $txnmode == "bulk"} { + + puts "\tBackuptest.a2: Verify backup matches original file." 
+ if {[catch { eval exec $util_path/db_dump\ + -f $testdir/dump1 $testdir/$testfile } res ] } { + error "FAIL db_dump: $res" + } + if {[catch { eval exec $util_path/db_dump\ + -f $testdir/dump2 $backupdir/$testfile } res ] } { + error "FAIL db_dump: $res" + } + error_check_good compare_dump \ + [filecmp $testdir/dump1 $testdir/dump2] 0 + } + + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + env_cleanup $testdir + + puts "\tBackuptest.b: Hot backup with data_dir." + file mkdir $testdir/data1 + error_check_good db_data_dir\ + [file exists $testdir/data1/$testfile] 0 + + # Create a new env with data_dir. + set env [eval {berkdb_env_noerr} $env_flags -data_dir data1] + set db [eval {berkdb_open} -env $env $db_flags] + + if { $txnmode == "bulk" } { + set txn [$env txn -txn_bulk] + } else { + set txn [$env txn] + } + + populate $db $omethod $txn $nentries 0 0 + $txn commit + + # Check that data went into data_dir. + error_check_good db_data_dir\ + [file exists $testdir/data1/$testfile] 1 + + # You may not specify both -d (data_dir) and -c (checkpoint). + set msg2 "cannot specify -d and -c" + if { $option == "checkpoint" } { + catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir\ + -d $testdir/data1} res + error_check_good c_and_d [is_substr $res $msg2] 1 + } else { + if {[catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir\ + -d $testdir/data1} res] } { + error "FAIL: $res" + } + # Check that logs and db are in backupdir. + error_check_good db_backup\ + [file exists $backupdir/$testfile] 1 + set logfiles [glob $backupdir/log*] + error_check_bad logs_backed_up [llength $logfiles] 0 + } + + # Add more data and try the "update" flag. + puts "\tBackuptest.c: Update existing hot backup." 
+ + if { $txnmode == "bulk" } { + set txn [$env txn -txn_bulk] + } else { + set txn [$env txn] + } + + populate $db $omethod $txn [expr $nentries * 2] 0 0 + $txn commit + + if { $option == "checkpoint" } { + catch {eval exec $util_path/db_hotbackup\ + -${c}vuh $testdir -b backup -d $testdir/data1} res + error_check_good c_and_d [is_substr $res $msg2] 1 + } else { + if {[catch {eval exec $util_path/db_hotbackup\ + -${c}vuh $testdir -b backup\ + -d $testdir/data1} res] } { + error "FAIL: $res" + } + # There should be more log files now. + set newlogfiles [glob $backupdir/log*] + error_check_bad more_logs $newlogfiles $logfiles + } + + puts "\tBackuptest.d: Hot backup with full path." + set fullpath [pwd] + if { $option == "checkpoint" } { + catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b backup\ + -d $fullpath/$testdir/data1} res + error_check_good c_and_d [is_substr $res $msg2] 1 + } else { + if {[catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b backup\ + -d $fullpath/$testdir/data1} res] } { + error "FAIL: $res" + } + } + + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + env_cleanup $testdir + env_cleanup $backupdir + + puts "\tBackuptest.e: Hot backup with DB_CONFIG." + backuptest_makeconfig + set msg3 "use of -l with DB_CONFIG file is deprecated" + + set env [eval {berkdb_env_noerr} $env_flags] + set db [eval {berkdb_open} -env $env $db_flags] + + if { $txnmode == "bulk" } { + set txn [$env txn -txn_bulk] + } else { + set txn [$env txn] + } + + populate $db $omethod $txn $nentries 0 0 + $txn commit + + # With checkpoint, this fails. Without checkpoint, + # just look for the warning message. 
+ if { $option == "checkpoint" } { + catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir -l logs\ + -d $testdir/data1} res + error_check_good c_and_d [is_substr $res $msg2] 1 + } else { + catch {eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir -l logs\ + -d $testdir/data1} res + error_check_good l_and_config \ + [is_substr $res $msg3] 1 + + # Check that logs and db are in backupdir. + error_check_good db_backup\ + [file exists $backupdir/$testfile] 1 + set logfiles [glob $backupdir/log*] + error_check_bad logs_backed_up [llength $logfiles] 0 + } + + if { $txnmode == "bulk" } { + set txn [$env txn -txn_bulk] + } else { + set txn [$env txn] + } + + populate $db $omethod $txn [expr $nentries * 2] 0 0 + $txn commit + + puts "\tBackuptest.f:\ + Hot backup update with DB_CONFIG." + if { $option == "checkpoint" } { + catch {eval exec $util_path/db_hotbackup\ + -${c}vuh $testdir -b backup -l logs\ + -d $testdir/data1} res + error_check_good c_and_d [is_substr $res $msg2] 1 + } else { + catch {eval exec $util_path/db_hotbackup\ + -${c}vuh $testdir -b backup -l logs\ + -d $testdir/data1} res + error_check_good l_and_config \ + [is_substr $res $msg3] 1 + + # There should be more log files now. + set newlogfiles [glob $backupdir/log*] + error_check_bad more_logs $newlogfiles $logfiles + } + + # Repeat with directories already there to test cleaning. + # We are not doing an update this time. + puts "\tBackuptest.g:\ + Hot backup with -D (non-update)." + if { [catch { eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir -D } res] } { + error "FAIL: $res" + } + + # We are not doing an update this time. + puts "\tBackuptest.g:\ + Hot backup with DB_CONFIG (non-update)." 
+ if { [catch { eval exec $util_path/db_hotbackup\ + -${c}vh $testdir -b $backupdir } res] } { + error "FAIL: $res" + } + + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + } + } +} + +proc backuptest_makeconfig { } { + source ./include.tcl + + file mkdir $testdir/logs + file mkdir $testdir/data1 + + set cid [open $testdir/DB_CONFIG w] + puts $cid "set_lg_dir logs" + puts $cid "set_data_dir data1" + close $cid +} + diff --git a/test/tcl/bigfile001.tcl b/test/tcl/bigfile001.tcl new file mode 100644 index 00000000..c65499db --- /dev/null +++ b/test/tcl/bigfile001.tcl @@ -0,0 +1,79 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST bigfile001 +# TEST Create a database greater than 4 GB in size. Close, verify. +# TEST Grow the database somewhat. Close, reverify. Lather, rinse, +# TEST repeat. Since it will not work on all systems, this test is +# TEST not run by default. +proc bigfile001 { { itemsize 4096 } \ + { nitems 1048576 } { growby 5000 } { growtms 2 } args } { + source ./include.tcl + + set method "btree" + set args [convert_args $method $args] + set omethod [convert_method $method] + global is_fat32 + if { $is_fat32 } { + puts "Skipping bigfile001 for FAT32 file system." + return + } + puts "Bigfile001: $method ($args) $nitems * $itemsize bytes of data" + + env_cleanup $testdir + + # Create the database. Use 64K pages; we want a good fill + # factor, and page size doesn't matter much. Use a 50MB + # cache; that should be manageable, and will help + # performance. + set dbname $testdir/big.db + + set db [eval {berkdb_open -create} {-pagesize 65536 \ + -cachesize {0 50000000 0}} $omethod $args $dbname] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tBigfile001.a: Creating database..." 
+ flush stdout + + set data [string repeat z $itemsize] + + for { set i 0 } { $i < $nitems } { incr i } { + set key key[format %08u $i] + + error_check_good db_put($i) [$db put $key $data] 0 + + if { $i % 50000 == 0 } { + set pct [expr 100 * $i / $nitems] + puts "\tBigfile001.a: $pct%..." + flush stdout + } + } + puts "\tBigfile001.a: 100%." + error_check_good db_close [$db close] 0 + + puts "\tBigfile001.b: Verifying database..." + error_check_good verify \ + [verify_dir $testdir "\t\t" 0 0 1 50000000] 0 + + puts "\tBigfile001.c: Grow database $growtms times by $growby items" + + for { set j 0 } { $j < $growtms } { incr j } { + set db [eval {berkdb_open} {-cachesize {0 50000000 0}} $dbname] + error_check_good db_open [is_valid_db $db] TRUE + puts -nonewline "\t\tBigfile001.c.1: Adding $growby items..." + flush stdout + for { set i 0 } { $i < $growby } { incr i } { + set key key[format %08u $i].$j + error_check_good db_put($j.$i) [$db put $key $data] 0 + } + error_check_good db_close [$db close] 0 + puts "done." + + puts "\t\tBigfile001.c.2: Verifying database..." + error_check_good verify($j) \ + [verify_dir $testdir "\t\t\t" 0 0 1 50000000] 0 + } +} diff --git a/test/tcl/bigfile002.tcl b/test/tcl/bigfile002.tcl new file mode 100644 index 00000000..46293162 --- /dev/null +++ b/test/tcl/bigfile002.tcl @@ -0,0 +1,45 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST bigfile002 +# TEST This one should be faster and not require so much disk space, +# TEST although it doesn't test as extensively. Create an mpool file +# TEST with 1K pages. Dirty page 6000000. Sync. +proc bigfile002 { args } { + source ./include.tcl + global is_fat32 + if { $is_fat32 } { + puts "Skipping bigfile002 for FAT32 file system." + return + } + puts "Bigfile002: Creating large, sparse file through mpool..." + flush stdout + + env_cleanup $testdir + + # Create env. 
+ set env [berkdb_env -create -home $testdir] + error_check_good valid_env [is_valid_env $env] TRUE + + # Create the file. + set name big002.file + set file [$env mpool -create -pagesize 1024 $name] + + # Dirty page 6000000 + set pg [$file get -create 6000000] + error_check_good pg_init [$pg init A] 0 + error_check_good pg_set [$pg is_setto A] 1 + + # Put page back. + error_check_good pg_put [$pg put] 0 + + # Fsync. + error_check_good fsync [$file fsync] 0 + + # Close. + error_check_good fclose [$file close] 0 + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/byteorder.tcl b/test/tcl/byteorder.tcl new file mode 100644 index 00000000..e9419540 --- /dev/null +++ b/test/tcl/byteorder.tcl @@ -0,0 +1,33 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Byte Order Test +# Use existing tests and run with both byte orders. +proc byteorder { method {nentries 1000} } { + source ./include.tcl + puts "Byteorder: $method $nentries" + + eval {test001 $method $nentries 0 0 "001" -lorder 1234} + eval {verify_dir $testdir} + eval {test001 $method $nentries 0 0 "001" -lorder 4321} + eval {verify_dir $testdir} + eval {test003 $method -lorder 1234} + eval {verify_dir $testdir} + eval {test003 $method -lorder 4321} + eval {verify_dir $testdir} + eval {test010 $method $nentries 5 "010" -lorder 1234} + eval {verify_dir $testdir} + eval {test010 $method $nentries 5 "010" -lorder 4321} + eval {verify_dir $testdir} + eval {test011 $method $nentries 5 "011" -lorder 1234} + eval {verify_dir $testdir} + eval {test011 $method $nentries 5 "011" -lorder 4321} + eval {verify_dir $testdir} + eval {test018 $method $nentries -lorder 1234} + eval {verify_dir $testdir} + eval {test018 $method $nentries -lorder 4321} + eval {verify_dir $testdir} +} diff --git a/test/tcl/conscript.tcl b/test/tcl/conscript.tcl new file mode 100644 index 00000000..85c81206 --- /dev/null +++ 
b/test/tcl/conscript.tcl @@ -0,0 +1,123 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Script for DB_CONSUME test (test070.tcl). +# Usage: conscript dir file runtype nitems outputfile tnum args +# dir: DBHOME directory +# file: db file on which to operate +# runtype: PRODUCE, CONSUME or WAIT -- which am I? +# nitems: number of items to put or get +# outputfile: where to log consumer results +# tnum: test number + +proc consumescript_produce { db_cmd nitems tnum args } { + source ./include.tcl + global mydata + + set pid [pid] + puts "\tTest$tnum: Producer $pid starting, producing $nitems items." + + set db [eval $db_cmd] + error_check_good db_open:$pid [is_valid_db $db] TRUE + + set oret -1 + set ret 0 + for { set ndx 0 } { $ndx < $nitems } { incr ndx } { + set oret $ret + if { 0xffffffff > 0 && $oret > 0x7fffffff } { + incr oret [expr 0 - 0x100000000] + } + set ret [$db put -append [chop_data q $mydata]] + error_check_good db_put \ + [expr $oret > $ret ? \ + ($oret > 0x7fffffff && $ret < 0x7fffffff) : 1] 1 + + } + + set ret [catch {$db close} res] + error_check_good db_close:$pid $ret 0 + puts "\t\tTest$tnum: Producer $pid finished." +} + +proc consumescript_consume { db_cmd nitems tnum outputfile mode args } { + source ./include.tcl + global mydata + set pid [pid] + puts "\tTest$tnum: Consumer $pid starting, seeking $nitems items." + + set db [eval $db_cmd] + error_check_good db_open:$pid [is_valid_db $db] TRUE + + set oid [open $outputfile a] + + for { set ndx 0 } { $ndx < $nitems } { } { + set ret [$db get $mode] + if { [llength $ret] > 0 } { + error_check_good correct_data:$pid \ + [lindex [lindex $ret 0] 1] [pad_data q $mydata] + set rno [lindex [lindex $ret 0] 0] + puts $oid $rno + incr ndx + } else { + # No data to consume; wait. 
+		}
+	}
+
+	error_check_good output_close:$pid [close $oid] ""
+
+	set ret [catch {$db close} res]
+	error_check_good db_close:$pid $ret 0
+	puts "\t\tTest$tnum: Consumer $pid finished."
+}
+
+source ./include.tcl
+source $test_path/test.tcl
+
+# Define the usage message before the argument check below, which
+# references $usage in its error path.
+set usage "conscript.tcl dir file runtype nitems outputfile tnum"
+
+# Verify usage
+if { $argc < 6 } {
+	puts stderr "FAIL:[timestamp] Usage: $usage"
+	exit
+}
+
+# Initialize arguments
+set dir [lindex $argv 0]
+set file [lindex $argv 1]
+set runtype [lindex $argv 2]
+set nitems [lindex $argv 3]
+set outputfile [lindex $argv 4]
+set tnum [lindex $argv 5]
+# args is the string "{ -len 20 -pad 0}", so we need to extract the
+# " -len 20 -pad 0" part.
+set args [lindex [lrange $argv 6 end] 0]
+
+set mydata "consumer data"
+
+# Open env
+set dbenv [berkdb_env -home $dir ]
+error_check_good db_env_create [is_valid_env $dbenv] TRUE
+
+# Figure out db opening command.
+set db_cmd [concat {berkdb_open -create -mode 0644 -queue -env}\
+    $dbenv $args $file]
+
+# Invoke consumescript_produce or consumescript_consume based on $runtype
+if { $runtype == "PRODUCE" } {
+	# Producers have nothing to log; make sure outputfile is null.
+	error_check_good no_producer_outputfile $outputfile ""
+	consumescript_produce $db_cmd $nitems $tnum $args
+} elseif { $runtype == "CONSUME" } {
+	consumescript_consume $db_cmd $nitems $tnum $outputfile -consume $args
+} elseif { $runtype == "WAIT" } {
+	consumescript_consume $db_cmd $nitems $tnum $outputfile -consume_wait \
+	    $args
+} else {
+	error_check_good bad_args $runtype \
+	    "either PRODUCE, CONSUME, or WAIT"
+}
+error_check_good env_close [$dbenv close] 0
+exit
diff --git a/test/tcl/db_reptest.tcl b/test/tcl/db_reptest.tcl
new file mode 100644
index 00000000..9bbcd7ca
--- /dev/null
+++ b/test/tcl/db_reptest.tcl
@@ -0,0 +1,1171 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved.
+# +# $Id$ +# +# TEST db_reptest +# TEST Wrapper to configure and run the db_reptest program. + +# +# TODO: +# late client start. +# Number of message proc threads. +# + +global last_nsites +set last_nsites 0 + +# +# There are several user-level procs that the user may invoke. +# 1. db_reptest - Runs randomized configurations in a loop. +# 2. basic_db_reptest - Runs a simple set configuration once, +# as a smoke test. +# 3. restore_db_reptest 'dir' - Runs the configuration given in 'dir' +# in a loop. The purpose is either to reproduce a problem +# that some configuration encountered, or test a fix. +# 4. db_reptest_prof - Runs a single randomized configuration +# and generates gprof profiling information for that run. +# 5. basic_db_reptest_prof - Runs a simple set configuration and +# generates gprof profiling information for that run. +# 6. restore_db_reptest_prof - Runs the configuration given in 'dir' and +# generates gprof profiling information for one run. +# + +# +# db_reptest - Run a randomized configuration. Run the test +# 'count' times in a loop, or until 'stopstr' is seen in the OUTPUT +# files or if no count or string is given, it is an infinite loop. +# +proc db_reptest { { stopstr "" } {count -1} } { + berkdb srand [pid] + set cmd "db_reptest_int random" + db_reptest_loop $cmd $stopstr $count +} + +# +# Run a basic reptest. The types are: +# Basic 0 - Two sites, start with site 1 as master, 5 worker threads, btree, +# run 100 seconds, onesite remote knowledge. +# Basic 1 - Three sites, all sites start as client, 5 worker threads, btree +# run 150 seconds, full remote knowledge. +# +proc basic_db_reptest { { basic 0 } } { + global util_path + + if { [file exists $util_path/db_reptest] == 0 } { + puts "Skipping db_reptest. Is it built?" 
+		return
+	}
+	if { $basic == 0 } {
+		db_reptest_int basic0
+	}
+	if { $basic == 1 } {
+		db_reptest_int basic1
+	}
+}
+
+proc basic_db_reptest_prof { { basic 0 } } {
+	basic_db_reptest $basic
+	generate_profiles
+}
+
+#
+# Restore a configuration from the given directory and
+# run that configuration in a loop 'count' times or until
+# 'stopstr' is seen in the OUTPUT files or if no count or
+# string is given, it is an infinite loop.
+#
+proc restore_db_reptest { restoredir { stopstr "" } { count -1 } } {
+	set cmd "db_reptest_int restore $restoredir/SAVE_RUN"
+	db_reptest_loop $cmd $stopstr $count
+}
+
+proc restore_db_reptest_prof { restoredir } {
+	restore_db_reptest $restoredir "" 1
+	generate_profiles
+}
+
+#
+# Run a single randomized iteration and then generate the profile
+# information for each site.
+#
+proc db_reptest_prof { } {
+	berkdb srand [pid]
+	set cmd "db_reptest_int random"
+	db_reptest_loop $cmd "" 1
+	generate_profiles
+}
+
+proc generate_profiles {} {
+	global envdirs
+	global num_sites
+	global util_path
+
+	#
+	# Once it is complete, generate profile information.
+	# Use [list] (not braces) so $envdirs($i) is substituted; a braced
+	# list would contain the literal string "$envdirs($i)/..." and the
+	# [file exists] check below could never succeed.
+	#
+	for { set i 1 } { $i <= $num_sites } { incr i } {
+		set gmon NULL
+		set known_gmons \
+		    [list $envdirs($i)/db_reptest.gmon $envdirs($i)/gmon.out]
+		foreach gfile $known_gmons {
+			if { [file exists $gfile] } {
+				set gmon $gfile
+				break
+			}
+		}
+		if { $gmon == "NULL" } {
+			puts "No gmon file. Was it built with profiling?"
+			return
+		}
+		set prof_out db_reptest.$i.OUT
+		set stat [catch {exec gprof $util_path/db_reptest \
+		    $gmon >>& $prof_out} ret]
+		if { $stat != 0 } {
+			puts "FAIL: gprof: $ret"
+		}
+		error_check_good gprof $stat 0
+		puts "Profiled output for site $i: $prof_out"
+	}
+}
+
+proc db_reptest_profile { } {
+	db_reptest_prof
+}
+
+#
+# Wrapper to run the command in a loop, 'count' times.
+#
+proc db_reptest_loop { cmd stopstr count } {
+	global util_path
+
+	if { [file exists $util_path/db_reptest] == 0 } {
+		puts "Skipping db_reptest. Is it built?"
+ return + } + set iteration 1 + set start_time [clock format [clock seconds] -format "%H:%M %D"] + while { 1 } { + puts -nonewline "ITERATION $iteration: " + puts -nonewline \ + [clock format [clock seconds] -format "%H:%M %D"] + puts " (Started: $start_time)" + + # + eval $cmd + + puts -nonewline "COMPLETED $iteration: " + puts [clock format [clock seconds] -format "%H:%M %D"] + incr iteration + # + # If we've been given a string to look for, run until we + # see it. Or if not, skip to the count check. + # + if { [string length $stopstr] > 0 } { + set found [search_output $stopstr] + if { $found } { + break + } + } + if { $count > 0 && $iteration > $count } { + break + } + } +} + +# +# Internal version of db_reptest that all user-level procs +# eventually call. It will configure a single run of +# db_reptest based on the configuration type specified +# in 'cfgtype'. This proc will: +# Configure a run of db_reptest +# Run db_reptest +# Verify the sites after db_reptest completes. +# +proc db_reptest_int { cfgtype { restoredir NULL } } { + source ./include.tcl + global envdirs + global num_sites + + env_cleanup $testdir + + set savedir TESTDIR/SAVE_RUN + reptest_cleanup $savedir + + # + # Get all the default or random values needed for the test + # and its args first. + # + set runtime 0 + # + # Get number of sites first because pretty much everything else + # after here depends on how many sites there are. + # + set num_sites [get_nsites $cfgtype $restoredir] + set use_lease [get_lease $cfgtype $restoredir] + set use_peers [get_peers $cfgtype] + # + # Only use kill if we have > 2 sites. + # Returns a list. An empty list means this will not be a kill test. + # Otherwise the list has 3 values, the kill type and 2 kill sites. + # See the 'get_kill' proc for a description of kill types. 
+ # + set kill_type 0 + set kill_site 0 + set kill_remove 0 + set site_remove 0 + if { $num_sites > 2 } { + set kill [get_kill $cfgtype $restoredir $num_sites] + if { [llength $kill] > 0 } { + set kill_type [lindex $kill 0] + set kill_site [lindex $kill 1] + set kill_remove [lindex $kill 2] + } else { + # If we are not doing a kill test, determine if + # we are doing a remove test. + set site_remove [get_remove $cfgtype $num_sites] + } + } + if { $cfgtype != "restore" } { + if { $use_lease } { + set use_master 0 + } else { + set use_master [get_usemaster $cfgtype] + if { $site_remove == $use_master } { + set site_remove 0 + } + } + set master_site [get_mastersite $cfgtype $use_master $num_sites] + set noelect [get_noelect $use_master] + set master2_site [get_secondary_master \ + $noelect $master_site $kill_site $num_sites] + set workers [get_workers $cfgtype $use_lease] + set dbtype [get_dbtype $cfgtype] + set runtime [get_runtime $cfgtype] + puts -nonewline "Running: $num_sites sites, $runtime seconds " + if { $kill_site } { + puts -nonewline "kill site $kill_site " + if { $kill_remove } { + puts -nonewline "removed by site $kill_remove " + } + } elseif { $site_remove } { + puts -nonewline "remove site $site_remove " + } + if { $use_lease } { + puts "with leases" + } elseif { $use_master } { + set master_text "master site $master_site" + if { $noelect } { + set master_text [concat $master_text \ + "no elections"] + } + if { $master2_site } { + set master_text [concat $master_text \ + "secondary master site $master2_site"] + } + puts "$master_text" + } else { + puts "no master" + } + } + # + # This loop sets up the args to the invocation of db_reptest + # for each site. + # + set portlist [available_ports $num_sites] + for { set i 1 } {$i <= $num_sites } { incr i } { + set envdirs($i) TESTDIR/ENV$i + set homedirs($i) ../ENV$i + reptest_cleanup $envdirs($i) + # + # If we are restoring the args, just read them from the + # saved location for this sites. 
Otherwise build up + # the args for each piece we need. + # + if { $cfgtype == "restore" } { + set cid [open $restoredir/DB_REPTEST_ARGS.$i r] + set prog_args($i) [read $cid] + close $cid + if { $runtime == 0 } { + set runtime [parse_runtime $prog_args($i)] + puts "Runtime: $runtime" + } + } else { + set nmsg [berkdb random_int 1 [expr $num_sites * 2]] + set prog_args($i) \ + "-v -c $workers -t $dbtype -T $runtime -m $nmsg " + set prog_args($i) \ + [concat $prog_args($i) "-h $homedirs($i)"] + set prog_args($i) \ + [concat $prog_args($i) "-o $num_sites"] + # + # Add in if this site should remove itself. + # + if { $site_remove == $i } { + set prog_args($i) [concat $prog_args($i) "-r"] + } + # + # Add in if this site should kill itself. + # + if { $kill_site == $i } { + set prog_args($i) [concat $prog_args($i) "-k"] + } + # + # Add in if this site should remove a killed site. + # + if { $kill_remove == $i } { + set kport [lindex $portlist \ + [expr $kill_site - 1]] + set prog_args($i) [concat $prog_args($i) \ + "-K $kport"] + } + # + # Add in if this site starts as a master or client. + # + if { $i == $master_site } { + set state($i) MASTER + set prog_args($i) [concat $prog_args($i) "-M"] + } else { + set state($i) CLIENT + # + # If we have a master, then we just want to + # start as a client. Otherwise start with + # elections. + # + if { $use_master } { + set prog_args($i) \ + [concat $prog_args($i) "-C"] + } else { + set prog_args($i) \ + [concat $prog_args($i) "-E"] + } + } + # + # Add in if we are in no elections mode and if we are + # the secondary master. + # + if { $noelect } { + set prog_args($i) [concat $prog_args($i) "-n"] + if { $i == $master2_site } { + set prog_args($i) \ + [concat $prog_args($i) "-s"] + } + } + } + save_db_reptest $savedir ARGS $i $prog_args($i) + } + + # Now make the DB_CONFIG file for each site. 
+ reptest_make_config $savedir $num_sites envdirs state \ + $use_lease $use_peers $kill_site $portlist $cfgtype $restoredir + + # Run the test + run_db_reptest $savedir envdirs $num_sites $runtime $use_lease + puts "Test run complete. Verify." + + # Verify the test run. + verify_db_reptest $num_sites envdirs $kill_site + + # Show the summary files + print_summary + +} + +# +# Make a DB_CONFIG file for all sites in the group +# +proc reptest_make_config { savedir nsites edirs st lease peers kill \ + portlist cfgtype restoredir } { + upvar $edirs envdirs + upvar $st state + global rporttype + + # + # Generate global config values that should be the same + # across all sites, such as number of sites and log size, etc. + # + set rporttype NULL + set default_cfglist { + { "set_flags" "DB_TXN_NOSYNC" } + { "rep_set_request" "150000 2400000" } + { "rep_set_timeout" "db_rep_checkpoint_delay 0" } + { "rep_set_timeout" "db_rep_connection_retry 2000000" } + { "rep_set_timeout" "db_rep_heartbeat_monitor 1000000" } + { "rep_set_timeout" "db_rep_heartbeat_send 500000" } + { "set_cachesize" "0 4194304 1" } + { "set_lg_max" "131072" } + { "set_lk_detect" "db_lock_default" } + { "set_verbose" "db_verb_recovery" } + { "set_verbose" "db_verb_replication" } + } + + set acks { db_repmgr_acks_all db_repmgr_acks_all_peers \ + db_repmgr_acks_none db_repmgr_acks_one db_repmgr_acks_one_peer \ + db_repmgr_acks_quorum } + + # + # 2site strict and ack policy must be the same on all sites. + # + if { $cfgtype == "random" } { + if { $nsites == 2 } { + set strict [berkdb random_int 0 1] + } else { + set strict 0 + } + if { $lease } { + # + # 2site strict with leases must have ack policy of + # one because quorum acks are ignored in this case, + # resulting in lease expired panics on some platforms. 
+ # + if { $strict } { + set ackpolicy db_repmgr_acks_one + } else { + set ackpolicy db_repmgr_acks_quorum + } + } else { + set done 0 + while { $done == 0 } { + set acksz [expr [llength $acks] - 1] + set myack [berkdb random_int 0 $acksz] + set ackpolicy [lindex $acks $myack] + # + # Only allow the "none" policy with 2 sites + # otherwise it can overwhelm the system and + # it is a rarely used option. + # + if { $ackpolicy == "db_repmgr_acks_none" && \ + $nsites > 2 } { + continue + } + # + # Only allow "all" or "all_peers" policies + # if not killing a site, otherwise the + # unavailable site will cause the master + # to ignore acks and blast the clients with + # log records. + # + if { $kill && \ + ($ackpolicy == "db_repmgr_acks_all" || \ + $ackpolicy == + "db_repmgr_acks_all_peers") } { + continue + } + set done 1 + } + } + } else { + set ackpolicy db_repmgr_acks_one + } + # + # Set known_master to the initial master or if one is not + # assigned, randomly assign the group creator. + # + set known_master 0 + if { $cfgtype != "restore" } { + for { set i 1 } { $i <= $nsites } { incr i } { + if { $state($i) == "MASTER" } { + set known_master $i + } + } + if { $known_master == 0 } { + set known_master [berkdb random_int 1 $nsites] + } + } + for { set i 1 } { $i <= $nsites } { incr i } { + # + # If we're restoring we just need to copy it. + # + if { $cfgtype == "restore" } { + file copy $restoredir/DB_CONFIG.$i \ + $envdirs($i)/DB_CONFIG + file copy $restoredir/DB_CONFIG.$i \ + $savedir/DB_CONFIG.$i + continue + } + # + # Otherwise set up per-site config information + # + set cfglist $default_cfglist + + # + # Add lease configuration if needed. We're running all + # locally, so there is no clock skew. + # + set allist [get_ack_lease_timeouts $lease] + if { $lease } { + # + # We need to have an ack timeout > lease timeout. + # Otherwise txns can get committed without waiting + # long enough for leases to get granted. 
+ # + lappend cfglist { "rep_set_config" "db_rep_conf_lease" } + lappend cfglist { "rep_set_timeout" \ + "db_rep_lease_timeout [lindex $allist 1]" } + lappend cfglist { "rep_set_timeout" \ + "db_rep_ack_timeout [lindex $allist 0]" } + } else { + lappend cfglist { "rep_set_timeout" \ + "db_rep_ack_timeout [lindex $allist 0]" } + } + + # + # Priority + # + if { $state($i) == "MASTER" } { + lappend cfglist { "rep_set_priority" 100 } + } else { + if { $cfgtype == "random" } { + set pri [berkdb random_int 10 25] + } else { + set pri 20 + } + set litem [list rep_set_priority $pri] + lappend cfglist $litem + } + # + # Others: limit size, bulk, 2site strict + # + if { $cfgtype == "random" } { + set limit_sz [berkdb random_int 15000 1000000] + set bulk [berkdb random_int 0 1] + if { $bulk } { + lappend cfglist \ + { "rep_set_config" "db_rep_conf_bulk" } + } + # + # 2site strict was set above for all sites but + # should only be used for sites in random configs. + # + if { $strict } { + lappend cfglist { "rep_set_config" \ + "db_repmgr_conf_2site_strict" } + } + } else { + set limit_sz 100000 + } + set litem [list rep_set_limit "0 $limit_sz"] + lappend cfglist $litem + set litem [list repmgr_set_ack_policy $ackpolicy] + lappend cfglist $litem + # + # Now set up the local and remote ports. If we are the + # known_master (either master or group creator) set the + # group creator flag on. + # + set lport($i) [lindex $portlist [expr $i - 1]] + if { $i == $known_master } { + set litem [list repmgr_site \ + "localhost $lport($i) db_local_site on \ + db_group_creator on"] + } else { + set litem [list repmgr_site \ + "localhost $lport($i) db_local_site on"] + } + lappend cfglist $litem + set rport($i) [get_rport $portlist $i $nsites \ + $known_master $cfgtype] + # + # Declare all sites bootstrap helpers. 
+ # + foreach p $rport($i) { + if { $peers } { + set litem [list repmgr_site "localhost $p \ + db_bootstrap_helper on db_repmgr_peer on"] + } else { + set litem [list repmgr_site "localhost $p \ + db_bootstrap_helper on"] + } + # + # If we have full knowledge, assume a legacy system. + # + if { $cfgtype == "full" } { + lappend litem "db_legacy on" + } + lappend cfglist $litem + } + # + # Now write out the DB_CONFIG file. + # + set cid [open $envdirs($i)/DB_CONFIG a] + foreach c $cfglist { + set carg [subst [lindex $c 0]] + set cval [subst [lindex $c 1]] + puts $cid "$carg $cval" + } + close $cid + set cid [open $envdirs($i)/DB_CONFIG r] + set cfg [read $cid] + close $cid + + save_db_reptest $savedir CONFIG $i $cfg + } + +} + +proc reptest_cleanup { dir } { + # + # For now, just completely remove it all. We might want + # to use env_cleanup at some point in the future. + # + fileremove -f $dir + file mkdir $dir +} + + +proc save_db_reptest { savedir op site savelist } { + # + # Save a copy of the configuration and args used to run this + # instance of the test. + # + if { $op == "CONFIG" } { + set outfile $savedir/DB_CONFIG.$site + } else { + set outfile $savedir/DB_REPTEST_ARGS.$site + } + set cid [open $outfile a] + puts -nonewline $cid $savelist + close $cid +} + +proc run_db_reptest { savedir edirs numsites runtime use_lease } { + source ./include.tcl + upvar $edirs envdirs + global killed_procs + + set pids {} + # + # Wait three times workload run time plus an ack_timeout for each site + # to kill a run. The ack_timeout is especially significant for runs + # where leases are in use because they take much longer to get started. 
+ # + set ack_timeout [lindex [get_ack_lease_timeouts $use_lease] 0] + set watch_time [expr $runtime * 3 + \ + [expr $ack_timeout / 1000000] * $numsites] + for {set i 1} {$i <= $numsites} {incr i} { + lappend pids [exec $tclsh_path $test_path/wrap_reptest.tcl \ + $savedir/DB_REPTEST_ARGS.$i $envdirs($i) \ + $savedir/site$i.log &] + tclsleep 1 + } + watch_procs $pids 15 $watch_time + set killed [llength $killed_procs] + if { $killed > 0 } { + error "Processes $killed_procs never finished" + } +} + +proc verify_db_reptest { num_sites edirs kill } { + upvar $edirs envdirs + + set startenv 1 + set cmpeid 2 + if { $kill == 1 } { + set startenv 2 + set cmpeid 3 + } + set envbase [berkdb_env_noerr -home $envdirs($startenv)] + for { set i $cmpeid } { $i <= $num_sites } { incr i } { + if { $i == $kill } { + continue + } + set cmpenv [berkdb_env_noerr -home $envdirs($i)] + puts "Compare $envdirs($startenv) with $envdirs($i)" + # + # Compare 2 envs. We assume the name of the database that + # db_reptest creates and know it is 'am1.db'. + # We want as other args: + # 0 - compare_shared_portion + # 1 - match databases + # 0 - don't compare logs (for now) + rep_verify $envdirs($startenv) $envbase $envdirs($i) $cmpenv \ + 0 1 0 am1.db + $cmpenv close + } + $envbase close +} + +proc get_nsites { cfgtype restoredir } { + global last_nsites + + # + # Figure out the number of sites. We use 'glob' to get all of + # the valid DB_CONFIG files in the restoredir. That command uses + # a single digit match, so the maximum number of sites must be <= 9. + # Match DB_CONFIG.# so that it does not consider anything like an + # emacs save file. + # + set maxsites 5 + # + # If someone changes maxsites to be too big, it will break the + # 'glob' below. Catch that now. + # + if { $maxsites > 9 } { + error "Max sites too large." 
+ } + if { $cfgtype == "restore" } { + set ret [catch {glob $restoredir/DB_CONFIG.\[1-$maxsites\]} \ + result] + if { $ret != 0 } { + error "Could not get config list: $result" + } + return [llength $result] + } + if { $cfgtype == "random" } { + # + # Sometimes 'random' doesn't seem to do a good job. I have + # seen on all iterations after the first one, nsites is + # always 2, 100% of the time. Add this bit to make sure + # this nsites values is different from the last iteration. + # + set n [berkdb random_int 2 $maxsites] + while { $n == $last_nsites } { + set n [berkdb random_int 2 $maxsites] +puts "Getting random nsites between 2 and $maxsites. Got $n, last_nsites $last_nsites" + } + set last_nsites $n + return $n + } + if { $cfgtype == "basic0" } { + return 2 + } + if { $cfgtype == "basic1" } { + return 3 + } + return -1 +} + +# +# Run with master leases? 25%/75% (use a master lease 25% of the time). +# +proc get_lease { cfgtype restoredir } { + # + # The number of sites must be the same for all. Read the + # first site's saved DB_CONFIG file if we're restoring since + # we only know we have at least 1 site. + # + if { $cfgtype == "restore" } { + set use_lease 0 + set cid [open $restoredir/DB_CONFIG.1 r] + while { [gets $cid cfglist] } { +# puts "Read in: $cfglist" + if { [llength $cfglist] == 0 } { + break; + } + set cfg [lindex $cfglist 0] + if { $cfg == "rep_set_config" } { + set lease [lindex $cfglist 1] + if { $lease == "db_rep_conf_lease" } { + set use_lease 1 + break; + } + } + } + close $cid + return $use_lease + } + if { $cfgtype == "random" } { + set leases { 1 0 0 0 } + set len [expr [llength $leases] - 1] + set i [berkdb random_int 0 $len] + return [lindex $leases $i] + } + if { $cfgtype == "basic0" } { + return 0 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# Do a kill test about half the time. We randomly choose a +# site number to kill, it could be a master or a client. 
If +# we want to remove the site from the group, randomly choose +# a site to do the removal. +# +# We return a list with the kill type and the sites. Return +# an empty list if we don't kill any site. There are 2 variants: +# +# 1: Die - A site just kills itself but remains part of the group. +# Return a list {1 deadsite# 0}. +# 2: Removal - A site kills itself, and some site will also remove +# the dead site from the group. (Could be the same site that is dying). +# {2 deadsite# removalsite#}. +# +proc get_kill { cfgtype restoredir num_sites } { + set nokill "" + if { $cfgtype == "restore" } { + set ksite 0 + set ktype 0 + set rsite 0 + for { set i 1 } { $i <= $num_sites } { incr i } { + set cid [open $restoredir/DB_REPTEST_ARGS.$i r] + # !!! + # We currently assume the args file is 1 line. + # + gets $cid arglist + close $cid +# puts "Read in: $arglist" + set dokill [lsearch $arglist "-k"] + set dorem [lsearch $arglist "-K"] + # + # Only 1 of those 3 should ever be set. If we + # find -K, we have all the information we need + # and can break the loop. If we find a -k we might + # find a later -K so we keep looking. + # + if { $dokill != -1 } { + set ksite $i + set ktype 1 + } + # + # If it is a remote removal kill type, we are + # the site doing the removing and we need to get + # the site to remove from the arg. $dorem is the + # index of the arg, so + 1 is the site number. + # The site in the arg is the port number so grab + # the site number out of it. + # + if { $dorem != -1 } { + set ktype 2 + set kport [lindex $arglist [expr $dorem + 1]] + set ksite [site_from_port $kport $num_sites] + set rsite $i + break + } + } + if { $ktype == 0 } { + return $nokill + } else { + return [list $ktype $ksite $rsite] + } + } + if { $cfgtype == "random" } { + # Do a kill test half the time. 
+ set k { 0 0 0 1 1 1 0 1 1 0 } + set len [expr [llength $k] - 1] + set i [berkdb random_int 0 $len] + if { [lindex $k $i] == 1 } { + set ktype 1 + set ksite [berkdb random_int 1 $num_sites] + set rsite 0 + # Do a removal half the time we do a kill. + set k { 0 0 0 1 1 1 0 1 1 0 } + set len [expr [llength $k] - 1] + set i [berkdb random_int 0 $len] + if { [lindex $k $i] == 1 } { + set ktype 2 + set rsite [berkdb random_int 1 $num_sites] + } + set klist [list $ktype $ksite $rsite] + } else { + set klist $nokill + } + return $klist + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return $nokill + } else { + error "Get_kill: Invalid config type $cfgtype" + } +} + +# +# If we want to run a remove/rejoin, which site? This proc +# will return a site number of a site to remove/rejoin or +# it will return 0 if no removal test. Sites are numbered +# starting at 1. +# +proc get_remove { cfgtype nsites } { +# +# For now, until the "restart a dead carcass" work is done +# post-5.2, we don't use this option. 5.2 requires a site +# to shutdown if it gets removed while it is alive. +# +return 0 + if { $cfgtype == "random" } { + # Do a remove test half the time we're called. + set k { 0 0 0 1 1 1 0 1 1 0 } + set len [expr [llength $k] - 1] + set i [berkdb random_int 0 $len] + if { [lindex $k $i] == 1 } { + set rsite [berkdb random_int 1 $nsites] + } else { + set rsite 0 + } + return $rsite + } else { + return 0 + } +} + +# +# Use peers or only the master for requests? 25%/75% (use a peer 25% +# of the time and master 75%) +# +proc get_peers { cfgtype } { + if { $cfgtype == "random" } { + set peer { 0 0 0 1 } + set len [expr [llength $peer] - 1] + set i [berkdb random_int 0 $len] + return [lindex $peer $i] + } else { + return 0 + } +} + +# +# Start with a master or all clients? 
25%/75% (use a master 25% +# of the time and have all clients 75%) +# +proc get_usemaster { cfgtype } { + if { $cfgtype == "random" } { + set mst { 1 0 0 0 } + set len [expr [llength $mst] - 1] + set i [berkdb random_int 0 $len] + return [lindex $mst $i] + } + if { $cfgtype == "basic0" } { + return 1 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# If we use a master, which site? This proc will return +# the site number of the mastersite, or it will return +# 0 if no site should start as master. Sites are numbered +# starting at 1. +# +proc get_mastersite { cfgtype usemaster nsites } { + if { $usemaster == 0 } { + return 0 + } + if { $cfgtype == "random" } { + return [berkdb random_int 1 $nsites] + } + if { $cfgtype == "basic0" } { + return 1 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# If we are using a master, use no elections 20% of the time. +# +proc get_noelect { usemaster } { + if { $usemaster } { + set noelect { 0 0 1 0 0 } + set len [expr [llength $noelect] - 1] + set i [berkdb random_int 0 $len] + return [lindex $noelect $i] + } else { + return 0 + } +} + +# +# If we are using no elections mode and we are going to kill the initial +# master, select a different site to start up as master after the initial +# master is killed. +# +proc get_secondary_master { noelect master_site kill nsites } { + if { $noelect == 0 || $kill != $master_site} { + return 0 + } + set master2_site [berkdb random_int 1 $nsites] + while { $master2_site == $master_site } { + set master2_site [berkdb random_int 1 $nsites] + } + return $master2_site +} + +# +# This is the number of worker threads performing the workload. +# This is not the number of message processing threads. +# +# Scale back the number of worker threads if leases are in use. 
+# The timing with leases can be fairly sensitive and since all sites +# run on the local machine, too many workers on every site can +# overwhelm the system, causing lost messages and delays that make +# the tests fail. Rather than try to tweak timeouts, just reduce +# the workloads a bit. +# +proc get_workers { cfgtype lease } { + if { $cfgtype == "random" } { + if { $lease } { + return [berkdb random_int 2 4] + } else { + return [berkdb random_int 2 8] + } + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return 5 + } +} + +proc get_dbtype { cfgtype } { + if { $cfgtype == "random" } { + # + # 50% btree, 25% queue 12.5% hash 12.5% recno + # We favor queue only because there is special handling + # for queue in internal init. + # +# set methods {btree btree btree btree queue queue hash recno} + set methods {btree btree btree btree hash recno} + set len [expr [llength $methods] - 1] + set i [berkdb random_int 0 $len] + return [lindex $methods $i] + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return btree + } +} + +proc get_runtime { cfgtype } { + if { $cfgtype == "random" } { + return [berkdb random_int 100 500] + } + if { $cfgtype == "basic0" } { + return 100 + } + if { $cfgtype == "basic1" } { + return 150 + } +} + +proc get_rport { portlist i num_sites known_master cfgtype} { + global rporttype + + if { $cfgtype == "random" && $rporttype == "NULL" } { + set types {backcirc forwcirc full onesite} + set len [expr [llength $types] - 1] + set rindex [berkdb random_int 0 $len] + set rporttype [lindex $types $rindex] + } + if { $cfgtype == "basic0" } { + set rporttype onesite + } + if { $cfgtype == "basic1" } { + set rporttype full + } + # + # This produces a circular knowledge ring. Either forward + # or backward. In the forwcirc, ENV1 knows (via -r) about + # ENV2, ENV2 knows about ENV3, ..., ENVX knows about ENV1. 
+ # + if { $rporttype == "forwcirc" } { + if { $i != $num_sites } { + return [list [lindex $portlist $i]] + } else { + return [list [lindex $portlist 0]] + } + } + if { $rporttype == "backcirc" } { + if { $i != 1 } { + return [list [lindex $portlist [expr $i - 2]]] + } else { + return [list [lindex $portlist [expr $num_sites - 1]]] + } + } + # + # This produces a configuration where site N does not know + # about any other site and every other site knows about site N. + # Site N must either be the master or group creator. + # NOTE: Help_site_i subtracts one because site numbers + # are 1-based and list indices are 0-based. + # + if { $rporttype == "onesite" } { + set helper_site [expr $known_master - 1] + if { $i == $known_master } { + return {} + } + return [lindex $portlist $helper_site] + } + # + # This produces a fully connected configuration + # + if { $rporttype == "full" } { + set rlist {} + for { set site 1 } { $site <= $num_sites } { incr site } { + if { $site != $i } { + lappend rlist \ + [lindex $portlist [expr $site - 1]] + } + } + return $rlist + } +} + +# +# We need to have an ack timeout > lease timeout. Otherwise txns can get +# committed without waiting long enough for leases to get granted. We +# return a list {acktimeout# leasetimeout#}, with leasetimeout#=0 if leases +# are not in use. 
+# +proc get_ack_lease_timeouts { useleases } { + if { $useleases } { + return [list 20000000 10000000] + } else { + return [list 5000000 0] + } +} + +proc parse_runtime { progargs } { + set i [lsearch $progargs "-T"] + set val [lindex $progargs [expr $i + 1]] + return $val +} + +proc print_summary { } { + source ./include.tcl + global envdirs + + set ret [catch {glob $testdir/summary.*} result] + if { $ret == 0 } { + set sumfiles $result + } else { + puts "Could not get summary list: $result" + return 1 + } + foreach f $sumfiles { + puts "==== $f ====" + set ret [catch {open $f} fd] + if { $ret != 0 } { + puts "Error opening $f: $fd" + continue + } + while { [gets $fd line] >= 0 } { + puts "$line" + } + close $fd + } + return 0 +} + +proc search_output { stopstr } { + source ./include.tcl + + set ret [catch {glob $testdir/E*/OUTPUT} result] + if { $ret == 0 } { + set outfiles $result + } else { + puts "Could not find any OUTPUT files: $result" + return 0 + } + set found 0 + foreach f $outfiles { + set ret [catch {exec grep $stopstr $f > /dev/null} result] + if { $ret == 0 } { + puts "$f: Match found: $stopstr" + set found 1 + } + } + return $found +} diff --git a/test/tcl/dbm.tcl b/test/tcl/dbm.tcl new file mode 100644 index 00000000..83558abb --- /dev/null +++ b/test/tcl/dbm.tcl @@ -0,0 +1,127 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dbm +# TEST Historic DBM interface test. Use the first 1000 entries from the +# TEST dictionary. Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Then reopen the file, re-retrieve everything. Finally, delete +# TEST everything. 
+proc dbm { { nentries 1000 } } { + source ./include.tcl + + puts "DBM interfaces test: $nentries" + + # Create the database and open the dictionary + set testfile $testdir/dbmtest + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir NULL + + error_check_good dbminit [berkdb dbminit $testfile] 0 + set did [open $dict] + + set flags "" + set txn "" + set count 0 + set skippednullkey 0 + + puts "\tDBM.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + # DBM can't handle zero-length keys + if { [string length $str] == 0 } { + set skippednullkey 1 + continue + } + + set ret [berkdb store $str $str] + error_check_good dbm_store $ret 0 + + set d [berkdb fetch $str] + error_check_good dbm_fetch $d $str + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tDBM.b: dump file" + set oid [open $t1 w] + for { set key [berkdb firstkey] } { $key != -1 } {\ + set key [berkdb nextkey $key] } { + puts $oid $key + set d [berkdb fetch $key] + error_check_good dbm_refetch $d $key + } + + # If we had to skip a zero-length key, juggle things to cover up + # this fact in the dump. + if { $skippednullkey == 1 } { + puts $oid "" + incr nentries 1 + } + + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + + error_check_good DBM:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tDBM.c: close, open, and dump file" + + # Now, reopen the file and run the last test again. 
+ error_check_good dbminit2 [berkdb dbminit $testfile] 0 + set oid [open $t1 w] + + for { set key [berkdb firstkey] } { $key != -1 } {\ + set key [berkdb nextkey $key] } { + puts $oid $key + set d [berkdb fetch $key] + error_check_good dbm_refetch $d $key + } + if { $skippednullkey == 1 } { + puts $oid "" + } + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + filesort $t1 $t3 + + error_check_good DBM:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and delete each entry + puts "\tDBM.d: sequential scan and delete" + + error_check_good dbminit3 [berkdb dbminit $testfile] 0 + set oid [open $t1 w] + + for { set key [berkdb firstkey] } { $key != -1 } {\ + set key [berkdb nextkey $key] } { + puts $oid $key + set ret [berkdb delete $key] + error_check_good dbm_delete $ret 0 + } + if { $skippednullkey == 1 } { + puts $oid "" + } + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + filesort $t1 $t3 + + error_check_good DBM:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + error_check_good "dbm_close" [berkdb dbmclose] 0 +} diff --git a/test/tcl/dbscript.tcl b/test/tcl/dbscript.tcl new file mode 100644 index 00000000..0a9bf9a2 --- /dev/null +++ b/test/tcl/dbscript.tcl @@ -0,0 +1,358 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Random db tester. +# Usage: dbscript file numops min_del max_add key_avg data_avgdups +# method: method (we pass this in so that fixed-length records work) +# file: db file on which to operate +# numops: number of operations to do +# ncurs: number of cursors +# min_del: minimum number of keys before you disable deletes. +# max_add: maximum number of keys before you disable adds. 
+# key_avg: average key size +# data_avg: average data size +# dups: 1 indicates dups allowed, 0 indicates no dups +# errpct: What percent of operations should generate errors +# seed: Random number generator seed (-1 means use pid) + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "dbscript file numops ncurs min_del max_add key_avg data_avg dups errpcnt args" + +# Verify usage +if { $argc < 10 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set method [lindex $argv 0] +set file [lindex $argv 1] +set numops [ lindex $argv 2 ] +set ncurs [ lindex $argv 3 ] +set min_del [ lindex $argv 4 ] +set max_add [ lindex $argv 5 ] +set key_avg [ lindex $argv 6 ] +set data_avg [ lindex $argv 7 ] +set dups [ lindex $argv 8 ] +set errpct [ lindex $argv 9 ] +set args [ lindex $argv 10 ] + +berkdb srand $rand_init + +puts "Beginning execution for [pid]" +puts "$file database" +puts "$numops Operations" +puts "$ncurs cursors" +puts "$min_del keys before deletes allowed" +puts "$max_add or fewer keys to add" +puts "$key_avg average key length" +puts "$data_avg average data length" +puts "$method $args" +if { $dups != 1 } { + puts "No dups" +} else { + puts "Dups allowed" +} +puts "$errpct % Errors" + +flush stdout + +set db [eval {berkdb_open} $args $file] +set cerr [catch {error_check_good dbopen [is_substr $db db] 1} cret] +if {$cerr != 0} { + puts $cret + return +} +# set method [$db get_type] +set record_based [is_record_based $method] + +# Initialize globals including data +global nkeys +global l_keys +global a_keys + +set nkeys [db_init $db 1] +puts "Initial number of keys: $nkeys" + +set pflags "" +set gflags "" +set txn "" + +# Open the cursors +set curslist {} +for { set i 0 } { $i < $ncurs } { incr i } { + set dbc [$db cursor] + set cerr [catch {error_check_good dbcopen [is_substr $dbc $db.c] 1} cret] + if {$cerr != 0} { + puts $cret + return + } + set cerr [catch {error_check_bad 
cursor_create $dbc NULL} cret] + if {$cerr != 0} { + puts $cret + return + } + lappend curslist $dbc + +} + +# On each iteration we're going to generate random keys and +# data. We'll select either a get/put/delete operation unless +# we have fewer than min_del keys in which case, delete is not +# an option or more than max_add in which case, add is not +# an option. The tcl global arrays a_keys and l_keys keep track +# of key-data pairs indexed by key and a list of keys, accessed +# by integer. +set adds 0 +set puts 0 +set gets 0 +set dels 0 +set bad_adds 0 +set bad_puts 0 +set bad_gets 0 +set bad_dels 0 + +for { set iter 0 } { $iter < $numops } { incr iter } { + set op [pick_op $min_del $max_add $nkeys] + set err [is_err $errpct] + + # The op0's indicate that there aren't any duplicates, so we + # exercise regular operations. If dups is 1, then we'll use + # cursor ops. + switch $op$dups$err { + add00 { + incr adds + + set k [random_data $key_avg 1 a_keys $record_based] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set ret [eval {$db put} $txn $pflags \ + {-nooverwrite $k $data}] + set cerr [catch {error_check_good put $ret 0} cret] + if {$cerr != 0} { + puts $cret + return + } + newpair $k [pad_data $method $data] + } + add01 { + incr bad_adds + set k [random_key] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set ret [eval {$db put} $txn $pflags \ + {-nooverwrite $k $data}] + set cerr [catch {error_check_good put $ret 0} cret] + if {$cerr != 0} { + puts $cret + return + } + # Error case so no change to data state + } + add10 { + incr adds + set dbcinfo [random_cursor $curslist] + set dbc [lindex $dbcinfo 0] + if { [berkdb random_int 1 2] == 1 } { + # Add a new key + set k [random_data $key_avg 1 a_keys \ + $record_based] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set ret [eval {$dbc put} $txn \ + {-keyfirst $k $data}] + newpair $k [pad_data $method $data] + } else 
{ + # Add a new duplicate + set dbc [lindex $dbcinfo 0] + set k [lindex $dbcinfo 1] + set data [random_data $data_avg 0 0] + + set op [pick_cursput] + set data [chop_data $method $data] + set ret [eval {$dbc put} $txn {$op $k $data}] + adddup $k [lindex $dbcinfo 2] $data + } + } + add11 { + # TODO + incr bad_adds + set ret 1 + } + put00 { + incr puts + set k [random_key] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set ret [eval {$db put} $txn {$k $data}] + changepair $k [pad_data $method $data] + } + put01 { + incr bad_puts + set k [random_key] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set ret [eval {$db put} $txn $pflags \ + {-nooverwrite $k $data}] + set cerr [catch {error_check_good put $ret 0} cret] + if {$cerr != 0} { + puts $cret + return + } + # Error case so no change to data state + } + put10 { + incr puts + set dbcinfo [random_cursor $curslist] + set dbc [lindex $dbcinfo 0] + set k [lindex $dbcinfo 1] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + + set ret [eval {$dbc put} $txn {-current $data}] + changedup $k [lindex $dbcinfo 2] $data + } + put11 { + incr bad_puts + set k [random_key] + set data [random_data $data_avg 0 0] + set data [chop_data $method $data] + set dbc [$db cursor] + set ret [eval {$dbc put} $txn {-current $data}] + set cerr [catch {error_check_good curs_close \ + [$dbc close] 0} cret] + if {$cerr != 0} { + puts $cret + return + } + # Error case so no change to data state + } + get00 { + incr gets + set k [random_key] + set val [eval {$db get} $txn {$k}] + set data [pad_data $method [lindex [lindex $val 0] 1]] + if { $data == $a_keys($k) } { + set ret 0 + } else { + set ret "FAIL: Error got |$data| expected |$a_keys($k)|" + } + # Get command requires no state change + } + get01 { + incr bad_gets + set k [random_data $key_avg 1 a_keys $record_based] + set ret [eval {$db get} $txn {$k}] + # Error case so no change to data state + } + get10 
{ + incr gets + set dbcinfo [random_cursor $curslist] + if { [llength $dbcinfo] == 3 } { + set ret 0 + } else { + set ret 0 + } + # Get command requires no state change + } + get11 { + incr bad_gets + set k [random_key] + set dbc [$db cursor] + if { [berkdb random_int 1 2] == 1 } { + set dir -next + } else { + set dir -prev + } + set ret [eval {$dbc get} $txn {$dir $k}] + set cerr [catch {error_check_good curs_close \ + [$dbc close] 0} cret] + if {$cerr != 0} { + puts $cret + return + } + # Error and get case so no change to data state + } + del00 { + incr dels + set k [random_key] + set ret [eval {$db del} $txn {$k}] + rempair $k + } + del01 { + incr bad_dels + set k [random_data $key_avg 1 a_keys $record_based] + set ret [eval {$db del} $txn {$k}] + # Error case so no change to data state + } + del10 { + incr dels + set dbcinfo [random_cursor $curslist] + set dbc [lindex $dbcinfo 0] + set ret [eval {$dbc del} $txn] + remdup [lindex $dbcinfo 1] [lindex $dbcinfo 2] + } + del11 { + incr bad_dels + set c [$db cursor] + set ret [eval {$c del} $txn] + set cerr [catch {error_check_good curs_close \ + [$c close] 0} cret] + if {$cerr != 0} { + puts $cret + return + } + # Error case so no change to data state + } + } + if { $err == 1 } { + # Verify failure. 
+ set cerr [catch {error_check_good $op$dups$err:$k \ + [is_substr Error $ret] 1} cret] + if {$cerr != 0} { + puts $cret + return + } + } else { + # Verify success + set cerr [catch {error_check_good $op$dups$err:$k $ret 0} cret] + if {$cerr != 0} { + puts $cret + return + } + } + + flush stdout +} + +# Close cursors and file +foreach i $curslist { + set r [$i close] + set cerr [catch {error_check_good cursor_close:$i $r 0} cret] + if {$cerr != 0} { + puts $cret + return + } +} + +set r [$db close] +set cerr [catch {error_check_good db_close:$db $r 0} cret] +if {$cerr != 0} { + puts $cret + return +} + +puts "[timestamp] [pid] Complete" +puts "Successful ops: $adds adds $gets gets $puts puts $dels dels" +puts "Error ops: $bad_adds adds $bad_gets gets $bad_puts puts $bad_dels dels" +flush stdout + +eval filecheck $file {$txn} $args + +exit diff --git a/test/tcl/ddoyscript.tcl b/test/tcl/ddoyscript.tcl new file mode 100644 index 00000000..983806c3 --- /dev/null +++ b/test/tcl/ddoyscript.tcl @@ -0,0 +1,171 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Deadlock detector script tester. +# Usage: ddoyscript dir lockerid numprocs +# dir: DBHOME directory +# lockerid: Lock id for this locker +# numprocs: Total number of processes running +# myid: id of this process -- +# the order that the processes are created is the same +# in which their lockerid's were allocated so we know +# that there is a locker age relationship that is isomorphic +# with the order releationship of myid's. 
+ +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "ddoyscript dir lockerid numprocs oldoryoung" + +# Verify usage +if { $argc != 5 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [lindex $argv 0] +set lockerid [ lindex $argv 1 ] +set numprocs [ lindex $argv 2 ] +set old_or_young [lindex $argv 3] +set myid [lindex $argv 4] + +set myenv [berkdb_env -lock -home $dir -create -mode 0644] +error_check_bad lock_open $myenv NULL +error_check_good lock_open [is_substr $myenv "env"] 1 + +# There are two cases here -- oldest/youngest or a ring locker. + +if { $myid == 0 || $myid == [expr $numprocs - 1] } { + set waitobj NULL + set ret 0 + + if { $myid == 0 } { + set objid 2 + if { $old_or_young == "o" } { + set waitobj [expr $numprocs - 1] + } + } else { + if { $old_or_young == "y" } { + set waitobj 0 + } + set objid 4 + } + + # Acquire own read lock + if {[catch {$myenv lock_get read $lockerid $myid} selflock] != 0} { + puts $errorInfo + } else { + error_check_good selfget:$objid [is_substr $selflock $myenv] 1 + } + + # Acquire read lock + if {[catch {$myenv lock_get read $lockerid $objid} lock1] != 0} { + puts $errorInfo + } else { + error_check_good lockget:$objid [is_substr $lock1 $myenv] 1 + } + + tclsleep 10 + + if { $waitobj == "NULL" } { + # Sleep for a good long while + tclsleep 90 + } else { + # Acquire write lock + if {[catch {$myenv lock_get write $lockerid $waitobj} lock2] + != 0} { + puts $errorInfo + set ret ERROR + } else { + error_check_good lockget:$waitobj \ + [is_substr $lock2 $myenv] 1 + + # Now release it + if {[catch {$lock2 put} err] != 0} { + puts $errorInfo + set ret ERROR + } else { + error_check_good lockput:oy:$objid $err 0 + } + } + + } + + # Release self lock + if {[catch {$selflock put} err] != 0} { + puts $errorInfo + if { $ret == 0 } { + set ret ERROR + } + } else { + error_check_good selfput:oy:$myid $err 0 + if { $ret == 0 } { + set ret 1 + } + } 
+ + # Release first lock + if {[catch {$lock1 put} err] != 0} { + puts $errorInfo + if { $ret == 0 } { + set ret ERROR + } + } else { + error_check_good lockput:oy:$objid $err 0 + if { $ret == 0 } { + set ret 1 + } + } + +} else { + # Make sure that we succeed if we're locking the same object as + # oldest or youngest. + if { [expr $myid % 2] == 0 } { + set mode read + } else { + set mode write + } + # Obtain first lock (should always succeed). + if {[catch {$myenv lock_get $mode $lockerid $myid} lock1] != 0} { + puts $errorInfo + } else { + error_check_good lockget:$myid [is_substr $lock1 $myenv] 1 + } + + tclsleep 30 + + set nextobj [expr $myid + 1] + if { $nextobj == [expr $numprocs - 1] } { + set nextobj 1 + } + + set ret 1 + if {[catch {$myenv lock_get write $lockerid $nextobj} lock2] != 0} { + if {[string match "*DEADLOCK*" $lock2] == 1} { + set ret DEADLOCK + } else { + set ret ERROR + } + } else { + error_check_good lockget:$nextobj [is_substr $lock2 $myenv] 1 + } + + # Now release the first lock + error_check_good lockput:$lock1 [$lock1 put] 0 + + if {$ret == 1} { + error_check_bad lockget:$nextobj $lock2 NULL + error_check_good lockget:$nextobj [is_substr $lock2 $myenv] 1 + error_check_good lockput:$lock2 [$lock2 put] 0 + } +} + +puts $ret +error_check_good lock_id_free [$myenv lock_id_free $lockerid] 0 +error_check_good envclose [$myenv close] 0 +exit diff --git a/test/tcl/ddscript.tcl b/test/tcl/ddscript.tcl new file mode 100644 index 00000000..07879d70 --- /dev/null +++ b/test/tcl/ddscript.tcl @@ -0,0 +1,43 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Deadlock detector script tester. +# Usage: ddscript dir test lockerid objid numprocs +# dir: DBHOME directory +# test: Which test to run +# lockerid: Lock id for this locker +# objid: Object id to lock. 
+# numprocs: Total number of processes running + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "ddscript dir test lockerid objid numprocs" + +# Verify usage +if { $argc != 5 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [lindex $argv 0] +set test [ lindex $argv 1 ] +set lockerid [ lindex $argv 2 ] +set objid [ lindex $argv 3 ] +set numprocs [ lindex $argv 4 ] + +set myenv [berkdb_env -lock -home $dir -create -mode 0644 ] +error_check_bad lock_open $myenv NULL +error_check_good lock_open [is_substr $myenv "env"] 1 + +puts [eval $test $myenv $lockerid $objid $numprocs] + +error_check_good lock_id_free [$myenv lock_id_free $lockerid] 0 +error_check_good envclose [$myenv close] 0 + +exit diff --git a/test/tcl/dead001.tcl b/test/tcl/dead001.tcl new file mode 100644 index 00000000..cfeb52ff --- /dev/null +++ b/test/tcl/dead001.tcl @@ -0,0 +1,133 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead001 +# TEST Use two different configurations to test deadlock detection among a +# TEST variable number of processes. One configuration has the processes +# TEST deadlocked in a ring. The other has the processes all deadlocked on +# TEST a single resource. +proc dead001 { { procs "2 4 10" } {tests "ring clump" } \ + {timeout 0} {tnum "001"} {pri 0} } { + source ./include.tcl + global lock_curid + global lock_maxid + + if {$timeout > 0 && $pri > 0} { + puts "Dead$tnum: Both timeout and priority cannot be set." + return + } + + set msg "" + if { $pri == 1 } { + set msg " with priority" + } + puts "Dead$tnum: Deadlock detector tests timeout $timeout" + + env_cleanup $testdir + + # Create the environment. 
+ puts "\tDead$tnum.a: creating environment" + set env [berkdb_env -create \ + -mode 0644 -lock -lock_timeout $timeout -home $testdir] + error_check_good lock_env:open [is_valid_env $env] TRUE + + foreach t $tests { + foreach n $procs { + if {$timeout == 0 } { + set dpid [exec $util_path/db_deadlock -v -t 0.100000 \ + -h $testdir >& $testdir/dd.out &] + } else { + set dpid [exec $util_path/db_deadlock -v -t 0.100000 \ + -ae -h $testdir >& $testdir/dd.out &] + } + + sentinel_init + set pidlist "" + set ret [$env lock_id_set $lock_curid $lock_maxid] + error_check_good lock_id_set $ret 0 + + # Fire off the tests + puts "\tDead$tnum: $n procs of test $t $msg" + for { set i 0 } { $i < $n } { incr i } { + set locker [$env lock_id] + if {$pri == 1} { + $env lock_set_priority $locker $i + } + puts "$tclsh_path $test_path/wrap.tcl \ + ddscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $t $locker $i $n" + set p [exec $tclsh_path $test_path/wrap.tcl \ + ddscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $t $locker $i $n &] + lappend pidlist $p + } + watch_procs $pidlist 5 + + # Now check output + # dead: the number of aborted lockers + # clean: the number of non-aborted lockers + # killed: the highest aborted locker + # kept: the highest non-aborted locker + # In a ring, only one locker is aborted. If testing + # priorities, it should be 0, the lowest priority. + # In a clump, only one locker is not aborted. If testing + # priorities, it should be n, the highest priority. + set dead 0 + set clean 0 + set other 0 + set killed $n + set kept $n + for { set i 0 } { $i < $n } { incr i } { + set did [open $testdir/dead$tnum.log.$i] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. 
+ if { [string first \ + "profiling:" $val] == 0 } { + continue + } + switch $val { + DEADLOCK { + incr dead + set killed $i + } + 1 { + incr clean + set kept $i + } + default { incr other } + } + } + close $did + } + tclkill $dpid + puts "\tDead$tnum: dead check..." + dead_check $t $n $timeout $dead $clean $other + if {$pri == 1} { + if {$t == "ring"} { + # Only the lowest priority killed in a + # ring + error_check_good low_priority_killed \ + $killed 0 + } elseif {$t == "clump"} { + # All but the highest priority killed in a + # clump + error_check_good high_priority_kept \ + $kept [expr $n - 1] + } + } + } + } + + # Windows needs files closed before deleting files, so pause a little + tclsleep 3 + fileremove -f $testdir/dd.out + # Remove log files + for { set i 0 } { $i < $n } { incr i } { + fileremove -f $testdir/dead$tnum.log.$i + } + error_check_good lock_env:close [$env close] 0 +} diff --git a/test/tcl/dead002.tcl b/test/tcl/dead002.tcl new file mode 100644 index 00000000..3d83374a --- /dev/null +++ b/test/tcl/dead002.tcl @@ -0,0 +1,126 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead002 +# TEST Same test as dead001, but use "detect on every collision" instead +# TEST of separate deadlock detector. +proc dead002 { { procs "2 4 10" } {tests "ring clump" } \ + {timeout 0} {tnum 002} {pri 0} } { + source ./include.tcl + + if {$timeout > 0 && $pri > 0} { + puts "Dead$tnum: Both timeout and priority cannot be set." + return + } + set msg "" + if { $pri == 1 } { + set msg " with priority" + } + puts "Dead$tnum: Deadlock detector tests (detect on every collision)" + + env_cleanup $testdir + + # Create the environment. 
+ puts "\tDead$tnum.a: creating environment" + set lmode "default" + if { $timeout != 0 } { + set lmode "expire" + } + set env [berkdb_env \ + -create -mode 0644 -home $testdir \ + -lock -lock_timeout $timeout -lock_detect $lmode] + error_check_good lock_env:open [is_valid_env $env] TRUE + + foreach t $tests { + foreach n $procs { + set pidlist "" + sentinel_init + + # Fire off the tests + puts "\tDead$tnum: $n procs of test $t $msg" + for { set i 0 } { $i < $n } { incr i } { + set locker [$env lock_id] + if {$pri == 1} { + $env lock_set_priority $locker $i + } + puts "$tclsh_path $test_path/wrap.tcl \ + ddscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $t $locker $i $n" + set p [exec $tclsh_path $test_path/wrap.tcl \ + ddscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $t $locker $i $n &] + lappend pidlist $p + # If we're running with timeouts, pause so that + # locks will have a chance to time out. + if { $timeout != 0 } { + tclsleep 2 + } + } + watch_procs $pidlist 5 + + # Now check output + # dead: the number of aborted lockers + # clean: the number of non-aborted lockers + # killed: the highest aborted locker + # kept: the highest non-aborted locker + # In a ring, only one locker is aborted. If testing + # priorities, it should be 0, the lowest priority. + # In a clump, only one locker is not aborted. If testing + # priorities, it should be n, the highest priority. + set dead 0 + set clean 0 + set other 0 + set killed $n + set kept $n + for { set i 0 } { $i < $n } { incr i } { + set did [open $testdir/dead$tnum.log.$i] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. + if { [string first \ + "profiling:" $val] == 0 } { + continue + } + switch $val { + DEADLOCK { + incr dead + set killed $i + } + 1 { + incr clean + set kept $i + } + default { incr other } + } + } + close $did + } + + puts "\tDead$tnum: dead check ..." 
+ dead_check $t $n $timeout $dead $clean $other + if { $pri == 1 } { + if { $t == "ring" } { + # Only the lowest priority killed in a + # ring + error_check_good low_priority_killed \ + $killed 0 + } elseif { $t == "clump" } { + # All but the highest priority killed in a + # clump + error_check_good high_priority_kept \ + $kept [expr $n - 1] + } + } + } + } + + fileremove -f $testdir/dd.out + # Remove log files + for { set i 0 } { $i < $n } { incr i } { + fileremove -f $testdir/dead$tnum.log.$i + } + error_check_good lock_env:close [$env close] 0 +} diff --git a/test/tcl/dead003.tcl b/test/tcl/dead003.tcl new file mode 100644 index 00000000..f3a7635c --- /dev/null +++ b/test/tcl/dead003.tcl @@ -0,0 +1,167 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead003 +# TEST +# TEST Same test as dead002, but explicitly specify DB_LOCK_OLDEST and +# TEST DB_LOCK_YOUNGEST. Verify the correct lock was aborted/granted. +proc dead003 { {procs "2 4 10"} {tests "ring clump"} {tnum "003"} {pri 0} } { + source ./include.tcl + global lock_curid + global lock_maxid + + set detects { oldest youngest } + set msg "" + if { $pri == 1 } { + set msg " with priority" + } + puts "Dead$tnum: Deadlock detector tests: $detects" + + # Create the environment. + foreach d $detects { + env_cleanup $testdir + puts "\tDead$tnum.a: creating environment for $d" + set env [berkdb_env \ + -create -mode 0644 -home $testdir -lock -lock_detect $d] + error_check_good lock_env:open [is_valid_env $env] TRUE + + foreach t $tests { + foreach n $procs { + if {$pri == 1 && $n == 2} { + puts "Skipping test for $n procs \ + with priority."; continue
+ } + set pidlist "" + sentinel_init + set ret [$env lock_id_set \ + $lock_curid $lock_maxid] + error_check_good lock_id_set $ret 0 + + # Fire off the tests + puts "\tDead$tnum: $n procs of test $t $msg" + for { set i 0 } { $i < $n } { incr i } { + set locker [$env lock_id] + # If testing priorities, set the oldest + # and youngest lockers to a higher + # priority, meaning the second oldest + # or second youngest locker will be + # aborted. + if {$pri == 1} { + if {$i == 0 || \ + $i == [expr $n - 1]} { + $env lock_set_priority \ + $locker 1 + } else { + $env lock_set_priority \ + $locker 0 + } + + } + puts "$tclsh_path\ + test_path/ddscript.tcl $testdir \ + $t $locker $i $n >& \ + $testdir/dead$tnum.log.$i" + set p [exec $tclsh_path \ + $test_path/wrap.tcl \ + ddscript.tcl \ + $testdir/dead$tnum.log.$i $testdir \ + $t $locker $i $n &] + lappend pidlist $p + } + watch_procs $pidlist 5 + + # Now check output + # dead: the number of aborted lockers + # clean: the number of non-aborted lockers + # killed: the highest aborted locker + # kept: the highest non-aborted locker + # In a ring, only one locker is aborted. + # In a clump, only one locker is not aborted. + set dead 0 + set clean 0 + set other 0 + set killed $n + set kept $n + for { set i 0 } { $i < $n } { incr i } { + set did [open $testdir/dead$tnum.log.$i] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. + if { [string first \ + "profiling:" $val] == 0 } { + continue + } + switch $val { + DEADLOCK { + incr dead + set killed $i + } + 1 { + incr clean + set kept $i + } + default { incr other } + } + } + close $did + } + puts "\tDead$tnum: dead check..." + dead_check $t $n 0 $dead $clean $other + # + # If we get here we know we have the + # correct number of dead/clean procs, as + # checked by dead_check above. Now verify + # that the right process was the one. 
+ puts "\tDead$tnum: Verify $d locks were aborted" + if {$pri == 0} { + set l "" + if { $d == "oldest" } { + set l [expr $n - 1] + } + if { $d == "youngest" } { + set l 0 + } + set did [open $testdir/dead$tnum.log.$l] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. + if { [string first \ + "profiling:" $val] == 0 } { + continue + } + error_check_good check_abort \ + $val 1 + } + close $did + } else { + if {$d == "oldest" && $t == "clump"} { + error_check_good check_abort \ + $kept [expr $n - 1] + } + if {$d == "oldest" && $t == "ring"} { + error_check_good check_abort \ + $killed 1 + } + if {$d == "youngest" && $t == "clump"} { + error_check_good check_abort \ + $kept 0 + } + if {$d == "youngest" && $t == "ring"} { + error_check_good check_abort \ + $killed [expr $n - 2] + } + } + } + } + + fileremove -f $testdir/dd.out + # Remove log files + for { set i 0 } { $i < $n } { incr i } { + fileremove -f $testdir/dead$tnum.log.$i + } + error_check_good lock_env:close [$env close] 0 + } +} diff --git a/test/tcl/dead004.tcl b/test/tcl/dead004.tcl new file mode 100644 index 00000000..b4d83d09 --- /dev/null +++ b/test/tcl/dead004.tcl @@ -0,0 +1,113 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Deadlock Test 4. +# This test is designed to make sure that we handle youngest and oldest +# deadlock detection even when the youngest and oldest transactions in the +# system are not involved in the deadlock (that is, we want to abort the +# youngest/oldest which is actually involved in the deadlock, not simply +# the youngest/oldest in the system). +# Since this is used for transaction systems, the locker ID is what we +# use to identify age (smaller number is older). +# +# The set up is that we have a total of 6 processes. 
The oldest (locker 0) +# and the youngest (locker 5) simply acquire a lock, hold it for a long time +# and then release it. The rest form a ring, obtaining lock N and requesting +# a lock on (N+1) mod 4. The deadlock detector ought to pick locker 1 or 4 +# to abort and not 0 or 5. + +proc dead004 { {tnum "004"} } { + source ./include.tcl + global lock_curid + global lock_maxid + + foreach a { o y } { + puts "Dead$tnum: Deadlock detector test -a $a" + env_cleanup $testdir + + # Create the environment. + puts "\tDead$tnum.a: creating environment" + set env [berkdb_env -create -mode 0644 -lock -home $testdir] + error_check_good lock_env:open [is_valid_env $env] TRUE + + set dpid [exec $util_path/db_deadlock -v -t 5 -a $a \ + -h $testdir >& $testdir/dd.out &] + + set procs 6 + + foreach n $procs { + + sentinel_init + set pidlist "" + set ret [$env lock_id_set $lock_curid $lock_maxid] + error_check_good lock_id_set $ret 0 + + # Fire off the tests + puts "\tDead$tnum: $n procs" + for { set i 0 } { $i < $n } { incr i } { + set locker [$env lock_id] + puts "$tclsh_path $test_path/wrap.tcl \ + $testdir/dead$tnum.log.$i \ + ddoyscript.tcl $testdir $locker $n $a $i" + set p [exec $tclsh_path \ + $test_path/wrap.tcl \ + ddoyscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $locker $n $a $i &] + lappend pidlist $p + } + watch_procs $pidlist 5 + + } + # Now check output + set dead 0 + set clean 0 + set other 0 + for { set i 0 } { $i < $n } { incr i } { + set did [open $testdir/dead$tnum.log.$i] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. + if { [string first \ + "profiling:" $val] == 0 } { + continue + } + switch $val { + DEADLOCK { incr dead } + 1 { incr clean } + default { incr other } + } + } + close $did + } + tclkill $dpid + + puts "\tDead$tnum: dead check..." + dead_check oldyoung $n 0 $dead $clean $other + + # Now verify that neither the oldest nor the + # youngest were the deadlock. 
+ set did [open $testdir/dead$tnum.log.0] + error_check_bad file:young [gets $did val] -1 + error_check_good read:young $val 1 + close $did + + set did [open $testdir/dead$tnum.log.[expr $procs - 1]] + error_check_bad file:old [gets $did val] -1 + error_check_good read:old $val 1 + close $did + + # Windows needs files closed before deleting files, + # so pause a little + tclsleep 2 + fileremove -f $testdir/dd.out + + # Remove log files + for { set i 0 } { $i < $n } { incr i } { + fileremove -f $testdir/dead$tnum.log.$i + } + error_check_good lock_env:close [$env close] 0 + } +} diff --git a/test/tcl/dead005.tcl b/test/tcl/dead005.tcl new file mode 100644 index 00000000..73a13f27 --- /dev/null +++ b/test/tcl/dead005.tcl @@ -0,0 +1,143 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Deadlock Test 5. +# Test out the minlocks, maxlocks, and minwrites options +# to the deadlock detector. +proc dead005 { { procs "4 6 10" } \ + {tests "maxlocks maxwrites minlocks minwrites" } { tnum "005" } { pri 0 } } { + source ./include.tcl + + set msg "" + if { $pri == 1 } { + set msg " with priority" + } + + foreach t $tests { + puts "Dead$tnum.$t: deadlock detection tests" + env_cleanup $testdir + + # Create the environment. + set env [berkdb_env -create -mode 0644 -lock -home $testdir] + error_check_good lock_env:open [is_valid_env $env] TRUE + case $t { + maxlocks { set to m } + maxwrites { set to W } + minlocks { set to n } + minwrites { set to w } + } + foreach n $procs { + set dpid [exec $util_path/db_deadlock -v -t 0.100000 \ + -h $testdir -a $to >& $testdir/dd.out &] + sentinel_init + set pidlist "" + + # Fire off the tests + puts "\tDead$tnum: $t test with $n procs $msg" + for { set i 0 } { $i < $n } { incr i } { + set locker [$env lock_id] + # Configure priorities, if necessary, such that + # the absolute max or min is a higher priority. 
+ # The number of locks for each locker is set by + # countlocks in testutils.tcl. + if {$pri == 1} { + if {$t == "maxlocks"} { + set half [expr $n / 2] + if {$i < $half} { + set lk_pri 0 + } else { + set lk_pri 1 + } + } elseif {$t == "maxwrites"} { + if {$i == 0 || $i == 1} { + set lk_pri 0 + } else { + set lk_pri 1 + } + } elseif {$t == "minlocks"} { + set half [expr $n / 2] + if {$i >= $half} { + set lk_pri 0 + } else { + set lk_pri 1 + } + } elseif {$t == "minwrites"} { + if {$i == 0 || $i == 2} { + set lk_pri 0 + } else { + set lk_pri 1 + } + } + $env lock_set_priority $locker $lk_pri + } + puts "$tclsh_path $test_path/wrap.tcl \ + $testdir/dead$tnum.log.$i \ + ddscript.tcl $testdir $t $locker $i $n" + set p [exec $tclsh_path \ + $test_path/wrap.tcl \ + ddscript.tcl $testdir/dead$tnum.log.$i \ + $testdir $t $locker $i $n &] + lappend pidlist $p + } + watch_procs $pidlist 5 + + # Now check output + set dead 0 + set clean 0 + set other 0 + for { set i 0 } { $i < $n } { incr i } { + set did [open $testdir/dead$tnum.log.$i] + while { [gets $did val] != -1 } { + # If the line comes from the + # profiling tool, ignore it. + if { [string first \ + "profiling:" $val] == 0 } { + continue + } + switch $val { + DEADLOCK { incr dead } + 1 { incr clean } + default { incr other } + } + } + close $did + } + tclkill $dpid + puts "\tDead$tnum: dead check..." + dead_check $t $n 0 $dead $clean $other + # Now verify that the correct participant + # got deadlocked. 
+ if {$pri == 0} { + switch $t { + maxlocks {set f [expr $n - 1]} + maxwrites {set f 2} + minlocks {set f 0} + minwrites {set f 1} + } + } else { + switch $t { + maxlocks {set f [expr [expr $n / 2] - 1]} + maxwrites {set f 0} + minlocks {set f [expr $n / 2]} + minwrites {set f 0} + } + } + + set did [open $testdir/dead$tnum.log.$f] + error_check_bad file:$t [gets $did val] -1 + error_check_good read($f):$t $val DEADLOCK + close $did + } + error_check_good lock_env:close [$env close] 0 + # Windows needs files closed before deleting them, so pause + tclsleep 2 + fileremove -f $testdir/dd.out + # Remove log files + for { set i 0 } { $i < $n } { incr i } { + fileremove -f $testdir/dead$tnum.log.$i + } + } +} diff --git a/test/tcl/dead006.tcl b/test/tcl/dead006.tcl new file mode 100644 index 00000000..89151ea5 --- /dev/null +++ b/test/tcl/dead006.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead006 +# TEST use timeouts rather than the normal dd algorithm. +proc dead006 { { procs "2 4 10" } {tests "ring clump" } \ + {timeout 1000} {tnum 006} } { + source ./include.tcl + + dead001 $procs $tests $timeout $tnum + dead002 $procs $tests $timeout $tnum +} diff --git a/test/tcl/dead007.tcl b/test/tcl/dead007.tcl new file mode 100644 index 00000000..7d836443 --- /dev/null +++ b/test/tcl/dead007.tcl @@ -0,0 +1,35 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead007 +# TEST Tests for locker and txn id wraparound. 
+proc dead007 { {tnum "007"} } { + source ./include.tcl + global lock_curid + global lock_maxid + + set save_curid $lock_curid + set save_maxid $lock_maxid + puts "Dead$tnum.a -- wrap around" + set lock_curid [expr $lock_maxid - 2] + dead001 "2 10" "ring clump" "0" $tnum + ## Oldest/youngest breaks when the id wraps + # dead003 "4 10" + dead004 $tnum + + puts "Dead$tnum.b -- extend space" + set lock_maxid [expr $lock_maxid - 3] + set lock_curid [expr $lock_maxid - 1] + dead001 "4 10" "ring clump" "0" $tnum + ## Oldest/youngest breaks when the id wraps + # dead003 "10" + dead004 $tnum + + set lock_curid $save_curid + set lock_maxid $save_maxid + # Return the empty string so we don't return lock_maxid. + return "" +} diff --git a/test/tcl/dead008.tcl b/test/tcl/dead008.tcl new file mode 100644 index 00000000..8a25375d --- /dev/null +++ b/test/tcl/dead008.tcl @@ -0,0 +1,13 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead008 +# TEST Run dead001 deadlock test using priorities +proc dead008 { {tnum "008"} } { + source ./include.tcl + dead001 "2 4 10" "ring clump" "0" $tnum 1 + +} diff --git a/test/tcl/dead009.tcl b/test/tcl/dead009.tcl new file mode 100644 index 00000000..1e71f8ff --- /dev/null +++ b/test/tcl/dead009.tcl @@ -0,0 +1,13 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead009 +# TEST Run dead002 deadlock test using priorities +proc dead009 { {tnum "009"} } { + source ./include.tcl + dead002 "2 4 10" "ring clump" "0" $tnum 1 + +} diff --git a/test/tcl/dead010.tcl b/test/tcl/dead010.tcl new file mode 100644 index 00000000..198384f8 --- /dev/null +++ b/test/tcl/dead010.tcl @@ -0,0 +1,16 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST dead010 +# TEST +# TEST Same test as dead003, except the actual youngest and oldest will have +# TEST higher priorities. Verify that the oldest/youngest of the lower +# TEST priority lockers gets killed. Doesn't apply to 2 procs. +proc dead010 { {procs "4 10"} {tests "ring clump"} {tnum "010"} } { + source ./include.tcl + + dead003 $procs $tests $tnum 1 +} diff --git a/test/tcl/dead011.tcl b/test/tcl/dead011.tcl new file mode 100644 index 00000000..0f8dc3c7 --- /dev/null +++ b/test/tcl/dead011.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST dead011 +# TEST Test out the maxlocks, maxwrites, minlocks, and minwrites options +# TEST to the deadlock detector when priorities are used. +proc dead011 { { procs "4 6 10" } \ +{tests "maxlocks maxwrites minlocks minwrites" } { tnum "011" } } { + source ./include.tcl + + dead005 $procs $tests $tnum 1 +} diff --git a/test/tcl/env001.tcl b/test/tcl/env001.tcl new file mode 100644 index 00000000..2cc581af --- /dev/null +++ b/test/tcl/env001.tcl @@ -0,0 +1,145 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env001 +# TEST Test of env remove interface (formerly env_remove). +proc env001 { } { + global errorInfo + global errorCode + + source ./include.tcl + + set testfile $testdir/env.db + set t1 $testdir/t1 + + puts "Env001: Test of environment remove interface." + env_cleanup $testdir + + # Try opening without Create flag should error + puts "\tEnv001.a: Open without create (should fail)." + catch {set env [berkdb_env_noerr -home $testdir]} ret + error_check_good env:fail [is_substr $ret "no such file"] 1 + + # Now try opening with create + puts "\tEnv001.b: Open with create." 
+ set env [berkdb_env -create -mode 0644 -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + + # Make sure that close works. + puts "\tEnv001.c: Verify close." + error_check_good env:close:$env [$env close] 0 + + # Make sure we can reopen. + puts "\tEnv001.d: Remove on closed environments." + puts "\t\tEnv001.d.1: Verify re-open." + set env [berkdb_env -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + + # remove environment + puts "\t\tEnv001.d.2: Close environment." + error_check_good env:close [$env close] 0 + puts "\t\tEnv001.d.3: Try remove with force (should succeed)." + error_check_good \ + envremove [berkdb envremove -force -home $testdir] 0 + + # HP-UX doesn't allow a second handle on an open env. + if { $is_hp_test != 1 } { + puts "\tEnv001.e: Remove on open environments." + puts "\t\tEnv001.e.1: Env is open by single proc,\ + remove no force." + set env [berkdb_env -create -mode 0644 -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + set stat [catch {berkdb envremove -home $testdir} ret] + error_check_good env:remove $stat 1 + error_check_good env:close [$env close] 0 + } + + puts \ + "\t\tEnv001.e.2: Env is open by single proc, remove with force." + if { $is_hp_test != 1 } { + set env [berkdb_env_noerr -create -mode 0644 -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + set stat [catch {berkdb envremove -force -home $testdir} ret] + error_check_good env:remove(force) $ret 0 + # + # Even though the underlying env is gone, we need to close + # the handle. + # + set stat [catch {$env close} ret] + error_check_bad env:close_after_remove $stat 0 + error_check_good env:close_after_remove \ + [is_substr $ret "recovery"] 1 + } + + puts "\t\tEnv001.e.3: Env is open by 2 procs, remove no force." 
+ # should fail + set env [berkdb_env -create -mode 0644 -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + + set remote_env [send_cmd $f1 "berkdb_env_noerr -home $testdir"] + error_check_good remote:env_open [is_valid_env $remote_env] TRUE + # First close our env, but leave remote open + error_check_good env:close [$env close] 0 + catch {berkdb envremove -home $testdir} ret + error_check_good envremove:2procs:noforce [is_substr $errorCode EBUSY] 1 + # + # even though it failed, $env is no longer valid, so remove it in + # the remote process + set remote_close [send_cmd $f1 "$remote_env close"] + error_check_good remote_close $remote_close 0 + + # exit remote process + set err [catch { close $f1 } result] + error_check_good close_remote_process $err 0 + + puts "\t\tEnv001.e.4: Env is open by 2 procs, remove with force." + if { $is_hp_test != 1 } { + set env [berkdb_env_noerr -create -mode 0644 -home $testdir] + error_check_bad env:$testdir $env NULL + error_check_good env:$testdir [is_substr $env "env"] 1 + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + + set remote_env [send_cmd $f1 "berkdb_env -home $testdir"] + error_check_good remote:env_open [is_valid_env $remote_env] TRUE + + catch {berkdb envremove -force -home $testdir} ret + error_check_good envremove:2procs:force $ret 0 + # + # We still need to close our handle. + # + set stat [catch {$env close} ret] + error_check_bad env:close_after_error $stat 0 + error_check_good env:close_after_error \ + [is_substr $ret recovery] 1 + + # Close down remote process + set err [catch { close $f1 } result] + error_check_good close_remote_process $err 0 + } + + # Try opening in a different dir + puts "\tEnv001.f: Try opening env in another directory." 
+ if { [file exists $testdir/NEWDIR] != 1 } { + file mkdir $testdir/NEWDIR + } + set eflags "-create -home $testdir/NEWDIR -mode 0644" + set env [eval {berkdb_env} $eflags] + error_check_bad env:open $env NULL + error_check_good env:close [$env close] 0 + error_check_good berkdb:envremove \ + [berkdb envremove -home $testdir/NEWDIR] 0 + + puts "\tEnv001 complete." +} diff --git a/test/tcl/env002.tcl b/test/tcl/env002.tcl new file mode 100644 index 00000000..829de457 --- /dev/null +++ b/test/tcl/env002.tcl @@ -0,0 +1,155 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env002 +# TEST Test of DB_LOG_DIR and env name resolution. +# TEST With an environment path specified using -home, and then again +# TEST with it specified by the environment variable DB_HOME: +# TEST 1) Make sure that the set_lg_dir option is respected +# TEST a) as a relative pathname. +# TEST b) as an absolute pathname. +# TEST 2) Make sure that the DB_LOG_DIR db_config argument is respected, +# TEST again as relative and absolute pathnames. +# TEST 3) Make sure that if -both- db_config and a file are present, +# TEST only the file is respected (see doc/env/naming.html). +proc env002 { } { + # env002 is essentially just a small driver that runs + # env002_body--formerly the entire test--twice; once, it + # supplies a "home" argument to use with environment opens, + # and the second time it sets DB_HOME instead. + # Note that env002_body itself calls env002_run_test to run + # the body of the actual test and check for the presence + # of logs. The nesting, I hope, makes this test's structure simpler. + + global env + source ./include.tcl + + puts "Env002: set_lg_dir test." + + puts "\tEnv002: Running with -home argument to berkdb_env." + env002_body "-home $testdir" + + puts "\tEnv002: Running with environment variable DB_HOME set." 
+ set env(DB_HOME) $testdir + env002_body "-use_environ" + + unset env(DB_HOME) + + puts "\tEnv002: Running with both DB_HOME and -home set." + # Should respect -only- -home, so we give it a bogus + # environment variable setting. + set env(DB_HOME) $testdir/bogus_home + env002_body "-use_environ -home $testdir" + unset env(DB_HOME) + +} + +proc env002_body { home_arg } { + source ./include.tcl + + env_cleanup $testdir + set logdir "logs_in_here" + + file mkdir $testdir/$logdir + + # Set up full path to $logdir for when we test absolute paths. + set curdir [pwd] + cd $testdir/$logdir + set fulllogdir [pwd] + cd $curdir + + env002_make_config $logdir + + # Run the meat of the test. + env002_run_test a 1 "relative path, config file" $home_arg \ + $testdir/$logdir + + env_cleanup $testdir + + file mkdir $fulllogdir + env002_make_config $fulllogdir + + # Run the test again + env002_run_test a 2 "absolute path, config file" $home_arg \ + $fulllogdir + + env_cleanup $testdir + + # Now we try without a config file, but instead with db_config + # relative paths + file mkdir $testdir/$logdir + env002_run_test b 1 "relative path, db_config" "$home_arg \ + -log_dir $logdir -data_dir ." \ + $testdir/$logdir + + env_cleanup $testdir + + # absolute + file mkdir $fulllogdir + env002_run_test b 2 "absolute path, db_config" "$home_arg \ + -log_dir $fulllogdir -data_dir ." \ + $fulllogdir + + env_cleanup $testdir + + # Now, set db_config -and- have a # DB_CONFIG file, and make + # sure only the latter is honored. + + file mkdir $testdir/$logdir + env002_make_config $logdir + + # note that we supply a -nonexistent- log dir to db_config + env002_run_test c 1 "relative path, both db_config and file" \ + "$home_arg -log_dir $testdir/bogus \ + -data_dir ." 
$testdir/$logdir + env_cleanup $testdir + + file mkdir $fulllogdir + env002_make_config $fulllogdir + + # note that we supply a -nonexistent- log dir to db_config + env002_run_test c 2 "relative path, both db_config and file" \ + "$home_arg -log_dir $fulllogdir/bogus \ + -data_dir ." $fulllogdir +} + +proc env002_run_test { major minor msg env_args log_path} { + global testdir + set testfile "env002.db" + + puts "\t\tEnv002.$major.$minor: $msg" + + # Create an environment, with logging, and scribble some + # stuff in a [btree] database in it. + # puts [concat {berkdb_env -create -log -private} $env_args] + set dbenv [eval {berkdb_env -create -log -private} $env_args] + error_check_good env_open [is_valid_env $dbenv] TRUE + set db [berkdb_open -env $dbenv -create -btree -mode 0644 $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + set key "some_key" + set data "some_data" + + error_check_good db_put \ + [$db put $key [chop_data btree $data]] 0 + + error_check_good db_close [$db close] 0 + error_check_good env_close [$dbenv close] 0 + + # Now make sure the log file is where we want it to be. + error_check_good db_exists [file exists $testdir/$testfile] 1 + error_check_good log_exists \ + [file exists $log_path/log.0000000001] 1 +} + +proc env002_make_config { logdir } { + global testdir + + set cid [open $testdir/DB_CONFIG w] + puts $cid "set_data_dir ." + puts $cid "set_lg_dir $logdir" + close $cid +} diff --git a/test/tcl/env003.tcl b/test/tcl/env003.tcl new file mode 100644 index 00000000..66893404 --- /dev/null +++ b/test/tcl/env003.tcl @@ -0,0 +1,148 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST env003 +# TEST Test DB_TMP_DIR and env name resolution +# TEST With an environment path specified using -home, and then again +# TEST with it specified by the environment variable DB_HOME: +# TEST 1) Make sure that the DB_TMP_DIR config file option is respected +# TEST a) as a relative pathname. +# TEST b) as an absolute pathname. +# TEST 2) Make sure that the -tmp_dir config option is respected, +# TEST again as relative and absolute pathnames. +# TEST 3) Make sure that if -both- -tmp_dir and a file are present, +# TEST only the file is respected (see doc/env/naming.html). +proc env003 { } { + # env003 is essentially just a small driver that runs + # env003_body twice. First, it supplies a "home" argument + # to use with environment opens, and the second time it sets + # DB_HOME instead. + # Note that env003_body itself calls env003_run_test to run + # the body of the actual test. + + global env + source ./include.tcl + + puts "Env003: DB_TMP_DIR test." + + puts "\tEnv003: Running with -home argument to berkdb_env." + env003_body "-home $testdir" + + puts "\tEnv003: Running with environment variable DB_HOME set." + set env(DB_HOME) $testdir + env003_body "-use_environ" + + unset env(DB_HOME) + + puts "\tEnv003: Running with both DB_HOME and -home set." + # Should respect -only- -home, so we give it a bogus + # environment variable setting. + set env(DB_HOME) $testdir/bogus_home + env003_body "-use_environ -home $testdir" + unset env(DB_HOME) +} + +proc env003_body { home_arg } { + source ./include.tcl + + env_cleanup $testdir + set tmpdir "tmpfiles_in_here" + file mkdir $testdir/$tmpdir + + # Set up full path to $tmpdir for when we test absolute paths. + set curdir [pwd] + cd $testdir/$tmpdir + set fulltmpdir [pwd] + cd $curdir + + # Create DB_CONFIG + env003_make_config $tmpdir + + # Run the meat of the test. 
+ env003_run_test a 1 "relative path, config file" $home_arg \ + $testdir/$tmpdir + + env003_make_config $fulltmpdir + + # Run the test again + env003_run_test a 2 "absolute path, config file" $home_arg \ + $fulltmpdir + + # Now we try without a config file, but instead with db_config + # relative paths + env003_run_test b 1 "relative path, db_config" "$home_arg \ + -tmp_dir $tmpdir -data_dir ." \ + $testdir/$tmpdir + + # absolute paths + env003_run_test b 2 "absolute path, db_config" "$home_arg \ + -tmp_dir $fulltmpdir -data_dir ." \ + $fulltmpdir + + # Now, set db_config -and- have a # DB_CONFIG file, and make + # sure only the latter is honored. + + file mkdir $testdir/bogus + env003_make_config $tmpdir + + env003_run_test c 1 "relative path, both db_config and file" \ + "$home_arg -tmp_dir $testdir/bogus -data_dir ." \ + $testdir/$tmpdir + + file mkdir $fulltmpdir/bogus + env003_make_config $fulltmpdir + + env003_run_test c 2 "absolute path, both db_config and file" \ + "$home_arg -tmp_dir $fulltmpdir/bogus -data_dir ." \ + $fulltmpdir +} + +proc env003_run_test { major minor msg env_args tmp_path} { + global testdir + global alphabet + global errorCode + + puts "\t\tEnv003.$major.$minor: $msg" + + # Create an environment and small-cached in-memory database to + # use. + set dbenv [eval {berkdb_env -create -home $testdir} $env_args \ + {-cachesize {0 50000 1}}] + error_check_good env_open [is_valid_env $dbenv] TRUE + + set db [berkdb_open -env $dbenv -create -btree] + error_check_good db_open [is_valid_db $db] TRUE + + # Fill the database with more than its cache can fit. + # + # When CONFIG_TEST is defined, the tempfile is left linked so + # we can check for its existence. Size the data to overfill + # the cache--the temp file is created lazily, so it is created + # when the cache overflows. + # + set key "key" + set data [repeat $alphabet 2000] + error_check_good db_put [$db put $key $data] 0 + + # Check for exactly one temp file. 
+ set ret [glob -nocomplain $tmp_path/BDB*] + error_check_good temp_file_exists [llength $ret] 1 + + # Can't remove temp file until db is closed on Windows. + error_check_good db_close [$db close] 0 + fileremove -f $ret + error_check_good env_close [$dbenv close] 0 + +} + +proc env003_make_config { tmpdir } { + global testdir + + set cid [open $testdir/DB_CONFIG w] + puts $cid "set_data_dir ." + puts $cid "set_tmp_dir $tmpdir" + close $cid +} diff --git a/test/tcl/env004.tcl b/test/tcl/env004.tcl new file mode 100644 index 00000000..ed29aabc --- /dev/null +++ b/test/tcl/env004.tcl @@ -0,0 +1,94 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env004 +# TEST Test multiple data directories. Do a bunch of different opens +# TEST to make sure that the files are detected in different directories. +proc env004 { } { + source ./include.tcl + + set method "hash" + set omethod [convert_method $method] + set args [convert_args $method ""] + + puts "Env004: Multiple data directory test." + + env_cleanup $testdir + file mkdir $testdir/data1 + file mkdir $testdir/data2 + file mkdir $testdir/data3 + + puts "\tEnv004.a: Multiple data directories in DB_CONFIG file" + + # Create a config file + set cid [open $testdir/DB_CONFIG w] + puts $cid "set_data_dir ." + puts $cid "set_data_dir data1" + puts $cid "set_data_dir data2" + puts $cid "set_data_dir data3" + close $cid + + set e [berkdb_env -create -private -home $testdir] + error_check_good dbenv [is_valid_env $e] TRUE + ddir_test $method $e $args + error_check_good env_close [$e close] 0 + + puts "\tEnv004.b: Multiple data directories in berkdb_env call." + env_cleanup $testdir + file mkdir $testdir/data1 + file mkdir $testdir/data2 + file mkdir $testdir/data3 + + # Now call dbenv with config specified + set e [berkdb_env -create -private \ + -data_dir . 
-data_dir data1 -data_dir data2 \ + -data_dir data3 -home $testdir] + error_check_good dbenv [is_valid_env $e] TRUE + ddir_test $method $e $args + error_check_good env_close [$e close] 0 +} + +proc ddir_test { method e args } { + source ./include.tcl + + set args [convert_args $args] + set omethod [convert_method $method] + + # Now create one file in each directory + set db1 [eval {berkdb_open -create \ + -truncate -mode 0644 $omethod -env $e} $args {data1/datafile1.db}] + error_check_good dbopen1 [is_valid_db $db1] TRUE + + set db2 [eval {berkdb_open -create \ + -truncate -mode 0644 $omethod -env $e} $args {data2/datafile2.db}] + error_check_good dbopen2 [is_valid_db $db2] TRUE + + set db3 [eval {berkdb_open -create \ + -truncate -mode 0644 $omethod -env $e} $args {data3/datafile3.db}] + error_check_good dbopen3 [is_valid_db $db3] TRUE + + # Close the files + error_check_good db_close1 [$db1 close] 0 + error_check_good db_close2 [$db2 close] 0 + error_check_good db_close3 [$db3 close] 0 + + # Now, reopen the files without complete pathnames and make + # sure that we find them. + + set db1 [berkdb_open -env $e datafile1.db] + error_check_good dbopen1 [is_valid_db $db1] TRUE + + set db2 [berkdb_open -env $e datafile2.db] + error_check_good dbopen2 [is_valid_db $db2] TRUE + + set db3 [berkdb_open -env $e datafile3.db] + error_check_good dbopen3 [is_valid_db $db3] TRUE + + # Finally close all the files + error_check_good db_close1 [$db1 close] 0 + error_check_good db_close2 [$db2 close] 0 + error_check_good db_close3 [$db3 close] 0 +} diff --git a/test/tcl/env005.tcl b/test/tcl/env005.tcl new file mode 100644 index 00000000..04a51aa3 --- /dev/null +++ b/test/tcl/env005.tcl @@ -0,0 +1,51 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env005 +# TEST Test that using subsystems without initializing them correctly +# TEST returns an error. 
Cannot test mpool, because it is assumed in +# TEST the Tcl code. +proc env005 { } { + source ./include.tcl + + puts "Env005: Uninitialized env subsystems test." + + env_cleanup $testdir + puts "\tEnv005.a: Creating env with no subsystems." + set e [berkdb_env_noerr -create -home $testdir] + error_check_good dbenv [is_valid_env $e] TRUE + set db [berkdb_open -create -btree $testdir/env005.db] + error_check_good dbopen [is_valid_db $db] TRUE + + set rlist { + { "lock_detect" "Env005.b0"} + { "lock_get read 1 1" "Env005.b1"} + { "lock_id" "Env005.b2"} + { "lock_stat" "Env005.b3"} + { "lock_timeout 100" "Env005.b4"} + { "log_archive" "Env005.c0"} + { "log_cursor" "Env005.c1"} + { "log_file {1 1}" "Env005.c2"} + { "log_flush" "Env005.c3"} + { "log_put record" "Env005.c4"} + { "log_stat" "Env005.c5"} + { "txn" "Env005.d0"} + { "txn_checkpoint" "Env005.d1"} + { "txn_stat" "Env005.d2"} + { "txn_timeout 100" "Env005.d3"} + } + + foreach pair $rlist { + set cmd [lindex $pair 0] + set msg [lindex $pair 1] + puts "\t$msg: $cmd" + set stat [catch {eval $e $cmd} ret] + error_check_good $cmd $stat 1 + error_check_good $cmd.err [is_substr $ret invalid] 1 + } + error_check_good dbclose [$db close] 0 + error_check_good envclose [$e close] 0 +} diff --git a/test/tcl/env006.tcl b/test/tcl/env006.tcl new file mode 100644 index 00000000..103c6716 --- /dev/null +++ b/test/tcl/env006.tcl @@ -0,0 +1,90 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env006 +# TEST Make sure that all the utilities exist and run. +# TEST Test that db_load -r options don't blow up. +proc env006 { } { + source ./include.tcl + + puts "Env006: Run underlying utilities." 
+ + set rlist { + { "db_archive" "Env006.a"} + { "db_checkpoint" "Env006.b"} + { "db_deadlock" "Env006.c"} + { "db_dump" "Env006.d"} + { "db_load" "Env006.e"} + { "db_printlog" "Env006.f"} + { "db_recover" "Env006.g"} + { "db_stat" "Env006.h"} + { "db_upgrade" "Env006.h"} + { "db_verify" "Env006.h"} + } + foreach pair $rlist { + set cmd [lindex $pair 0] + set msg [lindex $pair 1] + + puts "\t$msg: $cmd" + + set stat [catch {exec $util_path/$cmd -?} ret] + error_check_good $cmd $stat 1 + + # + # Check for "usage", but only check "sage" so that + # we can handle either Usage or usage. + # + error_check_good $cmd.err [is_substr $ret sage] 1 + } + + env_cleanup $testdir + set env [eval berkdb_env -create -home $testdir -txn] + error_check_good env_open [is_valid_env $env] TRUE + + set sub SUBDB + foreach case { noenv env } { + if { $case == "env" } { + set envargs " -env $env " + set homeargs " -h $testdir " + set testfile env006.db + } else { + set envargs "" + set homeargs "" + set testfile $testdir/env006.db + } + + puts "\tEnv006.i: Testing db_load -r with $case." + set db [eval berkdb_open -create $envargs -btree $testfile] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + set ret [eval \ + exec $util_path/db_load -r lsn $homeargs $testfile] + error_check_good db_load_r_lsn $ret "" + set ret [eval \ + exec $util_path/db_load -r fileid $homeargs $testfile] + error_check_good db_load_r_fileid $ret "" + + error_check_good db_remove \ + [eval {berkdb dbremove} $envargs $testfile] 0 + + puts "\tEnv006.j: Testing db_load -r with $case and subdbs." 
+ set db [eval berkdb_open -create $envargs -btree $testfile $sub] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + set ret [eval \ + exec {$util_path/db_load} -r lsn $homeargs $testfile] + error_check_good db_load_r_lsn $ret "" + set ret [eval \ + exec {$util_path/db_load} -r fileid $homeargs $testfile] + error_check_good db_load_r_fileid $ret "" + + error_check_good \ + db_remove [eval {berkdb dbremove} $envargs $testfile] 0 + } + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/env007.tcl b/test/tcl/env007.tcl new file mode 100644 index 00000000..11c26953 --- /dev/null +++ b/test/tcl/env007.tcl @@ -0,0 +1,923 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env007 +# TEST Test DB_CONFIG config file options for berkdb env. +# TEST 1) Make sure command line option is respected +# TEST 2) Make sure that config file option is respected +# TEST 3) Make sure that if -both- DB_CONFIG and the set_ +# TEST method is used, only the file is respected. +# TEST Then test all known config options. +# TEST Also test config options on berkdb open. This isn't +# TEST really env testing, but there's no better place to put it. +proc env007 { } { + global errorInfo + global errorCode + global passwd + global has_crypto + source ./include.tcl + + puts "Env007: DB_CONFIG and getters test." + puts "Env007.a: Test berkdb env options using getters and stat." + + # Set up options we can check via stat or getters. Structure + # of the list is: + # 0. Arg used in berkdb env command + # 1. Arg used in DB_CONFIG file + # 2. Value assigned in berkdb env command + # 3. Value assigned in DB_CONFIG file + # 4. Message output during test + # 5. Stat command to run (empty if we can't get the info + # from stat). + # 6. String to search for in stat output + # 7. 
Which arg to check in stat (needed for cases where + # we set more than one args at a time, but stat can + # only check one args, like cachesize) + # 8. Arg used in getter + # + # The initial values for both locks and lock objects have silently + # enforced minimums of 50 * #cpus. These values work for up to 8 cpus. + set rlist { + { " -txn_init " "set_memory_init DB_MEM_TRANSACTION" "19" "31" + "Env007.a1: Txn Init" "txn_stat" + "Initial txns" "0" "get_tx_init" } + { " -txn_max " "set_tx_max" "29" "51" + "Env007.a1: Txn Max" "txn_stat" + "Maximum txns" "0" "get_tx_max" } + { " -lock_locks " "set_memory_init DB_MEM_LOCK" "12407" "12429" + "Env007.a2: Lock Init" "lock_stat" + "Initial locks" "0" "get_lk_init_locks" } + { " -lock_max_locks " "set_lk_max_locks" "1070" "1290" + "Env007.a2: Lock Max" "lock_stat" + "Maximum locks" "0" "get_lk_max_locks" } + { " -lock_lockers " "set_memory_init DB_MEM_LOCKER" "150" "200" + "Env007.a3: Init Lockers" "lock_stat" + "Initial lockers" "0" "get_lk_init_lockers" } + { " -lock_max_lockers " "set_lk_max_lockers" "1500" "2000" + "Env007.a3: Max Lockers" "lock_stat" + "Maximum lockers" "0" "get_lk_max_lockers" } + { " -lock_objects " "set_memory_init DB_MEM_LOCKOBJECT" "12405" "12408" + "Env007.a4: Init Objects" "lock_stat" + "Initial objects" "0" "get_lk_init_objects" } + { " -lock_max_objects " "set_lk_max_objects" "1500" "2000" + "Env007.a4: Max Objects" "lock_stat" + "Maximum objects" "0" "get_lk_max_objects" } + { " -log_buffer " "set_lg_bsize" "65536" "131072" + "Env007.a5: Log Bsize" "log_stat" + "Log record cache size" "0" "get_lg_bsize" } + { " -log_max " "set_lg_max" "8388608" "9437184" + "Env007.a6: Log Max" "log_stat" + "Current log file size" "0" "get_lg_max" } + { " -cachesize " "set_cachesize" "0 536870912 1" "1 0 1" + "Env007.a7.0: Cachesize" "mpool_stat" + "Cache size (gbytes)" "0" "get_cachesize" } + { " -cachesize " "set_cachesize" "0 536870912 1" "1 0 1" + "Env007.a7.1: Cachesize" "mpool_stat" + "Cache size 
(bytes)" "1" "get_cachesize" } + { " -cachesize " "set_cachesize" "0 536870912 1" "1 0 1" + "Env007.a7.2: Cachesize" "mpool_stat" + "Number of caches" "2" "get_cachesize" } + { " -lock_timeout " "set_lock_timeout" "100" "120" + "Env007.a8: Lock Timeout" "lock_stat" + "Lock timeout value" "0" "get_timeout lock" } + { " -log_regionmax " "set_lg_regionmax" "8388608" "4194304" + "Env007.a9: Log Regionmax" "" + "Region size" "0" "get_lg_regionmax" } + { " -mpool_max_openfd " "set_mp_max_openfd" "17" "27" + "Env007.a10: Mmap max openfd" "mpool_stat" + "Maximum open file descriptors" "0" "get_mp_max_openfd" } + { " -mpool_max_write " "set_mp_max_write" "37 47" "57 67" + "Env007.a11.1: Mmap max write" "mpool_stat" + "Sleep after writing maximum buffers" "1" "get_mp_max_write" } + { " -mpool_mmap_size " "set_mp_mmapsize" "12582912" "8388608" + "Env007.a12: Mmapsize" "mpool_stat" + "Maximum memory-mapped file size" "0" "get_mp_mmapsize" } + { " -shm_key " "set_shm_key" "15" "35" + "Env007.a13: Shm Key" "" + "" "" "get_shm_key" } + { " -tmp_dir " "set_tmp_dir" "." 
"./TEMPDIR" + "Env007.a14: Temp dir" "" + "" "" "get_tmp_dir" } + { " -txn_timeout " "set_txn_timeout" "100" "120" + "Env007.a15: Txn timeout" "lock_stat" + "Transaction timeout value" "0" "get_timeout txn" } + { " -log_filemode " "set_lg_filemode" "417" "637" + "Env007.a16: Log FileMode" "log_stat" + "Log file mode" "0" "get_lg_filemode" } + {" -lock_partitions " "set_lk_partitions" "10" "20" + "Env007.a17: Lock Partitions" "lock_stat" + "Number of lock table partitions" "0" "get_lk_partitions" } + {" -mutex_set_align " "mutex_set_align" "8" "16" + "Env007.a18: Mutex align" "mutex_stat" + "Mutex align" "0" "mutex_get_align" } + {" -mutex_set_incr " "mutex_set_increment" "1000" "1500" + "Env007.a19: Mutex increment" "" + "" "" "mutex_get_incr" } + {" -mutex_set_max " "mutex_set_max" "2000" "2500" + "Env007.a20: Mutex max" "mutex_stat" + "Mutex max" "0" "mutex_get_max" } + {" -mutex_set_tas_spins " "mutex_set_tas_spins" "60" "85" + "Env007.a21: Mutex tas spins" "mutex_stat" + "Mutex TAS spins" "0" "mutex_get_tas_spins" } + {" -pagesize " "set_mp_pagesize" "4096" "8192" + "Env007.a22: Mpool pagesize" "" + "" "" "get_mp_pagesize" } + {" -reg_timeout " "set_reg_timeout" "25000" "35000" + "Env007.a23: Register timeout" "" + "" "" "get_timeout reg" } + } + + set e "berkdb_env_noerr -create -mode 0644 -home $testdir -txn " + set qnxexclude {set_cachesize} + + foreach item $rlist { + set envarg [lindex $item 0] + set configarg [lindex $item 1] + set envval [lindex $item 2] + set configval [lindex $item 3] + set msg [lindex $item 4] + set statcmd [lindex $item 5] + set statstr [lindex $item 6] + set index [lindex $item 7] + set getter [lindex $item 8] + + if { $is_qnx_test && + [lsearch $qnxexclude $configarg] != -1 } { + puts "\tEnv007.a: Skipping $configarg for QNX" + continue + } + + env_cleanup $testdir + + # First verify using just env args + puts "\t$msg Environment argument only" + set env [eval $e $envarg {$envval}] + error_check_good envopen:0 [is_valid_env $env] 
TRUE + error_check_good get_envval [eval $env $getter] $envval + if { $statcmd != "" } { + set statenvval [lindex $envval $index] + # log_stat reports the sum of the specified + # region size and the log buffer size. + if { $statstr == "Region size" } { + set lbufsize 32768 + set statenvval [expr $statenvval + $lbufsize] + } + env007_check $env $statcmd $statstr $statenvval + } + error_check_good envclose:0 [$env close] 0 + + env_cleanup $testdir + env007_make_config $configarg $configval + + # Verify using just config file + puts "\t$msg Config file only" + set env [eval $e] + error_check_good envopen:1 [is_valid_env $env] TRUE + error_check_good get_configval1 [eval $env $getter] $configval + if { $statcmd != "" } { + set statconfigval [lindex $configval $index] + if { $statstr == "Region size" } { + set statconfigval \ + [expr $statconfigval + $lbufsize] + } + env007_check $env $statcmd $statstr $statconfigval + } + error_check_good envclose:1 [$env close] 0 + + # Now verify using env args and config args + puts "\t$msg Environment arg and config file" + set env [eval $e $envarg {$envval}] + error_check_good envopen:2 [is_valid_env $env] TRUE + # Getter should retrieve config val, not envval. + error_check_good get_configval2 [eval $env $getter] $configval + if { $statcmd != "" } { + env007_check $env $statcmd $statstr $statconfigval + } + error_check_good envclose:2 [$env close] 0 + } + + # + # Test all options that can be set in DB_CONFIG. Write it out + # to the file and make sure we can open the env. This execs + # the config file code. Also check with a getter that the + # expected value is returned. + # + puts "\tEnv007.b1: Test berkdb env config options using getters\ + and env open." + + # The cfglist variable contains options that can be set in DB_CONFIG. + set cfglist { + { "set_data_dir" "." "get_data_dirs" "." } + { "add_data_dir" "." "get_data_dirs" "." } + { "set_create_dir" "." 
"get_create_dir" "."} + { "set_flags" "db_auto_commit" "get_flags" "-auto_commit" } + { "set_flags" "db_cdb_alldb" "get_flags" "-cdb_alldb" } + { "set_flags" "db_direct_db" "get_flags" "-direct_db" } + { "set_flags" "db_dsync_db" "get_flags" "-dsync_db" } + { "set_flags" "db_multiversion" "get_flags" "-multiversion" } + { "set_flags" "db_nolocking" "get_flags" "-nolock" } + { "set_flags" "db_nommap" "get_flags" "-nommap" } + { "set_flags" "db_nopanic" "get_flags" "-nopanic" } + { "set_flags" "db_overwrite" "get_flags" "-overwrite" } + { "set_flags" "db_region_init" "get_flags" "-region_init" } + { "set_flags" "db_time_notgranted" "get_flags" "-time_notgranted" } + { "set_flags" "db_txn_nosync" "get_flags" "-nosync" } + { "set_flags" "db_txn_nowait" "get_flags" "-nowait" } + { "set_flags" "db_txn_snapshot" "get_flags" "-snapshot" } + { "set_flags" "db_txn_write_nosync" "get_flags" "-wrnosync" } + { "set_flags" "db_yieldcpu" "get_flags" "-yield" } + { "set_flags" "db_log_inmemory" "log_get_config" "inmemory" } + { "set_flags" "db_direct_log" "log_get_config" "direct" } + { "set_flags" "db_dsync_log" "log_get_config" "dsync" } + { "set_flags" "db_log_autoremove" "log_get_config" "autoremove" } + { "set_lg_bsize" "65536" "get_lg_bsize" "65536" } + { "set_lg_dir" "." "get_lg_dir" "." 
} + { "set_lg_max" "8388608" "get_lg_max" "8388608" } + { "set_lg_regionmax" "262144" "get_lg_regionmax" "262144" } + { "set_lk_detect" "db_lock_default" "get_lk_detect" "default" } + { "set_lk_detect" "db_lock_expire" "get_lk_detect" "expire" } + { "set_lk_detect" "db_lock_maxlocks" "get_lk_detect" "maxlocks" } + { "set_lk_detect" "db_lock_maxwrite" "get_lk_detect" "maxwrite" } + { "set_lk_detect" "db_lock_minlocks" "get_lk_detect" "minlocks" } + { "set_lk_detect" "db_lock_minwrite" "get_lk_detect" "minwrite" } + { "set_lk_detect" "db_lock_oldest" "get_lk_detect" "oldest" } + { "set_lk_detect" "db_lock_random" "get_lk_detect" "random" } + { "set_lk_detect" "db_lock_youngest" "get_lk_detect" "youngest" } + { "set_lk_max_lockers" "1500" "get_lk_max_lockers" "1500" } + { "set_lk_max_locks" "1290" "get_lk_max_locks" "1290" } + { "set_lk_max_objects" "1500" "get_lk_max_objects" "1500" } + { "set_lk_partitions" "5" "get_lk_partitions" "5" } + { "set_lock_timeout" "100" "get_timeout lock" "100" } + { "set_mp_mmapsize" "12582912" "get_mp_mmapsize" "12582912" } + { "set_mp_max_write" "10 20" "get_mp_max_write" "10 20" } + { "set_mp_max_openfd" "10" "get_mp_max_openfd" "10" } + { "set_mp_pagesize" "8192" "get_mp_pagesize" "8192" } + { "set_open_flags" "db_private" "get_open_flags" "-private" } + { "set_open_flags" "db_private on" "get_open_flags" "-private" } + { "set_open_flags" "db_init_rep" "get_open_flags" "-rep" } + { "set_open_flags" "db_thread" "get_open_flags" "-thread" } + { "set_region_init" "1" "get_flags" "-region_init" } + { "set_reg_timeout" "60" "get_timeout reg" "60" } + { "set_shm_key" "15" "get_shm_key" "15" } + { "set_tas_spins" "15" "get_tas_spins" "15" } + { "set_tmp_dir" "." "get_tmp_dir" "." 
} + { "set_tx_max" "31" "get_tx_max" "31" } + { "set_txn_timeout" "50" "get_timeout txn" "50" } + { "set_verbose" "db_verb_deadlock" "get_verbose deadlock" "on" } + { "set_verbose" "db_verb_register" "get_verbose register" "on" } + { "set_verbose" "db_verb_replication" "get_verbose rep" "on" } + { "set_verbose" "db_verb_rep_elect" "get_verbose rep_elect" "on" } + { "set_verbose" "db_verb_rep_lease" "get_verbose rep_lease" "on" } + { "set_verbose" "db_verb_rep_misc" "get_verbose rep_misc" "on" } + { "set_verbose" "db_verb_rep_msgs" "get_verbose rep_msgs" "on" } + { "set_verbose" "db_verb_rep_sync" "get_verbose rep_sync" "on" } + { "set_verbose" "db_verb_rep_system" "get_verbose rep_system" "on" } + { "set_verbose" "db_verb_repmgr_connfail" + "get_verbose repmgr_connfail" "on" } + { "set_verbose" "db_verb_repmgr_misc" "get_verbose repmgr_misc" "on" } + { "set_verbose" "db_verb_waitsfor" "get_verbose wait" "on" } + { "log_set_config" "db_log_direct" "log_get_config" "direct" } + { "log_set_config" "db_log_dsync" "log_get_config" "dsync" } + { "log_set_config" "db_log_auto_remove" "log_get_config" "autoremove" } + { "log_set_config" "db_log_in_memory" "log_get_config" "inmemory" } + { "log_set_config" "db_log_zero" "log_get_config" "zero" } + { "mutex_set_align" "8" "mutex_get_align" "8" } + { "mutex_set_increment" "100" "mutex_get_incr" "100" } + { "mutex_set_max" "1000" "mutex_get_max" "1000" } + { "mutex_set_tas_spins" "32" "mutex_get_tas_spins" "32" } + } + + env_cleanup $testdir + set e "berkdb_env_noerr -create -mode 0644 -home $testdir \ + -txn -lock -log -thread" + set directlist {db_direct_db db_log_direct db_direct_log} + + foreach item $cfglist { + env_cleanup $testdir + set configarg [lindex $item 0] + set configval [lindex $item 1] + set getter [lindex $item 2] + set getval [lindex $item 3] + + set extra_cmd {} + if {$configarg == "set_create_dir"} { + set extra_cmd "-add_dir $configval" + } + + env007_make_config $configarg $configval + + # Verify using 
config file + puts "\t\tEnv007.b1: $configarg $configval" + + # Unconfigured/unsupported direct I/O is not reported + # as a failure. + set directmsg \ + "direct I/O either not configured or not supported" + if {[catch { eval $e $extra_cmd} env ]} { + if { [lsearch $directlist $configval] != -1 && \ + [is_substr $env $directmsg] == 1 } { + continue + } else { + puts "FAIL: $env" + continue + } + } + error_check_good envvalid:1 [is_valid_env $env] TRUE + + # get_open_flags returns a whole string of flags, so pick + # out the one we're looking for. The other getters are + # specific to the designated flag. + if { $getter == "get_open_flags" } { + set flags [eval $env $getter] + error_check_good flag_found [is_substr $flags $getval] 1 + } elseif { $getter == "log_get_config" } { + error_check_good log_get [eval $env $getter $getval] 1 + } else { + error_check_good getter:1 [eval $env $getter] $getval + } + + error_check_good envclose:1 [$env close] 0 + } + + # Some verbose options send output to stdout that we want + # to inspect, or don't want to send to the screen -- handle + # these by running them in their own Tcl shell. + # + # We use the same method as section .b1, but cfglist has one + # additional argument where we specify the message to look + # for in the log file. + + puts "\tEnv007.b2: Test berkdb env config options using getters\ + and env open in a separate Tcl shell." + + # The cfglist variable contains options that can be set in DB_CONFIG. 
+ set cfglist { + { "set_verbose" "db_verb_recovery" \ + "get_verbose recovery" "on" "No log files found"} + { "set_verbose" "db_verb_fileops" \ + "get_verbose fileops" "on" "fileops" } + { "set_verbose" "db_verb_fileops_all" \ + "get_verbose fileops_all" "on" "fileops: write" } + } + + foreach item $cfglist { + env_cleanup $testdir + + set configarg [lindex $item 0] + set configval [lindex $item 1] + set getter [lindex $item 2] + set getval [lindex $item 3] + set checkmsg [lindex $item 4] + + puts "\t\tEnv007.b2: $configarg $configval" + + set pid [exec $tclsh_path $test_path/wrap.tcl \ + env007script.tcl $testdir/env007.b2.output $configarg \ + $configval $getter $getval &] + + tclsleep 2 + watch_procs $pid 5 + error_check_good found_message \ + [findstring $checkmsg $testdir/env007.b2.output] 1 + + # Clean up before the next part. + fileremove -f $testdir/DB_CONFIG + } + + puts "\tEnv007.c: Test berkdb env options using getters and env open." + # The envopenlist variable contains options that can be set using + # berkdb env. We always set -mpool. 
+ + set envopenlist { + { "-system_mem" "-shm_key 20" "-system_mem" "get_open_flags" } + { "-cdb" "" "-cdb" "get_open_flags" } + { "-errpfx" "FOO" "FOO" "get_errpfx" } + { "-lock" "" "-lock" "get_open_flags" } + { "-log" "" "-log" "get_open_flags" } + { "" "" "-mpool" "get_open_flags" } + { "-txn -rep" "" "-rep" "get_open_flags" } + { "-txn" "" "-txn" "get_open_flags" } + { "-recover" "-txn" "-recover" "get_open_flags" } + { "-recover_fatal" "-txn" "-recover_fatal" "get_open_flags" } + { "-register" "-txn -recover" "-register" "get_open_flags" } + { "-use_environ" "" "-use_environ" "get_open_flags" } + { "-use_environ_root" "" "-use_environ_root" "get_open_flags" } + { "" "" "-create" "get_open_flags" } + { "-private" "" "-private" "get_open_flags" } + { "-thread" "" "-thread" "get_open_flags" } + { "-txn_timestamp" "100000000" "100000000" "get_tx_timestamp" } + } + + if { $has_crypto == 1 } { + lappend envopenlist { + "-encryptaes" "$passwd" "-encryptaes" "get_encrypt_flags" } + } + + set e "berkdb_env_noerr -create -mode 0644 -home $testdir" + set qnxexclude {-system_mem} + foreach item $envopenlist { + env_cleanup $testdir + set envarg [lindex $item 0] + set envval [lindex $item 1] + set retval [lindex $item 2] + set getter [lindex $item 3] + + if { $is_qnx_test && + [lsearch $qnxexclude $envarg] != -1} { + puts "\t\tEnv007: Skipping $envarg for QNX" + continue + } + + puts "\t\tEnv007.c: $envarg $retval" + + # Set up env + set ret [catch {eval $e $envarg $envval} env] + + if { $ret != 0 } { + # If the env open failed, it may be because we're on a + # platform such as HP-UX 10 that won't support mutexes + # in shmget memory. Verify that the return value was + # EINVAL or EOPNOTSUPP and bail gracefully. + error_check_good \ + is_shm_test [is_substr $envarg -system_mem] 1 + error_check_good returned_error [expr \ + [is_substr $errorCode EINVAL] || \ + [is_substr $errorCode EOPNOTSUPP]] 1 + puts "Warning: platform\ + does not support mutexes in shmget memory." 
+ puts "Skipping shared memory mpool test." + } else { + error_check_good env_open [is_valid_env $env] TRUE + + # Check that getter retrieves expected retval. + set get_retval [eval $env $getter] + if { [is_substr $get_retval $retval] != 1 } { + puts "FAIL: $retval\ + should be a substring of $get_retval" + continue + } + error_check_good envclose [$env close] 0 + + # The -encryptany flag can only be tested on an existing + # environment that supports encryption, so do it here. + if { $has_crypto == 1 } { + if { $envarg == "-encryptaes" } { + set env [eval berkdb_env -home $testdir\ + -encryptany $passwd] + error_check_good get_encryptany \ + [eval $env get_encrypt_flags] \ + "-encryptaes" + error_check_good envclose [$env close] 0 + } + } + } + } + + puts "\tEnv007.d: Test berkdb env options using set_flags and getters." + + # The flaglist variable contains options that can be set using + # $env set_flags. + set flaglist { + { "-direct_db" } + { "-dsync_db" } + { "-nolock" } + { "-nommap" } + { "-nopanic" } + { "-nosync" } + { "-overwrite" } + { "-panic" } + { "-snapshot" } + { "-time_notgranted" } + { "-wrnosync" } + { "-hotbackup_in_progress" } + } + set e "berkdb_env_noerr -create -mode 0644 -txn -home $testdir" + set directlist {-direct_db} + foreach item $flaglist { + set flag [lindex $item 0] + env_cleanup $testdir + + puts "\t\tEnv007.d: $flag" + # Set up env + set env [eval $e] + error_check_good envopen [is_valid_env $env] TRUE + + # Use set_flags to turn on new env characteristics. + # + # Unconfigured/unsupported direct I/O is not reported + # as a failure. + if {[catch { $env set_flags $flag on } res ]} { + if { [lsearch $directlist $flag] != -1 && \ + [is_substr $res $directmsg] == 1 } { + error_check_good env_close [$env close] 0 + continue + } else { + puts "FAIL: $res" + error_check_good env_close [$env close] 0 + continue + } + } else { + error_check_good "flag $flag on" $res 0 + } + + # Check that getter retrieves expected retval. 
+ # A call to get_flags with -panic will report + # that the env is panicked, so skip over this and + # turn it off. + if { $flag != "-panic" } { + set get_retval [eval $env get_flags] + if { [is_substr $get_retval $flag] != 1 } { + puts "FAIL: $flag should\ + be a substring of $get_retval" + error_check_good env_close [$env close] 0 + continue + } + } + + # Use set_flags to turn off env characteristics, make sure + # they are gone. + error_check_good "flag $flag off" [$env set_flags $flag off] 0 + set get_retval [eval $env get_flags] + if { [is_substr $get_retval $flag] == 1 } { + puts "FAIL: $flag should not be in $get_retval" + error_check_good env_close [$env close] 0 + continue + } + + error_check_good envclose [$env close] 0 + } + puts "\tEnv007.d1: Test berkdb env options using log_set_config and getters." + + # The flaglist variable contains options that can be set using + # $env log_config. + set flaglist { + { "autoremove" } + { "direct" } + { "dsync" } + { "zero" } + } + set e "berkdb_env_noerr -create -txn -mode 0644 -home $testdir" + set directlist {direct} + foreach item $flaglist { + set flag [lindex $item 0] + env_cleanup $testdir + + # Set up env + set env [eval $e] + error_check_good envopen [is_valid_env $env] TRUE + + # Use set_flags to turn on new env characteristics. + # + # Unconfigured/unsupported direct I/O is not reported + # as a failure. + if {[catch { $env log_config $flag on } res ]} { + if { [lsearch $directlist $flag] != -1 && \ + [is_substr $res $directmsg] == 1 } { + error_check_good env_close [$env close] 0 + continue + } else { + puts "FAIL: $res" + error_check_good env_close [$env close] 0 + continue + } + } else { + error_check_good "flag $flag on" $res 0 + } + + # Check that getter retrieves expected retval. 
+ set get_retval [eval $env log_get_config $flag] + if { $get_retval != 1 } { + puts "FAIL: $flag is not on" + error_check_good env_close [$env close] 0 + continue + } + # Use set_flags to turn off env characteristics, make sure + # they are gone. + error_check_good "flag $flag off" [$env log_config $flag off] 0 + set get_retval [eval $env log_get_config $flag] + if { $get_retval == 1 } { + puts "FAIL: $flag should off" + error_check_good env_close [$env close] 0 + continue + } + + error_check_good envclose [$env close] 0 + } + + puts "\tEnv007.e: Test env get_home." + env_cleanup $testdir + # Set up env + set env [eval $e] + error_check_good env_open [is_valid_env $env] TRUE + # Test for correct value. + set get_retval [eval $env get_home] + error_check_good get_home $get_retval $testdir + error_check_good envclose [$env close] 0 + + puts "\tEnv007.f: Test that bad config values are rejected." + set cfglist { + { "set_cache_max" "1" } + { "set_data_dir" "dir1 dir2" } + { "add_data_dir" "dir1 dir2" } + { "set_create_dir" "dir1 dir2" } + { "set_intermediate_dir_mode" "0644 0666" } + { "set_cachesize" "1048576" } + { "set_flags" "db_xxx" } + { "set_flags" "1" } + { "set_flags" "db_txn_nosync x" } + { "set_flags" "db_txn_nosync x x1" } + { "log_set_config" "db_log_xxx" } + { "log_set_config" "db_log_auto_remove x" } + { "log_set_config" "db_log_auto_remove x x1" } + { "set_lg_bsize" "db_xxx" } + { "set_lg_max" "db_xxx" } + { "set_lg_dir" "dir1 dir2" } + { "set_lg_regionmax" "db_xxx" } + { "set_lock_timeout" "lock 500"} + { "set_lk_detect" "db_xxx" } + { "set_lk_detect" "1" } + { "set_lk_detect" "db_lock_youngest x" } + { "set_lk_max_locks" "db_xxx" } + { "set_lk_max_lockers" "db_xxx" } + { "set_lk_max_objects" "db_xxx" } + { "set_mp_max_openfd" "1 2" } + { "set_mp_max_write" "1 2 3" } + { "set_mp_mmapsize" "db_xxx" } + { "set_open_flags" "db_private db_thread db_init_rep" } + { "set_open_flags" "db_private x" } + { "set_open_flags" "db_xxx" } + { "set_region_init" 
"db_xxx" } + { "set_region_init" "db_xxx 1" } + { "set_region_init" "100" } + { "set_reg_timeout" "reg 5000" } + { "set_shm_key" "db_xxx" } + { "set_shm_key" ""} + { "set_shm_key" "11 12 13"} + { "set_tas_spins" "db_xxx" } + { "set_tmp_dir" "dir1 dir2" } + { "set_tx_max" "db_xxx" } + { "set_txn_timeout" "txn 5000" } + { "set_verbose" "db_xxx" } + { "set_verbose" "1" } + { "set_verbose" "db_verb_recovery x" } + { "set_verbose" "db_verb_recovery x x1" } + } + + set e "berkdb_env_noerr -create -mode 0644 \ + -home $testdir -log -lock -txn " + foreach item $cfglist { + set configarg [lindex $item 0] + set configval [lindex $item 1] + + env007_make_config $configarg $configval + + # verify using just config file + set stat [catch {eval $e} ret] + error_check_good envopen $stat 1 + error_check_good error [is_substr $errorCode EINVAL] 1 + } + + puts "\tEnv007.g: Config name error set_xxx" + set e "berkdb_env_noerr -create -mode 0644 \ + -home $testdir -log -lock -txn " + env007_make_config "set_xxx" 1 + set stat [catch {eval $e} ret] + error_check_good envopen $stat 1 + error_check_good error [is_substr $errorInfo \ + "unrecognized name-value pair"] 1 + + puts "\tEnv007.h: Test berkdb open flags and getters." + # Check options that we configure with berkdb open and + # query via getters. Structure of the list is: + # 0. Flag used in berkdb open command + # 1. Value specified to flag + # 2. Specific method, if needed + # 3. 
Arg used in getter + set olist { + { "-minkey" "4" " -btree " "get_bt_minkey" } + { "-cachesize" "0 1048576 1" "" "get_cachesize" } + { "" "FILENAME DBNAME" "" "get_dbname" } + { "" "" "" "get_env" } + { "-errpfx" "ERROR:" "" "get_errpfx" } + { "" "-chksum" "" "get_flags" } + { "-delim" "58" "-recno" "get_re_delim" } + { "" "-dup" "" "get_flags" } + { "" "-dup -dupsort" "" "get_flags" } + { "" "-dup" "-hash" "get_flags" } + { "" "-dup -dupsort" "-hash" "get_flags" } + { "" "-recnum" "" "get_flags" } + { "" "-revsplitoff" "" "get_flags" } + { "" "-revsplitoff" "-hash" "get_flags" } + { "" "-inorder" "-queue" "get_flags" } + { "" "-renumber" "-recno" "get_flags" } + { "" "-snapshot" "-recno" "get_flags" } + { "" "-create" "" "get_open_flags" } + { "" "-create -read_uncommitted" "" "get_open_flags" } + { "" "-create -excl" "" "get_open_flags" } + { "" "-create -nommap" "" "get_open_flags" } + { "" "-create -thread" "" "get_open_flags" } + { "" "-create -truncate" "" "get_open_flags" } + { "-ffactor" "40" " -hash " "get_h_ffactor" } + { "-lorder" "4321" "" "get_lorder" } + { "-nelem" "10000" " -hash " "get_h_nelem" } + { "-pagesize" "4096" "" "get_pagesize" } + { "-extent" "4" "-queue" "get_q_extentsize" } + { "-len" "20" "-recno" "get_re_len" } + { "-pad" "0" "-recno" "get_re_pad" } + { "-source" "include.tcl" "-recno" "get_re_source" } + } + + set o "berkdb_open_noerr -create -mode 0644" + foreach item $olist { + cleanup $testdir NULL + set flag [lindex $item 0] + set flagval [lindex $item 1] + set method [lindex $item 2] + if { $method == "" } { + set method " -btree " + } + set getter [lindex $item 3] + + puts "\t\tEnv007.h: $flag $flagval $method" + + # Check that open is successful with the flag. + # The option -cachesize requires grouping for $flagval. 
+ if { $flag == "-cachesize" } { + set ret [catch {eval $o $method $flag {$flagval}\ + $testdir/a.db} db] + } else { + set ret [catch {eval $o $method $flag $flagval\ + $testdir/a.db} db] + } + if { $ret != 0 } { + # If the open failed, it may be because we're on a + # platform such as HP-UX 10 that won't support + # locks in process-local memory. + # Verify that the return value was EOPNOTSUPP + # and bail gracefully. + error_check_good \ + is_thread_test [is_substr $flagval -thread] 1 + error_check_good returned_error [expr \ + [is_substr $errorCode EINVAL] || \ + [is_substr $errorCode EOPNOTSUPP]] 1 + puts "Warning: platform does not support\ + locks inside process-local memory." + puts "Skipping test of -thread flag." + } else { + error_check_good dbopen:0 [is_valid_db $db] TRUE + + # Check that getter retrieves the correct value. + # Cachesizes under 500MB are adjusted upward to + # about 25% so just make sure we're in the right + # ballpark, between 1.2 and 1.3 of the original value. + if { $flag == "-cachesize" } { + set retval [eval $db $getter] + set retbytes [lindex $retval 1] + set setbytes [lindex $flagval 1] + error_check_good cachesize_low [expr\ + $retbytes > [expr $setbytes * 6 / 5]] 1 + error_check_good cachesize_high [expr\ + $retbytes < [expr $setbytes * 13 / 10]] 1 + } else { + error_check_good get_flagval \ + [eval $db $getter] $flagval + } + error_check_good dbclose:0 [$db close] 0 + } + } + + puts "\tEnv007.i: Test berkdb_open -rdonly." + # This test is done separately because -rdonly can only be specified + # on an already existing database. + set flag "-rdonly" + set db [eval berkdb_open $flag $testdir/a.db] + error_check_good open_rdonly [is_valid_db $db] TRUE + + error_check_good get_rdonly [eval $db get_open_flags] $flag + error_check_good dbclose:0 [$db close] 0 + + puts "\tEnv007.j: Test berkdb open flags and getters\ + requiring environments." + # Check options that we configure with berkdb open and + # query via getters. 
Structure of the list is: + # 0. Flag used in berkdb open command + # 1. Value specified to flag + # 2. Specific method, if needed + # 3. Arg used in getter + # 4. Additional flags needed in setting up env + + set elist { + { "" "-auto_commit" "" "get_open_flags" "" } + { "" "-notdurable" "" "get_flags" "" } + } + + if { $has_crypto == 1 } { + lappend elist \ + { "" "-encrypt" "" "get_flags" "-encryptaes $passwd" } + } + + set e "berkdb_env -create -home $testdir -txn " + set o "berkdb_open -create -btree -mode 0644 " + foreach item $elist { + env_cleanup $testdir + set flag [lindex $item 0] + set flagval [lindex $item 1] + set method [lindex $item 2] + if { $method == "" } { + set method " -btree " + } + set getter [lindex $item 3] + set envflag [lindex $item 4] + + puts "\t\tEnv007.j: $flag $flagval" + + # Check that open is successful with the flag. + set env [eval $e $envflag] + set db [eval $o -env $env $flag $flagval a.db] + error_check_good dbopen:0 [is_valid_db $db] TRUE + + # Check that getter retrieves the correct value + set get_flagval [eval $db $getter] + error_check_good get_flagval [is_substr $get_flagval $flagval] 1 + error_check_good dbclose [$db close] 0 + error_check_good envclose [$env close] 0 + } + + puts "\tEnv007.k: Test berkdb_open DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC." + # Test all combinations of DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC. If we're + # setting both of them, the previous setting would be cleared. 
+ set cfglist { + { "db_txn_nosync" "on" "db_txn_write_nosync" "on" "-nosync" "0" "-wrnosync" "1"} + { "db_txn_nosync" "off" "db_txn_write_nosync" "on" "-nosync" "0" "-wrnosync" "1"} + { "db_txn_nosync" "on" "db_txn_write_nosync" "off" "-nosync" "1" "-wrnosync" "0"} + { "db_txn_nosync" "off" "db_txn_write_nosync" "off" "-nosync" "0" "-wrnosync" "0"} + { "db_txn_write_nosync" "on" "db_txn_nosync" "on" "-wrnosync" "0" "-nosync" "1"} + { "db_txn_write_nosync" "off" "db_txn_nosync" "on" "-wrnosync" "0" "-nosync" "1"} + { "db_txn_write_nosync" "on" "db_txn_nosync" "off" "-wrnosync" "1" "-nosync" "0"} + { "db_txn_write_nosync" "off" "db_txn_nosync" "off" "-wrnosync" "0" "-nosync" "0"} + } + + foreach item $cfglist { + env_cleanup $testdir + set cfg1 [lindex $item 0] + set val1 [lindex $item 1] + set cfg2 [lindex $item 2] + set val2 [lindex $item 3] + set chk_cfg1 [lindex $item 4] + set chk_val1 [lindex $item 5] + set chk_cfg2 [lindex $item 6] + set chk_val2 [lindex $item 7] + + env007_append_config "w" "set_flags" "$cfg1" "$val1" + env007_append_config "a" "set_flags" "$cfg2" "$val2" + + set env [eval $e] + error_check_good envopen:1 [is_valid_env $env] TRUE + + # Check flags + set flags [eval $env "get_flags"] + error_check_good flag_found [is_substr $flags $chk_cfg1] $chk_val1 + error_check_good flag_found [is_substr $flags $chk_cfg2] $chk_val2 + error_check_good envclose [$env close] 0 + } +} + +proc env007_check { env statcmd statstr testval } { + set stat [$env $statcmd] + set checked 0 + foreach statpair $stat { + if {$checked == 1} { + break + } + set statmsg [lindex $statpair 0] + set statval [lindex $statpair 1] + if {[is_substr $statmsg $statstr] != 0} { + set checked 1 + error_check_good $statstr:ck $statval $testval + } + } + error_check_good $statstr:test $checked 1 +} + +proc env007_make_config { carg cval } { + global testdir + + set cid [open $testdir/DB_CONFIG w] + puts $cid "$carg $cval" + close $cid +} + +proc env007_append_config { mode carg cval 
onoff } { + global testdir + + set cid [open $testdir/DB_CONFIG $mode] + puts $cid "$carg $cval $onoff" + close $cid +} + +proc env007_eval_env { e } { + eval $e +} diff --git a/test/tcl/env007script.tcl b/test/tcl/env007script.tcl new file mode 100644 index 00000000..9f4aa4c4 --- /dev/null +++ b/test/tcl/env007script.tcl @@ -0,0 +1,37 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# env007script - for use with env007. +# Usage: configarg configval getter getval +# + +source ./include.tcl + +set usage "env007script configarg configval getter getval" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set configarg [lindex $argv 0] +set configval [lindex $argv 1] +set getter [lindex $argv 2] +set getval [lindex $argv 3] + +set e "berkdb_env_noerr -create -mode 0644 -home $testdir -txn" + +env007_make_config $configarg $configval + +# Verify using config file +set dbenv [eval $e] +error_check_good envvalid:1 [is_valid_env $dbenv] TRUE +set db [berkdb_open_noerr -create -env $dbenv -btree env007script.db] +error_check_good dbvalid:1 [is_valid_db $db] TRUE +error_check_good dbclose [$db close] 0 +error_check_good getter:1 [eval $dbenv $getter] $getval +error_check_good envclose:1 [$dbenv close] 0 + diff --git a/test/tcl/env008.tcl b/test/tcl/env008.tcl new file mode 100644 index 00000000..c84e7dc5 --- /dev/null +++ b/test/tcl/env008.tcl @@ -0,0 +1,72 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env008 +# TEST Test environments and subdirectories. 
+proc env008 { } { + global errorInfo + global errorCode + + source ./include.tcl + + env_cleanup $testdir + + set subdir 1/1 + set subdir1 1/2 + file mkdir $testdir/$subdir $testdir/$subdir1 + set testfile $subdir/env.db + + puts "Env008: Test of environments and subdirectories." + + puts "\tEnv008.a: Create env and db." + set env [berkdb_env -create -mode 0644 -home $testdir -txn] + error_check_good env [is_valid_env $env] TRUE + + puts "\tEnv008.b: Remove db in subdir." + env008_db $env $testfile + error_check_good dbremove:$testfile \ + [berkdb dbremove -env $env $testfile] 0 + + # + # Rather than remaking the db every time for the renames + # just move around the new file name to another new file + # name. + # + puts "\tEnv008.c: Rename db in subdir." + env008_db $env $testfile + set newfile $subdir/new.db + error_check_good dbrename:$testfile/.. \ + [berkdb dbrename -env $env $testfile $newfile] 0 + set testfile $newfile + + puts "\tEnv008.d: Rename db to parent dir." + set newfile $subdir/../new.db + error_check_good dbrename:$testfile/.. \ + [berkdb dbrename -env $env $testfile $newfile] 0 + set testfile $newfile + + puts "\tEnv008.e: Rename db to child dir." + set newfile $subdir/env.db + error_check_good dbrename:$testfile/.. \ + [berkdb dbrename -env $env $testfile $newfile] 0 + set testfile $newfile + + puts "\tEnv008.f: Rename db to another dir." + set newfile $subdir1/env.db + error_check_good dbrename:$testfile/.. \ + [berkdb dbrename -env $env $testfile $newfile] 0 + + error_check_good envclose [$env close] 0 + puts "\tEnv008 complete." 
+} + +proc env008_db { env testfile } { + set db [berkdb_open -env $env -create -btree $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set ret [$db put key data] + error_check_good dbput $ret 0 + error_check_good dbclose [$db close] 0 +} diff --git a/test/tcl/env009.tcl b/test/tcl/env009.tcl new file mode 100644 index 00000000..6fd4e6c4 --- /dev/null +++ b/test/tcl/env009.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env009 +# TEST Test calls to all the various stat functions. We have several +# TEST sprinkled throughout the test suite, but this will ensure that +# TEST we run all of them at least once. +proc env009 { } { + source ./include.tcl + + puts "Env009: Various stat functions test." + + env_cleanup $testdir + puts "\tEnv009.a: Setting up env and a database." + + set e [berkdb_env -create -home $testdir -txn] + error_check_good dbenv [is_valid_env $e] TRUE + set dbbt [berkdb_open -create -btree $testdir/env009bt.db] + error_check_good dbopen [is_valid_db $dbbt] TRUE + set dbh [berkdb_open -create -hash $testdir/env009h.db] + error_check_good dbopen [is_valid_db $dbh] TRUE + set dbq [berkdb_open -create -queue $testdir/env009q.db] + error_check_good dbopen [is_valid_db $dbq] TRUE + + puts "\tEnv009.b: Setting up replication master and client envs." 
+ replsetup $testdir/MSGQUEUEDIR + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $masterdir + file mkdir $clientdir + + repladd 1 + set repenv(M) [berkdb_env -create -home $masterdir \ + -txn -rep_master -rep_transport [list 1 replsend]] + repladd 2 + set repenv(C) [berkdb_env -create -home $clientdir \ + -txn -rep_client -rep_transport [list 2 replsend]] + + set rlist { + { "lock_stat" "Maximum locks" "Env009.c" $e } + { "log_stat" "Magic" "Env009.d" "$e" } + { "mpool_stat" "Number of caches" "Env009.e" "$e"} + { "txn_stat" "Maximum txns" "Env009.f" "$e" } + { "rep_stat" "{Environment ID} 1" "Env009.g (Master)" "$repenv(M)"} + { "rep_stat" "{Environment ID} 2" "Env009.h (Client)" "$repenv(C)"} + } + + foreach set $rlist { + set cmd [lindex $set 0] + set str [lindex $set 1] + set msg [lindex $set 2] + set env [lindex $set 3] + puts "\t$msg: $cmd" + set ret [eval $env $cmd] + error_check_good $cmd [is_substr $ret $str] 1 + } + + puts "\tEnv009.i: btree stats" + set ret [$dbbt stat] + error_check_good $cmd [is_substr $ret "Leaf pages"] 1 + + puts "\tEnv009.j: hash stats" + set ret [$dbh stat] + error_check_good $cmd [is_substr $ret "Buckets"] 1 + + puts "\tEnv009.k: queue stats" + set ret [$dbq stat] + error_check_good $cmd [is_substr $ret "Extent size"] 1 + + # Clean up. + error_check_good dbclose [$dbbt close] 0 + error_check_good dbclose [$dbh close] 0 + error_check_good dbclose [$dbq close] 0 + error_check_good masterenvclose [$repenv(M) close] 0 + error_check_good clientenvclose [$repenv(C) close] 0 + replclose $testdir/MSGQUEUEDIR + error_check_good envclose [$e close] 0 +} diff --git a/test/tcl/env010.tcl b/test/tcl/env010.tcl new file mode 100644 index 00000000..a779ff96 --- /dev/null +++ b/test/tcl/env010.tcl @@ -0,0 +1,49 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST env010 +# TEST Run recovery in an empty directory, and then make sure we can still +# TEST create a database in that directory. +proc env010 { } { + source ./include.tcl + + puts "Env010: Test of recovery in an empty directory." + + # Create a new directory used only for this test + + if { [file exists $testdir/EMPTYDIR] != 1 } { + file mkdir $testdir/EMPTYDIR + } else { + puts "\nDirectory already exists." + } + + # Do the test twice, for regular recovery and catastrophic + # Open environment and recover, but don't create a database + + foreach rmethod {recover recover_fatal} { + + puts "\tEnv010: Creating env for $rmethod test." + env_cleanup $testdir/EMPTYDIR + set e [berkdb_env \ + -create -home $testdir/EMPTYDIR -txn -$rmethod] + error_check_good dbenv [is_valid_env $e] TRUE + + # Open and close a database + # The method doesn't matter, so picked btree arbitrarily + + set db [eval {berkdb_open -env $e \ + -btree -create -mode 0644} ] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + # Close environment + + error_check_good envclose [$e close] 0 + error_check_good berkdb:envremove \ + [berkdb envremove -home $testdir/EMPTYDIR] 0 + } + puts "\tEnv010 complete." +} diff --git a/test/tcl/env011.tcl b/test/tcl/env011.tcl new file mode 100644 index 00000000..fc8266c8 --- /dev/null +++ b/test/tcl/env011.tcl @@ -0,0 +1,38 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env011 +# TEST Run with region overwrite flag. +proc env011 { } { + source ./include.tcl + + puts "Env011: Test of region overwriting." + env_cleanup $testdir + + puts "\tEnv011: Creating/closing env for open test." 
+ set e [berkdb_env -create -overwrite -home $testdir -txn] + error_check_good dbenv [is_valid_env $e] TRUE + set db [eval \ + {berkdb_open -auto_commit -env $e -btree -create -mode 0644} ] + error_check_good dbopen [is_valid_db $db] TRUE + set ret [eval {$db put} "aaa" "data"] + error_check_good put $ret 0 + set ret [eval {$db put} "bbb" "data"] + error_check_good put $ret 0 + error_check_good db_close [$db close] 0 + error_check_good envclose [$e close] 0 + + puts "\tEnv011: Opening the environment with overwrite set." + set e [berkdb_env -create -overwrite -home $testdir -txn -recover] + error_check_good dbenv [is_valid_env $e] TRUE + error_check_good envclose [$e close] 0 + + puts "\tEnv011: Removing the environment with overwrite set." + error_check_good berkdb:envremove \ + [berkdb envremove -home $testdir -overwrite] 0 + + puts "\tEnv011 complete." +} diff --git a/test/tcl/env012.tcl b/test/tcl/env012.tcl new file mode 100644 index 00000000..9ef24257 --- /dev/null +++ b/test/tcl/env012.tcl @@ -0,0 +1,393 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env012 +# TEST Test DB_REGISTER. +# TEST +# TEST DB_REGISTER will fail on systems without fcntl. If it +# TEST fails, make sure we got the expected DB_OPNOTSUP return. +# TEST +# TEST Then, the real tests: +# TEST For each test, we start a process that opens an env with -register. +# TEST +# TEST 1. Verify that a 2nd process can enter the existing env with -register. +# TEST +# TEST 2. Kill the 1st process, and verify that the 2nd process can enter +# TEST with "-register -recover". +# TEST +# TEST 3. Kill the 1st process, and verify that the 2nd process cannot +# TEST enter with just "-register". +# TEST +# TEST 4. While the 1st process is still running, a 2nd process enters +# TEST with "-register". Kill the 1st process. Verify that a 3rd process +# TEST can enter with "-register -recover". 
Verify that the 3rd process, +# TEST entering, causes process 2 to fail with the message DB_RUNRECOVERY. +# TEST +# TEST 5. We had a bug where recovery was always run with -register +# TEST if there were empty slots in the process registry file. Verify +# TEST that recovery doesn't automatically run if there is an empty slot. +# TEST +# TEST 6. Verify process cannot connect when specifying -failchk and an +# TEST isalive function has not been declared. +# TEST +# TEST 7. Verify that a 2nd process can enter the existing env with -register +# TEST and -failchk and having specified an isalive function +# TEST +# TEST 8. Kill the 1st process, and verify that the 2nd process can enter +# TEST with "-register -failchk -recover" +# TEST +# TEST 9. 2nd process enters with "-register -failchk". Kill the 1st process. +# TEST 2nd process may get blocked on a mutex held by process one. Verify +# TEST 3rd process can enter with "-register -recover -failchk". 3rd process +# TEST should run failchk, clear out open txn/log from process 1. It will +# TEST enter env without need for any additional recovery. We look for +# TEST "Freeing log information .." sentence in the log for 3rd process as +# TEST an indication that failchk ran. If DB_RUNRECOVERY were returned +# TEST instead it would mean failchk could not recover. + +proc env012 { } { + source ./include.tcl + set tnum "012" + + puts "Env$tnum: Test of DB_REGISTER." + + puts "\tEnv$tnum.a: Platforms without fcntl fail with DB_OPNOTSUP." + env_cleanup $testdir + if {[catch {eval {berkdb_env} \ + -create -home $testdir -txn -register -recover} env]} { + error_check_good fail_OPNOTSUP [is_substr $env DB_OPNOTSUP] 1 + puts "Skipping env$tnum; DB_REGISTER is not supported." + } + error_check_good env_close [$env close] 0 + + puts "\tEnv$tnum.b: Second process can join with -register." + env_cleanup $testdir + set testfile TESTFILE + set key KEY + set data DATA1 + + puts "\t\tEnv$tnum.b1: Start process 1." 
+ set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER 0 10 &] + + # Wait a while so process 1 has a chance to get going. + tclsleep 2 + + puts "\t\tEnv$tnum.b2: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data 0 0 0 &] + + watch_procs $p1 1 120 + watch_procs $p2 1 120 + + # Check log files for failures. + logcheck $testdir/env$tnum.log.p1 + logcheck $testdir/env$tnum.log.p2 + + puts "\tEnv$tnum.c: Second process can join with -register\ + -recover after first process is killed." + env_cleanup $testdir + + puts "\t\tEnv$tnum.c1: Start process 1." + set pids {} + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER 0 10 &] + lappend pids $p1 + tclsleep 2 + + puts "\t\tEnv$tnum.c2: Kill process 1." + set pids [findprocessids $testdir $pids] + foreach pid $pids { + tclkill $pid + } + + puts "\t\tEnv$tnum.c3: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data RECOVER 0 0 &] + + watch_procs $p2 1 120 + + # Check log files for failures. + logcheck $testdir/env$tnum.log.p1 + logcheck $testdir/env$tnum.log.p2 + + if { $is_windows_test == 1 } { + puts "Skipping sections .d and .e on Windows platform." + } else { + puts "\tEnv$tnum.d: Second process cannot join without -recover\ + after first process is killed." + env_cleanup $testdir + + puts "\t\tEnv$tnum.d1: Start process 1." + set pids {} + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER 0 10 &] + lappend pids $p1 + tclsleep 2 + + puts "\t\tEnv$tnum.d2: Kill process 1." 
+ set pids [findprocessids $testdir $pids] + foreach pid $pids { + tclkill $pid + } + + puts "\t\tEnv$tnum.d3: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data 0 0 0 &] + tclsleep 2 + watch_procs $p2 1 120 + + # Check log files. Log p1 should be clean, but we + # expect DB_RUNRECOVERY in log p2. + logcheck $testdir/env$tnum.log.p1 + logcheckfails $testdir/env$tnum.log.p2 DB_RUNRECOVERY + + puts "\tEnv$tnum.e: Running registered process detects failure." + env_cleanup $testdir + + puts "\t\tEnv$tnum.e1: Start process 1." + set pids {} + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER 0 10 &] + lappend pids $p1 + tclsleep 2 + + # Identify child process to kill later. + set pids [findprocessids $testdir $pids] + + puts "\t\tEnv$tnum.e2: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile LOOP $key $data 0 0 10 &] + + puts "\t\tEnv$tnum.e3: Kill process 1." + foreach pid $pids { + tclkill $pid + } + + puts "\t\tEnv$tnum.e4: Start process 3." + set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p3 \ + $testdir $testfile GET $key $data RECOVER 0 0 &] + tclsleep 2 + + watch_procs $p2 1 120 + watch_procs $p3 1 120 + + # Check log files. Logs p1 and p3 should be clean, but we + # expect DB_RUNRECOVERY in log p2. + logcheck $testdir/env$tnum.log.p1 + logcheckfails $testdir/env$tnum.log.p2 DB_RUNRECOVERY + logcheck $testdir/env$tnum.log.p3 + } + + puts "\tEnv$tnum.f: Empty slot shouldn't cause automatic recovery." + + # Create 2 empty slots in the registry by letting two processes + # run to completion. + puts "\t\tEnv$tnum.f1: Start process 1." 
+ set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER 0 1 &] + + puts "\t\tEnv$tnum.f2: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data 0 0 1 &] + + watch_procs $p1 1 60 + watch_procs $p2 1 60 + + logcheck $testdir/env$tnum.log.p1 + logcheck $testdir/env$tnum.log.p2 + + # Start two more process. Neither should signal a need for recovery. + puts "\t\tEnv$tnum.f3: Start process 3." + set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p3 \ + $testdir $testfile GET $key $data RECOVER 0 10 &] + + tclsleep 2 + + puts "\t\tEnv$tnum.f4: Start process 4." + set p4 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p4 \ + $testdir $testfile PUT $key $data 0 0 10 &] + + watch_procs $p3 1 120 + watch_procs $p4 1 120 + + # Check log files: neither process should have returned DB_RUNRECOVERY. + logcheck $testdir/env$tnum.log.p3 + logcheck $testdir/env$tnum.log.p4 + + puts "\tEnv$tnum.g: One process with -register & -failchk & no isalive" + + # use -failchk only, test will fail as isalive function is needed + puts "\t\tEnv$tnum.g1: Start process 1." + env_cleanup $testdir + + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER FAILCHK0 10 &] + + watch_procs $p1 1 60 + + # Check log files for failures. Expect to see a failure. + logcheckfails $testdir/env$tnum.log.p1 DB_FAILCHK + + puts "\tEnv$tnum.h: Second process joins with -register and -failchk." + env_cleanup $testdir + + # use -failchk and -isalive flags + puts "\t\tEnv$tnum.h1: Start process 1." + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER FAILCHK1 10 &] + + # Wait a while so process 1 has a chance to get going. 
+ tclsleep 2 + + puts "\t\tEnv$tnum.h2: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data 0 FAILCHK1 0 &] + + watch_procs $p1 1 120 + watch_procs $p2 1 120 + + # Check log files for failures. + logcheck $testdir/env$tnum.log.p1 + logcheck $testdir/env$tnum.log.p2 + + puts "\tEnv$tnum.i: Second process can join with -register\ + -recover -failchk after first process is killed." + env_cleanup $testdir + + puts "\t\tEnv$tnum.i1: Start process 1." + set pids {} + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile PUT $key $data RECOVER FAILCHK1 10 &] + lappend pids $p1 + tclsleep 2 + + puts "\t\tEnv$tnum.i2: Kill process 1." + set pids [findprocessids $testdir $pids] + foreach pid $pids { + tclkill $pid + } + + puts "\t\tEnv$tnum.i3: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile GET $key $data RECOVER FAILCHK1 0 &] + + watch_procs $p2 1 120 + + # Check log files for failures. + logcheck $testdir/env$tnum.log.p1 + logcheck $testdir/env$tnum.log.p2 + + if { $is_windows_test == 1 } { + puts "Skipping sections .j on Windows platform." + } else { + puts "\tEnv$tnum.j: Running registered process detects failure and recovers." + env_cleanup $testdir + + puts "\t\tEnv$tnum.j1: Start process 1." + set pids {} + set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p1 \ + $testdir $testfile LOOP $key $data RECOVER FAILCHK1 5 &] + lappend pids $p1 + tclsleep 2 + + # Identify child process to kill later. + set pids [findprocessids $testdir $pids] + + puts "\t\tEnv$tnum.j2: Start process 2." + set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p2 \ + $testdir $testfile LOOP $key $data 0 0 10 &] + + puts "\t\tEnv$tnum.j3: Kill process 1." 
+ foreach pid $pids { + tclkill $pid + } + + #identify child process 2, do after process 1 has died + lappend pids $p2 + + # Identify child process to kill later. + set pids [findprocessids $testdir $pids] + + puts "\t\tEnv$tnum.j4: Start process 3." + set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \ + $testdir/env$tnum.log.p3 \ + $testdir $testfile GET $key $data RECOVER FAILCHK1 0 &] + #sleep for approx 20 seconds -- process 2 should still be going + tclsleep 20 + + puts "\t\tEnv$tnum.j5: Kill process 2." + foreach pid $pids { + tclkill $pid + } + + watch_procs $p3 1 30 + + # Check log files. Logs p1 and p2 should be clean, but we + # expect failchk messages in p3 from cleanup + logcheckfails $testdir/env$tnum.log.p3 Freeing + logcheck $testdir/env$tnum.log.p2 + logcheck $testdir/env$tnum.log.p1 + } +} + +# Check log file and report failures with FAIL. Use this when +# we don't expect failures. +proc logcheck { logname } { + set errstrings [eval findfail $logname] + foreach errstring $errstrings { + puts "FAIL: error in $logname : $errstring" + } +} + +# When we expect a failure, verify we find the one we expect. +proc logcheckfails { logname message } { + set f [open $logname r] + while { [gets $f line] >= 0 } { + if { [is_substr $line $message] == 1 } { + close $f + return 0 + } + } + close $f + puts "FAIL: Did not find expected error $message." +} + +# The script wrap.tcl creates a parent and a child process. We +# can't see the child pids, so find them by their sentinel files. +# This creates a list where the parent pid is always listed +# before the child pid. +proc findprocessids { testdir plist } { + set beginfiles [glob $testdir/begin.*] + foreach b $beginfiles { + regsub $testdir/begin. 
$b {} pid + if { [lsearch -exact $plist $pid] == -1 } { + lappend plist $pid + } + } + return $plist +} + diff --git a/test/tcl/env013.tcl b/test/tcl/env013.tcl new file mode 100644 index 00000000..985b5731 --- /dev/null +++ b/test/tcl/env013.tcl @@ -0,0 +1,84 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env013 +# TEST Test of basic functionality of fileid_reset. +# TEST +# TEST Create a database in an env. Copy it to a new file within +# TEST the same env. Reset the file id and make sure it has changed. +proc env013 { } { + source ./include.tcl + global util_path + + puts "Env013: Test fileid_reset." + + set testfile A.db + set dupfile B.db + set nentries 500 + set filenames "A B C D E" + + foreach lorder { 1234 4321 } { + puts "\tEnv013.a: Creating env." + env_cleanup $testdir + set env [berkdb_env -create -home $testdir -txn] + error_check_good dbenv [is_valid_env $env] TRUE + + # Open database A, populate and close. + puts "\tEnv013.b: Creating database with lorder $lorder." + foreach filename $filenames { + set db [eval {berkdb_open \ + -pagesize 8192 -env $env -auto_commit \ + -btree -create -mode 0644 $testfile $filename} ] + error_check_good dbopen [is_valid_db $db] TRUE + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + for { set i 0 } { $i < $nentries } { incr i } { + set key KEY.$i + set data DATA.$i + error_check_good\ + db_put [$db put -txn $t $key $data] 0 + } + error_check_good txn_commit [$t commit] 0 + error_check_good db_close [$db close] 0 + } + + # Copy database file A to database file B for fileid testing. + puts "\tEnv013.c: Copy database." + file copy -force $testdir/$testfile $testdir/$dupfile + + # Reset B's fileid and confirm the ID has changed. + puts "\tEnv013.d: Resetting file id for copied database." 
+ error_check_good fileid_reset [$env id_reset $dupfile] 0 + set orig_id [getfileid $testdir/$testfile] + puts "\tEnv013.d: orig: $orig_id" + set new_id [getfileid $testdir/$dupfile] + puts "\tEnv013.d: new: $new_id" + error_check_bad id_changed $orig_id $new_id + + # Verify and open B. + puts "\tEnv013.e: Verify and open database copy." + error_check_good verify [verify_dir $testdir "\tEnv013.e: "] 0 + set db [eval {berkdb_open} \ + -env $env -auto_commit -btree -mode 0644 -rdonly $dupfile] + error_check_good dup_open [is_valid_db $db] TRUE + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + } +} + +# Get file id number, identified as "uid" in db_stat. +proc getfileid { db } { + global util_path + + set ret [exec $util_path/db_dump -da $db] + set uidstart [string first "uid:" $ret] + set uidend [string first "\tminkey:" $ret] + set uid [string range $ret $uidstart $uidend] + set uid [string trimright $uid] + return $uid +} diff --git a/test/tcl/env014.tcl b/test/tcl/env014.tcl new file mode 100644 index 00000000..7ef06658 --- /dev/null +++ b/test/tcl/env014.tcl @@ -0,0 +1,117 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env014 +# TEST +# TEST Make sure that attempts to open an environment with +# TEST incompatible flags (e.g. replication without transactions) +# TEST fail with the appropriate messages. +# TEST +# TEST A new thread of control joining an env automatically +# TEST initializes the same subsystems as the original env. +# TEST Make sure that the attempt to change subsystems when +# TEST joining an env fails with the appropriate messages. + +proc env014 { } { + source ./include.tcl + + set tnum "014" + puts "Env$tnum: Environment subsystem initialization and env joins." + env_cleanup $testdir + + # Open an env with -recover but not -create; should fail. 
+ puts "\tEnv$tnum.a: Open env with -recover but not -create." + catch {set env [berkdb_env_noerr -recover -txn -home $testdir]} ret + error_check_good recover_wo_create \ + [is_substr $ret "requires the create flag"] 1 + + # Open an env with -recover but not -txn; should fail. + puts "\tEnv$tnum.b: Open env with -recover but not -txn." + catch {set env [berkdb_env_noerr -create -recover -home $testdir]} ret + error_check_good recover_wo_txn \ + [is_substr $ret "requires transaction support"] 1 + + # Open an env with -replication but not -lock; should fail. + puts "\tEnv$tnum.c: Open env with -rep but not -lock." + catch {set env\ + [berkdb_env_noerr -create -rep_master -home $testdir]} ret + error_check_good rep_wo_lock \ + [is_substr $ret "requires locking support"] 1 + + # Open an env with -replication but not -txn; should fail. + puts "\tEnv$tnum.d: Open env with -rep but not -txn." + catch {set env\ + [berkdb_env_noerr -create -rep_master -lock -home $testdir]} ret + error_check_good rep_wo_txn \ + [is_substr $ret "requires transaction support"] 1 + + # Skip remainder of test for HP-UX; HP-UX does not allow + # opening a second handle on an environment. + if { $is_hp_test == 1 } { + puts "Skipping remainder of env$tnum for HP-UX." + return + } + + # Join -txn env with -cdb; should fail. + puts "\tEnv$tnum.e: Join -txn env with -cdb." + set env [berkdb_env_noerr -create -home $testdir -txn] + error_check_good env_open [is_valid_env $env] TRUE + + catch {set env2 [berkdb_env_noerr -home $testdir -cdb]} ret + error_check_good txn+cdb [is_substr $ret "incompatible"] 1 + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -force -home $testdir] 0 + + # Join -cdb env with -txn; should fail. + puts "\tEnv$tnum.f: Join -cdb env with -txn." 
+ set env [berkdb_env_noerr -create -home $testdir -cdb] + error_check_good env_open [is_valid_env $env] TRUE + + catch {set env2 [berkdb_env_noerr -home $testdir -txn]} ret + error_check_good cdb+txn [is_substr $ret "incompatible"] 1 + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -force -home $testdir] 0 + + # Open an env with -txn. Join the env, and start a txn. + puts "\tEnv$tnum.g: Join -txn env, and start a txn." + set env [berkdb_env_noerr -create -home $testdir -txn] + error_check_good env_open [is_valid_env $env] TRUE + set env2 [berkdb_env_noerr -home $testdir] + error_check_good env2_open [is_valid_env $env2] TRUE + + set txn [$env2 txn] + error_check_good env2_txn [is_valid_txn $txn $env2] TRUE + error_check_good txn_commit [$txn commit] 0 + + error_check_good env2_close [$env2 close] 0 + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -force -home $testdir] 0 + + # Join -txn env with -lock; should succeed and use txns. + puts "\tEnv$tnum.h: Join -txn env with -lock, and start a txn." + set env [berkdb_env_noerr -create -home $testdir -txn] + error_check_good env_open [is_valid_env $env] TRUE + set env2 [berkdb_env_noerr -home $testdir -lock] + error_check_good env2_open [is_valid_env $env2] TRUE + + set txn [$env2 txn] + error_check_good env2_txn [is_valid_txn $txn $env2] TRUE + error_check_good txn_commit [$txn commit] 0 + + error_check_good env2_close [$env2 close] 0 + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -force -home $testdir] 0 + + # Join plain vanilla env with -txn; should fail. + puts "\tEnv$tnum.i: Join plain vanilla env with -txn." 
+ set env [berkdb_env_noerr -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + catch {set env2 [berkdb_env_noerr -home $testdir -txn]} ret + error_check_good ds+txn [is_substr $ret "incompatible"] 1 + + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -force -home $testdir] 0 +} diff --git a/test/tcl/env015.tcl b/test/tcl/env015.tcl new file mode 100644 index 00000000..237184fd --- /dev/null +++ b/test/tcl/env015.tcl @@ -0,0 +1,85 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env015 +# TEST Rename the underlying directory of an env, make sure everything +# TEST still works. Test runs with regular named databases and with +# TEST in-memory named databases. +proc env015 { } { + source ./include.tcl + + env_cleanup $testdir + set newdir NEWDIR + + puts "Env015: Test of renaming env directories." + + foreach dbtype { inmem ondisk } { + puts "\tEnv015.a: Create env." + set env [berkdb_env -create -mode 0644 -home $testdir] + error_check_good env [is_valid_env $env] TRUE + + puts "\tEnv015.b: Create $dbtype db." + if { $dbtype == "inmem" } { + set testfile { "" file1.db } + } else { + set testfile file1.db + } + set db [eval {berkdb_open} -create -env $env -btree $testfile] + error_check_good db_open [is_valid_db $db] TRUE + for { set i 0 } { $i < 10 } { incr i } { + error_check_good db_put [$db put $i $i] 0 + } + + # When the database is on disk, we have a file handle open + # during the attempt to rename the directory. As far as we + # can tell, Windows doesn't allow this (that is, Windows + # doesn't allow directories to be renamed when there is an + # open handle inside them). For QNX, tclsh can not rename a + # directory correctly while there are shared memory files in + # that directory. + puts "\tEnv015.b: Rename directory." 
+ if { $is_windows_test || $is_qnx_test } { + file mkdir $newdir + eval file rename -force [glob $testdir/*] $newdir + fileremove -force $testdir + } else { + file rename -force $testdir $newdir + } + + puts "\tEnv015.c: Database is still available in new directory." + for { set i 0 } { $i < 10 } { incr i } { + set ret [$db get $i] + error_check_good db_get [lindex [lindex $ret 0] 1] $i + } + + puts "\tEnv015.d: Can't open database in old directory." + catch {set db2 [eval \ + {berkdb_open} -env $env -btree $testdir/$testfile]} db2 + error_check_bad open_fails [is_valid_db $db2] TRUE + + puts \ + "\tEnv015.e: Recreate directory with original name and use it." + file mkdir $testdir + set newenv [berkdb_env -create -mode 0644 -home $testdir] + error_check_good newenv [is_valid_env $env] TRUE + + set newdb [berkdb_open -create -env $newenv -btree foo.db] + error_check_good newdb_open [is_valid_db $newdb] TRUE + + # There should not be any data in the new db. + for { set i 0 } { $i < 10 } { incr i } { + set ret [$newdb get $i] + error_check_good db_get [llength $ret] 0 + } + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good newdb_close [$newdb close] 0 + error_check_good envclose [$env close] 0 + error_check_good newenvclose [$newenv close] 0 + fileremove -f $newdir + } +} diff --git a/test/tcl/env016.tcl b/test/tcl/env016.tcl new file mode 100644 index 00000000..efed1b75 --- /dev/null +++ b/test/tcl/env016.tcl @@ -0,0 +1,243 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env016 +# TEST Replication settings and DB_CONFIG +# TEST +# TEST Create a DB_CONFIG for various replication settings. Use +# TEST rep_stat or getter functions to verify they're set correctly. +# +proc env016 { } { + global errorCode + source ./include.tcl + + puts "Env016: Replication DB_CONFIG settings." + + # + # Test options that we query via rep_stat. 
+ # Structure of the list is: + # 0. Arg used in DB_CONFIG. + # 1. Value assigned in DB_CONFIG. + # 2. Message output during test. + # 3. String to search for in stat output. + # + set slist { + { "rep_set_priority" "1" "Env016.a0: Priority" + "Environment priority" } + } + puts "\tEnv016.a: Check settings via rep_stat." + foreach l $slist { + set carg [lindex $l 0] + set val [lindex $l 1] + set msg [lindex $l 2] + set str [lindex $l 3] + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + repladd 1 + + # Open a master. + puts "\t\t$msg" + # + # Create DB_CONFIG. + # + env016_make_config $masterdir $carg $val + # + # Open env. + # + set ma_envcmd "berkdb_env_noerr -create -txn nosync \ + -home $masterdir -errpfx MASTER -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd] + # + # Verify value + # + set gval [stat_field $masterenv rep_stat $str] + error_check_good stat_get $gval $val + + error_check_good masterenv_close [$masterenv close] 0 + replclose $testdir/MSGQUEUEDIR + } + + # Test options that we query via getter functions. + # Structure of the list is: + # 0. Arg used in DB_CONFIG. + # 1. Value assigned in DB_CONFIG. + # 2. Message output during test. + # 3. Getter command. + # 4. Getter results expected if different from #1 value. 
+ set glist { + { "rep_set_clockskew" "102 100" "Env016.b0: Rep clockskew" + "rep_get_clockskew" } + { "rep_set_config" "db_rep_conf_autoinit off" + "Env016.b1: Rep config: autoinit" + "rep_get_config autoinit" "0" } + { "rep_set_config" "db_rep_conf_bulk" + "Env016.b1: Rep config: bulk" + "rep_get_config bulk" "1" } + { "rep_set_config" "db_rep_conf_delayclient" + "Env016.b1: Rep config: delayclient" + "rep_get_config delayclient" "1" } + { "rep_set_config" "db_rep_conf_inmem" + "Env016.b1: Rep config: inmem" + "rep_get_config inmem" "1" } + { "rep_set_config" "db_rep_conf_lease" + "Env016.b1: Rep config: lease" + "rep_get_config lease" "1" } + { "rep_set_config" "db_rep_conf_nowait" + "Env016.b1: Rep config: nowait" + "rep_get_config nowait" "1" } + { "rep_set_config" "db_repmgr_conf_elections off" + "Env016.b1: Repmgr config: elections" + "rep_get_config mgrelections" "0" } + { "rep_set_config" "db_repmgr_conf_2site_strict" + "Env016.b1: Repmgr config: 2 site strict" + "rep_get_config mgr2sitestrict" "1" } + { "rep_set_limit" "0 1048576" "Env016.b2: Rep limit" + "rep_get_limit" } + { "rep_set_nsites" "6" "Env016.b3: Rep nsites" + "rep_get_nsites" } + { "rep_set_priority" "1" "Env016.b4: Rep priority" + "rep_get_priority" } + { "rep_set_request" "5000 10000" "Env016.b5: Rep request" + "rep_get_request" } + { "rep_set_timeout" "db_rep_ack_timeout 50000" + "Env016.b6: Rep ack timeout" + "rep_get_timeout ack" "50000" } + { "rep_set_timeout" "db_rep_checkpoint_delay 500000" + "Env016.b6: Rep ckp timeout" + "rep_get_timeout checkpoint_delay" "500000" } + { "rep_set_timeout" "db_rep_connection_retry 500000" + "Env016.b6: Rep connection retry timeout" + "rep_get_timeout connection_retry" "500000" } + { "rep_set_timeout" "db_rep_election_timeout 500000" + "Env016.b6: Rep elect timeout" "rep_get_timeout election" + "500000" } + { "rep_set_timeout" "db_rep_election_retry 100000" + "Env016.b6: Rep election retry timeout" + "rep_get_timeout election_retry" + "100000" } + { 
"rep_set_timeout" "db_rep_full_election_timeout 500000" + "Env016.b6: Rep full election timeout" + "rep_get_timeout full_election" "500000" } + { "rep_set_timeout" "db_rep_heartbeat_monitor 50000" + "Env016.b6: Rep heartbeat monitor timeout" + "rep_get_timeout heartbeat_monitor" "50000" } + { "rep_set_timeout" "db_rep_heartbeat_send 50000" + "Env016.b6: Rep heartbeat send timeout" + "rep_get_timeout heartbeat_send" "50000" } + { "rep_set_timeout" "db_rep_lease_timeout 500" + "Env016.b6: Rep lease timeout" + "rep_get_timeout lease" "500" } + { "repmgr_set_ack_policy" "db_repmgr_acks_all" + "Env016.b8: Repmgr acks_all" + "repmgr_get_ack_policy" "all" } + { "repmgr_set_ack_policy" "db_repmgr_acks_all_available" + "Env016.b8: Repmgr acks_all_available" + "repmgr_get_ack_policy" "allavailable" } + { "repmgr_set_ack_policy" "db_repmgr_acks_all_peers" + "Env016.b8: Repmgr acks_all_peers" + "repmgr_get_ack_policy" "allpeers" } + { "repmgr_set_ack_policy" "db_repmgr_acks_none" + "Env016.b8: Repmgr acks_none" + "repmgr_get_ack_policy" "none" } + { "repmgr_set_ack_policy" "db_repmgr_acks_one" + "Env016.b8: Repmgr acks_one" + "repmgr_get_ack_policy" "one" } + { "repmgr_set_ack_policy" "db_repmgr_acks_one_peer" + "Env016.b8: Repmgr acks_one_peer" + "repmgr_get_ack_policy" "onepeer" } + { "repmgr_set_ack_policy" "db_repmgr_acks_quorum" + "Env016.b8: Repmgr acks_quorum" + "repmgr_get_ack_policy" "quorum" } + { "repmgr_site" "example.com 49200 db_local_site on" + "Env016.b9: Repmgr set local site" + "repmgr_get_local_site" "example.com 49200" } + } + puts "\tEnv016.b: Check settings via getter functions." + foreach l $glist { + set carg [lindex $l 0] + set val [lindex $l 1] + set msg [lindex $l 2] + set getter [lindex $l 3] + if { [llength $l] > 4 } { + set getval [lindex $l 4] + } else { + set getval $val + } + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + repladd 1 + + # Open a master. 
+ puts "\t\t$msg" + # + # Create DB_CONFIG. + # + env016_make_config $masterdir $carg $val + # + # Open env. + # + set ma_envcmd "berkdb_env_noerr -create -txn \ + -home $masterdir -rep" + set masterenv [eval $ma_envcmd] + # + # Verify value + # + set gval [eval $masterenv $getter] + error_check_good stat_get $gval $getval + + error_check_good masterenv_close [$masterenv close] 0 + replclose $testdir/MSGQUEUEDIR + } + + + + puts "\tEnv016.c: Test that bad rep config values are rejected." + set bad_glist { + { "rep_set_clockskew" "103" } + { "rep_set_config" "db_rep_conf_bulk x" } + { "rep_set_config" "db_rep_conf_xxx" } + { "rep_set_config" "db_rep_conf_bulk x x1" } + { "rep_set_limit" "1" } + { "rep_set_nsites" "5 x" } + { "rep_set_priority" "100 200" } + { "rep_set_request" "500" } + { "rep_set_timeout" "db_rep_ack_timeout" } + { "rep_set_timeout" "db_rep_xxx_timeout 50" } + { "repmgr_set_ack_policy" "db_repmgr_acks_all on" } + { "repmgr_set_ack_policy" "db_repmgr_acks_xxx" } + { "repmgr_site" "localhost" } + { "repmgr_site" "localhost 10001 peer" } + { "repmgr_site" "localhost 10001 xxxx on" } + } + + foreach l $bad_glist { + set carg [lindex $l 0] + set val [lindex $l 1] + + env_cleanup $testdir + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + + env016_make_config $masterdir $carg $val + + set ma_envcmd "berkdb_env_noerr -create -txn \ + -home $masterdir -rep" + set masterenv [catch {eval $ma_envcmd} ret] + error_check_good envopen $masterenv 1 + error_check_good error [is_substr $errorCode EINVAL] 1 + } +} + +proc env016_make_config { dir carg cval } { + set cid [open $dir/DB_CONFIG w] + puts $cid "$carg $cval" + close $cid +} diff --git a/test/tcl/env017.tcl b/test/tcl/env017.tcl new file mode 100644 index 00000000..9f54c169 --- /dev/null +++ b/test/tcl/env017.tcl @@ -0,0 +1,674 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST env017 +# TEST Check documented "stat" fields against the fields +# TEST returned by the "stat" functions. Make sure they +# TEST match, and that none are missing. +# TEST These are the stat functions we test: +# TEST env log_stat +# TEST env lock_stat +# TEST env txn_stat +# TEST env mutex_stat +# TEST env rep_stat +# TEST env repmgr_stat +# TEST env mpool_stat +# TEST db stat +# TEST seq stat +# TEST db compact_stat + + +proc env017 { } { + puts "\nEnv017: Check the integrity of the various stat" + env017_log_stat + env017_lock_stat + env017_txn_stat + env017_mutex_stat + env017_rep_stat + env017_repmgr_stat + env017_mpool_stat + env017_db_stat + env017_seq_stat + env017_compact_stat +} + +# Check the log stat field. +proc env017_log_stat { } { + puts "\nEnv017: Check the Log stat field" + set check_type log_stat_check + set stat_method log_stat + set envargs {-create -log} + set map_list { + { "Magic" st_magic } + { "Log file Version" st_version } + { "Region size" st_regsize } + { "Log file mode" st_mode } + { "Log record cache size" st_lg_bsize } + { "Current log file size" st_lg_size } + { "Initial fileid allocation" st_fileid_init } + { "Current fileids in use" st_nfileid } + { "Maximum fileids ever used" st_maxnfileid } + { "Log file records written" st_record } + { "Mbytes written" st_w_mbytes } + { "Bytes written (over Mb)" st_w_bytes } + { "Mbytes written since checkpoint" st_wc_mbytes } + { "Bytes written (over Mb) since checkpoint" + st_wc_bytes } + { "Times log written" st_wcount } + { "Times log written because cache filled up" + st_wcount_fill } + { "Times log read from disk" st_rcount } + { "Times log flushed to disk" st_scount } + { "Current log file number" st_cur_file } + { "Current log file offset" st_cur_offset } + { "On-disk log file number" st_disk_file } + { "On-disk log file offset" st_disk_offset } + { "Max commits in a log flush" st_maxcommitperflush } + { "Min commits in a log flush" st_mincommitperflush } + { 
"Number of region lock waits" st_region_wait } + { "Number of region lock nowaits" st_region_nowait } + } + set doc_list [list st_magic st_version st_mode st_lg_bsize st_lg_size \ + st_fileid_init st_nfileid st_maxnfileid \ + st_record st_w_mbytes st_w_bytes st_wc_mbytes st_wc_bytes \ + st_wcount st_wcount_fill st_rcount st_scount st_cur_file \ + st_cur_offset st_disk_file st_disk_offset st_maxcommitperflush \ + st_mincommitperflush st_regsize st_region_wait st_region_nowait ] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the lock stat field. +proc env017_lock_stat { } { + puts "\nEnv017: Check the lock stat field" + set check_type lock_stat_check + set stat_method lock_stat + set envargs {-create -lock} + set map_list { + { "Region size" st_regsize } + { "Last allocated locker ID" st_id } + { "Current maximum unused locker ID" st_cur_maxid } + { "Initial locks" st_initlocks } + { "Allocated locks" st_locks } + { "Maximum locks" st_maxlocks } + { "Initial lockers" st_initlockers } + { "Allocated lockers" st_lockers } + { "Maximum lockers" st_maxlockers } + { "Initial objects" st_initobjects } + { "Allocated objects" st_objects } + { "Maximum objects" st_maxobjects } + { "Lock modes" st_nmodes } + { "Number of lock table partitions" st_partitions } + { "Size of object hash table" st_tablesize } + { "Current number of locks" st_nlocks } + { "Maximum number of locks so far" st_maxnlocks } + { "Maximum number of locks in any hash bucket" + st_maxhlocks } + { "Maximum number of lock steals for an empty partition" + st_locksteals } + { "Maximum number lock steals in any partition" + st_maxlsteals } + { "Current number of lockers" st_nlockers } + { "Maximum number of lockers so far" st_maxnlockers } + { "Current number of objects" st_nobjects } + { "Maximum number of objects so far" st_maxnobjects } + { "Maximum number of objects in any hash bucket" + st_maxhobjects } + { "Maximum number of object steals for an empty 
partition" + st_objectsteals } + { "Maximum number object steals in any partition" + st_maxosteals } + { "Lock requests" st_nrequests } + { "Lock releases" st_nreleases } + { "Lock upgrades" st_nupgrade } + { "Lock downgrades" st_ndowngrade } + { "Number of conflicted locks for which we waited" + st_lock_wait } + { "Number of conflicted locks for which we did not wait" + st_lock_nowait } + { "Deadlocks detected" st_ndeadlocks } + { "Number of region lock waits" st_region_wait } + { "Number of region lock nowaits" st_region_nowait } + { "Number of object allocation waits" st_objs_wait } + { "Number of object allocation nowaits" st_objs_nowait } + { "Number of locker allocation waits" st_lockers_wait } + { "Number of locker allocation nowaits" st_lockers_nowait } + { "Maximum hash bucket length" st_hash_len } + { "Lock timeout value" st_locktimeout } + { "Number of lock timeouts" st_nlocktimeouts } + { "Transaction timeout value" st_txntimeout } + { "Number of transaction timeouts" st_ntxntimeouts } + { "Number lock partition mutex waits" st_part_wait } + { "Number lock partition mutex nowaits" st_part_nowait } + { "Maximum number waits on any lock partition mutex" + st_part_max_wait } + { "Maximum number nowaits on any lock partition mutex" + st_part_max_nowait } + } + set doc_list [list st_id st_cur_maxid st_nmodes \ + st_initlocks st_initlockers st_initobjects \ + st_locks st_lockers st_objects \ + st_maxlocks st_maxlockers st_maxobjects \ + st_partitions st_tablesize st_nlocks st_maxnlocks \ + st_maxhlocks st_locksteals st_maxlsteals st_nlockers \ + st_maxnlockers st_nobjects st_maxnobjects st_maxhobjects \ + st_objectsteals st_maxosteals st_nrequests st_nreleases st_nupgrade\ + st_ndowngrade st_lock_wait st_lock_nowait st_ndeadlocks \ + st_locktimeout st_nlocktimeouts st_txntimeout st_ntxntimeouts \ + st_objs_wait st_objs_nowait st_lockers_wait st_lockers_nowait \ + st_hash_len st_regsize st_part_wait st_part_nowait st_part_max_wait\ + st_part_max_nowait 
st_region_wait st_region_nowait] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the txn stat field. +proc env017_txn_stat { } { + puts "\nEnv017: Check the transaction stat field" + set check_type txn_stat_check + set stat_method txn_stat + set envargs {-create -txn} + set map_list { + { "Region size" st_regsize } + { "LSN of last checkpoint" st_last_ckp } + { "Time of last checkpoint" st_time_ckp } + { "Last txn ID allocated" st_last_txnid } + { "Maximum txns" st_maxtxns } + { "Initial txns" st_inittxns } + { "Number aborted txns" st_naborts } + { "Number txns begun" st_nbegins } + { "Number committed txns" st_ncommits } + { "Number active txns" st_nactive } + { "Number of snapshot txns" st_nsnapshot } + { "Number restored txns" st_nrestores } + { "Maximum active txns" st_maxnactive } + { "Maximum snapshot txns" st_maxnsnapshot } + { "Number of region lock waits" st_region_wait } + { "Number of region lock nowaits" st_region_nowait } + } + set doc_list [list st_last_ckp st_time_ckp st_last_txnid st_maxtxns \ + st_inittxns st_nactive st_nsnapshot st_maxnactive st_maxnsnapshot \ + st_nbegins st_naborts st_ncommits st_nrestores st_regsize \ + st_region_wait st_region_nowait ] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +#Check the mutex stat field. 
+proc env017_mutex_stat { } { + puts "\nEnv017: Check the mutex stat field" + set check_type mutex_stat_check + set stat_method mutex_stat + set envargs {-create} + set map_list { + { "Mutex align" st_mutex_align } + { "Mutex TAS spins" st_mutex_tas_spins } + { "Initial mutex count" st_mutex_init } + { "Mutex count" st_mutex_cnt } + { "Mutex max" st_mutex_max } + { "Free mutexes" st_mutex_free } + { "Mutexes in use" st_mutex_inuse } + { "Max in use" st_mutex_inuse_max } + { "Mutex region size" st_regsize } + { "Mutex region max" st_regmax } + { "Number of region waits" st_region_wait } + { "Number of region no waits" st_region_nowait } + } + set doc_list [list st_mutex_align st_mutex_tas_spins st_mutex_init \ + st_mutex_cnt st_mutex_max st_mutex_free st_mutex_inuse \ + st_mutex_inuse_max st_regsize st_regmax st_region_wait \ + st_region_nowait ] + + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the rep stat field. +proc env017_rep_stat { } { + puts "\nEnv017: Check the replication stat field" + set check_type rep_stat_check + set stat_method rep_stat + set envargs {-create -rep -log -txn} + set map_list { + { "Role" st_status} + { "Next LSN expected" st_next_lsn } + { "First missed LSN" st_waiting_lsn } + { "Maximum permanent LSN" st_max_perm_lsn } + { "Bulk buffer fills" st_bulk_fills } + { "Bulk buffer overflows" st_bulk_overflows } + { "Bulk records stored" st_bulk_records } + { "Bulk buffer transfers" st_bulk_transfers } + { "Client service requests" st_client_svc_req } + { "Client service req misses" st_client_svc_miss } + { "Client rerequests" st_client_rerequests } + { "Duplicate master conditions" st_dupmasters } + { "Environment ID" st_env_id } + { "Environment priority" st_env_priority } + { "Generation number" st_gen } + { "Election generation number" st_egen } + { "Startup complete" st_startup_complete } + { "Lease messages sent" st_lease_sends } + { "Lease checks" st_lease_chk } + { "Lease check invalid" 
st_lease_chk_misses } + { "Lease check refresh" st_lease_chk_refresh } + { "Duplicate log records received" st_log_duplicated } + { "Current log records queued" st_log_queued } + { "Maximum log records queued" st_log_queued_max } + { "Total log records queued" st_log_queued_total } + { "Log records received" st_log_records } + { "Log records requested" st_log_requested } + { "Master environment ID" st_master } + { "Master changes" st_master_changes } + { "Messages with bad generation number" st_msgs_badgen } + { "Messages processed" st_msgs_processed } + { "Messages ignored for recovery" st_msgs_recover } + { "Message send failures" st_msgs_send_failures } + { "Messages sent" st_msgs_sent } + { "New site messages" st_newsites } + { "Number of sites in replication group" st_nsites } + { "Transmission limited" st_nthrottles } + { "Outdated conditions" st_outdated } + { "Transactions applied" st_txns_applied } + { "Next page expected" st_next_pg } + { "First missed page" st_waiting_pg } + { "Duplicate pages received" st_pg_duplicated } + { "Pages received" st_pg_records } + { "Pages requested" st_pg_requested } + { "Elections held" st_elections } + { "Elections won" st_elections_won } + { "Election phase" st_election_status } + { "Election winner" st_election_cur_winner } + { "Election generation number" st_election_gen } + { "Election data generation number" st_election_datagen } + { "Election max LSN" st_election_lsn } + { "Election sites" st_election_nsites } + { "Election nvotes" st_election_nvotes } + { "Election priority" st_election_priority } + { "Election tiebreaker" st_election_tiebreaker } + { "Election votes" st_election_votes } + { "Election seconds" st_election_sec } + { "Election usecs" st_election_usec } + { "Start-sync operations delayed" + st_startsync_delayed } + { "Maximum lease seconds" st_max_lease_sec } + { "Maximum lease usecs" st_max_lease_usec } + { "File fail cleanups done" st_filefail_cleanups } + } + set doc_list [list st_bulk_fills 
st_bulk_overflows st_bulk_records \ + st_bulk_transfers st_client_rerequests st_client_svc_miss \ + st_client_svc_req st_dupmasters st_egen st_election_cur_winner \ + st_election_gen st_election_datagen st_election_lsn st_election_nsites \ + st_election_nvotes st_election_priority st_election_sec \ + st_election_status st_election_tiebreaker st_election_usec \ + st_election_votes st_elections st_elections_won st_env_id \ + st_env_priority st_filefail_cleanups st_gen st_lease_sends \ + st_lease_chk st_lease_chk_misses st_lease_chk_refresh \ + st_log_duplicated \ + st_log_queued st_log_queued_max st_log_queued_total st_log_records \ + st_log_requested st_master st_master_changes st_max_lease_sec \ + st_max_lease_usec st_max_perm_lsn st_msgs_badgen st_msgs_processed\ + st_msgs_recover st_msgs_send_failures st_msgs_sent st_newsites \ + st_next_lsn st_next_pg st_nsites st_nthrottles st_outdated \ + st_pg_duplicated st_pg_records st_pg_requested \ + st_startsync_delayed st_startup_complete st_status st_txns_applied\ + st_waiting_lsn st_waiting_pg ] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the repmgr stat field. +proc env017_repmgr_stat { } { + puts "\nEnv017: Check the repmgr stat field" + set check_type repmgr_stat_check + set stat_method repmgr_stat + set envargs {-create -txn -rep} + set map_list { + { "Acknowledgement failures" st_perm_failed } + { "Messages delayed" st_msgs_queued} + { "Messages discarded" st_msgs_dropped} + { "Connections dropped" st_connection_drop} + { "Failed re-connects" st_connect_fail} + { "Election threads" st_elect_threads} + { "Max elect threads" st_max_elect_threads} + } + set doc_list [list st_perm_failed st_msgs_queued st_msgs_dropped \ + st_connection_drop st_connect_fail st_elect_threads \ + st_max_elect_threads ] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the mpool stat field. 
+proc env017_mpool_stat { } { + puts "\nEnv017: Check the mpool stat field" + set check_type mpool_stat_check + set stat_method mpool_stat + set envargs {-create} + set map_list { + { "Cache size (gbytes)" st_gbytes } + { "Cache size (bytes)" st_bytes } + { "Number of caches" st_ncache } + { "Maximum number of caches" st_max_ncache } + { "Region size" st_regsize } + { "Region max" st_regmax } + { "Maximum memory-mapped file size" st_mmapsize } + { "Maximum open file descriptors" st_maxopenfd } + { "Maximum sequential buffer writes" st_maxwrite } + { "Sleep after writing maximum buffers" st_maxwrite_sleep } + { "Pages mapped into address space" st_map } + { "Cache hits" st_cache_hit } + { "Cache misses" st_cache_miss } + { "Pages created" st_page_create } + { "Pages read in" st_page_in } + { "Pages written" st_page_out } + { "Clean page evictions" st_ro_evict } + { "Dirty page evictions" st_rw_evict } + { "Dirty pages trickled" st_page_trickle } + { "Cached pages" st_pages } + { "Cached clean pages" st_page_clean } + { "Cached dirty pages" st_page_dirty } + { "Hash buckets" st_hash_buckets } + { "Mutexes for hash buckets" st_hash_mutexes } + { "Default pagesize" st_pagesize } + { "Hash lookups" st_hash_searches } + { "Longest hash chain found" st_hash_longest } + { "Hash elements examined" st_hash_examined } + { "Number of hash bucket nowaits" st_hash_nowait } + { "Number of hash bucket waits" st_hash_wait } + { "Maximum number of hash bucket nowaits" + st_hash_max_nowait } + { "Maximum number of hash bucket waits" st_hash_max_wait } + { "Number of region lock nowaits" st_region_nowait } + { "Number of region lock waits" st_region_wait } + { "Buffers frozen" st_mvcc_frozen } + { "Buffers thawed" st_mvcc_thawed } + { "Frozen buffers freed" st_mvcc_freed } + { "Page allocations" st_alloc } + { "Buckets examined during allocation" st_alloc_buckets } + { "Maximum buckets examined during allocation" + st_alloc_max_buckets } + { "Pages examined during allocation" 
st_alloc_pages } + { "Maximum pages examined during allocation" + st_alloc_max_pages } + { "Threads waiting on buffer I/O" st_io_wait} + { "Number of syncs interrupted" st_sync_interrupted} + } + set doc_list [list st_gbytes st_bytes st_ncache st_max_ncache \ + st_regsize st_regmax st_mmapsize st_maxopenfd st_maxwrite \ + st_maxwrite_sleep st_map st_cache_hit st_cache_miss \ + st_page_create st_page_in st_page_out st_ro_evict st_rw_evict \ + st_page_trickle st_pages st_page_clean st_page_dirty \ + st_hash_buckets st_hash_mutexes st_pagesize st_hash_searches \ + st_hash_longest st_hash_examined st_hash_nowait st_hash_wait \ + st_hash_max_nowait st_hash_max_wait st_region_wait \ + st_region_nowait st_mvcc_frozen st_mvcc_thawed st_mvcc_freed \ + st_alloc st_alloc_buckets st_alloc_max_buckets st_alloc_pages \ + st_alloc_max_pages st_io_wait st_sync_interrupted ] + env017_stat_check \ + $map_list $doc_list $check_type $stat_method $envargs +} + +# Check the db stat field. +proc env017_db_stat { } { + puts "\nEnv017: Check the db stat field" + set hash_map_list { + { "Magic" hash_magic } + { "Version" hash_version } + { "Page size" hash_pagesize } + { "Page count" hash_pagecnt } + { "Number of keys" hash_nkeys } + { "Number of records" hash_ndata } + { "Fill factor" hash_ffactor } + { "Buckets" hash_buckets } + { "Free pages" hash_free } + { "Bytes free" hash_bfree } + { "Number of big pages" hash_bigpages } + { "Big pages bytes free" hash_big_bfree } + { "Overflow pages" hash_overflows } + { "Overflow bytes free" hash_ovfl_free } + { "Duplicate pages" hash_dup } + { "Duplicate pages bytes free" hash_dup_free } + { "Flags" flags } + } + set queue_map_list { + { "Magic" qs_magic } + { "Version" qs_version } + { "Page size" qs_pagesize } + { "Extent size" qs_extentsize } + { "Number of keys" qs_nkeys } + { "Number of records" qs_ndata } + { "Record length" qs_re_len } + { "Record pad" qs_re_pad } + { "First record number" qs_first_recno } + { "Last record number" 
qs_cur_recno } + { "Number of pages" qs_pages } + { "Bytes free" qs_pgfree} + { "Flags" flags } + } + set btree_map_list { + { "Magic" bt_magic } + { "Version" bt_version } + { "Number of keys" bt_nkeys } + { "Number of records" bt_ndata } + { "Minimum keys per page" bt_minkey } + { "Fixed record length" bt_re_len } + { "Record pad" bt_re_pad } + { "Page size" bt_pagesize } + { "Page count" bt_pagecnt } + { "Levels" bt_levels } + { "Internal pages" bt_int_pg } + { "Leaf pages" bt_leaf_pg } + { "Duplicate pages" bt_dup_pg } + { "Overflow pages" bt_over_pg } + { "Empty pages" bt_empty_pg } + { "Pages on freelist" bt_free } + { "Internal pages bytes free" bt_int_pgfree } + { "Leaf pages bytes free" bt_leaf_pgfree } + { "Duplicate pages bytes free" bt_dup_pgfree } + { "Bytes free in overflow pages" bt_over_pgfree } + { "Flags" flags } + } + set hash_doc_list [list hash_magic hash_version hash_nkeys hash_ndata \ + hash_pagecnt hash_pagesize hash_ffactor hash_buckets hash_free \ + hash_bfree hash_bigpages hash_big_bfree hash_overflows \ + hash_ovfl_free hash_dup hash_dup_free flags] + + set btree_doc_list [list bt_magic bt_version bt_nkeys bt_ndata \ + bt_pagecnt bt_pagesize bt_minkey bt_re_len bt_re_pad bt_levels \ + bt_int_pg bt_leaf_pg bt_dup_pg bt_over_pg bt_empty_pg bt_free \ + bt_int_pgfree bt_leaf_pgfree bt_dup_pgfree bt_over_pgfree flags ] + + set queue_doc_list [list qs_magic qs_version qs_nkeys qs_ndata \ + qs_pagesize qs_extentsize qs_pages qs_re_len qs_re_pad qs_pgfree \ + qs_first_recno qs_cur_recno flags ] + + # Check the hash db stat field. + puts "\tEnv017: Check the hash db stat" + env017_dbstat_check \ + $hash_map_list $hash_doc_list {hash_db_stat_check} {-create -hash} + + # Check the queue db stat field. + puts "\tEnv017: Check the queue db stat" + env017_dbstat_check \ + $queue_map_list $queue_doc_list {queue_db_stat_check} \ + {-create -queue} + + # Check the btree/recno db stat field. 
+ puts "\tEnv017: Check the btree/recno db stat" + env017_dbstat_check \ + $btree_map_list $btree_doc_list {btree_db_stat_check} \ + {-create -btree} +} + + +# Check the sequence stat field. +proc env017_seq_stat { } { + puts "\nEnv017: Check the sequence stat field" + source ./include.tcl + env_cleanup $testdir + set file1 db1.db + set db1 [berkdb open -create -btree $testdir/$file1] + error_check_good is_valid_db [is_valid_db $db1] TRUE + set seq [berkdb sequence -create -min 0 -max 1024768 $db1 seq_key1] + error_check_good is_valid_seq [is_valid_seq $seq] TRUE + set stat_list [$seq stat] + set map_list { + { "Wait" st_wait } + { "No wait" st_nowait } + { "Current" st_current } + { "Cached" st_value } + { "Max Cached" st_last_value } + { "Min" st_min } + { "Max" st_max } + { "Cache size" st_cache_size} + { "Flags" st_flags} + } + set doc_list [list st_wait st_nowait st_current st_value \ + st_last_value st_min st_max st_cache_size st_flags] + env017_do_check $map_list $stat_list $doc_list {seq_stat} + error_check_good "$seq close" [$seq close] 0 + error_check_good "$db1 close" [$db1 close] 0 +} + +# Check the compact stat field. 
+proc env017_compact_stat { } { + puts "\nEnv017: Check the compact stat field" + source ./include.tcl + env_cleanup $testdir + set file1 db1.db + set db1 [berkdb open -create -btree $testdir/$file1] + error_check_good is_valid_db [is_valid_db $db1] TRUE + set ret [catch {eval $db1 compact -freespace}] + error_check_good compact_ok $ret 0 + set stat_list [$db1 compact_stat] + set map_list { + { "Pages freed" compact_pages_free } + { "Pages truncated" compact_pages_truncated } + { "Pages examined" compact_pages_examine } + { "Levels removed" compact_levels } + { "Deadlocks encountered" compact_deadlock } + { "Empty buckets" compact_empty_buckets } + } + set doc_list [list compact_pages_free compact_pages_truncated \ + compact_pages_examine compact_levels compact_deadlock \ + compact_empty_buckets] + env017_do_check $map_list $stat_list $doc_list {compact_stat} + error_check_good "$db1 close" [$db1 close] 0 +} + +# This is common proc for the stat method called by env handle. +proc env017_stat_check { map_list doc_list check_type stat_method \ + {envargs {}} } { + source ./include.tcl + set extopts { + {""} + {"-thread"} + {"-private" {"mutex_stat" "requires.*mutex.*subsystem"}} + {"-thread -private"} + } + + foreach extopt $extopts { + set extarg [lindex $extopt 0] + set failmsg "" + set fail 0 + if {[llength $extopt] > 1} { + set len [llength $extopt] + for {set i 1} {$i < $len} {incr i} { + set item [lindex $extopt $i] + set stat [lindex $item 0] + if {$stat == $stat_method} { + set failmsg [lindex $item 1] + set fail 1 + break + } + } + } + + env_cleanup $testdir + puts "\tEnv017: Check DB_ENV->$stat_method ($envargs $extarg)" + set env [eval berkdb_env_noerr $extarg $envargs -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + if {$fail == 0} { + set stat_list [$env $stat_method] + env017_do_check \ + $map_list $stat_list $doc_list $check_type + } else { + set ret [catch {eval $env $stat_method} res] + error_check_bad $stat_method $ret 0 + 
error_check_bad chk_err [regexp $failmsg $res] 0 + } + error_check_good "$env close" [$env close] 0 + } +} + +# This is common proc for db stat. +proc env017_dbstat_check { map_list doc_list check_type {dbargs {}} } { + source ./include.tcl + env_cleanup $testdir + set filename "db1.db" + set db [eval berkdb_open_noerr $dbargs $testdir/$filename] + error_check_good is_valid_db [is_valid_db $db] TRUE + set stat_list [$db stat] + env017_do_check $map_list $stat_list $doc_list $check_type + error_check_good "$db close" [$db close] 0 +} + +# This proc does the actual checking job. +proc env017_do_check { map_list stat_list doc_list check_type } { + # Check if all the items in the stat_list have the corresponding + # item in doc_list. + foreach l $map_list { + set field_map([lindex $l 0]) [lindex $l 1] + } + puts "\tEnv017: Check from stat_list" + set res_stat_list {} + foreach item $stat_list { + puts "\t\tEnv017: Checking item [lindex $item 0]" + if {![info exists field_map([lindex $item 0])]} { + lappend res_stat_list [lindex $item 0] + continue + } + set cur_field $field_map([lindex $item 0]) + if {[lsearch -exact $doc_list $cur_field] == -1} { + lappend res_stat_list [lindex $item 0] + } + } + if {[llength $res_stat_list]>0} { + puts -nonewline "FAIL: in stat_list of $check_type, " + puts "Mismatch Items: $res_stat_list" + } + + # Check if all the items in the doc_list have the corresponding + # record in the stat_list. 
+ foreach l $map_list { + set field_map([lindex $l 1]) [lindex $l 0] + } + + set stat_field_list {} + + foreach item $stat_list { + lappend stat_field_list [lindex $item 0] + } + + set res_doc_list {} + puts "\tEnv017: Check from doc_list" + foreach item $doc_list { + puts "\t\tEnv017: Checking item [lindex $item 0]" + if {![info exists field_map([lindex $item 0])]} { + lappend res_doc_list [lindex $item 0] + continue + } + set cur_field $field_map([lindex $item 0]) + if {[lsearch -exact $stat_field_list $cur_field] == -1} { + lappend res_doc_list [lindex $item 0] + } + } + if {[llength $res_doc_list]>0} { + puts -nonewline "FAIL: in doc_list of $check_type, " + puts "Mismatch Items: $res_doc_list" + } +} + diff --git a/test/tcl/env018.tcl b/test/tcl/env018.tcl new file mode 100644 index 00000000..5f8db49d --- /dev/null +++ b/test/tcl/env018.tcl @@ -0,0 +1,57 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env018 +# TEST Test getters when joining an env. When a second handle is +# TEST opened on an existing env, get_open_flags needs to return +# TEST the correct flags to the second handle so it knows what sort +# TEST of environment it's just joined. +# TEST +# TEST For several different flags to env_open, open an env. Open +# TEST a second handle on the same env, get_open_flags and verify +# TEST the flag is returned. +proc env018 { } { + source ./include.tcl + set tnum "018" + + puts "Env$tnum: Test of join_env and getters." + + # Skip for HP-UX where a second handle on an env is not allowed. + if { $is_hp_test == 1 } { + puts "Skipping env$tnum for HP-UX." + return + } + + # Set up flags to use in opening envs. + set flags { -cdb -lock -log -txn } + + foreach flag $flags { + env_cleanup $testdir + + puts "\t\tEnv$tnum.a: Open env with $flag." 
+ set e1 [eval {berkdb_env} -create -home $testdir $flag] + error_check_good e1_open [is_valid_env $e1] TRUE + + puts "\t\tEnv$tnum.b: Join the env." + set e2 [eval {berkdb_env} -home $testdir] + error_check_good e2_open [is_valid_env $e2] TRUE + + # Get open flags for both envs. + set e1_flags_returned [$e1 get_open_flags] + set e2_flags_returned [$e2 get_open_flags] + + # Test that the flag given to the original env is + # returned by a call to the second env. + puts "\t\tEnv$tnum.c: Check that flag is returned." + error_check_good flag_is_returned \ + [is_substr $e2_flags_returned $flag] 1 + + # Clean up. + error_check_good e1_close [$e1 close] 0 + error_check_good e2_close [$e2 close] 0 + } +} + diff --git a/test/tcl/env019.tcl b/test/tcl/env019.tcl new file mode 100644 index 00000000..590ee5cd --- /dev/null +++ b/test/tcl/env019.tcl @@ -0,0 +1,68 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env019 +# TEST Test that stats are correctly set and reported when +# TEST an env is accessed from a second process. + +proc env019 { } { + source ./include.tcl + + puts "Env019: Test stats when env is joined from a second process." + + # Set up test cases. + # The first two elements are the flag and value for the original + # env. The third and fourth elements are the flag and value for + # the second process. + # + set testcases { + { "-cachesize" "{0 1048576 1}" "-cachesize" "{0 2097152 1}" } + { "-cachesize" "{0 2097152 1}" "-cachesize" "{0 1048576 1}" } + { "-cachesize" "{0 2097152 1}" "-cachesize" "{0 2097152 1}" } + { "-cachesize" "{0 1048576 1}" {} {} } + } + + foreach case $testcases { + + env_cleanup $testdir + + # Extract the values + set flag1 [ lindex $case 0 ] + set value1 [ lindex $case 1 ] + set flag2 [ lindex $case 2 ] + set value2 [ lindex $case 3 ] + + # Set up the original env. 
+ set e [eval {berkdb_env -create -home $testdir} $flag1 $value1 ] + error_check_good dbenv [is_valid_env $e] TRUE + + # Start up a second process to join the env. + puts "$tclsh_path $test_path/wrap.tcl env019script.tcl\ + $testdir/env019.log $flag2 $value2 &" + set pid [exec $tclsh_path $test_path/wrap.tcl env019script.tcl\ + $testdir/env019.log $flag2 $value2 &] + + watch_procs $pid 5 + + # Read the value observed by the second process. + set db [eval berkdb_open -env $e -btree values.db] + set get_gbytes [lindex [lindex [$db get gbytes] 0] 1] + set get_bytes [lindex [lindex [$db get bytes] 0] 1] + set get_ncache [lindex [lindex [$db get ncache] 0] 1] + + # Now get the answer from memp_stat. + set set_gbytes [stat_field $e mpool_stat "Cache size (gbytes)"] + set set_bytes [stat_field $e mpool_stat "Cache size (bytes)"] + set set_ncache [stat_field $e mpool_stat "Number of caches"] + + error_check_good gbytes [string equal $get_gbytes $set_gbytes] 1 + error_check_good bytes [string equal $get_bytes $set_bytes] 1 + error_check_good ncache [string equal $get_ncache $set_ncache] 1 + error_check_good db_close [$db close] 0 + error_check_good env_close [$e close] 0 + } + +} diff --git a/test/tcl/env019script.tcl b/test/tcl/env019script.tcl new file mode 100644 index 00000000..c2cb7f4e --- /dev/null +++ b/test/tcl/env019script.tcl @@ -0,0 +1,50 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# For use with env019, a test of stat values returned by +# different processes accessing the same env. 
+ +source ./include.tcl +source $test_path/test.tcl + +set usage "env019script flag value" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + puts $argc + exit +} + +# Initialize arguments +set flag [ lindex $argv 0 ] +set value [ lindex $argv 1 ] + +# Join the env, trying to set the flag values +set e [eval {berkdb_env -home $testdir} $flag $value] +error_check_good env_open [is_valid_env $e] TRUE + +# Get the settings for the env. This second process should +# not be able to override the settings created when the env +# was originally opened. +set gbytes [stat_field $e mpool_stat "Cache size (gbytes)"] +set bytes [stat_field $e mpool_stat "Cache size (bytes)"] +set ncache [stat_field $e mpool_stat "Number of caches"] + +#puts "gbytes is $gbytes" +#puts "bytes is $bytes" +#puts "ncache is $ncache" + +# Store the values so the first process can inspect them. +set db [berkdb_open -create -env $e -btree values.db] +error_check_good put_gbytes [eval {$db put} "gbytes" $gbytes] 0 +error_check_good put_bytes [eval {$db put} "bytes" $bytes] 0 +error_check_good put_ncache [eval {$db put} "ncache" $ncache] 0 +error_check_good db_close [$db close] 0 + +# Close environment system +error_check_good env_close [$e close] 0 +exit diff --git a/test/tcl/env020.tcl b/test/tcl/env020.tcl new file mode 100644 index 00000000..0950ff71 --- /dev/null +++ b/test/tcl/env020.tcl @@ -0,0 +1,1378 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env020 +# TEST Check if the output information for stat_print is expected. 
+# TEST These are the stat_print functions we test: +# TEST env stat_print +# TEST env lock_stat_print +# TEST env log_stat_print +# TEST env mpool_stat_print +# TEST env mutex_stat_print +# TEST env rep_stat_print +# TEST env repmgr_stat_print +# TEST env txn_stat_print +# TEST db stat_print +# TEST seq stat_print +# + +proc env020 { } { + puts "Env020: Check the output of the various stat_print" + env020_init + env020_env_stat_print + env020_lock_stat_print + env020_log_stat_print + env020_mpool_stat_print + env020_mutex_stat_print + env020_rep_stat_print + env020_repmgr_stat_print + env020_txn_stat_print + env020_bt_stat_print + env020_ham_stat_print + env020_ram_stat_print + env020_qam_stat_print + env020_seq_stat_print +} + +# This is to create the include file for later use. +# As in this test, we define many global variables, and +# they are used in various procs. In order to avoid a +# long list of "global XXXX", we create an include file and +# every proc could use these global variables after including +# the file. 
+proc env020_init_include { } { + set f [open "./env020_include.tcl" w] + puts $f "global section_separator" + puts $f "global statprt_pattern" + puts $f "global region_statprt_pattern " + puts $f "global lk_statprt_pattern_def" + puts $f "global lk_statprt_pattern_params" + puts $f "global lk_statprt_pattern_conf" + puts $f "global lk_statprt_pattern_lockers" + puts $f "global lk_statprt_pattern_objects" + puts $f "global log_statprt_pattern_def" + puts $f "global log_statprt_pattern_DBLOG" + puts $f "global log_statprt_pattern_LOG" + puts $f "global mp_statprt_pattern_def" + puts $f "global mp_statprt_pattern_MPOOL" + puts $f "global mp_statprt_pattern_DB_MPOOL" + puts $f "global mp_statprt_pattern_DB_MPOOLFILE" + puts $f "global mp_statprt_pattern_MPOOLFILE" + puts $f "global mp_statprt_pattern_Cache" + puts $f "global mut_statprt_pattern_def" + puts $f "global mut_statprt_pattern_mutex" + puts $f "global mut_statprt_pattern_DB_MUTEXREGION" + puts $f "global rep_statprt_pattern_def" + puts $f "global rep_statprt_pattern_DB_REP" + puts $f "global rep_statprt_pattern_REP" + puts $f "global rep_statprt_pattern_LOG" + puts $f "global repmgr_statprt_pattern_def" + puts $f "global repmgr_statprt_pattern_sites" + puts $f "global txn_statprt_pattern_def" + puts $f "global txn_statprt_pattern_DB_TXNMGR" + puts $f "global txn_statprt_pattern_DB_TXNREGION" + puts $f "global env_statprt_pattern_Main" + puts $f "global env_statprt_pattern_filehandle" + puts $f "global env_statprt_pattern_ENV" + puts $f "global env_statprt_pattern_DB_ENV" + puts $f "global env_statprt_pattern_per_region" + close $f +} + +# For different regions, we show different description information. 
+proc env020_def_pattern {type} { + set defpat "" + switch $type { + lock { + set defpat "Default locking region information:" + } + log { + set defpat "Default logging region information:" + } + mp { + set defpat "Default cache region information:" + } + mut { + set defpat "Default mutex region information:" + } + rep { + set defpat "Default replication region information:" + } + txn { + set defpat "Default transaction region information:" + } + env { + set defpat "Default database environment information:" + } + } + return [list $defpat] +} + +# This proc initializes all the global variables mentioned before. +# These variables are regular expression patterns. +proc env020_init { } { + env020_init_include + source "./env020_include.tcl" + + set section_separator { + "=-=-=-=-=-=-=-=-=-=-=-=-=" + } + + set region_statprt_pattern { + "REGINFO information" + "Region type" + "Region ID" + "Region name" + "Region address" + "Region allocation head" + "Region primary address" + "Region maximum allocation" + "Region allocated" + "Region allocations.*allocations.*failures.*frees.*longest" + "Allocations by power-of-two sizes" + {\s*\d*\s*KB} + "Region flags" + } + + set lk_statprt_pattern_def { + # DIAGNOSTIC Information + "Hash bucket" + "Partition" + "The number of partition mutex requests that required waiting" + "Maximum hash bucket length" + "Total number of locks requested" + "Total number of locks released" + "Total number of locks upgraded" + "Total number of locks downgraded" + "Lock requests not available due to conflicts, for which we waited" + "Lock requests not available due to conflicts, for which we did not wait" + "Number of locks that have timed out" + "Number of transactions that have timed out" + # Default Lock information + "Last allocated locker ID" + "Current maximum unused locker ID" + "Number of lock modes" + "Initial number of locks allocated" + "Initial number of lockers allocated" + "Initial number of lock objects allocated" + "Maximum number 
of locks possible" + "Maximum number of lockers possible" + "Maximum number of lock objects possible" + "Current number of locks allocated" + "Current number of lockers allocated" + "Current number of lock objects allocated" + "Size of object hash table" + "Number of lock object partitions" + "Number of current locks" + "Number of locks that have timed out" + "Transaction timeout value" + "The number of region locks that required waiting" + "Maximum number of locks at any one time" + "Maximum number of locks in any one bucket" + "Maximum number of locks stolen by for an empty partition" + "Maximum number of locks stolen for any one partition" + "Number of current lockers" + "Maximum number of lockers at any one time" + "Number of current lock objects" + "Maximum number of lock objects at any one time" + "Maximum number of lock objects in any one bucket" + "Maximum number of objects stolen by for an empty partition" + "Maximum number of objects stolen for any one partition" + "Total number of locks requested" + "Total number of locks released" + "Total number of locks upgraded" + "Total number of locks downgraded" + "Lock requests not available due to conflicts, for which we waited" + "Lock requests not available due to conflicts, for which we did not wait" + "Number of deadlocks" + "Lock timeout value" + "Number of transactions that have timed out" + "Region size" + "The number of partition locks that required waiting" + "The maximum number of times any partition lock was waited for" + "The number of object queue operations that required waiting" + "The number of locker allocations that required waiting" + "Maximum hash bucket length" + } + + set lk_statprt_pattern_params { + "Lock region parameters" + "Lock region region mutex" + "locker table size" + "object table size" + "obj_off" + "locker_off" + "need_dd" + "next_timeout:" + } + + # Need to check why it is an empty table. 
+ set lk_statprt_pattern_conf { + "Lock conflict matrix" + } + + set lk_statprt_pattern_lockers { + "Locks grouped by lockers" + {Locker.*Mode.*Count Status.*Object} + {locks held.*locks.*pid/thread.*priority} + "expires" + "lk timeout" + "lk expires" + {READ|WRITE|IWR|IWRITE|NG|READ_UNCOMMITTED|WAS_WRITE|WAIT|UNKNOWN} + } + + set lk_statprt_pattern_objects { + "Locks grouped by object" + {Locker.*Mode.*Count Status.*Object} + {READ|WRITE|IWR|IWRITE|NG|READ_UNCOMMITTED|WAS_WRITE|WAIT|UNKNOWN} + {^$} + } + + set log_statprt_pattern_def { + "Log magic number" + "Log version number" + "Log record cache size" + "Log file mode" + "Current log file size" + "Initial fileid allocation" + "Current fileids in use" + "Maximum fileids used" + "Records entered into the log" + "Log bytes written" + "Log bytes written since last checkpoint" + "Total log file I/O writes" + "Total log file I/O writes due to overflow" + "Total log file flushes" + "Total log file I/O reads" + "Current log file number" + "Current log file offset" + "On-disk log file number" + "On-disk log file offset" + "Maximum commits in a log flush" + "Minimum commits in a log flush" + "Region size" + "The number of region locks that required waiting" + } + + set log_statprt_pattern_DBLOG { + "DB_LOG handle information" + "DB_LOG handle mutex" + "Log file name" + # File handle information + "Log file handle" + "file-handle.file name" + "file-handle.mutex" + "file-handle.reference count" + "file-handle.file descriptor" + "file-handle.page number" + "file-handle.page size" + "file-handle.page offset" + "file-handle.seek count" + "file-handle.read count" + "file-handle.write count" + "file-handle.flags" + "Flags" + } + + set log_statprt_pattern_LOG { + "LOG handle information" + "LOG region mutex" + "File name list mutex" + "persist.magic" + "persist.version" + "persist.log_size" + "log file permissions mode" + "current file offset LSN" + "first buffer byte LSN" + "current buffer offset" + "current file write offset" 
+ "length of last record" + "log flush in progress" + "Log flush mutex" + "last sync LSN" + "cached checkpoint LSN" + "log buffer size" + "log file size" + "next log file size" + "transactions waiting to commit" + "LSN of first commit" + } + + set mp_statprt_pattern_def { + "Total cache size" + "Number of caches" + "Maximum number of caches" + "Pool individual cache size" + "Maximum memory-mapped file size" + "Maximum open file descriptors" + "Maximum sequential buffer writes" + "Sleep after writing maximum sequential buffers" + "Pool File:" + "Page size" + "Requested pages mapped into the process' address space" + "Requested pages found in the cache" + "Requested pages not found in the cache" + "Pages created in the cache" + "Pages read into the cache" + "Pages written from the cache to the backing file" + "Clean pages forced from the cache" + "Dirty pages forced from the cache" + "Dirty pages written by trickle-sync thread" + "Current total page count" + "Current clean page count" + "Current dirty page count" + "Number of hash buckets used for page location" + "Number of mutexes for the hash buckets" + "Assumed page size used" + "Total number of times hash chains searched for a page" + "The longest hash chain searched for a page" + "Total number of hash chain entries checked for page" + "The number of hash bucket locks that required waiting" + "The maximum number of times any hash bucket lock was waited for" + "The number of region locks that required waiting" + "The number of buffers frozen" + "The number of buffers thawed" + "The number of frozen buffers freed" + "The number of page allocations" + "The number of hash buckets examined during allocations" + "The maximum number of hash buckets examined for an allocation" + "The number of pages examined during allocations" + "The max number of pages examined for an allocation" + "Threads waited on page I/O" + "The number of times a sync is interrupted" + # Pool file information + "Pool File" + "Page size" + 
"Requested pages mapped into the process' address space" + "Requested pages found in the cache" + "Requested pages not found in the cache" + "Pages created in the cache" + "Pages read into the cache" + "Pages written from the cache to the backing file" + } + + set mp_statprt_pattern_MPOOL { + "MPOOL structure" + "MPOOL region mutex" + "Maximum checkpoint LSN" + "Hash table entries" + "Hash table mutexes" + "Hash table last-checked" + "Hash table LRU priority" + "Hash table LRU generation" + "Put counter" + } + + set mp_statprt_pattern_DB_MPOOL { + "DB_MPOOL handle information" + "DB_MPOOL handle mutex" + "Underlying cache regions" + } + + set mp_statprt_pattern_DB_MPOOLFILE { + "DB_MPOOLFILE structures" + {File #\d*} + "Reference count" + "Pinned block reference count" + "Clear length" + "ID" + "File type" + "LSN offset" + "Max gbytes" + "Max bytes" + "Cache priority" + "mmap address" + "mmap length" + "Flags" + "File handle" + # File handle information + "file-handle.file name" + "file-handle.mutex" + "file-handle.reference count" + "file-handle.file descriptor" + "file-handle.page number" + "file-handle.page size" + "file-handle.page offset" + "file-handle.seek count" + "file-handle.read count" + "file-handle.write count" + "file-handle.flags" + } + + set mp_statprt_pattern_MPOOLFILE { + "MPOOLFILE structures" + {File #\d*} + "Mutex" + "Revision count" + "Reference count" + "Block count" + "Last page number" + "Original last page number" + "Maximum page number" + "Type" + "Priority" + "Page's LSN offset" + "Page's clear length" + "ID" + "Flags" + } + + set mp_statprt_pattern_Cache { + {Cache #\d*} + "BH hash table" + {^bucket \d*:.*} + "pageno, file, ref, LSN, address, priority, flags" + {\d*, #\d*,\s*\d*,\s*\d*/\d*, 0[xX][0-9a-fA-F]*, \d*} + } + + set mut_statprt_pattern_def { + # General mutex information + "Mutex region size" + "The number of region locks that required waiting" + "Mutex alignment" + "Mutex test-and-set spins" + "Mutex total count" + "Mutex 
free count" + "Mutex in-use count" + "Mutex maximum in-use count" + "Mutex counts" + "Unallocated" + # Mutex type + "application allocated" + "atomic emulation" + "db handle" + "env dblist" + "env handle" + "env region" + "lock region" + "logical lock" + "log filename" + "log flush" + "log handle" + "log region" + "mpoolfile handle" + "mpool buffer" + "mpool filehandle" + "mpool file bucket" + "mpool handle" + "mpool hash bucket" + "mpool region" + "mutex region" + "mutex test" + "replication manager" + "replication checkpoint" + "replication database" + "replication diagnostics" + "replication event" + "replication region" + "replication role config" + "replication txn apply" + "sequence" + "twister" + "Tcl events" + "txn active list" + "transaction checkpoint" + "txn commit" + "txn mvcc" + "txn region" + "unknown mutex type" + } + + set mut_statprt_pattern_DB_MUTEXREGION { + "DB_MUTEXREGION structure" + "DB_MUTEXREGION region mutex" + "Size of the aligned mutex" + "Next free mutex" + } + + set mut_statprt_pattern_mutex { + "wait/nowait, pct wait, holder, flags" + } + + set rep_statprt_pattern_def { + "Environment configured as a replication master" + "Environment configured as a replication client" + "Environment not configured for replication" + "Next LSN to be used" + "Next LSN expected" + "Not waiting for any missed log records" + "LSN of first log record we have after missed log records" + "No maximum permanent LSN" + "Maximum permanent LSN" + "Next page number expected" + "Not waiting for any missed pages" + "Page number of first page we have after missed pages" + "Number of duplicate master conditions originally detected at this site" + "Current environment ID" + "No current environment ID" + "Current environment priority" + "Current generation number" + "Election generation number for the current or next election" + "Number of duplicate log records received" + "Number of log records currently queued" + "Maximum number of log records ever queued at once" + 
"Total number of log records queued" + "Number of log records received and appended to the log" + "Number of log records missed and requested" + "Current master ID" + "No current master ID" + "Number of times the master has changed" + "Number of messages received with a bad generation number" + "Number of messages received and processed" + "Number of messages ignored due to pending recovery" + "Number of failed message sends" + "Number of messages sent" + "Number of new site messages received" + "Number of environments used in the last election" + "Transmission limited" + "Number of outdated conditions detected" + "Number of duplicate page records received" + "Number of page records received and added to databases" + "Number of page records missed and requested" + "Startup incomplete" + "Startup complete" + "Number of transactions applied" + "Number of startsync messages delayed" + "Number of elections held" + "Number of elections won" + "No election in progress" + "Duration of last election" + "Current election phase" + "Environment ID of the winner of the current or last election" + "Master generation number of the winner of the current or last election" + "Master data generation number of the winner of the current or last election" + "Maximum LSN of the winner of the current or last election" + "Number of sites responding to this site during the current election" + "Number of votes required in the current or last election" + "Priority of the winner of the current or last election" + "Tiebreaker value of the winner of the current or last election" + "Number of votes received during the current election" + "Number of bulk buffer sends triggered by full buffer" + "Number of single records exceeding bulk buffer size" + "Number of records added to a bulk buffer" + "Number of bulk buffers sent" + "Number of re-request messages received" + "Number of request messages this client failed to process" + "Number of request messages received by this client" + "Duration of 
maximum lease" + "Number of lease validity checks" + "Number of invalid lease validity checks" + "Number of lease refresh attempts during lease validity checks" + "Number of live messages sent while using leases" + } + + # This needs to concat with db_print information. + set rep_statprt_pattern_DB_REP { + "DB_REP handle information" + "Bookkeeping database" + "Flags" + } + + set rep_statprt_pattern_REP { + "REP handle information" + "Replication region mutex" + "Bookkeeping database mutex" + "Environment ID" + "Master environment ID" + "Election generation" + "Last active egen" + "Master generation" + "Space allocated for sites" + "Sites in group" + "Votes needed for election" + "Priority in election" + "Limit on data sent in a single call" + "Request gap seconds" + "Request gap microseconds" + "Maximum gap seconds" + "Maximum gap microseconds" + "Callers in rep_proc_msg" + "Callers in rep_elect" + "Library handle count" + "Multi-step operation count" + "Recovery timestamp" + "Sites heard from" + "Current winner" + "Winner priority" + "Winner generation" + "Winner data generation" + "Winner LSN" + "Winner tiebreaker" + "Votes for this site" + "Synchronization State" + "Config Flags" + "Elect Flags" + "Lockout Flags" + "Flags" + } + + set rep_statprt_pattern_LOG { + "LOG replication information" + "First log record after a gap" + "Maximum permanent LSN processed" + "LSN waiting to verify" + "Maximum LSN requested" + "Time to wait before requesting seconds" + "Time to wait before requesting microseconds" + "Next LSN expected" + "Maximum lease timestamp seconds" + "Maximum lease timestamp microseconds" + } + + set repmgr_statprt_pattern_def { + "Number of PERM messages not acknowledged" + "Number of messages queued due to network delay" + "Number of messages discarded due to queue length" + "Number of existing connections dropped" + "Number of failed new connection attempts" + "Number of currently active election threads" + "Election threads for which space is 
reserved" + } + + set repmgr_statprt_pattern_sites { + "DB_REPMGR site information:" + "eid:.*port:.*" + } + + set txn_statprt_pattern_def { + "No checkpoint LSN" + "File/offset for last checkpoint LSN" + "Checkpoint timestamp" + "No checkpoint timestamp" + "Last transaction ID allocated" + "Maximum number of active transactions configured" + "Active transactions" + "Maximum active transactions" + "Number of transactions begun" + "Number of transactions aborted" + "Number of transactions committed" + "Snapshot transactions" + "Maximum snapshot transactions" + "Number of transactions restored" + "Region size" + "The number of region locks that required waiting" + # Information for Active transactions + "Active transactions" + "running.*begin LSN" + } + + set txn_statprt_pattern_DB_TXNMGR { + "DB_TXNMGR handle information" + "DB_TXNMGR mutex" + "Number of transactions discarded" + } + + set txn_statprt_pattern_DB_TXNREGION { + "DB_TXNREGION handle information" + "DB_TXNREGION region mutex" + "Maximum number of active txns" + "Last transaction ID allocated" + "Current maximum unused ID" + "checkpoint mutex" + "Last checkpoint LSN" + "Last checkpoint timestamp" + "Flags" + } + + set env_statprt_pattern_Main { + "Local time" + "Magic number" + "Panic value" + "Environment version" + "Btree version" + "Hash version" + "Lock version" + "Log version" + "Queue version" + "Sequence version" + "Txn version" + "Creation time" + "Environment ID" + "Primary region allocation and reference count mutex" + "References" + "Current region size" + "Maximum region size" + } + + set env_statprt_pattern_filehandle { + "Environment file handle information" + # File handle Information. 
+ "file-handle.file name" + "file-handle.mutex" + "file-handle.reference count" + "file-handle.file descriptor" + "file-handle.page number" + "file-handle.page size" + "file-handle.page offset" + "file-handle.seek count" + "file-handle.read count" + "file-handle.write count" + "file-handle.flags" + } + + set env_statprt_pattern_ENV { + "ENV" + "DB_ENV handle mutex" + "Errcall" + "Errfile" + "Errpfx" + "Msgfile" + "Msgcall" + "AppDispatch" + "Event" + "Feedback" + "Free" + "Panic" + "Malloc" + "Realloc" + "IsAlive" + "ThreadId" + "ThreadIdString" + "Log dir" + "Tmp dir" + "Data dir" + "Intermediate directory mode" + "Shared memory key" + "Password" + "App private" + "Api1 internal" + "Api2 internal" + "Verbose flags" + "Mutex align" + "Mutex cnt" + "Mutex inc" + "Mutex tas spins" + "Lock conflicts" + "Lock modes" + "Lock detect" + "Lock init" + "Lock init lockers" + "Lock init objects" + "Lock max" + "Lock max lockers" + "Lock max objects" + "Lock partitions" + "Lock object hash table size" + "Lock timeout" + "Log bsize" + "Log file mode" + "Log region max" + "Log size" + "Cache GB" + "Cache B" + "Cache max GB" + "Cache max B" + "Cache mmap size" + "Cache max open fd" + "Cache max write" + "Cache number" + "Cache max write sleep" + "Txn init" + "Txn max" + "Txn timestamp" + "Txn timeout" + "Thread count" + "Registry" + "Registry offset" + "Registry timeout" + "Public environment flags" + } + + set env_statprt_pattern_DB_ENV { + "DB_ENV" + "ENV handle mutex" + "Home" + "Open flags" + "Mode" + "Pid cache" + "Lockfhp" + "Locker" + "Internal recovery table" + "Number of recovery table slots" + "External recovery table" + "Number of recovery table slots" + "Thread hash buckets" + "Thread hash table" + "Mutex initial count" + "Mutex initial max" + "ENV list of DB handles mutex" + "DB reference count" + "MT mutex" + "Crypto handle" + "Lock handle" + "Log handle" + "Cache handle" + "Mutex handle" + "Replication handle" + "Txn handle" + "User copy" + "Test abort" + "Test 
check" + "Test copy" + "Private environment flags" + } + + set env_statprt_pattern_per_region { + "Per region database environment information" + {.*\sRegion:} + "Region ID" + "Segment ID" + "Size" + "Initialization flags" + "Region slots" + "Replication flags" + "Operation timestamp" + "Replication timestamp" + } +} + +proc env020_lock_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-lk_conf" "-lk_lockers" "-lk_objects" + "-lk_params" "-all"} + set patterns [list $lk_statprt_pattern_def $lk_statprt_pattern_def \ + [concat $section_separator $region_statprt_pattern \ + $lk_statprt_pattern_conf] \ + [concat $section_separator $region_statprt_pattern \ + $lk_statprt_pattern_lockers] \ + [concat $section_separator $region_statprt_pattern \ + $lk_statprt_pattern_objects] \ + [concat $section_separator $region_statprt_pattern \ + $lk_statprt_pattern_params] \ + [concat $section_separator [env020_def_pattern lock] \ + $region_statprt_pattern $lk_statprt_pattern_def \ + $lk_statprt_pattern_conf $lk_statprt_pattern_lockers \ + $lk_statprt_pattern_objects $lk_statprt_pattern_params]] + set check_type lock_stat_print + set stp_method lock_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_log_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-all"} + set patterns [list $log_statprt_pattern_def $log_statprt_pattern_def \ + [concat $section_separator [env020_def_pattern log] \ + $region_statprt_pattern $log_statprt_pattern_def \ + $log_statprt_pattern_DBLOG $log_statprt_pattern_LOG]] + set check_type log_stat_print + set stp_method log_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_mpool_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-hash" "-all"} + set patterns [list $mp_statprt_pattern_def $mp_statprt_pattern_def \ + [concat $section_separator $region_statprt_pattern \ + $mp_statprt_pattern_MPOOL 
$mp_statprt_pattern_DB_MPOOL \ + $mp_statprt_pattern_DB_MPOOLFILE $mp_statprt_pattern_MPOOLFILE \ + $mp_statprt_pattern_Cache] \ + [concat $section_separator $region_statprt_pattern \ + $mp_statprt_pattern_MPOOL $mp_statprt_pattern_DB_MPOOL \ + $mp_statprt_pattern_DB_MPOOLFILE $mp_statprt_pattern_MPOOLFILE \ + $mp_statprt_pattern_Cache $mp_statprt_pattern_def \ + [env020_def_pattern mp]]] + set check_type mpool_stat_print + set stp_method mpool_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_mutex_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-all"} + set patterns [list $mut_statprt_pattern_def $mut_statprt_pattern_def \ + [concat $section_separator $region_statprt_pattern \ + [env020_def_pattern mut] $mut_statprt_pattern_def \ + $mut_statprt_pattern_mutex $mut_statprt_pattern_DB_MUTEXREGION]] + set check_type mutex_stat_print + set stp_method mutex_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_rep_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-all"} + set patterns [list $rep_statprt_pattern_def $rep_statprt_pattern_def \ + [concat $section_separator [env020_def_pattern rep] \ + $region_statprt_pattern $rep_statprt_pattern_def \ + $rep_statprt_pattern_DB_REP $rep_statprt_pattern_REP \ + $rep_statprt_pattern_LOG]] + set check_type rep_stat_print + set stp_method rep_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_repmgr_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-all"} + set patterns [list [concat $repmgr_statprt_pattern_def \ + $repmgr_statprt_pattern_sites] $repmgr_statprt_pattern_def \ + [concat $repmgr_statprt_pattern_def $repmgr_statprt_pattern_sites]] + set check_type repmgr_stat_print + set stp_method repmgr_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_txn_stat_print { } { + source 
"./env020_include.tcl" + + set opts {"" "-clear" "-all"} + set patterns [list $txn_statprt_pattern_def $txn_statprt_pattern_def \ + [concat $section_separator [env020_def_pattern txn] \ + $region_statprt_pattern $txn_statprt_pattern_def \ + $txn_statprt_pattern_DB_TXNMGR $txn_statprt_pattern_DB_TXNREGION]] + set check_type txn_stat_print + set stp_method txn_stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + +proc env020_env_stat_print { } { + source "./env020_include.tcl" + + set opts {"" "-clear" "-all" "-subsystem"} + set patterns [list \ + [concat $env_statprt_pattern_Main $section_separator \ + $env_statprt_pattern_filehandle] \ + [concat $env_statprt_pattern_Main $section_separator \ + $env_statprt_pattern_filehandle] \ + [concat $section_separator [env020_def_pattern env] \ + $region_statprt_pattern $env_statprt_pattern_Main \ + $env_statprt_pattern_filehandle $env_statprt_pattern_ENV \ + $env_statprt_pattern_DB_ENV $env_statprt_pattern_per_region] \ + [concat $section_separator $env_statprt_pattern_Main \ + $env_statprt_pattern_filehandle $log_statprt_pattern_def \ + $lk_statprt_pattern_def $mp_statprt_pattern_def \ + $rep_statprt_pattern_def $repmgr_statprt_pattern_def \ + $txn_statprt_pattern_def $mut_statprt_pattern_def]] + set check_type stat_print + set stp_method stat_print + + env020_env_stp_chk $opts $patterns $check_type $stp_method +} + + +# Proc to check the patterns and lines in the message file. +# The basic flow is that: +# 1 Create an environment, some transactions and databases. +# 2 Run specified stat_print with various options, the message +# for different options will go to different message files. +# 3 Check each line in the message files to see if it matches +# any of the specified patterns. +# Notice that, when we change the message file, the previous one +# will be flushed and closed. 
+proc env020_env_stp_chk {opts patterns check_type stp_method} { + source ./include.tcl + set envarg {-create -txn -lock -log -rep} + set extopts { + {""} + {"-thread"} + {"-private" {"mutex_stat_print" "requires.*mutex.*subsystem"}} + {"-thread -private"} + } + + foreach extopt $extopts { + set extarg [lindex $extopt 0] + set failmsg "" + set fail 0 + if {[llength $extopt] > 1} { + set len [llength $extopt] + for {set i 1} {$i < $len} {incr i} { + set item [lindex $extopt $i] + set stp [lindex $item 0] + if {$stp == $stp_method} { + set failmsg [lindex $item 1] + set fail 1 + break + } + } + } + + puts "\tEnv020: Check DB_ENV->$stp_method ($envarg $extarg)" + env_cleanup $testdir + # Open the env + set env [eval berkdb_env_noerr $envarg $extarg\ + -home $testdir -msgfile $testdir/msgfile] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Create two txns + set txn1 [$env txn] + error_check_good is_vaild_txn [is_valid_txn $txn1 $env] TRUE + set txn2 [$env txn] + error_check_good is_valid_txn [is_valid_txn $txn2 $env] TRUE + + # Open 4 dbs + set db1 [berkdb_open_noerr -create -env $env \ + -btree -auto_commit db1.db] + error_check_good is_valid_db [is_valid_db $db1] TRUE + set db2 [berkdb_open_noerr -create -env $env \ + -btree -auto_commit db2.db "subdb1"] + error_check_good is_valid_db [is_valid_db $db2] TRUE + set db3 [berkdb_open_noerr -create -env $env \ + -btree -auto_commit "" "subdb1"] + error_check_good is_valid_db [is_valid_db $db3] TRUE + set db4 [berkdb_open_noerr -create -env $env \ + -btree -auto_commit "" ""] + error_check_good is_valid_db [is_valid_db $db4] TRUE + + # Call txn_checkpoint + error_check_good txn_chkpt [$env txn_checkpoint] 0 + + set len [llength $opts] + for {set i 0} {$i < $len} {incr i} { + set opt [lindex $opts $i] + if {$opt == ""} { + puts "\t\tUsing the default option" + } else { + puts "\t\tUsing $opt option" + } + set pattern [lindex $patterns $i] + $env msgfile $testdir/msgfile.$i + if {$fail == 0} { + 
error_check_good "${check_type}($opts)" \ + [eval $env $stp_method $opt] 0 + $env msgfile /dev/stdout + env020_check_output $pattern $testdir/msgfile.$i + } else { + set ret [catch {eval $env $stp_method $opt} res] + $env msgfile /dev/stdout + error_check_bad $stp_method $ret 0 + error_check_bad chk_err [regexp $failmsg $res] 0 + } + file delete -force $testdir/msgfile.$i + error_check_good "file_not_exists" \ + [file exists $testdir/msgfile.$i] 0 + } + + error_check_good "$txn1 commit" [$txn1 commit] 0 + error_check_good "$txn2 commit" [$txn2 commit] 0 + error_check_good "$db4 close" [$db4 close] 0 + error_check_good "$db3 close" [$db3 close] 0 + error_check_good "$db2 close" [$db2 close] 0 + error_check_good "$db1 close" [$db1 close] 0 + error_check_good "$env close" [$env close] 0 + } +} + +proc env020_check_output {pattern msgfile} { + set f [open $msgfile r] + set failed 0 + while {[gets $f line] >= 0} { + set line_found 0 + foreach pat $pattern { + if {[regexp $pat $line] != 0} { + set line_found 1 + break + } + } + if {$line_found == 0} { + puts "BAD STAT STRING: $line" + set failed 1 + } + } + close $f + return $failed +} + +proc env020_bt_stat_print {} { + set pattern { + "Local time" + # Btree information + "Btree magic number" + "Btree version number" + "Byte order" + "Flags" + "Minimum keys per-page" + "Underlying database page size" + "Overflow key/data size" + "Number of levels in the tree" + "Number of unique keys in the tree" + "Number of data items in the tree" + "Number of tree internal pages" + "Number of bytes free in tree internal pages" + "Number of tree leaf pages" + "Number of bytes free in tree leaf pages" + "Number of tree duplicate pages" + "Number of bytes free in tree duplicate pages" + "Number of tree overflow pages" + "Number of bytes free in tree overflow pages" + "Number of empty pages" + "Number of pages on the free list" + } + + set all_pattern { + "Default Btree/Recno database information" + # Btree cursor information + "Overflow 
size" + "Order" + "Internal Flags" + } + + puts "\tEnv020: Check DB->stat_print for btree" + env020_db_stat_print btree $pattern $all_pattern +} + +proc env020_ham_stat_print {} { + set pattern { + "Local time" + # Hash information + "Hash magic number" + "Hash version number" + "Byte order" + "Flags" + "Number of pages in the database" + "Underlying database page size" + "Specified fill factor" + "Number of keys in the database" + "Number of data items in the database" + "Number of hash buckets" + "Number of bytes free on bucket pages" + "Number of overflow pages" + "Number of bytes free in overflow pages" + "Number of bucket overflow pages" + "Number of bytes free in bucket overflow pages" + "Number of duplicate pages" + "Number of bytes free in duplicate pages" + "Number of pages on the free list" + } + + set all_pattern { + "Default Hash database information" + # HAM cursor information + "Bucket traversing" + "Bucket locked" + "Duplicate set offset" + "Current duplicate length" + "Total duplicate set length" + "Bytes needed for add" + "Page on which we can insert" + "Order" + "Internal Flags" + } + + puts "\tEnv020: Check DB->stat_print for hash" + env020_db_stat_print hash $pattern $all_pattern +} + +proc env020_ram_stat_print {} { + set pattern { + "Local time" + # Btree information + "Btree magic number" + "Btree version number" + "Byte order" + "Flags" + "Fixed-length record size" + "Fixed-length record pad" + "Underlying database page size" + "Number of levels in the tree" + "Number of records in the tree" + "Number of data items in the tree" + "Number of tree internal pages" + "Number of bytes free in tree internal pages" + "Number of tree leaf pages" + "Number of bytes free in tree leaf pages" + "Number of tree duplicate pages" + "Number of bytes free in tree duplicate pages" + "Number of tree overflow pages" + "Number of bytes free in tree overflow pages" + "Number of empty pages" + "Number of pages on the free list" + } + + set all_pattern { + "Default 
Btree/Recno database information" + # Btree cursor information + "Overflow size" + {^[\d\s]*Recno} + "Order" + "Internal Flags" + } + + puts "\tEnv020: Check DB->stat_print for recno" + env020_db_stat_print recno $pattern $all_pattern +} + +proc env020_qam_stat_print { } { + set pattern { + "Local time" + # Queue information + "Queue magic number" + "Queue version number" + "Fixed-length record size" + "Fixed-length record pad" + "Underlying database page size" + "Underlying database extent size" + "Number of records in the database" + "Number of data items in the database" + "Number of database pages" + "Number of bytes free in database pages" + "First undeleted record" + "Next available record number" + } + + set all_pattern { + "Default Queue database information" + } + + puts "\tEnv020: Check DB->stat_print for queue" + env020_db_stat_print queue $pattern $all_pattern +} + +proc env020_db_stat_print {method pattern all_pattern} { + source ./include.tcl + set dball_pattern { + "Local time" + "=-=-=-=-=-=-=-=-=-=-=-=-=" + # DB Handle information + "DB handle information" + "Page size" + "Append recno" + "Feedback" + "Dup compare" + "App private" + "DbEnv" + "Type" + "Thread mutex" + "File" + "Database" + "Open flags" + "File ID" + "Cursor adjust ID" + "Meta pgno" + "Locker ID" + "Handle lock" + "Associate lock" + "Replication handle timestamp" + "Secondary callback" + "Primary handle" + "api internal" + "Btree/Recno internal" + "Hash internal" + "Queue internal" + "Flags" + "File naming information" + # DB register information. 
+ "DB handle FNAME contents" + "log ID" + "Meta pgno" + "create txn" + "refcount" + "Flags" + # DB cursor information + "DB handle cursors" + "Active queue" + "DBC" + "Associated dbp" + "Associated txn" + "Internal" + "Default locker ID" + "Locker" + "Type" + "Off-page duplicate cursor" + "Referenced page" + "Root" + "Page number" + "Page index" + "Lock mode" + "Flags" + "Join queue" + "Free queue" + } + env_cleanup $testdir + set env [eval berkdb_env_noerr -create -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Test using the default option. + puts "\t\tUsing the default option" + set db [eval berkdb_open_noerr -create -env $env -$method \ + -msgfile $testdir/msgfile1 db1.db] + + error_check_good db_stat_print [$db stat_print] 0 + error_check_good "$db close" [$db close] 0 + env020_check_output $pattern $testdir/msgfile1 + + # Test using stat_print -fast + puts "\t\tUsing -fast option" + set db [eval berkdb_open_noerr -create -env $env -$method \ + -msgfile $testdir/msgfile2 db2.db] + error_check_good db_stat_print [$db stat_print -fast] 0 + error_check_good "$db close" [$db close] 0 + env020_check_output $pattern $testdir/msgfile2 + + # Test using stat_print -all + puts "\t\tUsing -all option" + set db [eval berkdb_open_noerr -create -env $env -$method \ + -msgfile $testdir/msgfile3 db3.db] + error_check_good db_stat_print [$db stat_print -all] 0 + error_check_good "$db close" [$db close] 0 + env020_check_output [concat $dball_pattern $pattern $all_pattern] \ + $testdir/msgfile3 + + error_check_good "$env close" [$env close] 0 + + file delete -force $testdir/msgfile1 + error_check_good "file_not_exists" [file exists $testdir/msgfile1] 0 + file delete -force $testdir/msgfile2 + error_check_good "file_not_exists" [file exists $testdir/msgfile2] 0 + file delete -force $testdir/msgfile3 + error_check_good "file_not_exists" [file exists $testdir/msgfile3] 0 +} + +proc env020_seq_stat_print { } { + source ./include.tcl + set pattern { + 
"The number of sequence locks that required waiting" + "The current sequence value" + "The cached sequence value" + "The last cached sequence value" + "The minimum sequence value" + "The maximum sequence value" + "The cache size" + "Sequence flags" + } + + puts "\tEnv020: Check DB_SEQUENCE->stat_print" + env_cleanup $testdir + set env [eval berkdb_env_noerr -create -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + set db [eval berkdb_open_noerr -create -env $env -btree \ + -msgfile $testdir/msgfile1 db1.db] + set seq [eval berkdb sequence -create $db key1] + error_check_good check_seq [is_valid_seq $seq] TRUE + error_check_good seq_stat_print [$seq stat_print] 0 + + $env msgfile $testdir/msgfile2 + error_check_good seq_stat_print [$seq stat_print -clear] 0 + + error_check_good seq_close [$seq close] 0 + error_check_good "$db close" [$db close] 0 + error_check_good "$env close" [$env close] 0 + + puts "\t\tUsing the default option" + env020_check_output $pattern $testdir/msgfile1 + + puts "\t\tUsing -clear option" + env020_check_output $pattern $testdir/msgfile2 + + file delete -force $testdir/msgfile1 + error_check_good "file_not_exists" [file exists $testdir/msgfile1] 0 + file delete -force $testdir/msgfile2 + error_check_good "file_not_exists" [file exists $testdir/msgfile2] 0 +} diff --git a/test/tcl/env021.tcl b/test/tcl/env021.tcl new file mode 100644 index 00000000..c8e9ebb5 --- /dev/null +++ b/test/tcl/env021.tcl @@ -0,0 +1,79 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST env021 +# TEST Test the operations on a transaction in a CDS environment. 
+# TEST These are the operations we test: +# TEST $txn abort +# TEST $txn commit +# TEST $txn id +# TEST $txn prepare +# TEST $txn setname name +# TEST $txn getname +# TEST $txn discard +# TEST $txn set_timeout +# TEST In these operations, we only support the following: +# TEST $txn id +# TEST $txn commit + +proc env021 { } { + source ./include.tcl + env_cleanup $testdir + + puts "Env021: Test operations on a transaction in a CDS environment." + + set indx 0 + puts "\tEnv021.$indx: Test DB_ENV->cdsgroup_begin in\ + an environment without DB_INIT_CDB configured" + file mkdir $testdir/envdir$indx + set env1 [berkdb_env_noerr -create -home $testdir/envdir$indx \ + -lock -log -txn -mode 0644] + error_check_good is_valid_env [is_valid_env $env1] TRUE + set txn1 [catch {eval $env1 cdsgroup} res] + error_check_good env_without_cdb [expr \ + [is_substr $res "requires"] && [is_substr $res "DB_INIT_CDB"]] 1 + error_check_good env_close [$env1 close] 0 + + set env021parms { + {"abort" "abort" 0} + {"commit" "commit" 1} + {"id" "id" 1} + {"prepare" "prepare env021gid1" 0} + {"setname" "setname env021txn1" 0} + {"getname" "getname" 0} + {"discard" "discard" 0} + {"set_timeout" "set_timeout 1000" 0} + } + + foreach param $env021parms { + set testname [lindex $param 0] + set cmd [lindex $param 1] + set success [lindex $param 2] + incr indx + puts "\tEnv021.$indx: Test DB_TXN->$testname" + file mkdir $testdir/envdir$indx + set env1 [berkdb_env_noerr -create -home $testdir/envdir$indx \ + -cdb -mode 0644] + error_check_good is_valid_env [is_valid_env $env1] TRUE + set txn1 [$env1 cdsgroup] + error_check_good is_valid_txn [is_valid_txn $txn1 $env1] TRUE + set ret [catch {eval $txn1 $cmd} res] + if {$success} { + error_check_bad "$txn1 $cmd" \ + [is_substr $res "CDS groups do not support"] 1 + } else { + error_check_good "$txn1 $cmd" \ + [is_substr $res "CDS groups do not support"] 1 + } + set ret [catch {$txn1 commit} res] + if {$ret} { + error_check_good \ + txn_commit [is_substr 
$res "invalid command name"] 1 + } + error_check_good env_close [$env1 close] 0 + } +} + diff --git a/test/tcl/envscript.tcl b/test/tcl/envscript.tcl new file mode 100644 index 00000000..069ce6ab --- /dev/null +++ b/test/tcl/envscript.tcl @@ -0,0 +1,100 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Envscript -- for use with env012, DB_REGISTER test. +# Usage: envscript testdir testfile putget key data recover failchk wait +# testdir: directory containing the env we are joining. +# testfile: file name for database. +# putget: What to do in the db: put, get, or loop. +# key: key to store or get +# data: data to store or get +# recover: include or omit the -recover flag in opening the env. +# failchk: include or omit the -failchk flag in opening the env. 2 options +# here, one with just -failchk and one with both -failchk & -isalive +# wait: how many seconds to wait before closing env at end of test. + +source ./include.tcl +source $test_path/testutils.tcl + +set usage "envscript testdir testfile putget key data recover failchk wait" + +# Verify usage +if { $argc != 8 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set testdir [ lindex $argv 0 ] +set testfile [ lindex $argv 1 ] +set putget [lindex $argv 2 ] +set key [ lindex $argv 3 ] +set data [ lindex $argv 4 ] +set recover [ lindex $argv 5 ] +set failchk [lindex $argv 6 ] +set wait [ lindex $argv 7 ] + +set flag1 {} +if { $recover == "RECOVER" } { + set flag1 " -recover " +} + +set flag2 {} +if {$failchk == "FAILCHK0" } { + set flag2 " -failchk " +} +if {$failchk == "FAILCHK1"} { + set flag2 " -failchk -isalive my_isalive -reg_timeout 100 " +} + +# Open and register environment. 
+if {[catch {eval {berkdb_env} \ + -create -home $testdir -txn -register $flag1 $flag2} dbenv]} { + puts "FAIL: opening env returned $dbenv" +} +error_check_good envopen [is_valid_env $dbenv] TRUE + +# Open database, put or get, close database. +if {[catch {eval {berkdb_open} \ + -create -auto_commit -btree -env $dbenv $testfile} db]} { + puts "FAIL: opening db returned $db" +} +error_check_good dbopen [is_valid_db $db] TRUE + +switch $putget { + PUT { + set txn [$dbenv txn] + error_check_good db_put [eval {$db put} -txn $txn $key $data] 0 + error_check_good txn_commit [$txn commit] 0 + } + GET { + set ret [$db get $key] + error_check_good db_get [lindex [lindex $ret 0] 1] $data + } + LOOP { + while { 1 } { + set txn [$dbenv txn] + error_check_good db_put \ + [eval {$db put} -txn $txn $key $data] 0 + error_check_good txn_commit [$txn commit] 0 + tclsleep 1 + } + } + default { + puts "FAIL: Unrecognized putget value $putget" + } +} + +error_check_good db_close [$db close] 0 + +# Wait. +while { $wait > 0 } { +puts "waiting ... wait is $wait" + tclsleep 1 + incr wait -1 +} + +error_check_good env_close [$dbenv close] 0 diff --git a/test/tcl/fop001.tcl b/test/tcl/fop001.tcl new file mode 100644 index 00000000..19121db7 --- /dev/null +++ b/test/tcl/fop001.tcl @@ -0,0 +1,332 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop001.tcl +# TEST Test two file system operations combined in one transaction. +proc fop001 { method { inmem 0 } { childtxn 0 } args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # The variable inmem determines whether the test is being + # run with regular named databases or named in-memory databases. 
+ set txntype "transaction" + if { $inmem == 0 } { + if { $childtxn == 0 } { + set tnum "001" + } else { + set tnum "009" + set txntype "child transaction" + puts "Fop001 with child txns is called fop009." + } + set string "regular named databases" + set operator do_op + } else { + if {[is_queueext $method] } { + puts "Skipping in-memory test for method $method." + return + } + if { $childtxn == 0 } { + set tnum "007" + puts "Fop001 with in-memory dbs is called fop007." + } else { + set tnum "011" + set txntype "child transaction" + puts "Fop001 with in-memory dbs\ + and child txns is called fop011." + } + set string "in-memory named databases" + set operator do_inmem_op + } + + puts "\nFop$tnum: ($method)\ + Two file system ops in one $txntype for $string." + + set exists {a b} + set noexist {foo bar} + set open {} + set cases {} + set ops {rename remove open open_create open_excl truncate} + + # Set up all sensible two-op cases (op1 succeeds). + foreach retval { 0 "file exists" "no such file" } { + foreach op1 {rename remove open open_excl \ + open_create truncate} { + foreach op2 $ops { + append cases " " [create_tests $op1 $op2 \ + $exists $noexist $open $retval] + } + } + } + + # Set up evil two-op cases (op1 fails). Omit open_create + # and truncate from op1 list -- open_create always succeeds + # and truncate requires a successful open. + foreach retval { 0 "file exists" "no such file" } { + foreach op1 { rename remove open open_excl } { + foreach op2 $ops { + append cases " " [create_badtests $op1 $op2 \ + $exists $noexist $open $retval] + } + } + } + + # The structure of each case is: + # {{op1 {names1} result end1} {op2 {names2} result} {remaining}} + # A result of "0" indicates no error is expected. + # Otherwise, the result is the expected error message. + # + # The "end1" variable indicates whether the first txn + # ended with an abort or a commit, and is not used + # in this test. 
+ # + # The "remaining" variable lists the files that should + # exist at the end of the test case. + # + # Comment this loop out to remove the list of cases. +# set i 1 +# foreach case $cases { +# puts "\tFop$tnum:$i: $case" +# incr i +# } + + set testid 0 + + # Run all the cases + foreach case $cases { + env_cleanup $testdir + incr testid + + # Extract elements of the case + set op1 [lindex [lindex $case 0] 0] + set names1 [lindex [lindex $case 0] 1] + set res1 [lindex [lindex $case 0] 2] + + set op2 [lindex [lindex $case 1] 0] + set names2 [lindex [lindex $case 1] 1] + set res2 [lindex [lindex $case 1] 2] + set remaining [lindex [lindex $case 1] 3] + + # Use the list of remaining files to derive + # the list of files that should be gone. + set allnames { a b foo bar } + set gone {} + foreach f $allnames { + set idx [lsearch -exact $remaining $f] + if { $idx == -1 } { + lappend gone $f + } + } + + puts -nonewline "\tFop$tnum.$testid: $op1 ($names1), " + puts "then $op2 ($names2)." + + # The variable 'when' describes when to resolve a txn -- + # before or after closing any open databases. + foreach when { before after } { + + # Create transactional environment. + set env [berkdb_env -create -home $testdir -txn nosync] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Create two databases, dba and dbb. 
+ if { $inmem == 0 } { + set dba [eval {berkdb_open -create} $omethod \ + $args -env $env -auto_commit a] + } else { + set dba [eval {berkdb_open -create} $omethod \ + $args -env $env -auto_commit { "" a }] + } + error_check_good dba_open [is_valid_db $dba] TRUE + error_check_good dba_put [$dba put 1 a] 0 + error_check_good dba_close [$dba close] 0 + + if { $inmem == 0 } { + set dbb [eval {berkdb_open -create} $omethod \ + $args -env $env -auto_commit b] + } else { + set dbb [eval {berkdb_open -create} $omethod \ + $args -env $env -auto_commit { "" b }] + } + error_check_good dbb_open [is_valid_db $dbb] TRUE + error_check_good dbb_put [$dbb put 1 b] 0 + error_check_good dbb_close [$dbb close] 0 + + # The variable 'end' describes how to resolve the txn. + # We run the 'abort' first because that leaves the env + # properly set up for the 'commit' test. + foreach end {abort commit} { + + # Start transaction + set parent [$env txn] + set parent_end "commit" + set msg "" + if { $childtxn } { + set child [$env txn -parent $parent] + set txn $child + set msg "(committing parent)" + if { [berkdb random_int 0 1] == 0 } { + set parent_end "abort" + set msg "(aborting parent)" + } + } else { + set txn $parent + } + + puts "\t\tFop$tnum.$testid:\ + $end $when closing database. 
$msg" + + # Execute and check operation 1 + set result1 [$operator \ + $omethod $op1 $names1 $txn $env $args] + if { $res1 == 0 } { + error_check_good \ + op1_should_succeed $result1 $res1 + } else { + set error [extract_error $result1] + error_check_good \ + op1_wrong_failure $error $res1 + } + + # Execute and check operation 2 + set result2 [$operator \ + $omethod $op2 $names2 $txn $env $args] + if { $res2 == 0 } { + error_check_good \ + op2_should_succeed $result2 $res2 + } else { + set error [extract_error $result2] + error_check_good \ + op2_wrong_failure $error $res2 + } + + if { $when == "before" } { + error_check_good txn_$end [$txn $end] 0 + if { $childtxn } { + error_check_good parent_end \ + [$parent $parent_end] 0 + } + + # If the txn was aborted, we still + # have the original two databases. + # Otherwise check for the expected + # remaining files. + if { $end == "abort" ||\ + $parent_end == "abort" } { + error_check_good db_exists \ + [database_exists \ + $inmem $testdir a] 1 + error_check_good db_exists \ + [database_exists \ + $inmem $testdir b] 1 + } else { + foreach db $remaining { + error_check_good db_exists \ + [database_exists \ + $inmem $testdir $db] 1 + } + foreach db $gone { + error_check_good db_gone \ + [database_exists \ + $inmem $testdir $db] 0 + } + } + + close_db_handles + } else { + close_db_handles + error_check_good txn_$end [$txn $end] 0 + if { $childtxn } { + error_check_good resolve_parent \ + [$parent $parent_end] 0 + } + + if { $end == "abort" || $parent_end == "abort" } { + error_check_good db_exists \ + [database_exists \ + $inmem $testdir a] 1 + error_check_good db_exists \ + [database_exists \ + $inmem $testdir b] 1 + } else { + foreach db $remaining { + error_check_good db_exists \ + [database_exists \ + $inmem $testdir $db] 1 + } + foreach db $gone { + error_check_good db_gone \ + [database_exists \ + $inmem $testdir $db] 0 + } + + } + } + } + + # Clean up for next case + error_check_good env_close [$env close] 0 + 
error_check_good envremove \ + [berkdb envremove -home $testdir] 0 + env_cleanup $testdir + } + } +} + +proc database_exists { inmem testdir name } { + if { $inmem == 1 } { + return [inmem_exists $testdir $name] + } else { + return [file exists $testdir/$name] + } + +} + +# This is a real hack. We need to figure out if an in-memory named +# file exists. In a perfect world we could use mpool stat. Unfortunately, +# mpool_stat returns files that have deadfile set and we need to not consider +# those files to be meaningful. So, we are parsing the output of db_stat -MA +# (I told you this was a hack) If we ever change the output, this is going +# to break big time. Here is what we assume: +# A file is represented by: File #N name +# The last field printed for a file is Flags +# If the file is dead, deadfile will show up in the flags +proc inmem_exists { dir filename } { + source ./include.tcl + set infile 0 + set islive 0 + set name "" + set s [exec $util_path/db_stat -MA -h $dir] + foreach i $s { + if { $i == "File" } { + set infile 1 + set islive 1 + set name "" + } elseif { $i == "Flags" } { + set infile 0 + if { $name != "" && $islive } { + return 1 + } + } elseif { $infile != 0 } { + incr infile + } + + if { $islive } { + if { $i == "deadfile," || $i == "deadfile" } { + set islive 0 + } + } + + if { $infile == 3 } { + if { $i == $filename } { + set name $filename + } + } + } + return 0 +} + diff --git a/test/tcl/fop002.tcl b/test/tcl/fop002.tcl new file mode 100644 index 00000000..32bd6f45 --- /dev/null +++ b/test/tcl/fop002.tcl @@ -0,0 +1,135 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop002.tcl +# TEST Test file system operations in the presence of bad permissions. 
+proc fop002 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + env_cleanup $testdir + puts "\nFop002: ($method) File system ops and permissions." + if { $is_windows_test == 1 } { + puts "\tSkipping permissions test for Windows platform." + return + } + + # Create database with -rw-r--r-- permissions. + set perms "0644" + set testfile $testdir/a.db + set destfile $testdir/b.db + + set db [eval \ + {berkdb_open -create} $omethod $args -mode $perms $testfile] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_put [$db put 1 [chop_data $method a]] 0 + error_check_good db_close [$db close] 0 + + # Eliminate all read and write permission, and try to execute + # file ops. They should fail. + set res [exec chmod 0000 $testfile] + error_check_good remove_permissions [llength $res] 0 + # Put remove last on the list of ops since it should succeed + # at the end of the test, removing the test file. + set ops [list open_create open rename remove] + set rdonly 0 + + puts "\tFop002.a: Test with neither read nor write permission." + foreach op $ops { + puts "\t\tFop002.a: Testing $op for failure." + switch $op { + open { + test_$op $testfile $omethod $args $rdonly 1 + } + rename { + test_$op $testfile $destfile 1 + } + open_create { + test_$op $testfile $omethod $args 1 + } + remove { + test_$op $testfile 1 + } + } + } + + # Change permissions to read-only. + puts "\tFop002.b: Test with read-only permission." + set rdonly 1 + + set res [exec chmod 0444 $testfile] + error_check_good set_readonly [llength $res] 0 + + foreach op $ops { + puts "\t\tFop002.b: Testing $op for success." + switch $op { + open { + test_$op $testfile $omethod $args $rdonly 0 + } + rename { + test_$op $testfile $destfile 0 + # Move it back so later tests work + test_$op $destfile $testfile 0 + } + open_create { + puts "\t\tSkipping open_create with read-only." 
+ } + remove { + test_$op $testfile 0 + } + } + } +} + +proc test_remove { testfile {expectfail 0} } { + catch { berkdb dbremove $testfile } res + if { $expectfail == 1 } { + error_check_good remove_err $res "db remove:permission denied" + } else { + error_check_good remove $res 0 + } +} + +proc test_rename { testfile destfile {expectfail 0} } { + catch { berkdb dbrename $testfile $destfile } res + if { $expectfail == 1 } { + error_check_good rename_err $res "db rename:permission denied" + } else { + error_check_good rename $res 0 + } +} + +proc test_open_create { testfile omethod args {expectfail 0} } { + set stat [catch { set db \ + [eval {berkdb_open -create} $omethod $args $testfile]} res] + if { $expectfail == 1 } { + error_check_good open_create_err $res \ + "db open:permission denied" + } else { + error_check_good open_create $stat 0 + # Since we succeeded, we have to close the db. + error_check_good db_close [$db close] 0 + } +} + +proc test_open { testfile omethod args {readonly 0} {expectfail 0} } { + if { $readonly == 1 } { + set stat [catch {set db \ + [eval {berkdb_open -rdonly} $omethod $args $testfile]} res] + } else { + set stat [catch {set db [berkdb_open $omethod $testfile]} res] + } + if { $expectfail == 1 } { + error_check_good open_err $res \ + "db open:permission denied" + } else { + error_check_good db_open $stat 0 + error_check_good db_close [$db close] 0 + } +} + diff --git a/test/tcl/fop003.tcl b/test/tcl/fop003.tcl new file mode 100644 index 00000000..b11e257f --- /dev/null +++ b/test/tcl/fop003.tcl @@ -0,0 +1,127 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop003 +# TEST +# TEST Test behavior of create and truncate for compatibility +# TEST with sendmail. +# TEST 1. DB_TRUNCATE is not allowed with locking or transactions. +# TEST 2. Can -create into zero-length existing file. +# TEST 3. 
Can -create into non-zero-length existing file if and +# TEST only if DB_TRUNCATE is specified. +proc fop003 { method args } { + global errorInfo + source ./include.tcl + env_cleanup $testdir + + if { [is_btree $method] != 1 } { + puts "Skipping fop003 for method $method" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set tnum "003" + set testfile fop$tnum.db + puts "Fop$tnum ($method): Test of required behavior for sendmail." + + puts "\tFop$tnum.a: -truncate is not allowed within\ + txn or locking env." + set envflags "lock txn" + foreach flag $envflags { + set env [berkdb_env_noerr -create -home $testdir -$flag] + set db [eval {berkdb_open_noerr -create} \ + $omethod $args -env $env $testfile] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + catch {[berkdb_open_noerr -truncate $omethod $args -env $env \ + $testfile]} res + error_check_good "$flag env not allowed" [is_substr $res \ + "DB_TRUNCATE illegal with locking specified"] 1 + error_check_good dbremove [$env dbremove $testfile] 0 + error_check_good env_close [$env close] 0 + error_check_good envremove [berkdb envremove -home $testdir] 0 + } + + # Create an empty file, then open with -create. We get an + # error message warning us that this does not look like a + # DB file, but the open should succeed. + foreach tflag { "" "-truncate" } { + puts "\tFop$tnum.b: -create $tflag is allowed on open of\ + existing zero-length file." + set fd [open $testdir/foo w] + close $fd + catch {set db [eval {berkdb_open_noerr -create} \ + $tflag $omethod $args $testdir/foo]} res + error_check_good open_fail [is_substr $errorInfo \ + "unexpected file type or format"] 1 + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + } + + # Create a non-empty non-DB file, then open with -create. This + # should fail. Try again with -truncate, and it should + # succeed, and the file should be empty. 
+ foreach tflag { "" "-truncate" } { + if { $tflag == "-truncate" } { + puts "\tFop$tnum.c: -create with -truncate\ + succeeds on open of non-empty non-DB file." + } else { + puts "\tFop$tnum.c: -create without -truncate\ + fails on open of non-empty non-DB file." + } + set fd [open $testdir/bar w] + puts $fd "junk" + close $fd + catch {set db [eval {berkdb_open_noerr -create} \ + $tflag $omethod $args $testdir/bar]} ret + if { $tflag == "-truncate" } { + # We expect an error message, but also an open db. + error_check_good open_fail [is_substr $errorInfo \ + "unexpected file type or format"] 1 + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_handle \ + [llength [berkdb handles]] 1 + error_check_good db_close [$db close] 0 + } else { + error_check_good no_db_handle \ + [llength [berkdb handles]] 0 + } + fileremove -f $testdir/bar + } + + puts "\tFop$tnum.d: -create is ignored on open of existing\ + non-zero-length file." + # Create a db file. Close and reopen with -create. Make + # sure that we still have the same file by checking the contents. + set key 1 + set data "data" + set file "file.db" + set db [eval {berkdb_open -create $omethod} $args $testdir/$file] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_put [$db put $key [chop_data $method $data]] 0 + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open -create $omethod} $args $testdir/$file] + error_check_good db_open2 [is_valid_db $db] TRUE + set ret [$db get $key] + error_check_good db_get \ + [lindex [lindex $ret 0] 1] [pad_data $method $data] + error_check_good db_close2 [$db close] 0 + + puts "\tFop$tnum.e: -create is allowed on open -truncate of\ + existing non-zero-length file." + # Use the file we already have with -truncate flag. The open + # should be successful, and when we query for the key that + # used to be there, we should get nothing. 
+ set db [eval \ + {berkdb_open -create -truncate $omethod} $args $testdir/$file] + error_check_good db_open3 [is_valid_db $db] TRUE + set ret [$db get $key] + error_check_good db_get [lindex [lindex $ret 0] 1] "" + error_check_good db_close3 [$db close] 0 + +} diff --git a/test/tcl/fop004.tcl b/test/tcl/fop004.tcl new file mode 100644 index 00000000..3b7a8104 --- /dev/null +++ b/test/tcl/fop004.tcl @@ -0,0 +1,260 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop004 +# TEST Test of DB->rename(). (formerly test075) +# TEST Test that files can be renamed from one directory to another. +# TEST Test that files can be renamed using absolute or relative +# TEST pathnames. +proc fop004 { method { tnum "004" } args } { + global encrypt + global errorCode + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Fop$tnum: ($method $args): Test of DB->rename()" + + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + # If we are using an env, then skip this test. + # It needs its own. + incr eindex + set env [lindex $args $eindex] + puts "Skipping fop$tnum for env $env" + return + } + if { $encrypt != 0 } { + puts "Skipping fop$tnum for security" + return + } + cleanup $testdir NULL + + # Define absolute pathnames + set curdir [pwd] + cd $testdir + set fulldir [pwd] + cd $curdir + set reldir $testdir + + # Name subdirectories for renaming from one directory to another. 
+ set subdira A + set subdirb B + + # Set up absolute and relative pathnames for test + set paths [list "absolute $fulldir" "relative $reldir"] + set files [list "fop$tnum-old.db fop$tnum-new.db {name change}" \ + "fop$tnum.db fop$tnum.db {directory change}"] + + foreach pathinfo $paths { + set pathtype [lindex $pathinfo 0] + set path [lindex $pathinfo 1] + foreach fileinfo $files { + set desc [lindex $fileinfo 2] + puts "Fop$tnum: Test of $pathtype path $path with $desc" + set env NULL + set envargs "" + + # Loop through test using the following rename options + # 1. no environment, not in transaction + # 2. with environment, not in transaction + # 3. rename with auto-commit + # 4. rename in committed transaction + # 5. rename in aborted transaction + + foreach op "noenv env auto commit abort" { + + puts "\tFop$tnum.a: Create/rename with $op" + # If we are using an env, then testfile should + # be the db name. Otherwise it is the path we + # are testing and the name. + # + set old [lindex $fileinfo 0] + set new [lindex $fileinfo 1] + # Set up subdirectories if necessary. + if { $desc == "directory change" } { + file mkdir $testdir/$subdira + file mkdir $testdir/$subdirb + set oldname $subdira/$old + set newname $subdirb/$new + set oldextent $subdira/__dbq.$old.0 + set newextent $subdirb/__dbq.$new.0 + } else { + set oldname $old + set newname $new + set oldextent __dbq.$old.0 + set newextent __dbq.$new.0 + } + # If we don't have an env, we're going to + # operate on the file using its absolute + # or relative path. Tack it on the front. 
+ if { $op == "noenv" } { + set oldfile $path/$oldname + set newfile $path/$newname + set oldextent $path/$oldextent + set newextent $path/$newextent + } else { + set oldfile $oldname + set newfile $newname + set txnarg "" + if { $op == "auto" || $op == "commit" \ + || $op == "abort" } { + set txnarg " -txn" + } + set env [eval {berkdb_env -create} \ + $txnarg -home $path] + set envargs "-env $env" + error_check_good \ + env_open [is_valid_env $env] TRUE + } + + # Files don't exist before starting the test. + # + check_file_exist $oldfile $env $path 0 + check_file_exist $newfile $env $path 0 + + puts "\t\tFop$tnum.a.1: Create file $oldfile" + set db [eval {berkdb_open -create -mode 0644} \ + $omethod $envargs $args $oldfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Use numeric key so record-based methods + # don't need special treatment. + set key 1 + set data data + + error_check_good dbput \ + [$db put $key [chop_data $method $data]] 0 + error_check_good dbclose [$db close] 0 + + puts "\t\tFop$tnum.a.2:\ + Rename file to $newfile" + check_file_exist $oldfile $env $path 1 + check_file_exist $newfile $env $path 0 + + # Regular renames use berkdb dbrename + # Txn-protected renames use $env dbrename. + if { $op == "noenv" || $op == "env" } { + error_check_good rename [eval \ + {berkdb dbrename} $envargs \ + $oldfile $newfile] 0 + } elseif { $op == "auto" } { + error_check_good rename [eval \ + {$env dbrename} -auto_commit \ + $oldfile $newfile] 0 + } else { + # $op is "abort" or "commit" + set txn [$env txn] + error_check_good rename [eval \ + {$env dbrename} -txn $txn \ + $oldfile $newfile] 0 + error_check_good txn_$op [$txn $op] 0 + } + + if { $op != "abort" } { + check_file_exist $oldfile $env $path 0 + check_file_exist $newfile $env $path 1 + } else { + check_file_exist $oldfile $env $path 1 + check_file_exist $newfile $env $path 0 + } + + # Check that extent files moved too, unless + # we aborted the rename. 
+ if { [is_queueext $method ] == 1 } { + if { $op != "abort" } { + check_file_exist \ + $oldextent $env $path 0 + check_file_exist \ + $newextent $env $path 1 + } else { + check_file_exist \ + $oldextent $env $path 1 + check_file_exist \ + $newextent $env $path 0 + } + } + + puts "\t\tFop$tnum.a.3: Check file contents" + # Open again with create to make sure we're not + # caching. In the normal case (no env), we + # already know the file doesn't exist. + set odb [eval {berkdb_open -create -mode 0644} \ + $envargs $omethod $args $oldfile] + set ndb [eval {berkdb_open -create -mode 0644} \ + $envargs $omethod $args $newfile] + error_check_good \ + odb_open [is_valid_db $odb] TRUE + error_check_good \ + ndb_open [is_valid_db $ndb] TRUE + + # The DBT from the "old" database should be + # empty, not the "new" one, except in the case + # of an abort. + set odbt [$odb get $key] + if { $op == "abort" } { + error_check_good \ + odbt_has_data [llength $odbt] 1 + } else { + set ndbt [$ndb get $key] + error_check_good \ + odbt_empty [llength $odbt] 0 + error_check_bad \ + ndbt_empty [llength $ndbt] 0 + error_check_good ndbt \ + [lindex [lindex $ndbt 0] 1] \ + [pad_data $method $data] + } + error_check_good odb_close [$odb close] 0 + error_check_good ndb_close [$ndb close] 0 + + # Now there's both an old and a new. Rename the + # "new" to the "old" and make sure that fails. + # + puts "\tFop$tnum.b: Make sure rename fails\ + instead of overwriting" + set envargs "" + if { $env != "NULL" } { + error_check_good \ + env_close [$env close] 0 + set env [berkdb_env_noerr -home $path] + set envargs " -env $env" + error_check_good env_open2 \ + [is_valid_env $env] TRUE + } + set ret [catch {eval {berkdb dbrename} \ + $envargs $newfile $oldfile} res] + error_check_bad rename_overwrite $ret 0 + error_check_good rename_overwrite_ret \ + [is_substr $errorCode EEXIST] 1 + + # Verify and then start over from a clean slate. 
+ verify_dir $path "\tFop$tnum.c: " + verify_dir $path/$subdira "\tFop$tnum.c: " + verify_dir $path/$subdirb "\tFop$tnum.c: " + if { $env != "NULL" } { + error_check_good \ + env_close2 [$env close] 0 + } + env_cleanup $path + check_file_exist $oldfile $env $path 0 + check_file_exist $newfile $env $path 0 + } + } + } +} + +proc check_file_exist { filename env path expected } { + if { $env != "NULL" } { + error_check_good "$filename exists in env" \ + [file exists $path/$filename] $expected + } else { + error_check_good \ + "$filename exists" [file exists $filename] $expected + } +} diff --git a/test/tcl/fop005.tcl b/test/tcl/fop005.tcl new file mode 100644 index 00000000..fe89e3d8 --- /dev/null +++ b/test/tcl/fop005.tcl @@ -0,0 +1,147 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop005 +# TEST Test of DB->remove() +# TEST Formerly test080. +# TEST Test use of dbremove with and without envs, with absolute +# TEST and relative paths, and with subdirectories. + +proc fop005 { method args } { + source ./include.tcl + + set tnum "005" + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Fop$tnum: ($method $args): Test of DB->remove()" + + # Determine full path + set curdir [pwd] + cd $testdir + set fulldir [pwd] + cd $curdir + set reldir $testdir + + # If we are using an env, then skip this test. + # It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Skipping fop$tnum for env $env" + return + } + cleanup $testdir NULL + + # Set up absolute and relative pathnames, and a subdirectory. 
+ set subdira A + set filename fop$tnum.db + set extentname __dbq.$filename.0 + set paths [list $fulldir $reldir] + set files [list "$filename $extentname"\ + "$subdira/$filename $subdira/$extentname"] + + foreach path $paths { + foreach fileset $files { + set filename [lindex $fileset 0] + set extentname [lindex $fileset 1] + + # Loop through test using the following options: + # 1. no environment, not in transaction + # 2. with environment, not in transaction + # 3. remove with auto-commit + # 4. remove in committed transaction + # 5. remove in aborted transaction + + foreach op "noenv env auto commit abort" { + file mkdir $testdir/$subdira + if { $op == "noenv" } { + set file $path/$filename + set extentfile $path/$extentname + set env NULL + set envargs "" + } else { + set file $filename + set extentfile $extentname + set largs " -txn" + if { $op == "env" } { + set largs "" + } + set env [eval {berkdb_env -create \ + -home $path} $largs] + set envargs " -env $env " + error_check_good \ + env_open [is_valid_env $env] TRUE + } + + puts "\tFop$tnum: dbremove with $op\ + in path $path" + puts "\t\tFop$tnum.a.1: Create file $file" + set db [eval {berkdb_open -create -mode 0644} \ + $omethod $envargs $args {$file}] + error_check_good db_open [is_valid_db $db] TRUE + + # Use a numeric key so record-based methods + # don't need special treatment. 
+ set key 1 + set data [pad_data $method data] + + error_check_good dbput \ + [$db put $key [chop_data $method $data]] 0 + error_check_good dbclose [$db close] 0 + check_file_exist $file $env $path 1 + if { [is_queueext $method] == 1 } { + check_file_exist \ + $extentfile $env $path 1 + } + + # Use berkdb dbremove for non-txn tests + # and $env dbremove for transactional tests + puts "\t\tFop$tnum.a.2: Remove file" + if { $op == "noenv" || $op == "env" } { + error_check_good remove_$op \ + [eval {berkdb dbremove} \ + $envargs $file] 0 + } elseif { $op == "auto" } { + error_check_good remove_$op \ + [eval {$env dbremove} \ + -auto_commit $file] 0 + } else { + # $op is "abort" or "commit" + set txn [$env txn] + error_check_good remove_$op \ + [eval {$env dbremove} \ + -txn $txn $file] 0 + error_check_good txn_$op [$txn $op] 0 + } + + puts "\t\tFop$tnum.a.3: Check that file is gone" + # File should now be gone, unless the op is an + # abort. Check extent files if necessary. + if { $op != "abort" } { + check_file_exist $file $env $path 0 + if { [is_queueext $method] == 1 } { + check_file_exist \ + $extentfile $env $path 0 + } + } else { + check_file_exist $file $env $path 1 + if { [is_queueext $method] == 1 } { + check_file_exist \ + $extentfile $env $path 1 + } + } + + if { $env != "NULL" } { + error_check_good envclose [$env close] 0 + } + env_cleanup $path + check_file_exist $file $env $path 0 + } + } + } +} diff --git a/test/tcl/fop006.tcl b/test/tcl/fop006.tcl new file mode 100644 index 00000000..5fb93901 --- /dev/null +++ b/test/tcl/fop006.tcl @@ -0,0 +1,241 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop006 +# TEST Test file system operations in multiple simultaneous +# TEST transactions. Start one transaction, do a file operation. +# TEST Start a second transaction, do a file operation. 
Abort +# TEST or commit txn1, then abort or commit txn2, and check for +# TEST appropriate outcome. +proc fop006 { method { inmem 0 } { childtxn 0 } args } { + source ./include.tcl + + # The variable inmem determines whether the test is being + # run on regular named databases or named in-memory databases. + set txntype "" + if { $inmem == 0 } { + if { $childtxn == 0 } { + set tnum "006" + } else { + set tnum "010" + set txntype "child" + puts "Fop006 with child txns is called fop010." + } + set string "regular named databases" + set operator do_op + } else { + if { [is_queueext $method] } { + puts "Skipping in-memory test for method $method." + return + } + if { $childtxn == 0 } { + set tnum "008" + puts "Fop006 with in-memory dbs is called fop008." + } else { + set tnum "012" + set txntype "child" + puts "Fop006 with in-memory dbs\ + and child txns is called fop012." + } + set string "in-memory named databases" + set operator do_inmem_op + } + + if { [is_btree $method] != 1 } { + puts "Skipping fop$tnum for method $method" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + env_cleanup $testdir + puts "\nFop$tnum ($method): Two file system ops,\ + each in its own $txntype transaction, for $string." + + set exists {a b} + set noexist {foo bar} + set open {} + set cases {} + set ops {open open_create open_excl rename remove truncate} + + # Set up cases where op1 is successful. + foreach retval { 0 "file exists" "no such file" } { + foreach end1 {abort commit} { + foreach op1 $ops { + foreach op2 $ops { + append cases " " [create_tests\ + $op1 $op2 $exists $noexist\ + $open $retval $end1] + } + } + } + } + + # Set up evil two-op cases (op1 fails). Omit open_create + # and truncate from op1 list -- open_create always succeeds + # and truncate requires a successful open. 
+ foreach retval { 0 "file exists" "no such file" } { + foreach op1 { rename remove open open_excl } { + foreach op2 $ops { + append cases " " [create_badtests $op1 $op2 \ + $exists $noexist $open $retval $end1] + } + } + } + + # The structure of each case is: + # {{op1 {args} result end} {op2 {args} result}} + # A result of "0" indicates no error is expected. Otherwise, + # the result is the expected error message. The value of "end" + # indicates whether the transaction will be aborted or committed. + # + # Comment this loop out to remove the list of cases. +# set i 1 +# foreach case $cases { +# puts "\tFop$tnum.$i: $case" +# incr i +# } + + # To run a particular case, add the case in this format and + # uncomment. +# set cases { +# {{open_create {a} 0 abort} {rename {a bar} 0 {b bar}}} +# } + + set testid 0 + + # Run all the cases + foreach case $cases { + incr testid + + # Extract elements of the case + set op1 [lindex [lindex $case 0] 0] + set names1 [lindex [lindex $case 0] 1] + set res1 [lindex [lindex $case 0] 2] + set end1 [lindex [lindex $case 0] 3] + + set op2 [lindex [lindex $case 1] 0] + set names2 [lindex [lindex $case 1] 1] + set res2 [lindex [lindex $case 1] 2] + set remaining [lindex [lindex $case 1] 3] + + # Use the list of remaining files to derive + # the list of files that should be gone. + set allnames { a b foo bar } + set gone {} + foreach f $allnames { + set idx [lsearch -exact $remaining $f] + if { $idx == -1 } { + lappend gone $f + } + } + + puts -nonewline "\tFop$tnum.$testid: $op1 ($names1) $res1 $end1; " + puts " $op2 ($names2) $res2. Files remaining: $remaining." + + foreach end2 { abort commit } { + # Create transactional environment. + set env [berkdb_env -create -home $testdir -txn nosync] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Create databases. 
+ if { $inmem == 0 } { + set db [eval {berkdb_open -create} \ + $omethod $args -env $env -auto_commit a] + } else { + set db [eval {berkdb_open -create} \ + $omethod $args -env $env -auto_commit {""} a] + } + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_put \ + [$db put 1 [chop_data $method a]] 0 + error_check_good db_close [$db close] 0 + + if { $inmem == 0 } { + set db [eval {berkdb_open -create} \ + $omethod $args -env $env -auto_commit b] + } else { + set db [eval {berkdb_open -create} \ + $omethod $args -env $env -auto_commit {""} b] + } + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_put \ + [$db put 1 [chop_data $method a]] 0 + error_check_good db_close [$db close] 0 + + # Start transaction 1 and perform a file op. + set parent1 [$env txn] + if { $childtxn } { + set child1 [$env txn -parent $parent1] + set txn1 $child1 + } else { + set txn1 $parent1 + } + + error_check_good \ + txn_begin [is_valid_txn $txn1 $env] TRUE + set result1 [$operator $omethod $op1 $names1 $txn1 $env $args] + if { $res1 == 0 } { + error_check_good \ + op1_should_succeed $result1 $res1 + } else { + set error [extract_error $result1] + error_check_good op1_wrong_failure $error $res1 + } + + # Start transaction 2 before ending transaction 1. + set pid [exec $tclsh_path $test_path/wrap.tcl \ + fopscript.tcl $testdir/fop$tnum.log $operator \ + $omethod $op2 $end2 $res2 $names2 $childtxn &] + + # Sleep a bit to give txn2 a chance to block. + tclsleep 2 + + # End transaction 1 and close any open db handles. + # Txn2 will now unblock and finish. + error_check_good txn1_$end1 [$txn1 $end1] 0 + if { $childtxn } { + error_check_good parent1_commit \ + [$parent1 commit] 0 + } + set handles [berkdb handles] + foreach handle $handles { + if {[string range $handle 0 1] == "db" } { + error_check_good \ + db_close [$handle close] 0 + } + } + watch_procs $pid 1 60 1 + + # Check whether the expected databases exist. 
+ if { $end2 == "commit" } { + foreach db $remaining { + error_check_good db_exists \ + [database_exists \ + $inmem $testdir $db] 1 + } + foreach db $gone { + error_check_good db_gone \ + [database_exists \ + $inmem $testdir $db] 0 + } + } + + # Clean up for next case + error_check_good env_close [$env close] 0 + catch { [berkdb envremove -home $testdir] } res + + # Check for errors in log file. + set errstrings [eval findfail $testdir/fop$tnum.log] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + env_cleanup $testdir + } + } +} + diff --git a/test/tcl/fop007.tcl b/test/tcl/fop007.tcl new file mode 100644 index 00000000..611fcfdb --- /dev/null +++ b/test/tcl/fop007.tcl @@ -0,0 +1,21 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop007 +# TEST Test file system operations on named in-memory databases. +# TEST Combine two ops in one transaction. +proc fop007 { method args } { + + # Queue extents are not allowed with in-memory databases. + if { [is_queueext $method] == 1 } { + puts "Skipping fop007 for method $method." + return + } + eval {fop001 $method 1 0} $args +} + + + diff --git a/test/tcl/fop008.tcl b/test/tcl/fop008.tcl new file mode 100644 index 00000000..cee20a63 --- /dev/null +++ b/test/tcl/fop008.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop008 +# TEST Test file system operations on named in-memory databases. +# TEST Combine two ops in one transaction. +proc fop008 { method args } { + eval {fop006 $method 1 0} $args +} + + + diff --git a/test/tcl/fop009.tcl b/test/tcl/fop009.tcl new file mode 100644 index 00000000..cdc5cc5d --- /dev/null +++ b/test/tcl/fop009.tcl @@ -0,0 +1,22 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop009 +# TEST Test file system operations in child transactions. +# TEST Combine two ops in one child transaction. +proc fop009 { method args } { + + # Run for btree only to cut down on redundant testing. + if { [is_btree $method] == 0 } { + puts "Skipping fop009 for method $method" + return + } + + eval {fop001 $method 0 1} $args +} + + + diff --git a/test/tcl/fop010.tcl b/test/tcl/fop010.tcl new file mode 100644 index 00000000..4580e9a9 --- /dev/null +++ b/test/tcl/fop010.tcl @@ -0,0 +1,22 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop010 +# TEST Test file system operations in child transactions. +# TEST Two ops, each in its own child txn. +proc fop010 { method args } { + + # Run for btree only to cut down on redundant testing. + if { [is_btree $method] == 0 } { + puts "Skipping fop010 for method $method" + return + } + + eval {fop006 $method 0 1} $args +} + + + diff --git a/test/tcl/fop011.tcl b/test/tcl/fop011.tcl new file mode 100644 index 00000000..0f5d68f6 --- /dev/null +++ b/test/tcl/fop011.tcl @@ -0,0 +1,23 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop011 +# TEST Test file system operations in child transactions. +# TEST Combine two ops in one child transaction, with in-emory +# TEST databases. +proc fop011 { method args } { + + # Run for btree only to cut down on redundant testing. + if { [is_btree $method] == 0 } { + puts "Skipping fop011 for method $method" + return + } + + eval {fop001 $method 1 1} $args +} + + + diff --git a/test/tcl/fop012.tcl b/test/tcl/fop012.tcl new file mode 100644 index 00000000..f343d1fd --- /dev/null +++ b/test/tcl/fop012.tcl @@ -0,0 +1,22 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST fop012 +# TEST Test file system operations in child transactions. +# TEST Two ops, each in its own child txn, with in-memory dbs. +proc fop012 { method args } { + + # Run for btree only to cut down on redundant testing. + if { [is_btree $method] == 0 } { + puts "Skipping fop012 for method $method" + return + } + + eval {fop006 $method 1 1} $args +} + + + diff --git a/test/tcl/fopscript.tcl b/test/tcl/fopscript.tcl new file mode 100644 index 00000000..5e54ccdb --- /dev/null +++ b/test/tcl/fopscript.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Fop006 script - test of fileops in multiple transactions +# Usage: fopscript +# omethod: access method for database +# op: file operation to perform +# end: how to end the transaction (abort or commit) +# result: expected result of the transaction +# names: name(s) of files to operate on +# childtxn: do we use child txns in this test? 
+# args: additional args to do_op + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "fopscript operator omethod op end result names childtxn args" + +# Verify usage +if { $argc < 7 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set operator [ lindex $argv 0 ] +set omethod [ lindex $argv 1 ] +set op [ lindex $argv 2 ] +set end [ lindex $argv 3 ] +set result [ lindex $argv 4 ] +set names [ lindex $argv 5 ] +set childtxn [ lindex $argv 6 ] +set args [lindex [lrange $argv 7 end] 0] + +# Join the env +set dbenv [eval berkdb_env -home $testdir] +error_check_good envopen [is_valid_env $dbenv] TRUE + +# Start transaction +puts "\tFopscript.a: begin 2nd transaction (will block)" +set parent2 [$dbenv txn] +if { $childtxn } { + set child2 [$dbenv txn -parent $parent2] + set txn2 $child2 +} else { + set txn2 $parent2 +} +error_check_good txn2_begin [is_valid_txn $txn2 $dbenv] TRUE + +# Execute op2 +set op2result [$operator $omethod $op $names $txn2 $dbenv $args] + +# End txn2 +error_check_good txn2_end [$txn2 $end] 0 +if { $childtxn } { + error_check_good parent2_commit [$parent2 commit] 0 +} +if {$result == 0} { + error_check_good op2_should_succeed $op2result $result +} else { + set error [extract_error $op2result] + error_check_good op2_wrong_failure $error $result +} + +# Close any open db handles. We had to wait until now +# because you can't close a database inside a transaction. +set handles [berkdb handles] +foreach handle $handles { + if {[string range $handle 0 1] == "db" } { + error_check_good db_close [$handle close] 0 + } +} + +# Close the env +error_check_good dbenv_close [$dbenv close] 0 +puts "\tFopscript completed successfully" + diff --git a/test/tcl/foputils.tcl b/test/tcl/foputils.tcl new file mode 100644 index 00000000..aebf8a3f --- /dev/null +++ b/test/tcl/foputils.tcl @@ -0,0 +1,548 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +proc do_op {omethod op names txn env {largs ""}} { + switch -exact $op { + delete { do_delete $names } + rename { do_rename $names $txn $env } + remove { do_remove $names $txn $env } + noop { do_noop } + open_create { do_create $omethod $names $txn $env $largs } + open { do_open $omethod $names $txn $env $largs } + open_excl { do_create_excl $omethod $names $txn $env $largs } + truncate { do_truncate $omethod $names $txn $env $largs } + default { puts "FAIL: operation $op not recognized" } + } +} + +proc do_subdb_op {omethod op names txn env {largs ""}} { + # + # The 'noop' and 'delete' actions are the same + # for subdbs as for regular db files. + # + switch -exact $op { + delete { do_delete $names } + rename { do_subdb_rename $names $txn $env } + remove { do_subdb_remove $names $txn $env } + noop { do_noop } + default { puts "FAIL: operation $op not recognized" } + } +} + +proc do_inmem_op {omethod op names txn env {largs ""}} { + # + # The in-memory versions of do_op are different in + # that we don't need to pass in the filename, just + # the subdb names. + # + switch -exact $op { + delete { do_delete $names } + rename { do_inmem_rename $names $txn $env } + remove { do_inmem_remove $names $txn $env } + noop { do_noop } + open_create { do_inmem_create $omethod $names $txn $env $largs } + open { do_inmem_open $omethod $names $txn $env $largs } + open_excl { do_inmem_create_excl $omethod $names $txn $env $largs } + truncate { do_inmem_truncate $omethod $names $txn $env $largs } + default { puts "FAIL: operation $op not recognized" } + } +} + +proc do_delete {names} { + # + # This is the odd man out among the ops -- it's not a Berkeley + # DB file operation, but mimics an operation done externally, + # as if a user deleted a file with "rm" or "erase". + # + # We assume the file is found in $testdir. 
+ # + global testdir + + if {[catch [fileremove -f $testdir/$names] result]} { + return $result + } else { + return 0 + } +} + +proc do_noop { } { + # Do nothing. Report success. + return 0 +} + +proc do_rename {names txn env} { + # Pull db names out of $names + set oldname [lindex $names 0] + set newname [lindex $names 1] + + if {[catch {eval $env dbrename -txn $txn \ + $oldname $newname} result]} { + return $result + } else { + return 0 + } +} + +proc do_subdb_rename {names txn env} { + # Pull db and subdb names out of $names + set filename [lindex $names 0] + set oldsname [lindex $names 1] + set newsname [lindex $names 2] + + if {[catch {eval $env dbrename -txn $txn $filename \ + $oldsname $newsname} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_rename {names txn env} { + # Pull db and subdb names out of $names + set filename "" + set oldsname [lindex $names 0] + set newsname [lindex $names 1] + if {[catch {eval $env dbrename -txn $txn {$filename} \ + $oldsname $newsname} result]} { + return $result + } else { + return 0 + } +} + + +proc do_remove {names txn env} { + if {[catch {eval $env dbremove -txn $txn $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_subdb_remove {names txn env} { + set filename [lindex $names 0] + set subname [lindex $names 1] + if {[catch {eval $env dbremove -txn $txn $filename $subname} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_remove {names txn env} { + if {[catch {eval $env dbremove -txn $txn {""} $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_create {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open -create $omethod $largs -env $env \ + -txn $txn $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_create {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open -create $omethod $largs -env $env \ + -txn $txn "" $names} result]} { + return $result + } else { + 
return 0 + } +} + +proc do_open {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open $omethod $largs -env $env \ + -txn $txn $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_open {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open $omethod $largs -env $env \ + -txn $txn {""} $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_create_excl {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open -create -excl $omethod $largs -env $env \ + -txn $txn $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_create_excl {omethod names txn env {largs ""}} { + if {[catch {eval berkdb_open -create -excl $omethod $largs -env $env \ + -txn $txn {""} $names} result]} { + return $result + } else { + return 0 + } +} + +proc do_truncate {omethod names txn env {largs ""}} { + # First we have to get a handle. We omit the -create flag + # because testing of truncate is meaningful only in cases + # where the database already exists. 
+ set db [eval {berkdb_open $omethod} $largs {-env $env -txn $txn $names}] + error_check_good db_open [is_valid_db $db] TRUE + + if {[catch {$db truncate -txn $txn} result]} { + return $result + } else { + return 0 + } +} + +proc do_inmem_truncate {omethod names txn env {largs ""}} { + set db [eval {berkdb_open $omethod} $largs {-env $env -txn $txn "" $names}] + error_check_good db_open [is_valid_db $db] TRUE + + if {[catch {$db truncate -txn $txn} result]} { + return $result + } else { + return 0 + } +} + +proc create_tests { op1 op2 exists noexist open retval { end1 "" } } { + set retlist {} + set redundant {} + switch $op1 { + rename { + # Use first element from exists list + set from [lindex $exists 0] + # Use first element from noexist list + set to [lindex $noexist 0] + + # This is the first operation, which should succeed + set op1ret [list $op1 "$from $to" 0 $end1] + + # Adjust 'exists' and 'noexist' list if txn was + # not aborted. + if { $end1 != "abort" } { + set exists [lreplace $exists 0 0 $to] + set noexist [lreplace $noexist 0 0 $from] + } else { + # Eliminate the 2nd element in noexist: it is + # equivalent to the 1st (neither ever exists). + set noexist [lreplace $noexist 1 1] + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + } + } + remove { + set from [lindex $exists 0] + set op1ret [list $op1 $from 0 $end1] + + if { $end1 != "abort" } { + set exists [lreplace $exists 0 0] + set noexist [lreplace $noexist 0 0 $from] + } else { + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + set noexist [lreplace $noexist 1 1] + } + } + open_create - + open - + truncate { + set from [lindex $exists 0] + set op1ret [list $op1 $from 0 $end1] + + if { $end1 != "abort" } { + set exists [lreplace $exists 0 0] + set open [list $from] + } else { + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + } + + # Eliminate the 2nd element in noexist: it is + # equivalent to the 1st (neither ever exists). 
+ set noexist [lreplace $noexist 1 1] + } + open_excl { + # Use first element from noexist list + set from [lindex $noexist 0] + set op1ret [list $op1 $from 0 $end1] + + if { $end1 != "abort" } { + set noexist [lreplace $noexist 0 0] + set open [list $from] + } else { + set noexist [lreplace $noexist 1 1] + } + # It would be redundant to test both elements + # on the 'exists' list, but we do have to + # keep track of both. + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + } + } + + # Generate possible second operations given the return value. + set op2list [create_op2 $op2 $exists $noexist $open $redundant $retval] + + foreach o $op2list { + lappend retlist [list $op1ret $o] + } + return $retlist +} + +proc create_badtests { op1 op2 exists noexist open retval {end1 ""} } { + set retlist {} + switch $op1 { + rename { + # Use first element from exists list + set from [lindex $exists 0] + # Use first element from noexist list + set to [lindex $noexist 0] + + # This is the first operation, which should fail + set op1list1 \ + [list $op1 "$to $to" "no such file" $end1] + set op1list2 \ + [list $op1 "$to $from" "no such file" $end1] + set op1list3 \ + [list $op1 "$from $from" "file exists" $end1] + set op1list [list $op1list1 $op1list2 $op1list3] + + # Since the op failed, trim the 'exists' and 'noexist' + # lists to a single case each. It is redundant + # to test both elements on the 'exists' list, but + # keep track of the fact that both still exist. + set noexist [lreplace $noexist 1 1] + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + + # Generate second operations given the return value. 
+ set op2list [create_op2 \ + $op2 $exists $noexist $open $redundant $retval] + foreach op1 $op1list { + foreach op2 $op2list { + lappend retlist [list $op1 $op2] + } + } + return $retlist + } + remove - + open - + truncate { + set file [lindex $noexist 0] + set op1list [list $op1 $file "no such file" $end1] + + set noexist [lreplace $noexist 1 1] + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + + set op2list [create_op2 \ + $op2 $exists $noexist $open $redundant $retval] + foreach op2 $op2list { + lappend retlist [list $op1list $op2] + } + return $retlist + } + open_excl { + set file [lindex $exists 0] + set op1list [list $op1 $file "file exists" $end1] + + set noexist [lreplace $noexist 1 1] + set redundant [lindex $exists 1] + set exists [lreplace $exists 1 1] + + set op2list [create_op2 \ + $op2 $exists $noexist $open $redundant $retval] + foreach op2 $op2list { + lappend retlist [list $op1list $op2] + } + return $retlist + } + } +} + +proc create_op2 { op2 exists noexist open redundant retval } { + set retlist {} + set existing [concat $exists $open $redundant] + switch $op2 { + rename { + # Successful renames arise from renaming existing + # to non-existing files. + if { $retval == 0 } { + set old $exists + set new $noexist + set retlist \ + [build_retlist $op2 $old $new $retval $existing] + } + # "File exists" errors arise from renaming existing + # to existing files. + if { $retval == "file exists" } { + set old $exists + set new $exists + set retlist \ + [build_retlist $op2 $old $new $retval $existing] + } + # "No such file" errors arise from renaming files + # that don't exist. 
+ if { $retval == "no such file" } { + set old $noexist + set new $exists + set retlist1 \ + [build_retlist $op2 $old $new $retval $existing] + + set old $noexist + set new $noexist + set retlist2 \ + [build_retlist $op2 $old $new $retval $existing] + + set retlist [concat $retlist1 $retlist2] + } + } + remove { + # Successful removes result from removing existing + # files. + if { $retval == 0 } { + set file $exists + } + # "File exists" does not happen in remove. + if { $retval == "file exists" } { + return + } + # "No such file" errors arise from trying to remove + # files that don't exist. + if { $retval == "no such file" } { + set file $noexist + } + set retlist [build_retlist $op2 $file "" $retval $existing] + } + open_create { + # Open_create should be successful with existing, + # open, or non-existing files. + if { $retval == 0 } { + set file [concat $exists $open $noexist] + } + # "File exists" and "no such file" + # do not happen in open_create. + if { $retval == "file exists" || \ + $retval == "no such file" } { + return + } + set retlist [build_retlist $op2 $file "" $retval $existing] + } + open { + # Open should be successful with existing or open files. + if { $retval == 0 } { + set file [concat $exists $open] + } + # "No such file" errors arise from trying to open + # non-existent files. + if { $retval == "no such file" } { + set file $noexist + } + # "File exists" errors do not happen in open. + if { $retval == "file exists" } { + return + } + set retlist [build_retlist $op2 $file "" $retval $existing] + } + open_excl { + # Open_excl should be successful with non-existent files. + if { $retval == 0 } { + set file $noexist + } + # "File exists" errors arise from trying to open + # existing files. + if { $retval == "file exists" } { + set file [concat $exists $open] + } + # "No such file" errors do not arise in open_excl. 
+ if { $retval == "no such file" } { + return + } + set retlist [build_retlist $op2 $file "" $retval $existing] + } + truncate { + # Truncate should be successful with existing files. + if { $retval == 0 } { + set file $exists + } + # No other return values are meaningful to test since + # do_truncate starts with an open and we've already + # tested open. + if { $retval == "no such file" || \ + $retval == "file exists" } { + return + } + set retlist [build_retlist $op2 $file "" $retval $existing] + } + } + return $retlist +} + +proc build_retlist { op2 file1 file2 retval existing } { + set retlist {} + + if { $file2 == "" } { + foreach f1 $file1 { + # If we're expecting a successful operation, we have + # adjust the list of files that remain after the op + # in certain cases. + set remaining $existing + if { $retval == 0 } { + switch $op2 { + remove { + set idx [lsearch -exact $remaining $f1] + set remaining \ + [lreplace $remaining $idx $idx] + } + open_create - + open - + open_excl { + set idx [lsearch -exact $remaining $f1] + if { $idx == -1 } { + set remaining \ + [lappend remaining $f1] + } + } + } + } + lappend retlist [list $op2 $f1 $retval $remaining] + } + } else { + foreach f1 $file1 { + foreach f2 $file2 { + set remaining $existing + if { $op2 == "rename" && $retval == 0 } { + set idx [lsearch -exact $remaining $f1] + set remaining [lreplace $remaining $idx $idx] + set remaining [lappend remaining $f2] + } + lappend retlist [list $op2 "$f1 $f2" $retval $remaining] + } + } + } + return $retlist +} + +proc extract_error { message } { + if { [is_substr $message "exists"] == 1 } { + set message "file exists" + } elseif {[is_substr $message "no such file"] == 1 } { + set message "no such file" + } + return $message +} diff --git a/test/tcl/hsearch.tcl b/test/tcl/hsearch.tcl new file mode 100644 index 00000000..47dfea8c --- /dev/null +++ b/test/tcl/hsearch.tcl @@ -0,0 +1,50 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Historic Hsearch interface test. +# Use the first 1000 entries from the dictionary. +# Insert each with self as key and data; retrieve each. +# After all are entered, retrieve all; compare output to original. +# Then reopen the file, re-retrieve everything. +# Finally, delete everything. +proc hsearch { { nentries 1000 } } { + source ./include.tcl + + puts "HSEARCH interfaces test: $nentries" + + # Create the database and open the dictionary + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir NULL + + error_check_good hcreate [berkdb hcreate $nentries] 0 + set did [open $dict] + set count 0 + + puts "\tHSEARCH.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + set ret [berkdb hsearch $str $str enter] + error_check_good hsearch:enter $ret 0 + + set d [berkdb hsearch $str 0 find] + error_check_good hsearch:find $d $str + incr count + } + close $did + + puts "\tHSEARCH.b: re-get loop" + set did [open $dict] + # Here is the loop where we retrieve each key + while { [gets $did str] != -1 && $count < $nentries } { + set d [berkdb hsearch $str 0 find] + error_check_good hsearch:find $d $str + incr count + } + close $did + error_check_good hdestroy [berkdb hdestroy] 0 +} diff --git a/test/tcl/include.tcl b/test/tcl/include.tcl new file mode 100644 index 00000000..03b8a37e --- /dev/null +++ b/test/tcl/include.tcl @@ -0,0 +1,29 @@ +# Automatically built by dist/s_test; may require local editing. + +set tclsh_path @TCL_TCLSH@ +set tcllib .libs/libdb_tcl-@DB_VERSION_MAJOR@.@DB_VERSION_MINOR@@LIBTSO_MODSUFFIX@ + +set src_root @srcdir@/.. 
+set test_path @srcdir@/../test/tcl +set je_root @srcdir@/../../je + +global testdir +set testdir ./TESTDIR + +global dict +global util_path + +global is_freebsd_test +global is_hp_test +global is_linux_test +global is_osx_test +global is_qnx_test +global is_sunos_test +global is_windows_test +global is_windows9x_test + +global valid_methods +global checking_valid_methods +global test_recopts + +set KILL "@KILL@" diff --git a/test/tcl/join.tcl b/test/tcl/join.tcl new file mode 100644 index 00000000..a8e6c5a3 --- /dev/null +++ b/test/tcl/join.tcl @@ -0,0 +1,454 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST jointest +# TEST Test duplicate assisted joins. Executes 1, 2, 3 and 4-way joins +# TEST with differing index orders and selectivity. +# TEST +# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those +# TEST work, everything else does as well. We'll create test databases +# TEST called join1.db, join2.db, join3.db, and join4.db. The number on +# TEST the database describes the duplication -- duplicates are of the +# TEST form 0, N, 2N, 3N, ... where N is the number of the database. +# TEST Primary.db is the primary database, and null.db is the database +# TEST that has no matching duplicates. +# TEST +# TEST We should test this on all btrees, all hash, and a combination thereof +proc jointest { {psize 8192} {with_dup_dups 0} {flags 0} } { + global testdir + global rand_init + source ./include.tcl + + env_cleanup $testdir + berkdb srand $rand_init + + # Use one environment for all database opens so we don't + # need oodles of regions. + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # With the new offpage duplicate code, we don't support + # duplicate duplicates in sorted dup sets. Thus, if with_dup_dups + # is greater than one, run only with "-dup". 
+ if { $with_dup_dups > 1 } { + set doptarray {"-dup"} + } else { + set doptarray {"-dup -dupsort" "-dup" RANDOMMIX RANDOMMIX } + } + + # NB: these flags are internal only, ok + foreach m "DB_BTREE DB_HASH DB_BOTH" { + # run with two different random mixes. + foreach dopt $doptarray { + set opt [list "-env" $env $dopt] + + puts "Join test: ($m $dopt) psize $psize,\ + $with_dup_dups dup\ + dups, flags $flags." + + build_all $m $psize $opt oa $with_dup_dups + + # null.db is db_built fifth but is referenced by + # zero; set up the option array appropriately. + set oa(0) $oa(5) + + # Build the primary + puts "\tBuilding the primary database $m" + set oflags "-create -truncate -mode 0644 -env $env\ + [conv $m [berkdb random_int 1 2]]" + set db [eval {berkdb_open} $oflags primary.db] + error_check_good dbopen [is_valid_db $db] TRUE + for { set i 0 } { $i < 1000 } { incr i } { + set key [format "%04d" $i] + set ret [$db put $key stub] + error_check_good "primary put" $ret 0 + } + error_check_good "primary close" [$db close] 0 + set did [open $dict] + gets $did str + do_join primary.db "1 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "1 2" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 2 3" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 2 3 4" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 2 
1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "1 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 3 4" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 4 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "0 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 2 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 0 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 3 3" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 2 3 3" $str oa $flags\ + $with_dup_dups + gets $did str2 + gets $did str + do_join primary.db "1 2" $str oa $flags\ + $with_dup_dups "3" $str2 + + # You really don't want to run this section + # with $with_dup_dups > 2. 
+ if { $with_dup_dups <= 2 } { + gets $did str2 + gets $did str + do_join primary.db "1 2 3" $str\ + oa $flags $with_dup_dups "3 3 1" $str2 + gets $did str2 + gets $did str + do_join primary.db "4 0 2" $str\ + oa $flags $with_dup_dups "4 3 3" $str2 + gets $did str2 + gets $did str + do_join primary.db "3 2 1" $str\ + oa $flags $with_dup_dups "0 2" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str\ + oa $flags $with_dup_dups "1 4 4" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str\ + oa $flags $with_dup_dups "0 0 4 4" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str2\ + oa $flags $with_dup_dups "2 4 4" $str + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str2\ + oa $flags $with_dup_dups "0 0 4 4" $str + } + close $did + } + } + + error_check_good env_close [$env close] 0 +} + +proc build_all { method psize opt oaname with_dup_dups {nentries 100} } { + global testdir + db_build join1.db $nentries 50 1 [conv $method 1]\ + $psize $opt $oaname $with_dup_dups + db_build join2.db $nentries 25 2 [conv $method 2]\ + $psize $opt $oaname $with_dup_dups + db_build join3.db $nentries 16 3 [conv $method 3]\ + $psize $opt $oaname $with_dup_dups + db_build join4.db $nentries 12 4 [conv $method 4]\ + $psize $opt $oaname $with_dup_dups + db_build null.db $nentries 0 5 [conv $method 5]\ + $psize $opt $oaname $with_dup_dups +} + +proc conv { m i } { + switch -- $m { + DB_HASH { return "-hash"} + "-hash" { return "-hash"} + DB_BTREE { return "-btree"} + "-btree" { return "-btree"} + DB_BOTH { + if { [expr $i % 2] == 0 } { + return "-hash"; + } else { + return "-btree"; + } + } + } +} + +proc random_opts { } { + set j [berkdb random_int 0 1] + if { $j == 0 } { + return " -dup" + } else { + return " -dup -dupsort" + } +} + +proc db_build { name nkeys ndups dup_interval method psize lopt oaname \ + with_dup_dups } { + source ./include.tcl + + # Get array of arg names (from two levels up the call 
stack) + upvar 2 $oaname oa + + # Search for "RANDOMMIX" in $opt, and if present, replace + # with " -dup" or " -dup -dupsort" at random. + set i [lsearch $lopt RANDOMMIX] + if { $i != -1 } { + set lopt [lreplace $lopt $i $i [random_opts]] + } + + # Save off db_open arguments for this database. + set opt [eval concat $lopt] + set oa($dup_interval) $opt + + # Create the database and open the dictionary + set oflags "-create -truncate -mode 0644 $method\ + -pagesize $psize" + set db [eval {berkdb_open} $oflags $opt $name] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + set count 0 + puts -nonewline "\tBuilding $name: $nkeys keys " + puts -nonewline "with $ndups duplicates at interval of $dup_interval" + if { $with_dup_dups > 0 } { + puts "" + puts "\t\tand $with_dup_dups duplicate duplicates." + } else { + puts "." + } + for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } { + incr count} { + set str $str$name + # We need to make sure that the dups are inserted in a + # random, or near random, order. Do this by generating + # them and putting each in a list, then sorting the list + # at random. 
+ set duplist {} + for { set i 0 } { $i < $ndups } { incr i } { + set data [format "%04d" [expr $i * $dup_interval]] + lappend duplist $data + } + # randomize the list + for { set i 0 } { $i < $ndups } {incr i } { + # set j [berkdb random_int $i [expr $ndups - 1]] + set j [expr ($i % 2) + $i] + if { $j >= $ndups } { set j $i } + set dupi [lindex $duplist $i] + set dupj [lindex $duplist $j] + set duplist [lreplace $duplist $i $i $dupj] + set duplist [lreplace $duplist $j $j $dupi] + } + foreach data $duplist { + if { $with_dup_dups != 0 } { + for { set j 0 }\ + { $j < $with_dup_dups }\ + {incr j} { + set ret [$db put $str $data] + error_check_good put$j $ret 0 + } + } else { + set ret [$db put $str $data] + error_check_good put $ret 0 + } + } + + if { $ndups == 0 } { + set ret [$db put $str NODUP] + error_check_good put $ret 0 + } + } + close $did + error_check_good close:$name [$db close] 0 +} + +proc do_join { primary dbs key oanm flags with_dup_dups {dbs2 ""} {key2 ""} } { + global testdir + source ./include.tcl + + upvar $oanm oa + + puts -nonewline "\tJoining: $dbs on $key" + if { $dbs2 == "" } { + puts "" + } else { + puts " with $dbs2 on $key2" + } + + # Open all the databases + set p [berkdb_open -unknown $testdir/$primary] + error_check_good "primary open" [is_valid_db $p] TRUE + + set dblist "" + set curslist "" + + set ndx [llength $dbs] + + foreach i [concat $dbs $dbs2] { + set opt $oa($i) + set db [eval {berkdb_open -unknown} $opt [n_to_name $i]] + error_check_good "[n_to_name $i] open" [is_valid_db $db] TRUE + set curs [$db cursor] + error_check_good "$db cursor" \ + [is_substr $curs "$db.c"] 1 + lappend dblist $db + lappend curslist $curs + + if { $ndx > 0 } { + set realkey [concat $key[n_to_name $i]] + } else { + set realkey [concat $key2[n_to_name $i]] + } + + set pair [$curs get -set $realkey] + error_check_good cursor_set:$realkey:$pair \ + [llength [lindex $pair 0]] 2 + + incr ndx -1 + } + + set join_curs [eval {$p join} $curslist] + 
error_check_good join_cursor \ + [is_substr $join_curs "$p.c"] 1 + + # Calculate how many dups we expect. + # We go through the list of indices. If we find a 0, then we + # expect 0 dups. For everything else, we look at pairs of numbers, + # if the are relatively prime, multiply them and figure out how + # many times that goes into 50. If they aren't relatively prime, + # take the number of times the larger goes into 50. + set expected 50 + set last 1 + foreach n [concat $dbs $dbs2] { + if { $n == 0 } { + set expected 0 + break + } + if { $last == $n } { + continue + } + + if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } { + if { $n > $last } { + set last $n + set expected [expr 50 / $last] + } + } else { + set last [expr $n * $last / [gcd $n $last]] + set expected [expr 50 / $last] + } + } + + # If $with_dup_dups is greater than zero, each datum has + # been inserted $with_dup_dups times. So we expect the number + # of dups to go up by a factor of ($with_dup_dups)^(number of databases) + + if { $with_dup_dups > 0 } { + foreach n [concat $dbs $dbs2] { + set expected [expr $expected * $with_dup_dups] + } + } + + set ndups 0 + if { $flags == " -join_item"} { + set l 1 + } else { + set flags "" + set l 2 + } + for { set pair [eval {$join_curs get} $flags] } { \ + [llength [lindex $pair 0]] == $l } { + set pair [eval {$join_curs get} $flags] } { + set k [lindex [lindex $pair 0] 0] + foreach i $dbs { + error_check_bad valid_dup:$i:$dbs $i 0 + set kval [string trimleft $k 0] + if { [string length $kval] == 0 } { + set kval 0 + } + error_check_good valid_dup:$i:$dbs [expr $kval % $i] 0 + } + incr ndups + } + error_check_good number_of_dups:$dbs $ndups $expected + + error_check_good close_primary [$p close] 0 + foreach i $curslist { + error_check_good close_cursor:$i [$i close] 0 + } + foreach i $dblist { + error_check_good close_index:$i [$i close] 0 + } +} + +proc n_to_name { n } { +global testdir + if { $n == 0 } { + return null.db; + } else { + return join$n.db; 
+ } +} + +proc gcd { a b } { + set g 1 + + for { set i 2 } { $i <= $a } { incr i } { + if { [expr $a % $i] == 0 && [expr $b % $i] == 0 } { + set g $i + } + } + return $g +} diff --git a/test/tcl/lock001.tcl b/test/tcl/lock001.tcl new file mode 100644 index 00000000..6186c482 --- /dev/null +++ b/test/tcl/lock001.tcl @@ -0,0 +1,121 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST lock001 +# TEST Make sure that the basic lock tests work. Do some simple gets +# TEST and puts for a single locker. +proc lock001 { {iterations 1000} } { + source ./include.tcl + global lock_curid + global lock_maxid + + set save_curid $lock_curid + set save_maxid $lock_maxid + + # Set defaults + # Adjusted to make exact match of isqrt + #set conflicts { 3 0 0 0 0 0 1 0 1 1} + #set conflicts { 3 0 0 0 0 1 0 1 1} + + set conflicts { 0 0 0 0 0 1 0 1 1} + set nmodes [isqrt [llength $conflicts]] + + # Cleanup + env_cleanup $testdir + + # Open the region we'll use for testing. 
+ set eflags "-create -lock -home $testdir -mode 0644 \ + -lock_conflict {$nmodes {$conflicts}}" + set env [eval {berkdb_env} $eflags] + error_check_good env [is_valid_env $env] TRUE + error_check_good lock_id_set \ + [$env lock_id_set $lock_curid $lock_maxid] 0 + + puts "Lock001: test basic lock operations" + set locker [$env lock_id] + # Get and release each type of lock + puts "\tLock001.a: get and release each type of lock" + foreach m {ng write read} { + set obj obj$m + set lockp [$env lock_get $m $locker $obj] + error_check_good lock_get:a [is_blocked $lockp] 0 + error_check_good lock_get:a [is_substr $lockp $env] 1 + set ret [ $lockp put ] + error_check_good lock_put $ret 0 + } + + # Get a bunch of locks for the same locker; these should work + set obj OBJECT + puts "\tLock001.b: Get a bunch of locks for the same locker" + foreach m {ng write read} { + set lockp [$env lock_get $m $locker $obj ] + lappend locklist $lockp + error_check_good lock_get:b [is_blocked $lockp] 0 + error_check_good lock_get:b [is_substr $lockp $env] 1 + } + release_list $locklist + + set locklist {} + # Check that reference counted locks work + puts "\tLock001.c: reference counted locks." + for {set i 0} { $i < 10 } {incr i} { + set lockp [$env lock_get -nowait write $locker $obj] + error_check_good lock_get:c [is_blocked $lockp] 0 + error_check_good lock_get:c [is_substr $lockp $env] 1 + lappend locklist $lockp + } + release_list $locklist + + # Finally try some failing locks + set locklist {} + foreach i {ng write read} { + set lockp [$env lock_get $i $locker $obj] + lappend locklist $lockp + error_check_good lock_get:d [is_blocked $lockp] 0 + error_check_good lock_get:d [is_substr $lockp $env] 1 + } + + # Change the locker + set locker [$env lock_id] + set blocklist {} + # Skip NO_LOCK lock. + puts "\tLock001.d: Change the locker, acquire read and write." 
+ foreach i {write read} { + catch {$env lock_get -nowait $i $locker $obj} ret + error_check_good lock_get:e [is_substr $ret "not granted"] 1 + #error_check_good lock_get:e [is_substr $lockp $env] 1 + #error_check_good lock_get:e [is_blocked $lockp] 0 + } + # Now release original locks + release_list $locklist + + # Now re-acquire blocking locks + set locklist {} + puts "\tLock001.e: Re-acquire blocking locks." + foreach i {write read} { + set lockp [$env lock_get -nowait $i $locker $obj ] + error_check_good lock_get:f [is_substr $lockp $env] 1 + error_check_good lock_get:f [is_blocked $lockp] 0 + lappend locklist $lockp + } + + # Now release new locks + release_list $locklist + error_check_good free_id [$env lock_id_free $locker] 0 + + error_check_good envclose [$env close] 0 + +} + +# Blocked locks appear as lockmgrN.lockM\nBLOCKED +proc is_blocked { l } { + if { [string compare $l BLOCKED ] == 0 } { + return 1 + } else { + return 0 + } +} diff --git a/test/tcl/lock002.tcl b/test/tcl/lock002.tcl new file mode 100644 index 00000000..0480e54b --- /dev/null +++ b/test/tcl/lock002.tcl @@ -0,0 +1,154 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST lock002 +# TEST Exercise basic multi-process aspects of lock. +proc lock002 { {conflicts {0 0 0 0 0 1 0 1 1} } } { + source ./include.tcl + + puts "Lock002: Basic multi-process lock tests." + + env_cleanup $testdir + + set nmodes [isqrt [llength $conflicts]] + + # Open the lock + mlock_open $nmodes $conflicts + mlock_wait +} + +# Make sure that we can create a region; destroy it, attach to it, +# detach from it, etc. +proc mlock_open { nmodes conflicts } { + source ./include.tcl + global lock_curid + global lock_maxid + + puts "\tLock002.a multi-process open/close test" + + # Open/Create region here. Then close it and try to open from + # other test process. 
+ set env_cmd [concat "berkdb_env -create -mode 0644 -lock \ + -lock_conflict" [list [list $nmodes $conflicts]] "-home $testdir"] + set local_env [eval $env_cmd] + $local_env lock_id_set $lock_curid $lock_maxid + error_check_good env_open [is_valid_env $local_env] TRUE + + set ret [$local_env close] + error_check_good env_close $ret 0 + + # Open from other test process + set env_cmd "berkdb_env -mode 0644 -home $testdir" + + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + + set remote_env [send_cmd $f1 $env_cmd] + error_check_good remote:env_open [is_valid_env $remote_env] TRUE + + # Now make sure that we can reopen the region. + set local_env [eval $env_cmd] + error_check_good env_open [is_valid_env $local_env] TRUE + set ret [$local_env close] + error_check_good env_close $ret 0 + + # Try closing the remote region + set ret [send_cmd $f1 "$remote_env close"] + error_check_good remote:lock_close $ret 0 + + # Try opening for create. Will succeed because region exists. + set env_cmd [concat "berkdb_env -create -mode 0644 -lock \ + -lock_conflict" [list [list $nmodes $conflicts]] "-home $testdir"] + set local_env [eval $env_cmd] + error_check_good remote:env_open [is_valid_env $local_env] TRUE + + # close locally + reset_env $local_env + + # Close and exit remote + set ret [send_cmd $f1 "reset_env $remote_env"] + + catch { close $f1 } result +} + +proc mlock_wait { } { + source ./include.tcl + + puts "\tLock002.b multi-process get/put wait test" + + # Open region locally + set env_cmd "berkdb_env -home $testdir" + set local_env [eval $env_cmd] + error_check_good env_open [is_valid_env $local_env] TRUE + + # Open region remotely + set f1 [open |$tclsh_path r+] + + puts $f1 "source $test_path/test.tcl" + + set remote_env [send_cmd $f1 $env_cmd] + error_check_good remote:env_open [is_valid_env $remote_env] TRUE + + # Get a write lock locally; try for the read lock + # remotely. 
We hold the locks for several seconds + # so that we can use timestamps to figure out if the + # other process waited. + set locker1 [$local_env lock_id] + set local_lock [$local_env lock_get write $locker1 object1] + error_check_good lock_get [is_valid_lock $local_lock $local_env] TRUE + + # Now request a lock that we expect to hang; generate + # timestamps so we can tell if it actually hangs. + set locker2 [send_cmd $f1 "$remote_env lock_id"] + set remote_lock [send_timed_cmd $f1 1 \ + "set lock \[$remote_env lock_get write $locker2 object1\]"] + + # Now sleep before releasing lock + tclsleep 5 + set result [$local_lock put] + error_check_good lock_put $result 0 + + # Now get the result from the other script + set result [rcv_result $f1] + error_check_good lock_get:remote_time [expr $result > 4] 1 + + # Now get the remote lock + set remote_lock [send_cmd $f1 "puts \$lock"] + error_check_good remote:lock_get \ + [is_valid_lock $remote_lock $remote_env] TRUE + + # Now make the other guy wait 5 seconds and then release his + # lock while we try to get a write lock on it. 
+ set start [timestamp -r] + + set ret [send_cmd $f1 "tclsleep 5"] + + set ret [send_cmd $f1 "$remote_lock put"] + + set local_lock [$local_env lock_get write $locker1 object1] + error_check_good lock_get:time \ + [expr [expr [timestamp -r] - $start] > 2] 1 + error_check_good lock_get:local \ + [is_valid_lock $local_lock $local_env] TRUE + + # Now check remote's result + set result [rcv_result $f1] + error_check_good lock_put:remote $result 0 + + # Clean up remote + set result [send_cmd $f1 "$remote_env lock_id_free $locker2" ] + error_check_good remote_free_id $result 0 + set ret [send_cmd $f1 "reset_env $remote_env"] + + close $f1 + + # Now close up locally + set ret [$local_lock put] + error_check_good lock_put $ret 0 + error_check_good lock_id_free [$local_env lock_id_free $locker1] 0 + + reset_env $local_env +} diff --git a/test/tcl/lock003.tcl b/test/tcl/lock003.tcl new file mode 100644 index 00000000..2bc4df13 --- /dev/null +++ b/test/tcl/lock003.tcl @@ -0,0 +1,100 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST lock003 +# TEST Exercise multi-process aspects of lock. Generate a bunch of parallel +# TEST testers that try to randomly obtain locks; make sure that the locks +# TEST correctly protect corresponding objects. 
+proc lock003 { {iter 500} {max 1000} {procs 5} } { + source ./include.tcl + global lock_curid + global lock_maxid + + set ldegree 5 + set objs 75 + set reads 65 + set wait 1 + set conflicts { 0 0 0 0 0 1 0 1 1} + set seeds {} + + puts "Lock003: Multi-process random lock test" + + # Clean up after previous runs + env_cleanup $testdir + + # Open/create the lock region + puts "\tLock003.a: Create environment" + set e [berkdb_env -create -lock -home $testdir] + error_check_good env_open [is_substr $e env] 1 + $e lock_id_set $lock_curid $lock_maxid + + error_check_good env_close [$e close] 0 + + # Now spawn off processes + set pidlist {} + + for { set i 0 } {$i < $procs} {incr i} { + if { [llength $seeds] == $procs } { + set s [lindex $seeds $i] + } +# puts "$tclsh_path\ +# $test_path/wrap.tcl \ +# lockscript.tcl $testdir/$i.lockout\ +# $testdir $iter $objs $wait $ldegree $reads &" + set p [exec $tclsh_path $test_path/wrap.tcl \ + lockscript.tcl $testdir/lock003.$i.out \ + $testdir $iter $objs $wait $ldegree $reads &] + lappend pidlist $p + } + + puts "\tLock003.b: $procs independent processes now running" + watch_procs $pidlist 30 10800 + + # Check for test failure + set errstrings [eval findfail [glob $testdir/lock003.*.out]] + foreach str $errstrings { + puts "FAIL: error message in .out file: $str" + } + + # Remove log files + for { set i 0 } {$i < $procs} {incr i} { + fileremove -f $testdir/lock003.$i.out + } +} + +# Create and destroy flag files to show we have an object locked, and +# verify that the correct files exist or don't exist given that we've +# just read or write locked a file. 
+proc lock003_create { rw obj } { + source ./include.tcl + + set pref $testdir/L3FLAG + set f [open $pref.$rw.[pid].$obj w] + close $f +} + +proc lock003_destroy { obj } { + source ./include.tcl + + set pref $testdir/L3FLAG + set f [glob -nocomplain $pref.*.[pid].$obj] + error_check_good l3_destroy [llength $f] 1 + fileremove $f +} + +proc lock003_vrfy { rw obj } { + source ./include.tcl + + set pref $testdir/L3FLAG + if { [string compare $rw "write"] == 0 } { + set fs [glob -nocomplain $pref.*.*.$obj] + error_check_good "number of other locks on $obj" [llength $fs] 0 + } else { + set fs [glob -nocomplain $pref.write.*.$obj] + error_check_good "number of write locks on $obj" [llength $fs] 0 + } +} + diff --git a/test/tcl/lock004.tcl b/test/tcl/lock004.tcl new file mode 100644 index 00000000..6e832bab --- /dev/null +++ b/test/tcl/lock004.tcl @@ -0,0 +1,28 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST lock004 +# TEST Test locker ids wraping around. + +proc lock004 {} { + source ./include.tcl + global lock_curid + global lock_maxid + + set save_curid $lock_curid + set save_maxid $lock_maxid + + set lock_curid [expr $lock_maxid - 1] + puts "Lock004: Locker id wraparound test" + puts "\tLock004.a: repeat lock001-lock003 with wraparound lockids" + + lock001 + lock002 + lock003 + + set lock_curid $save_curid + set lock_maxid $save_maxid +} diff --git a/test/tcl/lock005.tcl b/test/tcl/lock005.tcl new file mode 100644 index 00000000..247e6228 --- /dev/null +++ b/test/tcl/lock005.tcl @@ -0,0 +1,176 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST lock005 +# TEST Check that page locks are being released properly. 
+ +proc lock005 { } { + source ./include.tcl + + puts "Lock005: Page lock release test" + + # Clean up after previous runs + env_cleanup $testdir + + # Open/create the lock region + set e [berkdb_env -create -lock -home $testdir -txn -log] + error_check_good env_open [is_valid_env $e] TRUE + + # Open/create the database + set db [berkdb open -create -auto_commit -env $e -len 10 -queue q.db] + error_check_good dbopen [is_valid_db $db] TRUE + + # Check that records are locking by trying to + # fetch a record on the wrong transaction. + puts "\tLock005.a: Verify that we are locking" + + # Start the first transaction + set txn1 [$e txn -nowait] + error_check_good txn_begin [is_valid_txn $txn1 $e] TRUE + set ret [catch {$db put -txn $txn1 -append record1} recno1] + error_check_good dbput_txn1 $ret 0 + + # Start second txn while the first is still running ... + set txn2 [$e txn -nowait] + error_check_good txn_begin [is_valid_txn $txn2 $e] TRUE + + # ... and try to get a record from the first txn (should fail) + set ret [catch {$db get -txn $txn2 $recno1} res] + error_check_good dbget_wrong_record \ + [is_substr $res "deadlock"] 1 + + # End transactions + error_check_good txn1commit [$txn1 commit] 0 + how_many_locks 1 $e + error_check_good txn2commit [$txn2 commit] 0 + # The number of locks stays the same here because the first + # lock is released and the second lock was never granted. 
+ how_many_locks 1 $e + + # Test lock behavior for both abort and commit + puts "\tLock005.b: Verify locks after abort or commit" + foreach endorder {forward reverse} { + end_order_test $db $e commit abort $endorder + end_order_test $db $e abort commit $endorder + end_order_test $db $e commit commit $endorder + end_order_test $db $e abort abort $endorder + } + + # Clean up + error_check_good db_close [$db close] 0 + error_check_good env_close [$e close] 0 +} + +proc end_order_test { db e txn1end txn2end endorder } { + # Start one transaction + set txn1 [$e txn -nowait] + error_check_good txn_begin [is_valid_txn $txn1 $e] TRUE + set ret [catch {$db put -txn $txn1 -append record1} recno1] + error_check_good dbput_txn1 $ret 0 + + # Check number of locks + how_many_locks 2 $e + + # Start a second transaction while first is still running + set txn2 [$e txn -nowait] + error_check_good txn_begin [is_valid_txn $txn2 $e] TRUE + set ret [catch {$db put -txn $txn2 -append record2} recno2] + error_check_good dbput_txn2 $ret 0 + how_many_locks 3 $e + + # Now commit or abort one txn and make sure the other is okay + if {$endorder == "forward"} { + # End transaction 1 first + puts "\tLock005.b.1: $txn1end txn1 then $txn2end txn2" + error_check_good txn_$txn1end [$txn1 $txn1end] 0 + how_many_locks 2 $e + + # txn1 is now ended, but txn2 is still running + set ret1 [catch {$db get -txn $txn2 $recno1} res1] + set ret2 [catch {$db get -txn $txn2 $recno2} res2] + if { $txn1end == "commit" } { + error_check_good txn2_sees_txn1 $ret1 0 + error_check_good txn2_sees_txn2 $ret2 0 + } else { + # transaction 1 was aborted + error_check_good txn2_cantsee_txn1 [llength $res1] 0 + } + + # End transaction 2 second + error_check_good txn_$txn2end [$txn2 $txn2end] 0 + how_many_locks 1 $e + + # txn1 and txn2 should both now be invalid + # The get no longer needs to be transactional + set ret3 [catch {$db get $recno1} res3] + set ret4 [catch {$db get $recno2} res4] + + if { $txn2end == "commit" } { + 
error_check_good txn2_sees_txn1 $ret3 0 + error_check_good txn2_sees_txn2 $ret4 0 + error_check_good txn2_has_record2 \ + [is_substr $res4 "record2"] 1 + } else { + # transaction 2 was aborted + error_check_good txn2_cantsee_txn1 $ret3 0 + error_check_good txn2_aborted [llength $res4] 0 + } + + } elseif { $endorder == "reverse" } { + # End transaction 2 first + puts "\tLock005.b.2: $txn2end txn2 then $txn1end txn1" + error_check_good txn_$txn2end [$txn2 $txn2end] 0 + how_many_locks 2 $e + + # txn2 is ended, but txn1 is still running + set ret1 [catch {$db get -txn $txn1 $recno1} res1] + set ret2 [catch {$db get -txn $txn1 $recno2} res2] + if { $txn2end == "commit" } { + error_check_good txn1_sees_txn1 $ret1 0 + error_check_good txn1_sees_txn2 $ret2 0 + } else { + # transaction 2 was aborted + error_check_good txn1_cantsee_txn2 [llength $res2] 0 + } + + # End transaction 1 second + error_check_good txn_$txn1end [$txn1 $txn1end] 0 + how_many_locks 1 $e + + # txn1 and txn2 should both now be invalid + # The get no longer needs to be transactional + set ret3 [catch {$db get $recno1} res3] + set ret4 [catch {$db get $recno2} res4] + + if { $txn1end == "commit" } { + error_check_good txn1_sees_txn1 $ret3 0 + error_check_good txn1_sees_txn2 $ret4 0 + error_check_good txn1_has_record1 \ + [is_substr $res3 "record1"] 1 + } else { + # transaction 1 was aborted + error_check_good txn1_cantsee_txn2 $ret4 0 + error_check_good txn1_aborted [llength $res3] 0 + } + } +} + +proc how_many_locks { expected env } { + set stat [$env lock_stat] + set str "Current number of locks" + set checked 0 + foreach statpair $stat { + if { $checked == 1 } { + break + } + if { [is_substr [lindex $statpair 0] $str] != 0} { + set checked 1 + set nlocks [lindex $statpair 1] + error_check_good expected_nlocks $nlocks $expected + } + } + error_check_good checked $checked 1 +} diff --git a/test/tcl/lock006.tcl b/test/tcl/lock006.tcl new file mode 100644 index 00000000..0bf70a3b --- /dev/null +++ 
b/test/tcl/lock006.tcl @@ -0,0 +1,186 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST lock006 +# TEST Test lock_vec interface. We do all the same things that +# TEST lock001 does, using lock_vec instead of lock_get and lock_put, +# TEST plus a few more things like lock-coupling. +# TEST 1. Get and release one at a time. +# TEST 2. Release with put_obj (all locks for a given locker/obj). +# TEST 3. Release with put_all (all locks for a given locker). +# TEST Regularly check lock_stat to verify all locks have been +# TEST released. +proc lock006 { } { + source ./include.tcl + global lock_curid + global lock_maxid + + set save_curid $lock_curid + set save_maxid $lock_maxid + + # Cleanup + env_cleanup $testdir + + # Open the region we'll use for testing. + set eflags "-create -lock -home $testdir" + set env [eval {berkdb_env} $eflags] + error_check_good env [is_valid_env $env] TRUE + error_check_good lock_id_set \ + [$env lock_id_set $lock_curid $lock_maxid] 0 + + puts "Lock006: test basic lock operations using lock_vec interface" + set locker [$env lock_id] + set modes {ng write read iwrite iread iwr} + + # Get and release each type of lock. + puts "\tLock006.a: get and release one at a time" + foreach m $modes { + set obj obj$m + set lockp [$env lock_vec $locker "get $obj $m"] + error_check_good lock_vec_get:a [is_blocked $lockp] 0 + error_check_good lock_vec_get:a [is_valid_lock $lockp $env] TRUE + error_check_good lock_vec_put:a \ + [$env lock_vec $locker "put $lockp"] 0 + } + how_many_locks 0 $env + + # Get a bunch of locks for the same locker; these should work + set obj OBJECT + puts "\tLock006.b: Get many locks for 1 locker,\ + release with put_all." 
+ foreach m $modes { + set lockp [$env lock_vec $locker "get $obj $m"] + error_check_good lock_vec_get:b [is_blocked $lockp] 0 + error_check_good lock_vec_get:b [is_valid_lock $lockp $env] TRUE + } + how_many_locks 6 $env + error_check_good release [$env lock_vec $locker put_all] 0 + how_many_locks 0 $env + + puts "\tLock006.c: Get many locks for 1 locker,\ + release with put_obj." + foreach m $modes { + set lockp [$env lock_vec $locker "get $obj $m"] + error_check_good lock_vec_get:b [is_blocked $lockp] 0 + error_check_good lock_vec_get:b [is_valid_lock $lockp $env] TRUE + } + error_check_good release [$env lock_vec $locker "put_obj $obj"] 0 +# how_many_locks 0 $env + how_many_locks 6 $env + + # Get many locks for the same locker on more than one object. + # Release with put_all. + set obj2 OBJECT2 + puts "\tLock006.d: Get many locks on 2 objects for 1 locker,\ + release with put_all." + foreach m $modes { + set lockp [$env lock_vec $locker "get $obj $m"] + error_check_good lock_vec_get:b [is_blocked $lockp] 0 + error_check_good lock_vec_get:b [is_valid_lock $lockp $env] TRUE + } + foreach m $modes { + set lockp [$env lock_vec $locker "get $obj2 $m"] + error_check_good lock_vec_get:b [is_blocked $lockp] 0 + error_check_good lock_vec_get:b [is_valid_lock $lockp $env] TRUE + } + error_check_good release [$env lock_vec $locker put_all] 0 +# how_many_locks 0 $env + how_many_locks 6 $env + + # Check that reference counted locks work. + puts "\tLock006.e: reference counted locks." + for {set i 0} { $i < 10 } {incr i} { + set lockp [$env lock_vec -nowait $locker "get $obj write"] + error_check_good lock_vec_get:c [is_blocked $lockp] 0 + error_check_good lock_vec_get:c [is_valid_lock $lockp $env] TRUE + } + error_check_good put_all [$env lock_vec $locker put_all] 0 +# how_many_locks 0 $env + how_many_locks 6 $env + + # Lock-coupling. Get a lock on object 1. Get a lock on object 2, + # release object 1, and so on. + puts "\tLock006.f: Lock-coupling." 
+ set locker2 [$env lock_id] + + foreach m { read write iwrite iread iwr } { + set lockp [$env lock_vec $locker "get OBJ0 $m"] + set iter 0 + set nobjects 10 + while { $iter < 3 } { + for { set i 1 } { $i <= $nobjects } { incr i } { + set lockv [$env lock_vec $locker \ + "get OBJ$i $m" "put $lockp"] + + # Make sure another locker can get an exclusive + # lock on the object just released. + set lock2p [$env lock_vec -nowait $locker2 \ + "get OBJ[expr $i - 1] write" ] + error_check_good release_lock2 [$env lock_vec \ + $locker2 "put $lock2p"] 0 + + # Make sure another locker can't get an exclusive + # lock on the object just locked. + catch {$env lock_vec -nowait $locker2 \ + "get OBJ$i write"} ret + error_check_good not_granted \ + [is_substr $ret "not granted"] 1 + + set lockp [lindex $lockv 0] + if { $i == $nobjects } { + incr iter + } + } + } + error_check_good lock_put [$env lock_vec $locker "put $lockp"] 0 +# how_many_locks 0 $env + how_many_locks 6 $env + } + + # Finally try some failing locks. Set up a write lock on object. + foreach m { write } { + set lockp [$env lock_vec $locker "get $obj $m"] + error_check_good lock_vec_get:d [is_blocked $lockp] 0 + error_check_good lock_vec_get:d [is_valid_lock $lockp $env] TRUE + } + + # Change the locker + set newlocker [$env lock_id] + # Skip NO_LOCK. + puts "\tLock006.g: Change the locker, try to acquire read and write." + foreach m { read write iwrite iread iwr } { + catch {$env lock_vec -nowait $newlocker "get $obj $m"} ret + error_check_good lock_vec_get:d [is_substr $ret "not granted"] 1 + } + + # Now release original locks + error_check_good put_all [$env lock_vec $locker {put_all}] 0 + error_check_good free_id [$env lock_id_free $locker] 0 + + # Now re-acquire blocking locks + puts "\tLock006.h: Re-acquire blocking locks." 
+ foreach m { read write iwrite iread iwr } { + set lockp [$env lock_vec -nowait $newlocker "get $obj $m"] + error_check_good lock_get:e [is_valid_lock $lockp $env] TRUE + error_check_good lock_get:e [is_blocked $lockp] 0 + } + + # Now release new locks + error_check_good put_all [$env lock_vec $newlocker {put_all}] 0 + error_check_good free_id [$env lock_id_free $newlocker] 0 + + error_check_good envclose [$env close] 0 + +} + +# Blocked locks appear as lockmgrN.lockM\nBLOCKED +proc is_blocked { l } { + if { [string compare $l BLOCKED ] == 0 } { + return 1 + } else { + return 0 + } +} diff --git a/test/tcl/lockscript.tcl b/test/tcl/lockscript.tcl new file mode 100644 index 00000000..c68da1fa --- /dev/null +++ b/test/tcl/lockscript.tcl @@ -0,0 +1,116 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Random lock tester. +# Usage: lockscript dir numiters numobjs sleepint degree readratio +# dir: lock directory. +# numiters: Total number of iterations. +# numobjs: Number of objects on which to lock. +# sleepint: Maximum sleep interval. +# degree: Maximum number of locks to acquire at once +# readratio: Percent of locks that should be reads. 
+ +source ./include.tcl +source $test_path/test.tcl + +set usage "lockscript dir numiters numobjs sleepint degree readratio" + +# Verify usage +if { $argc != 6 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [lindex $argv 0] +set numiters [ lindex $argv 1 ] +set numobjs [ lindex $argv 2 ] +set sleepint [ lindex $argv 3 ] +set degree [ lindex $argv 4 ] +set readratio [ lindex $argv 5 ] + +# Initialize random number generator +global rand_init +berkdb srand $rand_init + + +catch { berkdb_env -create -lock -home $dir } e +error_check_good env_open [is_substr $e env] 1 +catch { $e lock_id } locker +error_check_good locker [is_valid_locker $locker] TRUE + +puts -nonewline "Beginning execution for $locker: $numiters $numobjs " +puts "$sleepint $degree $readratio" +flush stdout + +for { set iter 0 } { $iter < $numiters } { incr iter } { + set nlocks [berkdb random_int 1 $degree] + # We will always lock objects in ascending order to avoid + # deadlocks. + set lastobj 1 + set locklist {} + set objlist {} + for { set lnum 0 } { $lnum < $nlocks } { incr lnum } { + # Pick lock parameters + set obj [berkdb random_int $lastobj $numobjs] + set lastobj [expr $obj + 1] + set x [berkdb random_int 1 100 ] + if { $x <= $readratio } { + set rw read + } else { + set rw write + } + puts "[timestamp -c] $locker $lnum: $rw $obj" + + # Do get; add to list + catch {$e lock_get $rw $locker $obj} lockp + error_check_good lock_get [is_valid_lock $lockp $e] TRUE + + # Create a file to flag that we've a lock of the given + # type, after making sure only other read locks exist + # (if we're read locking) or no other locks exist (if + # we're writing). + lock003_vrfy $rw $obj + lock003_create $rw $obj + lappend objlist [list $obj $rw] + + lappend locklist $lockp + if {$lastobj > $numobjs} { + break + } + } + # Pick sleep interval + puts "[timestamp -c] $locker sleeping" + # We used to sleep 1 to $sleepint seconds. 
This makes the test + # run for hours. Instead, make it sleep for 10 to $sleepint * 100 + # milliseconds, for a maximum sleep time of 0.5 s. + after [berkdb random_int 10 [expr $sleepint * 100]] + puts "[timestamp -c] $locker awake" + + # Now release locks + puts "[timestamp -c] $locker released locks" + + # Delete our locking flag files, then reverify. (Note that the + # locking flag verification function assumes that our own lock + # is not currently flagged.) + foreach pair $objlist { + set obj [lindex $pair 0] + set rw [lindex $pair 1] + lock003_destroy $obj + lock003_vrfy $rw $obj + } + + release_list $locklist + flush stdout +} + +set ret [$e close] +error_check_good env_close $ret 0 + +puts "[timestamp -c] $locker Complete" +flush stdout + +exit diff --git a/test/tcl/log001.tcl b/test/tcl/log001.tcl new file mode 100644 index 00000000..35bf9c83 --- /dev/null +++ b/test/tcl/log001.tcl @@ -0,0 +1,143 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST log001 +# TEST Read/write log records. +# TEST Test with and without fixed-length, in-memory logging, +# TEST and encryption. +proc log001 { } { + global passwd + global has_crypto + global rand_init + + berkdb srand $rand_init + set iter 1000 + + set max [expr 1024 * 128] + foreach fixedlength { 0 1 } { + foreach inmem { 1 0 } { + log001_body $max $iter $fixedlength $inmem + log001_body $max [expr $iter * 15] $fixedlength $inmem + + # Skip encrypted tests if not supported. 
+ if { $has_crypto == 0 } { + continue + } + log001_body $max\ + $iter $fixedlength $inmem "-encryptaes $passwd" + log001_body $max\ + [expr $iter * 15] $fixedlength $inmem "-encryptaes $passwd" + } + } +} + +proc log001_body { max nrecs fixedlength inmem {encargs ""} } { + source ./include.tcl + + puts -nonewline "Log001: Basic put/get log records: " + if { $fixedlength == 1 } { + puts -nonewline "fixed-length ($encargs)" + } else { + puts -nonewline "variable-length ($encargs)" + } + + # In-memory logging requires a large enough log buffer that + # any active transaction can be aborted. + if { $inmem == 1 } { + set lbuf [expr 8 * [expr 1024 * 1024]] + puts " with in-memory logging." + } else { + puts " with on-disk logging." + } + + env_cleanup $testdir + + set logargs "" + if { $inmem == 1 } { + set logargs "-log_inmemory -log_buffer $lbuf" + } + set env [eval {berkdb_env -log -create -home $testdir -mode 0644} \ + $encargs $logargs -log_max $max] + error_check_good envopen [is_valid_env $env] TRUE + + # We will write records to the log and make sure we can + # read them back correctly. We'll use a standard pattern + # repeated some number of times for each record. + set lsn_list {} + set rec_list {} + puts "\tLog001.a: Writing $nrecs log records" + for { set i 0 } { $i < $nrecs } { incr i } { + set rec "" + for { set j 0 } { $j < [expr $i % 10 + 1] } {incr j} { + set rec $rec$i:logrec:$i + } + if { $fixedlength != 1 } { + set rec $rec:[random_data 237 0 0] + } + set lsn [$env log_put $rec] + error_check_bad log_put [is_substr $lsn log_cmd] 1 + lappend lsn_list $lsn + lappend rec_list $rec + } + + # Open a log cursor. 
+ set logc [$env log_cursor] + error_check_good logc [is_valid_logc $logc $env] TRUE + + puts "\tLog001.b: Retrieving log records sequentially (forward)" + set i 0 + for { set grec [$logc get -first] } { [llength $grec] != 0 } { + set grec [$logc get -next]} { + error_check_good log_get:seq [lindex $grec 1] \ + [lindex $rec_list $i] + incr i + } + + puts "\tLog001.c: Retrieving log records sequentially (backward)" + set i [llength $rec_list] + for { set grec [$logc get -last] } { [llength $grec] != 0 } { + set grec [$logc get -prev] } { + incr i -1 + error_check_good \ + log_get:seq [lindex $grec 1] [lindex $rec_list $i] + } + + puts "\tLog001.d: Retrieving log records sequentially by LSN" + set i 0 + foreach lsn $lsn_list { + set grec [$logc get -set $lsn] + error_check_good \ + log_get:seq [lindex $grec 1] [lindex $rec_list $i] + incr i + } + + puts "\tLog001.e: Retrieving log records randomly by LSN" + set m [expr [llength $lsn_list] - 1] + for { set i 0 } { $i < $nrecs } { incr i } { + set recno [berkdb random_int 0 $m ] + set lsn [lindex $lsn_list $recno] + set grec [$logc get -set $lsn] + error_check_good \ + log_get:seq [lindex $grec 1] [lindex $rec_list $recno] + } + + puts "\tLog001.f: Retrieving first/current, last/current log record" + set grec [$logc get -first] + error_check_good log_get:seq [lindex $grec 1] [lindex $rec_list 0] + set grec [$logc get -current] + error_check_good log_get:seq [lindex $grec 1] [lindex $rec_list 0] + set i [expr [llength $rec_list] - 1] + set grec [$logc get -last] + error_check_good log_get:seq [lindex $grec 1] [lindex $rec_list $i] + set grec [$logc get -current] + error_check_good log_get:seq [lindex $grec 1] [lindex $rec_list $i] + + # Close and unlink the file + error_check_good log_cursor:close:$logc [$logc close] 0 + error_check_good env:close [$env close] 0 + error_check_good envremove [berkdb envremove -home $testdir] 0 +} diff --git a/test/tcl/log002.tcl b/test/tcl/log002.tcl new file mode 100644 index 
00000000..54f3e012 --- /dev/null +++ b/test/tcl/log002.tcl @@ -0,0 +1,101 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log002 +# TEST Tests multiple logs +# TEST Log truncation +# TEST LSN comparison and file functionality. +proc log002 { } { + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + foreach inmem { 1 0 } { + log002_body $inmem + } +} + +proc log002_body { inmem } { + source ./include.tcl + + puts "Log002: Multiple log test w/trunc, file, compare functionality" + + env_cleanup $testdir + + set max [expr 1024 * 128] + + set logargs "" + if { $inmem == 0 } { + puts "Log002: Using on-disk logging." + } else { + puts "Log002: Using in-memory logging." + set lbuf [expr 8 * [expr 1024 * 1024]] + set logargs "-log_inmemory -log_buffer $lbuf" + } + set env [eval {berkdb_env} -create -home $testdir -log \ + -mode 0644 $logargs -log_max $max] + error_check_good envopen [is_valid_env $env] TRUE + + # We'll record every hundredth record for later use + set info_list {} + + puts "\tLog002.a: Writing log records" + set i 0 + for {set s 0} { $s < [expr 3 * $max] } { incr s $len } { + set rec [random_data 120 0 0] + set len [string length $rec] + set lsn [$env log_put $rec] + + if { [expr $i % 100 ] == 0 } { + lappend info_list [list $lsn $rec] + } + incr i + } + + puts "\tLog002.b: Checking log_compare" + set last {0 0} + foreach p $info_list { + set l [lindex $p 0] + if { [llength $last] != 0 } { + error_check_good \ + log_compare [$env log_compare $l $last] 1 + error_check_good \ + log_compare [$env log_compare $last $l] -1 + error_check_good \ + log_compare [$env log_compare $l $l] 0 + } + set last $l + } + + puts "\tLog002.c: Checking log_file" + if { $inmem == 0 } { + set flist [glob $testdir/log*] + foreach p $info_list { + set lsn [lindex $p 0] + set f [$env log_file $lsn] + + # Change backslash separators on 
Windows to forward + # slashes, as the rest of the test suite expects. + regsub -all {\\} $f {/} f + error_check_bad log_file:$f [lsearch $flist $f] -1 + } + } + + puts "\tLog002.d: Verifying records" + + set logc [$env log_cursor] + error_check_good log_cursor [is_valid_logc $logc $env] TRUE + + for {set i [expr [llength $info_list] - 1] } { $i >= 0 } { incr i -1} { + set p [lindex $info_list $i] + set grec [$logc get -set [lindex $p 0]] + error_check_good log_get:$env [lindex $grec 1] [lindex $p 1] + } + + # Close and unlink the file + error_check_good log_cursor:close:$logc [$logc close] 0 + error_check_good env:close [$env close] 0 + error_check_good envremove [berkdb envremove -home $testdir] 0 +} diff --git a/test/tcl/log003.tcl b/test/tcl/log003.tcl new file mode 100644 index 00000000..aca0c881 --- /dev/null +++ b/test/tcl/log003.tcl @@ -0,0 +1,143 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log003 +# TEST Verify that log_flush is flushing records correctly. +proc log003 { } { + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + # Even though log_flush doesn't do anything for in-memory + # logging, we want to make sure calling it doesn't break + # anything. + foreach inmem { 1 0 } { + log003_body $inmem + } +} + +proc log003_body { inmem } { + source ./include.tcl + + puts -nonewline "Log003: Verify log_flush behavior" + if { $inmem == 0 } { + puts " (on-disk logging)." + } else { + puts " (in-memory logging)." 
+ } + + set max [expr 1024 * 128] + env_cleanup $testdir + set short_rec "abcdefghijklmnopqrstuvwxyz" + set long_rec [repeat $short_rec 200] + set very_long_rec [repeat $long_rec 4] + + foreach rec "$short_rec $long_rec $very_long_rec" { + puts "\tLog003.a: Verify flush on [string length $rec] byte rec" + + set logargs "" + if { $inmem == 1 } { + set logargs "-log_inmemory -log_buffer [expr $max * 2]" + } + set env [eval {berkdb_env} -log -home $testdir -create \ + -mode 0644 $logargs -log_max $max] + error_check_good envopen [is_valid_env $env] TRUE + + set lsn [$env log_put $rec] + error_check_bad log_put [lindex $lsn 0] "ERROR:" + set ret [$env log_flush $lsn] + error_check_good log_flush $ret 0 + + # Now, we want to crash the region and recheck. Closing the + # log does not flush any records, so we'll use a close to + # do the "crash" + set ret [$env close] + error_check_good log_env:close $ret 0 + + # Now, remove the log region + #set ret [berkdb envremove -home $testdir] + #error_check_good env:remove $ret 0 + + # Re-open the log and try to read the record. + set env [berkdb_env -create -home $testdir \ + -log -mode 0644 -log_max $max] + error_check_good envopen [is_valid_env $env] TRUE + + set logc [$env log_cursor] + error_check_good log_cursor [is_valid_logc $logc $env] TRUE + + set gotrec [$logc get -first] + error_check_good lp_get [lindex $gotrec 1] $rec + + # Close and unlink the file + error_check_good log_cursor:close:$logc [$logc close] 0 + error_check_good env:close:$env [$env close] 0 + error_check_good envremove [berkdb envremove -home $testdir] 0 + log_cleanup $testdir + } + + if { $inmem == 1 } { + puts "Log003: Skipping remainder of test for in-memory logging." 
+ return + } + + foreach rec "$short_rec $long_rec $very_long_rec" { + puts "\tLog003.b: \ + Verify flush on non-last record [string length $rec]" + + set env [berkdb_env -log -home $testdir \ + -create -mode 0644 -log_max $max] + + error_check_good envopen [is_valid_env $env] TRUE + + # Put 10 random records + for { set i 0 } { $i < 10 } { incr i} { + set r [random_data 450 0 0] + set lsn [$env log_put $r] + error_check_bad log_put [lindex $lsn 0] "ERROR:" + } + + # Put the record we are interested in + set save_lsn [$env log_put $rec] + error_check_bad log_put [lindex $save_lsn 0] "ERROR:" + + # Put 10 more random records + for { set i 0 } { $i < 10 } { incr i} { + set r [random_data 450 0 0] + set lsn [$env log_put $r] + error_check_bad log_put [lindex $lsn 0] "ERROR:" + } + + # Now check the flush + set ret [$env log_flush $save_lsn] + error_check_good log_flush $ret 0 + + # Now, we want to crash the region and recheck. Closing the + # log does not flush any records, so we'll use a close to + # do the "crash". + # + # Now, close and remove the log region + error_check_good env:close:$env [$env close] 0 + set ret [berkdb envremove -home $testdir] + error_check_good env:remove $ret 0 + + # Re-open the log and try to read the record. 
+ set env [berkdb_env -log -home $testdir \ + -create -mode 0644 -log_max $max] + error_check_good envopen [is_valid_env $env] TRUE + + set logc [$env log_cursor] + error_check_good log_cursor [is_valid_logc $logc $env] TRUE + + set gotrec [$logc get -set $save_lsn] + error_check_good lp_get [lindex $gotrec 1] $rec + + # Close and unlink the file + error_check_good log_cursor:close:$logc [$logc close] 0 + error_check_good env:close:$env [$env close] 0 + error_check_good envremove [berkdb envremove -home $testdir] 0 + log_cleanup $testdir + } +} diff --git a/test/tcl/log004.tcl b/test/tcl/log004.tcl new file mode 100644 index 00000000..e083ad99 --- /dev/null +++ b/test/tcl/log004.tcl @@ -0,0 +1,51 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST log004 +# TEST Make sure that if we do PREVs on a log, but the beginning of the +# TEST log has been truncated, we do the right thing. +proc log004 { } { + foreach inmem { 1 0 } { + log004_body $inmem + } +} + +proc log004_body { inmem } { + source ./include.tcl + + puts "Log004: Prev on log when beginning of log has been truncated." + # Use archive test to populate log + env_cleanup $testdir + puts "\tLog004.a: Call archive to populate log." + archive $inmem + + # Delete all log files under 100 + puts "\tLog004.b: Delete all log files under 100." + set ret [catch { glob $testdir/log.00000000* } result] + if { $ret == 0 } { + eval fileremove -f $result + } + + # Now open the log and get the first record and try a prev + puts "\tLog004.c: Open truncated log, attempt to access missing portion." 
+ set env [berkdb_env -create -log -home $testdir] + error_check_good envopen [is_valid_env $env] TRUE + + set logc [$env log_cursor] + error_check_good log_cursor [is_valid_logc $logc $env] TRUE + + set ret [$logc get -first] + error_check_bad log_get [llength $ret] 0 + + # This should give DB_NOTFOUND which is a ret of length 0 + catch {$logc get -prev} ret + error_check_good log_get_prev [string length $ret] 0 + + puts "\tLog004.d: Close log and environment." + error_check_good log_cursor_close [$logc close] 0 + error_check_good log_close [$env close] 0 +} diff --git a/test/tcl/log005.tcl b/test/tcl/log005.tcl new file mode 100644 index 00000000..dedc39e2 --- /dev/null +++ b/test/tcl/log005.tcl @@ -0,0 +1,117 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log005 +# TEST Check that log file sizes can change on the fly. +proc log005 { } { + + # Skip the test for HP-UX, where we can't do the second + # env open. + global is_hp_test + if { $is_hp_test == 1 } { + puts "Log005: Skipping for HP-UX." + return + } + + foreach inmem { 1 0 } { + log005_body $inmem + } +} +proc log005_body { inmem } { + source ./include.tcl + env_cleanup $testdir + + puts -nonewline "Log005: Check that log file sizes can change" + if { $inmem == 0 } { + puts " (on-disk logging)." + } else { + puts " (in-memory logging)." + } + + # Open the environment, set and check the log file size. + puts "\tLog005.a: open, set and check the log file size." + set logargs "" + if { $inmem == 1 } { + set lbuf [expr 1024 * 1024] + set logargs "-log_inmemory -log_buffer $lbuf" + } + set env [eval {berkdb_env} -create -home $testdir \ + $logargs -log_max 1000000 -txn] + error_check_good envopen [is_valid_env $env] TRUE + set db [berkdb_open \ + -env $env -create -mode 0644 -btree -auto_commit a.db] + error_check_good dbopen [is_valid_db $db] TRUE + + # Get the current log file maximum. 
+ set max [log005_stat $env "Current log file size"] + error_check_good max_set $max 1000000 + + # Reset the log file size using a second open, and make sure + # it changes. + puts "\tLog005.b: reset during open, check the log file size." + set envtmp [berkdb_env -home $testdir -log_max 900000 -txn] + error_check_good envtmp_open [is_valid_env $envtmp] TRUE + error_check_good envtmp_close [$envtmp close] 0 + + set tmp [log005_stat $env "Current log file size"] + error_check_good max_changed 900000 $tmp + + puts "\tLog005.c: fill in the current log file size." + # Fill in the current log file. + set new_lsn 0 + set data [repeat "a" 1024] + for { set i 1 } \ + { [log005_stat $env "Current log file number"] != 2 } \ + { incr i } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set ret [$db put -txn $t $i $data] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + + set last_lsn $new_lsn + set new_lsn [log005_stat $env "Current log file offset"] + } + + # The last LSN in the first file should be more than our new + # file size. + error_check_good "lsn check < 900000" [expr 900000 < $last_lsn] 1 + + # Close down the environment. + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + + if { $inmem == 1 } { + puts "Log005: Skipping remainder of test for in-memory logging." + return + } + + puts "\tLog005.d: check the log file size is unchanged after recovery." + # Open again, running recovery. Verify the log file size is as we + # left it. + set env [berkdb_env -create -home $testdir -recover -txn] + error_check_good env_open [is_valid_env $env] TRUE + + set tmp [log005_stat $env "Current log file size"] + error_check_good after_recovery 900000 $tmp + + error_check_good env_close [$env close] 0 +} + +# log005_stat -- +# Return the current log statistics. 
+proc log005_stat { env s } { + set stat [$env log_stat] + foreach statpair $stat { + set statmsg [lindex $statpair 0] + set statval [lindex $statpair 1] + if {[is_substr $statmsg $s] != 0} { + return $statval + } + } + puts "FAIL: log005: stat string $s not found" + return 0 +} diff --git a/test/tcl/log006.tcl b/test/tcl/log006.tcl new file mode 100644 index 00000000..bc8d4155 --- /dev/null +++ b/test/tcl/log006.tcl @@ -0,0 +1,230 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log006 +# TEST Test log file auto-remove. +# TEST Test normal operation. +# TEST Test a long-lived txn. +# TEST Test log_archive flags. +# TEST Test db_archive flags. +# TEST Test turning on later. +# TEST Test setting via DB_CONFIG. +proc log006 { } { + source ./include.tcl + + puts "Log006: Check auto-remove of log files." + env_cleanup $testdir + + # Open the environment, set auto-remove flag. Use smaller log + # files to make more of them. + puts "\tLog006.a: open environment, populate database." + set lbuf 16384 + set lmax 65536 + set env [berkdb_env_noerr -log_remove \ + -create -home $testdir -log_buffer $lbuf -log_max $lmax -txn] + error_check_good envopen [is_valid_env $env] TRUE + + log006_put $testdir $env + + # + # Check log files. Using the small log file size, we should have + # have made a lot of log files. Check that we have only a few left. + # Dividing by 5 tests that at least 80% of the files are gone. + # + set log_number [stat_field $env log_stat "Current log file number"] + set log_expect [expr $log_number / 5] + + puts "\tLog006.b: Check log files removed." + set lfiles [glob -nocomplain $testdir/log.*] + set remlen [llength $lfiles] + error_check_good lfiles_len [expr $remlen < $log_expect] 1 + error_check_good lfiles [lsearch $lfiles $testdir/log.0000000001] -1 + # Save last log file for later check. 
+ # Files may not be sorted, sort them and then save the last filename. + set oldfile [lindex [lsort -ascii $lfiles] end] + + # Rerun log006_put with a long lived txn. + # + puts "\tLog006.c: Rerun put loop with long-lived transaction." + cleanup $testdir $env + set txn [$env txn] + error_check_good txn [is_valid_txn $txn $env] TRUE + + # Give the txn something to do so no files can be removed. + set testfile temp.db + set db [eval {berkdb_open_noerr -create -mode 0644} \ + -env $env -txn $txn -pagesize 8192 -btree $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + log006_put $testdir $env + + puts "\tLog006.d: Check log files not removed." + set lfiles [glob -nocomplain $testdir/log.*] + error_check_good lfiles2_len [expr [llength $lfiles] > $remlen] 1 + set lfiles [lsort -ascii $lfiles] + error_check_good lfiles_chk [lsearch $lfiles $oldfile] 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + error_check_good ckp1 [$env txn_checkpoint] 0 + error_check_good ckp2 [$env txn_checkpoint] 0 + + puts "\tLog006.e: Run log_archive with -auto_remove flag." + # When we're done, only the last log file should remain. + set lfiles [glob -nocomplain $testdir/log.*] + set oldfile [lindex [lsort -ascii $lfiles] end] + + # First, though, verify mutual-exclusiveness of flag. + foreach f {-arch_abs -arch_data -arch_log} { + set stat [catch {eval $env log_archive -arch_remove $f} ret] + error_check_good stat $stat 1 + error_check_good la:$f:fail [is_substr $ret "illegal flag"] 1 + } + # Now run it for real. + set stat [catch {$env log_archive -arch_remove} ret] + error_check_good stat $stat 0 + + puts "\tLog006.f: Check only $oldfile remains." + set lfiles [glob -nocomplain $testdir/log.*] + error_check_good 1log [llength $lfiles] 1 + error_check_good lfiles_chk [lsearch $lfiles $oldfile] 0 + + puts "\tLog006.g: Rerun put loop with long-lived transaction." 
+ set txn [$env txn] + error_check_good txn [is_valid_txn $txn $env] TRUE + log006_put $testdir $env + error_check_good txn_commit [$txn commit] 0 + error_check_good ckp1 [$env txn_checkpoint] 0 + error_check_good ckp2 [$env txn_checkpoint] 0 + error_check_good env_close [$env close] 0 + + # + # Test db_archive's auto-remove flag. + # After we are done, only the last log file should be there. + # First check that the delete flag cannot be used with any + # of the other flags. + # + puts "\tLog006.h: Run db_archive with delete flag." + set lfiles [glob -nocomplain $testdir/log.*] + set oldfile [lindex [lsort -ascii $lfiles] end] + # + # Again, first check illegal flag combinations with db_archive. + # + foreach f {-a -l -s} { + set stat [catch {exec $util_path/db_archive $f -d -h $testdir} \ + ret] + error_check_good stat $stat 1 + error_check_good la:fail [is_substr $ret "illegal flag"] 1 + } + set stat [catch {exec $util_path/db_archive -d -h $testdir} ret] + error_check_good stat $stat 0 + + puts "\tLog006.i: Check only $oldfile remains." + set lfiles [glob -nocomplain $testdir/log.*] + error_check_good 1log [llength $lfiles] 1 + error_check_good lfiles_chk [lsearch $lfiles $oldfile] 0 + + # + # Now rerun some parts with other env settings tested. + # + env_cleanup $testdir + + # First test that the option can be turned on later. + # 1. Open env w/o auto-remove. + # 2. Run log006_put. + # 3. Verify log files all there. + # 4. Call env set_flags to turn it on. + # 5. Run log006_put. + # 6. Verify log files removed. + puts "\tLog006.j: open environment w/o auto remove, populate database." + set env [berkdb_env -recover \ + -create -home $testdir -log_buffer $lbuf -log_max $lmax -txn] + error_check_good envopen [is_valid_env $env] TRUE + + log006_put $testdir $env + + puts "\tLog006.k: Check log files not removed." 
+ set lfiles [glob -nocomplain $testdir/log.*] + error_check_good lfiles2_len [expr [llength $lfiles] > $remlen] 1 + set lfiles [lsort -ascii $lfiles] + error_check_good lfiles [lsearch $lfiles $testdir/log.0000000001] 0 + + puts "\tLog006.l: turn on auto remove and repopulate database." + error_check_good sf [$env log_config autoremove on] 0 + + log006_put $testdir $env + + puts "\tLog006.m: Check log files removed." + set lfiles [glob -nocomplain $testdir/log.*] + error_check_good lfiles_len [expr [llength $lfiles] < $log_expect] 1 + error_check_good lfiles [lsearch $lfiles $testdir/log.0000000001] -1 + error_check_good env_close [$env close] 0 + + # + # Configure via DB_CONFIG. + # + env_cleanup $testdir + + puts "\tLog006.n: Test setting via DB_CONFIG." + # Open the environment, w/o remove flag, but DB_CONFIG. + set cid [open $testdir/DB_CONFIG w] + puts $cid "log_set_config db_log_auto_remove" + close $cid + set env [berkdb_env -recover \ + -create -home $testdir -log_buffer $lbuf -log_max $lmax -txn] + error_check_good envopen [is_valid_env $env] TRUE + + log006_put $testdir $env + + puts "\tLog006.o: Check log files removed." + set lfiles [glob -nocomplain $testdir/log.*] + error_check_good lfiles_len [expr [llength $lfiles] < $log_expect] 1 + error_check_good lfiles [lsearch $lfiles $testdir/log.0000000001] -1 + error_check_good env_close [$env close] 0 + +} + +# +# Modified from test003. +# +proc log006_put { testdir env } { + set testfile log006.db + # + # Specify a pagesize so we can control how many log files + # are created and left over. 
+ # + set db [eval {berkdb_open_noerr -create -mode 0644} \ + -env $env -auto_commit -pagesize 8192 -btree $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set lmax [$env get_lg_max] + set file_list [get_file_list] + set count 0 + foreach f $file_list { + if { [string compare [file type $f] "file"] != 0 } { + continue + } + set key $f + # Should really catch errors + set fid [open $f r] + fconfigure $fid -translation binary + # Read in less than the maximum log size. + set data [read $fid [expr $lmax - [expr $lmax / 8]]] + close $fid + + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set ret [eval {$db put} $txn {$key $data}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + if { $count % 10 == 0 } { + error_check_good ckp($count) [$env txn_checkpoint] 0 + } + + incr count + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/log007.tcl b/test/tcl/log007.tcl new file mode 100644 index 00000000..6468b67f --- /dev/null +++ b/test/tcl/log007.tcl @@ -0,0 +1,110 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log007 +# TEST Test of in-memory logging bugs. [#11505] +# TEST +# TEST Test db_printlog with in-memory logs. +# +proc log007 { } { + global testdir + global util_path + set tnum "007" + + puts "Log$tnum: Test in-memory logs with db_printlog." + + # Log size is small so we quickly create more than one. + # Since we are in-memory the buffer is larger than the + # file size. + set pagesize 4096 + append args " -pagesize $pagesize " + set log_max [expr $pagesize * 2] + set log_buf [expr $log_max * 2] + + # We have 13-byte records. We want to fill slightly more + # than one virtual log file on each iteration. The first + # record always has an offset of 28. 
+ # + set recsize 13 + set recsperfile [expr [expr $log_max - 28] / $recsize] + set nrecs [expr $recsperfile + 1] + + # Open environment. + env_cleanup $testdir + set flags " -create -txn -home $testdir \ + -log_inmemory -log_buffer $log_buf -log_max $log_max" + set env [eval {berkdb_env} $flags] + error_check_good env_open [is_valid_env $env] TRUE + + set iter 15 + set lastfile 1 + for { set i 0 } { $i < $iter } { incr i } { + puts "\tLog$tnum.a.$i: Writing $nrecs 13-byte log records." + set lsn_list {} + for { set j 0 } { $j < $nrecs } { incr j } { + set rec "1" + # Make the first record one byte larger for each + # successive log file so we hit the end of the + # log file at each of the 13 possibilities. + set nentries [expr [expr $i * $nrecs] + $j] + if { [expr $nentries % 628] == 0 } { + append firstrec a + set ret [$env log_put $firstrec] + } else { + set ret [$env log_put $rec] + } + error_check_bad log_put [is_substr $ret log_cmd] 1 + lappend lsn_list $ret + } + + # Open a log cursor. + set m_logc [$env log_cursor] + error_check_good m_logc [is_valid_logc $m_logc $env] TRUE + + # Check that we're in the expected virtual log file. + set first [$m_logc get -first] + error_check_good first_lsn [lindex $first 0] "[expr $i + 1] 28" + set last [$m_logc get -last] + + puts "\tLog$tnum.b.$i: Read log records sequentially." + set j 0 + for { set logrec [$m_logc get -first] } \ + { [llength $logrec] != 0 } \ + { set logrec [$m_logc get -next]} { + set file [lindex [lindex $logrec 0] 0] + if { $file != $lastfile } { + # We have entered a new virtual log file. + set lastfile $file + } + set offset [lindex [lindex $logrec 0] 1] + set lsn($j) "\[$file\]\[$offset\]" + incr j + } + error_check_good cursor_close [$m_logc close] 0 + + puts "\tLog$tnum.c.$i: Compare printlog to log records." 
+ set stat [catch {eval exec $util_path/db_printlog \ + -h $testdir > $testdir/prlog} result] + error_check_good stat_prlog $stat 0 + + # Make sure the results of printlog contain all the same + # LSNs we saved when walking the files with the log cursor. + set j 0 + set fd [open $testdir/prlog r] + while { [gets $fd record] >= 0 } { + # A log record begins with "[". + if { [string match {\[*} $record] == 1 } { + error_check_good \ + check_prlog [is_substr $record $lsn($j)] 1 + incr j + } + } + close $fd + } + + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -home $testdir] 0 +} diff --git a/test/tcl/log008.tcl b/test/tcl/log008.tcl new file mode 100644 index 00000000..ccce4b81 --- /dev/null +++ b/test/tcl/log008.tcl @@ -0,0 +1,46 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log008 +# TEST Test what happens if a txn_ckp record falls into a +# TEST different log file than the DBREG_CKP records generated +# TEST by the same checkpoint. + +proc log008 { { nhandles 100 } args } { + source ./include.tcl + set tnum "008" + + puts "Log$tnum: Checkpoint test with records spanning log files." + env_cleanup $testdir + + # Set up env command for use later. + set envcmd "berkdb_env -create -txn -home $testdir" + + # Start up a child process which will open a bunch of handles + # on a database and write to it, running until it creates a + # checkpoint with records spanning two log files. + puts "\tLog$tnum.a: Spawning child tclsh." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + log008script.tcl $testdir/log008script.log $nhandles &] + + watch_procs $pid 3 + + puts "\tLog$tnum.b: Child is done." + + # Join the env with recovery. This ought to work. + puts "\tLog$tnum.c: Join abandoned child env with recovery." + set env [eval $envcmd -recover] + + # Clean up. 
+ error_check_good env_close [$env close] 0 + + # Check log file for failures. + set errstrings [eval findfail $testdir/log008script.log] + foreach str $errstrings { + puts "FAIL: error message in log008 log file: $str" + } +} + diff --git a/test/tcl/log008script.tcl b/test/tcl/log008script.tcl new file mode 100644 index 00000000..ab77f70b --- /dev/null +++ b/test/tcl/log008script.tcl @@ -0,0 +1,84 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Log008 script - dbreg_ckp and txn_ckp records spanning log files. +# +# Usage: log008script + +source ./include.tcl +set tnum "008" +set usage "log008script nhandles" + +# Verify usage +if { $argc != 1 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set nhandles [ lindex $argv 0 ] + +# We make the log files small so it's likely that the +# records will end up in different files. +set maxbsize [expr 8 * 1024] +set maxfile [expr 32 * 1024] + +# Set up environment. +set envcmd "berkdb_env -create -txn -home $testdir \ + -log_buffer $maxbsize -log_max $maxfile" +set dbenv [eval $envcmd] +error_check_good dbenv [is_valid_env $dbenv] TRUE + +# Open a lot of database handles. +set filename TESTFILE +set handlelist {} +for { set i 0 } { $i < $nhandles } { incr i } { + set db [berkdb_open \ + -create -env $dbenv -auto_commit -btree $filename] + lappend handlelist $db +} + +# Fill log files, checking LSNs before and after a checkpoint, +# until we generate a case where the records span two log files. +set i 0 +while { 1 } { + set txn [$dbenv txn] + foreach handle $handlelist { + error_check_good \ + db_put [$handle put -txn $txn key.$i data.$i] 0 + incr i + } + error_check_good txn_commit [$txn commit] 0 + + # Find current LSN file number. + set filenum [stat_field $dbenv log_stat "Current log file number"] + + # Checkpoint. 
+ error_check_good checkpoint [$dbenv txn_checkpoint] 0 + + # Find current LSN. + set newfilenum [stat_field $dbenv log_stat "Current log file number"] + if { [expr $newfilenum > $filenum] } { + break + } +} + +# Do one more transactional operation per fileid. +set txn [$dbenv txn] +foreach handle $handlelist { + error_check_good \ + db_put [$handle put -txn $txn key.$i data.$i] 0 + incr i +} +error_check_good txn_commit [$txn commit] 0 + +# Archive, deleting the log files we think we no longer need. +# Flush first to be sure everything is on disk for db_archive. +$dbenv log_flush +set stat [eval exec $util_path/db_archive -d -h $testdir] + +# Child is done. Exit, abandoning the env instead of closing it. +exit diff --git a/test/tcl/log009.tcl b/test/tcl/log009.tcl new file mode 100644 index 00000000..2da13c47 --- /dev/null +++ b/test/tcl/log009.tcl @@ -0,0 +1,122 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST log009 +# TEST Test of logging and getting log file version information. +# TEST Each time we cross a log file boundary verify we can +# TEST get the version via the log cursorlag. +# TEST Do this both forward and backward. +# +proc log009 { } { + source ./include.tcl + global errorInfo + + env_cleanup $testdir + set niter 200 + set method btree + + puts "Log009: Retrieve log version using log cursor." + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + # Open an env. 
+ set envcmd "berkdb_env_noerr -create \ + -log_buffer $log_buf -log_max $log_max -txn -home $testdir" + set env [eval $envcmd] + error_check_good env [is_valid_env $env] TRUE + + set stop 0 + set start 0 + # + # Loop until we have at least 3 log files. + # + while { $stop == 0 } { + puts "\tLog009.a: Running test in to generate log files." + eval rep_test \ + $method $env NULL $niter $start $start 0 0 $largs + incr start $niter + + set last_log [get_logfile $env last] + if { $last_log >= 3 } { + set stop 1 + } + } + + # We now have at least 3 log files. Walk a cursor both ways + # through the log and make sure we can get the version when we + # cross a log file boundary. + set curfile 0 + set logc [$env log_cursor] + error_check_good logc [is_valid_logc $logc $env] TRUE + + puts "\tLog009.b: Try to get version on unset cursor." + set stat [catch {eval $logc version} ret] + error_check_bad stat $stat 0 + error_check_good err [is_substr $ret "unset cursor"] 1 + + # Check walking forward through logs looking for log + # file boundaries. + # + puts "\tLog009.c: Walk log forward checking persist." + for { set logrec [$logc get -first] } \ + { [llength $logrec] != 0 } \ + { set logrec [$logc get -next] } { + set lsn [lindex $logrec 0] + set lsnfile [lindex $lsn 0] + if { $curfile != $lsnfile } { + log009_check $logc $logrec + set curfile $lsnfile + } + } + error_check_good logclose [$logc close] 0 + + set curfile 0 + set logc [$env log_cursor] + error_check_good logc [is_valid_logc $logc $env] TRUE + # + # Check walking backward through logs looking for log + # file boundaries. + # + puts "\tLog009.d: Walk log backward checking persist." 
+ for { set logrec [$logc get -last] } \ + { [llength $logrec] != 0 } \ + { set logrec [$logc get -prev] } { + set lsn [lindex $logrec 0] + set lsnfile [lindex $lsn 0] + if { $curfile != $lsnfile } { + log009_check $logc $logrec + set curfile $lsnfile + } + } + error_check_good logclose [$logc close] 0 + error_check_good env_close [$env close] 0 +} + +proc log009_check { logc logrec } { + set version [$logc version] + # + # We don't have ready access to the current log + # version, but make sure it is something reasonable. + # + # !!! + # First readable log is 8, current log version + # is pretty far from 20. + # + set reasonable [expr $version > 7 && $version < 20] + error_check_good persist $reasonable 1 + # + # Verify that getting the version doesn't move + # or change the log cursor in any way. + # + set logrec1 [$logc get -current] + error_check_good current $logrec $logrec1 +} diff --git a/test/tcl/logtrack.list b/test/tcl/logtrack.list new file mode 100644 index 00000000..6a130659 --- /dev/null +++ b/test/tcl/logtrack.list @@ -0,0 +1,67 @@ +PREFIX __crdel +BEGIN metasub 42 142 +BEGIN inmem_create 44 138 +BEGIN inmem_rename 44 139 +BEGIN inmem_remove 44 140 +PREFIX __db +BEGIN addrem 50 41 +BEGIN big 50 43 +BEGIN ovref 42 44 +BEGIN debug 42 47 +BEGIN noop 42 48 +BEGIN pg_alloc 43 49 +BEGIN pg_free 43 50 +BEGIN cksum 42 51 +BEGIN pg_freedata 43 52 +BEGIN pg_init 43 60 +BEGIN pg_trunc 50 66 +BEGIN realloc 50 36 +BEGIN relink 44 147 +BEGIN merge 47 148 +BEGIN pgno 44 149 +PREFIX __dbreg +BEGIN register 42 2 +PREFIX __bam +BEGIN split 50 62 +BEGIN rsplit 42 63 +BEGIN adj 42 55 +BEGIN cadjust 42 56 +BEGIN cdel 42 57 +BEGIN repl 42 58 +BEGIN irep 50 67 +BEGIN root 42 59 +BEGIN curadj 42 64 +BEGIN rcuradj 42 65 +PREFIX __fop +BEGIN create 48 143 +BEGIN remove 42 144 +BEGIN write 48 145 +BEGIN rename 48 146 +BEGIN file_remove 42 141 +PREFIX __ham +BEGIN insdel 50 21 +BEGIN newpage 42 22 +BEGIN splitdata 42 24 +BEGIN replace 50 25 +BEGIN copypage 42 28 +BEGIN 
metagroup 43 29 +BEGIN groupalloc 43 32 +BEGIN changeslot 50 35 +BEGIN contract 50 37 +BEGIN curadj 42 33 +BEGIN chgpg 42 34 +PREFIX __heap +BEGIN addrem 49 151 +BEGIN pg_alloc 49 152 +PREFIX __qam +BEGIN incfirst 42 84 +BEGIN mvptr 42 85 +BEGIN del 42 79 +BEGIN add 42 80 +BEGIN delext 42 83 +PREFIX __txn +BEGIN regop 44 10 +BEGIN ckp 43 11 +BEGIN child 42 12 +BEGIN prepare 48 13 +BEGIN recycle 42 14 diff --git a/test/tcl/logtrack.tcl b/test/tcl/logtrack.tcl new file mode 100644 index 00000000..df10709e --- /dev/null +++ b/test/tcl/logtrack.tcl @@ -0,0 +1,142 @@ +# See the file LICENSE for redistribution information +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# logtrack.tcl: A collection of routines, formerly implemented in Perl +# as log.pl, to track which log record types the test suite hits. + +set ltsname "logtrack_seen.db" +set ltlist $test_path/logtrack.list +set tmpname "logtrack_tmp" + +proc logtrack_clean { } { + global ltsname + + file delete -force $ltsname + + return +} + +proc logtrack_init { } { + global ltsname + + logtrack_clean + + # Create an empty tracking database. + [berkdb_open -create -truncate -btree $ltsname] close + + return +} + +# Dump the logs for directory dirname and record which log +# records were seen. 
+proc logtrack_read { dirname } { + global ltsname tmpname util_path + global encrypt passwd + + set seendb [berkdb_open $ltsname] + error_check_good seendb_open [is_valid_db $seendb] TRUE + + file delete -force $tmpname + set pargs " -N -h $dirname " + if { $encrypt > 0 } { + append pargs " -P $passwd " + } + set ret [catch {eval exec $util_path/db_printlog $pargs > $tmpname} res] + error_check_good printlog $ret 0 + error_check_good tmpfile_exists [file exists $tmpname] 1 + + set f [open $tmpname r] + while { [gets $f record] >= 0 } { + set r [regexp {\[[^\]]*\]\[[^\]]*\]([^\:]*)\:} $record whl name] + if { $r == 1 } { + error_check_good seendb_put [$seendb put $name ""] 0 + } + } + close $f + file delete -force $tmpname + + error_check_good seendb_close [$seendb close] 0 +} + +# Print the log record types that were seen but should not have been +# seen and the log record types that were not seen but should have been seen. +proc logtrack_summary { } { + global ltsname ltlist testdir + global one_test + + set seendb [berkdb_open $ltsname] + error_check_good seendb_open [is_valid_db $seendb] TRUE + set existdb [berkdb_open -create -btree] + error_check_good existdb_open [is_valid_db $existdb] TRUE + set deprecdb [berkdb_open -create -btree] + error_check_good deprecdb_open [is_valid_db $deprecdb] TRUE + + error_check_good ltlist_exists [file exists $ltlist] 1 + set f [open $ltlist r] + set pref "" + while { [gets $f line] >= 0 } { + # Get the keyword, the first thing on the line: + # BEGIN/DEPRECATED/IGNORED/PREFIX + set keyword [lindex $line 0] + + if { [string compare $keyword PREFIX] == 0 } { + # New prefix. + set pref [lindex $line 1] + } elseif { [string compare $keyword BEGIN] == 0 } { + # A log type we care about; put it on our list. + + # Skip noop and debug. 
+ if { [string compare [lindex $line 1] noop] == 0 } { + continue + } + if { [string compare [lindex $line 1] debug] == 0 } { + continue + } + + error_check_good exist_put [$existdb put \ + ${pref}_[lindex $line 1] ""] 0 + } elseif { [string compare $keyword DEPRECATED] == 0 || + [string compare $keyword IGNORED] == 0 } { + error_check_good deprec_put [$deprecdb put \ + ${pref}_[lindex $line 1] ""] 0 + } + } + + error_check_good exist_curs \ + [is_valid_cursor [set ec [$existdb cursor]] $existdb] TRUE + while { [llength [set dbt [$ec get -next]]] != 0 } { + set rec [lindex [lindex $dbt 0] 0] + if { [$seendb count $rec] == 0 && $one_test == "ALL" } { + if { $rec == "__db_pg_prepare" } { + puts "WARNING: log record type $rec can be\ + seen only on systems without FTRUNCATE." + } + puts "WARNING: log record type $rec: not tested" + } + } + error_check_good exist_curs_close [$ec close] 0 + + error_check_good seen_curs \ + [is_valid_cursor [set sc [$seendb cursor]] $seendb] TRUE + while { [llength [set dbt [$sc get -next]]] != 0 } { + set rec [lindex [lindex $dbt 0] 0] + if { [$existdb count $rec] == 0 } { + if { [$deprecdb count $rec] == 0 } { + puts "WARNING: log record type $rec: unknown" + } else { + puts \ + "WARNING: log record type $rec: deprecated" + } + } + } + error_check_good seen_curs_close [$sc close] 0 + + error_check_good seendb_close [$seendb close] 0 + error_check_good existdb_close [$existdb close] 0 + error_check_good deprecdb_close [$deprecdb close] 0 + + logtrack_clean +} diff --git a/test/tcl/mdbscript.tcl b/test/tcl/mdbscript.tcl new file mode 100644 index 00000000..e7e2019f --- /dev/null +++ b/test/tcl/mdbscript.tcl @@ -0,0 +1,402 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Process script for the multi-process db tester. 
+ +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +global dbenv +global klock +global l_keys +global procid +global alphabet + +# In Tcl, when there are multiple catch handlers, *all* handlers +# are called, so we have to resort to this hack. +# +global exception_handled + +set exception_handled 0 + +set datastr $alphabet$alphabet + +# Usage: mdbscript method dir file nentries iter procid procs +# dir: DBHOME directory +# file: db file on which to operate +# nentries: number of entries taken from dictionary +# iter: number of operations to run +# procid: this process's id number +# procs: total number of processes running +set usage "mdbscript method dir file nentries iter procid procs" + +# Verify usage +if { $argc < 7 } { + puts "FAIL:[timestamp] test042: Usage: $usage" + exit +} + +# Initialize arguments +set method [lindex $argv 0] +set dir [lindex $argv 1] +set file [lindex $argv 2] +set nentries [ lindex $argv 3 ] +set iter [ lindex $argv 4 ] +set procid [ lindex $argv 5 ] +set procs [ lindex $argv 6 ] +set args [ lindex $argv 7 ] + +set pflags "" +set gflags "" +set txn "" + +set renum [is_rrecno $method] +set omethod [convert_method $method] + +if { [is_record_based $method] == 1 } { + append gflags " -recno" +} + +# Initialize seed +global rand_init + +# We want repeatable results, but we also want each instance of mdbscript +# to do something different. So we add the procid to the fixed seed. +# (Note that this is a serial number given by the caller, not a pid.) +berkdb srand [expr $rand_init + $procid] + +puts "Beginning execution for [pid] $method" +puts "$dir db_home" +puts "$file database" +puts "$nentries data elements" +puts "$iter iterations" +puts "$procid process id" +puts "$procs processes" +eval set args $args +puts "args: $args" + +set klock NOLOCK + +# Note: all I/O operations, and especially flush, are expensive +# on Win2000 at least with Tcl version 8.3.2. 
So we'll avoid +# flushes in the main part of the loop below. +flush stdout + +set dbenv [berkdb_env -create -cdb -home $dir] +#set dbenv [berkdb_env -create -cdb -log -home $dir] +error_check_good dbenv [is_valid_env $dbenv] TRUE + +set locker [ $dbenv lock_id ] + +set db [eval {berkdb_open} -env $dbenv $omethod $args {$file}] +error_check_good dbopen [is_valid_db $db] TRUE + +# Init globals (no data) +set nkeys [db_init $db 0] +puts "Initial number of keys: $nkeys" +tclsleep 5 + +proc get_lock { k } { + global dbenv + global procid + global locker + global klock + global DB_LOCK_WRITE + global DB_LOCK_NOWAIT + global errorInfo + global exception_handled + # Make sure that the key isn't in the middle of + # a delete operation + if {[catch {$dbenv lock_get -nowait write $locker $k} klock] != 0 } { + set exception_handled 1 + + error_check_good \ + get_lock [is_substr $errorInfo "DB_LOCK_NOTGRANTED"] 1 + puts "Warning: key $k locked" + set klock NOLOCK + return 1 + } else { + error_check_good get_lock [is_valid_lock $klock $dbenv] TRUE + } + return 0 +} + +# If we are renumbering, then each time we delete an item, the number of +# items in the file is temporarily decreased, so the highest record numbers +# do not exist. To make sure this doesn't happen, we never generate the +# highest few record numbers as keys. +# +# For record-based methods, record numbers begin at 1, while for other keys, +# we begin at 0 to index into an array. +proc rand_key { method nkeys renum procs} { + if { $renum == 1 } { + return [berkdb random_int 1 [expr $nkeys - $procs]] + } elseif { [is_record_based $method] == 1 } { + return [berkdb random_int 1 $nkeys] + } else { + return [berkdb random_int 0 [expr $nkeys - 1]] + } +} + +# On each iteration we're going to randomly pick a key. +# 1. We'll either get it (verifying that its contents are reasonable). +# 2. Put it (using an overwrite to make the data be datastr:ID). +# 3. 
Get it and do a put through the cursor, tacking our ID on to +# 4. Get it, read forward some random number of keys. +# 5. Get it, read forward some random number of keys and do a put (replace). +# 6. Get it, read forward some random number of keys and do a del. And then +# do a put of the key. +set gets 0 +set getput 0 +set overwrite 0 +set seqread 0 +set seqput 0 +set seqdel 0 +set dlen [string length $datastr] + +for { set i 0 } { $i < $iter } { incr i } { + set op [berkdb random_int 0 5] + puts "iteration $i operation $op" + set close_cursor 0 + if {[catch { + switch $op { + 0 { + incr gets + set k [rand_key $method $nkeys $renum $procs] + if {[is_record_based $method] == 1} { + set key $k + } else { + set key [lindex $l_keys $k] + } + + if { [get_lock $key] == 1 } { + incr i -1 + continue; + } + + set rec [eval {$db get} $txn $gflags {$key}] + error_check_bad "$db get $key" [llength $rec] 0 + set partial [string range \ + [lindex [lindex $rec 0] 1] 0 [expr $dlen - 1]] + error_check_good \ + "$db get $key" $partial [pad_data $method $datastr] + } + 1 { + incr overwrite + set k [rand_key $method $nkeys $renum $procs] + if {[is_record_based $method] == 1} { + set key $k + } else { + set key [lindex $l_keys $k] + } + + set data $datastr:$procid + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $data]}] + error_check_good "$db put $key" $ret 0 + } + 2 { + incr getput + set dbc [$db cursor -update] + error_check_good "$db cursor" \ + [is_valid_cursor $dbc $db] TRUE + set close_cursor 1 + set k [rand_key $method $nkeys $renum $procs] + if {[is_record_based $method] == 1} { + set key $k + } else { + set key [lindex $l_keys $k] + } + + if { [get_lock $key] == 1 } { + incr i -1 + error_check_good "$dbc close" \ + [$dbc close] 0 + set close_cursor 0 + continue; + } + + set ret [$dbc get -set $key] + error_check_good \ + "$dbc get $key" [llength [lindex $ret 0]] 2 + set rec [lindex [lindex $ret 0] 1] + set partial [string range $rec 0 [expr $dlen - 1]] + 
error_check_good \ + "$dbc get $key" $partial [pad_data $method $datastr] + append rec ":$procid" + set ret [$dbc put \ + -current [chop_data $method $rec]] + error_check_good "$dbc put $key" $ret 0 + error_check_good "$dbc close" [$dbc close] 0 + set close_cursor 0 + } + 3 - + 4 - + 5 { + if { $op == 3 } { + set flags "" + } else { + set flags -update + } + set dbc [eval {$db cursor} $flags] + error_check_good "$db cursor" \ + [is_valid_cursor $dbc $db] TRUE + set close_cursor 1 + set k [rand_key $method $nkeys $renum $procs] + if {[is_record_based $method] == 1} { + set key $k + } else { + set key [lindex $l_keys $k] + } + + if { [get_lock $key] == 1 } { + incr i -1 + error_check_good "$dbc close" \ + [$dbc close] 0 + set close_cursor 0 + continue; + } + + set ret [$dbc get -set $key] + error_check_good \ + "$dbc get $key" [llength [lindex $ret 0]] 2 + + # Now read a few keys sequentially + set nloop [berkdb random_int 0 10] + if { [berkdb random_int 0 1] == 0 } { + set flags -next + } else { + set flags -prev + } + while { $nloop > 0 } { + set lastret $ret + set ret [eval {$dbc get} $flags] + # Might read beginning/end of file + if { [llength $ret] == 0} { + set ret $lastret + break + } + incr nloop -1 + } + switch $op { + 3 { + incr seqread + } + 4 { + incr seqput + set rec [lindex [lindex $ret 0] 1] + set partial [string range $rec 0 \ + [expr $dlen - 1]] + error_check_good "$dbc get $key" \ + $partial [pad_data $method $datastr] + append rec ":$procid" + set ret [$dbc put -current \ + [chop_data $method $rec]] + error_check_good \ + "$dbc put $key" $ret 0 + } + 5 { + incr seqdel + set k [lindex [lindex $ret 0] 0] + # We need to lock the item we're + # deleting so that someone else can't + # try to do a get while we're + # deleting + error_check_good "$klock put" \ + [$klock put] 0 + set klock NOLOCK + set cur [$dbc get -current] + error_check_bad get_current \ + [llength $cur] 0 + set key [lindex [lindex $cur 0] 0] + if { [get_lock $key] == 1 } { + incr i -1 
+ error_check_good "$dbc close" \ + [$dbc close] 0 + set close_cursor 0 + continue + } + set ret [$dbc del] + error_check_good "$dbc del" $ret 0 + set rec $datastr + append rec ":$procid" + if { $renum == 1 } { + set ret [$dbc put -before \ + [chop_data $method $rec]] + error_check_good \ + "$dbc put $k" $ret $k + } elseif { \ + [is_record_based $method] == 1 } { + error_check_good "$dbc close" \ + [$dbc close] 0 + set close_cursor 0 + set ret [$db put $k \ + [chop_data $method $rec]] + error_check_good \ + "$db put $k" $ret 0 + } else { + set ret [$dbc put -keylast $k \ + [chop_data $method $rec]] + error_check_good \ + "$dbc put $k" $ret 0 + } + } + } + if { $close_cursor == 1 } { + error_check_good \ + "$dbc close" [$dbc close] 0 + set close_cursor 0 + } + } + } + } res] != 0} { + global errorInfo; + global exception_handled; + +# puts $errorInfo + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + + if { [string compare $klock NOLOCK] != 0 } { + catch {$klock put} + } + if {$close_cursor == 1} { + catch {$dbc close} + set close_cursor 0 + } + + if {[string first FAIL $theError] == 0 && \ + $exception_handled != 1} { + flush stdout + error "FAIL:[timestamp] test042: key $k: $theError" + } + set exception_handled 0 + } else { + if { [string compare $klock NOLOCK] != 0 } { + error_check_good "$klock put" [$klock put] 0 + set klock NOLOCK + } + } +} + +error_check_good db_close_catch [catch {$db close} ret] 0 +error_check_good db_close $ret 0 +error_check_good dbenv_close [$dbenv close] 0 + +flush stdout +exit + +puts "[timestamp] [pid] Complete" +puts "Successful ops: " +puts "\t$gets gets" +puts "\t$overwrite overwrites" +puts "\t$getput getputs" +puts "\t$seqread seqread" +puts "\t$seqput seqput" +puts "\t$seqdel seqdel" +flush stdout diff --git a/test/tcl/memp001.tcl b/test/tcl/memp001.tcl new file mode 100644 index 00000000..a3336afd --- /dev/null +++ b/test/tcl/memp001.tcl @@ -0,0 +1,209 @@ +# See the file 
LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST memp001 +# TEST Randomly updates pages. +proc memp001 { } { + source ./include.tcl + + # Some hosts can reserve a shared memory key for + # particular uses -- for example, bdbcn11 uses "1". + # Pick a different number. + + set shm_key 20 + + memp001_body 1 "" + memp001_body 3 "" + memp001_body 1 -private + memp001_body 3 -private + if { $is_qnx_test } { + puts "Skipping remainder of memp001 for\ + environments in system memory on QNX" + return + } + memp001_body 1 "-system_mem -shm_key $shm_key" + memp001_body 3 "-system_mem -shm_key $shm_key" +} + +proc memp001_body { ncache flags } { + source ./include.tcl + global rand_init + + set nfiles 5 + set iter 500 + set psize 512 + set cachearg "-cachesize {0 400000 $ncache}" + + puts \ +"Memp001: { $flags } random update $iter iterations on $nfiles files." + # + # Check if this platform supports this set of flags + # + if { [mem_chk $flags] == 1 } { + return + } + + env_cleanup $testdir + puts "\tMemp001.a: Create env with $ncache caches" + set env [eval {berkdb_env -create -mode 0644} \ + $cachearg {-home $testdir} $flags] + error_check_good env_open [is_valid_env $env] TRUE + + # + # Do a simple mpool_stat call to verify the number of caches + # just to exercise the stat code. 
+ set stat [$env mpool_stat] + set str "Number of caches" + set checked 0 + foreach statpair $stat { + if { $checked == 1 } { + break + } + if { [is_substr [lindex $statpair 0] $str] != 0} { + set checked 1 + error_check_good ncache [lindex $statpair 1] $ncache + } + } + error_check_good checked $checked 1 + + # Open N memp files + puts "\tMemp001.b: Create $nfiles mpool files" + for {set i 1} {$i <= $nfiles} {incr i} { + set fname "data_file.$i" + file_create $testdir/$fname 50 $psize + + set mpools($i) \ + [$env mpool -create -pagesize $psize -mode 0644 $fname] + error_check_good mp_open [is_substr $mpools($i) $env.mp] 1 + } + + # Now, loop, picking files at random + berkdb srand $rand_init + puts "\tMemp001.c: Random page replacement loop" + for {set i 0} {$i < $iter} {incr i} { + set mpool $mpools([berkdb random_int 1 $nfiles]) + set p(1) [get_range $mpool 10] + set p(2) [get_range $mpool 10] + set p(3) [get_range $mpool 10] + set p(1) [replace $mpool $p(1)] + set p(3) [replace $mpool $p(3)] + set p(4) [get_range $mpool 20] + set p(4) [replace $mpool $p(4)] + set p(5) [get_range $mpool 10] + set p(6) [get_range $mpool 20] + set p(7) [get_range $mpool 10] + set p(8) [get_range $mpool 20] + set p(5) [replace $mpool $p(5)] + set p(6) [replace $mpool $p(6)] + set p(9) [get_range $mpool 40] + set p(9) [replace $mpool $p(9)] + set p(10) [get_range $mpool 40] + set p(7) [replace $mpool $p(7)] + set p(8) [replace $mpool $p(8)] + set p(9) [replace $mpool $p(9) ] + set p(10) [replace $mpool $p(10)] + # + # We now need to put all the pages we have here or + # else they end up pinned. + # + for {set x 1} { $x <= 10} {incr x} { + error_check_good pgput [$p($x) put] 0 + } + } + + # Close N memp files, close the environment. 
+ puts "\tMemp001.d: Close mpools" + for {set i 1} {$i <= $nfiles} {incr i} { + error_check_good memp_close:$mpools($i) [$mpools($i) close] 0 + } + error_check_good envclose [$env close] 0 + + for {set i 1} {$i <= $nfiles} {incr i} { + fileremove -f $testdir/data_file.$i + } +} + +proc file_create { fname nblocks blocksize } { + set fid [open $fname w] + for {set i 0} {$i < $nblocks} {incr i} { + seek $fid [expr $i * $blocksize] start + puts -nonewline $fid $i + } + seek $fid [expr $nblocks * $blocksize - 1] + + # We don't end the file with a newline, because some platforms (like + # Windows) emit CR/NL. There does not appear to be a BINARY open flag + # that prevents this. + puts -nonewline $fid "Z" + close $fid + + # Make sure it worked + if { [file size $fname] != $nblocks * $blocksize } { + error "FAIL: file_create could not create correct file size" + } +} + +proc get_range { mpool max } { + set pno [berkdb random_int 0 $max] + set p [eval $mpool get $pno] + error_check_good page [is_valid_page $p $mpool] TRUE + set got [$p pgnum] + if { $got != $pno } { + puts "Get_range: Page mismatch page |$pno| val |$got|" + } + set ret [$p init "Page is pinned by [pid]"] + error_check_good page_init $ret 0 + + return $p +} + +proc replace { mpool p { args "" } } { + set pgno [$p pgnum] + + set ret [$p init "Page is unpinned by [pid]"] + error_check_good page_init $ret 0 + + set ret [$p put] + error_check_good page_put $ret 0 + + set p2 [eval $mpool get $args $pgno] + error_check_good page [is_valid_page $p2 $mpool] TRUE + + return $p2 +} + +proc mem_chk { flags } { + source ./include.tcl + global errorCode + + # Open the memp with region init specified + env_cleanup $testdir + + set cachearg " -cachesize {0 400000 3}" + set ret [catch {eval {berkdb_env_noerr -create -mode 0644}\ + $cachearg {-region_init -home $testdir} $flags} env] + if { $ret != 0 } { + # If the env open failed, it may be because we're on a platform + # such as HP-UX 10 that won't support mutexes in 
shmget memory. + # Or QNX, which doesn't support system memory at all. + # Verify that the return value was EINVAL or EOPNOTSUPP + # and bail gracefully. + error_check_good is_shm_test [is_substr $flags -system_mem] 1 + error_check_good returned_error [expr \ + [is_substr $errorCode EINVAL] || \ + [is_substr $errorCode EOPNOTSUPP]] 1 + puts "Warning:\ + platform does not support mutexes in shmget memory." + puts "Skipping shared memory mpool test." + return 1 + } + error_check_good env_open [is_valid_env $env] TRUE + error_check_good env_close [$env close] 0 + env_cleanup $testdir + + return 0 +} diff --git a/test/tcl/memp002.tcl b/test/tcl/memp002.tcl new file mode 100644 index 00000000..fb5fc2c0 --- /dev/null +++ b/test/tcl/memp002.tcl @@ -0,0 +1,74 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST memp002 +# TEST Tests multiple processes accessing and modifying the same files. +proc memp002 { } { + source ./include.tcl + # + # Multiple processes not supported by private memory so don't + # run memp002_body with -private. + # + memp002_body "" + if { $is_qnx_test } { + puts "Skipping remainder of memp002 for\ + environments in system memory on QNX" + return + } + set shm_key 20 + memp002_body "-system_mem -shm_key $shm_key" +} + +proc memp002_body { flags } { + source ./include.tcl + + puts "Memp002: {$flags} Multiprocess mpool tester" + + set procs 4 + set psizes "512 1024 2048 4096 8192" + set iterations 500 + set npages 100 + + # Check if this combination of flags is supported by this arch. + if { [mem_chk $flags] == 1 } { + return + } + + set iter [expr $iterations / $procs] + + # Clean up old stuff and create new. 
+ env_cleanup $testdir + + for { set i 0 } { $i < [llength $psizes] } { incr i } { + fileremove -f $testdir/file$i + } + set e [eval {berkdb_env -create -lock -home $testdir} $flags] + error_check_good dbenv [is_valid_env $e] TRUE + + set pidlist {} + for { set i 0 } { $i < $procs } {incr i} { + + puts "$tclsh_path\ + $test_path/mpoolscript.tcl $testdir $i $procs \ + $iter $psizes $npages 3 $flags > \ + $testdir/memp002.$i.out &" + set p [exec $tclsh_path $test_path/wrap.tcl \ + mpoolscript.tcl $testdir/memp002.$i.out $testdir $i $procs \ + $iter $psizes $npages 3 $flags &] + lappend pidlist $p + } + puts "Memp002: $procs independent processes now running" + watch_procs $pidlist 30 + + # Check for test failure + set errstrings [eval findfail [glob $testdir/memp002.*.out]] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + + reset_env $e +} diff --git a/test/tcl/memp003.tcl b/test/tcl/memp003.tcl new file mode 100644 index 00000000..a9b3bff2 --- /dev/null +++ b/test/tcl/memp003.tcl @@ -0,0 +1,160 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST memp003 +# TEST Test reader-only/writer process combinations; we use the access methods +# TEST for testing. +proc memp003 { } { + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + # + # Multiple processes not supported by private memory so don't + # run memp003_body with -private. 
+ # + memp003_body "" + if { $is_qnx_test } { + puts "Skipping remainder of memp003 for\ + environments in system memory on QNX" + return + } + set shm_key 20 + memp003_body "-system_mem -shm_key $shm_key" +} + +proc memp003_body { flags } { + global alphabet + source ./include.tcl + + puts "Memp003: {$flags} Reader/Writer tests" + + if { [mem_chk $flags] == 1 } { + return + } + + env_cleanup $testdir + set psize 1024 + set nentries 500 + set testfile mpool.db + set t1 $testdir/t1 + + # Create an environment that the two processes can share, with + # 20 pages per cache. + set c [list 0 [expr $psize * 20 * 3] 3] + set dbenv [eval {berkdb_env \ + -create -lock -home $testdir -cachesize $c} $flags] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # First open and create the file. + set db [berkdb_open -env $dbenv -create \ + -mode 0644 -pagesize $psize -btree $testfile] + error_check_good dbopen/RW [is_valid_db $db] TRUE + + set did [open $dict] + set txn "" + set count 0 + + puts "\tMemp003.a: create database" + set keys "" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + lappend keys $str + + set ret [eval {$db put} $txn {$str $str}] + error_check_good put $ret 0 + + set ret [eval {$db get} $txn {$str}] + error_check_good get $ret [list [list $str $str]] + + incr count + } + close $did + error_check_good close [$db close] 0 + + # Now open the file for read-only + set db [berkdb_open -env $dbenv -rdonly $testfile] + error_check_good dbopen/RO [is_substr $db db] 1 + + puts "\tMemp003.b: verify a few keys" + # Read and verify a couple of keys; saving them to check later + set testset "" + for { set i 0 } { $i < 10 } { incr i } { + set ndx [berkdb random_int 0 [expr $nentries - 1]] + set key [lindex $keys $ndx] + if { [lsearch $testset $key] != -1 } { + incr i -1 + continue; + } + + # The remote process stuff is unhappy with + # zero-length keys; make sure we don't pick one. 
+ if { [llength $key] == 0 } { + incr i -1 + continue + } + + lappend testset $key + + set ret [eval {$db get} $txn {$key}] + error_check_good get/RO $ret [list [list $key $key]] + } + + puts "\tMemp003.c: retrieve and modify keys in remote process" + # Now open remote process where we will open the file RW + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + puts $f1 "flush stdout" + flush $f1 + + set c [concat "{" [list 0 [expr $psize * 20 * 3] 3] "}" ] + set remote_env [send_cmd $f1 \ + "berkdb_env -create -lock -home $testdir -cachesize $c $flags"] + error_check_good remote_dbenv [is_valid_env $remote_env] TRUE + + set remote_db [send_cmd $f1 "berkdb_open -env $remote_env $testfile"] + error_check_good remote_dbopen [is_valid_db $remote_db] TRUE + + foreach k $testset { + # Get the key + set ret [send_cmd $f1 "$remote_db get $k"] + error_check_good remote_get $ret [list [list $k $k]] + + # Now replace the key + set ret [send_cmd $f1 "$remote_db put $k $k$k"] + error_check_good remote_put $ret 0 + } + + puts "\tMemp003.d: verify changes in local process" + foreach k $testset { + set ret [eval {$db get} $txn {$key}] + error_check_good get_verify/RO $ret [list [list $key $key$key]] + } + + puts "\tMemp003.e: Fill up the cache with dirty buffers" + foreach k $testset { + # Now rewrite the keys with BIG data + set data [replicate $alphabet 32] + set ret [send_cmd $f1 "$remote_db put $k $data"] + error_check_good remote_put $ret 0 + } + + puts "\tMemp003.f: Get more pages for the read-only file" + dump_file $db $txn $t1 nop + + puts "\tMemp003.g: Sync from the read-only file" + error_check_good db_sync [$db sync] 0 + error_check_good db_close [$db close] 0 + + set ret [send_cmd $f1 "$remote_db close"] + error_check_good remote_get $ret 0 + + # Close the environment both remotely and locally. 
+ set ret [send_cmd $f1 "$remote_env close"] + error_check_good remote:env_close $ret 0 + close $f1 + + reset_env $dbenv +} diff --git a/test/tcl/memp004.tcl b/test/tcl/memp004.tcl new file mode 100644 index 00000000..a1a8c99b --- /dev/null +++ b/test/tcl/memp004.tcl @@ -0,0 +1,82 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +# TEST memp004 +# TEST Test that small read-only databases are mapped into memory. +proc memp004 { } { + global is_qnx_test + source ./include.tcl + + puts "Memp004: Test of memory-mapped read-only databases" + + if { $is_qnx_test } { + puts "Memp004: skipping for QNX" + return + } + + env_cleanup $testdir + set testfile memp004.db + + # Create an environment. + puts "memp004.a: Create an environment and database" + set dbenv [eval {berkdb_env -create -home $testdir -private}] + error_check_good dbenv [is_valid_env $dbenv] TRUE + set db [berkdb_open -env $dbenv -create -mode 0644 -btree $testfile] + error_check_good dbopen/$testfile/RW [is_valid_db $db] TRUE + + # Put each key/data pair. + set did [open $dict] + set keys "" + set count 0 + while { [gets $did str] != -1 && $count < 1000 } { + lappend keys $str + + set ret [eval {$db put} {$str $str}] + error_check_good put $ret 0 + + incr count + } + close $did + error_check_good close [$db close] 0 + + # Discard the environment. + error_check_good close [$dbenv close] 0 + + puts "memp004.b: Re-create the environment and open database read-only" + set dbenv [eval {berkdb_env -create -home $testdir}] + error_check_good dbenv [is_valid_env $dbenv] TRUE + set db [berkdb_open -env $dbenv -rdonly $testfile] + error_check_good dbopen/$testfile/RO [is_substr $db db] 1 + + # Read a couple of keys. 
+ set c [eval {$db cursor}] + for { set i 0 } { $i < 500 } { incr i } { + set ret [$c get -next] + } + + puts "memp004.c: Check mpool statistics" + set tmp [memp004_stat $dbenv "Pages mapped into address space"] + error_check_good "mmap check: $tmp >= 500" [expr $tmp >= 500] 1 + + error_check_good db_close [$db close] 0 + reset_env $dbenv +} + +# memp004_stat -- +# Return the current mpool statistics. +proc memp004_stat { env s } { + set stat [$env mpool_stat] + foreach statpair $stat { + set statmsg [lindex $statpair 0] + set statval [lindex $statpair 1] + if {[is_substr $statmsg $s] != 0} { + return $statval + } + } + puts "FAIL: memp004: stat string $s not found" + return 0 +} diff --git a/test/tcl/memp005.tcl b/test/tcl/memp005.tcl new file mode 100644 index 00000000..a019dca9 --- /dev/null +++ b/test/tcl/memp005.tcl @@ -0,0 +1,49 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST memp005 +# TEST Make sure that db pagesize does not interfere with mpool pagesize. +proc memp005 { } { + source ./include.tcl + + puts "Memp005: Test interaction of database and mpool pagesize." + env_cleanup $testdir + + # Set the mpool pagesize. + puts "\tMemp005.a: Set mpool pagesize." + set mp_pagesize 1024 + set e [eval {berkdb_env -create -pagesize $mp_pagesize -home $testdir} ] + error_check_good dbenv [is_valid_env $e] TRUE + + # Check the pagesize through mpool_stat and through the getter. + set mpool_stat_pagesize [stat_field $e mpool_stat "Default pagesize"] + error_check_good check_mp_pagesize $mp_pagesize $mpool_stat_pagesize + set get_mp_pagesize [$e get_mp_pagesize] + error_check_good check_getter_pagesize $get_mp_pagesize $mp_pagesize + + # Set a different database pagesize. + puts "\tMemp005.b: Set different database pagesize." 
+ set db_pagesize 2048 + set db [eval {berkdb_open -create\ + -pagesize $db_pagesize -env $e -btree foo.db} ] + + # Make sure the mpool pagesize and database pagesizes are correct. + # Check both the stats and the getters. + puts "\tMemp005.c: Check values." + set mpool_stat_pagesize [stat_field $e mpool_stat "Default pagesize"] + error_check_good check_mp_pagesize $mp_pagesize $mpool_stat_pagesize + set get_mp_pagesize [$e get_mp_pagesize] + error_check_good check_mpgetter_pagesize $get_mp_pagesize $mp_pagesize + set db_stat_pagesize [stat_field $db stat "Page size"] + error_check_good check_db_pagesize $db_pagesize $db_stat_pagesize + set db_get_pagesize [$db get_pagesize] + error_check_good check_dbgetter_pagesize $db_pagesize $db_get_pagesize + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good env_close [$e close] 0 + +} diff --git a/test/tcl/mpoolscript.tcl b/test/tcl/mpoolscript.tcl new file mode 100644 index 00000000..5b0414ee --- /dev/null +++ b/test/tcl/mpoolscript.tcl @@ -0,0 +1,174 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Random multiple process mpool tester. +# Usage: mpoolscript dir id numiters numfiles numpages sleepint +# dir: lock directory. +# id: Unique identifier for this process. +# maxprocs: Number of procs in this test. +# numiters: Total number of iterations. +# pgsizes: Pagesizes for the different files. Length of this item indicates +# how many files to use. +# numpages: Number of pages per file. +# sleepint: Maximum sleep interval. 
+# flags: Flags for env open + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage \ + "mpoolscript dir id maxprocs numiters pgsizes numpages sleepint flags" + +# Verify usage +if { $argc != 8 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + puts $argc + exit +} + +# Initialize arguments +set dir [lindex $argv 0] +set id [lindex $argv 1] +set maxprocs [lindex $argv 2] +set numiters [ lindex $argv 3 ] +set pgsizes [ lindex $argv 4 ] +set numpages [ lindex $argv 5 ] +set sleepint [ lindex $argv 6 ] +set flags [ lindex $argv 7] + +# Initialize seed +global rand_init +berkdb srand $rand_init + +# Give time for all processes to start up. +tclsleep 10 + +puts -nonewline "Beginning execution for $id: $maxprocs $dir $numiters" +puts " $pgsizes $numpages $sleepint" +flush stdout + +# Figure out how small/large to make the cache +set max 0 +foreach i $pgsizes { + if { $i > $max } { + set max $i + } +} + +set cache [list 0 [expr $maxprocs * ([lindex $pgsizes 0] + $max)] 1] +set env_cmd {berkdb_env -lock -cachesize $cache -home $dir} +set e [eval $env_cmd $flags] +error_check_good env_open [is_valid_env $e] TRUE + +# Now open files +set mpools {} +set nfiles 0 +foreach psize $pgsizes { + set mp [$e mpool -create -mode 0644 -pagesize $psize file$nfiles] + error_check_good memp_fopen:$nfiles [is_valid_mpool $mp $e] TRUE + lappend mpools $mp + incr nfiles +} + +puts "Establishing long-term pin on file 0 page $id for process $id" + +# Set up the long-pin page +set locker [$e lock_id] +set lock [$e lock_get write $locker 0:$id] +error_check_good lock_get [is_valid_lock $lock $e] TRUE + +set mp [lindex $mpools 0] +set master_page [$mp get -create -dirty $id] +error_check_good mp_get:$master_page [is_valid_page $master_page $mp] TRUE + +set r [$master_page init MASTER$id] +error_check_good page_init $r 0 + +# Release the lock but keep the page pinned +set r [$lock put] +error_check_good lock_put $r 0 + +# Main loop. 
On each iteration, we'll check every page in each of +# of the files. On any file, if we see the appropriate tag in the +# field, we'll rewrite the page, else we won't. Keep track of +# how many pages we actually process. +set pages 0 +for { set iter 0 } { $iter < $numiters } { incr iter } { + puts "[timestamp]: iteration $iter, $pages pages set so far" + flush stdout + for { set fnum 1 } { $fnum < $nfiles } { incr fnum } { + if { [expr $fnum % 2 ] == 0 } { + set pred [expr ($id + $maxprocs - 1) % $maxprocs] + } else { + set pred [expr ($id + $maxprocs + 1) % $maxprocs] + } + + set mpf [lindex $mpools $fnum] + for { set p 0 } { $p < $numpages } { incr p } { + set lock [$e lock_get write $locker $fnum:$p] + error_check_good lock_get:$fnum:$p \ + [is_valid_lock $lock $e] TRUE + + # Now, get the page + set pp [$mpf get -create -dirty $p] + error_check_good page_get:$fnum:$p \ + [is_valid_page $pp $mpf] TRUE + + if { [$pp is_setto $pred] == 0 || [$pp is_setto 0] == 0 } { + # Set page to self. + set r [$pp init $id] + error_check_good page_init:$fnum:$p $r 0 + incr pages + set r [$pp put] + error_check_good page_put:$fnum:$p $r 0 + } else { + error_check_good page_put:$fnum:$p [$pp put] 0 + } + error_check_good lock_put:$fnum:$p [$lock put] 0 + } + } + tclsleep [berkdb random_int 1 $sleepint] +} + +# Now verify your master page, release its pin, then verify everyone else's +puts "$id: End of run verification of master page" +set r [$master_page is_setto MASTER$id] +error_check_good page_check $r 1 +set r [$master_page put] +error_check_good page_put $r 0 + +set i [expr ($id + 1) % $maxprocs] +set mpf [lindex $mpools 0] + +while { $i != $id } { + set p [$mpf get -create $i] + error_check_good mp_get [is_valid_page $p $mpf] TRUE + + set p1 [$mpf get -dirty $i] + error_check_good mp_get_p1_dirty [is_valid_page $p1 $mpf] TRUE + error_check_good page_put:$p1 [$p1 put] 0 + + if { [$p is_setto MASTER$i] != 1 } { + puts "Warning: Master page $i not set." 
+ } + error_check_good page_put:$p [$p put] 0 + + set i [expr ($i + 1) % $maxprocs] +} + +# Close files +foreach i $mpools { + set r [$i close] + error_check_good mpf_close $r 0 +} + +# Close environment system +set r [$e close] +error_check_good env_close $r 0 + +puts "[timestamp] $id Complete" +flush stdout diff --git a/test/tcl/mut001.tcl b/test/tcl/mut001.tcl new file mode 100644 index 00000000..9e8696fe --- /dev/null +++ b/test/tcl/mut001.tcl @@ -0,0 +1,111 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# +# TEST mut001 +# TEST Exercise the mutex API. +# TEST +# TEST Allocate, lock, unlock, and free a bunch of mutexes. +# TEST Set basic configuration options and check mutex_stat and +# TEST the mutex getters for the correct values. + +proc mut001 { } { + source ./include.tcl + env_cleanup $testdir + + puts "Mut001: Basic mutex interface testing." + + # Open an env. + set env [berkdb_env -create -home $testdir -private -mutex_set_incr 100] + + # Allocate, lock, unlock, and free a bunch of mutexes. + set nmutexes 100 + puts "\tMut001.a: Allocate a bunch of mutexes." + for { set i 0 } { $i < $nmutexes } { incr i } { + set mutexid($i) [$env mutex] + } + puts "\tMut001.b: Lock the mutexes." + for { set i 0 } { $i < $nmutexes } { incr i } { + error_check_good mutex_lock [$env mutex_lock $mutexid($i)] 0 + } + puts "\tMut001.c: Unlock the mutexes." + for { set i 0 } { $i < $nmutexes } { incr i } { + error_check_good mutex_unlock [$env mutex_unlock $mutexid($i)] 0 + } + puts "\tMut001.d: Free the mutexes." + for { set i 0 } { $i < $nmutexes } { incr i } { + error_check_good mutex_free [$env mutex_free $mutexid($i)] 0 + } + + # Clean up the env. We'll need new envs to test the configuration + # options, because they cannot be set after the env is open. 
+ error_check_good env_close [$env close] 0 + env_cleanup $testdir + + puts "\tMut001.e: Set the mutex alignment." + set mutex_align 8 + set env [berkdb_env -create -home $testdir -mutex_set_align $mutex_align] + + set stat_align [stat_field $env mutex_stat "Mutex align"] + set get_align [$env mutex_get_align] + error_check_good stat_align $stat_align $mutex_align + error_check_good get_align $get_align $mutex_align + + # Find the number of mutexes allocated by default. We'll need + # this later, when we try the "mutex_set_increment" option. + set default_count [stat_field $env mutex_stat "Mutex count"] + + error_check_good env_close [$env close] 0 + env_cleanup $testdir + + puts "\tMut001.f: Set the maximum number of mutexes." + set mutex_count 2000 + set env [berkdb_env -create -home $testdir -mutex_set_max $mutex_count] + + set stat_count [stat_field $env mutex_stat "Mutex max"] + set get_count [$env mutex_get_max] + error_check_good stat_count $stat_count $mutex_count + error_check_good get_count $get_count $mutex_count + + error_check_good env_close [$env close] 0 + env_cleanup $testdir + + puts "\tMut001.g: Raise the maximum number of mutexes." + set mutex_incr 500 + set mutex_count [expr $default_count + $mutex_incr] + + set env [berkdb_env -create -home $testdir \ + -mutex_set_max $default_count -mutex_set_incr $mutex_incr] + + set stat_count [stat_field $env mutex_stat "Mutex max"] + error_check_good stat_increment $stat_count $mutex_count + set get_count [$env mutex_get_max] + error_check_good get_increment $get_count $mutex_count + + error_check_good env_close [$env close] 0 + env_cleanup $testdir + + puts "\tMut001.h: Set and reset the number of TAS mutex spins." 
+ set mutex_tas_spins 50 + + set env [berkdb_env -create -home $testdir -mutex_set_tas_spins $mutex_tas_spins] + set stat_spins [stat_field $env mutex_stat "Mutex TAS spins"] + error_check_good stat_spins $stat_spins $mutex_tas_spins + set get_spins [$env mutex_get_tas_spins] + error_check_good get_spins $get_spins $mutex_tas_spins + + # TAS spins can be reset any time. + set mutex_tas_spins 1 + error_check_good reset_spins [$env mutex_set_tas_spins $mutex_tas_spins] 0 + set stat_spins [stat_field $env mutex_stat "Mutex TAS spins"] + error_check_good stat_spins_reset $stat_spins $mutex_tas_spins + set get_spins [$env mutex_get_tas_spins] + error_check_good get_spins_reset $get_spins $mutex_tas_spins + + error_check_good env_close [$env close] 0 + env_cleanup $testdir +} + diff --git a/test/tcl/mut002.tcl b/test/tcl/mut002.tcl new file mode 100644 index 00000000..da566460 --- /dev/null +++ b/test/tcl/mut002.tcl @@ -0,0 +1,52 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST mut002 +# TEST Two-process mutex test. +# TEST +# TEST Allocate and lock a self-blocking mutex. Start another process. +# TEST Try to lock the mutex again -- it will block. +# TEST Unlock the mutex from the other process, and the blocked +# TEST lock should be obtained. Clean up. +# TEST Do another test with a "-process-only" mutex. The second +# TEST process should not be able to unlock the mutex. + +proc mut002 { } { + source ./include.tcl + + puts "Mut002: Two process mutex test." + + # Open an env. + set env [berkdb_env -create -home $testdir] + + puts "\tMut002.a: Allocate and lock a mutex." + set mutex [$env mutex -self_block] + error_check_good obtained_lock [$env mutex_lock $mutex] 0 + + # Start a second process. + puts "\tMut002.b: Start another process." 
+ set p2 [exec $tclsh_path $test_path/wrap.tcl mut002script.tcl\ + $testdir/mut002.log $testdir $mutex &] + + # Try to lock the mutex again. This will hang until the second + # process unlocks it. + $env mutex_lock $mutex + + watch_procs $p2 1 20 + + # Clean up, and check the log file from process 2. + error_check_good mutex_unlock [$env mutex_unlock $mutex] 0 + error_check_good env_close [$env close] 0 + + # We expect the log file to be empty. If there are any + # messages, report them as failures. + set fd [open $testdir/mut002.log r] + while { [gets $fd line] >= 0 } { + puts "FAIL: unexpected output in log file mut002: $line" + } + close $fd +} + diff --git a/test/tcl/mut002script.tcl b/test/tcl/mut002script.tcl new file mode 100644 index 00000000..b9390168 --- /dev/null +++ b/test/tcl/mut002script.tcl @@ -0,0 +1,39 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Mut002script: for use with mut002, a 2-process mutex test. +# Usage: mut002script testdir +# testdir: directory containing the env we are joining. +# mutex: id of mutex + +source ./include.tcl + +set usage "mut002script testdir mutex" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments. +set testdir [ lindex $argv 0 ] +set mutex [ lindex $argv 1 ] + +# Open environment. +if {[catch {eval {berkdb_env} -create -home $testdir } dbenv]} { + puts "FAIL: opening env returned $dbenv" +} +error_check_good envopen [is_valid_env $dbenv] TRUE + +# Pause for a while to let the original process block. +tclsleep 10 + +# Unlock the mutex and let the original process proceed. +$dbenv mutex_unlock $mutex + +# Clean up. 
+error_check_good env_close [$dbenv close] 0 diff --git a/test/tcl/mut003.tcl b/test/tcl/mut003.tcl new file mode 100644 index 00000000..7014f80e --- /dev/null +++ b/test/tcl/mut003.tcl @@ -0,0 +1,38 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST mut003 +# TEST Try doing mutex operations out of order. Make sure +# TEST we get appropriate errors. + +proc mut003 { } { + source ./include.tcl + env_cleanup $testdir + + puts "Mut003: Out of order mutex operations." + + # Allocate a mutex. Try to unlock it before it's locked. + puts "\tMut003.a: Try to unlock a mutex that's not locked." + set env [berkdb_env_noerr -create -home $testdir] + set mutex [$env mutex] + catch { $env mutex_unlock $mutex } res + error_check_good \ + already_unlocked [is_substr $res "lock already unlocked"] 1 + env_cleanup $testdir + + # Allocate and lock a mutex. Try to unlock it twice. + puts "\tMut003.b: Try to unlock a mutex twice." + set env [berkdb_env_noerr -create -home $testdir] + set mutex [$env mutex] + error_check_good mutex_lock [$env mutex_lock $mutex] 0 + error_check_good mutex_unlock [$env mutex_unlock $mutex] 0 + catch { $env mutex_unlock $mutex } res + error_check_good \ + already_unlocked [is_substr $res "lock already unlocked"] 1 + env_cleanup $testdir + +} + diff --git a/test/tcl/ndbm.tcl b/test/tcl/ndbm.tcl new file mode 100644 index 00000000..4b1e8c7b --- /dev/null +++ b/test/tcl/ndbm.tcl @@ -0,0 +1,143 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Historic NDBM interface test. +# Use the first 1000 entries from the dictionary. +# Insert each with self as key and data; retrieve each. +# After all are entered, retrieve all; compare output to original. +# Then reopen the file, re-retrieve everything. +# Finally, delete everything. 
+proc ndbm { { nentries 1000 } } { + source ./include.tcl + + puts "NDBM interfaces test: $nentries" + + # Create the database and open the dictionary + set testfile $testdir/ndbmtest + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir NULL + + set db [berkdb ndbm_open -create -truncate -mode 0644 $testfile] + error_check_good ndbm_open [is_substr $db ndbm] 1 + set did [open $dict] + + error_check_good rdonly_false [$db rdonly] 0 + + set flags 0 + set txn 0 + set count 0 + set skippednullkey 0 + + puts "\tNDBM.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + # NDBM can't handle zero-length keys + if { [string length $str] == 0 } { + set skippednullkey 1 + continue + } + + set ret [$db store $str $str insert] + error_check_good ndbm_store $ret 0 + + set d [$db fetch $str] + error_check_good ndbm_fetch $d $str + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tNDBM.b: dump file" + set oid [open $t1 w] + for { set key [$db firstkey] } { $key != -1 } { + set key [$db nextkey] } { + puts $oid $key + set d [$db fetch $key] + error_check_good ndbm_refetch $d $key + } + + # If we had to skip a zero-length key, juggle things to cover up + # this fact in the dump. + if { $skippednullkey == 1 } { + puts $oid "" + incr nentries 1 + } + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + + error_check_good NDBM:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # File descriptors tests won't work under Windows. 
+ if { $is_windows_test != 1 } { + puts "\tNDBM.c: pagf/dirf test" + set fd [$db pagfno] + error_check_bad pagf $fd -1 + set fd [$db dirfno] + error_check_bad dirf $fd -1 + } + + puts "\tNDBM.d: close, open, and dump file" + + # Now, reopen the file and run the last test again. + error_check_good ndbm_close [$db close] 0 + set db [berkdb ndbm_open -rdonly $testfile] + error_check_good ndbm_open2 [is_substr $db ndbm] 1 + set oid [open $t1 w] + + error_check_good rdonly_true [$db rdonly] "rdonly:not owner" + + for { set key [$db firstkey] } { $key != -1 } { + set key [$db nextkey] } { + puts $oid $key + set d [$db fetch $key] + error_check_good ndbm_refetch2 $d $key + } + if { $skippednullkey == 1 } { + puts $oid "" + } + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + filesort $t1 $t3 + + error_check_good NDBM:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and delete each entry + puts "\tNDBM.e: sequential scan and delete" + + error_check_good ndbm_close [$db close] 0 + set db [berkdb ndbm_open $testfile] + error_check_good ndbm_open3 [is_substr $db ndbm] 1 + set oid [open $t1 w] + + for { set key [$db firstkey] } { $key != -1 } { + set key [$db nextkey] } { + puts $oid $key + set ret [$db delete $key] + error_check_good ndbm_delete $ret 0 + } + if { $skippednullkey == 1 } { + puts $oid "" + } + close $oid + + # Now compare the keys to see if they match the dictionary (or ints) + filesort $t1 $t3 + + error_check_good NDBM:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + error_check_good ndbm_close [$db close] 0 +} diff --git a/test/tcl/parallel.tcl b/test/tcl/parallel.tcl new file mode 100644 index 00000000..50734d0b --- /dev/null +++ b/test/tcl/parallel.tcl @@ -0,0 +1,375 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# Code to load up the tests in to the Queue database +# $Id$ +proc load_queue { file {dbdir RUNQUEUE} nitems } { + global serial_tests + global num_serial + global num_parallel + + puts -nonewline "Loading run queue with $nitems items..." + flush stdout + + set env [berkdb_env -create -lock -home $dbdir] + error_check_good dbenv [is_valid_env $env] TRUE + + # Open two databases, one for tests that may be run + # in parallel, the other for tests we want to run + # while only a single process is testing. + set db [eval {berkdb_open -env $env -create \ + -mode 0644 -len 200 -queue queue.db} ] + error_check_good dbopen [is_valid_db $db] TRUE + set serialdb [eval {berkdb_open -env $env -create \ + -mode 0644 -len 200 -queue serialqueue.db} ] + error_check_good dbopen [is_valid_db $serialdb] TRUE + + set fid [open $file] + + set count 0 + + while { [gets $fid str] != -1 } { + set testarr($count) $str + incr count + } + + # Randomize array of tests. + set rseed [pid] + berkdb srand $rseed + puts -nonewline "randomizing..." + flush stdout + for { set i 0 } { $i < $count } { incr i } { + set tmp $testarr($i) + + set j [berkdb random_int $i [expr $count - 1]] + + set testarr($i) $testarr($j) + set testarr($j) $tmp + } + + if { [string compare ALL $nitems] != 0 } { + set maxload $nitems + } else { + set maxload $count + } + + puts "loading..." + flush stdout + set num_serial 0 + set num_parallel 0 + for { set i 0 } { $i < $maxload } { incr i } { + set str $testarr($i) + # Push serial tests into serial testing db, others + # into parallel db. + if { [is_serial $str] } { + set ret [eval {$serialdb put -append $str}] + error_check_good put:serialdb [expr $ret > 0] 1 + incr num_serial + } else { + set ret [eval {$db put -append $str}] + error_check_good put:paralleldb [expr $ret > 0] 1 + incr num_parallel + } + } + + error_check_good maxload $maxload [expr $num_serial + $num_parallel] + puts "Loaded $maxload records: $num_serial in serial,\ + $num_parallel in parallel." 
+ close $fid + $db close + $serialdb close + $env close +} + +proc init_runqueue { {dbdir RUNQUEUE} nitems list} { + + if { [file exists $dbdir] != 1 } { + file mkdir $dbdir + } + puts "Creating test list..." + $list ALL -n + load_queue ALL.OUT $dbdir $nitems + file delete TEST.LIST + file rename ALL.OUT TEST.LIST +} + +proc run_parallel { nprocs {list run_all} {nitems ALL} } { + global num_serial + global num_parallel + + # Forcibly remove stuff from prior runs, if it's still there. + fileremove -f ./RUNQUEUE + set dirs [glob -nocomplain ./PARALLEL_TESTDIR.*] + set files [glob -nocomplain ALL.OUT.*] + foreach file $files { + fileremove -f $file + } + foreach dir $dirs { + fileremove -f $dir + } + + set basename ./PARALLEL_TESTDIR + set queuedir ./RUNQUEUE + source ./include.tcl + + mkparalleldirs $nprocs $basename $queuedir + + init_runqueue $queuedir $nitems $list + + set basedir [pwd] + set queuedir ../../[string range $basedir \ + [string last "/" $basedir] end]/$queuedir + + # Run serial tests in parallel testdir 0. + run_queue 0 $basename.0 $queuedir serial $num_serial + + set pidlist {} + # Run parallel tests in testdirs 1 through n. + for { set i 1 } { $i <= $nprocs } { incr i } { + set ret [catch { + set p [exec $tclsh_path << \ + "source $test_path/test.tcl; run_queue $i \ + $basename.$i $queuedir parallel $num_parallel" &] + lappend pidlist $p + set f [open $testdir/begin.$p w] + close $f + } res] + } + watch_procs $pidlist 300 1000000 + + set failed 0 + for { set i 0 } { $i <= $nprocs } { incr i } { + if { [file exists ALL.OUT.$i] == 1 } { + puts -nonewline "Checking output from ALL.OUT.$i ... " + if { [check_output ALL.OUT.$i] == 1 } { + set failed 1 + } + puts " done." + } + } + if { $failed == 0 } { + puts "Regression tests succeeded." + } else { + puts "Regression tests failed." + puts "Review UNEXPECTED OUTPUT lines above for errors." 
+ puts "Complete logs found in ALL.OUT.x files" + } +} + +proc run_queue { i rundir queuedir {qtype parallel} {nitems 0} } { + set builddir [pwd] + file delete $builddir/ALL.OUT.$i + cd $rundir + + puts "Starting $qtype run_queue process $i (pid [pid])." + + source ./include.tcl + global env + + set dbenv [berkdb_env -create -lock -home $queuedir] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + if { $qtype == "parallel" } { + set db [eval {berkdb_open -env $dbenv \ + -mode 0644 -queue queue.db} ] + error_check_good dbopen [is_valid_db $db] TRUE + } elseif { $qtype == "serial" } { + set db [eval {berkdb_open -env $dbenv \ + -mode 0644 -queue serialqueue.db} ] + error_check_good serialdbopen [is_valid_db $db] TRUE + } else { + puts "FAIL: queue type $qtype not recognized" + } + + set dbc [eval $db cursor] + error_check_good cursor [is_valid_cursor $dbc $db] TRUE + + set count 0 + set waitcnt 0 + set starttime [timestamp -r] + + while { $waitcnt < 5 } { + set line [$db get -consume] + if { [ llength $line ] > 0 } { + set cmd [lindex [lindex $line 0] 1] + set num [lindex [lindex $line 0] 0] + set o [open $builddir/ALL.OUT.$i a] + puts $o "\nExecuting record $num ([timestamp -w]):\n" + set tdir "TESTDIR.$i" + regsub -all {TESTDIR} $cmd $tdir cmd + puts $o $cmd + close $o + if { [expr {$num % 10} == 0] && $nitems != 0 } { + puts -nonewline \ + "Starting test $num of $nitems $qtype items. " + set now [timestamp -r] + set elapsed_secs [expr $now - $starttime] + set secs_per_test [expr $elapsed_secs / $num] + set esttotal [expr $nitems * $secs_per_test] + set remaining [expr $esttotal - $elapsed_secs] + if { $remaining < 3600 } { + puts "\tRough guess: less than 1\ + hour left." + } else { + puts "\tRough guess: \ + [expr $remaining / 3600] hour(s) left." 
+ } + } +# puts "Process $i, record $num:\n$cmd" + set env(PURIFYOPTIONS) \ + "-log-file=./test$num.%p -follow-child-processes -messages=first" + set env(PURECOVOPTIONS) \ + "-counts-file=./cov.pcv -log-file=./cov.log -follow-child-processes" + if [catch {exec $tclsh_path \ + << "source $test_path/test.tcl; $cmd" \ + >>& $builddir/ALL.OUT.$i } res] { + set o [open $builddir/ALL.OUT.$i a] + puts $o "FAIL: '$cmd': $res" + close $o + } + env_cleanup $testdir + set o [open $builddir/ALL.OUT.$i a] + puts $o "\nEnding record $num ([timestamp])\n" + close $o + incr count + } else { + incr waitcnt + tclsleep 1 + } + } + + set now [timestamp -r] + set elapsed [expr $now - $starttime] + puts "Process $i: $count commands executed in [format %02u:%02u \ + [expr $elapsed / 3600] [expr ($elapsed % 3600) / 60]]" + + error_check_good close_parallel_cursor_$i [$dbc close] 0 + error_check_good close_parallel_db_$i [$db close] 0 + error_check_good close_parallel_env_$i [$dbenv close] 0 + + # + # We need to put the pid file in the builddir's idea + # of testdir, not this child process' local testdir. + # Therefore source builddir's include.tcl to get its + # testdir. + # !!! This resets testdir, so don't do anything else + # local to the child after this. 
+ source $builddir/include.tcl + + set f [open $builddir/$testdir/end.[pid] w] + close $f + cd $builddir +} + +proc mkparalleldirs { nprocs basename queuedir } { + source ./include.tcl + set dir [pwd] + + if { $is_windows_test != 1 } { + set EXE "" + } else { + set EXE ".exe" + } + for { set i 0 } { $i <= $nprocs } { incr i } { + set destdir $basename.$i + catch {file mkdir $destdir} + puts "Created $destdir" + if { $is_windows_test == 1 } { + catch {file mkdir $destdir/$buildpath} + catch {eval file copy \ + [eval glob {$dir/$buildpath/*.dll}] $destdir/$buildpath} + catch {eval file copy \ + [eval glob {$dir/$buildpath/db_{checkpoint,deadlock}$EXE} \ + {$dir/$buildpath/db_{dump,load,printlog,recover,stat,upgrade}$EXE} \ + {$dir/$buildpath/db_{archive,verify,hotbackup,log_verify}$EXE}] \ + {$dir/$buildpath/dbkill$EXE} \ + $destdir/$buildpath} + catch {eval file copy \ + [eval glob -nocomplain {$dir/$buildpath/db_{reptest,repsite,replicate}$EXE}] \ + $destdir/$buildpath} + } + catch {eval file copy \ + [eval glob {$dir/{.libs,include.tcl}}] $destdir} + # catch {eval file copy $dir/$queuedir $destdir} + catch {eval file copy \ + [eval glob {$dir/db_{checkpoint,deadlock}$EXE} \ + {$dir/db_{dump,load,printlog,recover,stat,upgrade}$EXE} \ + {$dir/db_{archive,verify,hotbackup,log_verify}$EXE}] \ + $destdir} + catch {eval file copy \ + [eval glob -nocomplain {$dir/db_{reptest,repsite,replicate}$EXE}] $destdir} + + # Create modified copies of include.tcl in parallel + # directories so paths still work. 
+ + set infile [open ./include.tcl r] + set d [read $infile] + close $infile + + regsub {test_path } $d {test_path ../} d + regsub {src_root } $d {src_root ../} d + set tdir "TESTDIR.$i" + regsub -all {TESTDIR} $d $tdir d + set outfile [open $destdir/include.tcl w] + puts $outfile $d + close $outfile + } +} + +proc run_ptest { nprocs test args } { + global parms + global valid_methods + set basename ./PARALLEL_TESTDIR + set queuedir NULL + source ./include.tcl + + mkparalleldirs $nprocs $basename $queuedir + + if { [info exists parms($test)] } { + foreach method $valid_methods { + if { [eval exec_ptest $nprocs $basename \ + $test $method $args] != 0 } { + break + } + } + } else { + eval exec_ptest $nprocs $basename $test $args + } +} + +proc exec_ptest { nprocs basename test args } { + source ./include.tcl + + set basedir [pwd] + set pidlist {} + puts "Running $nprocs parallel runs of $test" + for { set i 1 } { $i <= $nprocs } { incr i } { + set outf ALL.OUT.$i + fileremove -f $outf + set ret [catch { + set p [exec $tclsh_path << \ + "cd $basename.$i;\ + source ../$test_path/test.tcl;\ + $test $args" >& $outf &] + lappend pidlist $p + set f [open $testdir/begin.$p w] + close $f + } res] + } + watch_procs $pidlist 30 36000 + set failed 0 + for { set i 1 } { $i <= $nprocs } { incr i } { + if { [check_output ALL.OUT.$i] == 1 } { + set failed 1 + puts "Test $test failed in process $i." + } + } + if { $failed == 0 } { + puts "Test $test succeeded all processes" + return 0 + } else { + puts "Test failed: stopping" + return 1 + } +} diff --git a/test/tcl/plat001.tcl b/test/tcl/plat001.tcl new file mode 100644 index 00000000..8c4c19c0 --- /dev/null +++ b/test/tcl/plat001.tcl @@ -0,0 +1,74 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST plat001 +# TEST +# TEST Test of portability of sequences. +# TEST +# TEST Create and dump a database containing sequences. 
Save the dump. +# TEST This test is used in conjunction with the upgrade tests, which +# TEST will compare the saved dump to a locally created dump. + +proc plat001 { method {tnum "001"} args } { + source ./include.tcl + global fixed_len + global util_path + + # Fixed_len must be increased from the default to + # accommodate fixed-record length methods. + set orig_fixed_len $fixed_len + set fixed_len 128 + set args [convert_args $method $args] + set omethod [convert_method $method] + + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + if { $eindex == -1 } { + set testfile $testdir/plat$tnum.db + set testdump $testdir/plat$tnum.dmp + set env NULL + } else { + set testfile plat$tnum.db + set testdump plat$tnum.dmp + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + # Make the key numeric so we can test record-based methods. + set key 1 + + puts "\tPlat$tnum.a: Create $method db with a sequence." + set db [eval {berkdb_open -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set init 1 + set min $init + set max 1000000000 + set seq [eval {berkdb sequence} \ + -create -init $init -min $min -max $max $db $key] + error_check_good is_valid_seq [is_valid_seq $seq] TRUE + + error_check_good seq_close [$seq close] 0 + error_check_good db_close [$db close] 0 + + puts "\tPlat$tnum.b: Dump the db." + set stat [catch {eval {exec $util_path/db_dump} -f $testdump \ + $testfile} ret] + error_check_good sequence_dump $stat 0 + + puts "\tPlat$tnum.c: Delete the db." + error_check_good db_delete [fileremove $testfile] "" + + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/portable.tcl b/test/tcl/portable.tcl new file mode 100644 index 00000000..8de17b9f --- /dev/null +++ b/test/tcl/portable.tcl @@ -0,0 +1,351 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ + +source ./include.tcl + +global gen_portable +set gen_portable 0 + +global portable_dir +global portable_be +global portable_method +global portable_name + +proc test_portable_logs { { archived_test_loc } } { + source ./include.tcl + global test_names + global portable_dir + global tcl_platform + global saved_logvers + + if { [string match /* $archived_test_loc] != 1 } { + puts "Specify an absolute path for the archived files." + return + } + + # Identify endianness of the machine we are testing on. + if { [big_endian] } { + set myendianness be + } else { + set myendianness le + } + + if { [file exists $archived_test_loc/logversion] == 1 } { + set fd [open $archived_test_loc/logversion r] + set saved_logvers [read $fd] + close $fd + } else { + puts "Old log version number must be available \ + in $archived_test_loc/logversion" + return + } + + fileremove -f PORTABLE.OUT + set o [open PORTABLE.OUT a] + + puts -nonewline $o "Log portability test started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + puts $o "Testing $e files" + + puts -nonewline "Log portability test started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + puts "Testing $e files" + + set portable_dir $archived_test_loc + puts $o "Using archived databases in $portable_dir." + puts "Using archived databases in $portable_dir." + close $o + +foreach version [glob $portable_dir/*] { + if { [string first CVS $version] != -1 } { continue } + regexp \[^\/\]*$ $version version + + # Test only files where the endianness of the db does + # not match the endianness of the test platform. + # + set dbendianness [string range $version end-1 end] + if { [string equal $myendianness $dbendianness] } { + puts "Skipping test of $version \ + on $myendianness platform." 
+ } else { + set o [open PORTABLE.OUT a] + puts $o "Testing $dbendianness files" + close $o + puts "Testing $dbendianness files" + + foreach method [glob -nocomplain $portable_dir/$version/*] { + regexp \[^\/\]*$ $method method + set o [open PORTABLE.OUT a] + puts $o "\nTesting $method files" + close $o + puts "\tTesting $method files" + + foreach file [lsort -dictionary \ + [glob -nocomplain \ + $portable_dir/$version/$method/*]] { + regexp (\[^\/\]*)\.tar\.gz$ \ + $file dummy name + + cleanup $testdir NULL 1 + set curdir [pwd] + cd $testdir + set tarfd [open "|tar xf -" w] + cd $curdir + + catch {exec gunzip -c \ + "$portable_dir/$version/$method/$name.tar.gz" \ + >@$tarfd} + close $tarfd + + set f [open $testdir/$name.tcldump \ + {RDWR CREAT}] + close $f + + # We exec a separate tclsh for each + # separate subtest to keep the + # testing process from consuming a + # tremendous amount of memory. + # + # Then recover the db. + if { [file exists \ + $testdir/$name.db] } { +#puts "found file $testdir/$name.db" + if { [catch {exec $tclsh_path \ + << "source \ + $test_path/test.tcl;\ + _recover_test $testdir \ + $version $method $name \ + $dbendianness" >>& \ + PORTABLE.OUT } message] } { + set o [open \ + PORTABLE.OUT a] + puts $o "FAIL: $message" + close $o + } + } + } + } + } +} + + set o [open PORTABLE.OUT a] + puts -nonewline $o "Completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o + + puts -nonewline "Completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + + # Don't provide a return value. + return +} + +proc _recover_test { dir version method name dbendianness } { + source ./include.tcl + global errorInfo + + puts "Recover db using opposite endian log: \ + $version $method $name.db" + + set omethod [convert_method $method] + + # Move the saved database; we'll need to compare it to + # the recovered database. + catch { file rename -force $testdir/$name.db \ + $testdir/$name.db.init } res + + # Recover. 
+ set ret [catch {eval {exec} $util_path/db_recover -h $testdir} res] + if { $ret != 0 } { + puts "FAIL: db_recover outputted $res" + } + error_check_good db_recover $ret 0 + + # Compare the original database to the recovered database. + set dbinit [berkdb_open $omethod $testdir/$name.db.init] + set db [berkdb_open $omethod $testdir/$name.db] + db_compare $dbinit $db $testdir/$name.db.init \ + $testdir/$name.db + + # Verify. + error_check_good db_verify [verify_dir $testdir "" 0 0 1] 0 + +} + +proc generate_portable_logs { destination_dir } { + global gen_portable + global gen_dump + global portable_dir + global portable_be + global portable_method + global portable_name + global valid_methods + global test_names + global parms + source ./include.tcl + + if { [string match /* $destination_dir] != 1 } { + puts "Specify an absolute path for the archived files." + return + } + + set portable_dir $destination_dir + env_cleanup $testdir + + fileremove -f GENERATE.OUT + set o [open GENERATE.OUT a] + + puts -nonewline $o "Generating files for portability test. Started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + + puts -nonewline "Generating files for portability test. Started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + close $o + + # Create a file that contains the log version number. + # If necessary, create the directory to contain the file. + if { [file exists $destination_dir] == 0 } { + file mkdir $destination_dir + } else { + puts "$destination_dir already exists, exiting." + return + } + + set env [berkdb_env -create -log -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + + set lv [open $destination_dir/logversion w] + puts $lv [get_log_vers $env] + close $lv + + error_check_good env_close [$env close] 0 + + # Generate test databases for each access method and endianness. 
+ set gen_portable 1 + foreach method $valid_methods { + set o [open GENERATE.OUT a] + puts $o "\nGenerating $method files" + close $o + puts "\tGenerating $method files" + set portable_method $method + +# Select a variety of tests. +set test_names(test) "test002 test011 test013 test017 \ + test021 test024 test027 test028" + foreach test $test_names(test) { + if { [info exists parms($test)] != 1 } { + continue + } + + set o [open GENERATE.OUT a] + puts $o "\t\tGenerating files for $test" + close $o + puts "\t\tGenerating files for $test" + + foreach portable_be { 0 1 } { + set portable_name $test + if [catch {exec $tclsh_path \ + << "source $test_path/test.tcl;\ + global gen_portable portable_be;\ + global portable_method portable_name;\ + global portable_dir;\ + set gen_portable 1;\ + set portable_be $portable_be;\ + set portable_method $portable_method;\ + set portable_name $portable_name;\ + set portable_dir $portable_dir;\ + run_envmethod -$method $test" \ + >>& GENERATE.OUT} res] { + puts "FAIL: run_envmethod \ + $test $method" + } + cleanup $testdir NULL 1 + } + } + } + + set gen_portable 0 + set o [open GENERATE.OUT a] + puts -nonewline $o "Completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline "Completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + close $o +} + +proc save_portable_files { dir } { + global portable_dir + global portable_be + global portable_method + global portable_name + global gen_portable + global gen_dump + source ./include.tcl + + set vers [berkdb version] + set maj [lindex $vers 0] + set min [lindex $vers 1] + + if { [big_endian] } { + set myendianness be + } else { + set myendianness le + } + + if { $portable_be == 1 } { + set version_dir "$myendianness-$maj.${min}be" + set en be + } else { + set version_dir "$myendianness-$maj.${min}le" + set en le + } + + set dest $portable_dir/$version_dir/$portable_method + if { [file exists $portable_dir/$version_dir/$portable_method] == 
0 } { + file mkdir $dest + } + + if { $gen_portable == 1 } { + # Some tests skip some access methods, so we + # only try to save files if there is a datafile + # file. + set dbfiles [glob -nocomplain $dir/*.db] + if { [llength $dbfiles] > 0 } { + set logfiles [glob -nocomplain $dir/log.*] + set dbfile [lindex $dbfiles 0] + + # We arbitrarily name the tar file where we save + # everything after the first database file we + # find. This works because the database files + # are almost always named after the test. + set basename [string range $dbfile \ + [expr [string length $dir] + 1] end-3] + + set cwd [pwd] + cd $dest + set dest [pwd] + cd $cwd + cd $dir + if { [catch { + eval exec tar -cvf $dest/$basename.tar \ + [glob -nocomplain *.db log.* \ + __dbq.$basename-$en.db.*] + exec gzip --best $dest/$basename.tar + } res ] } { + puts "FAIL: tar/gzip of $basename failed\ + with message $res" + } + cd $cwd + } + } +} + + diff --git a/test/tcl/recd001.tcl b/test/tcl/recd001.tcl new file mode 100644 index 00000000..6dbc5801 --- /dev/null +++ b/test/tcl/recd001.tcl @@ -0,0 +1,258 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd001 +# TEST Per-operation recovery tests for non-duplicate, non-split +# TEST messages. Makes sure that we exercise redo, undo, and do-nothing +# TEST condition. Any test that appears with the message (change state) +# TEST indicates that we've already run the particular test, but we are +# TEST running it again so that we can change the state of the data base +# TEST to prepare for the next test (this applies to all other recovery +# TEST tests as well). +# TEST +# TEST These are the most basic recovery tests. We do individual recovery +# TEST tests for each operation in the access method interface. First we +# TEST create a file and capture the state of the database (i.e., we copy +# TEST it. 
Then we run a transaction containing a single operation. In +# TEST one test, we abort the transaction and compare the outcome to the +# TEST original copy of the file. In the second test, we restore the +# TEST original copy of the database and then run recovery and compare +# TEST this against the actual database. +proc recd001 { method {select 0} args } { + global fixed_len + source ./include.tcl + + # puts "$args" + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set orig_fixed_len $fixed_len + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd001: $method operation/transaction tests ($envargs)" + + # Create the database and environment. + env_cleanup $testdir + + # The recovery tests were originally written to + # do a command, abort, do it again, commit, and then + # repeat the sequence with another command. Each command + # tends to require that the previous command succeeded and + # left the database a certain way. To avoid cluttering up the + # op_recover interface as well as the test code, we create two + # databases; one does abort and then commit for each op, the + # other does prepare, prepare-abort, and prepare-commit for each + # op. If all goes well, this allows each command to depend + # exactly one successful iteration of the previous command. + set testfile recd001.db + set testfile2 recd001-2.db + + set flags "-create -txn wrnosync -home $testdir $envargs" + + # For queue databases, we end up locking all records from one + # to the end of the queue, which depends on the default pagesize. + # Assume that page sizes default to 16KB or less, then we need 4K + # locks. 
+ if { [is_record_based $method] == 1 } { + set flags "$flags -lock_max_locks 5000 -lock_max_objects 5000" + } + + puts "\tRecd001.a.0: creating environment" + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # + # We need to create a database to get the pagesize (either + # the default or whatever might have been specified). + # Then remove it so we can compute fixed_len and create the + # real database. + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv $opts $testfile" + # puts "$oflags" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set stat [$db stat] + # + # Compute the fixed_len based on the pagesize being used. + # We want the fixed_len to be 1/4 the pagesize. + # + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set fixed_len [expr $pg / 4] + error_check_good db_close [$db close] 0 + error_check_good dbremove [berkdb dbremove -env $dbenv $testfile] 0 + + # Convert the args again because fixed_len is now real. + # Create the databases and close the environment. + # cannot specify db truncate in txn protected env!!! 
+ set opts [convert_args $method $args] + set omethod [convert_method $method] + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv $opts $testfile2" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + error_check_good env_close [$dbenv close] 0 + + puts "\tRecd001.a.1: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile + + # List of recovery tests: {CMD MSG} pairs. + set rlist { + { {DB put -txn TXNID $key $data} "Recd001.b: put"} + { {DB del -txn TXNID $key} "Recd001.c: delete"} + { {DB put -txn TXNID $bigkey $data} "Recd001.d: big key put"} + { {DB del -txn TXNID $bigkey} "Recd001.e: big key delete"} + { {DB put -txn TXNID $key $bigdata} "Recd001.f: big data put"} + { {DB del -txn TXNID $key} "Recd001.g: big data delete"} + { {DB put -txn TXNID $key $data} "Recd001.h: put (change state)"} + { {DB put -txn TXNID $key $newdata} "Recd001.i: overwrite"} + { {DB put -txn TXNID -partial "$off $len" $key $partial_grow} + "Recd001.j: partial put growing"} + { {DB put -txn TXNID $key $newdata} "Recd001.k: overwrite (fix)"} + { {DB put -txn TXNID -partial "$off $len" $key $partial_shrink} + "Recd001.l: partial put shrinking"} + { {DB put -txn TXNID -append $data} "Recd001.m: put -append"} + { {DB get -txn TXNID -consume} "Recd001.n: db get -consume"} + } + + # These are all the data values that we're going to need to read + # through the operation table and run the recovery tests. 
+ + if { [is_record_based $method] == 1 } { + set key 1 + } else { + set key recd001_key + } + set data recd001_data + set newdata NEWrecd001_dataNEW + set off 3 + set len 12 + + set partial_grow replacement_record_grow + set partial_shrink xxx + if { [is_fixed_length $method] == 1 } { + set len [string length $partial_grow] + set partial_shrink $partial_grow + } + set bigdata [replicate $key $fixed_len] + if { [is_record_based $method] == 1 } { + set bigkey $fixed_len + } else { + set bigkey [replicate $key $fixed_len] + } + + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + + if { [is_queue $method] != 1 } { + if { [string first append $cmd] != -1 } { + continue + } + if { [string first consume $cmd] != -1 } { + continue + } + } + +# if { [is_fixed_length $method] == 1 } { +# if { [string first partial $cmd] != -1 } { +# continue +# } +# } + op_recover abort $testdir $env_cmd $testfile $cmd $msg $args + op_recover commit $testdir $env_cmd $testfile $cmd $msg $args + # + # Note that since prepare-discard ultimately aborts + # the txn, it must come before prepare-commit. + # + op_recover prepare-abort $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-discard $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-commit $testdir $env_cmd $testfile2 \ + $cmd $msg $args + } + set fixed_len $orig_fixed_len + + if { [is_fixed_length $method] == 1 } { + puts "Skipping remainder of test for fixed length methods" + return + } + + # + # Check partial extensions. If we add a key/data to the database + # and then expand it using -partial, then recover, recovery was + # failing in #3944. Check that scenario here. + # + # !!! + # We loop here because on each iteration, we need to clean up + # the old env (i.e. 
this test does not depend on earlier runs). + # If we run it without cleaning up the env inbetween, we do not + # test the scenario of #3944. + # + set len [string length $data] + set len2 256 + set part_data [replicate "abcdefgh" 32] + set p [list 0 $len] + set cmd [subst \ + {DB put -txn TXNID -partial "$len $len2" $key $part_data}] + set msg "Recd001.o: partial put prepopulated/expanding" + foreach op {abort commit prepare-abort prepare-discard prepare-commit} { + env_cleanup $testdir + + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + set t [$dbenv txn] + error_check_good txn_begin [is_valid_txn $t $dbenv] TRUE + set oflags "-create $omethod -mode 0644 \ + -env $dbenv -txn $t $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set oflags "-create $omethod -mode 0644 \ + -env $dbenv -txn $t $opts $testfile2" + set db2 [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db2] TRUE + + set ret [$db put -txn $t -partial $p $key $data] + error_check_good dbput $ret 0 + + set ret [$db2 put -txn $t -partial $p $key $data] + error_check_good dbput $ret 0 + error_check_good txncommit [$t commit] 0 + error_check_good dbclose [$db close] 0 + error_check_good dbclose [$db2 close] 0 + error_check_good dbenvclose [$dbenv close] 0 + + op_recover $op $testdir $env_cmd $testfile $cmd $msg $args + } + return +} diff --git a/test/tcl/recd002.tcl b/test/tcl/recd002.tcl new file mode 100644 index 00000000..09658242 --- /dev/null +++ b/test/tcl/recd002.tcl @@ -0,0 +1,108 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd002 +# TEST Split recovery tests. For every known split log message, makes sure +# TEST that we exercise redo, undo, and do-nothing condition. 
+proc recd002 { method {select 0} args} { + source ./include.tcl + global rand_init + + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd002: skipping for specific pagesizes" + return + } + berkdb srand $rand_init + + # Queues don't do splits, so we don't really need the small page + # size and the small page size is smaller than the record, so it's + # a problem. + if { [string compare $omethod "-queue"] == 0 } { + set pagesize 4096 + } else { + set pagesize 512 + } + puts "Recd002: $method split recovery tests ($envargs)" + + env_cleanup $testdir + set testfile recd002.db + set testfile2 recd002-2.db + set eflags "-create -txn wrnosync -lock_max_locks 2000 -home $testdir $envargs" + + puts "\tRecd002.a: creating environment" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_bad dbenv $dbenv NULL + + # Create the databases. We will use a small page size so that splits + # happen fairly quickly. 
+ set oflags "-create -auto_commit $args $omethod -mode 0644 -env $dbenv\ + -pagesize $pagesize $testfile" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + set oflags "-create -auto_commit $args $omethod -mode 0644 -env $dbenv\ + -pagesize $pagesize $testfile2" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + reset_env $dbenv + + # List of recovery tests: {CMD MSG} pairs + set slist { + { {populate DB $omethod TXNID $n 0 0} "Recd002.b: splits"} + { {unpopulate DB TXNID $r} "Recd002.c: Remove keys"} + } + + # If pages are 512 bytes, then adding 512 key/data pairs + # should be more than sufficient. + set n 512 + set r [expr $n / 2 ] + foreach pair $slist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + op_recover abort $testdir $env_cmd $testfile $cmd $msg $args + op_recover commit $testdir $env_cmd $testfile $cmd $msg $args + # + # Note that since prepare-discard ultimately aborts + # the txn, it must come before prepare-commit. 
+ # + op_recover prepare-abort $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-discard $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-commit $testdir $env_cmd $testfile2 \ + $cmd $msg $args + } + + puts "\tRecd002.d: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} diff --git a/test/tcl/recd003.tcl b/test/tcl/recd003.tcl new file mode 100644 index 00000000..1ca4f314 --- /dev/null +++ b/test/tcl/recd003.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd003 +# TEST Duplicate recovery tests. For every known duplicate log message, +# TEST makes sure that we exercise redo, undo, and do-nothing condition. +# TEST +# TEST Test all the duplicate log messages and recovery operations. We make +# TEST sure that we exercise all possible recovery actions: redo, undo, undo +# TEST but no fix necessary and redo but no fix necessary. +proc recd003 { method {select 0} args } { + source ./include.tcl + global rand_init + + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set largs [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Recd003 skipping for method $method" + return + } + puts "Recd003: $method duplicate recovery tests ($envargs)" + + berkdb srand $rand_init + + env_cleanup $testdir + # See comment in recd001.tcl for why there are two database files... 
+ set testfile recd003.db + set testfile2 recd003-2.db + set eflags "-create -txn wrnosync -home $testdir $envargs" + + puts "\tRecd003.a: creating environment" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_bad dbenv $dbenv NULL + + # Create the databases. + set oflags \ + "-create -auto_commit $largs -mode 0644 $omethod -dup -env $dbenv $testfile" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + set oflags \ + "-create -auto_commit $largs -mode 0644 $omethod -dup -env $dbenv $testfile2" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + reset_env $dbenv + + # These are all the data values that we're going to need to read + # through the operation table and run the recovery tests. + set n 10 + set dupn 2000 + set bign 500 + + # List of recovery tests: {CMD MSG} pairs + set dlist { + { {populate DB $omethod TXNID $n 1 0} + "Recd003.b: add dups"} + { {DB del -txn TXNID duplicate_key} + "Recd003.c: remove dups all at once"} + { {populate DB $omethod TXNID $n 1 0} + "Recd003.d: add dups (change state)"} + { {unpopulate DB TXNID 0} + "Recd003.e: remove dups 1 at a time"} + { {populate DB $omethod TXNID $dupn 1 0} + "Recd003.f: dup split"} + { {DB del -txn TXNID duplicate_key} + "Recd003.g: remove dups (change state)"} + { {populate DB $omethod TXNID $n 1 1} + "Recd003.h: add big dup"} + { {DB del -txn TXNID duplicate_key} + "Recd003.i: remove big dup all at once"} + { {populate DB $omethod TXNID $n 1 1} + "Recd003.j: add big dup (change state)"} + { {unpopulate DB TXNID 0} + "Recd003.k: remove big dup 1 at a time"} + { {populate DB $omethod TXNID $bign 1 1} + "Recd003.l: split big dup"} + } + + foreach pair $dlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag 
[lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + op_recover abort $testdir $env_cmd $testfile $cmd $msg $largs + op_recover commit $testdir $env_cmd $testfile $cmd $msg $largs + # + # Note that since prepare-discard ultimately aborts + # the txn, it must come before prepare-commit. + # + op_recover prepare-abort $testdir $env_cmd $testfile2 \ + $cmd $msg $largs + op_recover prepare-discard $testdir $env_cmd $testfile2 \ + $cmd $msg $largs + op_recover prepare-commit $testdir $env_cmd $testfile2 \ + $cmd $msg $largs + } + + puts "\tRecd003.m: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} diff --git a/test/tcl/recd004.tcl b/test/tcl/recd004.tcl new file mode 100644 index 00000000..06844f60 --- /dev/null +++ b/test/tcl/recd004.tcl @@ -0,0 +1,103 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd004 +# TEST Big key test where big key gets elevated to internal page. 
+proc recd004 { method {select 0} args } { + source ./include.tcl + global rand_init + + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set opts [convert_args $method $args] + set omethod [convert_method $method] + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd004: skipping for specific pagesizes" + return + } + if { [is_record_based $method] == 1 } { + puts "Recd004 skipping for method $method" + return + } + puts "Recd004: $method big-key on internal page recovery tests ($envargs)" + + berkdb srand $rand_init + + env_cleanup $testdir + set testfile recd004.db + set testfile2 recd004-2.db + set eflags "-create -txn wrnosync -home $testdir $envargs" + puts "\tRecd004.a: creating environment" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_bad dbenv $dbenv NULL + + # Create the databases. We will use a small page size so that we + # elevate quickly + set oflags "-create -mode 0644 \ + -auto_commit $omethod -env $dbenv $opts -pagesize 512 $testfile" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + set oflags "-create -mode 0644 \ + -auto_commit $omethod -env $dbenv $opts -pagesize 512 $testfile2" + set db [eval {berkdb_open} $oflags] + error_check_bad db_open $db NULL + error_check_good db_open [is_substr $db db] 1 + error_check_good db_close [$db close] 0 + reset_env $dbenv + + # List of recovery tests: {CMD MSG} pairs + set slist { + { {big_populate DB TXNID $n} "Recd004.b: big key elevation"} + { {unpopulate DB TXNID 0} "Recd004.c: Remove keys"} + } + + # If pages are 512 bytes, then adding 512 key/data pairs + # should be more than sufficient. 
+ set n 512 + foreach pair $slist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + op_recover abort \ + $testdir $env_cmd $testfile $cmd $msg $opts + op_recover commit \ + $testdir $env_cmd $testfile $cmd $msg $opts + # + # Note that since prepare-discard ultimately aborts + # the txn, it must come before prepare-commit. + # + op_recover prepare-abort $testdir $env_cmd $testfile2 \ + $cmd $msg $opts + op_recover prepare-discard $testdir $env_cmd $testfile2 \ + $cmd $msg $opts + op_recover prepare-commit $testdir $env_cmd $testfile2 \ + $cmd $msg $opts + } + + puts "\tRecd004.d: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} diff --git a/test/tcl/recd005.tcl b/test/tcl/recd005.tcl new file mode 100644 index 00000000..3bed7707 --- /dev/null +++ b/test/tcl/recd005.tcl @@ -0,0 +1,271 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd005 +# TEST Verify reuse of file ids works on catastrophic recovery. +# TEST +# TEST Make sure that we can do catastrophic recovery even if we open +# TEST files using the same log file id. 
+proc recd005 { method args } { + source ./include.tcl + global rand_init + + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd005: $method catastrophic recovery ($envargs)" + + berkdb srand $rand_init + + set testfile1 recd005.1.db + set testfile2 recd005.2.db + set max_locks 2000 + set eflags "-create -txn wrnosync -lock_max_locks $max_locks \ + -lock_max_objects $max_locks -home $testdir $envargs" + + set tnum 0 + foreach sizes "{1000 10} {10 1000}" { + foreach ops "{abort abort} {abort commit} {commit abort} \ + {commit commit}" { + env_cleanup $testdir + incr tnum + + set s1 [lindex $sizes 0] + set s2 [lindex $sizes 1] + set op1 [lindex $ops 0] + set op2 [lindex $ops 1] + puts "\tRecd005.$tnum: $s1 $s2 $op1 $op2" + + puts "\tRecd005.$tnum.a: creating environment" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_bad dbenv $dbenv NULL + + # Create the two databases. 
+ set oflags "-create \ + -auto_commit -mode 0644 -env $dbenv $args $omethod" + set db1 [eval {berkdb_open} $oflags $testfile1] + error_check_bad db_open $db1 NULL + error_check_good db_open [is_substr $db1 db] 1 + error_check_good db_close [$db1 close] 0 + + set db2 [eval {berkdb_open} $oflags $testfile2] + error_check_bad db_open $db2 NULL + error_check_good db_open [is_substr $db2 db] 1 + error_check_good db_close [$db2 close] 0 + $dbenv close + + set dbenv [eval $env_cmd] + puts "\tRecd005.$tnum.b: Populating databases" + eval {do_one_file $testdir \ + $method $dbenv $env_cmd $testfile1 $s1 $op1 } $args + eval {do_one_file $testdir \ + $method $dbenv $env_cmd $testfile2 $s2 $op2 } $args + + puts "\tRecd005.$tnum.c: Verifying initial population" + eval {check_file \ + $testdir $env_cmd $testfile1 $op1 } $args + eval {check_file \ + $testdir $env_cmd $testfile2 $op2 } $args + + # Now, close the environment (so that recovery will work + # on NT which won't allow delete of an open file). + reset_env $dbenv + + berkdb debug_check + puts -nonewline \ + "\tRecd005.$tnum.d: About to run recovery ... " + flush stdout + + set stat [catch \ + {exec $util_path/db_recover -h $testdir -c} \ + result] + if { $stat == 1 } { + error "Recovery error: $result." + } + puts "complete" + + # Substitute a file that will need recovery and try + # running recovery again. 
+ if { $op1 == "abort" } { + file copy -force $testdir/$testfile1.afterop \ + $testdir/$testfile1 + move_file_extent $testdir $testfile1 \ + afterop copy + } else { + file copy -force $testdir/$testfile1.init \ + $testdir/$testfile1 + move_file_extent $testdir $testfile1 init copy + } + if { $op2 == "abort" } { + file copy -force $testdir/$testfile2.afterop \ + $testdir/$testfile2 + move_file_extent $testdir $testfile2 \ + afterop copy + } else { + file copy -force $testdir/$testfile2.init \ + $testdir/$testfile2 + move_file_extent $testdir $testfile2 init copy + } + + berkdb debug_check + puts -nonewline "\tRecd005.$tnum.e:\ + About to run recovery on pre-op database ... " + flush stdout + + set stat \ + [catch {exec $util_path/db_recover \ + -h $testdir -c} result] + if { $stat == 1 } { + error "Recovery error: $result." + } + puts "complete" + + set dbenv [eval $env_cmd] + eval {check_file \ + $testdir $env_cmd $testfile1 $op1 } $args + eval {check_file \ + $testdir $env_cmd $testfile2 $op2 } $args + reset_env $dbenv + + puts "\tRecd005.$tnum.f:\ + Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch \ + {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile + } + } +} + +proc do_one_file { dir method env env_cmd filename num op args} { + source ./include.tcl + + set init_file $dir/$filename.t1 + set afterop_file $dir/$filename.t2 + set final_file $dir/$filename.t3 + + # Save the initial file and open the environment and the first file + file copy -force $dir/$filename $dir/$filename.init + copy_extent_file $dir $filename init + + # If method is heap, copy other files + if { [is_heap $method] == 1 } { + append filename1 $filename "1" + file copy -force $dir/$filename1 $dir/$filename.init1 + copy_extent_file $dir $filename1 init + + append filename2 $filename "2" + file copy -force $dir/$filename2 $dir/$filename.init2 + copy_extent_file $dir $filename2 init + 
} + set oflags "-auto_commit -unknown -env $env" + set db [eval {berkdb_open} $oflags $args $filename] + + # Dump out file contents for initial case + eval open_and_dump_file $filename $env $init_file nop \ + dump_file_direction "-first" "-next" $args + + set txn [$env txn] + error_check_bad txn_begin $txn NULL + error_check_good txn_begin [is_substr $txn $env] 1 + + # Now fill in the db and the txnid in the command + populate $db $method $txn $num 0 0 + + # Sync the file so that we can capture a snapshot to test + # recovery. + error_check_good sync:$db [$db sync] 0 + file copy -force $dir/$filename $dir/$filename.afterop + copy_extent_file $dir $filename afterop + + #if we are doing heap, we have more files to copy + if { [is_heap $method] == 1 } { + file copy -force $dir/$filename1 $dir/$filename.afterop1 + copy_extent_file $dir $filename1 afterop + + file copy -force $dir/$filename2 $dir/$filename.afterop2 + copy_extent_file $dir $filename2 afterop + } + + eval open_and_dump_file $testdir/$filename.afterop NULL \ + $afterop_file nop dump_file_direction "-first" "-next" $args + error_check_good txn_$op:$txn [$txn $op] 0 + + if { $op == "commit" } { + puts "\t\tFile $filename executed and committed." + } else { + puts "\t\tFile $filename executed and aborted." + } + + # Dump out file and save a copy. + error_check_good sync:$db [$db sync] 0 + eval open_and_dump_file $testdir/$filename NULL $final_file nop \ + dump_file_direction "-first" "-next" $args + file copy -force $dir/$filename $dir/$filename.final + copy_extent_file $dir $filename final + + #if we are doing heap, we have more files to copy + if { [is_heap $method] == 1 } { + file copy -force $dir/$filename1 $dir/$filename.final1 + copy_extent_file $dir $filename1 final + + file copy -force $dir/$filename2 $dir/$filename.final2 + copy_extent_file $dir $filename2 final + } + + # If this is an abort, it should match the original file. 
+ # If this was a commit, then this file should match the + # afterop file. + if { $op == "abort" } { + filesort $init_file $init_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(initial,post-$op):diff($init_file,$final_file) \ + [filecmp $init_file.sort $final_file.sort] 0 + } else { + filesort $afterop_file $afterop_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(post-$op,pre-commit):diff($afterop_file,$final_file) \ + [filecmp $afterop_file.sort $final_file.sort] 0 + } + + error_check_good close:$db [$db close] 0 +} + +proc check_file { dir env_cmd filename op args} { + source ./include.tcl + + set init_file $dir/$filename.t1 + set afterop_file $dir/$filename.t2 + set final_file $dir/$filename.t3 + + eval open_and_dump_file $testdir/$filename NULL $final_file nop \ + dump_file_direction "-first" "-next" $args + if { $op == "abort" } { + filesort $init_file $init_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(initial,post-$op):diff($init_file,$final_file) \ + [filecmp $init_file.sort $final_file.sort] 0 + } else { + filesort $afterop_file $afterop_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(pre-commit,post-$op):diff($afterop_file,$final_file) \ + [filecmp $afterop_file.sort $final_file.sort] 0 + } +} diff --git a/test/tcl/recd006.tcl b/test/tcl/recd006.tcl new file mode 100644 index 00000000..d781ef84 --- /dev/null +++ b/test/tcl/recd006.tcl @@ -0,0 +1,268 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd006 +# TEST Nested transactions. 
+proc recd006 { method {select 0} args } { + global kvals + source ./include.tcl + + set envargs "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Recd006 skipping for method $method" + return + } + puts "Recd006: $method nested transactions ($envargs)" + + # Create the database and environment. + env_cleanup $testdir + + set dbfile recd006.db + set testfile $testdir/$dbfile + + puts "\tRecd006.a: create database" + set oflags "-create $args $omethod $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good dbopen [is_valid_db $db] TRUE + + # Make sure that we have enough entries to span a couple of + # different pages. + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < 1000 } { + if { [string compare $omethod "-recno"] == 0 } { + set key [expr $count + 1] + } else { + set key $str + } + + set ret [$db put -nooverwrite $key $str] + error_check_good put $ret 0 + + incr count + } + close $did + + # Variables used below: + # p1: a pair of keys that are likely to be on the same page. + # p2: a pair of keys that are likely to be on the same page, + # but on a page different than those in p1. 
+ set dbc [$db cursor] + error_check_good dbc [is_substr $dbc $db] 1 + + set ret [$dbc get -first] + error_check_bad dbc_get:DB_FIRST [llength $ret] 0 + set p1 [lindex [lindex $ret 0] 0] + set kvals($p1) [lindex [lindex $ret 0] 1] + + set ret [$dbc get -next] + error_check_bad dbc_get:DB_NEXT [llength $ret] 0 + lappend p1 [lindex [lindex $ret 0] 0] + set kvals([lindex [lindex $ret 0] 0]) [lindex [lindex $ret 0] 1] + + set ret [$dbc get -last] + error_check_bad dbc_get:DB_LAST [llength $ret] 0 + set p2 [lindex [lindex $ret 0] 0] + set kvals($p2) [lindex [lindex $ret 0] 1] + + set ret [$dbc get -prev] + error_check_bad dbc_get:DB_PREV [llength $ret] 0 + lappend p2 [lindex [lindex $ret 0] 0] + set kvals([lindex [lindex $ret 0] 0]) [lindex [lindex $ret 0] 1] + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + + # Now create the full transaction environment. + set eflags "-create -txn wrnosync -home $testdir" + + puts "\tRecd006.b: creating environment" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_bad dbenv $dbenv NULL + + # Reset the environment. 
+ reset_env $dbenv + + set p1 [list $p1] + set p2 [list $p2] + + # List of recovery tests: {CMD MSG} pairs + set rlist { + { {nesttest DB TXNID ENV 1 $p1 $p2 commit commit} + "Recd006.c: children (commit commit)"} + { {nesttest DB TXNID ENV 0 $p1 $p2 commit commit} + "Recd006.d: children (commit commit)"} + { {nesttest DB TXNID ENV 1 $p1 $p2 commit abort} + "Recd006.e: children (commit abort)"} + { {nesttest DB TXNID ENV 0 $p1 $p2 commit abort} + "Recd006.f: children (commit abort)"} + { {nesttest DB TXNID ENV 1 $p1 $p2 abort abort} + "Recd006.g: children (abort abort)"} + { {nesttest DB TXNID ENV 0 $p1 $p2 abort abort} + "Recd006.h: children (abort abort)"} + { {nesttest DB TXNID ENV 1 $p1 $p2 abort commit} + "Recd006.i: children (abort commit)"} + { {nesttest DB TXNID ENV 0 $p1 $p2 abort commit} + "Recd006.j: children (abort commit)"} + } + + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + op_recover abort $testdir $env_cmd $dbfile $cmd $msg $args + op_recover commit $testdir $env_cmd $dbfile $cmd $msg $args + } + + puts "\tRecd006.k: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} + +# Do the nested transaction test. +# We want to make sure that children inherit properly from their +# parents and that locks are properly handed back to parents +# and that the right thing happens on commit/abort. +# In particular: +# Write lock on parent, properly acquired by child. +# Committed operation on child gives lock to parent so that +# other child can also get the lock. +# Aborted op by child releases lock so other child can get it. 
+# Correct database state if child commits +# Correct database state if child aborts +proc nesttest { db parent env do p1 p2 child1 child2} { + global kvals + source ./include.tcl + + if { $do == 1 } { + set func toupper + } else { + set func tolower + } + + # Do an RMW on the parent to get a write lock. + set p10 [lindex $p1 0] + set p11 [lindex $p1 1] + set p20 [lindex $p2 0] + set p21 [lindex $p2 1] + + set ret [$db get -rmw -txn $parent $p10] + set res $ret + set Dret [lindex [lindex $ret 0] 1] + if { [string compare $Dret $kvals($p10)] == 0 || + [string compare $Dret [string toupper $kvals($p10)]] == 0 } { + set val 0 + } else { + set val $Dret + } + error_check_good get_parent_RMW $val 0 + + # OK, do child 1 + set kid1 [$env txn -parent $parent] + error_check_good kid1 [is_valid_txn $kid1 $env] TRUE + + # Reading write-locked parent object should be OK + #puts "\tRead write-locked parent object for kid1." + set ret [$db get -txn $kid1 $p10] + error_check_good kid1_get10 $ret $res + + # Now update this child + set data [lindex [lindex [string $func $ret] 0] 1] + set ret [$db put -txn $kid1 $p10 $data] + error_check_good kid1_put10 $ret 0 + + #puts "\tKid1 successful put." + + # Now start child2 + #puts "\tBegin txn for kid2." + set kid2 [$env txn -parent $parent] + error_check_good kid2 [is_valid_txn $kid2 $env] TRUE + + # Getting anything in the p1 set should deadlock, so let's + # work on the p2 set. + set data [string $func $kvals($p20)] + #puts "\tPut data for kid2." + set ret [$db put -txn $kid2 $p20 $data] + error_check_good kid2_put20 $ret 0 + + #puts "\tKid2 data put successful." + + # Now let's do the right thing to kid1 + puts -nonewline "\tKid1 $child1..." 
+ if { [string compare $child1 "commit"] == 0 } { + error_check_good kid1_commit [$kid1 commit] 0 + } else { + error_check_good kid1_abort [$kid1 abort] 0 + } + puts "complete" + + # In either case, child2 should now be able to get the + # lock, either because it is inherited by the parent + # (commit) or because it was released (abort). + set data [string $func $kvals($p11)] + set ret [$db put -txn $kid2 $p11 $data] + error_check_good kid2_put11 $ret 0 + + # Now let's do the right thing to kid2 + puts -nonewline "\tKid2 $child2..." + if { [string compare $child2 "commit"] == 0 } { + error_check_good kid2_commit [$kid2 commit] 0 + } else { + error_check_good kid2_abort [$kid2 abort] 0 + } + puts "complete" + + # Now, let parent check that the right things happened. + # First get all four values + set p10_check [lindex [lindex [$db get -txn $parent $p10] 0] 0] + set p11_check [lindex [lindex [$db get -txn $parent $p11] 0] 0] + set p20_check [lindex [lindex [$db get -txn $parent $p20] 0] 0] + set p21_check [lindex [lindex [$db get -txn $parent $p21] 0] 0] + + if { [string compare $child1 "commit"] == 0 } { + error_check_good parent_kid1 $p10_check \ + [string tolower [string $func $kvals($p10)]] + } else { + error_check_good \ + parent_kid1 $p10_check [string tolower $kvals($p10)] + } + if { [string compare $child2 "commit"] == 0 } { + error_check_good parent_kid2 $p11_check \ + [string tolower [string $func $kvals($p11)]] + error_check_good parent_kid2 $p20_check \ + [string tolower [string $func $kvals($p20)]] + } else { + error_check_good parent_kid2 $p11_check $kvals($p11) + error_check_good parent_kid2 $p20_check $kvals($p20) + } + + # Now do a write on the parent for 21 whose lock it should + # either have or should be available. 
+ set ret [$db put -txn $parent $p21 [string $func $kvals($p21)]] + error_check_good parent_put21 $ret 0 + + return 0 +} diff --git a/test/tcl/recd007.tcl b/test/tcl/recd007.tcl new file mode 100644 index 00000000..0ae70ced --- /dev/null +++ b/test/tcl/recd007.tcl @@ -0,0 +1,1073 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd007 +# TEST File create/delete tests. +# TEST +# TEST This is a recovery test for create/delete of databases. We have +# TEST hooks in the database so that we can abort the process at various +# TEST points and make sure that the transaction doesn't commit. We +# TEST then need to recover and make sure the file is correctly existing +# TEST or not, as the case may be. +proc recd007 { method args } { + global fixed_len + source ./include.tcl + + env_cleanup $testdir + set envargs "" + set data_dir "" + set dir_cmd "" + set zero_idx [lsearch -exact $args "-zero_log"] + if { $zero_idx != -1 } { + set args [lreplace $args $zero_idx $zero_idx] + set envargs "-zero_log" + } + set zero_idx [lsearch -exact $args "-data_dir"] + if { $zero_idx != -1 } { + set end [expr $zero_idx + 1] + append envargs [lrange $args $zero_idx $end] + set data_dir [lrange $args $end $end] + set dir_cmd "if {\[file exists $testdir/$data_dir] == 0 } {exec mkdir $testdir/$data_dir} ; " + set args [lreplace $args $zero_idx $end] + } + + set orig_fixed_len $fixed_len + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd007: $method operation/transaction tests ($envargs)" + + # Create the database and environment. 
+ + set testfile recd007.db + set flags "-create -txn wrnosync -home $testdir $envargs" + + puts "\tRecd007.a: creating environment" + set env_cmd "$dir_cmd berkdb_env $flags" + + set env [eval $env_cmd] + + # We need to create a database to get the pagesize (either + # the default or whatever might have been specified). + # Then remove it so we can compute fixed_len and create the + # real database. + set oflags "-create \ + -auto_commit $omethod -mode 0644 -env $env $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set stat [$db stat] + # + # Compute the fixed_len based on the pagesize being used. + # We want the fixed_len to be 1/4 the pagesize. + # + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set fixed_len [expr $pg / 4] + error_check_good db_close [$db close] 0 + error_check_good dbremove [berkdb dbremove -env $env $testfile] 0 + error_check_good log_flush [$env log_flush] 0 + error_check_good envclose [$env close] 0 + + # Convert the args again because fixed_len is now real. 
+ set opts [convert_args $method $args] + set save_opts $opts + set moreopts {" -lorder 1234 " " -lorder 1234 -chksum " \ + " -lorder 4321 " " -lorder 4321 -chksum "} + + # List of recovery tests: {HOOKS MSG} pairs + # Where each HOOK is a list of {COPY ABORT} + # + set rlist { + { {"none" "preopen"} "Recd007.b0: none/preopen"} + { {"none" "postopen"} "Recd007.b1: none/postopen"} + { {"none" "postlogmeta"} "Recd007.b2: none/postlogmeta"} + { {"none" "postlog"} "Recd007.b3: none/postlog"} + { {"none" "postsync"} "Recd007.b4: none/postsync"} + { {"postopen" "none"} "Recd007.c0: postopen/none"} + { {"postlogmeta" "none"} "Recd007.c1: postlogmeta/none"} + { {"postlog" "none"} "Recd007.c2: postlog/none"} + { {"postsync" "none"} "Recd007.c3: postsync/none"} + { {"postopen" "postopen"} "Recd007.d: postopen/postopen"} + { {"postopen" "postlogmeta"} "Recd007.e: postopen/postlogmeta"} + { {"postopen" "postlog"} "Recd007.f: postopen/postlog"} + { {"postlog" "postlog"} "Recd007.g: postlog/postlog"} + { {"postlogmeta" "postlogmeta"} "Recd007.h: postlogmeta/postlogmeta"} + { {"postlogmeta" "postlog"} "Recd007.i: postlogmeta/postlog"} + { {"postlog" "postsync"} "Recd007.j: postlog/postsync"} + { {"postsync" "postsync"} "Recd007.k: postsync/postsync"} + } + + # These are all the data values that we're going to need to read + # through the operation table and run the recovery tests. 
+ + foreach pair $rlist { + set cmd [lindex $pair 0] + set msg [lindex $pair 1] + # + # Run natively + # + file_recover_create $testdir $env_cmd $omethod \ + $save_opts $testfile $cmd $msg $data_dir + foreach o $moreopts { + set opts $save_opts + append opts $o + file_recover_create $testdir $env_cmd $omethod \ + $opts $testfile $cmd $msg $data_dir + } + } + + set rlist { + { {"none" "predestroy"} "Recd007.l0: none/predestroy"} + { {"none" "postdestroy"} "Recd007.l1: none/postdestroy"} + { {"predestroy" "none"} "Recd007.m0: predestroy/none"} + { {"postdestroy" "none"} "Recd007.m1: postdestroy/none"} + { {"predestroy" "predestroy"} "Recd007.n: predestroy/predestroy"} + { {"predestroy" "postdestroy"} "Recd007.o: predestroy/postdestroy"} + { {"postdestroy" "postdestroy"} "Recd007.p: postdestroy/postdestroy"} + } + + foreach op { dbremove dbrename dbtruncate } { + foreach pair $rlist { + set cmd [lindex $pair 0] + set msg [lindex $pair 1] + file_recover_delete $testdir $env_cmd $omethod \ + $save_opts $testfile $cmd $msg $op $data_dir + foreach o $moreopts { + set opts $save_opts + append opts $o + file_recover_delete $testdir $env_cmd $omethod \ + $opts $testfile $cmd $msg $op $data_dir + } + } + } + + if { $is_windows_test != 1 && $is_hp_test != 1 } { + set env_cmd "$dir_cmd ; berkdb_env_noerr $flags" + do_file_recover_delmk $testdir $env_cmd $method $opts $testfile $data_dir + } + + puts "\tRecd007.r: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile + set fixed_len $orig_fixed_len + return +} + +proc file_recover_create { dir env_cmd method opts dbfile cmd msg data_dir} { + # + # We run this test on each of these scenarios: + # 1. Creating just a database + # 2. Creating a database with a subdb + # 3. 
Creating a 2nd subdb in a database + puts "\t$msg ($opts) create with a database" + do_file_recover_create $dir $env_cmd $method $opts $dbfile \ + 0 $cmd $msg $data_dir + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1 || + [is_heap $method] == 1 } { + puts "\tSkipping subdatabase tests for method $method" + return + } + puts "\t$msg ($opts) create with a database and subdb" + do_file_recover_create $dir $env_cmd $method $opts $dbfile \ + 1 $cmd $msg $data_dir + puts "\t$msg ($opts) create with a database and 2nd subdb" + do_file_recover_create $dir $env_cmd $method $opts $dbfile \ + 2 $cmd $msg $data_dir + +} + +proc do_file_recover_create { dir env_cmd method opts dbfile sub cmd msg data_dir} { + global log_log_record_types + source ./include.tcl + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $dir + } + + env_cleanup $dir + set dflags "-dar" + # Open the environment and set the copy/abort locations + set env [eval $env_cmd] + set copy [lindex $cmd 0] + set abort [lindex $cmd 1] + error_check_good copy_location [is_valid_create_loc $copy] 1 + error_check_good abort_location [is_valid_create_loc $abort] 1 + + if {([string first "logmeta" $copy] != -1 || \ + [string first "logmeta" $abort] != -1) && \ + [is_btree $method] == 0 } { + puts "\tSkipping for method $method" + $env test copy none + $env test abort none + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + return + } + + # Basically non-existence is our initial state. When we + # abort, it is also our final state. 
+ # + switch $sub { + 0 { + set oflags "-create $method -auto_commit -mode 0644 \ + -env $env $opts $dbfile" + } + 1 { + set oflags "-create $method -auto_commit -mode 0644 \ + -env $env $opts $dbfile sub0" + } + 2 { + # + # If we are aborting here, then we need to + # create a first subdb, then create a second + # + set oflags "-create $method -auto_commit -mode 0644 \ + -env $env $opts $dbfile sub0" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + set init_file $dir/$data_dir/$dbfile.init + catch { file copy -force $dir/$data_dir/$dbfile $init_file } res + set oflags "-create $method -auto_commit -mode 0644 \ + -env $env $opts $dbfile sub1" + } + default { + puts "\tBad value $sub for sub" + return + } + } + # + # Set our locations to copy and abort + # + set ret [eval $env test copy $copy] + error_check_good test_copy $ret 0 + set ret [eval $env test abort $abort] + error_check_good test_abort $ret 0 + + puts "\t\tExecuting command" + set ret [catch {eval {berkdb_open} $oflags} db] + + # Sync the mpool so any changes to the file that are + # in mpool get written to the disk file before the + # diff. + $env mpool_sync + + # + # If we don't abort, then we expect success. + # If we abort, we expect no file created. + # + if {[string first "none" $abort] == -1} { + # + # Operation was aborted, verify it does + # not exist. + # + puts "\t\tCommand executed and aborted." + error_check_bad db_open ret 0 + + # + # Check that the file does not exist. Final state. + # + if { $sub != 2 } { + error_check_good db_open:exists \ + [file exists $dir/$data_dir/$dbfile] 0 + } else { + error_check_good \ + diff(init,postcreate):diff($init_file,$dir/$data_dir/$dbfile)\ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + } + } else { + # + # Operation was committed, verify it exists. + # + puts "\t\tCommand executed and committed." 
+ error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + # + # Check that the file exists. + # + error_check_good db_open [file exists $dir/$data_dir/$dbfile] 1 + set init_file $dir/$data_dir/$dbfile.init + catch { file copy -force $dir/$data_dir/$dbfile $init_file } res + + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1} { + copy_extent_file $dir/$data_dir $dbfile init + } + } + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + + # + # Run recovery here. Should be a no-op. Verify that + # the file still doesn't exist or change (depending on sub) + # when we are done. + # + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set env [eval $env_cmd -recover_fatal] + error_check_good env_close [$env close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" + if { $sub != 2 && [string first "none" $abort] == -1} { + # + # Operation was aborted, verify it still does + # not exist. Only done with file creations. + # + error_check_good after_recover1 [file exists $dir/$data_dir/$dbfile] 0 + } else { + # + # Operation was committed or just a subdb was aborted. + # Verify it did not change. + # + error_check_good \ + diff(initial,post-recover1):diff($init_file,$dir/$data_dir/$dbfile) \ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + # + # Need a new copy to get the right LSN into the file. + # + catch { file copy -force $dir/$data_dir/$dbfile $init_file } res + + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1 } { + copy_extent_file $dir/$data_dir $dbfile init + } + } + + # If we didn't make a copy, then we are done. + # + if {[string first "none" $copy] != -1} { + return + } + + # + # Now move the .afterop file to $dbfile. Run recovery again. 
+ # + copy_afterop $dir + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set env [eval $env_cmd -recover_fatal] + error_check_good env_close [$env close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" + if { $sub != 2 && [string first "none" $abort] == -1} { + # + # Operation was aborted, verify it still does + # not exist. Only done with file creations. + # + error_check_good after_recover2 [file exists $dir/$data_dir/$dbfile] 0 + } else { + # + # Operation was committed or just a subdb was aborted. + # Verify it did not change. + # + error_check_good \ + diff(initial,post-recover2):diff($init_file,$dir/$data_dir/$dbfile) \ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + } + +} + +proc file_recover_delete { dir env_cmd method opts dbfile cmd msg op data_dir} { + # + # We run this test on each of these scenarios: + # 1. Deleting/Renaming just a database + # 2. Deleting/Renaming a database with a subdb + # 3. 
Deleting/Renaming a 2nd subdb in a database + puts "\t$msg $op ($opts) with a database" + do_file_recover_delete $dir $env_cmd $method $opts $dbfile \ + 0 $cmd $msg $op $data_dir + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1 || + [is_heap $method] == 1} { + puts "\tSkipping subdatabase tests for method $method" + return + } + + puts "\t$msg $op ($opts) with a database and subdb" + do_file_recover_delete $dir $env_cmd $method $opts $dbfile \ + 1 $cmd $msg $op $data_dir + puts "\t$msg $op ($opts) with a database and 2nd subdb" + do_file_recover_delete $dir $env_cmd $method $opts $dbfile \ + 2 $cmd $msg $op $data_dir + +} + +proc do_file_recover_delete { dir env_cmd method opts dbfile sub cmd msg op data_dir} { + global log_log_record_types + source ./include.tcl + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $dir + } + + env_cleanup $dir + # Open the environment and set the copy/abort locations + set env [eval $env_cmd] + set copy [lindex $cmd 0] + set abort [lindex $cmd 1] + error_check_good copy_location [is_valid_delete_loc $copy] 1 + error_check_good abort_location [is_valid_delete_loc $abort] 1 + + if { [is_record_based $method] == 1 } { + set key1 1 + set key2 2 + } else { + set key1 recd007_key1 + set key2 recd007_key2 + } + set data1 recd007_data0 + set data2 recd007_data1 + set data3 NEWrecd007_data2 + + # + # Depending on what sort of subdb we want, if any, our + # args to the open call will be different (and if we + # want a 2nd subdb, we create the first here. + # + # XXX + # For dbtruncate, we want oflags to have "$env" in it, + # not have the value currently in 'env'. That is why + # the '$' is protected below. Later on we use oflags + # but with a new $env we just opened. 
+ # + switch $sub { + 0 { + set subdb "" + set new $dbfile.new + set dflags "-dar" + set oflags "-create $method -auto_commit -mode 0644 \ + -env \$env $opts $dbfile" + } + 1 { + set subdb sub0 + set new $subdb.new + set dflags "" + set oflags "-create $method -auto_commit -mode 0644 \ + -env \$env $opts $dbfile $subdb" + } + 2 { + # + # If we are aborting here, then we need to + # create a first subdb, then create a second + # + set subdb sub1 + set new $subdb.new + set dflags "" + set oflags "-create $method -auto_commit -mode 0644 \ + -env \$env $opts $dbfile sub0" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set txn [$env txn] + set ret [$db put -txn $txn $key1 $data1] + error_check_good db_put $ret 0 + error_check_good commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + set oflags "-create $method -auto_commit -mode 0644 \ + -env \$env $opts $dbfile $subdb" + } + default { + puts "\tBad value $sub for sub" + return + } + } + + # + # Set our locations to copy and abort + # + set ret [eval $env test copy $copy] + error_check_good test_copy $ret 0 + set ret [eval $env test abort $abort] + error_check_good test_abort $ret 0 + + # + # Open our db, add some data, close and copy as our + # init file. + # + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set txn [$env txn] + set ret [$db put -txn $txn $key1 $data1] + error_check_good db_put $ret 0 + set ret [$db put -txn $txn $key2 $data2] + error_check_good db_put $ret 0 + error_check_good commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + + $env mpool_sync + + set init_file $dir/$data_dir/$dbfile.init + catch { file copy -force $dir/$data_dir/$dbfile $init_file } res + + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1} { + copy_extent_file $dir/$data_dir $dbfile init + } + + # + # If we don't abort, then we expect success. + # If we abort, we expect no file removed. 
+ # + switch $op { + "dbrename" { + set ret [catch { eval {berkdb} $op -env $env -auto_commit \ + $dbfile $subdb $new } remret] + } + "dbremove" { + set ret [catch { eval {berkdb} $op -env $env -auto_commit \ + $dbfile $subdb } remret] + } + "dbtruncate" { + set txn [$env txn] + set db [eval {berkdb_open_noerr -env} \ + $env -auto_commit $opts $dbfile $subdb] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good txnbegin [is_valid_txn $txn $env] TRUE + set ret [catch {$db truncate -txn $txn} remret] + } + } + $env mpool_sync + if { $abort == "none" } { + if { $op == "dbtruncate" } { + error_check_good txncommit [$txn commit] 0 + error_check_good dbclose [$db close] 0 + } + # + # Operation was committed, verify it. + # + puts "\t\tCommand executed and committed." + error_check_good $op $ret 0 + # + # If a dbtruncate, check that truncate returned the number + # of items previously in the database. + # + if { [string compare $op "dbtruncate"] == 0 } { + error_check_good remret $remret 2 + } + recd007_check $op $sub $dir $dbfile $subdb $new $env $oflags $data_dir + } else { + # + # Operation was aborted, verify it did not change. + # + if { $op == "dbtruncate" } { + error_check_good txnabort [$txn abort] 0 + error_check_good dbclose [$db close] 0 + } + puts "\t\tCommand executed and aborted." + error_check_good $op $ret 1 + + # + # Check that the file exists. Final state. + # Compare against initial file. + # + error_check_good post$op.1 [file exists $dir/$data_dir/$dbfile] 1 + error_check_good \ + diff(init,post$op.2):diff($init_file,$dir/$data_dir/$dbfile)\ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + } + $env mpool_sync + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + catch { file copy -force $dir/$data_dir/$dbfile $init_file } res + if { [is_queue $method] == 1 || [is_partitioned $opts] == 1} { + copy_extent_file $dir/$data_dir $dbfile init + } + + + # + # Run recovery here. 
Should be a no-op. Verify that + # the file still doesn't exist or change (depending on abort) + # when we are done. + # + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set env [eval $env_cmd -recover_fatal] + error_check_good env_close [$env close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + + puts "complete" + + if { $abort == "none" } { + # + # Operate was committed. + # + set env [eval $env_cmd] + recd007_check $op $sub $dir $dbfile $subdb $new $env $oflags $data_dir + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + } else { + # + # Operation was aborted, verify it did not change. + # + berkdb debug_check + error_check_good \ + diff(initial,post-recover1):diff($init_file,$dir/$data_dir/$dbfile) \ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + } + + # + # If we didn't make a copy, then we are done. + # + if {[string first "none" $copy] != -1} { + return + } + + # + # Now restore the .afterop file(s) to their original name. + # Run recovery again. + # + copy_afterop $dir + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set env [eval $env_cmd -recover_fatal] + error_check_good env_close [$env close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" + + if { [string first "none" $abort] != -1} { + set env [eval $env_cmd] + recd007_check $op $sub $dir $dbfile $subdb $new $env $oflags $data_dir + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + } else { + # + # Operation was aborted, verify it did not change. 
+ # + error_check_good \ + diff(initial,post-recover2):diff($init_file,$dir/$data_dir/$dbfile) \ + [dbdump_diff $dflags $init_file $dir $data_dir/$dbfile] 0 + } + +} + +# +# This function tests a specific case of recovering after a db removal. +# This is for SR #2538. Basically we want to test that: +# - Make an env. +# - Make/close a db. +# - Remove the db. +# - Create another db of same name. +# - Sync db but leave open. +# - Run recovery. +# - Verify no recovery errors and that new db is there. +proc do_file_recover_delmk { dir env_cmd method opts dbfile data_dir} { + global log_log_record_types + source ./include.tcl + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $dir + } + set omethod [convert_method $method] + + puts "\tRecd007.q: Delete and recreate a database" + env_cleanup $dir + # Open the environment and set the copy/abort locations + set env [eval $env_cmd] + error_check_good env_open [is_valid_env $env] TRUE + + if { [is_record_based $method] == 1 } { + set key 1 + } else { + set key recd007_key + } + set data1 recd007_data + set data2 NEWrecd007_data2 + set data3 LASTrecd007_data3 + + set oflags \ + "-create $omethod -auto_commit -mode 0644 $opts $dbfile" + + # + # Open our db, add some data, close and copy as our + # init file. + # + set db [eval {berkdb_open_noerr} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set txn [$env txn] + set ret [$db put -txn $txn $key $data1] + error_check_good db_put $ret 0 + error_check_good commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + file copy -force $testdir/$data_dir/$dbfile $testdir/$data_dir/${dbfile}.1 + + set ret \ + [catch { berkdb dbremove -env $env -auto_commit $dbfile } remret] + + # + # Operation was committed, verify it does + # not exist. + # + puts "\t\tCommand executed and committed." 
+ error_check_good dbremove $ret 0 + error_check_good dbremove.1 [file exists $dir/$data_dir/$dbfile] 0 + + # + # Now create a new db with the same name. + # + set db [eval {berkdb_open_noerr} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set txn [$env txn] + set ret [$db put -txn $txn $key [chop_data $method $data2]] + error_check_good db_put $ret 0 + error_check_good commit [$txn commit] 0 + error_check_good db_sync [$db sync] 0 + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set envr [eval $env_cmd -recover_fatal] + error_check_good env_close [$envr close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" +# error_check_good db_recover $stat 0 + error_check_good file_exist [file exists $dir/$data_dir/$dbfile] 1 + # + # Since we ran recovery on the open db/env, we need to + # catch these calls. Basically they are there to clean + # up the Tcl widgets. + # + set stat [catch {$db close} ret] + error_check_bad dbclose_after_remove $stat 0 + error_check_good dbclose_after_remove [is_substr $ret recovery] 1 + set stat [catch {$env log_flush} ret] + set stat [catch {$env close} ret] + error_check_bad envclose_after_remove $stat 0 + error_check_good envclose_after_remove [is_substr $ret recovery] 1 + + # + # Reopen env and db and verify 2nd database is there. 
+ # + set env [eval $env_cmd] + error_check_good env_open [is_valid_env $env] TRUE + set db [eval {berkdb_open} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set ret [$db get $key] + error_check_good dbget [llength $ret] 1 + set kd [lindex $ret 0] + error_check_good key [lindex $kd 0] $key + error_check_good data2 [lindex $kd 1] [pad_data $method $data2] + + error_check_good dbclose [$db close] 0 + error_check_good log_flush [$env log_flush] 0 + error_check_good envclose [$env close] 0 + + # + # Copy back the original database and run recovery again. + # SR [#13026] + # + puts "\t\tRecover from first database" + file copy -force $testdir/$data_dir/${dbfile}.1 $testdir/$data_dir/$dbfile + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set env [eval $env_cmd -recover_fatal] + error_check_good env_close [$env close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" +# error_check_good db_recover $stat 0 + error_check_good db_recover.1 [file exists $dir/$data_dir/$dbfile] 1 + + # + # Reopen env and db and verify 2nd database is there. + # + set env [eval $env_cmd] + error_check_good env_open [is_valid_env $env] TRUE + set db [eval {berkdb_open_noerr} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set ret [$db get $key] + error_check_good dbget [llength $ret] 1 + set kd [lindex $ret 0] + error_check_good key [lindex $kd 0] $key + error_check_good data2 [lindex $kd 1] [pad_data $method $data2] + + error_check_good dbclose [$db close] 0 + + file copy -force $testdir/$data_dir/$dbfile $testdir/$data_dir/${dbfile}.2 + + puts "\t\tRemove second db" + set ret \ + [catch { berkdb dbremove -env $env -auto_commit $dbfile } remret] + + # + # Operation was committed, verify it does + # not exist. + # + puts "\t\tCommand executed and committed." 
+ error_check_good dbremove $ret 0 + error_check_good dbremove.2 [file exists $dir/$data_dir/$dbfile] 0 + + # + # Now create a new db with the same name. + # + puts "\t\tAdd a third version of the database" + set db [eval {berkdb_open_noerr} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set txn [$env txn] + set ret [$db put -txn $txn $key [chop_data $method $data3]] + error_check_good db_put $ret 0 + error_check_good commit [$txn commit] 0 + error_check_good db_sync [$db sync] 0 + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set envr [eval $env_cmd -recover_fatal] + error_check_good env_close [$envr close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." +# return +# } + puts "complete" +# error_check_good db_recover $stat 0 + error_check_good file_exist [file exists $dir/$data_dir/$dbfile] 1 + + # + # Since we ran recovery on the open db/env, we need to + # catch these calls to clean up the Tcl widgets. + # + set stat [catch {$db close} ret] + error_check_bad dbclose_after_remove $stat 0 + error_check_good dbclose_after_remove [is_substr $ret recovery] 1 + set stat [catch {$env log_flush} ret] + set stat [catch {$env close} ret] + error_check_bad envclose_after_remove $stat 0 + error_check_good envclose_after_remove [is_substr $ret recovery] 1 + + # + # Copy back the second database and run recovery again. + # + puts "\t\tRecover from second database" + file copy -force $testdir/$data_dir/${dbfile}.2 $testdir/$data_dir/$dbfile + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery ... " + flush stdout + + set envr [eval $env_cmd -recover_fatal] + error_check_good env_close [$envr close] 0 +# set stat [catch {exec $util_path/db_recover -h $dir -c} result] +# if { $stat == 1 } { +# error "FAIL: Recovery error: $result." 
+# return +# } + puts "complete" +# error_check_good db_recover $stat 0 + error_check_good file_exist.2 [file exists $dir/$data_dir/$dbfile] 1 + + # + # Reopen env and db and verify 3rd database is there. + # + set env [eval $env_cmd] + error_check_good env_open [is_valid_env $env] TRUE + set db [eval {berkdb_open} -env $env $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set ret [$db get $key] + error_check_good dbget [llength $ret] 1 + set kd [lindex $ret 0] + error_check_good key [lindex $kd 0] $key + error_check_good data2 [lindex $kd 1] [pad_data $method $data3] + + error_check_good dbclose [$db close] 0 + error_check_good log_flush [$env log_flush] 0 + error_check_good envclose [$env close] 0 +} + +proc is_valid_create_loc { loc } { + switch $loc { + none - + preopen - + postopen - + postlogmeta - + postlog - + postsync + { return 1 } + default + { return 0 } + } +} + +proc is_valid_delete_loc { loc } { + switch $loc { + none - + predestroy - + postdestroy - + postremcall + { return 1 } + default + { return 0 } + } +} + +# Do a logical diff on the db dump files. We expect that either +# the files are identical, or if they differ, that it is exactly +# just a free/invalid page. +# Return 1 if they are different, 0 if logically the same (or identical). +# +proc dbdump_diff { flags initfile dir dbfile } { + source ./include.tcl + + set initdump $initfile.dump + set dbdump $dbfile.dump + + set stat [catch {eval {exec $util_path/db_dump} $flags -f $initdump \ + $initfile} ret] + error_check_good "dbdump.init $flags $initfile" $stat 0 + + # Do a dump without the freelist which should eliminate any + # recovery differences. 
+ set stat [catch {eval {exec $util_path/db_dump} $flags -f $dir/$dbdump \ + $dir/$dbfile} ret] + error_check_good dbdump.db $stat 0 + + set stat [filecmp $dir/$dbdump $initdump] + + if {$stat == 0} { + return 0 + } + puts "diff: $dbdump $initdump gives:\n$ret" + return 1 +} + +proc recd007_check { op sub dir dbfile subdb new env oflags data_dir} { + # + # No matter how many subdbs we have, dbtruncate will always + # have a file, and if we open our particular db, it should + # have no entries. + # + if { $sub == 0 } { + if { $op == "dbremove" } { + error_check_good $op:not-exist:$dir/$dbfile \ + [file exists $dir/$data_dir/$dbfile] 0 + } elseif { $op == "dbrename"} { + error_check_good $op:exist \ + [file exists $dir/$data_dir/$dbfile] 0 + error_check_good $op:exist2 \ + [file exists $dir/$data_dir/$dbfile.new] 1 + } else { + error_check_good $op:exist \ + [file exists $dir/$data_dir/$dbfile] 1 + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set dbc [$db cursor] + error_check_good dbc_open \ + [is_valid_cursor $dbc $db] TRUE + set ret [$dbc get -first] + error_check_good dbget1 [llength $ret] 0 + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + } + return + } else { + set t1 $dir/t1 + # + # If we have subdbs, check that all but the last one + # are there, and the last one is correctly operated on. 
+ # + set db [berkdb_open -rdonly -env $env $dbfile] + error_check_good dbopen [is_valid_db $db] TRUE + set c [eval {$db cursor}] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + set d [$c get -last] + if { $op == "dbremove" } { + if { $sub == 1 } { + error_check_good subdb:rem [llength $d] 0 + } else { + error_check_bad subdb:rem [llength $d] 0 + set sdb [lindex [lindex $d 0] 0] + error_check_bad subdb:rem1 $sdb $subdb + } + } elseif { $op == "dbrename"} { + set sdb [lindex [lindex $d 0] 0] + error_check_good subdb:ren $sdb $new + if { $sub != 1 } { + set d [$c get -prev] + error_check_bad subdb:ren [llength $d] 0 + set sdb [lindex [lindex $d 0] 0] + error_check_good subdb:ren1 \ + [is_substr "new" $sdb] 0 + } + } else { + set sdb [lindex [lindex $d 0] 0] + set dbt [berkdb_open -rdonly -env $env $dbfile $sdb] + error_check_good db_open [is_valid_db $dbt] TRUE + set dbc [$dbt cursor] + error_check_good dbc_open \ + [is_valid_cursor $dbc $dbt] TRUE + set ret [$dbc get -first] + error_check_good dbget2 [llength $ret] 0 + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$dbt close] 0 + if { $sub != 1 } { + set d [$c get -prev] + error_check_bad subdb:ren [llength $d] 0 + set sdb [lindex [lindex $d 0] 0] + set dbt [berkdb_open -rdonly -env $env \ + $dbfile $sdb] + error_check_good db_open [is_valid_db $dbt] TRUE + set dbc [$db cursor] + error_check_good dbc_open \ + [is_valid_cursor $dbc $db] TRUE + set ret [$dbc get -first] + error_check_bad dbget3 [llength $ret] 0 + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$dbt close] 0 + } + } + error_check_good dbcclose [$c close] 0 + error_check_good db_close [$db close] 0 + } +} + +proc copy_afterop { dir } { + set r [catch { set filecopy [glob $dir/*.afterop] } res] + if { $r == 1 } { + return + } + foreach f $filecopy { + set orig [string range $f 0 \ + [expr [string last "." 
$f] - 1]] + catch { file rename -force $f $orig} res + } +} diff --git a/test/tcl/recd008.tcl b/test/tcl/recd008.tcl new file mode 100644 index 00000000..5c112c15 --- /dev/null +++ b/test/tcl/recd008.tcl @@ -0,0 +1,226 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd008 +# TEST Test deeply nested transactions and many-child transactions. +proc recd008 { method {breadth 4} {depth 4} args} { + global kvals + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd008: $method $breadth X $depth deeply nested transactions" + + # Create the database and environment. + env_cleanup $testdir + + set dbfile recd008.db + + puts "\tRecd008.a: create database" + set db [eval {berkdb_open -create} $args $omethod $testdir/$dbfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Make sure that we have enough entries to span a couple of + # different pages. 
+ set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < 1000 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + if { $count == 500} { + set p1 $key + set kvals($p1) $str + } + set ret [$db put $key [chop_data $method $str]] + error_check_good put $ret 0 + + incr count + } + close $did + error_check_good db_close [$db close] 0 + + set txn_max [expr int([expr pow($breadth,$depth)])] + if { $txn_max < 20 } { + set txn_max 20 + } + + puts "\tRecd008.b: create environment for $txn_max transactions" + + set max_locks 2500 + set eflags "-mode 0644 -create -lock_max_locks $max_locks \ + -lock_max_objects $max_locks -txn_max $txn_max -txn wrnosync -home $testdir" + set env_cmd "berkdb_env $eflags" + set dbenv [eval $env_cmd] + error_check_good env_open [is_valid_env $dbenv] TRUE + + reset_env $dbenv + + set rlist { + { {recd008_parent abort ENV DB $method $p1 TXNID 1 1 $breadth $depth} + "Recd008.c: child abort parent" } + { {recd008_parent commit ENV DB $method $p1 TXNID 1 1 $breadth $depth} + "Recd008.d: child commit parent" } + } + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + op_recover abort $testdir $env_cmd $dbfile $cmd $msg $args + eval recd008_setkval $dbfile $p1 $args + op_recover commit $testdir $env_cmd $dbfile $cmd $msg $args + eval recd008_setkval $dbfile $p1 $args + } + + puts "\tRecd008.e: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} + +proc recd008_setkval { dbfile p1 args} { + global kvals + source ./include.tcl + + set db [eval {berkdb_open} $args $testdir/$dbfile] + error_check_good dbopen [is_valid_db $db] TRUE + set ret [$db get $p1] + error_check_good dbclose [$db close] 0 + set kvals($p1) [lindex [lindex $ret 0] 1] +} + +# This is a lot like the op_recover 
procedure. We cannot use that +# because it was not meant to be called recursively. This proc +# knows about depth/breadth and file naming so that recursive calls +# don't overwrite various initial and afterop files, etc. +# +# The basic flow of this is: +# (Initial file) +# Parent begin transaction (in op_recover) +# Parent starts children +# Recursively call recd008_recover +# (children modify p1) +# Parent modifies p1 +# (Afterop file) +# Parent commit/abort (in op_recover) +# (Final file) +# Recovery test (in op_recover) +proc recd008_parent { op env db method p1key parent b0 d0 breadth depth } { + global kvals + source ./include.tcl + + # + # Save copy of original data + # Acquire lock on data + # + set olddata [pad_data $method $kvals($p1key)] + set ret [$db get -rmw -txn $parent $p1key] + set Dret [lindex [lindex $ret 0] 1] + error_check_good get_parent_RMW $Dret $olddata + + # + # Parent spawns off children + # + set ret [recd008_txn $op $env $db $method $p1key $parent \ + $b0 $d0 $breadth $depth] + + puts "Child runs complete. Parent modifies data." 
+ + # + # Parent modifies p1 + # + set newdata $olddata.parent + set ret [$db put -txn $parent $p1key [chop_data $method $newdata]] + error_check_good db_put $ret 0 + + # + # Save value in kvals for later comparison + # + switch $op { + "commit" { + set kvals($p1key) $newdata + } + "abort" { + set kvals($p1key) $olddata + } + } + return 0 +} + +proc recd008_txn { op env db method p1key parent b0 d0 breadth depth } { + global log_log_record_types + global kvals + source ./include.tcl + + for {set d 1} {$d < $d0} {incr d} { + puts -nonewline "\t" + } + puts "Recd008_txn: $op parent:$parent $breadth $depth ($b0 $d0)" + + # Save the initial file and open the environment and the file + for {set b $b0} {$b <= $breadth} {incr b} { + # + # Begin child transaction + # + set t [$env txn -parent $parent] + error_check_bad txn_begin $t NULL + error_check_good txn_begin [is_valid_txn $t $env] TRUE + set startd [expr $d0 + 1] + set child $b:$startd:$t + set olddata [pad_data $method $kvals($p1key)] + set newdata $olddata.$child + set ret [$db get -rmw -txn $t $p1key] + set Dret [lindex [lindex $ret 0] 1] + error_check_good get_parent_RMW $Dret $olddata + + # + # Recursively call to set up nested transactions/children + # + for {set d $startd} {$d <= $depth} {incr d} { + set ret [recd008_txn commit $env $db $method $p1key $t \ + $b $d $breadth $depth] + set ret [recd008_txn abort $env $db $method $p1key $t \ + $b $d $breadth $depth] + } + # + # Modifies p1. 
+ # + set ret [$db put -txn $t $p1key [chop_data $method $newdata]] + error_check_good db_put $ret 0 + + # + # Commit or abort + # + for {set d 1} {$d < $startd} {incr d} { + puts -nonewline "\t" + } + puts "Executing txn_$op:$t" + error_check_good txn_$op:$t [$t $op] 0 + for {set d 1} {$d < $startd} {incr d} { + puts -nonewline "\t" + } + set ret [$db get -rmw -txn $parent $p1key] + set Dret [lindex [lindex $ret 0] 1] + set newdata [pad_data $method $newdata] + switch $op { + "commit" { + puts "Command executed and committed." + error_check_good get_parent_RMW $Dret $newdata + set kvals($p1key) $newdata + } + "abort" { + puts "Command executed and aborted." + error_check_good get_parent_RMW $Dret $olddata + set kvals($p1key) $olddata + } + } + } + return 0 +} diff --git a/test/tcl/recd009.tcl b/test/tcl/recd009.tcl new file mode 100644 index 00000000..3a34c736 --- /dev/null +++ b/test/tcl/recd009.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd009 +# TEST Verify record numbering across split/reverse splits and recovery. +proc recd009 { method {select 0} args} { + global fixed_len + source ./include.tcl + + if { [is_rbtree $method] != 1 && [is_rrecno $method] != 1} { + puts "Recd009 skipping for method $method." + return + } + + set opts [convert_args $method $args] + set method [convert_method $method] + + puts "\tRecd009: Test record numbers across splits and recovery" + + set testfile recd009.db + env_cleanup $testdir + set mkeys 1000 + set nkeys 5 + set data "data" + + puts "\tRecd009.a: Create $method environment and database." 
+ set flags "-create -txn -home $testdir" + + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set oflags "-env $dbenv -auto_commit -pagesize 8192 -create -mode 0644 $opts $method" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Fill page with small key/data pairs. Keep at leaf. + puts "\tRecd009.b: Fill page with $nkeys small key/data pairs." + for { set i 1 } { $i <= $nkeys } { incr i } { + if { [is_recno $method] == 1 } { + set key $i + } else { + set key key000$i + } + set ret [$db put $key $data$i] + error_check_good dbput $ret 0 + } + error_check_good db_close [$db close] 0 + error_check_good env_close [$dbenv close] 0 + + set newnkeys [expr $nkeys + 1] + # List of recovery tests: {CMD MSG} pairs. + set rlist { + { {recd009_split DB TXNID 1 $method $newnkeys $mkeys} + "Recd009.c: split"} + { {recd009_split DB TXNID 0 $method $newnkeys $mkeys} + "Recd009.d: reverse split"} + } + + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + set reverse [string first "reverse" $msg] + if { $reverse == -1 } { + set abortkeys $nkeys + set commitkeys $mkeys + set abortpg 0 + set commitpg 1 + } else { + set abortkeys $mkeys + set commitkeys $nkeys + set abortpg 1 + set commitpg 0 + } + op_recover abort $testdir $env_cmd $testfile $cmd $msg $args + recd009_recnocheck $testdir $testfile $opts $abortkeys $abortpg + op_recover commit $testdir $env_cmd $testfile $cmd $msg $args + recd009_recnocheck $testdir $testfile $opts \ + $commitkeys $commitpg + } + puts "\tRecd009.e: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + 
error_check_good db_printlog $stat 0 + fileremove $tmpfile +} + +# +# This procedure verifies that the database has only numkeys number +# of keys and that they are in order. +# +proc recd009_recnocheck { tdir testfile opts numkeys numpg} { + source ./include.tcl + + set db [eval {berkdb_open} $opts $tdir/$testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tRecd009_recnocheck: Verify page count of $numpg on split." + set stat [$db stat] + error_check_bad stat:check-split [is_substr $stat \ + "{{Internal pages} 0}"] $numpg + + set type [$db get_type] + set dbc [$db cursor] + error_check_good dbcursor [is_valid_cursor $dbc $db] TRUE + set i 1 + puts "\tRecd009_recnocheck: Checking $numkeys record numbers." + for {set d [$dbc get -first]} { [llength $d] != 0 } { + set d [$dbc get -next]} { + if { [is_btree $type] } { + set thisi [$dbc get -get_recno] + } else { + set thisi [lindex [lindex $d 0] 0] + } + error_check_good recno_check $i $thisi + error_check_good record_count [expr $i <= $numkeys] 1 + incr i + } + error_check_good curs_close [$dbc close] 0 + error_check_good db_close [$db close] 0 +} + +proc recd009_split { db txn split method nkeys mkeys } { + global errorCode + source ./include.tcl + + set data "data" + + set isrecno [is_recno $method] + # if mkeys is above 1000, need to adjust below for lexical order + if { $split == 1 } { + puts "\tRecd009_split: Add $mkeys pairs to force split." + for {set i $nkeys} { $i <= $mkeys } { incr i } { + if { $isrecno == 1 } { + set key $i + } else { + if { $i >= 100 } { + set key key0$i + } elseif { $i >= 10 } { + set key key00$i + } else { + set key key000$i + } + } + set ret [$db put -txn $txn $key $data$i] + error_check_good dbput:more $ret 0 + } + } else { + puts "\tRecd009_split: Delete added keys to force reverse split." + # Since rrecno renumbers, we delete downward. 
+ for {set i $mkeys} { $i >= $nkeys } { set i [expr $i - 1] } { + if { $isrecno == 1 } { + set key $i + } else { + if { $i >= 100 } { + set key key0$i + } elseif { $i >= 10 } { + set key key00$i + } else { + set key key000$i + } + } + error_check_good db_del:$i [$db del -txn $txn $key] 0 + } + } + return 0 +} diff --git a/test/tcl/recd010.tcl b/test/tcl/recd010.tcl new file mode 100644 index 00000000..a06e7756 --- /dev/null +++ b/test/tcl/recd010.tcl @@ -0,0 +1,256 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd010 +# TEST Test stability of btree duplicates across btree off-page dup splits +# TEST and reverse splits and across recovery. +proc recd010 { method {select 0} args} { + if { [is_btree $method] != 1 } { + puts "Recd010 skipping for method $method." + return + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd010: skipping for specific pagesizes" + return + } + set largs $args + append largs " -dup " + recd010_main $method $select $largs + append largs " -dupsort " + recd010_main $method $select $largs +} + +proc recd010_main { method select largs } { + global fixed_len + global kvals + global kvals_dups + source ./include.tcl + + + set opts [convert_args $method $largs] + set method [convert_method $method] + + puts "Recd010 ($opts): Test duplicates across splits and recovery" + + set testfile recd010.db + env_cleanup $testdir + # + # Set pagesize small to generate lots of off-page dups + # + set page 512 + set mkeys 1000 + set firstkeys 5 + set data "data" + set key "recd010_key" + + puts "\tRecd010.a: Create environment and database." 
+ set flags "-create -txn wrnosync -home $testdir" + + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set oflags "-env $dbenv -auto_commit -create -mode 0644 $opts $method" + set db [eval {berkdb_open} -pagesize $page $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Fill page with small key/data pairs. Keep at leaf. + puts "\tRecd010.b: Fill page with $firstkeys small dups." + for { set i 1 } { $i <= $firstkeys } { incr i } { + set ret [$db put $key $data$i] + error_check_good dbput $ret 0 + } + set kvals 1 + set kvals_dups $firstkeys + error_check_good db_close [$db close] 0 + error_check_good env_close [$dbenv close] 0 + + # List of recovery tests: {CMD MSG} pairs. + if { $mkeys < 100 } { + puts "Recd010 mkeys of $mkeys too small" + return + } + set rlist { + { {recd010_split DB TXNID 1 2 $mkeys} + "Recd010.c: btree split 2 large dups"} + { {recd010_split DB TXNID 0 2 $mkeys} + "Recd010.d: btree reverse split 2 large dups"} + { {recd010_split DB TXNID 1 10 $mkeys} + "Recd010.e: btree split 10 dups"} + { {recd010_split DB TXNID 0 10 $mkeys} + "Recd010.f: btree reverse split 10 dups"} + { {recd010_split DB TXNID 1 100 $mkeys} + "Recd010.g: btree split 100 dups"} + { {recd010_split DB TXNID 0 100 $mkeys} + "Recd010.h: btree reverse split 100 dups"} + } + + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + set reverse [string first "reverse" $msg] + op_recover abort $testdir $env_cmd $testfile $cmd $msg $largs + recd010_check $testdir $testfile $opts abort $reverse $firstkeys + op_recover commit $testdir $env_cmd $testfile $cmd $msg $largs + recd010_check $testdir $testfile $opts commit $reverse $firstkeys + } + puts "\tRecd010.i: Verify db_printlog 
can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} + +# +# This procedure verifies that the database has only numkeys number +# of keys and that they are in order. +# +proc recd010_check { tdir testfile opts op reverse origdups } { + global kvals + global kvals_dups + source ./include.tcl + + set db [eval {berkdb_open} $opts $tdir/$testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set data "data" + + if { $reverse == -1 } { + puts "\tRecd010_check: Verify split after $op" + } else { + puts "\tRecd010_check: Verify reverse split after $op" + } + + set stat [$db stat] + if { [expr ([string compare $op "abort"] == 0 && $reverse == -1) || \ + ([string compare $op "commit"] == 0 && $reverse != -1)]} { + set numkeys 0 + set allkeys [expr $numkeys + 1] + set numdups $origdups + # + # If we abort the adding of dups, or commit + # the removal of dups, either way check that + # we are back at the beginning. Check that: + # - We have 0 internal pages. + # - We have only 1 key (the original we primed the db + # with at the beginning of the test). + # - We have only the original number of dups we primed + # the db with at the beginning of the test. + # + error_check_good stat:orig0 [is_substr $stat \ + "{{Internal pages} 0}"] 1 + error_check_good stat:orig1 [is_substr $stat \ + "{{Number of keys} 1}"] 1 + error_check_good stat:orig2 [is_substr $stat \ + "{{Number of records} $origdups}"] 1 + } else { + set numkeys $kvals + set allkeys [expr $numkeys + 1] + set numdups $kvals_dups + # + # If we abort the removal of dups, or commit the + # addition of dups, check that: + # - We have > 0 internal pages. + # - We have the number of keys. 
+ # + error_check_bad stat:new0 [is_substr $stat \ + "{{Internal pages} 0}"] 1 + error_check_good stat:new1 [is_substr $stat \ + "{{Number of keys} $allkeys}"] 1 + } + + set dbc [$db cursor] + error_check_good dbcursor [is_valid_cursor $dbc $db] TRUE + puts "\tRecd010_check: Checking key and duplicate values" + set key "recd010_key" + # + # Check dups are there as they should be. + # + for {set ki 0} {$ki < $numkeys} {incr ki} { + set datacnt 0 + for {set d [$dbc get -set $key$ki]} { [llength $d] != 0 } { + set d [$dbc get -nextdup]} { + set thisdata [lindex [lindex $d 0] 1] + if { $datacnt < 10 } { + set pdata $data.$ki.00$datacnt + } elseif { $datacnt < 100 } { + set pdata $data.$ki.0$datacnt + } else { + set pdata $data.$ki.$datacnt + } + error_check_good dup_check $thisdata $pdata + incr datacnt + } + error_check_good dup_count $datacnt $numdups + } + # + # Check that the number of expected keys (allkeys) are + # all of the ones that exist in the database. + # + set dupkeys 0 + set lastkey "" + for {set d [$dbc get -first]} { [llength $d] != 0 } { + set d [$dbc get -next]} { + set thiskey [lindex [lindex $d 0] 0] + if { [string compare $lastkey $thiskey] != 0 } { + incr dupkeys + } + set lastkey $thiskey + } + error_check_good key_check $allkeys $dupkeys + error_check_good curs_close [$dbc close] 0 + error_check_good db_close [$db close] 0 +} + +proc recd010_split { db txn split nkeys mkeys } { + global errorCode + global kvals + global kvals_dups + source ./include.tcl + + set data "data" + set key "recd010_key" + + set numdups [expr $mkeys / $nkeys] + + set kvals $nkeys + set kvals_dups $numdups + if { $split == 1 } { + puts \ +"\tRecd010_split: Add $nkeys keys, with $numdups duplicates each to force split." 
+ for {set k 0} { $k < $nkeys } { incr k } { + for {set i 0} { $i < $numdups } { incr i } { + if { $i < 10 } { + set pdata $data.$k.00$i + } elseif { $i < 100 } { + set pdata $data.$k.0$i + } else { + set pdata $data.$k.$i + } + set ret [$db put -txn $txn $key$k $pdata] + error_check_good dbput:more $ret 0 + } + } + } else { + puts \ +"\tRecd010_split: Delete $nkeys keys to force reverse split." + for {set k 0} { $k < $nkeys } { incr k } { + error_check_good db_del:$k [$db del -txn $txn $key$k] 0 + } + } + return 0 +} diff --git a/test/tcl/recd011.tcl b/test/tcl/recd011.tcl new file mode 100644 index 00000000..41377821 --- /dev/null +++ b/test/tcl/recd011.tcl @@ -0,0 +1,135 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd011 +# TEST Verify that recovery to a specific timestamp works. +proc recd011 { method {niter 200} {ckpt_freq 15} {sleep_time 1} args } { + source ./include.tcl + global rand_init + berkdb srand $rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "011" + + puts "Recd$tnum ($method $args): Test recovery to a specific timestamp." + + set testfile recd$tnum.db + env_cleanup $testdir + + set i 0 + if { [is_record_based $method] == 1 } { + set key 1 + set bigkey 1001 + } else { + set key KEY + set bigkey BIGKEY + } + + puts "\tRecd$tnum.a: Create environment and database." + set bufsize [expr 8 * 1024] + set maxsize [expr 8 * $bufsize] + set flags "-create -txn wrnosync -home $testdir -log_buffer $bufsize \ + -log_max $maxsize" + + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set oflags "-auto_commit -env $dbenv -create -mode 0644 $args $omethod" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Main loop: every second or so, increment the db in a txn. 
+ puts "\t\tInitial Checkpoint" + error_check_good "Initial Checkpoint" [$dbenv txn_checkpoint] 0 + + puts "\tRecd$tnum.b ($niter iterations):\ + Transaction-protected increment loop." + for { set i 0 } { $i <= $niter } { incr i } { + set str [random_data 4096 0 NOTHING] + set data $i + set bigdata $i$str + + # Put, in a txn. + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + error_check_good db_put \ + [$db put -txn $txn $key [chop_data $method $data]] 0 + error_check_good db_put \ + [$db put -txn $txn $bigkey [chop_data $method $bigdata]] 0 + error_check_good txn_commit [$txn commit] 0 + + # We need to sleep before taking the timestamp to guarantee + # that the timestamp is *after* this transaction commits. + # Since the resolution of the system call used by Berkeley DB + # is less than a second, rounding to the nearest second can + # otherwise cause off-by-one errors in the test. + tclsleep $sleep_time + + set timeof($i) [timestamp -r] + + # If an appropriate period has elapsed, checkpoint. + if { $i % $ckpt_freq == $ckpt_freq - 1 } { + puts "\t\tIteration $i: Checkpointing." + error_check_good ckpt($i) [$dbenv txn_checkpoint] 0 + } + + # Sleep again to ensure that the next operation definitely + # occurs after the timestamp. + tclsleep $sleep_time + } + error_check_good db_close [$db close] 0 + error_check_good env_close [$dbenv close] 0 + + # Now, loop through and recover to each timestamp, verifying the + # expected increment. + puts "\tRecd$tnum.c: Recover to each timestamp and check." + for { set i $niter } { $i >= 0 } { incr i -1 } { + + # Run db_recover. + set t [clock format $timeof($i) -format "%y%m%d%H%M.%S"] + # puts $t + berkdb debug_check + set ret [catch {exec $util_path/db_recover -h $testdir -t $t} r] + error_check_good db_recover($i,$t,$r) $ret 0 + + # Now open the db and check the timestamp. 
+ set db [eval {berkdb_open} $args $testdir/$testfile] + error_check_good db_open($i) [is_valid_db $db] TRUE + + set dbt [$db get $key] + set datum [lindex [lindex $dbt 0] 1] + error_check_good timestamp_recover $datum [pad_data $method $i] + + error_check_good db_close [$db close] 0 + } + + # Finally, recover to a time well before the first timestamp + # and well after the last timestamp. The latter should + # be just like the timestamp of the last test performed; + # the former should fail. + puts "\tRecd$tnum.d: Recover to before the first timestamp." + set t [clock format [expr $timeof(0) - 1000] -format "%y%m%d%H%M.%S"] + set ret [catch {exec $util_path/db_recover -h $testdir -t $t} r] + error_check_bad db_recover(before,$t) $ret 0 + + puts "\tRecd$tnum.e: Recover to after the last timestamp." + set t [clock format \ + [expr $timeof($niter) + 1000] -format "%y%m%d%H%M.%S"] + set ret [catch {exec $util_path/db_recover -h $testdir -t $t} r] + error_check_good db_recover(after,$t) $ret 0 + + # Now open the db and check the timestamp. + set db [eval {berkdb_open} $args $testdir/$testfile] + error_check_good db_open(after) [is_valid_db $db] TRUE + + set dbt [$db get $key] + set datum2 [lindex [lindex $dbt 0] 1] + + error_check_good timestamp_recover $datum2 $datum + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/recd012.tcl b/test/tcl/recd012.tcl new file mode 100644 index 00000000..41d46110 --- /dev/null +++ b/test/tcl/recd012.tcl @@ -0,0 +1,434 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd012 +# TEST Test of log file ID management. [#2288] +# TEST Test recovery handling of file opens and closes. 
+proc recd012 { method {start 0} \ + {niter 49} {noutiter 25} {niniter 100} {ndbs 5} args } { + source ./include.tcl + + set tnum "012" + set pagesize 512 + + if { $is_qnx_test } { + set niter 40 + } + + puts "Recd$tnum $method ($args): Test recovery file management." + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd012: skipping for specific pagesizes" + return + } + + for { set i $start } { $i <= $niter } { incr i } { + env_cleanup $testdir + + # For repeatability, we pass in the iteration number + # as a parameter and use that in recd012_body to seed + # the random number generator to randomize our operations. + # This lets us re-run a potentially failing iteration + # without having to start from the beginning and work + # our way to it. + # + # The number of databases ranges from 4 to 8 and is + # a function of $niter + # set ndbs [expr ($i % 5) + 4] + + recd012_body \ + $method $ndbs $i $noutiter $niniter $pagesize $tnum $args + } +} + +proc recd012_body { method {ndbs 5} iter noutiter niniter psz tnum {largs ""} } { + global alphabet rand_init fixed_len recd012_ofkey recd012_ofckptkey + source ./include.tcl + + set largs [convert_args $method $largs] + set omethod [convert_method $method] + + puts "\tRecd$tnum $method ($largs): Iteration $iter" + puts "\t\tRecd$tnum.a: Create environment and $ndbs databases." + + # We run out of lockers during some of the recovery runs, so + # we need to make sure that we specify a DB_CONFIG that will + # give us enough lockers. + set f [open $testdir/DB_CONFIG w] + puts $f "set_lk_max_lockers 5000" + close $f + + set flags "-create -txn wrnosync -home $testdir" + set env_cmd "berkdb_env $flags" + error_check_good env_remove [berkdb envremove -home $testdir] 0 + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # Initialize random number generator based on $iter. 
+ berkdb srand [expr $iter + $rand_init] + + # Initialize database that keeps track of number of open files (so + # we don't run out of descriptors). + set ofname of.db + set txn [$dbenv txn] + error_check_good open_txn_begin [is_valid_txn $txn $dbenv] TRUE + set ofdb [berkdb_open -env $dbenv -txn $txn\ + -create -dup -mode 0644 -btree -pagesize 512 $ofname] + error_check_good of_open [is_valid_db $ofdb] TRUE + error_check_good open_txn_commit [$txn commit] 0 + set oftxn [$dbenv txn] + error_check_good of_txn [is_valid_txn $oftxn $dbenv] TRUE + error_check_good of_put [$ofdb put -txn $oftxn $recd012_ofkey 1] 0 + error_check_good of_put2 [$ofdb put -txn $oftxn $recd012_ofckptkey 0] 0 + error_check_good of_put3 [$ofdb put -txn $oftxn $recd012_ofckptkey 0] 0 + error_check_good of_txn_commit [$oftxn commit] 0 + error_check_good of_close [$ofdb close] 0 + + # Create ndbs databases to work in, and a file listing db names to + # pick from. + set f [open $testdir/dblist w] + + set oflags "-auto_commit -env $dbenv \ + -create -mode 0644 -pagesize $psz $largs $omethod" + for { set i 0 } { $i < $ndbs } { incr i } { + # 50-50 chance of being a subdb, unless we're a queue or partitioned. + if { [berkdb random_int 0 1] || \ + [is_queue $method] || [is_partitioned $largs] || + [is_heap $method] } { + # not a subdb + set dbname recd$tnum-$i.db + } else { + # subdb + set dbname "recd$tnum-subdb.db s$i" + } + puts $f $dbname + set db [eval {berkdb_open} $oflags $dbname] + error_check_good db($i) [is_valid_db $db] TRUE + error_check_good db($i)_close [$db close] 0 + } + close $f + error_check_good env_close [$dbenv close] 0 + + # Now we get to the meat of things. 
Our goal is to do some number + # of opens, closes, updates, and shutdowns (simulated here by a + # close of all open handles and a close/reopen of the environment, + # with or without an envremove), matching the regular expression + # + # ((O[OUC]+S)+R+V) + # + # We'll repeat the inner + a random number up to $niniter times, + # and the outer + a random number up to $noutiter times. + # + # In order to simulate shutdowns, we'll perform the opens, closes, + # and updates in a separate process, which we'll exit without closing + # all handles properly. The environment will be left lying around + # before we run recovery 50% of the time. + set out [berkdb random_int 1 $noutiter] + puts \ + "\t\tRecd$tnum.b: Performing $out recoveries of up to $niniter ops." + for { set i 0 } { $i < $out } { incr i } { + set child [open "|$tclsh_path" w] + + # For performance, don't source everything, + # just what we'll need. + puts $child "load $tcllib" + puts $child "set fixed_len $fixed_len" + puts $child "source $test_path/testutils.tcl" + puts $child "source $test_path/recd$tnum.tcl" + + set rnd [expr $iter * 10000 + $i * 100 + $rand_init] + + # Go. + berkdb debug_check + puts $child "recd012_dochild {$env_cmd} $rnd $i $niniter\ + $ndbs $tnum $method $ofname $largs" + close $child + + # Run recovery 0-3 times. + set nrecs [berkdb random_int 0 3] + for { set j 0 } { $j < $nrecs } { incr j } { + berkdb debug_check + set ret [catch {exec $util_path/db_recover \ + -h $testdir} res] + if { $ret != 0 } { + puts "FAIL: db_recover returned with nonzero\ + exit status, output as follows:" + file mkdir /tmp/12out + set fd [open /tmp/12out/[pid] w] + puts $fd $res + close $fd + } + error_check_good recover($j) $ret 0 + } + } + + # Run recovery one final time; it doesn't make sense to + # check integrity if we do not. 
+ set ret [catch {exec $util_path/db_recover -h $testdir} res] + if { $ret != 0 } { + puts "FAIL: db_recover returned with nonzero\ + exit status, output as follows:" + puts $res + } + + # Make sure each datum is the correct filename. + puts "\t\tRecd$tnum.c: Checking data integrity." + set dbenv [berkdb_env -create -private -home $testdir] + error_check_good env_open_integrity [is_valid_env $dbenv] TRUE + set f [open $testdir/dblist r] + set i 0 + while { [gets $f dbinfo] > 0 } { + set db [eval berkdb_open -env $dbenv $largs $dbinfo] + error_check_good dbopen($dbinfo) [is_valid_db $db] TRUE + + set dbc [$db cursor] + error_check_good cursor [is_valid_cursor $dbc $db] TRUE + + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$dbc get -next] } { + error_check_good integrity [lindex [lindex $dbt 0] 1] \ + [pad_data $method $dbinfo] + } + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + } + close $f + error_check_good env_close_integrity [$dbenv close] 0 + + # Verify + error_check_good verify \ + [verify_dir $testdir "\t\tRecd$tnum.d: " 0 0 1] 0 +} + +proc recd012_dochild { env_cmd rnd outiter niniter ndbs tnum method\ + ofname args } { + global recd012_ofkey + source ./include.tcl + if { [is_record_based $method] } { + set keybase "" + } else { + set keybase .[repeat abcdefghijklmnopqrstuvwxyz 4] + } + + # Initialize our random number generator, repeatably based on an arg. + berkdb srand $rnd + + # Open our env. + set dbenv [eval $env_cmd] + error_check_good env_open [is_valid_env $dbenv] TRUE + + # Find out how many databases appear to be open in the log--we + # don't want recovery to run out of filehandles. 
+ set txn [$dbenv txn] + error_check_good child_txn_begin [is_valid_txn $txn $dbenv] TRUE + set ofdb [berkdb_open -env $dbenv -txn $txn $ofname] + error_check_good child_txn_commit [$txn commit] 0 + + set oftxn [$dbenv txn] + error_check_good of_txn [is_valid_txn $oftxn $dbenv] TRUE + set dbt [$ofdb get -txn $oftxn $recd012_ofkey] + error_check_good of_get [lindex [lindex $dbt 0] 0] $recd012_ofkey + set nopenfiles [lindex [lindex $dbt 0] 1] + + error_check_good of_commit [$oftxn commit] 0 + + # Read our dbnames + set f [open $testdir/dblist r] + set i 0 + while { [gets $f dbname($i)] > 0 } { + incr i + } + close $f + + # We now have $ndbs extant databases. + # Open one of them, just to get us started. + set opendbs {} + set oflags "-env $dbenv $args" + + # Start a transaction, just to get us started. + set curtxn [$dbenv txn] + error_check_good txn [is_valid_txn $curtxn $dbenv] TRUE + + # Inner loop. Do $in iterations of a random open, close, or + # update, where $in is between 1 and $niniter. + set in [berkdb random_int 1 $niniter] + for { set j 0 } { $j < $in } { incr j } { + set op [berkdb random_int 0 2] + switch $op { + 0 { + # Open. + recd012_open + } + 1 { + # Update. Put random-number$keybase as key, + # filename as data, into random database. + set num_open [llength $opendbs] + if { $num_open == 0 } { + # If none are open, do an open first. + recd012_open + set num_open [llength $opendbs] + } + set n [berkdb random_int 0 [expr $num_open - 1]] + set pair [lindex $opendbs $n] + set udb [lindex $pair 0] + set uname [lindex $pair 1] + + set key [berkdb random_int 1000 1999]$keybase + set data [chop_data $method $uname] + error_check_good put($uname,$udb,$key,$data) \ + [$udb put -txn $curtxn $key $data] 0 + + # One time in four, commit the transaction. 
+ if { [berkdb random_int 0 3] == 0 && 0 } { + error_check_good txn_recommit \ + [$curtxn commit] 0 + set curtxn [$dbenv txn] + error_check_good txn_reopen \ + [is_valid_txn $curtxn $dbenv] TRUE + } + } + 2 { + # Close. + if { [llength $opendbs] == 0 } { + # If none are open, open instead of closing. + recd012_open + continue + } + + # Commit curtxn first, lest we self-deadlock. + error_check_good txn_recommit [$curtxn commit] 0 + + # Do it. + set which [berkdb random_int 0 \ + [expr [llength $opendbs] - 1]] + + set db [lindex [lindex $opendbs $which] 0] + error_check_good db_choice [is_valid_db $db] TRUE + global errorCode errorInfo + + error_check_good db_close \ + [[lindex [lindex $opendbs $which] 0] close] 0 + + set opendbs [lreplace $opendbs $which $which] + incr nopenfiles -1 + + # Reopen txn. + set curtxn [$dbenv txn] + error_check_good txn_reopen \ + [is_valid_txn $curtxn $dbenv] TRUE + } + } + + # One time in two hundred, checkpoint. + if { [berkdb random_int 0 199] == 0 } { + puts "\t\t\tRecd$tnum:\ + Random checkpoint after operation $outiter.$j." + error_check_good txn_ckpt \ + [$dbenv txn_checkpoint] 0 + set nopenfiles \ + [recd012_nopenfiles_ckpt $dbenv $ofdb $nopenfiles] + } + } + + # We have to commit curtxn. It'd be kind of nice not to, but + # if we start in again without running recovery, we may block + # ourselves. + error_check_good curtxn_commit [$curtxn commit] 0 + + # Put back the new number of open files. + set oftxn [$dbenv txn] + error_check_good of_txn [is_valid_txn $oftxn $dbenv] TRUE + error_check_good of_del [$ofdb del -txn $oftxn $recd012_ofkey] 0 + error_check_good of_put \ + [$ofdb put -txn $oftxn $recd012_ofkey $nopenfiles] 0 + error_check_good of_commit [$oftxn commit] 0 + error_check_good ofdb_close [$ofdb close] 0 +} + +proc recd012_open { } { + # This is basically an inline and has to modify curtxn, + # so use upvars. 
+ upvar curtxn curtxn + upvar ndbs ndbs + upvar dbname dbname + upvar dbenv dbenv + upvar oflags oflags + upvar opendbs opendbs + upvar nopenfiles nopenfiles + + # Return without an open if we've already opened too many files-- + # we don't want to make recovery run out of filehandles. + if { $nopenfiles > 30 } { + #puts "skipping--too many open files" + return -code break + } + + # Commit curtxn first, lest we self-deadlock. + error_check_good txn_recommit \ + [$curtxn commit] 0 + + # Do it. + set which [berkdb random_int 0 [expr $ndbs - 1]] + + set db [eval berkdb_open -auto_commit $oflags $dbname($which)] + + lappend opendbs [list $db $dbname($which)] + + # Reopen txn. + set curtxn [$dbenv txn] + error_check_good txn_reopen [is_valid_txn $curtxn $dbenv] TRUE + + incr nopenfiles +} + +# Update the database containing the number of files that db_recover has +# to contend with--we want to avoid letting it run out of file descriptors. +# We do this by keeping track of the number of unclosed opens since the +# checkpoint before last. +# $recd012_ofkey stores this current value; the two dups available +# at $recd012_ofckptkey store the number of opens since the last checkpoint +# previous. +# Thus, if the current value is 17 when we do a checkpoint, and the +# stored values are 3 and 8, the new current value (which we return) +# is 14, and the new stored values are 8 and 6. +proc recd012_nopenfiles_ckpt { env db nopenfiles } { + global recd012_ofckptkey + set txn [$env txn] + error_check_good nopenfiles_ckpt_txn [is_valid_txn $txn $env] TRUE + + set dbc [$db cursor -txn $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # Get the first ckpt value and delete it. 
+ set dbt [$dbc get -set $recd012_ofckptkey] + error_check_good set [llength $dbt] 1 + + set discard [lindex [lindex $dbt 0] 1] + error_check_good del [$dbc del] 0 + + set nopenfiles [expr $nopenfiles - $discard] + + # Get the next ckpt value + set dbt [$dbc get -nextdup] + error_check_good set2 [llength $dbt] 1 + + # Calculate how many opens we've had since this checkpoint before last. + set onlast [lindex [lindex $dbt 0] 1] + set sincelast [expr $nopenfiles - $onlast] + + # Put this new number at the end of the dup set. + error_check_good put [$dbc put -keylast $recd012_ofckptkey $sincelast] 0 + + # We should never deadlock since we're the only one in this db. + error_check_good dbc_close [$dbc close] 0 + error_check_good txn_commit [$txn commit] 0 + + return $nopenfiles +} + +# globals -- it's not worth passing these around, as they're constants +set recd012_ofkey OPENFILES +set recd012_ofckptkey CKPTS diff --git a/test/tcl/recd013.tcl b/test/tcl/recd013.tcl new file mode 100644 index 00000000..71268a82 --- /dev/null +++ b/test/tcl/recd013.tcl @@ -0,0 +1,291 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd013 +# TEST Test of cursor adjustment on child transaction aborts. [#2373] +# +# XXX +# Other tests that cover more specific variants of the same issue +# are in the access method tests for now. This is probably wrong; we +# put this one here because they're closely based on and intertwined +# with other, non-transactional cursor stability tests that are among +# the access method tests, and because we need at least one test to +# fit under recd and keep logtrack from complaining. We'll sort out the mess +# later; the important thing, for now, is that everything that needs to gets +# tested. (This really shouldn't be under recd at all, since it doesn't +# run recovery!) 
# Recd013: recovery test of aborted child-transaction cursor adjustments.
#
# Builds a database of $nitems odd-numbered records inside a parent txn,
# holding one parent-txn cursor on each record.  Puts and deletes of the
# interleaved even records are then performed in child transactions that
# are aborted or committed, and after every resolution each held cursor
# is re-read with "get -current" to prove that abort/commit left it
# positioned on the right key/data pair.
proc recd013 { method { nitems 100 } args } {
	source ./include.tcl
	global alphabet log_log_record_types

	set args [convert_args $method $args]
	set omethod [convert_method $method]
	set tnum "013"
	set pgsz 512

	# Partition callbacks cannot be dumped by verify_dir; remember that.
	if { [is_partition_callback $args] == 1 } {
		set nodump 1
	} else {
		set nodump 0
	}

	puts "Recd$tnum $method ($args): Test of aborted cursor adjustments."
	set pgindex [lsearch -exact $args "-pagesize"]
	if { $pgindex != -1 } {
		puts "Recd013: skipping for specific pagesizes"
		return
	}

	set testfile recd$tnum.db
	env_cleanup $testdir

	set i 0
	# Record-based methods key by recno directly; others get a prefix.
	if { [is_record_based $method] == 1 } {
		set keybase ""
	} else {
		set keybase "key"
	}

	puts "\tRecd$tnum.a:\
	    Create environment, database, and parent transaction."
	set flags "-create -txn wrnosync -home $testdir"

	set env_cmd "berkdb_env $flags"
	set env [eval $env_cmd]
	error_check_good dbenv [is_valid_env $env] TRUE

	set oflags \
	    "-auto_commit -env $env -create -mode 0644 -pagesize $pgsz $args $omethod"
	set db [eval {berkdb_open} $oflags $testfile]
	error_check_good dbopen [is_valid_db $db] TRUE

	# Create a database containing $nitems items, numbered with odds.
	# We'll then put the even numbers during the body of the test.
	set txn [$env txn]
	error_check_good init_txn [is_valid_txn $txn $env] TRUE
	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		set key $keybase$i
		set data [chop_data $method $i$alphabet]

		# First, try to put the item in a child transaction,
		# then abort and verify all the cursors we've done up until
		# now.
		set ctxn [$env txn -parent $txn]
		error_check_good child_txn($i) [is_valid_txn $ctxn $env] TRUE
		error_check_good fake_put($i) [$db put -txn $ctxn $key $data] 0
		error_check_good ctxn_abort($i) [$ctxn abort] 0
		for { set j 1 } { $j < $i } { incr j 2 } {
			error_check_good dbc_get($j):1 [$dbc($j) get -current] \
			    [list [list $keybase$j \
			    [pad_data $method $j$alphabet]]]
		}

		# Then put for real.
		error_check_good init_put($i) [$db put -txn $txn $key $data] 0

		# Set a cursor of the parent txn to each item.
		set dbc($i) [$db cursor -txn $txn]
		error_check_good dbc_getset($i) \
		    [$dbc($i) get -set $key] \
		    [list [list $keybase$i [pad_data $method $i$alphabet]]]

		# And verify all the cursors, including the one we just
		# created.
		for { set j 1 } { $j <= $i } { incr j 2 } {
			error_check_good dbc_get($j):2 [$dbc($j) get -current] \
			    [list [list $keybase$j \
			    [pad_data $method $j$alphabet]]]
		}
	}

	puts "\t\tRecd$tnum.a.1: Verify cursor stability after init."
	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		error_check_good dbc_get($i):3 [$dbc($i) get -current] \
		    [list [list $keybase$i [pad_data $method $i$alphabet]]]
	}

	puts "\tRecd$tnum.b: Put test."
	puts "\t\tRecd$tnum.b.1: Put items."
	set ctxn [$env txn -parent $txn]
	error_check_good txn [is_valid_txn $ctxn $env] TRUE
	for { set i 2 } { $i <= 2 * $nitems } { incr i 2 } {
		set key $keybase$i
		set data [chop_data $method $i$alphabet]
		error_check_good child_put($i) [$db put -txn $ctxn $key $data] 0

		# If we're a renumbering recno, this is uninteresting.
		# Stir things up by putting a few additional records at
		# the beginning.
		if { [is_rrecno $method] == 1 } {
			set curs [$db cursor -txn $ctxn]
			error_check_bad llength_get_first \
			    [llength [$curs get -first]] 0
			error_check_good cursor [is_valid_cursor $curs $db] TRUE
			# expect a recno!
			error_check_good rrecno_put($i) \
			    [$curs put -before ADDITIONAL.$i] 1
			error_check_good curs_close [$curs close] 0
		}
	}

	puts "\t\tRecd$tnum.b.2: Verify cursor stability after abort."
	error_check_good ctxn_abort [$ctxn abort] 0

	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		error_check_good dbc_get($i):4 [$dbc($i) get -current] \
		    [list [list $keybase$i [pad_data $method $i$alphabet]]]
	}

	# Clean up cursors.
	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		error_check_good dbc($i)_close [$dbc($i) close] 0
	}

	# Sync and verify.
	error_check_good txn_commit [$txn commit] 0
	set txn [$env txn]
	error_check_good txn [is_valid_txn $txn $env] TRUE

	error_check_good db_sync [$db sync] 0
	error_check_good db_verify \
	    [verify_dir $testdir "\t\tRecd$tnum.b.3: " 0 0 $nodump] 0

	# Now put back all the even records, this time in the parent.
	# Commit and re-begin the transaction so we can abort and
	# get back to a nice full database.
	for { set i 2 } { $i <= 2 * $nitems } { incr i 2 } {
		set key $keybase$i
		set data [chop_data $method $i$alphabet]
		error_check_good child_put($i) [$db put -txn $txn $key $data] 0
	}
	error_check_good txn_commit [$txn commit] 0
	set txn [$env txn]
	error_check_good txn [is_valid_txn $txn $env] TRUE

	# Delete test.  Set a cursor to each record.  Delete the even ones
	# in the parent and check cursor stability.  Then open a child
	# transaction, and delete the odd ones.  Verify that the database
	# is empty.
	puts "\tRecd$tnum.c: Delete test."
	unset dbc

	# Create cursors pointing at each item.
	for { set i 1 } { $i <= 2 * $nitems } { incr i } {
		set dbc($i) [$db cursor -txn $txn]
		error_check_good dbc($i)_create [is_valid_cursor $dbc($i) $db] \
		    TRUE
		error_check_good dbc_getset($i) [$dbc($i) get -set $keybase$i] \
		    [list [list $keybase$i [pad_data $method $i$alphabet]]]
	}

	puts "\t\tRecd$tnum.c.1: Delete even items in child txn and abort."

	if { [is_rrecno $method] != 1 } {
		set init 2
		set bound [expr 2 * $nitems]
		set step 2
	} else {
		# In rrecno, deletes will renumber the items, so we have
		# to take that into account when we delete by recno.
		set init 2
		set bound [expr $nitems + 1]
		set step 1
	}

	set ctxn [$env txn -parent $txn]
	for { set i $init } { $i <= $bound } { incr i $step } {
		error_check_good del($i) [$db del -txn $ctxn $keybase$i] 0
	}
	error_check_good ctxn_abort [$ctxn abort] 0

	# Verify that no items are deleted.
	for { set i 1 } { $i <= 2 * $nitems } { incr i } {
		error_check_good dbc_get($i):5 [$dbc($i) get -current] \
		    [list [list $keybase$i [pad_data $method $i$alphabet]]]
	}

	puts "\t\tRecd$tnum.c.2: Delete even items in child txn and commit."
	set ctxn [$env txn -parent $txn]
	for { set i $init } { $i <= $bound } { incr i $step } {
		error_check_good del($i) [$db del -txn $ctxn $keybase$i] 0
	}
	error_check_good ctxn_commit [$ctxn commit] 0

	# Verify that even items are deleted and odd items are not.
	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		if { [is_rrecno $method] != 1 } {
			set j $i
		} else {
			set j [expr ($i - 1) / 2 + 1]
		}
		error_check_good dbc_get($i):6 [$dbc($i) get -current] \
		    [list [list $keybase$j [pad_data $method $i$alphabet]]]
	}
	for { set i 2 } { $i <= 2 * $nitems } { incr i 2 } {
		error_check_good dbc_get($i):7 [$dbc($i) get -current] ""
	}

	puts "\t\tRecd$tnum.c.3: Delete odd items in child txn."

	set ctxn [$env txn -parent $txn]

	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		if { [is_rrecno $method] != 1 } {
			set j $i
		} else {
			# If this is an rrecno, just delete the first
			# item repeatedly--the renumbering will make
			# that delete everything.
			set j 1
		}
		error_check_good del($i) [$db del -txn $ctxn $keybase$j] 0
	}

	# Verify that everyone's deleted.
	for { set i 1 } { $i <= 2 * $nitems } { incr i } {
		error_check_good get_deleted($i) \
		    [llength [$db get -txn $ctxn $keybase$i]] 0
	}

	puts "\t\tRecd$tnum.c.4: Verify cursor stability after abort."
	error_check_good ctxn_abort [$ctxn abort] 0

	# Verify that even items are deleted and odd items are not.
	for { set i 1 } { $i <= 2 * $nitems } { incr i 2 } {
		if { [is_rrecno $method] != 1 } {
			set j $i
		} else {
			set j [expr ($i - 1) / 2 + 1]
		}
		error_check_good dbc_get($i):8 [$dbc($i) get -current] \
		    [list [list $keybase$j [pad_data $method $i$alphabet]]]
	}
	for { set i 2 } { $i <= 2 * $nitems } { incr i 2 } {
		error_check_good dbc_get($i):9 [$dbc($i) get -current] ""
	}

	# Clean up cursors.
	for { set i 1 } { $i <= 2 * $nitems } { incr i } {
		error_check_good dbc($i)_close [$dbc($i) close] 0
	}

	# Sync and verify.
	error_check_good db_sync [$db sync] 0
	error_check_good db_verify \
	    [verify_dir $testdir "\t\tRecd$tnum.c.5: " 0 0 $nodump] 0

	puts "\tRecd$tnum.d: Clean up."
	error_check_good txn_commit [$txn commit] 0
	error_check_good db_close [$db close] 0
	error_check_good log_flush [$env log_flush] 0
	error_check_good env_close [$env close] 0
	error_check_good verify_dir \
	    [verify_dir $testdir "\t\tRecd$tnum.d.1: " 0 0 $nodump] 0

	# If log-record-type tracking is enabled, record what we generated.
	if { $log_log_record_types == 1 } {
		logtrack_read $testdir
	}
}
diff --git a/test/tcl/recd014.tcl b/test/tcl/recd014.tcl
new file mode 100644
index 00000000..ad7de693
--- /dev/null
+++ b/test/tcl/recd014.tcl
@@ -0,0 +1,446 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 1999, 2011 Oracle and/or its affiliates.  All rights reserved.
+#
+# $Id$
+#
+# TEST	recd014
+# TEST	This is a recovery test for create/delete of queue extents.  We
+# TEST	then need to recover and make sure the file is correctly existing
+# TEST	or not, as the case may be.
+proc recd014 { method args} { + global fixed_len + source ./include.tcl + + if { ![is_queueext $method] == 1 } { + puts "Recd014: Skipping for method $method" + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd014: skipping for specific pagesizes" + return + } + + set orig_fixed_len $fixed_len + # + # We will use 512-byte pages, to be able to control + # when extents get created/removed. + # + set fixed_len 300 + + set opts [convert_args $method $args] + set omethod [convert_method $method] + # + # We want to set -extent 1 instead of what + # convert_args gave us. + # + set exti [lsearch -exact $opts "-extent"] + incr exti + set opts [lreplace $opts $exti $exti 1] + + puts "Recd014: $method extent creation/deletion tests" + + # Create the database and environment. + env_cleanup $testdir + + set testfile recd014.db + set flags "-create -txn wrnosync -home $testdir" + + puts "\tRecd014.a: creating environment" + set env_cmd "berkdb_env $flags" + + puts "\tRecd014.b: Create test commit" + ext_recover_create $testdir $env_cmd $omethod \ + $opts $testfile commit + puts "\tRecd014.b: Create test abort" + ext_recover_create $testdir $env_cmd $omethod \ + $opts $testfile abort + + puts "\tRecd014.c: Consume test commit" + ext_recover_consume $testdir $env_cmd $omethod \ + $opts $testfile commit + puts "\tRecd014.c: Consume test abort" + ext_recover_consume $testdir $env_cmd $omethod \ + $opts $testfile abort + + set fixed_len $orig_fixed_len + puts "\tRecd014.d: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile +} + +proc ext_recover_create { dir env_cmd method opts dbfile txncmd } { + global log_log_record_types + global fixed_len + global alphabet + source ./include.tcl + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + 
logtrack_read $dir + } + + env_cleanup $dir + # Open the environment and set the copy/abort locations + set env [eval $env_cmd] + + set init_file $dir/$dbfile.init + set noenvflags "-create $method -mode 0644 -pagesize 512 $opts $dbfile" + set oflags "-env $env $noenvflags" + + set t [$env txn] + error_check_good txn_begin [is_valid_txn $t $env] TRUE + + set ret [catch {eval {berkdb_open} -txn $t $oflags} db] + error_check_good txn_commit [$t commit] 0 + + set t [$env txn] + error_check_good txn_begin [is_valid_txn $t $env] TRUE + + # + # The command to execute to create an extent is a put. + # We are just creating the first one, so our extnum is 0. + # extnum must be in the format that make_ext_file expects, + # but we just leave out the file name. + # + set extnum "/__dbq..0" + set data [chop_data $method [replicate $alphabet 512]] + puts "\t\tExecuting command" + set putrecno [$db put -txn $t -append $data] + error_check_good db_put $putrecno 1 + + # Sync the db so any changes to the file that are + # in mpool get written to the disk file before the + # diff. + puts "\t\tSyncing" + error_check_good db_sync [$db sync] 0 + + catch { file copy -force $dir/$dbfile $dir/$dbfile.afterop } res + copy_extent_file $dir $dbfile afterop + + error_check_good txn_$txncmd:$t [$t $txncmd] 0 + # + # If we don't abort, then we expect success. + # If we abort, we expect no file created. + # + set dbq [make_ext_filename $dir $dbfile $extnum] + error_check_good extput:exists1 [file exists $dbq] 1 + set ret [$db get $putrecno] + if {$txncmd == "abort"} { + # + # Operation was aborted. Verify our entry is not there. + # + puts "\t\tCommand executed and aborted." + error_check_good db_get [llength $ret] 0 + } else { + # + # Operation was committed, verify it exists. + # + puts "\t\tCommand executed and committed." 
+ error_check_good db_get [llength $ret] 1 + catch { file copy -force $dir/$dbfile $init_file } res + copy_extent_file $dir $dbfile init + } + set t [$env txn] + error_check_good txn_begin [is_valid_txn $t $env] TRUE + error_check_good db_close [$db close] 0 + error_check_good txn_commit [$t commit] 0 + error_check_good env_close [$env close] 0 + + # + # Run recovery here. Should be a no-op. Verify that + # the file still does/n't exist when we are done. + # + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (no-op) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + # + # Verify it did not change. + # + error_check_good extput:exists2 [file exists $dbq] 1 + ext_create_check $dir $txncmd $init_file $dbfile $noenvflags $putrecno + + # + # Need a new copy to get the right LSN into the file. + # + catch { file copy -force $dir/$dbfile $init_file } res + copy_extent_file $dir $dbfile init + + # + # Undo. + # Now move the .afterop file to $dbfile. Run recovery again. + # + file copy -force $dir/$dbfile.afterop $dir/$dbfile + move_file_extent $dir $dbfile afterop copy + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (afterop) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + ext_create_check $dir $txncmd $init_file $dbfile $noenvflags $putrecno + + # + # To redo, remove the dbfiles. Run recovery again. + # + catch { file rename -force $dir/$dbfile $dir/$dbfile.renamed } res + copy_extent_file $dir $dbfile renamed rename + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (init) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + # + # !!! 
+ # Even though db_recover exits with status 0, it should print out + # a warning because the file didn't exist. Db_recover writes this + # to stderr. Tcl assumes that ANYTHING written to stderr is an + # error, so even though we exit with 0 status, we still get an + # error back from 'catch'. Look for the warning. + # + if { $stat == 1 && [is_substr $result "warning"] == 0 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + + # + # Verify it was redone. However, since we removed the files + # to begin with, recovery with abort will not recreate the + # extent. Recovery with commit will. + # + if {$txncmd == "abort"} { + error_check_good extput:exists3 [file exists $dbq] 0 + } else { + error_check_good extput:exists3 [file exists $dbq] 1 + } +} + +proc ext_create_check { dir txncmd init_file dbfile oflags putrecno } { + if { $txncmd == "commit" } { + # + # Operation was committed. Verify it did not change. + # + error_check_good \ + diff(initial,post-recover2):diff($init_file,$dir/$dbfile) \ + [dbdump_diff "-dar" $init_file $dir $dbfile] 0 + } else { + # + # Operation aborted. The file is there, but make + # sure the item is not. + # + set xdb [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $xdb] TRUE + set ret [$xdb get $putrecno] + error_check_good db_get [llength $ret] 0 + error_check_good db_close [$xdb close] 0 + } +} + +proc ext_recover_consume { dir env_cmd method opts dbfile txncmd} { + global log_log_record_types + global alphabet + source ./include.tcl + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $dir + } + + env_cleanup $dir + # Open the environment and set the copy/abort locations + set env [eval $env_cmd] + + set oflags "-create -auto_commit $method -mode 0644 -pagesize 512 \ + -env $env $opts $dbfile" + + # + # Open our db, add some data, close and copy as our + # init file. 
+ # + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + set extnum "/__dbq..0" + set data [chop_data $method [replicate $alphabet 512]] + + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + set putrecno [$db put -txn $txn -append $data] + error_check_good db_put $putrecno 1 + error_check_good commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + + puts "\t\tExecuting command" + + set init_file $dir/$dbfile.init + catch { file copy -force $dir/$dbfile $init_file } res + copy_extent_file $dir $dbfile init + + # + # If we don't abort, then we expect success. + # If we abort, we expect no file removed until recovery is run. + # + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + set t [$env txn] + error_check_good txn_begin [is_valid_txn $t $env] TRUE + + set dbcmd "$db get -txn $t -consume" + set ret [eval $dbcmd] + error_check_good db_sync [$db sync] 0 + + catch { file copy -force $dir/$dbfile $dir/$dbfile.afterop } res + copy_extent_file $dir $dbfile afterop + + error_check_good txn_$txncmd:$t [$t $txncmd] 0 + error_check_good db_sync [$db sync] 0 + set dbq [make_ext_filename $dir $dbfile $extnum] + if {$txncmd == "abort"} { + # + # Operation was aborted, verify ext did not change. + # + puts "\t\tCommand executed and aborted." + + # + # Check that the file exists. Final state. + # Since we aborted the txn, we should be able + # to get to our original entry. + # + error_check_good postconsume.1 [file exists $dbq] 1 + error_check_good \ + diff(init,postconsume.2):diff($init_file,$dir/$dbfile)\ + [dbdump_diff "-dar" $init_file $dir $dbfile] 0 + } else { + # + # Operation was committed, verify it does + # not exist. + # + puts "\t\tCommand executed and committed." + # + # Check file existence. Consume operations remove + # the extent when we move off, which we should have + # done. 
+ error_check_good consume_exists [file exists $dbq] 0 + } + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 + + # + # Run recovery here on what we ended up with. Should be a no-op. + # + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (no-op) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + if { $txncmd == "abort"} { + # + # Operation was aborted, verify it did not change. + # + error_check_good \ + diff(initial,post-recover1):diff($init_file,$dir/$dbfile) \ + [dbdump_diff "-dar" $init_file $dir $dbfile] 0 + } else { + # + # Operation was committed, verify it does + # not exist. Both operations should result + # in no file existing now that we've run recovery. + # + error_check_good after_recover1 [file exists $dbq] 0 + } + + # + # Run recovery here. Re-do the operation. + # Verify that the file doesn't exist + # (if we committed) or change (if we aborted) + # when we are done. + # + catch { file copy -force $dir/$dbfile $init_file } res + copy_extent_file $dir $dbfile init + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (init) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + if { $txncmd == "abort"} { + # + # Operation was aborted, verify it did not change. + # + error_check_good \ + diff(initial,post-recover1):diff($init_file,$dir/$dbfile) \ + [dbdump_diff "-dar" $init_file $dir $dbfile] 0 + } else { + # + # Operation was committed, verify it does + # not exist. Both operations should result + # in no file existing now that we've run recovery. + # + error_check_good after_recover2 [file exists $dbq] 0 + } + + # + # Now move the .afterop file to $dbfile. Run recovery again. 
+ # + set filecopy [glob $dir/*.afterop] + set afterop [lindex $filecopy 0] + file rename -force $afterop $dir/$dbfile + set afterop [string range $afterop \ + [expr [string last "/" $afterop] + 1] \ + [string last "." $afterop]] + move_file_extent $dir $dbfile afterop rename + + berkdb debug_check + puts -nonewline "\t\tAbout to run recovery (afterop) ... " + flush stdout + + set stat [catch {exec $util_path/db_recover -h $dir -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + return + } + puts "complete" + + if { $txncmd == "abort"} { + # + # Operation was aborted, verify it did not change. + # + error_check_good \ + diff(initial,post-recover2):diff($init_file,$dir/$dbfile) \ + [dbdump_diff "-dar" $init_file $dir $dbfile] 0 + } else { + # + # Operation was committed, verify it still does + # not exist. + # + error_check_good after_recover3 [file exists $dbq] 0 + } +} diff --git a/test/tcl/recd015.tcl b/test/tcl/recd015.tcl new file mode 100644 index 00000000..2110bb8e --- /dev/null +++ b/test/tcl/recd015.tcl @@ -0,0 +1,151 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd015 +# TEST This is a recovery test for testing lots of prepared txns. +# TEST This test is to force the use of txn_recover to call with the +# TEST DB_FIRST flag and then DB_NEXT. +proc recd015 { method args } { + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd015: $method ($args) prepared txns test" + + # Create the database and environment. 
+ + set numtxns 1 + set testfile NULL + + set env_cmd "berkdb_env -create -txn wrnosync -home $testdir" + set msg "\tRecd015.a" + foreach op { abort commit discard } { + puts "$msg: Simple test to prepare $numtxns txn with $op " + env_cleanup $testdir + recd015_body $env_cmd $testfile $numtxns $msg $op + } + + # + # Now test large numbers of prepared txns to test DB_NEXT + # on txn_recover. + # + set numtxns 10000 + set txnmax [expr $numtxns + 5] + set env_cmd "berkdb_env -create -txn_max $txnmax \ + -lock_max_lockers $txnmax -txn -home $testdir" + + set msg "\tRecd015.b" + foreach op { abort commit discard } { + puts "$msg: Large test to prepare $numtxns txn with $op" + env_cleanup $testdir + recd015_body $env_cmd $testfile $numtxns $msg $op + } + + set stat [catch {exec $util_path/db_printlog -h $testdir \ + > $testdir/LOG } ret] + error_check_good db_printlog $stat 0 + fileremove $testdir/LOG +} + +proc recd015_body { env_cmd testfile numtxns msg op } { + source ./include.tcl + + sentinel_init + set gidf $testdir/gidfile + fileremove -f $gidf + set pidlist {} + puts "$msg.0: Executing child script to prepare txns" + berkdb debug_check + set p [exec $tclsh_path $test_path/wrap.tcl recd15scr.tcl \ + $testdir/recdout $env_cmd $testfile $gidf $numtxns &] + + lappend pidlist $p + watch_procs $pidlist 5 + set f1 [open $testdir/recdout r] + set r [read $f1] + puts $r + close $f1 + fileremove -f $testdir/recdout + + berkdb debug_check + puts -nonewline "$msg.1: Running recovery ... 
" + flush stdout + berkdb debug_check + set env [eval $env_cmd -recover] + error_check_good dbenv-recover [is_valid_env $env] TRUE + puts "complete" + + puts "$msg.2: getting txns from txn_recover" + set txnlist [$env txn_recover] + error_check_good txnlist_len [llength $txnlist] $numtxns + + set gfd [open $gidf r] + set i 0 + while { [gets $gfd gid] != -1 } { + set gids($i) $gid + incr i + } + close $gfd + # + # Make sure we have as many as we expect + error_check_good num_gids $i $numtxns + + set i 0 + puts "$msg.3: comparing GIDs and $op txns" + foreach tpair $txnlist { + set txn [lindex $tpair 0] + set gid [lindex $tpair 1] + error_check_good gidcompare $gid $gids($i) + error_check_good txn:$op [$txn $op] 0 + incr i + } + if { $op != "discard" } { + error_check_good envclose [$env close] 0 + return + } + # + # If we discarded, now do it again and randomly resolve some + # until all txns are resolved. + # + puts "$msg.4: resolving/discarding txns" + set txnlist [$env txn_recover] + set len [llength $txnlist] + set opval(1) "abort" + set opcnt(1) 0 + set opval(2) "commit" + set opcnt(2) 0 + set opval(3) "discard" + set opcnt(3) 0 + while { $len != 0 } { + set opicnt(1) 0 + set opicnt(2) 0 + set opicnt(3) 0 + # + # Abort/commit or discard them randomly until + # all are resolved. 
+ # + for { set i 0 } { $i < $len } { incr i } { + set t [lindex $txnlist $i] + set txn [lindex $t 0] + set newop [berkdb random_int 1 3] + set ret [$txn $opval($newop)] + error_check_good txn_$opval($newop):$i $ret 0 + incr opcnt($newop) + incr opicnt($newop) + } +# puts "$opval(1): $opicnt(1) Total: $opcnt(1)" +# puts "$opval(2): $opicnt(2) Total: $opcnt(2)" +# puts "$opval(3): $opicnt(3) Total: $opcnt(3)" + + set txnlist [$env txn_recover] + set len [llength $txnlist] + } + + error_check_good envclose [$env close] 0 +} diff --git a/test/tcl/recd016.tcl b/test/tcl/recd016.tcl new file mode 100644 index 00000000..983acc63 --- /dev/null +++ b/test/tcl/recd016.tcl @@ -0,0 +1,180 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd016 +# TEST Test recovery after checksum error. +proc recd016 { method args} { + global fixed_len + global log_log_record_types + global datastr + source ./include.tcl + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd016: skipping for specific pagesizes" + return + } + if { [is_queueext $method] == 1 || [is_partitioned $args]} { + puts "Recd016: skipping for method $method" + return + } + + puts "Recd016: $method recovery after checksum error" + + # Create the database and environment. + env_cleanup $testdir + + set testfile recd016.db + set flags "-create -txn wrnosync -home $testdir" + + puts "\tRecd016.a: creating environment" + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set pgsize 512 + set orig_fixed_len $fixed_len + set fixed_len [expr $pgsize / 4] + set opts [convert_args $method $args] + set omethod [convert_method $method] + set oflags "-create $omethod -mode 0644 \ + -auto_commit -chksum -pagesize $pgsize $opts $testfile" + set db [eval {berkdb_open} -env $dbenv $oflags] + + # + # Put some data. 
+ # + set nument 50 + puts "\tRecd016.b: Put some data" + for { set i 1 } { $i <= $nument } { incr i } { + # Use 'i' as key so method doesn't matter + set key $i + set data $i$datastr + + # Put, in a txn. + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + error_check_good db_put \ + [$db put -txn $txn $key [chop_data $method $data]] 0 + error_check_good txn_commit [$txn commit] 0 + } + error_check_good db_close [$db close] 0 + error_check_good log_flush [$dbenv log_flush] 0 + error_check_good env_close [$dbenv close] 0 + # + # We need to remove the env so that we don't get cached + # pages. + # + error_check_good env_remove [berkdb envremove -home $testdir] 0 + + puts "\tRecd016.c: Overwrite part of database" + # + # First just touch some bits in the file. We want to go + # through the paging system, so touch some data pages, + # like the middle of page 2. + # We should get a checksum error for the checksummed file. + # + set pg 2 + set fid [open $testdir/$testfile r+] + fconfigure $fid -translation binary + set seeklen [expr $pgsize * $pg + 200] + seek $fid $seeklen start + set byte [read $fid 1] + binary scan $byte c val + set newval [expr ~$val] + set newbyte [binary format c $newval] + seek $fid $seeklen start + puts -nonewline $fid $newbyte + close $fid + + # + # Verify we get the checksum error. When we get it, it should + # log the error as well, so when we run recovery we'll need to + # do catastrophic recovery. We do this in a sub-process so that + # the files are closed after the panic. 
+ # + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + + set env_cmd "berkdb_env_noerr $flags" + set dbenv [send_cmd $f1 $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set db [send_cmd $f1 "{berkdb_open_noerr} -env $dbenv $oflags"] + error_check_good db [is_valid_db $db] TRUE + + # We need to set non-blocking mode so that after each command + # we can read all the remaining output from that command and + # we can know what the output from one command is. + fconfigure $f1 -blocking 0 + set ret [read $f1] + set got_err 0 + for { set i 1 } { $i <= $nument } { incr i } { + set stat [send_cmd $f1 "catch {$db get $i} r"] + set getret [send_cmd $f1 "puts \$r"] + set ret [read $f1] + if { $stat == 1 } { + error_check_good dbget:fail [is_substr $getret \ + "checksum error: page $pg"] 1 + set got_err 1 + break + } else { + set key [lindex [lindex $getret 0] 0] + set data [lindex [lindex $getret 0] 1] + error_check_good keychk $key $i + error_check_good datachk $data \ + [pad_data $method $i$datastr] + } + } + error_check_good got_chksum $got_err 1 + set ret [send_cmd $f1 "$db close"] + set extra [read $f1] + error_check_good db:fail [is_substr $ret "run recovery"] 1 + + set ret [send_cmd $f1 "$dbenv close"] + error_check_good env_close:fail [is_substr $ret "handles still open"] 1 + close $f1 + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $testdir + } + + puts "\tRecd016.d: Run normal recovery" + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 1 + error_check_good dbrec:fail \ + [is_substr $r "checksum error"] 1 + + catch {fileremove $testdir/$testfile} ret + puts "\tRecd016.e: Run catastrophic recovery" + set ret [catch {exec $util_path/db_recover -c -h $testdir} r] + error_check_good db_recover $ret 0 + + # + # Now verify the data was reconstructed correctly. 
+ # + set env_cmd "berkdb_env_noerr $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set db [eval {berkdb_open} -env $dbenv $oflags] + error_check_good db [is_valid_db $db] TRUE + + for { set i 1 } { $i <= $nument } { incr i } { + set stat [catch {$db get $i} ret] + error_check_good stat $stat 0 + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good keychk $key $i + error_check_good datachk $data [pad_data $method $i$datastr] + } + error_check_good db_close [$db close] 0 + error_check_good log_flush [$dbenv log_flush] 0 + error_check_good env_close [$dbenv close] 0 + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/recd017.tcl b/test/tcl/recd017.tcl new file mode 100644 index 00000000..bc76e625 --- /dev/null +++ b/test/tcl/recd017.tcl @@ -0,0 +1,157 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd017 +# TEST Test recovery and security. This is basically a watered +# TEST down version of recd001 just to verify that encrypted environments +# TEST can be recovered. +proc recd017 { method {select 0} args} { + global fixed_len + global encrypt + global passwd + global has_crypto + source ./include.tcl + + # Skip test if release does not support encryption. + if { $has_crypto == 0 } { + puts "Skipping recd017 for non-crypto release." + return + } + + set orig_fixed_len $fixed_len + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd017: $method operation/transaction tests" + + # Create the database and environment. + env_cleanup $testdir + + # The recovery tests were originally written to + # do a command, abort, do it again, commit, and then + # repeat the sequence with another command. Each command + # tends to require that the previous command succeeded and + # left the database a certain way. 
To avoid cluttering up the + # op_recover interface as well as the test code, we create two + # databases; one does abort and then commit for each op, the + # other does prepare, prepare-abort, and prepare-commit for each + # op. If all goes well, this allows each command to depend + # exactly one successful iteration of the previous command. + set testfile recd017.db + set testfile2 recd017-2.db + + set flags "-create -encryptaes $passwd -txn wrnosync -home $testdir" + + puts "\tRecd017.a.0: creating environment" + set env_cmd "berkdb_env $flags" + convert_encrypt $env_cmd + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # + # We need to create a database to get the pagesize (either + # the default or whatever might have been specified). + # Then remove it so we can compute fixed_len and create the + # real database. + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv -encrypt $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set stat [$db stat] + # + # Compute the fixed_len based on the pagesize being used. + # We want the fixed_len to be 1/4 the pagesize. + # + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set fixed_len [expr $pg / 4] + error_check_good db_close [$db close] 0 + error_check_good dbremove [berkdb dbremove -env $dbenv $testfile] 0 + + # Convert the args again because fixed_len is now real. + # Create the databases and close the environment. + # cannot specify db truncate in txn protected env!!! 
+ set opts [convert_args $method $args] + convert_encrypt $env_cmd + set omethod [convert_method $method] + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv -encrypt $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + set oflags "-create -auto_commit $omethod -mode 0644 \ + -env $dbenv -encrypt $opts $testfile2" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + error_check_good env_close [$dbenv close] 0 + + puts "\tRecd017.a.1: Verify db_printlog can read logfile" + set tmpfile $testdir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $testdir -P $passwd \ + > $tmpfile} ret] + error_check_good db_printlog $stat 0 + fileremove $tmpfile + + # List of recovery tests: {CMD MSG} pairs. + set rlist { + { {DB put -txn TXNID $key $data} "Recd017.b: put"} + { {DB del -txn TXNID $key} "Recd017.c: delete"} + } + + # These are all the data values that we're going to need to read + # through the operation table and run the recovery tests. 
+ + if { [is_record_based $method] == 1 } { + set key 1 + } else { + set key recd017_key + } + set data recd017_data + foreach pair $rlist { + set cmd [subst [lindex $pair 0]] + set msg [lindex $pair 1] + if { $select != 0 } { + set tag [lindex $msg 0] + set tail [expr [string length $tag] - 2] + set tag [string range $tag $tail $tail] + if { [lsearch $select $tag] == -1 } { + continue + } + } + + if { [is_queue $method] != 1 } { + if { [string first append $cmd] != -1 } { + continue + } + if { [string first consume $cmd] != -1 } { + continue + } + } + +# if { [is_fixed_length $method] == 1 } { +# if { [string first partial $cmd] != -1 } { +# continue +# } +# } + op_recover abort $testdir $env_cmd $testfile $cmd $msg $args + op_recover commit $testdir $env_cmd $testfile $cmd $msg $args + # + # Note that since prepare-discard ultimately aborts + # the txn, it must come before prepare-commit. + # + op_recover prepare-abort $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-discard $testdir $env_cmd $testfile2 \ + $cmd $msg $args + op_recover prepare-commit $testdir $env_cmd $testfile2 \ + $cmd $msg $args + } + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/recd018.tcl b/test/tcl/recd018.tcl new file mode 100644 index 00000000..d3d86326 --- /dev/null +++ b/test/tcl/recd018.tcl @@ -0,0 +1,109 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd018 +# TEST Test recover of closely interspersed checkpoints and commits. +# +# This test is from the error case from #4230. +# +proc recd018 { method {ndbs 10} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "018" + + puts "Recd$tnum ($args): $method recovery of checkpoints and commits." 
+ + set tname recd$tnum.db + env_cleanup $testdir + + set i 0 + if { [is_record_based $method] == 1 } { + set key 1 + set key2 2 + } else { + set key KEY + set key2 KEY2 + } + + puts "\tRecd$tnum.a: Create environment and database." + set flags "-create -txn wrnosync -home $testdir" + + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + set oflags "-auto_commit -env $dbenv -create -mode 0644 $args $omethod" + for { set i 0 } { $i < $ndbs } { incr i } { + set testfile $tname.$i + set db($i) [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db($i)] TRUE + set file $testdir/$testfile.init + catch { file copy -force $testdir/$testfile $file} res + copy_extent_file $testdir $testfile init + } + + # Main loop: Write a record or two to each database. + # Do a commit immediately followed by a checkpoint after each one. + error_check_good "Initial Checkpoint" [$dbenv txn_checkpoint] 0 + + puts "\tRecd$tnum.b Put/Commit/Checkpoint to $ndbs databases" + for { set i 0 } { $i < $ndbs } { incr i } { + set testfile $tname.$i + set data $i + + # Put, in a txn. 
+ set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + error_check_good db_put \ + [$db($i) put -txn $txn $key [chop_data $method $data]] 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good txn_checkpt [$dbenv txn_checkpoint] 0 + if { [expr $i % 2] == 0 } { + set txn [$dbenv txn] + error_check_good txn2 [is_valid_txn $txn $dbenv] TRUE + error_check_good db_put [$db($i) put \ + -txn $txn $key2 [chop_data $method $data]] 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good txn_checkpt [$dbenv txn_checkpoint] 0 + } + error_check_good db_close [$db($i) close] 0 + set file $testdir/$testfile.afterop + catch { file copy -force $testdir/$testfile $file} res + copy_extent_file $testdir $testfile afterop + } + error_check_good env_close [$dbenv close] 0 + + # Now, loop through and recover to each timestamp, verifying the + # expected increment. + puts "\tRecd$tnum.c: Run recovery (no-op)" + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + + puts "\tRecd$tnum.d: Run recovery (initial file)" + for { set i 0 } {$i < $ndbs } { incr i } { + set testfile $tname.$i + set file $testdir/$testfile.init + catch { file copy -force $file $testdir/$testfile } res + move_file_extent $testdir $testfile init copy + } + + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + + puts "\tRecd$tnum.e: Run recovery (after file)" + for { set i 0 } {$i < $ndbs } { incr i } { + set testfile $tname.$i + set file $testdir/$testfile.afterop + catch { file copy -force $file $testdir/$testfile } res + move_file_extent $testdir $testfile afterop copy + } + + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + +} diff --git a/test/tcl/recd019.tcl b/test/tcl/recd019.tcl new file mode 100644 index 00000000..2d3c3f05 --- /dev/null +++ b/test/tcl/recd019.tcl @@ -0,0 +1,122 @@ +# See the file LICENSE for 
redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd019 +# TEST Test txn id wrap-around and recovery. +proc recd019 { method {numid 50} args} { + global fixed_len + global txn_curid + global log_log_record_types + source ./include.tcl + + set orig_fixed_len $fixed_len + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd019: $method txn id wrap-around test" + + # Create the database and environment. + env_cleanup $testdir + + set testfile recd019.db + + set flags "-create -txn wrnosync -home $testdir" + + puts "\tRecd019.a: creating environment" + set env_cmd "berkdb_env $flags" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # Test txn wrapping. Force a txn_recycle msg. + # + set new_curid $txn_curid + set new_maxid [expr $new_curid + $numid] + error_check_good txn_id_set [$dbenv txn_id_set $new_curid $new_maxid] 0 + + # + # We need to create a database to get the pagesize (either + # the default or whatever might have been specified). + # Then remove it so we can compute fixed_len and create the + # real database. + set oflags "-create $omethod -mode 0644 \ + -env $dbenv $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + set stat [$db stat] + # + # Compute the fixed_len based on the pagesize being used. + # We want the fixed_len to be 1/4 the pagesize. + # + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set fixed_len [expr $pg / 4] + error_check_good db_close [$db close] 0 + error_check_good dbremove [berkdb dbremove -env $dbenv $testfile] 0 + + # Convert the args again because fixed_len is now real. + # Create the databases and close the environment. + # cannot specify db truncate in txn protected env!!! 
+ set opts [convert_args $method $args] + set omethod [convert_method $method] + set oflags "-create $omethod -mode 0644 \ + -env $dbenv -auto_commit $opts $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + # + # Force txn ids to wrap twice and then some. + # + set nument [expr $numid * 3 - 2] + puts "\tRecd019.b: Wrapping txn ids after $numid" + set file $testdir/$testfile.init + catch { file copy -force $testdir/$testfile $file} res + copy_extent_file $testdir $testfile init + for { set i 1 } { $i <= $nument } { incr i } { + # Use 'i' as key so method doesn't matter + set key $i + set data $i + + # Put, in a txn. + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + error_check_good db_put \ + [$db put -txn $txn $key [chop_data $method $data]] 0 + error_check_good txn_commit [$txn commit] 0 + } + error_check_good db_close [$db close] 0 + set file $testdir/$testfile.afterop + catch { file copy -force $testdir/$testfile $file} res + copy_extent_file $testdir $testfile afterop + error_check_good env_close [$dbenv close] 0 + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $testdir + } + + # Now, loop through and recover. 
+ puts "\tRecd019.c: Run recovery (no-op)" + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + + puts "\tRecd019.d: Run recovery (initial file)" + set file $testdir/$testfile.init + catch { file copy -force $file $testdir/$testfile } res + move_file_extent $testdir $testfile init copy + + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + + puts "\tRecd019.e: Run recovery (after file)" + set file $testdir/$testfile.afterop + catch { file copy -force $file $testdir/$testfile } res + move_file_extent $testdir $testfile afterop copy + + set ret [catch {exec $util_path/db_recover -h $testdir} r] + error_check_good db_recover $ret 0 + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/recd020.tcl b/test/tcl/recd020.tcl new file mode 100644 index 00000000..3d56488b --- /dev/null +++ b/test/tcl/recd020.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd020 +# TEST Test creation of intermediate directories -- an +# TEST undocumented, UNIX-only feature. +# +proc recd020 { method args } { + source ./include.tcl + global tcl_platform + + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "020" + set nentries 10 + + if { $tcl_platform(platform) != "unix" } { + puts "Skipping recd$tnum for non-UNIX platform." + return + } + + puts "Recd$tnum ($method):\ + Test creation of intermediate directories in recovery." + + # Create the original intermediate directory. + env_cleanup $testdir + set intdir INTDIR + file mkdir $testdir/$intdir + + set testfile recd$tnum.db + set flags "-create -txn -home $testdir" + + puts "\tRecd$tnum.a: Create environment and populate database." 
+ set env_cmd "berkdb_env $flags" + set env [eval $env_cmd] + error_check_good env [is_valid_env $env] TRUE + + set db [eval berkdb_open \ + -create $omethod $args -env $env -auto_commit $intdir/$testfile] + error_check_good db_open [is_valid_db $db] TRUE + + set txn [$env txn] + set data "data" + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good db_put [eval \ + {$db put} -txn $txn $i [chop_data $method $data.$i]] 0 + } + error_check_good txn_commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + + puts "\tRecd$tnum.b: Remove intermediate directory." + error_check_good directory_there [file exists $testdir/$intdir] 1 + file delete -force $testdir/$intdir + error_check_good directory_gone [file exists $testdir/$intdir] 0 + + puts "\tRecd020.c: Run recovery, recreating intermediate directory." + set env [eval $env_cmd -set_intermediate_dir_mode "rwxr-x--x" -recover] + error_check_good env [is_valid_env $env] TRUE + + puts "\tRecd020.d: Reopen test file to verify success." + set db [eval {berkdb_open} -env $env $args $intdir/$testfile] + error_check_good db_open [is_valid_db $db] TRUE + for { set i 1 } { $i <= $nentries } { incr i } { + set ret [$db get $i] + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good key $k $i + error_check_good data $d [pad_data $method $data.$i] + } + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + +} diff --git a/test/tcl/recd021.tcl b/test/tcl/recd021.tcl new file mode 100644 index 00000000..33267f4f --- /dev/null +++ b/test/tcl/recd021.tcl @@ -0,0 +1,278 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd021 +# TEST Test of failed opens in recovery. 
+# TEST +# TEST If a file was deleted through the file system (and not +# TEST within Berkeley DB), an error message should appear. +# TEST Test for regular files and subdbs. + +proc recd021 { method args } { + source ./include.tcl + global util_path + + set args [convert_args $method $args] + set omethod [convert_method $method] + set nentries 100 + + puts "\nRecd021: ($method)\ + Test failed opens in recovery." + + # The file ops "remove" and "rename" are done within + # Berkeley DB. A "delete" is done directly on the file + # system (as if the user deleted the file). + # + # First test regular files. + # + foreach op { remove rename delete noop } { + env_cleanup $testdir + puts "\tRecd021: Test $op of file in recovery." + + # Create transactional environment. + set env [berkdb_env -create -home $testdir -txn] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Create database + puts "\t\tRecd021.a.1: Create and populate file." + + if { $op == "rename" } { + set names {A B} + } else { + set names {A} + } + set name [lindex $names 0] + + set db [eval {berkdb_open \ + -create} $omethod $args -env $env -auto_commit $name.db] + error_check_good dba_open [is_valid_db $db] TRUE + + # Checkpoint. + error_check_good txn_checkpoint [$env txn_checkpoint] 0 + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good dba_put [$db put $i data$i] 0 + } + error_check_good dba_close [$db close] 0 + + # Do operation on file. + puts "\t\tRecd021.b: Do $op on file." + set txn [$env txn] + set ret [do_op $omethod $op $names $txn $env] + error_check_good do_op $ret 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good env_close [$env close] 0 + + # Recover. + puts "\t\tRecd021.c: Recover." + set ret [catch {exec $util_path/db_recover -h $testdir} r] + if { $op == "delete" } { + error_check_good external_delete \ + [is_substr $r "Warning: open failed"] 1 + } else { + error_check_good $op $ret 0 + } + + # Clean up. 
+ error_check_good \ + env_remove [berkdb envremove -force -home $testdir] 0 + fileremove -f $testdir/$name.db + } + + # Test subdbs. + if { [is_queue $method] == 1 } { + puts "Recd021: Skipping test of subdbs for method $method." + return + } + + # The first subdb test just does the op, and is comparable + # to the tests for regular files above. + set trunc 0 + set special {} + foreach op { remove rename delete noop } { + recd021_testsubdb $method $op $nentries $special $trunc $args + } + + # The remainder of the tests are executed first with the log intact, + # then with the log truncated at the __db_subdb_name record. + foreach trunc { 0 1 } { + # Test what happens if subdb2 reuses pages formerly in + # subdb1, after removing subdb1. + set special "reuse" + recd021_testsubdb $method remove $nentries $special $trunc $args + + # Test what happens if a new subdb reuses pages formerly + # in subdb1, after removing subdb1. + set special "newdb" + recd021_testsubdb $method remove $nentries $special $trunc $args + + # Now we test what happens if a new subdb if a different access + # method reuses pages formerly in subdb1, after removing subdb1. + set special "newtypedb" + recd021_testsubdb $method remove $nentries $special $trunc $args + } +} + +proc recd021_testsubdb { method op nentries special trunc largs } { + source ./include.tcl + global util_path + + set omethod [convert_method $method] + env_cleanup $testdir + + puts "\tRecd021: \ + Test $op of subdb in recovery ($special trunc = $trunc)." + + # Create transactional environment. + set env [berkdb_env -create -home $testdir -txn] + error_check_good is_valid_env [is_valid_env $env] TRUE + + # Create database with 2 subdbs + puts "\t\tRecd021.d: Create and populate subdbs." 
+ set sname1 S1 + set sname2 S2 + if { $op == "rename" } { + set names {A S1 NEW_S1} + } elseif { $op == "delete" } { + set names {A} + } else { + set names {A S1} + } + set name [lindex $names 0] + + set sdb1 [eval {berkdb_open -create} $omethod \ + $largs -env $env -auto_commit $name.db $sname1] + error_check_good sdb1_open [is_valid_db $sdb1] TRUE + set sdb2 [eval {berkdb_open -create} $omethod \ + $largs -env $env -auto_commit $name.db $sname2] + error_check_good sdb2_open [is_valid_db $sdb2] TRUE + + # Checkpoint. + error_check_good txn_checkpoint [$env txn_checkpoint] 0 + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good sdb1_put [$sdb1 put $i data$i] 0 + } + set dumpfile dump.s1.$trunc + set ret [exec $util_path/db_dump -dar -f $dumpfile -h $testdir A.db] + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good sdb2_put [$sdb2 put $i data$i] 0 + } + error_check_good sdb1_close [$sdb1 close] 0 + + # Do operation on subdb. + puts "\t\tRecd021.e: Do $op on file." + set txn [$env txn] + + if { $trunc == 1 } { + # Create a log cursor to mark where we are before + # doing the op. + set logc [$env log_cursor] + set ret [lindex [$logc get -last] 0] + file copy -force $testdir/log.0000000001 $testdir/log.sav + } + + set ret [do_subdb_op $omethod $op $names $txn $env] + error_check_good do_subdb_op $ret 0 + error_check_good txn_commit [$txn commit] 0 + + if { $trunc == 1 } { + # Walk the log and find the __db_subdb_name entry. + set found 0 + while { $found == 0 } { + set lsn [lindex [$logc get -next] 0] + set lfile [lindex $lsn 0] + set loff [lindex $lsn 1] + set logrec [exec $util_path/db_printlog -h $testdir \ + -b $lfile/$loff -e $lfile/$loff] + if { [is_substr $logrec __db_subdb_name] == 1 } { + set found 1 + } + } + # Create the truncated log, and save it for later. + catch [exec dd if=$testdir/log.0000000001 \ + of=$testdir/log.sav count=$loff bs=1 >& /dev/null ] res + } + + # Here we do the "special" thing, if any. 
We always + # have to close sdb2, but when we do so varies. + switch -exact -- $special { + "" { + error_check_good sdb2_close [$sdb2 close] 0 + } + reuse { + for { set i [expr $nentries + 1] } \ + { $i <= [expr $nentries * 2]} { incr i } { + error_check_good sdb2_put \ + [$sdb2 put $i data$i] 0 + } + error_check_good sdb2_close [$sdb2 close] 0 + set dumpfile dump.s2.$trunc + set ret [exec $util_path/db_dump -dar \ + -f $dumpfile -h $testdir A.db] + } + newdb { + error_check_good sdb2_close [$sdb2 close] 0 + set sname3 S3 + set sdb3 [eval {berkdb_open -create} $omethod \ + $largs -env $env -auto_commit $name.db $sname3] + error_check_good sdb3_open [is_valid_db $sdb3] TRUE + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good sdb3_put \ + [$sdb3 put $i data$i] 0 + } + error_check_good sdb3_close [$sdb3 close] 0 + } + newtypedb { + error_check_good sdb2_close [$sdb2 close] 0 + set sname4 S4 + set newmethod [different_method $method] + set args [convert_args $newmethod] + set omethod [convert_method $newmethod] + set sdb4 [eval {berkdb_open -create} $omethod \ + $args -env $env -auto_commit $name.db $sname4] + error_check_good sdb4_open [is_valid_db $sdb4] TRUE + for { set i 1 } { $i <= $nentries } { incr i } { + error_check_good sdb4_put \ + [$sdb4 put $i data$i] 0 + } + error_check_good sdb4_close [$sdb4 close] 0 + } + } + + # Close the env. + error_check_good env_close [$env close] 0 + + if { $trunc == 1 } { + # Swap in the truncated log. + file rename -force $testdir/log.sav $testdir/log.0000000001 + } + + # Recover. + puts "\t\tRecd021.f: Recover." + set ret [catch {exec $util_path/db_recover -h $testdir} r] + if { $op == "delete" || $trunc == 1 && $special != "newdb" } { + error_check_good expect_warning \ + [is_substr $r "Warning: open failed"] 1 + } else { + error_check_good subdb_$op $ret 0 + } + + # Clean up. 
+ error_check_good env_remove [berkdb envremove -force -home $testdir] 0 + fileremove -f $testdir/$name.db +} + +proc different_method { method } { + # Queue methods are omitted, since this is for subdb testing. + set methodlist { -btree -rbtree -recno -frecno -rrecno -hash } + + set method [convert_method $method] + set newmethod $method + while { $newmethod == $method } { + set index [berkdb random_int 0 [expr [llength $methodlist] - 1]] + set newmethod [lindex $methodlist $index] + } + return $newmethod +} diff --git a/test/tcl/recd022.tcl b/test/tcl/recd022.tcl new file mode 100644 index 00000000..eeda6b79 --- /dev/null +++ b/test/tcl/recd022.tcl @@ -0,0 +1,136 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd022 +# TEST Test that pages allocated by an aborted subtransaction +# TEST within an aborted prepared parent transaction are returned +# TEST to the free list after recovery. This exercises +# TEST __db_pg_prepare in systems without FTRUNCATE. [#7403] + +proc recd022 { method args} { + global log_log_record_types + global fixed_len + global is_hp_test + source ./include.tcl + + # Skip test for specified page sizes -- we want to + # specify our own page size. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd022: Skipping for specific pagesizes" + return + } + + # Skip the test for HP-UX, where we can't open an env twice. + if { $is_hp_test == 1 } { + puts "Recd022: Skipping for HP-UX." + return + } + + + # Increase size of fixed-length records to match other methods. + set orig_fixed_len $fixed_len + set fixed_len 53 + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd022: ($method) Page allocation and recovery" + puts "Recd022: with aborted prepared txns and child txns." + + # Create the database and environment. 
+ env_cleanup $testdir + set testfile recd022.db + + puts "\tRecd022.a: creating environment" + # We open the env and database with _noerr so we don't + # get error messages when cleaning up at the end of the test. + set env_cmd "berkdb_env_noerr -create -txn -home $testdir" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # Open database with small pages. + puts "\tRecd022.b: creating database with small pages" + set pagesize 512 + set oflags "-create $omethod -mode 0644 -pagesize $pagesize \ + -env $dbenv -auto_commit $opts $testfile" + set db [eval {berkdb_open_noerr} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tRecd022.c: start transaction, put some data" + set iter 10 + set datasize 53 + set data [repeat "a" $datasize] + set iter2 [expr $iter * 2] + + # Start parent and child txns. + puts "\tRecd022.d: start child txn, put some data" + set parent [$dbenv txn] + set child1 [$dbenv txn -parent $parent] + + # Child puts some new data. + for { set i 1 } {$i <= $iter } { incr i } { + eval {$db put} -txn $child1 $i $data + } + + # Abort the child txn. + puts "\tRecd022.e: abort child txn" + error_check_good child1_abort [$child1 abort] 0 + + # Start a second child. Put some data, enough to allocate + # a new page, then delete it. + puts "\tRecd022.f: start second child txn, put some data" + set child2 [$dbenv txn -parent $parent] + for { set i 1 } { $i <= $iter2 } { incr i } { + eval {$db put} -txn $child2 $i $data + } + for { set i 1 } { $i <= $iter2 } { incr i } { + eval {$db del} -txn $child2 $i + } + + # Put back half the data. 
+ for { set i 1 } { $i <= $iter } { incr i } { + eval {$db put} -txn $child2 $i $data + } + + # Commit second child + puts "\tRecd022.g: commit second child txn, prepare parent" + error_check_good child2_commit [$child2 commit] 0 + + # Prepare parent + error_check_good prepare [$parent prepare "ABC"] 0 + + # Recover, then abort the recovered parent txn + puts "\tRecd022.h: recover, then abort parent" + set env1 [berkdb_env -create -recover -home $testdir -txn] + set txnlist [$env1 txn_recover] + set aborttxn [lindex [lindex $txnlist 0] 0] + error_check_good parent_abort [$aborttxn abort] 0 + + # Verify database and then clean up. We still need to get + # rid of the handles created before recovery. + puts "\tRecd022.i: verify and clean up" + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + verify_dir $testdir "" 1 0 $nodump + set stat [catch {$db close} res] + error_check_good db_close [is_substr $res "run recovery"] 1 + error_check_good env1_close [$env1 close] 0 + set stat [catch {$dbenv close} res] + error_check_good dbenv_close [is_substr $res "run recovery"] 1 + + # Track the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $testdir + } + + # Set fixed_len back to the global value so we don't + # mess up other tests. + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/recd023.tcl b/test/tcl/recd023.tcl new file mode 100644 index 00000000..f8b7beba --- /dev/null +++ b/test/tcl/recd023.tcl @@ -0,0 +1,91 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd023 +# TEST Test recover of reverse split. 
+# +proc recd023 { method args } { + source ./include.tcl + env_cleanup $testdir + set tnum "023" + + if { [is_btree $method] != 1 && [is_rbtree $method] != 1 } { + puts "Skipping recd$tnum for method $method" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + puts "Recd$tnum ($omethod $args): Recovery of reverse split." + set testfile recd$tnum.db + + puts "\tRecd$tnum.a: Create environment and database." + set flags "-create -txn -home $testdir" + + set env_cmd "berkdb_env $flags" + set env [eval $env_cmd] + error_check_good env [is_valid_env $env] TRUE + + set pagesize 512 + set oflags "$omethod -auto_commit \ + -pagesize $pagesize -create -mode 0644 $args" + set db [eval {berkdb_open} -env $env $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Write to database -- enough to fill at least 3 levels. + puts "\tRecd$tnum.b: Create a 3 level btree database." + set nentries 1000 + set datastr [repeat x 45] + for { set i 1 } { $i < $nentries } { incr i } { + set key a$i + set ret [$db put $key [chop_data $method $datastr]] + error_check_good put $ret 0 + } + + # Verify we have enough levels. + set levels [stat_field $db stat "Levels"] + error_check_good 3_levels [expr $levels >= 3] 1 + + # Save the original database. + file copy -force $testdir/$testfile $testdir/$testfile.save + + # Delete enough pieces to collapse the tree. + puts "\tRecd$tnum.c: Do deletes to collapse database." 
+ for { set count 2 } { $count < 10 } { incr count } { + error_check_good db_del [$db del a$count] 0 + } + for { set count 15 } { $count < 100 } { incr count } { + error_check_good db_del [$db del a$count] 0 + } + for { set count 150 } { $count < 1000 } { incr count } { + error_check_good db_del [$db del a$count] 0 + } + + error_check_good db_close [$db close] 0 + error_check_good verify_dir\ + [verify_dir $testdir "\tRecd$tnum.d: " 0 0 $nodump] 0 + + # Overwrite the current database with the saved database. + file copy -force $testdir/$testfile.save $testdir/$testfile + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + + # Recover the saved database to roll forward and apply the deletes. + set env [berkdb_env -create -txn -home $testdir -recover] + error_check_good env_open [is_valid_env $env] TRUE + error_check_good log_flush [$env log_flush] 0 + error_check_good env_close [$env close] 0 + + error_check_good verify_dir\ + [verify_dir $testdir "\tRecd$tnum.e: " 0 0 $nodump] 0 +} diff --git a/test/tcl/recd024.tcl b/test/tcl/recd024.tcl new file mode 100644 index 00000000..b4b4e270 --- /dev/null +++ b/test/tcl/recd024.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd024 +# TEST Test recovery of streaming partial insert operations. These are +# TEST operations that do multiple partial puts that append to an existing +# TEST data item (as long as the data item is on an overflow page). +# TEST The interesting cases are: +# TEST * Simple streaming operations +# TEST * Operations that cause the overflow item to flow onto another page. 
+# TEST +proc recd024 { method args } { + source ./include.tcl + + # puts "$args" + set envargs "" + set pagesize 512 + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 } { + puts "Recd024 skipping for fixed length access methods." + return + } + set flags "-create -txn -home $testdir $envargs" + set env_cmd "berkdb_env $flags" + + set testfile recd024.db + set testfile2 recd024-2.db + if { [is_record_based $method] == 1 } { + set key 1 + } else { + set key recd024_key + } + + set len 512 + set part_data [replicate "abcdefgh" 64] + set p [list 0 $len] + # Insert one 512 byte data item prior to call. To get it off page. + # Append two more 512 byte data items, to enact the streaming code. + set cmd [subst \ + {DBC put -txn TXNID -partial "512 512" -current $part_data \ + NEW_CMD DBC put -txn TXNID -partial "1024 512" -current $part_data \ + NEW_CMD DBC put -txn TXNID -partial "1536 512" -current $part_data}] + set oflags "-create $omethod -mode 0644 $args \ + -pagesize $pagesize" + set msg "Recd024.a: partial put prepopulated/expanding" + foreach op {commit abort prepare-abort prepare-discard prepare-commit} { + env_cleanup $testdir + + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + set t [$dbenv txn] + error_check_good txn_begin [is_valid_txn $t $dbenv] TRUE + set db [eval {berkdb_open} \ + $oflags -env $dbenv -txn $t $testfile] + error_check_good db_open [is_valid_db $db] TRUE + set db2 [eval {berkdb_open} \ + $oflags -env $dbenv -txn $t $testfile2] + error_check_good db_open [is_valid_db $db2] TRUE + + set ret [$db put -txn $t -partial $p $key $part_data] + error_check_good dbput $ret 0 + + set ret [$db2 put -txn $t -partial $p $key $part_data] + error_check_good dbput $ret 0 + error_check_good txncommit [$t commit] 0 + error_check_good dbclose [$db close] 0 + error_check_good dbclose [$db2 close] 0 + error_check_good dbenvclose [$dbenv close] 0 + + op_recover 
$op $testdir $env_cmd $testfile $cmd $msg \ + $args + } + return +} + diff --git a/test/tcl/recd025.tcl b/test/tcl/recd025.tcl new file mode 100644 index 00000000..1a04d2c2 --- /dev/null +++ b/test/tcl/recd025.tcl @@ -0,0 +1,237 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST recd025 +# TEST Basic tests for transaction bulk loading and recovery. +# TEST In particular, verify that the tricky hot backup protocol works. + +# These tests check the following conditions + +# 1. We can abort a TXN_BULK transaction, both online and during recovery. +# 2. Tricky hot backup protocol works correctly +# - start bulk txn; populate, adding pages +# - set hotbackup_in_progress (forces chkpt, as bulk txn is in progress) +# - copy database file +# - populate more, adding more pages (with bulk optimization disabled) +# - commit +# - move copy back into test dir +# - roll forward, verify rolled-forward backup matches committed database + +# A more straightforward test of bulk transactions and hot backup is +# in the backup test. + +proc recd025 { method args } { + global fixed_len + global is_hp_test + source ./include.tcl + + # Skip test for specified page sizes -- we want to + # specify our own page size. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Recd025: Skipping for specific pagesizes" + return + } + + # Skip test for heap, as heap does not have BULK ops + if { [is_heap $method] == 1 } { + puts "Recd025 skipping for heap method." + return + } + + # Increase size of fixed-length records to match other methods. + # We pick an arbitrarily larger record size to ensure that we + # allocate several pages. + set orig_fixed_len $fixed_len + set fixed_len 53 + set opts [convert_args $method $args] + set omethod [convert_method $method] + + puts "Recd025: TXN_BULK page allocation and recovery" + + # Create the database and environment. 
+ env_cleanup $testdir + set testfile recd025.db + + puts "\tRecd025.1a: creating environment" + set env_cmd "berkdb_env -create -txn -home $testdir" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # Open database with small pages. + puts "\tRecd025.1b: creating and populating database with small pages" + set pagesize 512 + set oflags "-create $omethod -mode 0644 -pagesize $pagesize \ + -env $dbenv -auto_commit $opts $testfile" + set db [eval {berkdb_open} $oflags] + + error_check_good db_open [is_valid_db $db] TRUE + + set batchsize 20 + set lim 0 + set iter 1 + set datasize 53 + set data [repeat "a" $datasize] + + set t [$dbenv txn] + + for {set lim [expr $lim + $batchsize]} {$iter <= $lim } {incr iter} { + eval {$db put} -txn $t $iter $data + } + error_check_good txn_commit [$t commit] 0 + + error_check_good sync:$db [$db sync] 0 + + # Make a copy of the database now, for comparison + + catch { + file copy -force $testdir/$testfile $testdir/$testfile.orig + } res + copy_extent_file $testdir $testfile orig + eval open_and_dump_file $testdir/$testfile.orig NULL \ + $testdir/dump.orig nop dump_file_direction "-first" "-next" $opts + + puts "\tRecd025.1c: start bulk transaction, put data, allocating pages" + + set t [$dbenv txn -txn_bulk] + + for {set lim [expr $lim + $batchsize]} {$iter <= $lim } {incr iter} { + eval {$db put} -txn $t $iter $data + } + + # A copy before aborting + error_check_good sync:$db [$db sync] 0 + catch { + file copy -force $testdir/$testfile $testdir/$testfile.preabort + } res + copy_extent_file $testdir $testfile preabort + + puts "\tRecd025.1d: abort bulk transaction; verify undo of puts" + + error_check_good txn_abort [$t abort] 0 + + error_check_good sync:$db [$db sync] 0 + + eval open_and_dump_file $testdir/$testfile NULL \ + $testdir/dump.postabort nop dump_file_direction "-first" "-next" $opts + + filesort $testdir/dump.orig $testdir/dump.orig.sort + filesort $testdir/dump.postabort 
$testdir/dump.postabort.sort + + error_check_good verify_abort_diff \ + [filecmp $testdir/dump.orig.sort $testdir/dump.postabort.sort] 0 + + error_check_good db_close [$db close] 0 + reset_env $dbenv + + puts "\tRecd025.1e: recovery with allocations rolled back" + + # Move the preabort file into place, and run recovery + + catch { + file copy -force $testdir/$testfile.preabort $testdir/$testfile + } res + + set stat [catch {eval exec $util_path/db_recover -h $testdir -c } res] + if { $stat == 1 } { + error "FAIL: Recovery error: $res." + } + + eval open_and_dump_file $testdir/$testfile NULL \ + $testdir/dump.postrecovery nop dump_file_direction "-first" "-next" $opts + filesort $testdir/dump.postrecovery $testdir/dump.postrecovery.sort + + error_check_good verify_abort_diff \ + [filecmp $testdir/dump.orig.sort $testdir/dump.postabort.sort] 0 + + + # Now for the really tricky hot backup test. + + puts "\tRecd025.3a: opening environment" + set env_cmd "berkdb_env -create -txn -home $testdir" + set dbenv [eval $env_cmd] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # Open database + puts "\tRecd025.3b: opening database with small pages" + set oflags "$omethod -pagesize $pagesize \ + -env $dbenv -auto_commit $opts $testfile" + set db [eval {berkdb_open} $oflags] + + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tRecd025.3c: start bulk transaction and add pages" + set t [$dbenv txn -txn_bulk] + + for {set lim [expr $lim + $batchsize]} {$iter <= $lim } {incr iter} { + eval {$db put} -txn $t $iter $data + } + + puts "\tRecd025.3d: Set hotbackup_in_progress, and copy the database" + + $dbenv set_flags -hotbackup_in_progress on + + catch { + file copy -force $testdir/$testfile $testdir/$testfile.hotcopy + } res + + puts "\tRecd025.3e: add more pages and commit" + + + for {set lim [expr $lim + $batchsize] } {$iter <= $lim } {incr iter} { + eval {$db put} -txn $t $iter $data + } + + error_check_good txn_commit [$t commit] 0 + + $dbenv set_flags 
-hotbackup_in_progress off + + error_check_good db_close [$db close] 0 + reset_env $dbenv + + # dump the finished product + + eval open_and_dump_file $testdir/$testfile NULL \ + $testdir/dump.final nop dump_file_direction "-first" "-next" $opts + + filesort $testdir/dump.final $testdir/dump.final.sort + + puts "\tRecd025.3f: roll forward the hot copy and compare" + + catch { + file copy -force $testdir/$testfile.hotcopy $testdir/$testfile + } res + + # Perform catastrophic recovery, to simulate hot backup behavior. + set stat [catch {eval exec $util_path/db_recover -h $testdir -c } res] + if { $stat == 1 } { + error "FAIL: Recovery error: $res." + } + + eval open_and_dump_file $testdir/$testfile NULL \ + $testdir/dump.recovered_copy nop dump_file_direction "-first" "-next" $opts + filesort $testdir/dump.recovered_copy $testdir/dump.recovered_copy.sort + + error_check_good verify_abort_diff \ + [filecmp $testdir/dump.final.sort $testdir/dump.recovered_copy.sort] 0 + + # Set fixed_len back to the global value so we don't + # mess up other tests. + set fixed_len $orig_fixed_len + return +} + + + + + + + + + + + + diff --git a/test/tcl/recd15scr.tcl b/test/tcl/recd15scr.tcl new file mode 100644 index 00000000..28e27238 --- /dev/null +++ b/test/tcl/recd15scr.tcl @@ -0,0 +1,73 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# Recd15 - lots of txns - txn prepare script +# Usage: recd15script envcmd dbcmd gidf numtxns +# envcmd: command to open env +# dbfile: name of database file +# gidf: name of global id file +# numtxns: number of txns to start + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "recd15script envcmd dbfile gidfile numtxns" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set envcmd [ lindex $argv 0 ] +set dbfile [ lindex $argv 1 ] +set gidfile [ lindex $argv 2 ] +set numtxns [ lindex $argv 3 ] + +set txnmax [expr $numtxns + 5] +set dbenv [eval $envcmd] +error_check_good envopen [is_valid_env $dbenv] TRUE + +set usedb 0 +if { $dbfile != "NULL" } { + set usedb 1 + set db [berkdb_open -auto_commit -env $dbenv $dbfile] + error_check_good dbopen [is_valid_db $db] TRUE +} + +puts "\tRecd015script.a: Begin $numtxns txns" +for {set i 0} {$i < $numtxns} {incr i} { + set t [$dbenv txn] + error_check_good txnbegin($i) [is_valid_txn $t $dbenv] TRUE + set txns($i) $t + if { $usedb } { + set dbc [$db cursor -txn $t] + error_check_good cursor($i) [is_valid_cursor $dbc $db] TRUE + set curs($i) $dbc + } +} + +puts "\tRecd015script.b: Prepare $numtxns txns" +set gfd [open $gidfile w+] +for {set i 0} {$i < $numtxns} {incr i} { + if { $usedb } { + set dbc $curs($i) + error_check_good dbc_close [$dbc close] 0 + } + set t $txns($i) + set gid [make_gid recd015script:$t] + puts $gfd $gid + error_check_good txn_prepare:$t [$t prepare $gid] 0 +} +close $gfd + +# +# We do not close the db or env, but exit with the txns outstanding. +# +puts "\tRecd015script completed successfully" +flush stdout diff --git a/test/tcl/recdscript.tcl b/test/tcl/recdscript.tcl new file mode 100644 index 00000000..da05da28 --- /dev/null +++ b/test/tcl/recdscript.tcl @@ -0,0 +1,37 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Recovery txn prepare script +# Usage: recdscript op dir envcmd dbfile cmd +# op: primary txn operation +# dir: test directory +# envcmd: command to open env +# dbfile: name of database file +# gidf: name of global id file +# cmd: db command to execute + +source ./include.tcl +source $test_path/test.tcl + +set usage "recdscript op dir envcmd dbfile gidfile cmd" + +# Verify usage +if { $argc < 6 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set op [ lindex $argv 0 ] +set dir [ lindex $argv 1 ] +set envcmd [ lindex $argv 2 ] +set dbfile [ lindex $argv 3 ] +set gidfile [ lindex $argv 4 ] +set cmd [ lindex $argv 5 ] +set args [ lindex $argv 6 ] + +eval {op_recover_prep $op $dir $envcmd $dbfile $gidfile $cmd} $args +flush stdout diff --git a/test/tcl/rep001.tcl b/test/tcl/rep001.tcl new file mode 100644 index 00000000..01b29fd5 --- /dev/null +++ b/test/tcl/rep001.tcl @@ -0,0 +1,238 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep001 +# TEST Replication rename and forced-upgrade test. +# TEST +# TEST Run rep_test in a replicated master environment. +# TEST Verify that the database on the client is correct. +# TEST Next, remove the database, close the master, upgrade the +# TEST client, reopen the master, and make sure the new master can +# TEST correctly run rep_test and propagate it in the other direction. + +proc rep001 { method { niter 1000 } { tnum "001" } args } { + global passwd + global has_crypto + global databases_in_memory + global repfiles_in_memory + global env_private + + source ./include.tcl + + if { $checking_valid_methods } { + return "ALL" + } + + # It's possible to run this test with in-memory databases. 
+ set msg "with named databases" + if { $databases_in_memory } { + set msg "with in-memory named databases" + if { [is_queueext $method] == 1 } { + puts "Skipping rep$tnum for method $method" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run tests with and without recovery. If we're doing testing + # of in-memory logging, skip the combination of recovery + # and in-memory logging -- it doesn't make sense. + set logsets [create_logsets 2] + set saved_args $args + + foreach recopt $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $recopt == "-recover" && $logindex != -1 } { + puts "Skipping test with -recover for in-memory logs." + continue + } + set envargs "" + set args $saved_args + puts -nonewline "Rep$tnum: Replication sanity test " + puts "($method $recopt) $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep001_sub $method $niter $tnum $envargs $l $recopt $args + + # Skip encrypted tests if not supported. + if { $has_crypto == 0 || $databases_in_memory } { + continue + } + + # Run the same tests with security. In-memory + # databases don't work with encryption. + append envargs " -encryptaes $passwd " + append args " -encrypt " + puts "Rep$tnum: Replication and security sanity test\ + ($method $recopt)." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep001_sub $method \ + $niter $tnum $envargs $l $recopt $args + } + } +} + +proc rep001_sub { method niter tnum envargs logset recargs largs } { + source ./include.tcl + global testdir + global encrypt + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + set verify_subset \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" }] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set env_cmd(M) "berkdb_env_noerr -create $repmemargs $privargs \ + -log_max 1000000 $envargs $m_logargs $recargs $verbargs \ + -home $masterdir -errpfx MASTER $m_txnargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $repmemargs $privargs \ + -log_max 1000000 $envargs $c_logargs $recargs $verbargs \ + -home $clientdir -errpfx CLIENT $c_txnargs -rep_client \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C)] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # db_remove in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a:\ + Running rep_test in replicated env ($envargs $recargs)." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Verifying client database contents." + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + rep_verify $masterdir $masterenv \ + $clientdir $clientenv $verify_subset 1 1 + + # Remove the file (and update client). + puts "\tRep$tnum.c: Remove the file on the master and close master." + error_check_good remove \ + [eval {$masterenv dbremove} -auto_commit $dbname] 0 + error_check_good masterenv_close [$masterenv close] 0 + process_msgs $envlist + + puts "\tRep$tnum.d: Upgrade client." + set newmasterenv $clientenv + error_check_good upgrade_client [$newmasterenv rep_start -master] 0 + + # Run rep_test in the new master + puts "\tRep$tnum.e: Running rep_test in new master." 
+ eval rep_test $method $newmasterenv NULL $niter 0 0 0 $largs + set envlist "{$newmasterenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.f: Reopen old master as client and catch up." + # Throttle master so it can't send everything at once + $newmasterenv rep_limit 0 [expr 64 * 1024] + set newclientenv [eval {berkdb_env_noerr -create -recover} \ + $envargs $m_logargs $m_txnargs -errpfx NEWCLIENT $verbargs \ + $privargs $repmemargs \ + {-home $masterdir -rep_client -rep_transport [list 1 replsend]}] + set envlist "{$newclientenv 1} {$newmasterenv 2}" + process_msgs $envlist + + # If we're running with a low number of iterations, we might + # not have had to throttle the data transmission; skip the check. + if { $niter > 200 } { + set nthrottles \ + [stat_field $newmasterenv rep_stat "Transmission limited"] + error_check_bad nthrottles $nthrottles -1 + error_check_bad nthrottles $nthrottles 0 + } + + # Run a modified rep_test in the new master (and update client). + puts "\tRep$tnum.g: Running rep_test in new master." + eval rep_test $method \ + $newmasterenv NULL $niter $niter $niter 0 $largs + process_msgs $envlist + + # Verify the database in the client dir. + puts "\tRep$tnum.h: Verifying new client database contents." + + rep_verify $masterdir $newmasterenv \ + $clientdir $newclientenv $verify_subset 1 1 + + error_check_good newmasterenv_close [$newmasterenv close] 0 + error_check_good newclientenv_close [$newclientenv close] 0 + + if { [lsearch $envargs "-encrypta*"] !=-1 } { + set encrypt 1 + } + error_check_good verify \ + [verify_dir $clientdir "\tRep$tnum.k: " 0 0 1] 0 + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep002.tcl b/test/tcl/rep002.tcl new file mode 100644 index 00000000..0321ba7a --- /dev/null +++ b/test/tcl/rep002.tcl @@ -0,0 +1,332 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep002 +# TEST Basic replication election test. +# TEST +# TEST Run a modified version of test001 in a replicated master +# TEST environment; hold an election among a group of clients to +# TEST make sure they select a proper master from amongst themselves, +# TEST in various scenarios. + +proc rep002 { method { niter 10 } { nclients 3 } { tnum "002" } args } { + + source ./include.tcl + global repfiles_in_memory + + # Skip for record-based methods. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_record_based $method] != 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_record_based $method] == 1 } { + puts "Rep002: Skipping for method $method." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set logsets [create_logsets [expr $nclients + 1]] + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping test with -recover for in-memory logs." + } + puts "Rep$tnum ($method $r): Replication election\ + test with $nclients clients $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep002_sub $method $niter $nclients $tnum $l $r $args + } + } +} + +proc rep002_sub { method niter nclients tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global elect_serial + set elect_timeout 5000000 + + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -event $repmemargs \ + -home $masterdir $m_logargs -errpfx MASTER $verbargs \ + $m_txnargs -rep_master -rep_transport \[list 1 replsend\]" + # In an election test, the -recovery arg must not go + # in the env_cmd string because that is going to be + # passed to a child process. + set masterenv [eval $env_cmd(M) $recargs] + + # Open the clients. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create -home $clientdir($i) \ + -event $repmemargs \ + $c_logargs($i) $c_txnargs($i) -rep_client -errpfx CLIENT$i \ + $verbargs -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i) $recargs] + } + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. + set envlist {} + lappend envlist "$masterenv 1" + for { set i 0 } { $i < $nclients } { incr i } { + lappend envlist "$clientenv($i) [expr $i + 2]" + } + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running test001 in replicated env." + eval test001 $method $niter 0 0 $tnum -env $masterenv $largs + process_msgs $envlist + + # Verify the database in the client dir. + for { set i 0 } { $i < $nclients } { incr i } { + puts "\tRep$tnum.b: Verifying contents of client database $i." + set testdir [get_home $masterenv] + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + open_and_dump_file test$tnum.db $clientenv($i) $testdir/t1 \ + test001.check dump_file_direction "-first" "-next" + + if { [string compare [convert_method $method] -recno] != 0 } { + filesort $t1 $t3 + } + error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0 + + verify_dir $clientdir($i) "\tRep$tnum.c: " 0 0 1 + } + + # Start an election in the first client. + puts "\tRep$tnum.d: Starting election with existing master." + # We want to verify that the master declares the election + # over by fiat, even if everyone uses a lower priority than 20. + # Loop and process all messages, keeping track of which + # sites got a HOLDELECTION and checking that the master i.d. is + # unchanged after the election. 
+ + set origrole [stat_field $masterenv rep_stat "Role"] + error_check_good originally_master $origrole "master" + set origgeneration [stat_field $masterenv rep_stat "Generation number"] + + set got_hold_elect(M) 0 + for { set i 0 } { $i < $nclients } { incr i } { + set got_hold_elect($i) 0 + set elect_pipe($i) INVALID + } + + # Client EIDs are always offset by 2 from the corresponding array index, + # so client 0's EID is 2. + # + set envid 2 + set elect_pipe(0) [start_election C0 $qdir $clientdir(0) $envid \ + [expr $nclients + 1] $nclients 20 $elect_timeout] + + tclsleep 2 + + set got_master 0 + while { 1 } { + set nproced 0 + set he 0 + + incr nproced [replprocessqueue $masterenv 1 0 he] + + if { $he == 1 } { + incr elect_serial + # The master's EID is 1. + set envid 1 + set elect_pipe(M) [start_election CM $qdir \ + $masterdir $envid [expr $nclients + 1] $nclients \ + 0 $elect_timeout] + set got_hold_elect(M) 1 + } + + for { set i 0 } { $i < $nclients } { incr i } { + set he 0 + set envid [expr $i + 2] + incr nproced \ + [replprocessqueue $clientenv($i) $envid 0 he] + if { $he == 1 } { + # error_check_bad client(0)_in_elect $i 0 + if { $elect_pipe($i) != "INVALID" } { + close_election $elect_pipe($i) + } + incr elect_serial + set pfx CHILD$i.$elect_serial + set elect_pipe($i) [start_election $pfx $qdir \ + $clientdir($i) $envid [expr $nclients + 1] \ + $nclients 0 \ + $elect_timeout] + set got_hold_elect($i) 1 + } + } + + if { $nproced == 0 } { + break + } + } + set role [stat_field $masterenv rep_stat "Role"] + set generation [stat_field $masterenv rep_stat "Generation number"] + error_check_good master_unchanged $origrole $role + error_check_good gen_unchanged $origgeneration $generation + cleanup_elections + + # We need multiple clients to proceed from here. + if { $nclients < 2 } { + puts "\tRep$tnum: Skipping for less than two clients." 
+ error_check_good masterenv_close [$masterenv close] 0 + for { set i 0 } { $i < $nclients } { incr i } { + error_check_good clientenv_close($i) \ + [$clientenv($i) close] 0 + } + return + } + + # Make sure all the clients are synced up and ready to be good + # voting citizens. + error_check_good master_flush [$masterenv rep_flush] 0 + process_msgs $envlist + + # Now hold another election in the first client, this time with + # a dead master. + puts "\tRep$tnum.e: Starting election with dead master." + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + set m "Rep$tnum.e" + # We're not going to be using err_cmd, so initialize to "none". + # Client #1 has priority 100; everyone else has priority 10. + for { set i 0 } { $i < $nclients } { incr i } { + set err_cmd($i) "none" + set crash($i) 0 + if { $i == 1 } { + set pri($i) 100 + } else { + set pri($i) 10 + } + } + set nsites $nclients + set nvotes $nclients + # The elector calls the first election. The expected winner + # is $win. + set elector 1 + set win 1 + run_election envlist err_cmd pri crash $qdir $m \ + $elector $nsites $nvotes $nclients $win 1 "test$tnum.db" + + # Hold an election with two clients at the same (winning) priority. + # Make sure that the tie gets broken, and that the third client + # does not win. + puts "\tRep$tnum.f: Election with two clients at same priority." + set m "Rep$tnum.f" + # Clients 0 and 1 have high, matching priority. + for { set i 0 } { $i < $nclients } { incr i } { + if { $i >= 2 } { + set pri($i) 10 + } else { + set pri($i) 100 + } + } + + # Run several elections. + set elections 5 + for { set i 0 } { $i < $elections } { incr i } { + # + # The expected winner is 0 or 1. Since run_election can only + # handle one expected winner, catch the result and inspect it. 
+ # + set elector 0 + set win 1 + set altwin 0 + if {[catch {run_election \ + envlist err_cmd pri crash $qdir $m $elector $nsites \ + $nvotes $nclients $win 1 "test$tnum.db"} res]} { + # + # If the primary winner didn't win, make sure + # the alternative winner won. Do all the cleanup + # for that winner normally done in run_election: + # open and close the new master, then reopen as a + # client for the next cycle. + # + error_check_good check_winner [is_substr \ + $res "expected 3, got [expr $altwin + 2]"] 1 + puts "\t$m: Election $i: Alternate winner $altwin won." + error_check_good make_master \ + [$clientenv($altwin) rep_start -master] 0 + + cleanup_elections + process_msgs $envlist + + error_check_good newmaster_close \ + [$clientenv($altwin) close] 0 + set clientenv($altwin) [eval $env_cmd($altwin)] + error_check_good cl($altwin) \ + [is_valid_env $clientenv($altwin)] TRUE + set newelector "$clientenv($altwin) [expr $altwin + 2]" + set envlist [lreplace $envlist $altwin $altwin $newelector] + } else { + puts "\t$m: Election $i: Primary winner $win won." + } + process_msgs $envlist + } + + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep003.tcl b/test/tcl/rep003.tcl new file mode 100644 index 00000000..55c4a051 --- /dev/null +++ b/test/tcl/rep003.tcl @@ -0,0 +1,299 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep003 +# TEST Repeated shutdown/restart replication test +# TEST +# TEST Run a quick put test in a replicated master environment; +# TEST start up, shut down, and restart client processes, with +# TEST and without recovery. To ensure that environment state +# TEST is transient, use DB_PRIVATE. 
+ +proc rep003 { method { tnum "003" } args } { + source ./include.tcl + global rep003_dbname rep003_omethod rep003_oargs + global repfiles_in_memory + + # Skip for record-based methods. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_record_based $method] != 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_record_based $method] } { + puts "Rep$tnum: Skipping for method $method" + return + } + + set msg2 "with on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "with in-memory replication files" + } + + set rep003_dbname rep003.db + set rep003_omethod [convert_method $method] + set rep003_oargs [convert_args $method $args] + + # Run the body of the test with and without recovery. If we're + # testing in-memory logging, skip the combination of recovery + # and in-memory logging -- it doesn't make sense. + + set logsets [create_logsets 2] + foreach recopt $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $recopt == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping for\ + in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $recopt):\ + Replication repeated-startup test $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep003_sub $method $tnum $l $recopt $args + } + } +} + +proc rep003_sub { method tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. This test already requires + # -txn, so adjust the logargs only. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -errpfx MASTER $verbargs $repmemargs \ + -home $masterdir -txn $m_logargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + puts "\tRep$tnum.a: Simple client startup test." + + # Put item one. + rep003_put $masterenv A1 a-one + + # Open a client. + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create -private -home $clientdir \ + -txn $c_logargs -errpfx CLIENT $verbargs $repmemargs \ + -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Put another quick item. + rep003_put $masterenv A2 a-two + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. 
+ set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + + error_check_good clientenv_close [$clientenv close] 0 + replclear 2 + + # Now reopen the client after doing another put. + puts "\tRep$tnum.b: Client restart." + rep003_put $masterenv B1 b-one + + set clientenv [eval $env_cmd(C)] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Loop letting the client and master sync up and get the + # environment initialized. It's a new client env so + # reinitialize the envlist as well. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # The items from part A should be present at all times-- + # if we roll them back, we've screwed up. [#5709] + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + + rep003_put $masterenv B2 b-two + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. + while { 1 } { + set nproced 0 + + incr nproced [replprocessqueue $masterenv 1] + incr nproced [replprocessqueue $clientenv 2] + + # The items from part A should be present at all times-- + # if we roll them back, we've screwed up. [#5709] + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + + if { $nproced == 0 } { + break + } + } + + rep003_check $clientenv B1 b-one + rep003_check $clientenv B2 b-two + + error_check_good clientenv_close [$clientenv close] 0 + + replclear 2 + + # Now reopen the client after a recovery. + puts "\tRep$tnum.c: Client restart after recovery." + rep003_put $masterenv C1 c-one + + set clientenv [eval $env_cmd(C) -recover] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. 
+ set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # The items from part A should be present at all times-- + # if we roll them back, we've screwed up. [#5709] + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + rep003_check $clientenv B1 b-one + rep003_check $clientenv B2 b-two + + rep003_put $masterenv C2 c-two + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. + while { 1 } { + set nproced 0 + + # The items from part A should be present at all times-- + # if we roll them back, we've screwed up. [#5709] + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + rep003_check $clientenv B1 b-one + rep003_check $clientenv B2 b-two + + incr nproced [replprocessqueue $masterenv 1] + incr nproced [replprocessqueue $clientenv 2] + + if { $nproced == 0 } { + break + } + } + + rep003_check $clientenv C1 c-one + rep003_check $clientenv C2 c-two + + error_check_good clientenv_close [$clientenv close] 0 + + replclear 2 + + # Now reopen the client after a catastrophic recovery. + puts "\tRep$tnum.d: Client restart after catastrophic recovery." + rep003_put $masterenv D1 d-one + + set clientenv [eval $env_cmd(C) -recover_fatal] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + rep003_put $masterenv D2 d-two + + # Loop, processing first the master's messages, then the client's, + # until both queues are empty. + while { 1 } { + set nproced 0 + + # The items from part A should be present at all times-- + # if we roll them back, we've screwed up. 
[#5709] + rep003_check $clientenv A1 a-one + rep003_check $clientenv A2 a-two + rep003_check $clientenv B1 b-one + rep003_check $clientenv B2 b-two + rep003_check $clientenv C1 c-one + rep003_check $clientenv C2 c-two + + incr nproced [replprocessqueue $masterenv 1] + incr nproced [replprocessqueue $clientenv 2] + + if { $nproced == 0 } { + break + } + } + + rep003_check $clientenv D1 d-one + rep003_check $clientenv D2 d-two + + error_check_good clientenv_close [$clientenv close] 0 + + error_check_good masterenv_close [$masterenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + +proc rep003_put { masterenv key data } { + global rep003_dbname rep003_omethod rep003_oargs + + set db [eval {berkdb_open_noerr -create -env $masterenv -auto_commit} \ + $rep003_omethod $rep003_oargs $rep003_dbname] + error_check_good rep3_put_open($key,$data) [is_valid_db $db] TRUE + + set txn [$masterenv txn] + error_check_good rep3_put($key,$data) [$db put -txn $txn $key $data] 0 + error_check_good rep3_put_txn_commit($key,$data) [$txn commit] 0 + + error_check_good rep3_put_close($key,$data) [$db close] 0 +} + +proc rep003_check { env key data } { + global rep003_dbname + + set db [berkdb_open_noerr -rdonly -env $env $rep003_dbname] + error_check_good rep3_check_open($key,$data) [is_valid_db $db] TRUE + + set dbt [$db get $key] + error_check_good rep3_check($key,$data) \ + [lindex [lindex $dbt 0] 1] $data + + error_check_good rep3_put_close($key,$data) [$db close] 0 +} diff --git a/test/tcl/rep005.tcl b/test/tcl/rep005.tcl new file mode 100644 index 00000000..fcb5eb4c --- /dev/null +++ b/test/tcl/rep005.tcl @@ -0,0 +1,358 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep005 +# TEST Replication election test with error handling. 
+# TEST +# TEST Run rep_test in a replicated master environment; +# TEST hold an election among a group of clients to make sure they select +# TEST a proper master from amongst themselves, forcing errors at various +# TEST locations in the election path. + +proc rep005 { method args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep005: Skipping for method $method." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set tnum "005" + set niter 10 + set nclients 3 + set logsets [create_logsets [expr $nclients + 1]] + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases." + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + # We don't want to run this with -recover - it takes too + # long and doesn't cover any new ground. + set recargs "" + foreach l $logsets { + puts "Rep$tnum ($recargs): Replication election\ + error test with $nclients clients $msg $msg2." 
+ puts -nonewline "Rep$tnum: Started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep005_sub $method $tnum \ + $niter $nclients $l $recargs $args + } +} + +proc rep005_sub { method tnum niter nclients logset recargs largs } { + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -event $repmemargs \ + -home $masterdir $m_logargs -errpfx MASTER $verbargs \ + $m_txnargs -rep_master -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + set envlist {} + lappend envlist "$masterenv 1" + + # Open the clients. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create \ + -event $repmemargs \ + -home $clientdir($i) $c_logargs($i) \ + $c_txnargs($i) -rep_client $verbargs \ + -errpfx CLIENT$i \ + -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i) $recargs] + lappend envlist "$clientenv($i) $envid" + } + + # Process startup messages + process_msgs $envlist + # Run rep_test in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + # Process all the messages and close the master. + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. + check_db_location $masterenv + for { set i 0 } { $i < $nclients } { incr i } { + check_db_location $clientenv($i) + } + + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + for { set i 0 } { $i < $nclients } { incr i } { + replclear [expr $i + 2] + } + + # We set up the error list for each client. 
+ set m "Rep$tnum" + set count 0 + set win -1 + # + set c0err { none electinit } + set c1err $c0err + set c2err $c0err + set numtests [expr [llength $c0err] * [llength $c1err] * \ + [llength $c2err]] + puts "\t$m.b: Starting $numtests election with error tests" + set last_win -1 + set win -1 + foreach c0 $c0err { + foreach c1 $c1err { + foreach c2 $c2err { + set elist [list $c0 $c1 $c2] + rep005_elect env_cmd envlist $qdir \ + $m $count win last_win $elist $logset + incr count + } + } + } + + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + + replclose $testdir/MSGQUEUEDIR + puts -nonewline \ + "Rep$tnum: Completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] +} + +proc rep005_elect { ecmd celist qdir msg count \ + winner lsn_lose elist logset} { + global timeout_ok + global databases_in_memory + global repfiles_in_memory + upvar $ecmd env_cmd + upvar $celist envlist + upvar $winner win + upvar $lsn_lose last_win + + # Set the proper value for the first time through the + # loop. On subsequent passes, timeout_ok will already + # be set. + if { [info exists timeout_ok] == 0 } { + set timeout_ok 0 + } + + set nclients [llength $elist] + set nsites [expr $nclients + 1] + + set cl_list {} + foreach pair $envlist { + set id [lindex $pair 1] + set i [expr $id - 2] + set clientenv($i) [lindex $pair 0] + set err_cmd($i) [lindex $elist $i] + set elect_pipe($i) INVALID + replclear $id + lappend cl_list $i + } + + # Select winner. We want to test biggest LSN wins, and secondarily + # highest priority wins. If we already have a master, make sure + # we don't start a client in that master. 
+ set el 0 + if { $win == -1 } { + if { $last_win != -1 } { + set cl_list [lreplace $cl_list $last_win $last_win] + set el $last_win + } + set windex [berkdb random_int 0 [expr [llength $cl_list] - 1]] + set win [lindex $cl_list $windex] + } else { + # Easy case, if we have a master, the winner must be the + # same one as last time, just use $win. + # If client0 is the current existing master, start the + # election in client 1. + if {$win == 0} { + set el 1 + } + } + # Winner has priority 100. If we are testing LSN winning, the + # make sure the lowest LSN client has the highest priority. + # Everyone else has priority 10. + for { set i 0 } { $i < $nclients } { incr i } { + set crash($i) 0 + if { $i == $win } { + set pri($i) 100 + } elseif { $i == $last_win } { + set pri($i) 200 + } else { + set pri($i) 10 + } + } + + puts "\t$msg.b.$count: Start election (win=client$win) $elist" + set msg $msg.c.$count + set nsites $nclients + set nvotes $nsites + if { $databases_in_memory } { + set dbname { "" test.db } + } else { + set dbname test.db + } + run_election envlist err_cmd pri crash \ + $qdir $msg $el $nsites $nvotes $nclients $win \ + 0 $dbname 0 $timeout_ok + + # + # Sometimes test elections with an existing master. + # Other times test elections without master by closing the + # master we just elected and creating a new client. + # We want to weight it to close the new master. So, use + # a list to cause closing about 70% of the time. + # + set close_list { 0 0 0 1 1 1 1 1 1 1} + set close_len [expr [llength $close_list] - 1] + set close_index [berkdb random_int 0 $close_len] + + # Unless we close the master, the next election will time out. + set timeout_ok 1 + + if { [lindex $close_list $close_index] == 1 } { + # Declare that we expect the next election to succeed. 
+ set timeout_ok 0 + puts -nonewline "\t\t$msg: Closing " + error_check_good log_flush [$clientenv($win) log_flush] 0 + error_check_good newmaster_close [$clientenv($win) close] 0 + # + # If the next test should win via LSN then remove the + # env before starting the new client so that we + # can guarantee this client doesn't win the next one. + set lsn_win { 0 0 0 0 1 1 1 1 1 1 } + set lsn_len [expr [llength $lsn_win] - 1] + set lsn_index [berkdb random_int 0 $lsn_len] + set rec_arg "" + set win_logs_inmem [expr [string compare [lindex $logset \ + [expr $win + 1]] in-memory] == 0] + if { [lindex $lsn_win $lsn_index] == 1 } { + set last_win $win + set dirindex [lsearch -exact $env_cmd($win) "-home"] + incr dirindex + set lsn_dir [lindex $env_cmd($win) $dirindex] + env_cleanup $lsn_dir + puts -nonewline "and cleaning " + } else { + # + # If we're not cleaning the env, decide if we should + # run recovery upon reopening the env. This causes + # two things: + # 1. Removal of region files which forces the env + # to read its __db.rep.egen file. + # 2. Adding a couple log records, so this client must + # be the next winner as well since it'll have the + # biggest LSN. + # + set rec_win { 0 0 0 0 0 0 1 1 1 1 } + set rec_len [expr [llength $rec_win] - 1] + set rec_index [berkdb random_int 0 $rec_len] + if { !$repfiles_in_memory && \ + [lindex $rec_win $rec_index] == 1 } { + puts -nonewline "and recovering " + set rec_arg "-recover" + # + # If logs are in memory and we are about to + # run recovery, we force ourselves not to win + # the next election because recovery will + # blow away the entire log in memory. + # However, we don't skip this entirely + # because we still want to force reading + # of __db.rep.egen. 
+ # + if { $win_logs_inmem } { + set last_win $win + } else { + set last_win -1 + } + } else { + set last_win -1 + } + } + puts "new master, new client $win" + set clientenv($win) [eval $env_cmd($win) $rec_arg] + error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE + # + # Since we started a new client, we need to replace it + # in the message processing list so that we get the + # new Tcl handle name in there. + set newel "$clientenv($win) [expr $win + 2]" + set envlist [lreplace $envlist $win $win $newel] + if { $rec_arg == "" || $win_logs_inmem } { + set win -1 + } + # + # Since we started a new client we want to give them + # all a chance to process everything outstanding before + # the election on the next iteration. + # + process_msgs $envlist + } +} diff --git a/test/tcl/rep006.tcl b/test/tcl/rep006.tcl new file mode 100644 index 00000000..65bd5f63 --- /dev/null +++ b/test/tcl/rep006.tcl @@ -0,0 +1,224 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep006 +# TEST Replication and non-rep env handles. +# TEST +# TEST Run a modified version of test001 in a replicated master +# TEST environment; verify that the database on the client is correct. +# TEST Next, create a non-rep env handle to the master env. +# TEST Attempt to open the database r/w to force error. + +proc rep006 { method { niter 1000 } { tnum "006" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + set logsets [create_logsets 2] + + # All access methods are allowed. + if { $checking_valid_methods } { + return "ALL" + } + + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping for in-memory logs\ + with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication and\ + non-rep env handles $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep006_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep006_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global testdir + global is_hp_test + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + } else { + set checkfunc test001.check + } + + # Open a master. 
+ repladd 1 + set max_locks 2500 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -lock_max_objects $max_locks -lock_max_locks $max_locks \ + -home $masterdir -errpfx MASTER $verbargs $repmemargs \ + $privargs \ + $m_txnargs $m_logargs -rep_master -rep_transport \ + \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -lock_max_objects $max_locks -lock_max_locks $max_locks \ + -home $clientdir -errpfx CLIENT $verbargs $repmemargs \ + $privargs \ + -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + check_db_location $masterenv + check_db_location $clientenv + + # Verify the database in the client dir. + puts "\tRep$tnum.b: Verifying client database contents." + set testdir [get_home $masterenv] + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + open_and_dump_file $dbname $clientenv $t1 \ + $checkfunc dump_file_direction "-first" "-next" + + # Determine whether this build is configured with --enable-debug_rop + # or --enable-debug_wop; we'll need to skip portions of the test if so. + # Also check for *not* configuring with diagnostic. That similarly + # forces a different code path and we need to skip portions. 
+ set conf [berkdb getconfig] + set skip_for_config 0 + if { [is_substr $conf "debug_rop"] == 1 \ + || [is_substr $conf "debug_wop"] == 1 \ + || [is_substr $conf "diagnostic"] == 0 \ + || $env_private == 1 } { + set skip_for_config 1 + } + + # Skip if configured with --enable-debug_rop or --enable-debug_wop + # or without --enable-diagnostic, + # because the checkpoint won't fail in those cases. + if { $skip_for_config == 1 } { + puts "\tRep$tnum.c: Skipping based on configuration." + } else { + puts "\tRep$tnum.c: Verifying non-master db_checkpoint." + set stat \ + [catch {exec $util_path/db_checkpoint -h $masterdir -1} ret] + error_check_good open_err $stat 1 + error_check_good \ + open_err1 [is_substr $ret "attempting to modify"] 1 + } + + # We have to skip this bit for HP-UX because we can't open an env + # twice, and for debug_rop/debug_wop because the open won't fail. + if { $is_hp_test == 1 } { + puts "\tRep$tnum.d: Skipping for HP-UX." + } elseif { $skip_for_config == 1 } { + puts "\tRep$tnum.d: Skipping based on configuration." + } else { + puts "\tRep$tnum.d: Verifying non-master access." + + set rdenv [eval {berkdb_env_noerr} \ + -home $masterdir $verbargs $privargs] + error_check_good rdenv [is_valid_env $rdenv] TRUE + # + # Open the db read/write which will cause it to try to + # write out a log record, which should fail. 
+ # + set stat \ + [catch {eval {berkdb_open_noerr} -env $rdenv $dbname} ret] + error_check_good open_err $stat 1 + error_check_good \ + open_err1 [is_substr $ret "attempting to modify"] 1 + error_check_good rdenv_close [$rdenv close] 0 + } + + process_msgs $envlist + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + error_check_good verify \ + [verify_dir $clientdir "\tRep$tnum.e: " 0 0 1] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep007.tcl b/test/tcl/rep007.tcl new file mode 100644 index 00000000..cfd0918f --- /dev/null +++ b/test/tcl/rep007.tcl @@ -0,0 +1,272 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep007 +# TEST Replication and bad LSNs +# TEST +# TEST Run rep_test in a replicated master env. +# TEST Close the client. Make additional changes to master. +# TEST Close the master. Open the client as the new master. +# TEST Make several different changes. Open the old master as +# TEST the client. Verify periodically that contents are correct. +# TEST This test is not appropriate for named in-memory db testing +# TEST because the databases are lost when both envs are closed. +proc rep007 { method { niter 10 } { tnum "007" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # All access methods are allowed. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping for\ + in-memory logs with -recover." + continue + } + if { $r == "-recover" && $databases_in_memory } { + puts "Rep$tnum: Skipping for\ + named in-memory databases with -recover." + continue + } + puts "Rep$tnum ($method $r):\ + Replication and bad LSNs $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client1 logs are [lindex $l 1]" + puts "Rep$tnum: Client2 logs are [lindex $l 2]" + rep007_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep007_sub { method niter tnum logset recargs largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR.2 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + set c_logtype [lindex $logset 1] + set c_logargs [adjust_logargs $c_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + set c2_logtype [lindex $logset 2] + set c2_logargs 
[adjust_logargs $c2_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -home $masterdir $verbargs -errpfx MASTER $repmemargs $privargs \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -home $clientdir $verbargs -errpfx CLIENT1 $repmemargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \ + -home $clientdir2 $verbargs -errpfx CLIENT2 $repmemargs $privargs \ + -rep_transport \[list 3 replsend\]" + set cl2env [eval $cl2_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2} {$cl2env 3}" + process_msgs $envlist + + # Run rep_test in the master (and update clients). + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Databases should now have identical contents. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + rep_verify $masterdir $masterenv $clientdir $clientenv 0 1 1 + rep_verify $masterdir $masterenv $clientdir2 $cl2env 0 1 1 + + puts "\tRep$tnum.b: Close client 1 and make master changes." + # Flush the log so that we don't lose any changes, since we'll be + # relying on having a good log when we run recovery when we open it + # later. + # + $clientenv log_flush + error_check_good client_close [$clientenv close] 0 + + # Change master and propagate changes to client 2. 
+ set start $niter + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + set envlist "{$masterenv 1} {$cl2env 3}" + process_msgs $envlist + + # We need to do a deletion here to cause meta-page updates, + # particularly for queue. Delete the first pair and remember + # what it is -- it should come back after the master is closed + # and reopened as a client. + set db1 [eval {berkdb_open_noerr} -env $masterenv -auto_commit $dbname] + error_check_good dbopen [is_valid_db $db1] TRUE + set txn [$masterenv txn] + set c [eval $db1 cursor -txn $txn] + error_check_good db_cursor [is_valid_cursor $c $db1] TRUE + set first [$c get -first] + set pair [lindex [$c get -first] 0] + set key [lindex $pair 0] + set data [lindex $pair 1] + + error_check_good cursor_del [$c del] 0 + error_check_good dbcclose [$c close] 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good db1_close [$db1 close] 0 + # + # Process the messages to get them out of the db. This also + # propagates the delete to client 2. + # + process_msgs $envlist + + # Nuke those for closed client + replclear 2 + + # Databases 1 and 3 should now have identical contents. + # Database 2 should be different. First check 1 and 3. We + # have to wait to check 2 until the env is open again. + rep_verify $masterdir $masterenv $clientdir2 $cl2env 0 1 1 + + puts "\tRep$tnum.c: Close master, reopen client as master." + $masterenv log_flush + error_check_good master_close [$masterenv close] 0 + set newmasterenv [eval $cl_envcmd $recargs -rep_master] + + # Now we can check that database 2 does not match 3. + rep_verify $clientdir $newmasterenv $clientdir2 $cl2env 0 0 0 + + puts "\tRep$tnum.d: Make incompatible changes to new master." 
+ set envlist "{$newmasterenv 2} {$cl2env 3}" + process_msgs $envlist + + set db [eval {berkdb_open_noerr} \ + -env $newmasterenv -auto_commit -create $omethod $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + set t [$newmasterenv txn] + + # Force in a pair {10 10}. This works for all access + # methods and won't overwrite the old first pair for record-based. + set ret [eval {$db put} -txn $t 10 [chop_data $method 10]] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + error_check_good dbclose [$db close] 0 + + eval rep_test $method $newmasterenv NULL $niter $start $start 0 $largs + set envlist "{$newmasterenv 2} {$cl2env 3}" + process_msgs $envlist + + # Nuke those for closed old master + replclear 1 + + # Databases 2 and 3 should now match. + rep_verify $clientdir $newmasterenv $clientdir2 $cl2env 0 1 1 + + puts "\tRep$tnum.e: Open old master as client." + set newclientenv [eval $ma_envcmd -rep_client -recover] + set envlist "{$newclientenv 1} {$newmasterenv 2} {$cl2env 3}" + process_msgs $envlist + + # The pair we deleted earlier from the master should now + # have reappeared. + set db1 [eval {berkdb_open_noerr}\ + -env $newclientenv -auto_commit $dbname] + error_check_good dbopen [is_valid_db $db1] TRUE + set ret [$db1 get -get_both $key [pad_data $method $data]] + error_check_good get_both $ret [list $pair] + error_check_good db1_close [$db1 close] 0 + + set start [expr $niter * 2] + eval rep_test $method $newmasterenv NULL $niter $start $start 0 $largs + set envlist "{$newclientenv 1} {$newmasterenv 2} {$cl2env 3}" + process_msgs $envlist + + # Now all 3 should match again. 
+ rep_verify $masterdir $newclientenv $clientdir $newmasterenv 0 1 1 + rep_verify $masterdir $newclientenv $clientdir2 $cl2env 0 1 1 + + error_check_good newmasterenv_close [$newmasterenv close] 0 + error_check_good newclientenv_close [$newclientenv close] 0 + error_check_good cl2_close [$cl2env close] 0 + replclose $testdir/MSGQUEUEDIR + return +} diff --git a/test/tcl/rep008.tcl b/test/tcl/rep008.tcl new file mode 100644 index 00000000..93d961a5 --- /dev/null +++ b/test/tcl/rep008.tcl @@ -0,0 +1,153 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep008 +# TEST Replication, back up and synchronizing +# TEST +# TEST Run a modified version of test001 in a replicated master +# TEST environment. +# TEST Close master and client. +# TEST Copy the master log to the client. +# TEST Clean the master. +# TEST Reopen the master and client. +proc rep008 { method { niter 10 } { tnum "008" } args } { + + source ./include.tcl + global mixed_mode_logging + global repfiles_in_memory + global env_private + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + # This test depends on copying logs, so can't be run with + # in-memory logging. + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + set args [convert_args $method $args] + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + puts "Rep$tnum ($method $r):\ + Replication backup and synchronizing $msg2 $msg3." 
+ rep008_sub $method $niter $tnum $r $args + } +} + +proc rep008_sub { method niter tnum recargs largs } { + global testdir + global util_path + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + -home $masterdir -errpfx MASTER $repmemargs $privargs \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + -home $clientdir -errpfx CLIENT $repmemargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval test001 $method $niter 0 0 $tnum -env $masterenv $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Close client and master. Copy logs." + error_check_good client_close [$clientenv close] 0 + error_check_good master_close [$masterenv close] 0 + file copy -force $masterdir/log.0000000001 $testdir/log.save + + puts "\tRep$tnum.c: Clean master and reopen" + # + # Add sleep calls to ensure master's new log doesn't match + # its old one in the ckp timestamp. 
+ # + tclsleep 1 + env_cleanup $masterdir + tclsleep 1 + env_cleanup $clientdir + file copy -force $testdir/log.save $clientdir/log.0000000001 + set masterenv [eval $ma_envcmd $recargs -rep_master] + error_check_good master_env [is_valid_env $masterenv] TRUE + + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + # + # We'll only catch this error if we turn off autoinit. + # Otherwise, the system will throw away everything on the + # client and resync. + # + $clientenv rep_config {autoinit off} + + # Process the messages to get them out of the db. + # + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + error_check_bad err $err 0 + error_check_good errchk [is_substr $err "DB_REP_JOIN_FAILURE"] 1 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep009.tcl b/test/tcl/rep009.tcl new file mode 100644 index 00000000..24a56075 --- /dev/null +++ b/test/tcl/rep009.tcl @@ -0,0 +1,206 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep009 +# TEST Replication and DUPMASTERs +# TEST Run test001 in a replicated environment. +# TEST +# TEST Declare one of the clients to also be a master. +# TEST Close a client, clean it and then declare it a 2nd master. +proc rep009 { method { niter 10 } { tnum "009" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep009: Skipping for method $method." 
+ return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + set logsets [create_logsets 3] + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts -nonewline "Rep$tnum ($r):" + puts "Replication DUPMASTER test $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client1 logs are [lindex $l 1]" + puts "Rep$tnum: Client2 logs are [lindex $l 2]" + rep009_sub $method $niter $tnum 0 $l $r $args + rep009_sub $method $niter $tnum 1 $l $r $args + } + } +} + +proc rep009_sub { method niter tnum clean logset recargs largs } { + global testdir + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR.2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + set c_logtype [lindex $logset 1] + set c_logargs [adjust_logargs $c_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + set c2_logtype [lindex $logset 2] + set c2_logargs [adjust_logargs $c2_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -home $masterdir $verbargs -errpfx MASTER $repmemargs \ + $privargs -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -home $clientdir $verbargs -errpfx CLIENT1 $repmemargs \ + $privargs -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Open a second client. + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \ + -home $clientdir2 $verbargs -errpfx CLIENT2 $repmemargs \ + $privargs -rep_transport \[list 3 replsend\]" + set cl2env [eval $cl2_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2} {$cl2env 3}" + process_msgs $envlist + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval test001 $method $niter 0 0 $tnum -env $masterenv $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Declare a client to be a master." + if { $clean } { + error_check_good clientenv_close [$clientenv close] 0 + env_cleanup $clientdir + set clientenv [eval $cl_envcmd $recargs -rep_master] + error_check_good client_env [is_valid_env $clientenv] TRUE + } else { + error_check_good client_master [$clientenv rep_start -master] 0 + } + + # + # Process the messages to get them out of the db. 
+ # + for { set i 1 } { $i <= 3 } { incr i } { + set seen_dup($i) 0 + } + while { 1 } { + set nproced 0 + + incr nproced [replprocessqueue \ + $masterenv 1 0 NONE dup1 err1] + incr nproced [replprocessqueue \ + $clientenv 2 0 NONE dup2 err2] + incr nproced [replprocessqueue \ + $cl2env 3 0 NONE dup3 err3] + if { $dup1 != 0 } { + set seen_dup(1) 1 + error_check_good downgrade1 \ + [$masterenv rep_start -client] 0 + } + if { $dup2 != 0 } { + set seen_dup(2) 1 + error_check_good downgrade1 \ + [$clientenv rep_start -client] 0 + } + # + # We might get errors after downgrading as the former + # masters might get old messages from other clients. + # If we get an error make sure it is after downgrade. + if { $err1 != 0 } { + error_check_good seen_dup1_err $seen_dup(1) 1 + error_check_good err1str [is_substr \ + $err1 "invalid argument"] 1 + } + if { $err2 != 0 } { + error_check_good seen_dup2_err $seen_dup(2) 1 + error_check_good err2str [is_substr \ + $err2 "invalid argument"] 1 + } + # + # This should never happen. We'll check below. + # + if { $dup3 != 0 } { + set seen_dup(3) 1 + } + + if { $nproced == 0 } { + break + } + } + error_check_good seen_dup1 $seen_dup(1) 1 + error_check_good seen_dup2 $seen_dup(2) 1 + error_check_bad seen_dup3 $seen_dup(3) 1 + + puts "\tRep$tnum.c: Close environments" + error_check_good master_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + error_check_good cl2_close [$cl2env close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep010.tcl b/test/tcl/rep010.tcl new file mode 100644 index 00000000..c9182472 --- /dev/null +++ b/test/tcl/rep010.tcl @@ -0,0 +1,274 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep010 +# TEST Replication and ISPERM +# TEST +# TEST With consecutive message processing, make sure every +# TEST DB_REP_PERMANENT is responded to with an ISPERM when +# TEST processed. With gaps in the processing, make sure +# TEST every DB_REP_PERMANENT is responded to with an ISPERM +# TEST or a NOTPERM. Verify in both cases that the LSN returned +# TEST with ISPERM is found in the log. +proc rep010 { method { niter 100 } { tnum "010" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set msg "with on-disk databases" + if { $databases_in_memory } { + set msg "with named in-memory databases" + if { [is_queueext $method] == 1 } { + puts "Skipping rep$tnum for method $method" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication and ISPERM" + puts "Rep$tnum: with $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep010_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep010_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global rand_init + berkdb srand $rand_init + global perm_sent_list + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + set perm_sent_list {{}} + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + $m_logargs $verbargs -errpfx MASTER $repmemargs \ + $privargs -home $masterdir $m_txnargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create \ + $c_txnargs $c_logargs $verbargs -rep_client -errpfx CLIENT \ + $privargs -home $clientdir $repmemargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Bring the client online. 
Since that now involves internal init, we + # have to avoid the special rep010_process_msgs here, because otherwise + # we would hang trying to open a log cursor. + # + process_msgs "{$masterenv 1} {$clientenv 2}" + + # Open database in master, propagate to client. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname test.db + } + set db1 [eval {berkdb_open_noerr -create} $omethod -auto_commit \ + -env $masterenv $largs $dbname] + rep010_process_msgs $masterenv $clientenv 1 + + puts "\tRep$tnum.a: Process messages with no gaps." + # Feed operations one at a time to master and immediately + # update client. + for { set i 1 } { $i <= $niter } { incr i } { + set t [$masterenv txn] + error_check_good db_put \ + [eval $db1 put -txn $t $i [chop_data $method data$i]] 0 + error_check_good txn_commit [$t commit] 0 + rep010_process_msgs $masterenv $clientenv 1 + } + + # Replace data. + for { set i 1 } { $i <= $niter } { incr i } { + set t [$masterenv txn] + set ret \ + [$db1 get -get_both -txn $t $i [pad_data $method data$i]] + error_check_good db_put \ + [$db1 put -txn $t $i [chop_data $method newdata$i]] 0 + error_check_good txn_commit [$t commit] 0 + rep010_process_msgs $masterenv $clientenv 1 + } + + # Try some aborts. These do not write permanent messages. + for { set i 1 } { $i <= $niter } { incr i } { + set t [$masterenv txn] + error_check_good db_put [$db1 put -txn $t $i abort$i] 0 + error_check_good txn_abort [$t abort] 0 + rep010_process_msgs $masterenv $clientenv 0 + } + + puts "\tRep$tnum.b: Process messages with gaps." + # To test gaps in message processing, run and commit a whole + # bunch of transactions, then process the messages with skips. 
+ for { set i 1 } { $i <= $niter } { incr i } { + set t [$masterenv txn] + error_check_good db_put [$db1 put -txn $t $i data$i] 0 + error_check_good txn_commit [$t commit] 0 + } + set skip [berkdb random_int 2 8] + rep010_process_msgs $masterenv $clientenv 1 $skip + + check_db_location $masterenv + check_db_location $clientenv + + # Clean up. + error_check_good db1_close [$db1 close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR +} + +proc rep010_process_msgs { masterenv clientenv check {skip_interval 0} } { + global perm_response_list + global perm_sent_list + + set perm_response_list {{}} + + while { 1 } { + set nproced 0 + + incr nproced [replprocessqueue $masterenv 1 $skip_interval] + incr nproced [replprocessqueue $clientenv 2 $skip_interval] + + # In this test, the ISPERM and NOTPERM messages are + # sent by the client back to the master. Verify that we + # get ISPERM when the client is caught up to the master + # (i.e. last client LSN in the log matches the LSN returned + # with the ISPERM), and that when we get NOTPERM, the client + # is not caught up. + + # Create a list of the LSNs in the client log. + set lsnlist {} + set logc [$clientenv log_cursor] + error_check_good logc \ + [is_valid_logc $logc $clientenv] TRUE + for { set logrec [$logc get -first] } \ + { [llength $logrec] != 0 } \ + { set logrec [$logc get -next] } { + lappend lsnlist [lindex [lindex $logrec 0] 1] + } + set lastloglsn [lindex $lsnlist end] + + # Parse perm_response_list to find the LSN returned with + # ISPERM or NOTPERM. + set response [lindex $perm_response_list end] + set permtype [lindex $response 0] + set messagelsn [lindex [lindex $response 1] 1] + + if { [llength $response] != 0 } { + if { $permtype == "NOTPERM" } { + # If we got a NOTPERM, the returned LSN has to + # be greater than the last LSN in the log. 
+ error_check_good notpermlsn \ + [expr $messagelsn > $lastloglsn] 1 + } elseif { $permtype == "ISPERM" } { + # If we got an ISPERM, the returned LSN has to + # be in the log. + error_check_bad \ + ispermlsn [lsearch $lsnlist $messagelsn] -1 + } else { + puts "FAIL: unexpected message type $permtype" + } + } + + error_check_good logc_close [$logc close] 0 + + # If we've finished processing all the messages, check + # that the last received permanent message LSN matches the + # last sent permanent message LSN. + if { $nproced == 0 } { + if { $check != 0 } { + set last_sent [lindex $perm_sent_list end] + set last_rec_msg \ + [lindex $perm_response_list end] + set last_received [lindex $last_rec_msg 1] + error_check_good last_message \ + $last_sent $last_received + } + + # If we check correctly; empty out the lists + set perm_response_list {{}} + set perm_sent_list {{}} + break + } + } +} diff --git a/test/tcl/rep011.tcl b/test/tcl/rep011.tcl new file mode 100644 index 00000000..06f7d9d7 --- /dev/null +++ b/test/tcl/rep011.tcl @@ -0,0 +1,203 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep011 +# TEST Replication: test open handle across an upgrade. +# TEST +# TEST Open and close test database in master environment. +# TEST Update the client. Check client, and leave the handle +# TEST to the client open as we close the masterenv and upgrade +# TEST the client to master. Reopen the old master as client +# TEST and catch up. Test that we can still do a put to the +# TEST handle we created on the master while it was still a +# TEST client, and then make sure that the change can be +# TEST propagated back to the new client. + +proc rep011 { method { tnum "011" } args } { + global has_crypto + global passwd + global repfiles_in_memory + global env_private + + source ./include.tcl + + # Run for all access methods. 
+ if { $checking_valid_methods } { + return "ALL" + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + set logsets [create_logsets 2] + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + set envargs "" + puts "Rep$tnum.a ($r $envargs $method):\ + Test upgrade of open handles $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep011_sub $method $tnum $envargs $l $r $args + + if { $has_crypto == 0 } { + continue + } + append envargs " -encryptaes $passwd " + append args " -encrypt " + + puts "Rep$tnum.b ($r $envargs):\ + Open handle upgrade test with encryption ($method)." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep011_sub $method $tnum $envargs $l $r $args + } + } +} + +proc rep011_sub { method tnum envargs logset recargs largs } { + source ./include.tcl + global testdir + global encrypt + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + $m_logargs $envargs $verbargs -home $masterdir $repmemargs \ + $privargs $m_txnargs -errpfx MASTER -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create \ + $c_logargs $envargs $verbargs -home $clientdir $repmemargs \ + $privargs $c_txnargs -errpfx CLIENT -rep_client \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Open a test database on the master so we can test having + # handles open across an upgrade. + puts "\tRep$tnum.a:\ + Opening test database for post-upgrade client logging test." + set master_upg_db [berkdb_open_noerr \ + -create -auto_commit -btree -env $masterenv rep$tnum-upg.db] + set puttxn [$masterenv txn] + error_check_good master_upg_db_put \ + [$master_upg_db put -txn $puttxn hello world] 0 + error_check_good puttxn_commit [$puttxn commit] 0 + error_check_good master_upg_db_close [$master_upg_db close] 0 + + # Update the client. + process_msgs $envlist + + # Open the cross-upgrade database on the client and check its contents. + set client_upg_db [berkdb_open_noerr \ + -create -auto_commit -btree -env $clientenv rep$tnum-upg.db] + error_check_good client_upg_db_get [$client_upg_db get hello] \ + [list [list hello world]] + # !!! We use this handle later. Don't close it here. + + # Close master. + puts "\tRep$tnum.b: Close master." + error_check_good masterenv_close [$masterenv close] 0 + + puts "\tRep$tnum.c: Upgrade client." 
+ set newmasterenv $clientenv + error_check_good upgrade_client [$newmasterenv rep_start -master] 0 + + puts "\tRep$tnum.d: Reopen old master as client and catch up." + set newclientenv [eval {berkdb_env_noerr -create -recover} $envargs \ + -txn nosync -errpfx NEWCLIENT $verbargs $privargs \ + {-home $masterdir -rep_client -rep_transport [list 1 replsend]}] + set envlist "{$newclientenv 1} {$newmasterenv 2}" + process_msgs $envlist + + # Test put to the database handle we opened back when the new master + # was a client. + puts "\tRep$tnum.e: Test put to handle opened before upgrade." + set puttxn [$newmasterenv txn] + error_check_good client_upg_db_put \ + [$client_upg_db put -txn $puttxn hello there] 0 + error_check_good puttxn_commit [$puttxn commit] 0 + process_msgs $envlist + + # Close the new master's handle for the upgrade-test database; we + # don't need it. Then check to make sure the client did in fact + # update the database. + puts "\tRep$tnum.f: Test that client did update the database." + error_check_good client_upg_db_close [$client_upg_db close] 0 + set newclient_upg_db \ + [berkdb_open_noerr -env $newclientenv rep$tnum-upg.db] + error_check_good newclient_upg_db_get [$newclient_upg_db get hello] \ + [list [list hello there]] + error_check_good newclient_upg_db_close [$newclient_upg_db close] 0 + + error_check_good newmasterenv_close [$newmasterenv close] 0 + error_check_good newclientenv_close [$newclientenv close] 0 + + if { [lsearch $envargs "-encrypta*"] !=-1 } { + set encrypt 1 + } + error_check_good verify \ + [verify_dir $clientdir "\tRep$tnum.g: " 0 0 1] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep012.tcl b/test/tcl/rep012.tcl new file mode 100644 index 00000000..065a5053 --- /dev/null +++ b/test/tcl/rep012.tcl @@ -0,0 +1,299 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep012 +# TEST Replication and dead DB handles. +# TEST +# TEST Run a modified version of test001 in a replicated master env. +# TEST Run in replicated environment with secondary indices too. +# TEST Make additional changes to master, but not to the client. +# TEST Downgrade the master and upgrade the client with open db handles. +# TEST Verify that the roll back on clients gives dead db handles. +proc rep012 { method { niter 10 } { tnum "012" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r):\ + Replication and dead db handles $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep012_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep012_sub { method niter tnum logset recargs largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global verbose_check_secondaries + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR.2 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -errpfx ENV0 $verbargs $repmemargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set env0 [eval $ma_envcmd $recargs -rep_master] + set masterenv $env0 + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx ENV1 $verbargs $repmemargs $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set env1 [eval $cl_envcmd $recargs -rep_client] + set clientenv $env1 + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs \ + $c2_logargs -errpfx ENV2 $verbargs $repmemargs $privargs \ + -home $clientdir2 -rep_transport \[list 3 replsend\]" + set cl2env [eval $cl2_envcmd $recargs -rep_client] + + if { $databases_in_memory } { + set testfile { "" test$tnum.db } + set pname { "" primary$tnum.db } + set sname { "" secondary$tnum.db } + } else { + set testfile "test$tnum.db" + set pname "primary$tnum.db" + set sname "secondary$tnum.db" + } + set omethod [convert_method $method] + set env0db [eval {berkdb_open_noerr -env $env0 -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $env0db] TRUE + set masterdb $env0db + + set do_secondary 0 + if { [is_btree $method] || [is_hash $method] } { + set do_secondary 1 + # Open the primary + set mpdb [eval {berkdb_open_noerr -env $env0 -auto_commit \ + -create -mode 0644} $largs $omethod $pname] + error_check_good dbopen [is_valid_db $mpdb] TRUE + + # Open the secondary + # Open a 2nd handle to the same secondary + set msdb [eval {berkdb_open_noerr -env $env0 -auto_commit \ + -create -mode 0644} $largs $omethod $sname] + error_check_good dbopen [is_valid_db $msdb] TRUE + error_check_good associate [$mpdb associate \ + [callback_n 0] $msdb] 0 + } + + # Bring the clients online by processing the startup messages. 
+ set envlist "{$env0 1} {$env1 2} {$cl2env 3}" + process_msgs $envlist + + set env1db [eval {berkdb_open_noerr -env $env1 -auto_commit \ + -mode 0644} $largs $omethod $testfile] + set clientdb $env1db + error_check_good dbopen [is_valid_db $env1db] TRUE + set env2db [eval {berkdb_open_noerr -env $cl2env -auto_commit \ + -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $env2db] TRUE + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.a.0: Running rep_test in replicated env." + eval rep_test $method $masterenv $masterdb $niter 0 0 0 $largs + process_msgs $envlist + + if { $do_secondary } { + # Put some data into the primary + puts "\tRep$tnum.a.1: Putting primary/secondary data on master." + eval rep012_sec $method $mpdb $niter keys data + process_msgs $envlist + + set verbose_check_secondaries 1 + check_secondaries $mpdb $msdb $niter keys data "Rep$tnum.b" + } else { + puts "\tRep$tnum.b: Skipping secondaries for method $method" + } + + # Check that databases are in-memory or on-disk as expected. + # We can only check the secondaries if secondaries are allowed for + # this access method. + set names [list $testfile] + if { $do_secondary } { + lappend names $pname $sname + } + foreach name $names { + eval check_db_location $masterenv $name + eval check_db_location $clientenv $name + eval check_db_location $cl2env $name + } + + puts "\tRep$tnum.c: Run test in master and client 2 only" + set nstart $niter + eval rep_test\ + $method $masterenv $masterdb $niter $nstart $nstart 0 $largs + + # Ignore messages for $env1. + set envlist "{$env0 1} {$cl2env 3}" + process_msgs $envlist + + # Nuke those for client about to become master. 
+ replclear 2 + tclsleep 3 + puts "\tRep$tnum.d: Swap envs" + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + error_check_good downgrade [$clientenv rep_start -client] 0 + error_check_good upgrade [$masterenv rep_start -master] 0 + set envlist "{$env0 1} {$env1 2} {$cl2env 3}" + process_msgs $envlist + + # + # At this point, env0 should have rolled back across a txn commit. + # If we do any operation on env0db, we should get an error that + # the handle is dead. + puts "\tRep$tnum.e: Try to access db handle after rollback" + set stat1 [catch {$env0db stat} ret1] + error_check_good stat1 $stat1 1 + error_check_good dead1 [is_substr $ret1 DB_REP_HANDLE_DEAD] 1 + + set stat3 [catch {$env2db stat} ret3] + error_check_good stat3 $stat3 1 + error_check_good dead3 [is_substr $ret3 DB_REP_HANDLE_DEAD] 1 + + if { $do_secondary } { + # + # Check both secondary get and close to detect DEAD_HANDLE. + # + puts "\tRep$tnum.f: Try to access secondary db handles after rollback" + set verbose_check_secondaries 1 + check_secondaries $mpdb $msdb $niter \ + keys data "Rep$tnum.f" errp errs errsg + error_check_good deadp [is_substr $errp DB_REP_HANDLE_DEAD] 1 + error_check_good deads [is_substr $errs DB_REP_HANDLE_DEAD] 1 + error_check_good deadsg [is_substr $errsg DB_REP_HANDLE_DEAD] 1 + puts "\tRep$tnum.g: Closing" + error_check_good mpdb [$mpdb close] 0 + error_check_good msdb [$msdb close] 0 + } else { + puts "\tRep$tnum.f: Closing" + } + + error_check_good env0db [$env0db close] 0 + error_check_good env1db [$env1db close] 0 + error_check_good cl2db [$env2db close] 0 + error_check_good env0_close [$env0 close] 0 + error_check_good env1_close [$env1 close] 0 + error_check_good cl2_close [$cl2env close] 0 + replclose $testdir/MSGQUEUEDIR + set verbose_check_secondaries 0 + set testdir $orig_tdir + return +} + +proc rep012_sec {method pdb niter keysp datap} { + source ./include.tcl + + upvar $keysp keys + upvar $datap data + set did [open $dict] + for { set n 0 } 
{ [gets $did str] != -1 && $n < $niter } { incr n } { + if { [is_record_based $method] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $method $datum] + + set ret [$pdb put $key [chop_data $method $datum]] + error_check_good put($n) $ret 0 + } + close $did +} diff --git a/test/tcl/rep013.tcl b/test/tcl/rep013.tcl new file mode 100644 index 00000000..bdc5d968 --- /dev/null +++ b/test/tcl/rep013.tcl @@ -0,0 +1,307 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep013 +# TEST Replication and swapping master/clients with open dbs. +# TEST +# TEST Run a modified version of test001 in a replicated master env. +# TEST Make additional changes to master, but not to the client. +# TEST Swap master and client. +# TEST Verify that the roll back on clients gives dead db handles. +# TEST Rerun the test, turning on client-to-client synchronization. +# TEST Swap and verify several times. +proc rep013 { method { niter 10 } { tnum "013" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up named in-memory database testing. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run the body of the test with and without recovery. 
+ set anyopts { "" "anywhere" } + foreach r $test_recopts { + foreach l $logsets { + foreach a $anyopts { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($r $a): Replication and \ + ($method) master/client swapping \ + $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep013_sub $method $niter $tnum $l $r $a $args + } + } + } +} + +proc rep013_sub { method niter tnum logset recargs anyopt largs } { + global testdir + global anywhere + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR.2 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + if { $anyopt == "anywhere" } { + set anywhere 1 + } else { + set anywhere 0 + } + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Set number of swaps between master and client. 
+ set nswap 6 + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -errpfx ENV1 $verbargs $repmemargs \ + $privargs -cachesize {0 4194304 3} \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set env1 [eval $ma_envcmd $recargs -rep_master] + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx ENV2 $verbargs $repmemargs \ + $privargs -cachesize {0 2097152 2} \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set env2 [eval $cl_envcmd $recargs -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs \ + $c2_logargs -errpfx ENV3 $verbargs $repmemargs \ + $privargs -cachesize {0 1048576 1} \ + -home $clientdir2 -rep_transport \[list 3 replsend\]" + set cl2env [eval $cl2_envcmd $recargs -rep_client] + + # Set database name for in-memory or on-disk. + if { $databases_in_memory } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + + set omethod [convert_method $method] + + set env1db_cmd "berkdb_open_noerr -env $env1 -auto_commit \ + -create -mode 0644 $largs $omethod $testfile" + set env1db [eval $env1db_cmd] + error_check_good dbopen [is_valid_db $env1db] TRUE + + # + # Verify that a client creating a database gets an error. + # + set stat [catch {berkdb_open_noerr -env $env2 -auto_commit \ + -create -mode 0644 $largs $omethod $testfile} ret] + error_check_good create_cl $stat 1 + error_check_good cr_str [is_substr $ret "no such file"] 1 + + # Bring the clients online by processing the startup messages. 
+ set envlist "{$env1 1} {$env2 2} {$cl2env 3}" + process_msgs $envlist + + set env2db_cmd "berkdb_open_noerr -env $env2 -auto_commit \ + -mode 0644 $largs $omethod $testfile" + set env2db [eval $env2db_cmd] + error_check_good dbopen [is_valid_db $env2db] TRUE + set env3db_cmd "berkdb_open_noerr -env $cl2env -auto_commit \ + -mode 0644 $largs $omethod $testfile" + set env3db [eval $env3db_cmd] + error_check_good dbopen [is_valid_db $env3db] TRUE + + # + # Set up all the master/client data we're going to need + # to keep track of and swap. + # + set masterenv $env1 + set masterdb $env1db + set mid 1 + set clientenv $env2 + set clientdb $env2db + set cid 2 + set mdb_cmd "berkdb_open_noerr -env $masterenv -auto_commit \ + -mode 0644 $largs $omethod $testfile" + set cdb_cmd "berkdb_open_noerr -env $clientenv -auto_commit \ + -mode 0644 $largs $omethod $testfile" + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval rep_test $method $masterenv $masterdb $niter 0 0 0 $largs + set envlist "{$env1 1} {$env2 2} {$cl2env 3}" + process_msgs $envlist + + set nstart 0 + for { set i 0 } { $i < $nswap } { incr i } { + puts "\tRep$tnum.b.$i: Check for bad db handles" + set dbl {masterdb clientdb env3db} + set dbcmd {$mdb_cmd $cdb_cmd $env3db_cmd} + + set stat [catch {$masterdb stat} ret] + if { $stat == 1 } { + error_check_good dead [is_substr $ret \ + DB_REP_HANDLE_DEAD] 1 + error_check_good close [$masterdb close] 0 + set masterdb [eval $mdb_cmd] + error_check_good dbopen [is_valid_db $masterdb] TRUE + } + + set stat [catch {$clientdb stat} ret] + if { $stat == 1 } { + error_check_good dead [is_substr $ret \ + DB_REP_HANDLE_DEAD] 1 + error_check_good close [$clientdb close] 0 + set clientdb [eval $cdb_cmd] + error_check_good dbopen [is_valid_db $clientdb] TRUE + } + + set stat [catch {$env3db stat} ret] + if { $stat == 1 } { + error_check_good dead [is_substr $ret \ + DB_REP_HANDLE_DEAD] 1 + 
error_check_good close [$env3db close] 0 + set env3db [eval $env3db_cmd] + error_check_good dbopen [is_valid_db $env3db] TRUE + } + + set nstart [expr $nstart + $niter] + puts "\tRep$tnum.c.$i: Run test in master and client2 only" + eval rep_test \ + $method $masterenv $masterdb $niter $nstart $nstart 0 $largs + set envlist "{$masterenv $mid} {$cl2env 3}" + process_msgs $envlist + + # Nuke those for client about to become master. + replclear $cid + + # Swap all the info we need. + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + + set tmp $masterdb + set masterdb $clientdb + set clientdb $tmp + + set tmp $mid + set mid $cid + set cid $tmp + + set tmp $mdb_cmd + set mdb_cmd $cdb_cmd + set cdb_cmd $tmp + + puts "\tRep$tnum.d.$i: Swap: master $mid, client $cid" + error_check_good downgrade [$clientenv rep_start -client] 0 + error_check_good upgrade [$masterenv rep_start -master] 0 + set envlist "{$env1 1} {$env2 2} {$cl2env 3}" + process_msgs $envlist + } + puts "\tRep$tnum.e: Check message handling of client." + set req3 [stat_field $cl2env rep_stat "Client service requests"] + set rereq1 [stat_field $env1 rep_stat "Client rerequests"] + set rereq2 [stat_field $env2 rep_stat "Client rerequests"] + if { $anyopt == "anywhere" } { + error_check_bad req $req3 0 + error_check_bad rereq1 $rereq1 0 + error_check_bad rereq2 $rereq2 0 + } else { + error_check_good req $req3 0 + error_check_good rereq1 $rereq1 0 + error_check_good rereq2 $rereq2 0 + } + + # Check that databases are in-memory or on-disk as expected. 
+ check_db_location $env1 + check_db_location $env2 + check_db_location $cl2env + + puts "\tRep$tnum.f: Closing" + error_check_good masterdb [$masterdb close] 0 + error_check_good clientdb [$clientdb close] 0 + error_check_good cl2db [$env3db close] 0 + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + error_check_good cl2_close [$cl2env close] 0 + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + set anywhere 0 + return +} diff --git a/test/tcl/rep014.tcl b/test/tcl/rep014.tcl new file mode 100644 index 00000000..f7a5ef7f --- /dev/null +++ b/test/tcl/rep014.tcl @@ -0,0 +1,209 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep014 +# TEST Replication and multiple replication handles. +# TEST Test multiple client handles, opening and closing to +# TEST make sure we get the right openfiles. +# +proc rep014 { method { niter 10 } { tnum "014" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # We can't open two envs on HP-UX, so just skip the + # whole test since that is at the core of it. + if { $is_hp_test == 1 } { + puts "Rep$tnum: Skipping for HP-UX." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication\ + and openfiles $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep014_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep014_sub { method niter tnum logset recargs largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $verbargs -errpfx MASTER -home $masterdir $repmemargs \ + $privargs -rep_transport \[list 1 replsend\]" + set env0 [eval $ma_envcmd $recargs -rep_master] + set masterenv $env0 + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + $verbargs -errpfx CLIENT1 -home $clientdir $repmemargs \ + $privargs -rep_transport \[list 2 replsend\]" + set env1 [eval $cl_envcmd $recargs] + error_check_good client_env [is_valid_env $env1] TRUE + set env2 [eval $cl_envcmd] + error_check_good client_env [is_valid_env $env2] TRUE + + error_check_good e1_cl [$env1 rep_start -client] 0 + + # Set up databases for in-memory or on-disk. + if { $databases_in_memory } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + + set omethod [convert_method $method] + set env0db [eval {berkdb_open_noerr -env $env0 -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + set masterdb $env0db + error_check_good dbopen [is_valid_db $env0db] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$env0 1} {$env1 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval rep_test $method $masterenv $masterdb $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Close and reopen client env." + error_check_good env1_close [$env1 close] 0 + set env1 [eval $cl_envcmd] + error_check_good client_env [is_valid_env $env1] TRUE + error_check_good e1_cl [$env1 rep_start -client] 0 + + puts "\tRep$tnum.c: Run test in master again." + set start $niter + eval rep_test $method $masterenv $masterdb $niter $start 0 0 $largs + set envlist "{$env0 1} {$env1 2}" + process_msgs $envlist + + puts "\tRep$tnum.d: Start and close 2nd client env." 
+ error_check_good e2_pfx [$env2 errpfx CLIENT2] 0 + error_check_good e2_cl [$env2 rep_start -client] 0 + error_check_good env2_close [$env2 close] 0 + + puts "\tRep$tnum.e: Run test in master again." + set start [expr $start + $niter] + error_check_good e1_pfx [$env1 errpfx CLIENT1] 0 + eval rep_test $method $masterenv $masterdb $niter $start 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.f: Open env2, close env1, use env2." + set env2 [eval $cl_envcmd] + error_check_good client_env [is_valid_env $env2] TRUE + error_check_good e1_pfx [$env2 errpfx CLIENT2] 0 + error_check_good e2_cl [$env2 rep_start -client] 0 + error_check_good e1_pfx [$env1 errpfx CLIENT1] 0 + + # Check for on-disk or in-memory while we have all 3 envs. + check_db_location $masterenv + check_db_location $env1 + check_db_location $env2 + + error_check_good env1_close [$env1 close] 0 + + puts "\tRep$tnum.g: Run test in master again." + set start [expr $start + $niter] + error_check_good e1_pfx [$env2 errpfx CLIENT2] 0 + eval rep_test $method $masterenv $masterdb $niter $start 0 0 $largs + set envlist "{$env0 1} {$env2 2}" + process_msgs $envlist + + puts "\tRep$tnum.h: Closing" + error_check_good env0db [$env0db close] 0 + error_check_good env0_close [$env0 close] 0 + error_check_good env2_close [$env2 close] 0 + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} diff --git a/test/tcl/rep015.tcl b/test/tcl/rep015.tcl new file mode 100644 index 00000000..3a2d998d --- /dev/null +++ b/test/tcl/rep015.tcl @@ -0,0 +1,329 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep015 +# TEST Locking across multiple pages with replication. +# TEST +# TEST Open master and client with small pagesize and +# TEST generate more than one page and generate off-page +# TEST dups on the first page (second key) and last page +# TEST (next-to-last key). 
+# TEST Within a single transaction, for each database, open +# TEST 2 cursors and delete the first and last entries (this +# TEST exercises locks on regular pages). Intermittently +# TEST update client during the process. +# TEST Within a single transaction, for each database, open +# TEST 2 cursors. Walk to the off-page dups and delete one +# TEST from each end (this exercises locks on off-page dups). +# TEST Intermittently update client. +# +proc rep015 { method { nentries 100 } { tnum "015" } { ndb 3 } args } { + global repfiles_in_memory + global env_private + global rand_init + berkdb srand $rand_init + + source ./include.tcl + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Skipping rep$tnum for method $method." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "and private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: \ + Skipping for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r):\ + Replication and locking $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep015_sub $method $nentries $tnum $ndb $l $r $args + } + } +} + +proc rep015_sub { method nentries tnum ndb logset recargs largs } { + global testdir + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $verbargs -errpfx MASTER $repmemargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + $verbargs -errpfx CLIENT $repmemargs $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Set up the master databases. The small pagesize quickly + # generates multiple pages and off-page dups. 
+ set pagesize 512 + puts "\tRep$tnum.a: Create and populate databases in master." + for { set i 0 } { $i < $ndb } { incr i } { + set db [eval berkdb_open_noerr -create $omethod -auto_commit \ + -pagesize $pagesize -env $masterenv $largs -dup testdb$i.db] + set dblist($i) $db + # + # Populate, being sure to create multiple pages. + # The non-duplicate entries are pairs of the form + # {1, data1} {2, data2}. The duplicates are pairs of + # the form {2, dup1} {2, dup2}, {2, dup3}, etc. + # + for { set j 1 } { $j <= $nentries } { incr j } { + set t [$masterenv txn] + error_check_good put_$db [eval $db put -txn $t \ + $j [chop_data $method data$j]] 0 + error_check_good txn_commit [$t commit] 0 + } + # Create off-page dups on key 2 and next-to-last key. + set t [$masterenv txn] + for { set j 1 } { $j <= $nentries } { incr j } { + error_check_good put_second [eval $db put -txn $t \ + 2 [chop_data $method dup$j]] 0 + error_check_good put_next_to_last [eval $db put \ + -txn $t \ + [expr $nentries - 1] [chop_data $method dup$j]] 0 + } + error_check_good txn_commit [$t commit] 0 + # Make sure there are off-page dups. + set stat [$db stat] + error_check_bad stat:offpage \ + [is_substr $stat "{{Internal pages} 0}"] 1 + } + + puts "\tRep$tnum.b: Propagate setup to clients." + process_msgs $envlist + + # Open client databases so we can exercise locking there too. + for { set i 0 } { $i < $ndb } { incr i } { + set cdb [eval {berkdb_open_noerr} -auto_commit \ + -env $clientenv $largs testdb$i.db] + set cdblist($i) $cdb + } + + # Set up two cursors into each db. Randomly select a cursor + # and do the next thing: position, delete, or close. + foreach option { regular off-page } { + puts "\tRep$tnum.c: Transactional cursor deletes ($option)." + + set t [$masterenv txn] + # Set up two cursors into each db, and initialize the next + # action to be done to POSITION. 
+ for { set i 0 } { $i < [expr $ndb * 2] } { incr i } { + set db $dblist([expr $i / 2]) + set mcurs($i) [eval {$db cursor} -txn $t] + error_check_good mcurs$i \ + [is_valid_cursor $mcurs($i) $db] TRUE + set cnext($i) POSITION + } + + set ct [$clientenv txn] + # Set up two cursors into each client db. + for { set i 0 } { $i < [expr $ndb * 2] } { incr i } { + set cdb $cdblist([expr $i / 2]) + set ccurs($i) [eval {$cdb cursor} -txn $ct] + error_check_good ccurs$i \ + [is_valid_cursor $ccurs($i) $cdb] TRUE + } + + # Randomly pick a cursor to operate on and do the next thing. + # At POSITION, we position that cursor. At DELETE, we delete + # the current item. At CLOSE, we close the cursor. At DONE, + # we do nothing except check to see if all cursors have reached + # DONE, and quit when they have. + # On the off-page dup test, walk to reach an off-page entry, + # and delete that one. + set k 0 + while { 1 } { + # Every nth time through, update the client. +# set n 5 +# if {[expr $k % $n] == 0 } { +# puts "Updating clients" +# process_msgs $envlist +# } +# incr k + set i [berkdb random_int 0 [expr [expr $ndb * 2] - 1]] + set next $cnext($i) + switch -exact -- $next { + POSITION { + do_position $mcurs($i) \ + $i $nentries $option + set cnext($i) DELETE + # Position the client cursors too. + do_position $ccurs($i) \ + $i $nentries $option + } + DELETE { + error_check_good c_del \ + [$mcurs($i) del] 0 + set cnext($i) CLOSE + # Update clients after a delete. + process_msgs $envlist + } + CLOSE { + error_check_good c_close.$i \ + [$mcurs($i) close] 0 + set cnext($i) DONE + # Close the client cursor too. 
+ error_check_good cc_close.$i \ + [$ccurs($i) close] 0 + } + DONE { + set breakflag 1 + for { set j 0 } \ + { $j < [expr $ndb * 2] } \ + { incr j } { + if { $cnext($j) != "DONE" } { + set breakflag 0 + } + } + if { $breakflag == 1 } { + break + } + } + default { + puts "FAIL: Unrecognized \ + next action $next" + } + } + } + error_check_good txn_commit [$t commit] 0 + error_check_good clienttxn_commit [$ct commit] 0 + process_msgs $envlist + } + + # Clean up. + for { set i 0 } { $i < $ndb } { incr i } { + set db $dblist($i) + error_check_good close_$db [$db close] 0 + set cdb $cdblist($i) + error_check_good close_$cdb [$cdb close] 0 + } + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR + return +} + +proc do_position { cursor i nentries option } { + if { [expr $i % 2] == 0 } { + if { $option == "regular" } { + set ret [$cursor get -first] + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good get_first \ + [string range $data 4 end] $key + } elseif { $option == "off-page" } { + set ret [$cursor get -set 2] + error_check_good get_key_2 \ + [lindex [lindex $ret 0] 0] 2 + error_check_good get_data_2 \ + [lindex [lindex $ret 0] 1] data2 + for { set j 1 } { $j <= 95 } { incr j } { + set ret [$cursor get -nextdup] + error_check_good key_nextdup$j \ + [lindex [lindex $ret 0] 0] 2 + error_check_good data_nextdup$j \ + [lindex [lindex $ret 0] 1] dup$j + } + } + } else { + if { $option == "regular" } { + set ret [$cursor get -set $nentries] + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good get_set_$nentries \ + [string range $data 4 end] $key + } elseif { $option == "off-page" } { + set ret [$cursor get -last] + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good get_last \ + [string range $data 3 end] [expr $key + 1] + for { set j 1 } { $j <= 5 } { incr j } 
{ + set ret [$cursor get -prev] + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good get_prev \ + [string range $data 3 end] \ + [expr [expr $key + 1] - $j] + } + } + } +} diff --git a/test/tcl/rep016.tcl b/test/tcl/rep016.tcl new file mode 100644 index 00000000..55e82fcc --- /dev/null +++ b/test/tcl/rep016.tcl @@ -0,0 +1,289 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep016 +# TEST Replication election test with varying required nvotes. +# TEST +# TEST Run a modified version of test001 in a replicated master environment; +# TEST hold an election among a group of clients to make sure they select +# TEST the master with varying required participants. + +proc rep016 { method args } { + global errorInfo + global databases_in_memory + global repfiles_in_memory + + source ./include.tcl + set tnum "016" + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + set nclients 5 + set logsets [create_logsets [expr $nclients + 1]] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." 
+ continue + } + puts "Rep$tnum ($method $r): Replication\ + elections with varying nvotes $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep016_sub $method $nclients $tnum $l $r $args + } + } +} + +proc rep016_sub { method nclients tnum logset recargs largs } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set niter 5 + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -event $repmemargs \ + -home $masterdir $m_txnargs $m_logargs -rep_master $verbargs \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + lappend envlist "$masterenv 1" + + # Open the clients. + # Don't set -errfile now -- wait until the error catching + # portion of the test is complete. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create -home $clientdir($i) \ + -event $repmemargs \ + $c_txnargs($i) $c_logargs($i) -rep_client $verbargs \ + -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i) $recargs] + lappend envlist "$clientenv($i) $envid" + } + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + check_db_location $masterenv + for { set i 0 } { $i < $nclients } { incr i } { + check_db_location $clientenv($i) + } + + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + puts "\tRep$tnum.b: Error values for rep_elect" + # + # Do all the error catching in client0. We do not need to call + # start_election here to fork a process because we never get + # far enough to send/receive any messages. We just want to + # check the error message. + # + # !!! + # We cannot set -errpfx or -errfile or anything in the + # env_cmd above. Otherwise the correct output won't be set + # in 'ret' below and the test will fail. + # + # First check negative nvotes. + # + set nsites [expr $nclients + 1] + set priority 2 + set timeout 5000000 + # + # Setting nsites to 0 acts as a signal for rep_elect to use + # the configured nsites, but since we haven't set that yet, + # this should still fail. TODO: need another test verifying + # the proper operation when we *have* configured nsites. 
+ # + set nsites 0 + set nvotes 2 + set res [catch {$clientenv(0) rep_elect $nsites $nvotes $priority \ + $timeout} ret] + error_check_bad catch $res 0 + error_check_good ret [is_substr $ret "is larger than nsites"] 1 + + # + # Check nvotes > nsites. + # + set nsites $nclients + set nvotes [expr $nsites + 1] + set res [catch {$clientenv(0) rep_elect $nsites $nvotes $priority \ + $timeout} ret] + error_check_bad catch $res 0 + error_check_good ret [is_substr $ret "is larger than nsites"] 1 + + for { set i 0 } { $i < $nclients } { incr i } { + replclear [expr $i + 2] + # + # This test doesn't use the testing hooks, so + # initialize err_cmd and crash appropriately. + # + set err_cmd($i) "none" + set crash($i) 0 + # + # Initialize the array pri. We'll set it to + # appropriate values when the winner is determined. + # + set pri($i) 0 + # + if { $rep_verbose == 1 } { + $clientenv($i) errpfx CLIENT.$i + $clientenv($i) verbose $verbose_type on + $clientenv($i) errfile /dev/stderr + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT.$i -errfile /dev/stderr "] + } + } + set m "Rep$tnum.c" + puts "\t$m: Check single master/client can elect itself" + # + # 2 sites: 1 master, 1 client. Allow lone client to elect itself. + # Adjust client env list to reflect the single client. + # + set oldenvlist $envlist + set envlist [lreplace $envlist 1 end] + set nsites 2 + set nvotes 1 + set orig_ncl $nclients + set nclients 1 + set elector 0 + set winner 0 + setpriority pri $nclients $winner + run_election envlist err_cmd pri crash\ + $qdir $m $elector $nsites $nvotes $nclients $winner 1 $dbname + + # + # Now run with all clients. Client0 should always get elected + # because it became master and should have a bigger LSN. 
+ # + set nclients $orig_ncl + set envlist [lreplace $oldenvlist 0 0 [lindex $envlist 0]] + + set m "Rep$tnum.d" + puts "\t$m: Elect with 100% client participation" + set nsites $nclients + set nvotes $nclients + set winner [rep016_selectwinner $nsites $nvotes $nclients] + setpriority pri $nclients $winner + run_election envlist err_cmd pri crash\ + $qdir $m $elector $nsites $nvotes $nclients $winner 1 $dbname + + # + # Elect with varying levels of participation. Start with nsites + # as nclients+1 (simulating a down master) and require nclients, + # and fewer (by 1) until we get down to 2 clients. + # + set m "Rep$tnum.e" + puts "\t$m: Elect with varying participation" + set nsites [expr $nclients + 1] + set count 0 + for {set n $nclients} {$n > 1} {incr n -1} { + set m "Rep$tnum.e.$count" + set winner [rep016_selectwinner $nsites $n $n] + setpriority pri $nclients $winner + run_election envlist err_cmd pri crash\ + $qdir $m $elector $nsites $n $n $winner 1 $dbname + incr count + } + + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + replclose $testdir/MSGQUEUEDIR +} + +proc rep016_selectwinner { nsites nvotes nclients } { + # + # Special case: When we test with 100% participation, we expect + # client 0 to always win because it has a bigger LSN than the + # rest due to earlier part of the test. This special case is + # kinda gross. + # + if { $nsites != $nvotes } { + set win [berkdb random_int 0 [expr $nclients - 1]] + } else { + set win 0 + } + return $win +} diff --git a/test/tcl/rep017.tcl b/test/tcl/rep017.tcl new file mode 100644 index 00000000..26c9924a --- /dev/null +++ b/test/tcl/rep017.tcl @@ -0,0 +1,263 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep017 +# TEST Concurrency with checkpoints. +# TEST +# TEST Verify that we achieve concurrency in the presence of checkpoints. 
+# TEST Here are the checks that we wish to make: +# TEST While dbenv1 is handling the checkpoint record: +# TEST Subsequent in-order log records are accepted. +# TEST Accepted PERM log records get NOTPERM +# TEST A subsequent checkpoint gets NOTPERM +# TEST After checkpoint completes, next txn returns PERM +proc rep017 { method { niter 10 } { tnum "017" } args } { + + source ./include.tcl + global repfiles_in_memory + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + + puts "Rep$tnum ($method $r):\ + Concurrency with checkpoints $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep017_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep017_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global perm_response_list + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_cmd "berkdb_env_noerr -create $verbargs \ + -log_max 1000000 $m_txnargs $m_logargs $repmemargs \ + -home $masterdir -rep_master -errpfx MASTER \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_cmd $recargs] + + # Open a client + repladd 2 + set cl_cmd "berkdb_env_noerr -create -home $clientdir $verbargs \ + $c_txnargs $c_logargs -rep_client -errpfx CLIENT $repmemargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_cmd $recargs] + + # Bring the client online. + process_msgs "{$masterenv 1} {$clientenv 2}" + + # Open database in master, make lots of changes so checkpoint + # will take a while, and propagate to client. + puts "\tRep$tnum.a: Create and populate database." + set dbname rep017.db + set db [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + for { set i 1 } { $i <= $niter } { incr i } { + set t [$masterenv txn] + error_check_good db_put \ + [eval $db put -txn $t $i [chop_data $method data$i]] 0 + error_check_good txn_commit [$t commit] 0 + } + process_msgs "{$masterenv 1} {$clientenv 2}" 1 + + # Get the master's last LSN before the checkpoint + set pre_ckp_offset \ + [stat_field $masterenv log_stat "Current log file offset"] + + puts "\tRep$tnum.b: Checkpoint on master." + error_check_good checkpoint [$masterenv txn_checkpoint] 0 + + # Now get ckp LSN + set ckp_lsn [stat_field $masterenv txn_stat "LSN of last checkpoint"] + set ckp_offset [lindex $ckp_lsn 1] + + # Fork child process on client. It should process whatever + # it finds in the message queue -- just the checkpoint record, + # for now. It's run in the background so the parent can + # test for whether we're checkpointing at the same time. + # + puts "\tRep$tnum.c: Fork child process on client." 
+ set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep017script.tcl $testdir/repscript.log \ + $masterdir $clientdir $rep_verbose $verbose_type &] + + + # We need to wait until we know that the client is processing a + # checkpoint. The checkpoint will consist of some DBREG records + # followed by the actual checkpoint. So, if we've gotten records + # later than the last LSN when the master took the checkpoint, we've + # begin the checkpoint. By test design, we should not finish the + # checkpoint until this process has at least had a chance to run. + # + # In order to do this, we have handles open on the message + # queue from both this process and its child. This is not + # normally legal behavior for an application using Berkeley DB, + # but this test depends on the parent process doing things while + # the child is pausing in the middle of the checkpoint. We are + # very careful to control which process is handling which + # messages. + + puts "\tRep$tnum.d: Test whether client is in checkpoint." + while { 1 } { + set client_off \ + [stat_field $clientenv log_stat "Current log file offset"] + + if { $client_off > $pre_ckp_offset } { + if { $client_off > $ckp_offset } { + # We already completed the checkpoint and + # never got out of here. That's a bug in + # in the test. + error_check_good checkpoint_test \ + not_in_checkpoint should_be_in_checkpoint + } else { + break; + } + } else { + # Not yet up to checkpoint + tclsleep 1 + } + } + + # Main client processes checkpoint 2nd time and should get NOTPERM. 
+ puts "\tRep$tnum.e: Commit and checkpoint return NOTPERM from client" + incr niter + set t [$masterenv txn] + error_check_good db_put [eval $db put \ + -txn $t $niter [chop_data $method data$niter]] 0 + error_check_good txn_commit [$t commit] 0 + error_check_good checkpoint [$masterenv txn_checkpoint] 0 + set ckp2_lsn [stat_field $masterenv txn_stat "LSN of last checkpoint"] + + process_msgs "{$clientenv 2}" 1 + + # Check that the checkpoint record got a NOTPERM + # Find the ckp LSN of the Master and then look for the response + # from that message in the client + set ckp_result "" + foreach i $perm_response_list { + # Everything in the list should be NOTPERM + if { [llength $i] == 0 } { + # Check for sentinel at beginning of list + continue; + } + set ckp_result [lindex $i 0] + error_check_good NOTPERM [is_substr $ckp_result NOTPERM] 1 + if { [lindex $i 1] == $ckp2_lsn } { + break + } + } + error_check_bad perm_response $ckp_result "" + + puts "\tRep$tnum.f: Waiting for child ..." + # Watch until the checkpoint is done. + watch_procs $pid 5 + + # Verify that the checkpoint is now complete on the client and + # that all later messages have been applied. + process_msgs "{$clientenv 2}" 1 + set client_ckp [stat_field $clientenv txn_stat "LSN of last checkpoint"] + error_check_good matching_ckps $client_ckp $ckp2_lsn + + set m_end [stat_field $masterenv log_stat "Current log file offset"] + set c_end [stat_field $clientenv log_stat "Current log file offset"] + error_check_good matching_lsn $c_end $m_end + + # Finally, now that checkpoints are complete; perform another + # perm operation and make sure that it returns ISPERM. 
+ puts "\tRep$tnum.g: No pending ckp; check for ISPERM" + incr niter + set t [$masterenv txn] + error_check_good db_put [eval $db put \ + -txn $t $niter [chop_data $method data$niter]] 0 + error_check_good txn_commit [$t commit] 0 + error_check_good checkpoint [$masterenv txn_checkpoint] 0 + set ckp3_lsn [stat_field $masterenv txn_stat "LSN of last checkpoint"] + + process_msgs "{$clientenv 2}" 1 + + # Check that the checkpoint and commit records got a ISPERM + # Find the ckp LSN of the Master and then look for the response + # from that message in the client + set ckp_result "" + foreach i $perm_response_list { + if { [llength $i] == 0 } { + # Check for sentinel at beginning of list + continue; + } + + # Everything in the list should be ISPERM + set ckp_result [lindex $i 0] + error_check_good ISPERM [is_substr $ckp_result ISPERM] 1 + if { [lindex $i 1] == $ckp3_lsn } { + break + } + } + error_check_bad perm_response $ckp_result "" + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep017script.tcl b/test/tcl/rep017script.tcl new file mode 100644 index 00000000..d165b13c --- /dev/null +++ b/test/tcl/rep017script.tcl @@ -0,0 +1,83 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep017 script - concurrency with checkpoints. +# +# Repscript exists to process checkpoints, though the +# way it is currently written, it will process whatever +# it finds in the message queue. It requires a one-master +# one-client setup. 
+# +# Usage: repscript masterdir clientdir rep_verbose verbose_type +# masterdir: master env directory +# clientdir: client env directory +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir clientdir rep_verbose verbose_type" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set clientdir [ lindex $argv 1 ] +set rep_verbose [ lindex $argv 2 ] +set verbose_type [ lindex $argv 3 ] +set verbargs "" +if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " +} + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +# +repladd 1 +repladd 2 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir $verbargs \ + -txn -rep_master -rep_transport \[list 1 replsend\]" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +puts "Master open" + +# Join the client env. 
+set cl_cmd "berkdb_env_noerr -home $clientdir $verbargs \ + -txn -rep_client -rep_transport \[list 2 replsend\]" +set clientenv [eval $cl_cmd] +error_check_good script_cenv_open [is_valid_env $clientenv] TRUE + +puts "Everyone open" +tclsleep 10 + +# Make it so that the client sleeps in the middle of checkpoints +$clientenv test check 10 + +puts "Client set" + +# Update the client, in order to process the checkpoint +process_msgs "{$masterenv 1} {$clientenv 2}" + + +puts "Processed messages" + +# Close the envs +error_check_good script_master_close [$masterenv close] 0 +error_check_good script_client_close [$clientenv close] 0 +puts "\tRepscript completed successfully" diff --git a/test/tcl/rep018.tcl b/test/tcl/rep018.tcl new file mode 100644 index 00000000..24279841 --- /dev/null +++ b/test/tcl/rep018.tcl @@ -0,0 +1,188 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep018 +# TEST Replication with dbremove. +# TEST +# TEST Verify that the attempt to remove a database file +# TEST on the master hangs while another process holds a +# TEST handle on the client. +# TEST +proc rep018 { method { niter 10 } { tnum "018" } args } { + + source ./include.tcl + global repfiles_in_memory + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication with dbremove $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep018_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep018_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + puts "\tRep$tnum.a: Create master and client, bring online." + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create \ + -log_max 1000000 -home $masterdir $verbargs $repmemargs \ + $m_txnargs $m_logargs -rep_master -errpfx MASTER \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create -home $clientdir $repmemargs \ + $c_txnargs $c_logargs -rep_client $verbargs -errpfx CLIENT \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Bring the client online. + process_msgs "{$masterenv 1} {$clientenv 2}" + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # db_remove in a moment. 
+ # + $masterenv test force noarchive_timeout + + puts "\tRep$tnum.b: Open database on master, propagate to client." + set dbname rep$tnum.db + set db [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + set t [$masterenv txn] + for { set i 1 } { $i <= $niter } { incr i } { + error_check_good db_put \ + [eval $db put -txn $t $i [chop_data $method data$i]] 0 + } + error_check_good txn_commit [$t commit] 0 + process_msgs "{$masterenv 1} {$clientenv 2}" + + puts "\tRep$tnum.c: Spawn a child tclsh to do client work." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep018script.tcl $testdir/rep018script.log $clientdir \ + $niter $dbname $method $rep_verbose $verbose_type &] + + puts "\tRep$tnum.d: Close and remove database on master." + error_check_good close_master_db [$db close] 0 + + # Remove database in master env. First make sure the child + # tclsh is done reading the data. + while { 1 } { + if { [file exists $testdir/marker.db] == 0 } { + tclsleep 1 + } else { + set markerenv [berkdb_env -home $testdir -txn] + error_check_good markerenv_open \ + [is_valid_env $markerenv] TRUE + set marker [berkdb_open -unknown -env $markerenv \ + -auto_commit marker.db] + while { [llength [$marker get CHILDREADY]] == 0 } { + tclsleep 1 + } + break + } + } + error_check_good db_remove [$masterenv dbremove -auto_commit $dbname] 0 + + puts "\tRep$tnum.e: Create new database on master with the same name." + set db [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + error_check_good new_db_open [is_valid_db $db] TRUE + + puts "\tRep$tnum.f: Propagate changes to client. Process should hang." + error_check_good timestamp_remove \ + [$marker put PARENTREMOVE [timestamp -r]] 0 + process_msgs "{$masterenv 1} {$clientenv 2}" + error_check_good timestamp_done \ + [$marker put PARENTDONE [timestamp -r]] 0 + + watch_procs $pid 5 + + puts "\tRep$tnum.g: Check for failure." 
+ # Check marker file for correct timestamp ordering. + set ret [$marker get CHILDDONE] + set childdone [lindex [lindex [lindex $ret 0] 1] 0] + set ret [$marker get PARENTDONE] + set parentdone [lindex [lindex [lindex $ret 0] 1] 0] + if { [expr $childdone - $parentdone] > 0 } { + puts "\tFAIL: parent must complete after child" + } + + # Clean up. + error_check_good marker_db_close [$marker close] 0 + error_check_good market_env_close [$markerenv close] 0 + error_check_good masterdb_close [$db close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR + + # Check log file for failures. + set errstrings [eval findfail $testdir/rep018script.log] + foreach str $errstrings { + puts "FAIL: error message in rep018 log file: $str" + } +} + diff --git a/test/tcl/rep018script.tcl b/test/tcl/rep018script.tcl new file mode 100644 index 00000000..d69a414c --- /dev/null +++ b/test/tcl/rep018script.tcl @@ -0,0 +1,98 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep018 script - concurrency with checkpoints. +# +# Test dbremove with replication. +# +# Usage: rep018script clientdir dbfile +# clientdir: client env directory +# niter: number of items in file +# dbfile: name of database file +# rep_verbose: Is the test doing verbose reporting? +# verbose_type: What subset of verbose messages? 
+# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript clientdir niter dbfile method rep_verbose verbose_type" + +# Verify usage +if { $argc != 6 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set clientdir [ lindex $argv 0 ] +set niter [ lindex $argv 1 ] +set dbfile [ lindex $argv 2 ] +set method [ lindex $argv 3 ] +set rep_verbose [ lindex $argv 4 ] +set verbose_type [ lindex $argv 5 ] +set verbargs "" +if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " +} + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +# +repladd 1 +repladd 2 + +# Join the client env. +set cl_cmd "berkdb_env_noerr -home $clientdir $verbargs -errpfx CHILD \ + -txn -rep_client -rep_transport \[list 2 replsend\]" +set clientenv [eval $cl_cmd] +error_check_good script_cenv_open [is_valid_env $clientenv] TRUE + +# Make sure we can read data on client. +set db [eval "berkdb_open -env $clientenv $dbfile"] +for { set i 1 } { $i <= $niter } { incr i } { + set ret [lindex [$db get $i] 0] + error_check_good db_get $ret [list $i [pad_data $method data$i]] +} + +# Put a timestamp in a shared file. +set markerenv [berkdb_env -create -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker \ + [eval "berkdb_open -create -btree -auto_commit -env $markerenv marker.db"] +error_check_good timestamp_ready \ + [$marker put CHILDREADY [timestamp -r]] 0 + +# Give the parent a chance to process messages and hang. +tclsleep 30 + +# Clean up the child so the parent can go forward. 
+error_check_good timestamp_done \ + [$marker put CHILDDONE [timestamp -r]] 0 +error_check_good client_db_close [$db close] 0 + +# Check that the master is done. +while { [llength [$marker get PARENTDONE]] == 0 } { + tclsleep 1 +} + +# Verify that the newly recreated database is now empty. +set db [eval "berkdb_open -env $clientenv $dbfile"] +set cursor [$db cursor] +error_check_good db_empty [llength [$cursor get -first]] 0 +error_check_good cursor_close [$cursor close] 0 +error_check_good db_close [$db close] 0 +error_check_good marker_db_close [$marker close] 0 +error_check_good markerenv_close [$markerenv close] 0 +error_check_good script_client_close [$clientenv close] 0 + diff --git a/test/tcl/rep019.tcl b/test/tcl/rep019.tcl new file mode 100644 index 00000000..e908eb53 --- /dev/null +++ b/test/tcl/rep019.tcl @@ -0,0 +1,191 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep019 +# TEST Replication and multiple clients at same LSN. +# TEST Have several clients at the same LSN. Run recovery at +# TEST different times. Declare a client master and after sync-up +# TEST verify all client logs are identical. +# +proc rep019 { method { nclients 3 } { tnum "019" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # This test needs to use recovery, so mixed-mode testing + # isn't appropriate, nor in-memory database testing. + global databases_in_memory + if { $databases_in_memory > 0 } { + puts "Rep$tnum: Skipping for in-memory databases." + return + } + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." 
+ return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set args [convert_args $method $args] + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + puts "Rep$tnum ($method $r): Replication\ + and $nclients recovered clients in sync $msg2 $msg3." + rep019_sub $method $nclients $tnum $r $args + } +} + +proc rep019_sub { method nclients tnum recargs largs } { + global testdir + global util_path + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set orig_tdir $testdir + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set niter 100 + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + -home $masterdir -rep_master -errpfx MASTER $repmemargs \ + $privargs -rep_transport \[list 1 replsend\]" + set menv [eval $ma_envcmd $recargs] + + for {set i 0} {$i < $nclients} {incr i} { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set id($i) [expr 2 + $i] + repladd $id($i) + set cl_envcmd($i) "berkdb_env_noerr -create -txn nosync \ + -home $clientdir($i) $verbargs -errpfx CLIENT.$i \ + $repmemargs $privargs \ + -rep_client -rep_transport \[list $id($i) replsend\]" + set clenv($i) [eval $cl_envcmd($i) $recargs] + error_check_good client_env [is_valid_env $clenv($i)] TRUE + } + set testfile "test$tnum.db" + set omethod [convert_method $method] + set masterdb [eval {berkdb_open_noerr -env $menv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + # Bring the clients online by processing the startup messages. + set envlist {} + lappend envlist "$menv 1" + for { set i 0 } { $i < $nclients } { incr i } { + lappend envlist "$clenv($i) $id($i)" + } + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.a: Running test001 in replicated env." + eval rep_test $method $menv $masterdb $niter 0 0 0 0 $largs + process_msgs $envlist + + error_check_good mdb_cl [$masterdb close] 0 + # Process any close messages. + process_msgs $envlist + + error_check_good menv_cl [$menv close] 0 + puts "\tRep$tnum.b: Close all envs and run recovery in clients." + for {set i 0} {$i < $nclients} {incr i} { + error_check_good cl$i.close [$clenv($i) close] 0 + set hargs($i) "-h $clientdir($i)" + } + foreach sleep {2 1 0} { + for {set i 0} {$i < $nclients} {incr i} { + set stat [catch {eval exec $util_path/db_recover \ + $hargs($i)} result] + error_check_good stat $stat 0 + # + # Need to sleep to make sure recovery's checkpoint + # records have different timestamps. 
+ tclsleep $sleep + } + } + + puts "\tRep$tnum.c: Reopen clients and declare one master." + for {set i 0} {$i < $nclients} {incr i} { + set clenv($i) [eval $cl_envcmd($i) $recargs] + error_check_good client_env [is_valid_env $clenv($i)] TRUE + } + error_check_good master0 [$clenv(0) rep_start -master] 0 + + puts "\tRep$tnum.d: Sync up with other clients." + while { 1 } { + set nproced 0 + + for {set i 0} {$i < $nclients} {incr i} { + incr nproced [replprocessqueue $clenv($i) $id($i)] + } + + if { $nproced == 0 } { + break + } + } + puts "\tRep$tnum.e: Verify client logs match." + set i 0 + error_check_good cl$i.close [$clenv($i) close] 0 + set stat [catch {eval exec $util_path/db_printlog \ + $hargs($i) >& $clientdir($i)/prlog} result] + # + # Note we start the loop at 1 here and compare against client0 + # which became the master. + # + for {set i 1} {$i < $nclients} {incr i} { + error_check_good cl$i.close [$clenv($i) close] 0 + fileremove -f $clientdir($i)/prlog + set stat [catch {eval exec $util_path/db_printlog \ + $hargs($i) >> $clientdir($i)/prlog} result] + error_check_good stat_prlog $stat 0 + error_check_good log_cmp(0,$i) \ + [filecmp $clientdir(0)/prlog $clientdir($i)/prlog] 0 + } + + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} + diff --git a/test/tcl/rep020.tcl b/test/tcl/rep020.tcl new file mode 100644 index 00000000..5e510b67 --- /dev/null +++ b/test/tcl/rep020.tcl @@ -0,0 +1,322 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep020 +# TEST Replication elections - test election generation numbers. +# TEST + +proc rep020 { method args } { + global rand_init + global databases_in_memory + global repfiles_in_memory + + source ./include.tcl + set tnum "020" + + # Run for btree only. 
+ if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + error_check_good set_random_seed [berkdb srand $rand_init] 0 + set nclients 5 + set logsets [create_logsets [expr $nclients + 1]] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + puts "Rep$tnum ($method): Election generation test $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep020_sub $method $nclients $tnum $l $args + } +} + +proc rep020_sub { method nclients tnum logset largs } { + source ./include.tcl + global errorInfo + global databases_in_memory + global mixed_mode_logging + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) 
[adjust_txnargs $c_logtype($i)] + } + + # Open a master. + set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 $verbargs \ + -event $repmemargs \ + -home $masterdir $m_txnargs $m_logargs -rep_master \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + lappend envlist "$masterenv 1" + + # Open the clients. + for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create -event \ + $verbargs -home $clientdir($i) $repmemargs \ + $c_txnargs($i) $c_logargs($i) \ + -rep_client -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i)] + lappend envlist "$clientenv($i) $envid" + } + + # Run a modified test001 in the master. + process_msgs $envlist + puts "\tRep$tnum.a: Running rep_test in replicated env." + set niter 10 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + check_db_location $masterenv + for { set i 0 } { $i < $nclients } { incr i } { + check_db_location $clientenv($i) + } + + # Close master. + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + replclear [expr $i + 2] + set err_cmd($i) "none" + set pri($i) 10 + set crash($i) 0 + if { $rep_verbose == 1 } { + $clientenv($i) errpfx CLIENT$i + $clientenv($i) verbose $verbose_type on + $clientenv($i) errfile /dev/stderr + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT$i -errfile /dev/stderr"] + } + } + + set msg "Rep$tnum.b" + puts "\t$msg: Run elections to increment egen." + + set nelect 2 + set nsites $nclients + set nvotes $nclients + for { set j 0 } { $j < $nelect } { incr j } { + # Pick winner and elector randomly. 
+ set winner [berkdb random_int 0 [expr $nclients - 1]] + setpriority pri $nclients $winner + set elector [berkdb random_int 0 [expr $nclients - 1]] + run_election envlist err_cmd pri crash $qdir \ + $msg $elector $nsites $nvotes $nclients $winner 1 $dbname + } + process_msgs $envlist + + set msg "Rep$tnum.c" + puts "\t$msg: Updating egen when getting an old vote." + + # + # Find the last client and save the election generation number. + # Close the last client and adjust the list of envs to process. + # + set i [expr $nclients - 1] + set last [lindex $envlist end] + set clientenv($i) [lindex $last 0] + set egen($i) \ + [stat_field $clientenv($i) rep_stat "Election generation number"] + error_check_good clientenv_close($i) [$clientenv($i) close] 0 + set envlist [lreplace $envlist end end] + + # Run a few more elections while the last client is closed. + # Make sure we don't pick the closed client as the winner, + # and require votes from one fewer site. + # + set orig_nvotes $nvotes + set orig_nclients $nclients + set nvotes [expr $orig_nvotes - 1] + set nclients [expr $orig_nclients - 1] + for { set j 0 } { $j < $nelect } { incr j } { + set winner [berkdb random_int 0 [expr $nclients - 1]] + setpriority pri $nclients $winner + set elector [berkdb random_int 0 [expr $nclients - 1]] + run_election envlist err_cmd pri crash $qdir \ + $msg $elector $nsites $nvotes $nclients $winner 1 $dbname + } + process_msgs $envlist + # + # Verify that the last client's election generation number has + # changed, and that it matches the other clients. + # + set pair [lindex $envlist 0] + set clenv [lindex $pair 0] + set clegen [stat_field \ + $clenv rep_stat "Election generation number"] + + # Reopen last client's env. Do not run recovery, but do + # process messages to get the egen updated. 
+ replclear $envid + set clientenv($i) [eval $env_cmd($i)] + lappend envlist "$clientenv($i) $envid" + error_check_good client_reopen [is_valid_env $clientenv($i)] TRUE + process_msgs $envlist + + set newegen($i) \ + [stat_field $clientenv($i) rep_stat "Election generation number"] + error_check_bad egen_changed $newegen($i) $egen($i) + error_check_good egen_changed1 $newegen($i) $clegen + + set msg "Rep$tnum.d" + puts "\t$msg: New client starts election." + # + # Run another election, this time called by the last client. + # This should succeed because the last client has already + # caught up to the others for egen. + # + set winner 2 + set nvotes $orig_nvotes + set nclients $orig_nclients + set elector [expr $nclients - 1] + setpriority pri $nclients $winner + run_election envlist err_cmd pri crash $qdir \ + $msg $elector $nsites $nvotes $nclients $winner 0 $dbname + + # Skip this part of the test for mixed-mode logging, + # since we can't recover with in-memory logs. + if { $mixed_mode_logging == 0 } { + set msg "Rep$tnum.e" + puts "\t$msg: Election generation set as expected after recovery." + # Note all client egens. Close, recover, process messages, + # and check that egens are unchanged. + set big_e [big_endian] + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + # Can only get egen file if repfiles on-disk. 
+ if { $repfiles_in_memory == 0 } { + set fid [open $clientdir($i)/__db.rep.egen r] + fconfigure $fid -translation binary + set data [read $fid 4] + if { $big_e } { + binary scan $data I egen($i) + } else { + binary scan $data i egen($i) + } + binary scan $data c val + close $fid + } + $clientenv($i) log_flush + error_check_good \ + clientenv_close($i) [$clientenv($i) close] 0 + set clientenv($i) [eval $env_cmd($i) -recover] + set envlist [lreplace \ + $envlist $i $i "$clientenv($i) [expr $i + 2]"] + + set newegen($i) [stat_field $clientenv($i) \ + rep_stat "Election generation number"] + if { $repfiles_in_memory == 0 } { + error_check_good egen_recovery$i $newegen($i) \ + $egen($i) + } else { + # For rep in-memory, egen expected to start + # over at 1 after close/reopen environment. + error_check_good egen_recovery $newegen($i) 1 + } + } + process_msgs $envlist + + # The election may try to open the old database to force new + # log records. Prevent any attempt to open an old inmem + # database that is no longer there after the recover. + if { $databases_in_memory == 1 } { + set dbname "NULL" + } + + # Run an election. Now the egens should go forward. + set winner [berkdb random_int 0 [expr $nclients - 1]] + setpriority pri $nclients $winner + set elector [berkdb random_int 0 [expr $nclients - 1]] + run_election envlist err_cmd pri crash $qdir \ + $msg $elector $nsites $nvotes $nclients $winner 1 $dbname + + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set newegen($i) [stat_field $clientenv($i) \ + rep_stat "Election generation number"] + if { $repfiles_in_memory == 0 } { + error_check_good egen_forward \ + [expr $newegen($i) > $egen($i)] 1 + } else { + # For rep in-memory, egen expected to + # increment to 2 after election. 
+ error_check_good egen_recovery $newegen($i) 2 + } + } + } + + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + + replclose $testdir/MSGQUEUEDIR +} + diff --git a/test/tcl/rep021.tcl b/test/tcl/rep021.tcl new file mode 100644 index 00000000..7145f738 --- /dev/null +++ b/test/tcl/rep021.tcl @@ -0,0 +1,337 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep021 +# TEST Replication and multiple environments. +# TEST Run similar tests in separate environments, making sure +# TEST that some data overlaps. Then, "move" one client env +# TEST from one replication group to another and make sure that +# TEST we do not get divergent logs. We either match the first +# TEST record and end up with identical logs or we get an error. +# TEST Verify all client logs are identical if successful. +# +proc rep021 { method { nclients 3 } { tnum "021" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # This test depends on copying logs, so can't be run with + # in-memory logging. + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + # This test closes its envs, so it's not appropriate for + # testing of in-memory named databases. + global databases_in_memory + if { $databases_in_memory } { + puts "Rep$tnum: Skipping for in-memory databases." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set args [convert_args $method $args] + set logsets [create_logsets [expr $nclients + 1]] + + # Run the body of the test with and without recovery. 
+ foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication and\ + $nclients recovered clients in sync $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep021_sub $method $nclients $tnum $l $r $args + } + } +} + +proc rep021_sub { method nclients tnum logset recargs largs } { + global testdir + global util_path + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set orig_tdir $testdir + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set niter 100 + set offset 5 + set masterdir $testdir/MASTERDIR + set masterdir2 $testdir/MASTERDIR.NEW + file mkdir $masterdir + file mkdir $masterdir2 + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + # We want to run the test 3 times in 2 separate repl envs. + # This is a little bit tricky due to how we manage replication + # in Tcl. It assumes one replication group. + # This is tricky because we need to manage/clear the repl + # message queues for the different groups when running + # to one group or the other. + # To accomplish this we run entirely in the 2nd group first. + # We set it up and then close all its envs. Then we run + # to the 1st group, and set it up. Then we add in a client + # from the 2nd group into the existing 1st group. 
+ # Although we're doing them in separate parts, this is + # a description of what we're doing. + # + # 1. First add divergent data to database: + # RepGrp1: Add niter data from 0 to database. + # RepGrp2: Add niter data from offset to database. + # This gives us overlapping data in the databases, but they're + # additions will be at different offsets in the log files. + # + # 2. Add identical data to both databases. + # RepGrp1: Add niter data from niter + offset to database. + # RepGrp2: Add niter data from niter + offset to database. + # This gives us identical data in the databases and logs. + # + # 3. Again add divergent data to databases. + # RepGrp1: Add niter data from niter*2+offset to database. + # RepGrp2: Add niter data from niter*2+offset*2 to database. + # This gives us overlapping data in the databases, but they're + # additions will be at different offsets in the log files. + # + # 4. Add a client from one group to the other. Then try + # to sync up that client. We should get a failure with + # one of the non-matching error messages: + # "Too few log files to sync with master" + # REP_JOIN_FAILURE + + # Open a 2nd master. Make all the 2nd env ids >= 10. + # For the 2nd group, just have 1 master and 1 client. 
+ repladd 10 + set ma2_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $m_logargs -home $masterdir2 $repmemargs $privargs \ + -rep_master -rep_transport \[list 10 replsend\]" + set menv2 [eval $ma2_envcmd $recargs] + + set clientdir2 $testdir/CLIENTDIR.NEW + file mkdir $clientdir2 + set id2 11 + set c_logtype($id2) [lindex $logset 1] + set c_logargs($id2) [adjust_logargs $c_logtype($id2)] + set c_txnargs($id2) [adjust_txnargs $c_logtype($id2)] + + set id2 11 + repladd $id2 + set cl2_envcmd "berkdb_env_noerr -create $c_txnargs($id2) $verbargs \ + $c_logargs($id2) -home $clientdir2 $repmemargs $privargs \ + -rep_client -rep_transport \[list $id2 replsend\]" + set clenv2 [eval $cl2_envcmd $recargs] + + set testfile "test$tnum.db" + set omethod [convert_method $method] + + set masterdb2 [eval {berkdb_open_noerr -env $menv2 -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $masterdb2] TRUE + + # + # Process startup messages + # + set env2list {} + lappend env2list "$menv2 10" + lappend env2list "$clenv2 $id2" + process_msgs $env2list + + # + # Set up the three runs of rep_test. We need the starting + # point for each phase of the test for each group. + # + set e1phase1 0 + set e2phase1 $offset + set e1phase2 [expr $niter + $offset] + set e2phase2 $e1phase2 + set e1phase3 [expr $e1phase2 + $niter] + set e2phase3 [expr $e2phase2 + $niter + $offset] + + puts "\tRep$tnum.a: Running rep_test in 2nd replicated env." + eval rep_test $method $menv2 $masterdb2 $niter $e2phase1 1 1 $largs + eval rep_test $method $menv2 $masterdb2 $niter $e2phase2 1 1 $largs + eval rep_test $method $menv2 $masterdb2 $niter $e2phase3 1 1 $largs + error_check_good mdb_cl [$masterdb2 close] 0 + process_msgs $env2list + + puts "\tRep$tnum.b: Close 2nd replicated env. Open primary." 
+ error_check_good mdb_cl [$clenv2 close] 0 + error_check_good mdb_cl [$menv2 close] 0 + replclose $testdir/MSGQUEUEDIR + + # + # Run recovery in client now to blow away region files so + # that this client comes in as a "new" client and announces itself. + # + set stat [catch {eval exec $util_path/db_recover -h $clientdir2} result] + error_check_good stat $stat 0 + + # + # Now we've run in the 2nd env. We have everything we need + # set up and existing in that env. Now run the test in the + # 1st env and then we'll try to add in the client. + # + replsetup $testdir/MSGQUEUEDIR + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $m_logargs -home $masterdir $repmemargs $privargs \ + -rep_master -rep_transport \[list 1 replsend\]" + set menv [eval $ma_envcmd $recargs] + + for {set i 0} {$i < $nclients} {incr i} { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + set id($i) [expr 2 + $i] + repladd $id($i) + set cl_envcmd($i) "berkdb_env_noerr -create $c_txnargs($i) \ + $c_logargs($i) -home $clientdir($i) $repmemargs \ + $verbargs $privargs \ + -rep_client -rep_transport \[list $id($i) replsend\]" + set clenv($i) [eval $cl_envcmd($i) $recargs] + } + + set masterdb [eval {berkdb_open_noerr -env $menv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + # Bring the clients online by processing the startup messages. + set envlist {} + lappend envlist "$menv 1" + for { set i 0 } { $i < $nclients } { incr i } { + lappend envlist "$clenv($i) $id($i)" + } + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.c: Running rep_test in primary replicated env." 
+ eval rep_test $method $menv $masterdb $niter $e1phase1 1 1 $largs + eval rep_test $method $menv $masterdb $niter $e1phase2 1 1 $largs + eval rep_test $method $menv $masterdb $niter $e1phase3 1 1 $largs + error_check_good mdb_cl [$masterdb close] 0 + # Process any close messages. + process_msgs $envlist + + puts "\tRep$tnum.d: Add unrelated client into replication group." + set i $nclients + set orig $nclients + set nclients [expr $nclients + 1] + + set clientdir($i) $clientdir2 + set id($i) [expr 2 + $i] + repladd $id($i) + set cl_envcmd($i) "berkdb_env_noerr -create -txn nosync \ + -home $clientdir($i) $verbargs $repmemargs $privargs \ + -rep_client -rep_transport \[list $id($i) replsend\]" + set clenv($i) [eval $cl_envcmd($i) $recargs] + # + # We'll only catch an error if we turn off autoinit. + # Otherwise, the system will throw away everything on the + # client and resync. + # + $clenv($i) rep_config {autoinit off} + + lappend envlist "$clenv($i) $id($i)" + + fileremove -f $clientdir2/prlog.orig + set stat [catch {eval exec $util_path/db_printlog \ + -h $clientdir2 >> $clientdir2/prlog.orig} result] + + set err 0 + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.e: Close all envs and run recovery in clients." + error_check_good menv_cl [$menv close] 0 + for {set i 0} {$i < $nclients} {incr i} { + error_check_good cl$i.close [$clenv($i) close] 0 + set hargs($i) "-h $clientdir($i)" + } + set i [expr $nclients - 1] + fileremove -f $clientdir($i)/prlog + set stat [catch {eval exec $util_path/db_printlog \ + -h $clientdir($i) >> $clientdir($i)/prlog} result] + + # If we got an error, then the log should match the original + # and the error message should tell us the client was never + # part of this environment. + # + if { $err != 0 } { + puts "\tRep$tnum.f: Verify client log matches original." + error_check_good log_cmp(orig,$i) \ + [filecmp $clientdir($i)/prlog.orig $clientdir($i)/prlog] 0 + puts "\tRep$tnum.g: Verify client error." 
+ error_check_good errchk [is_substr $err \ + "REP_JOIN_FAILURE"] 1 + } else { + puts "\tRep$tnum.f: Verify client log doesn't match original." + error_check_good log_cmp(orig,$i) \ + [filecmp $clientdir($i)/prlog.orig $clientdir($i)/prlog] 1 + puts "\tRep$tnum.g: Verify new client log matches master." + set stat [catch {eval exec $util_path/db_printlog \ + -h $masterdir >& $masterdir/prlog} result] + fileremove -f $clientdir($i)/prlog + set stat [catch {eval exec $util_path/db_printlog \ + -h $clientdir($i) >> $clientdir($i)/prlog} result] + error_check_good stat_prlog $stat 0 + error_check_good log_cmp(master,$i) \ + [filecmp $masterdir/prlog $clientdir($i)/prlog] 0 + } + + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} + diff --git a/test/tcl/rep022.tcl b/test/tcl/rep022.tcl new file mode 100644 index 00000000..43da6d4f --- /dev/null +++ b/test/tcl/rep022.tcl @@ -0,0 +1,309 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep022 +# TEST Replication elections - test election generation numbers +# TEST during simulated network partition. +# TEST +proc rep022 { method args } { + + source ./include.tcl + global rand_init + global mixed_mode_logging + global databases_in_memory + global repfiles_in_memory + + set tnum "022" + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + if { $databases_in_memory > 0 } { + puts "Rep$tnum: Skipping for in-memory databases." + return + } + + # This test can't be run with replication files in memory + # because it depends on using recovery. + if { $repfiles_in_memory } { + puts "Rep$tnum: Skipping for in-memory replication files." 
+ return + } + + error_check_good set_random_seed [berkdb srand $rand_init] 0 + set nclients 5 + set logsets [create_logsets [expr $nclients + 1]] + foreach l $logsets { + puts "Rep$tnum ($method): Election generation test\ + with simulated network partition." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep022_sub $method $nclients $tnum $l $args + } +} + +proc rep022_sub { method nclients tnum logset largs } { + source ./include.tcl + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 $verbargs \ + -event -home $masterdir $m_txnargs $m_logargs -rep_master \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + lappend envlist "$masterenv 1" + + # Open the clients. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT.$i -event \ + -home $clientdir($i) $c_txnargs($i) $c_logargs($i) \ + -rep_client -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i)] + lappend envlist "$clientenv($i) $envid" + } + + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set niter 10 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + replclear [expr $i + 2] + set err_cmd($i) "none" + set pri($i) 10 + set crash($i) 0 + if { $rep_verbose == 1 } { + $clientenv($i) errpfx CLIENT$i + $clientenv($i) verbose $verbose_type on + $clientenv($i) errfile /dev/stderr + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT$i -errfile /dev/stderr"] + } + } + + set msg "Rep$tnum.b" + puts "\t$msg: Run election for clients 0,1,2." + # + # Run an election with clients 0, 1, and 2. + # Make client 0 be the winner, and let it stay master. + # + set origlist $envlist + set orignclients $nclients + set envlist [lrange $origlist 0 2] + set nclients 3 + set nsites 3 + set nvotes 3 + set winner 0 + setpriority pri $nclients $winner + set elector [berkdb random_int 0 [expr $nclients - 1]] + run_election envlist err_cmd pri crash \ + $qdir $msg $elector $nsites $nvotes $nclients $winner 0 test.db + + set msg "Rep$tnum.c" + puts "\t$msg: Close and reopen client 2 with recovery." + # + # Now close and reopen 2 with recovery. Update the + # list of all client envs with the new information. 
+ # + replclear 5 + replclear 6 + error_check_good flush [$clientenv(2) log_flush] 0 + error_check_good clientenv_close(2) [$clientenv(2) close] 0 + set ret [catch {exec $util_path/db_recover -h $clientdir(2)} r] + set clientenv(2) [eval $env_cmd(2)] + set origlist [lreplace $origlist 2 2 "$clientenv(2) 4"] + + # Get last LSN for client 2. + set logc [$clientenv(2) log_cursor] + error_check_good logc \ + [is_valid_logc $logc $clientenv(2)] TRUE + set lastlsn2 [lindex [lindex [$logc get -last] 0] 1] + error_check_good close_cursor [$logc close] 0 + + set msg "Rep$tnum.d" + puts "\t$msg: Advance client 4's LSN beyond client 2." + # + # This forces the last LSN for client 4 past the last + # LSN for client 2. We want to make sure that client 2's + # later data, but earlier LSN beats client 4's larger LSN. + # + set lastlsn4 0 + while { $lastlsn4 <= $lastlsn2 } { + error_check_good clientenv_close(4) [$clientenv(4) close] 0 + set ret [catch {exec $util_path/db_recover -h $clientdir(4)} r] + set clientenv(4) [eval $env_cmd(4)] + set origlist [lreplace $origlist 4 4 "$clientenv(4) 6"] + set logc [$clientenv(4) log_cursor] + error_check_good logc \ + [is_valid_logc $logc $clientenv(4)] TRUE + set lastlsn4 [lindex [lindex [$logc get -last] 0] 1] + error_check_good close_cursor [$logc close] 0 + } + + set msg "Rep$tnum.e" + puts "\t$msg: Run election for clients 2,3,4." + # + # Run an election with clients 2, 3, 4. + # Make last client be the winner, and let it stay master. + # Need to process messages before running election so + # that clients 2 and 4 update to the right gen with + # client 3. 
+ # + set envlist [lrange $origlist 2 4] + process_msgs $envlist + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set egen($i) [stat_field \ + $clientenv($i) rep_stat "Election generation number"] + } + set winner 2 + setpriority pri $nclients $winner 2 + set elector [berkdb random_int 2 4] + run_election envlist err_cmd pri crash \ + $qdir $msg $elector $nsites $nvotes $nclients $winner 0 test.db + + # Note egens for all the clients. + set envlist $origlist + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set egen($i) [stat_field \ + $clientenv($i) rep_stat "Election generation number"] + } + + # Have current master run an operation. + eval rep_test $method $clientenv($winner) NULL $niter 0 0 0 $largs + + # Check that clients 0 and 2 get DUPMASTER messages and + # restart them as clients. + # + puts "\tRep$tnum.f: Check for DUPMASTER" + set envlist0 [lrange $envlist 0 0] + process_msgs $envlist0 0 dup err + error_check_good is_dupmaster0 [lindex $dup 0] 1 + error_check_good downgrade0 [$clientenv(0) rep_start -client] 0 + + set envlist2 [lrange $envlist 2 2] + process_msgs $envlist2 0 dup err + error_check_good is_dupmaster2 [lindex $dup 0] 1 + error_check_good downgrade4 [$clientenv(2) rep_start -client] 0 + + # All DUPMASTER messages are now gone. + # We might get residual errors however because client 2 + # responded as a master to client 0 and then became a + # client immediately. Therefore client 2 might get some + # "master-only" records and return EINVAL. We want to + # ignore those and process records until calm is restored. + set err 1 + while { $err == 1 } { + process_msgs $envlist 0 dup err + error_check_good no_dupmaster $dup 0 + } + + # Check LSNs before new election. 
+ foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set logc [$clientenv($i) log_cursor] + error_check_good logc \ + [is_valid_logc $logc $clientenv($i)] TRUE + set lastlsn [lindex [lindex [$logc get -last] 0] 1] + error_check_good cursor_close [$logc close] 0 + } + + set msg "Rep$tnum.g" + puts "\t$msg: Run election for all clients after DUPMASTER." + + # Call a new election with all participants. Make 2 the + # winner, since it should have a high enough LSN to win. + set nclients $orignclients + set nsites $nclients + set nvotes $nclients + set winner 2 + setpriority pri $nclients $winner + set elector [berkdb random_int 0 [expr $nclients - 1]] + run_election envlist err_cmd pri crash \ + $qdir $msg $elector $nsites $nvotes $nclients $winner 0 test.db + + # Pull out new egens. + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set newegen($i) [stat_field \ + $clientenv($i) rep_stat "Election generation number"] + } + + # Egen numbers should all be the same now, and all greater than + # they were before the election. + set currentegen $newegen(0) + for { set i 0 } { $i < $nclients } { incr i } { + set egen_diff [expr $newegen($i) - $egen($i)] + error_check_good egen_increased [expr $egen_diff > 0] 1 + error_check_good newegens_match $currentegen $newegen($i) + } + + # Clean up. + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + + replclose $testdir/MSGQUEUEDIR +} + + diff --git a/test/tcl/rep023.tcl b/test/tcl/rep023.tcl new file mode 100644 index 00000000..66c057aa --- /dev/null +++ b/test/tcl/rep023.tcl @@ -0,0 +1,213 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep023 +# TEST Replication using two master handles. +# TEST +# TEST Open two handles on one master env. Create two +# TEST databases, one through each master handle. 
Process +# TEST all messages through the first master handle. Make +# TEST sure changes made through both handles are picked +# TEST up properly. +# +proc rep023 { method { niter 10 } { tnum "023" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # We can't open two envs on HP-UX, so just skip the + # whole test since that is at the core of it. + if { $is_hp_test == 1 } { + puts "Rep$tnum: Skipping for HP-UX." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, and + # with and without -rep_start. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + foreach startopt { 0 1 } { + if { $startopt == 1 } { + set startmsg "with rep_start" + } else { + set startmsg "" + } + puts "Rep$tnum ($method $r $startmsg):\ + Replication with two master handles\ + $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep023_sub $method \ + $niter $tnum $l $r $startopt $args + } + } + } +} + +proc rep023_sub { method niter tnum logset recargs startopt largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open 1st master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $verbargs -errpfx MASTER -home $masterdir $repmemargs \ + $privargs -rep_transport \[list 1 replsend\]" + set masterenv1 [eval $ma_envcmd $recargs -rep_master] + + # Open 2nd handle on master. The master envs will share + # the same envid. + set masterenv2 [eval $ma_envcmd] + if { $startopt == 1 } { + error_check_good rep_start [$masterenv2 rep_start -master] 0 + } + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs $verbargs -errpfx CLIENT -home $clientdir \ + $privargs -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. 
+ # Process messages on the first masterenv handle, not the second. + set envlist "{$masterenv1 1} {$clientenv 2}" + process_msgs $envlist + + # Set up databases in-memory or on-disk. + if { $databases_in_memory } { + set testfile1 { "" m1$tnum.db } + set testfile2 { "" m2$tnum.db } + } else { + set testfile1 "m1$tnum.db" + set testfile2 "m2$tnum.db" + } + + puts "\tRep$tnum.a: Create database using 1st master handle." + # Create a database using the 1st master. + set omethod [convert_method $method] + set db1 [eval {berkdb_open_noerr -env $masterenv1 -auto_commit \ + -create -mode 0644} $largs $omethod $testfile1] + error_check_good dbopen [is_valid_db $db1] TRUE + + puts "\tRep$tnum.b: Create database using 2nd master handle." + # Create a different database using the 2nd master. + set db2 [eval {berkdb_open_noerr -env $masterenv2 -auto_commit \ + -create -mode 0644} $largs $omethod $testfile2] + error_check_good dbopen [is_valid_db $db2] TRUE + + puts "\tRep$tnum.c: Process messages." + # Process messages. + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. + check_db_location $masterenv1 $testfile1 + check_db_location $masterenv2 $testfile1 + check_db_location $masterenv1 $testfile2 + check_db_location $masterenv2 $testfile2 + + puts "\tRep$tnum.d: Run rep_test in 1st master; process messages." + eval rep_test $method $masterenv1 $db1 $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.e: Run rep_test in 2nd master; process messages." + eval rep_test $method $masterenv2 $db2 $niter 0 0 0 $largs + process_msgs $envlist + + # Contents of the two databases should match. + error_check_good db_compare [db_compare \ + $db1 $db2 $masterdir/$testfile1 $masterdir/$testfile2] 0 + + puts "\tRep$tnum.f: Close 2nd master." + error_check_good db2 [$db2 close] 0 + error_check_good master2_close [$masterenv2 close] 0 + + puts "\tRep$tnum.g: Run test in master again." 
+ eval rep_test $method $masterenv1 $db1 $niter $niter 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.h: Closing" + error_check_good db1 [$db1 close] 0 + error_check_good env0_close [$masterenv1 close] 0 + error_check_good env2_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR + return +} diff --git a/test/tcl/rep024.tcl b/test/tcl/rep024.tcl new file mode 100644 index 00000000..2b3a7304 --- /dev/null +++ b/test/tcl/rep024.tcl @@ -0,0 +1,245 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep024 +# TEST Replication page allocation / verify test +# TEST +# TEST Start a master (site 1) and a client (site 2). Master +# TEST closes (simulating a crash). Site 2 becomes the master +# TEST and site 1 comes back up as a client. Verify database. + +proc rep024 { method { niter 1000 } { tnum "024" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + global fixed_len + set orig_fixed_len $fixed_len + set fixed_len 448 + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # This test is not appropriate for in-memory databases. + if { $databases_in_memory } { + puts "Skipping rep$tnum for named in-memory databases." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run all tests with and without recovery. + set envargs "" + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." 
+ continue + } + puts "Rep$tnum ($method $r): Replication\ + page allocation/verify test $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep024_sub $method $niter $tnum $envargs $l $r $args + } + } + set fixed_len $orig_fixed_len + return +} + +proc rep024_sub { method niter tnum envargs logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. This test requires -txn, so + # we only have to adjust the logargs. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(1) "berkdb_env_noerr -create $repmemargs \ + -log_max 1000000 $envargs $recargs -home $masterdir \ + -errpfx MASTER $verbargs -txn $m_logargs $privargs \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(1) -rep_master] + + # Open a client + repladd 2 + set env_cmd(2) "berkdb_env_noerr -create $repmemargs \ + -log_max 1000000 $envargs $recargs -home $clientdir \ + -errpfx CLIENT $verbargs -txn $c_logargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(2) -rep_client] + + # Bring the client online by processing the startup messages. 
+ set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.a: Add data to master, update client." + # + # This test uses a small page size and a large fixed_len + # so it is easy to force a page allocation. + set key [expr $niter + 1] + set data A + set pagesize 512 + if { [is_fixed_length $method] == 1 } { + set bigdata [repeat $data [expr $pagesize / 2]] + } else { + set bigdata [repeat $data [expr 2 * $pagesize]] + } + + set omethod [convert_method $method] + set testfile "test$tnum.db" + set db [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -pagesize $pagesize -env $masterenv $largs $testfile"] + eval rep_test $method $masterenv $db $niter 0 0 0 0 $largs + $masterenv txn_checkpoint + process_msgs $envlist + + # Close client. Force a page allocation on the master. + # An overflow page (or big page, for hash) will do the job. + # + puts "\tRep$tnum.b: Close client, force page allocation on master." + error_check_good client_close [$clientenv close] 0 + + error_check_good client_verify \ + [verify_dir $clientdir "\tRep$tnum.b: " 0 0 1 0 0] 0 + + set pages1 [r24_check_pages $db $method] + set txn [$masterenv txn] + error_check_good put_bigdata [eval {$db put} \ + -txn $txn {$key [chop_data $method $bigdata]}] 0 + error_check_good txn_commit [$txn commit] 0 + + # Verify that we have allocated new pages. + set pages2 [r24_check_pages $db $method] + set newpages [expr $pages2 - $pages1] + + # Close master and discard messages for site 2. Now everybody + # is closed and sites 1 and 2 have different contents. + puts "\tRep$tnum.c: Close master." + error_check_good db_close [$db close] 0 + error_check_good master_close [$masterenv close] 0 + if { $newpages <= 0 } { + puts "FAIL: no new pages allocated." + return + } + error_check_good master_verify \ + [verify_dir $masterdir "\tRep$tnum.c: " 0 0 1] 0 + + # Run a loop, opening the original client as master and the + # original master as client. Test db_verify. 
+ foreach option { "no new data" "add new data" } { + puts "\tRep$tnum.d: Swap master and client ($option)." + set newmasterenv [eval $env_cmd(2) -rep_master] + set newclientenv [eval $env_cmd(1) -rep_client] + set newmasterdir [$newmasterenv get_home] + set newclientdir [$newclientenv get_home] + set envlist "{$newmasterenv 2} {$newclientenv 1}" + process_msgs $envlist + if { $option == "add new data" } { + set key [expr $niter + 2] + set db [eval "berkdb_open_noerr -create $omethod \ + -auto_commit -pagesize $pagesize \ + -env $newmasterenv $largs $testfile"] + set pages1 [r24_check_pages $db $method] + set txn [$newmasterenv txn] + error_check_good put_bigdata [eval {$db put} \ + -txn $txn {$key [chop_data $method $bigdata]}] 0 + error_check_good txn_commit [$txn commit] 0 + set pages2 [r24_check_pages $db $method] + set newpages [expr $pages2 - $pages1] + error_check_good db_close [$db close] 0 + process_msgs $envlist + } + puts "\tRep$tnum.e: Close master and client, run verify." + # + # Verify_dir will db_verify with its own private environment, + # which means any dirty pages still in our environment won't be + # noticed. So, make sure there are no dirty pages. Running + # checkpoint at the master flushes its cache, and replicating + # that checkpoint to the client makes the client flush its + # cache. + # + $newmasterenv txn_checkpoint + process_msgs $envlist + + error_check_good newmasterenv_close [$newmasterenv close] 0 + error_check_good newclientenv_close [$newclientenv close] 0 + if { $newpages <= 0 } { + puts "FAIL: no new pages allocated." + return + } + # This test can leave unreferenced pages on systems without + # FTRUNCATE and that's OK, so set unref to 0. 
+ error_check_good verify \ + [verify_dir $newmasterdir "\tRep$tnum.f: " 0 0 1 0 0] 0 + error_check_good verify \ + [verify_dir $newclientdir "\tRep$tnum.g: " 0 0 1 0 0] 0 + } + replclose $testdir/MSGQUEUEDIR +} + +proc r24_check_pages { db method } { + if { [is_hash $method] == 1 } { + set pages [stat_field $db stat "Number of big pages"] + } elseif { [is_queue $method] == 1 } { + set pages [stat_field $db stat "Number of pages"] + } elseif { [is_heap $method] == 1 } { + set pages [stat_field $db stat "Number of records"] + } else { + set pages [stat_field $db stat "Overflow pages"] + } + return $pages +} diff --git a/test/tcl/rep025.tcl b/test/tcl/rep025.tcl new file mode 100644 index 00000000..eb995148 --- /dev/null +++ b/test/tcl/rep025.tcl @@ -0,0 +1,229 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep025 +# TEST Test of DB_REP_JOIN_FAILURE. +# TEST +# TEST One master, one client. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put one more record to the master. At the next +# TEST processing of messages, the client should get JOIN_FAILURE. +# TEST Recover with a hot failover. +# +proc rep025 { method { niter 200 } { tnum "025" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. 
+ set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r): Test of manual\ + initialization and join failure $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep025_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep025_sub { method niter tnum logset recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. 
+ set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max $verbargs -errpfx MASTER $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max $verbargs -errpfx CLIENT $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Create a gap requiring internal initialization. + set flags "" + set dbhandle NULL + set cid 2 + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + puts "\tRep$tnum.e: Clean client and reopen." 
+ env_cleanup $clientdir + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + + # Set initialization to manual. + $clientenv rep_config {autoinit off} + process_msgs $envlist 0 NONE err + error_check_good error_on_right_env [lindex $err 0] $clientenv + error_check_good right_error [is_substr $err DB_REP_JOIN_FAILURE] 1 + + # Add records to the master and update client. + puts "\tRep$tnum.f: Update master; client should return error." + # + # Force a log record to create a gap to force rerequest. + # + $masterenv txn_checkpoint -force + process_msgs $envlist 0 NONE err + tclsleep 1 + set entries 100 + eval rep_test $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + error_check_good error_on_right_env [lindex $err 0] $clientenv + error_check_good right_error [is_substr $err DB_REP_JOIN_FAILURE] 1 + + # If the master logs and the databases are on-disk, copy from master + # to client and restart with recovery. If the logs or databases are + # in-memory, we'll have to re-enable internal initialization and + # restart the client. + if { $m_logtype == "on-disk" && $databases_in_memory == 0 } { + puts "\tRep$tnum.g: Hot failover and catastrophic recovery." + error_check_good client_close [$clientenv close] 0 + env_cleanup $clientdir + set files [glob $masterdir/log.* $masterdir/*.db] + foreach f $files { + set filename [file tail $f] + file copy -force $f $clientdir/$filename + } + set clientenv [eval $cl_envcmd -recover_fatal -rep_client] + } else { + puts "\tRep$tnum.g: Restart client forcing internal init." 
+ error_check_good client_close [$clientenv close] 0 + set clientenv [eval $cl_envcmd -rep_client] + $clientenv rep_config {autoinit on} + } + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + error_check_good no_errors1 $err 0 + + # Adding another entry should not flush out an error. + eval rep_test $method $masterenv NULL $entries $start $start 0 $largs + process_msgs $envlist 0 NONE err + error_check_good no_errors2 $err 0 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep026.tcl b/test/tcl/rep026.tcl new file mode 100644 index 00000000..bdf65f73 --- /dev/null +++ b/test/tcl/rep026.tcl @@ -0,0 +1,277 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep026 +# TEST Replication elections - simulate a crash after sending +# TEST a vote. + +proc rep026 { method args } { + source ./include.tcl + + global mixed_mode_logging + global databases_in_memory + global repfiles_in_memory + + set tnum "026" + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + # This test uses recovery, so mixed-mode testing, in-memory + # database testing and repfiles in-memory testing aren't appropriate. + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + if { $databases_in_memory == 1 } { + puts "Rep$tnum: Skipping for in-memory databases." + return + } + if { $repfiles_in_memory > 0 } { + puts "Rep$tnum: Skipping for in-memory replication files." 
+ return + } + + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set nclients 5 + set logsets [create_logsets [expr $nclients + 1]] + foreach l $logsets { + puts "Rep$tnum ($method): Election generations -\ + simulate crash after sending a vote." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep026_sub $method $nclients $tnum $l $args + } +} + +proc rep026_sub { method nclients tnum logset largs } { + source ./include.tcl + global machids + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env -create -log_max 1000000 $verbargs \ + -event -home $masterdir $m_txnargs $m_logargs -rep_master \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + lappend envlist "$masterenv 1" + + # Open the clients. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create $verbargs \ + -event -home $clientdir($i) $c_txnargs($i) $c_logargs($i) \ + -rep_client -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i)] + error_check_good \ + client_env($i) [is_valid_env $clientenv($i)] TRUE + lappend envlist "$clientenv($i) $envid" + } + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set niter 10 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + replclear [expr $i + 2] + set err_cmd($i) "none" + set crash($i) 0 + set pri($i) 10 + if { $rep_verbose == 1 } { + $clientenv($i) errpfx CLIENT$i + $clientenv($i) verbose $verbose_type on + $clientenv($i) errfile /dev/stderr + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT$i -errfile /dev/stderr"] + } + } + + # In each case we simulate a crash in client C, recover, and + # call a second election. We vary the caller of the second + # election (C or some other client) and when the election + # messages from before the crash are processed - before or + # after the second election. + # + foreach option { "1 b before" "2 c before" "1 d after" "2 e after"} { + # Elector 1 calls the first election, elector 2 + # calls the second election. + set elector1 1 + set elector2 [lindex $option 0] + set let [lindex $option 1] + set restore [lindex $option 2] + + if { $elector1 == $elector2 } { + puts "\tRep$tnum.$let: Simulated crash and recovery\ + (crashing client calls second election)." + } else { + puts "\tRep$tnum.$let: Simulated crash and recovery\ + (non-crashing client calls second election)." 
+ } + + puts "\tRep$tnum.$let: Process messages from crasher\ + $restore 2nd election." + + puts "\t\tRep$tnum.$let.1: Note egens for all clients." + # Note egens for all the clients. + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set egen($i) [stat_field \ + $clientenv($i) rep_stat "Election generation number"] + } + + # Call an election which simulates a crash after sending + # its VOTE1. + set msg "\tRep$tnum.$let.2" + puts "\t$msg: Start election, simulate a crash." + set nsites $nclients + set nvotes $nclients + # Make the winner the crashing client, since the + # crashing client will have the biggest LSN. + set elector 1 + set winner $elector + set crash($elector) 1 + setpriority pri $nclients $winner + set err_cmd($elector) "electvote1" + run_election envlist err_cmd pri crash $qdir \ + $msg $elector $nsites $nvotes $nclients $winner 0 test.db + set msg "\tRep$tnum.$let.3" + puts "\t$msg: Close and reopen elector with recovery." + # But first flush the log, since we're relying on this client to + # win the next election. + $clientenv($elector) log_flush + error_check_good \ + clientenv_close($elector) [$clientenv($elector) close] 0 + + # Have other clients SKIP the election messages and process + # only C's startup messages. We'll do it by copying the files + # and emptying the originals. + set cwd [pwd] + foreach machid $machids { + file copy -force $qdir/repqueue$machid.db $qdir/save$machid.db + replclear $machid + } + + # Reopen C and process messages. Only the startup messages + # will be available. + set clientenv($elector) [eval $env_cmd($elector) -recover] + set envlist [lreplace $envlist \ + $elector $elector "$clientenv($elector) [expr $elector + 2]"] + process_msgs $envlist + + # Verify egens (should be +1 in C, and unchanged + # in other clients). 
+ foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set newegen($i) [stat_field $clientenv($i) \ + rep_stat "Election generation number"] + if { $i == $elector } { + error_check_good \ + egen+1 $newegen($i) [expr $egen($i) + 1] + } else { + error_check_good \ + egen_unchanged $newegen($i) $egen($i) + } + } + + # First chance to restore messages. + if { $restore == "before" } { + restore_messages $qdir + } + + # Have C call an election (no crash simulation) and process + # all the messages. + set msg "\tRep$tnum.$let.4" + puts "\t$msg: Call second election." + set err_cmd($elector) "none" + set crash($elector) 0 + run_election envlist err_cmd pri crash $qdir \ + $msg $elector2 $nsites $nvotes $nclients $winner 1 test.db + + # Second chance to restore messages. + if { $restore == "after" } { + restore_messages $qdir + } + process_msgs $envlist + + # Verify egens (should be +2 or more in all clients). + puts "\t\tRep$tnum.$let.5: Check egens." + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + set clientenv($i) [lindex $pair 0] + set newegen($i) [stat_field \ + $clientenv($i) rep_stat "Election generation number"] + + set mingen [expr $egen($i) + 1] + error_check_good egen+more($i) \ + [expr $newegen($i) >= $mingen] 1 + } + } + + # Clean up. + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + replclose $testdir/MSGQUEUEDIR +} + +proc restore_messages { qdir } { + global machids + set cwd [pwd] + foreach machid $machids { + file copy -force $qdir/save$machid.db $qdir/repqueue$machid.db + } +} + diff --git a/test/tcl/rep027.tcl b/test/tcl/rep027.tcl new file mode 100644 index 00000000..a4f33ebc --- /dev/null +++ b/test/tcl/rep027.tcl @@ -0,0 +1,196 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep027 +# TEST Replication and secondary indexes. +# TEST +# TEST Set up a secondary index on the master and make sure +# TEST it can be accessed from the client. + +proc rep027 { method { niter 1000 } { tnum "027" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # Renumbering recno is not permitted on a primary database. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_rrecno $method] != 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_rrecno $method] == 1 } { + puts "Skipping rep027 for -rrecno." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r):\ + Replication and secondary indices $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep027_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep027_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global env_private + global verbose_check_secondaries + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set omethod [convert_method $method] + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 -home $masterdir -errpfx MASTER $privargs \ + $m_txnargs $m_logargs -rep_master -rep_transport \ + \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $verbargs $repmemargs \ + $c_txnargs $c_logargs -home $clientdir -errpfx CLIENT $privargs \ + -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Set up database and secondary index on master. + puts "\tRep$tnum.a: Set up database with secondary index." 
+ set pname "primary$tnum.db" + set sname "secondary$tnum.db" + + # Open the primary. + set pdb [eval {berkdb_open_noerr -create \ + -auto_commit -env} $masterenv $omethod $largs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + process_msgs $envlist + + # Open and associate a secondary. + set sdb [eval {berkdb_open_noerr -create \ + -auto_commit -env} $masterenv -btree $sname] + error_check_good second_open [is_valid_db $sdb] TRUE + error_check_good db_associate [$pdb associate [callback_n 0] $sdb] 0 + + # Propagate to client. + process_msgs $envlist + + # Put some data in the master. + set did [open $dict] + for { set n 0 } { [gets $did str] != -1 && $n < $niter } { incr n } { + if { [is_record_based $method] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $method $datum] + + set ret [$pdb put $key [chop_data $method $datum]] + error_check_good put($n) $ret 0 + } + close $did + process_msgs $envlist + + # Check secondaries on master. + set verbose_check_secondaries 1 + puts "\tRep$tnum.b: Check secondaries on master." + check_secondaries $pdb $sdb $niter keys data "Rep$tnum.b" + error_check_good pdb_close [$pdb close] 0 + error_check_good sdb_close [$sdb close] 0 + process_msgs $envlist + + # Get handles on primary and secondary db on client. + set clientpdb [eval {berkdb_open -auto_commit -env} $clientenv $pname] + error_check_good client_pri [is_valid_db $clientpdb] TRUE + set clientsdb [eval {berkdb_open -auto_commit -env} $clientenv $sname] + error_check_good client_sec [is_valid_db $clientsdb] TRUE + error_check_good client_associate \ + [$clientpdb associate [callback_n 0] $clientsdb] 0 + + # Check secondaries on client. + puts "\tRep$tnum.c: Check secondaries on client." + check_secondaries $clientpdb $clientsdb $niter keys data "Rep$tnum.c" + + # Clean up. 
+ error_check_good clientpdb_close [$clientpdb close] 0 + error_check_good clientsdb_close [$clientsdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + error_check_good verify \ + [verify_dir $clientdir "\tRep$tnum.e: " 0 0 1] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep028.tcl b/test/tcl/rep028.tcl new file mode 100644 index 00000000..8fdebc7c --- /dev/null +++ b/test/tcl/rep028.tcl @@ -0,0 +1,255 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep028 +# TEST Replication and non-rep env handles. (Also see rep006.) +# TEST +# TEST Open second non-rep env on client, and create a db +# TEST through this handle. Open the db on master and put +# TEST some data. Check whether the non-rep handle keeps +# TEST working. Also check if opening the client database +# TEST in the non-rep env writes log records. +# +proc rep028 { method { niter 100 } { tnum "028" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "\tRep$tnum: Skipping for method $method." + return + } + + # Skip test for HP-UX because we can't open an env twice. + if { $is_hp_test == 1 } { + puts "\tRep$tnum: Skipping for HP-UX." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + set clopts { "create" "open" } + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + foreach c $clopts { + puts "Rep$tnum ($method $r $c): Replication\ + and non-rep env handles $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep028_sub $method $niter $tnum $l $r $c $args + } + } + } +} + +proc rep028_sub { method niter tnum logset recargs clargs largs } { + source ./include.tcl + global is_hp_test + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set omethod [convert_method $method] + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + puts "\tRep$tnum.a: Open replicated envs and non-replicated client env." 
+ repladd 1 + set env_cmd(M) "berkdb_env_noerr -create \ + -log_max 1000000 -home $masterdir $verbargs $repmemargs \ + $privargs $m_txnargs $m_logargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -home $clientdir $verbargs $repmemargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C) $recargs] + + # Open 2nd non-replication handle on client env, and create + # a db. Note, by not specifying any subsystem args, we + # do a DB_JOINENV, which is what we want. + set nonrepenv [eval {berkdb_env_noerr} $privargs -home $clientdir] + error_check_good nonrepenv [is_valid_env $nonrepenv] TRUE + + # Set up databases in-memory or on-disk. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + # If we're testing create, verify that if a non-rep client + # creates a database before the master does, then when that + # client goes to use it, it gets DB_DEAD_HANDLE. + # + if { $clargs == "create" } { + puts "\tRep$tnum.b: Create database non-replicated." + set let c + set nextlet d + set nonrepdb [eval berkdb_open_noerr -auto_commit \ + -create $omethod -env $nonrepenv $dbname] + error_check_good nonrepdb_open [is_valid_db $nonrepdb] TRUE + tclsleep 2 + } else { + set let b + set nextlet c + } + + # + # Now declare the clientenv a client. + # + puts "\tRep$tnum.$let: Declare env as rep client" + error_check_good client [$clientenv rep_start -client] 0 + if { $clargs == "create" } { + # + # We'll only catch this error if we turn off autoinit. + # Otherwise, the system will throw away everything on the + # client and resync. + # + $clientenv rep_config {autoinit off} + } + + # Bring the client online by processing the startup messages. 
+ set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + # + # In the create case, we'll detect the non-rep log records and + # determine this client was never part of the replication group. + # + if { $clargs == "create" } { + error_check_good errchk [is_substr $err \ + "DB_REP_JOIN_FAILURE"] 1 + error_check_good close [$nonrepdb close] 0 + } else { + # Open the same db through the master handle. Put data + # and process messages. + set db [eval berkdb_open_noerr \ + -create $omethod -env $masterenv -auto_commit $dbname] + error_check_good db_open [is_valid_db $db] TRUE + eval rep_test $method $masterenv $db $niter 0 0 0 $largs + process_msgs $envlist + + # + # If we're the open case, we want to just read the existing + # database through a non-rep readonly handle. Doing so + # should not create log records on the client (but has + # in the past). + # + puts "\tRep$tnum.$nextlet: Open and read database" + set nonrepdb [eval berkdb_open \ + -rdonly -env $nonrepenv $dbname] + error_check_good nonrepdb_open [is_valid_db $nonrepdb] TRUE + # + # If opening wrote log records, we need to process + # some more on the client to notice the end of log + # is now in an unexpected place. + # + eval rep_test $method $masterenv $db $niter 0 0 0 $largs + process_msgs $envlist + error_check_good close [$nonrepdb close] 0 + + # By passing in "NULL" for the database name, we compare + # only the master and client logs, not the databases. + rep_verify $masterdir $masterenv $clientdir $clientenv 0 0 1 NULL + +# set stat [catch {eval exec $util_path/db_printlog \ +# -h $masterdir > $masterdir/prlog} result] +# error_check_good stat_mprlog $stat 0 +# set stat [catch {eval exec $util_path/db_printlog \ +# -h $clientdir > $clientdir/prlog} result] +# error_check_good stat_cprlog $stat 0 +# error_check_good log_cmp \ +# [filecmp $masterdir/prlog $clientdir/prlog] 0 + + # Clean up. 
+ error_check_good db_close [$db close] 0 + + # Check that databases are in-memory or on-disk as expected. + check_db_location $nonrepenv + check_db_location $masterenv + check_db_location $clientenv + } + + error_check_good nonrepenv_close [$nonrepenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep029.tcl b/test/tcl/rep029.tcl new file mode 100644 index 00000000..954a498f --- /dev/null +++ b/test/tcl/rep029.tcl @@ -0,0 +1,274 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep029 +# TEST Test of internal initialization. +# TEST +# TEST One master, one client. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put one more record to the master. +# +proc rep029 { method { niter 200 } { tnum "029" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + if { $checking_valid_methods } { + return "ALL" + } + global passwd + global has_crypto + + set args [convert_args $method $args] + set saved_args $args + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # This test needs to set its own pagesize. 
+ set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + set opts { bulk clean noclean } + foreach r $test_recopts { + foreach c $opts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + set envargs "" + set args $saved_args + puts "Rep$tnum ($method $envargs $r $c $args):\ + Test of internal initialization\ + $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep029_sub $method $niter $tnum $envargs \ + $l $r $c $args + + # Skip encrypted tests if not supported. + if { $has_crypto == 0 || $databases_in_memory } { + continue + } + + # Run same set of tests with security. + # + append envargs " -encryptaes $passwd " + append args " -encrypt " + puts "Rep$tnum ($method $envargs $r $c $args):\ + Test of internal initialization\ + $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep029_sub $method $niter $tnum $envargs \ + $l $r $c $args + } + } + } +} + +proc rep029_sub { method niter tnum envargs logset recargs opts largs } { + global testdir + global passwd + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max $envargs $verbargs $privargs \ + -errpfx MASTER -home $masterdir \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max $envargs $verbargs $privargs \ + -errpfx CLIENT -home $clientdir \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + + # Create a gap requiring internal initialization. + set dbhandle NULL + set cid 2 + if { [lsearch $envargs "-encrypta*"] !=-1 } { + set flags "-P $passwd" + } else { + set flags "" + } + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + puts "\tRep$tnum.b: Reopen client ($opts)." 
+ if { $opts == "clean" } { + env_cleanup $clientdir + } + if { $opts == "bulk" } { + error_check_good bulk [$masterenv rep_config {bulk on}] 0 + } + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + if { $opts != "clean" } { + puts "\tRep$tnum.b.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. + # + set entries 10 + eval rep_test\ + $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + } + + puts "\tRep$tnum.c: Verify databases" + # + # If doing bulk testing, turn it off now so that it forces us + # to flush anything currently in the bulk buffer. We need to + # do this because rep_test might have aborted a transaction on + # its last iteration and those log records would still be in + # the bulk buffer causing the log comparison to fail. + # + if { $opts == "bulk" } { + puts "\tRep$tnum.c.1: Turn off bulk transfers." + error_check_good bulk [$masterenv rep_config {bulk off}] 0 + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + } + + # + # !!! This test CANNOT use rep_verify for logs due to encryption. + # Just compare databases. We either have to copy in + # all the code in rep_verify to adjust the beginning LSN + # or skip the log check for just this one test. + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 0 + + # Add records to the master and update client. + puts "\tRep$tnum.d: Add more records and check again." 
+ set entries 10 + eval rep_test $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 0 + + set bulkxfer [stat_field $masterenv rep_stat "Bulk buffer transfers"] + if { $opts == "bulk" } { + error_check_bad bulkxferon $bulkxfer 0 + } else { + error_check_good bulkxferoff $bulkxfer 0 + } + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + diff --git a/test/tcl/rep030.tcl b/test/tcl/rep030.tcl new file mode 100644 index 00000000..4bdb68fb --- /dev/null +++ b/test/tcl/rep030.tcl @@ -0,0 +1,384 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep030 +# TEST Test of internal initialization multiple files and pagesizes. +# TEST Hold some databases open on master. +# TEST +# TEST One master, one client using a data_dir for internal init. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put one more record to the master. +# +proc rep030 { method { niter 500 } { tnum "030" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. 
+ set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + set opts { noclean clean bulk } + foreach r $test_recopts { + foreach c $opts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $c):\ + Internal initialization - hold some\ + databases open on master $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep030_sub $method $niter $tnum $l $r $c $args + } + } + } +} + +proc rep030_sub { method niter tnum logset recargs opts largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. 
+ set maxpg 16384 + set log_max [expr $maxpg * 8] + set cache [expr $maxpg * 32 ] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Run internal init using a data directory + # + file mkdir $masterdir/data + file mkdir $masterdir/data2 + file mkdir $clientdir/data + file mkdir $clientdir/data2 + # + # Set it twice to test duplicate data_dirs as well + # as multiple, different data dirs + # + set data_diropts " -data_dir data -data_dir data -data_dir data2" + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $repmemargs $privargs \ + $m_logargs -log_max $log_max -errpfx MASTER \ + -cachesize { 0 $cache 1 } $data_diropts $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $repmemargs $privargs \ + $c_logargs -log_max $log_max -errpfx CLIENT \ + -cachesize { 0 $cache 1 } $data_diropts $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). 
+ set startpgsz 512 + set pglist "" + for { set pgsz $startpgsz } { $pgsz <= $maxpg } \ + { set pgsz [expr $pgsz * 2] } { + lappend pglist $pgsz + } + set nfiles [llength $pglist] + puts "\tRep$tnum.a.0: Running rep_test $nfiles times in replicated env." + set dbopen "" + for { set i 0 } { $i < $nfiles } { incr i } { + set mult [expr $i * 10] + set nentries [expr $niter + $mult] + set pagesize [lindex $pglist $i] + set largs " -pagesize $pagesize " + eval rep_test $method $masterenv NULL $nentries $mult $mult \ + 0 $largs + process_msgs $envlist + + # + # Every time we run 'rep_test' we create 'test.db'. So + # rename it each time through the loop. + # + set old "test.db" + set new "test.$i.db" + if { $databases_in_memory == 0 } { + error_check_good rename [$masterenv dbrename \ + -auto_commit $old $new] 0 + } else { + error_check_good inmem_rename [$masterenv dbrename \ + "" $old $new] 0 + } + process_msgs $envlist + # + # We want to keep some databases open so that we test the + # code finding the files in the data dir as well as finding + # them in dbreg list. + # + if { [expr $i % 2 ] == 0 } { + if { $databases_in_memory == 1 } { + set db [berkdb_open_noerr\ + -env $masterenv "" $new] + } else { + set db [berkdb_open_noerr\ + -env $masterenv $new] + } + error_check_good dbopen.$i [is_valid_db $db] TRUE + lappend dbopen $db + } + } + + # Set up a few special databases too. We want one with a subdatabase + # and we want an empty database, in addition to in-memory dbs. + + # Set up databases in-memory or on-disk as expected. 
+ if { $databases_in_memory } { + set testfile { "" "test.db" } + set emptyfile { "" "empty.db" } + } else { + set testfile "test.db" + set emptyfile "empty.db" + } + + # queue and heap do not allow for a subdb + if { [is_queue $method] || [is_heap $method] } { + set sub "" + } else { + set sub "subdb" + } + + set omethod [convert_method $method] + set largs " -pagesize $maxpg " + set largs [convert_args $method $largs] + + # + # Create/close an empty database. + # + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit -create \ + -mode 0644} $largs $omethod $emptyfile] + error_check_good emptydb [is_valid_db $db] TRUE + error_check_good empty_close [$db close] 0 + # + # If we're not using in-mem named databases, open a subdb and + # keep it open. (Do a regular db if method is queue.) + # We need it a few times later on. + # + if { $databases_in_memory } { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + } else { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile $sub] + } + error_check_good subdb [is_valid_db $db] TRUE + set start 0 + eval rep_test $method $masterenv $db $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Create a gap requiring internal initialization. + set flags "" + set cid 2 + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $db $start $niter $flags $largs] + + puts "\tRep$tnum.e: Reopen client ($opts)." 
+ if { $opts == "clean" } { + env_cleanup $clientdir + file mkdir $clientdir/data + file mkdir $clientdir/data2 + } + if { $opts == "bulk" } { + error_check_good bulk [$masterenv rep_config {bulk on}] 0 + } + + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + if { $opts != "clean" } { + puts "\tRep$tnum.e.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. + # + set entries 100 + eval rep_test $method $masterenv $db $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + } + error_check_good subdb_close [$db close] 0 + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.f: Verify logs and databases" + # + # If doing bulk testing, turn it off now so that it forces us + # to flush anything currently in the bulk buffer. We need to + # do this because rep_test might have aborted a transaction on + # its last iteration and those log records would still be in + # the bulk buffer causing the log comparison to fail. + # + if { $opts == "bulk" } { + puts "\tRep$tnum.f.1: Turn off bulk transfers." + error_check_good bulk [$masterenv rep_config {bulk off}] 0 + process_msgs $envlist 0 NONE err + } + + rep_verify $masterdir $masterenv $clientdir $clientenv\ + 1 1 1 test.db $masterdir/data + for { set i 0 } { $i < $nfiles } { incr i } { + set dbname "test.$i.db" + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 1 1 0 $dbname $masterdir/data + } + + # Close the database held open on master for initialization. + foreach db $dbopen { + error_check_good db_close [$db close] 0 + } + + # Add records to the master and update client. + puts "\tRep$tnum.g: Add more records and check again." 
+ set entries 10 + if { $databases_in_memory } { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -mode 0644} $largs $omethod $testfile] + } else { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -mode 0644} $largs $omethod $testfile $sub] + + } + error_check_good subdb [is_valid_db $db] TRUE + eval rep_test $method $masterenv $db $entries $niter 0 0 $largs + error_check_good subdb_close [$db close] 0 + process_msgs $envlist 0 NONE err + + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 1 1 0 test.db $masterdir/data + for { set i 0 } { $i < $nfiles } { incr i } { + set dbname "test.$i.db" + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 1 1 0 $dbname $masterdir/data + } + set bulkxfer [stat_field $masterenv rep_stat "Bulk buffer transfers"] + if { $opts == "bulk" } { + error_check_bad bulkxferon $bulkxfer 0 + } else { + error_check_good bulkxferoff $bulkxfer 0 + } + + # Check that databases and logs are in-memory or on-disk as expected. + check_db_location $masterenv $dbname $masterdir/data + check_db_location $clientenv $dbname $clientdir/data + + check_log_location $masterenv + check_log_location $clientenv + + # Make sure there are no rep files in the data + # directories. Even when rep files are on disk, + # they should be in the env's home directory. + no_rep_files_on_disk $masterdir/data + no_rep_files_on_disk $masterdir/data2 + no_rep_files_on_disk $clientdir/data + no_rep_files_on_disk $clientdir/data2 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep031.tcl b/test/tcl/rep031.tcl new file mode 100644 index 00000000..3aacb40a --- /dev/null +++ b/test/tcl/rep031.tcl @@ -0,0 +1,337 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep031 +# TEST Test of internal initialization and blocked operations. +# TEST +# TEST One master, one client. +# TEST Put one more record to the master. +# TEST Test that internal initialization blocks: +# TEST log_archive, rename, remove, fileid_reset, lsn_reset. +# TEST Sleep 30+ seconds. +# TEST Test that blocked operations are now unblocked. +# +proc rep031 { method { niter 400 } { tnum "031" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # This test is not method-sensitive, but we need to + # exercise log_archive with queue extents, so test + # queueext. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_queueext $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_queueext $method] == 0 } { + puts "Rep$tnum: Skipping for non-queueext method." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. 
+ set cleanopts { clean noclean } + foreach r $test_recopts { + foreach c $cleanopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $c $args):\ + Test of internal init and blocked\ + operations $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep031_sub $method $niter $tnum $l $r $c $args + } + } + } +} + +proc rep031_sub { method niter tnum logset recargs clean largs } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -log_max $log_max $verbargs $repmemargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -log_max $log_max $verbargs $repmemargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Create a gap requiring internal initialization. + set flags "" + set cid 2 + set dbhandle NULL + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + puts "\tRep$tnum.e: Reopen client ($clean)." + if { $clean == "clean" } { + env_cleanup $clientdir + } + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + if { $clean == "noclean" } { + puts "\tRep$tnum.e.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. + # + set entries 10 + eval rep_test \ + $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + } + + # + # We have now forced an internal initialization. Verify it is correct. 
+ # + puts "\tRep$tnum.f: Verify logs and databases" + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + # + # Internal initializations disable certain operations on the master for + # 30 seconds after the last init-related message is received + # by the master. Those operations are dbremove, dbrename and + # log_archive (with removal). + # + puts "\tRep$tnum.g: Try to remove and rename the database." + set dbname "test.db" + set old $dbname + set new $dbname.new + if { $databases_in_memory } { + set stat [catch {$masterenv dbrename -auto_commit "" $old $new} ret] + } else { + set stat [catch {$masterenv dbrename -auto_commit $old $new} ret] + } + error_check_good rename_fail $stat 1 + error_check_good rename_err [is_substr $ret "invalid"] 1 + if { $databases_in_memory } { + set stat [catch {$masterenv dbremove -auto_commit "" $old} ret] + } else { + set stat [catch {$masterenv dbremove -auto_commit $old} ret] + } + error_check_good remove_fail $stat 1 + error_check_good remove_err [is_substr $ret "invalid"] 1 + + # The fileid_reset and lsn_reset operations work on physical files + # so we do not need to test them for in-memory databases. + if { $databases_in_memory != 1 } { + puts "\tRep$tnum.h: Try to reset LSNs and fileid on the database." + set stat [catch {$masterenv id_reset $old} ret] + error_check_good id_reset $stat 1 + error_check_good id_err [is_substr $ret "invalid"] 1 + set stat [catch {$masterenv lsn_reset $old} ret] + error_check_good lsn_reset $stat 1 + error_check_good lsn_err [is_substr $ret "invalid"] 1 + } + + # + # Need entries big enough to generate additional log files. + # However, db_archive will not return an error, it will + # just retain the log file. + # + puts "\tRep$tnum.i: Run rep_test to generate more logs." + set entries 200 + eval rep_test $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + + # Test lockout of archiving only in on-disk case. 
+ if { $m_logtype != "in-memory" } { + puts "\tRep$tnum.j: Try to db_archive." + $masterenv log_flush + set res [eval exec $util_path/db_archive -l -h $masterdir] + set first [lindex $res 0] + set res [eval exec $util_path/db_archive -d -h $masterdir] + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_bad log.gone [lsearch -exact $res $first] -1 + + puts "\tRep$tnum.j.0: Exercise log_archive -arch_data." + set res [$masterenv log_archive -arch_data] + error_check_good arch_data [is_substr $res $dbname] 1 + + # Since we're doing queue extents they should be included + # in the results from log_archive -arch_data. + if { [is_queueext $method] } { + error_check_good arch_extent [is_substr $res "__dbq"] 1 + } + + puts "\tRep$tnum.j.1: Try to log_archive in master env." + set res [$masterenv log_archive -arch_remove] + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_bad log.gone0 [lsearch -exact $res $first] -1 + + # We can't open a second handle on the env in HP-UX. + if { $is_hp_test != 1 } { + puts "\tRep$tnum.j.1: Log_archive in new non-rep env." + set newenv [berkdb_env_noerr -txn nosync \ + -log_max $log_max -home $masterdir] + error_check_good newenv [is_valid_env $newenv] TRUE + set res [$newenv log_archive -arch_remove] + set res [eval exec \ + $util_path/db_archive -l -h $masterdir] + error_check_bad \ + log.gone1 [lsearch -exact $res $first] -1 + } + } + + # Check that databases are in-memory or on-disk as expected, before + # we try to delete the databases! + check_db_location $masterenv + check_db_location $clientenv + + set timeout 30 + # + # Sleep timeout+2 seconds - The timeout is 30 seconds, but we need + # to sleep a bit longer to make sure we cross the timeout. 
+ # + set to [expr $timeout + 2] + puts "\tRep$tnum.k: Wait $to seconds to timeout" + tclsleep $to + puts "\tRep$tnum.l: Retry blocked operations after wait" + if { $databases_in_memory == 1 } { + set stat [catch {$masterenv dbrename -auto_commit "" $old $new} ret] + error_check_good rename_work $stat 0 + set stat [catch {$masterenv dbremove -auto_commit "" $new} ret] + error_check_good remove_work $stat 0 + } else { + set stat [catch {$masterenv id_reset $old} ret] + error_check_good id_reset_work $stat 0 + set stat [catch {$masterenv lsn_reset $old} ret] + error_check_good lsn_reset_work $stat 0 + set stat [catch {$masterenv dbrename -auto_commit $old $new} ret] + error_check_good rename_work $stat 0 + set stat [catch {$masterenv dbremove -auto_commit $new} ret] + error_check_good remove_work $stat 0 + } + process_msgs $envlist 0 NONE err + + if { $m_logtype != "in-memory" } { + # Remove files via the 2nd non-rep env, check via db_archive. + if { $is_hp_test != 1 } { + $newenv log_flush + set res [$newenv log_archive -arch_remove] + set res \ + [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good \ + log.gone [lsearch -exact $res $first] -1 + error_check_good newenv_close [$newenv close] 0 + } else { + $masterenv log_flush + set res [$masterenv log_archive -arch_remove] + set res \ + [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good \ + log.gone [lsearch -exact $res $first] -1 + } + } + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep032.tcl b/test/tcl/rep032.tcl new file mode 100644 index 00000000..815bc616 --- /dev/null +++ b/test/tcl/rep032.tcl @@ -0,0 +1,207 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep032 +# TEST Test of log gap processing. +# TEST +# TEST One master, one client. 
+# TEST Run rep_test. +# TEST Run rep_test without sending messages to client. +# TEST Make sure client missing the messages catches up properly. +# +proc rep032 { method { niter 200 } { tnum "032" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + set opts { "" "bulk" } + foreach r $test_recopts { + foreach b $opts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r $b $args):\ + Test of log gap processing $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep032_sub $method $niter $tnum $l $r $b $args + } + } + } +} + +proc rep032_sub { method niter tnum logset recargs opts largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs $verbargs -home $masterdir -errpfx MASTER \ + $privargs -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + if { $opts == "bulk" } { + error_check_good bulk [$masterenv rep_config {bulk on}] 0 + } + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs $verbargs -home $clientdir -errpfx CLIENT \ + $privargs -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run rep_test in the master (and update client). 
+ puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.b: Check client processed everything properly." + set queued [stat_field $clientenv rep_stat "Maximum log records queued"] + set request1 [stat_field $clientenv rep_stat "Log records requested"] + error_check_good queued $queued 0 + + # Run rep_test in the master (don't update client). + # First run with dropping all client messages via replclear. + puts "\tRep$tnum.c: Running rep_test dropping client msgs." + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + replclear 2 + process_msgs $envlist + + # + # Need new operations to force log gap processing to + # request missing pieces. + # + puts "\tRep$tnum.d: Running rep_test again replicated." + # + # Force a checkpoint to cause a gap to force rerequest. + # + $masterenv txn_checkpoint -force + process_msgs $envlist + tclsleep 1 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.e: Check we re-requested and had a backlog." + set queued [stat_field $clientenv rep_stat "Maximum log records queued"] + set request2 [stat_field $clientenv rep_stat "Log records requested"] + error_check_bad queued $queued 0 + error_check_bad request $request1 $request2 + + puts "\tRep$tnum.f: Verify logs and databases" + # + # If doing bulk testing, turn it off now so that it forces us + # to flush anything currently in the bulk buffer. We need to + # do this because rep_test might have aborted a transaction on + # its last iteration and those log records would still be in + # the bulk buffer causing the log comparison to fail. + # + if { $opts == "bulk" } { + puts "\tRep$tnum.f.1: Turn off bulk transfers." 
+ error_check_good bulk [$masterenv rep_config {bulk off}] 0 + process_msgs $envlist 0 NONE err + } + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + set bulkxfer [stat_field $masterenv rep_stat "Bulk buffer transfers"] + if { $opts == "bulk" } { + error_check_bad bulkxferon $bulkxfer 0 + } else { + error_check_good bulkxferoff $bulkxfer 0 + } + + check_log_location $masterenv + check_log_location $clientenv + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep033.tcl b/test/tcl/rep033.tcl new file mode 100644 index 00000000..99af8d40 --- /dev/null +++ b/test/tcl/rep033.tcl @@ -0,0 +1,269 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep033 +# TEST Test of internal initialization with rename and remove of dbs. +# TEST +# TEST One master, one client. +# TEST Generate several databases. Replicate to client. +# TEST Do some renames and removes, both before and after +# TEST closing the client. +# +proc rep033 { method { niter 200 } { tnum "033" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # This test depends on manipulating logs, so can not be run with + # in-memory logging. + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + set envargs "" + set cleanopts { noclean clean } + set when { before after } + foreach r $test_recopts { + foreach c $cleanopts { + foreach w $when { + puts "Rep$tnum ($method $envargs $c $r $w $args):\ + Test of internal initialization $msg $msg2." + rep033_sub $omethod $niter $tnum $envargs \ + $r $c $w $args + } + } + } +} + +proc rep033_sub { method niter tnum envargs recargs clean when largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn nosync \ + -log_buffer $log_buf -log_max $log_max $envargs \ + -errpfx MASTER $verbargs $repmemargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn nosync \ + -log_buffer $log_buf -log_max $log_max $envargs \ + -errpfx CLIENT $verbargs $repmemargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Set up for in-memory or on-disk databases. + if { $databases_in_memory } { + set memargs { "" } + } else { + set memargs "" + } + + puts "\tRep$tnum.a: Create several databases on master." + set oflags " -env $masterenv $method -create -auto_commit " + set dbw [eval {berkdb_open_noerr} $oflags $largs $memargs w.db] + set dbx [eval {berkdb_open_noerr} $oflags $largs $memargs x.db] + set dby [eval {berkdb_open_noerr} $oflags $largs $memargs y.db] + set dbz [eval {berkdb_open_noerr} $oflags $largs $memargs z.db] + error_check_good dbw_close [$dbw close] 0 + error_check_good dbx_close [$dbx close] 0 + error_check_good dby_close [$dby close] 0 + error_check_good dbz_close [$dbz close] 0 + + # Update client, then close. + process_msgs $envlist + + puts "\tRep$tnum.b: Close client." + error_check_good client_close [$clientenv close] 0 + + # If we're doing the rename/remove operations before adding + # databases A and B, manipulate only the existing files. 
+ if { $when == "before" } { + rep033_rename_remove $masterenv + } + + # Run rep_test in the master (don't update client). + # + # We'd like to control the names of these dbs, so give + # rep_test an existing handle. + # + puts "\tRep$tnum.c: Create new databases. Populate with rep_test." + set dba [eval {berkdb_open_noerr} $oflags $largs $memargs a.db] + set dbb [eval {berkdb_open_noerr} $oflags $largs $memargs b.db] + eval rep_test $method $masterenv $dba $niter 0 0 0 $largs + eval rep_test $method $masterenv $dbb $niter 0 0 0 $largs + error_check_good dba_close [$dba close] 0 + error_check_good dbb_close [$dbb close] 0 + + # Throw away messages for client. + replclear 2 + + # If we're doing the rename/remove afterwards, manipulate + # all the files including A and B. + if { $when == "after" } { + rep033_rename_remove $masterenv + } + error_check_good rename_b [eval {$masterenv dbrename} $memargs b.db x.db] 0 + error_check_good remove_a [eval {$masterenv dbremove} $memargs a.db] 0 + + puts "\tRep$tnum.d: Run db_archive on master." + $masterenv log_flush + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_bad log.1.present [lsearch -exact $res log.0000000001] -1 + set res [eval exec $util_path/db_archive -d -h $masterdir] + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good log.1.gone [lsearch -exact $res log.0000000001] -1 + + puts "\tRep$tnum.e: Reopen client ($clean)." + if { $clean == "clean" } { + env_cleanup $clientdir + } + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + if { $clean == "noclean" } { + puts "\tRep$tnum.e.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. 
+ # + set entries 10 + eval rep_test $method $masterenv NULL $entries $niter 0 0 $largs + process_msgs $envlist 0 NONE err + } + + puts "\tRep$tnum.f: Verify logs and databases" + # + # By sending in a NULL for dbname, we only compare logs. + # + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 NULL + # + # ... now the databases, manually. X, Y, and C should exist. + # + set dbnames "x.db w.db c.db" + foreach db $dbnames { + set db1 [eval \ + {berkdb_open_noerr -env $masterenv} $largs -rdonly $memargs $db] + set db2 [eval \ + {berkdb_open_noerr -env $clientenv} $largs -rdonly $memargs $db] + + error_check_good compare:$db [db_compare \ + $db1 $db2 $masterdir/$db $clientdir/$db] 0 + error_check_good db1_close [$db1 close] 0 + error_check_good db2_close [$db2 close] 0 + } + + # A, B, and Z should be gone on client. + error_check_good dba_gone [file exists $clientdir/a.db] 0 + error_check_good dbb_gone [file exists $clientdir/b.db] 0 + # + # Currently we cannot remove z.db on the client because + # we don't own the file namespace. So, we cannot do + # the check below. If that changes, we want the test below. + error_check_good dbz_gone [file exists $clientdir/z.db] 0 + + # Clean up. + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + +proc rep033_rename_remove { env } { + global databases_in_memory + if { $databases_in_memory } { + set memargs { "" } + } else { + set memargs "" + } + + # Here we manipulate databases W, X, Y, and Z. + # Remove W. + error_check_good remove_w [eval $env dbremove $memargs w.db] 0 + + # Rename X to W, Y to C (an entirely new name). + error_check_good rename_x [eval $env dbrename $memargs x.db w.db] 0 + error_check_good rename_y [eval $env dbrename $memargs y.db c.db] 0 + + # Remove Z. 
+ error_check_good remove_z [eval $env dbremove $memargs z.db] 0 +} diff --git a/test/tcl/rep034.tcl b/test/tcl/rep034.tcl new file mode 100644 index 00000000..9e6dbcbc --- /dev/null +++ b/test/tcl/rep034.tcl @@ -0,0 +1,398 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep034 +# TEST Test of STARTUPDONE notification. +# TEST +# TEST STARTUPDONE can now be recognized without the need for new "live" log +# TEST records from the master (under favorable conditions). The response to +# TEST the ALL_REQ at the end of synchronization includes an end-of-log marker +# TEST that now triggers it. However, the message containing that end marker +# TEST could get lost, so live log records still serve as a back-up mechanism. +# TEST The end marker may also be set under c2c sync, but only if the serving +# TEST client has itself achieved STARTUPDONE. +# +proc rep034 { method { niter 2 } { tnum "034" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + foreach l $logsets { + puts "Rep$tnum ($method $args): Test of\ + startup synchronization detection $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep034_sub $method $niter $tnum $l $args + } +} + +# This test manages on its own the decision of whether or not to open an +# environment with recovery. (It varies throughout the test.) Therefore there +# is no need to run it twice (as we often do with a loop in the main proc). +# +proc rep034_sub { method niter tnum logset largs } { + global anywhere + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + global rep034_got_allreq + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # In first part of test master serves requests. + # + set anywhere 0 + + # Create a master; add some data. 
+ # + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -event $verbargs -errpfx MASTER $repmemargs $privargs \ + -home $masterdir -rep_master -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd] + puts "\tRep$tnum.a: Create master; add some data." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + # Bring up a new client, and see that it can get STARTUPDONE with no new + # live transactions at the master. + # + puts "\tRep$tnum.b: Bring up client; check STARTUPDONE." + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -event $verbargs -errpfx CLIENT $repmemargs $privargs \ + -home $clientdir -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd] + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + error_check_good done_without_live_txns \ + [stat_field $clientenv rep_stat "Startup complete"] 1 + + # Test that the event got fired as well. In the rest of the test things + # get a little complex (what with having two clients), so only check the + # event part here. The important point is the various ways that + # STARTUPDONE can be computed, so testing the event firing mechanism + # just this once is enough. + # + error_check_good done_event_too [is_startup_done $clientenv] 1 + + # + # Bring up another client. Do additional new txns at master, ensure + # that STARTUPDONE is not triggered at NEWMASTER LSN. + # + puts "\tRep$tnum.c: Another client; no STARTUPDONE at NEWMASTER LSN." + set newmaster_lsn [next_expected_lsn $masterenv] + repladd 3 + # + # !!! Please note that we're giving client2 a special customized version + # of the replication transport call-back function. 
+ # + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \ + -event $verbargs -errpfx CLIENT2 $repmemargs $privargs \ + -home $clientdir2 -rep_client -rep_transport \[list 3 rep034_send\]" + set client2env [eval $cl2_envcmd] + + set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}" + set verified false + for {set i 0} {$i < 10} {incr i} { + proc_msgs_once $envlist + set client2lsn [next_expected_lsn $client2env] + + # Get to the point where we've gone past where the master's LSN + # was at NEWMASTER time, and make sure we haven't yet gotten + # STARTUPDONE. Ten loop iterations should be plenty. + # + if {[$client2env log_compare $client2lsn $newmaster_lsn] > 0} { + if {![stat_field \ + $client2env rep_stat "Startup complete"]} { + set verified true + } + break; + } + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + } + error_check_good no_newmaster_trigger $verified true + + process_msgs $envlist + error_check_good done_during_live_txns \ + [stat_field $client2env rep_stat "Startup complete"] 1 + + # + # From here on out we use client-to-client sync. + # + set anywhere 1 + + # Here we rely on recovery at client 1. If that client is running with + # in-memory logs or in-memory databases, forgo the remainder of the test. + # + if {$c_logtype eq "in-mem" || $databases_in_memory } { + puts "\tRep$tnum.d: Skip the rest of the test for\ + in-memory logging or databases." + $masterenv close + $clientenv close + $client2env close + replclose $testdir/MSGQUEUEDIR + return + } + + # Shut down client 1. Bring it back, with recovery. Verify that it can + # get STARTUPDONE by syncing to other client, even with no new master + # txns. + # + puts "\tRep$tnum.d: Verify STARTUPDONE using c2c sync." + $clientenv close + set clientenv [eval $cl_envcmd -recover] + set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}" + + # Clear counters at client2, so that we can check "Client service + # requests" in a moment. 
+ # + $client2env rep_stat -clear + process_msgs $envlist + error_check_good done_via_c2c \ + [stat_field $clientenv rep_stat "Startup complete"] 1 + # + # Make sure our request was served by client2. This isn't a test of c2c + # sync per se, but if this fails it indicates that we're not really + # testing what we thought we were testing. + # + error_check_bad c2c_served_by_master \ + [stat_field $client2env rep_stat "Client service requests"] 0 + + # Verify that we don't get STARTUPDONE if we are using c2c sync to + # another client, and the serving client has not itself reached + # STARTUPDONE, because that suggests that the serving client could be + # way far behind. But that we can still eventually get STARTUPDONE, as + # a fall-back, once the master starts generating new txns again. + # + # To do so, we'll need to restart both clients. Start with the client + # that will serve the request. Turn off "anywhere" process for a moment + # so that we can get this client set up without having the other one + # running. + # + # Now it's client 2 that needs recovery. Forgo the rest of the test if + # it is logging in memory. (We could get this far in mixed mode, with + # client 1 logging on disk.) + # + if {$c2_logtype eq "in-mem"} { + puts "\tRep$tnum.e: Skip rest of test for in-memory logging." + $masterenv close + $clientenv close + $client2env close + replclose $testdir/MSGQUEUEDIR + return + } + puts "\tRep$tnum.e: Check no STARTUPDONE when c2c server is behind." + $clientenv log_flush + $clientenv close + $client2env log_flush + $client2env close + + set anywhere 0 + set client2env [eval $cl2_envcmd -recover] + set envlist "{$masterenv 1} {$client2env 3}" + + # We want client2 to get partway through initialization, but once it + # sends the ALL_REQ to the master, we want to cut things off there. 
+ # Recall that we gave client2 a special "wrapper" version of the + # replication transport call-back function: that function will set a + # flag when it sees an ALL_REQ message go by. + # + set rep034_got_allreq false + while { !$rep034_got_allreq } { + proc_msgs_once $envlist + } + + # + # To make sure we're doing a valid test, verify that we really did + # succeed in getting the serving client into the state we intended. + # + error_check_good serve_from_notstarted \ + [stat_field $client2env rep_stat "Startup complete"] 0 + + # Start up the client to be tested. Make sure it doesn't get + # STARTUPDONE (yet). Again, the checking of service request stats is + # just for test debugging, to make sure we have a valid test. + # + # To add insult to injury, not only do we not get STARTUPDONE from the + # "behind" client, we also don't even get all the log records we need + # (because we didn't allow client2's ALL_REQ to get to the master). + # And no mechanism to let us know that. The only resolution is to wait + # for gap detection to rerequest (which would then go to the master). + # So, set a small rep_request upper bound, so that it doesn't take a ton + # of new live txns to reach the trigger. + # + set anywhere 1 + $client2env rep_stat -clear + replclear 2 + set clientenv [eval $cl_envcmd -recover] + # + # Set to 400 usecs. An average ping to localhost should + # be a few 10s usecs. + # + $clientenv rep_request 400 400 + set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}" + + # Here we're expecting that the master isn't generating any new log + # records, which is normally the case since we're not generating any new + # transactions there. This is important, because otherwise the client + # could notice its log gap and request the missing records, resulting in + # STARTUPDONE before we're ready for it. 
When debug_rop is on, just + # scanning the data-dir during UPDATE_REQ processing (which, remember, + # now happens just to check for potential NIMDB re-materialization) + # generates log records, as we open each file we find to see if it's a + # database. So, filter out LOG messages (simulating them being "lost") + # temporarily. + # + if {[is_substr [berkdb getconfig] "debug_rop"]} { + $masterenv rep_transport {1 rep034_send_nolog} + } + while {[rep034_proc_msgs_once $masterenv $clientenv $client2env] > 0} {} + $masterenv rep_transport {1 replsend} + + error_check_good not_from_undone_c2c_client \ + [stat_field $clientenv rep_stat "Startup complete"] 0 + + error_check_bad c2c_served_by_master \ + [stat_field $client2env rep_stat "Client service requests"] 0 + + # Verify that we nevertheless *do* get STARTUPDONE after the master + # starts generating new txns again. Generate two sets of transactions, + # with an unmistakable pause between, to ensure that we trigger the + # client's rerequest timer, which we need in order to pick up the + # missing transactions. The 400 usec is a nice short time; but on + # Windows sometimes it's possible to blast through a single process_msgs + # cycle so quickly that its low-resolution timer reflects no elapsed + # time at all! + # + puts "\tRep$tnum.f: Check STARTUPDONE via fall-back to live txns." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + tclsleep 1 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + error_check_good fallback_live_txns \ + [stat_field $clientenv rep_stat "Startup complete"] 1 + + $masterenv close + $clientenv close + $client2env close + replclose $testdir/MSGQUEUEDIR + set anywhere 0 +} + +# Do a round of message processing, but juggle things such that client2 can +# never receive a message from the master. +# +# Assumes the usual "{$masterenv 1} {$clientenv 2} {$client2env 3}" structure. 
+# +proc rep034_proc_msgs_once { masterenv clientenv client2env } { + set nproced [proc_msgs_once "{$masterenv 1}" NONE err] + error_check_good pmonce_1 $err 0 + replclear 3 + + incr nproced [proc_msgs_once "{$clientenv 2} {$client2env 3}" NONE err] + error_check_good pmonce_2 $err 0 + + return $nproced +} + +# Wrapper for replsend. Mostly just a pass-through to the real replsend, except +# we watch for an ALL_REQ, and just set a flag when we see it. +# +proc rep034_send { control rec fromid toid flags lsn } { + global rep034_got_allreq + + if {[berkdb msgtype $control] eq "all_req"} { + set rep034_got_allreq true + } + return [replsend $control $rec $fromid $toid $flags $lsn] +} + +# Another slightly different wrapper for replsend. This one simulates losing +# any broadcast LOG messages from the master. +# +proc rep034_send_nolog { control rec fromid toid flags lsn } { + if {[berkdb msgtype $control] eq "log" && + $fromid == 1 && $toid == -1} { + set result 0 + } else { + set result [replsend $control $rec $fromid $toid $flags $lsn] + } + return $result +} diff --git a/test/tcl/rep035.tcl b/test/tcl/rep035.tcl new file mode 100644 index 00000000..3ab482f9 --- /dev/null +++ b/test/tcl/rep035.tcl @@ -0,0 +1,290 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep035 +# TEST Test sync-up recovery in replication. +# TEST +# TEST We need to fork off 3 child tclsh processes to operate +# TEST on Site 3's (client always) home directory: +# TEST Process 1 continually calls lock_detect. +# TEST Process 2 continually calls txn_checkpoint. +# TEST Process 3 continually calls memp_trickle. +# TEST Process 4 continually calls log_archive. +# TEST Sites 1 and 2 will continually swap being master +# TEST (forcing site 3 to continually run sync-up recovery) +# TEST New master performs 1 operation, replicates and downgrades. 
+ +proc rep035 { method { niter 100 } { tnum "035" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set saved_args $args + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + set envargs "" + set args $saved_args + puts "Rep$tnum: Test sync-up recovery ($method) $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep035_sub $method $niter $tnum $envargs $l $args + } +} + +proc rep035_sub { method niter tnum envargs logset largs } { + source ./include.tcl + global testdir + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir1 $testdir/CLIENTDIR1 + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir1 + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 $envargs -home $masterdir $m_logargs \ + -errpfx MASTER -errfile /dev/stderr $m_txnargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set env1 [eval $env_cmd(M)] + + # Open two clients + repladd 2 + set env_cmd(C1) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 $envargs -home $clientdir1 $c_logargs \ + -errfile /dev/stderr -errpfx CLIENT $c_txnargs -rep_client \ + -rep_transport \[list 2 replsend\]" + set env2 [eval $env_cmd(C1)] + + # Second client needs lock_detect flag. + repladd 3 + set env_cmd(C2) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 $envargs -home $clientdir2 $c2_logargs \ + -errpfx CLIENT2 -errfile /dev/stderr $c2_txnargs -rep_client \ + -lock_detect default -rep_transport \[list 3 replsend\]" + set env3 [eval $env_cmd(C2)] + error_check_good client_env [is_valid_env $env3] TRUE + + # Bring the client online by processing the startup messages. + set envlist "{$env1 1} {$env2 2} {$env3 3}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $env1 test force noarchive_timeout + + # We need to fork off 3 child tclsh processes to operate + # on Site 3's (client always) home directory: + # Process 1 continually calls lock_detect (DB_LOCK_DEFAULT) + # Process 2 continually calls txn_checkpoint (DB_FORCE) + # Process 3 continually calls memp_trickle (large % like 90) + # Process 4 continually calls log_archive. 
+ + puts "\tRep$tnum.a: Fork child process running lock_detect on client2." + set pid1 [exec $tclsh_path $test_path/wrap.tcl \ + rep035script.tcl $testdir/lock_detect.log \ + $clientdir2 detect &] + + puts "\tRep$tnum.b:\ + Fork child process running txn_checkpoint on client2." + set pid2 [exec $tclsh_path $test_path/wrap.tcl \ + rep035script.tcl $testdir/txn_checkpoint.log \ + $clientdir2 checkpoint &] + + puts "\tRep$tnum.c: Fork child process running memp_trickle on client2." + set pid3 [exec $tclsh_path $test_path/wrap.tcl \ + rep035script.tcl $testdir/memp_trickle.log \ + $clientdir2 trickle &] + + puts "\tRep$tnum.d: Fork child process running log_archive on client2." + set pid4 [exec $tclsh_path $test_path/wrap.tcl \ + rep035script.tcl $testdir/log_archive.log \ + $clientdir2 archive &] + + # Pause a bit to let the children get going. + tclsleep 5 + + set logfilelist [list lock_detect.log \ + txn_checkpoint.log memp_trickle.log log_archive.log] + set pidlist [list $pid1 $pid2 $pid3 $pid4] + + # + # Sites 1 and 2 will continually swap being master + # forcing site 3 to continually run sync-up recovery. + # New master performs 1 operation, replicates and downgrades. + # Site 3 will always stay a client. + # + # Set up all the master/client data we're going to need + # to keep track of and swap. Set up the handles for rep_test. + # + + set masterenv $env1 + set mid 1 + set clientenv $env2 + set cid 2 + + # Set up databases as in-memory or on-disk as specified. 
+ if { $databases_in_memory } { + set testfile { "" "test$tnum.db" } + } else { + set testfile "test$tnum.db" + } + + set args [convert_args $method] + set omethod [convert_method $method] + set mdb_cmd "{berkdb_open_noerr} -env $masterenv -auto_commit \ + -create $omethod $args -mode 0644 $testfile" + set cdb_cmd "{berkdb_open_noerr} -env $clientenv -auto_commit \ + $omethod $args -mode 0644 $testfile" + + set masterdb [eval $mdb_cmd] + error_check_good dbopen [is_valid_db $masterdb] TRUE + process_msgs $envlist + + set clientdb [eval $cdb_cmd] + error_check_good dbopen [is_valid_db $clientdb] TRUE + + tclsleep 2 + puts "\tRep$tnum.e: Swap master and client $niter times." + for { set i 0 } { $i < $niter } { incr i } { + + # Do a few ops + eval rep_test $method $masterenv $masterdb 2 $i $i 0 $largs + set envlist "{$masterenv $mid} {$clientenv $cid} {$env3 3}" + process_msgs $envlist + + # Do one op on master and process messages and drop + # to clientenv to force sync-up recovery next time. + eval rep_test $method $masterenv $masterdb 1 $i $i 0 $largs + set envlist "{$masterenv $mid} {$env3 3}" + replclear $cid + process_msgs $envlist + + # Swap all the info we need. + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + + set tmp $masterdb + set masterdb $clientdb + set clientdb $tmp + + set tmp $mid + set mid $cid + set cid $tmp + + set tmp $mdb_cmd + set mdb_cmd $cdb_cmd + set cdb_cmd $tmp + + puts "\tRep$tnum.e.$i: Swap: master $mid, client $cid" + error_check_good downgrade [$clientenv rep_start -client] 0 + error_check_good upgrade [$masterenv rep_start -master] 0 + set envlist "{$masterenv $mid} {$clientenv $cid} {$env3 3}" + process_msgs $envlist + + # Close old and reopen since we will get HANDLE_DEAD + # otherwise because we dropped messages to the new master. 
+ error_check_good masterdb [$masterdb close] 0 + error_check_good clientdb [$clientdb close] 0 + + set masterdb [eval $mdb_cmd] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + set clientdb [eval $cdb_cmd] + error_check_good dbopen [is_valid_db $clientdb] TRUE + process_msgs $envlist + } + + # Communicate with child processes by creating a marker file. + set markerenv [berkdb_env_noerr -create -home $testdir -txn] + error_check_good markerenv_open [is_valid_env $markerenv] TRUE + set marker [eval "berkdb_open_noerr \ + -create -btree -auto_commit -env $markerenv marker.db"] + error_check_good marker_close [$marker close] 0 + + # Wait for child processes; they should shut down quickly. + watch_procs $pidlist 1 + + # There should not be any messages in the log files. + # If there are, print them out. + foreach file $logfilelist { + puts "\tRep$tnum.f: Checking $file for errors." + set fd [open $testdir/$file r] + while { [gets $fd str] != -1 } { + error "FAIL: found message $str" + } + close $fd + } + + error_check_good masterdb [$masterdb close] 0 + error_check_good clientdb [$clientdb close] 0 + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + error_check_good env3_close [$env3 close] 0 + error_check_good markerenv_close [$markerenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep035script.tcl b/test/tcl/rep035script.tcl new file mode 100644 index 00000000..e0180aba --- /dev/null +++ b/test/tcl/rep035script.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep035 script - continually calls lock_detect, txn_checkpoint, +# or mpool_trickle. +# +# Usage: repscript clientdir apicall +# clientdir: client env directory +# apicall: detect, checkpoint, or trickle. 
+source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript clientdir apicall" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set clientdir [ lindex $argv 0 ] +set apicall [ lindex $argv 1 ] + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# Join the client env. +repladd 3 +set envid 3 +set cl2_cmd "berkdb_env_noerr -home $clientdir \ + -errfile /dev/stderr -errpfx CLIENT.$apicall \ + -txn -rep_client -rep_transport \[list $envid replsend\]" +# set cl2_cmd "berkdb_env_noerr -home $clientdir \ +# -errfile /dev/stderr -errpfx CLIENT.$apicall \ +# -verbose {rep on} \ +# -txn -rep_client -rep_transport \[list $envid replsend\]" +set clientenv [eval $cl2_cmd] +error_check_good script_c2env_open [is_valid_env $clientenv] TRUE + +# Run chosen call continuously until the parent script creates +# a marker file to indicate completion. 
+switch -exact -- $apicall {
+ archive {
+ while { [file exists $testdir/marker.db] == 0 } {
+ $clientenv log_archive -arch_remove
+# tclsleep 1
+ }
+ }
+ detect {
+ while { [file exists $testdir/marker.db] == 0 } {
+ $clientenv lock_detect default
+# tclsleep 1
+ }
+ }
+ checkpoint {
+ while { [file exists $testdir/marker.db] == 0 } {
+ $clientenv txn_checkpoint -force
+ tclsleep 1
+ }
+ }
+ trickle {
+ while { [file exists $testdir/marker.db] == 0 } {
+ $clientenv mpool_trickle 90
+# tclsleep 1
+ }
+ }
+ default {
+ puts "FAIL: unrecognized API call $apicall"
+ }
+}
+
+error_check_good clientenv_close [$clientenv close] 0
+
diff --git a/test/tcl/rep036.tcl b/test/tcl/rep036.tcl
new file mode 100644
index 00000000..0d532141
--- /dev/null
+++ b/test/tcl/rep036.tcl
@@ -0,0 +1,204 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+#
+# TEST rep036
+# TEST Multiple master processes writing to the database.
+# TEST One process handles all message processing.
+
+proc rep036 { method { niter 200 } { tnum "036" } args } {
+
+ source ./include.tcl
+ global repfiles_in_memory
+
+ # Valid for btree only.
+ if { $checking_valid_methods } {
+ set test_methods { btree }
+ return $test_methods
+ }
+ if { [is_btree $method] == 0 } {
+ puts "Rep$tnum: Skipping for method $method."
+ return
+ }
+
+ set saved_args $args
+ set logsets [create_logsets 3]
+
+ set msg2 "and on-disk replication files"
+ if { $repfiles_in_memory } {
+ set msg2 "and in-memory replication files"
+ }
+
+ foreach l $logsets {
+ set envargs ""
+ set args $saved_args
+ puts "Rep$tnum: Test sync-up recovery ($method) $msg2."
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep036_sub $method $niter $tnum $envargs $l $args + } +} + +proc rep036_sub { method niter tnum envargs logset args } { + source ./include.tcl + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer. + # We always run this test with -txn, so don't adjust txnargs. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 $envargs -home $masterdir $m_logargs \ + -errpfx MASTER -errfile /dev/stderr -txn -rep_master \ + -rep_transport \[list 1 replsend\]" + set env1 [eval $env_cmd(M)] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $verbargs $repmemargs \ + -log_max 1000000 $envargs -home $clientdir $c_logargs \ + -errfile /dev/stderr -errpfx CLIENT -txn -rep_client \ + -rep_transport \[list 2 replsend\]" + set env2 [eval $env_cmd(C)] + + # Bring the client online by processing the startup messages. + set envlist "{$env1 1} {$env2 2}" + process_msgs $envlist + +# # Start up deadlock detector. +# # Commented out, as are two more sections below - see [#15049]. +# set dpid [eval {exec $util_path/db_deadlock} \ +# -a o -v -t 2.0 -h $masterdir >& $testdir/dd.parent.out &] + + # Set up master database. 
+ set testfile "rep$tnum.db" + set omethod [convert_method $method] + set mdb [eval {berkdb_open_noerr} -env $env1 -auto_commit \ + -create -mode 0644 $omethod $testfile] + error_check_good dbopen [is_valid_db $mdb] TRUE + + # Put a record in the master database. + set key MAIN_KEY + set string MAIN_STRING + set t [$env1 txn] + error_check_good txn [is_valid_txn $t $env1] TRUE + set txn "-txn $t" + + set ret [eval \ + {$mdb put} $txn {$key [chop_data $method $string]}] + error_check_good mdb_put $ret 0 + error_check_good txn_commit [$t commit] 0 + + # Fork two writers that write to the master. + set pidlist {} + foreach writer { 1 2 } { + puts "\tRep$tnum.a: Fork child process WRITER$writer." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep036script.tcl $testdir/rep036script.log.$writer \ + $masterdir $writer $niter btree &] + lappend pidlist $pid + } + + # Run the main loop until the writers signal completion. + set i 0 + while { [file exists $testdir/1.db] == 0 && \ + [file exists $testdir/2.db] == 0 } { + set string MAIN_STRING.$i + + set t [$env1 txn] + error_check_good txn [is_valid_txn $t $env1] TRUE + set txn "-txn $t" + set ret [eval \ + {$mdb put} $txn {$key [chop_data $method $string]}] + +# # Writing to this database can deadlock. If we do, let the +# # deadlock detector break the lock, wait a second, and try again. +# while { [catch {eval {$mdb put}\ +# $txn {$key [chop_data $method $string]}} ret] } { +# # Make sure the failure is a deadlock. +# error_check_good deadlock [is_substr $ret DB_LOCK_DEADLOCK] 1 +# tclsleep 1 +# } + + + error_check_good mdb_put $ret 0 + error_check_good txn_commit [$t commit] 0 + + if { [expr $i % 10] == 0 } { + puts "\tRep036.c: Wrote MAIN record $i" + } + incr i + + # Process messages. + process_msgs $envlist + + # Wait a while, then do it all again. + tclsleep 1 + } + + + # Confirm that the writers are done and process the messages + # once more to be sure the client is caught up. 
+ watch_procs $pidlist 1 + process_msgs $envlist + +# # We are done with the deadlock detector. +# error_check_good kill_deadlock_detector [tclkill $dpid] "" + + puts "\tRep$tnum.c: Verify logs and databases" + # Check that master and client logs and dbs are identical. + # Logs first ... + set stat [catch {eval exec $util_path/db_printlog \ + -h $masterdir > $masterdir/prlog} result] + error_check_good stat_mprlog $stat 0 + set stat [catch {eval exec $util_path/db_printlog \ + -h $clientdir > $clientdir/prlog} result] + error_check_good mdb [$mdb close] 0 + error_check_good stat_cprlog $stat 0 +# error_check_good log_cmp \ +# [filecmp $masterdir/prlog $clientdir/prlog] 0 + + # ... now the databases. + set db1 [eval {berkdb_open_noerr -env $env1 -rdonly $testfile}] + set db2 [eval {berkdb_open_noerr -env $env2 -rdonly $testfile}] + + error_check_good comparedbs [db_compare \ + $db1 $db2 $masterdir/$testfile $clientdir/$testfile] 0 + error_check_good db1_close [$db1 close] 0 + error_check_good db2_close [$db2 close] 0 + + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep036script.tcl b/test/tcl/rep036script.tcl new file mode 100644 index 00000000..d11873d7 --- /dev/null +++ b/test/tcl/rep036script.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep036 script - create additional writers in master env. +# +# Usage: masterdir writerid +# masterdir: Directory of replication master +# writerid: i.d. 
number for writer +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +global rand_init +set usage "repscript masterdir writerid nentries method" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set writerid [ lindex $argv 1 ] +set nentries [ lindex $argv 2 ] +set method [ lindex $argv 3 ] + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machid. +repladd 1 +repladd 2 + +# Start up deadlock detector. +# Commented out - see #15049. +#set dpid [eval {exec $util_path/db_deadlock} \ +# -a o -v -t 2.0 -h $masterdir >& $testdir/dd.writer.$writerid.out &] + +# Join the master env. +set envid 1 +set env_cmd "berkdb_env_noerr -home $masterdir \ + -errfile /dev/stderr -errpfx WRITER.$writerid \ + -txn -rep_master -rep_transport \[list $envid replsend\]" +# set env_cmd "berkdb_env_noerr -home $masterdir \ +# -errfile /dev/stderr -errpfx WRITER.$writerid \ +# -verbose {rep on} \ +# -txn -rep_master -rep_transport \[list $envid replsend\]" +set masterenv [eval $env_cmd] +error_check_good script_env_open [is_valid_env $masterenv] TRUE + +# Open database. +set testfile "rep036.db" +set omethod [convert_method $method] +set mdb [eval {berkdb_open_noerr} -env $masterenv -auto_commit \ + -create $omethod $testfile] +error_check_good dbopen [is_valid_db $mdb] TRUE + +# Write records to the database. +set did [open $dict] +set count 0 +set dictsize 10000 +berkdb srand $rand_init +while { $count < $nentries } { + # + # If nentries exceeds the dictionary size, close + # and reopen to start from the beginning again. 
+ if { [expr [expr $count + 1] % $dictsize] == 0 } { + close $did + set did [open $dict] + } + + gets $did str + set key WRITER.$writerid.$str + set str [reverse $str] + + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set txn "-txn $t" + +# If using deadlock detection, uncomment this and comment the +# following put statement. +# # Writing to this database can deadlock. If we do, let the +# # deadlock detector break the lock, wait a second, and try again. +# while { [catch {eval {$mdb put}\ +# $txn {$key [chop_data $method $str]}} ret] } { +# error_check_good deadlock [is_substr $ret DB_LOCK_DEADLOCK] 1 +# tclsleep 1 +# } + + set ret [eval \ + {$mdb put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + + if { [expr $count % 100] == 1 } { + puts "Wrote WRITER.$writerid record $count" + set sleep [berkdb random_int 0 10] + puts "Writer.$writerid sleeping $sleep seconds" + tclsleep $sleep + } + incr count +} +close $did + +# Clean up. +# Uncomment following line if using deadlock detector. +#error_check_good kill_deadlock_detector [tclkill $dpid] "" +error_check_good mdb_close [$mdb close] 0 +error_check_good masterenv_close [$masterenv close] 0 +replclose $testdir/MSGQUEUEDIR + +# Communicate with parent by creating a marker file. +set markerenv [berkdb_env -create -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker [eval "berkdb_open \ + -create -btree -auto_commit -env $markerenv $writerid.db"] +error_check_good marker_close [$marker close] 0 +error_check_good markerenv_close [$markerenv close] 0 diff --git a/test/tcl/rep037.tcl b/test/tcl/rep037.tcl new file mode 100644 index 00000000..fa5875a9 --- /dev/null +++ b/test/tcl/rep037.tcl @@ -0,0 +1,283 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep037 +# TEST Test of internal initialization and page throttling. +# TEST +# TEST One master, one client, force page throttling. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put one more record to the master. +# TEST Verify page throttling occurred. +# +proc rep037 { method { niter 1500 } { tnum "037" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set saved_args $args + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with various configurations. + set configopts { dup bulk clean noclean } + foreach r $test_recopts { + foreach c $configopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + if { $c == "dup" && $databases_in_memory } { + puts "Skipping rep$tnum for dup\ + with in-memory databases." 
+ continue + } + set args $saved_args + puts "Rep$tnum ($method $c $r $args):\ + Test of internal init with page\ + throttling $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep037_sub $method $niter $tnum $l $r $c $args + } + } + } +} + +proc rep037_sub { method niter tnum logset recargs config largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # + # If using bulk processing, just use clean. We could add + # another control loop to do bulk+clean and then bulk+noclean + # but that seems like overkill. + # + set bulk 0 + set clean $config + if { $config == "bulk" } { + set bulk 1 + set clean "clean" + } + # + # If using dups do not clean the env. We want to keep the + # database around to a dbp and cursor to open. 
+ # + set dup 0 + if { $config == "dup" } { + set dup 1 + set clean "noclean" + } + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + $masterenv rep_limit 0 [expr 32 * 1024] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + if { $bulk } { + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + } + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + if { $dup } { + # + # Create a known db for dup cursor testing. + # + puts "\tRep$tnum.a0: Creating dup db." + if { $databases_in_memory == 1 } { + set dupfile { "" "dup.db" } + } else { + set dupfile "dup.db" + } + set dargs [convert_args $method $largs] + set omethod [convert_method $method] + set dupdb [eval {berkdb_open_noerr} -env $masterenv \ + -auto_commit -create -mode 0644 $omethod $dargs $dupfile] + $dupdb close + } + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Create a gap requiring internal initialization. 
+ set flags "" + set cid 2 + set dbhandle NULL + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + puts "\tRep$tnum.e: Reopen client ($clean)." + if { $clean == "clean" } { + env_cleanup $clientdir + } + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # + # If testing duplicate cursors, open and close a dup cursor now. All + # we need to do is create a dup cursor and then close both + # cursors before internal init begins. That will make sure + # that the lockout is working correctly. + # + if { $dup } { + puts "\tRep$tnum.e.1: Open/close dup cursor." + set dupdb [eval {berkdb_open_noerr} -env $clientenv $dupfile] + set dbc [$dupdb cursor] + set dbc2 [$dbc dup] + $dbc2 close + $dbc close + $dupdb close + } + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + if { $clean == "noclean" } { + puts "\tRep$tnum.e.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. + # + set entries 10 + eval rep_test \ + $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + } + + puts "\tRep$tnum.f: Verify logs and databases" + set verify_subset \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" }] + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $verify_subset 1 1 + + puts "\tRep$tnum.g: Verify throttling." + if { $niter > 1000 } { + set nthrottles \ + [stat_field $masterenv rep_stat "Transmission limited"] + error_check_bad nthrottles $nthrottles -1 + error_check_bad nthrottles $nthrottles 0 + } + + # Make sure log files are on-disk or not as expected. 
+ check_log_location $masterenv + check_log_location $clientenv + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep038.tcl b/test/tcl/rep038.tcl new file mode 100644 index 00000000..cb621dd7 --- /dev/null +++ b/test/tcl/rep038.tcl @@ -0,0 +1,268 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep038 +# TEST Test of internal initialization and ongoing master updates. +# TEST +# TEST One master, one client. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put more records on master while initialization is in progress. +# +proc rep038 { method { niter 200 } { tnum "038" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with various options, such as in-memory databases, + # forcing an archive during the middle of init, and normal. 
+ # Skip recovery with in-memory logging - it doesn't make sense. + set testopts { normal archive } + foreach r $test_recopts { + foreach t $testopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $t $r $args): Test of\ + internal init with new records $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep038_sub $method $niter $tnum $l $r $t $args + } + } + } +} + +proc rep038_sub { method niter tnum logset recargs testopt largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + $masterenv rep_limit 0 0 + + # Run rep_test in the master only. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + if { $databases_in_memory } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set mdb [eval {berkdb_open} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile ] + error_check_good reptest_db [is_valid_db $mdb] TRUE + + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master beyond the first log file. + eval rep_test\ + $method $masterenv $mdb $niter $start $start 0 $largs + incr start $niter + + puts "\tRep$tnum.a.1: Run db_archive on master." + if { $m_logtype == "on-disk" } { + $masterenv log_flush + eval exec $util_path/db_archive -d -h $masterdir + } + # + # Make sure we have moved beyond the first log file. + # + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > 1 } { + set stop 1 + } + + } + + puts "\tRep$tnum.b: Open client." + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + $clientenv rep_limit 0 0 + set envlist "{$masterenv 1} {$clientenv 2}" + # + # We want to simulate a master continually getting new + # records while an update is going on. Simulate that + # for several iterations and then let the messages finish + # all their processing. 
+ # + set loop 15 + set i 0 + set entries 100 + set archived 0 + set start $niter + set init 0 + while { $i < $loop } { + set nproced 0 + set start [expr $start + $entries] + eval rep_test \ + $method $masterenv $mdb $entries $start $start 0 $largs + incr start $entries + incr nproced [proc_msgs_once $envlist NONE err] + error_check_bad nproced $nproced 0 + # + # If we are testing archiving, we need to make sure that + # the first_lsn for internal init (the last log file we + # have when we first enter init) is no longer available. + # So, the first time through we record init_log, and then + # on subsequent iterations we'll wait for the last log + # to move further. Force a checkpoint and archive. + # + if { $testopt == "archive" && $archived == 0 } { + set clstat [exec $util_path/db_stat \ + -N -r -R A -h $clientdir] + if { $init == 0 && \ + [is_substr $clstat "SYNC_PAGE"] } { + set init_log [get_logfile $masterenv last] + set init 1 + } + if { $init == 0 && \ + [is_substr $clstat "SYNC_LOG"] } { + set init_log [get_logfile $masterenv last] + set init 1 + } + set last_master_log [get_logfile $masterenv last] + set first_master_log [get_logfile $masterenv first] + if { $init && $m_logtype == "on-disk" && \ + $last_master_log > $init_log } { + $masterenv txn_checkpoint -force + $masterenv test force noarchive_timeout + set res [eval exec $util_path/db_archive \ + -d -h $masterdir] + set newlog [get_logfile $masterenv first] + set archived 1 + error_check_good logs \ + [expr $newlog > $init_log] 1 + } elseif { $init && $m_logtype == "in-memory" && \ + $first_master_log > $init_log } { + $masterenv txn_checkpoint -force + $masterenv test force noarchive_timeout + set archived 1 + } + } + incr i + } + set cdb [eval {berkdb_open_noerr} -env $clientenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile] + error_check_good reptest_db [is_valid_db $cdb] TRUE + process_msgs $envlist + + puts "\tRep$tnum.c: Verify logs and databases" + if { $databases_in_memory } 
{ + rep_verify_inmem $masterenv $clientenv $mdb $cdb + } else { + rep_verify $masterdir $masterenv $clientdir $clientenv 1 + } + + # Add records to the master and update client. + puts "\tRep$tnum.d: Add more records and check again." + eval rep_test $method $masterenv $mdb $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + if { $databases_in_memory } { + rep_verify_inmem $masterenv $clientenv $mdb $cdb + } else { + rep_verify $masterdir $masterenv $clientdir $clientenv 1 + } + + # Make sure log files are on-disk (or not) as expected. + check_log_location $masterenv + check_log_location $clientenv + + error_check_good mdb_close [$mdb close] 0 + error_check_good cdb_close [$cdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep039.tcl b/test/tcl/rep039.tcl new file mode 100644 index 00000000..3ece0597 --- /dev/null +++ b/test/tcl/rep039.tcl @@ -0,0 +1,463 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep039 +# TEST Test of interrupted internal initialization. The +# TEST interruption is due to a changed master, or the client crashing, +# TEST or both. +# TEST +# TEST One master, two clients. +# TEST Generate several log files. Remove old master log files. +# TEST Restart client, optionally having "cleaned" client env dir. Either +# TEST way, this has the effect of forcing an internal init. +# TEST Interrupt the internal init. +# TEST Vary the number of times we process messages to make sure +# TEST the interruption occurs at varying stages of the first internal +# TEST initialization. +# TEST +# TEST Run for btree and queue only because of the number of permutations. 
+# TEST +proc rep039 { method { niter 200 } { tnum "039" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Run for btree and queue methods only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] == 0 && [is_queue $method] == 0 } { + puts "Rep$tnum: skipping for non-btree, non-queue method." + return + } + + # Skip for mixed-mode logging -- this test has a very large + # set of iterations already. + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed mode logging." + return + } + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set args [convert_args $method $args] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + set cleanopts { noclean clean } + set archopts { archive noarchive } + set nummsgs 4 + set announce {puts "Rep$tnum ($method $r $clean $a $crash $l $args):\ + Test of internal init. $i message iters. 
\ + Test $cnt of $maxtest tests $with recovery $msg $msg2."} + foreach r $test_recopts { + if { $r == "-recover" && !$is_windows_test && + !$is_hp_test && !$repfiles_in_memory } { + set crashopts { master_change client_crash both } + } else { + set crashopts { master_change } + } + # Only one of the three sites in the replication group needs to + # be tested with in-memory logs: the "client under test". + # + if { $r == "-recover" } { + set cl_logopts { on-disk } + set with "with" + } else { + set cl_logopts { on-disk in-memory } + set with "without" + } + set maxtest [expr [llength $crashopts] * \ + [llength $cleanopts] * \ + [llength $archopts] * \ + [llength $cl_logopts] * \ + [expr $nummsgs]] + set cnt 1 + foreach crash $crashopts { + foreach clean $cleanopts { + foreach a $archopts { + foreach l $cl_logopts { + for { set i 1 } \ + { $i <= $nummsgs } \ + { incr i } { + eval $announce + rep039_sub $method \ + $niter $tnum $r \ + $clean $a $crash \ + $l $i $args + incr cnt + } + } + } + } + } + } +} + +proc rep039_sub \ + { method niter tnum recargs clean archive crash cl_logopt pmsgs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set master_change false + set client_crash false + if { $crash == "master_change" } { + set master_change true + } elseif { $crash == "client_crash" } { + set client_crash true + } elseif { $crash == "both" } { + set master_change true + set client_crash true + } else { + error "FAIL:[timestamp] '$crash' is an unrecognized crash type" + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + # This test has three replication sites: a master, a client whose + # behavior is under test, and another client. 
We'll call them + # "A", "B" and "C". At one point during the test, we may (depending on + # the setting of $master_change) switch roles between the master and the + # other client. + # + # The initial site/role assignments are as follows: + # + # A = master + # B = client under test + # C = other client + # + # In the case where we do switch roles, the roles become: + # + # A = other client + # B = client under test (no change here) + # C = master + # + # Although the real names are A, B, and C, we'll use mnemonic names + # whenever possible. In particular, this means that we'll have to + # re-jigger the mnemonic names after the role switch. + + file mkdir [set dirs(A) $testdir/SITE_A] + file mkdir [set dirs(B) $testdir/SITE_B] + file mkdir [set dirs(C) $testdir/SITE_C] + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + # Set up the three sites: A, B, and C will correspond to EID's + # 1, 2, and 3 in the obvious way. As we start out, site A is always the + # master. + # + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dirs(A) -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd $recargs -rep_master] + + # Open a client + repladd 2 + set txn_arg [adjust_txnargs $cl_logopt] + set log_arg [adjust_logargs $cl_logopt] + if { $cl_logopt == "on-disk" } { + # Override in this case, because we want to specify log_buffer. 
+ set log_arg "-log_buffer $log_buf" + } + set env_B_cmd "berkdb_env_noerr -create $txn_arg \ + $verbargs $repmemargs \ + $log_arg -log_max $log_max -errpfx SITE_B \ + -home $dirs(B) -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd $recargs -rep_client] + + # Open 2nd client + repladd 3 + set env_C_cmd "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirs(C) -rep_transport \[list 3 replsend\]" + set envs(C) [eval $env_C_cmd $recargs -rep_client] + + # Turn off throttling for this test. + foreach site [array names envs] { + $envs($site) rep_limit 0 0 + } + + # Bring the clients online by processing the startup messages. + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # Set up the (indirect) mnemonic role names for the first part of the + # test. + set master A + set test_client B + set other C + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $envs($master) test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $envs($master) NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Create a gap requiring internal initialization. + set flags "" + set cid 2 + set dbhandle NULL + set m_logtype "on-disk" + set start [push_master_ahead $method $envs($master) $dirs($master) \ + $m_logtype $envs($test_client) $cid $dbhandle $start $niter $flags $largs] + + set envlist "{$envs($master) 1} {$envs($other) 3}" + process_msgs $envlist + + if { $archive == "archive" } { + puts "\tRep$tnum.d: Run db_archive on other client." 
+ $envs($other) log_flush + set res [eval exec $util_path/db_archive -l -h $dirs($other)] + error_check_bad \ + log.1.present [lsearch -exact $res log.0000000001] -1 + set res [eval exec $util_path/db_archive -d -h $dirs($other)] + set res [eval exec $util_path/db_archive -l -h $dirs($other)] + error_check_good \ + log.1.gone [lsearch -exact $res log.0000000001] -1 + } else { + puts "\tRep$tnum.d: Skipping db_archive on other client." + } + + puts "\tRep$tnum.e: Reopen test client ($clean)." + if { $clean == "clean" } { + env_cleanup $dirs($test_client) + } + + # (The test client is always site B, EID 2.) + # + set envs(B) [eval $env_B_cmd $recargs -rep_client] + error_check_good client_env [is_valid_env $envs(B)] TRUE + $envs(B) rep_limit 0 0 + + # Hold an open database handle while doing internal init, to make sure + # no back lock interactions are happening. But only do so some of the + # time, and of course only if it's reasonable to expect the database to + # exist at this point. (It won't, if we're using in-memory databases + # and we've just started the client with recovery, since recovery blows + # away the mpool.) Set up database as in-memory or on-disk first. + # + if { $databases_in_memory } { + set dbname { "" "test.db" } + set have_db [expr {$recargs != "-recover"}] + } else { + set dbname "test.db" + set have_db true + } + + if {$clean == "noclean" && $have_db && [berkdb random_int 0 1] == 1} { + puts "\tRep$tnum.g: Hold open db handle from client app." 
+ set cdb [eval\ + {berkdb_open_noerr -env} $envs($test_client) $dbname] + error_check_good dbopen [is_valid_db $cdb] TRUE + set ccur [$cdb cursor] + error_check_good curs [is_valid_cursor $ccur $cdb] TRUE + set ret [$ccur get -first] + set kd [lindex $ret 0] + set key [lindex $kd 0] + error_check_good cclose [$ccur close] 0 + } else { + puts "\tRep$tnum.g: (No client app handle will be held.)" + set cdb "NONE" + } + + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + proc_msgs_once $envlist + + # + # We want to simulate a master continually getting new + # records while an update is going on. + # + set entries 10 + eval rep_test $method $envs($master) NULL $entries $start $start 0 $largs + incr start $entries + # + # We call proc_msgs_once N times to get us into page recovery: + # 1. Send master messages and client finds master. + # 2. Master replies and client does verify. + # 3. Master gives verify_fail and client does update_req. + # 4. Master send update info and client does page_req. + # + # We vary the number of times we call proc_msgs_once (via pmsgs) + # so that we test switching master at each point in the + # internal initialization processing. + # + set nproced 0 + puts "\tRep$tnum.f: Get partially through initialization ($pmsgs iters)" + for { set i 1 } { $i < $pmsgs } { incr i } { + incr nproced [proc_msgs_once $envlist] + } + + if { [string is true $master_change] } { + replclear 1 + replclear 3 + puts "\tRep$tnum.g: Downgrade/upgrade master." + + # Downgrade the existing master to a client, switch around the + # roles, and then upgrade the newly appointed master. + error_check_good downgrade [$envs($master) rep_start -client] 0 + + set master C + set other A + + error_check_good upgrade [$envs($master) rep_start -master] 0 + } + + # Simulate a client crash: simply abandon the handle without closing it. + # Note that this doesn't work on Windows, because there you can't remove + # a file if anyone (including yourself) has it open. 
This also does not + # work on HP-UX, because there you are not allowed to open a second + # handle on an env. This won't work with in-memory replication files + # because the missing internal init file is the mechanism for cleaning + # up databases from the partial internal init before the client crash. + # + # Note that crashing only makes sense with "-recover". + # + if { [string is true $client_crash] } { + error_check_good assert [string compare $recargs "-recover"] 0 + + set abandoned_env $envs($test_client) + set abandoned true + + set envs($test_client) [eval $env_B_cmd $recargs -rep_client] + $envs($test_client) rep_limit 0 0 + + # Again, remember: whatever the current roles, a site and its EID + # stay linked always. + # + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + } else { + set abandoned false + } + + process_msgs $envlist + # + # Now simulate continual updates to the new master. Each + # time through we just process messages once before + # generating more updates. + # + set niter 10 + for { set i 0 } { $i < $niter } { incr i } { + set nproced 0 + eval rep_test $method $envs($master) NULL $entries $start \ + $start 0 $largs + incr start $niter + incr nproced [proc_msgs_once $envlist] + error_check_bad nproced $nproced 0 + } + process_msgs $envlist + + puts "\tRep$tnum.h: Verify logs and databases" + # Whether or not we've switched roles, it's always site A that may have + # had its logs archived away. When the $init_test flag is turned on, + # rep_verify allows the site in the second position to have + # (more-)archived logs, so we have to abuse the calling signature a bit + # here to get this to work. (I.e., even when A is still master and C is + # still the other client, we have to pass things in this order so that + # the $init_test different-sized-logs trick can work.) + # + set init_test 1 + rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test + + # Process messages again in case we are running with debug_rop. 
+ process_msgs $envlist + rep_verify $dirs($master) $envs($master) \ + $dirs($test_client) $envs($test_client) $init_test + + # Add records to the master and update client. + puts "\tRep$tnum.i: Add more records and check again." + set entries 10 + eval rep_test $method $envs($master) NULL $entries $start \ + $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + + # Check again that everyone is identical. + rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test + process_msgs $envlist + rep_verify $dirs($master) $envs($master) \ + $dirs($test_client) $envs($test_client) $init_test + + if {$cdb != "NONE"} { + if {$abandoned} { + # The $cdb was opened in an env which was then + # abandoned, recovered, marked panic'ed. We don't + # really care; we're just trying to clean up resources. + # + catch {$cdb close} + } else { + error_check_good clientdb_close [$cdb close] 0 + } + } + error_check_good masterenv_close [$envs($master) close] 0 + error_check_good clientenv_close [$envs($test_client) close] 0 + error_check_good clientenv2_close [$envs($other) close] 0 + if { $abandoned } { + catch {$abandoned_env close} + } + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep040.tcl b/test/tcl/rep040.tcl new file mode 100644 index 00000000..185bd555 --- /dev/null +++ b/test/tcl/rep040.tcl @@ -0,0 +1,244 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep040 +# TEST Test of racing rep_start and transactions. +# TEST +# TEST One master, one client. +# TEST Have master in the middle of a transaction. +# TEST Call rep_start to make master a client. +# TEST Commit the transaction. +# TEST Call rep_start to make master the master again. +# +proc rep040 { method { niter 200 } { tnum "040" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. 
+ if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $args):\ + Test of rep_start racing txns $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep040_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep040_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set omethod [convert_method $method] + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -errpfx MASTER $repmemargs \ + -home $masterdir $verbargs -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -errpfx CLIENT $repmemargs \ + -home $clientdir $verbargs -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Set up databases in-memory or on-disk. + if { $databases_in_memory } { + set testfile { "" "rep040.db" } + set testfile1 { "" "rep040A.db" } + } else { + set testfile "rep040.db" + set testfile1 "rep040A.db" + } + + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit -create \ + -mode 0644} $largs $omethod $testfile] + error_check_good rep_db [is_valid_db $db] TRUE + + set db1 [eval {berkdb_open_noerr -env $masterenv -auto_commit -create \ + -mode 0644} $largs $omethod $testfile1] + error_check_good rep_db [is_valid_db $db1] TRUE + + set key [expr $niter + 100] + set key2 [expr $niter + 200] + set data "data1" + set newdata "rep040test" + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." 
+ eval rep_test $method $masterenv $db $niter 0 0 0 $largs + process_msgs $envlist + + # Get some data on a page + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set ret [$db put -txn $t $key [chop_data $method $data]] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + process_msgs $envlist + + # + # Start 2 txns. One that will commit early and one we'll hold + # open a while to test for the warning message. + # + # Now modify the data but don't commit it yet. This will + # update the same page and update the page LSN. + # + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set t2 [$masterenv txn] + error_check_good txn [is_valid_txn $t2 $masterenv] TRUE + set ret [$db put -txn $t $key [chop_data $method $newdata]] + error_check_good put $ret 0 + set ret [$db1 put -txn $t2 $key2 [chop_data $method $newdata]] + error_check_good put $ret 0 + process_msgs $envlist + + # Fork child process and then sleep for more than 1 minute so + # that the child process must block on the open transaction and + # it will print out the wait message. + # + set outfile "$testdir/rep040script.log" + puts "\tRep$tnum.b: Fork master child process and sleep 90 seconds" + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep040script.tcl $outfile $masterdir $databases_in_memory &] + + tclsleep 10 + process_msgs $envlist + error_check_good txn [$t commit] 0 + tclsleep 80 + + error_check_good txn [$t2 commit] 0 + puts "\tRep$tnum.c: Waiting for child ..." + process_msgs $envlist + watch_procs $pid 5 + + process_msgs $envlist + + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set ret [$db put -txn $t $key [chop_data $method $data]] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + error_check_good dbclose [$db close] 0 + error_check_good dbclose [$db1 close] 0 + process_msgs $envlist + + # Check that databases are in-memory or on-disk as expected. 
+ check_db_location $masterenv $testfile + check_db_location $masterenv $testfile1 + check_db_location $clientenv $testfile + check_db_location $clientenv $testfile1 + + check_log_location $masterenv + check_log_location $clientenv + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + # + # Check we detected outstanding txn (t2). + # The message we check for is produced only if the build was + # configured with --enable-diagnostic. + set conf [berkdb getconfig] + if { [is_substr $conf "diagnostic"] == 1 } { + puts "\tRep$tnum.d: Verify waiting and logs" + set ret [catch {open $outfile} ofid] + error_check_good open $ret 0 + set contents [read $ofid] + error_check_good \ + detect [is_substr $contents "Waiting for op_cnt"] 1 + close $ofid + } + + # Check that master and client logs and dbs are identical. + set stat [catch {eval exec $util_path/db_printlog \ + -h $masterdir > $masterdir/prlog} result] + error_check_good stat_mprlog $stat 0 + set stat [catch {eval exec $util_path/db_printlog \ + -h $clientdir > $clientdir/prlog} result] + error_check_good stat_cprlog $stat 0 + error_check_good log_cmp \ + [filecmp $masterdir/prlog $clientdir/prlog] 0 + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep040script.tcl b/test/tcl/rep040script.tcl new file mode 100644 index 00000000..d4fedc82 --- /dev/null +++ b/test/tcl/rep040script.tcl @@ -0,0 +1,74 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep040 script - transaction concurrency with rep_start +# +# Repscript exists to call rep_start. The main script will immediately +# start a transaction, do an operation, then sleep a long time before +# commiting the transaction. We should be blocked on the transaction +# when we call rep_start. The main process should sleep long enough +# that we get a diagnostic message. 
+# +# Usage: repscript masterdir clientdir +# masterdir: master env directory +# clientdir: client env directory +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl +global databases_in_memory + +set usage "repscript masterdir inmemdbflag" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set databases_in_memory [ lindex $argv 1 ] + +puts "databases_in_memory is $databases_in_memory" + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +repladd 1 +repladd 2 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir \ + -errfile /dev/stderr -errpfx CHILD.MA \ + -txn -rep_master -rep_transport \[list 1 replsend\]" +# set ma_cmd "berkdb_env_noerr -home $masterdir \ +# -verbose {rep on} -errfile /dev/stderr -errpfx CHILD.MA \ +# -txn -rep_master -rep_transport \[list 1 replsend\]" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +puts "Master open" +# Downgrade while transaction is open +error_check_good downgrade [$masterenv rep_start -client] 0 + +tclsleep 10 +# Upgrade again +error_check_good upgrade [$masterenv rep_start -master] 0 +# +# Create a btree database now. 
+# +rep_test btree $masterenv NULL 10 0 0 0 + +# Close the envs +puts "Closing Masterenv $masterenv" +error_check_good script_master_close [$masterenv close] 0 +puts "\tRepscript completed successfully" diff --git a/test/tcl/rep041.tcl b/test/tcl/rep041.tcl new file mode 100644 index 00000000..371cd77f --- /dev/null +++ b/test/tcl/rep041.tcl @@ -0,0 +1,242 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep041 +# TEST Turn replication on and off at run-time. +# TEST +# TEST Start a master with replication OFF (noop transport function). +# TEST Run rep_test to advance log files and archive. +# TEST Start up client; change master to working transport function. +# TEST Now replication is ON. +# TEST Do more ops, make sure client is up to date. +# TEST Close client, turn replication OFF on master, do more ops. +# TEST Repeat from point A. +# +proc rep041 { method { niter 500 } { tnum "041" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set saved_args $args + + set logsets [create_logsets 2] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + + set envargs "" + set args $saved_args + puts "Rep$tnum ($method $envargs $r $args):\ + Turn replication on and off, $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep041_sub $method $niter $tnum $envargs \ + $l $r $args + } + } +} + +proc rep041_sub { method niter tnum envargs logset recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + puts "\tRep$tnum.a: Open master with replication OFF." + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $m_logargs -log_max $log_max $envargs -errpfx MASTER \ + $repmemargs -home $masterdir -rep" + set masterenv [eval $ma_envcmd $recargs] + $masterenv rep_limit 0 0 + + # Run rep_test in the master to advance log files. + puts "\tRep$tnum.b: Running rep_test to create some log files." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + # Reset transport function to replnoop, and specify that + # this env will be master. + error_check_good \ + transport_noop [$masterenv rep_transport {1 replnoop}] 0 + error_check_good rep_on [$masterenv rep_start -master] 0 + + # If master is on-disk, archive. + if { $m_logtype != "in-memory" } { + puts "\tRep$tnum.c: Run log_archive - some logs should be removed." + $masterenv log_flush + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_bad log.1.present [lsearch -exact $res log.0000000001] -1 + set res [eval exec $util_path/db_archive -d -h $masterdir] + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good log.1.gone [lsearch -exact $res log.0000000001] -1 + } + + # Run rep_test some more - this simulates running without clients. + puts "\tRep$tnum.d: Running rep_test." + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + # Open a client + puts "\tRep$tnum.e: Open client." 
+ repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $verbargs \ + $c_logargs -log_max $log_max $envargs -errpfx CLIENT \ + $repmemargs -home $clientdir \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + $clientenv rep_limit 0 0 + $clientenv rep_request 4000 128000 + + # Set up envlist for processing messages later. + set envlist "{$masterenv 1} {$clientenv 2}" + + # Turn replication on and off more than once. + set repeats 2 + for { set i 0 } { $i < $repeats } { incr i } { + + puts "\tRep$tnum.f.$i: Turn replication ON." + # Reset master transport function to replsend. + error_check_good transport_on \ + [$masterenv rep_transport {1 replsend}] 0 + + # Have the master announce itself so messages will pass. + error_check_good rep_on [$masterenv rep_start -master] 0 + + # Create some new messages, and process them. + set nentries 50 + eval rep_test \ + $method $masterenv NULL $nentries $start $start 0 $largs + incr start $nentries + process_msgs $envlist + + # If there are log records queued, client is not yet caught + # up. Perform innocuous rep_flush and process messages a + # few times to allow client rerequests to occur. Give it + # more than one chance, allowing at least twice the default + # maximum client retransmission request time. + for { set j 0 } { $j < 3 } { incr j } { + if { [stat_field $clientenv \ + rep_stat "Current log records queued"] == 0 } { + break + } + error_check_good master_flush [$masterenv rep_flush] 0 + process_msgs $envlist + tclsleep 1 + } + + puts "\tRep$tnum.g.$i: Verify that client is up to date." + + # Check that master and client contents match, to verify + # that client is up to date. + rep_verify $masterdir $masterenv $clientdir $clientenv 0 1 0 + + # Process messages again -- the rep_verify created some. + process_msgs $envlist + + puts "\tRep$tnum.h.$i: Turn replication OFF on master." 
+ error_check_good \ + transport_off [$masterenv rep_transport {1 replnoop}] 0 + + puts "\tRep$tnum.i.$i: Running rep_test in replicated env." + eval rep_test \ + $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + puts "\tRep$tnum.j.$i:\ + Process messages; none should be available." + set nproced [proc_msgs_once $envlist NONE err] + error_check_good no_messages $nproced 0 + + # Client and master should NOT match. + puts "\tRep$tnum.k.$i: Master and client should NOT match." + rep_verify $masterdir $masterenv $clientdir $clientenv 0 0 0 + + } + + error_check_good clientenv_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep042.tcl b/test/tcl/rep042.tcl new file mode 100644 index 00000000..c334c526 --- /dev/null +++ b/test/tcl/rep042.tcl @@ -0,0 +1,197 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep042 +# TEST Concurrency with updates. +# TEST +# TEST Verify racing role changes and updates don't result in +# TEST pages with LSN 0,1. Set up an environment that is master. +# TEST Spawn child process that does a delete, but using the +# TEST $env check so that it sleeps in the middle of the call. +# TEST Master downgrades and then sleeps as a client so that +# TEST child will run. Verify child does not succeed (should +# TEST get read-only error) due to role change in the middle of +# TEST its call. +proc rep042 { method { niter 10 } { tnum "042" } args } { + + source ./include.tcl + global repfiles_in_memory + + # Valid for all access methods. 
+ if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + + puts "Rep$tnum ($method $r):\ + Concurrency with updates $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep042_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep042_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global perm_response_list + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_cmd "berkdb_env_noerr -create $repmemargs \ + -log_max 1000000 $m_txnargs $m_logargs $verbargs \ + -home $masterdir -rep_master -errpfx MASTER \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_cmd $recargs] + + # Open a client + repladd 2 + set cl_cmd "berkdb_env_noerr -create -home $clientdir $repmemargs \ + $c_txnargs $c_logargs $verbargs -errpfx CLIENT -rep_client \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_cmd $recargs] + + # Bring the client online. + process_msgs "{$masterenv 1} {$clientenv 2}" + + puts "\tRep$tnum.a: Create and populate database." + set dbname rep042.db + set db [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + for { set i 1 } { $i < $niter } { incr i } { + set t [$masterenv txn] + error_check_good db_put \ + [eval $db put -txn $t $i [chop_data $method data$i]] 0 + error_check_good txn_commit [$t commit] 0 + } + process_msgs "{$masterenv 1} {$clientenv 2}" + + set ops {del truncate} + foreach op $ops { + # Fork child process on client. The child will do a delete. + set sleepval 4 + set scrlog $testdir/repscript.log + puts "\tRep$tnum.b: Fork child process on client ($op)." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep042script.tcl $scrlog \ + $masterdir $sleepval $dbname $op &] + + # Wait for child process to start up. + while { 1 } { + if { [file exists $masterdir/marker.db] == 0 } { + tclsleep 1 + } else { + tclsleep 1 + break + } + } + + puts "\tRep$tnum.c: Downgrade during child $op." + error_check_good downgrade [$masterenv rep_start -client] 0 + + puts "\tRep$tnum.d: Waiting for child ..." + # Watch until the child is done. + watch_procs $pid 5 + puts "\tRep$tnum.e: Upgrade to master again ..." 
+ error_check_good upgrade [$masterenv rep_start -master] 0 + set end [expr $niter * 2] + for { set i $niter } { $i <= $end } { incr i } { + set t [$masterenv txn] + error_check_good db_put \ + [eval $db put -txn $t $i [chop_data $method data$i]] 0 + error_check_good txn_commit [$t commit] 0 + } + process_msgs "{$masterenv 1} {$clientenv 2}" + + # We expect to find the error "attempt to modify a read-only + # database." If we don't, report what we did find as a failure. + set readonly_error [check_script $scrlog "read-only"] + if { $readonly_error != 1 } { + set errstrings [eval findfail $scrlog] + if { [llength $errstrings] > 0 } { + puts "FAIL: unexpected error(s)\ + found in file $scrlog:$errstrings" + } + } + fileremove -f $masterdir/marker.db + } + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR +} + +proc check_script { log str } { + set ret 0 + set res [catch {open $log} id] + if { $res != 0 } { + puts "FAIL: open of $log failed: $id" + # Return 0 + return $ret + } + while { [gets $id val] != -1 } { +# puts "line: $val" + if { [is_substr $val $str] } { + set ret 1 + break + } + } + close $id + return $ret +} diff --git a/test/tcl/rep042script.tcl b/test/tcl/rep042script.tcl new file mode 100644 index 00000000..d18f56da --- /dev/null +++ b/test/tcl/rep042script.tcl @@ -0,0 +1,78 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep042 script - concurrency with updates. 
+ +# Usage: repscript masterdir sleepval dbname +# masterdir: master env directory +# sleepval: sleep value (in secs) to send to env test_check +# dbname: name of database to use +# op: operation: one of del or truncate +# +source ./include.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir sleepval dbname op" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set sleepval [ lindex $argv 1 ] +set dbname [lindex $argv 2] +set op [lindex $argv 3] + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +# +repladd 1 +repladd 2 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir \ + -txn -rep_master -rep_transport \[list 1 replsend\]" +# set ma_cmd "berkdb_env_noerr -home $masterdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_master -rep_transport \[list 1 replsend\]" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +puts "Master open" +set db [eval "berkdb_open -auto_commit -env $masterenv $dbname"] +error_check_good dbopen [is_valid_db $db] TRUE + +# Make it so that the process sleeps in the middle of a delete. +$masterenv test check $sleepval + +# Create marker file +set marker [open $masterdir/marker.db w] +close $marker + +if { $op == "del" } { + # Just delete record 1 - we know that one is in there. 
+ set stat [catch {$db del 1} ret] + puts "Stat: $stat" + puts "Ret: $ret" +} elseif { $op == "truncate" } { + set stat [catch {$db truncate} ret] + puts "Stat: $stat" + puts "Ret: $ret" +} else { + puts "Stat: FAIL: invalid operation specified" +} +# Close the envs +error_check_good script_db_close [$db close] 0 +error_check_good script_master_close [$masterenv close] 0 +puts "\tRepscript completed successfully" diff --git a/test/tcl/rep043.tcl b/test/tcl/rep043.tcl new file mode 100644 index 00000000..a6ba74da --- /dev/null +++ b/test/tcl/rep043.tcl @@ -0,0 +1,241 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep043 +# TEST +# TEST Constant writes during upgrade/downgrade. +# TEST +# TEST Three envs take turns being master. Each env +# TEST has a child process which does writes all the +# TEST time. They will succeed when that env is master +# TEST and fail when it is not. + +proc rep043 { method { rotations 25 } { tnum "043" } args } { + + source ./include.tcl + global repfiles_in_memory + + # Skip for record-based methods. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_record_based $method] != 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_record_based $method] == 1 } { + puts "Skipping rep$tnum for record-based methods." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." 
+ continue + } + puts "Rep$tnum ($method $r): Constant writes with \ + rotating master $rotations times $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep043_sub $method $rotations $tnum $l $r $args + } + } +} + +proc rep043_sub { method rotations tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/ENV0 + set clientdir $testdir/ENV1 + set clientdir2 $testdir/ENV2 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + set niter 200 + set testfile rep043.db + set omethod [convert_method $method] + + # Since we're constantly switching master in this test run + # each with a different cache size just to verify that cachesize + # doesn't matter for different sites. + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -errpfx ENV0 -errfile /dev/stderr $verbargs \ + -cachesize {0 4194304 3} -lock_detect default \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set env0 [eval $ma_envcmd $recargs -rep_master] + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -errpfx ENV1 -errfile /dev/stderr $verbargs \ + -cachesize {0 2097152 2} -lock_detect default \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set env1 [eval $cl_envcmd $recargs -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $repmemargs \ + $c2_logargs -errpfx ENV2 -errfile /dev/stderr $verbargs \ + -cachesize {0 1048576 1} -lock_detect default \ + -home $clientdir2 -rep_transport \[list 3 replsend\]" + set env2 [eval $cl2_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$env0 1} {$env1 2} {$env2 3}" + process_msgs $envlist + + # Set up marker file. + set markerenv [berkdb_env -create -home $testdir -txn] + error_check_good marker_open [is_valid_env $markerenv] TRUE + set marker [eval "berkdb_open \ + -create -btree -auto_commit -env $markerenv marker.db"] + + # Start the 3 child processes: one for each env. + set pids {} + set dirlist "0 $masterdir 1 $clientdir 2 $clientdir2" + foreach { writer dir } $dirlist { + puts "\tRep$tnum.a: Fork child process WRITER$writer." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep043script.tcl $testdir/rep043script.log.$writer \ + $dir $writer &] + lappend pids $pid + } + + # For the first iteration, masterenv is $env0. + set masterenv $env0 + set curdir $masterdir + + # Write $niter entries to master, then rotate. 
+ for { set i 0 } { $i < $rotations } { incr i } { + + # Identify current master, determine next master + if { $masterenv == $env0 } { + set nextmasterenv $env1 + set nextdir $clientdir + } elseif { $masterenv == $env1 } { + set nextmasterenv $env2 + set nextdir $clientdir2 + } elseif { $masterenv == $env2 } { + set nextmasterenv $env0 + set nextdir $masterdir + } else { + puts "FAIL: could not identify current master" + return + } + + puts "\tRep$tnum.b.$i: Open master db in $curdir." + set mdb [eval {berkdb_open_noerr} -env $masterenv -auto_commit \ + -mode 0644 $omethod -create $testfile] + error_check_good dbopen [is_valid_db $mdb] TRUE + error_check_good marker_iter [$marker put ITER $i] 0 + + puts "\t\tRep$tnum.c.$i: Put data to master." + for { set j 0 } { $j < $niter } { incr j } { + set key KEY.$i.$j + set data DATA + set t [$masterenv txn] + set stat [catch \ + {eval {$mdb put} -txn $t $key $data} ret] + if { $ret == 0 } { + error_check_good commit [$t commit] 0 + } else { + error_check_good commit [$t abort] 0 + } + } + error_check_good mdb_close [$mdb close] 0 + + # Checkpoint. + error_check_good checkpoint [$masterenv txn_checkpoint] 0 + + process_msgs $envlist + + puts "\t\tRep$tnum.d.$i: Downgrade current master." + error_check_good downgrade [$masterenv rep_start -client] 0 + + puts "\t\tRep$tnum.e.$i: Upgrade next master $nextdir." + error_check_good upgrade [$nextmasterenv rep_start -master] 0 + set masterenv $nextmasterenv + set curdir $nextdir + + process_msgs $envlist + } + + + puts "\tRep$tnum.f: Clean up." + # Tell the child processes we are done. + error_check_good marker_done [$marker put DONE DONE] 0 + error_check_good marker_close [$marker close] 0 + error_check_good markerenv_close [$markerenv close] 0 + + error_check_good env0_close [$env0 close] 0 + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + + # Make sure the child processes are done. 
+ watch_procs $pids 1 + + # Check log files for failures. + for { set n 0 } { $n < 3 } { incr n } { + set file rep043script.log.$n + set errstrings [eval findfail $testdir/$file] + foreach str $errstrings { + puts "FAIL: error message in file $file: $str" + } + } + + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} diff --git a/test/tcl/rep043script.tcl b/test/tcl/rep043script.tcl new file mode 100644 index 00000000..6a1a4a44 --- /dev/null +++ b/test/tcl/rep043script.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep043 script - constant writes to an env which may be +# either a master or a client, or changing between the +# two states +# +# Usage: dir writerid +# dir: Directory of writer +# writerid: i.d. number for writer + +set usage "rep043script dir writerid" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [ lindex $argv 0 ] +set writerid [ lindex $argv 1 ] +set nentries 50 + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +set envid [expr $writerid + 1] +repladd $envid +set name "WRITER.$writerid" + +# Pause a bit to give the master a chance to create the database +# before we try to join. +tclsleep 3 + +# Join the env. 
+set env_cmd "berkdb_env_noerr -home $dir -lock_detect default \ + -errfile /dev/stderr -errpfx WRITER.$writerid \ + -txn -rep_transport \[list $envid replsend\]" +# set env_cmd "berkdb_env_noerr -home $dir \ +# -errfile /dev/stderr -errpfx WRITER.$writerid \ +# -verbose {rep on} -lock_detect default \ +# -txn -rep_transport \[list $envid replsend\]" +set dbenv [eval $env_cmd] +error_check_good script_env_open [is_valid_env $dbenv] TRUE + +# Open database. It's still possible under heavy load that the +# master hasn't created the database, so pause even longer if it's +# not there. +set testfile "rep043.db" +while {[catch {berkdb_open_noerr -errpfx $name -errfile /dev/stderr\ + -env $dbenv -auto_commit $testfile} db]} { + puts "Could not open handle $db, sleeping 1 second." + tclsleep 1 +} +error_check_good dbopen [is_valid_db $db] TRUE + +# Communicate with parent in marker file. +set markerenv [berkdb_env -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker [eval "berkdb_open \ + -create -btree -auto_commit -env $markerenv marker.db"] + +# Write records to the database. +set iter INIT +set olditer $iter +while { [llength [$marker get DONE]] == 0 } { + for { set i 0 } { $i < $nentries } { incr i } { + set kd [$marker get ITER] + if { [llength $kd] == 0 } { + set iter X + } else { + set iter [lindex [lindex $kd 0] 1] + } + if { $iter != $olditer } { + puts "Entry $i: Iter changed from $olditer to $iter" + set olditer $iter + } + + set key WRITER.$writerid.$iter.$i + set str string.$i + + set t [$dbenv txn] + error_check_good txn [is_valid_txn $t $dbenv] TRUE + set stat [catch {$db put -txn $t $key $str} res] + if { $stat == 0 } { +puts "res is $res, commit" + error_check_good txn_commit [$t commit] 0 + } else { +puts "res is $res, abort" + error_check_good txn_abort [$t abort] 0 + } + + # If the handle is dead, get a new one. + if { [is_substr $res DB_REP_HANDLE_DEAD] == 1 } { +puts "Close - dead handle." 
+ error_check_good db_close [$db close] 0 +puts "Getting new handle" + while {[catch {berkdb_open_noerr -env $dbenv\ + -auto_commit $testfile} db]} { + puts "Could not open handle: $db" + tclsleep 1 + } + error_check_good db_open [is_valid_db $db] TRUE + } + + if { [expr $i % 10] == 1 } { + puts "Wrote WRITER.$writerid.$iter.$i record $i" + } + } + tclsleep 1 +} + +# Clean up. +error_check_good db_close [$db close] 0 +error_check_good dbenv_close [$dbenv close] 0 +replclose $testdir/MSGQUEUEDIR +error_check_good marker_close [$marker close] 0 +error_check_good markerenv_close [$markerenv close] 0 diff --git a/test/tcl/rep044.tcl b/test/tcl/rep044.tcl new file mode 100644 index 00000000..2bbf7aff --- /dev/null +++ b/test/tcl/rep044.tcl @@ -0,0 +1,294 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep044 +# TEST +# TEST Test rollbacks with open file ids. +# TEST +# TEST We have one master with two handles and one client. +# TEST Each time through the main loop, we open a db, write +# TEST to the db, and close the db. Each one of these actions +# TEST is propagated to the client, or a roll back is forced +# TEST by swapping masters. + +proc rep044 { method { tnum "044" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # HP-UX can't open two handles on the same env, so it + # can't run this test. + if { $is_hp_test == 1 } { + puts "Skipping rep$tnum for HP-UX." + return + } + + # Set up for on-disk or in-memory databases. 
+ set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + puts "Rep$tnum ($method): Replication with rollbacks\ + and open file ids $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + rep044_sub $method $tnum $l $args + } +} + +proc rep044_sub { method tnum logset largs } { + source ./include.tcl + set orig_tdir $testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set masterdir $testdir/ENV0 + set clientdir $testdir/ENV1 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + set niter 20 + set omethod [convert_method $method] + + # The main loop runs all the permutations of processing/not + # processing the database open to the clients; processing/not + # processing the database writes to the clients; and processing/ + # not processing the database close to the clients. 
Set up the + # options in advance so the loop is not heavily indented. + # + # Each entry displays { open write close }. + # For example { 1 1 0 } means we process messages after the + # db open and the db writes but not after the db close. + + set optionsets { + {1 1 1} + {1 1 0} + {1 0 1} + {1 0 0} + {0 1 1} + {0 1 0} + {0 0 1} + {0 0 0} + } + + # Main loop. + foreach set $optionsets { + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + file mkdir $masterdir + file mkdir $clientdir + + set processopens [lindex $set 0] + set processwrites [lindex $set 1] + set processcloses [lindex $set 2] + + set notdoing {} + if { $processopens == 0 } { + append notdoing " OPENS" + } + if { $processwrites == 0 } { + append notdoing " WRITES" + } + if { $processcloses == 0 } { + append notdoing " CLOSES" + } + if { $notdoing != {} } { + puts "Rep$tnum:\ + Loop with $notdoing not processed to client." + } + + # Open a master. + repladd 1 + set envcmd(M0) "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -lock_detect default $repmemargs \ + -errpfx ENV.M0 $verbargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set menv0 [eval $envcmd(M0) -rep_master] + + # Open second handle on master env. + set envcmd(M1) "berkdb_env_noerr $m_txnargs \ + $m_logargs -lock_detect default $repmemargs \ + -errpfx ENV.M1 $verbargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set menv1 [eval $envcmd(M1)] + error_check_good rep_start [$menv1 rep_start -master] 0 + + # Open a client + repladd 2 + set envcmd(C) "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx ENV.C $verbargs $repmemargs \ + -lock_detect default $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set cenv [eval $envcmd(C) -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$menv0 1} {$cenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.a: Run rep_test in 1st master env." 
+ set start 0 + eval rep_test $method $menv0 NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.b: Open db in 2nd master env." + # Open the db here; we want it to remain open after rep_test. + + # Set up database as in-memory or on-disk. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set db1 [eval {berkdb_open_noerr -env $menv1 -auto_commit \ + -mode 0644} $largs $omethod $dbname] + error_check_good dbopen [is_valid_db $db1] TRUE + + if { $processopens == 1 } { + puts "\tRep$tnum.b1:\ + Process db open messages to client." + process_msgs $envlist + } else { + set start [do_switch $method $niter $start $menv0 $cenv $largs] + } + + puts "\tRep$tnum.c: Write to database in 2nd master." + # We don't use rep_test here, because sometimes we abort. + for { set i 1 } { $i <= $niter } { incr i } { + set t [$menv1 txn] + set key $i + set str STRING.$i + if [catch {eval {$db1 put}\ + -txn $t {$key [chop_data $method $str]}} result] { + # If handle is dead, abort txn, then + # close and reopen db. + error_check_good handle_dead \ + [is_substr $result HANDLE_DEAD] 1 + error_check_good txn_abort [$t abort] 0 + error_check_good close_handle [$db1 close] 0 + set db1 [eval {berkdb_open_noerr \ + -env $menv1 -auto_commit -mode 0644}\ + $largs $omethod $dbname] + } else { + error_check_good txn_commit [$t commit] 0 + } + } + + if { $processwrites == 1 } { + puts "\tRep$tnum.c1:\ + Process db put messages to client." + process_msgs $envlist + } else { + set start [do_switch $method $niter $start $menv0 $cenv $largs] + } + + puts "\tRep$tnum.d: Close database using 2nd master env handle." + error_check_good db_close [$db1 close] 0 + + if { $processcloses == 1 } { + puts "\tRep$tnum.d1:\ + Process db close messages to client." 
+ process_msgs $envlist + } else { + set start [do_switch $method $niter $start $menv0 $cenv $largs] + } + + # Check that databases are in-memory or on-disk as expected. + check_db_location $menv0 + check_db_location $menv1 + check_db_location $cenv + + puts "\tRep$tnum.e: Clean up." + error_check_good menv0_close [$menv0 close] 0 + error_check_good menv1_close [$menv1 close] 0 + error_check_good cenv_close [$cenv close] 0 + + replclose $testdir/MSGQUEUEDIR + } + set testdir $orig_tdir + return +} + +proc do_switch { method niter start masterenv clientenv largs } { + set envlist "{$masterenv 1} {$clientenv 2}" + + # Downgrade master, upgrade client. + error_check_good master_downgrade [$masterenv rep_start -client] 0 + error_check_good client_upgrade [$clientenv rep_start -master] 0 + process_msgs $envlist + + # Run rep_test in the new master. + eval rep_test $method $clientenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Downgrade newmaster, upgrade original master. + error_check_good client_downgrade [$clientenv rep_start -client] 0 + error_check_good master_upgrade [$masterenv rep_start -master] 0 + + # Run rep_test in the restored master. + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + return $start +} diff --git a/test/tcl/rep045.tcl b/test/tcl/rep045.tcl new file mode 100644 index 00000000..78b81da4 --- /dev/null +++ b/test/tcl/rep045.tcl @@ -0,0 +1,281 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep045 +# TEST +# TEST Replication with versions. +# TEST +# TEST Mimic an application where a database is set up in the +# TEST background and then put into a replication group for use. +# TEST The "version database" identifies the current live +# TEST version, the database against which queries are made. 
+# TEST For example, the version database might say the current +# TEST version is 3, and queries would then be sent to db.3. +# TEST Version 4 is prepared for use while version 3 is in use. +# TEST When version 4 is complete, the version database is updated +# TEST to point to version 4 so queries can be directed there. +# TEST +# TEST This test has a master and two clients. One client swaps +# TEST roles with the master, and the other client runs constantly +# TEST in another process. + +proc rep045 { method { tnum "045" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + puts "Rep$tnum ($method): Replication with version\ + databases $msg $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep045_sub $method $tnum $l $args + } +} + +proc rep045_sub { method tnum logset largs } { + source ./include.tcl + set orig_tdir $testdir + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set masterdir $testdir/MASTERDIR + set clientdir0 $testdir/CLIENTDIR0 + set clientdir1 $testdir/CLIENTDIR1 + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + file mkdir $masterdir + file mkdir $clientdir0 + file mkdir $clientdir1 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + set omethod [convert_method $method] + + # Open a master. + repladd 1 + set envcmd(M0) "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -errpfx ENV.M0 $verbargs $repmemargs \ + -errfile /dev/stderr -lock_detect default \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set menv [eval $envcmd(M0) -rep_master] + + # Open a client + repladd 2 + set envcmd(C0) "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx ENV.C0 $verbargs $repmemargs \ + -errfile /dev/stderr -lock_detect default \ + -home $clientdir0 -rep_transport \[list 2 replsend\]" + set cenv0 [eval $envcmd(C0) -rep_client] + + # Open second client. 
+ repladd 3 + set envcmd(C1) "berkdb_env_noerr -create $c2_txnargs \ + $c2_logargs -errpfx ENV.C1 $verbargs $repmemargs \ + -errfile /dev/stderr -lock_detect default \ + -home $clientdir1 -rep_transport \[list 3 replsend\]" + set cenv1 [eval $envcmd(C1) -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$menv 1} {$cenv0 2} {$cenv1 3}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # db_remove in a moment. + # + $menv test force noarchive_timeout + + puts "\tRep$tnum.a: Initialize version database." + # Set up variables so we cycle through version numbers 1 + # through maxversion several times. + if { $databases_in_memory } { + set vname { "" "version.db" } + } else { + set vname "version.db" + } + set version 0 + set maxversion 5 + set iter 12 + set nentries 100 + set start 0 + + # The version db is always btree. + set vdb [eval {berkdb_open_noerr -env $menv -create \ + -auto_commit -mode 0644} -btree $vname] + error_check_good init_version [$vdb put VERSION $version] 0 + error_check_good vdb_close [$vdb close] 0 + process_msgs $envlist + + # Start up a separate process that constantly reads data + # from the current official version. + puts "\tRep$tnum.b: Spawn a child tclsh to do client work." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep045script.tcl $testdir/rep045script.log \ + $clientdir1 $vname $databases_in_memory &] + + # Main loop: update query database, process messages (or don't, + # simulating a failure), announce the new version, process + # messages (or don't), and swap masters. + set version 1 + for { set i 1 } { $i < $iter } { incr i } { + + # If database.N exists on disk, clean it up. 
+ if { $databases_in_memory } { + set dbname { "" "db.$version" } + } else { + set dbname "db.$version" + } + if { [file exists $masterdir/$dbname] == 1 } { + puts "\tRep$tnum.c.$i: Removing old version $version." + error_check_good dbremove \ + [$menv dbremove -auto_commit $dbname] 0 + } + + puts "\tRep$tnum.c.$i: Set up query database $version." + set db [eval berkdb_open_noerr -create -env $menv\ + -auto_commit -mode 0644 $largs $omethod $dbname] + error_check_good db_open [is_valid_db $db] TRUE + eval rep_test $method $menv $db $nentries $start $start 0 $largs + incr start $nentries + error_check_good db_close [$db close] 0 + + # We alternate between processing the messages and + # clearing the messages to simulate a failure. + + set process [expr $i % 2] + if { $process == 1 } { + process_msgs $envlist + } else { + replclear 2 + replclear 3 + } + + # Announce new version. + puts "\tRep$tnum.d.$i: Announce new version $version." + set vdb [eval {berkdb_open_noerr -env $menv \ + -auto_commit -mode 0644} $vname] + error_check_good update_version [$vdb put VERSION $version] 0 + error_check_good vdb_close [$vdb close] 0 + + # Process messages or simulate failure. + if { $process == 1 } { + process_msgs $envlist + } else { + replclear 2 + replclear 3 + } + + # Switch master, update envlist. + puts "\tRep$tnum.e.$i: Switch masters." + set envlist [switch_master $envlist] + + # Update values for next iteration. + set menv [lindex [lindex $envlist 0] 0] + set cenv0 [lindex [lindex $envlist 1] 0] + incr version + if { $version > $maxversion } { + set version 1 + } + } + + # Signal to child that we are done. + set vdb [eval {berkdb_open_noerr -env $menv \ + -auto_commit -mode 0644} $vname] + error_check_good version_done [$vdb put VERSION DONE] 0 + error_check_good vdb_close [$vdb close] 0 + process_msgs $envlist + + # Watch for child to finish. + watch_procs $pid 5 + + puts "\tRep$tnum.f: Clean up." 
+ error_check_good menv_close [$menv close] 0 + error_check_good cenv0_close [$cenv0 close] 0 + error_check_good cenv1_close [$cenv1 close] 0 + + replclose $testdir/MSGQUEUEDIR + + # Check for failures in child's log file. + set errstrings [eval findfail $testdir/rep045script.log] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + + set testdir $orig_tdir + return +} + +proc switch_master { envlist } { + # Find env handles and machine ids. + set menv [lindex [lindex $envlist 0] 0] + set mid [lindex [lindex $envlist 0] 1] + set cenv [lindex [lindex $envlist 1] 0] + set cid [lindex [lindex $envlist 1] 1] + set cenv1 [lindex [lindex $envlist 2] 0] + set cid1 [lindex [lindex $envlist 2] 1] + + # Downgrade master, upgrade client. + error_check_good master_downgrade [$menv rep_start -client] 0 + error_check_good client_upgrade [$cenv rep_start -master] 0 + process_msgs $envlist + + # Adjust envlist. The former client env is the new master, + # and vice versa. + set newenvlist "{$cenv $cid} {$menv $mid} {$cenv1 $cid1}" + return $newenvlist +} diff --git a/test/tcl/rep045script.tcl b/test/tcl/rep045script.tcl new file mode 100644 index 00000000..4d31d37e --- /dev/null +++ b/test/tcl/rep045script.tcl @@ -0,0 +1,164 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep045 script - replication with version dbs. 
+# +# Usage: rep045script clientdir vfile +# clientdir: client env directory +# vfile: name of version file +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript clientdir vfile" + +# Verify usage +if { $argc != 3 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set clientdir [ lindex $argv 0 ] +set vfile [ lindex $argv 1 ] +global databases_in_memory +set databases_in_memory [ lindex $argv 2 ] +set niter 50 + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +repladd 3 + +# Join the client env. +set cl_cmd "berkdb_env_noerr -home $clientdir \ + -txn -rep_client -rep_transport \[list 3 replsend\]" +# set cl_cmd "berkdb_env_noerr -home $clientdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_client -rep_transport \[list 3 replsend\]" +set clientenv [eval $cl_cmd] +error_check_good script_cenv_open [is_valid_env $clientenv] TRUE + +# Start up deadlock detector. +set dpid [exec $util_path/db_deadlock \ + -a o -v -t 5 -h $clientdir >& $testdir/dd.out &] + +# Initialize version number. Don't try to open the first +# version database until the master has completed setting it up. +set version 0 +while {[catch {eval {berkdb_open_noerr} -env $clientenv -rdonly $vfile} vdb]} { + if { [is_substr $vdb DB_LOCK_DEADLOCK] == 1 } { + # We're deadlocked. Just wait for the + # deadlock detector to break the deadlock. + tclsleep 1 + } else { + puts "FAIL: vdb open failed: $vdb" + } +} + +while { $version == 0 } { + tclsleep 1 + if { [catch {$vdb get VERSION} res] } { + # If we encounter an error, check what kind of + # error it is. + if { [is_substr $res DB_LOCK_DEADLOCK] == 1 } { + # We're deadlocked. 
Just wait for the + # deadlock detector to break the deadlock. + } elseif { [is_substr $res DB_REP_HANDLE_DEAD] == 1 } { + # Handle is dead. Get a new handle. + error_check_good vdb_close [$vdb close] 0 + set vdb [eval {berkdb_open} -env $clientenv\ + -rdonly $vfile] + } else { + # We got something we didn't expect. + puts "FAIL: Trying to get version, got $res" + break + } + } else { + # No error was encountered. + set version [lindex [lindex $res 0] 1] + } +} +error_check_good close_vdb [$vdb close] 0 + +# If the parent has gotten really far ahead, it may be done. +# Clean up and exit. +if { $version == "DONE" } { + error_check_good kill_deadlock_detector [tclkill $dpid] "" + error_check_good script_client_close [$clientenv close] 0 + return +} + +if { $databases_in_memory } { + set dbfile [concat \"\" db.$version] +} else { + set dbfile db.$version +} + +# Open completed database version $version. +if {[catch {eval {berkdb_open} -rdonly -env $clientenv $dbfile} db]} { + puts "FAIL: db open failed: $db" +} +error_check_good db_open [is_valid_db $db] TRUE + +# While parent process is not done, read from current database. +# Periodically check version and update current database when +# necessary. 
+while { 1 } { + set dbc [$db cursor] + set i 0 + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + for { set dbt [$dbc get -first] } { $i < $niter } \ + { set dbt [$dbc get -next] } { + incr i + } + error_check_good cursor_close [$dbc close] 0 + + while {[catch {eval {berkdb_open} -env $clientenv -rdonly $vfile} vdb]} { + puts "open failed: vdb is $vdb" + tclsleep 1 + } + set ret [$vdb get VERSION] + + set newversion [lindex [lindex $ret 0] 1] + error_check_good close_vdb [$vdb close] 0 + error_check_bad check_newversion $newversion "" + if { $newversion != $version } { + if { $newversion == "DONE" } { + break + } elseif { $newversion == 0 } { + puts "FAIL: version has reverted to 0" + continue + } else { + error_check_good db_close [$db close] 0 + set version $newversion + if { $databases_in_memory } { + set dbfile [concat \"\" db.$version] + } else { + set dbfile db.$version + } + while {[catch {eval \ + {berkdb_open} -env $clientenv -rdonly $dbfile} db]} { + puts "db open of new db failed: $db" + tclsleep 1 + } + error_check_good db_open [is_valid_db $db] TRUE + } + } + + # Pause a few seconds to allow the parent to do some work. + tclsleep 3 +} + +# Clean up. +error_check_good kill_deadlock_detector [tclkill $dpid] "" +error_check_good db_close [$db close] 0 +error_check_good script_client_close [$clientenv close] 0 diff --git a/test/tcl/rep046.tcl b/test/tcl/rep046.tcl new file mode 100644 index 00000000..5b150378 --- /dev/null +++ b/test/tcl/rep046.tcl @@ -0,0 +1,354 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep046 +# TEST Replication and basic bulk transfer. +# TEST Set bulk transfer replication option. +# TEST Run long txns on master and then commit. Process on client +# TEST and verify contents. Run a very long txn so that logging +# TEST must send the log. Process and verify on client. 
+# +proc rep046 { method { nentries 200 } { tnum "046" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + set throttle { "throttle" "" } + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping test with -recover for \ + in-memory logs." + continue + } + foreach t $throttle { + puts "Rep$tnum ($method $r $t):\ + Replication and bulk transfer\ + $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Client 1 logs are [lindex $l 2]" + rep046_sub $method $nentries $tnum $l $r \ + $t $args + } + } + } +} + +proc rep046_sub { method niter tnum logset recargs throttle largs } { + global overflowword1 + global overflowword2 + global databases_in_memory + global repfiles_in_memory + global env_private + global testdir + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set orig_tdir $testdir + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + set in_memory_log \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" || \ + $c2_logtype == "in-memory" }] + + # In-memory logs require a large log buffer, and can not + # be used with -txn nosync. Adjust the args for master + # and client. + # This test has a long transaction, allocate a larger log + # buffer for in-memory test. + set m_logargs [adjust_logargs $m_logtype [expr 60 * 1024 * 1024]] + set c_logargs [adjust_logargs $c_logtype [expr 60 * 1024 * 1024]] + set c2_logargs [adjust_logargs $c2_logtype [expr 60 * 1024 * 1024]] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # If replication files or databases are in-memory we'll need a bigger + # cache. 
+ set cacheargs "" + set cacheadj 0 + if { $repfiles_in_memory } { + set cacheadj [expr $cacheadj + 8] + } + if { $databases_in_memory } { + set cacheadj [expr $cacheadj + 20] + } + if { $repfiles_in_memory || $databases_in_memory } { + set cachesize [expr $cacheadj * (1024 * 1024)] + set cacheargs "-cachesize {0 $cachesize 1} " + } + + set bigniter [expr 10000 - [expr 2 * $niter]] + set lkmax [expr $bigniter * 2] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $repmemargs $cacheargs $privargs \ + $verbargs -lock_max_locks 10000 -lock_max_objects 10000 \ + -errpfx MASTER -home $masterdir -rep_master -rep_transport \ + \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + $repmemargs $cacheargs $privargs \ + $verbargs -home $clientdir -errpfx CLIENT \ + -lock_max_locks 10000 -lock_max_objects 10000 \ + -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs] + + if { $throttle == "throttle" } { + set clientdir2 $testdir/CLIENTDIR2 + file mkdir $clientdir2 + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $verbargs \ + $repmemargs $cacheargs $privargs \ + $c2_logargs -home $clientdir2 -errpfx CLIENT2 \ + -lock_max_locks 10000 -lock_max_objects 10000 \ + -rep_client -rep_transport \[list 3 replsend\]" + set cl2env [eval $cl2_envcmd $recargs] + set envlist "{$masterenv 1} {$clientenv 2} {$cl2env 3}" + # + # Turn throttling on in master + # + error_check_good thr [$masterenv rep_limit 0 [expr 32 * 1024]] 0 + } else { + set envlist "{$masterenv 1} {$clientenv 2}" + } + # Bring the client online by processing the startup messages. + process_msgs $envlist + + # + # Turn on bulk processing now on the master. 
+ # + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + + puts "\tRep$tnum.a: Create and open master database" + # Set up databases as in-memory or on-disk. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set omethod [convert_method $method] + set masterdb [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $dbname] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + # Process database. + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.b: Basic long running txn" + set bulkrec1 [stat_field $masterenv rep_stat "Bulk records stored"] + set bulkxfer1 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + + set overflowword1 "0" + set overflowword2 "0" + rep_test_bulk $method $masterenv $masterdb $niter 0 0 + process_msgs $envlist + set bulkrec2 [stat_field $masterenv rep_stat "Bulk records stored"] + set bulkxfer2 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + error_check_good recstat [expr $bulkrec2 > $bulkrec1] 1 + error_check_good xferstat [expr $bulkxfer2 > $bulkxfer1] 1 + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $in_memory_log 1 1 + + puts "\tRep$tnum.c: Very long txn" + # Determine whether this build is configured with --enable-debug_rop + # or --enable-debug_wop. + set conf [berkdb getconfig] + set debug_rop_wop 0 + if { [is_substr $conf "debug_rop"] == 1 || \ + [is_substr $conf "debug_wop"] == 1 } { + set debug_rop_wop 1 + } + + # If debug_rop/wop is set test will write more info to log. + # An in-memory log has a smaller "file" size than the large + # items written in this part of the test, so skip this section + # if any in-memory logs and debug_rop/wop is set. 
+ if { $in_memory_log == 1 && $debug_rop_wop == 1 } { + puts "\t\tSkipping for in-memory log and debug_rop/wop" + } else { + set skip $niter + set start $niter + set orig $niter + set bulkfill1 [stat_field $masterenv rep_stat \ + "Bulk buffer fills"] + rep_test_bulk $method $masterenv $masterdb $bigniter \ + $start $skip + set start [expr $niter + $bigniter] + if { $throttle == "throttle" } { + # + # If we're throttling clear all messages from client 3 + # so that we force a huge gap that the client will have + # to ask for to invoke a rerequest that throttles. + # + replclear 3 + set old_thr \ + [stat_field $masterenv rep_stat \ + "Transmission limited"] + } + process_msgs $envlist + set bulkfill2 [stat_field $masterenv rep_stat \ + "Bulk buffer fills"] + error_check_good fillstat [expr $bulkfill2 > $bulkfill1] 1 + rep_verify $masterdir $masterenv $clientdir $clientenv \ + $in_memory_log 1 1 + } + + puts "\tRep$tnum.d: Very large data" + + # If debug_rop/wop is set test will write entire item to log. + # An in-memory log has a smaller "file" size than the large + # items written in this part of the test, so skip this section + # if any in-memory logs and debug_rop/wop is set. + if { $in_memory_log == 1 && $debug_rop_wop == 1 } { + puts "\t\tSkipping for in-memory log and debug_rop/wop" + } else { + set bulkovf1 [stat_field $masterenv rep_stat \ + "Bulk buffer overflows"] + set bulkfill1 [stat_field $masterenv rep_stat \ + "Bulk buffer fills"] + # + # Send in '2' exactly because we're sending in the flag to use + # the overflow entries. We have 2 overflow entries. + # If it's fixed length, we can't overflow. Induce throttling + # by putting in a bunch more entries. Create a gap by + # forcing a checkpoint record. 
+ # + $masterenv txn_checkpoint -force + process_msgs $envlist + tclsleep 1 + if { [is_fixed_length $method] == 1 } { + rep_test_bulk $method $masterenv $masterdb $niter \ + $start $start 0 + } else { + rep_test_bulk $method $masterenv $masterdb 2 0 0 1 + } + process_msgs $envlist + + # Generally overflows cannot happen because large data gets + # broken up into overflow pages, and none will be larger than + # the buffer. However, if we're configured for debug_rop/wop + # then we record the data as is and will overflow. + # + set bulkovf2 [stat_field $masterenv rep_stat \ + "Bulk buffer overflows"] + set bulkfill2 [stat_field $masterenv rep_stat \ + "Bulk buffer fills"] + if { [is_fixed_length $method] == 0 } { + error_check_good fillstat1 \ + [expr $bulkfill2 > $bulkfill1] 1 + if { $debug_rop_wop == 1 } { + error_check_good overflows \ + [expr $bulkovf2 > $bulkovf1] 1 + } else { + error_check_good no_overflows $bulkovf2 0 + } + } + } + + # !!! + # Turn off bulk processing now on the master. We need to do + # this because some configurations (like debug_rop/wop) will + # generate log records when verifying the logs and databases. + # We want to control processing those messages. + # + error_check_good set_bulk [$masterenv rep_config {bulk off}] 0 + + if { $in_memory_log == 1 && $debug_rop_wop == 1 } { + puts "\t\tSkipping for in-memory log and debug_rop/wop" + } else { + rep_verify $masterdir $masterenv $clientdir $clientenv \ + $in_memory_log + + if { $throttle == "throttle" } { + puts "\tRep$tnum.e: Verify throttling." 
+ set new_thr \ + [stat_field $masterenv rep_stat \ + "Transmission limited"] + error_check_bad nthrottles1 $new_thr -1 + error_check_bad nthrottles0 $new_thr 0 + error_check_good nthrottles \ + [expr $old_thr < $new_thr] 1 + process_msgs $envlist + rep_verify $masterdir $masterenv $clientdir2 $cl2env \ + $in_memory_log + } + } + + if { $throttle == "throttle" } { + error_check_good cclose [$cl2env close] 0 + } + + error_check_good dbclose [$masterdb close] 0 + error_check_good mclose [$masterenv close] 0 + error_check_good cclose [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep047.tcl b/test/tcl/rep047.tcl new file mode 100644 index 00000000..2c683eaf --- /dev/null +++ b/test/tcl/rep047.tcl @@ -0,0 +1,273 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep047 +# TEST Replication and log gap bulk transfers. +# TEST Set bulk transfer replication option. +# TEST Run test. Start a new client (to test ALL_REQ and bulk). +# TEST Run small test again. Clear messages for 1 client. +# TEST Run small test again to test LOG_REQ gap processing and bulk. +# TEST Process and verify on clients. +# +proc rep047 { method { nentries 200 } { tnum "047" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r): Replication\ + and resend bulk transfer $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + puts "Rep$tnum: Client 2 logs are [lindex $l 2]" + rep047_sub $method $nentries $tnum $l $r $args + } + } +} + +proc rep047_sub { method niter tnum logset recargs largs } { + global testdir + global util_path + global overflowword1 overflowword2 + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set overflowword1 "0" + set overflowword2 "0" + set orig_tdir $testdir + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs cannot be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + set in_memory_log \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" || \ + $c2_logtype == "in-memory" }] + + # If databases are in-memory we'll need a bigger cache. + set cacheargs "" + if { $databases_in_memory } { + set cachesize [expr 20 * (1024 * 1024)] + set cacheargs "-cachesize {0 $cachesize 1} " + } + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env -create $m_txnargs $m_logargs \ + $verbargs -errpfx MASTER -home $masterdir $repmemargs \ + $privargs \ + $cacheargs -rep_master -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open two clients. + repladd 2 + set cl_envcmd "berkdb_env -create $c_txnargs $c_logargs \ + $verbargs -errpfx CLIENT -home $clientdir $repmemargs \ + $privargs \ + $cacheargs -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs] + error_check_good client_env [is_valid_env $clientenv] TRUE + + repladd 3 + set cl2_envcmd "berkdb_env -create $c2_txnargs $c2_logargs \ + $verbargs -errpfx CLIENT2 -home $clientdir2 $repmemargs \ + $privargs \ + $cacheargs -rep_client -rep_transport \[list 3 replsend\]" + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + + puts "\tRep$tnum.a: Create and open master database" + + # Set up databases as in-memory or on-disk. 
+ if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set omethod [convert_method $method] + set masterdb [eval {berkdb_open -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $dbname] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.b: Basic long running txn" + rep_test_bulk $method $masterenv $masterdb $niter 0 0 0 + process_msgs $envlist + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $in_memory_log 1 1 + + # Clean up after rep_verify: remove the temporary "prlog" file. Now + # that a newly joining client uses internal init, when the master scans + # its directory for database files it complains about prlog not looking + # like a proper db. This is harmless, but it does put a distracting + # error message into the test output. + # + file delete $masterdir/prlog + + puts "\tRep$tnum.c: Bring new client online" + replclear 3 + set bulkxfer1 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + set clientenv2 [eval $cl2_envcmd $recargs] + error_check_good client_env [is_valid_env $clientenv2] TRUE + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + set bulkxfer2 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + error_check_good xferstat [expr $bulkxfer2 > $bulkxfer1] 1 + puts "\tRep$tnum.c.0: Take new client offline" + + puts "\tRep$tnum.d: Run small test creating a log gap" + set skip $niter + set start $niter + set niter 10 + rep_test_bulk $method $masterenv $masterdb $niter $start $skip 0 + # + # Skip and clear messages for client 2. 
+ # + replclear 3 + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.e: Bring new client online again" + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + # + # Since we're relying on the client to detect a gap and request missing + # records, reset gap parameters to small values. Otherwise, + # "wait_recs" is still set at its maximum "high" value, due to this + # client having been through an internal init. Send a record to + # create a real gap and not an end-of-record stream pause. + # + $masterenv txn_checkpoint -force + $clientenv2 rep_request 4000 128000 + process_msgs $envlist + tclsleep 1 + set bulkrec1 [stat_field $masterenv rep_stat "Bulk records stored"] + set bulkxfer1 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + set skip [expr $skip + $niter] + set start $skip + rep_test_bulk $method $masterenv $masterdb $niter $start $skip 0 + + process_msgs $envlist + # + # We know we added 2*$niter items to the database so there should be + # at least 2*$niter records stored to the log. Verify that + # when we brought client 2 online, we sent at least 2*$niter more + # records via bulk. + # + set bulkrec2 [stat_field $masterenv rep_stat "Bulk records stored"] + set bulkxfer2 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + set recstat [expr $bulkrec2 > [expr $bulkrec1 + [expr 2 * $niter]]] + error_check_good recstat $recstat 1 + error_check_good xferstat [expr $bulkxfer2 > $bulkxfer1] 1 + + # Turn off bulk processing now on the master. We need to do + # this because some configurations (like debug_rop/wop) will + # generate log records when verifying the logs and databases. + # We want to control processing those messages. + # + error_check_good set_bulk [$masterenv rep_config {bulk off}] 0 + + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $in_memory_log 1 1 + + # Process messages again in case we are running with debug_rop. 
+ process_msgs $envlist + rep_verify $masterdir $masterenv\ + $clientdir2 $clientenv2 $in_memory_log 1 1 + + error_check_good dbclose [$masterdb close] 0 + error_check_good mclose [$masterenv close] 0 + error_check_good cclose [$clientenv close] 0 + error_check_good c2close [$clientenv2 close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep048.tcl b/test/tcl/rep048.tcl new file mode 100644 index 00000000..0e748a22 --- /dev/null +++ b/test/tcl/rep048.tcl @@ -0,0 +1,193 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep048 +# TEST Replication and log gap bulk transfers. +# TEST Have two master env handles. Turn bulk on in +# TEST one (turns it on for both). Turn it off in the other. +# TEST While toggling, send log records from both handles. +# TEST Process message and verify master and client match. +# +proc rep048 { method { nentries 3000 } { tnum "048" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping test with -recover for \ + in-memory logs." 
+ continue + } + puts "Rep$tnum ($method $r): Replication\ + and toggling bulk transfer $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep048_sub $method $nentries $tnum $l $r $args + } + } +} + +proc rep048_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global overflowword1 + global overflowword2 + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set orig_tdir $testdir + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + set overflowword1 "0" + set overflowword2 "0" + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $clientdir + file mkdir $masterdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + set in_memory_log \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" }] + + # In-memory logs require a large log buffer, and can not + # be used with -txn nosync. Adjust the args for master + # and client. + # This test has a long transaction, allocate a larger log + # buffer for in-memory test. + set m_logargs [adjust_logargs $m_logtype [expr 20 * 1024 * 1024]] + set c_logargs [adjust_logargs $c_logtype [expr 20 * 1024 * 1024]] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # If databases are in-memory we'll need a bigger cache. + set cacheargs "" + if { $databases_in_memory } { + set cachesize [expr 20 * (1024 * 1024)] + set cacheargs "-cachesize {0 $cachesize 1} " + } + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -errpfx MASTER $verbargs -home $masterdir $repmemargs \ + $cacheargs -rep_master -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -errpfx CLIENT $verbargs -home $clientdir $repmemargs \ + $cacheargs -rep_client -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.a: Create and open master databases" + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set omethod [convert_method $method] + set masterdb [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $dbname] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + set scrlog $testdir/repscript.log + puts "\tRep$tnum.b: Fork child process." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep048script.tcl $scrlog $masterdir $databases_in_memory &] + + # Wait for child process to start up. + while { 1 } { + if { [file exists $masterdir/marker.file] == 0 } { + tclsleep 1 + } else { + tclsleep 1 + break + } + } + # Run a modified test001 in the master (and update clients). + # Call it several times so make sure that we get descheduled. 
+ puts "\tRep$tnum.c: Basic long running txn" + set div 10 + set loop [expr $niter / $div] + set start 0 + for { set i 0 } { $i < $div } {incr i} { + rep_test_bulk $method $masterenv $masterdb $loop $start $start 0 + process_msgs $envlist + set start [expr $start + $loop] + tclsleep 1 + } + error_check_good dbclose [$masterdb close] 0 + set marker [open $masterdir/done.file w] + close $marker + + set bulkxfer1 [stat_field $masterenv rep_stat "Bulk buffer transfers"] + error_check_bad bulk $bulkxfer1 0 + + puts "\tRep$tnum.d: Waiting for child ..." + # Watch until the child is done. + watch_procs $pid 5 + process_msgs $envlist + set childname "child.db" + + rep_verify $masterdir $masterenv $clientdir $clientenv \ + $in_memory_log 1 1 + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 0 1 0 $childname + + error_check_good mclose [$masterenv close] 0 + error_check_good cclose [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep048script.tcl b/test/tcl/rep048script.tcl new file mode 100644 index 00000000..e724d9f0 --- /dev/null +++ b/test/tcl/rep048script.tcl @@ -0,0 +1,84 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep048 script - toggle bulk transfer while updates are going on. + +# Usage: repscript masterdir +# masterdir: master env directory +# databases_in_memory: are we using named in-memory databases? +# +source ./include.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir" + +# Verify usage +if { $argc != 2 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +global databases_in_memory +set databases_in_memory [ lindex $argv 1 ] + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. 
+set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +# +repladd 1 +repladd 2 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir \ + -txn -rep_master -rep_transport \[list 1 replsend\]" +# set ma_cmd "berkdb_env_noerr -home $masterdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_master -rep_transport \[list 1 replsend\]" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +puts "Master open" +if { $databases_in_memory } { + set dbname { "" "child.db" } +} else { + set dbname "child.db" +} +set db [eval "berkdb_open -create -btree -auto_commit -env $masterenv $dbname"] +error_check_good dbopen [is_valid_db $db] TRUE + +# Create marker file +set marker [open $masterdir/marker.file w] +close $marker + +# +# Keep toggling until the parent indicates it's done. +# +set tog "on" +for { set i 0 } { [file exists $masterdir/done.file] == 0 } { incr i } { +puts "Iter $i: Turn bulk $tog" + error_check_good bulk$tog [$masterenv rep_config [list bulk $tog]] 0 + set t [$masterenv txn] + error_check_good db_put \ + [eval $db put -txn $t $i data$i] 0 + error_check_good txn_commit [$t commit] 0 + if { $tog == "on" } { + set tog "off" + } else { + set tog "on" + } + tclsleep 1 +} +# Close the envs +error_check_good script_db_close [$db close] 0 +error_check_good script_master_close [$masterenv close] 0 +puts "\tRepscript completed successfully" diff --git a/test/tcl/rep049.tcl b/test/tcl/rep049.tcl new file mode 100644 index 00000000..8ee34fad --- /dev/null +++ b/test/tcl/rep049.tcl @@ -0,0 +1,246 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep049 +# TEST Replication and delay syncing clients - basic test. 
+# TEST +# TEST Open and start up a master and two clients. Turn on delay sync +# TEST in the delayed client. Change master, add data and process messages. +# TEST Verify delayed client does not match. Make additional changes and +# TEST update the delayed client. Verify all match. +# TEST Add in a fresh delayed client to test delay of ALL_REQ. +# TEST Process startup messages and verify fresh client has no database. +# TEST Sync and verify fresh client matches. +# +proc rep049 { method { niter 10 } { tnum "049" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 4] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($r): Replication and\ + ($method) delayed sync-up $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Swap Client logs are [lindex $l 1]" + puts "Rep$tnum: Delay Client logs are [lindex $l 2]" + puts "Rep$tnum: Fresh Client logs are [lindex $l 3]" + rep049_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep049_sub { method niter tnum logset recargs largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set env1dir $testdir/MASTERDIR + set env2dir $testdir/CLIENTDIR + set delaycldir $testdir/DELAYCLDIR + set freshcldir $testdir/FRESHCLDIR + file mkdir $env1dir + file mkdir $env2dir + file mkdir $delaycldir + file mkdir $freshcldir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set dc_logtype [lindex $logset 2] + set fc_logtype [lindex $logset 3] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set dc_logargs [adjust_logargs $dc_logtype] + set fc_logargs [adjust_logargs $fc_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set dc_txnargs [adjust_txnargs $dc_logtype] + set fc_txnargs [adjust_txnargs $fc_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $repmemargs $privargs \ + $m_logargs -errpfx ENV1 -cachesize {0 4194304 3} \ + -home $env1dir -rep_transport \[list 1 replsend\]" + set env1 [eval $ma_envcmd $recargs -rep_master] + error_check_good master_env [is_valid_env $env1] TRUE + $env1 rep_limit 0 0 + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $verbargs \ + $repmemargs $privargs $c_logargs -errpfx ENV2 \ + -home $env2dir -rep_transport \[list 2 replsend\]" + set env2 [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $env2] TRUE + $env2 rep_limit 0 0 + + repladd 3 + set dc_envcmd "berkdb_env_noerr -create $dc_txnargs \ + $repmemargs $privargs $verbargs $dc_logargs -errpfx ENV3 \ + -home $delaycldir -rep_transport \[list 3 replsend\]" + set dcenv [eval $dc_envcmd $recargs -rep_client] + error_check_good client2_env [is_valid_env $dcenv] TRUE + $dcenv rep_limit 0 0 + + # + # !!! + # We're not using this client yet, but put its command up here. + # We'll do the repladd and execute this env command later. + # + set fc_envcmd "berkdb_env_noerr -create $fc_txnargs \ + $repmemargs $privargs $fc_logargs -errpfx ENV4 $verbargs \ + -home $freshcldir -rep_transport \[list 4 replsend\]" + + # Bring the clients online by processing the startup messages. + set envlist "{$env1 1} {$env2 2} {$dcenv 3}" + process_msgs $envlist + + puts "\tRep$tnum.a: Run rep_test in master env." + set start 0 + eval rep_test $method $env1 NULL $niter $start $start 0 $largs + + process_msgs $envlist + + puts "\tRep$tnum.b: Set delayed sync on client. Basic test." + error_check_good set_delay [$dcenv rep_config {delayclient on}] 0 + # + # Call sync when we're not delayed. Verify it just returns and + # that no messages are generated anywhere. 
+ # + error_check_good sync1 [$dcenv rep_sync] 0 + set nproced [proc_msgs_once $envlist NONE err] + error_check_good nproced $nproced 0 + + puts "\tRep$tnum.c: Swap master/client" + error_check_good downgrade [$env1 rep_start -client] 0 + error_check_good upgrade [$env2 rep_start -master] 0 + + process_msgs $envlist + + puts "\tRep$tnum.d: Run rep_test in new master env" + set start $niter + eval rep_test $method $env2 NULL $niter $start $start 0 $largs + process_msgs $envlist + # + # Delayed client should be different. Former master should be synced. + # + rep_verify $env2dir $env2 $env1dir $env1 1 1 1 + rep_verify $env2dir $env2 $delaycldir $dcenv 0 0 0 + + puts "\tRep$tnum.e: Sync delayed client" + error_check_good rep_sync [$dcenv rep_sync] 0 + process_msgs $envlist + # + # Delayed client should be the same now. + # + rep_verify $env2dir $env2 $delaycldir $dcenv 0 1 1 + + puts "\tRep$tnum.f: Run rep_test after sync-up in new master env" + set start [expr $start + $niter] + eval rep_test $method $env2 NULL $niter $start $start 0 $largs + process_msgs $envlist + # + # Delayed client should be caught up and running fine. + # + rep_verify $env2dir $env2 $delaycldir $dcenv 0 1 1 + + puts "\tRep$tnum.g: Add in a fresh delayed client" + repladd 4 + set fcenv [eval $fc_envcmd $recargs -rep_client] + error_check_good client3_env [is_valid_env $fcenv] TRUE + error_check_good set_delay [$fcenv rep_config {delayclient on}] 0 + + set envlist "{$env1 1} {$env2 2} {$dcenv 3} {$fcenv 4}" + process_msgs $envlist + + # Verify that after processing the startup messages, the + # new client has no database and unmatched logs. + set dbname "test.db" + error_check_bad clientdb [file exists $freshcldir/$dbname] 1 + rep_verify $env2dir $env2 $freshcldir $fcenv 0 0 1 NULL + + puts "\tRep$tnum.h: Sync delayed client" + error_check_good rep_sync [$fcenv rep_sync] 0 + process_msgs $envlist + # + # Delayed client should be the same now. 
+ # + rep_verify $env2dir $env2 $freshcldir $fcenv 0 1 1 + + puts "\tRep$tnum.i: Closing" + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + error_check_good dc_close [$dcenv close] 0 + error_check_good fc_close [$fcenv close] 0 + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} diff --git a/test/tcl/rep050.tcl b/test/tcl/rep050.tcl new file mode 100644 index 00000000..77f9a2be --- /dev/null +++ b/test/tcl/rep050.tcl @@ -0,0 +1,369 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep050 +# TEST Replication and delay syncing clients - change master test. +# TEST +# TEST Open and start up master and 4 clients. Turn on delay for 3 clients. +# TEST Switch masters, add data and verify delayed clients are out of date. +# TEST Make additional changes to master. And change masters again. +# TEST Sync/update delayed client and verify. The 4th client is a brand +# TEST new delayed client added in to test the non-verify path. +# TEST +# TEST Then test two different things: +# TEST 1. Swap master again while clients are still delayed. +# TEST 2. Swap master again while sync is proceeding for one client. +# +proc rep050 { method { niter 10 } { tnum "050" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 5] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($r): Replication and\ + ($method) delayed sync-up $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 0 logs are [lindex $l 1]" + puts "Rep$tnum: Delay Client 1 logs are [lindex $l 2]" + puts "Rep$tnum: Delay Client 2 logs are [lindex $l 3]" + puts "Rep$tnum: Delay Client 3 logs are [lindex $l 4]" + rep050_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep050_sub { method niter tnum logset recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set env1dir $testdir/MASTERDIR + set env2dir $testdir/CLIENTDIR + set delaycldir1 $testdir/DELAYCLDIR.1 + set delaycldir2 $testdir/DELAYCLDIR.2 + set delaycldir3 $testdir/DELAYCLDIR.3 + file mkdir $env1dir + file mkdir $env2dir + file mkdir $delaycldir1 + file mkdir $delaycldir2 + file mkdir $delaycldir3 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set dc1_logtype [lindex $logset 2] + set dc2_logtype [lindex $logset 3] + set dc3_logtype [lindex $logset 4] + + # 
In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set dc1_logargs [adjust_logargs $dc1_logtype] + set dc2_logargs [adjust_logargs $dc2_logtype] + set dc3_logargs [adjust_logargs $dc3_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set dc1_txnargs [adjust_txnargs $dc1_logtype] + set dc2_txnargs [adjust_txnargs $dc2_logtype] + set dc3_txnargs [adjust_txnargs $dc3_logtype] + + # + # XXX rep050 delayed sync-up but change master: + # while client is delayed. + # while client is in the middle of delayed sync. + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -errpfx ENV1 $verbargs $repmemargs $privargs \ + -home $env1dir -rep_transport \[list 1 replsend\]" + set env1 [eval $ma_envcmd $recargs -rep_master] + $env1 rep_limit 0 0 + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx ENV2 $verbargs $repmemargs $privargs \ + -cachesize {0 2097152 2} \ + -home $env2dir -rep_transport \[list 2 replsend\]" + set env2 [eval $cl_envcmd $recargs -rep_client] + $env2 rep_limit 0 0 + + repladd 3 + set dc1_envcmd "berkdb_env_noerr -create $dc1_txnargs \ + $dc1_logargs -errpfx ENV3 $verbargs $repmemargs $privargs \ + -home $delaycldir1 -rep_transport \[list 3 replsend\]" + set dc1env [eval $dc1_envcmd $recargs -rep_client] + $dc1env rep_limit 0 0 + + repladd 4 + set dc2_envcmd "berkdb_env_noerr -create $dc2_txnargs \ + $dc2_logargs -errpfx ENV4 $verbargs $repmemargs $privargs \ + -home $delaycldir2 -rep_transport \[list 4 replsend\]" + set dc2env [eval $dc2_envcmd $recargs -rep_client] + $dc2env rep_limit 0 0 + + repladd 5 + set dc3_envcmd "berkdb_env_noerr -create $dc3_txnargs \ + $dc3_logargs -errpfx ENV5 $verbargs $repmemargs $privargs \ + -home $delaycldir3 -rep_transport \[list 5 replsend\]" + + # Bring 
the clients online by processing the startup messages. + # !!! + # NOTE: We set up dc3_envcmd but we do not open the env now. + # Therefore dc3env is not part of the envlist. However, since + # we did the repladd broadcast messages will be sent to it, + # but we will replclear before we start the env. + # + set envlist "{$env1 1} {$env2 2} {$dc1env 3} {$dc2env 4}" + process_msgs $envlist + + puts "\tRep$tnum.a: Run rep_test in master env." + set start 0 + eval rep_test $method $env1 NULL $niter $start $start 0 $largs + + process_msgs $envlist + + puts "\tRep$tnum.b: Set delayed sync on clients 2 and 3" + error_check_good set_delay [$dc1env rep_config {delayclient on}] 0 + error_check_good set_delay [$dc2env rep_config {delayclient on}] 0 + + set oplist { "delayed" "syncing" } + + set masterenv $env1 + set mid 1 + set mdir $env1dir + set clientenv $env2 + set cid 2 + set cdir $env2dir + foreach op $oplist { + # Swap all the info we need. + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + + set tmp $mdir + set mdir $cdir + set cdir $tmp + + set tmp $mid + set mid $cid + set cid $tmp + + puts "\tRep$tnum.c: Swap master/client ($op)" + error_check_good downgrade [$clientenv rep_start -client] 0 + error_check_good upgrade [$masterenv rep_start -master] 0 + process_msgs $envlist + + # + # !!! + # At this point, clients 2 and 3 should have DELAY set. + # We should probably add a field to rep_stat + # to indicate that and test that here. + # + puts "\tRep$tnum.d: Run rep_test in new master env" + set start [expr $start + $niter] + eval rep_test $method $env2 NULL $niter $start $start 0 $largs + process_msgs $envlist + + # + # Delayed clients should be different. + # Former master should be synced. + # + rep_verify $mdir $masterenv $cdir $clientenv 0 1 1 + rep_verify $mdir $masterenv $delaycldir1 $dc1env 0 0 0 + rep_verify $mdir $masterenv $delaycldir2 $dc2env 0 0 0 + + # + # Run rep_test again, but don't process on former master. 
+ # This makes the master/client different from each other. + # + puts "\tRep$tnum.e: Run rep_test in new master env only" + set start [expr $start + $niter] + eval rep_test \ + $method $masterenv NULL $niter $start $start 0 $largs + replclear $cid + replclear 3 + replclear 4 + replclear 5 + + puts "\tRep$tnum.f: Start 4th, clean delayed client." + set dc3env [eval $dc3_envcmd $recargs -rep_client] + error_check_good client4_env [is_valid_env $dc3env] TRUE + $dc3env rep_limit 0 0 + error_check_good set_delay [$dc3env rep_config \ + {delayclient on}] 0 + set envlist "{$env1 1} {$env2 2} {$dc1env 3} \ + {$dc2env 4} {$dc3env 5}" + process_msgs $envlist + + # + # Now we have a master at point 1, a former master, + # now client at point 2, and two delayed clients at point 3. + # If 'delayed' swap masters now, while the clients are + # in the delayed state but not syncing yet. + # If 'syncing', first call rep_sync, and begin syncing the + # clients, then swap masters in the middle of that process. + # + set nextlet "g" + if { $op == "delayed" } { + # Swap all the info we need. + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + + set tmp $mdir + set mdir $cdir + set cdir $tmp + + set tmp $mid + set mid $cid + set cid $tmp + + puts "\tRep$tnum.g: Swap master/client while delayed" + set nextlet "h" + error_check_good downgrade \ + [$clientenv rep_start -client] 0 + error_check_good upgrade \ + [$masterenv rep_start -master] 0 + process_msgs $envlist + } + puts "\tRep$tnum.$nextlet: Run rep_test and sync delayed client" + set start [expr $start + $niter] + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + process_msgs $envlist + error_check_good rep_sync [$dc1env rep_sync] 0 + error_check_good rep_sync [$dc3env rep_sync] 0 + if { $op == "syncing" } { + # + # Process messages twice to get us into syncing, + # but not enough to complete it. Then swap. 
+ # + set nproced [proc_msgs_once $envlist NONE err] + set nproced [proc_msgs_once $envlist NONE err] + + # Swap all the info we need. + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + + set tmp $mdir + set mdir $cdir + set cdir $tmp + + set tmp $mid + set mid $cid + set cid $tmp + + puts "\tRep$tnum.h: Swap master/client while syncing" + error_check_good downgrade \ + [$clientenv rep_start -client] 0 + error_check_good upgrade \ + [$masterenv rep_start -master] 0 + } + # + # Now process all messages and verify. + # + puts "\tRep$tnum.i: Process all messages and verify." + process_msgs $envlist + + # + # If we swapped during the last syncing, we need to call + # rep_sync again because the master changed again. + # + if { $op == "syncing" } { + error_check_good rep_sync [$dc1env rep_sync] 0 + error_check_good rep_sync [$dc3env rep_sync] 0 + process_msgs $envlist + } + + # + # Delayed client should be the same now. + # + rep_verify $mdir $masterenv $delaycldir1 $dc1env 0 1 1 + rep_verify $mdir $masterenv $delaycldir3 $dc3env 0 1 1 + rep_verify $mdir $masterenv $delaycldir2 $dc2env 0 0 0 + error_check_good dc3_close [$dc3env close] 0 + env_cleanup $delaycldir3 + set envlist "{$env1 1} {$env2 2} {$dc1env 3} {$dc2env 4}" + + } + puts "\tRep$tnum.j: Sync up 2nd delayed client and verify." + error_check_good rep_sync [$dc2env rep_sync] 0 + process_msgs $envlist + rep_verify $mdir $masterenv $delaycldir2 $dc2env 0 1 1 + + puts "\tRep$tnum.k: Closing" + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 + error_check_good dc1_close [$dc1env close] 0 + error_check_good dc2_close [$dc2env close] 0 + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} diff --git a/test/tcl/rep051.tcl b/test/tcl/rep051.tcl new file mode 100644 index 00000000..b3bd2fca --- /dev/null +++ b/test/tcl/rep051.tcl @@ -0,0 +1,251 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep051 +# TEST Test of compaction with replication. +# TEST +# TEST Run rep_test in a replicated master environment. +# TEST Delete a large number of entries and compact with -freespace. +# TEST Propagate the changes to the client and make sure client and +# TEST master match. + +proc rep051 { method { niter 1000 } { tnum "051" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Compaction is an option for btree and recno databases only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_recno $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_hash $method] == 1 || + [is_queue $method] == 1 || [is_heap $method] == 1} { + puts "Skipping test$tnum for method $method." + return + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run tests with and without recovery. If we're doing testing + # of in-memory logging, skip the combination of recovery + # and in-memory logging -- it doesn't make sense. + set logsets [create_logsets 2] + set saved_args $args + + foreach recopt $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $recopt == "-recover" && $logindex != -1 } { + puts "Skipping test \ + with -recover for in-memory logs." 
+ continue + } + set envargs "" + set args $saved_args + puts "Rep$tnum: Replication with compaction\ + ($method $recopt) $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep051_sub $method \ + $niter $tnum $envargs $l $recopt $args + } + } +} + +proc rep051_sub { method niter tnum envargs logset recargs largs } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + set verify_subset \ + [expr { $m_logtype == "in-memory" || $c_logtype == "in-memory" }] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client. + # This test has a long transaction, allocate a larger log + # buffer for in-memory test. + set m_logargs [adjust_logargs $m_logtype [expr 2 * [expr 1024 * 1024]]] + set c_logargs [adjust_logargs $c_logtype [expr 2 * [expr 1024 * 1024]]] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set env_cmd(M) "berkdb_env_noerr -create $verbargs $privargs \ + -log_max 1000000 $envargs $m_logargs $recargs $repmemargs \ + -home $masterdir -errpfx MASTER $m_txnargs -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create $verbargs $privargs \ + -log_max 1000000 $envargs $c_logargs $recargs $repmemargs \ + -home $clientdir -errpfx CLIENT $c_txnargs -rep_client \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C)] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Explicitly create the db handle so we can do deletes, + # and also to make the page size small. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit\ + -pagesize 512 -create -mode 0644} $largs $omethod $dbname] + error_check_good db_open [is_valid_db $db] TRUE + + # Run rep_test in the master and update client. + puts "\tRep$tnum.a:\ + Running rep_test in replicated env ($envargs $recargs)." + + eval rep_test $method $masterenv $db $niter 0 0 0 $largs + process_msgs $envlist + + # Verify that contents match. + puts "\tRep$tnum.b: Verifying client database contents." + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $verify_subset 1 1 + + # Delete most entries. Since some of our methods renumber, + # delete starting at $niter and working down to 0. + puts "\tRep$tnum.c: Remove most entries, by cursor." + set count [expr $niter - 1] + set n 20 + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set txn "-txn $t" + + set dbc [eval {$db cursor} $txn] + + # Leave every nth item. 
+ set dbt [$dbc get -first] + while { $count > 0 } { + if { [expr $count % $n] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + set dbt [$dbc get -next] + incr count -1 + } + + error_check_good dbc_close [$dbc close] 0 + error_check_good t_commit [$t commit] 0 + + # Open read-only handle on client, so we can call $db stat. + set client_db \ + [eval {berkdb_open_noerr} -env $clientenv -rdonly $dbname] + error_check_good client_open [is_valid_db $client_db] TRUE + + # Check database size on both client and master. + process_msgs $envlist + set master_pages_before [stat_field $db stat "Page count"] + set client_pages_before [stat_field $client_db stat "Page count"] + error_check_good \ + pages_match_before $client_pages_before $master_pages_before + + # Compact database. + puts "\tRep$tnum.d: Compact database." + set t [$masterenv txn] + error_check_good txn [is_valid_txn $t $masterenv] TRUE + set txn "-txn $t" + + set ret [eval {$db compact} $txn {-freespace}] + + error_check_good t_commit [$t commit] 0 + error_check_good db_sync [$db sync] 0 + + # There will be fewer pages in use after the compact -freespace call. + set master_pages_after [stat_field $db stat "Page count"] + set page_reduction [expr $master_pages_before - $master_pages_after] + error_check_good page_reduction [expr $page_reduction > 0] 1 + + # Process messages so the client sees the reduction in pages used. + process_msgs $envlist + + set client_pages_after [stat_field $client_db stat "Page count"] + error_check_good \ + pages_match_after $client_pages_after $master_pages_after + + # Close client handle. + error_check_good client_handle [$client_db close] 0 + + # Reverify. + puts "\tRep$tnum.b: Verifying client database contents." + rep_verify $masterdir $masterenv\ + $clientdir $clientenv $verify_subset 1 1 + + # Clean up. 
+ error_check_good db_close [$db close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep052.tcl b/test/tcl/rep052.tcl new file mode 100644 index 00000000..e8184585 --- /dev/null +++ b/test/tcl/rep052.tcl @@ -0,0 +1,238 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep052 +# TEST Test of replication with NOWAIT. +# TEST +# TEST One master, one client. After initializing +# TEST everything normally, close client and let the +# TEST master get ahead -- far enough that the master +# TEST no longer has the client's last log file. +# TEST Reopen the client and turn on NOWAIT. +# TEST Process a few messages to get the client into +# TEST recovery mode, and verify that lockout occurs +# TEST on a txn API call (txn_begin) and an env API call. +# TEST Process all the messages and verify that lockout +# TEST is over. + +proc rep052 { method { niter 200 } { tnum "052" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set saved_args $args + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + set saved_args $args + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. Skip + # recovery with in-memory logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + set envargs "" + set args $saved_args + puts "Rep$tnum ($method $envargs $r $args):\ + Test lockouts with REP_NOWAIT\ + $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep052_sub $method $niter $tnum $envargs \ + $l $r $args + } + } +} + +proc rep052_sub { method niter tnum envargs logset recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. 
+ set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $repmemargs $privargs \ + $m_logargs -log_max $log_max $envargs -errpfx MASTER \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + $masterenv rep_limit 0 0 + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $verbargs \ + $repmemargs $privargs \ + $c_logargs -log_max $log_max $envargs -errpfx CLIENT \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + $clientenv rep_limit 0 0 + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Find out what exists on the client before closing. We'll need + # to loop until the first master log file > last client log file. + set last_client_log [get_logfile $clientenv last] + + # Create a gap requiring internal initialization. + set flags "" + set cid 2 + set dbhandle NULL + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + puts "\tRep$tnum.e: Reopen client." 
+ env_cleanup $clientdir + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + $clientenv rep_limit 0 0 + set envlist "{$masterenv 1} {$clientenv 2}" + + # Turn on nowait. + $clientenv rep_config {nowait on} + + # Process messages a few times, just enough to get client + # into lockout/recovery mode, but not enough to complete recovery. + set iter 3 + for { set i 0 } { $i < $iter } { incr i } { + set nproced [proc_msgs_once $envlist NONE err] + } + + puts "\tRep$tnum.f: Verify we are locked out of txn API calls." + if { [catch { set txn [$clientenv txn] } res] } { + error_check_good txn_lockout [is_substr $res "DB_REP_LOCKOUT"] 1 + } else { + error "FAIL:[timestamp] Not locked out of txn API calls." + } + + puts "\tRep$tnum.g: Verify we are locked out of env API calls." + if { [catch { set stat [$clientenv lock_stat] } res] } { + error_check_good env_lockout [is_substr $res "DB_REP_LOCKOUT"] 1 + } else { + error "FAIL:[timestamp] Not locked out of env API calls." + } + + # Now catch up and make sure we're not locked out anymore. + process_msgs $envlist + + puts "\tRep$tnum.h: No longer locked out of txn API calls." + if { [catch { set txn [$clientenv txn] } res] } { + puts "FAIL: unable to start txn: $res" + } else { + error_check_good txn_no_lockout [$txn commit] 0 + } + + puts "\tRep$tnum.i: No longer locked out of env API calls." 
+ if { [catch { set stat [$clientenv rep_stat] } res] } { + puts "FAIL: unable to make env call: $res" + } + + puts "\tRep$tnum.h: Verify logs and databases" + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep053.tcl b/test/tcl/rep053.tcl new file mode 100644 index 00000000..ef8047cc --- /dev/null +++ b/test/tcl/rep053.tcl @@ -0,0 +1,235 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep053 +# TEST Replication and basic client-to-client synchronization. +# TEST +# TEST Open and start up master and 1 client. +# TEST Start up a second client later and verify it sync'ed from +# TEST the original client, not the master. +# +proc rep053 { method { niter 200 } { tnum "053" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. 
+ set throttle { "throttle" "" } + foreach r $test_recopts { + foreach t $throttle { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $t): Replication\ + and client-to-client sync up\ + $msg $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + puts "Rep$tnum: Client2 logs are [lindex $l 2]" + rep053_sub $method $niter $tnum $l $r $t $args + } + } + } +} + +proc rep053_sub { method niter tnum logset recargs throttle largs } { + global anywhere + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + set orig_tdir $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set delaycldir1 $testdir/DELAYCLDIR.1 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $delaycldir1 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -errpfx MASTER $verbargs $repmemargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open two clients + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -errpfx CLIENT $verbargs $repmemargs $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # If throttling is specified, turn it on here. Throttle the + # client, since this is a test of client-to-client sync-up. + if { $throttle == "throttle" } { + error_check_good \ + throttle [$clientenv rep_limit 0 [expr 8 * 1024]] 0 + } + + # + # Set up delayed client command, but don't eval until later. + # !!! Do NOT put the 'repladd' call here because we don't + # want this client to already have the backlog of records + # when it starts. + # + set dc1_envcmd "berkdb_env_noerr -create $c2_txnargs \ + $c2_logargs -errpfx DELAYCL $verbargs $repmemargs $privargs \ + -home $delaycldir1 -rep_transport \[list 3 replsend\]" + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.a: Run rep_test in master env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.b: Start new client." + set anywhere 1 + repladd 3 + set newclient [eval $dc1_envcmd $recargs -rep_client] + error_check_good client2_env [is_valid_env $newclient] TRUE + + set envlist "{$masterenv 1} {$clientenv 2} {$newclient 3}" + process_msgs $envlist + + puts "\tRep$tnum.c: Verify sync-up from client." 
+ set req [stat_field $clientenv rep_stat "Client service requests"] + set miss [stat_field $clientenv rep_stat "Client service req misses"] + set rereq [stat_field $newclient rep_stat "Client rerequests"] + + # To complete the internal init, we need a PAGE_REQ and a LOG_REQ. These + # requests get served by $clientenv. Since the end-of-range specified + # in the LOG_REQ points to the very end of the log (i.e., the LSN given + # in the NEWMASTER message), the serving client gets NOTFOUND in its log + # cursor reading loop, and can't tell whether it simply hit the end, or + # is really missing sufficient log records to fulfill the request. So + # it counts a "miss" and generates a rerequest. When internal init + # finishes recovery, it sends an ALL_REQ, for a total of 3 requests in + # the simple case, and more than 3 in the "throttle" case. + # + + set expected_msgs 3 + if { [is_queue $method] } { + # Queue database require an extra request + # to retrieve the meta page. + incr expected_msgs + } + + if { $throttle == "throttle" } { + error_check_good req [expr $req > $expected_msgs] 1 + } else { + error_check_good min_req [expr $req >= $expected_msgs] 1 + set max_expected_msgs [expr $expected_msgs * 2] + error_check_good max_req [expr $req <= $max_expected_msgs] 1 + } + error_check_good miss=rereq $miss $rereq + + # Check for throttling. + if { $throttle == "throttle" } { + set num_throttles \ + [stat_field $clientenv rep_stat "Transmission limited"] + error_check_bad client_throttling $num_throttles 0 + } + + rep_verify $masterdir $masterenv $clientdir $clientenv 0 1 1 + + # Process messages again in case we are running with debug_rop. + process_msgs $envlist + rep_verify $masterdir $masterenv $delaycldir1 $newclient 0 1 1 + + puts "\tRep$tnum.d: Run rep_test more in master env and verify." 
+ set niter 10 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + rep_verify $masterdir $masterenv $clientdir $clientenv 0 1 1 + process_msgs $envlist + rep_verify $masterdir $masterenv $delaycldir1 $newclient 0 1 1 + + puts "\tRep$tnum.e: Closing" + error_check_good master_close [$masterenv close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good dc1_close [$newclient close] 0 + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + set anywhere 0 + return +} diff --git a/test/tcl/rep054.tcl b/test/tcl/rep054.tcl new file mode 100644 index 00000000..36beb0f7 --- /dev/null +++ b/test/tcl/rep054.tcl @@ -0,0 +1,259 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep054 +# TEST Test of internal initialization where a far-behind +# TEST client takes over as master. +# TEST +# TEST One master, two clients. +# TEST Run rep_test and process. +# TEST Close client 1. +# TEST Run rep_test, opening new databases, and processing +# TEST messages. Archive as we go so that log files get removed. +# TEST Close master and reopen client 1 as master. Process messages. +# TEST Verify that new master and client are in sync. +# TEST Run rep_test again, adding data to one of the new +# TEST named databases. + +proc rep054 { method { nentries 200 } { tnum "054" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # Skip this test for named in-memory databases; it tries + # to close and re-open envs, which just won't work. + if { $databases_in_memory } { + puts "Skipping Rep$tnum for in-memory databases." + return + } + + # This test needs to set its own pagesize. 
+ set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $args): Internal\ + initialization test: far-behind client\ + becomes master $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + puts "Rep$tnum: Client2 logs are [lindex $l 2]" + + rep054_sub $method $nentries $tnum $l $r $args + } + } +} + +proc rep054_sub { method nentries tnum logset recargs largs } { + global testdir + global util_path + global errorInfo + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + set omethod [convert_method $method] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. 
+ set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -log_max $log_max $verbargs $repmemargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -log_max $log_max $verbargs $repmemargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Open 2nd client + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs \ + $c2_logargs -log_max $log_max $verbargs $repmemargs \ + -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl2_envcmd $recargs -rep_client] + error_check_good client2_env [is_valid_env $clientenv2] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master and in each client. + puts "\tRep$tnum.a: Running rep_test in master & clients." 
+ set start 0 + eval rep_test $method $masterenv NULL $nentries $start $start 0 $largs + incr start $nentries + process_msgs $envlist + + # Master is in sync with both clients. + rep_verify $masterdir $masterenv $clientdir $clientenv + + # Process messages again in case we are running with debug_rop. + process_msgs $envlist + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 + + # Create a gap requiring internal initialization. This is the + # standard gap between the master and the first client -- we will + # make a much larger gap after we've created a new database that + # the first client doesn't know about. + # + set flags "" + set cid 2 + set dbhandle NULL + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $nentries $flags $largs] + + # Create a database that does not even exist on closed client 1. + set newfile "newtest.db" + set newdb [eval {berkdb_open_noerr -env $masterenv -create \ + -auto_commit -mode 0644} $largs $omethod $newfile] + error_check_good newdb_open [is_valid_db $newdb] TRUE + eval rep_test $method $masterenv $newdb $nentries $start $start 0 $largs + set start [expr $start + $nentries] + set envlist "{$masterenv 1} {$clientenv2 3}" + process_msgs $envlist + + # Identify last master log file. + set last_master_log [get_logfile $masterenv last] + set stop 0 + + # Send the master and client2 far ahead of client 1. Archive + # so there will be a gap between the log files of the closed + # client and the active master and client and we've + # archived away the creation of the new database. + puts "\tRep$tnum.e: Running rep_test in master & remaining client." + while { $stop == 0 } { + + eval rep_test \ + $method $masterenv NULL $nentries $start $start 0 $largs + incr start $nentries + + process_msgs $envlist + + puts "\tRep$tnum.f: Send master ahead of closed client." 
+ if { $m_logtype != "in-memory" } { + $masterenv log_flush + set res [eval exec $util_path/db_archive -d -h $masterdir] + } + if { $c2_logtype != "in-memory" } { + $clientenv2 log_flush + set res [eval exec $util_path/db_archive -d -h $clientdir2] + } + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > $last_master_log } { + set stop 1 + } + } + process_msgs $envlist + + # Master is in sync with client 2. + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 + + # Close master. + puts "\tRep$tnum.g: Close master." + error_check_good newdb_close [$newdb close] 0 + error_check_good close_master [$masterenv close] 0 + + # The new database is still there. + error_check_good newfile_exists [file exists $masterdir/$newfile] 1 + + puts "\tRep$tnum.h: Reopen client1 as master." + replclear 2 + set newmasterenv [eval $cl_envcmd $recargs -rep_master] + error_check_good newmasterenv [is_valid_env $newmasterenv] TRUE + + # Force something into the log + $newmasterenv txn_checkpoint -force + + puts "\tRep$tnum.i: Reopen master as client." + set oldmasterenv [eval $ma_envcmd $recargs -rep_client] + error_check_good oldmasterenv [is_valid_env $oldmasterenv] TRUE + set envlist "{$oldmasterenv 1} {$newmasterenv 2} {$clientenv2 3}" + process_msgs $envlist + + rep_verify $clientdir $newmasterenv $masterdir $oldmasterenv 1 + + error_check_good newmasterenv_close [$newmasterenv close] 0 + error_check_good oldmasterenv_close [$oldmasterenv close] 0 + error_check_good clientenv2_close [$clientenv2 close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep055.tcl b/test/tcl/rep055.tcl new file mode 100644 index 00000000..b7dd1225 --- /dev/null +++ b/test/tcl/rep055.tcl @@ -0,0 +1,227 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep055 +# TEST Test of internal initialization and log archiving. 
+# TEST +# TEST One master, one client. +# TEST Generate several log files. +# TEST Remove old master log files and generate several more. +# TEST Get list of archivable files from db_archive and restart client. +# TEST As client is in the middle of internal init, remove +# TEST the log files returned earlier by db_archive. +# +proc rep055 { method { niter 200 } { tnum "055" } args } { + + source ./include.tcl + global mixed_mode_logging + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + # This test is all about log archive issues, so don't run with + # in-memory logging. + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + set opts { clean noclean } + foreach r $test_recopts { + foreach c $opts { + puts "Rep$tnum ($method $r $c $args):\ + Test of internal initialization $msg $msg2." 
+ rep055_sub $method $niter $tnum $r $c $args + + } + } +} + +proc rep055_sub { method niter tnum recargs opts largs } { + global testdir + global passwd + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx MASTER \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + $masterenv rep_limit 0 0 + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx CLIENT \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + $clientenv rep_limit 0 0 + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." 
+ set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Move the master forward until its first log file has + # a higher number than the last client log file. + set m_logtype "on-disk" + set flags "" + set dbhandle NULL + set cid 2 + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $largs] + + # Now move the master forward one more log, so that we can + # have a log_archive waiting to happen. + # + set last_master_log [get_logfile $masterenv last] + + set stop 0 + puts "\tRep$tnum.e: Move master logs forward again." + while { $stop == 0 } { + # Run rep_test in the master (don't update client). + eval rep_test \ + $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + replclear 2 + + set last_log [get_logfile $masterenv last] + if { $last_log != $last_master_log } { + set stop 1 + } + } + + puts "\tRep$tnum.f: Get list of files for removal." + $masterenv log_flush + set logs [eval exec $util_path/db_archive -h $masterdir] + + puts "\tRep$tnum.g: Reopen client ($opts)." + if { $opts == "clean" } { + env_cleanup $clientdir + } + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + $clientenv rep_limit 0 0 + set envlist "{$masterenv 1} {$clientenv 2}" + # + # Process messages once to get partially through internal init. + # + proc_msgs_once $envlist NONE err + + if { $opts != "clean" } { + puts "\tRep$tnum.g.1: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. 
+ # + set entries 10 + eval rep_test \ + $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + # + # Process messages three times to get us into internal init + # but not enough to get us all the way through it. + # + proc_msgs_once $envlist NONE err + proc_msgs_once $envlist NONE err + proc_msgs_once $envlist NONE err + } + + # + # Now in the middle of internal init, remove the log files + # db_archive reported earlier. + # + foreach l $logs { + fileremove -f $masterdir/$l + } + # + # Now finish processing all the messages. + # + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.h: Verify logs and databases" + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep058.tcl b/test/tcl/rep058.tcl new file mode 100644 index 00000000..d096658c --- /dev/null +++ b/test/tcl/rep058.tcl @@ -0,0 +1,158 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep058 +# TEST +# TEST Replication with early databases +# TEST +# TEST Mimic an application where they create a database before +# TEST calling rep_start, thus writing log records on a client +# TEST before it is a client. Verify we cannot join repl group. + +proc rep058 { method { tnum "058" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # There should be no difference with methods. Just use btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep058: Skipping for method $method." 
+ return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping test with -recover for\ + in-memory logs." + continue + } + + puts "Rep$tnum ($method $r): Replication with \ + pre-created databases $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep058_sub $method $tnum $l $r $args + } + } +} + +proc rep058_sub { method tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set orig_tdir $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client. + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + set omethod [convert_method $method] + + # Open a master. 
+ repladd 1 + set envcmd(M) "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -lock_detect default $verbargs $repmemargs \ + $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set menv [eval $envcmd(M) $recargs] + + # Open a client + repladd 2 + set envcmd(C) "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -lock_detect default $verbargs $repmemargs \ + $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set cenv [eval $envcmd(C) $recargs] + error_check_good client_env [is_valid_env $cenv] TRUE + + puts "\tRep$tnum.a: Create same database in both envs." + set dbname "test.db" + set mdb [eval {berkdb_open_noerr -env $menv -create \ + -auto_commit -mode 0644} -btree $dbname] + error_check_good open [is_valid_db $mdb] TRUE + set cdb [eval {berkdb_open_noerr -env $cenv -create \ + -auto_commit -mode 0644} -btree $dbname] + error_check_good open [is_valid_db $cdb] TRUE + + puts "\tRep$tnum.b: Start master and client now." + error_check_good master [$menv rep_start -master] 0 + error_check_good client [$cenv rep_start -client] 0 + # + # We'll only catch this error if we turn off autoinit. + # Otherwise, the system will throw away everything on the + # client and resync. + # + $cenv rep_config {autoinit off} + + set envlist "{$menv 1} {$cenv 2}" + process_msgs $envlist 0 NONE err + error_check_good msg_err [is_substr $err "REP_JOIN_FAILURE"] 1 + + puts "\tRep$tnum.c: Clean up." + error_check_good cdb_close [$cdb close] 0 + error_check_good cdb_close [$mdb close] 0 + + error_check_good menv_close [$menv close] 0 + error_check_good cenv_close [$cenv close] 0 + + replclose $testdir/MSGQUEUEDIR + set testdir $orig_tdir + return +} + diff --git a/test/tcl/rep060.tcl b/test/tcl/rep060.tcl new file mode 100644 index 00000000..168fa708 --- /dev/null +++ b/test/tcl/rep060.tcl @@ -0,0 +1,349 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep060 +# TEST Test of normally running clients and internal initialization. +# TEST Have a client running normally, but slow/far behind the master. +# TEST Then the master checkpoints and archives, causing the client +# TEST to suddenly be thrown into internal init. This test tests +# TEST that we clean up the old files/pages in mpool and dbreg. +# TEST Also test same thing but the app holding an open dbp as well. +# +proc rep060 { method { niter 200 } { tnum "060" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Run for btree and queue only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping rep060 for method $method." + return + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + # + # 'user' means that the "app" (the test in this case) has + # its own handle open to the database. 
+ set opts { "" user } + foreach r $test_recopts { + foreach o $opts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $o $args):\ + Test of internal initialization and\ + slow client $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep060_sub $method $niter $tnum $l $r $o $args + } + } + } +} + +proc rep060_sub { method niter tnum logset recargs opt largs } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 4] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $repmemargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + puts "\tRep$tnum.a: Open client." + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $repmemargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init. + # + $masterenv test force noarchive_timeout + + # Set a low limit so that there are lots of reps between + # master and client. This allows greater control over + # the test. + error_check_good thr [$masterenv rep_limit 0 [expr 10 * 1024]] 0 + + # It is *key* to this test that we have a database handle + # open for the duration of the test. The problem this + # test checks for regards internal init when there are open + # database handles around. + # + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + + # Put some data into the database, running the master up past + # log file 10, discarding messages to the client so that it will + # be forced to request them as a gap. + # + puts "\tRep$tnum.c: Run rep_test in master env." + set start 0 + + set stop 0 + set endlog 10 + while { $stop == 0 } { + # Run test in the master (don't update client). 
+ eval rep_test $method \ + $masterenv $db $niter $start $start 0 $largs + incr start $niter + replclear 2 + + if { $m_logtype != "in-memory" } { + $masterenv log_flush + set res \ + [eval exec $util_path/db_archive -l -h $masterdir] + } + # Make sure the master has gone as far as we requested. + set last_master_log [get_logfile $masterenv last] + if { $last_master_log > $endlog } { + set stop 1 + } + } + + # Do one more set of txns at the master, replicating log records + # normally, to give the client a chance to notice how many messages + # it is missing. + # + eval rep_test $method $masterenv $db $niter $start $start 0 $largs + incr start $niter + + set stop 0 + set client_endlog 5 + set last_client_log 0 + set nproced 0 + incr nproced [proc_msgs_once $envlist NONE err] + incr nproced [proc_msgs_once $envlist NONE err] + + puts "\tRep$tnum.d: Client catches up partway." + error_check_good ckp [$masterenv txn_checkpoint] 0 + + # We have checkpointed on the master, but we want to get the + # client a healthy way through the logs before archiving on + # the master. + while { $stop == 0 } { + set nproced 0 + incr nproced [proc_msgs_once $envlist NONE err] + if { $nproced == 0 } { + error_check_good \ + ckp [$masterenv txn_checkpoint -force] 0 + } + + # Stop processing when the client is partway through. + if { $c_logtype != "in-memory" } { + $clientenv log_flush + set res \ + [eval exec $util_path/db_archive -l -h $clientdir] + } + set last_client_log [get_logfile $clientenv last] + set first_client_log [get_logfile $clientenv first] + if { $last_client_log > $client_endlog } { + set stop 1 + } + } + + # + # The user may have the database open itself. 
+ # + if { $opt == "user" } { + set cdb [eval {berkdb_open_noerr -env} $clientenv $dbname] + error_check_good dbopen [is_valid_db $cdb] TRUE + set ccur [$cdb cursor] + error_check_good curs [is_valid_cursor $ccur $cdb] TRUE + set ret [$ccur get -first] + set kd [lindex $ret 0] + set key [lindex $kd 0] + error_check_good cclose [$ccur close] 0 + } else { + set cdb NULL + } + + # Now that the client is well on its way of normal processing, + # simply fairly far behind the master, archive on the master, + # removing the log files the client needs, sending it into + # internal init with the database pages reflecting the client's + # current LSN. + # + puts "\tRep$tnum.e: Force internal initialization." + if { $m_logtype != "in-memory" } { + puts "\tRep$tnum.e1: Archive on master." + $masterenv log_flush + set res [eval exec $util_path/db_archive -d -h $masterdir] + } else { + # Master is in-memory, and we'll need a different + # technique to create the gap forcing internal init. + puts "\tRep$tnum.e1: Run rep_test until gap is created." + set stop 0 + while { $stop == 0 } { + eval rep_test $method $masterenv \ + NULL $niter $start $start 0 $largs + incr start $niter + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > $last_client_log } { + set stop 1 + } + } + } + + puts "\tRep$tnum.f: Process messages." + if { $opt == "user" } { + for { set loop 0 } { $loop < 5 } { incr loop } { + set nproced 0 + incr nproced [proc_msgs_once $envlist] + if { $cdb == "NULL" } { + continue + } + puts "\tRep$tnum.g.$loop: Check user database." + set status [catch {$cdb get $key} ret] + if { $status != 0 } { + # + # For db operations, DB doesn't block, but + # returns DEADLOCK. 
+ # + set is_lock [is_substr $ret DB_LOCK_DEADLOCK] + set is_dead [is_substr $ret DB_REP_HANDLE_DEAD] + error_check_good lock_dead \ + [expr $is_lock || $is_dead] 1 + if { $is_dead } { + error_check_good cclose [$cdb close] 0 + set cdb NULL + } + } + } + } + process_msgs $envlist + + # + # If we get through the user loop with a valid db, then it better + # be a dead handle after we've completed processing all the + # messages and running recovery. + # + if { $cdb != "NULL" } { + puts "\tRep$tnum.h: Check dead handle." + set status [catch {$cdb get $key} ret] + error_check_good status $status 1 + error_check_good is_dead [is_substr $ret DB_REP_HANDLE_DEAD] 1 + error_check_good cclose [$cdb close] 0 + puts "\tRep$tnum.i: Verify correct internal initialization." + } else { + puts "\tRep$tnum.h: Verify correct internal initialization." + } + error_check_good close [$db close] 0 + process_msgs $envlist + + # We have now forced an internal initialization. Verify it is correct. + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + # Check that logs are in-memory or on-disk as expected. + check_log_location $masterenv + check_log_location $clientenv + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep061.tcl b/test/tcl/rep061.tcl new file mode 100644 index 00000000..bb067526 --- /dev/null +++ b/test/tcl/rep061.tcl @@ -0,0 +1,458 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep061 +# TEST Test of internal initialization multiple files and pagesizes +# TEST with page gaps. +# TEST +# TEST One master, one client. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Delete client files and restart client. +# TEST Put one more record to the master. +# TEST Force some page messages to get dropped. 
+# +proc rep061 { method { niter 500 } { tnum "061" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Run for btree and queue only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping rep061 for method $method." + return + } + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + set opts { noclean clean bulk } + # Try varying drop percentages. + set dpct { 10 5 } + foreach r $test_recopts { + foreach c $opts { + foreach l $logsets { + foreach d $dpct { + set logindex [lsearch -exact $l \ + "in-memory"] + if { $r == "-recover" && \ + $logindex != -1 } { + puts "Skipping rep$tnum \ + for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $c):\ + Internal initialization - with\ + $d pct page gaps, $msg $msg2." 
+ puts "Rep$tnum: Master logs are \ + [lindex $l 0]" + puts "Rep$tnum: Client logs are \ + [lindex $l 1]" + rep061_sub $method $niter $tnum \ + $l $r $c $d $args + } + } + } + } +} + +proc rep061_sub { method niter tnum logset recargs opts dpct largs } { + global testdir + global util_path + global drop drop_msg + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set maxpg 16384 + set log_max [expr $maxpg * 8] + set cache [expr $maxpg * 32] + if { $repfiles_in_memory } { + set cache [expr ($maxpg * 32) + (3000 * 1024)] + } + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $verbargs \ + $repmemargs \ + -log_max $log_max -cachesize { 0 $cache 1 } -errpfx MASTER \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $verbargs \ + $repmemargs \ + -log_max $log_max -cachesize { 0 $cache 1 } -errpfx CLIENT \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # + # Since we're dropping messages, set the rerequest values + # lower so we don't wait too long to request what we're + # missing. + # + # + # Set to 200/800 usecs. An average ping to localhost should + # be a few 10s usecs. + # + set conf [berkdb getconfig] + set small_rereq 1 + if { [is_substr $conf "debug_rop"] && [is_queue $method] } { + set small_rereq 0 + } + if { $small_rereq } { + $clientenv rep_request 200 800 + } else { + $clientenv rep_request 20000 80000 + } + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # + # Note that by setting these 2 globals below, message dropping + # is automatically enabled. By setting 'drop' to 0, further + # down in the test, we disable message dropping. + # + set drop 1 + set drop_msg [expr 100 / $dpct] + + # Run rep_test in the master (and update client). + set startpgsz 512 + set pglist "" + for { set pgsz $startpgsz } { $pgsz <= $maxpg } \ + { set pgsz [expr $pgsz * 2] } { + lappend pglist $pgsz + } + set nfiles [llength $pglist] + puts "\tRep$tnum.a.0: Running rep_test $nfiles times in replicated env." 
+ set dbopen "" + for { set i 0 } { $i < $nfiles } { incr i } { + set mult [expr $i * 10] + set nentries [expr $niter + $mult] + set pagesize [lindex $pglist $i] + set largs " -pagesize $pagesize " + eval rep_test $method $masterenv NULL $nentries $mult $mult \ + 0 $largs + process_msgs $envlist + + # + # Everytime we run 'rep_test' we create 'test.db'. So + # rename it each time through the loop. + # + set old "test.db" + set new "test.$i.db" + + if { $databases_in_memory } { + error_check_good rename [$masterenv dbrename \ + -auto_commit "" $old $new] 0 + } else { + error_check_good rename [$masterenv dbrename \ + -auto_commit $old $new] 0 + } + process_msgs $envlist + + # + # We want to keep some databases open so that we test the + # code finding the files in the data dir as well as finding + # them in dbreg list. + # + if { [expr $i % 2 ] == 0 } { + if { $databases_in_memory } { + set db [berkdb_open_noerr -env $masterenv "" $new] + } else { + set db [berkdb_open_noerr -env $masterenv $new] + } + error_check_good dbopen.$i [is_valid_db $db] TRUE + lappend dbopen $db + } + } + # + # Set up a few special databases too. We want one with a subdatabase + # and we want an empty database. + # + if { $databases_in_memory } { + set testfile { "" "test.db" } + set emptyfile { "" "empty.db" } + } else { + set testfile "test.db" + set emptyfile "empty.db" + } + + if { [is_queue $method] } { + set sub "" + } else { + set sub "subdb" + } + set omethod [convert_method $method] + set largs " -pagesize $maxpg " + set largs [convert_args $method $largs] + + # + # Create/close an empty database. + # + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit -create \ + -mode 0644} $largs $omethod $emptyfile] + error_check_good emptydb [is_valid_db $db] TRUE + error_check_good empty_close [$db close] 0 + + # If we're not using in-mem named databases, open a subdb and keep + # it open. (Do a regular db if method is queue.) + # We need it a few times later on. 
+ # + if { $databases_in_memory } { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit\ + -create -mode 0644} $largs $omethod $testfile] + } else { + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit\ + -create -mode 0644} $largs $omethod $testfile $sub] + } + error_check_good subdb [is_valid_db $db] TRUE + eval rep_test $method $masterenv $db $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Close client." + error_check_good client_close [$clientenv close] 0 + + # + # Run rep_test in the master (don't update client). + # Need to guarantee that we will change log files during + # this run so run with the largest pagesize and double + # the number of entries. + # + puts "\tRep$tnum.c: Running rep_test ( $largs) in replicated env." + set nentries [expr $niter * 2] + eval rep_test $method $masterenv $db $nentries 0 0 0 $largs + replclear 2 + + puts "\tRep$tnum.d: Run db_archive on master." + $masterenv log_flush + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_bad log.1.present [lsearch -exact $res log.0000000001] -1 + set res [eval exec $util_path/db_archive -d -h $masterdir] + set res [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good log.1.gone [lsearch -exact $res log.0000000001] -1 + + puts "\tRep$tnum.e: Reopen client ($opts)." + if { $opts == "clean" } { + env_cleanup $clientdir + } + if { $opts == "bulk" } { + error_check_good bulk [$masterenv rep_config {bulk on}] 0 + } + + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + # + # Since we are dropping frequent messages, we set the + # rerequest rate low to make sure the test finishes. + # Only do this for non-debug_rop because that generates + # so much traffic, it hangs the test with rerequests. 
+ # + if { $small_rereq } { + $clientenv rep_request 200 800 + } else { + $clientenv rep_request 20000 80000 + } + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + set done 0 + # + # We are done with this loop when the client has achieved + # startup_done and we've looped one more time after turning + # off dropping messages. Otherwise we might get a few + # straggling log records that don't make it over. + # + # Set a maximum iteration count because some methods can get + # into a state where if we're regularly dropping messages we + # can never catch up (queue) and we loop forever. + # + set iter 1 + set max_drop_iter 200 + if { $opts == "bulk" } { + # + # Since bulk is sending several messages at once we need to + # loop more times to allow rerequests to get through. + # + set max_drop_iter [expr $max_drop_iter * 2] + if { $small_rereq } { + $clientenv rep_request 100 400 + } else { + $clientenv rep_request 10000 40000 + } + } + while { $done == 0 } { + puts "\tRep$tnum.e.1.$iter: Trigger log request" + # + # When we don't clean, starting the client doesn't + # trigger any events. We need to generate some log + # records so that the client requests the missing + # logs and that will trigger it. + # + set entries 4 + eval rep_test $method $masterenv $db $entries $niter 0 0 $largs + process_msgs $envlist 0 NONE err + set stat [exec $util_path/db_stat -N -r -R A -h $clientdir] + # + # Loop until we are done with the SYNC_PAGE phase. + # + set in_page [is_substr $stat "SYNC_PAGE"] + if { !$in_page || $iter >= $max_drop_iter } { + # + # If we're dropping, stop doing so. + # If we're not dropping, we're done. + # + if { $drop != 0 } { + set drop 0 + } else { + set done 1 + } + } + incr iter + } + error_check_good subdb_close [$db close] 0 + # + # Stop dropping records, we've sent all the pages. + # We need to do that in order to make sure we get + # all the log records there and can accurately compare. 
Also, make sure + # enough time has passed so that the client's rep_request timer has + # expired, and make sure there are any messages to send to the client, + # so that there is something to trigger any needed final rerequest. + # + set drop 0 + tclsleep 2 + $masterenv txn_checkpoint -force + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.f: Verify logs and databases" + # + # If doing bulk testing, turn it off now so that it forces us + # to flush anything currently in the bulk buffer. We need to + # do this because rep_test might have aborted a transaction on + # its last iteration and those log records would still be in + # the bulk buffer causing the log comparison to fail. + # + if { $opts == "bulk" } { + puts "\tRep$tnum.f.1: Turn off bulk transfers." + error_check_good bulk [$masterenv rep_config {bulk off}] 0 + process_msgs $envlist 0 NONE err + } + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + for { set i 0 } { $i < $nfiles } { incr i } { + set dbname "test.$i.db" + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 1 1 0 $dbname + } + + # + # Close the database held open on master for initialization. + # + foreach db $dbopen { + error_check_good db_close [$db close] 0 + } + + # Add records to the master and update client. + puts "\tRep$tnum.g: Add more records and check again." 
+ set entries 10 + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -mode 0644} $largs $omethod $testfile $sub] + error_check_good subdb [is_valid_db $db] TRUE + eval rep_test $method $masterenv $db $entries $niter 0 0 $largs + error_check_good subdb_close [$db close] 0 + process_msgs $envlist 0 NONE err + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + for { set i 0 } { $i < $nfiles } { incr i } { + set dbname "test.$i.db" + rep_verify $masterdir $masterenv $clientdir $clientenv \ + 1 1 0 $dbname + } + set bulkxfer [stat_field $masterenv rep_stat "Bulk buffer transfers"] + if { $opts == "bulk" } { + error_check_bad bulkxferon $bulkxfer 0 + } else { + error_check_good bulkxferoff $bulkxfer 0 + } + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep062.tcl b/test/tcl/rep062.tcl new file mode 100644 index 00000000..18e45ea7 --- /dev/null +++ b/test/tcl/rep062.tcl @@ -0,0 +1,308 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep062 +# TEST Test of internal initialization where client has a different +# TEST kind of database than the master. +# TEST +# TEST Create a master of one type, and let the client catch up. +# TEST Close the client. +# TEST Remove the database on the master, and create a new +# TEST database of the same name but a different type. +# TEST Run the master ahead far enough that internal initialization +# TEST will be required on the reopen of the client. +# TEST Reopen the client and verify. + +proc rep062 { method {tnum "062"} args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # This test uses different access methods internally. + # Called from outside, accept only btree. 
+ if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] != 1 } { + puts "Skipping rep$tnum for method $method." + return + } + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r):\ + Internal initialization with change in\ + access method of database $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep062_sub $method $tnum $l $r $args + } + } +} + +proc rep062_sub { method tnum logset recargs largs } { + global testdir + global util_path + global passwd + global has_crypto + global encrypt + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set maxpg 16384 + set log_max [expr $maxpg * 8] + set cache [expr $maxpg * 32] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Set up pairs of databases to test. The first element is whether + # to open an encrypted env, the second is the original database + # method and flags, the third is the replacement database and flags. 
+ set pairlist { + { 0 {btree ""} {hash ""} } + { 0 {queueext "-pagesize 2048"} {queue ""} } + { 0 {queueext ""} {btree ""} } + { 0 {queue ""} {recno ""} } + { 0 {hash ""} {queue ""} } + { 0 {recno ""} {btree ""} } + { 0 {hash ""} {queueext "-pagesize 16384"} } + { 0 {queueext "-pagesize 2048"} {queueext "-pagesize 16384"} } + { 0 {queueext "-pagesize 16384"} {queueext "-pagesize 2048"} } + { 0 {queue ""} {queueext "-pagesize 16384"} } + { 1 {btree ""} {btree "-encrypt"} } + { 1 {btree "-encrypt"} {btree ""} } + { 1 {queue ""} {queue "-encrypt"} } + { 1 {queue "-encrypt"} {queue ""} } + } + + foreach p $pairlist { + # Extract values from the list. + set encryptenv [lindex [lindex $p 0] 0] + set encryptmsg "clear" + if { $has_crypto == 0 && $encryptenv == 1 } { + continue + } + if { $encryptenv == 1 } { + set encryptmsg "encrypted" + } + + set method1 [lindex [lindex $p 1] 0] + set method2 [lindex [lindex $p 2] 0] + if { $databases_in_memory } { + if { [is_queueext $method1] || [is_queueext $method2] } { + puts "Skipping this set for in-memory databases" + continue + } + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir $masterdir + file mkdir $clientdir + + set flags1 [lindex [lindex $p 1] 1] + set flags2 [lindex [lindex $p 2] 1] + + puts "Rep$tnum: Testing with $encryptmsg env." + puts -nonewline "Rep$tnum: Replace [lindex $p 1] " + puts "database with [lindex $p 2] database." + + # Set up flags for encryption if necessary. + set envflags "" + set enc "" + if { $encryptenv == 1 } { + set envflags "-encryptaes $passwd" + set enc " -P $passwd" + } + + # Derive args for specified methods. + set args1 [convert_args $method1 ""] + set args2 [convert_args $method2 ""] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -log_max $log_max $verbargs -errpfx MASTER \ + -cachesize { 0 $cache 1 } $envflags $repmemargs \ + $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -log_max $log_max $verbargs -errpfx CLIENT \ + -cachesize { 0 $cache 1 } $envflags $repmemargs \ + $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Open two databases on the master - one to test different + # methods, one to advance the log, forcing internal + # initialization. + + puts "\tRep$tnum.a: Open test database (it will change methods)." + if { $databases_in_memory } { + set testfile { "" "test.db" } + set testfile2 { "" "test2.db" } + } else { + set testfile "test.db" + set testfile2 "test2.db" + } + + set omethod1 [convert_method $method1] + set db1 [eval {berkdb_open} -env $masterenv -auto_commit \ + -create $omethod1 $flags1 $args1 -mode 0644 $testfile] + error_check_good db1open [is_valid_db $db1] TRUE + + puts "\tRep$tnum.b: Open log-advance database." + set db2 [eval {berkdb_open} -env $masterenv -auto_commit \ + -create $omethod1 $flags1 $args1 -mode 0644 $testfile2] + error_check_good db2open [is_valid_db $db2] TRUE + + puts "\tRep$tnum.c: Add a few records to test db." 
+ set nentries 10 + set start 0 + eval rep_test $method1 \ + $masterenv $db1 $nentries $start $start 0 $args1 + incr start $nentries + process_msgs $envlist + + # Close the database on the master, and if it's on-disk, + # remove it. Now create a new database of different type. + puts "\tRep$tnum.e: Remove test database." + error_check_good db1_close [$db1 close] 0 + error_check_good db1_remove [eval {$masterenv dbremove} $testfile] 0 + + puts "\tRep$tnum.f: \ + Create new test database; same name, different method." + set omethod2 [convert_method $method2] + set db1 [eval {berkdb_open} -env $masterenv -auto_commit \ + -create $omethod2 $flags2 $args2 -mode 0644 $testfile] + error_check_good db1open [is_valid_db $db1] TRUE + + # Create a gap requiring internal initialization. Run the + # master forward in the extra db. + set flags $enc + set dbhandle $db2 + set cid 2 + set niter 100 + set start [push_master_ahead $omethod1 $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter $flags $args2] + + puts "\tRep$tnum.i: Reopen client." + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.j: Add a few records to cause initialization." + set entries 20 + eval rep_test $method2 \ + $masterenv $db1 $entries $start $start 0 $args2 + incr start $entries + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.k: Verify logs and databases" + # Make sure encryption value is correct. + if { $encryptenv == 1 } { + set encrypt 1 + } + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + set testfile2 "test2.db" + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 $testfile2 + + # Check that logs are in-memory or on-disk as expected. 
+ check_log_location $masterenv + check_log_location $clientenv + + error_check_good db1_close [$db1 close] 0 + error_check_good db2_close [$db2 close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR + } +} + diff --git a/test/tcl/rep063.tcl b/test/tcl/rep063.tcl new file mode 100644 index 00000000..38ec6837 --- /dev/null +++ b/test/tcl/rep063.tcl @@ -0,0 +1,388 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep063 +# TEST Replication election test with simulated different versions +# TEST for each site. This tests that old sites with real priority +# TEST trump ELECTABLE sites with zero priority even with greater LSNs. +# TEST There is a special case in the code for testing that if the +# TEST priority is <= 10, we simulate mixed versions for elections. +# TEST +# TEST Run a rep_test in a replicated master environment and close; +# TEST hold an election among a group of clients to make sure they select +# TEST the master with varying LSNs and priorities. +# +proc rep063 { method args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + set tnum "063" + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + set nclients 3 + set logsets [create_logsets [expr $nclients + 1]] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + set recopts { "" "-recover" } + foreach r $recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication\ + elections with varying versions $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep063_sub $method $nclients $tnum $l $r $args + } + } +} + +proc rep063_sub { method nclients tnum logset recargs largs } { + source ./include.tcl + global electable_pri + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set niter 80 + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. 
+ set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -event $repmemargs \ + -home $masterdir $m_txnargs $m_logargs -rep_master $verbargs \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M) $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + lappend envlist "$masterenv 1" + + # Open the clients. + for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create -home $clientdir($i) \ + -event $repmemargs \ + $c_txnargs($i) $c_logargs($i) -rep_client \ + -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i) $recargs] + error_check_good \ + client_env($i) [is_valid_env $clientenv($i)] TRUE + lappend envlist "$clientenv($i) $envid" + } + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # + # We remove some client envs and run rep_test so that we can + # force some client LSNs to be further ahead/behind than others. + # When we're done, the LSNs look like this: + # + # Client0: ...................... + # Client1: ........... + # Client2: ........... + # + # Remove client 1 and 2 from list to process, this guarantees + # client 0 is ahead in LSN. + # + set orig_env $envlist + # + # Counting the master, client 2 is the 4th item, or index 3. + # Client 1 is the 3rd item, or index 2. Remove them both. + # + set envlist [lreplace $envlist 2 3] + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + # + # Put all removed clients back in. Close master and + # remove it from the list. 
+ # + set envlist $orig_env + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + for { set i 0 } { $i < $nclients } { incr i } { + replclear [expr $i + 2] + # + # This test doesn't use the testing hooks, so + # initialize err_cmd and crash appropriately. + # + set err_cmd($i) "none" + set crash($i) 0 + # + # Initialize the array pri. We'll set it to + # appropriate values when the winner is determined. + # + set pri($i) 0 + # + if { $rep_verbose == 1 } { + error_check_good pfx [$clientenv($i) errpfx CLIENT$i] 0 + $clientenv($i) verbose $verbose_type on + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT$i $verbargs "] + } + } + set m "Rep$tnum.b" + # + # Client 0 has the biggest LSN of clients 0, 1, 2. + # However, 'setpriority' will set the priority of client 1 + # to simulate client 1 being an "older version" client. + # Client 1 should win even though its LSN is smaller. + # This tests one "older" client and the rest "newer". + # + # Client0: ...................... (0/Electable - bigger LSN) + # Client1: ........... ("old" version - should win) + # Client2: ........... (0/Electable) + # + # + puts "\t$m: Test old client trumps new clients with bigger LSN." + set nsites $nclients + set nvotes $nclients + set winner 1 + set elector 2 + setpriority pri $nclients $winner 0 1 + + # Set up databases as in-memory or on-disk and run the election. + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + run_election envlist err_cmd pri crash\ + $qdir $m $elector $nsites $nvotes $nclients $winner 0 $dbname + + # + # In all of the checks of the Election Priority stat field, + # we use clientenv(2). The reason is that we never expect + # client 2 to be the winner. The env handles of client 0 and 1 + # are getting closed and reopened as a master/client in + # the election and the old recorded handles are invalid. + # This one is known to be valid throughout the entire test. 
+ # + error_check_bad old_pri [stat_field $clientenv(2) rep_stat \ + "Election priority"] 0 + # + # When we finish the election, all clients are at the same LSN. + # Call this proc to make the winner have a larger LSN than the + # other 2 remaining clients, and reopen the winner as a client. + # + rep063_movelsn_reopen $method envlist $env_cmd($winner) $winner $largs + + set m "Rep$tnum.c" + puts "\t$m: Test old client with zero priority new client." + # + # Client 1 now has a bigger LSN, so make client 0 the old client + # and client 1 a real 0 priority new client. + # + # Client0: ........... ("old" version site - should win) + # Client1: ...................... (0 priority for real) + # Client2: ........... (0/Electable) + # + # + set winner 0 + setpriority pri $nclients $winner 0 1 + set pri(1) 0 + run_election envlist err_cmd pri crash $qdir \ + $m $elector $nsites $nvotes $nclients $winner 0 $dbname + error_check_bad old_pri [stat_field $clientenv(2) rep_stat \ + "Election priority"] 0 + rep063_movelsn_reopen $method envlist $env_cmd($winner) $winner $largs + + set m "Rep$tnum.d" + puts "\t$m: Test multiple old clients with new client." + # + # Client 0 now has a bigger LSN, so make client 1 winner. + # We are setting client 2's priority to something bigger so that + # we simulate having 2 "older version" clients (clients 1 and 2) + # and one new client (client 0). This tests that the right client + # among the older versions gets correctly elected even though there + # is a bigger LSN "new" client participating. + # + # Client0: ...................... (0/Electable) + # Client1: ........... ("old" version - should win) + # Client2: ........... 
("old" version - real but lower priority) + # + # + set winner 1 + setpriority pri $nclients $winner 0 1 + set pri(2) [expr $pri(1) / 2] + run_election envlist err_cmd pri crash $qdir \ + $m $elector $nsites $nvotes $nclients $winner 0 $dbname + error_check_bad old_pri [stat_field $clientenv(2) rep_stat \ + "Election priority"] 0 + rep063_movelsn_reopen $method envlist $env_cmd($winner) $winner $largs + + set m "Rep$tnum.e" + puts "\t$m: Test new clients, client 1 not electable." + # + # Client 1 now has a bigger LSN, so make it unelectable. + # Set other priorities to electable_pri to make them all equal (and + # all "new" clients). + # + # Client0: ........... (0/Electable) + # Client1: ...................... (0 priority for real) + # Client2: ........... (0/Electable) + # + # + set pri(0) $electable_pri + set pri(1) 0 + set pri(2) $electable_pri + set winner 0 + set altwin 2 + # + # Winner should be zero priority. Winner could be either 0 or 2. + # We just want to make sure that site 1 does not win. So catch + # any election where the primary winner didn't win and check that + # the alternate winner won. (This is similar to rep002). + # + if {[catch {run_election envlist err_cmd pri crash $qdir \ + $m $elector $nsites $nvotes $nclients $winner 0 $dbname} res]} { + error_check_good check_winner [is_substr \ + $res "expected 2, got [expr $altwin + 2]"] 1 + puts "\t$m: Alternate winner $altwin won." + set winner $altwin + error_check_good make_master \ + [$clientenv($winner) rep_start -master] 0 + cleanup_elections + process_msgs $envlist + } + error_check_good elect_pri [stat_field $clientenv(2) rep_stat \ + "Election priority"] 0 + rep063_movelsn_reopen $method envlist $env_cmd($winner) $winner $largs + + # + # Test with all being electable clients. Whichever site won + # last time should still be the winner. + # + set m "Rep$tnum.f" + puts "\t$m: Test all new electable clients." 
+ set nsites $nclients + set nvotes $nclients + set pri(0) $electable_pri + set pri(1) $electable_pri + set pri(2) $electable_pri + replclear [expr $winner + 2] + run_election envlist err_cmd pri crash $qdir \ + $m $elector $nsites $nvotes $nclients $winner 0 $dbname + + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + replclose $testdir/MSGQUEUEDIR +} + +# +# Move the LSN ahead on the newly elected master, while not sending +# those messages to the other clients. Then close the env and +# reopen it as a client. Use upvar so that the envlist is +# modified when we return and can get messages. +# +proc rep063_movelsn_reopen { method envlist env_cmd eindex largs } { + upvar $envlist elist + + set clrlist { } + set i 0 + foreach e $elist { + # + # If we find the master env entry, get its env handle. + # If not, then get the id so that we can replclear it later. + # + if { $i == $eindex } { + set masterenv [lindex $e 0] + } else { + lappend clrlist [lindex $e 1] + } + incr i + } + # + # Move this env's LSN ahead. + # + set niter 10 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + foreach cl $clrlist { + replclear $cl + } + # + # Now close this env and reopen it as a client. + # + error_check_good newmaster_close [$masterenv close] 0 + set newclenv [eval $env_cmd] + error_check_good cl [is_valid_env $newclenv] TRUE + set newenv "$newclenv [expr $eindex + 2]" + set elist [lreplace $elist $eindex $eindex $newenv] + process_msgs $elist +} diff --git a/test/tcl/rep064.tcl b/test/tcl/rep064.tcl new file mode 100644 index 00000000..7ed7667a --- /dev/null +++ b/test/tcl/rep064.tcl @@ -0,0 +1,178 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep064 +# TEST Replication rename and forced-upgrade test. 
+# TEST +# TEST The test verifies that the client correctly +# TEST (internally) closes files when upgrading to master. +# TEST It does this by having the master have a database +# TEST open, then crashing. The client upgrades to master, +# TEST and attempts to remove the open database. + +proc rep064 { method { niter 10 } { tnum "064" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for btree only. Since we're testing removal of a + # file, method doesn't make any difference. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + set logsets [create_logsets 2] + set args [convert_args $method $args] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication test\ + closure of open files on upgrade\ + $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep064_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep064_sub { method niter tnum logset recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + set c_logtype [lindex $logset 1] + set c_logargs [adjust_logargs $c_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -errpfx MASTER -errfile /dev/stderr $verbargs $repmemargs \ + $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -errpfx CLIENT -errfile /dev/stderr $verbargs $repmemargs \ + $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.a: Open an empty db, and leave it open." + # Set up names for the db to be left open and empty, and + # also for the db that we'll let rep_test open in part .b. 
+ if { $databases_in_memory } { + set opendb { "" "open.db" } + set testfile { "" "test.db" } + } else { + set opendb "open.db" + set testfile "test.db" + } + + set masterdb [eval {berkdb_open}\ + -env $masterenv -create -btree -auto_commit $opendb] + error_check_good db [is_valid_db $masterdb] TRUE + process_msgs $envlist + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.b: Open another db, and add some data." + eval rep_test $method $masterenv NULL $niter 0 0 $largs + process_msgs $envlist + + # This simulates a master crashing, we're the only one in the + # group. No need to process messages. + # + puts "\tRep$tnum.c: Upgrade client." + error_check_good client_upg [$clientenv rep_start -master] 0 + + puts "\tRep$tnum.d: Remove open databases." + set stat [catch {eval $clientenv dbremove -auto_commit $opendb} ret] + error_check_good remove_open_file $ret 0 + error_check_good remove_open_file $stat 0 + + set stat [catch {eval $clientenv dbremove -auto_commit $testfile} ret] + error_check_good remove_closed_file $ret 0 + error_check_good remove_closed_file $stat 0 + + error_check_good dbclose [$masterdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep065.tcl b/test/tcl/rep065.tcl new file mode 100644 index 00000000..f5800187 --- /dev/null +++ b/test/tcl/rep065.tcl @@ -0,0 +1,445 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep065 +# TEST Tests replication running with different versions. +# TEST This capability is introduced with 4.5. +# TEST +# TEST Start a replication group of 1 master and N sites, all +# TEST running some historical version greater than or equal to 4.4. +# TEST Take down a client and bring it up again running current. 
+# TEST Run some upgrades, make sure everything works. +# TEST +# TEST Each site runs the tcllib of its own version, but uses +# TEST the current tcl code (e.g. test.tcl). +proc rep065 { method { nsites 3 } args } { + source ./include.tcl + global repfiles_in_memory + global noenv_messaging + set noenv_messaging 1 + + # + # Skip all methods but btree - we don't use the method, as we + # run over all of them with varying versions. + # + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + + if { [is_btree $method] == 0 } { + puts "Rep065: Skipping for method $method." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Make the list of {method version} pairs to test. + # + set mvlist [method_version] + set mvlen [llength $mvlist] + puts "Rep065: Testing the following $mvlen method/version pairs:" + puts "Rep065: $mvlist" + puts "Rep065: $msg2" + set count 1 + set total [llength $mvlist] + set slist [setup_sites $nsites] + foreach i $mvlist { + puts "Rep065: Test iteration $count of $total: $i" + rep065_sub $count $i $nsites $slist + incr count + } + set noenv_messaging 0 +} + +proc rep065_sub { iter mv nsites slist } { + source ./include.tcl + global machids + global util_path + set machids {} + set method [lindex $mv 0] + set vers [lindex $mv 1] + + puts "\tRep065.$iter.a: Set up." + # Whatever directory we started this process from is referred + # to as the controlling directory. It will contain the message + # queue and start all the child processes. + set controldir [pwd] + env_cleanup $controldir/$testdir + replsetup_noenv $controldir/$testdir/MSGQUEUEDIR + + # Set up the historical build directory. The master will start + # running with historical code. + # + # This test presumes we are running in the current build + # directory and that the expected historical builds are + # set up in a similar fashion. 
If they are not, quit gracefully. + + set pwd [pwd] + set homedir [file dirname [file dirname $pwd]] + # + # Cannot use test_path because that is relative to the current + # directory (which will often be the old release directory). + # We need to send in the pathname to the reputils path to the + # current directory and that will be an absolute pathname. + # + set reputils_path $pwd/../test/tcl + set histdir $homedir/$vers/build_unix + if { [file exists $histdir] == 0 } { + puts -nonewline "Skipping iteration $iter: cannot find" + puts " historical version $vers." + return + } + if { [file exists $histdir/db_verify] == 0 } { + puts -nonewline "Skipping iteration $iter: historical version" + puts " $vers is missing some executables. Is it built?" + return + } + + set histtestdir $histdir/TESTDIR + + env_cleanup $histtestdir + set markerdir $controldir/$testdir/MARKER + file delete -force $markerdir + + # Create site directories. They start running in the historical + # directory, too. They will be upgraded to the current version + # first. + set allids { } + for { set i 0 } { $i < $nsites } { incr i } { + set siteid($i) [expr $i + 1] + set sid $siteid($i) + lappend allids $sid + set histdirs($sid) $histtestdir/SITE.$i + set upgdir($sid) $controldir/$testdir/SITE.$i + file mkdir $histdirs($sid) + file mkdir $upgdir($sid) + } + + # Open master env running 4.4. + # + # We know that slist has all sites starting in the histdir. + # So if we encounter an upgrade value, we upgrade that client + # from the hist dir. + # + set count 1 + foreach sitevers $slist { + puts "\tRep065.b.$iter.$count: Run with sitelist $sitevers." + # + # Delete the marker directory each iteration so that + # we don't find old data in there. + # + file delete -force $markerdir + file mkdir $markerdir + # + # Get the chosen master index from the list of sites. + # + set mindex [get_master $nsites $sitevers] + set meid [expr $mindex + 1] + + # + # Kick off the test processes. 
We need 1 test process + # per site and 1 message process per site. + # + set pids {} + for { set i 0 } { $i < $nsites } { incr i } { + set upg [lindex $sitevers $i] + set sid $siteid($i) + # + # If we are running "old" set up an array + # saying if this site has run old/new yet. + # The reason is that we want to "upgrade" + # only the first time we go from old to new, + # not every iteration through this loop. + # + if { $upg == 0 } { + puts -nonewline "\t\tRep065.b: Test: Old site $i" + set sitedir($i) $histdirs($sid) + set already_upgraded($i) 0 + } else { + puts -nonewline "\t\tRep065.b: Test: Upgraded site $i" + set sitedir($i) $upgdir($sid) + if { $already_upgraded($i) == 0 } { + upg_repdir $histdirs($sid) $sitedir($i) + } + set already_upgraded($i) 1 + } + if { $sid == $meid } { + set state MASTER + set runtest [list REPTEST $method 15 10] + puts " (MASTER)" + } else { + set state CLIENT + set runtest {REPTEST_GET} + puts " (CLIENT)" + } + lappend pids [exec $tclsh_path $test_path/wrap.tcl \ + rep065script.tcl \ + $controldir/$testdir/$count.S$i.log \ + SKIP \ + START $state \ + $runtest \ + $sid $allids $controldir \ + $sitedir($i) $reputils_path &] + lappend pids [exec $tclsh_path $test_path/wrap.tcl \ + rep065script.tcl \ + $controldir/$testdir/$count.S$i.msg \ + SKIP \ + PROCMSGS $state \ + NULL \ + $sid $allids $controldir \ + $sitedir($i) $reputils_path &] + } + + watch_procs $pids 20 + # + # At this point, clean up any message files. The message + # system leads to a significant number of duplicate + # requests. If the master site handled them after the + # client message processes exited, then there can be + # a large number of "dead" message files waiting for + # non-existent clients. Just clean up everyone. + # + for { set i 0 } { $i < $nsites } { incr i } { + replclear_noenv $siteid($i) + } + + # + # Kick off the verification processes. These just walk + # their own logs and databases, so we don't need to have + # a message process. 
We need separate processes because + # old sites need to use old utilities. + # + set pids {} + puts "\tRep065.c.$iter.$count: Verify all sites." + for { set i 0 } { $i < $nsites } { incr i } { + if { $siteid($i) == $meid } { + set state MASTER + } else { + set state CLIENT + } + lappend pids [exec $tclsh_path $test_path/wrap.tcl \ + rep065script.tcl \ + $controldir/$testdir/$count.S$i.ver \ + SKIP \ + VERIFY $state \ + {LOG DB} \ + $siteid($i) $allids $controldir \ + $sitedir($i) $reputils_path &] + } + + watch_procs $pids 10 + # + # Now that each site created its verification files, + # we can now verify everyone. + # + for { set i 0 } { $i < $nsites } { incr i } { + if { $i == $mindex } { + continue + } + puts \ + "\t\tRep065.c: Verify: Compare databases master and client $i" + error_check_good db_cmp \ + [filecmp $sitedir($mindex)/VERIFY/dbdump \ + $sitedir($i)/VERIFY/dbdump] 0 + set upg [lindex $sitevers $i] + # !!! + # Although db_printlog works and can read old logs, + # there have been some changes to the output text that + # makes comparing difficult. One possible solution + # is to run db_printlog here, from the current directory + # instead of from the historical directory. + # + if { $upg == 0 } { + puts \ + "\t\tRep065.c: Verify: Compare logs master and client $i" + error_check_good log_cmp \ + [filecmp $sitedir($mindex)/VERIFY/prlog \ + $sitedir($i)/VERIFY/prlog] 0 + } else { + puts \ + "\t\tRep065.c: Verify: Compare LSNs master and client $i" + error_check_good log_cmp \ + [filecmp $sitedir($mindex)/VERIFY/loglsn \ + $sitedir($i)/VERIFY/loglsn] 0 + } + } + + # + # At this point we have a master and sites all up to date + # with each other. Now, one at a time, upgrade the sites + # to the current version and start everyone up again. + incr count + } +} + +proc setup_sites { nsites } { + # + # Set up a list that goes from 0 to $nsites running + # upgraded. A 0 represents running old version and 1 + # represents running upgraded. 
So, for 3 sites it will look like: + # { 0 0 0 } { 1 0 0 } { 1 1 0 } { 1 1 1 } + # + set sitelist {} + for { set i 0 } { $i <= $nsites } { incr i } { + set l "" + for { set j 1 } { $j <= $nsites } { incr j } { + if { $i < $j } { + lappend l 0 + } else { + lappend l 1 + } + } + lappend sitelist $l + } + return $sitelist +} + +proc upg_repdir { histdir upgdir } { + global util_path + + # + # Upgrade a site to the current version. This entails: + # 1. Removing any old files from the upgrade directory. + # 2. Copy all old version files to upgrade directory. + # 3. Remove any __db files from upgrade directory except __db.rep*gen. + # 4. Force checkpoint in new version. + file delete -force $upgdir + + # Recovery was run before as part of upgradescript. + # Archive dir by copying it to upgrade dir. + file copy -force $histdir $upgdir + set dbfiles [glob -nocomplain $upgdir/__db*] + foreach d $dbfiles { + if { $d == "$upgdir/__db.rep.gen" || + $d == "$upgdir/__db.rep.egen" } { + continue + } + file delete -force $d + } + # Force current version checkpoint + set stat [catch {eval exec $util_path/db_checkpoint -1 -h $upgdir} r] + if { $stat != 0 } { + puts "CHECKPOINT: $upgdir: $r" + } + error_check_good stat_ckp $stat 0 +} + +proc get_master { nsites verslist } { + error_check_good vlist_chk [llength $verslist] $nsites + # + # When we can, simply run an election to get a new master. + # We then verify we got an old client. + # + # For now, randomly pick among the old sites, or if no old + # sites just randomly pick anyone. + # + set old_count 0 + # Pick 1 out of N old sites or 1 out of nsites if all upgraded. + foreach i $verslist { + if { $i == 0 } { + incr old_count + } + } + if { $old_count == 0 } { + set old_count $nsites + } + set master [berkdb random_int 0 [expr $old_count - 1]] + # + # Since the Nth old site may not be at the Nth place in the + # list unless we used the entire list, we need to loop to find + # the right index to return. 
+ if { $old_count == $nsites } { + return $master + } + set ocount 0 + set index 0 + foreach i $verslist { + if { $i == 1 } { + incr index + continue + } + if { $ocount == $master } { + return $index + } + incr ocount + incr index + } + # + # If we get here there is a problem in the code. + # + error "FAIL: get_master problem" +} + +proc method_version { } { + global valid_methods + + set methods $valid_methods + set remaining_methods $methods + set methods_len [expr [llength $remaining_methods] - 1] + + set versions {db-5.1.25 db-5.0.26 \ + db-4.8.30 db-4.7.25 db-4.6.21 db-4.5.20 db-4.4.20} + set remaining_versions $versions + set versions_len [expr [llength $remaining_versions] - 1] + + # Walk through the list of methods and the list of versions and + # pair them at random. Stop when either list is empty. + set mv {} + while { $versions_len >= 0 && $methods_len >= 0 } { + set vidx [berkdb random_int 0 $versions_len] + set midx [berkdb random_int 0 $methods_len] + + set version [lindex $remaining_versions $vidx] + set method [lindex $remaining_methods $midx] + + set remaining_versions [lreplace $remaining_versions $vidx $vidx] + set remaining_methods [lreplace $remaining_methods $midx $midx] + + incr versions_len -1 + incr methods_len -1 + + if { $method != "heap" } { + lappend mv [list $method $version] + } + } + + # If there are remaining versions, randomly assign any of + # the original methods to each one. + while { $versions_len >= 0 } { + + set methods_len [expr [llength $valid_methods] - 1] + set midx [berkdb random_int 0 $methods_len] + + set version [lindex $remaining_versions 0] + set method [lindex $valid_methods $midx] + + set remaining_versions [lreplace $remaining_versions 0 0] + incr versions_len -1 + + lappend mv [list $method $version] + } + + # If there are remaining methods, randomly assign any of + # the original versions to each one. 
+ while { $methods_len >= 0 } { + + set versions_len [expr [llength $versions] - 1] + set vidx [berkdb random_int 0 $versions_len] + + set version [lindex $versions $vidx] + set method [lindex $remaining_methods 0] + + set remaining_methods [lreplace $remaining_methods 0 0] + incr methods_len -1 + + lappend mv [list $method $version] + } + return $mv +} diff --git a/test/tcl/rep065script.tcl b/test/tcl/rep065script.tcl new file mode 100644 index 00000000..539de61d --- /dev/null +++ b/test/tcl/rep065script.tcl @@ -0,0 +1,416 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# rep065script - procs to use at each replication site in the +# replication upgrade test. +# +# type: START, PROCMSGS, VERIFY +# START starts up a replication site and performs an operation. +# the operations are: +# REPTEST runs the rep_test_upg procedure on the master. +# REPTEST_GET run a read-only test on a client. +# REPTEST_ELECT runs an election on the site. +# PROCMSGS processes messages until none are left. +# VERIFY dumps the log and database contents. 
+# role: master or client +# op: operation to perform +# envid: environment id number for use in replsend +# allids: all env ids we need for sending +# ctldir: controlling directory +# mydir: directory where this participant runs +# reputils_path: location of reputils.tcl + +proc rep065scr_elect { repenv oplist } { + set ver [lindex $oplist 1] + set pri [lindex $oplist 2] +} + +proc rep065scr_reptest { repenv oplist markerdb } { + + set method [lindex $oplist 1] + set niter [lindex $oplist 2] + set loop [lindex $oplist 3] + set start 0 + puts "REPTEST: method $method, niter $niter, loop $loop" + + for {set n 0} {$n < $loop} {incr n} { + puts "REPTEST: call rep_test_upg $n" + eval rep_test_upg $method $repenv NULL $niter $start $start 0 0 + incr start $niter + tclsleep 3 + } + # + # Sleep a bunch to help get the messages worked through. + # + tclsleep 10 + puts "put DONE to marker" + error_check_good marker_done [$markerdb put DONE DONE] 0 + error_check_good marker_sync [$markerdb sync] 0 +} + +proc rep065scr_repget { repenv oplist mydir markerfile } { + set dbname "$mydir/test.db" + set i 0 + while { [file exists $dbname] == 0 } { + tclsleep 2 + incr i + if { $i >= 15 && $i % 5 == 0 } { + puts "After $i seconds, no database exists." + } + if { $i > 180 } { + error "Database never created." 
+ } + } + set loop 1 + while { 1 } { + set markerdb [berkdb_open $markerfile] + error_check_good marker [is_valid_db $markerdb] TRUE + set kd [$markerdb get DONE] + error_check_good marker_close [$markerdb close] 0 + if { [llength $kd] != 0 } { + break + } + set db [berkdb_open -env $repenv $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + set dbc [$db cursor] + set i 0 + error_check_good curs [is_valid_cursor $dbc $db] TRUE + for { set dbt [$dbc get -first ] } \ + { [llength $dbt] > 0 } \ + { set dbt [$dbc get -next] } { + incr i + } + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + puts "REPTEST_GET: after $loop loops: key count $i" + incr loop + tclsleep 2 + } +} +proc rep065scr_starttest { role oplist envid msgdir mydir allids markerfile } { + global qtestdir + global util_path + global repfiles_in_memory + + puts "repladd_noenv $allids" + set qtestdir $msgdir + foreach id $allids { + repladd_noenv $id + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set markerdb [berkdb_open -create -btree $markerfile] + error_check_good marker [is_valid_db $markerdb] TRUE + puts "set up env cmd" + set lockmax 40000 + set logbuf [expr 16 * 1024] + set logmax [expr $logbuf * 4] + if { $role == "MASTER" } { + set rep_env_cmd "berkdb_env_noerr -create -home $mydir \ + -log_max $logmax -log_buffer $logbuf $repmemargs \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx MASTER -txn -rep_master \ + -rep_transport \[list $envid replsend_noenv\]" + set rep_env_cmd "berkdb_env_noerr -create -home $mydir \ + -log_max $logmax -log_buffer $logbuf $repmemargs \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx MASTER -txn -rep_master \ + -verbose {rep on} -errfile /dev/stderr \ + -rep_transport \[list $envid replsend_noenv\]" + } elseif { $role == "CLIENT" } { + set rep_env_cmd "berkdb_env_noerr -create -home $mydir \ + -log_max $logmax -log_buffer $logbuf 
$repmemargs \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx CLIENT -txn -rep_client \ + -rep_transport \[list $envid replsend_noenv\]" + set rep_env_cmd "berkdb_env_noerr -create -home $mydir \ + -log_max $logmax -log_buffer $logbuf $repmemargs \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx CLIENT -txn -rep_client \ + -verbose {rep on} -errfile /dev/stderr \ + -rep_transport \[list $envid replsend_noenv\]" + } else { + puts "FAIL: unrecognized replication role $role" + return + } + + # Change directories to where this will run. + # !!! + # mydir is an absolute path of the form + # /build_unix/TESTDIR/MASTERDIR or + # /build_unix/TESTDIR/CLIENTDIR.0 + # + # So we want to run relative to the build_unix directory + cd $mydir/../.. + + puts "open repenv $rep_env_cmd" + set repenv [eval $rep_env_cmd] + error_check_good repenv_open [is_valid_env $repenv] TRUE + + puts "repenv is $repenv" + # + # Indicate that we're done starting up. Sleep to let + # others do the same. + # + puts "put START$envid to marker" + error_check_good marker_done [$markerdb put START$envid START$envid] 0 + error_check_good marker_sync [$markerdb sync] 0 + puts "sleeping after marker" + tclsleep 3 + + # Here is where the real test starts. + # + # Different operations may have different args in their list. + # REPTEST: Args are method, niter, nloops + set op [lindex $oplist 0] + if { $op == "REPTEST" } { + # + # This test writes the marker, so close after it runs. + # + rep065scr_reptest $repenv $oplist $markerdb + error_check_good marker_close [$markerdb close] 0 + } + if { $op == "REPTEST_GET" } { + # + # This test needs to poll the marker. So close it now. + # + error_check_good marker_close [$markerdb close] 0 + rep065scr_repget $repenv $oplist $mydir $markerfile + } + if { $op == "REP_ELECT" } { + # + # This test writes the marker, so close after it runs. 
+ # + rep065scr_elect $repenv $oplist $markerdb + } + puts "Closing env" + $repenv mpool_sync + error_check_good envclose [$repenv close] 0 + +} + +proc rep065scr_msgs { role envid msgdir mydir allids markerfile } { + global qtestdir + global repfiles_in_memory + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + # + # The main test process will write the marker file when it + # has started and when it has completed. We need to + # open/close the marker file because we are in a separate + # process from the writer and we cannot share an env because + # we might be a different BDB release version. + # + set markerdb [berkdb_open -create -btree $markerfile] + error_check_good marker [is_valid_db $markerdb] TRUE + set s [$markerdb get START$envid] + while { [llength $s] == 0 } { + error_check_good marker_close [$markerdb close] 0 + tclsleep 1 + set markerdb [berkdb_open $markerfile] + error_check_good marker [is_valid_db $markerdb] TRUE + set s [$markerdb get START$envid] + } + + puts "repladd_noenv $allids" + set qtestdir $msgdir + foreach id $allids { + repladd_noenv $id + } + + puts "set up env cmd" + if { $role == "MASTER" } { + set rep_env_cmd "berkdb_env_noerr -home $mydir \ + -errpfx MASTER -txn -rep_master $repmemargs \ + -rep_transport \[list $envid replsend_noenv\]" + set rep_env_cmd "berkdb_env_noerr -home $mydir \ + -errpfx MASTER -txn -rep_master $repmemargs \ + -verbose {rep on} -errfile /dev/stderr \ + -rep_transport \[list $envid replsend_noenv\]" + } elseif { $role == "CLIENT" } { + set rep_env_cmd "berkdb_env_noerr -home $mydir \ + -errpfx CLIENT -txn -rep_client $repmemargs \ + -rep_transport \[list $envid replsend_noenv\]" + set rep_env_cmd "berkdb_env_noerr -home $mydir \ + -errpfx CLIENT -txn -rep_client $repmemargs \ + -verbose {rep on} -errfile /dev/stderr \ + -rep_transport \[list $envid replsend_noenv\]" + } else { + puts "FAIL: unrecognized replication role $role" + return + } + + # Change 
directories to where this will run. + cd $mydir + + puts "open repenv $rep_env_cmd" + set repenv [eval $rep_env_cmd] + error_check_good repenv_open [is_valid_env $repenv] TRUE + + set envlist "{$repenv $envid}" + puts "repenv is $repenv" + while { 1 } { + if { [llength [$markerdb get DONE]] != 0 } { + break + } + process_msgs $envlist 0 NONE NONE 1 + error_check_good marker_close [$markerdb close] 0 + set markerdb [berkdb_open $markerfile] + error_check_good marker [is_valid_db $markerdb] TRUE + tclsleep 1 + } + # + # Process messages in case there are a few more stragglers. + # Just because the main test is done doesn't mean that all + # the messaging is done. Loop for messages as long as + # progress is being made. + # + set nummsg 1 + while { $nummsg != 0 } { + process_msgs $envlist 0 NONE NONE 1 + tclsleep 1 + # First look at messages from us + set nummsg [replmsglen_noenv $envid from] + puts "Still have $nummsg not yet processed by others" + } + error_check_good marker_close [$markerdb close] 0 + replclear_noenv $envid from + tclsleep 1 + replclear_noenv $envid + $repenv mpool_sync + error_check_good envclose [$repenv close] 0 +} + +proc rep065scr_verify { oplist mydir id } { + global util_path + + set rep_env_cmd "berkdb_env_noerr -home $mydir -txn \ + -rep_transport \[list $id replnoop\]" + + # Change directories to where this will run. + # !!! + # mydir is an absolute path of the form + # /build_unix/TESTDIR/MASTERDIR or + # /build_unix/TESTDIR/CLIENTDIR.0 + # + # So we want to run relative to the build_unix directory + cd $mydir/../.. 
+ + foreach op $oplist { + set repenv [eval $rep_env_cmd] + error_check_good env_open [is_valid_env $repenv] TRUE + if { $op == "DB" } { + set dbname "$mydir/test.db" + set db [berkdb_open -env $repenv -rdonly $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + set txn "" + set method [$db get_type] + if { [is_record_based $method] == 1 } { + dump_file $db $txn $mydir/VERIFY/dbdump \ + rep_test_upg.recno.check + } else { + dump_file $db $txn $mydir/VERIFY/dbdump \ + rep_test_upg.check + } + error_check_good dbclose [$db close] 0 + } + if { $op == "LOG" } { + set lgstat [$repenv log_stat] + set lgfile [stat_field $repenv log_stat "Current log file number"] + set lgoff [stat_field $repenv log_stat "Current log file offset"] + puts "Current LSN: $lgfile $lgoff" + set f [open $mydir/VERIFY/loglsn w] + puts $f $lgfile + puts $f $lgoff + close $f + + set stat [catch {eval exec $util_path/db_printlog \ + -h $mydir > $mydir/VERIFY/prlog} result] + if { $stat != 0 } { + puts "PRINTLOG: $result" + } + error_check_good stat_prlog $stat 0 + } + error_check_good envclose [$repenv close] 0 + } + # + # Run recovery locally so that any later upgrades are ready + # to be upgraded. + # + set stat [catch {eval exec $util_path/db_recover -h $mydir} result] + if { $stat != 0 } { + puts "RECOVERY: $result" + } + error_check_good stat_rec $stat 0 + +} + +set usage "upgradescript type role op envid allids ctldir mydir reputils_path" + +# Verify usage +if { $argc != 8 } { + puts stderr "Argc $argc, argv $argv" + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set type [ lindex $argv 0 ] +set role [ lindex $argv 1 ] +set op [ lindex $argv 2 ] +set envid [ lindex $argv 3 ] +set allids [ lindex $argv 4 ] +set ctldir [ lindex $argv 5 ] +set mydir [ lindex $argv 6 ] +set reputils_path [ lindex $argv 7 ] + +set histdir $mydir/../.. 
+puts "Histdir $histdir" + +set msgtestdir $ctldir/TESTDIR + +global env +cd $histdir +set stat [catch {eval exec ./db_printlog -V} result] +if { $stat != 0 } { + set env(LD_LIBRARY_PATH) ":$histdir:$histdir/.libs:$env(LD_LIBRARY_PATH)" +} +source ./include.tcl +source $test_path/test.tcl + +# The global variable noenv_messaging must be set after sourcing +# test.tcl or its value will be wrong. +global noenv_messaging +set noenv_messaging 1 + +set is_repchild 1 +puts "Did args. now source reputils" +source $reputils_path/reputils.tcl +source $reputils_path/reputilsnoenv.tcl + +set markerdir $msgtestdir/MARKER +set markerfile $markerdir/marker.db + +puts "Calling proc for type $type" +if { $type == "START" } { + rep065scr_starttest $role $op $envid $msgtestdir $mydir $allids $markerfile +} elseif { $type == "PROCMSGS" } { + rep065scr_msgs $role $envid $msgtestdir $mydir $allids $markerfile +} elseif { $type == "VERIFY" } { + file mkdir $mydir/VERIFY + rep065scr_verify $op $mydir $envid +} else { + puts "FAIL: unknown type $type" + return +} diff --git a/test/tcl/rep066.tcl b/test/tcl/rep066.tcl new file mode 100644 index 00000000..3dcf34f4 --- /dev/null +++ b/test/tcl/rep066.tcl @@ -0,0 +1,276 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep066 +# TEST Replication and dead log handles. +# TEST +# TEST Run rep_test on master and a client. +# TEST Simulate client crashes (master continues) until log 2. +# TEST Open 2nd master env handle and put something in log and flush. +# TEST Downgrade master, restart client as master. +# TEST Run rep_test on newmaster until log 2. +# TEST New master writes log records, newclient processes records +# TEST and 2nd newclient env handle calls log_flush. +# TEST New master commits, newclient processes and should succeed. 
+# TEST Make sure 2nd handle detects the old log handle and doesn't +# TEST write to a stale handle (if it does, the processing of the +# TEST commit will fail). +# +proc rep066 { method { niter 10 } { tnum "066" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + # This test requires a second handle on an env, and HP-UX + # doesn't support that. + if { $is_hp_test } { + puts "Skipping rep$tnum for HP-UX." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping\ + for in-memory logs with -recover." + continue + } + puts "Rep$tnum ($method $r): Replication\ + and dead log handles $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep066_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep066_sub { method niter tnum logset recargs largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + # Later we'll open a 2nd handle to this env. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $repmemargs $privargs \ + $m_logargs -errpfx ENV0 -log_max $log_max $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set env0 [eval $ma_envcmd $recargs -rep_master] + set masterenv $env0 + + # Open a client. 
+ repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $repmemargs $privargs \ + $c_logargs -errpfx ENV1 -log_max $log_max $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set env1 [eval $cl_envcmd $recargs -rep_client] + set clientenv $env1 + + # Bring the clients online by processing the startup messages. + set envlist "{$env0 1} {$env1 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update clients). + puts "\tRep$tnum.a.0: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + set nstart $niter + set last_client_log [get_logfile $env1 last] + set stop 0 + while { $stop == 0 } { + puts "\tRep$tnum.b: Run test on master until log file changes." + eval rep_test\ + $method $masterenv NULL $niter $nstart $nstart 0 $largs + incr nstart $niter + replclear 2 + set last_master_log [get_logfile $masterenv last] + if { $last_master_log > $last_client_log } { + set stop 1 + } + } + + # Open a 2nd env handle on the master. + # We want to have some operations happen on the normal + # handle and then flush them with this handle. + puts "\tRep$tnum.c: Open 2nd master env and flush log." + set 2ndenv [eval $ma_envcmd -rep_master -errpfx 2NDENV] + error_check_good master_env [is_valid_env $2ndenv] TRUE + + + # Set up databases as in-memory or on-disk. 
+ if { $databases_in_memory } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + + set omethod [convert_method $method] + set txn [$masterenv txn] + error_check_good txn [is_valid_txn $txn $masterenv] TRUE + set db [eval {berkdb_open_noerr -env $masterenv -errpfx MASTER \ + -txn $txn -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Flush on the 2nd handle + set lf [stat_field $2ndenv log_stat "Times log flushed to disk"] + error_check_good flush [$2ndenv log_flush] 0 + set lf2 [stat_field $2ndenv log_stat "Times log flushed to disk"] + error_check_bad log_flush $lf $lf2 + + # The detection of dead log handle is based on a 1-second resolution + # timestamp comparison. Now that we've established the threatening + # source of the dead handle in $2ndenv, wait a moment to make sure that + # the fresh handle that we're about to create gets a later timestamp. + tclsleep 1 + + # Resolve the txn and close the database + error_check_good commit [$txn commit] 0 + error_check_good close [$db close] 0 + + # Nuke those messages for client about to become master. + replclear 2 + + puts "\tRep$tnum.d: Swap envs" + set masterenv $env1 + set clientenv $env0 + error_check_good downgrade [$clientenv rep_start -client] 0 + error_check_good upgrade [$masterenv rep_start -master] 0 + set envlist "{$env0 1} {$env1 2}" + process_msgs $envlist + + # + # At this point, env0 should have rolled back across the log file. + # We need to do some operations on the master, process them on + # the client (but not a commit because that flushes). We want + # the message processing client env (env0) to put records in + # the log buffer and the 2nd env handle to flush the log. + # + puts "\tRep$tnum.e: Run test until create new log file." + # + # Set this to the last log file the old master had. 
+ # + set last_client_log $last_master_log + set last_master_log [get_logfile $masterenv last] + set stop 0 + while { $stop == 0 } { + puts "\tRep$tnum.e: Run test on master until log file changes." + eval rep_test\ + $method $masterenv NULL $niter $nstart $nstart 0 $largs + process_msgs $envlist + incr nstart $niter + set last_master_log [get_logfile $masterenv last] + if { $last_master_log == $last_client_log } { + set stop 1 + } + } + puts "\tRep$tnum.f: Create some log records." + set txn [$masterenv txn] + error_check_good txn [is_valid_txn $txn $masterenv] TRUE + set db [eval {berkdb_open_noerr -env $masterenv -errpfx MASTER \ + -txn $txn -create -mode 0644} $largs $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + process_msgs $envlist + # Flush on the 2nd handle + puts "\tRep$tnum.g: Flush on 2nd env handle." + set lf [stat_field $2ndenv log_stat "Times log flushed to disk"] + error_check_good flush [$2ndenv log_flush] 0 + set lf2 [stat_field $2ndenv log_stat "Times log flushed to disk"] + error_check_bad log_flush2 $lf $lf2 + + # Resolve the txn and close the database + puts "\tRep$tnum.h: Process commit on client env handle." + error_check_good commit [$txn commit] 0 + error_check_good close [$db close] 0 + process_msgs $envlist + + error_check_good cl2_close [$2ndenv close] 0 + error_check_good env0_close [$env0 close] 0 + error_check_good env1_close [$env1 close] 0 + replclose $testdir/MSGQUEUEDIR + return +} + diff --git a/test/tcl/rep067.tcl b/test/tcl/rep067.tcl new file mode 100644 index 00000000..81f5e4f1 --- /dev/null +++ b/test/tcl/rep067.tcl @@ -0,0 +1,323 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep067 +# TEST Full election timeout test. 
+# TEST +# TEST Verify that elections use a separate "full election timeout" (if such +# TEST configuration is in use) instead of the normal timeout, when the +# TEST replication group is "cold-booted" (all sites starting with recovery). +# TEST + +proc rep067 { method args } { + source ./include.tcl + + set tnum "067" + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + puts "Rep$tnum: Full election timeout test." + + # This test consists of three cases, two of which can be handled by + # script that is similar enough to be handled by a single proc + # (rep067a_sub), with a parameter to determine whether a client is + # down. The other case is different enough to warrant its own proc + # (rep067b_sub). + # + rep067a_sub $tnum yes + rep067a_sub $tnum no + rep067b_sub $tnum +} + +# Cold boot the group. Sites A and B come up just fine, but site C might not +# come up (depending on the client_down flag). Hold an election. (The amount +# of time it takes depends on whether site C is running.) Then, shut down site +# A, start site C if it isn't already running, and hold another election. +# +proc rep067a_sub { tnum client_down } { + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + global repfiles_in_memory + global rep_verbose + global verbose_type + + # Set up some arbitrary timeout values for this test. The only + # constraint is that they should be large enough, and different enough, + # so as to allow for some amount of measurement imprecision introduced + # by the overhead of the test mechnism. Timeout values themselves + # expressed in microseconds, since they'll be passed to DB; leeway + # values in seconds, so that we can measure the result here in Tcl. 
+ # + set elect_to 15000000 + set elect_secs_leeway 13 + set full_elect_to 30000000 + set full_secs_leeway 27 + + puts -nonewline "Rep$tnum.a: Full election test, " + if { $client_down } { + puts "with one client missing" + puts -nonewline "\tRep$tnum.b: First election" + puts " expected to take [expr $full_elect_to / 1000000] seconds" + } else { + puts "with all clients initially present" + puts "\tRep$tnum.b: First election expected to complete quickly" + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + # Configure all three clients. Use EID's starting at 2, because that's + # what run_election expects. + # + set nsites 3 + foreach i { 0 1 2 } eid { 2 3 4 } p { 20 50 100 } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + + repladd $eid + set env_cmd($i) "berkdb_env_noerr -create \ + -event $repmemargs -home $clientdir($i) \ + -txn -rep_client $verbargs \ + -errpfx CLIENT.$i -rep_transport \[list $eid replsend\]" + + set errcmd($i) "none" + set crash($i) 0 + set pri($i) $p + } + set elect_timeout [list $elect_to $full_elect_to] + + # Start the clients, but perhaps not all of them. + # + set envlist {} + if { $client_down } { + set participants 2 + } else { + set participants 3 + } + for { set i 0 } { $i < $participants } { incr i } { + set clientenv($i) [eval $env_cmd($i)] + set eid [expr $i + 2] + lappend envlist "$clientenv($i) $eid" + } + + process_msgs $envlist + + # In this test, the expected winner is always the last one in the + # array. We made sure of that by arranging the priorities that way. + # This is convenient so that we can remove the winner (master) in the + # second phase, without leaving a hole in the arrays that the + # run_election proc wouldn't cope with. 
+ # + set winner [expr $participants - 1] + set initiator 0 + set nvotes 2 + set reopen_flag 0 + run_election envlist errcmd pri crash \ + $qdir "Rep$tnum.c" $initiator $nsites $nvotes $participants \ + $winner $reopen_flag NULL 0 0 $elect_timeout + set duration [rep067_max_duration $envlist] + puts "\tRep$tnum.d: the election took about $duration seconds" + + if { $client_down } { + # Case #2. + # + # Without full participation on a cold boot, the election should + # take the full long timeout. In any case it should be way more + # than the "normal" timeout. + # + error_check_good duration1a \ + [expr $duration > $full_secs_leeway] 1 + } else { + # Case #1. + # + # With full participation, the election should complete "right + # away". At least it should be way less than the "normal" + # election timeout. + error_check_good duration1b \ + [expr $duration < $elect_secs_leeway] 1 + } + + process_msgs $envlist + + if { !$client_down } { + # Shut down the master and hold another election between the + # remaining two sites. + # + puts "\tRep$tnum.e: Shut down elected master, and run another election" + puts "\tRep$tnum.g: (expected to take [expr $elect_to / 1000000] seconds)" + $clientenv($winner) close + set envlist [lreplace $envlist $winner $winner] + + set winner 1 + set participants 2 + run_election envlist errcmd pri crash \ + $qdir "Rep$tnum.b" $initiator $nsites $nvotes \ + $participants $winner $reopen_flag NULL 0 0 $elect_timeout + set duration [rep067_max_duration $envlist] + + # We don't have full participation, so the election can only be + # won after a timeout. But these clients have seen a master, so + # we shouldn't have to wait for the full-election timeout. 
+ # + puts "\tRep$tnum.g: the election took about $duration seconds" + error_check_good duration2 \ + [expr $duration > $elect_secs_leeway && \ + $duration < $full_secs_leeway] 1 + } + $clientenv(0) close + $clientenv(1) close + + replclose $testdir/MSGQUEUEDIR +} + +# Run an election where one of the clients has seen a master, but the other has +# not. Verify that the first client learns from the second that a master has +# been seen, and allows the election to complete after the normal timeout, +# rather than the full election timeout. +# +proc rep067b_sub { tnum } { + source ./include.tcl + global rand_init + global repfiles_in_memory + global rep_verbose + global verbose_type + + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set elect_to 10000000 + set elect_secs_leeway 10 + set full_elect_to 180000000 + set full_secs_leeway 100 + + puts "Rep$tnum.a: Mixed full election test" + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + # Start a master and one client. This first step is just setup, for the + # purpose of creating a client that has heard from a master. 
+ # + file mkdir $testdir/MASTERDIR + set mcmd "berkdb_env_noerr -create \ + -event $repmemargs -home $testdir/MASTERDIR \ + -txn -rep_master $verbargs \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + file mkdir $testdir/CLIENTDIR + set ccmd "berkdb_env_noerr -create \ + -event $repmemargs -home $testdir/CLIENTDIR \ + -txn -rep_client $verbargs \ + -errpfx CLIENT.0 -rep_transport \[list 2 replsend\]" + + puts "\tRep$tnum.b: Start master and first client" + repladd 1 + set menv [eval $mcmd] + repladd 2 + set cenv [eval $ccmd] + process_msgs [list [list $menv 1] [list $cenv 2]] + + puts "\tRep$tnum.c: Shut down master; start other client" + $menv close + + # Now set up for the election test we're really interested in. We'll + # need $ccmd in array position 0 of env_cmd, for passing to + # run_election. Then, start the second client. We now have a mixture + # of clients: one who's seen a master, and the other who hasn't. + # + # The run_election proc assumes an offset of 2 between the array index + # and the EID. Thus EID 3 has to correspond to array index 1, etc. 
+ # + set env_cmd(0) $ccmd + repladd 3 + file mkdir $testdir/CLIENTDIR2 + set env_cmd(1) "berkdb_env_noerr -create \ + -event $repmemargs -home $testdir/CLIENTDIR2 \ + -txn -rep_client $verbargs \ + -errpfx CLIENT.1 -rep_transport \[list 3 replsend\]" + set c2env [eval $env_cmd(1)] + + set envlist {} + foreach i { 0 1 } eid { 2 3 } p { 100 50 } e [list $cenv $c2env] { + set errcmd($i) "none" + set crash($i) 0 + set pri($i) $p + + lappend envlist [list $e $eid] + } + set elect_timeout [list $elect_to $full_elect_to] + + set nsites 3 + set participants 2 + process_msgs $envlist + + puts "\tRep$tnum.d: Election expected to take [expr $elect_to / 1000000] seconds" + set winner 0 + set initiator 0 + set nvotes 2 + set reopen_flag 0 + run_election envlist errcmd pri crash \ + $qdir "Rep$tnum.e" $initiator $nsites $nvotes $participants \ + $winner $reopen_flag NULL 0 0 $elect_timeout + set duration [rep067_max_duration $envlist] + puts "\tRep$tnum.f: the election took about $duration seconds" + + # We don't have full participation, so the election can only be won + # after a timeout. But even if only one client has seen a master, we + # shouldn't have to wait for the full-election timeout. + # + error_check_good duration3 \ + [expr $duration > $elect_secs_leeway && \ + $duration < $full_secs_leeway] 1 + + $cenv close + $c2env close + + replclose $testdir/MSGQUEUEDIR +} + +proc rep067_max_duration { envlist } { + set max 0.0 + foreach pair $envlist { + set env [lindex $pair 0] + set s [stat_field $env rep_stat "Election seconds"] + set u [stat_field $env rep_stat "Election usecs"] + set d [expr ( $u / 1000000.0 ) + $s ] + if { $d > $max } { + set max $d + } + } + return $max +} diff --git a/test/tcl/rep068.tcl b/test/tcl/rep068.tcl new file mode 100644 index 00000000..3e934eb0 --- /dev/null +++ b/test/tcl/rep068.tcl @@ -0,0 +1,222 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep068 +# TEST Verify replication of dbreg operations does not hang clients. +# TEST In a simple replication group, create a database with very +# TEST little data. With DB_TXN_NOSYNC the database can be created +# TEST at the client even though the log is not flushed. If we crash +# TEST and restart, the application of the log starts over again, even +# TEST though the database is still there. The application can open +# TEST the database before replication tries to re-apply the create. +# TEST This causes a hang as replication waits to be able to get a +# TEST handle lock. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc rep068 { method { tnum "068" } args } { + + source ./include.tcl + global repfiles_in_memory + global env_private + + # Run for btree methods only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: skipping for non-btree method." + return + } + + # This test requires a second handle on an env, and HP-UX + # doesn't support that. + if { $is_hp_test } { + puts "Skipping rep$tnum for HP-UX." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with/without recovery and txn nosync. + foreach s {"nosync" ""} { + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { (($r == "-recover") || ($s == "nosync")) + && ($logindex != -1) } { + puts "Skipping test with -recover or\ + nosync for in-memory logs." 
+ continue + } + # Temporary note: at the moment, this test + # fails when both "-recover" and + # "nosync" are in use, because of problems + # described in SR #15071. + if { ($r == "-recover") && ($s == "nosync") } { + puts "Skipping test with -recover or\ + nosync." + continue + } + puts "Rep$tnum ($method $r $s): Test of dbreg\ + lock conflicts at client $msg2 $msg3." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep068_sub $method $tnum $l $r $s $args + } + } + } +} + +proc rep068_sub { method tnum logset recargs nosync largs } { + global testdir + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set KEY "any old key" + set DATA "arbitrary data" + set DBNAME "test.db" + + set nosync_args [subst {-txn $nosync}] + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client + # There is no need to adjust txn args for this test since + # the txn args are explicitly set. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_logargs \ + $verbargs -errpfx MASTER $repmemargs $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs $nosync_args -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_logargs \ + $verbargs -errpfx CLIENT $repmemargs $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs $nosync_args -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Open/create a database, maybe put just one record in it + # abandon the client env, and restart it. Before trying to sync, + # open the database at the client. + + set db [berkdb_open_noerr -auto_commit \ + -btree -create -env $masterenv $DBNAME] + set ret [$db put $KEY $DATA] + error_check_good initial_insert $ret 0 + process_msgs $envlist + + # Simulate a crash and restart of the client, by simply abandoning + # the old environment handle and opening a new one. + # + puts "\tRep$tnum.a: Open a fresh handle onto the client env." + set origclientenv $clientenv + set clientenv [eval $cl_envcmd $recargs $nosync_args -rep_client] + set envlist "{$masterenv 1} {$clientenv 2}" + + # We expect the db creation operation to have been flushed to the log, + # so that at this point recovery will have removed the database (since + # we expect the transaction did not commit). But the bug we are testing + # for is that the applying of replicated transactions hangs if the + # database turns out to be present. Thus, for a stringent test, we want + # to at least try to open the database, and "dare ourselves" not to hang + # if it turns out to be present. 
+ # + if {[catch {set client_db [berkdb_open_noerr \ + -auto_commit -unknown -env $clientenv $DBNAME]} result] == 0} { + puts "\t\tRep$tnum.a(ii): warning: db open at restarted client\ + succeeded unexpectedly" + } else { + set client_db "NULL" + } + + puts "\tRep$tnum.b: Attempting sync-up with db handle open." + process_msgs $envlist + puts "\tRep$tnum.c: Sync-up completed." + + # With rep files in-memory, the attempted sync-up does an abbreviated + # internal init to get the lsn history file. On all but the fastest + # runs, this increases rep_timestamp, resulting in DB_REP_HANDLE_DEAD + # on the get operation unless the database is closed and reopened. + if { $repfiles_in_memory && $client_db != "NULL"} { + $client_db close + set client_db "NULL" + } + + if {$client_db == "NULL"} { + set client_db [berkdb_open_noerr \ + -auto_commit -unknown -env $clientenv $DBNAME] + } + set result [$client_db get $KEY] + error_check_good one_pair [llength $result] 1 + set val [lindex $result 0 1] + error_check_good "value still matches" $val $DATA + puts "\tRep$tnum.d: Confirmed correct data." + + $client_db close + $clientenv close + catch { $origclientenv close } res + + $db close + $masterenv close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep069.tcl b/test/tcl/rep069.tcl new file mode 100644 index 00000000..859e1a7e --- /dev/null +++ b/test/tcl/rep069.tcl @@ -0,0 +1,265 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep069 +# TEST Test of internal initialization and elections. +# TEST +# TEST If a client is in a recovery mode of any kind, it +# TEST participates in elections at priority 0 so it can +# TEST never be elected master. 
+# +proc rep069 { method { niter 200 } { tnum "069" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + set args [convert_args $method $args] + set saved_args $args + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + set args $saved_args + puts "Rep$tnum ($method $args): Test internal\ + initialization and elections $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep069_sub $method $niter $tnum $l $args + } +} + +proc rep069_sub { method niter tnum logset largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + + set nclients 2 + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + } + + # Log size is small so we quickly create more than one, and + # can easily force internal initialization. 
+ set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + set envlist {} + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $repmemargs \ + $m_logargs -log_max $log_max -event $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -recover -rep_master] + lappend envlist "$masterenv 1" + + # Open clients. + for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set envcmd($i) "berkdb_env_noerr -create \ + $repmemargs \ + $c_txnargs $c_logargs -log_max $log_max \ + -home $clientdir($i) -event $verbargs \ + -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $envcmd($i) -recover -rep_client] + lappend envlist "$clientenv($i) $envid" + } + + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # db_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master and update clients. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test \ + $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + + # Create a gap requiring internal initialization. The + # proc push_master_ahead will close & discard messages for + # one of the clients. Take care of the other one here. 
+ # + error_check_good client1_close [$clientenv(1) close] 0 + + set cid 2 + set flags "" + set dbhandle NULL + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv(0) $cid $dbhandle $start $niter $flags $largs] + + # Clear messages for second closed client. + replclear 3 + + # Adjust envlist for two closed clients. + set envlist [lreplace $envlist 1 2] + + # Reopen clients. + puts "\tRep$tnum.e: Reopen clients." + for { set i 0 } { $i < $nclients } { incr i } { + env_cleanup $clientdir($i) + set clientenv($i) [eval $envcmd($i) -recover -rep_client] + set envid [expr $i + 2] + lappend envlist "$clientenv($i) $envid" + } + + # Run proc_msgs_once until both clients are in internal + # initialization. + # + # We figure out whether each client is in initialization + # by searching for the state of SYNC_UPDATE. As soon as + # a client produces that state, it's marked as being + # in initialization, and stays that way. All clients + # will get to that state on the same iteration. + # + # We will always hit SYNC_UPDATE first, so we only need to + # check for that one (i.e. not SYNC_PAGE or SYNC_LOG). + # + puts "\tRep$tnum.f:\ + Run proc_msgs_once until all clients enter internal init." + set in_init 0 + for { set i 0 } { $i < $nclients } { incr i } { + set initializing($i) 0 + } + + while { $in_init != 1 } { + set nproced [proc_msgs_once $envlist NONE err] + for { set i 0 } { $i < $nclients } { incr i } { + set stat($i) \ + [exec $util_path/db_stat -N -r -R A -h $clientdir(1)] + if {[is_substr $stat($i) "SYNC_UPDATE"] } { + set initializing($i) 1 + } + } + set in_init 1 + for { set i 0 } { $i < $nclients } { incr i } { + if { $initializing($i) == 0 } { + set in_init 0 + } + } + } + + # Call an election. It should fail, because both clients + # are in internal initialization and therefore not electable. + # Indicate failure with winner = -2. + # First, close the master. 
+ error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + puts "\tRep$tnum.g: Run election; no one will get elected." + set m "Rep$tnum.g" + set nsites $nclients + set nvotes $nclients + set winner -2 + set elector 0 + for { set i 0 } { $i < $nclients } { incr i } { + set err_cmd($i) "none" + set crash($i) 0 + set pri($i) 10 + } + + # This election will time out instead of succeeding. + set timeout_ok 1 + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + run_election envlist err_cmd pri crash \ + $qdir $m $elector $nsites $nvotes $nclients $winner \ + 0 $dbname 0 $timeout_ok + + # Verify that each client saw the message that no + # electable site was found. + puts "\tRep$tnum.h: Check for right error message." + for { set i 0 } { $i < $nclients } { incr i } { + set none_electable 0 + set id [expr $i + 1] + set fd [open $testdir/ELECTION_RESULT.$id r] + while { [gets $fd str] != -1 } { + if { [is_substr $str "Too few remote sites"] == 1 } { + set none_electable 1 + break + } + } + close $fd + error_check_good none_electable $none_electable 1 + } + + # Clean up for the next pass. + for { set i 0 } { $i < $nclients } { incr i } { + $clientenv($i) close + } + + replclose $testdir/MSGQUEUEDIR +} + diff --git a/test/tcl/rep070.tcl b/test/tcl/rep070.tcl new file mode 100644 index 00000000..7c137c76 --- /dev/null +++ b/test/tcl/rep070.tcl @@ -0,0 +1,196 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep070 +# TEST Test of startup_done condition with idle master. +# TEST +# TEST Join a client to an existing master, and verify that +# TEST the client detects startup_done even if the master +# TEST does not execute any new transactions. 
+# +proc rep070 { method { niter 200 } { tnum "070" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for btree and queue only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 ||\ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping rep070 for method $method." + return + } + + # This test does not cover any new ground with in-memory + # databases. + if { $databases_in_memory } { + puts "Skipping Rep$tnum for in-memory databases." + return + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + # + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $args): Test of\ + internal initialization and startup_done\ + $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep070_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep070_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs $verbargs -errpfx MASTER $repmemargs \ + $privargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Put some data into the database + puts "\tRep$tnum.a: Run rep_test in master env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + + # Open a client + puts "\tRep$tnum.b: Open client." + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs $verbargs -errpfx CLIENT $repmemargs \ + $privargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2}" + rep070_verify_startup_done $clientenv $envlist + + # Close and re-open the client. 
What happens next depends on whether + # we used -recover. + # + $clientenv close + set clientenv [eval $cl_envcmd $recargs -rep_client] + set envlist "{$masterenv 1} {$clientenv 2}" + if { $recargs == "-recover" } { + rep070_verify_startup_done $clientenv $envlist + } else { + error_check_good \ + startup_still_done [rep070_startup_done $clientenv] 1 + } + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 + + check_log_location $masterenv + check_log_location $clientenv + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + +# Verify that startup_done starts off false, then turns to true at some point, +# and thereafter never reverts to false. +# +proc rep070_verify_startup_done { clientenv envlist } { + # Initially we should not yet have startup_done. + set got_startup_done [rep070_startup_done $clientenv] + error_check_good startup_not_done_yet $got_startup_done 0 + + # Bring the client online little by little. + # + while { [proc_msgs_once $envlist] > 0 } { + set done [rep070_startup_done $clientenv] + + # At some point, startup_done gets turned on. Make sure it + # never gets turned off after that. + # + if { $got_startup_done } { + # We've seen startup_done previously. + error_check_good no_rescind $done 1 + } else { + set got_startup_done $done + } + } + error_check_good startup_done $got_startup_done 1 +} + +proc rep070_startup_done { env } { + stat_field $env rep_stat "Startup complete" +} diff --git a/test/tcl/rep071.tcl b/test/tcl/rep071.tcl new file mode 100644 index 00000000..915b50e8 --- /dev/null +++ b/test/tcl/rep071.tcl @@ -0,0 +1,173 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep071 +# TEST Test of multiple simultaneous client env handles and +# TEST upgrading/downgrading. Tests use of temp db handle +# TEST internally. 
+# TEST +# TEST Open a master and 2 handles to the same client env. +# TEST Run rep_test. +# TEST Close master and upgrade client to master using one env handle. +# TEST Run rep_test again, and then downgrade back to client. +# +proc rep071 { method { niter 10 } { tnum "071" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + # We can't open two envs on HP-UX, so just skip the + # whole test since that is at the core of it. + if { $is_hp_test == 1 } { + puts "Rep$tnum: Skipping for HP-UX." + return + } + + # This test depends on copying logs, so can't be run with + # in-memory logging. + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + + set args [convert_args $method $args] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + puts "Rep$tnum ($method $r): Replication\ + backup and synchronizing $msg $msg2 $msg3." 
+ rep071_sub $method $niter $tnum $r $args + } +} + +proc rep071_sub { method niter tnum recargs largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + -home $masterdir -errpfx MASTER $repmemargs $privargs \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn nosync $verbargs \ + -home $clientdir -errpfx CLIENT $repmemargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good clenv [is_valid_env $clientenv] TRUE + # + # Open a 2nd client handle to the same client env. + # This handle needs to be a full client handle so just + # use the same env command for both. + # + set 2ndclientenv [eval $cl_envcmd -rep_client -errpfx 2ND] + error_check_good cl2env [is_valid_env $2ndclientenv] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.b: Downgrade master and upgrade client." 
+ error_check_good master_close [$masterenv rep_start -client] 0 + error_check_good client_close [$clientenv rep_start -master] 0 + + puts "\tRep$tnum.b: Run rep_test." + eval rep_test $method $clientenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + puts "\tRep$tnum.c: Downgrade back to client and upgrade master" + # + # The act of upgrading and downgrading an env, with another + # handle open had issues with open internal db handles. + # So, the existence of the 2nd client env handle is needed + # even though we're not doing anything active with that handle. + # + error_check_good client_close [$clientenv rep_start -client] 0 + error_check_good master_close [$masterenv rep_start -master] 0 + + puts "\tRep$tnum.d: Run rep_test in master." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + rep_verify $masterdir $masterenv $clientdir $clientenv + + error_check_good master_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + error_check_good clientenv_close [$2ndclientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep072.tcl b/test/tcl/rep072.tcl new file mode 100644 index 00000000..1799a19d --- /dev/null +++ b/test/tcl/rep072.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep072 +# TEST Verify that internal init does not leak resources from +# TEST the locking subsystem. + +proc rep072 { method { niter 200 } { tnum "072" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Run for btree and queue methods only. 
+ if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] == 0 && [is_queue $method] == 0 } { + puts "Rep$tnum: skipping for non-btree, non-queue method." + return + } + + set args [convert_args $method $args] + set limit 3 + set check true + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + puts "Rep$tnum ($method): Confirm internal init does not\ + leak locks $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep072_sub $method $niter $tnum $l $limit $check $args + } +} + +proc rep072_sub {method {niter 200} {tnum 072} logset \ + {limit 3} {check true} largs} { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $verbargs $repmemargs \ + $m_logargs $m_txnargs -log_max $log_max -errpfx MASTER \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + $masterenv rep_limit 0 0 + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $verbargs $repmemargs \ + $c_logargs $c_txnargs -log_max $log_max -errpfx CLIENT \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + $clientenv rep_limit 0 0 + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # $limit is the number of internal init cycles we want to try + for {set count 1} {$count <= $limit} {incr count} { + puts "\tRep$tnum.a: Try internal init cycle number $count" + + # Clobber replication's 30-second anti-archive timer, which will + # have been started by internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Run rep_test in the master. + puts "\tRep$tnum.b: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # We don't use the standard push_master_ahead proc + # here because it would close the client. Part of + # the design of this test is for the client to be + # open but desperately behind. + + puts "\tRep$tnum.c: Leave client alive, but isolated." 
+ set last_client_log [get_logfile $clientenv last] + + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master (don't update client). + puts "\tRep$tnum.d: Running rep_test in replicated env." + eval rep_test \ + $method $masterenv NULL $niter 0 0 0 $largs + # + # Clear messages for client. We want that site + # to get far behind. + # + replclear 2 + if { $m_logtype != "in-memory" } { + puts "\tRep$tnum.e: Run db_archive on master." + $masterenv log_flush + exec $util_path/db_archive -d -h $masterdir + } + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > $last_client_log } { + set stop 1 + } + } + + # + # Run rep_test one more time, this time letting client see + # messages. This will induce client to ask master for missing + # log records, leading to internal init. + # + puts "\tRep$tnum.f: Running rep_test in replicated env." + set entries 10 + eval rep_test $method \ + $masterenv NULL $entries $niter 0 0 $largs + process_msgs $envlist + + set n_lockers [stat_field \ + $clientenv lock_stat "Current number of lockers"] + puts "\tRep$tnum.f: num lockers: $n_lockers" + if {$count == 1} { + set expected_lockers $n_lockers + } elseif {[string is true $check]} { + error_check_good leaking? $n_lockers $expected_lockers + } + } + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep073.tcl b/test/tcl/rep073.tcl new file mode 100644 index 00000000..73da6b6f --- /dev/null +++ b/test/tcl/rep073.tcl @@ -0,0 +1,201 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep073 +# TEST +# TEST Test of allowing clients to create and update their own scratch +# TEST databases within the environment. Doing so requires the use +# TEST use of the DB_TXN_NOT_DURABLE flag for those databases. 
+# +proc rep073 { method { niter 200 } { tnum "073" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $args): Test of\ + non-durable databases and replication\ + $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep073_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep073_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + set omethod [convert_method $method] + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \ + -errpfx MASTER $repmemargs $privargs \ + -home $masterdir $verbargs -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -errpfx CLIENT $repmemargs $privargs \ + -home $clientdir $verbargs -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run rep_test in the master (and update client). 
+ puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Set up databases as in-memory or on-disk. + if { $databases_in_memory } { + set mtestfile { "" "master.db" } + set ctestfile { "" "client.db" } + } else { + set mtestfile "master.db" + set ctestfile "client.db" + } + + puts "\tRep$tnum.b: Open non-durable databases on master and client." + set mdb [eval berkdb_open -create -auto_commit \ + -btree -env $masterenv -notdurable $mtestfile] + set cdb [eval berkdb_open -create -auto_commit \ + -btree -env $clientenv -notdurable $ctestfile] + process_msgs $envlist + + # Verify that neither file exists on the other site. + # Look for the file if it's on-disk, and try to open a handle + # if it's in-memory. + if { $databases_in_memory } { + catch { eval berkdb_open -env $clientenv $mtestfile } ret + error_check_good mtestfile [is_substr $ret "no such file"] 1 + catch { eval berkdb_open -env $masterenv $ctestfile } ret + error_check_good ctestfile [is_substr $ret "no such file"] 1 + } else { + error_check_good master_not_on_client \ + [file exists $clientdir/$mtestfile] 0 + error_check_good client_not_on_master \ + [file exists $masterdir/$ctestfile] 0 + } + + # + # Now write to the master database, process messages and + # make sure nothing gets sent to the client. + # + puts "\tRep$tnum.c: Write to non-durable database on master." + eval rep_test $method $masterenv $mdb $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + if { $databases_in_memory } { + catch { eval berkdb_open -env $clientenv $mtestfile } ret + error_check_good mtestfile [is_substr $ret "no such file"] 1 + } else { + error_check_good master_not_on_client \ + [file exists $clientdir/$mtestfile] 0 + } + + # Make sure client can write to its own database. + puts "\tRep$tnum.d: Write to non-durable database on client." 
+ eval rep_test $method $clientenv $cdb $niter $start $start 0 $largs + process_msgs $envlist + + error_check_good mdb_close [$mdb close] 0 + error_check_good cdb_close [$cdb close] 0 + + rep_verify $masterdir $masterenv $clientdir $clientenv 0 1 1 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep074.tcl b/test/tcl/rep074.tcl new file mode 100644 index 00000000..3bb652fa --- /dev/null +++ b/test/tcl/rep074.tcl @@ -0,0 +1,204 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep074 +# TEST Verify replication withstands send errors processing requests. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc rep074 { method { niter 20 } { tnum "074" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Skip for all methods except btree. + if { $checking_valid_methods } { + return btree + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: skipping for non-btree method $method." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + foreach l $logsets { + puts "Rep$tnum ($method): Test of send errors processing\ + requests $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep074_sub $method $niter $tnum $l $args + } +} + +proc rep074_sub { method niter tnum logset largs } { + global testdir + global rep074_failure_count + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set rep074_failure_count -1 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args for master + # and client. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $verbargs -errpfx MASTER \ + -home $masterdir $m_logargs $m_txnargs $repmemargs $privargs \ + -rep_transport \[list 1 rep074_replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + # Create some new records, so that the master will have something + # substantial to say when asked for LOG_REQ. + # + puts "\tRep$tnum.a: Running rep_test in replicated env." 
+ eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + -home $clientdir $c_logargs $c_txnargs $repmemargs $privargs \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + set envlist "{$masterenv 1} {$clientenv 2}" + + # Bring the client online by processing the startup messages. This will + # cause the client to send a request to the master. + # + # In the first cycle, the client gets NEWMASTER and sends an UPDATE_REQ. + # In the second cycle, the master answers the UPDATE_REQ with an UPDATE, + # and the client sends a PAGE_REQ. Third, once we've gotten pages, we + # send a LOG_REQ. + # + # 1. NEWCLIENT -> NEWMASTER -> UPDATE_REQ + # 2. UPDATE -> PAGE_REQ + # 3. PAGE -> LOG_REQ + # + puts "\tRep$tnum.b: NEWMASTER -> UPDATE_REQ" + proc_msgs_once $envlist + puts "\tRep$tnum.c: UPDATE -> PAGE_REQ" + proc_msgs_once $envlist + puts "\tRep$tnum.d: PAGE -> LOG_REQ" + proc_msgs_once $envlist + + # Force a sending error at the master while processing the LOG_REQ. + # We should ignore it, and return success to rep_process_message + # + puts "\tRep$tnum.e: Simulate a send error." + set rep074_failure_count [expr $niter / 2] + proc_msgs_once $envlist NONE errorp + + puts "\tRep$tnum.f: Check for good return from rep_process_msg." + error_check_good rep_resilient $errorp 0 + + # Since we interrupted the flow with the simulated error, we don't have + # the log records we need yet. + # + error_check_bad startupdone \ + [stat_field $clientenv rep_stat "Startup complete"] 1 + + # + # Run some more new txns at the master, so that the client eventually + # decides to request the remainder of the LOG_REQ response that it's + # missing. Pause for a second to make sure we reach the lower + # threshold for re-request on fast machines. 
We need to force a
+	# checkpoint because we need to create a gap, and then pause to
+	# reach the rerequest threshold.
+	#
+	set rep074_failure_count -1
+	$masterenv txn_checkpoint -force
+	process_msgs $envlist
+	tclsleep 1
+	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
+	process_msgs $envlist
+
+	error_check_good startupdone \
+	    [stat_field $clientenv rep_stat "Startup complete"] 1
+
+	$masterenv close
+	$clientenv close
+	replclose $testdir/MSGQUEUEDIR
+}
+
+# Failure count < 0 turns off any special failure simulation processing.
+# When the count is > 0, it means we should process that many messages normally,
+# before invoking a failure.
+#
+proc rep074_replsend { control rec fromid toid flags lsn } {
+	global rep074_failure_count
+
+	if { $rep074_failure_count < 0 } {
+		return [replsend $control $rec $fromid $toid $flags $lsn]
+	}
+
+	if { $rep074_failure_count > 0 } {
+		incr rep074_failure_count -1
+		return [replsend $control $rec $fromid $toid $flags $lsn]
+	}
+
+	# Return an arbitrary non-zero value to indicate an error.
+	return 1
+}
diff --git a/test/tcl/rep075.tcl b/test/tcl/rep075.tcl
new file mode 100644
index 00000000..0ed24781
--- /dev/null
+++ b/test/tcl/rep075.tcl
@@ -0,0 +1,558 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+#
+# TEST rep075
+# TEST Replication and prepared transactions.
+# TEST Test having outstanding prepared transactions and simulating
+# TEST crashing or upgrading or downgrading sites.
+# TEST
+#
+proc rep075 { method { tnum "075" } args } {
+
+	source ./include.tcl
+	global databases_in_memory
+	global mixed_mode_logging
+	global repfiles_in_memory
+	global env_private
+
+	# Run for all access methods.
+ if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep075: Skipping for method $method" + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Swapping the envs is the only thing that should + # work for: + # HP or private env: can't open two handles on same env. + # in-memory logs: prepared txns don't survive recovery + # NIM databases: can't be recovered + # + if { $is_hp_test == 1 || $mixed_mode_logging > 0 || + $databases_in_memory == 1 || $env_private } { + set prep {swap} + } else { + set prep {dbrecover swap resolve recover envrecover} + } + set ops {commit abort both} + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + + } + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # Run the body of the test with and without recovery. + foreach l $logsets { + foreach p $prep { + foreach o $ops { + puts "Rep$tnum ($method $p $o): Replication\ + and prepared txns $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + puts "Rep$tnum: close DBs after prepare" + rep075_sub $method $tnum $l $p $o 1 $args + puts "Rep$tnum: close DBs before prepare" + rep075_sub $method $tnum $l $p $o 0 $args + } + } + } +} + +proc rep075_sub { method tnum logset prep op after largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + global util_path + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + set m_logargs " -log_buffer $log_buf " + set c_logargs " -log_buffer $log_buf " + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $repmemargs $privargs \ + $m_logargs -errpfx ENV0 -log_max $log_max $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set env0 [eval $ma_envcmd -rep_master] + set masterenv $env0 + error_check_good master_env [is_valid_env $env0] TRUE + + # Open a client. + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $repmemargs $privargs \ + $c_logargs -errpfx ENV1 -log_max $log_max $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set env1 [eval $cl_envcmd -rep_client] + set clientenv $env1 + error_check_good client_env [is_valid_env $env1] TRUE + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create $c_txnargs \ + $repmemargs $privargs \ + $c_logargs -errpfx ENV2 -log_max $log_max $verbargs \ + -home $clientdir2 -rep_transport \[list 3 replsend\]" + set env2 [eval $cl2_envcmd -rep_client] + set clientenv2 $env2 + error_check_good client_env [is_valid_env $env2] TRUE + + set omethod [convert_method $method] + + # Bring the clients online by processing the startup messages. + set envlist "{$env0 1} {$env1 2} {$env2 3}" + process_msgs $envlist + + # + # Run rep_test in a database with a sub database, or in a + # named in-memory database. + # + if { $databases_in_memory } { + set testfile { "" "test1.db" } + set testfile2 { "" "test2.db" } + set db1 [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile] + } else { + set testfile "test1.db" + set testfile2 "test2.db" + set sub "subdb" + set db1 [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile $sub] + } + error_check_good dbopen [is_valid_db $db1] TRUE + + puts "\tRep$tnum.a: Running rep_test in replicated env." 
+ set niter 1 + eval rep_test $method $masterenv $db1 $niter 0 0 0 $largs + process_msgs $envlist + + set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \ + -create -mode 0644} $largs $omethod $testfile2] + error_check_good dbopen [is_valid_db $db] TRUE + + # + # Create and prepare 2 transactions: + # One txn is for the first database and one txn for the + # second database. We want to test that we can detect + # when the last restored txn has been resolved. And we + # want to test various files being open. + # + puts "\tRep$tnum.b: Prepare some txns." + set pbnyc 2 + set key key + set data some_data + set txn1 [$masterenv txn] + error_check_good txn [is_valid_txn $txn1 $masterenv] TRUE + error_check_good put [$db1 put -txn $txn1 $key $data] 0 + + set gid [make_gid rep075:$txn1] + error_check_good commit [$txn1 prepare $gid] 0 + + set txn2 [$masterenv txn] + error_check_good txn [is_valid_txn $txn2 $masterenv] TRUE + error_check_good put [$db put -txn $txn2 $key $data] 0 + + set gid [make_gid rep075:$txn2] + error_check_good commit [$txn2 prepare $gid] 0 + if { $after == 0 } { + $db1 close + $db close + } + process_msgs $envlist + + # + # Now we have txns on a master that are PBNYC (prepared but + # not yet committed). Alter the replication system now + # based on what we're testing this time through. + # + puts "\tRep$tnum.c: Reset replication ($prep)." + + if { $op == "commit" } { + set op1 commit + set op2 commit + } elseif { $op == "abort" } { + set op1 abort + set op2 abort + } else { + set i [berkdb random_int 0 1] + if { $i == 0 } { + set op1 commit + set op2 abort + } else { + set op1 abort + set op2 commit + } + } + set oplist [list $op1 $op2] + # + # If we are doing a swap, swap roles between master and client + # and then call txn recover. Master should then commit. + # This operation tests handling prepared txns in replication code. 
+ # + # If we are doing a recover, each site stops using its old + # env handle and then opens a new one, with recovery. + # This operation tests handling prepared txns and then + # starting replication. + # + # If we are doing an envrecover, each site stops using its old + # env handle and then opens a new one, with recovery. + # Each site then opens a 2nd dbenv handle to run txn_recover + # and resolve each operation. + # This operation tests handling prepared txns and then + # starting replication. + # + # If we are doing a resolve, each site prepares the txns + # and then resolves the txns and then stops using the old + # env handle to cause a "crash". We then open a new one + # with recovery. This operation tests handling prepared + # txns and having them resolved. + # + if { $prep == "swap" } { + puts "\tRep$tnum.c.0: Swap roles master->client." + # + # A downgrading master must resolve the txns. So, commit + # them here, but don't send the messages to the client that + # is about to become master. + # + error_check_good commit [$txn1 commit] 0 + error_check_good commit [$txn2 commit] 0 + if { $after == 1 } { + $db1 close + $db close + } + replclear 2 + replclear 3 + set newclient $env0 + error_check_good downgrade [$newclient rep_start -client] 0 + set ctxnlist [$newclient txn_recover] + set newmaster $env1 + puts "\tRep$tnum.c.1: Swap roles client->master." + error_check_good upgrade [$newmaster rep_start -master] 0 + set txnlist [$newmaster txn_recover] + + puts "\tRep$tnum.c.2: Check status of prepared txn." + error_check_good txnlist_len [llength $txnlist] $pbnyc + error_check_good txnlist_len [llength $ctxnlist] 0 + + # + # Now commit that old prepared txn. + # + puts "\tRep$tnum.c.3: Resolve prepared txn ($op)." + rep075_resolve $txnlist $oplist + } elseif { $prep == "recover" } { + # + # To simulate a crash, simply stop using the old handles + # and reopen new ones, with recovery. First flush both + # the log and mpool to disk. 
+ # + set origenv0 $env0 + set origenv1 $env1 + set origtxn1 $txn1 + set origtxn2 $txn2 + puts "\tRep$tnum.c.0: Sync and recover master environment." + error_check_good flush1 [$env0 log_flush] 0 + error_check_good sync1 [$env0 mpool_sync] 0 + if { $after == 1 } { + $db1 close + $db close + } + set env0 [eval $ma_envcmd -recover] + error_check_good master_env [is_valid_env $env0] TRUE + puts "\tRep$tnum.c.1: Run txn_recover on master env." + set txnlist [$env0 txn_recover] + error_check_good txnlist_len [llength $txnlist] $pbnyc + puts "\tRep$tnum.c.2: Resolve txn ($op) on master env." + rep075_resolve $txnlist $oplist + + puts "\tRep$tnum.c.3: Sync and recover client environment." + error_check_good flush1 [$env1 log_flush] 0 + error_check_good sync1 [$env1 mpool_sync] 0 + set env1 [eval $cl_envcmd -recover] + error_check_good client_env [is_valid_env $env1] TRUE + puts "\tRep$tnum.c.4: Run txn_recover on client env." + set txnlist [$env1 txn_recover] + error_check_good txnlist_len [llength $txnlist] $pbnyc + + puts "\tRep$tnum.c.5: Resolve txn ($op) on client env." + rep075_resolve $txnlist $oplist + + puts "\tRep$tnum.c.6: Restart replication on both envs." + error_check_good master [$env0 rep_start -master] 0 + error_check_good client [$env1 rep_start -client] 0 + set newmaster $env0 + set envlist "{$env0 1} {$env1 2} {$env2 3}" + # + # Clean up old Tcl handles. + # + catch {$origenv0 close} res + catch {$origenv1 close} res + catch {$origtxn1 close} res + catch {$origtxn2 close} res + } elseif { $prep == "resolve" } { + # + # Check having prepared txns in the log, but they are + # also resolved before we "crash". + # To simulate a crash, simply stop using the old handles + # and reopen new ones, with recovery. First flush both + # the log and mpool to disk. + # + set origenv0 $env0 + set origenv1 $env1 + set origdb1 $db1 + set origdb $db + puts "\tRep$tnum.c.0: Resolve ($op1 $op2) and recover master." 
+ error_check_good resolve1 [$txn1 $op1] 0 + error_check_good resolve2 [$txn2 $op2] 0 + error_check_good flush0 [$env0 log_flush] 0 + error_check_good sync0 [$env0 mpool_sync] 0 + process_msgs $envlist + set env0 [eval $ma_envcmd -recover] + error_check_good master_env [is_valid_env $env0] TRUE + puts "\tRep$tnum.c.1: Run txn_recover on master env." + set txnlist [$env0 txn_recover] + error_check_good txnlist_len [llength $txnlist] 0 + + puts "\tRep$tnum.c.2: Sync and recover client environment." + error_check_good flush1 [$env1 log_flush] 0 + error_check_good sync1 [$env1 mpool_sync] 0 + set env1 [eval $cl_envcmd -recover] + error_check_good client_env [is_valid_env $env1] TRUE + puts "\tRep$tnum.c.3: Run txn_recover on client env." + set txnlist [$env1 txn_recover] + error_check_good txnlist_len [llength $txnlist] 0 + + puts "\tRep$tnum.c.4: Restart replication on both envs." + error_check_good master [$env0 rep_start -master] 0 + error_check_good client [$env1 rep_start -client] 0 + set newmaster $env0 + set envlist "{$env0 1} {$env1 2} {$env2 3}" + catch {$origenv0 close} res + catch {$origenv1 close} res + catch {$origdb close} res + catch {$origdb1 close} res + } elseif { $prep == "envrecover" || $prep == "dbrecover" } { + # + # To simulate a crash, simply stop using the old handles + # and reopen new ones, with recovery. First flush both + # the log and mpool to disk. + # + set origenv0 $env0 + set origenv1 $env1 + set origtxn1 $txn1 + set origtxn2 $txn2 + puts "\tRep$tnum.c.0: Sync and recover master environment." 
+ error_check_good flush1 [$env0 log_flush] 0 + error_check_good sync1 [$env0 mpool_sync] 0 + set oldgen [stat_field $env0 rep_stat "Generation number"] + error_check_good flush1 [$env1 log_flush] 0 + error_check_good sync1 [$env1 mpool_sync] 0 + if { $after == 1 } { + $db1 close + $db close + } + if { $prep == "dbrecover" } { + set recargs "-h $masterdir -c " + set stat [catch {eval exec $util_path/db_recover \ + -e $recargs} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + } + set recargs "-h $clientdir -c " + set stat [catch {eval exec $util_path/db_recover \ + -e $recargs} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + } + } + # + # !!! + # We still need to open with recovery, even if 'dbrecover' + # because db_recover cannot open the env with replication + # enabled. But db_recover will be the real recovery that + # needs to deal with the prepared txn. This recovery below + # for db_recover, should be a no-op essentially. + # + set recenv0 [eval $ma_envcmd -recover] + error_check_good master_env [is_valid_env $recenv0] TRUE + puts "\tRep$tnum.c.1: Run txn_recover on master env." + set env0 [eval $ma_envcmd] + error_check_good master_env [is_valid_env $env0] TRUE + set txnlist [$env0 txn_recover] + error_check_good txnlist_len [llength $txnlist] $pbnyc + puts "\tRep$tnum.c.2: Resolve txn ($op) on master env." + rep075_resolve $txnlist $oplist + error_check_good recenv0_close [$recenv0 close] 0 + + puts "\tRep$tnum.c.3: Recover client environment." + set recenv1 [eval $cl_envcmd -recover -errpfx "ENV1REC"] + error_check_good client_env [is_valid_env $recenv1] TRUE + puts "\tRep$tnum.c.4: Run txn_recover on client env." + set env1 [eval $cl_envcmd -errpfx "ENV1NEW"] + error_check_good client_env [is_valid_env $env1] TRUE + set txnlist [$env1 txn_recover] + error_check_good txnlist_len [llength $txnlist] $pbnyc + + puts "\tRep$tnum.c.5: Resolve txns ($oplist) on client env." 
+ rep075_resolve $txnlist $oplist + error_check_good recenv1_close [$recenv1 close] 0 + + puts "\tRep$tnum.c.6: Restart replication on both envs." + if { $prep == "dbrecover" } { + # + # XXX Since we ran db_recover, we lost the rep gen + # and clientenv2 cannot detect the change. Until + # SR 15396 is fixed, we'll fake it by becoming + # master, downgrading and then upgrading again to + # advance the generation number. + # + error_check_good master [$env0 rep_start -master] 0 + error_check_good master [$env0 rep_start -client] 0 + replclear 2 + replclear 3 + } + error_check_good master [$env0 rep_start -master] 0 + set gen [stat_field $env0 rep_stat "Generation number"] + # + # If in-memory rep, restarting environment puts gen back + # to 1, the same as oldgen. envrecover doesn't do the extra + # rep_start, so gen is expected to stay at 1 in this case. + # + if { $repfiles_in_memory != 0 && $prep == "envrecover" } { + error_check_good gen $gen $oldgen + } else { + error_check_bad gen $gen $oldgen + } + error_check_good client [$env1 rep_start -client] 0 + set newmaster $env0 + set envlist "{$env0 1} {$env1 2} {$env2 3}" + process_msgs $envlist + # + # Clean up old Tcl handles. + # + catch {$origenv0 close} res + catch {$origenv1 close} res + catch {$origtxn1 close} res + catch {$origtxn2 close} res + } + # + # Run a standard rep_test creating test.db now. + # + eval rep_test $method $newmaster NULL $niter 0 0 0 $largs + process_msgs $envlist + + # + # Verify whether or not the key exists in the databases both + # on the client and the master. + # + puts "\tRep$tnum.d: Verify prepared data." 
+	foreach e $envlist {
+		set env [lindex $e 0]
+		if { $databases_in_memory } {
+			set db1 [eval {berkdb_open_noerr -env $env\
+			    -auto_commit -create -mode 0644} $largs\
+			    $omethod $testfile]
+		} else {
+			set db1 [eval {berkdb_open_noerr -env $env\
+			    -auto_commit -create -mode 0644} $largs\
+			    $omethod $testfile $sub]
+		}
+		error_check_good dbopen [is_valid_db $db1] TRUE
+		set db2 [eval {berkdb_open_noerr -env $env -auto_commit \
+		    -create -mode 0644} $largs $omethod $testfile2]
+		error_check_good dbopen [is_valid_db $db2] TRUE
+		set k1 [$db1 get $key]
+		set k2 [$db2 get $key]
+		if { $op1 == "commit" } {
+			error_check_good key [llength $k1] 1
+		} else {
+			error_check_good key [llength $k1] 0
+		}
+		if { $op2 == "commit" } {
+			error_check_good key [llength $k2] 1
+		} else {
+			error_check_good key [llength $k2] 0
+		}
+
+		error_check_good db_close [$db1 close] 0
+		error_check_good db_close [$db2 close] 0
+	}
+	error_check_good env0_close [$env0 close] 0
+	error_check_good env1_close [$env1 close] 0
+	error_check_good env2_close [$env2 close] 0
+
+	replclose $testdir/MSGQUEUEDIR
+	return
+}
+
+proc rep075_resolve { txnlist ops } {
+	error_check_good resolve_lists [llength $txnlist] [llength $ops]
+	foreach trec $txnlist op $ops {
+		set txn [lindex $trec 0]
+		error_check_good commit [$txn $op] 0
+	}
+}
diff --git a/test/tcl/rep076.tcl b/test/tcl/rep076.tcl
new file mode 100644
index 00000000..22d55b7d
--- /dev/null
+++ b/test/tcl/rep076.tcl
@@ -0,0 +1,199 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+#
+# TEST rep076
+# TEST Replication elections - what happens if elected client
+# TEST does not become master?
+# TEST
+# TEST Set up a master and 3 clients. Take down master, run election.
+# TEST The elected client will ignore the fact that it's been elected,
+# TEST so we still have 2 clients.
+# TEST +# TEST Run another election, a regular election that allows the winner +# TEST to become master, and make sure it goes okay. We do this both +# TEST for the client that ignored its election and for the other client. +# TEST +# TEST This simulates what would happen if, say, we had a temporary +# TEST network partition and lost the winner. +# +proc rep076 { method args } { + source ./include.tcl + + global mixed_mode_logging + global databases_in_memory + global repfiles_in_memory + + set tnum "076" + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set nclients 3 + set logsets [create_logsets [expr $nclients + 1]] + set winsets { { 1 1 } { 1 2 } } + foreach l $logsets { + foreach w $winsets { + puts "Rep$tnum ($method): Replication elections -\ + elected client ignores election $msg $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + for { set i 0 } { $i < $nclients } { incr i } { + puts "Rep$tnum: Client $i logs are\ + [lindex $l [expr $i + 1]]" + } + rep076_sub $method $nclients $tnum $l $w $args + } + } +} + +proc rep076_sub { method nclients tnum logset winset largs } { + source ./include.tcl + global machids + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + set m_logtype [lindex $logset 0] + set m_logargs [adjust_logargs $m_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + set c_logtype($i) [lindex $logset [expr $i + 1]] + set c_logargs($i) [adjust_logargs $c_logtype($i)] + set c_txnargs($i) [adjust_txnargs $c_logtype($i)] + } + + # Open a master. + set envlist {} + repladd 1 + set env_cmd(M) "berkdb_env -create -log_max 1000000 $verbargs \ + -event $repmemargs \ + -home $masterdir $m_txnargs $m_logargs -rep_master \ + -errpfx MASTER -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + lappend envlist "$masterenv 1" + + # Open the clients. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set env_cmd($i) "berkdb_env_noerr -create $verbargs \ + -event $repmemargs \ + -home $clientdir($i) $c_txnargs($i) $c_logargs($i) \ + -rep_client -rep_transport \[list $envid replsend\]" + set clientenv($i) [eval $env_cmd($i)] + error_check_good \ + client_env($i) [is_valid_env $clientenv($i)] TRUE + lappend envlist "$clientenv($i) $envid" + } + + # Bring the clients online by processing the startup messages. + process_msgs $envlist + + # Run a modified test001 in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set niter 10 + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Close master. + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + + # Winner1 is set up to win the first election, winner2 + # the second. + set m "Rep$tnum.b" + set winner1 [lindex $winset 0] + set winner2 [lindex $winset 1] + set elector 1 + set nsites $nclients + set nvotes $nclients + setpriority pri $nclients $winner1 + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + foreach pair $envlist { + set i [expr [lindex $pair 1] - 2] + replclear [expr $i + 2] + set err_cmd($i) "none" + set crash($i) 0 + if { $rep_verbose == 1 } { + $clientenv($i) errpfx CLIENT$i + $clientenv($i) verbose $verbose_type on + $clientenv($i) errfile /dev/stderr + set env_cmd($i) [concat $env_cmd($i) \ + "-errpfx CLIENT$i -errfile /dev/stderr"] + } + } + + + # Run election where winner will ignore its election and + # not be made master. + puts "\tRep$tnum: First winner ignores its election." + run_election envlist err_cmd pri crash $qdir $m\ + $elector $nsites $nvotes $nclients $winner1 0 $dbname 1 + + # Run second election where winner accepts its election and + # is made master. + puts "\tRep$tnum: Second winner accepts its election." 
+ setpriority pri $nclients $winner2 + run_election envlist err_cmd pri crash $qdir $m\ + $elector $nsites $nvotes $nclients $winner2 0 $dbname + + # Clean up. + foreach pair $envlist { + set cenv [lindex $pair 0] + error_check_good cenv_close [$cenv close] 0 + } + + replclose $testdir/MSGQUEUEDIR +} + diff --git a/test/tcl/rep077.tcl b/test/tcl/rep077.tcl new file mode 100644 index 00000000..35534b8a --- /dev/null +++ b/test/tcl/rep077.tcl @@ -0,0 +1,165 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep077 +# TEST +# TEST Replication, recovery and applying log records immediately. +# TEST Master and 1 client. Start up both sites. +# TEST Close client and run rep_test on the master so that the +# TEST log record is the same LSN the client would be expecting. +# TEST Reopen client with recovery and verify the client does not +# TEST try to apply that "expected" record before it synchronizes +# TEST with the master. +# +proc rep077 { method { tnum "077"} args} { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + # Valid for all access methods. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + foreach l $logsets { + puts "Rep$tnum ($method): Recovered client\ + getting immediate log records $msg $msg2 $msg3." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep077_sub $method $tnum $l $args + } +} + +proc rep077_sub { method tnum logset largs} { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set niter 5 + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set env_cmd(M) "berkdb_env_noerr -create \ + $verbargs $repmemargs $privargs \ + -home $masterdir -errpfx MASTER -txn nosync -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $env_cmd(M)] + + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + # Open a client + repladd 2 + set env_cmd(C) "berkdb_env_noerr -create \ + $verbargs $repmemargs $privargs \ + -home $clientdir -errpfx CLIENT -txn nosync -rep_client \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $env_cmd(C)] + + puts "\tRep$tnum.a: Start up master and client." + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.b: Close client." + $clientenv close + + # + # We want to run rep_test now and DO NOT replclear the + # messages for the closed client. 
We want to make sure + # that the first message the client sees upon restarting + # is a log record that exactly matches the current + # expected LSN. + # + puts "\tRep$tnum.c: Run rep_test on master with client closed." + # + # Move it forward by sending in niter as start and skip. + # + eval rep_test $method $masterenv NULL $niter $niter $niter 0 $largs + + # We need to reopen with recovery to blow away our idea of + # who the master is, because this client will start up with + # the right generation number and the ready_lsn will be + # set to the right value for the first log record to apply. + # + # However, this client is running recovery and will have + # written its own recovery log records. So, until this + # client finds and synchronizes with the master after + # restarting, its ready_lsn and lp->lsn will not be + # in sync and this client better not try to apply the records. + # + puts "\tRep$tnum.d: Restart client with recovery and process messages." + set clientenv [eval $env_cmd(C) -recover] + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # + # If we didn't crash at this point, we're okay. + # + $masterenv close + $clientenv close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep078.tcl b/test/tcl/rep078.tcl new file mode 100644 index 00000000..55def654 --- /dev/null +++ b/test/tcl/rep078.tcl @@ -0,0 +1,361 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep078 +# TEST +# TEST Replication and basic lease test. +# TEST Set leases on master and 2 clients. +# TEST Do a lease operation and process to all clients. +# TEST Read with lease on master. Do another lease operation +# TEST and don't process on any client. Try to read with +# TEST on the master and verify it fails. Process the messages +# TEST to the clients and retry the read. 
+# +proc rep078 { method { tnum "078" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Valid for all access methods. Other lease tests limit the + # test because there is nothing method-specific being tested. + # Use all methods for this basic test. + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + set logsets [create_logsets 3] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + # + # Also skip the case where the master is in-memory and at least + # one of the clients is on-disk. If the master is in-memory, + # the wrong site gets elected because on-disk envs write a log + # record when they create the env and in-memory ones do not + # and the test wants to control which env gets elected. + # + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + set master_logs [lindex $l 0] + if { $master_logs == "in-memory" } { + set client_logs [lsearch -exact $l "on-disk"] + if { $client_logs != -1 } { + puts "Skipping for in-memory master\ + and on-disk client." + continue + } + } + + puts "Rep$tnum ($method $r): Replication\ + and basic master leases $msg $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client 1 logs are [lindex $l 1]" + puts "Rep$tnum: Client 2 logs are [lindex $l 2]" + rep078_sub $method $tnum $l $r $args + } + } +} + +proc rep078_sub { method tnum logset recargs largs } { + source ./include.tcl + global testdir + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + + # Set leases for 3 sites, 3 second timeout, 0% clock skew + set nsites 3 + set lease_to 3000000 + set lease_tosec [expr $lease_to / 1000000] + set clock_fast 0 + set clock_slow 0 + set testfile test.db + # + # Since we have to use elections, the election code + # assumes a 2-off site id scheme. + # Open a master. + repladd 2 + set err_cmd(0) "none" + set crash(0) 0 + set pri(0) 100 + # + # Note that using the default clock skew should be the same + # as specifying "no skew" through the API. We want to + # test both API usages here. 
+ # + set envcmd(0) "berkdb_env -create $m_txnargs $m_logargs \ + $verbargs -errpfx MASTER -home $masterdir \ + -rep_nsites $nsites -rep_lease \[list $lease_to\] \ + -event $repmemargs \ + -rep_client -rep_transport \[list 2 replsend\]" + set masterenv [eval $envcmd(0) $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open two clients. + repladd 3 + set err_cmd(1) "none" + set crash(1) 0 + set pri(1) 10 + set envcmd(1) "berkdb_env -create $c_txnargs $c_logargs \ + $verbargs -errpfx CLIENT -home $clientdir -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\] \ + -event $repmemargs \ + -rep_client -rep_transport \[list 3 replsend\]" + set clientenv [eval $envcmd(1) $recargs] + error_check_good client_env [is_valid_env $clientenv] TRUE + + # + # Make this site priority 0, unelectable. + # + repladd 4 + set err_cmd(2) "none" + set crash(2) 0 + set pri(2) 0 + set envcmd(2) "berkdb_env -create $c2_txnargs $c2_logargs \ + $verbargs -errpfx CLIENT2 -home $clientdir2 -rep_nsites $nsites \ + -rep_lease \[list $lease_to\] \ + -event $repmemargs \ + -rep_client -rep_transport \[list 4 replsend\]" + set clientenv2 [eval $envcmd(2) $recargs] + error_check_good client_env [is_valid_env $clientenv2] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 2} {$clientenv 3} {$clientenv2 4}" + set noelect_envlist "{$masterenv 2} {$clientenv2 4}" + process_msgs $envlist + + # + # Run election to get a master. Leases prevent us from + # simply assigning a master. + # + set msg "Rep$tnum.a" + puts "\tRep$tnum.a: Run initial election." + set nvotes $nsites + set winner 0 + set elector [berkdb random_int 0 2] + # + # Note we send in a 0 for nsites because we set nsites back + # when we started running with leases. Master leases require + # that nsites be set before calling rep_start, and master leases + # require that the nsites arg to rep_elect be 0. 
+ # + run_election envlist err_cmd pri crash $qdir $msg \ + $elector 0 $nvotes $nsites $winner 0 NULL + + puts "\tRep$tnum.b: Spawn a child tclsh to do txn work." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep078script.tcl $testdir/rep078script.log \ + $masterdir $testfile $method &] + + # Let child run, create database and put a txn into it. + # Process messages while we wait for the child to complete + # its txn so that the clients can grant leases. + puts "\tRep$tnum.c: Wait for child to write txn." + while { [file exists $testdir/marker.db] == 0 } { + tclsleep 1 + } + set markerenv [berkdb_env -home $testdir -txn] + error_check_good markerenv_open \ + [is_valid_env $markerenv] TRUE + set marker [berkdb_open -unknown -env $markerenv \ + -auto_commit marker.db] + set kd [$marker get CHILD1] + while { [llength $kd] == 0 } { + process_msgs $envlist + tclsleep 1 + set kd [$marker get CHILD1] + } + process_msgs $envlist + # + # Child sends us the key it used as the data + # of the CHILD1 key. + # + set key [lindex [lindex $kd 0] 1] + + set masterdb [eval \ + {berkdb_open_noerr -env $masterenv -rdonly $testfile}] + error_check_good dbopen [is_valid_db $masterdb] TRUE + + process_msgs $envlist + set omethod [convert_method $method] + set clientdb [eval {berkdb_open_noerr \ + -env $clientenv $omethod -rdonly $testfile}] + error_check_good dbopen [is_valid_db $clientdb] TRUE + + set uselease "" + set ignorelease "-nolease" + puts "\tRep$tnum.d.0: Read with leases." + check_leaseget $masterdb $key $uselease 0 + check_leaseget $clientdb $key $uselease 0 + puts "\tRep$tnum.d.1: Read ignoring leases." + check_leaseget $masterdb $key $ignorelease 0 + check_leaseget $clientdb $key $ignorelease 0 + # + # This should fail because the lease is expired and all + # attempts by master to refresh it will not be processed. + # + set sleep [expr $lease_tosec + 1] + puts "\tRep$tnum.e.0: Sleep $sleep secs to expire leases and read again." 
+ tclsleep $sleep + # + # Verify the master gets REP_LEASE_EXPIRED. Verify that the + # read on the client ignores leases and succeeds. + # + check_leaseget $masterdb $key $uselease REP_LEASE_EXPIRED + check_leaseget $clientdb $key $uselease 0 + puts "\tRep$tnum.e.1: Read ignoring leases." + check_leaseget $masterdb $key $ignorelease 0 + check_leaseget $clientdb $key $ignorelease 0 + + error_check_good timestamp_done \ + [$marker put PARENT1 [timestamp -r]] 0 + + set kd [$marker get CHILD2] + while { [llength $kd] == 0 } { + process_msgs $envlist + tclsleep 1 + set kd [$marker get CHILD2] + } + process_msgs $envlist + # + # Child sends us the key it used as the data + # of the CHILD2 key. + # + set key [lindex [lindex $kd 0] 1] + + puts "\tRep$tnum.f: Child writes txn + ckp. Don't process msgs." + # + # Child has committed the txn and we have processed it. Now + # signal the child process to put a checkpoint, which we + # will not process. That will invalidate leases. + error_check_good timestamp_done \ + [$marker put PARENT2 [timestamp -r]] 0 + + set kd [$marker get CHILD3] + while { [llength $kd] == 0 } { + tclsleep 1 + set kd [$marker get CHILD3] + } + + puts "\tRep$tnum.f.0: Read using leases fails." + check_leaseget $masterdb $key $uselease REP_LEASE_EXPIRED + puts "\tRep$tnum.f.1: Read ignoring leases." + check_leaseget $masterdb $key $ignorelease 0 + puts "\tRep$tnum.g: Process messages to clients." + process_msgs $envlist + puts "\tRep$tnum.h: Verify read with leases now succeeds." + check_leaseget $masterdb $key $uselease 0 + + # + # Now check that if the electable site is down, and only the + # non electable site exists, we do not get valid leases because + # the data is not safe. + # + puts "\tRep$tnum.i: Leases and non-electable sites only." 
+ error_check_good timestamp_done \ + [$marker put PARENT3 [timestamp -r]] 0 + + set kd [$marker get CHILD4] + while { [llength $kd] == 0 } { + tclsleep 1 + set kd [$marker get CHILD4] + } + # + # Only process messages to non-electable site. + # + process_msgs $noelect_envlist + puts "\tRep$tnum.i.0: Verify read using leases fails." + check_leaseget $masterdb $key $uselease REP_LEASE_EXPIRED + + watch_procs $pid 5 + + process_msgs $envlist + puts "\tRep$tnum.j: Downgrade master." + $masterenv rep_start -client + process_msgs $envlist + + rep_verify $masterdir $masterenv $clientdir $clientenv + process_msgs $envlist + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 0 1 0 + + # Clean up. + error_check_good marker_db_close [$marker close] 0 + error_check_good marker_env_close [$markerenv close] 0 + error_check_good masterdb_close [$masterdb close] 0 + error_check_good masterdb_close [$clientdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + error_check_good clientenv_close [$clientenv2 close] 0 + + replclose $testdir/MSGQUEUEDIR + + # Check log file for failures. + set errstrings [eval findfail $testdir/rep078script.log] + foreach str $errstrings { + puts "FAIL: error message in rep078 log file: $str" + } +} + diff --git a/test/tcl/rep078script.tcl b/test/tcl/rep078script.tcl new file mode 100644 index 00000000..796b0b6b --- /dev/null +++ b/test/tcl/rep078script.tcl @@ -0,0 +1,134 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep078 script - Master leases. +# +# Test master leases and write operations. 
+# +# Usage: rep078script masterdir dbfile method +# masterdir: master env directory +# dbfile: name of database file +# method: access method +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir dbfile method" + +# Verify usage +if { $argc != 3 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set dbfile [ lindex $argv 1 ] +set method [ lindex $argv 2 ] + +# Join the queue env. We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# +# We need to set up our own machids. +# Add 2 for master env id, and 3 and 4 for the clientenv ids. +# +repladd 2 +repladd 3 +repladd 4 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir \ + -txn -rep_transport \[list 2 replsend\]" +# set ma_cmd "berkdb_env_noerr -home $masterdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_transport \[list 2 replsend\]" +puts "Joining master env" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +# Create a marker file. Don't put anything in it yet. The parent +# process will be processing messages while it looks for our +# marker. + + +puts "Create marker file" +set markerenv [berkdb_env -create -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker \ + [eval "berkdb_open -create -btree -auto_commit -env $markerenv marker.db"] + +# +# Create the database and then do a lease operation. Don't +# process messages in the child process. 
+# +puts "Open database" +set args [convert_args $method] +puts "args is $args" +set omethod [convert_method $method] +set db [eval "berkdb_open -env $masterenv -auto_commit -create \ + $omethod $args $dbfile"] +error_check_good script_db_open [is_valid_db $db] TRUE + +puts "Do lease op" +set key 1 +do_leaseop $masterenv $db $method $key NULL 0 + +puts "Put CHILD1" +error_check_good child_key \ + [$marker put CHILD1 $key] 0 + +puts "Wait for PARENT1" +# Give the parent a chance to process messages and check leases. +while { [llength [$marker get PARENT1]] == 0 } { + tclsleep 1 +} + +puts "Do lease op 2" +incr key +do_leaseop $masterenv $db $method $key NULL 0 +puts "Put CHILD2" +error_check_good child2_key \ + [$marker put CHILD2 $key] 0 + +puts "Wait for PARENT2" +# Give the parent a chance to process messages and check leases. +while { [llength [$marker get PARENT2]] == 0 } { + tclsleep 1 +} + +# +# After we get PARENT2, do a checkpoint. +# +puts "Write a checkpoint" +$masterenv txn_checkpoint +puts "Put CHILD3" +error_check_good child2_key \ + [$marker put CHILD3 $key] 0 + +puts "Wait for PARENT3" +# Give the parent a chance to process messages and check leases. +while { [llength [$marker get PARENT3]] == 0 } { + tclsleep 1 +} + +puts "Write another checkpoint" +$masterenv txn_checkpoint -force +puts "Put CHILD4" +error_check_good child4_key \ + [$marker put CHILD4 $key] 0 + +puts "Clean up and exit" +# Clean up the child so the parent can go forward. +error_check_good master_db_close [$db close] 0 +error_check_good marker_db_close [$marker close] 0 +error_check_good markerenv_close [$markerenv close] 0 +error_check_good script_master_close [$masterenv close] 0 + diff --git a/test/tcl/rep079.tcl b/test/tcl/rep079.tcl new file mode 100644 index 00000000..8831ce63 --- /dev/null +++ b/test/tcl/rep079.tcl @@ -0,0 +1,350 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rep079 +# TEST Replication leases and invalid usage. +# TEST +# TEST Open a client without leases. Attempt to set leases after rep_start. +# TEST Attempt to declare as master without election. +# TEST Run an election with an nsites parameter value. +# TEST Elect a master with leases. Put some data and send to clients. +# TEST Cleanly shutdown master env. Restart without +# TEST recovery and verify leases are expired and refreshed. +# TEST Add a new client without leases to a group using leases. +# TEST Test errors if we cannot get leases before/after txn_commit. +# +proc rep079 { method { tnum "079" } args } { + source ./include.tcl + global repfiles_in_memory + + # Valid for all access methods, but there is no difference + # running it with one method over any other. Just use btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + set args [convert_args $method $args] + set logsets [create_logsets 4] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + foreach l $logsets { + # + # Skip the case where the master is in-memory and at least + # one of the clients is on-disk. If the master is in-memory, + # the wrong site gets elected because on-disk envs write a log + # record when they create the env and in-memory ones do not + # and the test wants to control which env gets elected. + # + set master_logs [lindex $l 0] + if { $master_logs == "in-memory" } { + set client_logs [lsearch -exact $l "on-disk"] + if { $client_logs != -1 } { + puts "Skipping for in-memory master\ + with on-disk client." + continue + } + } + puts "Rep$tnum: Replication leases and invalid usage $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + puts "Rep$tnum: Client 2 logs are [lindex $l 2]" + puts "Rep$tnum: Client 3 logs are [lindex $l 3]" + rep079_sub $method $tnum $l $args + } +} + +proc rep079_sub { method tnum logset largs } { + global testdir + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + set clientdir3 $testdir/CLIENTDIR3 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + file mkdir $clientdir3 + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + set c2_logtype [lindex $logset 2] + set c3_logtype [lindex $logset 3] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set c2_logargs [adjust_logargs $c2_logtype] + set c3_logargs [adjust_logargs $c3_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + set c2_txnargs [adjust_txnargs $c2_logtype] + set c3_txnargs [adjust_txnargs $c3_logtype] + + # Set leases for 4 sites, 1 second timeout, 1% clock skew + # [NOTE: We are not adding in client3 until later so don't + # set it in nvotes.] + set nsites 4 + set nvotes 3 + set lease_to 1000000 + set lease_tosec [expr $lease_to / 1000000] + set clock_fast 101 + set clock_slow 100 + + repladd 2 + # + # Use a command without setting errpfx, errfile or verbose + # so that error messages can be caught correctly. 
+ # + set envcmd_err "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $repmemargs -home $masterdir -rep_transport \[list 2 replsend\]" + + # + # This is the real env command, but we won't use it + # quite yet. + set envcmd(0) "berkdb_env_noerr -create $m_txnargs $m_logargs \ + $repmemargs $verbargs -errpfx MASTER -home $masterdir \ + -event \ + -rep_transport \[list 2 replsend\]" + + # + # Leases must be configured before rep_start is called. + # Open a repl env without leases. Try to configure leases + # after the open has already called rep_start. Open as a client. + # + puts "\tRep$tnum.a: Try to configure leases after rep_start." + set noleaseenv [eval $envcmd_err -rep_client] + $noleaseenv rep_nsites $nsites + set stat [catch {$noleaseenv rep_lease \ + [list $lease_to $clock_fast $clock_slow]} lease] + error_check_bad stat $stat 0 + error_check_good menverror [is_substr $lease "timeout must be set"] 1 + error_check_good close [$noleaseenv close] 0 + env_cleanup $masterdir + + # + # If leases are being used, elections must be used. A site + # cannot simply upgrade itself to master. Test that we cannot + # open as a client and then upgrade ourself to a master just + # by calling rep_start. + # + set upgenv [eval $envcmd_err -rep_client -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\]] + puts "\tRep$tnum.b: Try to upgrade a client without election." + set stat [catch {$upgenv rep_start -master} ret] + error_check_bad upg_stat $stat 0 + error_check_good upg_str [is_substr $ret "Cannot become master"] 1 + error_check_good close [$upgenv close] 0 + env_cleanup $masterdir + + # + # Now test inconsistencies dealing with having a group that + # is using lease up and running. For instance, if leases are + # configured, the 'nsites' arg to rep_elect must be 0, etc. + # + # Open the master. Must open as a client and get elected. 
+ # + set err_cmd(0) "none" + set crash(0) 0 + set pri(0) 100 + set masterenv [eval $envcmd(0) -rep_client -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\]] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open two clients. + repladd 3 + set err_cmd(1) "none" + set crash(1) 0 + set pri(1) 10 + set envcmd(1) "berkdb_env -create $c_txnargs $c_logargs \ + $repmemargs $verbargs -errpfx CLIENT -home $clientdir \ + -event -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\] \ + -rep_client -rep_transport \[list 3 replsend\]" + set clientenv [eval $envcmd(1)] + error_check_good client_env [is_valid_env $clientenv] TRUE + + repladd 4 + set err_cmd(2) "none" + set crash(2) 0 + set pri(2) 10 + set envcmd(2) "berkdb_env_noerr -create $c2_txnargs $c2_logargs \ + $repmemargs -home $clientdir2 -event -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\] \ + -rep_client -rep_transport \[list 4 replsend\]" + set clientenv2 [eval $envcmd(2)] + error_check_good client_env [is_valid_env $clientenv2] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 2} {$clientenv 3} {$clientenv2 4}" + process_msgs $envlist + + # + # Send a non-zero nsites value for an election. That is an error. + # + puts "\tRep$tnum.c: Try to run election with leases and nsites value." + # + # !!! We have not set -errpfx or -errfile in envcmd(2) above + # otherwise the error output won't be set in 'ret' below and + # the test will fail. Set it after this piece of the test. + # + set timeout 5000000 + set res [catch {$clientenv2 rep_elect $nsites $nvotes $pri(2) \ + $timeout} ret] + error_check_bad catch $res 0 + error_check_good ret [is_substr $ret "nsites must be zero"] 1 + + # + # Now we can set verbose args, errpfx, etc. Set it in the + # command (for elections) and also manually add it to the + # current env handle. 
+ # + set envcmd(2) "$envcmd(2) $verbargs -errpfx CLIENT2" + if { $rep_verbose == 1 } { + $clientenv2 verbose $verbose_type on + $clientenv2 errpfx CLIENT2 + } + + # + # This next section will test that a replicated env that is master + # can cleanly close and then reopen without recovery and retain + # its master status. + # + set msg "Rep$tnum.d" + set nvotes [expr $nsites - 1] + set winner 0 + setpriority pri $nsites $winner + set elector [berkdb random_int 0 2] + puts "\tRep$tnum.d: Run election for real to get master." + # + # Run election for real. Set nsites to 0 for this command. + # + repladd 5 + set err_cmd(3) "none" + set crash(3) 0 + set pri(3) 0 + run_election envlist err_cmd pri crash $qdir $msg \ + $elector 0 $nvotes $nsites $winner 0 NULL + + puts "\tRep$tnum.e: Write a checkpoint." + # + # Writing a checkpoint forces a PERM record which will cause + # the clients to grant us their leases. Then, while holding + # the lease grants we can do the next part of the test to + # close and cleanly reopen while holding leases. + $masterenv txn_checkpoint -force + + process_msgs $envlist + + puts "\tRep$tnum.f.0: Close master env." + error_check_good mclose [$masterenv close] 0 + set sleep [expr $lease_tosec + 1] + puts "\tRep$tnum.f.1: Sleep $sleep secs to expire lease grants." + tclsleep $sleep + # + # We should be able to reopen the master env without running + # recovery and still retain our mastership. + set masterenv [eval $envcmd(0) -rep_master -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\]] + error_check_good master_env [is_valid_env $masterenv] TRUE + set envlist "{$masterenv 2} {$clientenv 3} {$clientenv2 4}" + + # + # Verify that if a non-lease site tries to join a group that + # is using leases, it gets an error. Configuring leases + # must be all-or-none across all group members. + # + puts "\tRep$tnum.g: Add client3 that does not configure leases." 
+ replclear 5 + set envcmd(3) "berkdb_env_noerr -create $c3_txnargs $c3_logargs \ + -home $clientdir3 -event \ + $repmemargs $verbargs -errpfx CLIENT3 \ + -rep_client -rep_transport \[list 5 replsend\]" + set clientenv3 [eval $envcmd(3)] + error_check_good client_env [is_valid_env $clientenv3] TRUE + + # Bring the clients online by processing the startup messages. + set origlist $envlist + set envlist "{$masterenv 2} {$clientenv 3} \ + {$clientenv2 4} {$clientenv3 5}" + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.g.1: Verify client fatal error." + error_check_good process_msgs_err [is_substr $err DB_RUNRECOVERY] 1 + # + # Close to reclaim Tcl resources, but we want to catch/ignore + # the continuing DB_RUNRECOVERY error. The extra env_cleanup is + # necessary on Windows; otherwise region files hang around and + # can cause the next test to fail. + # + catch {$clientenv3 close} ret + env_cleanup $clientdir3 + set envlist $origlist + + puts "\tRep$tnum.h: Check expired lease error on txn commit." + # + # Leases are already expired, so attempt to commit should fail. + # (And this will be the 'before we commit' check that returns + # an error, not the 'after' check that panics). + # + set txn [$masterenv txn] + set stat [catch {$txn commit} ret] + error_check_good stat $stat 1 + error_check_good exp [is_substr $ret REP_LEASE_EXPIRED] 1 + + # + # Process messages so that we refresh the leases. + # Then attempt a commit. This too should fail but this time + # it will be the "after" check because we're not processing + # messages to get the commit's lease grant from the client. + $masterenv txn_checkpoint -force + process_msgs $envlist 0 NONE err + + puts "\tRep$tnum.i: Check panic lease error on txn commit." 
+ set txn [$masterenv txn] + set db [eval berkdb_open_noerr -txn $txn -env $masterenv -create \ + -btree -mode 0644 test.db] + set stat [catch {$txn commit} ret] + error_check_good stat $stat 1 + error_check_good exp [is_substr $ret DB_RUNRECOVERY] 1 + set stat [catch {$db close} ret] + + # + # Since we panic'ed the master env, we expect non-zero. But + # we need to close it to clean up the Tcl resources. + # + error_check_good mclose [catch {$masterenv close} ret] 1 + error_check_good cclose [$clientenv close] 0 + error_check_good c2close [$clientenv2 close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep080.tcl b/test/tcl/rep080.tcl new file mode 100644 index 00000000..6e65dc2a --- /dev/null +++ b/test/tcl/rep080.tcl @@ -0,0 +1,196 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep080 +# TEST AUTOINIT off with empty client logs. +# TEST +# TEST Verify that a fresh client trying to join the group for +# TEST the first time observes the setting of DELAY_SYNC and !AUTOINIT +# TEST properly. +# TEST +proc rep080 { method { niter 200 } { tnum "080" } args } { + + source ./include.tcl + global mixed_mode_logging + global databases_in_memory + global repfiles_in_memory + global env_private + + # Skip for all methods except btree. + if { $checking_valid_methods } { + return btree + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: skipping for non-btree method $method." + return + } + + if { $mixed_mode_logging != 0 } { + puts "Rep$tnum: skipping for in-mem (or mixed) logging." + return + } + + set args [convert_args $method $args] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." 
+ return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # The {"" "-recover"} loop that is typical in replication tests can be + # useful for tests which close existing environments at some point, and + # then later reopen them. (When we reopen, we do so either with + # recovery, or without it.) But this test never does that. + # + puts "Rep$tnum ($method):\ + Test of AUTOINIT off with empty client logs $msg $msg2 $msg3." + rep080_sub $method $niter $tnum $args +} + +proc rep080_sub { method niter tnum largs } { + global testdir + global databases_in_memory + global repfiles_in_memory + global env_private + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir1 $testdir/CLIENTDIR1 + set clientdir2 $testdir/CLIENTDIR2 + set clientdir3 $testdir/CLIENTDIR3 + set clientdir4 $testdir/CLIENTDIR4 + + file mkdir $masterdir + file mkdir $clientdir1 + file mkdir $clientdir2 + file mkdir $clientdir3 + file mkdir $clientdir4 + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $verbargs -errpfx MASTER \ + $repmemargs $privargs \ + -home $masterdir -txn -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + set envlist "{$masterenv 1}" + + # Run rep_test in the master. + puts "\tRep$tnum.a: Running rep_test in replicated env." + eval rep_test $method $masterenv NULL $niter 0 0 $largs + process_msgs $envlist + + # Open a client + puts "\tRep$tnum.b: Add a normal client." 
+ repladd 2 + set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + $repmemargs $privargs \ + -home $clientdir1 -txn -rep_transport \[list 2 replsend\]" + set clientenv1 [eval $cl_envcmd -rep_client] + lappend envlist [list $clientenv1 2] + process_msgs $envlist + + rep_verify $masterdir $masterenv $clientdir1 $clientenv1 0 1 1 + + # Open a client with AUTOINIT turned off. + # + puts "\tRep$tnum.c: Add a client with AUTOINIT off (should fail)." + repladd 3 + set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + $repmemargs $privargs \ + -home $clientdir2 -txn -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl_envcmd -rep_client] + $clientenv2 rep_config {autoinit off} + + lappend envlist [list $clientenv2 3] + process_msgs $envlist 0 NONE error + error_check_good errchk [is_substr $error JOIN_FAILURE] 1 + + # Open a client with DELAY_SYNC + # + puts "\tRep$tnum.d: Add a client with DELAY_SYNC." + repladd 4 + set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + $repmemargs $privargs \ + -home $clientdir3 -txn -rep_transport \[list 4 replsend\]" + set clientenv3 [eval $cl_envcmd -rep_client] + $clientenv3 rep_config {delayclient on} + + lappend envlist [list $clientenv3 4] + process_msgs $envlist 0 NONE error + error_check_good errchk2 $error 0 + + error_check_bad expect_error [catch {rep_verify \ + $masterdir $masterenv $clientdir3 $clientenv3 0 1 1}] 0 + + error_check_good rep_sync [$clientenv3 rep_sync] 0 + process_msgs $envlist 0 NONE error + error_check_good errchk3 $error 0 + rep_verify $masterdir $masterenv $clientdir3 $clientenv3 + + # Open a client with both DELAY_SYNC and AUTOINIT off. + # + puts "\tRep$tnum.f: Add a client with DELAY_SYNC and AUTOINIT off." 
+ repladd 5 + set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + $repmemargs $privargs \ + -home $clientdir4 -txn -rep_transport \[list 5 replsend\]" + set clientenv4 [eval $cl_envcmd -rep_client] + $clientenv4 rep_config {delayclient on} + $clientenv4 rep_config {autoinit off} + + lappend envlist [list $clientenv4 5] + process_msgs $envlist 0 NONE error + error_check_good process_msgs $error 0 + + error_check_bad expect_error2 [catch {rep_verify\ + $masterdir $masterenv $clientdir4 $clientenv4 0 1 1}] 0 + + error_check_bad rep_sync [catch {$clientenv4 rep_sync} result] 0 + error_check_good errchk5 [is_substr $result JOIN_FAILURE] 1 + + $masterenv close + $clientenv1 close + $clientenv2 close + $clientenv3 close + $clientenv4 close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep081.tcl b/test/tcl/rep081.tcl new file mode 100644 index 00000000..dbed4076 --- /dev/null +++ b/test/tcl/rep081.tcl @@ -0,0 +1,296 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep081 +# TEST Test of internal initialization and missing database files. +# TEST +# TEST One master, one client, two databases. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Start up client. +# TEST Remove or replace one master database file while client initialization +# TEST is in progress, make sure other master database can keep processing. +# +proc rep081 { method { niter 200 } { tnum "081" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Due to the nature of the heap tcl infrastructure, this + # test can fail for heap, although it does not represent + # real BDB failure. 
+ if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_heap $method] != 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_heap $method] == 1 } { + puts "Skipping test$tnum for method $method." + return + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run with options to remove or replace the master database file. + set testopts { removefile replacefile } + foreach t $testopts { + foreach l $logsets { + puts "Rep$tnum ($method $t $args): Test of\ + internal init with missing db file $msg $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep081_sub $method $niter $tnum $l $t $args + } + } +} + +proc rep081_sub { method niter tnum logset testopt largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + $masterenv rep_limit 0 0 + + # Run rep_test in the master only. + puts "\tRep$tnum.a: Running rep_test in replicated env." 
+ set start 0 + if { $databases_in_memory } { + set testfile { "" "test.db" } + set testfile2 { "" "test2.db" } + } else { + set testfile "test.db" + set testfile2 "test2.db" + } + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set mdb [eval {berkdb_open_noerr} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile ] + error_check_good reptest_db [is_valid_db $mdb] TRUE + set mdb2 [eval {berkdb_open_noerr} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile2 ] + error_check_good reptest_db2 [is_valid_db $mdb2] TRUE + + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master beyond the first log file. + eval rep_test $method \ + $masterenv $mdb $niter $start $start 0 $largs + eval rep_test $method \ + $masterenv $mdb2 $niter $start $start 0 $largs + incr start $niter + + puts "\tRep$tnum.a.1: Run db_archive on master." + if { $m_logtype == "on-disk" } { + $masterenv log_flush + set res \ + [eval exec $util_path/db_archive -d -h $masterdir] + } + # + # Make sure we have moved beyond the first log file. + # + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > 1 } { + set stop 1 + } + + } + + puts "\tRep$tnum.b: Open client." + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + $clientenv rep_limit 0 0 + set envlist "{$masterenv 1} {$clientenv 2}" + + # Check initial value for number of FILE_FAIL internal init cleanups. + error_check_good ff_cleanup \ + [stat_field $clientenv rep_stat "File fail cleanups done"] 0 + + # + # Process messages in a controlled manner until the update (internal + # init) starts and we can remove or replace the database file. 
+ # + set loop 10 + set i 0 + set entries 100 + set in_rec_page 0 + set dbrem_init 0 + if { $testopt == "replacefile" } { + set errstr "invalid argument" + } else { + set errstr "no such file or directory" + } + while { $i < $loop } { + set nproced 0 + incr nproced [proc_msgs_once $envlist NONE err] + # + # Last time through the loop the mdb database file + # is gone. The master is processing the client's PAGE_REQ + # and not finding the database file it needs so it sends a + # FILE_FAIL and returns an error. Break out of loop if + # expected error seen. + # + if { [is_substr $err $errstr] } { + error_check_good nproced $nproced 0 + break + } else { + error_check_bad nproced $nproced 0 + error_check_good errchk $err 0 + } + # Internal init file is very transient, but exists in + # the rep files on-disk case during the second iteration + # of this loop. Take this chance to make sure the internal + # init file doesn't exist when rep files are in-memory. + if { $i == 1 && $repfiles_in_memory == 1 } { + error_check_good noinit \ + [file exists "$clientdir/__db.rep.init"] 0 + } + # + # When we are in internal init, remove the mdb database file. + # This causes the master to send a FILE_FAIL that will cause + # the client to clean up its internal init. + # + if { $in_rec_page == 0 } { + set clstat [exec $util_path/db_stat \ + -N -r -R A -h $clientdir] + if { $dbrem_init == 0 && \ + [is_substr $clstat "SYNC_PAGE"] } { + set in_rec_page 1 + set dbrem_init 1 + # + # Turn off timer so that client sync doesn't + # prevent db operations. + # + $masterenv test force noarchive_timeout + + # Close and remove mdb. + puts "\tRep$tnum.c: Remove a database file." + error_check_good mdb_close [$mdb close] 0 + error_check_good remove_x [$masterenv \ + dbremove -auto_commit $testfile] 0 + + # Make sure mdb file is really gone. + set dfname [file join $masterdir $testfile] + error_check_good gone [file exists $dfname] 0 + + # Replace mdb file with non-db content. 
+ if { $testopt == "replacefile" } { + puts \ + "\tRep$tnum.c.1: Replace database file." + set repfileid [open $dfname w+] + puts -nonewline $repfileid \ + "This is not a database file." + close $repfileid + } + } + } + incr i + } + + # + # Process two more batches of messages so client can process + # the FILE_FAIL message and the resulting new internal init. + # + puts "\tRep$tnum.d: Process messages including FILE_FAIL." + process_msgs $envlist 0 NONE err + if { $err != 0 } { + error_check_good errchk [is_substr $err $errstr] 1 + } + puts "\tRep$tnum.d.1: Process messages including new internal init." + process_msgs $envlist 0 NONE err + error_check_good errchk $err 0 + + puts "\tRep$tnum.e: Verify logs and databases." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 test2.db + + # Make sure we have seen a FILE_FAIL internal init cleanup. + error_check_good ff_cleanup \ + [stat_field $clientenv rep_stat "File fail cleanups done"] 1 + + error_check_good mdb_close2 [$mdb2 close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + + diff --git a/test/tcl/rep082.tcl b/test/tcl/rep082.tcl new file mode 100644 index 00000000..ab6eabf4 --- /dev/null +++ b/test/tcl/rep082.tcl @@ -0,0 +1,204 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep082 +# TEST Sending replication requests to correct master site. +# TEST +# TEST Regression test for a bug [#16592] where a client could send an +# TEST UPDATE_REQ to another client instead of the master. 
+# +proc rep082 { method { niter 200 } { tnum "082" } args } { + source ./include.tcl + global repfiles_in_memory + + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + puts "Rep$tnum: ($method) Test that\ + client doesn't send UPDATE_REQ to another client $msg2." + + rep082_sub $method $niter $tnum $args +} + +proc rep082_sub { method niter tnum largs } { + global testdir + global util_path + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirA $testdir/A] + file mkdir [set dirB $testdir/B] + file mkdir [set dirC $testdir/C] + + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dirA -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + # Open a client + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \ + -home $dirB -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + # Open 2nd client + repladd 3 + set env_C_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirC -rep_transport \[list 3 replsend\]" + set envs(C) [eval $env_C_cmd -rep_client] + + # Bring the clients online by processing the startup messages. 
+ set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # Run rep_test in the master (and update clients). + puts "\tRep$tnum.a: populate initial portion of log." + eval rep_test $method $envs(A) NULL $niter 0 0 0 $largs + process_msgs $envlist + + $envs(A) close + $envs(B) close + $envs(C) close + + # At this point, we have a first section of the log history that was + # produced at master site A, and is replicated to both other sites. Now + # let's produce a second section of history, also produced at master + # site A, but only replicated to site C; make sure this second section + # spans a log file boundary. Archive log files at site C, so that we + # make sure that site C has only a fraction of this second section. + # + + set envs(A) [eval $env_A_cmd -recover -rep_master] + set envs(C) [eval $env_C_cmd -recover -rep_client] + replclear 2 + process_msgs "{$envs(A) 1} {$envs(C) 3}" + + set last_client_log [get_logfile $envs(C) last] + + set stop 0 + set start 0 + set count 0 + while { $stop == 0 } { + incr count + # Run rep_test in the master (don't update client). + puts "\tRep$tnum.b: Fill log until next log file." + incr start $niter + eval rep_test $method $envs(A) NULL $niter $start $start 0 $largs + + replclear 2 + process_msgs "{$envs(A) 1} {$envs(C) 3}" + + puts "\tRep$tnum.c: Run db_archive on client C." + exec $util_path/db_archive -d -h $dirC + set new_last_log [get_logfile $envs(C) last] + if { $new_last_log > $last_client_log } { + set stop 1 + } + } + + # Now make site B become the master. Since site B was not running + # during the last phase, it does not have any of the "second section of + # log history" that we produced in that phase. So site A will have to + # throw away those transactions in order to sync up with B. HOWEVER, + # site B will now generate yet another new section of log history, which + # is identical to the set of transactions generated a moment ago at site + # A. 
In other words, although this is the third section of history to + # be generated, we have arranged to have it completely replace the + # second section, and to have it exactly match! Note that we leave site + # C out of the picture during this phase. + # + $envs(A) close + $envs(C) close + set envs(B) [eval $env_B_cmd -recover -rep_master] + set envs(A) [eval $env_A_cmd -recover -rep_client] + + set start 0 + while {$count > 0} { + puts "\tRep$tnum.d: Running rep_test in replicated env." + incr start $niter + eval rep_test $method $envs(B) NULL $niter $start $start 0 $largs + + replclear 3 + process_msgs "{$envs(A) 1} {$envs(B) 2}" + + incr count -1 + } + + # Now start up site C again, but configure it to rely on site A for + # client-to-client synchronization. Recall the known contents of site + # C's transaction log: it has a partial copy of the "second section" of + # log history (it has the end of that section, but not the beginning). + # The transactions in this log will have the same LSN's as are currently + # in place at sites A and B (which, remember, were produced by the + # identical "third section" of history), but the commit record contents + # won't exactly match, because the third section was produced by master + # site B. + # + # During the verify dance, client C will continue to walk back the log, + # finding commit records which find matching LSNs at A/B, but no + # matching contents. When it hits the archived log file boundary it + # will have to give up without having found a match. Thus we have + # produced a situation where an incoming VERIFY message from another + # client (site A) results in client C sending an UPDATE_REQ. We want to + # make sure that client C sends the UPDATE_REQ to the master, rather + # than blindly sending to the same site that produced the VERIFY + # message. + # + puts "\tRep$tnum.e: start client C, with A as peer." 
+ set env_C_cmd "berkdb_env_noerr -create -txn $verbargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirC -rep_transport \[list 3 rep082_send\]" + set envs(C) [eval $env_C_cmd -recover -rep_client] + process_msgs "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + + $envs(C) close + $envs(A) close + $envs(B) close + + replclose $testdir/MSGQUEUEDIR +} + +# We use this special-purpose wrapper send function only in the very last phase +# of the test, and only at site C. Before that we just use the normal send +# function as usual. Since we know exactly what sites are in what roles, we can +# simply hard-code the EID numbers: site B (2) is the master, and site A (1) is +# the desired target site for c2c synchronization. +# +proc rep082_send { control rec fromid toid flags lsn } { + if {$toid == 2 && [lsearch $flags "rerequest"] == -1 \ + && [lsearch $flags "any"] != -1} { + set toid 1 + } + replsend $control $rec $fromid $toid $flags $lsn +} diff --git a/test/tcl/rep083.tcl b/test/tcl/rep083.tcl new file mode 100644 index 00000000..db90cb05 --- /dev/null +++ b/test/tcl/rep083.tcl @@ -0,0 +1,159 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep083 +# TEST Replication clients must never send VERIFY_FAIL to a c2c request. +# TEST +# TEST Regression test for a bug [#16592] where a client could send a +# TEST VERIFY_FAIL to another client, which is illegal. +# +proc rep083 { method { niter 200 } { tnum "083" } args } { + source ./include.tcl + global repfiles_in_memory + + if { $checking_valid_methods } { + return "ALL" + } + + set args [convert_args $method $args] + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + puts "Rep$tnum: ($method)\ + Test that client never sends VERIFY_FAIL $msg2." 
+ rep083_sub $method $niter $tnum $args +} + +proc rep083_sub { method niter tnum largs } { + global testdir + global util_path + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirA $testdir/A] + file mkdir [set dirB $testdir/B] + file mkdir [set dirC $testdir/C] + + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dirA -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + # Open a client + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \ + -home $dirB -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + # Open 2nd client + repladd 3 + set env_C_cmd "berkdb_env_noerr -create -txn $verbargs $repmemargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirC -rep_transport \[list 3 rep083_send\]" + set envs(C) [eval $env_C_cmd -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # Run rep_test in the master (and update clients). + puts "\tRep$tnum.a: populate initial portion of log." 
+ eval rep_test $method $envs(A) NULL $niter 0 0 0 $largs + process_msgs $envlist + + # Take note of the initial value of "Pages received" + set pages_rcvd0 [stat_field $envs(C) rep_stat "Pages received"] + + set last_B_log [get_logfile $envs(B) last] + + # We can't use 'push_master_ahead' here because the clients + # are being left open. + + set stop 0 + set start 0 + while { $stop == 0 } { + # Run rep_test in the master (don't update client). + puts "\tRep$tnum.b: Fill log until next log file." + incr start $niter + eval rep_test $method $envs(A) NULL $niter $start $start 0 $largs + + replclear 3 + process_msgs "{$envs(A) 1} {$envs(B) 2}" + + puts "\tRep$tnum.c: Run db_archive on client B." + $envs(B) log_flush + exec $util_path/db_archive -d -h $dirB + set new_last_log [get_logfile $envs(B) last] + if { $new_last_log > $last_B_log } { + set stop 1 + } + } + + # At this point, client C is far behind (because we've been throwing + # away messages destined to it). And client B has minimal log, because + # we've been aggressively archiving, but the master A has its entire log + # history. Therefore, upon resuming messaging to C, it should be able + # to catch up without doing an internal init. + + puts "\tRep$tnum.d: Write one more txn, and resume msging to C." + incr start $niter + eval rep_test $method $envs(A) NULL 1 $start $start 0 $largs + process_msgs $envlist + + # Pause and do it one more time, to provide time for client C's + # time-based gap request trigger to work. + # + tclsleep 1 + incr start 1 + eval rep_test $method $envs(A) NULL 1 $start $start 0 $largs + process_msgs $envlist + + # Make sure C didn't do an internal init (which we detect by testing + # whether it received any pages recently). 
+ # + error_check_good no_internal_init \ + [stat_field $envs(C) rep_stat "Pages received"] $pages_rcvd0 + $envs(C) close + $envs(A) close + $envs(B) close + + replclose $testdir/MSGQUEUEDIR +} + +# We use this special-purpose wrapper send function only at site C. Since we +# know exactly what sites are in what roles, we can simply hard-code the EID +# numbers: site A (1) is the master, and site B (2) is the desired target site +# for c2c synchronization. +# +proc rep083_send { control rec fromid toid flags lsn } { + if {$toid == 1 && [lsearch $flags "rerequest"] == -1 \ + && [lsearch $flags "any"] != -1} { + set toid 2 + } + replsend $control $rec $fromid $toid $flags $lsn +} diff --git a/test/tcl/rep084.tcl b/test/tcl/rep084.tcl new file mode 100644 index 00000000..aa2158f3 --- /dev/null +++ b/test/tcl/rep084.tcl @@ -0,0 +1,149 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2008, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep084 +# TEST Abbreviated internal init for named in-memory databases (NIMDBs). +# TEST +# +proc rep084 { method { niter 200 } { tnum "084" } args } { + source ./include.tcl + + # As an internal init test, run for btree and queue only. + # As an in-memory database test, skip queueext. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || [is_queue $method] == 1 } { + if { [is_queueext $method] == 0 } { + lappend test_methods $method + } + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping internal init test rep$tnum for method $method." + return + } + if { [is_queueext $method] == 1 } { + puts "Skipping in-memory db test rep$tnum for method $method." 
+ return + } + + set args [convert_args $method $args] + + rep084_sub $method $niter $tnum $args +} + +proc rep084_sub { method niter tnum largs } { + global testdir + global util_path + global env_private + global rep_verbose + global verbose_type + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + puts "Rep$tnum: ($method) Abbreviated internal init for NIMDBs $msg3." + set omethod [convert_method $method] + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirA $testdir/A] + file mkdir [set dirB $testdir/B] + file mkdir [set dirC $testdir/C] + + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn $verbargs $privargs \ + -errpfx SITE_A \ + -home $dirA -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + # Open two clients + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn $verbargs $privargs \ + -errpfx SITE_B \ + -home $dirB -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + repladd 3 + set env_C_cmd "berkdb_env_noerr -create -txn $verbargs $privargs \ + -errpfx SITE_C \ + -home $dirC -rep_transport \[list 3 replsend\]" + set envs(C) [eval $env_C_cmd -rep_client] + + # Bring the clients online by processing the startup messages. + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # Create some data in each of two databases, one a regular DB, and the + # other a NIMDB. + puts "\tRep$tnum.a: insert data." 
+ set start 0 + eval rep_test $method $envs(A) NULL $niter $start $start 0 $largs + set db [eval berkdb_open -env $envs(A) -auto_commit $largs \ + -create $omethod "{}" "mynimdb"] + eval rep_test $method $envs(A) $db $niter $start $start 0 $largs + process_msgs $envlist + + $db close + $envs(B) close + $envs(C) close + + # Restart the clients with recovery, which causes the NIMDB to + # disappear. Before syncing with the master, verify that the NIMDB is + # gone. Verify that the AUTOINIT off setting does not inhibit NIMDB + # materialization. + puts "\tRep$tnum.b: restart with recovery; \ +check expected database existence." + set envs(B) [eval $env_B_cmd -rep_client -recover] + set envs(C) [eval $env_C_cmd -rep_client -recover] + $envs(C) rep_config {autoinit off} + + [berkdb_open -env $envs(B) -auto_commit "test.db"] close + [berkdb_open -env $envs(C) -auto_commit "test.db"] close + error_check_good "NIMDB doesn't exist after recovery" \ + [catch {berkdb_open -env $envs(B) -auto_commit "" "mynimdb"}] 1 + + puts "\tRep$tnum.c: sync with master, NIMDB reappears." + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # After syncing with the master, the client should have copies of all + # databases. + # + [berkdb_open -env $envs(B) -auto_commit "test.db"] close + [berkdb_open -env $envs(B) -auto_commit "" "mynimdb"] close + [berkdb_open -env $envs(C) -auto_commit "test.db"] close + [berkdb_open -env $envs(C) -auto_commit "" "mynimdb"] close + + # Run some more updates into the NIMDB at the master, and replicate them + # to the client, to make sure the client can apply transactions onto a + # NIMDB that had disappeared (and is now back). 
+ # + incr start $niter + set db [berkdb_open -env $envs(A) -auto_commit "" "mynimdb"] + eval rep_test $method $envs(A) $db $niter $start $start 0 $largs + process_msgs $envlist + $db close + + $envs(C) close + $envs(B) close + $envs(A) close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep085.tcl b/test/tcl/rep085.tcl new file mode 100644 index 00000000..1c8628a5 --- /dev/null +++ b/test/tcl/rep085.tcl @@ -0,0 +1,167 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep085 +# TEST Skipping unnecessary abbreviated internal init. +# TEST +# TEST Make sure that once we've materialized NIMDBs, we don't bother +# TEST trying to do it again on subsequent sync without recovery. Make +# TEST sure we do probe for the need to materialize NIMDBs, but don't do +# TEST any internal init at all if there are no NIMDBs. Note that in order to +# TEST do this test we don't even need any NIMDBs. + +proc rep085 { method {niter 20} {tnum 085} args } { + source ./include.tcl + + # Run for btree and queue only. Since this is a NIMDB test, + # skip queueext. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || [is_queue $method] == 1 } { + if { [is_queueext $method] == 0 } { + lappend test_methods $method + } + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping internal init test rep$tnum for method $method." + return + } + if { [is_queueext $method] == 1 } { + puts "Skipping in-memory database test rep$tnum for method $method." 
+ return + } + + set args [convert_args $method $args] + + rep085_sub $method $niter $tnum $args +} + +proc rep085_sub { method niter tnum largs } { + global testdir + global env_private + global rep_verbose + global verbose_type + global rep085_page_msg_count rep085_update_req_count + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + puts "Rep$tnum ($method):\ + Skipping unnecessary abbreviated internal init $msg3." + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirs(A) $testdir/SITE_A] + file mkdir [set dirs(B) $testdir/SITE_B] + file mkdir [set dirs(C) $testdir/SITE_C] + + set rep085_page_msg_count 0 + set rep085_update_req_count 0 + + puts "\tRep$tnum.a: Create master" + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -errpfx SITE_A -errfile /dev/stderr \ + -home $dirs(A) -rep_transport \[list 1 rep085_send\]" + set envs(A) [eval $env_A_cmd -rep_master] + + puts "\tRep$tnum.b: create (only) a regular DB" + set start 0 + eval rep_test $method $envs(A) NULL $niter $start $start 0 $largs + + puts "\tRep$tnum.c: Create two clients" + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -errpfx SITE_B -errfile /dev/stderr \ + -home $dirs(B) -rep_transport \[list 2 rep085_send\]" + set envs(B) [eval $env_B_cmd -rep_client] + + repladd 3 + set env_C_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -errpfx SITE_C -errfile /dev/stderr \ + -home $dirs(C) -rep_transport \[list 3 rep085_send\]" + set envs(C) [eval $env_C_cmd -rep_client] + + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + + # Note that the initial internal init that we've just done should have + # the effect of setting this flag. 
The flag indicates that any NIMDBs + # have been loaded, and any full internal init of course accomplishes + # that. If there are no NIMDBs whatsoever (which is the case here), + # then the condition "any NIMDBs are loaded" is trivially satisfied. + # + if { !$env_private } { + assert_rep_flag $dirs(C) REP_F_NIMDBS_LOADED 1 + } + + # Restart client C with recovery, which forces it to check for NIMDBs + # even though a full internal init is not necessary. + # + puts "\tRep$tnum.d: Bounce client C" + $envs(C) close + set envs(C) [eval $env_C_cmd -rep_client -recover] + if { !$env_private } { + assert_rep_flag $dirs(C) REP_F_NIMDBS_LOADED 0 + } + set upd_before $rep085_update_req_count + set pg_before $rep085_page_msg_count + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + error_check_good update.msg.sent \ + $rep085_update_req_count [incr upd_before] + error_check_good no.page.msg $rep085_page_msg_count $pg_before + if { !$env_private } { + assert_rep_flag $dirs(C) REP_F_NIMDBS_LOADED 1 + } + + # Switch masters, forcing client C to re-sync. But this time it already + # knows it has NIMDBs, so even an UPDATE_REQ shouldn't be necessary. 
+ # + puts "\tRep$tnum.e: Switch master to site B" + $envs(A) rep_start -client + $envs(B) rep_start -master + set upd_before $rep085_update_req_count + set pg_before $rep085_page_msg_count + process_msgs $envlist + error_check_good no.update.msg $rep085_update_req_count $upd_before + error_check_good no.page.msg.2 $rep085_page_msg_count $pg_before + + $envs(A) close + $envs(B) close + $envs(C) close + replclose $testdir/MSGQUEUEDIR +} + +proc rep085_send { control rec fromid toid flags lsn } { + global rep085_page_msg_count rep085_update_req_count + + if {[berkdb msgtype $control] eq "page"} { + incr rep085_page_msg_count + } elseif {[berkdb msgtype $control] eq "update_req"} { + incr rep085_update_req_count + } + + return [replsend $control $rec $fromid $toid $flags $lsn] +} diff --git a/test/tcl/rep086.tcl b/test/tcl/rep086.tcl new file mode 100644 index 00000000..ace91a13 --- /dev/null +++ b/test/tcl/rep086.tcl @@ -0,0 +1,160 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep086 +# TEST Interrupted abbreviated internal init. +# TEST +# TEST Make sure we cleanly remove partially loaded named in-memory +# TEST databases (NIMDBs). + +proc rep086 { method { tnum "086" } args } { + + source ./include.tcl + + # Run for btree and queue only. Since this is a NIMDB test, + # skip queueext. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || [is_queue $method] == 1 } { + if { [is_queueext $method] == 0 } { + lappend test_methods $method + } + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping internal init test rep$tnum for method $method." + return + } + if { [is_queueext $method] == 1 } { + puts "Skipping in-memory database test rep$tnum for method $method." 
+ return + } + + set args [convert_args $method $args] + rep086_sub $method $tnum $args +} + +proc rep086_sub { method tnum largs } { + + global testdir + global env_private + global rep_verbose + global verbose_type + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + set omethod [convert_method $method] + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirs(A) $testdir/SITE_A] + file mkdir [set dirs(B) $testdir/SITE_B] + + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + puts "Rep$tnum ($method):\ + Test of interrupted abbreviated internal init $msg3." + puts "\tRep$tnum.a: Create master and client." + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dirs(A) -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + # Open a client + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \ + -home $dirs(B) -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + set envlist "{$envs(A) 1} {$envs(B) 2}" + process_msgs $envlist + + puts "\tRep$tnum.b: Create a regular DB and a few NIMDBs." + set niter 200 + set start 0 + eval rep_test $method $envs(A) NULL $niter 0 0 0 $largs + for { set i 1 } { $i <= 3 } { incr i } { + set nimdb [eval {berkdb_open} -env $envs(A) -auto_commit \ + -create $largs $omethod {""} "mynimdb$i"] + eval rep_test $method $envs(A) \ + $nimdb $niter $start $start 0 $largs + $nimdb close + } + process_msgs $envlist + + puts "\tRep$tnum.c: Bounce client so it has to re-materialize the NIMDBs." 
+ $envs(B) close + set envs(B) [eval $env_B_cmd -rep_client -recover] + set envlist "{$envs(A) 1} {$envs(B) 2}" + + # Here's a summary reminder of the messaging that is taking place in + # each of the proc_msgs_once message cycles. + # + # 1. NEWCLIENT -> NEWMASTER -> VERIFY_REQ (the checkpoint written by + # regular recovery) + # 2. -> VERIFY -> (no match) VERIFY_REQ (last txn commit in common) + # 3. -> VERIFY -> (match, but need NIMDBS) UPDATE_REQ + # 4. -> UPDATE -> PAGE_REQ + # 5. -> PAGE -> (limited to partial NIMDB content by rep_limit) + + proc_msgs_once $envlist + proc_msgs_once $envlist + proc_msgs_once $envlist + proc_msgs_once $envlist + + # Before doing cycle # 5, set a ridiculously low limit, so that only the + # first page of the database will be received on this next cycle. + # + $envs(A) rep_limit 0 4 + proc_msgs_once $envlist + + # Just to make sure our test is working the way we think it should, + # verify that we are indeed in SYNC_PAGE state. We can't do this + # for env_private because assert_rep_flag execs a second process. + if { $env_private == 0 } { + assert_rep_flag $dirs(B) SYNC_PAGE 1 + } + + # Now, with only a partial materialization of the NIMDB, downgrade the + # master, which should cause client to realize its internal init is + # interrupted. + # + $envs(A) rep_limit 0 0 + $envs(A) rep_start -client + proc_msgs_once $envlist + + puts "\tRep$tnum.d: Try to open NIMDBs." + for { set i 0 } { $i <= 3 } { incr i } { + set cmd [list berkdb_open -env $envs(B) -auto_commit "" "mynimdb$i"] + error_check_bad "open partially loaded NIMDB" [catch $cmd] 0 + } + + $envs(A) close + $envs(B) close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep087.tcl b/test/tcl/rep087.tcl new file mode 100644 index 00000000..09ea3daf --- /dev/null +++ b/test/tcl/rep087.tcl @@ -0,0 +1,227 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# TEST rep087 +# TEST Abbreviated internal init with open file handles. +# TEST +# TEST Client has open handle to an on-disk DB when abbreviated +# TEST internal init starts. Make sure we lock out access, and make sure +# TEST it ends up as HANDLE_DEAD. Also, make sure that if there are +# TEST no NIMDBs, that we *don't* get HANDLE_DEAD. + +proc rep087 { method { niter 200 } { tnum "087" } args } { + source ./include.tcl + + # Run for btree and queue only. Since this is a NIMDB test, + # explicitly exclude queueext. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || [is_queue $method] == 1 } { + if { [is_queueext $method] == 0 } { + lappend test_methods $method + } + } + } + return $test_methods + } + if { [is_btree $method] != 1 && [is_queue $method] != 1 } { + puts "Skipping internal init test rep$tnum for method $method." + return + } + if { [is_queueext $method] == 1 } { + puts "Skipping in-memory database test rep$tnum for method $method." + return + } + + set args [convert_args $method $args] + + # Run test with and without a NIMDB present. + rep087_sub $method $niter $tnum "true" $args + rep087_sub $method $niter $tnum "false" $args +} + +proc rep087_sub { method niter tnum with_nimdb largs } { + global testdir + global env_private + global rep_verbose + global verbose_type + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + if { $with_nimdb} { + set msg "with" + } else { + set msg "without" + } + puts "Rep$tnum ($method): Abbreviated internal init\ + and dead handles, $msg NIMDB $msg3." + if { $niter < 3 } { + set niter 3 + puts "\tRep$tnum: the minimum 'niter' value is 3." 
+ } + + set omethod [convert_method $method] + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirs(A) $testdir/SITE_A] + file mkdir [set dirs(B) $testdir/SITE_B] + + puts "\tRep$tnum: Create master and client" + repladd 1 + set env_A_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -errpfx SITE_A \ + -home $dirs(A) -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + # Open a client + repladd 2 + set env_B_cmd "berkdb_env_noerr -create -txn \ + $verbargs $privargs \ + -errpfx SITE_B \ + -home $dirs(B) -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + set envlist "{$envs(A) 1} {$envs(B) 2}" + process_msgs $envlist + + if { $with_nimdb } { + set msg "and a NIMDB" + } else { + set msg "" + } + puts "\tRep$tnum: Create a regular DB $msg" + set start 0 + eval rep_test $method $envs(A) NULL $niter $start $start 0 $largs + + if { $with_nimdb } { + set nimdb [eval {berkdb_open} -env $envs(A) -auto_commit \ + -create $largs $omethod {"" "mynimdb"}] + eval rep_test $method $envs(A) \ + $nimdb $niter $start $start 0 $largs + $nimdb close + } + process_msgs $envlist + + puts "\tRep$tnum: Restart client with recovery" + # + # In the NIMDB case, this forces the rematerialization of the NIMDB. + # + $envs(B) close + set envs(B) [eval $env_B_cmd -rep_client -recover] + set envlist "{$envs(A) 1} {$envs(B) 2}" + + # Before seeking the master, open a DB handle onto the regular DB. + # At this point, we should be allowed to read it. + # + # Try reading a few records. (How many? We arbitrarily choose to try + # reading three.) Save one of the keys so that we can use it later in a + # "$db get" call. (Superstitiously skip over the first key, in deciding + # which one to save, because it is usually a zero-length string.) 
+ # + set db [berkdb_open_noerr -env $envs(B) -auto_commit test.db] + set c [$db cursor] + $c get -next + set pairs [$c get -next] + set a_key [lindex $pairs 0 0] + $c get -next + $c close + + if { $with_nimdb} { + # At this point, the NIMDB is obviously not available, since it + # was blown away by the recovery/recreation of regions. Let's + # just make sure. + # + error_check_bad no_nimdb \ + [catch {berkdb_open_noerr -env $envs(B) \ + -auto_commit "" "mynimdb"}] 0 + + # Use the usual idiom of processing just one message cycle at a + # time, so that we can check access during the middle of + # internal init. (If no NIMDB, there is no internal init, so + # there's no point in doing this for that case.) + + # 1. NEWCLIENT -> NEWMASTER -> VERIFY_REQ (the checkpoint + # written by regular recovery) + # 2. -> VERIFY -> (no match) VERIFY_REQ (last txn commit in + # common) + # 3. -> VERIFY -> (match, but need NIMDBS) UPDATE_REQ + # 4. -> UPDATE -> PAGE_REQ + # 5. -> PAGE -> (limited to partial NIMDB content by + # rep_limit) + + proc_msgs_once $envlist + proc_msgs_once $envlist + proc_msgs_once $envlist + proc_msgs_once $envlist + + # Before doing cycle # 5, set a ridiculously low limit, so that + # only the first page of the database will be received on this + # next cycle. + # + $envs(A) rep_limit 0 4 + proc_msgs_once $envlist + + # Now we should be blocked from reading from our DB. + puts "\tRep$tnum: Try blocked access (5 second delay)." + error_check_bad should_block [catch {$db get $a_key} ret] 0 + error_check_good deadlock [is_substr $ret DB_LOCK_DEADLOCK] 1 + + # Get rid of any limit for the remainder of the test. + # + $envs(A) rep_limit 0 0 + } + + # Finish off all pending message processing. + # + process_msgs $envlist + + if { $with_nimdb } { + # We should of course be able to open, and read a few + # records from, the NIMDB, now that we've completed the + # abbreviated internal init. 
+ # + set imdb [berkdb_open_noerr -env $envs(B) \ + -auto_commit "" "mynimdb"] + set c [$imdb cursor] + $c get -next + $c get -next + $c get -next + $c close + $imdb close + + puts "\tRep$tnum: Try access to dead handle." + error_check_bad handle_dead [catch {$db get $a_key} ret] 0 + error_check_good $ret [is_substr $ret DB_REP_HANDLE_DEAD] 1 + + $db close + set db [berkdb_open_noerr -env $envs(B) -auto_commit test.db] + error_check_good reaccess_ok [catch {$db get $a_key} ret] 0 + } else { + puts "\tRep$tnum: Try access to still-valid handle" + error_check_good access_ok [catch {$db get $a_key} ret] 0 + } + + puts "\tRep$tnum: Clean up." + $db close + $envs(A) close + $envs(B) close + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep088.tcl b/test/tcl/rep088.tcl new file mode 100644 index 00000000..0f99cce6 --- /dev/null +++ b/test/tcl/rep088.tcl @@ -0,0 +1,244 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep088 +# TEST Replication roll-back preserves checkpoint. +# TEST +# TEST Create a situation where a client has to roll back its +# TEST log, discarding some existing transactions, in order to sync +# TEST with a new master. +# TEST +# TEST 1. When the client still has its entire log file history, all +# TEST the way back to log file #1, it's OK if the roll-back discards +# TEST any/all checkpoints. +# TEST 2. When old log files have been archived, if the roll-back would +# TEST remove all existing checkpoints it must be forbidden. The log +# TEST must always have a checkpoint (or all files back through #1). +# TEST The client must do internal init or return JOIN_FAILURE. +# TEST 3. (the normal case) Old log files archived, and a checkpoint +# TEST still exists in the portion of the log which will remain after +# TEST the roll-back: no internal-init/JOIN_FAILURE necessary. 
+# +# TODO: maybe just reject anything that doesn't comply with my simplified +# rep_test clone, like fixed-length record methods, etc. + +proc rep088 { method { niter 20 } { tnum 088 } args } { + source ./include.tcl + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "\tRep$tnum: Skipping for method $method." + return + } + + set args [convert_args $method $args] + + puts "Rep$tnum ($method): Replication roll-back preserves checkpoint." + # Note: expected result = "sync" means the client should be allowed to + # synchronize normally (to the found sync point), without any need for + # internal init. + # + # Case #1. + puts "Rep$tnum: Rollback without checkpoint, with log file 1" + set archive false + set ckpt false + set result sync + rep088_sub $method $niter $tnum $archive $ckpt $result $args + + # Case #2.(a). + # + puts "Rep$tnum: Forbid rollback over only chkp: join failure" + set archive true + set ckpt false + set result join_failure + rep088_sub $method $niter $tnum $archive $ckpt $result $args + + # Case #2.(b): essentially the same, but allow the internal init to + # happen, so that we verify that the subsequent restart with recovery + # works fine. NB: this is the obvious failure case prior to bug fix + # #16732. + # + puts "Rep$tnum: Forbid rollback over only chkp: internal init" + set archive true + set ckpt false + set result internal_init + rep088_sub $method $niter $tnum $archive $ckpt $result $args + + # Case #3. 
+ puts "Rep$tnum: Rollback with sufficient extra checkpoints" + set archive true + set ckpt true + set result sync + rep088_sub $method $niter $tnum $archive $ckpt $result $args +} + +proc rep088_sub { method niter tnum archive ckpt result largs } { + source ./include.tcl + global testdir + global util_path + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set dirA $testdir/A] + file mkdir [set dirB $testdir/B] + file mkdir [set dirC $testdir/C] + + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + puts "\tRep$tnum.a: Create master and two clients" + repladd 1 + set env_A_cmd "berkdb_env -create -txn $verbargs \ + -log_buffer $log_buf -log_max $log_max \ + -errpfx SITE_A -errfile /dev/stderr \ + -home $dirA -rep_transport \[list 1 replsend\]" + set envs(A) [eval $env_A_cmd -rep_master] + + repladd 2 + set env_B_cmd "berkdb_env -create -txn $verbargs \ + -log_buffer $log_buf -log_max $log_max \ + -errpfx SITE_B -errfile /dev/stderr \ + -home $dirB -rep_transport \[list 2 replsend\]" + set envs(B) [eval $env_B_cmd -rep_client] + + repladd 3 + set env_C_cmd "berkdb_env -create -txn $verbargs \ + -log_buffer $log_buf -log_max $log_max \ + -errpfx SITE_C -errfile /dev/stderr \ + -home $dirC -rep_transport \[list 3 replsend\]" + set envs(C) [eval $env_C_cmd -rep_client] + + set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}" + process_msgs $envlist + $envs(A) test force noarchive_timeout + + # Using small log file size, push into the second log file. 
+ # + puts "\tRep$tnum.b: Write enough txns to exceed 1 log file" + while { [lsn_file [next_expected_lsn $envs(C)]] == 1 } { + eval rep088_reptest $method $envs(A) $niter $largs + process_msgs $envlist + } + + # To make sure everything still works in the normal case, put in a + # checkpoint here before writing the transactions that will have to be + # rolled back. Later, when the client sees that it must roll back over + # (and discard) the later checkpoint, the fact that this checkpoint is + # here will allow it to proceed. + # + if { $ckpt } { + puts "\tRep$tnum.c: put in an 'extra' checkpoint." + $envs(A) txn_checkpoint + process_msgs $envlist + } + + # Turn off client TBM (the one that will become master later). + # + puts "\tRep$tnum.d: Turn off client B and write more txns" + $envs(B) close + set envlist "{$envs(A) 1} {$envs(C) 3}" + + # Fill a bit more log, and then write a checkpoint. + # + eval rep088_reptest $method $envs(A) $niter $largs + $envs(A) txn_checkpoint + replclear 2 + process_msgs $envlist + + # At the client under test, archive away the first log file. + # + if { $archive } { + puts "\tRep$tnum.e: Archive log at client C" + $envs(C) log_flush + exec $util_path/db_archive -d -h $dirC + } + + # Maybe another cycle of filling and checkpoint. + # + eval rep088_reptest $method $envs(A) $niter $largs + $envs(A) txn_checkpoint + replclear 2 + process_msgs $envlist + + # Now turn off the master, and turn on the TBM site as master. The + # client under test has to sync with the new master. Just to make sure + # I understand what's going on, turn off auto-init. 
+ # + + if { $result != "internal_init" } { + $envs(C) rep_config {autoinit off} + } + puts "\tRep$tnum.f: Switch master to site B, try to sync client C" + $envs(A) close + set envs(B) [eval $env_B_cmd -rep_master] + set envlist "{$envs(B) 2} {$envs(C) 3}" + replclear 1 + set succeeded [catch { process_msgs $envlist } ret] + + switch $result { + internal_init { + error_check_good inited $succeeded 0 + + # Now stop the client, and try restarting it with + # recovery. + # + $envs(C) close + set envs(C) [eval $env_C_cmd -rep_client -recover] + } + join_failure { + error_check_bad no_autoinit $succeeded 0 + error_check_good join_fail \ + [is_substr $ret DB_REP_JOIN_FAILURE] 1 + } + sync { + error_check_good sync_ok $succeeded 0 + error_check_good not_outdated \ + [stat_field $envs(C) rep_stat \ + "Outdated conditions"] 0 + } + default { + error "FAIL: unknown test result option $result" + } + } + + $envs(C) close + $envs(B) close + replclose $testdir/MSGQUEUEDIR +} + +# A simplified clone of proc rep_test, with the crucial distinction that it +# doesn't do any of its own checkpointing. For this test we need explicit +# control of when checkpoints should happen. This proc doesn't support +# access methods using record numbers. +proc rep088_reptest { method env niter args } { + source ./include.tcl + + set omethod [convert_method $method] + set largs [convert_args $method $args] + set db [eval berkdb_open_noerr -env $env -auto_commit \ + -create $omethod $largs test.db] + + set did [open $dict] + for { set i 0 } { $i < $niter && [gets $did str] >= 0 } { incr i } { + set key $str + set str [reverse $str] + $db put $key $str + } + close $did + $db close +} diff --git a/test/tcl/rep089.tcl b/test/tcl/rep089.tcl new file mode 100644 index 00000000..8ad4214c --- /dev/null +++ b/test/tcl/rep089.tcl @@ -0,0 +1,245 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# TEST rep089 +# TEST Test of proper clean-up of mpool during interrupted internal init. +# TEST +# TEST Have a client in the middle of internal init when a new master +# TEST generation comes along, forcing the client to interrupt the internal +# TEST init, including doing the clean-up. The client is in the middle of +# TEST retrieving database pages, so that we are forced to clean up mpool. +# TEST (Regression test for bug 17121) + +proc rep089 { method { niter 200 } { tnum "089" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global env_private + + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] == 0 && [is_queue $method] == 0 } { + puts "Rep$tnum: skipping for non-btree, non-queue method." + return + } + + # It's possible to run this test with in-memory databases. + set msg "with named databases" + if { $databases_in_memory } { + set msg "with in-memory named databases" + if { [is_queueext $method] == 1 } { + puts "Skipping rep$tnum for method $method" + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + set msg3 "" + if { $env_private } { + set msg3 "with private env" + } + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set args [convert_args $method $args] + + puts -nonewline "Rep$tnum: Mpool cleanup on aborted" + puts " internal init $msg $msg2 $msg3." 
+ rep089_sub $method $niter $tnum $args +} + +proc rep089_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + global databases_in_memory + global repfiles_in_memory + global env_private + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + set privargs "" + if { $env_private == 1 } { + set privargs " -private " + } + + env_cleanup $testdir + replsetup $testdir/MSGQUEUEDIR + + file mkdir [set masterdir $testdir/MASTERDIR] + file mkdir [set clientdir $testdir/CLIENTDIR] + + # Use a small page size, so that it won't take too long to accumulate a + # large number of pages. + # + append largs " -pagesize 512" + + puts "\tRep$tnum.a: Create a master and three databases." + + # Note that we use a special version of the "send" call-back function + # here and in the client, during this first "set up" part of the test. + # + repladd 1 + set env_cmd(M) "berkdb_env -create -txn $repmemargs $privargs \ + $verbargs -home $masterdir -errpfx MASTER -rep_master" + set masterenv [eval $env_cmd(M) -rep_transport {{1 rep089_send}}] + + # Create three databases. Later, we will interrupt an internal init + # during the middle of transferring pages for the second database. This + # will force the client to exercise clean-up for all three cases: a + # fully intact, already materialized database; one that's in the process + # of being materialized; and one that we haven't even touched yet (i.e., + # totally nonexistent). + # + foreach n {1 2 3} { + rep089_make_data $masterenv test$n.db $method $niter $largs + } + + # Turn off transmission limits, so that we can be sure that we're + # controlling the pace of progress through internal init from the test + # script. + # + $masterenv rep_limit 0 0 + + puts "\tRep$tnum.b: Create a client, and start internal init." 
+ + repladd 2 + set env_cmd(C) "berkdb_env -create -txn $repmemargs $privargs \ + $verbargs -home $clientdir -errpfx CLIENT -rep_client \ + -rep_transport \[list 2 rep089_send\]" + set clientenv [eval $env_cmd(C)] + + set envlist "{$masterenv 1} {$clientenv 2}" + + # Go through just a single message processing cycle at a time, until we + # see that we've received some PAGE messages. Since we're using a + # special send() function that discards some PAGE messages, the client + # will then be left in the middle of REP_F_RECOVER_PAGE state. + # + # We want to interrupt the internal init in the middle of materializing + # the 2nd database. Note that queue databases require two PAGE_REQ + # cycles. + # + if {[is_queue $method]} { + set max_cycles 4 + } else { + set max_cycles 2 + } + + # Do as many cycles as it takes till we see the first PAGE_REQ, then do + # enough additional cycles to bring the total page-related cycles up to + # "max_cycles". Then turn on message dropping for the following cycle. + # + global rep089_page_count + global rep089_pagereq_seen + + set rep089_page_count -1 + set rep089_pagereq_seen 0 + while {! $rep089_pagereq_seen} { + proc_msgs_once $envlist + } + for { set i 1 } { $i < $max_cycles } { incr i } { + proc_msgs_once $envlist + } + set rep089_page_count 0 + proc_msgs_once $envlist + error_check_bad assert_page_count $rep089_page_count 0 + + puts "\tRep$tnum.c: Restart master, to force internal init clean-up." + + # Trigger an interrupted internal init clean-up, by having the master + # restart at a higher gen. + # + replclear 1 + $masterenv close + set masterenv [eval $env_cmd(M) -rep_transport {{1 replsend}} -recover] + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # (If we get to this point without a crash, the bug has been fixed.) + + puts "\tRep$tnum.d: Sanity-check the sync-interrupted stat." 
+ + set n [stat_field $clientenv mpool_stat "Number of syncs interrupted"] + error_check_good nsyncs $n 1 + + $clientenv close + $masterenv close + replclose $testdir/MSGQUEUEDIR +} + +proc rep089_send { control rec fromid toid flags lsn } { + global rep089_page_count + global rep089_pagereq_seen + + if {[berkdb msgtype $control] eq "page_req"} { + set rep089_pagereq_seen 1 + } elseif {[berkdb msgtype $control] eq "page" && $rep089_page_count >= 0 } { + incr rep089_page_count + if { $rep089_page_count < 3 || $rep089_page_count > 10 } { + return 0 + } + } + return [replsend $control $rec $fromid $toid $flags $lsn] +} + +proc rep089_make_data { env dbname method niter largs } { + global databases_in_memory + + if { $databases_in_memory } { + set dbname " {} $dbname " + } + + set omethod [convert_method $method] + set db [eval berkdb_open_noerr -env $env -auto_commit\ + -create -mode 0644 $omethod $largs $dbname] + + # Make sure the database has at least 15 pages. That number is + # arbitrary, but it should be more than the 10 that we skip in + # rep089_send, above. + # + set page_target 15 + if {[is_queue $method]} { + set descriptor "Number of pages" + } else { + set descriptor "Leaf pages" + } + set start 0 + while true { + eval rep_test $omethod $env $db $niter $start 0 0 $largs + incr start $niter + + set npages [stat_field $db stat $descriptor] + if {$npages >= $page_target} { + break + } + } + + $db close +} diff --git a/test/tcl/rep090.tcl b/test/tcl/rep090.tcl new file mode 100644 index 00000000..98f83585 --- /dev/null +++ b/test/tcl/rep090.tcl @@ -0,0 +1,251 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep090 +# TEST Test of AUTO_REMOVE on both master and client sites. +# TEST +# TEST One master, one client. Set AUTO_REMOVE on the client env. +# TEST Generate several log files. +# TEST Verify the client has properly removed the log files. 
+# TEST Turn on AUTO_REMOVE on the master and generate more log files. +# TEST Confirm both envs have the same log files. +# +proc rep090 { method { niter 50 } { tnum "090" } args } { + source ./include.tcl + global databases_in_memory + global mixed_mode_logging + global repfiles_in_memory + + # Run for btree only. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "\tRep$tnum: Skipping for method $method." + return + } + if { $databases_in_memory } { + puts "\tRep$tnum: Skipping for in-memory databases." + return + } + if { $repfiles_in_memory } { + puts "\tRep$tnum: Skipping for in-memory replication files." + return + } + if { $mixed_mode_logging != 0 } { + puts "\tRep$tnum: Skipping for in-memory log files." + return + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + # + # Even though we skip for in-memory logs, keep the standard + # log configuration format for Tcl consistent with all other tests. + # + set logsets [create_logsets 2] + set msgopts { dropped normal } + + # Run with options to drop some messages or normal/all messages. + foreach l $logsets { + foreach m $msgopts { + puts "Rep$tnum ($method $args): Test of\ + client log autoremove with $m messages." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep090_sub $method $niter $tnum $l $m $args + } + } +} + +proc rep090_sub { method niter tnum logset msgopt largs } { + global drop drop_msg + global rep_verbose + global testdir + global util_path + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + # Don't turn on autoremove yet on the master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + puts "\tRep$tnum.a: Open client and set autoremove." 
+ repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + $clientenv log_config autoremove on + set envlist "{$masterenv 1} {$clientenv 2}" + + set start 0 + set testfile "test.db" + + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set mdb [eval {berkdb_open_noerr} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile ] + error_check_good reptest_db [is_valid_db $mdb] TRUE + + process_msgs $envlist + puts "\tRep$tnum.b: Running rep_test in replicated env." + set stop 0 + set logstop 5 + # + # If we're dropping messages we want to set it up so that the + # rerequest happens after our loop is finished. Set the + # rerequest values high, 10 and 20 seconds. + # + if { $msgopt == "dropped" } { + set req_min 10000000 + set req_max 20000000 + $clientenv rep_request $req_min $req_max + set drop 1 + # Drop 5% of messages, 100/5 or every 20th. + set drop_msg 20 + } + while { $stop == 0 } { + # Run rep_test in the master to $logstop log files. + eval rep_test $method \ + $masterenv $mdb $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + # + # Run until we have at least $logstop log files on the master. + # + set last_master_log [get_logfile $masterenv last] + if { $last_master_log >= $logstop} { + set stop 1 + } + } + # + # Since the client has autoremove turned on, it should have only + # one log file whether we're dropping messages or not. + # + # Flush the logs first to ensure they are on disk. + # + $clientenv log_flush + puts "\tRep$tnum.c: Verify client log file removal." 
+ set cl_logs [eval exec $util_path/db_archive -l -h $clientdir] + error_check_good cllog [llength $cl_logs] 1 + + if { $msgopt == "dropped" } { + # + # If we catch everything up now, the client + # will still have only one log file, but it + # will be a different one. + # + # Turn off dropped messages. + # Sleep beyond max rerequest time. + # Force a message to generate rerequest. + # Process messages + # Verify logs are autoremoved. + # + set drop 0 + set slp [expr $req_max / 1000000] + puts "\tRep$tnum.c.0: Sleep beyond rerequest time ($slp sec)." + tclsleep $slp + + puts "\tRep$tnum.c.1: Generate message." + $masterenv rep_start -master + process_msgs $envlist + + puts "\tRep$tnum.c.2: Verify client log file removal now." + $clientenv log_flush + set new_cl_logs [eval exec $util_path/db_archive -l -h $clientdir] + error_check_bad new_cllog $cl_logs $new_cl_logs + error_check_good cllog [llength $cl_logs] 1 + } + + # + # Turn on autoremove on the master and advance past the end of + # the current log file to cause removal of all earlier logs. + # Also clobber replication's 30-second anti-archive timer. + # + puts "\tRep$tnum.d: Turn on autoremove on master." + $masterenv log_config autoremove on + $masterenv test force noarchive_timeout + set last_master_log [get_logfile $masterenv last] + + puts "\tRep$tnum.e: Running rep_test in replicated env." + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master beyond former last log. + eval rep_test $method \ + $masterenv $mdb $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + # + # Run until we've advanced past the old last log. + # Notice we're letting autoremove do the work, + # not db_archive. + # + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > $last_master_log } { + set stop 1 + } + } + puts "\tRep$tnum.f: Verify both sites file removal." 
+ $masterenv log_flush + set ma_logs [eval exec $util_path/db_archive -l -h $masterdir] + error_check_good malog_1 [llength $ma_logs] 1 + + $clientenv log_flush + set cl_logs [eval exec $util_path/db_archive -l -h $clientdir] + error_check_good cllog2_1 [llength $cl_logs] 1 + + # Make sure both sites have the exact same logs. + error_check_good match_logs $ma_logs $cl_logs + + error_check_good mdb_close2 [$mdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + + diff --git a/test/tcl/rep091.tcl b/test/tcl/rep091.tcl new file mode 100644 index 00000000..247fe4dc --- /dev/null +++ b/test/tcl/rep091.tcl @@ -0,0 +1,789 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep091 +# TEST Read-your-writes consistency. +# TEST Write transactions at the master, and then call the txn_applied() +# TEST method to see whether the client has received and applied them yet. +# +proc rep091 { method { niter 20 } { tnum "091" } args } { + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep091: Skipping for method $method." + return + } + + # Set up for on-disk or in-memory databases and make dbname match + # the one assigned in rep_test. + if { $databases_in_memory == 1 } { + set dbname { "" "test.db" } + set msg "using named in-memory databases" + } else { + set dbname "test.db" + set msg "using on-disk databases" + } + + # For this test it's important to test both ways, so for now run both + # under our control. 
Later, when the containing test infrastructure for + # doing this automatically is more fully developed, we can remove this + # loop and just let the infrastructure handle it. + # + set orig $repfiles_in_memory + foreach repfiles_in_memory {0 1} { + + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } else { + set msg2 "and on-disk replication files" + } + puts "Rep$tnum ($method $args): Test of\ + read-your-writes consistency $msg $msg2." + + rep091a_sub $method $niter $tnum no $dbname $args + rep091a_sub $method $niter $tnum yes $dbname $args + rep091b_sub $method $niter $tnum $dbname $args + rep091c_sub $method $niter $tnum no $dbname $args + rep091c_sub $method $niter $tnum yes $dbname $args + rep091d_sub $method $niter $tnum no $dbname $args + rep091d_sub $method $niter $tnum yes $dbname $args + rep091e_sub $method $niter $tnum $dbname $args + if { $repfiles_in_memory } { + rep091f_sub $method $niter $tnum $dbname $args + } + } + + # Restore original setting, in case any other tests are going to be run + # after this one. + # + set repfiles_in_memory $orig +} + +proc rep091a_sub { method niter tnum future_gen dbname largs } { + global rep_verbose + global testdir + global verbose_type + global repfiles_in_memory + + puts -nonewline "Rep$tnum: read-your-writes consistency, basic test" + if { $future_gen } { + puts ", with extra gen cycle." + } else { + puts "." + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + puts "\tRep$tnum.a: Create master and client." 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn -errpfx MASTER \ + $repmemargs \ + $verbargs -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT \ + $repmemargs \ + $verbargs -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + if { $future_gen } { + # Cycle the gen an extra time, so that the client sees not only + # a future transaction, but a future gen. Restart the master as + # a client first, and communicate with the surviving other + # client, to make sure the master realizes the correct gen. We + # can get away without doing this in the normal case, but when + # repfiles are in memory the master otherwise starts over from + # gen 0, leading to quite a mess. + # + $masterenv close + set masterenv [eval $ma_envcmd -rep_client -recover] + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + $masterenv rep_start -master + } + + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + puts "\tRep$tnum.b: Write txn and get its commit token." + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + -create $omethod $dbargs $dbname ] + set txn [$masterenv txn -token] + $db put -txn $txn "key1" "data1" + set token [$txn commit] + +# binary scan $token IIIII r1 r2 r3 r4 r5 +# puts "the token value is $r1 $r2 $r3 $r4 $r5" + + # For the most part, we're interested in checking txn_applied at + # clients. But it's also supposed to work in a degenerate, simple + # manner at the master. 
+ # + error_check_good applied_at_master [$masterenv txn_applied $token] 0 + + # While we're at it, verify that if we don't ask for the commit token, + # the result of [$env commit] is just a plain "0". (This is more of a + # sanity check on the Tcl API, to help us believe that the rest of this + # test is valid.) + # + set txn [$masterenv txn] + $db put -txn $txn "other key" "any data" + set result [$txn commit] + error_check_good commit_no_token $result 0 + + $db close + + puts "\tRep$tnum.c: Check the applied status of the transaction at the client." + + # Before processing messages, the client will not have heard of the + # transaction yet. But afterwards it should. Check it once with no + # timeout (default 0), and again with a timeout specified to do a simple + # test of waiting. (Multi-thread waiting tests will be done + # separately.) + # + set result [$clientenv txn_applied $token] + error_check_good not_applied [is_substr $result DB_TIMEOUT] 1 + + set start [clock seconds] + set result [$clientenv txn_applied -timeout 10000000 $token] + set duration [expr [clock seconds] - $start] + error_check_good not_yet_applied [expr $duration >= 10] 1 + + process_msgs $envlist + error_check_good txn_applied [$clientenv txn_applied $token] 0 + + # "Empty" transactions are not really interesting either, but again + # they're supposed to be allowed: + # + set txn [$masterenv txn -token] + set token [$txn commit] + set result [$masterenv txn_applied $token] + error_check_good empty_txn [is_substr $result DB_KEYEMPTY] 1 + set result [$clientenv txn_applied $token] + error_check_good empty_txn2 [is_substr $result DB_KEYEMPTY] 1 + + # Check a few simple invalid cases. 
+ # + error_check_bad client_token [catch {$clientenv txn -token} result] 0 + error_check_good cl_tok_msg [is_substr $result "invalid arg"] 1 + set txn [$masterenv txn] + error_check_bad child_token \ + [catch {$masterenv txn -token -parent $txn} result] 0 + error_check_good parent_tok_msg [is_substr $result "invalid arg"] 1 + + $txn abort + $clientenv close + $masterenv close + + replclose $testdir/MSGQUEUEDIR +} + +# Verify that an LSN history database appears correctly at a client even if the +# client is created via a hot backup (instead of internal init). Sites "A" and +# "B" alternate the master role, in order to facilitate accumulating a history +# of gen changes (especially in the in-mem case, where otherwise our LSN history +# database would be lost each time we restarted the master). The "CLIENT2" site +# will be the one to be initialized via hot backup. +# +proc rep091b_sub { method niter tnum dbname largs } { + global rep_verbose + global testdir + global verbose_type + global util_path + global repfiles_in_memory + + puts "Rep$tnum: read-your-writes consistency, hot backup." 
+ + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set dira $testdir/DIRA + set dirb $testdir/DIRB + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $dira + file mkdir $dirb + file mkdir $clientdir2 + + repladd 1 + set envcmd_a "berkdb_env_noerr -create -txn -errpfx SITE_A \ + $repmemargs \ + $verbargs -home $dira -rep_transport \[list 1 replsend\]" + set masterenv [eval $envcmd_a -rep_master] + + repladd 2 + set envcmd_b "berkdb_env_noerr -create -txn -errpfx SITE_B \ + $repmemargs \ + $verbargs -home $dirb -rep_transport \[list 2 replsend\]" + set clientenv [eval $envcmd_b -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + set start 0 + + # Swap master and client role, some arbitrary number of times, in order + # to change the generation number, before writing our transaction of + # interest. + # + set count 2 + for { set i 0 } { $i < $count } { incr i } { + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + $clientenv rep_start -client + $masterenv rep_start -master + process_msgs $envlist + } + + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$masterenv txn -token] + $db put -txn $txn "some key" "some data" + set token [$txn commit] + + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Cycle the master gen a few more times, in order to force the token gen + # not to be at the top of the history. Again, the count is arbitrary. 
+ # + $db close + set count 3 + for { set i 0 } { $i < $count } { incr i } { + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + $clientenv rep_start -client + $masterenv rep_start -master + process_msgs $envlist + + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + } + + # Note that the choice of which existing directory ("a" or "b") we use + # as the source of the copy doesn't matter. + # + exec $util_path/db_hotbackup -h $dira -b $clientdir2 + + set count 2 + for { set i 0 } { $i < $count } { incr i } { + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + $clientenv rep_start -client + $masterenv rep_start -master + process_msgs $envlist + + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + } + + # Create a second transaction, after a few more master gen changes, and + # get its token too. + # + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$masterenv txn -token] + $db put -txn $txn "other key" "different data" + set token2 [$txn commit] + + # Start the new client, by running "catastrophic" recovery on the + # hot-backup files. + # + exec $util_path/db_recover -c -h $clientdir2 + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT2 \ + $repmemargs \ + $verbargs -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl2_envcmd -rep_client] + $clientenv2 rep_config {autoinit off} + + # Before syncing with the master, we should be able to confirm the first + # transaction, but not the second one, because the hot backup should + # include an (old) copy of the LSN history database. 
+ # + if {$repfiles_in_memory} { + set result [$clientenv2 txn_applied $token] + error_check_good still_waiting [is_substr $result DB_TIMEOUT] 1 + set result2 [$clientenv2 txn_applied $token2] + error_check_good not_yet_applied [is_substr $result2 DB_TIMEOUT] 1 + } else { + error_check_good txn_applied [$clientenv2 txn_applied $token] 0 + set result2 [$clientenv2 txn_applied $token2] + error_check_good not_yet_applied [is_substr $result2 DB_NOTFOUND] 1 + } + + # Sync with master, and this time token2 should be there. + # + lappend envlist [list $clientenv2 3] + process_msgs $envlist + error_check_good txn_applied2 [$clientenv2 txn_applied $token2] 0 + + $db close + $clientenv2 close + $clientenv close + $masterenv close + + replclose $testdir/MSGQUEUEDIR +} + +# Test detection of rollbacks. +# +proc rep091c_sub { method niter tnum future_gen dbname largs } { + global rep_verbose + global testdir + global verbose_type + global repfiles_in_memory + + puts "Rep$tnum: read-your-writes consistency, rollbacks." + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + puts "\tRep$tnum.a: Create a group of three." 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn -errpfx MASTER \ + $repmemargs \ + $verbargs -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT \ + $repmemargs \ + $verbargs -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT2 \ + $repmemargs \ + $verbargs -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl2_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # If desired, test the case where the token has a "future" gen, by + # bumping the gen a few times after cutting off the disconnected + # client. + # + if { $future_gen } { + for { set count 3 } { $count > 0 } { incr count -1 } { + $masterenv close + set masterenv [eval $ma_envcmd -rep_master -recover] + eval rep_test $method $masterenv NULL \ + $niter $start $start 0 $largs + incr start $niter + } + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + } + + # Write some more transactions, taking a token for one of them, but + # prevent one of the clients from seeing any of them. + # + puts "\tRep$tnum.b: Write transactions, and get token." 
+ eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$masterenv txn -token] + $db put -txn $txn "some key" "some data" + set token [$txn commit] + $db close + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + replclear 2 + process_msgs $envlist + + puts "\tRep$tnum.c: Switch master, forcing a rollback." + $masterenv close +# $clientenv rep_start -master + $clientenv rep_start -client; # + set envlist "{$clientenv 2} {$clientenv2 3}"; # + process_msgs $envlist; # + $clientenv rep_start -master; # + eval rep_test $method $clientenv NULL $niter $start $start 0 $largs + incr start $niter + replclear 1 +# set envlist "{$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + puts "\tRep$tnum.d: Check txn_applied at client and at (new) master." + set result [$clientenv2 txn_applied $token] + error_check_good rolled_back [is_substr $result DB_NOTFOUND] 1 + + set result [$clientenv txn_applied $token] + error_check_good rolled_back2 [is_substr $result DB_NOTFOUND] 1 + + $clientenv close + $clientenv2 close + replclose $testdir/MSGQUEUEDIR +} + +# Test envid check. Simulate a network partition: two masters proceed at the +# same gen, in the same LSN range. The envid would be the only way we would +# know that a transaction from the disconnected master is not correctly applied +# at a client in the other partition. +# +proc rep091d_sub { method niter tnum extra_gen dbname largs } { + global rep_verbose + global testdir + global verbose_type + global repfiles_in_memory + + puts -nonewline "Rep$tnum: read-your-writes consistency, partition" + if { $extra_gen } { + puts ", with extra gen." + } else { + puts "." 
+ } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + puts "\tRep$tnum.a: Create a group of three." + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn -errpfx MASTER \ + $repmemargs \ + $verbargs -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT \ + $repmemargs \ + $verbargs -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT2 \ + $repmemargs \ + $verbargs -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl2_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Bounce master and client1. Then when they come back up, pretend + # client1 is disconnected, and somehow decides to act as a master (an + # application not using elections, obviously). Note that we must + # replclear appropriately in both directions. + # + $masterenv close + $clientenv close + set masterenv [eval $ma_envcmd -rep_master -recover] + replclear 2 + set clientenv [eval $cl_envcmd -rep_master -recover] + replclear 1 + replclear 3 + set envlist "{$masterenv 1} {$clientenv2 3}" + + puts "\tRep$tnum.b: Run identical series of txns at two masters." 
+ set orig_start $start + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$masterenv txn] + $db put -txn $txn "some key" "some data" + $txn commit + $db close + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + replclear 2 + process_msgs $envlist + + set start $orig_start + eval rep_test $method $clientenv NULL $niter $start $start 0 $largs + incr start $niter + set db [eval berkdb_open_noerr -env $clientenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$clientenv txn -token] + $db put -txn $txn "some key" "some data" + set token [$txn commit] + $db close + eval rep_test $method $clientenv NULL $niter $start $start 0 $largs + incr start $niter + replclear 1 + replclear 3 + + if { $extra_gen } { + $masterenv close + set masterenv [eval $ma_envcmd -rep_master -recover] + set envlist "{$masterenv 1} {$clientenv2 3}" + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + replclear 2 + process_msgs $envlist + } + + puts "\tRep$tnum.c: Check txn_applied." + set result [$clientenv2 txn_applied $token] + error_check_good not_found [is_substr $result DB_NOTFOUND] 1 + + $clientenv close + $clientenv2 close + $masterenv close + replclose $testdir/MSGQUEUEDIR +} + +# Test the simplified, degenerate behavior of a non-replication environment: a +# valid transaction is always considered applied, unless it disappears in a +# backup/restore. +# +proc rep091e_sub { method niter tnum dbname largs } { + global testdir + global util_path + + set backupdir "$testdir/backup" + file mkdir $backupdir + + puts "Rep$tnum: read-your-writes consistency, non-rep env." 
+ env_cleanup $testdir + + set dir $testdir + set envcmd "berkdb_env_noerr -create -txn -home $dir" + set env [eval $envcmd] + + set start 0 + eval rep_test $method $env NULL $niter $start $start 0 $largs + incr start $niter + + exec $util_path/db_hotbackup -h $testdir -b $backupdir + + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $env -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$env txn -token] + $db put -txn $txn "some key" "some data" + set token [$txn commit] + $db close + + eval rep_test $method $env NULL $niter $start $start 0 $largs + incr start $niter + + error_check_good applied [$env txn_applied $token] 0 + $env close + + set envcmd "berkdb_env_noerr -create -txn -home $backupdir -recover_fatal" + set env [eval $envcmd] + set result [$env txn_applied $token] + error_check_good restored [is_substr $result DB_NOTFOUND] 1 + $env close +} + +# Check proper behavior of txn_applied() at a client before in-mem DB's have +# been materialized. This is only relevant when repfiles_in_memory, so skip it +# in the default case. Sites "A" and "B" will take turns being master, and +# CLIENT2 will be the one whose behavior is under test. +# +proc rep091f_sub { method niter tnum dbname largs } { + global rep_verbose + global testdir + global verbose_type + global util_path + global repfiles_in_memory + + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + puts "Rep$tnum: read-your-writes consistency, missing in-mem DB." + } else { + puts "Rep$tnum: skipping missing in-mem DB test." 
+ return + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set dira $testdir/DIRA + set dirb $testdir/DIRB + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $dira + file mkdir $dirb + file mkdir $clientdir2 + + repladd 1 + set envcmd_a "berkdb_env_noerr -create -txn -errpfx SITE_A \ + $repmemargs \ + $verbargs -home $dira -rep_transport \[list 1 replsend\]" + set masterenv [eval $envcmd_a -rep_master] + + repladd 2 + set envcmd_b "berkdb_env_noerr -create -txn -errpfx SITE_B \ + $repmemargs \ + $verbargs -home $dirb -rep_transport \[list 2 replsend\]" + set clientenv [eval $envcmd_b -rep_client] + + repladd 3 + set cl2_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT2 \ + $repmemargs \ + $verbargs -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl2_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2} {$clientenv2 3}" + process_msgs $envlist + + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + # Write the transaction of interest in gen 1. + # + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + $omethod $dbargs $dbname ] + set txn [$masterenv txn -token] + $db put -txn $txn "some key" "some data" + set token [$txn commit] + $db close + + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + set tmp $masterenv + set masterenv $clientenv + set clientenv $tmp + $clientenv rep_start -client + $masterenv rep_start -master + process_msgs $envlist + + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + # Bounce the client. 
Even though it has the transaction of interest, + # and had observed the change to gen 2, when we restart it it won't have + # its LSN history database. + # + $clientenv2 close + set clientenv2 [eval $cl2_envcmd -rep_client -recover] + + set result [$clientenv2 txn_applied $token] + error_check_good not_yet [is_substr $result DB_TIMEOUT] 1 + + # Sync with master, and this time token should be confirmed. + # + set envlist [lreplace $envlist end end [list $clientenv2 3]] + process_msgs $envlist + error_check_good txn_applied [$clientenv2 txn_applied $token] 0 + + $clientenv2 close + $clientenv close + $masterenv close + + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/rep092.tcl b/test/tcl/rep092.tcl new file mode 100644 index 00000000..6eda2bd1 --- /dev/null +++ b/test/tcl/rep092.tcl @@ -0,0 +1,325 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep092 +# TEST Read-your-writes consistency. +# TEST Test events in one thread (process) waking up another sleeping thread, +# TEST before a timeout expires. +# + +proc rep092 { method { niter 20 } { tnum "092" } args } { + source ./include.tcl + global repfiles_in_memory + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." 
+ return + } + + puts "Rep$tnum: read-your-writes consistency, multi-thread wake-up" + foreach pad_flag { no yes } { + foreach txn_flag { no yes } { + rep092a_sub $method $niter \ + $tnum $pad_flag $txn_flag $args + rep092b_sub $method $niter \ + $tnum $pad_flag $txn_flag $args + } + } +} + +proc rep092a_sub { method niter tnum pad in_txn largs } { + source ./include.tcl + global rep_verbose + global testdir + global verbose_type + global repfiles_in_memory + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + puts "\tRep$tnum.a: Create master and client." + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn -errpfx MASTER \ + $repmemargs \ + $verbargs -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT \ + $repmemargs -errfile /dev/stderr \ + $verbargs -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.b: Create and replicate a few warm-up txns." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.c: Write txn and get its commit token." 
+ if { $pad } { + eval rep_test $method $masterenv \ + NULL $niter $start $start 0 $largs + incr start $niter + } + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + -create $omethod $dbargs test.db ] + set txn [$masterenv txn -token] + $db put -txn $txn "key1" "data1" + set token [$txn commit] + $db close + if { $pad } { + eval rep_test $method $masterenv \ + NULL $niter $start $start 0 $largs + incr start $niter + } + + # Don't process msgs just yet. We want to test the behavior when the + # client checks/waits for the transaction more quickly than the client + # receives it. In order to do that in a test, we simulate the + # replication being rather slow, by pausing for a moment after starting + # up the txn_applied thread (in a separate child Tcl process). + # + set pause 5 + + # Define an emergency upper limit on the sleeping time, so that in case + # the code is broken the test won't hang forever. The child process + # should complete promptly, as soon as we apply the transaction. + # + set limit 60 + set tolerance 1 + + # Spawn a process to call txn_applied + # + puts "\tRep$tnum.d: Spawn child process, and pause to let it get started." + set timeout [expr $limit * 1000000] + error_check_good binary_scan [binary scan $token H40 token_chars] 1 + set listing $testdir/repscript.log + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep092script.tcl $listing $clientdir \ + $token_chars $timeout $in_txn $rep_verbose $verbose_type &] + tclsleep $pause + + puts "\tRep$tnum.e: Apply the transaction at the client." + process_msgs $envlist + + watch_procs $pid 1 + set fd [open $listing] + puts "\tRep$tnum.f: Examine the sub-process results." 
+ set report [split [read $fd] "\n"] + close $fd + set result [lindex [lsearch -inline $report RESULT*] 1] + error_check_good wait_result $result 0 + set duration [lindex [lsearch -inline $report DURATION*] 1] + error_check_good no_timeout \ + [expr $duration < $limit - $tolerance] 1 + + # Add a third client. + # + set clientdir2 $testdir/CLIENTDIR2 + file mkdir $clientdir2 + + puts "\tRep$tnum.g: Add another client, and make it master." + repladd 3 + set cl_envcmd2 "berkdb_env_noerr -create -txn -errpfx CLIENT2 \ + $repmemargs \ + $verbargs -home $clientdir2 -rep_transport \[list 3 replsend\]" + set clientenv2 [eval $cl_envcmd2 -rep_client] + + lappend envlist "$clientenv2 3" + process_msgs $envlist + + # Swap roles between master and client2. First client will eventually + # see a gen change, while waiting. + # + $masterenv rep_start -client + $clientenv2 rep_start -master + if { $pad } { + eval rep_test $method $clientenv2 \ + NULL $niter $start $start 0 $largs + incr start $niter + } + set db [eval berkdb_open_noerr -env $clientenv2 -auto_commit \ + -create $omethod $dbargs test.db ] + set txn [$clientenv2 txn -token] + $db put -txn $txn "key2" "data2" + set token [$txn commit] + $db close + if { $pad } { + eval rep_test $method $clientenv2 \ + NULL $niter $start $start 0 $largs + incr start $niter + } + + puts "\tRep$tnum.h: Spawn another child process." + error_check_good binary_scan [binary scan $token H40 token_chars] 1 + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep092script.tcl $listing $clientdir \ + $token_chars $timeout $in_txn $rep_verbose $verbose_type &] + tclsleep 5 + + puts "\tRep$tnum.i: Apply the transaction at the client." + process_msgs $envlist + + watch_procs $pid 1 + set fd [open $listing] + puts "\tRep$tnum.j: Examine the sub-process results." 
+ set report [split [read $fd] "\n"] + close $fd + set result [lindex [lsearch -inline $report RESULT*] 1] + error_check_good wait_result $result 0 + set duration [lindex [lsearch -inline $report DURATION*] 1] + error_check_good no_timeout2 \ + [expr $duration < $limit - $tolerance] 1 + + $clientenv close + $masterenv close + $clientenv2 close + + replclose $testdir/MSGQUEUEDIR +} + +proc rep092b_sub { method niter tnum pad in_txn largs } { + source ./include.tcl + global rep_verbose + global testdir + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + # This part of the test only makes sense with INMEM + # + set repmemargs "-rep_inmem_files " + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + puts "\tRep$tnum.a: Create master and client." + repladd 1 + set ma_envcmd "berkdb_env_noerr -create -txn -errpfx MASTER \ + $repmemargs \ + $verbargs -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd -rep_master] + + repladd 2 + set cl_envcmd "berkdb_env_noerr -create -txn -errpfx CLIENT \ + $repmemargs -errfile /dev/stderr \ + $verbargs -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd -rep_client] + + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + puts "\tRep$tnum.b: Create and replicate a few warm-up txns." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist + + puts "\tRep$tnum.x: Shut down client." + $clientenv close + + puts "\tRep$tnum.c: Write txn and get its commit token." 
+ if { $pad } { + eval rep_test $method $masterenv \ + NULL $niter $start $start 0 $largs + incr start $niter + } + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set db [eval berkdb_open_noerr -env $masterenv -auto_commit \ + -create $omethod $dbargs test.db ] + set txn [$masterenv txn -token] + $db put -txn $txn "key1" "data1" + set token [$txn commit] + $db close + if { $pad } { + eval rep_test $method $masterenv \ + NULL $niter $start $start 0 $largs + incr start $niter + } + + puts "\tRep$tnum.x: Restart client, get partway through sync." + set clientenv [eval $cl_envcmd -rep_client -recover] + set envlist "{$masterenv 1} {$clientenv 2}" + + # This will put the client into a state where it doesn't have the LSN + # history database, at a time when it needs to read it. Therefore, it + # will wait for it to be materialized in the "abbreviated internal init" + # cycle that is needed when in-memory databases are involved. + # + proc_msgs_once $envlist + proc_msgs_once $envlist + + set pause 5 + set limit 60 + set tolerance 1 + + # Spawn a process to call txn_applied + # + puts "\tRep$tnum.d: Spawn child process, and pause to let it get started." + set timeout [expr $limit * 1000000] + error_check_good binary_scan [binary scan $token H40 token_chars] 1 + set listing $testdir/repscript.log + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep092script.tcl $listing $clientdir \ + $token_chars $timeout $in_txn $rep_verbose $verbose_type &] + tclsleep $pause + + puts "\tRep$tnum.e: Apply the transaction at the client." + process_msgs $envlist + + watch_procs $pid 1 + set fd [open $listing] + puts "\tRep$tnum.f: Examine the sub-process results." 
+	set report [split [read $fd] "\n"]
+	close $fd
+	set result [lindex [lsearch -inline $report RESULT*] 1]
+	error_check_good wait_result $result 0
+	set duration [lindex [lsearch -inline $report DURATION*] 1]
+	error_check_good no_timeout \
+	    [expr $duration < $limit - $tolerance] 1
+
+	$clientenv close
+	$masterenv close
+
+	replclose $testdir/MSGQUEUEDIR
+}
diff --git a/test/tcl/rep092script.tcl b/test/tcl/rep092script.tcl
new file mode 100644
index 00000000..f1dbc64a
--- /dev/null
+++ b/test/tcl/rep092script.tcl
@@ -0,0 +1,74 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# Rep092 script - multi-thread wake-ups in checking read-your-writes
+# consistency.
+#
+# Usage: repscript clientdir token timeout txn_flag rep_verbose verbose_type
+# clientdir: client env directory
+#
+source ./include.tcl
+source $test_path/test.tcl
+source $test_path/testutils.tcl
+source $test_path/reputils.tcl
+
+set usage "repscript clientdir token timeout txn_flag rep_verbose verbose_type"
+
+# Verify usage
+if { $argc != 6 } {
+	puts stderr "FAIL:[timestamp] Usage: $usage"
+	exit
+}
+
+# Initialize arguments
+set clientdir [ lindex $argv 0 ]
+set token_chars [ lindex $argv 1 ]
+set timeout [ lindex $argv 2 ]
+set txn_flag [ lindex $argv 3 ]
+set rep_verbose [ lindex $argv 4 ]
+set verbose_type [ lindex $argv 5 ]
+set verbargs ""
+if { $rep_verbose == 1 } {
+	set verbargs " -verbose {$verbose_type on} "
+}
+
+# Join the client env.
+set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +repladd 2 +set cl_cmd "berkdb_env_noerr -home $clientdir $verbargs \ + -txn -rep_client -rep_transport \[list 2 replsend\]" +set clientenv [eval $cl_cmd] +error_check_good script_cenv_open [is_valid_env $clientenv] TRUE + +set token [binary format H40 $token_chars] + +set start [clock seconds] +if { $txn_flag } { + set txn [$clientenv txn] +} +set count 0 +while {[catch {$clientenv txn_applied -timeout $timeout $token} result]} { + if {[is_substr $result DB_LOCK_DEADLOCK]} { + incr count + if { $txn_flag } { + $txn abort + } + tclsleep 5 + if { $txn_flag } { + set txn [$clientenv txn] + } + } else { + error $result + } +} +if { $txn_flag } { + $txn commit +} +set duration [expr [clock seconds] - $start] +puts "RESULT: $result" +puts "DURATION: $duration" +puts "DEADLOCK_COUNT: $count" + +$clientenv close +$queueenv close diff --git a/test/tcl/rep093.tcl b/test/tcl/rep093.tcl new file mode 100644 index 00000000..46f339f7 --- /dev/null +++ b/test/tcl/rep093.tcl @@ -0,0 +1,252 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep093 +# TEST Egen changes during election. +# +proc rep093 { method { niter 20 } { tnum "093" } args } { + source ./include.tcl + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." + return + } + + rep093_sub $method $niter $tnum yes $args + rep093_sub $method $niter $tnum no $args +} + +# Start an election at site A, at a time when site B already has a higher egen. +# When site B sees the obsolete VOTE1, it responds with an ALIVE, and that causes +# site A recognize an egen update (producing HOLDELECTION, causing us to start +# another election process, and to abandon the first one). 
+# +# When $lost is true, we discard site A's initial VOTE1 message, and manually +# start an election at site B as well. In this case it's the VOTE1 message that +# causes site A to realize it needs an egen update. The result is similar, +# though the code path is completely different. +# +proc rep093_sub { method niter tnum lost largs } { + global rep_verbose + global testdir + global verbose_type + global repfiles_in_memory + global queuedbs + global elect_serial + + if { $lost } { + set msg " with lost VOTE1 message" + } else { + set msg "" + } + puts "Rep$tnum: Egen change during election$msg." + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup [set qdir $testdir/MSGQUEUEDIR] + + set dira $testdir/SITEA + set dirb $testdir/SITEB + + file mkdir $dira + file mkdir $dirb + + puts "\tRep$tnum.a: Create a small group." + repladd 1 + set envcmda "berkdb_env_noerr -create -txn -errpfx SITEA \ + $repmemargs -event \ + $verbargs -home $dira -rep_transport \[list 1 replsend\]" + + # Site A will be initial master, just so as to create some initial + # data. + # + set enva [eval $envcmda -rep_master] + set masterenv $enva + + repladd 2 + set envcmdb "berkdb_env_noerr -create -txn -errpfx SITEB \ + $repmemargs -event \ + $verbargs -home $dirb -rep_transport \[list 2 replsend\]" + set envb [eval $envcmdb -rep_client] + + set envlist "{$enva 1} {$envb 2}" + process_msgs $envlist + + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + + # Level the playing field, by making site A a client. + puts "\tRep$tnum.b: Make site A a client." + $enva rep_start -client + process_msgs $envlist + + # Arrange for site B to have a higher egen than site A. We do this by + # running a couple of quick election attempts from B in which A does not + # participate. 
+ # + puts "\tRep$tnum.c: Adjust site B's egen." + set pri 100 + set timeout 10000 + set nsites 2 + set nvotes 2 + error_check_bad solitaire1 [catch {$envb rep_elect \ + $nsites $nvotes $pri $timeout} result] 0 + error_check_good solitaire1a [is_substr $result DB_REP_UNAVAIL] 1 + error_check_bad solitaire2 [catch {$envb rep_elect \ + $nsites $nvotes $pri $timeout} result] 0 + error_check_good solitaire2a [is_substr $result DB_REP_UNAVAIL] 1 + + set egena [stat_field $enva rep_stat "Election generation number"] + set egenb [stat_field $envb rep_stat "Election generation number"] + error_check_good starting_egen [expr $egenb > $egena] 1 + replclear 1 + + # Start an election at site A, using a timeout longer than we should + # ever need. + # + puts "\tRep$tnum.d: Start an election at site A." + set envid 1 + set timeout [expr 60 * 1000000] + set elect_serial 1 + set pfx "A.1" + start_election $pfx $qdir $dira $envid $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + + set wait_limit 20 + if { $lost } { + # Wait until the child process has gotten as far as sending its + # vote1 message to site B (eid 2). We want to "lose" that + # message, but it won't do any good to replclear before the + # message is actually in the message queue. + # + set envid 2 + set voted false + for { set count 0 } { $count < $wait_limit } { incr count } { + if {[rep093_find_vote1 $envid]} { + set voted true + break; + } + tclsleep 1 + } + error_check_good voted $voted true + replclear $envid + + # Start an election at site B. We expect site A to react to + # this by indicating that we should start another rep_elect() + # call immediately. We'll check this later, when we examine the + # $got_egenchg flag. + # + puts "\tRep$tnum.e: Start election at site B." 
+ incr elect_serial + set pfx "B.$elect_serial" + start_election $pfx $qdir $dirb $envid $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + } + + set got_egenchg false + set got_master false + set done false + for { set count 0 } { $count < $wait_limit && !$done} { incr count } { + foreach pair $envlist { + set env [lindex $pair 0] + set envid [lindex $pair 1] + if { [info exists elect_pipe($envid)] } { + check_election $elect_pipe($envid) \ + unavail child_elected + } else { + set child_elected false + } + set parent_elected [is_elected $env] + if { ( $child_elected || $parent_elected ) && \ + !$got_master } { + set got_master true + puts "\tRep$tnum.f: Env [$env get_errpfx]\ + won the election." + $env rep_start -master + if { $env eq $enva } { + set client $envb + } else { + set client $enva + } + } + replprocessqueue $env $envid 0 he + if { $he } { + # In the "lost msg case" the only HOLDELECTION + # indication we should be getting is at site A + # (EID 1). + # + if { $lost } { + error_check_good siteA $envid 1 + } + if { $env eq $enva } { + set got_egenchg true + } + incr elect_serial + set envpfx [$env get_errpfx] + set pfx "$envpfx.$elect_serial" + puts "\tRep$tnum.g: Starting another\ + election $pfx at $envpfx." + set dir [$env get_home] + start_election $pfx $qdir $dir $envid \ + $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + } + } + if { $got_master && \ + [stat_field $client rep_stat "Startup complete"] } { + puts "\tRep$tnum.h: Env [$client get_errpfx]\ + has STARTUPDONE." 
+ set done true + } else { + tclsleep 1 + } + } + error_check_good done $done true + error_check_good got_egenchg $got_egenchg true + cleanup_elections + + $enva close + $envb close + replclose $qdir +} + + +proc rep093_find_vote1 { envid } { + global queuedbs + + set dbc [$queuedbs($envid) cursor] + + set result no + for { set records [$dbc get -first] } \ + { [llength $records] > 0 } \ + { set records [$dbc get -next] } { + set dbt_pair [lindex $records 0] + set recno [lindex $dbt_pair 0] + set msg [lindex $dbt_pair 1] + set ctrl [lindex $msg 0] + if {[berkdb msgtype $ctrl] eq "vote1"} { + set result yes + break + } + } + + $dbc close + return $result +} diff --git a/test/tcl/rep094.tcl b/test/tcl/rep094.tcl new file mode 100644 index 00000000..dd59c608 --- /dev/null +++ b/test/tcl/rep094.tcl @@ -0,0 +1,240 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST rep094 +# TEST Full election with less than majority initially connected. +# +# TEST Cold-boot a 4-site group. The first two sites start quickly and +# TEST initiate an election. The other two sites don't join the election until +# TEST the middle of the long full election timeout period. It's important that +# TEST the number of sites that start immediately be a sub-majority, because +# TEST that's the case that used to have a bug in it [#18456]. +# +proc rep094 { method { tnum "094" } args } { + source ./include.tcl + + # Skip for all methods except btree. + if { $checking_valid_methods } { + set test_methods { btree } + return $test_methods + } + if { [is_btree $method] == 0 } { + puts "Rep$tnum: Skipping for method $method." 
+		return
+	}
+
+	rep094_sub $method $tnum
+}
+
+proc rep094_sub { method tnum } {
+	global rep_verbose
+	global testdir
+	global verbose_type
+	global repfiles_in_memory
+	global elect_serial
+
+	if { $repfiles_in_memory } {
+		set repmemargs "-rep_inmem_files "
+		set msg ", with in-memory replication files"
+	} else {
+		set repmemargs ""
+		set msg ""
+	}
+	puts "Rep$tnum: Full election starting with minority$msg."
+
+	set verbargs ""
+	if { $rep_verbose == 1 } {
+		set verbargs " -verbose {$verbose_type on} "
+	}
+
+	env_cleanup $testdir
+	replsetup [set qdir $testdir/MSGQUEUEDIR]
+
+	set dira $testdir/SITEA
+	set dirb $testdir/SITEB
+	set dirc $testdir/SITEC
+	set dird $testdir/SITED
+
+	file mkdir $dira
+	file mkdir $dirb
+	file mkdir $dirc
+	file mkdir $dird
+
+	puts "\tRep$tnum.a: Boot first two sites."
+	repladd 1
+	set envcmda "berkdb_env_noerr -create -txn -errpfx SITEA \
+	    $repmemargs -event \
+	    $verbargs -home $dira -rep_transport \[list 1 replsend\]"
+	set enva [eval $envcmda -rep_client]
+
+	repladd 2
+	set envcmdb "berkdb_env_noerr -create -txn -errpfx SITEB \
+	    $repmemargs -event \
+	    $verbargs -home $dirb -rep_transport \[list 2 replsend\]"
+	set envb [eval $envcmdb -rep_client]
+
+	set envlist "{$enva 1} {$envb 2}"
+	process_msgs $envlist
+
+
+	# Start an election at site A, with a generous full-election timeout (3
+	# minutes). (Note that specifying the timeout as a two-element list is
+	# the (only) way to set a full election timeout via start_election.)
+	#
+	repladd 3
+	repladd 4
+	puts "\tRep$tnum.b: Start an election at site A."
+	set pri 100
+	set nsites 4
+	set nvotes 3
+	set envid 1
+	set timeout [list [expr 10 * 1000000] [expr 180 * 1000000]]
+	set elect_serial 1
+	set pfx "A.1"
+	start_election $pfx $qdir $dira $envid $nsites $nvotes $pri $timeout
+	set elect_pipe($envid) $elect_serial
+
+	# The standard run_election proc is not flexible enough for our needs
+	# here, so we conduct our own customized version of the loop here.
+ # What's different about our case here is that we want to start sites C + # and D during the midst of the phase 1 timeout period. + # + set wait_limit 20 + set done false + for { set count 0 } { $count < $wait_limit && !$done} { incr count } { + foreach pair $envlist { + set env [lindex $pair 0] + set envid [lindex $pair 1] + if { [info exists elect_pipe($envid)] } { + check_election $elect_pipe($envid) \ + unavail child_elected + } else { + set child_elected false + } + set parent_elected [is_elected $env] + if { $child_elected || $parent_elected } { + error "FAIL: election succeeded unexpectedly" + } + replprocessqueue $env $envid 0 he + if { $he } { + incr elect_serial + set envpfx [$env get_errpfx] + set pfx "$envpfx.$elect_serial" + puts "\tRep$tnum.c: Starting another\ + election $pfx at $envpfx." + set dir [$env get_home] + start_election $pfx $qdir $dir $envid \ + $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + } + } + if { $count >= 10 && $he } { + set done true + } else { + tclsleep 1 + } + } + + # Now that we've waited 10 seconds, start the "slow-booting" + # sites C and D. + # + puts "\tRep$tnum.d: Boot third site, have it join election." + set envcmdc "berkdb_env_noerr -create -txn -errpfx SITEC \ + $repmemargs -event \ + $verbargs -home $dirc -rep_transport \[list 3 replsend\]" + set envc [eval $envcmdc -rep_client] + set envlist "{$enva 1} {$envb 2} {$envc 3}" + process_msgs $envlist + + incr elect_serial + set env $envc + set envid 3 + set envpfx [$env get_errpfx] + set pfx "$envpfx.$elect_serial" + puts "\tRep$tnum.e: Starting straggler election $pfx at $envpfx." + set dir [$env get_home] + start_election $pfx $qdir $dir $envid $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + + for { set count 0 } { $count < 5 } { incr count } { + process_msgs $envlist + tclsleep 1 + } + + puts "\tRep$tnum.f: Boot fourth site, join election." 
+ set envcmdd "berkdb_env_noerr -create -txn -errpfx SITED \ + $repmemargs -event \ + $verbargs -home $dird -rep_transport \[list 4 replsend\]" + set envd [eval $envcmdd -rep_client] + set envlist "{$enva 1} {$envb 2} {$envc 3} {$envd 4}" + process_msgs $envlist + + incr elect_serial + set env $envd + set envid 4 + set envpfx [$env get_errpfx] + set pfx "$envpfx.$elect_serial" + puts "\tRep$tnum.g: Starting straggler election $pfx at $envpfx." + set dir [$env get_home] + start_election $pfx $qdir $dir $envid $nsites $nvotes $pri $timeout + set elect_pipe($envid) $elect_serial + process_msgs $envlist + + # The election should now complete promptly. We'll give it 60 seconds, + # just in case this test is running on a slow overloaded system. That's + # still way less than the 3 minutes total full election timeout, so if + # we finish before 60 seconds, it means we succeeded. + # + set wait_limit 60 + set master -1 + for { set count 0 } { $count < $wait_limit } { incr count } { + set finishers 0 + set synced_clients 0 + foreach pair $envlist { + set env [lindex $pair 0] + set envid [lindex $pair 1] + if { [info exists elect_pipe($envid)] } { + if {[check_election $elect_pipe($envid) \ + unavail child_elected]} { + incr finishers + } + } else { + set child_elected false + } + set parent_elected [is_elected $env] + if { ( $child_elected || $parent_elected ) && + $master == -1 } { + set master $envid + $env rep_start -master + } + replprocessqueue $env $envid 0 he + if { $he } { + error "FAIL: got HOLDELECTION unexpectedly" + } + + # Once we've elected a master, start checking for + # startupdone at the clients, just so's we can + # gracefully shut everything down. 
+			#
+			if { $master > 0 && $envid != $master &&
+			    [stat_field $env rep_stat "Startup complete"] } {
+				incr synced_clients
+			}
+		}
+
+		if { $finishers == $nsites && $synced_clients == $nsites - 1 } {
+			break;
+		}
+		tclsleep 1
+	}
+	error_check_good got_newmaster [expr $master > 0] 1
+
+	cleanup_elections
+
+	$enva close
+	$envb close
+	$envc close
+	$envd close
+	replclose $qdir
+}
diff --git a/test/tcl/rep095.tcl b/test/tcl/rep095.tcl
new file mode 100644
index 00000000..9cc3414f
--- /dev/null
+++ b/test/tcl/rep095.tcl
@@ -0,0 +1,235 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+#
+# TEST	rep095
+# TEST	Test of internal initialization use of shared region memory.
+# TEST
+# TEST	One master, one client.  Create a gap that requires internal
+# TEST	initialization.  Start the internal initialization in this
+# TEST	parent process and complete it in a separate child process.
+
+proc rep095 { method { niter 200 } { tnum "095" } args } {
+
+	source ./include.tcl
+	global databases_in_memory
+	global repfiles_in_memory
+
+	# Skip for all methods except btree.
+	if { $checking_valid_methods } {
+		set test_methods { btree }
+		return $test_methods
+	}
+	if { [is_btree $method] == 0 } {
+		puts "Rep$tnum: Skipping for method $method."
+		return
+	}
+
+	set args [convert_args $method $args]
+	set saved_args $args
+
+	# Set up for on-disk or in-memory databases.
+	set msg "using on-disk databases"
+	if { $databases_in_memory } {
+		set msg "using named in-memory databases"
+		if { [is_queueext $method] } {
+			puts -nonewline "Skipping rep$tnum for method "
+			puts "$method with named in-memory databases."
+			return
+		}
+	}
+
+	set msg2 "and on-disk replication files"
+	if { $repfiles_in_memory } {
+		set msg2 "and in-memory replication files"
+	}
+
+	# This test needs to set its own pagesize.
+ set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Run the body of the test with and without recovery, + # and with and without cleaning. Skip recovery with in-memory + # logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + set envargs "" + set args $saved_args + puts "Rep$tnum ($method $envargs $r $args):\ + Test of internal initialization $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep095_sub $method $niter $tnum $envargs \ + $l $r $args + } + } +} + +proc rep095_sub { method niter tnum envargs logset recargs largs } { + source ./include.tcl + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. 
+ repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max $envargs $verbargs \ + -errpfx MASTER -home $masterdir \ + -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Open a client + repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max $envargs $verbargs \ + -errpfx CLIENT -home $clientdir \ + -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + + # Bring the client online by processing the startup messages. + set envlist "{$masterenv 1} {$clientenv 2}" + process_msgs $envlist + + # Run rep_test in the master (and update client). + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + eval rep_test $method $masterenv NULL $niter $start $start 0 $largs + incr start $niter + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $masterenv test force noarchive_timeout + + # Create a gap requiring internal initialization. + set dbhandle NULL + set cid 2 + set start [push_master_ahead $method $masterenv $masterdir $m_logtype \ + $clientenv $cid $dbhandle $start $niter "" $largs] + + # Communicate with child process by creating a marker file. + set markerenv [berkdb_env_noerr -create -home $testdir -txn] + error_check_good markerenv_open [is_valid_env $markerenv] TRUE + set marker [eval "berkdb_open_noerr \ + -create -btree -auto_commit -env $markerenv marker.db"] + + # Fork child process. It should process whatever it finds in the + # message queue -- the remaining messages for the internal + # initialization. It is run in a separate process to test multiple + # processes using curinfo and originfo in the shared rep region. 
+ # + puts "\tRep$tnum.b: Fork child process." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep095script.tcl $testdir/repscript.log \ + $masterdir $clientdir $rep_verbose $verbose_type &] + + # Reopen client, which was closed by push_master_ahead above. + set clientenv [eval $cl_envcmd $recargs -rep_client] + error_check_good client_env [is_valid_env $clientenv] TRUE + set envlist "{$masterenv 1} {$clientenv 2}" + + # Give child time to open environments to avoid lockout conflicts. + while { [llength [$marker get CHILDSETUP]] == 0 } { + tclsleep 1 + } + + set pmsgs 4 + puts "\tRep$tnum.c: Get partway through internal init ($pmsgs iters)." + for { set i 1 } { $i < $pmsgs } { incr i } { + proc_msgs_once $envlist + } + set pagerec [stat_field $clientenv rep_stat "Pages received"] + # Let child know that partial internal init is done. + error_check_good putpartinit [$marker put PARENTPARTINIT 1] 0 + + # Now wait for the child to finish the internal init by simply + # waiting for the child to finish. + puts "\tRep$tnum.d: Waiting for child to finish internal init..." + # Watch until the script is done. + watch_procs $pid 10 + + puts "\tRep$tnum.e: Verify more pages received after child ran." + error_check_good more_pages_received [expr \ + [stat_field $clientenv rep_stat "Pages received"] > $pagerec] 1 + + puts "\tRep$tnum.f: Verify databases" + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + # Need to refresh our environment handles before processing more + # messages because we abandoned message processing in the middle. + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + set masterenv [eval $ma_envcmd $recargs -rep_master] + set clientenv [eval $cl_envcmd $recargs -rep_client] + set envlist "{$masterenv 1} {$clientenv 2}" + + # Add records to the master and update client. + puts "\tRep$tnum.g: Add more records and check again." 
+ set entries 10 + eval rep_test $method $masterenv NULL $entries $start $start 0 $largs + incr start $entries + process_msgs $envlist 0 NONE err + error_check_good process_msgs $err 0 + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + error_check_good marker_close [$marker close] 0 + error_check_good markerenv_close [$markerenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} + diff --git a/test/tcl/rep095script.tcl b/test/tcl/rep095script.tcl new file mode 100644 index 00000000..1d7269a1 --- /dev/null +++ b/test/tcl/rep095script.tcl @@ -0,0 +1,91 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep095 script - internal initialization use of shared region memory. +# +# Repscript exists to process the remaining messages for +# an internal initialization. The test is set up for the +# parent process to start an internal initialization and +# for this child process to complete it by processing the +# remaining messages in the message queue. This script +# requires a one-master and one-client setup. +# +# Usage: repscript masterdir clientdir rep_verbose verbose_type +# masterdir: master env directory +# clientdir: client env directory +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir clientdir rep_verbose verbose_type" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set clientdir [ lindex $argv 1 ] +set rep_verbose [ lindex $argv 2 ] +set verbose_type [ lindex $argv 3 ] +set verbargs "" +if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " +} + +# Join the queue env. 
We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# We need to set up our own machids. +# Add 1 for master env id, and 2 for the clientenv id. +# +repladd 1 +repladd 2 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir $verbargs \ + -txn -rep_master -rep_transport \[list 1 replsend\]" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +puts "Master open" + +# Join the client env. +set cl_cmd "berkdb_env_noerr -home $clientdir $verbargs \ + -txn -rep_client -rep_transport \[list 2 replsend\]" +set clientenv [eval $cl_cmd] +error_check_good script_cenv_open [is_valid_env $clientenv] TRUE + +puts "Everyone open" + +# Join the marker env/database and let parent know child is set up. +set markerenv [berkdb_env -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker [berkdb_open -unknown -env $markerenv -auto_commit marker.db] +error_check_good putchildsetup [$marker put CHILDSETUP 2] 0 + +# Give parent process time to start the internal init. +while { [llength [$marker get PARENTPARTINIT]] == 0 } { + tclsleep 1 +} + +# Process the remaining messages to finish the internal init in this process. +process_msgs "{$masterenv 1} {$clientenv 2}" + +puts "Processed messages" + +# Close everything. +error_check_good marker_db_close [$marker close] 0 +error_check_good market_env_close [$markerenv close] 0 +error_check_good script_master_close [$masterenv close] 0 +error_check_good script_client_close [$clientenv close] 0 +puts "Repscript completed successfully" diff --git a/test/tcl/rep096.tcl b/test/tcl/rep096.tcl new file mode 100644 index 00000000..0df6cda0 --- /dev/null +++ b/test/tcl/rep096.tcl @@ -0,0 +1,209 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep096 +# TEST Replication and db_replicate utility. +# TEST +# TEST Create a master and client environment. Open them. +# TEST Start a db_replicate process on each. Create a database on +# TEST the master and write some data. Then verify contents. +proc rep096 { method { niter 100 } { tnum "096" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + global util_path + global EXE + + # All access methods are allowed. + if { $checking_valid_methods } { + return "ALL" + } + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping Rep$tnum on QNX." + return + } + + if { [file exists $util_path/db_replicate$EXE] == 0 } { + puts "Skipping Rep$tnum with db_replicate. Is it built?" + return + } + + set logsets [create_logsets 2] + + set args [convert_args $method $args] + + set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Rep$tnum: Skipping for in-memory logs\ + with -recover." + continue + } + puts "Rep$tnum ($method $r): Db_replicate and\ + non-rep env handles $msg $msg2." 
+ puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep096_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep096_sub { method niter tnum logset recargs largs } { + source ./include.tcl + global testdir + global is_hp_test + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. + set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + if { $databases_in_memory } { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + + # + # Make DB_CONFIG files in each directory. Set a client priority + # of 1 so that it sends acks and the master expects acks. + # + puts "\tRep$tnum.a: Creating initial environments." + set nsites 2 + replicate_make_config $masterdir 0 100 + replicate_make_config $clientdir 1 1 + + # + # Open a master and a client, but only with -rep. Note that we need + # to specify -thread also. 
+ # + set max_locks 2500 + set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \ + -lock_max_objects $max_locks -lock_max_locks $max_locks \ + -home $masterdir -errpfx MASTER $verbargs $repmemargs \ + $m_txnargs $m_logargs -rep -thread" + set masterenv [eval $env_cmd(M) $recargs] + + set env_cmd(C) "berkdb_env_noerr -create $c_txnargs $c_logargs \ + -lock_max_objects $max_locks -lock_max_locks $max_locks \ + -home $clientdir -errpfx CLIENT $verbargs $repmemargs -rep -thread" + set clientenv [eval $env_cmd(C) $recargs] + + # + # Now start db_replicate on each site. + # + puts "\tRep$tnum.b: Start db_replicate on each env." + set dpid(M) [eval {exec $util_path/db_replicate -h $masterdir} \ + -M -t 5 &] + set dpid(C) [eval {exec $util_path/db_replicate -h $clientdir} &] + + await_startup_done $clientenv + + # + # Force a checkpoint to cause the subordinate connection + # for this Tcl process to get established. However, the + # checkpoint log records will get lost prior to the + # connection getting established. + # + $masterenv txn_checkpoint -force + + # + # Wait for the master and client LSNs to match after this + # checkpoint. That might mean waiting for the rerequest thread + # to run or db_replicate to call rep_flush. + # + await_condition \ + {[stat_field $masterenv rep_stat "Next LSN expected"] == \ + [stat_field $clientenv rep_stat "Next LSN expected"]} + + puts "\tRep$tnum.c: Create database on master." + set omethod [convert_method $method] + set db [eval berkdb_open_noerr -create -env $masterenv -auto_commit \ + -mode 0644 $largs $omethod $dbname] + error_check_good db_open [is_valid_db $db] TRUE + + await_condition \ + {[stat_field $masterenv rep_stat "Next LSN expected"] == \ + [stat_field $clientenv rep_stat "Next LSN expected"]} + + if { !$databases_in_memory } { + puts "\tRep$tnum.d: Verify database exists on client." 
+ error_check_good client_db [file exists $clientdir/$dbname] 1 + } + + # Run a modified test001 in the master (and update client). + puts "\tRep$tnum.e: Running rep_test in replicated env." + eval rep_test $method $masterenv $db $niter 0 0 0 $largs + + await_condition \ + {[stat_field $masterenv rep_stat "Next LSN expected"] == \ + [stat_field $clientenv rep_stat "Next LSN expected"]} + + # Check that databases are in-memory or on-disk as expected. + check_db_location $masterenv + check_db_location $clientenv + + $db close + + await_condition \ + {[stat_field $masterenv rep_stat "Next LSN expected"] == \ + [stat_field $clientenv rep_stat "Next LSN expected"]} + + tclkill $dpid(C) + tclkill $dpid(M) + + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 +} diff --git a/test/tcl/rep097.tcl b/test/tcl/rep097.tcl new file mode 100644 index 00000000..13b3189d --- /dev/null +++ b/test/tcl/rep097.tcl @@ -0,0 +1,306 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep097 +# TEST +# TEST Replication and lease data durability test. +# TEST Set leases on master and 2 clients. +# TEST Have the original master go down and a client take over. +# TEST Have the old master rejoin as client, but go down again. +# TEST The other two sites do one txn, while the original master's +# TEST LSN extends beyond due to running recovery. +# TEST Original Master rejoins while new master fails. Make sure remaining +# TEST original site is elected, with the smaller LSN, but with txn data. +# +proc rep097 { method { tnum "097" } args } { + source ./include.tcl + + # Valid for all access methods. Other lease tests limit the + # test because there is nothing method-specific being tested. + # Use all methods for this basic test. 
+ if { $checking_valid_methods } { + return "btree" + } + + # This test depends on recovery, so can not be run with + # in-memory logging or with rep files in-memory. + global mixed_mode_logging + if { $mixed_mode_logging > 0 } { + puts "Rep$tnum: Skipping for mixed-mode logging." + return + } + global repfiles_in_memory + if { $repfiles_in_memory } { + puts "Rep$tnum: Skipping for in-memory replication files." + return + } + + set args [convert_args $method $args] + + # Set up for on-disk or in-memory databases. + set msg "using on-disk databases" + + foreach r $test_recopts { + puts "Rep$tnum ($method $r): Replication\ + and durability of leases $msg." + rep097_sub $method $tnum $r $args + } +} + +proc rep097_sub { method tnum recargs largs } { + source ./include.tcl + global testdir + global databases_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + + set qdir $testdir/MSGQUEUEDIR + replsetup $qdir + + set env0dir $testdir/ENV0 + set env1dir $testdir/ENV1 + set env2dir $testdir/ENV2 + + file mkdir $env0dir + file mkdir $env1dir + file mkdir $env2dir + + # Set leases for 3 sites, 3 second timeout, 0% clock skew + set nsites 3 + set lease_to 3000000 + set lease_tosec [expr $lease_to / 1000000] + set clock_fast 0 + set clock_slow 0 + set testfile test.db + # + # Since we have to use elections, the election code + # assumes a 2-off site id scheme. + # Open the site that will become master, due to priority. + repladd 2 + set err_cmd(0) "none" + set crash(0) 0 + set pri(0) 100 + set envcmd(0) "berkdb_env -create -txn nosync \ + $verbargs -errpfx ENV0 -home $env0dir \ + -rep_nsites $nsites -rep_lease \[list $lease_to\] -event \ + -rep_client -rep_transport \[list 2 replsend\]" + set masterenv [eval $envcmd(0) $recargs] + error_check_good master_env [is_valid_env $masterenv] TRUE + + # Open two clients. 
+ repladd 3 + set err_cmd(1) "none" + set crash(1) 0 + set pri(1) 70 + set envcmd(1) "berkdb_env -create -txn nosync \ + $verbargs -errpfx ENV1 -home $env1dir -rep_nsites $nsites \ + -rep_lease \[list $lease_to $clock_fast $clock_slow\] -event \ + -rep_client -rep_transport \[list 3 replsend\]" + set clientenv [eval $envcmd(1) $recargs] + error_check_good client_env [is_valid_env $clientenv] TRUE + + repladd 4 + set err_cmd(2) "none" + set crash(2) 0 + set pri(2) 30 + set envcmd(2) "berkdb_env -create -txn nosync \ + $verbargs -errpfx ENV2 -home $env2dir \ + -rep_nsites $nsites -rep_lease \[list $lease_to\] -event \ + -rep_client -rep_transport \[list 4 replsend\]" + set clientenv2 [eval $envcmd(2) $recargs] + error_check_good client_env [is_valid_env $clientenv2] TRUE + + # Bring the clients online by processing the startup messages. + set envlist "{$masterenv 2} {$clientenv 3} {$clientenv2 4}" + process_msgs $envlist + + # + # Run election to get a master. Leases prevent us from + # simply assigning a master. + # + set msg "Rep$tnum.a" + puts "\tRep$tnum.a: Run initial election." + set nvotes $nsites + set winner 0 + set elector [berkdb random_int 0 2] + # + # Note we send in a 0 for nsites because we set nsites back + # when we started running with leases. Master leases require + # that nsites be set before calling rep_start, and master leases + # require that the nsites arg to rep_elect be 0. + # + run_election envlist err_cmd pri crash $qdir $msg \ + $elector 0 $nvotes $nsites $winner 0 NULL + + puts "\tRep$tnum.b: Spawn a child tclsh to do txn work." + set pid [exec $tclsh_path $test_path/wrap.tcl \ + rep097script.tcl $testdir/rep097script.log \ + $env0dir $env1dir $testfile $method &] + + # Let child run, create database and put a txn into it. + # Process messages while we wait for the child to complete + # its txn so that the clients can grant leases. + puts "\tRep$tnum.c: Wait for child to write txn." 
+ while { [file exists $testdir/marker.db] == 0 } { + tclsleep 1 + } + set markerenv [berkdb_env -home $testdir -txn] + error_check_good markerenv_open \ + [is_valid_env $markerenv] TRUE + set marker [berkdb_open -unknown -env $markerenv \ + -auto_commit marker.db] + set kd [$marker get CHILD1] + while { [llength $kd] == 0 } { + process_msgs $envlist + tclsleep 1 + set kd [$marker get CHILD1] + } + process_msgs $envlist + + # + # Close the master env handle (simulate a crash). Run an election + # with the remaining 2 sites. + # + set msg "Rep$tnum.d" + puts "\tRep$tnum.d: Run election after master crash." + error_check_good masterenv_close [$masterenv close] 0 + set envlist [lreplace $envlist 0 0] + set nvotes [expr $nsites - 1] + set winner 1 + set elector 1 + run_election envlist err_cmd pri crash $qdir $msg \ + $elector 0 $nvotes $nsites $winner 0 NULL + + # + # Let child process know that the new master is elected. + # + error_check_good timestamp_done \ + [$marker put PARENT1 [timestamp -r]] 0 + + # + # Wait for child to open db and write txn on new master. We still + # have two sites so leases should be able to be granted successfully. + # + set kd [$marker get CHILD2] + while { [llength $kd] == 0 } { + process_msgs $envlist + tclsleep 1 + set kd [$marker get CHILD2] + } + process_msgs $envlist + + # + # Restart the original master env as a client. + # Synchronize with the rest of the group. + # + puts "\tRep$tnum.e: Resync newly rejoined site." + set envstart0 [eval $envcmd(0) -recover] + error_check_good orig_env [is_valid_env $envstart0] TRUE + lappend envlist "$envstart0 2" + process_msgs $envlist + + # + # Close env again after synchronizing. + # + puts "\tRep$tnum.f: Abandon new site." + $envstart0 log_flush + error_check_good masterenv_close [$envstart0 close] 0 + set envlist [lreplace $envlist end end] + + # + # Tell child process to write a txn with just these two sites again. 
+ # + error_check_good timestamp_done \ + [$marker put PARENT2 [timestamp -r]] 0 + + set kd [$marker get CHILD3] + while { [llength $kd] == 0 } { + process_msgs $envlist + tclsleep 1 + set kd [$marker get CHILD3] + } + process_msgs $envlist + # + # Child sends us the key it used as the data + # of the CHILD3 key. This key should be durable. + # + set key [lindex [lindex $kd 0] 1] + + # + # Close the new master and restart the original site again. + # Run an election between the remaining two sites. The + # original site should be ahead in LSN but behind in txns + # and should lose the election. + # + puts "\tRep$tnum.g: Abandon new master and restart old site again." + set envlist [lreplace $envlist 0 0] + error_check_good clientenv_close [$clientenv close] 0 + set envstart1 [eval $envcmd(0) -recover] + error_check_good orig_env [is_valid_env $envstart1] TRUE + lappend envlist "$envstart1 2" + + # + # Make sure recovered env is ahead of client2. + # + set c2file [stat_field $clientenv2 log_stat "Current log file number"] + set c2off [stat_field $clientenv2 log_stat "Current log file offset"] + set e1file [stat_field $envstart1 log_stat "Current log file number"] + set e1off [stat_field $envstart1 log_stat "Current log file offset"] + + if { $e1file == $c2file } { + error_check_good offchk [expr $e1off > $c2off] 1 + } else { + error_check_good filechk [expr $e1file > $c2file] 1 + } + + set msg "Rep$tnum.h" + puts "\tRep$tnum.h: Run election." + set nvotes [expr $nsites - 1] + set winner 2 + set elector 0 + run_election envlist err_cmd pri crash $qdir $msg \ + $elector 0 $nvotes $nsites $winner 0 NULL + + # + # Tell child to exit. 
+ # + error_check_good timestamp_done \ + [$marker put PARENT3 [timestamp -r]] 0 + + set newmaster $clientenv2 + set newstate [stat_field $newmaster rep_stat "Role"] + error_check_good newm $newstate "master" + + set masterdb [eval \ + {berkdb_open_noerr -env $newmaster -rdonly $testfile}] + error_check_good dbopen [is_valid_db $masterdb] TRUE + check_leaseget $masterdb $key "-nolease" 0 + + watch_procs $pid 5 + + # Clean up. + error_check_good marker_db_close [$marker close] 0 + error_check_good marker_env_close [$markerenv close] 0 + error_check_good masterdb_close [$masterdb close] 0 + error_check_good masterenv_close [$envstart1 close] 0 + error_check_good clientenv_close [$clientenv2 close] 0 + + replclose $testdir/MSGQUEUEDIR + + # Check log file for failures. + set errstrings [eval findfail $testdir/rep097script.log] + foreach str $errstrings { + puts "FAIL: error message in rep097 log file: $str" + } +} + diff --git a/test/tcl/rep097script.tcl b/test/tcl/rep097script.tcl new file mode 100644 index 00000000..7073619f --- /dev/null +++ b/test/tcl/rep097script.tcl @@ -0,0 +1,152 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Rep097 script - Master leases. +# +# Test master leases and durability of operations. +# +# Usage: rep097script masterdir newmdir dbfile method +# masterdir: master env directory +# dbfile: name of database file +# method: access method +# +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl +source $test_path/reputils.tcl + +set usage "repscript masterdir newmdir dbfile method" + +# Verify usage +if { $argc != 4 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set masterdir [ lindex $argv 0 ] +set newmdir [ lindex $argv 1 ] +set dbfile [ lindex $argv 2 ] +set method [ lindex $argv 3 ] + +# Join the queue env. 
We assume the rep test convention of +# placing the messages in $testdir/MSGQUEUEDIR. +set queueenv [eval berkdb_env -home $testdir/MSGQUEUEDIR] +error_check_good script_qenv_open [is_valid_env $queueenv] TRUE + +# +# We need to set up our own machids. +# Add 2 for master env id, and 3 and 4 for the clientenv ids. +# +repladd 2 +repladd 3 +repladd 4 + +# Join the master env. +set ma_cmd "berkdb_env_noerr -home $masterdir \ + -txn -rep_transport \[list 2 replsend\]" +set newma_cmd "berkdb_env_noerr -home $newmdir \ + -txn -rep_transport \[list 3 replsend\]" +# set ma_cmd "berkdb_env_noerr -home $masterdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_transport \[list 2 replsend\]" +# set newma_cmd "berkdb_env_noerr -home $newmdir \ +# -verbose {rep on} -errfile /dev/stderr \ +# -txn -rep_transport \[list 3 replsend\]" +puts "Joining master env" +set masterenv [eval $ma_cmd] +error_check_good script_menv_open [is_valid_env $masterenv] TRUE + +# Create a marker file. Don't put anything in it yet. The parent +# process will be processing messages while it looks for our +# marker. + + +puts "Create marker file" +set markerenv [berkdb_env -create -home $testdir -txn] +error_check_good markerenv_open [is_valid_env $markerenv] TRUE +set marker \ + [eval "berkdb_open -create -btree -auto_commit -env $markerenv marker.db"] + +# +# Create the database and then do a lease operation. Don't +# process messages in the child process. 
+# +puts "Open database" +set args [convert_args $method] +puts "args is $args" +set omethod [convert_method $method] +set open_args "-auto_commit -create $omethod $args $dbfile" +set db [eval "berkdb_open -env $masterenv $open_args"] +error_check_good script_db_open [is_valid_db $db] TRUE + +puts "Do lease op" +set key 1 +do_leaseop $masterenv $db $method $key NULL 0 +$db close +$masterenv close + +puts "Put CHILD1" +error_check_good child_key \ + [$marker put CHILD1 $key] 0 + +puts "Wait for PARENT1" +# Give the parent a chance to process messages and check leases. +while { [llength [$marker get PARENT1]] == 0 } { + tclsleep 1 +} + +# +# At this point the master has crashed and the new master elected. +# Open a few copies of the db on the new master. We want several +# db handles to be open so that when the env runs recovery it will +# generate several RCLOSE log records and push the LSN out past +# the remaining sites (that write a txn to the log). +# +set newmenv [eval $newma_cmd] +error_check_good script_newenv_open [is_valid_env $newmenv] TRUE +set num_dbs 10 +for { set i 0 } { $i < $num_dbs } { incr i } { + set ndb($i) [eval "berkdb_open -env $newmenv $open_args"] + error_check_good script_db_open [is_valid_db $ndb($i)] TRUE +} +puts "Do lease op 2" +incr key +do_leaseop $newmenv $ndb(0) $method $key NULL 0 +puts "Put CHILD2" +error_check_good child2_key \ + [$marker put CHILD2 $key] 0 + +puts "Wait for PARENT2" +# Give the parent a chance to process messages and check leases. +while { [llength [$marker get PARENT2]] == 0 } { + tclsleep 1 +} + +# +# After we get PARENT2, write one more txn. +# +incr key +do_leaseop $newmenv $ndb(0) $method $key NULL 0 +puts "Put CHILD3" +error_check_good child2_key \ + [$marker put CHILD3 $key] 0 + +puts "Wait for PARENT3" +# Give the parent a chance to process messages and check leases. 
+while { [llength [$marker get PARENT3]] == 0 } { + tclsleep 1 +} + +puts "Clean up and exit" +# Clean up the child so the parent can go forward. +for { set i 0 } { $i < $num_dbs } { incr i } { + error_check_good newm_db_close [$ndb($i) close] 0 +} +error_check_good newm_close [$newmenv close] 0 +error_check_good marker_db_close [$marker close] 0 +error_check_good markerenv_close [$markerenv close] 0 + diff --git a/test/tcl/rep098.tcl b/test/tcl/rep098.tcl new file mode 100644 index 00000000..b1474246 --- /dev/null +++ b/test/tcl/rep098.tcl @@ -0,0 +1,273 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rep098 +# TEST Test of internal initialization and page deallocation. +# TEST +# TEST Use one master, one client. +# TEST Generate several log files. +# TEST Remove old master log files. +# TEST Start a client. +# TEST After client gets UPDATE file information, delete entries to +# TEST remove pages in the database. +# +proc rep098 { method { niter 200 } { tnum "098" } args } { + + source ./include.tcl + global databases_in_memory + global repfiles_in_memory + + # Run for btree and queue methods only. + if { $checking_valid_methods } { + set test_methods {} + foreach method $valid_methods { + if { [is_btree $method] == 1 || \ + [is_queue $method] == 1 } { + lappend test_methods $method + } + } + return $test_methods + } + if { [is_btree $method] == 0 && [is_queue $method] == 0 } { + puts "Rep$tnum: skipping for non-btree, non-queue method." + return + } + + set args [convert_args $method $args] + + # This test needs to set its own pagesize. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Rep$tnum: skipping for specific pagesizes" + return + } + + set logsets [create_logsets 2] + + # Set up for on-disk or in-memory databases. 
+ set msg "using on-disk databases" + if { $databases_in_memory } { + set msg "using named in-memory databases" + if { [is_queueext $method] } { + puts -nonewline "Skipping rep$tnum for method " + puts "$method with named in-memory databases." + return + } + } + + set msg2 "and on-disk replication files" + if { $repfiles_in_memory } { + set msg2 "and in-memory replication files" + } + + # Run the body of the test with and without recovery, + # Skip recovery with in-memory logging - it doesn't make sense. + foreach r $test_recopts { + foreach l $logsets { + set logindex [lsearch -exact $l "in-memory"] + if { $r == "-recover" && $logindex != -1 } { + puts "Skipping rep$tnum for -recover\ + with in-memory logs." + continue + } + puts "Rep$tnum ($method $r $args): Test of\ + internal init with page deallocation $msg $msg2." + puts "Rep$tnum: Master logs are [lindex $l 0]" + puts "Rep$tnum: Client logs are [lindex $l 1]" + rep098_sub $method $niter $tnum $l $r $args + } + } +} + +proc rep098_sub { method niter tnum logset recargs largs } { + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global testdir + global util_path + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_max [expr $pagesize * 8] + + set m_logtype [lindex $logset 0] + set c_logtype [lindex $logset 1] + + # In-memory logs cannot be used with -txn nosync. 
+ set m_logargs [adjust_logargs $m_logtype] + set c_logargs [adjust_logargs $c_logtype] + set m_txnargs [adjust_txnargs $m_logtype] + set c_txnargs [adjust_txnargs $c_logtype] + + # Open a master. + repladd 1 + set ma_envcmd "berkdb_env_noerr -create $m_txnargs $repmemargs \ + $m_logargs -log_max $log_max -errpfx MASTER $verbargs \ + -lock_max_locks 20000 \ + -home $masterdir -rep_transport \[list 1 replsend\]" + set masterenv [eval $ma_envcmd $recargs -rep_master] + + # Run rep_test in the master only. + puts "\tRep$tnum.a: Running rep_test in replicated env." + set start 0 + if { $databases_in_memory } { + set testfile { "" "test.db" } + set bigfile { "" "big.db" } + } else { + set testfile "test.db" + set bigfile "big.db" + } + set omethod [convert_method $method] + set dbargs [convert_args $method $largs] + set mdb [eval {berkdb_open} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile ] + error_check_good reptest_db [is_valid_db $mdb] TRUE + # + # Create a database with lots of items forcing lots of pages. + # We want two databases so that we + # + set bigdb [eval {berkdb_open} -env $masterenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $bigfile ] + error_check_good reptest_db [is_valid_db $bigdb] TRUE + + set stop 0 + set bigniter [expr $niter * 10] + while { $stop == 0 } { + eval rep_test \ + $method $masterenv $mdb $niter $start $start 0 $largs + incr start $niter + eval rep_test \ + $method $masterenv $bigdb $bigniter $start $start 0 $largs + incr start $bigniter + + puts "\tRep$tnum.a.1: Run db_archive on master." + if { $m_logtype == "on-disk" } { + $masterenv log_flush + eval exec $util_path/db_archive -d -h $masterdir + } + # + # Make sure we have moved beyond the first log file. + # + set first_master_log [get_logfile $masterenv first] + if { $first_master_log > 1 } { + set stop 1 + } + } + + puts "\tRep$tnum.b: Open client." 
+ repladd 2 + set cl_envcmd "berkdb_env_noerr -create $c_txnargs $repmemargs \ + $c_logargs -log_max $log_max -errpfx CLIENT $verbargs \ + -home $clientdir -rep_transport \[list 2 replsend\]" + set clientenv [eval $cl_envcmd $recargs -rep_client] + set envlist "{$masterenv 1} {$clientenv 2}" + + # + # Process messages until the client gets into the SYNC_PAGE state. + # We want to stop once we have the file information telling us + # how many pages there are, but before # we ask for the pages. + # Then we can remove some of the pages the master said were there. + # + set stop 0 + while { $stop == 0 } { + set nproced 0 + incr nproced [proc_msgs_once $envlist NONE err] + error_check_bad nproced $nproced 0 + set clstat [exec $util_path/db_stat \ + -N -r -R A -h $clientdir] + if { [is_substr $clstat "SYNC_PAGE"] } { + set stop 1 + } + } + # + # At this point the client has all the file info about both + # databases. Now let's remove pages from big.db. + # + if { [is_queue $method] == 0 } { + set consume "" + set statfld "Page count" + } else { + set consume "-consume" + set statfld "Number of pages" + } + set pg1 [stat_field $bigdb stat $statfld] + set txn [$masterenv txn] + set dbc [$bigdb cursor -txn $txn] + # + # Note, we get the last item and then move to the prev so that + # we remove all but the last item. We use the last item instead + # of the first so that the queue head moves. 
+ # + set kd [$dbc get -last] + for { set kd [$dbc get -prev] } { [llength $kd] > 0 } \ + { set kd [$dbc get -prev] } { + error_check_good del_item [eval {$dbc del} $consume] 0 + } + error_check_good dbc_close [$dbc close] 0 + if { [is_queue $method] == 0 } { + set stat [catch {eval $bigdb compact -txn $txn -freespace} ret] + error_check_good compact $stat 0 + } + error_check_good txn_commit [$txn commit] 0 + $bigdb sync + set pg2 [stat_field $bigdb stat $statfld] + error_check_good pgcnt [expr $pg2 < $pg1] 1 + puts "\tRep$tnum.c: Process msgs after page count reduction $pg1 to $pg2" + # + # Now that we've removed pages, let the init complete. + # + process_msgs $envlist + + puts "\tRep$tnum.d: Verify logs and databases" + set cdb [eval {berkdb_open_noerr} -env $clientenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $testfile] + error_check_good reptest_db [is_valid_db $cdb] TRUE + set cbdb [eval {berkdb_open_noerr} -env $clientenv -auto_commit\ + -create -mode 0644 $omethod $dbargs $bigfile] + error_check_good reptest_db [is_valid_db $cdb] TRUE + if { $databases_in_memory } { + rep_verify_inmem $masterenv $clientenv $mdb $cdb + rep_verify_inmem $masterenv $clientenv $bigdb $cbdb + } else { + rep_verify $masterdir $masterenv $clientdir $clientenv 1 + } + + # Make sure log files are on-disk or not as expected. + check_log_location $masterenv + check_log_location $clientenv + + error_check_good mdb_close [$mdb close] 0 + error_check_good mdb_close [$bigdb close] 0 + error_check_good cdb_close [$cdb close] 0 + error_check_good cdb_close [$cbdb close] 0 + error_check_good masterenv_close [$masterenv close] 0 + error_check_good clientenv_close [$clientenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/repmgr001.tcl b/test/tcl/repmgr001.tcl new file mode 100644 index 00000000..9059a0e5 --- /dev/null +++ b/test/tcl/repmgr001.tcl @@ -0,0 +1,198 @@ +# +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr001 +# TEST Basic repmgr test. +# TEST +# TEST Run all mix-and-match combinations of the basic_repmgr_test. +# TEST +proc repmgr001 { {display 0} {run 1} args } { + + source ./include.tcl + if { !$display && $is_freebsd_test == 1 } { + puts "Skipping replication manager tests on FreeBSD platform." + return + } + + run_repmgr_tests basic +} + +# This is the basis for a number of simple repmgr test cases. It creates +# an appointed master and two clients, calls rep_test to process some records +# and verifies the resulting databases. The following parameters control +# runtime options: +# niter - number of records to process +# inmemdb - put databases in-memory (0, 1) +# inmemlog - put logs in-memory (0, 1) +# inmemrep - put replication files in-memory (0, 1) +# envprivate - put region files in-memory (0, 1) +# bulk - use bulk processing (0, 1) +# +proc basic_repmgr_test { niter inmemdb inmemlog \ + inmemrep envprivate bulk args } { + + source ./include.tcl + global rep_verbose + global verbose_type + global overflowword1 + global overflowword2 + global databases_in_memory + set overflowword1 "0" + set overflowword2 "0" + set nsites 3 + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set largs [convert_args $method $args] + + # Set databases_in_memory for this test, preserving original value. 
+ if { $inmemdb } { + set restore_dbinmem $databases_in_memory + set databases_in_memory 1 + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args. + set logtype "on-disk" + if { $inmemlog } { + set logtype "in-memory" + } + set logargs [adjust_logargs $logtype] + set txnargs [adjust_txnargs $logtype] + + # Determine in-memory replication argument for environments. + if { $inmemrep } { + set repmemarg "-rep_inmem_files " + } else { + set repmemarg "" + } + + # Determine argument for region files (env -private). + if { $envprivate } { + set private " -private" + } else { + set private "" + } + + # Call the generalized header routine. + print_repmgr_headers basic_repmgr_test $niter $inmemdb \ + $inmemlog $inmemrep $envprivate $bulk + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tBasic repmgr test.a: Start an appointed master." + set ma_envcmd "berkdb_env_noerr -create $logargs $verbargs \ + $private \ + -errpfx MASTER -home $masterdir $txnargs -rep -thread \ + -lock_max_locks 10000 -lock_max_objects 10000 $repmemarg" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open first client + puts "\tBasic repmgr test.b: Start first client." 
+ set cl_envcmd "berkdb_env_noerr -create $verbargs $logargs \ + $private \ + -errpfx CLIENT -home $clientdir $txnargs -rep -thread \ + -lock_max_locks 10000 -lock_max_objects 10000 $repmemarg" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + # Open second client + puts "\tBasic repmgr test.c: Start second client." + set cl2_envcmd "berkdb_env_noerr -create $verbargs $logargs \ + $private \ + -errpfx CLIENT2 -home $clientdir2 $txnargs -rep -thread \ + -lock_max_locks 10000 -lock_max_objects 10000 $repmemarg" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv2 + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tBasic repmgr test.d: Run some transactions at master." + if { $bulk } { + # Turn on bulk processing on master. + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + + eval rep_test_bulk $method $masterenv NULL $niter 0 0 0 $largs + + # Must turn off bulk because some configs (debug_rop/wop) + # generate log records when verifying databases. + error_check_good set_bulk [$masterenv rep_config {bulk off}] 0 + } else { + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + } + + puts "\tBasic repmgr test.e: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + # For in-memory replication, verify replication files not there. 
+ if { $inmemrep } { + puts "\tBasic repmgr test.f: Verify no rep files on disk." + no_rep_files_on_disk $masterdir + no_rep_files_on_disk $clientdir + no_rep_files_on_disk $clientdir2 + } + + # For private envs, verify no region files on disk. + if { $envprivate } { + puts "\tBasic repmgr test.g: Verify no region files on disk." + no_region_files_on_disk $masterdir + no_region_files_on_disk $clientdir + no_region_files_on_disk $clientdir2 + } + + # Restore original databases_in_memory value. + if { $inmemdb } { + set databases_in_memory $restore_dbinmem + } + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} + diff --git a/test/tcl/repmgr002.tcl b/test/tcl/repmgr002.tcl new file mode 100644 index 00000000..a7bbac89 --- /dev/null +++ b/test/tcl/repmgr002.tcl @@ -0,0 +1,241 @@ +# +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr002 +# TEST Basic repmgr test. +# TEST +# TEST Run all combinations of the basic_repmgr_election_test. +# TEST +proc repmgr002 { {display 0} {run 1} args } { + + source ./include.tcl + if { !$display && $is_freebsd_test == 1 } { + puts "Skipping replication manager tests on FreeBSD platform." + return + } + + run_repmgr_tests election +} +# +# This is the basis for simple repmgr election test cases. It opens three +# clients of different priorities and makes sure repmgr elects the +# expected master. Then it shuts the master down and makes sure repmgr +# elects the expected remaining client master. Then it makes sure the former +# master can join as a client. 
The following parameters control +# runtime options: +# niter - number of records to process +# inmemdb - put databases in-memory (0, 1) +# inmemlog - put logs in-memory (0, 1) +# inmemrep - put replication files in-memory (0, 1) +# envprivate - put region files in-memory (0, 1) +# bulk - use bulk processing (0, 1) +# +proc basic_repmgr_election_test { niter inmemdb \ + inmemlog inmemrep envprivate bulk args } { + + source ./include.tcl + global rep_verbose + global verbose_type + global overflowword1 + global overflowword2 + global databases_in_memory + set overflowword1 "0" + set overflowword2 "0" + set nsites 3 + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set largs [convert_args $method $args] + + if { $inmemdb } { + set restore_dbinmem $databases_in_memory + set databases_in_memory 1 + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + set clientdir3 $testdir/CLIENTDIR3 + + file mkdir $clientdir + file mkdir $clientdir2 + file mkdir $clientdir3 + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args. + set logtype "on-disk" + if { $inmemlog } { + set logtype "in-memory" + } + set logargs [adjust_logargs $logtype] + set txnargs [adjust_txnargs $logtype] + + # Determine in-memory replication argument for environments. Group + # Membership needs the "legacy" workaround for in-memory replication + # files. + # + set repmemarg "" + set creator_flag "creator" + set legacy_flag "" + if { $inmemrep } { + set repmemarg " -rep_inmem_files " + set creator_flag "" + set legacy_flag "legacy" + } + + # Determine argument for region files, on disk or in-mem. 
+ set private "" + if { $envprivate } { + set private " -private " + } + + print_repmgr_headers basic_repmgr_election_test $niter $inmemdb\ + $inmemlog $inmemrep $envprivate $bulk + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + puts "\tBasic repmgr election test.a: Start three clients." + + # Open first client + set cl_envcmd "berkdb_env_noerr -create \ + $txnargs $verbargs $logargs $private \ + -errpfx CLIENT -home $clientdir -rep -thread $repmemarg" + set clientenv [eval $cl_envcmd] + set cl1_repmgr_conf "-ack all -pri 100 \ + -timeout {connection_retry 20000000} \ + -local { localhost [lindex $ports 0] \ + $creator_flag $legacy_flag } \ + -remote { localhost [lindex $ports 1] $legacy_flag } \ + -remote { localhost [lindex $ports 2] $legacy_flag } \ + -start elect" + eval $clientenv repmgr $cl1_repmgr_conf + + # Open second client + set cl2_envcmd "berkdb_env_noerr -create \ + $txnargs $verbargs $logargs $private \ + -errpfx CLIENT2 -home $clientdir2 -rep -thread $repmemarg" + set clientenv2 [eval $cl2_envcmd] + set cl2_repmgr_conf "-ack all -pri 30 \ + -timeout {connection_retry 10000000} \ + -local { localhost [lindex $ports 1] $legacy_flag } \ + -remote { localhost [lindex $ports 0] $legacy_flag } \ + -remote { localhost [lindex $ports 2] $legacy_flag } \ + -start elect" + eval $clientenv2 repmgr $cl2_repmgr_conf + + puts "\tBasic repmgr election test.b: Elect first client master." 
+ await_expected_master $clientenv + set masterenv $clientenv + set masterdir $clientdir + await_startup_done $clientenv2 + + # Open third client + set cl3_envcmd "berkdb_env_noerr -create \ + $txnargs $verbargs $logargs $private \ + -errpfx CLIENT3 -home $clientdir3 -rep -thread $repmemarg" + set clientenv3 [eval $cl3_envcmd] + set cl3_repmgr_conf "-ack all -pri 20 \ + -timeout {connection_retry 5000000} \ + -local { localhost [lindex $ports 2] $legacy_flag } \ + -remote { localhost [lindex $ports 0] $legacy_flag } \ + -remote { localhost [lindex $ports 1] $legacy_flag } \ + -start elect" + eval $clientenv3 repmgr $cl3_repmgr_conf + await_startup_done $clientenv3 + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tBasic repmgr election test.c: Run some transactions at master." + if { $bulk } { + # Turn on bulk processing on master. + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + + eval rep_test_bulk $method $masterenv NULL $niter 0 0 0 $largs + + # Must turn off bulk because some configs (debug_rop/wop) + # generate log records when verifying databases. + error_check_good set_bulk [$masterenv rep_config {bulk off}] 0 + } else { + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + } + + puts "\tBasic repmgr election test.d: Verify client database contents." + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + rep_verify $masterdir $masterenv $clientdir3 $clientenv3 1 1 1 + + puts "\tBasic repmgr election test.e:\ + Shut down master, elect second client master." + error_check_good client_close [$clientenv close] 0 + await_expected_master $clientenv2 + set masterenv $clientenv2 + await_startup_done $clientenv3 + + # Open -recover to clear env region, including startup_done value. + # Skip for in-memory logs, since that doesn't work with -recover. + if { !$inmemlog } { + puts "\tBasic repmgr election test.f: \ + Restart former master as client." 
+ set clientenv [eval $cl_envcmd -recover] + eval $clientenv repmgr $cl1_repmgr_conf + await_startup_done $clientenv + + puts "\tBasic repmgr election test.g: \ + Run some transactions at new master." + eval rep_test $method $masterenv NULL $niter $niter 0 0 $largs + + puts "\tBasic repmgr election test.h: \ + Verify client database contents." + set masterdir $clientdir2 + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir3 $clientenv3 1 1 1 + } + + # For in-memory replication, verify replication files not there. + if { $inmemrep } { + puts "\tBasic repmgr election test.i: \ + Verify no replication files on disk." + no_rep_files_on_disk $clientdir + no_rep_files_on_disk $clientdir2 + no_rep_files_on_disk $clientdir3 + } + + # For private environments, verify region files are not on disk. + if { $envprivate } { + puts "\tBasic repmgr election test.j: \ + Verify no region files on disk." + no_region_files_on_disk $clientdir + no_region_files_on_disk $clientdir2 + no_region_files_on_disk $clientdir3 + } + + # Restore original databases_in_memory value. + if { $inmemdb } { + set databases_in_memory $restore_dbinmem + } + + if { !$inmemlog } { + error_check_good client_close [$clientenv close] 0 + } + error_check_good client3_close [$clientenv3 close] 0 + error_check_good client2_close [$clientenv2 close] 0 +} + diff --git a/test/tcl/repmgr003.tcl b/test/tcl/repmgr003.tcl new file mode 100644 index 00000000..e1e7beaa --- /dev/null +++ b/test/tcl/repmgr003.tcl @@ -0,0 +1,202 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr003 +# TEST Basic repmgr init test. +# TEST +# TEST Run all combinations of the basic_repmgr_init_test. 
+# TEST +proc repmgr003 { {display 0} {run 1} args } { + + source ./include.tcl + if { !$display && $is_freebsd_test == 1 } { + puts "Skipping replication manager tests on FreeBSD platform." + return + } + + run_repmgr_tests init +} + +# This is the basis for simple repmgr internal init test cases. It starts +# an appointed master and two clients, processing transactions between each +# additional site. Then it verifies all expected transactions are +# replicated. The following parameters control runtime options: +# niter - number of records to process +# inmemdb - put databases in-memory (0, 1) +# inmemlog - put logs in-memory (0, 1) +# inmemrep - put replication files in-memory (0, 1) +# envprivate - put region files in-memory (0, 1) +# bulk - use bulk processing (0, 1) +# +proc basic_repmgr_init_test { niter inmemdb inmemlog \ + inmemrep envprivate bulk args } { + + source ./include.tcl + global rep_verbose + global verbose_type + global overflowword1 + global overflowword2 + global databases_in_memory + set overflowword1 "0" + set overflowword2 "0" + set nsites 3 + + if { $is_freebsd_test == 1 } { + puts "Skipping basic_repmgr_init_test on FreeBSD platform." + return + } + + set method "btree" + set largs [convert_args $method $args] + + # Set databases_in_memory for this test, preserving original value. + if { $inmemdb } { + set restore_dbinmem $databases_in_memory + set databases_in_memory 1 + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # In-memory logs require a large log buffer, and cannot + # be used with -txn nosync. Adjust the args. 
+ set logtype "on-disk" + if { $inmemlog } { + set logtype "in-memory" + } + + set logargs [adjust_logargs $logtype] + set txnargs [adjust_txnargs $logtype] + + # Determine in-memory replication argument for environments. Also + # beef up cachesize for clients because the second client will need + # to catch up with a few sets of records which could build up in the + # tempdb, which is in-memory in this case. + if { $inmemrep } { + set repmemarg " -rep_inmem_files " + set cachesize [expr 2 * (1024 * 1024)] + set cacheargs "-cachesize { 0 $cachesize 1 }" + } else { + set repmemarg "" + set cacheargs "" + } + + # Determine argument for region files. + if { $envprivate } { + set private "-private " + } else { + set private "" + } + + print_repmgr_headers basic_repmgr_init_test $niter $inmemdb\ + $inmemlog $inmemrep $envprivate $bulk + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tBasic repmgr init test.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs $private \ + $logargs $txnargs \ + -errpfx MASTER -home $masterdir -rep -thread $repmemarg" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + puts "\tBasic repmgr init test.b: Run some transactions at master." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + # Open first client + puts "\tBasic repmgr init test.c: Start first client." 
+ set cl_envcmd "berkdb_env_noerr -create $verbargs $private \ + $logargs $txnargs $cacheargs \ + -errpfx CLIENT -home $clientdir -rep -thread $repmemarg" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tBasic repmgr init test.d: Run some more transactions at master." + if { $bulk } { + error_check_good set_bulk [$masterenv rep_config {bulk on}] 0 + eval rep_test_bulk $method $masterenv NULL $niter 0 0 0 $largs + + # Must turn off bulk because some configs (debug_rop/wop) + # generate log records when verifying databases. + error_check_good set_bulk [$masterenv rep_config {bulk off}] 0 + } else { + eval rep_test $method $masterenv NULL $niter $niter 0 0 $largs + } + + # Open second client + puts "\tBasic repmgr init test.e: Start second client." + set cl_envcmd "berkdb_env_noerr -create $verbargs $private \ + $logargs $txnargs $cacheargs \ + -errpfx CLIENT2 -home $clientdir2 -rep -thread $repmemarg" + set clientenv2 [eval $cl_envcmd] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv2 + + puts "\tBasic repmgr init test.f: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + # For in-memory replication, verify replication files not there. + if { $inmemrep } { + puts "\tBasic repmgr init test.g:\ + Verify no replication files on disk." 
+ no_rep_files_on_disk $masterdir + no_rep_files_on_disk $clientdir + no_rep_files_on_disk $clientdir2 + } + + # For private envs, verify region files are not on disk. + if { $envprivate } { + puts "\tBasic repmgr init test.h:\ + Verify no region files on disk." + no_region_files_on_disk $masterdir + no_region_files_on_disk $clientdir + no_region_files_on_disk $clientdir2 + } + + # Restore original databases_in_memory value. + if { $inmemdb } { + set databases_in_memory $restore_dbinmem + } + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr007.tcl b/test/tcl/repmgr007.tcl new file mode 100644 index 00000000..e9beaf2c --- /dev/null +++ b/test/tcl/repmgr007.tcl @@ -0,0 +1,147 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr007 +# TEST Basic repmgr client shutdown/restart test. +# TEST +# TEST Start an appointed master site and two clients. Shut down and +# TEST restart each client, processing transactions after each restart. +# TEST Verify all expected transactions are replicated. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr007 { { niter 100 } { tnum "007" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr client shutdown/restart test." 
+ repmgr007_sub $method $niter $tnum $args +} + +proc repmgr007_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 3 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tRepmgr$tnum.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open first client + puts "\tRepmgr$tnum.b: Start first client." + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + # Open second client + puts "\tRepmgr$tnum.c: Start second client." 
+ set cl2_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT2 -home $clientdir2 -txn -rep -thread" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv2 + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tRepmgr$tnum.d: Run first set of transactions at master." + set start 0 + eval rep_test $method $masterenv NULL $niter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.e: Shut down first client, wait and restart it." + error_check_good client_close [$clientenv close] 0 + tclsleep 5 + # Open -recover to clear env region, including startup_done value. + set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.f: Run second set of transactions at master." + eval rep_test $method $masterenv NULL $niter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.g: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + puts "\tRepmgr$tnum.h: Shut down second client, wait and restart it." + error_check_good client_close [$clientenv2 close] 0 + tclsleep 5 + # Open -recover to clear env region, including startup_done value. 
+ set clientenv2 [eval $cl2_envcmd -recover] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv2 + + puts "\tRepmgr$tnum.i: Run third set of transactions at master." + eval rep_test $method $masterenv NULL $niter $start 0 0 $largs + + puts "\tRepmgr$tnum.j: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr009.tcl b/test/tcl/repmgr009.tcl new file mode 100644 index 00000000..8bb09013 --- /dev/null +++ b/test/tcl/repmgr009.tcl @@ -0,0 +1,186 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr009 +# TEST repmgr API error test. +# TEST +# TEST Try a variety of repmgr calls that result in errors. Also +# TEST try combinations of repmgr and base replication API calls +# TEST that result in errors. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr009 { { niter 10 } { tnum "009" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr API error test." 
+ repmgr009_sub $method $niter $tnum $args +} + +proc repmgr009_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 2 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports [expr $nsites * 5]] + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + set masterdir2 $testdir/MASTERDIR2 + set clientdir $testdir/CLIENTDIR + set norepdir $testdir/NOREPDIR + + file mkdir $masterdir + file mkdir $masterdir2 + file mkdir $clientdir + file mkdir $norepdir + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + puts "\tRepmgr$tnum.a: Set up environment without repmgr." + set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + error_check_good masterenv_close [$masterenv close] 0 + + puts "\tRepmgr$tnum.b: Call repmgr without open master (error)." + catch {$masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master} res + error_check_good errchk [is_substr $res "invalid command"] 1 + + puts "\tRepmgr$tnum.c: Call repmgr_stat without open master (error)." + catch {[stat_field $masterenv repmgr_stat "Connections dropped"]} res + error_check_good errchk [is_substr $res "invalid command"] 1 + + puts "\tRepmgr$tnum.d: Start a master with repmgr." + repladd 1 + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + puts "\tRepmgr$tnum.e: Start repmgr with no local sites (error)." 
+ set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + catch {$clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -remote [list localhost [lindex $ports 7]] \ + -start client} res + error_check_good errchk [is_substr $res \ + "local site must be named before calling repmgr_start"] 1 + error_check_good client_close [$clientenv close] 0 + + puts "\tRepmgr$tnum.f: Start repmgr with two local sites (error)." + set clientenv [eval $cl_envcmd] + catch {$clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 8]] \ + -local [list localhost [lindex $ports 9]] \ + -start client} res + error_check_good errchk [string match "*already*set*" $res] 1 + error_check_good client_close [$clientenv close] 0 + + puts "\tRepmgr$tnum.g: Start a client." + repladd 2 + set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.h: Start repmgr a second time (error)." + catch {$clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client} res + error_check_good errchk [is_substr $res "repmgr is already started"] 1 + + puts "\tRepmgr$tnum.i: Call rep_start after starting repmgr (error)." + catch {$clientenv rep_start -client} res + error_check_good errchk [is_substr $res \ + "cannot call from Replication Manager application"] 1 + + puts "\tRepmgr$tnum.j: Call rep_process_message (error)." 
+ set envlist "{$masterenv 1} {$clientenv 2}" + catch {$clientenv rep_process_message 0 0 0} res + error_check_good errchk [is_substr $res \ + "cannot call from Replication Manager application"] 1 + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tRepmgr$tnum.k: Run some transactions at master." + eval rep_test $method $masterenv NULL $niter $niter 0 0 $largs + + puts "\tRepmgr$tnum.l: Call rep_elect (error)." + catch {$clientenv rep_elect 2 2 2 5000000} res + error_check_good errchk [is_substr $res \ + "cannot call from Replication Manager application"] 1 + + puts "\tRepmgr$tnum.m: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 + + puts "\tRepmgr$tnum.n: Start a master with base API rep_start." + set ma_envcmd2 "berkdb_env_noerr -create $verbargs \ + -home $masterdir2 -errpfx MASTER -txn -thread -rep_master \ + -rep_transport \[list 1 replsend\]" + set masterenv2 [eval $ma_envcmd2] + + puts "\tRepmgr$tnum.o: Call repmgr after rep_start (error)." + catch {$masterenv2 repmgr -ack all \ + -local [list localhost [lindex $ports 0]] \ + -start master} res + # Internal repmgr calls return EINVAL after hitting + # base API application test. + error_check_good errchk [is_substr $res "invalid argument"] 1 + + error_check_good masterenv_close [$masterenv2 close] 0 + + puts "\tRepmgr$tnum.p: Start an env without starting rep or repmgr." + set norep_envcmd "berkdb_env_noerr -create $verbargs \ + -home $norepdir -errpfx NOREP -txn -thread \ + -rep_transport \[list 1 replsend\]" + set norepenv [eval $norep_envcmd] + + puts "\tRepmgr$tnum.q: Call rep_elect before rep_start (error)." + catch {$norepenv rep_elect 2 2 2 5000000} res + # Internal rep_elect call returns EINVAL if rep_start has not + # been called first. 
+ error_check_good errchk [is_substr $res "invalid argument"] 1 + + error_check_good norepenv_close [$norepenv close] 0 + replclose $testdir/MSGQUEUEDIR +} diff --git a/test/tcl/repmgr010.tcl b/test/tcl/repmgr010.tcl new file mode 100644 index 00000000..d2df08e8 --- /dev/null +++ b/test/tcl/repmgr010.tcl @@ -0,0 +1,167 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr010 +# TEST Acknowledgement policy and timeout test. +# TEST +# TEST Verify that "quorum" acknowledgement policy succeeds with fewer than +# TEST nsites running. Verify that "all" acknowledgement policy results in +# TEST ack failures with fewer than nsites running. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr010 { { niter 100 } { tnum "010" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr ack policy and timeout test." + repmgr010_sub $method $niter $tnum $args +} + +proc repmgr010_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 3 + + set small_iter [expr $niter / 10] + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + puts "\tRepmgr$tnum.a: Start master, two clients, ack policy quorum." + # Open a master. 
+ set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack quorum \ + -timeout {connection_retry 20000000} \ + -timeout {ack 5000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open first client + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack quorum \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + # Open second client + set cl2_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT2 -home $clientdir2 -txn -rep -thread" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack quorum \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv2 + + puts "\tRepmgr$tnum.b: Run first set of transactions at master." + set start 0 + eval rep_test $method $masterenv NULL $niter $start 0 0 $largs + incr start $niter + + # + # Special verification needed for quorum ack policy. Wait + # longer than ack timeout (default 1 second) then check for + # ack failures (perm_failed events). Quorum only guarantees + # that transactions replicated to one site or the other, so + # test for this condition instead of both sites. + # + puts "\tRepmgr$tnum.c: Verify both client databases, no ack failures." 
+ error_check_good quorum_perm_failed1 \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] 0 + catch {rep_verify\ + $masterdir $masterenv $clientdir $clientenv 1 1 1} ver1 + catch {rep_verify\ + $masterdir $masterenv $clientdir2 $clientenv2 1 1 1} ver2 + error_check_good onesite [expr [string length $ver1] == 0 || \ + [string length $ver2] == 0] 1 + + puts "\tRepmgr$tnum.d: Shut down first client." + error_check_good client_close [$clientenv close] 0 + + puts "\tRepmgr$tnum.e: Run second set of transactions at master." + eval rep_test $method $masterenv NULL $small_iter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.f: Verify client database, no ack failures." + error_check_good quorum_perm_failed2 \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] 0 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + puts "\tRepmgr$tnum.g: Adjust all sites to ack policy all." + # Reopen first client with ack policy all + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + # Open -recover to clear env region, including startup_done value. + set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + # Adjust other sites to ack policy all + $masterenv repmgr -ack all + $clientenv2 repmgr -ack all + + puts "\tRepmgr$tnum.h: Shut down first client." + error_check_good client_close [$clientenv close] 0 + set init_perm_failed \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tRepmgr$tnum.i: Run third set of transactions at master." 
+ eval rep_test $method $masterenv NULL $small_iter $start 0 0 $largs + + puts "\tRepmgr$tnum.j: Verify client database, some ack failures." + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + error_check_good all_perm_failed [expr \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] \ + > $init_perm_failed] 1 + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr011.tcl b/test/tcl/repmgr011.tcl new file mode 100644 index 00000000..2570aea0 --- /dev/null +++ b/test/tcl/repmgr011.tcl @@ -0,0 +1,116 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr011 +# TEST repmgr two site strict majority test. +# TEST +# TEST Start an appointed master and one client with 2 site strict +# TEST majority set. Shut down the master site, wait and verify that +# TEST the client site was not elected master. Start up master site +# TEST and verify that transactions are processed as expected. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr011 { { niter 100 } { tnum "011" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr two site strict majority test." 
+ repmgr011_sub $method $niter $tnum $args +} + +proc repmgr011_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 2 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $clientdir + file mkdir $clientdir2 + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open first client as master and set 2site_strict. + puts "\tRepmgr$tnum.a: Start first client as master." + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + error_check_good c1strict [$clientenv rep_config {mgr2sitestrict on}] 0 + + # Open second client and set 2site_strict. + puts "\tRepmgr$tnum.b: Start second client." + set cl2_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT2 -home $clientdir2 -txn -rep -thread" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv2 + error_check_good c2strict [$clientenv2 rep_config \ + {mgr2sitestrict on}] 0 + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tRepmgr$tnum.c: Run first set of transactions at master." + eval rep_test $method $clientenv NULL $niter 0 0 0 $largs + + puts "\tRepmgr$tnum.d: Verifying client database contents." 
+ rep_verify $clientdir $clientenv $clientdir2 $clientenv2 1 1 1 + + puts "\tRepmgr$tnum.e: Shut down first client (current master)." + error_check_good client_close [$clientenv close] 0 + + puts "\tRepmgr$tnum.f: Wait, then verify no master." + tclsleep 20 + error_check_bad c2_master [stat_field $clientenv2 rep_stat "Master"] 1 + + puts "\tRepmgr$tnum.g: Restart first client as master" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -start master + await_expected_master $clientenv + + puts "\tRepmgr$tnum.h: Run second set of transactions at master." + eval rep_test $method $clientenv NULL $niter $niter 0 0 $largs + + puts "\tRepmgr$tnum.i: Verifying client database contents." + rep_verify $clientdir $clientenv $clientdir2 $clientenv2 1 1 1 + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 +} diff --git a/test/tcl/repmgr012.tcl b/test/tcl/repmgr012.tcl new file mode 100644 index 00000000..177b7f2f --- /dev/null +++ b/test/tcl/repmgr012.tcl @@ -0,0 +1,123 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr012 +# TEST repmgr heartbeat test. +# TEST +# TEST Start an appointed master and one client site. Set heartbeat +# TEST send and monitor values and process some transactions. Stop +# TEST sending heartbeats from master and verify that client sees +# TEST a dropped connection. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr012 { { niter 100 } { tnum "012" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." 
+ return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr heartbeat test." + repmgr012_sub $method $niter $tnum $args +} + +proc repmgr012_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 2 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tRepmgr$tnum.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open a client. + puts "\tRepmgr$tnum.b: Start a client." + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + + # + # Use of -ack all guarantees replication complete before repmgr send + # function returns and rep_test finishes. + # + puts "\tRepmgr$tnum.c: Run first set of transactions at master." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + puts "\tRepmgr$tnum.d: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + # Timeouts are in microseconds, heartbeat monitor should be + # longer than heartbeat_send. 
+ puts "\tRepmgr$tnum.e: Set heartbeat timeouts." + $masterenv repmgr -timeout {heartbeat_send 50000} + $clientenv repmgr -timeout {heartbeat_monitor 90000} + + puts "\tRepmgr$tnum.f: Run second set of transactions at master." + eval rep_test $method $masterenv NULL $niter $niter 0 0 $largs + + puts "\tRepmgr$tnum.g: Verifying client database contents." + rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + + # Make sure client reacts to the lost master connection by holding an + # election. To do so, first check initial value of stats, then make + # sure it increases. + # + set init_eh [stat_field $clientenv rep_stat "Elections held"] + set init_cd [stat_field $clientenv repmgr_stat "Connections dropped"] + + # Make sure client notices the lack of heartbeat. Since the client's + # heartbeat monitoring granularity is < 1 second, if we wait up to 5 + # seconds that ought to give it plenty of time to notice and react. + # + puts "\tRepmgr$tnum.h: Remove master heartbeat and wait." + $masterenv repmgr -timeout {heartbeat_send 0} + set max_wait 5 + await_condition {[stat_field $clientenv rep_stat \ + "Elections held"] > $init_eh} $max_wait + error_check_good conndrop [expr \ + [stat_field $clientenv repmgr_stat "Connections dropped"] \ + > $init_cd] 1 + + error_check_good client_close [$clientenv close] 0 + error_check_good master_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr013.tcl b/test/tcl/repmgr013.tcl new file mode 100644 index 00000000..821b8aff --- /dev/null +++ b/test/tcl/repmgr013.tcl @@ -0,0 +1,127 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr013 +# TEST Site list test. +# TEST +# TEST Configure a master and two clients where one client is a peer of +# TEST the other and verify resulting site lists. +# TEST +# TEST Run for btree only because access method shouldn't matter. 
+# TEST +proc repmgr013 { { niter 100 } { tnum "013" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr site list test." + repmgr013_sub $method $niter $tnum $args +} + +proc repmgr013_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 3 + + set small_iter [expr $niter / 10] + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + puts "\tRepmgr$tnum.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + puts "\tRepmgr$tnum.b: Start first client." + set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.c: Start second client as peer of first." 
+ set cl2_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT2 -home $clientdir2 -txn -rep -thread" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1] peer] \ + -start client + await_startup_done $clientenv2 + + puts "\tRepmgr$tnum.d: Verify repmgr site lists." + verify_sitelist $masterenv $nsites {} + verify_sitelist $clientenv $nsites {} + verify_sitelist $clientenv2 $nsites [list [lindex $ports 1]] + + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} + +# For numsites, supply the nsites value defined for the test. +# For peervec, supply a list of ports whose sites should be considered peers. +proc verify_sitelist { env numsites peervec } { + set sitelist [$env repmgr_site_list] + + # Make sure there are expected number of other sites. + error_check_good lenchk [llength $sitelist] [expr {$numsites - 1}] + + # Make sure eid and port are integers; host, status and peer are + # the expected string values. + set pvind 0 + foreach tuple $sitelist { + error_check_good eidchk [string is integer -strict \ + [lindex $tuple 0]] 1 + error_check_good hostchk [lindex $tuple 1] "localhost" + set port [lindex $tuple 2] + error_check_good portchk [string is integer -strict $port] 1 + error_check_good statchk [lindex $tuple 3] connected + if { [lsearch $peervec $port] >= 0 } { + error_check_good peerchk [lindex $tuple 4] peer + } else { + error_check_good npeerchk [lindex $tuple 4] non-peer + } + incr pvind + } +} diff --git a/test/tcl/repmgr017.tcl b/test/tcl/repmgr017.tcl new file mode 100644 index 00000000..789d3f7b --- /dev/null +++ b/test/tcl/repmgr017.tcl @@ -0,0 +1,156 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr017 +# TEST repmgr in-memory cache overflow test. +# TEST +# TEST Start an appointed master site and one client, putting databases, +# TEST environment regions, logs and replication files in-memory. Set +# TEST very small cachesize and run enough transactions to overflow cache. +# TEST Shut down and restart master and client, giving master a larger cache. +# TEST Run and verify a small number of transactions. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr017 { { niter 1000 } { tnum "017" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts \ + "Repmgr$tnum ($method): repmgr in-memory cache overflow test." + repmgr017_sub $method $niter $tnum $args +} + +proc repmgr017_sub { method niter tnum largs } { + global rep_verbose + global verbose_type + global databases_in_memory + + # Force databases in-memory for this test but preserve original + # value to restore later so that other tests aren't affected. + set restore_dbinmem $databases_in_memory + set databases_in_memory 1 + + # No need for test directories because test is entirely in-memory. + + set nsites 2 + set ports [available_ports $nsites] + set omethod [convert_method $method] + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + # In-memory logs cannot be used with -txn nosync. + set logargs [adjust_logargs "in-memory"] + set txnargs [adjust_txnargs "in-memory"] + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master with a very small cache. + puts "\tRepmgr$tnum.a: Start a master with a very small cache." 
+ set cacheargs "-cachesize {0 32768 1}" + set ma_envcmd "berkdb_env_noerr -create $logargs $txnargs $verbargs \ + -errpfx MASTER -rep -thread -rep_inmem_files -private $cacheargs" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open a client + puts "\tRepmgr$tnum.b: Start a client." + set cl_envcmd "berkdb_env_noerr -create $logargs $txnargs $verbargs \ + -errpfx CLIENT -rep -thread -rep_inmem_files -private" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.c: Run master transactions and verify full cache." + set dbname { "" "test.db" } + set mdb [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + set stat [catch { + rep_test $method $masterenv $mdb $niter 0 0 0 $largs } ret ] + error_check_good broke $stat 1 + error_check_good nomem \ + [is_substr $ret "not enough memory"] 1 + + puts "\tRepmgr$tnum.d: Close master and client." + error_check_good mdb_close [$mdb close] 0 + error_check_good client_close [$clientenv close] 0 + # Master close should return invalid argument. + catch { $masterenv close } ret2 + error_check_good needrec [is_substr $ret2 "invalid argument"] 1 + + puts "\tRepmgr$tnum.e: Restart master (with larger cache) and client." + # Recovery is a no-op with everything in-memory, but specify it + # anyway after closing the master environment with an error. + set cacheargs "" + set masterenv [eval $ma_envcmd -recover] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open -recover to clear env region, including startup_done value. 
+ set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.f: Perform small number of transactions on master." + set numtxns 10 + set mdb [eval "berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv $largs $dbname"] + set t [$masterenv txn] + for { set i 1 } { $i <= $numtxns } { incr i } { + error_check_good db_put \ + [eval $mdb put -txn $t $i [chop_data $method data$i]] 0 + } + error_check_good txn_commit [$t commit] 0 + + puts "\tRepmgr$tnum.g: Verify transactions on client." + set cdb [eval "berkdb_open_noerr -create -mode 0644 $omethod \ + -env $clientenv $largs $dbname"] + error_check_good reptest_db [is_valid_db $cdb] TRUE + for { set i 1 } { $i <= $numtxns } { incr i } { + set ret [lindex [$cdb get $i] 0] + error_check_good cdb_get $ret [list $i \ + [pad_data $method data$i]] + } + + # If the test had erroneously created replication files, they would + # be in the current working directory. Verify that this didn't happen. + puts "\tRepmgr$tnum.h: Verify no replication files on disk." + no_rep_files_on_disk "." + + # Restore original databases_in_memory value. + set databases_in_memory $restore_dbinmem + + error_check_good cdb_close [$cdb close] 0 + error_check_good mdb_close [$mdb close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good master_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr018.tcl b/test/tcl/repmgr018.tcl new file mode 100644 index 00000000..d5ac4b74 --- /dev/null +++ b/test/tcl/repmgr018.tcl @@ -0,0 +1,142 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr018 +# TEST Check repmgr stats. +# TEST +# TEST Start an appointed master and one client. 
Shut down the client, +# TEST run some transactions at the master and verify that there are +# TEST acknowledgement failures and one dropped connection. Shut down +# TEST and restart client again and verify that there are two dropped +# TEST connections. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr018 { { niter 20 } { tnum "018" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): Test of repmgr stats." + repmgr018_sub $method $niter $tnum $args +} + +proc repmgr018_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 2 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tRepmgr$tnum.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs -errpfx MASTER \ + -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open a client + puts "\tRepmgr$tnum.b: Start a client." 
+ set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \ + -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.c: Run some transactions at master." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + error_check_good perm_no_failed_stat \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] 0 + + # Remember number of connections dropped here for later test. + # Note that normal group membership initialization can result + # in a dropped connection at this point. + set drop1 [stat_field $masterenv repmgr_stat "Connections dropped"] + $clientenv close + + # Just do a few transactions (i.e., 3 of them), because each one is + # expected to time out, and if we did many the test would take a long + # time (with no benefit). + # + puts "\tRepmgr$tnum.d: Run transactions with no client." + eval rep_test $method $masterenv NULL 3 $niter $niter 0 $largs + + error_check_bad perm_failed_stat \ + [stat_field $masterenv repmgr_stat "Acknowledgement failures"] 0 + + # Wait up to 30 seconds when testing for dropped connections. This + # corresponds to the master connection_retry timeout. + set max_wait 30 + await_condition {[stat_field $masterenv repmgr_stat \ + "Connections dropped"] > $drop1} $max_wait + set drop2 [stat_field $masterenv repmgr_stat "Connections dropped"] + + # Bring the client back up, and down, a couple times, to test resetting + # of stats. + # + puts "\tRepmgr$tnum.e: Shut down client (pause), check dropped connection." + # Open -recover to clear env region, including startup_done value. 
+ set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + $clientenv close + + await_condition {[stat_field $masterenv repmgr_stat \ + "Connections dropped"] > $drop2} $max_wait + $masterenv repmgr_stat -clear + + puts "\tRepmgr$tnum.f: Shut down, pause, check dropped connection (reset)." + # Open -recover to clear env region, including startup_done value. + set clientenv [eval $cl_envcmd -recover] + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -start client + await_startup_done $clientenv + $clientenv close + + await_condition {[stat_field $masterenv repmgr_stat \ + "Connections dropped"] == 1} $max_wait + + error_check_good masterenv_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr023.tcl b/test/tcl/repmgr023.tcl new file mode 100644 index 00000000..8fac705d --- /dev/null +++ b/test/tcl/repmgr023.tcl @@ -0,0 +1,183 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr023 +# TEST Test of JOIN_FAILURE event for repmgr applications. +# TEST +# TEST Run for btree only because access method shouldn't matter. + +proc repmgr023 { { niter 50 } { tnum 023 } args } { + + source ./include.tcl + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping repmgr$tnum on QNX." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): Test of JOIN_FAILURE event." 
+ repmgr023_sub $method $niter $tnum $args +} + +proc repmgr023_sub { method niter tnum largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + file mkdir [set dirc $testdir/SITE_C] + foreach { porta portb portc } [available_ports 3] {} + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + set cmda "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dira" + set enva [eval $cmda] + $enva repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $porta] -start master + + set cmdb "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \ + -home $dirb" + set envb [eval $cmdb] + $envb repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $portb] -start client \ + -remote [list localhost $porta] + puts "\tRepmgr$tnum.a: wait for client B to sync with master." 
+ await_startup_done $envb + + set cmdc "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirc" + set envc [eval $cmdc] + $envc repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $portc] -start client \ + -remote [list localhost $porta] + puts "\tRepmgr$tnum.b: wait for client C to sync with master." + await_startup_done $envc + + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $enva test force noarchive_timeout + + # Run rep_test in the master. + puts "\tRepmgr$tnum.c: Running rep_test in replicated env." + set start 0 + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.d: Close client." + set last_client_log [get_logfile $envc last] + $envc close + + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master. + puts "\tRepmgr$tnum.e: Running rep_test in replicated env." + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.f: Run db_archive on master." + $enva log_flush + $enva test force noarchive_timeout + set res [eval exec $util_path/db_archive -d -h $dira] + set first_master_log [get_logfile $enva first] + if { $first_master_log > $last_client_log } { + set stop 1 + } + } + + puts "\tRepmgr$tnum.g: Restart client." + set envc [eval $cmdc -recover -event] + $envc rep_config {autoinit off} + $envc repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $portc] -start client \ + -remote [list localhost $porta] + + # Since we've turned off auto-init, but are too far behind to sync, we + # expect a join_failure event. 
+ # + await_condition {[expr [stat_field $envc rep_stat "Startup complete"] \ + || [is_event_present $envc join_failure]]} + + error_check_good failed [is_event_present $envc join_failure] 1 + + # Do a few more transactions at the master, and see that the client is + # still OK (i.e., simply that it's still accessible, that we can read + # data there), although of course it can't receive the new data. + # + puts "\tRepmgr$tnum.h: Put more new transactions, which won't\ + get to client C" + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + # Env a (the master) will match env b, but env c will not match. + rep_verify $dira $enva $dirb $envb 0 1 1 + rep_verify $dira $enva $dirc $envc 0 0 1 + + if {$databases_in_memory} { + set dbname { "" "test.db" } + } else { + set dbname "test.db" + } + set dbp [eval berkdb open \ + -env $envc [convert_method $method] $largs $dbname] + set dbc [$dbp cursor] + $dbc get -first + $dbc get -next + $dbc get -next + set result [$dbc get -next] + error_check_good got_data [llength $result] 1 + error_check_good got_data [llength [lindex $result 0]] 2 + $dbc close + $dbp close + + # Shut down the master, so as to force client B to take over. Since we + # didn't do any log archiving at B, client C should now be able to sync + # up again. + # + puts "\tRepmgr$tnum.i: Shut down master, client C should sync up." + $enva close + await_startup_done $envc 40 + + $envc close + $envb close + set test_be_quiet "" +} diff --git a/test/tcl/repmgr024.tcl b/test/tcl/repmgr024.tcl new file mode 100644 index 00000000..75e45f70 --- /dev/null +++ b/test/tcl/repmgr024.tcl @@ -0,0 +1,218 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr024 +# TEST Test of group-wide log archiving awareness. 
+# TEST Verify that log archiving will use the ack from the clients in +# TEST its decisions about what log files are allowed to be archived. +# +proc repmgr024 { { niter 50 } { tnum 024 } args } { + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping repmgr$tnum on QNX." + return + } + + set method "btree" + set args [convert_args $method $args] + puts "Repmgr$tnum ($method): group wide log archiving." + repmgr024_sub $method $niter $tnum $args +} + +proc repmgr024_sub { method niter tnum largs } { + global testdir + global util_path + global databases_in_memory + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + file mkdir [set dirc $testdir/SITE_C] + foreach { porta portb portc } [available_ports 3] {} + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + set cmda "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \ + -home $dira" + set enva [eval $cmda] + # Use quorum ack policy (default, therefore not specified) + # otherwise it will never wait when + # the client is closed and we want to give it a chance to + # wait later in the test. 
+ $enva repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $porta] -start master + + set cmdb "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \ + -home $dirb" + set envb [eval $cmdb] + $envb repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $portb] -start client \ + -remote [list localhost $porta] + puts "\tRepmgr$tnum.a: wait for client B to sync with master." + await_startup_done $envb + + set cmdc "berkdb_env_noerr -create -txn nosync \ + $verbargs $repmemargs -rep -thread \ + -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \ + -home $dirc" + set envc [eval $cmdc] + $envc repmgr -timeout {connection_retry 5000000} \ + -local [list localhost $portc] -start client \ + -remote [list localhost $porta] + puts "\tRepmgr$tnum.b: wait for client C to sync with master." + await_startup_done $envc + + + # Clobber replication's 30-second anti-archive timer, which will have + # been started by client sync-up internal init, so that we can do a + # log_archive in a moment. + # + $enva test force noarchive_timeout + + set stop 0 + set start 0 + while { $stop == 0 } { + # Run rep_test in the master. + puts "\tRepmgr$tnum.c: Running rep_test in replicated env." + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + set res [eval exec $util_path/db_archive -h $dira] + if { [llength $res] != 0 } { + set stop 1 + } + } + # Save list of files for later. + set files_arch $res + + puts "\tRepmgr$tnum.d: Close client." + $envc close + + # Now that the client closed its connection, verify that + # we cannot archive files. + # + # When a connection is closed, repmgr updates the 30 second + # noarchive timestamp in order to give the client process a + # chance to restart and rejoin the group. We verify that + # when the connection is closed the master cannot archive. + # due to the 30-second timer. 
+ # + set res [eval exec $util_path/db_archive -h $dira] + error_check_good files_archivable_closed [llength $res] 0 + + # + # Clobber the 30-second timer and verify we can again archive the + # files. + # + $enva test force noarchive_timeout + set res [eval exec $util_path/db_archive -h $dira] + error_check_good files_arch2 $files_arch $res + + set res [eval exec $util_path/db_archive -l -h $dirc] + set last_client_log [lindex [lsort $res] end] + + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master. + puts "\tRepmgr$tnum.e: Running rep_test in replicated env." + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + # We use log_archive when we want to remove log files so + # that if we are running verbose, we get all of the output + # we might need. + # + # However, we can use db_archive for all of the other uses + # we need such as getting a list of what log files exist in + # the environment. + # + puts "\tRepmgr$tnum.f: Run log_archive on master." + set res [$enva log_archive -arch_remove] + set res [eval exec $util_path/db_archive -l -h $dira] + if { [lsearch -exact $res $last_client_log] == -1 } { + set stop 1 + } + } + + # + # Get the new last log file for client 1. + # + set res [eval exec $util_path/db_archive -l -h $dirb] + set last_client_log [lindex [lsort $res] end] + + # + # Set test hook to prevent client 1 from sending any ACKs, + # but remaining alive. + # + puts "\tRepmgr$tnum.g: Turn off acks via test hook on remaining client." + $envb test abort repmgr_perm + + # + # Advance logfiles again. + set stop 0 + while { $stop == 0 } { + # Run rep_test in the master. + puts "\tRepmgr$tnum.h: Running rep_test in replicated env." + eval rep_test $method $enva NULL $niter $start 0 0 $largs + incr start $niter + + puts "\tRepmgr$tnum.i: Run db_archive on master." 
+ set res [eval exec $util_path/db_archive -l -h $dira] + set last_master_log [lindex [lsort $res] end] + if { $last_master_log != $last_client_log } { + set stop 1 + } + } + + puts "\tRepmgr$tnum.j: Try to archive. Verify it didn't." + set res [$enva log_archive -arch_remove] + set res [eval exec $util_path/db_archive -l -h $dira] + error_check_bad cl1_archive [lsearch -exact $res $last_client_log] -1 + # + # Turn off test hook preventing acks. Then run a perm operation + # so that the client can send its ack. + # + puts "\tRepmgr$tnum.k: Enable acks and archive again." + $envb test abort none + $enva txn_checkpoint -force + # + # Now archive again and make sure files were removed. + # + set res [$enva log_archive -arch_remove] + set res [eval exec $util_path/db_archive -l -h $dira] + error_check_good cl1_archive [lsearch -exact $res $last_client_log] -1 + + $enva close + $envb close +} diff --git a/test/tcl/repmgr025.tcl b/test/tcl/repmgr025.tcl new file mode 100644 index 00000000..eb3c11a1 --- /dev/null +++ b/test/tcl/repmgr025.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr025 +# TEST repmgr heartbeat rerequest test. +# TEST +# TEST Start an appointed master site and one client. Use a test hook +# TEST to inhibit PAGE_REQ processing at the master (i.e., "lose" some +# TEST messages). +# TEST Start a second client that gets stuck in internal init. Wait +# TEST long enough to rely on the heartbeat rerequest to request the +# TEST missing pages, rescind the test hook and verify that all +# TEST data appears on both clients. +# TEST +# TEST Run for btree only because access method shouldn't matter. +# TEST +proc repmgr025 { { niter 100 } { tnum "025" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." 
+ return + } + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping repmgr$tnum on QNX." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr heartbeat rerequest test." + repmgr025_sub $method $niter $tnum $args +} + +proc repmgr025_sub { method niter tnum largs } { + global testdir + global rep_verbose + global util_path + global verbose_type + set nsites 3 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + set omethod [convert_method $method] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + # Log size is small so we quickly create more than one. + # The documentation says that the log file must be at least + # four times the size of the in-memory log buffer. + set pagesize 4096 + append largs " -pagesize $pagesize " + set log_buf [expr $pagesize * 2] + set log_max [expr $log_buf * 4] + + # First just establish the group, because a new client can't join a + # group while the master is in the middle of a txn. + puts "\tRepmgr$tnum.a: Create a group of three." 
+ set common "berkdb_env_noerr -create $verbargs \ + -txn -rep -thread -recover -log_buffer $log_buf -log_max $log_max" + set ma_envcmd "$common -errpfx MASTER -home $masterdir" + set cl_envcmd "$common -errpfx CLIENT -home $clientdir" + set cl2_envcmd "$common -errpfx CLIENT2 -home $clientdir2" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -local [list localhost [lindex $ports 0]] \ + -start master + set clientenv [eval $cl_envcmd] + $clientenv repmgr -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] -start client + await_startup_done $clientenv + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] -start client + await_startup_done $clientenv2 + $clientenv close + $clientenv2 close + $masterenv close + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tRepmgr$tnum.b: Start a master." + set masterenv [eval $ma_envcmd] + $masterenv repmgr -timeout {heartbeat_send 500000} + $masterenv repmgr -ack all \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open first client + puts "\tRepmgr$tnum.c: Start first client." + set clientenv [eval $cl_envcmd] + $clientenv repmgr -timeout {heartbeat_monitor 1100000} + $clientenv repmgr -ack all \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -start client + await_startup_done $clientenv + + puts "\tRepmgr$tnum.d: Add some data to master and commit." + # Add enough data to move into a new log file, so that we can force an + # internal init when we restart client2 later. 
+ set res [eval exec $util_path/db_archive -l -h $masterdir] + set log_end [lindex [lsort $res] end] + + set dbname test.db + set mdb [eval {berkdb_open_noerr -create $omethod -auto_commit \ + -env $masterenv} $largs {$dbname}] + set done false + set start 0 + $masterenv test force noarchive_timeout + while { !$done } { + eval rep_test $method $masterenv $mdb $niter $start 0 0 $largs + incr start $niter + $masterenv log_archive -arch_remove + set res [exec $util_path/db_archive -l -h $masterdir] + if { [lsearch -exact $res $log_end] == -1 } { + set done true + } + } + + puts "\tRepmgr$tnum.e: Inhibit PAGE_REQ processing at master." + $masterenv test abort no_pages + + # Open second client. The test hook will cause + # this client to be stuck in internal init until the updates + # are committed, so do not await_startup_done here. + puts "\tRepmgr$tnum.f: Start second client." + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -timeout {heartbeat_monitor 1100000} + $clientenv2 repmgr -ack all \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -start client + + puts "\tRepmgr$tnum.g: Test for page requests from rerequest thread." + # Wait 5 seconds (significantly longer than heartbeat send time) to + # process all page requests resulting from master transactions. + set max_wait 5 + tclsleep $max_wait + set init_pagereq [stat_field $clientenv2 rep_stat "Pages requested"] + # Any further page requests can only be from the heartbeat rerequest + # because we processed all other lingering page requests above. + await_condition {[stat_field $clientenv2 rep_stat \ + "Pages requested"] > $init_pagereq} $max_wait + + puts "\tRepmgr$tnum.h: Rescind test hook, finish client startup." + $masterenv test abort none + await_startup_done $clientenv2 + + puts "\tRepmgr$tnum.i: Verifying client database contents." 
+ rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1 + rep_verify $masterdir $masterenv $clientdir2 $clientenv2 1 1 1 + + error_check_good mdb_close [$mdb close] 0 + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 + error_check_good masterenv_close [$masterenv close] 0 +} diff --git a/test/tcl/repmgr026.tcl b/test/tcl/repmgr026.tcl new file mode 100644 index 00000000..b836230b --- /dev/null +++ b/test/tcl/repmgr026.tcl @@ -0,0 +1,243 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr026 +# TEST Test of "full election" timeouts. +# TEST 1. Cold boot with all sites present. +# TEST 2. Cold boot with some sites missing. +# TEST 3. Partial-participation election with one client having seen a master, +# TEST but another just starting up fresh. +# TEST 4. Partial participation, with all participants already having seen a +# TEST master. +# TEST + +proc repmgr026 { { tnum 026 } } { + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." 
+ return + } + + foreach use_leases {no yes} { + foreach client_down {no yes} { + puts "Repmgr$tnum: Full election test, \ + client_down: $client_down; leases: $use_leases" + repmgr026_sub $tnum $client_down $use_leases + } + } +} + +proc repmgr026_sub { tnum client_down use_leases } { + global testdir + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + file mkdir [set dirc $testdir/SITE_C] + file mkdir [set dird $testdir/SITE_D] + file mkdir [set dire $testdir/SITE_E] + foreach { porta portb portc portd porte } [available_ports 5] {} + + # First, just create/establish the group. + puts -nonewline "Repmgr$tnum: Create a group of 5 sites: " + set common "-create -txn $verbargs $repmemargs \ + -rep -thread -event" + if { $use_leases } { + append common " -rep_lease {[list 3000000]} " + } + set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira" + set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb" + set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc" + set cmdd "berkdb_env_noerr $common -errpfx SITE_D -home $dird" + set cmde "berkdb_env_noerr $common -errpfx SITE_E -home $dire" + set common_mgr " -start elect \ + -timeout {connection_retry 5000000} \ + -timeout {election_retry 2000000} \ + -timeout {full_election 60000000} \ + -timeout {election 5000000} -timeout {ack 3000000}" + set enva [eval $cmda] + eval $enva repmgr $common_mgr \ + -local {[list localhost $porta creator]} + puts -nonewline "." ; flush stdout + set envb [eval $cmdb] + eval $envb repmgr $common_mgr \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + await_startup_done $envb + puts -nonewline "." 
; flush stdout + set envc [eval $cmdc] + eval $envc repmgr $common_mgr \ + -local {[list localhost $portc]} -remote {[list localhost $porta]} + await_startup_done $envc + puts -nonewline "." ; flush stdout + set envd [eval $cmdd] + eval $envd repmgr $common_mgr \ + -local {[list localhost $portd]} -remote {[list localhost $porta]} + await_startup_done $envd + puts -nonewline "." ; flush stdout + set enve [eval $cmde] + eval $enve repmgr $common_mgr \ + -local {[list localhost $porte]} -remote {[list localhost $porta]} + await_startup_done $enve + puts "." + $enve close + $envd close + $envc close + $envb close + $enva close + + # Cold boot the group (with or without site E), giving site A a + # high priority. + # + + # The wait_limit's are intended to be an amount that is way more than + # the expected timeout, used for nothing more than preventing the test + # from hanging forever. The leeway amount should be enough less than + # the timeout to allow for any imprecision introduced by the test + # mechanism. + # + set elect_wait_limit 25 + set full_secs_leeway 59 + set full_wait_limit 85 + + puts "\tRepmgr$tnum.a: Start first four sites." + set enva [eval $cmda] + eval $enva repmgr $common_mgr -pri 200 -local {[list localhost $porta]} + + set envb [eval $cmdb] + eval $envb repmgr $common_mgr -pri 100 -local {[list localhost $portb]} + + set envc [eval $cmdc] + eval $envc repmgr $common_mgr -pri 90 -local {[list localhost $portc]} + + set envd [eval $cmdd] + eval $envd repmgr $common_mgr -pri 80 -local {[list localhost $portd]} + + if { $client_down } { + set enve NONE + } else { + puts "\tRepmgr$tnum.b: Start fifth site." 
+ set enve [eval $cmde] + eval $enve repmgr $common_mgr -pri 50 \ + -local {[list localhost $porte]} + } + + # wait for results, and make sure they're correct + # + set envlist [list $enva $envb $envc $envd] + if { $enve != "NONE" } { + lappend envlist $enve + } + set limit $full_wait_limit + puts "\tRepmgr$tnum.c: wait (up to $limit seconds) for first election." + set t [repmgr026_await_election_result $envlist $limit] + if { $client_down } { + error_check_good slow_election [expr $t > $full_secs_leeway] 1 + } else { + # When all sites participate, the election should finish in way + # less than 60 seconds. + # + error_check_good timely_election [expr $t < $full_secs_leeway] 1 + } + puts "\tRepmgr$tnum.d: first election completed in $t seconds" + + puts "\tRepmgr$tnum.e: wait for start-up done" + $enva event_info -clear + await_startup_done $envb + $envb event_info -clear + await_startup_done $envc + $envc event_info -clear + await_startup_done $envd + $envd event_info -clear + if { $enve != "NONE" } { + await_startup_done $enve + $enve event_info -clear + } + + # Shut down site A, in order to test elections with less than the whole + # group voting. However, normally repmgr's reaction to losing master + # connection is to try a "fast election" (the n-1 trick). So we must do + # something to mitigate that (see below). + # + puts "\tRepmgr$tnum.f: shut down master site A" + if { $client_down } { + # The fifth site is already down, so now we'll have just B, C, + # and D running. Therefore, even with repmgr pulling its "fast + # election" (n-1) trick, we don't have enough votes for a + # full-participation short circuit; so this is a valid test of + # the "normal" election timeout. + # + $enva close + } else { + # Here all sites are running, so if we just killed the master + # repmgr would invoke its "fast election" trick, resulting in no + # timeout. Since the purpose of this test is to ensure the + # correct use of timeouts, that's no good. 
Instead, let's first + # kill one more other site. + $enve close + $enva close + } + + # wait for results, and check them + # + set envlist [list $envb $envc $envd] + set limit $elect_wait_limit + puts "\tRepmgr$tnum.h: wait (up to $limit seconds) for second election." + set t [repmgr026_await_election_result $envlist $limit] + error_check_good normal_election [expr $t < $full_secs_leeway] 1 + puts "\tRepmgr$tnum.i: second election completed in $t seconds" + + $envd close + $envc close + $envb close +} + +# Wait (a limited amount of time) for the election to finish. The first env +# handle in the list is the expected winner, and the others are the remaining +# clients. Returns the approximate amount of time (in seconds) that the +# election took. +# +proc repmgr026_await_election_result { envlist limit } { + set begin [clock seconds] + set deadline [expr $begin + $limit] + while { true } { + set t [clock seconds] + if { $t > $deadline } { + error "FAIL: time limit exceeded" + } + + if { [repmgr026_is_ready $envlist] } { + return [expr $t - $begin] + } + + tclsleep 1 + } +} + +proc repmgr026_is_ready { envlist } { + set winner [lindex $envlist 0] + if {![is_elected $winner]} { + return false + } + + foreach client [lrange $envlist 1 end] { + if {![is_event_present $client newmaster]} { + return false + } + } + return true +} diff --git a/test/tcl/repmgr027.tcl b/test/tcl/repmgr027.tcl new file mode 100644 index 00000000..a9841bf6 --- /dev/null +++ b/test/tcl/repmgr027.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr027 +# TEST Test of "full election" timeouts, where a client starts up and joins the +# TEST group during the middle of an election. +# TEST + +proc repmgr027 { { tnum 027 } } { + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." 
+ return + } + puts -nonewline "Repmgr$tnum: Full election test," + puts " with client joining halfway through election" + repmgr027_sub $tnum +} + +proc repmgr027_sub { tnum } { + global testdir + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + file mkdir [set dirc $testdir/SITE_C] + foreach { porta portb portc } [available_ports 3] {} + + # The election times are arbitrary, but the full election timeout should + # be long enough to allow the test to start two sites, wait for them to + # be in an election, and then have the third site start and join + # (including the leeway time, in seconds), before it times out. + # + set common "-create -txn $verbargs $repmemargs \ + -rep -thread -event -recover" + set common_mgr "-start elect \ + -timeout {connection_retry 5000000} \ + -timeout {election_retry 2000000} \ + -timeout {full_election 180000000} \ + -timeout {election 5000000}" + set leeway 5 + + # Start by simply establishing the group. Then we can shut down and get + # started with the interesting cold-boot scenarios. 
+ set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira" + set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb" + set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc" + set enva [eval $cmda] + eval $enva repmgr $common_mgr -local {[list localhost $porta creator]} + set envb [eval $cmdb] + eval $envb repmgr $common_mgr \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + await_startup_done $envb + set envc [eval $cmdc] + eval $envc repmgr $common_mgr \ + -local {[list localhost $portc]} -remote {[list localhost $porta]} + await_startup_done $envc + $envc close + $envb close + $enva close + + # Cold boot, at first just 2 sites. + # + puts "\tRepmgr$tnum.a: Start first two sites." + set enva [eval $cmda] + eval $enva repmgr $common_mgr -pri 200 -local {[list localhost $porta]} + + set envb [eval $cmdb] + eval $envb repmgr $common_mgr -pri 100 -local {[list localhost $portb]} + + # Wait until both sites recognize that they're in an election, plus a + # few extra seconds just for good measure. + # + await_condition {[expr \ + [stat_field $enva rep_stat "Election phase"] == 1 && \ + [stat_field $envb rep_stat "Election phase"] == 1]} + tclsleep $leeway + + # At this point we should not have completed an election yet, even + # though we have a majority, because we don't have full participation. + # + error_check_bad site_a_elected [is_elected $enva] 1 + error_check_bad site_b_elected [is_elected $envb] 1 + + puts "\tRepmgr$tnum.c: Start 3rd site." + + set envc [eval $cmdc] + eval $envc repmgr $common_mgr -pri 100 -local {[list localhost $portc]} + + # Wait for results, and make sure they're correct. The election should + # complete right away, once the third client has joined, regardless of + # the election timeout values. We wait an arbitrary maximum of 60 + # seconds, merely so that the test doesn't hang forever if something + # goes horribly wrong. 
+ # + set envlist [list $enva $envb $envc] + set limit 60 + puts "\tRepmgr$tnum.c: wait (up to $limit seconds) for election." + set t [repmgr026_await_election_result $envlist $limit] + error_check_good timely_election [expr $t < 2 * $leeway] 1 + puts "\tRepmgr$tnum.d: first election completed in $t seconds" + + puts "\tRepmgr$tnum.e: wait for start-up done" + await_startup_done $envb + await_startup_done $envc + + $envb close + $envc close + $enva close +} diff --git a/test/tcl/repmgr028.tcl b/test/tcl/repmgr028.tcl new file mode 100644 index 00000000..97bb4fe7 --- /dev/null +++ b/test/tcl/repmgr028.tcl @@ -0,0 +1,286 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr028 +# TEST Repmgr allows applications to choose master explicitly, instead of +# TEST relying on elections. + +proc repmgr028 { { tnum 028 } } { + source ./include.tcl + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping repmgr$tnum on QNX." 
+ return + } + + puts "Repmgr$tnum: Repmgr applications may choose master explicitly" + repmgr028_sub $tnum +} + +proc repmgr028_sub { tnum } { + global testdir + global tclsh_path + global test_path + global repfiles_in_memory + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + foreach { porta portb } [available_ports 2] {} + + set common "-create -txn $verbargs $repmemargs \ + -rep -thread -event" + set common_mgr "-msgth 2 -timeout {connection_retry 3000000} \ + -timeout {election_retry 3000000}" + set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira" + set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb" + set enva [eval $cmda] + eval $enva repmgr -local {[list localhost $porta]} -start master + set envb [eval $cmdb] + eval $envb repmgr -start client \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + await_startup_done $envb + $envb close + $enva close + + # Create a replication group of 2 sites, configured not to use + # elections. Even with this configuration, an "initial" election is + # allowed, so try that to make sure it works. + # + puts "\tRepmgr$tnum.a: Start two sites." + set enva [eval $cmda -recover] + $enva rep_config {mgrelections off} + eval $enva repmgr $common_mgr \ + -local {[list localhost $porta]} -start elect -pri 100 + + set envb [eval $cmdb -recover] + $envb rep_config {mgrelections off} + eval $envb repmgr $common_mgr -start elect -pri 99 \ + -local {[list localhost $portb]} + await_startup_done $envb + + puts "\tRepmgr$tnum.b: Switch roles explicitly." 
+ $enva repmgr -start client -msgth 0 + $envb repmgr -start master -msgth 0 + await_startup_done $enva + + # Check that "-start elect" is forbidden when called as a dynamic + # change, at either master or client. + # + error_check_bad disallow_elect_restart \ + [catch {$enva repmgr -start elect -msgth 0}] 0 + error_check_bad disallow_elect_restart \ + [catch {$envb repmgr -start elect -msgth 0}] 0 + + # Kill master, and observe that client does not react by starting an + # election. Before doing so, reset client's stats, so that the later + # comparison against 0 makes sense. + # + puts "\tRepmgr$tnum.c: Kill master" + $enva rep_stat -clear + $envb close + + # The choice of 5 seconds is arbitrary, not related to any configured + # timeouts, and is simply intended to allow repmgr's threads time to + # react. We presume the system running this test isn't so horribly + # overloaded that repmgr's threads can't get scheduled for that long. + # + puts "\tRepmgr$tnum.d: Pause 5 seconds to observe (lack of) reaction." + tclsleep 5 + + error_check_good event \ + [is_event_present $enva master_failure] 1 + + error_check_good no_election_in_progress \ + [stat_field $enva rep_stat "Election phase"] 0 + error_check_good no_elections_held \ + [stat_field $enva rep_stat "Elections held"] 0 + + # bring master back up, wait for client to get start up done (clear + # event first). make client a master, and observe dupmaster event. + # check that both are then client role. and again no election! + # + puts "\tRepmgr$tnum.e: Restart master (wait for client to sync)." + set orig_gen [stat_field $enva rep_stat "Generation number"] + $enva event_info -clear + set envb [eval $cmdb -recover] + $envb rep_config {mgrelections off} + eval $envb repmgr $common_mgr -start master \ + -local {[list localhost $portb]} + + # Force a checkpoint so that the client hears something from the master, + # which should cause the client to notice the gen number change. 
Try a + # few times, in case we're not quite completely connected at first. + # + $envb event_info -clear + $envb repmgr -ack all + set tried 0 + set done false + while {$tried < 10 && !$done} { + tclsleep 1 + $envb txn_checkpoint -force + if {![is_event_present $envb perm_failed]} { + set done true + } + incr tried + $envb event_info -clear + } + + await_condition {[stat_field $enva rep_stat "Generation number"] \ + > $orig_gen} + await_startup_done $enva + + puts "\tRepmgr$tnum.f: Set master at other site, leading to dupmaster." + $enva repmgr -start master -msgth 0 + tclsleep 5 + error_check_good dupmaster_event \ + [is_event_present $envb dupmaster] 1 + error_check_good dupmaster_event2 \ + [is_event_present $enva dupmaster] 1 + error_check_good role \ + [stat_field $enva rep_stat "Role"] "client" + error_check_good role2 \ + [stat_field $envb rep_stat "Role"] "client" + error_check_good no_election_in_progress2 \ + [stat_field $enva rep_stat "Election phase"] 0 + error_check_good no_elections_held2 \ + [stat_field $enva rep_stat "Elections held"] 0 + + # Turn on elections mode at just one of the sites. This should cause + # the site to initiate an election, since it currently lacks a master. + # If a (strict) election can succeed, this also tests the rule that the + # other site accepts an invitation to an election even when it is not in + # elections mode itself. + # + puts "\tRepmgr$tnum.g: Turn on elections mode dynamically." 
+ $envb event_info -clear + $enva rep_config {mgr2sitestrict on} + $enva rep_config {mgrelections on} + await_condition {[is_elected $envb] || \ + [is_event_present $envb newmaster]} + error_check_good elections_held \ + [expr [stat_field $enva rep_stat "Elections held"] > 0] 1 + error_check_good elections_held2 \ + [expr [stat_field $envb rep_stat "Elections held"] > 0] 1 + + # Make sure that changing the "elections" config is not allowed in a + # subordinate replication process: + # + set resultfile "$testdir/repmgr028script.log" + exec $tclsh_path $test_path/wrap.tcl repmgr028script.tcl $resultfile + set file [open $resultfile r] + set result [read -nonewline $file] + close $file + error_check_good subprocess_script_result $result "OK" + + $enva close + $envb close + + # Try a traditional set-up, and verify that dynamic role change is + # forbidden. + # + puts "\tRepmgr$tnum.h: Start up again, elections on by default." + set enva [eval $cmda -recover] + eval $enva repmgr $common_mgr \ + -local {[list localhost $porta]} -start master + set envb [eval $cmdb -recover] + eval $envb repmgr $common_mgr -start client \ + -local {[list localhost $portb]} + await_startup_done $envb + + puts "\tRepmgr$tnum.i: Check that dynamic role change attempt fails." + error_check_bad disallow_role_chg \ + [catch {$enva repmgr -start client -msgth 0}] 0 + error_check_bad disallow_role_chg_b \ + [catch {$envb repmgr -start master -msgth 0}] 0 + + # Close master, observe that client tries an election, and gets the + # event info. 
+ # + $envb rep_config {mgr2sitestrict on} + error_check_bad event2 \ + [is_event_present $envb master_failure] 1 + $enva close + tclsleep 5 + error_check_good event3 \ + [is_event_present $envb master_failure] 1 + + error_check_good election \ + [expr [stat_field $envb rep_stat "Election phase"] != 0 || \ + [stat_field $envb rep_stat "Elections held"] > 0] 1 + + await_condition {[is_event_present $envb election_failed]} + $envb close + + # Check that "client" start policy suppresses elections, even if + # elections mode has *NOT* been turned off for the general case. (This + # is old existing behavior which previously lacked a test.) + # + set enva [eval $cmda -recover] + eval $enva repmgr $common_mgr \ + -local {[list localhost $porta]} -start client + set envb [eval $cmdb -recover] + eval $envb repmgr $common_mgr -start client \ + -local {[list localhost $portb]} + puts "\tRepmgr$tnum.j: Pause 10 seconds, check no election held." + tclsleep 10 + error_check_good no_election \ + [expr [stat_field $enva rep_stat "Election phase"] == 0 && \ + [stat_field $enva rep_stat "Elections held"] == 0] 1 + error_check_good no_election2 \ + [expr [stat_field $envb rep_stat "Election phase"] == 0 && \ + [stat_field $envb rep_stat "Elections held"] == 0] 1 + $enva close + $envb close + + # Check that "election" start policy starts an election when + # a site that was previously a client starts up without recovery + # and without finding a master. This is another general test case + # where elections mode is *NOT* turned off. + # + puts "\tRepmgr$tnum.k: Test election start policy on client startup." 
+ set enva [eval $cmda -recover] + eval $enva repmgr $common_mgr \ + -local {[list localhost $porta]} -start master + $enva rep_config {mgr2sitestrict on} + set envb [eval $cmdb -recover] + eval $envb repmgr $common_mgr -start client \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + await_startup_done $envb + $envb rep_config {mgr2sitestrict on} + $envb close + $enva close + + # Restart previous client with election start policy. The + # 2site_strict setting will cause the election to fail, but we + # only care that the election was initiated. + # + set envb [eval $cmdb] + eval $envb repmgr $common_mgr -start elect \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + puts "\tRepmgr$tnum.l: Pause 5 seconds, check election was attempted." + tclsleep 5 + error_check_good startup_election \ + [expr [stat_field $envb rep_stat "Election phase"] != 0 || \ + [stat_field $envb rep_stat "Elections held"] > 0] 1 + await_condition {[is_event_present $envb election_failed]} + $envb close +} diff --git a/test/tcl/repmgr028script.tcl b/test/tcl/repmgr028script.tcl new file mode 100644 index 00000000..1cfb0503 --- /dev/null +++ b/test/tcl/repmgr028script.tcl @@ -0,0 +1,18 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# Repmgr028 script - subordinate repmgr processes and dynamic role changes + +source ./include.tcl +source $test_path/test.tcl + +# Make sure a subordinate process is not allowed to set the ELECTIONS config. 
+# +set dbenv [berkdb_env_noerr -thread -home $testdir/SITE_A -txn -rep] +$dbenv repmgr -start elect +set ret [catch {$dbenv rep_config {mgrelections off}} result] +error_check_bad role_chg_attempt $ret 0 +$dbenv close + +puts "OK" diff --git a/test/tcl/repmgr029.tcl b/test/tcl/repmgr029.tcl new file mode 100644 index 00000000..241d81a6 --- /dev/null +++ b/test/tcl/repmgr029.tcl @@ -0,0 +1,1747 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr029 +# TEST Test repmgr group membership: create, join, re-join and remove from +# TEST repmgr group and observe changes in group membership database. +# TEST +proc repmgr029 { } { + puts "Repmgr029: Repmgr Group Membership operations." + z1 + z2 + z3 + z4 + z5 + z6 + z7 + z8 + z9 + z10 + z11 + z12 + z13 + z14 + z15 + z16 + z17 + z18 + z19 +} + +# See that a joining site that names a non-master as helper gets a +# "forward" response, and manages to then get to the true master. +# +# Note: there's a bit of a race here, depending on the condition of +# site B at the time C tries to join. That should eventually be +# tightened up. 
+# z3: Exercise group membership database (GMDB) maintenance end to end:
+# primordial group creation, join-request forwarding through a non-master
+# site, rejection of attempts to remove the master, and "limbo" resolution
+# when join requests initially fail for lack of acknowledgements.
+proc z3 {} {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {port0 port1 port2 port3 port4 port5} [available_ports 6] {}
+
+	set masterdir $testdir/MASTERDIR
+	set clientdir $testdir/CLIENTDIR
+	set clientdir2 $testdir/CLIENTDIR2
+	set clientdir3 $testdir/CLIENTDIR3
+	set clientdir4 $testdir/CLIENTDIR4
+	set clientdir5 $testdir/CLIENTDIR5
+
+	file mkdir $masterdir
+	file mkdir $clientdir
+	file mkdir $clientdir2
+	file mkdir $clientdir3
+	file mkdir $clientdir4
+	file mkdir $clientdir5
+
+	puts "\tRepmgr029.z3.a: Primordial creation, Start Master site 0"
+	set env1 [berkdb env -create -errpfx MASTER -home $masterdir -txn \
+	    -rep -thread -recover -verbose [list rep $rv]]
+	$env1 repmgr -local [list localhost $port0] -start master
+	error_check_good nsites_A [$env1 rep_get_nsites] 1
+
+	puts "\tRepmgr029.z3.b: Simple join request, \
+	    client 1 points directly at master"
+	set env2 [berkdb env -create -errpfx CLIENT -home $clientdir -txn \
+	    -rep -thread -recover -verbose [list rep $rv]]
+	$env2 rep_config {mgr2sitestrict on}
+	$env2 repmgr -local [list localhost $port1] \
+	    -remote [list localhost $port0] -start client
+	await_startup_done $env2
+	error_check_good nsites_A2 [$env1 rep_get_nsites] 2
+	error_check_good nsites_B2 [$env2 rep_get_nsites] 2
+
+	# Client 2's -remote points at client 1 (not the master), so the join
+	# request must be forwarded; retry until the forwarding succeeds.
+	puts "\tRepmgr029.z3.c: Join request forwarding, start client 2."
+	set env3 [berkdb env -create -errpfx CLIENT2 -home $clientdir2 -txn \
+	    -rep -thread -recover -verbose [list rep $rv]]
+	$env3 rep_config {mgr2sitestrict on}
+	$env3 repmgr -local [list localhost $port2] \
+	    -remote [list localhost $port1]
+	set done no
+	while {!$done} {
+		if {[catch {$env3 repmgr -start client} msg]} {
+			puts $msg
+			tclsleep 1
+		} else {
+			set done yes
+		}
+	}
+	await_startup_done $env3
+	error_check_good nsites_A3 [$env1 rep_get_nsites] 3
+	error_check_good nsites_B3 [$env2 rep_get_nsites] 3
+	error_check_good nsites_C3 [$env3 rep_get_nsites] 3
+
+	# Removing the master must fail with DB_REP_UNAVAIL, whether the
+	# request is made at the master itself or at a client.
+	puts "\tRepmgr029.z3.d: Master cannot be removed \
+	    (by itself, or as requested from a client)"
+	set ret [catch {$env1 repmgr -remove [list localhost $port0]} result]
+	error_check_bad no_failure $ret 0
+	error_check_match unavail $result "*DB_REP_UNAVAIL*"
+	set ret [catch {$env2 repmgr -remove [list localhost $port0]} result]
+	error_check_bad no_failure2 $ret 0
+	error_check_match unavail2 $result "*DB_REP_UNAVAIL*"
+
+	# Open the membership database directly so we can inspect GMDB
+	# versions and per-site status values below.
+	set db [berkdb open -env $env1 -thread __db.rep.system __db.membership]
+
+	puts "\tRepmgr029.z3.e: Join request rejected for lack of acks"
+	puts "\t\tRepmgr029.z3.e.1: Close client 1 and 2."
+	error_check_good s_3_close [$env3 close] 0
+	error_check_good s_2_close [$env2 close] 0
+
+	# With both clients down the master cannot get acks, so this join
+	# is rejected, leaving the new site in "adding" (limbo) state.
+	puts "\t\tRepmgr029.z3.e.2: Start client 3."
+	set env4 [berkdb env -create -errpfx CLIENT3 -home $clientdir3 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	set ret [catch {$env4 repmgr -local [list localhost $port3] \
+	    -remote [list localhost $port0] -start client} result]
+	error_check_bad no_failure3 $ret 0
+	error_check_match unavail3 $result "*DB_REP_UNAVAIL*"
+
+	set prev_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.e.3: Check previous GMDB version $prev_vers"
+	# GMDB per-site status codes: 1 = adding (limbo), 4 = present.
+	set SITE_ADDING 1
+	set SITE_PRESENT 4
+	error_check_good site_3_adding [repmgr029_gmdb_status $db localhost $port3] \
+	    $SITE_ADDING
+
+	puts "\t\tRepmgr029.z3.e.4: limbo resolution, restart client 1."
+	set env2 [berkdb env -create -errpfx CLIENT -home $clientdir -txn \
+	    -rep -thread -recover -verbose [list rep $rv] -event]
+	# no helper should be needed this time.
+	$env2 repmgr -local [list localhost $port1] -start client
+	await_startup_done $env2 50
+
+	# An ordinary transaction at the master gives it the chance to
+	# commit the pending membership change (resolve the limbo).
+	puts "\t\tRepmgr029.z3.e.5: normal txn at master"
+	set niter 1
+	rep_test btree $env1 NULL $niter 0 0 0
+	set new_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.e.6: NEW GMDB version $new_vers"
+	error_check_good version_incr $new_vers [expr $prev_vers + 1]
+	error_check_good site_3_added [repmgr029_gmdb_status $db localhost $port3] \
+	    $SITE_PRESENT
+
+	puts "\t\tRepmgr029.z3.e.7: client 3 rejoins."
+	$env4 repmgr -start client
+
+	await_startup_done $env4 60
+
+	# To verify that the GMDB has been updated on client side.
+	puts "\t\tRepmgr029.z3.e.8: Verify the GMDB on the client 3."
+	set db3 [berkdb open -env $env4 -thread __db.rep.system __db.membership]
+	error_check_good vers [repmgr029_gmdb_version $db3] $new_vers
+	$db3 close
+
+	# This test case verify a scenario where (1) try another (different)
+	# join request, still with insufficient acks, and see that it doesn't
+	# load up another limbo; and then (2) with acks working,
+	# a second request finishes off the first and then succeeds.
+	# I guess we also need to try simply retrying the first addition.
+	puts "\tRepmgr029.z3.f: Join request rejected for lack of acks"
+	puts "\t\tRepmgr029.z3.f.1: Close client 1."
+	error_check_good s_1_close [$env2 close] 0
+
+	set prev_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.2: Check current GMDB version $prev_vers"
+
+	puts "\t\tRepmgr029.z3.f.3: Start client 4."
+	set env5 [berkdb env -create -errpfx CLIENT4 -home $clientdir4 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	set ret [catch {$env5 repmgr -local [list localhost $port4] \
+	    -remote [list localhost $port0] -start client} result]
+	error_check_bad no_failure4 $ret 0
+	error_check_match unavail4 $result "*DB_REP_UNAVAIL*"
+
+	set prev_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.4: Check current GMDB version $prev_vers"
+	error_check_good site_4_adding [repmgr029_gmdb_status $db localhost $port4] \
+	    $SITE_ADDING
+
+	puts "\t\tRepmgr029.z3.f.5: Start client 5."
+	set env6 [berkdb env -create -errpfx CLIENT5 -home $clientdir5 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	set ret [catch {$env6 repmgr -local [list localhost $port5] \
+	    -remote [list localhost $port0] -start client} result]
+	error_check_bad no_failure5 $ret 0
+	error_check_match unavail5 $result "*DB_REP_UNAVAIL*"
+
+	set prev_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.6: Check current GMDB version $prev_vers"
+	# [M]: There is no gm status for client 5 so far. Let alone the "ADDING".
+	#error_check_good site_5_adding [repmgr029_gmdb_status $db localhost $port5] \
+	#    $SITE_ADDING
+
+	puts "\t\tRepmgr029.z3.f.7: limbo resolution, restart client 1."
+	set env2 [berkdb env -create -errpfx CLIENT -home $clientdir -txn \
+	    -rep -thread -recover -verbose [list rep $rv] -event]
+	# no helper should be needed this time.
+	$env2 repmgr -local [list localhost $port1] -start client
+	await_startup_done $env2 50
+	puts "\t\tRepmgr029.z3.f.8: normal txn at master"
+	set niter 1
+	rep_test btree $env1 NULL $niter 0 0 0
+
+	set new_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.9: NEW GMDB version $new_vers"
+	error_check_good version_incr $new_vers [expr $prev_vers + 1]
+
+	puts "\t\tRepmgr029.z3.f.10: client 5 rejoins."
+	$env6 repmgr -start client
+	await_startup_done $env6 60
+
+	set new_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.11: NEW GMDB version $new_vers"
+	# Check for client 5, which has gm status as "ADDED"
+	error_check_good site_5_added [repmgr029_gmdb_status $db localhost $port5] \
+	    $SITE_PRESENT
+	#[M]: So far gm status for client 4 is "ADDED"
+	error_check_good site_4_added [repmgr029_gmdb_status $db localhost $port4] \
+	    $SITE_PRESENT
+	#[M]: We'd like to check the gm status on the client 4 sides.
+	# No Way! as client 4 has not been start up and sync.
+	# puts "\t\tRepmgr029.z3.e.8: Verify the GMDB on the client 4."
+	# set db4 [berkdb open -env $env5 -thread __db.rep.system __db.membership]
+	# error_check_good vers [repmgr029_gmdb_version $db4] $new_vers
+
+	puts "\t\tRepmgr029.z3.f.12: client 4 rejoins."
+	$env5 repmgr -start client
+	await_startup_done $env5 100
+
+	set new_vers [repmgr029_gmdb_version $db]
+	puts "\t\tRepmgr029.z3.f.13: NEW GMDB version $new_vers"
+
+	# A site that is down may be removed; status 0 here means the site
+	# no longer appears in the membership database at all.
+	puts "\tRepmgr029.z3.h: Remove (downed) client 3, from master"
+	$env1 repmgr -remove [list localhost $port3]
+	error_check_good site_3_removed [repmgr029_gmdb_status $db localhost $port3] 0
+	error_check_good db_close [$db close] 0
+	error_check_good s_1_close [$env2 close] 0
+	error_check_good s_3_close [$env4 close] 0
+	error_check_good s_4_close [$env5 close] 0
+	error_check_good s_5_close [$env6 close] 0
+	error_check_good s_0_close [$env1 close] 0
+	puts "\tRepmgr029.z3.i: End OF Repmgr029"
+}
+
+# Remove a live site from a group, and see that the site gets a
+# LOCAL_SITE_REMOVED event, and the other sites get SITE_REMOVED.
+#
+proc z6 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC} [available_ports 3] {}
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+
+	puts -nonewline "\tRepmgr029.z6.a: Build basic 3-site group"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envA repmgr -local [list localhost $portA creator] -start elect
+	error_check_good nsites_a [$envA rep_get_nsites] 1
+	puts -nonewline "."; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start elect
+	await_startup_done $envB
+	error_check_good nsites_b [$envB rep_get_nsites] 2
+	puts -nonewline "."; flush stdout
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start elect
+	await_startup_done $envC
+	error_check_good nsites_c [$envC rep_get_nsites] 3
+	puts "."; flush stdout
+
+	# Record C's environment ID as known at A and at B, so that the
+	# SITE_REMOVED events can be checked for the right EID below.
+	set eid_C_at_A [repmgr029_get_eid $envA $portC]
+	set eid_C_at_B [repmgr029_get_eid $envB $portC]
+
+	# The remove request originates at B, but must take effect at the
+	# master (A); verify via A's membership database.
+	puts "\tRepmgr029.z6.b: Remove (live) site C from a request originating at B."
+	$envB repmgr -remove [list localhost $portC]
+	set db [berkdb open -env $envA -thread __db.rep.system __db.membership]
+	error_check_good site_c_removed [repmgr029_gmdb_status $db localhost $portC] 0
+
+	set master_ev [find_event [$envA event_info] site_removed]
+	error_check_good site_a_event [llength $master_ev] 2
+	error_check_good site_a_event_eid [lindex $master_ev 1] $eid_C_at_A
+	error_check_good site_a_list [llength [repmgr029_get_eid $envA $portC]] 0
+
+	# The removed site itself gets LOCAL_SITE_REMOVED.
+	await_event $envC local_site_removed
+	error_check_good s_c_close [$envC close] 0
+
+	# Wait until B has dropped C from its site list before checking
+	# B's SITE_REMOVED event.
+	await_condition {[expr [string length [repmgr029_site_list_status $envB $portC]] == 0]}
+	set b_ev [find_event [$envB event_info] site_removed]
+	error_check_good site_b_event [llength $b_ev] 2
+	error_check_good site_b_event_eid [lindex $b_ev 1] $eid_C_at_B
+	error_check_good site_b_list [llength [repmgr029_get_eid $envB $portC]] 0
+	error_check_good s_b_close [$envB close] 0
+	$db close
+	error_check_good s_a_close [$envA close] 0
+}
+
+# See that SITE_ADDED events are fired appropriately.
+proc z8 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC} [available_ports 3] {}
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+
+	puts "\tRepmgr029.z8: Create primordial site"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envA repmgr -local [list localhost $portA creator] -start elect
+
+	# Adding B should fire SITE_ADDED at the master (A), carrying B's
+	# environment ID as the event argument.
+	puts "\tRepmgr029.z8: Add client, check for event at master"
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start elect
+	set ev [find_event [$envA event_info] site_added]
+	error_check_good ev_a [llength $ev] 2
+	set eid [lindex $ev 1]
+	error_check_good ev_a_eid $eid [repmgr029_get_eid $envA $portB]
+	await_startup_done $envB
+
+	# Clear recorded events first so that the SITE_ADDED found below
+	# can only have come from C's addition.
+	puts "\tRepmgr029.z8: Add another client, check for events at both other sites"
+	$envA event_info -clear
+	$envB event_info -clear
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -verbose [list rep $rv] -event]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start elect
+
+	set ev [find_event [$envA event_info] site_added]
+	error_check_good ev_a2 [llength $ev] 2
+	set eid [lindex $ev 1]
+	error_check_good ev_a_eid2 $eid [repmgr029_get_eid $envA $portC]
+
+	# B learns of the addition asynchronously; wait for its event.
+	await_event $envB site_added
+	set ev [find_event [$envB event_info] site_added]
+	error_check_good ev_b [llength $ev] 2
+	set eid [lindex $ev 1]
+	error_check_good ev_b_eid $eid [repmgr029_get_eid $envB $portC]
+
+	$envC close
+	$envB close
+	$envA close
+}
+
+# Remove a site, starting at the site to be removed. See that we at least shut
+# down threads (if not also fire event in this case).
+#
+proc z7 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC} [available_ports 3] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+
+	puts -nonewline "\tRepmgr029.z7: Set up a group of 3, A (master), B, C"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envA repmgr -local [list localhost $portA] -start master
+	puts -nonewline "." ; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+	puts -nonewline "." ; flush stdout
+
+	# Remember B's EID as known at A, for the SITE_REMOVED check below.
+	set eid_B_at_A [repmgr029_get_eid $envA $portB]
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+	puts "."
+
+	# B asks for its own removal; it should observe LOCAL_SITE_REMOVED,
+	# while the master sees SITE_REMOVED with B's EID and drops B from
+	# its site list.
+	puts "\tRepmgr029.z7: Remove site B itself"
+	$envB repmgr -remove [list localhost $portB]
+	await_event $envB local_site_removed
+
+	set master_ev [find_event [$envA event_info] site_removed]
+	error_check_good site_a_event_eid [lindex $master_ev 1] $eid_B_at_A
+	error_check_good site_a_list [llength [repmgr029_get_eid $envA $portB]] 0
+
+	$envB close
+	$envC close
+	$envA close
+}
+
+# See that a join request is rejected if insufficient acks. (It should
+# remain in the db as "adding" though, and apps should be able to query
+# nsites to find out that it's been incremented.)
+#
+proc z4 {} {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {port0 port1 port2} [available_ports 3] {}
+
+	set masterdir $testdir/MASTERDIR
+	set clientdir $testdir/CLIENTDIR
+	set clientdir2 $testdir/CLIENTDIR2
+
+	file mkdir $masterdir
+	file mkdir $clientdir
+	file mkdir $clientdir2
+
+	puts -nonewline "\tRepmgr029.z4.a: Start the master."
+	set env1 [berkdb_env -create -errpfx MASTER -home $masterdir \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env1 repmgr -local [list localhost $port0] -start master
+	error_check_good nsites_1 [$env1 rep_get_nsites] 1
+	puts "."; flush stdout
+
+	puts -nonewline "\tRepmgr029.z4.b: Start first client."
+	set env2 [berkdb_env -create -errpfx CLIENT -home $clientdir -txn \
+	    -rep -thread -recover -verbose [list rep $rv]]
+	$env2 rep_config {mgr2sitestrict on}
+	$env2 repmgr -local [list localhost $port1] \
+	    -remote [list localhost $port0] -start client
+	await_startup_done $env2
+	error_check_good nsites_2 [$env2 rep_get_nsites] 2
+	puts "."; flush stdout
+
+	puts "\tRepmgr029.z4.c: Close the first client."
+	error_check_good s_2_close [$env2 close] 0
+
+	# With the only other site down, the master cannot collect acks for
+	# the membership change, so the join must fail with DB_REP_UNAVAIL.
+	puts "\tRepmgr029.z4.d: Start the second client."
+	set env3 [berkdb_env -create -errpfx CLIENT2 -home $clientdir2 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env3 rep_config {mgr2sitestrict on}
+	set ret [catch {$env3 repmgr -local [list localhost $port2] \
+	    -remote [list localhost $port0] -start client} result]
+	error_check_bad no_failure $ret 0
+	error_check_match unavail $result "*DB_REP_UNAVAIL*"
+	puts "\tRepmgr029.z4.e: The second join failed as expected, \
+	    since the first client is down"
+
+	puts -nonewline "\tRepmgr029.z4.f: restart the first client."
+	set env2 [berkdb_env -errpfx CLIENT -home $clientdir -txn -rep \
+	    -thread -recover -create -verbose [list rep $rv]]
+	$env2 rep_config {mgr2sitestrict on}
+	$env2 repmgr -local [list localhost $port1] -start client
+	await_startup_done $env2
+	puts "."; flush stdout
+
+	# The retry may race with limbo resolution; allow one more attempt
+	# after a short pause if the first retry still reports UNAVAIL.
+	puts "\tRepmgr029.z4.g: try to join the second client again"
+	if {[catch {$env3 repmgr -start client} result] && \
+	    [string match "*REP_UNAVAIL*" $result]} {
+		puts "\tRepmgr029.z4.h: pause and try again"
+		tclsleep 3
+		$env3 repmgr -start client
+	}
+	await_startup_done $env3 100
+	error_check_good nsites_3 [$env3 rep_get_nsites] 3
+
+	error_check_good s_3_close [$env3 close] 0
+	error_check_good s_2_close [$env2 close] 0
+	error_check_good s_1_close [$env1 close] 0
+}
+
+# Cold-boot an established group, without specifying any remote sites, and see
+# that they can elect a master (demonstrating that they have recorded each
+# others' addresses).
+#
+proc z5 {} {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {port0 port1 port2} [available_ports 3] {}
+
+	set masterdir $testdir/MASTERDIR
+	set clientdir $testdir/CLIENTDIR
+	set clientdir2 $testdir/CLIENTDIR2
+
+	file mkdir $masterdir
+	file mkdir $clientdir
+	file mkdir $clientdir2
+
+	puts -nonewline "\tRepmgr029.z5.a: Set up a group of 3, one master and two clients."
+	set env1 [berkdb env -create -errpfx MASTER -home $masterdir \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env1 repmgr -local [list localhost $port0] -start master
+	error_check_good nsites_1 [$env1 rep_get_nsites] 1
+	puts -nonewline "." ; flush stdout
+
+	set env2 [berkdb env -create -errpfx CLIENT -home $clientdir \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env2 repmgr -local [list localhost $port1] \
+	    -remote [list localhost $port0] -start client
+	await_startup_done $env2
+	error_check_good nsites_2 [$env2 rep_get_nsites] 2
+	puts -nonewline "." ; flush stdout
+
+	set env3 [berkdb env -create -errpfx CLIENT2 -home $clientdir2 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env3 repmgr -local [list localhost $port2] \
+	    -remote [list localhost $port0] -start client
+	await_startup_done $env3
+	error_check_good nsites_3 [$env1 rep_get_nsites] 3
+	puts "." ; flush stdout
+
+	puts "\tRepmgr029.z5: Shut down all sites and then restart with election"
+	error_check_good s_2_close [$env2 close] 0
+	error_check_good s_3_close [$env3 close] 0
+	error_check_good s_1_close [$env1 close] 0
+
+	# Restart every site with -start elect and NO -remote helper: each
+	# must rely on its recorded membership list to find the others.
+	set env1 [berkdb env -create -errpfx A -home $masterdir \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env1 repmgr -local [list localhost $port0] -start elect -pri 100
+	set env2 [berkdb env -create -errpfx B -home $clientdir \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env2 repmgr -local [list localhost $port1] -start elect -pri 200
+	set env3 [berkdb env -create -errpfx C -home $clientdir2 \
+	    -txn -rep -thread -recover -verbose [list rep $rv]]
+	$env3 repmgr -local [list localhost $port2] -start elect -pri 140
+
+	puts "\tRepmgr029.z5: Wait for election to choose a new master"
+	await_condition {[repmgr029_known_master $env1 $env2 $env3]}
+	error_check_good nsites_1 [$env1 rep_get_nsites] 3
+	error_check_good nsites_2 [$env2 rep_get_nsites] 3
+	error_check_good nsites_3 [$env3 rep_get_nsites] 3
+
+	error_check_good s_3_close [$env3 close] 0
+	error_check_good s_1_close [$env1 close] 0
+	error_check_good s_2_close [$env2 close] 0
+}
+
+# Remove a site while it is disconnected, and see if it can get an event when it
+# tries to reconnect.
(2nd try)
+proc z2 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC portD portE} [available_ports 5] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+	set dirD $testdir/D
+	set dirE $testdir/E
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+	file mkdir $dirD
+	file mkdir $dirE
+
+	puts -nonewline "\tRepmgr029.z2.a: Set up a group of 5: A, B, C, D, E"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envA repmgr -local [list localhost $portA] -start master
+	error_check_good nsites_a [$envA rep_get_nsites] 1
+	puts -nonewline "."; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+	error_check_good nsites_b [$envB rep_get_nsites] 2
+	puts -nonewline "."; flush stdout
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+	error_check_good nsites_c [$envC rep_get_nsites] 3
+	puts -nonewline "." ; flush stdout
+
+	# It is ideal to increase the await time when the group size is large.
+	set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envD repmgr -local [list localhost $portD] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envD 100
+	error_check_good nsites_d [$envD rep_get_nsites] 4
+	puts -nonewline "." ; flush stdout
+
+	set envE [berkdb env -create -errpfx E -home $dirE -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envE repmgr -local [list localhost $portE] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envE 200
+	error_check_good nsites_e [$envE rep_get_nsites] 5
+	puts "." ; flush stdout
+
+	puts "\tRepmgr029.z2.b: shut down sites D and E"
+	error_check_good s_d_close [$envD close] 0
+	error_check_good s_e_close [$envE close] 0
+
+	# D is removed while it is down; D and E therefore retain a stale
+	# membership view in which D still belongs to the group.
+	puts "\tRepmgr029.z2.c: remove site D from the group"
+	$envA repmgr -remove [list localhost $portD]
+	error_check_good rm_at_a \
+	    [string length [repmgr029_site_list_status $envA $portD]] 0
+
+	puts "\tRepmgr029.z2.d: shut down all remaining sites"
+	error_check_good s_b_close [$envB close] 0
+	error_check_good s_c_close [$envC close] 0
+	error_check_good s_a_close [$envA close] 0
+
+	puts -nonewline "\tRepmgr029.z2.e: start up just D and E \
+	    (neither of which know that D has been removed)"
+	set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envD repmgr -local [list localhost $portD] -start elect\
+	    -timeout {connection_retry 2000000}
+	# Should comments out the await here, otherwise, envD cannot join
+	#await_startup_done $envD
+	puts -nonewline "."; flush stdout
+
+	set envE [berkdb env -create -errpfx E -home $dirE -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envE repmgr -local [list localhost $portE] -start elect
+	# Wait only until D has processed some messages (it cannot reach
+	# startup-done, having been removed from the group).
+	await_condition {[expr [stat_field $envD \
+	    rep_stat "Messages processed"] > 0]}
+	puts "."; flush stdout
+
+	puts -nonewline "\tRepmgr029.z2.f: Start sites A, B, and C"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envA repmgr -local [list localhost $portA] -start elect -pri 200
+	puts -nonewline "." ; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] -start elect -pri 150
+	puts -nonewline "." ; flush stdout
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] -start elect -pri 100
+	await_startup_done $envC 1000
+	puts "." ; flush stdout
+
+	puts "\tRepmgr029.z2.g: wait for site D to notice that it has been removed"
+	await_event $envD local_site_removed
+
+	# Yikes! This is not going to work! Site D will be rejected before it
+	# gets a chance to have its database updated! :-(
+	# [M] NOW: A is the master, B, C, E get sync with A, but D does not.
+
+	error_check_good s_d_close [$envD close] 0
+	error_check_good s_e_close [$envE close] 0
+	error_check_good s_c_close [$envC close] 0
+	error_check_good s_b_close [$envB close] 0
+	error_check_good s_a_close [$envA close] 0
+}
+
+# Remove a site while it is down. When it starts up again, it should rejoin.
+proc z1 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC} [available_ports 3] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+
+	puts -nonewline "\tRepmgr029.z1.a: Set up a group of 3: A, B, C"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envA repmgr -local [list localhost $portA] -start master
+	error_check_good nsitesA [$envA rep_get_nsites] 1
+	puts -nonewline "."; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+	error_check_good nsitesB [$envB rep_get_nsites] 2
+	puts -nonewline "."; flush stdout
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+	error_check_good nsitesC [$envC rep_get_nsites] 3
+	puts "."; flush stdout
+
+	puts "\tRepmgr029.z1.b: Shut down site B, and remove it from the group."
+	error_check_good s_b_close [$envB close] 0
+
+	$envA repmgr -remove [list localhost $portB]
+	error_check_good rm_at_a \
+	    [string length [repmgr029_site_list_status $envA $portB]] 0
+
+	# B does not know it was removed; with A and C up its automatic
+	# rejoin should succeed, so no LOCAL_SITE_REMOVED event is expected.
+	puts "\tRepmgr029.z1.c: restart B"
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -timeout {connection_retry 2000000} -start client
+	await_startup_done $envB
+
+	# make sure we haven't fired a LOCAL_SITE_REMOVED event to B
+	set ev [find_event [$envB event_info] local_site_removed]
+	error_check_good site_b_not_removed [string length $ev] 0
+
+	# Now try it again, only this time the auto-rejoin fails due to lack of
+	# acks, so B should shut down and fire LOCAL_SITE_REMOVED event. TODO:
+	# should we have some sort of stat query so that the application can
+	# tell whether threads are running? Or is that just redundant with the
+	# event?
+	puts "\tRepmgr029.z1.d: shut down and remove site B again"
+	error_check_good s_b_close [$envB close] 0
+	$envA repmgr -remove [list localhost $portB]
+
+	puts "\tRepmgr029.z1.e: shut down site C, and then restart B"
+	error_check_good s_c_close [$envC close] 0
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -event]
+	$envB repmgr -local [list localhost $portB] \
+	    -timeout {connection_retry 2000000} -start client
+	await_event $envB local_site_removed
+
+	error_check_good s_b_close [$envB close] 0
+	error_check_good s_a_close [$envA close] 0
+}
+
+# Test "sharing", by constructing a situation where a site that's been down for
+# a while has an obsolete, too-high notion of nsites. On a cold boot, if that
+# site is needed, it would spoil the election by requiring too many votes,
+# unless it gets a hint from other sites.
+#
+# Create a group of 6 sites, A, B, C, D, E, F. Make sure F knows nsites is 6;
+# then shut it down. Remove E; now nsites is 5 (A, B, C, D, f).
Then remove D;
+# nsites is 4 (A, B, C, f). Now shut down everyone, and then reboot only A, B,
+# and F (leave C down). Try to elect a master.
+#
+proc z9 { } {
+	global rep_verbose
+	global testdir
+
+	set rv [ expr $rep_verbose ? on : off ]
+
+	env_cleanup $testdir
+	foreach {portA portB portC portD portE portF} [available_ports 6] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+	set dirD $testdir/D
+	set dirE $testdir/E
+	set dirF $testdir/F
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+	file mkdir $dirD
+	file mkdir $dirE
+	file mkdir $dirF
+
+	# Each environment's -errpfx matches its site letter so verbose
+	# replication output can be attributed to the right site (sites C-F
+	# previously all used the copy-pasted prefix "B").
+	puts -nonewline "\tRepmgr029.z9: Set up a group of 6"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envA repmgr -local [list localhost $portA] -start master
+	puts -nonewline "." ; flush stdout
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+	puts -nonewline "." ; flush stdout
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+	puts -nonewline "." ; flush stdout
+	set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envD repmgr -local [list localhost $portD] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envD 30
+	puts -nonewline "." ; flush stdout
+	set envE [berkdb env -create -errpfx E -home $dirE -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envE repmgr -local [list localhost $portE] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envE 30
+	puts -nonewline "." ; flush stdout
+	set envF [berkdb env -create -errpfx F -home $dirF -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envF repmgr -local [list localhost $portF] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envF 40
+	puts "."
+
+	# F goes down believing nsites is 6; the removals below happen
+	# without its knowledge.
+	puts "\tRepmgr029.z9: Shut down site F"
+	$envF close
+
+	puts "\tRepmgr029.z9: Remove site E"
+	$envE close
+	$envA repmgr -remove [list localhost $portE]
+
+	puts "\tRepmgr029.z9: Remove site D"
+	$envD close
+	$envA repmgr -remove [list localhost $portD]
+
+	puts "\tRepmgr029.z9: Shut down site C"
+	$envC close
+
+	puts "\tRepmgr029.z9: Bounce the master"
+	$envA close
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envA repmgr -local [list localhost $portA] -start elect
+
+	# We now have a group of 4, with only A and B running. That's not
+	# enough to elect a master.
+
+	puts "\tRepmgr029.z9: Restart site F"
+	set envF [berkdb env -create -errpfx F -home $dirF -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envF repmgr -local [list localhost $portF] -start elect
+
+	# There are now 3 sites running, in a 4-site group. That should be
+	# enough to elect a master, if site F can be advised of the fact that
+	# the group size has been reduced.
+
+	# Wait for an election to complete.
+	await_condition {[repmgr029_known_master $envA $envF $envB]} 30
+
+	$envA close
+	$envB close
+	$envF close
+}
+
+# See that a membership list gets restored after an interrupted internal init.
+proc z10 { } {
+	global rep_verbose
+	global testdir
+	global tclsh_path
+	global test_path
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC} [available_ports 3] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+
+	# Small log files make it cheap to archive past C's last log file
+	# and so force an internal init on its restart.
+	set pagesize 4096
+	set log_max [expr $pagesize * 8]
+
+	puts "\tRepmgr029.z10: Set up a group of 3, A (master), B, C"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envA repmgr -local [list localhost $portA] -start master
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+
+	# Keep generating txns and archiving until the master's first log
+	# file is beyond the last one C has, so C can only catch up by
+	# internal init.
+	puts "\tRepmgr029.z10: Shut down site C and generate enough churn to force internal init"
+	set log_endC [get_logfile $envC last]
+	$envC close
+
+	set niter 50
+	while { [get_logfile $envA first] <= $log_endC } {
+		$envA test force noarchive_timeout
+		rep_test btree $envA NULL $niter 0 0 0 -pagesize $pagesize
+		$envA log_flush
+		$envA log_archive -arch_remove
+	}
+
+	# Use separate process so that it works even on Windows.
+	# Inhibit master from sending any PAGE messages.
+	puts "\tRepmgr029.z10: Restart site C in a separate process"
+	$envA test abort no_pages
+	set pid [exec $tclsh_path $test_path/wrap.tcl \
+	    repmgr029script.tcl $testdir/repmgr029script.log $dirC $portC $rv &]
+	watch_procs $pid 5
+
+	puts "\tRepmgr029.z10: Shut down the rest of the group"
+	$envB close
+	$envA close
+
+	puts "\tRepmgr029.z10: Restart site C alone"
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] -start elect
+
+	# Even though the init was interrupted, C should have retained its
+	# knowledge of the other sites' addresses.
+	puts "\tRepmgr029.z10: Check list of known sites, A and B"
+	set l [$envC repmgr_site_list]
+	foreach p [list $portA $portB] {
+		set sought [list localhost $p]
+		error_check_good port$p \
+		    [expr [lsearch -glob $l [concat * $sought *]] >= 0] 1
+	}
+	$envC close
+}
+
+# See that a client notices a membership change that happens while it is
+# disconnected (via the internal init completion trigger).
+proc z11 { } {
+	global rep_verbose
+	global testdir
+	global tclsh_path
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	env_cleanup $testdir
+	foreach {portA portB portC portD} [available_ports 4] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+	set dirD $testdir/D
+
+	file mkdir $dirA
+	file mkdir $dirB
+	file mkdir $dirC
+	file mkdir $dirD
+
+	# Small log files so churn below can force internal init at C.
+	set pagesize 4096
+	set log_max [expr $pagesize * 8]
+
+	puts -nonewline "\tRepmgr029.z11: Set up a group initially of size 3"
+	set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envA repmgr -local [list localhost $portA] -start master
+	puts -nonewline "." ; flush stdout
+
+	set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envB repmgr -local [list localhost $portB] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envB
+	puts -nonewline "." ; flush stdout
+
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envC repmgr -local [list localhost $portC] \
+	    -remote [list localhost $portA] -start client
+	await_startup_done $envC
+	puts "."
+
+	# D joins while C is down, so C must discover D only via the
+	# membership data carried along with its later internal init.
+	puts "\tRepmgr029.z11: Shut down C"
+	$envC close
+
+	puts "\tRepmgr029.z11: Join new site D"
+	set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \
+	    -recover -verbose [list rep $rv] -log_max $log_max]
+	$envD repmgr -local [list localhost $portD] \
+	    -remote [list localhost $portA] -start client
+
+	puts "\tRepmgr029.z11: Generate enough churn to force internal init at C later"
+	set tail [get_logfile $envA last]
+	set niter 50
+	while { [get_logfile $envA first] <= $tail } {
+		$envA test force noarchive_timeout
+		rep_test btree $envA NULL $niter 0 0 0 -pagesize $pagesize
+		$envA log_flush
+		$envA log_archive -arch_remove
+	}
+
+	puts "\tRepmgr029.z11: Restart site C"
+	set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+	    -recover -verbose [list rep $rv]]
+	$envC repmgr -local [list localhost $portC] -start elect
+	await_startup_done $envC
+
+	# C should now know about D, which joined while C was down.
+	puts "\tRepmgr029.z11: Check list of known sites"
+	set l [$envC repmgr_site_list]
+	foreach p [list $portA $portB $portD] {
+		set sought [list localhost $p]
+		error_check_good port$p \
+		    [expr [lsearch -glob $l [concat * $sought *]] >= 0] 1
+	}
+	$envC close
+	$envD close
+	$envB close
+	$envA close
+}
+
+# Exercise the new connection-related event types.
+proc z12 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB portC} [available_ports 3] {} + + set dirA $testdir/A + set dirB $testdir/B + + file mkdir $dirA + file mkdir $dirB + + puts "\tRepmgr029.z12: Start primordial master site A" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envA repmgr -local [list localhost $portA] -start master \ + -timeout {connection_retry 2000000} + + puts "\tRepmgr029.z12: Add new client site B" + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envB repmgr -remote [list localhost $portA] \ + -local [list localhost $portB] -start client + await_startup_done $envB + + puts "\tRepmgr029.z12: Check connection events" + set ev [find_event [$envA event_info] connection_established] + error_check_good ev_len [llength $ev] 2 + error_check_good ev_eid [lindex $ev 1] [repmgr029_get_eid $envA $portB] + set ev [find_event [$envB event_info] connection_established] + error_check_good ev_len2 [llength $ev] 2 + error_check_good ev_eid2 [lindex $ev 1] [repmgr029_get_eid $envB $portA] + + puts "\tRepmgr029.z12: Shut down site B, observe event at site A" + $envB close + set ev [await_event $envA connection_broken] + error_check_good ev_len3 [llength $ev] 2 + set evinfo [lindex $ev 1] + error_check_good ev_len3b [llength $evinfo] 2 + foreach {eid err} $evinfo {} + error_check_good ev_eid3 $eid [repmgr029_get_eid $envA $portB] + puts "\t\tRepmgr029.z12: (connection_broken error code is $err)" + + set ev [await_event $envA connection_retry_failed] + error_check_good ev_len3 [llength $ev] 2 + set evinfo [lindex $ev 1] + error_check_good ev_len3c [llength $evinfo] 2 + foreach {eid err} $evinfo {} + error_check_good ev_eid3 $eid [repmgr029_get_eid $envA $portB] + puts "\t\tRepmgr029.z12: (retry_failed error code is 
$err)" + + puts "\tRepmgr029.z12: Shut down site A, then restart B" + $envA close + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envB repmgr -local [list localhost $portB] -start elect \ + -timeout {connection_retry 2000000} + # new event instances should continue to be fired indefinitely. For + # now, consider '3' to be close enough to infinity. + for { set i 1 } { $i <= 3 } { incr i } { + puts "\tRepmgr029.z12: Observe event ($i)" + set ev [await_event $envB connection_retry_failed] + error_check_good ev_eid4 [lindex $ev 1 0] [repmgr029_get_eid $envB $portA] + error_check_good never_estd \ + [string length [find_event [$envB event_info] \ + connection_established]] 0 + # According to our definition of "connection broken" you can't + # "break" what you never had. + error_check_good never_broken \ + [string length [find_event [$envB event_info] \ + connection_broken]] 0 + $envB event_info -clear + } + $envB close +} + +# Make sure applications aren't bothered by perm failed events from failed GMDB +# operations. +proc z13 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB portC} [available_ports 3] {} + + set dirA $testdir/A + set dirB $testdir/B + set dirC $testdir/C + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + + puts -nonewline "\tRepmgr029.z13: Create first 2 sites" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envA repmgr -local [list localhost $portA] -start master + puts -nonewline "." ; flush stdout + + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client + await_startup_done $envB + puts "." 
+ + puts "\tRepmgr029.z13: Shut down site B, try to add third site, site C" + $envB close + + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -recover -verbose [list rep $rv]] + set ret [catch {$envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client} result] + error_check_bad no_failure $ret 0 + error_check_match unavail $result "*DB_REP_UNAVAIL*" + + puts "\tRepmgr029.z13: Make sure site A application didn't see a perm failure" + error_check_good no_failure \ + [string length [find_event [$envA event_info] perm_failed]] 0 + + $envC close + $envA close +} + +# Make sure we can add/remove sites even when ALL policy is in effect. +proc z14 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB portC} [available_ports 3] {} + + set dirA $testdir/A + set dirB $testdir/B + set dirC $testdir/C + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + + foreach policy {all allpeers} { + puts "\tRepmgr029.z14: Using \"$policy\" ack policy" + puts "\tRepmgr029.z14: Create first site A" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envA repmgr -local [list localhost $portA] -start master -ack $policy + + puts "\tRepmgr029.z14: Add 2nd site, B" + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client -ack $policy + await_startup_done $envB + + puts "\tRepmgr029.z14: Add 3rd site, C" + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client -ack $policy + await_startup_done $envC + + puts "\tRepmgr029.z14: Remove site B" + $envC repmgr -remove [list 
localhost $portB] + error_check_good removed \ + [string length [repmgr029_site_list_status $envA $portB]] 0 + + $envB close + $envC close + $envA close + } +} + +# Rescind a pending (previously incomplete) change, and check effect on nsites. +proc z15 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB portC portD portE} [available_ports 5] {} + + set dirA $testdir/A + set dirB $testdir/B + set dirC $testdir/C + set dirD $testdir/D + set dirE $testdir/E + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + file mkdir $dirD + file mkdir $dirE + + puts -nonewline "\tRepmgr029.z15: Create initial group of 4 sites" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -verbose [list rep $rv]] + $envA repmgr -local [list localhost $portA] -start master + puts -nonewline "." ; flush stdout + + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -verbose [list rep $rv]] + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client + await_startup_done $envB + puts -nonewline "." ; flush stdout + + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -verbose [list rep $rv]] + $envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client + await_startup_done $envC + puts -nonewline "." ; flush stdout + + set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \ + -verbose [list rep $rv]] + $envD repmgr -local [list localhost $portD] \ + -remote [list localhost $portA] -start client + await_startup_done $envD + puts "." 
+ + puts "\tRepmgr029.z15: Shut down C and D, and try to add E" + $envC close + $envD close + set envE [berkdb env -create -errpfx E -home $dirE -txn -rep -thread \ + -verbose [list rep $rv] -event] + set ret [catch {$envE repmgr -local [list localhost $portE] \ + -remote [list localhost $portA] -start client} result] + error_check_bad no_failure $ret 0 + error_check_match unavail $result "*DB_REP_UNAVAIL*" + error_check_good nsites [$envA rep_get_nsites] 5 + await_condition {[expr [$envB rep_get_nsites] == 5]} + + puts "\tRepmgr029.z15: Rescind the addition of site E, by removing it" + $envA repmgr -remove [list localhost $portE] + error_check_good nsites2 [$envA rep_get_nsites] 4 + await_condition {[expr [$envB rep_get_nsites] == 4]} + + puts -nonewline "\tRepmgr029.z15: Restart sites C and D" + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envC repmgr -local [list localhost $portC] -start client + await_startup_done $envC + puts -nonewline "." ; flush stdout + + set envD [berkdb env -create -errpfx D -home $dirD -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envD repmgr -local [list localhost $portD] -start client + await_startup_done $envD + puts "." + + puts "\tRepmgr029.z15: Try adding new site E again,\ + this time it should succeed" + # Note that it was not necessary to bounce the env handle. 
+ $envE repmgr -start client + error_check_good nsites [$envA rep_get_nsites] 5 + await_condition {[expr [$envB rep_get_nsites] == 5]} + await_startup_done $envE + + puts "\tRepmgr029.z15: Shut down C and D again,\ + and this time try removing site E" + $envC close + $envD close + set ret [catch {$envA repmgr -remove [list localhost $portE]} result] + error_check_bad no_failure2 $ret 0 + error_check_match unavail2 $result "*DB_REP_UNAVAIL*" + error_check_good nsites2 [$envA rep_get_nsites] 5 + error_check_good nsites3 [$envB rep_get_nsites] 5 + + set db [berkdb open -env $envA -thread __db.rep.system __db.membership] + set SITE_DELETING 2 + error_check_good deleting \ + [repmgr029_gmdb_status $db localhost $portE] $SITE_DELETING + $db close + + puts "\tRepmgr029.z15: See that site E fired event for as little\ + as DELETING status" + await_event $envE local_site_removed + $envE close + + puts "\tRepmgr029.z15: Rescind the removal of site E" + # The only way add site E is to have it start and try to join. Someday + # (maybe even before code freeze) it will be possible to restart the + # zombie carcass in the same env handle. + set envE [berkdb env -create -errpfx E -home $dirE -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envE repmgr -local [list localhost $portE] -start client + error_check_good nsites4 [$envA rep_get_nsites] 5 + error_check_good nsites5 [$envB rep_get_nsites] 5 + + $envE close + $envB close + $envA close +} + +# See that removing a non-existent site acts as a no-op, and doesn't yield an +# error. +proc z16 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + # Group size will be three, but allocate an extra port to act as the + # non-existent sites. 
+ foreach {portA portB portC portD} [available_ports 4] {} + + set dirA $testdir/A + set dirB $testdir/B + set dirC $testdir/C + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + + puts -nonewline "\tRepmgr029.z16: Create a group of 3 sites" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -verbose [list rep $rv]] + $envA repmgr -local [list localhost $portA] -start master + puts -nonewline "." ; flush stdout + + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -verbose [list rep $rv]] + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client + await_startup_done $envB + puts -nonewline "." ; flush stdout + + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -verbose [list rep $rv] -event] + $envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client + await_startup_done $envC + puts "." + + puts "\tRepmgr029.z16: Remove non-existent site" + $envB repmgr -remove [list localhost $portD] + error_check_good nsites [$envA rep_get_nsites] 3 + error_check_good nsites [$envB rep_get_nsites] 3 + error_check_good nsites [$envC rep_get_nsites] 3 + + # While we're on the topic of removing sites, let's try having a site + # remove itself. + puts "\tRepmgr029.z16: Have site C remove itself" + $envC repmgr -remove [list localhost $portC] + error_check_good nsites [$envA rep_get_nsites] 2 + await_event $envC local_site_removed + + $envC close + $envB close + $envA close +} + +# Exercise group creation with non-default ack policies. 
+proc z17 { } {
+	global rep_verbose
+	global testdir
+
+	set rv off
+	if { $rep_verbose == 1 } {
+		set rv on
+	}
+
+	foreach {portA portB portC} [available_ports 3] {}
+
+	set dirA $testdir/A
+	set dirB $testdir/B
+	set dirC $testdir/C
+
+	foreach policy {one onepeer all allpeers allavailable none} {
+		env_cleanup $testdir
+
+		file mkdir $dirA
+		file mkdir $dirB
+		file mkdir $dirC
+
+		puts -nonewline "\tRepmgr029.z17: Create a group of 3 sites using\
+		    `$policy' ack policy"
+		set envA [berkdb env -create -errpfx A -home $dirA -txn \
+		    -rep -thread -verbose [list rep $rv]]
+		$envA repmgr -local [list localhost $portA creator] \
+		    -start elect -ack $policy
+		puts -nonewline "." ; flush stdout
+
+		set envB [berkdb env -create -errpfx B -home $dirB -txn \
+		    -rep -thread -verbose [list rep $rv] -event]
+		$envB repmgr -local [list localhost $portB] \
+		    -remote [list localhost $portA] -start elect -ack $policy
+		await_startup_done $envB
+		puts -nonewline "." ; flush stdout
+
+		set envC [berkdb env -create -errpfx C -home $dirC -txn \
+		    -rep -thread -verbose [list rep $rv] -event]
+		$envC repmgr -local [list localhost $portC] \
+		    -remote [list localhost $portA] -start elect
+		await_startup_done $envC
+		puts "."
+
+		puts "\tRepmgr029.z17: Remove both clients."
+		$envA repmgr -remove [list localhost $portB]
+		error_check_good nsites [$envA rep_get_nsites] 2
+		await_event $envB local_site_removed
+		$envB close
+		$envA repmgr -remove [list localhost $portC]
+		error_check_good nsites [$envA rep_get_nsites] 1
+		await_event $envC local_site_removed
+		$envC close
+
+		$envA close
+	}
+}
+
+#
+# Add new site to existing group, already populated via hot backup
+# a. Start site A as group creator.
+# b. Start site B as client, and wait it for sync.
+# c. Hot backup the site B's environment to directory C,
+#    and start up site C using the directory C.
+# d.
Check membership at site C +# +proc z18 { } { + global rep_verbose + global testdir + global util_path + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB portC} [available_ports 3] {} + + set dirA $testdir/dirA + set dirB $testdir/dirB + set dirC $testdir/dirC + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + + puts -nonewline "\tRepmgr029.z18.a: Start site A as master." + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envA repmgr -local [list localhost $portA creator] \ + -start master + error_check_good nsites_A [$envA rep_get_nsites] 1 + puts "." ; flush stdout + + puts -nonewline "\tRepmgr029.z18.b. Start site B" + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -verbose [list rep $rv]] + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client + await_startup_done $envB + error_check_good nsites_B [$envB rep_get_nsites] 2 + puts "." ; flush stdout + + puts "\tRepmgr029.z18.c.1: Hot backup the site B's environment to $dirC" + # Ensure $dirC is empty before hot backup. + set files [glob -nocomplain $dirC/*] + error_check_good no_files [llength $files] 0 + + eval exec $util_path/db_hotbackup -vh $dirB -b $dirC + + puts "\tRepmgr029.z18.c.2: Start up site C in $dirC." + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client + await_startup_done $envC + error_check_good nsites_C [$envC rep_get_nsites] 3 + + puts "\tRepmgr029.z18.c.3: Verify site C starts without internal init." 
+ error_check_good no_pages [stat_field $envC rep_stat "Pages received"] 0 + + error_check_good siteC_close [$envC close] 0 + error_check_good siteB_close [$envB close] 0 + error_check_good siteA_close [$envA close] 0 +} + +# +# Initiate group change during long-running transaction at master +# (waits for transaction to abort) +# a. start site A as master +# b. begin a transaction, write a record +# c. start a separate process to add a second site ("B") to the group +# d. in the transaction in b, write a record and sleep for a second in a loop. +# Would run into deadlock +# e. abort the txn when the deadlock occurs +# f. after that, the joining operation in the other thread should complete +# successfully. +# +proc z19 {} { + global rep_verbose + global testdir + global tclsh_path + global test_path + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + env_cleanup $testdir + foreach {portA portB} [available_ports 2] {} + + set dirA $testdir/dirA + set dirB $testdir/dirB + + file mkdir $dirA + file mkdir $dirB + + puts "\tRepmgr029.z19.a: Start up site A as master " + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envA repmgr -local [list localhost $portA creator] -start master + error_check_good nsites_A [$envA rep_get_nsites] 1 + + puts "\tRepmgr029.z19.b: Begin txn and open db on master." 
+ set txn [$envA txn] + error_check_good txn_begin [is_valid_txn $txn $envA] TRUE + set testfile repmg029.db + set oflags {-create -btree -mode 0755 -thread -env $envA \ + -txn $txn $testfile} + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tRepmgr029.z19.c: Add site B in another process" + set pid [exec $tclsh_path $test_path/wrap.tcl repmgr029script2.tcl \ + $testdir/repmgr029script2.log $dirB $portB $dirA $portA &] + + puts "\tRepmgr029.z19.d: Write data in the txn, expecting deadlock" + set maxcount 100 + for { set count 0 } { $count < $maxcount } { incr count } { + set key $count + set data "gmdb data" + if { [catch {$db put -txn $txn $key $data} ret] } { + error_check_good put_deadlock \ + [is_substr $ret DB_LOCK_DEADLOCK] 1 + break + } else { + tclsleep 1 + } + } + error_check_good put_deadlock [is_substr $ret DB_LOCK_DEADLOCK] 1 + error_check_good txn_abort [$txn abort] 0 + + puts "\tRepmgr029.z19.e: Confirm B has joined." + for { set count 0 } { $count < $maxcount } { incr count } { + if { [$envA rep_get_nsites] > 1 } { + break + } else { + tclsleep 1 + } + } + + watch_procs $pid 5 + error_check_good db_close [$db close] 0 + error_check_good master_close [$envA close] 0 + + # Check output file of the sub-process for failures. 
+	set file repmgr029script2.log
+	set errstrings [eval findfail $testdir/$file]
+	foreach str $errstrings {
+		puts "$str"
+	}
+	error_check_good errstrings_llength [llength $errstrings] 0
+}
+
+proc repmgr029_dump_db { e } {
+	set db [berkdb open -env $e -thread __db.rep.system __db.membership]
+	set c [$db cursor]
+	set format_version [lindex [$c get -first] 0 1]
+	binary scan $format_version II fmt vers
+	puts "version $vers"
+	while {[llength [set r [$c get -next]]] > 0} {
+		set k [lindex $r 0 0]
+		set v [lindex $r 0 1]
+		binary scan $k I len
+		set hostname [string range $k 4 [expr 2 + $len]]
+		binary scan $hostname A* host
+		binary scan [string range $k [expr 4 + $len] end] S port
+		binary scan $v I status
+		puts "{$host $port} $status"
+	}
+	$c close
+	$db close
+}
+
+proc repmgr029_get_eid { e port } {
+	set sle [repmgr029_get_site_list_entry $e $port]
+	if { [string length $sle] == 0} {
+		return ""
+	}
+	return [lindex $sle 0]
+}
+
+proc repmgr029_get_site_list_entry { e port } {
+	foreach sle [$e repmgr_site_list] {
+		set p [lindex $sle 2]
+		if { $p == $port } {
+			return $sle
+		}
+	}
+	return ""
+}
+
+proc repmgr029_gmdb_status { db host port } {
+	set l [string length $host]
+	set key [binary format Ia*cS [expr $l + 1] $host 0 $port]
+	set kvlist [$db get $key]
+	if {[llength $kvlist] == 0} {
+		return 0
+	}
+	set kvpair [lindex $kvlist 0]
+	set val [lindex $kvpair 1]
+	binary scan $val I status
+	return $status
+}
+
+proc repmgr029_gmdb_version { db } {
+	set key [binary format IS 0 0]
+	set kvlist [$db get $key]
+	set kvpair [lindex $kvlist 0]
+	set val [lindex $kvpair 1]
+	binary scan $val II format version
+	return $version
+}
+
+proc repmgr029_known_master { e1 e2 e3 } {
+	foreach e [list $e1 $e2 $e3] {
+		set m [stat_field $e rep_stat "Master environment ID"]
+		if {$m == -2} {
+			return no
+		}
+	}
+	return yes
+}
+
+proc repmgr029_site_list_status { e port } {
+	set sle [repmgr029_get_site_list_entry $e $port]
+	if { [string length $sle] == 0 } {
+		return ""
+	}
+	return [lindex $sle 3]
+}
diff --git a/test/tcl/repmgr029script.tcl b/test/tcl/repmgr029script.tcl
new file mode 100644
index 00000000..29595067
--- /dev/null
+++ b/test/tcl/repmgr029script.tcl
@@ -0,0 +1,33 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+
+source ./include.tcl
+source $test_path/test.tcl
+source $test_path/testutils.tcl
+source $test_path/reputils.tcl
+
+set dirC [lindex $argv 0]
+set portC [lindex $argv 1]
+set rv [lindex $argv 2]
+
+proc in_sync_state { d } {
+	global util_path
+	set stat [exec $util_path/db_stat -N -r -R A -h $d]
+	puts "stat is $stat"
+	set in_page [is_substr $stat "SYNC_PAGE"]
+	puts "value is $in_page"
+	return $in_page
+}
+
+puts "Start site C"
+set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \
+    -recover -verbose [list rep $rv]]
+$envC repmgr -local [list localhost $portC] -start elect
+
+puts "Wait until it gets into SYNC_PAGES state"
+while {![in_sync_state $dirC]} {
+	tclsleep 1
+}
diff --git a/test/tcl/repmgr029script2.tcl b/test/tcl/repmgr029script2.tcl
new file mode 100644
index 00000000..0c4ca275
--- /dev/null
+++ b/test/tcl/repmgr029script2.tcl
@@ -0,0 +1,44 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
+#
+# $Id$
+
+source ./include.tcl
+source $test_path/test.tcl
+source $test_path/testutils.tcl
+source $test_path/reputils.tcl
+
+set dirB [lindex $argv 0]
+set portB [lindex $argv 1]
+set dirA [lindex $argv 2]
+set portA [lindex $argv 3]
+
+puts "Repmgr029script2: Open env of A."
+set ma_cmd "berkdb_env -home $dirA -txn -thread" +set envA [eval $ma_cmd] +error_check_good script_env_open [is_valid_env $envA] TRUE + +puts "Repmgr029script2: Wait until there is an active txn" +set maxcount 100 +for { set count 0 } { $count < $maxcount } { incr count } { + set active_txn_1 [stat_field $envA txn_stat "Number active txns"] + if { $active_txn_1 > 0 } { + break + } else { + tclsleep 1 + } +} +error_check_good a_txn_A_1 $active_txn_1 1 + +puts "Repmgr029script2: Start up B. It finishes when the txn has been aborted." +set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread] +$envB repmgr -local [list localhost $portB] -remote [list localhost $portA] \ + -start client +await_startup_done $envB +error_check_good a_txn_A_0 [stat_field $envA txn_stat "Number active txns"] 0 + +puts "Repmgr029script2: Check gmdb at site B" +error_check_good nsites_B [$envB rep_get_nsites] 2 + +error_check_good client_close [$envB close] 0 \ No newline at end of file diff --git a/test/tcl/repmgr030.tcl b/test/tcl/repmgr030.tcl new file mode 100644 index 00000000..5c4ac3a5 --- /dev/null +++ b/test/tcl/repmgr030.tcl @@ -0,0 +1,153 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST repmgr030 +# TEST repmgr multiple client-to-client peer test. +# TEST +# TEST Start an appointed master and three clients. The third client +# TEST configures the other two clients as peers and delays client +# TEST sync. Add some data and confirm that the third client uses first +# TEST client as a peer. Close the master so that the first client now +# TEST becomes the master. Add some more data and confirm that the +# TEST third client now uses the second client as a peer. +# TEST +# TEST Run for btree only because access method shouldn't matter. 
+# TEST +proc repmgr030 { { niter 100 } { tnum "030" } args } { + + source ./include.tcl + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." + return + } + + set method "btree" + set args [convert_args $method $args] + + puts "Repmgr$tnum ($method): repmgr multiple c2c peer test." + repmgr030_sub $method $niter $tnum $args +} + +proc repmgr030_sub { method niter tnum largs } { + global testdir + global rep_verbose + global verbose_type + set nsites 4 + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + env_cleanup $testdir + set ports [available_ports $nsites] + set omethod [convert_method $method] + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + set clientdir3 $testdir/CLIENTDIR3 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + file mkdir $clientdir3 + + # Use different connection retry timeout values to handle any + # collisions from starting sites at the same time by retrying + # at different times. + + # Open a master. + puts "\tRepmgr$tnum.a: Start a master." + set ma_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx MASTER -home $masterdir -txn -rep -thread" + set masterenv [eval $ma_envcmd] + $masterenv repmgr -ack all -pri 100 \ + -timeout {connection_retry 20000000} \ + -local [list localhost [lindex $ports 0]] \ + -start master + + # Open three clients, setting first two as peers of the third and + # configuring third for delayed sync. + puts "\tRepmgr$tnum.b: Start three clients, third with two peers." 
+ set cl_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT -home $clientdir -txn -rep -thread" + set clientenv [eval $cl_envcmd] + $clientenv repmgr -ack all -pri 80 \ + -timeout {connection_retry 10000000} \ + -local [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 3]] \ + -start client + await_startup_done $clientenv + + set cl2_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT2 -home $clientdir2 -txn -rep -thread" + set clientenv2 [eval $cl2_envcmd] + $clientenv2 repmgr -ack all -pri 50 \ + -timeout {connection_retry 5000000} \ + -local [list localhost [lindex $ports 2]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1]] \ + -remote [list localhost [lindex $ports 3]] \ + -start client + await_startup_done $clientenv2 + + set cl3_envcmd "berkdb_env_noerr -create $verbargs \ + -errpfx CLIENT3 -home $clientdir3 -txn -rep -thread" + set clientenv3 [eval $cl3_envcmd] + $clientenv3 repmgr -ack all -pri 50 \ + -timeout {connection_retry 75000000} \ + -local [list localhost [lindex $ports 3]] \ + -remote [list localhost [lindex $ports 0]] \ + -remote [list localhost [lindex $ports 1] peer] \ + -remote [list localhost [lindex $ports 2] peer] \ + -start client + await_startup_done $clientenv3 + + # Internally, repmgr does the following to determine the peer + # to use: it scans the internal list of remote sites, selecting + # the first one that is marked as a peer and that is not the + # current master. + + puts "\tRepmgr$tnum.c: Configure third client for delayed sync." + $clientenv3 rep_config {delayclient on} + + puts "\tRepmgr$tnum.d: Check third client used first client as peer." 
+ set creqs [stat_field $clientenv rep_stat "Client service requests"] + set c2reqs [stat_field $clientenv2 rep_stat "Client service requests"] + error_check_good got_client_reqs [expr {$creqs > 0}] 1 + error_check_good no_client2_reqs [expr {$c2reqs == 0}] 1 + + puts "\tRepmgr$tnum.e: Run some transactions at master." + eval rep_test $method $masterenv NULL $niter 0 0 0 $largs + + puts "\tRepmgr$tnum.f: Shut down master, first client takes over." + error_check_good masterenv_close [$masterenv close] 0 + await_expected_master $clientenv + + puts "\tRepmgr$tnum.g: Run some more transactions at new master." + eval rep_test $method $clientenv NULL $niter $niter 0 0 $largs + + puts "\tRepmgr$tnum.h: Sync delayed third client." + error_check_good rep_sync [$clientenv3 rep_sync] 0 + # Give sync requests a bit of time to show up in stats. + tclsleep 1 + + puts "\tRepmgr$tnum.i: Check third client used second client as peer." + set c2reqs [stat_field $clientenv2 rep_stat "Client service requests"] + error_check_good got_client2_reqs [expr {$c2reqs > 0}] 1 + + puts "\tRepmgr$tnum.j: Verifying client database contents." + rep_verify $clientdir $clientenv $clientdir2 $clientenv2 1 1 1 + rep_verify $clientdir $clientenv $clientdir3 $clientenv3 1 1 1 + + error_check_good client3_close [$clientenv3 close] 0 + error_check_good client2_close [$clientenv2 close] 0 + error_check_good client_close [$clientenv close] 0 +} diff --git a/test/tcl/repmgr031.tcl b/test/tcl/repmgr031.tcl new file mode 100644 index 00000000..f0f7519a --- /dev/null +++ b/test/tcl/repmgr031.tcl @@ -0,0 +1,107 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# Test for ack policies that vary throughout the group, and that change +# dynamically. 
+# +proc repmgr031 { } { + global rep_verbose + global testdir + + set rv off + if { $rep_verbose == 1 } { + set rv on + } + + set tnum "031" + + env_cleanup $testdir + foreach {portA portB portC} [available_ports 3] {} + + set dirA $testdir/A + set dirB $testdir/B + set dirC $testdir/C + + file mkdir $dirA + file mkdir $dirB + file mkdir $dirC + + puts -nonewline "\tRepmgr$tnum: Set up a group of 3:" + set envA [berkdb env -create -errpfx A -home $dirA -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envA rep_config {mgrelections off} + $envA repmgr -local [list localhost $portA] -start master -ack none + puts -nonewline "." ; flush stdout + + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envB rep_config {mgrelections off} + $envB repmgr -local [list localhost $portB] \ + -remote [list localhost $portA] -start client + await_startup_done $envB + puts -nonewline "." ; flush stdout + + set envC [berkdb env -create -errpfx C -home $dirC -txn -rep -thread \ + -recover -verbose [list rep $rv] -event] + $envC rep_config {mgrelections off} + $envC repmgr -local [list localhost $portC] \ + -remote [list localhost $portA] -start client -ack none + await_startup_done $envC + puts "." + + puts "\tRepmgr$tnum: Shut down site B." + $envB close + + puts "\tRepmgr$tnum: Write updates and check perm_failed event." + # Initially sites A and C both have "none" policy. Site C won't even + # bother to send an ack, which is just fine with site A. + # + $envA event_info -clear + set method btree + set niter 1 + eval rep_test $method $envA NULL $niter 0 0 0 + error_check_good nofailure \ + [string length [find_event [$envA event_info] perm_failed]] 0 + + # Change ack policy at site A. Site C will have to be notified of this + # change, or else the lack of an ack would cause a perm failure. 
+ # + $envA repmgr -ack quorum + eval rep_test $method $envA NULL $niter 0 0 0 + error_check_good nofailure \ + [string length [find_event [$envA event_info] perm_failed]] 0 + + puts "\tRepmgr$tnum: Shut down site A, make site C new master." + # Even though site C started sending acks for site A's benefit, its own + # ack policy should still be "none". With no other sites running, that + # will be the only way to avoid perm_failed event. + # + $envA close + $envC repmgr -start master -msgth 0 + eval rep_test $method $envC NULL $niter 0 0 0 + error_check_good nofailure \ + [string length [find_event [$envC event_info] perm_failed]] 0 + + puts "\tRepmgr$tnum: Change ack policy to quorum." + $envC repmgr -ack quorum + eval rep_test $method $envC NULL $niter 0 0 0 + error_check_bad failure \ + [string length [find_event [$envC event_info] perm_failed]] 0 + + puts "\tRepmgr$tnum: Start site B, to provide acks needed by site C" + $envC event_info -clear + set envB [berkdb env -create -errpfx B -home $dirB -txn -rep -thread \ + -recover -verbose [list rep $rv]] + $envB rep_config {mgrelections off} + $envB repmgr -local [list localhost $portB] -start client + await_startup_done $envB + + eval rep_test $method $envC NULL $niter 0 0 0 + error_check_good failure \ + [string length [find_event [$envC event_info] perm_failed]] 0 + + $envB close + $envC close +} diff --git a/test/tcl/repmgr032.tcl b/test/tcl/repmgr032.tcl new file mode 100644 index 00000000..651008d6 --- /dev/null +++ b/test/tcl/repmgr032.tcl @@ -0,0 +1,158 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr032 +# TEST The (undocumented) AUTOROLLBACK config feature. + +proc repmgr032 { { tnum 032 } args } { + source ./include.tcl + global databases_in_memory + + if { $is_freebsd_test == 1 } { + puts "Skipping replication manager test on FreeBSD platform." 
+ return + } + + # This test needs to have explicit control over whether/which databases + # are on disk versus in memory. + # + if { $databases_in_memory } { + puts "Skipping repmgr$tnum for databases_in_memory" + return + } + + set method "btree" + set args [convert_args $method $args] + foreach test_case {normal do_close nimdbs} { + puts "Repmgr$tnum: preventing auto-rollback, $test_case case" + repmgr032_sub $method $tnum $test_case $args + } +} + +proc repmgr032_sub { method tnum test_case largs } { + global testdir + global repfiles_in_memory + global rep_verbose + global verbose_type + + switch $test_case { + normal { + set do_close false + set nimdbs false + } + do_close { + set do_close true + set nimdbs false + } + nimdbs { + set do_close true + set nimdbs true + } + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on} " + } + + set repmemargs "" + if { $repfiles_in_memory } { + set repmemargs "-rep_inmem_files " + } + + env_cleanup $testdir + file mkdir [set dira $testdir/SITE_A] + file mkdir [set dirb $testdir/SITE_B] + file mkdir [set dirc $testdir/SITE_C] + foreach { porta portb portc } [available_ports 3] {} + + set common "-create -txn $verbargs $repmemargs \ + -rep -thread -event " + set common_mgr "-msgth 2" + + puts "\tRepmgr$tnum.a: Start sites." 
+ set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira" + set enva [eval $cmda] + $enva rep_config {mgrelections off} + eval $enva repmgr $common_mgr -ack allavailable \ + -local {[list localhost $porta]} -start master + + if { $nimdbs } { + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr} -env $enva -auto_commit\ + -create -mode 0644 $omethod $largs {"" nim.db}] + set niter 10 + eval rep_test $method $enva $db $niter 0 0 0 $largs + $db close + } + + set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb" + set envb [eval $cmdb] + $envb rep_config {mgrelections off} + eval $envb repmgr $common_mgr -start client \ + -local {[list localhost $portb]} -remote {[list localhost $porta]} + await_startup_done $envb + + set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc" + set envc [eval $cmdc] + $envc rep_config {mgrelections off} + $envc rep_config {autorollback off} + eval $envc repmgr $common_mgr -start client \ + -local {[list localhost $portc]} -remote {[list localhost $porta]} + await_startup_done $envc + + puts "\tRepmgr$tnum.b: Run some transactions at master." + set niter 20 + eval rep_test $method $enva NULL $niter 0 0 0 $largs + + # Remember where we are in the log. Use a log cursor to read the last + # record in the log, then do one more transaction, then read the log end + # again. This gives us a lower and upper bound on where the sync point + # should be. + # + set logc [$enva log_cursor] + set lower [lindex [$logc get -last] 0] + eval rep_test $method $enva NULL 1 0 0 0 $largs + set upper [lindex [$logc get -last] 0] + $logc close + + puts "\tRepmgr$tnum.c: Shut down client B, then do a few more txns" + $envb close + eval rep_test $method $enva NULL $niter 0 0 0 $largs + + puts "\tRepmgr$tnum.d: Kill master, and restart client B as new master" + if { $do_close } { + $envc close + } + $enva close + # maybe wait for masterfailure event at env b? 
+ set envb [eval $cmdb] + $envb rep_config {mgrelections off} + eval $envb repmgr $common_mgr -start master \ + -local {[list localhost $portb]} -remote {[list localhost $portc]} + + if { $do_close } { + set envc [eval $cmdc -recover] + $envc rep_config {autorollback off} + eval $envc repmgr $common_mgr -start client \ + -local {[list localhost $portc]} \ + -remote {[list localhost $portb]} + } + puts "\tRepmgr$tnum.e: Wait for WOULD_ROLLBACK event." + await_condition {[is_event_present $envc would_rollback]} + + set sync_point [lindex [find_event [$envc event_info] would_rollback] 1] + puts "\tRepmgr$tnum.f: Reported sync point is $sync_point" + + error_check_good lower_bound [$envb log_compare $lower $sync_point] -1 + + # The upper bound must be >= to the sync point, which means log_compare + # must *NOT* return a -1. The other possible values (0, 1) are OK. + # + error_check_bad upper_bound [$envb log_compare $upper $sync_point] -1 + + $envc close + $envb close + set be_quiet "" +} diff --git a/test/tcl/repmgr100.tcl b/test/tcl/repmgr100.tcl new file mode 100644 index 00000000..0664c568 --- /dev/null +++ b/test/tcl/repmgr100.tcl @@ -0,0 +1,108 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# + +# TEST repmgr100 +# TEST Basic test of repmgr's multi-process master support. +# TEST +# TEST Set up a simple 2-site group, create data and replicate it. +# TEST Add a second process at the master and have it write some +# TEST updates. It does not explicitly start repmgr (nor do any +# TEST replication configuration, for that matter). Its first +# TEST update triggers initiation of connections, and so it doesn't +# TEST get to the client without a log request. But later updates +# TEST should go directly. + +proc repmgr100 { } { + source ./include.tcl + global rep_verbose + global verbose_type + + set tnum "100" + puts "Repmgr$tnum: Basic repmgr multi-process master support." 
+ set site_prog [setup_site_prog] + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set ports [available_ports 2] + set master_port [lindex $ports 0] + set client_port [lindex $ports 1] + + puts "\tRepmgr$tnum.a: Set up the master (on TCP port $master_port)." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + make_dbconfig $masterdir \ + [list [list repmgr_site localhost $master_port db_local_site on] \ + "rep_set_config db_repmgr_conf_2site_strict off"] + puts $master "output $testdir/m1output" + puts $master "open_env" + puts $master "start master" + error_check_match start_master [gets $master] "*Successful*" + puts $master "open_db test.db" + puts $master "put myKey myValue" + + puts "\tRepmgr$tnum.b: Set up the client (on TCP port $client_port)." + set client [open "| $site_prog" "r+"] + fconfigure $client -buffering line + puts $client "home $clientdir" + puts $client "local $client_port" + make_dbconfig $clientdir \ + [list [list repmgr_site localhost $client_port db_local_site on] \ + [list repmgr_site localhost $master_port db_bootstrap_helper on] \ + "rep_set_config db_repmgr_conf_2site_strict off"] + puts $client "output $testdir/coutput" + puts $client "open_env" + puts $client "start client" + error_check_match start_client [gets $client] "*Successful*" + + puts "\tRepmgr$tnum.c: Wait for STARTUPDONE." + set clientenv [berkdb_env -home $clientdir] + await_startup_done $clientenv + + puts "\tRepmgr$tnum.d: Start a second process at master." 
+ set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output" + puts $m2 "open_env" + puts $m2 "open_db test.db" + puts $m2 "put sub1 abc" + puts $m2 "echo firstputted" + set sentinel [gets $m2] + error_check_good m2_firstputted $sentinel "firstputted" + puts $m2 "put sub2 xyz" + puts $m2 "put sub3 ijk" + puts $m2 "put sub4 pqr" + puts $m2 "echo putted" + set sentinel [gets $m2] + error_check_good m2_putted $sentinel "putted" + puts $master "put another record" + puts $master "put and again" + puts $master "echo m1putted" + set sentinel [gets $master] + error_check_good m1_putted $sentinel "m1putted" + + puts "\tRepmgr$tnum.e: Check that replicated data is visible at client." + puts $client "open_db test.db" + set expected {{myKey myValue} {sub1 abc} {sub2 xyz} {another record}} + verify_client_data $clientenv test.db $expected + + # make sure there weren't too many rerequests + puts "\tRepmgr$tnum.f: Check rerequest stats" + set pfs [stat_field $clientenv rep_stat "Log records requested"] + error_check_good rerequest_count [expr $pfs <= 1] 1 + + puts "\tRepmgr$tnum.g: Clean up." + $clientenv close + close $client + close $master + close $m2 +} diff --git a/test/tcl/repmgr101.tcl b/test/tcl/repmgr101.tcl new file mode 100644 index 00000000..7d0af34b --- /dev/null +++ b/test/tcl/repmgr101.tcl @@ -0,0 +1,134 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr101 +# TEST Repmgr support for multi-process master. +# TEST +# TEST Start two processes at the master. +# TEST Add a client site (not previously known to the master +# TEST processes), and make sure +# TEST both master processes connect to it. + +proc repmgr101 { } { + source ./include.tcl + + set tnum "101" + puts "Repmgr$tnum: Two master processes both connect to a client." 
+ set site_prog [setup_site_prog] + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set ports [available_ports 2] + set master_port [lindex $ports 0] + set client_port [lindex $ports 1] + + puts "\tRepmgr$tnum.a: Set up the master (on TCP port $master_port)." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + make_dbconfig $masterdir \ + [list [list repmgr_site localhost $master_port db_local_site on] \ + "rep_set_config db_repmgr_conf_2site_strict off"] + puts $master "output $testdir/m1output" + puts $master "open_env" + puts $master "start master" + set ignored [gets $master] + puts $master "open_db test.db" + puts $master "put myKey myValue" + + # sync. + puts $master "echo setup" + set sentinel [gets $master] + error_check_good echo_setup $sentinel "setup" + + puts "\tRepmgr$tnum.b: Start a second process at master." + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output" + puts $m2 "open_env" + puts $m2 "open_db test.db" + puts $m2 "put sub1 abc" + puts $m2 "echo firstputted" + set sentinel [gets $m2] + error_check_good m2_firstputted $sentinel "firstputted" + + puts "\tRepmgr$tnum.c: Set up the client (on TCP port $client_port)." + set client [open "| $site_prog" "r+"] + fconfigure $client -buffering line + puts $client "home $clientdir" + make_dbconfig $clientdir \ + [list [list repmgr_site localhost $client_port db_local_site on] \ + [list repmgr_site localhost $master_port db_bootstrap_helper on] \ + "rep_set_config db_repmgr_conf_2site_strict off"] + puts $client "output $testdir/coutput" + puts $client "open_env" + puts $client "start client" + error_check_match start_client [gets $client] "*Successful*" + + puts "\tRepmgr$tnum.d: Wait for STARTUPDONE." 
+ set clientenv [berkdb_env -home $clientdir] + await_startup_done $clientenv + + # Initially there should be no rerequests. + set pfs1 [stat_field $clientenv rep_stat "Log records requested"] + error_check_good rerequest_count $pfs1 0 + + # At this point we know that the master (in its main process) knows + # about the client, so the client address should be in the shared + # region. The second master process will discover the address as a + # result of being asked to send out the log records for the following + # transaction. At that point, it will initiate a connection attempt, + # though without blocking the commit() call of the transaction. This + # means that this first transaction may or may not (probably won't) get + # transmitted directly (as a "live" log record) to the client; it will + # have to be "re-requested". However, we can then wait for the + # connection to be established, and thereafter all transactions should + # be transmitted live; and we know that they must have arrived at the + # client by the time the commit() returns, because of the ack policy. + # + puts $m2 "put sub2 xyz" + set count 0 + puts $m2 "is_connected $client_port" + while {! [gets $m2]} { + if {[incr count] > 30} { + error "FAIL: couldn't connect within 30 seconds" + } + tclsleep 1 + puts $m2 "is_connected $client_port" + } + + puts $m2 "put sub3 ijk" + puts $m2 "put sub4 pqr" + puts $m2 "echo putted" + set sentinel [gets $m2] + error_check_good m2_putted $sentinel "putted" + puts $master "put another record" + puts $master "put and again" + puts $master "echo m1putted" + set sentinel [gets $master] + error_check_good m1_putted $sentinel "m1putted" + + puts "\tRepmgr$tnum.e: Check that replicated data is visible at client." 
+ puts $client "open_db test.db" + set expected {{myKey myValue} {sub1 abc} {sub2 xyz} {another record}} + verify_client_data $clientenv test.db $expected + + # make sure there weren't too many rerequests + puts "\tRepmgr$tnum.f: Check rerequest stats" + set pfs [stat_field $clientenv rep_stat "Log records requested"] + error_check_good rerequest_count [expr $pfs <= 1] 1 + + puts "\tRepmgr$tnum.g: Clean up." + $clientenv close + close $client + close $master + close $m2 +} diff --git a/test/tcl/repmgr102.tcl b/test/tcl/repmgr102.tcl new file mode 100644 index 00000000..e55c16f5 --- /dev/null +++ b/test/tcl/repmgr102.tcl @@ -0,0 +1,144 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr102 +# TEST Ensuring exactly one listener process. +# TEST +# TEST Start a repmgr process with a listener. +# TEST Start a second process, and see that it does not become the listener. +# TEST Shut down the first process (gracefully). Now a second process should +# TEST become listener. +# TEST Kill the listener process abruptly. Running failchk should show that +# TEST recovery is necessary. Run recovery and start a clean listener. + +proc repmgr102 { } { + source ./include.tcl + source $test_path/testutils.tcl + + + set tnum "102" + + # QNX does not support fork() in a multi-threaded environment. + if { $is_qnx_test } { + puts "Skipping repmgr$tnum on QNX." + return + } + + puts "Repmgr$tnum: Ensuring exactly one listener process." 
+ set site_prog [setup_site_prog] + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + + file mkdir $masterdir + + set ports [available_ports 1] + set master_port [lindex $ports 0] + + make_dbconfig $masterdir \ + [list [list repmgr_site localhost $master_port db_local_site on] \ + "rep_set_config db_repmgr_conf_2site_strict off"] + set masterenv [berkdb_env -rep -txn -thread -home $masterdir \ + -isalive my_isalive -create] + $masterenv close + + puts "\tRepmgr$tnum.a: Set up the master (on TCP port $master_port)." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + puts $master "output $testdir/m1output" + puts $master "open_env" + puts $master "start master" + error_check_match ok1 [gets $master] "*Successful*" + + # sync. + puts $master "echo setup" + set sentinel [gets $master] + error_check_good echo_setup $sentinel "setup" + + puts "\tRepmgr$tnum.b: Start a second process at master." + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output" + puts $m2 "open_env" + puts $m2 "start master" + set ret [gets $m2] + error_check_match ignored "$ret" "*DB_REP_IGNORE*" + + puts $m2 "echo started" + set sentinel [gets $m2] + error_check_good started $sentinel "started" + + close $m2 + close $master + + # Hmm, actually it'd probably be better to send them an "exit" command, + # and then read until we get an EOF error. That we we're sure they've + # had a chance to finish the close operation. This is a recurring + # theme, doing stuff synchronously. There should be a way to wrap this + # up to make it the default behavior. 
+ + puts "\tRepmgr$tnum.c: Restart 2nd process, to act as listener this time" + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output2" + puts $m2 "open_env" + puts $m2 "start master" + set answer [gets $m2] + error_check_match ok2 "$answer" "*Successful*" + + puts "\tRepmgr$tnum.d: Clean up." + close $m2 + + puts "\tRepmgr$tnum.e: Start main process." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + puts $master "output $testdir/m1output3" + puts $master "open_env" + puts $master "start master" + set answer [gets $master] + error_check_match ok3 $answer "*Successful*" + + # This seems to require $KILL; tclkill does not work. + puts "\tRepmgr$tnum.f: Kill process [pid $master] without clean-up." + exec $KILL [pid $master] + catch {close $master} + + # In realistic, correct operation, the application should have called + # failchk before trying to restart a new process. But let's just prove + # to ourselves that it's actually doing something. This first try + # should fail. + # + puts "\tRepmgr$tnum.g: Start take-over process without failchk." + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output3" + puts $m2 "open_env" + puts $m2 "start master" + set answer [gets $m2] + error_check_match ignored3 $answer "*DB_REP_IGNORE*" + close $m2 + + set masterenv [berkdb_env -thread -home $masterdir -isalive my_isalive] + $masterenv failchk + + # This time it should work. + puts "\tRepmgr$tnum.h: Start take-over process after failchk." 
+ set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output4" + puts $m2 "open_env" + puts $m2 "start master" + set answer [gets $m2] + error_check_match ok4 $answer "*Successful*" + + close $m2 + $masterenv close +} diff --git a/test/tcl/repmgr105.tcl b/test/tcl/repmgr105.tcl new file mode 100644 index 00000000..d486786e --- /dev/null +++ b/test/tcl/repmgr105.tcl @@ -0,0 +1,218 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr105 +# TEST Repmgr recognition of peer setting, across processes. +# TEST +# TEST Set up a master and two clients, synchronized with some data. +# TEST Add a new client, configured to use c2c sync with one of the original +# TEST clients. Check stats to make sure the correct c2c peer was used. + +proc repmgr105 { } { + repmgr105_sub position_chg + repmgr105_sub chg_site + repmgr105_sub chg_after_open + repmgr105_sub set_peer_after_open +} + +proc repmgr105_sub { config } { + source ./include.tcl + + set tnum "105" + puts "Repmgr$tnum: Repmgr peer, with \"$config\" configuration." + set site_prog [setup_site_prog] + + env_cleanup $testdir + + set ports [available_ports 4] + set mport [lindex $ports 0] + set portA [lindex $ports 1] + set portB [lindex $ports 2] + + file mkdir [set masterdir $testdir/MASTER] + file mkdir $testdir/A + file mkdir $testdir/B + file mkdir $testdir/C + + puts "\tRepmgr$tnum.a: Start master, write some data." + make_dbconfig $masterdir {} + set cmds { + "home $masterdir" + "local $mport" + "output $testdir/moutput" + "open_env" + "start master" + "open_db test.db" + "put key1 value1" + } + set m [open_site_prog [subst $cmds]] + + puts "\tRepmgr$tnum.b:\ + Start initial two clients; wait for them to synchronize." + # Allowing both A and B to start at the same time, and synchronize + # concurrently would make sense. 
But it causes very slow performance on + # Windows. Since it's really only client C that's under test here, this + # detail doesn't matter. + # + make_dbconfig $testdir/A {} + set a [open_site_prog [list \ + "home $testdir/A" \ + "local $portA" \ + "output $testdir/aoutput" \ + "remote localhost $mport" \ + "open_env" \ + "start client"]] + set env [berkdb_env -home $testdir/A] + await_startup_done $env + $env close + + make_dbconfig $testdir/B {} + set b [open_site_prog [list \ + "home $testdir/B" \ + "local $portB" \ + "output $testdir/boutput" \ + "remote localhost $mport" \ + "open_env" \ + "start client"]] + set env [berkdb_env -home $testdir/B] + await_startup_done $env + $env close + + # Client C is the one whose behavior is being tested. It has two + # processes. "c" will be the main replication process, and "c2" the + # subordinate process. The initial configuration commands used to set + # up the two processes vary slightly with each test. The variable + # $config contains the name of the proc which will fill out the + # configuration information appropriately for each test variant. + # + puts "\tRepmgr$tnum.c: Start client under test." + make_dbconfig $testdir/C {} + + set c2 [list \ + "home $testdir/C" \ + "local [lindex $ports 3]" \ + "output $testdir/c2output" \ + "open_env"] + set c [list \ + "home $testdir/C" \ + "local [lindex $ports 3]" \ + "output $testdir/coutput" \ + "open_env"] + set lists [repmgr105_$config $c2 $c] + set c2 [lindex $lists 0] + set c [lindex $lists 1] + + # Ugly hack: in this one case, the order of opening the two client + # processes has to be reversed. + # + if {$config == "chg_after_open"} { + set c [open_site_prog $c] + set c2 [open_site_prog $c2] + } else { + set c2 [open_site_prog $c2] + set c [open_site_prog $c] + } + puts $c "start client" + gets $c + + puts "\tRepmgr$tnum.d: Wait for startup-done at test client." 
+ set env [berkdb_env -home $testdir/C] + await_startup_done $env 27 + $env close + + puts "\tRepmgr$tnum.e: Check stats to make sure proper peer was used." + set env [berkdb_env -home $testdir/A] + set reqs [stat_field $env rep_stat "Client service requests"] + error_check_good used_client_A [expr {$reqs > 0}] 1 + $env close + set env [berkdb_env -home $testdir/B] + set reqs [stat_field $env rep_stat "Client service requests"] + error_check_good didnt_use_b [expr {$reqs == 0}] 1 + $env close + + puts "\tRepmgr$tnum.f: Clean up." + close $c2 + close $c + close $b + close $a + close $m +} + +# Scenario 1: client A is the peer; C2 sets B, A; C sets A. For C, this means +# no peer change, but its position in the list changes, requiring some tricky +# shuffling. +# +proc repmgr105_position_chg { c2 c } { + set remote_config [uplevel 1 {list \ + "remote localhost $mport" \ + "remote localhost $portB" \ + "remote -p localhost $portA"}] + set i [lsearch -exact $c2 "open_env"] + + # It should be found, in the middle somewhere, or this will break. + set c2 "[lrange $c2 0 [expr $i - 1]] $remote_config [lrange $c2 $i end]" + + set remote_config [uplevel 1 {list \ + "remote -p localhost $portA" \ + "remote localhost $mport"}] + set i [lsearch -exact $c "open_env"] + set c "[lrange $c 0 [expr $i - 1]] $remote_config [lrange $c $i end]" + + return [list $c2 $c] +} + +# C2 first sets the peer as B, but then C comes along and changes it to A. +# +proc repmgr105_chg_site { c2 c } { + set remote_config [uplevel 1 {list \ + "remote localhost $mport" \ + "remote -p localhost $portB"}] + set i [lsearch -exact $c2 "open_env"] + + # It should be found, in the middle somewhere, or this will break. 
+ set c2 "[lrange $c2 0 [expr $i - 1]] $remote_config [lrange $c2 $i end]" + + set remote_config [uplevel 1 {list \ + "remote localhost $portB" \ + "remote -p localhost $portA" \ + "remote localhost $mport"}] + set i [lsearch -exact $c "open_env"] + set c "[lrange $c 0 [expr $i - 1]] $remote_config [lrange $c $i end]" + + return [list $c2 $c] +} + +# C first sets B as its peer, and creates the env. Then C2 comes along and +# changes it to A. C will have to learn of the change on the fly, rather than +# at env open/join time. Even though the actual order of process creation will +# be reversed (by the caller), we still conform to the convention of putting C2 +# first, and then C, in the ordered list. +# +proc repmgr105_chg_after_open { c2 c } { + set remote_config [uplevel 1 {list \ + "remote localhost $mport" \ + "remote localhost $portB" \ + "remote -p localhost $portA"}] + set i [lsearch -exact $c2 "open_env"] + + # It should be found, in the middle somewhere, or this will break. + set c2 "[lrange $c2 0 [expr $i - 1]] $remote_config [lrange $c2 $i end]" + + set remote_config [uplevel 1 {list \ + "remote -p localhost $portB" \ + "remote localhost $mport"}] + set i [lsearch -exact $c "open_env"] + set c "[lrange $c 0 [expr $i - 1]] $remote_config [lrange $c $i end]" + + return [list $c2 $c] +} + +# Nothing especially exotic here, except this exercises a code path where I +# previously discovered a bug. +# +proc repmgr105_set_peer_after_open { c2 c } { + set remote_config [uplevel 1 {subst "remote -p localhost $portA"}] + lappend c $remote_config + return [list $c2 $c] +} diff --git a/test/tcl/repmgr106.tcl b/test/tcl/repmgr106.tcl new file mode 100644 index 00000000..bdd60c24 --- /dev/null +++ b/test/tcl/repmgr106.tcl @@ -0,0 +1,163 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr106 +# TEST Simple smoke test for repmgr elections with multi-process envs. 
+ +proc repmgr106 { } { + source ./include.tcl + + set tnum "106" + puts "Repmgr$tnum:\ + Smoke test for repmgr elections with multi-process envs." + + env_cleanup $testdir + + # Assign values for each port variable from list contents. + foreach {portA portB portC} [available_ports 3] {} + + set timeouts { + {rep_set_timeout DB_REP_ELECTION_RETRY 2000000} + {rep_set_timeout DB_REP_ELECTION_TIMEOUT 1000000} + } + + # First just create the group. + file mkdir $testdir/A + make_dbconfig $testdir/A \ + [linsert $timeouts 0 [list repmgr_site localhost $portA db_local_site on]] + file mkdir $testdir/B + make_dbconfig $testdir/B \ + [linsert $timeouts 0 [list repmgr_site localhost $portB db_local_site on] \ + [list repmgr_site localhost $portA db_bootstrap_helper on]] + file mkdir $testdir/C + make_dbconfig $testdir/C \ + [linsert $timeouts 0 [list repmgr_site localhost $portC db_local_site on] \ + [list repmgr_site localhost $portA db_bootstrap_helper on]] + set cmds { + {home $testdir/A} + {open_env} + {start master} + } + set cmds [subst $cmds] + set a [open_site_prog [linsert $cmds 1 "output $testdir/aoutput"]] + set cmds { + {home $testdir/B} + {open_env} + {start client} + } + set cmds [subst $cmds] + set b [open_site_prog [linsert $cmds 1 "output $testdir/boutput"]] + set clientenv [berkdb_env -home $testdir/B] + await_startup_done $clientenv + $clientenv close + set cmds { + {home $testdir/C} + {open_env} + {start client} + } + set cmds [subst $cmds] + set c [open_site_prog [linsert $cmds 1 "output $testdir/coutput"]] + set clientenv [berkdb_env -home $testdir/C] + await_startup_done $clientenv + $clientenv close + close $c + close $b + close $a + + puts "\tRepmgr$tnum.a: Start 3 sites (with 2 processes each)." 
+ set cmds { + {home $testdir/A} + {open_env} + {start election} + } + set cmds [subst $cmds] + set a1 [open_site_prog [linsert $cmds 1 "output $testdir/a1output"]] + set a2 [open_site_prog [linsert $cmds 1 "output $testdir/a2output"]] + + set cmds { + {home $testdir/B} + {open_env} + {start election} + } + set cmds [subst $cmds] + set b1 [open_site_prog [linsert $cmds 1 "output $testdir/b1output"]] + set b2 [open_site_prog [linsert $cmds 1 "output $testdir/b2output"]] + + set cmds { + {home $testdir/C} + {open_env} + {start election} + } + set cmds [subst $cmds] + set c1 [open_site_prog [linsert $cmds 1 "output $testdir/c1output"]] + set c2 [open_site_prog [linsert $cmds 1 "output $testdir/c2output"]] + + puts "\tRepmgr$tnum.b: Wait for an election to choose initial master." + set a [berkdb_env -home $testdir/A] + set b [berkdb_env -home $testdir/B] + set c [berkdb_env -home $testdir/C] + set sites "$a $b $c" + set menv [repmgr106_await_election $sites] + set i [lsearch -exact $sites $menv] + error_check_bad notfound $i -1 + set site_names "abc" + set m [string range $site_names $i $i] + puts "\tRepmgr$tnum.c: (site $m is master)." + + puts "\tRepmgr$tnum.d: Wait for other two sites to sync up." + set clients [lreplace $sites $i $i] + set site_names [string replace $site_names $i $i] + await_startup_done [lindex $clients 0] + await_startup_done [lindex $clients 1] + + set m1 [subst $${m}1] + set m2 [subst $${m}2] + + puts $m2 "open_db test.db" + puts $m2 "put key1 value1" + puts $m2 "echo done" + gets $m2 + + puts "\tRepmgr$tnum.e:\ + Shut down master, wait for survivors to elect new master." + $menv close + close $m1 + close $m2 + + set menv [repmgr106_await_election $clients] + set i [lsearch -exact $clients $menv] + error_check_bad notfound2 $i -1 + set m [string range $site_names $i $i] + puts "\tRepmgr$tnum.f: (site $m is new master)." + + puts "\tRepmgr$tnum.g: Wait for remaining client to sync to new master." 
+ set client [lreplace $clients $i $i] + await_condition {[stat_field $client rep_stat "Master changes"] == 3} + await_startup_done $client + + puts "\tRepmgr$tnum.h: Clean up." + $client close + $menv close + + set c [string range $site_names 0 0] + close [subst $${c}1] + close [subst $${c}2] + set c [string range $site_names 1 1] + close [subst $${c}1] + close [subst $${c}2] +} + +proc repmgr106_await_election { env_list } { + set cond { + foreach e $env_list { + if {[stat_field $e rep_stat "Role"] == "master"} { + set answer $e + break + } + } + expr {[info exists answer]} + } + await_condition {[eval $cond]} 20 + return $answer +} diff --git a/test/tcl/repmgr107.tcl b/test/tcl/repmgr107.tcl new file mode 100644 index 00000000..5775ded3 --- /dev/null +++ b/test/tcl/repmgr107.tcl @@ -0,0 +1,131 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr107 +# TEST Repmgr combined with replication-unaware process at master. + +proc repmgr107 { } { + source ./include.tcl + + set tnum "107" + puts "Repmgr$tnum: Replication-unaware process at master." + + env_cleanup $testdir + set ports [available_ports 2] + foreach {mport cport} $ports {} + + file mkdir [set mdir $testdir/MASTER] + file mkdir [set cdir $testdir/CLIENT] + + # Using heartbeats because this test relies on the rerequest + # processing based on heartbeats. + # + puts "\tRepmgr$tnum.a: Set up simple master/client pair." 
+ set dbconfig { + {rep_set_timeout DB_REP_ACK_TIMEOUT 2000000} + {rep_set_timeout DB_REP_HEARTBEAT_SEND 500000} + {rep_set_timeout DB_REP_HEARTBEAT_MONITOR 1100000} + } + make_dbconfig $mdir \ + [linsert $dbconfig 0 [list repmgr_site localhost $mport db_local_site on]] + set cmds { + "home $mdir" + "output $testdir/moutput" + "open_env" + "start master" + "open_db test.db" + "put k1 v1" + "put k2 v2" + } + set m [open_site_prog [subst $cmds]] + + make_dbconfig $cdir \ + [linsert $dbconfig 0 \ + [list repmgr_site localhost $cport db_local_site on] \ + [list repmgr_site localhost $mport db_bootstrap_helper on]] + set cmds { + "home $cdir" + "output $testdir/coutput" + "open_env" + "start client" + } + set c [open_site_prog [subst $cmds]] + + puts "\tRepmgr$tnum.b: Wait for client to finish start-up." + set cenv [berkdb_env -home $cdir] + await_startup_done $cenv + + puts "\tRepmgr$tnum.c: Run checkpoint in a separate process." + exec $util_path/db_checkpoint -h $mdir -1 + + # Find out where the checkpoint record is. + # + set menv [berkdb_env -home $mdir] + set curs [$menv log_cursor] + set ckp_lsn1 [lindex [$curs get -last] 0] + + puts "\tRepmgr$tnum.d: Write more log records at master." + puts $m "put k3 v3" + puts $m "put k4 v4" + puts $m "echo done" + gets $m + + puts "\tRepmgr$tnum.e: Do another checkpoint." + exec $util_path/db_checkpoint -h $mdir -1 + set ckp_lsn2 [lindex [$curs get -last] 0] + + error_check_bad same_ckp_lsn $ckp_lsn2 $ckp_lsn1 + + # db_checkpoint could have produced perm failures, because it doesn't + # start repmgr explicitly. Instead repmgr starts up automatically, on + # the fly, by trapping the first transmitted log record that gets sent. + # This causes a connection to be initiated, but that may take some time, + # too much time for that first log record to be transmitted. This means + # the client will have to request retransmission of this log record + # "gap". 
+ # + # So, pause for a moment, to let replication's gap measurement algorithm + # expire, and then send one more transaction from the master, so that + # the client is forced to request the gap if necessary. + # + set perm_failures "Acknowledgement failures" + set pfs1 [stat_field $menv repmgr_stat $perm_failures] + tclsleep 2 + + puts $m "put k5 v5" + puts $m "echo done" + gets $m + set pfs2 [stat_field $menv repmgr_stat $perm_failures] + + # The last "put" operation shouldn't have resulted in any additional + # perm failures. + # + error_check_good perm_fail $pfs2 $pfs1 + + # Pause again to allow time for the request for retransmission to be + # fulfilled. + # + tclsleep 2 + + # At this point that both checkpoint operations should have been + # successfully replicated. Examine the client-side log at the expected + # LSNs. + # + puts "\tRepmgr$tnum.f: Examine client log." + foreach lsn [list $ckp_lsn1 $ckp_lsn2] { + set lsnarg [join $lsn /] + set listing [exec $util_path/db_printlog \ + -h $cdir -b $lsnarg -e $lsnarg] + + set first_line [lindex [split $listing "\n"] 0] + error_check_good found_ckp \ + [string match "*__txn_ckp*" $first_line] 1 + } + + $curs close + $cenv close + $menv close + close $c + close $m +} diff --git a/test/tcl/repmgr108.tcl b/test/tcl/repmgr108.tcl new file mode 100644 index 00000000..7ec7c839 --- /dev/null +++ b/test/tcl/repmgr108.tcl @@ -0,0 +1,91 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr108 +# TEST Subordinate connections and processes should not trigger elections. + +proc repmgr108 { } { + source ./include.tcl + + set tnum "108" + puts "Repmgr$tnum: Subordinate\ + connections and processes should not trigger elections." 
+ + env_cleanup $testdir + + foreach {mport cport} [available_ports 2] {} + file mkdir [set mdir $testdir/MASTER] + file mkdir [set cdir $testdir/CLIENT] + + make_dbconfig $mdir \ + [list [list repmgr_site localhost $mport db_local_site on]] + make_dbconfig $cdir \ + [list [list repmgr_site localhost $cport db_local_site on] \ + [list repmgr_site localhost $mport db_bootstrap_helper on]] + + puts "\tRepmgr$tnum.a: Set up a pair of sites, two processes each." + set cmds { + "home $mdir" + "output $testdir/m1output" + "open_env" + "start master" + } + set m1 [open_site_prog [subst $cmds]] + + set cmds { + "home $mdir" + "output $testdir/m2output" + "open_env" + "start master" + } + set m2 [open_site_prog [subst $cmds]] + + set cmds { + "home $cdir" + "output $testdir/c1output" + "open_env" + "start client" + } + set c1 [open_site_prog [subst $cmds]] + + set cmds { + "home $cdir" + "output $testdir/c2output" + "open_env" + "start client" + } + set c2 [open_site_prog [subst $cmds]] + + set cenv [berkdb_env -home $cdir] + await_startup_done $cenv + + puts "\tRepmgr$tnum.b: Stop master's subordinate process (pause)." + close $m2 + + # Pause to let client notice the connection loss. + tclsleep 3 + + # The client main process is still running, but it shouldn't care about + # a connection loss to the master's subordinate process. + + puts "\tRepmgr$tnum.c:\ + Stop client's main process, then master's main process (pause)." + close $c1 + tclsleep 2 + close $m1 + tclsleep 3 + + # If the client main process were still running, it would have reacted + # to the loss of the master by calling for an election. However, with + # only the client subordinate process still running, he cannot call for + # an election. So, we should see no elections ever having been + # started. + # + set election_count [stat_field $cenv rep_stat "Elections held"] + puts "\tRepmgr$tnum.d: Check election count ($election_count)." 
+ error_check_good no_elections $election_count 0 + + $cenv close + close $c2 +} diff --git a/test/tcl/repmgr109.tcl b/test/tcl/repmgr109.tcl new file mode 100644 index 00000000..cbb73567 --- /dev/null +++ b/test/tcl/repmgr109.tcl @@ -0,0 +1,212 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr109 +# TEST Test repmgr's internal juggling of peer EID's. +# TEST +# TEST Set up master and 2 clients, A and B. +# TEST Add a third client (C), with two processes. +# TEST The first process will be configured to know about A. +# TEST The second process will know about B, and set that as peer, +# TEST but when it joins the env site B will have to be shuffled +# TEST into a later position in the list, because A is already first. + +# This whole test is highly dependent upon the internal implementation structure +# of repmgr's multi-process support. If that implementation changes, this test +# may become irrelevant, irrational, and inconsequential. If that happens, it +# makes sense to simply discard this test. 
+ +proc repmgr109 { } { + foreach initial_action {true false} { + foreach while_active {true false} { + repmgr109_sub $initial_action $while_active + } + } +} + +proc repmgr109_sub { {a_too false} {while_active true} } { + source ./include.tcl + + if {$a_too} { + set part1 "shuffle with peer reassignment" + } else { + set part1 "shuffle" + } + if {$while_active} { + set part2 "while active" + } else { + set part2 "while not active" + } + set tnum "109" + puts "Repmgr$tnum: ($part1, then peer change $part2)" + + env_cleanup $testdir + foreach {mport aport bport cport} [available_ports 4] {} + file mkdir [set dirm $testdir/M] + file mkdir [set dira $testdir/A] + file mkdir [set dirb $testdir/B] + file mkdir [set dirc $testdir/C] + + set dbc {{repmgr_set_ack_policy DB_REPMGR_ACKS_ALL}} + make_dbconfig $dirm $dbc + make_dbconfig $dira $dbc + make_dbconfig $dirb $dbc + make_dbconfig $dirc $dbc + + puts "\tRepmgr$tnum.a: Create a master and first two clients." + set cmds { + "home $dirm" + "local $mport" + "output $testdir/moutput" + "open_env" + "start master" + } + set m [open_site_prog [subst $cmds]] + + set cmds { + "home $dira" + "local $aport" + "output $testdir/aoutput" + "remote localhost $mport" + "open_env" + "start client" + } + set a [open_site_prog [subst $cmds]] + + set cmds { + "home $dirb" + "local $bport" + "output $testdir/boutput" + "remote localhost $mport" + "open_env" + "start client" + } + set b [open_site_prog [subst $cmds]] + + set aenv [berkdb_env -home $dira] + await_startup_done $aenv + set benv [berkdb_env -home $dirb] + await_startup_done $benv + + # Now it gets interesting. + puts "\tRepmgr$tnum.b: Create client C, with two processes." 
+ if {$a_too} { + set peer_flag "-p" + } else { + set peer_flag "" + } + set cmds { + "home $dirc" + "local $cport" + "output $testdir/c1output" + "remote $peer_flag localhost $aport" + "remote localhost $mport" + "open_env" + } + set c1 [open_site_prog [subst $cmds]] + + set cmds { + "home $dirc" + "local $cport" + "output $testdir/c2output" + "remote -p localhost $bport" + "remote localhost $aport" + "open_env" + } + set c2 [open_site_prog [subst $cmds]] + + puts $c1 "start client" + gets $c1 + set cenv [berkdb_env -home $dirc] + await_startup_done $cenv + + puts "\tRepmgr$tnum.c: Check resulting statistics." + # Make sure we used B, not A, as the c2c peer. + set requests_at_A [repmgr109_get_request_count $aenv] + set requests_at_B [repmgr109_get_request_count $benv] + error_check_good no_requests_at_A $requests_at_A 0 + error_check_bad some_requests_at_B $requests_at_B 0 + + # Check that site list order is what we expect. + set sl [$cenv repmgr_site_list] + error_check_good site_list [lindex $sl 0 2] $aport + error_check_good site_list [lindex $sl 1 2] $mport + error_check_good site_list [lindex $sl 2 2] $bport + + + # Give client C a reason to send another request: shut it down, and + # create some new transactions at the master. + # + puts $c2 "exit" + gets $c2 + close $c2 + puts $c1 "exit" + gets $c1 + close $c1 + + puts $m "open_db test.db" + puts $m "put k1 v1" + puts $m "put k2 v2" + puts $m "echo done" + gets $m + + # Change peer setting at C. + # + puts "\tRepmgr$tnum.d: Start client C again." + if { $while_active } { + set cmds { + "home $dirc" + "output $testdir/c1output2" + "open_env" + "remote localhost $bport" + "remote -p localhost $aport" + "start client" + } + } else { + set cmds { + "home $dirc" + "output $testdir/c1output2" + "remote localhost $bport" + "remote -p localhost $aport" + "open_env" + "start client" + } + } + set c [open_site_prog [subst $cmds]] + + # Wait for restarted client to catch up with master. 
+ set menv [berkdb_env -home $dirm] + set seq 0 + set cond { + incr seq + puts $m "put newkey$seq newdata$seq" + puts $m "echo done" + gets $m + set log_end [next_expected_lsn $menv] + set client_log_end [next_expected_lsn $cenv] + expr [string compare $client_log_end $log_end] == 0 + } + await_condition {[eval $cond]} + + # Make sure client B has not serviced any more requests, and that + # instead now client A has serviced some. + + error_check_good no_addl_reqs \ + [repmgr109_get_request_count $benv] $requests_at_B + error_check_bad some_requests_at_A [repmgr109_get_request_count $aenv] 0 + + $cenv close + $benv close + $aenv close + $menv close + + close $c + close $a + close $b + close $m +} + +proc repmgr109_get_request_count { env } { + stat_field $env rep_stat "Client service requests" +} diff --git a/test/tcl/repmgr110.tcl b/test/tcl/repmgr110.tcl new file mode 100644 index 00000000..0912e44d --- /dev/null +++ b/test/tcl/repmgr110.tcl @@ -0,0 +1,197 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr110 +# TEST Multi-process repmgr start-up policies. +# + +proc repmgr110 { } { + source ./include.tcl + + set tnum "110" + puts "Repmgr$tnum: repmgr multi-process start policies." + + env_cleanup $testdir + + file mkdir [set dira $testdir/A] + file mkdir [set dirb $testdir/B] + file mkdir [set dirc $testdir/C] + + set conf { + {rep_set_timeout DB_REP_ELECTION_RETRY 3000000} + } + make_dbconfig $dira $conf + make_dbconfig $dirb $conf + make_dbconfig $dirc $conf + foreach {aport bport cport} [available_ports 3] {} + + puts "\tRepmgr$tnum.a: Create a master and client." 
+ set cmds { + "home $dira" + "local $aport" + "output $testdir/a1output1" + "open_env" + "start master" + } + set a [open_site_prog [subst $cmds]] + + set cmds { + "home $dirb" + "local $bport" + "output $testdir/boutput1" + "remote localhost $aport" + "open_env" + "start client" + } + set b [open_site_prog [subst $cmds]] + + puts "\tRepmgr$tnum.b: Wait for start-up done at client." + set benv [berkdb_env -home $dirb] + await_startup_done $benv + + puts $a "open_db test.db" + puts $a "put key1 val1" + puts $a "echo done" + gets $a + + # + # 1. shutting down client and restarting as client should do nothing + # + puts "\tRepmgr$tnum.c: Shut down client, restart as client." + set elections0 [stat_field $benv rep_stat "Elections held"] + set aenv [berkdb_env -home $dira] + set requests0 [stat_field $aenv rep_stat "Client service requests"] + puts $b "exit" + gets $b + error_check_good eof1 [eof $b] 1 + close $b + set cmds { + "home $dirb" + "local $bport" + "output $testdir/boutput1" + "remote localhost $aport" + "open_env" + "start election" + } + set b [open_site_prog [subst $cmds]] + error_check_good already_startedup \ + [stat_field $benv rep_stat "Startup complete"] 1 + puts "\tRepmgr$tnum.d: Pause 20 seconds to check for start-up activity" + tclsleep 20 + error_check_good no_more_requests \ + [stat_field $aenv rep_stat "Client service requests"] $requests0 + error_check_good no_more_requests \ + [stat_field $benv rep_stat "Elections held"] $elections0 + + # + # 2. Start policy should be ignored if there's already a listener + # running in a separate process. + # + + # start a second process at master. Even though it specifies "election" + # as its start policy, the fact that a listener is already running + # should force it to continue as master (IMHO). 
+ + puts "\tRepmgr$tnum.e: Second master process accepts existing role" + set cmds { + "home $dira" + "local $aport" + "output $testdir/a2output1" + "open_env" + "start election" + } + set a2 [open_site_prog [subst $cmds]] + + # Make sure we still seem to be master, by checking stats, and by trying + # to write a new transaction. + # + error_check_good still_master \ + [stat_field $aenv rep_stat "Role"] "master" + + puts $a2 "open_db test.db" + puts $a2 "put key2 val2" + puts $a2 "echo done" + gets $a2 + + # + # 3. Specifying MASTER start policy results in rep_start(MASTER), no + # matter what happened previously. + # + puts "\tRepmgr$tnum.f: Restart master as master." + + puts $a "exit" + gets $a + error_check_good eof2 [eof $a] 1 + close $a + puts $a2 "exit" + gets $a2 + error_check_good eof3 [eof $a2] 1 + close $a2 + + set initial_gen [stat_field $aenv rep_stat "Generation number"] + set cmds { + "home $dira" + "local $aport" + "output $testdir/a2output2" + "remote localhost $bport" + "open_env" + "start master" + } + set a [open_site_prog [subst $cmds]] + + # Since we were already master, the gen number shouldn't change. + error_check_good same_gen \ + [stat_field $aenv rep_stat "Generation number"] $initial_gen + + puts $a "exit" + gets $a + error_check_good eof4 [eof $a] 1 + close $a + puts $b "exit" + gets $b + error_check_good eof5 [eof $b] 1 + close $b + + puts "\tRepmgr$tnum.g: Restart client as master." + # Note that site A is not running at this point. + set cmds { + "home $dirb" + "local $bport" + "output $testdir/boutput3" + "open_env" + "start master" + } + set b [open_site_prog [subst $cmds]] + set gen [stat_field $benv rep_stat "Generation number"] + error_check_good bumped_gen [expr $gen > $initial_gen] 1 + + + # + # 4. 
Specifying CLIENT when we were MASTER causes a change + # + puts $b "exit" + gets $b + error_check_good eof6 [eof $b] 1 + close $b + $benv close + exec $util_path/db_recover -h $dirb + + puts "\tRepmgr$tnum.h: Restart master as client" + set initial_value [stat_field $aenv rep_stat "Elections held"] + set cmds { + "home $dira" + "local $aport" + "output $testdir/aoutput4" + "open_env" + "start election" + } + set a [open_site_prog [subst $cmds]] + puts "\tRepmgr$tnum.i: Pause for 10 seconds to wait for elections." + tclsleep 10 + set elections [stat_field $aenv rep_stat "Elections held"] + error_check_good bumped_gen [expr $elections > $initial_value] 1 + + $aenv close + close $a +} diff --git a/test/tcl/repmgr111.tcl b/test/tcl/repmgr111.tcl new file mode 100644 index 00000000..d2a784f1 --- /dev/null +++ b/test/tcl/repmgr111.tcl @@ -0,0 +1,78 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr111 +# TEST Multi-process repmgr with env open before set local site. + +proc repmgr111 { } { + source ./include.tcl + + set tnum "111" + puts "Repmgr$tnum: set local site after env open." + set site_prog [setup_site_prog] + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + + file mkdir $masterdir + file mkdir $clientdir + + set ports [available_ports 2] + set master_port [lindex $ports 0] + set client_port [lindex $ports 1] + + puts "\tRepmgr$tnum.a: Set up the master (port $master_port)." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + make_dbconfig $masterdir {{rep_set_config db_repmgr_conf_2site_strict off}} + puts $master "output $testdir/m1output" + puts $master "open_env" + puts $master "local $master_port" + puts $master "start master" + set ignored [gets $master] + + puts "\tRepmgr$tnum.b: Set up the client (on TCP port $client_port)." 
+ set client [open "| $site_prog" "r+"] + fconfigure $client -buffering line + puts $client "home $clientdir" + puts $client "local $client_port" + make_dbconfig $clientdir {{rep_set_config db_repmgr_conf_2site_strict off}} + puts $client "output $testdir/coutput" + puts $client "open_env" + puts $client "remote localhost $master_port" + puts $client "start client" + error_check_match start_client [gets $client] "*Successful*" + + puts "\tRepmgr$tnum.c: Wait for STARTUPDONE." + set clientenv [berkdb_env -home $clientdir] + await_startup_done $clientenv + + puts "\tRepmgr$tnum.d: Start second master process, rep-unaware." + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output" + puts $m2 "open_env" + puts $m2 "open_db test.db" + puts $m2 "put sub1 abc" + tclsleep 1 + puts $m2 "put sub2 def" + puts $m2 "echo putted" + set sentinel [gets $m2] + error_check_good m2_firstputted $sentinel "putted" + + puts "\tRepmgr$tnum.e: Check that replicated data is visible at client." + puts $client "open_db test.db" + set expected {{sub1 abc}} + verify_client_data $clientenv test.db $expected + + puts "\tRepmgr$tnum.f: Clean up." + $clientenv close + close $client + close $master + close $m2 +} diff --git a/test/tcl/repmgr112.tcl b/test/tcl/repmgr112.tcl new file mode 100644 index 00000000..43b0ee60 --- /dev/null +++ b/test/tcl/repmgr112.tcl @@ -0,0 +1,156 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# TEST repmgr112 +# TEST Multi-process repmgr ack policies. +# TEST +# TEST Subordinate processes sending live log records must observe the +# TEST ack policy set by the main process. Also, a policy change made by a +# TEST subordinate process should be observed by all processes. + +proc repmgr112 { } { + source ./include.tcl + + set tnum "112" + puts "Repmgr$tnum: consistent ack policy among processes." 
+ set site_prog [setup_site_prog] + + env_cleanup $testdir + + set masterdir $testdir/MASTERDIR + set clientdir $testdir/CLIENTDIR + set clientdir2 $testdir/CLIENTDIR2 + + file mkdir $masterdir + file mkdir $clientdir + file mkdir $clientdir2 + + set ports [available_ports 3] + set master_port [lindex $ports 0] + set client_port [lindex $ports 1] + set client2_port [lindex $ports 2] + + puts "\tRepmgr$tnum.b: Set up the master (port $master_port)." + set master [open "| $site_prog" "r+"] + fconfigure $master -buffering line + puts $master "home $masterdir" + make_dbconfig $masterdir {} + puts $master "output $testdir/m1output" + puts $master "open_env" + puts $master "local $master_port" + puts $master "start master" + set ignored [gets $master] + + # The client will have the default ack policy (QUORUM), so that it will + # always send acks. This isn't truly kosher in a real HA deployment, + # because generally the ack policy should be the same at all sites, + # but is useful for testing purposes here. + # + puts "\tRepmgr$tnum.c: Set up the client (on TCP port $client_port)." + set client [open "| $site_prog" "r+"] + fconfigure $client -buffering line + puts $client "home $clientdir" + puts $client "local $client_port" + make_dbconfig $clientdir {} + puts $client "output $testdir/coutput" + puts $client "open_env" + puts $client "remote localhost $master_port" + puts $client "start client" + error_check_match start_client [gets $client] "*Successful*" + + puts "\tRepmgr$tnum.d: Wait for STARTUPDONE." + set clientenv [berkdb_env -home $clientdir] + await_startup_done $clientenv + + # Create a third site by starting another client, but then + # shut it down, so that the test runs with an out-of-service + # client. + puts "\tRepmgr$tnum.c: Set up another client (on TCP port $client2_port)." 
+ set client2 [open "| $site_prog" "r+"] + fconfigure $client2 -buffering line + puts $client2 "home $clientdir2" + puts $client2 "local $client2_port" + make_dbconfig $clientdir2 {} + puts $client2 "output $testdir/c2output" + puts $client2 "open_env" + puts $client2 "remote localhost $master_port" + puts $client2 "start client" + error_check_match start_client2 [gets $client2] "*Successful*" + + set clientenv2 [berkdb_env -home $clientdir2] + await_startup_done $clientenv2 + $clientenv2 close + close $client2 + + # Here the Tcl script itself acts as the subordinate process, sharing + # the environment with the db_repsite main process. Change the ack + # policy from here, and then check that the main process has observed + # it. With the ALL ack policy, since we don't have 3 sites running we + # should get a perm failure. (If the policy were the default QUORUM, + # one client would be enough.) + # + puts "\tRepmgr$tnum.e: Change ack policy from the Tcl process." + set masterenv [berkdb_env -home $masterdir -txn -rep -thread] + $masterenv repmgr -ack all + + puts $master "open_db test.db" + puts $master "echo opendone" + error_check_good opendone [gets $master] "opendone" + + set perm_failures "Acknowledgement failures" + set pfs0 [stat_field $masterenv repmgr_stat $perm_failures] + + puts $master "put mykey mydata" + puts $master "echo putdone" + error_check_good putdone [gets $master] "putdone" + + set pfs1 [stat_field $masterenv repmgr_stat $perm_failures] + error_check_good fail_count $pfs1 [expr $pfs0 + 1] + + puts "\tRepmgr$tnum.f: Change ack policy to 'none'." + $masterenv repmgr -ack none + + # Shut down client, so that from now on we will not be able to get any + # acks. 
+ # + close $client + set count 0 + puts $master "is_connected 0" + while {[gets $master]} { + if {[incr count] > 30} { + error "FAIL: couldn't disconnect within 30 seconds" + } + tclsleep 1 + puts $master "is_connected 0" + } + + # Make sure that a subordinate process sending a live log record + # observes the ack policy of the environment. The default policy would + # be QUORUM, but the policy currently in effect is NONE. With no + # clients running, we won't get any acks. If the default policy were in + # effect, this would cause a perm failure. Thus, if we don't get a perm + # failure we can conclude that the NONE policy must have been used. + # + puts "\tRepmgr$tnum.g: Start second master process, rep-unaware." + set m2 [open "| $site_prog" "r+"] + fconfigure $m2 -buffering line + puts $m2 "home $masterdir" + puts $m2 "output $testdir/m2output" + puts $m2 "open_env" + puts $m2 "open_db test.db" + puts $m2 "put sub1 abc" + puts $m2 "echo putted" + set sentinel [gets $m2] + error_check_good m2_firstputted $sentinel "putted" + + puts "\tRepmgr$tnum.h: Make sure no more perm failures occurred." + set pfs2 [stat_field $masterenv repmgr_stat $perm_failures] + error_check_good fail_count $pfs2 $pfs1 + + puts "\tRepmgr$tnum.i: Clean up." + $clientenv close + $masterenv close + close $master + close $m2 +} diff --git a/test/tcl/reputils.tcl b/test/tcl/reputils.tcl new file mode 100644 index 00000000..02963c33 --- /dev/null +++ b/test/tcl/reputils.tcl @@ -0,0 +1,2924 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Replication testing utilities + +# Environment handle for the env containing the replication "communications +# structure" (really a CDB environment). + +# The test environment consists of a queue and a # directory (environment) +# per replication site. 
The queue is used to hold messages destined for a +# particular site and the directory will contain the environment for the +# site. So the environment looks like: +# $testdir +# ___________|______________________________ +# / | \ \ +# MSGQUEUEDIR MASTERDIR CLIENTDIR.0 ... CLIENTDIR.N-1 +# | | ... | +# 1 2 .. N+1 +# +# The master is site 1 in the MSGQUEUEDIR and clients 1-N map to message +# queues 2 - N+1. +# +# The globals repenv(1-N) contain the environment handles for the sites +# with a given id (i.e., repenv(1) is the master's environment. + + +# queuedbs is an array of DB handles, one per machine ID/machine ID pair, +# for the databases that contain messages from one machine to another. +# We omit the cases where the "from" and "to" machines are the same. +# +global queuedbs +global machids +global perm_response_list +set perm_response_list {} +global perm_sent_list +set perm_sent_list {} +global electable_pri +set electable_pri 5 +set drop 0 +global anywhere +set anywhere 0 + +global rep_verbose +set rep_verbose 0 +global verbose_type +set verbose_type "rep" + +# To run a replication test with verbose messages, type +# 'run_verbose' and then the usual test command string enclosed +# in double quotes or curly braces. For example: +# +# run_verbose "rep001 btree" +# +# run_verbose {run_repmethod btree test001} +# +# To run a replication test with one of the subsets of verbose +# messages, use the same syntax with 'run_verbose_elect', +# 'run_verbose_lease', etc. 
+ +proc run_verbose { commandstring } { + global verbose_type + set verbose_type "rep" + run_verb $commandstring +} + +proc run_verbose_elect { commandstring } { + global verbose_type + set verbose_type "rep_elect" + run_verb $commandstring +} + +proc run_verbose_lease { commandstring } { + global verbose_type + set verbose_type "rep_lease" + run_verb $commandstring +} + +proc run_verbose_misc { commandstring } { + global verbose_type + set verbose_type "rep_misc" + run_verb $commandstring +} + +proc run_verbose_msgs { commandstring } { + global verbose_type + set verbose_type "rep_msgs" + run_verb $commandstring +} + +proc run_verbose_sync { commandstring } { + global verbose_type + set verbose_type "rep_sync" + run_verb $commandstring +} + +proc run_verbose_test { commandstring } { + global verbose_type + set verbose_type "rep_test" + run_verb $commandstring +} + +proc run_verbose_repmgr_misc { commandstring } { + global verbose_type + set verbose_type "repmgr_misc" + run_verb $commandstring +} + +proc run_verb { commandstring } { + global rep_verbose + global verbose_type + + set rep_verbose 1 + if { [catch { + eval $commandstring + flush stdout + flush stderr + } res] != 0 } { + global errorInfo + + set rep_verbose 0 + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_verbose: $commandstring: $theError" + } else { + error $theError; + } + } + set rep_verbose 0 +} + +# Databases are on-disk by default for replication testing. +# Some replication tests have been converted to run with databases +# in memory instead. + +global databases_in_memory +set databases_in_memory 0 + +proc run_inmem_db { test method } { + run_inmem $test $method 1 0 0 0 +} + +# Replication files are on-disk by default for replication testing. +# Some replication tests have been converted to run with rep files +# in memory instead. 
+ +global repfiles_in_memory +set repfiles_in_memory 0 + +proc run_inmem_rep { test method } { + run_inmem $test $method 0 0 1 0 +} + +# Region files are on-disk by default for replication testing. +# Replication tests can force the region files in-memory by setting +# the -private flag when opening an env. + +global env_private +set env_private 0 + +proc run_env_private { test method } { + global test_names + + if { [is_substr $test_names(skip_for_env_private) $test] == 1 } { + puts "Test $test is not set up to use private envs." + return + } else { + run_inmem $test $method 0 0 0 1 + } +} + +# Logs are on-disk by default for replication testing. +# Mixed-mode log testing provides a mixture of on-disk and +# in-memory logging, or even all in-memory. When testing on a +# 1-master/1-client test, we try all four options. On a test +# with more clients, we still try four options, randomly +# selecting whether the later clients are on-disk or in-memory. +# + +global mixed_mode_logging +set mixed_mode_logging 0 + +proc create_logsets { nsites } { + global mixed_mode_logging + global logsets + global rand_init + + error_check_good set_random_seed [berkdb srand $rand_init] 0 + if { $mixed_mode_logging == 0 || $mixed_mode_logging == 2 } { + if { $mixed_mode_logging == 0 } { + set logmode "on-disk" + } else { + set logmode "in-memory" + } + set loglist {} + for { set i 0 } { $i < $nsites } { incr i } { + lappend loglist $logmode + } + set logsets [list $loglist] + } + if { $mixed_mode_logging == 1 } { + set set1 {on-disk on-disk} + set set2 {on-disk in-memory} + set set3 {in-memory on-disk} + set set4 {in-memory in-memory} + + # Start with nsites at 2 since we already set up + # the master and first client. 
+ for { set i 2 } { $i < $nsites } { incr i } { + foreach set { set1 set2 set3 set4 } { + if { [berkdb random_int 0 1] == 0 } { + lappend $set "on-disk" + } else { + lappend $set "in-memory" + } + } + } + set logsets [list $set1 $set2 $set3 $set4] + } + return $logsets +} + +proc run_inmem_log { test method } { + run_inmem $test $method 0 1 0 0 +} + +# Run_mixedmode_log is a little different from the other run_inmem procs: +# it provides a mixture of in-memory and on-disk logging on the different +# hosts in a replication group. +proc run_mixedmode_log { test method {display 0} {run 1} \ + {outfile stdout} {largs ""} } { + global mixed_mode_logging + set mixed_mode_logging 1 + + set prefix [string range $test 0 2] + if { $prefix != "rep" } { + puts "Skipping mixed-mode log testing for non-rep test." + set mixed_mode_logging 0 + return + } + + eval run_method $method $test $display $run $outfile $largs + + # Reset to default values after run. + set mixed_mode_logging 0 +} + +# The procs run_inmem_db, run_inmem_log, run_inmem_rep, and run_env_private +# put databases, logs, rep files, or region files in-memory. (Setting up +# an env with the -private flag puts region files in memory.) +# The proc run_inmem allows you to put any or all of these in-memory +# at the same time. + +proc run_inmem { test method\ + {dbinmem 1} {logsinmem 1} {repinmem 1} {envprivate 1} } { + + set prefix [string range $test 0 2] + if { $prefix != "rep" } { + puts "Skipping in-memory testing for non-rep test." + return + } + global databases_in_memory + global mixed_mode_logging + global repfiles_in_memory + global env_private + global test_names + + if { $dbinmem } { + if { [is_substr $test_names(skip_for_inmem_db) $test] == 1 } { + puts "Test $test does not support in-memory databases." + puts "Putting databases on-disk." 
+ set databases_in_memory 0 + } else { + set databases_in_memory 1 + } + } + if { $logsinmem } { + set mixed_mode_logging 2 + } + if { $repinmem } { + set repfiles_in_memory 1 + } + if { $envprivate } { + set env_private 1 + } + + if { [catch {eval run_method $method $test} res] } { + set databases_in_memory 0 + set mixed_mode_logging 0 + set repfiles_in_memory 0 + set env_private 0 + puts "FAIL: $res" + } + + set databases_in_memory 0 + set mixed_mode_logging 0 + set repfiles_in_memory 0 + set env_private 0 +} + +# The proc run_diskless runs run_inmem with its default values. +# It's useful to have this name to remind us of its testing purpose, +# which is to mimic a diskless host. + +proc run_diskless { test method } { + run_inmem $test $method 1 1 1 1 +} + +# Open the master and client environments; store these in the global repenv +# Return the master's environment: "-env masterenv" +proc repl_envsetup { envargs largs test {nclients 1} {droppct 0} { oob 0 } } { + source ./include.tcl + global clientdir + global drop drop_msg + global masterdir + global repenv + global rep_verbose + global verbose_type + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on}" + } + + env_cleanup $testdir + + replsetup $testdir/MSGQUEUEDIR + + set masterdir $testdir/MASTERDIR + file mkdir $masterdir + if { $droppct != 0 } { + set drop 1 + set drop_msg [expr 100 / $droppct] + } else { + set drop 0 + } + + for { set i 0 } { $i < $nclients } { incr i } { + set clientdir($i) $testdir/CLIENTDIR.$i + file mkdir $clientdir($i) + } + + # Some tests that use a small db pagesize need a small + # mpool pagesize as well -- otherwise we'll run out of + # mutexes. First determine the natural pagesize, so + # that can be used in the normal case, then adjust where + # needed. 
+ + set env [berkdb_env -create -home $testdir] + set pagesize [$env get_mp_pagesize] + error_check_good env_close [$env close] 0 + berkdb envremove -home $testdir + + set small_pagesize_tests [list test035 test096 test112 test113 test114] + if { [lsearch -exact $small_pagesize_tests $test] != -1 } { + set pagesize 512 + } + + # Open a master. + repladd 1 + # + # Set log smaller than default to force changing files, + # but big enough so that the tests that use binary files + # as keys/data can run. Increase the size of the log region -- + # sdb004 needs this, now that subdatabase names are stored + # in the env region. + # + set logmax [expr 3 * 1024 * 1024] + set lockmax 40000 + set logregion 2097152 + + set ma_cmd "berkdb_env_noerr -create -log_max $logmax $envargs \ + -cachesize { 0 4194304 1 } -log_regionmax $logregion \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx $masterdir $verbargs -pagesize $pagesize \ + -home $masterdir -txn nosync -rep_master -rep_transport \ + \[list 1 replsend\]" + set masterenv [eval $ma_cmd] + error_check_good master_env [is_valid_env $masterenv] TRUE + set repenv(master) $masterenv + + # Open clients + for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + repladd $envid + set cl_cmd "berkdb_env_noerr -create $envargs -txn nosync \ + -cachesize { 0 10000000 0 } -log_regionmax $logregion \ + -lock_max_objects $lockmax -lock_max_locks $lockmax \ + -errpfx $clientdir($i) $verbargs -pagesize $pagesize \ + -home $clientdir($i) -rep_client -rep_transport \ + \[list $envid replsend\]" + set clientenv [eval $cl_cmd] + error_check_good client_env [is_valid_env $clientenv] TRUE + set repenv($i) $clientenv + } + set repenv($i) NULL + append largs " -env $masterenv " + + # Process startup messages + repl_envprocq $test $nclients $oob + + # Clobber replication's 30-second anti-archive timer, which + # will have been started by client sync-up internal init, in + # case the test we're about to run wants 
to do any log + # archiving, or database renaming and/or removal. + $masterenv test force noarchive_timeout + + return $largs +} + +# Process all incoming messages. Iterate until there are no messages left +# in anyone's queue so that we capture all message exchanges. We verify that +# the requested number of clients matches the number of client environments +# we have. The oob parameter indicates if we should process the queue +# with out-of-order delivery. The replprocess procedure actually does +# the real work of processing the queue -- this routine simply iterates +# over the various queues and does the initial setup. +proc repl_envprocq { test { nclients 1 } { oob 0 }} { + global repenv + global drop + + set masterenv $repenv(master) + for { set i 0 } { 1 } { incr i } { + if { $repenv($i) == "NULL"} { + break + } + } + error_check_good i_nclients $nclients $i + + berkdb debug_check + puts -nonewline "\t$test: Processing master/$i client queues" + set rand_skip 0 + if { $oob } { + puts " out-of-order" + } else { + puts " in order" + } + set droprestore $drop + while { 1 } { + set nproced 0 + + if { $oob } { + set rand_skip [berkdb random_int 2 10] + } + incr nproced [replprocessqueue $masterenv 1 $rand_skip] + for { set i 0 } { $i < $nclients } { incr i } { + set envid [expr $i + 2] + if { $oob } { + set rand_skip [berkdb random_int 2 10] + } + set n [replprocessqueue $repenv($i) \ + $envid $rand_skip] + incr nproced $n + } + + if { $nproced == 0 } { + # Now that we delay requesting records until + # we've had a few records go by, we should always + # see that the number of requests is lower than the + # number of messages that were enqueued. 
+ for { set i 0 } { $i < $nclients } { incr i } { + set clientenv $repenv($i) + set queued [stat_field $clientenv rep_stat \ + "Total log records queued"] + error_check_bad queued_stats \ + $queued -1 + set requested [stat_field $clientenv rep_stat \ + "Log records requested"] + error_check_bad requested_stats \ + $requested -1 + + # + # Set to 100 usecs. An average ping + # to localhost should be a few 10s usecs. + # + $clientenv rep_request 100 400 + } + + # If we were dropping messages, we might need + # to flush the log so that we get everything + # and end up in the right state. + if { $drop != 0 } { + set drop 0 + $masterenv rep_flush + berkdb debug_check + puts "\t$test: Flushing Master" + } else { + break + } + } + } + + # Reset the clients back to the default state in case we + # have more processing to do. + for { set i 0 } { $i < $nclients } { incr i } { + set clientenv $repenv($i) + $clientenv rep_request 40000 1280000 + } + set drop $droprestore +} + +# Verify that the directories in the master are exactly replicated in +# each of the client environments. +proc repl_envver0 { test method { nclients 1 } } { + global masterdir + global repenv + + # Verify the database in the client dir. + # First dump the master. + set t1 $masterdir/t1 + set t2 $masterdir/t2 + set t3 $masterdir/t3 + set omethod [convert_method $method] + + # + # We are interested in the keys of whatever databases are present + # in the master environment, so we just call a no-op check function + # since we have no idea what the contents of this database really is. + # We just need to walk the master and the clients and make sure they + # have the same contents. 
+ # + set cwd [pwd] + cd $masterdir + set stat [catch {glob test*.db} dbs] + cd $cwd + if { $stat == 1 } { + return + } + foreach testfile $dbs { + open_and_dump_file $testfile $repenv(master) $masterdir/t2 \ + repl_noop dump_file_direction "-first" "-next" + + if { [string compare [convert_method $method] -recno] != 0 } { + filesort $t2 $t3 + file rename -force $t3 $t2 + } + for { set i 0 } { $i < $nclients } { incr i } { + puts "\t$test: Verifying client $i database $testfile contents." + open_and_dump_file $testfile $repenv($i) \ + $t1 repl_noop dump_file_direction "-first" "-next" + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } else { + catch {file copy -force $t1 $t3} ret + } + error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0 + } + } +} + +# Remove all the elements from the master and verify that these +# deletions properly propagated to the clients. +proc repl_verdel { test method { nclients 1 } } { + source ./include.tcl + + global clientdir + global masterdir + global repenv + global encrypt + global passwd + global util_path + + # Delete all items in the master. + set cwd [pwd] + cd $masterdir + set stat [catch {glob test*.db} dbs] + cd $cwd + if { $stat == 1 } { + return + } + set utilflag "" + if { $encrypt != 0 } { + set utilflag "-P $passwd" + } + foreach testfile $dbs { + + # Dump the database to determine whether there are subdbs. + # + set ret [catch {eval {exec $util_path/db_dump} $utilflag\ + -f $testdir/dumpfile $masterdir/$testfile} res] + error_check_good dump($testfile:$res) $ret 0 + + set subdbs "" + set fd [open $testdir/dumpfile r] + while { [gets $fd str] != -1 } { + if { [string match database=* $str] } { + set subdbname [string range $str 9 end] + lappend subdbs $subdbname + } + } + close $fd + + # Set up filenames depending on whether there are + # subdatabases or not. 
+ set files "" + if { [llength $subdbs] > 0 } { + foreach sub $subdbs { + set filename "$testfile $sub" + lappend files $filename + } + } else { + set files $testfile + } + + foreach f $files { + puts "\t$test: Deleting all items from the master." + set txn [$repenv(master) txn] + error_check_good txn_begin [is_valid_txn $txn \ + $repenv(master)] TRUE + set db [eval {berkdb_open} -txn $txn -env $repenv(master) $f] + error_check_good reopen_master [is_valid_db $db] TRUE + set dbc [$db cursor -txn $txn] + error_check_good reopen_master_cursor \ + [is_valid_cursor $dbc $db] TRUE + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$dbc get -next] } { + error_check_good del_item [$dbc del] 0 + } + error_check_good dbc_close [$dbc close] 0 + error_check_good txn_commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + } + + repl_envprocq $test $nclients + + # Check clients. + for { set i 0 } { $i < $nclients } { incr i } { + foreach f $files { + puts "\t$test: Verifying client database $i is empty." + + set db [eval berkdb_open -env $repenv($i) $f] + error_check_good reopen_client($i) \ + [is_valid_db $db] TRUE + set dbc [$db cursor] + error_check_good reopen_client_cursor($i) \ + [is_valid_cursor $dbc $db] TRUE + + error_check_good client($i)_empty \ + [llength [$dbc get -first]] 0 + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + } + } + } +} + +# Replication "check" function for the dump procs that expect to +# be able to verify the keys and data. +proc repl_noop { k d } { + return +} + +# Close all the master and client environments in a replication test directory. 
+proc repl_envclose { test envargs } { + source ./include.tcl + global clientdir + global encrypt + global masterdir + global repenv + global drop + + if { [lsearch $envargs "-encrypta*"] !=-1 } { + set encrypt 1 + } + + # In order to make sure that we have fully-synced and ready-to-verify + # databases on all the clients, do a checkpoint on the master and + # process messages in order to flush all the clients. + set drop 0 + berkdb debug_check + puts "\t$test: Checkpointing master." + error_check_good masterenv_ckp [$repenv(master) txn_checkpoint] 0 + + # Count clients. + for { set ncli 0 } { 1 } { incr ncli } { + if { $repenv($ncli) == "NULL" } { + break + } + $repenv($ncli) rep_request 100 100 + } + repl_envprocq $test $ncli + + error_check_good masterenv_close [$repenv(master) close] 0 + verify_dir $masterdir "\t$test: " 0 0 1 + for { set i 0 } { $i < $ncli } { incr i } { + error_check_good client($i)_close [$repenv($i) close] 0 + verify_dir $clientdir($i) "\t$test: " 0 0 1 + } + replclose $testdir/MSGQUEUEDIR + +} + +# Replnoop is a dummy function to substitute for replsend +# when replication is off. +proc replnoop { control rec fromid toid flags lsn } { + return 0 +} + +proc replclose { queuedir } { + global queueenv queuedbs machids + + foreach m $machids { + set db $queuedbs($m) + error_check_good dbr_close [$db close] 0 + } + error_check_good qenv_close [$queueenv close] 0 + set machids {} +} + +# Create a replication group for testing. +proc replsetup { queuedir } { + global queueenv queuedbs machids + + file mkdir $queuedir + set max_locks 20000 + set queueenv [berkdb_env \ + -create -txn nosync -lock_max_locks $max_locks -home $queuedir] + error_check_good queueenv [is_valid_env $queueenv] TRUE + + if { [info exists queuedbs] } { + unset queuedbs + } + set machids {} + + return $queueenv +} + +# Send function for replication. 
+proc replsend { control rec fromid toid flags lsn } { + global queuedbs queueenv machids + global drop drop_msg + global perm_sent_list + global anywhere + + set permflags [lsearch $flags "perm"] + if { [llength $perm_sent_list] != 0 && $permflags != -1 } { +# puts "replsend sent perm message, LSN $lsn" + lappend perm_sent_list $lsn + } + + # + # If we are testing with dropped messages, then we drop every + # $drop_msg time. If we do that just return 0 and don't do + # anything. However, avoid dropping PAGE_REQ and LOG_REQ, because + # currently recovering from those cases can take a while, and some tests + # rely on the assumption that a single log_flush from the master clears + # up any missing messages. + # + if { $drop != 0 && + !([berkdb msgtype $control] eq "page_req" || + [berkdb msgtype $control] eq "log_req")} { + incr drop + if { $drop == $drop_msg } { + set drop 1 + return 0 + } + } + # XXX + # -1 is DB_BROADCAST_EID + if { $toid == -1 } { + set machlist $machids + } else { + if { [info exists queuedbs($toid)] != 1 } { + error "replsend: machid $toid not found" + } + set m NULL + if { $anywhere != 0 } { + # + # If we can send this anywhere, send it to the first + # id we find that is neither toid nor fromid. + # + set anyflags [lsearch $flags "any"] + if { $anyflags != -1 } { + foreach m $machids { + if { $m == $fromid || $m == $toid } { + continue + } + set machlist [list $m] + break + } + } + } + # + # If we didn't find a different site, then we must + # fall back to the toid. + # + if { $m == "NULL" } { + set machlist [list $toid] + } + } + + foreach m $machlist { + # do not broadcast to self. + if { $m == $fromid } { + continue + } + + set db $queuedbs($m) + set txn [$queueenv txn] + $db put -txn $txn -append [list $control $rec $fromid] + error_check_good replsend_commit [$txn commit] 0 + } + + queue_logcheck + return 0 +} + +# +# If the message queue log files are getting too numerous, checkpoint +# and archive them.
Some tests are so large (particularly from +# run_repmethod) that they can consume far too much disk space. +proc queue_logcheck { } { + global queueenv + + + set logs [$queueenv log_archive -arch_log] + set numlogs [llength $logs] + if { $numlogs > 10 } { + $queueenv txn_checkpoint + $queueenv log_archive -arch_remove + } +} + +# Discard all the pending messages for a particular site. +proc replclear { machid } { + global queuedbs queueenv + + if { [info exists queuedbs($machid)] != 1 } { + error "FAIL: replclear: machid $machid not found" + } + + set db $queuedbs($machid) + set txn [$queueenv txn] + set dbc [$db cursor -txn $txn] + for { set dbt [$dbc get -rmw -first] } { [llength $dbt] > 0 } \ + { set dbt [$dbc get -rmw -next] } { + error_check_good replclear($machid)_del [$dbc del] 0 + } + error_check_good replclear($machid)_dbc_close [$dbc close] 0 + error_check_good replclear($machid)_txn_commit [$txn commit] 0 +} + +# Add a machine to a replication environment. +proc repladd { machid } { + global queueenv queuedbs machids + + if { [info exists queuedbs($machid)] == 1 } { + error "FAIL: repladd: machid $machid already exists" + } + + set queuedbs($machid) [berkdb open -auto_commit \ + -env $queueenv -create -recno -renumber repqueue$machid.db] + error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE + + lappend machids $machid +} + +# Acquire a handle to work with an existing machine's replication +# queue. This is for situations where more than one process +# is working with a message queue. In general, having more than one +# process handle the queue is wrong. However, in order to test some +# things, we need two processes (since Tcl doesn't support threads). We +# go to great pain in the test harness to make sure this works, but we +# don't let customers do it. 
+proc repljoin { machid } { + global queueenv queuedbs machids + + set queuedbs($machid) [berkdb open -auto_commit \ + -env $queueenv repqueue$machid.db] + error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE + + lappend machids $machid +} + +# Process a queue of messages, skipping every "skip_interval" entry. +# We traverse the entire queue, but since we skip some messages, we +# may end up leaving things in the queue, which should get picked up +# on a later run. +proc replprocessqueue { dbenv machid { skip_interval 0 } { hold_electp NONE } \ + { dupmasterp NONE } { errp NONE } } { + global queuedbs queueenv errorCode + global perm_response_list + + # hold_electp is a call-by-reference variable which lets our caller + # know we need to hold an election. + if { [string compare $hold_electp NONE] != 0 } { + upvar $hold_electp hold_elect + } + set hold_elect 0 + + # dupmasterp is a call-by-reference variable which lets our caller + # know we have a duplicate master. + if { [string compare $dupmasterp NONE] != 0 } { + upvar $dupmasterp dupmaster + } + set dupmaster 0 + + # errp is a call-by-reference variable which lets our caller + # know we have gotten an error (that they expect). + if { [string compare $errp NONE] != 0 } { + upvar $errp errorp + } + set errorp 0 + + set nproced 0 + + set txn [$queueenv txn] + + # If we are running separate processes, the second process has + # to join an existing message queue. + if { [info exists queuedbs($machid)] == 0 } { + repljoin $machid + } + + set dbc [$queuedbs($machid) cursor -txn $txn] + + error_check_good process_dbc($machid) \ + [is_valid_cursor $dbc $queuedbs($machid)] TRUE + + for { set dbt [$dbc get -first] } \ + { [llength $dbt] != 0 } \ + { } { + set data [lindex [lindex $dbt 0] 1] + set recno [lindex [lindex $dbt 0] 0] + + # If skip_interval is nonzero, we want to process messages + # out of order. 
We do this in a simple but slimy way-- + # continue walking with the cursor without processing the + # message or deleting it from the queue, but do increment + # "nproced". The way this proc is normally used, the + # precise value of nproced doesn't matter--we just don't + # assume the queues are empty if it's nonzero. Thus, + # if we contrive to make sure it's nonzero, we'll always + # come back to records we've skipped on a later call + # to replprocessqueue. (If there really are no records, + # we'll never get here.) + # + # Skip every skip_interval'th record (and use a remainder other + # than zero so that we're guaranteed to really process at least + # one record on every call). + if { $skip_interval != 0 } { + if { $nproced % $skip_interval == 1 } { + incr nproced + set dbt [$dbc get -next] + continue + } + } + + # We need to remove the current message from the queue, + # because we're about to end the transaction and someone + # else processing messages might come in and reprocess this + # message which would be bad. + error_check_good queue_remove [$dbc del] 0 + + # We have to play an ugly cursor game here: we currently + # hold a lock on the page of messages, but rep_process_message + # might need to lock the page with a different cursor in + # order to send a response. So save the next recno, close + # the cursor, and then reopen and reset the cursor. + # If someone else is processing this queue, our entry might + # have gone away, and we need to be able to handle that. + + error_check_good dbc_process_close [$dbc close] 0 + error_check_good txn_commit [$txn commit] 0 + + set ret [catch {$dbenv rep_process_message \ + [lindex $data 2] [lindex $data 0] [lindex $data 1]} res] + + # Save all ISPERM and NOTPERM responses so we can compare their + # LSNs to the LSN in the log. The variable perm_response_list + # holds the entire response so we can extract responses and + # LSNs as needed. 
+ # + if { [llength $perm_response_list] != 0 && \ + ([is_substr $res ISPERM] || [is_substr $res NOTPERM]) } { + lappend perm_response_list $res + } + + if { $ret != 0 } { + if { [string compare $errp NONE] != 0 } { + set errorp "$dbenv $machid $res" + } else { + error "FAIL:[timestamp]\ + rep_process_message returned $res" + } + } + + incr nproced + + # Now, re-establish the cursor position. We fetch the + # current record number. If there is something there, + # that is the record for the next iteration. If there + # is nothing there, then we've consumed the last item + # in the queue. + + set txn [$queueenv txn] + set dbc [$queuedbs($machid) cursor -txn $txn] + set dbt [$dbc get -set_range $recno] + + if { $ret == 0 } { + set rettype [lindex $res 0] + set retval [lindex $res 1] + # + # Do nothing for 0 and NEWSITE + # + if { [is_substr $rettype HOLDELECTION] } { + set hold_elect 1 + } + if { [is_substr $rettype DUPMASTER] } { + set dupmaster "1 $dbenv $machid" + } + if { [is_substr $rettype NOTPERM] || \ + [is_substr $rettype ISPERM] } { + set lsnfile [lindex $retval 0] + set lsnoff [lindex $retval 1] + } + } + + if { $errorp != 0 } { + # Break also on an error, caller wants to handle it. + break + } + if { $hold_elect == 1 } { + # Break also on a HOLDELECTION, for the same reason. + break + } + if { $dupmaster == 1 } { + # Break also on a DUPMASTER, for the same reason. + break + } + + } + + error_check_good dbc_close [$dbc close] 0 + error_check_good txn_commit [$txn commit] 0 + + # Return the number of messages processed. 
+ return $nproced +} + + +set run_repl_flag "-run_repl" + +proc extract_repl_args { args } { + global run_repl_flag + + for { set arg [lindex $args [set i 0]] } \ + { [string length $arg] > 0 } \ + { set arg [lindex $args [incr i]] } { + if { [string compare $arg $run_repl_flag] == 0 } { + return [lindex $args [expr $i + 1]] + } + } + return "" +} + +proc delete_repl_args { args } { + global run_repl_flag + + set ret {} + + for { set arg [lindex $args [set i 0]] } \ + { [string length $arg] > 0 } \ + { set arg [lindex $args [incr i]] } { + if { [string compare $arg $run_repl_flag] != 0 } { + lappend ret $arg + } else { + incr i + } + } + return $ret +} + +global elect_serial +global elections_in_progress +set elect_serial 0 + +# Start an election in a sub-process. +proc start_election { \ + pfx qdir home envid nsites nvotes pri timeout {err "none"} {crash 0}} { + source ./include.tcl + global elect_serial elections_in_progress machids + global rep_verbose + global verbose_type + + set filelist {} + set ret [catch {glob $testdir/ELECTION*.$elect_serial} result] + if { $ret == 0 } { + set filelist [concat $filelist $result] + } + foreach f $filelist { + fileremove -f $f + } + + set oid [open $testdir/ELECTION_SOURCE.$elect_serial w] + + puts $oid "source $test_path/test.tcl" + puts $oid "set is_repchild 1" + puts $oid "replsetup $qdir" + foreach i $machids { puts $oid "repladd $i" } + set env_cmd "berkdb env -event -home $home -txn \ + -rep_transport {$envid replsend} -errpfx $pfx" + if { $rep_verbose == 1 } { + append env_cmd " -errfile /dev/stdout -verbose {$verbose_type on}" + } else { + append env_cmd " -errfile $testdir/ELECTION_ERRFILE.$elect_serial" + } + puts $oid "set dbenv \[ $env_cmd \]" + + puts $oid "\$dbenv test abort $err" + puts $oid "set res \[catch \{\$dbenv rep_elect $nsites \ + $nvotes $pri $timeout\} ret\]" + puts $oid "set r \[open \$testdir/ELECTION_RESULT.$elect_serial w\]" + puts $oid "if \{\$res == 0 \} \{" + puts $oid "puts \$r \"SUCCESS 
\$ret\"" + puts $oid "\} else \{" + puts $oid "puts \$r \"ERROR \$ret\"" + puts $oid "\}" + # + # This loop calls rep_elect a second time with the error cleared. + # We don't want to do that if we are simulating a crash. + if { $err != "none" && $crash != 1 } { + puts $oid "\$dbenv test abort none" + puts $oid "set res \[catch \{\$dbenv rep_elect $nsites \ + $nvotes $pri $timeout\} ret\]" + puts $oid "if \{\$res == 0 \} \{" + puts $oid "puts \$r \"SUCCESS \$ret\"" + puts $oid "\} else \{" + puts $oid "puts \$r \"ERROR \$ret\"" + puts $oid "\}" + } + + puts $oid "if \{ \[is_elected \$dbenv\] \} \{" + puts $oid "puts \$r \"ELECTED \$dbenv\"" + puts $oid "\}" + + puts $oid "close \$r" + close $oid + + set t [open "|$tclsh_path >& $testdir/ELECTION_OUTPUT.$elect_serial" w] + if { $rep_verbose } { + set t [open "|$tclsh_path" w] + } + puts $t "source ./include.tcl" + puts $t "source $testdir/ELECTION_SOURCE.$elect_serial" + flush $t + + set elections_in_progress($elect_serial) $t + return $elect_serial +} + +# +# If we are doing elections during upgrade testing, set +# upgrade to 1. Doing that sets the priority to the +# test priority in rep_elect, which will simulate a +# 0-priority but electable site. +# +proc setpriority { priority nclients winner {start 0} {upgrade 0} } { + global electable_pri + upvar $priority pri + + for { set i $start } { $i < [expr $nclients + $start] } { incr i } { + if { $i == $winner } { + set pri($i) 100 + } else { + if { $upgrade } { + set pri($i) $electable_pri + } else { + set pri($i) 10 + } + } + } +} + +# run_election has the following arguments: +# Arrays: +# celist List of env_handle, EID pairs. +# errcmd Array of where errors should be forced. +# priority Array of the priorities of each client env. +# crash If an error is forced, should we crash or recover? +# The upvar command takes care of making these arrays available to +# the procedure. +# +# Ordinary variables: +# qdir Directory where the message queue is located. 
+# msg Message prefixed to the output. +# elector This client calls the first election. +# nsites Number of sites in the replication group. +# nvotes Number of votes required to win the election. +# nclients Number of clients participating in the election. +# win The expected winner of the election. +# reset_role Should the new master (i.e. winner) be reset +# to client role after the election? +# dbname Name of the underlying database. The caller +# should send in "NULL" if the database has not +# yet been created. +# ignore Should the winner ignore its own election? +# If ignore is 1, the winner is not made master. +# timeout_ok We expect that this election will not succeed +# in electing a new master (perhaps because there +# already is a master). +# elect_timeout Timeout value to pass to rep_elect, which may be +# a 2-element list in case "full election timeouts" +# are in use. + +proc run_election { celist errcmd priority crsh\ + qdir msg elector nsites nvotes nclients win reset_role\ + dbname {ignore 0} {timeout_ok 0} {elect_timeout 15000000} } { + + global elect_serial + global is_hp_test + global is_windows_test + global rand_init + upvar $celist cenvlist + upvar $errcmd err_cmd + upvar $priority pri + upvar $crsh crash + + # Windows and HP-UX require a longer timeout. + if { [llength $elect_timeout] == 1 && + ($is_windows_test == 1 || $is_hp_test == 1) } { + set elect_timeout [expr $elect_timeout * 2] + } + + # Initialize tries based on timeout. We use tries to loop looking for + # messages because as sites are sleeping waiting for their timeout to + # expire we need to keep checking for messages. + # The $elect_timeout might be either a scalar number, or a + # two-element list in the case where we're interested in testing full + # election timeouts. Either is fine for passing to rep_elect (via + # start_election); but of course for computing "$tries" we need just a + # simple number. 
+ # + if {[llength $elect_timeout] > 1} { + set t [lindex $elect_timeout 1] + } else { + set t $elect_timeout + } + set tries [expr ($t * 4) / 1000000] + + # Initialize each client participating in this election. While we're at + # it, save a copy of the envlist pair for the elector site, because + # we'll need its EID and env handle in a moment (for the initial call to + # start_election). Note that $elector couldn't simply be used to index + # into the list, because for example the envlist could look something + # like this: + # + # { { cenv4 4 } { cenv5 5 } { cenv6 6 } } + # + # and then "4" could be a valid $elector value (meaning EID 6). + # + set elector_pair NOTFOUND + set win_pair NOTFOUND + foreach pair $cenvlist { + set id [lindex $pair 1] + set i [expr $id - 2] + if { $i == $elector } { + set elector_pair $pair + } + set elect_pipe($i) INVALID + set env [lindex $pair 0] + $env event_info -clear + replclear $id + if { $i == $win } { + set win_pair $pair + set orig_pfx [$env get_errpfx] + } + } + error_check_bad unknown_elector $elector_pair NOTFOUND + error_check_good unknown_winner \ + [expr { $win_pair != "NOTFOUND" || ! $reset_role }] 1 + + # + # XXX + # We need to somehow check for the warning if nvotes is not + # a majority. Problem is that warning will go into the child + # process' output. Furthermore, we need a mechanism that can + # handle both sending the output to a file and sending it to + # /dev/stderr when debugging without failing the + # error_check_good check.
+ # + puts "\t\t$msg.1: Election with nsites=$nsites,\ + nvotes=$nvotes, nclients=$nclients" + puts "\t\t$msg.2: First elector is $elector,\ + expected winner is $win (eid [expr $win + 2])" + incr elect_serial + set pfx "CHILD$elector.$elect_serial" + set env [lindex $elector_pair 0] + set envid [lindex $elector_pair 1] + set home [$env get_home] + set elect_pipe($elector) [start_election \ + $pfx $qdir $home $envid $nsites $nvotes $pri($elector) \ + $elect_timeout $err_cmd($elector) $crash($elector)] + tclsleep 2 + + set got_newmaster 0 + + # If we're simulating a crash, skip the while loop and + # just give the initial election a chance to complete. + set crashing 0 + for { set i 0 } { $i < $nclients } { incr i } { + if { $crash($i) == 1 } { + set crashing 1 + } + } + + set child_elected 0 + + if { $crashing == 1 } { + tclsleep 10 + } else { + set abandoned "" + while { 1 } { + set nproced 0 + set he 0 + + foreach pair $cenvlist { + set he 0 + set unavail 0 + set envid [lindex $pair 1] + set i [expr $envid - 2] + set clientenv($i) [lindex $pair 0] + + # If the "elected" event is received by the + # child process, it writes to a file and we + # use check_election to get the message. In + # that case, the env set up in that child + # is the elected env. + set child_done [check_election $elect_pipe($i)\ + unavail child_elected] + + incr nproced [replprocessqueue \ + $clientenv($i) $envid 0 he] + + # We use normal event processing to detect + # an "elected" event received by the parent + # process. + set parent_elected [is_elected $clientenv($i)] + +# puts "Tries $tries:\ +# Processed queue for client $i, $nproced msgs he $he unavail $unavail" + + # Check for completed election. If it's the + # first time we've noticed it, deal with it. + if { ( $child_elected || $parent_elected ) && \ + $got_newmaster == 0 } { + set got_newmaster 1 + + # Make sure it's the expected winner. 
+ error_check_good right_winner \ + $envid [expr $win + 2] + + # Reconfigure winning env as master. + if { $ignore == 0 } { + $clientenv($i) errpfx \ + NEWMASTER + error_check_good \ + make_master($i) \ + [$clientenv($i) \ + rep_start -master] 0 + + wait_all_startup $cenvlist $envid + + # Don't hold another election + # yet if we are setting up a + # new master. This could + # cause the new master to + # declare itself a client + # during internal init. + set he 0 + } + + # Occasionally force new log records + # to be written, unless the database + # has not yet been created. + set write [berkdb random_int 1 10] + if { $write == 1 && $dbname != "NULL" } { + set db [eval berkdb_open_noerr \ + -env $clientenv($i) \ + -auto_commit $dbname] + error_check_good dbopen \ + [is_valid_db $db] TRUE + error_check_good dbclose \ + [$db close] 0 + } + } + + if { $he == 1 && $got_newmaster == 0 } { + # + # Only close down the election pipe if the + # previously created one is done and + # waiting for new commands, otherwise + # if we try to close it while it's in + # progress we hang this main tclsh. If + # it's not done, hold onto it in an + # "abandoned" list, where we'll clean it + # up later. + # + if { $elect_pipe($i) != "INVALID" && \ + $child_done == 1 } { + close_election $elect_pipe($i) + set elect_pipe($i) "INVALID" + } elseif { $elect_pipe($i) != "INVALID" } { + lappend abandoned $elect_pipe($i) + set elect_pipe($i) "INVALID" + } +# puts "Starting election on client $i" + incr elect_serial + set pfx "CHILD$i.$elect_serial" + set home [$clientenv($i) get_home] + set elect_pipe($i) [start_election \ + $pfx $qdir \ + $home $envid $nsites \ + $nvotes $pri($i) $elect_timeout] + set got_hold_elect($i) 1 + } + } + + # We need to wait around to make doubly sure that the + # election has finished... + if { $nproced == 0 } { + incr tries -1 + # + # If we have a newmaster already, set tries + # down to just allow straggling messages to + # be processed. 
Tries could be a very large + # number if we have long timeouts. + # + if { $got_newmaster != 0 && $tries > 10 } { + set tries 10 + } + if { $tries == 0 } { + break + } else { + tclsleep 1 + } + } else { + set tries $tries + } + set abandoned [cleanup_abandoned $abandoned] + } + + # If we did get a new master, its identity was checked + # at that time. But we still have to make sure that we + # didn't just time out. + + if { $got_newmaster == 0 && $timeout_ok == 0 } { + error "FAIL: Did not elect new master." + } + } + cleanup_elections + + # + # Make sure we've really processed all the post-election + # sync-up messages. If we're simulating a crash, don't process + # any more messages. + # + if { $crashing == 0 } { + process_msgs $cenvlist + } + + if { $reset_role == 1 } { + puts "\t\t$msg.3: Changing new master to client role" + error_check_good log_flush [$clientenv($win) log_flush] 0 + error_check_good role_chg [$clientenv($win) rep_start -client] 0 + $clientenv($win) errpfx $orig_pfx + + if { $crashing == 0 } { + process_msgs $cenvlist + } + } +} + +proc wait_all_startup { envlist master } { + process_msgs $envlist + + for { set tries 0 } { $tries < 10 } { incr tries } { + # Find a client that has not yet reached startupdone. + # + set found 0 + foreach pair $envlist { + foreach {env eid} $pair {} + if { $eid == $master } { + continue + } + if {![stat_field $env rep_stat "Startup complete"]} { + set found 1 + break + } + } + + # We couldn't find a client who hadn't got startup done. That + # means we're all done and happy. + # + if {!$found} { + return + } + tclsleep 1 + process_msgs $envlist + } + error "FAIL: Clients could not get startupdone after master elected." +} + +proc cleanup_abandoned { es } { + set remaining "" + foreach e $es { + if { [check_election $e unused1 unused2] } { + close_election $e + } else { + lappend remaining $e + } + } + return $remaining +} + +# Retrieve election results that may have been reported by a child process. 
The +# child process communicates the results to us (the parent process) by writing +# them into a file. +# +proc check_election { id unavailp elected_flagp } { + source ./include.tcl + + if { $id == "INVALID" } { + return 0 + } + upvar $unavailp unavail + upvar $elected_flagp elected_flag + + set unavail 0 + set elected_flag 0 + + set res [catch {open $testdir/ELECTION_RESULT.$id} nmid] + if { $res != 0 } { + return 0 + } + while { [gets $nmid val] != -1 } { +# puts "result $id: $val" + set str [lindex $val 0] + if { [is_substr $val UNAVAIL] } { + set unavail 1 + } + if { [is_substr $val ELECTED] } { + set elected_flag 1 + } + } + close $nmid + return 1 +} + +proc is_elected { env } { + return [is_event_present $env "elected"] +} + +proc is_startup_done { env } { + return [is_event_present $env "startupdone"] +} + +proc is_event_present { env event_name } { + set event_info [find_event [$env event_info] $event_name] + return [expr [llength $event_info] > 0] +} + +# Extracts info about a given event type from a list of events that have +# occurred in an environment. The event_info might look something like this: +# +# {startupdone {}} {newmaster 2} +# +# A key would be something like "startupdone" or "newmaster". The return value +# might look like "newmaster 2". In other words, we return the complete +# information about a single event -- the event named by the key. If the event +# named by the key does not appear in the event_info, we return "". +# +proc find_event { event_info key } { + + # Search for a glob pattern: a string beginning with the key name, and + # containing anything after it. 
+ # + return [lsearch -inline $event_info [append key *]] +} + +proc close_election { i } { + global elections_in_progress + global noenv_messaging + global qtestdir + + if { $noenv_messaging == 1 } { + set testdir $qtestdir + } + + set t $elections_in_progress($i) + puts $t "replclose \$testdir/MSGQUEUEDIR" + puts $t "\$dbenv close" + close $t + unset elections_in_progress($i) +} + +proc cleanup_elections { } { + global elect_serial elections_in_progress + + for { set i 0 } { $i <= $elect_serial } { incr i } { + if { [info exists elections_in_progress($i)] != 0 } { + close_election $i + } + } + + set elect_serial 0 +} + +# +# This is essentially a copy of test001, but it only does the put/get +# loop AND it takes an already-opened db handle. +# +proc rep_test { method env repdb {nentries 10000} \ + {start 0} {skip 0} {needpad 0} args } { + + source ./include.tcl + global databases_in_memory + + # + # Open the db if one isn't given. Close before exit. + # + if { $repdb == "NULL" } { + if { $databases_in_memory == 1 } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + set largs [convert_args $method $args] + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr} -env $env -auto_commit\ + -create -mode 0644 $omethod $largs $testfile] + error_check_good reptest_db [is_valid_db $db] TRUE + } else { + set db $repdb + } + + puts "\t\tRep_test: $method $nentries key/data pairs starting at $start" + set did [open $dict] + + # The "start" variable determines the record number to start + # with, if we're using record numbers. The "skip" variable + # determines which dictionary entry to start with. In normal + # use, skip is equal to start. 
+ + if { $skip != 0 } { + for { set count 0 } { $count < $skip } { incr count } { + gets $did str + } + } + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + puts "\t\tRep_test.a: put/get loop" + # Here is the loop where we put and get each key/data pair + set count 0 + + # Checkpoint 10 times during the run, but not more + # frequently than every 5 entries. + set checkfreq [expr $nentries / 10] + + # Abort occasionally during the run. + set abortfreq [expr $nentries / 15] + + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1 + $start] + if { 0xffffffff > 0 && $key > 0xffffffff } { + set key [expr $key - 0x100000000] + } + if { $key == 0 || $key - 0xffffffff == 1 } { + incr key + incr count + } + set kvals($key) [pad_data $method $str] + } else { + set key $str + set str [reverse $str] + } + # + # We want to make sure we send in exactly the same + # length data so that LSNs match up for some tests + # in replication (rep021). + # + if { [is_fixed_length $method] == 1 && $needpad } { + # + # Make it something visible and obvious, 'A'. + # + set p 65 + set str [make_fixed_length $method $str $p] + set kvals($key) $str + } + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set ret [eval \ + {$db put} $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + + if { $checkfreq < 5 } { + set checkfreq 5 + } + if { $abortfreq < 3 } { + set abortfreq 3 + } + # + # Do a few aborted transactions to test that + # aborts don't get processed on clients and the + # master handles them properly. Just abort + # trying to delete the key we just added. 
+ # + if { $count % $abortfreq == 0 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set ret [$db del -txn $t $key] + error_check_good txn [$t abort] 0 + } + if { $count % $checkfreq == 0 } { + error_check_good txn_checkpoint($count) \ + [$env txn_checkpoint] 0 + } + incr count + } + close $did + if { $repdb == "NULL" } { + error_check_good rep_close [$db close] 0 + } +} + +# +# This is essentially a copy of rep_test, but it only does the put/get +# loop in a long running txn to an open db. We use it for bulk testing +# because we want to fill the bulk buffer some before sending it out. +# Bulk buffer gets transmitted on every commit. +# +proc rep_test_bulk { method env repdb {nentries 10000} \ + {start 0} {skip 0} {useoverflow 0} args } { + source ./include.tcl + + global overflowword1 + global overflowword2 + global databases_in_memory + + if { [is_fixed_length $method] && $useoverflow == 1 } { + puts "Skipping overflow for fixed length method $method" + return + } + # + # Open the db if one isn't given. Close before exit. + # + if { $repdb == "NULL" } { + if { $databases_in_memory == 1 } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + set largs [convert_args $method $args] + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr -env $env -auto_commit -create \ + -mode 0644} $largs $omethod $testfile] + error_check_good reptest_db [is_valid_db $db] TRUE + } else { + set db $repdb + } + + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + # If we are not using an external env, then test setting + # the database cache size and using multiple caches. + puts \ +"\t\tRep_test_bulk: $method $nentries key/data pairs starting at $start" + set did [open $dict] + + # The "start" variable determines the record number to start + # with, if we're using record numbers. 
The "skip" variable + # determines which dictionary entry to start with. In normal + # use, skip is equal to start. + + if { $skip != 0 } { + for { set count 0 } { $count < $skip } { incr count } { + gets $did str + } + } + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + puts "\t\tRep_test_bulk.a: put/get loop in 1 txn" + # Here is the loop where we put and get each key/data pair + set count 0 + + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set pid [pid] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1 + $start] + if { 0xffffffff > 0 && $key > 0xffffffff } { + set key [expr $key - 0x100000000] + } + if { $key == 0 || $key - 0xffffffff == 1 } { + incr key + incr count + } + set kvals($key) [pad_data $method $str] + if { [is_fixed_length $method] == 0 } { + set str [repeat $str 100] + } + } else { + set key $str.$pid + set str [repeat $str 100] + } + # + # For use for overflow test. 
+ # + if { $useoverflow == 0 } { + if { [string length $overflowword1] < \ + [string length $str] } { + set overflowword2 $overflowword1 + set overflowword1 $str + } + } else { + if { $count == 0 } { + set len [string length $overflowword1] + set word $overflowword1 + } else { + set len [string length $overflowword2] + set word $overflowword1 + } + set rpt [expr 1024 * 1024 / $len] + incr rpt + set str [repeat $word $rpt] + } + set ret [eval \ + {$db put} $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + incr count + } + error_check_good txn [$t commit] 0 + error_check_good txn_checkpoint [$env txn_checkpoint] 0 + close $did + if { $repdb == "NULL" } { + error_check_good rep_close [$db close] 0 + } +} + +proc rep_test_upg { method env repdb {nentries 10000} \ + {start 0} {skip 0} {needpad 0} {inmem 0} args } { + + source ./include.tcl + + # + # Open the db if one isn't given. Close before exit. + # + if { $repdb == "NULL" } { + if { $inmem == 1 } { + set testfile { "" "test.db" } + } else { + set testfile "test.db" + } + set largs [convert_args $method $args] + set omethod [convert_method $method] + set db [eval {berkdb_open_noerr} -env $env -auto_commit\ + -create -mode 0644 $omethod $largs $testfile] + error_check_good reptest_db [is_valid_db $db] TRUE + } else { + set db $repdb + } + + set pid [pid] + puts "\t\tRep_test_upg($pid): $method $nentries key/data pairs starting at $start" + set did [open $dict] + + # The "start" variable determines the record number to start + # with, if we're using record numbers. The "skip" variable + # determines which dictionary entry to start with. In normal + # use, skip is equal to start. 
+ + if { $skip != 0 } { + for { set count 0 } { $count < $skip } { incr count } { + gets $did str + } + } + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + puts "\t\tRep_test.a: put/get loop" + # Here is the loop where we put and get each key/data pair + set count 0 + + # Checkpoint 10 times during the run, but not more + # frequently than every 5 entries. + set checkfreq [expr $nentries / 10] + + # Abort occasionally during the run. + set abortfreq [expr $nentries / 15] + + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1 + $start] + if { 0xffffffff > 0 && $key > 0xffffffff } { + set key [expr $key - 0x100000000] + } + if { $key == 0 || $key - 0xffffffff == 1 } { + incr key + incr count + } + set kvals($key) [pad_data $method $str] + } else { + # + # With upgrade test, we run the same test several + # times with the same database. We want to have + # some overwritten records and some new records. + # Therefore append our pid to half the keys. + # + if { $count % 2 } { + set key $str.$pid + } else { + set key $str + } + set str [reverse $str] + } + # + # We want to make sure we send in exactly the same + # length data so that LSNs match up for some tests + # in replication (rep021). + # + if { [is_fixed_length $method] == 1 && $needpad } { + # + # Make it something visible and obvious, 'A'. 
+ # + set p 65 + set str [make_fixed_length $method $str $p] + set kvals($key) $str + } + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" +# puts "rep_test_upg: put $count of $nentries: key $key, data $str" + set ret [eval \ + {$db put} $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + + if { $checkfreq < 5 } { + set checkfreq 5 + } + if { $abortfreq < 3 } { + set abortfreq 3 + } + # + # Do a few aborted transactions to test that + # aborts don't get processed on clients and the + # master handles them properly. Just abort + # trying to delete the key we just added. + # + if { $count % $abortfreq == 0 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set ret [$db del -txn $t $key] + error_check_good txn [$t abort] 0 + } + if { $count % $checkfreq == 0 } { + error_check_good txn_checkpoint($count) \ + [$env txn_checkpoint] 0 + } + incr count + } + close $did + if { $repdb == "NULL" } { + error_check_good rep_close [$db close] 0 + } +} + +proc rep_test_upg.check { key data } { + # + # If the key has the pid attached, strip it off before checking. + # If the key does not have the pid attached, then it is a recno + # and we're done. + # + set i [string first . $key] + if { $i != -1 } { + set key [string replace $key $i end] + } + error_check_good "key/data mismatch" $data [reverse $key] +} + +proc rep_test_upg.recno.check { key data } { + # + # If we're a recno database we better not have a pid in the key. + # Otherwise we're done. + # + set i [string first . $key] + error_check_good pid $i -1 +} + +# In a situation where logs are being archived off a master, it's +# possible for a client to get so far behind that there is a gap +# where the highest numbered client log file is lower than the +# lowest numbered master log file, creating the need for internal +# initialization of the client. 
+#
+# This proc creates that situation for use in internal init tests.
+# It closes the selected client and pushes the master forward
+# while archiving the master's log files.
+
+proc push_master_ahead { method masterenv masterdir m_logtype \
+    clientenv clientid db start niter flags largs } {
+	global util_path
+
+	# Identify last client log file and then close the client.
+	puts "\t\tRep_push.a: Close client."
+	set last_client_log [get_logfile $clientenv last]
+	error_check_good client_close [$clientenv close] 0
+
+	set stop 0
+	while { $stop == 0 } {
+		# Run rep_test in the master. Discard messages
+		# for the closed client.
+		puts "\t\tRep_push.b: Pushing master ahead."
+		eval rep_test \
+		    $method $masterenv $db $niter $start $start 0 $largs
+		incr start $niter
+		replclear $clientid
+
+		puts "\t\tRep_push.c: Run db_archive on master."
+		if { $m_logtype == "on-disk"} {
+			$masterenv log_flush
+			eval exec $util_path/db_archive $flags -d -h $masterdir
+		}
+
+		# Check to see whether the gap has appeared yet.
+		set first_master_log [get_logfile $masterenv first]
+		if { $first_master_log > $last_client_log } {
+			set stop 1
+		}
+	}
+	return $start
+}
+
+proc run_repmgr_tests { which {display 0} {run 1} } {
+	source ./include.tcl
+	if { !$display && $is_freebsd_test == 1 } {
+		puts "Skipping replication manager tests on FreeBSD platform."
+		return
+	}
+
+	if { $which == "basic" } {
+		set testname basic_repmgr_test
+	} elseif { $which == "election" } {
+		set testname basic_repmgr_election_test
+	} elseif { $which == "init" } {
+		set testname basic_repmgr_init_test
+	} else {
+		puts "No repmgr test of that name"
+		return
+	}
+
+	if { $run } {
+		puts "Running all cases of $testname."
+	}
+
+	set niter 100
+	foreach inmemdb { 0 1 } {
+		foreach inmemlog { 0 1 } {
+			foreach inmemrep { 0 1 } {
+				foreach envprivate { 0 1 } {
+					foreach bulk { 0 1 } {
+				if { $display } {
+					puts "$testname $niter $inmemdb $inmemlog \
+					    $inmemrep $envprivate $bulk"
+				}
+
+				if { $run } {
+					if { [catch {$testname $niter $inmemdb $inmemlog \
+					    $inmemrep $envprivate $bulk} res ] } {
+						set ::databases_in_memory 0 ;# reset GLOBAL flag; a bare "set" here only made a dead local
+						error "FAIL: $res"
+					}
+				}
+					}
+				}
+			}
+		}
+	}
+}
+
+proc print_repmgr_headers { test niter inmemdb inmemlog inmemrep \
+    envprivate bulk } {
+
+	set dbmsg "on-disk databases"
+	if { $inmemdb } {
+		set dbmsg "in-memory databases"
+	}
+
+	set logmsg "on-disk logs"
+	if { $inmemlog } {
+		set logmsg "in-memory logs"
+	}
+
+	set repmsg "on-disk rep files"
+	if { $inmemrep } {
+		set repmsg "in-memory rep files"
+	}
+
+	set regmsg "on-disk region files"
+	if { $envprivate } {
+		set regmsg "in-memory region files"
+	}
+
+	set bulkmsg "regular processing"
+	if { $bulk } {
+		set bulkmsg "bulk processing"
+	}
+
+	puts "\n$test with:"
+	puts "\t$dbmsg"
+	puts "\t$logmsg"
+	puts "\t$repmsg"
+	puts "\t$regmsg"
+	puts "\t$bulkmsg"
+	puts -nonewline "To reproduce this case: $test "
+	puts "$niter $inmemdb $inmemlog $inmemrep $envprivate $bulk"
+}
+
+# Verify that no replication files are present in a given directory.
+# This checks for the gen, egen, internal init, temp db and page db
+# files.
+#
+proc no_rep_files_on_disk { dir } {
+	error_check_good nogen [file exists "$dir/__db.rep.gen"] 0
+	error_check_good noegen [file exists "$dir/__db.rep.egen"] 0
+	error_check_good noinit [file exists "$dir/__db.rep.init"] 0
+	error_check_good notmpdb [file exists "$dir/__db.rep.db"] 0
+	error_check_good nopgdb [file exists "$dir/__db.reppg.db"] 0
+	error_check_good nosysdb [file exists "$dir/__db.rep.system"] 0
+}
+
+proc process_msgs { elist {perm_response 0} {dupp NONE} {errp NONE} \
+    {upg 0} } {
+	if { $perm_response == 1 } {
+		global perm_response_list
+		set perm_response_list {{}} ;# start with one empty sublist
+	}
+
+	if { [string compare $dupp NONE] != 0 } {
+		upvar $dupp dupmaster ;# caller wants dupmaster status back
+		set dupmaster 0
+	} else {
+		set dupmaster NONE
+	}
+
+	if { [string compare $errp NONE] != 0 } {
+		upvar $errp errorp ;# caller wants error status back
+		set errorp 0
+		set var_name errorp
+	} else {
+		set errorp NONE
+		set var_name NONE
+	}
+
+	set upgcount 0
+	while { 1 } {
+		set nproced 0
+		incr nproced [proc_msgs_once $elist dupmaster $var_name]
+		#
+		# If we're running the upgrade test, we are running only
+		# our own env, we need to loop a bit to allow the other
+		# upgrade procs to run and reply to our messages.
+	#
+		if { $upg == 1 && $upgcount < 10 } {
+			tclsleep 2
+			incr upgcount
+			continue
+		}
+		if { $nproced == 0 } {
+			break
+		} else {
+			set upgcount 0
+		}
+	}
+}
+
+
+proc proc_msgs_once { elist {dupp NONE} {errp NONE} } {
+	global noenv_messaging
+
+	if { [string compare $dupp NONE] != 0 } {
+		upvar $dupp dupmaster
+		set dupmaster 0
+	} else {
+		set dupmaster NONE
+	}
+
+	if { [string compare $errp NONE] != 0 } {
+		upvar $errp errorp
+		set errorp 0
+		set var_name errorp
+	} else {
+		set errorp NONE
+		set var_name NONE
+	}
+
+	set nproced 0
+	foreach pair $elist {
+		set envname [lindex $pair 0]
+		set envid [lindex $pair 1]
+		#
+		# If we need to send in all the other args
+#		puts "Call replpq with on $envid"
+		if { $noenv_messaging } {
+			incr nproced [replprocessqueue_noenv $envname $envid \
+			    0 NONE dupmaster $var_name]
+		} else {
+			incr nproced [replprocessqueue $envname $envid \
+			    0 NONE dupmaster $var_name]
+		}
+		#
+		# If the user is expecting to handle an error and we get
+		# one, return the error immediately.
+		#
+		if { $dupmaster != 0 && $dupmaster != "NONE" } {
+			return 0
+		}
+		if { $errorp != 0 && $errorp != "NONE" } {
+#			puts "Returning due to error $errorp"
+			return 0
+		}
+	}
+	return $nproced
+}
+
+proc rep_verify { masterdir masterenv clientdir clientenv \
+    {compare_shared_portion 0} {match 1} {logcompare 1} \
+    {dbname "test.db"} {datadir ""} } {
+	global util_path
+	global encrypt
+	global passwd
+	global databases_in_memory
+	global repfiles_in_memory
+	global env_private
+
+	# Whether a named database is in-memory or on-disk, only the
+	# the name itself is passed in. Here we do the syntax adjustment
+	# from "test.db" to { "" "test.db" } for in-memory databases.
+	#
+	if { $databases_in_memory && $dbname != "NULL" } {
+		set dbname " {} $dbname " ;# empty file part => in-memory db
+	}
+
+	# Check locations of dbs, repfiles, region files.
+	if { $dbname != "NULL" } {
+		check_db_location $masterenv $dbname $datadir
+		check_db_location $clientenv $dbname $datadir
+	}
+
+	if { $repfiles_in_memory } {
+		no_rep_files_on_disk $masterdir
+		no_rep_files_on_disk $clientdir
+	}
+	if { $env_private } {
+		no_region_files_on_disk $masterdir
+		no_region_files_on_disk $clientdir
+	}
+
+	# The logcompare flag indicates whether to compare logs.
+	# Sometimes we run a test where rep_verify is run twice with
+	# no intervening processing of messages. If that test is
+	# on a build with debug_rop enabled, the master's log is
+	# altered by the first rep_verify, and the second rep_verify
+	# will fail.
+	# To avoid this, skip the log comparison on the second rep_verify
+	# by specifying logcompare == 0.
+	#
+	if { $logcompare } {
+		set msg "Logs and databases"
+	} else {
+		set msg "Databases ($dbname)"
+	}
+
+	if { $match } {
+		puts "\t\tRep_verify: $clientdir: $msg should match"
+	} else {
+		puts "\t\tRep_verify: $clientdir: $msg should not match"
+	}
+	# Check that master and client logs and dbs are identical.
+
+	# Logs first, if specified ...
+	#
+	# If compare_shared_portion is set, run db_printlog on the log
+	# subset that both client and master have. Either the client or
+	# the master may have more (earlier) log files, due to internal
+	# initialization, in-memory log wraparound, or other causes.
+	#
+	if { $logcompare } {
+		error_check_good logcmp \
+		    [logcmp $masterenv $clientenv $compare_shared_portion] 0
+
+		if { $dbname == "NULL" } {
+			return
+		}
+	}
+
+	# ... now the databases.
+	#
+	# We're defensive here and throw an error if a database does
+	# not exist. If opening the first database succeeded but the
+	# second failed, we close the first before reporting the error.
+	#
+	if { [catch {eval {berkdb_open_noerr} -env $masterenv\
+	    -rdonly $dbname} db1] } {
+		error "FAIL:\
+		    Unable to open first db $dbname in rep_verify: $db1"
+	}
+	if { [catch {eval {berkdb_open_noerr} -env $clientenv\
+	    -rdonly $dbname} db2] } {
+		error_check_good close_db1 [$db1 close] 0
+		error "FAIL:\
+		    Unable to open second db $dbname in rep_verify: $db2"
+	}
+
+	# db_compare uses the database handles to do the comparison, and
+	# we pass in the $mumbledir/$dbname string as a label to make it
+	# easier to identify the offending database in case of failure.
+	# Therefore this will work for both in-memory and on-disk databases.
+	if { $match } {
+		error_check_good [concat comparedbs. $dbname] [db_compare \
+		    $db1 $db2 $masterdir/$dbname $clientdir/$dbname] 0
+	} else {
+		error_check_bad comparedbs [db_compare \
+		    $db1 $db2 $masterdir/$dbname $clientdir/$dbname] 0
+	}
+	error_check_good db1_close [$db1 close] 0
+	error_check_good db2_close [$db2 close] 0
+}
+
+proc rep_verify_inmem { masterenv clientenv mdb cdb } {
+	#
+	# Can't use rep_verify to compare the logs because each
+	# commit record from db_printlog shows the database name
+	# as text on the master and as the file uid on the client
+	# because the client cannot find the "file".
+	#
+	# !!! Check the LSN first. Otherwise the DB->stat for the
+	# number of records will write a log record on the master if
+	# the build is configured for debug_rop. Work around that issue.
+	#
+	set mlsn [next_expected_lsn $masterenv]
+	set clsn [next_expected_lsn $clientenv]
+	error_check_good lsn $mlsn $clsn
+
+	set mrecs [stat_field $mdb stat "Number of records"]
+	set crecs [stat_field $cdb stat "Number of records"]
+	error_check_good recs $mrecs $crecs
+}
+
+# NOTE: This routine has been copied to ../test/sql/bdb_util.tcl
+# and changes to it should be made in both places because the SQL
+# tests are currently independent of the core tests.
+#
+# Return a list of TCP port numbers that are not currently in use on
+# the local system. Note that this doesn't actually reserve the
+# ports, so it's possible that by the time the caller tries to use
+# them, another process could have taken one of them. But for our
+# purposes that's unlikely enough that this is still useful: it's
+# still better than trying to find hard-coded port numbers that will
+# always be available.
+#
+# Using a starting baseport value that falls in the non-ephemeral port
+# range on most platforms. Can override starting baseport by setting
+# environment variable BDBBASEPORT.
+#
+proc available_ports { n { rangeincr 10 } } {
+	global env
+
+	if { [info exists env(BDBBASEPORT)] } {
+		set baseport $env(BDBBASEPORT)
+	} else {
+		set baseport 30100 ;# default: below the usual ephemeral range
+	}
+
+	# Try sets of contiguous ports ascending from baseport.
+	for { set i $baseport } { $i < $baseport + $rangeincr * 100 } \
+	    { incr i $rangeincr } {
+		set ports {}
+		set socks {}
+		set numports $n ;# countdown of ports still needed in this set
+		set curport $i
+
+		# Try one set of contiguous ports.
+		while { [incr numports -1] >= 0 } {
+			incr curport ;# note: first port probed is i+1, not i
+			if [catch { socket -server Unused \
+			    -myaddr localhost $curport } sock] {
+				# A port is unavailable, try another set.
+				break
+			}
+			lappend socks $sock
+			lappend ports $curport
+		}
+		foreach sock $socks {
+			close $sock ;# release probe listeners; ports NOT reserved
+		}
+		if { $numports == -1 } {
+			# We have all the ports we need.
+			break
+		}
+	}
+	if { $numports == -1 } {
+		return $ports
+	} else {
+		error "available_ports: could not get ports for $baseport"
+	}
+}
+
+# Return the corresponding site number for an individual port number
+# previously returned by available_ports. This procedure assumes that
+# the baseport number, n and rangeincr value are unchanged from the
+# original call to available_ports. If a port value is supplied that
+# is outside the expected baseport, n and rangeincr range, this procedure
+# returns -1.
+#
+# As in available_ports, it uses a starting baseport number that falls
+# in the non-ephemeral range on most platforms, which can be overridden
+# by setting environment variable BDBBASEPORT.
+#
+proc site_from_port { port n { rangeincr 10 } } {
+	global env
+
+	if { [info exists env(BDBBASEPORT)] } {
+		set baseport $env(BDBBASEPORT)
+	} else {
+		set baseport 30100 ;# must match available_ports default
+	}
+
+	if { $port > $baseport && $port < $baseport + $rangeincr * 100 } {
+		set site [expr ($port - $baseport) % $rangeincr]
+		if { $site <= $n } {
+			return $site
+		}
+	}
+	return -1
+}
+
+# Wait (a limited amount of time) for an arbitrary condition to become true,
+# polling once per second. If time runs out we throw an error: a successful
+# return implies the condition is indeed true.
+#
+proc await_condition { cond { limit 20 } } {
+	for {set i 0} {$i < $limit} {incr i} {
+		if {[uplevel 1 [list expr $cond]]} {
+			return
+		}
+		tclsleep 1
+	}
+	error "FAIL: condition \{$cond\} not achieved in $limit seconds."
+}
+
+proc await_startup_done { env { limit 20 } } {
+	await_condition {[stat_field $env rep_stat "Startup complete"]} $limit
+}
+
+proc await_event { env event_name { limit 20 } } {
+	await_condition {[is_event_present $env $event_name]} $limit
+	return [find_event [$env event_info] $event_name]
+}
+
+# Wait (a limited amount of time) for an election to yield the expected
+# environment as winner.
+#
+proc await_expected_master { env { limit 20 } } {
+	await_condition {[stat_field $env rep_stat "Role"] == "master"} $limit
+}
+
+proc do_leaseop { env db method key envlist { domsgs 1 } } {
+	global alphabet
+
+	#
+	# Put a txn to the database. Process messages to envlist
+	# if directed to do so. Read data on the master, ignoring
+	# leases (should always succeed).
+	#
+	set num [berkdb random_int 1 100]
+	set data $alphabet.$num
+	set t [$env txn]
+	error_check_good txn [is_valid_txn $t $env] TRUE
+	set txn "-txn $t"
+	set ret [eval \
+	    {$db put} $txn {$key [chop_data $method $data]}]
+	error_check_good put $ret 0
+	error_check_good txn [$t commit] 0
+
+	if { $domsgs } {
+		process_msgs $envlist
+	}
+
+	#
+	# Now make sure we can successfully read on the master
+	# if we ignore leases. That should always work. The
+	# caller will do any lease related calls and checks
+	# that are specific to the test.
+	#
+	set kd [$db get -nolease $key]
+	set curs [$db cursor]
+	set ckd [$curs get -nolease -set $key]
+	$curs close
+	error_check_good kd [llength $kd] 1
+	error_check_good ckd [llength $ckd] 1
+}
+
+#
+# Get the given key, expecting status depending on whether leases
+# are currently expected to be valid or not.
+#
+proc check_leaseget { db key getarg status } {
+	set stat [catch {eval {$db get} $getarg $key} kd]
+	if { $status != 0 } {
+		error_check_good get_result $stat 1
+		error_check_good kd_check \
+		    [is_substr $kd $status] 1
+	} else {
+		error_check_good get_result_good $stat $status
+		error_check_good dbkey [lindex [lindex $kd 0] 0] $key
+	}
+	set curs [$db cursor]
+	set stat [catch {eval {$curs get} $getarg -set $key} kd]
+	if { $status != 0 } {
+		error_check_good get_result2 $stat 1
+		error_check_good kd_check \
+		    [is_substr $kd $status] 1
+	} else {
+		error_check_good get_result2_good $stat $status
+		error_check_good dbckey [lindex [lindex $kd 0] 0] $key
+	}
+	$curs close
+}
+
+# Simple utility to check a client database for expected values. It does not
+# handle dup keys.
+# +proc verify_client_data { env db items } { + set dbp [berkdb open -env $env $db] + foreach i $items { + foreach {key expected_value} $i { + set results [$dbp get $key] + error_check_good result_length [llength $results] 1 + set value [lindex $results 0 1] + error_check_good expected_value $value $expected_value + } + } + $dbp close +} + +proc make_dbconfig { dir cnfs } { + global rep_verbose + + set f [open "$dir/DB_CONFIG" "w"] + foreach line $cnfs { + puts $f $line + } + if {$rep_verbose} { + puts $f "set_verbose DB_VERB_REPLICATION" + } + close $f +} + +proc open_site_prog { cmds } { + + set site_prog [setup_site_prog] + + set s [open "| $site_prog" "r+"] + fconfigure $s -buffering line + set synced yes + foreach cmd $cmds { + puts $s $cmd + if {[lindex $cmd 0] == "start"} { + gets $s + set synced yes + } else { + set synced no + } + } + if {! $synced} { + puts $s "echo done" + gets $s + } + return $s +} + +proc setup_site_prog { } { + source ./include.tcl + + # Generate the proper executable name for the system. + if { $is_windows_test } { + set repsite_executable db_repsite.exe + } else { + set repsite_executable db_repsite + } + + # Check whether the executable exists. + if { [file exists $util_path/$repsite_executable] == 0 } { + error "Skipping: db_repsite executable\ + not found. Is it built?" 
+ } else { + set site_prog $util_path/$repsite_executable + } + return $site_prog +} + +proc next_expected_lsn { env } { + return [stat_field $env rep_stat "Next LSN expected"] +} + +proc lsn_file { lsn } { + if { [llength $lsn] != 2 } { + error "not a valid LSN: $lsn" + } + + return [lindex $lsn 0] +} + +proc assert_rep_flag { dir flag value } { + global util_path + + set stat [exec $util_path/db_stat -N -RA -h $dir] + set present [is_substr $stat $flag] + error_check_good expected.flag.$flag $present $value +} + +# Kind of like an abbreviated lsearch(3tcl), except that the list must be a list +# of lists, and we search each list for a key in the "head" (0-th) position. +# +# lsearch_head ?option? list_of_lists key +# +# "option" can be -index or -inline (or may be omitted) +# +proc lsearch_head { args } { + if {[llength $args] > 2} { + foreach { how lists key } $args {} + } else { + set how -index + foreach { lists key } $args {} + } + + set i 0 + foreach list $lists { + if { $key eq [lindex $list 0] } { + if {$how eq "-inline"} { + return $list + } else { + return $i + } + } + incr i + } + if { $how eq "-inline" } { + return "" + } else { + return -1 + } +} + +# +# To build a profiled version of BDB and tclsh and run the rep +# tests individually with profiling you need the following steps: +# +# 0. NOTE: References to 'X' below for BDB versions obviously need +# the current release version number used. +# 1. Need to build a static, profiled version of DB and install it. +# ../dist/configure with --disable-shared and --enable-static. +# +# NOTE: Assumes you already have --enable-debug configured. +# +# (if you use the script 'dbconf' the 'args' line looks like:) +# args="--disable-shared --enable-static --with-tcl=/usr/local/lib --enable-test $args" +# +# Edit build_unix/Makefile and add '-pg' to CFLAGS and LDFLAGS. +# make +# sudo make install +# +# 2. 
# guide.  Mostly it should be the same.  These steps work for Tcl 8.5.8.]
+# +# Test it has BDB built-in properly: +# Run ./tclsh in Tcl src (unix) directory: +# % berkdb version +# [Should show current BDB version.] +# % ^D +# +# Current directory should now have a tclsh.gmon or gmon.out file. +# +# +# 6. Edit build_unix/include.tcl to point to profiled tclsh and +# the static DB library: +# +# set tclsh_path /tcl8.5.8/unix/tclsh +# set tcllib .libs/libdb_tcl-5.X.a +# +# 7. Comment out 'load $tcllib' in test/test.tcl +# +# 8. Run *your newly generated, profiled* tclsh as you normally would, +# including 'source ../test/test.tcl' +# build_unix% /unix/tclsh +# % source ../test/test.tcl +# +# 9. Each test will be run in a separate tclsh and profiled individually. +# In the 'build_unix' directory you'll then find a .OUT file +# that contains the profile output. Run: +# % run_rep_gprof [start reptest name] +# This form runs all rep tests, starting with the given +# reptest name, or rep001 if no name is given. +# % run_gprof +# This form runs only the (required) specified test. +# NOTE: This form can be used on any individual test, not +# just replication tests. However, it uses 'run_test' so it +# must be a test that can be run through all the methods. 
+# +proc run_rep_gprof { {starttest rep001} } { + global test_names + + set tindex [lsearch $test_names(rep) $starttest] + if { $tindex == -1 } { + set tindex 0 + } + set rlist [lrange $test_names(rep) $tindex end] + run_gprof_int $rlist +} + +proc run_gprof { testname } { + global test_names + + set rlist [list $testname] + run_gprof_int $rlist +} + +proc run_gprof_int { rlist } { + global one_test + source ./include.tcl + + foreach test $rlist { + puts "Test $test start: [timestamp]" + fileremove -f $test.OUT + if [catch {exec $tclsh_path << \ + "global one_test; set one_test $one_test; \ + source $test_path/test.tcl; run_test $test" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_gprof_int $test: $res" + close $o + } + puts "Test $test gprof: [timestamp]" + set gmonfile NULL + set known_gmons { tclsh.gmon gmon.out } + foreach gmon $known_gmons { + if { [file exists $gmon] } { + set gmonfile $gmon + break + } + } + if { $gmonfile != "NULL" } { + set stat [catch {exec gprof $tclsh_path $gmonfile \ + >>& $test.OUT} ret] + } else { + puts "FAIL: Could not find execution profile in \ + either tclsh.gmon or gmon.out." + } + puts "Test $test complete: [timestamp]" + } +} + +# +# Make a DB_CONFIG file for a site about to run a db_replicate test. +# Args are +# sitedir - the directory for this site +# i - my site index/number +# pri - my priority +# +proc replicate_make_config { sitedir i pri } { + # + # Generate global config values that should be the same + # across all sites, such as number of sites and log size, etc. 
+ # + set default_cfglist { + { "set_flags" "DB_TXN_NOSYNC" } + { "rep_set_request" "150000 2400000" } + { "rep_set_timeout" "db_rep_checkpoint_delay 0" } + { "rep_set_timeout" "db_rep_connection_retry 2000000" } + { "rep_set_timeout" "db_rep_heartbeat_monitor 5000000" } + { "rep_set_timeout" "db_rep_heartbeat_send 1000000" } + { "set_cachesize" "0 4194304 1" } + { "set_lk_detect" "db_lock_default" } + { "rep_set_config" "db_repmgr_conf_2site_strict" } + } + + # + # Otherwise set up per-site config information + # + set cfglist $default_cfglist + + set litem [list rep_set_priority $pri] + lappend cfglist $litem + + # + # Now set up the local and remote ports. Use 49210 so that + # we don't collide with db_reptest which uses 49200. For + # now, we have site 0 know about no one, and all other sites + # know about site 0. Do not use peers for now. + # + set baseport 49210 + set rporttype NULL + set lport [expr $baseport + $i] + if { $i == 0 } { + set creator_flag "db_group_creator on" + } else { + set creator_flag "" + } + set litem [list repmgr_site \ + "localhost $lport $creator_flag db_local_site on"] + lappend cfglist $litem + set peers 0 + set p NULL + if { $i != 0 } { + set p $baseport + } + if { $peers } { + set remote_arg "db_repmgr_peer on" + } else { + set remote_arg "" + } + if { $p != "NULL" } { + set litem [list repmgr_site \ + "localhost $p $remote_arg db_bootstrap_helper on"] + lappend cfglist $litem + } + # + # Now write out the DB_CONFIG file. + # + set cid [open $sitedir/DB_CONFIG a] + foreach c $cfglist { + set carg [subst [lindex $c 0]] + set cval [subst [lindex $c 1]] + puts $cid "$carg $cval" + } + close $cid +} diff --git a/test/tcl/reputilsnoenv.tcl b/test/tcl/reputilsnoenv.tcl new file mode 100644 index 00000000..61409022 --- /dev/null +++ b/test/tcl/reputilsnoenv.tcl @@ -0,0 +1,514 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# messages in an environment is not possible.  This situation
+		# we find that is neither toid nor fromid.  If we don't
+proc replclear_noenv { machid {tf "to"}} { + global queuedbs qtestdir testdir + + if { ![info exists qtestdir] } { + set qtestdir $testdir + } + set queuedir $qtestdir/MSGQUEUEDIR + set orig [pwd] + + cd $queuedir + if { $tf == "to" } { + set msgdbs [glob -nocomplain ready.$machid.*] + } else { + set msgdbs [glob -nocomplain ready.*.$machid.*] + } + foreach m $msgdbs { + file delete -force $m + } + cd $orig + set dbs [array names queuedbs] + foreach tofrom $dbs { + # Process only messages _to_ the specified machid. + if { [string match $machid.* $tofrom] == 1 } { + set db $queuedbs($tofrom) + set dbc [$db cursor] + for { set dbt [$dbc get -first] } \ + { [llength $dbt] > 0 } \ + { set dbt [$dbc get -next] } { + error_check_good \ + replclear($machid)_del [$dbc del] 0 + } + error_check_good replclear($db)_dbc_close [$dbc close] 0 + } + } + cd $queuedir + if { $tf == "to" } { + set msgdbs [glob -nocomplain temp.$machid.*] + } else { + set msgdbs [glob -nocomplain temp.*.$machid.*] + } + foreach m $msgdbs { +# file delete -force $m + } + cd $orig +} + +# Makes messages available to replprocessqueue by closing and +# renaming the message files. We ready the files for one machine +# ID at a time -- just those "to" or "from" the machine we want to +# process, depending on 'tf'. +proc replready_noenv { machid tf } { + global queuedbs machids + global counter + global qtestdir testdir + + if { ![info exists qtestdir] } { + set qtestdir $testdir + } + set queuedir $qtestdir/MSGQUEUEDIR + + set pid [pid] + # + # Close the temporary message files for the specified machine. + # Only close it if there are messages available. + # + set dbs [array names queuedbs] + set closed {} + foreach tofrom $dbs { + set toidx [string first . $tofrom] + set toid [string replace $tofrom $toidx end] + set fidx [expr $toidx + 1] + set fromidx [string first . $tofrom $fidx] + # + # First chop off the end, then chop off the toid + # in the beginning. 
+ # + set fromid [string replace $tofrom $fromidx end] + set fromid [string replace $fromid 0 $toidx] + if { ($tf == "to" && $machid == $toid) || \ + ($tf == "from" && $machid == $fromid) } { + set nkeys [stat_field $queuedbs($tofrom) \ + stat "Number of keys"] + if { $nkeys != 0 } { + lappend closed \ + [list $toid $fromid temp.$tofrom] + error_check_good temp_close \ + [$queuedbs($tofrom) close] 0 + } + } + } + + # Rename the message files. + set cwd [pwd] + foreach filename $closed { + set toid [lindex $filename 0] + set fromid [lindex $filename 1] + set fname [lindex $filename 2] + set tofrom [string replace $fname 0 4] + incr counter($machid) + cd $queuedir +# puts "$queuedir: Msg ready $fname to ready.$tofrom.$counter($machid)" + file rename -force $fname ready.$tofrom.$counter($machid) + cd $cwd + replsetuptempfile_noenv $toid $fromid $queuedir + + } +} + +# Add a machine to a replication environment. This checks +# that we have not already established that machine id, and +# adds the machid to the list of ids. +proc repladd_noenv { machid } { + global queuedbs machids counter qtestdir testdir + + if { ![info exists qtestdir] } { + set qtestdir $testdir + } + set queuedir $qtestdir/MSGQUEUEDIR + if { [info exists machids] } { + if { [lsearch -exact $machids $machid] >= 0 } { + error "FAIL: repladd_noenv: machid $machid already exists." + } + } + + set counter($machid) 0 + lappend machids $machid + + # Create all the databases that receive messages sent _to_ + # the new machid. + replcreatetofiles_noenv $machid $queuedir + + # Create all the databases that receive messages sent _from_ + # the new machid. + replcreatefromfiles_noenv $machid $queuedir +} + +# Creates all the databases that a machid needs for receiving messages +# from other participants in a replication group. Used when first +# establishing the temp files, but also used whenever replready_noenv moves +# the temp files away, because we'll need new files for any future messages. 
+proc replcreatetofiles_noenv { toid queuedir } { + global machids + + foreach m $machids { + # We don't need a file for a machid to send itself messages. + if { $m == $toid } { + continue + } + replsetuptempfile_noenv $toid $m $queuedir + } +} + +# Creates all the databases that a machid needs for sending messages +# to other participants in a replication group. Used when first +# establishing the temp files only. Replready moves files based on +# recipient, so we recreate files based on the recipient, also. +proc replcreatefromfiles_noenv { fromid queuedir } { + global machids + + foreach m $machids { + # We don't need a file for a machid to send itself messages. + if { $m == $fromid } { + continue + } + replsetuptempfile_noenv $m $fromid $queuedir + } +} + +proc replsetuptempfile_noenv { to from queuedir } { + global queuedbs + + set pid [pid] +# puts "Open new temp.$to.$from.$pid" + set queuedbs($to.$from.$pid) [berkdb_open -create -excl -recno\ + -renumber $queuedir/temp.$to.$from.$pid] + error_check_good open_queuedbs [is_valid_db $queuedbs($to.$from.$pid)] TRUE +} + +# Process a queue of messages, skipping every "skip_interval" entry. +# We traverse the entire queue, but since we skip some messages, we +# may end up leaving things in the queue, which should get picked up +# on a later run. +proc replprocessqueue_noenv { dbenv machid { skip_interval 0 } { hold_electp NONE } \ + { dupmasterp NONE } { errp NONE } } { + global errorCode + global perm_response_list + global qtestdir testdir + + # hold_electp is a call-by-reference variable which lets our caller + # know we need to hold an election. + if { [string compare $hold_electp NONE] != 0 } { + upvar $hold_electp hold_elect + } + set hold_elect 0 + + # dupmasterp is a call-by-reference variable which lets our caller + # know we have a duplicate master. 
+ if { [string compare $dupmasterp NONE] != 0 } { + upvar $dupmasterp dupmaster + } + set dupmaster 0 + + # errp is a call-by-reference variable which lets our caller + # know we have gotten an error (that they expect). + if { [string compare $errp NONE] != 0 } { + upvar $errp errorp + } + set errorp 0 + + set nproced 0 + + set queuedir $qtestdir/MSGQUEUEDIR +# puts "replprocessqueue_noenv: Make ready messages to eid $machid" + + # Change directories temporarily so we get just the msg file name. + set cwd [pwd] + cd $queuedir + set msgdbs [glob -nocomplain ready.$machid.*] +# puts "$queuedir.$machid: My messages: $msgdbs" + cd $cwd + + foreach msgdb $msgdbs { + set db [berkdb_open $queuedir/$msgdb] + set dbc [$db cursor] + + error_check_good process_dbc($machid) \ + [is_valid_cursor $dbc $db] TRUE + + for { set dbt [$dbc get -first] } \ + { [llength $dbt] != 0 } \ + { set dbt [$dbc get -next] } { + set data [lindex [lindex $dbt 0] 1] + set recno [lindex [lindex $dbt 0] 0] + + # If skip_interval is nonzero, we want to process + # messages out of order. We do this in a simple but + # slimy way -- continue walking with the cursor + # without processing the message or deleting it from + # the queue, but do increment "nproced". The way + # this proc is normally used, the precise value of + # nproced doesn't matter--we just don't assume the + # queues are empty if it's nonzero. Thus, if we + # contrive to make sure it's nonzero, we'll always + # come back to records we've skipped on a later call + # to replprocessqueue. (If there really are no records, + # we'll never get here.) + # + # Skip every skip_interval'th record (and use a + # remainder other than zero so that we're guaranteed + # to really process at least one record on every call). 
+ if { $skip_interval != 0 } { + if { $nproced % $skip_interval == 1 } { + incr nproced + set dbt [$dbc get -next] + continue + } + } + + # We need to remove the current message from the + # queue, because we're about to end the transaction + # and someone else processing messages might come in + # and reprocess this message which would be bad. + # + error_check_good queue_remove [$dbc del] 0 + + # We have to play an ugly cursor game here: we + # currently hold a lock on the page of messages, but + # rep_process_message might need to lock the page with + # a different cursor in order to send a response. So + # save the next recno, close the cursor, and then + # reopen and reset the cursor. If someone else is + # processing this queue, our entry might have gone + # away, and we need to be able to handle that. + # +# error_check_good dbc_process_close [$dbc close] 0 + + set ret [catch {$dbenv rep_process_message \ + [lindex $data 2] [lindex $data 0] \ + [lindex $data 1]} res] + + # Save all ISPERM and NOTPERM responses so we can + # compare their LSNs to the LSN in the log. The + # variable perm_response_list holds the entire + # response so we can extract responses and LSNs as + # needed. 
+		# want to rename to done only those message files that are
+ # Therefore, we rename the files before deleting them, + # to guarantee they are out of the way. + # + file rename -force $queuedir/$msgdb $queuedir/$dbname + file delete -force $queuedir/$dbname + } + } + # Return the number of messages processed. + return $nproced +} + diff --git a/test/tcl/rsrc001.tcl b/test/tcl/rsrc001.tcl new file mode 100644 index 00000000..7665b079 --- /dev/null +++ b/test/tcl/rsrc001.tcl @@ -0,0 +1,215 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rsrc001 +# TEST Recno backing file test. Try different patterns of adding +# TEST records and making sure that the corresponding file matches. +proc rsrc001 { } { + source ./include.tcl + + puts "Rsrc001: Basic recno backing file writeback tests" + + # We run this test essentially twice, once with a db file + # and once without (an in-memory database). + set rec1 "This is record 1" + set rec2 "This is record 2 This is record 2" + set rec3 "This is record 3 This is record 3 This is record 3" + set rec4 [replicate "This is record 4 " 512] + + foreach testfile { "$testdir/rsrc001.db" "" } { + + cleanup $testdir NULL + + if { $testfile == "" } { + puts "Rsrc001: Testing with in-memory database." + } else { + puts "Rsrc001: Testing with disk-backed database." + } + + # Create backing file for the empty-file test. + set oid1 [open $testdir/rsrc.txt w] + fconfigure $oid1 -translation binary + close $oid1 + + puts "\tRsrc001.a: Put to empty file." + set db [eval {berkdb_open -create -mode 0644\ + -recno -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set txn "" + + set ret [eval {$db put} $txn {1 $rec1}] + error_check_good put_to_empty $ret 0 + error_check_good db_close [$db close] 0 + + # Now fill out the backing file and create the check file. 
+ set oid1 [open $testdir/rsrc.txt a] + set oid2 [open $testdir/check.txt w] + fconfigure $oid1 -translation binary + fconfigure $oid2 -translation binary + + # This one was already put into rsrc.txt. + puts $oid2 $rec1 + + # These weren't. + puts $oid1 $rec2 + puts $oid2 $rec2 + puts $oid1 $rec3 + puts $oid2 $rec3 + puts $oid1 $rec4 + puts $oid2 $rec4 + close $oid1 + close $oid2 + + puts -nonewline "\tRsrc001.b: Read file, rewrite last record;" + puts " write it out and diff" + set db [eval {berkdb_open -create -mode 0644\ + -recno -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Read the last record; replace it (but we won't change it). + # Then close the file and diff the two files. + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set rec [$dbc get -last] + error_check_good get_last [llength [lindex $rec 0]] 2 + set key [lindex [lindex $rec 0] 0] + set data [lindex [lindex $rec 0] 1] + + # Get the last record from the text file + set oid [open $testdir/rsrc.txt] + fconfigure $oid -translation binary + set laststr "" + while { [gets $oid str] != -1 } { + set laststr $str + } + close $oid + error_check_good getlast $data $laststr + + set ret [eval {$db put} $txn {$key $data}] + error_check_good replace_last $ret 0 + + error_check_good curs_close [$dbc close] 0 + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + error_check_good \ + Rsrc001:diff($testdir/rsrc.txt,$testdir/check.txt) \ + [filecmp $testdir/rsrc.txt $testdir/check.txt] 0 + + puts -nonewline "\tRsrc001.c: " + puts "Append some records in tree and verify in file." 
+ set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [replicate "New Record $i" $i] + puts $oid $rec + incr key + set ret [eval {$db put} $txn {-append $rec}] + error_check_good put_append $ret $key + } + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + Rsrc001:diff($testdir/{rsrc.txt,check.txt}) $ret 0 + + puts "\tRsrc001.d: Append by record number" + set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [replicate "New Record (set 2) $i" $i] + puts $oid $rec + incr key + set ret [eval {$db put} $txn {$key $rec}] + error_check_good put_byno $ret 0 + } + + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + Rsrc001:diff($testdir/{rsrc.txt,check.txt}) $ret 0 + + puts "\tRsrc001.e: Put beyond end of file." + set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + puts $oid "" + incr key + } + set rec "Last Record" + puts $oid $rec + incr key + + set ret [eval {$db put} $txn {$key $rec}] + error_check_good put_byno $ret 0 + + puts "\tRsrc001.f: Put beyond end of file, after reopen." + + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open -create -mode 0644\ + -recno -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set rec "Last record with reopen" + puts $oid $rec + + incr key + set ret [eval {$db put} $txn {$key $rec}] + error_check_good put_byno_with_reopen $ret 0 + + puts "\tRsrc001.g:\ + Put several beyond end of file, after reopen with snapshot." 
+ error_check_good db_close [$db close] 0 + set db [eval {berkdb_open -create -mode 0644\ + -snapshot -recno -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set rec "Really really last record with reopen" + puts $oid "" + puts $oid "" + puts $oid "" + puts $oid $rec + + incr key + incr key + incr key + incr key + + set ret [eval {$db put} $txn {$key $rec}] + error_check_good put_byno_with_reopen $ret 0 + + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + Rsrc001:diff($testdir/{rsrc.txt,check.txt}) $ret 0 + + puts "\tRsrc001.h: Verify proper syncing of changes on close." + error_check_good Rsrc001:db_close [$db close] 0 + set db [eval {berkdb_open -create -mode 0644 -recno \ + -source $testdir/rsrc.txt} $testfile] + set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [replicate "New Record $i" $i] + puts $oid $rec + set ret [eval {$db put} $txn {-append $rec}] + # Don't bother checking return; we don't know what + # the key number is, and we'll pick up a failure + # when we compare. + } + error_check_good Rsrc001:db_close [$db close] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good Rsrc001:diff($testdir/{rsrc,check}.txt) $ret 0 + } +} + diff --git a/test/tcl/rsrc002.tcl b/test/tcl/rsrc002.tcl new file mode 100644 index 00000000..753c85c5 --- /dev/null +++ b/test/tcl/rsrc002.tcl @@ -0,0 +1,65 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rsrc002 +# TEST Recno backing file test #2: test of set_re_delim. Specify a backing +# TEST file with colon-delimited records, and make sure they are correctly +# TEST interpreted. 
+proc rsrc002 { } { + source ./include.tcl + + puts "Rsrc002: Alternate variable-length record delimiters." + + # We run this test essentially twice, once with a db file + # and once without (an in-memory database). + foreach testfile { "$testdir/rsrc002.db" "" } { + + cleanup $testdir NULL + + # Create the starting files + set oid1 [open $testdir/rsrc.txt w] + set oid2 [open $testdir/check.txt w] + puts -nonewline $oid1 "ostrich:emu:kiwi:moa:cassowary:rhea:" + puts -nonewline $oid2 "ostrich:emu:kiwi:penguin:cassowary:rhea:" + close $oid1 + close $oid2 + + if { $testfile == "" } { + puts "Rsrc002: Testing with in-memory database." + } else { + puts "Rsrc002: Testing with disk-backed database." + } + + puts "\tRsrc002.a: Read file, verify correctness." + set db [eval {berkdb_open -create -mode 0644 -delim 58 \ + -recno -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Read the last record; replace it (but we won't change it). + # Then close the file and diff the two files. + set txn "" + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set rec [$dbc get -first] + error_check_good get_first $rec [list [list 1 "ostrich"]] + set rec [$dbc get -next] + error_check_good get_next $rec [list [list 2 "emu"]] + + puts "\tRsrc002.b: Write record, verify correctness." + + eval {$dbc get -set 4} + set ret [$dbc put -current "penguin"] + error_check_good dbc_put $ret 0 + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + + error_check_good \ + Rsrc002:diff($testdir/rsrc.txt,$testdir/check.txt) \ + [filecmp $testdir/rsrc.txt $testdir/check.txt] 0 + } +} diff --git a/test/tcl/rsrc003.tcl b/test/tcl/rsrc003.tcl new file mode 100644 index 00000000..fa414b54 --- /dev/null +++ b/test/tcl/rsrc003.tcl @@ -0,0 +1,178 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST rsrc003 +# TEST Recno backing file test. Try different patterns of adding +# TEST records and making sure that the corresponding file matches. +proc rsrc003 { } { + source ./include.tcl + global fixed_len + + puts "Rsrc003: Basic recno backing file writeback tests fixed length" + + # We run this test essentially twice, once with a db file + # and once without (an in-memory database). + # + # Then run with big fixed-length records + set rec1 "This is record 1" + set rec2 "This is record 2" + set rec3 "This is record 3" + set bigrec1 [replicate "This is record 1 " 512] + set bigrec2 [replicate "This is record 2 " 512] + set bigrec3 [replicate "This is record 3 " 512] + + set orig_fixed_len $fixed_len + set rlist { + {{$rec1 $rec2 $rec3} "small records" } + {{$bigrec1 $bigrec2 $bigrec3} "large records" }} + + foreach testfile { "$testdir/rsrc003.db" "" } { + + foreach rec $rlist { + cleanup $testdir NULL + + set recs [lindex $rec 0] + set msg [lindex $rec 1] + + # Create the starting files + # Note that for the rest of the test, we are going + # to append a LF when we 'put' via DB to maintain + # file structure and allow us to use 'gets'. + set oid1 [open $testdir/rsrc.txt w] + set oid2 [open $testdir/check.txt w] + fconfigure $oid1 -translation binary + fconfigure $oid2 -translation binary + foreach record $recs { + set r [subst $record] + set fixed_len [string length $r] + puts $oid1 $r + puts $oid2 $r + } + close $oid1 + close $oid2 + + set reclen [expr $fixed_len + 1] + if { $reclen > [string length $rec1] } { + set repl 512 + } else { + set repl 2 + } + if { $testfile == "" } { + puts \ +"Rsrc003: Testing with in-memory database with $msg." + } else { + puts \ +"Rsrc003: Testing with disk-backed database with $msg." 
+ } + + puts -nonewline \ + "\tRsrc003.a: Read file, rewrite last record;" + puts " write it out and diff" + set db [eval {berkdb_open -create -mode 0644 -recno \ + -len $reclen -source $testdir/rsrc.txt} $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Read the last record; replace it (don't change it). + # Then close the file and diff the two files. + set txn "" + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor \ + [is_valid_cursor $dbc $db] TRUE + + set rec [$dbc get -last] + error_check_good get_last [llength [lindex $rec 0]] 2 + set key [lindex [lindex $rec 0] 0] + set data [lindex [lindex $rec 0] 1] + + # Get the last record from the text file + set oid [open $testdir/rsrc.txt] + fconfigure $oid -translation binary + set laststr "" + while { [gets $oid str] != -1 } { + append str \12 + set laststr $str + } + close $oid + error_check_good getlast $data $laststr + + set ret [eval {$db put} $txn {$key $data}] + error_check_good replace_last $ret 0 + + error_check_good curs_close [$dbc close] 0 + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + error_check_good \ + diff1($testdir/rsrc.txt,$testdir/check.txt) \ + [filecmp $testdir/rsrc.txt $testdir/check.txt] 0 + + puts -nonewline "\tRsrc003.b: " + puts "Append some records in tree and verify in file." 
+ set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [chop_data -frecno [replicate \ + "This is New Record $i" $repl]] + puts $oid $rec + append rec \12 + incr key + set ret [eval {$db put} $txn {-append $rec}] + error_check_good put_append $ret $key + } + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + diff2($testdir/{rsrc.txt,check.txt}) $ret 0 + + puts "\tRsrc003.c: Append by record number" + set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [chop_data -frecno [replicate \ + "New Record (set 2) $i" $repl]] + puts $oid $rec + append rec \12 + incr key + set ret [eval {$db put} $txn {$key $rec}] + error_check_good put_byno $ret 0 + } + + error_check_good db_sync [$db sync] 0 + error_check_good db_sync [$db sync] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + diff3($testdir/{rsrc.txt,check.txt}) $ret 0 + + puts \ +"\tRsrc003.d: Verify proper syncing of changes on close." + error_check_good Rsrc003:db_close [$db close] 0 + set db [eval {berkdb_open -create -mode 0644 -recno \ + -len $reclen -source $testdir/rsrc.txt} $testfile] + set oid [open $testdir/check.txt a] + fconfigure $oid -translation binary + for {set i 1} {$i < 10} {incr i} { + set rec [chop_data -frecno [replicate \ + "New Record (set 3) $i" $repl]] + puts $oid $rec + append rec \12 + set ret [eval {$db put} $txn {-append $rec}] + # Don't bother checking return; + # we don't know what + # the key number is, and we'll pick up a failure + # when we compare. 
+ } + error_check_good Rsrc003:db_close [$db close] 0 + close $oid + set ret [filecmp $testdir/rsrc.txt $testdir/check.txt] + error_check_good \ + diff5($testdir/{rsrc,check}.txt) $ret 0 + } + } + set fixed_len $orig_fixed_len + return +} diff --git a/test/tcl/rsrc004.tcl b/test/tcl/rsrc004.tcl new file mode 100644 index 00000000..3f5cb5f1 --- /dev/null +++ b/test/tcl/rsrc004.tcl @@ -0,0 +1,51 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST rsrc004 +# TEST Recno backing file test for EOF-terminated records. +proc rsrc004 { } { + source ./include.tcl + + foreach isfixed { 0 1 } { + cleanup $testdir NULL + + # Create the backing text file. + set oid1 [open $testdir/rsrc.txt w] + if { $isfixed == 1 } { + puts -nonewline $oid1 "record 1xxx" + puts -nonewline $oid1 "record 2xxx" + } else { + puts $oid1 "record 1xxx" + puts $oid1 "record 2xxx" + } + puts -nonewline $oid1 "record 3" + close $oid1 + + set args "-create -mode 0644 -recno -source $testdir/rsrc.txt" + if { $isfixed == 1 } { + append args " -len [string length "record 1xxx"]" + set match "record 3 " + puts "Rsrc004: EOF-terminated recs: fixed length" + } else { + puts "Rsrc004: EOF-terminated recs: variable length" + set match "record 3" + } + + puts "\tRsrc004.a: Read file, verify correctness." 
+ set db [eval berkdb_open $args "$testdir/rsrc004.db"] + error_check_good dbopen [is_valid_db $db] TRUE + + # Read the last record + set dbc [eval {$db cursor} ""] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set rec [$dbc get -last] + error_check_good get_last $rec [list [list 3 $match]] + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + } +} diff --git a/test/tcl/sdb001.tcl b/test/tcl/sdb001.tcl new file mode 100644 index 00000000..74ddb462 --- /dev/null +++ b/test/tcl/sdb001.tcl @@ -0,0 +1,146 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb001 Tests mixing db and subdb operations +# TEST Tests mixing db and subdb operations +# TEST Create a db, add data, try to create a subdb. +# TEST Test naming db and subdb with a leading - for correct parsing +# TEST Existence check -- test use of -excl with subdbs +# TEST +# TEST Test non-subdb and subdb operations +# TEST Test naming (filenames begin with -) +# TEST Test existence (cannot create subdb of same name with -excl) +proc sdb001 { method args } { + source ./include.tcl + global errorInfo + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb001: skipping for method $method" + return + } + puts "Subdb001: $method ($args) subdb and non-subdb tests" + + set testfile $testdir/subdb001.db + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + set env NULL + incr eindex + set env [lindex $args $eindex] + puts "Subdb001 skipping for env $env" + return + } + # Create the database and open the dictionary + set subdb subdb0 + cleanup $testdir NULL + puts "\tSubdb001.a: Non-subdb database and subdb operations" + # + # Create a db with no subdbs. Add some data. Close. Try to + # open/add with a subdb. Should fail. 
+ # + puts "\tSubdb001.a.0: Create db, add data, close, try subdb" + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + while { [gets $did str] != -1 && $count < 5 } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) $str + } else { + set key $str + } + set ret [eval \ + {$db put} $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + incr count + } + close $did + error_check_good db_close [$db close] 0 + set ret [catch {eval {berkdb_open_noerr -create -mode 0644} $args \ + {$omethod $testfile $subdb}} db] + error_check_bad dbopen $ret 0 + # + # Create a db with no subdbs. Add no data. Close. Try to + # open/add with a subdb. Should fail. + # + set testfile $testdir/subdb001a.db + puts "\tSubdb001.a.1: Create db, close, try subdb" + # + # !!! + # Using -truncate is illegal when opening for subdbs, but we + # can use it here because we are not using subdbs for this + # create. + # + set db [eval {berkdb_open -create -truncate -mode 0644} $args \ + {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + set ret [catch {eval {berkdb_open_noerr -create -mode 0644} $args \ + {$omethod $testfile $subdb}} db] + error_check_bad dbopen $ret 0 + + if { [is_queue $method] == 1 || [is_partitioned $args]} { + puts "Subdb001: skipping remainder of test for method $method $args" + return + } + + # + # Test naming, db and subdb names beginning with -. 
+ # + puts "\tSubdb001.b: Naming" + set cwd [pwd] + cd $testdir + set testfile1 -subdb001.db + set subdb -subdb + puts "\tSubdb001.b.0: Create db and subdb with -name, no --" + set ret [catch {eval {berkdb_open -create -mode 0644} $args \ + {$omethod $testfile1 $subdb}} db] + error_check_bad dbopen $ret 0 + puts "\tSubdb001.b.1: Create db and subdb with -name, with --" + set db [eval {berkdb_open -create -mode 0644} $args \ + {$omethod -- $testfile1 $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + cd $cwd + + # + # Create 1 db with 1 subdb. Try to create another subdb of + # the same name. Should fail. + # + + puts "\tSubdb001.c: Existence check" + set testfile $testdir/subdb001d.db + set subdb subdb + set ret [catch {eval {berkdb_open -create -excl -mode 0644} $args \ + {$omethod $testfile $subdb}} db] + error_check_good dbopen [is_valid_db $db] TRUE + set ret [catch {eval {berkdb_open_noerr -create -excl -mode 0644} \ + $args {$omethod $testfile $subdb}} db1] + error_check_bad dbopen $ret 0 + error_check_good db_close [$db close] 0 + + return +} diff --git a/test/tcl/sdb002.tcl b/test/tcl/sdb002.tcl new file mode 100644 index 00000000..b2b0b65c --- /dev/null +++ b/test/tcl/sdb002.tcl @@ -0,0 +1,227 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb002 +# TEST Tests basic subdb functionality +# TEST Small keys, small data +# TEST Put/get per key +# TEST Dump file +# TEST Close, reopen +# TEST Dump file +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. +# TEST Then repeat using an environment. 
+proc sdb002 { method {nentries 10000} args } { + global passwd + global has_crypto + + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + set env NULL + incr eindex + set env [lindex $args $eindex] + puts "Subdb002 skipping for env $env" + return + } + set largs $args + subdb002_main $method $nentries $largs + append largs " -chksum " + subdb002_main $method $nentries $largs + + # Skip remainder of test if release does not support encryption. + if { $has_crypto == 0 } { + return + } + + append largs "-encryptaes $passwd " + subdb002_main $method $nentries $largs +} + +proc subdb002_main { method nentries largs } { + source ./include.tcl + global encrypt + + set largs [convert_args $method $largs] + set omethod [convert_method $method] + + env_cleanup $testdir + + puts "Subdb002: $method ($largs) basic subdb tests" + set testfile $testdir/subdb002.db + subdb002_body $method $omethod $nentries $largs $testfile NULL + + # Run convert_encrypt so that old_encrypt will be reset to + # the proper value and cleanup will work. + convert_encrypt $largs + set encargs "" + set largs [split_encargs $largs encargs] + + cleanup $testdir NULL + if { [is_queue $omethod] == 1 || [is_heap $omethod] == 1 } { + set sdb002_env berkdb_env_noerr + } else { + set sdb002_env berkdb_env + } + set env [eval {$sdb002_env -create -cachesize {0 10000000 0} \ + -mode 0644} -home $testdir $encargs] + error_check_good env_open [is_valid_env $env] TRUE + puts "Subdb002: $method ($largs) basic subdb tests in an environment" + + # We're in an env--use default path to database rather than specifying + # it explicitly. 
+ set testfile subdb002.db + subdb002_body $method $omethod $nentries $largs $testfile $env + error_check_good env_close [$env close] 0 +} + +proc subdb002_body { method omethod nentries largs testfile env } { + global encrypt + global passwd + source ./include.tcl + + # Create the database and open the dictionary + set subdb subdb0 + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + + if { [is_queue $omethod] == 1 || [is_heap $omethod] == 1 } { + set sdb002_open berkdb_open_noerr + } else { + set sdb002_open berkdb_open + } + + if { $env == "NULL" } { + set ret [catch {eval {$sdb002_open -create -mode 0644} $largs \ + {$omethod $testfile $subdb}} db] + } else { + set ret [catch {eval {$sdb002_open -create -mode 0644} $largs \ + {-env $env $omethod $testfile $subdb}} db] + } + + # + # If -queue or -heap method, we need to make sure that trying to + # create a subdb fails. + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + error_check_bad dbopen $ret 0 + puts "Subdb002: skipping remainder of test for method $method" + return + } + + error_check_good dbopen $ret 0 + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set count 0 + + if { [is_record_based $method] == 1 } { + set checkfunc subdb002_recno.check + append gflags " -recno" + } else { + set checkfunc subdb002.check + } + puts "\tSubdb002.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + set ret [eval \ + {$db put} $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + incr count + } + close $did + # Now we will get each key from the DB and compare the 
results + # to the original. + puts "\tSubdb002.b: dump file" + set txn "" + dump_file $db $txn $t1 $checkfunc + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Subdb002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tSubdb002.c: close, open, and dump file" + # Now, reopen the file and run the last test again. + open_and_dump_subfile $testfile $env $t1 $checkfunc \ + dump_file_direction "-first" "-next" $subdb + if { [is_record_based $method] != 1 } { + filesort $t1 $t3 + } + + error_check_good Subdb002:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. + puts "\tSubdb002.d: close, open, and dump file in reverse direction" + open_and_dump_subfile $testfile $env $t1 $checkfunc \ + dump_file_direction "-last" "-prev" $subdb + + if { [is_record_based $method] != 1 } { + filesort $t1 $t3 + } + + error_check_good Subdb002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tSubdb002.e: db_dump with subdatabase" + set outfile $testdir/subdb002.dump + set dumpargs " -f $outfile -s $subdb " + if { $encrypt > 0 } { + append dumpargs " -P $passwd " + } + if { $env != "NULL" } { + append dumpargs " -h $testdir " + } + append dumpargs " $testfile" + set stat [catch {eval {exec $util_path/db_dump} $dumpargs} ret] + error_check_good dbdump.subdb $stat 0 +} + +# Check function for Subdb002; keys and data are identical +proc subdb002.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc subdb002_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + 
error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/sdb003.tcl b/test/tcl/sdb003.tcl new file mode 100644 index 00000000..e598a689 --- /dev/null +++ b/test/tcl/sdb003.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb003 +# TEST Tests many subdbs +# TEST Creates many subdbs and puts a small amount of +# TEST data in each (many defaults to 1000) +# TEST +# TEST Use the first 1000 entries from the dictionary as subdbnames. +# TEST Insert each with entry as name of subdatabase and a partial list +# TEST as key/data. After all are entered, retrieve all; compare output +# TEST to original. Close file, reopen, do retrieve and re-verify. +proc sdb003 { method {nentries 1000} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb003: skipping for method $method" + return + } + + puts "Subdb003: $method ($args) many subdb tests" + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/subdb003.db + set env NULL + } else { + set testfile subdb003.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + if { $nentries == 1000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + # Create the database and open the dictionary + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set pflags "" + set gflags "" + set txn "" + set fcount 0 + + if { [is_record_based $method] == 1 } { + set checkfunc subdb003_recno.check + append gflags " -recno" + } else { + set checkfunc subdb003.check + } + + # Here is the loop where we put and get each key/data pair + set ndataent 10 + set fdid [open $dict] + while { [gets $fdid str] != -1 && $fcount < $nentries } { + + set subdb $str + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $ndataent } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret [list [list $key \ + [pad_data $method $str]]] + incr count + } + close $did + incr fcount + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the 
keys to see if they match + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $ndataent} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $ndataent $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Subdb003:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again. + open_and_dump_subfile $testfile $env $t1 $checkfunc \ + dump_file_direction "-first" "-next" $subdb + if { [is_record_based $method] != 1 } { + filesort $t1 $t3 + } + + error_check_good Subdb003:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. + open_and_dump_subfile $testfile $env $t1 $checkfunc \ + dump_file_direction "-last" "-prev" $subdb + + if { [is_record_based $method] != 1 } { + filesort $t1 $t3 + } + + error_check_good Subdb003:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + if { [expr $fcount % 100] == 0 } { + puts -nonewline "$fcount " + flush stdout + } + } + close $fdid + puts "" +} + +# Check function for Subdb003; keys and data are identical +proc subdb003.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc subdb003_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/sdb004.tcl b/test/tcl/sdb004.tcl new file mode 100644 index 00000000..9e043f4a --- /dev/null +++ b/test/tcl/sdb004.tcl @@ -0,0 +1,243 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sdb004 +# TEST Tests large subdb names +# TEST subdb name = filecontents, +# TEST key = filename, data = filecontents +# TEST Put/get per key +# TEST Dump file +# TEST Dump subdbs, verify data and subdb name match +# TEST +# TEST Create 1 db with many large subdbs. Use the contents as subdb names. +# TEST Take the source files and dbtest executable and enter their names as +# TEST the key with their contents as data. After all are entered, retrieve +# TEST all; compare output to original. Close file, reopen, do retrieve and +# TEST re-verify. +proc sdb004 { method args} { + global names + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || \ + [is_heap $method] == 1 || [is_fixed_length $method] == 1 } { + puts "Subdb004: skipping for method $method" + return + } + + puts "Subdb004: $method ($args) \ + filecontents=subdbname filename=key filecontents=data pairs" + + set txnenv 0 + set envargs "" + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/subdb004.db + set env NULL + } else { + set testfile subdb004.db + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + } + set testdir [get_home $env] + } + # Create the database and open the dictionary + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + cleanup $testdir $env + set pflags "" + set gflags "" + set txn "" + if { [is_record_based $method] == 1 } { + set checkfunc subdb004_recno.check + append gflags "-recno" + } else { + set checkfunc subdb004.check + } + + # Here is the loop where we put and get each key/data pair + # Note that the subdatabase name is passed in as a char *, not + # in a DBT, so it may not contain nulls; use only source files. + set file_list [glob $src_root/*/*.c $src_root/*/*/*.c \ + $src_root/*/*/*/*.c $src_root/*/*/*/*/*.c \ + $src_root/*/*/*/*/*/*.c] + set fcount [llength $file_list] + if { $txnenv == 1 && $fcount > 100 } { + set file_list [lrange $file_list 0 99] + set fcount 100 + } + + set count 0 + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $fcount} {set i [incr i]} { + puts $oid $i + } + close $oid + } else { + set oid [open $t2.tmp w] + foreach f $file_list { + puts $oid $f + } + close $oid + filesort $t2.tmp $t2 + } + puts "\tSubdb004.a: Set/Check each subdb" + foreach f $file_list { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set names([expr $count + 1]) $f + } else { + set key $f + } + # Should really catch errors + set fid [open $f r] + fconfigure $fid -translation binary + set data [read $fid] + set subdb $data + close $fid + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn 
[is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval \ + {$db put} $txn $pflags {$key [chop_data $method $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Should really catch errors + set fid [open $t4 w] + fconfigure $fid -translation binary + if [catch {eval {$db get} $gflags {$key}} data] { + puts -nonewline $fid $data + } else { + # Data looks like {{key data}} + set key [lindex [lindex $data 0] 0] + set data [lindex [lindex $data 0] 1] + puts -nonewline $fid $data + } + close $fid + + error_check_good Subdb004:diff($f,$t4) \ + [filecmp $f $t4] 0 + + incr count + + # Now we will get each key from the DB and compare the results + # to the original. + # puts "\tSubdb004.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_bin_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + } + + # + # Now for each file, check that the subdb name is the same + # as the data in that subdb and that the filename is the key. 
+ # + puts "\tSubdb004.b: Compare subdb names with key/data" + set db [eval {berkdb_open -rdonly} $envargs {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set c [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + + for {set d [$c get -first] } { [llength $d] != 0 } \ + {set d [$c get -next] } { + set subdbname [lindex [lindex $d 0] 0] + set subdb [eval {berkdb_open} $args {$testfile $subdbname}] + error_check_good dbopen [is_valid_db $db] TRUE + + # Output the subdb name + set ofid [open $t3 w] + fconfigure $ofid -translation binary + if { [string compare "\0" \ + [string range $subdbname end end]] == 0 } { + set slen [expr [string length $subdbname] - 2] + set subdbname [string range $subdbname 1 $slen] + } + puts -nonewline $ofid $subdbname + close $ofid + + # Output the data + set subc [eval {$subdb cursor} $txn] + error_check_good db_cursor [is_valid_cursor $subc $subdb] TRUE + set d [$subc get -first] + error_check_good dbc_get [expr [llength $d] != 0] 1 + set key [lindex [lindex $d 0] 0] + set data [lindex [lindex $d 0] 1] + + set ofid [open $t1 w] + fconfigure $ofid -translation binary + puts -nonewline $ofid $data + close $ofid + + $checkfunc $key $t1 + $checkfunc $key $t3 + + error_check_good Subdb004:diff($t3,$t1) \ + [filecmp $t3 $t1] 0 + error_check_good curs_close [$subc close] 0 + error_check_good db_close [$subdb close] 0 + } + error_check_good curs_close [$c close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + if { [is_record_based $method] != 1 } { + fileremove $t2.tmp + } +} + +# Check function for subdb004; key should be file name; data should be contents +proc subdb004.check { binfile tmpfile } { + source ./include.tcl + + error_check_good Subdb004:datamismatch($binfile,$tmpfile) \ + [filecmp $binfile $tmpfile] 0 
+} +proc subdb004_recno.check { binfile tmpfile } { + global names + source ./include.tcl + + set fname $names($binfile) + error_check_good key"$binfile"_exists [info exists names($binfile)] 1 + error_check_good Subdb004:datamismatch($fname,$tmpfile) \ + [filecmp $fname $tmpfile] 0 +} diff --git a/test/tcl/sdb005.tcl b/test/tcl/sdb005.tcl new file mode 100644 index 00000000..9edf6897 --- /dev/null +++ b/test/tcl/sdb005.tcl @@ -0,0 +1,161 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb005 +# TEST Tests cursor operations in subdbs +# TEST Put/get per key +# TEST Verify cursor operations work within subdb +# TEST Verify cursor operations do not work across subdbs +# TEST +# +# We should test this on all btrees, all hash, and a combination thereof +proc sdb005 {method {nentries 100} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb005: skipping for method $method" + return + } + + puts "Subdb005: $method ( $args ) subdb cursor operations test" + set txnenv 0 + set envargs "" + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/subdb005.db + set env NULL + } else { + set testfile subdb005.db + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + if { $nentries == 100 } { + set nentries 20 + } + } + set testdir [get_home $env] + } + + cleanup $testdir $env + set txn "" + set psize 8192 + set duplist {-1 -1 -1 -1 -1} + build_all_subdb \ + $testfile [list $method] $psize $duplist $nentries $args + set numdb [llength $duplist] + # + # Get a cursor in each subdb and move past the end of each + # subdb. Make sure we don't end up in another subdb. + # + puts "\tSubdb005.a: Cursor ops - first/prev and last/next" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for {set i 0} {$i < $numdb} {incr i} { + set db [eval {berkdb_open -unknown} $args {$testfile sub$i.db}] + error_check_good dbopen [is_valid_db $db] TRUE + set db_handle($i) $db + # Used in 005.c test + lappend subdbnames sub$i.db + + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # Get a second cursor for cursor comparison test. + set dbc2 [eval {$db cursor} $txn] + error_check_good db_cursor2 [is_valid_cursor $dbc2 $db] TRUE + + set d [$dbc get -first] + set d2 [$dbc2 get -first] + error_check_good dbc_get [expr [llength $d] != 0] 1 + + # Cursor comparison: both are on get -first. + error_check_good dbc2_cmp [$dbc cmp $dbc2] 0 + + # Used in 005.b test + set db_key($i) [lindex [lindex $d 0] 0] + + set d [$dbc get -prev] + error_check_good dbc_get [expr [llength $d] == 0] 1 + + set d [$dbc get -last] + error_check_good dbc_get [expr [llength $d] != 0] 1 + + # Cursor comparison: the first cursor has moved to + # get -last. 
+ error_check_bad dbc2_cmp [$dbc cmp $dbc2] 0 + + set d [$dbc get -next] + error_check_good dbc_get [expr [llength $d] == 0] 1 + error_check_good dbc_close [$dbc close] 0 + error_check_good dbc2_close [$dbc2 close] 0 + } + # + # Get a key from each subdb and try to get this key in a + # different subdb. Make sure it fails + # + puts "\tSubdb005.b: Get keys in different subdb's" + for {set i 0} {$i < $numdb} {incr i} { + set n [expr $i + 1] + if {$n == $numdb} { + set n 0 + } + set db $db_handle($i) + if { [is_record_based $method] == 1 } { + set d [eval {$db get -recno} $txn {$db_key($n)}] + error_check_good \ + db_get [expr [llength $d] == 0] 1 + } else { + set d [eval {$db get} $txn {$db_key($n)}] + error_check_good db_get [expr [llength $d] == 0] 1 + } + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + # + # Clean up + # + for {set i 0} {$i < $numdb} {incr i} { + error_check_good db_close [$db_handle($i) close] 0 + } + + # + # Check contents of DB for subdb names only. Makes sure that + # every subdbname is there and that nothing else is there. + # + puts "\tSubdb005.c: Check DB is read-only" + error_check_bad dbopen [catch \ + {berkdb_open_noerr -unknown $testfile} ret] 0 + + puts "\tSubdb005.d: Check contents of DB for subdb names only" + set db [eval {berkdb_open -unknown -rdonly} $envargs {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set subdblist [$db get -glob *] + foreach kd $subdblist { + # subname also used in subdb005.e,f below + set subname [lindex $kd 0] + set i [lsearch $subdbnames $subname] + error_check_good subdb_search [expr $i != -1] 1 + set subdbnames [lreplace $subdbnames $i $i] + } + error_check_good subdb_done [llength $subdbnames] 0 + + error_check_good db_close [$db close] 0 + return +} diff --git a/test/tcl/sdb006.tcl b/test/tcl/sdb006.tcl new file mode 100644 index 00000000..3b256f89 --- /dev/null +++ b/test/tcl/sdb006.tcl @@ -0,0 +1,168 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb006 +# TEST Tests intra-subdb join +# TEST +# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those work, +# TEST everything else does as well. We'll create test databases called +# TEST sub1.db, sub2.db, sub3.db, and sub4.db. The number on the database +# TEST describes the duplication -- duplicates are of the form 0, N, 2N, 3N, +# TEST ... where N is the number of the database. Primary.db is the primary +# TEST database, and sub0.db is the database that has no matching duplicates. +# TEST All of these are within a single database. +# +# We should test this on all btrees, all hash, and a combination thereof +proc sdb006 {method {nentries 100} args } { + source ./include.tcl + global rand_init + + # NB: these flags are internal only, ok + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || [is_rbtree $method] } { + puts "\tSubdb006 skipping for method $method." + return + } + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/subdb006.db + set env NULL + } else { + set testfile subdb006.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + if { $nentries == 100 } { + # !!! + # nentries must be greater than the number + # of do_join_subdb calls below. + # + set nentries 35 + } + } + set testdir [get_home $env] + } + berkdb srand $rand_init + + set oargs $args + foreach opt {" -dup" " -dupsort"} { + append args $opt + + puts "Subdb006: $method ( $args ) Intra-subdb join" + set txn "" + # + # Get a cursor in each subdb and move past the end of each + # subdb. Make sure we don't end up in another subdb. 
+ # + puts "\tSubdb006.a: Intra-subdb join" + + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + + set psize 8192 + set duplist {0 50 25 16 12} + set numdb [llength $duplist] + build_all_subdb $testfile [list $method] $psize \ + $duplist $nentries $args + + # Build the primary + puts "Subdb006: Building the primary database $method" + set oflags "-create -mode 0644 [conv $omethod \ + [berkdb random_int 1 2]]" + set db [eval {berkdb_open} $oflags $oargs $testfile primary.db] + error_check_good dbopen [is_valid_db $db] TRUE + for { set i 0 } { $i < 1000 } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set key [format "%04d" $i] + set ret [eval {$db put} $txn {$key stub}] + error_check_good "primary put" $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + error_check_good "primary close" [$db close] 0 + set did [open $dict] + gets $did str + do_join_subdb $testfile primary.db "1 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 2 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 2 3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 2 1" $str $oargs + gets $did str + 
do_join_subdb $testfile primary.db "1 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 4 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "0 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 0 1" $str $oargs + + close $did + } +} diff --git a/test/tcl/sdb007.tcl b/test/tcl/sdb007.tcl new file mode 100644 index 00000000..23db4b04 --- /dev/null +++ b/test/tcl/sdb007.tcl @@ -0,0 +1,108 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb007 +# TEST Tests page size difference errors between subdbs. +# TEST If the physical file already exists, we ignore pagesize specifications +# TEST on any subsequent -creates. +# TEST +# TEST 1. Create/open a subdb with system default page size. +# TEST Create/open a second subdb specifying a different page size. +# TEST The create should succeed, but the pagesize of the new db +# TEST will be the system default page size. +# TEST 2. 
Create/open a subdb with a specified, non-default page size. +# TEST Create/open a second subdb specifying a different page size. +# TEST The create should succeed, but the pagesize of the new db +# TEST will be the specified page size from the first create. + +proc sdb007 { method args } { + source ./include.tcl + + set db2args [convert_args -btree $args] + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb007: skipping for method $method" + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Subdb007: skipping for specific page sizes" + return + } + + puts "Subdb007: $method ($args) subdb tests with different page sizes" + + set txnenv 0 + set envargs "" + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/subdb007.db + set env NULL + } else { + set testfile subdb007.db + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + append db2args " -auto_commit " + } + set testdir [get_home $env] + } + set sub1 "sub1" + set sub2 "sub2" + cleanup $testdir $env + set txn "" + + puts "\tSubdb007.a.0: create subdb with default page size" + set db [eval {berkdb_open -create -mode 0644} \ + $args $envargs {$omethod $testfile $sub1}] + error_check_good subdb [is_valid_db $db] TRUE + + # Figure out what the default page size is so that we can send + # a different value to the next -create call. 
+ set default_psize [stat_field $db stat "Page size"] + error_check_good dbclose [$db close] 0 + + if { $default_psize == 512 } { + set psize 2048 + } else { + set psize 512 + } + + puts "\tSubdb007.a.1: Create 2nd subdb with different specified page size" + set db2 [eval {berkdb_open -create -btree} \ + $db2args $envargs {-pagesize $psize $testfile $sub2}] + error_check_good db2_create [is_valid_db $db2] TRUE + + set actual_psize [stat_field $db2 stat "Page size"] + error_check_good check_pagesize [expr $actual_psize == $default_psize] 1 + error_check_good db2close [$db2 close] 0 + + set ret [eval {berkdb dbremove} $envargs {$testfile}] + + puts "\tSubdb007.b.0: Create subdb with specified page size" + set db [eval {berkdb_open -create -mode 0644} \ + $args $envargs {-pagesize $psize $omethod $testfile $sub1}] + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + puts "\tSubdb007.b.1: Create 2nd subdb with different specified page size" + set newpsize [expr $psize * 2] + set db2 [eval {berkdb_open -create -mode 0644} $args \ + $envargs {-pagesize $newpsize $omethod $testfile $sub2}] + error_check_good subdb [is_valid_db $db2] TRUE + set actual_psize [stat_field $db2 stat "Page size"] + error_check_good check_pagesize [expr $actual_psize == $psize] 1 + error_check_good db2close [$db2 close] 0 +} diff --git a/test/tcl/sdb008.tcl b/test/tcl/sdb008.tcl new file mode 100644 index 00000000..15fa2e5b --- /dev/null +++ b/test/tcl/sdb008.tcl @@ -0,0 +1,93 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb008 +# TEST Tests explicit setting of lorders for subdatabases -- the +# TEST lorder should be ignored. 
+proc sdb008 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb008: skipping for method $method" + return + } + set eindex [lsearch -exact $args "-env"] + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile1 $testdir/subdb008a.db + set testfile2 $testdir/subdb008b.db + set env NULL + } else { + set testfile1 subdb008a.db + set testfile2 subdb008b.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "Subdb008: $method ($args) subdb tests with different lorders" + + puts "\tSubdb008.a.0: create subdb with system default lorder" + set lorder "1234" + if { [big_endian] } { + set lorder "4321" + } + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile1 "sub1"}] + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + # Explicitly try to create subdb's of each byte order. In both + # cases the subdb should be forced to the byte order of the + # parent database. 
+ puts "\tSubdb008.a.1: Try to create subdb with -1234 lorder" + set db [eval {berkdb_open -create -mode 0644} \ + $args {-lorder 1234 $omethod $testfile1 "sub2"}] + error_check_good lorder_1234 [eval $db get_lorder] $lorder + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + puts "\tSubdb008.a.2: Try to create subdb with -4321 lorder" + set db [eval {berkdb_open -create -mode 0644} \ + $args {-lorder 4321 $omethod $testfile1 "sub3"}] + error_check_good lorder_4321 [eval $db get_lorder] $lorder + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + puts "\tSubdb008.b.0: create subdb with non-default lorder" + set reverse_lorder "4321" + if { [big_endian] } { + set reverse_lorder "1234" + } + set db [eval {berkdb_open -create -mode 0644} \ + {-lorder $reverse_lorder} $args {$omethod $testfile2 "sub1"}] + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + puts "\tSubdb008.b.1: Try to create subdb with -1234 lorder" + set db [eval {berkdb_open -create -mode 0644} \ + $args {-lorder 1234 $omethod $testfile2 "sub2"}] + error_check_good lorder_1234 [eval $db get_lorder] $reverse_lorder + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + puts "\tSubdb008.b.2: Try to create subdb with -4321 lorder" + set db [eval {berkdb_open -create -mode 0644} \ + $args {-lorder 4321 $omethod $testfile2 "sub3"}] + error_check_good lorder_4321 [eval $db get_lorder] $reverse_lorder + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 +} diff --git a/test/tcl/sdb009.tcl b/test/tcl/sdb009.tcl new file mode 100644 index 00000000..628911f2 --- /dev/null +++ b/test/tcl/sdb009.tcl @@ -0,0 +1,107 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sdb009 +# TEST Test DB->rename() method for subdbs +proc sdb009 { method args } { + global errorCode + source ./include.tcl + + set omethod [convert_method $method] + set args [convert_args $method $args] + + puts "Subdb009: $method ($args): Test of DB->rename()" + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "\tSubdb009: Skipping for method $method." + return + } + + set txnenv 0 + set envargs "" + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/subdb009.db + set env NULL + } else { + set testfile subdb009.db + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + } + set testdir [get_home $env] + } + set oldsdb OLDDB + set newsdb NEWDB + + # Make sure we're starting from a clean slate. + cleanup $testdir $env + error_check_bad "$testfile exists" [file exists $testfile] 1 + + puts "\tSubdb009.a: Create/rename file" + puts "\t\tSubdb009.a.1: create" + set db [eval {berkdb_open -create -mode 0644}\ + $omethod $args {$testfile $oldsdb}] + error_check_good dbopen [is_valid_db $db] TRUE + + # The nature of the key and data are unimportant; use numeric key + # so record-based methods don't need special treatment. 
+ set txn "" + set key 1 + set data [pad_data $method data] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good dbput [eval {$db put} $txn {$key $data}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good dbclose [$db close] 0 + + puts "\t\tSubdb009.a.2: rename" + error_check_good rename_file [eval {berkdb dbrename} $envargs \ + {$testfile $oldsdb $newsdb}] 0 + + puts "\t\tSubdb009.a.3: check" + # Open again with create to make sure we've really completely + # disassociated the subdb from the old name. + set odb [eval {berkdb_open -create -mode 0644}\ + $omethod $args $testfile $oldsdb] + error_check_good odb_open [is_valid_db $odb] TRUE + set odbt [$odb get $key] + error_check_good odb_close [$odb close] 0 + + set ndb [eval {berkdb_open -create -mode 0644}\ + $omethod $args $testfile $newsdb] + error_check_good ndb_open [is_valid_db $ndb] TRUE + set ndbt [$ndb get $key] + error_check_good ndb_close [$ndb close] 0 + + # The DBT from the "old" database should be empty, not the "new" one. + error_check_good odbt_empty [llength $odbt] 0 + error_check_bad ndbt_empty [llength $ndbt] 0 + error_check_good ndbt [lindex [lindex $ndbt 0] 1] $data + + # Now there's both an old and a new. Rename the "new" to the "old" + # and make sure that fails. + puts "\tSubdb009.b: Make sure rename fails instead of overwriting" + set ret [catch {eval {berkdb dbrename} $envargs $testfile \ + $oldsdb $newsdb} res] + error_check_bad rename_overwrite $ret 0 + error_check_good rename_overwrite_ret [is_substr $errorCode EEXIST] 1 + + puts "\tSubdb009 succeeded." +} diff --git a/test/tcl/sdb010.tcl b/test/tcl/sdb010.tcl new file mode 100644 index 00000000..bf192bc2 --- /dev/null +++ b/test/tcl/sdb010.tcl @@ -0,0 +1,169 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sdb010 +# TEST Test DB->remove() method and DB->truncate() for subdbs +proc sdb010 { method args } { + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Subdb010: Test of DB->remove() and DB->truncate" + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "\tSubdb010: Skipping for method $method." + return + } + + set txnenv 0 + set envargs "" + set eindex [lsearch -exact $args "-env"] + # + # If we are not given an env, create one. + if { $eindex == -1 } { + set env [berkdb_env -create -home $testdir -mode 0644] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + } + set testfile subdb010.db + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + } + set testdir [get_home $env] + set tfpath $testdir/$testfile + + cleanup $testdir $env + + set txn "" + set testdb DATABASE + set testdb2 DATABASE2 + + set db [eval {berkdb_open -create -mode 0644} $omethod \ + $args $envargs $testfile $testdb] + error_check_good db_open [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + puts "\tSubdb010.a: Test of DB->remove()" + error_check_good file_exists_before [file exists $tfpath] 1 + error_check_good db_remove [eval {berkdb dbremove} $envargs \ + $testfile $testdb] 0 + + # File should still exist. + error_check_good file_exists_after [file exists $tfpath] 1 + + # But database should not. + set ret [catch {eval berkdb_open $omethod \ + $args $envargs $testfile $testdb} res] + error_check_bad open_failed ret 0 + error_check_good open_failed_ret [is_substr $errorCode ENOENT] 1 + + puts "\tSubdb010.b: Setup for DB->truncate()" + # The nature of the key and data are unimportant; use numeric key + # so record-based methods don't need special treatment. 
+ set key1 1 + set key2 2 + set data1 [pad_data $method data1] + set data2 [pad_data $method data2] + + set db [eval {berkdb_open -create -mode 0644} $omethod \ + $args $envargs {$testfile $testdb}] + error_check_good db_open [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good dbput [eval {$db put} $txn {$key1 $data1}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set db2 [eval {berkdb_open -create -mode 0644} $omethod \ + $args $envargs $testfile $testdb2] + error_check_good db_open [is_valid_db $db2] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good dbput [eval {$db2 put} $txn {$key2 $data2}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good db_close [$db close] 0 + error_check_good db_close [$db2 close] 0 + + puts "\tSubdb010.c: truncate" + # + # Return value should be 1, the count of how many items were + # destroyed when we truncated. 
+ set db [eval {berkdb_open -create -mode 0644} $omethod \ + $args $envargs $testfile $testdb] + error_check_good db_open [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good trunc_subdb [eval {$db truncate} $txn] 1 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tSubdb010.d: check" + set db [eval {berkdb_open} $args $envargs {$testfile $testdb}] + error_check_good db_open [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + set kd [$dbc get -first] + error_check_good trunc_dbcget [llength $kd] 0 + error_check_good dbcclose [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set db2 [eval {berkdb_open} $args $envargs {$testfile $testdb2}] + error_check_good db_open [is_valid_db $db2] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db2 cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db2] TRUE + set kd [$dbc get -first] + error_check_bad notrunc_dbcget1 [llength $kd] 0 + set db2kd [list [list $key2 $data2]] + error_check_good key2 $kd $db2kd + set kd [$dbc get -next] + error_check_good notrunc_dbget2 [llength $kd] 0 + error_check_good dbcclose [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good db_close [$db close] 0 + error_check_good db_close [$db2 close] 0 + + # If we created our env, close it. 
+ if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/sdb011.tcl b/test/tcl/sdb011.tcl new file mode 100644 index 00000000..bba61aff --- /dev/null +++ b/test/tcl/sdb011.tcl @@ -0,0 +1,141 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb011 +# TEST Test deleting Subdbs with overflow pages +# TEST Create 1 db with many large subdbs. +# TEST Test subdatabases with overflow pages. +proc sdb011 { method {ndups 13} {nsubdbs 10} args} { + global names + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || \ + [is_heap $method] == 1 || [is_fixed_length $method] == 1 } { + puts "Subdb011: skipping for method $method" + return + } + set txnenv 0 + set envargs "" + set max_files 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/subdb011.db + set env NULL + set tfpath $testfile + } else { + set testfile subdb011.db + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append envargs " -auto_commit " + set max_files 50 + if { $ndups == 13 } { + set ndups 7 + } + } + set testdir [get_home $env] + set tfpath $testdir/$testfile + } + + # Create the database and open the dictionary + + cleanup $testdir $env + set txn "" + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list] + set flen [llength $file_list] + puts "Subdb011: $method ($args) $ndups overflow dups with \ + $flen filename=key filecontents=data pairs" + + puts "\tSubdb011.a: Create each of $nsubdbs subdbs and dups" + set slist {} + set i 0 + set count 0 + foreach f $file_list { + set i [expr $i % $nsubdbs] + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set names([expr $count + 1]) $f + } else { + set key $f + } + # Should really catch errors + set fid [open $f r] + fconfigure $fid -translation binary + set filecont [read $fid] + set subdb subdb$i + lappend slist $subdb + close $fid + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + for {set dup 0} {$dup < $ndups} {incr dup} { + set data $dup:$filecont + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key \ + [chop_data $method $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + error_check_good dbclose [$db close] 0 + incr i + incr count + } + + puts "\tSubdb011.b: Verify overflow pages" + foreach subdb $slist { + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile $subdb}] + error_check_good dbopen 
[is_valid_db $db] TRUE + set stat [$db stat] + + # What everyone else calls overflow pages, hash calls "big + # pages", so we need to special-case hash here. (Hash + # overflow pages are additional pages after the first in a + # bucket.) + if { [string compare [$db get_type] hash] == 0 } { + error_check_bad overflow \ + [is_substr $stat "{{Number of big pages} 0}"] 1 + } else { + error_check_bad overflow \ + [is_substr $stat "{{Overflow pages} 0}"] 1 + } + error_check_good dbclose [$db close] 0 + } + + puts "\tSubdb011.c: Delete subdatabases" + for {set i $nsubdbs} {$i > 0} {set i [expr $i - 1]} { + # + # Randomly delete a subdatabase + set sindex [berkdb random_int 0 [expr $i - 1]] + set subdb [lindex $slist $sindex] + # + # Delete the one we did from the list + set slist [lreplace $slist $sindex $sindex] + error_check_good file_exists_before [file exists $tfpath] 1 + error_check_good db_remove [eval {berkdb dbremove} $envargs \ + {$testfile $subdb}] 0 + } +} + diff --git a/test/tcl/sdb012.tcl b/test/tcl/sdb012.tcl new file mode 100644 index 00000000..b20f288d --- /dev/null +++ b/test/tcl/sdb012.tcl @@ -0,0 +1,434 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb012 +# TEST Test subdbs with locking and transactions +# TEST Tests creating and removing subdbs while handles +# TEST are open works correctly, and in the face of txns. +# +proc sdb012 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb012: skipping for method $method" + return + } + + # If we are using an env, then skip this test. It needs its own. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Subdb012 skipping for env $env" + return + } + set encargs "" + set largs [split_encargs $args encargs] + + puts "Subdb012: $method ($largs $encargs) subdb txn/locking tests" + + # + # sdb012_body takes a txn list containing 4 elements. + # {txn command for first subdb + # txn command for second subdb + # txn command for first subdb removal + # txn command for second subdb removal} + # + # The allowed commands are 'none' 'one', 'auto', 'abort', 'commit'. + # 'none' is a special case meaning run without a txn. In the + # case where all 4 items are 'none', we run in a lock-only env. + # 'one' is a special case meaning we create the subdbs together + # in one single transaction. It is indicated as the value for t1, + # and the value in t2 indicates if that single txn should be + # aborted or committed. It is not used and has no meaning + # in the removal case. 'auto' means use the -auto_commit flag + # to the operation, and 'abort' and 'commit' do the obvious. + # "-auto" is applied only to the creation of the subdbs, since + # it is done by default on database removes in transactional + # environments. + # + # First test locking w/o txns. If any in tlist are 'none', + # all must be none. + # + # Now run through the txn-based operations + set count 0 + set sdb "Subdb012." 
+ set teststr "abcdefghijklmnopqrstuvwxyz" + set testlet [split $teststr {}] + foreach t1 { none one abort auto commit } { + foreach t2 { none abort auto commit } { + if { $t1 == "one" } { + if { $t2 == "none" || $t2 == "auto"} { + continue + } + } + set tlet [lindex $testlet $count] + foreach r1 { none abort commit } { + foreach r2 { none abort commit } { + set tlist [list $t1 $t2 $r1 $r2] + set nnone [llength \ + [lsearch -all $tlist none]] + if { $nnone != 0 && $nnone != 4 } { + continue + } + sdb012_body $testdir $omethod $largs \ + $encargs $sdb$tlet $tlist + } + } + incr count + } + } + +} + +proc s012 { method args } { + source ./include.tcl + + set omethod [convert_method $method] + + set encargs "" + set largs "" + + puts "Subdb012: $method ($largs $encargs) subdb txn/locking tests" + + set sdb "Subdb012." + set tlet X + set tlist $args + error_check_good tlist [llength $tlist] 4 + sdb012_body $testdir $omethod $largs $encargs $sdb$tlet $tlist +} + +# +# This proc checks the tlist values and returns the flags +# that should be used when opening the env. If we are running +# with no txns, then just -lock, otherwise -txn. +# +proc sdb012_subsys { tlist } { + set t1 [lindex $tlist 0] + # + # If we have no txns, all elements of the list should be none. + # In that case we only run with locking turned on. + # Otherwise, we use the full txn subsystems. + # + set allnone {none none none none} + if { $allnone == $tlist } { + set subsys "-lock" + } else { + set subsys "-txn" + } + return $subsys +} + +# +# This proc parses the tlist and returns a list of 4 items that +# should be used in operations. I.e. it will begin the txns as +# needed, or return a -auto_commit flag, etc. 
+# +proc sdb012_tflags { env tlist } { + set ret "" + set t1 "" + foreach t $tlist { + switch $t { + one { + set t1 [$env txn] + error_check_good txnbegin [is_valid_txn $t1 $env] TRUE + lappend ret "-txn $t1" + lappend ret "-txn $t1" + } + auto { + lappend ret "-auto_commit" + } + abort - + commit { + # + # If the previous command was a "one", skip over + # this commit/abort. Otherwise start a new txn + # for the removal case. + # + if { $t1 == "" } { + set txn [$env txn] + error_check_good txnbegin [is_valid_txn $txn \ + $env] TRUE + lappend ret "-txn $txn" + } else { + set t1 "" + } + } + none { + lappend ret "" + } + default { + error "Txn command $t not implemented" + } + } + } + return $ret +} + +# +# This proc parses the tlist and returns a list of 4 items that +# should be used in the txn conclusion operations. I.e. it will +# give "" if using auto_commit (i.e. no final txn op), or a single +# abort/commit if both subdb's are in one txn. +# +proc sdb012_top { tflags tlist } { + set ret "" + set t1 "" + # + # We know both lists have 4 items. Iterate over them + # using multiple value lists so we know which txn goes + # with each op. + # + # The tflags list is needed to extract the txn command + # out for the operation. The tlist list is needed to + # determine what operation we are doing. + # + foreach t $tlist tf $tflags { + switch $t { + one { + set t1 [lindex $tf 1] + } + auto { + lappend ret "sdb012_nop" + } + abort - + commit { + # + # If the previous command was a "one" (i.e. t1 + # is set), append a correct command and then + # an empty one. 
+ # + if { $t1 == "" } { + set txn [lindex $tf 1] + set top "$txn $t" + lappend ret $top + } else { + set top "$t1 $t" + lappend ret "sdb012_nop" + lappend ret $top + set t1 "" + } + } + none { + lappend ret "sdb012_nop" + } + } + } + return $ret +} + +proc sdb012_nop { } { + return 0 +} + +proc sdb012_isabort { tlist item } { + set i [lindex $tlist $item] + if { $i == "one" } { + set i [lindex $tlist [expr $item + 1]] + } + if { $i == "abort" } { + return 1 + } else { + return 0 + } +} + +proc sdb012_body { testdir omethod largs encargs msg tlist } { + + puts "\t$msg: $tlist" + set testfile subdb012.db + set subdb1 sub1 + set subdb2 sub2 + + set subsys [sdb012_subsys $tlist] + env_cleanup $testdir + set env [eval {berkdb_env -create -home} $testdir $subsys $encargs] + error_check_good dbenv [is_valid_env $env] TRUE + error_check_good test_lock [$env test abort subdb_lock] 0 + + # + # Convert from our tlist txn commands into real flags we + # will pass to commands. Use the multiple values feature + # of foreach to do this efficiently. 
+ # + set tflags [sdb012_tflags $env $tlist] + foreach {txn1 txn2 rem1 rem2} $tflags {break} + foreach {top1 top2 rop1 rop2} [sdb012_top $tflags $tlist] {break} + +# puts "txn1 $txn1, txn2 $txn2, rem1 $rem1, rem2 $rem2" +# puts "top1 $top1, top2 $top2, rop1 $rop1, rop2 $rop2" + puts "\t$msg.0: Create sub databases in env with $subsys" + set s1 [eval {berkdb_open -env $env -create -mode 0644} \ + $largs $txn1 {$omethod $testfile $subdb1}] + error_check_good dbopen [is_valid_db $s1] TRUE + + set ret [eval $top1] + error_check_good t1_end $ret 0 + + set s2 [eval {berkdb_open -env $env -create -mode 0644} \ + $largs $txn2 {$omethod $testfile $subdb2}] + error_check_good dbopen [is_valid_db $s2] TRUE + + puts "\t$msg.1: Subdbs are open; resolve txns if necessary" + set ret [eval $top2] + error_check_good t2_end $ret 0 + + set t1_isabort [sdb012_isabort $tlist 0] + set t2_isabort [sdb012_isabort $tlist 1] + set r1_isabort [sdb012_isabort $tlist 2] + set r2_isabort [sdb012_isabort $tlist 3] + +# puts "t1_isabort $t1_isabort, t2_isabort $t2_isabort, r1_isabort $r1_isabort, r2_isabort $r2_isabort" + + puts "\t$msg.2: Subdbs are open; verify removal failures" + # Verify removes of subdbs with open subdb's fail + # + # We should fail no matter what. If we aborted, then the + # subdb should not exist. If we didn't abort, we should fail + # with DB_LOCK_NOTGRANTED. + # + # XXX - Do we need -auto_commit for all these failing ones? 
+ set r [ catch {berkdb dbremove -env $env $testfile $subdb1} result ] + error_check_bad dbremove1_open $r 0 + if { $t1_isabort } { + error_check_good dbremove1_open_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremove1_open [is_substr \ + $result DB_LOCK_NOTGRANTED] 1 + } + + set r [ catch {berkdb dbremove -env $env $testfile $subdb2} result ] + error_check_bad dbremove2_open $r 0 + if { $t2_isabort } { + error_check_good dbremove2_open_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremove2_open [is_substr \ + $result DB_LOCK_NOTGRANTED] 1 + } + + # Verify file remove fails + set r [catch {berkdb dbremove -env $env $testfile} result] + error_check_bad dbremovef_open $r 0 + + # + # If both aborted, there should be no file?? + # + if { $t1_isabort && $t2_isabort } { + error_check_good dbremovef_open_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremovef_open [is_substr \ + $result DB_LOCK_NOTGRANTED] 1 + } + + puts "\t$msg.3: Close subdb2; verify removals" + error_check_good close_s2 [$s2 close] 0 + set r [ catch {eval {berkdb dbremove -env} \ + $env $rem2 $testfile $subdb2} result ] + if { $t2_isabort } { + error_check_bad dbrem2_ab $r 0 + error_check_good dbrem2_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbrem2 $result 0 + } + # Resolve subdb2 removal txn + set r [eval $rop2] + error_check_good rop2 $r 0 + + set r [ catch {berkdb dbremove -env $env $testfile $subdb1} result ] + error_check_bad dbremove1.2_open $r 0 + if { $t1_isabort } { + error_check_good dbremove1.2_open_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremove1.2_open [is_substr \ + $result DB_LOCK_NOTGRANTED] 1 + } + + # There are three cases here: + # 1. if both t1 and t2 aborted, the file shouldn't exist + # 2. if only t1 aborted, the file still exists and nothing is open + # 3. 
if neither aborted a remove should fail because the first + # subdb is still open + # In case 2, don't try the remove, because it should succeed + # and we won't be able to test anything else. + if { !$t1_isabort || $t2_isabort } { + set r [catch {berkdb dbremove -env $env $testfile} result] + if { $t1_isabort && $t2_isabort } { + error_check_bad dbremovef.2_open $r 0 + error_check_good dbremove.2_open_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_bad dbremovef.2_open $r 0 + error_check_good dbremove.2_open [is_substr \ + $result DB_LOCK_NOTGRANTED] 1 + } + } + + puts "\t$msg.4: Close subdb1; verify removals" + error_check_good close_s1 [$s1 close] 0 + set r [ catch {eval {berkdb dbremove -env} \ + $env $rem1 $testfile $subdb1} result ] + if { $t1_isabort } { + error_check_bad dbremove1_ab $r 0 + error_check_good dbremove1_ab [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremove1 $result 0 + } + # Resolve subdb1 removal txn + set r [eval $rop1] + error_check_good rop1 $r 0 + + # Verify removal of subdb2. All DB handles are closed now. + # So we have two scenarios: + # 1. The removal of subdb2 above was successful and subdb2 + # doesn't exist and we should fail that way. + # 2. The removal of subdb2 above was aborted, and this + # removal should succeed. + # + set r [ catch {berkdb dbremove -env $env $testfile $subdb2} result ] + if { $r2_isabort && !$t2_isabort } { + error_check_good dbremove2.1_ab $result 0 + } else { + error_check_bad dbremove2.1 $r 0 + error_check_good dbremove2.1 [is_substr \ + $result "no such file"] 1 + } + + # Verify removal of subdb1. All DB handles are closed now. + # So we have two scenarios: + # 1. The removal of subdb1 above was successful and subdb1 + # doesn't exist and we should fail that way. + # 2. The removal of subdb1 above was aborted, and this + # removal should succeed. 
+ # + set r [ catch {berkdb dbremove -env $env $testfile $subdb1} result ] + if { $r1_isabort && !$t1_isabort } { + error_check_good dbremove1.1 $result 0 + } else { + error_check_bad dbremove_open $r 0 + error_check_good dbremove.1 [is_substr \ + $result "no such file"] 1 + } + + puts "\t$msg.5: All closed; remove file" + set r [catch {berkdb dbremove -env $env $testfile} result] + if { $t1_isabort && $t2_isabort } { + error_check_bad dbremove_final_ab $r 0 + error_check_good dbremove_file_abstr [is_substr \ + $result "no such file"] 1 + } else { + error_check_good dbremove_final $r 0 + } + error_check_good envclose [$env close] 0 +} diff --git a/test/tcl/sdb013.tcl b/test/tcl/sdb013.tcl new file mode 100644 index 00000000..508e2f48 --- /dev/null +++ b/test/tcl/sdb013.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb013 +# TEST Tests in-memory subdatabases. +# TEST Create an in-memory subdb. Test for persistence after +# TEST overflowing the cache. Test for conflicts when we have +# TEST two in-memory files. + +proc sdb013 { method { nentries 10 } args } { + source ./include.tcl + + set tnum "013" + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb$tnum: skipping for method $method" + return + } + + puts "Subdb$tnum: $method ($args) in-memory subdb tests" + + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + set env NULL + incr eindex + set env [lindex $args $eindex] + puts "Subdb$tnum skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. 
+ set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + # Create the env, with a very small cache that we can easily + # fill. If a particularly large page size is specified, make + # the cache a little larger, but still on the small side. + env_cleanup $testdir + set csize {0 65536 1} + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + if { $pagesize > 8192 } { + set cache [expr 8 * $pagesize] + set csize "0 $cache 1" + } + } + + set env [berkdb_env_noerr -create -cachesize $csize -home $testdir -txn] + error_check_good dbenv [is_valid_env $env] TRUE + + # Set filename to NULL; this causes the creation of an in-memory + # subdb. + set testfile "" + set subdb subdb0 + + puts "\tSubdb$tnum.a: Create in-mem subdb, add data, close." + set sdb [eval {berkdb_open_noerr -create -mode 0644} \ + $args -env $env -auto_commit {$omethod $testfile $subdb}] + error_check_good dbopen [is_valid_db $sdb] TRUE + + set ret [sdb013_populate $sdb $method $nentries] + error_check_good populate $ret 0 + error_check_good sdb_close [$sdb close] 0 + + # Do a bunch of writing to evict all pages from the memory pool. + puts "\tSubdb$tnum.b: Create another db, overflow the cache." + set dummyfile foo.db + set db [eval {berkdb_open_noerr -create -mode 0644} $args -env $env\ + -auto_commit $omethod $dummyfile] + error_check_good dummy_open [is_valid_db $db] TRUE + + set entries [expr $nentries * 100] + set ret [sdb013_populate $db $method $entries] + error_check_good dummy_close [$db close] 0 + + # Make sure we can still open the in-memory subdb. + puts "\tSubdb$tnum.c: Check we can still open the in-mem subdb." 
+ set sdb [eval {berkdb_open_noerr} \ + $args -env $env -auto_commit {$omethod $testfile $subdb}] + error_check_good sdb_reopen [is_valid_db $sdb] TRUE + error_check_good sdb_close [$sdb close] 0 + + # Exercise the -m (dump in-memory) option on db_dump. + puts "\tSubdb$tnum.d: Exercise in-memory db_dump." + set stat \ + [catch {eval {exec $util_path/db_dump} -h $testdir -m $subdb} res] + error_check_good dump_successful $stat 0 + + puts "\tSubdb$tnum.e: Remove in-mem subdb." + error_check_good \ + sdb_remove [berkdb dbremove -env $env $testfile $subdb] 0 + + puts "\tSubdb$tnum.f: Check we cannot open the in-mem subdb." + set ret [catch {eval {berkdb_open_noerr} -env $env $args \ + -auto_commit {$omethod $testfile $subdb}} db] + error_check_bad dbopen $ret 0 + + foreach end { commit abort } { + # Create an in-memory database. + puts "\tSubdb$tnum.g: Create in-mem subdb, add data, close." + set sdb [eval {berkdb_open_noerr -create -mode 0644} \ + $args -env $env -auto_commit {$omethod $testfile $subdb}] + error_check_good dbopen [is_valid_db $sdb] TRUE + + set ret [sdb013_populate $sdb $method $nentries] + error_check_good populate $ret 0 + error_check_good sdb_close [$sdb close] 0 + + # Transactionally remove the database. + puts "\tSubdb$tnum.h: Transactionally remove in-mem database." + set txn [$env txn] + error_check_good db_remove \ + [berkdb dbremove -env $env -txn $txn $testfile $subdb] 0 + + # Write a cacheful of data. + puts "\tSubdb$tnum.i: Create another db, overflow the cache." + set db [eval {berkdb_open_noerr -create -mode 0644} $args \ + -env $env -auto_commit $omethod $dummyfile] + error_check_good dummy_open [is_valid_db $db] TRUE + + set entries [expr $nentries * 100] + set ret [sdb013_populate $db $method $entries] + error_check_good dummy_close [$db close] 0 + + # Finish the txn and make sure the database is either + # gone (if committed) or still there (if aborted). 
+ error_check_good txn_$end [$txn $end] 0 + if { $end == "abort" } { + puts "\tSubdb$tnum.j: Check that database still exists." + set sdb [eval {berkdb_open_noerr} $args \ + -env $env -auto_commit {$omethod $testfile $subdb}] + error_check_good sdb_reopen [is_valid_db $sdb] TRUE + error_check_good sdb_close [$sdb close] 0 + } else { + puts "\tSubdb$tnum.j: Check that database is gone." + set ret [catch {eval {berkdb_open_noerr} -env $env \ + $args -auto_commit {$omethod $testfile $subdb}} res] + error_check_bad dbopen $ret 0 + } + } + + error_check_good env_close [$env close] 0 +} + +proc sdb013_populate { db method nentries } { + source ./include.tcl + + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + + set r [ catch {$db put $key [chop_data $method $str]} ret ] + if { $r != 0 } { + close $did + return $ret + } + + incr count + } + close $did + return 0 +} + diff --git a/test/tcl/sdb014.tcl b/test/tcl/sdb014.tcl new file mode 100644 index 00000000..3e85af3a --- /dev/null +++ b/test/tcl/sdb014.tcl @@ -0,0 +1,112 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb014 +# TEST Tests mixing in-memory named and in-memory unnamed dbs. +# TEST Create a regular in-memory db, add data. +# TEST Create a named in-memory db. +# TEST Try to create the same named in-memory db again (should fail). +# TEST Try to create a different named in-memory db (should succeed). 
+# TEST +proc sdb014 { method args } { + source ./include.tcl + + set tnum "014" + set orig_tdir $testdir + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb$tnum: skipping for method $method" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + puts "Subdb$tnum ($method $args):\ + In-memory named dbs with regular in-mem dbs." + + # If we are given an env, use it. Otherwise, open one. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + puts "\tSubdb$tnum.a: Create and populate in-memory unnamed database." 
+ set testfile "" + set db [eval {berkdb_open -env $env -create -mode 0644} \ + $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set pflags "" + set gflags "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + while { [gets $did str] != -1 && $count < 5 } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) $str + } else { + set key $str + } + set ret [eval \ + {$db put} $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + incr count + } + close $did + error_check_good db_close [$db close] 0 + + # Create named in-memory db. Try to create a second in-memory db of + # the same name. Should fail. + puts "\tSubdb$tnum.b: Create in-memory named database." + set subdb "SUBDB" + set db [eval {berkdb_open -env $env -create -excl -mode 0644} \ + $args $omethod {$testfile $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tSubdb$tnum.c: Try to create second inmem database." + set ret [catch {eval {berkdb_open_noerr -env $env -create -excl \ + -mode 0644} $args {$omethod $testfile $subdb}} db1] + error_check_bad dbopen $ret 0 + + # Clean up. Close the env if this test created it. + error_check_good db_close [$db close] 0 + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } + + set testdir $orig_tdir + return +} + diff --git a/test/tcl/sdb015.tcl b/test/tcl/sdb015.tcl new file mode 100644 index 00000000..41372809 --- /dev/null +++ b/test/tcl/sdb015.tcl @@ -0,0 +1,117 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sdb015 +# TEST Tests basic in-memory named database functionality +# TEST Small keys, small data +# TEST Put/get per key +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. +# TEST Then repeat using an environment. +proc sdb015 { method {nentries 1000} args } { + global passwd + global has_crypto + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb015: skipping for method $method" + return + } + + # Skip test if given an env - this test needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Subdb015 skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + set largs $args + subdb015_main $method $nentries $largs + + # Skip remainder of test if release does not support encryption. + if { $has_crypto == 0 } { + return + } + + append largs " -encryptaes $passwd " + subdb015_main $method $nentries $largs +} + +proc subdb015_main { method nentries largs } { + source ./include.tcl + global encrypt + + set largs [convert_args $method $largs] + set omethod [convert_method $method] + + env_cleanup $testdir + + # Run convert_encrypt so that old_encrypt will be reset to + # the proper value and cleanup will work. 
+ convert_encrypt $largs + set encargs "" + set largs [split_encargs $largs encargs] + + set env [eval {berkdb_env -create -cachesize {0 10000000 0} \ + -mode 0644} -home $testdir $encargs] + error_check_good env_open [is_valid_env $env] TRUE + + puts "Subdb015: $method ($largs) basic in-memory named db tests." + subdb015_body $method $omethod $nentries $largs $env + error_check_good env_close [$env close] 0 +} + +proc subdb015_body { method omethod nentries largs env } { + global encrypt + global passwd + source ./include.tcl + + # Create the database and open the dictionary + set subdb subdb0 + set db [eval {berkdb_open -create -mode 0644} $largs \ + {-env $env $omethod "" $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + set pflags "" + set gflags "" + set count 0 + + puts "\tSubdb015.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + set ret [eval \ + {$db put} $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + incr count + } + close $did + error_check_good db_close [$db close] 0 +} + diff --git a/test/tcl/sdb016.tcl b/test/tcl/sdb016.tcl new file mode 100644 index 00000000..f2aec25b --- /dev/null +++ b/test/tcl/sdb016.tcl @@ -0,0 +1,98 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb016 +# TEST Creates many in-memory named dbs and puts a small amount of +# TEST data in each (many defaults to 100) +# TEST +# TEST Use the first 100 entries from the dictionary as names. 
+# TEST Insert each with entry as name of subdatabase and a partial list +# TEST as key/data. After all are entered, retrieve all; compare output +# TEST to original. +proc sdb016 { method {nentries 100} args } { + source ./include.tcl + + set tnum "016" + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb$tnum skipping for method $method" + return + } + + puts "Subdb$tnum: $method ($args) many in-memory named databases" + + # Skip test if given an env - this test needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "\tSubdb$tnum skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + env_cleanup $testdir + + # Set up env. We'll need a big cache. + set csize {0 16777216 1} + set env [berkdb_env -create \ + -cachesize $csize -home $testdir -mode 0644 -txn] + error_check_good env_open [is_valid_env $env] TRUE + + set pflags "" + set gflags "" + set txn "" + set fcount 0 + + # Here is the loop where we put and get each key/data pair + set ndataent 5 + set fdid [open $dict] + puts "\tSubdb$tnum.a: Open $nentries in-memory databases." 
+ while { [gets $fdid str] != -1 && $fcount < $nentries } { + if { $str == "" } { + continue + } + set subdb $str + set db [eval {berkdb_open -create -mode 0644} \ + -env $env $args {$omethod "" $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $ndataent } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret [list [list $key \ + [pad_data $method $str]]] + incr count + } + close $did + error_check_good db_close [$db close] 0 + incr fcount + } + close $fdid + + puts "\tSubdb$tnum.b: Clean up." + error_check_good env_close [$env close] 0 +} + diff --git a/test/tcl/sdb017.tcl b/test/tcl/sdb017.tcl new file mode 100644 index 00000000..65ea0be5 --- /dev/null +++ b/test/tcl/sdb017.tcl @@ -0,0 +1,99 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb017 +# TEST Test DB->rename() for in-memory named databases. +proc sdb017 { method args } { + global errorCode + source ./include.tcl + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb017: Skipping for method $method" + return + } + + set omethod [convert_method $method] + set args [convert_args $method $args] + + puts "Subdb017: $method ($args): DB->rename() for in-memory named dbs." + + # Skip test if given an env - this test needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Subdb017 skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. 
+ # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + # Make sure we're starting from a clean slate. + env_cleanup $testdir + + # Set up env. + set env [berkdb_env_noerr -create -home $testdir -mode 0644] + error_check_good env_open [is_valid_env $env] TRUE + + set oldsdb OLDDB + set newsdb NEWDB + + puts "\tSubdb017.a: Create/rename file" + puts "\t\tSubdb017.a.1: create" + set testfile "" + set db [eval {berkdb_open_noerr -create -mode 0644}\ + $omethod -env $env $args {$testfile $oldsdb}] + error_check_good dbopen [is_valid_db $db] TRUE + + # The nature of the key and data are unimportant; use numeric key + # so record-based methods don't need special treatment. + set key 1 + set data [pad_data $method data] + + error_check_good dbput [eval {$db put} $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\t\tSubdb017.a.2: rename" + error_check_good rename_file [eval {berkdb dbrename} -env $env \ + {$testfile $oldsdb $newsdb}] 0 + + puts "\t\tSubdb017.a.3: check" + # Open again with create to make sure we've really completely + # disassociated the subdb from the old name. + set odb [eval {berkdb_open_noerr -create -mode 0644}\ + $omethod -env $env $args {$testfile $oldsdb}] + error_check_good odb_open [is_valid_db $odb] TRUE + set odbt [$odb get $key] + error_check_good odb_close [$odb close] 0 + + set ndb [eval {berkdb_open_noerr -mode 0644}\ + $omethod -env $env $args {$testfile $newsdb}] + error_check_good ndb_open [is_valid_db $ndb] TRUE + set ndbt [$ndb get $key] + error_check_good ndb_close [$ndb close] 0 + + # The DBT from the "old" database should be empty, not the "new" one. + error_check_good odbt_empty [llength $odbt] 0 + error_check_bad ndbt_empty [llength $ndbt] 0 + error_check_good ndbt [lindex [lindex $ndbt 0] 1] $data + + # Now there's both an old and a new. 
Rename the "new" to the "old" + and make sure that fails. + puts "\tSubdb017.b: Make sure rename fails instead of overwriting" + set errorCode NONE + set ret [catch {eval {berkdb dbrename} -env $env \ + {$testfile $oldsdb $newsdb}} res] + error_check_bad rename_overwrite $ret 0 + error_check_good rename_overwrite_ret [is_substr $errorCode EEXIST] 1 + + error_check_good env_close [$env close] 0 +} + diff --git a/test/tcl/sdb018.tcl b/test/tcl/sdb018.tcl new file mode 100644 index 00000000..95fe831c --- /dev/null +++ b/test/tcl/sdb018.tcl @@ -0,0 +1,156 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb018 +# TEST Tests join of in-memory named databases. +# TEST +# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those work, +# TEST everything else does as well. We'll create test databases called +# TEST sub1.db, sub2.db, sub3.db, and sub4.db. The number on the database +# TEST describes the duplication -- duplicates are of the form 0, N, 2N, 3N, +# TEST ... where N is the number of the database. Primary.db is the primary +# TEST database, and sub0.db is the database that has no matching duplicates. +# TEST All of these are within a single database. +# +# We should test this on all btrees, all hash, and a combination thereof +proc sdb018 {method {nentries 100} args } { + source ./include.tcl + global rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || [is_rbtree $method] } { + puts "\tSubdb018 skipping for method $method." + return + } + + set eindex [lsearch -exact $args "-env"] + # + # If we are given an env, then skip this test. It needs its own. + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Subdb018 skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. 
+ # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + set testfile "" + berkdb srand $rand_init + + foreach opt {" -dup" " -dupsort"} { + env_cleanup $testdir + + set cache [expr 1024 * 1024 * 10] + set env [berkdb_env -create -home $testdir \ + -cachesize "0 $cache 1" ] + append args $opt + set oargs $args + append oargs " -env $env" + + + puts "Subdb018: $method ( $args ) Intra-subdb join" + set txn "" + # + # Get a cursor in each subdb and move past the end of each + # subdb. Make sure we don't end up in another subdb. + # + puts "\tSubdb018.a: Intra-subdb join" + + set psize 8192 + set duplist {0 50 25 16 12} + set numdb [llength $duplist] + build_all_subdb $testfile [list $method] $psize \ + $duplist $nentries $oargs + + # Build the primary + puts "Subdb018: Building the primary database $method" + set oflags "-create -mode 0644 [conv $omethod \ + [berkdb random_int 1 2]]" + set db [eval {berkdb_open} $oflags $oargs {$testfile} primary.db] + error_check_good dbopen [is_valid_db $db] TRUE + for { set i 0 } { $i < 1000 } { incr i } { + set key [format "%04d" $i] + set ret [eval {$db put} $txn {$key stub}] + error_check_good "primary put" $ret 0 + } + error_check_good "primary close" [$db close] 0 + set did [open $dict] + gets $did str + do_join_subdb $testfile primary.db "1 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 
2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 2 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 2 3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "1 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 2" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "2 3 4" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 4 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "0 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "3 2 0" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 2 1" $str $oargs + gets $did str + do_join_subdb $testfile primary.db "4 3 0 1" $str $oargs + + close $did + error_check_good env_close [$env close] 0 + } +} + diff --git a/test/tcl/sdb019.tcl b/test/tcl/sdb019.tcl new file mode 100644 index 00000000..481b0501 --- /dev/null +++ b/test/tcl/sdb019.tcl @@ -0,0 +1,139 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sdb019 +# TEST Tests in-memory subdatabases. +# TEST Create an in-memory subdb. Test for persistence after +# TEST overflowing the cache. Test for conflicts when we have +# TEST two in-memory files. + +proc sdb019 { method { nentries 100 } args } { + source ./include.tcl + + set tnum "019" + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb$tnum: skipping for method $method" + return + } + puts "Subdb$tnum: $method ($args) in-memory subdb tests" + + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + set env NULL + incr eindex + set env [lindex $args $eindex] + puts "Subdb019 skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + # The standard cachesize isn't big enough for 64k pages. + set csize "0 262144 1" + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + if { $pagesize > 16384 } { + set cache [expr 8 * $pagesize] + set csize "0 $cache 1" + } + } + + # Create the env. + env_cleanup $testdir + set env [eval berkdb_env -create {-cachesize $csize} \ + -home $testdir -txn] + error_check_good dbenv [is_valid_env $env] TRUE + + # Set filename to NULL; this allows us to create an in-memory + # named database. + set testfile "" + + # Create two in-memory subdb and test for conflicts. Try all the + # combinations of named (NULL/NAME) and purely temporary + # (NULL/NULL) databases. + # + foreach s1 { S1 "" } { + foreach s2 { S2 "" } { + puts "\tSubdb$tnum.a:\ + 2 in-memory subdbs (NULL/$s1, NULL/$s2)." 
+ set sdb1 [eval {berkdb_open -create -mode 0644} \ + $args -env $env {$omethod $testfile $s1}] + error_check_good sdb1_open [is_valid_db $sdb1] TRUE + set sdb2 [eval {berkdb_open -create -mode 0644} \ + $args -env $env {$omethod $testfile $s2}] + error_check_good sdb1_open [is_valid_db $sdb2] TRUE + + # Subdatabases are open, now put something in. + set string1 STRING1 + set string2 STRING2 + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + error_check_good sdb1_put [$sdb1 put $key \ + [chop_data $method $string1.$key]] 0 + error_check_good sdb2_put [$sdb2 put $key \ + [chop_data $method $string2.$key]] 0 + } + + # If the subs are both NULL/NULL, we have two handles + # on the same db. Skip testing the contents. + if { $s1 != "" || $s2 != "" } { + # This can't work when both subs are NULL/NULL. + # Check contents. + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set ret1 [lindex \ + [lindex [$sdb1 get $key] 0] 1] + error_check_good sdb1_get $ret1 \ + [pad_data $method $string1.$key] + set ret2 [lindex \ + [lindex [$sdb2 get $key] 0] 1] + error_check_good sdb2_get $ret2 \ + [pad_data $method $string2.$key] + } + + error_check_good sdb1_close [$sdb1 close] 0 + error_check_good sdb2_close [$sdb2 close] 0 + + # Reopen, make sure we get the right data. 
+ set sdb1 [eval {berkdb_open -mode 0644} \ + $args -env $env {$omethod $testfile $s1}] + error_check_good \ + sdb1_open [is_valid_db $sdb1] TRUE + set sdb2 [eval {berkdb_open -mode 0644} \ + $args -env $env {$omethod $testfile $s2}] + error_check_good \ + sdb1_open [is_valid_db $sdb2] TRUE + + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set ret1 [lindex \ + [lindex [$sdb1 get $key] 0] 1] + error_check_good sdb1_get $ret1 \ + [pad_data $method $string1.$key] + set ret2 [lindex \ + [lindex [$sdb2 get $key] 0] 1] + error_check_good sdb2_get $ret2 \ + [pad_data $method $string2.$key] + } + } + error_check_good sdb1_close [$sdb1 close] 0 + error_check_good sdb2_close [$sdb2 close] 0 + } + } + error_check_good env_close [$env close] 0 +} + diff --git a/test/tcl/sdb020.tcl b/test/tcl/sdb020.tcl new file mode 100644 index 00000000..be2f1443 --- /dev/null +++ b/test/tcl/sdb020.tcl @@ -0,0 +1,124 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdb020 +# TEST Tests in-memory subdatabases. +# TEST Create an in-memory subdb with one page size. Close, and +# TEST open with a different page size: should fail. + +proc sdb020 { method { nentries 10 } args } { + source ./include.tcl + global errorCode + + set tnum "020" + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_queueext $method] == 1 || [is_heap $method] == 1 } { + puts "Subdb$tnum: skipping for method $method" + return + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Subdb$tnum: skipping for specific page sizes." + return + } + + # If we are using an env, then skip this test. It needs its own. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + set env NULL + incr eindex + set env [lindex $args $eindex] + puts "Subdb020 skipping for env $env" + return + } + + # In-memory dbs never go to disk, so we can't do checksumming. + # If the test module sent in the -chksum arg, get rid of it. + set chkindex [lsearch -exact $args "-chksum"] + if { $chkindex != -1 } { + set args [lreplace $args $chkindex $chkindex] + } + + puts "Subdb$tnum: $method ($args) \ + in-memory named db tests with different pagesizes" + + # Create the env. + env_cleanup $testdir + set env [berkdb_env_noerr -create -home $testdir -txn] + error_check_good dbenv [is_valid_env $env] TRUE + + # Set filename to NULL; this causes the creation of an in-memory + # subdb. + set testfile "" + set name NAME + + puts "\tSubdb$tnum.a: Create in-mem named db with default page size." + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $args -env $env {$omethod $testfile $name}] + error_check_good dbopen [is_valid_db $db] TRUE + + # Figure out the default page size so we can try to open + # later with a different value. + set psize [stat_field $db stat "Page size"] + if { $psize == 512 } { + set psize2 2048 + } else { + set psize2 512 + } + + error_check_good db_close [$db close] 0 + + # Try to open again with a different page size (should fail). + puts "\tSubdb$tnum.b: Try to reopen with different page size." + set errorCode NONE + catch {set db [eval {berkdb_open_noerr} $args -env $env \ + -pagesize $psize2 {$omethod $testfile $name}]} res + error_check_good expect_error [is_substr $errorCode EINVAL] 1 + + # Try to open again with the correct pagesize (should succeed). + puts "\tSubdb$tnum.c: Reopen with original page size." + set db [eval {berkdb_open_noerr} $args -env $env \ + -pagesize $psize {$omethod $testfile $name}] + # Close DB + error_check_good db_close [$db close] 0 + + puts "\tSubdb$tnum.d: Create in-mem named db with specific page size." 
+ set psize 8192 + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $args -env $env -pagesize $psize {$omethod $testfile $name}] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + # Try to open again with a different page size (should fail). + set psize2 [expr $psize / 2] + puts "\tSubdb$tnum.e: Try to reopen with different page size." + set errorCode NONE + catch {set db [eval {berkdb_open_noerr} $args -env $env \ + -pagesize $psize2 {$omethod $testfile $name}]} res + error_check_good expect_error [is_substr $errorCode EINVAL] 1 + + # Try to open again with the correct pagesize (should succeed). + puts "\tSubdb$tnum.f: Reopen with original page size." + set db [eval {berkdb_open} $args -env $env \ + -pagesize $psize {$omethod $testfile $name}] + + # Try to open a different database with a different page size + # (should succeed). + puts "\tSubdb$tnum.g: Open different db with different page size." + set newname NEWNAME + set db2 [eval {berkdb_open} -create $args -env $env \ + -pagesize $psize2 {$omethod $testfile $newname}] + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good db2_close [$db2 close] 0 + error_check_good env_close [$env close] 0 +} + + diff --git a/test/tcl/sdbscript.tcl b/test/tcl/sdbscript.tcl new file mode 100644 index 00000000..1512286f --- /dev/null +++ b/test/tcl/sdbscript.tcl @@ -0,0 +1,46 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Usage: subdbscript testfile subdbnumber factor +# testfile: name of DB itself +# subdbnumber: n, subdb indicator, of form sub$n.db +# factor: Delete over factor'th + n'th from my subdb. +# +# I.e. if factor is 10, and n is 0, remove entries, 0, 10, 20, ... +# if factor is 10 and n is 1, remove entries 1, 11, 21, ... 
+source ./include.tcl +source $test_path/test.tcl + +set usage "subdbscript testfile subdbnumber factor" + +# Verify usage +if { $argc != 3 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set testfile [lindex $argv 0] +set n [ lindex $argv 1 ] +set factor [ lindex $argv 2 ] + +set db [berkdb_open -unknown $testfile sub$n.db] +error_check_good db_open [is_valid_db $db] TRUE + +set dbc [$db cursor] +error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE +set i 1 +for {set d [$dbc get -first]} {[llength $d] != 0} {set d [$dbc get -next]} { + set x [expr $i - $n] + if { $x >= 0 && [expr $x % $factor] == 0 } { + puts "Deleting $d" + error_check_good dbc_del [$dbc del] 0 + } + incr i +} +error_check_good db_close [$db close] 0 + +exit diff --git a/test/tcl/sdbtest001.tcl b/test/tcl/sdbtest001.tcl new file mode 100644 index 00000000..5f5988f6 --- /dev/null +++ b/test/tcl/sdbtest001.tcl @@ -0,0 +1,149 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdbtest001 +# TEST Tests multiple access methods in one subdb +# TEST Open several subdbs, each with a different access method +# TEST Small keys, small data +# TEST Put/get per key per subdb +# TEST Dump file, verify per subdb +# TEST Close, reopen per subdb +# TEST Dump file, verify per subdb +# TEST +# TEST Make several subdb's of different access methods all in one DB. +# TEST Rotate methods and repeat [#762]. +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. 
+proc sdbtest001 { {nentries 10000} } { + source ./include.tcl + + puts "Subdbtest001: many different subdb access methods in one" + + # Create the database and open the dictionary + set testfile $testdir/subdbtest001.db + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + set txn "" + set count 0 + + # Set up various methods to rotate through + lappend method_list [list "-rrecno" "-rbtree" "-hash" "-recno" "-btree"] + lappend method_list [list "-recno" "-hash" "-btree" "-rbtree" "-rrecno"] + lappend method_list [list "-btree" "-recno" "-rbtree" "-rrecno" "-hash"] + lappend method_list [list "-hash" "-recno" "-rbtree" "-rrecno" "-btree"] + lappend method_list [list "-rbtree" "-hash" "-btree" "-rrecno" "-recno"] + lappend method_list [list "-rrecno" "-recno"] + lappend method_list [list "-recno" "-rrecno"] + lappend method_list [list "-hash" "-dhash"] + lappend method_list [list "-dhash" "-hash"] + lappend method_list [list "-rbtree" "-btree" "-dbtree" "-ddbtree"] + lappend method_list [list "-btree" "-rbtree" "-ddbtree" "-dbtree"] + lappend method_list [list "-dbtree" "-ddbtree" "-btree" "-rbtree"] + lappend method_list [list "-ddbtree" "-dbtree" "-rbtree" "-btree"] + set plist [list 512 8192 1024 4096 2048 16384] + set mlen [llength $method_list] + set plen [llength $plist] + while { $plen < $mlen } { + set plist [concat $plist $plist] + set plen [llength $plist] + } + set pgsz 0 + foreach methods $method_list { + cleanup $testdir NULL + puts "\tSubdbtest001.a: create subdbs of different access methods:" + puts "\tSubdbtest001.a: $methods" + set nsubdbs [llength $methods] + set duplist "" + for { set i 0 } { $i < $nsubdbs } { incr i } { + lappend duplist -1 + } + set psize [lindex $plist $pgsz] + incr pgsz + set newent [expr $nentries / $nsubdbs] + build_all_subdb $testfile $methods $psize $duplist $newent + + # Now we will get each key from the DB and compare the results + # to the original. 
+ for { set subdb 0 } { $subdb < $nsubdbs } { incr subdb } { + + set method [lindex $methods $subdb] + set method [convert_method $method] + if { [is_record_based $method] == 1 } { + set checkfunc subdbtest001_recno.check + } else { + set checkfunc subdbtest001.check + } + + puts "\tSubdbtest001.b: dump file sub$subdb.db" + set db [berkdb_open -unknown $testfile sub$subdb.db] + dump_file $db $txn $t1 $checkfunc + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the + # dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $newent} {incr i} { + puts $oid [expr $subdb * $newent + $i] + } + close $oid + file rename -force $t1 $t3 + } else { + # filehead uses 1-based line numbers + set beg [expr $subdb * $newent] + incr beg + set end [expr $beg + $newent - 1] + filehead $end $dict $t3 $beg + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Subdbtest001:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tSubdbtest001.c: sub$subdb.db: close, open, and dump file" + # Now, reopen the file and run the last test again. + open_and_dump_subfile $testfile NULL $t1 $checkfunc \ + dump_file_direction "-first" "-next" sub$subdb.db + if { [string compare $method "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Subdbtest001:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. 
+ puts "\tSubdbtest001.d: sub$subdb.db: close, open, and dump file in reverse direction" + open_and_dump_subfile $testfile NULL $t1 $checkfunc \ + dump_file_direction "-last" "-prev" sub$subdb.db + + if { [string compare $method "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Subdbtest001:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + } + } +} + +# Check function for Subdbtest001; keys and data are identical +proc subdbtest001.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc subdbtest001_recno.check { key data } { +global dict +global kvals + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/sdbtest002.tcl b/test/tcl/sdbtest002.tcl new file mode 100644 index 00000000..465e480e --- /dev/null +++ b/test/tcl/sdbtest002.tcl @@ -0,0 +1,167 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sdbtest002 +# TEST Tests multiple access methods in one subdb access by multiple +# TEST processes. +# TEST Open several subdbs, each with a different access method +# TEST Small keys, small data +# TEST Put/get per key per subdb +# TEST Fork off several child procs to each delete selected +# TEST data from their subdb and then exit +# TEST Dump file, verify contents of each subdb is correct +# TEST Close, reopen per subdb +# TEST Dump file, verify per subdb +# TEST +# TEST Make several subdb's of different access methods all in one DB. +# TEST Fork of some child procs to each manipulate one subdb and when +# TEST they are finished, verify the contents of the databases. +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. 
+proc sdbtest002 { {nentries 10000} } { + source ./include.tcl + + puts "Subdbtest002: many different subdb access methods in one" + + # Create the database and open the dictionary + set testfile $testdir/subdbtest002.db + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + set txn "" + set count 0 + + # Set up various methods to rotate through + set methods \ + [list "-rbtree" "-recno" "-btree" "-btree" "-recno" "-rbtree"] + cleanup $testdir NULL + puts "\tSubdbtest002.a: create subdbs of different methods: $methods" + set psize 4096 + set nsubdbs [llength $methods] + set duplist "" + for { set i 0 } { $i < $nsubdbs } { incr i } { + lappend duplist -1 + } + set newent [expr $nentries / $nsubdbs] + + # + # XXX We need dict sorted to figure out what was deleted + # since things are stored sorted in the btree. + # + filesort $dict $t4 + set dictorig $dict + set dict $t4 + + build_all_subdb $testfile $methods $psize $duplist $newent + + # Now we will get each key from the DB and compare the results + # to the original. 
+ set pidlist "" + puts "\tSubdbtest002.b: create $nsubdbs procs to delete some keys" + for { set subdb 0 } { $subdb < $nsubdbs } { incr subdb } { + puts "$tclsh_path\ + $test_path/sdbscript.tcl $testfile \ + $subdb $nsubdbs >& $testdir/subdb002.log.$subdb" + set p [exec $tclsh_path $test_path/wrap.tcl \ + sdbscript.tcl \ + $testdir/subdb002.log.$subdb $testfile $subdb $nsubdbs &] + lappend pidlist $p + } + watch_procs $pidlist 5 + + for { set subdb 0 } { $subdb < $nsubdbs } { incr subdb } { + set method [lindex $methods $subdb] + set method [convert_method $method] + if { [is_record_based $method] == 1 } { + set checkfunc subdbtest002_recno.check + } else { + set checkfunc subdbtest002.check + } + + puts "\tSubdbtest002.b: dump file sub$subdb.db" + set db [berkdb_open -unknown $testfile sub$subdb.db] + error_check_good db_open [is_valid_db $db] TRUE + dump_file $db $txn $t1 $checkfunc + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $newent} {incr i} { + set x [expr $i - $subdb] + if { [expr $x % $nsubdbs] != 0 } { + puts $oid [expr $subdb * $newent + $i] + } + } + close $oid + file rename -force $t1 $t3 + } else { + set oid [open $t4 r] + for {set i 1} {[gets $oid line] >= 0} {incr i} { + set farr($i) $line + } + close $oid + + set oid [open $t2 w] + for {set i 1} {$i <= $newent} {incr i} { + # Sed uses 1-based line numbers + set x [expr $i - $subdb] + if { [expr $x % $nsubdbs] != 0 } { + set beg [expr $subdb * $newent] + set beg [expr $beg + $i] + puts $oid $farr($beg) + } + } + close $oid + filesort $t1 $t3 + } + + error_check_good Subdbtest002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tSubdbtest002.c: sub$subdb.db: close, open, and dump file" + # Now, reopen the file and run the last test again. 
+ open_and_dump_subfile $testfile NULL $t1 $checkfunc \ + dump_file_direction "-first" "-next" sub$subdb.db + if { [string compare $method "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Subdbtest002:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. + puts "\tSubdbtest002.d: sub$subdb.db: close, open, and dump file in reverse direction" + open_and_dump_subfile $testfile NULL $t1 $checkfunc \ + dump_file_direction "-last" "-prev" sub$subdb.db + + if { [string compare $method "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Subdbtest002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + } + set dict $dictorig + return +} + +# Check function for Subdbtest002; keys and data are identical +proc subdbtest002.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc subdbtest002_recno.check { key data } { +global dict +global kvals + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/sdbutils.tcl b/test/tcl/sdbutils.tcl new file mode 100644 index 00000000..48725d6e --- /dev/null +++ b/test/tcl/sdbutils.tcl @@ -0,0 +1,196 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +proc build_all_subdb { dbname methods psize dups {nentries 100} {dbargs ""}} { + set nsubdbs [llength $dups] + set mlen [llength $methods] + set savearg $dbargs + for {set i 0} {$i < $nsubdbs} { incr i } { + set m [lindex $methods [expr $i % $mlen]] + set dbargs $savearg + subdb_build $dbname $nentries [lindex $dups $i] \ + $i $m $psize sub$i.db $dbargs + } +} + +proc subdb_build { name nkeys ndups dup_interval method psize subdb dbargs} { + source ./include.tcl + + set dbargs [convert_args $method $dbargs] + set omethod [convert_method $method] + + puts "Method: $method" + + set txnenv 0 + set eindex [lsearch -exact $dbargs "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $dbargs $eindex] + set txnenv [is_txnenv $env] + } + # Create the database and open the dictionary + set oflags "-create -mode 0644 $omethod \ + -pagesize $psize $dbargs {$name} $subdb" + set db [eval {berkdb_open} $oflags] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + set count 0 + if { $ndups >= 0 } { + puts "\tBuilding $method {$name} $subdb. \ + $nkeys keys with $ndups duplicates at interval of $dup_interval" + } + if { $ndups < 0 } { + puts "\tBuilding $method {$name} $subdb. 
\ + $nkeys unique keys of pagesize $psize" + # + # If ndups is < 0, we want unique keys in each subdb, + # so skip ahead in the dict by nkeys * iteration + # + for { set count 0 } \ + { $count < [expr $nkeys * $dup_interval] } { + incr count} { + set ret [gets $did str] + if { $ret == -1 } { + break + } + } + } + set txn "" + for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } { + incr count} { + for { set i 0 } { $i < $ndups } { incr i } { + set data [format "%04d" [expr $i * $dup_interval]] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$str \ + [chop_data $method $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { $ndups == 0 } { + set ret [eval {$db put} $txn {$str \ + [chop_data $method NODUP]}] + error_check_good put $ret 0 + } elseif { $ndups < 0 } { + if { [is_record_based $method] == 1 } { + global kvals + + set num [expr $nkeys * $dup_interval] + set num [expr $num + $count + 1] + set ret [eval {$db put} $txn {$num \ + [chop_data $method $str]}] + set kvals($num) [pad_data $method $str] + error_check_good put $ret 0 + } else { + set ret [eval {$db put} $txn \ + {$str [chop_data $method $str]}] + error_check_good put $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + close $did + error_check_good close:$name [$db close] 0 +} + +proc do_join_subdb { db primary subdbs key oargs } { + source ./include.tcl + + puts "\tJoining: $subdbs on $key" + + # Open all the databases + set p [eval {berkdb_open -unknown} $oargs { $db } $primary] + error_check_good "primary open" [is_valid_db $p] TRUE + + set dblist "" + set curslist "" + + foreach i $subdbs { + set jdb [eval {berkdb_open -unknown} $oargs { $db } sub$i.db] + error_check_good 
"sub$i.db open" [is_valid_db $jdb] TRUE + + lappend jlist [list $jdb $key] + lappend dblist $jdb + + } + + set join_res [eval {$p get_join} $jlist] + set ndups [llength $join_res] + + # Calculate how many dups we expect. + # We go through the list of indices. If we find a 0, then we + # expect 0 dups. For everything else, we look at pairs of numbers, + # if the are relatively prime, multiply them and figure out how + # many times that goes into 50. If they aren't relatively prime, + # take the number of times the larger goes into 50. + set expected 50 + set last 1 + foreach n $subdbs { + if { $n == 0 } { + set expected 0 + break + } + if { $last == $n } { + continue + } + + if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } { + if { $n > $last } { + set last $n + set expected [expr 50 / $last] + } + } else { + set last [expr $n * $last / [gcd $n $last]] + set expected [expr 50 / $last] + } + } + + error_check_good number_of_dups:$subdbs $ndups $expected + + # + # If we get here, we have the number expected, now loop + # through each and see if it is what we expected. + # + for { set i 0 } { $i < $ndups } { incr i } { + set pair [lindex $join_res $i] + set k [lindex $pair 0] + foreach j $subdbs { + error_check_bad valid_dup:$j:$subdbs $j 0 + set kval [string trimleft $k 0] + if { [string length $kval] == 0 } { + set kval 0 + } + error_check_good \ + valid_dup:$j:$subdbs [expr $kval % $j] 0 + } + } + + error_check_good close_primary [$p close] 0 + foreach i $dblist { + error_check_good close_index:$i [$i close] 0 + } +} + +proc n_to_subname { n } { + if { $n == 0 } { + return null.db; + } else { + return sub$n.db; + } +} diff --git a/test/tcl/sec001.tcl b/test/tcl/sec001.tcl new file mode 100644 index 00000000..30ae7da6 --- /dev/null +++ b/test/tcl/sec001.tcl @@ -0,0 +1,222 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST sec001 +# TEST Test of security interface +proc sec001 { } { + global errorInfo + global errorCode + global has_crypto + global is_hp_test + + source ./include.tcl + # Skip test if release does not support encryption. + if { $has_crypto == 0 } { + puts "Skipping test sec001 for non-crypto release." + return + } + + set testfile1 env1.db + set testfile2 $testdir/env2.db + set subdb1 sub1 + set subdb2 sub2 + + puts "Sec001: Test of basic encryption interface." + env_cleanup $testdir + + set passwd1 "passwd1" + set passwd1_bad "passwd1_bad" + set passwd2 "passwd2" + set key "key" + set data "data" + + # + # This first group tests bad create scenarios and also + # tests attempting to use encryption after creating a + # non-encrypted env/db to begin with. + # + set nopass "" + puts "\tSec001.a.1: Create db with encryption." + set db [berkdb_open -create -encryptaes $passwd1 -btree $testfile2] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\tSec001.a.2: Open db without encryption." + set stat [catch {berkdb_open_noerr $testfile2} ret] + error_check_good db:nocrypto $stat 1 + error_check_good db:fail [is_substr $ret "no encryption key"] 1 + + set ret [berkdb dbremove -encryptaes $passwd1 $testfile2] + + puts "\tSec001.b.1: Create db without encryption or checksum." + set db [berkdb_open -create -btree $testfile2] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\tSec001.b.2: Open db with encryption." + set stat [catch {berkdb_open_noerr -encryptaes $passwd1 $testfile2} ret] + error_check_good db:nocrypto $stat 1 + error_check_good db:fail [is_substr $ret "supplied encryption key"] 1 + + set ret [berkdb dbremove $testfile2] + + puts "\tSec001.c.1: Create db with checksum." 
+ set db [berkdb_open -create -chksum -btree $testfile2] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\tSec001.c.2: Open db with encryption." + set stat [catch {berkdb_open_noerr -encryptaes $passwd1 $testfile2} ret] + error_check_good db:nocrypto $stat 1 + error_check_good db:fail [is_substr $ret "supplied encryption key"] 1 + + set ret [berkdb dbremove $testfile2] + + puts "\tSec001.d.1: Create subdb with encryption." + set db [berkdb_open -create -encryptaes $passwd1 -btree \ + $testfile2 $subdb1] + error_check_good subdb [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\tSec001.d.2: Create 2nd subdb without encryption." + set stat [catch {berkdb_open_noerr -create -btree \ + $testfile2 $subdb2} ret] + error_check_good subdb:nocrypto $stat 1 + error_check_good subdb:fail [is_substr $ret "no encryption key"] 1 + + set ret [berkdb dbremove -encryptaes $passwd1 $testfile2] + + puts "\tSec001.e.1: Create subdb without encryption or checksum." + set db [berkdb_open -create -btree $testfile2 $subdb1] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + puts "\tSec001.e.2: Create 2nd subdb with encryption." + set stat [catch {berkdb_open_noerr -create -btree -encryptaes $passwd1 \ + $testfile2 $subdb2} ret] + error_check_good subdb:nocrypto $stat 1 + error_check_good subdb:fail [is_substr $ret "supplied encryption key"] 1 + + env_cleanup $testdir + + puts "\tSec001.f.1: Open env with encryption, empty passwd." + set stat [catch {berkdb_env_noerr -create -home $testdir \ + -encryptaes $nopass} ret] + error_check_good env:nopass $stat 1 + error_check_good env:fail [is_substr $ret "Empty password"] 1 + + puts "\tSec001.f.2: Create without encryption algorithm (DB_ENCRYPT_ANY)." 
+ set stat [catch {berkdb_env_noerr -create -home $testdir \ + -encryptany $passwd1} ret] + error_check_good env:any $stat 1 + error_check_good env:fail [is_substr $ret "algorithm not supplied"] 1 + + puts "\tSec001.f.3: Create without encryption." + set env [berkdb_env -create -home $testdir] + error_check_good env [is_valid_env $env] TRUE + + # Skip this piece of the test on HP-UX, where we can't + # join the env. + if { $is_hp_test != 1 } { + puts "\tSec001.f.4: Open again with encryption." + set stat [catch {berkdb_env_noerr -home $testdir \ + -encryptaes $passwd1} ret] + error_check_good env:unencrypted $stat 1 + error_check_good env:fail [is_substr $ret \ + "Joining non-encrypted environment"] 1 + } + + error_check_good envclose [$env close] 0 + + env_cleanup $testdir + + # + # This second group tests creating and opening a secure env. + # We test that others can join successfully, and that other's with + # bad/no passwords cannot. Also test that we cannot use the + # db->set_encrypt method when we've already got a secure dbenv. + # + puts "\tSec001.g.1: Open with encryption." + set env [berkdb_env_noerr -create -home $testdir -encryptaes $passwd1] + error_check_good env [is_valid_env $env] TRUE + + # We can't open an env twice in HP-UX, so skip the rest. + if { $is_hp_test == 1 } { + puts "Skipping remainder of test for HP-UX." + error_check_good env_close [$env close] 0 + return + } + + puts "\tSec001.g.2: Open again with encryption - same passwd." + set env1 [berkdb_env -home $testdir -encryptaes $passwd1] + error_check_good env [is_valid_env $env1] TRUE + error_check_good envclose [$env1 close] 0 + + puts "\tSec001.g.3: Open again with any encryption (DB_ENCRYPT_ANY)." + set env1 [berkdb_env -home $testdir -encryptany $passwd1] + error_check_good env [is_valid_env $env1] TRUE + error_check_good envclose [$env1 close] 0 + + puts "\tSec001.g.4: Open with encryption - different length passwd." 
+ set stat [catch {berkdb_env_noerr -home $testdir \ + -encryptaes $passwd1_bad} ret] + error_check_good env:$passwd1_bad $stat 1 + error_check_good env:fail [is_substr $ret "Invalid password"] 1 + + puts "\tSec001.g.5: Open with encryption - different passwd." + set stat [catch {berkdb_env_noerr -home $testdir \ + -encryptaes $passwd2} ret] + error_check_good env:$passwd2 $stat 1 + error_check_good env:fail [is_substr $ret "Invalid password"] 1 + + puts "\tSec001.g.6: Open env without encryption." + set stat [catch {berkdb_env_noerr -home $testdir} ret] + error_check_good env:$passwd2 $stat 1 + error_check_good env:fail [is_substr $ret "Encrypted environment"] 1 + + puts "\tSec001.g.7: Open database with encryption in env" + set stat [catch {berkdb_open_noerr -env $env -btree -create \ + -encryptaes $passwd2 $testfile1} ret] + error_check_good db:$passwd2 $stat 1 + error_check_good env:fail [is_substr $ret "method not permitted"] 1 + + puts "\tSec001.g.8: Close creating env" + error_check_good envclose [$env close] 0 + + # + # This third group tests opening the env after the original env + # handle is closed. Just to make sure we can reopen it in + # the right fashion even if no handles are currently open. + # + puts "\tSec001.h.1: Reopen without encryption." + set stat [catch {berkdb_env_noerr -home $testdir} ret] + error_check_good env:noencrypt $stat 1 + error_check_good env:fail [is_substr $ret "Encrypted environment"] 1 + + puts "\tSec001.h.2: Reopen with bad passwd." + set stat [catch {berkdb_env_noerr -home $testdir -encryptaes \ + $passwd1_bad} ret] + error_check_good env:$passwd1_bad $stat 1 + error_check_good env:fail [is_substr $ret "Invalid password"] 1 + + puts "\tSec001.h.3: Reopen with encryption." + set env [berkdb_env -create -home $testdir -encryptaes $passwd1] + error_check_good env [is_valid_env $env] TRUE + + puts "\tSec001.h.4: 2nd Reopen with encryption." 
+ set env1 [berkdb_env -home $testdir -encryptaes $passwd1] + error_check_good env [is_valid_env $env1] TRUE + + error_check_good envclose [$env1 close] 0 + error_check_good envclose [$env close] 0 + + puts "\tSec001 complete." +} diff --git a/test/tcl/sec002.tcl b/test/tcl/sec002.tcl new file mode 100644 index 00000000..396dd450 --- /dev/null +++ b/test/tcl/sec002.tcl @@ -0,0 +1,180 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sec002 +# TEST Test of security interface and catching errors in the +# TEST face of attackers overwriting parts of existing files. +proc sec002 { } { + global errorInfo + global errorCode + global has_crypto + + source ./include.tcl + + # Skip test if release does not support encryption. + if { $has_crypto == 0 } { + puts "Skipping test sec002 for non-crypto release." + return + } + + set testfile1 $testdir/sec002-1.db + set testfile2 $testdir/sec002-2.db + set testfile3 $testdir/sec002-3.db + set testfile4 $testdir/sec002-4.db + + puts "Sec002: Test of basic encryption interface." + env_cleanup $testdir + + set passwd1 "passwd1" + set passwd2 "passwd2" + set key "key" + set data "data" + set pagesize 1024 + + # + # Set up 4 databases, two encrypted, but with different passwords + # and one unencrypt, but with checksumming turned on and one + # unencrypted and no checksumming. Place the exact same data + # in each one. 
+ # + puts "\tSec002.a: Setup databases" + set db_cmd "-create -pagesize $pagesize -btree " + set db [eval {berkdb_open} -encryptaes $passwd1 $db_cmd $testfile1] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + set db [eval {berkdb_open} -encryptaes $passwd2 $db_cmd $testfile2] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + set db [eval {berkdb_open} -chksum $db_cmd $testfile3] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + set db [eval {berkdb_open} $db_cmd $testfile4] + error_check_good db [is_valid_db $db] TRUE + error_check_good dbput [$db put $key $data] 0 + error_check_good dbclose [$db close] 0 + + # + # If we reopen the normal file with the -chksum flag, there + # should be no error and checksumming should be ignored. + # If we reopen a checksummed file without the -chksum flag, + # checksumming should still be in effect. [#6959] + # + puts "\tSec002.b: Inheritance of chksum properties" + puts "\t\tSec002.b1: Reopen ordinary file with -chksum flag" + set db [eval {berkdb_open} -chksum $testfile4] + error_check_good open_with_chksum [is_valid_db $db] TRUE + set retdata [$db get $key] + error_check_good testfile4_get [lindex [lindex $retdata 0] 1] $data + error_check_good dbclose [$db close] 0 + + puts "\t\tSec002.b2: Reopen checksummed file without -chksum flag" + set db [eval {berkdb_open} $testfile3] + error_check_good open_wo_chksum [is_valid_db $db] TRUE + set retdata [$db get $key] + error_check_good testfile3_get [lindex [lindex $retdata 0] 1] $data + error_check_good dbclose [$db close] 0 + + # + # First just touch some bits in the file. We know that in btree + # meta pages, bytes 92-459 are unused. Scribble on them in both + # an encrypted, and both unencrypted files. 
We should get + # a checksum error for the encrypted, and checksummed files. + # We should get no error for the normal file. + # + set fidlist {} + set fid [open $testfile1 r+] + lappend fidlist $fid + set fid [open $testfile3 r+] + lappend fidlist $fid + set fid [open $testfile4 r+] + lappend fidlist $fid + + puts "\tSec002.c: Overwrite unused space in meta-page" + foreach f $fidlist { + fconfigure $f -translation binary + seek $f 100 start + set byte [read $f 1] + binary scan $byte c val + set newval [expr ~$val] + set newbyte [binary format c $newval] + seek $f 100 start + puts -nonewline $f $newbyte + close $f + } + puts "\tSec002.d: Reopen modified databases" + set stat [catch {berkdb_open_noerr -encryptaes $passwd1 $testfile1} ret] + error_check_good db:$testfile1 $stat 1 + error_check_good db:$testfile1:fail \ + [is_substr $ret "metadata page checksum error"] 1 + + set stat [catch {berkdb_open_noerr -chksum $testfile3} ret] + error_check_good db:$testfile3 $stat 1 + error_check_good db:$testfile3:fail \ + [is_substr $ret "metadata page checksum error"] 1 + + set stat [catch {berkdb_open_noerr $testfile4} db] + error_check_good db:$testfile4 $stat 0 + error_check_good dbclose [$db close] 0 + + # Skip the remainder of the test for Windows platforms. + # Forcing the error which causes DB_RUNRECOVERY to be + # returned ends up leaving open files that cannot be removed. 
+ if { $is_windows_test == 1 } { + cleanup $testdir NULL 1 + puts "Skipping remainder of test for Windows" + return + } + + puts "\tSec002.e: Replace root page in encrypted w/ encrypted" + set fid1 [open $testfile1 r+] + fconfigure $fid1 -translation binary + set fid2 [open $testfile2 r+] + fconfigure $fid2 -translation binary + seek $fid1 $pagesize start + seek $fid2 $pagesize start + fcopy $fid1 $fid2 -size $pagesize + close $fid1 + close $fid2 + + set db [berkdb_open_noerr -encryptaes $passwd2 $testfile2] + error_check_good db [is_valid_db $db] TRUE + set stat [catch {$db get $key} ret] + error_check_good dbget $stat 1 + error_check_good db:$testfile2:fail1 \ + [is_substr $ret "checksum error"] 1 + set stat [catch {$db close} ret] + error_check_good dbclose $stat 1 + error_check_good db:$testfile2:fail2 [is_substr $ret "DB_RUNRECOVERY"] 1 + + puts "\tSec002.f: Replace root page in encrypted w/ unencrypted" + set fid2 [open $testfile2 r+] + fconfigure $fid2 -translation binary + set fid4 [open $testfile4 r+] + fconfigure $fid4 -translation binary + seek $fid2 $pagesize start + seek $fid4 $pagesize start + fcopy $fid4 $fid2 -size $pagesize + close $fid4 + close $fid2 + + set db [berkdb_open_noerr -encryptaes $passwd2 $testfile2] + error_check_good db [is_valid_db $db] TRUE + set stat [catch {$db get $key} ret] + error_check_good dbget $stat 1 + error_check_good db:$testfile2:fail \ + [is_substr $ret "checksum error"] 1 + set stat [catch {$db close} ret] + error_check_good dbclose $stat 1 + error_check_good db:$testfile2:fail [is_substr $ret "DB_RUNRECOVERY"] 1 + + cleanup $testdir NULL 1 +} diff --git a/test/tcl/shelltest.tcl b/test/tcl/shelltest.tcl new file mode 100644 index 00000000..4bd5f57f --- /dev/null +++ b/test/tcl/shelltest.tcl @@ -0,0 +1,80 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST scr### +# TEST The scr### directories are shell scripts that test a variety of +# TEST things, including things about the distribution itself. These +# TEST tests won't run on most systems, so don't even try to run them. +# +# shelltest.tcl: +# Code to run shell script tests, to incorporate Java, C++, +# example compilation, etc. test scripts into the Tcl framework. +proc shelltest {{ run_one 0 } { xml 0 }} { + source ./include.tcl + global shelltest_list + global xmlshelltest_list + + set SH /bin/sh + if { [file executable $SH] != 1 } { + puts "Shell tests require valid shell /bin/sh: not found." + puts "Skipping shell tests." + return 0 + } + + if { $xml == 1 } { + set shelltest_list $xmlshelltest_list + } + + if { $run_one == 0 } { + puts "Running shell script tests..." + + foreach testpair $shelltest_list { + set dir [lindex $testpair 0] + set test [lindex $testpair 1] + set rundir [lindex $testpair 2] + + env_cleanup $testdir + file mkdir $testdir/$rundir + shelltest_copy $test_path/../$dir $testdir/$rundir + shelltest_run $SH $dir $test $testdir/$rundir + } + } else { + set run_one [expr $run_one - 1]; + set dir [lindex [lindex $shelltest_list $run_one] 0] + set test [lindex [lindex $shelltest_list $run_one] 1] + set rundir [lindex [lindex $shelltest_list $run_one] 2] + + env_cleanup $testdir + file mkdir $testdir/$rundir + shelltest_copy $test_path/../$dir $testdir/$rundir + shelltest_run $SH $dir $test $testdir/$rundir + } +} + +proc shelltest_copy { fromdir todir } { + set globall [glob $fromdir/*] + + foreach f $globall { + file copy -force $f $todir/ + } +} + +proc shelltest_run { sh srcdir test testdir } { + puts "Running shell script $srcdir ($test)..." 
+ + set ret [catch {exec $sh -c "cd $testdir && sh $test" >&@ stdout} res] + + if { $ret != 0 } { + puts "FAIL: shell test $srcdir/$test exited abnormally" + } +} + +proc run_c {} { shelltest 1 } +proc run_cxx {} { shelltest 2 } +proc run_junit {} { shelltest 3 } +proc run_java_compat {} { shelltest 4 } +proc run_sql_codegen {} { shelltest 5 } +proc run_xa {} { shelltest 6 } diff --git a/test/tcl/si001.tcl b/test/tcl/si001.tcl new file mode 100644 index 00000000..fb1504f6 --- /dev/null +++ b/test/tcl/si001.tcl @@ -0,0 +1,307 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si001 +# TEST Secondary index put/delete with lorder test +# TEST +# TEST Put data in primary db and check that pget on secondary +# TEST index finds the right entries. Alter the primary in the +# TEST following ways, checking for correct data each time: +# TEST Overwrite data in primary database. +# TEST Delete half of entries through primary. +# TEST Delete half of remaining entries through secondary. +# TEST Append data (for record-based primaries only). +proc si001 { methods {nentries 200} {tnum "001"} args } { + source ./include.tcl + global dict nsecondaries + global default_pagesize + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + if [big_endian] { + set nativeargs " -lorder 4321" + set swappedargs " -lorder 1234" + } else { + set swappedargs " -lorder 4321" + set nativeargs " -lorder 1234" + } + set argtypes "{$nativeargs} {$swappedargs}" + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. 
If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + set mutexargs " -mutex_set_max 10000 " + if { $default_pagesize <= 2048 } { + set mutexargs "-mutex_set_max 40000 " + } + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set cacheargs " -cachesize {0 4194304 1} " + set env [eval {berkdb_env} -create \ + $cacheargs $mutexargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 &&\ + [is_queue $pmethod] == 1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + foreach pbyteargs $argtypes { + foreach sbyteargs $argtypes { + if { $pbyteargs == $nativeargs } { + puts "Si$tnum: Using native\ + byteorder $nativeargs for primary." + } else { + puts "Si$tnum: Using swapped\ + byteorder $swappedargs for primary." + } + if { $sbyteargs == $nativeargs } { + puts "Si$tnum: Using native\ + byteorder $nativeargs for secondaries." + } else { + puts "Si$tnum: Using swapped\ + byteorder $swappedargs for secondaries." + } + + puts "si$tnum\ + \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + # Open primary. 
+ set pdb [eval {berkdb_open -create -env} $env \ + $pomethod $pargs $pbyteargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Open and associate the secondaries + set sdbs {} + for { set i 0 } \ + { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] \ + $sbyteargs $snamebase.$i.db] + error_check_good\ + second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + + puts "\tSi$tnum.a: Put loop" + set did [open $dict] + for { set n 0 } \ + { [gets $did str] != -1 && $n < $nentries } \ + { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put}\ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + check_secondaries\ + $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts "\tSi$tnum.b: Put/overwrite loop" + for { set n 0 } { $n < $nentries } { incr n } { + set newd $data($n).$keys($n) + set ret [eval {$pdb put}\ + {$keys($n) [chop_data $pmethod $newd]}] + error_check_good put_overwrite($n) $ret 0 + set data($n) [pad_data $pmethod $newd] + } + + check_secondaries\ + $pdb $sdbs $nentries keys data "Si$tnum.b" + + # Delete the second half of the entries through + # the primary. We do the second half so we can + # just pass keys(0 ... n/2) to check_secondaries. 
+ set half [expr $nentries / 2] + puts "\tSi$tnum.c:\ + Primary delete loop: deleting $half entries" + for { set n $half } { $n < $nentries } { incr n } { + set ret [$pdb del $keys($n)] + error_check_good pdel($n) $ret 0 + } + check_secondaries\ + $pdb $sdbs $half keys data "Si$tnum.c" + + # Check that the nooverwrite flags functions + # properly by trying to overwrite one key that + # still exists and one key that has been deleted. + puts "\tSi$tnum.c1:\ + Check -nooverwrite on existing key." + set ret [eval {$pdb put} -nooverwrite \ + {$keys(1) [chop_data $pmethod "otherdata"]}] + set ret [$pdb get $keys(1)] + set got_data [lindex [lindex $ret 0] 1] + error_check_good got_data $got_data $data(1) + + puts "\tSi$tnum.c2:\ + Check -nooverwrite on deleted key." + set ret [eval {$pdb put} -nooverwrite \ + {$keys([expr $n - 1]) [chop_data $pmethod "otherdata"]}] + set ret [$pdb get $keys([expr $n - 1])] + set got_data [lindex [lindex $ret 0] 1] + error_check_good \ + got_data $got_data [pad_data $pmethod "otherdata"] + + # Clean up the entry for the nooverwrite test. + set ret [$pdb del $keys([expr $n - 1])] + + # Delete half of what's left through + # the first secondary. + set quar [expr $half / 2] + puts "\tSi$tnum.d:\ + Secondary delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set callback [callback_n 0] + for { set n $quar } { $n < $half } { incr n } { + set skey [$callback $keys($n)\ + [pad_data $pmethod $data($n)]] + set ret [$sdb del $skey] + error_check_good sdel($n) $ret 0 + } + check_secondaries\ + $pdb $sdbs $quar keys data "Si$tnum.d" + set left $quar + + # For queue and recno only, test append, adding + # back a quarter of the original number of entries. + if { [is_record_based $pmethod] == 1 && + [is_heap $pmethod] == 0} { + set did [open $dict] + puts "\tSi$tnum.e:\ + Append loop: append $quar entries" + for { set n 0 } { $n < $nentries } { incr n } { + # Skip over the dictionary entries + # we've already used. 
+ gets $did str + } + for { set n $quar } \ + { [gets $did str] != -1 && $n < $half } \ + { incr n } { + set key [expr $n + 1] + set datum $str + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put} \ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + check_secondaries\ + $pdb $sdbs $half keys data "Si$tnum.e" + set left $half + } + + + puts "\tSi$tnum.f:\ + Truncate primary, check secondaries are empty." + error_check_good truncate [$pdb truncate] $left + foreach sdb $sdbs { + set scursor [$sdb cursor] + error_check_good\ + db_cursor [is_substr $scursor $sdb] 1 + set ret [$scursor get -first] + error_check_good\ + sec_empty [string length $ret] 0 + error_check_good cursor_close [$scursor close] 0 + } + + + puts "\tSi$tnum.g: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + + # Don't close the env if this test was given one. + # Skip the test of truncating the secondary since + # we can't close and reopen the outside env. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + + # Reopen with _noerr for test of + # truncate secondary. + puts "\tSi$tnum.h:\ + Truncate secondary (should fail)" + + set env [eval {berkdb_env_noerr}\ + -create $mutexargs -home $testdir] + error_check_good\ + env_open [is_valid_env $env] TRUE + + set pdb [eval {berkdb_open_noerr -create -env}\ + $env $pomethod $pargs $pname] + set sdb [eval {berkdb_open_noerr -create -env}\ + $env [lindex $omethods 0]\ + [lindex $argses 0] $snamebase.0.db ] + $pdb associate [callback_n 0] $sdb + + set ret [catch {$sdb truncate} ret] + error_check_good trunc_secondary $ret 1 + + error_check_good primary_close [$pdb close] 0 + error_check_good secondary_close [$sdb close] 0 + } + } + } + # If this test made the last env, close it. 
+ if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/si002.tcl b/test/tcl/si002.tcl new file mode 100644 index 00000000..f1b1d6cb --- /dev/null +++ b/test/tcl/si002.tcl @@ -0,0 +1,243 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si002 +# TEST Basic cursor-based secondary index put/delete test +# TEST +# TEST Cursor put data in primary db and check that pget +# TEST on secondary index finds the right entries. +# TEST Open and use a second cursor to exercise the cursor +# TEST comparison API on secondaries. +# TEST Overwrite while walking primary, check pget again. +# TEST Overwrite while walking secondary (use c_pget), check +# TEST pget again. +# TEST Cursor delete half of entries through primary, check. +# TEST Cursor delete half of remainder through secondary, check. +proc si002 { methods {nentries 200} {tnum "002"} args } { + source ./include.tcl + global dict nsecondaries + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. 
+ set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + puts "si$tnum \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + # Open the primary. + set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Open and associate the secondaries + set sdbs {} + for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] $snamebase.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + + set did [open $dict] + + # Populate with a cursor, exercising keyfirst/keylast. + # Can't use keyfirst/keylast to insert into a heap, so just append. 
+ puts "\tSi$tnum.a: Cursor put (-keyfirst/-keylast) loop" + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + for { set n 0 } { [gets $did str] != -1 && $n < $nentries } { incr n } { + + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + + set ns($key) $n + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + if { $n % 2 == 0 } { + set pflag " -keyfirst " + } else { + set pflag " -keylast " + } + + if { [is_heap $pmethod] } { + set ret [eval {$pdb put} \ + {$key [chop_data $pmethod $datum]}] + } else { + set ret [eval {$pdbc put} $pflag \ + {$key [chop_data $pmethod $datum]}] + } + error_check_good put($n) $ret 0 + } + error_check_good pdbc_close [$pdbc close] 0 + + close $did + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts "\tSi$tnum.b: Cursor put overwrite (-current) loop" + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$pdbc get -next] } { + set key [lindex [lindex $dbt 0] 0] + set datum [lindex [lindex $dbt 0] 1] + set newd $datum.$key + + set ret [eval {$pdbc put -current} [chop_data $pmethod $newd]] + error_check_good put_overwrite($key) $ret 0 + set data($ns($key)) [pad_data $pmethod $newd] + } + error_check_good pdbc_close [$pdbc close] 0 + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.b" + + puts "\tSi$tnum.c: Secondary c_pget/primary put overwrite loop" + + # We walk the first secondary, then put-overwrite each primary key/data + # pair we find. This doubles as a DBC->c_pget test. + # We also test the cursor comparison API on secondaries. 
+ # + set sdb [lindex $sdbs 0] + set sdbc [$sdb cursor] + set sdbc2 [$sdb cursor] + error_check_good sdb_cursor [is_valid_cursor $sdbc $sdb] TRUE + for { set dbt [$sdbc pget -first]; set dbt2 [$sdbc2 pget -first] }\ + { [llength $dbt] > 0 } \ + { set dbt [$sdbc pget -next]; set dbt2 [$sdbc2 pget -next] } { + + # Test the cursor comparison API for secondaries + # before we overwrite. First they should match; + # push one cursor forward, they should not match; + # push it back again before the next get. + # + error_check_good cursor_cmp [$sdbc cmp $sdbc2] 0 + set ret [$sdbc2 get -next] + + # If the second cursor tried to walk past the last item, + # this can't work, so we skip it. + if { [llength $ret] > 0 } { + error_check_bad cursor_cmp_bad [$sdbc cmp $sdbc2] 0 + set ret [$sdbc2 get -prev] + } + + set pkey [lindex [lindex $dbt 0] 1] + set pdatum [lindex [lindex $dbt 0] 2] + + # Extended entries will be showing up underneath us, in + # unpredictable places. Keep track of which pkeys + # we've extended, and don't extend them repeatedly. + if { [info exists pkeys_done($pkey)] == 1 } { + continue + } else { + set pkeys_done($pkey) 1 + } + + set newd $pdatum.[string range $pdatum 0 2] + + set ret [eval {$pdb put} {$pkey [chop_data $pmethod $newd]}] + error_check_good pdb_put($pkey) $ret 0 + set data($ns($pkey)) [pad_data $pmethod $newd] + + } + error_check_good sdbc_close [$sdbc close] 0 + error_check_good sdbc2_close [$sdbc2 close] 0 + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.c" + + # Delete the second half of the entries through the primary. + # We do the second half so we can just pass keys(0 ... n/2) + # to check_secondaries. 
+ set half [expr $nentries / 2] + puts "\tSi$tnum.d:\ + Primary cursor delete loop: deleting $half entries" + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + set dbt [$pdbc get -first] + for { set i 0 } { [llength $dbt] > 0 && $i < $half } { incr i } { + error_check_good pdbc_del [$pdbc del] 0 + set dbt [$pdbc get -next] + } + error_check_good pdbc_close [$pdbc close] 0 + cursor_check_secondaries $pdb $sdbs $half "Si$tnum.d" + + # Delete half of what's left, through the first secondary. + set quar [expr $half / 2] + puts "\tSi$tnum.e:\ + Secondary cursor delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set sdbc [$sdb cursor] + set dbt [$sdbc get -first] + for { set i 0 } { [llength $dbt] > 0 && $i < $quar } { incr i } { + error_check_good sdbc_del [$sdbc del] 0 + set dbt [$sdbc get -next] + } + error_check_good sdbc_close [$sdbc close] 0 + cursor_check_secondaries $pdb $sdbs [expr $nentries - $half - $quar] "Si$tnum.e" + + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + error_check_good primary_close [$pdb close] 0 + + # Close the env if it was created within this test. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/si003.tcl b/test/tcl/si003.tcl new file mode 100644 index 00000000..618f033c --- /dev/null +++ b/test/tcl/si003.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si003 +# TEST si001 with secondaries created and closed mid-test +# TEST Basic secondary index put/delete test with secondaries +# TEST created mid-test. +proc si003 { methods {nentries 200} {tnum "003"} args } { + source ./include.tcl + global dict nsecondaries + + # There's no reason to run this test on large lists. 
+ if { $nentries > 1000 } { + puts "Skipping si003 for large lists (over 1000 items)" + return + } + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 &&\ + [is_queue $pmethod] == 1 } { + puts "Skipping si$tnum for threaded env with queue" + return + } + set testdir [get_home $env] + } + + puts "si$tnum \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + # Open the primary. + set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + puts -nonewline "\tSi$tnum.a: Put loop ... 
" + set did [open $dict] + for { set n 0 } { [gets $did str] != -1 && $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put} {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + # Open and associate the secondaries + set sdbs {} + puts "opening secondaries." + for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] $snamebase.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate -create [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts -nonewline "\tSi$tnum.b: Put/overwrite loop ... " + for { set n 0 } { $n < $nentries } { incr n } { + set newd $data($n).$keys($n) + set ret [eval {$pdb put} {$keys($n) [chop_data $pmethod $newd]}] + error_check_good put_overwrite($n) $ret 0 + set data($n) [pad_data $pmethod $newd] + } + + # Close the secondaries again. + puts "closing secondaries." + for { set sdb [lindex $sdbs end] } { [string length $sdb] > 0 } \ + { set sdb [lindex $sdbs end] } { + error_check_good second_close($sdb) [$sdb close] 0 + set sdbs [lrange $sdbs 0 end-1] + check_secondaries \ + $pdb $sdbs $nentries keys data "Si$tnum.b" + } + + # Delete the second half of the entries through the primary. + # We do the second half so we can just pass keys(0 ... n/2) + # to check_secondaries. + set half [expr $nentries / 2] + puts -nonewline \ + "\tSi$tnum.c: Primary delete loop: deleting $half entries ..." 
+ for { set n $half } { $n < $nentries } { incr n } { + set ret [$pdb del $keys($n)] + error_check_good pdel($n) $ret 0 + } + + # Open and associate the secondaries + set sdbs {} + puts "\n\t\topening secondaries." + for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] \ + $snamebase.r2.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate -create [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + check_secondaries $pdb $sdbs $half keys data "Si$tnum.c" + + # Delete half of what's left, through the first secondary. + set quar [expr $half / 2] + puts "\tSi$tnum.d: Secondary delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set callback [callback_n 0] + for { set n $quar } { $n < $half } { incr n } { + set skey [$callback $keys($n) [pad_data $pmethod $data($n)]] + set ret [$sdb del $skey] + error_check_good sdel($n) $ret 0 + } + check_secondaries $pdb $sdbs $quar keys data "Si$tnum.d" + + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + error_check_good primary_close [$pdb close] 0 + + # Close the env if it was created within this test. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/si004.tcl b/test/tcl/si004.tcl new file mode 100644 index 00000000..d65bd932 --- /dev/null +++ b/test/tcl/si004.tcl @@ -0,0 +1,237 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si004 +# TEST si002 with secondaries created and closed mid-test +# TEST Basic cursor-based secondary index put/delete test, with +# TEST secondaries created mid-test. +proc si004 { methods {nentries 200} {tnum "004"} args } { + source ./include.tcl + global dict nsecondaries + + # There's no reason to run this test on large lists. 
+ if { $nentries > 1000 } { + puts "Skipping si004 for large lists (over 1000 items)." + return + } + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + puts "si$tnum \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + # Open the primary. + set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Populate with a cursor put, exercising keyfirst/keylast. 
+ set did [open $dict] + puts -nonewline \ + "\tSi$tnum.a: Cursor put (-keyfirst/-keylast) loop ... " + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + for { set n 0 } \ + { [gets $did str] != -1 && $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + + set ns($key) $n + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + if { $n % 2 == 0 } { + set pflag " -keyfirst " + } else { + set pflag " -keylast " + } + if { [is_heap $pmethod] } { + set ret [eval {$pdb put} \ + {$key [chop_data $pmethod $datum]}] + } else { + set ret [eval {$pdbc put} $pflag \ + {$key [chop_data $pmethod $datum]}] + } + error_check_good put($n) $ret 0 + } + error_check_good pdbc_close [$pdbc close] 0 + close $did + + # Open and associate the secondaries + set sdbs {} + puts "\n\t\topening secondaries." + for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] $snamebase.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate -create [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts "\tSi$tnum.b: Cursor put overwrite (-current) loop" + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$pdbc get -next] } { + set key [lindex [lindex $dbt 0] 0] + set datum [lindex [lindex $dbt 0] 1] + set newd $datum.$key + set ret [eval {$pdbc put -current} [chop_data $pmethod $newd]] + error_check_good put_overwrite($key) $ret 0 + set data($ns($key)) [pad_data $pmethod $newd] + } + error_check_good pdbc_close [$pdbc close] 0 + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.b" + + puts -nonewline 
"\tSi$tnum.c:\ + Secondary c_pget/primary put overwrite loop ... " + # We walk the first secondary, then put-overwrite each primary key/data + # pair we find. This doubles as a DBC->c_pget test. + set sdb [lindex $sdbs 0] + set sdbc [$sdb cursor] + error_check_good sdb_cursor [is_valid_cursor $sdbc $sdb] TRUE + for { set dbt [$sdbc pget -first] } { [llength $dbt] > 0 } \ + { set dbt [$sdbc pget -next] } { + set pkey [lindex [lindex $dbt 0] 1] + set pdatum [lindex [lindex $dbt 0] 2] + + # Extended entries will be showing up underneath us, in + # unpredictable places. Keep track of which pkeys + # we've extended, and don't extend them repeatedly. + if { [info exists pkeys_done($pkey)] == 1 } { + continue + } else { + set pkeys_done($pkey) 1 + } + + set newd $pdatum.[string range $pdatum 0 2] + set ret [eval {$pdb put} {$pkey [chop_data $pmethod $newd]}] + error_check_good pdb_put($pkey) $ret 0 + set data($ns($pkey)) [pad_data $pmethod $newd] + } + error_check_good sdbc_close [$sdbc close] 0 + + # Close the secondaries again. + puts "\n\t\tclosing secondaries." + for { set sdb [lindex $sdbs end] } { [string length $sdb] > 0 } \ + { set sdb [lindex $sdbs end] } { + error_check_good second_close($sdb) [$sdb close] 0 + set sdbs [lrange $sdbs 0 end-1] + check_secondaries \ + $pdb $sdbs $nentries keys data "Si$tnum.c" + } + + # Delete the second half of the entries through the primary. + # We do the second half so we can just pass keys(0 ... n/2) + # to check_secondaries. + set half [expr $nentries / 2] + puts -nonewline "\tSi$tnum.d:\ + Primary cursor delete loop: deleting $half entries ... " + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + set dbt [$pdbc get -first] + for { set i 0 } { [llength $dbt] > 0 && $i < $half } { incr i } { + error_check_good pdbc_del [$pdbc del] 0 + set dbt [$pdbc get -next] + } + error_check_good pdbc_close [$pdbc close] 0 + + set sdbs {} + puts "\n\t\topening secondaries." 
+ for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] \ + $snamebase.r2.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate -create [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + cursor_check_secondaries $pdb $sdbs $half "Si$tnum.d" + + # Delete half of what's left, through the first secondary. + set quar [expr $half / 2] + puts "\tSi$tnum.e:\ + Secondary cursor delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set sdbc [$sdb cursor] + set dbt [$sdbc get -first] + for { set i 0 } { [llength $dbt] > 0 && $i < $quar } { incr i } { + error_check_good sdbc_del [$sdbc del] 0 + set dbt [$sdbc get -next] + } + error_check_good sdbc_close [$sdbc close] 0 + cursor_check_secondaries $pdb $sdbs $quar "Si$tnum.e" + + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + error_check_good primary_close [$pdb close] 0 + + # Close the env if it was created within this test. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/si005.tcl b/test/tcl/si005.tcl new file mode 100644 index 00000000..50b531ea --- /dev/null +++ b/test/tcl/si005.tcl @@ -0,0 +1,170 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si005 +# TEST Basic secondary index put/delete test with transactions +proc si005 { methods {nentries 200} {tnum "005"} args } { + source ./include.tcl + global dict nsecondaries + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. 
If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + # Since this is a transaction test, don't allow nentries to be large. + if { $nentries > 1000 } { + puts "Skipping si005 for large lists (over 1000 items)." + return + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir -txn] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append pargs " -auto_commit " + append argses " -auto_commit " + } else { + puts "Skipping si$tnum for non-transactional env." + return + } + set testdir [get_home $env] + } + + cleanup $testdir $env + puts "si$tnum \{\[ list $pmethod $methods \]\} $nentries" + puts "\twith transactions" + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + # Open the primary. 
+ set pdb [eval {berkdb_open -create -auto_commit -env} $env $pomethod \ + $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Open and associate the secondaries + set sdbs {} + for { set i 0 } { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -auto_commit -env} $env \ + [lindex $omethods $i] [lindex $argses $i] $snamebase.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + + puts "\tSi$tnum.a: Put loop" + set did [open $dict] + for { set n 0 } { [gets $did str] != -1 && $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set txn [$env txn] + set ret [eval {$pdb put} -txn $txn \ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + error_check_good txn_commit($n) [$txn commit] 0 + } + close $did + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts "\tSi$tnum.b: Put/overwrite loop" + for { set n 0 } { $n < $nentries } { incr n } { + set newd $data($n).$keys($n) + + set txn [$env txn] + set ret [eval {$pdb put} -txn $txn \ + {$keys($n) [chop_data $pmethod $newd]}] + error_check_good put_overwrite($n) $ret 0 + set data($n) [pad_data $pmethod $newd] + error_check_good txn_commit($n) [$txn commit] 0 + } + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.b" + + # Delete the second half of the entries through the primary. + # We do the second half so we can just pass keys(0 ... n/2) + # to check_secondaries. 
+ set half [expr $nentries / 2] + puts "\tSi$tnum.c: Primary delete loop: deleting $half entries" + for { set n $half } { $n < $nentries } { incr n } { + set txn [$env txn] + set ret [$pdb del -txn $txn $keys($n)] + error_check_good pdel($n) $ret 0 + error_check_good txn_commit($n) [$txn commit] 0 + } + check_secondaries $pdb $sdbs $half keys data "Si$tnum.c" + + # Delete half of what's left, through the first secondary. + set quar [expr $half / 2] + puts "\tSi$tnum.d: Secondary delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set callback [callback_n 0] + for { set n $quar } { $n < $half } { incr n } { + set skey [$callback $keys($n) [pad_data $pmethod $data($n)]] + set txn [$env txn] + set ret [$sdb del -txn $txn $skey] + error_check_good sdel($n) $ret 0 + error_check_good txn_commit($n) [$txn commit] 0 + } + check_secondaries $pdb $sdbs $quar keys data "Si$tnum.d" + + puts "\tSi$tnum.e: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + + # Close the env if it was created within this test. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } + return +} diff --git a/test/tcl/si006.tcl b/test/tcl/si006.tcl new file mode 100644 index 00000000..92650b3e --- /dev/null +++ b/test/tcl/si006.tcl @@ -0,0 +1,186 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST si006 +# TEST +# TEST Test -immutable_key interface. +# TEST +# TEST DB_IMMUTABLE_KEY is an optimization to be used when a +# TEST secondary key will not be changed. It does not prevent +# TEST a deliberate change to the secondary key, it just does not +# TEST propagate that change when it is made to the primary. +# TEST This test verifies that a change to the primary is propagated +# TEST to the secondary or not as specified by -immutable_key. 
+ +proc si006 { methods {nentries 200} {tnum "006"} args } { + source ./include.tcl + global dict + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + set nsecondaries 2 + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 &&\ + [is_queue $pmethod] == 1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + puts "si$tnum \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + # Open the primary. + set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Open and associate the secondaries, without -immutable_key. 
+ puts "\tSi$tnum.a: Open primary and secondary databases and associate." + set sdbs {} + + set sdb1 [eval {berkdb_open -create -env} $env \ + [lindex $omethods 0] [lindex $argses 0] $snamebase.1.db] + error_check_good open_sdb1 [is_valid_db $sdb1] TRUE + error_check_good sdb1_associate \ + [$pdb associate [callback_n 0] $sdb1] 0 + lappend sdbs $sdb1 + + set sdb2 [eval {berkdb_open -create -env} $env \ + [lindex $omethods 1] [lindex $argses 1] $snamebase.2.db] + error_check_good open_sdb2 [is_valid_db $sdb2] TRUE + error_check_good sdb2_associate \ + [$pdb associate [callback_n 1] $sdb2] 0 + lappend sdbs $sdb2 + + puts "\tSi$tnum.b: Put loop on primary database." + set did [open $dict] + for { set n 0 } { [gets $did str] != -1 && $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put} {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + puts "\tSi$tnum.c: Check secondaries." + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.c" + + puts "\tSi$tnum.d: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + + puts "\tSi$tnum.e: Reopen databases." + # Reopen the primary. + set pdb [eval {berkdb_open -env} $env $pname] + error_check_good primary_reopen [is_valid_db $pdb] TRUE + + # Reopen and associate secondary without -immutable_key. + set mutable {} + set sdb1 [eval {berkdb_open -create -env} $env \ + [lindex $omethods 0] [lindex $argses 0] $snamebase.1.db] + error_check_good open_sdb1 [is_valid_db $sdb1] TRUE + error_check_good sdb1_associate \ + [$pdb associate [callback_n 0] $sdb1] 0 + lappend mutable $sdb1 + + # Reopen and associate second secondary with -immutable_key. 
+ set immutable {} + set sdb2 [eval {berkdb_open -env} $env \ + [lindex $omethods 1] [lindex $argses 1] $snamebase.2.db] + error_check_good reopen_sdb2 [is_valid_db $sdb2] TRUE + error_check_good sdb2_associate \ + [$pdb associate -immutable_key [callback_n 1] $sdb2] 0 + lappend immutable $sdb2 + + # Update primary. This should write to sdb1, but not sdb2. + puts "\tSi$tnum.f: Put loop on primary database." + set str "OVERWRITTEN" + for { set n 0 } { $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + } else { + set key $keys($n) + } + set datum $str.$n + set data($n) [pad_data $pmethod $datum] + set ret [eval {$pdb put} {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + + puts "\tSi$tnum.g: Check secondaries without -immutable_key." + check_secondaries $pdb $mutable $nentries keys data "Si$tnum.g" + + puts "\tSi$tnum.h: Check secondaries with -immutable_key." + if { [catch {check_secondaries \ + $pdb $immutable $nentries keys data "Si$tnum.h"} res] != 1 } { + puts "FAIL: Immutable secondary key was changed." + } + + puts "\tSi$tnum.i: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + error_check_good secondary1_close [$sdb1 close] 0 + error_check_good secondary2_close [$sdb2 close] 0 + + # Don't close the env if this test was given one. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} + diff --git a/test/tcl/si007.tcl b/test/tcl/si007.tcl new file mode 100644 index 00000000..2216471a --- /dev/null +++ b/test/tcl/si007.tcl @@ -0,0 +1,188 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST si007 +# TEST Secondary index put/delete with lorder test +# TEST +# TEST This test is the same as si001 with the exception +# TEST that we create and populate the primary and THEN +# TEST create the secondaries and associate them with -create. + +proc si007 { methods {nentries 10} {tnum "007"} args } { + source ./include.tcl + global dict nsecondaries + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + if [big_endian] { + set nativeargs " -lorder 4321" + set swappedargs " -lorder 1234" + } else { + set swappedargs " -lorder 4321" + set nativeargs " -lorder 1234" + } + set argtypes "{$nativeargs} {$swappedargs}" + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 &&\ + [is_queue $pmethod] == 1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + foreach pbyteargs $argtypes { + foreach sbyteargs $argtypes { + if { $pbyteargs == $nativeargs } { + puts "Si$tnum: Using native\ + byteorder $nativeargs for primary." + } else { + puts "Si$tnum: Using swapped\ + byteorder $swappedargs for primary." + } + if { $sbyteargs == $nativeargs } { + puts "Si$tnum: Using native\ + byteorder $nativeargs for secondaries." + } else { + puts "Si$tnum: Using swapped\ + byteorder $swappedargs for secondaries." + } + + puts "si$tnum\ + \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + # Open primary. + set pdb [eval {berkdb_open -create -env} $env \ + $pomethod $pargs $pbyteargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + puts "\tSi$tnum.a: Populate primary." + # Open dictionary. Leave it open until done + # with test .e so append won't require + # configuration for duplicates. + set did [open $dict] + for { set n 0 } \ + { [gets $did str] != -1 && $n < $nentries } \ + { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put}\ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + + # Open and associate the secondaries, with -create. + puts "\tSi$tnum.b: Associate secondaries with -create." 
+ set sdbs {} + for { set i 0 } \ + { $i < [llength $omethods] } { incr i } { + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] \ + $sbyteargs $snamebase.$i.db] + error_check_good\ + second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate -create [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + check_secondaries\ + $pdb $sdbs $nentries keys data "Si$tnum.c" + + puts "\tSi$tnum.c: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + + # Don't close the env if this test was given one. + # Skip the test of truncating the secondary since + # we can't close and reopen the outside env. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + + # Reopen with _noerr for test of + # truncate secondary. + puts "\tSi$tnum.h:\ + Truncate secondary (should fail)" + + set env [berkdb_env_noerr\ + -create -home $testdir] + error_check_good\ + env_open [is_valid_env $env] TRUE + + set pdb [eval {berkdb_open_noerr -create -env}\ + $env $pomethod $pargs $pname] + set sdb [eval {berkdb_open_noerr -create -env}\ + $env [lindex $omethods 0]\ + [lindex $argses 0] $snamebase.0.db ] + $pdb associate [callback_n 0] $sdb + + set ret [catch {$sdb truncate} ret] + error_check_good trunc_secondary $ret 1 + + error_check_good primary_close [$pdb close] 0 + error_check_good secondary_close [$sdb close] 0 + } + } + } + # If this test made the last env, close it. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/si008.tcl b/test/tcl/si008.tcl new file mode 100644 index 00000000..be244782 --- /dev/null +++ b/test/tcl/si008.tcl @@ -0,0 +1,274 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST si008 +# TEST Secondary index put/delete with lorder test +# TEST +# TEST This test is the same as si001 except that we +# TEST create the secondaries with different byte orders: +# TEST one native, one swapped. + +proc si008 { methods {nentries 10} {tnum "008"} args } { + source ./include.tcl + global dict nsecondaries + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + if [big_endian] { + set nativeargs " -lorder 4321" + set swappedargs " -lorder 1234" + } else { + set swappedargs " -lorder 4321" + set nativeargs " -lorder 1234" + } + set argtypes "{$nativeargs} {$swappedargs}" + set pomethod [convert_method $pmethod] + + # Renumbering recno databases can't be used as primaries. + if { [is_rrecno $pmethod] == 1 } { + puts "Skipping si$tnum for method $pmethod" + return + } + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method (for btree or hash) + # and a standard number of secondaries. If primary is not + # btree or hash, force secondaries to be one btree, one hash. + set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < $nsecondaries } { incr i } { + if { [is_btree $pmethod] || [is_hash $pmethod] } { + lappend methods $pmethod + } else { + if { [expr $i % 2] == 0 } { + lappend methods "-btree" + } else { + lappend methods "-hash" + } + } + } + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + # If we are given an env, use it. Otherwise, open one. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set cacheargs " -cachesize {0 1048576 1} " + set env [eval berkdb_env -create $cacheargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + if { [lsearch -exact $envflags "-thread"] != -1 &&\ + [is_queue $pmethod] == 1 } { + puts "Skipping si$tnum for threaded env" + return + } + set testdir [get_home $env] + } + + set pname "primary$tnum.db" + set snamebase "secondary$tnum" + + foreach pbyteargs $argtypes { + if { $pbyteargs == $nativeargs } { + puts "Si$tnum: Using native\ + byteorder $nativeargs for primary." + } else { + puts "Si$tnum: Using swapped\ + byteorder $swappedargs for primary." + } + + puts "si$tnum\ + \{\[ list $pmethod $methods \]\} $nentries" + cleanup $testdir $env + + # Open primary. + set pdb [eval {berkdb_open -create -env} $env \ + $pomethod $pargs $pbyteargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + # Open and associate the secondaries + set sdbs {} + for { set i 0 } { $i < [llength $omethods] } { incr i } { + if { [expr $i % 2] == 0 } { + set sbyteargs $nativeargs + } else { + set sbyteargs $swappedargs + } + + if { $sbyteargs == $nativeargs } { + puts "Si$tnum: Using native byteorder\ + $nativeargs for secondary $i." + } else { + puts "Si$tnum: Using swapped byteorder\ + $swappedargs for secondary $i." 
+ } + + set sdb [eval {berkdb_open -create -env} $env \ + [lindex $omethods $i] [lindex $argses $i] \ + $sbyteargs $snamebase.$i.db] + error_check_good second_open($i) [is_valid_db $sdb] TRUE + + error_check_good db_associate($i) \ + [$pdb associate [callback_n $i] $sdb] 0 + lappend sdbs $sdb + } + + puts "\tSi$tnum.a: Put loop" + set did [open $dict] + for { set n 0 } { [gets $did str] != -1 && $n < $nentries } { incr n } { + if { [is_record_based $pmethod] == 1 } { + set key [expr $n + 1] + set datum $str + } else { + set key $str + gets $did datum + } + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put}\ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + check_secondaries $pdb $sdbs $nentries keys data "Si$tnum.a" + + puts "\tSi$tnum.b: Put/overwrite loop" + for { set n 0 } { $n < $nentries } { incr n } { + set newd $data($n).$keys($n) + set ret [eval {$pdb put}\ + {$keys($n) [chop_data $pmethod $newd]}] + error_check_good put_overwrite($n) $ret 0 + set data($n) [pad_data $pmethod $newd] + } + check_secondaries\ + $pdb $sdbs $nentries keys data "Si$tnum.b" + + # Delete the second half of the entries through + # the primary. We do the second half so we can + # just pass keys(0 ... n/2) to check_secondaries. + set half [expr $nentries / 2] + puts "\tSi$tnum.c:\ + Primary delete loop: deleting $half entries" + for { set n $half } { $n < $nentries } { incr n } { + set ret [$pdb del $keys($n)] + error_check_good pdel($n) $ret 0 + } + check_secondaries\ + $pdb $sdbs $half keys data "Si$tnum.c" + + # Delete half of what's left through + # the first secondary. 
+ set quar [expr $half / 2] + puts "\tSi$tnum.d:\ + Secondary delete loop: deleting $quar entries" + set sdb [lindex $sdbs 0] + set callback [callback_n 0] + for { set n $quar } { $n < $half } { incr n } { + set skey [$callback $keys($n)\ + [pad_data $pmethod $data($n)]] + set ret [$sdb del $skey] + error_check_good sdel($n) $ret 0 + } + check_secondaries\ + $pdb $sdbs $quar keys data "Si$tnum.d" + set left $quar + + # For queue and recno only, test append, adding back + # a quarter of the original number of entries. + if { [is_record_based $pmethod] == 1 && + [is_heap $pmethod] == 0 } { + set did [open $dict] + puts "\tSi$tnum.e:\ + Append loop: append $quar entries" + for { set n 0 } { $n < $nentries } { incr n } { + # Skip over dictionary entries we've + # already used. + gets $did str + } + for { set n $quar } \ + { [gets $did str] != -1 && $n < $half } \ + { incr n } { + set key [expr $n + 1] + set datum $str + set keys($n) $key + set data($n) [pad_data $pmethod $datum] + + set ret [eval {$pdb put} \ + {$key [chop_data $pmethod $datum]}] + error_check_good put($n) $ret 0 + } + close $did + + check_secondaries\ + $pdb $sdbs $half keys data "Si$tnum.e" + set left $half + } + + puts "\tSi$tnum.f:\ + Truncate primary, check secondaries are empty." + error_check_good truncate [$pdb truncate] $left + foreach sdb $sdbs { + set scursor [$sdb cursor] + error_check_good\ + db_cursor [is_substr $scursor $sdb] 1 + set ret [$scursor get -first] + error_check_good\ + sec_empty [string length $ret] 0 + error_check_good cursor_close [$scursor close] 0 + } + + + puts "\tSi$tnum.g: Closing/disassociating primary first" + error_check_good primary_close [$pdb close] 0 + foreach sdb $sdbs { + error_check_good secondary_close [$sdb close] 0 + } + + # Don't close the env if this test was given one. + # Skip the test of truncating the secondary since + # we can't close and reopen the outside env. 
+ if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + + # Reopen with _noerr for test of + # truncate secondary. + puts "\tSi$tnum.h:\ + Truncate secondary (should fail)" + + set env [berkdb_env_noerr\ + -create -home $testdir] + error_check_good\ + env_open [is_valid_env $env] TRUE + + set pdb [eval {berkdb_open_noerr -create -env}\ + $env $pomethod $pargs $pname] + set sdb [eval {berkdb_open_noerr -create -env}\ + $env [lindex $omethods 0]\ + [lindex $argses 0] $snamebase.0.db ] + $pdb associate [callback_n 0] $sdb + + set ret [catch {$sdb truncate} ret] + error_check_good trunc_secondary $ret 1 + + error_check_good primary_close [$pdb close] 0 + error_check_good secondary_close [$sdb close] 0 + } + } + + # If this test made the last env, close it. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/sijointest.tcl b/test/tcl/sijointest.tcl new file mode 100644 index 00000000..0a885d9a --- /dev/null +++ b/test/tcl/sijointest.tcl @@ -0,0 +1,179 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST sijointest: Secondary index and join test. +# TEST This used to be si005.tcl. +proc sijointest { methods {nentries 1000} args } { + source ./include.tcl + + # Primary method/args. + set pmethod [lindex $methods 0] + set pargs [convert_args $pmethod $args] + set pomethod [convert_method $pmethod] + + # Si005 does a join within a simulated database schema + # in which the primary index maps a record ID to a ZIP code and + # name in the form "XXXXXname", and there are two secondaries: + # one mapping ZIP to ID, the other mapping name to ID. + # The primary may be of any database type; the two secondaries + # must be either btree or hash. + + # Method/args for all the secondaries. If only one method + # was specified, assume the same method for the two secondaries. 
+ set methods [lrange $methods 1 end] + if { [llength $methods] == 0 } { + for { set i 0 } { $i < 2 } { incr i } { + lappend methods $pmethod + } + } elseif { [llength $methods] != 2 } { + puts "FAIL: Sijoin requires exactly two secondaries." + return + } + + set argses [convert_argses $methods $args] + set omethods [convert_methods $methods] + + puts "Secondary index join test." + puts "sijoin \{\[ list $pmethod $methods \]\} $nentries" + env_cleanup $testdir + + set pname "sijoin-primary.db" + set zipname "sijoin-zip.db" + set namename "sijoin-name.db" + + # Open an environment + # XXX if one is not supplied! + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # Open the databases. + set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] + error_check_good primary_open [is_valid_db $pdb] TRUE + + set zipdb [eval {berkdb_open -create -dup -env} $env \ + [lindex $omethods 0] [lindex $argses 0] $zipname] + error_check_good zip_open [is_valid_db $zipdb] TRUE + error_check_good zip_associate [$pdb associate sj_getzip $zipdb] 0 + + set namedb [eval {berkdb_open -create -dup -env} $env \ + [lindex $omethods 1] [lindex $argses 1] $namename] + error_check_good name_open [is_valid_db $namedb] TRUE + error_check_good name_associate [$pdb associate sj_getname $namedb] 0 + + puts "\tSijoin.a: Populate database with $nentries \"names\"" + sj_populate $pdb $nentries + puts "\tSijoin.b: Perform a join on each \"name\" and \"ZIP\"" + sj_jointest $pdb $zipdb $namedb + + error_check_good name_close [$namedb close] 0 + error_check_good zip_close [$zipdb close] 0 + error_check_good primary_close [$pdb close] 0 + error_check_good env_close [$env close] 0 +} + +proc sj_jointest { pdb zipdb namedb } { + set pdbc [$pdb cursor] + error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE + for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$pdbc get -next] } { + set item [lindex [lindex $dbt 0] 1] 
+ set retlist [sj_dojoin $item $pdb $zipdb $namedb] + } +} + +proc sj_dojoin { item pdb zipdb namedb } { + set name [sj_getname "" $item] + set zip [sj_getzip "" $item] + + set zipc [$zipdb cursor] + error_check_good zipc($item) [is_valid_cursor $zipc $zipdb] TRUE + + set namec [$namedb cursor] + error_check_good namec($item) [is_valid_cursor $namec $namedb] TRUE + + set pc [$pdb cursor] + error_check_good pc($item) [is_valid_cursor $pc $pdb] TRUE + + set ret [$zipc get -set $zip] + set zd [lindex [lindex $ret 0] 1] + error_check_good zipset($zip) [sj_getzip "" $zd] $zip + + set ret [$namec get -set $name] + set nd [lindex [lindex $ret 0] 1] + error_check_good nameset($name) [sj_getname "" $nd] $name + + set joinc [$pdb join $zipc $namec] + + set anyreturned 0 + for { set dbt [$joinc get] } { [llength $dbt] > 0 } \ + { set dbt [$joinc get] } { + set ritem [lindex [lindex $dbt 0] 1] + error_check_good returned_item($item) $ritem $item + incr anyreturned + } + error_check_bad anyreturned($item) $anyreturned 0 + + error_check_good joinc_close($item) [$joinc close] 0 + error_check_good pc_close($item) [$pc close] 0 + error_check_good namec_close($item) [$namec close] 0 + error_check_good zipc_close($item) [$zipc close] 0 +} + +proc sj_populate { db nentries } { + global dict + + set did [open $dict] + for { set i 1 } { $i <= $nentries } { incr i } { + gets $did word + if { [string length $word] < 3 } { + gets $did word + if { [string length $word] < 3 } { + puts "FAIL:\ + unexpected pair of words < 3 chars long" + } + } + set datalist [sj_name2zips $word] + foreach data $datalist { + error_check_good db_put($data) [$db put $i $data$word] 0 + } + } + close $did +} + +proc sj_getzip { key data } { return [string range $data 0 4] } +proc sj_getname { key data } { return [string range $data 5 end] } + +# The dirty secret of this test is that the ZIP code is a function of the +# name, so we can generate a database and then verify join results easily +# without having to 
consult actual data. +# +# Any word passed into this function will generate from 1 to 26 ZIP +# entries, out of the set {00000, 01000 ... 99000}. The number of entries +# is just the position in the alphabet of the word's first letter; the +# entries are then hashed to the set {00, 01 ... 99} N different ways. +proc sj_name2zips { name } { + global alphabet + + set n [expr [string first [string index $name 0] $alphabet] + 1] + error_check_bad starts_with_abc($name) $n -1 + + set ret {} + for { set i 0 } { $i < $n } { incr i } { + set b 0 + for { set j 1 } { $j < [string length $name] } \ + { incr j } { + set b [sj_nhash $name $i $j $b] + } + lappend ret [format %05u [expr $b % 100]000] + } + return $ret +} +proc sj_nhash { name i j b } { + global alphabet + + set c [string first [string index $name $j] $alphabet'] + return [expr (($b * 991) + ($i * 997) + $c) % 10000000] +} diff --git a/test/tcl/siutils.tcl b/test/tcl/siutils.tcl new file mode 100644 index 00000000..fd85d293 --- /dev/null +++ b/test/tcl/siutils.tcl @@ -0,0 +1,292 @@ +#See the file LICENSE for redistribution information. +# +# Copyright (c) 2001, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Secondary index utilities. This file used to be known as +# sindex.tcl. +# +# The secondary index tests themselves live in si0*.tcl. +# +# Standard number of secondary indices to create if a single-element +# list of methods is passed into the secondary index tests. +global nsecondaries +set nsecondaries 2 + +# The callback function we use for each given secondary in most tests +# is a simple function of its place in the list of secondaries (0-based) +# and the access method (since recnos may need different callbacks). +# +# !!! +# Note that callbacks 0-3 return unique secondary keys if the input data +# are unique; callbacks 4 and higher may not, so don't use them with +# the normal wordlist and secondaries that don't support dups. 
+# The callbacks that incorporate a key don't work properly with recno +# access methods, at least not in the current test framework (the +# error_check_good lines test for e.g. 1foo, when the database has +# e.g. 0x010x000x000x00foo). +proc callback_n { n } { + switch $n { + 0 { return _s_reversedata } + 1 { return _s_noop } + 2 { return _s_concatkeydata } + 3 { return _s_concatdatakey } + 4 { return _s_reverseconcat } + 5 { return _s_truncdata } + 6 { return _s_constant } + 7 { return _s_twokeys } + 8 { return _s_variablekeys } + } + return _s_noop +} + +proc _s_noop { a b } { return $b } +proc _s_reversedata { a b } { return [reverse $b] } +proc _s_truncdata { a b } { return [string range $b 1 end] } +proc _s_concatkeydata { a b } { return $a$b } +proc _s_concatdatakey { a b } { return $b$a } +proc _s_reverseconcat { a b } { return [reverse $a$b] } +proc _s_constant { a b } { return "constant-data" } +proc _s_twokeys { a b } { return [list 1 2] } +proc _s_variablekeys { a b } { + set rlen [string length $b] + set result {} + for {set i 0} {$i < $rlen} {incr i} { + lappend $result $i + } + return $result +} + +# Should the check_secondary routines print lots of output? +set verbose_check_secondaries 0 + +# Given a primary database handle, a list of secondary handles, a +# number of entries, and arrays of keys and data, verify that all +# databases have what they ought to. +proc check_secondaries { pdb sdbs nentries keyarr dataarr {pref "Check"} \ + {errp NONE} {errs NONE} {errsg NONE}} { + upvar $keyarr keys + upvar $dataarr data + global verbose_check_secondaries + + if { [string compare $errp NONE] != 0 } { + upvar $errp errorp + } + set errorp 0 + if { [string compare $errs NONE] != 0 } { + upvar $errs errors + } + set errors 0 + if { [string compare $errsg NONE] != 0 } { + upvar $errsg errorsg + } + set errorsg 0 + # Make sure each key/data pair is in the primary. 
+ if { $verbose_check_secondaries } { + puts "\t\t$pref.1: Each key/data pair is in the primary" + } + for { set i 0 } { $i < $nentries } { incr i } { + if { [string equal $errp NONE] } { + error_check_good pdb_get($i) [$pdb get $keys($i)] \ + [list [list $keys($i) $data($i)]] + } else { + set stat [catch {$pdb get $keys($i)} ret] + if { $stat == 1 } { + set errorp $ret + break + } else { + error_check_good pdb_get($i) $ret \ + [list [list $keys($i) $data($i)]] + } + } + } + + for { set j 0 } { $j < [llength $sdbs] } { incr j } { + # Make sure each key/data pair is in this secondary. + if { $verbose_check_secondaries } { + puts "\t\t$pref.2:\ + Each skey/key/data tuple is in secondary #$j" + } + set sdb [lindex $sdbs $j] + set nskeys 0 + for { set i 0 } { $i < $nentries } { incr i } { + set skeys [[callback_n $j] $keys($i) $data($i)] + if { [llength $skeys] == 0 } { + set skeys [list $skeys] + } + foreach skey $skeys { + incr nskeys + # Check with pget on the secondary. + set stat [catch {$sdb pget -get_both \ + $skey $keys($i)} ret] + if { [string equal $errs NONE] } { + error_check_good stat $stat 0 + error_check_good sdb($j)_pget($i) $ret \ + [list [list \ + $skey $keys($i) $data($i)]] + } else { + if { $stat == 1 } { + set errors $ret + } else { + error_check_good \ + sdb($j)_pget($i) $ret \ + [list [list \ + $skey $keys($i) $data($i)]] + } + } + # Check again with get on the secondary. Since + # get_both is not an allowed option with get on + # a secondary handle, we can't guarantee an + # exact match on method 5 and over. We just + # make sure that one of the returned key/data + # pairs is the right one. 
+ if { $j >= 5 } { + error_check_good sdb($j)_get($i) \ + [is_substr [$sdb get $skey] \ + [list [list $skey $data($i)]]] 1 + } else { + set stat [catch {$sdb get $skey} ret] + if { [string equal $errs NONE] } { + error_check_good \ + sdb($j)_get($i) $ret \ + [list [list \ + $skey $data($i)]] + } else { + if { $stat == 1 } { + set errorsg $ret + break + } else { + error_check_good \ + sdb($j)_get($i) \ + $ret [list [list \ + $skey $data($i)]] + } + } + } + # + # We couldn't break above because we need to + # execute the errorsg error as well. + # + if { $errors != 0 } { + break + } + } + } + if { $errors != 0 || $errorsg != 0 } { + break + } + + # Make sure this secondary contains only $nskeys + # items. + if { $verbose_check_secondaries } { + puts "\t\t$pref.3: Secondary #$j has $nskeys items" + } + set dbc [$sdb cursor] + error_check_good dbc($i) \ + [is_valid_cursor $dbc $sdb] TRUE + for { set k 0 } { [llength [$dbc get -next]] > 0 } \ + { incr k } { } + error_check_good numitems($i) $k $nskeys + error_check_good dbc($i)_close [$dbc close] 0 + } + if { $errorp != 0 || $errors != 0 || $errorsg != 0 } { + return + } + + if { $verbose_check_secondaries } { + puts "\t\t$pref.4: Primary has $nentries items" + } + set dbc [$pdb cursor] + error_check_good pdbc [is_valid_cursor $dbc $pdb] TRUE + for { set k 0 } { [llength [$dbc get -next]] > 0 } { incr k } { } + error_check_good numitems $k $nentries + error_check_good pdbc_close [$dbc close] 0 +} + +# Given a primary database handle and a list of secondary handles, walk +# through the primary and make sure all the secondaries are correct, +# then walk through the secondaries and make sure the primary is correct. +# +# This is slightly less rigorous than the normal check_secondaries--we +# use it whenever we don't have up-to-date "keys" and "data" arrays. 
+proc cursor_check_secondaries { pdb sdbs nentries { pref "Check" } } { + global verbose_check_secondaries + + # Make sure each key/data pair in the primary is in each secondary. + set pdbc [$pdb cursor] + error_check_good ccs_pdbc [is_valid_cursor $pdbc $pdb] TRUE + set i 0 + if { $verbose_check_secondaries } { + puts "\t\t$pref.1:\ + Key/data in primary => key/data in secondaries" + } + + for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \ + { set dbt [$pdbc get -next] } { + incr i + set pkey [lindex [lindex $dbt 0] 0] + set pdata [lindex [lindex $dbt 0] 1] + for { set j 0 } { $j < [llength $sdbs] } { incr j } { + set sdb [lindex $sdbs $j] + # Check with pget. + foreach skey [[callback_n $j] $pkey $pdata] { + set sdbt [$sdb pget -get_both $skey $pkey] + error_check_good pkey($pkey,$j) \ + [lindex [lindex $sdbt 0] 1] $pkey + error_check_good pdata($pdata,$j) \ + [lindex [lindex $sdbt 0] 2] $pdata + } + } + } + error_check_good ccs_pdbc_close [$pdbc close] 0 + error_check_good primary_has_nentries $i $nentries + + for { set j 0 } { $j < [llength $sdbs] } { incr j } { + if { $verbose_check_secondaries } { + puts "\t\t$pref.2:\ + Key/data in secondary #$j => key/data in primary" + } + set sdb [lindex $sdbs $j] + set sdbc [$sdb cursor] + error_check_good ccs_sdbc($j) [is_valid_cursor $sdbc $sdb] TRUE + for { set dbt [$sdbc pget -first] } { [llength $dbt] > 0 } \ + { set dbt [$sdbc pget -next] } { + set pkey [lindex [lindex $dbt 0] 1] + set pdata [lindex [lindex $dbt 0] 2] + error_check_good pdb_get($pkey/$pdata,$j) \ + [$pdb get -get_both $pkey $pdata] \ + [list [list $pkey $pdata]] + } + + # To exercise pget -last/pget -prev, we do it backwards too. 
+ for { set dbt [$sdbc pget -last] } { [llength $dbt] > 0 } \ + { set dbt [$sdbc pget -prev] } { + set pkey [lindex [lindex $dbt 0] 1] + set pdata [lindex [lindex $dbt 0] 2] + error_check_good pdb_get_bkwds($pkey/$pdata,$j) \ + [$pdb get -get_both $pkey $pdata] \ + [list [list $pkey $pdata]] + } + + error_check_good ccs_sdbc_close($j) [$sdbc close] 0 + } +} + +# The secondary index tests take a list of the access methods that +# each array ought to use. Convert at one blow into a list of converted +# argses and omethods for each method in the list. +proc convert_argses { methods largs } { + set ret {} + foreach m $methods { + lappend ret [convert_args $m $largs] + } + return $ret +} +proc convert_methods { methods } { + set ret {} + foreach m $methods { + lappend ret [convert_method $m] + } + return $ret +} diff --git a/test/tcl/sysscript.tcl b/test/tcl/sysscript.tcl new file mode 100644 index 00000000..c17488bd --- /dev/null +++ b/test/tcl/sysscript.tcl @@ -0,0 +1,282 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# System integration test script. +# This script runs a single process that tests the full functionality of +# the system. The database under test contains nfiles files. Each process +# randomly generates a key and some data. Both keys and data are bimodally +# distributed between small keys (1-10 characters) and large keys (the avg +# length is indicated via the command line parameter. +# The process then decides on a replication factor between 1 and nfiles. +# It writes the key and data to that many files and tacks on the file ids +# of the files it writes to the data string. For example, let's say that +# I randomly generate the key dog and data cat. Then I pick a replication +# factor of 3. I pick 3 files from the set of n (say 1, 3, and 5). I then +# rewrite the data as 1:3:5:cat. 
I begin a transaction, add the key/data +# pair to each file and then commit. Notice that I may generate replication +# of the form 1:3:3:cat in which case I simply add a duplicate to file 3. +# +# Usage: sysscript dir nfiles key_avg data_avg +# +# dir: DB_HOME directory +# nfiles: number of files in the set +# key_avg: average big key size +# data_avg: average big data size + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set mypid [pid] + +set usage "sysscript dir nfiles key_avg data_avg method args" + +# Verify usage +if { $argc < 5 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +puts [concat "Argc: " $argc " Argv: " $argv] + +# Initialize arguments +set dir [lindex $argv 0] +set nfiles [ lindex $argv 1 ] +set key_avg [ lindex $argv 2 ] +set data_avg [ lindex $argv 3 ] +set method [ lindex $argv 4 ] +set args [ lindex $argv 5 ] + +# Initialize seed +global rand_init +berkdb srand $rand_init + +puts "Beginning execution for $mypid" +puts "$dir DB_HOME" +puts "$nfiles files" +puts "$key_avg average key length" +puts "$data_avg average data length" + +flush stdout + +# Create local environment +set dbenv [berkdb_env -txn -home $dir] +set err [catch {error_check_good $mypid:dbenv [is_substr $dbenv env] 1} ret] +if {$err != 0} { + puts $ret + return +} + +# Now open the files +for { set i 0 } { $i < $nfiles } { incr i } { + set file test044.$i.db + set db_set($i) [eval {berkdb open} -auto_commit -env $dbenv $args $method $file ] + set err [catch {error_check_bad $mypid:dbopen $db_set($i) NULL} ret] + if {$err != 0} { + puts $ret + return + } + set err [catch {error_check_bad $mypid:dbopen [is_substr $db_set($i) \ + error] 1} ret] + if {$err != 0} { + puts $ret + return + } +} + +set record_based [is_record_based $method] +while { 1 } { + # Decide if we're going to create a big key or a small key + # We give small keys a 70% chance. 
+ if { [berkdb random_int 1 10] < 8 } { + set k [random_data 5 0 0 $record_based] + } else { + set k [random_data $key_avg 0 0 $record_based] + } + set data [chop_data $method [random_data $data_avg 0 0]] + + set txn [$dbenv txn] + set err [catch {error_check_good $mypid:txn_begin [is_substr $txn \ + $dbenv.txn] 1} ret] + if {$err != 0} { + puts $ret + return + } + + # Open cursors + for { set f 0 } {$f < $nfiles} {incr f} { + set cursors($f) [$db_set($f) cursor -txn $txn] + set err [catch {error_check_good $mypid:cursor_open \ + [is_substr $cursors($f) $db_set($f)] 1} ret] + if {$err != 0} { + puts $ret + return + } + } + set aborted 0 + + # Check to see if key is already in database + set found 0 + for { set i 0 } { $i < $nfiles } { incr i } { + set r [$db_set($i) get -txn $txn $k] + set r [$db_set($i) get -txn $txn $k] + if { $r == "-1" } { + for {set f 0 } {$f < $nfiles} {incr f} { + set err [catch {error_check_good \ + $mypid:cursor_close \ + [$cursors($f) close] 0} ret] + if {$err != 0} { + puts $ret + return + } + } + set err [catch {error_check_good $mypid:txn_abort \ + [$txn abort] 0} ret] + if {$err != 0} { + puts $ret + return + } + set aborted 1 + set found 2 + break + } elseif { $r != "Key $k not found." } { + set found 1 + break + } + } + switch $found { + 2 { + # Transaction aborted, no need to do anything. + } + 0 { + # Key was not found, decide how much to replicate + # and then create a list of that many file IDs. 
+ set repl [berkdb random_int 1 $nfiles] + set fset "" + for { set i 0 } { $i < $repl } {incr i} { + set f [berkdb random_int 0 [expr $nfiles - 1]] + lappend fset $f + set data [chop_data $method $f:$data] + } + + foreach i $fset { + set r [$db_set($i) put -txn $txn $k $data] + if {$r == "-1"} { + for {set f 0 } {$f < $nfiles} {incr f} { + set err [catch {error_check_good \ + $mypid:cursor_close \ + [$cursors($f) close] 0} ret] + if {$err != 0} { + puts $ret + return + } + } + set err [catch {error_check_good \ + $mypid:txn_abort [$txn abort] 0} ret] + if {$err != 0} { + puts $ret + return + } + set aborted 1 + break + } + } + } + 1 { + # Key was found. Make sure that all the data values + # look good. + set f [zero_list $nfiles] + set data $r + while { [set ndx [string first : $r]] != -1 } { + set fnum [string range $r 0 [expr $ndx - 1]] + if { [lindex $f $fnum] == 0 } { + #set flag -set + set full [record $cursors($fnum) get -set $k] + } else { + #set flag -next + set full [record $cursors($fnum) get -next] + } + if {[llength $full] == 0} { + for {set f 0 } {$f < $nfiles} {incr f} { + set err [catch {error_check_good \ + $mypid:cursor_close \ + [$cursors($f) close] 0} ret] + if {$err != 0} { + puts $ret + return + } + } + set err [catch {error_check_good \ + $mypid:txn_abort [$txn abort] 0} ret] + if {$err != 0} { + puts $ret + return + } + set aborted 1 + break + } + set err [catch {error_check_bad \ + $mypid:curs_get($k,$data,$fnum,$flag) \ + [string length $full] 0} ret] + if {$err != 0} { + puts $ret + return + } + set key [lindex [lindex $full 0] 0] + set rec [pad_data $method [lindex [lindex $full 0] 1]] + set err [catch {error_check_good \ + $mypid:dbget_$fnum:key $key $k} ret] + if {$err != 0} { + puts $ret + return + } + set err [catch {error_check_good \ + $mypid:dbget_$fnum:data($k) $rec $data} ret] + if {$err != 0} { + puts $ret + return + } + set f [lreplace $f $fnum $fnum 1] + incr ndx + set r [string range $r $ndx end] + } + } + } + if { $aborted 
== 0 } { + for {set f 0 } {$f < $nfiles} {incr f} { + set err [catch {error_check_good $mypid:cursor_close \ + [$cursors($f) close] 0} ret] + if {$err != 0} { + puts $ret + return + } + } + set err [catch {error_check_good $mypid:commit [$txn commit] \ + 0} ret] + if {$err != 0} { + puts $ret + return + } + } +} + +# Close files +for { set i 0 } { $i < $nfiles} { incr i } { + set r [$db_set($i) close] + set err [catch {error_check_good $mypid:db_close:$i $r 0} ret] + if {$err != 0} { + puts $ret + return + } +} + +# Close tm and environment +$dbenv close + +puts "[timestamp] [pid] Complete" +flush stdout + +filecheck $file 0 diff --git a/test/tcl/t106script.tcl b/test/tcl/t106script.tcl new file mode 100644 index 00000000..34485505 --- /dev/null +++ b/test/tcl/t106script.tcl @@ -0,0 +1,331 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# + +proc t106_initial { nitems nprod id tnum dbenv order args } { + source ./include.tcl + + set pid [pid] + puts "\tTest$tnum: Producer $pid initializing DBs" + + # Each producer initially loads a small number of items to + # each btree database, then enters a RMW loop where it randomly + # selects and executes a cursor operations which either: + # 1. Read-modify-write an item in db2; or + # 2. Read-modify-write an item in both db2 and db3, randomly + # selecting between db2 and db3 on which to open first, which to + # read first, which to write first, which to close first. This + # may create deadlocks so keep trying until it's successful. 
+ + # Open queue database + set dbq [eval {berkdb_open -create -queue -env $dbenv\ + -auto_commit -len 32 queue.db} ] + error_check_good dbq_open [is_valid_db $dbq] TRUE + + # Open four btree databases + set db1 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile1.db] + error_check_good db1_open [is_valid_db $db1] TRUE + set db2 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile2.db] + error_check_good db2_open [is_valid_db $db2] TRUE + set db3 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile3.db] + error_check_good db3_open [is_valid_db $db3] TRUE + set db4 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile4.db] + error_check_good db4_open [is_valid_db $db4] TRUE + + # Initialize databases with $nitems items from each producer. + set did [open $dict] + for { set i 1 } { $i <= $nitems } { incr i } { + set db2data [read $did [berkdb random_int 300 700]] + set db3data [read $did [berkdb random_int 500 1000]] + set qdata [read $did 32] + set suffix _0_$i + set db23key "testclient$id$suffix" + set suffix _$i + set db4key key$id$suffix + + set t [$dbenv txn] + set txn "-txn $t" + error_check_good db2_put [eval {$db2 put} $txn\ + {$db23key $db2data}] 0 + error_check_good db3_put [eval {$db3 put} $txn\ + {$db23key $db3data}] 0 + error_check_good db4_put [eval {$db4 put} $txn\ + {$db4key $db23key}] 0 + + set c [$dbenv txn -parent $t] + set ctxn "-txn $c" + set qrecno [eval {$dbq put -append} $ctxn {$qdata}] + error_check_good db1_put [eval {$db1 put} $ctxn\ + {$qrecno $db2data}] 0 + error_check_good commit_child [$c commit] 0 + error_check_good commit_parent [$t commit] 0 + } + close $did + + set ret [catch {$dbq close} res] + error_check_good dbq_close:$pid $ret 0 + set ret [catch {$db1 close} res] + error_check_good db1_close:$pid $ret 0 + set ret [catch {$db2 close} res] + error_check_good db2_close:$pid $ret 0 + set ret [catch {$db3 close} res] + error_check_good db3_close:$pid $ret 0 + set ret [catch {$db4 
close} res] + error_check_good db4_close:$pid $ret 0 + + puts "\t\tTest$tnum: Initializer $pid finished." +} + +proc t106_produce { nitems nprod id tnum dbenv order niter args } { + source ./include.tcl + + set pid [pid] + set did [open $dict] + puts "\tTest$tnum: Producer $pid initializing DBs" + + # Open queue database + set dbq [eval {berkdb_open -create -queue -env $dbenv\ + -auto_commit -len 32 queue.db} ] + error_check_good dbq_open [is_valid_db $dbq] TRUE + + # Open four btree databases + set db1 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile1.db] + error_check_good db1_open [is_valid_db $db1] TRUE + set db2 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile2.db] + error_check_good db2_open [is_valid_db $db2] TRUE + set db3 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile3.db] + error_check_good db3_open [is_valid_db $db3] TRUE + set db4 [berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile4.db] + error_check_good db4_open [is_valid_db $db4] TRUE + + # Now go into RMW phase. 
+ for { set i 1 } { $i <= $niter } { incr i } { + + set op [berkdb random_int 1 2] + set newdb2data [read $did [berkdb random_int 300 700]] + set qdata [read $did 32] + + if { $order == "ordered" } { + set n [expr $i % $nitems] + if { $n == 0 } { + set n $nitems + } + set suffix _0_$n + } else { + # Retrieve a random key from the list + set suffix _0_[berkdb random_int 1 $nitems] + } + set key "testclient$id$suffix" + + set t [$dbenv txn] + set txn "-txn $t" + + # Now execute op1 or op2 + if { $op == 1 } { + op1 $db2 $key $newdb2data $txn + } elseif { $op == 2 } { + set newdb3data [read $did [berkdb random_int 500 1000]] + op2 $db2 $db3 $key $newdb2data $newdb3data $txn $dbenv + } else { + puts "FAIL: unrecogized op $op" + } + set c [$dbenv txn -parent $t] + set ctxn "-txn $c" + set qrecno [eval {$dbq put -append} $ctxn {$qdata}] + error_check_good db1_put [eval {$db1 put} $ctxn\ + {$qrecno $newdb2data}] 0 + error_check_good child_commit [$c commit] 0 + error_check_good parent_commit [$t commit] 0 + } + close $did + + set ret [catch {$dbq close} res] + error_check_good dbq_close:$pid $ret 0 + set ret [catch {$db1 close} res] + error_check_good db1_close:$pid $ret 0 + set ret [catch {$db2 close} res] + error_check_good db2_close:$pid $ret 0 + set ret [catch {$db3 close} res] + error_check_good db3_close:$pid $ret 0 + set ret [catch {$db4 close} res] + error_check_good db4_close:$pid $ret 0 + + puts "\t\tTest$tnum: Producer $pid finished." +} + +proc t106_consume { nitems tnum outputfile mode dbenv niter args } { + source ./include.tcl + set pid [pid] + puts "\tTest$tnum: Consumer $pid starting ($niter iterations)." + + # Open queue database and btree database 1. 
+ set dbq [eval {berkdb_open \ + -create -queue -env $dbenv -auto_commit -len 32 queue.db} ] + error_check_good dbq_open:$pid [is_valid_db $dbq] TRUE + + set db1 [eval {berkdb_open \ + -create -btree -env $dbenv -auto_commit testfile1.db} ] + error_check_good db1_open:$pid [is_valid_db $db1] TRUE + + set oid [open $outputfile a] + + for { set i 1 } { $i <= $nitems } {incr i } { + set t [$dbenv txn] + set txn "-txn $t" + set ret [eval {$dbq get $mode} $txn] + set qrecno [lindex [lindex $ret 0] 0] + set db1curs [eval {$db1 cursor} $txn] + if {[catch {eval $db1curs get -set -rmw $qrecno} res]} { + puts "FAIL: $db1curs get: $res" + } + error_check_good db1curs_del [$db1curs del] 0 + error_check_good db1curs_close [$db1curs close] 0 + error_check_good txn_commit [$t commit] 0 + } + + error_check_good output_close:$pid [close $oid] "" + + set ret [catch {$dbq close} res] + error_check_good dbq_close:$pid $ret 0 + set ret [catch {$db1 close} res] + error_check_good db1_close:$pid $ret 0 + puts "\t\tTest$tnum: Consumer $pid finished." +} + +# op1 overwrites one data item in db2. +proc op1 { db2 key newdata txn } { + + set db2c [eval {$db2 cursor} $txn] +puts "in op1, key is $key" + set ret [eval {$db2c get -set -rmw $key}] + # Make sure we retrieved something + error_check_good db2c_get [llength $ret] 1 + error_check_good db2c_put [eval {$db2c put} -current {$newdata}] 0 + error_check_good db2c_close [$db2c close] 0 +} + +# op 2 +proc op2 { db2 db3 key newdata2 newdata3 txn dbenv } { + + # Randomly choose whether to work on db2 or db3 first for + # each operation: open cursor, get, put, close. 
+ set open1 [berkdb random_int 0 1] + set get1 [berkdb random_int 0 1] + set put1 [berkdb random_int 0 1] + set close1 [berkdb random_int 0 1] +puts "open [expr $open1 + 2] first, get [expr $get1 + 2] first,\ + put [expr $put1 + 2] first, close [expr $close1 + 2] first" +puts "in op2, key is $key" + + # Open cursor + if { $open1 == 0 } { + set db2c [eval {$db2 cursor} $txn] + set db3c [eval {$db3 cursor} $txn] + } else { + set db3c [eval {$db3 cursor} $txn] + set db2c [eval {$db2 cursor} $txn] + } + error_check_good db2_cursor [is_valid_cursor $db2c $db2] TRUE + error_check_good db3_cursor [is_valid_cursor $db3c $db3] TRUE + + # Do the following until we succeed and don't get DB_DEADLOCK: + if { $get1 == 0 } { + get_set_rmw $db2c $key $dbenv + get_set_rmw $db3c $key $dbenv + } else { + get_set_rmw $db3c $key $dbenv + get_set_rmw $db2c $key $dbenv + } + + # Put new data. + if { $put1 == 0 } { + error_check_good db2c_put [eval {$db2c put} \ + -current {$newdata2}] 0 + error_check_good db3c_put [eval {$db3c put} \ + -current {$newdata3}] 0 + } else { + error_check_good db3c_put [eval {$db3c put} \ + -current {$newdata3}] 0 + error_check_good db2c_put [eval {$db2c put} \ + -current {$newdata2}] 0 + } + if { $close1 == 0 } { + error_check_good db2c_close [$db2c close] 0 + error_check_good db3c_close [$db3c close] 0 + } else { + error_check_good db3c_close [$db3c close] 0 + error_check_good db2c_close [$db2c close] 0 + } +} + +proc get_set_rmw { dbcursor key dbenv } { + + while { 1 } { + if {[catch {set ret [eval {$dbcursor get -set -rmw} $key]}\ + res ]} { + # If the get failed, break if it failed for any + # reason other than deadlock. If we have deadlock, + # the deadlock detector should break the deadlock + # as we keep trying. + if { [is_substr $res DB_LOCK_DEADLOCK] != 1 } { + puts "FAIL: get_set_rmw: $res" + break + } + } else { + # We succeeded. Go back to the body of the test. 
+ break + } + } +} + +source ./include.tcl +source $test_path/test.tcl + +# Verify usage +set usage "t106script.tcl dir runtype nitems nprod outputfile id tnum order" +if { $argc < 10 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [lindex $argv 0] +set runtype [lindex $argv 1] +set nitems [lindex $argv 2] +set nprod [lindex $argv 3] +set outputfile [lindex $argv 4] +set id [lindex $argv 5] +set tnum [lindex $argv 6] +set order [lindex $argv 7] +set niter [lindex $argv 8] +# args is the string "{ -len 20 -pad 0}", so we need to extract the +# " -len 20 -pad 0" part. +set args [lindex [lrange $argv 9 end] 0] + +# Open env +set dbenv [berkdb_env -home $dir -txn] +error_check_good dbenv_open [is_valid_env $dbenv] TRUE + +# Invoke initial, produce or consume based on $runtype +if { $runtype == "INITIAL" } { + t106_initial $nitems $nprod $id $tnum $dbenv $order $args +} elseif { $runtype == "PRODUCE" } { + t106_produce $nitems $nprod $id $tnum $dbenv $order $niter $args +} elseif { $runtype == "WAIT" } { + t106_consume $nitems $tnum $outputfile -consume_wait $dbenv $args +} else { + error_check_good bad_args $runtype "either PRODUCE, or WAIT" +} +error_check_good env_close [$dbenv close] 0 +exit diff --git a/test/tcl/test.tcl b/test/tcl/test.tcl new file mode 100644 index 00000000..2d698a92 --- /dev/null +++ b/test/tcl/test.tcl @@ -0,0 +1,2879 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ + +source ./include.tcl + +# Add the default Windows build sub-directory to the path, so that +# the binaries can be found without copies. +if {[string match Win* $tcl_platform(os)]} { + global env + global buildpath + set env(PATH) "$env(PATH)\;$buildpath" +} + +# Load DB's TCL API. +load $tcllib + +# Check for existing files that might interfere with testing. 
+set badfiles [glob -nocomplain DB_CONFIG __db.*] +if { [llength $badfiles] > 0 } { + error "=====\nPlease move or delete these files from the current\ + directory: \n$badfiles \nThey can cause test failures.\n=====" +} + +if { [file exists $testdir] != 1 } { + file mkdir $testdir +} + +global __debug_print +global __debug_on +global __debug_test + +# +# Test if utilities work to figure out the path. Most systems +# use ., but QNX has a problem with execvp of shell scripts which +# causes it to break. +# +set stat [catch {exec ./db_printlog -?} ret] +if { [string first "exec format error" $ret] != -1 } { + set util_path ./.libs +} else { + set util_path . +} +set __debug_print 0 +set encrypt 0 +set old_encrypt 0 +set passwd test_passwd + +# Error stream that (should!) always go to the console, even if we're +# redirecting to ALL.OUT. +set consoleerr stderr + +set dict $test_path/wordlist +set alphabet "abcdefghijklmnopqrstuvwxyz" +set datastr "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + +# Random number seed. +global rand_init +set rand_init 11302005 + +# Default record length for fixed record length access method(s) +set fixed_len 20 + +set recd_debug 0 +set log_log_record_types 0 +set ohandles {} + +# Normally, we're not running an all-tests-in-one-env run. This matters +# for error stream/error prefix settings in berkdb_open. +global is_envmethod +set is_envmethod 0 + +# +# Set when we're running a child process in a rep test. +# +global is_repchild +set is_repchild 0 + +# Set when we want to use replication test messaging that cannot +# share an env -- for example, because the replication processes +# are not all from the same BDB version. +global noenv_messaging +set noenv_messaging 0 + +# For testing locker id wrap around. 
+global lock_curid +global lock_maxid +set lock_curid 0 +set lock_maxid 2147483647 +global txn_curid +global txn_maxid +set txn_curid 2147483648 +set txn_maxid 4294967295 + +# The variable one_test allows us to run all the permutations +# of a test with run_all or run_std. +global one_test +if { [info exists one_test] != 1 } { + set one_test "ALL" +} + +# If you call a test with the proc find_valid_methods, it will +# return the list of methods for which it will run, instead of +# actually running. +# Btree and recno are always built, but hash, heap, and queue +# can be disabled, so verify that they are there before adding +# them to the list. +source $test_path/testutils.tcl +global checking_valid_methods +set checking_valid_methods 0 +global valid_methods +set valid_methods { btree rbtree recno frecno rrecno } + +source $test_path/testutils.tcl +set conf [berkdb getconfig] +if { [is_substr $conf "queue"] } { + lappend valid_methods "queue" + lappend valid_methods "queueext" +} +if { [is_substr $conf "hash"] } { + lappend valid_methods "hash" +} +if { [is_substr $conf "heap"] } { + lappend valid_methods "heap" +} + +# The variable test_recopts controls whether we open envs in +# replication tests with the -recover flag. The default is +# to test with and without the flag, but to run a meaningful +# subset of rep tests more quickly, rep_subset will randomly +# pick one or the other. +global test_recopts +set test_recopts { "-recover" "" } + +# Set up any OS-specific values. 
+global tcl_platform +set is_freebsd_test [string match FreeBSD $tcl_platform(os)] +set is_hp_test [string match HP-UX $tcl_platform(os)] +set is_linux_test [string match Linux $tcl_platform(os)] +set is_osx_test [string match Darwin $tcl_platform(os)] +set is_qnx_test [string match QNX $tcl_platform(os)] +set is_sunos_test [string match SunOS $tcl_platform(os)] +set is_windows_test [string match Win* $tcl_platform(os)] +set is_windows9x_test [string match "Windows 95" $tcl_platform(osVersion)] +set is_je_test 0 +set upgrade_be [big_endian] +global is_fat32 +set is_fat32 [string match FAT32 [lindex [file system check] 1]] +global EXE BAT +if { $is_windows_test == 1 } { + set EXE ".exe" + set BAT ".bat" +} else { + set EXE "" + set BAT "" +} + +if { $is_windows_test == 1 } { + set util_path "./$buildpath" +} + +# This is where the test numbering and parameters now live. +source $test_path/testparams.tcl +source $test_path/db_reptest.tcl + +# Try to open an encrypted database. If it fails, this release +# doesn't support encryption, and encryption tests should be skipped. +set has_crypto 1 +set stat [catch {set db [eval {berkdb_open_noerr \ + -create -btree -encryptaes test_passwd} ] } result ] +if { $stat != 0 } { + # Make sure it's the right error for a non-crypto release. + error_check_good non_crypto_release \ + [expr [is_substr $result "operation not supported"] || \ + [is_substr $result "invalid argument"]] 1 + set has_crypto 0 +} else { + # It is a crypto release. Get rid of the db, we don't need it. 
+ error_check_good close_encrypted_db [$db close] 0 +} + +# Get the default page size of this system +global default_pagesize +set db [berkdb_open_noerr -create -btree] +error_check_good "db open" [is_valid_db $db] TRUE +set stat [catch {set default_pagesize [$db get_pagesize]} result] +error_check_good "db get_pagesize" $stat 0 +error_check_good "db close" [$db close] 0 + +# From here on out, test.tcl contains the procs that are used to +# run all or part of the test suite. + +proc run_std { { testname ALL } args } { + global test_names + global one_test + global has_crypto + global valid_methods + source ./include.tcl + + set one_test $testname + if { $one_test != "ALL" } { + # Source testparams again to adjust test_names. + source $test_path/testparams.tcl + } + + set exflgs [eval extractflags $args] + set args [lindex $exflgs 0] + set flags [lindex $exflgs 1] + + set display 1 + set run 1 + set am_only 0 + set no_am 0 + set std_only 1 + set rflags {--} + foreach f $flags { + switch $f { + A { + set std_only 0 + } + M { + set no_am 1 + puts "run_std: all but access method tests." + } + m { + set am_only 1 + puts "run_std: access method tests only." 
+ } + n { + set display 1 + set run 0 + set rflags [linsert $rflags 0 "-n"] + } + } + } + + if { $std_only == 1 } { + fileremove -f ALL.OUT + + set o [open ALL.OUT a] + if { $run == 1 } { + puts -nonewline "Test suite run started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + puts -nonewline $o "Test suite run started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + } + close $o + } + + set test_list { + {"environment" "env"} + {"archive" "archive"} + {"backup" "backup"} + {"file operations" "fop"} + {"locking" "lock"} + {"logging" "log"} + {"memory pool" "memp"} + {"mutex" "mutex"} + {"transaction" "txn"} + {"deadlock detection" "dead"} + {"subdatabase" "sdb"} + {"byte-order" "byte"} + {"recno backing file" "rsrc"} + {"DBM interface" "dbm"} + {"NDBM interface" "ndbm"} + {"Hsearch interface" "hsearch"} + {"secondary index" "sindex"} + {"partition" "partition"} + {"compression" "compressed"} + {"automated repmgr tests" "auto_repmgr"} + {"other repmgr tests" "other_repmgr"} + {"repmgr multi-process" "multi_repmgr"} + } + + # If this is run_std only, run each rep test for a single + # access method. If run_all, run for all access methods. + if { $std_only == 1 } { + lappend test_list {"replication" "rep_subset"} + } else { + lappend test_list {"replication" "rep_complete"} + } + + # If release supports encryption, run security tests. + if { $has_crypto == 1 } { + lappend test_list {"security" "sec"} + } + + if { $am_only == 0 } { + foreach pair $test_list { + set msg [lindex $pair 0] + set cmd [lindex $pair 1] + puts "Running $msg tests" + if [catch {exec $tclsh_path << \ + "global one_test; set one_test $one_test; \ + source $test_path/test.tcl; r $rflags $cmd" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: $cmd test: $res" + close $o + } + } + + # Run recovery tests. 
+ # + # XXX These too are broken into separate tclsh instantiations + # so we don't require so much memory, but I think it's cleaner + # and more useful to do it down inside proc r than here, + # since "r recd" gets done a lot and needs to work. + # + # Note that we still wrap the test in an exec so that + # its output goes to ALL.OUT. run_recd will wrap each test + # so that both error streams go to stdout (which here goes + # to ALL.OUT); information that run_recd wishes to print + # to the "real" stderr, but outside the wrapping for each test, + # such as which tests are being skipped, it can still send to + # stderr. + puts "Running recovery tests" + if [catch { + exec $tclsh_path << \ + "global one_test; set one_test $one_test; \ + source $test_path/test.tcl; r $rflags recd" \ + 2>@ stderr >> ALL.OUT + } res] { + set o [open ALL.OUT a] + puts $o "FAIL: recd tests: $res" + close $o + } + + # Run join test + # + # XXX + # Broken up into separate tclsh instantiations so we don't + # require so much memory. + if { $one_test == "ALL" } { + puts "Running join test" + foreach test "join1 join2 join3 join4 join5 join6" { + if [catch {exec $tclsh_path << \ + "source $test_path/test.tcl; r $rflags $test" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: $test test: $res" + close $o + } + } + } + } + + if { $no_am == 0 } { + # Access method tests. + # + # XXX + # Broken up into separate tclsh instantiations so we don't + # require so much memory. 
+ foreach method $valid_methods { + puts "Running $method tests" + foreach test $test_names(test) { + if { $run == 0 } { + set o [open ALL.OUT a] + run_method \ + -$method $test $display $run $o + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + run_method \ + -$method $test $display $run"\ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL:$test $method: $res" + close $o + } + } + } + } + } + + # If not actually running, no need to check for failure. + # If running in the context of the larger 'run_all' we don't + # check for failure here either. + if { $run == 0 || $std_only == 0 } { + return + } + + set failed [check_output ALL.OUT] + + set o [open ALL.OUT a] + if { $failed == 0 } { + puts "Regression Tests Succeeded" + puts $o "Regression Tests Succeeded" + } else { + puts "Regression Tests Failed" + puts "Check UNEXPECTED OUTPUT lines." + puts "Review ALL.OUT.x for details." + puts $o "Regression Tests Failed" + } + + puts -nonewline "Test suite run completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline $o "Test suite run completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o +} + +proc check_output { file } { + # These are all the acceptable patterns. 
+ set pattern {(?x) + ^[:space:]*$| + .*?wrap\.tcl.*| + .*?dbscript\.tcl.*| + .*?ddscript\.tcl.*| + .*?db_replicate.*| + .*?Freeing\slog\sinformation\s.*| + .*?Freeing\smutex\s.*| + .*?Freeing\sread\slocks\s.*| + .*?lt-db_replicate.*| + .*?mpoolscript\.tcl.*| + ^\d\d:\d\d:\d\d\s\(\d\d:\d\d:\d\d\)$| + ^\d\d:\d\d:\d\d\s\(\d\d:\d\d:\d\d\)\sCrashing$| + ^\d\d:\d\d:\d\d\s\(\d\d:\d\d:\d\d\)\s[p|P]rocesses\srunning:.*| + ^\d\d:\d\d:\d\d\s\(\d\d:\d\d:\d\d\)\s5\sprocesses\srunning.*| + ^\d:\sPut\s\d*\sstrings\srandom\soffsets.*| + ^100.*| + ^basic_repmgr_.*\swith:| + ^eval\s.*| + ^exec\s.*| + ^jointest.*$| + ^r\sarchive\s*| + ^r\sbackup\s*| + ^r\sdbm\s*| + ^r\shsearch\s*| + ^r\sndbm\s*| + ^run_recd:\s.*| + ^run_reptest\s.*| + ^run_secenv:\s.*| + ^All\sprocesses\shave\sexited.$| + ^Backuptest\s.*| + ^Beginning\scycle\s\d$| + ^Berkeley\sDB\s.*| + ^Byteorder:.*| + ^Child\sruns\scomplete\.\s\sParent\smodifies\sdata\.$| + ^Deadlock\sdetector:\s\d*\sCheckpoint\sdaemon\s\d*$| + ^Ending\srecord.*| + ^Environment\s.*?specified;\s\sskipping\.$| + ^Executing\srecord\s.*| + ^Join\stest:\.*| + ^Method:\s.*| + ^Putting\s.*databases.*| + ^Repl:\stest\d\d\d:.*| + ^Repl:\ssdb\d\d\d:.*| + ^Running\stest\ssdb.*| + ^Running\stest\stest.*| + ^Running\sall\scases\sof\s.*| + ^run_inmem_db\s.*rep.*| + ^run_inmem_log\s.*rep.*| + ^run_mixedmode_log\s.*rep.*| + ^Script\swatcher\sprocess\s.*| + ^Secondary\sindex\sjoin\s.*| + ^Test\ssuite\srun\s.*| + ^Test\s.*rep.*| + ^To\sreproduce\sthis\scase:.*| + ^Unlinking\slog:\serror\smessage\sOK$| + ^Verifying\s.*| + ^\t*\.\.\.dbc->get.*$| + ^\t*\.\.\.dbc->put.*$| + ^\t*\.\.\.key\s\d.*$| + ^\t*\.\.\.Skipping\sdbc.*| + ^\t*and\s\d*\sduplicate\sduplicates\.$| + ^\t*About\sto\srun\srecovery\s.*complete$| + ^\t*Add\sa\sthird\sversion\s.*| + ^\t*Archive[:\.].*| + ^\t*Backuptest.*| + ^\t*Basic\srepmgr\s.*test.*:.*| + ^\t*Bigfile[0-9][0-9][0-9].*| + ^\t*Building\s.*| + ^\t*bulk\sprocessing.*| + ^\t*closing\ssecondaries\.$| + ^\t*Command\sexecuted\sand\s.*$| + 
^\t*DBM.*| + ^\t*[d|D]ead[0-9][0-9][0-9].*| + ^\t*Dump\/load\sof.*| + ^\t*[e|E]nv[0-9][0-9][0-9].*| + ^\t*Executing\scommand$| + ^\t*Executing\stxn_.*| + ^\t*File\srecd005\.\d\.db\sexecuted\sand\saborted\.$| + ^\t*File\srecd005\.\d\.db\sexecuted\sand\scommitted\.$| + ^\t*[f|F]op[0-9][0-9][0-9].*| + ^\t*HSEARCH.*| + ^\t*in-memory\s.*| + ^\t*Initial\sCheckpoint$| + ^\t*Iteration\s\d*:\sCheckpointing\.$| + ^\t*Joining:\s.*| + ^\t*Kid[1|2]\sabort\.\.\.complete$| + ^\t*Kid[1|2]\scommit\.\.\.complete$| + ^\t*[l|L]ock[0-9][0-9][0-9].*| + ^\t*[l|L]og[0-9][0-9][0-9].*| + ^\t*[m|M]emp[0-9][0-9][0-9].*| + ^\t*[m|M]ut[0-9][0-9][0-9].*| + ^\t*NDBM.*| + ^\t*no\speering| + ^\t*on-disk\s.*| + ^\t*opening\ssecondaries\.$| + ^\t*op_recover_rec:\sRunning\srecovery.*| + ^\t*peering| + ^\t*[r|R]ecd[0-9][0-9][0-9].*| + ^\t*[r|R]ep[0-9][0-9][0-9].*| + ^\t*[r|R]epmgr[0-9][0-9][0-9].*| + ^\t*[r|R]ep_push.*| + ^\t*[r|R]ep_test.*| + ^\t*[r|R]pc[0-9][0-9][0-9].*| + ^\t*[r|R]src[0-9][0-9][0-9].*| + ^\t*Recover\sfrom\sfirst\sdatabase$| + ^\t*Recover\sfrom\ssecond\sdatabase$| + ^\t*regular\sprocessing.*| + ^\t*Remove\ssecond\sdb$| + ^\t*Rep_verify.*| + ^\t*Running\srecovery\son\s.*| + ^\t*[s|S]ec[0-9][0-9][0-9].*| + ^\t*[s|S]i[0-9][0-9][0-9].*| + ^\t*[s|S]ijoin.*| + ^\t*Salvage\stests\sof.*| + ^\t*sdb[0-9][0-9][0-9].*| + ^\t*Skipping\s.*| + ^\t*Subdb[0-9][0-9][0-9].*| + ^\t*Subdbtest[0-9][0-9][0-9].*| + ^\t*Syncing$| + ^\t*[t|T]est[0-9][0-9][0-9].*| + ^\t*[t|T]xn[0-9][0-9][0-9].*| + ^\t*Txnscript.*| + ^\t*Using\s.*option.*$| + ^\t*Using\s.*?\senvironment\.$| + ^\t*Verification\sof.*| + ^\t*with\stransactions$} + + set failed 0 + set f [open $file r] + while { [gets $f line] >= 0 } { + if { [regexp $pattern $line] == 0 } { + puts -nonewline "UNEXPECTED OUTPUT: " + puts $line + set failed 1 + } + } + close $f + return $failed +} + +proc r { args } { + global test_names + global has_crypto + global rand_init + global one_test + global test_recopts + global checking_valid_methods + + source 
./include.tcl + + set exflgs [eval extractflags $args] + set args [lindex $exflgs 0] + set flags [lindex $exflgs 1] + + set display 1 + set run 1 + set saveflags "--" + foreach f $flags { + switch $f { + n { + set display 1 + set run 0 + set saveflags "-n $saveflags" + } + } + } + + if {[catch { + set sub [ lindex $args 0 ] + set starttest [lindex $args 1] + switch $sub { + auto_repmgr - + bigfile - + dead - + env - + lock - + log - + memp - + multi_repmgr - + mutex - + other_repmgr - + rsrc - + sdbtest - + txn { + if { $display } { + run_subsystem $sub 1 0 $starttest + } + if { $run } { + run_subsystem $sub 0 1 $starttest + } + } + byte { + if { $one_test == "ALL" } { + run_test byteorder $display $run + } + } + archive - + backup - + dbm - + hsearch - + ndbm - + shelltest { + if { $one_test == "ALL" } { + if { $display } { puts "eval $sub" } + if { $run } { + check_handles + eval $sub + } + } + } + compact - + elect - + inmemdb - + init - + fop { + set tindx [lsearch $test_names($sub) $starttest] + if { $tindx == -1 } { + set tindx 0 + } + set rlist [lrange $test_names($sub) $tindx end] + foreach test $rlist { + eval run_test $test $display $run + } + } + compressed { + set tindex [lsearch $test_names(test) $starttest] + if { $tindex == -1 } { + set tindex 0 + } + set clist [lrange $test_names(test) $tindex end] + set skip_list [list test126 test127 test128 \ + test129 test131 test132 test133 test134] + foreach stest $skip_list { + set sindx [lsearch -exact $clist $stest] + if {$sindx == -1} { + continue + } + set clist [lreplace $clist $sindx $sindx] + } + foreach test $clist { + eval run_compressed btree $test $display $run + } + } + join { + eval r $saveflags join1 + eval r $saveflags join2 + eval r $saveflags join3 + eval r $saveflags join4 + eval r $saveflags join5 + eval r $saveflags join6 + } + join1 { + if { $display } { puts "eval jointest" } + if { $run } { + check_handles + eval jointest + } + } + joinbench { + puts "[timestamp]" + eval r $saveflags 
join1 + eval r $saveflags join2 + puts "[timestamp]" + } + join2 { + if { $display } { puts "eval jointest 512" } + if { $run } { + check_handles + eval jointest 512 + } + } + join3 { + if { $display } { + puts "eval jointest 8192 0 -join_item" + } + if { $run } { + check_handles + eval jointest 8192 0 -join_item + } + } + join4 { + if { $display } { puts "eval jointest 8192 2" } + if { $run } { + check_handles + eval jointest 8192 2 + } + } + join5 { + if { $display } { puts "eval jointest 8192 3" } + if { $run } { + check_handles + eval jointest 8192 3 + } + } + join6 { + if { $display } { puts "eval jointest 512 3" } + if { $run } { + check_handles + eval jointest 512 3 + } + } + partition { + foreach method { btree hash } { + foreach test "$test_names(recd)\ + $test_names(test)" { + run_range_partition\ + $test $method $display $run + run_partition_callback\ + $test $method $display $run + } + } + } + recd { + check_handles + eval {run_recds all $run $display} [lrange $args 1 end] + } + rep { + run_rep_subset rep $starttest $testdir \ + $display $run $args + } + repmgr { + r other_repmgr + foreach test $test_names(basic_repmgr) { + $test 100 1 1 1 1 1 + $test 100 1 0 0 0 0 + $test 100 0 1 0 0 0 + $test 100 0 0 1 0 0 + $test 100 0 0 0 1 0 + $test 100 0 0 0 0 1 + $test 100 0 0 0 0 0 + } + } + rep_commit { + run_rep_subset rep_commit $starttest $testdir \ + $display $run $args + r repmgr + } + # To run a subset of the complete rep tests, use + # rep_subset, which randomly picks an access type to + # use, and randomly picks whether to open envs with + # the -recover flag. 
+ rep_subset { + run_rep_subset rep $starttest $testdir \ + $display $run $args + } + rep_complete { + set tindex [lsearch $test_names(rep) $starttest] + if { $tindex == -1 } { + set tindex 0 + } + set rlist [lrange $test_names(rep) $tindex end] + foreach test $rlist { + run_test $test $display $run + } + if { $one_test == "ALL" } { + if { $display } { + #puts "basic_db_reptest" + #puts "basic_db_reptest 1" + } + if { $run } { + #basic_db_reptest + #basic_db_reptest 1 + } + } + } + replicate { + # We seed the random number generator here + # instead of in run_replicate so that we + # aren't always reusing the first few + # responses from random_int. + # + berkdb srand $rand_init + foreach sub { test sdb } { + foreach test $test_names($sub) { + eval run_test run_replicate \ + $display $run $test + } + } + } + repmethod { + # We seed the random number generator here + # instead of in run_repmethod so that we + # aren't always reusing the first few + # responses from random_int. + # + berkdb srand $rand_init + foreach sub { test sdb } { + foreach test $test_names($sub) { + eval run_test run_repmethod \ + $display $run $test + } + } + } + sec { + # Skip secure mode tests if release + # does not support encryption. + if { $has_crypto == 0 } { + return + } + if { $display } { + run_subsystem $sub 1 0 + } + if { $run } { + run_subsystem $sub 0 1 + } + } + secmethod { + # Skip secure mode tests if release + # does not support encryption. 
+ if { $has_crypto == 0 } { + return + } + foreach test $test_names(test) { + eval run_test run_secmethod \ + $display $run $test + eval run_test run_secenv \ + $display $run $test + } + } + sdb { + if { $one_test == "ALL" } { + if { $display } { + run_subsystem sdbtest 1 0 + } + if { $run } { + run_subsystem sdbtest 0 1 + } + } + foreach test $test_names(sdb) { + eval run_test $test $display $run + } + } + sindex { + if { $one_test == "ALL" } { + if { $display } { + sindex 1 0 + sijoin 1 0 + } + if { $run } { + sindex 0 1 + sijoin 0 1 + } + } + } + btree - + rbtree - + hash - + iqueue - + iqueueext - + queue - + queueext - + recno - + frecno - + heap - + rrecno { + foreach test $test_names(test) { + eval run_method [lindex $args 0] $test \ + $display $run stdout [lrange $args 1 end] + } + } + + default { + error \ + "FAIL:[timestamp] r: $args: unknown command" + } + } + flush stdout + flush stderr + } res] != 0} { + global errorInfo; + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp] r: $args: $theError" + } else { + error $theError; + } + } +} + +proc run_rep_subset { sub starttest testdir display run args } { + global one_test + global rand_init + global test_names + + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + berkdb srand $rand_init + set tindex [lsearch $test_names($sub) $starttest] + if { $tindex == -1 } { + set tindex 0 + } + set rlist [lrange $test_names($sub) $tindex end] + foreach test $rlist { + set random_recopt [berkdb random_int 0 1] + if { $random_recopt == 1 } { + set test_recopts "-recover" + } else { + set test_recopts {""} + } + + set method_list [find_valid_methods $test] + set list_length [expr [llength $method_list] - 1] + set method_index [berkdb random_int 0 $list_length] + set rand_method [lindex $method_list $method_index] + + if { $display } { + puts "eval $test 
$rand_method; verify_dir \ + $testdir \"\" 1 0 $nodump; salvage_dir $testdir 1" + } + if { $run } { + check_handles + eval $test $rand_method + verify_dir $testdir "" 1 0 $nodump + salvage_dir $testdir 1 + } + } + if { $one_test == "ALL" } { + if { $display } { + #puts "basic_db_reptest" + #puts "basic_db_reptest 1" + } + if { $run } { + #basic_db_reptest + #basic_db_reptest 1 + } + } + set test_recopts { "-recover" "" } +} + +proc run_subsystem { sub {display 0} {run 1} {starttest "NULL"} } { + global test_names + global databases_in_memory + + if { [info exists test_names($sub)] != 1 } { + puts stderr "Subsystem $sub has no tests specified in\ + testparams.tcl; skipping." + return + } + + set index [lsearch $test_names($sub) $starttest] + if { $index == -1 } { + set index 0 + } + set testlist [lrange $test_names($sub) $index end] + + foreach test $testlist { + if { $display } { + puts "eval $test" + } + if { $run } { + check_handles + if {[catch {eval $test} ret] != 0 } { + set databases_in_memory 0 + error "FAIL: run_subsystem: $sub $test: \ + $ret" + } + } + } +} + +proc run_test { test {display 0} {run 1} args } { + source ./include.tcl + global valid_methods + + foreach method $valid_methods { + if { $display } { + puts "eval $test -$method $args; \ + verify_dir $testdir \"\" 1; \ + salvage_dir $testdir 1" + } + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + if { $run } { + check_handles + eval {$test -$method} $args + verify_dir $testdir "" 1 0 $nodump + salvage_dir $testdir 1 + } + } +} + +proc run_method { method test {display 0} {run 1} \ + { outfile stdout } args } { + global __debug_on + global __debug_print + global __debug_test + global test_names + global parms + source ./include.tcl + + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + if {[catch { + if { $display } { + puts -nonewline $outfile "eval \{ $test \} $method" + puts -nonewline $outfile " $parms($test) 
{ $args }" + puts -nonewline $outfile " ; verify_dir $testdir \"\" 1 0 $nodump" + puts $outfile " ; salvage_dir $testdir 1" + } + if { $run } { + check_handles $outfile + puts $outfile "[timestamp]" + eval {$test} $method $parms($test) $args + if { $__debug_print != 0 } { + puts $outfile "" + } + # Verify all databases the test leaves behind + verify_dir $testdir "" 1 0 $nodump + if { $__debug_on != 0 } { + debug $__debug_test + } + salvage_dir $testdir 1 + } + flush stdout + flush stderr + } res] != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_method: $method $test: $theError" + } else { + error $theError; + } + } +} + +# Run a testNNN or recdNNN test with range partitioning. +proc run_range_partition { test method {display 0} {run 1}\ + {outfile stdout} args } { + + # The only allowed access method for range partitioning is btree. + if { [is_btree $method] == 0 } { + if { $display == 0 } { + puts "Skipping range partition\ + tests for method $method" + } + return + } + + # If we've passed in explicit partitioning args, use them; + # otherwise set them. This particular selection hits some + # interesting cases where we set the key to "key". + set largs $args + if { [is_partitioned $args] == 0 } { + lappend largs -partition {ab cd key key1 zzz} + } + + if { [string first recd $test] == 0 } { + eval {run_recd $method $test $run $display} $largs + } elseif { [string first test $test] == 0 } { + eval {run_method $method $test $display $run $outfile} $largs + } else { + puts "Skipping test $test with range partitioning." + } +} + +# Run a testNNN or recdNNN test with partition callbacks. +proc run_partition_callback { test method {display 0} {run 1}\ + {outfile stdout} args } { + + # The only allowed access methods are btree and hash. 
+ if { [is_btree $method] == 0 && [is_hash $method] == 0 } { + if { $display == 0 } { + puts "Skipping partition callback tests\ + for method $method" + } + return + } + + # If we've passed in explicit partitioning args, use them; + # otherwise set them. + set largs $args + if { [is_partition_callback $args] == 0 } { + lappend largs -partition_callback 5 part + } + + if { [string first recd $test] == 0 } { + eval {run_recd $method $test $run $display} $largs + } elseif { [string first test $test] == 0 } { + eval {run_method $method $test $display $run $outfile} $largs + } else { + puts "Skipping test $test with partition callbacks." + } +} + +# +# Run method tests for btree only using compression. +# +proc run_compressed { method test {display 0} {run 1} \ + { outfile stdout } args } { + + if { [is_btree $method] == 0 } { + puts "Skipping compression test for method $method." + return + } + + set largs $args + append largs " -compress " + eval run_method $method $test $display $run $outfile $largs +} + +# +# Run method tests in secure mode. +# +proc run_secmethod { method test {display 0} {run 1} \ + { outfile stdout } args } { + global passwd + global has_crypto + + # Skip secure mode tests if release does not support encryption. + if { $has_crypto == 0 } { + return + } + + set largs $args + append largs " -encryptaes $passwd " + eval run_method $method $test $display $run $outfile $largs +} + +# +# Run method tests each in its own, new secure environment. +# +proc run_secenv { method test {largs ""} } { + global __debug_on + global __debug_print + global __debug_test + global is_envmethod + global has_crypto + global test_names + global parms + global passwd + source ./include.tcl + + # Skip secure mode tests if release does not support encryption. 
+	if { $has_crypto == 0 } {
+		return
+	}
+
+	puts "run_secenv: $method $test $largs"
+
+	set save_largs $largs
+	env_cleanup $testdir
+	set is_envmethod 1
+	set stat [catch {
+		check_handles
+		set env [eval {berkdb_env -create -mode 0644 -home $testdir \
+		    -encryptaes $passwd -pagesize 512 -cachesize {0 4194304 1}}]
+		error_check_good env_open [is_valid_env $env] TRUE
+		append largs " -env $env "
+
+		puts "[timestamp]"
+		if { [info exists parms($test)] != 1 } {
+			puts stderr "$test disabled in\
+			    testparams.tcl; skipping."
+			# "continue" here aborts the catch body: catch traps
+			# the TCL_CONTINUE return code as a non-error result.
+			continue
+		}
+
+		#
+		# Run each test multiple times in the secure env.
+		# Once with a secure env + clear database
+		# Once with a secure env + secure database
+		#
+		eval $test $method $parms($test) $largs
+		append largs " -encrypt "
+		eval $test $method $parms($test) $largs
+
+		if { $__debug_print != 0 } {
+			puts ""
+		}
+		if { $__debug_on != 0 } {
+			debug $__debug_test
+		}
+		flush stdout
+		flush stderr
+		set largs $save_largs
+		error_check_good envclose [$env close] 0
+		error_check_good envremove [berkdb envremove \
+		    -home $testdir -encryptaes $passwd] 0
+	} res]
+	if { $stat != 0} {
+		global errorInfo;
+
+		set fnl [string first "\n" $errorInfo]
+		set theError [string range $errorInfo 0 [expr $fnl - 1]]
+		if {[string first FAIL $errorInfo] == -1} {
+			error "FAIL:[timestamp]\
+			    run_secenv: $method $test: $theError"
+		} else {
+			error $theError;
+		}
+		# NOTE(review): unreachable -- both branches above raise an
+		# error before this line runs; also, is_envmethod is never
+		# reset to 0 on the success path (cf. run_reptest, which does
+		# "set is_envmethod 0" after the catch). Looks like this line
+		# belongs after the closing brace -- confirm intent.
+		set is_envmethod 0
+	}
+
+}
+
+#
+# Run replication method tests in master and client env.
+# This proc runs a specific test/method using the db_replicate utility.
+# +proc run_replicate_test { method test {nsites 2} {largs "" } } { + source ./include.tcl + + global __debug_on + global __debug_print + global __debug_test + global errorInfo + global has_crypto + global is_envmethod + global masterdir + global parms + global passwd + global rep_verbose + global repenv + global verbose_type + + puts "run_replicate_test $method $test $nsites $largs" + + # Test124 can't be run under reptest because we delete all + # the test files at the end of the test to avoid triggering + # verification failures (it uses a non-standard sort). + + if { $test == "test124" } { + puts "Skipping test124 under run_replicate" + return + } + + set verbargs "" + if { $rep_verbose == 1 } { + set verbargs " -verbose {$verbose_type on}" + } + set do_sec 0 + env_cleanup $testdir + set is_envmethod 1 + + # Some tests that use a small db pagesize need a small + # mpool pagesize as well -- otherwise we'll run out of + # mutexes. First determine the natural pagesize, so + # that can be used in the normal case, then adjust where + # needed. + + set tmpenv [berkdb_env -create -home $testdir] + set pg [$tmpenv get_mp_pagesize] + error_check_good env_close [$tmpenv close] 0 + berkdb envremove -home $testdir + + set small_pagesize_tests [list test035 test096 test112 test113 test114] + if { [lsearch -exact $small_pagesize_tests $test] != -1 } { + set pg 512 + } + + # + # Set log smaller than default to force changing files, + # but big enough so that the tests that use binary files + # as keys/data can run. Increase the size of the log region -- + # sdb004 needs this, now that subdatabase names are stored + # in the env region. + # + # All the settings below will be the same for all sites in the group. + # + set logmax [expr 3 * 1024 * 1024] + set lockmax 40000 + set logregion 2097152 + + # + # TODO: Turn on crypto and test with that. Off for now. 
+ # + if { $do_sec && $has_crypto } { + set envargs "-encryptaes $passwd" + append largs " -encrypt " + } else { + set envargs "" + } + check_handles + set last_site [expr $nsites - 1] + set winner [berkdb random_int 0 $last_site] + for { set i 0 } { $i < $nsites } { incr i } { + set repdir($i) $testdir/ENV$i + file mkdir $repdir($i) + if { $i == $winner } { + set pri 10 + } else { + set pri [berkdb random_int 0 1] + } + replicate_make_config $repdir($i) $nsites $i $pri + set envcmd($i) "berkdb_env_noerr -create -log_max $logmax \ + $envargs -rep -home $repdir($i) -txn -thread -pagesize $pg \ + -log_regionmax $logregion -lock_max_objects $lockmax \ + -lock_max_locks $lockmax -errpfx $repdir($i) $verbargs" + set env($i) [eval $envcmd($i)] + error_check_good env_open($i) [is_valid_env $env($i)] TRUE + } + + # + # Now that we have all of the envs opened, we can start db_replicate + # in each one too. Afterward, we check for which site is master. + # + for { set i 0 } { $i < $nsites } { incr i } { + set dpid($i) [eval {exec $util_path/db_replicate -t 3 \ + -h $repdir($i)} -L $testdir/LOG$i &] + puts "Started db_replicate $repdir($i): $dpid($i)" + } + + # + # Wait for enough sites to start and elect someone master. + # For now assume that once the master is elected, all sites + # have started up and we don't have any laggards. If that + # seems to be a problem we could loop checking whether every + # single env knows this master and is at the right LSN. + # + puts "run_replicate_test: Wait for repmgr to elect a master." + await_expected_master $env($winner) 30 + + set masterdir $repdir($winner) + # + # Set up list of client env handles for later checking + # and verification. Skip the master env. 
+ # + set j 0 + set repenv(master) $env($winner) + for { set i 0 } { $i < $nsites } { incr i } { + if { $winner != $i } { + set repenv($j) $env($i) + incr j + } + } + puts "run_replicate_test: Found master at $repdir($winner)" + # + # Give a few seconds for the clients to sync with the master + # before we begin blasting at them. If we don't pause here, + # we otherwise will race with the db_replicate process that is + # in rep_start and our test will fail with DB_LOCK_DEADLOCK. + # This pause gives the group a chance to quiesce. + # + tclsleep 5 + + # + # We went through all that so that we can append '-env masterenv' + # to the largs for the test. Clobber the 30-second anti-archive + # timer in case the test we're about to run wants to do any log + # archiving, database renaming and/or removal. + # + $env($winner) test force noarchive_timeout + append largs " -env $env($winner) " + + # + # Now run the actual test. + # + set stat [catch { + puts "[timestamp]" + if { [info exists parms($test)] != 1 } { + puts stderr "$test disabled in\ + testparams.tcl; skipping." + continue + } + + puts -nonewline "Replicate: $test: $nsites sites " + if { $do_sec } { + puts -nonewline " with security;" + } else { + puts -nonewline " no security;" + } + puts "" + + eval $test $method $parms($test) $largs + + if { $__debug_print != 0 } { + puts "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + flush stdout + flush stderr + } res] + # + # Test is over. We must kill the db_replicate processes no matter + # whether there was an error or not. + # And we must close the envs. We save the original errorInfo + # because it could be overwritten by tclkill. + # + puts "Replicate: $test: Done ($stat). Wait and kill db_replicate." + set save_errInfo $errorInfo + tclsleep 10 + # + # We kill all the clients first then kill the master. 
If we + # just kill them in order, and kill the master first, the others + # may complete an election and the processes get killed in the + # middle of recovery, thus leaving the env locked out which is + # a problem in the verify phase. + # + for { set i 0 } { $i < $nsites } { incr i } { + if { $i != $winner } { + tclkill $dpid($i) + } + } + tclsleep 2 + tclkill $dpid($winner) + if { $stat != 0} { + for { set i 0 } { $i < $nsites } { incr i } { + catch { $env($i) close } ignore + } + + puts "Error result string: $res" + set fnl [string first "\n" $save_errInfo] + set theError [string range $save_errInfo 0 [expr $fnl - 1]] + if {[string first FAIL $save_errInfo] == -1} { + error "FAIL:[timestamp]\ + run_reptest: $method $test: $theError" + } else { + error $theError; + } + } else { + repl_envver0 $test $method [expr $nsites - 1] + for { set i 0 } { $i < $nsites } { incr i } { + catch { $env($i) close } ignore + } + } + + set is_envmethod 0 +} + +# +# Run replication method tests in master and client env. +# This proc runs a specific test/method with our own message handling. +# +proc run_reptest { method test {droppct 0} {nclients 1} {do_del 0} \ + {do_sec 0} {do_oob 0} {largs "" } } { + source ./include.tcl + + global __debug_on + global __debug_print + global __debug_test + global is_envmethod + global parms + global passwd + global has_crypto + + puts "run_reptest \ + $method $test $droppct $nclients $do_del $do_sec $do_oob $largs" + + # Test124 can't be run under reptest because we delete all + # the test files at the end of the test to avoid triggering + # verification failures (it uses a non-standard sort). 
+ + if { $test == "test124" } { + puts "Skipping test124 under run_repmethod" + return + } + + env_cleanup $testdir + set is_envmethod 1 + set stat [catch { + if { $do_sec && $has_crypto } { + set envargs "-encryptaes $passwd" + append largs " -encrypt " + } else { + set envargs "" + } + check_handles + # + # This will set up the master and client envs + # and will return us the args to pass to the + # test. + + set largs [repl_envsetup \ + $envargs $largs $test $nclients $droppct $do_oob] + + puts "[timestamp]" + if { [info exists parms($test)] != 1 } { + puts stderr "$test disabled in\ + testparams.tcl; skipping." + continue + } + + puts -nonewline \ + "Repl: $test: dropping $droppct%, $nclients clients " + if { $do_del } { + puts -nonewline " with delete verification;" + } else { + puts -nonewline " no delete verification;" + } + if { $do_sec } { + puts -nonewline " with security;" + } else { + puts -nonewline " no security;" + } + if { $do_oob } { + puts -nonewline " with out-of-order msgs;" + } else { + puts -nonewline " no out-of-order msgs;" + } + puts "" + + eval $test $method $parms($test) $largs + + if { $__debug_print != 0 } { + puts "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + flush stdout + flush stderr + repl_envprocq $test $nclients $do_oob + repl_envver0 $test $method $nclients + if { $do_del } { + repl_verdel $test $method $nclients + } + repl_envclose $test $envargs + } res] + if { $stat != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_reptest: $method $test: $theError" + } else { + error $theError; + } + } + set is_envmethod 0 +} + +# +# Run replication method tests in master and client env. +# Wrapper to run db_replicate utility test. 
+# +proc run_replicate { method test {nums 0} {display 0} {run 1} \ + {outfile stdout} {largs ""} } { + source ./include.tcl + + set save_largs $largs + env_cleanup $testdir + + # + # Run 2 sites 40%, 3 sites 40%, 4 sites 10%, 5 sites 10% + set site_list { 2 2 2 2 3 3 3 3 4 5 } + set s_len [expr [llength $site_list] - 1] + + if { $nums == 0 } { + set sindex [berkdb random_int 0 $s_len] + set nsites [lindex $site_list $sindex] + } else { + set nsites $nums + } + + if { $display == 1 } { + puts $outfile "eval run_replicate_test $method $test \ + $nsites $largs" + } + if { $run == 1 } { + run_replicate_test $method $test $nsites $largs + } +} + +# +# Run replication method tests in master and client env. +# Wrapper to run a test on a replicated group. +# +proc run_repmethod { method test {numcl 0} {display 0} {run 1} \ + {outfile stdout} {largs ""} } { + source ./include.tcl + + global __debug_on + global __debug_print + global __debug_test + global is_envmethod + global test_names + global parms + global has_crypto + global passwd + + set save_largs $largs + env_cleanup $testdir + + # Use an array for number of clients because we really don't + # want to evenly-weight all numbers of clients. Favor smaller + # numbers but test more clients occasionally. + set drop_list { 0 0 0 0 0 1 1 5 5 10 20 } + set drop_len [expr [llength $drop_list] - 1] + set client_list { 1 1 2 1 1 1 2 2 3 1 } + set cl_len [expr [llength $client_list] - 1] + + if { $numcl == 0 } { + set clindex [berkdb random_int 0 $cl_len] + set nclients [lindex $client_list $clindex] + } else { + set nclients $numcl + } + set drindex [berkdb random_int 0 $drop_len] + set droppct [lindex $drop_list $drindex] + + # Do not drop messages on Windows. Since we can't set + # re-request times with less than millisecond precision, + # dropping messages will cause test failures. 
+ if { $is_windows_test == 1 } { + set droppct 0 + } + + set do_sec [berkdb random_int 0 1] + set do_oob [berkdb random_int 0 1] + + # Test130 cannot run with delete verification. [#18944] + if { $test == "test130" } { + set do_del 0 + } else { + set do_del [berkdb random_int 0 1] + } + + if { $display == 1 } { + puts $outfile "eval run_reptest $method $test $droppct \ + $nclients $do_del $do_sec $do_oob $largs" + } + if { $run == 1 } { + run_reptest $method $test $droppct $nclients $do_del \ + $do_sec $do_oob $largs + } +} + +# +# Run method tests, each in its own, new environment. (As opposed to +# run_envmethod1 which runs all the tests in a single environment.) +# +proc run_envmethod { method test {display 0} {run 1} {outfile stdout} \ + { largs "" } } { + global __debug_on + global __debug_print + global __debug_test + global is_envmethod + global test_names + global parms + source ./include.tcl + + set save_largs $largs + set envargs "" + + # Enlarge the logging region by default - sdb004 needs this because + # it uses very long subdb names, and the names are stored in the + # env region. + set logargs " -log_regionmax 2057152 " + + # Enlarge the cache by default - some compaction tests need it. + set cacheargs "-cachesize {0 4194304 1} -pagesize 512" + env_cleanup $testdir + + if { $display == 1 } { + if { $run == 0 } { + puts $outfile "eval run_envmethod $method $test 0 1 \ + stdout $largs; verify_log $testdir" + } else { + puts $outfile "eval run_envmethod $method \ + $test 0 1 stdout $largs" + } + } + + # To run a normal test using system memory, call run_envmethod + # with the flag -shm. 
+ set sindex [lsearch -exact $largs "-shm"] + if { $sindex >= 0 } { + set shm_key 20 + if { [mem_chk " -system_mem -shm_key $shm_key"] == 1 } { + break + } else { + append envargs " -system_mem -shm_key $shm_key" + set largs [lreplace $largs $sindex $sindex] + } + } + + set sindex [lsearch -exact $largs "-log_max"] + if { $sindex >= 0 } { + append envargs " -log_max 100000 " + set largs [lreplace $largs $sindex $sindex] + } + + # Test for -thread option and pass to berkdb_env open. Leave in + # $largs because -thread can also be passed to an individual + # test as an arg. Double the number of lockers because a threaded + # env requires more than an ordinary env. + if { [lsearch -exact $largs "-thread"] != -1 } { + append envargs " -thread -lock_max_lockers 2000 " + } + + # Test for -alloc option and pass to berkdb_env open only. + # Remove from largs because -alloc is not an allowed test arg. + set aindex [lsearch -exact $largs "-alloc"] + if { $aindex >= 0 } { + append envargs " -alloc " + set largs [lreplace $largs $aindex $aindex] + } + + # We raise the number of locks and objects - there are a few + # compaction tests that require a large number. + set lockargs " -lock_max_locks 40000 -lock_max_objects 20000 " + + if { $run == 1 } { + set is_envmethod 1 + set stat [catch { + check_handles + set env [eval {berkdb_env -create -txn -mode 0644 \ + -home $testdir} $logargs $cacheargs $lockargs $envargs] + error_check_good env_open [is_valid_env $env] TRUE + append largs " -env $env " + + puts "[timestamp]" + if { [info exists parms($test)] != 1 } { + puts stderr "$test disabled in\ + testparams.tcl; skipping." 
+ continue + } + eval $test $method $parms($test) $largs + + if { $__debug_print != 0 } { + puts "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + flush stdout + flush stderr + set largs $save_largs + error_check_good envclose [$env close] 0 +# error_check_good envremove [berkdb envremove \ +# -home $testdir] 0 + } res] + if { $stat != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_envmethod: $method $test: $theError" + } else { + error $theError; + } + } + set is_envmethod 0 + } +} + +proc run_compact { method } { + source ./include.tcl + for {set tnum 111} {$tnum <= 115} {incr tnum} { + run_envmethod $method test$tnum 0 1 stdout -log_max + + puts "\tTest$tnum: Test Recovery" + set env1 [eval berkdb env -create -txn \ + -recover_fatal -home $testdir] + error_check_good env_close [$env1 close] 0 + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 1 ] 0 + puts "\tTest$tnum: Remove db and test Recovery" + exec sh -c "rm -f $testdir/*.db" + set env1 [eval berkdb env -create -txn \ + -recover_fatal -home $testdir] + error_check_good env_close [$env1 close] 0 + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 1 ] 0 + } +} + +proc run_recd { method test {run 1} {display 0} args } { + global __debug_on + global __debug_print + global __debug_test + global parms + global test_names + global log_log_record_types + global gen_upgrade_log + global upgrade_be + global upgrade_dir + global upgrade_method + global upgrade_name + source ./include.tcl + + if { $run == 1 } { + puts "run_recd: $method $test $parms($test) $args" + } + if {[catch { + if { $display } { + puts "eval { $test } $method $parms($test) { $args }" + } + if { $run } { + check_handles + set upgrade_method $method + set upgrade_name $test + puts "[timestamp]" + # By redirecting stdout to stdout, we make exec + # print 
output rather than simply returning it. + # By redirecting stderr to stdout too, we make + # sure everything winds up in the ALL.OUT file. + set ret [catch { exec $tclsh_path << \ + "source $test_path/test.tcl; \ + set log_log_record_types $log_log_record_types;\ + set gen_upgrade_log $gen_upgrade_log;\ + set upgrade_be $upgrade_be; \ + set upgrade_dir $upgrade_dir; \ + set upgrade_method $upgrade_method; \ + set upgrade_name $upgrade_name; \ + eval { $test } $method $parms($test) {$args}" \ + >&@ stdout + } res] + + # Don't die if the test failed; we want + # to just proceed. + if { $ret != 0 } { + puts "FAIL:[timestamp] $res" + } + + if { $__debug_print != 0 } { + puts "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + flush stdout + flush stderr + } + } res] != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_recd: $method: $theError" + } else { + error $theError; + } + } +} + +proc recds {method args} { + eval {run_recds $method 1 0} $args +} + +proc run_recds {{run_methods "all"} {run 1} {display 0} args } { + source ./include.tcl + global log_log_record_types + global test_names + global gen_upgrade_log + global encrypt + global valid_methods + + set log_log_record_types 1 + set run_zero 0 + if { $run_methods == "all" } { + set run_methods $valid_methods + set run_zero 1 + } + logtrack_init + + # Define a small set of tests to run with log file zeroing. + set zero_log_tests \ + {recd001 recd002 recd003 recd004 recd005 recd006 recd007} + + foreach method $run_methods { + check_handles +#set test_names(recd) "recd005 recd017" + foreach test $test_names(recd) { + # Skip recd017 for non-crypto upgrade testing. + # Run only recd017 for crypto upgrade testing. + if { $gen_upgrade_log == 1 && $test == "recd017" && \ + $encrypt == 0 } { + puts "Skipping recd017 for non-crypto run." 
+ continue + } + if { $gen_upgrade_log == 1 && $test != "recd017" && \ + $encrypt == 1 } { + puts "Skipping $test for crypto run." + continue + } + if { [catch {eval {run_recd $method $test $run \ + $display} $args} ret ] != 0 } { + puts $ret + } + + # If it's one of the chosen tests, and btree, run with + # log file zeroing. + set zlog_idx [lsearch -exact $zero_log_tests $test] + if { $run_zero == 1 && \ + $method == "btree" && $zlog_idx > -1 } { + if { [catch {eval {run_recd $method $test \ + $run $display -zero_log} $args} ret ] != 0 } { + puts $ret + } + } + + if { $gen_upgrade_log == 1 } { + save_upgrade_files $testdir + } + } + } + + # We can skip logtrack_summary during the crypto upgrade run - + # it doesn't introduce any new log types. + if { $run } { + if { $gen_upgrade_log == 0 || $encrypt == 0 } { + logtrack_summary + } + } + set log_log_record_types 0 +} + +# A small subset of tests to be used in conjunction with the +# automated builds. Ideally these tests will cover a lot of ground +# but run in only 15 minutes or so. You can put any test in the +# list of tests and it will be run all the ways that run_all +# runs it. +proc run_smoke { } { + source ./include.tcl + global valid_methods + + fileremove -f SMOKE.OUT + + set smoke_tests { \ + lock001 log001 test001 test004 sdb001 sec001 rep001 txn001 } + + # Run each test in all its permutations, and + # concatenate the results in the file SMOKE.OUT. + foreach test $smoke_tests { + run_all $test + set in [open ALL.OUT r] + set out [open SMOKE.OUT a] + while { [gets $in str] != -1 } { + puts $out $str + } + close $in + close $out + } +} + +proc run_inmem_tests { { testname ALL } args } { + global test_names + global one_test + global valid_methods + source ./include.tcl + + fileremove -f ALL.OUT + + set one_test $testname + # Source testparams again to adjust test_names. 
+ source $test_path/testparams.tcl + + set exflgs [eval extractflags $args] + set flags [lindex $exflgs 1] + set display 1 + set run 1 + foreach f $flags { + switch $f { + n { + set display 1 + set run 0 + } + } + } + + set o [open ALL.OUT a] + if { $run == 1 } { + puts -nonewline "Test suite run started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + puts -nonewline $o "Test suite run started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + } + close $o + + # Run in-memory testing for databases, logs, replication files, + # and region files (env -private). It is not necessary to run + # both run_inmem_log and run_mixedmode_log because run_mixedmode_log + # includes the pure in-memory case. + set inmem_procs [list run_inmem_db \ + run_inmem_log run_mixedmode_log run_inmem_rep run_env_private] + + # The above 3 procs only support tests like repXXX, so we only run + # these tests here. + foreach inmem_proc $inmem_procs { + foreach method $valid_methods { + foreach test $test_names(rep) { + # Skip the rep tests that don't support + # particular kinds of in-memory testing + # when appropriate. 
+ if { $inmem_proc == "run_inmem_db" } { + set indx [lsearch -exact \ + $test_names(skip_for_inmem_db) $test] + if { $indx >= 0 } { + continue + } + } + if { $inmem_proc == "run_inmem_rep" } { + set indx [lsearch -exact \ + $test_names(skip_for_inmem_rep) $test] + if { $indx >= 0 } { + continue + } + } + if { $inmem_proc == "run_env_private" } { + set indx [lsearch -exact \ + $test_names(skip_for_env_private) $test] + if { $indx >= 0 } { + continue + } + } + + if { $display } { + set o [open ALL.OUT a] + puts $o "eval \ + $inmem_proc $test -$method; \ + verify_dir $testdir \"\" 1 0 0; \ + salvage_dir $testdir" + close $o + } + + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval $inmem_proc $test -$method;\ + verify_dir $testdir \"\" 1 0 0; \ + salvage_dir $testdir" \ + >>& ALL.OUT } res ] { + set o [open ALL.OUT a] + puts $o "FAIL:$inmem_proc \ + $test -$method: $res" + close $o + } + } + } + } + } + + if { $run == 0 } { + return + } + + set failed [check_output ALL.OUT] + + set o [open ALL.OUT a] + if { $failed == 0 } { + puts "Regression Tests Succeeded" + puts $o "Regression Tests Succeeded" + } else { + puts "Regression Tests Failed" + puts "Check UNEXPECTED OUTPUT lines." + puts "Review ALL.OUT.x for details." + puts $o "Regression Tests Failed" + } + + puts -nonewline "Test suite run completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline $o "Test suite run completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o + +} + + +proc run_all { { testname ALL } args } { + global test_names + global one_test + global has_crypto + global valid_methods + source ./include.tcl + + fileremove -f ALL.OUT + + set one_test $testname + if { $one_test != "ALL" } { + # Source testparams again to adjust test_names. 
+ source $test_path/testparams.tcl + } + + set exflgs [eval extractflags $args] + set flags [lindex $exflgs 1] + set display 1 + set run 1 + set am_only 0 + set parallel 0 + set nparalleltests 0 + set rflags {--} + foreach f $flags { + switch $f { + m { + set am_only 1 + } + n { + set display 1 + set run 0 + set rflags [linsert $rflags 0 "-n"] + } + } + } + + set o [open ALL.OUT a] + if { $run == 1 } { + puts -nonewline "Test suite run started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + puts -nonewline $o "Test suite run started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + } + close $o + # + # First run standard tests. Send in a -A to let run_std know + # that it is part of the "run_all" run, so that it doesn't + # print out start/end times. + # + lappend args -A + eval {run_std} $one_test $args + + set test_pagesizes [get_test_pagesizes] + set args [lindex $exflgs 0] + set save_args $args + + foreach pgsz $test_pagesizes { + set args $save_args + append args " -pagesize $pgsz -chksum" + if { $am_only == 0 } { + # Run recovery tests. + # + # XXX These don't actually work at multiple pagesizes; + # disable them for now. + # + # XXX These too are broken into separate tclsh + # instantiations so we don't require so much + # memory, but I think it's cleaner + # and more useful to do it down inside proc r than here, + # since "r recd" gets done a lot and needs to work. + # + # XXX See comment in run_std for why this only directs + # stdout and not stderr. Don't worry--the right stuff + # happens. + #puts "Running recovery tests with pagesize $pgsz" + #if [catch {exec $tclsh_path \ + # << "source $test_path/test.tcl; \ + # r $rflags recd $args" \ + # 2>@ stderr >> ALL.OUT } res] { + # set o [open ALL.OUT a] + # puts $o "FAIL: recd test:" + # puts $o $res + # close $o + #} + } + + # Access method tests. + # Run subdb tests with varying pagesizes too. 
+ # XXX + # Broken up into separate tclsh instantiations so + # we don't require so much memory. + foreach method $valid_methods { + puts "Running $method tests with pagesize $pgsz" + foreach sub {test sdb si} { + foreach test $test_names($sub) { + if { $run == 0 } { + set o [open ALL.OUT a] + eval {run_method -$method \ + $test $display $run $o} \ + $args + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval {run_method -$method \ + $test $display $run \ + stdout} $args" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: \ + -$method $test: $res" + close $o + } + } + } + } + } + } + set args $save_args + # + # Run access method tests at default page size in one env. + # + foreach method $valid_methods { + puts "Running $method tests in a txn env" + foreach sub {test sdb si} { + foreach test $test_names($sub) { + if { $run == 0 } { + set o [open ALL.OUT a] + run_envmethod -$method $test $display \ + $run $o $args + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + run_envmethod -$method $test \ + $display $run stdout $args" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test: $res" + close $o + } + } + } + } + } + # + # Run access method tests at default page size in thread-enabled env. + # We're not truly running threaded tests, just testing the interface. 
+ # + foreach method $valid_methods { + puts "Running $method tests in a threaded txn env" + foreach sub {test sdb si} { + foreach test $test_names($sub) { + if { $run == 0 } { + set o [open ALL.OUT a] + eval {run_envmethod -$method $test \ + $display $run $o -thread} + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval {run_envmethod -$method $test \ + $display $run stdout -thread}" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test -thread: $res" + close $o + } + } + } + } + } + # + # Run access method tests at default page size with -alloc enabled. + # + foreach method $valid_methods { + puts "Running $method tests in an env with -alloc" + foreach sub {test sdb si} { + foreach test $test_names($sub) { + if { $run == 0 } { + set o [open ALL.OUT a] + eval {run_envmethod -$method $test \ + $display $run $o -alloc} + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval {run_envmethod -$method $test \ + $display $run stdout -alloc}" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test -alloc: $res" + close $o + } + } + } + } + } + + # Run standard access method tests under replication. + # + set test_list [list {"testNNN under replication" "repmethod"}] + + # If we're on Windows, Linux, FreeBSD, or Solaris, run the + # bigfile tests. These create files larger than 4 GB. + if { $is_freebsd_test == 1 || $is_linux_test == 1 || \ + $is_sunos_test == 1 || $is_windows_test == 1 } { + lappend test_list {"big files" "bigfile"} + } + + # If release supports encryption, run security tests. 
+ # + if { $has_crypto == 1 } { + lappend test_list {"testNNN with security" "secmethod"} + } + + foreach pair $test_list { + set msg [lindex $pair 0] + set cmd [lindex $pair 1] + puts "Running $msg tests" + if [catch {exec $tclsh_path << \ + "global one_test; set one_test $one_test; \ + source $test_path/test.tcl; \ + r $rflags $cmd $args" >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: $cmd test: $res" + close $o + } + } + + # If not actually running, no need to check for failure. + if { $run == 0 } { + return + } + + set failed 0 + set o [open ALL.OUT r] + while { [gets $o line] >= 0 } { + if { [regexp {^FAIL} $line] != 0 } { + set failed 1 + } + } + close $o + set o [open ALL.OUT a] + if { $failed == 0 } { + puts "Regression Tests Succeeded" + puts $o "Regression Tests Succeeded" + } else { + puts "Regression Tests Failed; see ALL.OUT for log" + puts $o "Regression Tests Failed" + } + + puts -nonewline "Test suite run completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline $o "Test suite run completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o +} + +proc run_all_new { { testname ALL } args } { + global test_names + global one_test + global has_crypto + global valid_methods + source ./include.tcl + + fileremove -f ALL.OUT + + set one_test $testname + if { $one_test != "ALL" } { + # Source testparams again to adjust test_names. 
+ source $test_path/testparams.tcl + } + + set exflgs [eval extractflags $args] + set flags [lindex $exflgs 1] + set display 1 + set run 1 + set am_only 0 + set parallel 0 + set nparalleltests 0 + set rflags {--} + foreach f $flags { + switch $f { + m { + set am_only 1 + } + n { + set display 1 + set run 0 + set rflags [linsert $rflags 0 "-n"] + } + } + } + + set o [open ALL.OUT a] + if { $run == 1 } { + puts -nonewline "Test suite run started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + puts -nonewline $o "Test suite run started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + } + close $o + # + # First run standard tests. Send in a -A to let run_std know + # that it is part of the "run_all" run, so that it doesn't + # print out start/end times. + # + lappend args -A + eval {run_std} $one_test $args + + set test_pagesizes [get_test_pagesizes] + set args [lindex $exflgs 0] + set save_args $args + + # + # Run access method tests at default page size in one env. + # + foreach method $valid_methods { + puts "Running $method tests in a txn env" + foreach sub {test sdb si} { + foreach test $test_names($sub) { + if { $run == 0 } { + set o [open ALL.OUT a] + run_envmethod -$method $test $display \ + $run $o $args + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + run_envmethod -$method $test \ + $display $run stdout $args" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test: $res" + close $o + } + } + } + } + } + # + # Run access method tests at default page size in thread-enabled env. + # We're not truly running threaded tests, just testing the interface. 
+ # + foreach method $valid_methods { + puts "Running $method tests in a threaded txn env" + set thread_tests "test001" + foreach test $thread_tests { + if { $run == 0 } { + set o [open ALL.OUT a] + eval {run_envmethod -$method $test \ + $display $run $o -thread} + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval {run_envmethod -$method $test \ + $display $run stdout -thread}" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test -thread: $res" + close $o + } + } + } + } + # + # Run access method tests at default page size with -alloc enabled. + # + foreach method $valid_methods { + puts "Running $method tests in an env with -alloc" + set alloc_tests "test001" + foreach test $alloc_tests { + if { $run == 0 } { + set o [open ALL.OUT a] + eval {run_envmethod -$method $test \ + $display $run $o -alloc} + close $o + } + if { $run } { + if [catch {exec $tclsh_path << \ + "global one_test; \ + set one_test $one_test; \ + source $test_path/test.tcl; \ + eval {run_envmethod -$method $test \ + $display $run stdout -alloc}" \ + >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: run_envmethod \ + $method $test -alloc: $res" + close $o + } + } + } + } + + # Run standard access method tests under replication. + # + set test_list [list {"testNNN under replication" "repmethod"}] + + # If we're on Windows, Linux, FreeBSD, or Solaris, run the + # bigfile tests. These create files larger than 4 GB. + if { $is_freebsd_test == 1 || $is_linux_test == 1 || \ + $is_sunos_test == 1 || $is_windows_test == 1 } { + lappend test_list {"big files" "bigfile"} + } + + # If release supports encryption, run security tests. 
+ # + if { $has_crypto == 1 } { + lappend test_list {"testNNN with security" "secmethod"} + } + + foreach pair $test_list { + set msg [lindex $pair 0] + set cmd [lindex $pair 1] + puts "Running $msg tests" + if [catch {exec $tclsh_path << \ + "global one_test; set one_test $one_test; \ + source $test_path/test.tcl; \ + r $rflags $cmd $args" >>& ALL.OUT } res] { + set o [open ALL.OUT a] + puts $o "FAIL: $cmd test: $res" + close $o + } + } + + # If not actually running, no need to check for failure. + if { $run == 0 } { + return + } + + set failed 0 + set o [open ALL.OUT r] + while { [gets $o line] >= 0 } { + if { [regexp {^FAIL} $line] != 0 } { + set failed 1 + } + } + close $o + set o [open ALL.OUT a] + if { $failed == 0 } { + puts "Regression Tests Succeeded" + puts $o "Regression Tests Succeeded" + } else { + puts "Regression Tests Failed; see ALL.OUT for log" + puts $o "Regression Tests Failed" + } + + puts -nonewline "Test suite run completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline $o "Test suite run completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o +} + +# +# Run method tests in one environment. (As opposed to run_envmethod +# which runs each test in its own, new environment.) 
+# +proc run_envmethod1 { method {display 0} {run 1} { outfile stdout } args } { + global __debug_on + global __debug_print + global __debug_test + global is_envmethod + global test_names + global parms + source ./include.tcl + + if { $run == 1 } { + puts "run_envmethod1: $method $args" + } + + set is_envmethod 1 + if { $run == 1 } { + check_handles + env_cleanup $testdir + error_check_good envremove [berkdb envremove -home $testdir] 0 + set env [eval {berkdb_env -create -cachesize {0 10000000 0}} \ + {-pagesize 512 -mode 0644 -home $testdir} $args ] + error_check_good env_open [is_valid_env $env] TRUE + append largs " -env $env " + } + + if { $display } { + # The envmethod1 tests can't be split up, since they share + # an env. + puts $outfile "eval run_envmethod1 $method $args" + } + + set stat [catch { + foreach test $test_names(test) { + if { [info exists parms($test)] != 1 } { + puts stderr "$test disabled in\ + testparams.tcl; skipping." + continue + } + if { $run } { + puts $outfile "[timestamp]" + eval $test $method $parms($test) $largs + if { $__debug_print != 0 } { + puts $outfile "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + } + flush stdout + flush stderr + } + } res] + if { $stat != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_envmethod: $method $test: $theError" + } else { + error $theError; + } + } + set stat [catch { + foreach test $test_names(test) { + if { [info exists parms($test)] != 1 } { + puts stderr "$test disabled in\ + testparams.tcl; skipping." 
+ continue + } + if { $run } { + puts $outfile "[timestamp]" + eval $test $method $parms($test) $largs + if { $__debug_print != 0 } { + puts $outfile "" + } + if { $__debug_on != 0 } { + debug $__debug_test + } + } + flush stdout + flush stderr + } + } res] + if { $stat != 0} { + global errorInfo; + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_envmethod1: $method $test: $theError" + } else { + error $theError; + } + } + if { $run == 1 } { + error_check_good envclose [$env close] 0 + check_handles $outfile + } + set is_envmethod 0 + +} + +# Run the secondary index tests. +proc sindex { {display 0} {run 1} {outfile stdout} {verbose 0} args } { + global test_names + global testdir + global verbose_check_secondaries + set verbose_check_secondaries $verbose + # Standard number of secondary indices to create if a single-element + # list of methods is passed into the secondary index tests. + global nsecondaries + set nsecondaries 2 + + # Run basic tests with a single secondary index and a small number + # of keys, then again with a larger number of keys. (Note that + # we can't go above 5000, since we use two items from our + # 10K-word list for each key/data pair.) + foreach n { 200 5000 } { + foreach pm { btree hash recno frecno queue queueext } { + foreach sm { dbtree dhash ddbtree ddhash btree hash } { + foreach test $test_names(si) { + if { $display } { + puts -nonewline $outfile \ + "eval $test {\[list\ + $pm $sm $sm\]} $n ;" + puts -nonewline $outfile \ + " verify_dir \ + $testdir \"\" 1; " + puts $outfile " salvage_dir \ + $testdir 1" + } + if { $run } { + check_handles $outfile + eval $test \ + {[list $pm $sm $sm]} $n + verify_dir $testdir "" 1 + salvage_dir $testdir 1 + } + } + } + } + } + + # Run tests with 20 secondaries. 
+ foreach pm { btree hash } { + set methlist [list $pm] + for { set j 1 } { $j <= 20 } {incr j} { + # XXX this should incorporate hash after #3726 + if { $j % 2 == 0 } { + lappend methlist "dbtree" + } else { + lappend methlist "ddbtree" + } + } + foreach test $test_names(si) { + if { $display } { + puts "eval $test {\[list $methlist\]} 500" + } + if { $run } { + eval $test {$methlist} 500 + } + } + } +} + +# Run secondary index join test. (There's no point in running +# this with both lengths, the primary is unhappy for now with fixed- +# length records (XXX), and we need unsorted dups in the secondaries.) +proc sijoin { {display 0} {run 1} {outfile stdout} } { + foreach pm { btree hash recno } { + if { $display } { + foreach sm { btree hash } { + puts $outfile "eval sijointest\ + {\[list $pm $sm $sm\]} 1000" + } + puts $outfile "eval sijointest\ + {\[list $pm btree hash\]} 1000" + puts $outfile "eval sijointest\ + {\[list $pm hash btree\]} 1000" + } + if { $run } { + foreach sm { btree hash } { + eval sijointest {[list $pm $sm $sm]} 1000 + } + eval sijointest {[list $pm btree hash]} 1000 + eval sijointest {[list $pm hash btree]} 1000 + } + } +} + +proc run { proc_suffix method {start 1} {stop 999} } { + global test_names + + switch -exact -- $proc_suffix { + envmethod - + method - + recd - + repmethod - + reptest - + secenv - + secmethod { + # Run_recd runs the recd tests, all others + # run the "testxxx" tests. + if { $proc_suffix == "recd" } { + set testtype recd + } else { + set testtype test + } + + for { set i $start } { $i <= $stop } { incr i } { + set name [format "%s%03d" $testtype $i] + # If a test number is missing, silently skip + # to next test; sparse numbering is allowed. 
+ if { [lsearch -exact $test_names($testtype) \ + $name] == -1 } { + continue + } + run_$proc_suffix $method $name + } + } + default { + puts "$proc_suffix is not set up with to be used with run" + } + } +} + + +# We want to test all of 512b, 8Kb, and 64Kb pages, but chances are one +# of these is the default pagesize. We don't want to run all the AM tests +# twice, so figure out what the default page size is, then return the +# other two. +proc get_test_pagesizes { } { + # Create an in-memory database. + set db [berkdb_open -create -btree] + error_check_good gtp_create [is_valid_db $db] TRUE + set statret [$db stat] + set pgsz 0 + foreach pair $statret { + set fld [lindex $pair 0] + if { [string compare $fld {Page size}] == 0 } { + set pgsz [lindex $pair 1] + } + } + + error_check_good gtp_close [$db close] 0 + + error_check_bad gtp_pgsz $pgsz 0 + switch $pgsz { + 512 { return {8192 65536} } + 8192 { return {512 65536} } + 65536 { return {512 8192} } + default { return {512 8192 65536} } + } + error_check_good NOTREACHED 0 1 +} + +proc run_timed_once { timedtest args } { + set start [timestamp -r] + set ret [catch { + eval $timedtest $args + flush stdout + flush stderr + } res] + set stop [timestamp -r] + if { $ret != 0 } { + global errorInfo + + set fnl [string first "\n" $errorInfo] + set theError [string range $errorInfo 0 [expr $fnl - 1]] + if {[string first FAIL $errorInfo] == -1} { + error "FAIL:[timestamp]\ + run_timed: $timedtest: $theError" + } else { + error $theError; + } + } + return [expr $stop - $start] +} + +proc run_timed { niter timedtest args } { + if { $niter < 1 } { + error "run_timed: Invalid number of iterations $niter" + } + set sum 0 + set e {} + for { set i 1 } { $i <= $niter } { incr i } { + set elapsed [eval run_timed_once $timedtest $args] + lappend e $elapsed + set sum [expr $sum + $elapsed] + puts "Test $timedtest run $i completed: $elapsed seconds" + } + if { $niter > 1 } { + set avg [expr $sum / $niter] + puts "Average $timedtest 
time: $avg" + puts "Raw $timedtest data: $e" + } +} diff --git a/test/tcl/test001.tcl b/test/tcl/test001.tcl new file mode 100644 index 00000000..dd650c9c --- /dev/null +++ b/test/tcl/test001.tcl @@ -0,0 +1,221 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test001 +# TEST Small keys/data +# TEST Put/get per key +# TEST Dump file +# TEST Close, reopen +# TEST Dump file +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. +proc test001 { method {nentries 10000} \ + {start 0} {skip 0} {tnum "001"} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Create the database and open the dictionary + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + # If we are not using an external env, then test setting + # the database cache size and using multiple caches. + set txnenv 0 + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + append args " -cachesize {0 1048576 3} " + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test$tnum: $method ($args) $nentries equal key/data pairs" + set did [open $dict] + + # The "start" variable determines the record number to start + # with, if we're using record numbers. 
The "skip" variable + # determines the dictionary entry to start with. + # In normal use, skip will match start. + + puts "\tTest$tnum: Starting at $start with dictionary entry $skip" + if { $skip != 0 } { + for { set count 0 } { $count < $skip } { incr count } { + gets $did str + } + } + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set temp $testdir/temp + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + append gflags " -recno" + } else { + set checkfunc test001.check + } + puts "\tTest$tnum.a: put/get loop" + # Here is the loop where we put and get each key/data pair + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1 + $start] + if { 0xffffffff > 0 && $key > 0xffffffff } { + set key [expr $key - 0x100000000] + } + if { $key == 0 || $key - 0xffffffff == 1 } { + incr key + incr count + } + set kvals($key) [pad_data $method $str] + } else { + set key $str + set str [reverse $str] + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval \ + {$db put} $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + if { $count % 50 == 0 } { + error_check_good txn_checkpoint($count) \ + [$env txn_checkpoint] 0 + } + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + + # Test DB_GET_BOTH for success + set ret [$db get -get_both $key [pad_data $method $str]] + error_check_good \ + getboth $ret [list [list $key [pad_data $method $str]]] + + # Test DB_GET_BOTH for failure + set ret [$db get 
-get_both $key [pad_data $method BAD$str]] + error_check_good getbothBAD [llength $ret] 0 + + incr count + } + close $did + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # Now we will get each key from the DB and compare the results + # to the original. + + puts "\tTest$tnum.b: dump file" + dump_file $db $txn $t1 $checkfunc + # + # dump_file should just have been "get" calls, so + # aborting a get should really be a no-op. Abort + # just for the fun of it. + if { $txnenv == 1 } { + error_check_good txn [$t abort] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for { set i 1 } { $i <= $nentries } { incr i } { + set j [expr $i + $start] + if { 0xffffffff > 0 && $j > 0xffffffff } { + set j [expr $j - 0x100000000] + } + if { $j == 0 } { + incr i + incr j + } + puts $oid $j + } + close $oid + } else { + filehead [expr $nentries + $start] $dict $t2 [expr $start + 1] + } + filesort $t2 $temp + file rename -force $temp $t2 + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tTest$tnum.c: close, open, and dump file" + # Now, reopen the file and run the last test again. + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction "-first" "-next" $args + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Test$tnum:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. 
+ puts "\tTest$tnum.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction "-last" "-prev" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 +} + +# Check function for test001; keys and data are identical +proc test001.check { key data } { + error_check_good "key/data mismatch" $data [reverse $key] +} + +proc test001_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/test002.tcl b/test/tcl/test002.tcl new file mode 100644 index 00000000..ac9e4584 --- /dev/null +++ b/test/tcl/test002.tcl @@ -0,0 +1,160 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test002 +# TEST Small keys/medium data +# TEST Put/get per key +# TEST Dump file +# TEST Close, reopen +# TEST Dump file +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and a fixed, medium length data string; +# TEST retrieve each. After all are entered, retrieve all; compare output +# TEST to original. Close file, reopen, do retrieve and re-verify. + +proc test002 { method {nentries 10000} args } { + global datastr + global pad_datastr + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test002.db + set env NULL + } else { + set testfile test002.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + # Create the database and open the dictionary + puts "Test002: $method ($args) $nentries key pairs" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + + if { [is_record_based $method] == 1 } { + append gflags "-recno" + } + set pad_datastr [pad_data $method $datastr] + puts "\tTest002.a: put/get loop" + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set ret [eval {$db get} $gflags {$key}] + + error_check_good get $ret [list [list $key [pad_data $method $datastr]]] + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest002.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test002.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + filesort $t2 $t3 + file rename -force $t3 $t2 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + } + filesort $t1 $t3 + + error_check_good Test002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again. + puts "\tTest002.c: close, open, and dump file" + eval open_and_dump_file $testfile $env $t1 test002.check \ + dump_file_direction "-first" "-next" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + error_check_good Test002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. + puts "\tTest002.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 test002.check \ + dump_file_direction "-last" "-prev" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + error_check_good Test002:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 +} + +# Check function for test002; data should be fixed are identical +proc test002.check { key data } { + global pad_datastr + error_check_good "data mismatch for key $key" $data $pad_datastr +} diff --git a/test/tcl/test003.tcl b/test/tcl/test003.tcl new file mode 100644 index 00000000..948bac68 --- /dev/null +++ b/test/tcl/test003.tcl @@ -0,0 +1,204 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test003 +# TEST Small keys/large data +# TEST Put/get per key +# TEST Dump file +# TEST Close, reopen +# TEST Dump file +# TEST +# TEST Take the source files and dbtest executable and enter their names +# TEST as the key with their contents as data. After all are entered, +# TEST retrieve all; compare output to original. Close file, reopen, do +# TEST retrieve and re-verify. +proc test003 { method args} { + global names + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if {[is_fixed_length $method] == 1} { + puts "Test003 skipping for method $method" + return + } + puts "Test003: $method ($args) filename=key filecontents=data pairs" + + # Create the database and open the dictionary + set limit 0 + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test003.db + set env NULL + } else { + set testfile test003.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + set limit 100 + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set pflags "" + set gflags "" + set txn "" + if { [is_record_based $method] == 1 } { + set checkfunc test003_recno.check + append gflags "-recno" + } else { + set checkfunc test003.check + } + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list] + set len [llength $file_list] + puts "\tTest003.a: put/get loop $len entries" + set count 0 + foreach f $file_list { + if { [string compare [file type $f] "file"] != 0 } { + continue + } + + if { 
[is_record_based $method] == 1 } { + set key [expr $count + 1] + set names([expr $count + 1]) $f + } else { + set key $f + } + + # Should really catch errors + set fid [open $f r] + fconfigure $fid -translation binary + set data [read $fid] + close $fid + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Should really catch errors + set fid [open $t4 w] + fconfigure $fid -translation binary + if [catch {eval {$db get} $gflags {$key}} data] { + puts -nonewline $fid $data + } else { + # Data looks like {{key data}} + set key [lindex [lindex $data 0] 0] + set data [lindex [lindex $data 0] 1] + puts -nonewline $fid [pad_data $method $data] + } + close $fid + + error_check_good \ + Test003:diff($f,$t4) [filecmp $f $t4] 0 + + incr count + } + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest003.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_bin_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the entries in the + # current directory + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $count} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set oid [open $t2.tmp w] + foreach f $file_list { + if { [string compare [file type $f] "file"] != 0 } { + continue + } + puts $oid $f + } + close $oid + filesort $t2.tmp $t2 + fileremove $t2.tmp + filesort $t1 $t3 + } + + error_check_good \ + Test003:diff($t3,$t2) [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again. 
+ puts "\tTest003.c: close, open, and dump file" + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_bin_file_direction "-first" "-next" $args + + if { [is_record_based $method] == 1 } { + filesort $t1 $t3 -n + } + + error_check_good \ + Test003:diff($t3,$t2) [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. + puts "\tTest003.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_bin_file_direction "-last" "-prev" $args + + if { [is_record_based $method] == 1 } { + filesort $t1 $t3 -n + } + + error_check_good \ + Test003:diff($t3,$t2) [filecmp $t3 $t2] 0 +} + +# Check function for test003; key should be file name; data should be contents +proc test003.check { binfile tmpfile } { + source ./include.tcl + + error_check_good Test003:datamismatch($binfile,$tmpfile) \ + [filecmp $binfile $tmpfile] 0 +} +proc test003_recno.check { binfile tmpfile } { + global names + source ./include.tcl + + set fname $names($binfile) + error_check_good key"$binfile"_exists [info exists names($binfile)] 1 + error_check_good Test003:datamismatch($fname,$tmpfile) \ + [filecmp $fname $tmpfile] 0 +} diff --git a/test/tcl/test004.tcl b/test/tcl/test004.tcl new file mode 100644 index 00000000..275df533 --- /dev/null +++ b/test/tcl/test004.tcl @@ -0,0 +1,168 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test004 +# TEST Small keys/medium data +# TEST Put/get per key +# TEST Sequential (cursor) get/delete +# TEST +# TEST Check that cursor operations work. Create a database. +# TEST Read through the database sequentially using cursors and +# TEST delete each element. 
+proc test004 { method {nentries 10000} {reopen "004"} {build_only 0} args} { + source ./include.tcl + + set do_renumber [is_rrecno $method] + set args [convert_args $method $args] + set omethod [convert_method $method] + + set tnum test$reopen + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/$tnum.db + set env NULL + } else { + set testfile $tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + + puts -nonewline "$tnum:\ + $method ($args) $nentries delete small key; medium data pairs" + if {$reopen == "005"} { + puts "(with close)" + } else { + puts "" + } + + # Create the database and open the dictionary + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + set db [eval {berkdb_open -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + # Here is the loop where we put and get each key/data pair + set kvals "" + puts "\tTest$reopen.a: put/get loop" + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + lappend kvals $str + } else { + set key $str + } + + set datastr [ make_data_str $str ] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags \ + {$key 
[chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good "$tnum:put" $ret \ + [list [list $key [pad_data $method $datastr]]] + incr count + } + close $did + if { $build_only == 1 } { + return $db + } + if { $reopen == "005" } { + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } + puts "\tTest$reopen.b: get/delete loop" + # Now we will get each key from the DB and compare the results + # to the original, then delete it. + set outf [open $t1 w] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set c [eval {$db cursor} $txn] + + set count 0 + for {set d [$c get -first] } { [llength $d] != 0 } { + set d [$c get -next] } { + set k [lindex [lindex $d 0] 0] + set d2 [lindex [lindex $d 0] 1] + if { [is_record_based $method] == 1 } { + set datastr \ + [make_data_str [lindex $kvals [expr $k - 1]]] + } else { + set datastr [make_data_str $k] + } + error_check_good $tnum:$k $d2 [pad_data $method $datastr] + puts $outf $k + $c del + if { [is_record_based $method] == 1 && \ + $do_renumber == 1 } { + set kvals [lreplace $kvals 0 0] + } + incr count + } + close $outf + error_check_good curs_close [$c close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now compare the keys to see if they match the dictionary + if { [is_record_based $method] == 1 } { + error_check_good test$reopen:keys_deleted $count $nentries + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + error_check_good Test$reopen:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + } + + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test005.tcl b/test/tcl/test005.tcl new file mode 100644 index 00000000..744090e1 --- /dev/null +++ b/test/tcl/test005.tcl @@ -0,0 
+1,18 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test005 +# TEST Small keys/medium data +# TEST Put/get per key +# TEST Close, reopen +# TEST Sequential (cursor) get/delete +# TEST +# TEST Check that cursor operations work. Create a database; close +# TEST it and reopen it. Then read through the database sequentially +# TEST using cursors and delete each element. +proc test005 { method {nentries 10000} args } { + eval {test004 $method $nentries "005" 0} $args +} diff --git a/test/tcl/test006.tcl b/test/tcl/test006.tcl new file mode 100644 index 00000000..020a880b --- /dev/null +++ b/test/tcl/test006.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test006 +# TEST Small keys/medium data +# TEST Put/get per key +# TEST Keyed delete and verify +# TEST +# TEST Keyed delete test. +# TEST Create database. +# TEST Go through database, deleting all entries by key. +# TEST Then do the same for unsorted and sorted dups. +proc test006 { method {nentries 10000} {reopen 0} {tnum "006"} \ + {ndups 5} args } { + + test006_body $method $nentries $reopen $tnum 1 "" "" $args + + # For methods supporting dups, run the test with sorted and + # with unsorted dups. 
+ if { [is_btree $method] == 1 || [is_hash $method] == 1 } { + foreach {sort flags} {unsorted -dup sorted "-dup -dupsort"} { + test006_body $method $nentries $reopen \ + $tnum $ndups $sort $flags $args + } + } +} + +proc test006_body { method {nentries 10000} {reopen 0} {tnum "006"} \ + {ndups 5} sort flags {largs ""} } { + global is_je_test + source ./include.tcl + + if { [is_compressed $largs] && $sort == "unsorted" } { + puts "Test$tnum skipping $sort duplicates for compression" + return + } + + set do_renumber [is_rrecno $method] + set largs [convert_args $method $largs] + set omethod [convert_method $method] + + set tname Test$tnum + set dbname test$tnum + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $largs "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set basename $testdir/$dbname + set env NULL + } else { + set basename $dbname + incr eindex + set env [lindex $largs $eindex] + if { $is_je_test && $sort == "unsorted" } { + puts "Test$tnum skipping $sort duplicates for JE" + return + } + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append largs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts -nonewline "$tname: $method ($flags $largs) " + puts -nonewline "$nentries equal small key; medium data pairs" + if {$reopen == 1} { + puts " (with close)" + } else { + puts "" + } + + set pflags "" + set gflags "" + set txn "" + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + cleanup $testdir $env + + # Here is the loop where we put and get each key/data pair. 
+ + set count 0 + set testfile $basename$sort.db + set db [eval {berkdb_open -create \ + -mode 0644} $largs $flags {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\t$tname.a: put/get loop" + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1 ] + } else { + set key $str + } + + set str [make_data_str $str] + for { set j 1 } { $j <= $ndups } {incr j} { + set datastr $j$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn \ + [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags \ + {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn \ + [$t commit] 0 + } + } + incr count + } + close $did + + # Close and reopen database, if testing reopen. + + if { $reopen == 1 } { + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $largs $flags {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } + + # Now we will get each key from the DB and compare the results + # to the original, then delete it. 
+ + puts "\t$tname.b: get/delete loop" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + set i 1 + for { set ret [$dbc get -first] } \ + { [string length $ret] != 0 } \ + { set ret [$dbc get -next] } { + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + if { $i == 1 } { + set curkey $key + } + error_check_good seq_get:key:$i $key $curkey + + if { $i == $ndups } { + set i 1 + } else { + incr i + } + + # Now delete the key + set ret [$dbc del] + error_check_good db_del:$key $ret 0 + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\t$tname.c: verify empty file" + # Double check that file is now empty + set db [eval {berkdb_open} $largs $flags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + set ret [$dbc get -first] + error_check_good get_on_empty [string length $ret] 0 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } +error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test007.tcl b/test/tcl/test007.tcl new file mode 100644 index 00000000..881d5371 --- /dev/null +++ b/test/tcl/test007.tcl @@ -0,0 +1,18 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test007 +# TEST Small keys/medium data +# TEST Put/get per key +# TEST Close, reopen +# TEST Keyed delete +# TEST +# TEST Check that delete operations work. Create a database; close +# TEST database and reopen it. 
Then issues delete by key for each +# TEST entry. (Test006 plus reopen) +proc test007 { method {nentries 10000} {tnum "007"} {ndups 5} args} { + eval {test006 $method $nentries 1 $tnum $ndups} $args +} diff --git a/test/tcl/test008.tcl b/test/tcl/test008.tcl new file mode 100644 index 00000000..7edfbd4b --- /dev/null +++ b/test/tcl/test008.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test008 +# TEST Small keys/large data +# TEST Put/get per key +# TEST Loop through keys by steps (which change) +# TEST ... delete each key at step +# TEST ... add each key back +# TEST ... change step +# TEST Confirm that overflow pages are getting reused +# TEST +# TEST Take the source files and dbtest executable and enter their names as +# TEST the key with their contents as data. After all are entered, begin +# TEST looping through the entries; deleting some pairs and then readding them. +proc test008 { method {reopen "008"} {debug 0} args} { + source ./include.tcl + + set tnum test$reopen + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 } { + puts "Test$reopen skipping for method $method" + return + } + + puts -nonewline "$tnum: $method filename=key filecontents=data pairs" + if {$reopen == "009"} { + puts "(with close)" + } else { + puts "" + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/$tnum.db + set env NULL + } else { + set testfile $tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + cleanup $testdir $env + + set db [eval {berkdb_open -create -mode 0644} \ + $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list] + + set count 0 + puts "\tTest$reopen.a: Initial put/get loop" + foreach f $file_list { + set names($count) $f + set key $f + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + put_file $db $txn $pflags $f + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + get_file $db $txn $gflags $f $t4 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good Test$reopen:diff($f,$t4) \ + [filecmp $f $t4] 0 + + incr count + } + + if {$reopen == "009"} { + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + } + + # Now we will get step through keys again (by increments) and + # delete all the entries, then re-insert them. 
+ + puts "\tTest$reopen.b: Delete re-add loop" + foreach i "1 2 4 8 16" { + for {set ndx 0} {$ndx < $count} { incr ndx $i} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db del} $txn {$names($ndx)}] + error_check_good db_del:$names($ndx) $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + for {set ndx 0} {$ndx < $count} { incr ndx $i} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + put_file $db $txn $pflags $names($ndx) + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + } + + if {$reopen == "009"} { + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + } + + # Now, reopen the file and make sure the key/data pairs look right. + puts "\tTest$reopen.c: Dump contents forward" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_bin_file $db $txn $t1 test008.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set oid [open $t2.tmp w] + foreach f $file_list { + puts $oid $f + } + close $oid + filesort $t2.tmp $t2 + fileremove $t2.tmp + filesort $t1 $t3 + + error_check_good Test$reopen:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. 
+ puts "\tTest$reopen.d: Dump contents backward" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_bin_file_direction $db $txn $t1 test008.check "-last" "-prev" + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + filesort $t1 $t3 + + error_check_good Test$reopen:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + error_check_good close:$db [$db close] 0 +} + +proc test008.check { binfile tmpfile } { + global tnum + source ./include.tcl + + error_check_good diff($binfile,$tmpfile) \ + [filecmp $binfile $tmpfile] 0 +} diff --git a/test/tcl/test009.tcl b/test/tcl/test009.tcl new file mode 100644 index 00000000..e9967e27 --- /dev/null +++ b/test/tcl/test009.tcl @@ -0,0 +1,17 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test009 +# TEST Small keys/large data +# TEST Same as test008; close and reopen database +# TEST +# TEST Check that we reuse overflow pages. Create database with lots of +# TEST big key/data pairs. Go through and delete and add keys back +# TEST randomly. Then close the DB and make sure that we have everything +# TEST we think we should. +proc test009 { method args} { + eval {test008 $method "009" 0} $args +} diff --git a/test/tcl/test010.tcl b/test/tcl/test010.tcl new file mode 100644 index 00000000..b66f9379 --- /dev/null +++ b/test/tcl/test010.tcl @@ -0,0 +1,181 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test010 +# TEST Duplicate test +# TEST Small key/data pairs. +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; add duplicate records for each. +# TEST After all are entered, retrieve all; verify output. +# TEST Close file, reopen, do retrieve and re-verify. 
+# TEST This does not work for recno +proc test010 { method {nentries 10000} {ndups 5} {tnum "010"} args } { + source ./include.tcl + + set omethod $method + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + puts "Test$tnum: $method ($args) $nentries \ + small $ndups dup key/data pairs" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644 -dup} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + for { set i 1 } { $i <= $ndups } { incr i } { + set datastr $i:$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Now retrieve all the keys matching this key + set x 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + for {set ret [$dbc get "-set" $str]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get "-next"] } { + if {[llength $ret] == 0} { + break + } + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + error_check_good "Test$tnum:get" $d $str + set id [ id_of $datastr ] + error_check_good "Test$tnum:dup#" $id $x + incr x + } + error_check_good "Test$tnum:ndups:$str" [expr $x - 1] $ndups + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.a: Checking file for correct duplicates" + set dlist "" + for { set i 1 } { $i <= $ndups } {incr i} { + lappend dlist $i + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now compare the keys to see if they match the dictionary entries + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.b: Checking file for correct duplicates after close" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now compare the keys to see if they match the dictionary entries + filesort $t1 $t3 + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test011.tcl b/test/tcl/test011.tcl new file mode 100644 index 00000000..c938f81c --- /dev/null +++ b/test/tcl/test011.tcl @@ -0,0 +1,475 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test011 +# TEST Duplicate test +# TEST Small key/data pairs. +# TEST Test DB_KEYFIRST, DB_KEYLAST, DB_BEFORE and DB_AFTER. +# TEST To test off-page duplicates, run with small pagesize. +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; add duplicate records for each. +# TEST Then do some key_first/key_last add_before, add_after operations. 
+# TEST This does not work for recno +# TEST +# TEST To test if dups work when they fall off the main page, run this with +# TEST a very tiny page size. +proc test011 { method {nentries 10000} {ndups 5} {tnum "011"} args } { + global dlist + global rand_init + source ./include.tcl + + set dlist "" + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + if { [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + if { [is_record_based $method] == 1 } { + test011_recno $method $nentries $tnum $args + return + } + if {$ndups < 5} { + set ndups 5 + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + + berkdb srand $rand_init + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + + puts -nonewline "Test$tnum: $method $nentries small $ndups dup " + puts "key/data pairs, cursor ops" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set db [eval {berkdb_open -create \ + -mode 0644} [concat $args "-dup"] {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + # We will add dups with values 1, 3, ... $ndups. Then we'll add + # 0 and $ndups+1 using keyfirst/keylast. We'll add 2 and 4 using + # add before and add after. + puts "\tTest$tnum.a: put and get duplicate keys." + set i "" + for { set i 1 } { $i <= $ndups } { incr i 2 } { + lappend dlist $i + } + set maxodd $i + while { [gets $did str] != -1 && $count < $nentries } { + for { set i 1 } { $i <= $ndups } { incr i 2 } { + set datastr $i:$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags {$str $datastr}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Now retrieve all the keys matching this key + set x 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + for {set ret [$dbc get "-set" $str ]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get "-next"] } { + if {[llength $ret] == 0} { + break + } + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + + error_check_good Test$tnum:put $d $str + set id [ id_of $datastr ] + error_check_good Test$tnum:dup# $id $x + incr x 2 + } + error_check_good 
Test$tnum:numdups $x $maxodd + error_check_good curs_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest$tnum.b: \ + traverse entire file checking duplicates before close." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now compare the keys to see if they match the dictionary entries + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.c: \ + traverse entire file checking duplicates after close." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now compare the keys to see if they match the dictionary entries + filesort $t1 $t3 + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tTest$tnum.d: Testing key_first functionality" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + add_dup $db $txn $nentries "-keyfirst" 0 0 + set dlist [linsert $dlist 0 0] + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest$tnum.e: Testing key_last functionality" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + add_dup $db $txn $nentries "-keylast" [expr $maxodd - 1] 0 + lappend dlist [expr $maxodd - 1] + 
dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest$tnum.f: Testing add_before functionality" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + add_dup $db $txn $nentries "-before" 2 3 + set dlist [linsert $dlist 2 2] + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest$tnum.g: Testing add_after functionality" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + add_dup $db $txn $nentries "-after" 4 4 + set dlist [linsert $dlist 4 4] + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good db_close [$db close] 0 +} + +proc add_dup {db txn nentries flag dataval iter} { + source ./include.tcl + + set dbc [eval {$db cursor} $txn] + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + set datastr $dataval:$str + set ret [$dbc get "-set" $str] + error_check_bad "cget(SET)" [is_substr $ret Error] 1 + for { set i 1 } { $i < $iter } { incr i } { + set ret [$dbc get "-next"] + error_check_bad "cget(NEXT)" [is_substr $ret Error] 1 + } + + if { [string compare $flag "-before"] == 0 || + [string compare $flag "-after"] == 0 } { + set ret [$dbc put $flag $datastr] + } else { + set ret [$dbc put $flag $str $datastr] + } + error_check_good "$dbc put $flag" $ret 0 + incr count + } + close $did + $dbc close +} + +proc test011_recno { method {nentries 10000} {tnum "011"} largs } { + global dlist + source ./include.tcl + + set largs [convert_args $method $largs] + set omethod [convert_method $method] + set renum [is_rrecno $method] + + puts "Test$tnum: \ + $method ($largs) $nentries test cursor insert functionality" + + # Create the database and open the dictionary + set eindex [lsearch -exact $largs "-env"] + # + # If we are 
using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $largs $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append largs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + if {$renum == 1} { + append largs " -renumber" + } + set db [eval {berkdb_open \ + -create -mode 0644} $largs {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # The basic structure of the test is that we pick a random key + # in the database and then add items before, after, ?? it. The + # trickiness is that with RECNO, these are not duplicates, they + # are creating new keys. Therefore, every time we do this, the + # keys assigned to other values change. For this reason, we'll + # keep the database in tcl as a list and insert properly into + # it to verify that the right thing is happening. If we do not + # have renumber set, then the BEFORE and AFTER calls should fail. 
+ + # Seed the database with an initial record + gets $did str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {1 [chop_data $method $str]}] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good put $ret 0 + set count 1 + + set dlist "NULL $str" + + # Open a cursor + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + puts "\tTest$tnum.a: put and get entries" + while { [gets $did str] != -1 && $count < $nentries } { + # Pick a random key + set key [berkdb random_int 1 $count] + set ret [$dbc get -set $key] + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good cget:SET:key $k $key + error_check_good \ + cget:SET $d [pad_data $method [lindex $dlist $key]] + + # Current + set ret [$dbc put -current [chop_data $method $str]] + error_check_good cput:$key $ret 0 + set dlist [lreplace $dlist $key $key [pad_data $method $str]] + + # Before + if { [gets $did str] == -1 } { + continue; + } + + if { $renum == 1 } { + set ret [$dbc put \ + -before [chop_data $method $str]] + error_check_good cput:$key:BEFORE $ret $key + set dlist [linsert $dlist $key $str] + incr count + + # After + if { [gets $did str] == -1 } { + continue; + } + set ret [$dbc put \ + -after [chop_data $method $str]] + error_check_good cput:$key:AFTER $ret [expr $key + 1] + set dlist [linsert $dlist [expr $key + 1] $str] + incr count + } + + # Now verify that the keys are in the right place + set i 0 + for {set ret [$dbc get "-set" $key]} \ + {[string length $ret] != 0 && $i < 3} \ + {set ret [$dbc get "-next"] } { + set check_key [expr $key + $i] + + set k [lindex [lindex $ret 0] 0] + error_check_good cget:$key:loop $k $check_key + + set d [lindex [lindex $ret 0] 1] + error_check_good cget:data $d \ + [pad_data $method [lindex $dlist $check_key]] 
+ incr i + } + } + close $did + error_check_good cclose [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Create check key file. + set oid [open $t2 w] + for {set i 1} {$i <= $count} {incr i} { + puts $oid $i + } + close $oid + + puts "\tTest$tnum.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test011_check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good Test$tnum:diff($t2,$t1) \ + [filecmp $t2 $t1] 0 + + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.c: close, open, and dump file" + eval open_and_dump_file $testfile $env $t1 test011_check \ + dump_file_direction "-first" "-next" $largs + error_check_good Test$tnum:diff($t2,$t1) \ + [filecmp $t2 $t1] 0 + + puts "\tTest$tnum.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 test011_check \ + dump_file_direction "-last" "-prev" $largs + + filesort $t1 $t3 -n + error_check_good Test$tnum:diff($t2,$t3) \ + [filecmp $t2 $t3] 0 +} + +proc test011_check { key data } { + global dlist + + error_check_good "get key $key" $data [lindex $dlist $key] +} diff --git a/test/tcl/test012.tcl b/test/tcl/test012.tcl new file mode 100644 index 00000000..cdc616e1 --- /dev/null +++ b/test/tcl/test012.tcl @@ -0,0 +1,138 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test012 +# TEST Large keys/small data +# TEST Same as test003 except use big keys (source files and +# TEST executables) and small data (the file/executable names). +# TEST +# TEST Take the source files and dbtest executable and enter their contents +# TEST as the key with their names as data. After all are entered, retrieve +# TEST all; compare output to original. Close file, reopen, do retrieve and +# TEST re-verify. 
+proc test012 { method args} { + global names + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 } { + puts "Test012 skipping for method $method" + return + } + + puts "Test012: $method ($args) filename=data filecontents=key pairs" + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test012.db + set env NULL + } else { + set testfile test012.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list] + + puts "\tTest012.a: put/get loop" + set count 0 + foreach f $file_list { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + put_file_as_key $db $txn $pflags $f + + set kd [get_file_as_key $db $txn $gflags $f] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest012.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_binkey_file $db $txn $t1 test012.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the data to see if they match the .o and dbtest files + set oid [open $t2.tmp w] + foreach f $file_list { + puts $oid $f + } + close $oid + filesort $t2.tmp $t2 + fileremove $t2.tmp + filesort $t1 $t3 + + error_check_good Test012:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again. + puts "\tTest012.c: close, open, and dump file" + eval open_and_dump_file $testfile $env $t1 test012.check \ + dump_binkey_file_direction "-first" "-next" $args + + filesort $t1 $t3 + + error_check_good Test012:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. + puts "\tTest012.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 test012.check\ + dump_binkey_file_direction "-last" "-prev" $args + + filesort $t1 $t3 + + error_check_good Test012:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 +} + +# Check function for test012; key should be file name; data should be contents +proc test012.check { binfile tmpfile } { + source ./include.tcl + + error_check_good Test012:diff($binfile,$tmpfile) \ + [filecmp $binfile $tmpfile] 0 +} diff --git a/test/tcl/test013.tcl b/test/tcl/test013.tcl new file mode 100644 index 00000000..0b16a219 --- /dev/null +++ b/test/tcl/test013.tcl @@ -0,0 +1,239 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test013 +# TEST Partial put test +# TEST Overwrite entire records using partial puts. +# TEST Make sure that NOOVERWRITE flag works. +# TEST +# TEST 1. 
Insert 10000 keys and retrieve them (equal key/data pairs). +# TEST 2. Attempt to overwrite keys with NO_OVERWRITE set (expect error). +# TEST 3. Actually overwrite each one with its datum reversed. +# TEST +# TEST No partial testing here. +proc test013 { method {nentries 10000} args } { + global errorCode + global errorInfo + global fixed_len + + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test013.db + set env NULL + } else { + set testfile test013.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test013: $method ($args) $nentries equal key/data pairs, put test" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + set checkfunc test013_recno.check + append gflags " -recno" + global kvals + } else { + set checkfunc test013.check + } + puts "\tTest013.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags $txn {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now we will try to overwrite each datum, but set the + # NOOVERWRITE flag. + puts "\tTest013.b: overwrite values with NOOVERWRITE flag." 
+ set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags \ + {-nooverwrite $key [chop_data $method $str]}] + error_check_good put [is_substr $ret "DB_KEYEXIST"] 1 + + # Value should be unchanged. + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now we will replace each item with its datum capitalized. + puts "\tTest013.c: overwrite values with capitalized datum" + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + set rstr [string toupper $str] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} \ + $txn $pflags {$key [chop_data $method $rstr]}] + error_check_good put $r 0 + + # Value should be changed. 
+ set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $rstr]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now make sure that everything looks OK + puts "\tTest013.d: check entire file contents" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {incr i} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good \ + Test013:diff($t3,$t2) [filecmp $t3 $t2] 0 + + puts "\tTest013.e: close, open, and dump file" + # Now, reopen the file and run the last test again. + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction "-first" "-next" $args + + if { [is_record_based $method] == 0 } { + filesort $t1 $t3 + } + + error_check_good \ + Test013:diff($t3,$t2) [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. 
+ puts "\tTest013.f: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction "-last" "-prev" $args + + if { [is_record_based $method] == 0 } { + filesort $t1 $t3 + } + + error_check_good \ + Test013:diff($t3,$t2) [filecmp $t3 $t2] 0 +} + +# Check function for test013; keys and data are identical +proc test013.check { key data } { + error_check_good \ + "key/data mismatch for $key" $data [string toupper $key] +} + +proc test013_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good \ + "data mismatch for $key" $data [string toupper $kvals($key)] +} diff --git a/test/tcl/test014.tcl b/test/tcl/test014.tcl new file mode 100644 index 00000000..65f4adc9 --- /dev/null +++ b/test/tcl/test014.tcl @@ -0,0 +1,252 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test014 +# TEST Exercise partial puts on short data +# TEST Run 5 combinations of numbers of characters to replace, +# TEST and number of times to increase the size by. +# TEST +# TEST Partial put test, small data, replacing with same size. The data set +# TEST consists of the first nentries of the dictionary. We will insert them +# TEST (and retrieve them) as we do in test 1 (equal key/data pairs). Then +# TEST we'll try to perform partial puts of some characters at the beginning, +# TEST some at the end, and some at the middle. 
+proc test014 { method {nentries 10000} args } { + set fixed 0 + set args [convert_args $method $args] + + if { [is_fixed_length $method] == 1 } { + set fixed 1 + } + + puts "Test014: $method ($args) $nentries equal key/data pairs, put test" + + # flagp indicates whether this is a postpend or a + # normal partial put + set flagp 0 + + eval {test014_body $method $flagp 1 1 $nentries} $args + eval {test014_body $method $flagp 1 4 $nentries} $args + eval {test014_body $method $flagp 2 4 $nentries} $args + eval {test014_body $method $flagp 1 128 $nentries} $args + eval {test014_body $method $flagp 2 16 $nentries} $args + if { $fixed == 0 } { + eval {test014_body $method $flagp 0 1 $nentries} $args + eval {test014_body $method $flagp 0 4 $nentries} $args + eval {test014_body $method $flagp 0 128 $nentries} $args + + # POST-PENDS : + # partial put data after the end of the existent record + # chars: number of empty spaces that will be padded with null + # increase: is the length of the str to be appended (after pad) + # + set flagp 1 + eval {test014_body $method $flagp 1 1 $nentries} $args + eval {test014_body $method $flagp 4 1 $nentries} $args + eval {test014_body $method $flagp 128 1 $nentries} $args + eval {test014_body $method $flagp 1 4 $nentries} $args + eval {test014_body $method $flagp 1 128 $nentries} $args + } + puts "Test014 complete." +} + +proc test014_body { method flagp chars increase {nentries 10000} args } { + source ./include.tcl + + set omethod [convert_method $method] + + if { [is_fixed_length $method] == 1 && $chars != $increase } { + puts "Test014: $method: skipping replace\ + $chars chars with string $increase times larger." + return + } + + if { $flagp == 1} { + puts "Test014: Postpending string of len $increase with \ + gap $chars." 
+ } else { + puts "Test014: Replace $chars chars with string \ + $increase times larger" + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test014.db + set env NULL + } else { + set testfile test014.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set gflags "" + set pflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + puts "\tTest014.a: put/get loop" + # Here is the loop where we put and get each key/data pair + # We will do the initial put and then three Partial Puts + # for the beginning, middle and end of the string. 
+ set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + if { $flagp == 1 } { + # this is for postpend only + global dvals + + # initial put + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $str}] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good dbput $ret 0 + + set offset [string length $str] + + # increase is the actual number of new bytes + # to be postpended (besides the null padding) + set data [repeat "P" $increase] + + # chars is the amount of padding in between + # the old data and the new + set len [expr $offset + $chars + $increase] + set dvals($key) [binary format \ + a[set offset]x[set chars]a[set increase] \ + $str $data] + set offset [expr $offset + $chars] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put -partial [list $offset 0]} \ + $txn {$key $data}] + error_check_good dbput:post $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } else { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + partial_put $method $db $txn \ + $gflags $key $str $chars $increase + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + incr count + } + close $did + + # Now make sure that everything looks OK + puts "\tTest014.b: check entire file contents" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test014.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based 
$method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good \ + Test014:diff($t3,$t2) [filecmp $t3 $t2] 0 + + puts "\tTest014.c: close, open, and dump file" + # Now, reopen the file and run the last test again. + eval open_and_dump_file $testfile $env \ + $t1 test014.check dump_file_direction "-first" "-next" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t2 $t3 + file rename -force $t3 $t2 + filesort $t1 $t3 + } + + error_check_good \ + Test014:diff($t3,$t2) [filecmp $t3 $t2] 0 + # Now, reopen the file and run the last test again in the + # reverse direction. + puts "\tTest014.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 \ + test014.check dump_file_direction "-last" "-prev" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t2 $t3 + file rename -force $t3 $t2 + filesort $t1 $t3 + } + + error_check_good \ + Test014:diff($t3,$t2) [filecmp $t3 $t2] 0 +} + +# Check function for test014; keys and data are identical +proc test014.check { key data } { + global dvals + + error_check_good key"$key"_exists [info exists dvals($key)] 1 + error_check_good "data mismatch for key $key" $data $dvals($key) +} diff --git a/test/tcl/test015.tcl b/test/tcl/test015.tcl new file mode 100644 index 00000000..0c649c08 --- /dev/null +++ b/test/tcl/test015.tcl @@ -0,0 +1,284 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test015 +# TEST Partial put test +# TEST Partial put test where the key does not initially exist. 
+proc test015 { method {nentries 7500} { start 0 } args } { + global fixed_len testdir + set orig_tdir $testdir + + set low_range 50 + set mid_range 100 + set high_range 1000 + + if { [is_fixed_length $method] } { + set low_range [expr $fixed_len/2 - 2] + set mid_range [expr $fixed_len/2] + set high_range $fixed_len + } + + set t_table { + { 1 { 1 1 1 } } + { 2 { 1 1 5 } } + { 3 { 1 1 $low_range } } + { 4 { 1 $mid_range 1 } } + { 5 { $mid_range $high_range 5 } } + { 6 { 1 $mid_range $low_range } } + } + + puts "Test015: \ + $method ($args) $nentries equal key/data pairs, partial put test" + test015_init + if { $start == 0 } { + set start { 1 2 3 4 5 6 } + } + if { [is_partitioned $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + foreach entry $t_table { + set this [lindex $entry 0] + if { [lsearch $start $this] == -1 } { + continue + } + puts -nonewline "$this: " + eval [concat test015_body $method [lindex $entry 1] \ + $nentries $args] + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + set testdir [get_home $env] + } + + error_check_good verify \ + [verify_dir $testdir "\tTest015.e:" 0 0 $nodump] 0 + } + set testdir $orig_tdir +} + +proc test015_init { } { + global rand_init + + berkdb srand $rand_init +} + +proc test015_body { method off_low off_hi rcount {nentries 10000} args } { + global dvals + global fixed_len + global testdir + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set orig_tdir $testdir + set checkfunc test015.check + + if { [is_fixed_length $method] && \ + [string compare $omethod "-recno"] == 0} { + # is fixed recno method + set checkfunc test015.check + } + + puts "Put $rcount strings random offsets between $off_low and $off_hi" + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. 
+ # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test015.db + set env NULL + } else { + set testfile test015.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries > 5000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set retdir $testdir + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + puts "\tTest015.a: put/get loop for $nentries entries" + + # Here is the loop where we put and get each key/data pair + # Each put is a partial put of a record that does not exist. + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + if { [string length $str] > $fixed_len } { + continue + } + set key [expr $count + 1] + } else { + set key $str + } + + if { 0 } { + set data [replicate $str $rcount] + set off [ berkdb random_int $off_low $off_hi ] + set offn [expr $off + 1] + if { [is_fixed_length $method] && \ + [expr [string length $data] + $off] >= $fixed_len} { + set data [string range $data 0 [expr $fixed_len-$offn]] + } + set dvals($key) [partial_shift $data $off right] + } else { + set data [chop_data $method [replicate $str $rcount]] + + # This is a hack. In DB we will store the records with + # some padding, but these will get lost if we just return + # them in TCL. As a result, we're going to have to hack + # get to check for 0 padding and return a list consisting + # of the number of 0's and the actual data. 
+ set off [ berkdb random_int $off_low $off_hi ] + + # There is no string concatenation function in Tcl + # (although there is one in TclX), so we have to resort + # to this hack. Ugh. + set slen [string length $data] + if {[is_fixed_length $method] && \ + $slen > $fixed_len - $off} { + set $slen [expr $fixed_len - $off] + } + set a "a" + set dvals($key) [pad_data \ + $method [eval "binary format x$off$a$slen" {$data}]] + } + if {[is_fixed_length $method] && \ + [string length $data] > ($fixed_len - $off)} { + set slen [expr $fixed_len - $off] + set data [eval "binary format a$slen" {$data}] + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn \ + {-partial [list $off [string length $data]] $key $data}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + incr count + } + close $did + + # Now make sure that everything looks OK + puts "\tTest015.b: check entire file contents" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + filesort $t2 $t3 + file rename -force $t3 $t2 + filesort $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Test015:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tTest015.c: close, open, and dump file" + # Now, reopen the file and run the last test again. 
+ eval open_and_dump_file $testfile $env $t1 \ + $checkfunc dump_file_direction "-first" "-next" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Test015:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in the + # reverse direction. + puts "\tTest015.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 \ + $checkfunc dump_file_direction "-last" "-prev" $args + + if { [string compare $omethod "-recno"] != 0 } { + filesort $t1 $t3 + } + + error_check_good Test015:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + unset dvals + set testdir $orig_tdir +} + +# Check function for test015; keys and data are identical +proc test015.check { key data } { + global dvals + + error_check_good key"$key"_exists [info exists dvals($key)] 1 + binary scan $data "c[string length $data]" a + binary scan $dvals($key) "c[string length $dvals($key)]" b + error_check_good "mismatch on padding for key $key" $a $b +} + +proc test015.fixed.check { key data } { + global dvals + global fixed_len + + error_check_good key"$key"_exists [info exists dvals($key)] 1 + if { [string length $data] > $fixed_len } { + error_check_bad \ + "data length:[string length $data] \ + for fixed:$fixed_len" 1 1 + } + puts "$data : $dvals($key)" + error_check_good compare_data($data,$dvals($key) \ + $dvals($key) $data +} diff --git a/test/tcl/test016.tcl b/test/tcl/test016.tcl new file mode 100644 index 00000000..96915f39 --- /dev/null +++ b/test/tcl/test016.tcl @@ -0,0 +1,206 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test016 +# TEST Partial put test +# TEST Partial put where the datum gets shorter as a result of the put. +# TEST +# TEST Partial put test where partial puts make the record smaller. +# TEST Use the first 10,000 entries from the dictionary. 
+# TEST Insert each with self as key and a fixed, medium length data string; +# TEST retrieve each. After all are entered, go back and do partial puts, +# TEST replacing a random-length string with the key value. +# TEST Then verify. + +proc test016 { method {nentries 10000} args } { + global datastr + global dvals + global rand_init + source ./include.tcl + + berkdb srand $rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_fixed_length $method] == 1 } { + puts "Test016: skipping for method $method" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test016.db + set env NULL + } else { + set testfile test016.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test016: $method ($args) $nentries partial put shorten" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + # Here is the loop where we put and get each key/data pair + puts "\tTest016.a: put/get loop" + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $datastr]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Next we will do a partial put replacement, making the data + # shorter + puts "\tTest016.b: partial put loop" + set did [open $dict] + set count 0 + set len [string length $datastr] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + + set repl_len [berkdb random_int [string length $key] $len] + set repl_off [berkdb random_int 0 [expr $len - $repl_len] ] + set s1 [string range $datastr 0 [ expr $repl_off - 1] ] + set s2 [string toupper $key] + set s3 [string range $datastr [expr $repl_off + $repl_len] end ] + set dvals($key) [pad_data $method $s1$s2$s3] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t 
$env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {-partial \ + [list $repl_off $repl_len] $key [chop_data $method $s2]}] + error_check_good put $ret 0 + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + put $ret [list [list $key [pad_data $method $s1$s2$s3]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest016.c: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test016.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Test016:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again. + puts "\tTest016.d: close, open, and dump file" + eval open_and_dump_file $testfile $env $t1 test016.check \ + dump_file_direction "-first" "-next" $args + + if { [ is_record_based $method ] == 0 } { + filesort $t1 $t3 + } + error_check_good Test016:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. 
+ puts "\tTest016.e: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 test016.check \ + dump_file_direction "-last" "-prev" $args + + if { [ is_record_based $method ] == 0 } { + filesort $t1 $t3 + } + error_check_good Test016:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 +} + +# Check function for test016; data should be whatever is set in dvals +proc test016.check { key data } { + global datastr + global dvals + + error_check_good key"$key"_exists [info exists dvals($key)] 1 + error_check_good "data mismatch for key $key" $data $dvals($key) +} diff --git a/test/tcl/test017.tcl b/test/tcl/test017.tcl new file mode 100644 index 00000000..cfe2cb2f --- /dev/null +++ b/test/tcl/test017.tcl @@ -0,0 +1,321 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test017 +# TEST Basic offpage duplicate test. +# TEST +# TEST Run duplicates with small page size so that we test off page duplicates. +# TEST Then after we have an off-page database, test with overflow pages too. +proc test017 { method {contents 0} {ndups 19} {tnum "017"} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + if { [lindex $args $pgindex] > 16384 } { + puts "Test$tnum: Skipping for large pagesizes" + return + } + } + + # Create the database and open the dictionary + set limit 0 + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. 
+ # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + set limit 100 + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644 -dup} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + set file_list [get_file_list 1] + if { $txnenv == 1 } { + if { [llength $file_list] > $limit } { + set file_list [lrange $file_list 0 $limit] + } + set flen [llength $file_list] + reduce_dups flen ndups + } + puts "Test$tnum: $method ($args) Off page duplicate tests\ + with $ndups duplicates" + + set ovfl "" + # Here is the loop where we put and get each key/data pair + puts -nonewline "\tTest$tnum.a: Creating duplicates with " + if { $contents != 0 } { + puts "file contents as key/data" + } else { + puts "file name as key/data" + } + foreach f $file_list { + if { $contents != 0 } { + set fid [open $f r] + fconfigure $fid -translation binary + # + # Prepend file name to guarantee uniqueness + set filecont [read $fid] + set str $f:$filecont + close $fid + } else { + set str $f + } + for { set i 1 } { $i <= $ndups } { incr i } { + set datastr $i:$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # + # Save 10% files for overflow test + # + if { $contents == 0 && [expr $count % 10] == 0 } { + lappend ovfl $f + } + # Now retrieve all the keys matching this key + set 
ret [$db get $str] + error_check_bad $f:dbget_dups [llength $ret] 0 + error_check_good $f:dbget_dups1 [llength $ret] $ndups + set x 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + for {set ret [$dbc get "-set" $str]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get "-next"] } { + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + if {[string length $d] == 0} { + break + } + error_check_good "Test$tnum:get" $d $str + set id [ id_of $datastr ] + error_check_good "Test$tnum:$f:dup#" $id $x + incr x + } + error_check_good "Test$tnum:ndups:$str" [expr $x - 1] $ndups + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest$tnum.b: Checking file for correct duplicates" + set dlist "" + for { set i 1 } { $i <= $ndups } {incr i} { + lappend dlist $i + } + set oid [open $t2.tmp w] + set o1id [open $t4.tmp w] + foreach f $file_list { + for {set i 1} {$i <= $ndups} {incr i} { + puts $o1id $f + } + puts $oid $f + } + close $oid + close $o1id + filesort $t2.tmp $t2 + filesort $t4.tmp $t4 + fileremove $t2.tmp + fileremove $t4.tmp + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + if {$contents == 0} { + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t2) [filecmp $t3 $t2] 0 + + # Now compare the keys to see if they match the file names + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test017.check + if { $txnenv == 1 } { + 
error_check_good txn [$t commit] 0 + } + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t4) [filecmp $t3 $t4] 0 + } + + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.c: Checking file for correct duplicates after close" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if {$contents == 0} { + # Now compare the keys to see if they match the filenames + filesort $t1 $t3 + error_check_good Test$tnum:diff($t3,$t2) [filecmp $t3 $t2] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.d: Verify off page duplicates and overflow status" + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set stat [$db stat] + if { [is_btree $method] } { + error_check_bad stat:offpage \ + [is_substr $stat "{{Internal pages} 0}"] 1 + } + if {$contents == 0} { + # This check doesn't work in hash, since overflow + # pages count extra pages in buckets as well as true + # P_OVERFLOW pages. + if { [is_hash $method] == 0 } { + error_check_good overflow \ + [is_substr $stat "{{Overflow pages} 0}"] 1 + } + } else { + if { [is_hash $method] } { + error_check_bad overflow \ + [is_substr $stat "{{Number of big pages} 0}"] 1 + } else { + error_check_bad overflow \ + [is_substr $stat "{{Overflow pages} 0}"] 1 + } + } + + # + # If doing overflow test, do that now. Else we are done. + # Add overflow pages by adding a large entry to a duplicate. 
+ # + if { [llength $ovfl] == 0} { + error_check_good db_close [$db close] 0 + return + } + + puts "\tTest$tnum.e: Add overflow duplicate entries" + set ovfldup [expr $ndups + 1] + foreach f $ovfl { + # + # This is just like put_file, but prepends the dup number + # + set fid [open $f r] + fconfigure $fid -translation binary + set fdata [read $fid] + close $fid + set data $ovfldup:$fdata:$fdata:$fdata:$fdata + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags {$f $data}] + error_check_good ovfl_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + puts "\tTest$tnum.f: Verify overflow duplicate entries" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist $ovfldup + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + filesort $t1 $t3 + error_check_good Test$tnum:diff($t3,$t2) [filecmp $t3 $t2] 0 + + set stat [$db stat] + if { [is_hash [$db get_type]] } { + error_check_bad overflow1_hash [is_substr $stat \ + "{{Number of big pages} 0}"] 1 + } else { + error_check_bad \ + overflow1 [is_substr $stat "{{Overflow pages} 0}"] 1 + } + error_check_good db_close [$db close] 0 +} + +# Check function; verify data contains key +proc test017.check { key data } { + error_check_good "data mismatch for key $key" $key [data_of $data] +} diff --git a/test/tcl/test018.tcl b/test/tcl/test018.tcl new file mode 100644 index 00000000..fc59a1c4 --- /dev/null +++ b/test/tcl/test018.tcl @@ -0,0 +1,20 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test018 +# TEST Offpage duplicate test +# TEST Key_{first,last,before,after} offpage duplicates. +# TEST Run duplicates with small page size so that we test off page +# TEST duplicates. 
+proc test018 { method {nentries 10000} args} { + puts "Test018: Off page duplicate tests" + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test018: Skipping for specific pagesizes" + return + } + eval {test011 $method $nentries 19 "018" -pagesize 512} $args +} diff --git a/test/tcl/test019.tcl b/test/tcl/test019.tcl new file mode 100644 index 00000000..2ec2d036 --- /dev/null +++ b/test/tcl/test019.tcl @@ -0,0 +1,135 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test019 +# TEST Partial get test. +proc test019 { method {nentries 10000} args } { + global fixed_len + global rand_init + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test019.db + set env NULL + } else { + set testfile test019.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test019: $method ($args) $nentries partial get test" + + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + berkdb srand $rand_init + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + puts "\tTest019.a: put/get loop" + for { set i 0 } { [gets $did str] != -1 && $i < $nentries } \ + { incr i } { + + if { [is_record_based $method] == 1 } { + set key [expr $i + 1] + } else { + set key $str + } + set repl [berkdb random_int $fixed_len 100] + set data [chop_data $method [replicate $str $repl]] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {-nooverwrite $key $data}] + error_check_good dbput:$key $ret 0 + + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + dbget:$key $ret [list [list $key [pad_data $method $data]]] + set kvals($key) $repl + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + close $did + + puts "\tTest019.b: partial get loop" + set did [open $dict] + for { set i 0 } { [gets $did str] != -1 && $i < $nentries } \ + { incr i } { + if { [is_record_based $method] == 1 } { + set key [expr $i + 1] + } else { + set key $str + } + set data [pad_data $method [replicate $str $kvals($key)]] + + set maxndx [expr [string length $data] - 1] + + if { $maxndx > 0 } { + set beg [berkdb random_int 0 [expr $maxndx - 1]] + set len [berkdb random_int 0 [expr $maxndx * 2]] + } else { + set beg 0 + set len 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db get} \ + $txn {-partial [list $beg $len]} $gflags {$key}] + if { $txnenv == 1 } { + 
error_check_good txn [$t commit] 0 + } + + # In order for tcl to handle this, we have to overwrite the + # last character with a NULL. That makes the length one less + # than we expect. + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good dbget_key $k $key + + error_check_good dbget_data $d \ + [string range $data $beg [expr $beg + $len - 1]] + + } + error_check_good db_close [$db close] 0 + close $did +} diff --git a/test/tcl/test020.tcl b/test/tcl/test020.tcl new file mode 100644 index 00000000..35a72374 --- /dev/null +++ b/test/tcl/test020.tcl @@ -0,0 +1,141 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test020 +# TEST In-Memory database tests. +proc test020 { method {nentries 10000} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_queueext $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test020 skipping for method $method" + return + } + + if { [is_partitioned $args] == 1 } { + puts "Test020 skipping for partitioned $method" + return + } + # Create the database and open the dictionary + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # Check if we are using an env. + if { $eindex == -1 } { + set env NULL + } else { + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test020: $method ($args) $nentries equal key/data pairs" + + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + set checkfunc test020_recno.check + append gflags " -recno" + } else { + set checkfunc test020.check + } + puts "\tTest020.a: put/get loop" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest020.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + + error_check_good Test020:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 +} + +# Check function for test020; keys and data are identical +proc test020.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc test020_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "data mismatch: key $key" $data $kvals($key) +} diff --git a/test/tcl/test021.tcl b/test/tcl/test021.tcl new file mode 100644 index 00000000..51a3fe10 --- /dev/null +++ b/test/tcl/test021.tcl @@ -0,0 +1,161 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test021 +# TEST Btree range tests. +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self, reversed as key and self as data. +# TEST After all are entered, retrieve each using a cursor SET_RANGE, and +# TEST getting about 20 keys sequentially after it (in some cases we'll +# TEST run out towards the end of the file). 
+proc test021 { method {nentries 10000} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test021.db + set env NULL + } else { + set testfile test021.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test021: $method ($args) $nentries equal key/data pairs" + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + set checkfunc test021_recno.check + append gflags " -recno" + } else { + set checkfunc test021.check + } + puts "\tTest021.a: put loop" + # Here is the loop where we put each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key [reverse $str] + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good db_put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Now we will get each key 
from the DB and retrieve about 20 + # records after it. + error_check_good db_close [$db close] 0 + + puts "\tTest021.b: test ranges" + set db [eval {berkdb_open -rdonly} $args $omethod $testfile ] + error_check_good dbopen [is_valid_db $db] TRUE + + # Open a cursor + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + set did [open $dict] + set i 0 + while { [gets $did str] != -1 && $i < $count } { + if { [is_record_based $method] == 1 } { + set key [expr $i + 1] + } else { + set key [reverse $str] + } + + set r [$dbc get -set_range $key] + error_check_bad dbc_get:$key [string length $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + $checkfunc $k $d + + for { set nrecs 0 } { $nrecs < 20 } { incr nrecs } { + set r [$dbc get "-next"] + # no error checking because we may run off the end + # of the database + if { [llength $r] == 0 } { + continue; + } + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + $checkfunc $k $d + } + incr i + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + close $did +} + +# Check function for test021; keys and data are reversed +proc test021.check { key data } { + error_check_good "key/data mismatch for $key" $data [reverse $key] +} + +proc test021_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "data mismatch: key $key" $data $kvals($key) +} diff --git a/test/tcl/test022.tcl b/test/tcl/test022.tcl new file mode 100644 index 00000000..4c9fb5e0 --- /dev/null +++ b/test/tcl/test022.tcl @@ -0,0 +1,61 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test022 +# TEST Test of DB->getbyteswapped(). +proc test022 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test022 ($args) $omethod: DB->getbyteswapped()" + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile1 "$testdir/test022a.db" + set testfile2 "$testdir/test022b.db" + set env NULL + } else { + set testfile1 "test022a.db" + set testfile2 "test022b.db" + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + # Create two databases, one in each byte order. + set db1 [eval {berkdb_open -create \ + -mode 0644} $omethod $args {-lorder 1234} $testfile1] + error_check_good db1_open [is_valid_db $db1] TRUE + + set db2 [eval {berkdb_open -create \ + -mode 0644} $omethod $args {-lorder 4321} $testfile2] + error_check_good db2_open [is_valid_db $db2] TRUE + + # Call DB->get_byteswapped on both of them. + set db1_order [$db1 is_byteswapped] + set db2_order [$db2 is_byteswapped] + + # Make sure that both answers are either 1 or 0, + # and that exactly one of them is 1. + error_check_good is_byteswapped_sensible_1 \ + [expr ($db1_order == 1 && $db2_order == 0) || \ + ($db1_order == 0 && $db2_order == 1)] 1 + + error_check_good db1_close [$db1 close] 0 + error_check_good db2_close [$db2 close] 0 + puts "\tTest022 complete." +} diff --git a/test/tcl/test023.tcl b/test/tcl/test023.tcl new file mode 100644 index 00000000..2b8ff35b --- /dev/null +++ b/test/tcl/test023.tcl @@ -0,0 +1,225 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test023 +# TEST Duplicate test +# TEST Exercise deletes and cursor operations within a duplicate set. +# TEST Add a key with duplicates (first time on-page, second time off-page) +# TEST Number the dups. +# TEST Delete dups and make sure that CURRENT/NEXT/PREV work correctly. +proc test023 { method args } { + global alphabet + global dupnum + global dupstr + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test023 skipping for btree with compression." + return + } + + puts "Test023: $method delete duplicates/check cursor operations" + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test023: skipping for method $omethod" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test023.db + set env NULL + } else { + set testfile test023.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644 -dup} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + foreach i { onpage offpage } { + if { $i == "onpage" } { + set dupstr DUP + } else { + set dupstr [repeat $alphabet 50] + } + puts "\tTest023.a: Insert key w/$i dups" + set key "duplicate_val_test" + for { set count 0 } { $count < 20 } { incr count } { + set ret \ + [eval {$db put} $txn $pflags {$key $count$dupstr}] + error_check_good db_put $ret 0 + } + + # Now let's get all the items and make sure they look OK. + puts "\tTest023.b: Check initial duplicates" + set dupnum 0 + dump_file $db $txn $t1 test023.check + + # Delete a couple of random items (FIRST, LAST one in middle) + # Make sure that current returns an error and that NEXT and + # PREV do the right things. + + set ret [$dbc get -set $key] + error_check_bad dbc_get:SET [llength $ret] 0 + + puts "\tTest023.c: Delete first and try gets" + # This should be the first duplicate + error_check_good \ + dbc_get:SET $ret [list [list duplicate_val_test 0$dupstr]] + + # Now delete it. 
+ set ret [$dbc del] + error_check_good dbc_del:FIRST $ret 0 + + # Now current should fail + set ret [$dbc get -current] + error_check_good dbc_get:CURRENT $ret "" + + # Now Prev should fail + set ret [$dbc get -prev] + error_check_good dbc_get:prev0 [llength $ret] 0 + + # Now 10 nexts should work to get us in the middle + for { set j 1 } { $j <= 10 } { incr j } { + set ret [$dbc get -next] + error_check_good \ + dbc_get:next [llength [lindex $ret 0]] 2 + error_check_good \ + dbc_get:next [lindex [lindex $ret 0] 1] $j$dupstr + } + + puts "\tTest023.d: Delete middle and try gets" + # Now do the delete on the current key. + set ret [$dbc del] + error_check_good dbc_del:10 $ret 0 + + # Now current should fail + set ret [$dbc get -current] + error_check_good dbc_get:deleted $ret "" + + # Prev and Next should work + set ret [$dbc get -next] + error_check_good dbc_get:next [llength [lindex $ret 0]] 2 + error_check_good \ + dbc_get:next [lindex [lindex $ret 0] 1] 11$dupstr + + set ret [$dbc get -prev] + error_check_good dbc_get:next [llength [lindex $ret 0]] 2 + error_check_good \ + dbc_get:next [lindex [lindex $ret 0] 1] 9$dupstr + + # Now go to the last one + for { set j 11 } { $j <= 19 } { incr j } { + set ret [$dbc get -next] + error_check_good \ + dbc_get:next [llength [lindex $ret 0]] 2 + error_check_good \ + dbc_get:next [lindex [lindex $ret 0] 1] $j$dupstr + } + + puts "\tTest023.e: Delete last and try gets" + # Now do the delete on the current key. 
+ set ret [$dbc del] + error_check_good dbc_del:LAST $ret 0 + + # Now current should fail + set ret [$dbc get -current] + error_check_good dbc_get:deleted $ret "" + + # Next should fail + set ret [$dbc get -next] + error_check_good dbc_get:next19 [llength $ret] 0 + + # Prev should work + set ret [$dbc get -prev] + error_check_good dbc_get:next [llength [lindex $ret 0]] 2 + error_check_good \ + dbc_get:next [lindex [lindex $ret 0] 1] 18$dupstr + + # Now overwrite the current one, then count the number + # of data items to make sure that we have the right number. + + puts "\tTest023.f: Count keys, overwrite current, count again" + # At this point we should have 17 keys the (initial 20 minus + # 3 deletes) + set dbc2 [eval {$db cursor} $txn] + error_check_good db_cursor:2 [is_substr $dbc2 $db] 1 + + set count_check 0 + for { set rec [$dbc2 get -first] } { + [llength $rec] != 0 } { set rec [$dbc2 get -next] } { + incr count_check + } + error_check_good numdups $count_check 17 + + set ret [$dbc put -current OVERWRITE] + error_check_good dbc_put:current $ret 0 + + set count_check 0 + for { set rec [$dbc2 get -first] } { + [llength $rec] != 0 } { set rec [$dbc2 get -next] } { + incr count_check + } + error_check_good numdups $count_check 17 + error_check_good dbc2_close [$dbc2 close] 0 + + # Done, delete all the keys for next iteration + set ret [eval {$db del} $txn {$key}] + error_check_good db_delete $ret 0 + + # database should be empty + + set ret [$dbc get -first] + error_check_good first_after_empty [llength $ret] 0 + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + +} + +# Check function for test023; keys and data are identical +proc test023.check { key data } { + global dupnum + global dupstr + error_check_good "bad key" $key duplicate_val_test + error_check_good "data mismatch for $key" $data $dupnum$dupstr + incr dupnum +} diff --git a/test/tcl/test024.tcl 
b/test/tcl/test024.tcl new file mode 100644 index 00000000..83e4f91a --- /dev/null +++ b/test/tcl/test024.tcl @@ -0,0 +1,280 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test024 +# TEST Record number retrieval test. +# TEST Test the Btree and Record number get-by-number functionality. +proc test024 { method {nentries 10000} args} { + source ./include.tcl + global rand_init + + set do_renumber [is_rrecno $method] + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test024: $method ($args)" + + # Btree with compression does not support -recnum. + if { [is_compressed $args] == 1 } { + puts "Test024 skipping for compressed btree with -recnum." + return + } + if { [string compare $omethod "-hash"] == 0 } { + puts "Test024 skipping for method HASH" + return + } + if { [string compare $omethod "-heap"] == 0 } { + puts "Test024 skipping for method HEAP" + return + } + if { [is_partitioned $args] } { + puts "Test024 skipping for partitioned $omethod" + return + } + + berkdb srand $rand_init + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test024.db + set env NULL + } else { + set testfile test024.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + + cleanup $testdir $env + + # Read the first nentries dictionary elements and reverse them. 
+ # Keep a list of these (these will be the keys). + puts "\tTest024.a: initialization" + set keys "" + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + lappend keys [reverse $str] + incr count + } + close $did + + # Generate sorted order for the keys + set sorted_keys [lsort $keys] + # Create the database + if { [string compare $omethod "-btree"] == 0 } { + set db [eval {berkdb_open -create \ + -mode 0644 -recnum} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } else { + set db [eval {berkdb_open -create \ + -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } + + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + set gflags " -recno" + } + + puts "\tTest024.b: put/get loop" + foreach k $keys { + if { [is_record_based $method] == 1 } { + set key [lsearch $sorted_keys $k] + incr key + } else { + set key $k + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $k]}] + error_check_good put $ret 0 + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $k]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest024.c: dump file" + + # Put sorted keys in file + set oid [open $t1 w] + foreach k $sorted_keys { + puts $oid [pad_data $method $k] + } + close $oid + + # Instead of using dump_file; get all the keys by keynum + set oid [open $t2 w] + if { [string compare $omethod "-btree"] == 0 } { + set do_renumber 1 + } + + set gflags " -recno" + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set k 1 } { $k <= $count } { incr k } { + set ret [eval {$db get} $txn $gflags {$k}] + puts $oid [lindex [lindex $ret 0] 1] + error_check_good recnum_get [lindex [lindex $ret 0] 1] \ + [pad_data $method [lindex $sorted_keys [expr $k - 1]]] + } + close $oid + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + error_check_good Test024.c:diff($t1,$t2) \ + [filecmp $t1 $t2] 0 + + # Now, reopen the file and run the last test again. + puts "\tTest024.d: close, open, and dump file" + set db [eval {berkdb_open -rdonly} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set oid [open $t2 w] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set k 1 } { $k <= $count } { incr k } { + set ret [eval {$db get} $txn $gflags {$k}] + puts $oid [lindex [lindex $ret 0] 1] + error_check_good recnum_get [lindex [lindex $ret 0] 1] \ + [pad_data $method [lindex $sorted_keys [expr $k - 1]]] + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $oid + error_check_good db_close [$db close] 0 + error_check_good Test024.d:diff($t1,$t2) \ + [filecmp $t1 $t2] 0 + + # Now, reopen the file and run the last test again in reverse direction. 
+ puts "\tTest024.e: close, open, and dump file in reverse direction" + set db [eval {berkdb_open -rdonly} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + # Put sorted keys in file + set rsorted "" + foreach k $sorted_keys { + set rsorted [linsert $rsorted 0 $k] + } + set oid [open $t1 w] + foreach k $rsorted { + puts $oid [pad_data $method $k] + } + close $oid + + set oid [open $t2 w] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set k $count } { $k > 0 } { incr k -1 } { + set ret [eval {$db get} $txn $gflags {$k}] + puts $oid [lindex [lindex $ret 0] 1] + error_check_good recnum_get [lindex [lindex $ret 0] 1] \ + [pad_data $method [lindex $sorted_keys [expr $k - 1]]] + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $oid + error_check_good db_close [$db close] 0 + error_check_good Test024.e:diff($t1,$t2) \ + [filecmp $t1 $t2] 0 + + # Now try deleting elements and making sure they work + puts "\tTest024.f: delete test" + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + while { $count > 0 } { + set kndx [berkdb random_int 1 $count] + set kval [lindex $keys [expr $kndx - 1]] + set recno [expr [lsearch $sorted_keys $kval] + 1] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { [is_record_based $method] == 1 } { + set ret [eval {$db del} $txn {$recno}] + } else { + set ret [eval {$db del} $txn {$kval}] + } + error_check_good delete $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Remove the key from the key list + set ndx [expr $kndx - 1] + set keys [lreplace $keys $ndx $ndx] + + if { $do_renumber == 1 } { + set r [expr $recno - 1] + set sorted_keys [lreplace $sorted_keys $r $r] + } + + # Check that the keys after it have been renumbered + if { $txnenv == 1 } { + set t [$env txn] + 
error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { $do_renumber == 1 && $recno != $count } { + set r [expr $recno - 1] + set ret [eval {$db get} $txn $gflags {$recno}] + error_check_good get_after_del \ + [lindex [lindex $ret 0] 1] [lindex $sorted_keys $r] + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Decrement count + incr count -1 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test025.tcl b/test/tcl/test025.tcl new file mode 100644 index 00000000..8aa04afc --- /dev/null +++ b/test/tcl/test025.tcl @@ -0,0 +1,145 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test025 +# TEST DB_APPEND flag test. +proc test025 { method {nentries 10000} {start 0 } {tnum "025"} args} { + global kvals + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + puts "Test$tnum: $method ($args)" + + if { [string compare $omethod "-btree"] == 0 } { + puts "Test$tnum skipping for method BTREE" + return + } + if { [string compare $omethod "-hash"] == 0 } { + puts "Test$tnum skipping for method HASH" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + puts "\tTest$tnum.a: put/get loop" + set gflags " -recno" + set pflags " -append" + set txn "" + set checkfunc test025_check + + # Here is the loop where we put and get each key/data pair + set count $start + set nentries [expr $start + $nentries] + if { $count != 0 } { + gets $did str + set k [expr $count + 1] + set kvals($k) [pad_data $method $str] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$k [chop_data $method $str]}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + + while { [gets $did str] != -1 && $count < $nentries } { + set k [expr $count + 1] + set kvals($k) [pad_data $method $str] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn $pflags {[chop_data $method $str]}] + error_check_good db_put $ret $k + + set ret [eval {$db get} $txn $gflags {$k}] + error_check_good \ + get $ret [list [list $k [pad_data $method $str]]] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # The recno key will be count + 1, so when we hit + # UINT32_MAX - 1, reset to 0. + if { $count == [expr 0xfffffffe] } { + set count 0 + } else { + incr count + } + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.c: close, open, and dump file" + # Now, reopen the file and run the last test again. + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction -first -next $args + + # Now, reopen the file and run the last test again in the + # reverse direction. + puts "\tTest$tnum.d: close, open, and dump file in reverse direction" + eval open_and_dump_file $testfile $env $t1 $checkfunc \ + dump_file_direction -last -prev $args +} + +proc test025_check { key data } { + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good " key/data mismatch for |$key|" $data $kvals($key) +} diff --git a/test/tcl/test026.tcl b/test/tcl/test026.tcl new file mode 100644 index 00000000..f028eacf --- /dev/null +++ b/test/tcl/test026.tcl @@ -0,0 +1,159 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test026 +# TEST Small keys/medium data w/duplicates +# TEST Put/get per key. +# TEST Loop through keys -- delete each key +# TEST ... test that cursors delete duplicates correctly +# TEST +# TEST Keyed delete test through cursor. If ndups is small; this will +# TEST test on-page dups; if it's large, it will test off-page dups. +proc test026 { method {nentries 2000} {ndups 5} {tnum "026"} args} { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." 
+ return + } + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the defaults down a bit. + # If we are wanting a lot of dups, set that + # down a bit or repl testing takes very long. + # + if { $nentries == 2000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + cleanup $testdir $env + puts "Test$tnum: $method ($args) $nentries keys\ + with $ndups dups; cursor delete test" + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + + puts "\tTest$tnum.a: Put loop" + set db [eval {berkdb_open -create \ + -mode 0644} $args {$omethod -dup $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + while { [gets $did str] != -1 && $count < [expr $nentries * $ndups] } { + set datastr [ make_data_str $str ] + for { set j 1 } { $j <= $ndups} {incr j} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $j$datastr]}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + } + close $did + + error_check_good db_close [$db close] 0 + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + 
# Now we will sequentially traverse the database getting each + # item and deleting it. + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + puts "\tTest$tnum.b: Get/delete loop" + set i 1 + for { set ret [$dbc get -first] } { + [string length $ret] != 0 } { + set ret [$dbc get -next] } { + + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + if { $i == 1 } { + set curkey $key + } + error_check_good seq_get:key $key $curkey + error_check_good \ + seq_get:data $data [pad_data $method $i[make_data_str $key]] + + if { $i == $ndups } { + set i 1 + } else { + incr i + } + + # Now delete the key + set ret [$dbc del] + error_check_good db_del:$key $ret 0 + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.c: Verify empty file" + # Double check that file is now empty + set db [eval {berkdb_open} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + set ret [$dbc get -first] + error_check_good get_on_empty [string length $ret] 0 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test027.tcl b/test/tcl/test027.tcl new file mode 100644 index 00000000..2e19eaa7 --- /dev/null +++ b/test/tcl/test027.tcl @@ -0,0 +1,16 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test027 +# TEST Off-page duplicate test +# TEST Test026 with parameters to force off-page duplicates. +# TEST +# TEST Check that delete operations work. Create a database; close +# TEST database and reopen it. Then issues delete by key for each +# TEST entry. +proc test027 { method {nentries 100} args} { + eval {test026 $method $nentries 100 "027"} $args +} diff --git a/test/tcl/test028.tcl b/test/tcl/test028.tcl new file mode 100644 index 00000000..1b6ff92c --- /dev/null +++ b/test/tcl/test028.tcl @@ -0,0 +1,224 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test028 +# TEST Cursor delete test +# TEST Test put operations after deleting through a cursor. +proc test028 { method args } { + global dupnum + global dupstr + global alphabet + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test028: $method put after cursor delete test" + + if { [is_rbtree $method] == 1 } { + puts "Test028 skipping for method $method" + return + } + if { [is_record_based $method] == 1 } { + set key 10 + } else { + set key "put_after_cursor_del" + if { [is_compressed $args] == 0 } { + append args " -dup" + } + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test028.db + set env NULL + } else { + set testfile test028.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set ndups 20 + set txn "" + set pflags "" + set gflags "" + + if { [is_record_based $method] == 1 } { + set gflags " -recno" + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + foreach i { offpage onpage } { + foreach b { bigitem smallitem } { + if { $i == "onpage" } { + if { $b == "bigitem" } { + set dupstr [repeat $alphabet 100] + } else { + set dupstr DUP + } + } else { + if { $b == "bigitem" } { + set dupstr [repeat $alphabet 100] + } else { + set dupstr [repeat $alphabet 50] + } + } + + if { $b == "bigitem" } { + set dupstr [repeat $dupstr 10] + } + puts "\tTest028: $i/$b" + + puts "\tTest028.a: Insert key with single data item" + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $dupstr]}] + error_check_good db_put $ret 0 + + # Now let's get the item and make sure its OK. 
+ puts "\tTest028.b: Check initial entry" + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good db_get \ + $ret [list [list $key [pad_data $method $dupstr]]] + + # Now try a put with NOOVERWRITE SET (should be error) + puts "\tTest028.c: No_overwrite test" + set ret [eval {$db put} $txn \ + {-nooverwrite $key [chop_data $method $dupstr]}] + error_check_good \ + db_put [is_substr $ret "DB_KEYEXIST"] 1 + + # Now delete the item with a cursor + puts "\tTest028.d: Delete test" + set ret [$dbc get -set $key] + error_check_bad dbc_get:SET [llength $ret] 0 + + set ret [$dbc del] + error_check_good dbc_del $ret 0 + + puts "\tTest028.e: Reput the item" + set ret [eval {$db put} $txn \ + {-nooverwrite $key [chop_data $method $dupstr]}] + error_check_good db_put $ret 0 + + puts "\tTest028.f: Retrieve the item" + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good db_get $ret \ + [list [list $key [pad_data $method $dupstr]]] + + # Delete the key to set up for next test + set ret [eval {$db del} $txn {$key}] + error_check_good db_del $ret 0 + + # Now repeat the above set of tests with + # duplicates (if not RECNO). 
+ if { [is_record_based $method] == 1 ||\ + [is_compressed $args] == 1 } { + continue; + } + + puts "\tTest028.g: Insert key with duplicates" + for { set count 0 } { $count < $ndups } { incr count } { + set ret [eval {$db put} $txn \ + {$key [chop_data $method $count$dupstr]}] + error_check_good db_put $ret 0 + } + + puts "\tTest028.h: Check dups" + set dupnum 0 + dump_file $db $txn $t1 test028.check + + # Try no_overwrite + puts "\tTest028.i: No_overwrite test" + set ret [eval {$db put} \ + $txn {-nooverwrite $key $dupstr}] + error_check_good \ + db_put [is_substr $ret "DB_KEYEXIST"] 1 + + # Now delete all the elements with a cursor + puts "\tTest028.j: Cursor Deletes" + set count 0 + for { set ret [$dbc get -set $key] } { + [string length $ret] != 0 } { + set ret [$dbc get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good db_seq(key) $k $key + error_check_good db_seq(data) $d $count$dupstr + set ret [$dbc del] + error_check_good dbc_del $ret 0 + incr count + if { $count == [expr $ndups - 1] } { + puts "\tTest028.k:\ + Duplicate No_Overwrite test" + set ret [eval {$db put} $txn \ + {-nooverwrite $key $dupstr}] + error_check_good db_put [is_substr \ + $ret "DB_KEYEXIST"] 1 + } + } + + # Make sure all the items are gone + puts "\tTest028.l: Get after delete" + set ret [$dbc get -set $key] + error_check_good get_after_del [string length $ret] 0 + + puts "\tTest028.m: Reput the item" + set ret [eval {$db put} \ + $txn {-nooverwrite $key 0$dupstr}] + error_check_good db_put $ret 0 + for { set count 1 } { $count < $ndups } { incr count } { + set ret [eval {$db put} $txn \ + {$key $count$dupstr}] + error_check_good db_put $ret 0 + } + + puts "\tTest028.n: Retrieve the item" + set dupnum 0 + dump_file $db $txn $t1 test028.check + + # Clean out in prep for next test + set ret [eval {$db del} $txn {$key}] + error_check_good db_del $ret 0 + } + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + 
error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + +} + +# Check function for test028; keys and data are identical +proc test028.check { key data } { + global dupnum + global dupstr + error_check_good "Bad key" $key put_after_cursor_del + error_check_good "data mismatch for $key" $data $dupnum$dupstr + incr dupnum +} diff --git a/test/tcl/test029.tcl b/test/tcl/test029.tcl new file mode 100644 index 00000000..13ddb435 --- /dev/null +++ b/test/tcl/test029.tcl @@ -0,0 +1,255 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test029 +# TEST Test the Btree and Record number renumbering. +proc test029 { method {nentries 10000} args} { + source ./include.tcl + + set do_renumber [is_rrecno $method] + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test029: $method ($args)" + + if { [string compare $omethod "-hash"] == 0 } { + puts "Test029 skipping for method HASH" + return + } + # Btree with compression does not support -recnum. + if { [is_compressed $args] == 1 } { + puts "Test029 skipping for compressed btree with -recnum." + return + } + if { [is_partitioned $args] } { + puts "Test029 skipping for partitioned $omethod" + return + } + if { [is_record_based $method] == 1 && $do_renumber != 1 } { + puts "Test029 skipping for method RECNO (w/out renumbering)" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test029.db + set env NULL + } else { + set testfile test029.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + # Do not set nentries down to 100 until we + # fix SR #5958. + set nentries 1000 + } + } + set testdir [get_home $env] + } + cleanup $testdir $env + + # Read the first nentries dictionary elements and reverse them. + # Keep a list of these (these will be the keys). + puts "\tTest029.a: initialization" + set keys "" + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + lappend keys [reverse $str] + incr count + } + close $did + + # Generate sorted order for the keys + set sorted_keys [lsort $keys] + + # Save the first and last keys + set last_key [lindex $sorted_keys end] + set last_keynum [llength $sorted_keys] + + set first_key [lindex $sorted_keys 0] + set first_keynum 1 + + # Create the database + if { [string compare $omethod "-btree"] == 0 } { + set db [eval {berkdb_open -create \ + -mode 0644 -recnum} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } else { + set db [eval {berkdb_open -create \ + -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } + + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + puts "\tTest029.b: put/get loop" + foreach k $keys { + if { [is_record_based $method] == 1 } { + set key [lsearch $sorted_keys $k] + incr key + } else { + set key $k + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $k]}] + error_check_good dbput $ret 0 + + set ret [eval {$db 
get} $txn $gflags {$key}] + error_check_good dbget [lindex [lindex $ret 0] 1] $k + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Now delete the first key in the database + puts "\tTest029.c: delete and verify renumber" + + # Delete the first key in the file + if { [is_record_based $method] == 1 } { + set key $first_keynum + } else { + set key $first_key + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db del} $txn {$key}] + error_check_good db_del $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now we are ready to retrieve records based on + # record number + if { [string compare $omethod "-btree"] == 0 } { + append gflags " -recno" + } + + # First try to get the old last key (shouldn't exist) + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db get} $txn $gflags {$last_keynum}] + error_check_good get_after_del $ret [list] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now try to get what we think should be the last key + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db get} $txn $gflags {[expr $last_keynum - 1]}] + error_check_good \ + getn_last_after_del [lindex [lindex $ret 0] 1] $last_key + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Create a cursor; we need it for the next test and we + # need it for recno here. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # OK, now re-put the first key and make sure that we + # renumber the last key appropriately. 
+ if { [string compare $omethod "-btree"] == 0 } { + set ret [eval {$db put} $txn \ + {$key [chop_data $method $first_key]}] + error_check_good db_put $ret 0 + } else { + # Recno + set ret [$dbc get -first] + set ret [eval {$dbc put} $pflags {-before $first_key}] + error_check_bad dbc_put:DB_BEFORE $ret 0 + } + + # Now check that the last record matches the last record number + set ret [eval {$db get} $txn $gflags {$last_keynum}] + error_check_good \ + getn_last_after_put [lindex [lindex $ret 0] 1] $last_key + + # Now delete the first key in the database using a cursor + puts "\tTest029.d: delete with cursor and verify renumber" + + set ret [$dbc get -first] + error_check_good dbc_first $ret [list [list $key $first_key]] + + # Now delete at the cursor + set ret [$dbc del] + error_check_good dbc_del $ret 0 + + # Now check the record numbers of the last keys again. + # First try to get the old last key (shouldn't exist) + set ret [eval {$db get} $txn $gflags {$last_keynum}] + error_check_good get_last_after_cursor_del:$ret $ret [list] + + # Now try to get what we think should be the last key + set ret [eval {$db get} $txn $gflags {[expr $last_keynum - 1]}] + error_check_good \ + getn_after_cursor_del [lindex [lindex $ret 0] 1] $last_key + + # Re-put the first key and make sure that we renumber the last + # key appropriately. We can't do a c_put -current, so do + # a db put instead. 
+ if { [string compare $omethod "-btree"] == 0 } { + puts "\tTest029.e: put (non-cursor) and verify renumber" + set ret [eval {$db put} $txn \ + {$key [chop_data $method $first_key]}] + error_check_good db_put $ret 0 + } else { + puts "\tTest029.e: put with cursor and verify renumber" + set ret [eval {$dbc put} $pflags {-before $first_key}] + error_check_bad dbc_put:DB_BEFORE $ret 0 + } + + # Now check that the last record matches the last record number + set ret [eval {$db get} $txn $gflags {$last_keynum}] + error_check_good \ + get_after_cursor_reput [lindex [lindex $ret 0] 1] $last_key + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test030.tcl b/test/tcl/test030.tcl new file mode 100644 index 00000000..183d02ab --- /dev/null +++ b/test/tcl/test030.tcl @@ -0,0 +1,259 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test030 +# TEST Test DB_NEXT_DUP Functionality. +proc test030 { method {nentries 10000} args } { + global rand_init + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test030 skipping for btree with compression." + return + } + if { [is_record_based $method] == 1 || + [is_rbtree $method] == 1 } { + puts "Test030 skipping for method $method" + return + } + berkdb srand $rand_init + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test030.db + set cntfile $testdir/cntfile.db + set env NULL + } else { + set testfile test030.db + set cntfile cntfile.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + + puts "Test030: $method ($args) $nentries DB_NEXT_DUP testing" + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set db [eval {berkdb_open -create \ + -mode 0644 -dup} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + # Use a second DB to keep track of how many duplicates + # we enter per key + + set cntdb [eval {berkdb_open -create \ + -mode 0644} $args {-btree $cntfile}] + error_check_good dbopen:cntfile [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + # We will add between 1 and 10 dups with values 1 ... dups + # We'll verify each addition. + + set did [open $dict] + puts "\tTest030.a: put and get duplicate keys." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + while { [gets $did str] != -1 && $count < $nentries } { + set ndup [berkdb random_int 1 10] + + for { set i 1 } { $i <= $ndup } { incr i 1 } { + set ctxn "" + if { $txnenv == 1 } { + set ct [$env txn] + error_check_good txn \ + [is_valid_txn $ct $env] TRUE + set ctxn "-txn $ct" + } + set ret [eval {$cntdb put} \ + $ctxn $pflags {$str [chop_data $method $ndup]}] + error_check_good put_cnt $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$ct commit] 0 + } + set datastr $i:$str + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + } + + # Now retrieve all the keys matching this key + set x 0 + for {set ret [$dbc get -set $str]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -nextdup] } { + + if { [llength $ret] == 0 } { + break + } + incr x + + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + error_check_good Test030:put $d $str + + set id [ id_of $datastr ] + error_check_good Test030:dup# $id $x + } + error_check_good Test030:numdups $x $ndup + + # Now retrieve them backwards + for {set ret [$dbc get -prev]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -prevdup] } { + + if { [llength $ret] == 0 } { + break + } + + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + incr x -1 + + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + error_check_good Test030:put $d $str + + set id [ id_of $datastr ] + error_check_good Test030:dup# $id $x + } + error_check_good Test030:numdups $x 1 + incr count + } + close $did + + # Verify on sequential pass of entire file + puts "\tTest030.b: sequential check" + + # We can't just set lastkey to a null string, since that might + # be a key now! 
+ set lastkey "THIS STRING WILL NEVER BE A KEY" + + for {set ret [$dbc get -first]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -next] } { + + # Outer loop should always get a new key + + set k [lindex [lindex $ret 0] 0] + error_check_bad outer_get_loop:key $k $lastkey + + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + set id [ id_of $datastr ] + + error_check_good outer_get_loop:data $d $k + error_check_good outer_get_loop:id $id 1 + + set lastkey $k + # Figure out how may dups we should have + if { $txnenv == 1 } { + set ct [$env txn] + error_check_good txn [is_valid_txn $ct $env] TRUE + set ctxn "-txn $ct" + } + set ret [eval {$cntdb get} $ctxn $pflags {$k}] + set ndup [lindex [lindex $ret 0] 1] + if { $txnenv == 1 } { + error_check_good txn [$ct commit] 0 + } + + set howmany 1 + for { set ret [$dbc get -nextdup] } \ + { [llength $ret] != 0 } \ + { set ret [$dbc get -nextdup] } { + incr howmany + + set k [lindex [lindex $ret 0] 0] + error_check_good inner_get_loop:key $k $lastkey + + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + set id [ id_of $datastr ] + + error_check_good inner_get_loop:data $d $k + error_check_good inner_get_loop:id $id $howmany + + } + error_check_good ndups_found $howmany $ndup + } + + # Verify on key lookup + puts "\tTest030.c: keyed check" + set cnt_dbc [$cntdb cursor] + for {set ret [$cnt_dbc get -first]} \ + {[llength $ret] != 0} \ + {set ret [$cnt_dbc get -next] } { + set k [lindex [lindex $ret 0] 0] + + set howmany [lindex [lindex $ret 0] 1] + error_check_bad cnt_seq:data [string length $howmany] 0 + + set i 0 + for {set ret [$dbc get -set $k]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -nextdup] } { + incr i + + set k [lindex [lindex $ret 0] 0] + + set datastr [lindex [lindex $ret 0] 1] + set d [data_of $datastr] + set id [ id_of $datastr ] + + error_check_good inner_get_loop:data $d $k + error_check_good inner_get_loop:id $id $i + } + error_check_good keyed_count $i $howmany + + 
} + error_check_good cnt_curs_close [$cnt_dbc close] 0 + error_check_good db_curs_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good cnt_file_close [$cntdb close] 0 + error_check_good db_file_close [$db close] 0 +} diff --git a/test/tcl/test031.tcl b/test/tcl/test031.tcl new file mode 100644 index 00000000..1581267a --- /dev/null +++ b/test/tcl/test031.tcl @@ -0,0 +1,234 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test031 +# TEST Duplicate sorting functionality +# TEST Make sure DB_NODUPDATA works. +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and "ndups" duplicates +# TEST For the data field, prepend random five-char strings (see test032) +# TEST that we force the duplicate sorting code to do something. +# TEST Along the way, test that we cannot insert duplicate duplicates +# TEST using DB_NODUPDATA. +# TEST +# TEST By setting ndups large, we can make this an off-page test +# TEST After all are entered, retrieve all; verify output. +# TEST Close file, reopen, do retrieve and re-verify. +# TEST This does not work for recno +proc test031 { method {nentries 10000} {ndups 5} {tnum "031"} args } { + global alphabet + global rand_init + source ./include.tcl + + berkdb srand $rand_init + + set args [convert_args $method $args] + set checkargs [split_partition_args $args] + + # The checkdb is of type hash so it can't use compression. + set checkargs [strip_compression_args $checkargs] + set omethod [convert_method $method] + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set checkdb $testdir/checkdb.db + set env NULL + } else { + set testfile test$tnum.db + set checkdb checkdb.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append checkargs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + puts "Test$tnum: \ + $method ($args) $nentries small $ndups sorted dup key/data pairs" + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $omethod" + return + } + set db [eval {berkdb_open -create \ + -mode 0644} $args {$omethod -dup -dupsort $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set check_db [eval {berkdb_open \ + -create -mode 0644} $checkargs {-hash $checkdb}] + error_check_good dbopen:check_db [is_valid_db $check_db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + puts "\tTest$tnum.a: Put/get loop, check nodupdata" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + # Re-initialize random string generator + randstring_init $ndups + + set dups "" + for { set i 1 } { $i <= $ndups } { incr i } { + set pref [randstring] + set dups $dups$pref + set datastr $pref:$str + if { $i == 2 } { + set nodupstr $datastr + } + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 
0 + } + + # Test DB_NODUPDATA using the DB handle + set ret [eval {$db put -nodupdata} \ + $txn $pflags {$str [chop_data $method $nodupstr]}] + error_check_good db_nodupdata [is_substr $ret "DB_KEYEXIST"] 1 + + set ret [eval {$check_db put} \ + $txn $pflags {$str [chop_data $method $dups]}] + error_check_good checkdb_put $ret 0 + + # Now retrieve all the keys matching this key + set x 0 + set lastdup "" + # Test DB_NODUPDATA using cursor handle + set ret [$dbc get -set $str] + error_check_bad dbc_get [llength $ret] 0 + set datastr [lindex [lindex $ret 0] 1] + error_check_bad dbc_data [string length $datastr] 0 + set ret [eval {$dbc put -nodupdata} \ + {$str [chop_data $method $datastr]}] + error_check_good dbc_nodupdata [is_substr $ret "DB_KEYEXIST"] 1 + + for {set ret [$dbc get -set $str]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -nextdup] } { + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + if {[string length $datastr] == 0} { + break + } + if {[string compare \ + $lastdup [pad_data $method $datastr]] > 0} { + error_check_good \ + sorted_dups($lastdup,$datastr) 0 1 + } + incr x + set lastdup $datastr + } + error_check_good "Test$tnum:ndups:$str" $x $ndups + incr count + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.b: Checking file for correct duplicates" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open(2) [is_valid_cursor $dbc $db] TRUE + + set lastkey "THIS WILL NEVER BE A KEY VALUE" + # no need to delete $lastkey + set firsttimethru 1 + for {set ret [$dbc get -first]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + if { [string compare $k $lastkey] != 0 } { + # Remove last key from the checkdb + if { $firsttimethru != 1 } { + error_check_good check_db:del:$lastkey \ + [eval {$check_db del} $txn {$lastkey}] 0 + } + set firsttimethru 0 + set lastdup "" + set lastkey $k + set dups [lindex [lindex [eval {$check_db get} \ + $txn {$k}] 0] 1] + error_check_good check_db:get:$k \ + [string length $dups] [expr $ndups * 4] + } + + if { [string compare $lastdup $d] > 0 } { + error_check_good dup_check:$k:$d 0 1 + } + set lastdup $d + + set pref [string range $d 0 3] + set ndx [string first $pref $dups] + error_check_good valid_duplicate [expr $ndx >= 0] 1 + set a [string range $dups 0 [expr $ndx - 1]] + set b [string range $dups [expr $ndx + 4] end] + set dups $a$b + } + # Remove last key from the checkdb + if { [string length $lastkey] != 0 } { + error_check_good check_db:del:$lastkey \ + [eval {$check_db del} $txn {$lastkey}] 0 + } + + # Make sure there is nothing left in check_db + + set check_c [eval {$check_db cursor} $txn] + set ret [$check_c get -first] + error_check_good check_c:get:$ret [llength $ret] 0 + error_check_good check_c:close [$check_c close] 0 + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good check_db:close [$check_db close] 0 + error_check_good db_close [$db close] 0 +} diff --git 
a/test/tcl/test032.tcl b/test/tcl/test032.tcl new file mode 100644 index 00000000..1b99b1a3 --- /dev/null +++ b/test/tcl/test032.tcl @@ -0,0 +1,266 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test032 +# TEST DB_GET_BOTH, DB_GET_BOTH_RANGE +# TEST +# TEST Use the first 10,000 entries from the dictionary. Insert each with +# TEST self as key and "ndups" duplicates. For the data field, prepend the +# TEST letters of the alphabet in a random order so we force the duplicate +# TEST sorting code to do something. By setting ndups large, we can make +# TEST this an off-page test. By setting overflow to be 1, we can make +# TEST this an overflow test. +# TEST +# TEST Test the DB_GET_BOTH functionality by retrieving each dup in the file +# TEST explicitly. Test the DB_GET_BOTH_RANGE functionality by retrieving +# TEST the unique key prefix (cursor only). Finally test the failure case. +proc test032 { method {nentries 10000} {ndups 5} {tnum "032"} + {overflow 0} args } { + global alphabet rand_init + source ./include.tcl + + set args [convert_args $method $args] + set checkargs [split_partition_args $args] + + # The checkdb is of type hash so it can't use compression. + set checkargs [strip_compression_args $checkargs] + set omethod [convert_method $method] + + berkdb srand $rand_init + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set checkdb $testdir/checkdb.db + set env NULL + } else { + set testfile test$tnum.db + set checkdb checkdb.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append checkargs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + set dataset "small" + if {$overflow != 0} { + set dataset "large" + } + puts "Test$tnum:\ + $method ($args) $nentries $dataset sorted $ndups dup key/data pairs" + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $omethod" + return + } + set db [eval {berkdb_open -create -mode 0644 \ + $omethod -dup -dupsort} $args {$testfile} ] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set check_db [eval {berkdb_open \ + -create -mode 0644} $checkargs {-hash $checkdb}] + error_check_good dbopen:check_db [is_valid_db $check_db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + set len 4 + + # + # Find the pagesize if we are testing with overflow pages. We will + # use the pagesize to build overflow items of the correct size. 
+ # + if {$overflow != 0} { + set stat [$db stat] + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set len $pg + } + + # Here is the loop where we put and get each key/data pair + puts "\tTest$tnum.a: Put/get loop" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + # Re-initialize random string generator + randstring_init $ndups + + set dups "" + set prefix "" + for { set i 1 } { $i <= $ndups } { incr i } { + set prefix [randstring] + + # + # Pad the data string so that overflow data items + # are large enough to generate overflow pages. + # + for { set j 1} { $j <= [expr $len / 4 - 1] } \ + { incr j } { + append prefix "!@#$" + } + + set dups $dups$prefix + set datastr $prefix:$str + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + } + set ret [eval {$check_db put} \ + $txn $pflags {$str [chop_data $method $dups]}] + error_check_good checkdb_put $ret 0 + + # Now retrieve all the keys matching this key + set x 0 + set lastdup "" + for {set ret [$dbc get -set $str]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -nextdup] } { + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + if {[string length $datastr] == 0} { + break + } + if {[string compare $lastdup $datastr] > 0} { + error_check_good \ + sorted_dups($lastdup,$datastr) 0 1 + } + incr x + set lastdup $datastr + } + + error_check_good "Test$tnum:ndups:$str" $x $ndups + incr count + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.b: Checking file for correct duplicates (no cursor)" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set check_c [eval {$check_db cursor} $txn] + error_check_good check_c_open(2) \ + [is_valid_cursor $check_c $check_db] TRUE + + for {set ndx 0} {$ndx < [expr $len * $ndups]} {incr ndx $len} { + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + set prefix [string range $d $ndx \ + [expr $ndx + [expr $len - 1] ] ] + set data $prefix:$k + set ret [eval {$db get} $txn {-get_both $k $data}] + error_check_good \ + get_both_data:$k $ret [list [list $k $data]] + } + } + + $db sync + + # Now repeat the above test using cursor ops + puts "\tTest$tnum.c: Checking file for correct duplicates (cursor)" + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + + for {set ndx 0} {$ndx < [expr $len * $ndups]} {incr ndx $len} { + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + set prefix [string range $d $ndx \ + [expr $ndx + [ expr $len - 1]]] + set data $prefix:$k + set ret [eval {$dbc get} {-get_both $k $data}] + error_check_good \ + curs_get_both_data:$k $ret [list [list $k $data]] + + set ret [eval {$dbc get} {-get_both_range $k $prefix}] + error_check_good \ + curs_get_both_range:$k $ret [list [list $k $data]] + } + } + + # Now check the error case + puts "\tTest$tnum.d: Check error case (no cursor)" + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad 
data_check:$d [string length $d] 0 + + set data XXX$k + set ret [eval {$db get} $txn {-get_both $k $data}] + error_check_good error_case:$k [llength $ret] 0 + } + + # Now check the error case + puts "\tTest$tnum.e: Check error case (cursor)" + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + set data XXX$k + set ret [eval {$dbc get} {-get_both $k $data}] + error_check_good error_case:$k [llength $ret] 0 + } + + error_check_good check_c:close [$check_c close] 0 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good check_db:close [$check_db close] 0 + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test033.tcl b/test/tcl/test033.tcl new file mode 100644 index 00000000..f78dcb32 --- /dev/null +++ b/test/tcl/test033.tcl @@ -0,0 +1,226 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test033 +# TEST DB_GET_BOTH without comparison function +# TEST +# TEST Use the first 10,000 entries from the dictionary. Insert each with +# TEST self as key and data; add duplicate records for each. After all are +# TEST entered, retrieve all and verify output using DB_GET_BOTH (on DB and +# TEST DBC handles) and DB_GET_BOTH_RANGE (on a DBC handle) on existent and +# TEST nonexistent keys. +# TEST +# TEST XXX +# TEST This does not work for rbtree. +proc test033 { method {nentries 10000} {ndups 5} {tnum "033"} \ + {overflow 0} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + # Btree with compression does not support unsorted duplicates. 
+ if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + if {$ndups <= 1} { + set ndups 1 + } + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + + if {$ndups == 1} { + set dup "non-dup" + } else { + set dup "$ndups-dup" + } + + set dataset "" + if {$overflow != 0} { + set dataset "overflow" + } + puts "Test$tnum:\ + $method ($args) $nentries $dup ${dataset}key/data pairs" + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + # Duplicate data entries are not allowed in record based methods. + if { [is_record_based $method] == 1 || $ndups == 1} { + set db [eval {berkdb_open -create -mode 0644 \ + $omethod} $args {$testfile}] + } else { + set db [eval {berkdb_open -create -mode 0644 \ + $omethod -dup} $args {$testfile}] + } + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags "" + set txn "" + # + # Find the pagesize if we are testing with overflow pages. We will + # use the pagesize to build overflow items of the correct size. + # + if {$overflow != 0} { + set stat [$db stat] + set pg [get_pagesize $stat] + error_check_bad get_pagesize $pg -1 + set len $pg + } + + # Allocate a cursor for DB_GET_BOTH_RANGE. 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + + puts "\tTest$tnum.a: Put/get loop." + # Here is the loop where we put and get each key/data pair + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { $overflow != 0 } { + # + # Pad the data string so that overflow data items + # are large enough to generate overflow pages. + # + for { set j 1} { $j <= [expr $len / 4 - 1] } \ + { incr j } { + append str "!@#$" + } + } + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set ret [eval {$db put} $txn $pflags \ + {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } else { + for { set i 1 } { $i <= $ndups } { incr i } { + set datastr $i:$str + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good db_put $ret 0 + } + } + + # Now retrieve all the keys matching this key and dup + # for non-record based AMs. + if { [is_record_based $method] == 1 } { + test033_recno.check $db $dbc $method $str $txn $key + } else { + test033_check $db $dbc $method $str $txn $ndups + } + incr count + } + + close $did + + puts "\tTest$tnum.b: Verifying DB_GET_BOTH after creation." + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { $overflow != 0 } { + # + # Pad the data string so that overflow data items + # are large enough to generate overflow pages. + # + for { set j 1} { $j <= [expr $len / 4 - 1] } \ + { incr j } { + append str "!@#$" + } + } + # Now retrieve all the keys matching this key + # for non-record based AMs. 
+ if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + test033_recno.check $db $dbc $method $str $txn $key + } else { + test033_check $db $dbc $method $str $txn $ndups + } + incr count + } + close $did + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} + +# No testing of dups is done on record-based methods. +proc test033_recno.check {db dbc method str txn key} { + set ret [eval {$db get} $txn {-recno $key}] + error_check_good "db_get:$method" \ + [lindex [lindex $ret 0] 1] [pad_data $method $str] + set ret [$dbc get -get_both $key [pad_data $method $str]] + error_check_good "db_get_both:$method" \ + [lindex [lindex $ret 0] 1] [pad_data $method $str] +} + +# Testing of non-record-based methods includes duplicates +# and get_both_range. +proc test033_check {db dbc method str txn ndups} { + for {set i 1} {$i <= $ndups } { incr i } { + set datastr $i:$str + + set ret [eval {$db get} $txn {-get_both $str $datastr}] + error_check_good "db_get_both:dup#" \ + [lindex [lindex $ret 0] 1] $datastr + + set ret [$dbc get -get_both $str $datastr] + error_check_good "dbc_get_both:dup#" \ + [lindex [lindex $ret 0] 1] $datastr + + set ret [$dbc get -get_both_range $str $datastr] + error_check_good "dbc_get_both_range:dup#" \ + [lindex [lindex $ret 0] 1] $datastr + } + + # Now retrieve non-existent dup (i is ndups + 1) + set datastr $i:$str + set ret [eval {$db get} $txn {-get_both $str $datastr}] + error_check_good db_get_both:dupfailure [llength $ret] 0 + set ret [$dbc get -get_both $str $datastr] + error_check_good dbc_get_both:dupfailure [llength $ret] 0 + set ret [$dbc get -get_both_range $str $datastr] + error_check_good dbc_get_both_range [llength $ret] 0 +} diff --git a/test/tcl/test034.tcl b/test/tcl/test034.tcl new file mode 100644 index 00000000..64978aa5 --- /dev/null +++ b/test/tcl/test034.tcl @@ -0,0 +1,34 @@ +# See the file LICENSE for 
redistribution information. +# +# Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test034 +# TEST test032 with off-page or overflow case with non-duplicates +# TEST and duplicates. +# TEST +# TEST DB_GET_BOTH, DB_GET_BOTH_RANGE functionality with off-page +# TEST or overflow case within non-duplicates and duplicates. +proc test034 { method {nentries 10000} args} { + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test034: Skipping for specific pagesizes" + return + } + + # Test without duplicate and without overflow. + eval {test032 $method $nentries 1 "034" 0} $args + + # Test without duplicate but with overflows. + eval {test032 $method [expr $nentries / 100] 1 "034" 1} $args + + # Test with off-page duplicates + eval {test032 $method $nentries 20 "034" 0} -pagesize 512 $args + + # Test with multiple pages of off-page duplicates + eval {test032 $method [expr $nentries / 10] 100 "034" 0} -pagesize 512 $args + + # Test with overflow duplicate. + eval {test032 $method [expr $nentries / 100] 5 "034" 1} $args +} diff --git a/test/tcl/test035.tcl b/test/tcl/test035.tcl new file mode 100644 index 00000000..01f49403 --- /dev/null +++ b/test/tcl/test035.tcl @@ -0,0 +1,30 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test035 +# TEST Test033 with off-page non-duplicates and duplicates +# TEST DB_GET_BOTH functionality with off-page non-duplicates +# TEST and duplicates. 
+proc test035 { method {nentries 10000} args} { + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test035: Skipping for specific pagesizes" + return + } + # Test with off-page duplicates + eval {test033 $method $nentries 20 "035" 0 -pagesize 512} $args + # Test with multiple pages of off-page duplicates + eval {test033 $method [expr $nentries / 10] 100 "035" 0 -pagesize 512} \ + $args + # Test with overflow duplicates + eval {test033 $method [expr $nentries / 100] 20 "035" 1 -pagesize 512} \ + $args + # Test with off-page non-duplicates + eval {test033 $method $nentries 1 "035" 0 -pagesize 512} $args + # Test with overflow non-duplicates + eval {test033 $method [expr $nentries / 100] 1 "035" 1 -pagesize 512} \ + $args +} diff --git a/test/tcl/test036.tcl b/test/tcl/test036.tcl new file mode 100644 index 00000000..5983280c --- /dev/null +++ b/test/tcl/test036.tcl @@ -0,0 +1,172 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test036 +# TEST Test KEYFIRST and KEYLAST when the key doesn't exist +# TEST Put nentries key/data pairs (from the dictionary) using a cursor +# TEST and KEYFIRST and KEYLAST (this tests the case where use use cursor +# TEST put for non-existent keys). +proc test036 { method {nentries 10000} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_record_based $method] == 1 } { + puts "Test036 skipping for method recno" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test036.db + set env NULL + } else { + set testfile test036.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + + puts "Test036: $method ($args) $nentries equal key/data pairs" + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + set checkfunc test036_recno.check + append gflags " -recno" + } else { + set checkfunc test036.check + } + puts "\tTest036.a: put/get loop KEYFIRST" + # Here is the loop where we put and get each key/data pair + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) $str + } else { + set key $str + } + set ret [eval {$dbc put} $pflags {-keyfirst $key $str}] + error_check_good put $ret 0 + + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good get [lindex [lindex $ret 0] 1] $str + incr count + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest036.a: put/get loop KEYLAST" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good 
cursor [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) $str + } else { + set key $str + } + set ret [eval {$dbc put} $txn $pflags {-keylast $key $str}] + error_check_good put $ret 0 + + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good get [lindex [lindex $ret 0] 1] $str + incr count + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest036.c: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now compare the keys to see if they match the dictionary (or ints) + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + file rename -force $t1 $t3 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + filesort $t1 $t3 + } + +} + +# Check function for test036; keys and data are identical +proc test036.check { key data } { + error_check_good "key/data mismatch" $data $key +} + +proc test036_recno.check { key data } { + global dict + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} diff --git a/test/tcl/test037.tcl b/test/tcl/test037.tcl new file mode 100644 index 00000000..2b820e18 --- /dev/null +++ b/test/tcl/test037.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test037 +# TEST Test DB_RMW +proc test037 { method {nentries 100} args } { + global encrypt + + source ./include.tcl + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then skip this test. It needs its own. + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test037 skipping for env $env" + return + } + + puts "Test037: RMW $method" + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + # Create the database + env_cleanup $testdir + set testfile test037.db + + set local_env \ + [eval {berkdb_env -create -mode 0644 -txn} \ + $encargs $pageargs -home $testdir] + error_check_good dbenv [is_valid_env $local_env] TRUE + + set db [eval {berkdb_open -env $local_env \ + -create -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + set count 0 + + set pflags "" + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + puts "\tTest037.a: Creating database" + # Here is the loop where we put and get each key/data pair + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $txn $gflags {$key}] + error_check_good get \ + [lindex [lindex $ret 0] 1] [pad_data $method $str] + incr count + } + close $did + error_check_good dbclose [$db close] 0 + error_check_good envclode [$local_env close] 0 + + puts "\tTest037.b: Setting up environments" + + # Open local environment + set env_cmd \ + [concat berkdb_env -create -txn $encargs $pageargs -home $testdir] + set local_env [eval 
$env_cmd] + error_check_good dbenv [is_valid_env $local_env] TRUE + + # Open local transaction + set local_txn [$local_env txn] + error_check_good txn_open [is_valid_txn $local_txn $local_env] TRUE + + # Open remote environment + set f1 [open |$tclsh_path r+] + puts $f1 "source $test_path/test.tcl" + + set remote_env [send_cmd $f1 $env_cmd] + error_check_good remote:env_open [is_valid_env $remote_env] TRUE + + # Open remote transaction + set remote_txn [send_cmd $f1 "$remote_env txn"] + error_check_good \ + remote:txn_open [is_valid_txn $remote_txn $remote_env] TRUE + + # Now try put test without RMW. Gets on one site should not + # lock out gets on another. + + # Open databases and dictionary + puts "\tTest037.c: Opening databases" + set did [open $dict] + set rkey 0 + + set db [eval {berkdb_open -auto_commit -env $local_env } $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set rdb [send_cmd $f1 \ + "berkdb_open -auto_commit -env $remote_env $args -mode 0644 $testfile"] + error_check_good remote:dbopen [is_valid_db $rdb] TRUE + + puts "\tTest037.d: Testing without RMW" + + # Now, get a key and try to "get" it from both DBs. 
+ error_check_bad "gets on new open" [gets $did str] -1 + incr rkey + if { [is_record_based $method] == 1 } { + set key $rkey + } else { + set key $str + } + + set rec [eval {$db get -txn $local_txn} $gflags {$key}] + error_check_good local_get [lindex [lindex $rec 0] 1] \ + [pad_data $method $str] + + set r [send_timed_cmd $f1 0 "$rdb get -txn $remote_txn $gflags $key"] + error_check_good remote_send $r 0 + + # Now sleep before releasing local record lock + tclsleep 5 + error_check_good local_commit [$local_txn commit] 0 + + # Now get the remote result + set remote_time [rcv_result $f1] + error_check_good no_rmw_get:remote_time [expr $remote_time <= 1] 1 + + # Commit the remote + set r [send_cmd $f1 "$remote_txn commit"] + error_check_good remote_commit $r 0 + + puts "\tTest037.e: Testing with RMW" + + # Open local transaction + set local_txn [$local_env txn] + error_check_good \ + txn_open [is_valid_txn $local_txn $local_env] TRUE + + # Open remote transaction + set remote_txn [send_cmd $f1 "$remote_env txn"] + error_check_good remote:txn_open \ + [is_valid_txn $remote_txn $remote_env] TRUE + + # Now, get a key and try to "get" it from both DBs. 
+ error_check_bad "gets on new open" [gets $did str] -1 + incr rkey + if { [is_record_based $method] == 1 } { + set key $rkey + } else { + set key $str + } + + set rec [eval {$db get -txn $local_txn -rmw} $gflags {$key}] + error_check_good \ + local_get [lindex [lindex $rec 0] 1] [pad_data $method $str] + + set r [send_timed_cmd $f1 0 "$rdb get -txn $remote_txn $gflags $key"] + error_check_good remote_send $r 0 + + # Now sleep before releasing local record lock + tclsleep 5 + error_check_good local_commit [$local_txn commit] 0 + + # Now get the remote result + set remote_time [rcv_result $f1] + error_check_good rmw_get:remote_time [expr $remote_time > 4] 1 + + # Commit the remote + set r [send_cmd $f1 "$remote_txn commit"] + error_check_good remote_commit $r 0 + + # Close everything up: remote first + set r [send_cmd $f1 "$rdb close"] + error_check_good remote_db_close $r 0 + + set r [send_cmd $f1 "$remote_env close"] + + # Close locally + error_check_good db_close [$db close] 0 + $local_env close + close $did + close $f1 +} diff --git a/test/tcl/test038.tcl b/test/tcl/test038.tcl new file mode 100644 index 00000000..98a4e9e1 --- /dev/null +++ b/test/tcl/test038.tcl @@ -0,0 +1,232 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test038 +# TEST DB_GET_BOTH, DB_GET_BOTH_RANGE on deleted items +# TEST +# TEST Use the first 10,000 entries from the dictionary. Insert each with +# TEST self as key and "ndups" duplicates. For the data field, prepend the +# TEST letters of the alphabet in a random order so we force the duplicate +# TEST sorting code to do something. By setting ndups large, we can make +# TEST this an off-page test +# TEST +# TEST Test the DB_GET_BOTH and DB_GET_BOTH_RANGE functionality by retrieving +# TEST each dup in the file explicitly. Then remove each duplicate and try +# TEST the retrieval again. 
+proc test038 { method {nentries 10000} {ndups 5} {tnum "038"} args } { + global alphabet + global rand_init + source ./include.tcl + + berkdb srand $rand_init + + set args [convert_args $method $args] + set checkargs [split_partition_args $args] + + # The checkdb is of type hash so it can't use compression. + set checkargs [strip_compression_args $checkargs] + + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set checkdb $testdir/checkdb.db + set env NULL + } else { + set testfile test$tnum.db + set checkdb checkdb.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append checkargs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + puts "Test$tnum: \ + $method ($args) $nentries small sorted dup key/data pairs" + set db [eval {berkdb_open -create -mode 0644 \ + $omethod -dup -dupsort} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set check_db [eval {berkdb_open \ + -create -mode 0644 -hash} $checkargs {$checkdb}] + error_check_good dbopen:check_db [is_valid_db $check_db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + puts "\tTest$tnum.a: Put/get loop" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + set dups "" + for { set i 1 } { $i <= $ndups } { incr i } { + set pref \ + [string index $alphabet [berkdb random_int 0 25]] + set pref $pref[string \ + index $alphabet [berkdb random_int 0 25]] + while { [string first $pref $dups] != -1 } { + set pref [string toupper $pref] + if { [string first $pref $dups] != -1 } { + set pref [string index $alphabet \ + [berkdb random_int 0 25]] + set pref $pref[string index $alphabet \ + [berkdb random_int 0 25]] + } + } + if { [string length $dups] == 0 } { + set dups $pref + } else { + set dups "$dups $pref" + } + set datastr $pref:$str + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + } + set ret [eval {$check_db put} \ + $txn $pflags {$str [chop_data $method $dups]}] + error_check_good checkdb_put $ret 0 + + # Now retrieve all the keys matching this key + set x 0 + set lastdup "" + for {set ret [$dbc get -set $str]} \ + {[llength 
$ret] != 0} \ + {set ret [$dbc get -nextdup] } { + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + if {[string length $datastr] == 0} { + break + } + if {[string compare $lastdup $datastr] > 0} { + error_check_good sorted_dups($lastdup,$datastr)\ + 0 1 + } + incr x + set lastdup $datastr + } + error_check_good "Test$tnum:ndups:$str" $x $ndups + incr count + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $did + + # Now check the duplicates, then delete then recheck + puts "\tTest$tnum.b: Checking and Deleting duplicates" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + set check_c [eval {$check_db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $check_c $check_db] TRUE + + for {set ndx 0} {$ndx < $ndups} {incr ndx} { + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + set nn [expr $ndx * 3] + set pref [string range $d $nn [expr $nn + 1]] + set data $pref:$k + set ret [$dbc get -get_both $k $data] + error_check_good \ + get_both_key:$k [lindex [lindex $ret 0] 0] $k + error_check_good \ + get_both_data:$k [lindex [lindex $ret 0] 1] $data + + set ret [$dbc get -get_both_range $k $pref] + error_check_good \ + get_both_key:$k [lindex [lindex $ret 0] 0] $k + error_check_good \ + get_both_data:$k [lindex [lindex $ret 0] 1] $data + + set ret [$dbc del] + error_check_good del $ret 0 + + set ret [eval {$db get} $txn {-get_both $k $data}] + error_check_good error_case:$k [llength $ret] 0 + + # We should either not find anything (if deleting the + # largest duplicate in 
the set) or a duplicate that + # sorts larger than the one we deleted. + set ret [$dbc get -get_both_range $k $pref] + if { [llength $ret] != 0 } { + set datastr [lindex [lindex $ret 0] 1]] + if {[string compare \ + $pref [lindex [lindex $ret 0] 1]] >= 0} { + error_check_good \ + error_case_range:sorted_dups($pref,$datastr) 0 1 + } + } + + if {$ndx != 0} { + set n [expr ($ndx - 1) * 3] + set pref [string range $d $n [expr $n + 1]] + set data $pref:$k + set ret \ + [eval {$db get} $txn {-get_both $k $data}] + error_check_good error_case:$k [llength $ret] 0 + } + } + } + + error_check_good check_c:close [$check_c close] 0 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good check_db:close [$check_db close] 0 + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test039.tcl b/test/tcl/test039.tcl new file mode 100644 index 00000000..e3fdc003 --- /dev/null +++ b/test/tcl/test039.tcl @@ -0,0 +1,217 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test039 +# TEST DB_GET_BOTH/DB_GET_BOTH_RANGE on deleted items without comparison +# TEST function. +# TEST +# TEST Use the first 10,000 entries from the dictionary. Insert each with +# TEST self as key and "ndups" duplicates. For the data field, prepend the +# TEST letters of the alphabet in a random order so we force the duplicate +# TEST sorting code to do something. By setting ndups large, we can make +# TEST this an off-page test. +# TEST +# TEST Test the DB_GET_BOTH and DB_GET_BOTH_RANGE functionality by retrieving +# TEST each dup in the file explicitly. Then remove each duplicate and try +# TEST the retrieval again. 
+proc test039 { method {nentries 10000} {ndups 5} {tnum "039"} args } { + global alphabet + global rand_init + source ./include.tcl + + berkdb srand $rand_init + + set args [convert_args $method $args] + set checkargs [split_partition_args $args] + set omethod [convert_method $method] + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set checkdb $testdir/checkdb.db + set env NULL + } else { + set testfile test$tnum.db + set checkdb checkdb.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + append checkargs " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + cleanup $testdir $env + + puts "Test$tnum: $method $nentries \ + small $ndups unsorted dup key/data pairs" + + set db [eval {berkdb_open -create -mode 0644 \ + $omethod -dup} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set check_db [eval \ + {berkdb_open -create -mode 0644 -hash} $checkargs {$checkdb}] + error_check_good dbopen:check_db [is_valid_db $check_db] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + puts "\tTest$tnum.a: Put/get loop" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + while { [gets $did str] != -1 && $count < $nentries } { + set dups "" + for { set i 1 } { $i <= $ndups } { incr i } { + set pref \ + [string index $alphabet [berkdb random_int 0 25]] + set pref $pref[string \ + index $alphabet [berkdb random_int 0 25]] + while { [string first $pref $dups] != -1 } { + set pref [string toupper $pref] + if { [string first $pref $dups] != -1 } { + set pref [string index $alphabet \ + [berkdb random_int 0 25]] + set pref $pref[string index $alphabet \ + [berkdb random_int 0 25]] + } + } + if { [string length $dups] == 0 } { + set dups $pref + } else { + set dups "$dups $pref" + } + set datastr $pref:$str + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + } + set ret [eval {$check_db put} \ + $txn $pflags {$str [chop_data $method $dups]}] + error_check_good checkdb_put $ret 0 + + # Now retrieve all the keys matching this key + set x 0 + set lastdup "" + for {set ret [$dbc get -set $str]} \ + {[llength $ret] != 
0} \ + {set ret [$dbc get -nextdup] } { + set k [lindex [lindex $ret 0] 0] + if { [string compare $k $str] != 0 } { + break + } + set datastr [lindex [lindex $ret 0] 1] + if {[string length $datastr] == 0} { + break + } + set xx [expr $x * 3] + set check_data \ + [string range $dups $xx [expr $xx + 1]]:$k + error_check_good retrieve $datastr $check_data + incr x + } + error_check_good "Test$tnum:ndups:$str" $x $ndups + incr count + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + close $did + + # Now check the duplicates, then delete then recheck + puts "\tTest$tnum.b: Checking and Deleting duplicates" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + set check_c [eval {$check_db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $check_c $check_db] TRUE + + for {set ndx 0} {$ndx < $ndups} {incr ndx} { + for {set ret [$check_c get -first]} \ + {[llength $ret] != 0} \ + {set ret [$check_c get -next] } { + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_bad data_check:$d [string length $d] 0 + + set nn [expr $ndx * 3] + set pref [string range $d $nn [expr $nn + 1]] + set data $pref:$k + set ret [$dbc get -get_both $k $data] + error_check_good \ + get_both_key:$k [lindex [lindex $ret 0] 0] $k + error_check_good \ + get_both_data:$k [lindex [lindex $ret 0] 1] $data + + set ret [$dbc del] + error_check_good del $ret 0 + + set ret [$dbc get -get_both $k $data] + error_check_good get_both:$k [llength $ret] 0 + + set ret [$dbc get -get_both_range $k $data] + error_check_good get_both_range:$k [llength $ret] 0 + + if {$ndx != 0} { + set n [expr ($ndx - 1) * 3] + set pref [string range $d $n [expr $n + 1]] + set data $pref:$k + set ret [$dbc get -get_both $k $data] + error_check_good error_case:$k 
[llength $ret] 0 + } + } + } + + error_check_good check_c:close [$check_c close] 0 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good check_db:close [$check_db close] 0 + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test040.tcl b/test/tcl/test040.tcl new file mode 100644 index 00000000..b374ab1f --- /dev/null +++ b/test/tcl/test040.tcl @@ -0,0 +1,22 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1998, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test040 +# TEST Test038 with off-page duplicates +# TEST DB_GET_BOTH functionality with off-page duplicates. +proc test040 { method {nentries 10000} args} { + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test040: skipping for specific pagesizes" + return + } + # Test with off-page duplicates + eval {test038 $method $nentries 20 "040" -pagesize 512} $args + + # Test with multiple pages of off-page duplicates + eval {test038 $method [expr $nentries / 10] 100 "040" -pagesize 512} \ + $args +} diff --git a/test/tcl/test041.tcl b/test/tcl/test041.tcl new file mode 100644 index 00000000..e38c1c2a --- /dev/null +++ b/test/tcl/test041.tcl @@ -0,0 +1,17 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test041 +# TEST Test039 with off-page duplicates +# TEST DB_GET_BOTH functionality with off-page duplicates. 
+proc test041 { method {nentries 10000} args} { + # Test with off-page duplicates + eval {test039 $method $nentries 20 "041" -pagesize 512} $args + + # Test with multiple pages of off-page duplicates + eval {test039 $method [expr $nentries / 10] 100 "041" -pagesize 512} \ + $args +} diff --git a/test/tcl/test042.tcl b/test/tcl/test042.tcl new file mode 100644 index 00000000..58bfdb8c --- /dev/null +++ b/test/tcl/test042.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test042 +# TEST Concurrent Data Store test (CDB) +# TEST +# TEST Multiprocess DB test; verify that locking is working for the +# TEST concurrent access method product. +# TEST +# TEST Use the first "nentries" words from the dictionary. Insert each with +# TEST self as key and a fixed, medium length data string. Then fire off +# TEST multiple processes that bang on the database. Each one should try to +# TEST read and write random keys. When they rewrite, they'll append their +# TEST pid to the data string (sometimes doing a rewrite sometimes doing a +# TEST partial put). Some will use cursors to traverse through a few keys +# TEST before finding one to write. + +proc test042 { method {nentries 1000} args } { + global encrypt + + # + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test042 skipping for env $env" + return + } + + set args [convert_args $method $args] + if { $encrypt != 0 } { + puts "Test042 skipping for security" + return + } + + if { [is_heap $method] } { + puts "Test042 skipping for method $method" + return + } + # Don't 'eval' the args here -- we want them to stay in + # a lump until we pass them to berkdb_open and mdbscript. 
+ test042_body $method $nentries 0 $args + test042_body $method $nentries 1 $args +} + +proc test042_body { method nentries alldb args } { + source ./include.tcl + + if { $alldb } { + set eflag "-cdb -cdb_alldb" + } else { + set eflag "-cdb" + } + puts "Test042: CDB Test ($eflag) $method $nentries" + + # Set initial parameters + set do_exit 0 + set iter 10000 + set procs 5 + + # Process arguments + set oargs "" + for { set i 0 } { $i < [llength $args] } {incr i} { + switch -regexp -- [lindex $args $i] { + -dir { incr i; set testdir [lindex $args $i] } + -iter { incr i; set iter [lindex $args $i] } + -procs { incr i; set procs [lindex $args $i] } + -exit { set do_exit 1 } + default { append oargs " " [lindex $args $i] } + } + } + + # Eval the args into 'tempargs' so we can extract the + # pageargs and readjust the number of mutexes. However, + # leave 'args' itself alone so it can be passed into + # mdbscript. + + eval set tempargs $args + set pageargs "" + split_pageargs $tempargs pageargs + + # Create the database and open the dictionary + set basename test042 + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + + env_cleanup $testdir + + set env [eval {berkdb_env -create} $eflag $pageargs -home $testdir] + error_check_good dbenv [is_valid_env $env] TRUE + + # Env is created, now set up database + test042_dbinit $env $nentries $method $oargs $basename.0.db + if { $alldb } { + for { set i 1 } {$i < $procs} {incr i} { + test042_dbinit $env $nentries $method $oargs \ + $basename.$i.db + } + } + + # Remove old mpools and Open/create the lock and mpool regions + error_check_good env:close:$env [$env close] 0 + set ret [berkdb envremove -home $testdir] + error_check_good env_remove $ret 0 + + set env [eval {berkdb_env \ + -create -cachesize {0 1048576 1}} $pageargs $eflag -home $testdir] + error_check_good dbenv [is_valid_widget $env env] TRUE + + if { $do_exit == 1 } { + return + } + + # Now spawn off processes + berkdb debug_check + puts "\tTest042.b: 
forking off $procs children" + set pidlist {} + + for { set i 0 } {$i < $procs} {incr i} { + if { $alldb } { + set tf $basename.$i.db + } else { + set tf $basename.0.db + } + puts "exec $tclsh_path $test_path/wrap.tcl \ + mdbscript.tcl $testdir/test042.$i.log \ + $method $testdir $tf $nentries $iter $i $procs $args &" + set p [exec $tclsh_path $test_path/wrap.tcl \ + mdbscript.tcl $testdir/test042.$i.log $method \ + $testdir $tf $nentries $iter $i $procs $args &] + lappend pidlist $p + } + puts "Test042: $procs independent processes now running" + watch_procs $pidlist + + # Make sure we haven't added or lost any entries. + set dblist [glob $testdir/$basename.*.db] + foreach file $dblist { + set tf [file tail $file] + set db [eval {berkdb_open -env $env} $oargs $tf] + set statret [$db stat] + foreach pair $statret { + set fld [lindex $pair 0] + if { [string compare $fld {Number of records}] == 0 } { + set numrecs [lindex $pair 1] + break + } + } + error_check_good nentries $numrecs $nentries + error_check_good db_close [$db close] 0 + } + + # Check for test failure + set errstrings [eval findfail [glob $testdir/test042.*.log]] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + + # Test is done, blow away lock and mpool region + reset_env $env +} + +proc test042_dbinit { env nentries method oargs tf } { + global datastr + source ./include.tcl + + set omethod [convert_method $method] + set db [eval {berkdb_open -env $env -create \ + -mode 0644 $omethod} $oargs $tf] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put each key/data pair + puts "\tTest042.a: put loop $tf" + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good 
put:$db $ret 0 + incr count + } + close $did + error_check_good close:$db [$db close] 0 +} diff --git a/test/tcl/test043.tcl b/test/tcl/test043.tcl new file mode 100644 index 00000000..af00364b --- /dev/null +++ b/test/tcl/test043.tcl @@ -0,0 +1,196 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test043 +# TEST Recno renumbering and implicit creation test +# TEST Test the Record number implicit creation and renumbering options. +proc test043 { method {nentries 10000} args} { + source ./include.tcl + + set do_renumber [is_rrecno $method] + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test043: $method ($args)" + + if { [is_record_based $method] != 1 } { + puts "Test043 skipping for method $method" + return + } + + if { [is_heap $method] == 1 } { + puts "Test043 skipping for method $method" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test043.db + set env NULL + } else { + set testfile test043.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + cleanup $testdir $env + + # Create the database + set db [eval {berkdb_open -create -mode 0644} $args \ + {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set pflags "" + set gflags " -recno" + set txn "" + + # First test implicit creation and retrieval + set count 1 + set interval 5 + if { $nentries < $interval } { + set nentries [expr $interval + 1] + } + puts "\tTest043.a: insert keys at $interval record intervals" + while { $count <= $nentries } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$count [chop_data $method $count]}] + error_check_good "$db put $count" $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + set last $count + incr count $interval + } + + puts "\tTest043.b: get keys using DB_FIRST/DB_NEXT" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good "$db cursor" [is_valid_cursor $dbc $db] TRUE + + set check 1 + for { set rec [$dbc get -first] } { [llength $rec] != 0 } { + set rec [$dbc get -next] } { + set k [lindex [lindex $rec 0] 0] + set d [pad_data $method [lindex [lindex $rec 0] 1]] + error_check_good "$dbc get key==data" [pad_data $method $k] $d + error_check_good "$dbc get sequential" $k $check + if { $k > $nentries } { + error_check_good "$dbc get key too large" $k $nentries + } + incr check $interval + } + + # Now make sure that we get DB_KEYEMPTY for non-existent keys + puts "\tTest043.c: Retrieve non-existent keys" + global errorInfo + + set check 1 + for { set rec [$dbc get -first] } { [llength $rec] != 0 } { + set rec [$dbc get -next] } { + set k [lindex [lindex $rec 0] 0] + + set ret [eval {$db get} $txn $gflags {[expr $k + 1]}] + error_check_good "$db \ + get 
[expr $k + 1]" $ret [list] + + incr check $interval + # Make sure we don't do a retrieve past the end of file + if { $check >= $last } { + break + } + } + + # Now try deleting and make sure the right thing happens. + puts "\tTest043.d: Delete tests" + set rec [$dbc get -first] + error_check_bad "$dbc get -first" [llength $rec] 0 + error_check_good "$dbc get -first key" [lindex [lindex $rec 0] 0] 1 + error_check_good "$dbc get -first data" \ + [lindex [lindex $rec 0] 1] [pad_data $method 1] + + # Delete the first item + error_check_good "$dbc del" [$dbc del] 0 + + # Retrieving 1 should always fail + set ret [eval {$db get} $txn $gflags {1}] + error_check_good "$db get 1" $ret [list] + + # Now, retrieving other keys should work; keys will vary depending + # upon renumbering. + if { $do_renumber == 1 } { + set count [expr 0 + $interval] + set max [expr $nentries - 1] + } else { + set count [expr 1 + $interval] + set max $nentries + } + + while { $count <= $max } { + set rec [eval {$db get} $txn $gflags {$count}] + if { $do_renumber == 1 } { + set data [expr $count + 1] + } else { + set data $count + } + error_check_good "$db get $count" \ + [pad_data $method $data] [lindex [lindex $rec 0] 1] + incr count $interval + } + set max [expr $count - $interval] + + puts "\tTest043.e: Verify LAST/PREV functionality" + set count $max + for { set rec [$dbc get -last] } { [llength $rec] != 0 } { + set rec [$dbc get -prev] } { + set k [lindex [lindex $rec 0] 0] + set d [lindex [lindex $rec 0] 1] + if { $do_renumber == 1 } { + set data [expr $k + 1] + } else { + set data $k + } + error_check_good \ + "$dbc get key==data" [pad_data $method $data] $d + error_check_good "$dbc get sequential" $k $count + if { $k > $nentries } { + error_check_good "$dbc get key too large" $k $nentries + } + set count [expr $count - $interval] + if { $count < 1 } { + break + } + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + 
error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test044.tcl b/test/tcl/test044.tcl new file mode 100644 index 00000000..57cb35a3 --- /dev/null +++ b/test/tcl/test044.tcl @@ -0,0 +1,265 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test044 +# TEST Small system integration tests +# TEST Test proper functioning of the checkpoint daemon, +# TEST recovery, transactions, etc. +# TEST +# TEST System integration DB test: verify that locking, recovery, checkpoint, +# TEST and all the other utilities basically work. +# TEST +# TEST The test consists of $nprocs processes operating on $nfiles files. A +# TEST transaction consists of adding the same key/data pair to some random +# TEST number of these files. We generate a bimodal distribution in key size +# TEST with 70% of the keys being small (1-10 characters) and the remaining +# TEST 30% of the keys being large (uniform distribution about mean $key_avg). +# TEST If we generate a key, we first check to make sure that the key is not +# TEST already in the dataset. If it is, we do a lookup. +# +# XXX +# This test uses grow-only files currently! +proc test044 { method {nprocs 5} {nfiles 10} {cont 0} args } { + source ./include.tcl + global encrypt + global rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + + berkdb srand $rand_init + + # If we are using an env, then skip this test. It needs its own. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test044 skipping for env $env" + return + } + if { $encrypt != 0 } { + puts "Test044 skipping for security" + return + } + if { [is_heap $method] } { + puts "Test044 skipping for method $method" + return + } + puts "Test044: system integration test db $method $nprocs processes \ + on $nfiles files" + + # Parse options + set otherargs "" + set key_avg 10 + set data_avg 20 + set do_exit 0 + for { set i 0 } { $i < [llength $args] } {incr i} { + switch -regexp -- [lindex $args $i] { + -key_avg { incr i; set key_avg [lindex $args $i] } + -data_avg { incr i; set data_avg [lindex $args $i] } + -testdir { incr i; set testdir [lindex $args $i] } + -x.* { set do_exit 1 } + default { + lappend otherargs [lindex $args $i] + } + } + } + + if { $cont == 0 } { + # Create the database and open the dictionary + env_cleanup $testdir + + # Create an environment. Bump up the log region because + # we will create lots of files. This is especially + # needed when we test partitioned databases. + set cid [open $testdir/DB_CONFIG w] + puts $cid "set_lg_regionmax 200000" + close $cid + + puts "\tTest044.a: creating environment and $nfiles files" + set dbenv \ + [eval {berkdb_env -create -txn} $pageargs -home $testdir] + error_check_good env_open [is_valid_env $dbenv] TRUE + + # Create a bunch of files + set m $method + + for { set i 0 } { $i < $nfiles } { incr i } { + if { $method == "all" } { + switch [berkdb random_int 1 2] { + 1 { set m -btree } + 2 { set m -hash } + } + } else { + set m $omethod + } + + set db [eval {berkdb_open -env $dbenv -create \ + -mode 0644 $m} $otherargs {test044.$i.db}] + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + } + } + + # Close the environment + $dbenv close + + if { $do_exit == 1 } { + return + } + + # Database is created, now fork off the kids. 
+ puts "\tTest044.b: forking off $nprocs processes and utilities" + set cycle 1 + set ncycles 3 + while { $cycle <= $ncycles } { + set dbenv \ + [eval {berkdb_env -create -txn} $pageargs -home $testdir] + error_check_good env_open [is_valid_env $dbenv] TRUE + + # Fire off deadlock detector and checkpointer + puts "Beginning cycle $cycle" + set ddpid [exec $util_path/db_deadlock -h $testdir -t 5 &] + set cppid [exec $util_path/db_checkpoint -h $testdir -p 2 &] + puts "Deadlock detector: $ddpid Checkpoint daemon $cppid" + + set pidlist {} + for { set i 0 } {$i < $nprocs} {incr i} { + set p [exec $tclsh_path \ + $test_path/sysscript.tcl $testdir \ + $nfiles $key_avg $data_avg $omethod $args\ + >& $testdir/test044.$i.log &] + lappend pidlist $p + } + set sleep [berkdb random_int 300 600] + puts \ +"[timestamp] $nprocs processes running $pidlist for $sleep seconds" + tclsleep $sleep + + # Now simulate a crash + puts "[timestamp] Crashing" + + # + # The environment must remain open until this point to get + # proper sharing (using the paging file) on Win/9X. 
[#2342] + # + error_check_good env_close [$dbenv close] 0 + + tclkill $ddpid + tclkill $cppid + + foreach p $pidlist { + tclkill $p + } + + # Check for test failure + set errstrings [eval findfail [glob $testdir/test044.*.log]] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + + # Now run recovery + eval test044_verify $testdir $nfiles $otherargs + incr cycle + } +} + +proc test044_usage { } { + puts -nonewline "test044 method nentries [-d directory] [-i iterations]" + puts " [-p procs] -x" +} + +proc test044_verify { dir nfiles args} { + source ./include.tcl + + # Save everything away in case something breaks +# for { set f 0 } { $f < $nfiles } {incr f} { +# file copy -force $dir/test044.$f.db $dir/test044.$f.save1 +# } +# foreach f [glob $dir/log.*] { +# if { [is_substr $f save] == 0 } { +# file copy -force $f $f.save1 +# } +# } + + # Run recovery and then read through all the database files to make + # sure that they all look good. + + puts "\tTest044.verify: Running recovery and verifying file contents" + set stat [catch {exec $util_path/db_recover -h $dir} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." 
+ } + + # Save everything away in case something breaks +# for { set f 0 } { $f < $nfiles } {incr f} { +# file copy -force $dir/test044.$f.db $dir/test044.$f.save2 +# } +# foreach f [glob $dir/log.*] { +# if { [is_substr $f save] == 0 } { +# file copy -force $f $f.save2 +# } +# } + + for { set f 0 } { $f < $nfiles } { incr f } { + set db($f) [eval {berkdb_open} $args {$dir/test044.$f.db}] + error_check_good $f:dbopen [is_valid_db $db($f)] TRUE + + set cursors($f) [$db($f) cursor] + error_check_bad $f:cursor_open $cursors($f) NULL + error_check_good \ + $f:cursor_open [is_substr $cursors($f) $db($f)] 1 + } + + for { set f 0 } { $f < $nfiles } { incr f } { + for {set d [$cursors($f) get -first] } \ + { [string length $d] != 0 } \ + { set d [$cursors($f) get -next] } { + + set k [lindex [lindex $d 0] 0] + set d [lindex [lindex $d 0] 1] + + set flist [zero_list $nfiles] + set r $d + while { [set ndx [string first : $r]] != -1 } { + set fnum [string range $r 0 [expr $ndx - 1]] + if { [lindex $flist $fnum] == 0 } { + set fl "-set" + } else { + set fl "-next" + } + + if { $fl != "-set" || $fnum != $f } { + if { [string compare $fl "-set"] == 0} { + set full [$cursors($fnum) \ + get -set $k] + } else { + set full [$cursors($fnum) \ + get -next] + } + set key [lindex [lindex $full 0] 0] + set rec [lindex [lindex $full 0] 1] + error_check_good \ + $f:dbget_$fnum:key $key $k + error_check_good \ + $f:dbget_$fnum:data $rec $d + } + + set flist [lreplace $flist $fnum $fnum 1] + incr ndx + set r [string range $r $ndx end] + } + } + } + + for { set f 0 } { $f < $nfiles } { incr f } { + error_check_good $cursors($f) [$cursors($f) close] 0 + error_check_good db_close:$f [$db($f) close] 0 + } +} diff --git a/test/tcl/test045.tcl b/test/tcl/test045.tcl new file mode 100644 index 00000000..598ab5f8 --- /dev/null +++ b/test/tcl/test045.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved. +# +# $Id$ +# +# TEST test045 +# TEST Small random tester +# TEST Runs a number of random add/delete/retrieve operations. +# TEST Tests both successful conditions and error conditions. +# TEST +# TEST Run the random db tester on the specified access method. +# +# Options are: +# -adds +# -cursors +# -dataavg +# -delete +# -dups +# -errpct +# -init +# -keyavg +proc test045 { method {nops 10000} args } { + source ./include.tcl + global encrypt + + # + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test045 skipping for env $env" + return + } + set args [convert_args $method $args] + if { $encrypt != 0 } { + puts "Test045 skipping for security" + return + } + set omethod [convert_method $method] + + puts "Test045: Random tester on $method for $nops operations" + + # Set initial parameters + set adds [expr $nops * 10] + set cursors 5 + set dataavg 40 + set delete $nops + set dups 0 + set errpct 0 + set init 0 + if { [is_record_based $method] == 1 } { + set keyavg 10 + } else { + set keyavg 25 + } + + # Process arguments + set oargs "" + for { set i 0 } { $i < [llength $args] } {incr i} { + switch -regexp -- [lindex $args $i] { + -adds { incr i; set adds [lindex $args $i] } + -cursors { incr i; set cursors [lindex $args $i] } + -dataavg { incr i; set dataavg [lindex $args $i] } + -delete { incr i; set delete [lindex $args $i] } + -dups { incr i; set dups [lindex $args $i] } + -errpct { incr i; set errpct [lindex $args $i] } + -init { incr i; set init [lindex $args $i] } + -keyavg { incr i; set keyavg [lindex $args $i] } + -extent { incr i; + lappend oargs "-extent" "100" } + default { lappend oargs [lindex $args $i] } + } + } + + # Create the database and and initialize it. 
+ set root $testdir/test045 + set f $root.db + env_cleanup $testdir + + # Run the script with 3 times the number of initial elements to + # set it up. + set db [eval {berkdb_open \ + -create -mode 0644 $omethod} $oargs {$f}] + error_check_good dbopen:$f [is_valid_db $db] TRUE + + set r [$db close] + error_check_good dbclose:$f $r 0 + + # We redirect standard out, but leave standard error here so we + # can see errors. + + puts "\tTest045.a: Initializing database" + if { $init != 0 } { + set n [expr 3 * $init] + exec $tclsh_path \ + $test_path/dbscript.tcl $method $f $n \ + 1 $init $n $keyavg $dataavg $dups 0 -1 \ + > $testdir/test045.init + } + # Check for test failure + set initerrs [findfail $testdir/test045.init] + foreach str $initerrs { + puts "FAIL: error message in .init file: $str" + } + + puts "\tTest045.b: Now firing off berkdb rand dbscript, running: " + # Now the database is initialized, run a test + puts "$tclsh_path\ + $test_path/dbscript.tcl $method $f $nops $cursors $delete $adds \ + $keyavg $dataavg $dups $errpct $oargs > $testdir/test045.log" + + exec $tclsh_path \ + $test_path/dbscript.tcl $method $f \ + $nops $cursors $delete $adds $keyavg \ + $dataavg $dups $errpct $oargs\ + > $testdir/test045.log + + # Check for test failure + set logerrs [findfail $testdir/test045.log] + foreach str $logerrs { + puts "FAIL: error message in log file: $str" + } +} diff --git a/test/tcl/test046.tcl b/test/tcl/test046.tcl new file mode 100644 index 00000000..cf4979ed --- /dev/null +++ b/test/tcl/test046.tcl @@ -0,0 +1,820 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test046 +# TEST Overwrite test of small/big key/data with cursor checks. 
+proc test046 { method args } { + global alphabet + global errorInfo + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "\tTest046: Overwrite test with cursor and small/big key/data." + puts "\tTest046:\t$method $args" + + if { [is_rrecno $method] == 1} { + puts "\tTest046: skipping for method $method." + return + } + + set key "key" + set data "data" + set txn "" + set flags "" + + if { [is_record_based $method] == 1} { + set key "" + } + + puts "\tTest046: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test046 + set env NULL + } else { + set testfile test046 + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 $args $omethod" + set db [eval {berkdb_open} $oflags $testfile.a.db] + error_check_good dbopen [is_valid_db $db] TRUE + + # keep nkeys even + set nkeys 20 + + # Fill page w/ small key/data pairs + puts "\tTest046: Fill page with $nkeys small key/data pairs." 
+ for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { [is_record_based $method] == 1} { + set ret [eval {$db put} $txn {$i $data$i}] + } elseif { $i < 10 } { + set ret [eval {$db put} $txn [set key]00$i \ + [set data]00$i] + } elseif { $i < 100 } { + set ret [eval {$db put} $txn [set key]0$i \ + [set data]0$i] + } else { + set ret [eval {$db put} $txn {$key$i $data$i}] + } + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # open curs to db + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + # get db order of keys + for {set i 1; set ret [$dbc get -first]} { [llength $ret] != 0} { \ + set ret [$dbc get -next]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + incr i + } + + puts "\tTest046.a: Deletes by key." + puts "\t\tTest046.a.1: Get data with SET, then delete before cursor." 
+ # get key in middle of page, call this the nth set curr to it + set i [expr $nkeys/2] + set ret [$dbc get -set $key_set($i)] + error_check_bad dbc_get:set [llength $ret] 0 + set curr $ret + + # delete before cursor(n-1), make sure it is gone + set i [expr $i - 1] + error_check_good db_del [eval {$db del} $txn {$key_set($i)}] 0 + + # use set_range to get first key starting at n-1, should + # give us nth--but only works for btree + if { [is_btree $method] == 1 } { + set ret [$dbc get -set_range $key_set($i)] + } else { + if { [is_record_based $method] == 1 } { + set ret [$dbc get -set $key_set($i)] + error_check_good \ + dbc_get:deleted(recno) [llength [lindex $ret 1]] 0 + #error_check_good \ + # catch:get [catch {$dbc get -set $key_set($i)} ret] 1 + #error_check_good \ + # dbc_get:deleted(recno) [is_substr $ret "KEYEMPTY"] 1 + } else { + set ret [$dbc get -set $key_set($i)] + error_check_good dbc_get:deleted [llength $ret] 0 + } + set ret [$dbc get -set $key_set([incr i])] + incr i -1 + } + error_check_bad dbc_get:set(R)(post-delete) [llength $ret] 0 + error_check_good dbc_get(match):set $ret $curr + + puts "\t\tTest046.a.2: Delete cursor item by key." 
+ # nth key, which cursor should be on now + set i [incr i] + set ret [eval {$db del} $txn {$key_set($i)}] + error_check_good db_del $ret 0 + + # this should return n+1 key/data, curr has nth key/data + if { [string compare $omethod "-btree"] == 0 } { + set ret [$dbc get -set_range $key_set($i)] + } else { + if { [is_record_based $method] == 1 } { + set ret [$dbc get -set $key_set($i)] + error_check_good \ + dbc_get:deleted(recno) [llength [lindex $ret 1]] 0 + #error_check_good \ + # catch:get [catch {$dbc get -set $key_set($i)} ret] 1 + #error_check_good \ + # dbc_get:deleted(recno) [is_substr $ret "KEYEMPTY"] 1 + } else { + set ret [$dbc get -set $key_set($i)] + error_check_good dbc_get:deleted [llength $ret] 0 + } + set ret [$dbc get -set $key_set([expr $i+1])] + } + error_check_bad dbc_get(post-delete):set_range [llength $ret] 0 + error_check_bad dbc_get(no-match):set_range $ret $curr + + puts "\t\tTest046.a.3: Delete item after cursor." + # we'll delete n+2, since we have deleted n-1 and n + # i still equal to nth, cursor on n+1 + set i [incr i] + set ret [$dbc get -set $key_set($i)] + error_check_bad dbc_get:set [llength $ret] 0 + set curr [$dbc get -next] + error_check_bad dbc_get:next [llength $curr] 0 + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $curr] 0 + # delete *after* cursor pos. 
+ error_check_good db:del [eval {$db del} $txn {$key_set([incr i])}] 0 + + # make sure item is gone, try to get it + if { [string compare $omethod "-btree"] == 0} { + set ret [$dbc get -set_range $key_set($i)] + } else { + if { [is_record_based $method] == 1 } { + set ret [$dbc get -set $key_set($i)] + error_check_good \ + dbc_get:deleted(recno) [llength [lindex $ret 1]] 0 + #error_check_good \ + # catch:get [catch {$dbc get -set $key_set($i)} ret] 1 + #error_check_good \ + # dbc_get:deleted(recno) [is_substr $ret "KEYEMPTY"] 1 + } else { + set ret [$dbc get -set $key_set($i)] + error_check_good dbc_get:deleted [llength $ret] 0 + } + set ret [$dbc get -set $key_set([expr $i +1])] + } + error_check_bad dbc_get:set(_range) [llength $ret] 0 + error_check_bad dbc_get:set(_range) $ret $curr + error_check_good dbc_get:set [lindex [lindex $ret 0] 0] \ + $key_set([expr $i+1]) + + puts "\tTest046.b: Deletes by cursor." + puts "\t\tTest046.b.1: Delete, do DB_NEXT." + error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + set i [expr $i+2] + # i = n+4 + error_check_good dbc_get:next(match) \ + [lindex [lindex $ret 0] 0] $key_set($i) + + puts "\t\tTest046.b.2: Delete, do DB_PREV." + error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $ret] 0 + set i [expr $i-3] + # i = n+1 (deleted all in between) + error_check_good dbc_get:prev(match) \ + [lindex [lindex $ret 0] 0] $key_set($i) + + puts "\t\tTest046.b.3: Delete, do DB_CURRENT." + error_check_good dbc:del [$dbc del] 0 + # we just deleted, so current item should be KEYEMPTY, throws err + set ret [$dbc get -current] + error_check_good dbc_get:curr:deleted [llength [lindex $ret 1]] 0 + #error_check_good catch:get:current [catch {$dbc get -current} ret] 1 + #error_check_good dbc_get:curr:deleted [is_substr $ret "DB_KEYEMPTY"] 1 + + puts "\tTest046.c: Inserts (before/after), by key then cursor." 
+ puts "\t\tTest046.c.1: Insert by key before the cursor." + # i is at curs pos, i=n+1, we want to go BEFORE + set i [incr i -1] + set ret [eval {$db put} $txn {$key_set($i) $data_set($i)}] + error_check_good db_put:before $ret 0 + + puts "\t\tTest046.c.2: Insert by key after the cursor." + set i [incr i +2] + set ret [eval {$db put} $txn {$key_set($i) $data_set($i)}] + error_check_good db_put:after $ret 0 + + puts "\t\tTest046.c.3: Insert by curs with deleted curs (should fail)." + # cursor is on n+1, we'll change i to match + set i [incr i -1] + + error_check_good dbc:close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db:close [$db close] 0 + if { [is_record_based $method] == 1} { + puts "\t\tSkipping the rest of test for method $method." + puts "\tTest046 ($method) complete." + return + } else { + # Reopen without printing __db_errs. + set db [eval {berkdb_open_noerr} $oflags $testfile.a.db] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor [is_valid_cursor $dbc $db] TRUE + + # should fail with EINVAL (deleted cursor) + set errorCode NONE + error_check_good catch:put:before 1 \ + [catch {$dbc put -before $data_set($i)} ret] + error_check_good dbc_put:deleted:before \ + [is_substr $errorCode "EINVAL"] 1 + + # should fail with EINVAL + set errorCode NONE + error_check_good catch:put:after 1 \ + [catch {$dbc put -after $data_set($i)} ret] + error_check_good dbc_put:deleted:after \ + [is_substr $errorCode "EINVAL"] 1 + + puts "\t\tTest046.c.4:\ + Insert by cursor before/after existent cursor." 
+ # can't use before after w/o dup except renumber in recno + # first, restore an item so they don't fail + #set ret [eval {$db put} $txn {$key_set($i) $data_set($i)}] + #error_check_good db_put $ret 0 + + #set ret [$dbc get -set $key_set($i)] + #error_check_bad dbc_get:set [llength $ret] 0 + #set i [incr i -2] + # i = n - 1 + #set ret [$dbc get -prev] + #set ret [$dbc put -before $key_set($i) $data_set($i)] + #error_check_good dbc_put:before $ret 0 + # cursor pos is adjusted to match prev, recently inserted + #incr i + # i = n + #set ret [$dbc put -after $key_set($i) $data_set($i)] + #error_check_good dbc_put:after $ret 0 + } + + # For the next part of the test, we need a db with no dups to test + # overwrites + puts "\tTest046.d.0: Cleanup, close db, open new db with no dups." + error_check_good dbc:close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db:close [$db close] 0 + + set db [eval {berkdb_open} $oflags $testfile.d.db] + error_check_good dbopen [is_valid_db $db] TRUE + # Fill page w/ small key/data pairs + puts "\tTest046.d.0: Fill page with $nkeys small key/data pairs." 
+ for { set i 1 } { $i < $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i $data$i}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + set nkeys 20 + + # Prepare cursor on item + set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + + # Prepare unique big/small values for an initial + # and an overwrite set of key/data + foreach ptype {init over} { + foreach size {big small} { + if { [string compare $size big] == 0 } { + set key_$ptype$size \ + KEY_$size[repeat alphabet 250] + set data_$ptype$size \ + DATA_$size[repeat alphabet 250] + } else { + set key_$ptype$size \ + KEY_$size[repeat alphabet 10] + set data_$ptype$size \ + DATA_$size[repeat alphabet 10] + } + } + } + + set i 0 + # Do all overwrites for key and cursor + foreach type {key_over curs_over} { + # Overwrite (i=initial) four different kinds of pairs + incr i + puts "\tTest046.d: Overwrites $type." 
+ foreach i_pair {\ + {small small} {big small} {small big} {big big} } { + # Overwrite (w=write) with four different kinds of data + foreach w_pair {\ + {small small} {big small} {small big} {big big} } { + + # we can only overwrite if key size matches + if { [string compare [lindex \ + $i_pair 0] [lindex $w_pair 0]] != 0} { + continue + } + + # first write the initial key/data + set ret [$dbc put -keyfirst \ + key_init[lindex $i_pair 0] \ + data_init[lindex $i_pair 1]] + error_check_good \ + dbc_put:curr:init:$i_pair $ret 0 + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good dbc_get:curr:data \ + [lindex [lindex $ret 0] 1] \ + data_init[lindex $i_pair 1] + + # Now, try to overwrite: dups not supported in + # this db + if { [string compare $type key_over] == 0 } { + puts "\t\tTest046.d.$i: Key\ + Overwrite:($i_pair) by ($w_pair)." + set ret [eval {$db put} $txn \ + $"key_init[lindex $i_pair 0]" \ + $"data_over[lindex $w_pair 1]"] + error_check_good \ + dbput:over:i($i_pair):o($w_pair) $ret 0 + # check value + set ret [eval {$db get} $txn \ + $"key_init[lindex $i_pair 0]"] + error_check_bad \ + db:get:check [llength $ret] 0 + error_check_good db:get:compare_data \ + [lindex [lindex $ret 0] 1] \ + $"data_over[lindex $w_pair 1]" + } else { + # This is a cursor overwrite + puts \ + "\t\tTest046.d.$i:Curs Overwrite:($i_pair) by ($w_pair)." + set ret [$dbc put -current \ + $"data_over[lindex $w_pair 1]"] + error_check_good \ + dbcput:over:i($i_pair):o($w_pair) $ret 0 + # check value + set ret [$dbc get -current] + error_check_bad \ + dbc_get:curr [llength $ret] 0 + error_check_good dbc_get:curr:data \ + [lindex [lindex $ret 0] 1] \ + $"data_over[lindex $w_pair 1]" + } + } ;# foreach write pair + } ;# foreach initial pair + } ;# foreach type big/small + + puts "\tTest046.d.3: Cleanup for next part of test." 
+ error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + if { [is_rbtree $method] == 1} { + puts "\tSkipping the rest of Test046 for method $method." + puts "\tTest046 complete." + return + } + + puts "\tTest046.e.1: Open db with sorted dups." + set db [eval {berkdb_open_noerr} $oflags -dup -dupsort $testfile.e.db] + error_check_good dbopen [is_valid_db $db] TRUE + + # keep nkeys even + set nkeys 20 + set ndups 20 + + # Fill page w/ small key/data pairs + puts "\tTest046.e.2:\ + Put $nkeys small key/data pairs and $ndups sorted dups." + for { set i 0 } { $i < $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { $i < 10 } { + set ret [eval {$db put} $txn [set key]0$i [set data]0$i] + } else { + set ret [eval {$db put} $txn {$key$i $data$i}] + } + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # open curs to db + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + + # get db order of keys + for {set i 0; set ret [$dbc get -first]} { [llength $ret] != 0} { \ + set ret [$dbc get -next]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + incr i + } + + # put 20 sorted duplicates on key in middle of page + set i [expr $nkeys/2] + set ret [$dbc get -set $key_set($i)] + error_check_bad dbc_get:set [llength $ret] 0 + + set keym $key_set($i) + + for { set i 0 } { $i < $ndups } { incr i } { + if { $i < 10 } { + set ret [eval {$db put} $txn {$keym DUPLICATE_0$i}] + } else { + set ret [eval {$db put} $txn {$keym DUPLICATE_$i}] + } + error_check_good db_put:DUP($i) $ret 0 + } + + puts "\tTest046.e.3: Check duplicate duplicates" + set ret 
[eval {$db put} $txn {$keym DUPLICATE_00}] + error_check_good dbput:dupdup [is_substr $ret "DB_KEYEXIST"] 1 + + # get dup ordering + for {set i 0; set ret [$dbc get -set $keym]} { [llength $ret] != 0} {\ + set ret [$dbc get -nextdup] } { + set dup_set($i) [lindex [lindex $ret 0] 1] + incr i + } + + # put cursor on item in middle of dups + set i [expr $ndups/2] + set ret [$dbc get -get_both $keym $dup_set($i)] + error_check_bad dbc_get:get_both [llength $ret] 0 + + puts "\tTest046.f: Deletes by cursor." + puts "\t\tTest046.f.1: Delete by cursor, do a DB_NEXT, check cursor." + set ret [$dbc get -current] + error_check_bad dbc_get:current [llength $ret] 0 + error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + error_check_good \ + dbc_get:nextdup [lindex [lindex $ret 0] 1] $dup_set([incr i]) + + puts "\t\tTest046.f.2: Delete by cursor, do DB_PREV, check cursor." + error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $ret] 0 + set i [incr i -2] + error_check_good dbc_get:prev [lindex [lindex $ret 0] 1] $dup_set($i) + + puts "\t\tTest046.f.3: Delete by cursor, do DB_CURRENT, check cursor." 
+ error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -current] + error_check_good dbc_get:current:deleted [llength [lindex $ret 1]] 0 + #error_check_good catch:dbc_get:curr [catch {$dbc get -current} ret] 1 + #error_check_good \ + # dbc_get:current:deleted [is_substr $ret "DB_KEYEMPTY"] 1 + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # restore deleted keys + error_check_good db_put:1 [eval {$db put} $txn {$keym $dup_set($i)}] 0 + error_check_good db_put:2 [eval {$db put} $txn \ + {$keym $dup_set([incr i])}] 0 + error_check_good db_put:3 [eval {$db put} $txn \ + {$keym $dup_set([incr i])}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # tested above + + # Reopen database without __db_err, reset cursor + error_check_good dbclose [$db close] 0 + set db [eval {berkdb_open_noerr} $oflags -dup -dupsort $testfile.e.db] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set ret [$dbc get -set $keym] + error_check_bad dbc_get:set [llength $ret] 0 + set ret2 [$dbc get -current] + error_check_bad dbc_get:current [llength $ret2] 0 + # match + error_check_good dbc_get:current/set(match) $ret $ret2 + # right one? 
+ error_check_good \ + dbc_get:curr/set(matchdup) [lindex [lindex $ret 0] 1] $dup_set(0) + + # cursor is on first dup + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + # now on second dup + error_check_good dbc_get:next [lindex [lindex $ret 0] 1] $dup_set(1) + # check cursor + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good \ + dbcget:curr(compare) [lindex [lindex $ret 0] 1] $dup_set(1) + + puts "\tTest046.g: Inserts." + puts "\t\tTest046.g.1: Insert by key before cursor." + set i 0 + + # use "spam" to prevent a duplicate duplicate. + set ret [eval {$db put} $txn {$keym $dup_set($i)spam}] + error_check_good db_put:before $ret 0 + # make sure cursor was maintained + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good \ + dbc_get:current(post-put) [lindex [lindex $ret 0] 1] $dup_set(1) + + puts "\t\tTest046.g.2: Insert by key after cursor." + set i [expr $i + 2] + # use "eggs" to prevent a duplicate duplicate + set ret [eval {$db put} $txn {$keym $dup_set($i)eggs}] + error_check_good db_put:after $ret 0 + # make sure cursor was maintained + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good \ + dbc_get:curr(post-put,after) [lindex [lindex $ret 0] 1] $dup_set(1) + + puts "\t\tTest046.g.3: Insert by curs before/after curs (should fail)." + # should return EINVAL (dupsort specified) + error_check_good dbc_put:before:catch \ + [catch {$dbc put -before $dup_set([expr $i -1])} ret] 1 + error_check_good \ + dbc_put:before:deleted [is_substr $errorCode "EINVAL"] 1 + error_check_good dbc_put:after:catch \ + [catch {$dbc put -after $dup_set([expr $i +2])} ret] 1 + error_check_good \ + dbc_put:after:deleted [is_substr $errorCode "EINVAL"] 1 + + puts "\tTest046.h: Cursor overwrites." + puts "\t\tTest046.h.1: Test that dupsort disallows current overwrite." 
+ set ret [$dbc get -set $keym] + error_check_bad dbc_get:set [llength $ret] 0 + error_check_good \ + catch:dbc_put:curr [catch {$dbc put -current DATA_OVERWRITE} ret] 1 + error_check_good dbc_put:curr:dupsort [is_substr $errorCode EINVAL] 1 + + puts "\t\tTest046.h.2: New db (no dupsort)." + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Skip the rest of the test for compressed btree, now that + # we're no longer running with -dupsort. + if { [is_compressed $args] == 1 } { + puts "Skipping remainder of test046\ + for btree with compression." + return + } + + set db [eval {berkdb_open} \ + $oflags -dup $testfile.h.db] + error_check_good db_open [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + for {set i 0} {$i < $nkeys} {incr i} { + if { $i < 10 } { + set ret [eval {$db put} $txn {key0$i datum0$i}] + error_check_good db_put $ret 0 + } else { + set ret [eval {$db put} $txn {key$i datum$i}] + error_check_good db_put $ret 0 + } + if { $i == 0 } { + for {set j 0} {$j < $ndups} {incr j} { + if { $i < 10 } { + set keyput key0$i + } else { + set keyput key$i + } + if { $j < 10 } { + set ret [eval {$db put} $txn \ + {$keyput DUP_datum0$j}] + } else { + set ret [eval {$db put} $txn \ + {$keyput DUP_datum$j}] + } + error_check_good dbput:dup $ret 0 + } + } + } + + for {set i 0; set ret [$dbc get -first]} { [llength $ret] != 0} { \ + set ret [$dbc get -next]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + incr i + } + + for {set i 0; set ret [$dbc get -set key00]} {\ + [llength $ret] != 0} {set ret [$dbc get -nextdup]} { + set dup_set($i) [lindex [lindex $ret 0] 1] + incr i + } + set i 0 + set keym key0$i + set ret [$dbc get -set 
$keym] + error_check_bad dbc_get:set [llength $ret] 0 + error_check_good \ + dbc_get:set(match) [lindex [lindex $ret 0] 1] $dup_set($i) + + set ret [$dbc get -nextdup] + error_check_bad dbc_get:nextdup [llength $ret] 0 + error_check_good dbc_get:nextdup(match) \ + [lindex [lindex $ret 0] 1] $dup_set([expr $i + 1]) + + puts "\t\tTest046.h.3: Insert by cursor before cursor (DB_BEFORE)." + set ret [$dbc put -before BEFOREPUT] + error_check_good dbc_put:before $ret 0 + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good \ + dbc_get:curr:match [lindex [lindex $ret 0] 1] BEFOREPUT + # make sure that this is actually a dup w/ dup before + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $ret] 0 + error_check_good dbc_get:prev:match \ + [lindex [lindex $ret 0] 1] $dup_set($i) + set ret [$dbc get -prev] + # should not be a dup + error_check_bad dbc_get:prev(no_dup) \ + [lindex [lindex $ret 0] 0] $keym + + puts "\t\tTest046.h.4: Insert by cursor after cursor (DB_AFTER)." 
+ set ret [$dbc get -set $keym] + + # delete next 3 when fix + #puts "[$dbc get -current]\ + # [$dbc get -next] [$dbc get -next] [$dbc get -next] [$dbc get -next]" + #set ret [$dbc get -set $keym] + + error_check_bad dbc_get:set [llength $ret] 0 + set ret [$dbc put -after AFTERPUT] + error_check_good dbc_put:after $ret 0 + #puts [$dbc get -current] + + # delete next 3 when fix + #set ret [$dbc get -set $keym] + #puts "[$dbc get -current] next: [$dbc get -next] [$dbc get -next]" + #set ret [$dbc get -set AFTERPUT] + #set ret [$dbc get -set $keym] + #set ret [$dbc get -next] + #puts $ret + + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good dbc_get:curr:match [lindex [lindex $ret 0] 1] AFTERPUT + set ret [$dbc get -prev] + # now should be on first item (non-dup) of keym + error_check_bad dbc_get:prev1 [llength $ret] 0 + error_check_good \ + dbc_get:match [lindex [lindex $ret 0] 1] $dup_set($i) + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + error_check_good \ + dbc_get:match2 [lindex [lindex $ret 0] 1] AFTERPUT + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + # this is the dup we added previously + error_check_good \ + dbc_get:match3 [lindex [lindex $ret 0] 1] BEFOREPUT + + # now get rid of the dups we added + error_check_good dbc_del [$dbc del] 0 + set ret [$dbc get -prev] + error_check_bad dbc_get:prev2 [llength $ret] 0 + error_check_good dbc_del2 [$dbc del] 0 + # put cursor on first dup item for the rest of test + set ret [$dbc get -set $keym] + error_check_bad dbc_get:first [llength $ret] 0 + error_check_good \ + dbc_get:first:check [lindex [lindex $ret 0] 1] $dup_set($i) + + puts "\t\tTest046.h.5: Overwrite small by small." 
+ set ret [$dbc put -current DATA_OVERWRITE] + error_check_good dbc_put:current:overwrite $ret 0 + set ret [$dbc get -current] + error_check_good dbc_get:current(put,small/small) \ + [lindex [lindex $ret 0] 1] DATA_OVERWRITE + + puts "\t\tTest046.h.6: Overwrite small with big." + set ret [$dbc put -current DATA_BIG_OVERWRITE[repeat $alphabet 200]] + error_check_good dbc_put:current:overwrite:big $ret 0 + set ret [$dbc get -current] + error_check_good dbc_get:current(put,small/big) \ + [is_substr [lindex [lindex $ret 0] 1] DATA_BIG_OVERWRITE] 1 + + puts "\t\tTest046.h.7: Overwrite big with big." + set ret [$dbc put -current DATA_BIG_OVERWRITE2[repeat $alphabet 200]] + error_check_good dbc_put:current:overwrite(2):big $ret 0 + set ret [$dbc get -current] + error_check_good dbc_get:current(put,big/big) \ + [is_substr [lindex [lindex $ret 0] 1] DATA_BIG_OVERWRITE2] 1 + + puts "\t\tTest046.h.8: Overwrite big with small." + set ret [$dbc put -current DATA_OVERWRITE2] + error_check_good dbc_put:current:overwrite:small $ret 0 + set ret [$dbc get -current] + error_check_good dbc_get:current(put,big/small) \ + [is_substr [lindex [lindex $ret 0] 1] DATA_OVERWRITE2] 1 + + puts "\tTest046.i: Cleaning up from test." + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest046 complete." +} diff --git a/test/tcl/test047.tcl b/test/tcl/test047.tcl new file mode 100644 index 00000000..460b64d8 --- /dev/null +++ b/test/tcl/test047.tcl @@ -0,0 +1,261 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test047 +# TEST DBcursor->c_get get test with SET_RANGE option. 
+proc test047 { method args } { + source ./include.tcl + + set tnum 047 + set args [convert_args $method $args] + + if { [is_btree $method] != 1 } { + puts "Test$tnum skipping for method $method" + return + } + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + set method "-btree" + + puts "\tTest$tnum: Test of SET_RANGE interface to DB->c_get ($method)." + + set key "key" + set data "data" + set txn "" + set flags "" + + puts "\tTest$tnum.a: Create $method database." + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set testfile1 $testdir/test$tnum.a.db + set testfile2 $testdir/test$tnum.b.db + set env NULL + } else { + set testfile test$tnum.db + set testfile1 test$tnum.a.db + set testfile2 test$tnum.b.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 -dup $args $method" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 20 + # Fill page w/ small key/data pairs + # + puts "\tTest$tnum.b: Fill page with $nkeys small key/data pairs." 
+ for { set i 0 } { $i < $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i $data$i}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # open curs to db + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + puts "\tTest$tnum.c: Get data with SET_RANGE, then delete by cursor." + set i 0 + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get:set_range [llength $ret] 0 + set curr $ret + + # delete by cursor, make sure it is gone + error_check_good dbc_del [$dbc del] 0 + + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get(post-delete):set_range [llength $ret] 0 + error_check_bad dbc_get(no-match):set_range $ret $curr + + puts "\tTest$tnum.d: \ + Use another cursor to fix item on page, delete by db." + set dbcurs2 [eval {$db cursor} $txn] + error_check_good db:cursor2 [is_valid_cursor $dbcurs2 $db] TRUE + + set ret [$dbcurs2 get -set [lindex [lindex $ret 0] 0]] + error_check_bad dbc_get(2):set [llength $ret] 0 + set curr $ret + error_check_good db:del [eval {$db del} $txn \ + {[lindex [lindex $ret 0] 0]}] 0 + + # make sure item is gone + set ret [$dbcurs2 get -set_range [lindex [lindex $curr 0] 0]] + error_check_bad dbc2_get:set_range [llength $ret] 0 + error_check_bad dbc2_get:set_range $ret $curr + + puts "\tTest$tnum.e: Close for second part of test, close db/cursors." 
+ error_check_good dbc:close [$dbc close] 0 + error_check_good dbc2:close [$dbcurs2 close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good dbclose [$db close] 0 + + # open db + set db [eval {berkdb_open} $oflags $testfile1] + error_check_good dbopen2 [is_valid_db $db] TRUE + + set nkeys 10 + puts "\tTest$tnum.f: Fill page with $nkeys pairs, one set of dups." + for {set i 0} { $i < $nkeys } {incr i} { + # a pair + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i $data$i}] + error_check_good dbput($i) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set j 0 + for {set i 0} { $i < $nkeys } {incr i} { + # a dup set for same 1 key + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i DUP_$data$i}] + error_check_good dbput($i):dup $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + puts "\tTest$tnum.g: \ + Get dups key w/ SET_RANGE, pin onpage with another cursor." + set i 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get:set_range [llength $ret] 0 + + set dbc2 [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc2 $db] TRUE + set ret2 [$dbc2 get -set_range $key$i] + error_check_bad dbc2_get:set_range [llength $ret] 0 + + error_check_good dbc_compare $ret $ret2 + puts "\tTest$tnum.h: \ + Delete duplicates' key, use SET_RANGE to get next dup." 
+ set ret [$dbc2 del] + error_check_good dbc2_del $ret 0 + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get:set_range [llength $ret] 0 + error_check_bad dbc_get:set_range $ret $ret2 + + error_check_good dbc_close [$dbc close] 0 + error_check_good dbc2_close [$dbc2 close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $oflags $testfile2] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 10 + set ndups 1000 + + puts "\tTest$tnum.i: Fill page with $nkeys pairs and $ndups dups." + for {set i 0} { $i < $nkeys } { incr i} { + # a pair + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i $data$i}] + error_check_good dbput $ret 0 + + # dups for single pair + if { $i == 0} { + for {set j 0} { $j < $ndups } { incr j } { + set ret [eval {$db put} $txn \ + {$key$i DUP_$data$i:$j}] + error_check_good dbput:dup $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + set i 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + set dbc2 [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc2 $db] TRUE + puts "\tTest$tnum.j: \ + Get key of first dup with SET_RANGE, fix with 2 curs." + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get:set_range [llength $ret] 0 + + set ret2 [$dbc2 get -set_range $key$i] + error_check_bad dbc2_get:set_range [llength $ret] 0 + set curr $ret2 + + error_check_good dbc_compare $ret $ret2 + + puts "\tTest$tnum.k: Delete item by cursor, use SET_RANGE to verify." 
+ set ret [$dbc2 del] + error_check_good dbc2_del $ret 0 + set ret [$dbc get -set_range $key$i] + error_check_bad dbc_get:set_range [llength $ret] 0 + error_check_bad dbc_get:set_range $ret $curr + + puts "\tTest$tnum.l: Cleanup." + error_check_good dbc_close [$dbc close] 0 + error_check_good dbc2_close [$dbc2 close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum complete." +} diff --git a/test/tcl/test048.tcl b/test/tcl/test048.tcl new file mode 100644 index 00000000..32179a85 --- /dev/null +++ b/test/tcl/test048.tcl @@ -0,0 +1,178 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test048 +# TEST Cursor stability across Btree splits. +proc test048 { method args } { + global errorCode + global is_je_test + source ./include.tcl + + set tnum 048 + set args [convert_args $method $args] + + if { [is_btree $method] != 1 } { + puts "Test$tnum skipping for method $method." + return + } + + # Compression will change the behavior of page splits. + # Skip test for compression. + if { [is_compressed $args] } { + puts "Test$tnum skipping for compression" + return + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + if { [lindex $args $pgindex] > 8192 } { + puts "Test048: Skipping for large pagesizes" + return + } + } + + set method "-btree" + + puts "\tTest$tnum: Test of cursor stability across btree splits." + + set key "key" + set data "data" + set txn "" + set flags "" + + puts "\tTest$tnum.a: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 $args $method" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 5 + # Fill page w/ small key/data pairs, keep at leaf + # + puts "\tTest$tnum.b: Fill page with $nkeys small key/data pairs." + for { set i 0 } { $i < $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {key000$i $data$i}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # get db ordering, set cursors + puts "\tTest$tnum.c: Set cursors on each of $nkeys pairs." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for {set i 0; set ret [$db get key000$i]} {\ + $i < $nkeys && [llength $ret] != 0} {\ + incr i; set ret [$db get key000$i]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + set dbc [eval {$db cursor} $txn] + set dbc_set($i) $dbc + error_check_good db_cursor:$i \ + [is_valid_cursor $dbc_set($i) $db] TRUE + set ret [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc_set($i)_get:set [llength $ret] 0 + } + + # if mkeys is above 1000, need to adjust below for lexical order + set mkeys 1000 + puts "\tTest$tnum.d: Add $mkeys pairs to force split." 
+ for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 100 } { + set ret [eval {$db put} $txn {key0$i $data$i}] + } elseif { $i >= 10 } { + set ret [eval {$db put} $txn {key00$i $data$i}] + } else { + set ret [eval {$db put} $txn {key000$i $data$i}] + } + error_check_good dbput:more $ret 0 + } + + puts "\tTest$tnum.e: Make sure split happened." + # XXX We cannot call stat with active txns or we deadlock. + if { $txnenv != 1 && !$is_je_test } { + error_check_bad stat:check-split [is_substr [$db stat] \ + "{{Internal pages} 0}"] 1 + } + + puts "\tTest$tnum.f: Check to see that cursors maintained reference." + for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + puts "\tTest$tnum.g: Delete added keys to force reverse split." + for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 100 } { + error_check_good db_del:$i \ + [eval {$db del} $txn {key0$i}] 0 + } elseif { $i >= 10 } { + error_check_good db_del:$i \ + [eval {$db del} $txn {key00$i}] 0 + } else { + error_check_good db_del:$i \ + [eval {$db del} $txn {key000$i}] 0 + } + } + + puts "\tTest$tnum.h: Verify cursor reference." + for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + puts "\tTest$tnum.i: Cleanup." + # close cursors + for {set i 0} { $i < $nkeys } {incr i} { + error_check_good dbc_close:$i [$dbc_set($i) close] 0 + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + puts "\tTest$tnum.j: Verify reverse split." 
+ error_check_good stat:check-reverse_split [is_substr [$db stat] \ + "{{Internal pages} 0}"] 1 + + error_check_good dbclose [$db close] 0 + + puts "\tTest$tnum complete." +} diff --git a/test/tcl/test049.tcl b/test/tcl/test049.tcl new file mode 100644 index 00000000..73f75ef4 --- /dev/null +++ b/test/tcl/test049.tcl @@ -0,0 +1,186 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test049 +# TEST Cursor operations on uninitialized cursors. +proc test049 { method args } { + global errorInfo + global errorCode + source ./include.tcl + + set tnum 049 + set renum [is_rrecno $method] + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "\tTest$tnum: Test of cursor routines with uninitialized cursors." + + set key "key" + set data "data" + set txn "" + set flags "" + set rflags "" + + if { [is_record_based $method] == 1 } { + set key "" + } + + puts "\tTest$tnum.a: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 $rflags $omethod $args" + if { [is_record_based $method] == 0 &&\ + [is_rbtree $method] != 1 && [is_compressed $args] == 0 } { + append oflags " -dup" + } + set db [eval {berkdb_open_noerr} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 10 + puts "\tTest$tnum.b: Fill page with $nkeys small key/data pairs." 
+ for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i $data$i}] + error_check_good dbput:$i $ret 0 + if { $i == 1 } { + for {set j 0} { $j < [expr $nkeys / 2]} {incr j} { + set ret [eval {$db put} $txn \ + {$key$i DUPLICATE$j}] + error_check_good dbput:dup:$j $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # DBC GET + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc_u [eval {$db cursor} $txn] + error_check_good db:cursor [is_valid_cursor $dbc_u $db] TRUE + + puts "\tTest$tnum.c: Test dbc->get interfaces..." + set i 0 + foreach flag { current first last next prev nextdup} { + puts "\t\t...dbc->get($flag)" + catch {$dbc_u get -$flag} ret + error_check_good dbc:get:$flag [is_substr $errorCode EINVAL] 1 + } + + foreach flag { set set_range get_both} { + puts "\t\t...dbc->get($flag)" + if { [string compare $flag get_both] == 0} { + catch {$dbc_u get -$flag $key$i data0} ret + } else { + catch {$dbc_u get -$flag $key$i} ret + } + error_check_good dbc:get:$flag [is_substr $errorCode EINVAL] 1 + } + + puts "\t\t...dbc->get(current, partial)" + catch {$dbc_u get -current -partial {0 0}} ret + error_check_good dbc:get:partial [is_substr $errorCode EINVAL] 1 + + puts "\t\t...dbc->get(current, rmw)" + catch {$dbc_u get -rmw -current } ret + error_check_good dbc_get:rmw [is_substr $errorCode EINVAL] 1 + + puts "\tTest$tnum.d: Test dbc->put interface..." + # partial...depends on another + foreach flag { after before current keyfirst keylast } { + puts "\t\t...dbc->put($flag)" + if { [string match key* $flag] == 1 } { + if { [is_record_based $method] == 1 ||\ + [is_compressed $args] == 1 } { + # Keyfirst/keylast not allowed. + puts "\t\t...Skipping dbc->put($flag)." 
+ continue + } else { + # keyfirst/last should succeed + puts "\t\t...dbc->put($flag)...should succeed." + error_check_good dbcput:$flag \ + [$dbc_u put -$flag $key$i data0] 0 + + # now uninitialize cursor + error_check_good dbc_close [$dbc_u close] 0 + set dbc_u [eval {$db cursor} $txn] + error_check_good \ + db_cursor [is_substr $dbc_u $db] 1 + } + } elseif { [string compare $flag before ] == 0 || + [string compare $flag after ] == 0 } { + if { [is_record_based $method] == 0 &&\ + [is_rbtree $method] == 0 &&\ + [is_compressed $args] == 0} { + set ret [$dbc_u put -$flag data0] + error_check_good "$dbc_u:put:-$flag" $ret 0 + } elseif { $renum == 1 } { + # Renumbering recno will return a record number + set currecno \ + [lindex [lindex [$dbc_u get -current] 0] 0] + set ret [$dbc_u put -$flag data0] + if { [string compare $flag after] == 0 } { + error_check_good "$dbc_u put $flag" \ + $ret [expr $currecno + 1] + } else { + error_check_good "$dbc_u put $flag" \ + $ret $currecno + } + } else { + puts "\t\tSkipping $flag for $method" + } + } else { + set ret [$dbc_u put -$flag data0] + error_check_good "$dbc_u:put:-$flag" $ret 0 + } + } + # and partial + puts "\t\t...dbc->put(partial)" + catch {$dbc_u put -partial {0 0} $key$i $data$i} ret + error_check_good dbc_put:partial [is_substr $errorCode EINVAL] 1 + + # XXX dbc->dup, db->join (dbc->get join_item) + # dbc del + puts "\tTest$tnum.e: Test dbc->del interface." + catch {$dbc_u del} ret + error_check_good dbc_del [is_substr $errorCode EINVAL] 1 + + error_check_good dbc_close [$dbc_u close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum complete." +} diff --git a/test/tcl/test050.tcl b/test/tcl/test050.tcl new file mode 100644 index 00000000..31272a60 --- /dev/null +++ b/test/tcl/test050.tcl @@ -0,0 +1,220 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. 
All rights reserved. +# +# $Id$ +# +# TEST test050 +# TEST Overwrite test of small/big key/data with cursor checks for Recno. +proc test050 { method args } { + global alphabet + global errorInfo + global errorCode + source ./include.tcl + + set tstn 050 + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_rrecno $method] != 1 } { + puts "Test$tstn skipping for method $method." + return + } + + puts "\tTest$tstn:\ + Overwrite test with cursor and small/big key/data ($method)." + + set data "data" + set txn "" + set flags "" + + puts "\tTest$tstn: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test0$tstn.db + set env NULL + } else { + set testfile test0$tstn.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 $args $omethod" + set db [eval {berkdb_open_noerr} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # keep nkeys even + set nkeys 20 + + # Fill page w/ small key/data pairs + # + puts "\tTest$tstn: Fill page with $nkeys small key/data pairs." 
+ for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$i [chop_data $method $data$i]}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # open curs to db + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # get db order of keys + for {set i 0; set ret [$dbc get -first]} { [llength $ret] != 0} { \ + set ret [$dbc get -next]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + incr i + } + + # verify ordering: should be unnecessary, but hey, why take chances? + # key_set is zero indexed but keys start at 1 + for {set i 0} { $i < $nkeys } {incr i} { + error_check_good \ + verify_order:$i $key_set($i) [pad_data $method [expr $i+1]] + } + + puts "\tTest$tstn.a: Inserts before/after by cursor." + puts "\t\tTest$tstn.a.1:\ + Insert with uninitialized cursor (should fail)." + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + catch {$dbc put -before DATA1} ret + error_check_good dbc_put:before:uninit [is_substr $errorCode EINVAL] 1 + + catch {$dbc put -after DATA2} ret + error_check_good dbc_put:after:uninit [is_substr $errorCode EINVAL] 1 + + puts "\t\tTest$tstn.a.2: Insert with deleted cursor (should succeed)." 
+ set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + error_check_good dbc_del [$dbc del] 0 + set ret [$dbc put -current DATAOVER1] + error_check_good dbc_put:current:deleted $ret 0 + + puts "\t\tTest$tstn.a.3: Insert by cursor before cursor (DB_BEFORE)." + set currecno [lindex [lindex [$dbc get -current] 0] 0] + set ret [$dbc put -before DATAPUTBEFORE] + error_check_good dbc_put:before $ret $currecno + set old1 [$dbc get -next] + error_check_bad dbc_get:next [llength $old1] 0 + error_check_good \ + dbc_get:next(compare) [lindex [lindex $old1 0] 1] DATAOVER1 + + puts "\t\tTest$tstn.a.4: Insert by cursor after cursor (DB_AFTER)." + set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + error_check_good dbc_get:first [lindex [lindex $ret 0] 1] DATAPUTBEFORE + set currecno [lindex [lindex [$dbc get -current] 0] 0] + set ret [$dbc put -after DATAPUTAFTER] + error_check_good dbc_put:after $ret [expr $currecno + 1] + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $ret] 0 + error_check_good \ + dbc_get:prev [lindex [lindex $ret 0] 1] DATAPUTBEFORE + + puts "\t\tTest$tstn.a.5: Verify that all keys have been renumbered." + # should be $nkeys + 2 keys, starting at 1 + for {set i 1; set ret [$dbc get -first]} { \ + $i <= $nkeys && [llength $ret] != 0 } {\ + incr i; set ret [$dbc get -next]} { + error_check_good check_renumber $i [lindex [lindex $ret 0] 0] + } + + # tested above + + puts "\tTest$tstn.b: Overwrite tests (cursor and key)." 
+ # For the next part of the test, we need a db with no dups to test + # overwrites + # + # we should have ($nkeys + 2) keys, ordered: + # DATAPUTBEFORE, DATAPUTAFTER, DATAOVER1, data1, ..., data$nkeys + # + # Prepare cursor on item + # + set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + + # Prepare unique big/small values for an initial + # and an overwrite set of data + set databig DATA_BIG_[repeat alphabet 250] + set datasmall DATA_SMALL + + # Now, we want to overwrite data: + # by key and by cursor + # 1. small by small + # 2. small by big + # 3. big by small + # 4. big by big + # + set i 0 + # Do all overwrites for key and cursor + foreach type { by_key by_cursor } { + incr i + puts "\tTest$tstn.b.$i: Overwrites $type." + foreach pair { {small small} \ + {small big} {big small} {big big} } { + # put in initial type + set data $data[lindex $pair 0] + set ret [$dbc put -current $data] + error_check_good dbc_put:curr:init:($pair) $ret 0 + + # Now, try to overwrite: dups not supported in this db + if { [string compare $type by_key] == 0 } { + puts "\t\tTest$tstn.b.$i:\ + Overwrite:($pair):$type" + set ret [eval {$db put} $txn \ + 1 {OVER$pair$data[lindex $pair 1]}] + error_check_good dbput:over:($pair) $ret 0 + } else { + # This is a cursor overwrite + puts "\t\tTest$tstn.b.$i:\ + Overwrite:($pair) by cursor." + set ret [$dbc put \ + -current OVER$pair$data[lindex $pair 1]] + error_check_good dbcput:over:($pair) $ret 0 + } + } ;# foreach pair + } ;# foreach type key/cursor + + puts "\tTest$tstn.c: Cleanup and close cursor." + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + +} diff --git a/test/tcl/test051.tcl b/test/tcl/test051.tcl new file mode 100644 index 00000000..f75801a2 --- /dev/null +++ b/test/tcl/test051.tcl @@ -0,0 +1,225 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test051 +# TEST Fixed-length record Recno test. +# TEST 0. Test various flags (legal and illegal) to open +# TEST 1. Test partial puts where dlen != size (should fail) +# TEST 2. Partial puts for existent record -- replaces at beg, mid, and +# TEST end of record, as well as full replace +proc test051 { method { args "" } } { + global fixed_len + global errorInfo + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test051 ($method): Test of the fixed length records." + if { [is_fixed_length $method] != 1 } { + puts "Test051: skipping for method $method" + return + } + if { [is_partitioned $args] } { + puts "Test051 skipping for partitioned $omethod" + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test051.db + set testfile1 $testdir/test051a.db + set env NULL + } else { + set testfile test051.db + set testfile1 test051a.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + set oflags "-create -mode 0644 $args" + + # Test various flags (legal and illegal) to open + puts "\tTest051.a: Test correct flag behavior on open." 
+ set errorCode NONE + foreach f { "-dup" "-dup -dupsort" "-recnum" } { + puts "\t\tTest051.a: Test flag $f" + set stat [catch {eval {berkdb_open_noerr} $oflags $f $omethod \ + $testfile} ret] + error_check_good dbopen:flagtest:catch $stat 1 + error_check_good \ + dbopen:flagtest:$f [is_substr $errorCode EINVAL] 1 + set errorCode NONE + } + set f "-renumber" + puts "\t\tTest051.a: Test $f" + if { [is_frecno $method] == 1 } { + set db [eval {berkdb_open} $oflags $f $omethod $testfile] + error_check_good dbopen:flagtest:$f [is_valid_db $db] TRUE + $db close + } else { + error_check_good \ + dbopen:flagtest:catch [catch {eval {berkdb_open_noerr}\ + $oflags $f $omethod $testfile} ret] 1 + error_check_good \ + dbopen:flagtest:$f [is_substr $errorCode EINVAL] 1 + } + + # Test partial puts where dlen != size (should fail) + # it is an error to specify a partial put w/ different + # dlen and size in fixed length recno/queue + set key 1 + set data "" + set txn "" + set test_char "a" + + set db [eval {berkdb_open_noerr} $oflags $omethod $testfile1] + error_check_good dbopen [is_valid_db $db] TRUE + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + puts "\tTest051.b: Partial puts with dlen != size." + foreach dlen { 1 16 20 32 } { + foreach doff { 0 10 20 32 } { + # dlen < size + puts "\t\tTest051.e: dlen: $dlen, doff: $doff, \ + size: [expr $dlen+1]" + set data [repeat $test_char [expr $dlen + 1]] + error_check_good \ + catch:put 1 [catch {eval {$db put -partial \ + [list $doff $dlen]} $txn {$key $data}} ret] + + # We don't get back the server error string just + # the result. 
+ if { $eindex == -1 } { + error_check_good "dbput:partial: dlen < size" \ + [is_substr \ + $errorInfo "ecord length"] 1 + } else { + error_check_good "dbput:partial: dlen < size" \ + [is_substr $errorCode "EINVAL"] 1 + } + + # dlen > size + puts "\t\tTest051.e: dlen: $dlen, doff: $doff, \ + size: [expr $dlen-1]" + set data [repeat $test_char [expr $dlen - 1]] + error_check_good \ + catch:put 1 [catch {eval {$db put -partial \ + [list $doff $dlen]} $txn {$key $data}} ret] + if { $eindex == -1 } { + error_check_good "dbput:partial: dlen > size" \ + [is_substr \ + $errorInfo "ecord length"] 1 + } else { + error_check_good "dbput:partial: dlen < size" \ + [is_substr $errorCode "EINVAL"] 1 + } + } + } + + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + $db close + + # Partial puts for existent record -- replaces at beg, mid, and + # end of record, as well as full replace + puts "\tTest051.f: Partial puts within existent record." + set db [eval {berkdb_open} $oflags $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\t\tTest051.f: First try a put and then a full replace." 
+ set data [repeat "a" $fixed_len] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {1 $data}] + error_check_good dbput $ret 0 + set ret [eval {$db get} $txn {-recno 1}] + error_check_good dbget $data [lindex [lindex $ret 0] 1] + + set data [repeat "b" $fixed_len] + set ret [eval {$db put -partial [list 0 $fixed_len]} $txn {1 $data}] + error_check_good dbput $ret 0 + set ret [eval {$db get} $txn {-recno 1}] + error_check_good dbget $data [lindex [lindex $ret 0] 1] + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set data "InitialData" + set pdata "PUT" + set dlen [string length $pdata] + set ilen [string length $data] + set mid [expr $ilen/2] + + # put initial data + set key 0 + + set offlist [list 0 $mid [expr $ilen -1] [expr $fixed_len - $dlen]] + puts "\t\tTest051.g: Now replace at different offsets ($offlist)." + foreach doff $offlist { + incr key + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $data}] + error_check_good dbput:init $ret 0 + + puts "\t\tTest051.g: Replace at offset $doff." 
+ set ret [eval {$db put -partial [list $doff $dlen]} $txn \ + {$key $pdata}] + error_check_good dbput:partial $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if { $doff == 0} { + set beg "" + set end [string range $data $dlen $ilen] + } else { + set beg [string range $data 0 [expr $doff - 1]] + set end [string range $data [expr $doff + $dlen] $ilen] + } + if { $doff > $ilen } { + # have to put padding between record and inserted + # string + set newdata [format %s%s $beg $end] + set diff [expr $doff - $ilen] + set nlen [string length $newdata] + set newdata [binary \ + format a[set nlen]x[set diff]a$dlen $newdata $pdata] + } else { + set newdata [make_fixed_length \ + frecno [format %s%s%s $beg $pdata $end]] + } + set ret [$db get -recno $key] + error_check_good compare($newdata,$ret) \ + [binary_compare [lindex [lindex $ret 0] 1] $newdata] 0 + } + + $db close +} diff --git a/test/tcl/test052.tcl b/test/tcl/test052.tcl new file mode 100644 index 00000000..41c43b77 --- /dev/null +++ b/test/tcl/test052.tcl @@ -0,0 +1,268 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test052 +# TEST Renumbering record Recno test. +proc test052 { method args } { + global alphabet + global errorInfo + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test052: Test of renumbering recno." + if { [is_rrecno $method] != 1} { + puts "Test052: skipping for method $method." + return + } + + set data "data" + set txn "" + set flags "" + + puts "\tTest052: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test052.db + set env NULL + } else { + set testfile test052.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags "-create -mode 0644 $args $omethod" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # keep nkeys even + set nkeys 20 + + # Fill page w/ small key/data pairs + puts "\tTest052: Fill page with $nkeys small key/data pairs." + for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$i $data$i}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # open curs to db + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # get db order of keys + for {set i 1; set ret [$dbc get -first]} { [llength $ret] != 0} { \ + set ret [$dbc get -next]} { + set keys($i) [lindex [lindex $ret 0] 0] + set darray($i) [lindex [lindex $ret 0] 1] + incr i + } + + puts "\tTest052: Deletes by key." + puts "\tTest052.a: Get data with SET, then delete before cursor." 
+ # get key in middle of page, call this the nth set curr to it + set i [expr $nkeys/2] + set k $keys($i) + set ret [$dbc get -set $k] + error_check_bad dbc_get:set [llength $ret] 0 + error_check_good dbc_get:set [lindex [lindex $ret 0] 1] $darray($i) + + # delete by key before current + set i [incr i -1] + error_check_good db_del:before [eval {$db del} $txn {$keys($i)}] 0 + # with renumber, current's data should be constant, but key==--key + set i [incr i +1] + error_check_good dbc:data \ + [lindex [lindex [$dbc get -current] 0] 1] $darray($i) + error_check_good dbc:keys \ + [lindex [lindex [$dbc get -current] 0] 0] $keys([expr $nkeys/2 - 1]) + + puts "\tTest052.b: Delete cursor item by key." + set i [expr $nkeys/2 ] + + set ret [$dbc get -set $keys($i)] + error_check_bad dbc:get [llength $ret] 0 + error_check_good dbc:get:curs [lindex [lindex $ret 0] 1] \ + $darray([expr $i + 1]) + error_check_good db_del:curr [eval {$db del} $txn {$keys($i)}] 0 + set ret [$dbc get -current] + + # After a delete, cursor should return DB_NOTFOUND. + error_check_good dbc:get:key [llength [lindex [lindex $ret 0] 0]] 0 + error_check_good dbc:get:data [llength [lindex [lindex $ret 0] 1]] 0 + + # And the item after the cursor should now be + # key: $nkeys/2, data: $nkeys/2 + 2 + set ret [$dbc get -next] + error_check_bad dbc:getnext [llength $ret] 0 + error_check_good dbc:getnext:data \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 2]) + error_check_good dbc:getnext:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + + puts "\tTest052.c: Delete item after cursor." 
+ # should be { keys($nkeys/2), darray($nkeys/2 + 2) } + set i [expr $nkeys/2] + # deleting data for key after current (key $nkeys/2 + 1) + error_check_good db_del [eval {$db del} $txn {$keys([expr $i + 1])}] 0 + + # current should be constant + set ret [$dbc get -current] + error_check_bad dbc:get:current [llength $ret] 0 + error_check_good dbc:get:keys [lindex [lindex $ret 0] 0] \ + $keys($i) + error_check_good dbc:get:data [lindex [lindex $ret 0] 1] \ + $darray([expr $i + 2]) + + puts "\tTest052: Deletes by cursor." + puts "\tTest052.d: Delete, do DB_NEXT." + set i 1 + set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + error_check_good dbc_get:first [lindex [lindex $ret 0] 1] $darray($i) + error_check_good dbc_del [$dbc del] 0 + set ret [$dbc get -current] + error_check_good dbc_get:current [llength $ret] 0 + + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + error_check_good dbc:get:curs \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 1]) + error_check_good dbc:get:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + + # Move one more forward, so we're not on the first item. + error_check_bad dbc:getnext [llength [$dbc get -next]] 0 + + puts "\tTest052.e: Delete, do DB_PREV." 
+ error_check_good dbc:del [$dbc del] 0 + set ret [$dbc get -current] + error_check_good dbc:get:curr [llength $ret] 0 + + # next should now reference the record that was previously after + # old current + set ret [$dbc get -next] + error_check_bad get:next [llength $ret] 0 + error_check_good dbc:get:next:data \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 3]) + error_check_good dbc:get:next:keys \ + [lindex [lindex $ret 0] 0] $keys([expr $i + 1]) + + + set ret [$dbc get -prev] + error_check_bad dbc:get:curr [llength $ret] 0 + error_check_good dbc:get:curr:compare \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 1]) + error_check_good dbc:get:curr:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + + # The rest of the test was written with the old rrecno semantics, + # which required a separate c_del(CURRENT) test; to leave + # the database in the expected state, we now delete the first item. + set ret [$dbc get -first] + error_check_bad getfirst [llength $ret] 0 + error_check_good delfirst [$dbc del] 0 + + puts "\tTest052: Inserts." + puts "\tTest052.g: Insert before (DB_BEFORE)." 
+ set i 1 + set ret [$dbc get -first] + error_check_bad dbc:get:first [llength $ret] 0 + error_check_good dbc_get:first \ + [lindex [lindex $ret 0] 0] $keys($i) + error_check_good dbc_get:first:data \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 3]) + + set ret [$dbc put -before $darray($i)] + # should return new key, which should be $keys($i) + error_check_good dbc_put:before $ret $keys($i) + # cursor should adjust to point to new item + set ret [$dbc get -current] + error_check_bad dbc_get:curr [llength $ret] 0 + error_check_good dbc_put:before:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + error_check_good dbc_put:before:data \ + [lindex [lindex $ret 0] 1] $darray($i) + + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + error_check_good dbc_get:next:compare \ + $ret [list [list $keys([expr $i + 1]) $darray([expr $i + 3])]] + set ret [$dbc get -prev] + error_check_bad dbc_get:prev [llength $ret] 0 + + puts "\tTest052.h: Insert by cursor after (DB_AFTER)." + set i [incr i] + set ret [$dbc put -after $darray($i)] + # should return new key, which should be $keys($i) + error_check_good dbcput:after $ret $keys($i) + # cursor should reference new item + set ret [$dbc get -current] + error_check_good dbc:get:current:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + error_check_good dbc:get:current:data \ + [lindex [lindex $ret 0] 1] $darray($i) + + # items after curs should be adjusted + set ret [$dbc get -next] + error_check_bad dbc:get:next [llength $ret] 0 + error_check_good dbc:get:next:compare \ + $ret [list [list $keys([expr $i + 1]) $darray([expr $i + 2])]] + + puts "\tTest052.i: Insert (overwrite) current item (DB_CURRENT)." 
+ set i 1 + set ret [$dbc get -first] + error_check_bad dbc_get:first [llength $ret] 0 + # choose a datum that is not currently in db + set ret [$dbc put -current $darray([expr $i + 2])] + error_check_good dbc_put:curr $ret 0 + # curs should be on new item + set ret [$dbc get -current] + error_check_bad dbc_get:current [llength $ret] 0 + error_check_good dbc_get:curr:keys \ + [lindex [lindex $ret 0] 0] $keys($i) + error_check_good dbc_get:curr:data \ + [lindex [lindex $ret 0] 1] $darray([expr $i + 2]) + + set ret [$dbc get -next] + error_check_bad dbc_get:next [llength $ret] 0 + set i [incr i] + error_check_good dbc_get:next \ + $ret [list [list $keys($i) $darray($i)]] + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest052 complete." +} diff --git a/test/tcl/test053.tcl b/test/tcl/test053.tcl new file mode 100644 index 00000000..6b8eb392 --- /dev/null +++ b/test/tcl/test053.tcl @@ -0,0 +1,241 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test053 +# TEST Test of the DB_REVSPLITOFF flag in the Btree and Btree-w-recnum +# TEST methods. +proc test053 { method args } { + global alphabet + global errorCode + global is_je_test + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "\tTest053: Test of cursor stability across btree splits." + if { [is_btree $method] != 1 && [is_rbtree $method] != 1 } { + puts "Test053: skipping for method $method." + return + } + + if { [is_partition_callback $args] == 1 } { + puts "Test053: skipping for method $method with partition callback." 
+ return + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test053: skipping for specific pagesizes" + return + } + + set txn "" + set flags "" + + puts "\tTest053.a: Create $omethod $args database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test053.db + set env NULL + } else { + set testfile test053.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set oflags \ + "-create -revsplitoff -pagesize 1024 $args $omethod" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 8 + set npages 15 + + # We want to create a db with npages leaf pages, and have each page + # be near full with keys that we can predict. We set pagesize above + # to 1024 bytes, it should breakdown as follows (per page): + # + # ~20 bytes overhead + # key: ~4 bytes overhead, XXX0N where X is a letter, N is 0-9 + # data: ~4 bytes overhead, + 100 bytes + # + # then, with 8 keys/page we should be just under 1024 bytes + puts "\tTest053.b: Create $npages pages with $nkeys pairs on each." 
+ set keystring [string range $alphabet 0 [expr $npages -1]] + set data [repeat DATA 22] + for { set i 0 } { $i < $npages } {incr i } { + set key "" + set keyroot \ + [repeat [string toupper [string range $keystring $i $i]] 3] + set key_set($i) $keyroot + for {set j 0} { $j < $nkeys} {incr j} { + if { $j < 10 } { + set key [set keyroot]0$j + } else { + set key $keyroot$j + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $data}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + } + +# We really should not skip this test for partitioned dbs we need to +# calculate how many pages there should be which is tricky if we +# don't know where the keys are going to fall. If they are all +# in one partition then we can subtract the extra leaf pages +# in the extra partitions. The test further on should at least +# check that the number of pages is the same as what is found here. + if { !$is_je_test && ![is_substr $args "-partition"] } { + puts "\tTest053.c: Check page count." + error_check_good page_count:check \ + [is_substr [$db stat] "{Leaf pages} $npages"] 1 + } + + puts "\tTest053.d: Delete all but one key per page." + for {set i 0} { $i < $npages } {incr i } { + for {set j 1} { $j < $nkeys } {incr j } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db del} $txn {$key_set($i)0$j}] + error_check_good dbdel $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + } + + if { !$is_je_test && ![is_substr $args "-partition"] } { + puts "\tTest053.e: Check to make sure all pages are still there." 
+ error_check_good page_count:check \ + [is_substr [$db stat] "{Leaf pages} $npages"] 1 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db:cursor [is_valid_cursor $dbc $db] TRUE + + # walk cursor through tree forward, backward. + # delete one key, repeat + for {set i 0} { $i < $npages} {incr i} { + puts -nonewline \ + "\tTest053.f.$i: Walk curs through tree: forward..." + for { set j $i; set curr [$dbc get -first]} { $j < $npages} { \ + incr j; set curr [$dbc get -next]} { + error_check_bad dbc:get:next [llength $curr] 0 + error_check_good dbc:get:keys \ + [lindex [lindex $curr 0] 0] $key_set($j)00 + } + puts -nonewline "backward..." + for { set j [expr $npages - 1]; set curr [$dbc get -last]} { \ + $j >= $i } { \ + set j [incr j -1]; set curr [$dbc get -prev]} { + error_check_bad dbc:get:prev [llength $curr] 0 + error_check_good dbc:get:keys \ + [lindex [lindex $curr 0] 0] $key_set($j)00 + } + puts "complete." + + if { [is_rbtree $method] == 1} { + puts "\t\tTest053.f.$i:\ + Walk through tree with record numbers." + for {set j 1} {$j <= [expr $npages - $i]} {incr j} { + set curr [eval {$db get} $txn {-recno $j}] + error_check_bad \ + db_get:recno:$j [llength $curr] 0 + error_check_good db_get:recno:keys:$j \ + [lindex [lindex $curr 0] 0] \ + $key_set([expr $j + $i - 1])00 + } + } + puts "\tTest053.g.$i:\ + Delete single key ([expr $npages - $i] keys left)." + set ret [eval {$db del} $txn {$key_set($i)00}] + error_check_good dbdel $ret 0 + error_check_good del:check \ + [llength [eval {$db get} $txn {$key_set($i)00}]] 0 + } + + # end for loop, verify db_notfound + set ret [$dbc get -first] + error_check_good dbc:get:verify [llength $ret] 0 + + # loop: until single key restored on each page + for {set i 0} { $i < $npages} {incr i} { + puts "\tTest053.i.$i:\ + Restore single key ([expr $i + 1] keys in tree)." 
+ set ret [eval {$db put} $txn {$key_set($i)00 $data}] + error_check_good dbput $ret 0 + + puts -nonewline \ + "\tTest053.j: Walk cursor through tree: forward..." + for { set j 0; set curr [$dbc get -first]} { $j <= $i} {\ + incr j; set curr [$dbc get -next]} { + error_check_bad dbc:get:next [llength $curr] 0 + error_check_good dbc:get:keys \ + [lindex [lindex $curr 0] 0] $key_set($j)00 + } + error_check_good dbc:get:next [llength $curr] 0 + + puts -nonewline "backward..." + for { set j $i; set curr [$dbc get -last]} { \ + $j >= 0 } { \ + set j [incr j -1]; set curr [$dbc get -prev]} { + error_check_bad dbc:get:prev [llength $curr] 0 + error_check_good dbc:get:keys \ + [lindex [lindex $curr 0] 0] $key_set($j)00 + } + puts "complete." + error_check_good dbc:get:prev [llength $curr] 0 + + if { [is_rbtree $method] == 1} { + puts "\t\tTest053.k.$i:\ + Walk through tree with record numbers." + for {set j 1} {$j <= [expr $i + 1]} {incr j} { + set curr [eval {$db get} $txn {-recno $j}] + error_check_bad \ + db_get:recno:$j [llength $curr] 0 + error_check_good db_get:recno:keys:$j \ + [lindex [lindex $curr 0] 0] \ + $key_set([expr $j - 1])00 + } + } + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "Test053 complete." +} diff --git a/test/tcl/test054.tcl b/test/tcl/test054.tcl new file mode 100644 index 00000000..e68a6323 --- /dev/null +++ b/test/tcl/test054.tcl @@ -0,0 +1,459 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test054 +# TEST Cursor maintenance during key/data deletion. +# TEST +# TEST This test checks for cursor maintenance in the presence of deletes. +# TEST There are N different scenarios to tests: +# TEST 1. No duplicates. Cursor A deletes a key, do a GET for the key. +# TEST 2. No duplicates. 
Cursor is positioned right before key K, Delete K, +# TEST do a next on the cursor. +# TEST 3. No duplicates. Cursor is positioned on key K, do a regular delete +# TEST of K, do a current get on K. +# TEST 4. Repeat 3 but do a next instead of current. +# TEST 5. Duplicates. Cursor A is on the first item of a duplicate set, A +# TEST does a delete. Then we do a non-cursor get. +# TEST 6. Duplicates. Cursor A is in a duplicate set and deletes the item. +# TEST do a delete of the entire Key. Test cursor current. +# TEST 7. Continue last test and try cursor next. +# TEST 8. Duplicates. Cursor A is in a duplicate set and deletes the item. +# TEST Cursor B is in the same duplicate set and deletes a different item. +# TEST Verify that the cursor is in the right place. +# TEST 9. Cursors A and B are in the same place in the same duplicate set. A +# TEST deletes its item. Do current on B. +# TEST 10. Continue 8 and do a next on B. +proc test054 { method args } { + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + append args " -create -mode 0644" + puts "Test054 ($method $args):\ + interspersed cursor and normal operations" + if { [is_record_based $method] == 1 } { + puts "Test054 skipping for method $method" + return + } + + # Find the environment in the argument list, we'll need it + # later. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + } + + # Create the database and open the dictionary + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test054-nodup.db + set env NULL + } else { + set testfile test054-nodup.db + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set flags "" + set txn "" + + puts "\tTest054.a: No Duplicate Tests" + set db [eval {berkdb_open} $args {$omethod $testfile}] + error_check_good db_open:nodup [is_valid_db $db] TRUE + + # Put three keys in the database + for { set key 1 } { $key <= 3 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $flags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:nodup [is_valid_cursor $curs $db] TRUE + + # Retrieve keys sequentially so we can figure out their order + set i 1 + for {set d [$curs get -first] } \ + {[llength $d] != 0 } \ + {set d [$curs get -next] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + # Test case #1. + puts "\tTest054.a1: Delete w/cursor, regular get" + + # Now set the cursor on the middle one. + set r [$curs get -set $key_set(2)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good curs_get:DB_SET:data $d datum$key_set(2) + + # Now do the delete + set r [$curs del] + error_check_good curs_del $r 0 + + # Now do the get + set r [eval {$db get} $txn {$key_set(2)}] + error_check_good get_after_del [llength $r] 0 + + # Free up the cursor. 
+ error_check_good cursor_close [eval {$curs close}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Test case #2. + puts "\tTest054.a2: Cursor before K, delete K, cursor next" + + # Replace key 2 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn {$key_set(2) datum$key_set(2)}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Open and position cursor on first item. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:nodup [is_valid_cursor $curs $db] TRUE + + # Retrieve keys sequentially so we can figure out their order + set i 1 + for {set d [eval {$curs get} -first] } \ + {[llength $d] != 0 } \ + {set d [$curs get -nextdup] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + set r [eval {$curs get} -set {$key_set(1)} ] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(1) + error_check_good curs_get:DB_SET:data $d datum$key_set(1) + + # Now delete (next item) $key_set(2) + error_check_good \ + db_del:$key_set(2) [eval {$db del} $txn {$key_set(2)}] 0 + + # Now do next on cursor + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(3) + error_check_good curs_get:DB_NEXT:data $d datum$key_set(3) + + # Test case #3. 
+ puts "\tTest054.a3: Cursor on K, delete K, cursor current" + + # delete item 3 + error_check_good \ + db_del:$key_set(3) [eval {$db del} $txn {$key_set(3)}] 0 + # NEEDS TO COME BACK IN, BUG CHECK + set ret [$curs get -current] + error_check_good current_after_del $ret "" + error_check_good cursor_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest054.a4: Cursor on K, delete K, cursor next" + + # Restore keys 2 and 3 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn {$key_set(2) datum$key_set(2)}] + error_check_good put $r 0 + set r [eval {$db put} $txn {$key_set(3) datum$key_set(3)}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # Create the new cursor and put it on 1 + set curs [eval {$db cursor} $txn] + error_check_good curs_open:nodup [is_valid_cursor $curs $db] TRUE + set r [$curs get -set $key_set(1)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(1) + error_check_good curs_get:DB_SET:data $d datum$key_set(1) + + # Delete 2 + error_check_good \ + db_del:$key_set(2) [eval {$db del} $txn {$key_set(2)}] 0 + + # Now do next on cursor + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(3) + error_check_good curs_get:DB_NEXT:data $d datum$key_set(3) + + # Close cursor + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now get ready for duplicate tests + + if { [is_rbtree $method] == 1 
|| [is_compressed $args] == 1 } { + puts "Test054: skipping remainder of test for method $method." + return + } + + puts "\tTest054.b: Duplicate Tests" + append args " -dup" + + # Open a new database for the dup tests so -truncate is not needed. + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test054-dup.db + set env NULL + } else { + set testfile test054-dup.db + set env [lindex $args $eindex] + set testdir [get_home $env] + } + cleanup $testdir $env + + set flags "" + set txn "" + + set db [eval {berkdb_open} $args {$omethod $testfile}] + error_check_good db_open:dup [is_valid_db $db] TRUE + + # Put three keys in the database + for { set key 1 } { $key <= 3 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $flags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Retrieve keys sequentially so we can figure out their order + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:dup [is_valid_cursor $curs $db] TRUE + + set i 1 + for {set d [$curs get -first] } \ + {[llength $d] != 0 } \ + {set d [$curs get -nextdup] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + # Now put in a bunch of duplicates for key 2 + for { set d 1 } { $d <= 5 } {incr d} { + set r [eval {$db put} $txn $flags {$key_set(2) dup_$d}] + error_check_good dup:put $r 0 + } + + # Test case #5. + puts "\tTest054.b1: Delete dup w/cursor on first item. Get on key." + + # Now set the cursor on the first of the duplicate set. 
+ set r [eval {$curs get} -set {$key_set(2)}] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good curs_get:DB_SET:data $d datum$key_set(2) + + # Now do the delete + set r [$curs del] + error_check_good curs_del $r 0 + + # Now do the get + set r [eval {$db get} $txn {$key_set(2)}] + error_check_good get_after_del [lindex [lindex $r 0] 1] dup_1 + + # Test case #6. + puts "\tTest054.b2: Now get the next duplicate from the cursor." + + # Now do next on cursor + set r [$curs get -nextdup] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_1 + + # Test case #3. + puts "\tTest054.b3: Two cursors in set; each delete different items" + + # Open a new cursor. + set curs2 [eval {$db cursor} $txn] + error_check_good curs_open [is_valid_cursor $curs2 $db] TRUE + + # Set on last of duplicate set. 
+ set r [$curs2 get -set $key_set(3)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(3) + error_check_good curs_get:DB_SET:data $d datum$key_set(3) + + set r [$curs2 get -prev] + error_check_bad cursor_get:DB_PREV [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_PREV:key $k $key_set(2) + error_check_good curs_get:DB_PREV:data $d dup_5 + + # Delete the item at cursor 1 (dup_1) + error_check_good curs1_del [$curs del] 0 + + # Verify curs1 and curs2 + # current should fail + set ret [$curs get -current] + error_check_good curs1_get_after_del $ret "" + + set r [$curs2 get -current] + error_check_bad curs2_get [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_CURRENT:key $k $key_set(2) + error_check_good curs_get:DB_CURRENT:data $d dup_5 + + # Now delete the item at cursor 2 (dup_5) + error_check_good curs2_del [$curs2 del] 0 + + # Verify curs1 and curs2 + set ret [$curs get -current] + error_check_good curs1_get:del2 $ret "" + + set ret [$curs2 get -current] + error_check_good curs2_get:del2 $ret "" + + # Now verify that next and prev work. 
+ + set r [$curs2 get -prev] + error_check_bad cursor_get:DB_PREV [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_PREV:key $k $key_set(2) + error_check_good curs_get:DB_PREV:data $d dup_4 + + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_2 + + puts "\tTest054.b4: Two cursors same item, one delete, one get" + + # Move curs2 onto dup_2 + set r [$curs2 get -prev] + error_check_bad cursor_get:DB_PREV [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_PREV:key $k $key_set(2) + error_check_good curs_get:DB_PREV:data $d dup_3 + + set r [$curs2 get -prev] + error_check_bad cursor_get:DB_PREV [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_PREV:key $k $key_set(2) + error_check_good curs_get:DB_PREV:data $d dup_2 + + # delete on curs 1 + error_check_good curs1_del [$curs del] 0 + + # Verify gets on both 1 and 2 + set ret [$curs get -current] + error_check_good \ + curs1_get:deleted $ret "" + set ret [$curs2 get -current] + error_check_good \ + curs2_get:deleted $ret "" + + puts "\tTest054.b5: Now do a next on both cursors" + + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_3 + + set r [$curs2 get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_3 + + # Close cursor + error_check_good curs_close [$curs close] 0 + 
error_check_good curs2_close [$curs2 close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test055.tcl b/test/tcl/test055.tcl new file mode 100644 index 00000000..c5a2238e --- /dev/null +++ b/test/tcl/test055.tcl @@ -0,0 +1,140 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test055 +# TEST Basic cursor operations. +# TEST This test checks basic cursor operations. +# TEST There are N different scenarios to tests: +# TEST 1. (no dups) Set cursor, retrieve current. +# TEST 2. (no dups) Set cursor, retrieve next. +# TEST 3. (no dups) Set cursor, retrieve prev. +proc test055 { method args } { + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test055: $method interspersed cursor and normal operations" + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test055.db + set env NULL + } else { + set testfile test055.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set flags "" + set txn "" + + puts "\tTest055.a: No duplicates" + set db [eval {berkdb_open -create -mode 0644 $omethod } \ + $args {$testfile}] + error_check_good db_open:nodup [is_valid_db $db] TRUE + + # Put three keys in the database + for { set key 1 } { $key <= 3 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $flags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Retrieve keys sequentially so we can figure out their order + set i 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:nodup [is_valid_cursor $curs $db] TRUE + + for {set d [$curs get -first] } { [llength $d] != 0 } {\ + set d [$curs get -next] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + # Test case #1. + puts "\tTest055.a1: Set cursor, retrieve current" + + # Now set the cursor on the middle on. 
+ set r [$curs get -set $key_set(2)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good \ + curs_get:DB_SET:data $d [pad_data $method datum$key_set(2)] + + # Now retrieve current + set r [$curs get -current] + error_check_bad cursor_get:DB_CURRENT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_CURRENT:key $k $key_set(2) + error_check_good \ + curs_get:DB_CURRENT:data $d [pad_data $method datum$key_set(2)] + + # Test case #2. + puts "\tTest055.a2: Set cursor, retrieve previous" + set r [$curs get -prev] + error_check_bad cursor_get:DB_PREV [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_PREV:key $k $key_set(1) + error_check_good \ + curs_get:DB_PREV:data $d [pad_data $method datum$key_set(1)] + + # Test case #3. + puts "\tTest055.a2: Set cursor, retrieve next" + + # Now set the cursor on the middle one. + set r [$curs get -set $key_set(2)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good \ + curs_get:DB_SET:data $d [pad_data $method datum$key_set(2)] + + # Now retrieve next + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(3) + error_check_good \ + curs_get:DB_NEXT:data $d [pad_data $method datum$key_set(3)] + + # Close cursor and database. 
+ error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test056.tcl b/test/tcl/test056.tcl new file mode 100644 index 00000000..d428f4ce --- /dev/null +++ b/test/tcl/test056.tcl @@ -0,0 +1,174 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test056 +# TEST Cursor maintenance during deletes. +# TEST Check if deleting a key when a cursor is on a duplicate of that +# TEST key works. +proc test056 { method args } { + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + append args " -create -mode 0644 -dup " + if { [is_record_based $method] == 1 || [is_rbtree $method] } { + puts "Test056: skipping for method $method" + return + } + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test056 skipping for btree with compression." + return + } + + puts "Test056: $method delete of key in presence of cursor" + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test056.db + set env NULL + } else { + set testfile test056.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set flags "" + set txn "" + + set db [eval {berkdb_open} $args {$omethod $testfile}] + error_check_good db_open:dup [is_valid_db $db] TRUE + + puts "\tTest056.a: Key delete with cursor on duplicate." 
+ # Put three keys in the database + for { set key 1 } { $key <= 3 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $flags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Retrieve keys sequentially so we can figure out their order + set i 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:dup [is_valid_cursor $curs $db] TRUE + + for {set d [$curs get -first] } { [llength $d] != 0 } { + set d [$curs get -next] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + # Now put in a bunch of duplicates for key 2 + for { set d 1 } { $d <= 5 } {incr d} { + set r [eval {$db put} $txn $flags {$key_set(2) dup_$d}] + error_check_good dup:put $r 0 + } + + # Now put the cursor on a duplicate of key 2 + + # Now set the cursor on the first of the duplicate set. 
+ set r [$curs get -set $key_set(2)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good curs_get:DB_SET:data $d datum$key_set(2) + + # Now do two nexts + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_1 + + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_2 + + # Now do the delete + set r [eval {$db del} $txn $flags {$key_set(2)}] + error_check_good delete $r 0 + + # Now check the get current on the cursor. + set ret [$curs get -current] + error_check_good curs_after_del $ret "" + + # Now check that the rest of the database looks intact. There + # should be only two keys, 1 and 3. 
+ + set r [$curs get -first] + error_check_bad cursor_get:DB_FIRST [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_FIRST:key $k $key_set(1) + error_check_good curs_get:DB_FIRST:data $d datum$key_set(1) + + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(3) + error_check_good curs_get:DB_NEXT:data $d datum$key_set(3) + + set r [$curs get -next] + error_check_good cursor_get:DB_NEXT [llength $r] 0 + + puts "\tTest056.b:\ + Cursor delete of first item, followed by cursor FIRST" + # Set to beginning + set r [$curs get -first] + error_check_bad cursor_get:DB_FIRST [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_FIRST:key $k $key_set(1) + error_check_good curs_get:DB_FIRST:data $d datum$key_set(1) + + # Now do delete + error_check_good curs_del [$curs del] 0 + + # Now do DB_FIRST + set r [$curs get -first] + error_check_bad cursor_get:DB_FIRST [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_FIRST:key $k $key_set(3) + error_check_good curs_get:DB_FIRST:data $d datum$key_set(3) + + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test057.tcl b/test/tcl/test057.tcl new file mode 100644 index 00000000..021fbd25 --- /dev/null +++ b/test/tcl/test057.tcl @@ -0,0 +1,207 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test057 +# TEST Cursor maintenance during key deletes. +# TEST 1. Delete a key with a cursor. Add the key back with a regular +# TEST put. Make sure the cursor can't get the new item. +# TEST 2. 
Put two cursors on one item. Delete through one cursor, +# TEST check that the other sees the change. +# TEST 3. Same as 2, with the two cursors on a duplicate. + +proc test057 { method args } { + global errorInfo + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + append args " -create -mode 0644 -dup " + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Test057: skipping for method $method" + return + } + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test057 skipping for btree with compression." + return + } + + puts "Test057: $method delete and replace in presence of cursor." + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test057.db + set env NULL + } else { + set testfile test057.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set flags "" + set txn "" + + set db [eval {berkdb_open} $args {$omethod $testfile}] + error_check_good dbopen:dup [is_valid_db $db] TRUE + + puts "\tTest057.a: Set cursor, delete cursor, put with key." 
+ # Put three keys in the database + for { set key 1 } { $key <= 3 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $flags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Retrieve keys sequentially so we can figure out their order + set i 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good curs_open:dup [is_valid_cursor $curs $db] TRUE + + for {set d [$curs get -first] } {[llength $d] != 0 } \ + {set d [$curs get -next] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + # Now put in a bunch of duplicates for key 2 + for { set d 1 } { $d <= 5 } {incr d} { + set r [eval {$db put} $txn $flags {$key_set(2) dup_$d}] + error_check_good dup:put $r 0 + } + + # Now put the cursor on key 1 + + # Now set the cursor on the first of the duplicate set. + set r [$curs get -set $key_set(1)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(1) + error_check_good curs_get:DB_SET:data $d datum$key_set(1) + + # Now do the delete + set r [$curs del] + error_check_good delete $r 0 + + # Now check the get current on the cursor. + error_check_good curs_get:del [$curs get -current] "" + + # Now do a put on the key + set r [eval {$db put} $txn $flags {$key_set(1) new_datum$key_set(1)}] + error_check_good put $r 0 + + # Do a get + set r [eval {$db get} $txn {$key_set(1)}] + error_check_good get [lindex [lindex $r 0] 1] new_datum$key_set(1) + + # Recheck cursor + error_check_good curs_get:deleted [$curs get -current] "" + + # Move cursor and see if we get the key. 
+ set r [$curs get -first] + error_check_bad cursor_get:DB_FIRST [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_FIRST:key $k $key_set(1) + error_check_good curs_get:DB_FIRST:data $d new_datum$key_set(1) + + puts "\tTest057.b: Set two cursor on a key, delete one, overwrite other" + set curs2 [eval {$db cursor} $txn] + error_check_good curs2_open [is_valid_cursor $curs2 $db] TRUE + + # Set both cursors on the 3rd key + set r [$curs get -set $key_set(3)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(3) + error_check_good curs_get:DB_SET:data $d datum$key_set(3) + + set r [$curs2 get -set $key_set(3)] + error_check_bad cursor2_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs2_get:DB_SET:key $k $key_set(3) + error_check_good curs2_get:DB_SET:data $d datum$key_set(3) + + # Now delete through cursor 1 + error_check_good curs1_del [$curs del] 0 + + # Verify gets on both 1 and 2 + error_check_good curs_get:deleted [$curs get -current] "" + error_check_good curs_get:deleted [$curs2 get -current] "" + + puts "\tTest057.c:\ + Set two cursors on a dup, delete one, overwrite other" + + # Set both cursors on the 2nd duplicate of key 2 + set r [$curs get -set $key_set(2)] + error_check_bad cursor_get:DB_SET [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_SET:key $k $key_set(2) + error_check_good curs_get:DB_SET:data $d datum$key_set(2) + + set r [$curs get -next] + error_check_bad cursor_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs_get:DB_NEXT:key $k $key_set(2) + error_check_good curs_get:DB_NEXT:data $d dup_1 + + set r [$curs2 get -set $key_set(2)] + error_check_bad cursor2_get:DB_SET [llength 
$r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs2_get:DB_SET:key $k $key_set(2) + error_check_good curs2_get:DB_SET:data $d datum$key_set(2) + + set r [$curs2 get -next] + error_check_bad cursor2_get:DB_NEXT [llength $r] 0 + set k [lindex [lindex $r 0] 0] + set d [lindex [lindex $r 0] 1] + error_check_good curs2_get:DB_NEXT:key $k $key_set(2) + error_check_good curs2_get:DB_NEXT:data $d dup_1 + + # Now delete through cursor 1 + error_check_good curs1_del [$curs del] 0 + + # Verify gets on both 1 and 2 + error_check_good curs_get:deleted [$curs get -current] "" + error_check_good curs_get:deleted [$curs2 get -current] "" + + error_check_good curs2_close [$curs2 close] 0 + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test058.tcl b/test/tcl/test058.tcl new file mode 100644 index 00000000..7c34dce0 --- /dev/null +++ b/test/tcl/test058.tcl @@ -0,0 +1,110 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test058 +# TEST Verify that deleting and reading duplicates results in correct ordering. +proc test058 { method args } { + source ./include.tcl + + # + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test058 skipping for env $env" + return + } + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test058 skipping for btree with compression." 
+ return + } + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Test058: skipping for method $method" + return + } + puts "Test058: $method delete dups after inserting after duped key." + + # environment + env_cleanup $testdir + set eflags "-create -txn $encargs -home $testdir" + set env [eval {berkdb_env} $eflags $pageargs] + error_check_good env [is_valid_env $env] TRUE + + # db open + set flags "-auto_commit -create -mode 0644 -dup -env $env $args" + set db [eval {berkdb_open} $flags $omethod "test058.db"] + error_check_good dbopen [is_valid_db $db] TRUE + + set tn "" + set tid "" + set tn [$env txn] + set tflags "-txn $tn" + + puts "\tTest058.a: Adding 10 duplicates" + # Add a bunch of dups + for { set i 0 } { $i < 10 } {incr i} { + set ret \ + [eval {$db put} $tflags {doghouse $i"DUPLICATE_DATA_VALUE"}] + error_check_good db_put $ret 0 + } + + puts "\tTest058.b: Adding key after duplicates" + # Now add one more key/data AFTER the dup set. + set ret [eval {$db put} $tflags {zebrahouse NOT_A_DUP}] + error_check_good db_put $ret 0 + + error_check_good txn_commit [$tn commit] 0 + + set tn [$env txn] + error_check_good txnbegin [is_substr $tn $env] 1 + set tflags "-txn $tn" + + # Now delete everything + puts "\tTest058.c: Deleting duplicated key" + set ret [eval {$db del} $tflags {doghouse}] + error_check_good del $ret 0 + + # Now reput everything + set pad \ + abcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuvabcdefghijklmnopqrtsuv + + puts "\tTest058.d: Reputting duplicates with big data vals" + for { set i 0 } { $i < 10 } {incr i} { + set ret [eval {$db put} \ + $tflags {doghouse $i"DUPLICATE_DATA_VALUE"$pad}] + error_check_good db_put $ret 0 + } + error_check_good txn_commit [$tn commit] 0 + + # Check duplicates for order + set dbc [$db cursor] + error_check_good db_cursor [is_substr 
$dbc $db] 1 + + puts "\tTest058.e: Verifying that duplicates are in order." + set i 0 + for { set ret [$dbc get -set doghouse] } \ + {$i < 10 && [llength $ret] != 0} \ + { set ret [$dbc get -nextdup] } { + set data [lindex [lindex $ret 0] 1] + error_check_good \ + duplicate_value $data $i"DUPLICATE_DATA_VALUE"$pad + incr i + } + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 + reset_env $env +} diff --git a/test/tcl/test059.tcl b/test/tcl/test059.tcl new file mode 100644 index 00000000..a804e174 --- /dev/null +++ b/test/tcl/test059.tcl @@ -0,0 +1,149 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test059 +# TEST Cursor ops work with a partial length of 0. +# TEST Make sure that we handle retrieves of zero-length data items correctly. +# TEST The following ops, should allow a partial data retrieve of 0-length. +# TEST db_get +# TEST db_cget FIRST, NEXT, LAST, PREV, CURRENT, SET, SET_RANGE +proc test059 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test059: $method 0-length partial data retrieval" + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test059.db + set env NULL + } else { + set testfile test059.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set pflags "" + set gflags "" + set txn "" + set count 0 + + if { [is_record_based $method] == 1 } { + append gflags " -recno" + } + + puts "\tTest059.a: Populate a database" + set oflags "-create -mode 0644 $omethod $args $testfile" + set db [eval {berkdb_open} $oflags] + error_check_good db_create [is_substr $db db] 1 + + # Put ten keys in the database + for { set key 1 } { $key <= 10 } {incr key} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} $txn $pflags {$key datum$key}] + error_check_good put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Retrieve keys sequentially so we can figure out their order + set i 1 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good db_curs [is_valid_cursor $curs $db] TRUE + + for {set d [$curs get -first] } { [llength $d] != 0 } { + set d [$curs get -next] } { + set key_set($i) [lindex [lindex $d 0] 0] + incr i + } + + puts "\tTest059.a: db get with 0 partial length retrieve" + + # Now set the cursor on the middle one. 
+ set ret [eval {$db get -partial {0 0}} $txn $gflags {$key_set(5)}] + error_check_bad db_get_0 [llength $ret] 0 + + puts "\tTest059.a: db cget FIRST with 0 partial length retrieve" + set ret [$curs get -first -partial {0 0}] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_first $key $key_set(1) + error_check_good db_cget_first [string length $data] 0 + + puts "\tTest059.b: db cget NEXT with 0 partial length retrieve" + set ret [$curs get -next -partial {0 0}] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_next $key $key_set(2) + error_check_good db_cget_next [string length $data] 0 + + puts "\tTest059.c: db cget LAST with 0 partial length retrieve" + set ret [$curs get -last -partial {0 0}] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_last $key $key_set(10) + error_check_good db_cget_last [string length $data] 0 + + puts "\tTest059.d: db cget PREV with 0 partial length retrieve" + set ret [$curs get -prev -partial {0 0}] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_prev $key $key_set(9) + error_check_good db_cget_prev [string length $data] 0 + + puts "\tTest059.e: db cget CURRENT with 0 partial length retrieve" + set ret [$curs get -current -partial {0 0}] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_current $key $key_set(9) + error_check_good db_cget_current [string length $data] 0 + + puts "\tTest059.f: db cget SET with 0 partial length retrieve" + set ret [$curs get -set -partial {0 0} $key_set(7)] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_set $key $key_set(7) + error_check_good db_cget_set [string length $data] 0 + + if {[is_btree $method] == 1} { + puts "\tTest059.g:\ + db cget SET_RANGE with 0 partial length 
retrieve" + set ret [$curs get -set_range -partial {0 0} $key_set(5)] + set data [lindex [lindex $ret 0] 1] + set key [lindex [lindex $ret 0] 0] + error_check_good key_check_set $key $key_set(5) + error_check_good db_cget_set [string length $data] 0 + } + + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test060.tcl b/test/tcl/test060.tcl new file mode 100644 index 00000000..6710bd94 --- /dev/null +++ b/test/tcl/test060.tcl @@ -0,0 +1,59 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test060 +# TEST Test of the DB_EXCL flag to DB->open(). +# TEST 1) Attempt to open and create a nonexistent database; verify success. +# TEST 2) Attempt to reopen it; verify failure. +proc test060 { method args } { + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test060: $method ($args) Test of the DB_EXCL flag to DB->open" + + # Set the database location and make sure the db doesn't exist yet + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test060.db + set env NULL + } else { + set testfile test060.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + # Create the database and check success + puts "\tTest060.a: open and close non-existent file with DB_EXCL" + set db [eval {berkdb_open \ + -create -excl -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen:excl [is_valid_db $db] TRUE + + # Close it and check success + error_check_good db_close [$db close] 0 + + # Try to open it again, and make sure the open fails + puts "\tTest060.b: open it again with DB_EXCL and make sure it fails" + set errorCode NONE + error_check_good open:excl:catch [catch { \ + set db [eval {berkdb_open_noerr \ + -create -excl -mode 0644} $args {$omethod $testfile}] + } ret ] 1 + + error_check_good dbopen:excl [is_substr $errorCode EEXIST] 1 +} diff --git a/test/tcl/test061.tcl b/test/tcl/test061.tcl new file mode 100644 index 00000000..9c811789 --- /dev/null +++ b/test/tcl/test061.tcl @@ -0,0 +1,231 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test061 +# TEST Test of txn abort and commit for in-memory databases. +# TEST a) Put + abort: verify absence of data +# TEST b) Put + commit: verify presence of data +# TEST c) Overwrite + abort: verify that data is unchanged +# TEST d) Overwrite + commit: verify that data has changed +# TEST e) Delete + abort: verify that data is still present +# TEST f) Delete + commit: verify that data has been deleted +proc test061 { method args } { + global alphabet + global encrypt + global errorCode + global passwd + source ./include.tcl + + # + # If we are using an env, then skip this test. It needs its own. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test061 skipping for env $env" + return + } + if { [is_partitioned $args] == 1 } { + puts "Test061 skipping for partitioned $method" + return + } + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_queueext $method] == 1} { + puts "Test061 skipping for method $method" + return + } + puts "Test061: Transaction abort and commit test for in-memory data." + puts "Test061: $method $args" + + set encargs "" + set args [split_encargs $args encargs] + set pageargs "" + split_pageargs $args pageargs + + set key "key" + set data "data" + set otherdata "otherdata" + set txn "" + set flags "" + set gflags "" + + if { [is_record_based $method] == 1} { + set key 1 + set gflags " -recno" + } + + puts "\tTest061: Create environment and $method database." + env_cleanup $testdir + + # create environment + set eflags "-create -txn $encargs -home $testdir" + set dbenv [eval {berkdb_env} $eflags $pageargs ] + error_check_good dbenv [is_valid_env $dbenv] TRUE + + # db open -- no file specified, in-memory database + set flags "-auto_commit -create $args $omethod" + set db [eval {berkdb_open -env} $dbenv $flags] + error_check_good dbopen [is_valid_db $db] TRUE + + # Here we go with the six test cases. Since we need to verify + # a different thing each time, and since we can't just reuse + # the same data if we're to test overwrite, we just + # plow through rather than writing some impenetrable loop code; + # each of the cases is only a few lines long, anyway. 
+ + puts "\tTest061.a: put/abort" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + + # put a key + set ret [eval {$db put} -txn $txn {$key [chop_data $method $data]}] + error_check_good db_put $ret 0 + + # check for existence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret [list [list $key [pad_data $method $data]]] + + # abort + error_check_good txn_abort [$txn abort] 0 + + # check for *non*-existence + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret {} + + puts "\tTest061.b: put/commit" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + + # put a key + set ret [eval {$db put} -txn $txn {$key [chop_data $method $data]}] + error_check_good db_put $ret 0 + + # check for existence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret [list [list $key [pad_data $method $data]]] + + # commit + error_check_good txn_commit [$txn commit] 0 + + # check again for existence + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret [list [list $key [pad_data $method $data]]] + + puts "\tTest061.c: overwrite/abort" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + + # overwrite {key,data} with {key,otherdata} + set ret [eval {$db put} -txn $txn {$key [chop_data $method $otherdata]}] + error_check_good db_put $ret 0 + + # check for existence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret \ + [list [list $key [pad_data $method $otherdata]]] + + # abort + error_check_good txn_abort [$txn abort] 0 + + # check that data is unchanged ($data not $otherdata) + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret [list [list $key [pad_data $method $data]]] + + puts "\tTest061.d: overwrite/commit" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + 
+ # overwrite {key,data} with {key,otherdata} + set ret [eval {$db put} -txn $txn {$key [chop_data $method $otherdata]}] + error_check_good db_put $ret 0 + + # check for existence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret \ + [list [list $key [pad_data $method $otherdata]]] + + # commit + error_check_good txn_commit [$txn commit] 0 + + # check that data has changed ($otherdata not $data) + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret \ + [list [list $key [pad_data $method $otherdata]]] + + puts "\tTest061.e: delete/abort" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + + # delete + set ret [eval {$db del} -txn $txn {$key}] + error_check_good db_put $ret 0 + + # check for nonexistence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret {} + + # abort + error_check_good txn_abort [$txn abort] 0 + + # check for existence + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret \ + [list [list $key [pad_data $method $otherdata]]] + + puts "\tTest061.f: delete/commit" + + # txn_begin + set txn [$dbenv txn] + error_check_good txn_begin [is_valid_txn $txn $dbenv] TRUE + + # put a key + set ret [eval {$db del} -txn $txn {$key}] + error_check_good db_put $ret 0 + + # check for nonexistence + set ret [eval {$db get} -txn $txn $gflags {$key}] + error_check_good get $ret {} + + # commit + error_check_good txn_commit [$txn commit] 0 + + # check for continued nonexistence + set ret [eval {$db get} $gflags {$key}] + error_check_good get $ret {} + + # We're done; clean up. + error_check_good db_close [eval {$db close}] 0 + error_check_good env_close [eval {$dbenv close}] 0 + + # Now run db_recover and ensure that it runs cleanly. 
+ set utilflag "" + if { $encrypt != 0 } { + set utilflag "-P $passwd" + } + puts "\tTest061.g: Running db_recover -h" + set ret [catch {eval {exec} $util_path/db_recover -h $testdir \ + $utilflag} res] + if { $ret != 0 } { + puts "FAIL: db_recover outputted $res" + } + error_check_good db_recover $ret 0 + + puts "\tTest061.h: Running db_recover -c -h" + set ret [catch {eval {exec} $util_path/db_recover -c -h $testdir \ + $utilflag} res] + error_check_good db_recover-c $ret 0 +} diff --git a/test/tcl/test062.tcl b/test/tcl/test062.tcl new file mode 100644 index 00000000..a024916d --- /dev/null +++ b/test/tcl/test062.tcl @@ -0,0 +1,159 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test062 +# TEST Test of partial puts (using DB_CURRENT) onto duplicate pages. +# TEST Insert the first 200 words into the dictionary 200 times each with +# TEST self as key and :self as data. Use partial puts to +# TEST append self again to data; verify correctness. +proc test062 { method {nentries 200} {ndups 200} {tnum "062"} args } { + global alphabet + global rand_init + source ./include.tcl + + berkdb srand $rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $omethod" + return + } + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 200 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "Test$tnum:\ + $method ($args) $nentries Partial puts and $ndups duplicates." + set db [eval {berkdb_open -create -mode 0644 \ + $omethod -dup} $args {$testfile} ] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put each key/data pair + puts "\tTest$tnum.a: Put loop (initialize database)" + while { [gets $did str] != -1 && $count < $nentries } { + for { set i 1 } { $i <= $ndups } { incr i } { + set pref \ + [string index $alphabet [berkdb random_int 0 25]] + set datastr $pref:$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$str [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + set keys($count) $str + + incr count + } + close $did + + puts "\tTest$tnum.b: Partial puts." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_substr $dbc $db] 1 + + # Do a partial write to extend each datum in + # the regular db by the corresponding dictionary word. + # We have to go through each key's dup set using -set + # because cursors are not stable in the hash AM and we + # want to make sure we hit all the keys. 
+ for { set i 0 } { $i < $count } { incr i } { + set key $keys($i) + for {set ret [$dbc get -set $key]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -nextdup]} { + + set k [lindex [lindex $ret 0] 0] + set orig_d [lindex [lindex $ret 0] 1] + set d [string range $orig_d 2 end] + set doff [expr [string length $d] + 2] + set dlen 0 + error_check_good data_and_key_sanity $d $k + + set ret [$dbc get -current] + error_check_good before_sanity \ + [lindex [lindex $ret 0] 0] \ + [string range [lindex [lindex $ret 0] 1] 2 end] + + error_check_good partial_put [eval {$dbc put -current \ + -partial [list $doff $dlen] $d}] 0 + + set ret [$dbc get -current] + error_check_good partial_put_correct \ + [lindex [lindex $ret 0] 1] $orig_d$d + } + } + + puts "\tTest$tnum.c: Double-checking get loop." + # Double-check that each datum in the regular db has + # been appropriately modified. + + for {set ret [$dbc get -first]} \ + {[llength $ret] != 0} \ + {set ret [$dbc get -next]} { + + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good modification_correct \ + [string range $d 2 end] [repeat $k 2] + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test063.tcl b/test/tcl/test063.tcl new file mode 100644 index 00000000..2509547f --- /dev/null +++ b/test/tcl/test063.tcl @@ -0,0 +1,173 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test063 +# TEST Test of the DB_RDONLY flag to DB->open +# TEST Attempt to both DB->put and DBC->c_put into a database +# TEST that has been opened DB_RDONLY, and check for failure. 
+proc test063 { method args } { + global errorCode + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "063" + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set key "key" + set data "data" + set key2 "another_key" + set data2 "more_data" + + set gflags "" + set txn "" + + if { [is_record_based $method] == 1 } { + set key "1" + set key2 "2" + append gflags " -recno" + } + + puts "Test$tnum: $method ($args) DB_RDONLY test." + + # Create a test database. + puts "\tTest$tnum.a: Creating test database." + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $omethod $args $testfile] + error_check_good db_create [is_valid_db $db] TRUE + + # Put and get an item so it's nonempty. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key [chop_data $method $data]}] + error_check_good initial_put $ret 0 + + set dbt [eval {$db get} $txn $gflags {$key}] + error_check_good initial_get $dbt \ + [list [list $key [pad_data $method $data]]] + + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + if { $eindex == -1 } { + # Confirm that database is writable. If we are + # using an env (that may be remote on a server) + # we cannot do this check. + error_check_good writable [file writable $testfile] 1 + } + + puts "\tTest$tnum.b: Re-opening DB_RDONLY and attempting to put." 
+ + # Now open it read-only and make sure we can get but not put. + set db [eval {berkdb_open_noerr -rdonly} $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbt [eval {$db get} $txn $gflags {$key}] + error_check_good db_get $dbt \ + [list [list $key [pad_data $method $data]]] + + set ret [catch {eval {$db put} $txn \ + {$key2 [chop_data $method $data]}} res] + error_check_good put_failed $ret 1 + error_check_good db_put_rdonly [is_substr $errorCode "EACCES"] 1 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set errorCode "NONE" + + puts "\tTest$tnum.c: Attempting cursor put." + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_create [is_valid_cursor $dbc $db] TRUE + + error_check_good cursor_set [$dbc get -first] $dbt + set ret [catch {eval {$dbc put} -current $data} res] + error_check_good c_put_failed $ret 1 + error_check_good dbc_put_rdonly [is_substr $errorCode "EACCES"] 1 + + set dbt [eval {$db get} $gflags {$key2}] + error_check_good db_get_key2 $dbt "" + + puts "\tTest$tnum.d: Attempting ordinary delete." + + set errorCode "NONE" + set ret [catch {eval {$db del} $txn {$key}} 1] + error_check_good del_failed $ret 1 + error_check_good db_del_rdonly [is_substr $errorCode "EACCES"] 1 + + set dbt [eval {$db get} $txn $gflags {$key}] + error_check_good db_get_key $dbt \ + [list [list $key [pad_data $method $data]]] + + puts "\tTest$tnum.e: Attempting cursor delete." + # Just set the cursor to the beginning; we don't care what's there... + # yet. 
+ set dbt2 [$dbc get -first] + error_check_good db_get_first_key $dbt2 $dbt + set errorCode "NONE" + set ret [catch {$dbc del} res] + error_check_good c_del_failed $ret 1 + error_check_good dbc_del_rdonly [is_substr $errorCode "EACCES"] 1 + + set dbt2 [$dbc get -current] + error_check_good db_get_key $dbt2 $dbt + + puts "\tTest$tnum.f: Close, reopen db; verify unchanged." + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + set db [eval {berkdb_open} $omethod $args $testfile] + error_check_good db_reopen [is_valid_db $db] TRUE + + set dbc [$db cursor] + error_check_good cursor_create [is_valid_cursor $dbc $db] TRUE + + error_check_good first_there [$dbc get -first] \ + [list [list $key [pad_data $method $data]]] + error_check_good nomore_there [$dbc get -next] "" + + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test064.tcl b/test/tcl/test064.tcl new file mode 100644 index 00000000..4672c3c3 --- /dev/null +++ b/test/tcl/test064.tcl @@ -0,0 +1,68 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test064 +# TEST Test of DB->get_type +# TEST Create a database of type specified by method. +# TEST Make sure DB->get_type returns the right thing with both a normal +# TEST and DB_UNKNOWN open. +proc test064 { method args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "064" + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "Test$tnum: $method ($args) DB->get_type test." + + # Create a test database. + puts "\tTest$tnum.a: Creating test database of type $method." + set db [eval {berkdb_open -create -mode 0644} \ + $omethod $args $testfile] + error_check_good db_create [is_valid_db $db] TRUE + + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.b: get_type after method specifier." + + set db [eval {berkdb_open} $omethod $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + set type [$db get_type] + error_check_good get_type $type [string range $omethod 1 end] + + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum.c: get_type after DB_UNKNOWN." + + set db [eval {berkdb_open} $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + set type [$db get_type] + error_check_good get_type $type [string range $omethod 1 end] + + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test065.tcl b/test/tcl/test065.tcl new file mode 100644 index 00000000..07bb5531 --- /dev/null +++ b/test/tcl/test065.tcl @@ -0,0 +1,207 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test065 +# TEST Test of DB->stat, both -DB_FAST_STAT and row +# TEST counts with DB->stat -txn. +proc test065 { method args } { + source ./include.tcl + global errorCode + global alphabet + + set nentries 10000 + set args [convert_args $method $args] + set omethod [convert_method $method] + set tnum "065" + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. 
+ # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "Test$tnum: $method ($args) DB->stat(DB_FAST_STAT) test." + + puts "\tTest$tnum.a: Create database and check it while empty." + + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $omethod $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + set ret [catch {eval $db stat -faststat} res] + + error_check_good db_close [$db close] 0 + + if { ([is_record_based $method] && ![is_queue $method] && \ + ![is_heap $method]) || [is_rbtree $method] } { + error_check_good recordcount_ok [is_substr $res \ + "{{Number of keys} 0}"] 1 + } else { + puts "\tTest$tnum: Test complete for method $method." + return + } + + # If we've got this far, we're on an access method for + # which record counts makes sense. Thus, we no longer + # catch EINVALs, and no longer care about __db_errs. + set db [eval {berkdb_open -create -mode 0644} $omethod $args $testfile] + + puts "\tTest$tnum.b: put $nentries keys." 
+ + if { [is_record_based $method] } { + set gflags " -recno " + set keypfx "" + } else { + set gflags "" + set keypfx "key" + } + + set txn "" + set data [pad_data $method $alphabet] + + for { set ndx 1 } { $ndx <= $nentries } { incr ndx } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$keypfx$ndx $data}] + error_check_good db_put $ret 0 + set statret [eval {$db stat} $txn] + set rowcount [getstats $statret "Number of records"] + error_check_good rowcount $rowcount $ndx + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [$db stat -faststat] + error_check_good recordcount_after_puts \ + [is_substr $ret "{{Number of keys} $nentries}"] 1 + + puts "\tTest$tnum.c: delete 90% of keys." + set end [expr {$nentries / 10 * 9}] + for { set ndx 1 } { $ndx <= $end } { incr ndx } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if { [is_rrecno $method] == 1 } { + # if we're renumbering, when we hit key 5001 we'll + # have deleted 5000 and we'll croak! So delete key + # 1, repeatedly. + set ret [eval {$db del} $txn {[concat $keypfx 1]}] + set statret [eval {$db stat} $txn] + set rowcount [getstats $statret "Number of records"] + error_check_good rowcount $rowcount [expr $nentries - $ndx] + } else { + set ret [eval {$db del} $txn {$keypfx$ndx}] + set rowcount [getstats $statret "Number of records"] + error_check_good rowcount $rowcount $nentries + } + error_check_good db_del $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [$db stat -faststat] + if { [is_rrecno $method] == 1 || [is_rbtree $method] == 1 } { + # We allow renumbering--thus the stat should return 10% + # of nentries. 
+ error_check_good recordcount_after_dels [is_substr $ret \ + "{{Number of keys} [expr {$nentries / 10}]}"] 1 + } else { + # No renumbering--no change in RECORDCOUNT! + error_check_good recordcount_after_dels \ + [is_substr $ret "{{Number of keys} $nentries}"] 1 + } + + puts "\tTest$tnum.d: put new keys at the beginning." + set end [expr {$nentries / 10 * 8}] + for { set ndx 1 } { $ndx <= $end } {incr ndx } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$keypfx$ndx $data}] + error_check_good db_put_beginning $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [$db stat -faststat] + if { [is_rrecno $method] == 1 } { + # With renumbering we're back up to 80% of $nentries + error_check_good recordcount_after_dels [is_substr $ret \ + "{{Number of keys} [expr {$nentries / 10 * 8}]}"] 1 + } elseif { [is_rbtree $method] == 1 } { + # Total records in a btree is now 90% of $nentries + error_check_good recordcount_after_dels [is_substr $ret \ + "{{Number of keys} [expr {$nentries / 10 * 9}]}"] 1 + } else { + # No renumbering--still no change in RECORDCOUNT. + error_check_good recordcount_after_dels [is_substr $ret \ + "{{Number of keys} $nentries}"] 1 + } + + puts "\tTest$tnum.e: put new keys at the end." 
+ set start [expr {1 + $nentries / 10 * 9}] + set end [expr {($nentries / 10 * 9) + ($nentries / 10 * 8)}] + for { set ndx $start } { $ndx <= $end } { incr ndx } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$keypfx$ndx $data}] + error_check_good db_put_end $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [$db stat -faststat] + if { [is_rbtree $method] != 1 } { + # If this is a recno database, the record count should be up + # to (1.7 x nentries), the largest number we've seen, with + # or without renumbering. + error_check_good recordcount_after_puts2 [is_substr $ret \ + "{{Number of keys} [expr {$start - 1 + $nentries / 10 * 8}]}"] 1 + } else { + # In an rbtree, 1000 of those keys were overwrites, so there + # are (.7 x nentries) new keys and (.9 x nentries) old keys + # for a total of (1.6 x nentries). + error_check_good recordcount_after_puts2 [is_substr $ret \ + "{{Number of keys} [expr {$start -1 + $nentries / 10 * 7}]}"] 1 + } + + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test066.tcl b/test/tcl/test066.tcl new file mode 100644 index 00000000..7d431845 --- /dev/null +++ b/test/tcl/test066.tcl @@ -0,0 +1,103 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test066 +# TEST Test of cursor overwrites of DB_CURRENT w/ duplicates. +# TEST +# TEST Make sure a cursor put to DB_CURRENT acts as an overwrite in a +# TEST database with duplicates. +proc test066 { method args } { + set omethod [convert_method $method] + set args [convert_args $method $args] + + set tnum "066" + + if { [is_record_based $method] || [is_rbtree $method] } { + puts "Test$tnum: Skipping for method $method." + return + } + + # Btree with compression does not support unsorted duplicates. 
+ if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + puts "Test$tnum: Test of cursor put to DB_CURRENT with duplicates." + + source ./include.tcl + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test066.db + set env NULL + } else { + set testfile test066.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set txn "" + set key "test" + set data "olddata" + + set db [eval {berkdb_open -create -mode 0644 -dup} $omethod $args \ + $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key [chop_data $method $data]}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set ret [$dbc get -first] + error_check_good db_get $ret [list [list $key [pad_data $method $data]]] + + set newdata "newdata" + set ret [$dbc put -current [chop_data $method $newdata]] + error_check_good dbc_put $ret 0 + + # There should be only one (key,data) pair in the database, and this + # is it. + set ret [$dbc get -first] + error_check_good db_get_first $ret \ + [list [list $key [pad_data $method $newdata]]] + + # and this one should come up empty. 
+ set ret [$dbc get -next] + error_check_good db_get_next $ret "" + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest$tnum: Test completed successfully." +} diff --git a/test/tcl/test067.tcl b/test/tcl/test067.tcl new file mode 100644 index 00000000..31536b16 --- /dev/null +++ b/test/tcl/test067.tcl @@ -0,0 +1,163 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test067 +# TEST Test of DB_CURRENT partial puts onto almost empty duplicate +# TEST pages, with and without DB_DUP_SORT. +# TEST +# TEST Test of DB_CURRENT partial puts on almost-empty duplicate pages. +# TEST This test was written to address the following issue, #2 in the +# TEST list of issues relating to bug #0820: +# TEST +# TEST 2. DBcursor->put, DB_CURRENT flag, off-page duplicates, hash and btree: +# TEST In Btree, the DB_CURRENT overwrite of off-page duplicate records +# TEST first deletes the record and then puts the new one -- this could +# TEST be a problem if the removal of the record causes a reverse split. +# TEST Suggested solution is to acquire a cursor to lock down the current +# TEST record, put a new record after that record, and then delete using +# TEST the held cursor. +# TEST +# TEST It also tests the following, #5 in the same list of issues: +# TEST 5. DBcursor->put, DB_AFTER/DB_BEFORE/DB_CURRENT flags, DB_DBT_PARTIAL +# TEST set, duplicate comparison routine specified. +# TEST The partial change does not change how data items sort, but the +# TEST record to be put isn't built yet, and that record supplied is the +# TEST one that's checked for ordering compatibility. 
+proc test067 { method {ndups 1000} {tnum "067"} args } { + source ./include.tcl + global alphabet + global errorCode + global is_je_test + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set txn "" + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + if { $ndups == 1000 } { + set ndups 100 + } + } + set testdir [get_home $env] + } + + cleanup $testdir $env + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "\tTest$tnum: skipping for method $method." + return + } + + puts "Test$tnum:\ + $method ($args) Partial puts on near-empty duplicate pages." + + foreach dupopt { "-dup" "-dup -dupsort" } { + if { $is_je_test || [is_compressed $args] } { + if { $dupopt == "-dup" } { + continue + } + } + + # + # Testdir might get reset from the env's home dir back + # to the default if this calls something that sources + # include.tcl, since testdir is a global. Set it correctly + # here each time through the loop. + # + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + set db [eval {berkdb_open -create -mode 0644 \ + $omethod} $args $dupopt {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tTest$tnum.a ($dupopt): Put $ndups duplicates." + + set key "key_test$tnum" + + for { set ndx 0 } { $ndx < $ndups } { incr ndx } { + set data $alphabet$ndx + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # No need for pad_data since we're skipping recno. 
+ set ret [eval {$db put} $txn {$key $data}] + error_check_good put($key,$data) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # Sync so we can inspect database if the next section bombs. + error_check_good db_sync [$db sync] 0 + puts "\tTest$tnum.b ($dupopt):\ + Deleting dups (last first), overwriting each." + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_create [is_valid_cursor $dbc $db] TRUE + + set count 0 + while { $count < $ndups - 1 } { + # set cursor to last item in db + set ret [$dbc get -last] + error_check_good \ + verify_key [lindex [lindex $ret 0] 0] $key + + # for error reporting + set currdatum [lindex [lindex $ret 0] 1] + + # partial-overwrite it + # (overwrite offsets 1-4 with "bcde"--which they + # already are) + + # Even though we expect success, we catch this + # since it might return EINVAL, and we want that + # to FAIL. + set errorCode NONE + set ret [catch {eval $dbc put -current \ + {-partial [list 1 4]} "bcde"} \ + res] + error_check_good \ + partial_put_valid($currdatum) $errorCode NONE + error_check_good partial_put($currdatum) $res 0 + + # delete it + error_check_good dbc_del [$dbc del] 0 + + #puts $currdatum + + incr count + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } +} diff --git a/test/tcl/test068.tcl b/test/tcl/test068.tcl new file mode 100644 index 00000000..2751ea2c --- /dev/null +++ b/test/tcl/test068.tcl @@ -0,0 +1,233 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test068 +# TEST Test of DB_BEFORE and DB_AFTER with partial puts. 
+# TEST Make sure DB_BEFORE and DB_AFTER work properly with partial puts, and +# TEST check that they return EINVAL if DB_DUPSORT is set or if DB_DUP is not. +proc test068 { method args } { + source ./include.tcl + global alphabet + global errorCode + global is_je_test + + set tnum "068" + set orig_tdir $testdir + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test$tnum:\ + $method ($args) Test of DB_BEFORE/DB_AFTER and partial puts." + if { [is_record_based $method] == 1 } { + puts "\tTest$tnum: skipping for method $method." + return + } + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set nkeys 1000 + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + set nkeys 100 + } + set testdir [get_home $env] + } + + # Create a list of $nkeys words to insert into db. + puts "\tTest$tnum.a: Initialize word list." + set txn "" + set wordlist {} + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $nkeys } { + lappend wordlist $str + incr count + } + close $did + + # Sanity check: did we get $nkeys words? + error_check_good enough_keys [llength $wordlist] $nkeys + + # rbtree can't handle dups, so just test the non-dup case + # if it's the current method. + if { [is_rbtree $method] == 1 } { + set dupoptlist { "" } + } else { + set dupoptlist { "" "-dup" "-dup -dupsort" } + } + + foreach dupopt $dupoptlist { + if { $is_je_test || [is_compressed $args] == 1 } { + if { $dupopt == "-dup" } { + continue + } + } + + # Testdir might be reset in the loop by some proc sourcing + # include.tcl. Reset it to the env's home here, before + # cleanup. 
+ if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + set db [eval {berkdb_open_noerr -create -mode 0644 \ + $omethod} $args $dupopt {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tTest$tnum.b ($dupopt): DB initialization: put loop." + foreach word $wordlist { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$word $word}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + puts "\tTest$tnum.c ($dupopt): get loop." + foreach word $wordlist { + # Make sure that the Nth word has been correctly + # inserted, and also that the Nth word is the + # Nth one we pull out of the database using a cursor. + + set dbt [$db get $word] + error_check_good get_key [list [list $word $word]] $dbt + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good cursor_open [is_valid_cursor $dbc $db] TRUE + + puts "\tTest$tnum.d ($dupopt): DBC->put w/ DB_AFTER." + + # Set cursor to the first key; make sure it succeeds. + # With an unsorted wordlist, we can't be sure that the + # first item returned will equal the first item in the + # wordlist, so we just make sure it got something back. + set dbt [eval {$dbc get -first}] + error_check_good \ + dbc_get_first [llength $dbt] 1 + + # If -dup is not set, or if -dupsort is set too, we + # need to verify that DB_BEFORE and DB_AFTER fail + # and then move on to the next $dupopt. + if { $dupopt != "-dup" } { + set errorCode "NONE" + set ret [catch {eval $dbc put -after \ + {-partial [list 6 0]} "after"} res] + error_check_good dbc_put_after_fail $ret 1 + error_check_good dbc_put_after_einval \ + [is_substr $errorCode EINVAL] 1 + puts "\tTest$tnum ($dupopt): DB_AFTER returns EINVAL." 
+ set errorCode "NONE" + set ret [catch {eval $dbc put -before \ + {-partial [list 6 0]} "before"} res] + error_check_good dbc_put_before_fail $ret 1 + error_check_good dbc_put_before_einval \ + [is_substr $errorCode EINVAL] 1 + puts "\tTest$tnum ($dupopt): DB_BEFORE returns EINVAL." + puts "\tTest$tnum ($dupopt): Correct error returns,\ + skipping further test." + # continue with broad foreach + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + continue + } + + puts "\tTest$tnum.e ($dupopt): DBC->put(DB_AFTER) loop." + foreach word $wordlist { + # set cursor to $word + set dbt [$dbc get -set $word] + error_check_good \ + dbc_get_set $dbt [list [list $word $word]] + # put after it + set ret [$dbc put -after -partial {4 0} after] + error_check_good dbc_put_after $ret 0 + } + + puts "\tTest$tnum.f ($dupopt): DBC->put(DB_BEFORE) loop." + foreach word $wordlist { + # set cursor to $word + set dbt [$dbc get -set $word] + error_check_good \ + dbc_get_set $dbt [list [list $word $word]] + # put before it + set ret [$dbc put -before -partial {6 0} before] + error_check_good dbc_put_before $ret 0 + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + eval $db sync + puts "\tTest$tnum.g ($dupopt): Verify correctness." + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # loop through the whole db beginning to end, + # make sure we have, in order, {$word "\0\0\0\0\0\0before"}, + # {$word $word}, {$word "\0\0\0\0after"} for each word. + set count 0 + while { $count < $nkeys } { + # Get the first item of each set of three. + # We don't know what the word is, but set $word to + # the key and check that the data is + # "\0\0\0\0\0\0before". 
+ set dbt [$dbc get -next] + set word [lindex [lindex $dbt 0] 0] + + error_check_good dbc_get_one $dbt \ + [list [list $word "\0\0\0\0\0\0before"]] + + set dbt [$dbc get -next] + error_check_good \ + dbc_get_two $dbt [list [list $word $word]] + + set dbt [$dbc get -next] + error_check_good dbc_get_three $dbt \ + [list [list $word "\0\0\0\0after"]] + + incr count + } + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } + set testdir $orig_tdir +} diff --git a/test/tcl/test069.tcl b/test/tcl/test069.tcl new file mode 100644 index 00000000..4ef4daa6 --- /dev/null +++ b/test/tcl/test069.tcl @@ -0,0 +1,13 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test069 +# TEST Test of DB_CURRENT partial puts without duplicates-- test067 w/ +# TEST small ndups to ensure that partial puts to DB_CURRENT work +# TEST correctly in the absence of duplicate pages. +proc test069 { method {ndups 50} {tnum "069"} args } { + eval test067 $method $ndups $tnum $args +} diff --git a/test/tcl/test070.tcl b/test/tcl/test070.tcl new file mode 100644 index 00000000..41038a3b --- /dev/null +++ b/test/tcl/test070.tcl @@ -0,0 +1,137 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test070 +# TEST Test of DB_CONSUME (Four consumers, 1000 items.) +# TEST +# TEST Fork off six processes, four consumers and two producers. +# TEST The producers will each put 20000 records into a queue; +# TEST the consumers will each get 10000. +# TEST Then, verify that no record was lost or retrieved twice. 
+proc test070 { method {nconsumers 4} {nproducers 2} \ + {nitems 1000} {mode CONSUME } {start 0} {txn -txn} {tnum "070"} args } { + source ./include.tcl + global alphabet + global encrypt + + # + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test$tnum skipping for env $env" + return + } + set omethod [convert_method $method] + set args [convert_args $method $args] + if { $encrypt != 0 } { + puts "Test$tnum skipping for security" + return + } + set pageargs "" + split_pageargs $args pageargs + + puts "Test$tnum: $method ($args) Test of DB_$mode flag to DB->get." + puts "\tUsing $txn environment." + + error_check_good enough_consumers [expr $nconsumers > 0] 1 + error_check_good enough_producers [expr $nproducers > 0] 1 + + if { [is_queue $method] != 1 } { + puts "\tSkipping Test$tnum for method $method." + return + } + + env_cleanup $testdir + set testfile test$tnum.db + + # Create environment + set dbenv [eval {berkdb_env -create $txn -home } $testdir $pageargs] + error_check_good dbenv_create [is_valid_env $dbenv] TRUE + + # Create database + set db [eval {berkdb_open -create -mode 0644 -queue}\ + -env $dbenv $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + if { $start != 0 } { + error_check_good set_seed [$db put $start "consumer data"] 0 + puts "\tTest$tnum: starting at $start." + } else { + incr start + } + + set pidlist {} + + # Divvy up the total number of records amongst the consumers and + # producers. + error_check_good cons_div_evenly [expr $nitems % $nconsumers] 0 + error_check_good prod_div_evenly [expr $nitems % $nproducers] 0 + set nperconsumer [expr $nitems / $nconsumers] + set nperproducer [expr $nitems / $nproducers] + + set consumerlog $testdir/CONSUMERLOG. 
+ + # Fork consumer processes (we want them to be hungry) + for { set ndx 0 } { $ndx < $nconsumers } { incr ndx } { + set output $consumerlog$ndx + set p [exec $tclsh_path $test_path/wrap.tcl \ + conscript.tcl $testdir/conscript.log.consumer$ndx \ + $testdir $testfile $mode $nperconsumer $output $tnum \ + $args &] + lappend pidlist $p + } + for { set ndx 0 } { $ndx < $nproducers } { incr ndx } { + set p [exec $tclsh_path $test_path/wrap.tcl \ + conscript.tcl $testdir/conscript.log.producer$ndx \ + $testdir $testfile PRODUCE $nperproducer "" $tnum \ + $args &] + lappend pidlist $p + } + + # Wait for all children. + watch_procs $pidlist 10 + + # Verify: slurp all record numbers into list, sort, and make + # sure each appears exactly once. + puts "\tTest$tnum: Verifying results." + set reclist {} + for { set ndx 0 } { $ndx < $nconsumers } { incr ndx } { + set input $consumerlog$ndx + set iid [open $input r] + while { [gets $iid str] != -1 } { + lappend reclist $str + } + close $iid + } + set sortreclist [lsort -command int32_compare $reclist] + + set nitems [expr $start + $nitems] + for { set ndx $start } { $ndx < $nitems } { set ndx [expr $ndx + 1] } { + # Wrap if $ndx goes beyond 32 bits because our + # recno wrapped if it did. + if { $ndx > 0xffffffff } { + set cmp [expr $ndx - 0xffffffff] + } else { + set cmp [expr $ndx + 0] + } + # Skip 0 if we are wrapping around + if { $cmp == 0 } { + incr ndx + incr nitems + incr cmp + } + # Be sure to convert ndx to a number before comparing. + error_check_good pop_num [lindex $sortreclist 0] $cmp + set sortreclist [lreplace $sortreclist 0 0] + } + error_check_good list_ends_empty $sortreclist {} + error_check_good db_close [$db close] 0 + error_check_good dbenv_close [$dbenv close] 0 + + puts "\tTest$tnum completed successfully." 
+} diff --git a/test/tcl/test071.tcl b/test/tcl/test071.tcl new file mode 100644 index 00000000..c74023aa --- /dev/null +++ b/test/tcl/test071.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test071 +# TEST Test of DB_CONSUME (One consumer, 10000 items.) +# TEST This is DB Test 70, with one consumer, one producers, and 10000 items. +proc test071 { method {nconsumers 1} {nproducers 1} {nitems 10000} \ + {mode CONSUME} {start 0 } {txn -txn} {tnum "071"} args } { + + eval test070 $method \ + $nconsumers $nproducers $nitems $mode $start $txn $tnum $args +} diff --git a/test/tcl/test072.tcl b/test/tcl/test072.tcl new file mode 100644 index 00000000..5fa549ec --- /dev/null +++ b/test/tcl/test072.tcl @@ -0,0 +1,258 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test072 +# TEST Test of cursor stability when duplicates are moved off-page. +proc test072 { method {pagesize 512} {ndups 20} {tnum "072"} args } { + source ./include.tcl + global alphabet + global is_je_test + + set omethod [convert_method $method] + set args [convert_args $method $args] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile name should just be + # the db name. Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + # Keys must sort $prekey < $key < $postkey. 
+ set prekey "a key" + set key "the key" + set postkey "z key" + + # Make these distinguishable from each other and from the + # alphabets used for the $key's data. + set predatum "1234567890" + set postdatum "0987654321" + + puts -nonewline "Test$tnum $omethod ($args): " + if { [is_record_based $method] || [is_rbtree $method] } { + puts "Skipping for method $method." + return + } else { + puts "\nTest$tnum: Test of cursor stability when\ + duplicates are moved off-page." + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: skipping for specific pagesizes" + return + } + + append args " -pagesize $pagesize " + set txn "" + + set dlist [list "-dup" "-dup -dupsort"] + set testid 0 + foreach dupopt $dlist { + if { $is_je_test || [is_compressed $args] } { + if { $dupopt == "-dup" } { + continue + } + } + + incr testid + set duptestfile $basename$testid.db + set db [eval {berkdb_open -create -mode 0644} \ + $omethod $args $dupopt {$duptestfile}] + error_check_good "db open" [is_valid_db $db] TRUE + + puts \ +"\tTest$tnum.a: ($dupopt) Set up surrounding keys and cursors." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$prekey $predatum}] + error_check_good pre_put $ret 0 + set ret [eval {$db put} $txn {$postkey $postdatum}] + error_check_good post_put $ret 0 + + set precursor [eval {$db cursor} $txn] + error_check_good precursor [is_valid_cursor $precursor \ + $db] TRUE + set postcursor [eval {$db cursor} $txn] + error_check_good postcursor [is_valid_cursor $postcursor \ + $db] TRUE + error_check_good preset [$precursor get -set $prekey] \ + [list [list $prekey $predatum]] + error_check_good postset [$postcursor get -set $postkey] \ + [list [list $postkey $postdatum]] + + puts "\tTest$tnum.b: Put/create cursor/verify all cursor loop." 
+ + for { set i 0 } { $i < $ndups } { incr i } { + set datum [format "%4d$alphabet" [expr $i + 1000]] + set data($i) $datum + + # Uncomment these lines to see intermediate steps. + # error_check_good db_sync($i) [$db sync] 0 + # error_check_good db_dump($i) \ + # [catch {exec $util_path/db_dump \ + # -da $duptestfile > $testdir/out.$i}] 0 + + set ret [eval {$db put} $txn {$key $datum}] + error_check_good "db put ($i)" $ret 0 + + set dbc($i) [eval {$db cursor} $txn] + error_check_good "db cursor ($i)"\ + [is_valid_cursor $dbc($i) $db] TRUE + + error_check_good "dbc get -get_both ($i)"\ + [$dbc($i) get -get_both $key $datum]\ + [list [list $key $datum]] + + for { set j 0 } { $j < $i } { incr j } { + set dbt [$dbc($j) get -current] + set k [lindex [lindex $dbt 0] 0] + set d [lindex [lindex $dbt 0] 1] + + #puts "cursor $j after $i: $d" + + eval {$db sync} + + error_check_good\ + "cursor $j key correctness after $i puts" \ + $k $key + error_check_good\ + "cursor $j data correctness after $i puts" \ + $d $data($j) + } + + # Check correctness of pre- and post- cursors. Do an + # error_check_good on the lengths first so that we don't + # spew garbage as the "got" field and screw up our + # terminal. (It's happened here.) 
+ set pre_dbt [$precursor get -current] + set post_dbt [$postcursor get -current] + error_check_good \ + "key earlier cursor correctness after $i puts" \ + [string length [lindex [lindex $pre_dbt 0] 0]] \ + [string length $prekey] + error_check_good \ + "data earlier cursor correctness after $i puts" \ + [string length [lindex [lindex $pre_dbt 0] 1]] \ + [string length $predatum] + error_check_good \ + "key later cursor correctness after $i puts" \ + [string length [lindex [lindex $post_dbt 0] 0]] \ + [string length $postkey] + error_check_good \ + "data later cursor correctness after $i puts" \ + [string length [lindex [lindex $post_dbt 0] 1]]\ + [string length $postdatum] + + error_check_good \ + "earlier cursor correctness after $i puts" \ + $pre_dbt [list [list $prekey $predatum]] + error_check_good \ + "later cursor correctness after $i puts" \ + $post_dbt [list [list $postkey $postdatum]] + } + + puts "\tTest$tnum.c: Reverse Put/create cursor/verify all cursor loop." + set end [expr $ndups * 2 - 1] + for { set i $end } { $i >= $ndups } { set i [expr $i - 1] } { + set datum [format "%4d$alphabet" [expr $i + 1000]] + set data($i) $datum + + # Uncomment these lines to see intermediate steps. 
+ # error_check_good db_sync($i) [$db sync] 0 + # error_check_good db_dump($i) \ + # [catch {exec $util_path/db_dump \ + # -da $duptestfile > $testdir/out.$i}] 0 + + set ret [eval {$db put} $txn {$key $datum}] + error_check_good "db put ($i)" $ret 0 + + error_check_bad dbc($i)_stomped [info exists dbc($i)] 1 + set dbc($i) [eval {$db cursor} $txn] + error_check_good "db cursor ($i)"\ + [is_valid_cursor $dbc($i) $db] TRUE + + error_check_good "dbc get -get_both ($i)"\ + [$dbc($i) get -get_both $key $datum]\ + [list [list $key $datum]] + + for { set j $i } { $j < $end } { incr j } { + set dbt [$dbc($j) get -current] + set k [lindex [lindex $dbt 0] 0] + set d [lindex [lindex $dbt 0] 1] + + #puts "cursor $j after $i: $d" + + eval {$db sync} + + error_check_good\ + "cursor $j key correctness after $i puts" \ + $k $key + error_check_good\ + "cursor $j data correctness after $i puts" \ + $d $data($j) + } + + # Check correctness of pre- and post- cursors. Do an + # error_check_good on the lengths first so that we don't + # spew garbage as the "got" field and screw up our + # terminal. (It's happened here.) 
+ set pre_dbt [$precursor get -current] + set post_dbt [$postcursor get -current] + error_check_good \ + "key earlier cursor correctness after $i puts" \ + [string length [lindex [lindex $pre_dbt 0] 0]] \ + [string length $prekey] + error_check_good \ + "data earlier cursor correctness after $i puts" \ + [string length [lindex [lindex $pre_dbt 0] 1]] \ + [string length $predatum] + error_check_good \ + "key later cursor correctness after $i puts" \ + [string length [lindex [lindex $post_dbt 0] 0]] \ + [string length $postkey] + error_check_good \ + "data later cursor correctness after $i puts" \ + [string length [lindex [lindex $post_dbt 0] 1]]\ + [string length $postdatum] + + error_check_good \ + "earlier cursor correctness after $i puts" \ + $pre_dbt [list [list $prekey $predatum]] + error_check_good \ + "later cursor correctness after $i puts" \ + $post_dbt [list [list $postkey $postdatum]] + } + + # Close cursors. + puts "\tTest$tnum.d: Closing cursors." + for { set i 0 } { $i <= $end } { incr i } { + error_check_good "dbc close ($i)" [$dbc($i) close] 0 + } + unset dbc + error_check_good precursor_close [$precursor close] 0 + error_check_good postcursor_close [$postcursor close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good "db close" [$db close] 0 + } +} diff --git a/test/tcl/test073.tcl b/test/tcl/test073.tcl new file mode 100644 index 00000000..8577d3c0 --- /dev/null +++ b/test/tcl/test073.tcl @@ -0,0 +1,296 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test073 +# TEST Test of cursor stability on duplicate pages. +# TEST +# TEST Does the following: +# TEST a. Initialize things by DB->putting ndups dups and +# TEST setting a reference cursor to point to each. +# TEST b. 
c_put ndups dups (and correspondingly expanding +# TEST the set of reference cursors) after the last one, making sure +# TEST after each step that all the reference cursors still point to +# TEST the right item. +# TEST c. Ditto, but before the first one. +# TEST d. Ditto, but after each one in sequence first to last. +# TEST e. Ditto, but after each one in sequence from last to first. +# TEST occur relative to the new datum) +# TEST f. Ditto for the two sequence tests, only doing a +# TEST DBC->c_put(DB_CURRENT) of a larger datum instead of adding a +# TEST new one. +proc test073 { method {pagesize 512} {ndups 50} {tnum "073"} args } { + source ./include.tcl + global alphabet + + set omethod [convert_method $method] + set args [convert_args $method $args] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set key "the key" + set txn "" + + puts -nonewline "Test$tnum $omethod ($args): " + if { [is_record_based $method] || [is_rbtree $method] } { + puts "Skipping for method $method." + return + } + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + + puts "cursor stability on duplicate pages." 
+ + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test073: skipping for specific pagesizes" + return + } + + append args " -pagesize $pagesize -dup" + + set db [eval {berkdb_open \ + -create -mode 0644} $omethod $args $testfile] + error_check_good "db open" [is_valid_db $db] TRUE + + # Number of outstanding keys. + set keys 0 + + puts "\tTest$tnum.a.1: Initializing put loop; $ndups dups, short data." + + for { set i 0 } { $i < $ndups } { incr i } { + set datum [makedatum_t73 $i 0] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $datum}] + error_check_good "db put ($i)" $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set is_long($i) 0 + incr keys + } + + puts "\tTest$tnum.a.2: Initializing cursor get loop; $keys dups." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 0 } { $i < $keys } { incr i } { + set datum [makedatum_t73 $i 0] + + set dbc($i) [eval {$db cursor} $txn] + error_check_good "db cursor ($i)"\ + [is_valid_cursor $dbc($i) $db] TRUE + error_check_good "dbc get -get_both ($i)"\ + [$dbc($i) get -get_both $key $datum]\ + [list [list $key $datum]] + } + + puts "\tTest$tnum.b: Cursor put (DB_KEYLAST); $ndups new dups,\ + short data." + + for { set i 0 } { $i < $ndups } { incr i } { + # !!! keys contains the number of the next dup + # to be added (since they start from zero) + + set datum [makedatum_t73 $keys 0] + set curs [eval {$db cursor} $txn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + error_check_good "c_put(DB_KEYLAST, $keys)"\ + [$curs put -keylast $key $datum] 0 + + set dbc($keys) $curs + set is_long($keys) 0 + incr keys + + verify_t73 is_long dbc $keys $key + } + + puts "\tTest$tnum.c: Cursor put (DB_KEYFIRST); $ndups new dups,\ + short data." 
+ + for { set i 0 } { $i < $ndups } { incr i } { + # !!! keys contains the number of the next dup + # to be added (since they start from zero) + + set datum [makedatum_t73 $keys 0] + set curs [eval {$db cursor} $txn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + error_check_good "c_put(DB_KEYFIRST, $keys)"\ + [$curs put -keyfirst $key $datum] 0 + + set dbc($keys) $curs + set is_long($keys) 0 + incr keys + + verify_t73 is_long dbc $keys $key + } + + puts "\tTest$tnum.d: Cursor put (DB_AFTER) first to last;\ + $keys new dups, short data" + # We want to add a datum after each key from 0 to the current + # value of $keys, which we thus need to save. + set keysnow $keys + for { set i 0 } { $i < $keysnow } { incr i } { + set datum [makedatum_t73 $keys 0] + set curs [eval {$db cursor} $txn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + # Which datum to insert this guy after. + set curdatum [makedatum_t73 $i 0] + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $curdatum]\ + [list [list $key $curdatum]] + error_check_good "c_put(DB_AFTER, $i)"\ + [$curs put -after $datum] 0 + + set dbc($keys) $curs + set is_long($keys) 0 + incr keys + + verify_t73 is_long dbc $keys $key + } + + puts "\tTest$tnum.e: Cursor put (DB_BEFORE) last to first;\ + $keys new dups, short data" + + for { set i [expr $keys - 1] } { $i >= 0 } { incr i -1 } { + set datum [makedatum_t73 $keys 0] + set curs [eval {$db cursor} $txn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + # Which datum to insert this guy before. 
+ set curdatum [makedatum_t73 $i 0] + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $curdatum]\ + [list [list $key $curdatum]] + error_check_good "c_put(DB_BEFORE, $i)"\ + [$curs put -before $datum] 0 + + set dbc($keys) $curs + set is_long($keys) 0 + incr keys + + if { $i % 10 == 1 } { + verify_t73 is_long dbc $keys $key + } + } + verify_t73 is_long dbc $keys $key + + puts "\tTest$tnum.f: Cursor put (DB_CURRENT), first to last,\ + growing $keys data." + set keysnow $keys + for { set i 0 } { $i < $keysnow } { incr i } { + set olddatum [makedatum_t73 $i 0] + set newdatum [makedatum_t73 $i 1] + set curs [eval {$db cursor} $txn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $olddatum]\ + [list [list $key $olddatum]] + error_check_good "c_put(DB_CURRENT, $i)"\ + [$curs put -current $newdatum] 0 + + error_check_good "cursor close" [$curs close] 0 + + set is_long($i) 1 + + if { $i % 10 == 1 } { + verify_t73 is_long dbc $keys $key + } + } + verify_t73 is_long dbc $keys $key + + # Close cursors. + puts "\tTest$tnum.g: Closing cursors." + for { set i 0 } { $i < $keys } { incr i } { + error_check_good "dbc close ($i)" [$dbc($i) close] 0 + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good "db close" [$db close] 0 +} + +# !!!: This procedure is also used by test087. +proc makedatum_t73 { num is_long } { + global alphabet + if { $is_long == 1 } { + set a $alphabet$alphabet$alphabet + } else { + set a abcdefghijklm + } + + # format won't do leading zeros, alas. + if { $num / 1000 > 0 } { + set i $num + } elseif { $num / 100 > 0 } { + set i 0$num + } elseif { $num / 10 > 0 } { + set i 00$num + } else { + set i 000$num + } + + return $i$a +} + +# !!!: This procedure is also used by test087. 
+proc verify_t73 { is_long_array curs_array numkeys key } { + upvar $is_long_array is_long + upvar $curs_array dbc + upvar db db + + #useful for debugging, perhaps. + eval $db sync + + for { set j 0 } { $j < $numkeys } { incr j } { + set dbt [$dbc($j) get -current] + set k [lindex [lindex $dbt 0] 0] + set d [lindex [lindex $dbt 0] 1] + + error_check_good\ + "cursor $j key correctness (with $numkeys total items)"\ + $k $key + error_check_good\ + "cursor $j data correctness (with $numkeys total items)"\ + $d [makedatum_t73 $j $is_long($j)] + } +} diff --git a/test/tcl/test074.tcl b/test/tcl/test074.tcl new file mode 100644 index 00000000..8d558ffa --- /dev/null +++ b/test/tcl/test074.tcl @@ -0,0 +1,276 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test074 +# TEST Test of DB_NEXT_NODUP. +proc test074 { method {dir -nextnodup} {nitems 100} {tnum "074"} args } { + source ./include.tcl + global alphabet + global is_je_test + global rand_init + + set omethod [convert_method $method] + set args [convert_args $method $args] + + berkdb srand $rand_init + + # Data prefix--big enough that we get a mix of on-page, off-page, + # and multi-off-page dups with the default nitems + if { [is_fixed_length $method] == 1 } { + set globaldata "somedata" + } else { + set globaldata [repeat $alphabet 4] + } + + puts "Test$tnum $omethod ($args): Test of $dir" + + # First, test non-dup (and not-very-interesting) case with + # all db types. + + puts "\tTest$tnum.a: No duplicates." + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum-nodup.db + set env NULL + } else { + set testfile test$tnum-nodup.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + set db [eval {berkdb_open -create -mode 0644} $omethod\ + $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + set txn "" + + # Insert nitems items. + puts "\t\tTest$tnum.a.1: Put loop." + for {set i 1} {$i <= $nitems} {incr i} { + # + # If record based, set key to $i * 2 to leave + # holes/unused entries for further testing. + # + if {[is_record_based $method] == 1} { + set key [expr $i * 2] + } else { + set key "key$i" + } + set data "$globaldata$i" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key \ + [chop_data $method $data]}] + error_check_good put($i) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + puts "\t\tTest$tnum.a.2: Get($dir)" + + # foundarray($i) is set when key number i is found in the database + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # Initialize foundarray($i) to zero for all $i + for {set i 1} {$i < $nitems} {incr i} { + set foundarray($i) 0 + } + + # Walk database using $dir and record each key gotten. 
+ for {set i 1} {$i <= $nitems} {incr i} { + set dbt [$dbc get $dir] + set key [lindex [lindex $dbt 0] 0] + if {[is_record_based $method] == 1} { + set num [expr $key / 2] + set desired_key $key + error_check_good $method:num $key [expr $num * 2] + } else { + set num [string range $key 3 end] + set desired_key key$num + } + + error_check_good dbt_correct($i) $dbt\ + [list [list $desired_key\ + [pad_data $method $globaldata$num]]] + + set foundarray($num) 1 + } + + puts "\t\tTest$tnum.a.3: Final key." + error_check_good last_db_get [$dbc get $dir] [list] + + puts "\t\tTest$tnum.a.4: Verify loop." + for { set i 1 } { $i <= $nitems } { incr i } { + error_check_good found_key($i) $foundarray($i) 1 + } + + error_check_good dbc_close(nodup) [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # If we are a method that doesn't allow dups, verify that + # we get an empty list if we try to use DB_NEXT_DUP + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + puts "\t\tTest$tnum.a.5: Check DB_NEXT_DUP for $method." + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set dbt [$dbc get $dir] + error_check_good $method:nextdup [$dbc get -nextdup] [list] + error_check_good dbc_close(nextdup) [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + error_check_good db_close(nodup) [$db close] 0 + + # Quit here if we're a method that won't allow dups. + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "\tTest$tnum: Skipping remainder for method $method." + return + } + + foreach opt { "-dup" "-dupsort" } { + if { $is_je_test || [is_compressed $args] } { + if { $opt == "-dup" } { + continue + } + } + + # + # If we are using an env, then testfile should just be the + # db name. 
Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum$opt.db + } else { + set testfile test$tnum$opt.db + } + + if { [string compare $opt "-dupsort"] == 0 } { + set opt "-dup -dupsort" + } + + puts "\tTest$tnum.b: Duplicates ($opt)." + + puts "\t\tTest$tnum.b.1 ($opt): Put loop." + set db [eval {berkdb_open -create -mode 0644}\ + $opt $omethod $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + # Insert nitems different keys such that key i has i dups. + for {set i 1} {$i <= $nitems} {incr i} { + set key key$i + + for {set j 1} {$j <= $i} {incr j} { + if { $j < 10 } { + set data "${globaldata}00$j" + } elseif { $j < 100 } { + set data "${globaldata}0$j" + } else { + set data "$globaldata$j" + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn \ + [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $data}] + error_check_good put($i,$j) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + } + + # Initialize foundarray($i) to 0 for all i. + unset foundarray + for { set i 1 } { $i <= $nitems } { incr i } { + set foundarray($i) 0 + } + + # Get loop--after each get, move forward a random increment + # within the duplicate set. + puts "\t\tTest$tnum.b.2 ($opt): Get loop." 
+ set one "001" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good dbc($opt) [is_valid_cursor $dbc $db] TRUE + for { set i 1 } { $i <= $nitems } { incr i } { + set dbt [$dbc get $dir] + set key [lindex [lindex $dbt 0] 0] + set num [string range $key 3 end] + + set desired_key key$num + if { [string compare $dir "-prevnodup"] == 0 } { + if { $num < 10 } { + set one "00$num" + } elseif { $num < 100 } { + set one "0$num" + } else { + set one $num + } + } + + error_check_good dbt_correct($i) $dbt\ + [list [list $desired_key\ + "$globaldata$one"]] + + set foundarray($num) 1 + + # Go forward by some number w/i dup set. + set inc [berkdb random_int 0 [expr $num - 1]] + for { set j 0 } { $j < $inc } { incr j } { + eval {$dbc get -nextdup} + } + } + + puts "\t\tTest$tnum.b.3 ($opt): Final key." + error_check_good last_db_get($opt) [$dbc get $dir] [list] + + # Verify + puts "\t\tTest$tnum.b.4 ($opt): Verify loop." + for { set i 1 } { $i <= $nitems } { incr i } { + error_check_good found_key($i) $foundarray($i) 1 + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } +} diff --git a/test/tcl/test076.tcl b/test/tcl/test076.tcl new file mode 100644 index 00000000..40174466 --- /dev/null +++ b/test/tcl/test076.tcl @@ -0,0 +1,90 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test076 +# TEST Test creation of many small databases in a single environment. [#1528]. 
+proc test076 { method { ndbs 1000 } { tnum "076" } args } { + global is_qnx_test + source ./include.tcl + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + if { [is_record_based $method] == 1 } { + set key "" + } else { + set key "key" + } + set data "datamoredatamoredata" + + # Create an env if we weren't passed one. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set deleteenv 1 + env_cleanup $testdir + set env [eval \ + {berkdb_env -create -home} $testdir $pageargs $encargs] + error_check_good env [is_valid_env $env] TRUE + set args "$args -env $env" + } else { + set deleteenv 0 + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + if { $ndbs == 1000 } { + set ndbs 100 + } + } + set testdir [get_home $env] + } + if { $is_qnx_test && $ndbs > 100 } { + set ndbs 100 + } + if { [is_queueext $method] } { + set ndbs 500 + } + + puts -nonewline "Test$tnum $method ($args): " + puts -nonewline "Create $ndbs" + puts " small databases in one env." + + cleanup $testdir $env + set txn "" + + for { set i 1 } { $i <= $ndbs } { incr i } { + set testfile test$tnum.$i.db + + set db [eval {berkdb_open -create -mode 0644}\ + $args $omethod $testfile] + error_check_good db_open($i) [is_valid_db $db] TRUE + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key$i \ + [chop_data $method $data$i]}] + error_check_good db_put($i) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close($i) [$db close] 0 + } + + if { $deleteenv == 1 } { + error_check_good env_close [$env close] 0 + } + + puts "\tTest$tnum passed." 
+} diff --git a/test/tcl/test077.tcl b/test/tcl/test077.tcl new file mode 100644 index 00000000..e1f9c874 --- /dev/null +++ b/test/tcl/test077.tcl @@ -0,0 +1,92 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test077 +# TEST Test of DB_GET_RECNO [#1206]. +proc test077 { method { nkeys 1000 } { tnum "077" } args } { + source ./include.tcl + global alphabet + + set omethod [convert_method $method] + set args [convert_args $method $args] + + puts "Test$tnum: Test of DB_GET_RECNO." + + if { [is_rbtree $method] != 1 } { + puts "\tTest$tnum: Skipping for method $method." + return + } + + set data $alphabet + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set db [eval {berkdb_open -create -mode 0644} \ + $omethod $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tTest$tnum.a: Populating database." + set txn "" + + for { set i 1 } { $i <= $nkeys } { incr i } { + set key [format %5d $i] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key $data}] + error_check_good db_put($key) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + puts "\tTest$tnum.b: Verifying record numbers." 
+ + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good dbc_open [is_valid_cursor $dbc $db] TRUE + + set i 1 + for { set dbt [$dbc get -first] } \ + { [string length $dbt] != 0 } \ + { set dbt [$dbc get -next] } { + set recno [$dbc get -get_recno] + set keynum [expr [lindex [lindex $dbt 0] 0]] + + # Verify that i, the number that is the key, and recno + # are all equal. + error_check_good key($i) $keynum $i + error_check_good recno($i) $recno $i + incr i + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test078.tcl b/test/tcl/test078.tcl new file mode 100644 index 00000000..ae271ed2 --- /dev/null +++ b/test/tcl/test078.tcl @@ -0,0 +1,252 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test078 +# TEST Test of DBC->c_count(). [#303] +proc test078 { method { nkeys 100 } { pagesize 512 } { tnum "078" } args } { + source ./include.tcl + global alphabet + global is_je_test + global rand_init + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test$tnum ($method): Test of key counts." 
+ + berkdb srand $rand_init + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + } + + if { $eindex == -1 } { + set testfile $testdir/test$tnum-a.db + set env NULL + } else { + set testfile test$tnum-a.db + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + set nkeys 50 + append args " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test078: skipping for specific pagesizes" + return + } + puts "\tTest$tnum.a: No duplicates, trivial answer." + puts "\t\tTest$tnum.a.1: Populate database, verify dup counts." + set db [eval {berkdb_open -create -mode 0644\ + -pagesize $pagesize} $omethod $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + set txn "" + + for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$i\ + [pad_data $method $alphabet$i]}] + error_check_good put.a($i) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good count.a [$db count $i] 1 + } + + if { [is_rrecno $method] == 1 } { + error_check_good db_close.a [$db close] 0 + puts "\tTest$tnum.a2: Skipping remainder of test078 for -rrecno." + return + } + + puts "\t\tTest$tnum.a.2: Delete items, verify dup counts again." 
+ for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db del} $txn $i] + error_check_good del.a($i) $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good count.a [$db count $i] 0 + } + + + error_check_good db_close.a [$db close] 0 + + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts \ + "\tTest$tnum.b: Duplicates not supported in $method, skipping." + return + } + + foreach {let descrip dupopt} \ + {b sorted "-dup -dupsort" c unsorted "-dup"} { + + if { [is_compressed $args] } { + if { $dupopt == "-dup" } { + continue + } + } + if { $eindex == -1 } { + set testfile $testdir/test$tnum-b.db + set env NULL + } else { + set testfile test$tnum-b.db + set env [lindex $args $eindex] + if { $is_je_test } { + if { $dupopt == "-dup" } { + continue + } + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "\tTest$tnum.$let: Duplicates ($descrip)." + puts "\t\tTest$tnum.$let.1: Populating database." + + set db [eval {berkdb_open -create -mode 0644\ + -pagesize $pagesize} $dupopt $omethod $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + + for { set i 1 } { $i <= $nkeys } { incr i } { + for { set j 0 } { $j < $i } { incr j } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn \ + [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$i\ + [pad_data $method $j$alphabet]}] + error_check_good put.$let,$i $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + } + + puts -nonewline "\t\tTest$tnum.$let.2: " + puts "Verifying duplicate counts." +$db sync + for { set i 1 } { $i <= $nkeys } { incr i } { + error_check_good count.$let,$i \ + [$db count $i] $i + } + + puts -nonewline "\t\tTest$tnum.$let.3: " + puts "Delete every other dup by cursor, verify counts." 
+ + # Delete every other item by cursor and check counts. + for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set c [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + set j 0 + + for { set ret [$c get -first]} { [llength $ret] > 0 } \ + { set ret [$c get -next]} { + set key [lindex [lindex $ret 0] 0] + if { $key == $i } { + set data [lindex [lindex $ret 0 ] 1] + set num [string range $data 0 \ + end-[string length $alphabet]] + if { [expr $num % 2] == 0 } { + error_check_good \ + c_del [$c del] 0 + incr j + } + if { $txnenv == 0 } { + error_check_good count.$let.$i-$j \ + [$db count $i] [expr $i - $j] + } + } + } + error_check_good curs_close [$c close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good count.$let.$i-$j \ + [$db count $i] [expr $i - $j] + } + + puts -nonewline "\t\tTest$tnum.$let.4: " + puts "Delete all items by cursor, verify counts." + for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set c [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + for { set ret [$c get -first]} { [llength $ret] > 0 } \ + { set ret [$c get -next]} { + set key [lindex [lindex $ret 0] 0] + if { $key == $i } { + error_check_good c_del [$c del] 0 + } + } + error_check_good curs_close [$c close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_count_zero [$db count $i] 0 + } + + puts -nonewline "\t\tTest$tnum.$let.5: " + puts "Add back one item, verify counts." 
+ for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$i\ + [pad_data $method $alphabet]}] + error_check_good put.$let,$i $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good add_one [$db count $i] 1 + } + + puts -nonewline "\t\tTest$tnum.$let.6: " + puts "Delete remaining entries, verify counts." + for { set i 1 } { $i <= $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good db_del [eval {$db del} $txn {$i}] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good count.$let.$i [$db count $i] 0 + } + error_check_good db_close.$let [$db close] 0 + } +} diff --git a/test/tcl/test079.tcl b/test/tcl/test079.tcl new file mode 100644 index 00000000..3e27fff0 --- /dev/null +++ b/test/tcl/test079.tcl @@ -0,0 +1,28 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test079 +# TEST Test of deletes in large trees. (test006 w/ sm. pagesize). +# TEST +# TEST Check that delete operations work in large btrees. 10000 entries +# TEST and a pagesize of 512 push this out to a four-level btree, with a +# TEST small fraction of the entries going on overflow pages. 
+proc test079 { method {nentries 10000} {pagesize 512} {tnum "079"} \ + {ndups 20} args} { + if { [ is_queueext $method ] == 1 } { + set method "queue"; + lappend args "-extent" "20" + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: skipping for specific pagesizes" + return + } + + eval {test006 $method $nentries 1 $tnum $ndups -pagesize \ + $pagesize} $args +} diff --git a/test/tcl/test081.tcl b/test/tcl/test081.tcl new file mode 100644 index 00000000..adf14e8f --- /dev/null +++ b/test/tcl/test081.tcl @@ -0,0 +1,14 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test081 +# TEST Test off-page duplicates and overflow pages together with +# TEST very large keys (key/data as file contents). +proc test081 { method {ndups 13} {tnum "081"} args} { + source ./include.tcl + + eval {test017 $method 1 $ndups $tnum} $args +} diff --git a/test/tcl/test082.tcl b/test/tcl/test082.tcl new file mode 100644 index 00000000..62823356 --- /dev/null +++ b/test/tcl/test082.tcl @@ -0,0 +1,13 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test082 +# TEST Test of DB_PREV_NODUP (uses test074). +proc test082 { method {dir -prevnodup} {nitems 100} {tnum "082"} args} { + source ./include.tcl + + eval {test074 $method $dir $nitems $tnum} $args +} diff --git a/test/tcl/test083.tcl b/test/tcl/test083.tcl new file mode 100644 index 00000000..9adcb4e3 --- /dev/null +++ b/test/tcl/test083.tcl @@ -0,0 +1,171 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test083 +# TEST Test of DB->key_range. 
+proc test083 { method {pgsz 512} {maxitems 5000} {step 2} args} { + source ./include.tcl + + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set omethod [convert_method $method] + set args [convert_args $method $args] + + puts "Test083 $method ($args): Test of DB->key_range" + if { [is_btree $method] != 1 } { + puts "\tTest083: Skipping for method $method." + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test083: skipping for specific pagesizes" + return + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set testfile $testdir/test083.db + set env NULL + } else { + set testfile test083.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + # We assume that numbers will be at most six digits wide + error_check_bad maxitems_range [expr $maxitems > 999999] 1 + + # We want to test key_range on a variety of sizes of btree. + # Start at ten keys and work up to $maxitems keys, at each step + # multiplying the number of keys by $step. 
+ for { set nitems 10 } { $nitems <= $maxitems }\ + { set nitems [expr $nitems * $step] } { + + puts "\tTest083.a: Opening new database" + if { $env != "NULL"} { + set testdir [get_home $env] + } + cleanup $testdir $env + set db [eval {berkdb_open -create -mode 0644} \ + -pagesize $pgsz $omethod $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + t83_build $db $nitems $env $txnenv + t83_test $db $nitems $env $txnenv $args + + error_check_good db_close [$db close] 0 + } +} + +proc t83_build { db nitems env txnenv } { + source ./include.tcl + + puts "\tTest083.b: Populating database with $nitems keys" + + set keylist {} + puts "\t\tTest083.b.1: Generating key list" + for { set i 0 } { $i < $nitems } { incr i } { + lappend keylist $i + } + + # With randomly ordered insertions, the range of errors we + # get from key_range can be unpredictably high [#2134]. For now, + # just skip the randomization step. + #puts "\t\tTest083.b.2: Randomizing key list" + #set keylist [randomize_list $keylist] + #puts "\t\tTest083.b.3: Populating database with randomized keys" + + puts "\t\tTest083.b.2: Populating database" + set data [repeat . 50] + set txn "" + foreach keynum $keylist { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {key[format %6d $keynum] $data}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } +} + +proc t83_test { db nitems env txnenv args} { + # Look at the first key, then at keys about 1/4, 1/2, 3/4, and + # all the way through the database. Make sure the key_ranges + # aren't off by more than 10%. + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } else { + set txn "" + } + set dbc [eval {$db cursor} $txn] + error_check_good dbc [is_valid_cursor $dbc $db] TRUE + + puts "\tTest083.c: Verifying ranges..." + + # Wild guess. 
"Tolerance" tests how close the key is to + # its expected position. "Sumtol" tests the sum of the + # "less than", "equal to", and "more than", which is + # expected to be around 1. + + if { [is_compressed $args] == 1 } { + set tolerance 0.5 + set sumtol 0.3 + } else { + set tolerance 0.3 + set sumtol 0.05 + } + + for { set i 0 } { $i < $nitems } \ + { incr i [expr $nitems / [berkdb random_int 3 16]] } { + puts -nonewline "\t\t...key $i" + error_check_bad key0 [llength [set dbt [$dbc get -first]]] 0 + + for { set j 0 } { $j < $i } { incr j } { + error_check_bad key$j \ + [llength [set dbt [$dbc get -next]]] 0 + } + + set ranges [$db keyrange [lindex [lindex $dbt 0] 0]] + #puts "ranges is $ranges" + error_check_good howmanyranges [llength $ranges] 3 + + set lessthan [lindex $ranges 0] + set morethan [lindex $ranges 2] + + puts -nonewline " ... sum of ranges" + set rangesum [expr $lessthan + [lindex $ranges 1] + $morethan] + roughly_equal $rangesum 1 $sumtol + + puts "... position of key." + roughly_equal $lessthan [expr $i * 1.0 / $nitems] $tolerance + + } + + error_check_good dbc_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } +} + +proc roughly_equal { a b tol } { + error_check_good "$a =~ $b" [expr abs($a - $b) < $tol] 1 +} diff --git a/test/tcl/test084.tcl b/test/tcl/test084.tcl new file mode 100644 index 00000000..c62f8f8c --- /dev/null +++ b/test/tcl/test084.tcl @@ -0,0 +1,52 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test084 +# TEST Basic sanity test (test001) with large (64K) pages. +proc test084 { method {nentries 10000} {tnum "084"} {pagesize 65536} args} { + source ./include.tcl + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum-empty.db + set env NULL + } else { + set testfile test$tnum-empty.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test084: skipping for specific pagesizes" + return + } + + cleanup $testdir $env + + set args "-pagesize $pagesize $args" + + eval {test001 $method $nentries 0 0 $tnum} $args + + set omethod [convert_method $method] + set args [convert_args $method $args] + + # For good measure, create a second database that's empty + # with the large page size. (There was a verifier bug that + # choked on empty 64K pages. [#2408]) + set db [eval {berkdb_open -create -mode 0644} $args $omethod $testfile] + error_check_good empty_db [is_valid_db $db] TRUE + error_check_good empty_db_close [$db close] 0 +} diff --git a/test/tcl/test085.tcl b/test/tcl/test085.tcl new file mode 100644 index 00000000..998752e7 --- /dev/null +++ b/test/tcl/test085.tcl @@ -0,0 +1,340 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test085 +# TEST Test of cursor behavior when a cursor is pointing to a deleted +# TEST btree key which then has duplicates added. [#2473] +proc test085 { method {pagesize 512} {onp 3} {offp 10} {tnum "085"} args } { + source ./include.tcl + global alphabet + + set omethod [convert_method $method] + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test085: skipping for specific pagesizes" + return + } + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + cleanup $testdir $env + + # Keys must sort $prekey < $key < $postkey. + set prekey "AA" + set key "BBB" + set postkey "CCCC" + + # Make these distinguishable from each other and from the + # alphabets used for the $key's data. + set predatum "1234567890" + set datum $alphabet + set postdatum "0987654321" + set txn "" + + append args " -pagesize $pagesize -dup" + + puts -nonewline "Test$tnum $omethod ($args): " + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } + # Skip for all non-btrees. (Rbtrees don't count as btrees, for + # now, since they don't support dups.) + if { [is_btree $method] != 1 } { + puts "Skipping for method $method." + return + } else { + puts "Duplicates w/ deleted item cursor." + } + + # Repeat the test with both on-page and off-page numbers of dups. + foreach ndups "$onp $offp" { + # Put operations we want to test on a cursor set to the + # deleted item, the key to use with them, and what should + # come before and after them given a placement of + # the deleted item at the beginning or end of the dupset. 
+ set final [expr $ndups - 1] + set putops { + {{-before} "" $predatum {[test085_ddatum 0]} beginning} + {{-before} "" {[test085_ddatum $final]} $postdatum end} + {{-keyfirst} $key $predatum {[test085_ddatum 0]} beginning} + {{-keyfirst} $key $predatum {[test085_ddatum 0]} end} + {{-keylast} $key {[test085_ddatum $final]} $postdatum beginning} + {{-keylast} $key {[test085_ddatum $final]} $postdatum end} + {{-after} "" $predatum {[test085_ddatum 0]} beginning} + {{-after} "" {[test085_ddatum $final]} $postdatum end} + } + + # Get operations we want to test on a cursor set to the + # deleted item, any args to get, and the expected key/data pair. + set getops { + {{-current} "" "" "" beginning} + {{-current} "" "" "" end} + {{-next} "" $key {[test085_ddatum 0]} beginning} + {{-next} "" $postkey $postdatum end} + {{-prev} "" $prekey $predatum beginning} + {{-prev} "" $key {[test085_ddatum $final]} end} + {{-first} "" $prekey $predatum beginning} + {{-first} "" $prekey $predatum end} + {{-last} "" $postkey $postdatum beginning} + {{-last} "" $postkey $postdatum end} + {{-nextdup} "" $key {[test085_ddatum 0]} beginning} + {{-nextdup} "" EMPTYLIST "" end} + {{-nextnodup} "" $postkey $postdatum beginning} + {{-nextnodup} "" $postkey $postdatum end} + {{-prevnodup} "" $prekey $predatum beginning} + {{-prevnodup} "" $prekey $predatum end} + } + + set txn "" + foreach pair $getops { + set op [lindex $pair 0] + puts "\tTest$tnum: Get ($op) with $ndups duplicates,\ + cursor at the [lindex $pair 4]." 
+ set db [eval {berkdb_open -create \ + -mode 0644} $omethod $encargs $args $testfile] + error_check_good "db open" [is_valid_db $db] TRUE + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn \ + [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [test085_setup $db $txn] + + set beginning [expr [string compare \ + [lindex $pair 4] "beginning"] == 0] + + for { set i 0 } { $i < $ndups } { incr i } { + if { $beginning } { + error_check_good db_put($i) \ + [eval {$db put} $txn \ + {$key [test085_ddatum $i]}] 0 + } else { + set c [eval {$db cursor} $txn] + set j [expr $ndups - $i - 1] + error_check_good db_cursor($j) \ + [is_valid_cursor $c $db] TRUE + set d [test085_ddatum $j] + error_check_good dbc_put($j) \ + [$c put -keyfirst $key $d] 0 + error_check_good c_close [$c close] 0 + } + } + + set gargs [lindex $pair 1] + set ekey "" + set edata "" + eval set ekey [lindex $pair 2] + eval set edata [lindex $pair 3] + + set dbt [eval $dbc get $op $gargs] + if { [string compare $ekey EMPTYLIST] == 0 || \ + [string compare $op -current] == 0 } { + error_check_good dbt($op,$ndups) \ + [llength $dbt] 0 + } else { + error_check_good dbt($op,$ndups) $dbt \ + [list [list $ekey $edata]] + } + error_check_good "dbc close" [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good "db close" [$db close] 0 + verify_dir $testdir "\t\t" 0 0 $nodump + + # Remove testfile so we can do without truncate flag. + # This is okay because we've already done verify and + # dump/load. + if { $env == "NULL" } { + set ret [eval {berkdb dbremove} \ + $encargs $testfile] + } elseif { $txnenv == 1 } { + set ret [eval "$env dbremove" \ + -auto_commit $encargs $testfile] + } else { + set ret [eval {berkdb dbremove} \ + -env $env $encargs $testfile] + } + error_check_good dbremove $ret 0 + + } + + foreach pair $putops { + # Open and set up database. 
+ set op [lindex $pair 0] + puts "\tTest$tnum: Put ($op) with $ndups duplicates,\ + cursor at the [lindex $pair 4]." + set db [eval {berkdb_open -create \ + -mode 0644} $omethod $args $encargs $testfile] + error_check_good "db open" [is_valid_db $db] TRUE + + set beginning [expr [string compare \ + [lindex $pair 4] "beginning"] == 0] + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [test085_setup $db $txn] + + # Put duplicates. + for { set i 0 } { $i < $ndups } { incr i } { + if { $beginning } { + error_check_good db_put($i) \ + [eval {$db put} $txn \ + {$key [test085_ddatum $i]}] 0 + } else { + set c [eval {$db cursor} $txn] + set j [expr $ndups - $i - 1] + error_check_good db_cursor($j) \ + [is_valid_cursor $c $db] TRUE + set d [test085_ddatum $j] + error_check_good dbc_put($j) \ + [$c put -keyfirst $key $d] 0 + error_check_good c_close [$c close] 0 + } + } + + # Set up cursors for stability test. + set pre_dbc [eval {$db cursor} $txn] + error_check_good pre_set [$pre_dbc get -set $prekey] \ + [list [list $prekey $predatum]] + set post_dbc [eval {$db cursor} $txn] + error_check_good post_set [$post_dbc get -set $postkey]\ + [list [list $postkey $postdatum]] + set first_dbc [eval {$db cursor} $txn] + error_check_good first_set \ + [$first_dbc get -get_both $key [test085_ddatum 0]] \ + [list [list $key [test085_ddatum 0]]] + set last_dbc [eval {$db cursor} $txn] + error_check_good last_set \ + [$last_dbc get -get_both $key [test085_ddatum \ + [expr $ndups - 1]]] \ + [list [list $key [test085_ddatum [expr $ndups -1]]]] + + set k [lindex $pair 1] + set d_before "" + set d_after "" + eval set d_before [lindex $pair 2] + eval set d_after [lindex $pair 3] + set newdatum "NewDatum" + error_check_good dbc_put($op,$ndups) \ + [eval $dbc put $op $k $newdatum] 0 + error_check_good dbc_prev($op,$ndups) \ + [lindex [lindex [$dbc get -prev] 0] 1] \ + $d_before + error_check_good 
dbc_current($op,$ndups) \ + [lindex [lindex [$dbc get -next] 0] 1] \ + $newdatum + + error_check_good dbc_next($op,$ndups) \ + [lindex [lindex [$dbc get -next] 0] 1] \ + $d_after + + # Verify stability of pre- and post- cursors. + error_check_good pre_stable [$pre_dbc get -current] \ + [list [list $prekey $predatum]] + error_check_good post_stable [$post_dbc get -current] \ + [list [list $postkey $postdatum]] + error_check_good first_stable \ + [$first_dbc get -current] \ + [list [list $key [test085_ddatum 0]]] + error_check_good last_stable \ + [$last_dbc get -current] \ + [list [list $key [test085_ddatum [expr $ndups -1]]]] + + foreach c "$pre_dbc $post_dbc $first_dbc $last_dbc" { + error_check_good ${c}_close [$c close] 0 + } + + error_check_good "dbc close" [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good "db close" [$db close] 0 + verify_dir $testdir "\t\t" 0 0 $nodump + + # Remove testfile so we can do without truncate flag. + # This is okay because we've already done verify and + # dump/load. + if { $env == "NULL" } { + set ret [eval {berkdb dbremove} \ + $encargs $testfile] + } elseif { $txnenv == 1 } { + set ret [eval "$env dbremove" \ + -auto_commit $encargs $testfile] + } else { + set ret [eval {berkdb dbremove} \ + -env $env $encargs $testfile] + } + error_check_good dbremove $ret 0 + } + } +} + +# Set up the test database; put $prekey, $key, and $postkey with their +# respective data, and then delete $key with a new cursor. Return that +# cursor, still pointing to the deleted item. +proc test085_setup { db txn } { + upvar key key + upvar prekey prekey + upvar postkey postkey + upvar predatum predatum + upvar postdatum postdatum + + # no one else should ever see this one! 
+ set datum "bbbbbbbb" + + error_check_good pre_put [eval {$db put} $txn {$prekey $predatum}] 0 + error_check_good main_put [eval {$db put} $txn {$key $datum}] 0 + error_check_good post_put [eval {$db put} $txn {$postkey $postdatum}] 0 + + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + error_check_good dbc_getset [$dbc get -get_both $key $datum] \ + [list [list $key $datum]] + + error_check_good dbc_del [$dbc del] 0 + + return $dbc +} + +proc test085_ddatum { a } { + global alphabet + return $a$alphabet +} diff --git a/test/tcl/test086.tcl b/test/tcl/test086.tcl new file mode 100644 index 00000000..b463c606 --- /dev/null +++ b/test/tcl/test086.tcl @@ -0,0 +1,168 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test086 +# TEST Test of cursor stability across btree splits/rsplits with +# TEST subtransaction aborts (a variant of test048). [#2373] +proc test086 { method args } { + global errorCode + source ./include.tcl + + set tnum 086 + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set pageargs "" + split_pageargs $args pageargs + + if { [is_btree $method] != 1 } { + puts "Test$tnum skipping for method $method." + return + } + + set method "-btree" + + puts "\tTest$tnum: Test of cursor stability across aborted\ + btree splits." + + set key "key" + set data "data" + set txn "" + set flags "" + + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then this test won't work. + if { $eindex == -1 } { + # But we will be using our own env... + set testfile test$tnum.db + } else { + puts "\tTest$tnum: Environment provided; skipping test." 
+ return + } + set t1 $testdir/t1 + env_cleanup $testdir + + set env [eval \ + {berkdb_env -create -home $testdir -txn} $pageargs $encargs] + error_check_good berkdb_env [is_valid_env $env] TRUE + + puts "\tTest$tnum.a: Create $method database." + set oflags "-auto_commit -create -env $env -mode 0644 $args $method" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 5 + # Fill page w/ small key/data pairs, keep at leaf + # + puts "\tTest$tnum.b: Fill page with $nkeys small key/data pairs." + set txn [$env txn] + error_check_good txn [is_valid_txn $txn $env] TRUE + for { set i 0 } { $i < $nkeys } { incr i } { + set ret [$db put -txn $txn key000$i $data$i] + error_check_good dbput $ret 0 + } + error_check_good commit [$txn commit] 0 + + # get db ordering, set cursors + puts "\tTest$tnum.c: Set cursors on each of $nkeys pairs." + set txn [$env txn] + error_check_good txn [is_valid_txn $txn $env] TRUE + for {set i 0; set ret [$db get -txn $txn key000$i]} {\ + $i < $nkeys && [llength $ret] != 0} {\ + incr i; set ret [$db get -txn $txn key000$i]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + set dbc [$db cursor -txn $txn] + set dbc_set($i) $dbc + error_check_good db_cursor:$i [is_substr $dbc_set($i) $db] 1 + set ret [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc_set($i)_get:set [llength $ret] 0 + } + + # Create child txn. + set ctxn [$env txn -parent $txn] + error_check_good ctxn [is_valid_txn $txn $env] TRUE + + # if mkeys is above 1000, need to adjust below for lexical order + set mkeys 1000 + puts "\tTest$tnum.d: Add $mkeys pairs to force split." 
+ for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 100 } { + set ret [$db put -txn $ctxn key0$i $data$i] + } elseif { $i >= 10 } { + set ret [$db put -txn $ctxn key00$i $data$i] + } else { + set ret [$db put -txn $ctxn key000$i $data$i] + } + error_check_good dbput:more $ret 0 + } + + puts "\tTest$tnum.e: Abort." + error_check_good ctxn_abort [$ctxn abort] 0 + + puts "\tTest$tnum.f: Check and see that cursors maintained reference." + for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + # Put (and this time keep) the keys that caused the split. + # We'll delete them to test reverse splits. + puts "\tTest$tnum.g: Put back added keys." + for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 100 } { + set ret [$db put -txn $txn key0$i $data$i] + } elseif { $i >= 10 } { + set ret [$db put -txn $txn key00$i $data$i] + } else { + set ret [$db put -txn $txn key000$i $data$i] + } + error_check_good dbput:more $ret 0 + } + + puts "\tTest$tnum.h: Delete added keys to force reverse split." + set ctxn [$env txn -parent $txn] + error_check_good ctxn [is_valid_txn $txn $env] TRUE + for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 100 } { + error_check_good db_del:$i [$db del -txn $ctxn key0$i] 0 + } elseif { $i >= 10 } { + error_check_good db_del:$i \ + [$db del -txn $ctxn key00$i] 0 + } else { + error_check_good db_del:$i \ + [$db del -txn $ctxn key000$i] 0 + } + } + + puts "\tTest$tnum.i: Abort." + error_check_good ctxn_abort [$ctxn abort] 0 + + puts "\tTest$tnum.j: Verify cursor reference." 
+ for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + puts "\tTest$tnum.j: Cleanup." + # close cursors + for {set i 0} { $i < $nkeys } {incr i} { + error_check_good dbc_close:$i [$dbc_set($i) close] 0 + } + + error_check_good commit [$txn commit] 0 + error_check_good dbclose [$db close] 0 + error_check_good envclose [$env close] 0 + + puts "\tTest$tnum complete." +} diff --git a/test/tcl/test087.tcl b/test/tcl/test087.tcl new file mode 100644 index 00000000..284b738b --- /dev/null +++ b/test/tcl/test087.tcl @@ -0,0 +1,293 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test087 +# TEST Test of cursor stability when converting to and modifying +# TEST off-page duplicate pages with subtransaction aborts. [#2373] +# TEST +# TEST Does the following: +# TEST a. Initialize things by DB->putting ndups dups and +# TEST setting a reference cursor to point to each. Do each put twice, +# TEST first aborting, then committing, so we're sure to abort the move +# TEST to off-page dups at some point. +# TEST b. c_put ndups dups (and correspondingly expanding +# TEST the set of reference cursors) after the last one, making sure +# TEST after each step that all the reference cursors still point to +# TEST the right item. +# TEST c. Ditto, but before the first one. +# TEST d. Ditto, but after each one in sequence first to last. +# TEST e. Ditto, but after each one in sequence from last to first. +# TEST occur relative to the new datum) +# TEST f. Ditto for the two sequence tests, only doing a +# TEST DBC->c_put(DB_CURRENT) of a larger datum instead of adding a +# TEST new one. 
+proc test087 { method {pagesize 512} {ndups 50} {tnum "087"} args } { + source ./include.tcl + global alphabet + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + + puts "Test$tnum $omethod ($args): " + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then return + if { $eindex != -1 } { + puts "Environment specified; skipping." + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test087: skipping for specific pagesizes" + return + } + env_cleanup $testdir + set testfile test$tnum.db + set key "the key" + append args " -pagesize $pagesize -dup" + + if { [is_record_based $method] || [is_rbtree $method] } { + puts "Skipping for method $method." + return + } elseif { [is_compressed $args] == 1 } { + puts "Test$tnum skipping for btree with compression." + return + } else { + puts "Test$tnum: Cursor stability on dup. pages w/ aborts." + } + + set env [eval {berkdb_env \ + -create -home $testdir -txn -pagesize $pagesize} $encargs] + error_check_good env_create [is_valid_env $env] TRUE + + set db [eval {berkdb_open -auto_commit \ + -create -env $env -mode 0644} $omethod $args $testfile] + error_check_good "db open" [is_valid_db $db] TRUE + + # Number of outstanding keys. + set keys $ndups + + puts "\tTest$tnum.a: put/abort/put/commit loop;\ + $ndups dups, short data." 
+ set txn [$env txn] + error_check_good txn [is_valid_txn $txn $env] TRUE + for { set i 0 } { $i < $ndups } { incr i } { + set datum [makedatum_t73 $i 0] + + set ctxn [$env txn -parent $txn] + error_check_good ctxn(abort,$i) [is_valid_txn $ctxn $env] TRUE + error_check_good "db put/abort ($i)" \ + [$db put -txn $ctxn $key $datum] 0 + error_check_good ctxn_abort($i) [$ctxn abort] 0 + + verify_t73 is_long dbc [expr $i - 1] $key + + set ctxn [$env txn -parent $txn] + error_check_good ctxn(commit,$i) [is_valid_txn $ctxn $env] TRUE + error_check_good "db put/commit ($i)" \ + [$db put -txn $ctxn $key $datum] 0 + error_check_good ctxn_commit($i) [$ctxn commit] 0 + + set is_long($i) 0 + + set dbc($i) [$db cursor -txn $txn] + error_check_good "db cursor ($i)"\ + [is_valid_cursor $dbc($i) $db] TRUE + error_check_good "dbc get -get_both ($i)"\ + [$dbc($i) get -get_both $key $datum]\ + [list [list $key $datum]] + + verify_t73 is_long dbc $i $key + } + + puts "\tTest$tnum.b: Cursor put (DB_KEYLAST); $ndups new dups,\ + short data." + + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + for { set i 0 } { $i < $ndups } { incr i } { + # !!! keys contains the number of the next dup + # to be added (since they start from zero) + set datum [makedatum_t73 $keys 0] + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + error_check_good "c_put(DB_KEYLAST, $keys)"\ + [$curs put -keylast $key $datum] 0 + + # We can't do a verification while a child txn is active, + # or we'll run into trouble when DEBUG_ROP is enabled. + # If this test has trouble, though, uncommenting this + # might be illuminating--it makes things a bit more rigorous + # and works fine when DEBUG_ROP is not enabled. 
+ # verify_t73 is_long dbc $keys $key + error_check_good curs_close [$curs close] 0 + } + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + puts "\tTest$tnum.c: Cursor put (DB_KEYFIRST); $ndups new dups,\ + short data." + + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + for { set i 0 } { $i < $ndups } { incr i } { + # !!! keys contains the number of the next dup + # to be added (since they start from zero) + + set datum [makedatum_t73 $keys 0] + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + error_check_good "c_put(DB_KEYFIRST, $keys)"\ + [$curs put -keyfirst $key $datum] 0 + + # verify_t73 is_long dbc $keys $key + error_check_good curs_close [$curs close] 0 + } + # verify_t73 is_long dbc $keys $key + # verify_t73 is_long dbc $keys $key + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + puts "\tTest$tnum.d: Cursor put (DB_AFTER) first to last;\ + $keys new dups, short data" + # We want to add a datum after each key from 0 to the current + # value of $keys, which we thus need to save. + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + set keysnow $keys + for { set i 0 } { $i < $keysnow } { incr i } { + set datum [makedatum_t73 $keys 0] + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + # Which datum to insert this guy after. 
+ set curdatum [makedatum_t73 $i 0] + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $curdatum]\ + [list [list $key $curdatum]] + error_check_good "c_put(DB_AFTER, $i)"\ + [$curs put -after $datum] 0 + + # verify_t73 is_long dbc $keys $key + error_check_good curs_close [$curs close] 0 + } + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + puts "\tTest$tnum.e: Cursor put (DB_BEFORE) last to first;\ + $keys new dups, short data" + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + for { set i [expr $keys - 1] } { $i >= 0 } { incr i -1 } { + set datum [makedatum_t73 $keys 0] + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + # Which datum to insert this guy before. + set curdatum [makedatum_t73 $i 0] + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $curdatum]\ + [list [list $key $curdatum]] + error_check_good "c_put(DB_BEFORE, $i)"\ + [$curs put -before $datum] 0 + + # verify_t73 is_long dbc $keys $key + error_check_good curs_close [$curs close] 0 + } + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + puts "\tTest$tnum.f: Cursor put (DB_CURRENT), first to last,\ + growing $keys data." 
+ set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + for { set i 0 } { $i < $keysnow } { incr i } { + set olddatum [makedatum_t73 $i 0] + set newdatum [makedatum_t73 $i 1] + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db]\ + TRUE + + error_check_good "c_get(DB_GET_BOTH, $i)"\ + [$curs get -get_both $key $olddatum]\ + [list [list $key $olddatum]] + error_check_good "c_put(DB_CURRENT, $i)"\ + [$curs put -current $newdatum] 0 + + set is_long($i) 1 + + # verify_t73 is_long dbc $keys $key + error_check_good curs_close [$curs close] 0 + } + error_check_good ctxn_abort [$ctxn abort] 0 + for { set i 0 } { $i < $keysnow } { incr i } { + set is_long($i) 0 + } + verify_t73 is_long dbc $keys $key + + # Now delete the first item, abort the deletion, and make sure + # we're still sane. + puts "\tTest$tnum.g: Cursor delete first item, then abort delete." + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db] TRUE + set datum [makedatum_t73 0 0] + error_check_good "c_get(DB_GET_BOTH, 0)"\ + [$curs get -get_both $key $datum] [list [list $key $datum]] + error_check_good "c_del(0)" [$curs del] 0 + error_check_good curs_close [$curs close] 0 + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + # Ditto, for the last item. + puts "\tTest$tnum.h: Cursor delete last item, then abort delete." 
+ set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db] TRUE + set datum [makedatum_t73 [expr $keys - 1] 0] + error_check_good "c_get(DB_GET_BOTH, [expr $keys - 1])"\ + [$curs get -get_both $key $datum] [list [list $key $datum]] + error_check_good "c_del(0)" [$curs del] 0 + error_check_good curs_close [$curs close] 0 + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + # Ditto, for all the items. + puts "\tTest$tnum.i: Cursor delete all items, then abort delete." + set ctxn [$env txn -parent $txn] + error_check_good ctxn($i) [is_valid_txn $ctxn $env] TRUE + set curs [$db cursor -txn $ctxn] + error_check_good "db cursor create" [is_valid_cursor $curs $db] TRUE + set datum [makedatum_t73 0 0] + error_check_good "c_get(DB_GET_BOTH, 0)"\ + [$curs get -get_both $key $datum] [list [list $key $datum]] + error_check_good "c_del(0)" [$curs del] 0 + for { set i 1 } { $i < $keys } { incr i } { + error_check_good "c_get(DB_NEXT, $i)"\ + [$curs get -next] [list [list $key [makedatum_t73 $i 0]]] + error_check_good "c_del($i)" [$curs del] 0 + } + error_check_good curs_close [$curs close] 0 + error_check_good ctxn_abort [$ctxn abort] 0 + verify_t73 is_long dbc $keys $key + + # Close cursors. + puts "\tTest$tnum.j: Closing cursors." + for { set i 0 } { $i < $keys } { incr i } { + error_check_good "dbc close ($i)" [$dbc($i) close] 0 + } + error_check_good txn_commit [$txn commit] 0 + error_check_good "db close" [$db close] 0 + error_check_good "env close" [$env close] 0 +} diff --git a/test/tcl/test088.tcl b/test/tcl/test088.tcl new file mode 100644 index 00000000..7d978e69 --- /dev/null +++ b/test/tcl/test088.tcl @@ -0,0 +1,176 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test088 +# TEST Test of cursor stability across btree splits with very +# TEST deep trees (a variant of test048). [#2514] +proc test088 { method args } { + source ./include.tcl + global alphabet + global errorCode + global is_je_test + + set tstn 088 + set args [convert_args $method $args] + + if { [is_btree $method] != 1 } { + puts "Test$tstn skipping for method $method." + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test088: skipping for specific pagesizes" + return + } + + set method "-btree" + + puts "\tTest$tstn: Test of cursor stability across btree splits." + + set key "key$alphabet$alphabet$alphabet" + set data "data$alphabet$alphabet$alphabet" + set txn "" + set flags "" + + puts "\tTest$tstn.a: Create $method database." + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tstn.db + set env NULL + } else { + set testfile test$tstn.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + set ps 512 + set txn "" + set oflags "-create -pagesize $ps -mode 0644 $args $method" + set db [eval {berkdb_open} $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set nkeys 5 + # Fill page w/ key/data pairs. + # + puts "\tTest$tstn.b: Fill page with $nkeys small key/data pairs." 
+ for { set i 0 } { $i < $nkeys } { incr i } { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {${key}00000$i $data$i}] + error_check_good dbput $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + # get db ordering, set cursors + puts "\tTest$tstn.c: Set cursors on each of $nkeys pairs." + # if mkeys is above 1000, need to adjust below for lexical order + set mkeys 30000 + if { [is_compressed $args] } { + set mkeys 300 + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set mkeys 300 + } + for {set i 0; set ret [$db get ${key}00000$i]} {\ + $i < $nkeys && [llength $ret] != 0} {\ + incr i; set ret [$db get ${key}00000$i]} { + set key_set($i) [lindex [lindex $ret 0] 0] + set data_set($i) [lindex [lindex $ret 0] 1] + set dbc [eval {$db cursor} $txn] + set dbc_set($i) $dbc + error_check_good db_cursor:$i [is_substr $dbc_set($i) $db] 1 + set ret [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc_set($i)_get:set [llength $ret] 0 + } + + puts "\tTest$tstn.d: Add $mkeys pairs to force splits." + for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 10000 } { + set ret [eval {$db put} $txn {${key}0$i $data$i}] + } elseif { $i >= 1000 } { + set ret [eval {$db put} $txn {${key}00$i $data$i}] + } elseif { $i >= 100 } { + set ret [eval {$db put} $txn {${key}000$i $data$i}] + } elseif { $i >= 10 } { + set ret [eval {$db put} $txn {${key}0000$i $data$i}] + } else { + set ret [eval {$db put} $txn {${key}00000$i $data$i}] + } + error_check_good dbput:more $ret 0 + } + + puts "\tTest$tstn.e: Make sure splits happened." + # XXX cannot execute stat in presence of txns and cursors. 
+ if { $txnenv == 0 && !$is_je_test } { + error_check_bad stat:check-split [is_substr [$db stat] \ + "{{Internal pages} 0}"] 1 + } + + puts "\tTest$tstn.f: Check to see that cursors maintained reference." + for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + puts "\tTest$tstn.g: Delete added keys to force reverse splits." + for {set i $nkeys} { $i < $mkeys } { incr i } { + if { $i >= 10000 } { + set ret [eval {$db del} $txn {${key}0$i}] + } elseif { $i >= 1000 } { + set ret [eval {$db del} $txn {${key}00$i}] + } elseif { $i >= 100 } { + set ret [eval {$db del} $txn {${key}000$i}] + } elseif { $i >= 10 } { + set ret [eval {$db del} $txn {${key}0000$i}] + } else { + set ret [eval {$db del} $txn {${key}00000$i}] + } + error_check_good dbput:more $ret 0 + } + + puts "\tTest$tstn.h: Verify cursor reference." + for {set i 0} { $i < $nkeys } {incr i} { + set ret [$dbc_set($i) get -current] + error_check_bad dbc$i:get:current [llength $ret] 0 + set ret2 [$dbc_set($i) get -set $key_set($i)] + error_check_bad dbc$i:get:set [llength $ret2] 0 + error_check_good dbc$i:get(match) $ret $ret2 + } + + puts "\tTest$tstn.i: Cleanup." + # close cursors + for {set i 0} { $i < $nkeys } {incr i} { + error_check_good dbc_close:$i [$dbc_set($i) close] 0 + } + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good dbclose [$db close] 0 + + puts "\tTest$tstn complete." +} diff --git a/test/tcl/test089.tcl b/test/tcl/test089.tcl new file mode 100644 index 00000000..9212df7a --- /dev/null +++ b/test/tcl/test089.tcl @@ -0,0 +1,275 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test089 +# TEST Concurrent Data Store test (CDB) +# TEST +# TEST Enhanced CDB testing to test off-page dups, cursor dups and +# TEST cursor operations like c_del then c_get. +proc test089 { method {nentries 1000} args } { + global datastr + global encrypt + source ./include.tcl + + # + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test089 skipping for env $env" + return + } + set encargs "" + set args [convert_args $method $args] + set oargs [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + puts "Test089: ($oargs) $method CDB Test cursor/dup operations" + + # Process arguments + # Create the database and open the dictionary + set testfile test089.db + set testfile1 test089a.db + + env_cleanup $testdir + + set env [eval \ + {berkdb_env -create -cdb} $pageargs $encargs -home $testdir] + error_check_good dbenv [is_valid_env $env] TRUE + + set db [eval {berkdb_open -env $env -create \ + -mode 0644 $omethod} $oargs {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set db1 [eval {berkdb_open -env $env -create \ + -mode 0644 $omethod} $oargs {$testfile1}] + error_check_good dbopen [is_valid_db $db1] TRUE + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put each key/data pair + puts "\tTest089.a: Put loop" + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put:$db $ret 0 + set ret [eval {$db1 put} \ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put:$db1 $ret 0 + incr count + } + close $did + error_check_good close:$db [$db close] 0 
+ error_check_good close:$db1 [$db1 close] 0 + + # Database is created, now set up environment + + # Remove old mpools and Open/create the lock and mpool regions + error_check_good env:close:$env [$env close] 0 + set ret [eval {berkdb envremove} $encargs -home $testdir] + error_check_good env_remove $ret 0 + + set env [eval \ + {berkdb_env_noerr -create -cdb} $pageargs $encargs -home $testdir] + error_check_good dbenv [is_valid_widget $env env] TRUE + + puts "\tTest089.b: CDB cursor dups" + + set db1 [eval {berkdb_open_noerr -env $env -create \ + -mode 0644 $omethod} $oargs {$testfile1}] + error_check_good dbopen [is_valid_db $db1] TRUE + + # Create a read-only cursor and make sure we can't write with it. + set dbcr [$db1 cursor] + error_check_good dbcursor [is_valid_cursor $dbcr $db1] TRUE + set ret [$dbcr get -first] + catch { [$dbcr put -current data] } ret + error_check_good is_read_only \ + [is_substr $ret "Write attempted on read-only cursor"] 1 + error_check_good dbcr_close [$dbcr close] 0 + + # Create a write cursor and duplicate it. + set dbcw [$db1 cursor -update] + error_check_good dbcursor [is_valid_cursor $dbcw $db1] TRUE + set dup_dbcw [$dbcw dup] + error_check_good dup_write_cursor [is_valid_cursor $dup_dbcw $db1] TRUE + + # Position both cursors at get -first. They should find the same data. + set get_first [$dbcw get -first] + set get_first_dup [$dup_dbcw get -first] + error_check_good dup_read $get_first $get_first_dup + + # Test that the write cursors can both write and that they + # read each other's writes correctly. First write reversed + # datastr with original cursor and read with dup cursor. + error_check_good put_current_orig \ + [$dbcw put -current [chop_data $method [reverse $datastr]]] 0 + set reversed [$dup_dbcw get -current] + error_check_good check_with_dup [lindex [lindex $reversed 0] 1] \ + [pad_data $method [reverse $datastr]] + + # Write forward datastr with dup cursor and read with original. 
+ error_check_good put_current_dup \ + [$dup_dbcw put -current [chop_data $method $datastr]] 0 + set forward [$dbcw get -current] + error_check_good check_with_orig $forward $get_first + + error_check_good dbcw_close [$dbcw close] 0 + error_check_good dup_dbcw_close [$dup_dbcw close] 0 + + # This tests the failure found in #1923 + puts "\tTest089.c: Test delete then get" + + set dbc [$db1 cursor -update] + error_check_good dbcursor [is_valid_cursor $dbc $db1] TRUE + + for {set kd [$dbc get -first] } { [llength $kd] != 0 } \ + {set kd [$dbc get -next] } { + error_check_good dbcdel [$dbc del] 0 + } + error_check_good dbc_close [$dbc close] 0 + error_check_good db_close [$db1 close] 0 + error_check_good env_close [$env close] 0 + + if { [is_btree $method] != 1 } { + puts "Skipping rest of test089 for $method method." + return + } + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Skipping rest of test089 for specific pagesizes" + return + } + + append oargs " -dup " + # Skip unsorted duplicates for btree with compression. 
+ if { [is_compressed $args] == 0 } { + test089_dup $testdir $encargs $oargs $omethod $nentries + } + + append oargs " -dupsort " + test089_dup $testdir $encargs $oargs $omethod $nentries +} + +proc test089_dup { testdir encargs oargs method nentries } { + env_cleanup $testdir + set pageargs "" + split_pageargs $oargs pageargs + set env [eval \ + {berkdb_env -create -cdb} $encargs $pageargs -home $testdir] + error_check_good dbenv [is_valid_env $env] TRUE + + # + # Set pagesize small to generate lots of off-page dups + # + set page 512 + set nkeys 5 + set data "data" + set key "test089_key" + set testfile test089.db + puts "\tTest089.d: CDB ($oargs) off-page dups" + set oflags "-env $env -create -mode 0644 $oargs $method" + set db [eval {berkdb_open} -pagesize $page $oflags $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest089.e: Fill page with $nkeys keys, with $nentries dups" + for { set k 0 } { $k < $nkeys } { incr k } { + for { set i 0 } { $i < $nentries } { incr i } { + set ret [$db put $key$k $i$data$k] + error_check_good dbput $ret 0 + } + } + + # Verify we have off-page duplicates + set stat [$db stat] + error_check_bad stat:offpage [is_substr $stat "{{Internal pages} 0}"] 1 + + # This tests the failure reported in #6950. Skip for -dupsort. + puts "\tTest089.f: Clear locks for duped off-page dup cursors." + if { [is_substr $oargs dupsort] != 1 } { + # Create a read cursor, put it on an off-page dup. + set dbcr [$db cursor] + error_check_good dbcr [is_valid_cursor $dbcr $db] TRUE + set offpage [$dbcr get -get_both test089_key4 900data4] + error_check_bad offpage [llength $offpage] 0 + + # Create a write cursor, put it on an off-page dup. + set dbcw [$db cursor -update] + error_check_good dbcw [is_valid_cursor $dbcw $db] TRUE + set offpage [$dbcw get -get_both test089_key3 900data3] + error_check_bad offpage [llength $offpage] 0 + + # Add a new item using the write cursor, then close the cursor. 
+ error_check_good add_dup [$dbcw put -after $data] 0 + error_check_good close_dbcw [$dbcw close] 0 + + # Get next dup with read cursor, then close the cursor. + set nextdup [$dbcr get -nextdup] + error_check_good close_dbcr [$dbcr close] 0 + } + + puts "\tTest089.g: CDB duplicate write cursors with off-page dups" + # Create a write cursor and duplicate it. + set dbcw [$db cursor -update] + error_check_good dbcursor [is_valid_cursor $dbcw $db] TRUE + set dup_dbcw [$dbcw dup] + error_check_good dup_write_cursor [is_valid_cursor $dup_dbcw $db] TRUE + + # Position both cursors at get -first. They should find the same data. + set get_first [$dbcw get -first] + set get_first_dup [$dup_dbcw get -first] + error_check_good dup_read $get_first $get_first_dup + + # Test with -after and -before. Skip for -dupsort. + if { [is_substr $oargs dupsort] != 1 } { + # Original and duplicate cursors both point to first item. + # Do a put -before of new string with original cursor, + # and a put -after of new string with duplicate cursor. + set newdata "newdata" + error_check_good put_before [$dbcw put -before $newdata] 0 + error_check_good put_after [$dup_dbcw put -after $newdata] 0 + + # Now walk forward with original cursor ... + set first [$dbcw get -first] + error_check_good check_first [lindex [lindex $first 0] 1] $newdata + set next1 [$dbcw get -next] + error_check_good check_next1 $next1 $get_first + set next2 [$dbcw get -next] + error_check_good check_next2 [lindex [lindex $next2 0] 1] $newdata + + # ... and backward with duplicate cursor. 
+ set current [$dup_dbcw get -current] + error_check_good check_current [lindex [lindex $current 0] 1] $newdata + set prev1 [$dup_dbcw get -prev] + error_check_good check_prev1 $prev1 $get_first + set prev2 [$dup_dbcw get -prev] + error_check_good check_prev2 [lindex [lindex $prev2 0] 1] $newdata + } + + puts "\tTest089.h: test delete then get of off-page dups" + for {set kd [$dbcw get -first] } { [llength $kd] != 0 } \ + {set kd [$dbcw get -next] } { + error_check_good dbcdel [$dbcw del] 0 + } + + error_check_good dbcw_close [$dbcw close] 0 + error_check_good dup_dbcw_close [$dup_dbcw close] 0 + + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/test090.tcl b/test/tcl/test090.tcl new file mode 100644 index 00000000..5a37f5a9 --- /dev/null +++ b/test/tcl/test090.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test090 +# TEST Test for functionality near the end of the queue using test001. +proc test090 { method {nentries 10000} {tnum "090"} args} { + if { [is_queueext $method ] == 0 } { + puts "Skipping test$tnum for $method." + return; + } + eval {test001 $method $nentries 4294967000 0 $tnum} $args +} diff --git a/test/tcl/test091.tcl b/test/tcl/test091.tcl new file mode 100644 index 00000000..f2d94f62 --- /dev/null +++ b/test/tcl/test091.tcl @@ -0,0 +1,19 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test091 +# TEST Test of DB_CONSUME_WAIT. +proc test091 { method {nconsumers 4} \ + {nproducers 2} {nitems 1000} {start 0 } {tnum "091"} args} { + if { [is_queue $method ] == 0 } { + puts "Skipping test0$tnum for $method." 
+ return; + } + eval {test070 $method \ + $nconsumers $nproducers $nitems WAIT $start -txn $tnum } $args + eval {test070 $method \ + $nconsumers $nproducers $nitems WAIT $start -cdb $tnum } $args +} diff --git a/test/tcl/test092.tcl b/test/tcl/test092.tcl new file mode 100644 index 00000000..a2484148 --- /dev/null +++ b/test/tcl/test092.tcl @@ -0,0 +1,252 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test092 +# TEST Test of DB_DIRTY_READ [#3395] +# TEST +# TEST We set up a database with nentries in it. We then open the +# TEST database read-only twice. One with dirty reads and one without. +# TEST We open the database for writing and update some entries in it. +# TEST Then read those new entries via db->get (clean and dirty), and +# TEST via cursors (clean and dirty). +proc test092 { method {nentries 1000} args } { + source ./include.tcl + # + # If we are using an env, then skip this test. It needs its own. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test092 skipping for env $env" + return + } + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + puts "Test092: Dirty Read Test $method $nentries" + + # Create the database and open the dictionary + set testfile test092.db + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + + env_cleanup $testdir + + set lmax [expr $nentries * 2] + set lomax [expr $nentries * 2] + set env [eval \ + {berkdb_env -create -txn} $pageargs $encargs -home $testdir \ + -lock_max_locks $lmax -lock_max_objects $lomax] + error_check_good dbenv [is_valid_env $env] TRUE + + set db [eval {berkdb_open -env $env -create \ + -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + # Here is the loop where we put each key/data pair. + # Key is entry, data is entry also. + puts "\tTest092.a: put loop" + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + global kvals + + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + } + set ret [eval {$db put} {$key [chop_data $method $str]}] + error_check_good put:$db $ret 0 + incr count + } + close $did + error_check_good close:$db [$db close] 0 + + puts "\tTest092.b: Opening all the handles" + # + # Open all of our handles. + # We need: + # 1. Our main txn (t). + # 2. A txn that can read dirty data (tdr). + # 3. A db handle for writing via txn (dbtxn). + # 4. A db handle for clean data (dbcl). + # 5. A db handle for dirty data (dbdr). + # 6. A cursor handle for dirty txn data (clean db handle using + # the dirty txn handle on the cursor call) (dbccl1). + # 7. A cursor handle for dirty data (dirty on get call) (dbcdr0). + # 8. 
A cursor handle for dirty data (dirty on cursor call) (dbcdr1). + set t [$env txn] + error_check_good txnbegin [is_valid_txn $t $env] TRUE + + set tdr [$env txn -read_uncommitted] + error_check_good txnbegin:dr [is_valid_txn $tdr $env] TRUE + set dbtxn [eval {berkdb_open -auto_commit -env $env -read_uncommitted \ + -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen:dbtxn [is_valid_db $dbtxn] TRUE + + set dbcl [eval {berkdb_open -auto_commit -env $env \ + -rdonly -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen:dbcl [is_valid_db $dbcl] TRUE + + set dbdr [eval {berkdb_open -auto_commit -env $env -read_uncommitted \ + -rdonly -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen:dbdr [is_valid_db $dbdr] TRUE + + set dbccl [$dbcl cursor -txn $tdr] + error_check_good dbcurs:dbcl [is_valid_cursor $dbccl $dbcl] TRUE + + set dbcdr0 [$dbdr cursor] + error_check_good dbcurs:dbdr0 [is_valid_cursor $dbcdr0 $dbdr] TRUE + + set dbcdr1 [$dbdr cursor -read_uncommitted] + error_check_good dbcurs:dbdr1 [is_valid_cursor $dbcdr1 $dbdr] TRUE + + # Test that $db stat can use -read_uncommitted flag. + puts "\tTest092.c: Smoke test for db_stat -txn -read_uncommitted" + if { [catch \ + {set statret [$dbcl stat -txn $t -read_uncommitted]} res] } { + puts "FAIL: db_stat -txn -read_uncommitted returned $res" + } + + # + # Now that we have all of our handles, change all the data in there + # to be the key and data the same, but data is capitalized. 
+ puts "\tTest092.d: put/get data within a txn" + set gflags "" + if { [is_record_based $method] == 1 } { + set checkfunc test092dr_recno.check + append gflags " -recno" + } else { + set checkfunc test092dr.check + } + set count 0 + set did [open $dict] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + set ustr [string toupper $str] + set clret [list [list $key [pad_data $method $str]]] + set drret [list [list $key [pad_data $method $ustr]]] + # + # Put the data in the txn. + # + set ret [eval {$dbtxn put} -txn $t \ + {$key [chop_data $method $ustr]}] + error_check_good put:$dbtxn $ret 0 + + # + # Now get the data using the different db handles and + # make sure it is dirty or clean data. + # + # Using the dirty txn should show us dirty data + set ret [eval {$dbcl get -txn $tdr} $gflags {$key}] + error_check_good dbdr2:get $ret $drret + + set ret [eval {$dbdr get -read_uncommitted} $gflags {$key}] + error_check_good dbdr1:get $ret $drret + + set ret [eval {$dbdr get -txn $tdr} $gflags {$key}] + error_check_good dbdr2:get $ret $drret + + incr count + } + close $did + + puts "\tTest092.e: Check dirty data using dirty txn and clean db/cursor" + dump_file_walk $dbccl $t1 $checkfunc "-first" "-next" + + puts "\tTest092.f: Check dirty data using -read_uncommitted cget flag" + dump_file_walk \ + $dbcdr0 $t2 $checkfunc "-first" "-next" "-read_uncommitted" + + puts "\tTest092.g: Check dirty data using -read_uncommitted cursor" + dump_file_walk $dbcdr1 $t3 $checkfunc "-first" "-next" + + # + # We must close these before aborting the real txn + # because they all hold read locks on the pages. + # + error_check_good dbccl:close [$dbccl close] 0 + error_check_good dbcdr0:close [$dbcdr0 close] 0 + error_check_good dbcdr1:close [$dbcdr1 close] 0 + + # + # Now abort the modifying transaction and rerun the data checks. 
+ # + puts "\tTest092.h: Aborting the write-txn" + error_check_good txnabort [$t abort] 0 + + set dbccl [$dbcl cursor -txn $tdr] + error_check_good dbcurs:dbcl [is_valid_cursor $dbccl $dbcl] TRUE + + set dbcdr0 [$dbdr cursor] + error_check_good dbcurs:dbdr0 [is_valid_cursor $dbcdr0 $dbdr] TRUE + + set dbcdr1 [$dbdr cursor -read_uncommitted] + error_check_good dbcurs:dbdr1 [is_valid_cursor $dbcdr1 $dbdr] TRUE + + if { [is_record_based $method] == 1 } { + set checkfunc test092cl_recno.check + } else { + set checkfunc test092cl.check + } + puts "\tTest092.i: Check clean data using -read_uncommitted cget flag" + dump_file_walk $dbccl $t1 $checkfunc "-first" "-next" + + puts "\tTest092.j: Check clean data using -read_uncommitted cget flag" + dump_file_walk \ + $dbcdr0 $t2 $checkfunc "-first" "-next" "-read_uncommitted" + + puts "\tTest092.k: Check clean data using -read_uncommitted cursor" + dump_file_walk $dbcdr1 $t3 $checkfunc "-first" "-next" + + # Clean up our handles + error_check_good dbccl:close [$dbccl close] 0 + error_check_good tdrcommit [$tdr commit] 0 + error_check_good dbcdr0:close [$dbcdr0 close] 0 + error_check_good dbcdr1:close [$dbcdr1 close] 0 + error_check_good dbclose [$dbcl close] 0 + error_check_good dbclose [$dbdr close] 0 + error_check_good dbclose [$dbtxn close] 0 + error_check_good envclose [$env close] 0 +} + +# Check functions for test092; keys and data are identical +# Clean checks mean keys and data are identical. +# Dirty checks mean data are uppercase versions of keys. 
+proc test092cl.check { key data } { + error_check_good "key/data mismatch" $key $data +} + +proc test092cl_recno.check { key data } { + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data $kvals($key) +} + +proc test092dr.check { key data } { + error_check_good "key/data mismatch" $key [string tolower $data] +} + +proc test092dr_recno.check { key data } { + global kvals + + error_check_good key"$key"_exists [info exists kvals($key)] 1 + error_check_good "key/data mismatch, key $key" $data \ + [string toupper $kvals($key)] +} + diff --git a/test/tcl/test093.tcl b/test/tcl/test093.tcl new file mode 100644 index 00000000..1245e01b --- /dev/null +++ b/test/tcl/test093.tcl @@ -0,0 +1,445 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test093 +# TEST Test set_bt_compare (btree key comparison function) and +# TEST set_h_compare (hash key comparison function). +# TEST +# TEST Open a database with a comparison function specified, +# TEST populate, and close, saving a list with that key order as +# TEST we do so. Reopen and read in the keys, saving in another +# TEST list; the keys should be in the order specified by the +# TEST comparison function. Sort the original saved list of keys +# TEST using the comparison function, and verify that it matches +# TEST the keys as read out of the database. + +proc test093 { method {nentries 10000} {tnum "093"} args} { + source ./include.tcl + + set dbargs [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_btree $method] == 1 } { + set compflag -btcompare + } elseif { [is_hash $method] == 1 } { + set compflag -hashcompare + } else { + puts "Test$tnum: skipping for method $method." + return + } + + # Verification fails for 64k pages on some systems because + # verify's private mpool gets filled. 
The Tcl API doesn't + # currently allow for resizing this cache. Just skip the + # test for 64k pages because it's not adding much anyway. + # + set pagesize 0 + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + if { $pagesize >= "65536" } { + puts "Skipping test$tnum for 64k pages." + return + } + } + + set txnenv 0 + set eindex [lsearch -exact $dbargs "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $dbargs $eindex] + set envflags [$env get_open_flags] + + # We can't run this test for the -thread option because + # the comparison function requires the ability to allocate + # memory at the DBT level and our Tcl interface does not + # offer that. + if { [lsearch -exact $envflags "-thread"] != -1 } { + puts "Skipping Test$tnum for threaded env" + return + } + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append dbargs " -auto_commit " + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + cleanup $testdir $env + } else { + set env NULL + } + + puts "Test$tnum: $method ($args) $nentries entries using $compflag" + + test093_run $omethod $dbargs $nentries $tnum \ + $compflag test093_cmp1 test093_sort1 + test093_runbig $omethod $dbargs $nentries $tnum \ + $compflag test093_cmp1 test093_sort1 + test093_run $omethod $dbargs $nentries $tnum \ + $compflag test093_cmp2 test093_sort2 + + # Don't bother running the second, really slow, comparison + # function on test093_runbig (file contents). + + # Clean up so general verification (without the custom comparison + # function) doesn't fail. + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env +} + +proc test093_run { method dbargs nentries tnum compflag cmpfunc sortfunc } { + source ./include.tcl + global btvals + global btvalsck + + # We'll need any encryption args separated from the db args + # so we can pass them to dbverify. 
+ set encargs "" + set dbargs [split_encargs $dbargs encargs] + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set eindex [lsearch -exact $dbargs "-env"] + set txnenv 0 + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + set envargs "" + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $dbargs $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + set testdir [get_home $env] + } + cleanup $testdir $env + + set db [eval {berkdb_open $compflag $cmpfunc \ + -create -mode 0644} $method $encargs $dbargs $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set txn "" + + # Use btvals to save the order of the keys as they are + # written to the database. The btvalsck variable will contain + # the values as sorted by the comparison function. + set btvals {} + set btvalsck {} + + puts "\tTest$tnum.a: put/get loop" + # Here is the loop where we put and get each key/data pair + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + set str [reverse $str] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + lappend btvals $key + + set ret [eval {$db get $key}] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test093_check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Run verify to check the internal structure and order. + if { [catch {eval {berkdb dbverify} $compflag $cmpfunc\ + $envargs $encargs {$testfile}} res] } { + error "FAIL: Verification failed with $res" + } + + # Now compare the keys to see if they match the dictionary (or ints) + filehead $nentries $dict $t2 + filesort $t2 $t3 + file rename -force $t3 $t2 + filesort $t1 $t3 + + error_check_good Test$tnum:diff($t3,$t2) \ + [filecmp $t3 $t2] 0 + + puts "\tTest$tnum.c: dump file in order" + # Now, reopen the file and run the last test again. + # We open it here, ourselves, because all uses of the db + # need to have the correct comparison func set. Then + # call dump_file_direction directly. + set btvalsck {} + set db [eval {berkdb_open $compflag $cmpfunc -rdonly} \ + $dbargs $encargs $method $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file_direction $db $txn $t1 test093_check "-first" "-next" + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + if { [is_hash $method] == 1 || [is_partition_callback $dbargs] == 1 } { + return + } + + # We need to sort btvals according to the comparison function. + # Once that is done, btvalsck and btvals should be the same. 
+ puts "\tTest$tnum.d: check file order" + + $sortfunc + + error_check_good btvals:len [llength $btvals] [llength $btvalsck] + for {set i 0} {$i < $nentries} {incr i} { + error_check_good vals:$i [lindex $btvals $i] \ + [lindex $btvalsck $i] + } +} + +proc test093_runbig { method dbargs nentries tnum compflag cmpfunc sortfunc } { + source ./include.tcl + global btvals + global btvalsck + + # We'll need any encryption args separated from the db args + # so we can pass them to dbverify. + set encargs "" + set dbargs [split_encargs $dbargs encargs] + + # Create the database and open the dictionary + set eindex [lsearch -exact $dbargs "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + set envargs "" + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $dbargs $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + set testdir [get_home $env] + } + cleanup $testdir $env + + set db [eval {berkdb_open $compflag $cmpfunc \ + -create -mode 0644} $method $encargs $dbargs $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + set t4 $testdir/t4 + set t5 $testdir/t5 + set txn "" + set btvals {} + set btvalsck {} + puts "\tTest$tnum.e:\ + big key put/get loop key=filecontents data=filename" + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list 1] + + set count 0 + foreach f $file_list { + set fid [open $f r] + fconfigure $fid -translation binary + set key [read $fid] + close $fid + + set key $f$key + + set fcopy [open $t5 w] + fconfigure $fcopy -translation binary + puts -nonewline $fcopy $key + close $fcopy + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$key 
\ + [chop_data $method $f]}] + error_check_good put_file $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + lappend btvals $key + + # Should really catch errors + set fid [open $t4 w] + fconfigure $fid -translation binary + if [catch {eval {$db get} {$key}} data] { + puts -nonewline $fid $data + } else { + # Data looks like {{key data}} + set key [lindex [lindex $data 0] 0] + puts -nonewline $fid $key + } + close $fid + error_check_good \ + Test093:diff($t5,$t4) [filecmp $t5 $t4] 0 + + incr count + } + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest$tnum.f: big dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test093_checkbig + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Run verify to check the internal structure and order. + if { [catch {eval {berkdb dbverify} $compflag $cmpfunc\ + $envargs $encargs {$testfile}} res] } { + error "FAIL: Verification failed with $res" + } + + puts "\tTest$tnum.g: dump file in order" + # Now, reopen the file and run the last test again. + # We open it here, ourselves, because all uses of the db + # need to have the correct comparison func set. Then + # call dump_file_direction directly. 
+ + set btvalsck {} + set db [eval {berkdb_open $compflag $cmpfunc -rdonly} \ + $encargs $dbargs $method $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file_direction $db $txn $t1 test093_checkbig "-first" "-next" + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + if { [is_hash $method] == 1 || [is_partition_callback $dbargs] == 1 } { + return + } + + # We need to sort btvals according to the comparison function. + # Once that is done, btvalsck and btvals should be the same. + puts "\tTest$tnum.h: check file order" + + $sortfunc + error_check_good btvals:len [llength $btvals] [llength $btvalsck] + + set end [llength $btvals] + for {set i 0} {$i < $end} {incr i} { + error_check_good vals:$i [lindex $btvals $i] \ + [lindex $btvalsck $i] + } +} + +# Simple bt comparison. +proc test093_cmp1 { a b } { + return [string compare $b $a] +} + +# Simple bt sorting. +proc test093_sort1 {} { + global btvals + # + # This one is easy, just sort in reverse. + # + set btvals [lsort -decreasing $btvals] +} + +proc test093_cmp2 { a b } { + set arev [reverse $a] + set brev [reverse $b] + return [string compare $arev $brev] +} + +proc test093_sort2 {} { + global btvals + + # We have to reverse them, then sorts them. + # Then reverse them back to real words. 
+ set rbtvals {} + foreach i $btvals { + lappend rbtvals [reverse $i] + } + set rbtvals [lsort -increasing $rbtvals] + set newbtvals {} + foreach i $rbtvals { + lappend newbtvals [reverse $i] + } + set btvals $newbtvals +} + +# Check function for test093; keys and data are identical +proc test093_check { key data } { + global btvalsck + + error_check_good "key/data mismatch" $data [reverse $key] + lappend btvalsck $key +} + +# Check function for test093 big keys; +proc test093_checkbig { key data } { + source ./include.tcl + global btvalsck + + set fid [open $data r] + fconfigure $fid -translation binary + set cont [read $fid] + close $fid + error_check_good "key/data mismatch" $key $data$cont + lappend btvalsck $key +} + diff --git a/test/tcl/test094.tcl b/test/tcl/test094.tcl new file mode 100644 index 00000000..0ed43c95 --- /dev/null +++ b/test/tcl/test094.tcl @@ -0,0 +1,201 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test094 +# TEST Test using set_dup_compare. +# TEST +# TEST Use the first 10,000 entries from the dictionary. +# TEST Insert each with self as key and data; retrieve each. +# TEST After all are entered, retrieve all; compare output to original. +# TEST Close file, reopen, do retrieve and re-verify. +proc test094 { method {nentries 10000} {ndups 10} {tnum "094"} args} { + source ./include.tcl + global errorInfo + + set dbargs [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_btree $method] != 1 && [is_hash $method] != 1 } { + puts "Test$tnum: skipping for method $method." + return + } + + # We'll need any encryption args separated from the db args + # so we can pass them to dbverify. + set encargs "" + set dbargs [split_encargs $dbargs encargs] + + set txnenv 0 + set eindex [lsearch -exact $dbargs "-env"] + # + # If we are using an env, then testfile should just be the db name. 
+ # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum-a.db + set env NULL + set envargs "" + } else { + set testfile test$tnum-a.db + incr eindex + set env [lindex $dbargs $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append dbargs " -auto_commit " + if { $nentries == 10000 } { + set nentries 100 + } + reduce_dups nentries ndups + } + set testdir [get_home $env] + } + puts "Test$tnum: $method ($args) $nentries \ + with $ndups dups using dupcompare" + + cleanup $testdir $env + + set db [eval {berkdb_open -dupcompare test094_cmp -dup -dupsort\ + -create -mode 0644} $omethod $encargs $dbargs {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + set t1 $testdir/t1 + set pflags "" + set gflags "" + set txn "" + puts "\tTest$tnum.a: $nentries put/get duplicates loop" + # Here is the loop where we put and get each key/data pair + set count 0 + set dlist {} + for {set i 0} {$i < $ndups} {incr i} { + set dlist [linsert $dlist 0 $i] + } + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + for {set i 0} {$i < $ndups} {incr i} { + set data $i:$str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $omethod $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good get [llength $ret] $ndups + incr count + } + close $did + # Now we will get each key from the DB and compare the results + # to the original. 
+ puts "\tTest$tnum.b: traverse checking duplicates before close" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + # Now run verify to check the internal structure and order. + if { [catch {eval {berkdb dbverify} -dupcompare test094_cmp\ + $envargs $encargs {$testfile}} res] } { + puts "FAIL: Verification failed with $res" + } + + # Set up second testfile so truncate flag is not needed. + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum-b.db + set env NULL + } else { + set testfile test$tnum-b.db + set env [lindex $dbargs $eindex] + set testdir [get_home $env] + } + cleanup $testdir $env + + # + # Test dupcompare with data items big enough to force offpage dups. 
+ # + puts "\tTest$tnum.c:\ + big key put/get dup loop key=filename data=filecontents" + set db [eval {berkdb_open -dupcompare test094_cmp -dup -dupsort \ + -create -mode 0644} $omethod $encargs $dbargs $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Here is the loop where we put and get each key/data pair + set file_list [get_file_list 1] + if { [llength $file_list] > $nentries } { + set file_list [lrange $file_list 1 $nentries] + } + + set count 0 + foreach f $file_list { + set fid [open $f r] + fconfigure $fid -translation binary + set cont [read $fid] + close $fid + + set key $f + for {set i 0} {$i < $ndups} {incr i} { + set data $i:$cont + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $omethod $data]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good get [llength $ret] $ndups + incr count + } + + puts "\tTest$tnum.d: traverse checking duplicates before close" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dup_file_check $db $txn $t1 $dlist + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + set testdir [get_home $env] + } + error_check_good db_close [$db close] 0 + + # Run verify to check the internal structure and order. + if { [catch {eval {berkdb dbverify} -dupcompare test094_cmp\ + $envargs $encargs {$testfile}} res] } { + puts "FAIL: Verification failed with $res" + } + + # Clean up the test directory, otherwise the general verify + # (without dupcompare) will fail. + cleanup $testdir $env +} + +# Simple dup comparison. 
+proc test094_cmp { a b } { + return [string compare $b $a] +} diff --git a/test/tcl/test095.tcl b/test/tcl/test095.tcl new file mode 100644 index 00000000..f1793dca --- /dev/null +++ b/test/tcl/test095.tcl @@ -0,0 +1,369 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test095 +# TEST Bulk get test for methods supporting dups. [#2934] +proc test095 { method {tnum "095"} args } { + source ./include.tcl + global is_je_test + global is_qnx_test + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + # If we've our own env, no reason to swap--this isn't + # an mpool test. + set carg { -cachesize {0 25000000 0} } + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + puts "Skipping for environment with txns" + return + } + set testdir [get_home $env] + set carg {} + } + cleanup $testdir $env + + puts "Test$tnum: $method ($args) Bulk get test" + + # Tcl leaves a lot of memory allocated after this test + # is run in the tclsh. This ends up being a problem on + # QNX runs as later tests then run out of memory. + if { $is_qnx_test } { + puts "Test$tnum skipping for QNX" + return + } + if { [is_record_based $method] == 1 || [is_rbtree $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + # The test's success is dependent on the relationship between + # the amount of data loaded and the buffer sizes we pick, so + # these parameters don't belong on the command line. 
+ set nsets 300 + set noverflows 25 + + # We run the meat of the test twice: once with unsorted dups, + # once with sorted dups. + foreach { dflag sort } { -dup unsorted {-dup -dupsort} sorted } { + if { $is_je_test || [is_compressed $args] } { + if { $sort == "unsorted" } { + continue + } + } + + set testfile $basename-$sort.db + set did [open $dict] + + # Open and populate the database with $nsets sets of dups. + # Each set contains as many dups as its number + puts "\tTest$tnum.a:\ + Creating database with $nsets sets of $sort dups." + set dargs "$dflag $carg $args" + set db [eval {berkdb_open_noerr -create} \ + $omethod $dargs $testfile] + error_check_good db_open [is_valid_db $db] TRUE + t95_populate $db $did $nsets 0 + + # Determine the pagesize so we can use it to size the buffer. + set stat [$db stat] + set pagesize [get_pagesize $stat] + + # Run basic get tests. + # + # A small buffer will fail if it is smaller than the pagesize. + # Skip small buffer tests if the page size is so small that + # we can't define a buffer smaller than the page size. + # (Buffers must be 1024 or multiples of 1024.) + # + # A big buffer of 66560 (64K + 1K) should always be large + # enough to contain the data, so the test should succeed + # on all platforms. We picked this number because it + # is larger than the largest allowed pagesize, so the test + # always fills more than a page at some point. + + set maxpage [expr 1024 * 64] + set bigbuf [expr $maxpage + 1024] + set smallbuf 1024 + + if { $pagesize > 1024 } { + t95_gettest $db $tnum b $smallbuf 1 + } else { + puts "Skipping small buffer test Test$tnum.b" + } + t95_gettest $db $tnum c $bigbuf 0 + + # Run cursor get tests. 
+ if { $pagesize > 1024 } { + t95_cgettest $db $tnum d $smallbuf 1 + } else { + puts "Skipping small buffer test Test$tnum.d" + } + t95_cgettest $db $tnum e $bigbuf 0 + + # Run invalid flag combination tests + # Sync and reopen test file so errors won't be sent to stderr + error_check_good db_sync [$db sync] 0 + set noerrdb [eval berkdb_open_noerr $dargs $testfile] + t95_flagtest $noerrdb $tnum f [expr 8192] + t95_cflagtest $noerrdb $tnum g [expr 100] + error_check_good noerrdb_close [$noerrdb close] 0 + + # Set up for overflow tests + set max [expr 4096 * $noverflows] + puts "\tTest$tnum.h: Add $noverflows overflow sets\ + to database (max item size $max)" + t95_populate $db $did $noverflows 4096 + + # Run overflow get tests. The overflow test fails with + # our standard big buffer doubled, but succeeds with a + # buffer sized to handle $noverflows pairs of data of + # size $max. + t95_gettest $db $tnum i $bigbuf 1 + t95_gettest $db $tnum j [expr $bigbuf * 2] 1 + t95_gettest $db $tnum k [expr $max * $noverflows * 2] 0 + + # Run overflow cursor get tests. + t95_cgettest $db $tnum l $bigbuf 1 + # Expand buffer to accommodate basekey as well as the padding.
+ t95_cgettest $db $tnum m [expr ($max + 512) * 2] 0 + + error_check_good db_close [$db close] 0 + close $did + } +} + +proc t95_gettest { db tnum letter bufsize expectfail } { + t95_gettest_body $db $tnum $letter $bufsize $expectfail 0 +} +proc t95_cgettest { db tnum letter bufsize expectfail } { + t95_gettest_body $db $tnum $letter $bufsize $expectfail 1 +} +proc t95_flagtest { db tnum letter bufsize } { + t95_flagtest_body $db $tnum $letter $bufsize 0 +} +proc t95_cflagtest { db tnum letter bufsize } { + t95_flagtest_body $db $tnum $letter $bufsize 1 +} + +# Basic get test +proc t95_gettest_body { db tnum letter bufsize expectfail usecursor } { + global errorCode + + foreach flag { multi multi_key } { + if { $usecursor == 0 } { + if { $flag == "multi_key" } { + # db->get does not allow multi_key + continue + } else { + set action "db get -$flag" + } + } else { + set action "dbc get -$flag -set/-next" + } + puts "\tTest$tnum.$letter: $action with bufsize $bufsize" + set allpassed TRUE + set saved_err "" + + # Cursor for $usecursor. + if { $usecursor != 0 } { + set getcurs [$db cursor] + error_check_good getcurs [is_valid_cursor $getcurs $db] TRUE + } + + # Traverse DB with cursor; do get/c_get($flag) on each item. 
+ set dbc [$db cursor] + error_check_good is_valid_dbc [is_valid_cursor $dbc $db] TRUE + for { set dbt [$dbc get -first] } { [llength $dbt] != 0 } \ + { set dbt [$dbc get -nextnodup] } { + set key [lindex [lindex $dbt 0] 0] + set datum [lindex [lindex $dbt 0] 1] + + if { $usecursor == 0 } { + set ret [catch {eval $db get -$flag $bufsize $key} res] + } else { + set res {} + for { set ret [catch {eval $getcurs get -$flag $bufsize\ + -set $key} tres] } \ + { $ret == 0 && [llength $tres] != 0 } \ + { set ret [catch {eval $getcurs get -$flag $bufsize\ + -nextdup} tres]} { + eval lappend res $tres + } + } + + # If we expect a failure, be more tolerant if the above + # fails; just make sure it's a DB_BUFFER_SMALL or an + # EINVAL (if the buffer is smaller than the pagesize, + # it's EINVAL), mark it, and move along. + if { $expectfail != 0 && $ret != 0 } { + if { [is_substr $errorCode DB_BUFFER_SMALL] != 1 && \ + [is_substr $errorCode EINVAL] != 1 } { + error_check_good \ + "$flag failure errcode" \ + $errorCode "DB_BUFFER_SMALL or EINVAL" + } + set allpassed FALSE + continue + } + error_check_good "get_$flag ($key)" $ret 0 + if { $flag == "multi_key" } { + t95_verify $res TRUE + } else { + t95_verify $res FALSE + } + } + set ret [catch {eval $db get -$flag $bufsize} res] + + if { $expectfail == 1 } { + error_check_good allpassed $allpassed FALSE + puts "\t\tTest$tnum.$letter:\ + returned at least one DB_BUFFER_SMALL (as expected)" + } else { + error_check_good allpassed $allpassed TRUE + puts "\t\tTest$tnum.$letter: succeeded (as expected)" + } + + error_check_good dbc_close [$dbc close] 0 + if { $usecursor != 0 } { + error_check_good getcurs_close [$getcurs close] 0 + } + } +} + +# Test of invalid flag combinations +proc t95_flagtest_body { db tnum letter bufsize usecursor } { + global errorCode + + foreach flag { multi multi_key } { + if { $usecursor == 0 } { + if { $flag == "multi_key" } { + # db->get does not allow multi_key + continue + } else { + set action "db get 
-$flag" + } + } else { + set action "dbc get -$flag" + } + puts "\tTest$tnum.$letter: $action with invalid flag combinations" + + # Cursor for $usecursor. + if { $usecursor != 0 } { + set getcurs [$db cursor] + error_check_good getcurs [is_valid_cursor $getcurs $db] TRUE + } + + if { $usecursor == 0 } { + # Disallowed flags for db->get + set badflags [list consume consume_wait {rmw some_key}] + + foreach badflag $badflags { + catch {eval $db get -$flag $bufsize -$badflag} ret + error_check_good \ + db:get:$flag:$badflag [is_substr $errorCode EINVAL] 1 + } + } else { + # Disallowed flags for db->cget + set cbadflags [list last get_recno join_item \ + {multi_key 1000} prev prevnodup] + + set dbc [$db cursor] + $dbc get -first + foreach badflag $cbadflags { + catch {eval $dbc get -$flag $bufsize -$badflag} ret + error_check_good dbc:get:$flag:$badflag \ + [is_substr $errorCode EINVAL] 1 + } + error_check_good dbc_close [$dbc close] 0 + } + if { $usecursor != 0 } { + error_check_good getcurs_close [$getcurs close] 0 + } + } + puts "\t\tTest$tnum.$letter completed" +} + +# Verify that a passed-in list of key/data pairs all match the predicted +# structure (e.g. {{thing1 thing1.0}}, {{key2 key2.0} {key2 key2.1}}). +proc t95_verify { res multiple_keys } { + global alphabet + + set i 0 + set orig_key [lindex [lindex $res 0] 0] + set nkeys [string trim $orig_key $alphabet'] + set base_key [string trim $orig_key 0123456789] + set datum_count 0 + + while { 1 } { + set key [lindex [lindex $res $i] 0] + set datum [lindex [lindex $res $i] 1] + + if { $datum_count >= $nkeys } { + if { [llength $key] != 0 } { + # If there are keys beyond $nkeys, we'd + # better have multiple_keys set. + error_check_bad "keys beyond number $i allowed"\ + $multiple_keys FALSE + + # If multiple_keys is set, accept the new key. 
+ set orig_key $key + set nkeys [eval string trim \ + $orig_key {$alphabet'}] + set base_key [eval string trim \ + $orig_key 0123456789] + set datum_count 0 + } else { + # datum_count has hit nkeys. We're done. + return + } + } + + error_check_good returned_key($i) $key $orig_key + error_check_good returned_datum($i) \ + $datum $base_key.[format %4u $datum_count] + incr datum_count + incr i + } +} + +# Add nsets dup sets, each consisting of {word$ndups word$n} pairs, +# with "word" having (i * pad_bytes) bytes extra padding. +proc t95_populate { db did nsets pad_bytes } { + set txn "" + for { set i 1 } { $i <= $nsets } { incr i } { + # basekey is a padded dictionary word + gets $did basekey + + append basekey [repeat "a" [expr $pad_bytes * $i]] + + # key is basekey with the number of dups stuck on. + set key $basekey$i + + for { set j 0 } { $j < $i } { incr j } { + set data $basekey.[format %4u $j] + error_check_good db_put($key,$data) \ + [eval {$db put} $txn {$key $data}] 0 + } + } + + # This will make debugging easier, and since the database is + # read-only from here out, it's cheap. + error_check_good db_sync [$db sync] 0 +} diff --git a/test/tcl/test096.tcl b/test/tcl/test096.tcl new file mode 100644 index 00000000..d2300cb6 --- /dev/null +++ b/test/tcl/test096.tcl @@ -0,0 +1,398 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test096 +# TEST Db->truncate test. +# TEST For all methods: +# TEST Test that truncate empties an existing database. +# TEST Test that truncate-write in an aborted txn doesn't +# TEST change the original contents. +# TEST Test that truncate-write in a committed txn does +# TEST overwrite the original contents. +# TEST For btree and hash, do the same in a database with offpage dups. 
+proc test096 { method {pagesize 512} {nentries 1000} {ndups 19} args} { + global fixed_len + global alphabet + source ./include.tcl + + set orig_tdir $testdir + set orig_fixed_len $fixed_len + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + + if { [is_partitioned $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + puts "Test096: $method db truncate method test" + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test096: Skipping for specific pagesizes" + return + } + + # Create the database and open the dictionary + set eindex [lsearch -exact $args "-env"] + set testfile test096.db + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 0 } { + puts "Environment w/o txns specified; skipping." + return + } + if { $nentries == 1000 } { + set nentries 100 + } + reduce_dups nentries ndups + set testdir [get_home $env] + set closeenv 0 + } else { + env_cleanup $testdir + + # We need an env for exclusive-use testing. Since we are + # using txns, we need at least 1 lock per record for queue. + # We need more locks for heap, because of the aux databases. 
+ set lockmax [expr $nentries * 3] + if { [is_heap $method] } { + set lockmax [expr $lockmax * 3] + } + set env [eval {berkdb_env -create -home $testdir \ + -lock_max_locks $lockmax -lock_max_objects $lockmax \ + -pagesize $pagesize -txn} $encargs] + error_check_good env_create [is_valid_env $env] TRUE + set closeenv 1 + } + + set t1 $testdir/t1 + + puts "\tTest096.a: Create database with $nentries entries" + set db [eval {berkdb_open -create -auto_commit \ + -env $env $omethod -mode 0644} $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + t96_populate $db $omethod $env $nentries + error_check_good dbclose [$db close] 0 + + puts "\tTest096.b: Truncate database" + set dbtr [eval {berkdb_open -create -auto_commit \ + -env $env $omethod -mode 0644} $args $testfile] + error_check_good db_open [is_valid_db $dbtr] TRUE + + set ret [$dbtr truncate] + error_check_good dbtrunc $ret $nentries + error_check_good db_close [$dbtr close] 0 + + set db [eval {berkdb_open -env $env} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set number [number_of_entries $db $method] + error_check_good number_of_entries $number 0 + error_check_good dbclose [$db close] 0 + error_check_good dbverify [verify_dir $testdir "\tTest096.c: " 0 0 $nodump] 0 + + # Remove and recreate database. 
+ puts "\tTest096.d: Recreate database with $nentries entries" + set db [eval {berkdb_open -create -auto_commit \ + -env $env $omethod -mode 0644} $args {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + t96_populate $db $omethod $env $nentries + error_check_good dbclose [$db close] 0 + + puts "\tTest096.e: Truncate and write in a txn, then abort" + txn_truncate $env $omethod $args $testfile $nentries abort 1 + + set db [eval {berkdb_open -env $env} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Database should have original contents since both the truncate + # and the write were aborted + set number [number_of_entries $db $method] + error_check_good number_of_entries $number $nentries + error_check_good dbclose [$db close] 0 + + error_check_good dbverify [verify_dir $testdir "\tTest096.f: " 0 0 $nodump] 0 + + puts "\tTest096.g: Truncate and write in a txn, then commit" + txn_truncate $env $omethod $args $testfile $nentries commit 1 + + set db [eval {berkdb_open -env $env} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Database should contain only the new items + set number [number_of_entries $db $method] + error_check_good number_of_entries $number [expr $nentries / 2] + error_check_good dbclose [$db close] 0 + error_check_good dbverify [verify_dir $testdir "\tTest096.h: " 0 0 $nodump] 0 + + puts "\tTest096.i: Check proper handling of overflow pages." + # Large keys and data compared to page size guarantee + # overflow pages, or split records in heap. + if { [is_fixed_length $method] == 1 } { + puts "Skipping overflow test for fixed-length method." 
+ } else { + set overflowfile overflow096.db + set data [repeat $alphabet 600] + set db [eval {berkdb_open -create -auto_commit -pagesize 512 \ + -env $env $omethod -mode 0644} $args $overflowfile] + error_check_good db_open [is_valid_db $db] TRUE + + set noverflows 100 + for { set i 1 } { $i <= $noverflows } { incr i } { + set ret [eval {$db put} \ + $i [chop_data $method "$i$data"]] + } + + # Hash reports pages of type P_OVERFLOW as "big pages", other + # access methods as "overflow pages". Heap doesn't use + # P_OVERFLOW pages. + if { [is_hash $method] == 1 } { + set bigpages [stat_field $db stat "Number of big pages"] + error_check_good stat:bigpages [expr $bigpages > 0] 1 + } elseif { [is_heap $method] == 0 } { + set overflow [stat_field $db stat "Overflow pages"] + error_check_good stat:overflow [expr $overflow > 0] 1 + } + + error_check_good overflow_truncate [$db truncate] $noverflows + error_check_good overflow_close [$db close] 0 + } + + # Remove database and create a new one with unsorted dups. + # Skip the rest of the test for methods not supporting dups + # and for compression, which does not support unsorted dups. + # + if { [is_record_based $method] == 1 || \ + [is_compressed $args] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Skipping remainder of test096." 
+ if { $closeenv == 1 } { + error_check_good envclose [$env close] 0 + } + return + } + set ret [berkdb dbremove -env $env -auto_commit $testfile] + set ret [berkdb dbremove -env $env -auto_commit $overflowfile] + + puts "\tTest096.j: Create $nentries entries with $ndups duplicates" + set db [eval {berkdb_open -pagesize $pagesize -dup -auto_commit \ + -create -env $env $omethod -mode 0644} $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + t96_populate $db $omethod $env $nentries $ndups + + set dlist "" + for { set i 1 } {$i <= $ndups} {incr i} { + lappend dlist $i + } + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + dup_check $db $txn $t1 $dlist + error_check_good txn [$t commit] 0 + + # Make sure there are duplicates. + puts "\tTest096.k: Verify off page duplicates status" + set duplicate [stat_field $db stat "Duplicate pages"] + error_check_good stat:offpage_dups [expr $duplicate > 0] 1 + + set recs [expr $ndups * $nentries] + error_check_good dbclose [$db close] 0 + + puts "\tTest096.l: Truncate database in a txn then abort" + txn_truncate $env $omethod $args $testfile $recs abort + + set db [eval {berkdb_open -auto_commit -env $env} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set number [number_of_entries $db $method] + error_check_good number_of_entries $number $recs + error_check_good dbclose [$db close] 0 + + puts "\tTest096.m: Truncate database in a txn then commit" + txn_truncate $env $omethod $args $testfile $recs commit + + set db [eval {berkdb_open -auto_commit -env $env} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set number [number_of_entries $db $method] + error_check_good number_of_entries $number 0 + error_check_good dbclose [$db close] 0 + + set testdir [get_home $env] + error_check_good dbverify \ + [verify_dir $testdir "\tTest096.n: " 0 0 $nodump] 0 + + # Remove database, and create a new one with dups. 
Test + # truncate + write within a transaction. + puts "\tTest096.o: Create $nentries entries with $ndups duplicates" + set ret [berkdb dbremove -env $env -auto_commit $testfile] + set db [eval {berkdb_open -pagesize $pagesize -dup -auto_commit \ + -create -env $env $omethod -mode 0644} $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + t96_populate $db $omethod $env $nentries $ndups + + set dlist "" + for { set i 1 } {$i <= $ndups} {incr i} { + lappend dlist $i + } + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + dup_check $db $txn $t1 $dlist + error_check_good txn [$t commit] 0 + + puts "\tTest096.p: Verify off page duplicates status" + set duplicate [stat_field $db stat "Duplicate pages"] + error_check_good stat:offpage [expr $duplicate > 0] 1 + + set recs [expr $ndups * $nentries] + error_check_good dbclose [$db close] 0 + + puts "\tTest096.q: Truncate and write in a txn, then abort" + txn_truncate $env $omethod $args $testfile $recs abort 1 + + set db [eval {berkdb_open -auto_commit -env $env} $args $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set number [number_of_entries $db $method] + error_check_good number_of_entries $number $recs + error_check_good dbclose [$db close] 0 + + puts "\tTest096.r: Truncate and write in a txn, then commit" + txn_truncate $env $omethod $args $testfile $recs commit 1 + + set db [eval {berkdb_open -auto_commit -env $env} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set number [number_of_entries $db $method] + error_check_good number_of_entries $number [expr $recs / 2] + error_check_good dbclose [$db close] 0 + + puts "\tTest096.s: Check overflow pages with dups." 
+ set ndups 3 + set db [eval {berkdb_open -create -auto_commit -pagesize 512 \ + -env $env $omethod -dup -mode 0644} $args $overflowfile] + error_check_good db_open [is_valid_db $db] TRUE + + for { set i 1 } { $i <= $noverflows } { incr i } { + for { set j 0 } { $j < $ndups } { incr j } { + set ret [eval {$db put} \ + $i [chop_data $method "$i.$j$data"]] + } + } + + # Hash reports pages of type P_OVERFLOW as "big pages", other + # access methods as "overflow pages". + if { [is_hash $method] == 1 } { + set bigpages [stat_field $db stat "Number of big pages"] + error_check_good stat:bigpages [expr $bigpages > 0] 1 + } else { + set overflow [stat_field $db stat "Overflow pages"] + error_check_good stat:overflow [expr $overflow > 0] 1 + } + + set nentries [expr $noverflows * $ndups] + error_check_good overflow_truncate [$db truncate] $nentries + error_check_good overflow_close [$db close] 0 + + set testdir [get_home $env] + error_check_good dbverify [verify_dir $testdir "\tTest096.t: " 0 0 $nodump] 0 + + if { $closeenv == 1 } { + error_check_good envclose [$env close] 0 + } + set testdir $orig_tdir +} + +proc t96_populate {db method env nentries {ndups 1}} { + source ./include.tcl + + set did [open $dict] + set count 0 + set txn "" + set pflags "" + set gflags "" + + if { [is_record_based $method] == 1 } { + append gflags "-recno" + } + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + if { $ndups > 1 } { + for { set i 1 } { $i <= $ndups } { incr i } { + set datastr $i:$str + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set ret [eval {$db put} $txn $pflags \ + {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + } + } else { + set datastr [reverse $str] + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + set ret [eval {$db put} 
\ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + error_check_good txn [$t commit] 0 + } + set ret [eval {$db get} $gflags {$key}] + error_check_good $key:dbget [llength $ret] $ndups + incr count + } + close $did +} + +proc number_of_entries { db method } { + if { [is_record_based $method] == 1 } { + set dbc [$db cursor] + set last [$dbc get -last] + if {[llength $last] == 0} { + set number 0 + } else { + set number [lindex [lindex $last 0] 0] + } + } else { + set ret [$db get -glob *] + set number [llength $ret] + } + return $number +} + +# Open database. Truncate in a transaction, optionally with a write +# included in the transaction as well, then abort or commit. Close database. + +proc txn_truncate { env method args testfile nentries op {write 0}} { + set db [eval {berkdb_open -create -auto_commit \ + -env $env $method -mode 0644} $args $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + set txn [$env txn] + error_check_good txnbegin [is_valid_txn $txn $env] TRUE + + set ret [$db truncate -txn $txn] + error_check_good dbtrunc $ret $nentries + if { $write == 1 } { + for {set i 1} {$i <= [expr $nentries / 2]} {incr i} { + set ret [eval {$db put} -txn $txn \ + {$i [chop_data $method "aaaaaaaaaa"]}] + error_check_good write $ret 0 + } + } + + error_check_good txn$op [$txn $op] 0 + error_check_good db_close [$db close] 0 +} + diff --git a/test/tcl/test097.tcl b/test/tcl/test097.tcl new file mode 100644 index 00000000..72065f8d --- /dev/null +++ b/test/tcl/test097.tcl @@ -0,0 +1,199 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test097 +# TEST Open up a large set of database files simultaneously. +# TEST Adjust for local file descriptor resource limits. +# TEST Then use the first 1000 entries from the dictionary. 
+# TEST Insert each with self as key and a fixed, medium length data string; +# TEST retrieve each. After all are entered, retrieve all; compare output +# TEST to original. + +proc test097 { method {ndbs 500} {nentries 400} args } { + global pad_datastr + source ./include.tcl + + set largs [convert_args $method $args] + set encargs "" + set largs [split_encargs $largs encargs] + + # Open an environment, with a 1MB cache. + set eindex [lsearch -exact $largs "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $largs $eindex] + puts "Test097: $method: skipping for env $env" + return + } + env_cleanup $testdir + set env [eval {berkdb_env -create -log_regionmax 256000 \ + -pagesize 512 -cachesize { 0 1048576 1 } -txn} \ + -home $testdir $encargs] + error_check_good dbenv [is_valid_env $env] TRUE + + if { [is_partitioned $args] == 1 } { + set ndbs [expr $ndbs / 10] + } + + # heap opens 2 other files under the covers, so limit dbs for heap + if { [is_heap $method] == 1 } { + if { $ndbs > 325 } { + set ndbs 325 + } + } + + # Create the database and open the dictionary + set basename test097 + set t1 $testdir/t1 + set t2 $testdir/t2 + set t3 $testdir/t3 + # + # When running with HAVE_MUTEX_SYSTEM_RESOURCES, + # we can run out of mutex lock slots due to the nature of this test. + # So, for this test, increase the number of pages per extent + # to consume fewer resources. + # + if { [is_queueext $method] } { + set numdb [expr $ndbs / 4] + set eindex [lsearch -exact $largs "-extent"] + error_check_bad extent $eindex -1 + incr eindex + set extval [lindex $largs $eindex] + set extval [expr $extval * 4] + set largs [lreplace $largs $eindex $eindex $extval] + } + puts -nonewline "Test097: $method ($largs) " + puts "$nentries entries in at most $ndbs simultaneous databases" + + puts "\tTest097.a: Simultaneous open" + set numdb [test097_open tdb $ndbs $method $env $basename $largs] + if { $numdb == 0 } { + puts "\tTest097: Insufficient resources available -- skipping." 
+ error_check_good envclose [$env close] 0 + return + } + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 0 + + # Here is the loop where we put and get each key/data pair + if { [is_record_based $method] == 1 } { + append gflags "-recno" + } + puts "\tTest097.b: put/get on $numdb databases" + set datastr "abcdefghij" + set pad_datastr [pad_data $method $datastr] + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + for { set i 1 } { $i <= $numdb } { incr i } { + set ret [eval {$tdb($i) put} $txn $pflags \ + {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + set ret [eval {$tdb($i) get} $gflags {$key}] + error_check_good get $ret [list [list $key \ + [pad_data $method $datastr]]] + } + incr count + } + close $did + + # Now we will get each key from the DB and compare the results + # to the original. + puts "\tTest097.c: dump and check files" + for { set j 1 } { $j <= $numdb } { incr j } { + dump_file $tdb($j) $txn $t1 test097.check + error_check_good db_close [$tdb($j) close] 0 + + # Now compare the keys to see if they match the dictionary + if { [is_record_based $method] == 1 } { + set oid [open $t2 w] + for {set i 1} {$i <= $nentries} {set i [incr i]} { + puts $oid $i + } + close $oid + filesort $t2 $t3 + file rename -force $t3 $t2 + } else { + set q q + filehead $nentries $dict $t3 + filesort $t3 $t2 + } + filesort $t1 $t3 + + error_check_good Test097:diff($t3,$t2) [filecmp $t3 $t2] 0 + } + error_check_good envclose [$env close] 0 +} + +# Check function for test097; data should be fixed are identical +proc test097.check { key data } { + global pad_datastr + error_check_good "data mismatch for key $key" $data $pad_datastr +} + +proc test097_open { tdb ndbs method env basename largs } { + global errorCode + upvar $tdb db + + set j 0 + set numdb $ndbs + if { [is_queueext $method] } { + set numdb [expr $ndbs / 
4] + } + set omethod [convert_method $method] + for { set i 1 } {$i <= $numdb } { incr i } { + set stat [catch {eval {berkdb_open -env $env \ + -pagesize 512 -create -mode 0644} \ + $largs {$omethod $basename.$i.db}} db($i)] + # + # Check if we've reached our limit + # + if { $stat == 1 } { + set min 20 + set em [is_substr $errorCode EMFILE] + set en [is_substr $errorCode ENFILE] + error_check_good open_ret [expr $em || $en] 1 + puts \ + "\tTest097.a.1 Encountered resource limits opening $i files, adjusting" + if { [is_queueext $method] } { + set end [expr $j / 4] + set min 10 + } else { + set end [expr $j - 10] + } + # + # If we cannot open even $min files, then this test is + # not very useful. Close up shop and go back. + # + if { $end < $min } { + test097_close db 1 $j + return 0 + } + test097_close db [expr $end + 1] $j + return $end + } else { + error_check_good dbopen [is_valid_db $db($i)] TRUE + set j $i + } + } + return $j +} + +proc test097_close { tdb start end } { + upvar $tdb db + + for { set i $start } { $i <= $end } { incr i } { + error_check_good db($i)close [$db($i) close] 0 + } +} diff --git a/test/tcl/test098.tcl b/test/tcl/test098.tcl new file mode 100644 index 00000000..3dfa8668 --- /dev/null +++ b/test/tcl/test098.tcl @@ -0,0 +1,85 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2002, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test098 +# TEST Test of DB_GET_RECNO and secondary indices. Open a primary and +# TEST a secondary, and do a normal cursor get followed by a get_recno. +# TEST (This is a smoke test for "Bug #1" in [#5811].) + +proc test098 { method args } { + source ./include.tcl + + set omethod [convert_method $method] + set args [convert_args $method $args] + + puts "Test098: $omethod ($args): DB_GET_RECNO and secondary indices." + + if { [is_rbtree $method] != 1 } { + puts "\tTest098: Skipping for method $method." 
+ return + } + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + set txn "" + set auto "" + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set base $testdir/test098 + set env NULL + } else { + set base test098 + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + set auto " -auto_commit " + } + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "\tTest098.a: Set up databases." + + set adb [eval {berkdb_open} $omethod $args $auto \ + {-create} $base-primary.db] + error_check_good adb_create [is_valid_db $adb] TRUE + + set bdb [eval {berkdb_open} $omethod $args $auto \ + {-create} $base-secondary.db] + error_check_good bdb_create [is_valid_db $bdb] TRUE + + set ret [eval $adb associate $auto [callback_n 0] $bdb] + error_check_good associate $ret 0 + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$adb put} $txn aaa data1] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set bc [$bdb cursor] + error_check_good cursor [is_valid_cursor $bc $bdb] TRUE + + puts "\tTest098.b: c_get(DB_FIRST) on the secondary." + error_check_good get_first [$bc get -first] \ + [list [list [[callback_n 0] aaa data1] data1]] + + puts "\tTest098.c: c_get(DB_GET_RECNO) on the secondary." + error_check_good get_recno [$bc get -get_recno] 1 + + error_check_good c_close [$bc close] 0 + + error_check_good bdb_close [$bdb close] 0 + error_check_good adb_close [$adb close] 0 +} diff --git a/test/tcl/test099.tcl b/test/tcl/test099.tcl new file mode 100644 index 00000000..d2053af4 --- /dev/null +++ b/test/tcl/test099.tcl @@ -0,0 +1,275 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test099 +# TEST +# TEST Test of DB->get and DBC->c_get with set_recno and get_recno. +# TEST +# TEST Populate a small btree -recnum database. +# TEST After all are entered, retrieve each using -recno with DB->get. +# TEST Open a cursor and do the same for DBC->c_get with set_recno. +# TEST Verify that set_recno sets the record number position properly. +# TEST Verify that get_recno returns the correct record numbers. +# TEST +# TEST Using the same database, open 3 cursors and position one at +# TEST the beginning, one in the middle, and one at the end. Delete +# TEST by cursor and check that record renumbering is done properly. +# +proc test099 { method {nentries 10000} args } { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test099: Test of set_recno and get_recno in DBC->c_get." + if { [is_rbtree $method] != 1 } { + puts "Test099: skipping for method $method." + return + } + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test099.db + set env NULL + } else { + set testfile test099.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + set t1 $testdir/t1 + cleanup $testdir $env + + # Create the database and open the dictionary + set db [eval {berkdb_open \ + -create -mode 0644} $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + set did [open $dict] + + set pflags "" + set gflags "" + set txn "" + set count 1 + + append gflags " -recno" + + puts "\tTest099.a: put loop" + # Here is the loop where we put each key/data pair + while { [gets $did str] != -1 && $count <= $nentries } { + set key $str + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set r [eval {$db put} \ + $txn $pflags {$key [chop_data $method $str]}] + error_check_good db_put $r 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + puts "\tTest099.b: dump file" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 test099.check + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + + puts "\tTest099.c: Test set_recno then get_recno" + set db [eval {berkdb_open -rdonly} $args $omethod $testfile ] + error_check_good dbopen [is_valid_db $db] TRUE + + # Open a cursor + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set did [open $t1] + set recno 1 + + # Create key(recno) array to use for later comparison + while { [gets $did str] != -1 } { + set kvals($recno) $str + incr recno + } + + set recno 1 + set ret [$dbc get -first] + error_check_bad dbc_get_first [llength $ret] 0 + + # First walk forward through the database .... 
+ while { $recno < $count } { + # Test set_recno: verify it sets the record number properly. + set current [$dbc get -current] + set r [$dbc get -set_recno $recno] + error_check_good set_recno $current $r + # Test set_recno: verify that we find the expected key + # at the current record number position. + set k [lindex [lindex $r 0] 0] + error_check_good set_recno $kvals($recno) $k + + # Test get_recno: verify that the return from + # get_recno matches the record number just set. + set g [$dbc get -get_recno] + error_check_good get_recno $recno $g + set ret [$dbc get -next] + incr recno + } + + # ... and then backward. + set recno [expr $count - 1] + while { $recno > 0 } { + # Test set_recno: verify that we find the expected key + # at the current record number position. + set r [$dbc get -set_recno $recno] + set k [lindex [lindex $r 0] 0] + error_check_good set_recno $kvals($recno) $k + + # Test get_recno: verify that the return from + # get_recno matches the record number just set. + set g [$dbc get -get_recno] + error_check_good get_recno $recno $g + set recno [expr $recno - 1] + } + + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 + close $did + + puts "\tTest099.d: Test record renumbering with cursor deletes." + # Reopen the database, this time with write permission. + set db [eval {berkdb_open} $args $omethod $testfile ] + error_check_good dbopen [is_valid_db $db] TRUE + + # Open three cursors. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc0 [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc0 $db] TRUE + set dbc1 [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc1 $db] TRUE + set dbc2 [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc2 $db] TRUE + + # Initialize cursor positions. 
Set dbc0 at the beginning, + # dbc1 at the middle, and dbc2 at the end. + set ret [$dbc0 get -first] + error_check_bad dbc0_get_first [llength $ret] 0 + + set middle [expr $nentries / 2 + 1] + set ret [$dbc1 get -set_recno $middle] + error_check_bad dbc1_get_middle [llength $ret] 0 + + set ret [$dbc2 get -last] + error_check_bad dbc2_get_last [llength $ret] 0 + + # At each iteration, delete the first entry, delete the middle + # entry, and check the record number for beginning, middle and end. + set count 1 + while { $count <= [expr $nentries / 2] } { + # Delete first item. + error_check_good dbc0_del [$dbc0 del] 0 + + # For non-txn env's, check that db_stat is recalculating + # to adjust for items marked for deletion. We can't do this + # in txn env's because the live txn will cause deadlock. + if { $txnenv == 0 } { + set nkeys [expr $nentries - [expr $count * 2] + 1] + set stat [$db stat] + error_check_good keys_after_delete [is_substr $stat \ + "{Number of keys} $nkeys"] 1 + error_check_good records_after_delete [is_substr $stat \ + "{Number of records} $nkeys"] 1 + + # Now delete the same entry again (which should not + # change the database) and make sure db->stat returns + # the same number of keys and records as before. + catch {[$dbc0 del]} result + + set stat [$db stat] + error_check_good keys_after_baddelete [is_substr $stat \ + "{Number of keys} $nkeys"] 1 + error_check_good recs_after_baddelete [is_substr $stat \ + "{Number of records} $nkeys"] 1 + } + + # Reposition cursor to new first item, check that record number + # is 1. + set ret0 [$dbc0 get -next] + error_check_good beginning_recno [$dbc0 get -get_recno] 1 + + # Calculate the current middle recno and compare to actual. + set middle [$dbc1 get -get_recno] + set calcmiddle [expr [expr $nentries / 2] - $count + 1] + error_check_good middle_recno $middle $calcmiddle + + # Delete middle item, reposition cursor to next item. 
+ error_check_good dbc1_del [$dbc1 del] 0 + set ret1 [$dbc1 get -next] + + # Calculate the expected end recno and compare to actual. + set end [$dbc2 get -get_recno] + set calcend [expr $nentries - [expr $count * 2]] + # On the last iteration, all items have been deleted so + # there is no recno. + if { $calcend == 0 } { + error_check_good end_recno $end "" + } else { + error_check_good end_recno $end $calcend + } + incr count + } + + # Close all three cursors. + error_check_good cursor_close [$dbc0 close] 0 + error_check_good cursor_close [$dbc1 close] 0 + error_check_good cursor_close [$dbc2 close] 0 + + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} + +# Check function for dumped file; data should be fixed are identical +proc test099.check { key data } { + error_check_good "data mismatch for key $key" $key $data +} diff --git a/test/tcl/test100.tcl b/test/tcl/test100.tcl new file mode 100644 index 00000000..e59c1c1b --- /dev/null +++ b/test/tcl/test100.tcl @@ -0,0 +1,16 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test100 +# TEST Test for functionality near the end of the queue +# TEST using test025 (DB_APPEND). +proc test100 { method {nentries 10000} {tnum "100"} args} { + if { [is_queueext $method ] == 0 } { + puts "Skipping test$tnum for $method." + return; + } + eval {test025 $method $nentries 4294967000 $tnum} $args +} diff --git a/test/tcl/test101.tcl b/test/tcl/test101.tcl new file mode 100644 index 00000000..d52158c0 --- /dev/null +++ b/test/tcl/test101.tcl @@ -0,0 +1,16 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test101 +# TEST Test for functionality near the end of the queue +# TEST using test070 (DB_CONSUME). 
+proc test101 { method {nentries 1000} {txn -txn} {tnum "101"} args} { + if { [is_queueext $method ] == 0 } { + puts "Skipping test$tnum for $method." + return; + } + eval {test070 $method 4 2 $nentries WAIT 4294967000 $txn $tnum} $args +} diff --git a/test/tcl/test102.tcl b/test/tcl/test102.tcl new file mode 100644 index 00000000..faa3f0b9 --- /dev/null +++ b/test/tcl/test102.tcl @@ -0,0 +1,235 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test102 +# TEST Bulk get test for record-based methods. [#2934] +proc test102 { method {nsets 1000} {tnum "102"} args } { + source ./include.tcl + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_rbtree $method] == 1 || + [is_heap $method] == 1 || [is_record_based $method] == 0} { + puts "Test$tnum skipping for method $method" + return + } + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + # If we've our own env, no reason to swap--this isn't + # an mpool test. + set carg { -cachesize {0 25000000 0} } + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + puts "Skipping for environment with txns" + return + } + set testdir [get_home $env] + set carg {} + } + cleanup $testdir $env + + puts "Test$tnum: $method ($args) Bulk get test" + + # Open and populate the database. + puts "\tTest$tnum.a: Creating $method database\ + with $nsets entries." 
+ set dargs "$carg $args" + set testfile $basename.db + set db [eval {berkdb_open_noerr -create} $omethod $dargs $testfile] + error_check_good db_open [is_valid_db $db] TRUE + t102_populate $db $method $nsets $txnenv 0 + + # Determine the pagesize so we can use it to size the buffer. + set stat [$db stat] + set pagesize [get_pagesize $stat] + + # Run get tests. The gettest should succeed as long as + # the buffer is at least as large as the page size. Test for + # failure of a small buffer unless the page size is so small + # we can't define a smaller buffer (buffers must be multiples + # of 1024). A "big buffer" should succeed in all cases because + # we define it to be larger than 65536, the largest page + # currently allowed. + set maxpage [expr 1024 * 64] + set bigbuf [expr $maxpage + 1024] + set smallbuf 1024 + + # Run regular db->get tests. + if { $pagesize > 1024 } { + t102_gettest $db $tnum b $smallbuf 1 + } else { + puts "Skipping Test$tnum.b for small pagesize." + } + t102_gettest $db $tnum c $bigbuf 0 + + # Run cursor get tests. + if { $pagesize > 1024 } { + t102_gettest $db $tnum d $smallbuf 1 + } else { + puts "Skipping Test$tnum.b for small pagesize." + } + t102_cgettest $db $tnum e $bigbuf 0 + + if { [is_fixed_length $method] == 1 } { + puts "Skipping overflow tests for fixed-length method $omethod." + } else { + + # Set up for overflow tests + puts "\tTest$tnum.f: Growing database with overflow sets" + t102_populate $db $method [expr $nsets / 100] $txnenv 10000 + + # Run overflow get tests. Test should fail for overflow pages + # with our standard big buffer but succeed at twice that size. + t102_gettest $db $tnum g $bigbuf 1 + t102_gettest $db $tnum h [expr $bigbuf * 2] 0 + + # Run overflow cursor get tests. Test will fail for overflow + # pages with 8K buffer but succeed with a large buffer. 
+ t102_cgettest $db $tnum i 8192 1 + t102_cgettest $db $tnum j $bigbuf 0 + } + error_check_good db_close [$db close] 0 +} + +proc t102_gettest { db tnum letter bufsize expectfail } { + t102_gettest_body $db $tnum $letter $bufsize $expectfail 0 +} +proc t102_cgettest { db tnum letter bufsize expectfail } { + t102_gettest_body $db $tnum $letter $bufsize $expectfail 1 +} + +# Basic get test +proc t102_gettest_body { db tnum letter bufsize expectfail usecursor } { + global errorCode + + foreach flag { multi multi_key } { + if { $usecursor == 0 } { + if { $flag == "multi_key" } { + # db->get does not allow multi_key + continue + } else { + set action "db get -$flag" + } + } else { + set action "dbc get -$flag -set/-next" + } + puts "\tTest$tnum.$letter: $action with bufsize $bufsize" + + set allpassed TRUE + set saved_err "" + + # Cursor for $usecursor. + if { $usecursor != 0 } { + set getcurs [$db cursor] + error_check_good \ + getcurs [is_valid_cursor $getcurs $db] TRUE + } + + # Traverse DB with cursor; do get/c_get($flag) on each item. + set dbc [$db cursor] + error_check_good is_valid_dbc [is_valid_cursor $dbc $db] TRUE + for { set dbt [$dbc get -first] } { [llength $dbt] != 0 } \ + { set dbt [$dbc get -next] } { + set key [lindex [lindex $dbt 0] 0] + set datum [lindex [lindex $dbt 0] 1] + + if { $usecursor == 0 } { + set ret [catch \ + {eval $db get -$flag $bufsize $key} res] + } else { + set res {} + for { set ret [catch {eval $getcurs get\ + -$flag $bufsize -set $key} tres] } \ + { $ret == 0 && [llength $tres] != 0 } \ + { set ret [catch {eval $getcurs get\ + -$flag $bufsize -next} tres]} { + eval lappend res $tres + } + } + + # If we expect a failure, be more tolerant if the above + # fails; just make sure it's a DB_BUFFER_SMALL or an + # EINVAL (if the buffer is smaller than the pagesize, + # it's EINVAL), mark it, and move along. 
+ if { $expectfail != 0 && $ret != 0 } { + if { [is_substr $errorCode DB_BUFFER_SMALL] != 1 && \ + [is_substr $errorCode EINVAL] != 1 } { + error_check_good \ + "$flag failure errcode" \ + $errorCode "DB_BUFFER_SMALL or EINVAL" + } + set allpassed FALSE + continue + } + error_check_good "get_$flag ($key)" $ret 0 + } + + if { $expectfail == 1 } { + error_check_good allpassed $allpassed FALSE + puts "\t\tTest$tnum.$letter:\ + returned at least one DB_BUFFER_SMALL (as expected)" + } else { + error_check_good allpassed $allpassed TRUE + puts "\t\tTest$tnum.$letter: succeeded (as expected)" + } + + error_check_good dbc_close [$dbc close] 0 + if { $usecursor != 0 } { + error_check_good getcurs_close [$getcurs close] 0 + } + } +} + +proc t102_populate { db method nentries txnenv pad_bytes } { + source ./include.tcl + + set did [open $dict] + set count 0 + set txn "" + set pflags "" + set gflags " -recno " + + while { [gets $did str] != -1 && $count < $nentries } { + set key [expr $count + 1] + set datastr $str + # Create overflow pages only if method is not fixed-length. + if { [is_fixed_length $method] == 0 } { + append datastr [repeat "a" $pad_bytes] + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} \ + $txn $pflags {$key [chop_data $method $datastr]}] + error_check_good put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + set ret [eval {$db get} $gflags {$key}] + error_check_good $key:dbget [llength $ret] 1 + incr count + } + close $did + + # This will make debugging easier, and since the database is + # read-only from here out, it's cheap. + error_check_good db_sync [$db sync] 0 +} + diff --git a/test/tcl/test103.tcl b/test/tcl/test103.tcl new file mode 100644 index 00000000..28a8ef0b --- /dev/null +++ b/test/tcl/test103.tcl @@ -0,0 +1,222 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test103 +# TEST Test bulk get when record numbers wrap around. +# TEST +# TEST Load database with items starting before and ending after +# TEST the record number wrap around point. Run bulk gets (-multi_key) +# TEST with various buffer sizes and verify the contents returned match +# TEST the results from a regular cursor get. +# TEST +# TEST Then delete items to create a sparse database and make sure it +# TEST still works. Test both -multi and -multi_key since they behave +# TEST differently. +proc test103 { method {nentries 100} {start 4294967250} {tnum "103"} args} { + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + puts "Test$tnum: $method ($args) Test of bulk get with wraparound." + + if { [is_queueext $method] == 0 } { + puts "\tSkipping Test$tnum for method $method." + return + } + + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. + # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + set db [eval {berkdb_open_noerr \ + -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Find the pagesize so we can use it to size the buffer. 
+ set stat [$db stat] + set pagesize [get_pagesize $stat] + + set did [open $dict] + + puts "\tTest$tnum.a: put/get loop" + set txn "" + + # Here is the loop where we put each key/data pair + set count 0 + set k [expr $start + 1] + while { [gets $did str] != -1 && $count < $nentries } { + # + # We cannot use 'incr' because it gets unhappy since + # expr above is using 64-bits. + set k [expr $k + 1] + # + # Detect if we're more than 32 bits now. If so, wrap + # our key back to 1. + # + if { [expr $k > 0xffffffff] } { + set k 1 + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set ret [eval {$db put} $txn {$k [chop_data $method $str]}] + error_check_good db_put $ret 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + incr count + } + close $did + + # Run tests in verbose mode for debugging. + set verbose 0 + + puts "\tTest$tnum.b: Bulk get with large buffer (retrieves all data)." + # Buffer is large enough that everything fits in a single get. + check_multi_recno $db [expr $pagesize * $nentries] multi_key $verbose + + puts "\tTest$tnum.c: Bulk get with buffer = (2 x pagesize)." + # Buffer gets several items at a get, but not all. + check_multi_recno $db [expr $pagesize * 2] multi_key $verbose + + # Skip tests if buffer would be smaller than allowed. + if { $pagesize >= 1024 } { + puts "\tTest$tnum.d: Bulk get with buffer = pagesize." + check_multi_recno $db $pagesize multi_key $verbose + } + + if { $pagesize >= 2048 } { + puts "\tTest$tnum.e: Bulk get with buffer < pagesize\ + (returns EINVAL)." + catch { + check_multi_recno $db [expr $pagesize / 2] \ + multi_key $verbose + } res + error_check_good \ + bufsize_less_than_pagesize [is_substr $res "invalid"] 1 + } + + # For a sparsely populated database, test with both -multi_key and + # -multi. In any sort of record numbered database, -multi does not + # return keys, so it returns all items. 
-multi_key returns both keys + # and data so it skips deleted items. + puts "\tTest$tnum.f: Delete every 10th item to create sparse database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [ eval {$db cursor} $txn] + error_check_good cursor [is_valid_cursor $curs $db] TRUE + + set count 0 + for { set kd [$curs get -first] } { $count < $nentries } \ + { set kd [$curs get -next] } { + if { [expr $count % 10 == 0] } { + error_check_good cdelete [$curs del] 0 + } + incr count + } + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + puts "\tTest$tnum.g: Sparse database, large buffer, multi_key." + check_multi_recno $db [expr $pagesize * $nentries] multi_key $verbose + puts "\tTest$tnum.h: Sparse database, large buffer, multi." + check_multi_recno $db [expr $pagesize * $nentries] multi $verbose + + puts "\tTest$tnum.i: \ + Sparse database, buffer = (2 x pagesize), multi_key." + check_multi_recno $db [expr $pagesize * 2] multi_key $verbose + puts "\tTest$tnum.j: Sparse database, buffer = (2 x pagesize), multi." + check_multi_recno $db [expr $pagesize * 2] multi $verbose + + if { $pagesize >= 1024 } { + puts "\tTest$tnum.k: \ + Sparse database, buffer = pagesize, multi_key." + check_multi_recno $db $pagesize multi_key $verbose + puts "\tTest$tnum.k: Sparse database, buffer = pagesize, multi." + check_multi_recno $db $pagesize multi $verbose + } + + error_check_good db_close [$db close] 0 +} + +# The proc check_multi_recno is a modification of the utility routine +# check_multi_key specifically for recno methods. We use this instead +# check_multi, even with the -multi flag, because the check_multi utility +# assumes that dups are being used which can't happen with record-based +# methods. 
+proc check_multi_recno { db size flag {verbose 0}} { + source ./include.tcl + set c [eval { $db cursor} ] + set m [eval { $db cursor} ] + + set j 1 + + # Walk the database with -multi_key or -multi bulk get. + for {set d [$m get -first -$flag $size] } { [llength $d] != 0 } { + set d [$m get -next -$flag $size] } { + if {$verbose == 1 } { + puts "FETCH $j" + incr j + } + # For each bulk get return, compare the results to what we + # get by walking the db with an ordinary cursor get. + for {set i 0} { $i < [llength $d] } { incr i } { + set kd [lindex $d $i] + set k [lindex $kd 0] + set data [lindex $kd 1] + set len [string length $data] + + if {$verbose == 1 } { + puts ">> $k << >> $len << " + } + # If we hit a deleted item in -multi, skip over it. + if { $flag == "multi" && $len == 0 } { + continue + } + + set check [$c get -next] + set cd [lindex $check 0] + set ck [lindex $cd 0] + set cdata [lindex $cd 1] + + error_check_good key $k $ck + error_check_good data_len $len [string length $cdata] + error_check_good data $data $cdata + } + } +} diff --git a/test/tcl/test106.tcl b/test/tcl/test106.tcl new file mode 100644 index 00000000..b78d3168 --- /dev/null +++ b/test/tcl/test106.tcl @@ -0,0 +1,113 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test106 +# TEST +# TEST +# TEST +proc test106 { method {nitems 100} {niter 200} {tnum "106"} args } { + source ./include.tcl + global dict + global rand_init + + # Set random seed for use in t106script procs op2 and create_data. + error_check_good set_random_seed [berkdb srand $rand_init] 0 + + set args [convert_args $method $args] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + + if { [is_btree $method] != 1 } { + puts "\tTest$tnum: Skipping for method $method." + return + } + + # Skip for specified pagesizes. This test runs at the native + # pagesize. 
(For SR #7964 testing, we may need to force + # to 8192.) + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: Skipping for specific pagesizes" + return + } + + # This test needs a txn-enabled environment. If one is not + # provided, create it. + # + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set env \ + [eval {berkdb_env -create -home $testdir -txn} $pageargs] + } else { + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv != 1 } { + puts "Skipping test$tnum for non-txn environment." + return + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + # The bulk of the work of this test is done in t106script.tcl. + # Here we kick off one consumer, then five producers, then sit + # back and wait for them to finish. + foreach order { ordered random } { + set nproducers 5 + + puts "\tTest$tnum.a: Start deadlock detector ($order)." + set dpid [exec $util_path/db_deadlock -a o -v -t 5\ + -h $testdir >& $testdir/dd.out &] + + puts "\tTest$tnum.b: Start consumer process ($order)." + sentinel_init + set pidlist {} + set cpid [exec $tclsh_path $test_path/wrap.tcl t106script.tcl \ + $testdir/t106script.log.cons.$order.1 $testdir WAIT \ + 0 $nproducers $testdir/CONSUMERLOG 1 $tnum $order $niter \ + $args &] + lappend pidlist $cpid + + puts "\tTest$tnum.c: Initialize producers ($order)." + for { set p 1 } { $p <= $nproducers } { incr p } { + set ppid [exec $tclsh_path $test_path/wrap.tcl \ + t106script.tcl \ + $testdir/t106script.log.init.$order.$p \ + $testdir INITIAL $nitems $nproducers \ + $testdir/INITLOG.$p $p $tnum \ + $order $niter $args &] + lappend pidlist $ppid + } + + # Wait for all producers to be initialized before continuing + # to the RMW portion of the test. + watch_procs $pidlist 10 + + sentinel_init + set pidlist {} + puts "\tTest$tnum.d: Run producers in RMW mode ($order)." 
+ for { set p 1 } { $p <= $nproducers } { incr p } { + set ppid [exec $tclsh_path $test_path/wrap.tcl \ + t106script.tcl \ + $testdir/t106script.log.prod.$order.$p \ + $testdir PRODUCE $nitems $nproducers \ + $testdir/PRODUCERLOG.$p $p $tnum \ + $order $niter $args &] + lappend pidlist $ppid + } + + watch_procs $pidlist 10 + tclkill $dpid + } + + # If this test created the env, close it. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/test107.tcl b/test/tcl/test107.tcl new file mode 100644 index 00000000..b7b3a9f0 --- /dev/null +++ b/test/tcl/test107.tcl @@ -0,0 +1,168 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test107 +# TEST Test of read-committed (degree 2 isolation). [#8689] +# TEST +# TEST We set up a database. Open a read-committed transactional cursor and +# TEST a regular transactional cursor on it. Position each cursor on one page, +# TEST and do a put to a different page. +# TEST +# TEST Make sure that: +# TEST - the put succeeds if we are using degree 2 isolation. +# TEST - the put deadlocks within a regular transaction with +# TEST a regular cursor. +# TEST +proc test107 { method args } { + source ./include.tcl + global fixed_len + global passwd + set tnum "107" + + set pageargs "" + split_pageargs $args pageargs + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test$tnum skipping for env $env" + return + } + + # We'll make the data pretty good sized so we can easily + # move to a different page. Make the data size a little + # smaller for fixed-length methods so it works with + # pagesize 512 tests. 
+ set data_size 512 + set orig_fixed_len $fixed_len + set fixed_len [expr $data_size - [expr $data_size / 8]] + set args [convert_args $method $args] + set encargs "" + set ddargs "" + set args [split_encargs $args encargs] + if { $encargs != "" } { + set ddargs " -P $passwd " + } + set omethod [convert_method $method] + + puts "Test$tnum: Degree 2 Isolation Test ($method $args)" + set testfile test$tnum.db + env_cleanup $testdir + + # Create the environment. + set timeout 10 + set env [eval {berkdb_env -create -mode 0644 -lock \ + -cachesize { 0 1048576 1 } \ + -lock_timeout $timeout -txn} $pageargs $encargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # Create the database. + set db [eval {berkdb_open -env $env -create -auto_commit\ + -mode 0644 $omethod} $args {$testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.a: put loop" + # The data doesn't need to change from key to key. + # Use numerical keys so we don't need special handling for + # record-based methods. + set origdata "data" + set len [string length $origdata] + set data [repeat $origdata [expr $data_size / $len]] + set nentries 200 + set txn [$env txn] + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set ret [eval {$db put} \ + -txn $txn {$key [chop_data $method $data]}] + error_check_good put:$db $ret 0 + } + error_check_good txn_commit [$txn commit] 0 + + puts "\tTest$tnum.b: Start deadlock detector." + # Start up a deadlock detector so we can break self-deadlocks. + set dpid [eval {exec $util_path/db_deadlock} -v -ae -t 1.0 \ + -h $testdir $ddargs >& $testdir/dd.out &] + + puts "\tTest$tnum.c: Open txns and cursors." + # We can get degree 2 isolation with either a degree 2 + # txn or a degree 2 cursor or both. However, the case + # of a regular txn and regular cursor should deadlock. + # We put this case last so it won't deadlock the cases + # which should succeed. 
+ # + # Cursors and transactions are named according to + # whether they specify degree 2 (c2, t2) or not (c, t). + # Set up all four possibilities. + # + set t [$env txn] + error_check_good reg_txn_begin [is_valid_txn $t $env] TRUE + set t2 [$env txn -read_committed] + error_check_good deg2_txn_begin [is_valid_txn $t2 $env] TRUE + + set c2t [$db cursor -txn $t -read_committed] + error_check_good valid_c2t [is_valid_cursor $c2t $db] TRUE + set ct2 [$db cursor -txn $t2] + error_check_good valid_ct2 [is_valid_cursor $ct2 $db] TRUE + set c2t2 [$db cursor -txn $t2 -read_committed] + error_check_good valid_c2t2 [is_valid_cursor $c2t2 $db] TRUE + set ct [$db cursor -txn $t] + error_check_good valid_ct [is_valid_cursor $ct $db] TRUE + + set curslist [list $c2t $ct2 $c2t2 $ct] + set newdata newdata + set offpagekey [expr $nentries - 1] + + # For one cursor at a time, read the first item in the + # database, then move to an item on a different page. + # Put a new value in the first item on the first page. This + # should work with degree 2 isolation and hang without it. + # + # Wrap the whole thing in a catch statement so we still get + # around to killing the deadlock detector and cleaning up + # even if the test fails. + # + puts "\tTest$tnum.d: Test for read-committed (degree 2 isolation)." 
+ set status [catch { + foreach cursor $curslist { + set retfirst [$cursor get -first] + set firstkey [lindex [lindex $retfirst 0] 0] + set ret [$cursor get -set $offpagekey] + error_check_good cursor_off_page \ + [lindex [lindex $ret 0] 0] $offpagekey + if { [catch {eval {$db put} \ + $firstkey [chop_data $method $newdata]} res]} { + error_check_good error_is_deadlock \ + [is_substr $res DB_LOCK_DEADLOCK] 1 + error_check_good right_cursor_failed $cursor $ct + } else { + set ret [lindex [lindex [$db get $firstkey] 0] 1] + error_check_good data_changed \ + $ret [pad_data $method $newdata] + error_check_bad right_cursor_succeeded $cursor $ct + } + error_check_good close_cursor [$cursor close] 0 + } + } res] + if { $status != 0 } { + puts $res + } + + # Smoke test for db_stat -txn -read_committed. + puts "\tTest$tnum.e: Smoke test for db_stat -txn -read_committed" + if { [catch {set statret [$db stat -txn $t -read_committed]} res] } { + puts "FAIL: db_stat -txn -read_committed returned $res" + } + + # End deadlock detection and clean up handles + puts "\tTest$tnum.f: Clean up." + tclkill $dpid + set fixed_len $orig_fixed_len + error_check_good t_commit [$t commit] 0 + error_check_good t2_commit [$t2 commit] 0 + error_check_good dbclose [$db close] 0 + error_check_good envclose [$env close] 0 +} diff --git a/test/tcl/test109.tcl b/test/tcl/test109.tcl new file mode 100644 index 00000000..1808ce93 --- /dev/null +++ b/test/tcl/test109.tcl @@ -0,0 +1,321 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test109 +# TEST +# TEST Test of sequences. 
+proc test109 { method {tnum "109"} args } { + source ./include.tcl + global rand_init + global fixed_len + global errorCode + + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + set sargs " -thread " + + if { [is_partitioned $args] == 1 } { + puts "Test109 skipping for partitioned $method" + return + } + if { [is_heap $method] } { + puts "Test109 skipping for method $method." + return + } + + if { $eindex == -1 } { + set env NULL + } else { + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + # Fixed_len must be increased from the default to + # accommodate fixed-record length methods. + set orig_fixed_len $fixed_len + set fixed_len 128 + set args [convert_args $method $args] + set omethod [convert_method $method] + error_check_good random_seed [berkdb srand $rand_init] 0 + + # Test with in-memory dbs, regular dbs, and subdbs. + foreach filetype { subdb regular in-memory } { + puts "Test$tnum: $method ($args) Test of sequences ($filetype)." + + # Skip impossible combinations. + if { $filetype == "subdb" && [is_queue $method] } { + puts "Skipping $filetype test for method $method." + continue + } + if { $filetype == "in-memory" && [is_queueext $method] } { + puts "Skipping $filetype test for method $method." + continue + } + + # Reinitialize file name for each file type, then adjust. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + } else { + set testfile test$tnum.db + set testdir [get_home $env] + } + if { $filetype == "subdb" } { + lappend testfile SUBDB + } + if { $filetype == "in-memory" } { + set testfile "" + } + + cleanup $testdir $env + + # Make the key numeric so we can test record-based methods. + set key 1 + + # Open a noerr db, since we expect errors. 
+ set db [eval {berkdb_open_noerr \ + -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.a: Max must be greater than min." + set errorCode NONE + catch {set seq [eval {berkdb sequence} -create $sargs \ + -init 0 -min 100 -max 0 $db $key]} res + error_check_good max>min [is_substr $errorCode EINVAL] 1 + + puts "\tTest$tnum.b: Init can't be out of the min-max range." + set errorCode NONE + catch {set seq [eval {berkdb sequence} -create $sargs \ + -init 101 -min 0 -max 100 $db $key]} res + error_check_good init [is_substr $errorCode EINVAL] 1 + + # Test increment and decrement. + set min 0 + set max 100 + foreach { init inc } { $min -inc $max -dec } { + puts "\tTest$tnum.c: Test for overflow error with $inc." + test_sequence $env $db $key $min $max $init $inc + } + + # Test cachesize without wrap. Make sure to test both + # cachesizes that evenly divide the number of items in the + # sequence, and that leave unused elements at the end. + set min 0 + set max 99 + set init 1 + set cachesizes [list 2 7 11] + foreach csize $cachesizes { + foreach inc { -inc -dec } { + puts "\tTest$tnum.d:\ + -cachesize $csize, $inc, no wrap." + test_sequence $env $db $key \ + $min $max $init $inc $csize + } + } + error_check_good db_close [$db close] 0 + + # Open a regular db; we expect success on the rest of the tests. + set db [eval {berkdb_open \ + -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Test increment and decrement with wrap. Cross from negative + # to positive integers. + set min -50 + set max 99 + set wrap "-wrap" + set csize 1 + foreach { init inc } { $min -inc $max -dec } { + puts "\tTest$tnum.e: Test wrapping with $inc." + test_sequence $env $db $key \ + $min $max $init $inc $csize $wrap + } + + # Test cachesize with wrap. 
+ set min 0 + set max 99 + set init 0 + set wrap "-wrap" + foreach csize $cachesizes { + puts "\tTest$tnum.f: Test -cachesize $csize with wrap." + test_sequence $env $db $key \ + $min $max $init $inc $csize $wrap + } + + # Test multiple handles on the same sequence. + foreach csize $cachesizes { + puts "\tTest$tnum.g:\ + Test multiple handles (-cachesize $csize) with wrap." + test_sequence $env $db $key \ + $min $max $init $inc $csize $wrap 1 + } + error_check_good db_close [$db close] 0 + } + set fixed_len $orig_fixed_len + return +} + +proc test_sequence { env db key min max init \ + {inc "-inc"} {csize 1} {wrap "" } {second_handle 0} } { + global rand_init + global errorCode + + set txn "" + set txnenv 0 + if { $env != "NULL" } { + set txnenv [is_txnenv $env] + } + + set sargs " -thread " + + # The variable "skip" is the cachesize with a direction. + set skip $csize + if { $inc == "-dec" } { + set skip [expr $csize * -1] + } + + # The "limit" is the closest number to the end of the + # sequence we can ever see. + set limit [expr [expr $max + 1] - $csize] + if { $inc == "-dec" } { + set limit [expr [expr $min - 1] + $csize] + } + + # The number of items in the sequence. + set n [expr [expr $max - $min] + 1] + + # Calculate the number of values returned in the first + # cycle, and in all other cycles. + if { $inc == "-inc" } { + set firstcyclehits \ + [expr [expr [expr $max - $init] + 1] / $csize] + } elseif { $inc == "-dec" } { + set firstcyclehits \ + [expr [expr [expr $init - $min] + 1] / $csize] + } else { + puts "FAIL: unknown inc flag $inc" + } + set hitspercycle [expr $n / $csize] + + # Create the sequence. 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set seq [eval {berkdb sequence} -create $sargs -cachesize $csize \ + $wrap -init $init -min $min -max $max $txn $inc $db $key] + error_check_good is_valid_seq [is_valid_seq $seq] TRUE + if { $second_handle == 1 } { + set seq2 [eval {berkdb sequence} -create $sargs $txn $db $key] + error_check_good is_valid_seq2 [is_valid_seq $seq2] TRUE + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + # Exercise get options. + set getdb [$seq get_db] + error_check_good seq_get_db $getdb $db + + set flags [$seq get_flags] + set exp_flags [list $inc $wrap] + foreach item $exp_flags { + if { [llength $item] == 0 } { + set idx [lsearch -exact $exp_flags $item] + set exp_flags [lreplace $exp_flags $idx $idx] + } + } + error_check_good get_flags $flags $exp_flags + + set range [$seq get_range] + error_check_good get_range_min [lindex $range 0] $min + error_check_good get_range_max [lindex $range 1] $max + + set cache [$seq get_cachesize] + error_check_good get_cachesize $cache $csize + + # Within the loop, for each successive seq get we calculate + # the value we expect to receive, then do the seq get and + # compare. + # + # Always test some multiple of the number of items in the + # sequence; this tests overflow and wrap-around. + # + set mult 2 + for { set i 0 } { $i < [expr $n * $mult] } { incr i } { + # + # Calculate expected return value. + # + # On the first cycle, start from init. + set expected [expr $init + [expr $i * $skip]] + if { $i >= $firstcyclehits && $wrap != "-wrap" } { + set expected "overflow" + } + + # On second and later cycles, start from min or max. + # We do a second cycle only if wrapping is specified. 
+ if { $wrap == "-wrap" } { + if { $inc == "-inc" && $expected > $limit } { + set j [expr $i - $firstcyclehits] + while { $j >= $hitspercycle } { + set j [expr $j - $hitspercycle] + } + set expected [expr $min + [expr $j * $skip]] + } + + if { $inc == "-dec" && $expected < $limit } { + set j [expr $i - $firstcyclehits] + while { $j >= $hitspercycle } { + set j [expr $j - $hitspercycle] + } + set expected [expr $max + [expr $j * $skip]] + } + } + + # Get return value. If we've got a second handle, choose + # randomly which handle does the seq get. + if { $env != "NULL" && [is_txnenv $env] } { + set syncarg " -nosync " + } else { + set syncarg "" + } + set errorCode NONE + if { $second_handle == 0 } { + catch {eval {$seq get} $syncarg $csize} res + } elseif { [berkdb random_int 0 1] == 0 } { + catch {eval {$seq get} $syncarg $csize} res + } else { + catch {eval {$seq2 get} $syncarg $csize} res + } + + # Compare expected to actual value. + if { $expected == "overflow" } { + error_check_good overflow [is_substr $errorCode EINVAL] 1 + } else { + error_check_good seq_get_wrap $res $expected + } + } + + # A single handle requires a 'seq remove', but a second handle + # should be closed, and then we can remove the sequence. + if { $second_handle == 1 } { + error_check_good seq2_close [$seq2 close] 0 + } + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + error_check_good seq_remove [eval {$seq remove} $txn] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } +} diff --git a/test/tcl/test110.tcl b/test/tcl/test110.tcl new file mode 100644 index 00000000..eca73432 --- /dev/null +++ b/test/tcl/test110.tcl @@ -0,0 +1,168 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test110 +# TEST Partial get test with duplicates. 
+# TEST +# TEST For hash and btree, create and populate a database +# TEST with dups. Randomly selecting offset and length, +# TEST retrieve data from each record and make sure we +# TEST get what we expect. +proc test110 { method {nentries 10000} {ndups 3} args } { + global rand_init + source ./include.tcl + + set args [convert_args $method $args] + set omethod [convert_method $method] + + if { [is_record_based $method] == 1 || \ + [is_rbtree $method] == 1 } { + puts "Test110 skipping for method $method" + return + } + + # Btree with compression does not support unsorted duplicates. + if { [is_compressed $args] == 1 } { + puts "Test110 skipping for btree with compression." + return + } + # Create the database and open the dictionary + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + # + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test110.db + set env NULL + } else { + set testfile test110.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + # + # If we are using txns and running with the + # default, set the default down a bit. 
+ # + if { $nentries == 10000 } { + set nentries 100 + } + } + set testdir [get_home $env] + } + puts "Test110: $method ($args) $nentries partial get test with duplicates" + + cleanup $testdir $env + + set db [eval {berkdb_open \ + -create -mode 0644} -dup $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + berkdb srand $rand_init + + set txn "" + set count 0 + + puts "\tTest110.a: put/get loop" + for { set i 0 } { [gets $did str] != -1 && $i < $nentries } \ + { incr i } { + + set key $str + set repl [berkdb random_int 1 100] + set kvals($key) $repl + set data [chop_data $method [replicate $str $repl]] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set j 0 } { $j < $ndups } { incr j } { + set ret [eval {$db put} $txn {$key $j.$data}] + error_check_good dbput:$key:$j $ret 0 + } + + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + set ret [$dbc get -set $key] + + set j 0 + for { set dbt [$dbc get -current] } \ + { $j < $ndups } \ + { set dbt [$dbc get -next] } { + set d [lindex [lindex $dbt 0] 1] + error_check_good dupget:$key:$j $d [pad_data $method $j.$data] + incr j + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + close $did + + puts "\tTest110.b: partial get loop" + set did [open $dict] + for { set i 0 } { [gets $did str] != -1 && $i < $nentries } \ + { incr i } { + set key $str + + set data [pad_data $method [replicate $str $kvals($key)]] + set j 0 + + # Set up cursor. We will use the cursor to walk the dups. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $dbc $db] TRUE + + # Position cursor at the first of the dups. 
+ set ret [$dbc get -set $key] + + for { set dbt [$dbc get -current] } \ + { $j < $ndups } \ + { set dbt [$dbc get -next] } { + + set dupdata $j.$data + set length [expr [string length $dupdata]] + set maxndx [expr $length + 1] + + if { $maxndx > 0 } { + set beg [berkdb random_int 0 [expr $maxndx - 1]] + set len [berkdb random_int 0 [expr $maxndx * 2]] + } else { + set beg 0 + set len 0 + } + + set ret [eval {$dbc get} -current \ + {-partial [list $beg $len]}] + + # In order for tcl to handle this, we have to overwrite the + # last character with a NULL. That makes the length one less + # than we expect. + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good dbget_key $k $key + error_check_good dbget_data $d \ + [string range $dupdata $beg [expr $beg + $len - 1]] + incr j + } + + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + } + error_check_good db_close [$db close] 0 + close $did +} diff --git a/test/tcl/test111.tcl b/test/tcl/test111.tcl new file mode 100644 index 00000000..6ad16ec6 --- /dev/null +++ b/test/tcl/test111.tcl @@ -0,0 +1,384 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test111 +# TEST Test database compaction. +# TEST +# TEST Populate a database. Remove a high proportion of entries. +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. +# TEST Add back some entries, delete more entries (this time by +# TEST cursor), dump, compact, and do the before/after check again. + +proc test111 { method {nentries 10000} {tnum "111"} args } { + + # Compaction is an option for btree, recno, and hash databases. + if { [is_queue $method] == 1 || [is_heap $method] == 1} { + puts "Skipping test$tnum for method $method." + return + } + + # If a page size was specified, find out what it is. 
Pages + # might not be freed in the case of really large pages (64K) + # but we still want to run this test just to make sure + # nothing funny happens. + set pagesize 0 + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + } + + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + puts "Test$tnum: ($method $args) Database compaction." + + set t1 $testdir/t1 + set t2 $testdir/t2 + set splitopts { "" "-revsplitoff" } + set txn "" + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + } else { + set checkfunc test001.check + } + + foreach splitopt $splitopts { + set testfile $basename.db + if { $splitopt == "-revsplitoff" } { + set testfile $basename.rev.db + if { [is_record_based $method] == 1 } { + puts "Skipping\ + -revsplitoff option for method $method." + continue + } + } + set did [open $dict] + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "\tTest$tnum.a: Create and populate database ($splitopt)." 
+ set db [eval {berkdb_open -create \ + -mode 0644} $splitopt $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + global kvals + + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + set str [reverse $str] + } + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + incr count + + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + + # Record the file size and page count. Both will + # be reduced by compaction. + set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + + # Delete between 1 and maxdelete items, then skip over between + # 1 and maxskip items. This is to make the data bunchy, + # so we sometimes follow the code path where merging is + # done record by record, and sometimes the path where + # the whole page is merged at once. + + puts "\tTest$tnum.b: Delete most entries from database." + set did [open $dict] + set count [expr $nentries - 1] + set maxskip 4 + set maxdelete 48 + + # Since rrecno and rbtree renumber, we delete starting at + # nentries and working down to 0. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count > 0 } { + + # Delete a random number of successive items. 
+ set ndeletes [berkdb random_int 1 $maxdelete] + set target [expr $count - $ndeletes] + while { [expr $count > $target] && $count > 0 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key [gets $did] + } + + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + incr count -1 + } + # Skip over a random smaller number of items. + set skip [berkdb random_int 1 [expr $maxskip]] + set target [expr $count - $skip] + while { [expr $count > $target] && $count > 0 } { + incr count -1 + } + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.c: Do a dump_file on contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.d: Compact and verify database." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 $nodump ] 0 + } + + set size2 [file size $filename] + set count2 [stat_field $db stat "Page count"] + + # Now check for reduction in page count and file size. 
+ error_check_good pages_freed [expr $count1 > $count2] 1 + + #### We should look at the partitioned files ##### + if { [is_partitioned $args] == 0 } { + set reduction .96 + error_check_good \ + file_size [expr [expr $size1 * $reduction] > $size2] 1 + } + + puts "\tTest$tnum.e: Contents are the same after compaction." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + if { [is_hash $method] == 1 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + puts "\tTest$tnum.f: Add more entries to database." + # Use integers as keys instead of strings, just to mix it up + # a little. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i < $nentries } { incr i } { + set key $i + set str $i + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + set size3 [file size $filename] + set count3 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.g: Remove more entries, this time by cursor." + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + # Delete all items except those evenly divisible by + # $maxdelete -- so the db is nearly empty. 
+ for { set dbt [$dbc get -first] } { [llength $dbt] > 0 }\ + { set dbt [$dbc get -next] ; incr count } { + if { [expr $count % $maxdelete] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + } + + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.h: Save contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + + puts "\tTest$tnum.i: Compact and verify database again." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.i1: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.i2: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 $nodump ] 0 + } + + set size4 [file size $filename] + set count4 [stat_field $db stat "Page count"] + + # Check for page count and file size reduction. + # + # Identify cases where we don't expect much reduction in + # size, for example hash with large pagesizes. + # + #### We should look at the partitioned files ##### + set test_filesize 1 + if { [is_partitioned $args] } { + set test_filesize 0 + } + if { [is_hash $method] && $pagesize == 65536 } { + set test_filesize 0 + } + + # Test for reduced file size where expected. In cases where + # we don't expect much (if any) reduction, verify that at + # least the file size hasn't increased. 
+ if { $test_filesize == 1 } { + error_check_good file_size_reduced \ + [expr [expr $size3 * $reduction] > $size4] 1 + error_check_good pages_freed [expr $count3 > $count4] 1 + } else { + error_check_good file_size [expr $size3 >= $size4] 1 + error_check_good pages_freed [expr $count3 >= $count4] 1 + } + + puts "\tTest$tnum.j: Contents are the same after compaction." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + if { [is_hash $method] == 1 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + error_check_good db_close [$db close] 0 + close $did + } +} diff --git a/test/tcl/test112.tcl b/test/tcl/test112.tcl new file mode 100644 index 00000000..20a8aae6 --- /dev/null +++ b/test/tcl/test112.tcl @@ -0,0 +1,291 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test112 +# TEST Test database compaction with a deep tree. +# TEST +# TEST This is a lot like test111, but with a large number of +# TEST entries and a small page size to make the tree deep. +# TEST To make it simple we use numerical keys all the time. +# TEST +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. +# TEST Add back some entries, delete more entries (this time by +# TEST cursor), dump, compact, and do the before/after check again. + +proc test112 { method {nentries 80000} {tnum "112"} args } { + source ./include.tcl + global alphabet + + # Compaction is an option for btree, recno, and hash databases. + if { [is_queue $method] == 1 || [is_heap $method] == 1} { + puts "Skipping test$tnum for method $method." 
+ return + } + + # Skip for specified pagesizes. This test uses a small + # pagesize to generate a deep tree. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: Skipping for specific pagesizes" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + set txn "" + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + puts "Test$tnum: $method ($args) Database compaction with deep tree." + + set t1 $testdir/t1 + set t2 $testdir/t2 + cleanup $testdir $env + + set db [eval {berkdb_open -create\ + -pagesize 512 -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + } else { + set checkfunc test001.check + } + + puts "\tTest$tnum.a: Populate database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set str $i.$alphabet + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + + # Record file size, page count, and levels. They will + # be reduced by compaction. 
+ set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + set levels [stat_field $db stat "Levels"] +# error_check_good enough_levels [expr $levels >= 4] 1 + + puts "\tTest$tnum.b: Delete most entries from database." + # Leave every nth item. Since rrecno renumbers, we + # delete starting at nentries and working down to 0. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i $nentries } { $i > 0 } { incr i -1 } { + set key $i + + # Leave every n'th item. + set n 121 + if { [expr $i % $n] != 0 } { + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.c: Do a dump_file on contents." + dump_file $db "" $t1 + + puts "\tTest$tnum.d: Compact database." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size2 [file size $filename] + set count2 [stat_field $db stat "Page count"] + + # The on-disk file size should be significantly smaller. +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + set reduction .80 + error_check_good file_size [expr [expr $size1 * $reduction] > $size2] 1 +} + + # Also, we should have reduced the number of pages and levels. 
+ error_check_good page_count_reduced [expr $count1 > $count2] 1 + if { [is_hash $method] == 0 } { + set newlevels [stat_field $db stat "Levels"] + error_check_good fewer_levels [expr $newlevels < $levels ] 1 + } + + puts "\tTest$tnum.e: Check that contents are the same after compaction." + dump_file $db "" $t2 + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + puts "\tTest$tnum.f: Add more entries to database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i < $nentries } { incr i } { + set key $i + set str $i.$alphabet + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + set size3 [file size $filename] + set count3 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.g: Remove more entries, this time by cursor." + set i 0 + set n 11 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 }\ + { set dbt [$dbc get -next] ; incr i } { + if { [expr $i % $n] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.h: Save contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.i: Compact database again." 
+ for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size4 [file size $filename] + set count4 [stat_field $db stat "Page count"] + +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { +set reduction .9 +# puts "$size3 $size4" + error_check_good file_size [expr [expr $size3 * $reduction] > $size4] 1 +} + + error_check_good page_count_reduced [expr $count3 > $count4] 1 + + puts "\tTest$tnum.j: Check that contents are the same after compaction." + dump_file $db "" $t2 + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + error_check_good db_close [$db close] 0 +} + diff --git a/test/tcl/test113.tcl b/test/tcl/test113.tcl new file mode 100644 index 00000000..4e7dfdb1 --- /dev/null +++ b/test/tcl/test113.tcl @@ -0,0 +1,289 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test113 +# TEST Test database compaction with duplicates. +# TEST +# TEST This is essentially test111 with duplicates. +# TEST To make it simple we use numerical keys all the time. +# TEST +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. 
+# TEST Add back some entries, delete more entries (this time by +# TEST cursor), dump, compact, and do the before/after check again. + +proc test113 { method {nentries 10000} {ndups 5} {tnum "113"} args } { + source ./include.tcl + global alphabet + + # Compaction and duplicates can occur only with btree or hash. + if { [is_btree $method] != 1 && [is_hash $method] != 1} { + puts "Skipping test$tnum for method $method." + return + } + + # Skip for specified pagesizes. This test uses a small + # pagesize to generate a deep tree. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: Skipping for specific pagesizes" + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + set txn "" + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + puts "Test$tnum: $method ($args)\ + Database compaction with duplicates." + set subdb "" + set subindx [lsearch -exact $args "-subdb"] + if { $subindx != -1 } { + set subdb "subdb" + set args [lreplace $args $subindx $subindx] + } + + set t1 $testdir/t1 + set t2 $testdir/t2 + cleanup $testdir $env + + set db [eval {berkdb_open -create -pagesize 512 \ + -dup -dupsort -mode 0644} $args $omethod $testfile $subdb] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTest$tnum.a: Populate database with dups." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + for { set j 1 } { $j <= $ndups } { incr j } { + set str $i.$j.$alphabet + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.b: Delete most entries from database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i $nentries } { $i >= 0 } { incr i -1 } { + set key $i + + # Leave every n'th item. + set n 7 + if { [expr $i % $n] != 0 } { + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.c: Do a dump_file on contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.d: Compact database." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." 
+ error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size2 [file size $filename] + set count2 [stat_field $db stat "Page count"] + + error_check_good page_count_reduced [expr $count1 > $count2] 1 +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + set reduction .80 +# puts "$size1 $size2" + error_check_good file_size [expr [expr $size1 * $reduction] > $size2] 1 +} + + puts "\tTest$tnum.e: Check that contents are the same after compaction." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + puts "\tTest$tnum.f: Add more entries to database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + for { set j 1 } { $j <= $ndups } { incr j } { + set str $i.$j.$alphabet.extra + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + set size3 [file size $filename] + set count3 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.g: Remove more entries, this time by cursor." 
+ set i 0 + set n 11 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 }\ + { set dbt [$dbc get -next] ; incr i } { + + if { [expr $i % $n] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + } + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.h: Save contents." + dump_file $db "" $t1 + + puts "\tTest$tnum.i: Compact database again." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size4 [file size $filename] + set count4 [stat_field $db stat "Page count"] + error_check_good page_count_reduced [expr $count3 > $count4] 1 + +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { +# puts "$size3 $size4" + error_check_good file_size [expr [expr $size3 * $reduction] > $size4] 1 +} + + puts "\tTest$tnum.j: Check that contents are the same after compaction." 
+ dump_file $db "" $t2 + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test114.tcl b/test/tcl/test114.tcl new file mode 100644 index 00000000..540b1b00 --- /dev/null +++ b/test/tcl/test114.tcl @@ -0,0 +1,378 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test114 +# TEST Test database compaction with overflows. +# TEST +# TEST Populate a database. Remove a high proportion of entries. +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. +# TEST Add back some entries, delete more entries (this time by +# TEST cursor), dump, compact, and do the before/after check again. + +proc test114 { method {nentries 10000} {tnum "114"} args } { + source ./include.tcl + global alphabet + + # Compaction is an option for btree, recno, and hash databases. + if { [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Skipping compaction test$tnum for method $method." + return + } + + # Skip for fixed-length methods because we won't encounter + # overflows. + if { [is_fixed_length $method] == 1 } { + puts "Skipping test$tnum for fixed-length method $method." + return + } + + # We run with a small page size to force overflows. Skip + # testing for specified page size. + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + puts "Test$tnum: Skipping for specific pagesize." + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # If we are using an env, then testfile should just be the db name. 
+ # Otherwise it is the test directory and the name. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + append args " -cachesize { 0 500000 0 }" + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit" + } + set testdir [get_home $env] + } + + puts "Test$tnum: ($method $args) Database compaction with overflows." + set t1 $testdir/t1 + set t2 $testdir/t2 + set splitopts { "" "-revsplitoff" } + set txn "" + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + } else { + set checkfunc test001.check + } + + cleanup $testdir $env + foreach splitopt $splitopts { + set testfile $basename.db + if { $splitopt == "-revsplitoff" } { + set testfile $basename.rev.db + if { [is_record_based $method] == 1 } { + puts "Skipping\ + -revsplitoff option for method $method." + continue + } + } + set did [open $dict] + if { $env != "NULL" } { + set testdir [get_home $env] + } + + puts "\tTest$tnum.a: Create and populate database ($splitopt)." 
+ set pagesize 512 + set db [eval {berkdb_open -create -pagesize $pagesize \ + -mode 0644} $splitopt $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + set str [repeat $alphabet 100] + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + incr count + + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.b: Delete most entries from database." + set did [open $dict] + set count [expr $nentries - 1] + set n 57 + + # Leave every nth item. Since rrecno renumbers, we + # delete starting at nentries and working down to 0. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count > 0 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + + if { [expr $count % $n] != 0 } { + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + } + incr count -1 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + # Now that the delete is done we ought to have a + # lot of pages on the free list. + if { [is_hash $method] == 1 } { + set free1 [stat_field $db stat "Free pages"] + } else { + set free1 [stat_field $db stat "Pages on freelist"] + } + + puts "\tTest$tnum.c: Do a dump_file on contents." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.d: Compact and verify database." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size2 [file size $filename] + set count2 [stat_field $db stat "Page count"] + if { [is_hash $method] == 1 } { + set free2 [stat_field $db stat "Free pages"] + } else { + set free2 [stat_field $db stat "Pages on freelist"] + } + + # Reduction in on-disk size should be substantial. +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + set reduction .80 + error_check_good \ + file_size [expr [expr $size1 * $reduction] > $size2] 1 +} + + # The number of free pages should be reduced + # now that we've compacted with -freespace. + error_check_good pages_returned [expr $free1 > $free2] 1 + + # Page count should be reduced for all methods except maybe + # record-based non-queue methods. Even with recno, the + # page count may not increase. + error_check_good page_count_reduced [expr $count1 > $count2] 1 + + puts "\tTest$tnum.e: Contents are the same after compaction." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + puts "\tTest$tnum.f: Add more entries to database." + # Use integers as keys instead of strings, just to mix it up + # a little. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i < $nentries } { incr i } { + set key $i + set str [repeat $alphabet 100] + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + set size3 [file size $filename] + set count3 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.g: Remove more entries, this time by cursor." + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + # Leave every nth item. + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 }\ + { set dbt [$dbc get -next] ; incr count } { + if { [expr $count % $n] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + } + + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + if { [is_hash $method] == 1 } { + set free3 [stat_field $db stat "Free pages"] + } else { + set free3 [stat_field $db stat "Pages on freelist"] + } + + puts "\tTest$tnum.h: Save contents." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + + puts "\tTest$tnum.i: Compact and verify database again." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.i: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.i: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + error_check_good verify_dir \ + [ verify_dir $testdir "" 0 0 $nodump] 0 + } + + set size4 [file size $filename] + set count4 [stat_field $db stat "Page count"] + if { [is_hash $method] == 1 } { + set free4 [stat_field $db stat "Free pages"] + } else { + set free4 [stat_field $db stat "Pages on freelist"] + } + +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + error_check_good \ + file_size [expr [expr $size3 * $reduction] > $size4] 1 +} + + error_check_good pages_returned [expr $free3 > $free4] 1 + error_check_good \ + page_count_reduced [expr $count3 > $count4] 1 + puts "\tTest$tnum.j: Contents are the same after compaction." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + if { [is_hash $method] != 0 } { + filesort $t1 $t1.sort + filesort $t2 $t2.sort + error_check_good filecmp [filecmp $t1.sort $t2.sort] 0 + } else { + error_check_good filecmp [filecmp $t1 $t2] 0 + } + + error_check_good db_close [$db close] 0 + close $did + } +} diff --git a/test/tcl/test115.tcl b/test/tcl/test115.tcl new file mode 100644 index 00000000..85a3cf09 --- /dev/null +++ b/test/tcl/test115.tcl @@ -0,0 +1,349 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test115 +# TEST Test database compaction with user-specified btree sort. +# TEST +# TEST This is essentially test111 with the user-specified sort. +# TEST Populate a database. Remove a high proportion of entries. +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. +# TEST Add back some entries, delete more entries (this time by +# TEST cursor), dump, compact, and do the before/after check again. + +proc test115 { method {nentries 10000} {tnum "115"} args } { + source ./include.tcl + global btvals + global btvalsck + global encrypt + global passwd + + if { [is_btree $method] != 1 } { + puts "Skipping test$tnum for method $method." + return + } + + # If a page size was specified, find out what it is. Pages + # might not be freed in the case of really large pages (64K) + # but we still want to run this test just to make sure + # nothing funny happens. 
+ set pagesize 0 + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + } + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + set envargs "" + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set envargs " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + puts "Test$tnum:\ + ($method $args $encargs) Database compaction with user-specified sort." + + cleanup $testdir $env + set t1 $testdir/t1 + set t2 $testdir/t2 + set splitopts { "" "-revsplitoff" } + set txn "" + + set checkfunc test093_check + + foreach splitopt $splitopts { + set testfile $basename.db + if { $splitopt == "-revsplitoff" } { + set testfile $basename.rev.db + } + set did [open $dict] + + puts "\tTest$tnum.a: Create and populate database ($splitopt)." 
+ set db [eval {berkdb_open -create -btcompare test093_cmp1 \ + -mode 0644} $splitopt $args $encargs $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set count 0 + set btvals {} + set btvalsck {} + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + set str [reverse $str] + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + lappend btvals $key + incr count + + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.b: Delete most entries from database." + set did [open $dict] + set count [expr $nentries - 1] + set n 14 + + # Leave every nth item. Since rrecno renumbers, we + # delete starting at nentries and working down to 0. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count > 0 } { + set key $str + + if { [expr $count % $n] != 0 } { + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + } + incr count -1 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.c: Do a dump_file on contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.d: Compact and verify database." 
+ for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + if { [catch {eval \ + {berkdb dbverify -btcompare test093_cmp1}\ + $envargs $encargs {$testfile}} res] } { + puts "FAIL: Verification failed with $res" + } + + } + + set size2 [file size $filename] + set count2 [stat_field $db stat "Page count"] + + error_check_good page_count_reduced [expr $count1 > $count2] 1 + + # Check for reduction in file size. +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + set reduction .95 + error_check_good \ + file_size [expr [expr $size1 * $reduction] > $size2] 1 +} + puts "\tTest$tnum.e: Contents are the same after compaction." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 $checkfunc + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + error_check_good filecmp [filecmp $t1 $t2] 0 + + puts "\tTest$tnum.f: Add more entries to database." 
+ set did [open $dict] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + set str [reverse $str] + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + lappend btvals $key + incr count + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + set size3 [file size $filename] + set count3 [stat_field $db stat "Page count"] + + puts "\tTest$tnum.g: Remove more entries, this time by cursor." + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set dbc [eval {$db cursor} $txn] + + # Leave every nth item. + for { set dbt [$dbc get -first] } { [llength $dbt] > 0 }\ + { set dbt [$dbc get -next] ; incr count } { + if { [expr $count % $n] != 0 } { + error_check_good dbc_del [$dbc del] 0 + } + } + + error_check_good cursor_close [$dbc close] 0 + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + puts "\tTest$tnum.h: Save contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 $checkfunc + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + + puts "\tTest$tnum.i: Compact and verify database again." + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + # Run compact again in case there were free pages + # in the middle of the file after the first run. 
+ if {[catch {eval {$db compact} $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db sync] 0 + if { [catch {eval \ + {berkdb dbverify -btcompare test093_cmp1}\ + $envargs $encargs {$testfile}} res] } { + puts "FAIL: Verification failed with $res" + } + } + + set size4 [file size $filename] + set count4 [stat_field $db stat "Page count"] + + error_check_good page_count_reduced [expr $count3 > $count4] 1 + + # Check for file size reduction. +#### We should look at the partitioned files ##### +if { [is_partitioned $args] == 0 } { + error_check_good\ + file_size [expr [expr $size3 * $reduction] > $size4] 1 +} + + puts "\tTest$tnum.j: Contents are the same after compaction." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 $checkfunc + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good filecmp [filecmp $t1 $t2] 0 + + error_check_good db_close [$db close] 0 + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + } + + # Clean up so the general verification (without the custom comparator) + # doesn't fail. + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set env NULL + } else { + incr eindex + set env [lindex $args $eindex] + set testdir [get_home $env] + } + cleanup $testdir $env +} diff --git a/test/tcl/test116.tcl b/test/tcl/test116.tcl new file mode 100644 index 00000000..a683fed0 --- /dev/null +++ b/test/tcl/test116.tcl @@ -0,0 +1,343 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test116 +# TEST Test of basic functionality of lsn_reset. +# TEST +# TEST Create a database in an env. Copy it to a new file within +# TEST the same env. Reset the page LSNs. +proc test116 { method {tnum "116"} args } { + source ./include.tcl + global util_path + global passwd + global has_crypto + + set orig_tdir $testdir + puts "Test$tnum ($method): Test lsn_reset." + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + + set testfile A.db + set newtag new + set newfile $testfile.$newtag + + if { [is_heap $method] } { + set testfile1 A.db1 + set testfile2 A.db2 + set newfile1 $testfile1.$newtag + set newfile2 $testfile2.$newtag + } + + set nentries 50 + set filenames "A B C D E" + + # This test needs two envs. If one is provided, create the + # second under it. If no env is provided, create both. + set txn "" + set txnenv 0 + set envargs "" + set resetargs "" + set eindex [lsearch -exact $args "-env"] + + if { $eindex == -1 } { + puts "\tTest$tnum.a: Creating env." + env_cleanup $testdir + set env [eval {berkdb_env} \ + -create $encargs $envargs -home $testdir -txn] + append args " -auto_commit " + error_check_good dbenv [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + puts "\tTest$tnum.a: Using provided env $env." + + # Make sure the second env we create has all the + # same flags the provided env does. + if { [is_substr [$env get_open_flags] "-thread"] } { + append envargs " -thread " + } + # We only have to check for encryption flags in + # releases that include encryption, and we get + # in trouble when we call get_encrypt_flags on + # NC releases. 
+ if { $has_crypto } { + if { [is_substr $args "-encrypt"] } { + append envargs " -encryptaes $passwd " + } + if { [is_substr \ + [$env get_encrypt_flags] "-encryptaes"] } { + append envargs " -encryptaes $passwd " + append resetargs " -encrypt " + } + } + + set txn "" + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } elseif { $txnenv == 0 } { + puts "Skipping Test$tnum for non-transactional env." + return + } + set testdir [get_home $env] + } + + foreach lorder { 1234 4321 } { + if { $lorder == 1234 } { + set pattern "i i" + } else { + set pattern "I I" + } + + # Open database A, populate and close. + puts "\tTest$tnum.b: Creating database with lorder $lorder." + cleanup $testdir $env + + # Create a second directory, and create an env there. + set testdir [get_home $env] + set newdir $testdir/NEWDIR + file mkdir $newdir + set newenv [eval {berkdb_env} \ + -create $encargs $envargs -home $newdir -txn] + error_check_good newenv [is_valid_env $newenv] TRUE + + # We test with subdatabases except with the queue access + # method and heap access method, where they are not allowed. 
+ if { [is_queue $method] == 1 || \ + [is_partitioned $args] == 1 || [is_heap $method] == 1} { + set db [eval {berkdb_open} -env $env -lorder $lorder \ + $omethod $args -create -mode 0644 $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + set pgsize [stat_field $db stat "Page size"] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set data DATA.$i + error_check_good db_put [eval {$db put} \ + $txn $key [chop_data $method $data]] 0 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } else { + foreach filename $filenames { + set db [eval {berkdb_open} -env $env \ + -lorder $lorder $omethod $args -create \ + -mode 0644 $testfile $filename] + error_check_good dbopen [is_valid_db $db] TRUE + set pgsize [stat_field $db stat "Page size"] + if { $txnenv == 1 } { + set t [$env txn] + error_check_good \ + txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set data DATA.$i + error_check_good \ + db_put [eval {$db put} $txn \ + $key [chop_data $method $data]] 0 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } + } + + # Copy database file A. Reset LSNs on the copy. Then + # test that the copy is usable both in its native env + # and in a new env. + + puts "\tTest$tnum.c: Copy database and reset its LSNs." + set testdir [get_home $env] + set newdir [get_home $newenv] + + # Reset LSNs before copying. We do a little dance here: + # first copy the file within the same directory, then reset + # the fileid on the copy, then reset the LSNs on the copy, + # and only then copy the new file to the new env. Otherwise + # the LSNs would get reset on the original file. Copy + # auxiliary files if heap. 
+		file copy -force $testdir/$testfile $testdir/$newfile
+		if { [is_heap $method] == 1 } {
+			file copy -force $testdir/$testfile1 $testdir/$newfile1
+			file copy -force $testdir/$testfile2 $testdir/$newfile2
+		}
+
+		# If we're using queue extents or partitions, we must
+		# copy the extents/partitions to the new file name as well.
+		set extents ""
+		if { [is_queueext $method] || [is_partitioned $args]} {
+			copy_extent_file $testdir $testfile $newtag
+		}
+		error_check_good fileid_reset [$env id_reset $newfile] 0
+		error_check_good \
+		    lsn_reset [eval {$env lsn_reset} $resetargs {$newfile}] 0
+		if { [is_heap $method] == 1 } {
+			error_check_good fileid_reset [$env id_reset $newfile1] 0
+			error_check_good \
+			    lsn_reset [eval {$env lsn_reset} $resetargs {$newfile1}] 0
+			error_check_good fileid_reset [$env id_reset $newfile2] 0
+			error_check_good \
+			    lsn_reset [eval {$env lsn_reset} $resetargs {$newfile2}] 0
+		}
+
+		file copy -force $testdir/$newfile $newdir/$testfile
+		if { [is_heap $method] == 1 } {
+			file copy -force $testdir/$newfile1 $newdir/$testfile1
+			file copy -force $testdir/$newfile2 $newdir/$testfile2
+		}
+
+		# If we're using queue extents or partitions, we must copy
+		# the extents/partitions to the new directory as well.
+		if { [is_queueext $method] || [is_partitioned $args]} {
+			set extents [get_extfiles $testdir $newfile ""]
+			foreach extent $extents {
+				set nextent [make_ext_filename \
+				    $testdir/NEWDIR $testfile $extent]
+				file copy -force $extent $nextent
+			}
+		}
+
+		# Get the LSNs and check them.
+ set npages [getlsns \ + $testdir $testfile $extents $pgsize orig_lsns] + set newpages [getlsns \ + $testdir $newfile $extents $pgsize new_lsns] + set newdirpages [getlsns \ + $newdir $testfile $extents $pgsize newdir_lsns] + error_check_good newpages_match $npages $newpages + error_check_good newdirpages_match $npages $newdirpages + for { set i 0 } { $i < $npages } { incr i } { + error_check_binary \ + new_lsns [binary format $pattern 0 1] $new_lsns($i) + error_check_binary \ + newdirlsns_match \ + [binary format $pattern 0 1] $newdir_lsns($i) + } + + if { [ is_partitioned $args] } { + set nodump 1 + } else { + set nodump 0 + } + puts "\tTest$tnum.d: Verify directories with reset LSNs." + error_check_good \ + verify [verify_dir $testdir "\tTest$tnum.d: " 0 0 $nodump] 0 + error_check_good \ + verify [verify_dir $newdir "\tTest$tnum.e: " 0 0 $nodump] 0 + + puts "\tTest$tnum.f: Open new db, check data, close db." + if { [is_queue $method] == 1 || \ + [is_partitioned $args] == 1 || [is_heap $method] == 1} { + set db [eval {berkdb_open} -env $newenv \ + -lorder $lorder \ + $omethod $args -create -mode 0644 $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$newenv txn] + error_check_good txn [is_valid_txn $t $newenv] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr i } { + set key $i + set ret [eval {$db get} $txn $key] + error_check_good db_get \ + [lindex [lindex $ret 0] 1] \ + [pad_data $method DATA.$i] + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } else { + foreach filename $filenames { + set db [eval {berkdb_open} -env $newenv \ + -lorder $lorder $omethod $args \ + -create -mode 0644 $testfile $filename ] + error_check_good dbopen [is_valid_db $db] TRUE + if { $txnenv == 1 } { + set t [$newenv txn] + error_check_good \ + txn [is_valid_txn $t $newenv] TRUE + set txn "-txn $t" + } + for { set i 1 } { $i <= $nentries } { incr 
i } { + set key $i + set ret [eval {$db get} $txn $key] + error_check_good db_get \ + [lindex [lindex $ret 0] 1] \ + [pad_data $method DATA.$i] + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 + } + } + error_check_good newfile_rm [$env dbremove $newfile] 0 + if { [is_heap $method] == 1 } { + error_check_good newfile1_rm [$env dbremove $newfile1] 0 + error_check_good newfile2_rm [$env dbremove $newfile2] 0 + } + error_check_good newenv_close [$newenv close] 0 + fileremove -f $newdir + } + + set testdir $orig_tdir + # Close the parent env if this test created it. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} + +proc getlsns { testdir dbfile extents pgsize lsns } { + upvar $lsns file_lsns + set fid [open $testdir/$dbfile r] + fconfigure $fid -translation binary + set eof 0 + set pg 0 + while { $eof == 0 } { + set offset [expr $pg * $pgsize] + seek $fid $offset start + set file_lsns($pg) [read $fid 8] + set eof [eof $fid] + incr pg + } + close $fid + incr pg -1 + foreach extent $extents { + set ep [getlsns $testdir \ + [make_ext_filename "." $dbfile $extent] \ + {} $pgsize elsns] + for {set i 0} {$i < $ep} {incr i} { + set file_lsns($pg) $elsns($i) + incr pg + } + } + return $pg +} + +proc error_check_binary {func desired result} { + if { [binary_compare $desired $result] != 0 } { + flush stdout + flush stderr + binary scan $desired h16 d + binary scan $result h16 r + error "FAIL:[timestamp]\ + $func: expected $d, got $r" + } +} diff --git a/test/tcl/test117.tcl b/test/tcl/test117.tcl new file mode 100644 index 00000000..2fb9ca38 --- /dev/null +++ b/test/tcl/test117.tcl @@ -0,0 +1,227 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test117 +# TEST Test database compaction with requested fill percent. +# TEST +# TEST Populate a database. 
Remove a high proportion of entries. +# TEST Dump and save contents. Compact the database, requesting +# TEST fill percentages starting at 10% and working our way up to +# TEST 100. On each cycle, make sure we still have the same contents. +# TEST +# TEST Unlike the other compaction tests, this one does not +# TEST use -freespace. + +proc test117 { method {nentries 10000} {tnum "117"} args } { + source ./include.tcl + + # Compaction using a requested fill percentage is + # an option for btree and recno databases only. + if { [is_hash $method] == 1 || [is_queue $method] == 1 || [is_heap $method] == 1 } { + puts "Skipping test$tnum for method $method." + return + } + + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + puts "Test$tnum: ($method $args) Database compaction and fillpercent." + set t1 $testdir/t1 + set t2 $testdir/t2 + set splitopts { "" "-revsplitoff" } + set txn "" + + if { [is_record_based $method] == 1 } { + set checkfunc test001_recno.check + } else { + set checkfunc test001.check + } + + foreach splitopt $splitopts { + set testfile $basename.db + if { $splitopt == "-revsplitoff" } { + set testfile $basename.rev.db + if { [is_record_based $method] == 1 } { + puts "Skipping\ + -revsplitoff option for method $method." 
+ continue + } + } + set did [open $dict] + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "\tTest$tnum.a: Create and populate database ($splitopt)." + set db [eval {berkdb_open -create \ + -mode 0644} $splitopt $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + global kvals + + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + set str [reverse $str] + } + + set ret [eval \ + {$db put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + incr count + + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + error_check_good db_sync [$db sync] 0 + + if { $env != "NULL" } { + set testdir [get_home $env] + set filename $testdir/$testfile + } else { + set filename $testfile + } + + puts "\tTest$tnum.b: Delete most entries from database." + set did [open $dict] + set count [expr $nentries - 1] + set n 17 + + # Leave every nth item. Since rrecno renumbers, we + # delete starting at nentries and working down to 0. + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count > 0 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + } + + if { [expr $count % $n] != 0 } { + set ret [eval {$db del} $txn {$key}] + error_check_good del $ret 0 + } + incr count -1 + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db sync] 0 + + # Get the file size after deleting the items. In some cases + # with compression enabled, the file may grow somewhat while + # the deletes are performed. 
The file will still shrink + # overall after compacting. [#17402] + set size1 [file size $filename] + set count1 [stat_field $db stat "Page count"] + set internal1 [stat_field $db stat "Internal pages"] + set leaf1 [stat_field $db stat "Leaf pages"] + set in_use1 [expr $internal1 + $leaf1] + set free1 [stat_field $db stat "Pages on freelist"] + + puts "\tTest$tnum.c: Do a dump_file on contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + # Start by compacting pages filled less than 10% and + # work up to 100%. + for { set fillpercent 10 } { $fillpercent <= 100 }\ + { incr fillpercent 10 } { + + puts "\tTest$tnum.d: Compact and verify database\ + with fillpercent $fillpercent." + + if {[catch { eval {$db compact -fillpercent $fillpercent}} ret] } { + error "FAIL: db compact -fillpercent $fillpercent: $ret" + } + error_check_good db_sync [$db sync] 0 + set size2 [file size $filename] + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 $nodump] 0 + set count2 [stat_field $db stat "Page count"] + set internal2 [stat_field $db stat "Internal pages"] + set leaf2 [stat_field $db stat "Leaf pages"] + set free2 [stat_field $db stat "Pages on freelist"] + + # The page count and file size should never increase. + error_check_good page_count [expr $count2 <= $count1] 1 + error_check_good file_size [expr $size2 <= $size1] 1 + + # Pages in use (leaf + internal) should never increase; + # pages on free list should never decrease. + set in_use2 [expr $internal2 + $leaf2] + error_check_good pages_in_use [expr $in_use2 <= $in_use1] 1 + error_check_good pages_on_freelist [expr $free2 >= $free1] 1 + + puts "\tTest$tnum.e:\ + Contents are the same after compaction." 
+ if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db $txn $t2 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good filecmp [filecmp $t1 $t2] 0 + + # Reset originals values to the post-compaction number + # for the next pass. + set count1 $count2 + set free1 $free2 + set size1 $size2 + set in_use1 $in_use2 + } + error_check_good db_close [$db close] 0 + close $did + } +} diff --git a/test/tcl/test119.tcl b/test/tcl/test119.tcl new file mode 100644 index 00000000..7475c16e --- /dev/null +++ b/test/tcl/test119.tcl @@ -0,0 +1,258 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test119 +# TEST Test behavior when Berkeley DB returns DB_BUFFER_SMALL on a cursor. +# TEST +# TEST If the user-supplied buffer is not large enough to contain +# TEST the returned value, DB returns BUFFER_SMALL. If it does, +# TEST check that the cursor does not move -- if it moves, it will +# TEST skip items. [#13815] + +proc test119 { method {tnum "119"} args} { + source ./include.tcl + global alphabet + global errorCode + + set args [convert_args $method $args] + set omethod [convert_method $method] + puts "Test$tnum: $method ($args) Test of DB_BUFFER_SMALL." + + # Skip for queue; it has fixed-length records, so overflowing + # the buffer isn't possible with an ordinary get. + if { [is_queue $method] == 1 } { + puts "Skipping test$tnum for method $method" + return + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. 
+ set txnenv 0 + set txn "" + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + puts "\tTest$tnum.a: Set up database." + set db [eval \ + {berkdb_open_noerr -create -mode 0644} $args $omethod $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + # Test -data_buf_size with db->get. + puts "\tTest$tnum.b: Test db get with -data_buf_size." + set datalength 20 + set data [repeat "a" $datalength] + set key 1 + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + error_check_good db_put \ + [eval {$db put} $txn {$key [chop_data $method $data]}] 0 + + # A get with data_buf_size equal to the data size should work. + set ret [eval {$db get} $txn -data_buf_size $datalength $key] + error_check_good db_get_key [lindex [lindex $ret 0] 0] $key + error_check_good db_get_data [lindex [lindex $ret 0] 1] $data + + # A get with a data_buf_size decreased by one should fail. + catch {eval {$db get}\ + $txn -data_buf_size [expr $datalength - 1] $key} res + error_check_good buffer_small_error [is_substr $res DB_BUFFER_SMALL] 1 + + # Delete the item so it won't get in the way of the cursor test. + error_check_good db_del [eval {$db del} $txn $key] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + # Test -data_buf_size and -key_buf_size with dbc->get. + # + # Set up a database that includes large and small keys and + # large and small data in various combinations. + # + # Create small buffer equal to the largest page size. This will + # get DB_BUFFER_SMALL errors. + # Create big buffer large enough to never get DB_BUFFER_SMALL + # errors with this data set. 
+ + puts "\tTest$tnum.c:\ + Test cursor get with -data_buf_size and -key_buf_size." + set key $alphabet + set data $alphabet + set nentries 100 + set start 100 + set bigkey [repeat $key 8192] + set bigdata [repeat $data 8192] + set buffer [expr 64 * 1024] + set bigbuf [expr $buffer * 8] + + puts "\tTest$tnum.c1: Populate database." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + # Put in a big key every X data items, and big data every + # Y data items. X and Y should be small enough that we + # hit the case where both X and Y are big. + set x 5 + set y 7 + for { set i $start } { $i < [expr $nentries + $start] } { incr i } { + # If we have a record-based method, we can't have big keys. + # Just use the count. + if { [is_record_based $method] == 1 } { + set k $i + } else { + if { [expr $i % $x] == 1 } { + set k $i.$bigkey + } else { + set k $i.$key + } + } + + # We can have big data on any method. + if { [expr $i % $y] == 1 } { + set d $i.$bigdata + } else { + set d $i.$data + } + error_check_good db_put \ + [eval {$db put} $txn {$k [chop_data $method $d]}] 0 + } + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + # Walk the database with a cursor. When we hit DB_BUFFER_SMALL, + # make sure DB returns the appropriate key/data pair. + puts "\tTest$tnum.c2: Walk the database with a cursor." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + set curs [eval {$db cursor} $txn] + error_check_good cursor [is_valid_cursor $curs $db] TRUE + + # Since hash is not sorted, we'll test that no items are + # skipped by keeping a list of all items retrieved, and + # making sure it is complete and that each item is unique + # at the end of the test. 
+ set hashitems {} + + set count $start + for { set kd [catch {eval $curs get \ + -key_buf_size $buffer -data_buf_size $buffer -first} res] } \ + { $count < [expr $nentries + $start] } \ + { set kd [catch {eval $curs get \ + -key_buf_size $buffer -data_buf_size $buffer -next} res] } { + if { $kd == 1 } { + # Make sure we have the expected error. + error_check_good buffer_small_error \ + [is_substr $errorCode DB_BUFFER_SMALL] 1 + + # Adjust the buffer sizes to fit the big key or data. + if { [expr $count % $x] == 1 } { + set key_buf $bigbuf + } else { + set key_buf $buffer + } + if { [expr $count % $y] == 1 } { + set data_buf $bigbuf + } else { + set data_buf $buffer + } + + # Hash is not sorted, so just make sure we can get + # the item with a large buffer and check it later. + # Likewise for partition callback. + if { [is_hash $method] == 1 || \ + [is_partition_callback $args] == 1} { + set data_buf $bigbuf + set key_buf $bigbuf + } + + # Retrieve with big buffer; there should be no error. + # This also walks the cursor forward. + set nextbig [catch {eval $curs get -key_buf_size \ + $key_buf -data_buf_size $data_buf -next} res] + error_check_good data_big_buffer_get $nextbig 0 + + # Extract the item number. + set key [lindex [lindex $res 0] 0] + set data [lindex [lindex $res 0] 1] + if { [string first . $key] != -1 } { + set keyindex [string first . $key] + set keynumber \ + [string range $key 0 [expr $keyindex - 1]] + } else { + set keynumber $key + } + set dataindex [string first . $data] + set datanumber \ + [string range $data 0 [expr $dataindex - 1]] + + # If not hash, check that item number is correct. + # If hash, save the number for later verification. 
+ if { [is_hash $method] == 0 \ + && [is_partition_callback $args] == 0 } { + error_check_good key_number $keynumber $count + error_check_good data_number $datanumber $count + } else { + lappend hashitems $keynumber + } + } else { + # For hash, save the item numbers of all items + # retrieved, not just those returning DB_BUFFER_SMALL. + if { [is_hash $method] == 1 || \ + [is_partition_callback $args] == 1} { + set key [lindex [lindex $res 0] 0] + set keyindex [string first . $key] + set keynumber \ + [string range $key 0 [expr $keyindex - 1]] + lappend hashitems $keynumber + } + } + incr count + set errorCode NONE + } + error_check_good curs_close [$curs close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + # Now check the list of items retrieved from hash. + if { [is_hash $method] == 1 || \ + [is_partition_callback $args] == 1} { + set sortedhashitems [lsort $hashitems] + for { set i $start } \ + { $i < [expr $nentries + $start] } { incr i } { + set hashitem \ + [lindex $sortedhashitems [expr $i - $start]] + error_check_good hash_check $hashitem $i + } + } + error_check_good db_close [$db close] 0 +} + diff --git a/test/tcl/test120.tcl b/test/tcl/test120.tcl new file mode 100644 index 00000000..830bbafd --- /dev/null +++ b/test/tcl/test120.tcl @@ -0,0 +1,98 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test120 +# TEST Test of multi-version concurrency control. +# TEST +# TEST Test basic functionality: a snapshot transaction started +# TEST before a regular transaction's put can't see the modification. +# TEST A snapshot transaction started after the put can see it. + +proc test120 { method {tnum "120"} args } { + source ./include.tcl + + # This test needs its own env. 
+ set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test$tnum skipping for env $env" + return + } + + # MVCC is not allowed with queue methods. + if { [is_queue $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + puts "\tTest$tnum ($method): MVCC and blocking." + + set args [convert_args $method $args] + set omethod [convert_method $method] + set encargs "" + set args [split_encargs $args encargs] + set pageargs "" + split_pageargs $args pageargs + set filename "test.db" + + # Create transactional env. Specifying -multiversion makes + # all databases opened within the env -multiversion. + env_cleanup $testdir + puts "\tTest$tnum.a: Creating txn env." + set env [eval {berkdb_env}\ + -create -txn -multiversion $pageargs $encargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # Open database. + puts "\tTest$tnum.b: Creating -multiversion db." + set db [eval {berkdb_open} \ + -create -auto_commit -env $env $omethod $args $filename] + error_check_good db_open [is_valid_db $db] TRUE + + puts "\tTest$tnum.c: Start transactions." + # Start two transactions. T1 is the writer, so it's a regular + # transaction. T2 is the reader and uses -snapshot. + set t1 [$env txn] + set txn1 "-txn $t1" + set t2 [$env txn -snapshot] + set txn2 "-txn $t2" + + # Enter some data using txn1. + set key 1 + set data DATA + error_check_good \ + t1_put [eval {$db put} $txn1 $key [chop_data $method $data]] 0 + + # Txn2 cannot see txn1's put, but it does not block. + puts "\tTest$tnum.d: Txn2 can't see txn1's put." + set ret [eval {$db get} $txn2 $key] + error_check_good txn2_get [llength $ret] 0 + + # Commit txn1. Txn2 get still can't see txn1's put. 
+ error_check_good t1_commit [$t1 commit] 0 + set ret [eval {$db get} $txn2 $key] + error_check_good txn2_get [llength $ret] 0 + error_check_good db_sync [$db sync] 0 + set ret [eval {$db get} $txn2 $key] + error_check_good txn2_get [llength $ret] 0 + + # Start a new txn with -snapshot. It can see the put. + puts "\tTest$tnum.e: A new txn can see txn1's put." + set t3 [$env txn -snapshot] + set txn3 "-txn $t3" + set ret [eval {$db get} $txn3 $key] + error_check_good \ + t3_get $ret [list [list $key [pad_data $method $data]]] + + # Commit txns. + error_check_good t2_commit [$t2 commit] 0 + error_check_good t3_commit [$t3 commit] 0 + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/test121.tcl b/test/tcl/test121.tcl new file mode 100644 index 00000000..65b3ef45 --- /dev/null +++ b/test/tcl/test121.tcl @@ -0,0 +1,125 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test121 +# TEST Tests of multi-version concurrency control. +# TEST +# TEST MVCC and cursor adjustment. +# TEST Set up a -snapshot cursor and position it in the middle +# TEST of a database. +# TEST Write to the database, both before and after the cursor, +# TEST and verify that it stays on the same position. + +proc test121 { method {tnum "121"} args } { + source ./include.tcl + + # This test needs its own env. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test$tnum skipping for env $env" + return + } + + # MVCC is not allowed with queue methods. + if { [is_queue $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + puts "\tTest$tnum ($method): MVCC and cursor adjustment." 
+ + set args [convert_args $method $args] + set omethod [convert_method $method] + set encargs "" + set args [split_encargs $args encargs] + set filename "test.db" + set pageargs "" + set args [split_pageargs $args pageargs] + + # Create transactional env. Specifying -multiversion makes + # all databases opened within the env -multiversion. + + env_cleanup $testdir + puts "\tTest$tnum.a: Creating txn env." + + # Raise cachesize so this test focuses on cursor adjustment + # and not on small cache issues. + set cachesize [expr 2 * 1024 * 1024] + set max_locks 2000 + set max_objects 2000 + set env [eval {berkdb_env -create -cachesize "0 $cachesize 1"}\ + -lock_max_locks $max_locks -lock_max_objects $max_objects\ + -txn -multiversion $encargs $pageargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # Open database. + puts "\tTest$tnum.b: Creating -multiversion db." + set db [eval {berkdb_open} \ + -create -auto_commit -env $env $omethod $args $pageargs $filename] + error_check_good db_open [is_valid_db $db] TRUE + + # Start transactions. + puts "\tTest$tnum.c: Start txns with -snapshot." + set t1 [$env txn -snapshot] + set txn1 "-txn $t1" + + # Enter some data using txn1. Leave holes, by using keys + # 2, 4, 6 .... + set niter 10000 + set data DATA + for { set i 1 } { $i <= $niter } { incr i } { + set key [expr $i * 2] + error_check_good t1_put [eval {$db put} $txn1 $key $data.$key] 0 + } + error_check_good t1_commit [$t1 commit] 0 + + # Open a read-only cursor. + set t2 [$env txn -snapshot] + set txn2 "-txn $t2" + set cursor [eval {$db cursor} $txn2] + error_check_good db_cursor [is_valid_cursor $cursor $db] TRUE + + # Walk the cursor halfway through the database. 
+ set i 1 + set halfway [expr $niter / 2] + for { set ret [$cursor get -first] } \ + { $i <= $halfway } \ + { set ret [$cursor get -next] } { + incr i + } + + set currentkey [lindex [lindex $ret 0] 0] + set currentdata [lindex [lindex $ret 0] 1] + + # Start a new transaction and use it to enter more data. + # Verify that the cursor is not changed. + puts "\tTest$tnum.c: Enter more data." + set t1 [$env txn -snapshot] + set txn1 "-txn $t1" + + # Enter more data, filling in the holes from the first + # time around by using keys 1, 3, 5 .... Cursor should + # stay on the same item. + for { set i 1 } { $i <= $niter } { incr i } { + set key [expr [expr $i * 2] - 1] + error_check_good t1_put [eval {$db put} $txn1 $key $data.$key] 0 + set ret [$cursor get -current] + set k [lindex [lindex $ret 0] 0] + set d [lindex [lindex $ret 0] 1] + error_check_good current_key $k $currentkey + error_check_good current_data $d $currentdata + } + + error_check_good t1_commit [$t1 commit] 0 + error_check_good cursor_close [$cursor close] 0 + error_check_good t2_commit [$t2 commit] 0 + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/test122.tcl b/test/tcl/test122.tcl new file mode 100644 index 00000000..6dd87767 --- /dev/null +++ b/test/tcl/test122.tcl @@ -0,0 +1,103 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2006, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test122 +# TEST Tests of multi-version concurrency control. +# TEST +# TEST MVCC and databases that turn multi-version on and off. + +proc test122 { method {tnum "122"} args } { + source ./include.tcl + + # This test needs its own env. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Test$tnum skipping for env $env" + return + } + + # MVCC is not allowed with queue methods. 
+ if { [is_queue $method] == 1 } { + puts "Test$tnum skipping for method $method" + return + } + + puts "\tTest$tnum ($method): Turning MVCC on and off." + + set args [convert_args $method $args] + set omethod [convert_method $method] + set encargs "" + set args [split_encargs $args encargs] + set pageargs "" + split_pageargs $args pageargs + set filename "test.db" + + # Create transactional env. Don't specify -multiversion to + # the env, because we need to turn it on and off. + env_cleanup $testdir + + puts "\tTest$tnum.a: Creating txn env." + set cacheargs " -cachesize {0 524288 1} " + set env [eval {berkdb_env}\ + -create $cacheargs -txn $pageargs $encargs -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # Open database. + puts "\tTest$tnum.b: Creating -multiversion db." + set db [eval {berkdb_open} -multiversion \ + -create -auto_commit -env $env $omethod $args $filename] + error_check_good db_open [is_valid_db $db] TRUE + + # Put some data. The tcl interface automatically does it + # transactionally. + set niter 100 + for { set i 1 } { $i < $niter } { incr i } { + set key $i + set data DATA.$i + error_check_good db_put [eval {$db put} $key $data] 0 + } + + # Open a read-only handle and also a txn -snapshot handle. + puts "\tTest$tnum.c: Open read-only handle and txn -snapshot handle." + set t [$env txn -snapshot] + set txn "-txn $t" + set snapshotdb [eval {berkdb_open} \ + $txn -env $env $omethod $args $filename] + error_check_good snapshotdb [is_valid_db $snapshotdb] TRUE + set readonlydb [eval {berkdb_open} \ + -auto_commit -env $env $omethod $args $filename] + error_check_good readonlydb [is_valid_db $readonlydb] TRUE + + + # Overwrite all the data. The read-only handle will see the + # new data and the -snapshot handle will see the old data. + puts "\tTest$tnum.d: Overwrite data." 
+ for { set i 1 } { $i < $niter } { incr i } { + set key $i + set data NEWDATA.$i + error_check_good db_put [eval {$db put} $key $data] 0 + } + + puts "\tTest$tnum.e: Check data through handles." + for { set i 1 } { $i < $niter } { incr i } { + set r_ret [eval {$readonlydb get} $i] + set s_ret [eval {$snapshotdb get} $txn $i] + set r_key [lindex [lindex $r_ret 0] 0] + set r_data [lindex [lindex $r_ret 0] 1] + set s_key [lindex [lindex $s_ret 0] 0] + set s_data [lindex [lindex $s_ret 0] 1] + } + + error_check_good t_commit [$t commit] 0 + + # Clean up. + error_check_good db_close [$db close] 0 + error_check_good snapshotdb_close [$snapshotdb close] 0 + error_check_good readonlydb_close [$readonlydb close] 0 + error_check_good env_close [$env close] 0 +} diff --git a/test/tcl/test123.tcl b/test/tcl/test123.tcl new file mode 100644 index 00000000..3aa0bfbb --- /dev/null +++ b/test/tcl/test123.tcl @@ -0,0 +1,81 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test123 +# TEST Concurrent Data Store cdsgroup smoke test. +# TEST +# TEST Open a CDS env with -cdb_alldb. +# TEST Start a "txn" with -cdsgroup. +# TEST Create two databases in the env, do a cursor put +# TEST in both within the same txn. This should succeed. + +proc test123 { method args } { + source ./include.tcl + + # If we are using an env, then skip this test. It needs its own. + set eindex [lsearch -exact $args "-env"] + if { $eindex != -1 } { + incr eindex + set env [lindex $args $eindex] + puts "Skipping test123 for env $env" + return + } + + # Heap and Queue don't support sub-databases. 
+ if { [is_queue $method] == 1 || [is_heap $method] == 1} { + puts "Skipping test123 for method $method" + return + } + if { [is_partitioned $args] == 1 } { + puts "Test123 skipping for partitioned $method" + return + } + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + set pageargs "" + split_pageargs $args pageargs + set dbname test123.db + set tnum "123" + + puts "Test$tnum: CDB with cdsgroup ($method)" + env_cleanup $testdir + + # Open environment and start cdsgroup "transaction". + puts "\tTest$tnum.a: Open env." + set env [eval {berkdb_env -create} \ + $pageargs $encargs -cdb -cdb_alldb -home $testdir] + error_check_good dbenv [is_valid_env $env] TRUE + set txn [$env cdsgroup] + + # Env is created, now set up 2 databases + puts "\tTest$tnum.b: Open first database." + set db1 [eval {berkdb_open}\ + -create -env $env $args $omethod -txn $txn $dbname "A"] + puts "\tTest$tnum.b1: Open cursor." + set curs1 [eval {$db1 cursor} -update -txn $txn] + puts "\tTest$tnum.b2: Initialize cursor and do a put." + error_check_good curs1_put [eval {$curs1 put} -keyfirst 1 DATA1] 0 + + puts "\tTest$tnum.c: Open second database." + set db2 [eval {berkdb_open}\ + -create -env $env $args $omethod -txn $txn $dbname "B"] + puts "\tTest$tnum.c1: Open cursor." + set curs2 [eval {$db2 cursor} -update -txn $txn] + puts "\tTest$tnum.b2: Initialize cursor and do a put." + error_check_good curs2_put [eval {$curs2 put} -keyfirst 2 DATA2] 0 + + # Clean up. + $curs2 close + $curs1 close + $txn commit + $db2 close + $db1 close + $env close + +} + diff --git a/test/tcl/test124.tcl b/test/tcl/test124.tcl new file mode 100644 index 00000000..053f2ae8 --- /dev/null +++ b/test/tcl/test124.tcl @@ -0,0 +1,150 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2008, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test124 +# TEST +# TEST Test db->verify with noorderchk and orderchkonly flags. +# TEST +# TEST Create a db with a non-standard sort order. Check that +# TEST it fails a regular verify and succeeds with -noorderchk. +# TEST Do a similar test with a db containing subdbs, one with +# TEST the standard order and another with non-standard. + +proc test124 { method { nentries 1000 } args } { + source ./include.tcl + global encrypt + + set tnum "124" + if { [is_btree $method] == 0 } { + puts "Skipping test$tnum for method $method" + return + } + + set args [convert_args $method $args] + set encargs "" + set args [split_encargs $args encargs] + set omethod [convert_method $method] + + puts "Test$tnum ($method $args):\ + db->verify with -noorderchk and -orderchkonly." + + # If we are given an env, use it. Otherwise, open one. + # We need it for the subdb portion of the test. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + env_cleanup $testdir + set env [eval\ + {berkdb_env_noerr} $encargs -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + } else { + incr eindex + set env [lindex $args $eindex] + set envflags [$env get_open_flags] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + puts "\tTest$tnum.a:\ + Create and populate database with non-standard sort." + set testfile "test124.db" + + # We already know the test is btree only, so btcompare is okay. + set sortflags " -btcompare test093_cmp1 " + set db [eval {berkdb_open_noerr -env $env -create \ + -mode 0644} $sortflags $args {$omethod $testfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + # Start a txn, populate, and close. 
+ set txn "" + if { $txnenv == 1 } { + set txn [$env txn] + } + populate $db $method $txn $nentries 0 0 + error_check_good db_close [$db close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$txn commit] 0 + } + + puts "\tTest$tnum.b: Verify with -noorderchk succeeds." + set ret [eval {berkdb dbverify} -env $env -noorderchk $testfile] + error_check_good verify_noorderchk $ret 0 + + puts "\tTest$tnum.c: Check that a regular verify fails." + catch { set ret [eval {berkdb dbverify} -env $env $testfile] } caught + error_check_good verify_fails [is_substr $caught DB_VERIFY_BAD] 1 + + # Skip the subdb portion of the test for partitioned databases -- + # you cannot have multiple databases in a file *and* partitioning. + if { [is_partitioned $args] == 0 } { + + puts "\tTest$tnum.d:\ + Create and populate 2 subdbs, one with non-standard sort." + set testfile2 "test124.db2" + set sub1 "SUB1" + set sub2 "SUB2" + set sdb1 [eval {berkdb_open_noerr -env $env -create \ + -mode 0644} $args {$omethod $testfile2 $sub1}] + set sdb2 [eval {berkdb_open_noerr -env $env -create \ + -mode 0644} $sortflags $args {$omethod $testfile2 $sub2}] + error_check_good sdb1open [is_valid_db $sdb1] TRUE + error_check_good sdb2open [is_valid_db $sdb2] TRUE + + set nentries [expr $nentries * 2] + if { $txnenv == 1 } { + set txn [$env txn] + } + populate $sdb1 $method $txn $nentries 0 0 + populate $sdb2 $method $txn $nentries 0 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$txn commit] 0 + } + + error_check_good sdb1_close [$sdb1 close] 0 + error_check_good sdb2_close [$sdb2 close] 0 + + # Verify the whole file with -noorderchk. + puts "\tTest$tnum.e: Verify with -noorderchk succeeds." + set ret \ + [eval {berkdb dbverify} -env $env -noorderchk $testfile2] + + # Verify with the sorted subdb with -orderchkonly. + puts "\tTest$tnum.f:\ + Verify with -orderchkonly succeeds for sorted subdb." 
+ set ret [eval {berkdb dbverify} \ + -env $env -orderchkonly $testfile2 $sub1] + + # The attempt to verify the non-standard-sort subdb + # with -orderchkonly is expected to fail. + puts "\tTest$tnum.g: Verify with\ + -orderchkonly fails for non-standard-sort subdb." + catch { set ret [eval {berkdb dbverify} \ + -env $env -orderchkonly $testfile2 $sub2] } caught + error_check_good \ + verify_fails [is_substr $caught DB_VERIFY_BAD] 1 + + } + + # Clean up. + # + # Delete test files -- we cannot have non-standard-sort + # files hanging around because they will cause the + # automatic verify in a complete run to fail. + set testdir [get_home $env] + if { [is_partitioned $args] == 0 } { + fileremove -f $testdir/$testfile2 + } + fileremove -f $testdir/$testfile + cleanup $testdir $env + + # Close the env if this test created it. + if { $eindex == -1 } { + error_check_good env_close [$env close] 0 + } +} diff --git a/test/tcl/test125.tcl b/test/tcl/test125.tcl new file mode 100644 index 00000000..e8f40cd6 --- /dev/null +++ b/test/tcl/test125.tcl @@ -0,0 +1,205 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2009, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test125 +# TEST Test cursor comparison API. +# TEST +# TEST The cursor comparison API reports whether two cursors within +# TEST the same database are at the same position. It does not report +# TEST any information about relative position. +# TEST +# TEST 1. Test two uninitialized cursors (error). +# TEST 2. Test one uninitialized cursor, one initialized (error). +# TEST 3. Test two cursors in different databases (error). +# TEST 4. Put two cursors in the same place, test for match. Walk +# TEST them back and forth a bit, more matching. +# TEST 5. Two cursors in the same spot. Delete through one. 
+ +proc test125 { method args } { + global errorInfo + source ./include.tcl + set tnum 125 + + set args [convert_args $method $args] + set omethod [convert_method $method] + + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set testfile2 $testdir/test$tnum-2.db + set env NULL + } else { + set testfile test$tnum.db + set testfile2 test$tnum-2.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + set t "" + set txn "" + + # Run the test with and without duplicates, and with and without + # large data items. + foreach dupflag { "" "-dup" "-dup -dupsort" } { + if { [is_compressed $args] && $dupflag == "-dup" } { + puts "Skipping unsorted dups for btree with compression" + continue + } + foreach bigdata { 0 1 } { + set msg "" + if { $bigdata == 1 } { + set msg "with big data" + } + puts "Test$tnum ($method $dupflag $msg):\ + Cursor comparison API." + if { [llength $dupflag] > 0 } { + if { [is_record_based $method] ||\ + [is_rbtree $method] } { + puts "Skipping test for method $method\ + with duplicates." + continue + } + set dups 1 + } else { + set dups 0 + } + + # Testdir will get reset from the env's home dir back + # to the default if this calls something that sources + # include.tcl, since testdir is a global. Set it correctly + # here each time through the loop. + # + if { $env != "NULL" } { + set testdir [get_home $env] + } + cleanup $testdir $env + + puts "\tTest$tnum.a: Test failure cases." + # Open two databases. 
+ set db [eval {berkdb_open_noerr} -create -mode 0644 \ + $omethod $args $dupflag {$testfile}] + error_check_good db_open [is_valid_db $db] TRUE + set db2 [eval {berkdb_open_noerr} -create -mode 0644 \ + $omethod $args $dupflag {$testfile2}] + error_check_good db2_open [is_valid_db $db2] TRUE + + # Populate the databases. + set nentries 1000 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + populate $db $method $t $nentries $dups $bigdata + populate $db2 $method $t $nentries $dups $bigdata + + # Test error conditions. + puts "\t\tTest$tnum.a1: Uninitialized cursors." + set c1 [eval {$db cursor} $txn] + set c2 [eval {$db cursor} $txn] + set ret [catch {$c1 cmp $c2} res] + error_check_good uninitialized_cursors $ret 1 + + puts "\t\tTest$tnum.a2:\ + One initialized, one uninitialized cursor." + $c1 get -first + set ret [catch {$c1 cmp $c2} res] + error_check_good one_uninitialized_cursor $ret 1 + + puts "\t\tTest$tnum.a3: Cursors in different databases." + set c3 [eval {$db2 cursor} $txn] + set ret [$c3 get -first] + set ret [catch {$c1 cmp $c3} res] + error_check_good cursors_in_different_databases $ret 1 + + # Clean up second database - we won't be using it again. + $c3 close + $db2 close + + # Test valid conditions. + # + # Initialize second cursor to -first. Cursor cmp should + # match; c1 was already there. + puts "\tTest$tnum.b: Cursors initialized to -first." + set ret [$c2 get -first] + error_check_good c1_and_c2_on_first [$c1 cmp $c2] 0 + + # Walk to the end. We should alternate between + # matching and not matching. + puts "\tTest$tnum.c: Walk cursors to the last item." + for { set i 1 } { $i < $nentries } { incr i } { + + # First move c1; cursors won't match. + set ret [$c1 get -next] + error_check_bad cmp_does_not_match [$c1 cmp $c2] 0 + + # Now move c2; cursors will match again. 
+ set ret [$c2 get -next] + error_check_good cmp_matches [$c1 cmp $c2] 0 + } + + # Now do it in reverse, starting at -last and backing up. + puts "\tTest$tnum.d: Cursors initialized to -last." + set ret [$c1 get -last] + set ret [$c2 get -last] + error_check_good c1_and_c2_on_last [$c1 cmp $c2] 0 + + puts "\tTest$tnum.e: Walk cursors back to the first item." + for { set i 1 } { $i < $nentries } { incr i } { + + # First move c1; cursors won't match. + set ret [$c1 get -prev] + error_check_bad cmp_does_not_match [$c1 cmp $c2] 0 + + # Now move c2; cursors will match again. + set ret [$c2 get -prev] + error_check_good cmp_matches [$c1 cmp $c2] 0 + } + + # A cursor delete leaves the cursor in the same place, so a + # comparison should still work. + puts "\tTest$tnum.f:\ + Position comparison works with cursor deletes." + set ret [$c1 get -first] + set ret [$c2 get -first] + + # Do the cursor walk again, deleting as we go. + puts "\tTest$tnum.g: Cursor walk with deletes." + for { set i 1 } { $i < $nentries } { incr i } { + + # First move c1; cursors won't match. + set ret [$c1 get -next] + error_check_bad cmp_does_not_match [$c1 cmp $c2] 0 + + # Now move c2; cursors will match again. + set ret [$c2 get -next] + error_check_good cmp_matches [$c1 cmp $c2] 0 + + # Now delete through c2; cursors should still match. + set ret [$c2 del] + error_check_good cmp_still_matches [$c1 cmp $c2] 0 + } + + # Close cursors and database; commit txn. + error_check_good c1_close [$c1 close] 0 + error_check_good c2_close [$c2 close] 0 + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + + error_check_good db_close [$db close] 0 + } + } +} diff --git a/test/tcl/test126.tcl b/test/tcl/test126.tcl new file mode 100644 index 00000000..87563a0d --- /dev/null +++ b/test/tcl/test126.tcl @@ -0,0 +1,348 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test126 +# TEST Test database bulk update for non-duplicate databases. +# TEST +# TEST Put with -multiple, then with -multiple_key, +# TEST and make sure the items in database are what we put. +# TEST Later, delete some items with -multiple, then with -multiple_key, +# TEST and make sure if the correct items are deleted. + +proc test126 {method { nentries 10000 } { tnum "126" } {callback 1} + {subdb 0} {secondary 0} args } { + source ./include.tcl + + # For rrecno, when keys are deleted, the ones after will move forward, + # and the keys change, which is not good to verify after delete. + # So, we skip rrecno temporarily. + # Heap databases do not current support bulk operations + if {[is_rrecno $method] || [is_heap $method] } { + puts "Skipping test$tnum for $method test." + return + } + + set subname "" + set sub_msg "" + + # Check if we use sub database. + if { $subdb } { + if {[is_queue $method]} { + puts "Skipping test$tnum with sub database for $method." + return + } + if {[is_partitioned $args]} { + puts "Skipping test$tnum with sub database\ + for partitioned $method test." + return + } + set subname "subdb" + set sub_msg "using sub databases" + } + + set sec_msg "" + # Check if we use secondary database. + if { $secondary } { + set sec_msg "with secondary databases" + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + set txn "" + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + if {$subdb && $secondary } { + puts "Skipping test$tnum $sub_msg $sec_msg for non-env test." 
+ return + } + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + set sec_args $args + + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test$tnum: $method ($args)\ + Database bulk update $sub_msg $sec_msg." + + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $args $omethod $testfile $subname] + error_check_good dbopen [is_valid_db $db] TRUE + + # Open the secondary database and do association. + # This is the test for [#18878]. + if { $secondary } { + if { $subdb } { + set sec_subname "subdb-secondary" + set sec_testfile $testfile + } else { + set sec_subname "" + if { $eindex == -1 } { + set sec_testfile $testdir/test$tnum-secondary.db + } else { + set sec_testfile test$tnum-secondary.db + } + } + # Open a simple dupsort btree database. + # In order to be consistent, we need to use all the passed-in + # am-unrelated flags. + set sec_db [eval {berkdb_open_noerr -create -mode 0644} $sec_args \ + -dup -dupsort -btree $sec_testfile $sec_subname] + error_check_good secdb_open [is_valid_db $sec_db] TRUE + set ret [$db associate -create [callback_n $callback] $sec_db] + error_check_good db_associate $ret 0 + } + + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + set did [open $dict] + set count 0 + + + # Do bulk put. + # First, we put half the entries using put -multiple. + # Then, we put the rest half using put -multiple_key. + + puts "\tTest$tnum.a: Bulk put data using -multiple." 
+ set key_list1 {} + set data_list1 {} + while { [gets $did str] != -1 && $count < $nentries / 2 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + set str [reverse $str] + } + lappend key_list1 $key + lappend data_list1 [make_fixed_length $method $str] + incr count + } + + set ret [eval {$db put} $txn -multiple {$key_list1 $data_list1}] + error_check_good {put(-multiple)} $ret 0 + + # Put again, should succeed + set ret [eval {$db put} $txn -multiple {$key_list1 $data_list1}] + error_check_good {put_again(-multiple)} $ret 0 + + puts "\tTest$tnum.b: Bulk put data using -multiple_key." + set pair_list1 {} + while { [gets $did str] != -1 && $count < $nentries } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key $str + set str [reverse $str] + } + lappend pair_list1 $key [make_fixed_length $method $str] + incr count + } + + set ret [eval {$db put} $txn -multiple_key {$pair_list1}] + error_check_good {put(-multiple_key)} $ret 0 + + # Put again, should succeed + set ret [eval {$db put} $txn -multiple_key {$pair_list1}] + error_check_good {put_again(-multiple_key)} $ret 0 + + close $did + + puts "\tTest$tnum.c: Verify the data after bulk put." + set len [llength $pair_list1] + for {set indx1 0; set indx2 1} {$indx2 < $len} \ + {incr indx1 2; incr indx2 2} { + lappend key_list1 [lindex $pair_list1 $indx1] + lappend data_list1 [lindex $pair_list1 $indx2] + } + + test126_check_prirecords $db $key_list1 $data_list1 $txn + + if { $secondary } { + puts "\tTest$tnum.c.2: Verify the data in secondary database." + set sec_key_list {} + foreach key $key_list1 data $data_list1 { + lappend sec_key_list \ + [[callback_n $callback] $key $data] + } + test126_check_secrecords $sec_db $sec_key_list \ + $key_list1 $data_list1 $txn + } + + puts "\tTest$tnum.d: Bulk delete data using -multiple." 
+ set key_list2 {} + for { set i 0 } { $i < $nentries} { incr i 3 } { + lappend key_list2 [lindex $key_list1 $i] + } + set ret [eval {$db del} $txn -multiple {$key_list2}] + error_check_good {del(-multiple)} $ret 0 + + # Delete again, should return DB_NOTFOUND/DB_KEYEMPTY. + set ret [catch {eval {$db del} $txn -multiple {$key_list2}} res] + error_check_good {Check DB_NOTFOUND/DB_KEYEMPTY} \ + [expr [is_substr $res DB_NOTFOUND] || \ + [is_substr $res DB_KEYEMPTY]] 1 + + puts "\tTest$tnum.e: Bulk delete data using -multiple_key." + set pair_list2 {} + for { set i 1 } { $i < $nentries} { incr i 3} { + lappend pair_list2 [lindex $key_list1 $i] \ + [lindex $data_list1 $i] + } + + set ret [eval {$db del} $txn -multiple_key {$pair_list2}] + error_check_good {del(-multiple_key)} $ret 0 + + # Delete again, should return DB_NOTFOUND/DB_KEYEMPTY. + set ret [catch {eval {$db del} $txn -multiple_key {$pair_list2}} res] + error_check_good {Check DB_NOTFOUND/DB_KEYEMPTY} \ + [expr [is_substr $res DB_NOTFOUND] || \ + [is_substr $res DB_KEYEMPTY]] 1 + + + puts "\tTest$tnum.f: Verify the data after bulk delete." + + # Check if the specified items are deleted + set dbc [eval $db cursor $txn] + error_check_good $dbc [is_valid_cursor $dbc $db] TRUE + set len [llength $key_list2] + for {set i 0} {$i < $len} {incr i} { + set key [lindex $key_list2 $i] + set pair [$dbc get -set $key] + error_check_good pair [llength $pair] 0 + } + + set len [llength $pair_list2] + for {set indx1 0; set indx2 1} {$indx2 < $len} \ + {incr indx1 2; incr indx2 2} { + set key [lindex $pair_list2 $indx1] + set data [lindex $pair_list2 $indx2] + set pair [$dbc get -get_both $key $data] + error_check_good pair [llength $pair] 0 + } + + error_check_good $dbc.close [$dbc close] 0 + + # Remove the deleted items from the original key-data lists. + # Since the primary database is non-duplicate, it is enough + # for us to just compare using keys. 
+ set orig_key_list $key_list1 + set orig_data_list $data_list1 + set key_list1 {} + set data_list1 {} + set i 0 + set j 0 + set k 0 + while {$i < $nentries} { + set key1 [lindex $orig_key_list $i] + set key2 [lindex $key_list2 $j] + set key3 [lindex $pair_list2 $k] + if {$key1 == $key2} { + incr i + incr j + } elseif {$key1 == $key3} { + incr i + incr k 2 + } else { + lappend key_list1 $key1 + lappend data_list1 [lindex $orig_data_list $i] + incr i + } + } + + test126_check_prirecords $db $key_list1 $data_list1 $txn + + if { $secondary } { + puts "\tTest$tnum.f.2: Verify the data in secondary database." + set sec_key_list {} + foreach key $key_list1 data $data_list1 { + lappend sec_key_list \ + [[callback_n $callback] $key $data] + } + test126_check_secrecords $sec_db $sec_key_list \ + $key_list1 $data_list1 $txn + } + + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 + if { $secondary } { + error_check_good secdb_close [$sec_db close] 0 + } +} + +proc test126_check_prirecords {db key_list data_list txnarg} { + + set dbc [eval $db cursor $txnarg] + error_check_good $dbc [is_valid_cursor $dbc $db] TRUE + + # Check if all the records are in key_list(key) and data_list(data). + for {set pair [$dbc get -first]} {[llength $pair] > 0} \ + {set pair [$dbc get -next]} { + set key [lindex [lindex $pair 0] 0] + set data [lindex [lindex $pair 0] 1] + set index [lsearch -exact $key_list $key] + error_check_bad key_index $index -1 + error_check_good data $data [lindex $data_list $index] + } + + # Check if all the items in the lists are in the database. 
+ set len [llength $key_list] + for {set i 0} {$i < $len} {incr i} { + set pair [$dbc get -get_both [lindex $key_list $i] \ + [lindex $data_list $i]] + error_check_bad pair [llength $pair] 0 + } + + error_check_good $dbc.close [$dbc close] 0 +} + +proc test126_check_secrecords {db sec_key_list pri_key_list data_list txnarg} { + + set dbc [eval $db cursor $txnarg] + error_check_good $dbc [is_valid_cursor $dbc $db] TRUE + + # Check if all the records are in the lists + for {set pair [$dbc pget -first]} {[llength $pair] > 0} \ + {set pair [$dbc pget -next]} { + set sec_key [lindex [lindex $pair 0] 0] + set pri_key [lindex [lindex $pair 0] 1] + set data [lindex [lindex $pair 0] 2] + set index [lsearch -exact $pri_key_list $pri_key] + error_check_bad key_index $index -1 + error_check_good seckey $sec_key [lindex $sec_key_list $index] + error_check_good data1 $data [lindex $data_list $index] + } + + # Check if all the items in the lists are in the secondary database. + set len [llength $sec_key_list] + for {set i 0} {$i < $len} {incr i} { + set pair [$dbc pget -get_both [lindex $sec_key_list $i] \ + [lindex $pri_key_list $i]] + error_check_bad pair [llength $pair] 0 + error_check_good data2 [lindex $data_list $i] \ + [lindex [lindex $pair 0] 2] + } + + error_check_good $dbc.close [$dbc close] 0 +} diff --git a/test/tcl/test127.tcl b/test/tcl/test127.tcl new file mode 100644 index 00000000..939c6004 --- /dev/null +++ b/test/tcl/test127.tcl @@ -0,0 +1,312 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test127 +# TEST Test database bulk update. +# TEST +# TEST This is essentially test126 with duplicates. +# TEST To make it simple we use numerical keys all the time. +# TEST +# TEST Put with -multiple, then with -multiple_key, +# TEST and make sure the items in database are what we want. 
+# TEST Later, delete some items with -multiple, then with -multiple_key, +# TEST and make sure if the correct items are deleted. + +proc test127 {method { nentries 10000 } { ndups 5} { tnum "127" } + {subdb 0} args } { + source ./include.tcl + global alphabet + + if {[is_btree $method] != 1 && [is_hash $method] != 1} { + puts "Skipping test$tnum for $method." + return + } + + set args [convert_args $method $args] + + set subname "" + set save_msg "with duplicates" + if { $subdb } { + if {[is_partitioned $args]} { + puts "Skipping test$tnum with sub database\ + for partitioned $method test." + return + } + set subname "subdb" + set save_msg "$save_msg using sub databases" + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + set txn "" + if { $eindex == -1 } { + set testfile $testdir/test$tnum.db + set env NULL + } else { + set testfile test$tnum.db + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + set i 0 + foreach ex_args [list "-dup -dupsort" "-dup"] { + set dboargs [concat $args $ex_args] + set fname $testfile + set dbname $subname + puts "Test$tnum: $method ($dboargs)\ + Database bulk update $save_msg." 
+ if {$subdb} { + set dbname $subname.$i + } else { + set fname $testfile.$i + } + test127_sub $fname $dbname $dboargs + incr i + } +} + +proc test127_sub {fname dbname dboargs} { + source ./include.tcl + global alphabet + upvar method method + upvar nentries nentries + upvar ndups ndups + upvar tnum tnum + upvar subdb subdb + upvar txnenv txnenv + upvar env env + + set sorted [is_substr $dboargs "-dupsort"] + set omethod [convert_method $method] + + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $dboargs $omethod $fname $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + + set txn "" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + # Do bulk put. + # First, we put half the entries using put -multiple. + # Then, we put the rest half using put -multiple_key. + + puts "\tTest$tnum.a: Bulk put data using -multiple." + set key_list1 {} + set data_list1 {} + set key_list {} + set datas_list {} + for { set i 1 } { $i < $nentries / 2} { incr i } { + lappend key_list $i + set datas {} + for { set j 1 } { $j <= $ndups } { incr j } { + set str $i.$j.$alphabet + lappend key_list1 $i + lappend data_list1 [chop_data $method $str] + lappend datas [chop_data $method $str] + } + lappend datas_list $datas + } + set ret [eval {$db put} $txn -multiple {$key_list1 $data_list1}] + error_check_good {put(-multiple)} $ret 0 + + # Put again without -overwritedup, should return DB_KEYEXIST. + set ret [catch {eval {$db put} \ + $txn -multiple {$key_list1 $data_list1}} res] + if {$sorted} { + error_check_good \ + {Check DB_KEYEXIST} [is_substr $res DB_KEYEXIST] 1 + } else { + error_check_good {put_again(-multiple)} $ret 0 + } + # Put again with -overwritedup, should succeed + set ret [eval {$db put} \ + $txn -multiple -overwritedup {$key_list1 $data_list1}] + error_check_good {put_again(-multiple -overwritedup)} $ret 0 + + puts "\tTest$tnum.b: Bulk put data using -multiple_key." 
+ set pair_list1 {} + for { set i [expr $nentries / 2 ]} { $i <= $nentries} { incr i } { + lappend key_list $i + set datas {} + for { set j 1 } { $j <= $ndups } { incr j } { + set str $i.$j.$alphabet + lappend pair_list1 $i [chop_data $method $str] + lappend datas [chop_data $method $str] + } + lappend datas_list $datas + } + set ret [eval {$db put} $txn -multiple_key {$pair_list1}] + error_check_good {put(-multiple_key)} $ret 0 + + # Put again without -overwritedup, should return DB_KEYEXIST. + set ret [catch { + eval {$db put} $txn -multiple_key {$pair_list1}} res] + if {$sorted} { + error_check_good \ + {Check DB_KEYEXIST} [is_substr $res DB_KEYEXIST] 1 + } else { + error_check_good {put_again(-multiple_key)} $ret 0 + } + # Put again with -overwritedup, should succeed + set ret [eval \ + {$db put} $txn -multiple_key -overwritedup {$pair_list1}] + error_check_good \ + {put_again(-multiple_key -overwritedup)} $ret 0 + + puts "\tTest$tnum.c: Verify the data after bulk put." + test127_check_prirecords $db $key_list $datas_list $txn $sorted + + puts "\tTest$tnum.d: Bulk delete data using -multiple." + set key_list2 {} + for { set i 1 } { $i <= $nentries} { incr i 2 } { + lappend key_list2 $i + } + set ret [eval {$db del} $txn -multiple {$key_list2}] + error_check_good {del(-multiple)} $ret 0 + + # Delete again, should return DB_NOTFOUND. + set ret [catch {eval {$db del} $txn -multiple {$key_list2}} res] + error_check_good {Check DB_NOTFOUND} [is_substr $res DB_NOTFOUND] 1 + + puts "\tTest$tnum.e: Bulk delete using -multiple_key." + set pair_list2 {} + for { set i 2 } { $i <= $nentries} { incr i 2} { + for { set j 1 } { $j <= $ndups / 2 } { incr j } { + set str $i.$j.$alphabet + lappend pair_list2 $i [chop_data $method $str] + } + } + + set ret [eval {$db del} $txn -multiple_key {$pair_list2}] + error_check_good {del(-multiple_key)} $ret 0 + + # Delete again, should return DB_NOTFOUND. 
+ set ret [catch {eval {$db del} $txn -multiple_key {$pair_list2}} res] + if {$sorted} { + error_check_good {Check DB_NOTFOUND} [is_substr $res DB_NOTFOUND] 1 + } else { + error_check_good "del(-multiple_key) 2 round" $ret 0 + set ret [catch {eval {$db del} $txn -multiple_key {$pair_list2}} res] + error_check_good "del(-multiple_key) 3 round" $ret 0 + set ret [catch {eval {$db del} $txn -multiple_key {$pair_list2}} res] + error_check_good {Check DB_NOTFOUND} [is_substr $res DB_NOTFOUND] 1 + } + + puts "\tTest$tnum.f: Verify the data after bulk delete." + + # Check if the specified items are deleted + set dbc [eval $db cursor $txn] + error_check_good $dbc [is_valid_cursor $dbc $db] TRUE + set len [llength $key_list2] + for {set i 0} {$i < $len} {incr i} { + set key [lindex $key_list2 $i] + set pair [$dbc get -set $key] + error_check_good pair [llength $pair] 0 + } + + set len [llength $pair_list2] + for {set indx1 0; set indx2 1} {$indx2 < $len} \ + {incr indx1 2; incr indx2 2} { + set key [lindex $pair_list2 $indx1] + set data [lindex $pair_list2 $indx2] + set pair [$dbc get -get_both $key $data] + error_check_good pair [llength $pair] 0 + } + + # Check all items to mare sure we do not delete other items. 
+ set orig_key_list $key_list + set orig_datas_list $datas_list + set key_list {} + set datas_list {} + set i 0 + set j 0 + set k 0 + while {$i < $nentries} { + set datas [lindex $orig_datas_list $i] + set key1 [lindex $orig_key_list $i] + set key2 [lindex $key_list2 $j] + set key3 [lindex $pair_list2 $k] + if {$key1 == $key2} { + incr i + incr j + continue + } elseif {$key1 == $key3} { + while {$key1 == $key3} { + set data_index [expr $k + 1] + set data [lindex $pair_list2 $data_index] + set index [lsearch -exact $datas $data] + error_check_bad data_index -1 $index + set datas [lreplace $datas $index $index] + incr k 2 + set key3 [lindex $pair_list2 $k] + } + } + if {[llength $datas] > 0} { + lappend key_list $key1 + lappend datas_list $datas + } + incr i + } + + test127_check_prirecords $db $key_list $datas_list $txn $sorted + + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} + +proc test127_check_prirecords {db key_list datas_list txnarg sorted} { + + set dbc [eval $db cursor $txnarg] + error_check_good $dbc [is_valid_cursor $dbc $db] TRUE + + # Check if all the records are in key_list(key) and datas_list(data). + for {set pair [$dbc get -first]} {[llength $pair] > 0} \ + {set pair [$dbc get -next]} { + set key [lindex [lindex $pair 0] 0] + set data [lindex [lindex $pair 0] 1] + set index [lsearch -exact $key_list $key] + error_check_bad key_index $index -1 + error_check_bad data_index -1 \ + [lsearch -exact [lindex $datas_list $index] $data] + } + + error_check_good $dbc.close [$dbc close] 0 + + # Check if all the items in the lists are in the database. 
+ set len [llength $key_list] + for {set i 0} {$i < $len} {incr i} { + set key [lindex $key_list $i] + set datas [lindex $datas_list $i] + set pairs [eval $db get $txnarg $key] + error_check_bad pairs [llength $pairs] 0 + if {$sorted} { + error_check_good pairs [llength $pairs] [llength $datas] + } else { + error_check_good pairs [expr \ + [llength $pairs] >= [llength $datas]] 1 + } + foreach data $datas { + set pair [list $key $data] + error_check_bad pair_index -1 \ + [lsearch -exact $pairs $pair] + } + } +} diff --git a/test/tcl/test128.tcl b/test/tcl/test128.tcl new file mode 100644 index 00000000..d164b10c --- /dev/null +++ b/test/tcl/test128.tcl @@ -0,0 +1,35 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test128 +# TEST Test database bulk update for sub database and duplicate database. +# TEST +# TEST This is essentially test126 with sub database and secondary database. + +proc test128 {method { nentries 10000 } {callback 1} args } { + source ./include.tcl + + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + # Test using sub database + eval {test126 $method $nentries "128" $callback 1 0} $args + eval {verify_dir $testdir "" 1 0 $nodump} + eval {salvage_dir $testdir "" 1} + + # Test using secondary database + eval {test126 $method $nentries "128" $callback 0 1} $args + eval {verify_dir $testdir "" 1 0 $nodump} + eval {salvage_dir $testdir "" 1} + + # Test using both sub database and secondary database + eval {test126 $method $nentries "128" $callback 1 1} $args + +} + + diff --git a/test/tcl/test129.tcl b/test/tcl/test129.tcl new file mode 100644 index 00000000..ee13f3ad --- /dev/null +++ b/test/tcl/test129.tcl @@ -0,0 +1,19 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST test129 +# TEST Test database bulk update for duplicate sub database. +# TEST +# TEST This is essentially test127 with sub database. + +proc test129 {method { nentries 10000 } { ndups 5} args } { + + # Test using both sub database and secondary database + eval {test127 $method $nentries $ndups "129" 1} $args + +} + + diff --git a/test/tcl/test130.tcl b/test/tcl/test130.tcl new file mode 100644 index 00000000..b41cb6be --- /dev/null +++ b/test/tcl/test130.tcl @@ -0,0 +1,301 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test130 +# TEST Test moving of subdatabase metadata pages. +# TEST +# TEST Populate num_db sub-database. Open multiple handles on each. +# TEST Remove a high proportion of entries. +# TEST Dump and save contents. Compact the database, dump again, +# TEST and make sure we still have the same contents. +# TEST Make sure handles and cursors still work after compaction. + +proc test130 { method {nentries 10000} {num_db 3} {tnum "130"} args } { + + # Compaction is an option for btree, recno, and hash databases. + if { [is_queue $method] == 1 } { + puts "Skipping test$tnum for method $method." + return + } + + # Heap cannot have subdatabases + if { [is_heap $method] == 1 } { + puts "Skipping test$tnum for method $method." + return + } + + # If a page size was specified, find out what it is. Pages + # might not be freed in the case of really large pages (64K) + # but we still want to run this test just to make sure + # nothing funny happens. 
+ set pagesize 0 + set pgindex [lsearch -exact $args "-pagesize"] + if { $pgindex != -1 } { + incr pgindex + set pagesize [lindex $args $pgindex] + } + + source ./include.tcl + global rand_init + error_check_good set_random_seed [berkdb srand $rand_init] 0 + set args [convert_args $method $args] + set omethod [convert_method $method] + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # This test must have an environment because of sub-databases. + set txnenv 0 + set eindex [lsearch -exact $args "-env"] + if { $eindex == -1 } { + puts "Skipping test$tnum, needs environment" + return + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + set nhandles 3 + } + puts "Test$tnum: ($method $args) Database compaction." + + set t1 $testdir/t1 + if { [is_record_based $method] == 1 } { + set splitopt "" + set special [expr $nentries + 10] + } else { + set splitopt "-revsplitoff" + set special "special" + } + set txn "" + + cleanup $testdir $env + + # Create num_db sub databases and fill them with data. + for {set i 0} {$i < $num_db} {incr i} { + set testfile $basename.db + set did [open $dict] + if { $env != "NULL" } { + set testdir [get_home $env] + } + + puts "\tTest$tnum.a: Create and populate database sub$i ($splitopt)." 
+ set db(0,$i) [eval {berkdb_open -create \ + -mode 0644} $splitopt $args $omethod {$testfile sub$i}] + error_check_good dbopen [is_valid_db $db(0,$i)] TRUE + + set count 0 + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count < $nentries } { + global kvals + + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + set kvals($key) [pad_data $method $str] + } else { + set key $str + set str [reverse $str] + } + + set ret [eval \ + {$db(0,$i) put} $txn {$key [chop_data $method $str]}] + error_check_good put $ret 0 + incr count + } + + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + close $did + set ret [eval {$db(0,$i) put $special [chop_data $method "data"]} ] + error_check_good put $ret 0 + error_check_good db_sync [$db(0,$i) sync] 0 + + # Open a couple of cursors on this handle. + set dbc(0,$i,0) [eval {$db(0,$i) cursor } ] + error_check_good dbc \ + [is_valid_cursor $dbc(0,$i,0) $db(0,$i)] TRUE + set dbc(0,$i,1) [eval {$db(0,$i) cursor } ] + error_check_good dbc \ + [is_valid_cursor $dbc(0,$i,1) $db(0,$i)] TRUE + + # Open nhandles on each sub database. Each with some cursors + for {set j 1 } {$j < $nhandles} {incr j} { + set db($j,$i) [eval berkdb_open $args $testfile sub$i] + error_check_good db [is_valid_db $db($j,$i)] TRUE + set dbc($j,$i,0) [eval {$db($j,$i) cursor } ] + error_check_good dbc \ + [is_valid_cursor $dbc($j,$i,0) $db($j,$i)] TRUE + set dbc($j,$i,1) [eval {$db($j,$i) cursor } ] + error_check_good dbc \ + [is_valid_cursor $dbc($j,$i,1) $db($j,$i)] TRUE + } + + } + + set testdir [get_home $env] + set filename $testdir/$testfile + + # Delete between 1 and maxdelete items, then skip over between + # 1 and maxskip items. This is to make the data bunchy, + # so we sometimes follow the code path where merging is + # done record by record, and sometimes the path where + # the whole page is merged at once. 
+ + puts "\tTest$tnum.b: Delete most entries from each database." + set did [open $dict] + set count [expr $nentries - 1] + set maxskip 4 + set maxdelete 48 + + # Since rrecno and rbtree renumber, we delete starting at + # nentries and working down to 0. + for {set i 0} {$i < $num_db} {incr i} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + while { [gets $did str] != -1 && $count > 0 } { + + # Delete a random number of successive items. + set ndeletes [berkdb random_int 1 $maxdelete] + set target [expr $count - $ndeletes] + while { [expr $count > $target] && $count > 0 } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } else { + set key [gets $did] + } + + set ret [eval {$db(0,$i) del} $txn {$key}] + error_check_good del $ret 0 + incr count -1 + } + # Skip over a random smaller number of items. + set skip [berkdb random_int 1 [expr $maxskip]] + set target [expr $count - $skip] + while { [expr $count > $target] && $count > 0 } { + incr count -1 + } + } + if { $txnenv == 1 } { + error_check_good t_commit [$t commit] 0 + } + error_check_good db_sync [$db(0,$i) sync] 0 + + puts "\tTest$tnum.c: Do a dump_file on contents." + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + dump_file $db(0,$i) $txn $t1 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + + puts "\tTest$tnum.d: Compact and verify databases" + for {set commit 0} {$commit <= $txnenv} {incr commit} { + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + + # Open a cursor in this transaction -- compact + # should complain. 
+ set txncursor(0,$i) \ + [eval {$db(0,$i) cursor} $txn] + set ret [catch {eval {$db(0,$i) compact} \ + $txn {-freespace}} res] + error_check_good no_txn_cursor $ret 1 + error_check_good txn_cursor_close \ + [$txncursor(0,$i) close] 0 + } + + # Now compact for real. + if {[catch {eval {$db(0,$i) compact} \ + $txn {-freespace}} ret] } { + error "FAIL: db compact: $ret" + } + if { $txnenv == 1 } { + if { $commit == 0 } { + puts "\tTest$tnum.d: Aborting." + error_check_good txn_abort [$t abort] 0 + } else { + puts "\tTest$tnum.d: Committing." + error_check_good txn_commit [$t commit] 0 + } + } + error_check_good db_sync [$db(0,$i) sync] 0 + error_check_good verify_dir \ + [verify_dir $testdir "" 0 0 $nodump ] 0 + } + } + + # See that the handles and cursors can get data. + # Brute force check that the handle locks are right, try to remove + # the database without first closing each handle. Skip the + # cursor and db handle checks for rrecno -- the deletes will + # have changed the value of $special. + set paddeddata [pad_data $method "data"] + for {set i 0} {$i < $num_db} {incr i} { + puts "\tTest$tnum.e: \ + See that the handle $i and cursors still work." + for {set j 0} {$j < $nhandles} {incr j} { + if { ![is_rrecno $method] } { + set ret [eval {$db($j,$i) get $special } ] + error_check_good handle$i \ + [lindex [lindex $ret 0] 0] $special + error_check_good handle$i \ + [lindex [lindex $ret 0] 1] $paddeddata + set ret [eval {$dbc($j,$i,0) get -set $special } ] + error_check_good handle$i \ + [lindex [lindex $ret 0] 0] $special + error_check_good handle$i \ + [lindex [lindex $ret 0] 1] $paddeddata + set ret [eval {$dbc($j,$i,1) get -set $special } ] + error_check_good handle$i \ + [lindex [lindex $ret 0] 0] $special + error_check_good handle$i \ + [lindex [lindex $ret 0] 1] $paddeddata + } + puts "\tTest$tnum.f: Try to remove and then close." + # We only try this if it's transactional -- otherwise + # there are no locks to prevent the removal. 
+ if { $txnenv == 1 } { + set t [eval $env txn -nowait] + catch {$env dbremove -txn $t \ + $testfile sub$i} ret + error_check_bad dbremove $ret 0 + $t commit + } + error_check_good dbc_close [eval $dbc($j,$i,0) close] 0 + error_check_good dbc_close [eval $dbc($j,$i,1) close] 0 + error_check_good db_close [eval $db($j,$i) close] 0 + } + + # Now remove the db and make sure that there are no lingering + # handle locks that have hung around. + if { $txnenv == 1 } { + error_check_good dbremove \ + [eval $env dbremove -auto_commit $testfile sub$i] 0 + } else { + error_check_good dbremove \ + [eval $env dbremove $testfile sub$i ] 0 + } + } +} diff --git a/test/tcl/test131.tcl b/test/tcl/test131.tcl new file mode 100644 index 00000000..e062ddd4 --- /dev/null +++ b/test/tcl/test131.tcl @@ -0,0 +1,479 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test131 +# TEST Test foreign database operations. +# TEST Create a foreign db, and put some records into it. +# TEST Then associate the foreign db with a secondary db, and +# TEST put records into the primary db. +# TEST Do operations in the foreign db and check results. +# TEST Finally, verify the foreign relation between the foreign db +# TEST and secondary db. +# TEST Here, we test three different foreign delete constraints: +# TEST - DB_FOREIGN_ABORT +# TEST - DB_FOREIGN_CASCADE +# TEST - DB_FOREIGN_NULLIFY + +proc test131 {method {nentries 1000} {tnum "131"} {ndups 5} {subdb 0} + {inmem 0} args } { + source ./include.tcl + + # For rrecno, when keys are deleted, the ones after will move forward, + # and the keys change. It is not good to use rrecno for the primary + # database. + if {[is_rrecno $method]} { + puts "Skipping test$tnum for $method test." + return + } + + set sub_msg "" + # Check if we use sub databases. 
+ if { $subdb } { + if {[is_queue $method]} { + puts "Skipping test$tnum with sub database\ + for $method." + return + } + if {[is_partitioned $args]} { + puts "Skipping test$tnum with sub database\ + for partitioned $method test." + return + } + if {[is_heap $method]} { + puts "Skipping test$tnum with sub database\ + for $method." + return + } + # Check if the sub databases should be in-memory. + if {$inmem} { + set sub_msg "using in-memory sub databases" + } else { + set sub_msg "using sub databases" + } + } + + # If we are using an env, then basename should just be the name prefix. + # Otherwise it is the test directory and the name prefix. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + set txn "" + if { $eindex == -1 } { + set basename $testdir/test$tnum + set env NULL + if {$subdb} { + puts "Skipping test$tnum $sub_msg for non-env test." + return + } + } else { + set basename test$tnum + set nentries 200 + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + + + # To simplify the case, just set the type of foreign database as + # btree or hash. And for the secondary database, the type is dupsort + # hash/btree. For foreign and secondary databases, we just pass + # the original args, since the converted args does not work for them. + # For example, if the primary database is rbtree, convert_args will + # generate "-recnum" which is invalid to the foreign and secondary + # databases. + set origargs $args + set secargs $args + set args [convert_args $method $args] + set omethod [convert_method $method] + + puts "Test$tnum: $method ($args) Foreign operations $sub_msg." + + # The type-pairs for the foreign and secondary databases. + set typepairs { + {"-btree" "-btree"} + {"-btree" "-hash"} + {"-hash" "-btree"} + {"-hash" "-hash"} + } + + # The sub procs which test specific foreign operations. 
+ set subtests { + {"test131_sub1" "-abort" "0"} + {"test131_sub2" "-cascade" "0"} + {"test131_sub3" "-nullify test131_nullify_cb1" "1"} + {"test131_sub4" "-nullify test131_nullify_cb2" "0"} + } + + set i 0 + + foreach subtest $subtests { + foreach typepair $typepairs { + # Initialize the names + set pri_subname "" + set sec_subname "" + set foreign_subname "" + set pri_file $basename.db + set sec_file $basename.db + set foreign_file $basename.db + if {$inmem} { + set pri_file "" + set sec_file "" + set foreign_file "" + } + + set foreign_proc [lindex $subtest 0] + set foreign_arg [lindex $subtest 1] + set foreign_vrfy [lindex $subtest 2] + set foreign_dbtype [lindex $typepair 0] + set sec_dbtype [lindex $typepair 1] + if {$subdb} { + set pri_subname "primary_db$i" + set sec_subname "secondary_db$i" + set foreign_subname "foreign_db$i" + } else { + set pri_file "${basename}_primary$i.db" + set sec_file "${basename}_secondary$i.db" + set foreign_file "${basename}_foreign$i.db" + } + + puts "\tTest$tnum.$i subtest={$subtest}, \ + typepair={$typepair}" + + # Skip partition range test and compression test + # when there are non-btree databases. + if {![is_btree $foreign_dbtype] || \ + ![is_btree $sec_dbtype]} { + set skip 0 + if {[is_partitioned $origargs] && \ + ![is_partition_callback $origargs]} { + set skip 1 + } + if {[is_compressed $origargs]} { + set skip 1 + } + if {$skip} { + puts "\tSkipping for {$origargs}" + continue + } + } + + set txn "" + if {$txnenv == 1} { + set t [$env txn] + error_check_good txn \ + [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + # Open the foreign database. + puts "\tTest$tnum.$i.a Open and truncate the databases." + set foreign_db [eval berkdb_open_noerr -create \ + -mode 0644 $foreign_dbtype $origargs \ + {$foreign_file} $foreign_subname] + error_check_good foreign_db_open \ + [is_valid_db $foreign_db] TRUE + + # Open the primary database. 
+ set pri_db [eval berkdb_open_noerr -create -mode 0644 \ + $omethod $args {$pri_file} $pri_subname] + error_check_good pri_db_open \ + [is_valid_db $pri_db] TRUE + + # Open the secondary database. + set sec_db [eval berkdb_open_noerr -create -mode 0644 \ + $sec_dbtype $secargs -dup -dupsort \ + {$sec_file} $sec_subname] + error_check_good sec_db_open \ + [is_valid_db $sec_db] TRUE + + # Truncate the databases. + # For some tests(e.g. run_secenv), we will run this + # unit twice. The "cleanup $testdir" could clean the + # on-disk database files, but it can not clean the + # in-memory ones, so the records are still in the + # in-memory databases, which will affect our test. + # So, we need to truncate these databases first. + error_check_good foreign_db_trunc \ + [expr [eval $foreign_db truncate $txn] >= 0] 1 + error_check_good pri_db_trunc \ + [expr [eval $pri_db truncate $txn] >= 0] 1 + error_check_good sec_db_trunc \ + [expr [eval $sec_db truncate $txn] >= 0] 1 + + # Establish the relations between the databases. + set ret [eval $pri_db \ + associate -create $txn test131_sec_cb1 $sec_db] + error_check_good db_associate $ret 0 + + set ret [eval $foreign_db \ + associate_foreign $foreign_arg $sec_db] + error_check_good db_associate_foreign $ret 0 + + puts "\tTest$tnum.$i.b Populate the foreign database." + test131_populate_foreigndb $txn $foreign_db $nentries + + # Put records into the primary database. + puts "\tTest$tnum.$i.c Populate primary database." + test131_populate_pridb \ + $txn $pri_db $method $nentries $ndups + + # Check records in the secondary database. + puts "\tTest$tnum.$i.d\ + Check records in secondary database." + test131_check_secdb $txn $sec_db $nentries $ndups + + # Update the foreign database. + puts "\tTest$tnum.$i.e Update the foreign database." + test131_update_foreigndb $txn $foreign_db $nentries + + # Delete the keys only in the foreign database. + puts "\tTest$tnum.$i.f Delete foreign-only keys." 
+ test131_delitems_foreigndb $txn $foreign_db $nentries + + # Test different foreign delete constraints. + puts "\tTest$tnum.$i.g\ + Test specific foreign delete constraints." + $foreign_proc $txn $foreign_db $pri_db $sec_db \ + $nentries $ndups "Test$tnum.$i.g" + + puts "\tTest$tnum.$i.h Verifying foreign key \ + relationships ..." + error_check_good verify_foreign [verify_foreign \ + $txn $foreign_db $sec_db 0] $foreign_vrfy + + if {$txnenv == 1} { + error_check_good txn_commit [$t commit] 0 + } + + error_check_good foreign_db_close [$foreign_db close] 0 + error_check_good pri_db_close [$pri_db close] 0 + error_check_good sec_db_close [$sec_db close] 0 + + incr i + } + } +} + +# Test for DB_FOREIGN_ABORT +# Delete the keys which exist in the secondary database. +# The delete should fail because DB_FOREIGN_ABORT has been set. +proc test131_sub1 {txn foreign_db pri_db sec_db nentries ndups msghdr} { + puts "\t\t$msghdr.1 Test DB_FOREIGN_ABORT" + set nentries2 [expr $nentries / 2] + for {set i 0} {$i < $nentries2} {incr i} { + set ret [catch {eval $foreign_db del $txn $i} res] + error_check_bad foreign_del $ret 0 + error_check_good "check DB_FOREIGN_CONFLICT" \ + [is_substr $res DB_FOREIGN_CONFLICT] 1 + error_check_good check_ndups \ + [llength [eval $sec_db get $txn $i]] $ndups + error_check_good check_foreign \ + [llength [eval $foreign_db get $txn $i]] 1 + } +} + +# Test the DB_FOREIGN_CASCADE. +# Delete the keys which exist in the secondary database. +# The delete should succeed, and all the related records in +# the secondary database should be deleted as well. 
+proc test131_sub2 {txn foreign_db pri_db sec_db nentries ndups msghdr} { + puts "\t\t$msghdr.1 Test DB_FOREIGN_CASCADE" + set nentries2 [expr $nentries / 2] + for {set i 0} {$i < $nentries2} {incr i} { + set ret [catch {eval $foreign_db del $txn $i} res] + error_check_good foreign_del $ret 0 + error_check_good check_ndups \ + [llength [eval $sec_db get $txn $i]] 0 + error_check_good check_foreign \ + [llength [eval $foreign_db get $txn $i]] 0 + } +} + +# Test the DB_FOREIGN_NULLIFY with a bad nullify function( +# the nullify function does not change the data). +# Delete the keys which exist in the secondary database. +# The delete should fail, but the records in the foreign +# database should be removed anyway, while the related records +# in the primary/secondary are still there. +proc test131_sub3 {txn foreign_db pri_db sec_db nentries ndups msghdr} { + puts "\t\t$msghdr.1 \ + Test DB_FOREIGN_NULLIFY with a bad nullify function." + set nentries2 [expr $nentries / 2] + for {set i 0} {$i < $nentries2} {incr i} { + set ret [catch {eval $foreign_db del $txn $i} res] + error_check_good foreign_del $ret 0 + error_check_good check_ndups \ + [llength [eval $sec_db get $txn $i]] $ndups + error_check_good check_foreign \ + [llength [eval $foreign_db get $txn $i]] 0 + } +} + +# Test the DB_FOREIGN_NULLIFY with a good nullify function. +# Delete the keys which exist in the secondary database. +# The delete should succeed, and the records in the foreign +# database should be removed, while the related records +# in the primary/secondary are changed. +proc test131_sub4 {txn foreign_db pri_db sec_db nentries ndups msghdr} { + puts "\t\t$msghdr.1 \ + Test DB_FOREIGN_NULLIFY with a good nullify function." 
+ set nentries2 [expr $nentries / 2 - 1] + for {set i 0} {$i < $nentries2} {incr i 2} { + set ret [catch {eval $foreign_db del $txn $i} res] + error_check_good foreign_del $ret 0 + error_check_good check_ndups \ + [llength [eval $sec_db get $txn $i]] 0 + error_check_good check_foreign \ + [llength [eval $foreign_db get $txn $i]] 0 + set newkey [expr $i + 1] + error_check_good check_ndups \ + [llength [eval $sec_db get $txn $newkey]] [expr $ndups * 2] + error_check_good check_foreign \ + [llength [eval $foreign_db get $txn $newkey]] 1 + } +} + +# The callback function for secondary database. +proc test131_sec_cb1 {pkey pdata} { + set indx [string first "-" $pdata] + error_check_bad good_indx [expr $indx > 0] 0 + return [string range $pdata 0 [expr $indx - 1]] +} + +# The 1st callback function for foreign. +proc test131_nullify_cb1 {pkey pdata fkey} { + return $pdata +} + +# The 2nd callback function for foreign. +proc test131_nullify_cb2 {pkey pdata fkey} { + # We should make sure the size does not grow, since + # that will cause queue test to fail. + set indx [string first "-" $pdata] + error_check_bad good_indx [expr $indx > 0] 0 + set num [string range $pdata 0 [expr $indx - 1]] + set str [string range $pdata [expr $indx + 1] end] + return "[expr $num + 1]-$str" +} + +# Put records into the foreign database. +proc test131_populate_foreigndb {txn foreign_db nentries} { + global alphabet + for {set i 0} {$i < $nentries} {incr i} { + set ret [eval $foreign_db put $txn $i $i$alphabet] + error_check_good foreign_put $ret 0 + } +} + +# Update the records in the foreign database. 
+proc test131_update_foreigndb {txn foreign_db nentries} { + global alphabet + for {set i 0} {$i < $nentries} {incr i} { + set ret [eval $foreign_db get $txn $i] + error_check_good check_pair [llength $ret] 1 + set key [lindex [lindex $ret 0] 0] + set data [lindex [lindex $ret 0] 1] + error_check_good check_key $key $i + set ret [eval $foreign_db put $txn $key $data-NEW] + error_check_good foreign_put $ret 0 + } +} + +# Delete items which only exist in the foreign database. +proc test131_delitems_foreigndb {txn foreign_db nentries} { + global alphabet + set nentries2 [expr $nentries / 2] + for {set i $nentries2} {$i < $nentries} {incr i 2} { + set ret [eval $foreign_db del $txn $i] + error_check_good foreign_del $ret 0 + set ret [eval $foreign_db get $txn $i] + error_check_good check_empty [llength $ret] 0 + } +} + +# Put records into the primary database. +# Here we make every key in the secondary database +# has $ndups duplicate records. +proc test131_populate_pridb {txn pri_db pri_method nentries ndups} { + source ./include.tcl + global alphabet + + set lastindx [expr [string length $alphabet] - 1] + # We just cover the first half of keys in the foreign database. 
+ set nentries2 [expr $nentries / 2] + set number 1 + for {set i 0} {$i < $nentries2} {incr i} { + for {set j 0} {$j < $ndups} {incr j} { + set beg [berkdb random_int 0 $lastindx] + set end [berkdb random_int 0 $lastindx] + if {$beg <= $end} { + set str [string range $alphabet $beg $end] + } else { + set str [string range $alphabet $end $beg] + } + if {[is_record_based $pri_method] == 1} { + set key $number + } else { + set key $number$str + } + set ret [eval $pri_db put $txn $key \ + [make_fixed_length $pri_method "$i-$str"]] + error_check_good pri_db_put $ret 0 + error_check_good pri_db_get \ + [llength [eval $pri_db get $txn $key]] 1 + incr number + } + } + + set nentries2 [expr $nentries + $nentries2] + for {set i $nentries} {$i < $nentries2} {incr i} { + for {set j 0} {$j < $ndups} {incr j} { + set beg [berkdb random_int 0 $lastindx] + set end [berkdb random_int 0 $lastindx] + if {$beg <= $end} { + set str [string range $alphabet $beg $end] + } else { + set str [string range $alphabet $end $beg] + } + if {[is_record_based $pri_method] == 1} { + set key $number + } else { + set key $number$str + } + set ret [catch {eval $pri_db put $txn $key \ + [make_fixed_length $pri_method "$i-$str"]} res] + error_check_bad pri_db_put $ret 0 + error_check_good "check DB_FOREIGN_CONFLICT" \ + [is_substr $res DB_FOREIGN_CONFLICT] 1 + error_check_good pri_db_get \ + [llength [eval $pri_db get $txn $key]] 0 + incr number + } + } +} + +# Check the records in the secondary database. 
+proc test131_check_secdb {txn sec_db nentries ndups} { + set nentries2 [expr $nentries / 2] + for {set i 0} {$i < $nentries2} {incr i} { + set ret [eval $sec_db get $txn $i] + error_check_good check_rec_num [llength $ret] $ndups + } + for {set i $nentries2} {$i < $nentries} {incr i} { + set ret [eval $sec_db get $txn $i] + error_check_good check_rec_num [llength $ret] 0 + } + + set nentries2 [expr $nentries + $nentries2] + for {set i $nentries} {$i < $nentries2} {incr i} { + set ret [eval $sec_db get $txn $i] + error_check_good check_rec_num [llength $ret] 0 + } +} diff --git a/test/tcl/test132.tcl b/test/tcl/test132.tcl new file mode 100644 index 00000000..7a7bcfa0 --- /dev/null +++ b/test/tcl/test132.tcl @@ -0,0 +1,29 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test132 +# TEST Test foreign database operations on sub databases and +# TEST in-memory databases. + +proc test132 {method {nentries 1000} {ndups 5} args } { + source ./include.tcl + + if { [is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + + # Test using on-disk sub databases. + eval {test131 $method $nentries "132" $ndups 1 0} $args + eval {verify_dir $testdir "" 1 0 $nodump} + eval {salvage_dir $testdir "" 1} + + # Test using in-memory databases. + eval {test131 $method $nentries "132" $ndups 0 1} $args + +} + diff --git a/test/tcl/test133.tcl b/test/tcl/test133.tcl new file mode 100644 index 00000000..f728465e --- /dev/null +++ b/test/tcl/test133.tcl @@ -0,0 +1,183 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test133 +# TEST Test Cursor Cleanup. +# TEST Open a primary database and a secondary database, +# TEST then open 3 cursors on the secondary database, and +# TEST point them at the first item. 
+# TEST Do the following operations in loops: +# TEST * The 1st cursor will delete the current item. +# TEST * The 2nd cursor will also try to delete the current item. +# TEST * Move all the 3 cursors to get the next item and check the returns. +# TEST Finally, move the 3rd cursor once. + +proc test133 {method {nentries 1000} {tnum "133"} {subdb 0} args} { + source ./include.tcl + + # For rrecno, when keys are deleted, the ones after will move forward, + # and the keys change, which is not good to verify after delete. + # Therefore we skip rrecno method. + if {[is_rrecno $method]} { + puts "Skipping test$tnum for $method test." + return + } + + set sub_msg "" + + # Check if we use sub database. + if { $subdb } { + if {[is_queue $method]} { + puts "Skipping test$tnum with sub database for $method." + return + } + if {[is_partitioned $args]} { + puts "Skipping test$tnum with sub database\ + for partitioned $method test." + return + } + if {[is_heap $method]} { + puts "Skipping test$tnum with sub database\ + for $method." + return + } + set sub_msg "using sub databases" + } + + # If we are using an env, then testfile should just be the db name. + # Otherwise it is the test directory and the name. + set eindex [lsearch -exact $args "-env"] + set txnenv 0 + if { $eindex == -1 } { + if {$subdb} { + puts "Skipping test$tnum $sub_msg for non-env test." + return + } + set basename $testdir/test$tnum + set env NULL + } else { + set basename test$tnum + incr eindex + set env [lindex $args $eindex] + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append args " -auto_commit " + } + set testdir [get_home $env] + } + + cleanup $testdir $env + set sec_args $args + set args [convert_args $method $args] + + puts "Test$tnum: $method ($args)\ + Cursor Cleanup Test $sub_msg." 
+ + + set secdb_types {"-btree" "-hash"} + set i 0 + foreach sec_method $secdb_types { + test133_sub "\tTest$tnum.$i" $basename $subdb $method $args \ + $sec_method $sec_args $i + incr i + } +} + +proc test133_sub { prefix basename use_subdb pri_method pri_args + sec_method sec_args indx } { + global alphabet + upvar txnenv txnenv + upvar env env + upvar nentries nentries + + # We can not set partition keys to hash. + if {[is_partitioned $sec_args] && ![is_partition_callback $sec_args] \ + && [is_hash $sec_method]} { + puts "Skipping for $sec_method with $sec_args." + return + } + + if { $use_subdb } { + set pri_testfile $basename.$indx.db + set pri_subname "primary" + set sec_testfile $basename.$indx.db + set sec_subname "secondary" + } else { + set pri_testfile $basename.$indx-primary.db + set pri_subname "" + set sec_testfile $basename.$indx-secondary.db + set sec_subname "" + } + + puts "$prefix.a: Open the primary database." + set pri_omethod [convert_method $pri_method] + set db [eval {berkdb_open_noerr -create -mode 0644} \ + $pri_args $pri_omethod $pri_testfile $pri_subname] + error_check_good dbopen [is_valid_db $db] TRUE + + # Open a simple dupsort database. + # In order to be consistent, we need to use all the passed-in + # am-unrelated flags. + puts "$prefix.b: Open the secondary ($sec_method) database." + set sec_db [eval {berkdb_open_noerr -create -mode 0644} $sec_args \ + -dup -dupsort $sec_method $sec_testfile $sec_subname] + error_check_good secdb_open [is_valid_db $sec_db] TRUE + set ret [$db associate -create [callback_n 1] $sec_db] + error_check_good db_associate $ret 0 + + set txn "" + if { $txnenv == 1 } { + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + + puts "$prefix.c: Putting data into the primary database." 
+ + for {set i 1} {$i <= $nentries} {incr i} { + error_check_good "put_$i" [eval $db put $txn \ + $i [make_fixed_length $pri_method $i.$alphabet]] 0 + } + + puts "$prefix.d: Opening three cursors on secondary database." + + set cursor1 [eval $sec_db cursor $txn] + set cursor2 [eval $sec_db cursor $txn] + set cursor3 [eval $sec_db cursor $txn] + + puts "$prefix.e: Deleting records using the 1st cursor." + set delcnt [expr $nentries / 2] + for {set i 0} {$i < $delcnt} {incr i} { + set ret1 [$cursor1 get -next] + set ret2 [$cursor2 get -next] + set ret3 [$cursor3 get -next] + error_check_good cmp_1_2 $ret1 $ret2 + error_check_good cmp_1_3 $ret1 $ret3 + + error_check_good cursor1_del [eval $cursor1 del] 0 + set ret [catch {eval $cursor2 del} res] + error_check_good {Check DB_NOTFOUND/DB_KEYEMPTY} \ + [expr [is_substr $res DB_NOTFOUND] || \ + [is_substr $res DB_KEYEMPTY]] 1 + } + + error_check_good cursor1_close [$cursor1 close] 0 + error_check_good cursor2_close [$cursor2 close] 0 + + # The 3rd cursor is the final cursor pointing to the deleted item. + # Usually, when the last cursor moves after all the deleted items, + # these deleted items will be deleted physically. So here, we move + # to next(it is after all the deleted items), then close the cursor. + puts "$prefix.f: Moving the 3rd cursor." + set ret3 [$cursor3 get -next] + error_check_bad cursor3_get [llength $ret3] 0 + error_check_good cursor3_close [$cursor3 close] 0 + if { $txnenv == 1 } { + error_check_good txn_commit [$t commit] 0 + } + error_check_good secdb_close [$sec_db close] 0 + error_check_good db_close [$db close] 0 +} diff --git a/test/tcl/test134.tcl b/test/tcl/test134.tcl new file mode 100644 index 00000000..32893794 --- /dev/null +++ b/test/tcl/test134.tcl @@ -0,0 +1,15 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST test134 +# TEST Test cursor cleanup for sub databases. 
+ +proc test134 {method {nentries 1000} args} { + source ./include.tcl + + eval {test133 $method $nentries "134" 1} $args + +} diff --git a/test/tcl/testparams.tcl b/test/tcl/testparams.tcl new file mode 100644 index 00000000..333176f5 --- /dev/null +++ b/test/tcl/testparams.tcl @@ -0,0 +1,472 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ + +source ./include.tcl +global is_freebsd_test +global tcl_platform +global one_test +global serial_tests +set serial_tests {rep002 rep005 rep016 rep020 rep022 rep026 rep031 rep063 \ + rep078 rep079 rep096 rep097} +#set serial_tests {} + +set subs {auto_repmgr bigfile dead env fop lock log memp multi_repmgr \ + mutex other_repmgr plat recd rep rsrc sdb sdbtest sec si test txn} + +set test_names(bigfile) [list bigfile001 bigfile002] +set test_names(compact) [list test111 \ + test112 test113 test114 test115 test117 test130] +set test_names(dead) [list dead001 dead002 dead003 dead004 dead005 dead006 \ + dead007 dead008 dead009 dead010 dead011] +set test_names(elect) [list rep002 rep005 rep016 rep020 rep022 rep026 \ + rep063 rep067 rep069 rep076 rep093 rep094] +set test_names(env) [list env001 env002 env003 env004 env005 env006 \ + env007 env008 env009 env010 env011 env012 env013 env014 env015 env016 \ + env017 env018 env019 env020 env021] +set test_names(fop) [list fop001 fop002 fop003 fop004 fop005 fop006 \ + fop007 fop008 fop009 fop010 fop011 fop012] +set test_names(init) [list rep029 rep030 rep031 rep033 rep037 rep038 rep039\ + rep055 rep060 rep061 rep062 rep070 rep072 rep084 rep085 rep086 rep087 \ + rep089 rep098] +set test_names(lock) [list lock001 lock002 lock003 lock004 lock005 lock006] +set test_names(log) [list log001 log002 log003 log004 log005 log006 \ + log007 log008 log009] +set test_names(memp) [list memp001 memp002 memp003 memp004 memp005] +set test_names(mutex) [list mut001 mut002] +set test_names(plat) [list 
plat001] +set test_names(recd) [list recd001 recd002 recd003 recd004 recd005 recd006 \ + recd007 recd008 recd009 recd010 recd011 recd012 recd013 recd014 recd015 \ + recd016 recd017 recd018 recd019 recd020 recd022 recd023 recd024 recd025] +set test_names(rep) [list rep001 rep002 rep003 rep005 rep006 rep007 \ + rep008 rep009 rep010 rep011 rep012 rep013 rep014 rep015 rep016 rep017 \ + rep018 rep019 rep020 rep021 rep022 rep023 rep024 rep025 rep026 rep027 \ + rep028 rep029 rep030 rep031 rep032 rep033 rep034 rep035 rep036 rep037 \ + rep038 rep039 rep040 rep041 rep042 rep043 rep044 rep045 rep046 rep047 \ + rep048 rep049 rep050 rep051 rep052 rep053 rep054 rep055 \ + rep058 rep060 rep061 rep062 rep063 rep064 rep065 rep066 rep067 \ + rep068 rep069 rep070 rep071 rep072 rep073 rep074 rep075 rep076 rep077 \ + rep078 rep079 rep080 rep081 rep082 rep083 rep084 rep085 rep086 rep087 \ + rep088 rep089 rep090 rep091 rep092 rep093 rep094 rep095 rep096 rep097 \ + rep098] +set test_names(skip_for_env_private) [list rep002 rep003 rep004 rep005 \ + rep014 rep016 rep017 rep018 rep020 rep022 rep026 rep028 rep031 \ + rep033 rep035 rep036 rep038 rep039 rep040 rep041 rep042 rep043 rep044 \ + rep045 rep048 rep054 rep055 rep056 rep057 rep059 rep060 rep061 rep063 \ + rep065 rep066 rep067 rep068 rep069 rep070 rep072 rep076 rep078 \ + rep079 rep081 rep082 rep083 rep088 rep095 rep096 rep098 ] +set test_names(skip_for_inmem_db) [list rep002 rep003 rep004 rep008 rep009 \ + rep011 rep015 rep017 rep018 rep027 rep036 rep042 rep043 rep056 rep057 \ + rep058 rep059 rep065 rep068 rep078 rep079 rep081 rep082 rep083 rep084 \ + rep085 rep086 rep087 rep088 rep090 ] +set test_names(skip_for_inmem_rep) [list rep089] +set test_names(auto_repmgr) [list repmgr001 repmgr002 repmgr003 ] +set test_names(basic_repmgr) [list basic_repmgr_test \ + basic_repmgr_election_test basic_repmgr_init_test ] +set test_names(multi_repmgr) [list repmgr100 repmgr101 repmgr102 \ + repmgr105 repmgr106 repmgr107 repmgr108 repmgr109 \ + 
repmgr110 repmgr111 repmgr112] +set test_names(other_repmgr) [list repmgr007 repmgr009 repmgr010 repmgr011 \ + repmgr012 repmgr013 repmgr017 repmgr018 repmgr023 repmgr024 repmgr025 \ + repmgr026 repmgr027 repmgr028 repmgr029 repmgr030 repmgr031 repmgr032] +set test_names(rsrc) [list rsrc001 rsrc002 rsrc003 rsrc004] +set test_names(sdb) [list sdb001 sdb002 sdb003 sdb004 sdb005 sdb006 \ + sdb007 sdb008 sdb009 sdb010 sdb011 sdb012 sdb013 sdb014 sdb015 sdb016 \ + sdb017 sdb018 sdb019 sdb020 ] +set test_names(sdbtest) [list sdbtest001 sdbtest002] +set test_names(sec) [list sec001 sec002] +set test_names(si) [list si001 si002 si003 si004 si005 si006 si007 si008] +set test_names(test) [list test001 test002 test003 test004 test005 \ + test006 test007 test008 test009 test010 test011 test012 test013 test014 \ + test015 test016 test017 test018 test019 test020 test021 test022 test023 \ + test024 test025 test026 test027 test028 test029 test030 test031 test032 \ + test033 test034 test035 test036 test037 test038 test039 test040 test041 \ + test042 test043 test044 test045 test046 test047 test048 test049 test050 \ + test051 test052 test053 test054 test055 test056 test057 test058 test059 \ + test060 test061 test062 test063 test064 test065 test066 test067 test068 \ + test069 test070 test071 test072 test073 test074 test076 test077 \ + test078 test079 test081 test082 test083 test084 test085 test086 \ + test087 test088 test089 test090 test091 test092 test093 test094 test095 \ + test096 test097 test098 test099 test100 test101 test102 test103 test107 \ + test109 test110 test111 test112 test113 test114 test115 test116 test117 \ + test119 test120 test121 test122 test123 test124 test125 test126 test127 \ + test128 test129 test130 test131 test132 test133 test134] + +set test_names(txn) [list txn001 txn002 txn003 txn004 txn005 txn006 \ + txn007 txn008 txn009 txn010 txn011 txn012 txn013 txn014] + +# Set up a list of rep tests to run before committing changes to the +# replication system. 
By default, all tests in test_names(rep) are +# included. To skip a test, add it to the 'skip_for_rep_commit' variable. +set skip_for_rep_commit [list rep005 rep020 rep022 rep026 rep063 rep065 \ + rep069 rep076] +set test_names(rep_commit) $test_names(rep) +foreach test $skip_for_rep_commit { + set idx [lsearch -exact $test_names(rep_commit) $test] + if { $idx >= 0 } { + set test_names(rep_commit) [lreplace \ + $test_names(rep_commit) $idx $idx] + } +} + +# Source all the tests, whether we're running one or many. +foreach sub $subs { + foreach test $test_names($sub) { + source $test_path/$test.tcl + } +} + +# Reset test_names if we're running only one test. +if { $one_test != "ALL" } { + foreach sub $subs { + set test_names($sub) "" + } + set type [string trim $one_test 0123456789] + set test_names($type) [list $one_test] +} + +source $test_path/archive.tcl +source $test_path/backup.tcl +source $test_path/byteorder.tcl +source $test_path/dbm.tcl +source $test_path/foputils.tcl +source $test_path/hsearch.tcl +source $test_path/join.tcl +source $test_path/logtrack.tcl +source $test_path/ndbm.tcl +source $test_path/parallel.tcl +source $test_path/portable.tcl +source $test_path/reputils.tcl +source $test_path/reputilsnoenv.tcl +source $test_path/sdbutils.tcl +source $test_path/shelltest.tcl +source $test_path/sijointest.tcl +source $test_path/siutils.tcl +source $test_path/testutils.tcl +source $test_path/upgrade.tcl + +set parms(recd001) 0 +set parms(recd002) 0 +set parms(recd003) 0 +set parms(recd004) 0 +set parms(recd005) "" +set parms(recd006) 0 +set parms(recd007) "" +set parms(recd008) {4 4} +set parms(recd009) 0 +set parms(recd010) 0 +set parms(recd011) {200 15 1} +set parms(recd012) {0 49 25 100 5} +set parms(recd013) 100 +set parms(recd014) "" +set parms(recd015) "" +set parms(recd016) "" +set parms(recd017) 0 +set parms(recd018) 10 +set parms(recd019) 50 +set parms(recd020) "" +set parms(recd022) "" +set parms(recd023) "" +set parms(recd024) "" +set 
parms(recd025) "" +set parms(rep001) {1000 "001"} +set parms(rep002) {10 3 "002"} +set parms(rep003) "003" +set parms(rep005) "" +set parms(rep006) {1000 "006"} +set parms(rep007) {10 "007"} +set parms(rep008) {10 "008"} +set parms(rep009) {10 "009"} +set parms(rep010) {100 "010"} +set parms(rep011) "011" +set parms(rep012) {10 "012"} +set parms(rep013) {10 "013"} +set parms(rep014) {10 "014"} +set parms(rep015) {100 "015" 3} +set parms(rep016) "" +set parms(rep017) {10 "017"} +set parms(rep018) {10 "018"} +set parms(rep019) {3 "019"} +set parms(rep020) "" +set parms(rep021) {3 "021"} +set parms(rep022) "" +set parms(rep023) {10 "023"} +set parms(rep024) {1000 "024"} +set parms(rep025) {200 "025"} +set parms(rep026) "" +set parms(rep027) {1000 "027"} +set parms(rep028) {100 "028"} +set parms(rep029) {200 "029"} +set parms(rep030) {500 "030"} +set parms(rep031) {200 "031"} +set parms(rep032) {200 "032"} +set parms(rep033) {200 "033"} +set parms(rep034) {2 "034"} +set parms(rep035) {100 "035"} +set parms(rep036) {200 "036"} +set parms(rep037) {1500 "037"} +set parms(rep038) {200 "038"} +set parms(rep039) {200 "039"} +set parms(rep040) {200 "040"} +set parms(rep041) {500 "041"} +set parms(rep042) {10 "042"} +set parms(rep043) {25 "043"} +set parms(rep044) {"044"} +set parms(rep045) {"045"} +set parms(rep046) {200 "046"} +set parms(rep047) {200 "047"} +set parms(rep048) {3000 "048"} +set parms(rep049) {10 "049"} +set parms(rep050) {10 "050"} +set parms(rep051) {1000 "051"} +set parms(rep052) {200 "052"} +set parms(rep053) {200 "053"} +set parms(rep054) {200 "054"} +set parms(rep055) {200 "055"} +set parms(rep058) "058" +set parms(rep060) {200 "060"} +set parms(rep061) {500 "061"} +set parms(rep062) "062" +set parms(rep063) "" +set parms(rep064) {10 "064"} +set parms(rep065) {3} +set parms(rep066) {10 "066"} +set parms(rep067) "" +set parms(rep068) {"068"} +set parms(rep069) {200 "069"} +set parms(rep070) {200 "070"} +set parms(rep071) { 10 "071"} +set parms(rep072) 
{200 "072"} +set parms(rep073) {200 "073"} +set parms(rep074) {"074"} +set parms(rep075) {"075"} +set parms(rep076) "" +set parms(rep077) {"077"} +set parms(rep078) {"078"} +set parms(rep079) {"079"} +set parms(rep080) {200 "080"} +set parms(rep081) {200 "081"} +set parms(rep082) {200 "082"} +set parms(rep083) {200 "083"} +set parms(rep084) {200 "084"} +set parms(rep085) {20 "085"} +set parms(rep086) {"086"} +set parms(rep087) {200 "087"} +set parms(rep088) {20 "088"} +set parms(rep089) {200 "089"} +set parms(rep090) {50 "090"} +set parms(rep091) {20 "091"} +set parms(rep092) {20 "092"} +set parms(rep093) {20 "093"} +set parms(rep094) {"094"} +set parms(rep095) {200 "095"} +set parms(rep096) {20 "096"} +set parms(rep097) {"097"} +set parms(rep098) {200 "098"} +set parms(repmgr007) {100 "007"} +set parms(repmgr009) {10 "009"} +set parms(repmgr010) {100 "010"} +set parms(repmgr011) {100 "011"} +set parms(repmgr012) {100 "012"} +set parms(repmgr013) {100 "013"} +set parms(repmgr017) {1000 "017"} +set parms(repmgr018) {100 "018"} +set parms(repmgr023) {50 "023"} +set parms(repmgr024) {50 "024"} +set parms(repmgr025) {100 "025"} +set parms(repmgr026) {"026"} +set parms(repmgr027) {"027"} +set parms(repmgr028) {"028"} +set parms(repmgr030) {100 "030"} +set parms(repmgr032) {"032"} +set parms(repmgr100) "" +set parms(repmgr101) "" +set parms(repmgr102) "" +set parms(repmgr105) "" +set parms(repmgr106) "" +set parms(repmgr107) "" +set parms(repmgr108) "" +set parms(repmgr109) "" +set parms(repmgr110) "" +set parms(repmgr111) "" +set parms(repmgr112) "" +set parms(subdb001) "" +set parms(subdb002) 10000 +set parms(subdb003) 1000 +set parms(subdb004) "" +set parms(subdb005) 100 +set parms(subdb006) 100 +set parms(subdb007) "" +set parms(subdb008) "" +set parms(subdb009) "" +set parms(subdb010) "" +set parms(subdb011) {13 10} +set parms(subdb012) "" +set parms(sdb001) "" +set parms(sdb002) 10000 +set parms(sdb003) 1000 +set parms(sdb004) "" +set parms(sdb005) 100 +set 
parms(sdb006) 100 +set parms(sdb007) "" +set parms(sdb008) "" +set parms(sdb009) "" +set parms(sdb010) "" +set parms(sdb011) {13 10} +set parms(sdb012) "" +set parms(sdb013) 10 +set parms(sdb014) "" +set parms(sdb015) 1000 +set parms(sdb016) 100 +set parms(sdb017) "" +set parms(sdb018) 100 +set parms(sdb019) 100 +set parms(sdb020) 10 +set parms(si001) {200 "001"} +set parms(si002) {200 "002"} +set parms(si003) {200 "003"} +set parms(si004) {200 "004"} +set parms(si005) {200 "005"} +set parms(si006) {200 "006"} +set parms(si007) {10 "007"} +set parms(si008) {10 "008"} +set parms(test001) {10000 0 0 "001"} +set parms(test002) 10000 +set parms(test003) "" +set parms(test004) {10000 "004" 0} +set parms(test005) 10000 +set parms(test006) {10000 0 "006" 5} +set parms(test007) {10000 "007" 5} +set parms(test008) {"008" 0} +set parms(test009) "" +set parms(test010) {10000 5 "010"} +set parms(test011) {10000 5 "011"} +set parms(test012) "" +set parms(test013) 10000 +set parms(test014) 10000 +set parms(test015) {7500 0} +set parms(test016) 10000 +set parms(test017) {0 19 "017"} +set parms(test018) 10000 +set parms(test019) 10000 +set parms(test020) 10000 +set parms(test021) 10000 +set parms(test022) "" +set parms(test023) "" +set parms(test024) 10000 +set parms(test025) {10000 0 "025"} +set parms(test026) {2000 5 "026"} +set parms(test027) {100} +set parms(test028) "" +set parms(test029) 10000 +set parms(test030) 10000 +set parms(test031) {10000 5 "031"} +set parms(test032) {10000 5 "032" 0} +set parms(test033) {10000 5 "033" 0} +set parms(test034) 10000 +set parms(test035) 10000 +set parms(test036) 10000 +set parms(test037) 100 +set parms(test038) {10000 5 "038"} +set parms(test039) {10000 5 "039"} +set parms(test040) 10000 +set parms(test041) 10000 +set parms(test042) 1000 +set parms(test043) 10000 +set parms(test044) {5 10 0} +set parms(test045) 1000 +set parms(test046) "" +set parms(test047) "" +set parms(test048) "" +set parms(test049) "" +set parms(test050) "" +set 
parms(test051) "" +set parms(test052) "" +set parms(test053) "" +set parms(test054) "" +set parms(test055) "" +set parms(test056) "" +set parms(test057) "" +set parms(test058) "" +set parms(test059) "" +set parms(test060) "" +set parms(test061) "" +set parms(test062) {200 200 "062"} +set parms(test063) "" +set parms(test064) "" +set parms(test065) "" +set parms(test066) "" +set parms(test067) {1000 "067"} +set parms(test068) "" +set parms(test069) {50 "069"} +set parms(test070) {4 2 1000 CONSUME 0 -txn "070"} +set parms(test071) {1 1 10000 CONSUME 0 -txn "071"} +set parms(test072) {512 20 "072"} +set parms(test073) {512 50 "073"} +set parms(test074) {-nextnodup 100 "074"} +set parms(test076) {1000 "076"} +set parms(test077) {1000 "077"} +set parms(test078) {100 512 "078"} +set parms(test079) {10000 512 "079" 20} +set parms(test081) {13 "081"} +set parms(test082) {-prevnodup 100 "082"} +set parms(test083) {512 5000 2} +set parms(test084) {10000 "084" 65536} +set parms(test085) {512 3 10 "085"} +set parms(test086) "" +set parms(test087) {512 50 "087"} +set parms(test088) "" +set parms(test089) 1000 +set parms(test090) {10000 "090"} +set parms(test091) {4 2 1000 0 "091"} +set parms(test092) {1000} +set parms(test093) {10000 "093"} +set parms(test094) {10000 10 "094"} +set parms(test095) {"095"} +set parms(test096) {512 1000 19} +set parms(test097) {500 400} +set parms(test098) "" +set parms(test099) 10000 +set parms(test100) {10000 "100"} +set parms(test101) {1000 -txn "101"} +set parms(test102) {1000 "102"} +set parms(test103) {100 4294967250 "103"} +set parms(test107) "" +set parms(test109) {"109"} +set parms(test110) {10000 3} +set parms(test111) {10000 "111"} +set parms(test112) {80000 "112"} +set parms(test113) {10000 5 "113"} +set parms(test114) {10000 "114"} +set parms(test115) {10000 "115"} +set parms(test116) {"116"} +set parms(test117) {10000 "117"} +set parms(test119) {"119"} +set parms(test120) {"120"} +set parms(test121) {"121"} +set parms(test122) 
{"122"} +set parms(test123) "" +set parms(test124) 1000 +set parms(test125) "" +set parms(test126) {10000 "126" 1 0 0} +set parms(test127) {10000 5 "127" 0} +set parms(test128) {10000 1} +set parms(test129) {10000 5} +set parms(test130) {10000 3 "130"} +set parms(test131) {1000 "131" 5 0 0} +set parms(test132) {1000 5} +set parms(test133) {1000 "133" 0} +set parms(test134) {1000} + +# Shell script tests. Each list entry is a {directory filename rundir} list, +# invoked with "/bin/sh filename". +set shelltest_list { + { c chk.ctests "" } + { cxx chk.cxxtests .. } + { java/junit chk.bdb junit } + { java/compat chk.bdb compat } + { sql_codegen chk.bdb "" } + { xa chk.xa "" } +} diff --git a/test/tcl/testutils.tcl b/test/tcl/testutils.tcl new file mode 100644 index 00000000..d7757999 --- /dev/null +++ b/test/tcl/testutils.tcl @@ -0,0 +1,4045 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Test system utilities +# +# Timestamp -- print time along with elapsed time since last invocation +# of timestamp. +proc timestamp {{opt ""}} { + global __timestamp_start + + set now [clock seconds] + + # -c accurate to the click, instead of the second. + # -r seconds since the Epoch + # -t current time in the format expected by db_recover -t. + # -w wallclock time + # else wallclock plus elapsed time. 
+ if {[string compare $opt "-r"] == 0} { + return $now + } elseif {[string compare $opt "-t"] == 0} { + return [clock format $now -format "%y%m%d%H%M.%S"] + } elseif {[string compare $opt "-w"] == 0} { + return [clock format $now -format "%c"] + } else { + if {[string compare $opt "-c"] == 0} { + set printclicks 1 + } else { + set printclicks 0 + } + + if {[catch {set start $__timestamp_start}] != 0} { + set __timestamp_start $now + } + set start $__timestamp_start + + set elapsed [expr $now - $start] + set the_time [clock format $now -format ""] + set __timestamp_start $now + + if { $printclicks == 1 } { + set pc_print [format ".%08u" [__fix_num [clock clicks]]] + } else { + set pc_print "" + } + + format "%02d:%02d:%02d$pc_print (%02d:%02d:%02d)" \ + [__fix_num [clock format $now -format "%H"]] \ + [__fix_num [clock format $now -format "%M"]] \ + [__fix_num [clock format $now -format "%S"]] \ + [expr $elapsed / 3600] \ + [expr ($elapsed % 3600) / 60] \ + [expr ($elapsed % 3600) % 60] + } +} + +proc __fix_num { num } { + set num [string trimleft $num "0"] + if {[string length $num] == 0} { + set num "0" + } + return $num +} + +# Add a {key,data} pair to the specified database where +# key=filename and data=file contents. +proc put_file { db txn flags file } { + source ./include.tcl + + set fid [open $file r] + fconfigure $fid -translation binary + set data [read $fid] + close $fid + + set ret [eval {$db put} $txn $flags {$file $data}] + error_check_good put_file $ret 0 +} + +# Get a {key,data} pair from the specified database where +# key=filename and data=file contents and then write the +# data to the specified file. 
+proc get_file { db txn flags file outfile } { + source ./include.tcl + + set fid [open $outfile w] + fconfigure $fid -translation binary + if [catch {eval {$db get} $txn $flags {$file}} data] { + puts -nonewline $fid $data + } else { + # Data looks like {{key data}} + set data [lindex [lindex $data 0] 1] + puts -nonewline $fid $data + } + close $fid +} + +# Add a {key,data} pair to the specified database where +# key=file contents and data=file name. +proc put_file_as_key { db txn flags file } { + source ./include.tcl + + set fid [open $file r] + fconfigure $fid -translation binary + set filecont [read $fid] + close $fid + + # Use not the file contents, but the file name concatenated + # before the file contents, as a key, to ensure uniqueness. + set data $file$filecont + + set ret [eval {$db put} $txn $flags {$data $file}] + error_check_good put_file $ret 0 +} + +# Get a {key,data} pair from the specified database where +# key=file contents and data=file name +proc get_file_as_key { db txn flags file} { + source ./include.tcl + + set fid [open $file r] + fconfigure $fid -translation binary + set filecont [read $fid] + close $fid + + set data $file$filecont + + return [eval {$db get} $txn $flags {$data}] +} + +# open file and call dump_file to dumpkeys to tempfile +proc open_and_dump_file { + dbname env outfile checkfunc dump_func beg cont args} { + global encrypt + global passwd + source ./include.tcl + + set encarg "" + if { $encrypt > 0 && $env == "NULL" } { + set encarg "-encryptany $passwd" + } + set envarg "" + set txn "" + set txnenv 0 + if { $env != "NULL" } { + append envarg " -env $env " + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append envarg " -auto_commit " + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + } + set db [eval {berkdb open} $envarg -rdonly -unknown $encarg $args $dbname] + error_check_good dbopen [is_valid_db $db] TRUE + $dump_func $db $txn $outfile $checkfunc $beg $cont + if { 
$txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} + +# open file and call dump_file to dumpkeys to tempfile +proc open_and_dump_subfile { + dbname env outfile checkfunc dump_func beg cont subdb} { + global encrypt + global passwd + source ./include.tcl + + set encarg "" + if { $encrypt > 0 && $env == "NULL" } { + set encarg "-encryptany $passwd" + } + set envarg "" + set txn "" + set txnenv 0 + if { $env != "NULL" } { + append envarg "-env $env" + set txnenv [is_txnenv $env] + if { $txnenv == 1 } { + append envarg " -auto_commit " + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + } + } + set db [eval {berkdb open -rdonly -unknown} \ + $envarg $encarg {$dbname $subdb}] + error_check_good dbopen [is_valid_db $db] TRUE + $dump_func $db $txn $outfile $checkfunc $beg $cont + if { $txnenv == 1 } { + error_check_good txn [$t commit] 0 + } + error_check_good db_close [$db close] 0 +} + +# Sequentially read a file and call checkfunc on each key/data pair. +# Dump the keys out to the file specified by outfile. 
+proc dump_file { db txn outfile {checkfunc NONE} } { + source ./include.tcl + + dump_file_direction $db $txn $outfile $checkfunc "-first" "-next" +} + +proc dump_file_direction { db txn outfile checkfunc start continue } { + source ./include.tcl + + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + dump_file_walk $c $outfile $checkfunc $start $continue + error_check_good curs_close [$c close] 0 +} + +proc dump_file_walk { c outfile checkfunc start continue {flag ""} } { + set outf [open $outfile w] + for {set d [eval {$c get} $flag $start] } \ + { [llength $d] != 0 } \ + {set d [eval {$c get} $flag $continue] } { + set kd [lindex $d 0] + set k [lindex $kd 0] + set d2 [lindex $kd 1] + if { $checkfunc != "NONE" } { + $checkfunc $k $d2 + } + puts $outf $k + # XXX: Geoff Mainland + # puts $outf "$k $d2" + } + close $outf +} + +proc dump_binkey_file { db txn outfile checkfunc } { + source ./include.tcl + + dump_binkey_file_direction $db $txn $outfile $checkfunc \ + "-first" "-next" +} +proc dump_bin_file { db txn outfile checkfunc } { + source ./include.tcl + + dump_bin_file_direction $db $txn $outfile $checkfunc "-first" "-next" +} + +# Note: the following procedure assumes that the binary-file-as-keys were +# inserted into the database by put_file_as_key, and consist of the file +# name followed by the file contents as key, to ensure uniqueness. 
+proc dump_binkey_file_direction { db txn outfile checkfunc begin cont } { + source ./include.tcl + + set d1 $testdir/d1 + + set outf [open $outfile w] + + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + error_check_good db_cursor [is_valid_cursor $c $db] TRUE + + set inf $d1 + for {set d [$c get $begin] } { [llength $d] != 0 } \ + {set d [$c get $cont] } { + set kd [lindex $d 0] + set keyfile [lindex $kd 0] + set data [lindex $kd 1] + + set ofid [open $d1 w] + fconfigure $ofid -translation binary + + # Chop off the first few bytes--that's the file name, + # added for uniqueness in put_file_as_key, which we don't + # want in the regenerated file. + set namelen [string length $data] + set keyfile [string range $keyfile $namelen end] + puts -nonewline $ofid $keyfile + close $ofid + + $checkfunc $data $d1 + puts $outf $data + flush $outf + } + close $outf + error_check_good curs_close [$c close] 0 + fileremove $d1 +} + +proc dump_bin_file_direction { db txn outfile checkfunc begin cont } { + source ./include.tcl + + set d1 $testdir/d1 + + set outf [open $outfile w] + + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + + for {set d [$c get $begin] } \ + { [llength $d] != 0 } {set d [$c get $cont] } { + set k [lindex [lindex $d 0] 0] + set data [lindex [lindex $d 0] 1] + set ofid [open $d1 w] + fconfigure $ofid -translation binary + puts -nonewline $ofid $data + close $ofid + + $checkfunc $k $d1 + puts $outf $k + } + close $outf + error_check_good curs_close [$c close] 0 + fileremove -f $d1 +} + +proc make_data_str { key } { + set datastr "" + for {set i 0} {$i < 10} {incr i} { + append datastr $key + } + return $datastr +} + +proc error_check_bad { func result bad {txn 0}} { + if { [binary_compare $result $bad] == 0 } { + if { $txn != 0 } { + $txn abort + } + flush stdout + flush stderr + error "FAIL:[timestamp] $func returned error value $bad" + } +} + +proc error_check_good { 
func result desired {txn 0} } { + if { [binary_compare $desired $result] != 0 } { + if { $txn != 0 } { + $txn abort + } + flush stdout + flush stderr + error "FAIL:[timestamp]\ + $func: expected $desired, got $result" + } +} + +proc error_check_match { note result desired } { + if { ![string match $desired $result] } { + error "FAIL:[timestamp]\ + $note: expected $desired, got $result" + } +} + +# Locks have the prefix of their manager. +proc is_substr { str sub } { + if { [string first $sub $str] == -1 } { + return 0 + } else { + return 1 + } +} + +proc is_serial { str } { + global serial_tests + + foreach test $serial_tests { + if { [is_substr $str $test] == 1 } { + return 1 + } + } + return 0 +} + +proc release_list { l } { + + # Now release all the locks + foreach el $l { + catch { $el put } ret + error_check_good lock_put $ret 0 + } +} + +proc debug { {stop 0} } { + global __debug_on + global __debug_print + global __debug_test + + set __debug_on 1 + set __debug_print 1 + set __debug_test $stop +} + +# Check if each key appears exactly [llength dlist] times in the file with +# the duplicate tags matching those that appear in dlist. 
+proc dup_check { db txn tmpfile dlist {extra 0}} { + source ./include.tcl + + set outf [open $tmpfile w] + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + set lastkey "" + set done 0 + while { $done != 1} { + foreach did $dlist { + set rec [$c get "-next"] + if { [string length $rec] == 0 } { + set done 1 + break + } + set key [lindex [lindex $rec 0] 0] + set fulldata [lindex [lindex $rec 0] 1] + set id [id_of $fulldata] + set d [data_of $fulldata] + if { [string compare $key $lastkey] != 0 && \ + $id != [lindex $dlist 0] } { + set e [lindex $dlist 0] + error "FAIL: \tKey \ + $key, expected dup id $e, got $id" + } + error_check_good dupget.data $d $key + error_check_good dupget.id $id $did + set lastkey $key + } + # + # Some tests add an extra dup (like overflow entries) + # Check id if it exists. + if { $extra != 0} { + set okey $key + set rec [$c get "-next"] + if { [string length $rec] != 0 } { + set key [lindex [lindex $rec 0] 0] + # + # If this key has no extras, go back for + # next iteration. + if { [string compare $key $lastkey] != 0 } { + set key $okey + set rec [$c get "-prev"] + } else { + set fulldata [lindex [lindex $rec 0] 1] + set id [id_of $fulldata] + set d [data_of $fulldata] + error_check_bad dupget.data1 $d $key + error_check_good dupget.id1 $id $extra + } + } + } + if { $done != 1 } { + puts $outf $key + } + } + close $outf + error_check_good curs_close [$c close] 0 +} + +# Check if each key appears exactly [llength dlist] times in the file with +# the duplicate tags matching those that appear in dlist. 
+proc dup_file_check { db txn tmpfile dlist } { + source ./include.tcl + + set outf [open $tmpfile w] + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + set lastkey "" + set done 0 + while { $done != 1} { + foreach did $dlist { + set rec [$c get "-next"] + if { [string length $rec] == 0 } { + set done 1 + break + } + set key [lindex [lindex $rec 0] 0] + if { [string compare $key $lastkey] != 0 } { + # + # If we changed files read in new contents. + # + set fid [open $key r] + fconfigure $fid -translation binary + set filecont [read $fid] + close $fid + } + set fulldata [lindex [lindex $rec 0] 1] + set id [id_of $fulldata] + set d [data_of $fulldata] + if { [string compare $key $lastkey] != 0 && \ + $id != [lindex $dlist 0] } { + set e [lindex $dlist 0] + error "FAIL: \tKey \ + $key, expected dup id $e, got $id" + } + error_check_good dupget.data $d $filecont + error_check_good dupget.id $id $did + set lastkey $key + } + if { $done != 1 } { + puts $outf $key + } + } + close $outf + error_check_good curs_close [$c close] 0 +} + +# Parse duplicate data entries of the form N:data. Data_of returns +# the data part; id_of returns the numerical part +proc data_of {str} { + set ndx [string first ":" $str] + if { $ndx == -1 } { + return "" + } + return [ string range $str [expr $ndx + 1] end] +} + +proc id_of {str} { + set ndx [string first ":" $str] + if { $ndx == -1 } { + return "" + } + + return [ string range $str 0 [expr $ndx - 1]] +} + +proc nop { {args} } { + return +} + +# Partial put test procedure. +# Munges a data val through three different partial puts. Stores +# the final munged string in the dvals array so that you can check +# it later (dvals should be global). We take the characters that +# are being replaced, make them capitals and then replicate them +# some number of times (n_add). We do this at the beginning of the +# data, at the middle and at the end. The parameters are: +# db, txn, key -- as per usual. 
Data is the original data element +# from which we are starting. n_replace is the number of characters +# that we will replace. n_add is the number of times we will add +# the replaced string back in. +proc partial_put { method db txn gflags key data n_replace n_add } { + global dvals + source ./include.tcl + + # Here is the loop where we put and get each key/data pair + # We will do the initial put and then three Partial Puts + # for the beginning, middle and end of the string. + + eval {$db put} $txn {$key [chop_data $method $data]} + + # Beginning change + set s [string range $data 0 [ expr $n_replace - 1 ] ] + set repl [ replicate [string toupper $s] $n_add ] + + # This is gross, but necessary: if this is a fixed-length + # method, and the chopped length of $repl is zero, + # it's because the original string was zero-length and our data item + # is all nulls. Set repl to something non-NULL. + if { [is_fixed_length $method] && \ + [string length [chop_data $method $repl]] == 0 } { + set repl [replicate "." 
$n_add] + } + + set newstr [chop_data $method $repl[string range $data $n_replace end]] + set ret [eval {$db put} $txn {-partial [list 0 $n_replace] \ + $key [chop_data $method $repl]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags $txn {$key}] + error_check_good get $ret [list [list $key [pad_data $method $newstr]]] + + # End Change + set len [string length $newstr] + set spl [expr $len - $n_replace] + # Handle case where $n_replace > $len + if { $spl < 0 } { + set spl 0 + } + + set s [string range $newstr [ expr $len - $n_replace ] end ] + # Handle zero-length keys + if { [string length $s] == 0 } { set s "A" } + + set repl [ replicate [string toupper $s] $n_add ] + set newstr [chop_data $method \ + [string range $newstr 0 [expr $spl - 1 ] ]$repl] + + set ret [eval {$db put} $txn \ + {-partial [list $spl $n_replace] $key [chop_data $method $repl]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags $txn {$key}] + error_check_good get $ret [list [list $key [pad_data $method $newstr]]] + + # Middle Change + set len [string length $newstr] + set mid [expr $len / 2 ] + set beg [expr $mid - [expr $n_replace / 2] ] + set end [expr $beg + $n_replace - 1] + set s [string range $newstr $beg $end] + set repl [ replicate [string toupper $s] $n_add ] + set newstr [chop_data $method [string range $newstr 0 \ + [expr $beg - 1 ] ]$repl[string range $newstr [expr $end + 1] end]] + + set ret [eval {$db put} $txn {-partial [list $beg $n_replace] \ + $key [chop_data $method $repl]}] + error_check_good put $ret 0 + + set ret [eval {$db get} $gflags $txn {$key}] + error_check_good get $ret [list [list $key [pad_data $method $newstr]]] + + set dvals($key) [pad_data $method $newstr] +} + +proc replicate { str times } { + set res $str + for { set i 1 } { $i < $times } { set i [expr $i * 2] } { + append res $res + } + return $res +} + +proc repeat { str n } { + set ret "" + while { $n > 0 } { + set ret $str$ret + incr n -1 + } + return $ret +} + +proc 
isqrt { l } { + set s [expr sqrt($l)] + set ndx [expr [string first "." $s] - 1] + return [string range $s 0 $ndx] +} + +# If we run watch_procs multiple times without an intervening +# testdir cleanup, it's possible that old sentinel files will confuse +# us. Make sure they're wiped out before we spawn any other processes. +proc sentinel_init { } { + source ./include.tcl + + set filelist {} + set ret [catch {glob $testdir/begin.*} result] + if { $ret == 0 } { + set filelist $result + } + + set ret [catch {glob $testdir/end.*} result] + if { $ret == 0 } { + set filelist [concat $filelist $result] + } + + foreach f $filelist { + fileremove $f + } +} + +proc watch_procs { pidlist {delay 5} {max 3600} {quiet 0} } { + source ./include.tcl + global killed_procs + + set elapsed 0 + set killed_procs {} + + # Don't start watching the processes until a sentinel + # file has been created for each one. + foreach pid $pidlist { + while { [file exists $testdir/begin.$pid] == 0 } { + tclsleep $delay + incr elapsed $delay + # If pids haven't been created in one-fifth + # of the time allowed for the whole test, + # there's a problem. Report an error and fail. + if { $elapsed > [expr {$max / 5}] } { + puts "FAIL: begin.pid not created" + break + } + } + } + + while { 1 } { + + tclsleep $delay + incr elapsed $delay + + # Find the list of processes with outstanding sentinel + # files (i.e. a begin.pid and no end.pid). + set beginlist {} + set endlist {} + set ret [catch {glob $testdir/begin.*} result] + if { $ret == 0 } { + set beginlist $result + } + set ret [catch {glob $testdir/end.*} result] + if { $ret == 0 } { + set endlist $result + } + + set bpids {} + catch {unset epids} + foreach begfile $beginlist { + lappend bpids [string range $begfile \ + [string length $testdir/begin.] end] + } + foreach endfile $endlist { + set epids([string range $endfile \ + [string length $testdir/end.] 
end]) 1 + } + + # The set of processes that we still want to watch, $l, + # is the set of pids that have begun but not ended + # according to their sentinel files. + set l {} + foreach p $bpids { + if { [info exists epids($p)] == 0 } { + lappend l $p + } + } + + set rlist {} + foreach i $l { + set r [ catch { exec $KILL -0 $i } res ] + if { $r == 0 } { + lappend rlist $i + } + } + if { [ llength $rlist] == 0 } { + break + } else { + puts "[timestamp] processes running: $rlist" + } + + if { $elapsed > $max } { + # We have exceeded the limit; kill processes + # and report an error + foreach i $l { + tclkill $i + } + set killed_procs $l + } + } + if { $quiet == 0 } { + puts "All processes have exited." + } + + # + # Once we are done, remove all old sentinel files. + # + set oldsent [glob -nocomplain $testdir/begin* $testdir/end*] + foreach f $oldsent { + fileremove -f $f + } + +} + +# These routines are all used from within the dbscript.tcl tester. +proc db_init { dbp do_data } { + global a_keys + global l_keys + source ./include.tcl + + set txn "" + set nk 0 + set lastkey "" + + set a_keys() BLANK + set l_keys "" + + set c [$dbp cursor] + for {set d [$c get -first] } { [llength $d] != 0 } { + set d [$c get -next] } { + set k [lindex [lindex $d 0] 0] + set d2 [lindex [lindex $d 0] 1] + incr nk + if { $do_data == 1 } { + if { [info exists a_keys($k)] } { + lappend a_keys($k) $d2 + } else { + set a_keys($k) $d2 + } + } + + lappend l_keys $k + } + error_check_good curs_close [$c close] 0 + + return $nk +} + +proc pick_op { min max n } { + if { $n == 0 } { + return add + } + + set x [berkdb random_int 1 12] + if {$n < $min} { + if { $x <= 4 } { + return put + } elseif { $x <= 8} { + return get + } else { + return add + } + } elseif {$n > $max} { + if { $x <= 4 } { + return put + } elseif { $x <= 8 } { + return get + } else { + return del + } + + } elseif { $x <= 3 } { + return del + } elseif { $x <= 6 } { + return get + } elseif { $x <= 9 } { + return put + } else { + 
return add + } +} + +# random_data: Generate a string of random characters. +# If recno is 0 - Use average to pick a length between 1 and 2 * avg. +# If recno is non-0, generate a number between 1 and 2 ^ (avg * 2), +# that will fit into a 32-bit integer. +# If the unique flag is 1, then make sure that the string is unique +# in the array "where". +proc random_data { avg unique where {recno 0} } { + upvar #0 $where arr + global debug_on + set min 1 + set max [expr $avg+$avg-1] + if { $recno } { + # + # Tcl seems to have problems with values > 30. + # + if { $max > 30 } { + set max 30 + } + set maxnum [expr int(pow(2, $max))] + } + while {1} { + set len [berkdb random_int $min $max] + set s "" + if {$recno} { + set s [berkdb random_int 1 $maxnum] + } else { + for {set i 0} {$i < $len} {incr i} { + append s [int_to_char [berkdb random_int 0 25]] + } + } + + if { $unique == 0 || [info exists arr($s)] == 0 } { + break + } + } + + return $s +} + +proc random_key { } { + global l_keys + global nkeys + set x [berkdb random_int 0 [expr $nkeys - 1]] + return [lindex $l_keys $x] +} + +proc is_err { desired } { + set x [berkdb random_int 1 100] + if { $x <= $desired } { + return 1 + } else { + return 0 + } +} + +proc pick_cursput { } { + set x [berkdb random_int 1 4] + switch $x { + 1 { return "-keylast" } + 2 { return "-keyfirst" } + 3 { return "-before" } + 4 { return "-after" } + } +} + +proc random_cursor { curslist } { + global l_keys + global nkeys + + set x [berkdb random_int 0 [expr [llength $curslist] - 1]] + set dbc [lindex $curslist $x] + + # We want to randomly set the cursor. Pick a key. + set k [random_key] + set r [$dbc get "-set" $k] + error_check_good cursor_get:$k [is_substr Error $r] 0 + + # Now move forward or backward some hops to randomly + # position the cursor. 
+ set dist [berkdb random_int -10 10] + + set dir "-next" + set boundary "-first" + if { $dist < 0 } { + set dir "-prev" + set boundary "-last" + set dist [expr 0 - $dist] + } + + for { set i 0 } { $i < $dist } { incr i } { + set r [ record $dbc get $dir $k ] + if { [llength $r] == 0 } { + set r [ record $dbc get $k $boundary ] + } + error_check_bad dbcget [llength $r] 0 + } + return [linsert $r 0 $dbc] +} + +proc record { args } { +# Recording every operation makes tests ridiculously slow on +# NT, so we are commenting this out; for debugging purposes, +# it will undoubtedly be useful to uncomment this. +# puts $args +# flush stdout + return [eval $args] +} + +proc newpair { k data } { + global l_keys + global a_keys + global nkeys + + set a_keys($k) $data + lappend l_keys $k + incr nkeys +} + +proc rempair { k } { + global l_keys + global a_keys + global nkeys + + unset a_keys($k) + set n [lsearch $l_keys $k] + error_check_bad rempair:$k $n -1 + set l_keys [lreplace $l_keys $n $n] + incr nkeys -1 +} + +proc changepair { k data } { + global l_keys + global a_keys + global nkeys + + set a_keys($k) $data +} + +proc changedup { k olddata newdata } { + global l_keys + global a_keys + global nkeys + + set d $a_keys($k) + error_check_bad changedup:$k [llength $d] 0 + + set n [lsearch $d $olddata] + error_check_bad changedup:$k $n -1 + + set a_keys($k) [lreplace $a_keys($k) $n $n $newdata] +} + +# Insert a dup into the a_keys array with DB_KEYFIRST. 
+proc adddup { k olddata newdata } { + global l_keys + global a_keys + global nkeys + + set d $a_keys($k) + if { [llength $d] == 0 } { + lappend l_keys $k + incr nkeys + set a_keys($k) [list $newdata] + } + + set ndx 0 + + set d [linsert $d $ndx $newdata] + set a_keys($k) $d +} + +proc remdup { k data } { + global l_keys + global a_keys + global nkeys + + set d $a_keys($k) + error_check_bad changedup:$k [llength $d] 0 + + set n [lsearch $d $data] + error_check_bad changedup:$k $n -1 + + set a_keys($k) [lreplace $a_keys($k) $n $n] +} + +proc dump_full_file { db txn outfile checkfunc start continue } { + source ./include.tcl + + set outf [open $outfile w] + # Now we will get each key from the DB and dump to outfile + set c [eval {$db cursor} $txn] + error_check_good dbcursor [is_valid_cursor $c $db] TRUE + + for {set d [$c get $start] } { [string length $d] != 0 } { + set d [$c get $continue] } { + set k [lindex [lindex $d 0] 0] + set d2 [lindex [lindex $d 0] 1] + $checkfunc $k $d2 + puts $outf "$k\t$d2" + } + close $outf + error_check_good curs_close [$c close] 0 +} + +proc int_to_char { i } { + global alphabet + + return [string index $alphabet $i] +} + +proc dbcheck { key data } { + global l_keys + global a_keys + global nkeys + global check_array + + if { [lsearch $l_keys $key] == -1 } { + error "FAIL: Key |$key| not in list of valid keys" + } + + set d $a_keys($key) + + if { [info exists check_array($key) ] } { + set check $check_array($key) + } else { + set check {} + } + + if { [llength $d] > 1 } { + if { [llength $check] != [llength $d] } { + # Make the check array the right length + for { set i [llength $check] } { $i < [llength $d] } \ + {incr i} { + lappend check 0 + } + set check_array($key) $check + } + + # Find this data's index + set ndx [lsearch $d $data] + if { $ndx == -1 } { + error "FAIL: \ + Data |$data| not found for key $key. 
Found |$d|" + } + + # Set the bit in the check array + set check_array($key) [lreplace $check_array($key) $ndx $ndx 1] + } elseif { [string compare $d $data] != 0 } { + error "FAIL: \ + Invalid data |$data| for key |$key|. Expected |$d|." + } else { + set check_array($key) 1 + } +} + +# Dump out the file and verify it +proc filecheck { file txn args} { + global check_array + global l_keys + global nkeys + global a_keys + source ./include.tcl + + if { [info exists check_array] == 1 } { + unset check_array + } + + eval open_and_dump_file $file NULL $file.dump dbcheck dump_full_file \ + "-first" "-next" $args + + # Check that everything we checked had all its data + foreach i [array names check_array] { + set count 0 + foreach j $check_array($i) { + if { $j != 1 } { + puts -nonewline "Key |$i| never found datum" + puts " [lindex $a_keys($i) $count]" + } + incr count + } + } + + # Check that all keys appeared in the checked array + set count 0 + foreach k $l_keys { + if { [info exists check_array($k)] == 0 } { + puts "filecheck: key |$k| not found. Data: $a_keys($k)" + } + incr count + } + + if { $count != $nkeys } { + puts "filecheck: Got $count keys; expected $nkeys" + } +} + +proc cleanup { dir env { quiet 0 } } { + global gen_upgrade + global gen_dump + global gen_portable + global is_qnx_test + global is_je_test + global old_encrypt + global passwd + source ./include.tcl + + if { $gen_upgrade == 1 || $gen_dump == 1 } { + save_upgrade_files $dir + } + + if { $gen_portable == 1 } { + save_portable_files $dir + } + +# check_handles + set remfiles {} + set ret [catch { glob $dir/* } result] + if { $ret == 0 } { + foreach fileorig $result { + # + # We: + # - Ignore any env-related files, which are + # those that have __db.* or log.* if we are + # running in an env. + # - Call 'dbremove' on any databases. + # Remove any remaining temp files. 
+ # + switch -glob -- $fileorig { + */__db.* - + */log.* - + */*.jdb { + if { $env != "NULL" } { + continue + } else { + if { $is_qnx_test } { + catch {berkdb envremove -force \ + -home $dir} r + } + lappend remfiles $fileorig + } + } + *.db { + set envargs "" + set encarg "" + # + # If in an env, it should be open crypto + # or not already. + # + if { $env != "NULL"} { + set file [file tail $fileorig] + set envargs " -env $env " + if { [is_txnenv $env] } { + append envargs " -auto_commit " + } + } else { + if { $old_encrypt != 0 } { + set encarg "-encryptany $passwd" + } + set file $fileorig + } + + # If a database is left in a corrupt + # state, dbremove might not be able to handle + # it (it does an open before the remove). + # Be prepared for this, and if necessary, + # just forcibly remove the file with a warning + # message. + set ret [catch \ + {eval {berkdb dbremove} $envargs $encarg \ + $file} res] + # If dbremove failed and we're not in an env, + # note that we don't have 100% certainty + # about whether the previous run used + # encryption. Try to remove with crypto if + # we tried without, and vice versa. + if { $ret != 0 } { + if { $env == "NULL" && \ + $old_encrypt == 0} { + set ret [catch \ + {eval {berkdb dbremove} \ + -encryptany $passwd \ + $file} res] + } + if { $env == "NULL" && \ + $old_encrypt == 1 } { + set ret [catch \ + {eval {berkdb dbremove} \ + $file} res] + } + if { $ret != 0 } { + if { $quiet == 0 } { + puts \ + "FAIL: dbremove in cleanup failed: $res" + } + set file $fileorig + lappend remfiles $file + } + } + } + default { + lappend remfiles $fileorig + } + } + } + if {[llength $remfiles] > 0} { + # + # In the HFS file system there are cases where not + # all files are removed on the first attempt. If + # it fails, try again a few times. HFS is found on + # Mac OS X machines only (although not all of them) + # so we can limit the extra delete attempts to that + # platform. 
+ # + # This bug has been compensated for in Tcl with a fix + # checked into Tcl 8.4. When Berkeley DB requires + # Tcl 8.5, we can remove this while loop and replace + # it with a simple 'fileremove -f $remfiles'. + # + set count 0 + if { $is_osx_test } { + while { [catch {eval fileremove -f $remfiles}] == 1 \ + && $count < 5 } { + incr count + } + } + # The final attempt to remove files can be for all + # OSes including Darwin. Don't catch failures, we'd + # like to notice them. + eval fileremove -f $remfiles + } + + if { $is_je_test } { + set rval [catch {eval {exec \ + $util_path/db_dump} -h $dir -l } res] + if { $rval == 0 } { + set envargs " -env $env " + if { [is_txnenv $env] } { + append envargs " -auto_commit " + } + + foreach db $res { + set ret [catch {eval \ + {berkdb dbremove} $envargs $db } res] + } + } + } + } +} + +proc log_cleanup { dir } { + source ./include.tcl + global gen_upgrade_log + global gen_portable + + if { $gen_upgrade_log == 1 } { + save_upgrade_files $dir + } + + if { $gen_portable == 1 } { + save_portable_files $dir + } + + set files [glob -nocomplain $dir/log.*] + if { [llength $files] != 0} { + foreach f $files { + fileremove -f $f + } + } +} + +proc env_cleanup { dir } { + global old_encrypt + global passwd + source ./include.tcl + + set encarg "" + if { $old_encrypt != 0 } { + set encarg "-encryptany $passwd" + } + set stat [catch {eval {berkdb envremove -home} $dir $encarg} ret] + # + # If something failed and we are left with a region entry + # in /dev/shmem that is zero-length, the envremove will + # succeed, and the shm_unlink will succeed, but it will not + # remove the zero-length entry from /dev/shmem. Remove it + # using fileremove or else all other tests using an env + # will immediately fail. 
+ # + if { $is_qnx_test == 1 } { + set region_files [glob -nocomplain /dev/shmem/$dir*] + if { [llength $region_files] != 0 } { + foreach f $region_files { + fileremove -f $f + } + } + } + log_cleanup $dir + cleanup $dir NULL +} + +proc help { cmd } { + if { [info command $cmd] == $cmd } { + set is_proc [lsearch [info procs $cmd] $cmd] + if { $is_proc == -1 } { + # Not a procedure; must be a C command + # Let's hope that it takes some parameters + # and that it prints out a message + puts "Usage: [eval $cmd]" + } else { + # It is a tcl procedure + puts -nonewline "Usage: $cmd" + set args [info args $cmd] + foreach a $args { + set is_def [info default $cmd $a val] + if { $is_def != 0 } { + # Default value + puts -nonewline " $a=$val" + } elseif {$a == "args"} { + # Print out flag values + puts " options" + args + } else { + # No default value + puts -nonewline " $a" + } + } + puts "" + } + } else { + puts "$cmd is not a command" + } +} + +# Run a recovery test for a particular operation +# Notice that we catch the return from CP and do not do anything with it. +# This is because Solaris CP seems to exit non-zero on occasion, but +# everything else seems to run just fine. +# +# We split it into two functions so that the preparation and command +# could be executed in a different process than the recovery. 
+# +proc op_codeparse { encodedop op } { + set op1 "" + set op2 "" + switch $encodedop { + "abort" { + set op1 $encodedop + set op2 "" + } + "commit" { + set op1 $encodedop + set op2 "" + } + "prepare-abort" { + set op1 "prepare" + set op2 "abort" + } + "prepare-commit" { + set op1 "prepare" + set op2 "commit" + } + "prepare-discard" { + set op1 "prepare" + set op2 "discard" + } + } + + if { $op == "op" } { + return $op1 + } else { + return $op2 + } +} + +proc op_recover { encodedop dir env_cmd dbfile cmd msg args} { + source ./include.tcl + + set op [op_codeparse $encodedop "op"] + set op2 [op_codeparse $encodedop "sub"] + puts "\t$msg $encodedop" + set gidf "" + # puts "op_recover: $op $dir $env_cmd $dbfile $cmd $args" + if { $op == "prepare" } { + sentinel_init + + # Fork off a child to run the cmd + # We append the gid, so start here making sure + # we don't have old gid's around. + set outfile $testdir/childlog + fileremove -f $testdir/gidfile + set gidf $testdir/gidfile + set pidlist {} + # puts "$tclsh_path $test_path/recdscript.tcl $testdir/recdout \ + # $op $dir $env_cmd $dbfile $gidf $cmd" + set p [exec $tclsh_path $test_path/wrap.tcl recdscript.tcl \ + $testdir/recdout $op $dir $env_cmd $dbfile $gidf $cmd $args &] + lappend pidlist $p + watch_procs $pidlist 5 + set f1 [open $testdir/recdout r] + set r [read $f1] + puts -nonewline $r + close $f1 + fileremove -f $testdir/recdout + } else { + eval {op_recover_prep $op $dir $env_cmd $dbfile $gidf $cmd} $args + } + eval {op_recover_rec $op $op2 $dir $env_cmd $dbfile $gidf} $args +} + +proc op_recover_prep { op dir env_cmd dbfile gidf cmd args} { + global log_log_record_types + global recd_debug + global recd_id + global recd_op + source ./include.tcl + + # puts "op_recover_prep: $op $dir $env_cmd $dbfile $cmd $args" + + set init_file $dir/t1 + set afterop_file $dir/t2 + set final_file $dir/t3 + + set db_cursor "" + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read 
$dir + } + + # Save the initial file and open the environment and the file + catch { file copy -force $dir/$dbfile $dir/$dbfile.init } res + copy_extent_file $dir $dbfile init + + convert_encrypt $env_cmd + set env [eval $env_cmd] + error_check_good envopen [is_valid_env $env] TRUE + + eval set args $args + set db [eval {berkdb open -auto_commit -env $env} $args {$dbfile}] + error_check_good dbopen [is_valid_db $db] TRUE + + # get the type, if heap must do additional prep work + set method [$db get_type] + if { [is_heap $method] == 1 } { + # close db so get a clean copy + error_check_good db_close [$db close] 0 + + # make copies of other heap files + append dbfile1 $dbfile "1" + catch { file copy -force $dir/$dbfile1 $dir/$dbfile.init1 } res + copy_extent_file $dir $dbfile1 init + + append dbfile2 $dbfile "2" + catch { file copy -force $dir/$dbfile2 $dir/$dbfile.init2 } res + copy_extent_file $dir $dbfile2 init + + # reopen to put us back to starting point + set db [eval {berkdb open -auto_commit -env $env} \ + $args {$dbfile}] + error_check_good dbopen [is_valid_db $db] TRUE + } + + # Dump out file contents for initial case + eval open_and_dump_file $dbfile $env $init_file nop \ + dump_file_direction "-first" "-next" $args + + set t [$env txn] + error_check_bad txn_begin $t NULL + error_check_good txn_begin [is_substr $t "txn"] 1 + + # Now fill in the db, tmgr, and the txnid in the command + set exec_cmd $cmd + + set items [lsearch -all $cmd ENV] + foreach i $items { + set exec_cmd [lreplace $exec_cmd $i $i $env] + } + + set items [lsearch -all $cmd TXNID] + foreach i $items { + set exec_cmd [lreplace $exec_cmd $i $i $t] + } + + set items [lsearch -all $cmd DB] + foreach i $items { + set exec_cmd [lreplace $exec_cmd $i $i $db] + } + + set i [lsearch $cmd DBC] + if { $i != -1 } { + set db_cursor [$db cursor -txn $t] + $db_cursor get -first + } + set adjust 0 + set items [lsearch -all $cmd DBC] + foreach i $items { + # make sure the cursor is pointing to something. 
+ set exec_cmd [lreplace $exec_cmd \ + [expr $i + $adjust] [expr $i + $adjust] $db_cursor] + set txn_pos [lsearch $exec_cmd -txn] + if { $txn_pos != -1} { + # Strip out the txn parameter, we've applied it to the + # cursor. + set exec_cmd \ + [lreplace $exec_cmd $txn_pos [expr $txn_pos + 1]] + # Now the offsets in the items list are out-of-whack, + # keep track of how far. + set adjust [expr $adjust - 2] + } + } + + # To test DB_CONSUME, we need to expect a record return, not "0". + set i [lsearch $exec_cmd "-consume"] + if { $i != -1 } { + set record_exec_cmd_ret 1 + } else { + set record_exec_cmd_ret 0 + } + + # For the DB_APPEND test, we need to expect a return other than + # 0; set this flag to be more lenient in the error_check_good. + set i [lsearch $exec_cmd "-append"] + if { $i != -1 } { + set lenient_exec_cmd_ret 1 + } else { + set lenient_exec_cmd_ret 0 + } + + # For some partial tests we want to execute multiple commands. Pull + # pull them out here. + set last 0 + set exec_cmd2 "" + set exec_cmds [list] + set items [lsearch -all $exec_cmd NEW_CMD] + foreach i $items { + if { $last == 0 } { + set exec_cmd2 [lrange $exec_cmd 0 [expr $i - 1]] + } else { + lappend exec_cmds [lrange $exec_cmd \ + [expr $last + 1] [expr $i - 1]] + } + set last $i + } + if { $last != 0 } { + lappend exec_cmds [lrange $exec_cmd [expr $last + 1] end] + set exec_cmd $exec_cmd2 + } + #puts "exec_cmd: $exec_cmd" + #puts "exec_cmds: $exec_cmds" + + # Execute command and commit/abort it. + set ret [eval $exec_cmd] + if { $record_exec_cmd_ret == 1 } { + error_check_good "\"$exec_cmd\"" [llength [lindex $ret 0]] 2 + } elseif { $lenient_exec_cmd_ret == 1 } { + error_check_good "\"$exec_cmd\"" [expr $ret > 0] 1 + } else { + error_check_good "\"$exec_cmd\"" $ret 0 + } + # If there are additional commands, run them. + foreach curr_cmd $exec_cmds { + error_check_good "\"$curr_cmd\"" $ret 0 + } + + # If a cursor was created, close it now. 
+ if {$db_cursor != ""} { + error_check_good close:$db_cursor [$db_cursor close] 0 + } + + set record_exec_cmd_ret 0 + set lenient_exec_cmd_ret 0 + + # Sync the file so that we can capture a snapshot to test recovery. + error_check_good sync:$db [$db sync] 0 + + catch { file copy -force $dir/$dbfile $dir/$dbfile.afterop } res + copy_extent_file $dir $dbfile afterop + + # If access method is heap, copy other files + if { [is_heap $method] == 1 } { + catch { file copy -force $dir/$dbfile1 \ + $dir/$dbfile.afterop1 } res + copy_extent_file $dir $dbfile1 afterop + + catch { file copy -force $dir/$dbfile2 \ + $dir/$dbfile.afterop2 } res + copy_extent_file $dir $dbfile2 afterop + } + eval open_and_dump_file $dir/$dbfile.afterop NULL \ + $afterop_file nop dump_file_direction "-first" "-next" $args + + #puts "\t\t\tExecuting txn_$op:$t" + if { $op == "prepare" } { + set gid [make_gid global:$t] + set gfd [open $gidf w+] + puts $gfd $gid + close $gfd + error_check_good txn_$op:$t [$t $op $gid] 0 + } else { + error_check_good txn_$op:$t [$t $op] 0 + } + + switch $op { + "commit" { puts "\t\tCommand executed and committed." } + "abort" { puts "\t\tCommand executed and aborted." } + "prepare" { puts "\t\tCommand executed and prepared." } + } + + # Sync the file so that we can capture a snapshot to test recovery. + error_check_good sync:$db [$db sync] 0 + + catch { file copy -force $dir/$dbfile $dir/$dbfile.final } res + copy_extent_file $dir $dbfile final + + # If access method is heap, copy other files + if { [is_heap $method] == 1 } { + catch { file copy -force $dir/$dbfile1 \ + $dir/$dbfile.final1 } res + copy_extent_file $dir $dbfile1 afterop + + catch { file copy -force $dir/$dbfile2 \ + $dir/$dbfile.final2 } res + copy_extent_file $dir $dbfile2 afterop + } + + eval open_and_dump_file $dir/$dbfile.final NULL \ + $final_file nop dump_file_direction "-first" "-next" $args + + # If this is an abort or prepare-abort, it should match the + # original file. 
+ # If this was a commit or prepare-commit, then this file should + # match the afterop file. + # If this was a prepare without an abort or commit, we still + # have transactions active, and peering at the database from + # another environment will show data from uncommitted transactions. + # Thus we just skip this in the prepare-only case; what + # we care about are the results of a prepare followed by a + # recovery, which we test later. + if { $op == "commit" } { + filesort $afterop_file $afterop_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(post-$op,pre-commit):diff($afterop_file,$final_file) \ + [filecmp $afterop_file.sort $final_file.sort] 0 + } elseif { $op == "abort" } { + filesort $init_file $init_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(initial,post-$op):diff($init_file,$final_file) \ + [filecmp $init_file.sort $final_file.sort] 0 + } else { + # Make sure this really is one of the prepare tests + error_check_good assert:prepare-test $op "prepare" + } + + # Running recovery on this database should not do anything. + # Flush all data to disk, close the environment and save the + # file. + # XXX DO NOT CLOSE FILE ON PREPARE -- if you are prepared, + # you really have an active transaction and you're not allowed + # to close files that are being acted upon by in-process + # transactions. + if { $op != "prepare" } { + error_check_good close:$db [$db close] 0 + } + + # + # If we are running 'prepare' don't close the env with an + # active transaction. Leave it alone so the close won't + # quietly abort it on us. 
+ if { [is_substr $op "prepare"] != 1 } { + error_check_good log_flush [$env log_flush] 0 + error_check_good envclose [$env close] 0 + } + return +} + +proc op_recover_rec { op op2 dir env_cmd dbfile gidf args} { + global log_log_record_types + global recd_debug + global recd_id + global recd_op + global encrypt + global passwd + source ./include.tcl + + #puts "op_recover_rec: $op $op2 $dir $env_cmd $dbfile $gidf" + + set init_file $dir/t1 + set afterop_file $dir/t2 + set final_file $dir/t3 + + # Keep track of the log types we've seen + if { $log_log_record_types == 1} { + logtrack_read $dir + } + + berkdb debug_check + puts -nonewline "\t\top_recover_rec: Running recovery ... " + flush stdout + + set recargs "-h $dir -c " + if { $encrypt > 0 } { + append recargs " -P $passwd " + } + set stat [catch {eval exec $util_path/db_recover -e $recargs} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + } + puts -nonewline "complete ... " + + # + # We cannot run db_recover here because that will open an env, run + # recovery, then close it, which will abort the outstanding txns. + # We want to do it ourselves. + # + set env [eval $env_cmd] + error_check_good dbenv [is_valid_widget $env env] TRUE + + if {[is_partition_callback $args] == 1 } { + set nodump 1 + } else { + set nodump 0 + } + error_check_good db_verify [verify_dir $testdir "\t\t" 0 1 $nodump] 0 + puts "verified" + + # If we left a txn as prepared, but not aborted or committed, + # we need to do a txn_recover. Make sure we have the same + # number of txns we want. 
+ if { $op == "prepare"} { + set txns [$env txn_recover] + error_check_bad txnrecover [llength $txns] 0 + set gfd [open $gidf r] + set origgid [read -nonewline $gfd] + close $gfd + set txnlist [lindex $txns 0] + set t [lindex $txnlist 0] + set gid [lindex $txnlist 1] + error_check_good gidcompare $gid $origgid + puts "\t\t\tExecuting txn_$op2:$t" + error_check_good txn_$op2:$t [$t $op2] 0 + # + # If we are testing discard, we do need to resolve + # the txn, so get the list again and now abort it. + # + if { $op2 == "discard" } { + set txns [$env txn_recover] + error_check_bad txnrecover [llength $txns] 0 + set txnlist [lindex $txns 0] + set t [lindex $txnlist 0] + set gid [lindex $txnlist 1] + error_check_good gidcompare $gid $origgid + puts "\t\t\tExecuting txn_abort:$t" + error_check_good disc_txn_abort:$t [$t abort] 0 + } + } + + + eval set args $args + eval open_and_dump_file $dir/$dbfile NULL $final_file nop \ + dump_file_direction "-first" "-next" $args + if { $op == "commit" || $op2 == "commit" } { + filesort $afterop_file $afterop_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(post-$op,pre-commit):diff($afterop_file,$final_file) \ + [filecmp $afterop_file.sort $final_file.sort] 0 + } else { + filesort $init_file $init_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(initial,post-$op):diff($init_file,$final_file) \ + [filecmp $init_file.sort $final_file.sort] 0 + } + + # Now close the environment, substitute a file that will need + # recovery and try running recovery again. + reset_env $env + if { $op == "commit" || $op2 == "commit" } { + catch { file copy -force $dir/$dbfile.init $dir/$dbfile } res + move_file_extent $dir $dbfile init copy + } else { + catch { file copy -force $dir/$dbfile.afterop $dir/$dbfile } res + move_file_extent $dir $dbfile afterop copy + } + + berkdb debug_check + puts -nonewline "\t\tRunning recovery on pre-op database ... 
" + flush stdout + + set stat [catch {eval exec $util_path/db_recover $recargs} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." + } + puts -nonewline "complete ... " + + error_check_good db_verify_preop \ + [verify_dir $testdir "\t\t" 0 1 $nodump] 0 + + puts "verified" + + set env [eval $env_cmd] + + eval open_and_dump_file $dir/$dbfile NULL $final_file nop \ + dump_file_direction "-first" "-next" $args + if { $op == "commit" || $op2 == "commit" } { + filesort $final_file $final_file.sort + filesort $afterop_file $afterop_file.sort + error_check_good \ + diff(post-$op,recovered):diff($afterop_file,$final_file) \ + [filecmp $afterop_file.sort $final_file.sort] 0 + } else { + filesort $init_file $init_file.sort + filesort $final_file $final_file.sort + error_check_good \ + diff(initial,post-$op):diff($init_file,$final_file) \ + [filecmp $init_file.sort $final_file.sort] 0 + } + + # This should just close the environment, not blow it away. + reset_env $env +} + +proc populate { db method txn n dups bigdata } { + source ./include.tcl + + # Handle non-transactional cases, too. 
+ set t "" + if { [llength $txn] > 0 } { + set t " -txn $txn " + } + + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $n } { + if { [is_record_based $method] == 1 } { + set key [expr $count + 1] + } elseif { $dups == 1 } { + set key duplicate_key + } else { + set key $str + } + if { $bigdata == 1 && [berkdb random_int 1 3] == 1} { + set str [replicate $str 1000] + } + + set ret [eval {$db put} $t {$key [chop_data $method $str]}] + error_check_good db_put:$key $ret 0 + incr count + } + close $did + return 0 +} + +proc big_populate { db txn n } { + source ./include.tcl + + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $n } { + set key [replicate $str 50] + set ret [$db put -txn $txn $key $str] + error_check_good db_put:$key $ret 0 + incr count + } + close $did + return 0 +} + +proc unpopulate { db txn num } { + source ./include.tcl + + set c [eval {$db cursor} "-txn $txn"] + error_check_bad $db:cursor $c NULL + error_check_good $db:cursor [is_substr $c $db] 1 + + set i 0 + for {set d [$c get -first] } { [llength $d] != 0 } { + set d [$c get -next] } { + $c del + incr i + if { $num != 0 && $i >= $num } { + break + } + } + error_check_good cursor_close [$c close] 0 + return 0 +} + +# Flush logs for txn envs only. 
+proc reset_env { env } { + if { [is_txnenv $env] } { + error_check_good log_flush [$env log_flush] 0 + } + error_check_good env_close [$env close] 0 +} + +proc maxlocks { myenv locker_id obj_id num } { + return [countlocks $myenv $locker_id $obj_id $num ] +} + +proc maxwrites { myenv locker_id obj_id num } { + return [countlocks $myenv $locker_id $obj_id $num ] +} + +proc minlocks { myenv locker_id obj_id num } { + return [countlocks $myenv $locker_id $obj_id $num ] +} + +proc minwrites { myenv locker_id obj_id num } { + return [countlocks $myenv $locker_id $obj_id $num ] +} + +proc countlocks { myenv locker_id obj_id num } { + set locklist "" + for { set i 0} {$i < [expr $obj_id * 4]} { incr i } { + set r [catch {$myenv lock_get read $locker_id \ + [expr $obj_id * 1000 + $i]} l ] + if { $r != 0 } { + puts $l + return ERROR + } else { + error_check_good lockget:$obj_id [is_substr $l $myenv] 1 + lappend locklist $l + } + } + + # Now acquire one write lock, except for obj_id 1, which doesn't + # acquire any. We'll use obj_id 1 to test minwrites. + if { $obj_id != 1 } { + set r [catch {$myenv lock_get write $locker_id \ + [expr $obj_id * 1000 + 10]} l ] + if { $r != 0 } { + puts $l + return ERROR + } else { + error_check_good lockget:$obj_id [is_substr $l $myenv] 1 + lappend locklist $l + } + } + + # Get one extra write lock for obj_id 2. We'll use + # obj_id 2 to test maxwrites. + # + if { $obj_id == 2 } { + set extra [catch {$myenv lock_get write \ + $locker_id [expr $obj_id * 1000 + 11]} l ] + if { $extra != 0 } { + puts $l + return ERROR + } else { + error_check_good lockget:$obj_id [is_substr $l $myenv] 1 + lappend locklist $l + } + } + + set ret [ring $myenv $locker_id $obj_id $num] + + foreach l $locklist { + error_check_good lockput:$l [$l put] 0 + } + + return $ret +} + +# This routine will let us obtain a ring of deadlocks. +# Each locker will get a lock on obj_id, then sleep, and +# then try to lock (obj_id + 1) % num. 
+# When the lock is finally granted, we release our locks and +# return 1 if we got both locks and DEADLOCK if we deadlocked. +# The results here should be that 1 locker deadlocks and the +# rest all finish successfully. +proc ring { myenv locker_id obj_id num } { + source ./include.tcl + + if {[catch {$myenv lock_get write $locker_id $obj_id} lock1] != 0} { + puts $lock1 + return ERROR + } else { + error_check_good lockget:$obj_id [is_substr $lock1 $myenv] 1 + } + + tclsleep 30 + set nextobj [expr ($obj_id + 1) % $num] + set ret 1 + if {[catch {$myenv lock_get write $locker_id $nextobj} lock2] != 0} { + if {[string match "*DEADLOCK*" $lock2] == 1} { + set ret DEADLOCK + } else { + if {[string match "*NOTGRANTED*" $lock2] == 1} { + set ret DEADLOCK + } else { + puts $lock2 + set ret ERROR + } + } + } else { + error_check_good lockget:$obj_id [is_substr $lock2 $myenv] 1 + } + + # Now release the first lock + error_check_good lockput:$lock1 [$lock1 put] 0 + + if {$ret == 1} { + error_check_bad lockget:$obj_id $lock2 NULL + error_check_good lockget:$obj_id [is_substr $lock2 $myenv] 1 + error_check_good lockput:$lock2 [$lock2 put] 0 + } + return $ret +} + +# This routine will create massive deadlocks. +# Each locker will get a readlock on obj_id, then sleep, and +# then try to upgrade the readlock to a write lock. +# When the lock is finally granted, we release our first lock and +# return 1 if we got both locks and DEADLOCK if we deadlocked. +# The results here should be that 1 locker succeeds in getting all +# the locks and everyone else deadlocks. 
+proc clump { myenv locker_id obj_id num } {
+	source ./include.tcl
+
+	# Every locker contends on the same object id, ignoring the one
+	# passed in, so all read locks pile up on a single object.
+	set obj_id 10
+	if {[catch {$myenv lock_get read $locker_id $obj_id} lock1] != 0} {
+		puts $lock1
+		return ERROR
+	} else {
+		error_check_good lockget:$obj_id \
+		    [is_valid_lock $lock1 $myenv] TRUE
+	}
+
+	# Wait for all participants to hold their read lock, then attempt
+	# the read->write upgrade that creates the deadlock clump.
+	tclsleep 30
+	set ret 1
+	if {[catch {$myenv lock_get write $locker_id $obj_id} lock2] != 0} {
+		# DEADLOCK or a timeout (NOTGRANTED) both mean this locker
+		# was chosen as a victim.
+		if {[string match "*DEADLOCK*" $lock2] == 1} {
+			set ret DEADLOCK
+		} else {
+			if {[string match "*NOTGRANTED*" $lock2] == 1} {
+				set ret DEADLOCK
+			} else {
+				puts $lock2
+				set ret ERROR
+			}
+		}
+	} else {
+		error_check_good \
+		    lockget:$obj_id [is_valid_lock $lock2 $myenv] TRUE
+	}
+
+	# Now release the first lock
+	error_check_good lockput:$lock1 [$lock1 put] 0
+
+	if {$ret == 1} {
+		error_check_good \
+		    lockget:$obj_id [is_valid_lock $lock2 $myenv] TRUE
+		error_check_good lockput:$lock2 [$lock2 put] 0
+	}
+	return $ret
+}
+
+# Verify the per-protocol expected counts of deadlocked vs. clean lockers.
+# t: protocol name; procs: number of lockers; timeout: nonzero if lock
+# timeouts were in effect; dead/clean/other: observed outcome counts.
+proc dead_check { t procs timeout dead clean other } {
+	error_check_good $t:$procs:other $other 0
+	switch $t {
+		ring {
+			# With timeouts the number of deadlocks is
+			# unpredictable: test for at least one deadlock.
+			if { $timeout != 0 && $dead > 1 } {
+				set clean [ expr $clean + $dead - 1]
+				set dead 1
+			}
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		clump {
+			# With timeouts the number of deadlocks is
+			# unpredictable: test for no more than one
+			# successful lock.
+			if { $timeout != 0 && $dead == $procs } {
+				set clean 1
+				set dead [expr $procs - 1]
+			}
+			error_check_good $t:$procs:deadlocks $dead \
+			    [expr $procs - 1]
+			error_check_good $t:$procs:success $clean 1
+		}
+		oldyoung {
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		maxlocks {
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		maxwrites {
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		minlocks {
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		minwrites {
+			error_check_good $t:$procs:deadlocks $dead 1
+			error_check_good $t:$procs:success $clean \
+			    [expr $procs - 1]
+		}
+		default {
+			error "Test $t not implemented"
+		}
+	}
+}
+
+# Record recovery-test debug state in globals read by the recovery tests.
+proc rdebug { id op where } {
+	global recd_debug
+	global recd_id
+	global recd_op
+
+	set recd_debug $where
+	set recd_id $id
+	set recd_op $op
+}
+
+# Return 1 if the second-to-last character of the first word of msg
+# matches id, else 0.
+proc rtag { msg id } {
+	set tag [lindex $msg 0]
+	set tail [expr [string length $tag] - 2]
+	set tag [string range $tag $tail $tail]
+	if { $id == $tag } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# Build a list of n zeros.
+proc zero_list { n } {
+	set ret ""
+	while { $n > 0 } {
+		lappend ret 0
+		incr n -1
+	}
+	return $ret
+}
+
+# Simple dump callback: print a key/data pair.
+proc check_dump { k d } {
+	puts "key: $k data: $d"
+}
+
+# Return s with its characters in reverse order.
+proc reverse { s } {
+	set res ""
+	for { set i 0 } { $i < [string length $s] } { incr i } {
+		set res "[string index $s $i]$res"
+	}
+
+	return $res
+}
+
+#
+# This is an internal only proc.  All tests should use 'is_valid_db' etc.
+#
+# Check that widget handle w is "expected" followed only by digits
+# (e.g. "db0", "env12").  Returns TRUE on a match, or w itself on failure
+# so that error_check_good output shows the offending handle.
+proc is_valid_widget { w expected } {
+	# First N characters must match "expected"
+	set l [string length $expected]
+	incr l -1
+	if { [string compare [string range $w 0 $l] $expected] != 0 } {
+		return $w
+	}
+
+	# Remaining characters must be digits
+	incr l 1
+	for { set i $l } { $i < [string length $w] } { incr i} {
+		set c [string index $w $i]
+		if { $c < "0" || $c > "9" } {
+			return $w
+		}
+	}
+
+	return TRUE
+}
+
+# The per-handle-type validity checks below all delegate to
+# is_valid_widget with the appropriate handle-name prefix.
+proc is_valid_db { db } {
+	return [is_valid_widget $db db]
+}
+
+proc is_valid_env { env } {
+	return [is_valid_widget $env env]
+}
+
+proc is_valid_cursor { dbc db } {
+	return [is_valid_widget $dbc $db.c]
+}
+
+proc is_valid_lock { lock env } {
+	return [is_valid_widget $lock $env.lock]
+}
+
+proc is_valid_logc { logc env } {
+	return [is_valid_widget $logc $env.logc]
+}
+
+proc is_valid_mpool { mpool env } {
+	return [is_valid_widget $mpool $env.mp]
+}
+
+proc is_valid_page { page mpool } {
+	return [is_valid_widget $page $mpool.pg]
+}
+
+proc is_valid_txn { txn env } {
+	return [is_valid_widget $txn $env.txn]
+}
+
+# Locker ids are plain numbers: empty prefix, digits only.
+proc is_valid_locker {l } {
+	return [is_valid_widget $l ""]
+}
+
+proc is_valid_seq { seq } {
+	return [is_valid_widget $seq seq]
+}
+
+# Send cmd to the child tclsh listening on fd, wait, and return its
+# printed result.  The child prints "FAIL: ..." if cmd errors.
+proc send_cmd { fd cmd {sleep 2}} {
+	source ./include.tcl
+
+	puts $fd "if \[catch {set v \[$cmd\] ; puts \$v} ret\] { \
+		puts \"FAIL: \$ret\" \
+	}"
+	puts $fd "flush stdout"
+	flush $fd
+	berkdb debug_check
+	tclsleep $sleep
+
+	set r [rcv_result $fd]
+	return $r
+}
+
+# Read one result line from a child process pipe; report EOF as a failure.
+proc rcv_result { fd } {
+	global errorInfo
+
+	set r [gets $fd result]
+	if { $r == -1 } {
+		puts "FAIL: gets returned -1 (EOF)"
+		puts "FAIL: errorInfo is $errorInfo"
+	}
+
+	return $result
+}
+
+# Send cmd to a child wrapped so the child prints its elapsed time;
+# does not wait for the result (rcv_too is currently unused here).
+proc send_timed_cmd { fd rcv_too cmd } {
+	set c1 "set start \[timestamp -r\]; "
+	set c2 "puts \[expr \[timestamp -r\] - \$start\]"
+	set full_cmd [concat $c1 $cmd ";" $c2]
+
+	puts $fd $full_cmd
+	puts $fd "flush stdout"
+	flush $fd
+	return 0
+}
+
+#
+# The rationale behind why we have *two* "data padding" routines is outlined
+# below:
+#
+# Both pad_data and chop_data truncate data that is too long.  However,
+# pad_data also adds the pad character to pad data out to the fixed length
+# record length.
+#
+# Which routine you call does not depend on the length of the data you're
+# using, but on whether you're doing a put or a get.  When we do a put, we
+# have to make sure the data isn't longer than the size of a record because
+# otherwise we'll get an error (use chop_data).  When we do a get, we want to
+# check that db padded everything correctly (use pad_data on the value against
+# which we are comparing).
+#
+# We don't want to just use the pad_data routine for both purposes, because
+# we want to be able to test whether or not db is padding correctly.  For
+# example, the queue access method had a bug where when a record was
+# overwritten (*not* a partial put), only the first n bytes of the new entry
+# were written, n being the new entry's (unpadded) length.  So, if we did
+# a put with key,value pair (1, "abcdef") and then a put (1, "z"), we'd get
+# back (1,"zbcdef").  If we had used pad_data instead of chop_data, we would
+# have gotten the "correct" result, but we wouldn't have found this bug.
+proc chop_data {method data} {
+	global fixed_len
+
+	# binary format a$fixed_len truncates to exactly fixed_len bytes.
+	if {[is_fixed_length $method] == 1 && \
+	    [string length $data] > $fixed_len} {
+		return [eval {binary format a$fixed_len $data}]
+	} else {
+		return $data
+	}
+}
+
+proc pad_data {method data} {
+	global fixed_len
+
+	# binary format a$fixed_len NUL-pads short data to fixed_len bytes.
+	if {[is_fixed_length $method] == 1} {
+		return [eval {binary format a$fixed_len $data}]
+	} else {
+		return $data
+	}
+}
+
+#
+# The make_fixed_length proc is used in special circumstances where we
+# absolutely need to send in data that is already padded out to the fixed
+# length with a known pad character.  Most tests should use chop_data and
+# pad_data, not this.
+#
+# Pad data out to fixed_len with the character whose code is $pad
+# (default NUL).  Data longer than fixed_len is first truncated.
+proc make_fixed_length {method data {pad 0}} {
+	global fixed_len
+
+	if {[is_fixed_length $method] == 1} {
+		set data [chop_data $method $data]
+		# Append the pad character directly.  The old code built a
+		# format string out of $data itself ([format $data%c $pad]),
+		# which misbehaved whenever the data contained a '%'.
+		while { [string length $data] < $fixed_len } {
+			append data [format %c $pad]
+		}
+	}
+	return $data
+}
+
+# Pad a global transaction id out to the required 128 bytes with '0's.
+proc make_gid {data} {
+	while { [string length $data] < 128 } {
+		# Plain append; no need to route the data through format
+		# (which would break on '%' characters in the data).
+		append data 0
+	}
+	return $data
+}
+
+# shift data for partial
+# pad with fixed pad (which is NULL)
+proc partial_shift { data offset direction} {
+	global fixed_len
+
+	set len [expr $fixed_len - 1]
+
+	if { [string compare $direction "right"] == 0 } {
+		# Each step prepends one NUL byte and truncates to len.
+		for { set i 1} { $i <= $offset } {incr i} {
+			set data [binary format x1a$len $data]
+		}
+	} elseif { [string compare $direction "left"] == 0 } {
+		# Each step drops the first byte and NUL-pads to len.
+		for { set i 1} { $i <= $offset } {incr i} {
+			set data [string range $data 1 end]
+			set data [binary format a$len $data]
+		}
+	}
+	return $data
+}
+
+# string compare does not always work to compare
+# this data, nor does expr (==)
+# specialized routine for comparison
+# (for use in fixed len recno and q)
+# Returns 0 if the two byte strings are identical, 1 otherwise.
+proc binary_compare { data1 data2 } {
+	if { [string length $data1] != [string length $data2] || \
+	    [string compare -length \
+	    [string length $data1] $data1 $data2] != 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# This is a comparison function used with the lsort command.
+# It treats its inputs as 32 bit signed integers for comparison,
+# and is coded to work with both 32 bit and 64 bit versions of tclsh.
+proc int32_compare { val1 val2 } {
+	# Big is set to 2^32 on a 64 bit machine, or 0 on 32 bit machine.
+	set big [expr 0xffffffff + 1]
+	# Values with the sign bit set are mapped back to their negative
+	# two's-complement interpretation before subtracting.
+	if { $val1 >= 0x80000000 } {
+		set val1 [expr $val1 - $big]
+	}
+	if { $val2 >= 0x80000000 } {
+		set val2 [expr $val2 - $big]
+	}
+	return [expr $val1 - $val2]
+}
+
+# Normalize the many accepted access-method spellings to the canonical
+# flag form: -btree, -hash, -queue, -recno or -heap.
+proc convert_method { method } {
+	switch -- $method {
+		-btree -
+		-dbtree -
+		dbtree -
+		-ddbtree -
+		ddbtree -
+		-rbtree -
+		BTREE -
+		DB_BTREE -
+		DB_RBTREE -
+		RBTREE -
+		bt -
+		btree -
+		db_btree -
+		db_rbtree -
+		rbt -
+		rbtree { return "-btree" }
+
+		-dhash -
+		-ddhash -
+		-hash -
+		DB_HASH -
+		HASH -
+		dhash -
+		ddhash -
+		db_hash -
+		h -
+		hash { return "-hash" }
+
+		-queue -
+		DB_QUEUE -
+		QUEUE -
+		db_queue -
+		q -
+		qam -
+		queue -
+		-iqueue -
+		DB_IQUEUE -
+		IQUEUE -
+		db_iqueue -
+		iq -
+		iqam -
+		iqueue { return "-queue" }
+
+		-queueextent -
+		QUEUEEXTENT -
+		qe -
+		qamext -
+		-queueext -
+		queueextent -
+		queueext -
+		-iqueueextent -
+		IQUEUEEXTENT -
+		iqe -
+		iqamext -
+		-iqueueext -
+		iqueueextent -
+		iqueueext { return "-queue" }
+
+		-frecno -
+		-recno -
+		-rrecno -
+		DB_FRECNO -
+		DB_RECNO -
+		DB_RRECNO -
+		FRECNO -
+		RECNO -
+		RRECNO -
+		db_frecno -
+		db_recno -
+		db_rrecno -
+		frec -
+		frecno -
+		rec -
+		recno -
+		rrec -
+		rrecno { return "-recno" }
+
+		-heap -
+		-db_heap -
+		heap -
+		db_heap -
+		HEAP -
+		DB_HEAP {return "-heap" }
+
+		default { error "FAIL:[timestamp] $method: unknown method" }
+	}
+}
+
+# Remove any -partition_callback (flag + 2 values) and -partition
+# (flag + 1 value) options from an arg list and return what remains.
+proc split_partition_args { largs } {
+
+	# First check for -partition_callback, in which case we
+	# need to remove three args.
+	set index [lsearch $largs "-partition_callback"]
+	if { $index == -1 } {
+		set newl $largs
+	} else {
+		set end [expr $index + 2]
+		set newl [lreplace $largs $index $end]
+	}
+
+	# Then check for -partition, and remove two args.
+	set index [lsearch $newl "-partition"]
+	if { $index > -1 } {
+		set end [expr $index + 1]
+		set newl [lreplace $newl $index $end]
+	}
+
+	return $newl
+}
+
+# Strip "-compress" out of a string of args.
+# Remove a bare "-compress" flag from an arg list, if present.
+proc strip_compression_args { largs } {
+
+	set cindex [lsearch $largs "-compress"]
+	if { $cindex == -1 } {
+		set newargs $largs
+	} else {
+		set newargs [lreplace $largs $cindex $cindex]
+	}
+	return $newargs
+}
+
+# Split any "-encrypta* <passwd>" pair out of largs into the variable
+# named by encargsp, leaving a bare "-encrypt" in its place; returns
+# the rewritten arg list.
+proc split_encargs { largs encargsp } {
+	global encrypt
+	upvar $encargsp e
+	set eindex [lsearch $largs "-encrypta*"]
+	if { $eindex == -1 } {
+		set e ""
+		set newl $largs
+	} else {
+		set eend [expr $eindex + 1]
+		set e [lrange $largs $eindex $eend]
+		set newl [lreplace $largs $eindex $eend "-encrypt"]
+	}
+	return $newl
+}
+
+# Split any "-pagesize <n>" pair out of largs into the variable named
+# by pageargsp; returns the arg list without it.
+proc split_pageargs { largs pageargsp } {
+	upvar $pageargsp e
+	set eindex [lsearch $largs "-pagesize"]
+	if { $eindex == -1 } {
+		set e ""
+		set newl $largs
+	} else {
+		set eend [expr $eindex + 1]
+		set e [lrange $largs $eindex $eend]
+		set newl [lreplace $largs $eindex $eend ""]
+	}
+	return $newl
+}
+
+# Set the global encrypt flag from largs, saving the previous value
+# in old_encrypt.
+proc convert_encrypt { largs } {
+	global encrypt
+	global old_encrypt
+
+	set old_encrypt $encrypt
+	set encrypt 0
+	if { [lsearch $largs "-encrypt*"] != -1 } {
+		set encrypt 1
+	}
+}
+
+# If recno-with-renumbering or btree-with-renumbering is specified, then
+# fix the arguments to specify the DB_RENUMBER/DB_RECNUM option for the
+# -flags argument.
+proc convert_args { method {largs ""} } {
+	global fixed_len
+	global gen_upgrade
+	global upgrade_be
+	source ./include.tcl
+
+	# Catch tests that pass bare numbers where flags are expected.
+	if { [string first - $largs] == -1 &&\
+	    [string compare $largs ""] != 0 &&\
+	    [string compare $largs {{}}] != 0 } {
+		set errstring "args must contain a hyphen; does this test\
+		    have no numeric args?"
+		puts "FAIL:[timestamp] $errstring (largs was $largs)"
+		return -code return
+	}
+
+	convert_encrypt $largs
+	# For upgrade-file generation, force the requested byte order.
+	if { $gen_upgrade == 1 && $upgrade_be == 1 } {
+		append largs " -lorder 4321 "
+	} elseif { $gen_upgrade == 1 && $upgrade_be != 1 } {
+		append largs " -lorder 1234 "
+	}
+
+	if { [is_rrecno $method] == 1 } {
+		append largs " -renumber "
+	} elseif { [is_rbtree $method] == 1 } {
+		append largs " -recnum "
+	} elseif { [is_dbtree $method] == 1 } {
+		append largs " -dup "
+	} elseif { [is_ddbtree $method] == 1 } {
+		append largs " -dup "
+		append largs " -dupsort "
+	} elseif { [is_dhash $method] == 1 } {
+		append largs " -dup "
+	} elseif { [is_ddhash $method] == 1 } {
+		append largs " -dup "
+		append largs " -dupsort "
+	} elseif { [is_queueext $method] == 1 } {
+		append largs " -extent 4 "
+	}
+
+	if { [is_iqueue $method] == 1 || [is_iqueueext $method] == 1 } {
+		append largs " -inorder "
+	}
+
+	# Default padding character is ASCII nul.
+	set fixed_pad 0
+	if {[is_fixed_length $method] == 1} {
+		append largs " -len $fixed_len -pad $fixed_pad "
+	}
+	return $largs
+}
+
+# The is_* predicates below return 1 when the method name (in any of
+# its accepted spellings) denotes the given access-method variant.
+proc is_btree { method } {
+	set names { -btree BTREE DB_BTREE bt btree }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_dbtree { method } {
+	set names { -dbtree dbtree }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_ddbtree { method } {
+	set names { -ddbtree ddbtree }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_rbtree { method } {
+	set names { -rbtree rbtree RBTREE db_rbtree DB_RBTREE rbt }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_heap { method } {
+	set names { -heap DB_HEAP HEAP db_heap heap}
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_recno { method } {
+	set names { -recno DB_RECNO RECNO db_recno rec recno}
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_rrecno { method } {
+	set names { -rrecno rrecno RRECNO db_rrecno DB_RRECNO rrec }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_frecno { method } {
+	set names { -frecno frecno frec FRECNO db_frecno DB_FRECNO}
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_hash { method } {
+	set names { -hash DB_HASH HASH db_hash h hash }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_dhash { method } {
+	set names { -dhash dhash }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_ddhash { method } {
+	set names { -ddhash ddhash }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# Queue variants (extent, inorder) are all queues too.
+proc is_queue { method } {
+	if { [is_queueext $method] == 1 || [is_iqueue $method] == 1 || \
+	    [is_iqueueext $method] == 1 } {
+		return 1
+	}
+
+	set names { -queue DB_QUEUE QUEUE db_queue q queue qam }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_queueext { method } {
+	if { [is_iqueueext $method] == 1 } {
+		return 1
+	}
+
+	set names { -queueextent queueextent QUEUEEXTENT qe qamext \
+	    queueext -queueext }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_iqueue { method } {
+	if { [is_iqueueext $method] == 1 } {
+		return 1
+	}
+
+	set names { -iqueue DB_IQUEUE IQUEUE db_iqueue iq iqueue iqam }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_iqueueext { method } {
+	set names { -iqueueextent iqueueextent IQUEUEEXTENT iqe iqamext \
+	    iqueueext -iqueueext }
+	if { [lsearch $names $method] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# Record-number-addressed methods: recno variants, queues and heap.
+proc is_record_based { method } {
+	if { [is_recno $method] || [is_frecno $method] ||
+	    [is_heap $method] ||
+	    [is_rrecno $method] || [is_queue $method] } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# Fixed-record-length methods: queue and fixed-length recno.
+proc is_fixed_length { method } {
+	if { [is_queue $method] || [is_frecno $method] } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_compressed { args } {
+	if { [string first "-compress" $args] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_partitioned { args } {
+	if { [string first "-partition" $args] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+proc is_partition_callback { args } {
+	if { [string first "-partition_callback" $args] >= 0 } {
+		return 1
+	} else {
+		return 0
+	}
+}
+
+# Sort lines in file $in and write results to file $out.
+# This is a more portable alternative to execing the sort command,
+# which has assorted issues on NT [#1576].
+# The addition of a "-n" argument will sort numerically.
+proc filesort { in out { arg "" } } {
+	set i [open $in r]
+
+	set ilines {}
+	while { [gets $i line] >= 0 } {
+		lappend ilines $line
+	}
+
+	if { [string compare $arg "-n"] == 0 } {
+		set olines [lsort -integer $ilines]
+	} else {
+		set olines [lsort $ilines]
+	}
+
+	close $i
+
+	set o [open $out w]
+	foreach line $olines {
+		puts $o $line
+	}
+
+	close $o
+}
+
+# Print lines up to the nth line of infile out to outfile, inclusive.
+# The optional beg argument tells us where to start.
+proc filehead { n infile outfile { beg 0 } } {
+	set in [open $infile r]
+	set out [open $outfile w]
+
+	# Sed uses 1-based line numbers, and so we do too.
+	for { set i 1 } { $i < $beg } { incr i } {
+		if { [gets $in junk] < 0 } {
+			break
+		}
+	}
+
+	for { } { $i <= $n } { incr i } {
+		if { [gets $in line] < 0 } {
+			break
+		}
+		puts $out $line
+	}
+
+	close $in
+	close $out
+}
+
+# Remove file (this replaces $RM).
+# Usage: fileremove filenames =~ rm; fileremove -f filenames =~ rm -rf.
+proc fileremove { args } {
+	set forceflag ""
+	foreach a $args {
+		if { [string first - $a] == 0 } {
+			# It's a flag.  Better be f.
+			if { [string first f $a] != 1 } {
+				return -code error "bad flag to fileremove"
+			} else {
+				set forceflag "-force"
+			}
+		} else {
+			eval {file delete $forceflag $a}
+		}
+	}
+}
+
+# Look for a certain character string on a single line in one file.
+proc findstring { string file } {
+
+	set found 0
+	if { [file exists $file] == 0 } {
+		error "FAIL: file $file does not exist"
+	}
+	set f [open $file r]
+	while { [gets $f line] >= 0 } {
+		if { [is_substr $line $string] == 1 } {
+			set found 1
+			close $f
+			return $found
+		}
+	}
+	close $f
+	return $found
+}
+
+# Collect every line beginning with FAIL from the given files; returns
+# a list of file:line entries (empty if none).
+proc findfail { args } {
+	set errstring {}
+	foreach a $args {
+		if { [file exists $a] == 0 } {
+			continue
+		}
+		set f [open $a r]
+		while { [gets $f line] >= 0 } {
+			if { [string first FAIL $line] == 0 } {
+				lappend errstring $a:$line
+			}
+		}
+		close $f
+	}
+	return $errstring
+}
+
+# Sleep for s seconds.
+proc tclsleep { s } {
+	# On Windows, the system time-of-day clock may update as much
+	# as 55 ms late due to interrupt timing.  Don't take any
+	# chances;  sleep extra-long so that when tclsleep 1 returns,
+	# it's guaranteed to be a new second.
+	after [expr $s * 1000 + 56]
+}
+
+# Kill a process.
+proc tclkill { id } {
+	source ./include.tcl
+
+	# Keep sending SIGKILL until "kill -0" says the pid is gone.
+	while { [ catch {exec $KILL -0 $id} ] == 0 } {
+		catch {exec $KILL -9 $id}
+		tclsleep 5
+	}
+}
+
+# Compare two files, a la diff.  Returns 1 if non-identical, 0 if identical.
+proc filecmp { file_a file_b } {
+	set fda [open $file_a r]
+	set fdb [open $file_b r]
+
+	fconfigure $fda -translation binary
+	fconfigure $fdb -translation binary
+
+	set nra 0
+	set nrb 0
+
+	# The gets can't be in the while condition because we'll
+	# get short-circuit evaluated.
+	while { $nra >= 0 && $nrb >= 0 } {
+		set nra [gets $fda aline]
+		set nrb [gets $fdb bline]
+
+		if { $nra != $nrb || [string compare $aline $bline] != 0} {
+			close $fda
+			close $fdb
+			return 1
+		}
+	}
+
+	close $fda
+	close $fdb
+	return 0
+}
+
+# Compare the log files from 2 envs.
+# Returns 1 if non-identical,
+# 0 if identical.
+proc logcmp { env1 env2 { compare_shared_portion 0 } } {
+	set lc1 [$env1 log_cursor]
+	set lc2 [$env2 log_cursor]
+
+	# If we're comparing the full set of logs in both envs,
+	# set the starting point by looking at the first LSN in the
+	# first env's logs.
+	#
+	# If we are comparing only the shared portion, look at the
+	# starting LSN of the second env as well, and select the
+	# LSN that is larger.
+
+	set start [lindex [$lc1 get -first] 0]
+
+	if { $compare_shared_portion } {
+		set e2_lsn [lindex [$lc2 get -first] 0]
+		if { [$env1 log_compare $start $e2_lsn] < 0 } {
+			set start $e2_lsn
+		}
+	}
+
+	# Read through and compare the logs record by record.
+	for { set l1 [$lc1 get -set $start] ; set l2 [$lc2 get -set $start] }\
+	    { [llength $l1] > 0 && [llength $l2] > 0 }\
+	    { set l1 [$lc1 get -next] ; set l2 [$lc2 get -next] } {
+		if { [string equal $l1 $l2] != 1 } {
+			$lc1 close
+			$lc2 close
+#puts "l1 is $l1"
+#puts "l2 is $l2"
+			return 1
+		}
+	}
+	$lc1 close
+	$lc2 close
+	return 0
+}
+
+# Given two SORTED files, one of which is a complete superset of the other,
+# extract out the unique portions of the superset and put them in
+# the given outfile.
+proc fileextract { superset subset outfile } {
+	set sup [open $superset r]
+	set sub [open $subset r]
+	set outf [open $outfile w]
+
+	# The gets can't be in the while condition because we'll
+	# get short-circuit evaluated.
+	set nrp [gets $sup pline]
+	set nrb [gets $sub bline]
+	while { $nrp >= 0 } {
+		# Lines present only in the superset are emitted; matching
+		# lines advance the subset cursor as well.
+		if { $nrp != $nrb || [string compare $pline $bline] != 0} {
+			puts $outf $pline
+		} else {
+			set nrb [gets $sub bline]
+		}
+		set nrp [gets $sup pline]
+	}
+
+	close $sup
+	close $sub
+	close $outf
+	return 0
+}
+
+# Verify all .db files in the specified directory.
+# Returns 0 if every database verifies (and dump/loads) cleanly, 1 if any
+# failed.  noredo skips re-verification between tests, quiet suppresses
+# success chatter, nodump skips the dump/load pass, cachesize sizes the
+# scratch env cache, and unref controls unreferenced-page reporting.
+proc verify_dir { {directory $testdir} { pref "" } \
+    { noredo 0 } { quiet 0 } { nodump 0 } { cachesize 0 } { unref 1 } } {
+	global encrypt
+	global passwd
+
+	# If we're doing database verification between tests, we don't
+	# want to do verification twice without an intervening cleanup--some
+	# test was skipped.  Always verify by default (noredo == 0) so
+	# that explicit calls to verify_dir during tests don't require
+	# cleanup commands.
+	if { $noredo == 1 } {
+		if { [file exists $directory/NOREVERIFY] == 1 } {
+			if { $quiet == 0 } {
+				puts "Skipping verification."
+			}
+			return 0
+		}
+		set f [open $directory/NOREVERIFY w]
+		close $f
+	}
+
+	if { [catch {glob $directory/*.db} dbs] != 0 } {
+		# No files matched
+		return 0
+	}
+	set ret 0
+
+	# Open an env, so that we have a large enough cache.  Pick
+	# a fairly generous default if we haven't specified something else.
+
+	if { $cachesize == 0 } {
+		set cachesize [expr 1024 * 1024]
+	}
+	set encarg ""
+	if { $encrypt != 0 } {
+		set encarg "-encryptaes $passwd"
+	}
+
+	set env [eval {berkdb_env -create -private} $encarg \
+	    {-cachesize [list 0 $cachesize 0]}]
+	set earg " -env $env "
+
+	# The 'unref' flag means that we report unreferenced pages
+	# at all times.  This is the default behavior.
+	# If we have a test which leaves unreferenced pages on systems
+	# where HAVE_FTRUNCATE is not on, then we call verify_dir with
+	# unref == 0.
+	set uflag "-unref"
+	if { $unref == 0 } {
+		set uflag ""
+	}
+
+	foreach db $dbs {
+		# Replication's temp db uses a custom comparison function,
+		# so we can't verify it.
+		#
+		if { [file tail $db] == "__db.rep.db" } {
+			continue
+		}
+		if { [catch \
+		    {eval {berkdb dbverify} $uflag $earg $db} res] != 0 } {
+			puts $res
+			puts "FAIL:[timestamp] Verification of $db failed."
+			set ret 1
+			continue
+		} else {
+			error_check_good verify:$db $res 0
+			if { $quiet == 0 } {
+				puts "${pref}Verification of $db succeeded."
+			}
+		}
+
+		# Skip the dump if it's dangerous to do it.
+		if { $nodump == 0 } {
+			if { [catch {eval dumploadtest $db} res] != 0 } {
+				puts $res
+				puts "FAIL:[timestamp] Dump/load of $db failed."
+				set ret 1
+				continue
+			} else {
+				error_check_good dumpload:$db $res 0
+				if { $quiet == 0 } {
+					puts \
+					    "${pref}Dump/load of $db succeeded."
+				}
+			}
+		}
+	}
+
+	error_check_good vrfyenv_close [$env close] 0
+
+	return $ret
+}
+
+# Is the database handle in $db a master database containing subdbs?
+proc check_for_subdbs { db } {
+	set stat [$db stat]
+	for { set i 0 } { [string length [lindex $stat $i]] > 0 } { incr i } {
+		set elem [lindex $stat $i]
+		if { [string compare [lindex $elem 0] Flags] == 0 } {
+			# This is the list of flags;  look for
+			# "subdatabases".
+			if { [is_substr [lindex $elem 1] subdatabases] } {
+				return 1
+			}
+		}
+	}
+	return 0
+}
+
+# Compare the contents of two open databases; returns 0 if identical,
+# 1 on the first mismatch.  Both directions are walked so missing
+# records in either db are caught.
+proc db_compare { olddb newdb olddbname newdbname } {
+	# Walk through olddb and newdb and make sure their contents
+	# are identical.
+	set oc [$olddb cursor]
+	set nc [$newdb cursor]
+	error_check_good orig_cursor($olddbname) \
+	    [is_valid_cursor $oc $olddb] TRUE
+	error_check_good new_cursor($olddbname) \
+	    [is_valid_cursor $nc $newdb] TRUE
+
+	for { set odbt [$oc get -first -nolease] } { [llength $odbt] > 0 } \
+	    { set odbt [$oc get -next -nolease] } {
+		set ndbt [$nc get -get_both -nolease \
+		    [lindex [lindex $odbt 0] 0] [lindex [lindex $odbt 0] 1]]
+		if { [binary_compare $ndbt $odbt] == 1 } {
+			error_check_good oc_close [$oc close] 0
+			error_check_good nc_close [$nc close] 0
+#			puts "FAIL: $odbt does not match $ndbt"
+			return 1
+		}
+	}
+
+	for { set ndbt [$nc get -first -nolease] } { [llength $ndbt] > 0 } \
+	    { set ndbt [$nc get -next -nolease] } {
+		set odbt [$oc get -get_both -nolease \
+		    [lindex [lindex $ndbt 0] 0] [lindex [lindex $ndbt 0] 1]]
+		if { [binary_compare $ndbt $odbt] == 1 } {
+			error_check_good oc_close [$oc close] 0
+			error_check_good nc_close [$nc close] 0
+#			puts "FAIL: $odbt does not match $ndbt"
+			return 1
+		}
+	}
+
+	error_check_good orig_cursor_close($olddbname) [$oc close] 0
+	error_check_good new_cursor_close($newdbname) [$nc close] 0
+
+	return 0
+}
+
+# Round-trip db through db_dump | db_load into a scratch file and verify
+# the result matches record-for-record (per subdb when present).
+proc dumploadtest { db } {
+	global util_path
+	global encrypt
+	global passwd
+
+	set newdbname $db-dumpload.db
+
+	set dbarg ""
+	set utilflag ""
+	set keyflag "-k"
+	set heapdb 0
+
+	if { $encrypt != 0 } {
+		set dbarg "-encryptany $passwd"
+		set utilflag "-P $passwd"
+	}
+
+	# Open original database to find dbtype.
+	set olddb [eval {berkdb_open -rdonly} $dbarg $db]
+	error_check_good olddb($db) [is_valid_db $olddb] TRUE
+	if { [is_heap [$olddb get_type]] } {
+		set heapdb 1
+		set keyflag ""
+	}
+	error_check_good orig_db_close($db) [$olddb close] 0
+
+	set dumpflags "$utilflag $keyflag"
+
+	# Dump/load the whole file, including all subdbs.
+	set rval [catch {eval {exec $util_path/db_dump} $dumpflags \
+	    $db | $util_path/db_load $utilflag $newdbname} res]
+	error_check_good db_dump/db_load($db:$res) $rval 0
+
+	# If the old file was empty, there's no new file and we're done.
+	if { [file exists $newdbname] == 0 } {
+		return 0
+	}
+
+	# Dump/load doesn't preserve order in a heap db, don't run db_compare
+	if { $heapdb == 1 } {
+		eval berkdb dbremove $dbarg $newdbname
+		return 0
+	}
+
+	# Open original database.
+	set olddb [eval {berkdb_open -rdonly} $dbarg $db]
+	error_check_good olddb($db) [is_valid_db $olddb] TRUE
+
+	if { [check_for_subdbs $olddb] } {
+		# If $db has subdatabases, compare each one separately.
+		set oc [$olddb cursor]
+		error_check_good orig_cursor($db) \
+		    [is_valid_cursor $oc $olddb] TRUE
+
+		for { set dbt [$oc get -first] } \
+		    { [llength $dbt] > 0 } \
+		    { set dbt [$oc get -next] } {
+			set subdb [lindex [lindex $dbt 0] 0]
+
+			set oldsubdb \
+			    [eval {berkdb_open -rdonly} $dbarg {$db $subdb}]
+			error_check_good olddb($db) [is_valid_db $oldsubdb] TRUE
+
+			# Open the new database.
+			set newdb \
+			    [eval {berkdb_open -rdonly} $dbarg {$newdbname $subdb}]
+			error_check_good newdb($db) [is_valid_db $newdb] TRUE
+
+			db_compare $oldsubdb $newdb $db $newdbname
+			error_check_good new_db_close($db) [$newdb close] 0
+			error_check_good old_subdb_close($oldsubdb) [$oldsubdb close] 0
+		}
+
+		error_check_good oldcclose [$oc close] 0
+	} else {
+		# Open the new database.
+		set newdb [eval {berkdb_open -rdonly} $dbarg $newdbname]
+		error_check_good newdb($db) [is_valid_db $newdb] TRUE
+		db_compare $olddb $newdb $db $newdbname
+		error_check_good new_db_close($db) [$newdb close] 0
+	}
+
+	error_check_good orig_db_close($db) [$olddb close] 0
+	eval berkdb dbremove $dbarg $newdbname
+}
+
+# Test regular and aggressive salvage procedures for all databases
+# in a directory.
+proc salvage_dir { dir { noredo 0 } { quiet 0 } } {
+	global util_path
+	global encrypt
+	global passwd
+
+	# If we're doing salvage testing between tests, don't do it
+	# twice without an intervening cleanup.
+	if { $noredo == 1 } {
+		if { [file exists $dir/NOREDO] == 1 } {
+			if { $quiet == 0 } {
+				puts "Skipping salvage testing."
+			}
+			return 0
+		}
+		set f [open $dir/NOREDO w]
+		close $f
+	}
+
+	if { [catch {glob $dir/*.db} dbs] != 0 } {
+		# No files matched
+		return 0
+	}
+
+	foreach db $dbs {
+		set dumpfile $db-dump
+		set sorteddump $db-dump-sorted
+		set salvagefile $db-salvage
+		set sortedsalvage $db-salvage-sorted
+		set aggsalvagefile $db-aggsalvage
+
+		set dbarg ""
+		set utilflag ""
+		if { $encrypt != 0 } {
+			set dbarg "-encryptany $passwd"
+			set utilflag "-P $passwd"
+		}
+
+		# Dump the database with salvage, with aggressive salvage,
+		# and without salvage.
+		#
+		set rval [catch {eval {exec $util_path/db_dump} $utilflag -r \
+		    -f $salvagefile $db} res]
+		error_check_good salvage($db:$res) $rval 0
+		filesort $salvagefile $sortedsalvage
+
+		# We can't avoid occasional verify failures in aggressive
+		# salvage.  Make sure it's the expected failure.
+		set rval [catch {eval {exec $util_path/db_dump} $utilflag -R \
+		    -f $aggsalvagefile $db} res]
+		if { $rval == 1 } {
+#puts "res is $res"
+			error_check_good agg_failure \
+			    [is_substr $res "DB_VERIFY_BAD"] 1
+		} else {
+			error_check_good aggressive_salvage($db:$res) $rval 0
+		}
+
+		# Queue databases must be dumped with -k to display record
+		# numbers if we're not in salvage mode.
+		if { [isqueuedump $salvagefile] == 1 } {
+			append utilflag " -k "
+		}
+
+		# Discard db_pagesize lines from file dumped with ordinary
+		# db_dump -- they are omitted from a salvage dump.
+		set rval [catch {eval {exec $util_path/db_dump} $utilflag \
+		    -f $dumpfile $db} res]
+		error_check_good dump($db:$res) $rval 0
+		filesort $dumpfile $sorteddump
+		discardline $sorteddump TEMPFILE "db_pagesize="
+		file copy -force TEMPFILE $sorteddump
+
+		# A non-aggressively salvaged file should match db_dump.
+		error_check_good compare_dump_and_salvage \
+		    [filecmp $sorteddump $sortedsalvage] 0
+
+		puts "Salvage tests of $db succeeded."
+	}
+}
+
+# Reads infile, writes to outfile, discarding any line whose
+# beginning matches the given string.
+proc discardline { infile outfile discard } {
+	set fdin [open $infile r]
+	set fdout [open $outfile w]
+
+	while { [gets $fdin str] >= 0 } {
+		if { [string match $discard* $str] != 1 } {
+			puts $fdout $str
+		}
+	}
+	close $fdin
+	close $fdout
+}
+
+# Inspects dumped file for "type=" line.  Returns 1 if type=queue.
+proc isqueuedump { file } {
+	set fd [open $file r]
+
+	while { [gets $fd str] >= 0 } {
+		if { [string match type=* $str] == 1 } {
+			if { [string match "type=queue" $str] == 1 } {
+				close $fd
+				return 1
+			} else {
+				close $fd
+				return 0
+			}
+		}
+	}
+	close $fd
+	# No type= line found (e.g. an empty dump): explicitly return 0
+	# rather than falling off the end with an empty-string result.
+	return 0
+}
+
+# Generate randomly ordered, guaranteed-unique four-character strings that can
+# be used to differentiate duplicates without creating duplicate duplicates.
+# (test031 & test032) randstring_init is required before the first call to
+# randstring and initializes things for up to $i distinct strings; randstring
+# gets the next string.
+proc randstring_init { i } {
+	global rs_int_list alphabet
+
+	# Fail if we can't generate sufficient unique strings.
+	if { $i > [expr 26 * 26 * 26 * 26] } {
+		set errstring\
+		    "Duplicate set too large for random string generator"
+		puts "FAIL:[timestamp] $errstring"
+		return -code return $errstring
+	}
+
+	set rs_int_list {}
+
+	# generate alphabet array
+	for { set j 0 } { $j < 26 } { incr j } {
+		set a($j) [string index $alphabet $j]
+	}
+
+	# Generate a list with $i elements, { aaaa, aaab, ... aaaz, aaba ...}
+	# The $j < $i guard on every nested loop stops generation as soon
+	# as the requested count is reached.
+	for { set d1 0 ; set j 0 } { $d1 < 26 && $j < $i } { incr d1 } {
+		for { set d2 0 } { $d2 < 26 && $j < $i } { incr d2 } {
+			for { set d3 0 } { $d3 < 26 && $j < $i } { incr d3 } {
+				for { set d4 0 } { $d4 < 26 && $j < $i } \
+				    { incr d4 } {
+					lappend rs_int_list \
+					    $a($d1)$a($d2)$a($d3)$a($d4)
+					incr j
+				}
+			}
+		}
+	}
+
+	# Randomize the list.
+	set rs_int_list [randomize_list $rs_int_list]
+}
+
+# Randomize a list.  Returns a randomly-reordered copy of l.
proc randomize_list { l } {
	# Fisher-Yates shuffle: walk the list and swap each slot with a
	# randomly chosen slot at or after it.
	set len [llength $l]

	for { set pos 0 } { $pos < $len } { incr pos } {
		set pick [berkdb random_int $pos [expr {$len - 1}]]

		set a [lindex $l $pos]
		set b [lindex $l $pick]
		set l [lreplace $l $pos $pos $b]
		set l [lreplace $l $pick $pick $a]
	}

	return $l
}

# Hand out the next pre-generated unique string; randstring_init must
# have been called first and must have allowed for enough strings.
proc randstring {} {
	global rs_int_list

	if { ![info exists rs_int_list] || [llength $rs_int_list] == 0 } {
		set errstring "randstring uninitialized or used too often"
		puts "FAIL:[timestamp] $errstring"
		return -code return $errstring
	}

	# Pop the head of the pre-shuffled list.
	set next [lindex $rs_int_list 0]
	set rs_int_list [lreplace $rs_int_list 0 0]

	return $next
}

# Takes a variable-length arg list, and returns a list containing the list of
# the non-hyphenated-flag arguments, followed by a list of each alphanumeric
# flag it finds.
proc extractflags { args } {
	set flags {}
	set scanning 1
	while { $scanning } {
		set cur [lindex $args 0]
		if { [string first "-" $cur] != 0 } {
			# First non-flag argument ends the scan.
			break
		}
		# Collect each character after the leading "-"; a second
		# "-" (i.e. "--") terminates flag processing entirely.
		set pos 1
		while { [string length \
		    [set ch [string index $cur $pos]]] > 0 } {
			incr pos
			if { [string compare $ch "-"] == 0 } {
				set scanning 0
				break
			}
			lappend flags $ch
		}
		set args [lrange $args 1 end]
	}
	return [list $args $flags]
}

# Wrapper for berkdb open, used throughout the test suite so that we can
# set an errfile/errpfx as appropriate.
proc berkdb_open { args } {
	global is_envmethod

	if { ![info exists is_envmethod] } {
		set is_envmethod 0
	}

	# Outside an env method, route errors to stderr with a FAIL
	# prefix so test harness greps catch them.
	set errargs {}
	if { $is_envmethod == 0 } {
		append errargs " -errfile /dev/stderr "
		append errargs " -errpfx \\F\\A\\I\\L"
	}

	eval {berkdb open} $errargs $args
}

# Version without errpfx/errfile, used when we're expecting a failure.
proc berkdb_open_noerr { args } {
	# Plain berkdb open: no -errfile/-errpfx decoration.
	eval {berkdb open} $args
}

# Wrapper for berkdb env, used throughout the test suite so that we can
# set an errfile/errpfx as appropriate.
proc berkdb_env { args } {
	global is_envmethod

	if { ![info exists is_envmethod] } {
		set is_envmethod 0
	}

	# Outside an env method, route errors to stderr with a FAIL
	# prefix so test harness greps catch them.
	set errargs {}
	if { $is_envmethod == 0 } {
		append errargs " -errfile /dev/stderr "
		append errargs " -errpfx \\F\\A\\I\\L"
	}

	eval {berkdb env} $errargs $args
}

# Version without errpfx/errfile, used when we're expecting a failure.
proc berkdb_env_noerr { args } {
	eval {berkdb env} $args
}

# Warn if the set of open handles changed since the last check.
proc check_handles { {outf stdout} } {
	global ohandles

	set current [berkdb handles]
	if { [llength $current] != [llength $ohandles] } {
		puts $outf "WARNING: Open handles during cleanup: $current"
	}
	set ohandles $current
}

proc open_handles { } {
	# Number of currently open berkdb handles.
	return [llength [berkdb handles]]
}

# Will close any database and cursor handles, cursors first.
# Ignores other handles, like env handles.
proc close_db_handles { } {
	set db_handles {}
	set cursor_handles {}

	# Partition the handle list.  We can't use is_valid_cursor to
	# find cursors because we don't know the cursor's parent database
	# handle; instead rely on the "db...c..." handle naming.
	foreach handle [berkdb handles] {
		if { [string range $handle 0 1] != "db" } {
			continue
		}
		if { [string first "c" $handle] != -1 } {
			lappend cursor_handles $handle
		} else {
			lappend db_handles $handle
		}
	}

	# Cursors must be closed before their databases.
	foreach handle $cursor_handles {
		error_check_good cursor_close [$handle close] 0
	}
	foreach handle $db_handles {
		error_check_good db_close [$handle close] 0
	}
}

# Replace the current extent files for dbfile with the set previously
# saved under $tag, either copying or renaming them according to $op.
proc move_file_extent { dir dbfile tag op } {
	set curfiles [get_extfiles $dir $dbfile ""]
	set tagfiles [get_extfiles $dir $dbfile $tag]
	#
	# We want to copy or rename only those that have been saved,
	# so delete all the current extent files so that we don't
	# end up with extra ones we didn't restore from our saved ones.
	foreach extfile $curfiles {
		file delete -force $extfile
	}
	foreach extfile $tagfiles {
		set dest [make_ext_filename $dir $dbfile $extfile]
		#
		# We can either copy or rename
		#
		file $op -force $extfile $dest
	}
}

# Save a copy of dbfile's extent files under $tag (or move them, when
# op is rename).
proc copy_extent_file { dir dbfile tag { op copy } } {
	foreach extfile [get_extfiles $dir $dbfile ""] {
		set dest [make_ext_filename $dir $dbfile $extfile $tag]
		file $op -force $extfile $dest
	}
}

# Glob the extent files for dbfile: __db?.dbfile.N, or
# __db?.dbfile.tag.N when a tag is given.
proc get_extfiles { dir dbfile tag } {
	if { $tag == "" } {
		set filepat $dir/__db?.$dbfile.\[0-9\]*
	} else {
		set filepat $dir/__db?.$dbfile.$tag.\[0-9\]*
	}
	return [glob -nocomplain -- $filepat]
}

# Build the name for an extent-file copy: dir/prefix.dbfile[.tag].extnum,
# where prefix and extnum are parsed out of the existing extent file name.
proc make_ext_filename { dir dbfile extfile {tag ""}} {
	# The extent number is everything after the final ".".
	set i [string last "." $extfile]
	incr i
	set extnum [string range $extfile $i end]
	# The prefix is the first dot-delimited component of the basename.
	set j [string last "/" $extfile]
	incr j
	set i [string first "." [string range $extfile $j end]]
	incr i $j
	incr i -1
	set prefix [string range $extfile $j $i]
	if { $tag == "" } {
		return $dir/$prefix.$dbfile.$extnum
	} else {
		return $dir/$prefix.$dbfile.$tag.$extnum
	}
}

# All pids for Windows 9X are negative values.  When we want to have
# unsigned int values, unique to the process, we'll take the absolute
# value of the pid.  This avoids unsigned/signed mistakes, yet
# guarantees uniqueness, since each system has pids that are all
# either positive or negative.
#
proc sanitized_pid { } {
	set mypid [pid]
	if { $mypid < 0 } {
		set mypid [expr {-$mypid}]
	}
	puts "PID: [pid] $mypid\n"
	return $mypid
}

#
# Extract the page size field from a stat record.  Return -1 if
# none is found.
#
proc get_pagesize { stat } {
	foreach field $stat {
		if { [string compare [lindex $field 0] "Page size"] == 0 } {
			return [lindex $field 1]
		}
	}
	return -1
}

# Get a globbed list of source files and executables to use as large
# data items in overflow page tests.
proc get_file_list { {small 0} } {
	global is_windows_test
	global is_qnx_test
	global is_je_test
	global src_root

	# Skip libraries if we have a debug build.
	if { $is_qnx_test || $is_je_test || [is_debug] == 1 } {
		set small 1
	}

	if { $small && $is_windows_test } {
		set templist [glob $src_root/*/*.c $src_root/*/*/*.c \
		    */*/*/env*.obj]
	} elseif { $small } {
		set templist [glob $src_root/*/*.c $src_root/*/*/*.c \
		    ./.libs/env*.o]
	} elseif { $is_windows_test } {
		set templist \
		    [glob $src_root/*/*.c $src_root/*/*/*.c */*/*/*.obj \
		    */*/*.exe */*/libdb??.dll */*/libdb??d.dll]
	} else {
		set templist [glob $src_root/*/*.c $src_root/*/*/*.c \
		    ./.libs/*.o ./.libs/libdb-?.?.s?]
	}

	# We don't want a huge number of files, but we do want a nice
	# variety.  If there are more than nfiles files, pick out a list
	# by taking every other, or every third, or every nth file.
	set nfiles 500
	set total [llength $templist]
	if { $total <= $nfiles } {
		return $templist
	}
	set filelist {}
	set skip [expr {$total / (($nfiles / 3) * 2)}]
	for { set i $skip } { $i < $total } { incr i $skip } {
		lappend filelist [lindex $templist $i]
	}
	return $filelist
}

proc is_cdbenv { env } {
	# 1 if the env attribute list includes -cdb.
	return [expr {[lsearch [$env attributes] -cdb] != -1}]
}

proc is_lockenv { env } {
	# 1 if the env attribute list includes -lock.
	return [expr {[lsearch [$env attributes] -lock] != -1}]
}

proc is_logenv { env } {
	# 1 if the env attribute list includes -log.
	return [expr {[lsearch [$env attributes] -log] != -1}]
}

proc is_mpoolenv { env } {
	# 1 if the env attribute list includes -mpool.
	return [expr {[lsearch [$env attributes] -mpool] != -1}]
}

proc is_repenv { env } {
	# 1 if the env attribute list includes -rep.
	return [expr {[lsearch [$env attributes] -rep] != -1}]
}

proc is_secenv { env } {
	# 1 if the env attribute list includes -crypto.
	return [expr {[lsearch [$env attributes] -crypto] != -1}]
}

proc is_txnenv { env } {
	# 1 if the env attribute list includes -txn.
	return [expr {[lsearch [$env attributes] -txn] != -1}]
}

# Return the -home path from the env's attribute list, or NULL if no
# home directory is recorded.
proc get_home { env } {
	set sys [$env attributes]
	set idx [lsearch $sys -home]
	if { $idx == -1 } {
		return NULL
	}
	return [lindex $sys [expr {$idx + 1}]]
}

# Shrink nentries and ndups (passed by variable name) by 25% at a time
# until their product fits under the default lock limits.
proc reduce_dups { nent ndp } {
	upvar $nent nentries
	upvar $ndp ndups

	# If we are using a txnenv, assume it is using the default
	# maximum number of locks; cut back so we don't run out.
	while { [expr {$nentries * $ndups}] > 5000 } {
		set nentries [expr {($nentries / 4) * 3}]
		set ndups [expr {($ndups / 4) * 3}]
	}
}

# Look up a named field in a list of {name value} stat pairs; -1 when
# the field is absent.
proc getstats { statlist field } {
	foreach pair $statlist {
		if { [string equal [lindex $pair 0] $field] } {
			return [lindex $pair 1]
		}
	}
	return -1
}

# Return the value for a particular field in a set of statistics.
# Works for regular db stat as well as env stats (log_stat,
# lock_stat, txn_stat, rep_stat, etc.).
proc stat_field { handle which_stat field } {
	return [getstats [$handle $which_stat] $field]
}

# 1 on big-endian hosts, 0 on little-endian; error on anything else.
proc big_endian { } {
	global tcl_platform

	switch -exact -- $tcl_platform(byteOrder) {
		littleEndian { return 0 }
		bigEndian { return 1 }
		default {
			error \
			    "FAIL: Unknown endianness $tcl_platform(byteOrder)"
		}
	}
}

# Check if this is a debug build.  Use 'string equal' so we
# don't get fooled by debug_rop and debug_wop.
proc is_debug { } {
	# Return 1 if "debug" appears in the berkdb build configuration.
	foreach item [berkdb getconfig] {
		if { [string equal $item "debug"] } {
			return 1
		}
	}
	return 0
}

# Build the log-related env flags for the requested log type.
# In-memory logging requires a log buffer (default 1MB).
proc adjust_logargs { logtype {lbufsize 0} } {
	switch -exact -- $logtype {
		in-memory {
			if { $lbufsize == 0 } {
				set lbufsize [expr {1024 * 1024}]
			}
			set logargs " -log_inmemory -log_buffer $lbufsize "
		}
		on-disk {
			set logargs ""
		}
		default {
			error "FAIL: unrecognized log type $logtype"
		}
	}
	return $logargs
}

# Build the txn-related env flags for the requested log type; on-disk
# logging uses nosync.
proc adjust_txnargs { logtype } {
	switch -exact -- $logtype {
		in-memory { return " -txn " }
		on-disk { return " -txn nosync " }
		default { error "FAIL: unrecognized log type $logtype" }
	}
}

# Return the file number of the first or last LSN in the log.
proc get_logfile { env where } {
	# Open a log cursor.
	set logc [$env log_cursor]
	error_check_good m_logc [is_valid_logc $logc $env] TRUE

	# Fetch the requested end of the log.
	if { $where == "first" } {
		set rec [$logc get -first]
	} else {
		set rec [$logc get -last]
	}
	error_check_good cursor_close [$logc close] 0

	# An LSN is a {file offset} pair; return the file number.
	return [lindex [lindex $rec 0] 0]
}

# Determine whether logs are in-mem or on-disk.
# This requires the existence of logs to work correctly.
proc check_log_location { env } {
	# NOTE(review): on failure this prints FAIL and falls through
	# rather than returning, matching the original behavior.
	if { [catch {get_logfile $env first} res] } {
		puts "FAIL: env $env not configured for logging"
	}
	set inmemory [$env log_get_config inmemory]

	set logfiles [glob -nocomplain [get_home $env]/log.*]
	if { $inmemory == 1 } {
		error_check_good no_logs_on_disk [llength $logfiles] 0
	} else {
		error_check_bad logs_on_disk [llength $logfiles] 0
	}
}

# Verify the logs.
# By default, if we do not specify a directory for the temporary
# environment, the temporary databases are in-memory, which is bad for
# testing: verifying a large set of logs can make those temporary
# databases very large, so we direct them to an on-disk directory.
#
# env_dir defaults to the standard test directory ($testdir).  Returns
# 0 on success, 1 on db_log_verify failure.
#
# BUGFIX(review): the original declared the default as the literal
# string "$testdir" (Tcl does not substitute variables in proc default
# arguments) and then ignored env_dir entirely, hardcoding -h $testdir.
# We now resolve an empty env_dir to $testdir at runtime and honor a
# caller-supplied directory; default calls behave exactly as before.
proc verify_log { {env_dir ""} { tmp_dir lgverify_dir } \
    { mcachesize 10 } { cont 1 } } {
	global encrypt
	global passwd
	global EXE
	source ./include.tcl

	set succ_patt {Log verification ended and SUCCEEDED}

	# An empty/default env_dir means the standard test directory.
	if { $env_dir == "" } {
		set env_dir $testdir
	}
	if { $mcachesize == 0 } {
		set mcachesize 10
	}
	set encarg ""
	if { $encrypt != 0 } {
		set encarg "-encryptaes $passwd"
	}
	set contarg ""
	if { $cont == 1 } {
		set contarg "-c"
	}
	if { $tmp_dir == ""} {
		set tmp_dir "lgverify_dir"
	}
	# Start with a fresh temporary directory for the verifier's
	# scratch databases.
	file delete -force $tmp_dir
	file mkdir $tmp_dir
	puts -nonewline "Verifying log files .... "
	set ret [catch {eval exec $util_path/db_log_verify {-C $mcachesize} \
	    $encarg $contarg {-h $env_dir} {-H $tmp_dir} >& lgvrfy.log } msg ]
	puts "done."
	if {$ret} {
		puts "FAIL:db_log_verify: $msg"
		return 1
	}

	# Check if we could find the message for success.
	set logf [open lgvrfy.log "r"]
	set line {}
	set found 0
	while {[gets $logf line] >= 0} {
		if {[regexp $succ_patt $line]} {
			set found 1
			break
		}
	}
	close $logf
	error_check_good log_verify $found 1
	return 0
}

# This proc is to verify that all the keys in secondary
# exist in the foreign database.
+proc verify_foreign {txn fdb sdb puterr} { + set fkeys {} + set fdbc [eval $fdb cursor $txn] + error_check_good check_fdbc [is_valid_cursor $fdbc $fdb] TRUE + for {set ret [$fdbc get -first]} {[llength $ret] > 0} \ + {set ret [$fdbc get -next]} { + lappend fkeys [lindex [lindex $ret 0] 0] + } + error_check_good close_fdbc [$fdbc close] 0 + + set skeys {} + set sdbc [eval $sdb cursor $txn] + error_check_good check_sdbc [is_valid_cursor $sdbc $sdb] TRUE + for {set ret [$sdbc get -first]} {[llength $ret] > 0} \ + {set ret [$sdbc get -nextnodup]} { + lappend skeys [lindex [lindex $ret 0] 0] + } + error_check_good close_sdbc [$sdbc close] 0 + foreach fkey $fkeys { + set has_keys($fkey) 1 + } + foreach skey $skeys { + if {![info exists has_keys($skey)]} { + if {$puterr} { + puts "FAIL: VERIFY_FOREIGN_BAD" + } + return 1 + } + } + return 0 +} + +# Given the env and file name, verify that a given database is on-disk +# or in-memory as expected. If "db_on_disk" is 1, "databases_in_memory" +# is 0 and vice versa, so we use error_check_bad. +proc check_db_location { env { dbname "test.db" } { datadir "" } } { + global databases_in_memory + + if { $datadir != "" } { + set env_home $datadir + } else { + set env_home [get_home $env] + } + set db_on_disk [file exists $env_home/$dbname] + + error_check_bad db_location $db_on_disk $databases_in_memory +} + +# If we have a private env, check that no region files are found on-disk. +proc no_region_files_on_disk { dir } { + set regionfiles [glob -nocomplain $dir/__db.???] + error_check_good regionfiles [llength $regionfiles] 0 + global env_private + if { $env_private } { + set regionfiles [glob -nocomplain $dir/__db.???] + error_check_good regionfiles [llength $regionfiles] 0 + } +} + +proc find_valid_methods { test } { + global checking_valid_methods + global valid_methods + + # To find valid methods, call the test with checking_valid_methods + # on. 
It doesn't matter what method we use for this call, so we + # arbitrarily pick btree. + # + set checking_valid_methods 1 + set test_methods [$test btree] + set checking_valid_methods 0 + if { $test_methods == "ALL" } { + return $valid_methods + } else { + return $test_methods + } +} + +proc part {data} { + if { [string length $data] < 2 } { + return 0 + } + binary scan $data s res + return $res +} + +proc my_isalive { pid } { + source ./include.tcl + + if {[catch {exec $KILL -0 $pid}]} { + return 0 + } + return 1 +} diff --git a/test/tcl/txn001.tcl b/test/tcl/txn001.tcl new file mode 100644 index 00000000..39260625 --- /dev/null +++ b/test/tcl/txn001.tcl @@ -0,0 +1,114 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn001 +# TEST Begin, commit, abort testing. +proc txn001 { {tnum "001"} { max 1024 } { ntxns 50 } } { + source ./include.tcl + global txn_curid + global txn_maxid + + puts -nonewline "Txn$tnum: Basic begin, commit, abort" + + if { $tnum != "001"} { + puts " (with ID wrap)" + } else { + puts "" + } + + # Open environment + env_cleanup $testdir + + set env [eval {berkdb_env -create -mode 0644 -txn \ + -txn_max $max -home $testdir}] + error_check_good evn_open [is_valid_env $env] TRUE + error_check_good txn_id_set \ + [ $env txn_id_set $txn_curid $txn_maxid ] 0 + txn001_suba $ntxns $env $tnum + txn001_subb $ntxns $env $tnum + txn001_subc $ntxns $env $tnum + # Close and unlink the file + error_check_good env_close:$env [$env close] 0 +} + +proc txn001_suba { ntxns env tnum } { + source ./include.tcl + + # We will create a bunch of transactions and commit them. 
+ set txn_list {} + set tid_list {} + puts "\tTxn$tnum.a: Beginning/Committing $ntxns Transactions in $env" + for { set i 0 } { $i < $ntxns } { incr i } { + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + + lappend txn_list $txn + + set tid [$txn id] + error_check_good tid_check [lsearch $tid_list $tid] -1 + + lappend tid_list $tid + } + + # Now commit them all + foreach t $txn_list { + error_check_good txn_commit:$t [$t commit] 0 + } +} + +proc txn001_subb { ntxns env tnum } { + # We will create a bunch of transactions and abort them. + set txn_list {} + set tid_list {} + puts "\tTxn$tnum.b: Beginning/Aborting Transactions" + for { set i 0 } { $i < $ntxns } { incr i } { + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + + lappend txn_list $txn + + set tid [$txn id] + error_check_good tid_check [lsearch $tid_list $tid] -1 + + lappend tid_list $tid + } + + # Now abort them all + foreach t $txn_list { + error_check_good txn_abort:$t [$t abort] 0 + } +} + +proc txn001_subc { ntxns env tnum } { + # We will create a bunch of transactions and commit them. + set txn_list {} + set tid_list {} + puts "\tTxn$tnum.c: Beginning/Prepare/Committing Transactions" + for { set i 0 } { $i < $ntxns } { incr i } { + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + + lappend txn_list $txn + + set tid [$txn id] + error_check_good tid_check [lsearch $tid_list $tid] -1 + + lappend tid_list $tid + } + + # Now prepare them all + foreach t $txn_list { + error_check_good txn_prepare:$t \ + [$t prepare [make_gid global:$t]] 0 + } + + # Now commit them all + foreach t $txn_list { + error_check_good txn_commit:$t [$t commit] 0 + } + +} + diff --git a/test/tcl/txn002.tcl b/test/tcl/txn002.tcl new file mode 100644 index 00000000..a1a84e01 --- /dev/null +++ b/test/tcl/txn002.tcl @@ -0,0 +1,89 @@ +# See the file LICENSE for redistribution information. 
+# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn002 +# TEST Verify that read-only transactions do not write log records. +proc txn002 { {tnum "002" } { max 1024 } { ntxns 50 } } { + source ./include.tcl + global txn_curid + global txn_maxid + + puts -nonewline "Txn$tnum: Read-only transaction test ($max) ($ntxns)" + + if { $tnum != "002" } { + puts " (with ID wrap)" + } else { + puts "" + } + + env_cleanup $testdir + set env [berkdb \ + env -create -mode 0644 -txn -txn_max $max -home $testdir] + error_check_good dbenv [is_valid_env $env] TRUE + error_check_good txn_id_set \ + [$env txn_id_set $txn_curid $txn_maxid ] 0 + + # Save the current bytes in the log. + set off_start [txn002_logoff $env] + + # We will create a bunch of transactions and commit them. + set txn_list {} + set tid_list {} + puts "\tTxn$tnum.a: Beginning/Committing Transactions" + for { set i 0 } { $i < $ntxns } { incr i } { + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + + lappend txn_list $txn + + set tid [$txn id] + error_check_good tid_check [lsearch $tid_list $tid] -1 + + lappend tid_list $tid + } + foreach t $txn_list { + error_check_good txn_commit:$t [$t commit] 0 + } + + # Make sure we haven't written any new log records except + # potentially some recycle records if we were wrapping txnids. 
+ set off_stop [txn002_logoff $env] + if { $off_stop != $off_start } { + txn002_recycle_only $testdir + } + + error_check_good env_close [$env close] 0 +} + +proc txn002_logoff { env } { + set stat [$env log_stat] + foreach i $stat { + foreach {txt val} $i {break} + if { [string compare \ + $txt {Current log file offset}] == 0 } { + return $val + } + } +} + +# Make sure that the only log records found are txn_recycle records +proc txn002_recycle_only { dir } { + global util_path + + set tmpfile $dir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $dir > $tmpfile} ret] + error_check_good db_printlog $stat 0 + + set f [open $tmpfile r] + while { [gets $f record] >= 0 } { + set r [regexp {\[[^\]]*\]\[[^\]]*\]([^\:]*)\:} $record whl name] + if { $r == 1 } { + error_check_good record_type __txn_recycle $name + } + } + close $f + fileremove $tmpfile +} diff --git a/test/tcl/txn003.tcl b/test/tcl/txn003.tcl new file mode 100644 index 00000000..a844d80c --- /dev/null +++ b/test/tcl/txn003.tcl @@ -0,0 +1,230 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn003 +# TEST Test abort/commit/prepare of txns with outstanding child txns. 
+proc txn003 { {tnum "003"} } { + source ./include.tcl + global txn_curid + global txn_maxid + + puts -nonewline "Txn$tnum: Outstanding child transaction test" + + if { $tnum != "003" } { + puts " (with ID wrap)" + } else { + puts "" + } + env_cleanup $testdir + set testfile txn003.db + + set env_cmd "berkdb_env_noerr -create -txn -home $testdir" + set env [eval $env_cmd] + error_check_good dbenv [is_valid_env $env] TRUE + error_check_good txn_id_set \ + [$env txn_id_set $txn_curid $txn_maxid] 0 + + set oflags {-auto_commit -create -btree -mode 0644 -env $env $testfile} + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + # + # Put some data so that we can check commit or abort of child + # + set key 1 + set origdata some_data + set newdata this_is_new_data + set newdata2 some_other_new_data + + error_check_good db_put [$db put $key $origdata] 0 + error_check_good dbclose [$db close] 0 + + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + txn003_check $db $key "Origdata" $origdata + + puts "\tTxn$tnum.a: Parent abort" + set parent [$env txn] + error_check_good txn_begin [is_valid_txn $parent $env] TRUE + set child [$env txn -parent $parent] + error_check_good txn_begin [is_valid_txn $child $env] TRUE + error_check_good db_put [$db put -txn $child $key $newdata] 0 + error_check_good parent_abort [$parent abort] 0 + txn003_check $db $key "parent_abort" $origdata + # Check child handle is invalid + set stat [catch {$child abort} ret] + error_check_good child_handle $stat 1 + error_check_good child_h2 [is_substr $ret "invalid command name"] 1 + + puts "\tTxn$tnum.b: Parent commit" + set parent [$env txn] + error_check_good txn_begin [is_valid_txn $parent $env] TRUE + set child [$env txn -parent $parent] + error_check_good txn_begin [is_valid_txn $child $env] TRUE + error_check_good db_put [$db put -txn $child $key $newdata] 0 + error_check_good parent_commit [$parent commit] 0 + txn003_check 
$db $key "parent_commit" $newdata + # Check child handle is invalid + set stat [catch {$child abort} ret] + error_check_good child_handle $stat 1 + error_check_good child_h2 [is_substr $ret "invalid command name"] 1 + error_check_good dbclose [$db close] 0 + error_check_good env_close [$env close] 0 + + # + # Since the data check assumes what has come before, the 'commit' + # operation must be last. + # + set hdr "\tTxn$tnum" + set rlist { + {begin ".c"} + {prepare ".d"} + {abort ".e"} + {commit ".f"} + } + set count 0 + foreach pair $rlist { + incr count + set op [lindex $pair 0] + set msg [lindex $pair 1] + set msg $hdr$msg + txn003_body $env_cmd $testfile $testdir $key $newdata2 $msg $op + set env [eval $env_cmd] + error_check_good dbenv [is_valid_env $env] TRUE + + berkdb debug_check + set db [eval {berkdb_open} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + # + # For prepare we'll then just + # end up aborting after we test what we need to. + # So set gooddata to the same as abort. 
+ switch $op { + abort { + set gooddata $newdata + } + begin { + set gooddata $newdata + } + commit { + set gooddata $newdata2 + } + prepare { + set gooddata $newdata + } + } + txn003_check $db $key "parent_$op" $gooddata + error_check_good dbclose [$db close] 0 + error_check_good env_close [$env close] 0 + } + + puts "\tTxn$tnum.g: Attempt child prepare" + set env [eval $env_cmd] + error_check_good dbenv [is_valid_env $env] TRUE + berkdb debug_check + set db [eval {berkdb_open_noerr} $oflags] + error_check_good db_open [is_valid_db $db] TRUE + + set parent [$env txn] + error_check_good txn_begin [is_valid_txn $parent $env] TRUE + set child [$env txn -parent $parent] + error_check_good txn_begin [is_valid_txn $child $env] TRUE + error_check_good db_put [$db put -txn $child $key $newdata] 0 + set gid [make_gid child_prepare:$child] + set stat [catch {$child prepare $gid} ret] + error_check_good child_prepare $stat 1 + error_check_good child_prep_err [is_substr $ret "txn prepare"] 1 + + puts "\tTxn$tnum.h: Attempt child discard" + set stat [catch {$child discard} ret] + error_check_good child_discard $stat 1 + + # We just panic'd the region, so the next operations will fail. + # No matter, we still have to clean up all the handles. 
+ + set stat [catch {$parent commit} ret] + error_check_good parent_commit $stat 1 + error_check_good parent_commit:fail [is_substr $ret "DB_RUNRECOVERY"] 1 + + set stat [catch {$db close} ret] + error_check_good db_close $stat 1 + error_check_good db_close:fail [is_substr $ret "DB_RUNRECOVERY"] 1 + + set stat [catch {$env close} ret] + error_check_good env_close $stat 1 + error_check_good env_close:fail [is_substr $ret "DB_RUNRECOVERY"] 1 +} + +proc txn003_body { env_cmd testfile dir key newdata2 msg op } { + source ./include.tcl + + berkdb debug_check + sentinel_init + set gidf $dir/gidfile + fileremove -f $gidf + set pidlist {} + puts "$msg.0: Executing child script to prepare txns" + berkdb debug_check + set p [exec $tclsh_path $test_path/wrap.tcl txnscript.tcl \ + $testdir/txnout $env_cmd $testfile $gidf $key $newdata2 &] + lappend pidlist $p + watch_procs $pidlist 5 + set f1 [open $testdir/txnout r] + set r [read $f1] + puts $r + close $f1 + fileremove -f $testdir/txnout + + berkdb debug_check + puts -nonewline "$msg.1: Running recovery ... 
" + flush stdout + berkdb debug_check + set env [eval $env_cmd "-recover"] + error_check_good dbenv-recover [is_valid_env $env] TRUE + puts "complete" + + puts "$msg.2: getting txns from txn_recover" + set txnlist [$env txn_recover] + error_check_good txnlist_len [llength $txnlist] 1 + set tpair [lindex $txnlist 0] + + set gfd [open $gidf r] + set ret [gets $gfd parentgid] + close $gfd + set txn [lindex $tpair 0] + set gid [lindex $tpair 1] + if { $op == "begin" } { + puts "$msg.2: $op new txn" + } else { + puts "$msg.2: $op parent" + } + error_check_good gidcompare $gid $parentgid + if { $op == "prepare" } { + set gid [make_gid prepare_recover:$txn] + set stat [catch {$txn $op $gid} ret] + error_check_good prep_error $stat 1 + error_check_good prep_err \ + [is_substr $ret "transaction already prepared"] 1 + error_check_good txn:prep_abort [$txn abort] 0 + } elseif { $op == "begin" } { + # As of the 4.6 release, we allow new txns to be created + # while prepared but not committed txns exist, so this + # should succeed. + set txn2 [$env txn] + error_check_good txn:begin_abort [$txn abort] 0 + error_check_good txn2:begin_abort [$txn2 abort] 0 + } else { + error_check_good txn:$op [$txn $op] 0 + } + error_check_good envclose [$env close] 0 +} + +proc txn003_check { db key msg gooddata } { + set kd [$db get $key] + set data [lindex [lindex $kd 0] 1] + error_check_good $msg $data $gooddata +} diff --git a/test/tcl/txn004.tcl b/test/tcl/txn004.tcl new file mode 100644 index 00000000..7bbd4544 --- /dev/null +++ b/test/tcl/txn004.tcl @@ -0,0 +1,60 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
#
# $Id$
#
# TEST	txn004
# TEST	Test of wraparound txnids (txn001)
proc txn004 { } {
	source ./include.tcl
	global txn_curid
	global txn_maxid

	# Remember the real id bounds so we can restore them afterward.
	set saved_curid $txn_curid
	set saved_maxid $txn_maxid

	puts "\tTxn004.1: wraparound txnids"
	set txn_curid [expr {$txn_maxid - 2}]
	txn001 "004.1"

	puts "\tTxn004.2: closer wraparound txnids"
	set txn_curid [expr {$txn_maxid - 3}]
	set txn_maxid [expr {$txn_maxid - 2}]
	txn001 "004.2"

	puts "\tTxn004.3: test wraparound txnids"
	txn_idwrap_check $testdir

	set txn_curid $saved_curid
	set txn_maxid $saved_maxid
	return
}

proc txn_idwrap_check { testdir } {
	global txn_curid
	global txn_maxid

	env_cleanup $testdir

	# Open/create the txn region
	set e [berkdb_env -create -txn -home $testdir]
	error_check_good env_open [is_substr $e env] 1

	set txn1 [$e txn]
	error_check_good txn1 [is_valid_txn $txn1 $e] TRUE

	# Force the next ids right up against the maximum.
	error_check_good txn_id_set \
	    [$e txn_id_set [expr {$txn_maxid - 1}] $txn_maxid] 0

	set txn2 [$e txn]
	error_check_good txn2 [is_valid_txn $txn2 $e] TRUE

	# txn3 will require a wraparound txnid
	# XXX How can we test it has a wrapped id?
	set txn3 [$e txn]
	error_check_good wrap_txn3 [is_valid_txn $txn3 $e] TRUE

	error_check_good free_txn1 [$txn1 commit] 0
	error_check_good free_txn2 [$txn2 commit] 0
	error_check_good free_txn3 [$txn3 commit] 0

	error_check_good close [$e close] 0
}

diff --git a/test/tcl/txn005.tcl b/test/tcl/txn005.tcl
new file mode 100644
index 00000000..f45e2a37
--- /dev/null
+++ b/test/tcl/txn005.tcl
@@ -0,0 +1,73 @@
# See the file LICENSE for redistribution information.
#
# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.
#
# $Id$
#
# TEST	txn005
# TEST	Test transaction ID wraparound and recovery.
+proc txn005 {} { + source ./include.tcl + global txn_curid + global txn_maxid + + env_cleanup $testdir + puts "Txn005: Test transaction wraparound recovery" + + # Open/create the txn region + puts "\tTxn005.a: Create environment" + set e [berkdb_env -create -txn -home $testdir] + error_check_good env_open [is_valid_env $e] TRUE + + set txn1 [$e txn] + error_check_good txn1 [is_valid_txn $txn1 $e] TRUE + + set db [berkdb_open -env $e -txn $txn1 -create -btree txn005.db] + error_check_good db [is_valid_db $db] TRUE + error_check_good txn1_commit [$txn1 commit] 0 + + puts "\tTxn005.b: Set txn ids" + error_check_good txn_id_set \ + [$e txn_id_set [expr $txn_maxid - 1] $txn_maxid] 0 + + # txn2 and txn3 will require a wraparound txnid + set txn2 [$e txn] + error_check_good txn2 [is_valid_txn $txn2 $e] TRUE + + error_check_good put [$db put -txn $txn2 "a" ""] 0 + error_check_good txn2_commit [$txn2 commit] 0 + + error_check_good get_a [$db get "a"] "{a {}}" + + error_check_good close [$db close] 0 + + set txn3 [$e txn] + error_check_good txn3 [is_valid_txn $txn3 $e] TRUE + + set db [berkdb_open -env $e -txn $txn3 -btree txn005.db] + error_check_good db [is_valid_db $db] TRUE + + error_check_good put2 [$db put -txn $txn3 "b" ""] 0 + error_check_good sync [$db sync] 0 + error_check_good txn3_abort [$txn3 abort] 0 + error_check_good dbclose [$db close] 0 + error_check_good eclose [$e close] 0 + + puts "\tTxn005.c: Run recovery" + set stat [catch {exec $util_path/db_recover -h $testdir -e -c} result] + if { $stat == 1 } { + error "FAIL: Recovery error: $result." 
+ } + + puts "\tTxn005.d: Check data" + set e [berkdb_env -txn -home $testdir] + error_check_good env_open [is_valid_env $e] TRUE + + set db [berkdb_open -env $e -auto_commit -btree txn005.db] + error_check_good db [is_valid_db $db] TRUE + + error_check_good get_a [$db get "a"] "{a {}}" + error_check_bad get_b [$db get "b"] "{b {}}" + error_check_good dbclose [$db close] 0 + error_check_good eclose [$e close] 0 +} diff --git a/test/tcl/txn006.tcl b/test/tcl/txn006.tcl new file mode 100644 index 00000000..ae931ef8 --- /dev/null +++ b/test/tcl/txn006.tcl @@ -0,0 +1,45 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +#TEST txn006 +#TEST Test dump/load in transactional environment. +proc txn006 { { iter 50 } } { + source ./include.tcl + set testfile txn006.db + + puts "Txn006: Test dump/load in transaction environment" + env_cleanup $testdir + + puts "\tTxn006.a: Create environment and database" + # Open/create the txn region + set e [berkdb_env -create -home $testdir -txn] + error_check_good env_open [is_valid_env $e] TRUE + + # Open/create database + set db [berkdb_open -auto_commit -env $e \ + -create -btree -dup $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + # Start a transaction + set txn [$e txn] + error_check_good txn [is_valid_txn $txn $e] TRUE + + puts "\tTxn006.b: Put data" + # Put some data + for { set i 1 } { $i < $iter } { incr i } { + error_check_good put [$db put -txn $txn key$i data$i] 0 + } + + # End transaction, close db + error_check_good txn_commit [$txn commit] 0 + error_check_good db_close [$db close] 0 + error_check_good env_close [$e close] 0 + + puts "\tTxn006.c: dump/load" + # Dump and load + exec $util_path/db_dump -p -h $testdir $testfile | \ + $util_path/db_load -h $testdir $testfile +} diff --git a/test/tcl/txn007.tcl b/test/tcl/txn007.tcl new file mode 100644 index 00000000..2f6bb88c --- /dev/null +++ 
b/test/tcl/txn007.tcl @@ -0,0 +1,56 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +#TEST txn007 +#TEST Test of DB_TXN_WRITE_NOSYNC +proc txn007 { { iter 50 } } { + source ./include.tcl + set testfile txn007.db + + puts "Txn007: DB_TXN_WRITE_NOSYNC" + env_cleanup $testdir + + # Open/create the txn region + puts "\tTxn007.a: Create env and database with -wrnosync" + set e [berkdb_env -create -home $testdir -txn -wrnosync] + error_check_good env_open [is_valid_env $e] TRUE + + # Open/create database + set db [berkdb open -auto_commit -env $e \ + -create -btree -dup $testfile] + error_check_good db_open [is_valid_db $db] TRUE + + # Put some data + puts "\tTxn007.b: Put $iter data items in individual transactions" + for { set i 1 } { $i < $iter } { incr i } { + # Start a transaction + set txn [$e txn] + error_check_good txn [is_valid_txn $txn $e] TRUE + $db put -txn $txn key$i data$i + error_check_good txn_commit [$txn commit] 0 + } + set stat [$e log_stat] + puts "\tTxn007.c: Check log stats" + foreach i $stat { + set txt [lindex $i 0] + if { [string equal $txt {Times log written}] == 1 } { + set wrval [lindex $i 1] + } + if { [string equal $txt {Times log flushed to disk}] == 1 } { + set syncval [lindex $i 1] + } + } + error_check_good wrval [expr $wrval >= $iter] 1 + # + # We should have written at least 'iter' number of times, + # but not synced on any of those. + # + set val [expr $wrval - $iter] + error_check_good syncval [expr $syncval <= $val] 1 + + error_check_good db_close [$db close] 0 + error_check_good env_close [$e close] 0 +} diff --git a/test/tcl/txn008.tcl b/test/tcl/txn008.tcl new file mode 100644 index 00000000..384a6f2d --- /dev/null +++ b/test/tcl/txn008.tcl @@ -0,0 +1,30 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ +# +# TEST txn008 +# TEST Test of wraparound txnids (txn002) +proc txn008 { } { + source ./include.tcl + global txn_curid + global txn_maxid + + set orig_curid $txn_curid + set orig_maxid $txn_maxid + puts "\tTxn008.1: wraparound txnids" + set txn_curid [expr $txn_maxid - 2] + txn002 "008.1" + puts "\tTxn008.2: closer wraparound txnids" + set txn_curid [expr $txn_maxid - 3] + set txn_maxid [expr $txn_maxid - 2] + txn002 "008.2" + + puts "\tTxn008.3: test wraparound txnids" + txn_idwrap_check $testdir + set txn_curid $orig_curid + set txn_maxid $orig_maxid + return +} + diff --git a/test/tcl/txn009.tcl b/test/tcl/txn009.tcl new file mode 100644 index 00000000..561174b6 --- /dev/null +++ b/test/tcl/txn009.tcl @@ -0,0 +1,30 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn009 +# TEST Test of wraparound txnids (txn003) +proc txn009 { } { + source ./include.tcl + global txn_curid + global txn_maxid + + set orig_curid $txn_curid + set orig_maxid $txn_maxid + puts "\tTxn009.1: wraparound txnids" + set txn_curid [expr $txn_maxid - 2] + txn003 "009.1" + puts "\tTxn009.2: closer wraparound txnids" + set txn_curid [expr $txn_maxid - 3] + set txn_maxid [expr $txn_maxid - 2] + txn003 "009.2" + + puts "\tTxn009.3: test wraparound txnids" + txn_idwrap_check $testdir + set txn_curid $orig_curid + set txn_maxid $orig_maxid + return +} + diff --git a/test/tcl/txn010.tcl b/test/tcl/txn010.tcl new file mode 100644 index 00000000..ef1de2d8 --- /dev/null +++ b/test/tcl/txn010.tcl @@ -0,0 +1,143 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn010 +# TEST Test DB_ENV->txn_checkpoint arguments/flags +proc txn010 { } { + source ./include.tcl + + puts "Txn010: test DB_ENV->txn_checkpoint arguments/flags." 
+ env_cleanup $testdir + + # Open an environment and database. + puts "\tTxn010.a: open the environment and a database, checkpoint." + set env [berkdb_env -create -home $testdir -txn] + error_check_good envopen [is_valid_env $env] TRUE + set db [berkdb_open \ + -env $env -create -mode 0644 -btree -auto_commit a.db] + error_check_good dbopen [is_valid_db $db] TRUE + + # Insert some data and do a checkpoint. + for { set count 0 } { $count < 100 } { incr count } { + set t [$env txn] + error_check_good "init: put" \ + [$db put -txn $t "key_a_$count" "data"] 0 + error_check_good "init: commit" [$t commit] 0 + } + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint] 0 + + # Test that checkpoint calls are ignored in quiescent systems. + puts "\tTxn010.b: test for checkpoints when system is quiescent" + set chkpt [txn010_stat $env "Time of last checkpoint"] + for { set count 0 } { $count < 5 } {incr count } { + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "quiescent: checkpoint time changed" \ + [expr $test_chkpt == $chkpt] 1 + } + + # Add a single record, and test that checkpoint does something. + set chkpt [txn010_stat $env "Time of last checkpoint"] + set t [$env txn] + error_check_good \ + "quiescent: put" [$db put -txn $t "key_b_$count" "data"] 0 + error_check_good "quiescent: commit" [$t commit] 0 + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "quiescent: checkpoint time unchanged" \ + [expr $test_chkpt > $chkpt] 1 + + # Test that -force causes a checkpoint. 
+ puts "\tTxn010.c: test checkpoint -force" + set chkpt [txn010_stat $env "Time of last checkpoint"] + for { set count 0 } { $count < 5 } {incr count } { + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint -force] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "force: checkpoint time unchanged" \ + [expr $test_chkpt > $chkpt] 1 + set chkpt $test_chkpt + } + + # Test that -kbyte doesn't cause a checkpoint unless there's + # enough activity. + puts "\tTxn010.d: test checkpoint -kbyte" + + # Put in lots of data, and verify that -kbyte causes a checkpoint + for { set count 0 } { $count < 1000 } { incr count } { + set t [$env txn] + error_check_good "kbyte: put" \ + [$db put -txn $t "key_c_$count" "data"] 0 + error_check_good "kbyte: commit" [$t commit] 0 + } + + set chkpt [txn010_stat $env "Time of last checkpoint"] + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint -kbyte 2] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "kbytes: checkpoint time unchanged" \ + [expr $test_chkpt > $chkpt] 1 + + # Put in a little data and verify that -kbyte doesn't cause a + # checkpoint + set chkpt [txn010_stat $env "Time of last checkpoint"] + for { set count 0 } { $count < 20 } { incr count } { + set t [$env txn] + error_check_good "kbyte: put" \ + [$db put -txn $t "key_d_$count" "data"] 0 + error_check_good "kbyte: commit" [$t commit] 0 + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint -kbyte 20] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "kbytes: checkpoint time changed" \ + [expr $test_chkpt == $chkpt] 1 + } + + # Test that -min doesn't cause a checkpoint unless enough time has + # passed. 
+ puts "\tTxn010.e: test checkpoint -min" + set t [$env txn] + error_check_good "min: put" [$db put -txn $t "key_e_$count" "data"] 0 + error_check_good "min: commit" [$t commit] 0 + set chkpt [txn010_stat $env "Time of last checkpoint"] + for { set count 0 } { $count < 5 } {incr count } { + tclsleep 1 + error_check_good checkpoint [$env txn_checkpoint -min 2] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "min: checkpoint time changed" \ + [expr $test_chkpt == $chkpt] 1 + } + + # Wait long enough, and then check to see if -min causes a checkpoint. + set chkpt [txn010_stat $env "Time of last checkpoint"] + tclsleep 120 + error_check_good checkpoint [$env txn_checkpoint -min 2] 0 + set test_chkpt [txn010_stat $env "Time of last checkpoint"] + error_check_good "min: checkpoint time unchanged" \ + [expr $test_chkpt > $chkpt] 1 + + # Close down the database and the environment. + error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 +} + +# txn010_stat -- +# Return the current log statistics. +proc txn010_stat { env s } { + set stat [$env txn_stat] + foreach statpair $stat { + set statmsg [lindex $statpair 0] + set statval [lindex $statpair 1] + if {[is_substr $statmsg $s] != 0} { + return $statval + } + } + puts "FAIL: Txn010: stat string $s not found" + return 0 +} diff --git a/test/tcl/txn011.tcl b/test/tcl/txn011.tcl new file mode 100644 index 00000000..6b1ab398 --- /dev/null +++ b/test/tcl/txn011.tcl @@ -0,0 +1,224 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2003, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn011 +# TEST Test durable and non-durable txns. +# TEST Test a mixed env (with both durable and non-durable +# TEST dbs), then a purely non-durable env. Make sure commit +# TEST and abort work, and that only the log records we +# TEST expect are written. 
+# TEST Test that we can't get a durable handle on an open ND +# TEST database, or vice versa. Test that all subdb's +# TEST must be of the same type (D or ND). +proc txn011 { {ntxns 100} } { + source ./include.tcl + global util_path + + foreach envtype { "" "-private" } { + puts "Txn011: Non-durable txns ($envtype)." + env_cleanup $testdir + + puts "\tTxn011.a: Persistent env recovery with -log_inmemory" + set lbuf [expr 8 * [expr 1024 * 1024]] + set env_cmd "berkdb_env -create \ + -home $testdir -txn -log_inmemory -log_buffer $lbuf" + set ndenv [eval $env_cmd $envtype] + set db [berkdb_open -create -auto_commit \ + -btree -env $ndenv -notdurable test.db] + check_log_records $testdir + error_check_good db_close [$db close] 0 + error_check_good ndenv_close [$ndenv close] 0 + + # Run recovery with -e to retain environment. + set stat [catch {exec $util_path/db_recover -e -h $testdir} ret] + error_check_good db_printlog $stat 0 + + # Rejoin env and make sure that the db is still there. + set ndenv [berkdb_env -home $testdir] + set db [berkdb_open -auto_commit -env $ndenv test.db] + error_check_good db_close [$db close] 0 + error_check_good ndenv_close [$ndenv close] 0 + env_cleanup $testdir + + # Start with a new env for the next test. + set ndenv [eval $env_cmd] + error_check_good env_open [is_valid_env $ndenv] TRUE + + # Open/create the database. + set testfile notdurable.db + set db [eval berkdb_open -create \ + -auto_commit -env $ndenv -notdurable -btree $testfile] + error_check_good dbopen [is_valid_db $db] TRUE + + puts "\tTxn011.b: Abort txns in in-memory logging env." + txn011_runtxns $ntxns $db $ndenv abort + # Make sure there is nothing in the db. + txn011_check_empty $db $ndenv + + puts "\tTxn011.c: Commit txns in in-memory logging env." + txn011_runtxns $ntxns $db $ndenv commit + + # Make sure we haven't written any inappropriate log records + check_log_records $testdir + + # Clean up non-durable env tests. 
+ error_check_good db_close [$db close] 0 + error_check_good ndenv_close [$ndenv close] 0 + env_cleanup $testdir + + puts "\tTxn011.d: Set up mixed durable/non-durable test." + # Open/create the mixed environment + set mixed_env_cmd "berkdb_env_noerr -create \ + -home $testdir -txn -log_inmemory -log_buffer $lbuf" + set env [eval $mixed_env_cmd] + error_check_good env_open [is_valid_env $env] TRUE + check_log_records $testdir + + # Open/create the non-durable database + set nondurfile nondurable.db + set ndb [berkdb_open_noerr -create\ + -auto_commit -env $env -btree -notdurable $nondurfile] + error_check_good dbopen [is_valid_db $ndb] TRUE + check_log_records $testdir + + puts "\tTxn011.e: Abort txns in non-durable db." + txn011_runtxns $ntxns $ndb $env abort + # Make sure there is nothing in the db. + txn011_check_empty $ndb $env + check_log_records $testdir + + puts "\tTxn011.f: Commit txns in non-durable db." + txn011_runtxns $ntxns $ndb $env commit + check_log_records $testdir + + # Open/create the durable database + set durfile durable.db + set ddb [eval berkdb_open_noerr \ + -create -auto_commit -env $env -btree $durfile] + error_check_good dbopen [is_valid_db $ddb] TRUE + + # Try to get a not-durable handle on the durable db. + puts "\tTxn011.g: Try to get a not-durable handle on\ + an open durable db." + set errormsg "Cannot open DURABLE and NOT DURABLE handles" + catch {berkdb_open_noerr \ + -auto_commit -env $env -notdurable $durfile} res + error_check_good handle_error1 [is_substr $res $errormsg] 1 + error_check_good ddb_close [$ddb close] 0 + + # Try to get a not-durable handle when reopening the durable + # db (this should work). + set db [berkdb_open_noerr \ + -auto_commit -env $env -notdurable $durfile] + error_check_good db_reopen [is_valid_db $db] TRUE + error_check_good db_close [$db close] 0 + + # Now reopen as durable for the remainder of the test. 
+ set ddb [berkdb_open_noerr \ + -auto_commit -env $env -btree $durfile] + error_check_good dbopen [is_valid_db $ddb] TRUE + + puts "\tTxn011.h: Abort txns in durable db." + # Add items to db in several txns but abort every one. + txn011_runtxns $ntxns $ddb $env abort + # Make sure there is nothing in the db. + txn011_check_empty $ddb $env + + puts "\tTxn011.i: Commit txns in durable db." + txn011_runtxns $ntxns $ddb $env commit + + puts "\tTxn011.j: Subdbs must all be durable or all not durable." + # Ask for -notdurable on durable db/subdb + set sdb1 [eval berkdb_open_noerr -create -auto_commit \ + -env $env -btree testfile1.db subdb1] + catch {set sdb2 [eval berkdb_open_noerr -create -auto_commit \ + -env $env -btree -notdurable testfile1.db subdb2]} res + error_check_good same_type_subdb1 [is_substr $res $errormsg] 1 + error_check_good sdb1_close [$sdb1 close] 0 + + # Ask for durable on notdurable db/subdb + set sdb3 [eval berkdb_open_noerr -create -auto_commit \ + -env $env -btree -notdurable testfile2.db subdb3] + catch {set sdb4 [eval berkdb_open_noerr -create -auto_commit \ + -env $env -btree testfile2.db subdb4]} res + error_check_good same_type_subdb2 [is_substr $res $errormsg] 1 + error_check_good sdb3_close [$sdb3 close] 0 + + puts "\tTxn011.k: Try to get a durable handle on a\ + not-durable db." + # Try to get a durable handle on a not-durable database, + # while open. This should fail, but getting a durable handle + # when re-opening should work. + catch {berkdb_open_noerr -auto_commit -env $env $nondurfile} res + error_check_good handle_error [is_substr $res $errormsg] 1 + error_check_good ndb_close [$ndb close] 0 + + set ndb [berkdb_open_noerr -auto_commit -env $env $nondurfile] + error_check_good ndb_reopen [is_valid_db $ndb] TRUE + error_check_good ndb_close [$ndb close] 0 + + # Clean up mixed env. 
+ error_check_good ddb_close [$ddb close] 0 + error_check_good env_close [$env close] 0 + } +} + +proc txn011_runtxns { ntxns db env end } { + source ./include.tcl + + set did [open $dict] + set i 0 + while { [gets $did str] != -1 && $i < $ntxns } { + set txn [$env txn] + error_check_good txn_begin [is_valid_txn $txn $env] TRUE + + error_check_good db_put_txn [$db put -txn $txn $i $str] 0 + error_check_good txn_$end [$txn $end] 0 + incr i + } + close $did +} + +# Verify that a database is empty +proc txn011_check_empty { db env } { + # Start a transaction + set t [$env txn] + error_check_good txn [is_valid_txn $t $env] TRUE + set txn "-txn $t" + + # If a cursor get -first returns nothing, the db is empty. + set dbc [eval {$db cursor} $txn] + error_check_good db_cursor [is_substr $dbc $db] 1 + set ret [$dbc get -first] + error_check_good get_on_empty [string length $ret] 0 + error_check_good dbc_close [$dbc close] 0 + + # End transaction + error_check_good txn [$t commit] 0 +} + +# Some log records are still produced when we run create in a +# non-durable db in a regular env. Just make sure we don't see +# any unexpected types. +proc check_log_records { dir } { + global util_path + + set tmpfile $dir/printlog.out + set stat [catch {exec $util_path/db_printlog -h $dir > $tmpfile} ret] + error_check_good db_printlog $stat 0 + + set f [open $tmpfile r] + while { [gets $f record] >= 0 } { + set r [regexp {\[[^\]]*\]\[[^\]]*\]([^\:]*)\:} $record whl name] + if { $r == 1 && [string match *_debug $name] != 1 && \ + [string match __txn_regop $name] != 1 && \ + [string match __txn_child $name] != 1 } { + puts "FAIL: unexpected log record $name found" + } + } + close $f + fileremove $tmpfile +} diff --git a/test/tcl/txn012.tcl b/test/tcl/txn012.tcl new file mode 100644 index 00000000..9e3946dc --- /dev/null +++ b/test/tcl/txn012.tcl @@ -0,0 +1,61 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. 
All rights reserved. +# +# $Id$ +# +# TEST txn012 +# TEST Test txn->getname and txn->setname. + +proc txn012 { {ntxns 100} } { + source ./include.tcl + global util_path + + puts "Txn012: Test txn->setname and txn->getname." + env_cleanup $testdir + set txnname "this is a short txn name" + set longtxnname "transaction names longer than 50 characters will be truncated" + + puts "\tTxn012.a: Set up env and txn." + set env [berkdb_env -create -home $testdir -txn] + set db [berkdb_open -create -auto_commit -btree -env $env test.db] + set txn0 [$env txn] + set txn1 [$env txn] + + # Name the transactions, check the name. + error_check_good name_txn0 [$txn0 setname $txnname] 0 + set getname [$txn0 getname] + error_check_good txnname $getname $txnname + + error_check_good longname_txn [$txn1 setname $longtxnname] 0 + set getlongname [$txn1 getname] + error_check_good longtxnname $getlongname $longtxnname + + # Run db_stat. The long txn name will be truncated. + set stat [exec $util_path/db_stat -h $testdir -t] + error_check_good stat_name [is_substr $stat $txnname] 1 + error_check_good stat_longname [is_substr $stat $longtxnname] 0 + set truncname [string range $longtxnname 0 49] + error_check_good stat_truncname [is_substr $stat $truncname] 1 + + # Start another process and make sure it can see the names too. + puts "\tTxn012.b: Fork child process." + set pid [exec $tclsh_path $test_path/wrap.tcl txn012script.tcl \ + $testdir/txn012script.log $testdir $txnname $longtxnname &] + + watch_procs $pid 1 + + error_check_good txn0_commit [$txn0 commit] 0 + error_check_good txn1_commit [$txn1 commit] 0 + + # Check for errors in child log file. + set errstrings [eval findfail $testdir/txn012script.log] + foreach str $errstrings { + puts "FAIL: error message in log file: $str" + } + + # Clean up. 
+ error_check_good db_close [$db close] 0 + error_check_good env_close [$env close] 0 +} + diff --git a/test/tcl/txn012script.tcl b/test/tcl/txn012script.tcl new file mode 100644 index 00000000..45698073 --- /dev/null +++ b/test/tcl/txn012script.tcl @@ -0,0 +1,33 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Script to check that txn names can be seen across processes. +# Names over 50 characters will be truncated. +# +# Usage: txn012script dir txnname longtxnname + +source ./include.tcl +source $test_path/test.tcl + +set usage "txn012script dir txnname longtxnname" + +# Verify usage +if { $argc != 3 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set dir [ lindex $argv 0 ] +set txnname [ lindex $argv 1 ] +set longtxnname [ lindex $argv 2 ] + +# Run db_stat to view txn names. +set stat [exec $util_path/db_stat -h $dir -t] +error_check_good txnname [is_substr $stat $txnname] 1 +error_check_good longtxnname [is_substr $stat $longtxnname] 0 +set truncname [string range $longtxnname 0 49] +error_check_good truncname [is_substr $stat $truncname] 1 diff --git a/test/tcl/txn013.tcl b/test/tcl/txn013.tcl new file mode 100644 index 00000000..ec9aa39c --- /dev/null +++ b/test/tcl/txn013.tcl @@ -0,0 +1,76 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn013 +# TEST Test of txns used in the wrong environment. +# TEST Set up two envs. Start a txn in one env, and attempt to use it +# TEST in the other env. Verify we get the appropriate error message. +proc txn013 { } { + source ./include.tcl + + set tnum "013" + puts "Txn$tnum: Test use of txns in wrong environment." + set testfile FILE.db + set key KEY + set data DATA + + env_cleanup $testdir + + puts "\tTxn$tnum.a: Create two environments." 
+ set env1 [berkdb_env_noerr -create -mode 0644 -txn -home $testdir] + file mkdir $testdir/SUBDIR + set env2 \ + [berkdb_env_noerr -create -mode 0644 -txn -home $testdir/SUBDIR] + error_check_good env1 [is_valid_env $env1] TRUE + error_check_good env2 [is_valid_env $env2] TRUE + + # Open a database in each environment. + puts "\tTxn$tnum.b: Open a database in each environment." + set db1 [berkdb_open_noerr \ + -env $env1 -create -auto_commit -btree $testfile] + set db2 [berkdb_open_noerr \ + -env $env2 -create -auto_commit -btree $testfile] + + # Create txns in both environments. + puts "\tTxn$tnum.c: Start a transaction in each environment." + set txn1 [$env1 txn] + set txn2 [$env2 txn] + error_check_good txn1_begin [is_valid_txn $txn1 $env1] TRUE + error_check_good txn2_begin [is_valid_txn $txn2 $env2] TRUE + + # First do the puts in the correct envs, so we have something + # for the gets and deletes. + error_check_good txn1_env1 [$db1 put -txn $txn1 $key $data] 0 + error_check_good txn2_env2 [$db2 put -txn $txn2 $key $data] 0 + + puts "\tTxn$tnum.d: Execute db put in wrong environment." + set errormsg "from different environments" + catch {$db1 put -txn $txn2 $key $data} res + error_check_good put_env1txn2 [is_substr $res $errormsg] 1 + catch {$db2 put -txn $txn1 $key $data} res + error_check_good put_env2txn1 [is_substr $res $errormsg] 1 + + puts "\tTxn$tnum.e: Execute db get in wrong environment." + catch {$db1 get -txn $txn2 $key} res + error_check_good get_env1txn2 [is_substr $res $errormsg] 1 + catch {$db2 get -txn $txn1 $key} res + error_check_good get_env2txn1 [is_substr $res $errormsg] 1 + + puts "\tTxn$tnum.f: Execute db del in wrong environment." + catch {$db1 del -txn $txn2 $key} res + error_check_good get_env1txn2 [is_substr $res $errormsg] 1 + catch {$db2 del -txn $txn1 $key} res + error_check_good get_env2txn1 [is_substr $res $errormsg] 1 + + # Clean up. 
+ error_check_good txn1_commit [$txn1 commit] 0 + error_check_good txn2_commit [$txn2 commit] 0 + error_check_good db1_close [$db1 close] 0 + error_check_good db2_close [$db2 close] 0 + error_check_good env1_close [$env1 close] 0 + error_check_good env2_close [$env2 close] 0 +} + diff --git a/test/tcl/txn014.tcl b/test/tcl/txn014.tcl new file mode 100644 index 00000000..8f95e5ad --- /dev/null +++ b/test/tcl/txn014.tcl @@ -0,0 +1,158 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2005, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# TEST txn014 +# TEST Test of parent and child txns working on the same database. +# TEST A txn that will become a parent create a database. +# TEST A txn that will not become a parent creates another database. +# TEST Start a child txn of the 1st txn. +# TEST Verify that the parent txn is disabled while child is open. +# TEST 1. Child reads contents with child handle (should succeed). +# TEST 2. Child reads contents with parent handle (should succeed). +# TEST Verify that the non-parent txn can read from its database, +# TEST and that the child txn cannot. +# TEST Return to the child txn. +# TEST 3. Child writes with child handle (should succeed). +# TEST 4. Child writes with parent handle (should succeed). +# TEST +# TEST Commit the child, verify that the parent can write again. +# TEST Check contents of database with a second child. +proc txn014 { } { + source ./include.tcl + global default_pagesize + + set page_size $default_pagesize + # If the page size is very small, we increase page size, + # so we won't run out of lockers. + if { $page_size < 2048 } { + set page_size 2048 + } + set tnum "014" + puts "Txn$tnum: Test use of parent and child txns." + set parentfile test$tnum.db + set nonparentfile test$tnum.db.2 + set method "-btree" + + # Use 5000 entries so there will be new items on the wordlist + # when we double nentries in part h. 
+ set nentries 5000 + + env_cleanup $testdir + + puts "\tTxn$tnum.a: Create environment." + set eflags "-create -mode 0644 -txn -home $testdir" + set env [eval {berkdb_env_noerr} $eflags] + error_check_good env [is_valid_env $env] TRUE + + # Open a database with parent txn and populate. We populate + # before starting up the child txn, because the only allowed + # Berkeley DB calls for a parent txn are beginning child txns, + # committing, or aborting. + + puts "\tTxn$tnum.b: Start parent txn and open database." + set parent [$env txn] + error_check_good parent_begin [is_valid_txn $parent $env] TRUE + set db [berkdb_open_noerr -pagesize $page_size \ + -env $env -txn $parent -create $method $parentfile] + populate $db $method $parent $nentries 0 0 + + puts "\tTxn$tnum.c: Start non-parent txn and open database." + set nonparent [$env txn] + error_check_good nonparent_begin [is_valid_txn $nonparent $env] TRUE + set db2 [berkdb_open_noerr -pagesize $page_size \ + -env $env -txn $nonparent -create $method $nonparentfile] + populate $db2 $method $nonparent $nentries 0 0 + + # Start child txn and open database. Parent txn is not yet + # committed, but the child should be able to read what's there. + # The child txn should also be able to use the parent txn. + + puts "\tTxn$tnum.d: Start child txn." + set child [$env txn -parent $parent] + + puts "\tTxn$tnum.e: Verify parent is disabled." + catch {$db put -txn $parent a a} ret + error_check_good \ + parent_disabled [is_substr $ret "Child transaction is active"] 1 + + puts "\tTxn$tnum.f: Get a handle on parent's database using child txn." + set childdb [berkdb_open_noerr -pagesize $page_size \ + -env $env -txn $child $method $parentfile] + + puts "\tTxn$tnum.g: Read database with child txn/child handle," + puts "\tTxn$tnum.g: and with child txn/parent handle." + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + + # First use child's handle. 
+ set ret [$childdb get -txn $child $key] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + + # Have the child use the parent's handle. + set ret [$db get -txn $child $key] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] + incr count + } + close $did + + # Read the last key from the non-parent database, then try + # to read the same key using the child txn. It will fail. + puts "\tTxn$tnum.h: Child cannot read data from non-parent." + set ret [$db2 get -txn $nonparent $key] + + # Check the return against $key, because $str has gone on to + # the next item in the wordlist. + error_check_good \ + np_get $ret [list [list $key [pad_data $method $key]]] + catch {$db2 get -txn $child $key} ret + error_check_good \ + child_np_get [is_substr $ret "is still active"] 1 + + # The child should also be able to update the database, using + # either handle. + puts "\tTxn$tnum.i: Write to database with child txn & child handle." + populate $childdb $method $child $nentries 0 0 + puts "\tTxn$tnum.j: Write to database with child txn & parent handle." + populate $db $method $child $nentries 0 0 + + puts "\tTxn$tnum.k: Commit child, freeing parent." + error_check_good child_commit [$child commit] 0 + error_check_good childdb_close [$childdb close] 0 + + puts "\tTxn$tnum.l: Add more entries to db using parent txn." + set nentries [expr $nentries * 2] + populate $db $method $parent $nentries 0 0 + + puts "\tTxn$tnum.m: Start new child txn and read database." + set child2 [$env txn -parent $parent] + set child2db [berkdb_open_noerr -pagesize $page_size \ + -env $env -txn $child2 $method $parentfile] + + set did [open $dict] + set count 0 + while { [gets $did str] != -1 && $count < $nentries } { + set key $str + set ret [$child2db get -txn $child2 $key] + error_check_good \ + get $ret [list [list $key [pad_data $method $str]]] 1 + incr count + } + close $did + + puts "\tTxn$tnum.n: Clean up." 
+ error_check_good child2_commit [$child2 commit] 0 + error_check_good nonparent_commit [$nonparent commit] 0 + error_check_good parent_commit [$parent commit] 0 + error_check_good db_close [$db close] 0 + error_check_good db2_close [$db2 close] 0 + error_check_good childdb_close [$child2db close] 0 + error_check_good env_close [$env close] 0 +} + diff --git a/test/tcl/txnscript.tcl b/test/tcl/txnscript.tcl new file mode 100644 index 00000000..795b1b4b --- /dev/null +++ b/test/tcl/txnscript.tcl @@ -0,0 +1,66 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Txn003 script - outstanding child prepare script +# Usage: txnscript envcmd dbcmd gidf key data +# envcmd: command to open env +# dbfile: name of database file +# gidf: name of global id file +# key: key to use +# data: new data to use + +source ./include.tcl +source $test_path/test.tcl +source $test_path/testutils.tcl + +set usage "txnscript envcmd dbfile gidfile key data" + +# Verify usage +if { $argc != 5 } { + puts stderr "FAIL:[timestamp] Usage: $usage" + exit +} + +# Initialize arguments +set envcmd [ lindex $argv 0 ] +set dbfile [ lindex $argv 1 ] +set gidfile [ lindex $argv 2 ] +set key [ lindex $argv 3 ] +set data [ lindex $argv 4 ] + +set dbenv [eval $envcmd] +error_check_good envopen [is_valid_env $dbenv] TRUE + +set usedb 1 +set db [berkdb_open -auto_commit -env $dbenv $dbfile] +error_check_good dbopen [is_valid_db $db] TRUE + +puts "\tTxnscript.a: begin parent and child txn" +set parent [$dbenv txn] +error_check_good parent [is_valid_txn $parent $dbenv] TRUE +set child [$dbenv txn -parent $parent] +error_check_good parent [is_valid_txn $child $dbenv] TRUE + +puts "\tTxnscript.b: Modify data" +error_check_good db_put [$db put -txn $child $key $data] 0 + +set gfd [open $gidfile w+] +set gid [make_gid txnscript:$parent] +puts $gfd $gid +puts "\tTxnscript.c: Prepare parent only" +error_check_good 
txn_prepare:$parent [$parent prepare $gid] 0 +close $gfd + +puts "\tTxnscript.d: Check child handle" +set stat [catch {$child abort} ret] +error_check_good child_handle $stat 1 +error_check_good child_h2 [is_substr $ret "invalid command name"] 1 + +# +# We do not close the db or env, but exit with the txns outstanding. +# +puts "\tTxnscript completed successfully" +flush stdout diff --git a/test/tcl/update.tcl b/test/tcl/update.tcl new file mode 100644 index 00000000..201068d3 --- /dev/null +++ b/test/tcl/update.tcl @@ -0,0 +1,92 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ + +source ./include.tcl +global update_dir +set update_dir "$test_path/update_test" + +proc update { } { + source ./include.tcl + global update_dir + + foreach version [glob $update_dir/*] { + regexp \[^\/\]*$ $version version + foreach method [glob $update_dir/$version/*] { + regexp \[^\/\]*$ $method method + foreach file [glob $update_dir/$version/$method/*] { + regexp (\[^\/\]*)\.tar\.gz$ $file dummy name + foreach endianness {"le" "be"} { + puts "Update:\ + $version $method $name $endianness" + set ret [catch {_update $update_dir $testdir $version $method $name $endianness 1 1} message] + if { $ret != 0 } { + puts $message + } + } + } + } + } +} + +proc _update { source_dir temp_dir \ + version method file endianness do_db_load_test do_update_test } { + source include.tcl + global errorInfo + + cleanup $temp_dir NULL + + exec sh -c \ +"gzcat $source_dir/$version/$method/$file.tar.gz | (cd $temp_dir && tar xf -)" + + if { $do_db_load_test } { + set ret [catch \ + {exec $util_path/db_load -f "$temp_dir/$file.dump" \ + "$temp_dir/update.db"} message] + error_check_good \ + "Update load: $version $method $file $message" $ret 0 + + set ret [catch \ + {exec $util_path/db_dump -f "$temp_dir/update.dump" \ + "$temp_dir/update.db"} message] + error_check_good \ + "Update dump: $version $method 
$file $message" $ret 0 + + error_check_good "Update diff.1.1: $version $method $file" \ + [filecmp "$temp_dir/$file.dump" "$temp_dir/update.dump"] 0 + error_check_good \ + "Update diff.1.2: $version $method $file" $ret "" + } + + if { $do_update_test } { + set ret [catch \ + {berkdb open -update "$temp_dir/$file-$endianness.db"} db] + if { $ret == 1 } { + if { ![is_substr $errorInfo "version upgrade"] } { + set fnl [string first "\n" $errorInfo] + set theError \ + [string range $errorInfo 0 [expr $fnl - 1]] + error $theError + } + } else { + error_check_good dbopen [is_valid_db $db] TRUE + error_check_good dbclose [$db close] 0 + + set ret [catch \ + {exec $util_path/db_dump -f \ + "$temp_dir/update.dump" \ + "$temp_dir/$file-$endianness.db"} message] + error_check_good "Update\ + dump: $version $method $file $message" $ret 0 + + error_check_good \ + "Update diff.2: $version $method $file" \ + [filecmp "$temp_dir/$file.dump" \ + "$temp_dir/update.dump"] 0 + error_check_good \ + "Update diff.2: $version $method $file" $ret "" + } + } +} diff --git a/test/tcl/upgrade.tcl b/test/tcl/upgrade.tcl new file mode 100644 index 00000000..c470e922 --- /dev/null +++ b/test/tcl/upgrade.tcl @@ -0,0 +1,856 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999, 2011 Oracle and/or its affiliates. All rights reserved. 
+# +# $Id$ + +source ./include.tcl + +global upgrade_dir +# set upgrade_dir "$test_path/upgrade_test" +set upgrade_dir "$test_path/upgrade/databases" + +global gen_upgrade +set gen_upgrade 0 +global gen_dump +set gen_dump 0 +global gen_chksum +set gen_chksum 0 +global gen_upgrade_log +set gen_upgrade_log 0 + +global upgrade_dir +global upgrade_be +global upgrade_method +global upgrade_name + +proc upgrade { { archived_test_loc "DEFAULT" } } { + source ./include.tcl + global test_names + global upgrade_dir + global tcl_platform + global saved_logvers + + set saved_upgrade_dir $upgrade_dir + + # Identify endianness of the machine running upgrade. + if { [big_endian] == 1 } { + set myendianness be + } else { + set myendianness le + } + set e $tcl_platform(byteOrder) + + if { [file exists $archived_test_loc/logversion] == 1 } { + set fd [open $archived_test_loc/logversion r] + set saved_logvers [read $fd] + close $fd + } else { + puts "Old log version number must be available \ + in $archived_test_loc/logversion" + return + } + + fileremove -f UPGRADE.OUT + set o [open UPGRADE.OUT a] + + puts -nonewline $o "Upgrade test started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + puts $o "Testing $e files" + + puts -nonewline "Upgrade test started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + puts "Testing $e files" + + if { $archived_test_loc == "DEFAULT" } { + puts $o "Using default archived databases in $upgrade_dir." + puts "Using default archived databases in $upgrade_dir." + } else { + set upgrade_dir $archived_test_loc + puts $o "Using archived databases in $upgrade_dir." + puts "Using archived databases in $upgrade_dir." 
+ } + close $o + + foreach version [glob $upgrade_dir/*] { + if { [string first CVS $version] != -1 } { continue } + regexp \[^\/\]*$ $version version + + # Test only files where the endianness of the db matches + # the endianness of the test platform. These are the + # meaningful tests: + # 1. File generated on le, tested on le + # 2. File generated on be, tested on be + # 3. Byte-swapped file generated on le, tested on be + # 4. Byte-swapped file generated on be, tested on le + # + set dbendianness [string range $version end-1 end] + if { [string compare $myendianness $dbendianness] != 0 } { + puts "Skipping test of $version \ + on $myendianness platform." + } else { + set release [string trim $version -lbe] + set o [open UPGRADE.OUT a] + puts $o "Files created on release $release" + close $o + puts "Files created on release $release" + + foreach method [glob $upgrade_dir/$version/*] { + regexp \[^\/\]*$ $method method + set o [open UPGRADE.OUT a] + puts $o "\nTesting $method files" + close $o + puts "\tTesting $method files" + + foreach file [lsort -dictionary \ + [glob -nocomplain \ + $upgrade_dir/$version/$method/*]] { + regexp (\[^\/\]*)\.tar\.gz$ \ + $file dummy name + + cleanup $testdir NULL 1 + set curdir [pwd] + cd $testdir + set tarfd [open "|tar xf -" w] + cd $curdir + + catch {exec gunzip -c \ + "$upgrade_dir/$version/$method/$name.tar.gz" \ + >@$tarfd} + close $tarfd + + set f [open $testdir/$name.tcldump \ + {RDWR CREAT}] + close $f + + # We exec a separate tclsh for each + # separate subtest to keep the + # testing process from consuming a + # tremendous amount of memory. + # + # First we test the .db files. 
+ if { [file exists \ + $testdir/$name-$myendianness.db] } { + if { [catch {exec $tclsh_path \ + << "source \ + $test_path/test.tcl;\ + _upgrade_test $testdir \ + $version $method $name \ + $myendianness" >>& \ + UPGRADE.OUT } message] } { + set o [open \ + UPGRADE.OUT a] + puts $o "FAIL: $message" + close $o + } + if { [catch {exec $tclsh_path\ + << "source \ + $test_path/test.tcl;\ + _db_load_test $testdir \ + $version $method $name" >>&\ + UPGRADE.OUT } message] } { + set o [open \ + UPGRADE.OUT a] + puts $o "FAIL: $message" + close $o + } + } + # Then we test log files. + if { [file exists \ + $testdir/$name.prlog] } { + if { [catch {exec $tclsh_path \ + << "source \ + $test_path/test.tcl;\ + global saved_logvers;\ + set saved_logvers \ + $saved_logvers;\ + _log_test $testdir \ + $release $method \ + $name" >>& \ + UPGRADE.OUT } message] } { + set o [open \ + UPGRADE.OUT a] + puts $o "FAIL: $message" + close $o + } + } + + # Then we test any .dmp files. Move + # the saved file to the current working + # directory. Run the test locally. + # Compare the dumps; they should match. + if { [file exists $testdir/$name.dmp] } { + file rename -force \ + $testdir/$name.dmp $name.dmp + + foreach test $test_names(plat) { + eval $test $method + } + + # Discard lines that can differ. + discardline $name.dmp \ + TEMPFILE "db_pagesize=" + file copy -force \ + TEMPFILE $name.dmp + discardline $testdir/$test.dmp \ + TEMPFILE "db_pagesize=" + file copy -force \ + TEMPFILE $testdir/$test.dmp + + error_check_good compare_dump \ + [filecmp $name.dmp \ + $testdir/$test.dmp] 0 + + fileremove $name.dmp + } + } + } + } + } + set upgrade_dir $saved_upgrade_dir + + set o [open UPGRADE.OUT a] + puts -nonewline $o "Completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + close $o + + puts -nonewline "Completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + + # Don't provide a return value. 
+ return +} + +proc _upgrade_test { temp_dir version method file endianness } { + source include.tcl + global errorInfo + global passwd + global encrypt + + puts "Upgrade: $version $method $file $endianness" + + # Check whether we're working with an encrypted file. + if { [string match c-* $file] } { + set encrypt 1 + } + + # Open the database prior to upgrading. If it fails, + # it should fail with the DB_OLDVERSION message. + set encargs "" + set upgradeargs "" + if { $encrypt == 1 } { + set encargs " -encryptany $passwd " + set upgradeargs " -P $passwd " + } + if { [catch \ + { set db [eval {berkdb open} $encargs \ + $temp_dir/$file-$endianness.db] } res] } { + error_check_good old_version [is_substr $res DB_OLDVERSION] 1 + } else { + error_check_good db_close [$db close] 0 + } + + # Now upgrade the database. + set ret [catch {eval exec {$util_path/db_upgrade} $upgradeargs \ + "$temp_dir/$file-$endianness.db" } message] + error_check_good dbupgrade $ret 0 + + error_check_good dbupgrade_verify [verify_dir $temp_dir "" 0 0 1] 0 + + upgrade_dump "$temp_dir/$file-$endianness.db" "$temp_dir/temp.dump" + + error_check_good "Upgrade diff.$endianness: $version $method $file" \ + [filecmp "$temp_dir/$file.tcldump" "$temp_dir/temp.dump"] 0 +} + +proc _db_load_test { temp_dir version method file } { + source include.tcl + global errorInfo + + puts "Db_load: $version $method $file" + + set ret [catch \ + {exec $util_path/db_load -f "$temp_dir/$file.dump" \ + "$temp_dir/upgrade.db"} message] + error_check_good \ + "Upgrade load: $version $method $file $message" $ret 0 + + upgrade_dump "$temp_dir/upgrade.db" "$temp_dir/temp.dump" + + error_check_good "Upgrade diff.1.1: $version $method $file" \ + [filecmp "$temp_dir/$file.tcldump" "$temp_dir/temp.dump"] 0 +} + +proc _log_test { temp_dir release method file } { + source ./include.tcl + global saved_logvers + global passwd + puts "Check log file: $temp_dir $release $method $file" + + # Get log version number of current system + 
set env [berkdb_env -create -log -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + set current_logvers [get_log_vers $env] + error_check_good env_close [$env close] 0 + error_check_good env_remove [berkdb envremove -home $testdir] 0 + + # Rename recd001-x-log.000000000n to log.000000000n. + set logfiles [glob -nocomplain $temp_dir/*log.0*] + foreach logfile $logfiles { + set logname [string replace $logfile 0 \ + [string last - $logfile]] + file rename -force $logfile $temp_dir/$logname + } + + # Use db_printlog to dump the logs. If the current log file + # version is greater than the saved log file version, the log + # files are expected to be unreadable. If the log file is + # readable, check that the current printlog dump matches the + # archived printlog. + # + set ret [catch {exec $util_path/db_printlog -h $temp_dir \ + > $temp_dir/logs.prlog} message] + if { [is_substr $message "magic number"] } { + # The failure is probably due to encryption, try + # crypto printlog. + set ret [catch {exec $util_path/db_printlog -h $temp_dir \ + -P $passwd > $temp_dir/logs.prlog} message] + if { $ret == 1 } { + # If the failure is because of a historic + # log version, that's okay. + if { $current_logvers <= $saved_logvers } { + puts "db_printlog failed: $message" + } + } + } + + # Log versions prior to 8 can only be read by their own version. + # Log versions of 8 or greater are readable by Berkeley DB 4.5 + # or greater, but the output of printlog does not match unless + # the versions are identical. + # + # As of Berkeley 4.5 (log version 12), versions can read each + # other's log records, for purposes of mixed-version replication. + # 4.5 or newer can still run a db_printlog on log versions 8 + # through 11, but may encounter unrecognized log record types. 
+ # + set logoldver 8 + if { $current_logvers > $saved_logvers &&\ + $saved_logvers < $logoldver } { + error_check_good historic_log_version \ + [is_substr $message "historic log version"] 1 + } elseif { $current_logvers > $saved_logvers } { + error_check_good db_printlog:$message $ret 0 + } elseif { $current_logvers == $saved_logvers } { + error_check_good db_printlog:$message $ret 0 + # Compare logs.prlog and $file.prlog (should match) + error_check_good "Compare printlogs" [filecmp \ + "$temp_dir/logs.prlog" "$temp_dir/$file.prlog"] 0 + } elseif { $current_logvers < $saved_logvers } { + puts -nonewline "FAIL: current log version $current_logvers " + puts "cannot be less than saved log version $saved_logvers." + } +} + +proc gen_upgrade { dir { save_crypto 1 } { save_non_crypto 1 } } { + global gen_upgrade + global gen_upgrade_log + global gen_chksum + global gen_dump + global upgrade_dir + global upgrade_be + global upgrade_method + global upgrade_name + global valid_methods + global test_names + global parms + global encrypt + global passwd + source ./include.tcl + + set upgrade_dir $dir + env_cleanup $testdir + + fileremove -f GENERATE.OUT + set o [open GENERATE.OUT a] + + puts -nonewline $o "Generating upgrade files. Started at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts $o [berkdb version -string] + + puts -nonewline "Generating upgrade files. Started at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + puts [berkdb version -string] + + close $o + + # Create a file that contains the log version number. + # If necessary, create the directory to contain the file. + set env [berkdb_env -create -log -home $testdir] + error_check_good is_valid_env [is_valid_env $env] TRUE + + if { [file exists $dir] == 0 } { + file mkdir $dir + } + set lv [open $dir/logversion w] + puts $lv [get_log_vers $env] + close $lv + + error_check_good env_close [$env close] 0 + + # Generate test databases for each access method and endianness. 
+ foreach method $valid_methods { + set o [open GENERATE.OUT a] + puts $o "\nGenerating $method files" + close $o + puts "\tGenerating $method files" + set upgrade_method $method + + # We piggyback testing of dumped sequence files on upgrade + # testing because this is the only place that we ship files + # from one machine to another. Create files for both + # endiannesses, because who knows what platform we'll + # be testing on. + + set gen_dump 1 + foreach test $test_names(plat) { + set upgrade_name $test + foreach upgrade_be { 0 1 } { + eval $test $method + cleanup $testdir NULL + } + } + set gen_dump 0 + +#set test_names(test) "" + set gen_upgrade 1 + foreach test $test_names(test) { + if { [info exists parms($test)] != 1 } { + continue + } + + set o [open GENERATE.OUT a] + puts $o "\t\tGenerating files for $test" + close $o + puts "\t\tGenerating files for $test" + + if { $save_non_crypto == 1 } { + set encrypt 0 + foreach upgrade_be { 0 1 } { + set upgrade_name $test + if [catch {exec $tclsh_path \ + << "source $test_path/test.tcl;\ + global gen_upgrade upgrade_be;\ + global upgrade_method upgrade_name;\ + global encrypt;\ + set encrypt $encrypt;\ + set gen_upgrade 1;\ + set upgrade_be $upgrade_be;\ + set upgrade_method $upgrade_method;\ + set upgrade_name $upgrade_name;\ + run_method -$method $test" \ + >>& GENERATE.OUT} res] { + puts "FAIL: run_method \ + $test $method" + } + cleanup $testdir NULL 1 + } + # Save checksummed files for only one test. + # Checksumming should work in all or no cases. 
+ set gen_chksum 1 + foreach upgrade_be { 0 1 } { + set upgrade_name $test + if { $test == "test001" } { + if { [catch {exec $tclsh_path \ + << "source $test_path/test.tcl;\ + global gen_upgrade;\ + global upgrade_be;\ + global upgrade_method;\ + global upgrade_name;\ + global encrypt gen_chksum;\ + set encrypt $encrypt;\ + set gen_upgrade 1;\ + set gen_chksum 1;\ + set upgrade_be $upgrade_be;\ + set upgrade_method \ + $upgrade_method;\ + set upgrade_name \ + $upgrade_name;\ + run_method -$method $test \ + 0 1 stdout -chksum" \ + >>& GENERATE.OUT} res] } { + puts "FAIL: run_method \ + $test $method \ + -chksum: $res" + } + cleanup $testdir NULL 1 + } + } + set gen_chksum 0 + } + # Save encrypted db's only of native endianness. + # Encrypted files are not portable across endianness. + if { $save_crypto == 1 } { + set upgrade_be [big_endian] + set encrypt 1 + set upgrade_name $test + if [catch {exec $tclsh_path \ + << "source $test_path/test.tcl;\ + global gen_upgrade upgrade_be;\ + global upgrade_method upgrade_name;\ + global encrypt passwd;\ + set encrypt $encrypt;\ + set passwd $passwd;\ + set gen_upgrade 1;\ + set upgrade_be $upgrade_be;\ + set upgrade_method $upgrade_method;\ + set upgrade_name $upgrade_name;\ + run_secmethod $method $test" \ + >>& GENERATE.OUT} res] { + puts "FAIL: run_secmethod \ + $test $method" + } + cleanup $testdir NULL 1 + } + } + set gen_upgrade 0 + } + + # Set upgrade_be to the native value so log files go to the + # right place. + set upgrade_be [big_endian] + + # Generate log files. + set o [open GENERATE.OUT a] + puts $o "\tGenerating log files" + close $o + puts "\tGenerating log files" + + set gen_upgrade_log 1 + # Pass the global variables and their values to the new tclsh. 
+ if { $save_non_crypto == 1 } { + set encrypt 0 + if [catch {exec $tclsh_path << "source $test_path/test.tcl;\ + global gen_upgrade_log upgrade_be upgrade_dir;\ + global encrypt;\ + set encrypt $encrypt;\ + set gen_upgrade_log $gen_upgrade_log; \ + set upgrade_be $upgrade_be;\ + set upgrade_dir $upgrade_dir;\ + run_recds" >>& GENERATE.OUT} res] { + puts "FAIL: run_recds: $res" + } + } + if { $save_crypto == 1 } { + set encrypt 1 + if [catch {exec $tclsh_path << "source $test_path/test.tcl;\ + global gen_upgrade_log upgrade_be upgrade_dir;\ + global encrypt;\ + set encrypt $encrypt;\ + set gen_upgrade_log $gen_upgrade_log; \ + set upgrade_be $upgrade_be;\ + set upgrade_dir $upgrade_dir;\ + run_recds " >>& GENERATE.OUT} res] { + puts "FAIL: run_recds with crypto: $res" + } + } + set gen_upgrade_log 0 + + set o [open GENERATE.OUT a] + puts -nonewline $o "Completed at: " + puts $o [clock format [clock seconds] -format "%H:%M %D"] + puts -nonewline "Completed at: " + puts [clock format [clock seconds] -format "%H:%M %D"] + close $o +} + +proc save_upgrade_files { dir } { + global upgrade_dir + global upgrade_be + global upgrade_method + global upgrade_name + global gen_upgrade + global gen_upgrade_log + global gen_dump + global encrypt + global gen_chksum + global passwd + source ./include.tcl + + set vers [berkdb version] + set maj [lindex $vers 0] + set min [lindex $vers 1] + + # Is this machine big or little endian? We want to mark + # the test directories appropriately, since testing + # little-endian databases generated by a big-endian machine, + # and/or vice versa, is interesting. 
+ if { [big_endian] } { + set myendianness be + } else { + set myendianness le + } + + if { $upgrade_be == 1 } { + set version_dir "$myendianness-$maj.${min}be" + set en be + } else { + set version_dir "$myendianness-$maj.${min}le" + set en le + } + + set dest $upgrade_dir/$version_dir/$upgrade_method + exec mkdir -p $dest + + if { $gen_upgrade == 1 } { + # Save db files from test001 - testxxx. + set dbfiles [glob -nocomplain $dir/*.db] + set dumpflag "" + # Encrypted files are identified by the prefix "c-". + if { $encrypt == 1 } { + set upgrade_name c-$upgrade_name + set dumpflag " -P $passwd " + } + # Checksummed files are identified by the prefix "s-". + if { $gen_chksum == 1 } { + set upgrade_name s-$upgrade_name + } + foreach dbfile $dbfiles { + set basename [string range $dbfile \ + [expr [string length $dir] + 1] end-3] + + set newbasename $upgrade_name-$basename + + # db_dump file + if { [catch {eval exec $util_path/db_dump -k $dumpflag \ + $dbfile > $dir/$newbasename.dump} res] } { + puts "FAIL: $res" + } + + # tcl_dump file + upgrade_dump $dbfile $dir/$newbasename.tcldump + + # Rename dbfile and any dbq files. + file rename $dbfile $dir/$newbasename-$en.db + foreach dbq \ + [glob -nocomplain $dir/__dbq.$basename.db.*] { + set s [string length $dir/__dbq.] + set newname [string replace $dbq $s \ + [expr [string length $basename] + $s - 1] \ + $newbasename-$en] + file rename $dbq $newname + } + set cwd [pwd] + cd $dir + catch {eval exec tar -cvf $dest/$newbasename.tar \ + [glob $newbasename* __dbq.$newbasename-$en.db.*]} + catch {exec gzip -9v $dest/$newbasename.tar} res + cd $cwd + } + } + + if { $gen_upgrade_log == 1 } { + # Save log files from recd tests. + set logfiles [glob -nocomplain $dir/log.*] + if { [llength $logfiles] > 0 } { + # More than one log.0000000001 file may be produced + # per recd test, so we generate unique names: + # recd001-0-log.0000000001, recd001-1-log.0000000001, + # and so on. 
+ # We may also have log.0000000001, log.0000000002, + # and so on, and they will all be dumped together + # by db_printlog. + set count 0 + while { [file exists \ + $dest/$upgrade_name-$count-log.tar.gz] \ + == 1 } { + incr count + } + set newname $upgrade_name-$count-log + + # Run db_printlog on all the log files + if {[catch {exec $util_path/db_printlog -h $dir > \ + $dir/$newname.prlog} res] != 0} { + puts "Regular printlog failed, try encryption" + eval {exec $util_path/db_printlog} -h $dir \ + -P $passwd > $dir/$newname.prlog + } + + # Rename each log file so we can identify which + # recd test created it. + foreach logfile $logfiles { + set lognum [string range $logfile \ + end-9 end] + file rename $logfile $dir/$newname.$lognum + } + + set cwd [pwd] + cd $dir + + catch {eval exec tar -cvf $dest/$newname.tar \ + [glob $newname*]} + catch {exec gzip -9v $dest/$newname.tar} + cd $cwd + } + } + + if { $gen_dump == 1 } { + # Save dump files. We require that the files have + # been created with the extension .dmp. + set dumpfiles [glob -nocomplain $dir/*.dmp] + + foreach dumpfile $dumpfiles { + set basename [string range $dumpfile \ + [expr [string length $dir] + 1] end-4] + + set newbasename $upgrade_name-$basename + + # Rename dumpfile. 
+ file rename $dumpfile $dir/$newbasename.dmp + + set cwd [pwd] + cd $dir + catch {eval exec tar -cvf $dest/$newbasename.tar \ + [glob $newbasename.dmp]} + catch {exec gzip -9v $dest/$newbasename.tar} res + cd $cwd + } + } +} + +proc upgrade_dump { database file {stripnulls 0} } { + global errorInfo + global encrypt + global passwd + + set encargs "" + if { $encrypt == 1 } { + set encargs " -encryptany $passwd " + } + set db [eval {berkdb open} -rdonly $encargs $database] + set dbc [$db cursor] + + set f [open $file w+] + fconfigure $f -encoding binary -translation binary + + # + # Get a sorted list of keys + # + set key_list "" + set pair [$dbc get -first] + + while { 1 } { + if { [llength $pair] == 0 } { + break + } + set k [lindex [lindex $pair 0] 0] + lappend key_list $k + set pair [$dbc get -next] + } + + # Discard duplicated keys; we now have a key for each + # duplicate, not each unique key, and we don't want to get each + # duplicate multiple times when we iterate over key_list. + set uniq_keys "" + foreach key $key_list { + if { [info exists existence_list($key)] == 0 } { + lappend uniq_keys $key + } + set existence_list($key) 1 + } + set key_list $uniq_keys + + set key_list [lsort -command _comp $key_list] + + # + # Get the data for each key + # + set i 0 + foreach key $key_list { + set pair [$dbc get -set $key] + if { $stripnulls != 0 } { + # the Tcl interface to db versions before 3.X + # added nulls at the end of all keys and data, so + # we provide functionality to strip that out. 
+ set key [strip_null $key] + } + set data_list {} + catch { while { [llength $pair] != 0 } { + set data [lindex [lindex $pair 0] 1] + if { $stripnulls != 0 } { + set data [strip_null $data] + } + lappend data_list [list $data] + set pair [$dbc get -nextdup] + } } + #lsort -command _comp data_list + set data_list [lsort -command _comp $data_list] + puts -nonewline $f [binary format i [string length $key]] + puts -nonewline $f $key + puts -nonewline $f [binary format i [llength $data_list]] + for { set j 0 } { $j < [llength $data_list] } { incr j } { + puts -nonewline $f [binary format i [string length \ + [concat [lindex $data_list $j]]]] + puts -nonewline $f [concat [lindex $data_list $j]] + } + if { [llength $data_list] == 0 } { + puts "WARNING: zero-length data list" + } + incr i + } + + close $f + error_check_good upgrade_dump_c_close [$dbc close] 0 + error_check_good upgrade_dump_db_close [$db close] 0 +} + +proc _comp { a b } { + if { 0 } { + # XXX + set a [strip_null [concat $a]] + set b [strip_null [concat $b]] + #return [expr [concat $a] < [concat $b]] + } else { + set an [string first "\0" $a] + set bn [string first "\0" $b] + + if { $an != -1 } { + set a [string range $a 0 [expr $an - 1]] + } + if { $bn != -1 } { + set b [string range $b 0 [expr $bn - 1]] + } + } + #puts "$a $b" + return [string compare $a $b] +} + +proc strip_null { str } { + set len [string length $str] + set last [expr $len - 1] + + set termchar [string range $str $last $last] + if { [string compare $termchar \0] == 0 } { + set ret [string range $str 0 [expr $last - 1]] + } else { + set ret $str + } + + return $ret +} + +proc get_log_vers { env } { + set stat [$env log_stat] + foreach pair $stat { + set msg [lindex $pair 0] + set val [lindex $pair 1] + if { $msg == "Log file Version" } { + return $val + } + } + puts "FAIL: Log file Version not found in log_stat" + return 0 +} + diff --git a/test/tcl/wordlist b/test/tcl/wordlist new file mode 100644 index 00000000..cb23d0d4 --- 
/dev/null +++ b/test/tcl/wordlist @@ -0,0 +1,10001 @@ +addresser +cataract +colonially +atoned +avow +bathroom +adjutants +cooperate +benighted +apologist's +affidavits +baptisms +bubbling +classic +allaying +component +battlement +backtrack + +courage +bore +advertisement +attests +bunny's +airlifts +cajole +cataloging +airily +collected +abridged +compel +aftermath +barrow +approve +chillier +bequest +attendant +abjures +adjudication +banished +asymptotes +borrower +caustic +claim +cohabitation +corporacies +buoy +benchmark's +averting +anecdote's +caress +annihilate +cajoles +anywhere +apparitions +coves +bribed +casually +clue's +asserted +anaesthesia +columnated +bogs +astral +barbed +captives +acclaims +architects +abstained +attitude +accumulating +coalesced +angelic +agnostic +breathed +bother +congregating +amatory +caging +countryside +chapel +buttonhole +bartenders +bridging +bombardment +accurately +confirmed +alleviated +acquiring +bruise +antelope +albums +allusive +corker +cavity's +compliment +climb +caterpillar +almond +authenticated +balkan +assembly's +acidity +abases +bonny +been +abbots +abductor's +aerials +cancels +chalked +beeps +affirms +contrariness +clearest +appropriations +critiquing +affluence +bouts +abiding +comprises +brunches +biology +conceptualization's +assaying +abutter +adorable +beatable +appenders +aggressors +agrarian +bottleneck +angled +beholds +bereaved +creation +animated +candied +bar +aeronautics +cousin's +cleaver +alienation +billet +bungler +contention +businessman +braids +assert +boisterous +consolidate +breathing +ballot +averted +conscientiously +bellow +brazenness +coaches +bulldog +classify +checksum +almond's +cornered +caskets +capacitors +beefer +connoisseurs +consisted +adore +circumvented +colonels +addenda +boost +compatibility's +bumblebee +commonest +containment +active +absorption's +creaks +administer +beset +aborted +aforesaid +aridity +broken +azimuths +aerial +addition's +aggrieve +anthology 
+circuitous +checks +alley's +beam +boss +corrupting +absolutes +asteroid's +bandstands +beatitude's +analogue's +busts +confession +bedstead +affairs +blackmailers +collared +buckboard +assassin +accessor +adjudging +binders +constituent's +blister +aromas +approved +absorbent +barbarously +cat's +builder +brandish +assailing +constitute +christening +acutely +amount +blurry +blocks +advertise +chain +brigade's +confusion +beds +arrangers +colonizers +beautifying +bankruptcy +bedazzles +candidates +clearness +admonishment's +behind +abbreviations +basting +ballasts +amateurism +celled +constituted +calibrate +brambly +befuddles +azure +busiest +admiringly +appropriator +accumulator +cables +abhor +civil +botulinus +creaked +bismuth +astronomical +abscissas +bodice +aunt +cascades +cares +comradeship +assemblages +boater +bellmen +admission's +ambitious +baldness +abortive +controlled +chinked +coded +courtrooms +arteriolar +cooler's +cared +brewer +christians +barbecues +contacts +blackjack's +buzzing +blasters +accords +braziers +allegretto +catered +breveting +cleaning +amicably +bummed +consulted +allegro's +accumulator's +compartmented +condemned +concludes +bitwise +cheered +appropriator's +accessors +casting +carolina's +accompanying +budding +correspond +bach's +angel's +bearing +arresters +biweekly +character +badgering +cantankerous +avalanching +adjudges +barometer +append +continuations +burped +boxtop's +abstention +amp +axiomatized +bimonthlies +aghast +arresting +breakwater's +continuing +bridle +bobbin's +antagonistically +blindly +biochemical +biologically +antifundamentalist +confer +cloudiness +bonded +comfortingly +caption +blackmailed +bidders +breakpoint +brigadier +criminals +coyotes +casserole's +annex +cereals +breadboxes +belgian +conductivity +counterexample +anarchist +couches +atavistic +clipped +button +axiomatic +capping +correcting +chase +chastise +angle +burnished +beauteously +antipodes +crippling +crowns +amends +bah +brigadiers 
+alleged +correctives +bristles +buzzards +barbs +bagel +bonfire +bugled +advisee's +battled +budded +burners +causeway's +adaptation +caliber +browner +apprehensions +bonnet +anachronistically +composites +bothered +assurer +arc +chaser +bastards +calmed +bunches +apocalypse +countably +crowned +contrivance +boomerang's +airplane's +boarded +consumption +attuning +blamed +cooing +annihilation +abused +absence +coin +coronaries +applicatively +binomial +ablates +banishes +boating +companions +bilking +captivate +comment +claimants +admonish +ameliorated +bankruptcies +author +cheat +chocolates +botch +averring +beneath +crudely +creeping +acolytes +ass's +cheese's +checksum's +chillers +bracelet +archenemy +assistantship +baroque +butterfly +coolie's +anecdote +coring +cleansing +accreditation +ceaselessly +attitudes +bag +belong +assented +aped +constrains +balalaikas +consent +carpeting +conspiracy +allude +contradictory +adverb's +constitutive +arterial +admirable +begot +affectation +antiquate +attribution +competition's +bovine +commodores +alerters +abatements +corks +battlements +cave +buoys +credible +bowdlerizes +connector +amorphously +boredom +bashing +creams +arthropods +amalgamated +ballets +chafe +autograph +age +aid +colleague's +atrocious +carbonizing +chutes +barbecued +circuits +bandages +corporations +beehive +bandwagon +accommodated +councillor's +belted +airdrop +confrontations +chieftain's +canonicalization +amyl +abjectness +choke +consider +adjuster +crossover's +agreeing +consolations +capitalizers +binges +annihilating +callers +coordinate +banshees +biscuits +absorbency +corollary +corresponded +aristocrat's +banally +cruiser +bathtub's +abbreviated +balkiness +crew +acidulous +air +birdies +canvassing +concretion +collectively +chasteness +chapels +copiousness +benign +armies +competing +buss +awakened +breakpoint's +conceptualizing +cleansers +acorns +conveyance's +bluer +battle +budges +characteristically +be +contour +beguiling 
+awarding +armhole +airship's +bathtub +breathable +crowded +compiles +certain +brutalizing +bacteria +baronies +abode +blacksmith +brinkmanship +capitalizations +cousin +botany +avionic +companion +consists +connoisseur's +avalanched +claimant's +backstitches +affixes +bikes +atomically +cowed +asleep +becomingly +acorn's +complainers +appreciated +cross +cringed +booting +attitudinal +broadcasting +childishly +breeze's +craven +boll +clause's +burden +appendages +atemporal +allah +carnival's +anchorage +adjures +besought +abounding +crucifying +arrangements +antiquarians +burrows +antipode +canvas +constable's +coopers +ascended +companionship +bakery's +bayonets +conclusively +boasters +beneficiaries +conspicuous +contriver +architecture +breakthroughs +brownie's +blur +academics +antagonist +contemplates +arena +caravan's +administers +comprehensively +convey +bigot +blitz +bibliography's +coerced +assail +amazons +banned +alabaster +concluding +bouquet +barks +acquaintances +astonishment +constraint +backpack's +breakthroughes +blocking +accomplishers +catastrophe +bushels +algae +ailment's +anemometers +beginning's +chefs +converse +cornerstone +astound +assuring +adornment +anyone +alumni +club +bestselling +businessmen +constructed +attendee's +cooped +ablute +chronicler +alaska +clam +canonicals +concerned +aligned +creek +burrow +allay +admirals +blackens +compressing +confirm +cows +battleship's +belched +affixing +chalices +choirs +absentee's +baseboard's +apportionment +adheres +accounts +chef +access +clearings +accompanists +concentrating +ado +bathos +bailiff +continuance +ball +bearer +congress +cites +can't +balloon +crams +consults +bungled +bike's +apes +assassinations +colt's +consecrate +ancients +chick +analyst +adsorbing +burntly +accompanist's +apprehensive +bengal +boughs +ankles +anchored +benefits +accommodation +amiss +brink +chewers +blueberry's +chairs +adjoin +bivalve +autobiography's +automated +comparisons +climbed +artists 
+congruent +cold +atonement +cashier +armageddon +allocations +bereavements +bumblebees +blew +busboys +bottoming +alternations +apprenticed +bestial +cinder's +consumption's +abbey's +amended +continued +birefringent +barbados +ability's +compulsory +antler +centerpieces +accountant's +arrogant +ballads +ascenders +appliers +adjustment's +blabbed +baits +activity's +clod's +adjudicating +bleak +commutes +bumming +beating +cohesiveness +branded +acknowledger +communications +blockhouses +booklets +consenters +creek's +consulting +binary +coaster +ascription +bushwhack +boggles +affidavit's +arrangement's +congressionally +convenient +avoider +abaft +bootlegger's +befriending +ceases +carbonizes +clumps +commented +competence +conversing +butting +astonishing +armful +allegory's +crisis +critiques +concurred +conservative +aristotelian +blizzard's +corner +amateur's +compare +affiliations +bestseller +batch +cleanly +assayed +bravos +bowls +conceptualized +babe's +algorithm's +baptist +cheeks +conquerer +bidder's +behaving +briefcase's +analogues +amply +attitude's +apple +crossable +ambushed +besmirches +creditors +bandwagons +continentally +adjuncts +concerns +agers +cop +amoebas +bisected +bombing +appendices +cocking +bused +babied +blackjacks +controller's +aquarius +charm +clip +awarder +consistently +calibrated +bushwhacking +avaricious +ceaselessness +basically +accolades +adduction +commending +consulates +certifiable +admire +appropriateness +bandlimits +chill +adds +constable +chirping +cologne +cowardice +baklava +amusedly +blackberry +crises +bedeviling +botching +backbend +attaining +continuity +artistry +beginner +cleaner's +adores +commemorating +amusement +burial +bungalow's +abstinence +contractually +advancement's +conjecture +buckling +conferrer +bankers +cherub's +belonged +classifications +baseball +carbonation +craved +bans +aphid +arbor +ague +acropolis +applied +aspired +calibrating +abundance +appeased +chanted +ascent +convenes +beep 
+bottles +aborigines +clips +acquainting +aiming +creditor's +abolitionists +cloves +containments +bungling +bunt +anchors +brazed +communicator's +brew +accumulate +addicting +actively +befog +anachronisms +bumblers +closest +calculators +absurdity +colleagues +college +assesses +conflicted +associational +betide +conceptualization +adjutant +alliances +corresponding +barometers +cot +brooch's +coiled +arboreal +convicted +artless +certificates +bourbon +astonish +bust +correlate +amounts +anal +abstraction's +corns +conqueror's +boldly +bob's +beer +blanks +corpses +contingent +blackly +backed +appearances +cancers +actuating +apprehension's +colorings +anglicanism +armament +armer +bizarre +begotten +actions +archly +capriciously +clue +contractor +contributions +agendas +coached +blamable +annoyers +coupons +brooked +assortment +axes +celebrates +courageously +baroqueness +blasphemous +asserter +contents +correctly +challenged +bulldoze +casement +acknowledge +bitterness +belongs +allotments +chalice's +bequest's +adjacent +consumer's +conservatively +coalition +background's +backache +befouls +brushfire's +analysts +branch +airways +awaiting +breakfast +anoints +baying +contrary +bilge +chasm's +babes +afresh +centerpiece's +barked +coffin +assumed +actresses +accentuating +aching +abet +balancers +consumptively +cagers +backing +angiography +chord's +cheapened +bewailed +arson +begged +convergent +bowlers +conflicting +confiscated +bitch +bloody +brushfires +bleach +computation's +choppers +circuitously +chancing +bunker +concept's +alacrity +boyhood +ammo +bobwhites +carter +ardent +bier +airway's +brownies +aura +cannibalizing +confirms +australian +barrage +closures +assertive +abstainer +bicarbonate +clone +back +cipher +crown +cannibalizes +away +crafty +airings +amtrak +comical +burnish +continuum +apparition +apologizing +blot +blacker +characters +built +apparent +applicative +assiduous +attorneys +affectionately +bobbing +baggy +comic's +attempt 
+appealers +amortize +bonanza +backwards +bowers +anemometer +ambulance's +creeps +abduction's +coal +chiller +adjudications +clogging +ascending +bookkeeper +crawlers +battery's +artifacts +attributions +amusements +aftermost +allophones +bemoaned +comptroller +bugger's +buoyancy +booboo +award +amplifying +certify +bivariate +attunes +asteroidal +chant +compounds +asserts +believably +alert +apostate +catalysts +aureomycin +convex +beetle's +banishing +agitating +bystanders +bow +connotes +blanch +charmingly +animal's +baritones +brier +astronomer +company's +balding +actually +aunt's +avalanches +acquisition +base +compilations +bathtubs +actualization +chanced +atom +banged +befuddled +apologized +componentwise +britisher +began +conservationist +actuate +crosser +appended +bitten +ambivalence +acetate +conversions +buzzwords +askance +abolishing +birdied +creeds +anglers +colossal +bereft +chock +apprentice +cooper +besmirching +allocating +antiques +bikini's +bonders +afflictive +augmentation +atheist +bucket +bibliophile +annexes +beguiles +birdbaths +amendments +animators +asymptotically +communally +barber +biographers +arguable +confidant +apologies +adorns +contacting +coarsest +artichokes +arraign +absorbing +alden +commercially +cabbage's +coincides +clumping +cents +alleviater +buzzard +braked +anesthetized +bugling +capitalist +befriended +appreciatively +boomtown's +cozier +critic's +correspondent +bard +attenuator +bake +brings +chews +anechoic +brutal +colder +buckshot +canvassers +analytic +allies +alloys +awake +alienates +bin's +crimes +constructible +classifiers +bulb +cream +banquet +axiomatize +adjourn +converted +auditioned +comfortably +bandwidth +cannibalize +ascensions +bussing +balloons +contenders +commemoration +aspersions +consultation +cashes +belting +augurs +architectural +bluebird's +breastworks +absconded +bullets +bloodstain's +blunder +astronautics +coo +approves +authority +assure +amsterdam +acquitted +adversity +celebrate 
+bred +bridged +bloc's +bullied +affinity +breezes +baptistry's +constitutions +avouch +amazingly +consolation +abnormality +clashes +buttes +buzzard's +breathers +chipmunk +contented +carol's +armers +amazedly +comprehends +canonicalize +breakthrough +arbitrator +butterfat +cases +besiegers +affianced +amelia +bush +airplane +annulled +bike +alternated +attackers +crude +carelessness +akin +combated +assisting +clocker +attacked +briefed +antic's +attendants +attracting +cope +allotting +bandwidths +add +assaulting +breakage +climes +arrival's +burp +accelerator +capacitance +arabians +bankruptcy's +archeological +coins +browbeating +convene +aficionado +anachronism's +chasm +cardinalities +compartmentalize +courter +assess +abreaction +brakes +compatibly +compression +characterizable +briefing's +alto's +classifiable +contrast +correlation +colonial +applying +authorizers +contesters +basely +cherries +clicking +cornfield's +alarmingly +conferences +business's +banker +bloomed +airfield +attracts +building +commutative +atomization +competitions +boatsmen +acquirable +arkansas +command +beings +compactors +anodize +arguments +conforming +adsorption +accustomed +blends +bowstring's +blackout +appender +buggy +bricklaying +chart +calmer +cage +attractive +causation's +athenian +advise +cranks +containers +besotter +beret +attender +cone +bills +aligns +brushlike +brownest +bosom's +berth +accountably +bequeathed +affirmatively +boundless +alleyways +commute +bendable +abhors +calculation +affidavit +answerable +bellicose +counterfeiting +admiral's +chisel +bridesmaids +believers +aggregated +conspicuously +abased +armenian +conspirator +canonical +assignable +barrage's +clearance's +casts +administratively +befoul +chaffer +amazer +colorer +broaching +crevice +aniline +coursing +compassionate +adhesive +bibliographies +corrects +augments +between +causer +amorist +cellist's +acoustical +baseless +cigarettes +astuteness +appropriators +convincing +bellhop's 
+bemoaning +calmingly +chronologically +castles +algebraically +appointees +academic +blunderings +assassins +barrel +accuracy +amortized +ballpark +acrobat's +brazier's +abortively +coarser +airfields +contester +circus's +creased +amorphous +accomplisher +blabs +butchers +crackles +bachelor +aviators +chariot's +circumflex +binocular +alienating +artificially +agreement's +aglow +afghan +abrupt +annihilates +apologetic +barge +betters +algorithms +conjurer +chargeable +brindle +alphabetizes +coder +availing +bandpass +arrogance +convent's +advertiser +connected +basso +breakfaster +comic +congenial +beau +courters +adapters +abruptly +chemicals +bringed +creaming +butterer +attained +actuals +averred +brainwash +centerpiece +blabbermouth +byproduct's +adaptable +automata +art +cheery +beheld +beehive's +claimed +crucial +brokenness +agility +combating +cleft +amenity +after +configuration +contrasting +coarsely +brass +barnstormed +bowel +bridesmaid's +cornfield +crazing +autocracies +adult +conceptualizations +corroboration +bedders +arroyo +alarmist +boatman +chests +burglary +budgets +canary's +arraigning +chin +barnstorms +blamers +brimful +calculate +cellular +contended +challenges +brusque +bikinis +arithmetics +chairpersons +class +aircraft +capably +centralize +awhile +compacting +courteous +archaeologist's +cram +adagio +affronts +amplitude's +bureau's +audaciously +autism +blueberries +an +chips +confiner +chopper's +chronology +breaching +bead +amass +camouflage +compensation +aspect +broker +atrophy +balk +bloodless +barnyard +benefactor's +airdrops +caused +anthem +activist's +bottomless +arrogates +avoided +bouncy +clarified +articulate +almoner +communists +blokes +butternut +clockings +barium +blows +criticism's +associations +brute +bleeds +alliteration's +bluestocking +boxwood +clearer +allegiance +conceptualizes +captivating +bolshevik's +belabored +biographic +contaminates +chanticleer's +adjusted +childhood +arguing +cape +conversantly 
+compensating +collaborations +arraignment's +blasted +charging +aggregation +apprentices +bird +codifiers +ballistic +breve +bells +carolina +chalk +buckles +boyfriend's +adorn +accoutrements +availability +antisymmetry +blades +alluded +asterisks +bookcases +additive +consents +advanced +balalaika +coders +caliph +alundum +are +controllable +blazing +clattered +asiatic +axiomatizes +ace +coining +column +auditor's +carol +concatenated +arrayed +capital +cautioner +clan +beauteous +abbreviate +asteroids +canal's +consolidation +closets +concealer +crevices +abed +complex +conviction's +abide +arrests +begrudges +adolescent +conceals +cells +circles +bravest +compromiser +bagels +areas +afore +allergies +arrangement +attraction's +amulets +abstraction +captured +crouched +brothers +cash +achieving +bastard +compete +boiling +beaching +amphetamines +clerking +congestion +alleviates +angry +bared +comprehended +bloodstain +constituency's +automating +aerial's +counterfeit +besotted +basses +biofeedback +compilation's +band +consulate +appellant +cough +antennae +contend +anniversary +boor +artifactually +aerobics +booths +chubbiest +consumable +assignments +bromide's +confined +breakers +alongside +courtier +boisterously +bilaterally +alternation +auspiciously +arbitrated +condemning +burns +correspondents +composition +cavalierly +coverlets +capacities +clatter +apotheoses +cartography +ceased +capitalized +auditor +appendicitis +chops +barony +anemometry +befouled +briefer +chest +begetting +bloats +bookseller's +commitment +confides +carcass's +battering +altruistically +ballots +adornments +broaden +angularly +coefficient +cataloged +brae +advantage +anthems +calculated +counseling +agitate +accentuated +camel +ambivalent +bedposts +beacons +chubbier +cheerer +assumes +concord +autumns +convention's +alpha +adulterates +arbiters +archaically +criteria +achilles +cheaper +bulling +associators +bloater +brawler +ability +adherents +commonwealth +coyote's +centrally 
+bequeathing +abandonment +circumstantially +courteously +borrow +countermeasure's +capricious +allied +anagram's +absorptive +assuage +asset +booked +aspects +commits +crates +capacitive +condones +assimilates +carriage +competitor's +cocoons +aggravated +caravans +arbitrator's +baked +balanced +annihilated +addressable +autonomous +bandwagon's +contesting +burrowing +coroutines +abjection +correctable +applauded +bragged +code +aggressiveness +cluttered +attacking +chide +am +coasters +blizzard +contentment +altruism +certifier +capturing +combinators +carefree +activate +blindfolding +assassinating +approximate +biplane's +aplenty +arteriosclerosis +concentrates +antisymmetric +assurances +anarchist's +ascend +advancing +atrocities +butt's +bearable +craftiness +categorized +barn +contributor's +arises +bushy +bisque +coasted +bargaining +area's +couples +cabs +barter +bulletin +chisels +broadcasters +contingency +bywords +antimicrobial +coexisted +blinding +arithmetize +coweringly +convince +competed +bauble's +crab +boggling +advocacy +atlas +assembled +ancient +bloodstream +balking +bin +bully +affirm +cruelest +atone +conserved +confession's +bat +captive +aster +blames +colonel's +bones +borderline +cleanses +classified +crudest +contiguity +bailing +ablaze +bender +attendee +clobbers +aliasing +boats +brand +church +bandy +adhering +barred +ammunition +chime +accompaniment's +battleground's +composing +caveats +armor +amoeba +composure +collides +avowed +banding +counsels +asymmetric +abbreviates +balky +adjudicates +anointing +accursed +copse +action +construction's +accents +ambition's +caressing +autopilot +coolers +cache +allayed +barnyards +britons +appointment +adaptor +blockers +abridges +bloodiest +betrothal +bombards +bony +bus +canary +antinomy +awash +comrades +ablating +collectible +cosmetic +accession +clutters +censures +allusions +belittled +armchair +abode's +conception's +ascribe +aliases +ancestry +ax +companionable +aright +boxed 
+brighteners +alloy's +checkable +arraignments +bed +bunkhouses +abbeys +ceasing +companies +cherishing +chunk's +barony's +chinning +burdens +briskness +beggarly +beloved +clambered +constitutionality +beguiled +archers +alleyway +apostle's +consulate's +antiformant +categories +construct +aliments +acquired +blotted +alterations +adolescent's +cranes +bluntest +accusation +chafer +airstrips +abolished +bothersome +churchly +airy +bedded +awareness +alliterative +arose +amputates +civilization's +arenas +certifying +aspirators +carbon's +bunching +aerates +bilked +checking +cloned +administrations +canvasses +colorless +chamber +circumspectly +benedictine +advisedly +classifier +approachable +banners +concurrently +chores +agape +convention +bindings +budget +comedies +ants +ambassadors +chroniclers +carrots +colorful +bulkhead's +coherence +buyer +aggressions +congressional +commoners +cheapen +concealed +columnates +anarchy +actress's +baseboards +creature's +centuries +barbarian +concrete +bicycles +acceptably +acclimating +biceps +bloodhound's +becalmed +apostle +bible +conjunctive +comb +ballers +bickering +adulterous +austrian +applicable +blackberries +creasing +catalogs +avert +asparagus +cambridge +bird's +belgians +admonished +admirations +conscientious +crescent's +connectives +blissful +commenting +bagged +assimilate +abounded +copyright's +advancement +axiom's +compilation +circumlocution's +catheter +chances +concretely +codification +browned +clustering +bum's +clauses +boundlessness +arteriole's +alfresco +begrudged +blustered +anglican +adjoined +bamboo +bathed +consortium +carrot's +cloak +album +bunglers +approbate +colored +aim +cowboy +alienate +cleverest +ambiguous +confrontation's +clear +africa +bowline's +astronauts +belayed +censorship +animation +bedrooms +chasms +compared +cogitated +barbarians +accomplices +columnizes +beaming +busied +counterpointing +aluminum +coconut's +acclamation +chokers +biomedicine +basalt +buckwheat 
+cardinality's +bafflers +arid +chap's +abound +biblical +backbone +anticipation +condemner +angular +advisability +believing +boiler +arclike +abetter +bespeaks +axiomatically +coarse +auditions +bludgeoning +clam's +chief +arrow +cementing +anxiety +aberrations +brushes +cherub +corollary's +bunters +beefers +barbiturate +circumlocution +conjoined +charities +coverage +campaigner +burrowed +barracks +bristling +accomplice +abandoned +bull +caked +century's +bantu +bristled +airer +bench +bevy +chamberlain's +attention +cloning +camouflaging +alder +counter +credibly +approvingly +breakup +artillery +celestially +bail +baker +bullish +canvass +conversationally +bringers +augment +creditably +butterers +botswana +contemptible +bribing +adumbrate +barb +calico +alludes +amplified +chills +cloak's +aver +arthropod's +budgeter +bereavement +cellars +crewing +blackmailer +ayes +bedsteads +breachers +bazaar +centered +celebrity +blameless +abscissa +aerators +awaited +british +adversary +cowslip +buttons +confusing +buggy's +belts +canceled +addresses +bribes +condoning +bonneted +coarsen +amazement +angels +chemise +carbonates +apostolic +bandit's +contending +consummate +counterclockwise +beneficence +benefitted +contradicts +comfortabilities +anemone +conductive +articles +bookcase +burst +baptizes +countless +costs +agonizes +byte +creeper +begs +bunnies +attract +able +calories +baskets +american +brunt +cognition +closing +chef's +backbone's +complicates +cloister +bedsprings +arrays +brigs +archbishop +buckler +clove +catholic's +bellboys +chairmen +clap +clarifications +ambuscade +bight +bellyfull +allowance's +academy's +construe +cancer +bay +aristocratic +alleviaters +binoculars +axiomatizing +changer +bustle +civic +ariser +axiomatization +aggravates +confiscation +bowdlerize +backspaced +alters +clarity +blots +bland +belligerent's +burgher +cardinally +bookcase's +buggers +byte's +avarice +bostonians +crops +authorizations +cogitation +baptize +caressed 
+abase +crowding +beriberi +allegories +coronets +cell +calculative +adduce +amperes +bladders +adages +contests +cognizant +actuates +ambiguity +brighten +concert +conviction +booty +ashtray +braves +blouses +avoiders +confederate +bombings +couplings +convictions +attractiveness +chronicled +corers +anger +covertly +aural +asynchrony +arrowheads +breakdown's +bulletins +acquiescence +ambush +catches +at +billion +contact +bees +adopters +approximately +chiseled +attributively +criers +codification's +cowslips +contradictions +buttock's +categorically +counterpart's +confessor +appreciably +adjusts +altitude +ceremonialness +clipper +bracelets +anthropomorphically +benedict +connecting +bacterium +achievers +abutter's +autocorrelate +coupling +blanketer +continental +assignment +conundrum +arab +besides +cheerful +blowup +bastion +arrive +combines +agar +cookie +astronaut's +constraint's +article's +confiscations +bounded +adjudicate +belligerently +boron +brownness +adept +creep +abduction +accosting +asylum +autographed +clash +chiseler +clumsily +capitally +braking +absenting +bagatelle's +comet +basked +anything +buffeted +absentia +bounty +carols +characteristic's +constructive +comforting +aflame +brainwashed +booby +aspirations +adjudge +behaviorism +computability +assessment +consultations +bowstring +acknowledgment +arranger +chancellor +attest +compresses +concessions +asymmetrically +administering +clamoring +arraigned +archived +admonition +actor's +aimers +colorers +booklet +calibers +affix +bushel's +atomizes +creeks +bleedings +casuals +archives +certainly +animate +cons +affiliate +answered +coyote +coughed +alligator's +antagonized +arousal +assisted +aerated +competently +conquering +acclaimed +assign +announcer +controllers +amalgamation +comfort +antihistorical +availed +balsa +annoyed +basted +asymptomatically +cropped +combinational +barging +conversant +causality +botches +bedspread +considerately +bookstores +climate +blessing +accordion's 
+cdr +bonanza's +construing +bearings +bluster +backspaces +babyish +countermeasure +crime +battered +audit +associating +corps +application +archangel's +aided +breasted +compelled +acrobats +breakfasts +chronologies +beet's +averts +convergence +attributable +adverbial +churns +arrest +breastwork +beefs +brownie +create +contradistinctions +coordinators +abandoning +byline +beatitude +autosuggestibility +bipartite +annals +assents +conceives +amalgams +cleft's +clicked +appointers +bible's +boots +caret +attaches +controversy's +combinatorial +bazaars +cardinals +bored +catering +christian's +ashman +consequence's +austere +clay +birthday's +amongst +arbitrariness +brainstorms +chateaus +coaxer +applause +cautiousness +adorned +compromises +creatures +compliance +apartheid +archiving +amoeba's +communal +comedian's +aggressive +crop +ante +better +chalice +aristocrats +circling +belittle +abortion's +coldly +certification +befriends +courthouse +anesthesia +accorder +athletic +blithe +bedder +abasements +councils +beware +abductor +assonant +clench +aspersion +abortion +abating +birches +breakpoints +acyclic +ablate +canners +cistern +boxtop +composite +cloudless +computation +chastely +abusing +bunker's +compounding +alveolar +chaplains +bias +audiological +capability's +bangle +barren +antidote's +cranking +baptizing +bond +borders +automobile's +allegoric +chargers +baltic +autumn +columns +absolute +connoisseur +cranberry +contiguous +consoled +confirmations +argot +blouse +annotated +callous +astounded +crashed +autonavigators +chivalry +columnating +beefed +convincer +allegorical +bagger +assume +containable +artistically +calibration +architectonic +campaigns +addressability +crazier +buy +brightener +bastion's +blurb +awaits +commands +chocolate +bleaching +antenna +blowers +chorused +composers +assigners +aspires +coils +bid +application's +clamped +bedding +awkwardly +coppers +costumes +borax +caged +candler +badges +clutches +consign +apprised +buys 
+adiabatically +aggregately +canned +abstract +acrimony +coax +analytically +absurd +alluring +contradicted +aspersion's +bribe +boos +chattererz +backache's +complying +continent +cohabitate +causation +astronomer's +cities +bookie +bleating +cracking +bicameral +convoluted +adjustable +ambulance +can +boulders +consideration +announces +briars +antipode's +bartered +ancestor +biplanes +characterize +crested +bum +bridling +consolable +bungles +coffee +buffets +congratulation +commitment's +adequately +clown +capacitor's +broomsticks +agglutinate +activations +asians +canon's +authenticity +complexities +cripple +bracket +counselor's +beatably +bounced +baton's +crankiest +barbell's +caster +casseroles +ballad's +bob +batched +attenuated +beakers +biologist +bleary +condescend +blondes +augustness +boldface +battlefronts +acumen +bolting +articulatory +butyrate +bowel's +backwater's +colonel +creating +authorized +bijection +accruing +admirably +correctness +citadels +clasps +bandlimit +bib +appalachia +contrives +bundle +audiology +circumventing +blinker +choked +bilks +clears +affirmations +arbitrating +bites +bootstraps +capitals +commuters +billeted +authentication +choice +attentively +aggressor +arterioles +crowds +chestnut +backstitched +attachments +assimilating +bewilderment +atrophied +chintz +blackjack +armadillos +bonfire's +ballast +agonies +busier +coefficient's +adventurous +ballet's +coil +chewed +come +bonder +catalogue +coursed +arise +biennium +ceremony's +blanching +appraisers +acolyte +argues +beholden +appanage +astatine +banana's +coons +civilians +bodyguard +archipelago +bug's +candles +antique's +accidently +blighted +belgium +besieged +burned +abuse +asian +chute +awkwardness +abasing +bottler +ardently +blab +breakwater +cavity +cheated +befall +according +chronicle +airframes +bats +choring +authorize +consumed +chatter +annunciated +capers +anomalous +clustered +burner +acquaintance's +badger's +basic +affectations +buzzy +coast 
+attendances +activating +beams +cohesive +attainable +barbecueing +beautiful +acronyms +communion +client +atypical +antagonists +conservations +arguers +agglomerate +antigen +battalion +ambition +countered +assistant +classed +arming +alveoli +buff's +backplanes +busted +bermuda +converting +brutish +boot +acidities +confrontation +chapel's +berlin +ascender +behead +buddy's +commandment +actuated +brilliancy +chance +bedrock's +bridgeheads +arable +avid +arteries +caresser +ballyhoo +attested +african +comradely +consciences +commencing +antennas +annulments +bobolink's +advisee +acceptance +crack +ascendent +appendage's +accommodates +accumulated +clones +apocryphal +ages +cluster +capitols +camper +beading +amble +buffeting +circumspect +advances +analyzes +courier's +aperiodic +appealer +atonally +attentive +conspire +appropriating +armed +allergic +agglomeration +consternation +blinks +audibly +aspirins +bunions +adverbs +armload +bet's +caring +carryover +coordinator's +afterthoughts +allays +abided +brownish +baiting +capitalism +coined +conspirators +automatic +contradistinction +conductor's +backstitching +conjure +casings +accountant +clinched +constrain +alcohol +bee +anticompetitive +britain +bade +camera's +antimony +activated +burglarizes +compatible +cotyledon's +artificiality +bath +citadel +archivist +chandelier +addiction +ampersand +bitterer +constructively +afield +bing +attractor's +cringe +allergy's +bigots +assimilation +ate +capitalization +abridge +buzzword +befit +bandlimited +commandant +alabama +acculturated +brightening +bulldozing +cooky +bunks +centers +bespectacled +adherent's +abducts +another's +condensation +billeting +bye +chess +craziest +ballgown's +archaism +consorted +chinned +cowl +beat +bootlegger +bravado +classically +bulging +browbeat +accommodate +borne +bronzed +artifice +arcade +become +backlog +addressers +amphitheaters +befogging +crochet +aiding +celebrated +conversational +backbends +authentications 
+advertisement's +blockade +bulldozes +contraction's +bricklayer's +brain +conveying +anemia +chronology's +channeling +caution +commanding +crosses +artisan +conditions +admired +authenticator +airships +blunter +bridesmaid +counseled +cheeriness +chiefs +boils +clerical +atrocity's +balls +ambled +canvases +consoles +abscessed +abetting +blitzkrieg +bottlers +beveled +condemn +alumna +cords +admittance +annotates +citing +corrector +appreciative +branching +betrays +buttoned +ailment +boulevards +bottlenecks +chamberlains +bedbug +covenant's +crispness +considering +broadcasts +audubon +arousing +correction +barrack +closure +contrastingly +brittleness +assassin's +bursa +bungalows +balked +conceptual +carcasses +arabia +blueprint's +affectingly +consorting +buses +auger +appointed +brute's +bosoms +anyway +arrowed +anaphorically +clarify +approachability +assistance +buzzes +commonplace +bluebonnet's +adroitness +availers +aquifers +architecture's +action's +backgrounds +abduct +attired +briber +admissibility +cease +beck +auctioneers +birdbath's +atomic +crossing +considerate +biconvex +bulge +bedridden +arising +aggression's +cherish +bureaucratic +abater +amputating +atop +climber +clutched +afford +bisections +bonnets +commendations +bloke +abundant +clamp +aloes +aboard +atheistic +advantageously +buffs +chimney's +cheerily +benefactor +ample +bushwhacked +captain +buckskins +contextually +antiquarian's +browns +bubble +ban's +brine +acculturates +anhydrously +beaver's +advantaged +bibliographic +clasping +clattering +coerce +colorado +airmen +bandlimiting +balks +boners +attached +chosen +convened +bordello +composer +botanist +backtracks +civilization +commutativity +bloodshed +cohere +bunkhouse +archdiocese +boycotted +crosswords +bedspread's +anteaters +cove +apothecary +chute's +addressee +climatically +blower +bane +cask's +beetling +ambiguities +before +abstain +arachnids +bucket's +amateurs +blackouts +adverb +butchery +conjunction's +barricade 
+audiologists +aphorism +complete +butts +bishops +allotment's +confusingly +channeller's +blanches +bragging +bathe +comedians +celestial +citizens +couple +backpack +aphasic +brothels +axles +cancellations +bonus's +consolidates +authoritative +axle's +acclimatization +carolinas +chime's +antibiotic +bisons +biographically +achieve +bleachers +bicentennial +behavioral +accomplish +concealment +biddies +antitoxins +arriving +apprehend +affluent +cliffs +bleached +astronomers +connection +bride +backs +bog's +casket's +continual +ampere +cat +alternator +cotton +athletes +communicant's +best +befuddling +benefactors +appease +annoyingly +context +astonished +cracked +amnesty +autumn's +binder +babying +contributory +assumption +cowls +cocks +airless +consummated +atypically +beneficially +chairing +accusative +commanded +bufferrer's +alerter +arbiter +civilly +charms +backscattering +cheater +bushes +caverns +chieftain +calf +comparing +aurora +butyl +cower +bemoans +baptistry +carpenter's +capes +bordered +arrows +blocker +crest +appeal +arabic +conventions +axis +brains +bookkeeper's +circle +cooks +circumlocutions +adventists +barringer +affording +anatomically +basements +barbarities +configuration's +contributes +collaborating +beach +comet's +bakes +assigns +ballerina +cheapens +clinging +conquered +bisecting +closenesses +bugle +boatmen +beatings +complicator +bight's +banister's +archaic +anthropologists +clams +beginners +committee's +communicants +alone +bounteously +bastes +ascertain +alphabetical +bringing +batters +amazon's +constituent +benders +being +constitutionally +audiometric +blast +copings +bailiffs +colts +coolies +airlift's +boomerang +bifocal +clothes +cashiers +congenially +billows +boilerplate +biochemistry +betting +brimmed +complementers +breading +bragger +adducting +bisectors +abrogates +criticized +comrade +bucolic +birthright +blurs +challenger +complicated +bluebonnet +biscuit's +classmates +campus's +boundary +bedbug's +adjustor's 
+acre +bicycling +awe +additions +baiter +authorizes +beautify +copier +buffet +belfries +acquisitions +brooch +crickets +caterpillars +beefsteak +complicating +bedpost +criminal +celebrity's +bookseller +christened +coerces +clamors +all +boatyard's +canoe's +begin +anaerobic +bushing +agreers +concedes +countermeasures +beg +agglutinin +bunted +ammonium +aspiration's +bathrobes +changeable +beached +bestowal +beaner +catsup +admires +clockwise +agile +alarms +ached +chinks +buffer's +cartesian +annunciate +chanticleer +avenue +anchor +alliterations +blanking +bargained +breathtaking +crime's +assiduity +argentina +contiguously +aqua +bested +borderlands +appetite +captive's +bipolar +conceal +counters +costumed +arrestingly +bunting +blight +champagne +brusquely +address +bloodhounds +associative +creed +arithmetical +balustrade's +belabors +complementing +checkout +archivers +badlands +behaviors +ampoules +bridgehead's +antiquarian +clumsiness +considerable +apportions +anglicans +appealingly +barfly's +absorptions +awards +congregates +cloister's +armour +avoid +correctively +chucks +burps +bums +berry +batches +administration +atones +bishop's +blonde's +casualty's +cores +bodied +alter +assonance +apprise +antitoxin +avariciously +checkpoint's +affirmative +conjures +angstrom +aesthetically +canyon +binge +crazed +breastwork's +aids +boston +conceits +announcement's +beechen +accessory +authorities +constrained +automation +anaplasmosis +commander +commendation's +belabor +cornfields +artemis +asphalt +contracted +brochure +crafted +allegedly +alien's +auditory +blowfish +adducible +confederations +annuals +britches +acquaintance +appallingly +abounds +burglarproof +crossers +bayous +brisk +authority's +covetousness +averse +accomplished +aromatic +admiral +bijective +avenging +bran +boatyards +beseeching +challenging +bares +acts +abductions +compendium +compulsion's +calendar's +clad +blockage +conventional +craze +cajoling +acceptability +bungalow +buff 
+cramps +attackable +calculator's +asp +braved +colors +balling +contaminate +crackling +comes +complimenters +across +astronomy +aborigine +bobwhite's +autopilot's +chattered +appall +autonavigator +bashed +acoustics +beachhead's +apartments +convenience +blackout's +bands +autonomously +amounters +centripetal +achievable +astringency +attuned +concatenating +copyright +coding +assumption's +anastomoses +confiscate +asking +beneficial +adhesions +busboy +bronzes +audacity +bruises +crash +beau's +circuit's +aborts +baubles +beliefs +assuaged +costed +blinking +characterized +bowled +block +conquests +confesses +amusers +ceiling +berets +berliner +abstentions +child +authoritatively +closeness +bushel +considered +communicates +cheerlessly +autofluorescence +aquarium +affects +appurtenances +airbag +approaches +admonishments +bets +bounden +courtly +bodybuilder's +campus +brainstorm +americans +chairperson's +botanical +askew +amazon +bleed +clime's +cooperations +commonness +boatloads +blinked +courtyard +adapted +aforethought +backwater +burr +cathode +awaking +buzzed +bridgeable +arrives +adventuring +beseech +attrition +copied +colon +client's +bandstand's +advice +baptistries +antithetical +alcohol's +contradicting +ambidextrous +belches +category +bluntness +coupon's +assimilations +comfortable +caller +affliction's +attends +compactest +baler +beacon +blind +bleakness +beseeches +courts +couch +consequential +adulterers +craving +biggest +astray +bigoted +barfly +charges +ambiguity's +commentary +crankily +cowerer +carnival +bachelor's +bituminous +continuance's +calamities +claws +apiece +century +ascendancy +charts +animations +aggression +chickadee's +carve +confidence +actor +bubbled +becalming +convulsion +chivalrous +brightest +centralized +beautifies +amateurishness +birthrights +alligator +circumstantial +constructors +conceptions +arranging +cart +cent +ager +congruence +carrot +chariots +cloudier +captivity +conquerers +compartmentalizes 
+condensing +celebrities +chalks +accordance +chilled +conversations +apples +conceiving +average +blessed +creator +ant +cling +annoyer +aviation +cohesively +correspondences +boor's +apprehended +bessel +both +characterizes +bards +cots +acculturating +cemeteries +carting +alcohols +bitterest +ascetic's +conducts +caking +airspace +autocrats +ashes +chimes +broadcaster +commuter +basket +borderland's +broadened +boyish +allegretto's +ban +bidder +christen +blessings +bury +arranged +choir's +apathetic +boring +aryan +appearing +binds +cooperates +bounces +airspeed +complicators +adapting +babbled +agglomerates +bedraggled +addictions +bolt +calmly +blur's +boatload's +anesthetic +bugs +colt +completing +boxer +billers +affronting +absurdity's +chides +comparatively +braided +clipper's +cot's +calves +articulations +branchings +attraction +concatenates +alligators +cake +boom +crashing +afar +abler +beamed +adverse +adrenaline +agriculture +beehives +crankier +courthouses +advises +consigns +bisect +azimuth's +carpets +arthropod +brewery's +commonalities +altruist +astride +appreciate +carved +briefs +admitter +celery +congregate +clocking +assassinated +adding +canvasser +civics +contemptuously +calculates +advisees +bumbling +algorithmically +cloudy +algebras +addiction's +cop's +assurers +confidently +affector +analyzers +chimneys +burdening +antitrust +admix +avoidance +choking +coexists +accustoms +cellar +anchovy +constructor's +confinements +consequently +accelerations +accoutrement +churchman +biller +affected +brigades +cremating +corridor's +bagging +ah +berating +collective +acuteness +arrestors +cab's +border +agitation +animism +arches +alveolus +cessation's +averrer +abash +counterrevolution +attesting +animateness +bawdy +americana +bloodstained +applicator +annotating +annunciator +clamored +acting +aerosols +axiomatization's +brags +coalesces +avocation +combining +crazily +bravery +burying +adored +airfield's +accounting +broadeners +anise 
+chimney +added +avenges +bellicosity +cranberries +arsenic +communities +comparable +bunkered +architect +alphabetically +beautified +apogees +communist +anatomical +complexity +accost +autographing +browsing +ameliorate +bookers +bandaging +clinical +appellants +counteract +clairvoyantly +bootstrap's +canner +boastful +attainer +ash +beaded +brake +barest +befriend +burglarproofing +allegorically +bunts +believes +accession's +buck +boathouse's +byword's +anthracite +accuse +conjunction +burping +commandant's +creativity +affirming +bark +amuses +balcony's +auditors +counsel +clamber +borates +cowboy's +bickered +boors +combing +biting +breeze +crowder +corn +bloke's +bombast +bookstore +blared +bedlam +carbohydrate +coops +bundles +blistering +antarctic +anterior +bilinear +chocolate's +context's +alternating +annoyance +constancy +ambivalently +buddy +brutalize +bobbin +alleles +commotion +attributes +airborne +creed's +bolstering +coaxed +airframe +breaker +accept +abashes +attentional +contributor +comparability +auscultating +cocked +computationally +buffered +career's +analyzable +absently +courtyard's +buildups +apportioned +balkanized +annulling +cremation +buffetings +conditional +confided +airliner +bulldozer +approaching +anagram +apollonian +canaries +bloat +bluebird +collision +cool +connectedness +abasement +artisan's +avoidably +clerks +afflict +briton +corroborates +cameras +counted +boldest +burglars +brutes +brows +abhorrent +configuring +averaged +ace's +buying +abandon +bayou +cottons +auditioning +amplifies +clippers +brainstorm's +alto +brutalities +bunch +agricultural +bursts +blunting +archer +activity +carefulness +bedroom's +concomitant +balm's +artificer +barking +breathy +babies +acacia +bodies +cap's +criticised +conversed +crewed +ascendant +budgeting +coroutine's +charmed +bellboy's +conservatism +butler +acculturation +conclusion's +adapt +cellist +contempt +adumbrates +borrowed +confounds +allegiance's +blabbermouths +accrues 
+captor +coop +baseballs +cottages +apartment's +assertiveness +assent +artfully +bagger's +abolishment +acetylene +accessory's +blackbird +baptist's +consist +cavern +buttock +corporal's +autoregressive +bailiff's +birds +corder +bracketing +antlered +barbiturates +county's +addicted +agglutinated +abashed +competitively +captains +bloating +accepts +choose +ashamed +backyard's +apiary +contradiction +balalaika's +arctic +broom +anvils +coffee's +alliance's +agitator's +change +adjusters +cremates +complexes +bodyguard's +burl +antithyroid +ambient +airfoil +apricots +athleticism +abjectly +bankrupts +answerers +alternatively +confronter +breaking +baronial +cannibalized +appetites +breaded +blackboard's +battlegrounds +cosine +barrenness +abbreviation +budging +boolean +acrobatics +again +ashtrays +clashed +contingent's +compulsion +bedazzled +collapsing +comparison's +businesses +compassionately +achievement +buffering +candlesticks +austerely +awls +associate +absolved +annexed +airway +clipping +counselors +conscience +attempters +constructing +biases +cautioners +comma's +cosines +char +auscultates +afire +comely +amity +beverage's +anew +ballplayer's +adulterated +authorship +alterers +burdened +attributive +afflictions +blinded +barrier's +attachment +brotherhood +bridegroom +atoms +cobweb's +copes +controversies +complexion +crawling +atomized +adjust +accuracies +concern +cinders +authorization +appraisingly +bladder's +cooked +cowers +batter +commissioner +close +burglar's +allocated +anvil +aftershock +abrogating +chemistries +advisable +conduct +committee +blaring +appalling +braveness +alertly +artificialities +brevet +collision's +arizona +bower +creamers +awnings +arsenals +crane +city +contemplative +catheters +administrators +attorney +churned +attractions +columnation +bobbed +centipedes +bostonian's +apprises +buries +allege +botulism +adobe +ambassador's +covenants +boon +asynchronously +bigness +axial +chaffing +battleships +ant's 
+anthropological +accent +brushing +brassy +consumptions +battleship +absorb +beckons +brook +connectors +clinches +accesses +beaters +archaicness +bursitis +chided +bomb +assimilated +addicts +convening +arianists +counting +altar's +confusions +attachment's +clipping's +amazing +corset +bossed +attach +commandingly +animatedly +allegations +assuages +annulment +compress +aptitude +absurdities +autobiographic +aspect's +concentrator +burgesses +anagrams +bedeviled +assemblers +convinced +commentary's +agglomerated +biological +callousness +axolotl's +atmospheres +authoritarian +cancer's +above +charting +aldermen +battler +cistern's +bouncer +amassed +conquest +altering +arrogantly +brokenly +comparator +counsellor's +attenders +cackle +criticize +authored +ably +believed +compelling +accepter +cleansed +afflicted +backslash +computed +almighty +attache +braes +carriage's +benediction +brigadier's +contemporariness +boomtown +amplitudes +breakwaters +clod +catch +bar's +activist +caves +assenting +camp +attainments +brotherliness +continuances +appearance +applicator's +browbeats +banjos +addendum +became +adduces +armadillo +brothel +almanac +courageous +assault +chunk +coaching +atheist's +blunted +aperiodicity +congresses +boastfully +burglarproofed +broadest +bashfulness +affect +acne +bottleneck's +criticisms +corrupts +colonized +closeted +canonicalizing +auditorium +antenna's +awfully +anti +consumes +agonize +algebra's +championing +blush +bugger +antagonize +beethoven +blase +boycotts +compensatory +bugged +boroughs +anatomic +batons +arguably +affricates +appreciations +cavalry +alumna's +arcing +backpacks +braces +contextual +coupon +chillingly +allocates +abuts +contribution +commodity +admonishing +coolly +cabinet's +collapsed +confessions +adjured +capriciousness +chastising +babe +aerodynamics +accepting +concept +contour's +consequentialities +birthday +bankrupted +birthed +benefit +concentrations +azalea +channels +chestnuts +contenting +antedate 
+censors +contagious +abbot's +channellers +apt +commend +avocation's +admonition's +abolition +confederation +carried +clumsy +coincidences +bumper +burr's +bugles +bribers +attainably +consume +comma +creativeness +accuser +bombs +abbey +baffled +aside +clip's +appeases +compass +bundling +abstractionism +confide +creases +apropos +confronted +corrective +concurrencies +autocratic +alien +attending +antagonistic +broadcast +asymptote's +belied +breasts +contrapositives +coiner +accordingly +cohering +computers +cow +bibs +ancestral +controller +attacker +alerts +coconut +agency +alerted +alcoholism +ammoniac +actinometers +acquitter +bud +cessation +alleging +centralizes +articulators +council's +carvings +arduously +blown +anode's +arrogate +bisects +centimeters +burgeoning +course +appointee's +ascribable +communicate +contrivance's +adoptions +attune +acres +abyss's +corporal +certifiers +analyze +augusta +bestseller's +checkpoint +coexist +attainers +argon +bearded +crudeness +averaging +brick +adducing +annulment's +chicks +blocked +cisterns +afoul +affiliates +briskly +adhesion +ascertainable +appeasement +blueprints +agreements +blindfolds +communicator +characterization +annoyances +breeches +brushed +clinic +competes +chuckled +cradled +balmy +antisubmarine +alternate +armpits +barn's +conjuncts +adhere +allows +counteracted +appetizer +capturers +cleanse +avant +abbe +corpse's +arduousness +badge +begets +contemplated +caveat +copiously +athena +aggrieving +alibi +accumulation +basket's +aftershocks +bass +conjuncted +chaps +brunch +colonials +bibbed +clusters +antagonizing +constituencies +combings +bearish +continuously +adequacy +brow's +catalog +alderman +comedic +chemists +concernedly +conceded +alarm +arced +buckle +confidingly +coherent +closes +buffoon +brace +adjustably +crackers +contamination +burgess's +aerobic +constitutes +baptismal +broadness +blimps +concatenation +claiming +bard's +aerosolize +adjoins +copies +coats +boggle 
+corroborated +concreteness +bill +cautions +bantam +bearably +armchair's +birthright's +cravat's +cone's +courtiers +asunder +bulletin's +biopsies +alley +contrive +blasphemies +amuser +ballerinas +blushed +causticly +brandy +blinkers +complimenting +crimsoning +angola +apprehensiveness +bolster +columnate +byproducts +berths +accusal +chubby +arrived +camps +anaconda +cook +airfoils +atlantic +boosted +converge +availer +blemish's +appalachians +coffin's +boarding +alga +crouch +columnizing +consul's +chastises +angling +apple's +billiard +attentiveness +adroit +apprehensible +cereal +blouse's +browning +bodybuilder +coaxing +assertion's +connective's +commemorated +accountability +crooked +blips +chandeliers +aristocracy +bangs +coke +abutment +community +calculus +congregated +crepe +compromised +airlines +contributing +contingencies +coordinated +alginate +batted +contender +alma +antagonisms +accompanied +airport +administrator's +appraisal +breadbox +condemnation +backlog's +available +consequents +crooks +commonwealths +barring +channeller +crucially +archaeological +charming +adventist +credits +appetizing +breads +clients +climbing +aloneness +abstractness +appearer +astute +clockers +antagonizes +agonized +bastard's +conjectured +aqueducts +aureole +boatswains +conjured +chauffeur +complementer +behold +bustards +bivouac +cluck +anus +bless +catastrophic +bounty's +allowed +answer +concealers +brainchild's +coercion +buzzword's +bordellos +appertain +applier +couriers +aesthetic's +craft +capacitances +capped +coupler +category's +anvil's +conquest's +checksums +clucking +bronchus +acrimonious +changeably +accenting +argued +conditioning +brewing +backwardness +cascaded +atomize +contours +arianist +apart +conflict +carefully +banshee's +conveys +arbitrates +amphitheater's +amen +alimony +bound +buzz +courtroom +apparently +coalescing +circulating +amounter +bypasses +breadth +choral +completion +arisen +anticipating +bilges +contractions +bedspring 
+commune +blacklisted +beagle +alkaline +atolls +carelessly +blimp +corking +brevity +alterable +canada +bear +bluntly +cartridges +connoted +countries +corroborate +consecration +corrupted +appreciating +combatant's +alkalis +affecting +blues +casserole +ballad +bewitches +common +as +because +bathroom's +anchorages +beguile +connect +convenience's +counteracting +assorted +care +contains +centimeter +ancestors +briefings +busses +churchyards +breakable +amortizing +courthouse's +click +courses +ajar +county +covet +confidences +capitalizer +agog +backtracking +copious +bestsellers +chilliness +bringer +browse +centipede +bawled +bricklayer +breath +assailants +abysses +command's +characterizer +calculating +america's +aurally +contain +alias +commentators +confounded +appending +accidents +chatters +coordinates +bleeder +blueness +badger +bolsters +astounding +capitalist's +conservation's +commences +aimed +bun +comparators +competition +bauble +audiometry +affinity's +amalgamates +cowardly +consolidating +beads +brackish +bookings +accuses +bog +compartmentalizing +clutching +calming +collars +clambers +banqueting +beaked +authoring +correspondence +apostrophes +affirmation's +bespeak +costing +brought +backbend's +bled +assassinate +chop +anemometer's +cobbler +coldness +complainer +battalions +asymmetry +boathouse +canyon's +awarded +amplitude +anarchical +anticipatory +bolder +cooperatives +caterer +adviser +balkanizing +augur +cannibal's +balustrades +attaching +collector's +commercials +capaciously +coincidence's +bumps +ascot +bale +blackmail +baby +aftereffect +bloomers +buttresses +avenues +climaxes +aqueduct +cater +brainchild +avail +bypassed +bowl +california +cements +boxes +brained +bedevils +captors +acuity +ascends +breakthrough's +assigner +caner +bequests +ceilings +axers +bookshelf +autistic +celebrations +axons +chiding +asterisk +allophonic +blindingly +cherubim +boaster +confining +anxious +clowning +advisement +approach +anesthetic's 
+crescent +alertedly +birdbath +beardless +bras +auspices +choosers +approval's +afflicts +corrosion +arpeggio's +bodyweight +cranky +battlefront +affirmation +churchyard's +aeroacoustic +anders +adjustment +baneful +citation's +acetone +blend +binuclear +boner +annotation +announce +claimable +contemporary +clothing +acquitting +choosing +attacher +bananas +binaural +arrestor's +aches +conclude +collaborators +await +blaspheme +bequeaths +crows +balconies +begging +conducting +abstracts +assignee's +causations +approximation +articulated +considerably +apricot's +afferent +assertively +bonding +calms +cranberry's +cost +captaining +agenda +corridors +complaint +christens +aggravate +countess +arbitrators +ascribing +breech's +bellwether's +burglarized +confinement's +animating +adjectives +cannister's +bemoan +cleanest +acme +cheapest +activities +allophone +boy +belaboring +captions +compactor's +actuator's +befouling +arachnid's +computerizes +compile +absorption +bridled +absorber +convicts +birch +alkaloid's +cannot +bacilli +charitableness +abated +ceaseless +beavers +bookshelves +commensurate +appreciates +basil +cartoons +aides +buxom +cages +cantor's +acceptances +antiquated +amalgamate +babyhood +beers +conforms +bouquets +canner's +baste +cashed +argue +butcher +backbones +absolve +crib's +cafes +abstracted +book +committees +authentically +conference +antisera +bourgeoisie +attribute +biddy +autobiographies +chivalrousness +coverlet +ambiguously +calorie +anhydrous +alignments +around +archfool +advance +bedpost's +affective +contained +amain +bromides +clogs +bricker +arduous +consistent +amidst +confess +complain +anniversaries +coasting +cobwebs +aries +benchmark +aviaries +bombard +boxers +ashtray's +assyriology +blaze +ablative +chaos +burro +arguer +ashamedly +crier +allocator's +aggressively +carts +advisory +airship +alkali's +backup +chaining +continue +cartoon +circumference +breadwinners +autonomy +banking +armored +cabin +chunks +antigens 
+blistered +airers +breakaway +belief's +belays +coveting +auburn +careful +anybody +bumbled +cautious +adopter +ballplayers +anteater +citadel's +avails +agent's +caliphs +bridgehead +already +caterpillar's +coachman +centralizing +alphabet +concede +barbell +breadboard +ballast's +activators +attendance +blandly +calculator +codeword +addressee's +avenue's +alcoves +alternately +admonishes +concentrate +crossbars +adjoining +basset +carbons +beast +blonde +castle +clarification +bitch's +abrasion's +books +amputate +bicycler +aphonic +arraigns +acquiesce +buster +chaperon +advisements +buyer's +attack +birthdays +blazed +confuser +crag +ballet +airports +bison +counterexamples +arteriole +colony's +adamantly +blunders +chivalrously +adult's +authors +amplifiers +counterfeited +complicity +astrophysical +axolotl +bash +battleground +butterfly's +axioms +allegory +blitzes +blindfold +bufferrers +approximating +byways +computations +alight +avoiding +assurance's +barrages +canonicalized +callously +auditing +authenticating +bag's +asters +artistic +bonanzas +applaud +certainties +auto's +concession's +cascade +chubbiness +churchyard +afternoons +antigen's +baron's +amphibian +banister +capitalize +approval +appropriated +bureaucrat's +covets +cloisters +circulate +bivalve's +beta +collector +among +cane +birdlike +attenuating +conjunctions +appliance's +coral +crucify +abnormal +combined +classroom +buckskin +commissions +abolishments +arching +croak +americium +associates +car's +assuringly +agreer +anticoagulation +closure's +corkers +attend +alphabet's +awakening +composedly +attracted +construed +cricket's +applicability +autonavigator's +chloroplast's +ashen +beggars +corporation +another +conflicts +bootlegs +archeologist +alcove's +agitates +cargoes +creditor +cops +advisably +coronation +bourgeois +crochets +cropper's +cramp's +adulterer's +corroborations +changing +combinatorics +calm +comprehensible +blooms +coolness +copying +blacksmiths +commodore 
+compulsions +clump +afterward +crucified +brooder +buckets +accelerating +accented +boat +adventitious +baseline's +courier +calamity's +atoll's +brutalizes +bundled +chairperson +cheeses +continuation +celebrating +apologists +behest +bumpers +consonants +circulation +betraying +commuting +breezily +circumstance +coughing +benefiting +conquerors +chemically +commencement +adjustors +angel +congratulate +conspired +causally +bud's +conquers +augmented +bereaving +advisor +articulation +angler +admission +bide +competitors +amusement's +collecting +adder +arithmetized +cheek's +apostrophe +blockages +clockwork +bubbly +apricot +adjudicated +banter +amused +breacher +bracketed +aimer +comprehending +bunkers +canton +arcane +absent +capitol +consequence +cognitive +abjuring +clever +coronet +anathema +artichoke +controls +credulous +acid +crawled +coupled +boomtowns +aspen +acted +anyhow +burdensome +backdrop's +apocalyptic +cornerstone's +cautiously +blisters +conveniences +arbor's +accessories +alleges +clubs +accompaniment +blazes +annually +clique's +beamers +ballgown +autumnal +acreage +conjunct +balances +consoling +canvas's +competent +aggrieves +although +afraid +clearly +cognizance +acoustic +colleague +causing +absences +closers +airs +cinder +adversaries +altruistic +brews +ceremonially +appraisal's +commissioners +army's +assists +acceptor +comparison +cooling +conveniently +couching +changes +clinic's +confronting +adjunct's +blandness +alternates +bunter +consequent +clean +autos +accumulators +carver +aprons +awful +bobbins +blasphemy +assuming +abscess +assemble +cabinet +atomics +blacklists +audacious +assay +anthropology +barnstorm +awl +bumping +assembles +capture +compensates +coverable +amend +array +continually +absented +cigarette +antiresonance +backspace +branched +appellate +courtroom's +alienated +austerity +cement +asked +antelopes +cottager +bluebonnets +booze +amendment's +backslashes +begun +bijections +cafe's +boatload +collect 
+appeals +belittles +befit's +beauty +arrogated +academia +contagion +blemishes +coverlet's +comfortability +antecedent +controllably +congressman +complicate +coincide +arrears +clumped +credited +buffoon's +catholic +accompanist +beauty's +aster's +blatantly +bothering +bewilder +canceling +carbonizer +accentuation +backstairs +anticipations +bestowed +civilian +blooming +blunts +airlocks +argo +blueprint +aristocrat +cakes +complements +ale +camping +army +adrift +bengali +barely +blasphemes +briefcase +brooches +ailments +blazers +crevice's +bankrupt +archiver +articulator +alphabets +bonds +colliding +candidate +cashier's +bellwethers +airstrip +announcers +calendars +corrupter +aqueduct's +axiom +bathing +blusters +ascribed +admittedly +angrily +analytical +contraption +convertibility +abysmal +cathedral's +aversion's +algol +articulately +breveted +bickers +chatterer +adoptive +bijectively +cloudiest +coarseness +carted +cocktail's +capacious +anion +buffoons +bleeding +bedrock +adventurer +compositions +camouflages +brittle +chip's +aloe +chorus +cargo +critical +biographer's +abject +blasphemousness +charmer +betray +blacking +awoke +allele +bags +claimant +clover +biographies +confound +advertises +crafter +cripples +bygone +concentric +couldn't +contentions +acrid +costume +aft +aesthetic +bandits +adducts +constellations +coffer's +created +commercial +art's +cookie's +ammonia +adjunct +articulateness +congratulated +crags +brandishes +annual +byword +affection's +college's +aboriginal +bikini +buttering +allotter +console +advent +activates +beverage +april +acceptable +barrel's +boys +attractor +azimuth +critics +ballooner +aren't +adulterating +criticise +abeyance +automatically +collaborative +capabilities +crawls +anomaly's +climaxed +animately +aroma +belie +attires +argumentation +baseboard +bluebirds +cactus +byproduct +balancer +beholder +conservationist's +betrayer +agony +accusingly +convict +coaxes +breeds +agitated +championship +brevets 
+auscultate +counselling +cornerstones +america +canoes +aspirator +compensate +antiseptic +bereave +absinthe +compose +collide +alabamian +candid +civilized +clamps +authoritarianism +colonist +bugging +bins +abashing +battlers +canning +berate +assembler +amateurish +boasted +angriest +bluffs +colonize +balcony +bleat +bustard's +attenuate +contagiously +bicep +babel +beatniks +brush +analogy's +audiologist +assessment's +camera +arbitrary +alleyway's +concession +constructions +accompanies +accretion's +aroused +charcoaled +belated +bottom +bloodshot +bisques +advocate +arabs +cathodes +adamant +challenge +absurdly +abolitionist +cleavers +bludgeons +bassinet +clause +coiling +cask +boob +azalea's +afghanistan +carriages +blade's +bobby +asinine +acclaiming +absorbed +blacken +cheating +bootleg +anonymous +addict +astonishes +awry +adequate +categorization +casks +blaster +aspirants +abscesses +airing +assumptions +capitalists +board +asynchronism +body +aye +contraction +athens +arsine +cohabitations +below +bows +aviator's +ampoule +connective +adapter +authenticate +blackboard +brilliant +appoints +attics +conquer +boning +comestible +camped +blonds +aisle +coals +billboards +characterizers +crow +clout +admirer +actuarially +abstruse +accessing +bonfires +clenched +characteristic +catching +chars +canons +barrier +championed +butterflies +completely +calendar +artwork +abjections +burgher's +correlates +arrivals +accepters +circuses +breadboards +accomplishment +analyzed +appropriates +cancel +bordering +aperture +civilizing +assortments +blackest +blitz's +copy +commenced +admirers +cheers +croppers +cliff's +circumstance's +bibles +buttressed +consecutively +birefringence +automaton +cheerless +chopping +ballooned +convent +acknowledgers +appointing +belies +comeliness +bangle's +communication +bisector +avocations +clique +brainstem +campusses +allocators +bramble's +assaults +commemorate +appendix +agent +apportioning +bottled +artifact's +block's 
+archery +bagatelles +candies +catched +cognitively +creepers +concentrated +bout +balustrade +abodes +carrying +confirming +cannibal +chinners +carbonate +anguish +butt +colons +ablated +corporation's +cock +convincers +beret's +bluish +compressive +authenticates +commemorative +bureaucracies +coinage +coach +assigning +concentrators +capitalizing +appraisals +belaying +candy +blossomed +bricks +atonal +analogue +caters +barbaric +applique +clink +audio +actress +assyrian +apprehension +conversation +apsis +bedevil +comics +affricate +comings +buttress +angering +buckboards +bombed +adversely +adequacies +commended +causeways +adherers +codes +aquaria +ape +bulks +compactly +brainwashes +bleats +commandants +conditionally +adjourns +clobbering +allowances +buildings +complemented +blanker +algeria +brief +creak +adductor +categorizer +approacher +argument's +clocked +bedazzle +cause +coordinator +buildup +countenance +abhorrer +backtracked +bogus +closer +broilers +chirps +adjournment +belles +bitingly +befogged +contexts +amorous +breeding +abortions +blockage's +alternatives +bouncing +beryl +ballistics +banters +carpenters +auction +bowdlerizing +brazen +bonuses +circulated +adultery +archival +bears +baptized +burglaries +borrowing +barbarous +casher +adolescents +atrophic +busily +aerating +coatings +athenians +casing +consuming +alphanumeric +beaches +bisection's +conjecturing +aspirate +biography's +accompany +bureaucrat +broomstick's +colony +coalesce +clock +bequeath +collaborates +belonging +configured +burlesques +anode +consenter +bug +counterpoint +counts +bangladesh +analogical +accident +bulky +affinities +abysmally +boorish +assiduously +cannisters +autocollimator +bassinet's +barrelling +blurts +carbonize +candle +act +addressees +constraints +boast +complaining +coziness +avocado +coolest +blank +beadles +anytime +covetous +appellant's +angers +academies +ageless +chased +constitution +consonant's +boosting +ascetics +aerosol +apse +blushes 
+clang +confers +confidentiality +coolie +colon's +chickadees +badminton +argonaut +constituting +aloha +contracts +broomstick +brackets +attendant's +connection's +conciseness +abstractor's +composes +chaste +assures +conjuring +barbital +bunion +bases +clowns +barrelled +audience +auctioneer +complexly +aviator +conjectures +backscatters +cheerfulness +communicating +agreement +bricklayers +bilabial +abstruseness +cobol +cooperating +admit +blundering +accelerates +assaulted +concealing +anachronism +bowels +butane +anniversary's +converts +convoyed +climates +barriers +clubbing +additives +bask +confessing +caravan +colonizes +continuous +cheerlessness +boggled +armpit's +bridgework +allegro +cricket +cannon +adoption +clanging +auscultations +billowed +alphabetize +airlift +appointee +boyfriend +chaotic +corrections +bonus +contrasted +convulsion's +confessors +adumbrating +autocrat's +coronary +authentic +barley +brawling +aegis +appends +bolshevism +charted +applicant +aileron +considers +chin's +alkyl +amendment +boulevard's +avian +breather +canyons +cannon's +apportion +badgered +augers +advisers +censuses +beveling +aught +arthogram +anonymity +appliance +atmospheric +anesthetizing +ambulances +blustering +burnt +chestnut's +collects +aliment +anxieties +championship's +channeled +arrival +amassing +corpse +bedtime +blackbirds +cats +constants +chemistry +brewery +brother's +boasts +accentual +bellwether +bely +courted +baroness +configure +collection +aviary +achieves +belfry's +beech +baseman +bacterial +contestable +blond +contracting +comparably +consultation's +booster +conspiracies +belief +candidate's +boardinghouses +connectivity +check +crazy +collided +assistant's +critic +bilateral +cheapening +appalled +autopsy +balled +abnormally +acquires +aloofness +backwaters +combative +computerizing +craters +contributorily +behaved +comers +axiomatizations +analogously +banjo's +cleanser +capitalizes +chamberlain +aggregates +amenorrhea +begins +condone 
+cleaved +bustard +adsorb +airedale +bridles +audited +could +amour +checkbooks +admiring +arrested +commerce +asbestos +can's +clamping +bathers +acknowledgments +census +acrobat +bargains +apogee +creaking +busboy's +additional +chants +circumvents +afloat +anyplace +alumnae +anions +classroom's +ballerina's +convents +angered +climbers +citation +cools +clamor +capaciousness +beatific +abrades +advocating +coverings +claims +brethren +advertised +atrophies +coffer +beagle's +brazenly +bitterly +clergyman +braiding +compressible +convicting +agreeableness +antithesis +cogently +botanist's +bidirectional +bewilders +airlock +costumer +blamelessness +agglutinins +catalyst's +allocation +annunciates +borderings +accomplishes +confronters +clinically +breadbox's +canvassed +communicative +coercing +backpointer's +bramble +congregations +crave +courtesy's +cocoon's +admitting +chieftains +acclimate +consequences +cones +contradict +axolotls +contractual +artist +atrociously +consecutive +berated +bluing +attacks +choruses +blatant +balance +amplifier +assist +analyst's +ambler +conveyance +compromising +baffler +corridor +bed's +condoned +boulevard +anomie +averages +basics +apologia +cabbages +concretes +alcoholic +aliased +chocks +balsam +collies +censor +arouses +conundrum's +academically +bent +codings +coastal +allots +acclaim +citations +cantor +circularly +boarder +caribou +biologist's +cowling +connects +chasing +bootstrap +backscatter +abstractly +corrupt +alleviating +biasing +abrade +arraignment +beaten +blanketing +compactness +adage +coincided +borate +bra's +concepts +bootleger +christian +argos +basal +abate +campuses +abridging +confusers +cabin's +audition's +amphibians +attractively +adhesive's +ascendency +beforehand +ache +brokers +bowler +criminally +american's +chock's +artillerist +appropriation +characterization's +artifices +annoys +constituents +bottle +beaned +consisting +beholding +ceremony +carpeted +absolutely +anorexia +accredited 
+azaleas +amaze +commit +afflicting +contriving +adventure +blood +blabbing +absoluteness +appreciable +approachers +bumptious +behavioristic +anticipates +adults +barnyard's +banging +banana +bilge's +aware +coheres +bronchi +commissioned +arrogation +confines +core +attenuation +afterwards +clearing +applies +alphabetized +cemetery's +campaigning +abolishes +brig +cheer +combers +backtracker +clinker +clouds +clog +berries +advising +childish +clobbered +bride's +astrophysics +canker +concatenate +bite +chagrin +bodybuilders +calamity +admiralty +councillors +competitive +assessments +copper's +cabling +casket +conducted +backplane +boyfriends +bingo +broader +confiscates +communicated +baton +cocktails +albanians +boardinghouse's +brats +akimbo +categorizers +comparator's +blackbird's +accidentally +companion's +clippings +accosted +bell's +burly +aggregations +boathouses +airmails +abreactions +changers +carbon +cleaners +bookkeeping +correlations +backer +conclusions +brainstem's +anecdotes +chateau +cogitating +amphibious +compounded +completeness +comptroller's +boatswain's +bolstered +acquiescing +actors +calorie's +adaptability +abstractor +bimolecular +belly's +automobile +automotive +analyticities +awesome +colonizer +approximated +chemist +coronet's +classmate +anteater's +altars +adulthood +amid +assails +blizzards +corroborative +biographer +compartment +blooded +bipartisan +bluff +aloof +bronchiole +clincher +congratulations +ablation +caught +collier +chooses +antidotes +artery +clearance +civility +basketball +auscultated +behaviorally +crowning +autobiographical +cheaply +brutally +agonizing +clerk +comprising +baller +confuses +acquiesced +astonishingly +birthplace +covered +chopper +combinator +benignly +bedside +blasts +billboard +appraise +aboveground +comforter +credulousness +battlefield +barefoot +cleverness +apparatus +bartering +bromine +aerodynamic +crabs +chains +airflow +allegrettos +armchairs +blacklist +approvals +bait +collections 
+antecedent's +airbags +casted +content +conferrer's +crouching +coughs +canal +amphetamine +augustly +bedraggle +arithmetic +cataloger +alluding +credulity +coffees +crueler +beautifully +caresses +correlative +consul +criticizing +couched +baths +alchemy +bargain +accomplishments +conveyer +benevolence +broil +chilling +axed +attire +collisions +categorizes +cited +aeration +accommodating +coordinations +boxcar +cattle +bullion +afternoon's +captures +afghans +comets +component's +ark +bounds +adjusting +bravely +capability +chap +absolving +aspirating +arcs +conspires +collaborated +admonishment +astounds +brasses +compromise +changed +consumers +connoting +buttonholes +cordial +anionic +chastisers +archive +alleviate +burglarize +acquainted +copiers +cashers +antisocial +creations +bookie's +censure +beadle's +banded +circled +bulged +cheapness +attorney's +chewer +bookshelf's +councillor +assertion +broom's +contemplations +club's +balkans +cherubs +alas +chair +apologizes +compartments +beyond +aptly +censured +allegros +boosts +card +arithmetizes +attainment's +arrester +anding +asker +compatibilities +confidentially +commissioning +cleaner +aversion +cooperative +battalion's +cemented +charity's +conceited +capable +anymore +computing +aping +chiefly +affair +beaners +allying +caption's +antipathy +causal +abyss +botchers +burglarizing +confidant's +activator +continent's +census's +brat's +antagonism +bedspring's +antiserum +charge +connector's +alike +believable +belfry +cast's +bureaus +beneficiary +abolisher +artichoke's +broadly +concurrent +alteration +bookies +crafts +bays +ass +bouquet's +ave +chords +crazes +anemic +appoint +beets +billing +contest +assassination +allot +brindled +acute +absolves +adsorbed +auxiliaries +belatedly +businesslike +assassinates +bookkeepers +bevel +adders +automate +archangels +breakfasted +changeability +contested +cradles +combatants +besieging +certainty +attempts +bankrupting +compiler's +complications +banquets 
+ancestor's +ail +abbreviating +compacter +approvers +acknowledges +comically +almonds +counsellors +calmness +assailed +crane's +baser +big +corruption +circuitry +briefness +community's +banquetings +alms +bass's +bellowing +adoption's +blockading +compellingly +builders +befallen +bombproof +cartons +chore +crimson +anther +clucks +assemblies +beatitudes +aspiration +compels +angst +balancing +bowstrings +bayonet's +butte +biomedical +casualness +accolade +blackberry's +bunched +affright +clung +burlesque +bare +corrected +arbitrate +cropping +coherently +bloodhound +circularity +courtesies +articulating +concluded +analogy +brutalized +airmail +cooperator +cousins +centralization +bibbing +beside +bravo +abductors +cars +bovines +bump +absconding +chins +chasers +boundary's +antecedents +awed +counselled +aback +attenuator's +blazer +bettered +awaken +abreast +beagles +artisans +buckled +credence +control's +bewhiskered +calloused +breathe +collaring +blossoms +bring +actualities +bivalves +animals +cowboys +constituency +affordable +acrobatic +attiring +boatswain +concurrence +abrasions +babel's +cowerers +chiffon +bostonian +criterion +blinds +cased +affections +conditioners +clutter +accrued +attractors +botcher +compunction +bludgeoned +censored +allah's +chronic +burrs +commodity's +appraiser +asserters +cheaters +besting +anchorite +combine +afforded +cigarette's +bathrooms +apostles +chloroplast +bootlegging +bibliographical +beans +bylaw +benefited +brochure's +cordially +brashly +beastly +bologna +alderman's +burning +billow +convert +buffaloes +comparatives +assistances +camouflaged +announcement +bobwhite +brawl +adducted +cavern's +affectation's +bandying +brunette +architect's +aphorisms +cremate +bray +billed +conception +battlefield's +bandaged +broaches +bazaar's +beatification +bigotry +clergy +abstains +befits +bantering +conceivable +attachers +analogies +bimonthly +august +additionally +confirmation's +ballooning +cardboard +belle's 
+counterparts +candor +bishop +comprehension +affronted +bravura +courting +antidote +buggies +arisings +appendix's +bright +categorize +cooking +agnostic's +billets +amok +bewitching +audiograms +column's +bussed +checkbook +alteration's +atherosclerosis +broached +based +cacti +boardinghouse +bowdlerized +anchoritism +achievement's +bald +cover +codifications +capacitor +brashness +causes +acyclically +argument +boarders +audiometer +compute +contribute +crisply +bitters +circumvent +assailant +bosun +buyers +alibis +blurting +coasts +bivouacs +arrogating +albanian +attempted +acquisitiveness +applauding +alfalfa +cantors +canonicalizes +alkaloid +bruising +associativity +budgetary +carbolic +clashing +buffalo +acorn +analyzing +backyards +comedian +betwixt +aces +chartered +additivity +becalm +combat +characterizations +clinics +bulbs +bloc +amenable +civilian's +breech +attainment +bounding +compiler +cotyledons +billboard's +caper +aphasia +chester +combats +biddable +articulates +caps +assignees +bifocals +beady +chinese +assertions +allegation +championships +accrue +containment's +croaking +classifying +annum +brightened +bits +appointer +besieger +citizen's +cerebral +canto +bakers +capitol's +authorizer +blockaded +anodizes +alarmed +buttressing +attenuates +bumptiously +chronological +colleges +coward +contraption's +abstractions +controversial +boric +bids +agents +backpointer +bumped +bottoms +bowlines +captivated +article +cliche's +chases +choker +bremsstrahlung +consult +adjudged +auctioneer's +covers +accurateness +clues +bugler +bareness +cedar +alleviation +anesthetically +backpointers +arched +administered +arrowhead +continues +asks +confessor's +allure +backlogs +childishness +appointive +covering +conscience's +bellows +blanked +considerations +appalachian +aerate +budged +city's +accordion +cliche +collectors +comprehensive +boomed +chariot +baffling +bunkmate's +bumbles +contaminating +corroborating +applications +bursting +cabbage 
+befalling +acquittal +compromisers +components +arpeggio +brothel's +credibility +begrudge +confirmation +academy +appertains +calibrates +bureaucrats +bawl +costuming +biography +adoration +cloaks +aggregating +business +aphorism's +carters +admixture +coexistence +anomalously +adapts +amide +affiliation +capillary +biscuit +brainy +bellhops +chartings +cohered +austria +champions +basin's +cascading +consultants +bison's +admixed +arithmetically +clothed +betterments +conspirator's +addition +adolescence +bolsheviks +abominable +breathless +cozy +arouse +bumble +about +apace +astronaut +asteroid +cable +crab's +beachhead +assets +analyses +bisection +coconuts +alleys +armament's +bloodstains +arpeggios +apologist +blithely +anabaptist's +beadle +channelled +confuse +annoy +beautifiers +cheats +clenches +amuse +bewail +constitutional +birth +appendixes +amazed +berry's +bilingual +blustery +amplification +clogged +blackmailing +breakables +adduct +bondsmen +conferred +codewords +bequeathal +abundantly +banner's +atrocity +congested +closely +absolution +concatenations +anarchic +crag's +communicators +cavities +comptrollers +backstage +bewailing +charcoal +conveyances +collar +bores +briefest +comments +awning's +associator's +antarctica +correspondingly +bidden +ad +clings +bit's +apollo +bulldogs +chateau's +amounting +cogitates +bellhop +bookish +bout's +cannister +bicep's +asses +beef +battlefields +consort +auspicious +breezy +buried +beverages +approximates +conduction +bleakly +blanketers +ascertained +absentminded +bolivia +births +behave +bilk +breaths +charter +abstaining +appareled +boulder's +breadwinner's +correct +accessed +befitted +adulterer +axe +activation +betrothed +asymptote +bullet's +clusterings +baud +bustling +ballplayer +constraining +cleared +brown +affirmed +agencies +churches +backyard +burntness +bronchioles +charmers +backscattered +abridgment +claw +blow +adjourning +constantly +brightens +autobiography +cards +bypassing 
+alcibiades +concurrency +chuckles +bests +belligerents +adjustments +bolshevik +cabins +astronomically +cartridge +boxcars +boned +bottomed +burgeoned +adjourned +apprenticeship +chastiser +breached +boycott +butchered +coordinating +cottage +brainwashing +confinement +bandies +absentee +collapses +cruel +along +alloy +convoying +assignment's +crisp +ambidextrously +blindfolded +chilly +condenses +avers +broiler +anesthetics +beaker +cholera +brag +coffins +cranked +allocator +brutality +acquire +blushing +briar +abolish +crossovers +broiling +consolers +beatify +almanac's +cooled +commencements +clasp +committing +condemnations +altar +by +bombastic +confederates +bong +concerted +compilers +counterproductive +brig's +accurate +avidity +cleavage +blame +conceive +assessor +consolingly +concise +computes +alliance +clucked +axon's +annunciating +baseball's +allusion +brays +auras +blond's +bronchitis +ciphers +blowing +broth +canonically +baseness +byline's +appetite's +colonists +condensed +cawing +beaning +broadening +colonist's +apocrypha +chauffeured +cored +branding +carrier +assessed +collegiate +chirped +accounted +clubbed +antibodies +behalf +alphabetizing +conqueror +alpine +budgeters +casements +appropriate +compliments +cast +accountancy +cathedral +conserve +accorders +arbitrarily +cowing +bars +bagel's +climax +attention's +cautioning +centipede's +almost +abstractionist +carpenter +containing +arab's +courtesy +carton +accelerated +bowman +boastings +banal +bucking +accomplishment's +classification +baldly +abruptness +calibrations +blocs +biking +assenter +adversities +compartmentalized +chemical +attic +audiogram's +applauds +crests +bad +bounce +accelerators +contemptuous +attentions +cancellation +battles +aging +advantages +answers +bruised +castes +anthologies +any +coped +arcade's +adaptively +arsenal's +confessed +controllability +acceptor's +abrogated +abutted +amusingly +apology +broils +court +adiabatic +ambitions +charged +awfulness 
+consorts +boundaries +bode +collie +botanists +blurring +absents +batten +backwoods +breaks +certified +chattering +admitted +bathrobe's +analogous +corporacy +bijection's +combatant +checked +condition +amoral +bayed +bedroom +chanting +antics +charity +blip's +biped +brilliance +catchers +booted +anabaptist +clothe +comforted +complaints +coacher +admissible +bang +concisely +cookery +capita +assurance +codifying +benchmarks +aunts +commentaries +anon +applicators +constructor +associated +abuses +choicest +confiding +antislavery +apron +ashore +cheerfully +betterment +administration's +campaign +cremated +ambulatory +bleacher +afterthought +barkers +choir +crossly +conducive +cache's +battery +actinium +countryman +cajoled +appeasing +beamer +cleaves +anthem's +clearing's +cooperated +barker +crowing +apprising +accusation's +beginning +associator +booking +caved +amicable +codify +clairvoyant +bevels +becalms +brawn +bunkhouse's +arms +antiredeposition +belt +antiphonal +cried +brae's +bridal +acronym +clay's +checkers +auxiliary +bind +compares +agilely +askers +blankly +antagonist's +bimodal +captivation +creditable +concentration +amateur +adjure +ascertaining +budge +adulterate +additive's +cardiac +born +brewed +borneo +bun's +blue +cackled +acclimates +airline +blinder +brokerage +communicant +central +aggrieved +asynchronous +bough's +acidly +archaeology +complementary +animator's +bodyguards +climbs +apathy +constellation's +acculturate +archaeologists +contingents +calling +bartender's +autopsied +correspondent's +carnivals +abjure +bystander's +bungle +chanticleers +conceding +burghers +boards +accessions +compensations +arabian +churn +crowed +centering +abnormalities +courtier's +congregation +aberrant +annexing +blockhouse +anthropomorphic +bedder's +abutting +conundrums +affiliated +cancellation's +bolts +ballgowns +augmenting +bureaucracy's +bootlegged +audiometers +blueberry +affliction +appreciation +codifier +amasses +countering +crackle 
+canoe +consuls +breathes +broiled +amalgam's +bodes +ballooners +coating +corollaries +amphibology +agenda's +chafing +alcoholics +accredit +anisotropy +anchovies +carriers +acceptors +betrayed +buttocks +busy +bunny +cropper +accreditations +bumblebee's +adhesives +civilize +accedes +abroad +arch +crept +cotyledon +alphabetic +braille +control +anglophilia +billings +corporate +athlete +accusing +appear +announcing +accordions +computerize +combinations +bile +abut +charger +columnize +computer +blacks +converges +blamer +bulked +convincingly +checker +correspondence's +accelerate +accessible +conceivably +abscissa's +adsorbs +anglophobia +anomic +casters +churning +crease +brood +appendage +bulwark +bombers +arcaded +breadboard's +aphrodite +color +commodore's +answerer +bobolink +cloth +conversion +clime +artery's +birthplaces +compiled +arrack +beetles +bobs +compatibility +cocoon +counterpart +audible +colonies +airport's +beige +cogent +bromide +begrudging +acids +crucifies +beggary +archipelagoes +availably +counterfeiter +blanketed +amending +accelerometer's +advisors +byway +alignment +amber +austin +copyrights +beaus +brigantine +comforts +appointment's +crawler +bangles +contemplation +concur +characterizing +censoring +charters +catalogues +appropriately +builds +aeronautic +confused +comber +axially +cackler +coercive +ambassador +arcades +brash +amorality +belittling +battling +bloodied +acrylic +bantered +clasped +carcass +archangel +annunciators +aristotle +boulder +burglarproofs +chooser +abilities +calmest +bach +always +blaspheming +crossover +bakeries +clocks +ankle's +accidental +arbitration +chirp +aeronautical +boy's +acidic +bowline +anonymously +cod +couplers +beautifications +bluffing +backarrows +brow +covenant +acronym's +banning +albeit +ascetic +burn +animator +beatnik's +coveted +cipher's +broke +cap +bellman +bulldozed +clarifies +bathes +blip +availabilities +booth +clangs +audiences +cathedrals +confounding +bigot's +beecher +arts 
+company +attributed +avenged +bawling +caustics +alee +bordello's +banks +affords +complied +commas +collaborate +aquatic +ambitiously +burro's +beard +bittersweet +candlestick +bylaws +broadcastings +believe +barrels +braying +certifications +contrasts +crashes +audition +confine +bucks +abates +bureaucracy +ambles +besiege +broccoli +antibiotics +attenuators +accelerometer +caste +bib's +browbeaten +appurtenance +bauxite +asceticism +case +chewing +aerator +achievements +barricade's +agglutinates +bewildering +cartridge's +children +bufferrer +actuator +converging +bolted +chat +combs +chemist's +adduced +algebraic +circular +bloated +conclusion +burgess +certifies +absconds +comprise +benzedrine +bumbler +banjo +allow +appealing +cooperation +abraded +chaperoned +biracial +braced +censurer +acoustician +appraised +benefitting +constructs +convertible +administrative +asocial +area +creature +besetting +crater +begrudgingly +blanket +ablest +alba +airplanes +allowing +briefly +beneficences +concurring +adjective's +cork +aerospace +anomalies +asher +auger's +boilers +abhorring +broadenings +bladder +belay +approver +abdominal +commends +cringing +billiards +beater +auspice +contrasters +bights +absentees +atoll +cooler +activator's +basement +burgeon +allusiveness +codeword's +bandage +contemplate +adopted +coping +carving +baptism +colds +altos +background +closet +commuted +acre's +aliens +council +cans +cheese +ally +aseptic +belgian's +crossbar +addressed +commons +call +careers +breakfasting +brazilian +catholics +bachelors +consultant +brighter +crossword's +burglar +avoidable +batting +cigar +amps +axiological +combed +comforters +albumin +cookies +booming +archaize +canton's +bunkmate +combination +bondsman +anxiously +affixed +associatively +cigar's +backstitch +calls +captivates +commodities +atmosphere's +asserting +beaver +beatnik +container +activists +consoler +commoner +buttonhole's +abhorred +aggregate +cliff +antidisestablishmentarianism +broach 
+ambling +comer +bited +advocated +behaves +bosom +continents +conserves +bashful +ago +backarrow +circumventable +avocados +briar's +annuls +barnstorming +aired +carry +crossbar's +aspire +beards +abides +cliques +completes +brassiere +absorbs +annul +chairman +baron +battens +africans +abatement +colonization +carries +borough +allurement +breakfasters +alkali +acoustically +corners +capturer +casualties +asphyxia +animized +administrator +belying +basketballs +bylines +bandit +autopsies +braining +contradiction's +antic +butted +bacillus +blurt +conditioned +backers +agreeable +almanacs +cider +chicken +chambers +clutch +assailant's +conveyers +amazers +beribboned +breeder +caveat's +buffers +combination's +ampersand's +crafting +clanged +caving +aspirant +butlers +adjective +auckland +announced +creators +caches +baseline +codifies +baptism's +coarsened +cohesion +airman +avenge +backaches +budgeted +armpit +bicycled +converged +besmirched +autonomic +coming +assemblage's +chained +admissions +alcoholic's +branches +bunk +anciently +bloods +adventurers +amazes +coloring +abstractors +adaptation's +boar +amulet +agglutination +conquerable +booker +confronts +barometer's +bedbugs +barricades +cheap +bewitch +circus +backward +archeology +automobiles +bending +amino +beckoning +admits +berliners +borer +clambering +atomizing +banner +blissfully +catchable +breakdown +abjured +computerized +chaplain's +amphitheater +ballot's +craziness +croaks +counties +adopting +breast +airstrip's +basin +contemplating +commitments +critique +appears +bellies +baccalaureate +abducted +blackened +animosity +appraising +antiquity +assistants +asthma +bootstrapping +bounties +agleam +advertisements +benches +artful +broadens +chuck's +betrayal +blasphemed +brooms +castled +coroutine +conscious +beetle +banshee +advertising +baring +awakens +balm +billions +compromisingly +ballroom's +burrower +bayou's +ambiance +beheading +bought +adagios +adornment's +anointed +abolishment's 
+anesthetizes +badly +boyishness +consultant's +cheek +cannibals +breakdowns +assured +agates +bicker +appliances +cafe +bagpipes +adrenal +combinatorially +belligerence +bricked +adjacency +aimless +crook +cherry's +assessing +brushfire +cormorant +captained +blundered +conceptually +congress's +contraster +ambushes +bronze +autotransformer +corded +brisker +contently +announcements +bullet +apportionments +columnized +canon +conservation +algaecide +blackening +compassion +beaks +constructibility +chapter +abscond +costly +bacon +coldest +aptness +billionth +altercation +approbation +alternator's +criticizes +befell +canopy +buoyant +brazil +anticipate +absenteeism +champion +aesthetics +cadence +betroth +confidants +bean +braid +aphids +cluttering +cantankerously +bloom +barbarity +clawing +bogged +agreed +asia +abrasion +corporals +baselines +box +chartering +apotheosis +ampersands +conceit +creamer +adhered +circuit +carpet +accompaniments +boomerangs +blindness +chipmunks +bewitched +allocate +bicycle +compacted +cab +calcium +cellists +apex +borrows +completed +brightly +constables +ascertains +conspiracy's +badgers +bunion's +anabaptists +broadband +clefts +accepted +benched +catalogued +cadenced +alliteration +acquiesces +boxcar's +athlete's +bracing +cremations +analysis +crossings +assorts +apologize +brazier +configurable +basking +craves +belle +conversation's +belligerent +anesthetize +brewers +cackles +adventures +airlock's +booklet's +apply +anecdotal +bewails +computer's +autographs +acclimated +coefficients +avidly +beckoned +broadener +bulk +blacklisting +belly +acquit +convoy +achiever +aversions +advisor's +captor's +camel's +asset's +advantageous +basement's +confident +crescents +compiling +butler's +cartoon's +adaptive +chlorine +abets +cruelly +amiable +baleful +ceiling's +adumbrated +cherry +aspirant's +cashing +candidly +chaff +bitter +brim +alcove +bulb's +carbonizers +citizen +attic's +breed +consumer +conferrers +accommodations 
+contrapositive +beget +brilliantly +attentionality +continuation's +bosses +brave +configurations +benediction's +conferring +accessor's +bobolinks +bulled +cleanness +algorithm +advancements +altogether +accumulations +albacore +bowing +belching +apical +consequentiality +bagpipe's +ambrosial +bullying +cleans +attendance's +complimenter +blink +cager +assembling +coat +allowable +astringent +antiresonator +cardinal +clicks +commentator's +blossom +categorizing +amphibian's +commonality +consonant +classics +affable +accorded +aimlessly +archetype +administerings +boldness +anatomy +apprehensively +absence's +actuality +attempting +categorical +checkpoints +allemande +corer +behoove +bleaches +bough +blended +blotting +baptists +courtship +benevolent +bumptiousness +chum +anguished +auto +career +bookstore's +carbonized +autocratically +cherishes +attendees +contends +anastomotic +attributing +abbot +came +blunt +battlement's +affection +coordination +annotate +besets +bucked +boasting +benedictions +adherent +blimp's +acknowledging +cleverly +applejack +annexation +bat's +cantons +beetled +closed +country +creatively +bakery +blasphemously +chalking +bold +attended +crasher +backtrackers +artist's +bracelet's +allowably +affiliating +arrant +brayed +barbells +consigned +abolishers +climatic +atrophying +amigo +arsenal +ascribes +converses +aura's +allotted +bliss +classical +bigger +ahead +chopped +blade +casualty +acceded +bottling +axon +casement's +battlefront's +convinces +alerting +advertisers +blemish +agglutinating +commonplaces +autocorrelation +armistice +crediting +besmirch +amplify +auscultation +befalls +called +alnico +arbiter's +abort +argonauts +cessations +cribs +blare +aforementioned +condemners +contaminated +complained +bootstrapped +criticism +cooperatively +binding +bullies +basins +contrived +assort +adulterously +booms +abandons +also +appealed +count +contributed +beet +crashers +carryovers +clays +blackness +cosmetics +awkward +blurted 
+bothers +analyzer +backups +alarming +bicyclers +credit +abrogate +audience's +architecturally +alibi's +complicator's +chuckle +corporately +banishment +communist's +birdie +asymptotic +break +braze +benzene +bridgework's +beak +agitators +collateral +arranges +bayonet +breathlessly +counsellor +creates +convulsions +backdrops +applicants +altercation's +commission +breathtakingly +corresponds +backdrop +armaments +build +biannual +buttoning +computational +chaired +bather +critically +amanuensis +bantus +confidential +annoyance's +carder +authorizing +acquits +bipeds +cocktail +cinnamon +burros +brocade +abdomen's +creative +acquisition's +abdomen +baited +aristocratically +alive +committed +arrestor +cleaving +comedy's +baggage +bra +adaptors +afoot +bulls +contoured +amalgam +comprehensibility +amortizes +biographical +confront +covert +cravat +animates +booksellers +bypass +bootleggers +bedfast +affair's +buzzer +bellowed +aligning +bystander +acclimatized +accomplishing +against +blankness +adopt +addressing +croaked +boaters +behooves +audits +boatyard +cruise +agnostics +ailing +anchorage's +adaptations +conceptualize +advised +cries +bank +actuators +brazing +catalyst +beachheads +aplomb +compressed +amputated +contractor's +bedspreads +bowed +coon +chaplain +cannons +coffers +assembly +bouffant +converters +ampoule's +borderland +archaeologist +blankets +conserving +avalanche +assortment's +aspic +axle +bereaves +allowance +carbonization +bartender +clawed +coincidental +appeared +chipmunk's +countable +authenticators +bestow +alps +caw +aniseikonic +avows +blackmails +controlling +correlating +audiologist's +bit +approving +collapse +coon's +cleave +atheists +brigade +autopilots +bounteous +commercialness +accede +cavalierness +accustoming +burnishing +clobber +aspirates +brochures +cellar's +communes +berkelium +chickadee +cobweb +circumstances +chose +comprehend +baritone's +aggravation +adopts +cruelty +and +axer +cautioned +carbonic +babbles +bet 
+charitable +computable +cardinality +amenities +confiscating +catcher +audaciousness +complaint's +cooperator's +buddies +baking +constant +classmate's +accentuate +choices +crop's +authorization's +comedy +brushy +brotherly +canals +ads +causeway +abrading +cemetery +autocrat +briefing +abdomens +apparition's +consummately +alkaloids +bulkheads +cravats +bales +campaigners +bagpipe +accentuates +arm +barometric +bas +agitator +behavior +abutters +blockades +alertness +civilizes +chinner +anthropologist +artificialness +balkanize +automates +cackling +anarchists +amounted +cereal's +anodized +cobblers +acknowledgment's +blear +copper +alphabetics +blackboards +apish +answering +afternoon +arbors +accused +chickens +agency's +contractors +contraptions +cosmology +anomaly +bandstand +attempter +account +challengers +admiration +calculations +autocracy +analyticity +accord +buildup's +commonly +babbling +adjudication's +attain +ameliorating +candlestick's +chronicles +align +consensus +agate +adulation +aspirated +conclusive +biologists +cracks +conform +chambered +beryllium +connote +amusing +aquifer +ankle +batteries +conservationists +accountants +apiaries +actinometer +beckon +clearances +clouded +antitoxin's +consolation's +collectives +boxtops +bombarded +bombarding +bluest +allusion's +construction +ballpark's +codified +coincidence +celebration +chip +beginner's +algerian +boo +athletics +condenser +bytes +beauties +concerts +conductors +awl's +agitations +buttered +codifier's +armory +ascii +aspirin +arthritis +bylaw's +conformity +blasting +coinciding +aphid's +ceremonial +banisters +bristle +bid's +buckboard's +bandied +biopsy +ballrooms +chloroplasts +bidding +boil +algebra +constellation +chuck +cringes +cleanliness +apron's +cosmopolitan +bashes +abusive +believer +conductor +butters +breweries +allotment +artfulness +bunkmates +blares +connections +anticipated +classifies +commandments +beginnings +bend +brambles +blacked +basketball's +affectionate 
+cocoa +anacondas +busing +bone +birchen +creamed +aged +commemorates +brother +aberration +crawl +actuarial +apology's +alumnus +adversary's +anaphoric +aspiring +consciousness +cokes +assignee +boxing +blanched +camels +contemporaries +carnivorous +assigned +apologetically +corpus +accusations +beefing +champaign +claps +adherence +aloft +complication +citizenship +becomes +compound +arabesque +bronchiole's +appraises +breach +collection's +botched +bitches +biblically +bronchial +amalgamating +commoner's +barbarian's +arrange +cradle +conformed +complimentary +anodes +cowering +anoint +brocaded +bedazzling +avionics +burnishes +bulkhead +chink +consciously +contract +clinch +applicant's +awning +aloud +chandelier's +cathode's +babble +arachnid +biplane +clamorous +assuredly +consented +axing +avenger +commence +braving +brandishing +careless +burningly +boatsman +channelling +clarifying +beggar +berates +cite +cowered +buffer +condescending +admixes +bettering +bedazzlement +cord +burglary's +characteristics +aptitudes +adieu +agree +bends +ceremonies +accustom +accessibly +commanders +ask +cavalier +brayer +affront +courser +becoming +carves +configures +beasts +biters +conditionals +bodybuilding +accretions +chapter's +cleverer +corning +brat +classes +almsman +consumptive +antique +comprised +beholders +anthropologically +buns +bridge +accretion +acceptance's +confederacy +armorer +argumentative +crossword +cowslip's +analog +counselor +chastised +barters +clerked +americas +cloud +aide +alternators +admitters +bagatelle +bridges +civilizations +anion's +briton's +apartment +acquaints +consummation +chord +coated +barer +carnivorously +cheering +allergy +capacity +classrooms +assistantships +complimented +amphibiously +commandment's +audiogram +corked +badness +bewildered +assemblage +backplane's +asterisk's +blob +coexisting +approximations +counteractive +barns +adherer +aborigine's +brooding +conceived +adjustor +cabled +belongings +breadwinner +blot's 
+brightness +consigning +barflies +bisector's +basing +complement +conditioner +brazes +crank +antinomian +crowd +accelerometers +befitting +backlash +bastions +acceleration +briefcases +correlated +baffle +chew +accosts +agreeably +bassinets +cogitate +concerning +contouring +broadside +compact +brainstems +atom's +bondage +biter +archdioceses +basis +bellboy +blobs +barons +clods +campaigned +assessors +bubbles +annal +casual +altercations +clog's +biased +arianism +ancillary +collaborator +butter +bureau +blending +antiquities +brands +activism +crews +beats +broad +buds +baggers +cobbler's +condemns +cabinets +bomber +blinders +center +contacted +bewilderingly +circulates +burnings +achieved +belch +barbecue +angles +comparative +befuddle +cherished +chapters +chanter +allegation's +armstrong +converter +combinatoric +angrier +brooks +clinked +blubber +appointments +compactor +cleaned +car +contention's +artificial +cramp +consistency +aborting +collaboration +awarders +crippled +anaphora +creamy +buoyed +baptistery +altered +anchoring +alterer +adjuring +beacon's +commencement's +ascension +candidness +clouding +cigars +boiled +christmas +contingency's +alum +apparel +contributors +anisotropic +annotations +bushwhacks +brides +continuities +carton's +blurred +antibody +aorta +blankest +combinator's +banish +breaches +accumulates +bowling +braver +antibacterial +cooperators +banked +compensated +chartable +conjunctively +antelope's +bluefish +annoying +composed +barges +biconcave +australia +ballparks +bearers +acknowledged +advocates +crossed +competitor +blaming +andorra +baritone +collaborator's +accessibility +complains +commentator +bibliography +conference's +atmosphere +agrees +bedstead's +ardor +character's +conventionally +arena's +chokes +channel +bludgeon +convoys +condense +beautifier +ailerons +compacts +black +bell +completions +ballroom +besotting +conservatives +adventured +bulldog's +conversely +arroyos +compositional +alternative +association 
+broods +beefy +consolidated +balms +acquaint +animal +certificate +combustion +aims +cracker +abetted +cautionings +bread +attains +agriculturally +courtyards +bawls +country's +creator's +checkbook's +cliches +colonizing +biennial +aqueous +craftsman +contrivances +algorithmic +crate +barefooted +bodily +anthropologist's +but +climate's +campers +crackled +awakes +conveyed +borrowers +approached +avoids +crib +albania +bathrobe +admonitions +architectures +consenting +anastomosis +blob's +actual +arrowhead's +accountable +allegiances +commendation +appearers +comply +concurs +controversy +abstracting +artifact diff --git a/test/tcl/wrap.tcl b/test/tcl/wrap.tcl new file mode 100644 index 00000000..9ab1cd37 --- /dev/null +++ b/test/tcl/wrap.tcl @@ -0,0 +1,99 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# Sentinel file wrapper for multi-process tests. This is designed to avoid a +# set of nasty bugs, primarily on Windows, where pid reuse causes watch_procs +# to sit around waiting for some random process that's not DB's and is not +# exiting. + +source ./include.tcl +source $test_path/testutils.tcl + +# Arguments: +if { $argc < 2 } { + puts "FAIL: wrap.tcl: Usage: wrap.tcl script log [scriptargs]" + exit +} + +set script [lindex $argv 0] +set logfile [lindex $argv 1] +if { $argc >= 2 } { + set skip [lindex $argv 2] + set args [lrange $argv 3 end] +} else { + set skip "" + set args "" +} +# +# Account in args for SKIP command, or not. +# +if { $skip != "SKIP" && $argc >= 2 } { + set args [lrange $argv 2 end] +} + +# Create a sentinel file to mark our creation and signal that watch_procs +# should look for us. +set parentpid [pid] +set parentsentinel $testdir/begin.$parentpid +set f [open $parentsentinel w] +close $f + +# Create a Tcl subprocess that will actually run the test. 
+set t [open "|$tclsh_path >& $logfile" w] + +# Create a sentinel for the subprocess. +set childpid [pid $t] +puts "Script watcher process $parentpid launching $script process $childpid." +set childsentinel $testdir/begin.$childpid +set f [open $childsentinel w] +close $f + +# +# For the upgrade tests where a current release tclsh is starting up +# a tclsh in an older release, we cannot tell it to source the current +# test.tcl because new things may not exist in the old release. So, +# we need to skip that and the script we're running in the old +# release will have to take care of itself. +# +if { $skip != "SKIP" } { + puts $t "source $test_path/test.tcl" +} +puts $t "set script $script" + +# Set up argv for the subprocess, since the args aren't passed in as true +# arguments thanks to the pipe structure. +puts $t "set argc [llength $args]" +puts $t "set argv [list $args]" + +set has_path [file dirname $script] +if { $has_path != "." } { + set scr $script +} else { + set scr $test_path/$script +} +#puts "Script $script: path $has_path, scr $scr" +puts $t "set scr $scr" +puts $t {set ret [catch { source $scr } result]} +puts $t {if { [string length $result] > 0 } { puts $result }} +puts $t {error_check_good "$scr run: $result: pid [pid]" $ret 0} + +# Close the pipe. This will flush the above commands and actually run the +# test, and will also return an error a la exec if anything bad happens +# to the subprocess. The magic here is that closing a pipe blocks +# and waits for the exit of processes in the pipeline, at least according +# to Ousterhout (p. 115). + +set ret [catch {close $t} res] + +# Write ending sentinel files--we're done. 
+set f [open $testdir/end.$childpid w] +close $f +set f [open $testdir/end.$parentpid w] +close $f + +error_check_good "Pipe close ($childpid: $script $argv: logfile $logfile)"\ + $ret 0 +exit $ret diff --git a/test/tcl/wrap_reptest.tcl b/test/tcl/wrap_reptest.tcl new file mode 100644 index 00000000..e3d12836 --- /dev/null +++ b/test/tcl/wrap_reptest.tcl @@ -0,0 +1,63 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 2000, 2011 Oracle and/or its affiliates. All rights reserved. +# +# $Id$ +# +# This is a very cut down version of wrap.tcl. We don't want to +# use wrap.tcl because that will create yet another Tcl subprocess +# to execute the test. We want to open the test program directly +# here so that we get the pid for the program (not the Tcl shell) +# and watch_procs can kill the program if needed. + +source ./include.tcl +source $test_path/test.tcl + +# Arguments: +if { $argc != 3 } { + puts "FAIL: wrap_reptest.tcl: Usage: wrap_reptest.tcl argfile dir log" + exit +} + +set argfile [lindex $argv 0] +set homedir [lindex $argv 1] +set logfile [lindex $argv 2] + +# Create a sentinel file to mark our creation and signal that watch_procs +# should look for us. +set parentpid [pid] +set parentsentinel $testdir/begin.$parentpid +set f [open $parentsentinel w] +close $f + +# Create a Tcl subprocess that will actually run the test. +set argf [open $argfile r] +set progargs [read $argf] +close $argf +set topdir [pwd] +cd $homedir +set cmd [open "| ../../$util_path/db_reptest $progargs >& ../../$logfile" w] +set childpid [pid $cmd] +cd $topdir + +puts "Script watcher process $parentpid launching db_reptest process $childpid to $logfile in dir $homedir." +set childsentinel $testdir/begin.$childpid +set f [open $childsentinel w] +close $f + +# Close the pipe. This will flush the above commands and actually run the +# test, and will also return an error a la exec if anything bad happens +# to the subprocess. 
The magic here is that closing a pipe blocks +# and waits for the exit of processes in the pipeline, at least according +# to Ousterhout (p. 115). +set ret [catch {close $cmd} res] + +# Write ending sentinel files--we're done. +set f [open $testdir/end.$childpid w] +close $f +set f [open $testdir/end.$parentpid w] +close $f + +error_check_good "($childpid: db_reptest $progargs: logfile $logfile)"\ + $ret 0 +exit $ret diff --git a/test/xa/chk.xa b/test/xa/chk.xa new file mode 100644 index 00000000..186ba7c9 --- /dev/null +++ b/test/xa/chk.xa @@ -0,0 +1,133 @@ +#! /bin/sh +# +# Test XA support. + +func_clean() +{ + rm -rf run + mkdir -p run/bin run/config run/data +} + +# Debug the shell script. +# set -x + +# Check to make sure we have a Tuxedo build we understand. +test -z "$TUXDIR" && { + echo "FAIL: the TUXDIR environment variable NOT set" + echo \ + "FAIL: TUXDIR must be set to the absolute path of the Tuxedo install" + echo "FAIL: immediately above the subdirectories bin, include and lib" + exit 1 +} +dlist="include lib" +for i in $dlist; do + test -d $TUXDIR/$i || { + echo "FAIL: check the Tuxedo install" + echo "FAIL: the required directory $TUXDIR/$i does not exist" + exit 1 + } +done +flist="bin/buildclient bin/buildserver bin/buildtms bin/tmadmin bin/tmboot + bin/tmloadcf bin/tmshutdown udataobj/RM" +for i in $flist; do + test -f $TUXDIR/$i || { + echo "FAIL: check the Tuxedo install" + echo "FAIL: the required file $TUXDIR/$i does not exist" + exit 1 + } +done + +# Set the location of the Berkeley DB libraries -- allow the user to override. +# Check to make sure we have a Berkeley DB installation. (I'd like to use the +# local DB installation, but I've never been able to make Tuxedo load shared +# libraries from the .libs directory.) +REL=../../dist/RELEASE +test -z "$DB_INSTALL" && test -f $REL && { + . 
$REL + DB_INSTALL=/usr/local/BerkeleyDB.${DB_VERSION_MAJOR}.${DB_VERSION_MINOR} + export DB_INSTALL +} +if test -f "$DB_INSTALL/lib/libdb.so"; then + echo "Using Berkeley DB $DB_INSTALL/lib/ installation" +else + echo "FAIL: $DB_INSTALL/lib/libdb.so not found" + echo \ + "FAIL: DB_INSTALL must be set to the absolute path of the Berkeley DB install" + exit 1 +fi + +# You may need to update the Tuxedo resource manager file. It should be in: +# +# $TUXDIR/udataobj/RM +# +# Solaris requires a line something like the following: +# +# BERKELEY-DB:db_xa_switch:-L${DB_INSTALL}/lib -ldb +# +# where DB_INSTALL is a Berkeley DB install, and /lib contains DB libraries. +egrep "^BERKELEY-DB:db_xa_switch:" $TUXDIR/udataobj/RM > /dev/null || { + echo "FAIL: $TUXDIR/udataobj/RM does not list DB as one of its RMs" + echo "FAIL: Try adding:" + echo "FAIL: BERKELEY-DB:db_xa_switch:-L\${DB_INSTALL}/lib -ldb" + exit 1 +} + +RUN=`pwd`/run; export RUN + +FIELDTBLS32=datafml.fml; export FIELDTBLS32 +FLDTBLDIR32=$RUN/config; export FLDTBLDIR32 +TLOGDEVICE=$RUN/data/dlog; export TLOGDEVICE +TUXCONFIG=$RUN/config/tuxconfig;export TUXCONFIG +APPDIR="$RUN/bin"; export APPDIR +MACHINE_NAME=`uname -n`; export MACHINE_NAME +TLOGNAME="TLOG"; export TLOGNAME +# Causes the TMS to call close before calling open +#TUXWA4ORACLE=1; export TUXWA4ORACLE +# TMTRACE will print each call done by the txn manager +#TMTRACE=*:ulog:dye; export TMTRACE +# DVERBOSE will enable verbose output when set to 1 +#DVERBOSE=1; export DVERBOSE + +PATH="$RUN/bin:$TUXDIR/bin:$PATH" +LD_LIBRARY_PATH=$DB_INSTALL/lib:$TUXDIR/lib:$LD_LIBRARY_PATH; +export LD_LIBRARY_PATH PATH + +echo "TEST 1" +func_clean +src1/tuxconfig.sh +src1/run.sh +if test "$?" -ne 0; then + exit 1 +fi +echo "TEST 2" +func_clean +src2/tuxconfig.sh +src2/run.sh +if test "$?" -ne 0; then + exit 1 +fi +echo "TEST 3" +# multi threaded test +func_clean +src3/tuxconfig.sh 0 +src3/run.sh 0 +if test "$?" 
-ne 0; then + exit 1 +fi +# multi threaded test where one thread dies +func_clean +src3/tuxconfig.sh 1 +src3/run.sh 1 +if test "$?" -ne 0; then + exit 1 +fi +echo "TEST 4" +# multi threaded test +func_clean +src4/tuxconfig.sh 0 +src4/run.sh 0 +if test "$?" -ne 0; then + exit 1 +fi + +exit 0 diff --git a/test/xa/src1/client.c b/test/xa/src1/client.c new file mode 100644 index 00000000..b5571097 --- /dev/null +++ b/test/xa/src1/client.c @@ -0,0 +1,359 @@ +/* + * Basic smoke test for XA transactions. The client randomly sends requests + * to each of the servers to insert data into table 1, and inserts the + * same data into table 3 using regular transactions. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "datafml.h" +#include "hdbrec.h" +#include "htimestampxa.h" + +#define HOME "../data3" +#define TABLE1 "../data/table1.db" +#define TABLE3 "../data3/table3.db" + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +DB_ENV *dbenv; +char *progname; /* Client run-time name. 
*/ + +int check_data(DB *); +char *db_buf(DBT *); +int usage(void); + +int +main(int argc, char* argv[]) +{ + DB *dbp3; + DBT key, data; + FBFR *buf, *replyBuf; + HDbRec rec; + TPINIT *initBuf; + long len, replyLen, seqNo; + int ch, cnt, cnt_abort, cnt_commit, cnt_server1, i, ret; + char *target; + char *home = HOME; + u_int32_t flags = DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN | + DB_INIT_LOCK | DB_CREATE | DB_THREAD | DB_RECOVER | DB_REGISTER; + u_int32_t dbflags = DB_CREATE | DB_THREAD; + + progname = argv[0]; + + dbp3 = NULL; + buf = replyBuf = NULL; + initBuf = NULL; + cnt = 1000; + cnt_abort = cnt_commit = cnt_server1 = 0; + + while ((ch = getopt(argc, argv, "n:v")) != EOF) + switch (ch) { + case 'n': + cnt = atoi(optarg); + break; + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (verbose) + printf("%s: called\n", progname); + + /* Seed random number generator. */ + srand((u_int)(time(NULL) | getpid())); + + if (tpinit((TPINIT *)NULL) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpinit() OK\n", progname); + + /* Allocate init buffer */ + if ((initBuf = (TPINIT *)tpalloc("TPINIT", NULL, TPINITNEED(0))) == 0) + goto tuxedo_err; + if (verbose) + printf("%s: tpalloc(\"TPINIT\") OK\n", progname); + + /* Create the DB environment. */ + if ((ret = db_env_create(&dbenv, 0)) != 0 || + (ret = dbenv->open(dbenv, home, flags, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, home, db_strerror(ret)); + goto err; + } + dbenv->set_errfile(dbenv, stderr); + if (verbose) + printf("%s: opened %s OK\n", progname, home); + + /* + * Open table #3 -- Data is inserted into table 1 using XA + * transactions, and inserted into table 3 using regular transactions. 
+ */ + if ((ret = db_create(&dbp3, dbenv, 0)) != 0 || + (ret = dbp3->open(dbp3, + NULL, TABLE3, NULL, DB_BTREE, dbflags, 0660)) != 0) { + fprintf(stderr, + "%s: %s %s\n", progname, TABLE3, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened %s OK\n", progname, TABLE3); + + /* Allocate send buffer. */ + len = Fneeded(1, 3 * sizeof(long)); + if ((buf = (FBFR*)tpalloc("FML32", NULL, len)) == 0) + goto tuxedo_err; + if (verbose) + printf("%s: tpalloc(\"FML32\"), send buffer OK\n", progname); + + /* Allocate reply buffer. */ + replyLen = 1024; + if ((replyBuf = (FBFR*)tpalloc("FML32", NULL, replyLen)) == NULL) + goto tuxedo_err; + if (verbose) + printf("%s: tpalloc(\"FML32\"), reply buffer OK\n", progname); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + for (rec.SeqNo = 1, i = 0; i < cnt; ++i, ++rec.SeqNo) { + GetTime(&rec.Ts); + + if (Fchg(buf, SEQ_NO, 0, (char *)&rec.SeqNo, 0) == -1) + goto tuxedo_fml_err; + if (verbose) + printf("%s: Fchg(), sequence number OK\n", progname); + if (Fchg(buf, TS_SEC, 0, (char *)&rec.Ts.Sec, 0) == -1) + goto tuxedo_fml_err; + if (verbose) + printf("%s: Fchg(), seconds OK\n", progname); + if (Fchg(buf, TS_USEC, 0, (char *)&rec.Ts.Usec, 0) == -1) + goto tuxedo_fml_err; + if (verbose) + printf("%s: Fchg(), microseconds OK\n", progname); + + if (tpbegin(60L, 0L) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpbegin() OK\n", progname); + + /* Randomly send half of our requests to each server. */ + if (rand() % 2 > 0) { + ++cnt_server1; + target = "TestTxn1"; + } else + target = "TestTxn2"; + if (tpcall(target, (char *)buf, + 0L, (char **)&replyBuf, &replyLen, TPSIGRSTRT) == -1) + goto tuxedo_err; + /* Commit for a return value of 0, otherwise abort. 
*/ + if (tpurcode == 0) { + ++cnt_commit; + if (verbose) + printf("%s: txn success\n", progname); + + if (tpcommit(0L) == -1) + goto abort; + if (verbose) + printf("%s: tpcommit() OK\n", progname); + + /* + * Store a copy of the key/data pair into table #3 + * on success, we'll compare table #1 and table #3 + * after the run finishes. + */ + seqNo = rec.SeqNo; + key.data = &seqNo; + key.size = sizeof(seqNo); + data.data = &rec; + data.size = sizeof(rec); + if ((ret = + dbp3->put(dbp3, NULL, &key, &data, 0)) != 0) { + fprintf(stderr, "%s: DB->put: %s %s\n", + progname, TABLE3, db_strerror(ret)); + goto err; + } + } else { + abort: ++cnt_abort; + if (verbose) + printf("%s: txn failure\n", progname); + + if (tpabort(0L) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpabort() OK\n", progname); + } + } + + printf("%s: %d requests: %d committed, %d aborted\n", + progname, cnt, cnt_commit, cnt_abort); + printf("%s: %d sent to server #1, %d sent to server #2\n", + progname, cnt_server1, cnt - cnt_server1); + + ret = check_data(dbp3); + + if (0) { +tuxedo_err: fprintf(stderr, "%s: TUXEDO ERROR: %s (code %d)\n", + progname, tpstrerror(tperrno), tperrno); + goto err; + } + if (0) { +tuxedo_fml_err: fprintf(stderr, "%s: FML ERROR: %s (code %d)\n", + progname, Fstrerror(Ferror), Ferror); + } + if (0) { +err: ret = EXIT_FAILURE; + } + + if (replyBuf != NULL) + tpfree((char *)replyBuf); + if (buf != NULL) + tpfree((char *)buf); + if (initBuf != NULL) + tpfree((char *)initBuf); + if (dbp3 != NULL) + (void)dbp3->close(dbp3, 0); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); + + tpterm(); + if (verbose) + printf("%s: tpterm() OK\n", progname); + + return (ret); +} + +/* + * check_data -- + * Compare committed data with our local copy, stored in table3. 
+ */ +int +check_data(dbp3) + DB *dbp3; +{ + DB *dbp1; + DBC *dbc1, *dbc3; + DBT key1, data1, key3, data3; + int ret, ret1, ret3; + DB_ENV *dbenv1; + u_int32_t flags = DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN | + DB_INIT_LOCK | DB_THREAD; + + dbp1 = NULL; + dbc1 = dbc3 = NULL; + dbenv1 = NULL; + + if ((ret = db_env_create(&dbenv1, 0)) != 0 || + (ret = dbenv1->open(dbenv1, "../data", flags, 0)) != 0) + goto err; + /* Open table #1. */ + if ((ret = db_create(&dbp1, dbenv1, 0)) != 0 || + (ret = dbp1->open( + dbp1, NULL, TABLE1, NULL, DB_UNKNOWN, DB_RDONLY, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, TABLE1, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened %s OK\n", progname, TABLE1); + + /* Open cursors. */ + if ((ret = dbp1->cursor(dbp1, NULL, &dbc1, 0)) != 0 || + (ret = dbp3->cursor(dbp3, NULL, &dbc3, 0)) != 0) { + fprintf(stderr, + "%s: DB->cursor: %s\n", progname, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened cursors OK\n", progname); + + /* Compare the two databases. */ + memset(&key1, 0, sizeof(key1)); + memset(&data1, 0, sizeof(data1)); + memset(&key3, 0, sizeof(key3)); + memset(&data3, 0, sizeof(data3)); + for (;;) { + ret1 = dbc1->c_get(dbc1, &key1, &data1, DB_NEXT); + ret3 = dbc3->c_get(dbc3, &key3, &data3, DB_NEXT); + if (verbose) { + printf("get: key1: %s\n", db_buf(&key1)); + printf("get: key3: %s\n", db_buf(&key3)); + printf("get: data1: %s\n", db_buf(&data1)); + printf("get: data3: %s\n", db_buf(&data3)); + } + if (ret1 != 0 || ret3 != 0) + break; + /* + * Only compare the first N bytes, the saved message chunks + * are different. 
+ */ + if (key1.size != key3.size || + memcmp(key1.data, key3.data, key1.size) != 0 || + data1.size != data3.size || + memcmp(data1.data, data3.data, + sizeof(long) + sizeof(HTimestampData)) != 0) + goto mismatch; + } + if (ret1 != DB_NOTFOUND || ret3 != DB_NOTFOUND) { +mismatch: fprintf(stderr, + "%s: DB_ERROR: databases 1 and 3 weren't identical\n", + progname); + ret = 1; + } + +err: if (dbc1 != NULL) + (void)dbc1->c_close(dbc1); + if (dbc3 != NULL) + (void)dbc3->c_close(dbc3); + if (dbp1 != NULL) + (void)dbp1->close(dbp1, 0); + if(dbenv1 != NULL) + (void)dbenv1->close(dbenv1, 0); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +char * +db_buf(dbt) + DBT *dbt; +{ + static u_char buf[1024]; + size_t len; + u_char *p, *b; + + for (p = dbt->data, len = dbt->size, b = buf; len > 0; ++p, --len) + if (isprint(*p)) + b += sprintf((char *)b, "%c", *p); + else + b += sprintf((char *)b, "%#o", *p); + return ((char *)buf); +} + +int +usage() +{ + fprintf(stderr, "usage: %s [-v] [-n txn]\n", progname); + return (EXIT_FAILURE); +} diff --git a/test/xa/src1/datafml.fml b/test/xa/src1/datafml.fml new file mode 100644 index 00000000..d503759a --- /dev/null +++ b/test/xa/src1/datafml.fml @@ -0,0 +1,5 @@ +# datafml.fml was generated by tuxdev on 12/18/98 @ 12:15:33 ***** DO NOT EDIT ***** + +SEQ_NO 401 long - +TS_SEC 402 long - +TS_USEC 403 long - diff --git a/test/xa/src1/datafml.h b/test/xa/src1/datafml.h new file mode 100644 index 00000000..143b484e --- /dev/null +++ b/test/xa/src1/datafml.h @@ -0,0 +1,5 @@ +/* fname fldid */ +/* ----- ----- */ +#define SEQ_NO ((FLDID32)33554833) /* number: 401 type: long */ +#define TS_SEC ((FLDID32)33554834) /* number: 402 type: long */ +#define TS_USEC ((FLDID32)33554835) /* number: 403 type: long */ diff --git a/test/xa/src1/hdbrec.h b/test/xa/src1/hdbrec.h new file mode 100644 index 00000000..16932f60 --- /dev/null +++ b/test/xa/src1/hdbrec.h @@ -0,0 +1,14 @@ +#ifndef HDBREC_H +#define HDBREC_H + +#include "htimestampxa.h" + +/* 
+ * DB record + */ +typedef struct __HDbRec { + long SeqNo; + HTimestampData Ts; + char Msg[10]; +} HDbRec; +#endif diff --git a/test/xa/src1/htimestampxa.c b/test/xa/src1/htimestampxa.c new file mode 100644 index 00000000..0e73e6aa --- /dev/null +++ b/test/xa/src1/htimestampxa.c @@ -0,0 +1,14 @@ +#include +#include + +#include "htimestampxa.h" + +void +GetTime(HTimestampData *ts) +{ + struct timeval timeNow; + + (void)gettimeofday(&timeNow, 0); + ts->Sec = timeNow.tv_sec; + ts->Usec = timeNow.tv_usec; +} diff --git a/test/xa/src1/htimestampxa.h b/test/xa/src1/htimestampxa.h new file mode 100644 index 00000000..975b315e --- /dev/null +++ b/test/xa/src1/htimestampxa.h @@ -0,0 +1,13 @@ +#ifndef HTIMESTAMPXA_H +#define HTIMESTAMPXA_H + +/* + * Timestamp with microseconds precision + */ +typedef struct __HTimestampData { + time_t Sec; + time_t Usec; +} HTimestampData; + +void GetTime(HTimestampData *); +#endif diff --git a/test/xa/src1/run.sh b/test/xa/src1/run.sh new file mode 100644 index 00000000..eb5ff3ec --- /dev/null +++ b/test/xa/src1/run.sh @@ -0,0 +1,130 @@ +#! /bin/sh +# +# Run test 1. + +msg() +{ + test "$DVERBOSE" == 1 && { + echo "========" + echo "======== $1" + echo "========" + } +} + +init_tmadmin() +{ +tmadmin << END_OF_TMADMIN + crdl -z $TLOGDEVICE -b 500 + crlog -m cluster3 +END_OF_TMADMIN +} + +mkdir $RUN/data3 + +# Everything else is done in run/bin. +cd $RUN/bin + +# The CFLAGS variable defines the pre-processor defines -- start with +# whatever the user set, and add our own stuff. +# +# For debugging output, add -DDVERBOSE + +test "$DVERBOSE" == 1 && { + COMPILE_FLAGS="-DDVERBOSE" + DVERBOSE_FLAG="-v" +} +COMPILE_FLAGS="$CFLAGS $COMPILE_FLAGS -g -I../../.." + +msg "BUILDING CLIENT" +CFLAGS="$COMPILE_FLAGS"; export CFLAGS +buildclient -r BERKELEY-DB $DVERBOSE_FLAG -o client \ + -f ../../src1/htimestampxa.c -f ../../src1/client.c +test "$?" -eq 0 || { + echo "FAIL: buildclient failed." 
+ exit 1 +} + +msg "BUILDING SERVER #1" +CFLAGS="$COMPILE_FLAGS -DSERVER1"; export CFLAGS +buildserver -r BERKELEY-DB $DVERBOSE_FLAG -o server1 \ + -s TestTxn1:TestTxn1 \ + -f ../../src1/htimestampxa.c -f ../../src1/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING SERVER #2" +CFLAGS="$COMPILE_FLAGS -DSERVER2"; export CFLAGS +buildserver $DVERBOSE_FLAG -r BERKELEY-DB -o server2 \ + -s TestTxn2:TestTxn2 \ + -f ../../src1/htimestampxa.c -f ../../src1/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING THE RESOURCE MANAGER." +buildtms -o DBRM -r BERKELEY-DB + +init_tmadmin + +# Boot Tuxedo. +# You should see something like: +# +# Booting admin processes ... +# +# exec BBL -A : +# process id=13845 ... Started. +# +# Booting server processes ... +# +# exec DBRM -A : +# process id=13846 ... Started. +# exec DBRM -A : +# process id=13847 ... Started. +# exec server1 -A : +# process id=13848 ... Started. +# exec server2 -A : +# process id=13849 ... Started. +# 5 processes started. +msg "BOOTING TUXEDO." +tmboot -y + +# Run the client with 10, 100 and 1000 transactions. +exitval=0 +for i in 10 100 1000; do + msg "RUN THE CLIENT WITH $i TRANSACTIONS." + ./client $DVERBOSE_FLAG -n $i + test "$?" -ne 0 && { + echo "FAIL: client failed" + exitval=1 + break; + } +done + +msg "SHUTTING DOWN THE TRANSACTION MANAGER." +echo 'y' | tmshutdown + +# Copy out any server output. +echo "STDOUT:" +cat stdout + +# Copy out any server errors.echo "STDERR:" +cat stderr +test -s stderr && { + echo "STDERR:" + cat stderr + echo "FAIL: stderr file not empty" + exitval=1 +} + +# We never checkpointed, run recovery to make sure it all works. +msg "RECOVERY:" +db_recover -h ../data -v +test "$?" 
-ne 0 && { + echo "FAIL: recovery failed" + exitval=1 +} + +exit $exitval diff --git a/test/xa/src1/server.c b/test/xa/src1/server.c new file mode 100644 index 00000000..0ed21cfe --- /dev/null +++ b/test/xa/src1/server.c @@ -0,0 +1,244 @@ +/* + * This is the code for the two servers used in the first XA test. Server 1 + * and Server 2 are both called by the client to insert data into table 1 + * using XA transactions. Server 1 also can forward requests to Server 2. + */ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "datafml.h" +#include "hdbrec.h" +#include "htimestampxa.h" + +/* + * The two servers are largely identical, #ifdef the source code. + */ +#ifdef SERVER1 +#define TXN_FUNC TestTxn1 +#define TXN_STRING "TestTxn1" +#endif +#ifdef SERVER2 +#define TXN_FUNC TestTxn2 +#define TXN_STRING "TestTxn2" +#endif +void TXN_FUNC(TPSVCINFO *); + +#define HOME "../data" +#define TABLE1 "table1.db" +#define TABLE2 "table2.db" + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +DB *db1, *db2; /* Table handles. */ + +int cnt_forward; /* Forwarded requests. */ +int cnt_request; /* Total requests. */ + +char *progname; /* Server run-time name. */ + +char *db_buf(DBT *); + +/* + * Called when each server is started. It creates and opens the + * two databases. + */ +int +tpsvrinit(int argc, char* argv[]) +{ + int ret; + + progname = argv[0]; + if (verbose) + printf("%s: called\n", progname); + + /* Open resource managers. */ + if (tx_open() == TX_ERROR) { + fprintf(stderr, "tx_open: TX_ERROR\n"); + return (-1); + } + + /* Seed random number generator. */ + srand((u_int)(time(NULL) | getpid())); + + /* Open permanent XA handles. 
*/ + if ((ret = db_create(&db1, NULL, DB_XA_CREATE)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return (-1); + } + db1->set_errfile(db1, stderr); + if ((ret = db1->open(db1, NULL, + TABLE1, NULL, DB_BTREE, DB_AUTO_COMMIT | DB_CREATE, 0660)) != 0) { + fprintf(stderr, "DB open: %s: %s\n", TABLE1, db_strerror(ret)); + return (-1); + } + if ((ret = db_create(&db2, NULL, DB_XA_CREATE)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return (-1); + } + db2->set_errfile(db2, stderr); + if ((ret = db2->open(db2, NULL, + TABLE2, NULL, DB_BTREE, DB_AUTO_COMMIT | DB_CREATE, 0660)) != 0) { + fprintf(stderr, "DB open: %s: %s\n", TABLE2, db_strerror(ret)); + return (-1); + } + + if (verbose) + printf("%s: tpsvrinit: initialization done\n", progname); + + return (0); +} + +/* Called when the servers are shutdown. This closes the databases. */ +void +tpsvrdone() +{ + if (db1 != NULL) + (void)db1->close(db1, 0); + if (db2 != NULL) + (void)db2->close(db2, 0); + + tx_close(); + + if (verbose) + printf("%s: tpsvrdone: shutdown done\n", progname); + + printf("%s: %d requests, %d requests forwarded to the other server\n", + progname, cnt_request, cnt_forward); +} + +/* + * This function is called by the client. Here Server 1 and Server 2 insert + * data into table 1 using XA transactions. Server 1 can also forward its + * request to Server 2. + */ +void +TXN_FUNC(TPSVCINFO *msg) +{ + DBT data; + DBT key; + FBFR *replyBuf; + HDbRec rcrd; + long replyLen, seqNo; + int ret; + + ++cnt_request; + +#ifdef SERVER1 + /* + * Test that servers can forward to other servers. Randomly forward + * half of server #1's requests to server #2. 
+ */ + if (rand() % 2 > 0) { + ++cnt_forward; + + replyLen = 1024; + if ((replyBuf = + (FBFR*)tpalloc("FML32", NULL, replyLen)) == NULL || + tpcall("TestTxn2", msg->data, + 0, (char**)&replyBuf, &replyLen, TPSIGRSTRT) == -1) { + fprintf(stderr, "%s: TUXEDO ERROR: %s (code %d)\n", + progname, tpstrerror(tperrno), tperrno); + tpfree((char*)replyBuf); + tpreturn(TPFAIL, 0L, 0, 0L, 0); + } else { + tpfree((char*)replyBuf); + tpreturn(TPSUCCESS, tpurcode, 0, 0L, 0); + } + return; + } +#endif + /* Read the record. */ + if (Fget((FBFR*)msg->data, SEQ_NO, 0, (char *)&rcrd.SeqNo, 0) == -1) + goto fml_err; + if (Fget((FBFR*)msg->data, TS_SEC, 0, (char *)&rcrd.Ts.Sec, 0) == -1) + goto fml_err; + if (Fget( + (FBFR*)msg->data, TS_USEC, 0, (char *)&rcrd.Ts.Usec, 0) == -1) { +fml_err: fprintf(stderr, "%s: FML ERROR: %s (code %d)\n", + progname, Fstrerror(Ferror), Ferror); + goto err; + } + + seqNo = rcrd.SeqNo; /* Update the record. */ + memset(&key, 0, sizeof(key)); + key.data = &seqNo; + key.size = sizeof(seqNo); + memset(&data, 0, sizeof(data)); + data.data = &rcrd; + data.size = sizeof(rcrd); + + strcpy(rcrd.Msg, "Table1"); /* Table 1. */ + if (verbose) { + printf("put1: key: %s\n", db_buf(&key)); + printf("put1: data: %s\n", db_buf(&data)); + } + if ((ret = db1->put(db1, NULL, &key, &data, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + goto abort; + fprintf(stderr, "%s: %s: Table1->put: %s\n", + progname, TXN_STRING, db_strerror(ret)); + goto err; + } + + strcpy(rcrd.Msg, "Table2"); /* Table 2. */ + if ((ret = db2->put(db2, NULL, &key, &data, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + goto abort; + fprintf(stderr, "%s: %s: Table2->put: %s\n", + progname, TXN_STRING, db_strerror(ret)); + goto err; + } + + /* + * Decide if the client is going to commit the global transaction or + * not, testing the return-value path back to the client; this is the + * path we'd use to resolve deadlock, for example. Commit 80% of the + * time. 
Returning 0 causes the client to commit, 1 to abort. + */ + if (rand() % 10 > 7) { + if (verbose) + printf("%s: %s: commit\n", progname, TXN_STRING); + tpreturn(TPSUCCESS, 0L, 0, 0L, 0); + } else { +abort: if (verbose) + printf("%s: %s: abort\n", progname, TXN_STRING); + tpreturn(TPSUCCESS, 1L, 0, 0L, 0); + } + return; + +err: tpreturn(TPFAIL, 1L, 0, 0L, 0); +} + +char * +db_buf(dbt) + DBT *dbt; +{ + static u_char buf[1024]; + size_t len; + u_char *p, *b; + + for (p = dbt->data, len = dbt->size, b = buf; len > 0; ++p, --len) + if (isprint(*p)) + b += sprintf((char *)b, "%c", *p); + else + b += sprintf((char *)b, "%#o", *p); + return ((char *)buf); +} diff --git a/test/xa/src1/tuxconfig.sh b/test/xa/src1/tuxconfig.sh new file mode 100644 index 00000000..61e61909 --- /dev/null +++ b/test/xa/src1/tuxconfig.sh @@ -0,0 +1,47 @@ +#! /bin/sh +# +# Build the configuration file for test 1 -- +# We do this work in the shell script because we have to fill in +# lots of shell variables. + + MACHINE_NAME=`uname -n` + cat > $RUN/config/ubb.cfg << END_OF_UBB_FILE +*RESOURCES +IPCKEY 200103 +DOMAINID domain3 +MASTER cluster3 +MAXACCESSERS 10 +MAXSERVERS 5 +MAXSERVICES 10 +MODEL SHM +LDBAL N + +*MACHINES +DEFAULT: + APPDIR="$APPDIR" + TUXCONFIG="$TUXCONFIG" + TLOGDEVICE="$TLOGDEVICE" + TUXDIR="$TUXDIR" +# Machine name is 30 characters max +"$MACHINE_NAME" LMID=cluster3 + +*GROUPS +# Group name is 30 characters max +group_tm LMID=cluster3 GRPNO=1 TMSNAME=DBRM TMSCOUNT=2 OPENINFO="BERKELEY-DB:$RUN/data" + +*SERVERS +DEFAULT: + CLOPT="-A" + +# Server name is 78 characters max (same for any pathname) +server1 SRVGRP=group_tm SRVID=1 MAXGEN=3 RESTART=Y +server2 SRVGRP=group_tm SRVID=2 MAXGEN=3 RESTART=Y + +*SERVICES +# Service name is 15 characters max +# server1 +TestTxn1 +# server2 +TestTxn2 +END_OF_UBB_FILE + tmloadcf -y $RUN/config/ubb.cfg diff --git a/test/xa/src2/bdb1.c b/test/xa/src2/bdb1.c new file mode 100644 index 00000000..be8032f3 --- /dev/null +++ b/test/xa/src2/bdb1.c @@ 
-0,0 +1,223 @@ +/* +* Copyright (c) 1997 BEA Systems, Inc. +* All Rights Reserved +* +* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF +* BEA Systems, Inc. +* The copyright notice above does not evidence any +* actual or intended publication of such source code. +* +* This server is called by the client. It inserts a value into db1 then +* calls bdb2 to insert a value into db2. +*/ + +#include +#include +#include +#include +#include +#include /* TUXEDO Header File */ +#include /* TUXEDO Header File */ +#include +#include +#include +#include + +#define DATABASE1 "data1.db" + +static u_int32_t open_flags = DB_CREATE | DB_AUTO_COMMIT; +static DB *gdbp; +static int times = 1; +static long seq = 1; + +static DB* getdbp(){ + return gdbp; +} + +/* Open the database when the server is started. */ +int +tpsvrinit(argc, argv) +int argc; +char **argv; +{ + int ret; + char ch; + /* Some compilers warn if argc and argv aren't used. */ + while ((ch = getopt(argc, argv, "t:")) != EOF) + switch (ch) { + case 't': + times = atoi(optarg); + break; + } + + + tpopen(); + + /* Create and initialize database object, open the database. */ + if ((ret = db_create(&gdbp, NULL, DB_XA_CREATE)) != 0) { + userlog("db_create: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + + if ((ret = gdbp->open(gdbp, NULL, DATABASE1, NULL, DB_BTREE, open_flags, + 0664)) != 0) { + userlog("open: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + + return(0); +} + +/* Close the database when the server is shutdown. */ +void +tpsvrdone(void) +{ + DB* dbp = getdbp(); + dbp->close(dbp, 0); + tpclose(); +} + +/* Insert a value into db1, then call bdb2 to insert that value into db2. 
*/ +int +WRITE(rqst) +TPSVCINFO *rqst; +{ + long rcvlen; + int ret, i; + DB *dbp = getdbp(); + DBT key, data; + size_t len; + int ch; + char *p, *t, buf[1024]; + + tpbegin(10,0); + + for(i=0; iput(dbp, NULL, &key, &data, + DB_NOOVERWRITE)) { + case 0: + break; + case DB_LOCK_DEADLOCK: + if(tpabort(0) == -1){ + userlog("tpabort() fail:%s", + tpstrerror(tperrno)); + } + tpreturn(TPSUCCESS, 1, rqst->data, 0L, 0); + default: + userlog("put: %s", db_strerror(ret)); + if(tpabort(0) == -1){ + userlog("tpabort() fail:%s", + tpstrerror(tperrno)); + } + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + } + + /* + * Insert the key into a queue and call bdb2 where it will read + * the key out of the queue. + */ + FBFR32 *reqbuf; + char * rcvbuf; + + if((reqbuf = (FBFR32*) tpalloc("FML32", NULL, 100)) == NULL) { + tpabort(0); + userlog("alloc fail"); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + + if((rcvbuf = (char *) tpalloc("STRING", NULL, 100)) == NULL) { + tpabort(0); + userlog("alloc fail"); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + + Fadd32(reqbuf, TA_REPOSPARAM, buf, 0); + + /* Call bdb2 */ + ret = tpcall("WRITE2", (char *)reqbuf, 0, &rcvbuf, &rcvlen,TPSIGRSTRT); + tpfree((char*)reqbuf); + tpfree((char*)rcvbuf); + if(ret == -1){ + userlog("call WRITE2 fail"); + if(-1 == tpabort(0)) + userlog("tpabort() fail:%s", tpstrerror(tperrno)); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + /* Commit for a return value of 0, otherwise abort. */ + if (tpurcode == 0) { + if(tpcommit(0) == -1){ + userlog("tpcommit fail"); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + } else { + if(tpabort(0) == -1){ + userlog("tpabort fail"); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + } + + tpreturn(TPSUCCESS, 0, rqst->data, 0L, 0); +} + +/* Iterates the database with a cursor. 
*/ +int +CURSOR(rqst) +TPSVCINFO *rqst; +{ + int ret,count; + DBT key, value; + DBC *cursorp; + DB *dbp = getdbp(); + + tpbegin(60*10,0); + + /* Get the cursor */ + ret = dbp->cursor(dbp, NULL, &cursorp, 0); + if (ret != 0) { + userlog("count_records: cursor open failed."); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + + /* Get the key DBT used for the database read */ + memset(&key, 0, sizeof(DBT)); + memset(&value, 0, sizeof(DBT)); + do { + ret = cursorp->c_get(cursorp, &key, &value, DB_NEXT); + switch (ret) { + case 0: + count++; + break; + case DB_NOTFOUND: + break; + case DB_LOCK_DEADLOCK: + if(tpcommit(0) == -1) + userlog("tpcommit() fail:%s", + tpstrerror(tperrno)); + cursorp->c_close(cursorp); + tpreturn(TPSUCCESS, 1L, rqst->data, 0L, 0); + break; + default: + if(tpabort(0) == -1) + userlog("tpabort() fail:%s", + tpstrerror(tperrno)); + cursorp->c_close(cursorp); + dbp->err(dbp, ret, + "Count records unspecified error"); + tpreturn(TPFAIL, 0, rqst->data, 0L, 0); + } + } while (ret == 0); + cursorp->c_close(cursorp); + tpcommit(0); + tpreturn(TPSUCCESS, 0, rqst->data, 0L, 0); + } diff --git a/test/xa/src2/bdb2.c b/test/xa/src2/bdb2.c new file mode 100644 index 00000000..b0b6a49f --- /dev/null +++ b/test/xa/src2/bdb2.c @@ -0,0 +1,158 @@ +/* +* Copyright (c) 1997 BEA Systems, Inc. +* All Rights Reserved +* +* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF +* BEA Systems, Inc. +* The copyright notice above does not evidence any +* actual or intended publication of such source code. +* +* This server is only called by bdb1. It takes the data sent by bdb1 and +* inserts it into db2. 
+*/ + +#include +#include +#include +#include +#include +#include /* TUXEDO Header File */ +#include /* TUXEDO Header File */ +#include +#include +#include + +#define DATABASE1 "data1.db" +#define DATABASE2 "data2.db" + +static DB *dbp1; +static DB *dbp2; + +static int opendb(){ + int ret; + u_int32_t open_flags = DB_CREATE | DB_AUTO_COMMIT; + + /* Create and initialize database object, open the database. */ + if ((ret = db_create(&dbp1, NULL, DB_XA_CREATE)) != 0) { + userlog("db_create: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + + if ((ret = db_create(&dbp2, NULL, DB_XA_CREATE)) != 0) { + userlog("db_create: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + + if ((ret = dbp1->open(dbp1, NULL, DATABASE1, NULL, DB_BTREE, open_flags, + 0664)) != 0) { + userlog("open: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + if ((ret = dbp2->open(dbp2, NULL, DATABASE2, NULL, DB_BTREE, open_flags, + 0664)) != 0) { + userlog("open: %s", db_strerror(ret)); + return (EXIT_FAILURE); + } + return 0; +} + +static void closedb(){ + (void)dbp1->close(dbp1, 0); + (void)dbp2->close(dbp2, 0); +} + +/* Write the given data into the given database. */ +static int writedb(DB * dbp, void *buf, u_int32_t size){ + DBT key, data; + size_t len; + int ch, ret; + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = buf; + data.data = buf; + data.size = key.size = size; + ret = dbp->put(dbp, NULL, &key, &data, DB_NOOVERWRITE); + switch (ret) { + case 0: + return (EXIT_SUCCESS); + case DB_KEYEXIST: + return (EXIT_SUCCESS); + case DB_LOCK_DEADLOCK: + return (EXIT_SUCCESS); + default: + userlog("put: %s", db_strerror(ret)); + return -1; + } +} + +/* Open the databases used by this server when it is started. */ +int +tpsvrinit(argc, argv) +int argc; +char **argv; +{ + int ret; + /* Some compilers warn if argc and argv aren't used. 
*/ + argc = argc; + argv = argv; + + tpopen(); + if (ret = opendb() != 0){ + userlog("put: %s", db_strerror(ret)); + } + + return(0); +} + +/* Close the database when the server is shutdown. */ +void +tpsvrdone(void) +{ + closedb(); + tpclose(); +} + +/* + * Get the data the calling server just inserted into db1 and insert it into + * db2. Fgets32 is used to get the key passed by the calling server. + */ +int +WRITE2(rqst) +TPSVCINFO *rqst; + +{ + char buf[100]; + DBT key, data; + FBFR32 *reqbuf = (FBFR32*)rqst->data; + int ret; + + Fgets32(reqbuf, TA_REPOSPARAM, 0, buf); + userlog("buf:[%s]", buf); + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + + key.data = buf; + key.size = (u_int32_t)strlen(buf); + data.flags = DB_DBT_MALLOC; + + /* Get the data that the calling server inserted into db1 */ + switch (ret = dbp1->get(dbp1, NULL, &key, &data, DB_READ_UNCOMMITTED)){ + case 0: + break; + case DB_LOCK_DEADLOCK: + tpreturn(TPSUCCESS, 1L, rqst->data, 0L, 0); + default: + userlog("get: %s", db_strerror(ret)); + tpreturn(TPFAIL, 0L, rqst->data, 0L, 0); + } + + /* Write the data to db2 */ + if(writedb(dbp2, data.data, data.size) != 0){ + tpreturn(TPSUCCESS, 1L, rqst->data, 0L, 0); + } + tpreturn(TPSUCCESS, 0, rqst->data, 0L, 0); +} + diff --git a/test/xa/src2/client.c b/test/xa/src2/client.c new file mode 100644 index 00000000..517671ac --- /dev/null +++ b/test/xa/src2/client.c @@ -0,0 +1,149 @@ +/* +* Copyright (c) 1997 BEA Systems, Inc. +* All Rights Reserved +* +* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF +* BEA Systems, Inc. +* The copyright notice above does not evidence any +* actual or intended publication of such source code. +* +* This client is part of Test 2 of the XA test suite. It calls bdb1, which +* in turn calls bdb2. In the test several clients are executed at once to test +* how XA performs with multiple processes. 
+*/ + +#include +#include +#include "atmi.h" /* TUXEDO Header File */ + + +void my_exit(); +char *sendbuf, *rcvbuf; + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +void +my_exit(string) +char *string; +{ + tpfree(sendbuf); + tpfree(rcvbuf); + fprintf(stderr,"SIMPAPP:ERROR call to service %s failed \n",string); + tpterm(); + exit(1); +} + +int +main(argc, argv) +int argc; +char *argv[]; +{ + + extern char *optarg; + char * msg = NULL; + TPQCTL qctl; /* Q control structures */ + long sendlen=10, rcvlen=10, len; + int ret; + char ch; + char * service; + int times = 1; + int i; + int delete = 0; + int need_print = 0; + int nostop =0; + + if(argc < 5) { + fprintf(stderr, + "Usage: %s -s service -m messgae [-t times][-d][-p]\n", + argv[0] ); + exit(1); + } + + while ((ch = getopt(argc, argv, "s:m:t:dpn")) != EOF) + switch (ch) { + case 's': + service = optarg; + break; + case 'm': + msg = optarg; + break; + case 't': + times = atoi(optarg); + break; + case 'd': + delete = 1; + break; + case 'n': + nostop = 1; + break; + } + + /* Attach to System/T as a Client Process */ + if (tpinit((TPINIT *) NULL) == -1) { + userlog("SIMPAPP:ERROR tpinit failed \n"); + userlog("SIMPAPP:SYSTEM ERROR %s \n", tpstrerror(tperrno)); + fprintf(stderr, "Simpapp:ERROR Tpinit failed\n"); + my_exit("tpinit"); + } + + if(msg){ + sendlen = strlen(msg); + } + + /* Allocate STRING buffers for the request and the reply */ + + if((sendbuf = (char *) tpalloc("STRING", NULL, sendlen+1)) == NULL) { + fprintf(stderr,"Simpapp:ERRROR Allocating send buffer\n"); + tpterm(); + my_exit("tpalloc"); + } + + if((rcvbuf = (char *) tpalloc("STRING", NULL, sendlen+1)) == NULL) { + fprintf(stderr,"Simpapp:ERROR allocating receive buffer\n"); + tpfree(sendbuf); + tpterm(); + my_exit("tpalloc"); + } + + strcpy(sendbuf, msg); + + for(i=0; i> stderr +#./client -s WRITE -m hello2 -t 1000 2>> stderr + + +msg "SHUTTING DOWN THE TRANSACTION MANAGER." 
+echo 'y' | tmshutdown + +# Copy out any server output. +echo "STDOUT:" +cat stdout + +# Copy out any server errors. +test -s stderr && { + echo "STDERR:" + cat stderr + echo "FAIL: stderr file not empty" + exitval=1 +} + +exit $exitval + diff --git a/test/xa/src2/tuxconfig.sh b/test/xa/src2/tuxconfig.sh new file mode 100644 index 00000000..7dbeb643 --- /dev/null +++ b/test/xa/src2/tuxconfig.sh @@ -0,0 +1,53 @@ +#! /bin/sh +# +# Build the configuration file for test 2 -- +# We do this work in the shell script because we have to fill in +# + cat > $RUN/config/ubb.cfg << END_OF_UBB_FILE +*RESOURCES +IPCKEY 261110 + +DOMAINID BDBapp +MASTER L1 +MAXACCESSERS 100 +MAXSERVERS 50 +MAXSERVICES 200 +MODEL SHM +LDBAL Y + +*MACHINES + +"$MACHINE_NAME" LMID=L1 + TUXDIR="$TUXDIR" + APPDIR="$APPDIR" + TUXCONFIG="$TUXCONFIG" + TLOGDEVICE="$TLOGDEVICE" + TLOGNAME="$TLOGNAME" + TYPE="machine1" + +*GROUPS +BDBG + LMID=L1 GRPNO=1 TMSNAME=TMS_BDB TMSCOUNT=3 + OPENINFO="BERKELEY-DB:$RUN/data" + +LMSG LMID=L1 GRPNO=2 + +*SERVERS +DEFAULT: + CLOPT="-A" + +bdb1 SRVGRP=BDBG SRVID=1 + MIN=2 MAX=2 + #RQADDR="BDB1.QUEUE" REPLYQ=Y CLOPT="-A -- -t 1" + RQADDR="BDB1.QUEUE" REPLYQ=Y CLOPT="-A -p L10,10:100,10 -- -t 1" + +bdb2 SRVGRP=BDBG SRVID=11 + MIN=1 MAX=1 RQADDR="BDB2.QUEUE" REPLYQ=Y + +#LMS SRVGRP=LMSG SRVID= 1 CLOPT="-A -- -l$MACHINE_NAME:8080/tsam" + +*SERVICES +DEFAULT: + SVCTIMEOUT=30 +END_OF_UBB_FILE + tmloadcf -y $RUN/config/ubb.cfg diff --git a/test/xa/src3/client.c b/test/xa/src3/client.c new file mode 100644 index 00000000..e01fd34e --- /dev/null +++ b/test/xa/src3/client.c @@ -0,0 +1,361 @@ +/* + * This is the multithreaded test for XA. The client creates several threads + * and uses each thread to send requests to the servers, which are also + * multithreaded. There are two tests. The first one runs the client with + * two threads that sends requests to the servers then exit. In the second + * test the client creates 3 threads. 
The first 2 execute the same as in + * the first test, but the third thread calls the servers with a command + * to kill that server. This is done to test that the environment and + * servers can recover from a thread failure. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define HOME "../data" +#define TABLE1 "../data/table1.db" +#define TABLE2 "../data/table2.db" +#define NUM_SERVERS 3 + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif +static int expected_error = 0; + +char *progname; /* Client run-time name. */ + +int check_data(void); + +int +usage() +{ + fprintf(stderr, "usage: %s [-v] [-k]\n", progname); + return (EXIT_FAILURE); +} + +static int thread_error = 1; + +/* + * This function is called by the client threads. Threads 1 and 2 randomly + * call each of the servers. If thread 3 is created it calls one of the + * servers and orders it to exit. + */ +void * +call_server_thread(void *server_name) +{ + FBFR *replyBuf; + long replyLen; + char *server_names[NUM_SERVERS]; + char *name, *thread_name, *kill_thread; + int commit, j, iterations; + void *result = NULL; + TPINIT *initBuf = NULL; + kill_thread = NULL; + iterations = 100; + replyBuf = NULL; + + /* Names of the function to call in the servers. */ + server_names[0] = "TestThread1"; + server_names[1] = "TestThread2"; + thread_name = (char *)server_name; + + /* Allocate init buffer */ + if ((initBuf = (TPINIT *)tpalloc("TPINIT", NULL, TPINITNEED(0))) == 0) + goto tuxedo_err; + initBuf->flags = TPMULTICONTEXTS; + + if (tpinit(initBuf) == -1) + goto tuxedo_err; + if (verbose) + printf("%s:%s: tpinit() OK\n", progname, thread_name); + + /* Create the command to kill the server. 
*/ + if (strcmp(thread_name, "3") == 0) { + kill_thread = (char *)tpalloc("STRING", NULL, 1); + if (kill_thread == NULL) + goto tuxedo_err; + iterations = 1; + } else if (expected_error) + sleep(30); + + for (j = 0; j < iterations; j++) { + commit = 1; + if (replyBuf != NULL) + tpfree((char *)replyBuf); + + /* Randomly select a server. */ + name = server_names[j % 2]; + + /* Allocate reply buffer. */ + replyLen = 1024; + replyBuf = NULL; + if ((replyBuf = (FBFR*)tpalloc("FML32", NULL, replyLen)) + == NULL) + goto tuxedo_err; + if (verbose) + printf("%s:%s: tpalloc(\"FML32\"), reply buffer OK\n", + progname, thread_name); + + /* Begin the XA transaction. */ + if (tpbegin(60L, 0L) == -1) + goto tuxedo_err; + if (verbose) + printf("%s:%s: tpbegin() OK\n", progname, thread_name); + /* Call the server to kill it. */ + if (kill_thread != NULL) { + tpcall(name, kill_thread, 1L, (char **)&replyBuf, + &replyLen, 0L); + goto abort; + } else { + if (tpcall(name, NULL, 0L, (char **)&replyBuf, + &replyLen, TPSIGRSTRT) == -1) + + /* + * When one of the servers is killed TPNOENT or + * TPESVCERR is an expected error. + */ + if (expected_error && (tperrno == TPESVCERR || tperrno == TPENOENT || tperrno == TPETIME)) + goto abort; + else + goto tuxedo_err; + } + + /* + * Commit or abort the transaction depending the what the + * server returns. 
+ */ + commit = !tpurcode; + if (commit) { +commit: if (verbose) { + printf("%s:%s: txn success\n", progname, + thread_name); + } + if (tpcommit(0L) == -1) { + if (expected_error && tperrno == TPETIME) + continue; + else if (tperrno == TPEABORT) + continue; + else + goto tuxedo_err; + } + if (verbose) { + printf("%s:%s: tpcommit() OK\n", progname, + thread_name); + } + } else { +abort: if (verbose) { + printf("%s:%s: txn failure\n", progname, + thread_name); + } + if (tpabort(0L) == -1) { + if (expected_error && tperrno == TPETIME) + continue; + else + goto tuxedo_err; + } + if (verbose) { + printf("%s:%s: tpabort() OK\n", progname, + thread_name); + } + if (strcmp(thread_name, "3") == 0) + break; + } + } + + if (0) { +tuxedo_err: fprintf(stderr, "%s:%s: TUXEDO ERROR: %s (code %d)\n", + progname, thread_name, tpstrerror(tperrno), tperrno); + result = (void *)&thread_error; + } +end: tpterm(); + if (verbose) + printf("%s:%s: tpterm() OK\n", progname, thread_name); + + if (replyBuf != NULL) + tpfree((char *)replyBuf); + if (initBuf != NULL) + tpfree((char *)initBuf); + if(kill_thread != NULL) + tpfree((char *)kill_thread); + + return(result); +} + +/* + * Create the threads to call the servers, and check that data in the two + * databases is identical. 
+ */
+int
+main(int argc, char* argv[])
+{
+	int ch, i, ret, num_threads;
+	pthread_t threads[NUM_SERVERS];
+	void *results = NULL;
+	char *names[NUM_SERVERS];
+
+	names[0] = "1";
+	names[1] = "2";
+	names[2] = "3";
+	progname = argv[0];
+	num_threads = 2;
+
+	while ((ch = getopt(argc, argv, "n:vk")) != EOF)
+		switch (ch) {
+		case 'k':
+			num_threads = 3;
+			expected_error = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case '?':
+		default:
+			return (usage());
+		}
+	argc -= optind;
+	argv += optind;
+
+	if (verbose)
+		printf("%s: called\n", progname);
+
+
+	/* Create threads for different contexts*/
+	for (i = 0; i < num_threads; i++) {
+		if (verbose)
+			printf("calling server thread\n");
+		ret = pthread_create(&threads[i], NULL,
+		    call_server_thread, names[i]);
+		if (ret) {
+			/*
+			 * Bug fix: ret is an int error code but was passed
+			 * to a %s conversion (undefined behavior); use %d.
+			 */
+			fprintf(stderr, "%s: failed to create thread %d.\n",
+			    progname, ret);
+			goto err;
+		}
+	}
+
+	/* Wait for each thread to finish. */
+	for (i = 0; i < num_threads; i++) {
+		if ((ret = pthread_join(threads[i], &results)) != 0) {
+			/* Bug fix: same %s/int mismatch as above; use %d. */
+			fprintf(stderr, "%s: failed to join thread %d.\n",
+			    progname, ret);
+			goto err;
+		}
+		if (results != NULL)
+			goto err;
+	}
+
+	/* If the kill thread was not used, check the data in the two tables.*/
+	if (num_threads < NUM_SERVERS)
+		ret = check_data();
+
+	if (0) {
+err:		ret = EXIT_FAILURE;
+	}
+
+	return (ret);
+}
+
+/*
+ * check_data --
+ *	Compare committed data in the two tables, should be identical.
+ */
+int
+check_data()
+{
+	DB *dbp1, *dbp2;
+	DBC *dbc1, *dbc2;
+	DB_ENV *dbenv;
+	DBT key1, data1, key2, data2;
+	int ret, ret1, ret2;
+	u_int32_t flags = DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN |
+	    DB_INIT_LOCK | DB_CREATE | DB_THREAD | DB_RECOVER | DB_REGISTER;
+	char *home = HOME;
+
+	dbp1 = dbp2 = NULL;
+	dbc1 = dbc2 = NULL;
+	dbenv = NULL;
+
+	/* Join the DB environment. 
*/ + if ((ret = db_env_create(&dbenv, 0)) != 0 || + (ret = dbenv->open(dbenv, home, flags, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, home, db_strerror(ret)); + goto err; + } + + /* Open the tables. */ + if ((ret = db_create(&dbp1, dbenv, 0)) != 0 || + (ret = db_create(&dbp2, dbenv, 0)) != 0 || + ((ret = dbp1->open( + dbp1, NULL, TABLE1, NULL, DB_UNKNOWN, DB_RDONLY, 0)) != 0) || + ((ret = dbp2->open( + dbp2, NULL, TABLE2, NULL, DB_UNKNOWN, DB_RDONLY, 0)) != 0)) { + fprintf(stderr, + "%s: %s: %s\n", progname, TABLE1, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened tables OK\n", progname); + + /* Open cursors. */ + if ((ret = dbp1->cursor(dbp1, NULL, &dbc1, 0)) != 0 || + (ret = dbp2->cursor(dbp2, NULL, &dbc2, 0)) != 0) { + fprintf(stderr, + "%s: DB->cursor: %s\n", progname, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened cursors OK\n", progname); + + /* Compare the two databases, they should be identical. */ + memset(&key1, 0, sizeof(key1)); + memset(&data1, 0, sizeof(data1)); + memset(&key2, 0, sizeof(key2)); + memset(&data2, 0, sizeof(data2)); + for (;;) { + + ret1 = dbc1->c_get(dbc1, &key1, &data1, DB_NEXT); + ret2 = dbc2->c_get(dbc2, &key2, &data2, DB_NEXT); + if (ret1 != 0 || ret2 != 0) + break; + if (key1.size != key2.size || + memcmp(key1.data, key2.data, key1.size) != 0 || + data1.size != data2.size || + memcmp(data1.data, data2.data, data1.size) != 0) + goto mismatch; + } + if (ret1 != DB_NOTFOUND || ret2 != DB_NOTFOUND) { +mismatch: fprintf(stderr, + "%s: DB_ERROR: databases 1 and 2 weren't identical\n", + progname); + ret = 1; + } + +err: if (dbc1 != NULL) + (void)dbc1->c_close(dbc1); + if (dbc2 != NULL) + (void)dbc2->c_close(dbc2); + if (dbp1 != NULL) + (void)dbp1->close(dbp1, 0); + if (dbp2 != NULL) + (void)dbp2->close(dbp2, 0); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); + + return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); +} + diff --git a/test/xa/src3/run.sh b/test/xa/src3/run.sh new file mode 100644 index 00000000..f48aaf0a --- /dev/null +++ b/test/xa/src3/run.sh @@ -0,0 +1,122 @@ +#! /bin/sh +# +# Run test 3. + +msg() +{ + test "$DVERBOSE" == 1 && { + echo "========" + echo "======== $1" + echo "========" + } +} + +init_tmadmin() +{ +tmadmin << END_OF_TMADMIN + crdl -z $TLOGDEVICE -b 500 + crlog -m cluster3 +END_OF_TMADMIN +} + + +# Everything else is done in run/bin. +cd $RUN/bin + +# The CFLAGS variable defines the pre-processor defines -- start with +# whatever the user set, and add our own stuff. +# +# For debugging output, add -DDVERBOSE + +test "$DVERBOSE" == 1 && { + COMPILE_FLAGS="-DDVERBOSE" + DVERBOSE_FLAG="-v" +} +COMPILE_FLAGS="$CFLAGS $COMPILE_FLAGS -g -I../../.." + +msg "BUILDING CLIENT" +CFLAGS="$COMPILE_FLAGS"; export CFLAGS +buildclient -r BERKELEY-DB $DVERBOSE_FLAG -o client \ + -f ../../src3/client.c +test "$?" -eq 0 || { + echo "FAIL: buildclient failed." + exit 1 +} + +msg "BUILDING SERVER #1" +CFLAGS="$COMPILE_FLAGS -DSERVER1"; export CFLAGS +buildserver -r BERKELEY-DB $DVERBOSE_FLAG -t -o server1 \ + -s TestThread1:TestThread1 -f ../../src3/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING SERVER #2" +CFLAGS="$COMPILE_FLAGS -DSERVER2"; export CFLAGS +buildserver -r BERKELEY-DB $DVERBOSE_FLAG -t -o server2 \ + -s TestThread2:TestThread2 -f ../../src3/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING THE RESOURCE MANAGER." +buildtms -o DBRM -r BERKELEY-DB + +init_tmadmin + +# Boot Tuxedo. +# You should see something like: +# +# Booting admin processes ... +# +# exec BBL -A : +# process id=13845 ... Started. +# +# Booting server processes ... +# +# exec DBRM -A : +# process id=13846 ... Started. +# exec DBRM -A : +# process id=13847 ... Started. +# exec server1 -A : +# process id=13848 ... Started. +# exec server2 -A : +# process id=13849 ... 
Started. +# 5 processes started. +msg "BOOTING TUXEDO." +tmboot -y + +exitval=0 +if [ $1 == 1 ]; then + kill_thread=-k +fi +./client $kill_thread $DVERBOSE_FLAG +test "$?" -ne 0 && { + echo "FAIL: client failed" + exitval=1 + break; +} + +msg "SHUTTING DOWN THE TRANSACTION MANAGER." +echo 'y' | tmshutdown + +# Copy out any server output. +echo "STDOUT:" +cat stdout + +# Killing a thread can result in expected database "run recovery" errors +if [ $1 == 1 ]; then + exit $exitval +fi + +# Copy out any server errors. +test -s stderr && { + echo "STDERR:" + cat stderr + echo "FAIL: stderr file not empty" + exitval=1 +} + +exit $exitval \ No newline at end of file diff --git a/test/xa/src3/server.c b/test/xa/src3/server.c new file mode 100644 index 00000000..22cdca23 --- /dev/null +++ b/test/xa/src3/server.c @@ -0,0 +1,174 @@ +/* + * Multi-threaded servers that insert data into databases 1 and 2 at the + * request of the client, and can die at the request of the client. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* + * The two servers are largely identical, #ifdef the source code. + */ +#ifdef SERVER1 +#define TXN_FUNC TestThread1 +#define TXN_STRING "TestThread1" +#endif +#ifdef SERVER2 +#define TXN_FUNC TestThread2 +#define TXN_STRING "TestThread2" +#endif +void TXN_FUNC(TPSVCINFO *); + +#define HOME "../data" +#define TABLE1 "table1.db" +#define TABLE2 "table2.db" + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +DB *db1, *db2; /* Table handles. */ + +int cnt_request; /* Total requests. */ + +char *progname; /* Server run-time name. */ + +/* Called once when the server is started. Creates and opens the databases. */ +int +tpsvrinit(int argc, char* argv[]) +{ + int ret; + int dbflags = DB_AUTO_COMMIT | DB_CREATE | DB_THREAD; + + progname = argv[0]; + if (verbose) + printf("%s: called\n", progname); + + /* Open resource managers. 
*/ + if (tx_open() == TX_ERROR) { + fprintf(stderr, "tx_open: TX_ERROR\n"); + return (-1); + } + + /* Seed random number generator. */ + srand((u_int)(time(NULL) | getpid())); + + /* Open XA database handles. */ + if ((ret = db_create(&db1, NULL, DB_XA_CREATE)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return (-1); + } + db1->set_errfile(db1, stderr); + if ((ret = db1->open(db1, NULL, + TABLE1, NULL, DB_BTREE, dbflags, 0660)) != 0) { + fprintf(stderr, "DB open: %s: %s\n", TABLE1, db_strerror(ret)); + return (-1); + } + if ((ret = db_create(&db2, NULL, DB_XA_CREATE)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return (-1); + } + db2->set_errfile(db2, stderr); + if ((ret = db2->open(db2, NULL, + TABLE2, NULL, DB_BTREE, dbflags, 0660)) != 0) { + fprintf(stderr, "DB open: %s: %s\n", TABLE2, db_strerror(ret)); + return (-1); + } + + if (verbose) + printf("%s: tpsvrinit: initialization done\n", progname); + + return (0); +} + +/* Called once when the servers are shutdown. Closes the databases. */ +void +tpsvrdone() + { + if (verbose) + printf("%s: tpsvrdone: shutdown done\n", progname); + if (db1 != NULL) + (void)db1->close(db1, 0); + if (db2 != NULL) + (void)db2->close(db2, 0); + db1 = db2 = NULL; + + tx_close(); + + if (verbose) + printf("%s: tpsvrdone: shutdown done\n", progname); +} +/* + * Called by the client to insert data into the databases. Also can kill this + * thread if commanded to by the client. + */ +void +TXN_FUNC(TPSVCINFO *msg) +{ + int ret, i, commit, key_value, data_value; + DBT key, data; + + memset(&key, 0, sizeof key); + memset(&data, 0, sizeof data); + commit = 1; + ++cnt_request; + +#ifdef SERVER1 + key_value = data_value = cnt_request + 1; +#else + key_value = data_value = (rand() % 1000) + 1; +#endif + data.data = &data_value; + data.size = sizeof(data_value); + key.data = &key_value; + key.size = sizeof(key_value); + + /* Kill the server to see what happens. 
*/ + if (msg->data != NULL) { + pthread_exit(NULL); + } + + /* Insert data into the tables. */ + if (verbose) { + printf("put1: key: n"); + printf("put1: data:\n"); + } + if ((ret = db1->put(db1, NULL, &key, &data, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + goto abort; + fprintf(stderr, "%s: %s: Table1->put: %s\n", + progname, TXN_STRING, db_strerror(ret)); + goto err; + } + + if ((ret = db2->put(db2, NULL, &key, &data, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + goto abort; + fprintf(stderr, "%s: %s: Table2->put: %s\n", + progname, TXN_STRING, db_strerror(ret)); + goto err; + } + + /* Returns a commit or abort command to the client. */ + if (verbose) + printf("%s: %s: commit\n", progname, TXN_STRING); + tpreturn(TPSUCCESS, 0L, 0, 0L, 0); + if (0) { +abort: if (verbose) + printf("%s: %s: abort\n", progname, TXN_STRING); + tpreturn(TPSUCCESS, 1L, 0, 0L, 0); + } + return; + +err: tpreturn(TPFAIL, 1L, 0, 0L, 0); +} + diff --git a/test/xa/src3/tuxconfig.sh b/test/xa/src3/tuxconfig.sh new file mode 100644 index 00000000..425ccc4e --- /dev/null +++ b/test/xa/src3/tuxconfig.sh @@ -0,0 +1,56 @@ +#! /bin/sh +# +# Build the configuration file for test 3 -- +# We do this work in the shell script because we have to fill in +# lots of shell variables. 
+ + if [ $1 == 1 ]; then + IPCKEY=200104 + else + IPCKEY=200105 + fi + MACHINE_NAME=`uname -n` + cat > $RUN/config/ubb.cfg << END_OF_UBB_FILE +*RESOURCES +IPCKEY $IPCKEY +DOMAINID domain3 +MASTER cluster3 +MAXACCESSERS 16 +MAXSERVERS 6 +MAXSERVICES 16 +MODEL SHM +LDBAL N + +*MACHINES +DEFAULT: + APPDIR="$APPDIR" + TUXCONFIG="$TUXCONFIG" + TLOGDEVICE="$TLOGDEVICE" + TUXDIR="$TUXDIR" +# Machine name is 30 characters max +"$MACHINE_NAME" LMID=cluster3 + +*GROUPS +# Group name is 30 characters max +group_tm LMID=cluster3 GRPNO=1 TMSNAME=DBRM TMSCOUNT=2 OPENINFO="BERKELEY-DB:$RUN/data" + +*SERVERS +DEFAULT: + CLOPT="-A" + MINDISPATCHTHREADS=1 + MAXDISPATCHTHREADS=8 + +# Server name is 78 characters max (same for any pathname) +server1 SRVGRP=group_tm SRVID=1 MAXGEN=4 RESTART=Y +server2 SRVGRP=group_tm SRVID=2 MAXGEN=4 RESTART=Y + +*SERVICES +DEFAULT: + SVCTIMEOUT=20 +# Service name is 15 characters max +# server1 +TestThread1 +# server2 +TestThread2 +END_OF_UBB_FILE + tmloadcf -y $RUN/config/ubb.cfg diff --git a/test/xa/src4/client.c b/test/xa/src4/client.c new file mode 100644 index 00000000..7c3bb720 --- /dev/null +++ b/test/xa/src4/client.c @@ -0,0 +1,244 @@ +/* + * #19612 Data not rolled back when transaction aborted after timout. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define HOME "../data" +#define TABLE1 "../data/table1.db" +#define TABLE2 "../data/table2.db" + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +DB_ENV *dbenv; +char *progname; /* Client run-time name. 
*/ + +int check_data(); +int usage(void); + +int +main(int argc, char* argv[]) +{ + DB *dbp3; + DBT key, data; + TPINIT *initBuf; + FBFR *replyBuf; + long replyLen; + int ch, ret, i; + char *target; + char *home = HOME; + u_int32_t flags = DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN | + DB_INIT_LOCK | DB_CREATE | DB_THREAD | DB_RECOVER | DB_REGISTER; + u_int32_t dbflags = DB_CREATE | DB_THREAD; + + progname = argv[0]; + + initBuf = NULL; + ret = 0; + replyBuf = NULL; + replyLen = 1024; + + while ((ch = getopt(argc, argv, "v")) != EOF) + switch (ch) { + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (verbose) + printf("%s: called\n", progname); + + if (tpinit((TPINIT *)NULL) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpinit() OK\n", progname); + + /* Create the DB environment. */ + if ((ret = db_env_create(&dbenv, 0)) != 0 || + (ret = dbenv->open(dbenv, home, flags, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, home, db_strerror(ret)); + goto err; + } + dbenv->set_errfile(dbenv, stderr); + if (verbose) + printf("%s: opened %s OK\n", progname, home); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + /* Allocate reply buffer. */ + if ((replyBuf = (FBFR*)tpalloc("FML32", NULL, replyLen)) + == NULL) + goto tuxedo_err; + if (verbose) + printf("%s: tpalloc(\"FML32\"), reply buffer OK\n", + progname); + for (i = 0; i < 2; i++) { + if (tpbegin(10L, 0L) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpbegin() OK\n", progname); + + if (tpcall("TestTxn1", NULL, 0L, (char **)&replyBuf, + &replyLen, TPSIGRSTRT) == -1) + goto tuxedo_err; + + /* This call will timeout. 
*/ + tpcall("TestTxn2", NULL, 0L, (char **)&replyBuf, &replyLen, + TPSIGRSTRT); + if (tperrno != TPETIME) + goto tuxedo_err; + + if (i == 0) { + if (tpabort(0L) == -1) + goto tuxedo_err; + if (verbose) + printf("%s: tpabort() OK\n", progname); + } else { + /* Commit will fail due to the time out. */ + tpcommit(0L); + if (tperrno != TPEABORT) + goto tuxedo_err; + if (verbose) + printf("%s: tpcommit() OK\n", progname); + } + + + ret = check_data(); + } + + if (0) { +tuxedo_err: fprintf(stderr, "%s: TUXEDO ERROR: %s (code %d)\n", + progname, tpstrerror(tperrno), tperrno); + goto err; + } + if (0) { +err: ret = EXIT_FAILURE; + } + + if (replyBuf != NULL) + tpfree((char *)replyBuf); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); + + tpterm(); + if (verbose) + printf("%s: tpterm() OK\n", progname); + + if (verbose && ret == 0) + printf("%s: test passed.\n", progname); + else if (verbose) + printf("%s: test failed.\n", progname); + return (ret); +} + +/* + * check_data -- + * Check that both databases are empty. + */ +int +check_data() +{ + DB *dbp1, *dbp2; + DBC *dbc1, *dbc2; + DBT key1, data1, key2, data2; + int ret, ret1, ret2; + u_int32_t flags = DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_TXN | + DB_INIT_LOCK | DB_THREAD; + + dbp1 = dbp2 = NULL; + dbc1 = dbc2 = NULL; + + /* Open table #1. */ + if ((ret = db_create(&dbp1, dbenv, 0)) != 0 || + (ret = dbp1->open( + dbp1, NULL, TABLE1, NULL, DB_UNKNOWN, DB_RDONLY, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, TABLE1, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened %s OK\n", progname, TABLE1); + + /* Open table #2. */ + if ((ret = db_create(&dbp2, dbenv, 0)) != 0 || + (ret = dbp2->open( + dbp2, NULL, TABLE2, NULL, DB_UNKNOWN, DB_RDONLY, 0)) != 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, TABLE2, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened %s OK\n", progname, TABLE2); + + /* Open cursors. 
*/ + if ((ret = dbp1->cursor(dbp1, NULL, &dbc1, 0)) != 0 || + (ret = dbp2->cursor(dbp2, NULL, &dbc2, 0)) != 0) { + fprintf(stderr, + "%s: DB->cursor: %s\n", progname, db_strerror(ret)); + goto err; + } + if (verbose) + printf("%s: opened cursors OK\n", progname); + + /* Compare the two databases. */ + memset(&key1, 0, sizeof(key1)); + memset(&data1, 0, sizeof(data1)); + memset(&key2, 0, sizeof(key2)); + memset(&data2, 0, sizeof(data2)); + ret1 = dbc1->get(dbc1, &key1, &data1, DB_NEXT); + ret2 = dbc2->get(dbc2, &key2, &data2, DB_NEXT); + if (ret1 != DB_NOTFOUND || ret2 !=DB_NOTFOUND) { + fprintf(stderr, + "%s: DB_ERROR: databases 1 and 2 weren't identical\n", + progname); + ret = 1; + } + +err: if (dbc1 != NULL) + (void)dbc1->close(dbc1); + if (dbc2 != NULL) + (void)dbc2->close(dbc2); + if (dbp1 != NULL) + (void)dbp1->close(dbp1, 0); + if (dbp2 != NULL) + (void)dbp2->close(dbp2, 0); + + if (verbose && ret == 0) + printf("%s: data check passed.\n", progname); + else if (verbose) + printf("%s: data check failed.\n", progname); + + return (ret); +} + +int +usage() +{ + fprintf(stderr, "usage: %s [-v] [-n txn]\n", progname); + return (EXIT_FAILURE); +} diff --git a/test/xa/src4/run.sh b/test/xa/src4/run.sh new file mode 100644 index 00000000..f602b1d9 --- /dev/null +++ b/test/xa/src4/run.sh @@ -0,0 +1,117 @@ +#! /bin/sh +# +# Run test 4. + +msg() +{ + test "$DVERBOSE" == 1 && { + echo "========" + echo "======== $1" + echo "========" + } +} + +init_tmadmin() +{ +tmadmin << END_OF_TMADMIN + crdl -z $TLOGDEVICE -b 500 + crlog -m cluster3 +END_OF_TMADMIN +} + +# Everything is done in run/bin. +cd $RUN/bin + +# The CFLAGS variable defines the pre-processor defines -- start with +# whatever the user set, and add our own stuff. +# +# For debugging output, add -DDVERBOSE + +test "$DVERBOSE" == 1 && { + COMPILE_FLAGS="-DDVERBOSE" + DVERBOSE_FLAG="-v" +} +COMPILE_FLAGS="$CFLAGS $COMPILE_FLAGS -g -I../../.." 
+ +msg "BUILDING CLIENT" +CFLAGS="$COMPILE_FLAGS"; export CFLAGS +buildclient -r BERKELEY-DB $DVERBOSE_FLAG -o client \ + -f ../../src4/client.c +test "$?" -eq 0 || { + echo "FAIL: buildclient failed." + exit 1 +} + +msg "BUILDING SERVER #1" +CFLAGS="$COMPILE_FLAGS -DSERVER1"; export CFLAGS +buildserver -r BERKELEY-DB $DVERBOSE_FLAG -o server1 \ + -s TestTxn1:TestTxn1 \ + -f ../../src4/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING SERVER #2" +CFLAGS="$COMPILE_FLAGS -DSERVER2"; export CFLAGS +buildserver $DVERBOSE_FLAG -r BERKELEY-DB -o server2 \ + -s TestTxn2:TestTxn2 \ + -f ../../src4/server.c +test "$?" -eq 0 || { + echo "FAIL: buildserver failed." + exit 1 +} + +msg "BUILDING THE RESOURCE MANAGER." +buildtms -o DBRM -r BERKELEY-DB + +init_tmadmin + +# Boot Tuxedo. +# You should see something like: +# +# Booting admin processes ... +# +# exec BBL -A : +# process id=13845 ... Started. +# +# Booting server processes ... +# +# exec DBRM -A : +# process id=13846 ... Started. +# exec DBRM -A : +# process id=13847 ... Started. +# exec server1 -A : +# process id=13848 ... Started. +# exec server2 -A : +# process id=13849 ... Started. +# 5 processes started. +msg "BOOTING TUXEDO." +tmboot -y + +exitval=0 +msg "RUNING THE CLIENT." +./client $DVERBOSE_FLAG +test "$?" -ne 0 && { + echo "FAIL: client failed" + exitval=1 + break; +} + +msg "SHUTTING DOWN THE TRANSACTION MANAGER." +echo 'y' | tmshutdown + +# Copy out any server output. +echo "STDOUT:" +cat stdout + +# Copy out any server errors.echo "STDERR:" +cat stderr +test -s stderr && { + echo "STDERR:" + cat stderr + echo "FAIL: stderr file not empty" + exitval=1 +} + +exit $exitval \ No newline at end of file diff --git a/test/xa/src4/server.c b/test/xa/src4/server.c new file mode 100644 index 00000000..ce891310 --- /dev/null +++ b/test/xa/src4/server.c @@ -0,0 +1,149 @@ +/* + * This code is for servers used in test 4. 
Server 1 inserts + * into database 1 and Server 2 inserts into database 2, then + * sleeps for 30 seconds to cause a timeout error in the client. + */ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* + * The two servers are largely identical, #ifdef the source code. + */ +#ifdef SERVER1 +#define TXN_FUNC TestTxn1 +#define TXN_STRING "TestTxn1" +#define TABLE "table1.db" +#endif +#ifdef SERVER2 +#define TXN_FUNC TestTxn2 +#define TXN_STRING "TestTxn2" +#define TABLE "table2.db" +#endif +void TXN_FUNC(TPSVCINFO *); + +#define HOME "../data" + +#ifdef VERBOSE +static int verbose = 1; /* Debugging output. */ +#else +static int verbose = 0; +#endif + +DB *db; /* Table handle. */ + +char *progname; /* Server run-time name. */ + +/* + * Called when each server is started. It creates and opens a database. + */ +int +tpsvrinit(int argc, char* argv[]) +{ + int ret; + + progname = argv[0]; + if (verbose) + printf("%s: called\n", progname); + + /* Open resource managers. */ + if (tx_open() == TX_ERROR) { + fprintf(stderr, "tx_open: TX_ERROR\n"); + return (-1); + } + + /* Seed random number generator. */ + srand((u_int)(time(NULL) | getpid())); + + /* Open permanent XA handles. */ + if ((ret = db_create(&db, NULL, DB_XA_CREATE)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return (-1); + } + db->set_errfile(db, stderr); + if ((ret = db->open(db, NULL, + TABLE, NULL, DB_BTREE, DB_AUTO_COMMIT | DB_CREATE, 0660)) != 0) { + fprintf(stderr, "DB open: %s: %s\n", TABLE, db_strerror(ret)); + return (-1); + } + + if (verbose) + printf("%s: tpsvrinit: initialization done\n", progname); + + return (0); +} + +/* Called when the servers are shutdown. This closes the database. 
*/ +void +tpsvrdone() +{ + if (db != NULL) + (void)db->close(db, 0); + + tx_close(); + + if (verbose) + printf("%s: tpsvrdone: shutdown done\n", progname); +} + +/* + * This function is called by the client. Here Server 1 and Server 2 insert + * data into a table using XA transactions. */ +void +TXN_FUNC(TPSVCINFO *msg) +{ + DBT data; + DBT key; + int ret, val; + + val = 1; + + memset(&key, 0, sizeof(key)); + key.data = &val; + key.size = sizeof(val); + memset(&data, 0, sizeof(data)); + data.data = &val; + data.size = sizeof(val); + + /* Table 1. */ + if (verbose) { + printf("put: key in %s: %i\n", val, TABLE); + printf("put: data in %s: %i\n", val, TABLE); + } + if ((ret = db->put(db, NULL, &key, &data, 0)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + goto abort; + fprintf(stderr, "%s: %s: Table->put: %s\n", + progname, TXN_STRING, db_strerror(ret)); + goto err; + } + + /* Sleep for 30 seconds to force a timeout error. */ +#ifdef SERVER2 + sleep(30); +#endif + + tpreturn(TPSUCCESS, 0L, 0, 0L, 0); + if (0) { +abort: if (verbose) + printf("%s: %s: abort\n", progname, TXN_STRING); + tpreturn(TPSUCCESS, 1L, 0, 0L, 0); + } + return; +err: + tpreturn(TPFAIL, 0L, 0, 0L, 0); +} + diff --git a/test/xa/src4/tuxconfig.sh b/test/xa/src4/tuxconfig.sh new file mode 100644 index 00000000..1cc84c1e --- /dev/null +++ b/test/xa/src4/tuxconfig.sh @@ -0,0 +1,47 @@ +#! /bin/sh +# +# Build the configuration file for test 4 -- +# We do this work in the shell script because we have to fill in +# lots of shell variables. 
+ + MACHINE_NAME=`uname -n` + cat > $RUN/config/ubb.cfg << END_OF_UBB_FILE +*RESOURCES +IPCKEY 200103 +DOMAINID domain3 +MASTER cluster3 +MAXACCESSERS 10 +MAXSERVERS 5 +MAXSERVICES 10 +MODEL SHM +LDBAL N + +*MACHINES +DEFAULT: + APPDIR="$APPDIR" + TUXCONFIG="$TUXCONFIG" + TLOGDEVICE="$TLOGDEVICE" + TUXDIR="$TUXDIR" +# Machine name is 30 characters max +"$MACHINE_NAME" LMID=cluster3 + +*GROUPS +# Group name is 30 characters max +group_tm LMID=cluster3 GRPNO=1 TMSNAME=DBRM TMSCOUNT=2 OPENINFO="BERKELEY-DB:$RUN/data" + +*SERVERS +DEFAULT: + CLOPT="-A" + +# Server name is 78 characters max (same for any pathname) +server1 SRVGRP=group_tm SRVID=1 MAXGEN=3 RESTART=Y +server2 SRVGRP=group_tm SRVID=2 MAXGEN=3 RESTART=Y + +*SERVICES +# Service name is 15 characters max +# server1 +TestTxn1 +# server2 +TestTxn2 +END_OF_UBB_FILE + tmloadcf -y $RUN/config/ubb.cfg diff --git a/util/db_archive.c b/util/db_archive.c new file mode 100644 index 00000000..a643c194 --- /dev/null +++ b/util/db_archive.c @@ -0,0 +1,185 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + u_int32_t flags; + int ch, exitval, ret, verbose; + char **file, *home, **list, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + flags = 0; + exitval = verbose = 0; + home = passwd = NULL; + file = list = NULL; + while ((ch = getopt(argc, argv, "adh:lP:sVv")) != EOF) + switch (ch) { + case 'a': + LF_SET(DB_ARCH_ABS); + break; + case 'd': + LF_SET(DB_ARCH_REMOVE); + break; + case 'h': + home = optarg; + break; + case 'l': + LF_SET(DB_ARCH_LOG); + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5119", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 's': + LF_SET(DB_ARCH_DATA); + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + /* + * !!! + * The verbose flag no longer actually does anything, + * but it's left rather than adding it back at some + * future date. + */ + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + /* + * If attaching to a pre-existing environment fails, create a + * private one and try again. + */ + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, DB_CREATE | + DB_INIT_LOG | DB_PRIVATE | DB_USE_ENVIRON, 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* Get the list of names. */ + if ((ret = dbenv->log_archive(dbenv, &list, flags)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_archive"); + goto err; + } + + /* Print the list of names. */ + if (list != NULL) { + for (file = list; *file != NULL; ++file) + printf("%s\n", *file); + free(list); + } + + if (0) { +err: exitval = 1; + } + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-adlsVv] [-h home] [-P password]\n", progname); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. 
*/ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5120", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_checkpoint.c b/util/db_checkpoint.c new file mode 100644 index 00000000..31bef6e5 --- /dev/null +++ b/util/db_checkpoint.c @@ -0,0 +1,240 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + time_t now; + long argval; + u_int32_t flags, kbytes, minutes, seconds; + int ch, exitval, once, ret, verbose; + char *home, *logfile, *passwd, time_buf[CTIME_BUFLEN]; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + /* + * !!! + * Don't allow a fully unsigned 32-bit number, some compilers get + * upset and require it to be specified in hexadecimal and so on. 
+ */ +#define MAX_UINT32_T 2147483647 + + dbenv = NULL; + kbytes = minutes = 0; + exitval = once = verbose = 0; + flags = 0; + home = logfile = passwd = NULL; + while ((ch = getopt(argc, argv, "1h:k:L:P:p:Vv")) != EOF) + switch (ch) { + case '1': + once = 1; + flags = DB_FORCE; + break; + case 'h': + home = optarg; + break; + case 'k': + if (__db_getlong(NULL, progname, + optarg, 1, (long)MAX_UINT32_T, &argval)) + return (EXIT_FAILURE); + kbytes = (u_int32_t)argval; + break; + case 'L': + logfile = optarg; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5121", + "%s: strdup: %s\n", "%s %s"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'p': + if (__db_getlong(NULL, progname, + optarg, 1, (long)MAX_UINT32_T, &argval)) + return (EXIT_FAILURE); + minutes = (u_int32_t)argval; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + if (once == 0 && kbytes == 0 && minutes == 0) { + (void)fprintf(stderr, DB_STR_A("5122", + "%s: at least one of -1, -k and -p must be specified\n", + "%s\n"), progname); + return (usage()); + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* Log our process ID. */ + if (logfile != NULL && __db_util_logset(progname, logfile)) + goto err; + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* + * If attaching to a pre-existing environment fails, create a + * private one and try again. Turn on DB_THREAD in case a repmgr + * application wants to do checkpointing using this utility: repmgr + * requires DB_THREAD for all env handles. + */ +#ifdef HAVE_REPLICATION_THREADS +#define ENV_FLAGS (DB_THREAD | DB_USE_ENVIRON) +#else +#define ENV_FLAGS DB_USE_ENVIRON +#endif + if ((ret = dbenv->open(dbenv, home, ENV_FLAGS, 0)) != 0 && + (!once || ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_TXN | DB_PRIVATE | DB_USE_ENVIRON, 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* + * If we have only a time delay, then we'll sleep the right amount + * to wake up when a checkpoint is necessary. If we have a "kbytes" + * field set, then we'll check every 30 seconds. + */ + seconds = kbytes != 0 ? 30 : minutes * 60; + while (!__db_util_interrupted()) { + if (verbose) { + (void)time(&now); + dbenv->errx(dbenv, DB_STR_A("5123", + "checkpoint begin: %s", "%s"), + __os_ctime(&now, time_buf)); + } + + if ((ret = dbenv->txn_checkpoint(dbenv, + kbytes, minutes, flags)) != 0) { + dbenv->err(dbenv, ret, "txn_checkpoint"); + goto err; + } + + if (verbose) { + (void)time(&now); + dbenv->errx(dbenv, DB_STR_A("5124", + "checkpoint complete: %s", "%s"), + __os_ctime(&now, time_buf)); + } + + if (once) + break; + + __os_yield(dbenv->env, seconds, 0); + } + + if (0) { +err: exitval = 1; + } + + /* Clean up the logfile. */ + if (logfile != NULL) + (void)remove(logfile); + + /* Clean up the environment. 
*/ + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: %s [-1Vv]\n\t%s\n", progname, + "[-h home] [-k kbytes] [-L file] [-P password] [-p min]"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5125", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_deadlock.c b/util/db_deadlock.c new file mode 100644 index 00000000..f3c852cb --- /dev/null +++ b/util/db_deadlock.c @@ -0,0 +1,237 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + u_int32_t atype; + time_t now; + u_long secs, usecs; + int rejected, ch, exitval, ret, verbose; + char *home, *logfile, *passwd, *str, time_buf[CTIME_BUFLEN]; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + atype = DB_LOCK_DEFAULT; + home = logfile = passwd = NULL; + secs = usecs = 0; + exitval = verbose = 0; + while ((ch = getopt(argc, argv, "a:h:L:P:t:Vv")) != EOF) + switch (ch) { + case 'a': + switch (optarg[0]) { + case 'e': + atype = DB_LOCK_EXPIRE; + break; + case 'm': + atype = DB_LOCK_MAXLOCKS; + break; + case 'n': + atype = DB_LOCK_MINLOCKS; + break; + case 'o': + atype = DB_LOCK_OLDEST; + break; + case 'W': + atype = DB_LOCK_MAXWRITE; + break; + case 'w': + atype = DB_LOCK_MINWRITE; + break; + case 'y': + atype = DB_LOCK_YOUNGEST; + break; + default: + return (usage()); + /* NOTREACHED */ + } + if (optarg[1] != '\0') + return (usage()); + break; + case 'h': + home = optarg; + break; + case 'L': + logfile = optarg; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5100", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 't': + if ((str = strchr(optarg, '.')) != NULL) { + *str++ = '\0'; + if (*str != '\0' && __db_getulong( + NULL, progname, str, 0, LONG_MAX, &usecs)) + return (EXIT_FAILURE); + } + if (*optarg != '\0' && __db_getulong( + NULL, progname, optarg, 0, LONG_MAX, &secs)) + return (EXIT_FAILURE); + if (secs == 0 && usecs == 0) + return (usage()); + + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return 
(EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* Log our process ID. */ + if (logfile != NULL && __db_util_logset(progname, logfile)) + goto err; + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + if (verbose) { + (void)dbenv->set_verbose(dbenv, DB_VERB_DEADLOCK, 1); + (void)dbenv->set_verbose(dbenv, DB_VERB_WAITSFOR, 1); + } + + /* An environment is required. */ + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR("5101", "open")); + goto err; + } + + while (!__db_util_interrupted()) { + if (verbose) { + (void)time(&now); + dbenv->errx(dbenv, DB_STR_A("5102", + "running at %.24s", "%.24s"), + __os_ctime(&now, time_buf)); + } + + if ((ret = + dbenv->lock_detect(dbenv, 0, atype, &rejected)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->lock_detect"); + goto err; + } + if (verbose) + dbenv->errx(dbenv, DB_STR_A("5103", + "rejected %d locks", "%d"), rejected); + + /* Make a pass every "secs" secs and "usecs" usecs. */ + if (secs == 0 && usecs == 0) + break; + __os_yield(dbenv->env, secs, usecs); + } + + if (0) { +err: exitval = 1; + } + + /* Clean up the logfile. */ + if (logfile != NULL) + (void)remove(logfile); + + /* Clean up the environment. 
*/ + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + (void)fprintf(stderr, + "usage: %s [-Vv] [-a e | m | n | o | W | w | y]\n\t%s\n", progname, + "[-h home] [-L file] [-P password] [-t sec.usec]"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5104", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_dump.c b/util/db_dump.c new file mode 100644 index 00000000..b901a854 --- /dev/null +++ b/util/db_dump.c @@ -0,0 +1,528 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int db_init __P((DB_ENV *, char *, int, u_int32_t, int *)); +int dump_sub __P((DB_ENV *, DB *, char *, int, int)); +int main __P((int, char *[])); +int show_subs __P((DB *)); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB *dbp; + db_pgno_t first, last; + u_int32_t cache; + int ch; + int exitval, keyflag, lflag, mflag, nflag, pflag, sflag, private; + int ret, Rflag, rflag, resize; + char *data_len, *dbname, *dopt, *filename, *home, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + dbp = NULL; + exitval = lflag = mflag = nflag = pflag = rflag = Rflag = sflag = 0; + first = last = PGNO_INVALID; + keyflag = 0; + cache = MEGABYTE; + private = 0; + data_len = dbname = dopt = filename = home = passwd = NULL; + while ((ch = getopt(argc, argv, "d:D:f:F:h:klL:m:NpP:rRs:V")) != EOF) + switch (ch) { + case 'd': + dopt = optarg; + break; + case 'D': + data_len = optarg; + break; + case 'f': + if (freopen(optarg, "w", stdout) == NULL) { + fprintf(stderr, DB_STR_A("5108", + "%s: %s: reopen: %s\n", "%s %s %s\n"), + progname, optarg, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'F': + first = (db_pgno_t)strtoul(optarg, NULL, 10); + break; + case 'h': + home = optarg; + break; + case 'k': + keyflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'L': + last = (db_pgno_t)strtoul(optarg, NULL, 10); + break; + case 'm': + mflag = 1; + dbname = optarg; + break; + case 'N': + nflag = 1; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5109", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'p': + pflag = 
1; + break; + case 's': + sflag = 1; + dbname = optarg; + break; + case 'R': + Rflag = 1; + /* DB_AGGRESSIVE requires DB_SALVAGE */ + /* FALLTHROUGH */ + case 'r': + rflag = 1; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + /* + * A file name must be specified, unless we're looking for an in-memory + * db, in which case it must not. + */ + if (argc == 0 && mflag) + filename = NULL; + else if (argc == 1 && !mflag) + filename = argv[0]; + else + return (usage()); + + if (dopt != NULL && pflag) { + fprintf(stderr, DB_STR_A("5110", + "%s: the -d and -p options may not both be specified\n", + "%s\n"), progname); + return (EXIT_FAILURE); + } + if (lflag && sflag) { + fprintf(stderr, DB_STR_A("5111", + "%s: the -l and -s options may not both be specified\n", + "%s\n"), progname); + return (EXIT_FAILURE); + } + if ((lflag || sflag) && mflag) { + fprintf(stderr, DB_STR_A("5112", + "%s: the -m option may not be specified with -l or -s\n", + "%s\n"), progname); + return (EXIT_FAILURE); + } + + if (keyflag && rflag) { + fprintf(stderr, DB_STR_A("5113", + "%s: the -k and -r or -R options may not both be specified\n", + "%s\n"), progname); + return (EXIT_FAILURE); + } + + if ((mflag || sflag) && rflag) { + fprintf(stderr, DB_STR_A("5114", + "%s: the -r or R options may not be specified with -m or -s\n", + "%s\n"), progname); + return (EXIT_FAILURE); + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ +retry: if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + if (data_len != NULL) + (void)dbenv->set_data_len(dbenv, (u_int32_t)atol(data_len)); + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* Initialize the environment. */ + if (db_init(dbenv, home, rflag, cache, &private) != 0) + goto err; + + /* Create the DB object and open the file. */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + +#if 0 + Set application-specific btree compression functions here. For example: + if ((ret = dbp->set_bt_compress( + dbp, local_compress_func, local_decompress_func)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compress"); + goto err; + } +#endif + + /* + * If we're salvaging, don't do an open; it might not be safe. + * Dispatch now into the salvager. + */ + if (rflag) { + /* The verify method is a destructor. */ + ret = dbp->verify(dbp, filename, NULL, stdout, + DB_SALVAGE | + (Rflag ? DB_AGGRESSIVE : 0) | + (pflag ? DB_PRINTABLE : 0)); + dbp = NULL; + if (ret != 0) + goto err; + goto done; + } + + if ((ret = dbp->open(dbp, NULL, + filename, dbname, DB_UNKNOWN, DB_RDWRMASTER|DB_RDONLY, 0)) != 0) { + dbp->err(dbp, ret, DB_STR_A("5115", "open: %s", "%s"), + filename == NULL ? 
dbname : filename); + goto err; + } + if (private != 0) { + if ((ret = __db_util_cache(dbp, &cache, &resize)) != 0) + goto err; + if (resize) { + (void)dbp->close(dbp, 0); + dbp = NULL; + + (void)dbenv->close(dbenv, 0); + dbenv = NULL; + goto retry; + } + } + + if (dopt != NULL) { + if ((ret = + __db_dumptree(dbp, NULL, dopt, NULL, first, last)) != 0) { + dbp->err(dbp, ret, "__db_dumptree: %s", filename); + goto err; + } + } else if (lflag) { + if (dbp->get_multiple(dbp)) { + if (show_subs(dbp)) + goto err; + } else { + dbp->errx(dbp, DB_STR_A("5116", + "%s: does not contain multiple databases", "%s"), + filename); + goto err; + } + } else { + if (dbname == NULL && dbp->get_multiple(dbp)) { + if (dump_sub(dbenv, dbp, filename, pflag, keyflag)) + goto err; + } else + if (dbp->dump(dbp, NULL, + __db_pr_callback, stdout, pflag, keyflag)) + goto err; + } + + if (0) { +err: exitval = 1; + } +done: if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) { + exitval = 1; + dbenv->err(dbenv, ret, DB_STR("5117", "close")); + } + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * db_init -- + * Initialize the environment. + */ +int +db_init(dbenv, home, is_salvage, cache, is_privatep) + DB_ENV *dbenv; + char *home; + int is_salvage; + u_int32_t cache; + int *is_privatep; +{ + int ret; + + /* + * Try and use the underlying environment when opening a database. + * We wish to use the buffer pool so our information is as up-to-date + * as possible, even if the mpool cache hasn't been flushed. + * + * If we are not doing a salvage, we want to join the environment; + * if a locking system is present, this will let us use it and be + * safe to run concurrently with other threads of control. 
(We never + * need to use transactions explicitly, as we're read-only.) Note + * that in CDB, too, this will configure our environment + * appropriately, and our cursors will (correctly) do locking as CDB + * read cursors. + * + * If we are doing a salvage, the verification code will protest + * if we initialize transactions, logging, or locking; do an + * explicit DB_INIT_MPOOL to try to join any existing environment + * before we create our own. + */ + *is_privatep = 0; + if ((ret = dbenv->open(dbenv, home, + DB_USE_ENVIRON | (is_salvage ? DB_INIT_MPOOL : 0), 0)) == 0) + return (0); + if (ret == DB_VERSION_MISMATCH) + goto err; + + /* + * An environment is required because we may be trying to look at + * databases in directories other than the current one. We could + * avoid using an environment iff the -h option wasn't specified, + * but that seems like more work than it's worth. + * + * No environment exists (or, at least no environment that includes + * an mpool region exists). Create one, but make it private so that + * no files are actually created. + */ + *is_privatep = 1; + if ((ret = dbenv->set_cachesize(dbenv, 0, cache, 1)) == 0 && + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_USE_ENVIRON, 0)) == 0) + return (0); + + /* An environment is required. */ +err: dbenv->err(dbenv, ret, "DB_ENV->open"); + return (1); +} + +/* + * dump_sub -- + * Dump out the records for a DB containing subdatabases. + */ +int +dump_sub(dbenv, parent_dbp, parent_name, pflag, keyflag) + DB_ENV *dbenv; + DB *parent_dbp; + char *parent_name; + int pflag, keyflag; +{ + DB *dbp; + DBC *dbcp; + DBT key, data; + int ret; + char *subdb; + + /* + * Get a cursor and step through the database, dumping out each + * subdatabase. 
+ */ + if ((ret = parent_dbp->cursor(parent_dbp, NULL, &dbcp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->cursor"); + return (1); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + while ((ret = dbcp->get(dbcp, &key, &data, + DB_IGNORE_LEASE | DB_NEXT)) == 0) { + /* Nul terminate the subdatabase name. */ + if ((subdb = malloc(key.size + 1)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + return (1); + } + memcpy(subdb, key.data, key.size); + subdb[key.size] = '\0'; + + /* Create the DB object and open the file. */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + free(subdb); + return (1); + } + +#if 0 + Set application-specific btree compression functions here. + For example: + + if ((ret = dbp->set_bt_compress( + dbp, local_compress_func, local_decompress_func)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compress"); + goto err; + } +#endif + + if ((ret = dbp->open(dbp, NULL, + parent_name, subdb, DB_UNKNOWN, DB_RDONLY, 0)) != 0) + dbp->err(dbp, ret, + "DB->open: %s:%s", parent_name, subdb); + if (ret == 0 && dbp->dump( + dbp, subdb, __db_pr_callback, stdout, pflag, keyflag)) + ret = 1; + (void)dbp->close(dbp, 0); + free(subdb); + if (ret != 0) + return (1); + } + if (ret != DB_NOTFOUND) { + parent_dbp->err(parent_dbp, ret, "DBcursor->get"); + return (1); + } + + if ((ret = dbcp->close(dbcp)) != 0) { + parent_dbp->err(parent_dbp, ret, "DBcursor->close"); + return (1); + } + + return (0); +} + +/* + * show_subs -- + * Display the subdatabases for a database. + */ +int +show_subs(dbp) + DB *dbp; +{ + DBC *dbcp; + DBT key, data; + int ret; + + /* + * Get a cursor and step through the database, printing out the key + * of each key/data pair. 
+ */ + if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) { + dbp->err(dbp, ret, "DB->cursor"); + return (1); + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + while ((ret = dbcp->get(dbcp, &key, &data, + DB_IGNORE_LEASE | DB_NEXT)) == 0) { + if ((ret = dbp->dbenv->prdbt( + &key, 1, NULL, stdout, __db_pr_callback, 0, 0)) != 0) { + dbp->errx(dbp, NULL); + return (1); + } + } + if (ret != DB_NOTFOUND) { + dbp->err(dbp, ret, "DBcursor->get"); + return (1); + } + + if ((ret = dbcp->close(dbcp)) != 0) { + dbp->err(dbp, ret, "DBcursor->close"); + return (1); + } + return (0); +} + +/* + * usage -- + * Display the usage message. + */ +int +usage() +{ + (void)fprintf(stderr, "usage: %s [-klNprRV]\n\t%s\n", + progname, + "[-d ahr] [-f output] [-h home] [-P password] [-s database] db_file"); + (void)fprintf(stderr, "usage: %s [-kNpV] %s\n", + progname, "[-d ahr] [-f output] [-h home] -m database"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5118", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_dump185.c b/util/db_dump185.c new file mode 100644 index 00000000..49bceb95 --- /dev/null +++ b/util/db_dump185.c @@ -0,0 +1,358 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_DB_185_H +#include +#else +#include +#endif + +/* Hash Table Information */ +typedef struct hashhdr185 { /* Disk resident portion */ + int magic; /* Magic NO for hash tables */ + int version; /* Version ID */ + u_int32_t lorder; /* Byte Order */ + int bsize; /* Bucket/Page Size */ + int bshift; /* Bucket shift */ + int dsize; /* Directory Size */ + int ssize; /* Segment Size */ + int sshift; /* Segment shift */ + int ovfl_point; /* Where overflow pages are being + * allocated */ + int last_freed; /* Last overflow page freed */ + int max_bucket; /* ID of Maximum bucket in use */ + int high_mask; /* Mask to modulo into entire table */ + int low_mask; /* Mask to modulo into lower half of + * table */ + int ffactor; /* Fill factor */ + int nkeys; /* Number of keys in hash table */ +} HASHHDR185; +typedef struct htab185 { /* Memory resident data structure */ + HASHHDR185 hdr; /* Header */ +} HTAB185; + +/* Hash Table Information */ +typedef struct hashhdr186 { /* Disk resident portion */ + int32_t magic; /* Magic NO for hash tables */ + int32_t version; /* Version ID */ + int32_t lorder; /* Byte Order */ + int32_t bsize; /* Bucket/Page Size */ + int32_t bshift; /* Bucket shift */ + int32_t ovfl_point; /* Where overflow pages are being allocated */ + int32_t last_freed; /* Last overflow page freed */ + int32_t max_bucket; /* ID of Maximum bucket in use */ + int32_t high_mask; /* Mask to modulo into entire table */ + int32_t low_mask; /* Mask to modulo into lower half of table */ + int32_t ffactor; /* Fill factor */ + int32_t nkeys; /* Number of keys in hash table */ + int32_t hdrpages; /* Size of table header */ + int32_t h_charkey; /* value of hash(CHARKEY) */ +#define NCACHED 32 /* number of bit maps and spare points */ + int32_t spares[NCACHED];/* spare pages for overflow */ + /* address of overflow page bitmaps */ + u_int16_t bitmaps[NCACHED]; +} 
HASHHDR186; +typedef struct htab186 { /* Memory resident data structure */ + void *unused[2]; + HASHHDR186 hdr; /* Header */ +} HTAB186; + +typedef struct _epgno { + u_int32_t pgno; /* the page number */ + u_int16_t index; /* the index on the page */ +} EPGNO; + +typedef struct _epg { + void *page; /* the (pinned) page */ + u_int16_t index; /* the index on the page */ +} EPG; + +typedef struct _cursor { + EPGNO pg; /* B: Saved tree reference. */ + DBT key; /* B: Saved key, or key.data == NULL. */ + u_int32_t rcursor; /* R: recno cursor (1-based) */ + +#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ +#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ +#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ +#define CURS_INIT 0x08 /* RB: Cursor initialized. */ + u_int8_t flags; +} CURSOR; + +/* The in-memory btree/recno data structure. */ +typedef struct _btree { + void *bt_mp; /* memory pool cookie */ + + void *bt_dbp; /* pointer to enclosing DB */ + + EPG bt_cur; /* current (pinned) page */ + void *bt_pinned; /* page pinned across calls */ + + CURSOR bt_cursor; /* cursor */ + + EPGNO bt_stack[50]; /* stack of parent pages */ + EPGNO *bt_sp; /* current stack pointer */ + + DBT bt_rkey; /* returned key */ + DBT bt_rdata; /* returned data */ + + int bt_fd; /* tree file descriptor */ + + u_int32_t bt_free; /* next free page */ + u_int32_t bt_psize; /* page size */ + u_int16_t bt_ovflsize; /* cut-off for key/data overflow */ + int bt_lorder; /* byte order */ + /* sorted order */ + enum { NOT, BACK, FORWARD } bt_order; + EPGNO bt_last; /* last insert */ + + /* B: key comparison function */ + int (*bt_cmp) __P((DBT *, DBT *)); + /* B: prefix comparison function */ + size_t (*bt_pfx) __P((DBT *, DBT *)); + /* R: recno input function */ + int (*bt_irec) __P((struct _btree *, u_int32_t)); + + FILE *bt_rfp; /* R: record FILE pointer */ + int bt_rfd; /* R: record file descriptor */ + + void *bt_cmap; /* R: current point in mapped space */ + 
void *bt_smap; /* R: start of mapped space */ + void *bt_emap; /* R: end of mapped space */ + size_t bt_msize; /* R: size of mapped region. */ + + u_int32_t bt_nrecs; /* R: number of records */ + size_t bt_reclen; /* R: fixed record length */ + u_char bt_bval; /* R: delimiting byte/pad character */ + +/* + * NB: + * B_NODUPS and R_RECNO are stored on disk, and may not be changed. + */ +#define B_INMEM 0x00001 /* in-memory tree */ +#define B_METADIRTY 0x00002 /* need to write metadata */ +#define B_MODIFIED 0x00004 /* tree modified */ +#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ +#define B_RDONLY 0x00010 /* read-only tree */ + +#define B_NODUPS 0x00020 /* no duplicate keys permitted */ +#define R_RECNO 0x00080 /* record oriented tree */ + +#define R_CLOSEFP 0x00040 /* opened a file pointer */ +#define R_EOF 0x00100 /* end of input file reached. */ +#define R_FIXLEN 0x00200 /* fixed length records */ +#define R_MEMMAPPED 0x00400 /* memory mapped file. */ +#define R_INMEM 0x00800 /* in-memory file */ +#define R_MODIFIED 0x01000 /* modified file */ +#define R_RDONLY 0x02000 /* read-only file */ + +#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ +#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ +#define B_DB_TXN 0x10000 /* DB_TXN specified. 
*/ + u_int32_t flags; +} BTREE; + +void db_btree __P((DB *, int)); +void db_hash __P((DB *, int)); +void dbt_dump __P((DBT *)); +void dbt_print __P((DBT *)); +int main __P((int, char *[])); +int usage __P((void)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBT key, data; + int ch, pflag, rval; + + pflag = 0; + while ((ch = getopt(argc, argv, "f:p")) != EOF) + switch (ch) { + case 'f': + if (freopen(optarg, "w", stdout) == NULL) { + fprintf(stderr, "db_dump185: %s: %s\n", + optarg, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'p': + pflag = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 1) + return (usage()); + + if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_BTREE, NULL)) == NULL) { + if ((dbp = + dbopen(argv[0], O_RDONLY, 0, DB_HASH, NULL)) == NULL) { + fprintf(stderr, + "db_dump185: %s: %s\n", argv[0], strerror(errno)); + return (EXIT_FAILURE); + } + db_hash(dbp, pflag); + } else + db_btree(dbp, pflag); + + /* + * !!! + * DB 1.85 DBTs are a subset of DB 2.0 DBTs, so we just use the + * new dump/print routines. + */ + if (pflag) + while (!(rval = dbp->seq(dbp, &key, &data, R_NEXT))) { + dbt_print(&key); + dbt_print(&data); + } + else + while (!(rval = dbp->seq(dbp, &key, &data, R_NEXT))) { + dbt_dump(&key); + dbt_dump(&data); + } + + if (rval == -1) { + fprintf(stderr, "db_dump185: seq: %s\n", strerror(errno)); + return (EXIT_FAILURE); + } + return (EXIT_SUCCESS); +} + +/* + * db_hash -- + * Dump out hash header information. + */ +void +db_hash(dbp, pflag) + DB *dbp; + int pflag; +{ + HTAB185 *hash185p; + HTAB186 *hash186p; + + printf("format=%s\n", pflag ? "print" : "bytevalue"); + printf("type=hash\n"); + + /* DB 1.85 was version 2, DB 1.86 was version 3. 
*/ + hash185p = dbp->internal; + if (hash185p->hdr.version > 2) { + hash186p = dbp->internal; + printf("h_ffactor=%lu\n", (u_long)hash186p->hdr.ffactor); + if (hash186p->hdr.lorder != 0) + printf("db_lorder=%lu\n", (u_long)hash186p->hdr.lorder); + printf("db_pagesize=%lu\n", (u_long)hash186p->hdr.bsize); + } else { + printf("h_ffactor=%lu\n", (u_long)hash185p->hdr.ffactor); + if (hash185p->hdr.lorder != 0) + printf("db_lorder=%lu\n", (u_long)hash185p->hdr.lorder); + printf("db_pagesize=%lu\n", (u_long)hash185p->hdr.bsize); + } + printf("HEADER=END\n"); +} + +/* + * db_btree -- + * Dump out btree header information. + */ +void +db_btree(dbp, pflag) + DB *dbp; + int pflag; +{ + BTREE *btp; + + btp = dbp->internal; + + printf("format=%s\n", pflag ? "print" : "bytevalue"); + printf("type=btree\n"); +#ifdef NOT_AVAILABLE_IN_185 + printf("bt_minkey=%lu\n", (u_long)XXX); + printf("bt_maxkey=%lu\n", (u_long)XXX); +#endif + if (btp->bt_lorder != 0) + printf("db_lorder=%lu\n", (u_long)btp->bt_lorder); + printf("db_pagesize=%lu\n", (u_long)btp->bt_psize); + if (!(btp->flags & B_NODUPS)) + printf("duplicates=1\n"); + printf("HEADER=END\n"); +} + +static char hex[] = "0123456789abcdef"; + +/* + * dbt_dump -- + * Write out a key or data item using byte values. + */ +void +dbt_dump(dbtp) + DBT *dbtp; +{ + size_t len; + u_int8_t *p; + + for (len = dbtp->size, p = dbtp->data; len--; ++p) + (void)printf("%c%c", + hex[(*p & 0xf0) >> 4], hex[*p & 0x0f]); + printf("\n"); +} + +/* + * dbt_print -- + * Write out a key or data item using printable characters. + */ +void +dbt_print(dbtp) + DBT *dbtp; +{ + size_t len; + u_int8_t *p; + + for (len = dbtp->size, p = dbtp->data; len--; ++p) + if (isprint((int)*p)) { + if (*p == '\\') + (void)printf("\\"); + (void)printf("%c", *p); + } else + (void)printf("\\%c%c", + hex[(*p & 0xf0) >> 4], hex[*p & 0x0f]); + printf("\n"); +} + +/* + * usage -- + * Display the usage message. 
+ */ +int +usage() +{ + (void)fprintf(stderr, "usage: db_dump185 [-p] [-f file] db_file\n"); + return (EXIT_FAILURE); +} diff --git a/util/db_hotbackup.c b/util/db_hotbackup.c new file mode 100644 index 00000000..8b976f30 --- /dev/null +++ b/util/db_hotbackup.c @@ -0,0 +1,1158 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/log.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" +#include "dbinc/partition.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +enum which_open { OPEN_ORIGINAL, OPEN_HOT_BACKUP }; + +int backup_dir_clean __P((DB_ENV *, char *, char *, int *, int, int)); +int data_copy __P((DB_ENV *, char *, char *, char *, int, int)); +int env_data_copy __P((DB_ENV *, char *, char *, int, const char *)); +int env_init __P((DB_ENV **, + char *, char **, char ***, char *, enum which_open, int)); +int main __P((int, char *[])); +int read_log_dir __P((DB_ENV *, char *, int, char *, char *, int *, int, int)); +int read_data_dir __P((DB_ENV *, + char *, char *, char *, int, int, int, int, const char *)); +void save_error __P((const DB_ENV *, const char *, const char *)); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +/* + * For backporting. + */ +#if (DB_VERSION_MAJOR < 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR < 2)) +#define DB_STR_A(code, string, fmt) string +#define PART_PREFIX "__dbp." 
+#ifndef DB_WIN32 +#include "db_copy.c" +#endif +#endif + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + time_t now; + DB_ENV *dbenv; + u_int data_cnt, data_next; + int ch, checkpoint, copy_min, db_config, debug, env_copy, exitval; + int remove_max, ret, update, verbose, hotbackup_flag_set, relative; + char *backup_dir, **data_dir, **dir, *home, *log_dir, *passwd; + char home_buf[DB_MAXPATHLEN], time_buf[CTIME_BUFLEN]; + u_int32_t flags; + + /* + * Make sure all verbose message are output before any error messages + * in the case where the output is being logged into a file. This + * call has to be done before any operation is performed on the stream. + * + * Use unbuffered I/O because line-buffered I/O requires a buffer, and + * some operating systems have buffer alignment and size constraints we + * don't want to care about. There isn't enough output for the calls + * to matter. + */ + setbuf(stdout, NULL); + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + /* We default to the safe environment copy. */ + env_copy = 1; + + checkpoint = db_config = data_cnt = data_next = debug = + exitval = update = verbose = hotbackup_flag_set = 0; + data_dir = NULL; + backup_dir = home = passwd = NULL; + log_dir = NULL; + copy_min = remove_max = 0; + while ((ch = getopt(argc, argv, "b:cDd:Fgh:l:P:uVv")) != EOF) + switch (ch) { + case 'b': + backup_dir = optarg; + break; + case 'c': + checkpoint = 1; + break; + case 'D': + db_config = 1; + break; + case 'd': + /* + * User can specify a list of directories -- keep an + * array, leaving room for the trailing NULL. + */ + if (data_dir == NULL || data_next >= data_cnt - 2) { + data_cnt = data_cnt == 0 ? 
20 : data_cnt * 2; + if ((data_dir = realloc(data_dir, + data_cnt * sizeof(*data_dir))) == NULL) { + fprintf(stderr, "%s: %s\n", + progname, strerror(errno)); + return (EXIT_FAILURE); + } + } + data_dir[data_next++] = optarg; + break; + case 'F': + /* The default is to use environment copy. */ + env_copy = 0; + break; + case 'g': + debug = 1; + break; + case 'h': + home = optarg; + break; + case 'l': + log_dir = optarg; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5026", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'u': + update = 1; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* NULL-terminate any list of data directories. */ + if (data_dir != NULL) { + data_dir[data_next] = NULL; + /* + * -d is relative to the current directory, to run a checkpoint + * we must have directories relative to the environment. + */ + if (checkpoint == 1) { + fprintf(stderr, DB_STR_A("5027", + "%s: cannot specify -d and -c\n", "%s\n"), + progname); + return (usage()); + } + } + + if (db_config && (data_dir != NULL || log_dir != NULL)) { + fprintf(stderr, DB_STR_A("5028", + "%s: cannot specify -D and -d or -l\n", "%s\n"), progname); + return (usage()); + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * The home directory defaults to the environment variable DB_HOME. + * The log directory defaults to the home directory. + * + * We require a source database environment directory and a target + * backup directory. 
+ */ + if (home == NULL) { + home = home_buf; + if ((ret = __os_getenv( + NULL, "DB_HOME", &home, sizeof(home_buf))) != 0) { + fprintf(stderr, DB_STR_A("5029", + "%s failed to get environment variable DB_HOME: %s\n", + "%s %s\n"), progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + /* + * home set to NULL if __os_getenv failed to find DB_HOME. + */ + } + if (home == NULL) { + fprintf(stderr, DB_STR_A("5030", + "%s: no source database environment specified\n", + "%s\n"), progname); + return (usage()); + } + if (backup_dir == NULL) { + fprintf(stderr, DB_STR_A("5031", + "%s: no target backup directory specified\n", "%s\n"), + progname); + return (usage()); + } + + if (verbose) { + (void)time(&now); + printf(DB_STR_A("5032", "%s: hot backup started at %s", + "%s %s"), progname, __os_ctime(&now, time_buf)); + } + + /* Open the source environment. */ + if (env_init(&dbenv, home, + (db_config || log_dir != NULL) ? &log_dir : NULL, + db_config ? &data_dir : NULL, + passwd, OPEN_ORIGINAL, verbose) != 0) + goto err; + + if (env_copy) { + if ((ret = dbenv->get_open_flags(dbenv, &flags)) != 0) + goto err; + if (flags & DB_PRIVATE) { + fprintf(stderr, DB_STR_A("5129", + "%s: Cannot copy data from a PRIVATE environment\n", + "%s"), progname); + goto err; + } + } + + if (log_dir != NULL) { + if (db_config && __os_abspath(log_dir)) { + fprintf(stderr, DB_STR_A("5033", + "%s: DB_CONFIG must not contain an absolute " + "path for the log directory\n", "%s\n"), progname); + goto err; + } + } + +#if (DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR > 0)) + /* + * Record in the environment that a hot backup is in progress. + * This disables the bulk loading optimization for the + * duration of the backup. This increments a persistent + * counter in the database's environment, so it is essential + * that the corresponding decrement takes place upon + * completion of the backup. 
+ */ + if ((ret = dbenv->set_flags(dbenv, DB_HOTBACKUP_IN_PROGRESS, 1))) { + fprintf(stderr, DB_STR_A("5034", + "%s: dbenv->set_flags(DB_HOTBACKUP_IN_PROGRESS, 1): %s\n", + "%s %s\n"), progname, db_strerror(ret)); + goto err; + } + hotbackup_flag_set = 1; +#endif + + /* + * If the -c option is specified, checkpoint the source home + * database environment, and remove any unnecessary log files. + */ + if (checkpoint) { + if (verbose) + printf(DB_STR_A("5035", "%s: %s: force checkpoint\n", + "%s %s\n"), progname, home); + if ((ret = + dbenv->txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->txn_checkpoint"); + goto err; + } + if (!update) { + if (verbose) + printf(DB_STR_A("5036", + "%s: %s: remove unnecessary log files\n", + "%s %s\n"), progname, home); + if ((ret = dbenv->log_archive(dbenv, + NULL, DB_ARCH_REMOVE)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_archive"); + goto err; + } + } + } + + /* + * If the target directory for the backup does not exist, create it + * with mode read-write-execute for the owner. Ignore errors here, + * it's simpler and more portable to just always try the create. If + * there's a problem, we'll fail with reasonable errors later. + */ + (void)__os_mkdir(NULL, backup_dir, DB_MODE_700); + + /* + * If -u was specified, remove all log files; if -u was not specified, + * remove all files. + * + * Potentially there are two directories to clean, the log directory + * and the target directory. First, clean up the log directory if + * it's different from the target directory, then clean up the target + * directory. 
+ */ + if (db_config && log_dir != NULL && + backup_dir_clean( + dbenv, backup_dir, log_dir, &remove_max, update, verbose) != 0) + goto err; + if (backup_dir_clean(dbenv, + backup_dir, NULL, &remove_max, update, verbose) != 0) + goto err; + + /* + * If the -u option was not specified, copy all database files found in + * the database environment home directory, or any directory specified + * using the -d option, into the target directory for the backup. + */ + if (!update) { + if (read_data_dir(dbenv, home, backup_dir, + home, verbose, db_config, env_copy, 0, passwd) != 0) + goto err; + /* + * Use a DB_CONFIG if it exists and neither -D/-d was set. + */ + relative = 0; + if (!db_config && data_dir == NULL) { + (void)dbenv->get_data_dirs( + dbenv, (const char ***)&data_dir); + relative = 1; + } + + if (data_dir != NULL) + for (dir = data_dir; *dir != NULL; ++dir) { + /* + * Don't allow absolute path names taken from + * the DB_CONFIG file -- running recovery with + * them would corrupt the source files. + */ + if (db_config && __os_abspath(*dir)) { + fprintf(stderr, DB_STR_A("5037", +"%s: data directory '%s' is absolute path, not permitted with -D option\n", + "%s %s\n"), progname, *dir); + goto err; + } + if (read_data_dir(dbenv, home, backup_dir, + *dir, verbose, db_config, + env_copy, relative, passwd) != 0) + goto err; + } + } + + /* + * Copy all log files found in the directory specified by the -l option + * (or in the database environment home directory, if no -l option was + * specified), into the target directory for the backup. + * + * The log directory defaults to the home directory. + */ + if (read_log_dir(dbenv, home, db_config, backup_dir, + log_dir, ©_min, update, verbose) != 0) + goto err; + /* + * If we're updating a snapshot, the lowest-numbered log file copied + * into the backup directory should be less than, or equal to, the + * highest-numbered log file removed from the backup directory during + * cleanup. 
+ */ + if (update && remove_max < copy_min && + !(remove_max == 0 && copy_min == 1)) { + fprintf(stderr, DB_STR_A("5038", + "%s: the largest log file removed (%d) must be greater\n", + "%s %d\n"), progname, remove_max); + fprintf(stderr, DB_STR_A("5039", + "%s: than or equal the smallest log file copied (%d)\n", + "%s %d\n"), progname, copy_min); + goto err; + } + +#if (DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR > 0)) + /* Turn off the hotbackup flag in the environment now. */ + if ((ret = dbenv->set_flags(dbenv, DB_HOTBACKUP_IN_PROGRESS, 0))) { + fprintf(stderr, + "%s: dbenv->set_flags(DB_HOTBACKUP_IN_PROGRESS, 0): %s\n", + progname, db_strerror(ret)); + goto err; + } + hotbackup_flag_set = 0; +#endif + + /* Close the source environment. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + dbenv = NULL; + goto err; + } + /* Perform catastrophic recovery on the hot backup. */ + if (verbose) + printf(DB_STR_A("5040", "%s: %s: run catastrophic recovery\n", + "%s %s\n"), progname, backup_dir); + if (env_init(&dbenv, + backup_dir, NULL, NULL, passwd, OPEN_HOT_BACKUP, verbose) != 0) + goto err; + + /* + * Remove any unnecessary log files from the hot backup. + * For debugging purposes, leave them around. 
+ */ + if (debug == 0) { + if (verbose) + printf(DB_STR_A("5041", + "%s: %s: remove unnecessary log files\n", + "%s %s\n"), progname, backup_dir); + if ((ret = + dbenv->log_archive(dbenv, NULL, DB_ARCH_REMOVE)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_archive"); + goto err; + } + } + + if (0) { +err: exitval = 1; + } +#if (DB_VERSION_MAJOR > 5 || (DB_VERSION_MAJOR == 5 && DB_VERSION_MINOR > 0)) + if (hotbackup_flag_set && + (ret = dbenv->set_flags(dbenv, DB_HOTBACKUP_IN_PROGRESS, 0))) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->set_flags(DB_HOTBACKUP_IN_PROGRESS, 0): %s\n", + progname, db_strerror(ret)); + } +#endif + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (exitval == 0) { + if (verbose) { + (void)time(&now); + printf(DB_STR_A("5042", + "%s: hot backup completed at %s", + "%s %s"), progname, __os_ctime(&now, time_buf)); + } + } else { + fprintf(stderr, DB_STR_A("5043", "%s: HOT BACKUP FAILED!\n", + "%s\n"), progname); + } + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); + +} + +/* + * env_init -- + * Open a database environment. + */ +int +env_init(dbenvp, home, log_dirp, data_dirp, passwd, which, verbose) + DB_ENV **dbenvp; + char *home, **log_dirp, ***data_dirp, *passwd; + enum which_open which; + int verbose; +{ + DB_ENV *dbenv; + int ret; + + *dbenvp = NULL; + + /* + * Create an environment object and initialize it for error reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (1); + } + + dbenv->set_errfile(dbenv, stderr); + setbuf(stderr, NULL); + dbenv->set_errpfx(dbenv, progname); + + /* Any created intermediate directories are created private. 
*/ + if ((ret = dbenv->set_intermediate_dir_mode(dbenv, "rwx------")) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_intermediate_dir_mode"); + return (1); + } + + /* + * If a log directory has been specified, and it's not the same as the + * home directory, set it for the environment. + */ + if (log_dirp != NULL && *log_dirp != NULL && + (ret = dbenv->set_lg_dir(dbenv, *log_dirp)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_lg_dir: %s", *log_dirp); + return (1); + } + + /* Optionally set the password. */ + if (passwd != NULL && + (ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_encrypt"); + return (1); + } + + switch (which) { + case OPEN_ORIGINAL: + /* + * Opening the database environment we're trying to back up. + * We try to attach to a pre-existing environment; if that + * fails, we create a private environment and try again. + */ + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, DB_CREATE | + DB_INIT_LOG | DB_INIT_TXN | DB_PRIVATE | DB_USE_ENVIRON, + 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open: %s", home); + return (1); + } + if (log_dirp != NULL && *log_dirp == NULL) + (void)dbenv->get_lg_dir(dbenv, (const char **)log_dirp); + if (data_dirp != NULL && *data_dirp == NULL) + (void)dbenv->get_data_dirs( + dbenv, (const char ***)data_dirp); + break; + case OPEN_HOT_BACKUP: + /* + * Opening the backup copy of the database environment. We + * better be the only user, we're running recovery. + * Ensure that there at least minimal cache for worst + * case page size. 
+ */ + if ((ret = + dbenv->set_cachesize(dbenv, 0, 64 * 1024 * 10, 0)) != 0) { + dbenv->err(dbenv, + ret, "DB_ENV->set_cachesize: %s", home); + return (1); + } + if (verbose == 1) + (void)dbenv->set_verbose(dbenv, DB_VERB_RECOVERY, 1); + if ((ret = dbenv->open(dbenv, home, DB_CREATE | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE | + DB_RECOVER_FATAL | DB_USE_ENVIRON, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open: %s", home); + return (1); + } + break; + } + + *dbenvp = dbenv; + return (0); +} + +/* + * backup_dir_clean -- + * Clean out the backup directory. + */ +int +backup_dir_clean(dbenv, backup_dir, log_dir, remove_maxp, update, verbose) + DB_ENV *dbenv; + char *backup_dir, *log_dir; + int *remove_maxp, update, verbose; +{ + ENV *env; + int cnt, fcnt, ret, v; + char **names, *dir, buf[DB_MAXPATHLEN], path[DB_MAXPATHLEN]; + + env = dbenv->env; + + /* We may be cleaning a log directory separate from the target. */ + if (log_dir != NULL) { + if ((size_t)snprintf(buf, sizeof(buf), "%s%c%s", + backup_dir, PATH_SEPARATOR[0] ,log_dir) >= sizeof(buf)) { + dbenv->errx(dbenv, DB_STR_A("5044", + "%s%c%s: path too long", "%s %c %s"), + backup_dir, PATH_SEPARATOR[0] ,log_dir); + return (1); + } + dir = buf; + } else + dir = backup_dir; + + /* Get a list of file names. */ + if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) { + if (log_dir != NULL && !update) + return (0); + dbenv->err(dbenv, ret, DB_STR_A("5045", "%s: directory read", + "%s"), dir); + return (1); + } + for (cnt = fcnt; --cnt >= 0;) { + /* + * Skip non-log files (if update was specified). + */ + if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1)) { + if (update) + continue; + } else { + /* Track the highest-numbered log file removed. 
*/ + v = atoi(names[cnt] + sizeof(LFPREFIX) - 1); + if (*remove_maxp < v) + *remove_maxp = v; + } + if ((size_t)snprintf(path, sizeof(path), "%s%c%s", + dir, PATH_SEPARATOR[0], names[cnt]) >= sizeof(path)) { + dbenv->errx(dbenv, DB_STR_A("5046", + "%s%c%s: path too long", "%s %c %s"), + dir, PATH_SEPARATOR[0], names[cnt]); + return (1); + } + if (verbose) + printf(DB_STR_A("5047", "%s: removing %s\n", + "%s %s\n"), progname, path); + if (__os_unlink(env, path, 0) != 0) + return (1); + } + + __os_dirfree(env, names, fcnt); + + if (verbose && *remove_maxp != 0) + printf(DB_STR_A("5048", + "%s: highest numbered log file removed: %d\n", "%s %d\n"), + progname, *remove_maxp); + + return (0); +} + +/* + * read_data_dir -- + * Read a directory looking for databases to copy. + */ +int +read_data_dir(dbenv, + home, backup_dir, dir, verbose, db_config, env_copy, relative, passwd) + DB_ENV *dbenv; + char *home, *backup_dir, *dir; + int verbose, db_config, env_copy, relative; + const char *passwd; +{ + ENV *env; + int cnt, fcnt, ret; + char *bd, **names; + char buf[DB_MAXPATHLEN], bbuf[DB_MAXPATHLEN]; + + env = dbenv->env; + + bd = backup_dir; + if (db_config && dir != home) { + /* Build a path name to the destination. */ + if ((size_t)(cnt = snprintf(bbuf, sizeof(bbuf), "%s%c%s%c", + backup_dir, PATH_SEPARATOR[0], + dir, PATH_SEPARATOR[0])) >= sizeof(buf)) { + dbenv->errx(dbenv, DB_STR_A("5049", + "%s%c%s: path too long", "%s %c %s"), + backup_dir, PATH_SEPARATOR[0], dir); + return (1); + } + bd = bbuf; + + /* Create the path. */ + if ((ret = __db_mkpath(env, bd)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("5050", + "%s: cannot create", "%s"), bd); + return (1); + } + /* step on the trailing '/' */ + bd[cnt - 1] = '\0'; + + } + if ((relative && !__os_abspath(dir)) || (db_config && dir != home)) { + /* Build a path name to the source. 
*/ + if ((size_t)snprintf(buf, sizeof(buf), + "%s%c%s", home, PATH_SEPARATOR[0], dir) >= sizeof(buf)) { + dbenv->errx(dbenv, DB_STR_A("5051", + "%s%c%s: path too long", "%s %c %s"), + home, PATH_SEPARATOR[0], dir); + return (1); + } + dir = buf; + } + /* Get a list of file names. */ + if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("5052", "%s: directory read", + "%s"), dir); + return (1); + } + for (cnt = fcnt; --cnt >= 0;) { + /* + * Skip files in DB's name space (but not Queue extent files or + * the replicated system database, we need them). + */ + if (!strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1)) + continue; + if (!strncmp(names[cnt], + DB_REGION_PREFIX, sizeof(DB_REGION_PREFIX) - 1) && + (env_copy || strncmp(names[cnt], + QUEUE_EXTENT_PREFIX, sizeof(QUEUE_EXTENT_PREFIX) - 1)) +#ifdef HAVE_PARTITION + && + strncmp(names[cnt], + PART_PREFIX, sizeof(PART_PREFIX) - 1) +#endif +#if (DB_VERSION_MAJOR >= 5) + && + strncmp(names[cnt], + REPSYSDBNAME, sizeof(REPSYSDBNAME) - 1) +#endif + ) + continue; + + /* + * Skip DB_CONFIG. + */ + if (!db_config && + !strncmp(names[cnt], "DB_CONFIG", sizeof("DB_CONFIG"))) + continue; + + /* + * Copy the file. + * By default we copy through the environment, this works + * in all cases. If the OS or Filesystem supports atomic + * reads then we can read directly from the filesystem. + * This is known to be true only for UNIX based operating + * systems and not LINUX or Windows based operating systems. + */ + if (env_copy) + ret = env_data_copy(dbenv, + names[cnt], bd, verbose, passwd); + /* The file might not be a database. */ + if (!env_copy || ret == ENOENT || ret == EINVAL) + ret = data_copy(dbenv, names[cnt], dir, bd, 0, verbose); + if (ret != 0) + return (1); + } + + __os_dirfree(env, names, fcnt); + + return (0); +} + +/* + * read_log_dir -- + * Read a directory looking for log files to copy. 
If home + * is passed then we are possibly using a log dir in the destination, + * following DB_CONFIG configuration. + */ +int +read_log_dir(dbenv, home, db_config, backup_dir, log_dir, copy_minp, + update, verbose) + DB_ENV *dbenv; + char *home, *backup_dir, *log_dir; + int *copy_minp, db_config, update, verbose; +{ + ENV *env; + u_int32_t aflag; + int cnt, ret, v; + char **begin, **names, *backupd, *logd; + char from[DB_MAXPATHLEN], to[DB_MAXPATHLEN]; + char cfpath[DB_MAXPATHLEN]; + + env = dbenv->env; + + /* + * If the log directory is specified from DB_CONFIG then it is + * located relative to the the log file source dircectory. If + * the log directory is set from the -l argument and it is not an + * absolute path it is also located relative to the log file source + * directory. Otherwise the log file source directory is home. + */ + if (db_config && log_dir != NULL) { + if ((size_t)snprintf(from, sizeof(from), "%s%c%s", + home, PATH_SEPARATOR[0], log_dir) >= sizeof(from)) { + dbenv->errx(dbenv, DB_STR_A("5053", + "%s%c%s: path too long", "%s %c %s"), + home, PATH_SEPARATOR[0], log_dir); + return (1); + } + logd = strdup(from); + + /* + * With a DB_CONFIG file, logs are copied to the specified + * location relative to the backup_dir. Otherwise the logs are + * copied to the backup_dir. + */ + if ((size_t)(cnt = snprintf(to, sizeof(to), + "%s%c%s%c", backup_dir, PATH_SEPARATOR[0], + log_dir, PATH_SEPARATOR[0])) >= sizeof(to)) { + dbenv->errx(dbenv, DB_STR_A("5054", + "%s%c%s: path too long", "%s %c %s"), + backup_dir, PATH_SEPARATOR[0], log_dir); + return (1); + } + backupd = strdup(to); + + /* Create the backup log directory. */ + if ((ret = __db_mkpath(env, backupd)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("5055", + "%s: cannot create", "%s"), backupd); + return (1); + } + /* Step on the trailing '/'. */ + backupd[cnt - 1] = '\0'; + } else { + /* + * Use a DB_CONFIG if it exists and neither -D/-l was set. 
+ */ + if (log_dir == NULL) { + (void)dbenv->get_lg_dir(dbenv, (const char **)&log_dir); + + if (log_dir) + logd = log_dir; + else + logd = log_dir = home; + } else { + logd = log_dir; + /* + * Do we have -l and an existing DB_CONFIG? That is a + * usage problem, but for backward compatibility, keep + * going if log_dir happens to be the same as the + * DB_CONFIG path. + */ + if ((size_t)snprintf(cfpath, sizeof(cfpath), + "%s%c%s", home, PATH_SEPARATOR[0], "DB_CONFIG") >= + sizeof(cfpath)) { + dbenv->errx(dbenv,DB_STR_A("5056", + "%s%c%s: path too long", "%s %c %s"), + home, PATH_SEPARATOR[0], "DB_CONFIG"); + return (1); + } + if (__os_exists(NULL, cfpath, NULL) == 0) { + if (strcmp(log_dir,dbenv->db_log_dir)) { + fprintf(stderr, DB_STR_A("5057", + "%s: cannot specify -l with conflicting DB_CONFIG file\n", + "%s\n"), progname); + return (usage()); + } else + fprintf(stderr, DB_STR_A("5058", + "%s: use of -l with DB_CONFIG file is deprecated\n", + "%s\n"), progname); + } + } + + if (logd != home && !__os_abspath(logd)) { + if ((size_t)snprintf(from, sizeof(from), + "%s%c%s", home, PATH_SEPARATOR[0], log_dir) + >= sizeof(from)) { + dbenv->errx(dbenv, DB_STR_A("5059", + "%s%c%s: path too long", + "%s %c %s"), home, + PATH_SEPARATOR[0], log_dir); + return 1; + } + logd = strdup(from); + } + backupd = backup_dir; + } + +again: aflag = DB_ARCH_LOG; + + /* + * If this is an update and we are deleting files, first process + * those files that can be removed, then repeat with the rest. + */ + if (update) + aflag = 0; + + /* Flush the log to get latest info. */ + if ((ret = dbenv->log_flush(dbenv, NULL)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_flush"); + return (1); + } + + /* Get a list of file names to be copied. 
*/ + if ((ret = dbenv->log_archive(dbenv, &names, aflag)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_archive"); + return (1); + } + if (names == NULL) + goto done; + begin = names; + for (; *names != NULL; names++) { + /* Track the lowest-numbered log file copied. */ + v = atoi(*names + sizeof(LFPREFIX) - 1); + if (*copy_minp == 0 || *copy_minp > v) + *copy_minp = v; + + if ((size_t)snprintf(from, sizeof(from), "%s%c%s", + logd, PATH_SEPARATOR[0], *names) >= sizeof(from)) { + dbenv->errx(dbenv, DB_STR_A("5060", + "%s%c%s: path too long", "%s %c %s"), + logd, PATH_SEPARATOR[0], *names); + return (1); + } + + /* + * If we're going to remove the file, attempt to rename the + * instead of copying and then removing. The likely failure + * is EXDEV (source and destination are on different volumes). + * Fall back to a copy, regardless of the error. We don't + * worry about partial contents, the copy truncates the file + * on open. + */ + if (update) { + if ((size_t)snprintf(to, sizeof(to), "%s%c%s", + backupd, PATH_SEPARATOR[0], *names) >= sizeof(to)) { + dbenv->errx(dbenv, DB_STR_A("5061", + "%s%c%s: path too long", "%s %c %s"), + backupd, PATH_SEPARATOR[0], *names); + return (1); + } + if (__os_rename(env, from, to, 1) == 0) { + if (verbose) + printf(DB_STR_A("5062", + "%s: moving %s to %s\n", + "%s %s %s\n"), + progname, from, to); + continue; + } + } + + /* Copy the file. 
*/ + if (data_copy(dbenv, *names, logd, backupd, 1, verbose) != 0) + return (1); + + if (update) { + if (verbose) + printf(DB_STR_A("5063", "%s: removing %s\n", + "%s %s\n"), progname, from); + if ((ret = __os_unlink(env, from, 0)) != 0) { + dbenv->err(dbenv, ret, DB_STR_A("5064", + "unlink of %s failed", "%s"), from); + return (1); + } + } + + } + + free(begin); +done: if (update) { + update = 0; + goto again; + } + + if (verbose && *copy_minp != 0) + printf(DB_STR_A("5065", + "%s: lowest numbered log file copied: %d\n", "%s %d\n"), + progname, *copy_minp); + if (logd != log_dir) + free(logd); + if (backupd != backup_dir) + free(backupd); + + return (0); +} + +/* + * data_copy -- + * Copy a file into the backup directory. + */ +int +data_copy(dbenv, file, from_dir, to_dir, log, verbose) + DB_ENV *dbenv; + char *file, *from_dir, *to_dir; + int log, verbose; +{ + DB_FH *rfhp, *wfhp; + ENV *env; + size_t nr, nw; + int ret; + char *buf; + + rfhp = wfhp = NULL; + env = dbenv->env; + ret = 0; + + if (verbose) + printf(DB_STR_A("5066", "%s: copying %s%c%s to %s%c%s\n", + "%s %s %c %s %s %c %s\n"), progname, from_dir, + PATH_SEPARATOR[0], file, to_dir, PATH_SEPARATOR[0], file); + + /* + * We MUST copy multiples of the page size, atomically, to ensure a + * database page is not updated by another thread of control during + * the copy. + * + * !!! + * The current maximum page size for Berkeley DB is 64KB; we will have + * to increase this value if the maximum page size is ever more than a + * megabyte + */ + if ((buf = malloc(MEGABYTE)) == NULL) { + dbenv->err(dbenv, errno, DB_STR_A("5067", + "%lu buffer allocation", "%lu"), (u_long)MEGABYTE); + return (1); + } + + /* Open the input file. 
*/ + if (snprintf(buf, MEGABYTE, "%s%c%s", + from_dir, PATH_SEPARATOR[0], file) >= MEGABYTE) { + dbenv->errx(dbenv, DB_STR_A("5068", "%s%c%s: path too long", + "%s %c %s"), from_dir, PATH_SEPARATOR[0], file); + goto err; + } + if ((ret = __os_open(env, buf, 0, DB_OSO_RDONLY, 0, &rfhp)) != 0) { + if (ret == ENOENT && !log) { + ret = 0; + if (verbose) + printf(DB_STR_A("5069", + "%s: %s%c%s not present\n", "%s %s %c %s\n"), + progname, from_dir, PATH_SEPARATOR[0], file); + goto done; + } + dbenv->err(dbenv, ret, "%s", buf); + goto err; + } + + /* Open the output file. */ + if (snprintf(buf, MEGABYTE, "%s%c%s", + to_dir, PATH_SEPARATOR[0], file) >= MEGABYTE) { + dbenv->errx(dbenv, DB_STR_A("5070", "%s%c%s: path too long", + "%s %c %s"), to_dir, PATH_SEPARATOR[0], file); + goto err; + } + if ((ret = __os_open(env, buf, 0, + DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0) { + dbenv->err(dbenv, ret, "%s", buf); + goto err; + } + + /* Copy the data. */ + while ((ret = __os_read(env, rfhp, buf, MEGABYTE, &nr)) == 0 && + nr > 0) + if ((ret = __os_write(env, wfhp, buf, nr, &nw)) != 0) + break; + + if (0) { +err: ret = 1; + } +done: if (buf != NULL) + free(buf); + + if (rfhp != NULL && __os_closehandle(env, rfhp) != 0) + ret = 1; + + /* We may be running on a remote filesystem; force the flush. */ + if (wfhp != NULL) { + if (__os_fsync(env, wfhp) != 0) + ret = 1; + if (__os_closehandle(env, wfhp) != 0) + ret = 1; + } + return (ret); +} + +char *saved_prefix; +char *saved_errstr; +void save_error(dbenv, prefix, errstr) + const DB_ENV *dbenv; + const char *prefix; + const char *errstr; +{ + COMPQUIET(dbenv, NULL); + saved_prefix = strdup(prefix); + saved_errstr = strdup(errstr); +} + +/* + * env_data_copy -- + * Copy a file into the backup directory through the environment. 
+ */
+int
+env_data_copy(dbenv, file, to_dir, verbose, passwd)
+	DB_ENV *dbenv;
+	char *file, *to_dir;
+	int verbose;
+	const char *passwd;
+{
+	FILE *savefile;
+	int ret;
+
+	ret = 0;
+
+	if (verbose)
+		printf(DB_STR_A("5066", "%s: copying database %s to %s%c%s",
+		    "%s %s %s %c %s"), progname,
+		    file, to_dir, PATH_SEPARATOR[0], file);
+
+	/*
+	 * Redirect error reporting into save_error() and suppress the error
+	 * file while db_copy() runs, so expected "not a database" failures
+	 * (ENOENT/EINVAL) stay quiet; the saved error file is restored on
+	 * every path out of this call.
+	 */
+	dbenv->set_errcall(dbenv, save_error);
+	dbenv->get_errfile(dbenv, &savefile);
+	dbenv->set_errfile(dbenv, NULL);
+	if ((ret = db_copy(dbenv, file, to_dir, passwd)) != 0) {
+		if (ret != ENOENT && ret != EINVAL) {
+			if (saved_errstr != NULL)
+				fprintf(stderr, "%s: %s\n",
+				    saved_prefix, saved_errstr);
+			else
+				fprintf(stderr, " %s\n", db_strerror(ret));
+		} else if (verbose)
+			printf(": not a database\n");
+
+		dbenv->set_errcall(dbenv, NULL);
+		dbenv->set_errfile(dbenv, savefile);
+		goto err;
+	}
+	dbenv->set_errcall(dbenv, NULL);
+	dbenv->set_errfile(dbenv, savefile);
+
+	if (verbose)
+		printf("\n");
+
+	/* Release any error text captured by save_error() on this call. */
+err:	if (saved_prefix != NULL)
+		free(saved_prefix);
+	if (saved_errstr != NULL)
+		free(saved_errstr);
+	saved_prefix = saved_errstr = NULL;
+	return (ret);
+}
+
+/*
+ * usage --
+ *	Print a usage message and return a failure exit code.
+ */
+int
+usage()
+{
+	(void)fprintf(stderr, "usage: %s [-cDuVv]\n\t%s\n", progname,
+	    "[-d data_dir ...] [-h home] [-l log_dir] [-P password] -b backup_dir");
+	return (EXIT_FAILURE);
+}
+
+/*
+ * version_check --
+ *	Verify the run-time DB library matches the version this utility
+ *	was compiled against (major/minor must agree; patch may differ).
+ */
+int
+version_check()
+{
+	int v_major, v_minor, v_patch;
+
+	/* Make sure we're loaded with the right version of the DB library. */
+	(void)db_version(&v_major, &v_minor, &v_patch);
+	if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) {
+		fprintf(stderr, DB_STR_A("5071",
+		    "%s: version %d.%d doesn't match library version %d.%d\n",
+		    "%s %d %d %d %d\n"), progname,
+		    DB_VERSION_MAJOR, DB_VERSION_MINOR,
+		    v_major, v_minor);
+		return (EXIT_FAILURE);
+	}
+	return (0);
+}
diff --git a/util/db_load.c b/util/db_load.c
new file mode 100644
index 00000000..4d78727d
--- /dev/null
+++ b/util/db_load.c
@@ -0,0 +1,1502 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +typedef struct { /* XXX: Globals. */ + const char *progname; /* Program name. */ + char *hdrbuf; /* Input file header. */ + u_long lineno; /* Input file line number. */ + u_long origline; /* Original file line number. */ + int endodata; /* Reached the end of a database. */ + int endofile; /* Reached the end of the input. */ + int version; /* Input version. */ + char *home; /* Env home. */ + char *passwd; /* Env passwd. */ + int private; /* Private env. */ + u_int32_t cache; /* Env cache size. */ +} LDG; + +int badend __P((DB_ENV *)); +void badnum __P((DB_ENV *)); +int configure __P((DB_ENV *, DB *, char **, char **, int *)); +int convprintable __P((DB_ENV *, char *, char **)); +int db_init __P((DB_ENV *, char *, u_int32_t, int *)); +int dbt_rdump __P((DB_ENV *, DBT *)); +int dbt_rprint __P((DB_ENV *, DBT *)); +int dbt_rrecno __P((DB_ENV *, DBT *, int)); +int dbt_to_recno __P((DB_ENV *, DBT *, db_recno_t *)); +int env_create __P((DB_ENV **, LDG *)); +void free_keys __P((DBT *part_keys)); +int load __P((DB_ENV *, char *, DBTYPE, char **, u_int, LDG *, int *)); +int main __P((int, char *[])); +int rheader __P((DB_ENV *, DB *, DBTYPE *, char **, int *, int *, DBT **)); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +#define G(f) ((LDG *)dbenv->app_private)->f + + /* Flags to the load function. */ +#define LDF_NOHEADER 0x01 /* No dump header. */ +#define LDF_NOOVERWRITE 0x02 /* Don't overwrite existing rows. */ +#define LDF_PASSWORD 0x04 /* Encrypt created databases. 
*/ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + enum { NOTSET, FILEID_RESET, LSN_RESET, STANDARD_LOAD } mode; + extern char *optarg; + extern int optind; + DBTYPE dbtype; + DB_ENV *dbenv; + LDG ldg; + u_int ldf; + int ch, existed, exitval, ret; + char **clist, **clp; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((exitval = version_check()) != 0) + goto done; + + ldg.progname = progname; + ldg.lineno = 0; + ldg.endodata = ldg.endofile = 0; + ldg.version = 1; + ldg.cache = MEGABYTE; + ldg.hdrbuf = NULL; + ldg.home = NULL; + ldg.passwd = NULL; + + mode = NOTSET; + ldf = 0; + exitval = existed = 0; + dbtype = DB_UNKNOWN; + + /* Allocate enough room for configuration arguments. */ + if ((clp = clist = + (char **)calloc((size_t)argc + 1, sizeof(char *))) == NULL) { + fprintf(stderr, "%s: %s\n", ldg.progname, strerror(ENOMEM)); + exitval = 1; + goto done; + } + + /* + * There are two modes for db_load: -r and everything else. The -r + * option zeroes out the database LSN's or resets the file ID, it + * doesn't really "load" a new database. The functionality is in + * db_load because we don't have a better place to put it, and we + * don't want to create a new utility for just that functionality. 
+ */ + while ((ch = getopt(argc, argv, "c:f:h:nP:r:Tt:V")) != EOF) + switch (ch) { + case 'c': + if (mode != NOTSET && mode != STANDARD_LOAD) { + exitval = usage(); + goto done; + } + mode = STANDARD_LOAD; + + *clp++ = optarg; + break; + case 'f': + if (mode != NOTSET && mode != STANDARD_LOAD) { + exitval = usage(); + goto done; + } + mode = STANDARD_LOAD; + + if (freopen(optarg, "r", stdin) == NULL) { + fprintf(stderr, DB_STR_A("5072", + "%s: %s: reopen: %s\n", "%s %s %s\n"), + ldg.progname, optarg, strerror(errno)); + exitval = usage(); + goto done; + } + break; + case 'h': + ldg.home = optarg; + break; + case 'n': + if (mode != NOTSET && mode != STANDARD_LOAD) { + exitval = usage(); + goto done; + } + mode = STANDARD_LOAD; + + ldf |= LDF_NOOVERWRITE; + break; + case 'P': + ldg.passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (ldg.passwd == NULL) { + fprintf(stderr, DB_STR_A("5073", + "%s: strdup: %s\n", "%s %s\n"), + ldg.progname, strerror(errno)); + exitval = usage(); + goto done; + } + ldf |= LDF_PASSWORD; + break; + case 'r': + if (mode == STANDARD_LOAD) { + exitval = usage(); + goto done; + } + if (strcmp(optarg, "lsn") == 0) + mode = LSN_RESET; + else if (strcmp(optarg, "fileid") == 0) + mode = FILEID_RESET; + else { + exitval = usage(); + goto done; + } + break; + case 'T': + if (mode != NOTSET && mode != STANDARD_LOAD) { + exitval = usage(); + goto done; + } + mode = STANDARD_LOAD; + + ldf |= LDF_NOHEADER; + break; + case 't': + if (mode != NOTSET && mode != STANDARD_LOAD) { + exitval = usage(); + goto done; + } + mode = STANDARD_LOAD; + + if (strcmp(optarg, "btree") == 0) { + dbtype = DB_BTREE; + break; + } + if (strcmp(optarg, "hash") == 0) { + dbtype = DB_HASH; + break; + } + if (strcmp(optarg, "recno") == 0) { + dbtype = DB_RECNO; + break; + } + if (strcmp(optarg, "queue") == 0) { + dbtype = DB_QUEUE; + break; + } + exitval = usage(); + goto done; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + 
case '?': + default: + exitval = usage(); + goto done; + } + argc -= optind; + argv += optind; + + if (argc != 1) { + exitval = usage(); + goto done; + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object initialized for error reporting, and + * then open it. + */ + if (env_create(&dbenv, &ldg) != 0) + goto err; + + /* If we're resetting the LSNs, that's an entirely separate path. */ + switch (mode) { + case FILEID_RESET: + exitval = dbenv->fileid_reset( + dbenv, argv[0], ldf & LDF_PASSWORD ? DB_ENCRYPT : 0); + break; + case LSN_RESET: + exitval = dbenv->lsn_reset( + dbenv, argv[0], ldf & LDF_PASSWORD ? DB_ENCRYPT : 0); + break; + case NOTSET: + case STANDARD_LOAD: + while (!ldg.endofile) + if (load(dbenv, argv[0], dbtype, clist, ldf, + &ldg, &existed) != 0) + goto err; + break; + } + + if (0) { +err: exitval = 1; + } + if ((ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", ldg.progname, db_strerror(ret)); + } + + /* Resend any caught signal. */ + __db_util_sigresend(); + free(clist); + if (ldg.passwd != NULL) + free(ldg.passwd); + + /* + * Return 0 on success, 1 if keys existed already, and 2 on failure. + * + * Technically, this is wrong, because exit of anything other than + * 0 is implementation-defined by the ANSI C standard. I don't see + * any good solutions that don't involve API changes. + */ +done: + return (exitval == 0 ? (existed == 0 ? 0 : 1) : 2); +} + +/* + * load -- + * Load a database. 
+ */ +int +load(dbenv, name, argtype, clist, flags, ldg, existedp) + DB_ENV *dbenv; + char *name, **clist; + DBTYPE argtype; + u_int flags; + LDG *ldg; + int *existedp; +{ + DB *dbp; + DBC *dbc; + DBT key, rkey, data, *part_keys, *readp, *writep; + DBTYPE dbtype; + DB_HEAP_RID rid; + DB_TXN *ctxn, *txn; + db_recno_t recno, datarecno; + u_int32_t put_flags; + int ascii_recno, checkprint, hexkeys, keyflag, keys, resize, ret, rval; + char *subdb; + + put_flags = LF_ISSET(LDF_NOOVERWRITE) ? DB_NOOVERWRITE : 0; + G(endodata) = 0; + + dbc = NULL; + subdb = NULL; + ctxn = txn = NULL; + part_keys = NULL; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + memset(&rkey, 0, sizeof(DBT)); + +retry_db: + dbtype = DB_UNKNOWN; + keys = -1; + hexkeys = -1; + keyflag = -1; + + /* Create the DB object. */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + + /* Read the header -- if there's no header, we expect flat text. */ + if (LF_ISSET(LDF_NOHEADER)) { + checkprint = 1; + dbtype = argtype; + } else { + if (rheader(dbenv, + dbp, &dbtype, &subdb, &checkprint, &keys, &part_keys) != 0) + goto err; + if (G(endofile)) + goto done; + } + + /* + * Apply command-line configuration changes. (We apply command-line + * configuration changes to all databases that are loaded, e.g., all + * subdatabases.) 
+ */ + if (configure(dbenv, dbp, clist, &subdb, &keyflag)) + goto err; + + if (keys != 1) { + if (keyflag == 1) { + dbp->err(dbp, EINVAL, DB_STR("5074", + "No keys specified in file")); + goto err; + } + } + else if (keyflag == 0) { + dbp->err(dbp, EINVAL, DB_STR("5075", + "Keys specified in file")); + goto err; + } + else + keyflag = 1; + + if (dbtype == DB_BTREE || dbtype == DB_HASH) { + if (keyflag == 0) + dbp->err(dbp, + EINVAL, DB_STR("5076", + "Btree and Hash must specify keys")); + else + keyflag = 1; + } + + if (dbtype == DB_HEAP && keyflag == 1) { + dbp->err(dbp, + EINVAL, DB_STR("5127", "Heap must not specify keys")); + goto err; + } + + if (argtype != DB_UNKNOWN) { + if (dbtype == DB_HEAP) { + dbenv->errx(dbenv, DB_STR("5128", + "improper database type conversion specified")); + goto err; + } + + if (dbtype == DB_RECNO || dbtype == DB_QUEUE) + if (keyflag != 1 && argtype != DB_RECNO && + argtype != DB_QUEUE) { + dbenv->errx(dbenv, DB_STR("5077", + "improper database type conversion specified")); + goto err; + } + dbtype = argtype; + } + + if (dbtype == DB_UNKNOWN) { + dbenv->errx(dbenv, DB_STR("5078", + "no database type specified")); + goto err; + } + + if (dbtype == DB_HEAP) + put_flags = DB_APPEND; + + if (keyflag == -1) + keyflag = 0; + + /* + * Recno keys have only been printed in hexadecimal starting + * with db_dump format version 3 (DB 3.2). + * + * !!! + * Note that version is set in rheader(), which must be called before + * this assignment. + */ + hexkeys = (G(version) >= 3 && keyflag == 1 && checkprint == 0); + + if (keyflag == 1 && (dbtype == DB_RECNO || dbtype == DB_QUEUE)) + ascii_recno = 1; + else + ascii_recno = 0; + + /* If configured with a password, encrypt databases we create. */ + if (LF_ISSET(LDF_PASSWORD) && + (ret = dbp->set_flags(dbp, DB_ENCRYPT)) != 0) { + dbp->err(dbp, ret, "DB->set_flags: DB_ENCRYPT"); + goto err; + } + +#if 0 + Set application-specific btree comparison, compression, or hash + functions here. 
For example: + + if ((ret = dbp->set_bt_compare(dbp, local_comparison_func)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compare"); + goto err; + } + if ((ret = dbp->set_bt_compress(dbp, local_compress_func, + local_decompress_func)) != 0) { + dbp->err(dbp, ret, "DB->set_bt_compress"); + goto err; + } + if ((ret = dbp->set_h_hash(dbp, local_hash_func)) != 0) { + dbp->err(dbp, ret, "DB->set_h_hash"); + goto err; + } +#endif + + /* Open the DB file. */ + if ((ret = dbp->open(dbp, NULL, name, subdb, dbtype, + DB_CREATE | (TXN_ON(dbenv->env) ? DB_AUTO_COMMIT : 0), + DB_MODE_666)) != 0) { + dbp->err(dbp, ret, "DB->open: %s", name); + goto err; + } + if (ldg->private != 0) { + if ((ret = __db_util_cache(dbp, &ldg->cache, &resize)) != 0) + goto err; + if (resize) { + if ((ret = dbp->close(dbp, 0)) != 0) + goto err; + dbp = NULL; + if ((ret = dbenv->close(dbenv, 0)) != 0) + goto err; + if ((ret = env_create(&dbenv, ldg)) != 0) + goto err; + goto retry_db; + } + } + + /* Initialize the key/data pair. */ + readp = writep = &key; + if (dbtype == DB_RECNO || dbtype == DB_QUEUE) { + key.size = sizeof(recno); + if (keyflag) { + key.data = &datarecno; + if (checkprint) { + readp = &rkey; + goto key_data; + } + } else + key.data = &recno; + } else if (dbtype == DB_HEAP) { + key.size = sizeof(DB_HEAP_RID); + key.data = &rid; + } else +key_data: if ((readp->data = malloc(readp->ulen = 1024)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + goto err; + } + if ((data.data = malloc(data.ulen = 1024)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + goto err; + } + + if (TXN_ON(dbenv->env) && + (ret = dbenv->txn_begin(dbenv, NULL, &txn, 0)) != 0) + goto err; + + if (put_flags == 0 && (ret = dbp->cursor(dbp, + txn, &dbc, DB_CURSOR_BULK)) != 0) + goto err; + + /* Get each key/data pair and add them to the database. 
*/ + for (recno = 1; !__db_util_interrupted(); ++recno) { + if (!keyflag) { + if (checkprint) { + if (dbt_rprint(dbenv, &data)) + goto err; + } else { + if (dbt_rdump(dbenv, &data)) + goto err; + } + } else { + if (checkprint) { + if (dbt_rprint(dbenv, readp)) + goto err; + if (ascii_recno && + dbt_to_recno(dbenv, readp, &datarecno) != 0) + goto err; + + if (!G(endodata) && dbt_rprint(dbenv, &data)) + goto odd_count; + } else { + if (ascii_recno) { + if (dbt_rrecno(dbenv, readp, hexkeys)) + goto err; + } else + if (dbt_rdump(dbenv, readp)) + goto err; + + if (!G(endodata) && dbt_rdump(dbenv, &data)) { +odd_count: dbenv->errx(dbenv, DB_STR("5079", + "odd number of key/data pairs")); + goto err; + } + } + } + if (G(endodata)) + break; +retry: + if (put_flags != 0 && txn != NULL) + if ((ret = dbenv->txn_begin(dbenv, txn, &ctxn, 0)) != 0) + goto err; + switch (ret = ((put_flags == 0) ? + dbc->put(dbc, writep, &data, DB_KEYLAST) : + dbp->put(dbp, ctxn, writep, &data, put_flags))) { + case 0: + if (ctxn != NULL) { + if ((ret = + ctxn->commit(ctxn, DB_TXN_NOSYNC)) != 0) + goto err; + ctxn = NULL; + } + break; + case DB_KEYEXIST: + *existedp = 1; + dbenv->errx(dbenv, DB_STR_A("5080", + "%s: line %d: key already exists, not loaded:", + "%s %d"), name, + !keyflag ? recno : recno * 2 - 1); + + (void)dbenv->prdbt(&key, + checkprint, 0, stderr, __db_pr_callback, 0, 0); + break; + case DB_LOCK_DEADLOCK: + /* If we have a child txn, retry--else it's fatal. 
*/ + if (ctxn != NULL) { + if ((ret = ctxn->abort(ctxn)) != 0) + goto err; + ctxn = NULL; + goto retry; + } + /* FALLTHROUGH */ + default: + dbenv->err(dbenv, ret, NULL); + if (ctxn != NULL) { + (void)ctxn->abort(ctxn); + ctxn = NULL; + } + goto err; + } + if (ctxn != NULL) { + if ((ret = ctxn->abort(ctxn)) != 0) + goto err; + ctxn = NULL; + } + } +done: rval = 0; + if (dbc != NULL && (ret = dbc->close(dbc)) != 0) { + dbc = NULL; + goto err; + } + if (txn != NULL && (ret = txn->commit(txn, 0)) != 0) { + txn = NULL; + goto err; + } + + if (0) { +err: rval = 1; + if (dbc != NULL) + (void)dbc->close(dbc); + if (txn != NULL) + (void)txn->abort(txn); + } + + /* Close the database. */ + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->close"); + rval = 1; + } + + if (G(hdrbuf) != NULL) + free(G(hdrbuf)); + G(hdrbuf) = NULL; + /* Free allocated memory. */ + if (subdb != NULL) + free(subdb); + if (dbtype != DB_HEAP && + dbtype != DB_RECNO && dbtype != DB_QUEUE && key.data != NULL) + free(key.data); + if (rkey.data != NULL) + free(rkey.data); + free(data.data); + free_keys(part_keys); + return (rval); +} + +/* + * env_create -- + * Create the environment and initialize it for error reporting. + */ +int +env_create(dbenvp, ldg) + DB_ENV **dbenvp; + LDG *ldg; +{ + DB_ENV *dbenv; + int ret; + + if ((ret = db_env_create(dbenvp, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + ldg->progname, db_strerror(ret)); + return (ret); + } + dbenv = *dbenvp; + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, ldg->progname); + if (ldg->passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + ldg->passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + return (ret); + } + if ((ret = db_init(dbenv, ldg->home, ldg->cache, &ldg->private)) != 0) + return (ret); + dbenv->app_private = ldg; + + return (0); +} + +/* + * db_init -- + * Initialize the environment. 
+ */ +int +db_init(dbenv, home, cache, is_private) + DB_ENV *dbenv; + char *home; + u_int32_t cache; + int *is_private; +{ + u_int32_t flags; + int ret; + + *is_private = 0; + /* We may be loading into a live environment. Try and join. */ + flags = DB_USE_ENVIRON | + DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN; + if ((ret = dbenv->open(dbenv, home, flags, 0)) == 0) + return (0); + if (ret == DB_VERSION_MISMATCH) + goto err; + + /* + * We're trying to load a database. + * + * An environment is required because we may be trying to look at + * databases in directories other than the current one. We could + * avoid using an environment iff the -h option wasn't specified, + * but that seems like more work than it's worth. + * + * No environment exists (or, at least no environment that includes + * an mpool region exists). Create one, but make it private so that + * no files are actually created. + */ + LF_CLR(DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN); + LF_SET(DB_CREATE | DB_PRIVATE); + *is_private = 1; + if ((ret = dbenv->set_cachesize(dbenv, 0, cache, 1)) != 0) { + dbenv->err(dbenv, ret, "set_cachesize"); + return (1); + } + if ((ret = dbenv->open(dbenv, home, flags, 0)) == 0) + return (0); + + /* An environment is required. 
*/ +err: dbenv->err(dbenv, ret, "DB_ENV->open"); + return (1); +} + +#define FLAG(name, value, keyword, flag) \ + if (strcmp(name, keyword) == 0) { \ + switch (*value) { \ + case '1': \ + if ((ret = dbp->set_flags(dbp, flag)) != 0) { \ + dbp->err(dbp, ret, "%s: set_flags: %s", \ + G(progname), name); \ + goto err; \ + } \ + break; \ + case '0': \ + break; \ + default: \ + badnum(dbenv); \ + goto err; \ + } \ + continue; \ + } +#define NUMBER(name, value, keyword, func, t) \ + if (strcmp(name, keyword) == 0) { \ + if ((ret = __db_getlong(dbenv, \ + NULL, value, 0, LONG_MAX, &val)) != 0 || \ + (ret = dbp->func(dbp, (t)val)) != 0) \ + goto nameerr; \ + continue; \ + } +#define STRING(name, value, keyword, func) \ + if (strcmp(name, keyword) == 0) { \ + if ((ret = dbp->func(dbp, value[0])) != 0) \ + goto nameerr; \ + continue; \ + } + +/* + * The code to check a command-line or input header argument against a list + * of configuration options. It's #defined because it's used in two places + * and the two places have gotten out of sync more than once. 
+ */
+/*
+ * NOTE(review): this macro expects `name', `value', `dbp', `dbenv' and
+ * `ret' to be in the caller's scope, jumps to the caller's nameerr/err
+ * labels on failure, and `continue's the caller's enclosing loop when a
+ * keyword matches.
+ */
+#define	CONFIGURATION_LIST_COMPARE					\
+	NUMBER(name, value, "bt_minkey", set_bt_minkey, u_int32_t);	\
+	FLAG(name, value, "chksum", DB_CHKSUM);				\
+	NUMBER(name, value, "db_lorder", set_lorder, int);		\
+	NUMBER(name, value, "db_pagesize", set_pagesize, u_int32_t);	\
+	FLAG(name, value, "duplicates", DB_DUP);			\
+	FLAG(name, value, "dupsort", DB_DUPSORT);			\
+	NUMBER(name, value, "extentsize", set_q_extentsize, u_int32_t);	\
+	NUMBER(name, value, "h_ffactor", set_h_ffactor, u_int32_t);	\
+	NUMBER(name, value, "h_nelem", set_h_nelem, u_int32_t);		\
+	NUMBER(name, value, "re_len", set_re_len, u_int32_t);		\
+	STRING(name, value, "re_pad", set_re_pad);			\
+	FLAG(name, value, "recnum", DB_RECNUM);				\
+	FLAG(name, value, "renumber", DB_RENUMBER);			\
+	if (strcmp(name, "compressed") == 0) {				\
+		switch (*value) {					\
+		case '1':						\
+			if ((ret = dbp->set_bt_compress(dbp, NULL,	\
+			    NULL)) != 0)				\
+				goto nameerr;				\
+			break;						\
+		case '0':						\
+			break;						\
+		default:						\
+			badnum(dbenv);					\
+			goto err;					\
+		}							\
+		continue;						\
+	}
+
+/*
+ * configure --
+ *	Handle command-line configuration options.
+ */ +int +configure(dbenv, dbp, clp, subdbp, keysp) + DB_ENV *dbenv; + DB *dbp; + char **clp, **subdbp; + int *keysp; +{ + long val; + int ret, savech; + char *name, *value; + u_int32_t heap_bytes, heap_gbytes; + + heap_bytes = heap_gbytes = 0; + + for (; (name = *clp) != NULL; *--value = savech, ++clp) { + if ((value = strchr(name, '=')) == NULL) { + dbp->errx(dbp, DB_STR("5081", + "command-line configuration uses name=value format")); + return (1); + } + savech = *value; + *value++ = '\0'; + + if (strcmp(name, "database") == 0 || + strcmp(name, "subdatabase") == 0) { + if (*subdbp != NULL) + free(*subdbp); + if ((*subdbp = strdup(value)) == NULL) { + dbp->err(dbp, ENOMEM, NULL); + return (1); + } + continue; + } + if (strcmp(name, "keys") == 0) { + if (strcmp(value, "1") == 0) + *keysp = 1; + else if (strcmp(value, "0") == 0) + *keysp = 0; + else { + badnum(dbenv); + return (1); + } + continue; + } + + CONFIGURATION_LIST_COMPARE; + + /* Have to special case heap size, because we need 2 values. */ + if (strcmp(name, "heap_bytes") == 0) { + if ((ret = __db_getlong(dbenv, + NULL, value, 0, LONG_MAX, &val)) != 0) + goto nameerr; + heap_bytes = (u_int32_t)val; + } + if (strcmp(name, "heap_gbytes") == 0) { + if ((ret = __db_getlong(dbenv, + NULL, value, 0, LONG_MAX, &val)) != 0) + goto nameerr; + heap_gbytes = (u_int32_t)val; + } + + dbp->errx(dbp, DB_STR_A("5082", + "unknown command-line configuration keyword \"%s\"", + "%s"), name); + return (1); + } + if (heap_gbytes != 0 || heap_bytes != 0) + if ((ret = dbp->set_heapsize(dbp, + heap_gbytes, heap_bytes, 0)) != 0) + goto heaperr; + return (0); + +nameerr: + dbp->err(dbp, ret, "%s: %s=%s", G(progname), name, value); + if (0) { +heaperr: dbp->err(dbp, ret, "%s: heap_gbytes/heap_bytes=%lu/%lu", + G(progname), heap_gbytes, heap_bytes); + } +err: return (1); +} + +/* + * rheader -- + * Read the header message. 
+ */ +int +rheader(dbenv, dbp, dbtypep, subdbp, checkprintp, keysp, part_keyp) + DB_ENV *dbenv; + DB *dbp; + DBTYPE *dbtypep; + char **subdbp; + int *checkprintp, *keysp; + DBT **part_keyp; +{ + DBT *keys, *kp; + size_t buflen, linelen, start; + long val; + int ch, first, hdr, ret; + char *buf, *name, *p, *value; + u_int32_t heap_bytes, heap_gbytes, i, nparts; + + *dbtypep = DB_UNKNOWN; + *checkprintp = 0; + name = NULL; + *part_keyp = NULL; + keys = NULL; + heap_bytes = heap_gbytes = 0; + + /* + * We start with a smallish buffer; most headers are small. + * We may need to realloc it for a large subdatabase name. + */ + buflen = 4096; + if (G(hdrbuf) == NULL) { + hdr = 0; + if ((buf = malloc(buflen)) == NULL) + goto memerr; + G(hdrbuf) = buf; + G(origline) = G(lineno); + } else { + hdr = 1; + buf = G(hdrbuf); + G(lineno) = G(origline); + } + + start = 0; + for (first = 1;; first = 0) { + ++G(lineno); + + /* Read a line, which may be of arbitrary length, into buf. */ + linelen = 0; + buf = &G(hdrbuf)[start]; + if (hdr == 0) { + for (;;) { + if ((ch = getchar()) == EOF) { + if (!first || ferror(stdin)) + goto badfmt; + G(endofile) = 1; + break; + } + + /* + * If the buffer is too small, double it. + */ + if (linelen + start == buflen) { + G(hdrbuf) = + realloc(G(hdrbuf), buflen *= 2); + if (G(hdrbuf) == NULL) + goto memerr; + buf = &G(hdrbuf)[start]; + } + + if (ch == '\n') + break; + + buf[linelen++] = ch; + } + if (G(endofile) == 1) + break; + buf[linelen++] = '\0'; + } else + linelen = strlen(buf) + 1; + start += linelen; + + if (name != NULL) { + free(name); + name = NULL; + } + /* If we don't see the expected information, it's an error. */ + if ((name = strdup(buf)) == NULL) + goto memerr; + if ((p = strchr(name, '=')) == NULL) + goto badfmt; + *p++ = '\0'; + + value = p--; + + if (name[0] == '\0') + goto badfmt; + + /* + * The only values that may be zero-length are database names. 
+ * In the original Berkeley DB code it was possible to create + * zero-length database names, and the db_load code was then + * changed to allow such databases to be be dumped and loaded. + * [#8204] + */ + if (strcmp(name, "database") == 0 || + strcmp(name, "subdatabase") == 0) { + if ((ret = convprintable(dbenv, value, subdbp)) != 0) { + dbp->err(dbp, ret, DB_STR("5083", + "error reading db name")); + goto err; + } + continue; + } + + /* No other values may be zero-length. */ + if (value[0] == '\0') + goto badfmt; + + if (strcmp(name, "HEADER") == 0) + break; + if (strcmp(name, "VERSION") == 0) { + /* + * Version 1 didn't have a "VERSION" header line. We + * only support versions 1, 2, and 3 of the dump format. + */ + G(version) = atoi(value); + + if (G(version) > 3) { + dbp->errx(dbp, DB_STR_A("5084", + "line %lu: VERSION %d is unsupported", + "%lu %d"), G(lineno), G(version)); + goto err; + } + continue; + } + if (strcmp(name, "format") == 0) { + if (strcmp(value, "bytevalue") == 0) { + *checkprintp = 0; + continue; + } + if (strcmp(value, "print") == 0) { + *checkprintp = 1; + continue; + } + goto badfmt; + } + if (strcmp(name, "type") == 0) { + if (strcmp(value, "btree") == 0) { + *dbtypep = DB_BTREE; + continue; + } + if (strcmp(value, "hash") == 0) { + *dbtypep = DB_HASH; + continue; + } + if (strcmp(value, "heap") == 0) { + *dbtypep = DB_HEAP; + continue; + } + if (strcmp(value, "recno") == 0) { + *dbtypep = DB_RECNO; + continue; + } + if (strcmp(value, "queue") == 0) { + *dbtypep = DB_QUEUE; + continue; + } + dbp->errx(dbp, DB_STR_A("5085", + "line %lu: unknown type", "%lu"), G(lineno)); + goto err; + } + if (strcmp(name, "keys") == 0) { + if (strcmp(value, "1") == 0) + *keysp = 1; + else if (strcmp(value, "0") == 0) + *keysp = 0; + else { + badnum(dbenv); + goto err; + } + continue; + } + if (strcmp(name, "nparts") == 0) { + if ((ret = __db_getlong(dbenv, + NULL, value, 0, LONG_MAX, &val)) != 0) { + badnum(dbenv); + goto err; + } + nparts = (u_int32_t) 
val; + if ((keys = + malloc(nparts * sizeof(DBT))) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + goto err; + } + keys[nparts - 1].data = NULL; + kp = keys; + for (i = 1; i < nparts; kp++, i++) { + if ((kp->data = + malloc(kp->ulen = 1024)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + goto err; + } + if (*checkprintp) { + if (dbt_rprint(dbenv, kp)) + goto err; + } else { + if (dbt_rdump(dbenv, kp)) + goto err; + } + } + if ((ret = dbp->set_partition( + dbp, nparts, keys, NULL)) != 0) + goto err; + + *part_keyp = keys; + + continue; + } + + CONFIGURATION_LIST_COMPARE; + + /* Have to special case heap size, because we need 2 values. */ + if (strcmp(name, "heap_bytes") == 0) { + if ((ret = __db_getlong(dbenv, + NULL, value, 0, LONG_MAX, &val)) != 0) + goto nameerr; + heap_bytes = (u_int32_t)val; + } + if (strcmp(name, "heap_gbytes") == 0) { + if ((ret = __db_getlong(dbenv, + NULL, value, 0, LONG_MAX, &val)) != 0) + goto nameerr; + heap_gbytes = (u_int32_t)val; + } + + dbp->errx(dbp, DB_STR_A("5086", + "unknown input-file header configuration keyword \"%s\"", + "%s"), name); + goto err; + } + ret = 0; + + if (heap_gbytes != 0 || heap_bytes != 0) + if ((ret = dbp->set_heapsize(dbp, + heap_gbytes, heap_bytes, 0)) != 0) + goto heaperr; + + if (0) { +nameerr: dbp->err(dbp, ret, "%s: %s=%s", G(progname), name, value); + ret = 1; + } + if (0) { +heaperr: dbp->err(dbp, ret, "%s: heap_gbytes/heap_bytes=%lu/%lu", + G(progname), (u_long)heap_gbytes, (u_long)heap_bytes); + ret = 1; + } + if (0) { +badfmt: dbp->errx(dbp, DB_STR_A("5087", + "line %lu: unexpected format", "%lu"), G(lineno)); + ret = 1; + } + if (0) { +memerr: dbp->errx(dbp, DB_STR("5088", + "unable to allocate memory")); +err: ret = 1; + } + if (name != NULL) + free(name); + if (ret != 0) { + *part_keyp = NULL; + free_keys(keys); + } + return (ret); +} + +void free_keys(part_keys) + DBT *part_keys; +{ + DBT *kp; + if (part_keys != NULL) { + for (kp = part_keys; kp->data != NULL; kp++) + free(kp->data); + 
free(part_keys); + } +} + +/* + * Macro to convert a pair of hex bytes to a decimal value. + * + * !!! + * Note that this macro is side-effect safe. This was done deliberately, + * callers depend on it. + */ +#define DIGITIZE(store, v1, v2) { \ + char _v1, _v2; \ + _v1 = (v1); \ + _v2 = (v2); \ + if ((_v1) > 'f' || (_v2) > 'f') \ + return (badend(dbenv)); \ + (store) = \ + ((_v1) == '0' ? 0 : \ + ((_v1) == '1' ? 1 : \ + ((_v1) == '2' ? 2 : \ + ((_v1) == '3' ? 3 : \ + ((_v1) == '4' ? 4 : \ + ((_v1) == '5' ? 5 : \ + ((_v1) == '6' ? 6 : \ + ((_v1) == '7' ? 7 : \ + ((_v1) == '8' ? 8 : \ + ((_v1) == '9' ? 9 : \ + ((_v1) == 'a' ? 10 : \ + ((_v1) == 'b' ? 11 : \ + ((_v1) == 'c' ? 12 : \ + ((_v1) == 'd' ? 13 : \ + ((_v1) == 'e' ? 14 : 15))))))))))))))) << 4 | \ + ((_v2) == '0' ? 0 : \ + ((_v2) == '1' ? 1 : \ + ((_v2) == '2' ? 2 : \ + ((_v2) == '3' ? 3 : \ + ((_v2) == '4' ? 4 : \ + ((_v2) == '5' ? 5 : \ + ((_v2) == '6' ? 6 : \ + ((_v2) == '7' ? 7 : \ + ((_v2) == '8' ? 8 : \ + ((_v2) == '9' ? 9 : \ + ((_v2) == 'a' ? 10 : \ + ((_v2) == 'b' ? 11 : \ + ((_v2) == 'c' ? 12 : \ + ((_v2) == 'd' ? 13 : \ + ((_v2) == 'e' ? 14 : 15))))))))))))))); \ +} + +/* + * convprintable -- + * Convert a printable-encoded string into a newly allocated string. + * + * In an ideal world, this would probably share code with dbt_rprint but + * that's set up to read character-by-character (to avoid large memory + * allocations that aren't likely to be a problem here), and this has fewer + * special cases to deal with. + * + * Note that despite the printable encoding, the char * interface to this + * function (which is, not coincidentally, also used for database naming) + * means that outstr cannot contain any nuls. + */ +int +convprintable(dbenv, instr, outstrp) + DB_ENV *dbenv; + char *instr, **outstrp; +{ + char *outstr; + + /* + * Just malloc a string big enough for the whole input string; + * the output string will be smaller (or of equal length). 
+ * + * Note that we may be passed a zero-length string and need to + * be able to duplicate it. + */ + if ((outstr = malloc(strlen(instr) + 1)) == NULL) + return (ENOMEM); + + *outstrp = outstr; + + for ( ; *instr != '\0'; instr++) + if (*instr == '\\') { + if (*++instr == '\\') { + *outstr++ = '\\'; + continue; + } + DIGITIZE(*outstr++, *instr, *++instr); + } else + *outstr++ = *instr; + + *outstr = '\0'; + + return (0); +} + +/* + * dbt_rprint -- + * Read a printable line into a DBT structure. + */ +int +dbt_rprint(dbenv, dbtp) + DB_ENV *dbenv; + DBT *dbtp; +{ + u_int32_t len; + u_int8_t *p; + int c1, c2, escape, first; + char buf[32]; + + ++G(lineno); + + first = 1; + escape = 0; + for (p = dbtp->data, len = 0; (c1 = getchar()) != '\n';) { + if (c1 == EOF) { + if (len == 0) { + G(endofile) = G(endodata) = 1; + return (0); + } + return (badend(dbenv)); + } + if (first) { + first = 0; + if (G(version) > 1) { + if (c1 != ' ') { + buf[0] = c1; + if (fgets(buf + 1, + sizeof(buf) - 1, stdin) == NULL || + strcmp(buf, "DATA=END\n") != 0) + return (badend(dbenv)); + G(endodata) = 1; + return (0); + } + continue; + } + } + if (escape) { + if (c1 != '\\') { + if ((c2 = getchar()) == EOF) + return (badend(dbenv)); + DIGITIZE(c1, c1, c2); + } + escape = 0; + } else + if (c1 == '\\') { + escape = 1; + continue; + } + if (len >= dbtp->ulen - 10) { + dbtp->ulen *= 2; + if ((dbtp->data = + realloc(dbtp->data, dbtp->ulen)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + return (1); + } + p = (u_int8_t *)dbtp->data + len; + } + ++len; + *p++ = c1; + } + dbtp->size = len; + + return (0); +} + +/* + * dbt_rdump -- + * Read a byte dump line into a DBT structure. 
+ */ +int +dbt_rdump(dbenv, dbtp) + DB_ENV *dbenv; + DBT *dbtp; +{ + u_int32_t len; + u_int8_t *p; + int c1, c2, first; + char buf[32]; + + ++G(lineno); + + first = 1; + for (p = dbtp->data, len = 0; (c1 = getchar()) != '\n';) { + if (c1 == EOF) { + if (len == 0) { + G(endofile) = G(endodata) = 1; + return (0); + } + return (badend(dbenv)); + } + if (first) { + first = 0; + if (G(version) > 1) { + if (c1 != ' ') { + buf[0] = c1; + if (fgets(buf + 1, + sizeof(buf) - 1, stdin) == NULL || + strcmp(buf, "DATA=END\n") != 0) + return (badend(dbenv)); + G(endodata) = 1; + return (0); + } + continue; + } + } + if ((c2 = getchar()) == EOF) + return (badend(dbenv)); + if (len >= dbtp->ulen - 10) { + dbtp->ulen *= 2; + if ((dbtp->data = + realloc(dbtp->data, dbtp->ulen)) == NULL) { + dbenv->err(dbenv, ENOMEM, NULL); + return (1); + } + p = (u_int8_t *)dbtp->data + len; + } + ++len; + DIGITIZE(*p++, c1, c2); + } + dbtp->size = len; + + return (0); +} + +/* + * dbt_rrecno -- + * Read a record number dump line into a DBT structure. + */ +int +dbt_rrecno(dbenv, dbtp, ishex) + DB_ENV *dbenv; + DBT *dbtp; + int ishex; +{ + char buf[32], *p, *q; + u_long recno; + + ++G(lineno); + + if (fgets(buf, sizeof(buf), stdin) == NULL) { + G(endofile) = G(endodata) = 1; + return (0); + } + + if (strcmp(buf, "DATA=END\n") == 0) { + G(endodata) = 1; + return (0); + } + + if (buf[0] != ' ') + goto err; + + /* + * If we're expecting a hex key, do an in-place conversion + * of hex to straight ASCII before calling __db_getulong(). + */ + if (ishex) { + for (p = q = buf + 1; *q != '\0' && *q != '\n';) { + /* + * 0-9 in hex are 0x30-0x39, so this is easy. + * We should alternate between 3's and [0-9], and + * if the [0-9] are something unexpected, + * __db_getulong will fail, so we only need to catch + * end-of-string conditions. 
+ */ + if (*q++ != '3') + goto err; + if (*q == '\n' || *q == '\0') + goto err; + *p++ = *q++; + } + *p = '\0'; + } + + if (__db_getulong(dbenv, G(progname), buf + 1, 0, 0, &recno)) + goto err; + + *((db_recno_t *)dbtp->data) = recno; + dbtp->size = sizeof(db_recno_t); + return (0); + +err: return (badend(dbenv)); +} + +int +dbt_to_recno(dbenv, dbt, recnop) + DB_ENV *dbenv; + DBT *dbt; + db_recno_t *recnop; +{ + char buf[32]; /* Large enough for 2^64. */ + + memcpy(buf, dbt->data, dbt->size); + buf[dbt->size] = '\0'; + + return (__db_getulong(dbenv, G(progname), buf, 0, 0, (u_long *)recnop)); +} + +/* + * badnum -- + * Display the bad number message. + */ +void +badnum(dbenv) + DB_ENV *dbenv; +{ + dbenv->errx(dbenv, DB_STR("5089", + "boolean name=value pairs require a value of 0 or 1")); +} + +/* + * badend -- + * Display the bad end to input message. + */ +int +badend(dbenv) + DB_ENV *dbenv; +{ + dbenv->errx(dbenv, DB_STR("5090", + "unexpected end of input data or key/data pair")); + return (1); +} + +/* + * usage -- + * Display the usage message. + */ +int +usage() +{ + (void)fprintf(stderr, "usage: %s %s\n\t%s\n", progname, + "[-nTV] [-c name=value] [-f file]", + "[-h home] [-P password] [-t btree | hash | recno | queue] db_file"); + (void)fprintf(stderr, "usage: %s %s\n", + progname, "-r lsn | fileid [-h home] [-P password] db_file"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. 
*/ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5091", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_log_verify.c b/util/db_log_verify.c new file mode 100644 index 00000000..518dfc2a --- /dev/null +++ b/util/db_log_verify.c @@ -0,0 +1,338 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id: db_log_verify.c,v 0f73af5ae3da 2010/05/10 05:38:40 alexander $ + */ +#include "db_config.h" + +#include "db_int.h" + +#define MB 1024 * 1024 + +int main __P((int, char *[])); +int lsn_arg __P((char *, DB_LSN *)); +int usage __P((void)); +int version_check __P((void)); +int db_log_verify_app_record __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB_LSN start, stop; + int ch, cmb, exitval, nflag, rflag, ret, vsn_mismtch; + time_t starttime, endtime; + char *dbfile, *dbname, *home, *lvhome, *passwd; + DB_LOG_VERIFY_CONFIG lvconfig; + + vsn_mismtch = 0; + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + dbfile = dbname = home = lvhome = passwd = NULL; + exitval = nflag = rflag = 0; + starttime = endtime = 0; + ZERO_LSN(start); + ZERO_LSN(stop); + + memset(&lvconfig, 0, sizeof(lvconfig)); + + while ((ch = getopt(argc, argv, "b:cC:d:D:e:h:H:NP:Vvs:z:")) != EOF) + switch (ch) { + case 'b': + /* Don't use getsubopt(3), not all systems have it. 
*/ + if (lsn_arg(optarg, &start)) + return (usage()); + break; + case 'c': + lvconfig.continue_after_fail = 1; + break; + case 'C': + cmb = atoi(optarg); + if (cmb <= 0) + return (usage()); + lvconfig.cachesize = cmb * MB; + break; + case 'd': + dbfile = optarg; + break; + case 'D': + dbname = optarg; + break; + case 'e': + /* Don't use getsubopt(3), not all systems have it. */ + if (lsn_arg(optarg, &stop)) + return (usage()); + break; + case 'h': + home = optarg; + break; + case 'H': + lvhome = optarg; + break; + case 'N': + nflag = 1; + break; + case 'P': + if ((ret = __os_strdup(NULL, optarg, &passwd)) != 0) { + __db_err(NULL, ret, "__os_strdup: "); + return (EXIT_FAILURE); + } + memset(optarg, 0, strlen(optarg)); + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + lvconfig.verbose = 1; + break; + case 's': + starttime = atoi(optarg); + break; + case 'z': + endtime = atoi(optarg); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc > 0) + return (usage()); + + lvconfig.temp_envhome = lvhome; + lvconfig.dbfile = dbfile; + lvconfig.dbname = dbname; + lvconfig.start_lsn = start; + lvconfig.end_lsn = stop; + lvconfig.start_time = starttime; + lvconfig.end_time = endtime; + +create_again: + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* + * Set up an app-specific dispatch function so that we can gracefully + * handle app-specific log records. + */ + if ((ret = dbenv->set_app_dispatch( + dbenv, db_log_verify_app_record)) != 0) { + dbenv->err(dbenv, ret, "app_dispatch"); + goto err; + } + + /* + * An environment is required, but as all we're doing is reading log + * files, we create one if it doesn't already exist. If we create + * it, create it private so it automatically goes away when we're done. + * If we are reading the replication database, do not open the env + * with logging, because we don't want to log the opens. + */ + if (!vsn_mismtch && (ret = dbenv->open(dbenv, home, + DB_USE_ENVIRON, 0)) != 0) { + if (dbenv->close(dbenv, 0) != 0) { + dbenv = NULL; + goto err; + } + vsn_mismtch = 1; + goto create_again; + } + if (vsn_mismtch && (ret = dbenv->open(dbenv, home, DB_CREATE | + DB_INIT_LOG | DB_PRIVATE | DB_USE_ENVIRON, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + if ((ret = dbenv->log_verify(dbenv, &lvconfig)) != 0) + goto err; + + if (0) { +err: exitval = 1; + } + + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + /* Resend any caught signal. 
*/ + __db_util_sigresend(); + + if (passwd != NULL) + __os_free(NULL, passwd); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + fprintf(stderr, "\nusage: %s %s\n%s\n%s\n%s\n%s\n", progname, + "[-NcvV] [-h home] " + "[-H temporary environment home for internal use]", + "[-P password] [-C cache size in megabytes]", + "[-d db file name] [-D db name]", + "[-b file/offset] [-e file/offset]", + "[-s start time] [-z end time]"); + + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5003", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} + +/* + * Print an unknown, application-specific log record as best we can, this is + * all we can do to such a log record during the verification. The counting + * is done in __db_dispatch because we can't pass the log verify handle into + * this function. + */ +int +db_log_verify_app_record(dbenv, dbt, lsnp, op) + DB_ENV *dbenv; + DBT *dbt; + DB_LSN *lsnp; + db_recops op; +{ + u_int32_t i, len, len2, rectype; + int ret; + u_int8_t ch; + char *buf, *p; + + DB_ASSERT(dbenv->env, op == DB_TXN_LOG_VERIFY); + COMPQUIET(op, DB_TXN_LOG_VERIFY); + ch = 0; + ret = 0; + i = len = len2 = rectype = 0; + buf = p = NULL; + + /* + * Fetch the rectype, which always must be at the beginning of the + * record (if dispatching is to work at all). + */ + memcpy(&rectype, dbt->data, sizeof(rectype)); + + /* + * Applications may wish to customize the output here based on the + * rectype. We just print the entire log record in the generic + * mixed-hex-and-printable format we use for binary data. 
+ */ + if ((ret = __os_malloc(dbenv->env, + len = 256 + 2 * dbt->size, &buf)) != 0) + goto err; + memset(buf, 0, len); + snprintf(buf, len, DB_STR_A("5004", + "[%lu][%lu] App-specific log record: %lu\n\tdata: ", + "%lu %lu %lu"), (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)rectype); + + /* + * Each unprintable character takes up several bytes, so be aware of + * memory violation. + */ + for (i = 0; i < dbt->size && len2 < len; i++) { + ch = ((u_int8_t *)dbt->data)[i]; + len2 = (u_int32_t)strlen(buf); + p = buf + len2; + snprintf(p, len - len2 - 1, + isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); + } + len2 = (u_int32_t)strlen(buf); + p = buf + len2; + snprintf(p, len - len2 - 1, "\n\n"); + __db_msg(dbenv->env, "%s", buf); + +err: if (buf != NULL) + __os_free(dbenv->env, buf); + return (ret); +} + +/* + * lsn_arg -- + * Parse a LSN argument. + */ +int +lsn_arg(arg, lsnp) + char *arg; + DB_LSN *lsnp; +{ + u_long uval; + char *p; + + /* + * Expected format is: lsn.file/lsn.offset. + */ + if ((p = strchr(arg, '/')) == NULL) + return (1); + *p = '\0'; + + if (__db_getulong(NULL, progname, arg, 0, UINT32_MAX, &uval)) + return (1); + lsnp->file = uval; + if (__db_getulong(NULL, progname, p + 1, 0, UINT32_MAX, &uval)) + return (1); + lsnp->offset = uval; + return (0); +} diff --git a/util/db_printlog.c b/util/db_printlog.c new file mode 100644 index 00000000..b5120b9b --- /dev/null +++ b/util/db_printlog.c @@ -0,0 +1,678 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" +#include "dbinc/hash.h" +#ifdef HAVE_HEAP +#include "dbinc/heap.h" +#endif +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int db_printlog_print_app_record __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +int env_init_print __P((ENV *, u_int32_t, DB_DISTAB *)); +int env_init_print_42 __P((ENV *, DB_DISTAB *)); +int env_init_print_43 __P((ENV *, DB_DISTAB *)); +int env_init_print_47 __P((ENV *, DB_DISTAB *)); +int env_init_print_48 __P((ENV *, DB_DISTAB *)); +int lsn_arg __P((char *, DB_LSN *)); +int main __P((int, char *[])); +int open_rep_db __P((DB_ENV *, DB **, DBC **)); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp; + DBC *dbc; + DBT data, keydbt; + DB_DISTAB dtab; + DB_ENV *dbenv; + DB_LOG dblog; + DB_LOGC *logc; + DB_LSN key, start, stop, verslsn; + ENV *env; + u_int32_t logcflag, newversion, version; + int ch, cmp, exitval, i, nflag, rflag, ret, repflag; + char *data_len, *home, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbp = NULL; + dbc = NULL; + dbenv = NULL; + env = NULL; + logc = NULL; + ZERO_LSN(start); + ZERO_LSN(stop); + exitval = nflag = rflag = repflag = 0; + data_len = home = passwd = NULL; + + memset(&dtab, 0, sizeof(dtab)); + memset(&dblog, 0, sizeof(dblog)); + + while ((ch = getopt(argc, argv, "b:D:e:h:NP:rRV")) != EOF) + switch (ch) { + case 'b': + /* Don't use getsubopt(3), not all systems have it. */ + if (lsn_arg(optarg, &start)) + return (usage()); + break; + case 'D': + data_len = optarg; + break; + case 'e': + /* Don't use getsubopt(3), not all systems have it. 
*/ + if (lsn_arg(optarg, &stop)) + return (usage()); + break; + case 'h': + home = optarg; + break; + case 'N': + nflag = 1; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5010", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'r': + rflag = 1; + break; + case 'R': /* Undocumented */ + repflag = 1; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc > 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + dbenv->set_msgfile(dbenv, stdout); + + if (data_len != NULL) + (void)dbenv->set_data_len(dbenv, (u_int32_t)atol(data_len)); + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* + * Set up an app-specific dispatch function so that we can gracefully + * handle app-specific log records. + */ + if ((ret = dbenv->set_app_dispatch( + dbenv, db_printlog_print_app_record)) != 0) { + dbenv->err(dbenv, ret, "app_dispatch"); + goto err; + } + + /* + * An environment is required, but as all we're doing is reading log + * files, we create one if it doesn't already exist. 
If we create + * it, create it private so it automatically goes away when we're done. + * If we are reading the replication database, do not open the env + * with logging, because we don't want to log the opens. + */ + if (repflag) { + if ((ret = dbenv->open(dbenv, home, + DB_INIT_MPOOL | DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_USE_ENVIRON, 0)) + != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + } else if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_LOG | DB_PRIVATE | DB_USE_ENVIRON, 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + env = dbenv->env; + + /* Allocate a log cursor. */ + if (repflag) { + if ((ret = open_rep_db(dbenv, &dbp, &dbc)) != 0) + goto err; + } else if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->log_cursor"); + goto err; + } + + if (IS_ZERO_LSN(start)) { + memset(&keydbt, 0, sizeof(keydbt)); + logcflag = rflag ? DB_PREV : DB_NEXT; + } else { + key = start; + logcflag = DB_SET; + } + memset(&data, 0, sizeof(data)); + + /* + * If we're using the repflag, we're immediately initializing + * the print table. Use the current version. If we're printing + * the log then initialize version to 0 so that we get the + * correct version right away. + */ + if (repflag) + version = DB_LOGVERSION; + else + version = 0; + ZERO_LSN(verslsn); + + /* Initialize print callbacks if repflag. */ + if (repflag && + (ret = env_init_print(env, version, &dtab)) != 0) { + dbenv->err(dbenv, ret, DB_STR("5011", + "callback: initialization")); + goto err; + } + for (; !__db_util_interrupted(); logcflag = rflag ? 
DB_PREV : DB_NEXT) { + if (repflag) { + ret = dbc->get(dbc, &keydbt, &data, logcflag); + if (ret == 0) + key = ((__rep_control_args *)keydbt.data)->lsn; + } else + ret = logc->get(logc, &key, &data, logcflag); + if (ret != 0) { + if (ret == DB_NOTFOUND) + break; + dbenv->err(dbenv, + ret, repflag ? "DBC->get" : "DB_LOGC->get"); + goto err; + } + + /* + * We may have reached the end of the range we're displaying. + */ + if (!IS_ZERO_LSN(stop)) { + cmp = LOG_COMPARE(&key, &stop); + if ((rflag && cmp < 0) || (!rflag && cmp > 0)) + break; + } + if (!repflag && key.file != verslsn.file) { + /* + * If our log file changed, we need to see if the + * version of the log file changed as well. + * If it changed, reset the print table. + */ + if ((ret = logc->version(logc, &newversion, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_LOGC->version"); + goto err; + } + if (version != newversion) { + version = newversion; + if ((ret = env_init_print(env, version, + &dtab)) != 0) { + dbenv->err(dbenv, ret, DB_STR("5012", + "callback: initialization")); + goto err; + } + } + } + + ret = __db_dispatch(env, + &dtab, &data, &key, DB_TXN_PRINT, (void*)&dblog); + + /* + * XXX + * Just in case the underlying routines don't flush. + */ + (void)fflush(stdout); + + if (ret != 0) { + dbenv->err(dbenv, ret, DB_STR("5013", + "tx: dispatch")); + goto err; + } + } + + if (0) { +err: exitval = 1; + } + + /* + * Call __db_close to free the dummy DB handles that were used + * by the print routines. 
+ */ + for (i = 0; i < dblog.dbentry_cnt; i++) + if (dblog.dbentry[i].dbp != NULL) + (void)__db_close(dblog.dbentry[i].dbp, NULL, DB_NOSYNC); + if (env != NULL && dblog.dbentry != NULL) + __os_free(env, dblog.dbentry); + if (logc != NULL && (ret = logc->close(logc, 0)) != 0) + exitval = 1; + + if (dbc != NULL && (ret = dbc->close(dbc)) != 0) + exitval = 1; + + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) + exitval = 1; + + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * env_init_print -- + */ +int +env_init_print(env, version, dtabp) + ENV *env; + u_int32_t version; + DB_DISTAB *dtabp; +{ + int ret; + + /* + * We need to prime the print table with the current print + * functions. Then we overwrite only specific entries based on + * each previous version we support. + */ + if ((ret = __bam_init_print(env, dtabp)) != 0) + goto err; + if ((ret = __crdel_init_print(env, dtabp)) != 0) + goto err; + if ((ret = __db_init_print(env, dtabp)) != 0) + goto err; + if ((ret = __dbreg_init_print(env, dtabp)) != 0) + goto err; + if ((ret = __fop_init_print(env, dtabp)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = __ham_init_print(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_HEAP + if ((ret = __heap_init_print(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_QUEUE + if ((ret = __qam_init_print(env, dtabp)) != 0) + goto err; +#endif +#ifdef HAVE_REPLICATION_THREADS + if ((ret = __repmgr_init_print(env, dtabp)) != 0) + goto err; +#endif + if ((ret = __txn_init_print(env, dtabp)) != 0) + goto err; + + /* + * There are no log differences between 5.0 and 5.2, but 5.2 + * is a superset of 5.0. 
Patch 2 of 4.8 added __db_pg_trunc + * but didn't alter any log records so we want the same + * override as 4.8 + */ + if (version > DB_LOGVERSION_48p2) + goto done; + if ((ret = env_init_print_48(env, dtabp)) != 0) + goto err; + if (version >= DB_LOGVERSION_48) + goto done; + if ((ret = env_init_print_47(env, dtabp)) != 0) + goto err; + if (version == DB_LOGVERSION_47) + goto done; + /* + * There are no log record/recovery differences between 4.4 and 4.5. + * The log version changed due to checksum. There are no log recovery + * differences between 4.5 and 4.6. The name of the rep_gen in + * txn_checkpoint changed (to spare, since we don't use it anymore). + */ + if (version >= DB_LOGVERSION_44) + goto done; + if ((ret = env_init_print_43(env, dtabp)) != 0) + goto err; + if (version == DB_LOGVERSION_43) + goto done; + if (version != DB_LOGVERSION_42) { + __db_errx(env, DB_STR_A("5014", + "Unknown version %lu", "%lu"), (u_long)version); + ret = EINVAL; + goto err; + } + ret = env_init_print_42(env, dtabp); +done: +err: return (ret); +} + +int +env_init_print_42(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __db_relink_42_print, DB___db_relink_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_alloc_42_print, DB___db_pg_alloc_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_free_42_print, DB___db_pg_free_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_freedata_42_print, DB___db_pg_freedata_42)) != 0) + goto err; +#if HAVE_HASH + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_metagroup_42_print, DB___ham_metagroup_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_groupalloc_42_print, DB___ham_groupalloc_42)) != 0) + goto err; +#endif + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_ckp_42_print, DB___txn_ckp_42)) != 0) + goto err; +err: + return (ret); +} + +int 
+env_init_print_43(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_relink_43_print, DB___bam_relink_43)) != 0) + goto err; + /* + * We want to use the 4.2-based txn_regop record. + */ + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_regop_42_print, DB___txn_regop_42)) != 0) + goto err; + +err: + return (ret); +} + +/* + * env_init_print_47 -- + * + */ +int +env_init_print_47(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_42_print, DB___bam_split_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_sort_44_print, DB___db_pg_sort_44)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_sort_44_print, DB___db_pg_sort_44)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_create_42_print, DB___fop_create_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_42_print, DB___fop_write_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_42_print, DB___fop_rename_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __fop_rename_42_print, DB___fop_rename_noundo_46)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __txn_xa_regop_42_print, DB___txn_xa_regop_42)) != 0) + goto err; + +err: + return (ret); +} + +int +env_init_print_48(env, dtabp) + ENV *env; + DB_DISTAB *dtabp; +{ + int ret; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_pg_sort_44_print, DB___db_pg_sort_44)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_addrem_42_print, DB___db_addrem_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __db_big_42_print, DB___db_big_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __bam_split_48_print, DB___bam_split_48)) != 0) + goto err; +#ifdef HAVE_HASH + if ((ret = 
__db_add_recovery_int(env, dtabp, + __ham_insdel_42_print, DB___ham_insdel_42)) != 0) + goto err; + if ((ret = __db_add_recovery_int(env, dtabp, + __ham_replace_42_print, DB___ham_replace_42)) != 0) + goto err; +#endif + +err: + return (ret); +} + +int +usage() +{ + fprintf(stderr, "usage: %s %s\n", progname, + "[-NrV] [-b file/offset] [-e file/offset] [-h home] [-P password]"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5015", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} + +/* Print an unknown, application-specific log record as best we can. */ +int +db_printlog_print_app_record(dbenv, dbt, lsnp, op) + DB_ENV *dbenv; + DBT *dbt; + DB_LSN *lsnp; + db_recops op; +{ + u_int32_t i, rectype; + int ch; + + DB_ASSERT(dbenv->env, op == DB_TXN_PRINT); + + COMPQUIET(dbenv, NULL); + COMPQUIET(op, DB_TXN_PRINT); + + /* + * Fetch the rectype, which always must be at the beginning of the + * record (if dispatching is to work at all). + */ + memcpy(&rectype, dbt->data, sizeof(rectype)); + + /* + * Applications may wish to customize the output here based on the + * rectype. We just print the entire log record in the generic + * mixed-hex-and-printable format we use for binary data. + */ + printf(DB_STR_A("5016", + "[%lu][%lu]application specific record: rec: %lu\n", + "%lu %lu %lu"), (u_long)lsnp->file, (u_long)lsnp->offset, + (u_long)rectype); + printf(DB_STR("5017", "\tdata: ")); + for (i = 0; i < dbt->size; i++) { + ch = ((u_int8_t *)dbt->data)[i]; + printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); + } + printf("\n\n"); + + return (0); +} + +int +open_rep_db(dbenv, dbpp, dbcp) + DB_ENV *dbenv; + DB **dbpp; + DBC **dbcp; +{ + int ret; + + DB *dbp; + *dbpp = NULL; + *dbcp = NULL; + + if ((ret = db_create(dbpp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + return (ret); + } + + dbp = *dbpp; + if ((ret = + dbp->open(dbp, NULL, REPDBNAME, NULL, DB_BTREE, 0, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open"); + goto err; + } + + if ((ret = dbp->cursor(dbp, NULL, dbcp, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->cursor"); + goto err; + } + + return (0); + +err: if (*dbpp != NULL) + (void)(*dbpp)->close(*dbpp, 0); + return (ret); +} + +/* + * lsn_arg -- + * Parse a LSN argument. + */ +int +lsn_arg(arg, lsnp) + char *arg; + DB_LSN *lsnp; +{ + u_long uval; + char *p; + + /* + * Expected format is: lsn.file/lsn.offset. + */ + if ((p = strchr(arg, '/')) == NULL) + return (1); + *p = '\0'; + + if (__db_getulong(NULL, progname, arg, 0, UINT32_MAX, &uval)) + return (1); + lsnp->file = uval; + if (__db_getulong(NULL, progname, p + 1, 0, UINT32_MAX, &uval)) + return (1); + lsnp->offset = uval; + return (0); +} diff --git a/util/db_printlog/README b/util/db_printlog/README new file mode 100644 index 00000000..8e8279d1 --- /dev/null +++ b/util/db_printlog/README @@ -0,0 +1,34 @@ +# $Id$ + +Berkeley DB log dump utility. This utility dumps out a DB log in human +readable form, a record at a time, to assist in recovery and transaction +abort debugging. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +commit.awk Output transaction ID of committed transactions. + +count.awk Print out the number of log records for transactions + that we encountered. + +dbname.awk Take a comma-separated list of database names and spit + out all the log records that affect those databases. + +fileid.awk Take a comma-separated list of file numbers and spit out + all the log records that affect those file numbers. + +logstat.awk Display log record count/size statistics. 
+ +pgno.awk Take a comma-separated list of page numbers and spit + out all the log records that affect those page numbers. + +range.awk Print out a range of the log. + +rectype.awk Print out a range of the log -- command line should + set RECTYPE to a comma-separated list of the + rectypes (or partial strings of rectypes) sought. + +status.awk Read through db_printlog output and list the transactions + encountered, and whether they committed or aborted. + +txn.awk Print out all the records for a comma-separated list of + transaction IDs. diff --git a/util/db_printlog/commit.awk b/util/db_printlog/commit.awk new file mode 100644 index 00000000..569d2382 --- /dev/null +++ b/util/db_printlog/commit.awk @@ -0,0 +1,7 @@ +# $Id$ +# +# Output tid of committed transactions. + +/txn_regop/ { + print $5 +} diff --git a/util/db_printlog/count.awk b/util/db_printlog/count.awk new file mode 100644 index 00000000..a5e87cc6 --- /dev/null +++ b/util/db_printlog/count.awk @@ -0,0 +1,9 @@ +# $Id$ +# +# Print out the number of log records for transactions that we +# encountered. + +/^\[/{ + if ($5 != 0) + print $5 +} diff --git a/util/db_printlog/dbname.awk b/util/db_printlog/dbname.awk new file mode 100644 index 00000000..126f454a --- /dev/null +++ b/util/db_printlog/dbname.awk @@ -0,0 +1,83 @@ +# $Id$ +# +# Take a comma-separated list of database names and spit out all the +# log records that affect those databases. +# Note: this will not start printing records until a dbreg_register +# record for that file is found. 
+ +NR == 1 { + nfiles = 0 + while ((ndx = index(DBNAME, ",")) != 0) { + filenames[nfiles] = substr(DBNAME, 1, ndx - 1) "\\0"; + DBNAME = substr(DBNAME, ndx + 1, length(DBNAME) - ndx); + files[nfiles] = -1 + nfiles++ + } + filenames[nfiles] = DBNAME "\\0"; + files[nfiles] = -1 + myfile = -1; + nreg = 0; +} + +/^\[.*dbreg_register/ { + register = 1; +} +/opcode:/ { + if (register == 1) { + if ($2 == "CHKPNT" || $2 == "OPEN" || $2 == "PREOPEN" || $2 == "REOPEN") + register = 3; + } +} +/name:/ { + if (register >= 2) { + myfile = -2; + for (i = 0; i <= nfiles; i++) { + if ($5 == filenames[i]) { + if (register == 2) { + printme = 0; + myfile = -2; + } else { + myfile = i; + } + break; + } + } + } + register = 0; +} +/fileid:/{ + if (myfile == -2) + files[$2] = 0; + else if (myfile != -1) { + files[$2] = 1; + if ($2 > nreg) + nreg = $2; + printme = 1; + register = 0; + myfile = -1; + } else if ($2 <= nreg && files[$2] == 1) { + printme = 1 + } + myfile = -1; +} + +/^\[/{ + if (printme == 1) { + printf("%s\n", rec); + printme = 0 + } + rec = ""; + + rec = $0 +} + +TXN == 1 && /txn_regop/ {printme = 1} +/^ /{ + if (length(rec) + length($0) < 2040) + rec = sprintf("%s\n%s", rec, $0); +} + +END { + if (printme == 1) + printf("%s\n", rec); +} diff --git a/util/db_printlog/fileid.awk b/util/db_printlog/fileid.awk new file mode 100644 index 00000000..2fcf6598 --- /dev/null +++ b/util/db_printlog/fileid.awk @@ -0,0 +1,38 @@ +# $Id$ +# +# Take a comma-separated list of file numbers and spit out all the +# log records that affect those file numbers. 
+ +NR == 1 { + nfiles = 0 + while ((ndx = index(FILEID, ",")) != 0) { + files[nfiles] = substr(FILEID, 1, ndx - 1); + FILEID = substr(FILEID, ndx + 1, length(FILEID) - ndx); + nfiles++ + } + files[nfiles] = FILEID; +} + +/^\[/{ + if (printme == 1) { + printf("%s\n", rec); + printme = 0 + } + rec = ""; + + rec = $0 +} +/^ /{ + if (length(rec) + length($0) < 2040) + rec = sprintf("%s\n%s", rec, $0); +} +/fileid/{ + for (i = 0; i <= nfiles; i++) + if ($2 == files[i]) + printme = 1 +} + +END { + if (printme == 1) + printf("%s\n", rec); +} diff --git a/util/db_printlog/logstat.awk b/util/db_printlog/logstat.awk new file mode 100644 index 00000000..b22a3f15 --- /dev/null +++ b/util/db_printlog/logstat.awk @@ -0,0 +1,36 @@ +# $Id$ +# +# Output accumulated log record count/size statistics. +BEGIN { + l_file = 0; + l_offset = 0; +} + +/^\[/{ + gsub("[][: ]", " ", $1) + split($1, a) + + if (a[1] == l_file) { + l[a[3]] += a[2] - l_offset + ++n[a[3]] + } else + ++s[a[3]] + + l_file = a[1] + l_offset = a[2] +} + +END { + # We can't figure out the size of the first record in each log file, + # use the average for other records we found as an estimate. + for (i in s) + if (s[i] != 0 && n[i] != 0) { + l[i] += s[i] * (l[i]/n[i]) + n[i] += s[i] + delete s[i] + } + for (i in l) + printf "%s: %d (n: %d, avg: %.2f)\n", i, l[i], n[i], l[i]/n[i] + for (i in s) + printf "%s: unknown (n: %d, unknown)\n", i, s[i] +} diff --git a/util/db_printlog/pgno.awk b/util/db_printlog/pgno.awk new file mode 100644 index 00000000..419568fa --- /dev/null +++ b/util/db_printlog/pgno.awk @@ -0,0 +1,56 @@ +# $Id$ +# +# Take a comma-separated list of page numbers and spit out all the +# log records that affect those page numbers. 
+ +BEGIN { + INDX = -1 +} +NR == 1 { + npages = 0 + while ((ndx = index(PGNO, ",")) != 0) { + pgno[npages] = substr(PGNO, 1, ndx - 1); + PGNO = substr(PGNO, ndx + 1, length(PGNO) - ndx); + npages++ + } + pgno[npages] = PGNO; +} + +/^\[/{ + if (printme == 1) { + printf("%s\n", rec); + printme = 0 + } + rec = ""; + + rec = $0 +} +/^ /{ + if (length(rec) + length($0) < 2040) + rec = sprintf("%s\n%s", rec, $0); +} +/pgno/{ + for (i = 0; i <= npages; i++) + if ($2 == pgno[i]) + printme = 1 +} +/right/{ + for (i = 0; i <= npages; i++) + if ($2 == pgno[i]) + printme = 1 +} +/left/{ + for (i = 0; i <= npages; i++) + if ($2 == pgno[i]) + printme = 1 +} +/indx/{ + if (printme == 1 && INDX != -1) + if(INDX != $2) + printme = 0; +} + +END { + if (printme == 1) + printf("%s\n", rec); +} diff --git a/util/db_printlog/range.awk b/util/db_printlog/range.awk new file mode 100644 index 00000000..e2c01129 --- /dev/null +++ b/util/db_printlog/range.awk @@ -0,0 +1,27 @@ +# $Id$ +# +# Print out a range of the log + +/^\[/{ + l = length($1) - 1; + i = index($1, "]"); + file = substr($1, 2, i - 2); + file += 0; + start = i + 2; + offset = substr($1, start, l - start + 1); + i = index(offset, "]"); + offset = substr($1, start, i - 1); + offset += 0; + + if ((file == START_FILE && offset >= START_OFFSET || file > START_FILE)\ + && (file < END_FILE || (file == END_FILE && offset < END_OFFSET))) + printme = 1 + else if (file == END_FILE && offset > END_OFFSET || file > END_FILE) + exit + else + printme = 0 +} +{ + if (printme == 1) + print $0 +} diff --git a/util/db_printlog/rectype.awk b/util/db_printlog/rectype.awk new file mode 100644 index 00000000..b5da8fde --- /dev/null +++ b/util/db_printlog/rectype.awk @@ -0,0 +1,27 @@ +# $Id$ +# +# Print out a range of the log. +# Command line should set RECTYPE to a comma separated list +# of the rectypes (or partial strings of rectypes) sought. 
+NR == 1 { + ntypes = 0 + while ((ndx = index(RECTYPE, ",")) != 0) { + types[ntypes] = substr(RECTYPE, 1, ndx - 1); + RECTYPE = substr(RECTYPE, ndx + 1, length(RECTYPE) - ndx); + ntypes++ + } + types[ntypes] = RECTYPE; +} + +/^\[/{ + printme = 0 + for (i = 0; i <= ntypes; i++) + if (index($1, types[i]) != 0) { + printme = 1 + break; + } +} +{ + if (printme == 1) + print $0 +} diff --git a/util/db_printlog/status.awk b/util/db_printlog/status.awk new file mode 100644 index 00000000..7145d979 --- /dev/null +++ b/util/db_printlog/status.awk @@ -0,0 +1,50 @@ +# $Id$ +# +# Read through db_printlog output and list all the transactions encountered +# and whether they committed or aborted. +# +# 1 = started +# 2 = committed +# 3 = explicitly aborted +# 4 = other +BEGIN { + cur_txn = 0 +} +/^\[.*]\[/{ + in_regop = 0 + if (status[$5] == 0) { + status[$5] = 1; + txns[cur_txn] = $5; + cur_txn++; + } +} +/ child:/ { + txnid = substr($2, 3); + status[txnid] = 2; +} +/txn_regop/ { + txnid = $5 + in_regop = 1 +} +/opcode:/ { + if (in_regop == 1) { + if ($2 == 1) + status[txnid] = 2 + else if ($2 == 3) + status[txnid] = 3 + else + status[txnid] = 4 + } +} +END { + for (i = 0; i < cur_txn; i++) { + if (status[txns[i]] == 1) + printf("%s\tABORT\n", txns[i]); + else if (status[txns[i]] == 2) + printf("%s\tCOMMIT\n", txns[i]); + else if (status[txns[i]] == 3) + printf("%s\tABORT\n", txns[i]); + else if (status[txns[i]] == 4) + printf("%s\tOTHER\n", txns[i]); + } +} diff --git a/util/db_printlog/txn.awk b/util/db_printlog/txn.awk new file mode 100644 index 00000000..4da98554 --- /dev/null +++ b/util/db_printlog/txn.awk @@ -0,0 +1,35 @@ +# $Id$ +# +# Print out all the records for a comma-separated list of transaction ids. 
+NR == 1 { + ntxns = 0 + while ((ndx = index(TXN, ",")) != 0) { + txn[ntxns] = substr(TXN, 1, ndx - 1); + TXN = substr(TXN, ndx + 1, length(TXN) - ndx); + ntxns++ + } + txn[ntxns] = TXN; +} + +/^\[/{ + if (printme == 1) { + printf("%s\n", rec); + printme = 0 + } + rec = ""; + + for (i = 0; i <= ntxns; i++) + if (txn[i] == $5) { + rec = $0 + printme = 1 + } +} +/^ /{ + if (length(rec) + length($0) < 2040) + rec = sprintf("%s\n%s", rec, $0); +} + +END { + if (printme == 1) + printf("%s\n", rec); +} diff --git a/util/db_recover.c b/util/db_recover.c new file mode 100644 index 00000000..20550de9 --- /dev/null +++ b/util/db_recover.c @@ -0,0 +1,316 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +void db_recover_feedback __P((DB_ENV *, int, int)); +int main __P((int, char *[])); +int read_timestamp __P((char *, time_t *)); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; +int newline_needed; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + time_t timestamp; + u_int32_t flags; + int ch, exitval, fatal_recover, ret, retain_env, set_feedback, verbose; + char *home, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + home = passwd = NULL; + timestamp = 0; + exitval = fatal_recover = retain_env = set_feedback = verbose = 0; + while ((ch = getopt(argc, argv, "cefh:P:t:Vv")) != EOF) + switch (ch) { + case 'c': + fatal_recover = 1; + break; + case 'e': + retain_env = 1; + break; + case 'f': + set_feedback = 1; + break; + case 'h': + home = optarg; + break; + case 'P': 
+ passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5021", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 't': + if ((ret = read_timestamp(optarg, ×tamp)) != 0) + return (ret); + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. + */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + return (EXIT_FAILURE); + } + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + if (set_feedback) + (void)dbenv->set_feedback(dbenv, db_recover_feedback); + if (verbose) + (void)dbenv->set_verbose(dbenv, DB_VERB_RECOVERY, 1); + if (timestamp && + (ret = dbenv->set_tx_timestamp(dbenv, ×tamp)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_timestamp"); + goto err; + } + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* + * Initialize the environment -- we don't actually do anything + * else, that all that's needed to run recovery. + * + * Note that unless the caller specified the -e option, we use a + * private environment, as we're about to create a region, and we + * don't want to to leave it around. If we leave the region around, + * the application that should create it will simply join it instead, + * and will then be running with incorrectly sized (and probably + * terribly small) caches. Applications that use -e should almost + * certainly use DB_CONFIG files in the directory. 
+ */ + flags = 0; + LF_SET(DB_CREATE | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_USE_ENVIRON); + LF_SET(fatal_recover ? DB_RECOVER_FATAL : DB_RECOVER); + LF_SET(retain_env ? DB_INIT_LOCK : DB_PRIVATE); + if ((ret = dbenv->open(dbenv, home, flags, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + if (0) { +err: exitval = 1; + } + + /* Flush to the next line of the output device. */ + if (newline_needed) + printf("\n"); + + /* Clean up the environment. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * db_recover_feedback -- + * Provide feedback on recovery progress. + */ +void +db_recover_feedback(dbenv, opcode, percent) + DB_ENV *dbenv; + int opcode; + int percent; +{ + COMPQUIET(dbenv, NULL); + + if (opcode == DB_RECOVER) { + printf(DB_STR_A("5022", "\rrecovery %d%% complete", "%d"), + percent); + (void)fflush(stdout); + newline_needed = 1; + } +} + +#define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; + +/* + * read_timestamp -- + * Convert a time argument to Epoch seconds. + * + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +int +read_timestamp(arg, timep) + char *arg; + time_t *timep; +{ + struct tm *t; + time_t now; + int yearset; + char *p; + /* Start with the current time. */ + (void)time(&now); + if ((t = localtime(&now)) == NULL) { + fprintf(stderr, DB_STR_A("5023", "%s: localtime: %s\n", + "%s %s\n"), progname, strerror(errno)); + return (EXIT_FAILURE); + } + /* [[CC]YY]MMDDhhmm[.SS] */ + if ((p = strchr(arg, '.')) == NULL) + t->tm_sec = 0; /* Seconds defaults to 0. */ + else { + if (strlen(p + 1) != 2) + goto terr; + *p++ = '\0'; + t->tm_sec = ATOI2(p); + } + + yearset = 0; + switch (strlen(arg)) { + case 12: /* CCYYMMDDhhmm */ + t->tm_year = ATOI2(arg); + t->tm_year *= 100; + yearset = 1; + /* FALLTHROUGH */ + case 10: /* YYMMDDhhmm */ + if (yearset) { + yearset = ATOI2(arg); + t->tm_year += yearset; + } else { + yearset = ATOI2(arg); + if (yearset < 69) + t->tm_year = yearset + 2000; + else + t->tm_year = yearset + 1900; + } + t->tm_year -= 1900; /* Convert to UNIX time. 
*/ + /* FALLTHROUGH */ + case 8: /* MMDDhhmm */ + t->tm_mon = ATOI2(arg); + --t->tm_mon; /* Convert from 01-12 to 00-11 */ + t->tm_mday = ATOI2(arg); + t->tm_hour = ATOI2(arg); + t->tm_min = ATOI2(arg); + break; + default: + goto terr; + } + + t->tm_isdst = -1; /* Figure out DST. */ + + *timep = mktime(t); + if (*timep == -1) { +terr: fprintf(stderr, DB_STR_A("5024", + "%s: out of range or illegal time specification: [[CC]YY]MMDDhhmm[.SS]", + "%s"), progname); + return (EXIT_FAILURE); + } + return (0); +} + +int +usage() +{ + (void)fprintf(stderr, "usage: %s %s\n", progname, + "[-cefVv] [-h home] [-P password] [-t [[CC]YY]MMDDhhmm[.SS]]"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5025", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_replicate.c b/util/db_replicate.c new file mode 100644 index 00000000..be7be62f --- /dev/null +++ b/util/db_replicate.c @@ -0,0 +1,419 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 2010, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int main __P((int, char *[])); + +#ifndef HAVE_REPLICATION_THREADS +int +main(argc, argv) + int argc; + char *argv[]; +{ + fprintf(stderr, DB_STR_A("5092", + "Cannot run %s without Replication Manager.\n", "%s\n"), + argv[0]); + COMPQUIET(argc, 0); + exit (1); +} +#else + +static int usage __P((void)); +static int version_check __P((void)); +static void event_callback __P((DB_ENV *, u_int32_t, void *)); +static int db_replicate_logmsg __P((DB_ENV *, const char *)); +static void prog_close __P((DB_ENV *, int)); + +/* * Buffer for logging messages. */ +#define MSG_SIZE 256 +char log_msg[MSG_SIZE]; +char *logfile; +FILE *logfp; +pid_t pid; + +char progname[MSG_SIZE]; +int panic_exit; +#define REP_NTHREADS 3 +#define MAX_RETRY 3 + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + time_t now; + long argval; + db_timeout_t max_req; + u_int32_t flags, repmgr_th, seconds, start_state; + int ch, count, done, exitval, ret, verbose; + char *home, *passwd, *prog, time_buf[CTIME_BUFLEN]; + + dbenv = NULL; + logfp = NULL; + log_msg[MSG_SIZE - 1] = '\0'; + __os_id(NULL, &pid, NULL); + panic_exit = 0; + + if ((prog = __db_rpath(argv[0])) == NULL) + prog = argv[0]; + else + ++prog; + + if ((size_t)(count = snprintf(progname, sizeof(progname), "%s(%lu)", + prog, (u_long)pid)) >= sizeof(progname)) { + fprintf(stderr, DB_STR("5093", "Program name too long\n")); + goto err; + } + if ((ret = version_check()) != 0) + goto err; + + /* + * !!! + * Don't allow a fully unsigned 32-bit number, some compilers get + * upset and require it to be specified in hexadecimal and so on. + */ +#define MAX_UINT32_T 2147483647 + + + /* + * Create an environment object and initialize it for error + * reporting. Create it before parsing args so that we can + * call methods to set the values directly. 
+ */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + (void)dbenv->set_event_notify(dbenv, event_callback); + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + exitval = verbose = 0; + flags = 0; + home = logfile = passwd = NULL; + seconds = 30; + start_state = DB_REP_ELECTION; + repmgr_th = REP_NTHREADS; + while ((ch = getopt(argc, argv, "h:L:MP:T:t:Vv")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'L': + logfile = optarg; + break; + case 'M': + start_state = DB_REP_MASTER; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5094", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES); + free(passwd); + if (ret != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + break; + case 'T': + if (__db_getlong(NULL, progname, + optarg, 1, (long)MAX_UINT32_T, &argval)) + return (EXIT_FAILURE); + repmgr_th = (u_int32_t)argval; + break; + case 't': + if (__db_getlong(NULL, progname, + optarg, 1, (long)MAX_UINT32_T, &argval)) + return (EXIT_FAILURE); + seconds = (u_int32_t)argval; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc != 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Log our process ID. This is a specialized case of + * __db_util_logset because we retain the logfp and keep + * the file open for additional logging. 
+ */ + if (logfile != NULL) { + if ((logfp = fopen(logfile, "w")) == NULL) + goto err; + if ((ret = db_replicate_logmsg(dbenv, "STARTED")) != 0) + goto err; + } + + /* + * If attaching to a pre-existing environment fails, error. + */ +#define ENV_FLAGS (DB_THREAD | DB_USE_ENVIRON) + if ((ret = dbenv->open(dbenv, home, ENV_FLAGS, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* + * Confirm that replication is configured in the underlying + * environment. We need the max request value anyway and + * the method to get the value returns an error if replication + * is not configured. + */ + if ((ret = dbenv->rep_get_request(dbenv, NULL, &max_req)) != 0) { + dbenv->err(dbenv, ret, "rep_get_request"); + goto err; + } + + /* + * Start replication. + */ + if (verbose && ((ret = dbenv->set_verbose(dbenv, + DB_VERB_REPLICATION, 1)) != 0)) { + dbenv->err(dbenv, ret, "set_verbose"); + goto err; + } + count = done = 0; + while (!done && count < MAX_RETRY) { + /* + * Retry if we get an error that indicates that the port is + * in use. An old version of this program could still be + * running. The application restarts with recovery, and that + * should panic the old environment, but it may take a little + * bit of time for the old program to notice the panic. + * + * We wait the max_req time because at worst the rerequest + * thread runs every max_req time and should notice a panic. On + * the other hand, if we're joining the replication group for + * the first time and the master is not available + * (DB_REP_UNAVAIL), it makes sense to pause a bit longer before + * retrying. + */ + if ((ret = dbenv->repmgr_start(dbenv, + repmgr_th, start_state)) == DB_REP_UNAVAIL) { + count++; + __os_yield(dbenv->env, 5, 0); + } else if (ret != 0) { + count++; + __os_yield(dbenv->env, 0, max_req); + } else + done = 1; + } + if (!done) { + dbenv->err(dbenv, ret, "repmgr_start"); + goto err; + } + + /* Main loop of the program. 
*/ + while (!__db_util_interrupted() && !panic_exit) { + /* + * The program itself does not have much to do. All the + * interesting replication stuff is happening underneath. + * Each period, we'll wake up and call rep_flush just to + * force a log record and cause any gaps to fill as well as + * check program status to see if it was interrupted. + */ + __os_yield(dbenv->env, seconds, 0); + if (verbose) { + (void)time(&now); + dbenv->errx(dbenv, DB_STR_A("5095", + "db_replicate begin: %s", "%s"), + __os_ctime(&now, time_buf)); + } + + /* + * Hmm, do we really want to exit on error here? This is + * a non-essential piece of the program, so if it gets + * an error, we may just want to ignore it. Note we call + * rep_flush without checking if we're a master or client. + */ + if ((ret = dbenv->rep_flush(dbenv)) != 0) { + dbenv->err(dbenv, ret, "rep_flush"); + goto err; + } + } + + if (panic_exit) +err: exitval = 1; + + prog_close(dbenv, exitval); + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +static void +prog_close(dbenv, exitval) + DB_ENV *dbenv; + int exitval; +{ + int ret; + + if (logfp != NULL) { + fclose(logfp); + (void)remove(logfile); + } + /* Clean up the environment. */ + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + /* Resend any caught signal. */ + __db_util_sigresend(); + + exit (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +static void +event_callback(dbenv, which, info) + DB_ENV *dbenv; + u_int32_t which; + void *info; +{ + COMPQUIET(info, NULL); + switch (which) { + case DB_EVENT_PANIC: + /* + * If the app restarted with recovery, and we're an old + * program running against the old regions, we'll discover + * the panic and want to exit quickly to give a new + * instantiation of the program access to the port. 
+ */ + printf(DB_STR("5096", "received panic event\n")); + db_replicate_logmsg(dbenv, "PANIC"); + panic_exit = 1; + break; + + case DB_EVENT_REP_CLIENT: + db_replicate_logmsg(dbenv, "CLIENT"); + break; + + case DB_EVENT_REP_CONNECT_BROKEN: + db_replicate_logmsg(dbenv, "CONNECTIONBROKEN"); + break; + + case DB_EVENT_REP_DUPMASTER: + db_replicate_logmsg(dbenv, "DUPMASTER"); + break; + + case DB_EVENT_REP_ELECTED: + db_replicate_logmsg(dbenv, "ELECTED"); + break; + + case DB_EVENT_REP_MASTER: + db_replicate_logmsg(dbenv, "MASTER"); + break; + + case DB_EVENT_REP_NEWMASTER: + db_replicate_logmsg(dbenv, "NEWMASTER"); + break; + + case DB_EVENT_REP_STARTUPDONE: + db_replicate_logmsg(dbenv, "STARTUPDONE"); + break; + + case DB_EVENT_REP_CONNECT_ESTD: + case DB_EVENT_REP_CONNECT_TRY_FAILED: + case DB_EVENT_REP_INIT_DONE: + case DB_EVENT_REP_LOCAL_SITE_REMOVED: + case DB_EVENT_REP_PERM_FAILED: + case DB_EVENT_REP_SITE_ADDED: + case DB_EVENT_REP_SITE_REMOVED: + /* We don't care about these, for now. */ + break; + + default: + db_replicate_logmsg(dbenv, "IGNORED"); + dbenv->errx(dbenv, DB_STR_A("5097", "ignoring event %d", + "%d"), which); + } +} + +static int +usage() +{ + (void)fprintf(stderr, "usage: %s [-MVv]\n\t%s\n", progname, +"[-h home] [-P password] [-T nthreads] [-t seconds]"); + return (EXIT_FAILURE); +} + +static int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. 
*/ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5098", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} + +static int +db_replicate_logmsg(dbenv, msg) + DB_ENV *dbenv; + const char *msg; +{ + time_t now; + int cnt; + char time_buf[CTIME_BUFLEN]; + + if (logfp == NULL) + return (0); + + (void)time(&now); + (void)__os_ctime(&now, time_buf); + if ((size_t)(cnt = snprintf(log_msg, sizeof(log_msg), "%s: %lu %s %s", + progname, (u_long)pid, time_buf, msg)) >= sizeof(log_msg)) { + dbenv->errx(dbenv, DB_STR_A("5099", + "%s: %lu %s %s: message too long", "%s %lu %s %s"), + progname, (u_long)pid, time_buf, msg); + return (1); + } + fprintf(logfp, "%s\n", log_msg); + return (0); +} +#endif diff --git a/util/db_stat.c b/util/db_stat.c new file mode 100644 index 00000000..a2f3943a --- /dev/null +++ b/util/db_stat.c @@ -0,0 +1,477 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +typedef enum { T_NOTSET, T_DB, + T_ENV, T_LOCK, T_LOG, T_MPOOL, T_MUTEX, T_REP, T_TXN } test_t; + +int db_init __P((DB_ENV *, char *, test_t, u_int32_t, int *)); +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB_ENV *dbenv; + DB *dbp; + test_t ttype; + u_int32_t cache, flags; + int ch, exitval; + int nflag, private, resize, ret; + char *db, *home, *p, *passwd, *subdb; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + dbp = NULL; + ttype = T_NOTSET; + cache = MEGABYTE; + exitval = flags = nflag = private = 0; + db = home = passwd = subdb = NULL; + + while ((ch = getopt(argc, + argv, "aC:cd:Eefgh:L:lM:mNP:R:rs:tVxX:Z")) != EOF) + switch (ch) { + case 'a': + LF_SET(DB_STAT_ALLOC); + break; + case 'C': case 'c': + if (ttype != T_NOTSET && ttype != T_LOCK) + goto argcombo; + ttype = T_LOCK; + if (ch != 'c') + for (p = optarg; *p; ++p) + switch (*p) { + case 'A': + LF_SET(DB_STAT_ALL); + break; + case 'c': + LF_SET(DB_STAT_LOCK_CONF); + break; + case 'l': + LF_SET(DB_STAT_LOCK_LOCKERS); + break; + case 'm': /* Backward compatible. 
*/ + break; + case 'o': + LF_SET(DB_STAT_LOCK_OBJECTS); + break; + case 'p': + LF_SET(DB_STAT_LOCK_PARAMS); + break; + default: + return (usage()); + } + break; + case 'd': + if (ttype != T_NOTSET && ttype != T_DB) + goto argcombo; + ttype = T_DB; + db = optarg; + break; + case 'E': case 'e': + if (ttype != T_NOTSET && ttype != T_ENV) + goto argcombo; + ttype = T_ENV; + LF_SET(DB_STAT_SUBSYSTEM); + if (ch == 'E') + LF_SET(DB_STAT_ALL); + break; + case 'f': + if (ttype != T_NOTSET && ttype != T_DB) + goto argcombo; + ttype = T_DB; + LF_SET(DB_FAST_STAT); + break; + case 'h': + home = optarg; + break; + case 'L': case 'l': + if (ttype != T_NOTSET && ttype != T_LOG) + goto argcombo; + ttype = T_LOG; + if (ch != 'l') + for (p = optarg; *p; ++p) + switch (*p) { + case 'A': + LF_SET(DB_STAT_ALL); + break; + default: + return (usage()); + } + break; + case 'M': case 'm': + if (ttype != T_NOTSET && ttype != T_MPOOL) + goto argcombo; + ttype = T_MPOOL; + if (ch != 'm') + for (p = optarg; *p; ++p) + switch (*p) { + case 'A': + LF_SET(DB_STAT_ALL); + break; + case 'h': + LF_SET(DB_STAT_MEMP_HASH); + break; + case 'm': /* Backward compatible. 
*/ + break; + default: + return (usage()); + } + break; + case 'N': + nflag = 1; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5005", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'R': case 'r': + if (ttype != T_NOTSET && ttype != T_REP) + goto argcombo; + ttype = T_REP; + if (ch != 'r') + for (p = optarg; *p; ++p) + switch (*p) { + case 'A': + LF_SET(DB_STAT_ALL); + break; + default: + return (usage()); + } + break; + case 's': + if (ttype != T_NOTSET && ttype != T_DB) + goto argcombo; + ttype = T_DB; + subdb = optarg; + break; + case 't': + if (ttype != T_NOTSET) { +argcombo: fprintf(stderr, DB_STR_A("5006", + "%s: illegal option combination\n", + "%s\n"), progname); + return (usage()); + } + ttype = T_TXN; + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'X': case 'x': + if (ttype != T_NOTSET && ttype != T_MUTEX) + goto argcombo; + ttype = T_MUTEX; + if (ch != 'x') + for (p = optarg; *p; ++p) + switch (*p) { + case 'A': + LF_SET(DB_STAT_ALL); + break; + default: + return (usage()); + } + break; + case 'Z': + LF_SET(DB_STAT_CLEAR); + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + switch (ttype) { + case T_DB: + if (db == NULL) + return (usage()); + break; + case T_ENV: + case T_LOCK: + case T_LOG: + case T_MPOOL: + case T_MUTEX: + case T_REP: + case T_TXN: + break; + case T_NOTSET: + return (usage()); + } + + if (LF_ISSET(DB_STAT_ALL | DB_STAT_ALLOC) == DB_STAT_ALLOC) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ +retry: if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + + if (passwd != NULL && + (ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* Initialize the environment. */ + if (db_init(dbenv, home, ttype, cache, &private) != 0) + goto err; + + switch (ttype) { + case T_DB: + /* Create the DB object and open the file. */ + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + + /* + * We open the database for writing so we can update the cached + * statistics, but it's OK to fail, we can open read-only and + * proceed. + * + * Turn off error messages for now -- we can't open lots of + * databases read-write (for example, master databases and + * hash databases for which we don't know the hash function). + */ + dbenv->set_errfile(dbenv, NULL); + ret = dbp->open(dbp, NULL, db, subdb, DB_UNKNOWN, 0, 0); + dbenv->set_errfile(dbenv, stderr); + if (ret != 0) { + /* Handles cannot be reused after a failed DB->open. */ + (void)dbp->close(dbp, 0); + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create"); + goto err; + } + + if ((ret = dbp->open(dbp, + NULL, db, subdb, DB_UNKNOWN, DB_RDONLY, 0)) != 0) { + dbenv->err(dbenv, ret, "DB->open: %s", db); + goto err; + } + } + + /* Check if cache is too small for this DB's pagesize. 
*/ + if (private) { + if ((ret = __db_util_cache(dbp, &cache, &resize)) != 0) + goto err; + if (resize) { + (void)dbp->close(dbp, DB_NOSYNC); + dbp = NULL; + + (void)dbenv->close(dbenv, 0); + dbenv = NULL; + goto retry; + } + } + + if (dbp->stat_print(dbp, flags)) + goto err; + break; + case T_ENV: + if (dbenv->stat_print(dbenv, flags)) + goto err; + break; + case T_LOCK: + if (dbenv->lock_stat_print(dbenv, flags)) + goto err; + break; + case T_LOG: + if (dbenv->log_stat_print(dbenv, flags)) + goto err; + break; + case T_MPOOL: + if (dbenv->memp_stat_print(dbenv, flags)) + goto err; + break; + case T_MUTEX: + if (dbenv->mutex_stat_print(dbenv, flags)) + goto err; + break; + case T_REP: +#ifdef HAVE_REPLICATION_THREADS + if (dbenv->repmgr_stat_print(dbenv, flags)) + goto err; +#endif + if (dbenv->rep_stat_print(dbenv, flags)) + goto err; + break; + case T_TXN: + if (dbenv->txn_stat_print(dbenv, flags)) + goto err; + break; + case T_NOTSET: + dbenv->errx(dbenv, DB_STR("5007", + "Unknown statistics flag")); + goto err; + } + + if (0) { +err: exitval = 1; + } + if (dbp != NULL && (ret = dbp->close(dbp, DB_NOSYNC)) != 0) { + exitval = 1; + dbenv->err(dbenv, ret, DB_STR("5008", "close")); + } + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * db_init -- + * Initialize the environment. + */ +int +db_init(dbenv, home, ttype, cache, is_private) + DB_ENV *dbenv; + char *home; + test_t ttype; + u_int32_t cache; + int *is_private; +{ + u_int32_t oflags; + int ret; + + /* + * If our environment open fails, and we're trying to look at a + * shared region, it's a hard failure. + * + * We will probably just drop core if the environment we join does + * not include a memory pool. 
This is probably acceptable; trying + * to use an existing environment that does not contain a memory + * pool to look at a database can be safely construed as operator + * error, I think. + */ + *is_private = 0; + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) == 0) + return (0); + if (ret == DB_VERSION_MISMATCH) + goto err; + if (ttype != T_DB && ttype != T_LOG) { + dbenv->err(dbenv, ret, "DB_ENV->open%s%s", + home == NULL ? "" : ": ", home == NULL ? "" : home); + return (1); + } + + /* + * We're looking at a database or set of log files and no environment + * exists. Create one, but make it private so no files are actually + * created. Declare a reasonably large cache so that we don't fail + * when reporting statistics on large databases. + * + * An environment is required to look at databases because we may be + * trying to look at databases in directories other than the current + * one. + */ + if ((ret = dbenv->set_cachesize(dbenv, 0, cache, 1)) != 0) { + dbenv->err(dbenv, ret, "set_cachesize"); + return (1); + } + *is_private = 1; + oflags = DB_CREATE | DB_PRIVATE | DB_USE_ENVIRON; + if (ttype == T_DB) + oflags |= DB_INIT_MPOOL; + if (ttype == T_LOG) + oflags |= DB_INIT_LOG; + if ((ret = dbenv->open(dbenv, home, oflags, 0)) == 0) + return (0); + + /* An environment is required. */ +err: dbenv->err(dbenv, ret, "DB_ENV->open"); + return (1); +} + +int +usage() +{ + fprintf(stderr, "usage: %s %s\n", progname, + "-d file [-fN] [-h home] [-P password] [-s database]"); + fprintf(stderr, "usage: %s %s\n\t%s\n", progname, + "[-cEelmrtVx] [-C Aclop]", + "[-h home] [-L A] [-M A] [-P password] [-R A] [-X A] [-aNZ]"); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. 
*/ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5009", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, + DB_VERSION_MAJOR, DB_VERSION_MINOR, + v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_stat/dd.sh b/util/db_stat/dd.sh new file mode 100644 index 00000000..f6a1117d --- /dev/null +++ b/util/db_stat/dd.sh @@ -0,0 +1,82 @@ +#! /bin/sh +# +# Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. +# +# Display environment's deadlocks based on "db_stat -Co" output. + +t1=__a +t2=__b + +trap 'rm -f $t1 $t2; exit 0' 0 1 2 3 13 15 + +if [ $# -ne 1 ]; then + echo "Usage: dd.sh [db_stat -Co output]" + exit 1 +fi + +if `egrep '\.*\' $1 > /dev/null`; then + n=`egrep '\.*\' $1 | wc -l | awk '{print $1}'` + echo "dd.sh: $1: $n page locks in a WAIT state." +else + echo "dd.sh: $1: No page locks in a WAIT state found." + exit 1 +fi + +# Print out list of node wait states, and output cycles in the graph. +egrep '\.*\' $1 | awk '{print $1 " " $5 " " $7}' | +while read l f p; do + for i in `egrep "\.*\<$f\>.*\.*\<$p\>" $1 | + awk '{print $1}'`; do + echo "$l $i" + done +done | tsort > /dev/null 2>$t1 + +# Display the locks in a single cycle. +c=1 +display_one() { + if [ -s $1 ]; then + echo "Deadlock #$c ============" + c=`expr $c + 1` + cat $1 | sort -n +6 + :> $1 + fi +} + +# Display the locks in all of the cycles. +# +# Requires tsort output some text before each list of nodes in the cycle, +# and the actual node displayed on the line be the second (white-space) +# separated item on the line. 
For example: +# +# tsort: cycle in data +# tsort: 8000177f +# tsort: 80001792 +# tsort: 80001774 +# tsort: cycle in data +# tsort: 80001776 +# tsort: 80001793 +# tsort: cycle in data +# tsort: 8000176a +# tsort: 8000178a +# +# XXX +# Currently, db_stat doesn't display the implicit wait relationship between +# parent and child transactions, where the parent won't release a lock until +# the child commits/aborts. This means the deadlock where parent holds a +# lock, thread A waits on parent, child waits on thread A won't be shown. +if [ -s $t1 ]; then + :>$t2 + while read a b; do + case $b in + [0-9]*) + egrep $b $1 >> $t2;; + *) + display_one $t2;; + esac + done < $t1 + display_one $t2 +else + echo 'No deadlocks found.' +fi + +exit 0 diff --git a/util/db_tuner.c b/util/db_tuner.c new file mode 100644 index 00000000..0ff51215 --- /dev/null +++ b/util/db_tuner.c @@ -0,0 +1,1336 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + * + * This utility choses a reasonable pagesize for a BTREE database. + * + * Here we assume that: + * 1) This set of records are already in a BTREE database, which may been + * configured with a unreasonable page size. + * 2) Treat the database as if it were compacted. + * 3) The goal is to optimize the database for the current content, + * rather than for ongoing insertions. + * + * The page size of a BTREE can be 512, 1024, 2048, 4096, 8192, 16384, + * 32768, 65536, totally 8 different cases. So we have 8 possible BTREE, + * each with different pagesize to contain them. Without actually creating + * those 8 databases, this utility tries to simulate situations, that is, + * for each pagesize, how many leaf pages, over flow pages and duplicate + * pages are needed, and what's the distribution of each kind of pages + * based on their fill factor. + * + * db_tuner contains 2 parts: + * + * I) Simulation of 8 different pagesized databases. 
+ * This includes, the number of leaf pages, overflow pages caused by + * big key/data in leaf layers, and duplicate pages, the distribution + * of each kind of pages in different fill factor ranges. + * + * This is achieved by retrieving those records from existing btree and + * inserting them into different kind of pages. Since the records from + * the btree are sorted, they are inserted into the end of each page. + * If this page become full, that is no enough space, only this new + * record will be put into next new page. + * + * II) Recommend the best page size. + * From our simulation results, this utility choose a page size based on + * the number of overflow pages and storage (on-disk space). + * If there is no overflow pages, then choose the one resulting in + * the smallest storage as the recommended page size. Otherwise, + * choose the one that results in a reasonable small number of overflow pages. + */ + +#include "db_config.h" + +#include +#include + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.\n"; +#endif + +/* + * Fill factor distribution division, + * e.g., [0.000 - 0.099], ..., [0.900 - 0.999], [1.000- 1.000] + */ +#define DIST_DIVISION 11 + +/* Error return code in db_tuner, different with those in src\dbinc\db.in. */ +/* Dist >= DIST_DIVISION. */ +#define EXIT_DIST_OUTRANGE (-31000) +/* "Insert" zero needed. */ +#define EXIT_INSERT_ZERO_NEED (-31001) +/* On-page duplicate set = 0. */ +#define EXIT_INSERT_ZERO_ONPGDUP (-31002) + +/* Follows are some special "insert" types based on the data. */ +/* Insert as normal case. */ +#define INSERT_NORMAL 0x0001 +/* Nothing is inserted. */ +#define INSERT_NOTHING 0x0002 +/* No key but a slot is inserted. */ +#define INSERT_SLOT 0x0003 +/* A B_DUPLICATE point is inserted. 
*/ +#define INSERT_BDUPLICATE 0x0004 + +/* + * Page size of BTREE can be from DB_MIN_PGSIZE (512) to DB_MAX_PGSIZE(64K), + * and all the pagesize is the power of 2, so we have 8 possible cases. + * + * 8 is from ((int)(log(DB_MAX_PGSIZE) - log(DB_MIN_PGSIZE) + 1)). + */ +#define NUM_PGSIZES 8 + +/* Structure used to store statistics of the assessment. */ +typedef struct __tuner_ff_stat { + uintmax_t pgsize_leaf_dist[NUM_PGSIZES][DIST_DIVISION]; + uintmax_t pgsize_ovfl_dist[NUM_PGSIZES][DIST_DIVISION]; + uintmax_t pgsize_dup_dist[NUM_PGSIZES][DIST_DIVISION]; + + /* Info used to track stats across page in a traverse. */ + u_int32_t pg_leaf_offset[NUM_PGSIZES]; + u_int32_t pg_dup_offset[NUM_PGSIZES]; +}TUNER_FF_STAT; + +static int __tuner_analyze_btree __P((DB_ENV *, DB *, u_int32_t)); +static int __tuner_ff_stat_callback __P((DBC *, PAGE *, void *, int *)); +static int __tuner_generate_fillfactor_stats __P((DB_ENV *, DB *, + TUNER_FF_STAT *)); +static int __tuner_insert_dupdata __P((DB *, u_int32_t, int, TUNER_FF_STAT *)); +static int __tuner_insert_kvpair __P((DB *, u_int32_t, u_int32_t, int, int, + int, TUNER_FF_STAT *)); +static int __tuner_leaf_page __P((DBC *, PAGE *, TUNER_FF_STAT *)); +static int __tuner_leaf_dupdata __P((DBC*, PAGE *, int, int, u_int32_t, + TUNER_FF_STAT *)); +static int __tuner_leaf_dupdata_entries __P((DBC *, PAGE *, int, int, int, int, + TUNER_FF_STAT *)); +static int __tuner_opd_data_entries __P((DBC *, PAGE *, int, int, + TUNER_FF_STAT *)); +static int __tuner_opd_data __P((DBC *, PAGE *, int, int, TUNER_FF_STAT *)); +static int __tuner_opd_page __P((DBC *, PAGE *, TUNER_FF_STAT *)); +static int __tuner_print_btree_fillfactor __P((u_int32_t, TUNER_FF_STAT *)); +static int __tuner_record_dup_pg __P((int, TUNER_FF_STAT *)); +static int __tuner_record_last_opd __P((int, TUNER_FF_STAT *)); +static int __tuner_record_leaf_pg __P((int, TUNER_FF_STAT *)); +static int __tuner_record_ovfl_pg __P((u_int32_t, int, TUNER_FF_STAT *)); + +static int 
get_opd_size __P((DBC*, PAGE*, u_int32_t*)); +static int item_size __P((DB *, PAGE *, db_indx_t)); +static int item_space __P((DB *, PAGE *, db_indx_t)); +int main __P((int, char *[])); +static int open_db __P((DB **, DB_ENV *, char *, char *)); +static int sum_opd_page_data_entries __P((DB *, PAGE *)); +static int usage __P((void)); +static int version_check __P((void)); + +const char *progname = "db_tuner"; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp; + DB_ENV *dbenv; + DBTYPE dbtype; + char *dbname, *home, *subdb; + int ch, is_set_dbfile, ret; + u_int32_t cachesize, verbose; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + dbp = NULL; + cachesize = 0; + dbname = home = subdb = NULL; + is_set_dbfile = verbose = 0; + dbtype = DB_UNKNOWN; + + while ((ch = getopt(argc, argv, "c:d:h:vs:")) != EOF) + switch (ch) { + case 'c': + cachesize = atoi(optarg); + break; + case 'd': + dbname = optarg; + is_set_dbfile = 1; + break; + case 'h': + home = optarg; + break; + case 's': + subdb = optarg; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + + /* Handle possible interruptions. */ + __db_util_siginit(); + + if (!is_set_dbfile) + usage(); + + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if ((cachesize != 0) && (ret = + dbenv->set_cachesize(dbenv, (u_int32_t)0, cachesize, 1)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->set_cachesize:"); + goto err; + } + + /* + * If attaching to a pre-existing environment fails, create a + * private one and try again. 
+ */ + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_USE_ENVIRON | DB_PRIVATE, + 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open:"); + goto err; + } + + if ((ret = open_db(&dbp, dbenv, dbname, subdb)) != 0) { + dbenv->err(dbenv, ret, "open_db:"); + goto err; + } + + if ((ret = dbp->get_type(dbp, &dbtype)) != 0) { + dbenv->err(dbenv, ret, "DB->get_type:"); + goto err; + } + + switch (dbtype) { + case DB_BTREE: + if ((ret = __tuner_analyze_btree(dbenv, dbp, verbose)) != 0) + dbenv->err(dbenv, ret, "__tuner_analyze_btree fails."); + break; + default: + dbenv->errx(dbenv, DB_STR("5001", + "%s: Unsupported database type"), progname); + } + +err: + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) + dbenv->err(dbenv, ret, "DB->close: %s", dbname); + + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) + fprintf(stderr, "%s: dbenv->close: %s", progname, + db_strerror(ret)); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * Generate the simulated statistics for each different btree pagesize, + * then print out this information if verbose enabled, finally make our + * recommendation of our best pagesize based on our simulated results. 
+ */ +static int +__tuner_analyze_btree(dbenv, dbp, verbose) + DB_ENV *dbenv; + DB *dbp; + u_int32_t verbose; +{ + TUNER_FF_STAT stats; + int ret; + + memset(&stats, 0, sizeof(TUNER_FF_STAT)); + + if ((ret = __tuner_generate_fillfactor_stats(dbenv, dbp, + &stats)) != 0) { + dbenv->err(dbenv, ret, + "__tuner_generate_fillfactor_stats fails."); + return (ret); + } + + (void)__tuner_print_btree_fillfactor(verbose, &stats); + + return (EXIT_SUCCESS); +} + +/* Traverse the database to gather simulated statistics for each pagesize.*/ +static int +__tuner_generate_fillfactor_stats(dbenv, dbp, stats) + DB_ENV *dbenv; + DB *dbp; + TUNER_FF_STAT *stats; +{ + DBC *dbc; + int i, ret, t_ret; + + ret = t_ret = 0; + + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->cursor:"); + return (ret); + } + + /* + * Call the internal Berkeley DB function, that triggers a callback + * for each page in a btree database. + */ + if ((ret = __bam_traverse(dbc, DB_LOCK_READ, PGNO_INVALID, + __tuner_ff_stat_callback, (void *)stats)) != 0) { + dbenv->err(dbenv, ret, "__bam_traverse:"); + goto err; + } + + /* + * Record the last simulated page for leaf and dup page, + * which ensure at least one page is used. + */ + for (i = 0; i < NUM_PGSIZES; ++i) { + if (stats->pg_leaf_offset[i] > 0 && + (ret = __tuner_record_leaf_pg(i, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_leaf"); + break; + } + + if (stats->pg_dup_offset[i] > 0 && + (ret = __tuner_record_dup_pg(i, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_dup_pg"); + break; + } + } + +err: + if(dbc != NULL && (t_ret = dbc->close(dbc)) != 0) + dbenv->err(dbenv, t_ret, "DBC->close:"); + + if (ret == 0 && t_ret != 0) + ret = t_ret; + + return (ret); +} + +/* + * This callback is used in __bam_traverse. When traversing each page in + * the BTREE, it retrieves each record for simulation. 
+ */ +static int +__tuner_ff_stat_callback(dbc, h, cookie, putp) + DBC *dbc; + PAGE *h; + void *cookie; + int *putp; +{ + DB_ENV *dbenv; + int ret; + + dbenv = dbc->dbenv; + *putp = 0; + + switch (TYPE(h)) { + case P_LBTREE: + if ((ret = __tuner_leaf_page(dbc, h, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_leaf_page"); + return (ret); + } + break; + case P_LDUP: + case P_LRECNO: + /* Coming a new off-page duplicate set.*/ + if (h->prev_pgno == PGNO_INVALID && + (ret = __tuner_opd_page(dbc, h, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_opd_page:"); + return (ret); + } + break; + case P_IBTREE: + case P_IRECNO: + case P_OVERFLOW: + break; + default: + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} + +/* + * Deal with the leaf page of existing database. This includes: + * 1: determine the on-page duplicate set, and calculate its total size + * 2: decise where should this set go (on-page or off-page) in the later + * simulation stage and do some "movement". + * 3: "move" the unique key data pairs to the simulated leaf pages. + */ +static int +__tuner_leaf_page(dbc, h, cookie) + DBC *dbc; + PAGE *h; + TUNER_FF_STAT *cookie; +{ + DB *dbp; + DB_ENV *dbenv; + db_indx_t findx, lindx, indx, *inp, top; + u_int32_t data_sz, key_sz, onpd_sz; + int i, ret, in_data_type; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + /* + * Use some macros from db_page.h to retrieve information from the + * page. P_INP retrieves the offset to the start of page index array. + * NUM_ENT retrieves the number of items on the page. + */ + inp = P_INP(dbp, h); + top = NUM_ENT(h); + ret = 0; + + for (indx = 0; indx < top;) { + /* + * If on-page duplicate, first calculate the total size, + * including one key and all data. + */ + onpd_sz = 0; + if ((indx + P_INDX) < top && inp[indx] == inp[indx + P_INDX]) { + + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + continue; + + /* Count the key once. 
*/ + onpd_sz += item_space(dbp, h, indx); + + for (findx = indx; + indx < top && inp[findx] == inp[indx]; + indx += P_INDX) { + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, + indx + O_INDX)->type)) + continue; + /* Count all the data items. */ + onpd_sz += item_space(dbp, h, indx + O_INDX); + } + + /*Indx range of on-page duplicate set: [findx, lindx)*/ + lindx = indx; + + if (onpd_sz == 0) + return (EXIT_INSERT_ZERO_ONPGDUP); + + /* "Move" on-page duplicate set to simualted pages.*/ + if ((ret = __tuner_leaf_dupdata(dbc, h, findx, lindx, + onpd_sz, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_leaf_dupdata"); + return (ret); + } + } else { + in_data_type = INSERT_NORMAL; + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + continue; + + /* First consider key. */ + key_sz = item_size(dbp, h, indx); + + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx + O_INDX)->type)) + continue; + + /* next consider data.*/ + if (B_TYPE(GET_BKEYDATA(dbp, h, + indx + O_INDX)->type) == B_DUPLICATE) { + /* + * Off-page duplicate set is not handled here + * but on the duplicate pages. + * Here the key is inserted into "simulated" + * leaf_page. + */ + in_data_type = INSERT_NOTHING; + data_sz = 0; + } else + data_sz = item_size(dbp, h, indx + O_INDX); + + for (i = 0; i < NUM_PGSIZES; ++i) { + if ((ret = __tuner_insert_kvpair(dbp, key_sz, + data_sz, i, INSERT_NORMAL, in_data_type, + cookie)) != 0) { + dbenv->err(dbenv, ret, + "__tuner_insert_kvpair"); + break; + } + } + + indx += P_INDX; + } + } + + return (ret); +} + +/* + * "Move" the on-page duplicate data set from the specific page to our + * simulated databases (Indx range of this on-page duplicate set: + * [findx, lindx)), it includes following steps: + * + * First check where should this set go (on-page or off-page duplicate tree). 
+ * + * This is determined as "If total size of duplicate data set is more than 25% + * of a specific page size, then this set go to off-page duplicate tree. + * Otherwise, it goes to on-page duplicate. " + * + * Then "move" this duplicate set to our simulated pages in each simulated + * database. + */ +static int +__tuner_leaf_dupdata(dbc, h, findx, lindx, dup_sz, cookie) + DBC *dbc; + PAGE *h; + int findx, lindx; + u_int32_t dup_sz; + TUNER_FF_STAT *cookie; +{ + DB_ENV *dbenv; + int i, is_opd, ret; + u_int32_t pgsize; + + dbenv = dbc->dbenv; + + for (i = 0; i < NUM_PGSIZES; ++i) { + pgsize = (1 << i) * DB_MIN_PGSIZE; + + /* Check whether this duplicate set go to opd? */ + is_opd = (dup_sz < (pgsize / 4)) ? 0 : 1; + + /* "Move" this on-page duplicate to our simulated pages. */ + if ((ret = __tuner_leaf_dupdata_entries(dbc, h, findx, + lindx, i, is_opd, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_leaf_dupdata_entries"); + return (ret); + } + + /* + * Record the last simulated duplicate pages for a finished + * off-page duplicate set then reset the offset to zero + * for next opd set. + */ + if (is_opd && + (ret = __tuner_record_last_opd(i, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_last_opd"); + return (ret); + } + } + + return (EXIT_SUCCESS); +} + +/* + * "Move" the on-page duplicate set [findx, lindx) on the specific page to + * simulated database with pagesize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + */ +static int +__tuner_leaf_dupdata_entries(dbc, h, findx, lindx, indx_pgsz, is_opd, cookie) + DBC *dbc; + PAGE *h; + int findx, lindx, indx_pgsz, is_opd; + TUNER_FF_STAT *cookie; +{ + DB *dbp; + DB_ENV *dbenv; + db_indx_t indx; + u_int32_t data_sz, key_sz; + int ret, in_key_type; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + for (indx = findx; indx < lindx; indx += P_INDX) { + + key_sz = 0; + in_key_type = INSERT_SLOT; + /* + * For on-page duplicate data, the key is inserted once, + * then its corresponding data. 
+ */ + if (indx == findx) { + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + continue; + + key_sz = item_size(dbp, h, indx); + in_key_type = INSERT_NORMAL; + + /* + * If is_opd, then insert a key + B_DUPLICATE pair for + * this on-page duplicate to simulated leaf page. + * INSERT_BDUPLICATE: B_DUPLICATE point. + */ + if (is_opd && (ret = + __tuner_insert_kvpair(dbp, key_sz, 0, indx_pgsz, + INSERT_NORMAL, INSERT_BDUPLICATE, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_insert_kvpair"); + return (ret); + } + } + + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx + O_INDX)->type)) + continue; + + data_sz = item_size(dbp, h, indx + O_INDX); + + if (is_opd) { + ret = __tuner_insert_dupdata(dbp, data_sz, + indx_pgsz, cookie); + if (ret != 0) { + dbenv->err(dbenv, ret, + "__tuner_insert_dupdata"); + return (ret); + } + } else { + ret = __tuner_insert_kvpair(dbp, key_sz, data_sz, + indx_pgsz, in_key_type, INSERT_NORMAL, + cookie); + + if (ret != 0) { + dbenv->err(dbenv, ret, "__tuner_insert_kvpair"); + return (ret); + } + } + } + return (EXIT_SUCCESS); +} + +/* Tuner the off-page duplicate pages from existing database. */ +static int +__tuner_opd_page(dbc, h, cookie) + DBC *dbc; + PAGE *h; + TUNER_FF_STAT *cookie; +{ + DB_ENV *dbenv; + u_int32_t opd_sz, pgsize; + int i, is_opd, ret; + + dbenv = dbc->dbenv; + ret = opd_sz = 0; + + /* 1st calculate the total size of the duplicate set. */ + if ((ret = get_opd_size(dbc, h, &opd_sz)) != 0) { + dbenv->err(dbenv, ret, "get_opd_size:"); + return (ret); + } + + /* 2nd insert this set into "simulated" pages for each page size.*/ + for (i = 0; i < NUM_PGSIZES; ++i) { + pgsize = (1 << i) * DB_MIN_PGSIZE; + + /* Check whether this duplicate set go to opd? */ + is_opd = (opd_sz < (pgsize / 4)) ? 
0 : 1; + + if ((ret = __tuner_opd_data(dbc, h, i, is_opd, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_opd_data:"); + break; + } + } + + return (ret); +} + +/* "Move" all the off-page duplicate data into simulated on-page or off-page.*/ +static int +__tuner_opd_data(dbc, h, indx_pgsz, is_opd, cookie) + DBC *dbc; + PAGE *h; + int indx_pgsz, is_opd; + TUNER_FF_STAT *cookie; +{ + DB *dbp; + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + PAGE *p; + db_pgno_t next_pgno; + u_int32_t pgsize; + int ret; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + mpf = dbp->mpf; + + p = h; + pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + + /* + * __tuner_leaf_page has inserted one key for each opd already, + * so here only a B_DUPLICATE point is inserted into simulate + * leaf page if this duplicate set goes to on-page. + */ + if (is_opd) { + ret = __tuner_insert_kvpair(dbp, 0, 0, indx_pgsz, + INSERT_NOTHING, INSERT_BDUPLICATE, cookie); + + if (ret!= 0) { + dbenv->err(dbenv, ret, "__tuner_insert_kvpair"); + return (ret); + } + } + + /* Next insert all the data of this duplicate set. */ + while (1) { + ret = __tuner_opd_data_entries(dbc, h, indx_pgsz, is_opd, + cookie); + if (ret != 0) { + dbenv->err(dbenv, ret, "__tuner_opd_data_entries"); + return (ret); + } + + next_pgno = p->next_pgno; + + if (p != h && (ret = mpf->put(mpf, p, dbc->priority, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->put:"); + return (ret); + } + + if (next_pgno == PGNO_INVALID) + break; + + if ((ret = mpf->get(mpf, &next_pgno, dbc->txn, 0, &p)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->get:"); + return (ret); + } + } + + /* Record the last simulate duplicate page if goto off-page duplicate*/ + if (is_opd && (ret = __tuner_record_last_opd(indx_pgsz, cookie)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_last_opd"); + return (ret); + } + + return (EXIT_SUCCESS); +} + +/* + * "Move" the off-page duplicate data set to our simulated on-page or + * off-page. 
+ */ +static int +__tuner_opd_data_entries(dbc, h, indx_pgsz, is_opd, cookie) + DBC *dbc; + PAGE *h; + int indx_pgsz, is_opd; + TUNER_FF_STAT *cookie; +{ + DB *dbp; + DB_ENV *dbenv; + db_indx_t indx; + u_int32_t data_sz; + int ret; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) + continue; + + data_sz = item_size(dbp, h, indx); + + if (is_opd) { + ret = __tuner_insert_dupdata(dbp, data_sz, + indx_pgsz, cookie); + + if (ret != 0) { + dbenv->err(dbenv, ret, + "__tuner_insert_dupdata"); + return (ret); + } + } else { + /* + * __tuner_leaf_page has inserted one key for each + * opd already (this will insert moment later), + * so only data items and key slots are inserted. + */ + ret = __tuner_insert_kvpair(dbp, 0, data_sz, + indx_pgsz, INSERT_SLOT, INSERT_NORMAL, + cookie); + + if (ret != 0) { + dbenv->err(dbenv, ret, + "__tuner_insert_kvpair"); + return (ret); + } + } + } + return (EXIT_SUCCESS); +} + +/* + * Try to insert a key and data pair into simulated leaf pages. + * Key and data pairs are always stored (or referenced) on the same leaf page. + */ +static int +__tuner_insert_kvpair(dbp, key_sz, data_sz, indx_pgsz, in_key, in_data, stats) + DB *dbp; + u_int32_t key_sz, data_sz; + int indx_pgsz, in_key, in_data; + TUNER_FF_STAT *stats; +{ + DB_ENV *dbenv; + int is_big_data, is_big_key, ret; + u_int32_t needed, pgsize; + + dbenv = dbp->dbenv; + is_big_data = is_big_key = 0; + needed = 0; + pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + + if (key_sz > B_MINKEY_TO_OVFLSIZE(dbp, 2, pgsize)) + is_big_key = 1; + + if (data_sz > B_MINKEY_TO_OVFLSIZE(dbp, 2, pgsize)) + is_big_data = 1; + + if (is_big_key) { + needed += BOVERFLOW_PSIZE; + + /* Add big key into ovfl pages. 
*/ + if ((ret = + __tuner_record_ovfl_pg(key_sz, indx_pgsz, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_ovfl_pg:key_sz"); + return (ret); + } + } else { + /* + * key_sz = INSERT_SLOT indicates no key is inserted + * but a slot in the inp array, e.g., on-page duplicate. + * key_sz = INSERT_NOTHING indicates no key no slot is + * inserted. + */ + if (in_key == INSERT_NOTHING) + needed += 0; + else if (in_key == INSERT_SLOT) + needed += sizeof(db_indx_t); + else if (in_key == INSERT_NORMAL) + needed += BKEYDATA_PSIZE(key_sz); + } + + if (is_big_data) { + needed += BOVERFLOW_PSIZE; + + /* Add big data into ovfl pages. */ + if ((ret = + __tuner_record_ovfl_pg(data_sz, indx_pgsz, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_ovfl_pg"); + return (ret); + } + } else { + /* + * in_data = INSERT_BDUPLICATE indicates a B_DUPLICATE is + * inserted, e.g., off-page duplicate case. + * in_data = INSERT_NOTHING indicates nothing is inserted, + * happens when there is a key + B_DUPLICATE pair in + * __tuner_leaf_page, in which case, only the key is inserted + * but no data because the data will considered in + * __tuner_opd_page when an off-page + * duplicate set is coming. + */ + if (in_data == INSERT_NOTHING) + needed += 0; + else if (in_data == INSERT_BDUPLICATE) + needed += BOVERFLOW_PSIZE; + else if (in_data == INSERT_NORMAL) + needed += BKEYDATA_PSIZE(data_sz); + } + + if (needed == 0) + return (EXIT_INSERT_ZERO_NEED); + + /* 1st leaf page, add overhead size. */ + if (stats->pg_leaf_offset[indx_pgsz] == 0) + stats->pg_leaf_offset[indx_pgsz] = SIZEOF_PAGE; + + if ((stats->pg_leaf_offset[indx_pgsz] + needed) > pgsize) { + /* No enough space, then record current page info. */ + if ((ret = __tuner_record_leaf_pg(indx_pgsz, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_leaf_pg"); + return (ret); + } + + /* Insert pair into new page. 
*/ + stats->pg_leaf_offset[indx_pgsz] = needed + SIZEOF_PAGE; + } else + stats->pg_leaf_offset[indx_pgsz] += needed; + + return (EXIT_SUCCESS); +} + +/* Try to insert a duplicate data into simulated off duplicate pages. */ +static int +__tuner_insert_dupdata(dbp, data_sz, indx_pgsz, stats) + DB *dbp; + u_int32_t data_sz; + int indx_pgsz; + TUNER_FF_STAT *stats; +{ + DB_ENV *dbenv; + int is_big_data, ret; + u_int32_t needed, pgsize; + + dbenv = dbp->dbenv; + is_big_data = 0; + needed = 0; + pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + + if (data_sz > B_MINKEY_TO_OVFLSIZE(dbp, 2, pgsize)) + is_big_data = 1; + + if (is_big_data) { + needed = BOVERFLOW_PSIZE; + + /* Add big data into ovfl pages. */ + if ((ret = + __tuner_record_ovfl_pg(data_sz, indx_pgsz, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_ovfl_pg"); + return (ret); + } + } else + needed += BKEYDATA_PSIZE(data_sz); + + if (needed == 0) + return (EXIT_INSERT_ZERO_NEED); + + /* 1st opd page, add overhead size. */ + if (stats->pg_dup_offset[indx_pgsz] == 0) + stats->pg_dup_offset[indx_pgsz] = SIZEOF_PAGE; + + if ((stats->pg_dup_offset[indx_pgsz] + needed) > pgsize) { + /* no enough space then record current page info. */ + if ((ret = __tuner_record_dup_pg(indx_pgsz, stats)) != 0) { + dbenv->err(dbenv, ret, "__tuner_record_dup_pg"); + return (ret); + } + + /* insert new item into new page. */ + stats->pg_dup_offset[indx_pgsz] = needed + SIZEOF_PAGE; + } else + stats->pg_dup_offset[indx_pgsz] += needed; + + return (EXIT_SUCCESS); +} + +/* Insert big item into simulated over flow pages. */ +static int +__tuner_record_ovfl_pg(size, indx_pgsz, stats) + u_int32_t size; + int indx_pgsz; + TUNER_FF_STAT *stats; +{ + u_int32_t pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + int dist; + + /* Update OVFLPAGE list: 1. Add to "full" ovfl pages.*/ + stats->pgsize_ovfl_dist[indx_pgsz][DIST_DIVISION - 1] += + (size / (pgsize - SIZEOF_PAGE)); + /* Update OVFLPAGE list: 2. 
Add the remainder.*/ + size = size % (pgsize - SIZEOF_PAGE); + dist = (int)(((double)(size + SIZEOF_PAGE) * + (DIST_DIVISION - 1)) / pgsize); + + /* assert(dist < DIST_DIVISION); */ + if (dist >= DIST_DIVISION) + return (EXIT_DIST_OUTRANGE); + + ++stats->pgsize_ovfl_dist[indx_pgsz][dist]; + + return (EXIT_SUCCESS); +} + +/* Record simulated leaf page if it has no space to contain new record. */ +static int +__tuner_record_leaf_pg(indx_pgsz, stats) + int indx_pgsz; + TUNER_FF_STAT *stats; +{ + int dist; + u_int32_t pgsize; + + pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + + /* First calculate its fill factor. */ + dist = (int)(((double)stats->pg_leaf_offset[indx_pgsz] * + (DIST_DIVISION - 1)) / pgsize); + + /* assert(dist < DIST_DIVISION); */ + if (dist >= DIST_DIVISION) + return (EXIT_DIST_OUTRANGE); + + /* Then add one page to its corresponding distribution. */ + ++stats->pgsize_leaf_dist[indx_pgsz][dist]; + + return (EXIT_SUCCESS); +} + +/* Record simulated duplicate page if it has no enough space for new record. */ +static int +__tuner_record_dup_pg(indx_pgsz, stats) + int indx_pgsz; + TUNER_FF_STAT *stats; +{ + int dist; + u_int32_t pgsize; + + pgsize = (1 << indx_pgsz) * DB_MIN_PGSIZE; + + /* First calculate its fill factor. */ + dist = (int)(((double)stats->pg_dup_offset[indx_pgsz] * + (DIST_DIVISION - 1)) / pgsize); + + /* assert(dist < DIST_DIVISION); */ + if (dist >= DIST_DIVISION) + return (EXIT_DIST_OUTRANGE); + + /* Then add one page to its corresponding distribution. */ + ++stats->pgsize_dup_dist[indx_pgsz][dist]; + + return (EXIT_SUCCESS); +} + +/* + * Record the last simulated duplicate page when an off-page duplicate set + * is finished, also reset its offset to be zero for next set. 
+ */ +static int +__tuner_record_last_opd(indx_pgsz, stats) + int indx_pgsz; + TUNER_FF_STAT *stats; +{ + int ret; + if (stats->pg_dup_offset[indx_pgsz] != 0 && + (ret = __tuner_record_dup_pg(indx_pgsz, stats)) != 0) + return (ret); + + /* Reset offset to zero for new opd set. */ + stats->pg_dup_offset[indx_pgsz] = 0; + + return (EXIT_SUCCESS); +} + +/* + * When a new off-page duplicate set is coming, we first calculate its total + * size, which will be used to determine whether this set should go to on-page + * or off-page duplicate tree in our simulation part. + * + * As a off-page duplicate set is in a linked pages, we simply traverse this + * link and sum up all the size of each data in each page. + */ +static int +get_opd_size(dbc, h, opd_sz) + DBC *dbc; + PAGE *h; + u_int32_t *opd_sz; +{ + DB *dbp; + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + PAGE *p; + db_pgno_t next_pgno; + int ret; + u_int32_t dup_sz; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + mpf = dbp->mpf; + dup_sz = 0; + ret = 0; + p = h; + + while (1) { + dup_sz += sum_opd_page_data_entries(dbp, p); + + next_pgno = p->next_pgno; + if (p != h && (ret = + mpf->put(mpf, p, dbc->priority, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->put:"); + return (ret); + } + + if (next_pgno == PGNO_INVALID) + break; + + if ((ret = + mpf->get(mpf, &next_pgno, dbc->txn, 0, &p)) != 0) { + dbenv->err(dbenv, ret, "DB_MPOOLFILE->get:"); + return (ret); + } + } + + *opd_sz = dup_sz; + + return (EXIT_SUCCESS); +} + +/* Sum up the space used to contain all the data in a specific page.*/ +static int +sum_opd_page_data_entries(dbp, h) + DB *dbp; + PAGE *h; +{ + db_indx_t i; + u_int32_t sz; + sz = 0; + + for (i = 0; i < NUM_ENT(h); i += O_INDX) { + /* Ignore deleted items. */ + if (B_DISSET(GET_BKEYDATA(dbp, h, i)->type)) + continue; + sz += item_space(dbp, h, i); + } + + return sz; +} + +/* The space used by one item in a page. 
*/ +static int +item_space(dbp, h, indx) + DB *dbp; + PAGE *h; + db_indx_t indx; +{ + return (B_TYPE(GET_BKEYDATA(dbp, h, indx)->type) == B_KEYDATA ? + BKEYDATA_PSIZE(GET_BKEYDATA(dbp, h, indx)->len) : + BKEYDATA_PSIZE(GET_BOVERFLOW(dbp, h, indx)->tlen)); +} + +/* The actual length of a item. */ +static int +item_size(dbp, h, indx) + DB *dbp; + PAGE *h; + db_indx_t indx; +{ + return (B_TYPE(GET_BKEYDATA(dbp, h, indx)->type) == B_KEYDATA ? + GET_BKEYDATA(dbp, h, indx)->len : GET_BOVERFLOW(dbp, h, + indx)->tlen); +} + +/* Print out the information according to user's options. */ +static int +__tuner_print_btree_fillfactor(verbose, stats) + u_int32_t verbose; + TUNER_FF_STAT *stats; +{ + const char * DIVIDE_LINE1 = "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="; + const char * DIVIDE_LINE2 = "-----------|"; + const char * DIVIDE_LINE3 = "---------------------------------------"; + double shift_point; + int best_indx, i, j; + u_int32_t pgsize; + u_int64_t minispace, ttpgcnt[NUM_PGSIZES], ttspace[NUM_PGSIZES]; + uintmax_t dup_cnt[NUM_PGSIZES], leaf_cnt[NUM_PGSIZES], + ovfl_cnt[NUM_PGSIZES]; + + shift_point = 0.099; + best_indx = 0; + minispace = UINT64_MAX; + + for (i = 0; i < NUM_PGSIZES; ++i) { + pgsize = (1 << i) * DB_MIN_PGSIZE; + ovfl_cnt[i] = leaf_cnt[i] = dup_cnt[i] = ttpgcnt[i] = 0; + + for (j = 0; j < DIST_DIVISION; ++j) { + ovfl_cnt[i] += stats->pgsize_ovfl_dist[i][j]; + leaf_cnt[i] += stats->pgsize_leaf_dist[i][j]; + dup_cnt[i] += stats->pgsize_dup_dist[i][j]; + } + + ttpgcnt[i] = ovfl_cnt[i] + leaf_cnt[i] + dup_cnt[i]; + ttspace[i] = pgsize * ttpgcnt[i]; + } + + if (verbose == 1) { + printf("\n %50s \n", + "===========Simulation Results==========="); + printf("\n %s\n %s\n %s\n", + "leaf_pg:\t percentage of leaf page in that range", + "dup_pg:\t percentage of duplicate page in that range", + "ovfl_pg:\t percentage of over flow page in that range"); + + for (i = 0; i < NUM_PGSIZES; ++i) { + printf("\n\n%s%s\n", DIVIDE_LINE1, DIVIDE_LINE1); + printf("page size = 
%d\n", (1 << i) * DB_MIN_PGSIZE); + printf("%s%s\n", DIVIDE_LINE1, DIVIDE_LINE1); + printf("%s\n", DIVIDE_LINE3); + printf("%s %s %s %s\n", "fill factor", + "leaf_pg", "dup_pg", "ovfl_pg"); + + for (j = 0; j < DIST_DIVISION; ++j) { + if (j == (DIST_DIVISION - 1)) + shift_point = 0.000; + else + shift_point = 0.099; + + printf("\n[%2.1f-%4.3f]\t", + (double)j/(DIST_DIVISION - 1), + ((double)j/(DIST_DIVISION - 1) + + shift_point)); + + if (leaf_cnt[i] == 0 || + stats->pgsize_leaf_dist[i][j] == 0) + printf("%3.2f\t", (double)0); + else + printf("%3.2f%%\t", (double) + (stats->pgsize_leaf_dist[i][j] * + 100) / leaf_cnt[i]); + + if (dup_cnt[i] == 0 || + stats->pgsize_dup_dist[i][j] == 0) + printf("%3.2f\t", (double)0); + else + printf("%3.2f%%\t", (double) + (stats->pgsize_dup_dist[i][j] * + 100) / dup_cnt[i]); + + if (ovfl_cnt[i] == 0 || + stats->pgsize_ovfl_dist[i][j] == 0) + printf("%3.2f\t", (double)0); + else + printf("%3.2f%%\t", (double) + (stats->pgsize_ovfl_dist[i][j] * + 100) / ovfl_cnt[i]); + } + } + + printf("\n\n\n\n %55s\n\n", + "=====Summary of simulated statistic====="); + printf(" %s\n %s\n %s\n %s\n %s\n %s\n\n", + "pgsize: \tpage size", "storage: \ton-disk space", + "pgcnt: \ttotal number of all pages " + "(e.g, sum of ovflcnt, leafcnt, dupcnt)", + "ovflcnt: \tnumber of over flow pages", + "leafcnt: \tnumber of leaf pages", + "dupcnt: \tnumber of duplicate pages"); + printf("%s%s%s%s%s%s\n", DIVIDE_LINE2, DIVIDE_LINE2, + DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2); + printf(" %10s| %10s| %10s| %10s| %10s| %10s|\n", "pgsize", + "storage", "pgcnt", "ovflcnt", "leafcnt", "dupcnt"); + printf("%s%s%s%s%s%s\n", DIVIDE_LINE2, DIVIDE_LINE2, + DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2); + for (i = 0; i < NUM_PGSIZES; ++i) { + printf(" %10d|", (1 << i) * DB_MIN_PGSIZE); + printf(" %10u|", (u_int32_t)ttspace[i]); + if (ttspace[i] != (u_int32_t)ttspace[i]) + printf("(truncated value reported)"); + printf(" %10u|", (u_int32_t)ttpgcnt[i]); 
+ if (ttpgcnt[i] != (u_int32_t)ttpgcnt[i]) + printf("(truncated value reported)"); + printf(" %10u|", (u_int32_t)ovfl_cnt[i]); + if (ovfl_cnt[i] != (u_int32_t)ovfl_cnt[i]) + printf("(truncated value reported)"); + printf(" %10u|", (u_int32_t)leaf_cnt[i]); + if (leaf_cnt[i] != (u_int32_t)leaf_cnt[i]) + printf("(truncated value reported)"); + printf(" %10u|", (u_int32_t)dup_cnt[i]); + if (dup_cnt[i] != (u_int32_t)dup_cnt[i]) + printf("(truncated value reported)"); + printf("\n"); + } + printf("%s%s%s%s%s%s\n", DIVIDE_LINE2, DIVIDE_LINE2, + DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2, DIVIDE_LINE2); + } + + /* + * Choose a page size based on the overflow calculation. If there + * is no overflow consideration, then use the smallest on-disk + * space as a recommended page size. + */ + if (ovfl_cnt[0] == 0) { + minispace = ttspace[0]; + for (i = 1; i < NUM_PGSIZES; ++i) + if ((ttspace[i] != 0) && (minispace > ttspace[i])) { + minispace = ttspace[i]; + best_indx = i; + } + } else + for (i = 1; i < NUM_PGSIZES; ++i) + if ((ovfl_cnt[i - 1] - ovfl_cnt[i]) > 0.02 * ttpgcnt[i]) + best_indx = i; + + printf("\n\nFor your input database, we recommend page size = %d \n \t" + "out of 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536 for you.\n", + (1 << best_indx) * DB_MIN_PGSIZE); + + return (EXIT_SUCCESS); +} + +/* Open the specific existing database. 
*/ +static int +open_db(dbpp, dbenv, dbname, subdb) + DB **dbpp; + DB_ENV *dbenv; + char *dbname; + char *subdb; +{ + DB *dbp; + int ret = 0; + + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "db_create fails.\n"); + return (ret); + } + + *dbpp = dbp; + + /* Open a database for read-only.*/ + if ((ret = + dbp->open(dbp, NULL, dbname, subdb, DB_UNKNOWN, DB_RDONLY, 0)) != 0) + dbenv->err(dbenv, ret, "DB->open"); + + return (ret); +} + +/* Usage flag information to indicate what can user query for given database.*/ +static int +usage() +{ + fprintf(stderr, "usage: %s %s\n", progname, + "[-c cachesize] -d file [-h home] [-s database] [-v verbose]"); + exit(EXIT_FAILURE); +} + +/*Check the verion of Berkeley DB libaray, make sure it is the right version.*/ +static int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5002", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + + return (EXIT_FAILURE); + } + + return (EXIT_SUCCESS); +} diff --git a/util/db_upgrade.c b/util/db_upgrade.c new file mode 100644 index 00000000..de39c639 --- /dev/null +++ b/util/db_upgrade.c @@ -0,0 +1,199 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp; + DB_ENV *dbenv; + u_int32_t flags; + int ch, exitval, nflag, ret, t_ret, verbose; + char *home, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + flags = nflag = verbose = 0; + exitval = 0; + home = passwd = NULL; + while ((ch = getopt(argc, argv, "h:NP:sVv")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'N': + nflag = 1; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, DB_STR_A("5018", + "%s: strdup: %s\n", "%s %s\n"), + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 's': + LF_SET(DB_DUPSORT); + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case 'v': + verbose = 1; + break; + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc <= 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ + if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: db_env_create: %s\n", + progname, db_strerror(ret)); + goto err; + } + + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + + if (passwd != NULL && (ret = dbenv->set_encrypt(dbenv, + passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + + /* + * If attaching to a pre-existing environment fails, create a + * private one and try again. + */ + if ((ret = dbenv->open(dbenv, home, DB_USE_ENVIRON, 0)) != 0 && + (ret == DB_VERSION_MISMATCH || + (ret = dbenv->open(dbenv, home, + DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_USE_ENVIRON, + 0)) != 0)) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + for (; !__db_util_interrupted() && argv[0] != NULL; ++argv) { + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", progname, db_strerror(ret)); + goto err; + } + dbp->set_errfile(dbp, stderr); + dbp->set_errpfx(dbp, progname); + if ((ret = dbp->upgrade(dbp, argv[0], flags)) != 0) + dbp->err(dbp, ret, "DB->upgrade: %s", argv[0]); + if ((t_ret = dbp->close(dbp, 0)) != 0 && ret == 0) { + dbenv->err(dbenv, ret, "DB->close: %s", argv[0]); + ret = t_ret; + } + if (ret != 0) + goto err; + /* + * People get concerned if they don't see a success message. + * If verbose is set, give them one. 
+ */ + if (verbose) + printf(DB_STR_A("5019", + "%s: %s upgraded successfully\n", + "%s %s\n"), progname, argv[0]); + } + + if (0) { +err: exitval = 1; + } + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + fprintf(stderr, "usage: %s %s\n", progname, + "[-NsVv] [-h home] [-P password] db_file ..."); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5020", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/db_verify.c b/util/db_verify.c new file mode 100644 index 00000000..736ecc47 --- /dev/null +++ b/util/db_verify.c @@ -0,0 +1,272 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2011 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996, 2011 Oracle and/or its affiliates. 
All rights reserved.\n"; +#endif + +int main __P((int, char *[])); +int usage __P((void)); +int version_check __P((void)); + +const char *progname; + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + DB *dbp, *dbp1; + DB_ENV *dbenv; + u_int32_t flags, cache; + int ch, exitval, nflag, private; + int quiet, resize, ret; + char *home, *passwd; + + if ((progname = __db_rpath(argv[0])) == NULL) + progname = argv[0]; + else + ++progname; + + if ((ret = version_check()) != 0) + return (ret); + + dbenv = NULL; + dbp = NULL; + cache = MEGABYTE; + exitval = nflag = quiet = 0; + flags = 0; + home = passwd = NULL; + while ((ch = getopt(argc, argv, "h:NoP:quV")) != EOF) + switch (ch) { + case 'h': + home = optarg; + break; + case 'N': + nflag = 1; + break; + case 'P': + passwd = strdup(optarg); + memset(optarg, 0, strlen(optarg)); + if (passwd == NULL) { + fprintf(stderr, "%s: strdup: %s\n", + progname, strerror(errno)); + return (EXIT_FAILURE); + } + break; + case 'o': + LF_SET(DB_NOORDERCHK); + break; + case 'q': + quiet = 1; + break; + case 'u': /* Undocumented. */ + LF_SET(DB_UNREF); + break; + case 'V': + printf("%s\n", db_version(NULL, NULL, NULL)); + return (EXIT_SUCCESS); + case '?': + default: + return (usage()); + } + argc -= optind; + argv += optind; + + if (argc <= 0) + return (usage()); + + /* Handle possible interruptions. */ + __db_util_siginit(); + + /* + * Create an environment object and initialize it for error + * reporting. 
+ */ +retry: if ((ret = db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_env_create: %s\n", progname, db_strerror(ret)); + goto err; + } + + if (!quiet) { + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + } + + if (nflag) { + if ((ret = dbenv->set_flags(dbenv, DB_NOLOCKING, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOLOCKING"); + goto err; + } + if ((ret = dbenv->set_flags(dbenv, DB_NOPANIC, 1)) != 0) { + dbenv->err(dbenv, ret, "set_flags: DB_NOPANIC"); + goto err; + } + } + + if (passwd != NULL && + (ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES)) != 0) { + dbenv->err(dbenv, ret, "set_passwd"); + goto err; + } + /* + * Attach to an mpool if it exists, but if that fails, attach to a + * private region. In the latter case, declare a reasonably large + * cache so that we don't fail when verifying large databases. + */ + private = 0; + if ((ret = + dbenv->open(dbenv, home, DB_INIT_MPOOL | DB_USE_ENVIRON, 0)) != 0) { + if (ret != DB_VERSION_MISMATCH) { + if ((ret = + dbenv->set_cachesize(dbenv, 0, cache, 1)) != 0) { + dbenv->err(dbenv, ret, "set_cachesize"); + goto err; + } + private = 1; + ret = dbenv->open(dbenv, home, DB_CREATE | + DB_INIT_MPOOL | DB_PRIVATE | DB_USE_ENVIRON, 0); + } + if (ret != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + } + + /* + * Find out if we have a transactional environment so that we can + * make sure that we don't open the verify database with logging + * enabled. + */ + for (; !__db_util_interrupted() && argv[0] != NULL; ++argv) { + if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + dbenv->err(dbenv, ret, "%s: db_create", progname); + goto err; + } + + if (TXN_ON(dbenv->env) && + (ret = dbp->set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0) { + dbenv->err( + dbenv, ret, "%s: db_set_flags", progname); + goto err; + } + + /* + * We create a 2nd dbp to this database to get its pagesize + * because the dbp we're using for verify cannot be opened. 
+ * + * If the database is corrupted, we may not be able to open + * it, of course. In that case, just continue, using the + * cache size we have. + */ + if (private) { + if ((ret = db_create(&dbp1, dbenv, 0)) != 0) { + dbenv->err( + dbenv, ret, "%s: db_create", progname); + goto err; + } + + if (TXN_ON(dbenv->env) && (ret = + dbp1->set_flags(dbp1, DB_TXN_NOT_DURABLE)) != 0) { + dbenv->err( + dbenv, ret, "%s: db_set_flags", progname); + goto err; + } + + ret = dbp1->open(dbp1, + NULL, argv[0], NULL, DB_UNKNOWN, DB_RDONLY, 0); + + /* + * If we get here, we can check the cache/page. + * !!! + * If we have to retry with an env with a larger + * cache, we jump out of this loop. However, we + * will still be working on the same argv when we + * get back into the for-loop. + */ + if (ret == 0) { + if (__db_util_cache( + dbp1, &cache, &resize) == 0 && resize) { + (void)dbp1->close(dbp1, 0); + (void)dbp->close(dbp, 0); + dbp = NULL; + + (void)dbenv->close(dbenv, 0); + dbenv = NULL; + goto retry; + } + } + (void)dbp1->close(dbp1, 0); + } + + /* The verify method is a destructor. */ + ret = dbp->verify(dbp, argv[0], NULL, NULL, flags); + dbp = NULL; + if (ret != 0) + exitval = 1; + if (!quiet) + printf(DB_STR_A("5105", "Verification of %s %s.\n", + "%s %s\n"), argv[0], ret == 0 ? + DB_STR_P("succeeded") : DB_STR_P("failed")); + } + + if (0) { +err: exitval = 1; + } + + if (dbp != NULL && (ret = dbp->close(dbp, 0)) != 0) { + exitval = 1; + dbenv->err(dbenv, ret, DB_STR("5106", "close")); + } + if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) { + exitval = 1; + fprintf(stderr, + "%s: dbenv->close: %s\n", progname, db_strerror(ret)); + } + + if (passwd != NULL) + free(passwd); + + /* Resend any caught signal. */ + __db_util_sigresend(); + + return (exitval == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); +} + +int +usage() +{ + fprintf(stderr, "usage: %s %s\n", progname, + "[-NoqV] [-h home] [-P password] db_file ..."); + return (EXIT_FAILURE); +} + +int +version_check() +{ + int v_major, v_minor, v_patch; + + /* Make sure we're loaded with the right version of the DB library. */ + (void)db_version(&v_major, &v_minor, &v_patch); + if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) { + fprintf(stderr, DB_STR_A("5107", + "%s: version %d.%d doesn't match library version %d.%d\n", + "%s %d %d %d %d\n"), progname, DB_VERSION_MAJOR, + DB_VERSION_MINOR, v_major, v_minor); + return (EXIT_FAILURE); + } + return (0); +} diff --git a/util/dtrace/apicalls.d b/util/dtrace/apicalls.d new file mode 100755 index 00000000..a6d3d0e5 --- /dev/null +++ b/util/dtrace/apicalls.d @@ -0,0 +1,35 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * apicalls.d - Summarize DB API function calls + * + * This script graphs the count of the main API calls grouped by thread. + * + * The optional integer maxcount parameter directs the script to exit once + * that many functions calls have been accumulated. + * + * usage: apicalls.d { -p | -c " [ 0 ? $1 : -1; + functioncount = 0; + printf("DB API call counts of process %d; interrupt to display summary\n", $target); +} + +pid$target::db*_create:return, +pid$target::__*_pp:return +{ + @calls[tid, probefunc] = count(); + functioncount++; +} + +pid$target::db*_create:return, +pid$target::__*_pp:return +/functioncount == maxcount/ +{ + exit(0); +} diff --git a/util/dtrace/apitimes.d b/util/dtrace/apitimes.d new file mode 100755 index 00000000..1d6c703d --- /dev/null +++ b/util/dtrace/apitimes.d @@ -0,0 +1,53 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
+ * + * apitimes.d - Summarize time spent in DB API functions + * + * This script graphs the time spent in the main API calls, grouped by thread. + * + * The optional integer maxcount parameter directs the script to exit after + * that many functions have been accumulated. + * + * usage: apitimes.d { -p | -c " [ 0 ? $1 : -1; + functioncount = 0; + printf("DB API times of process %d grouped by function; ", $target); + printf("interrupt to display summary\n"); +} + +pid$target::db*_create:entry, +pid$target::__*_pp:entry +{ + self->start = timestamp; +} + +pid$target::db*_create:return, +pid$target::__*_pp:return +/self->start != 0/ +{ + @calltimes[tid, probefunc] = quantize(timestamp - self->start); + self->start = 0; + functioncount++; +} + +pid$target::db*_create:return, +pid$target::__*_pp:return +/functioncount == maxcount/ +{ + exit(0); +} + +dtrace:::END +{ + printf("\n"); + printa("Times that thread %x spent in %s in nanoseconds %@a", @calltimes); +} diff --git a/util/dtrace/apitrace.d b/util/dtrace/apitrace.d new file mode 100755 index 00000000..c9258265 --- /dev/null +++ b/util/dtrace/apitrace.d @@ -0,0 +1,65 @@ +#!/usr/sbin/dtrace -s +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * apitrace.d - Trace the primary BDB API calls + * + * This script displays the entry to and return from each of the main API calls. + * + * The optional integer maxcount parameter directs the script to exit once that + * many functions have been displayed. + * + * On a multiprocessor or multicore machine it is possible to see results which + * are slightly out of order when a thread changes switches to another cpu. + * The output can be sent through "sort -t@ -k2n" to order the lines by time. + * + * usage: apitrace.d { -p | -c " [start = timestamp; + printf("called with (%x, %x, %x...) 
@ %u\n", arg0, arg1, arg2, self->start - epoch); +} + +pid$target::db_*create:return, +pid$target::__*_pp:return +/self->start != 0/ +{ + this->now = timestamp; + this->duration = this->now - self->start; + self->start = 0; + eventcount++; + printf("returns %d after %d ns @ %u\n", arg1, this->duration, this->now - epoch); +} + +pid$target::db_*create:return, +pid$target::__*_pp:return +/eventcount == maxcount/ +{ + printf("Exiting %s:%s count %d\n", probefunc, probename, eventcount); + exit(0); +} diff --git a/util/dtrace/cache.d b/util/dtrace/cache.d new file mode 100755 index 00000000..1e118e0c --- /dev/null +++ b/util/dtrace/cache.d @@ -0,0 +1,106 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * cache.d - Display DB cache activity + * + * usage: cache.d { -p | -c " [= warmup/ +{ + misses++; + @misses[/*arg1 == 0 ? "" : */copyinstr(arg1)] = count(); +} + +/* mpool-hit(unsigned hits, char *file, unsigned pgno) */ +bdb$target:::mpool-hit +/tick >= warmup/ +{ + hits++; + @hits[arg1 == 0 ? "" : copyinstr(arg1)] = count(); +} + +/* mpool-evict(char *file, unsigned pgno, BH *buf */ +bdb$target:::mpool-evict +/tick >= warmup/ +{ + evictions++; + @evictions[arg0 == 0 ? "" : copyinstr(arg0)] = count(); +} + +profile:::tick-1sec +{ + tick++; + secs--; +} + +/* + * Print a banner when starting the measurement period. 
+ */ +profile:::tick-1sec +/tick == warmup/ +{ + printf("Cache info: %8s %8s %8s starting @ %Y\n", "hits", "misses", + "evictions", walltimestamp); + secs = interval; +} + +profile:::tick-1sec +/secs == 0 && tick >= warmup/ +{ + printf(" %6d %8d %8d %8d\n", tick, hits, misses, evictions); + hits = misses = evictions = 0; + printa("Hits for %20s %@u\n", @hits); + printa("Misses for %20s %@u\n", @misses); + printa("Evictions for %20s %@u\n", @evictions); + trunc(@hits); + trunc(@misses); + trunc(@evictions); + secs = interval; +} + +profile:::tick-1sec +/tick == maxtick/ +{ + exit(0); +} + +dtrace:::END +{ + printf("\n"); + printa("Hits for %20s %@u\n", @hits); + printa("Misses for %20s %@u\n", @misses); + printa("Evictions for %20s %@u\n", @evictions); +} diff --git a/util/dtrace/dbdefs.d b/util/dtrace/dbdefs.d new file mode 100755 index 00000000..38a68ca3 --- /dev/null +++ b/util/dtrace/dbdefs.d @@ -0,0 +1,135 @@ +/* + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * dbdefs.d - DTrace declarations of DB data structures. + */ +#ifndef _DBDEFS_D +#define _DBDEFS_D + +typedef D`uint32_t db_pgno_t; +typedef D`uint16_t db_indx_t; + +typedef D`uintptr_t roff_t; +typedef D`int32_t db_atomic_t; + +/* A DBT for the DB API */ +typedef struct __db_dbt { + uintptr_t data; /* Key/data */ + uint32_t size; /* key/data length */ + + uint32_t ulen; /* RO: length of user buffer. */ + uint32_t dlen; /* RO: get/put record length. */ + uint32_t doff; /* RO: get/put record offset. */ + + uintptr_t app_data; + uint32_t flags; +} DBT; + +/* A DBT in shared memory */ +typedef struct __sh_dbt { + uint32_t size; /* key/data length */ + roff_t off; /* Region offset. */ +} SH_DBT; + + +/* + * DB fileids are actually uint8_t fileid[20]; in D it is easier to handle them + * as a struct of 5 four-byte integers. 
+ */ +typedef struct fileid +{ + uint32_t id1; + uint32_t id2; + uint32_t id3; + uint32_t id4; + uint32_t id5; +} FILEID; + +typedef struct __db_lock_ilock { + uint32_t pgno; + FILEID fileid; + uint32_t type; +} DB_ILOCK; + +typedef struct { + uint32_t file; + uint32_t offset; +} DB_LSN; + +typedef struct { + D`ssize_t stqe_next; /* relative offset of next element */ + ssize_t stqe_prev; /* relative offset of prev's next */ +} SH_TAILQ_ENTRY; + +typedef struct { + ssize_t sce_next; /* relative offset to next element */ + ssize_t sce_prev; /* relative offset of prev element */ +} SH_CHAIN_ENTRY; + +typedef struct _db_page { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + db_pgno_t prev_pgno; /* 12-15: Previous page number. */ + db_pgno_t next_pgno; /* 16-19: Next page number. */ + db_indx_t entries; /* 20-21: Number of items on the page. */ + db_indx_t hf_offset; /* 22-23: High free byte page offset. */ + +#define LEAFLEVEL 1 +#define MAXBTREELEVEL 255 + D`uint8_t level; /* 24: Btree tree level. */ + uint8_t type; /* 25: Page type. */ +} PAGE; + +typedef struct __bh { + uint32_t mtx_buf; /* Shared/Exclusive mutex */ + db_atomic_t ref; /* Reference count. */ +#define BH_CALLPGIN 0x001 /* Convert the page before use. */ +#define BH_DIRTY 0x002 /* Page is modified. */ +#define BH_DIRTY_CREATE 0x004 /* Page is modified. */ +#define BH_DISCARD 0x008 /* Page is useless. */ +#define BH_EXCLUSIVE 0x010 /* Exclusive access acquired. */ +#define BH_FREED 0x020 /* Page was freed. */ +#define BH_FROZEN 0x040 /* Frozen buffer: allocate & re-read. */ +#define BH_TRASH 0x080 /* Page is garbage. */ +#define BH_THAWED 0x100 /* Page was thawed. */ + uint16_t flags; + + uint32_t priority; /* Priority. */ + SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */ + + db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ + roff_t mf_offset; /* Associated MPOOLFILE offset. */ + uint32_t bucket; /* Hash bucket containing header. 
*/ + int region; /* Region containing header. */ + + roff_t td_off; /* MVCC: creating TXN_DETAIL offset. */ + SH_CHAIN_ENTRY vc; /* MVCC: version chain. */ +#ifdef DIAG_MVCC + uint16_t align_off; /* Alignment offset for diagnostics.*/ +#endif + + /* + * !!! + * This array must be at least size_t aligned -- the DB access methods + * put PAGE and other structures into it, and then access them directly. + * (We guarantee size_t alignment to applications in the documentation, + * too.) + */ + uint8_t buf[1]; /* Variable length data. */ +} BH; + +typedef enum { + DB_LOCK_NG=0, /* Not granted. */ + DB_LOCK_READ=1, /* Shared/read. */ + DB_LOCK_WRITE=2, /* Exclusive/write. */ + DB_LOCK_WAIT=3, /* Wait for event */ + DB_LOCK_IWRITE=4, /* Intent exclusive/write. */ + DB_LOCK_IREAD=5, /* Intent to share/read. */ + DB_LOCK_IWR=6, /* Intent to read and write. */ + DB_LOCK_READ_UNCOMMITTED=7, /* Degree 1 isolation. */ + DB_LOCK_WWRITE=8 /* Was Written. */ +} db_lockmode_t; + +#endif diff --git a/util/dtrace/locktimes.d b/util/dtrace/locktimes.d new file mode 100755 index 00000000..b9664223 --- /dev/null +++ b/util/dtrace/locktimes.d @@ -0,0 +1,160 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * locktimesid.d - Display lock wait times grouped by filename. + * + * This script graphs the time spent waiting for DB page locks. + * + * The optional integer maxcount parameter directs the script to exit once that + * many page lock waits have been measured. 
+ * + * usage: locktimes.d { -p | -c " [suspend = timestamp; +} + + +/* lock-resume(DBT *lockobj, int lock_mode) */ +bdb$target:::lock-resume +/self->suspend > 0/ +{ + this->duration = timestamp - self->suspend; + self->suspend = 0; + this->dbt = copyin(arg0, sizeof(DBT)); + this->ilock = copyin(this->dbt->data, sizeof(DB_ILOCK)); + this->filename = filenames[this->ilock->fileid.id1, + this->ilock->fileid.id2, this->ilock->fileid.id3, + this->ilock->fileid.id4, this->ilock->fileid.id5]; + @locktimes[this->filename, this->ilock->pgno, modes[arg1]] = + quantize(this->duration); + lockcount++; +} + +bdb$target:::lock-resume +/lockcount == maxcount/ +{ + exit(0); +} + +/* db-open(char *file, char *db, uint32_t flags, uint8_t fileid[20]) + * + * Watch db-open probes in order to get the fileid -> file name mapping. + */ +bdb$target:::db-open +/arg0 != 0/ +{ + this->filename = copyinstr(arg0); + this->fileid = (FILEID *) copyin(arg3, 20); + filenames[this->fileid->id1, this->fileid->id2, this->fileid->id3, + this->fileid->id4, this->fileid->id5] = this->filename; +} + +/* db-cursor(char *file, char *db, unsigned txnid, unsigned flags, uint8_t fileid[20]) + * + * Watch cursor creation probes in order to get the fileid -> file name mapping. + */ +bdb$target:::db-cursor +/arg0 != 0/ +{ + this->filename = (string) copyinstr(arg0); + this->fileid = (FILEID *) copyin(arg4, 20); + filenames[this->fileid->id1, this->fileid->id2, this->fileid->id3, + this->fileid->id4, this->fileid->id5] = this->filename; +} + +dtrace:::END +{ + printa("Wait time for file %s page %u %s locks in nanoseconds %@a\n", @locktimes); +} diff --git a/util/dtrace/locktimesid.d b/util/dtrace/locktimesid.d new file mode 100755 index 00000000..f5329faf --- /dev/null +++ b/util/dtrace/locktimesid.d @@ -0,0 +1,113 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * lockstimesi.d - Display lock wait times grouped by fileid. 
+ * + * This script graphs the time spent waiting for DB page locks. + * + * The optional integer maxcount parameter directs the script to exit once that + * many page lock waits have been measured. + * + * usage: locktimesid.d { -p | -c " [suspend = timestamp; +} + + +/* lock-resume(DBT *lockobj, db_lockmode_t lock_mode) */ +bdb$target:::lock-resume +/self->suspend > 0/ +{ + this->duration = timestamp - self->suspend; + self->suspend = 0; + this->dbt = copyin(arg0, sizeof(struct __db_dbt)); + this->ilock = copyin(this->dbt->data, sizeof(DB_ILOCK)); + @locktimes[this->ilock->fileid, this->ilock->pgno, arg1] = + quantize(this->duration); + lockcount++ +} + +bdb$target:::lock-resume +/lockcount == maxcount/ +{ + exit(0); +} + +dtrace:::END +{ + /* This example uses the default display format for @locktimes. */ +} diff --git a/util/dtrace/mutex.d b/util/dtrace/mutex.d new file mode 100755 index 00000000..c9bd45f6 --- /dev/null +++ b/util/dtrace/mutex.d @@ -0,0 +1,102 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * mutex.d - Display DB mutex wait times. + * + * Specify the target application with -p or -c " [..]" + * + * The optional integer maxcount parameter directs the script to exit once that + * many mutex times have been accumulated. 
+ * + * usage: apicalls.d { -p | -c " [suspend = timestamp; +} + +/* mutex-resume(unsigned mutex, boolean exclusive, unsigned alloc_id, struct __db_mutex_t *mutexp) */ +bdb$target:::mutex-resume +/self->suspend/ +{ + this->duration = timestamp - self->suspend; + self->suspend = 0; + @mutexwaits[arg0, arg1, idnames[arg2], tid] = quantize(this->duration); + @classwaits[idnames[arg2], arg1] = quantize(this->duration); + mutexcount++; +} + +bdb$target:::mutex-resume +/mutexcount == maxcount/ +{ + exit(0); +} + +dtrace:::END +{ + printf("Mutex wait times grouped by (mutex, mode, thread)\n"); + printa("Mutex %d exclusive %d %s thread %p wait times in nanoseconds %@d\n", @mutexwaits); + printf("\nAggregate mutex wait times grouped by (alloc_id, mode)\n"); + printa("Mutex class %s exclusive %d wait times in nanoseconds %@d\n", @classwaits); +} diff --git a/util/dtrace/showerror.d b/util/dtrace/showerror.d new file mode 100755 index 00000000..5961837e --- /dev/null +++ b/util/dtrace/showerror.d @@ -0,0 +1,89 @@ +#!/usr/sbin/dtrace -qs +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * showerror.d - Capture context about certain DB errors. + * + * This shows the stack when a panic or a variant of DB->err() is called. + * At the end of execution it displays an aggregated summary showing + * which function invoked which of the traced error routines. + * + * The optional integer maxcount parameter directs the script to exit when + * that many error messages have been displayed. + * + * usage: showerror.d { -p | -c " [code = arg1; + self->message = (uintptr_t) arg2; + self->arg = (void *) arg3; +} + +pid$target::__db_err:return +{ + this->message = copyinstr(self->message); + printf("DB error %d message \"%s\"(%p,...) from:\n", + self->code, this->message, self->arg); + ustack(); + @errors[this->message, ustack(6)] = count(); + errorcount++; +} + +/* __db_errx(ENV *, char *message, ...) 
*/ +pid$target::__db_errx:entry +{ + self->code = 0; + self->message = (uintptr_t) arg1; + self->arg = (void *) arg2; +} + +pid$target::__db_errx:return +{ + this->message = copyinstr(self->message); + printf("DB error message \"%s\"(%p,...) from:\n", this->message, + self->arg); + ustack(); + @errors[this->message, ustack(4)] = count(); + errorcount++; +} + +pid$target::__env_panic:entry +{ + printf("DB panic with error code %d from:\n", arg0); + ustack(); + errorcount++; +} + +pid$target::__db_err*:return, +pid$target::__env_panic:return +/errorcount == maxcount/ +{ + exit(0); +} + +dtrace:::END +{ + printa("Instances of the message \"%s\"", @errors); +} diff --git a/util/systemtap/apicalls.stp b/util/systemtap/apicalls.stp new file mode 100755 index 00000000..a8bf92ce --- /dev/null +++ b/util/systemtap/apicalls.stp @@ -0,0 +1,51 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * apicalls - Summarize DB API call count grouped by thread. + * + * This summarizes the count of DB apicalls grouped by thread or process. + * + * The path to the DB library is required to be the first argument. + * + * To limit tracing to a particular process use one of the stap options: + * -x or + * -c " [..]" + * + */ + +global calls + +probe begin +{ + printf("DB API call counts of "); + if (target() == 0) + printf("processes using \"%s\"", @1) + else + printf("process %d", target()); + printf("; interrupt to display summary\n") +} + +probe process(@1).function("db_*create").call, + process(@1).function("__*pp").call +{ + calls[(target() == 0 ? pid() : tid()), probefunc()] <<< 1; +} + +probe end +{ + any_seen = 0; + /* Display counts grouped by process, or by thread. 
*/ + if (target() == 0) + printf("%-20s %7s %9s\n", + "Function", "process", "callcount") + else + printf("%-20s %7s %9s for process %d\n", + "Function", "thread", "callcount", target()); + foreach ([a,b] in calls+) { + printf("%-20s %7d %9d\n", b, a, @count(calls[a,b])); + any_seen = 1; + } + if (!any_seen) + printf("No probes were triggered in %s; it might not be the correct library\n", @1); +} diff --git a/util/systemtap/apitimes.stp b/util/systemtap/apitimes.stp new file mode 100755 index 00000000..c9d06da2 --- /dev/null +++ b/util/systemtap/apitimes.stp @@ -0,0 +1,54 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * apitimes - Graph the time spent in DB API calls grouped by thread or processid + * + * The path to the DB library is required to be the first argument. + * + * To limit tracing to a particular process use one of the stap options: + * -x or + * -c " [..]" + * + */ + +global functioncount, maxcount, starts, times; + +probe begin +{ + functioncount = 0; + maxcount = -1; + %( $# >= 2 %? 
maxcount = $2 %) + printf("DB API times of "); + if (target() == 0) + printf("processes using \"%s\"", @1) + else + printf("process %d", target()); + printf(" grouped by function; interrupt to display summary\n"); +} + +probe process(@1).function("db*_create").call, + process(@1).function("__*_pp").call +{ + starts[tid(), probefunc()] = gettimeofday_ns(); +} + + +probe process(@1).function("db*_create").return, + process(@1).function("__*_pp").return +{ + if ((start = starts[tid(), probefunc()]) != 0) { + times[tid(), probefunc()] <<< gettimeofday_ns() - start; + if (++functioncount == maxcount) + exit(); + } +} + +probe end +{ + foreach ([tid, func] in times) { + printf("Times that thread %x spent in %s in nanoseconds\n", + tid, func); + print(@hist_log(times[tid, func])); + } +} diff --git a/util/systemtap/apitrace.stp b/util/systemtap/apitrace.stp new file mode 100755 index 00000000..e69ee7b5 --- /dev/null +++ b/util/systemtap/apitrace.stp @@ -0,0 +1,45 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * apitrace - Display DB API calls and return values + * + * The path to the DB library is required to be the first argument. + * + * To limit tracing to a particular process use one of the stap options: + * -x or + * -c " [..]" + * + */ + +global tracecount, maxlimit; + +probe begin +{ + printf("DB API call trace of "); + if (target() == 0) + printf("processes using \"%s\"\n", @1); + else + printf("process %d\n", target()); + printf("Interrupt to display summary\n"); + maxlimit = -1; + %( $# >= 2 %? 
maxlimit = $2 %) + tracecount = 0; +} + +probe process(@1).function("db_*create").call, + process(@1).function("__*_*pp").call +{ + printf("%s -> %s called with (%s)\n", + thread_indent(1), probefunc(), $$parms); + +} + +probe process(@1).function("db*_create").return, + process(@1).function("__*_*pp").return +{ + printf("%s <- %s returns %d\n", thread_indent(-1), + probefunc(), $return) + if (++tracecount == maxlimit) + exit(); +} diff --git a/util/systemtap/cache.stp b/util/systemtap/cache.stp new file mode 100755 index 00000000..c13451ec --- /dev/null +++ b/util/systemtap/cache.stp @@ -0,0 +1,104 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * cache.stp - Display DB cache activity groups by file. + * + * The path to the DB library is required to be the first argument. + * + * usage: + * cache [>] \ + * [interval [#iterations [warmup period]]] + * + * The defaults are + * interval - 1 second + * iterations - 0: unlimited) + * warmup - 0: start measuring right away) + * + * For each 'interval' seconds display overall and per-file cache stats: + * hits + * misses + * evictions + * Empty lines are displayed when where was no cache activity for that + * file & counter during that interval + */ +global interval, current, iterations; +global warmup, hits, misses, evictions, secs, tick; + +probe begin +{ + interval = 10; + iterations = 60; + warmup = 0; + %( $# >= 2 %? interval = $2 %) + %( $# >= 3 %? iterations = $3 %) + %( $# >= 4 %? 
warmup = $4 %) + secs = interval; + + tick = 0; + current = 0; +} + +function printstats() +{ + foreach ([file+] in hits) + printf("Hits for %s %d\n", file, @count(hits[file])); + foreach ([file+] in misses) + printf("Misses for %s %d\n", file, @count(misses[file])); + foreach ([file+] in evictions) + printf("Evictions for %s %d\n", file, @count(evictions[file])); + delete hits; + delete misses; + delete evictions; +} + +/* mpool-miss(unsigned misses, char *file, unsigned pgno) + */ + +probe process(@1).mark("mpool__miss") +{ + if (tick >= warmup) + misses[$arg2 == 0 ? "" : user_string($arg2)] <<< 1; +} + +/* mpool-hit(unsigned hits, char *file, unsigned pgno) */ +probe process(@1).mark("mpool__hit") +{ + if (tick >= warmup) + hits[$arg2 == 0 ? "" : user_string($arg2)] <<< 1; +} + +/* mpool-evict(char *file, unsigned pgno, BH *buf */ +probe process(@1).mark("mpool__evict") +{ + if (tick >= warmup) + evictions[$arg1 == 0 ? "" : user_string($arg1)] <<< 1; +} + +probe timer.sec(1) +{ + /* Print a banner when starting the first measurement interval. */ + if (tick == warmup) { + printf("Cache info: %8s %8s %8s %sstarting @ %s\n", "hits", + "misses", "evictions", + iterations == 0 ? "" : sprintf("%d iterations ", iterations), + ctime(gettimeofday_s())); + secs = interval; + } + tick++; + secs--; + /* Display and erase statistics when ending an interval */ + if (secs == 0 && tick >= warmup) { + printf(" %6d %8d %8d %8d\n", tick, hits, misses, evictions); + hits = misses = evictions = 0; + printstats(); + secs = interval; + if (++current == iterations) + exit(); + } +} + +probe end +{ + printf("\ncache.stp completed\n"); +} diff --git a/util/systemtap/locktimes.stp b/util/systemtap/locktimes.stp new file mode 100755 index 00000000..5997e4b8 --- /dev/null +++ b/util/systemtap/locktimes.stp @@ -0,0 +1,154 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
+ * + * lockstimes.d - Display lock wait times grouped by filename + * + * This graphs the time spent waiting for DB locks to become available. + * + * The path to the DB library is required to be the first argument. + * + * usage: + * locktimes [>] [] + * + * The result times in nanoseconds/cycles? are grouped by + * (filename, pgno, lock_mode) + * + * If an integer limit argument is given, then only the top locks are + * displayed, ordered by #times that the lock was waited for. + * + * Markers used: + * lock__suspend(struct __db_dbt *, db_lockmode_t lock_ mode) + * lock__resume(struct __db_dbt *, db_lockmode_t lock_ mode) + * db-open(char *file, char *db, uint32_t flags, uint8_t fileid[20]) + * db-cursor(char *file, char *db, unsigned txnid, uint32_t flags, + * uint8_t fileid[20]) + */ + +global filenames, lockcount, locktimes, maxcount, modes, suspend, top + +probe begin +{ + maxcount = 0; + lockcount = 0; + %( $# >= 2 %? maxcount = $2; %) + top = 0; + topdesc = ""; + %( $# >= 3 %? top = $3; topdesc = sprintf("top %d ", top) %) + printf("Tracing %sDB lock wait times grouped by ", topdesc); + printf("(filename, pgno, lock_mode)\n^C to display summary\n"); + modes[0] = "NOTGRANTED"; + modes[1] = "READ"; + modes[2] = "WRITE"; + modes[3] = "WAIT"; + modes[4] = "INTENT_WRITE"; + modes[5] = "INTENT_READ"; + modes[6] = "INTENT_WR"; + modes[7] = "READ_UNCOMMITTED"; + modes[8] = "WAS_WRITE"; +} + +function getns() +{ + t = gettimeofday_ns(); + /* + * On some virtual machine monitors gettimeofday_ns() returns 0. When + * that happens approximate it as if this has a 2 Ghz processor. + */ + if (t == 0) + t = get_cycles() / 2; + return (t); +} + +function fileidstr:string(fileid) +{ + if (fileid < 10000) + return sprintf("Null ptr? 
fileid@%p", fileid); + return sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", + user_char(fileid + 0) & 0xff, + user_char(fileid + 1) & 0xff, + user_char(fileid + 2) & 0xff, + user_char(fileid + 3) & 0xff, + user_char(fileid + 4) & 0xff, + user_char(fileid + 5) & 0xff, + user_char(fileid + 6) & 0xff, + user_char(fileid + 7) & 0xff, + user_char(fileid + 8) & 0xff, + user_char(fileid + 9) & 0xff, + user_char(fileid + 10) & 0xff, + user_char(fileid + 11) & 0xff, + user_char(fileid + 12) & 0xff, + user_char(fileid + 13) & 0xff, + user_char(fileid + 14) & 0xff, + user_char(fileid + 15) & 0xff, + user_char(fileid + 16) & 0xff, + user_char(fileid + 17) & 0xff, + user_char(fileid + 18) & 0xff, + user_char(fileid + 19) & 0xff) +} + +/* db-open(char *file, char *db, uint32_t flags, uint8_t fileid[20]) + * + * Watch db-open probes in order to get the fileid -> file name mapping. + */ +probe process(@1).mark("db__open") +{ + if ($arg1 != 0 && $arg4 != 0) + filenames[fileidstr($arg4)] = user_string($arg1); +} + +/* db-cursor(char *file, char *db, unsigned txnid, uint32_t flags, uint8_t fileid[20]) + * + * Watch cursor creation markers in order to get the fileid -> file name mapping. + */ +probe process(@1).mark("db__cursor") +{ + if ($arg1 != 0) + filenames[fileidstr($arg5)] = user_string($arg1); +} + + +probe process(@1).mark("lock__suspend") +{ + suspend[tid()] = getns(); +} + +/* lock__resume(DBT *lockobj, db_lockmode_t lock_mode) */ +probe process(@1).mark("lock__resume") +{ + start = suspend[tid()]; + if (start != 0) { + ilock = @cast($arg1, "DBT", "")->data; + idstr = fileidstr(@cast(ilock, "DB_LOCK_ILOCK", "")->fileid); + duration = getns() - start; + locktimes[filenames[idstr], + @cast(ilock, "DB_LOCK_ILOCK", "")->pgno, $arg2] <<< duration; + suspend[tid()] = 0; + /* Stop if we've reached the request sample size. 
*/ + if (maxcount != 0 && ++lockcount >= maxcount) + exit(); + } +} + +function printstats(filename, pgno, lock_mode) +{ + printf("Wait time for file %s page %u %s locks in nanoseconds; %d waits totalling %d\n", + filename, pgno, modes[lock_mode], + @count(locktimes[filename, pgno, lock_mode]), + @sum(locktimes[filename, pgno, lock_mode])); + print(@hist_log(locktimes[filename, pgno, lock_mode])) +} + +probe end +{ + /* + * Order results by lock wait count if top results were requested, + * otherwise order by filename. + */ + if (top != 0) + foreach ([filename, pgno, lock_mode] in locktimes- limit top) + printstats(filename, pgno, lock_mode) + else + foreach ([filename+, pgno, lock_mode] in locktimes) + printstats(filename, pgno, lock_mode) +} diff --git a/util/systemtap/locktimesid.stp b/util/systemtap/locktimesid.stp new file mode 100755 index 00000000..7c7df439 --- /dev/null +++ b/util/systemtap/locktimesid.stp @@ -0,0 +1,123 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * lockstimesid.d - Display lock wait times grouped by fileid. + * + * Graphs the time spent in DB lock waits + * + * The result times in nanoseconds/cycles? are grouped by + * (fileid, pgno, lock_mode) + * + * Markers used: + * lock__suspend(struct __db_dbt *, db_lockmode_t lock_ mode) + * lock__resume(struct __db_dbt *, db_lockmode_t lock_ mode) + */ + +global lockcount, locktimes, maxcount, modes, suspend, top; + +function getns() +{ + t = gettimeofday_ns(); + /* + * On some virtual machine monitors gettimeofday_ns() returns 0. When + * that happens approximate it as if this has a 2 Ghz processor. 
+ */ + if (t == 0) + t = get_cycles() / 2; + return (t); +} + + +function fileidstr:string(fileid) +{ + return sprintf("%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + user_char(fileid + 0) & 0xff, + user_char(fileid + 1) & 0xff, + user_char(fileid + 2) & 0xff, + user_char(fileid + 3) & 0xff, + user_char(fileid + 4) & 0xff, + user_char(fileid + 5) & 0xff, + user_char(fileid + 6) & 0xff, + user_char(fileid + 7) & 0xff, + user_char(fileid + 8) & 0xff, + user_char(fileid + 9) & 0xff, + user_char(fileid + 10) & 0xff, + user_char(fileid + 11) & 0xff, + user_char(fileid + 12) & 0xff, + user_char(fileid + 13) & 0xff, + user_char(fileid + 14) & 0xff, + user_char(fileid + 15) & 0xff, + user_char(fileid + 16) & 0xff, + user_char(fileid + 17) & 0xff, + user_char(fileid + 18) & 0xff, + user_char(fileid + 19) & 0xff) +} + +probe begin +{ + maxcount = 0; + lockcount = 0; + %( $# >= 2 %? maxcount = $2; %) + top = 0; + topdesc = ""; + %( $# >= 3 %? top = $3; topdesc = sprintf("top %d ", top) %) + printf("Tracing %sDB lock wait times grouped by ", topdesc); + printf("(binary fileid, pgno, lock_mode)\n^C to display summary\n"); + modes[0] = "NOTGRANTED"; + modes[1] = "READ"; + modes[2] = "WRITE"; + modes[3] = "WAIT"; + modes[4] = "INTENT_WRITE"; + modes[5] = "INTENT_READ"; + modes[6] = "INTENT_WR"; + modes[7] = "READ_UNCOMMITTED"; + modes[8] = "WAS_WRITE"; +} + +probe process(@1).mark("lock__suspend") +{ + /* printf("%s\n", pp()); */ + suspend[tid()] = getns(); + +} + +/* lock__resume(DBT *lockobj, db_lockmode_t lock_mode) */ +probe process(@1).mark("lock__resume") +{ + start = suspend[tid()]; + if (start != 0) { + ilock = @cast($arg1, "DBT", "")->data; + idstr = fileidstr(@cast(ilock, "DB_LOCK_ILOCK", "")->fileid); + duration = getns() - start; + locktimes[idstr, + @cast(ilock, "DB_LOCK_ILOCK", "")->pgno, $arg2] <<< duration; + suspend[tid()] = 0; + /* Stop if we've reached the request sample size. 
*/ + if (maxcount != 0 && ++lockcount >= maxcount) + exit(); + } +} + +function printstats(fileid, pgno, lock_mode) +{ + printf("Wait time for fileid %s %s pgno %u; %d waits totalling %d\n", + fileid, modes[lock_mode], pgno, + @count(locktimes[fileid, pgno, lock_mode]), + @sum(locktimes[fileid, pgno, lock_mode])); + print(@hist_log(locktimes[fileid, pgno, lock_mode])) +} + +probe end +{ + /* + * Order results by lock wait count if top results were requested, + * otherwise order by fileid. + */ + if (top != 0) + foreach ([fileid, pgno, lock_mode] in locktimes- limit top) + printstats(fileid, pgno, lock_mode) + else + foreach ([fileid+, pgno, lock_mode] in locktimes) + printstats(fileid, pgno, lock_mode) +} diff --git a/util/systemtap/mutex.stp b/util/systemtap/mutex.stp new file mode 100755 index 00000000..b615a401 --- /dev/null +++ b/util/systemtap/mutex.stp @@ -0,0 +1,112 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * mutex.stp - Display DB mutex wait times. + * + * Specify one particular process with -x or -c " [..]" + * + * Probes: mutex suspend and resume + * suspend(unsigned mutex, unsigned exclusive, unsigned alloc_id); + * resume(unsigned mutex, unsigned exclusive, unsigned alloc_id); + * + * mutex: + * is the integer mutex id returned by mutex_alloc() + * exclusive: + * is set to 1 except when obtaining a read-write latch with + * shared access (allocated with DB_MUTEX_SHARED), then it is 0. + * alloc_id: + * is one of dbinc/mutex.h's MTX_XXX definitions (i.e., the class) + */ + +global idnames; +global suspend; +global mutexwaits; +global classwaits; + +function getns() +{ + t = gettimeofday_ns(); + /* + * On some virtual machine monitors gettimeofday_ns() returns 0. When + * that happens approximate it as if this has a 2 Ghz processor. 
+ */ + if (t == 0) + t = get_cycles() / 2; + return (t); +} + +probe begin +{ + idnames[1] = "APPLICATION"; + idnames[2] = "ATOMIC_EMULATION"; + idnames[3] = "DB_HANDLE"; + idnames[4] = "ENV_DBLIST"; + idnames[5] = "ENV_HANDLE"; + idnames[6] = "ENV_REGION"; + idnames[7] = "LOCK_REGION"; + idnames[8] = "LOGICAL_LOCK"; + idnames[9] = "LOG_FILENAME"; + idnames[10] = "LOG_FLUSH"; + idnames[11] = "LOG_HANDLE"; + idnames[12] = "LOG_REGION"; + idnames[13] = "MPOOLFILE_HANDLE"; + idnames[14] = "MPOOL_BH"; + idnames[15] = "MPOOL_FH"; + idnames[16] = "MPOOL_FILE_BUCKET"; + idnames[17] = "MPOOL_HANDLE"; + idnames[18] = "MPOOL_HASH_BUCKET"; + idnames[19] = "MPOOL_REGION"; + idnames[20] = "MUTEX_REGION"; + idnames[21] = "MUTEX_TEST"; + idnames[22] = "REP_CHKPT"; + idnames[23] = "REP_DATABASE"; + idnames[24] = "REP_DIAG"; + idnames[25] = "REP_EVENT"; + idnames[26] = "REP_REGION"; + idnames[27] = "REP_START"; + idnames[28] = "REP_WAITER"; + idnames[29] = "REPMGR"; + idnames[30] = "SEQUENCE"; + idnames[31] = "TWISTER"; + idnames[32] = "TCL_EVENTS"; + idnames[33] = "TXN_ACTIVE"; + idnames[34] = "TXN_CHKPT"; + idnames[35] = "TXN_COMMIT"; + idnames[36] = "TXN_MVCC"; + idnames[37] = "TXN_REGION"; +} + +probe process(@1).mark("mutex__suspend") +{ + suspend[tid()] = getns(); + +} + +/* mutex-resume(unsigned mutex, boolean exclusive, unsigned alloc_id, struct __db_mutex_t *mutexp) */ +probe process(@1).mark("mutex__resume") +{ + start = suspend[tid()]; + class = idnames[$arg3]; + if (start != 0) { + duration = getns() - start; + mutexwaits[$arg1, $arg2, class, tid()] <<< duration; + classwaits[class, $arg2] <<< duration; + suspend[tid()] = 0; + } +} + +probe end +{ + foreach ([mutex, excl, alloc_id, tid] in mutexwaits) { + printf("Mutex %d %s %s thread %d wait times in nanoseconds\n", + mutex, excl ? 
"exclusive" : "shared", alloc_id, tid); + print(@hist_log(mutexwaits[mutex, excl, alloc_id, tid])) + } + print("Aggregate mutex wait times grouped by (alloc_id, mode)\n"); + foreach ([class, excl] in classwaits) { + printf("Mutex class %s %s wait times in nanoseconds\n", class, + excl ? "exclusive" : "shared"); + print(@hist_log(classwaits[class, excl])); + } +} diff --git a/util/systemtap/showerror.stp b/util/systemtap/showerror.stp new file mode 100755 index 00000000..4c7fc433 --- /dev/null +++ b/util/systemtap/showerror.stp @@ -0,0 +1,62 @@ +#!/usr/bin/stap +/* + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * showerror.stp - Capture context about certain DB errors. + * + * This shows the stack when a variant of DB->err() is called. + * Specify the target process on the command line as + * -p or + * -c " [..]" + * + * This script does not require DWARF symbols in libdb. + */ + +global errors, errorcount, maxlimit; + +probe begin +{ + errorcount = 0; + maxlimit = -1; + %( $# >= 2 %? maxlimit = $2 %) + printf("Display DB stack when "); + if (pid() == 0) + printf("an application using %s", @1) + else + printf("process %d", pid()); + printf(" prints a DB internal error message: limit %d\n", maxlimit); +} + +/* __db_err(ENV *, int errorcode, char *message, ...) */ +probe process(@1).function("__db_err").call +{ + message = user_string(pointer_arg(3)); + printf("DB error %d message \"%s\"(%x,...) from:\n", + int_arg(2), message, ulong_arg(4)); + print_backtrace(); + errors[message, backtrace()] <<< 1; + if (++errorcount == maxlimit) + exit(); +} + +/* __db_errx(ENV *, char *message, ...) */ +probe process(@1).function("__db_errx").call +{ + message = user_string(pointer_arg(2)); + printf("DB error message \"%s\"(%x,...) 
parms %s from:\n", message, + int_arg(3), $$parms); + print_backtrace(); + errors[message, backtrace()] <<< 1; + if (++errorcount == maxlimit) + exit(); +} + + +/* __env_panic(ENV *, int errorcode) */ +probe process(@1).function("__env_panic").call +{ + printf("DB panic with error code %d from:\n", int_arg(2)); + print_backtrace(); + if (++errorcount == maxlimit) + exit(); +}